From 88a309465b3f05a100c3b81966982c0f9f5d23a6 Mon Sep 17 00:00:00 2001
From: Tom Rix <trix@redhat.com>
Date: Thu, 20 Jan 2022 05:20:06 -0800
Subject: [PATCH 0001/4122] lib: zstd: clean up double word in comment.

Remove the second 'a' and 'into'.

Signed-off-by: Tom Rix <trix@redhat.com>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 include/linux/zstd_lib.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
index b8c7dbf98390f..6b91758b61af9 100644
--- a/include/linux/zstd_lib.h
+++ b/include/linux/zstd_lib.h
@@ -1330,7 +1330,7 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
 
 /*! ZSTD_mergeBlockDelimiters() :
  * Given an array of ZSTD_Sequence, remove all sequences that represent block delimiters/last literals
- * by merging them into into the literals of the next sequence.
+ * by merging them into the literals of the next sequence.
  *
  * As such, the final generated result has no explicit representation of block boundaries,
  * and the final last literals segment is not represented in the sequences.
@@ -1377,7 +1377,7 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size
 /*! ZSTD_writeSkippableFrame() :
  * Generates a zstd skippable frame containing data given by src, and writes it to dst buffer.
  *
- * Skippable frames begin with a a 4-byte magic number. There are 16 possible choices of magic number,
+ * Skippable frames begin with a 4-byte magic number. There are 16 possible choices of magic number,
  * ranging from ZSTD_MAGIC_SKIPPABLE_START to ZSTD_MAGIC_SKIPPABLE_START+15.
  * As such, the parameter magicVariant controls the exact skippable frame magic number variant used, so
  * the magic number used will be ZSTD_MAGIC_SKIPPABLE_START + magicVariant.
-- 
GitLab


From c73275cf6834787ca090317f1d20dbfa3b7f05aa Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Tue, 23 Aug 2022 09:15:03 +0800
Subject: [PATCH 0002/4122] apparmor: fix a memleak in multi_transaction_new()

In multi_transaction_new(), the variable t is not freed or passed out
on the failure of copy_from_user(t->data, buf, size), which could lead
to a memleak.

Fix this bug by adding a put_multi_transaction(t) in the error path.

Fixes: 1dea3b41e84c5 ("apparmor: speed up transactional queries")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index d066ccc219e2d..7160e7aa58b94 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -868,8 +868,10 @@ static struct multi_transaction *multi_transaction_new(struct file *file,
 	if (!t)
 		return ERR_PTR(-ENOMEM);
 	kref_init(&t->count);
-	if (copy_from_user(t->data, buf, size))
+	if (copy_from_user(t->data, buf, size)) {
+		put_multi_transaction(t);
 		return ERR_PTR(-EFAULT);
+	}
 
 	return t;
 }
-- 
GitLab


From 9c4557efc558a68e4cd973490fd936d6e3414db8 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 6 Sep 2022 03:39:55 -0700
Subject: [PATCH 0003/4122] apparmor: fix lockdep warning when removing a
 namespace

Fix the following lockdep warning

[ 1119.158984] ============================================
[ 1119.158988] WARNING: possible recursive locking detected
[ 1119.158996] 6.0.0-rc1+ #257 Tainted: G            E    N
[ 1119.158999] --------------------------------------------
[ 1119.159001] bash/80100 is trying to acquire lock:
[ 1119.159007] ffff88803e79b4a0 (&ns->lock/1){+.+.}-{4:4}, at: destroy_ns.part.0+0x43/0x140
[ 1119.159028]
               but task is already holding lock:
[ 1119.159030] ffff8881009764a0 (&ns->lock/1){+.+.}-{4:4}, at: aa_remove_profiles+0x3f0/0x640
[ 1119.159040]
               other info that might help us debug this:
[ 1119.159042]  Possible unsafe locking scenario:

[ 1119.159043]        CPU0
[ 1119.159045]        ----
[ 1119.159047]   lock(&ns->lock/1);
[ 1119.159051]   lock(&ns->lock/1);
[ 1119.159055]
                *** DEADLOCK ***

Which is caused by an incorrect lockdep nesting notation

Fixes: feb3c766a3ab ("apparmor: fix possible recursive lock warning in __aa_create_ns")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 499c0209b6a46..fbdfcef91c616 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -1170,7 +1170,7 @@ ssize_t aa_remove_profiles(struct aa_ns *policy_ns, struct aa_label *subj,
 
 	if (!name) {
 		/* remove namespace - can only happen if fqname[0] == ':' */
-		mutex_lock_nested(&ns->parent->lock, ns->level);
+		mutex_lock_nested(&ns->parent->lock, ns->parent->level);
 		__aa_bump_ns_revision(ns);
 		__aa_remove_ns(ns);
 		mutex_unlock(&ns->parent->lock);
-- 
GitLab


From f47acc4b7c43d566bf42816335830c4c17f9c200 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 6 Sep 2022 14:03:44 -0700
Subject: [PATCH 0004/4122] apparmor: reserve mediation classes

Reserve mediation classes that exist in out of tree development
branches or are used by userspace mediation helpers.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/apparmor.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 9c3fc36a07023..dd2c131ed1706 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -28,8 +28,15 @@
 #define AA_CLASS_SIGNAL		10
 #define AA_CLASS_NET		14
 #define AA_CLASS_LABEL		16
+#define AA_CLASS_POSIX_MQUEUE	17
+#define AA_CLASS_IO_URING	18
+#define AA_CLASS_MODULE		19
+#define AA_CLASS_DISPLAY_LSM	20
 
-#define AA_CLASS_LAST		AA_CLASS_LABEL
+#define AA_CLASS_X		31
+#define AA_CLASS_DBUS		32
+
+#define AA_CLASS_LAST		AA_CLASS_DBUS
 
 /* Control parameters settable through module/boot flags */
 extern enum audit_mode aa_g_audit;
-- 
GitLab


From f4d6b94b40c966ddd9eeb0d451e8a02c595ec7e3 Mon Sep 17 00:00:00 2001
From: Jon Tourville <jon.tourville@canonical.com>
Date: Mon, 11 Jul 2022 11:36:08 -0500
Subject: [PATCH 0005/4122] apparmor: use zstd compression for profile data

Change the algorithm used by apparmor to compress profile data from
zlib to zstd, using the new zstd API introduced in 5.16.

Zstd provides a larger range of compression levels than zlib and
significantly better performance at the default level (for a relatively
small increase in compressed size).

The apparmor module parameter raw_data_compression_level is now clamped
to the minimum and maximum compression levels reported by the zstd
library. A compression level of 0 retains the previous behavior of
disabling policy compression instead of using zstd's behavior, which is
to use the default compression level.

Signed-off-by: Jon Tourville <jon.tourville@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/Kconfig         |   4 +-
 security/apparmor/apparmorfs.c    |  60 +++++++---------
 security/apparmor/lsm.c           |  10 +--
 security/apparmor/policy_unpack.c | 109 ++++++++++++++----------------
 4 files changed, 81 insertions(+), 102 deletions(-)

diff --git a/security/apparmor/Kconfig b/security/apparmor/Kconfig
index cb3496e00d8a6..acac3bb3eef2b 100644
--- a/security/apparmor/Kconfig
+++ b/security/apparmor/Kconfig
@@ -85,8 +85,8 @@ config SECURITY_APPARMOR_HASH_DEFAULT
 config SECURITY_APPARMOR_EXPORT_BINARY
 	bool "Allow exporting the raw binary policy"
 	depends on SECURITY_APPARMOR_INTROSPECT_POLICY
-	select ZLIB_INFLATE
-	select ZLIB_DEFLATE
+	select ZSTD_COMPRESS
+	select ZSTD_DECOMPRESS
 	default y
 	help
 	  This option allows reading back binary policy as it was loaded.
diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 7160e7aa58b94..d98bbf267fc77 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -21,7 +21,7 @@
 #include <linux/fs.h>
 #include <linux/fs_context.h>
 #include <linux/poll.h>
-#include <linux/zlib.h>
+#include <linux/zstd.h>
 #include <uapi/linux/major.h>
 #include <uapi/linux/magic.h>
 
@@ -1297,42 +1297,30 @@ SEQ_RAWDATA_FOPS(revision);
 SEQ_RAWDATA_FOPS(hash);
 SEQ_RAWDATA_FOPS(compressed_size);
 
-static int deflate_decompress(char *src, size_t slen, char *dst, size_t dlen)
+static int decompress_zstd(char *src, size_t slen, char *dst, size_t dlen)
 {
 #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY
-	if (aa_g_rawdata_compression_level != 0) {
-		int error = 0;
-		struct z_stream_s strm;
-
-		memset(&strm, 0, sizeof(strm));
-
-		strm.workspace = kvzalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
-		if (!strm.workspace)
-			return -ENOMEM;
-
-		strm.next_in = src;
-		strm.avail_in = slen;
-
-		error = zlib_inflateInit(&strm);
-		if (error != Z_OK) {
-			error = -ENOMEM;
-			goto fail_inflate_init;
+	if (aa_g_rawdata_compression_level == 0) {
+		const size_t wksp_len = zstd_dctx_workspace_bound();
+		zstd_dctx *ctx;
+		void *wksp;
+		size_t out_len;
+		int ret = 0;
+
+		wksp = kvzalloc(wksp_len, GFP_KERNEL);
+		if (!wksp) {
+			ret = -ENOMEM;
+			goto cleanup;
 		}
 
-		strm.next_out = dst;
-		strm.avail_out = dlen;
-
-		error = zlib_inflate(&strm, Z_FINISH);
-		if (error != Z_STREAM_END)
-			error = -EINVAL;
-		else
-			error = 0;
-
-		zlib_inflateEnd(&strm);
-fail_inflate_init:
-		kvfree(strm.workspace);
-
-		return error;
+		out_len = zstd_decompress_dctx(ctx, dst, dlen, src, slen);
+		if (zstd_is_error(out_len)) {
+			ret = -EINVAL;
+			goto cleanup;
+		}
+cleanup:
+		kvfree(wksp);
+		return ret;
 	}
 #endif
 
@@ -1381,9 +1369,9 @@ static int rawdata_open(struct inode *inode, struct file *file)
 
 	private->loaddata = loaddata;
 
-	error = deflate_decompress(loaddata->data, loaddata->compressed_size,
-				   RAWDATA_F_DATA_BUF(private),
-				   loaddata->size);
+	error = decompress_zstd(loaddata->data, loaddata->compressed_size,
+				RAWDATA_F_DATA_BUF(private),
+				loaddata->size);
 	if (error)
 		goto fail_decompress;
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index e29cade7b6627..ec873ff0a4bbb 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -21,7 +21,7 @@
 #include <linux/user_namespace.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
-#include <linux/zlib.h>
+#include <linux/zstd.h>
 #include <net/sock.h>
 #include <uapi/linux/mount.h>
 
@@ -1361,7 +1361,7 @@ module_param_named(export_binary, aa_g_export_binary, aabool, 0600);
 #endif
 
 /* policy loaddata compression level */
-int aa_g_rawdata_compression_level = Z_DEFAULT_COMPRESSION;
+int aa_g_rawdata_compression_level = ZSTD_CLEVEL_DEFAULT;
 module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level,
 		   aacompressionlevel, 0400);
 
@@ -1543,9 +1543,9 @@ static int param_set_aacompressionlevel(const char *val,
 	error = param_set_int(val, kp);
 
 	aa_g_rawdata_compression_level = clamp(aa_g_rawdata_compression_level,
-					       Z_NO_COMPRESSION,
-					       Z_BEST_COMPRESSION);
-	pr_info("AppArmor: policy rawdata compression level set to %u\n",
+					       zstd_min_clevel(),
+					       zstd_max_clevel());
+	pr_info("AppArmor: policy rawdata compression level set to %d\n",
 		aa_g_rawdata_compression_level);
 
 	return error;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 55d31bac4f35b..10e462d003213 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -16,7 +16,7 @@
 #include <asm/unaligned.h>
 #include <linux/ctype.h>
 #include <linux/errno.h>
-#include <linux/zlib.h>
+#include <linux/zstd.h>
 
 #include "include/apparmor.h"
 #include "include/audit.h"
@@ -1059,81 +1059,73 @@ struct aa_load_ent *aa_load_ent_alloc(void)
 	return ent;
 }
 
-static int deflate_compress(const char *src, size_t slen, char **dst,
-			    size_t *dlen)
+static int compress_zstd(const char *src, size_t slen, char **dst, size_t *dlen)
 {
 #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY
-	int error;
-	struct z_stream_s strm;
-	void *stgbuf, *dstbuf;
-	size_t stglen = deflateBound(slen);
-
-	memset(&strm, 0, sizeof(strm));
-
-	if (stglen < slen)
-		return -EFBIG;
-
-	strm.workspace = kvzalloc(zlib_deflate_workspacesize(MAX_WBITS,
-							     MAX_MEM_LEVEL),
-				  GFP_KERNEL);
-	if (!strm.workspace)
-		return -ENOMEM;
-
-	error = zlib_deflateInit(&strm, aa_g_rawdata_compression_level);
-	if (error != Z_OK) {
-		error = -ENOMEM;
-		goto fail_deflate_init;
+	const zstd_parameters params =
+		zstd_get_params(aa_g_rawdata_compression_level, slen);
+	const size_t wksp_len = zstd_cctx_workspace_bound(&params.cParams);
+	void *wksp = NULL;
+	zstd_cctx *ctx = NULL;
+	size_t out_len = zstd_compress_bound(slen);
+	void *out = NULL;
+	int ret = 0;
+
+	out = kvzalloc(out_len, GFP_KERNEL);
+	if (!out) {
+		ret = -ENOMEM;
+		goto cleanup;
 	}
 
-	stgbuf = kvzalloc(stglen, GFP_KERNEL);
-	if (!stgbuf) {
-		error = -ENOMEM;
-		goto fail_stg_alloc;
+	wksp = kvzalloc(wksp_len, GFP_KERNEL);
+	if (!wksp) {
+		ret = -ENOMEM;
+		goto cleanup;
 	}
 
-	strm.next_in = src;
-	strm.avail_in = slen;
-	strm.next_out = stgbuf;
-	strm.avail_out = stglen;
+	ctx = zstd_init_cctx(wksp, wksp_len);
+	if (!ctx) {
+		ret = -EINVAL;
+		goto cleanup;
+	}
 
-	error = zlib_deflate(&strm, Z_FINISH);
-	if (error != Z_STREAM_END) {
-		error = -EINVAL;
-		goto fail_deflate;
+	out_len = zstd_compress_cctx(ctx, out, out_len, src, slen, &params);
+	if (zstd_is_error(out_len)) {
+		ret = -EINVAL;
+		goto cleanup;
 	}
-	error = 0;
 
-	if (is_vmalloc_addr(stgbuf)) {
-		dstbuf = kvzalloc(strm.total_out, GFP_KERNEL);
-		if (dstbuf) {
-			memcpy(dstbuf, stgbuf, strm.total_out);
-			kvfree(stgbuf);
+	if (is_vmalloc_addr(out)) {
+		*dst = kvzalloc(out_len, GFP_KERNEL);
+		if (*dst) {
+			memcpy(*dst, out, out_len);
+			kvfree(out);
+			out = NULL;
 		}
-	} else
+	} else {
 		/*
 		 * If the staging buffer was kmalloc'd, then using krealloc is
 		 * probably going to be faster. The destination buffer will
 		 * always be smaller, so it's just shrunk, avoiding a memcpy
 		 */
-		dstbuf = krealloc(stgbuf, strm.total_out, GFP_KERNEL);
+		*dst = krealloc(out, out_len, GFP_KERNEL);
+	}
 
-	if (!dstbuf) {
-		error = -ENOMEM;
-		goto fail_deflate;
+	if (!*dst) {
+		ret = -ENOMEM;
+		goto cleanup;
 	}
 
-	*dst = dstbuf;
-	*dlen = strm.total_out;
+	*dlen = out_len;
 
-fail_stg_alloc:
-	zlib_deflateEnd(&strm);
-fail_deflate_init:
-	kvfree(strm.workspace);
-	return error;
+cleanup:
+	if (ret) {
+		kvfree(out);
+		*dst = NULL;
+	}
 
-fail_deflate:
-	kvfree(stgbuf);
-	goto fail_stg_alloc;
+	kvfree(wksp);
+	return ret;
 #else
 	*dlen = slen;
 	return 0;
@@ -1142,7 +1134,6 @@ fail_deflate:
 
 static int compress_loaddata(struct aa_loaddata *data)
 {
-
 	AA_BUG(data->compressed_size > 0);
 
 	/*
@@ -1151,8 +1142,8 @@ static int compress_loaddata(struct aa_loaddata *data)
 	 */
 	if (aa_g_rawdata_compression_level != 0) {
 		void *udata = data->data;
-		int error = deflate_compress(udata, data->size, &data->data,
-					     &data->compressed_size);
+		int error = compress_zstd(udata, data->size, &data->data,
+					  &data->compressed_size);
 		if (error)
 			return error;
 
-- 
GitLab


From 2218d08123362c63bab257caf5ec3bc1a6e87ae9 Mon Sep 17 00:00:00 2001
From: Jon Tourville <jon.tourville@canonical.com>
Date: Mon, 11 Jul 2022 11:36:09 -0500
Subject: [PATCH 0006/4122] apparmor: expose compression level limits in sysfs

Create two new files in apparmor's sysfs:

/sys/kernel/security/apparmor/raw_data_compression_level_min
/sys/kernel/security/apparmor/raw_data_compression_level_max

These correspond to the minimum and maximum zstd compression levels
that can be assigned to the apparmor module parameter
raw_data_compression_level.

Signed-off-by: Jon Tourville <jon.tourville@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index d98bbf267fc77..044affb1ce839 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1199,10 +1199,24 @@ static int seq_ns_name_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+static int seq_ns_compress_min_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%d\n", zstd_min_clevel());
+	return 0;
+}
+
+static int seq_ns_compress_max_show(struct seq_file *seq, void *v)
+{
+	seq_printf(seq, "%d\n", zstd_max_clevel());
+	return 0;
+}
+
 SEQ_NS_FOPS(stacked);
 SEQ_NS_FOPS(nsstacked);
 SEQ_NS_FOPS(level);
 SEQ_NS_FOPS(name);
+SEQ_NS_FOPS(compress_min);
+SEQ_NS_FOPS(compress_max);
 
 
 /* policy/raw_data/ * file ops */
@@ -2382,6 +2396,8 @@ static struct aa_sfs_entry aa_sfs_entry_apparmor[] = {
 	AA_SFS_FILE_FOPS(".ns_level", 0444, &seq_ns_level_fops),
 	AA_SFS_FILE_FOPS(".ns_name", 0444, &seq_ns_name_fops),
 	AA_SFS_FILE_FOPS("profiles", 0444, &aa_sfs_profiles_fops),
+	AA_SFS_FILE_FOPS("raw_data_compression_level_min", 0444, &seq_ns_compress_min_fops),
+	AA_SFS_FILE_FOPS("raw_data_compression_level_max", 0444, &seq_ns_compress_max_fops),
 	AA_SFS_DIR("features", aa_sfs_entry_features),
 	{ }
 };
-- 
GitLab


From 408d53e923bd852d5d80243a642004163db53a87 Mon Sep 17 00:00:00 2001
From: Mike Salvatore <mike.salvatore@canonical.com>
Date: Mon, 30 Mar 2020 16:43:29 -0400
Subject: [PATCH 0007/4122] apparmor: compute file permissions on profile load

Rather than computing file permissions for each file access, file
permissions can be computed once on profile load and stored for lookup.

Signed-off-by: Mike Salvatore <mike.salvatore@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c    |   2 +-
 security/apparmor/domain.c        |  10 +--
 security/apparmor/file.c          | 128 +++++++++++++++++++++---------
 security/apparmor/include/file.h  |  15 +++-
 security/apparmor/policy_unpack.c |   3 +
 5 files changed, 110 insertions(+), 48 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 044affb1ce839..825b3093dcddf 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -624,7 +624,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
 		if (state) {
 			struct path_cond cond = { };
 
-			tmp = aa_compute_fperms(dfa, state, &cond);
+			tmp = *(aa_lookup_fperms(&(profile->file), state, &cond));
 		}
 	} else if (profile->policy.dfa) {
 		if (!PROFILE_MEDIATES(profile, *match_str))
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 91689d34d2811..2c99edd8953a1 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -162,7 +162,7 @@ next:
 		if (!state)
 			goto fail;
 	}
-	*perms = aa_compute_fperms(profile->file.dfa, state, &cond);
+	*perms = *(aa_lookup_fperms(&(profile->file), state, &cond));
 	aa_apply_modes_to_perms(profile, perms);
 	if ((perms->allow & request) != request)
 		return -EACCES;
@@ -215,7 +215,7 @@ static int label_components_match(struct aa_profile *profile,
 	return 0;
 
 next:
-	tmp = aa_compute_fperms(profile->file.dfa, state, &cond);
+	tmp = *(aa_lookup_fperms(&(profile->file), state, &cond));
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum(perms, &tmp);
 	label_for_each_cont(i, label, tp) {
@@ -224,7 +224,7 @@ next:
 		state = match_component(profile, tp, stack, start);
 		if (!state)
 			goto fail;
-		tmp = aa_compute_fperms(profile->file.dfa, state, &cond);
+		tmp = *(aa_lookup_fperms(&(profile->file), state, &cond));
 		aa_apply_modes_to_perms(profile, &tmp);
 		aa_perms_accum(perms, &tmp);
 	}
@@ -661,7 +661,7 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 	}
 
 	/* find exec permissions for name */
-	state = aa_str_perms(profile->file.dfa, state, name, cond, &perms);
+	state = aa_str_perms(&(profile->file), state, name, cond, &perms);
 	if (perms.allow & MAY_EXEC) {
 		/* exec permission determine how to transition */
 		new = x_to_label(profile, bprm, name, perms.xindex, &target,
@@ -756,7 +756,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 	}
 
 	/* find exec permissions for name */
-	state = aa_str_perms(profile->file.dfa, state, xname, cond, &perms);
+	state = aa_str_perms(&(profile->file), state, xname, cond, &perms);
 	if (!(perms.allow & AA_MAY_ONEXEC)) {
 		info = "no change_onexec valid for executable";
 		goto audit;
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index e1b7e93602e4c..710b7d7517eb4 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -201,47 +201,97 @@ static u32 map_old_perms(u32 old)
 	return new;
 }
 
+static void __aa_compute_fperms_allow(struct aa_perms *perms,
+				      struct aa_dfa *dfa,
+				      unsigned int state)
+{
+	perms->allow |= AA_MAY_GETATTR;
+
+	/* change_profile wasn't determined by ownership in old mapping */
+	if (ACCEPT_TABLE(dfa)[state] & 0x80000000)
+		perms->allow |= AA_MAY_CHANGE_PROFILE;
+	if (ACCEPT_TABLE(dfa)[state] & 0x40000000)
+		perms->allow |= AA_MAY_ONEXEC;
+}
+
+static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa,
+						unsigned int state)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = map_old_perms(dfa_user_allow(dfa, state));
+	perms.audit = map_old_perms(dfa_user_audit(dfa, state));
+	perms.quiet = map_old_perms(dfa_user_quiet(dfa, state));
+	perms.xindex = dfa_user_xindex(dfa, state);
+
+	__aa_compute_fperms_allow(&perms, dfa, state);
+
+	return perms;
+}
+
+static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa,
+						 unsigned int state)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = map_old_perms(dfa_other_allow(dfa, state));
+	perms.audit = map_old_perms(dfa_other_audit(dfa, state));
+	perms.quiet = map_old_perms(dfa_other_quiet(dfa, state));
+	perms.xindex = dfa_other_xindex(dfa, state);
+
+	__aa_compute_fperms_allow(&perms, dfa, state);
+
+	return perms;
+}
+
+/**
+ * aa_compute_fperms - convert dfa compressed perms to internal perms and store
+ *		       them so they can be retrieved later.
+ * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which
+ *		permissions will be computed   (NOT NULL)
+ *
+ * TODO: convert from dfa + state to permission entry
+ */
+void aa_compute_fperms(struct aa_file_rules *file_rules)
+{
+	int state;
+	int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen;
+
+	// DFAs are restricted from having a state_count of less than 2
+	file_rules->fperms_table = kvzalloc(
+			state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL);
+
+	// Since fperms_table is initialized with zeroes via kvzalloc(), we can
+	// skip the trap state (state == 0)
+	for (state = 1; state < state_count; state++) {
+		file_rules->fperms_table[state * 2] =
+			__aa_compute_fperms_user(file_rules->dfa, state);
+		file_rules->fperms_table[state * 2 + 1] =
+			__aa_compute_fperms_other(file_rules->dfa, state);
+	}
+}
+
 /**
- * aa_compute_fperms - convert dfa compressed perms to internal perms
- * @dfa: dfa to compute perms for   (NOT NULL)
+ * aa_lookup_fperms - convert dfa compressed perms to internal perms
+ * @dfa: dfa to lookup perms for   (NOT NULL)
  * @state: state in dfa
  * @cond:  conditions to consider  (NOT NULL)
  *
- * TODO: convert from dfa + state to permission entry, do computation conversion
- *       at load time.
+ * TODO: convert from dfa + state to permission entry
  *
- * Returns: computed permission set
+ * Returns: a pointer to a file permission set
  */
-struct aa_perms aa_compute_fperms(struct aa_dfa *dfa, unsigned int state,
-				  struct path_cond *cond)
+struct aa_perms default_perms = {};
+struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules,
+				 unsigned int state, struct path_cond *cond)
 {
-	/* FIXME: change over to new dfa format
-	 * currently file perms are encoded in the dfa, new format
-	 * splits the permissions from the dfa.  This mapping can be
-	 * done at profile load
-	 */
-	struct aa_perms perms = { };
+	if (!(file_rules->fperms_table))
+		return &default_perms;
 
-	if (uid_eq(current_fsuid(), cond->uid)) {
-		perms.allow = map_old_perms(dfa_user_allow(dfa, state));
-		perms.audit = map_old_perms(dfa_user_audit(dfa, state));
-		perms.quiet = map_old_perms(dfa_user_quiet(dfa, state));
-		perms.xindex = dfa_user_xindex(dfa, state);
-	} else {
-		perms.allow = map_old_perms(dfa_other_allow(dfa, state));
-		perms.audit = map_old_perms(dfa_other_audit(dfa, state));
-		perms.quiet = map_old_perms(dfa_other_quiet(dfa, state));
-		perms.xindex = dfa_other_xindex(dfa, state);
-	}
-	perms.allow |= AA_MAY_GETATTR;
+	if (uid_eq(current_fsuid(), cond->uid))
+		return &(file_rules->fperms_table[state * 2]);
 
-	/* change_profile wasn't determined by ownership in old mapping */
-	if (ACCEPT_TABLE(dfa)[state] & 0x80000000)
-		perms.allow |= AA_MAY_CHANGE_PROFILE;
-	if (ACCEPT_TABLE(dfa)[state] & 0x40000000)
-		perms.allow |= AA_MAY_ONEXEC;
-
-	return perms;
+	return &(file_rules->fperms_table[state * 2 + 1]);
 }
 
 /**
@@ -254,13 +304,13 @@ struct aa_perms aa_compute_fperms(struct aa_dfa *dfa, unsigned int state,
  *
  * Returns: the final state in @dfa when beginning @start and walking @name
  */
-unsigned int aa_str_perms(struct aa_dfa *dfa, unsigned int start,
+unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start,
 			  const char *name, struct path_cond *cond,
 			  struct aa_perms *perms)
 {
 	unsigned int state;
-	state = aa_dfa_match(dfa, start, name);
-	*perms = aa_compute_fperms(dfa, state, cond);
+	state = aa_dfa_match(file_rules->dfa, start, name);
+	*perms = *(aa_lookup_fperms(file_rules, state, cond));
 
 	return state;
 }
@@ -273,7 +323,7 @@ int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name,
 
 	if (profile_unconfined(profile))
 		return 0;
-	aa_str_perms(profile->file.dfa, profile->file.start, name, cond, perms);
+	aa_str_perms(&(profile->file), profile->file.start, name, cond, perms);
 	if (request & ~perms->allow)
 		e = -EACCES;
 	return aa_audit_file(profile, perms, op, request, name, NULL, NULL,
@@ -380,7 +430,7 @@ static int profile_path_link(struct aa_profile *profile,
 
 	error = -EACCES;
 	/* aa_str_perms - handles the case of the dfa being NULL */
-	state = aa_str_perms(profile->file.dfa, profile->file.start, lname,
+	state = aa_str_perms(&(profile->file), profile->file.start, lname,
 			     cond, &lperms);
 
 	if (!(lperms.allow & AA_MAY_LINK))
@@ -388,7 +438,7 @@ static int profile_path_link(struct aa_profile *profile,
 
 	/* test to see if target can be paired with link */
 	state = aa_dfa_null_transition(profile->file.dfa, state);
-	aa_str_perms(profile->file.dfa, state, tname, cond, &perms);
+	aa_str_perms(&(profile->file), state, tname, cond, &perms);
 
 	/* force audit/quiet masks for link are stored in the second entry
 	 * in the link pair.
@@ -410,7 +460,7 @@ static int profile_path_link(struct aa_profile *profile,
 	/* Do link perm subset test requiring allowed permission on link are
 	 * a subset of the allowed permissions on target.
 	 */
-	aa_str_perms(profile->file.dfa, profile->file.start, tname, cond,
+	aa_str_perms(&(profile->file), profile->file.start, tname, cond,
 		     &perms);
 
 	/* AA_MAY_LINK is not considered in the subset test */
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 029cb20e322d5..ab201d625a344 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -181,11 +181,13 @@ struct aa_file_rules {
 	/* struct perms perms; */
 	struct aa_domain trans;
 	/* TODO: add delegate table */
+	struct aa_perms *fperms_table;
 };
 
-struct aa_perms aa_compute_fperms(struct aa_dfa *dfa, unsigned int state,
-				    struct path_cond *cond);
-unsigned int aa_str_perms(struct aa_dfa *dfa, unsigned int start,
+void aa_compute_fperms(struct aa_file_rules *file_rules);
+struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules,
+				 unsigned int state, struct path_cond *cond);
+unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start,
 			  const char *name, struct path_cond *cond,
 			  struct aa_perms *perms);
 
@@ -204,10 +206,17 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 
 void aa_inherit_files(const struct cred *cred, struct files_struct *files);
 
+static inline void aa_free_fperms_table(struct aa_perms *fperms_table)
+{
+	if (fperms_table)
+		kvfree(fperms_table);
+}
+
 static inline void aa_free_file_rules(struct aa_file_rules *rules)
 {
 	aa_put_dfa(rules->dfa);
 	aa_free_domain_entries(&rules->trans);
+	aa_free_fperms_table(rules->fperms_table);
 }
 
 /**
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 10e462d003213..54175bca42563 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -22,6 +22,7 @@
 #include "include/audit.h"
 #include "include/cred.h"
 #include "include/crypto.h"
+#include "include/file.h"
 #include "include/match.h"
 #include "include/path.h"
 #include "include/policy.h"
@@ -878,6 +879,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	} else
 		profile->file.dfa = aa_get_dfa(nulldfa);
 
+	aa_compute_fperms(&(profile->file));
+
 	if (!unpack_trans_table(e, profile)) {
 		info = "failed to unpack profile transition table";
 		goto fail;
-- 
GitLab


From b5b57993504f91785fa70e002e5e494fb549726e Mon Sep 17 00:00:00 2001
From: Mike Salvatore <mike.salvatore@canonical.com>
Date: Sun, 31 May 2020 10:52:06 -0400
Subject: [PATCH 0008/4122] apparmor: compute xmatch permissions on profile
 load

Rather than computing xmatch permissions each time access is requested,
these permissions can be computed once on profile load and stored for
lookup.

Signed-off-by: Mike Salvatore <mike.salvatore@canonical.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/domain.c         |  4 ++--
 security/apparmor/include/policy.h |  2 ++
 security/apparmor/policy.c         |  1 +
 security/apparmor/policy_unpack.c  | 22 +++++++++++++++++++++-
 4 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 2c99edd8953a1..22351b6d71e65 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -339,7 +339,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 			/* Check xattr value */
 			state = aa_dfa_match_len(profile->xmatch, state, value,
 						 size);
-			perm = dfa_user_allow(profile->xmatch, state);
+			perm = profile->xmatch_perms[state];
 			if (!(perm & MAY_EXEC)) {
 				ret = -EINVAL;
 				goto out;
@@ -419,7 +419,7 @@ restart:
 
 			state = aa_dfa_leftmatch(profile->xmatch, DFA_START,
 						 name, &count);
-			perm = dfa_user_allow(profile->xmatch, state);
+			perm = profile->xmatch_perms[state];
 			/* any accepting state means a valid match. */
 			if (perm & MAY_EXEC) {
 				int ret = 0;
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 639b5b248e63c..128c6a9430d46 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -104,6 +104,7 @@ struct aa_data {
  * @attach: human readable attachment string
  * @xmatch: optional extended matching for unconfined executables names
  * @xmatch_len: xmatch prefix len, used to determine xmatch priority
+ * @xmatch_perms: precomputed permissions for the xmatch DFA indexed by state
  * @audit: the auditing mode of the profile
  * @mode: the enforcement mode of the profile
  * @path_flags: flags controlling path generation behavior
@@ -140,6 +141,7 @@ struct aa_profile {
 	const char *attach;
 	struct aa_dfa *xmatch;
 	unsigned int xmatch_len;
+	u32 *xmatch_perms;
 	enum audit_mode audit;
 	long mode;
 	u32 path_flags;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index fbdfcef91c616..e2d23cd85cd20 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -231,6 +231,7 @@ void aa_free_profile(struct aa_profile *profile)
 	kfree_sensitive(profile->secmark);
 	kfree_sensitive(profile->dirname);
 	aa_put_dfa(profile->xmatch);
+	kvfree(profile->xmatch_perms);
 	aa_put_dfa(profile->policy.dfa);
 
 	if (profile->data) {
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 54175bca42563..70b7a35b5b96a 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -669,6 +669,23 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj)
 	return strcmp(data->key, *key);
 }
 
+static u32 *aa_compute_xmatch_perms(struct aa_dfa *xmatch)
+{
+	u32 *perms_table;
+	int state;
+	int state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen;
+
+	// DFAs are restricted from having a state_count of less than 2
+	perms_table = kvcalloc(state_count, sizeof(u32), GFP_KERNEL);
+
+	// Since perms_table is initialized with zeroes via kvcalloc(), we can
+	// skip the trap state (state == 0)
+	for (state = 1; state < state_count; state++)
+		perms_table[state] = dfa_user_allow(xmatch, state);
+
+	return perms_table;
+}
+
 /**
  * unpack_profile - unpack a serialized profile
  * @e: serialized data extent information (NOT NULL)
@@ -727,13 +744,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		info = "bad xmatch";
 		goto fail;
 	}
-	/* xmatch_len is not optional if xmatch is set */
+	/* neither xmatch_len not xmatch_perms are optional if xmatch is set */
 	if (profile->xmatch) {
 		if (!unpack_u32(e, &tmp, NULL)) {
 			info = "missing xmatch len";
 			goto fail;
 		}
 		profile->xmatch_len = tmp;
+
+		profile->xmatch_perms = aa_compute_xmatch_perms(
+			profile->xmatch);
 	}
 
 	/* disconnected attachment string is optional */
-- 
GitLab


From 754f209b811ac462e00ed0f79b48047c446f5c43 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 12 Nov 2020 10:07:25 -0800
Subject: [PATCH 0009/4122] apparmor: move fperm computation into policy_unpack

fperm computation is only needed during policy_unpack so move the
code there to isolate it fromt the run time code.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/file.c          | 97 ------------------------------
 security/apparmor/include/file.h  |  1 -
 security/apparmor/policy_unpack.c | 98 +++++++++++++++++++++++++++++++
 3 files changed, 98 insertions(+), 98 deletions(-)

diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 710b7d7517eb4..1227ae8391546 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -174,103 +174,6 @@ static int path_name(const char *op, struct aa_label *label,
 	return 0;
 }
 
-/**
- * map_old_perms - map old file perms layout to the new layout
- * @old: permission set in old mapping
- *
- * Returns: new permission mapping
- */
-static u32 map_old_perms(u32 old)
-{
-	u32 new = old & 0xf;
-	if (old & MAY_READ)
-		new |= AA_MAY_GETATTR | AA_MAY_OPEN;
-	if (old & MAY_WRITE)
-		new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE |
-		       AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN;
-	if (old & 0x10)
-		new |= AA_MAY_LINK;
-	/* the old mapping lock and link_subset flags where overlaid
-	 * and use was determined by part of a pair that they were in
-	 */
-	if (old & 0x20)
-		new |= AA_MAY_LOCK | AA_LINK_SUBSET;
-	if (old & 0x40)	/* AA_EXEC_MMAP */
-		new |= AA_EXEC_MMAP;
-
-	return new;
-}
-
-static void __aa_compute_fperms_allow(struct aa_perms *perms,
-				      struct aa_dfa *dfa,
-				      unsigned int state)
-{
-	perms->allow |= AA_MAY_GETATTR;
-
-	/* change_profile wasn't determined by ownership in old mapping */
-	if (ACCEPT_TABLE(dfa)[state] & 0x80000000)
-		perms->allow |= AA_MAY_CHANGE_PROFILE;
-	if (ACCEPT_TABLE(dfa)[state] & 0x40000000)
-		perms->allow |= AA_MAY_ONEXEC;
-}
-
-static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa,
-						unsigned int state)
-{
-	struct aa_perms perms = { };
-
-	perms.allow = map_old_perms(dfa_user_allow(dfa, state));
-	perms.audit = map_old_perms(dfa_user_audit(dfa, state));
-	perms.quiet = map_old_perms(dfa_user_quiet(dfa, state));
-	perms.xindex = dfa_user_xindex(dfa, state);
-
-	__aa_compute_fperms_allow(&perms, dfa, state);
-
-	return perms;
-}
-
-static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa,
-						 unsigned int state)
-{
-	struct aa_perms perms = { };
-
-	perms.allow = map_old_perms(dfa_other_allow(dfa, state));
-	perms.audit = map_old_perms(dfa_other_audit(dfa, state));
-	perms.quiet = map_old_perms(dfa_other_quiet(dfa, state));
-	perms.xindex = dfa_other_xindex(dfa, state);
-
-	__aa_compute_fperms_allow(&perms, dfa, state);
-
-	return perms;
-}
-
-/**
- * aa_compute_fperms - convert dfa compressed perms to internal perms and store
- *		       them so they can be retrieved later.
- * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which
- *		permissions will be computed   (NOT NULL)
- *
- * TODO: convert from dfa + state to permission entry
- */
-void aa_compute_fperms(struct aa_file_rules *file_rules)
-{
-	int state;
-	int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen;
-
-	// DFAs are restricted from having a state_count of less than 2
-	file_rules->fperms_table = kvzalloc(
-			state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL);
-
-	// Since fperms_table is initialized with zeroes via kvzalloc(), we can
-	// skip the trap state (state == 0)
-	for (state = 1; state < state_count; state++) {
-		file_rules->fperms_table[state * 2] =
-			__aa_compute_fperms_user(file_rules->dfa, state);
-		file_rules->fperms_table[state * 2 + 1] =
-			__aa_compute_fperms_other(file_rules->dfa, state);
-	}
-}
-
 /**
  * aa_lookup_fperms - convert dfa compressed perms to internal perms
  * @dfa: dfa to lookup perms for   (NOT NULL)
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index ab201d625a344..1f9e54aa1adfe 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -184,7 +184,6 @@ struct aa_file_rules {
 	struct aa_perms *fperms_table;
 };
 
-void aa_compute_fperms(struct aa_file_rules *file_rules);
 struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules,
 				 unsigned int state, struct path_cond *cond);
 unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start,
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 70b7a35b5b96a..c22c6815ff4bb 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -669,6 +669,104 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj)
 	return strcmp(data->key, *key);
 }
 
+/**
+ * map_old_perms - map old file perms layout to the new layout
+ * @old: permission set in old mapping
+ *
+ * Returns: new permission mapping
+ */
+static u32 map_old_perms(u32 old)
+{
+	u32 new = old & 0xf;
+
+	if (old & MAY_READ)
+		new |= AA_MAY_GETATTR | AA_MAY_OPEN;
+	if (old & MAY_WRITE)
+		new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE |
+		       AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN;
+	if (old & 0x10)
+		new |= AA_MAY_LINK;
+	/* the old mapping lock and link_subset flags where overlaid
+	 * and use was determined by part of a pair that they were in
+	 */
+	if (old & 0x20)
+		new |= AA_MAY_LOCK | AA_LINK_SUBSET;
+	if (old & 0x40)	/* AA_EXEC_MMAP */
+		new |= AA_EXEC_MMAP;
+
+	return new;
+}
+
+static void __aa_compute_fperms_allow(struct aa_perms *perms,
+				      struct aa_dfa *dfa,
+				      unsigned int state)
+{
+	perms->allow |= AA_MAY_GETATTR;
+
+	/* change_profile wasn't determined by ownership in old mapping */
+	if (ACCEPT_TABLE(dfa)[state] & 0x80000000)
+		perms->allow |= AA_MAY_CHANGE_PROFILE;
+	if (ACCEPT_TABLE(dfa)[state] & 0x40000000)
+		perms->allow |= AA_MAY_ONEXEC;
+}
+
+static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa,
+						unsigned int state)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = map_old_perms(dfa_user_allow(dfa, state));
+	perms.audit = map_old_perms(dfa_user_audit(dfa, state));
+	perms.quiet = map_old_perms(dfa_user_quiet(dfa, state));
+	perms.xindex = dfa_user_xindex(dfa, state);
+
+	__aa_compute_fperms_allow(&perms, dfa, state);
+
+	return perms;
+}
+
+static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa,
+						 unsigned int state)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = map_old_perms(dfa_other_allow(dfa, state));
+	perms.audit = map_old_perms(dfa_other_audit(dfa, state));
+	perms.quiet = map_old_perms(dfa_other_quiet(dfa, state));
+	perms.xindex = dfa_other_xindex(dfa, state);
+
+	__aa_compute_fperms_allow(&perms, dfa, state);
+
+	return perms;
+}
+
+/**
+ * aa_compute_fperms - convert dfa compressed perms to internal perms and store
+ *		       them so they can be retrieved later.
+ * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which
+ *		permissions will be computed   (NOT NULL)
+ *
+ * TODO: convert from dfa + state to permission entry
+ */
+static void aa_compute_fperms(struct aa_file_rules *file_rules)
+{
+	int state;
+	int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen;
+
+	// DFAs are restricted from having a state_count of less than 2
+	file_rules->fperms_table = kvzalloc(
+			state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL);
+
+	// Since fperms_table is initialized with zeroes via kvzalloc(), we can
+	// skip the trap state (state == 0)
+	for (state = 1; state < state_count; state++) {
+		file_rules->fperms_table[state * 2] =
+			__aa_compute_fperms_user(file_rules->dfa, state);
+		file_rules->fperms_table[state * 2 + 1] =
+			__aa_compute_fperms_other(file_rules->dfa, state);
+	}
+}
+
 static u32 *aa_compute_xmatch_perms(struct aa_dfa *xmatch)
 {
 	u32 *perms_table;
-- 
GitLab


From 0310f093ba95e7640c886298de36560c123df5bd Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 12 Nov 2020 10:26:26 -0800
Subject: [PATCH 0010/4122] apparmor: rework and cleanup fperm computation

shorten the name of some of the mapping functions which shortens line
lengths.

change the mapping so it returns the perm table instead of operating
directly on the file struct.

Handle potential memory allocation failure.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 70 +++++++++++++++++--------------
 1 file changed, 38 insertions(+), 32 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index c22c6815ff4bb..0f9a88354d639 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -697,9 +697,8 @@ static u32 map_old_perms(u32 old)
 	return new;
 }
 
-static void __aa_compute_fperms_allow(struct aa_perms *perms,
-				      struct aa_dfa *dfa,
-				      unsigned int state)
+static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa,
+				 unsigned int state)
 {
 	perms->allow |= AA_MAY_GETATTR;
 
@@ -710,8 +709,8 @@ static void __aa_compute_fperms_allow(struct aa_perms *perms,
 		perms->allow |= AA_MAY_ONEXEC;
 }
 
-static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa,
-						unsigned int state)
+static struct aa_perms compute_fperms_user(struct aa_dfa *dfa,
+					   unsigned int state)
 {
 	struct aa_perms perms = { };
 
@@ -720,13 +719,13 @@ static struct aa_perms __aa_compute_fperms_user(struct aa_dfa *dfa,
 	perms.quiet = map_old_perms(dfa_user_quiet(dfa, state));
 	perms.xindex = dfa_user_xindex(dfa, state);
 
-	__aa_compute_fperms_allow(&perms, dfa, state);
+	compute_fperms_allow(&perms, dfa, state);
 
 	return perms;
 }
 
-static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa,
-						 unsigned int state)
+static struct aa_perms compute_fperms_other(struct aa_dfa *dfa,
+					    unsigned int state)
 {
 	struct aa_perms perms = { };
 
@@ -735,7 +734,7 @@ static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa,
 	perms.quiet = map_old_perms(dfa_other_quiet(dfa, state));
 	perms.xindex = dfa_other_xindex(dfa, state);
 
-	__aa_compute_fperms_allow(&perms, dfa, state);
+	compute_fperms_allow(&perms, dfa, state);
 
 	return perms;
 }
@@ -743,41 +742,46 @@ static struct aa_perms __aa_compute_fperms_other(struct aa_dfa *dfa,
 /**
  * aa_compute_fperms - convert dfa compressed perms to internal perms and store
  *		       them so they can be retrieved later.
- * @file_rules: a file_rules structure containing a dfa (NOT NULL) for which
- *		permissions will be computed   (NOT NULL)
+ * @dfa: a dfa using fperms to remap to internal permissions
  *
- * TODO: convert from dfa + state to permission entry
+ * Returns: remapped perm table
  */
-static void aa_compute_fperms(struct aa_file_rules *file_rules)
+static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
 {
 	int state;
-	int state_count = file_rules->dfa->tables[YYTD_ID_BASE]->td_lolen;
+	int state_count;
+	struct aa_perms *table;
 
-	// DFAs are restricted from having a state_count of less than 2
-	file_rules->fperms_table = kvzalloc(
-			state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL);
+	AA_BUG(!dfa);
 
-	// Since fperms_table is initialized with zeroes via kvzalloc(), we can
-	// skip the trap state (state == 0)
+	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+	/* DFAs are restricted from having a state_count of less than 2 */
+	table = kvzalloc(state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	/* zero init so skip the trap state (state == 0) */
 	for (state = 1; state < state_count; state++) {
-		file_rules->fperms_table[state * 2] =
-			__aa_compute_fperms_user(file_rules->dfa, state);
-		file_rules->fperms_table[state * 2 + 1] =
-			__aa_compute_fperms_other(file_rules->dfa, state);
+		table[state * 2] = compute_fperms_user(dfa, state);
+		table[state * 2 + 1] = compute_fperms_other(dfa, state);
 	}
+
+	return table;
 }
 
-static u32 *aa_compute_xmatch_perms(struct aa_dfa *xmatch)
+static u32 *compute_xmatch_perms(struct aa_dfa *xmatch)
 {
 	u32 *perms_table;
 	int state;
-	int state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen;
+	int state_count;
+
+	AA_BUG(!xmatch);
 
-	// DFAs are restricted from having a state_count of less than 2
+	state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen;
+	/* DFAs are restricted from having a state_count of less than 2 */
 	perms_table = kvcalloc(state_count, sizeof(u32), GFP_KERNEL);
 
-	// Since perms_table is initialized with zeroes via kvcalloc(), we can
-	// skip the trap state (state == 0)
+	/* zero init so skip the trap state (state == 0) */
 	for (state = 1; state < state_count; state++)
 		perms_table[state] = dfa_user_allow(xmatch, state);
 
@@ -850,8 +854,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		}
 		profile->xmatch_len = tmp;
 
-		profile->xmatch_perms = aa_compute_xmatch_perms(
-			profile->xmatch);
+		profile->xmatch_perms = compute_xmatch_perms(profile->xmatch);
 	}
 
 	/* disconnected attachment string is optional */
@@ -997,8 +1000,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	} else
 		profile->file.dfa = aa_get_dfa(nulldfa);
 
-	aa_compute_fperms(&(profile->file));
-
+	profile->file.fperms_table = compute_fperms(profile->file.dfa);
+	if (!profile->file.fperms_table) {
+		info = "failed to remap file permission table";
+		goto fail;
+	}
 	if (!unpack_trans_table(e, profile)) {
 		info = "failed to unpack profile transition table";
 		goto fail;
-- 
GitLab


From e48ffd24c1d87dba227225615790cd059a707adb Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 13 Nov 2020 16:30:47 -0800
Subject: [PATCH 0011/4122] apparmor: convert xmatch to use aa_perms structure

Convert xmatch from using perms encoded in the accept entry of the
dfa to the common external aa_perms in a table.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/domain.c         |  4 ++--
 security/apparmor/include/policy.h |  3 ++-
 security/apparmor/policy_unpack.c  | 13 +++++++++----
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 22351b6d71e65..4fcdcc0de48cc 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -339,7 +339,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 			/* Check xattr value */
 			state = aa_dfa_match_len(profile->xmatch, state, value,
 						 size);
-			perm = profile->xmatch_perms[state];
+			perm = profile->xmatch_perms[state].allow;
 			if (!(perm & MAY_EXEC)) {
 				ret = -EINVAL;
 				goto out;
@@ -419,7 +419,7 @@ restart:
 
 			state = aa_dfa_leftmatch(profile->xmatch, DFA_START,
 						 name, &count);
-			perm = profile->xmatch_perms[state];
+			perm = profile->xmatch_perms[state].allow;
 			/* any accepting state means a valid match. */
 			if (perm & MAY_EXEC) {
 				int ret = 0;
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 128c6a9430d46..7882d5e5096b1 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -141,7 +141,8 @@ struct aa_profile {
 	const char *attach;
 	struct aa_dfa *xmatch;
 	unsigned int xmatch_len;
-	u32 *xmatch_perms;
+	struct aa_perms *xmatch_perms;
+
 	enum audit_mode audit;
 	long mode;
 	u32 path_flags;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 0f9a88354d639..44910c201c492 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -769,9 +769,9 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
 	return table;
 }
 
-static u32 *compute_xmatch_perms(struct aa_dfa *xmatch)
+static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch)
 {
-	u32 *perms_table;
+	struct aa_perms *perms_table;
 	int state;
 	int state_count;
 
@@ -779,11 +779,12 @@ static u32 *compute_xmatch_perms(struct aa_dfa *xmatch)
 
 	state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen;
 	/* DFAs are restricted from having a state_count of less than 2 */
-	perms_table = kvcalloc(state_count, sizeof(u32), GFP_KERNEL);
+	  perms_table = kvcalloc(state_count, sizeof(struct aa_perms),
+			       GFP_KERNEL);
 
 	/* zero init so skip the trap state (state == 0) */
 	for (state = 1; state < state_count; state++)
-		perms_table[state] = dfa_user_allow(xmatch, state);
+		perms_table[state].allow = dfa_user_allow(xmatch, state);
 
 	return perms_table;
 }
@@ -855,6 +856,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		profile->xmatch_len = tmp;
 
 		profile->xmatch_perms = compute_xmatch_perms(profile->xmatch);
+		if (!profile->xmatch_perms) {
+			info = "failed to convert xmatch permission table";
+			goto fail;
+		}
 	}
 
 	/* disconnected attachment string is optional */
-- 
GitLab


From e2967ede22978f132cd52929edff96c701bde0eb Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 17 Nov 2020 01:38:16 -0800
Subject: [PATCH 0012/4122] apparmor: compute policydb permission on profile
 load

Rather than computing policydb permissions for each access
permissions can be computed once on profile load and stored for lookup.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c     |  2 +-
 security/apparmor/include/perms.h  | 13 +++++--
 security/apparmor/include/policy.h |  1 +
 security/apparmor/label.c          |  6 ++--
 security/apparmor/lib.c            | 42 -----------------------
 security/apparmor/mount.c          | 53 ++++++++++------------------
 security/apparmor/net.c            |  2 +-
 security/apparmor/policy.c         |  2 +-
 security/apparmor/policy_unpack.c  | 55 +++++++++++++++++++++++++++++-
 9 files changed, 90 insertions(+), 86 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 825b3093dcddf..117783779337d 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -633,7 +633,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
 		state = aa_dfa_match_len(dfa, profile->policy.start[0],
 					 match_str, match_len);
 		if (state)
-			aa_compute_perms(dfa, state, &tmp);
+			tmp = *aa_lookup_perms(profile->policy.perms, state);
 	}
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum_raw(perms, &tmp);
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 13f20c5984480..de9631edb1ff9 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -133,6 +133,17 @@ extern struct aa_perms allperms;
 	xcheck(fn_for_each((L1), (P), (FN1)), fn_for_each((L2), (P), (FN2)))
 
 
+extern struct aa_perms default_perms;
+
+static inline struct aa_perms *aa_lookup_perms(struct aa_perms *perms,
+					       unsigned int state)
+{
+	if (!(perms))
+		return &default_perms;
+
+	return &(perms[state]);
+}
+
 void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs,
 			 u32 mask);
 void aa_audit_perm_names(struct audit_buffer *ab, const char * const *names,
@@ -141,8 +152,6 @@ void aa_audit_perm_mask(struct audit_buffer *ab, u32 mask, const char *chrs,
 			u32 chrsmask, const char * const *names, u32 namesmask);
 void aa_apply_modes_to_perms(struct aa_profile *profile,
 			     struct aa_perms *perms);
-void aa_compute_perms(struct aa_dfa *dfa, unsigned int state,
-		      struct aa_perms *perms);
 void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend);
 void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend);
 void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label,
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 7882d5e5096b1..0dec18cd95e56 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -77,6 +77,7 @@ enum profile_mode {
 struct aa_policydb {
 	/* Generic policy DFA specific rule types will be subsections of it */
 	struct aa_dfa *dfa;
+	struct aa_perms *perms;
 	unsigned int start[AA_CLASS_LAST + 1];
 
 };
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 0f36ee9074381..ddb04417bdabf 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1328,7 +1328,7 @@ next:
 		if (!state)
 			goto fail;
 	}
-	aa_compute_perms(profile->policy.dfa, state, perms);
+	*perms = *aa_lookup_perms(profile->policy.perms, state);
 	aa_apply_modes_to_perms(profile, perms);
 	if ((perms->allow & request) != request)
 		return -EACCES;
@@ -1379,7 +1379,7 @@ static int label_components_match(struct aa_profile *profile,
 	return 0;
 
 next:
-	aa_compute_perms(profile->policy.dfa, state, &tmp);
+	tmp = *aa_lookup_perms(profile->policy.perms, state);
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum(perms, &tmp);
 	label_for_each_cont(i, label, tp) {
@@ -1388,7 +1388,7 @@ next:
 		state = match_component(profile, tp, start);
 		if (!state)
 			goto fail;
-		aa_compute_perms(profile->policy.dfa, state, &tmp);
+		tmp = *aa_lookup_perms(profile->policy.perms, state);
 		aa_apply_modes_to_perms(profile, &tmp);
 		aa_perms_accum(perms, &tmp);
 	}
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 1c72a61108d3f..505ef5848f7ce 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -315,48 +315,6 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms)
  */
 }
 
-static u32 map_other(u32 x)
-{
-	return ((x & 0x3) << 8) |	/* SETATTR/GETATTR */
-		((x & 0x1c) << 18) |	/* ACCEPT/BIND/LISTEN */
-		((x & 0x60) << 19);	/* SETOPT/GETOPT */
-}
-
-static u32 map_xbits(u32 x)
-{
-	return ((x & 0x1) << 7) |
-		((x & 0x7e) << 9);
-}
-
-void aa_compute_perms(struct aa_dfa *dfa, unsigned int state,
-		      struct aa_perms *perms)
-{
-	/* This mapping is convulated due to history.
-	 * v1-v4: only file perms
-	 * v5: added policydb which dropped in perm user conditional to
-	 *     gain new perm bits, but had to map around the xbits because
-	 *     the userspace compiler was still munging them.
-	 * v9: adds using the xbits in policydb because the compiler now
-	 *     supports treating policydb permission bits different.
-	 *     Unfortunately there is not way to force auditing on the
-	 *     perms represented by the xbits
-	 */
-	*perms = (struct aa_perms) {
-		.allow = dfa_user_allow(dfa, state) |
-			 map_xbits(dfa_user_xbits(dfa, state)),
-		.audit = dfa_user_audit(dfa, state),
-		.quiet = dfa_user_quiet(dfa, state) |
-			 map_xbits(dfa_other_xbits(dfa, state)),
-	};
-
-	/* for v5-v9 perm mapping in the policydb, the other set is used
-	 * to extend the general perm set
-	 */
-	perms->allow |= map_other(dfa_other_allow(dfa, state));
-	perms->audit |= map_other(dfa_other_audit(dfa, state));
-	perms->quiet |= map_other(dfa_other_quiet(dfa, state));
-}
-
 /**
  * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms
  * @accum - perms struct to accumulate into
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index f612472418033..1e978c2b1ee45 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -203,25 +203,6 @@ static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state,
 	return state;
 }
 
-/**
- * compute_mnt_perms - compute mount permission associated with @state
- * @dfa: dfa to match against (NOT NULL)
- * @state: state match finished in
- *
- * Returns: mount permissions
- */
-static struct aa_perms compute_mnt_perms(struct aa_dfa *dfa,
-					   unsigned int state)
-{
-	struct aa_perms perms = {
-		.allow = dfa_user_allow(dfa, state),
-		.audit = dfa_user_audit(dfa, state),
-		.quiet = dfa_user_quiet(dfa, state),
-	};
-
-	return perms;
-}
-
 static const char * const mnt_info_table[] = {
 	"match succeeded",
 	"failed mntpnt match",
@@ -236,50 +217,52 @@ static const char * const mnt_info_table[] = {
  * Returns 0 on success else element that match failed in, this is the
  * index into the mnt_info_table above
  */
-static int do_match_mnt(struct aa_dfa *dfa, unsigned int start,
+static int do_match_mnt(struct aa_policydb *policy, unsigned int start,
 			const char *mntpnt, const char *devname,
 			const char *type, unsigned long flags,
 			void *data, bool binary, struct aa_perms *perms)
 {
 	unsigned int state;
 
-	AA_BUG(!dfa);
+	AA_BUG(!policy);
+	AA_BUG(!policy->dfa);
+	AA_BUG(!policy->perms);
 	AA_BUG(!perms);
 
-	state = aa_dfa_match(dfa, start, mntpnt);
-	state = aa_dfa_null_transition(dfa, state);
+	state = aa_dfa_match(policy->dfa, start, mntpnt);
+	state = aa_dfa_null_transition(policy->dfa, state);
 	if (!state)
 		return 1;
 
 	if (devname)
-		state = aa_dfa_match(dfa, state, devname);
-	state = aa_dfa_null_transition(dfa, state);
+		state = aa_dfa_match(policy->dfa, state, devname);
+	state = aa_dfa_null_transition(policy->dfa, state);
 	if (!state)
 		return 2;
 
 	if (type)
-		state = aa_dfa_match(dfa, state, type);
-	state = aa_dfa_null_transition(dfa, state);
+		state = aa_dfa_match(policy->dfa, state, type);
+	state = aa_dfa_null_transition(policy->dfa, state);
 	if (!state)
 		return 3;
 
-	state = match_mnt_flags(dfa, state, flags);
+	state = match_mnt_flags(policy->dfa, state, flags);
 	if (!state)
 		return 4;
-	*perms = compute_mnt_perms(dfa, state);
+	*perms = *aa_lookup_perms(policy->perms, state);
 	if (perms->allow & AA_MAY_MOUNT)
 		return 0;
 
 	/* only match data if not binary and the DFA flags data is expected */
 	if (data && !binary && (perms->allow & AA_MNT_CONT_MATCH)) {
-		state = aa_dfa_null_transition(dfa, state);
+		state = aa_dfa_null_transition(policy->dfa, state);
 		if (!state)
 			return 4;
 
-		state = aa_dfa_match(dfa, state, data);
+		state = aa_dfa_match(policy->dfa, state, data);
 		if (!state)
 			return 5;
-		*perms = compute_mnt_perms(dfa, state);
+		*perms = *aa_lookup_perms(policy->perms, state);
 		if (perms->allow & AA_MAY_MOUNT)
 			return 0;
 	}
@@ -341,7 +324,7 @@ static int match_mnt_path_str(struct aa_profile *profile,
 	}
 
 	error = -EACCES;
-	pos = do_match_mnt(profile->policy.dfa,
+	pos = do_match_mnt(&profile->policy,
 			   profile->policy.start[AA_CLASS_MOUNT],
 			   mntpnt, devname, type, flags, data, binary, &perms);
 	if (pos) {
@@ -601,7 +584,7 @@ static int profile_umount(struct aa_profile *profile, const struct path *path,
 	state = aa_dfa_match(profile->policy.dfa,
 			     profile->policy.start[AA_CLASS_MOUNT],
 			     name);
-	perms = compute_mnt_perms(profile->policy.dfa, state);
+	perms = *aa_lookup_perms(profile->policy.perms, state);
 	if (AA_MAY_UMOUNT & ~perms.allow)
 		error = -EACCES;
 
@@ -672,7 +655,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 			     new_name);
 	state = aa_dfa_null_transition(profile->policy.dfa, state);
 	state = aa_dfa_match(profile->policy.dfa, state, old_name);
-	perms = compute_mnt_perms(profile->policy.dfa, state);
+	perms = *aa_lookup_perms(profile->policy.perms, state);
 
 	if (AA_MAY_PIVOTROOT & perms.allow)
 		error = 0;
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index 7efe4d17273d9..88e8a7ea54c0c 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -125,7 +125,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
 	buffer[1] = cpu_to_be16((u16) type);
 	state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer,
 				 4);
-	aa_compute_perms(profile->policy.dfa, state, &perms);
+	perms = *aa_lookup_perms(profile->policy.perms, state);
 	aa_apply_modes_to_perms(profile, &perms);
 
 	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index e2d23cd85cd20..6c3086e2c8201 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -233,7 +233,7 @@ void aa_free_profile(struct aa_profile *profile)
 	aa_put_dfa(profile->xmatch);
 	kvfree(profile->xmatch_perms);
 	aa_put_dfa(profile->policy.dfa);
-
+	kvfree(profile->policy.perms);
 	if (profile->data) {
 		rht = profile->data;
 		profile->data = NULL;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 44910c201c492..ed063385a83b4 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -756,7 +756,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
 
 	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
 	/* DFAs are restricted from having a state_count of less than 2 */
-	table = kvzalloc(state_count * 2 * sizeof(struct aa_perms), GFP_KERNEL);
+	table = kvcalloc(state_count * 2, sizeof(struct aa_perms), GFP_KERNEL);
 	if (!table)
 		return NULL;
 
@@ -789,6 +789,54 @@ static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch)
 	return perms_table;
 }
 
+static u32 map_other(u32 x)
+{
+	return ((x & 0x3) << 8) |	/* SETATTR/GETATTR */
+		((x & 0x1c) << 18) |	/* ACCEPT/BIND/LISTEN */
+		((x & 0x60) << 19);	/* SETOPT/GETOPT */
+}
+
+static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
+					   unsigned int state)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = dfa_user_allow(dfa, state);
+	perms.audit = dfa_user_audit(dfa, state);
+	perms.quiet = dfa_user_quiet(dfa, state);
+
+	/* for v5 perm mapping in the policydb, the other set is used
+	 * to extend the general perm set
+	 */
+
+	perms.allow |= map_other(dfa_other_allow(dfa, state));
+	perms.audit |= map_other(dfa_other_audit(dfa, state));
+	perms.quiet |= map_other(dfa_other_quiet(dfa, state));
+
+	return perms;
+}
+
+static struct aa_perms *compute_perms(struct aa_dfa *dfa)
+{
+	int state;
+	int state_count;
+	struct aa_perms *table;
+
+	AA_BUG(!dfa);
+
+	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+	/* DFAs are restricted from having a state_count of less than 2 */
+	table = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	/* zero init so skip the trap state (state == 0) */
+	for (state = 1; state < state_count; state++)
+		table[state] = compute_perms_entry(dfa, state);
+
+	return table;
+}
+
 /**
  * unpack_profile - unpack a serialized profile
  * @e: serialized data extent information (NOT NULL)
@@ -986,6 +1034,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			goto fail;
 	} else
 		profile->policy.dfa = aa_get_dfa(nulldfa);
+	profile->policy.perms = compute_perms(profile->policy.dfa);
+	if (!profile->policy.perms) {
+		info = "failed to remap policydb permission table";
+		goto fail;
+	}
 
 	/* get file rules */
 	profile->file.dfa = unpack_dfa(e);
-- 
GitLab


From 53bdc46f4bdd20d477afb374767cabe627fd04ae Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 19 Nov 2020 10:37:48 -0800
Subject: [PATCH 0013/4122] apparmor: combine file_rules and aa_policydb into a
 single shared struct

file_rules and policydb are almost the same and will need the same
features in the future so combine them.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c     |  3 ++-
 security/apparmor/domain.c         |  7 +++---
 security/apparmor/file.c           | 20 ++++++++-------
 security/apparmor/include/file.h   | 39 +++---------------------------
 security/apparmor/include/policy.h | 14 ++++++++---
 security/apparmor/policy.c         |  5 ++--
 security/apparmor/policy_unpack.c  | 11 +++++----
 7 files changed, 40 insertions(+), 59 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 117783779337d..1625fee17fc75 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -619,7 +619,8 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
 		return;
 	if (profile->file.dfa && *match_str == AA_CLASS_FILE) {
 		dfa = profile->file.dfa;
-		state = aa_dfa_match_len(dfa, profile->file.start,
+		state = aa_dfa_match_len(dfa,
+					 profile->file.start[AA_CLASS_FILE],
 					 match_str + 1, match_len - 1);
 		if (state) {
 			struct path_cond cond = { };
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 4fcdcc0de48cc..819b7828cbc44 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -627,7 +627,7 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 {
 	struct aa_label *new = NULL;
 	const char *info = NULL, *name = NULL, *target = NULL;
-	unsigned int state = profile->file.start;
+	unsigned int state = profile->file.start[AA_CLASS_FILE];
 	struct aa_perms perms = {};
 	bool nonewprivs = false;
 	int error = 0;
@@ -723,7 +723,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 			  char *buffer, struct path_cond *cond,
 			  bool *secure_exec)
 {
-	unsigned int state = profile->file.start;
+	unsigned int state = profile->file.start[AA_CLASS_FILE];
 	struct aa_perms perms = {};
 	const char *xname = NULL, *info = "change_profile onexec";
 	int error = -EACCES;
@@ -1267,7 +1267,8 @@ static int change_profile_perms_wrapper(const char *op, const char *name,
 
 	if (!error)
 		error = change_profile_perms(profile, target, stack, request,
-					     profile->file.start, perms);
+					     profile->file.start[AA_CLASS_FILE],
+					     perms);
 	if (error)
 		error = aa_audit_file(profile, perms, op, request, name,
 				      NULL, target, GLOBAL_ROOT_UID, info,
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 1227ae8391546..d2be851be4121 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -185,16 +185,16 @@ static int path_name(const char *op, struct aa_label *label,
  * Returns: a pointer to a file permission set
  */
 struct aa_perms default_perms = {};
-struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules,
+struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
 				 unsigned int state, struct path_cond *cond)
 {
-	if (!(file_rules->fperms_table))
+	if (!(file_rules->perms))
 		return &default_perms;
 
 	if (uid_eq(current_fsuid(), cond->uid))
-		return &(file_rules->fperms_table[state * 2]);
+		return &(file_rules->perms[state * 2]);
 
-	return &(file_rules->fperms_table[state * 2 + 1]);
+	return &(file_rules->perms[state * 2 + 1]);
 }
 
 /**
@@ -207,7 +207,7 @@ struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules,
  *
  * Returns: the final state in @dfa when beginning @start and walking @name
  */
-unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start,
+unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start,
 			  const char *name, struct path_cond *cond,
 			  struct aa_perms *perms)
 {
@@ -226,7 +226,8 @@ int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name,
 
 	if (profile_unconfined(profile))
 		return 0;
-	aa_str_perms(&(profile->file), profile->file.start, name, cond, perms);
+	aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE],
+		     name, cond, perms);
 	if (request & ~perms->allow)
 		e = -EACCES;
 	return aa_audit_file(profile, perms, op, request, name, NULL, NULL,
@@ -333,7 +334,8 @@ static int profile_path_link(struct aa_profile *profile,
 
 	error = -EACCES;
 	/* aa_str_perms - handles the case of the dfa being NULL */
-	state = aa_str_perms(&(profile->file), profile->file.start, lname,
+	state = aa_str_perms(&(profile->file),
+			     profile->file.start[AA_CLASS_FILE], lname,
 			     cond, &lperms);
 
 	if (!(lperms.allow & AA_MAY_LINK))
@@ -363,8 +365,8 @@ static int profile_path_link(struct aa_profile *profile,
 	/* Do link perm subset test requiring allowed permission on link are
 	 * a subset of the allowed permissions on target.
 	 */
-	aa_str_perms(&(profile->file), profile->file.start, tname, cond,
-		     &perms);
+	aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE],
+		     tname, cond, &perms);
 
 	/* AA_MAY_LINK is not considered in the subset test */
 	request = lperms.allow & ~AA_MAY_LINK;
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 1f9e54aa1adfe..736b8f6554044 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -17,6 +17,7 @@
 #include "match.h"
 #include "perms.h"
 
+struct aa_policydb;
 struct aa_profile;
 struct path;
 
@@ -164,29 +165,9 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms,
 		  const char *target, struct aa_label *tlabel, kuid_t ouid,
 		  const char *info, int error);
 
-/**
- * struct aa_file_rules - components used for file rule permissions
- * @dfa: dfa to match path names and conditionals against
- * @perms: permission table indexed by the matched state accept entry of @dfa
- * @trans: transition table for indexed by named x transitions
- *
- * File permission are determined by matching a path against @dfa and
- * then using the value of the accept entry for the matching state as
- * an index into @perms.  If a named exec transition is required it is
- * looked up in the transition table.
- */
-struct aa_file_rules {
-	unsigned int start;
-	struct aa_dfa *dfa;
-	/* struct perms perms; */
-	struct aa_domain trans;
-	/* TODO: add delegate table */
-	struct aa_perms *fperms_table;
-};
-
-struct aa_perms *aa_lookup_fperms(struct aa_file_rules *file_rules,
-				 unsigned int state, struct path_cond *cond);
-unsigned int aa_str_perms(struct aa_file_rules *file_rules, unsigned int start,
+struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
+				  unsigned int state, struct path_cond *cond);
+unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start,
 			  const char *name, struct path_cond *cond,
 			  struct aa_perms *perms);
 
@@ -205,18 +186,6 @@ int aa_file_perm(const char *op, struct aa_label *label, struct file *file,
 
 void aa_inherit_files(const struct cred *cred, struct files_struct *files);
 
-static inline void aa_free_fperms_table(struct aa_perms *fperms_table)
-{
-	if (fperms_table)
-		kvfree(fperms_table);
-}
-
-static inline void aa_free_file_rules(struct aa_file_rules *rules)
-{
-	aa_put_dfa(rules->dfa);
-	aa_free_domain_entries(&rules->trans);
-	aa_free_fperms_table(rules->fperms_table);
-}
 
 /**
  * aa_map_file_perms - map file flags to AppArmor permissions
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 0dec18cd95e56..9bafeb3847d59 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -75,13 +75,21 @@ enum profile_mode {
  * start: set of start states for the different classes of data
  */
 struct aa_policydb {
-	/* Generic policy DFA specific rule types will be subsections of it */
 	struct aa_dfa *dfa;
 	struct aa_perms *perms;
+	struct aa_domain trans;
 	unsigned int start[AA_CLASS_LAST + 1];
-
 };
 
+static inline void aa_destroy_policydb(struct aa_policydb *policy)
+{
+	aa_put_dfa(policy->dfa);
+	if (policy->perms)
+		kvfree(policy->perms);
+	aa_free_domain_entries(&policy->trans);
+
+}
+
 /* struct aa_data - generic data structure
  * key: name for retrieving this data
  * size: size of data in bytes
@@ -151,7 +159,7 @@ struct aa_profile {
 	int size;
 
 	struct aa_policydb policy;
-	struct aa_file_rules file;
+	struct aa_policydb file;
 	struct aa_caps caps;
 
 	int xattr_count;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 6c3086e2c8201..0814ee57a06bf 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -219,7 +219,7 @@ void aa_free_profile(struct aa_profile *profile)
 	aa_put_ns(profile->ns);
 	kfree_sensitive(profile->rename);
 
-	aa_free_file_rules(&profile->file);
+	aa_destroy_policydb(&profile->file);
 	aa_free_cap_rules(&profile->caps);
 	aa_free_rlimit_rules(&profile->rlimits);
 
@@ -232,8 +232,7 @@ void aa_free_profile(struct aa_profile *profile)
 	kfree_sensitive(profile->dirname);
 	aa_put_dfa(profile->xmatch);
 	kvfree(profile->xmatch_perms);
-	aa_put_dfa(profile->policy.dfa);
-	kvfree(profile->policy.perms);
+	aa_destroy_policydb(&profile->policy);
 	if (profile->data) {
 		rht = profile->data;
 		profile->data = NULL;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index ed063385a83b4..726fa02026b57 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -1048,18 +1048,19 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		info = "failed to unpack profile file rules";
 		goto fail;
 	} else if (profile->file.dfa) {
-		if (!unpack_u32(e, &profile->file.start, "dfa_start"))
+		if (!unpack_u32(e, &profile->file.start[AA_CLASS_FILE],
+				"dfa_start"))
 			/* default start state */
-			profile->file.start = DFA_START;
+			profile->file.start[AA_CLASS_FILE] = DFA_START;
 	} else if (profile->policy.dfa &&
 		   profile->policy.start[AA_CLASS_FILE]) {
 		profile->file.dfa = aa_get_dfa(profile->policy.dfa);
-		profile->file.start = profile->policy.start[AA_CLASS_FILE];
+		profile->file.start[AA_CLASS_FILE] = profile->policy.start[AA_CLASS_FILE];
 	} else
 		profile->file.dfa = aa_get_dfa(nulldfa);
 
-	profile->file.fperms_table = compute_fperms(profile->file.dfa);
-	if (!profile->file.fperms_table) {
+	profile->file.perms = compute_fperms(profile->file.dfa);
+	if (!profile->file.perms) {
 		info = "failed to remap file permission table";
 		goto fail;
 	}
-- 
GitLab


From 048d49544455b3e3a535c4ec89057ea5ca8676f0 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 21 Nov 2020 01:42:40 -0800
Subject: [PATCH 0014/4122] apparmor: convert xmatch to using the new shared
 policydb struct

continue permission unification by converting xmatch to use the
policydb struct that is used by the other profile dfas.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c       |  2 +-
 security/apparmor/domain.c           | 22 ++++++++++++----------
 security/apparmor/include/apparmor.h |  1 +
 security/apparmor/include/policy.h   |  4 +---
 security/apparmor/policy.c           |  3 +--
 security/apparmor/policy_unpack.c    | 25 ++++++++++++-------------
 6 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 1625fee17fc75..a2d12b80592bd 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1095,7 +1095,7 @@ static int seq_profile_attach_show(struct seq_file *seq, void *v)
 	struct aa_profile *profile = labels_profile(label);
 	if (profile->attach)
 		seq_printf(seq, "%s\n", profile->attach);
-	else if (profile->xmatch)
+	else if (profile->xmatch.dfa)
 		seq_puts(seq, "<unknown>\n");
 	else
 		seq_printf(seq, "%s\n", profile->base.name);
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 819b7828cbc44..0df17fb236c78 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -321,7 +321,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 	might_sleep();
 
 	/* transition from exec match to xattr set */
-	state = aa_dfa_outofband_transition(profile->xmatch, state);
+	state = aa_dfa_outofband_transition(profile->xmatch.dfa, state);
 	d = bprm->file->f_path.dentry;
 
 	for (i = 0; i < profile->xattr_count; i++) {
@@ -335,18 +335,19 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 			 * that not present xattr can be distinguished from a 0
 			 * length value or rule that matches any value
 			 */
-			state = aa_dfa_null_transition(profile->xmatch, state);
+			state = aa_dfa_null_transition(profile->xmatch.dfa,
+						       state);
 			/* Check xattr value */
-			state = aa_dfa_match_len(profile->xmatch, state, value,
-						 size);
-			perm = profile->xmatch_perms[state].allow;
+			state = aa_dfa_match_len(profile->xmatch.dfa, state,
+						 value, size);
+			perm = profile->xmatch.perms[state].allow;
 			if (!(perm & MAY_EXEC)) {
 				ret = -EINVAL;
 				goto out;
 			}
 		}
 		/* transition to next element */
-		state = aa_dfa_outofband_transition(profile->xmatch, state);
+		state = aa_dfa_outofband_transition(profile->xmatch.dfa, state);
 		if (size < 0) {
 			/*
 			 * No xattr match, so verify if transition to
@@ -413,13 +414,14 @@ restart:
 		 * as another profile, signal a conflict and refuse to
 		 * match.
 		 */
-		if (profile->xmatch) {
+		if (profile->xmatch.dfa) {
 			unsigned int state, count;
 			u32 perm;
 
-			state = aa_dfa_leftmatch(profile->xmatch, DFA_START,
-						 name, &count);
-			perm = profile->xmatch_perms[state].allow;
+			state = aa_dfa_leftmatch(profile->xmatch.dfa,
+					profile->xmatch.start[AA_CLASS_XMATCH],
+					name, &count);
+			perm = profile->xmatch.perms[state].allow;
 			/* any accepting state means a valid match. */
 			if (perm & MAY_EXEC) {
 				int ret = 0;
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index dd2c131ed1706..8fd66a4ca0b86 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -26,6 +26,7 @@
 #define AA_CLASS_MOUNT		7
 #define AA_CLASS_PTRACE		9
 #define AA_CLASS_SIGNAL		10
+#define AA_CLASS_XMATCH		11
 #define AA_CLASS_NET		14
 #define AA_CLASS_LABEL		16
 #define AA_CLASS_POSIX_MQUEUE	17
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 9bafeb3847d59..44d8cbb1c3685 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -113,7 +113,6 @@ struct aa_data {
  * @attach: human readable attachment string
  * @xmatch: optional extended matching for unconfined executables names
  * @xmatch_len: xmatch prefix len, used to determine xmatch priority
- * @xmatch_perms: precomputed permissions for the xmatch DFA indexed by state
  * @audit: the auditing mode of the profile
  * @mode: the enforcement mode of the profile
  * @path_flags: flags controlling path generation behavior
@@ -148,9 +147,8 @@ struct aa_profile {
 	const char *rename;
 
 	const char *attach;
-	struct aa_dfa *xmatch;
+	struct aa_policydb xmatch;
 	unsigned int xmatch_len;
-	struct aa_perms *xmatch_perms;
 
 	enum audit_mode audit;
 	long mode;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 0814ee57a06bf..cdcf26c9bed57 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -230,8 +230,7 @@ void aa_free_profile(struct aa_profile *profile)
 		kfree_sensitive(profile->secmark[i].label);
 	kfree_sensitive(profile->secmark);
 	kfree_sensitive(profile->dirname);
-	aa_put_dfa(profile->xmatch);
-	kvfree(profile->xmatch_perms);
+	aa_destroy_policydb(&profile->xmatch);
 	aa_destroy_policydb(&profile->policy);
 	if (profile->data) {
 		rht = profile->data;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 726fa02026b57..f2a075986e49f 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -771,7 +771,7 @@ static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
 
 static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch)
 {
-	struct aa_perms *perms_table;
+	struct aa_perms *perms;
 	int state;
 	int state_count;
 
@@ -779,14 +779,13 @@ static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch)
 
 	state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen;
 	/* DFAs are restricted from having a state_count of less than 2 */
-	  perms_table = kvcalloc(state_count, sizeof(struct aa_perms),
-			       GFP_KERNEL);
+	perms = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL);
 
 	/* zero init so skip the trap state (state == 0) */
 	for (state = 1; state < state_count; state++)
-		perms_table[state].allow = dfa_user_allow(xmatch, state);
+		perms[state].allow = dfa_user_allow(xmatch, state);
 
-	return perms_table;
+	return perms;
 }
 
 static u32 map_other(u32 x)
@@ -888,23 +887,23 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	(void) unpack_str(e, &profile->attach, "attach");
 
 	/* xmatch is optional and may be NULL */
-	profile->xmatch = unpack_dfa(e);
-	if (IS_ERR(profile->xmatch)) {
-		error = PTR_ERR(profile->xmatch);
-		profile->xmatch = NULL;
+	profile->xmatch.dfa = unpack_dfa(e);
+	if (IS_ERR(profile->xmatch.dfa)) {
+		error = PTR_ERR(profile->xmatch.dfa);
+		profile->xmatch.dfa = NULL;
 		info = "bad xmatch";
 		goto fail;
 	}
 	/* neither xmatch_len not xmatch_perms are optional if xmatch is set */
-	if (profile->xmatch) {
+	if (profile->xmatch.dfa) {
 		if (!unpack_u32(e, &tmp, NULL)) {
 			info = "missing xmatch len";
 			goto fail;
 		}
 		profile->xmatch_len = tmp;
-
-		profile->xmatch_perms = compute_xmatch_perms(profile->xmatch);
-		if (!profile->xmatch_perms) {
+		profile->xmatch.start[AA_CLASS_XMATCH] = DFA_START;
+		profile->xmatch.perms = compute_xmatch_perms(profile->xmatch.dfa);
+		if (!profile->xmatch.perms) {
 			info = "failed to convert xmatch permission table";
 			goto fail;
 		}
-- 
GitLab


From 7572fea31e3e5c4c19154ccc064eb1f83dfe1333 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 13 Nov 2020 01:46:23 -0800
Subject: [PATCH 0015/4122] apparmor: convert fperm lookup to use accept as an
 index

Remap file dfa accept table from embedded perms to index and then move
fperm lookup to use the accept entry as an index into the fperm table.

This is a step toward unifying permission lookup.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/file.c          |  6 ++--
 security/apparmor/policy_unpack.c | 57 ++++++++++++++++++++++---------
 2 files changed, 44 insertions(+), 19 deletions(-)

diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index d2be851be4121..7bddec3df75f7 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -188,13 +188,15 @@ struct aa_perms default_perms = {};
 struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
 				 unsigned int state, struct path_cond *cond)
 {
+	unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state];
+
 	if (!(file_rules->perms))
 		return &default_perms;
 
 	if (uid_eq(current_fsuid(), cond->uid))
-		return &(file_rules->perms[state * 2]);
+		return &(file_rules->perms[index]);
 
-	return &(file_rules->perms[state * 2 + 1]);
+	return &(file_rules->perms[index + 1]);
 }
 
 /**
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index f2a075986e49f..4cf62c1be388f 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -836,6 +836,29 @@ static struct aa_perms *compute_perms(struct aa_dfa *dfa)
 	return table;
 }
 
+/**
+ * remap_dfa_accept - remap old dfa accept table to be an index
+ * @dfa: dfa to do the remapping on
+ * @factor: scaling factor for the index conversion.
+ *
+ * Used in conjunction with compute_Xperms, it converts old style perms
+ * that are encoded in the dfa accept tables to the new style where
+ * there is a permission table and the accept table is an index into
+ * the permission table.
+ */
+static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor)
+{
+	unsigned int state;
+	unsigned int state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+
+	AA_BUG(!dfa);
+
+	for (state = 0; state < state_count; state++)
+		ACCEPT_TABLE(dfa)[state] = state * factor;
+	kvfree(dfa->tables[YYTD_ID_ACCEPT2]);
+	dfa->tables[YYTD_ID_ACCEPT2] = NULL;
+}
+
 /**
  * unpack_profile - unpack a serialized profile
  * @e: serialized data extent information (NOT NULL)
@@ -1051,6 +1074,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 				"dfa_start"))
 			/* default start state */
 			profile->file.start[AA_CLASS_FILE] = DFA_START;
+		profile->file.perms = compute_fperms(profile->file.dfa);
+		if (!profile->file.perms) {
+			info = "failed to remap file permission table";
+			goto fail;
+		}
+		remap_dfa_accept(profile->file.dfa, 2);
+		if (!unpack_trans_table(e, profile)) {
+			info = "failed to unpack profile transition table";
+			goto fail;
+		}
 	} else if (profile->policy.dfa &&
 		   profile->policy.start[AA_CLASS_FILE]) {
 		profile->file.dfa = aa_get_dfa(profile->policy.dfa);
@@ -1058,16 +1091,6 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	} else
 		profile->file.dfa = aa_get_dfa(nulldfa);
 
-	profile->file.perms = compute_fperms(profile->file.dfa);
-	if (!profile->file.perms) {
-		info = "failed to remap file permission table";
-		goto fail;
-	}
-	if (!unpack_trans_table(e, profile)) {
-		info = "failed to unpack profile transition table";
-		goto fail;
-	}
-
 	if (unpack_nameX(e, AA_STRUCT, "data")) {
 		info = "out of memory";
 		profile->data = kzalloc(sizeof(*profile->data), GFP_KERNEL);
@@ -1198,9 +1221,7 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size)
 {
 	int i;
 	for (i = 0; i < dfa->tables[YYTD_ID_ACCEPT]->td_lolen; i++) {
-		if (!verify_xindex(dfa_user_xindex(dfa, i), table_size))
-			return false;
-		if (!verify_xindex(dfa_other_xindex(dfa, i), table_size))
+		if (!verify_xindex(ACCEPT_TABLE(dfa)[i], table_size))
 			return false;
 	}
 	return true;
@@ -1211,14 +1232,16 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size)
  * @profile: profile to verify (NOT NULL)
  *
  * Returns: 0 if passes verification else error
+ *
+ * This verification is post any unpack mapping or changes
  */
 static int verify_profile(struct aa_profile *profile)
 {
 	if (profile->file.dfa &&
-	    !verify_dfa_xindex(profile->file.dfa,
-			       profile->file.trans.size)) {
-		audit_iface(profile, NULL, NULL, "Invalid named transition",
-			    NULL, -EPROTO);
+	     !verify_dfa_xindex(profile->file.dfa,
+				profile->file.trans.size)) {
+		audit_iface(profile, NULL, NULL,
+			    "Unpack: Invalid named transition", NULL, -EPROTO);
 		return -EPROTO;
 	}
 
-- 
GitLab


From 2d63dd43ae334ec6f5374d37bb06c4cc57621b3c Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 13 Nov 2020 23:36:09 -0800
Subject: [PATCH 0016/4122] apparmor: convert xmatch lookup to use accept as an
 index

Remap xmatch dfa accept table from embedded perms to an index and then
move xmatch lookup to use accept entry to index into the xmatch table.

This is step towards unifying permission lookup and reducing the
size of permissions tables.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/domain.c        | 10 ++++++----
 security/apparmor/policy_unpack.c |  1 +
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 0df17fb236c78..45a8887021f10 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -328,7 +328,7 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 		size = vfs_getxattr_alloc(&init_user_ns, d, profile->xattrs[i],
 					  &value, value_size, GFP_KERNEL);
 		if (size >= 0) {
-			u32 perm;
+			u32 index, perm;
 
 			/*
 			 * Check the xattr presence before value. This ensure
@@ -340,7 +340,8 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 			/* Check xattr value */
 			state = aa_dfa_match_len(profile->xmatch.dfa, state,
 						 value, size);
-			perm = profile->xmatch.perms[state].allow;
+			index = ACCEPT_TABLE(profile->xmatch.dfa)[state];
+			perm = profile->xmatch.perms[index].allow;
 			if (!(perm & MAY_EXEC)) {
 				ret = -EINVAL;
 				goto out;
@@ -416,12 +417,13 @@ restart:
 		 */
 		if (profile->xmatch.dfa) {
 			unsigned int state, count;
-			u32 perm;
+			u32 index, perm;
 
 			state = aa_dfa_leftmatch(profile->xmatch.dfa,
 					profile->xmatch.start[AA_CLASS_XMATCH],
 					name, &count);
-			perm = profile->xmatch.perms[state].allow;
+			index = ACCEPT_TABLE(profile->xmatch.dfa)[state];
+			perm = profile->xmatch.perms[index].allow;
 			/* any accepting state means a valid match. */
 			if (perm & MAY_EXEC) {
 				int ret = 0;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 4cf62c1be388f..4cdc969887832 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -930,6 +930,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			info = "failed to convert xmatch permission table";
 			goto fail;
 		}
+		remap_dfa_accept(profile->xmatch.dfa, 1);
 	}
 
 	/* disconnected attachment string is optional */
-- 
GitLab


From bf690f59d0429c62de4db1234f16557eedcb39bf Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 10 Apr 2021 02:09:44 -0700
Subject: [PATCH 0017/4122] apparmor: cleanup shared permission struct

The shared permissions struct has the stop field which is unneeded
and the "reserved" subtree field commented which is needed. Also
reorganize so that the entries are logically grouped.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/perms.h | 17 +++++++----------
 security/apparmor/lib.c           |  4 ++--
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index de9631edb1ff9..1f3e7680e8092 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -65,22 +65,19 @@ extern const char *aa_file_perm_names[];
 
 struct aa_perms {
 	u32 allow;
-	u32 audit;	/* set only when allow is set */
-
 	u32 deny;	/* explicit deny, or conflict if allow also set */
-	u32 quiet;	/* set only when ~allow | deny */
-	u32 kill;	/* set only when ~allow | deny */
-	u32 stop;	/* set only when ~allow | deny */
 
-	u32 complain;	/* accumulates only used when ~allow & ~deny */
+	u32 subtree;	/* allow perm on full subtree only when allow is set */
 	u32 cond;	/* set only when ~allow and ~deny */
 
-	u32 hide;	/* set only when  ~allow | deny */
+	u32 kill;	/* set only when ~allow | deny */
+	u32 complain;	/* accumulates only used when ~allow & ~deny */
 	u32 prompt;	/* accumulates only used when ~allow & ~deny */
 
-	/* Reserved:
-	 * u32 subtree;	/ * set only when allow is set * /
-	 */
+	u32 audit;	/* set only when allow is set */
+	u32 quiet;	/* set only when ~allow | deny */
+	u32 hide;	/* set only when  ~allow | deny */
+
 	u16 xindex;
 };
 
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 505ef5848f7ce..974a217218a6e 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -327,11 +327,11 @@ void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend)
 	accum->audit |= addend->audit & addend->allow;
 	accum->quiet &= addend->quiet & ~addend->allow;
 	accum->kill |= addend->kill & ~addend->allow;
-	accum->stop |= addend->stop & ~addend->allow;
 	accum->complain |= addend->complain & ~addend->allow & ~addend->deny;
 	accum->cond |= addend->cond & ~addend->allow & ~addend->deny;
 	accum->hide &= addend->hide & ~addend->allow;
 	accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny;
+	accum->subtree |= addend->subtree & ~addend->deny;
 }
 
 /**
@@ -346,11 +346,11 @@ void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend)
 	accum->audit |= addend->audit & accum->allow;
 	accum->quiet &= addend->quiet & ~accum->allow;
 	accum->kill |= addend->kill & ~accum->allow;
-	accum->stop |= addend->stop & ~accum->allow;
 	accum->complain |= addend->complain & ~accum->allow & ~accum->deny;
 	accum->cond |= addend->cond & ~accum->allow & ~accum->deny;
 	accum->hide &= addend->hide & ~accum->allow;
 	accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny;
+	accum->subtree &= addend->subtree & ~accum->deny;
 }
 
 void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label,
-- 
GitLab


From e844fe9b51c984472ea98be3b2d1201ba9ee3213 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 16 Jul 2022 01:53:46 -0700
Subject: [PATCH 0018/4122] apparmor: convert policy lookup to use accept as an
 index

Remap polidydb dfa accept table from embedded perms to an index, and
then move the perm lookup to use the accept entry as an index into the
perm table. This is done so that the perm table can be separated from
the dfa, allowing dfa accept to index to share expanded permission
sets.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c     |  2 +-
 security/apparmor/include/perms.h  |  8 --------
 security/apparmor/include/policy.h | 12 ++++++++++++
 security/apparmor/label.c          |  6 +++---
 security/apparmor/mount.c          |  8 ++++----
 security/apparmor/net.c            |  2 +-
 security/apparmor/policy_unpack.c  | 19 ++++++++++++-------
 7 files changed, 33 insertions(+), 24 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index a2d12b80592bd..f2b78108bae81 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -634,7 +634,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
 		state = aa_dfa_match_len(dfa, profile->policy.start[0],
 					 match_str, match_len);
 		if (state)
-			tmp = *aa_lookup_perms(profile->policy.perms, state);
+			tmp = *aa_lookup_perms(&profile->policy, state);
 	}
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum_raw(perms, &tmp);
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 1f3e7680e8092..1014a7bbc0278 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -132,14 +132,6 @@ extern struct aa_perms allperms;
 
 extern struct aa_perms default_perms;
 
-static inline struct aa_perms *aa_lookup_perms(struct aa_perms *perms,
-					       unsigned int state)
-{
-	if (!(perms))
-		return &default_perms;
-
-	return &(perms[state]);
-}
 
 void aa_perm_mask_to_str(char *str, size_t str_size, const char *chrs,
 			 u32 mask);
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 44d8cbb1c3685..31c0af8762505 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -90,6 +90,18 @@ static inline void aa_destroy_policydb(struct aa_policydb *policy)
 
 }
 
+static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy,
+					       unsigned int state)
+{
+	unsigned int index = ACCEPT_TABLE(policy->dfa)[state];
+
+	if (!(policy->perms))
+		return &default_perms;
+
+	return &(policy->perms[index]);
+}
+
+
 /* struct aa_data - generic data structure
  * key: name for retrieving this data
  * size: size of data in bytes
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index ddb04417bdabf..30cb68641c0f5 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1328,7 +1328,7 @@ next:
 		if (!state)
 			goto fail;
 	}
-	*perms = *aa_lookup_perms(profile->policy.perms, state);
+	*perms = *aa_lookup_perms(&profile->policy, state);
 	aa_apply_modes_to_perms(profile, perms);
 	if ((perms->allow & request) != request)
 		return -EACCES;
@@ -1379,7 +1379,7 @@ static int label_components_match(struct aa_profile *profile,
 	return 0;
 
 next:
-	tmp = *aa_lookup_perms(profile->policy.perms, state);
+	tmp = *aa_lookup_perms(&profile->policy, state);
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum(perms, &tmp);
 	label_for_each_cont(i, label, tp) {
@@ -1388,7 +1388,7 @@ next:
 		state = match_component(profile, tp, start);
 		if (!state)
 			goto fail;
-		tmp = *aa_lookup_perms(profile->policy.perms, state);
+		tmp = *aa_lookup_perms(&profile->policy, state);
 		aa_apply_modes_to_perms(profile, &tmp);
 		aa_perms_accum(perms, &tmp);
 	}
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index 1e978c2b1ee45..7594f3a3441e5 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -249,7 +249,7 @@ static int do_match_mnt(struct aa_policydb *policy, unsigned int start,
 	state = match_mnt_flags(policy->dfa, state, flags);
 	if (!state)
 		return 4;
-	*perms = *aa_lookup_perms(policy->perms, state);
+	*perms = *aa_lookup_perms(policy, state);
 	if (perms->allow & AA_MAY_MOUNT)
 		return 0;
 
@@ -262,7 +262,7 @@ static int do_match_mnt(struct aa_policydb *policy, unsigned int start,
 		state = aa_dfa_match(policy->dfa, state, data);
 		if (!state)
 			return 5;
-		*perms = *aa_lookup_perms(policy->perms, state);
+		*perms = *aa_lookup_perms(policy, state);
 		if (perms->allow & AA_MAY_MOUNT)
 			return 0;
 	}
@@ -584,7 +584,7 @@ static int profile_umount(struct aa_profile *profile, const struct path *path,
 	state = aa_dfa_match(profile->policy.dfa,
 			     profile->policy.start[AA_CLASS_MOUNT],
 			     name);
-	perms = *aa_lookup_perms(profile->policy.perms, state);
+	perms = *aa_lookup_perms(&profile->policy, state);
 	if (AA_MAY_UMOUNT & ~perms.allow)
 		error = -EACCES;
 
@@ -655,7 +655,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 			     new_name);
 	state = aa_dfa_null_transition(profile->policy.dfa, state);
 	state = aa_dfa_match(profile->policy.dfa, state, old_name);
-	perms = *aa_lookup_perms(profile->policy.perms, state);
+	perms = *aa_lookup_perms(&profile->policy, state);
 
 	if (AA_MAY_PIVOTROOT & perms.allow)
 		error = 0;
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index 88e8a7ea54c0c..fcfb97079e1be 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -125,7 +125,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
 	buffer[1] = cpu_to_be16((u16) type);
 	state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer,
 				 4);
-	perms = *aa_lookup_perms(profile->policy.perms, state);
+	perms = *aa_lookup_perms(&profile->policy, state);
 	aa_apply_modes_to_perms(profile, &perms);
 
 	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 4cdc969887832..0917412ba48f4 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -1055,13 +1055,15 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		}
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
+		profile->policy.perms = compute_perms(profile->policy.dfa);
+		if (!profile->policy.perms) {
+			info = "failed to remap policydb permission table";
+			goto fail;
+		}
+		/* Do not remap internal dfas */
+		remap_dfa_accept(profile->policy.dfa, 1);
 	} else
 		profile->policy.dfa = aa_get_dfa(nulldfa);
-	profile->policy.perms = compute_perms(profile->policy.dfa);
-	if (!profile->policy.perms) {
-		info = "failed to remap policydb permission table";
-		goto fail;
-	}
 
 	/* get file rules */
 	profile->file.dfa = unpack_dfa(e);
@@ -1238,9 +1240,12 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size)
  */
 static int verify_profile(struct aa_profile *profile)
 {
-	if (profile->file.dfa &&
+	if ((profile->file.dfa &&
 	     !verify_dfa_xindex(profile->file.dfa,
-				profile->file.trans.size)) {
+				profile->file.trans.size)) ||
+	    (profile->policy.dfa &&
+	     !verify_dfa_xindex(profile->policy.dfa,
+				profile->policy.trans.size))) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid named transition", NULL, -EPROTO);
 		return -EPROTO;
-- 
GitLab


From 33fc95d8293cfca352ac875668857293e22d7d51 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 17 Jan 2022 13:43:49 -0800
Subject: [PATCH 0019/4122] apparmor: preparse for state being more than just
 an integer

Convert from an unsigned int to a state_t for state position. This is
a step in prepping for the state position carrying some additional
flags, and a limited form of backtracking to support variables.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c     |  2 +-
 security/apparmor/domain.c         | 25 ++++++-------
 security/apparmor/file.c           | 12 +++----
 security/apparmor/include/file.h   |  8 ++---
 security/apparmor/include/label.h  |  6 ++--
 security/apparmor/include/lib.h    |  4 +--
 security/apparmor/include/match.h  | 28 +++++++--------
 security/apparmor/include/policy.h | 14 ++++----
 security/apparmor/ipc.c            |  2 +-
 security/apparmor/label.c          | 14 ++++----
 security/apparmor/lib.c            |  2 +-
 security/apparmor/match.c          | 58 +++++++++++++++---------------
 security/apparmor/mount.c          | 10 +++---
 security/apparmor/net.c            |  2 +-
 security/apparmor/policy_unpack.c  | 16 ++++-----
 15 files changed, 101 insertions(+), 102 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index f2b78108bae81..fb9d2ccb34d60 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -613,7 +613,7 @@ static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
 {
 	struct aa_perms tmp = { };
 	struct aa_dfa *dfa;
-	unsigned int state = 0;
+	aa_state_t state = DFA_NOMATCH;
 
 	if (profile_unconfined(profile))
 		return;
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 45a8887021f10..5883f0fc02d3d 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -95,9 +95,9 @@ out:
  * If a subns profile is not to be matched should be prescreened with
  * visibility test.
  */
-static inline unsigned int match_component(struct aa_profile *profile,
-					   struct aa_profile *tp,
-					   bool stack, unsigned int state)
+static inline aa_state_t match_component(struct aa_profile *profile,
+					 struct aa_profile *tp,
+					 bool stack, aa_state_t state)
 {
 	const char *ns_name;
 
@@ -132,7 +132,7 @@ static inline unsigned int match_component(struct aa_profile *profile,
  */
 static int label_compound_match(struct aa_profile *profile,
 				struct aa_label *label, bool stack,
-				unsigned int state, bool subns, u32 request,
+				aa_state_t state, bool subns, u32 request,
 				struct aa_perms *perms)
 {
 	struct aa_profile *tp;
@@ -192,14 +192,14 @@ fail:
  */
 static int label_components_match(struct aa_profile *profile,
 				  struct aa_label *label, bool stack,
-				  unsigned int start, bool subns, u32 request,
+				  aa_state_t start, bool subns, u32 request,
 				  struct aa_perms *perms)
 {
 	struct aa_profile *tp;
 	struct label_it i;
 	struct aa_perms tmp;
 	struct path_cond cond = { };
-	unsigned int state = 0;
+	aa_state_t state = 0;
 
 	/* find first subcomponent to test */
 	label_for_each(i, label, tp) {
@@ -252,7 +252,7 @@ fail:
  * Returns: the state the match finished in, may be the none matching state
  */
 static int label_match(struct aa_profile *profile, struct aa_label *label,
-		       bool stack, unsigned int state, bool subns, u32 request,
+		       bool stack, aa_state_t state, bool subns, u32 request,
 		       struct aa_perms *perms)
 {
 	int error;
@@ -286,7 +286,7 @@ static int label_match(struct aa_profile *profile, struct aa_label *label,
  */
 static int change_profile_perms(struct aa_profile *profile,
 				struct aa_label *target, bool stack,
-				u32 request, unsigned int start,
+				u32 request, aa_state_t start,
 				struct aa_perms *perms)
 {
 	if (profile_unconfined(profile)) {
@@ -308,7 +308,7 @@ static int change_profile_perms(struct aa_profile *profile,
  * Returns: number of extended attributes that matched, or < 0 on error
  */
 static int aa_xattrs_match(const struct linux_binprm *bprm,
-			   struct aa_profile *profile, unsigned int state)
+			   struct aa_profile *profile, aa_state_t state)
 {
 	int i;
 	ssize_t size;
@@ -416,7 +416,8 @@ restart:
 		 * match.
 		 */
 		if (profile->xmatch.dfa) {
-			unsigned int state, count;
+			unsigned int count;
+			aa_state_t state;
 			u32 index, perm;
 
 			state = aa_dfa_leftmatch(profile->xmatch.dfa,
@@ -631,7 +632,7 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 {
 	struct aa_label *new = NULL;
 	const char *info = NULL, *name = NULL, *target = NULL;
-	unsigned int state = profile->file.start[AA_CLASS_FILE];
+	aa_state_t state = profile->file.start[AA_CLASS_FILE];
 	struct aa_perms perms = {};
 	bool nonewprivs = false;
 	int error = 0;
@@ -727,7 +728,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 			  char *buffer, struct path_cond *cond,
 			  bool *secure_exec)
 {
-	unsigned int state = profile->file.start[AA_CLASS_FILE];
+	aa_state_t state = profile->file.start[AA_CLASS_FILE];
 	struct aa_perms perms = {};
 	const char *xname = NULL, *info = "change_profile onexec";
 	int error = -EACCES;
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 7bddec3df75f7..636efcade3f58 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -186,7 +186,7 @@ static int path_name(const char *op, struct aa_label *label,
  */
 struct aa_perms default_perms = {};
 struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
-				 unsigned int state, struct path_cond *cond)
+				 aa_state_t state, struct path_cond *cond)
 {
 	unsigned int index = ACCEPT_TABLE(file_rules->dfa)[state];
 
@@ -209,11 +209,11 @@ struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
  *
  * Returns: the final state in @dfa when beginning @start and walking @name
  */
-unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start,
-			  const char *name, struct path_cond *cond,
-			  struct aa_perms *perms)
+aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
+			const char *name, struct path_cond *cond,
+			struct aa_perms *perms)
 {
-	unsigned int state;
+	aa_state_t state;
 	state = aa_dfa_match(file_rules->dfa, start, name);
 	*perms = *(aa_lookup_fperms(file_rules, state, cond));
 
@@ -320,7 +320,7 @@ static int profile_path_link(struct aa_profile *profile,
 	struct aa_perms lperms = {}, perms;
 	const char *info = NULL;
 	u32 request = AA_MAY_LINK;
-	unsigned int state;
+	aa_state_t state;
 	int error;
 
 	error = path_name(OP_LINK, &profile->label, link, profile->path_flags,
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 736b8f6554044..8c82cf279dc2a 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -166,10 +166,10 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms,
 		  const char *info, int error);
 
 struct aa_perms *aa_lookup_fperms(struct aa_policydb *file_rules,
-				  unsigned int state, struct path_cond *cond);
-unsigned int aa_str_perms(struct aa_policydb *file_rules, unsigned int start,
-			  const char *name, struct path_cond *cond,
-			  struct aa_perms *perms);
+				  aa_state_t state, struct path_cond *cond);
+aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
+			const char *name, struct path_cond *cond,
+			struct aa_perms *perms);
 
 int __aa_path_perm(const char *op, struct aa_profile *profile,
 		   const char *name, u32 request, struct path_cond *cond,
diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h
index 860484c6f99a6..1130ba10a1526 100644
--- a/security/apparmor/include/label.h
+++ b/security/apparmor/include/label.h
@@ -333,7 +333,7 @@ struct aa_label *aa_label_parse(struct aa_label *base, const char *str,
 static inline const char *aa_label_strn_split(const char *str, int n)
 {
 	const char *pos;
-	unsigned int state;
+	aa_state_t state;
 
 	state = aa_dfa_matchn_until(stacksplitdfa, DFA_START, str, n, &pos);
 	if (!ACCEPT_TABLE(stacksplitdfa)[state])
@@ -345,7 +345,7 @@ static inline const char *aa_label_strn_split(const char *str, int n)
 static inline const char *aa_label_str_split(const char *str)
 {
 	const char *pos;
-	unsigned int state;
+	aa_state_t state;
 
 	state = aa_dfa_match_until(stacksplitdfa, DFA_START, str, &pos);
 	if (!ACCEPT_TABLE(stacksplitdfa)[state])
@@ -358,7 +358,7 @@ static inline const char *aa_label_str_split(const char *str)
 
 struct aa_perms;
 int aa_label_match(struct aa_profile *profile, struct aa_label *label,
-		   unsigned int state, bool subns, u32 request,
+		   aa_state_t state, bool subns, u32 request,
 		   struct aa_perms *perms);
 
 
diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h
index f42359f58eb58..f176f3ced2a36 100644
--- a/security/apparmor/include/lib.h
+++ b/security/apparmor/include/lib.h
@@ -87,8 +87,8 @@ static inline bool aa_strneq(const char *str, const char *sub, int len)
  * character which is not used in standard matching and is only
  * used to separate pairs.
  */
-static inline unsigned int aa_dfa_null_transition(struct aa_dfa *dfa,
-						  unsigned int start)
+static inline aa_state_t aa_dfa_null_transition(struct aa_dfa *dfa,
+						aa_state_t start)
 {
 	/* the null transition only needs the string's null terminator byte */
 	return aa_dfa_next(dfa, start, 0);
diff --git a/security/apparmor/include/match.h b/security/apparmor/include/match.h
index 8844895905881..58fbf67139b9c 100644
--- a/security/apparmor/include/match.h
+++ b/security/apparmor/include/match.h
@@ -125,19 +125,19 @@ static inline size_t table_size(size_t len, size_t el_size)
 int aa_setup_dfa_engine(void);
 void aa_teardown_dfa_engine(void);
 
+#define aa_state_t unsigned int
+
 struct aa_dfa *aa_dfa_unpack(void *blob, size_t size, int flags);
-unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start,
-			      const char *str, int len);
-unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start,
-			  const char *str);
-unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state,
-			 const char c);
-unsigned int aa_dfa_outofband_transition(struct aa_dfa *dfa,
-					 unsigned int state);
-unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start,
-				const char *str, const char **retpos);
-unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start,
-				 const char *str, int n, const char **retpos);
+aa_state_t aa_dfa_match_len(struct aa_dfa *dfa, aa_state_t start,
+			    const char *str, int len);
+aa_state_t aa_dfa_match(struct aa_dfa *dfa, aa_state_t start,
+			const char *str);
+aa_state_t aa_dfa_next(struct aa_dfa *dfa, aa_state_t state, const char c);
+aa_state_t aa_dfa_outofband_transition(struct aa_dfa *dfa, aa_state_t state);
+aa_state_t aa_dfa_match_until(struct aa_dfa *dfa, aa_state_t start,
+			      const char *str, const char **retpos);
+aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start,
+			       const char *str, int n, const char **retpos);
 
 void aa_dfa_free_kref(struct kref *kref);
 
@@ -156,8 +156,8 @@ struct match_workbuf N = {		\
 	.len = 0,			\
 }
 
-unsigned int aa_dfa_leftmatch(struct aa_dfa *dfa, unsigned int start,
-			      const char *str, unsigned int *count);
+aa_state_t aa_dfa_leftmatch(struct aa_dfa *dfa, aa_state_t start,
+			    const char *str, unsigned int *count);
 
 /**
  * aa_get_dfa - increment refcount on dfa @p
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 31c0af8762505..3a7d165e8fcc1 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -78,7 +78,7 @@ struct aa_policydb {
 	struct aa_dfa *dfa;
 	struct aa_perms *perms;
 	struct aa_domain trans;
-	unsigned int start[AA_CLASS_LAST + 1];
+	aa_state_t start[AA_CLASS_LAST + 1];
 };
 
 static inline void aa_destroy_policydb(struct aa_policydb *policy)
@@ -91,7 +91,7 @@ static inline void aa_destroy_policydb(struct aa_policydb *policy)
 }
 
 static inline struct aa_perms *aa_lookup_perms(struct aa_policydb *policy,
-					       unsigned int state)
+					       aa_state_t state)
 {
 	unsigned int index = ACCEPT_TABLE(policy->dfa)[state];
 
@@ -239,7 +239,7 @@ static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p)
 	return labels_profile(aa_get_newest_label(&p->label));
 }
 
-static inline unsigned int PROFILE_MEDIATES(struct aa_profile *profile,
+static inline aa_state_t PROFILE_MEDIATES(struct aa_profile *profile,
 					    unsigned char class)
 {
 	if (class <= AA_CLASS_LAST)
@@ -249,13 +249,13 @@ static inline unsigned int PROFILE_MEDIATES(struct aa_profile *profile,
 					profile->policy.start[0], &class, 1);
 }
 
-static inline unsigned int PROFILE_MEDIATES_AF(struct aa_profile *profile,
-					       u16 AF) {
-	unsigned int state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+static inline aa_state_t PROFILE_MEDIATES_AF(struct aa_profile *profile,
+					     u16 AF) {
+	aa_state_t state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
 	__be16 be_af = cpu_to_be16(AF);
 
 	if (!state)
-		return 0;
+		return DFA_NOMATCH;
 	return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2);
 }
 
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 3dbbc59d440da..7255a9d523728 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -79,7 +79,7 @@ static int profile_signal_perm(struct aa_profile *profile,
 			       struct common_audit_data *sa)
 {
 	struct aa_perms perms;
-	unsigned int state;
+	aa_state_t state;
 
 	if (profile_unconfined(profile) ||
 	    !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL))
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 30cb68641c0f5..3a967003fa7c7 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1265,9 +1265,9 @@ static inline bool label_is_visible(struct aa_profile *profile,
  * If a subns profile is not to be matched should be prescreened with
  * visibility test.
  */
-static inline unsigned int match_component(struct aa_profile *profile,
-					   struct aa_profile *tp,
-					   unsigned int state)
+static inline aa_state_t match_component(struct aa_profile *profile,
+					 struct aa_profile *tp,
+					 aa_state_t state)
 {
 	const char *ns_name;
 
@@ -1299,7 +1299,7 @@ static inline unsigned int match_component(struct aa_profile *profile,
  */
 static int label_compound_match(struct aa_profile *profile,
 				struct aa_label *label,
-				unsigned int state, bool subns, u32 request,
+				aa_state_t state, bool subns, u32 request,
 				struct aa_perms *perms)
 {
 	struct aa_profile *tp;
@@ -1356,14 +1356,14 @@ fail:
  *        check to be stacked.
  */
 static int label_components_match(struct aa_profile *profile,
-				  struct aa_label *label, unsigned int start,
+				  struct aa_label *label, aa_state_t start,
 				  bool subns, u32 request,
 				  struct aa_perms *perms)
 {
 	struct aa_profile *tp;
 	struct label_it i;
 	struct aa_perms tmp;
-	unsigned int state = 0;
+	aa_state_t state = 0;
 
 	/* find first subcomponent to test */
 	label_for_each(i, label, tp) {
@@ -1415,7 +1415,7 @@ fail:
  * Returns: the state the match finished in, may be the none matching state
  */
 int aa_label_match(struct aa_profile *profile, struct aa_label *label,
-		   unsigned int state, bool subns, u32 request,
+		   aa_state_t state, bool subns, u32 request,
 		   struct aa_perms *perms)
 {
 	int error = label_compound_match(profile, label, state, subns, request,
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 974a217218a6e..60deb4dc30c7f 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -357,7 +357,7 @@ void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label,
 			    int type, u32 request, struct aa_perms *perms)
 {
 	/* TODO: doesn't yet handle extended types */
-	unsigned int state;
+	aa_state_t state;
 
 	state = aa_dfa_next(profile->policy.dfa,
 			    profile->policy.start[AA_CLASS_LABEL],
diff --git a/security/apparmor/match.c b/security/apparmor/match.c
index 3e9e1eaf990ed..5095c26ca6836 100644
--- a/security/apparmor/match.c
+++ b/security/apparmor/match.c
@@ -436,17 +436,17 @@ do {							\
  *
  * Returns: final state reached after input is consumed
  */
-unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start,
-			      const char *str, int len)
+aa_state_t aa_dfa_match_len(struct aa_dfa *dfa, aa_state_t start,
+			    const char *str, int len)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
 	u32 *base = BASE_TABLE(dfa);
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
-	unsigned int state = start;
+	aa_state_t state = start;
 
-	if (state == 0)
-		return 0;
+	if (state == DFA_NOMATCH)
+		return DFA_NOMATCH;
 
 	/* current state is <state>, matching character *str */
 	if (dfa->tables[YYTD_ID_EC]) {
@@ -476,17 +476,16 @@ unsigned int aa_dfa_match_len(struct aa_dfa *dfa, unsigned int start,
  *
  * Returns: final state reached after input is consumed
  */
-unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start,
-			  const char *str)
+aa_state_t aa_dfa_match(struct aa_dfa *dfa, aa_state_t start, const char *str)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
 	u32 *base = BASE_TABLE(dfa);
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
-	unsigned int state = start;
+	aa_state_t state = start;
 
-	if (state == 0)
-		return 0;
+	if (state == DFA_NOMATCH)
+		return DFA_NOMATCH;
 
 	/* current state is <state>, matching character *str */
 	if (dfa->tables[YYTD_ID_EC]) {
@@ -515,8 +514,7 @@ unsigned int aa_dfa_match(struct aa_dfa *dfa, unsigned int start,
  *
  * Returns: state reach after input @c
  */
-unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state,
-			  const char c)
+aa_state_t aa_dfa_next(struct aa_dfa *dfa, aa_state_t state, const char c)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
 	u32 *base = BASE_TABLE(dfa);
@@ -534,7 +532,7 @@ unsigned int aa_dfa_next(struct aa_dfa *dfa, unsigned int state,
 	return state;
 }
 
-unsigned int aa_dfa_outofband_transition(struct aa_dfa *dfa, unsigned int state)
+aa_state_t aa_dfa_outofband_transition(struct aa_dfa *dfa, aa_state_t state)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
 	u32 *base = BASE_TABLE(dfa);
@@ -564,7 +562,7 @@ unsigned int aa_dfa_outofband_transition(struct aa_dfa *dfa, unsigned int state)
  *
  * Returns: final state reached after input is consumed
  */
-unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start,
+aa_state_t aa_dfa_match_until(struct aa_dfa *dfa, aa_state_t start,
 				const char *str, const char **retpos)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
@@ -572,10 +570,10 @@ unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start,
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
 	u32 *accept = ACCEPT_TABLE(dfa);
-	unsigned int state = start, pos;
+	aa_state_t state = start, pos;
 
-	if (state == 0)
-		return 0;
+	if (state == DFA_NOMATCH)
+		return DFA_NOMATCH;
 
 	/* current state is <state>, matching character *str */
 	if (dfa->tables[YYTD_ID_EC]) {
@@ -625,7 +623,7 @@ unsigned int aa_dfa_match_until(struct aa_dfa *dfa, unsigned int start,
  *
  * Returns: final state reached after input is consumed
  */
-unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start,
+aa_state_t aa_dfa_matchn_until(struct aa_dfa *dfa, aa_state_t start,
 				 const char *str, int n, const char **retpos)
 {
 	u16 *def = DEFAULT_TABLE(dfa);
@@ -633,11 +631,11 @@ unsigned int aa_dfa_matchn_until(struct aa_dfa *dfa, unsigned int start,
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
 	u32 *accept = ACCEPT_TABLE(dfa);
-	unsigned int state = start, pos;
+	aa_state_t state = start, pos;
 
 	*retpos = NULL;
-	if (state == 0)
-		return 0;
+	if (state == DFA_NOMATCH)
+		return DFA_NOMATCH;
 
 	/* current state is <state>, matching character *str */
 	if (dfa->tables[YYTD_ID_EC]) {
@@ -677,11 +675,11 @@ do {								\
 } while (0)
 
 /* For DFAs that don't support extended tagging of states */
-static bool is_loop(struct match_workbuf *wb, unsigned int state,
+static bool is_loop(struct match_workbuf *wb, aa_state_t state,
 		    unsigned int *adjust)
 {
-	unsigned int pos = wb->pos;
-	unsigned int i;
+	aa_state_t pos = wb->pos;
+	aa_state_t i;
 
 	if (wb->history[pos] < state)
 		return false;
@@ -700,7 +698,7 @@ static bool is_loop(struct match_workbuf *wb, unsigned int state,
 	return true;
 }
 
-static unsigned int leftmatch_fb(struct aa_dfa *dfa, unsigned int start,
+static aa_state_t leftmatch_fb(struct aa_dfa *dfa, aa_state_t start,
 				 const char *str, struct match_workbuf *wb,
 				 unsigned int *count)
 {
@@ -708,7 +706,7 @@ static unsigned int leftmatch_fb(struct aa_dfa *dfa, unsigned int start,
 	u32 *base = BASE_TABLE(dfa);
 	u16 *next = NEXT_TABLE(dfa);
 	u16 *check = CHECK_TABLE(dfa);
-	unsigned int state = start, pos;
+	aa_state_t state = start, pos;
 
 	AA_BUG(!dfa);
 	AA_BUG(!str);
@@ -716,8 +714,8 @@ static unsigned int leftmatch_fb(struct aa_dfa *dfa, unsigned int start,
 	AA_BUG(!count);
 
 	*count = 0;
-	if (state == 0)
-		return 0;
+	if (state == DFA_NOMATCH)
+		return DFA_NOMATCH;
 
 	/* current state is <state>, matching character *str */
 	if (dfa->tables[YYTD_ID_EC]) {
@@ -781,8 +779,8 @@ out:
  *
  * Returns: final state reached after input is consumed
  */
-unsigned int aa_dfa_leftmatch(struct aa_dfa *dfa, unsigned int start,
-			      const char *str, unsigned int *count)
+aa_state_t aa_dfa_leftmatch(struct aa_dfa *dfa, aa_state_t start,
+			    const char *str, unsigned int *count)
 {
 	DEFINE_MATCH_WB(wb);
 
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index 7594f3a3441e5..84aaf25e5deef 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -190,7 +190,7 @@ static int audit_mount(struct aa_profile *profile, const char *op,
  *
  * Returns: next state after flags match
  */
-static unsigned int match_mnt_flags(struct aa_dfa *dfa, unsigned int state,
+static aa_state_t match_mnt_flags(struct aa_dfa *dfa, aa_state_t state,
 				    unsigned long flags)
 {
 	unsigned int i;
@@ -217,12 +217,12 @@ static const char * const mnt_info_table[] = {
  * Returns 0 on success else element that match failed in, this is the
  * index into the mnt_info_table above
  */
-static int do_match_mnt(struct aa_policydb *policy, unsigned int start,
+static int do_match_mnt(struct aa_policydb *policy, aa_state_t start,
 			const char *mntpnt, const char *devname,
 			const char *type, unsigned long flags,
 			void *data, bool binary, struct aa_perms *perms)
 {
-	unsigned int state;
+	aa_state_t state;
 
 	AA_BUG(!policy);
 	AA_BUG(!policy->dfa);
@@ -567,7 +567,7 @@ static int profile_umount(struct aa_profile *profile, const struct path *path,
 {
 	struct aa_perms perms = { };
 	const char *name = NULL, *info = NULL;
-	unsigned int state;
+	aa_state_t state;
 	int error;
 
 	AA_BUG(!profile);
@@ -627,7 +627,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 	const char *old_name, *new_name = NULL, *info = NULL;
 	const char *trans_name = NULL;
 	struct aa_perms perms = { };
-	unsigned int state;
+	aa_state_t state;
 	int error;
 
 	AA_BUG(!profile);
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index fcfb97079e1be..d420d3aec3b85 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -109,7 +109,7 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
 		       u32 request, u16 family, int type)
 {
 	struct aa_perms perms = { };
-	unsigned int state;
+	aa_state_t state;
 	__be16 buffer[2];
 
 	AA_BUG(family >= AF_MAX);
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 0917412ba48f4..3ea591d31be7e 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -698,7 +698,7 @@ static u32 map_old_perms(u32 old)
 }
 
 static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa,
-				 unsigned int state)
+				 aa_state_t state)
 {
 	perms->allow |= AA_MAY_GETATTR;
 
@@ -710,7 +710,7 @@ static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa,
 }
 
 static struct aa_perms compute_fperms_user(struct aa_dfa *dfa,
-					   unsigned int state)
+					   aa_state_t state)
 {
 	struct aa_perms perms = { };
 
@@ -725,7 +725,7 @@ static struct aa_perms compute_fperms_user(struct aa_dfa *dfa,
 }
 
 static struct aa_perms compute_fperms_other(struct aa_dfa *dfa,
-					    unsigned int state)
+					    aa_state_t state)
 {
 	struct aa_perms perms = { };
 
@@ -748,8 +748,8 @@ static struct aa_perms compute_fperms_other(struct aa_dfa *dfa,
  */
 static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
 {
-	int state;
-	int state_count;
+	aa_state_t state;
+	unsigned int state_count;
 	struct aa_perms *table;
 
 	AA_BUG(!dfa);
@@ -796,7 +796,7 @@ static u32 map_other(u32 x)
 }
 
 static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
-					   unsigned int state)
+					   aa_state_t state)
 {
 	struct aa_perms perms = { };
 
@@ -817,8 +817,8 @@ static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
 
 static struct aa_perms *compute_perms(struct aa_dfa *dfa)
 {
-	int state;
-	int state_count;
+	unsigned int state;
+	unsigned int state_count;
 	struct aa_perms *table;
 
 	AA_BUG(!dfa);
-- 
GitLab


From 1b5a6198f5a9d0aa5497da0dc4bcd4fc166ee516 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 6 May 2022 18:57:12 -0700
Subject: [PATCH 0020/4122] apparmor: Fix abi check to include v8 abi

The v8 abi is supported by the kernel but the userspace supported
version check does not allow for it. This was missed when v8 was added
due to a bug in the userspace compiler which was setting an older abi
version for v8 encoding (which is forward compatible except on the
network encoding). However it is possible to detect the network
encoding by checking the policydb network support which the code
does. The end result was that missing the abi flag worked until
userspace was fixed and began correctly checking for the v8 abi
version.

Fixes: 56974a6fcfef ("apparmor: add base infastructure for socket mediation")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 3ea591d31be7e..0203e43460b66 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -1183,7 +1183,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns)
 	 * if not specified use previous version
 	 * Mask off everything that is not kernel abi version
 	 */
-	if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v7)) {
+	if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v8)) {
 		audit_iface(NULL, NULL, NULL, "unsupported interface version",
 			    e, error);
 		return error;
-- 
GitLab


From 1cf26c3d2c4c2098e39a9905174d7842b531e693 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 7 May 2022 01:58:36 -0700
Subject: [PATCH 0021/4122] apparmor: fix apparmor mediating locking non-fs
 unix sockets

the v8 and earlier policy does not encode the locking permission for
no-fs unix sockets. However the kernel is enforcing mediation.

Add the AA_MAY_LOCK perm to v8 and earlier computed perm mask which will
grant permission for all current abi profiles, but still allow specifying
auditing of the operation if needed.

Link: http://bugs.launchpad.net/bugs/1780227
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 0203e43460b66..2406c5c4caaf2 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -31,6 +31,7 @@
 #define K_ABI_MASK 0x3ff
 #define FORCE_COMPLAIN_FLAG 0x800
 #define VERSION_LT(X, Y) (((X) & K_ABI_MASK) < ((Y) & K_ABI_MASK))
+#define VERSION_LE(X, Y) (((X) & K_ABI_MASK) <= ((Y) & K_ABI_MASK))
 #define VERSION_GT(X, Y) (((X) & K_ABI_MASK) > ((Y) & K_ABI_MASK))
 
 #define v5	5	/* base version */
@@ -796,7 +797,8 @@ static u32 map_other(u32 x)
 }
 
 static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
-					   aa_state_t state)
+					   aa_state_t state,
+					   u32 version)
 {
 	struct aa_perms perms = { };
 
@@ -809,13 +811,15 @@ static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
 	 */
 
 	perms.allow |= map_other(dfa_other_allow(dfa, state));
+	if (VERSION_LE(version, v8))
+		perms.allow |= AA_MAY_LOCK;
 	perms.audit |= map_other(dfa_other_audit(dfa, state));
 	perms.quiet |= map_other(dfa_other_quiet(dfa, state));
 
 	return perms;
 }
 
-static struct aa_perms *compute_perms(struct aa_dfa *dfa)
+static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version)
 {
 	unsigned int state;
 	unsigned int state_count;
@@ -831,7 +835,7 @@ static struct aa_perms *compute_perms(struct aa_dfa *dfa)
 
 	/* zero init so skip the trap state (state == 0) */
 	for (state = 1; state < state_count; state++)
-		table[state] = compute_perms_entry(dfa, state);
+		table[state] = compute_perms_entry(dfa, state, version);
 
 	return table;
 }
@@ -1055,7 +1059,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		}
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
-		profile->policy.perms = compute_perms(profile->policy.dfa);
+		profile->policy.perms = compute_perms(profile->policy.dfa,
+						      e->version);
 		if (!profile->policy.perms) {
 			info = "failed to remap policydb permission table";
 			goto fail;
-- 
GitLab


From 3c076531c5529c94cee330dffc4615ad02bb6edb Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 10 May 2022 02:21:22 -0700
Subject: [PATCH 0022/4122] apparmor: extend policydb permission set by making
 use of the xbits

The policydb permission set has left the xbits unused. Make them
available for mediation.

Note: that this does not bring full auditing control of the
permissions as there are not enough bits. The quieting of denials is
provided as that is used more than forced auditing of allowed
permissions.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 31 +++++++++++++++++++++++++++----
 1 file changed, 27 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 2406c5c4caaf2..e918831166630 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -38,6 +38,7 @@
 #define v6	6	/* per entry policydb mediation check */
 #define v7	7
 #define v8	8	/* full network masking */
+#define v9	9	/* xbits are used as permission bits in policydb */
 
 /*
  * The AppArmor interface treats data as a type byte followed by the
@@ -796,6 +797,12 @@ static u32 map_other(u32 x)
 		((x & 0x60) << 19);	/* SETOPT/GETOPT */
 }
 
+static u32 map_xbits(u32 x)
+{
+	return ((x & 0x1) << 7) |
+		((x & 0x7e) << 9);
+}
+
 static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
 					   aa_state_t state,
 					   u32 version)
@@ -806,15 +813,31 @@ static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
 	perms.audit = dfa_user_audit(dfa, state);
 	perms.quiet = dfa_user_quiet(dfa, state);
 
-	/* for v5 perm mapping in the policydb, the other set is used
-	 * to extend the general perm set
+	/*
+	 * This mapping is convulated due to history.
+	 * v1-v4: only file perms, which are handled by compute_fperms
+	 * v5: added policydb which dropped user conditional to gain new
+	 *     perm bits, but had to map around the xbits because the
+	 *     userspace compiler was still munging them.
+	 * v9: adds using the xbits in policydb because the compiler now
+	 *     supports treating policydb permission bits different.
+	 *     Unfortunately there is no way to force auditing on the
+	 *     perms represented by the xbits
 	 */
-
 	perms.allow |= map_other(dfa_other_allow(dfa, state));
 	if (VERSION_LE(version, v8))
 		perms.allow |= AA_MAY_LOCK;
+	else
+		perms.allow |= map_xbits(dfa_user_xbits(dfa, state));
+
+	/*
+	 * for v5-v9 perm mapping in the policydb, the other set is used
+	 * to extend the general perm set
+	 */
 	perms.audit |= map_other(dfa_other_audit(dfa, state));
 	perms.quiet |= map_other(dfa_other_quiet(dfa, state));
+	if (VERSION_GT(version, v8))
+		perms.quiet |= map_xbits(dfa_other_xbits(dfa, state));
 
 	return perms;
 }
@@ -1188,7 +1211,7 @@ static int verify_header(struct aa_ext *e, int required, const char **ns)
 	 * if not specified use previous version
 	 * Mask off everything that is not kernel abi version
 	 */
-	if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v8)) {
+	if (VERSION_LT(e->version, v5) || VERSION_GT(e->version, v9)) {
 		audit_iface(NULL, NULL, NULL, "unsupported interface version",
 			    e, error);
 		return error;
-- 
GitLab


From b06a62ebf5a3f041b22def1608f1a8ab9bbfa951 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 16 May 2022 04:37:08 -0700
Subject: [PATCH 0023/4122] apparmor: move dfa perm macros into policy_unpack

Now that the permission remapping macros aren't needed anywhere except
during profile unpack, move them.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/file.h  | 51 -------------------------------
 security/apparmor/policy_unpack.c | 49 +++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 51 deletions(-)

diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 8c82cf279dc2a..4212426020cbe 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -109,57 +109,6 @@ struct path_cond {
 
 #define COMBINED_PERM_MASK(X) ((X).allow | (X).audit | (X).quiet | (X).kill)
 
-/* FIXME: split perms from dfa and match this to description
- *        also add delegation info.
- */
-static inline u16 dfa_map_xindex(u16 mask)
-{
-	u16 old_index = (mask >> 10) & 0xf;
-	u16 index = 0;
-
-	if (mask & 0x100)
-		index |= AA_X_UNSAFE;
-	if (mask & 0x200)
-		index |= AA_X_INHERIT;
-	if (mask & 0x80)
-		index |= AA_X_UNCONFINED;
-
-	if (old_index == 1) {
-		index |= AA_X_UNCONFINED;
-	} else if (old_index == 2) {
-		index |= AA_X_NAME;
-	} else if (old_index == 3) {
-		index |= AA_X_NAME | AA_X_CHILD;
-	} else if (old_index) {
-		index |= AA_X_TABLE;
-		index |= old_index - 4;
-	}
-
-	return index;
-}
-
-/*
- * map old dfa inline permissions to new format
- */
-#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \
-				    ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
-#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f)
-#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f)
-#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f)
-#define dfa_user_xindex(dfa, state) \
-	(dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff))
-
-#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \
-				      0x7f) |				\
-				     ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
-#define dfa_other_xbits(dfa, state) \
-	((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f)
-#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f)
-#define dfa_other_quiet(dfa, state) \
-	((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f)
-#define dfa_other_xindex(dfa, state) \
-	dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff)
-
 int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms,
 		  const char *op, u32 request, const char *name,
 		  const char *target, struct aa_label *tlabel, kuid_t ouid,
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index e918831166630..32cca5f27b8ff 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -671,6 +671,55 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj)
 	return strcmp(data->key, *key);
 }
 
+/* remap old accept table embedded permissions to separate permission table */
+static u16 dfa_map_xindex(u16 mask)
+{
+	u16 old_index = (mask >> 10) & 0xf;
+	u16 index = 0;
+
+	if (mask & 0x100)
+		index |= AA_X_UNSAFE;
+	if (mask & 0x200)
+		index |= AA_X_INHERIT;
+	if (mask & 0x80)
+		index |= AA_X_UNCONFINED;
+
+	if (old_index == 1) {
+		index |= AA_X_UNCONFINED;
+	} else if (old_index == 2) {
+		index |= AA_X_NAME;
+	} else if (old_index == 3) {
+		index |= AA_X_NAME | AA_X_CHILD;
+	} else if (old_index) {
+		index |= AA_X_TABLE;
+		index |= old_index - 4;
+	}
+
+	return index;
+}
+
+/*
+ * map old dfa inline permissions to new format
+ */
+#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \
+				    ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
+#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f)
+#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f)
+#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f)
+#define dfa_user_xindex(dfa, state) \
+	(dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff))
+
+#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \
+				      0x7f) |				\
+				     ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
+#define dfa_other_xbits(dfa, state) \
+	((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f)
+#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f)
+#define dfa_other_quiet(dfa, state) \
+	((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f)
+#define dfa_other_xindex(dfa, state) \
+	dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff)
+
 /**
  * map_old_perms - map old file perms layout to the new layout
  * @old: permission set in old mapping
-- 
GitLab


From ae6d35ed0a481824a8730c39d5b319c8a76ea00e Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 16 Jul 2022 03:29:19 -0700
Subject: [PATCH 0024/4122] apparmor: extend xindex size

Allow the xindex to have 2^24 entries.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/file.h  | 23 +++++++++++------------
 security/apparmor/include/perms.h |  2 +-
 security/apparmor/policy_unpack.c |  8 ++++----
 3 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 4212426020cbe..521c8568f6d49 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -88,18 +88,17 @@ static inline struct aa_label *aa_get_file_label(struct aa_file_ctx *ctx)
  * - exec type - which determines how the executable name and index are used
  * - flags - which modify how the destination name is applied
  */
-#define AA_X_INDEX_MASK		0x03ff
-
-#define AA_X_TYPE_MASK		0x0c00
-#define AA_X_TYPE_SHIFT		10
-#define AA_X_NONE		0x0000
-#define AA_X_NAME		0x0400	/* use executable name px */
-#define AA_X_TABLE		0x0800	/* use a specified name ->n# */
-
-#define AA_X_UNSAFE		0x1000
-#define AA_X_CHILD		0x2000	/* make >AA_X_NONE apply to children */
-#define AA_X_INHERIT		0x4000
-#define AA_X_UNCONFINED		0x8000
+#define AA_X_INDEX_MASK		0x00ffffff
+
+#define AA_X_TYPE_MASK		0x0c000000
+#define AA_X_NONE		0x00000000
+#define AA_X_NAME		0x04000000 /* use executable name px */
+#define AA_X_TABLE		0x08000000 /* use a specified name ->n# */
+
+#define AA_X_UNSAFE		0x10000000
+#define AA_X_CHILD		0x20000000
+#define AA_X_INHERIT		0x40000000
+#define AA_X_UNCONFINED		0x80000000
 
 /* need to make conditional which ones are being set */
 struct path_cond {
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 1014a7bbc0278..8739cef735493 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -78,7 +78,7 @@ struct aa_perms {
 	u32 quiet;	/* set only when ~allow | deny */
 	u32 hide;	/* set only when  ~allow | deny */
 
-	u16 xindex;
+	u32 xindex;
 };
 
 #define ALL_PERMS_MASK 0xffffffff
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 32cca5f27b8ff..c578d9af785e0 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -489,8 +489,8 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 		int i, size;
 
 		size = unpack_array(e, NULL);
-		/* currently 4 exec bits and entries 0-3 are reserved iupcx */
-		if (size > 16 - 4)
+		/* currently 2^24 bits entries 0-3 */
+		if (size > (1 << 24))
 			goto fail;
 		profile->file.trans.table = kcalloc(size, sizeof(char *),
 						    GFP_KERNEL);
@@ -672,10 +672,10 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj)
 }
 
 /* remap old accept table embedded permissions to separate permission table */
-static u16 dfa_map_xindex(u16 mask)
+static u32 dfa_map_xindex(u16 mask)
 {
 	u16 old_index = (mask >> 10) & 0xf;
-	u16 index = 0;
+	u32 index = 0;
 
 	if (mask & 0x100)
 		index |= AA_X_UNSAFE;
-- 
GitLab


From caa9f579ca7255e9d6c25f072447d895c5928c97 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 21 Aug 2022 22:48:32 -0700
Subject: [PATCH 0025/4122] apparmor: isolate policy backwards compatibility to
 its own file

The details of mapping old policy into newer policy formats clutters
up the unpack code and makes it possible to accidentally use old
mappings in code, so isolate the mapping code into its own file.

This will become more important when the dfa remapping code lands,
as it will greatly expand the compat code base.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/Makefile                |   3 +-
 security/apparmor/include/policy_compat.h |  33 +++
 security/apparmor/include/policy_unpack.h |   1 +
 security/apparmor/policy_compat.c         | 319 ++++++++++++++++++++++
 security/apparmor/policy_unpack.c         | 290 +-------------------
 5 files changed, 359 insertions(+), 287 deletions(-)
 create mode 100644 security/apparmor/include/policy_compat.h
 create mode 100644 security/apparmor/policy_compat.c

diff --git a/security/apparmor/Makefile b/security/apparmor/Makefile
index ff23fcfefe196..4377123c2b988 100644
--- a/security/apparmor/Makefile
+++ b/security/apparmor/Makefile
@@ -5,7 +5,8 @@ obj-$(CONFIG_SECURITY_APPARMOR) += apparmor.o
 
 apparmor-y := apparmorfs.o audit.o capability.o task.o ipc.o lib.o match.o \
               path.o domain.o policy.o policy_unpack.o procattr.o lsm.o \
-              resource.o secid.o file.o policy_ns.o label.o mount.o net.o
+              resource.o secid.o file.o policy_ns.o label.o mount.o net.o \
+              policy_compat.o
 apparmor-$(CONFIG_SECURITY_APPARMOR_HASH) += crypto.o
 
 clean-files := capability_names.h rlim_names.h net_names.h
diff --git a/security/apparmor/include/policy_compat.h b/security/apparmor/include/policy_compat.h
new file mode 100644
index 0000000000000..af0e174332df5
--- /dev/null
+++ b/security/apparmor/include/policy_compat.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AppArmor security module
+ *
+ * Code to provide backwards compatibility with older policy versions,
+ * by converting/mapping older policy formats into the newer internal
+ * formats.
+ *
+ * Copyright 2022 Canonical Ltd.
+ */
+
+#ifndef __POLICY_COMPAT_H
+#define __POLICY_COMPAT_H
+
+#include "policy.h"
+
+#define K_ABI_MASK 0x3ff
+#define FORCE_COMPLAIN_FLAG 0x800
+#define VERSION_LT(X, Y) (((X) & K_ABI_MASK) < ((Y) & K_ABI_MASK))
+#define VERSION_LE(X, Y) (((X) & K_ABI_MASK) <= ((Y) & K_ABI_MASK))
+#define VERSION_GT(X, Y) (((X) & K_ABI_MASK) > ((Y) & K_ABI_MASK))
+
+#define v5	5	/* base version */
+#define v6	6	/* per entry policydb mediation check */
+#define v7	7
+#define v8	8	/* full network masking */
+#define v9	9	/* xbits are used as permission bits in policydb */
+
+int aa_compat_map_xmatch(struct aa_policydb *policy);
+int aa_compat_map_policy(struct aa_policydb *policy, u32 version);
+int aa_compat_map_file(struct aa_policydb *policy);
+
+#endif /* __POLICY_COMPAT_H */
diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h
index eb5f7d7f132bb..cdfbc8a54a9d2 100644
--- a/security/apparmor/include/policy_unpack.h
+++ b/security/apparmor/include/policy_unpack.h
@@ -16,6 +16,7 @@
 #include <linux/dcache.h>
 #include <linux/workqueue.h>
 
+
 struct aa_load_ent {
 	struct list_head list;
 	struct aa_profile *new;
diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c
new file mode 100644
index 0000000000000..1aa5cced935ee
--- /dev/null
+++ b/security/apparmor/policy_compat.c
@@ -0,0 +1,319 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AppArmor security module
+ *
+ * This file contains AppArmor functions for unpacking policy loaded
+ * from userspace.
+ *
+ * Copyright (C) 1998-2008 Novell/SUSE
+ * Copyright 2009-2022 Canonical Ltd.
+ *
+ * Code to provide backwards compatibility with older policy versions,
+ * by converting/mapping older policy formats into the newer internal
+ * formats.
+ */
+
+#include <linux/ctype.h>
+#include <linux/errno.h>
+
+#include "include/lib.h"
+#include "include/policy_unpack.h"
+#include "include/policy_compat.h"
+
+/* remap old accept table embedded permissions to separate permission table */
+static u32 dfa_map_xindex(u16 mask)
+{
+	u16 old_index = (mask >> 10) & 0xf;
+	u32 index = 0;
+
+	if (mask & 0x100)
+		index |= AA_X_UNSAFE;
+	if (mask & 0x200)
+		index |= AA_X_INHERIT;
+	if (mask & 0x80)
+		index |= AA_X_UNCONFINED;
+
+	if (old_index == 1) {
+		index |= AA_X_UNCONFINED;
+	} else if (old_index == 2) {
+		index |= AA_X_NAME;
+	} else if (old_index == 3) {
+		index |= AA_X_NAME | AA_X_CHILD;
+	} else if (old_index) {
+		index |= AA_X_TABLE;
+		index |= old_index - 4;
+	}
+
+	return index;
+}
+
+/*
+ * map old dfa inline permissions to new format
+ */
+#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \
+				    ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
+#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f)
+#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f)
+#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f)
+#define dfa_user_xindex(dfa, state) \
+	(dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff))
+
+#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \
+				      0x7f) |				\
+				     ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
+#define dfa_other_xbits(dfa, state) \
+	((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f)
+#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f)
+#define dfa_other_quiet(dfa, state) \
+	((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f)
+#define dfa_other_xindex(dfa, state) \
+	dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff)
+
+/**
+ * map_old_perms - map old file perms layout to the new layout
+ * @old: permission set in old mapping
+ *
+ * Returns: new permission mapping
+ */
+static u32 map_old_perms(u32 old)
+{
+	u32 new = old & 0xf;
+
+	if (old & MAY_READ)
+		new |= AA_MAY_GETATTR | AA_MAY_OPEN;
+	if (old & MAY_WRITE)
+		new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE |
+		       AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN;
+	if (old & 0x10)
+		new |= AA_MAY_LINK;
+	/* the old mapping lock and link_subset flags where overlaid
+	 * and use was determined by part of a pair that they were in
+	 */
+	if (old & 0x20)
+		new |= AA_MAY_LOCK | AA_LINK_SUBSET;
+	if (old & 0x40)	/* AA_EXEC_MMAP */
+		new |= AA_EXEC_MMAP;
+
+	return new;
+}
+
+static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa,
+				 aa_state_t state)
+{
+	perms->allow |= AA_MAY_GETATTR;
+
+	/* change_profile wasn't determined by ownership in old mapping */
+	if (ACCEPT_TABLE(dfa)[state] & 0x80000000)
+		perms->allow |= AA_MAY_CHANGE_PROFILE;
+	if (ACCEPT_TABLE(dfa)[state] & 0x40000000)
+		perms->allow |= AA_MAY_ONEXEC;
+}
+
+static struct aa_perms compute_fperms_user(struct aa_dfa *dfa,
+					   aa_state_t state)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = map_old_perms(dfa_user_allow(dfa, state));
+	perms.audit = map_old_perms(dfa_user_audit(dfa, state));
+	perms.quiet = map_old_perms(dfa_user_quiet(dfa, state));
+	perms.xindex = dfa_user_xindex(dfa, state);
+
+	compute_fperms_allow(&perms, dfa, state);
+
+	return perms;
+}
+
+static struct aa_perms compute_fperms_other(struct aa_dfa *dfa,
+					    aa_state_t state)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = map_old_perms(dfa_other_allow(dfa, state));
+	perms.audit = map_old_perms(dfa_other_audit(dfa, state));
+	perms.quiet = map_old_perms(dfa_other_quiet(dfa, state));
+	perms.xindex = dfa_other_xindex(dfa, state);
+
+	compute_fperms_allow(&perms, dfa, state);
+
+	return perms;
+}
+
+/**
+ * aa_compute_fperms - convert dfa compressed perms to internal perms and store
+ *		       them so they can be retrieved later.
+ * @dfa: a dfa using fperms to remap to internal permissions
+ *
+ * Returns: remapped perm table
+ */
+static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
+{
+	aa_state_t state;
+	unsigned int state_count;
+	struct aa_perms *table;
+
+	AA_BUG(!dfa);
+
+	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+	/* DFAs are restricted from having a state_count of less than 2 */
+	table = kvcalloc(state_count * 2, sizeof(struct aa_perms), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	/* zero init so skip the trap state (state == 0) */
+	for (state = 1; state < state_count; state++) {
+		table[state * 2] = compute_fperms_user(dfa, state);
+		table[state * 2 + 1] = compute_fperms_other(dfa, state);
+	}
+
+	return table;
+}
+
+static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch)
+{
+	struct aa_perms *perms;
+	int state;
+	int state_count;
+
+	AA_BUG(!xmatch);
+
+	state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen;
+	/* DFAs are restricted from having a state_count of less than 2 */
+	perms = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL);
+
+	/* zero init so skip the trap state (state == 0) */
+	for (state = 1; state < state_count; state++)
+		perms[state].allow = dfa_user_allow(xmatch, state);
+
+	return perms;
+}
+
+static u32 map_other(u32 x)
+{
+	return ((x & 0x3) << 8) |	/* SETATTR/GETATTR */
+		((x & 0x1c) << 18) |	/* ACCEPT/BIND/LISTEN */
+		((x & 0x60) << 19);	/* SETOPT/GETOPT */
+}
+
+static u32 map_xbits(u32 x)
+{
+	return ((x & 0x1) << 7) |
+		((x & 0x7e) << 9);
+}
+
+static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
+					   aa_state_t state,
+					   u32 version)
+{
+	struct aa_perms perms = { };
+
+	perms.allow = dfa_user_allow(dfa, state);
+	perms.audit = dfa_user_audit(dfa, state);
+	perms.quiet = dfa_user_quiet(dfa, state);
+
+	/*
+	 * This mapping is convulated due to history.
+	 * v1-v4: only file perms, which are handled by compute_fperms
+	 * v5: added policydb which dropped user conditional to gain new
+	 *     perm bits, but had to map around the xbits because the
+	 *     userspace compiler was still munging them.
+	 * v9: adds using the xbits in policydb because the compiler now
+	 *     supports treating policydb permission bits different.
+	 *     Unfortunately there is no way to force auditing on the
+	 *     perms represented by the xbits
+	 */
+	perms.allow |= map_other(dfa_other_allow(dfa, state));
+	if (VERSION_LE(version, v8))
+		perms.allow |= AA_MAY_LOCK;
+	else
+		perms.allow |= map_xbits(dfa_user_xbits(dfa, state));
+
+	/*
+	 * for v5-v9 perm mapping in the policydb, the other set is used
+	 * to extend the general perm set
+	 */
+	perms.audit |= map_other(dfa_other_audit(dfa, state));
+	perms.quiet |= map_other(dfa_other_quiet(dfa, state));
+	if (VERSION_GT(version, v8))
+		perms.quiet |= map_xbits(dfa_other_xbits(dfa, state));
+
+	return perms;
+}
+
+static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version)
+{
+	unsigned int state;
+	unsigned int state_count;
+	struct aa_perms *table;
+
+	AA_BUG(!dfa);
+
+	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+	/* DFAs are restricted from having a state_count of less than 2 */
+	table = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL);
+	if (!table)
+		return NULL;
+
+	/* zero init so skip the trap state (state == 0) */
+	for (state = 1; state < state_count; state++)
+		table[state] = compute_perms_entry(dfa, state, version);
+
+	return table;
+}
+
+/**
+ * remap_dfa_accept - remap old dfa accept table to be an index
+ * @dfa: dfa to do the remapping on
+ * @factor: scaling factor for the index conversion.
+ *
+ * Used in conjunction with compute_Xperms, it converts old style perms
+ * that are encoded in the dfa accept tables to the new style where
+ * there is a permission table and the accept table is an index into
+ * the permission table.
+ */
+static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor)
+{
+	unsigned int state;
+	unsigned int state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
+
+	AA_BUG(!dfa);
+
+	for (state = 0; state < state_count; state++)
+		ACCEPT_TABLE(dfa)[state] = state * factor;
+	kvfree(dfa->tables[YYTD_ID_ACCEPT2]);
+	dfa->tables[YYTD_ID_ACCEPT2] = NULL;
+}
+
+/* TODO: merge different dfa mappings into single map_policy fn */
+int aa_compat_map_xmatch(struct aa_policydb *policy)
+{
+	policy->perms = compute_xmatch_perms(policy->dfa);
+	if (!policy->perms)
+		return -ENOMEM;
+
+	remap_dfa_accept(policy->dfa, 1);
+
+	return 0;
+}
+
+int aa_compat_map_policy(struct aa_policydb *policy, u32 version)
+{
+	policy->perms = compute_perms(policy->dfa, version);
+	if (!policy->perms)
+		return -ENOMEM;
+
+	remap_dfa_accept(policy->dfa, 1);
+
+	return 0;
+}
+
+int aa_compat_map_file(struct aa_policydb *policy)
+{
+	policy->perms = compute_fperms(policy->dfa);
+	if (!policy->perms)
+		return -ENOMEM;
+
+	remap_dfa_accept(policy->dfa, 2);
+
+	return 0;
+}
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index c578d9af785e0..63196df2841b5 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -27,18 +27,8 @@
 #include "include/path.h"
 #include "include/policy.h"
 #include "include/policy_unpack.h"
+#include "include/policy_compat.h"
 
-#define K_ABI_MASK 0x3ff
-#define FORCE_COMPLAIN_FLAG 0x800
-#define VERSION_LT(X, Y) (((X) & K_ABI_MASK) < ((Y) & K_ABI_MASK))
-#define VERSION_LE(X, Y) (((X) & K_ABI_MASK) <= ((Y) & K_ABI_MASK))
-#define VERSION_GT(X, Y) (((X) & K_ABI_MASK) > ((Y) & K_ABI_MASK))
-
-#define v5	5	/* base version */
-#define v6	6	/* per entry policydb mediation check */
-#define v7	7
-#define v8	8	/* full network masking */
-#define v9	9	/* xbits are used as permission bits in policydb */
 
 /*
  * The AppArmor interface treats data as a type byte followed by the
@@ -671,270 +661,6 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj)
 	return strcmp(data->key, *key);
 }
 
-/* remap old accept table embedded permissions to separate permission table */
-static u32 dfa_map_xindex(u16 mask)
-{
-	u16 old_index = (mask >> 10) & 0xf;
-	u32 index = 0;
-
-	if (mask & 0x100)
-		index |= AA_X_UNSAFE;
-	if (mask & 0x200)
-		index |= AA_X_INHERIT;
-	if (mask & 0x80)
-		index |= AA_X_UNCONFINED;
-
-	if (old_index == 1) {
-		index |= AA_X_UNCONFINED;
-	} else if (old_index == 2) {
-		index |= AA_X_NAME;
-	} else if (old_index == 3) {
-		index |= AA_X_NAME | AA_X_CHILD;
-	} else if (old_index) {
-		index |= AA_X_TABLE;
-		index |= old_index - 4;
-	}
-
-	return index;
-}
-
-/*
- * map old dfa inline permissions to new format
- */
-#define dfa_user_allow(dfa, state) (((ACCEPT_TABLE(dfa)[state]) & 0x7f) | \
-				    ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
-#define dfa_user_xbits(dfa, state) (((ACCEPT_TABLE(dfa)[state]) >> 7) & 0x7f)
-#define dfa_user_audit(dfa, state) ((ACCEPT_TABLE2(dfa)[state]) & 0x7f)
-#define dfa_user_quiet(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 7) & 0x7f)
-#define dfa_user_xindex(dfa, state) \
-	(dfa_map_xindex(ACCEPT_TABLE(dfa)[state] & 0x3fff))
-
-#define dfa_other_allow(dfa, state) ((((ACCEPT_TABLE(dfa)[state]) >> 14) & \
-				      0x7f) |				\
-				     ((ACCEPT_TABLE(dfa)[state]) & 0x80000000))
-#define dfa_other_xbits(dfa, state) \
-	((((ACCEPT_TABLE(dfa)[state]) >> 7) >> 14) & 0x7f)
-#define dfa_other_audit(dfa, state) (((ACCEPT_TABLE2(dfa)[state]) >> 14) & 0x7f)
-#define dfa_other_quiet(dfa, state) \
-	((((ACCEPT_TABLE2(dfa)[state]) >> 7) >> 14) & 0x7f)
-#define dfa_other_xindex(dfa, state) \
-	dfa_map_xindex((ACCEPT_TABLE(dfa)[state] >> 14) & 0x3fff)
-
-/**
- * map_old_perms - map old file perms layout to the new layout
- * @old: permission set in old mapping
- *
- * Returns: new permission mapping
- */
-static u32 map_old_perms(u32 old)
-{
-	u32 new = old & 0xf;
-
-	if (old & MAY_READ)
-		new |= AA_MAY_GETATTR | AA_MAY_OPEN;
-	if (old & MAY_WRITE)
-		new |= AA_MAY_SETATTR | AA_MAY_CREATE | AA_MAY_DELETE |
-		       AA_MAY_CHMOD | AA_MAY_CHOWN | AA_MAY_OPEN;
-	if (old & 0x10)
-		new |= AA_MAY_LINK;
-	/* the old mapping lock and link_subset flags where overlaid
-	 * and use was determined by part of a pair that they were in
-	 */
-	if (old & 0x20)
-		new |= AA_MAY_LOCK | AA_LINK_SUBSET;
-	if (old & 0x40)	/* AA_EXEC_MMAP */
-		new |= AA_EXEC_MMAP;
-
-	return new;
-}
-
-static void compute_fperms_allow(struct aa_perms *perms, struct aa_dfa *dfa,
-				 aa_state_t state)
-{
-	perms->allow |= AA_MAY_GETATTR;
-
-	/* change_profile wasn't determined by ownership in old mapping */
-	if (ACCEPT_TABLE(dfa)[state] & 0x80000000)
-		perms->allow |= AA_MAY_CHANGE_PROFILE;
-	if (ACCEPT_TABLE(dfa)[state] & 0x40000000)
-		perms->allow |= AA_MAY_ONEXEC;
-}
-
-static struct aa_perms compute_fperms_user(struct aa_dfa *dfa,
-					   aa_state_t state)
-{
-	struct aa_perms perms = { };
-
-	perms.allow = map_old_perms(dfa_user_allow(dfa, state));
-	perms.audit = map_old_perms(dfa_user_audit(dfa, state));
-	perms.quiet = map_old_perms(dfa_user_quiet(dfa, state));
-	perms.xindex = dfa_user_xindex(dfa, state);
-
-	compute_fperms_allow(&perms, dfa, state);
-
-	return perms;
-}
-
-static struct aa_perms compute_fperms_other(struct aa_dfa *dfa,
-					    aa_state_t state)
-{
-	struct aa_perms perms = { };
-
-	perms.allow = map_old_perms(dfa_other_allow(dfa, state));
-	perms.audit = map_old_perms(dfa_other_audit(dfa, state));
-	perms.quiet = map_old_perms(dfa_other_quiet(dfa, state));
-	perms.xindex = dfa_other_xindex(dfa, state);
-
-	compute_fperms_allow(&perms, dfa, state);
-
-	return perms;
-}
-
-/**
- * aa_compute_fperms - convert dfa compressed perms to internal perms and store
- *		       them so they can be retrieved later.
- * @dfa: a dfa using fperms to remap to internal permissions
- *
- * Returns: remapped perm table
- */
-static struct aa_perms *compute_fperms(struct aa_dfa *dfa)
-{
-	aa_state_t state;
-	unsigned int state_count;
-	struct aa_perms *table;
-
-	AA_BUG(!dfa);
-
-	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
-	/* DFAs are restricted from having a state_count of less than 2 */
-	table = kvcalloc(state_count * 2, sizeof(struct aa_perms), GFP_KERNEL);
-	if (!table)
-		return NULL;
-
-	/* zero init so skip the trap state (state == 0) */
-	for (state = 1; state < state_count; state++) {
-		table[state * 2] = compute_fperms_user(dfa, state);
-		table[state * 2 + 1] = compute_fperms_other(dfa, state);
-	}
-
-	return table;
-}
-
-static struct aa_perms *compute_xmatch_perms(struct aa_dfa *xmatch)
-{
-	struct aa_perms *perms;
-	int state;
-	int state_count;
-
-	AA_BUG(!xmatch);
-
-	state_count = xmatch->tables[YYTD_ID_BASE]->td_lolen;
-	/* DFAs are restricted from having a state_count of less than 2 */
-	perms = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL);
-
-	/* zero init so skip the trap state (state == 0) */
-	for (state = 1; state < state_count; state++)
-		perms[state].allow = dfa_user_allow(xmatch, state);
-
-	return perms;
-}
-
-static u32 map_other(u32 x)
-{
-	return ((x & 0x3) << 8) |	/* SETATTR/GETATTR */
-		((x & 0x1c) << 18) |	/* ACCEPT/BIND/LISTEN */
-		((x & 0x60) << 19);	/* SETOPT/GETOPT */
-}
-
-static u32 map_xbits(u32 x)
-{
-	return ((x & 0x1) << 7) |
-		((x & 0x7e) << 9);
-}
-
-static struct aa_perms compute_perms_entry(struct aa_dfa *dfa,
-					   aa_state_t state,
-					   u32 version)
-{
-	struct aa_perms perms = { };
-
-	perms.allow = dfa_user_allow(dfa, state);
-	perms.audit = dfa_user_audit(dfa, state);
-	perms.quiet = dfa_user_quiet(dfa, state);
-
-	/*
-	 * This mapping is convulated due to history.
-	 * v1-v4: only file perms, which are handled by compute_fperms
-	 * v5: added policydb which dropped user conditional to gain new
-	 *     perm bits, but had to map around the xbits because the
-	 *     userspace compiler was still munging them.
-	 * v9: adds using the xbits in policydb because the compiler now
-	 *     supports treating policydb permission bits different.
-	 *     Unfortunately there is no way to force auditing on the
-	 *     perms represented by the xbits
-	 */
-	perms.allow |= map_other(dfa_other_allow(dfa, state));
-	if (VERSION_LE(version, v8))
-		perms.allow |= AA_MAY_LOCK;
-	else
-		perms.allow |= map_xbits(dfa_user_xbits(dfa, state));
-
-	/*
-	 * for v5-v9 perm mapping in the policydb, the other set is used
-	 * to extend the general perm set
-	 */
-	perms.audit |= map_other(dfa_other_audit(dfa, state));
-	perms.quiet |= map_other(dfa_other_quiet(dfa, state));
-	if (VERSION_GT(version, v8))
-		perms.quiet |= map_xbits(dfa_other_xbits(dfa, state));
-
-	return perms;
-}
-
-static struct aa_perms *compute_perms(struct aa_dfa *dfa, u32 version)
-{
-	unsigned int state;
-	unsigned int state_count;
-	struct aa_perms *table;
-
-	AA_BUG(!dfa);
-
-	state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
-	/* DFAs are restricted from having a state_count of less than 2 */
-	table = kvcalloc(state_count, sizeof(struct aa_perms), GFP_KERNEL);
-	if (!table)
-		return NULL;
-
-	/* zero init so skip the trap state (state == 0) */
-	for (state = 1; state < state_count; state++)
-		table[state] = compute_perms_entry(dfa, state, version);
-
-	return table;
-}
-
-/**
- * remap_dfa_accept - remap old dfa accept table to be an index
- * @dfa: dfa to do the remapping on
- * @factor: scaling factor for the index conversion.
- *
- * Used in conjunction with compute_Xperms, it converts old style perms
- * that are encoded in the dfa accept tables to the new style where
- * there is a permission table and the accept table is an index into
- * the permission table.
- */
-static void remap_dfa_accept(struct aa_dfa *dfa, unsigned int factor)
-{
-	unsigned int state;
-	unsigned int state_count = dfa->tables[YYTD_ID_BASE]->td_lolen;
-
-	AA_BUG(!dfa);
-
-	for (state = 0; state < state_count; state++)
-		ACCEPT_TABLE(dfa)[state] = state * factor;
-	kvfree(dfa->tables[YYTD_ID_ACCEPT2]);
-	dfa->tables[YYTD_ID_ACCEPT2] = NULL;
-}
-
 /**
  * unpack_profile - unpack a serialized profile
  * @e: serialized data extent information (NOT NULL)
@@ -1001,12 +727,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		}
 		profile->xmatch_len = tmp;
 		profile->xmatch.start[AA_CLASS_XMATCH] = DFA_START;
-		profile->xmatch.perms = compute_xmatch_perms(profile->xmatch.dfa);
-		if (!profile->xmatch.perms) {
+		if (aa_compat_map_xmatch(&profile->xmatch)) {
 			info = "failed to convert xmatch permission table";
 			goto fail;
 		}
-		remap_dfa_accept(profile->xmatch.dfa, 1);
 	}
 
 	/* disconnected attachment string is optional */
@@ -1131,14 +855,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		}
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
-		profile->policy.perms = compute_perms(profile->policy.dfa,
-						      e->version);
-		if (!profile->policy.perms) {
+		if (aa_compat_map_policy(&profile->policy, e->version)) {
 			info = "failed to remap policydb permission table";
 			goto fail;
 		}
-		/* Do not remap internal dfas */
-		remap_dfa_accept(profile->policy.dfa, 1);
 	} else
 		profile->policy.dfa = aa_get_dfa(nulldfa);
 
@@ -1154,12 +874,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 				"dfa_start"))
 			/* default start state */
 			profile->file.start[AA_CLASS_FILE] = DFA_START;
-		profile->file.perms = compute_fperms(profile->file.dfa);
-		if (!profile->file.perms) {
+		if (aa_compat_map_file(&profile->file)) {
 			info = "failed to remap file permission table";
 			goto fail;
 		}
-		remap_dfa_accept(profile->file.dfa, 2);
 		if (!unpack_trans_table(e, profile)) {
 			info = "failed to unpack profile transition table";
 			goto fail;
-- 
GitLab


From 90917d5b6866df79d892087ba51b46c983d2fcfe Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 16 Jul 2022 03:33:43 -0700
Subject: [PATCH 0026/4122] apparmor: extend permissions to support a label and
 tag string

add indexes for label and tag entries. Rename the domain table to the
str_table as its a shared string table with label and tags.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/domain.c         | 18 ------------------
 security/apparmor/include/domain.h |  6 ------
 security/apparmor/include/lib.h    |  6 ++++++
 security/apparmor/include/perms.h  |  2 ++
 security/apparmor/include/policy.h |  6 ++++--
 security/apparmor/lib.c            | 19 +++++++++++++++++++
 security/apparmor/policy_unpack.c  |  2 +-
 7 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 5883f0fc02d3d..4cb046cf3a14f 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -29,24 +29,6 @@
 #include "include/policy.h"
 #include "include/policy_ns.h"
 
-/**
- * aa_free_domain_entries - free entries in a domain table
- * @domain: the domain table to free  (MAYBE NULL)
- */
-void aa_free_domain_entries(struct aa_domain *domain)
-{
-	int i;
-	if (domain) {
-		if (!domain->table)
-			return;
-
-		for (i = 0; i < domain->size; i++)
-			kfree_sensitive(domain->table[i]);
-		kfree_sensitive(domain->table);
-		domain->table = NULL;
-	}
-}
-
 /**
  * may_change_ptraced_domain - check if can change profile on ptraced task
  * @to_label: profile to change to  (NOT NULL)
diff --git a/security/apparmor/include/domain.h b/security/apparmor/include/domain.h
index d14928fe1c6f1..77f9a0ed0f048 100644
--- a/security/apparmor/include/domain.h
+++ b/security/apparmor/include/domain.h
@@ -16,11 +16,6 @@
 #ifndef __AA_DOMAIN_H
 #define __AA_DOMAIN_H
 
-struct aa_domain {
-	int size;
-	char **table;
-};
-
 #define AA_CHANGE_NOFLAGS 0
 #define AA_CHANGE_TEST 1
 #define AA_CHANGE_CHILD 2
@@ -32,7 +27,6 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
 
 int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm);
 
-void aa_free_domain_entries(struct aa_domain *domain);
 int aa_change_hat(const char *hats[], int count, u64 token, int flags);
 int aa_change_profile(const char *fqname, int flags);
 
diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h
index f176f3ced2a36..f1a29ab7ea1b4 100644
--- a/security/apparmor/include/lib.h
+++ b/security/apparmor/include/lib.h
@@ -99,6 +99,12 @@ static inline bool path_mediated_fs(struct dentry *dentry)
 	return !(dentry->d_sb->s_flags & SB_NOUSER);
 }
 
+struct aa_str_table {
+	int size;
+	char **table;
+};
+
+void aa_free_str_table(struct aa_str_table *table);
 
 struct counted_str {
 	struct kref count;
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 8739cef735493..d66059fcebb45 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -79,6 +79,8 @@ struct aa_perms {
 	u32 hide;	/* set only when  ~allow | deny */
 
 	u32 xindex;
+	u32 tag;	/* tag string index, if present */
+	u32 label;	/* label string index, if present */
 };
 
 #define ALL_PERMS_MASK 0xffffffff
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 3a7d165e8fcc1..a28a662a06221 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -72,12 +72,14 @@ enum profile_mode {
 
 /* struct aa_policydb - match engine for a policy
  * dfa: dfa pattern match
+ * perms: table of permissions
+ * strs: table of strings, index by x
  * start: set of start states for the different classes of data
  */
 struct aa_policydb {
 	struct aa_dfa *dfa;
 	struct aa_perms *perms;
-	struct aa_domain trans;
+	struct aa_str_table trans;
 	aa_state_t start[AA_CLASS_LAST + 1];
 };
 
@@ -86,7 +88,7 @@ static inline void aa_destroy_policydb(struct aa_policydb *policy)
 	aa_put_dfa(policy->dfa);
 	if (policy->perms)
 		kvfree(policy->perms);
-	aa_free_domain_entries(&policy->trans);
+	aa_free_str_table(&policy->trans);
 
 }
 
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 60deb4dc30c7f..69aeb2dbd6d6b 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -25,6 +25,25 @@ struct aa_perms allperms = { .allow = ALL_PERMS_MASK,
 			     .quiet = ALL_PERMS_MASK,
 			     .hide = ALL_PERMS_MASK };
 
+/**
+ * aa_free_str_table - free entries str table
+ * @str: the string table to free  (MAYBE NULL)
+ */
+void aa_free_str_table(struct aa_str_table *t)
+{
+	int i;
+
+	if (t) {
+		if (!t->table)
+			return;
+
+		for (i = 0; i < t->size; i++)
+			kfree_sensitive(t->table[i]);
+		kfree_sensitive(t->table);
+		t->table = NULL;
+	}
+}
+
 /**
  * aa_split_fqname - split a fqname into a profile and namespace name
  * @fqname: a full qualified name in namespace profile format (NOT NULL)
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 63196df2841b5..df39ee8f4e03d 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -534,7 +534,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 	return true;
 
 fail:
-	aa_free_domain_entries(&profile->file.trans);
+	aa_free_str_table(&profile->file.trans);
 	e->pos = saved_pos;
 	return false;
 }
-- 
GitLab


From 8c4b785a86be1219f7d50f7b38266c454d6a9bbc Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 19 Apr 2022 16:25:55 -0700
Subject: [PATCH 0027/4122] apparmor: add mediation class information to
 auditing

Audit messages currently don't contain the mediation class which can
make them less clear than they should be in some circumstances. With
newer mediation classes coming this potential confusion will become
worse.

Fix this by adding the mediatin class to the messages.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/audit.c            | 28 ++++++++++++++++++++++++++++
 security/apparmor/capability.c       |  2 +-
 security/apparmor/file.c             |  2 +-
 security/apparmor/include/apparmor.h |  2 +-
 security/apparmor/include/audit.h    |  8 ++++++--
 security/apparmor/include/net.h      |  1 +
 security/apparmor/ipc.c              |  2 +-
 security/apparmor/lib.c              |  2 +-
 security/apparmor/lsm.c              |  3 ++-
 security/apparmor/mount.c            |  2 +-
 security/apparmor/policy.c           |  2 +-
 security/apparmor/policy_unpack.c    |  2 +-
 security/apparmor/resource.c         |  3 ++-
 security/apparmor/task.c             |  2 +-
 14 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
index 704b0c895605a..e638f7bc9f528 100644
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -36,6 +36,28 @@ static const char *const aa_audit_type[] = {
 	"AUTO"
 };
 
+static const char *const aa_class_names[] = {
+	"none",
+	"unknown",
+	"file",
+	"cap",
+	"net",
+	"rlimits",
+	"domain",
+	"mount",
+	"unknown",
+	"ptrace",
+	"signal",
+	"unknown",
+	"unknown",
+	"unknown",
+	"net",
+	"unknown",
+	"label",
+	"lsm",
+};
+
+
 /*
  * Currently AppArmor auditing is fed straight into the audit framework.
  *
@@ -65,6 +87,12 @@ static void audit_pre(struct audit_buffer *ab, void *ca)
 		audit_log_format(ab, " operation=\"%s\"", aad(sa)->op);
 	}
 
+	if (aad(sa)->class)
+		audit_log_format(ab, " class=\"%s\"",
+				 aad(sa)->class <= AA_CLASS_LAST ?
+				 aa_class_names[aad(sa)->class] :
+				 "unknown");
+
 	if (aad(sa)->info) {
 		audit_log_format(ab, " info=\"%s\"", aad(sa)->info);
 		if (aad(sa)->error)
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index deccea8654ad8..6cabd6109f12a 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -148,7 +148,7 @@ int aa_capable(struct aa_label *label, int cap, unsigned int opts)
 {
 	struct aa_profile *profile;
 	int error = 0;
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_CAP, OP_CAPABLE);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_CAP, AA_CLASS_CAP, OP_CAPABLE);
 
 	sa.u.cap = cap;
 	error = fn_for_each_confined(label, profile,
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 636efcade3f58..69d936d04f948 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -95,7 +95,7 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms,
 		  kuid_t ouid, const char *info, int error)
 {
 	int type = AUDIT_APPARMOR_AUTO;
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_TASK, op);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_TASK, AA_CLASS_FILE, op);
 
 	sa.u.tsk = NULL;
 	aad(&sa)->request = request;
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 8fd66a4ca0b86..6d9ca075fcb9c 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -16,7 +16,7 @@
 /*
  * Class of mediation types in the AppArmor policy db
  */
-#define AA_CLASS_ENTRY		0
+#define AA_CLASS_NONE		0
 #define AA_CLASS_UNKNOWN	1
 #define AA_CLASS_FILE		2
 #define AA_CLASS_CAP		3
diff --git a/security/apparmor/include/audit.h b/security/apparmor/include/audit.h
index 18519a4eb67e3..c328f07f11cd8 100644
--- a/security/apparmor/include/audit.h
+++ b/security/apparmor/include/audit.h
@@ -107,6 +107,7 @@ enum audit_type {
 struct apparmor_audit_data {
 	int error;
 	int type;
+	u16 class;
 	const char *op;
 	struct aa_label *label;
 	const char *name;
@@ -155,9 +156,12 @@ struct apparmor_audit_data {
 
 /* macros for dealing with  apparmor_audit_data structure */
 #define aad(SA) ((SA)->apparmor_audit_data)
-#define DEFINE_AUDIT_DATA(NAME, T, X)					\
+#define DEFINE_AUDIT_DATA(NAME, T, C, X)				\
 	/* TODO: cleanup audit init so we don't need _aad = {0,} */	\
-	struct apparmor_audit_data NAME ## _aad = { .op = (X), };	\
+	struct apparmor_audit_data NAME ## _aad = {                     \
+		.class = (C),						\
+		.op = (X),                                              \
+	};                                                              \
 	struct common_audit_data NAME =					\
 	{								\
 	.type = (T),							\
diff --git a/security/apparmor/include/net.h b/security/apparmor/include/net.h
index aadb4b29fb66e..6fa440b5daed8 100644
--- a/security/apparmor/include/net.h
+++ b/security/apparmor/include/net.h
@@ -59,6 +59,7 @@ struct aa_sk_ctx {
 	DEFINE_AUDIT_DATA(NAME,						  \
 			  ((SK) && (F) != AF_UNIX) ? LSM_AUDIT_DATA_NET : \
 						     LSM_AUDIT_DATA_NONE, \
+						     AA_CLASS_NET,        \
 			  OP);						  \
 	NAME.u.net = &(NAME ## _net);					  \
 	aad(&NAME)->net.type = (T);					  \
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 7255a9d523728..4ecaf2ba26c54 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -98,7 +98,7 @@ static int profile_signal_perm(struct aa_profile *profile,
 int aa_may_signal(struct aa_label *sender, struct aa_label *target, int sig)
 {
 	struct aa_profile *profile;
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SIGNAL);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_SIGNAL, OP_SIGNAL);
 
 	aad(&sa)->signal = map_signal_num(sig);
 	aad(&sa)->unmappedsig = sig;
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 69aeb2dbd6d6b..768cc182e9cab 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -143,7 +143,7 @@ const char *aa_splitn_fqname(const char *fqname, size_t n, const char **ns_name,
 void aa_info_message(const char *str)
 {
 	if (audit_enabled) {
-		DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, NULL);
+		DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL);
 
 		aad(&sa)->info = str;
 		aa_audit_msg(AUDIT_APPARMOR_STATUS, &sa, NULL);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index ec873ff0a4bbb..784709286a626 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -647,7 +647,8 @@ static int apparmor_setprocattr(const char *name, void *value,
 	char *command, *largs = NULL, *args = value;
 	size_t arg_size;
 	int error;
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SETPROCATTR);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE,
+			  OP_SETPROCATTR);
 
 	if (size == 0)
 		return -EINVAL;
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index 84aaf25e5deef..02d8215cb9fd6 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -134,7 +134,7 @@ static int audit_mount(struct aa_profile *profile, const char *op,
 		       struct aa_perms *perms, const char *info, int error)
 {
 	int audit_type = AUDIT_APPARMOR_AUTO;
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_MOUNT, op);
 
 	if (likely(!error)) {
 		u32 mask = perms->audit;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index cdcf26c9bed57..6222236de0216 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -617,7 +617,7 @@ static int audit_policy(struct aa_label *label, const char *op,
 			const char *ns_name, const char *name,
 			const char *info, int error)
 {
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, op);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, op);
 
 	aad(&sa)->iface.ns = ns_name;
 	aad(&sa)->name = name;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index df39ee8f4e03d..4bf33bd0ca694 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -100,7 +100,7 @@ static int audit_iface(struct aa_profile *new, const char *ns_name,
 		       int error)
 {
 	struct aa_profile *profile = labels_profile(aa_current_raw_label());
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, NULL);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_NONE, NULL);
 	if (e)
 		aad(&sa)->iface.pos = e->pos - e->start;
 	aad(&sa)->iface.ns = ns_name;
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index 1ae4874251a96..cc018469e22d7 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -53,7 +53,8 @@ static int audit_resource(struct aa_profile *profile, unsigned int resource,
 			  unsigned long value, struct aa_label *peer,
 			  const char *info, int error)
 {
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_SETRLIMIT);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_RLIMITS,
+			  OP_SETRLIMIT);
 
 	aad(&sa)->rlim.rlim = resource;
 	aad(&sa)->rlim.max = value;
diff --git a/security/apparmor/task.c b/security/apparmor/task.c
index 503dc0877fb1a..b19900f85c148 100644
--- a/security/apparmor/task.c
+++ b/security/apparmor/task.c
@@ -285,7 +285,7 @@ int aa_may_ptrace(struct aa_label *tracer, struct aa_label *tracee,
 {
 	struct aa_profile *profile;
 	u32 xrequest = request << PTRACE_PERM_SHIFT;
-	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, OP_PTRACE);
+	DEFINE_AUDIT_DATA(sa, LSM_AUDIT_DATA_NONE, AA_CLASS_PTRACE, OP_PTRACE);
 
 	return xcheck_labels(tracer, tracee, profile,
 			profile_tracer_perm(profile, tracee, request, &sa),
-- 
GitLab


From 22fac8a051191113becc0da62bf88b0ba8ce6c08 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 17 Dec 2019 15:40:41 -0800
Subject: [PATCH 0028/4122] apparmor: add user mode flag

Allow the profile to contain a user mode prompt flag. This works similar
to complain mode but will try to send messages to a userspace daemon.
If the daemon is not present or timesout regular informent will occur.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/policy.h        | 3 +++
 security/apparmor/include/policy_unpack.h | 1 +
 security/apparmor/lib.c                   | 7 ++-----
 security/apparmor/policy.c                | 1 +
 security/apparmor/policy_unpack.c         | 2 ++
 5 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index a28a662a06221..9fc5d7fa36e86 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -44,6 +44,8 @@ extern const char *const aa_profile_mode_names[];
 
 #define COMPLAIN_MODE(_profile)	PROFILE_MODE((_profile), APPARMOR_COMPLAIN)
 
+#define USER_MODE(_profile)	PROFILE_MODE((_profile), APPARMOR_USER)
+
 #define KILL_MODE(_profile) PROFILE_MODE((_profile), APPARMOR_KILL)
 
 #define PROFILE_IS_HAT(_profile) ((_profile)->label.flags & FLAG_HAT)
@@ -67,6 +69,7 @@ enum profile_mode {
 	APPARMOR_COMPLAIN,	/* allow and log access violations */
 	APPARMOR_KILL,		/* kill task on access violation */
 	APPARMOR_UNCONFINED,	/* profile set to unconfined */
+	APPARMOR_USER,		/* modified complain mode to userspace */
 };
 
 
diff --git a/security/apparmor/include/policy_unpack.h b/security/apparmor/include/policy_unpack.h
index cdfbc8a54a9d2..1e10e360a0ec1 100644
--- a/security/apparmor/include/policy_unpack.h
+++ b/security/apparmor/include/policy_unpack.h
@@ -36,6 +36,7 @@ struct aa_load_ent *aa_load_ent_alloc(void);
 #define PACKED_MODE_COMPLAIN	1
 #define PACKED_MODE_KILL	2
 #define PACKED_MODE_UNCONFINED	3
+#define PACKED_MODE_USER	4
 
 struct aa_ns;
 
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 768cc182e9cab..b0fcec8932745 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -327,11 +327,8 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms)
 		perms->kill = ALL_PERMS_MASK;
 	else if (COMPLAIN_MODE(profile))
 		perms->complain = ALL_PERMS_MASK;
-/*
- *  TODO:
- *	else if (PROMPT_MODE(profile))
- *		perms->prompt = ALL_PERMS_MASK;
- */
+	else if (USER_MODE(profile))
+		perms->prompt = ALL_PERMS_MASK;
 }
 
 /**
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 6222236de0216..3c3a5263695de 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -94,6 +94,7 @@ const char *const aa_profile_mode_names[] = {
 	"complain",
 	"kill",
 	"unconfined",
+	"user",
 };
 
 
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 4bf33bd0ca694..04e9fca250df0 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -761,6 +761,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	} else if (tmp == PACKED_MODE_UNCONFINED) {
 		profile->mode = APPARMOR_UNCONFINED;
 		profile->label.flags |= FLAG_UNCONFINED;
+	} else if (tmp == PACKED_MODE_USER) {
+		profile->mode = APPARMOR_USER;
 	} else {
 		goto fail;
 	}
-- 
GitLab


From a0792e2ceddc1bff8bda34a82b5ef7f00cbe7a9f Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 23 Aug 2022 01:06:15 -0700
Subject: [PATCH 0029/4122] apparmor: make transition table unpack generic so
 it can be reused

Currently the transition table is tied to the file dfa. Make it so
we can unpack a transition table against any dfa.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 22 ++++++++++++----------
 1 file changed, 12 insertions(+), 10 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 04e9fca250df0..052e3b914c180 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -466,13 +466,14 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e)
 /**
  * unpack_trans_table - unpack a profile transition table
  * @e: serialized data extent information  (NOT NULL)
- * @profile: profile to add the accept table to (NOT NULL)
+ * @table: str table to unpack to (NOT NULL)
  *
- * Returns: true if table successfully unpacked
+ * Returns: true if table successfully unpacked or not present
  */
-static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
+static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs)
 {
 	void *saved_pos = e->pos;
+	char **table;
 
 	/* exec table is optional */
 	if (unpack_nameX(e, AA_STRUCT, "xtable")) {
@@ -482,12 +483,10 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 		/* currently 2^24 bits entries 0-3 */
 		if (size > (1 << 24))
 			goto fail;
-		profile->file.trans.table = kcalloc(size, sizeof(char *),
-						    GFP_KERNEL);
-		if (!profile->file.trans.table)
+		table = kcalloc(size, sizeof(char *), GFP_KERNEL);
+		if (!table)
 			goto fail;
 
-		profile->file.trans.size = size;
 		for (i = 0; i < size; i++) {
 			char *str;
 			int c, j, pos, size2 = unpack_strdup(e, &str, NULL);
@@ -496,7 +495,7 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 			 */
 			if (!size2)
 				goto fail;
-			profile->file.trans.table[i] = str;
+			table[i] = str;
 			/* verify that name doesn't start with space */
 			if (isspace(*str))
 				goto fail;
@@ -530,11 +529,14 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_profile *profile)
 			goto fail;
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
+
+		strs->table = table;
+		strs->size = size;
 	}
 	return true;
 
 fail:
-	aa_free_str_table(&profile->file.trans);
+	kfree_sensitive(table);
 	e->pos = saved_pos;
 	return false;
 }
@@ -880,7 +882,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			info = "failed to remap file permission table";
 			goto fail;
 		}
-		if (!unpack_trans_table(e, profile)) {
+		if (!unpack_trans_table(e, &profile->file.trans)) {
 			info = "failed to unpack profile transition table";
 			goto fail;
 		}
-- 
GitLab


From ad596ea74e746d60bb7e13f3adde097a08b2089b Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 18 Jul 2022 16:53:17 -0700
Subject: [PATCH 0030/4122] apparmor: group dfa policydb unpacking

There are currently three policydb rule groupings (xmatch, file,
policydb) that each do their own slightly different thing. Group them
into a single routine and unify.

This extends/unifies dfa features by
- all dfas are allowed having an optional start field
- all dfas are allowed having a string/transition table

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 101 +++++++++++++++++++-----------
 1 file changed, 63 insertions(+), 38 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 052e3b914c180..a1fe0a5e8e57d 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -648,6 +648,54 @@ fail:
 	return false;
 }
 
+static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy,
+		      bool required_dfa, bool required_trans,
+		      const char **info)
+{
+	int i;
+
+	policy->dfa = unpack_dfa(e);
+	if (IS_ERR(policy->dfa)) {
+		int error = PTR_ERR(policy->dfa);
+
+		policy->dfa = NULL;
+		*info = "failed to unpack - dfa";
+		return error;
+	} else if (!policy->dfa) {
+		if (required_dfa) {
+			*info = "missing required dfa";
+			return -EPROTO;
+		}
+		goto out;
+	}
+
+	/*
+	 * only unpack the following if a dfa is present
+	 *
+	 * sadly start was given different names for file and policydb
+	 * but since it is optional we can try both
+	 */
+	if (!unpack_u32(e, &policy->start[0], "start"))
+		/* default start state */
+		policy->start[0] = DFA_START;
+	if (!unpack_u32(e, &policy->start[AA_CLASS_FILE], "dfa_start")) {
+		/* default start state for xmatch and file dfa */
+		policy->start[AA_CLASS_FILE] = DFA_START;
+	}	/* setup class index */
+	for (i = AA_CLASS_FILE + 1; i <= AA_CLASS_LAST; i++) {
+		policy->start[i] = aa_dfa_next(policy->dfa, policy->start[0],
+					       i);
+	}
+	if (!unpack_trans_table(e, &policy->trans) && required_trans) {
+		*info = "failed to unpack profile transition table";
+		return -EPROTO;
+	}
+	/* TODO: move compat mapping here, requires dfa merging first */
+
+out:
+	return 0;
+}
+
 static u32 strhash(const void *data, u32 len, u32 seed)
 {
 	const char * const *key = data;
@@ -679,7 +727,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	struct rhashtable_params params = { 0 };
 	char *key = NULL;
 	struct aa_data *data;
-	int i, error = -EPROTO;
+	int error = -EPROTO;
 	kernel_cap_t tmpcap;
 	u32 tmp;
 
@@ -714,13 +762,10 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	(void) unpack_str(e, &profile->attach, "attach");
 
 	/* xmatch is optional and may be NULL */
-	profile->xmatch.dfa = unpack_dfa(e);
-	if (IS_ERR(profile->xmatch.dfa)) {
-		error = PTR_ERR(profile->xmatch.dfa);
-		profile->xmatch.dfa = NULL;
-		info = "bad xmatch";
+	error = unpack_pdb(e, &profile->xmatch, false, false, &info);
+	if (error)
 		goto fail;
-	}
+
 	/* neither xmatch_len not xmatch_perms are optional if xmatch is set */
 	if (profile->xmatch.dfa) {
 		if (!unpack_u32(e, &tmp, NULL)) {
@@ -838,25 +883,16 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (unpack_nameX(e, AA_STRUCT, "policydb")) {
 		/* generic policy dfa - optional and may be NULL */
 		info = "failed to unpack policydb";
-		profile->policy.dfa = unpack_dfa(e);
-		if (IS_ERR(profile->policy.dfa)) {
-			error = PTR_ERR(profile->policy.dfa);
-			profile->policy.dfa = NULL;
-			goto fail;
-		} else if (!profile->policy.dfa) {
-			error = -EPROTO;
+		error = unpack_pdb(e, &profile->policy, true, false, &info);
+		if (error)
 			goto fail;
-		}
-		if (!unpack_u32(e, &profile->policy.start[0], "start"))
-			/* default start state */
-			profile->policy.start[0] = DFA_START;
-		/* setup class index */
-		for (i = AA_CLASS_FILE; i <= AA_CLASS_LAST; i++) {
-			profile->policy.start[i] =
-				aa_dfa_next(profile->policy.dfa,
-					    profile->policy.start[0],
-					    i);
-		}
+		/* Fixup: drop when we get rid of start array */
+		if (aa_dfa_next(profile->policy.dfa, profile->policy.start[0],
+				AA_CLASS_FILE))
+			profile->policy.start[AA_CLASS_FILE] =
+			  aa_dfa_next(profile->policy.dfa,
+				      profile->policy.start[0],
+				      AA_CLASS_FILE);
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
 		if (aa_compat_map_policy(&profile->policy, e->version)) {
@@ -867,25 +903,14 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		profile->policy.dfa = aa_get_dfa(nulldfa);
 
 	/* get file rules */
-	profile->file.dfa = unpack_dfa(e);
-	if (IS_ERR(profile->file.dfa)) {
-		error = PTR_ERR(profile->file.dfa);
-		profile->file.dfa = NULL;
-		info = "failed to unpack profile file rules";
+	error = unpack_pdb(e, &profile->file, false, true, &info);
+	if (error) {
 		goto fail;
 	} else if (profile->file.dfa) {
-		if (!unpack_u32(e, &profile->file.start[AA_CLASS_FILE],
-				"dfa_start"))
-			/* default start state */
-			profile->file.start[AA_CLASS_FILE] = DFA_START;
 		if (aa_compat_map_file(&profile->file)) {
 			info = "failed to remap file permission table";
 			goto fail;
 		}
-		if (!unpack_trans_table(e, &profile->file.trans)) {
-			info = "failed to unpack profile transition table";
-			goto fail;
-		}
 	} else if (profile->policy.dfa &&
 		   profile->policy.start[AA_CLASS_FILE]) {
 		profile->file.dfa = aa_get_dfa(profile->policy.dfa);
-- 
GitLab


From 371e50a0b19f9765bfb9e4f172e72f4e9a4625bc Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 26 Aug 2022 09:26:57 -0700
Subject: [PATCH 0031/4122] apparmor: make unpack_array return a trianary value

currently unpack_array() does not return an error nor whether the
array is not present. The ability to detect an error or the array
not being present is needed so rework the unpack_array() to return
the needed information.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c      | 43 ++++++++++++++++----------
 security/apparmor/policy_unpack_test.c | 12 +++----
 2 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index a1fe0a5e8e57d..7d3b3e664c1c1 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -67,6 +67,11 @@ struct aa_ext {
 	u32 version;
 };
 
+#define tri int
+#define TRI_TRUE 1
+#define TRI_NONE 0
+#define TRI_FALSE -1
+
 /* audit callback for unpack fields */
 static void audit_cb(struct audit_buffer *ab, void *va)
 {
@@ -344,22 +349,22 @@ fail:
 	return false;
 }
 
-static size_t unpack_array(struct aa_ext *e, const char *name)
+static tri unpack_array(struct aa_ext *e, const char *name, u16 *size)
 {
 	void *pos = e->pos;
 
 	if (unpack_nameX(e, AA_ARRAY, name)) {
-		int size;
 		if (!inbounds(e, sizeof(u16)))
 			goto fail;
-		size = (int)le16_to_cpu(get_unaligned((__le16 *) e->pos));
+		*size = le16_to_cpu(get_unaligned((__le16 *) e->pos));
 		e->pos += sizeof(u16);
-		return size;
+		return TRI_TRUE;
 	}
 
+	return TRI_NONE;
 fail:
 	e->pos = pos;
-	return 0;
+	return TRI_FALSE;
 }
 
 static size_t unpack_blob(struct aa_ext *e, char **blob, const char *name)
@@ -477,11 +482,12 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs)
 
 	/* exec table is optional */
 	if (unpack_nameX(e, AA_STRUCT, "xtable")) {
-		int i, size;
+		u16 size;
+		int i;
 
-		size = unpack_array(e, NULL);
-		/* currently 2^24 bits entries 0-3 */
-		if (size > (1 << 24))
+		if (unpack_array(e, NULL, &size) != TRI_TRUE ||
+		    size > (1 << 24))
+		  /* currently 2^24 bits entries 0-3 */
 			goto fail;
 		table = kcalloc(size, sizeof(char *), GFP_KERNEL);
 		if (!table)
@@ -546,9 +552,11 @@ static bool unpack_xattrs(struct aa_ext *e, struct aa_profile *profile)
 	void *pos = e->pos;
 
 	if (unpack_nameX(e, AA_STRUCT, "xattrs")) {
-		int i, size;
+		u16 size;
+		int i;
 
-		size = unpack_array(e, NULL);
+		if (unpack_array(e, NULL, &size) != TRI_TRUE)
+			goto fail;
 		profile->xattr_count = size;
 		profile->xattrs = kcalloc(size, sizeof(char *), GFP_KERNEL);
 		if (!profile->xattrs)
@@ -573,10 +581,12 @@ fail:
 static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
 {
 	void *pos = e->pos;
-	int i, size;
+	u16 size;
+	int i;
 
 	if (unpack_nameX(e, AA_STRUCT, "secmark")) {
-		size = unpack_array(e, NULL);
+		if (unpack_array(e, NULL, &size) != TRI_TRUE)
+			goto fail;
 
 		profile->secmark = kcalloc(size, sizeof(struct aa_secmark),
 					   GFP_KERNEL);
@@ -620,14 +630,15 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 
 	/* rlimits are optional */
 	if (unpack_nameX(e, AA_STRUCT, "rlimits")) {
-		int i, size;
+		u16 size;
+		int i;
 		u32 tmp = 0;
 		if (!unpack_u32(e, &tmp, NULL))
 			goto fail;
 		profile->rlimits.mask = tmp;
 
-		size = unpack_array(e, NULL);
-		if (size > RLIM_NLIMITS)
+		if (unpack_array(e, NULL, &size) != TRI_TRUE ||
+		    size > RLIM_NLIMITS)
 			goto fail;
 		for (i = 0; i < size; i++) {
 			u64 tmp2 = 0;
diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c
index 0a969b2e03dba..1a43d538c4c04 100644
--- a/security/apparmor/policy_unpack_test.c
+++ b/security/apparmor/policy_unpack_test.c
@@ -144,8 +144,8 @@ static void policy_unpack_test_unpack_array_with_null_name(struct kunit *test)
 
 	puf->e->pos += TEST_ARRAY_BUF_OFFSET;
 
-	array_size = unpack_array(puf->e, NULL);
-
+	KUNIT_EXPECT_EQ(test, unpack_array(puf->e, NULL, &array_size),
+			TRI_TRUE);
 	KUNIT_EXPECT_EQ(test, array_size, (u16)TEST_ARRAY_SIZE);
 	KUNIT_EXPECT_PTR_EQ(test, puf->e->pos,
 		puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16) + 1);
@@ -159,8 +159,8 @@ static void policy_unpack_test_unpack_array_with_name(struct kunit *test)
 
 	puf->e->pos += TEST_NAMED_ARRAY_BUF_OFFSET;
 
-	array_size = unpack_array(puf->e, name);
-
+	KUNIT_EXPECT_EQ(test, unpack_array(puf->e, name, &array_size),
+			TRI_TRUE);
 	KUNIT_EXPECT_EQ(test, array_size, (u16)TEST_ARRAY_SIZE);
 	KUNIT_EXPECT_PTR_EQ(test, puf->e->pos,
 		puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16) + 1);
@@ -175,8 +175,8 @@ static void policy_unpack_test_unpack_array_out_of_bounds(struct kunit *test)
 	puf->e->pos += TEST_NAMED_ARRAY_BUF_OFFSET;
 	puf->e->end = puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16);
 
-	array_size = unpack_array(puf->e, name);
-
+	KUNIT_EXPECT_EQ(test, unpack_array(puf->e, name, &array_size),
+			TRI_TRUE);
 	KUNIT_EXPECT_EQ(test, array_size, 0);
 	KUNIT_EXPECT_PTR_EQ(test, puf->e->pos,
 		puf->e->start + TEST_NAMED_ARRAY_BUF_OFFSET);
-- 
GitLab


From fd1b2b95a21177eaa9e26989637e477be4d93b2f Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 26 Aug 2022 08:53:42 -0700
Subject: [PATCH 0032/4122] apparmor: add the ability for policy to specify a
 permission table

Currently permissions are encoded in the dfa accept entries that are
then mapped to an internal permission structure. This limits the
permissions that userspace can specify, so allow userspace to directly
specify the permission table.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/policy.h |   5 +-
 security/apparmor/policy_unpack.c  | 104 ++++++++++++++++++++++++++---
 2 files changed, 98 insertions(+), 11 deletions(-)

diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 9fc5d7fa36e86..2c39bd389f878 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -81,7 +81,10 @@ enum profile_mode {
  */
 struct aa_policydb {
 	struct aa_dfa *dfa;
-	struct aa_perms *perms;
+	struct {
+		struct aa_perms *perms;
+		u32 size;
+	};
 	struct aa_str_table trans;
 	aa_state_t start[AA_CLASS_LAST + 1];
 };
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 7d3b3e664c1c1..b85dbdde89390 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -435,10 +435,11 @@ static int unpack_strdup(struct aa_ext *e, char **string, const char *name)
 /**
  * unpack_dfa - unpack a file rule dfa
  * @e: serialized data extent information (NOT NULL)
+ * @flags: dfa flags to check
  *
  * returns dfa or ERR_PTR or NULL if no dfa
  */
-static struct aa_dfa *unpack_dfa(struct aa_ext *e)
+static struct aa_dfa *unpack_dfa(struct aa_ext *e, int flags)
 {
 	char *blob = NULL;
 	size_t size;
@@ -454,8 +455,6 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e)
 		size_t sz = blob - (char *) e->start -
 			((e->pos - e->start) & 7);
 		size_t pad = ALIGN(sz, 8) - sz;
-		int flags = TO_ACCEPT1_FLAG(YYTD_DATA32) |
-			TO_ACCEPT2_FLAG(YYTD_DATA32);
 		if (aa_g_paranoid_load)
 			flags |= DFA_FLAG_VERIFY_STATES;
 		dfa = aa_dfa_unpack(blob + pad, size - pad, flags);
@@ -659,23 +658,104 @@ fail:
 	return false;
 }
 
+static bool unpack_perm(struct aa_ext *e, u32 version, struct aa_perms *perm)
+{
+	bool res;
+
+	if (version != 1)
+		return false;
+
+	res = unpack_u32(e, &perm->allow, NULL);
+	res = res && unpack_u32(e, &perm->allow, NULL);
+	res = res && unpack_u32(e, &perm->deny, NULL);
+	res = res && unpack_u32(e, &perm->subtree, NULL);
+	res = res && unpack_u32(e, &perm->cond, NULL);
+	res = res && unpack_u32(e, &perm->kill, NULL);
+	res = res && unpack_u32(e, &perm->complain, NULL);
+	res = res && unpack_u32(e, &perm->prompt, NULL);
+	res = res && unpack_u32(e, &perm->audit, NULL);
+	res = res && unpack_u32(e, &perm->quiet, NULL);
+	res = res && unpack_u32(e, &perm->hide, NULL);
+	res = res && unpack_u32(e, &perm->xindex, NULL);
+	res = res && unpack_u32(e, &perm->tag, NULL);
+	res = res && unpack_u32(e, &perm->label, NULL);
+
+	return res;
+}
+
+static ssize_t unpack_perms_table(struct aa_ext *e, struct aa_perms **perms)
+{
+	void *pos = e->pos;
+	u16 size = 0;
+
+	AA_BUG(!perms);
+	/*
+	 * policy perms are optional, in which case perms are embedded
+	 * in the dfa accept table
+	 */
+	if (unpack_nameX(e, AA_STRUCT, "perms")) {
+		int i;
+		u32 version;
+
+		if (!unpack_u32(e, &version, "version"))
+			goto fail_reset;
+		if (unpack_array(e, NULL, &size) != TRI_TRUE)
+			goto fail_reset;
+		*perms = kcalloc(size, sizeof(struct aa_perms), GFP_KERNEL);
+		if (!*perms)
+			goto fail_reset;
+		for (i = 0; i < size; i++) {
+			if (!unpack_perm(e, version, &(*perms)[i]))
+				goto fail;
+		}
+		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
+			goto fail;
+		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
+			goto fail;
+	} else
+		*perms = NULL;
+
+	return size;
+
+fail:
+	kfree(*perms);
+fail_reset:
+	e->pos = pos;
+	return -EPROTO;
+}
+
 static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy,
 		      bool required_dfa, bool required_trans,
 		      const char **info)
 {
-	int i;
+	void *pos = e->pos;
+	int i, flags, error = -EPROTO;
 
-	policy->dfa = unpack_dfa(e);
-	if (IS_ERR(policy->dfa)) {
-		int error = PTR_ERR(policy->dfa);
+	policy->size = unpack_perms_table(e, &policy->perms);
+	if (policy->size < 0) {
+		error = policy->size;
+		policy->perms = NULL;
+		*info = "failed to unpack - perms";
+		goto fail;
+	} else if (policy->perms) {
+		/* perms table present accept is index */
+		flags = TO_ACCEPT1_FLAG(YYTD_DATA32);
+	} else {
+		/* packed perms in accept1 and accept2 */
+		flags = TO_ACCEPT1_FLAG(YYTD_DATA32) |
+			TO_ACCEPT2_FLAG(YYTD_DATA32);
+	}
 
+	policy->dfa = unpack_dfa(e, flags);
+	if (IS_ERR(policy->dfa)) {
+		error = PTR_ERR(policy->dfa);
 		policy->dfa = NULL;
 		*info = "failed to unpack - dfa";
-		return error;
+		goto fail;
 	} else if (!policy->dfa) {
 		if (required_dfa) {
 			*info = "missing required dfa";
-			return -EPROTO;
+			goto fail;
 		}
 		goto out;
 	}
@@ -699,12 +779,16 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy,
 	}
 	if (!unpack_trans_table(e, &policy->trans) && required_trans) {
 		*info = "failed to unpack profile transition table";
-		return -EPROTO;
+		goto fail;
 	}
 	/* TODO: move compat mapping here, requires dfa merging first */
 
 out:
 	return 0;
+
+fail:
+	e->pos = pos;
+	return error;
 }
 
 static u32 strhash(const void *data, u32 len, u32 seed)
-- 
GitLab


From 670f31774ab6bf8e2d756f27444b035b9be8a0c9 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 26 Aug 2022 13:32:34 -0700
Subject: [PATCH 0033/4122] apparmor: verify permission table indexes

While the dfa xindex's are verified, the indexes in the permission
table are not currently verified. Fix this.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 35 ++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index b85dbdde89390..312bd632a472b 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -781,8 +781,9 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy,
 		*info = "failed to unpack profile transition table";
 		goto fail;
 	}
-	/* TODO: move compat mapping here, requires dfa merging first */
 
+	/* TODO: move compat mapping here, requires dfa merging first */
+	/* TODO: move verify here, it has to be done after compat mappings */
 out:
 	return 0;
 
@@ -1149,6 +1150,22 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size)
 	return true;
 }
 
+static bool verify_perm_indexes(struct aa_policydb *pdb)
+{
+	int i;
+
+	for (i = 0; i < pdb->size; i++) {
+		if (pdb->perms[i].xindex >= pdb->trans.size)
+			return false;
+		if (pdb->perms[i].tag >= pdb->trans.size)
+			return false;
+		if (pdb->perms[i].label >= pdb->trans.size)
+			return false;
+	}
+
+	return true;
+}
+
 /**
  * verify_profile - Do post unpack analysis to verify profile consistency
  * @profile: profile to verify (NOT NULL)
@@ -1170,6 +1187,22 @@ static int verify_profile(struct aa_profile *profile)
 		return -EPROTO;
 	}
 
+	if (!verify_perm_indexes(&profile->file)) {
+		audit_iface(profile, NULL, NULL,
+			    "Unpack: Invalid perm index", NULL, -EPROTO);
+		return -EPROTO;
+	}
+	if (!verify_perm_indexes(&profile->policy)) {
+		audit_iface(profile, NULL, NULL,
+			    "Unpack: Invalid perm index", NULL, -EPROTO);
+		return -EPROTO;
+	}
+	if (!verify_perm_indexes(&profile->xmatch)) {
+		audit_iface(profile, NULL, NULL,
+			    "Unpack: Invalid perm index", NULL, -EPROTO);
+		return -EPROTO;
+	}
+
 	return 0;
 }
 
-- 
GitLab


From 0bece4fa97a2bd397da66d4fced78f76eb214a3e Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 5 Sep 2022 23:53:29 -0700
Subject: [PATCH 0034/4122] apparmor: make sure perm indexes are accumulated

accumulate permission indexes on a first encountered basis. This
favors original rulesets so that new ones can not override without
profile replacement.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/file.h  |  4 ++--
 security/apparmor/include/perms.h |  9 +++++++++
 security/apparmor/lib.c           | 14 ++++++++++++++
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 521c8568f6d49..1a1c0f0c50710 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -88,10 +88,10 @@ static inline struct aa_label *aa_get_file_label(struct aa_file_ctx *ctx)
  * - exec type - which determines how the executable name and index are used
  * - flags - which modify how the destination name is applied
  */
-#define AA_X_INDEX_MASK		0x00ffffff
+#define AA_X_INDEX_MASK		AA_INDEX_MASK
 
 #define AA_X_TYPE_MASK		0x0c000000
-#define AA_X_NONE		0x00000000
+#define AA_X_NONE		AA_INDEX_NONE
 #define AA_X_NAME		0x04000000 /* use executable name px */
 #define AA_X_TABLE		0x08000000 /* use a specified name ->n# */
 
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index d66059fcebb45..0de8c3fb090dc 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -78,11 +78,20 @@ struct aa_perms {
 	u32 quiet;	/* set only when ~allow | deny */
 	u32 hide;	/* set only when  ~allow | deny */
 
+
 	u32 xindex;
 	u32 tag;	/* tag string index, if present */
 	u32 label;	/* label string index, if present */
 };
 
+/*
+ * Indexes are broken into a 24 bit index and 8 bit flag.
+ * For the index to be valid there must be a value in the flag
+ */
+#define AA_INDEX_MASK			0x00ffffff
+#define AA_INDEX_FLAG_MASK		0xff000000
+#define AA_INDEX_NONE			0
+
 #define ALL_PERMS_MASK 0xffffffff
 extern struct aa_perms nullperms;
 extern struct aa_perms allperms;
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index b0fcec8932745..d6a8c361025b4 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -348,6 +348,13 @@ void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend)
 	accum->hide &= addend->hide & ~addend->allow;
 	accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny;
 	accum->subtree |= addend->subtree & ~addend->deny;
+
+	if (!accum->xindex)
+		accum->xindex = addend->xindex;
+	if (!accum->tag)
+		accum->tag = addend->tag;
+	if (!accum->label)
+		accum->label = addend->label;
 }
 
 /**
@@ -367,6 +374,13 @@ void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend)
 	accum->hide &= addend->hide & ~accum->allow;
 	accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny;
 	accum->subtree &= addend->subtree & ~accum->deny;
+
+	if (!accum->xindex)
+		accum->xindex = addend->xindex;
+	if (!accum->tag)
+		accum->tag = addend->tag;
+	if (!accum->label)
+		accum->label = addend->label;
 }
 
 void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label,
-- 
GitLab


From 3dfd16ab697ff23973b6fbb89808372bcd008dd1 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 5 Sep 2022 23:57:51 -0700
Subject: [PATCH 0035/4122] apparmor: cleanup: move perm accumulation into
 perms.h

Perm accumulation is going to be used much more frequently so let
the compiler figure out if it can be optimized when used.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/perms.h | 53 +++++++++++++++++++++++++++++++
 security/apparmor/lib.c           | 52 ------------------------------
 2 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 0de8c3fb090dc..9fa71957ac3a0 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -96,6 +96,59 @@ struct aa_perms {
 extern struct aa_perms nullperms;
 extern struct aa_perms allperms;
 
+/**
+ * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms
+ * @accum - perms struct to accumulate into
+ * @addend - perms struct to add to @accum
+ */
+static inline void aa_perms_accum_raw(struct aa_perms *accum,
+				      struct aa_perms *addend)
+{
+	accum->deny |= addend->deny;
+	accum->allow &= addend->allow & ~addend->deny;
+	accum->audit |= addend->audit & addend->allow;
+	accum->quiet &= addend->quiet & ~addend->allow;
+	accum->kill |= addend->kill & ~addend->allow;
+	accum->complain |= addend->complain & ~addend->allow & ~addend->deny;
+	accum->cond |= addend->cond & ~addend->allow & ~addend->deny;
+	accum->hide &= addend->hide & ~addend->allow;
+	accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny;
+	accum->subtree |= addend->subtree & ~addend->deny;
+
+	if (!accum->xindex)
+		accum->xindex = addend->xindex;
+	if (!accum->tag)
+		accum->tag = addend->tag;
+	if (!accum->label)
+		accum->label = addend->label;
+}
+
+/**
+ * aa_perms_accum - accumulate perms, masking off overlapping perms
+ * @accum - perms struct to accumulate into
+ * @addend - perms struct to add to @accum
+ */
+static inline void aa_perms_accum(struct aa_perms *accum,
+				  struct aa_perms *addend)
+{
+	accum->deny |= addend->deny;
+	accum->allow &= addend->allow & ~accum->deny;
+	accum->audit |= addend->audit & accum->allow;
+	accum->quiet &= addend->quiet & ~accum->allow;
+	accum->kill |= addend->kill & ~accum->allow;
+	accum->complain |= addend->complain & ~accum->allow & ~accum->deny;
+	accum->cond |= addend->cond & ~accum->allow & ~accum->deny;
+	accum->hide &= addend->hide & ~accum->allow;
+	accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny;
+	accum->subtree &= addend->subtree & ~accum->deny;
+
+	if (!accum->xindex)
+		accum->xindex = addend->xindex;
+	if (!accum->tag)
+		accum->tag = addend->tag;
+	if (!accum->label)
+		accum->label = addend->label;
+}
 
 #define xcheck(FN1, FN2)	\
 ({				\
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index d6a8c361025b4..10e3b11e02adf 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -331,58 +331,6 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms)
 		perms->prompt = ALL_PERMS_MASK;
 }
 
-/**
- * aa_perms_accum_raw - accumulate perms with out masking off overlapping perms
- * @accum - perms struct to accumulate into
- * @addend - perms struct to add to @accum
- */
-void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend)
-{
-	accum->deny |= addend->deny;
-	accum->allow &= addend->allow & ~addend->deny;
-	accum->audit |= addend->audit & addend->allow;
-	accum->quiet &= addend->quiet & ~addend->allow;
-	accum->kill |= addend->kill & ~addend->allow;
-	accum->complain |= addend->complain & ~addend->allow & ~addend->deny;
-	accum->cond |= addend->cond & ~addend->allow & ~addend->deny;
-	accum->hide &= addend->hide & ~addend->allow;
-	accum->prompt |= addend->prompt & ~addend->allow & ~addend->deny;
-	accum->subtree |= addend->subtree & ~addend->deny;
-
-	if (!accum->xindex)
-		accum->xindex = addend->xindex;
-	if (!accum->tag)
-		accum->tag = addend->tag;
-	if (!accum->label)
-		accum->label = addend->label;
-}
-
-/**
- * aa_perms_accum - accumulate perms, masking off overlapping perms
- * @accum - perms struct to accumulate into
- * @addend - perms struct to add to @accum
- */
-void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend)
-{
-	accum->deny |= addend->deny;
-	accum->allow &= addend->allow & ~accum->deny;
-	accum->audit |= addend->audit & accum->allow;
-	accum->quiet &= addend->quiet & ~accum->allow;
-	accum->kill |= addend->kill & ~accum->allow;
-	accum->complain |= addend->complain & ~accum->allow & ~accum->deny;
-	accum->cond |= addend->cond & ~accum->allow & ~accum->deny;
-	accum->hide &= addend->hide & ~accum->allow;
-	accum->prompt |= addend->prompt & ~accum->allow & ~accum->deny;
-	accum->subtree &= addend->subtree & ~accum->deny;
-
-	if (!accum->xindex)
-		accum->xindex = addend->xindex;
-	if (!accum->tag)
-		accum->tag = addend->tag;
-	if (!accum->label)
-		accum->label = addend->label;
-}
-
 void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label,
 			    int type, u32 request, struct aa_perms *perms)
 {
-- 
GitLab


From 3bf3d728a58d7dcf2bbf179e3263fb8651f6097b Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 6 Sep 2022 00:38:20 -0700
Subject: [PATCH 0036/4122] apparmor: verify loaded permission bits masks don't
 overlap

Add an additional verification that loaded permission sets don't
overlap in ways that are not intended. This will help ensure that
permission accumulation can't result in an invalid permission set.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 34 +++++++++++++++++++++++++++----
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 312bd632a472b..5a78aaa0eea49 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -1150,11 +1150,37 @@ static bool verify_dfa_xindex(struct aa_dfa *dfa, int table_size)
 	return true;
 }
 
-static bool verify_perm_indexes(struct aa_policydb *pdb)
+static bool verify_perm(struct aa_perms *perm)
+{
+	/* TODO: allow option to just force the perms into a valid state */
+	if (perm->allow & perm->deny)
+		return false;
+	if (perm->subtree & ~perm->allow)
+		return false;
+	if (perm->cond & (perm->allow | perm->deny))
+		return false;
+	if (perm->kill & perm->allow)
+		return false;
+	if (perm->complain & (perm->allow | perm->deny))
+		return false;
+	if (perm->prompt & (perm->allow | perm->deny))
+		return false;
+	if (perm->complain & perm->prompt)
+		return false;
+	if (perm->hide & perm->allow)
+		return false;
+
+	return true;
+}
+
+static bool verify_perms(struct aa_policydb *pdb)
 {
 	int i;
 
 	for (i = 0; i < pdb->size; i++) {
+		if (!verify_perm(&pdb->perms[i]))
+			return false;
+		/* verify indexes into str table */
 		if (pdb->perms[i].xindex >= pdb->trans.size)
 			return false;
 		if (pdb->perms[i].tag >= pdb->trans.size)
@@ -1187,17 +1213,17 @@ static int verify_profile(struct aa_profile *profile)
 		return -EPROTO;
 	}
 
-	if (!verify_perm_indexes(&profile->file)) {
+	if (!verify_perms(&profile->file)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
 	}
-	if (!verify_perm_indexes(&profile->policy)) {
+	if (!verify_perms(&profile->policy)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
 	}
-	if (!verify_perm_indexes(&profile->xmatch)) {
+	if (!verify_perms(&profile->xmatch)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
-- 
GitLab


From 217af7e2f4deb629aaa49622685ccfee923898ca Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Fri, 29 Jul 2022 17:17:31 -0700
Subject: [PATCH 0037/4122] apparmor: refactor profile rules and attachments

In preparation for moving from a single set of rules and a single
attachment to multiple rulesets and attachments separate from the
profile refactor attachment information and ruleset info into their
own structures.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c     |  27 ++++---
 security/apparmor/capability.c     |  12 +--
 security/apparmor/domain.c         |  81 +++++++++++---------
 security/apparmor/file.c           |  14 ++--
 security/apparmor/include/label.h  |   9 ++-
 security/apparmor/include/perms.h  |   3 +-
 security/apparmor/include/policy.h |  84 ++++++++++++--------
 security/apparmor/ipc.c            |   9 ++-
 security/apparmor/label.c          |  45 ++++++-----
 security/apparmor/lib.c            |  13 ++--
 security/apparmor/lsm.c            |   4 +-
 security/apparmor/mount.c          |  31 ++++----
 security/apparmor/net.c            |  24 +++---
 security/apparmor/policy.c         |  44 +++++++----
 security/apparmor/policy_ns.c      |   4 +-
 security/apparmor/policy_unpack.c  | 118 +++++++++++++++--------------
 security/apparmor/resource.c       |  15 ++--
 security/apparmor/task.c           |  10 +--
 18 files changed, 308 insertions(+), 239 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index fb9d2ccb34d60..84ef8b400b401 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -611,30 +611,29 @@ static const struct file_operations aa_fs_ns_revision_fops = {
 static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
 			     const char *match_str, size_t match_len)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_perms tmp = { };
-	struct aa_dfa *dfa;
 	aa_state_t state = DFA_NOMATCH;
 
 	if (profile_unconfined(profile))
 		return;
-	if (profile->file.dfa && *match_str == AA_CLASS_FILE) {
-		dfa = profile->file.dfa;
-		state = aa_dfa_match_len(dfa,
-					 profile->file.start[AA_CLASS_FILE],
+	if (rules->file.dfa && *match_str == AA_CLASS_FILE) {
+		state = aa_dfa_match_len(rules->file.dfa,
+					 rules->file.start[AA_CLASS_FILE],
 					 match_str + 1, match_len - 1);
 		if (state) {
 			struct path_cond cond = { };
 
-			tmp = *(aa_lookup_fperms(&(profile->file), state, &cond));
+			tmp = *(aa_lookup_fperms(&(rules->file), state, &cond));
 		}
-	} else if (profile->policy.dfa) {
-		if (!PROFILE_MEDIATES(profile, *match_str))
+	} else if (rules->policy.dfa) {
+		if (!RULE_MEDIATES(rules, *match_str))
 			return;	/* no change to current perms */
-		dfa = profile->policy.dfa;
-		state = aa_dfa_match_len(dfa, profile->policy.start[0],
+		state = aa_dfa_match_len(rules->policy.dfa,
+					 rules->policy.start[0],
 					 match_str, match_len);
 		if (state)
-			tmp = *aa_lookup_perms(&profile->policy, state);
+			tmp = *aa_lookup_perms(&rules->policy, state);
 	}
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum_raw(perms, &tmp);
@@ -1093,9 +1092,9 @@ static int seq_profile_attach_show(struct seq_file *seq, void *v)
 	struct aa_proxy *proxy = seq->private;
 	struct aa_label *label = aa_get_label_rcu(&proxy->label);
 	struct aa_profile *profile = labels_profile(label);
-	if (profile->attach)
-		seq_printf(seq, "%s\n", profile->attach);
-	else if (profile->xmatch.dfa)
+	if (profile->attach.xmatch_str)
+		seq_printf(seq, "%s\n", profile->attach.xmatch_str);
+	else if (profile->attach.xmatch.dfa)
 		seq_puts(seq, "<unknown>\n");
 	else
 		seq_printf(seq, "%s\n", profile->base.name);
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index 6cabd6109f12a..b66ec63e2a489 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -64,6 +64,7 @@ static void audit_cb(struct audit_buffer *ab, void *va)
 static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile,
 		      int cap, int error)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct audit_cache *ent;
 	int type = AUDIT_APPARMOR_AUTO;
 
@@ -72,13 +73,13 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile,
 	if (likely(!error)) {
 		/* test if auditing is being forced */
 		if (likely((AUDIT_MODE(profile) != AUDIT_ALL) &&
-			   !cap_raised(profile->caps.audit, cap)))
+			   !cap_raised(rules->caps.audit, cap)))
 			return 0;
 		type = AUDIT_APPARMOR_AUDIT;
 	} else if (KILL_MODE(profile) ||
-		   cap_raised(profile->caps.kill, cap)) {
+		   cap_raised(rules->caps.kill, cap)) {
 		type = AUDIT_APPARMOR_KILL;
-	} else if (cap_raised(profile->caps.quiet, cap) &&
+	} else if (cap_raised(rules->caps.quiet, cap) &&
 		   AUDIT_MODE(profile) != AUDIT_NOQUIET &&
 		   AUDIT_MODE(profile) != AUDIT_ALL) {
 		/* quiet auditing */
@@ -114,10 +115,11 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile,
 static int profile_capable(struct aa_profile *profile, int cap,
 			   unsigned int opts, struct common_audit_data *sa)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	int error;
 
-	if (cap_raised(profile->caps.allow, cap) &&
-	    !cap_raised(profile->caps.denied, cap))
+	if (cap_raised(rules->caps.allow, cap) &&
+	    !cap_raised(rules->caps.denied, cap))
 		error = 0;
 	else
 		error = -EPERM;
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index 4cb046cf3a14f..ad035d14cfc57 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -81,19 +81,20 @@ static inline aa_state_t match_component(struct aa_profile *profile,
 					 struct aa_profile *tp,
 					 bool stack, aa_state_t state)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	const char *ns_name;
 
 	if (stack)
-		state = aa_dfa_match(profile->file.dfa, state, "&");
+		state = aa_dfa_match(rules->file.dfa, state, "&");
 	if (profile->ns == tp->ns)
-		return aa_dfa_match(profile->file.dfa, state, tp->base.hname);
+		return aa_dfa_match(rules->file.dfa, state, tp->base.hname);
 
 	/* try matching with namespace name and then profile */
 	ns_name = aa_ns_name(profile->ns, tp->ns, true);
-	state = aa_dfa_match_len(profile->file.dfa, state, ":", 1);
-	state = aa_dfa_match(profile->file.dfa, state, ns_name);
-	state = aa_dfa_match_len(profile->file.dfa, state, ":", 1);
-	return aa_dfa_match(profile->file.dfa, state, tp->base.hname);
+	state = aa_dfa_match_len(rules->file.dfa, state, ":", 1);
+	state = aa_dfa_match(rules->file.dfa, state, ns_name);
+	state = aa_dfa_match_len(rules->file.dfa, state, ":", 1);
+	return aa_dfa_match(rules->file.dfa, state, tp->base.hname);
 }
 
 /**
@@ -117,6 +118,7 @@ static int label_compound_match(struct aa_profile *profile,
 				aa_state_t state, bool subns, u32 request,
 				struct aa_perms *perms)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_profile *tp;
 	struct label_it i;
 	struct path_cond cond = { };
@@ -139,12 +141,12 @@ next:
 	label_for_each_cont(i, label, tp) {
 		if (!aa_ns_visible(profile->ns, tp->ns, subns))
 			continue;
-		state = aa_dfa_match(profile->file.dfa, state, "//&");
+		state = aa_dfa_match(rules->file.dfa, state, "//&");
 		state = match_component(profile, tp, false, state);
 		if (!state)
 			goto fail;
 	}
-	*perms = *(aa_lookup_fperms(&(profile->file), state, &cond));
+	*perms = *(aa_lookup_fperms(&(rules->file), state, &cond));
 	aa_apply_modes_to_perms(profile, perms);
 	if ((perms->allow & request) != request)
 		return -EACCES;
@@ -177,6 +179,7 @@ static int label_components_match(struct aa_profile *profile,
 				  aa_state_t start, bool subns, u32 request,
 				  struct aa_perms *perms)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_profile *tp;
 	struct label_it i;
 	struct aa_perms tmp;
@@ -197,7 +200,7 @@ static int label_components_match(struct aa_profile *profile,
 	return 0;
 
 next:
-	tmp = *(aa_lookup_fperms(&(profile->file), state, &cond));
+	tmp = *(aa_lookup_fperms(&(rules->file), state, &cond));
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum(perms, &tmp);
 	label_for_each_cont(i, label, tp) {
@@ -206,7 +209,7 @@ next:
 		state = match_component(profile, tp, stack, start);
 		if (!state)
 			goto fail;
-		tmp = *(aa_lookup_fperms(&(profile->file), state, &cond));
+		tmp = *(aa_lookup_fperms(&(rules->file), state, &cond));
 		aa_apply_modes_to_perms(profile, &tmp);
 		aa_perms_accum(perms, &tmp);
 	}
@@ -296,18 +299,19 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 	ssize_t size;
 	struct dentry *d;
 	char *value = NULL;
-	int value_size = 0, ret = profile->xattr_count;
+	struct aa_attachment *attach = &profile->attach;
+	int value_size = 0, ret = attach->xattr_count;
 
-	if (!bprm || !profile->xattr_count)
+	if (!bprm || !attach->xattr_count)
 		return 0;
 	might_sleep();
 
 	/* transition from exec match to xattr set */
-	state = aa_dfa_outofband_transition(profile->xmatch.dfa, state);
+	state = aa_dfa_outofband_transition(attach->xmatch.dfa, state);
 	d = bprm->file->f_path.dentry;
 
-	for (i = 0; i < profile->xattr_count; i++) {
-		size = vfs_getxattr_alloc(&init_user_ns, d, profile->xattrs[i],
+	for (i = 0; i < attach->xattr_count; i++) {
+		size = vfs_getxattr_alloc(&init_user_ns, d, attach->xattrs[i],
 					  &value, value_size, GFP_KERNEL);
 		if (size >= 0) {
 			u32 index, perm;
@@ -317,20 +321,20 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
 			 * that not present xattr can be distinguished from a 0
 			 * length value or rule that matches any value
 			 */
-			state = aa_dfa_null_transition(profile->xmatch.dfa,
+			state = aa_dfa_null_transition(attach->xmatch.dfa,
 						       state);
 			/* Check xattr value */
-			state = aa_dfa_match_len(profile->xmatch.dfa, state,
+			state = aa_dfa_match_len(attach->xmatch.dfa, state,
 						 value, size);
-			index = ACCEPT_TABLE(profile->xmatch.dfa)[state];
-			perm = profile->xmatch.perms[index].allow;
+			index = ACCEPT_TABLE(attach->xmatch.dfa)[state];
+			perm = attach->xmatch.perms[index].allow;
 			if (!(perm & MAY_EXEC)) {
 				ret = -EINVAL;
 				goto out;
 			}
 		}
 		/* transition to next element */
-		state = aa_dfa_outofband_transition(profile->xmatch.dfa, state);
+		state = aa_dfa_outofband_transition(attach->xmatch.dfa, state);
 		if (size < 0) {
 			/*
 			 * No xattr match, so verify if transition to
@@ -382,6 +386,8 @@ static struct aa_label *find_attach(const struct linux_binprm *bprm,
 	rcu_read_lock();
 restart:
 	list_for_each_entry_rcu(profile, head, base.list) {
+		struct aa_attachment *attach = &profile->attach;
+
 		if (profile->label.flags & FLAG_NULL &&
 		    &profile->label == ns_unconfined(profile->ns))
 			continue;
@@ -397,16 +403,16 @@ restart:
 		 * as another profile, signal a conflict and refuse to
 		 * match.
 		 */
-		if (profile->xmatch.dfa) {
+		if (attach->xmatch.dfa) {
 			unsigned int count;
 			aa_state_t state;
 			u32 index, perm;
 
-			state = aa_dfa_leftmatch(profile->xmatch.dfa,
-					profile->xmatch.start[AA_CLASS_XMATCH],
+			state = aa_dfa_leftmatch(attach->xmatch.dfa,
+					attach->xmatch.start[AA_CLASS_XMATCH],
 					name, &count);
-			index = ACCEPT_TABLE(profile->xmatch.dfa)[state];
-			perm = profile->xmatch.perms[index].allow;
+			index = ACCEPT_TABLE(attach->xmatch.dfa)[state];
+			perm = attach->xmatch.perms[index].allow;
 			/* any accepting state means a valid match. */
 			if (perm & MAY_EXEC) {
 				int ret = 0;
@@ -414,7 +420,7 @@ restart:
 				if (count < candidate_len)
 					continue;
 
-				if (bprm && profile->xattr_count) {
+				if (bprm && attach->xattr_count) {
 					long rev = READ_ONCE(ns->revision);
 
 					if (!aa_get_profile_not0(profile))
@@ -453,7 +459,7 @@ restart:
 				 * xattrs, or a longer match
 				 */
 				candidate = profile;
-				candidate_len = max(count, profile->xmatch_len);
+				candidate_len = max(count, attach->xmatch_len);
 				candidate_xattrs = ret;
 				conflict = false;
 			}
@@ -497,6 +503,7 @@ static const char *next_name(int xtype, const char *name)
 struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
 				const char **name)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_label *label = NULL;
 	u32 xtype = xindex & AA_X_TYPE_MASK;
 	int index = xindex & AA_X_INDEX_MASK;
@@ -507,7 +514,7 @@ struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
 	/* TODO: move lookup parsing to unpack time so this is a straight
 	 *       index into the resultant label
 	 */
-	for (*name = profile->file.trans.table[index]; !label && *name;
+	for (*name = rules->file.trans.table[index]; !label && *name;
 	     *name = next_name(xtype, *name)) {
 		if (xindex & AA_X_CHILD) {
 			struct aa_profile *new_profile;
@@ -546,6 +553,7 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
 				   const char **lookupname,
 				   const char **info)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_label *new = NULL;
 	struct aa_ns *ns = profile->ns;
 	u32 xtype = xindex & AA_X_TYPE_MASK;
@@ -558,7 +566,7 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
 		break;
 	case AA_X_TABLE:
 		/* TODO: fix when perm mapping done at unload */
-		stack = profile->file.trans.table[xindex & AA_X_INDEX_MASK];
+		stack = rules->file.trans.table[xindex & AA_X_INDEX_MASK];
 		if (*stack != '&') {
 			/* released by caller */
 			new = x_table_lookup(profile, xindex, lookupname);
@@ -612,9 +620,10 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 					   char *buffer, struct path_cond *cond,
 					   bool *secure_exec)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_label *new = NULL;
 	const char *info = NULL, *name = NULL, *target = NULL;
-	aa_state_t state = profile->file.start[AA_CLASS_FILE];
+	aa_state_t state = rules->file.start[AA_CLASS_FILE];
 	struct aa_perms perms = {};
 	bool nonewprivs = false;
 	int error = 0;
@@ -648,7 +657,7 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 	}
 
 	/* find exec permissions for name */
-	state = aa_str_perms(&(profile->file), state, name, cond, &perms);
+	state = aa_str_perms(&(rules->file), state, name, cond, &perms);
 	if (perms.allow & MAY_EXEC) {
 		/* exec permission determine how to transition */
 		new = x_to_label(profile, bprm, name, perms.xindex, &target,
@@ -710,7 +719,8 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 			  char *buffer, struct path_cond *cond,
 			  bool *secure_exec)
 {
-	aa_state_t state = profile->file.start[AA_CLASS_FILE];
+	struct aa_ruleset *rules = &profile->rules;
+	aa_state_t state = rules->file.start[AA_CLASS_FILE];
 	struct aa_perms perms = {};
 	const char *xname = NULL, *info = "change_profile onexec";
 	int error = -EACCES;
@@ -743,7 +753,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 	}
 
 	/* find exec permissions for name */
-	state = aa_str_perms(&(profile->file), state, xname, cond, &perms);
+	state = aa_str_perms(&(rules->file), state, xname, cond, &perms);
 	if (!(perms.allow & AA_MAY_ONEXEC)) {
 		info = "no change_onexec valid for executable";
 		goto audit;
@@ -752,7 +762,7 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 	 * onexec permission is linked to exec with a standard pairing
 	 * exec\0change_profile
 	 */
-	state = aa_dfa_null_transition(profile->file.dfa, state);
+	state = aa_dfa_null_transition(rules->file.dfa, state);
 	error = change_profile_perms(profile, onexec, stack, AA_MAY_ONEXEC,
 				     state, &perms);
 	if (error) {
@@ -1249,12 +1259,13 @@ static int change_profile_perms_wrapper(const char *op, const char *name,
 					struct aa_label *target, bool stack,
 					u32 request, struct aa_perms *perms)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	const char *info = NULL;
 	int error = 0;
 
 	if (!error)
 		error = change_profile_perms(profile, target, stack, request,
-					     profile->file.start[AA_CLASS_FILE],
+					     rules->file.start[AA_CLASS_FILE],
 					     perms);
 	if (error)
 		error = aa_audit_file(profile, perms, op, request, name,
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index 69d936d04f948..ef5d98f81a2b8 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -224,11 +224,12 @@ int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name,
 		   u32 request, struct path_cond *cond, int flags,
 		   struct aa_perms *perms)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	int e = 0;
 
 	if (profile_unconfined(profile))
 		return 0;
-	aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE],
+	aa_str_perms(&(rules->file), rules->file.start[AA_CLASS_FILE],
 		     name, cond, perms);
 	if (request & ~perms->allow)
 		e = -EACCES;
@@ -316,6 +317,7 @@ static int profile_path_link(struct aa_profile *profile,
 			     const struct path *target, char *buffer2,
 			     struct path_cond *cond)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	const char *lname, *tname = NULL;
 	struct aa_perms lperms = {}, perms;
 	const char *info = NULL;
@@ -336,16 +338,16 @@ static int profile_path_link(struct aa_profile *profile,
 
 	error = -EACCES;
 	/* aa_str_perms - handles the case of the dfa being NULL */
-	state = aa_str_perms(&(profile->file),
-			     profile->file.start[AA_CLASS_FILE], lname,
+	state = aa_str_perms(&(rules->file),
+			     rules->file.start[AA_CLASS_FILE], lname,
 			     cond, &lperms);
 
 	if (!(lperms.allow & AA_MAY_LINK))
 		goto audit;
 
 	/* test to see if target can be paired with link */
-	state = aa_dfa_null_transition(profile->file.dfa, state);
-	aa_str_perms(&(profile->file), state, tname, cond, &perms);
+	state = aa_dfa_null_transition(rules->file.dfa, state);
+	aa_str_perms(&(rules->file), state, tname, cond, &perms);
 
 	/* force audit/quiet masks for link are stored in the second entry
 	 * in the link pair.
@@ -367,7 +369,7 @@ static int profile_path_link(struct aa_profile *profile,
 	/* Do link perm subset test requiring allowed permission on link are
 	 * a subset of the allowed permissions on target.
 	 */
-	aa_str_perms(&(profile->file), profile->file.start[AA_CLASS_FILE],
+	aa_str_perms(&(rules->file), rules->file.start[AA_CLASS_FILE],
 		     tname, cond, &perms);
 
 	/* AA_MAY_LINK is not considered in the subset test */
diff --git a/security/apparmor/include/label.h b/security/apparmor/include/label.h
index 1130ba10a1526..2a72e6b17d68b 100644
--- a/security/apparmor/include/label.h
+++ b/security/apparmor/include/label.h
@@ -261,7 +261,7 @@ for ((I).i = (I).j = 0;							\
 	struct label_it i;						\
 	int ret = 0;							\
 	label_for_each(i, (L), profile) {				\
-		if (PROFILE_MEDIATES(profile, (C))) {			\
+		if (RULE_MEDIATES(&profile->rules, (C))) {		\
 			ret = 1;					\
 			break;						\
 		}							\
@@ -357,9 +357,10 @@ static inline const char *aa_label_str_split(const char *str)
 
 
 struct aa_perms;
-int aa_label_match(struct aa_profile *profile, struct aa_label *label,
-		   aa_state_t state, bool subns, u32 request,
-		   struct aa_perms *perms);
+struct aa_ruleset;
+int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules,
+		   struct aa_label *label, aa_state_t state, bool subns,
+		   u32 request, struct aa_perms *perms);
 
 
 /**
diff --git a/security/apparmor/include/perms.h b/security/apparmor/include/perms.h
index 9fa71957ac3a0..797a7a00644d2 100644
--- a/security/apparmor/include/perms.h
+++ b/security/apparmor/include/perms.h
@@ -207,7 +207,8 @@ void aa_apply_modes_to_perms(struct aa_profile *profile,
 			     struct aa_perms *perms);
 void aa_perms_accum(struct aa_perms *accum, struct aa_perms *addend);
 void aa_perms_accum_raw(struct aa_perms *accum, struct aa_perms *addend);
-void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label,
+void aa_profile_match_label(struct aa_profile *profile,
+			    struct aa_ruleset *rules, struct aa_label *label,
 			    int type, u32 request, struct aa_perms *perms);
 int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target,
 			  u32 request, int type, u32 *deny,
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 2c39bd389f878..9ee2c05e28951 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -123,6 +123,43 @@ struct aa_data {
 	struct rhash_head head;
 };
 
+/* struct aa_ruleset - data covering mediation rules
+ * @size: the memory consumed by this ruleset
+ * @policy: general match rules governing policy
+ * @file: The set of rules governing basic file access and domain transitions
+ * @caps: capabilities for the profile
+ * @rlimits: rlimits for the profile
+ * @secmark_count: number of secmark entries
+ * @secmark: secmark label match info
+ */
+struct aa_ruleset {
+	int size;
+
+	/* TODO: merge policy and file */
+	struct aa_policydb policy;
+	struct aa_policydb file;
+	struct aa_caps caps;
+
+	struct aa_rlimit rlimits;
+
+	int secmark_count;
+	struct aa_secmark *secmark;
+};
+
+/* struct aa_attachment - data and rules for a profiles attachment
+ * @xmatch_str: human readable attachment string
+ * @xmatch: optional extended matching for unconfined executables names
+ * @xmatch_len: xmatch prefix len, used to determine xmatch priority
+ * @xattr_count: number of xattrs in table
+ * @xattrs: table of xattrs
+ */
+struct aa_attachment {
+	const char *xmatch_str;
+	struct aa_policydb xmatch;
+	unsigned int xmatch_len;
+	int xattr_count;
+	char **xattrs;
+};
 
 /* struct aa_profile - basic confinement data
  * @base - base components of the profile (name, refcount, lists, lock ...)
@@ -130,18 +167,13 @@ struct aa_data {
  * @parent: parent of profile
  * @ns: namespace the profile is in
  * @rename: optional profile name that this profile renamed
- * @attach: human readable attachment string
- * @xmatch: optional extended matching for unconfined executables names
- * @xmatch_len: xmatch prefix len, used to determine xmatch priority
+ *
  * @audit: the auditing mode of the profile
  * @mode: the enforcement mode of the profile
  * @path_flags: flags controlling path generation behavior
  * @disconnected: what to prepend if attach_disconnected is specified
- * @size: the memory consumed by this profiles rules
- * @policy: general match rules governing policy
- * @file: The set of rules governing basic file access and domain transitions
- * @caps: capabilities for the profile
- * @rlimits: rlimits for the profile
+ * @attach: attachment rules for the profile
+ * @rules: rules to be enforced
  *
  * @dents: dentries for the profiles file entries in apparmorfs
  * @dirname: name of the profile dir in apparmorfs
@@ -166,27 +198,13 @@ struct aa_profile {
 	struct aa_ns *ns;
 	const char *rename;
 
-	const char *attach;
-	struct aa_policydb xmatch;
-	unsigned int xmatch_len;
-
 	enum audit_mode audit;
 	long mode;
 	u32 path_flags;
 	const char *disconnected;
-	int size;
 
-	struct aa_policydb policy;
-	struct aa_policydb file;
-	struct aa_caps caps;
-
-	int xattr_count;
-	char **xattrs;
-
-	struct aa_rlimit rlimits;
-
-	int secmark_count;
-	struct aa_secmark *secmark;
+	struct aa_attachment attach;
+	struct aa_ruleset rules;
 
 	struct aa_loaddata *rawdata;
 	unsigned char *hash;
@@ -247,24 +265,24 @@ static inline struct aa_profile *aa_get_newest_profile(struct aa_profile *p)
 	return labels_profile(aa_get_newest_label(&p->label));
 }
 
-static inline aa_state_t PROFILE_MEDIATES(struct aa_profile *profile,
-					    unsigned char class)
+static inline aa_state_t RULE_MEDIATES(struct aa_ruleset *rules,
+				       unsigned char class)
 {
 	if (class <= AA_CLASS_LAST)
-		return profile->policy.start[class];
+		return rules->policy.start[class];
 	else
-		return aa_dfa_match_len(profile->policy.dfa,
-					profile->policy.start[0], &class, 1);
+		return aa_dfa_match_len(rules->policy.dfa,
+					rules->policy.start[0], &class, 1);
 }
 
-static inline aa_state_t PROFILE_MEDIATES_AF(struct aa_profile *profile,
-					     u16 AF) {
-	aa_state_t state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+static inline aa_state_t RULE_MEDIATES_AF(struct aa_ruleset *rules, u16 AF)
+{
+	aa_state_t state = RULE_MEDIATES(rules, AA_CLASS_NET);
 	__be16 be_af = cpu_to_be16(AF);
 
 	if (!state)
 		return DFA_NOMATCH;
-	return aa_dfa_match_len(profile->policy.dfa, state, (char *) &be_af, 2);
+	return aa_dfa_match_len(rules->policy.dfa, state, (char *) &be_af, 2);
 }
 
 /**
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 4ecaf2ba26c54..dc2fa548312db 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -78,19 +78,20 @@ static int profile_signal_perm(struct aa_profile *profile,
 			       struct aa_label *peer, u32 request,
 			       struct common_audit_data *sa)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_perms perms;
 	aa_state_t state;
 
 	if (profile_unconfined(profile) ||
-	    !PROFILE_MEDIATES(profile, AA_CLASS_SIGNAL))
+	    !RULE_MEDIATES(rules, AA_CLASS_SIGNAL))
 		return 0;
 
 	aad(sa)->peer = peer;
 	/* TODO: secondary cache check <profile, profile, perm> */
-	state = aa_dfa_next(profile->policy.dfa,
-			    profile->policy.start[AA_CLASS_SIGNAL],
+	state = aa_dfa_next(rules->policy.dfa,
+			    rules->policy.start[AA_CLASS_SIGNAL],
 			    aad(sa)->signal);
-	aa_label_match(profile, peer, state, false, request, &perms);
+	aa_label_match(profile, rules, peer, state, false, request, &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	return aa_check_perms(profile, &perms, request, sa, audit_signal_cb);
 }
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 3a967003fa7c7..98dadd9609772 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1266,20 +1266,21 @@ static inline bool label_is_visible(struct aa_profile *profile,
  * visibility test.
  */
 static inline aa_state_t match_component(struct aa_profile *profile,
+					 struct aa_ruleset *rules,
 					 struct aa_profile *tp,
 					 aa_state_t state)
 {
 	const char *ns_name;
 
 	if (profile->ns == tp->ns)
-		return aa_dfa_match(profile->policy.dfa, state, tp->base.hname);
+		return aa_dfa_match(rules->policy.dfa, state, tp->base.hname);
 
 	/* try matching with namespace name and then profile */
 	ns_name = aa_ns_name(profile->ns, tp->ns, true);
-	state = aa_dfa_match_len(profile->policy.dfa, state, ":", 1);
-	state = aa_dfa_match(profile->policy.dfa, state, ns_name);
-	state = aa_dfa_match_len(profile->policy.dfa, state, ":", 1);
-	return aa_dfa_match(profile->policy.dfa, state, tp->base.hname);
+	state = aa_dfa_match_len(rules->policy.dfa, state, ":", 1);
+	state = aa_dfa_match(rules->policy.dfa, state, ns_name);
+	state = aa_dfa_match_len(rules->policy.dfa, state, ":", 1);
+	return aa_dfa_match(rules->policy.dfa, state, tp->base.hname);
 }
 
 /**
@@ -1298,6 +1299,7 @@ static inline aa_state_t match_component(struct aa_profile *profile,
  *        check to be stacked.
  */
 static int label_compound_match(struct aa_profile *profile,
+				struct aa_ruleset *rules,
 				struct aa_label *label,
 				aa_state_t state, bool subns, u32 request,
 				struct aa_perms *perms)
@@ -1309,7 +1311,7 @@ static int label_compound_match(struct aa_profile *profile,
 	label_for_each(i, label, tp) {
 		if (!aa_ns_visible(profile->ns, tp->ns, subns))
 			continue;
-		state = match_component(profile, tp, state);
+		state = match_component(profile, rules, tp, state);
 		if (!state)
 			goto fail;
 		goto next;
@@ -1323,12 +1325,12 @@ next:
 	label_for_each_cont(i, label, tp) {
 		if (!aa_ns_visible(profile->ns, tp->ns, subns))
 			continue;
-		state = aa_dfa_match(profile->policy.dfa, state, "//&");
-		state = match_component(profile, tp, state);
+		state = aa_dfa_match(rules->policy.dfa, state, "//&");
+		state = match_component(profile, rules, tp, state);
 		if (!state)
 			goto fail;
 	}
-	*perms = *aa_lookup_perms(&profile->policy, state);
+	*perms = *aa_lookup_perms(&rules->policy, state);
 	aa_apply_modes_to_perms(profile, perms);
 	if ((perms->allow & request) != request)
 		return -EACCES;
@@ -1343,6 +1345,7 @@ fail:
 /**
  * label_components_match - find perms for all subcomponents of a label
  * @profile: profile to find perms for
+ * @rules: ruleset to search
  * @label: label to check access permissions for
  * @start: state to start match in
  * @subns: whether to do permission checks on components in a subns
@@ -1356,6 +1359,7 @@ fail:
  *        check to be stacked.
  */
 static int label_components_match(struct aa_profile *profile,
+				  struct aa_ruleset *rules,
 				  struct aa_label *label, aa_state_t start,
 				  bool subns, u32 request,
 				  struct aa_perms *perms)
@@ -1369,7 +1373,7 @@ static int label_components_match(struct aa_profile *profile,
 	label_for_each(i, label, tp) {
 		if (!aa_ns_visible(profile->ns, tp->ns, subns))
 			continue;
-		state = match_component(profile, tp, start);
+		state = match_component(profile, rules, tp, start);
 		if (!state)
 			goto fail;
 		goto next;
@@ -1379,16 +1383,16 @@ static int label_components_match(struct aa_profile *profile,
 	return 0;
 
 next:
-	tmp = *aa_lookup_perms(&profile->policy, state);
+	tmp = *aa_lookup_perms(&rules->policy, state);
 	aa_apply_modes_to_perms(profile, &tmp);
 	aa_perms_accum(perms, &tmp);
 	label_for_each_cont(i, label, tp) {
 		if (!aa_ns_visible(profile->ns, tp->ns, subns))
 			continue;
-		state = match_component(profile, tp, start);
+		state = match_component(profile, rules, tp, start);
 		if (!state)
 			goto fail;
-		tmp = *aa_lookup_perms(&profile->policy, state);
+		tmp = *aa_lookup_perms(&rules->policy, state);
 		aa_apply_modes_to_perms(profile, &tmp);
 		aa_perms_accum(perms, &tmp);
 	}
@@ -1406,6 +1410,7 @@ fail:
 /**
  * aa_label_match - do a multi-component label match
  * @profile: profile to match against (NOT NULL)
+ * @rules: ruleset to search
  * @label: label to match (NOT NULL)
  * @state: state to start in
  * @subns: whether to match subns components
@@ -1414,18 +1419,18 @@ fail:
  *
  * Returns: the state the match finished in, may be the none matching state
  */
-int aa_label_match(struct aa_profile *profile, struct aa_label *label,
-		   aa_state_t state, bool subns, u32 request,
-		   struct aa_perms *perms)
+int aa_label_match(struct aa_profile *profile, struct aa_ruleset *rules,
+		   struct aa_label *label, aa_state_t state, bool subns,
+		   u32 request, struct aa_perms *perms)
 {
-	int error = label_compound_match(profile, label, state, subns, request,
-					 perms);
+	int error = label_compound_match(profile, rules, label, state, subns,
+					 request, perms);
 	if (!error)
 		return error;
 
 	*perms = allperms;
-	return label_components_match(profile, label, state, subns, request,
-				      perms);
+	return label_components_match(profile, rules, label, state, subns,
+				      request, perms);
 }
 
 
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index 10e3b11e02adf..ec73e51ca7e3b 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -331,16 +331,18 @@ void aa_apply_modes_to_perms(struct aa_profile *profile, struct aa_perms *perms)
 		perms->prompt = ALL_PERMS_MASK;
 }
 
-void aa_profile_match_label(struct aa_profile *profile, struct aa_label *label,
+void aa_profile_match_label(struct aa_profile *profile,
+			    struct aa_ruleset *rules,
+			    struct aa_label *label,
 			    int type, u32 request, struct aa_perms *perms)
 {
 	/* TODO: doesn't yet handle extended types */
 	aa_state_t state;
 
-	state = aa_dfa_next(profile->policy.dfa,
-			    profile->policy.start[AA_CLASS_LABEL],
+	state = aa_dfa_next(rules->policy.dfa,
+			    rules->policy.start[AA_CLASS_LABEL],
 			    type);
-	aa_label_match(profile, label, state, false, request, perms);
+	aa_label_match(profile, rules, label, state, false, request, perms);
 }
 
 
@@ -355,7 +357,8 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target,
 	aad(sa)->peer = &target->label;
 	aad(sa)->request = request;
 
-	aa_profile_match_label(profile, &target->label, type, request, &perms);
+	aa_profile_match_label(profile, &profile->rules, &target->label, type,
+			       request, &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	*deny |= request & perms.deny;
 	return aa_check_perms(profile, &perms, request, sa, aa_audit_perms_cb);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 784709286a626..62f2ca32b9596 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -166,9 +166,9 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective,
 			if (COMPLAIN_MODE(profile))
 				continue;
 			*effective = cap_intersect(*effective,
-						   profile->caps.allow);
+						   profile->rules.caps.allow);
 			*permitted = cap_intersect(*permitted,
-						   profile->caps.allow);
+						   profile->rules.caps.allow);
 		}
 	}
 	rcu_read_unlock();
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index 02d8215cb9fd6..d4724bdcb07f1 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -303,13 +303,14 @@ static int match_mnt_path_str(struct aa_profile *profile,
 {
 	struct aa_perms perms = { };
 	const char *mntpnt = NULL, *info = NULL;
+	struct aa_ruleset *rules = &profile->rules;
 	int pos, error;
 
 	AA_BUG(!profile);
 	AA_BUG(!mntpath);
 	AA_BUG(!buffer);
 
-	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+	if (!RULE_MEDIATES(rules, AA_CLASS_MOUNT))
 		return 0;
 
 	error = aa_path_name(mntpath, path_flags(profile, mntpath), buffer,
@@ -324,8 +325,8 @@ static int match_mnt_path_str(struct aa_profile *profile,
 	}
 
 	error = -EACCES;
-	pos = do_match_mnt(&profile->policy,
-			   profile->policy.start[AA_CLASS_MOUNT],
+	pos = do_match_mnt(&rules->policy,
+			   rules->policy.start[AA_CLASS_MOUNT],
 			   mntpnt, devname, type, flags, data, binary, &perms);
 	if (pos) {
 		info = mnt_info_table[pos];
@@ -363,7 +364,7 @@ static int match_mnt(struct aa_profile *profile, const struct path *path,
 	AA_BUG(!profile);
 	AA_BUG(devpath && !devbuffer);
 
-	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+	if (!RULE_MEDIATES(&profile->rules, AA_CLASS_MOUNT))
 		return 0;
 
 	if (devpath) {
@@ -565,6 +566,7 @@ out:
 static int profile_umount(struct aa_profile *profile, const struct path *path,
 			  char *buffer)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_perms perms = { };
 	const char *name = NULL, *info = NULL;
 	aa_state_t state;
@@ -573,7 +575,7 @@ static int profile_umount(struct aa_profile *profile, const struct path *path,
 	AA_BUG(!profile);
 	AA_BUG(!path);
 
-	if (!PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+	if (!RULE_MEDIATES(rules, AA_CLASS_MOUNT))
 		return 0;
 
 	error = aa_path_name(path, path_flags(profile, path), buffer, &name,
@@ -581,10 +583,10 @@ static int profile_umount(struct aa_profile *profile, const struct path *path,
 	if (error)
 		goto audit;
 
-	state = aa_dfa_match(profile->policy.dfa,
-			     profile->policy.start[AA_CLASS_MOUNT],
+	state = aa_dfa_match(rules->policy.dfa,
+			     rules->policy.start[AA_CLASS_MOUNT],
 			     name);
-	perms = *aa_lookup_perms(&profile->policy, state);
+	perms = *aa_lookup_perms(&rules->policy, state);
 	if (AA_MAY_UMOUNT & ~perms.allow)
 		error = -EACCES;
 
@@ -624,6 +626,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 					const struct path *old_path,
 					char *old_buffer)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	const char *old_name, *new_name = NULL, *info = NULL;
 	const char *trans_name = NULL;
 	struct aa_perms perms = { };
@@ -635,7 +638,7 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 	AA_BUG(!old_path);
 
 	if (profile_unconfined(profile) ||
-	    !PROFILE_MEDIATES(profile, AA_CLASS_MOUNT))
+	    !RULE_MEDIATES(rules, AA_CLASS_MOUNT))
 		return aa_get_newest_label(&profile->label);
 
 	error = aa_path_name(old_path, path_flags(profile, old_path),
@@ -650,12 +653,12 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 		goto audit;
 
 	error = -EACCES;
-	state = aa_dfa_match(profile->policy.dfa,
-			     profile->policy.start[AA_CLASS_MOUNT],
+	state = aa_dfa_match(rules->policy.dfa,
+			     rules->policy.start[AA_CLASS_MOUNT],
 			     new_name);
-	state = aa_dfa_null_transition(profile->policy.dfa, state);
-	state = aa_dfa_match(profile->policy.dfa, state, old_name);
-	perms = *aa_lookup_perms(&profile->policy, state);
+	state = aa_dfa_null_transition(rules->policy.dfa, state);
+	state = aa_dfa_match(rules->policy.dfa, state, old_name);
+	perms = *aa_lookup_perms(&rules->policy, state);
 
 	if (AA_MAY_PIVOTROOT & perms.allow)
 		error = 0;
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index d420d3aec3b85..ae789ee834ada 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -108,6 +108,7 @@ void audit_net_cb(struct audit_buffer *ab, void *va)
 int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
 		       u32 request, u16 family, int type)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	struct aa_perms perms = { };
 	aa_state_t state;
 	__be16 buffer[2];
@@ -117,15 +118,15 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
 
 	if (profile_unconfined(profile))
 		return 0;
-	state = PROFILE_MEDIATES(profile, AA_CLASS_NET);
+	state = RULE_MEDIATES(rules, AA_CLASS_NET);
 	if (!state)
 		return 0;
 
 	buffer[0] = cpu_to_be16(family);
 	buffer[1] = cpu_to_be16((u16) type);
-	state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer,
+	state = aa_dfa_match_len(rules->policy.dfa, state, (char *) &buffer,
 				 4);
-	perms = *aa_lookup_perms(&profile->policy, state);
+	perms = *aa_lookup_perms(&rules->policy, state);
 	aa_apply_modes_to_perms(profile, &perms);
 
 	return aa_check_perms(profile, &perms, request, sa, audit_net_cb);
@@ -216,25 +217,26 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid,
 {
 	int i, ret;
 	struct aa_perms perms = { };
+	struct aa_ruleset *rules = &profile->rules;
 
-	if (profile->secmark_count == 0)
+	if (rules->secmark_count == 0)
 		return 0;
 
-	for (i = 0; i < profile->secmark_count; i++) {
-		if (!profile->secmark[i].secid) {
-			ret = apparmor_secmark_init(&profile->secmark[i]);
+	for (i = 0; i < rules->secmark_count; i++) {
+		if (!rules->secmark[i].secid) {
+			ret = apparmor_secmark_init(&rules->secmark[i]);
 			if (ret)
 				return ret;
 		}
 
-		if (profile->secmark[i].secid == secid ||
-		    profile->secmark[i].secid == AA_SECID_WILDCARD) {
-			if (profile->secmark[i].deny)
+		if (rules->secmark[i].secid == secid ||
+		    rules->secmark[i].secid == AA_SECID_WILDCARD) {
+			if (rules->secmark[i].deny)
 				perms.deny = ALL_PERMS_MASK;
 			else
 				perms.allow = ALL_PERMS_MASK;
 
-			if (profile->secmark[i].audit)
+			if (rules->secmark[i].audit)
 				perms.audit = ALL_PERMS_MASK;
 		}
 	}
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 3c3a5263695de..74c0a3b34e9ba 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -193,6 +193,30 @@ static void aa_free_data(void *ptr, void *arg)
 	kfree_sensitive(data);
 }
 
+static void free_attachment(struct aa_attachment *attach)
+{
+	int i;
+
+	for (i = 0; i < attach->xattr_count; i++)
+		kfree_sensitive(attach->xattrs[i]);
+	kfree_sensitive(attach->xattrs);
+	aa_destroy_policydb(&attach->xmatch);
+}
+
+static void free_ruleset(struct aa_ruleset *rules)
+{
+	int i;
+
+	aa_destroy_policydb(&rules->file);
+	aa_destroy_policydb(&rules->policy);
+	aa_free_cap_rules(&rules->caps);
+	aa_free_rlimit_rules(&rules->rlimits);
+
+	for (i = 0; i < rules->secmark_count; i++)
+		kfree_sensitive(rules->secmark[i].label);
+	kfree_sensitive(rules->secmark);
+}
+
 /**
  * aa_free_profile - free a profile
  * @profile: the profile to free  (MAYBE NULL)
@@ -206,7 +230,6 @@ static void aa_free_data(void *ptr, void *arg)
 void aa_free_profile(struct aa_profile *profile)
 {
 	struct rhashtable *rht;
-	int i;
 
 	AA_DEBUG("%s(%p)\n", __func__, profile);
 
@@ -220,19 +243,10 @@ void aa_free_profile(struct aa_profile *profile)
 	aa_put_ns(profile->ns);
 	kfree_sensitive(profile->rename);
 
-	aa_destroy_policydb(&profile->file);
-	aa_free_cap_rules(&profile->caps);
-	aa_free_rlimit_rules(&profile->rlimits);
-
-	for (i = 0; i < profile->xattr_count; i++)
-		kfree_sensitive(profile->xattrs[i]);
-	kfree_sensitive(profile->xattrs);
-	for (i = 0; i < profile->secmark_count; i++)
-		kfree_sensitive(profile->secmark[i].label);
-	kfree_sensitive(profile->secmark);
+	free_attachment(&profile->attach);
+	free_ruleset(&profile->rules);
 	kfree_sensitive(profile->dirname);
-	aa_destroy_policydb(&profile->xmatch);
-	aa_destroy_policydb(&profile->policy);
+
 	if (profile->data) {
 		rht = profile->data;
 		profile->data = NULL;
@@ -544,8 +558,8 @@ name:
 	/* released on free_profile */
 	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
 	profile->ns = aa_get_ns(parent->ns);
-	profile->file.dfa = aa_get_dfa(nulldfa);
-	profile->policy.dfa = aa_get_dfa(nulldfa);
+	profile->rules.file.dfa = aa_get_dfa(nulldfa);
+	profile->rules.policy.dfa = aa_get_dfa(nulldfa);
 
 	mutex_lock_nested(&profile->ns->lock, profile->ns->level);
 	p = __find_child(&parent->base.profiles, bname);
diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index 43beaad083feb..cb10994cd3b6b 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -91,8 +91,8 @@ static struct aa_profile *alloc_unconfined(const char *name)
 	profile->label.flags |= FLAG_IX_ON_NAME_ERROR |
 		FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED;
 	profile->mode = APPARMOR_UNCONFINED;
-	profile->file.dfa = aa_get_dfa(nulldfa);
-	profile->policy.dfa = aa_get_dfa(nulldfa);
+	profile->rules.file.dfa = aa_get_dfa(nulldfa);
+	profile->rules.policy.dfa = aa_get_dfa(nulldfa);
 
 	return profile;
 }
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 5a78aaa0eea49..bbca7772dfa2b 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -556,12 +556,12 @@ static bool unpack_xattrs(struct aa_ext *e, struct aa_profile *profile)
 
 		if (unpack_array(e, NULL, &size) != TRI_TRUE)
 			goto fail;
-		profile->xattr_count = size;
-		profile->xattrs = kcalloc(size, sizeof(char *), GFP_KERNEL);
-		if (!profile->xattrs)
+		profile->attach.xattr_count = size;
+		profile->attach.xattrs = kcalloc(size, sizeof(char *), GFP_KERNEL);
+		if (!profile->attach.xattrs)
 			goto fail;
 		for (i = 0; i < size; i++) {
-			if (!unpack_strdup(e, &profile->xattrs[i], NULL))
+			if (!unpack_strdup(e, &profile->attach.xattrs[i], NULL))
 				goto fail;
 		}
 		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
@@ -579,6 +579,7 @@ fail:
 
 static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	void *pos = e->pos;
 	u16 size;
 	int i;
@@ -587,19 +588,19 @@ static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
 		if (unpack_array(e, NULL, &size) != TRI_TRUE)
 			goto fail;
 
-		profile->secmark = kcalloc(size, sizeof(struct aa_secmark),
+		rules->secmark = kcalloc(size, sizeof(struct aa_secmark),
 					   GFP_KERNEL);
-		if (!profile->secmark)
+		if (!rules->secmark)
 			goto fail;
 
-		profile->secmark_count = size;
+		rules->secmark_count = size;
 
 		for (i = 0; i < size; i++) {
-			if (!unpack_u8(e, &profile->secmark[i].audit, NULL))
+			if (!unpack_u8(e, &rules->secmark[i].audit, NULL))
 				goto fail;
-			if (!unpack_u8(e, &profile->secmark[i].deny, NULL))
+			if (!unpack_u8(e, &rules->secmark[i].deny, NULL))
 				goto fail;
-			if (!unpack_strdup(e, &profile->secmark[i].label, NULL))
+			if (!unpack_strdup(e, &rules->secmark[i].label, NULL))
 				goto fail;
 		}
 		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
@@ -611,12 +612,12 @@ static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
 	return true;
 
 fail:
-	if (profile->secmark) {
+	if (rules->secmark) {
 		for (i = 0; i < size; i++)
-			kfree(profile->secmark[i].label);
-		kfree(profile->secmark);
-		profile->secmark_count = 0;
-		profile->secmark = NULL;
+			kfree(rules->secmark[i].label);
+		kfree(rules->secmark);
+		rules->secmark_count = 0;
+		rules->secmark = NULL;
 	}
 
 	e->pos = pos;
@@ -634,7 +635,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 		u32 tmp = 0;
 		if (!unpack_u32(e, &tmp, NULL))
 			goto fail;
-		profile->rlimits.mask = tmp;
+		profile->rules.rlimits.mask = tmp;
 
 		if (unpack_array(e, NULL, &size) != TRI_TRUE ||
 		    size > RLIM_NLIMITS)
@@ -644,7 +645,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 			int a = aa_map_resource(i);
 			if (!unpack_u64(e, &tmp2, NULL))
 				goto fail;
-			profile->rlimits.limits[a].rlim_max = tmp2;
+			profile->rules.rlimits.limits[a].rlim_max = tmp2;
 		}
 		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
 			goto fail;
@@ -816,6 +817,7 @@ static int datacmp(struct rhashtable_compare_arg *arg, const void *obj)
  */
 static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 {
+	struct aa_ruleset *rules;
 	struct aa_profile *profile = NULL;
 	const char *tmpname, *tmpns = NULL, *name = NULL;
 	const char *info = "failed to unpack profile";
@@ -850,27 +852,30 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	profile = aa_alloc_profile(name, NULL, GFP_KERNEL);
 	if (!profile)
 		return ERR_PTR(-ENOMEM);
+	rules = &profile->rules;
 
 	/* profile renaming is optional */
 	(void) unpack_str(e, &profile->rename, "rename");
 
 	/* attachment string is optional */
-	(void) unpack_str(e, &profile->attach, "attach");
+	(void) unpack_str(e, &profile->attach.xmatch_str, "attach");
 
 	/* xmatch is optional and may be NULL */
-	error = unpack_pdb(e, &profile->xmatch, false, false, &info);
-	if (error)
+	error = unpack_pdb(e, &profile->attach.xmatch, false, false, &info);
+	if (error) {
+		info = "bad xmatch";
 		goto fail;
+	}
 
 	/* neither xmatch_len not xmatch_perms are optional if xmatch is set */
-	if (profile->xmatch.dfa) {
+	if (profile->attach.xmatch.dfa) {
 		if (!unpack_u32(e, &tmp, NULL)) {
 			info = "missing xmatch len";
 			goto fail;
 		}
-		profile->xmatch_len = tmp;
-		profile->xmatch.start[AA_CLASS_XMATCH] = DFA_START;
-		if (aa_compat_map_xmatch(&profile->xmatch)) {
+		profile->attach.xmatch_len = tmp;
+		profile->attach.xmatch.start[AA_CLASS_XMATCH] = DFA_START;
+		if (aa_compat_map_xmatch(&profile->attach.xmatch)) {
 			info = "failed to convert xmatch permission table";
 			goto fail;
 		}
@@ -926,11 +931,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		profile->path_flags = PATH_MEDIATE_DELETED;
 
 	info = "failed to unpack profile capabilities";
-	if (!unpack_u32(e, &(profile->caps.allow.cap[0]), NULL))
+	if (!unpack_u32(e, &(rules->caps.allow.cap[0]), NULL))
 		goto fail;
-	if (!unpack_u32(e, &(profile->caps.audit.cap[0]), NULL))
+	if (!unpack_u32(e, &(rules->caps.audit.cap[0]), NULL))
 		goto fail;
-	if (!unpack_u32(e, &(profile->caps.quiet.cap[0]), NULL))
+	if (!unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL))
 		goto fail;
 	if (!unpack_u32(e, &tmpcap.cap[0], NULL))
 		goto fail;
@@ -938,11 +943,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	info = "failed to unpack upper profile capabilities";
 	if (unpack_nameX(e, AA_STRUCT, "caps64")) {
 		/* optional upper half of 64 bit caps */
-		if (!unpack_u32(e, &(profile->caps.allow.cap[1]), NULL))
+		if (!unpack_u32(e, &(rules->caps.allow.cap[1]), NULL))
 			goto fail;
-		if (!unpack_u32(e, &(profile->caps.audit.cap[1]), NULL))
+		if (!unpack_u32(e, &(rules->caps.audit.cap[1]), NULL))
 			goto fail;
-		if (!unpack_u32(e, &(profile->caps.quiet.cap[1]), NULL))
+		if (!unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL))
 			goto fail;
 		if (!unpack_u32(e, &(tmpcap.cap[1]), NULL))
 			goto fail;
@@ -953,9 +958,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	info = "failed to unpack extended profile capabilities";
 	if (unpack_nameX(e, AA_STRUCT, "capsx")) {
 		/* optional extended caps mediation mask */
-		if (!unpack_u32(e, &(profile->caps.extended.cap[0]), NULL))
+		if (!unpack_u32(e, &(rules->caps.extended.cap[0]), NULL))
 			goto fail;
-		if (!unpack_u32(e, &(profile->caps.extended.cap[1]), NULL))
+		if (!unpack_u32(e, &(rules->caps.extended.cap[1]), NULL))
 			goto fail;
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
@@ -979,40 +984,41 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (unpack_nameX(e, AA_STRUCT, "policydb")) {
 		/* generic policy dfa - optional and may be NULL */
 		info = "failed to unpack policydb";
-		error = unpack_pdb(e, &profile->policy, true, false, &info);
+		error = unpack_pdb(e, &rules->policy, true, false,
+				   &info);
 		if (error)
 			goto fail;
 		/* Fixup: drop when we get rid of start array */
-		if (aa_dfa_next(profile->policy.dfa, profile->policy.start[0],
+		if (aa_dfa_next(rules->policy.dfa, rules->policy.start[0],
 				AA_CLASS_FILE))
-			profile->policy.start[AA_CLASS_FILE] =
-			  aa_dfa_next(profile->policy.dfa,
-				      profile->policy.start[0],
+			rules->policy.start[AA_CLASS_FILE] =
+			  aa_dfa_next(rules->policy.dfa,
+				      rules->policy.start[0],
 				      AA_CLASS_FILE);
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
-		if (aa_compat_map_policy(&profile->policy, e->version)) {
+		if (aa_compat_map_policy(&rules->policy, e->version)) {
 			info = "failed to remap policydb permission table";
 			goto fail;
 		}
 	} else
-		profile->policy.dfa = aa_get_dfa(nulldfa);
+		rules->policy.dfa = aa_get_dfa(nulldfa);
 
 	/* get file rules */
-	error = unpack_pdb(e, &profile->file, false, true, &info);
+	error = unpack_pdb(e, &rules->file, false, true, &info);
 	if (error) {
 		goto fail;
-	} else if (profile->file.dfa) {
-		if (aa_compat_map_file(&profile->file)) {
+	} else if (rules->file.dfa) {
+		if (aa_compat_map_file(&rules->file)) {
 			info = "failed to remap file permission table";
 			goto fail;
 		}
-	} else if (profile->policy.dfa &&
-		   profile->policy.start[AA_CLASS_FILE]) {
-		profile->file.dfa = aa_get_dfa(profile->policy.dfa);
-		profile->file.start[AA_CLASS_FILE] = profile->policy.start[AA_CLASS_FILE];
+	} else if (rules->policy.dfa &&
+		   rules->policy.start[AA_CLASS_FILE]) {
+		rules->file.dfa = aa_get_dfa(rules->policy.dfa);
+		rules->file.start[AA_CLASS_FILE] = rules->policy.start[AA_CLASS_FILE];
 	} else
-		profile->file.dfa = aa_get_dfa(nulldfa);
+		rules->file.dfa = aa_get_dfa(nulldfa);
 
 	if (unpack_nameX(e, AA_STRUCT, "data")) {
 		info = "out of memory";
@@ -1202,28 +1208,28 @@ static bool verify_perms(struct aa_policydb *pdb)
  */
 static int verify_profile(struct aa_profile *profile)
 {
-	if ((profile->file.dfa &&
-	     !verify_dfa_xindex(profile->file.dfa,
-				profile->file.trans.size)) ||
-	    (profile->policy.dfa &&
-	     !verify_dfa_xindex(profile->policy.dfa,
-				profile->policy.trans.size))) {
+	if ((profile->rules.file.dfa &&
+	     !verify_dfa_xindex(profile->rules.file.dfa,
+				profile->rules.file.trans.size)) ||
+	    (profile->rules.policy.dfa &&
+	     !verify_dfa_xindex(profile->rules.policy.dfa,
+				profile->rules.policy.trans.size))) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid named transition", NULL, -EPROTO);
 		return -EPROTO;
 	}
 
-	if (!verify_perms(&profile->file)) {
+	if (!verify_perms(&profile->rules.file)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
 	}
-	if (!verify_perms(&profile->policy)) {
+	if (!verify_perms(&profile->rules.policy)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
 	}
-	if (!verify_perms(&profile->xmatch)) {
+	if (!verify_perms(&profile->attach.xmatch)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index cc018469e22d7..f28026804d13d 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -82,10 +82,11 @@ int aa_map_resource(int resource)
 static int profile_setrlimit(struct aa_profile *profile, unsigned int resource,
 			     struct rlimit *new_rlim)
 {
+	struct aa_ruleset *rules = &profile->rules;
 	int e = 0;
 
-	if (profile->rlimits.mask & (1 << resource) && new_rlim->rlim_max >
-	    profile->rlimits.limits[resource].rlim_max)
+	if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max >
+	    rules->rlimits.limits[resource].rlim_max)
 		e = -EACCES;
 	return audit_resource(profile, resource, new_rlim->rlim_max, NULL, NULL,
 			      e);
@@ -153,12 +154,12 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l)
 	 * to the lesser of the tasks hard limit and the init tasks soft limit
 	 */
 	label_for_each_confined(i, old_l, old) {
-		if (old->rlimits.mask) {
+		if (old->rules.rlimits.mask) {
 			int j;
 
 			for (j = 0, mask = 1; j < RLIM_NLIMITS; j++,
 				     mask <<= 1) {
-				if (old->rlimits.mask & mask) {
+				if (old->rules.rlimits.mask & mask) {
 					rlim = current->signal->rlim + j;
 					initrlim = init_task.signal->rlim + j;
 					rlim->rlim_cur = min(rlim->rlim_max,
@@ -172,15 +173,15 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l)
 	label_for_each_confined(i, new_l, new) {
 		int j;
 
-		if (!new->rlimits.mask)
+		if (!new->rules.rlimits.mask)
 			continue;
 		for (j = 0, mask = 1; j < RLIM_NLIMITS; j++, mask <<= 1) {
-			if (!(new->rlimits.mask & mask))
+			if (!(new->rules.rlimits.mask & mask))
 				continue;
 
 			rlim = current->signal->rlim + j;
 			rlim->rlim_max = min(rlim->rlim_max,
-					     new->rlimits.limits[j].rlim_max);
+					     new->rules.rlimits.limits[j].rlim_max);
 			/* soft limit should not exceed hard limit */
 			rlim->rlim_cur = min(rlim->rlim_cur, rlim->rlim_max);
 		}
diff --git a/security/apparmor/task.c b/security/apparmor/task.c
index b19900f85c148..7e64fba42ca3d 100644
--- a/security/apparmor/task.c
+++ b/security/apparmor/task.c
@@ -223,7 +223,7 @@ static void audit_ptrace_cb(struct audit_buffer *ab, void *va)
 			FLAGS_NONE, GFP_ATOMIC);
 }
 
-/* assumes check for PROFILE_MEDIATES is already done */
+/* assumes check for RULE_MEDIATES is already done */
 /* TODO: conditionals */
 static int profile_ptrace_perm(struct aa_profile *profile,
 			     struct aa_label *peer, u32 request,
@@ -232,8 +232,8 @@ static int profile_ptrace_perm(struct aa_profile *profile,
 	struct aa_perms perms = { };
 
 	aad(sa)->peer = peer;
-	aa_profile_match_label(profile, peer, AA_CLASS_PTRACE, request,
-			       &perms);
+	aa_profile_match_label(profile, &profile->rules, peer,
+			       AA_CLASS_PTRACE, request, &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
 }
@@ -243,7 +243,7 @@ static int profile_tracee_perm(struct aa_profile *tracee,
 			       struct common_audit_data *sa)
 {
 	if (profile_unconfined(tracee) || unconfined(tracer) ||
-	    !PROFILE_MEDIATES(tracee, AA_CLASS_PTRACE))
+	    !RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE))
 		return 0;
 
 	return profile_ptrace_perm(tracee, tracer, request, sa);
@@ -256,7 +256,7 @@ static int profile_tracer_perm(struct aa_profile *tracer,
 	if (profile_unconfined(tracer))
 		return 0;
 
-	if (PROFILE_MEDIATES(tracer, AA_CLASS_PTRACE))
+	if (RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE))
 		return profile_ptrace_perm(tracer, tracee, request, sa);
 
 	/* profile uses the old style capability check for ptrace */
-- 
GitLab


From 1ad22fcc4d0d2fb2e0f35aed555a86d016d5e590 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 5 Sep 2022 20:47:36 -0700
Subject: [PATCH 0038/4122] apparmor: rework profile->rules to be a list

Convert profile->rules to a list as the next step towards supporting
multiple rulesets in a profile. For this step only support a single
list entry item. The logic for iterating the list will come as a
separate step.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c     |  3 ++-
 security/apparmor/capability.c     |  6 +++--
 security/apparmor/domain.c         | 24 ++++++++++++-------
 security/apparmor/file.c           |  6 +++--
 security/apparmor/include/policy.h | 17 +++++++++++++-
 security/apparmor/ipc.c            |  5 ++--
 security/apparmor/lib.c            |  6 +++--
 security/apparmor/lsm.c            |  7 ++++--
 security/apparmor/mount.c          | 13 +++++++----
 security/apparmor/net.c            |  6 +++--
 security/apparmor/policy.c         | 37 +++++++++++++++++++++++++++---
 security/apparmor/policy_ns.c      |  6 +++--
 security/apparmor/policy_unpack.c  | 34 ++++++++++++++-------------
 security/apparmor/resource.c       | 19 ++++++++++-----
 security/apparmor/task.c           | 10 ++++----
 15 files changed, 142 insertions(+), 57 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 84ef8b400b401..f6d83ffde3c42 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -611,7 +611,8 @@ static const struct file_operations aa_fs_ns_revision_fops = {
 static void profile_query_cb(struct aa_profile *profile, struct aa_perms *perms,
 			     const char *match_str, size_t match_len)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_perms tmp = { };
 	aa_state_t state = DFA_NOMATCH;
 
diff --git a/security/apparmor/capability.c b/security/apparmor/capability.c
index b66ec63e2a489..326a51838ef28 100644
--- a/security/apparmor/capability.c
+++ b/security/apparmor/capability.c
@@ -64,7 +64,8 @@ static void audit_cb(struct audit_buffer *ab, void *va)
 static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile,
 		      int cap, int error)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct audit_cache *ent;
 	int type = AUDIT_APPARMOR_AUTO;
 
@@ -115,7 +116,8 @@ static int audit_caps(struct common_audit_data *sa, struct aa_profile *profile,
 static int profile_capable(struct aa_profile *profile, int cap,
 			   unsigned int opts, struct common_audit_data *sa)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	int error;
 
 	if (cap_raised(rules->caps.allow, cap) &&
diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index ad035d14cfc57..d4b09f061aee2 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -81,7 +81,8 @@ static inline aa_state_t match_component(struct aa_profile *profile,
 					 struct aa_profile *tp,
 					 bool stack, aa_state_t state)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	const char *ns_name;
 
 	if (stack)
@@ -118,7 +119,8 @@ static int label_compound_match(struct aa_profile *profile,
 				aa_state_t state, bool subns, u32 request,
 				struct aa_perms *perms)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_profile *tp;
 	struct label_it i;
 	struct path_cond cond = { };
@@ -179,7 +181,8 @@ static int label_components_match(struct aa_profile *profile,
 				  aa_state_t start, bool subns, u32 request,
 				  struct aa_perms *perms)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_profile *tp;
 	struct label_it i;
 	struct aa_perms tmp;
@@ -503,7 +506,8 @@ static const char *next_name(int xtype, const char *name)
 struct aa_label *x_table_lookup(struct aa_profile *profile, u32 xindex,
 				const char **name)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_label *label = NULL;
 	u32 xtype = xindex & AA_X_TYPE_MASK;
 	int index = xindex & AA_X_INDEX_MASK;
@@ -553,7 +557,8 @@ static struct aa_label *x_to_label(struct aa_profile *profile,
 				   const char **lookupname,
 				   const char **info)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_label *new = NULL;
 	struct aa_ns *ns = profile->ns;
 	u32 xtype = xindex & AA_X_TYPE_MASK;
@@ -620,7 +625,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 					   char *buffer, struct path_cond *cond,
 					   bool *secure_exec)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_label *new = NULL;
 	const char *info = NULL, *name = NULL, *target = NULL;
 	aa_state_t state = rules->file.start[AA_CLASS_FILE];
@@ -719,7 +725,8 @@ static int profile_onexec(struct aa_profile *profile, struct aa_label *onexec,
 			  char *buffer, struct path_cond *cond,
 			  bool *secure_exec)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	aa_state_t state = rules->file.start[AA_CLASS_FILE];
 	struct aa_perms perms = {};
 	const char *xname = NULL, *info = "change_profile onexec";
@@ -1259,7 +1266,8 @@ static int change_profile_perms_wrapper(const char *op, const char *name,
 					struct aa_label *target, bool stack,
 					u32 request, struct aa_perms *perms)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	const char *info = NULL;
 	int error = 0;
 
diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index ef5d98f81a2b8..d7f27848e7cc9 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -224,7 +224,8 @@ int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name,
 		   u32 request, struct path_cond *cond, int flags,
 		   struct aa_perms *perms)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	int e = 0;
 
 	if (profile_unconfined(profile))
@@ -317,7 +318,8 @@ static int profile_path_link(struct aa_profile *profile,
 			     const struct path *target, char *buffer2,
 			     struct path_cond *cond)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	const char *lname, *tname = NULL;
 	struct aa_perms lperms = {}, perms;
 	const char *info = NULL;
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 9ee2c05e28951..5cadfb20df294 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -124,6 +124,7 @@ struct aa_data {
 };
 
 /* struct aa_ruleset - data covering mediation rules
+ * @list: list the rule is on
  * @size: the memory consumed by this ruleset
  * @policy: general match rules governing policy
  * @file: The set of rules governing basic file access and domain transitions
@@ -133,6 +134,8 @@ struct aa_data {
  * @secmark: secmark label match info
  */
 struct aa_ruleset {
+	struct list_head list;
+
 	int size;
 
 	/* TODO: merge policy and file */
@@ -147,6 +150,7 @@ struct aa_ruleset {
 };
 
 /* struct aa_attachment - data and rules for a profiles attachment
+ * @list:
  * @xmatch_str: human readable attachment string
  * @xmatch: optional extended matching for unconfined executables names
  * @xmatch_len: xmatch prefix len, used to determine xmatch priority
@@ -204,7 +208,7 @@ struct aa_profile {
 	const char *disconnected;
 
 	struct aa_attachment attach;
-	struct aa_ruleset rules;
+	struct list_head rules;
 
 	struct aa_loaddata *rawdata;
 	unsigned char *hash;
@@ -227,6 +231,7 @@ void aa_add_profile(struct aa_policy *common, struct aa_profile *profile);
 
 
 void aa_free_proxy_kref(struct kref *kref);
+struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp);
 struct aa_profile *aa_alloc_profile(const char *name, struct aa_proxy *proxy,
 				    gfp_t gfp);
 struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
@@ -285,6 +290,16 @@ static inline aa_state_t RULE_MEDIATES_AF(struct aa_ruleset *rules, u16 AF)
 	return aa_dfa_match_len(rules->policy.dfa, state, (char *) &be_af, 2);
 }
 
+static inline aa_state_t ANY_RULE_MEDIATES(struct list_head *head,
+					   unsigned char class)
+{
+	struct aa_ruleset *rule;
+
+	/* TODO: change to list walk */
+	rule = list_first_entry(head, typeof(*rule), list);
+	return RULE_MEDIATES(rule, class);
+}
+
 /**
  * aa_get_profile - increment refcount on profile @p
  * @p: profile  (MAYBE NULL)
diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index dc2fa548312db..1d4099385bdf8 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -78,12 +78,13 @@ static int profile_signal_perm(struct aa_profile *profile,
 			       struct aa_label *peer, u32 request,
 			       struct common_audit_data *sa)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_perms perms;
 	aa_state_t state;
 
 	if (profile_unconfined(profile) ||
-	    !RULE_MEDIATES(rules, AA_CLASS_SIGNAL))
+	    !ANY_RULE_MEDIATES(&profile->rules, AA_CLASS_SIGNAL))
 		return 0;
 
 	aad(sa)->peer = peer;
diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c
index ec73e51ca7e3b..a630c951bb3b8 100644
--- a/security/apparmor/lib.c
+++ b/security/apparmor/lib.c
@@ -351,14 +351,16 @@ int aa_profile_label_perm(struct aa_profile *profile, struct aa_profile *target,
 			  u32 request, int type, u32 *deny,
 			  struct common_audit_data *sa)
 {
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_perms perms;
 
 	aad(sa)->label = &profile->label;
 	aad(sa)->peer = &target->label;
 	aad(sa)->request = request;
 
-	aa_profile_match_label(profile, &profile->rules, &target->label, type,
-			       request, &perms);
+	aa_profile_match_label(profile, rules, &target->label, type, request,
+			       &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	*deny |= request & perms.deny;
 	return aa_check_perms(profile, &perms, request, sa, aa_audit_perms_cb);
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 62f2ca32b9596..a22e53e44123b 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -163,12 +163,15 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective,
 		struct label_it i;
 
 		label_for_each_confined(i, label, profile) {
+			struct aa_ruleset *rules;
 			if (COMPLAIN_MODE(profile))
 				continue;
+			rules = list_first_entry(&profile->rules,
+						 typeof(*rules), list);
 			*effective = cap_intersect(*effective,
-						   profile->rules.caps.allow);
+						   rules->caps.allow);
 			*permitted = cap_intersect(*permitted,
-						   profile->rules.caps.allow);
+						   rules->caps.allow);
 		}
 	}
 	rcu_read_unlock();
diff --git a/security/apparmor/mount.c b/security/apparmor/mount.c
index d4724bdcb07f1..cdfa430ae2161 100644
--- a/security/apparmor/mount.c
+++ b/security/apparmor/mount.c
@@ -303,7 +303,8 @@ static int match_mnt_path_str(struct aa_profile *profile,
 {
 	struct aa_perms perms = { };
 	const char *mntpnt = NULL, *info = NULL;
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	int pos, error;
 
 	AA_BUG(!profile);
@@ -359,12 +360,14 @@ static int match_mnt(struct aa_profile *profile, const struct path *path,
 		     bool binary)
 {
 	const char *devname = NULL, *info = NULL;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	int error = -EACCES;
 
 	AA_BUG(!profile);
 	AA_BUG(devpath && !devbuffer);
 
-	if (!RULE_MEDIATES(&profile->rules, AA_CLASS_MOUNT))
+	if (!RULE_MEDIATES(rules, AA_CLASS_MOUNT))
 		return 0;
 
 	if (devpath) {
@@ -566,7 +569,8 @@ out:
 static int profile_umount(struct aa_profile *profile, const struct path *path,
 			  char *buffer)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_perms perms = { };
 	const char *name = NULL, *info = NULL;
 	aa_state_t state;
@@ -626,7 +630,8 @@ static struct aa_label *build_pivotroot(struct aa_profile *profile,
 					const struct path *old_path,
 					char *old_buffer)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	const char *old_name, *new_name = NULL, *info = NULL;
 	const char *trans_name = NULL;
 	struct aa_perms perms = { };
diff --git a/security/apparmor/net.c b/security/apparmor/net.c
index ae789ee834ada..788be1609a865 100644
--- a/security/apparmor/net.c
+++ b/security/apparmor/net.c
@@ -108,7 +108,8 @@ void audit_net_cb(struct audit_buffer *ab, void *va)
 int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa,
 		       u32 request, u16 family, int type)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_perms perms = { };
 	aa_state_t state;
 	__be16 buffer[2];
@@ -217,7 +218,8 @@ static int aa_secmark_perm(struct aa_profile *profile, u32 request, u32 secid,
 {
 	int i, ret;
 	struct aa_perms perms = { };
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 
 	if (rules->secmark_count == 0)
 		return 0;
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 74c0a3b34e9ba..6f4cc8bfe03de 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -217,6 +217,17 @@ static void free_ruleset(struct aa_ruleset *rules)
 	kfree_sensitive(rules->secmark);
 }
 
+struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp)
+{
+	struct aa_ruleset *rules;
+
+	rules = kzalloc(sizeof(*rules), gfp);
+	if (rules)
+		INIT_LIST_HEAD(&rules->list);
+
+	return rules;
+}
+
 /**
  * aa_free_profile - free a profile
  * @profile: the profile to free  (MAYBE NULL)
@@ -229,6 +240,7 @@ static void free_ruleset(struct aa_ruleset *rules)
  */
 void aa_free_profile(struct aa_profile *profile)
 {
+	struct aa_ruleset *rule, *tmp;
 	struct rhashtable *rht;
 
 	AA_DEBUG("%s(%p)\n", __func__, profile);
@@ -244,7 +256,15 @@ void aa_free_profile(struct aa_profile *profile)
 	kfree_sensitive(profile->rename);
 
 	free_attachment(&profile->attach);
-	free_ruleset(&profile->rules);
+
+	/*
+	 * at this point there are no tasks that can have a reference
+	 * to rules
+	 */
+	list_for_each_entry_safe(rule, tmp, &profile->rules, list) {
+		list_del_init(&rule->list);
+		free_ruleset(rule);
+	}
 	kfree_sensitive(profile->dirname);
 
 	if (profile->data) {
@@ -272,6 +292,7 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy,
 				    gfp_t gfp)
 {
 	struct aa_profile *profile;
+	struct aa_ruleset *rules;
 
 	/* freed by free_profile - usually through aa_put_profile */
 	profile = kzalloc(struct_size(profile, label.vec, 2), gfp);
@@ -283,6 +304,14 @@ struct aa_profile *aa_alloc_profile(const char *hname, struct aa_proxy *proxy,
 	if (!aa_label_init(&profile->label, 1, gfp))
 		goto fail;
 
+	INIT_LIST_HEAD(&profile->rules);
+
+	/* allocate the first ruleset, but leave it empty */
+	rules = aa_alloc_ruleset(gfp);
+	if (!rules)
+		goto fail;
+	list_add(&rules->list, &profile->rules);
+
 	/* update being set needed by fs interface */
 	if (!proxy) {
 		proxy = aa_alloc_proxy(&profile->label, gfp);
@@ -516,6 +545,7 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base,
 struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
 				       const char *base, gfp_t gfp)
 {
+	struct aa_ruleset *rules;
 	struct aa_profile *p, *profile;
 	const char *bname;
 	char *name = NULL;
@@ -558,8 +588,9 @@ name:
 	/* released on free_profile */
 	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
 	profile->ns = aa_get_ns(parent->ns);
-	profile->rules.file.dfa = aa_get_dfa(nulldfa);
-	profile->rules.policy.dfa = aa_get_dfa(nulldfa);
+	rules = list_first_entry(&profile->rules, typeof(*rules), list);
+	rules->file.dfa = aa_get_dfa(nulldfa);
+	rules->policy.dfa = aa_get_dfa(nulldfa);
 
 	mutex_lock_nested(&profile->ns->lock, profile->ns->level);
 	p = __find_child(&parent->base.profiles, bname);
diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index cb10994cd3b6b..121aa79bccaac 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -83,6 +83,7 @@ const char *aa_ns_name(struct aa_ns *curr, struct aa_ns *view, bool subns)
 static struct aa_profile *alloc_unconfined(const char *name)
 {
 	struct aa_profile *profile;
+	struct aa_ruleset *rules;
 
 	profile = aa_alloc_profile(name, NULL, GFP_KERNEL);
 	if (!profile)
@@ -91,8 +92,9 @@ static struct aa_profile *alloc_unconfined(const char *name)
 	profile->label.flags |= FLAG_IX_ON_NAME_ERROR |
 		FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED;
 	profile->mode = APPARMOR_UNCONFINED;
-	profile->rules.file.dfa = aa_get_dfa(nulldfa);
-	profile->rules.policy.dfa = aa_get_dfa(nulldfa);
+	rules = list_first_entry(&profile->rules, typeof(*rules), list);
+	rules->file.dfa = aa_get_dfa(nulldfa);
+	rules->policy.dfa = aa_get_dfa(nulldfa);
 
 	return profile;
 }
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index bbca7772dfa2b..ac9955ef5d4a7 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -577,9 +577,8 @@ fail:
 	return false;
 }
 
-static bool unpack_secmark(struct aa_ext *e, struct aa_profile *profile)
+static bool unpack_secmark(struct aa_ext *e, struct aa_ruleset *rules)
 {
-	struct aa_ruleset *rules = &profile->rules;
 	void *pos = e->pos;
 	u16 size;
 	int i;
@@ -624,7 +623,7 @@ fail:
 	return false;
 }
 
-static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
+static bool unpack_rlimits(struct aa_ext *e, struct aa_ruleset *rules)
 {
 	void *pos = e->pos;
 
@@ -635,7 +634,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 		u32 tmp = 0;
 		if (!unpack_u32(e, &tmp, NULL))
 			goto fail;
-		profile->rules.rlimits.mask = tmp;
+		rules->rlimits.mask = tmp;
 
 		if (unpack_array(e, NULL, &size) != TRI_TRUE ||
 		    size > RLIM_NLIMITS)
@@ -645,7 +644,7 @@ static bool unpack_rlimits(struct aa_ext *e, struct aa_profile *profile)
 			int a = aa_map_resource(i);
 			if (!unpack_u64(e, &tmp2, NULL))
 				goto fail;
-			profile->rules.rlimits.limits[a].rlim_max = tmp2;
+			rules->rlimits.limits[a].rlim_max = tmp2;
 		}
 		if (!unpack_nameX(e, AA_ARRAYEND, NULL))
 			goto fail;
@@ -852,7 +851,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	profile = aa_alloc_profile(name, NULL, GFP_KERNEL);
 	if (!profile)
 		return ERR_PTR(-ENOMEM);
-	rules = &profile->rules;
+	rules = list_first_entry(&profile->rules, typeof(*rules), list);
 
 	/* profile renaming is optional */
 	(void) unpack_str(e, &profile->rename, "rename");
@@ -971,12 +970,12 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		goto fail;
 	}
 
-	if (!unpack_rlimits(e, profile)) {
+	if (!unpack_rlimits(e, rules)) {
 		info = "failed to unpack profile rlimits";
 		goto fail;
 	}
 
-	if (!unpack_secmark(e, profile)) {
+	if (!unpack_secmark(e, rules)) {
 		info = "failed to unpack profile secmark rules";
 		goto fail;
 	}
@@ -1208,23 +1207,26 @@ static bool verify_perms(struct aa_policydb *pdb)
  */
 static int verify_profile(struct aa_profile *profile)
 {
-	if ((profile->rules.file.dfa &&
-	     !verify_dfa_xindex(profile->rules.file.dfa,
-				profile->rules.file.trans.size)) ||
-	    (profile->rules.policy.dfa &&
-	     !verify_dfa_xindex(profile->rules.policy.dfa,
-				profile->rules.policy.trans.size))) {
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
+	if (!rules)
+		return 0;
+
+	if ((rules->file.dfa && !verify_dfa_xindex(rules->file.dfa,
+						  rules->file.trans.size)) ||
+	    (rules->policy.dfa &&
+	     !verify_dfa_xindex(rules->policy.dfa, rules->policy.trans.size))) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid named transition", NULL, -EPROTO);
 		return -EPROTO;
 	}
 
-	if (!verify_perms(&profile->rules.file)) {
+	if (!verify_perms(&rules->file)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
 	}
-	if (!verify_perms(&profile->rules.policy)) {
+	if (!verify_perms(&rules->policy)) {
 		audit_iface(profile, NULL, NULL,
 			    "Unpack: Invalid perm index", NULL, -EPROTO);
 		return -EPROTO;
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index f28026804d13d..ed543f4edfd9f 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -82,7 +82,8 @@ int aa_map_resource(int resource)
 static int profile_setrlimit(struct aa_profile *profile, unsigned int resource,
 			     struct rlimit *new_rlim)
 {
-	struct aa_ruleset *rules = &profile->rules;
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	int e = 0;
 
 	if (rules->rlimits.mask & (1 << resource) && new_rlim->rlim_max >
@@ -154,12 +155,15 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l)
 	 * to the lesser of the tasks hard limit and the init tasks soft limit
 	 */
 	label_for_each_confined(i, old_l, old) {
-		if (old->rules.rlimits.mask) {
+		struct aa_ruleset *rules = list_first_entry(&old->rules,
+							    typeof(*rules),
+							    list);
+		if (rules->rlimits.mask) {
 			int j;
 
 			for (j = 0, mask = 1; j < RLIM_NLIMITS; j++,
 				     mask <<= 1) {
-				if (old->rules.rlimits.mask & mask) {
+				if (rules->rlimits.mask & mask) {
 					rlim = current->signal->rlim + j;
 					initrlim = init_task.signal->rlim + j;
 					rlim->rlim_cur = min(rlim->rlim_max,
@@ -171,17 +175,20 @@ void __aa_transition_rlimits(struct aa_label *old_l, struct aa_label *new_l)
 
 	/* set any new hard limits as dictated by the new profile */
 	label_for_each_confined(i, new_l, new) {
+		struct aa_ruleset *rules = list_first_entry(&new->rules,
+							    typeof(*rules),
+							    list);
 		int j;
 
-		if (!new->rules.rlimits.mask)
+		if (!rules->rlimits.mask)
 			continue;
 		for (j = 0, mask = 1; j < RLIM_NLIMITS; j++, mask <<= 1) {
-			if (!(new->rules.rlimits.mask & mask))
+			if (!(rules->rlimits.mask & mask))
 				continue;
 
 			rlim = current->signal->rlim + j;
 			rlim->rlim_max = min(rlim->rlim_max,
-					     new->rules.rlimits.limits[j].rlim_max);
+					     rules->rlimits.limits[j].rlim_max);
 			/* soft limit should not exceed hard limit */
 			rlim->rlim_cur = min(rlim->rlim_cur, rlim->rlim_max);
 		}
diff --git a/security/apparmor/task.c b/security/apparmor/task.c
index 7e64fba42ca3d..5000cbd055b62 100644
--- a/security/apparmor/task.c
+++ b/security/apparmor/task.c
@@ -229,11 +229,13 @@ static int profile_ptrace_perm(struct aa_profile *profile,
 			     struct aa_label *peer, u32 request,
 			     struct common_audit_data *sa)
 {
+	struct aa_ruleset *rules = list_first_entry(&profile->rules,
+						    typeof(*rules), list);
 	struct aa_perms perms = { };
 
 	aad(sa)->peer = peer;
-	aa_profile_match_label(profile, &profile->rules, peer,
-			       AA_CLASS_PTRACE, request, &perms);
+	aa_profile_match_label(profile, rules, peer, AA_CLASS_PTRACE, request,
+			       &perms);
 	aa_apply_modes_to_perms(profile, &perms);
 	return aa_check_perms(profile, &perms, request, sa, audit_ptrace_cb);
 }
@@ -243,7 +245,7 @@ static int profile_tracee_perm(struct aa_profile *tracee,
 			       struct common_audit_data *sa)
 {
 	if (profile_unconfined(tracee) || unconfined(tracer) ||
-	    !RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE))
+	    !ANY_RULE_MEDIATES(&tracee->rules, AA_CLASS_PTRACE))
 		return 0;
 
 	return profile_ptrace_perm(tracee, tracer, request, sa);
@@ -256,7 +258,7 @@ static int profile_tracer_perm(struct aa_profile *tracer,
 	if (profile_unconfined(tracer))
 		return 0;
 
-	if (RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE))
+	if (ANY_RULE_MEDIATES(&tracer->rules, AA_CLASS_PTRACE))
 		return profile_ptrace_perm(tracer, tracee, request, sa);
 
 	/* profile uses the old style capability check for ptrace */
-- 
GitLab


From 961f3e3de14467f3babe252f7b6cc44a36ebba64 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 11 Sep 2022 22:05:26 -0700
Subject: [PATCH 0039/4122] apparmor: fix aa_class_names[] to match reserved
 classes

The class name map did not have the reserved names added. Fix this

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/audit.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
index e638f7bc9f528..8dfdda98fbf13 100644
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -48,13 +48,28 @@ static const char *const aa_class_names[] = {
 	"unknown",
 	"ptrace",
 	"signal",
-	"unknown",
+	"xmatch",
 	"unknown",
 	"unknown",
 	"net",
 	"unknown",
 	"label",
+	"posix_mqueue",
+	"io_uring",
+	"module",
 	"lsm",
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+	"unknown",
+	"X",
+	"dbus",
 };
 
 
-- 
GitLab


From 1f939c6bd1512d0b39b470396740added3cb403f Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 20 Sep 2022 04:01:28 -0700
Subject: [PATCH 0040/4122] apparmor: Fix regression in stacking due to label
 flags

The unconfined label flag is not being computed correctly. It
should only be set if all the profiles in the vector are set, which
is different than what is required for the debug and stale flag
that are set if any on the profile flags are set.

Fixes: c1ed5da19765 ("apparmor: allow label to carry debug flags")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/label.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index 98dadd9609772..aa4031628af55 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -197,15 +197,18 @@ static bool vec_is_stale(struct aa_profile **vec, int n)
 	return false;
 }
 
-static long union_vec_flags(struct aa_profile **vec, int n, long mask)
+static long accum_vec_flags(struct aa_profile **vec, int n)
 {
-	long u = 0;
+	long u = FLAG_UNCONFINED;
 	int i;
 
 	AA_BUG(!vec);
 
 	for (i = 0; i < n; i++) {
-		u |= vec[i]->label.flags & mask;
+		u |= vec[i]->label.flags & (FLAG_DEBUG1 | FLAG_DEBUG2 |
+					    FLAG_STALE);
+		if (!(u & vec[i]->label.flags & FLAG_UNCONFINED))
+			u &= ~FLAG_UNCONFINED;
 	}
 
 	return u;
@@ -1097,8 +1100,7 @@ static struct aa_label *label_merge_insert(struct aa_label *new,
 		else if (k == b->size)
 			return aa_get_label(b);
 	}
-	new->flags |= union_vec_flags(new->vec, new->size, FLAG_UNCONFINED |
-					      FLAG_DEBUG1 | FLAG_DEBUG2);
+	new->flags |= accum_vec_flags(new->vec, new->size);
 	ls = labels_set(new);
 	write_lock_irqsave(&ls->lock, flags);
 	label = __label_insert(labels_set(new), new, false);
-- 
GitLab


From adaa9a3f72e6f98538bfac54f6dc4afc0537f410 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Fri, 23 Sep 2022 17:21:18 +0800
Subject: [PATCH 0041/4122] apparmor: Simplify obtain the newest label on a
 cred

In aa_get_task_label(), aa_get_newest_cred_label(__task_cred(task))
can do the same things as aa_get_newest_label(__aa_task_raw_label(task)),
so we can replace it and remove __aa_task_raw_label() to simplify the code.

Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/include/cred.h | 13 -------------
 security/apparmor/task.c         |  2 +-
 2 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/security/apparmor/include/cred.h b/security/apparmor/include/cred.h
index 0b9ae4804ef73..58fdc72af6645 100644
--- a/security/apparmor/include/cred.h
+++ b/security/apparmor/include/cred.h
@@ -63,19 +63,6 @@ static inline struct aa_label *aa_get_newest_cred_label(const struct cred *cred)
 	return aa_get_newest_label(aa_cred_raw_label(cred));
 }
 
-/**
- * __aa_task_raw_label - retrieve another task's label
- * @task: task to query  (NOT NULL)
- *
- * Returns: @task's label without incrementing its ref count
- *
- * If @task != current needs to be called in RCU safe critical section
- */
-static inline struct aa_label *__aa_task_raw_label(struct task_struct *task)
-{
-	return aa_cred_raw_label(__task_cred(task));
-}
-
 /**
  * aa_current_raw_label - find the current tasks confining label
  *
diff --git a/security/apparmor/task.c b/security/apparmor/task.c
index 5000cbd055b62..84d16a29bfcbc 100644
--- a/security/apparmor/task.c
+++ b/security/apparmor/task.c
@@ -31,7 +31,7 @@ struct aa_label *aa_get_task_label(struct task_struct *task)
 	struct aa_label *p;
 
 	rcu_read_lock();
-	p = aa_get_newest_label(__aa_task_raw_label(task));
+	p = aa_get_newest_cred_label(__task_cred(task));
 	rcu_read_unlock();
 
 	return p;
-- 
GitLab


From 65f7f666f21ce374628d58b3cc48515070f31e72 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Wed, 14 Sep 2022 15:46:07 +0800
Subject: [PATCH 0042/4122] apparmor: make __aa_path_perm() static

Make __aa_path_perm() static as it's only used inside apparmor/file.c.

Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/file.c         | 7 ++++---
 security/apparmor/include/file.h | 3 ---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index d7f27848e7cc9..e7dc5ea389973 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -220,9 +220,10 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
 	return state;
 }
 
-int __aa_path_perm(const char *op, struct aa_profile *profile, const char *name,
-		   u32 request, struct path_cond *cond, int flags,
-		   struct aa_perms *perms)
+static int __aa_path_perm(const char *op, struct aa_profile *profile,
+			  const char *name, u32 request,
+			  struct path_cond *cond, int flags,
+			  struct aa_perms *perms)
 {
 	struct aa_ruleset *rules = list_first_entry(&profile->rules,
 						    typeof(*rules), list);
diff --git a/security/apparmor/include/file.h b/security/apparmor/include/file.h
index 1a1c0f0c50710..5be620af33ba0 100644
--- a/security/apparmor/include/file.h
+++ b/security/apparmor/include/file.h
@@ -119,9 +119,6 @@ aa_state_t aa_str_perms(struct aa_policydb *file_rules, aa_state_t start,
 			const char *name, struct path_cond *cond,
 			struct aa_perms *perms);
 
-int __aa_path_perm(const char *op, struct aa_profile *profile,
-		   const char *name, u32 request, struct path_cond *cond,
-		   int flags, struct aa_perms *perms);
 int aa_path_perm(const char *op, struct aa_label *label,
 		 const struct path *path, int flags, u32 request,
 		 struct path_cond *cond);
-- 
GitLab


From 1ddece8cd0f43582085497eacff2e3cd37f93d1f Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 24 Sep 2022 22:25:25 -0700
Subject: [PATCH 0043/4122] apparmor: Fix doc comment for compute_fperms

When compute_fperms was moved to policy_compat and made static it
was renamed from aa_compute_fperms to just compute_fperms to help
indicate it is only available statically. Unfortunately the doc
comment did not also get updated to reflect the change.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_compat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/policy_compat.c b/security/apparmor/policy_compat.c
index 1aa5cced935ee..9e52e218bf308 100644
--- a/security/apparmor/policy_compat.c
+++ b/security/apparmor/policy_compat.c
@@ -140,8 +140,8 @@ static struct aa_perms compute_fperms_other(struct aa_dfa *dfa,
 }
 
 /**
- * aa_compute_fperms - convert dfa compressed perms to internal perms and store
- *		       them so they can be retrieved later.
+ * compute_fperms - convert dfa compressed perms to internal perms and store
+ *		    them so they can be retrieved later.
  * @dfa: a dfa using fperms to remap to internal permissions
  *
  * Returns: remapped perm table
-- 
GitLab


From 73c7e91c8bc98a5da94be62a9a4ba2793f86a97b Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sat, 24 Sep 2022 22:34:07 -0700
Subject: [PATCH 0044/4122] apparmor: Remove unnecessary size check when
 unpacking trans_table

The index into the trans_table has a max size of 2^24 bits which the
code was testing but this is unnecessary as unpack_array can only
unpack a table of 2^16 bits in size so the table unpacked will never
be larger than what can be indexed, and any test here is redundant.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index ac9955ef5d4a7..6deaeecb76feb 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -484,9 +484,13 @@ static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs)
 		u16 size;
 		int i;
 
-		if (unpack_array(e, NULL, &size) != TRI_TRUE ||
-		    size > (1 << 24))
-		  /* currently 2^24 bits entries 0-3 */
+		if (unpack_array(e, NULL, &size) != TRI_TRUE)
+			/*
+			 * Note: index into trans table array is a max
+			 * of 2^24, but unpack array can only unpack
+			 * an array of 2^16 in size atm so no need
+			 * for size check here
+			 */
 			goto fail;
 		table = kcalloc(size, sizeof(char *), GFP_KERNEL);
 		if (!table)
-- 
GitLab


From 14d37a7f14569adbf7a019710762271fa2a9e739 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Sun, 25 Sep 2022 15:36:45 -0700
Subject: [PATCH 0045/4122] apparmor: make sure the decompression ctx is
 promperly initialized

The decompress ctx was not properly initialized when reading raw
profile data back to userspace.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 52ccc20c652b ("apparmor: use zstd compression for profile data")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/apparmorfs.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index f6d83ffde3c42..ddd64b8ebf05e 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1327,7 +1327,11 @@ static int decompress_zstd(char *src, size_t slen, char *dst, size_t dlen)
 			ret = -ENOMEM;
 			goto cleanup;
 		}
-
+		ctx = zstd_init_dctx(wksp, wksp_len);
+		if (ctx == NULL) {
+			ret = -ENOMEM;
+			goto cleanup;
+		}
 		out_len = zstd_decompress_dctx(ctx, dst, dlen, src, slen);
 		if (zstd_is_error(out_len)) {
 			ret = -EINVAL;
-- 
GitLab


From 70f24a9f9084b7fffd95daa707cce8e339b189dd Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 29 Sep 2022 06:24:29 -0700
Subject: [PATCH 0046/4122] apparmor: Fix undefined references to zstd_ symbols

Unfortunately the switch to using zstd compression did not properly
ifdef all the code that uses zstd_ symbols. So that if exporting of
binary policy is disabled in the config the compile will fail with the
following errors

security/apparmor/lsm.c:1545: undefined reference to `zstd_min_clevel'
aarch64-linux-ld: security/apparmor/lsm.c:1545: undefined reference to `zstd_max_clevel'

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 52ccc20c652b ("apparmor: use zstd compression for profile data")
Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Jon Tourville <jon.tourville@canonical.com>
---
 security/apparmor/apparmorfs.c       |  4 ++--
 security/apparmor/include/apparmor.h | 11 +++++++++++
 security/apparmor/lsm.c              |  5 ++---
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index ddd64b8ebf05e..2c138309ad665 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1202,13 +1202,13 @@ static int seq_ns_name_show(struct seq_file *seq, void *v)
 
 static int seq_ns_compress_min_show(struct seq_file *seq, void *v)
 {
-	seq_printf(seq, "%d\n", zstd_min_clevel());
+	seq_printf(seq, "%d\n", AA_MIN_CLEVEL);
 	return 0;
 }
 
 static int seq_ns_compress_max_show(struct seq_file *seq, void *v)
 {
-	seq_printf(seq, "%d\n", zstd_max_clevel());
+	seq_printf(seq, "%d\n", AA_MAX_CLEVEL);
 	return 0;
 }
 
diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h
index 6d9ca075fcb9c..8a81557c9d59b 100644
--- a/security/apparmor/include/apparmor.h
+++ b/security/apparmor/include/apparmor.h
@@ -51,4 +51,15 @@ extern bool aa_g_logsyscall;
 extern bool aa_g_paranoid_load;
 extern unsigned int aa_g_path_max;
 
+#ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY
+#define AA_MIN_CLEVEL zstd_min_clevel()
+#define AA_MAX_CLEVEL zstd_max_clevel()
+#define AA_DEFAULT_CLEVEL ZSTD_CLEVEL_DEFAULT
+#else
+#define AA_MIN_CLEVEL 0
+#define AA_MAX_CLEVEL 0
+#define AA_DEFAULT_CLEVEL 0
+#endif /* CONFIG_SECURITY_APPARMOR_EXPORT_BINARY */
+
+
 #endif /* __APPARMOR_H */
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index a22e53e44123b..8e2b951c4988c 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1365,7 +1365,7 @@ module_param_named(export_binary, aa_g_export_binary, aabool, 0600);
 #endif
 
 /* policy loaddata compression level */
-int aa_g_rawdata_compression_level = ZSTD_CLEVEL_DEFAULT;
+int aa_g_rawdata_compression_level = AA_DEFAULT_CLEVEL;
 module_param_named(rawdata_compression_level, aa_g_rawdata_compression_level,
 		   aacompressionlevel, 0400);
 
@@ -1547,8 +1547,7 @@ static int param_set_aacompressionlevel(const char *val,
 	error = param_set_int(val, kp);
 
 	aa_g_rawdata_compression_level = clamp(aa_g_rawdata_compression_level,
-					       zstd_min_clevel(),
-					       zstd_max_clevel());
+					       AA_MIN_CLEVEL, AA_MAX_CLEVEL);
 	pr_info("AppArmor: policy rawdata compression level set to %d\n",
 		aa_g_rawdata_compression_level);
 
-- 
GitLab


From a2f31df06b7aa1769f12ec6f9ae7f18e78582cad Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Thu, 29 Sep 2022 06:48:10 -0700
Subject: [PATCH 0047/4122] apparmor: Fix decompression of rawdata for read
 back to userspace

The rawdata readback has a few of problems. First if compression is
enabled when the data is read then the compressed data is read out
instead decompressing the data. Second if compression of the data
fails, the code does not handle holding onto the raw_data in
uncompressed form. Third if the compression is enabled/disabled after
the rawdata was loaded, the check against the global control of
whether to use compression does not reflect what was already done to
the data.

Fix these by always storing the compressed size, along with the
original data size even if compression fails or is not used. And use
this to detect whether the rawdata is actually compressed.

Fixes: 52ccc20c652b ("apparmor: use zstd compression for profile data")
Signed-off-by: John Johansen <john.johansen@canonical.com>
Acked-by: Jon Tourville <jon.tourville@canonical.com>
---
 security/apparmor/apparmorfs.c    | 2 +-
 security/apparmor/policy_unpack.c | 7 ++++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c
index 2c138309ad665..424b2c1e586d5 100644
--- a/security/apparmor/apparmorfs.c
+++ b/security/apparmor/apparmorfs.c
@@ -1315,7 +1315,7 @@ SEQ_RAWDATA_FOPS(compressed_size);
 static int decompress_zstd(char *src, size_t slen, char *dst, size_t dlen)
 {
 #ifdef CONFIG_SECURITY_APPARMOR_EXPORT_BINARY
-	if (aa_g_rawdata_compression_level == 0) {
+	if (slen < dlen) {
 		const size_t wksp_len = zstd_dctx_workspace_bound();
 		zstd_dctx *ctx;
 		void *wksp;
diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 6deaeecb76feb..45c9dfdc8e0de 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -1294,7 +1294,7 @@ static int compress_zstd(const char *src, size_t slen, char **dst, size_t *dlen)
 	}
 
 	out_len = zstd_compress_cctx(ctx, out, out_len, src, slen, &params);
-	if (zstd_is_error(out_len)) {
+	if (zstd_is_error(out_len) || out_len >= slen) {
 		ret = -EINVAL;
 		goto cleanup;
 	}
@@ -1348,9 +1348,10 @@ static int compress_loaddata(struct aa_loaddata *data)
 		void *udata = data->data;
 		int error = compress_zstd(udata, data->size, &data->data,
 					  &data->compressed_size);
-		if (error)
+		if (error) {
+			data->compressed_size = data->size;
 			return error;
-
+		}
 		if (udata != data->data)
 			kvfree(udata);
 	} else
-- 
GitLab


From 32490541682bf8ea445e9bd29c866981851e0912 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 3 Oct 2022 01:30:38 -0700
Subject: [PATCH 0048/4122] apparmor: Fix kunit test for out of bounds array

The apparmor kunit tests are failing on the out of bounds array check
with the following failure

  # policy_unpack_test_unpack_array_out_of_bounds: EXPECTATION FAILED at security/apparmor/policy_unpack_test.c:178
  Expected unpack_array(puf->e, name, &array_size) == 1, but
  unpack_array(puf->e, name, &array_size) == -1
  # policy_unpack_test_unpack_array_out_of_bounds: EXPECTATION FAILED at security/apparmor/policy_unpack_test.c:180
  Expected array_size == 0, but
  array_size == 64192
  not ok 5 - policy_unpack_test_unpack_array_out_of_bounds

This is because unpack_array changed to allow distinguishing between
the array not being present and an error. In the error case the array
size is not set and should not be tested.

Reported-by: kernel test robot <yujie.liu@intel.com>
Fixes: 995a5b64620e ("apparmor: make unpack_array return a trianary value")
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack_test.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c
index 1a43d538c4c04..b214f6ea8a725 100644
--- a/security/apparmor/policy_unpack_test.c
+++ b/security/apparmor/policy_unpack_test.c
@@ -176,8 +176,7 @@ static void policy_unpack_test_unpack_array_out_of_bounds(struct kunit *test)
 	puf->e->end = puf->e->start + TEST_ARRAY_BUF_OFFSET + sizeof(u16);
 
 	KUNIT_EXPECT_EQ(test, unpack_array(puf->e, name, &array_size),
-			TRI_TRUE);
-	KUNIT_EXPECT_EQ(test, array_size, 0);
+			TRI_FALSE);
 	KUNIT_EXPECT_PTR_EQ(test, puf->e->pos,
 		puf->e->start + TEST_NAMED_ARRAY_BUF_OFFSET);
 }
-- 
GitLab


From 5515a8e30eaa8ae0d57ec59c908716cf2af114ae Mon Sep 17 00:00:00 2001
From: Muhammad Usama Anjum <usama.anjum@collabora.com>
Date: Tue, 4 Oct 2022 13:45:15 +0500
Subject: [PATCH 0049/4122] apparmor: store return value of
 unpack_perms_table() to signed variable

The unpack_perms_table() can return error which is negative value. Store
the return value to a signed variable. policy->size is unsigned
variable. It shouldn't be used to store the return status.

Fixes: 2d6b2dea7f3c ("apparmor: add the ability for policy to specify a permission table")
Signed-off-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 45c9dfdc8e0de..09f3169439512 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -734,14 +734,18 @@ static int unpack_pdb(struct aa_ext *e, struct aa_policydb *policy,
 {
 	void *pos = e->pos;
 	int i, flags, error = -EPROTO;
+	ssize_t size;
 
-	policy->size = unpack_perms_table(e, &policy->perms);
-	if (policy->size < 0) {
-		error = policy->size;
+	size = unpack_perms_table(e, &policy->perms);
+	if (size < 0) {
+		error = size;
 		policy->perms = NULL;
 		*info = "failed to unpack - perms";
 		goto fail;
-	} else if (policy->perms) {
+	}
+	policy->size = size;
+
+	if (policy->perms) {
 		/* perms table present accept is index */
 		flags = TO_ACCEPT1_FLAG(YYTD_DATA32);
 	} else {
-- 
GitLab


From ee21a175ecfa821b74822881d354c7f848930738 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 10 Oct 2022 11:18:50 -0700
Subject: [PATCH 0050/4122] apparmor: fix uninitialize table variable in error
 in unpack_trans_table

The error path has one case where *table is uninitialized, initialize
it.

Fixes: a0792e2ceddc ("apparmor: make transition table unpack generic so it can be reused")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 09f3169439512..3b956b1235f3a 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -477,7 +477,7 @@ static struct aa_dfa *unpack_dfa(struct aa_ext *e, int flags)
 static bool unpack_trans_table(struct aa_ext *e, struct aa_str_table *strs)
 {
 	void *saved_pos = e->pos;
-	char **table;
+	char **table = NULL;
 
 	/* exec table is optional */
 	if (unpack_nameX(e, AA_STRUCT, "xtable")) {
-- 
GitLab


From 53991aedcd34760be23f1b0ef312e39b6add84af Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 10 Oct 2022 12:15:10 -0700
Subject: [PATCH 0051/4122] apparmor: Fix unpack_profile() warn: passing zero
 to 'ERR_PTR'

unpack_profile() sets a default error on entry but this gets overridden
by error assignment by functions called in its body. If an error
check that was relying on the default value is triggered after one
of these error assignments then zero will be passed to ERR_PTR.

Fix this by setting up a default -EPROTO assignment in the error
path and while we are at it make sure the correct error is returned
in non-default cases.

Fixes: 217af7e2f4de ("apparmor: refactor profile rules and attachments")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 3b956b1235f3a..2e028d540c6b9 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -851,6 +851,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		*ns_name = kstrndup(tmpns, ns_len, GFP_KERNEL);
 		if (!*ns_name) {
 			info = "out of memory";
+			error = -ENOMEM;
 			goto fail;
 		}
 		name = tmpname;
@@ -882,7 +883,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 		}
 		profile->attach.xmatch_len = tmp;
 		profile->attach.xmatch.start[AA_CLASS_XMATCH] = DFA_START;
-		if (aa_compat_map_xmatch(&profile->attach.xmatch)) {
+		error = aa_compat_map_xmatch(&profile->attach.xmatch);
+		if (error) {
 			info = "failed to convert xmatch permission table";
 			goto fail;
 		}
@@ -1004,7 +1006,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 				      AA_CLASS_FILE);
 		if (!unpack_nameX(e, AA_STRUCTEND, NULL))
 			goto fail;
-		if (aa_compat_map_policy(&rules->policy, e->version)) {
+		error = aa_compat_map_policy(&rules->policy, e->version);
+		if (error) {
 			info = "failed to remap policydb permission table";
 			goto fail;
 		}
@@ -1016,7 +1019,8 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	if (error) {
 		goto fail;
 	} else if (rules->file.dfa) {
-		if (aa_compat_map_file(&rules->file)) {
+		error = aa_compat_map_file(&rules->file);
+		if (error) {
 			info = "failed to remap file permission table";
 			goto fail;
 		}
@@ -1027,12 +1031,14 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	} else
 		rules->file.dfa = aa_get_dfa(nulldfa);
 
+	error = -EPROTO;
 	if (unpack_nameX(e, AA_STRUCT, "data")) {
 		info = "out of memory";
 		profile->data = kzalloc(sizeof(*profile->data), GFP_KERNEL);
-		if (!profile->data)
+		if (!profile->data) {
+			error = -ENOMEM;
 			goto fail;
-
+		}
 		params.nelem_hint = 3;
 		params.key_len = sizeof(void *);
 		params.key_offset = offsetof(struct aa_data, key);
@@ -1049,6 +1055,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			data = kzalloc(sizeof(*data), GFP_KERNEL);
 			if (!data) {
 				kfree_sensitive(key);
+				error = -ENOMEM;
 				goto fail;
 			}
 
@@ -1058,6 +1065,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 			if (data->size && !data->data) {
 				kfree_sensitive(data->key);
 				kfree_sensitive(data);
+				error = -ENOMEM;
 				goto fail;
 			}
 
@@ -1079,6 +1087,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	return profile;
 
 fail:
+	if (error == 0)
+		/* default error covers most cases */
+		error = -EPROTO;
 	if (profile)
 		name = NULL;
 	else if (!name)
-- 
GitLab


From f37722ac71cc8b5ab86f4b3c4d9b9388e1315e8b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 14 Oct 2022 12:25:06 +0300
Subject: [PATCH 0052/4122] phy: stm32: fix an error code in probe

If "index > usbphyc->nphys" is true then this returns success but it
should return -EINVAL.

Fixes: 94c358da3a05 ("phy: stm32: add support for STM32 USB PHY Controller (USBPHYC)")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Amelie Delaunay <amelie.delaunay@foss.st.com>
Link: https://lore.kernel.org/r/Y0kq8j6S+5nDdMpr@kili
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/st/phy-stm32-usbphyc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/phy/st/phy-stm32-usbphyc.c b/drivers/phy/st/phy-stm32-usbphyc.c
index a98c911cc37ae..5bb9647b078f1 100644
--- a/drivers/phy/st/phy-stm32-usbphyc.c
+++ b/drivers/phy/st/phy-stm32-usbphyc.c
@@ -710,6 +710,8 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
 		ret = of_property_read_u32(child, "reg", &index);
 		if (ret || index > usbphyc->nphys) {
 			dev_err(&phy->dev, "invalid reg property: %d\n", ret);
+			if (!ret)
+				ret = -EINVAL;
 			goto put_child;
 		}
 
-- 
GitLab


From cbdbe312c9b6f9dbf698c3db1a5bec4140fe1c21 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@ti.com>
Date: Sat, 15 Oct 2022 13:11:22 -0700
Subject: [PATCH 0053/4122] dt-bindings: phy-j721e-wiz: add j784s4 compatible
 string

Add ti,j784s4-wiz-10g compatible string to binding documentation.

Signed-off-by: Matt Ranostay <mranostay@ti.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221015201123.195477-2-mranostay@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
index 2225925b6dad8..a9e38739c0100 100644
--- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
@@ -17,6 +17,7 @@ properties:
       - ti,j721e-wiz-10g
       - ti,am64-wiz-10g
       - ti,j7200-wiz-10g
+      - ti,j784s4-wiz-10g
 
   power-domains:
     maxItems: 1
-- 
GitLab


From e27ecef8a8ccc13c54df54f5d100aa608de4c306 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@ti.com>
Date: Sat, 15 Oct 2022 13:11:23 -0700
Subject: [PATCH 0054/4122] phy: ti: phy-j721e-wiz: add j784s4-wiz-10g module
 support

Add support for j784s4-wiz-10g device which has two core reference
clocks (e.g core_ref_clk, core_ref1_clk) which requires an additional
mux selection option.

Acked-by: Roger Quadros <rogerq@kernel.org>
Signed-off-by: Matt Ranostay <mranostay@ti.com>
Link: https://lore.kernel.org/r/20221015201123.195477-3-mranostay@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/ti/phy-j721e-wiz.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c
index 41725c6bcdf6f..141b51af4427a 100644
--- a/drivers/phy/ti/phy-j721e-wiz.c
+++ b/drivers/phy/ti/phy-j721e-wiz.c
@@ -81,14 +81,20 @@ static const struct reg_field phy_reset_n = REG_FIELD(WIZ_SERDES_RST, 31, 31);
 static const struct reg_field phy_en_refclk = REG_FIELD(WIZ_SERDES_RST, 30, 30);
 static const struct reg_field pll1_refclk_mux_sel =
 					REG_FIELD(WIZ_SERDES_RST, 29, 29);
+static const struct reg_field pll1_refclk_mux_sel_2 =
+					REG_FIELD(WIZ_SERDES_RST, 22, 23);
 static const struct reg_field pll0_refclk_mux_sel =
 					REG_FIELD(WIZ_SERDES_RST, 28, 28);
+static const struct reg_field pll0_refclk_mux_sel_2 =
+					REG_FIELD(WIZ_SERDES_RST, 28, 29);
 static const struct reg_field refclk_dig_sel_16g =
 					REG_FIELD(WIZ_SERDES_RST, 24, 25);
 static const struct reg_field refclk_dig_sel_10g =
 					REG_FIELD(WIZ_SERDES_RST, 24, 24);
 static const struct reg_field pma_cmn_refclk_int_mode =
 					REG_FIELD(WIZ_SERDES_TOP_CTRL, 28, 29);
+static const struct reg_field pma_cmn_refclk1_int_mode =
+					REG_FIELD(WIZ_SERDES_TOP_CTRL, 20, 21);
 static const struct reg_field pma_cmn_refclk_mode =
 					REG_FIELD(WIZ_SERDES_TOP_CTRL, 30, 31);
 static const struct reg_field pma_cmn_refclk_dig_div =
@@ -315,6 +321,7 @@ enum wiz_type {
 	J721E_WIZ_10G,	/* Also for J7200 SR1.0 */
 	AM64_WIZ_10G,
 	J7200_WIZ_10G,  /* J7200 SR2.0 */
+	J784S4_WIZ_10G,
 };
 
 struct wiz_data {
@@ -992,6 +999,7 @@ static void wiz_clock_cleanup(struct wiz *wiz, struct device_node *node)
 	switch (wiz->type) {
 	case AM64_WIZ_10G:
 	case J7200_WIZ_10G:
+	case J784S4_WIZ_10G:
 		of_clk_del_provider(dev->of_node);
 		return;
 	default:
@@ -1123,6 +1131,7 @@ static int wiz_clock_init(struct wiz *wiz, struct device_node *node)
 	switch (wiz->type) {
 	case AM64_WIZ_10G:
 	case J7200_WIZ_10G:
+	case J784S4_WIZ_10G:
 		ret = wiz_clock_register(wiz);
 		if (ret)
 			dev_err(dev, "Failed to register wiz clocks\n");
@@ -1299,6 +1308,16 @@ static struct wiz_data j7200_pg2_10g_data = {
 	.clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G,
 };
 
+static struct wiz_data j784s4_10g_data = {
+	.type = J784S4_WIZ_10G,
+	.pll0_refclk_mux_sel = &pll0_refclk_mux_sel_2,
+	.pll1_refclk_mux_sel = &pll1_refclk_mux_sel_2,
+	.refclk_dig_sel = &refclk_dig_sel_16g,
+	.pma_cmn_refclk1_int_mode = &pma_cmn_refclk1_int_mode,
+	.clk_mux_sel = clk_mux_sel_10g_2_refclk,
+	.clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G,
+};
+
 static const struct of_device_id wiz_id_table[] = {
 	{
 		.compatible = "ti,j721e-wiz-16g", .data = &j721e_16g_data,
@@ -1312,6 +1331,9 @@ static const struct of_device_id wiz_id_table[] = {
 	{
 		.compatible = "ti,j7200-wiz-10g", .data = &j7200_pg2_10g_data,
 	},
+	{
+		.compatible = "ti,j784s4-wiz-10g", .data = &j784s4_10g_data,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, wiz_id_table);
-- 
GitLab


From 25caed3dcadacd0443dce4fb820e4a33029bba40 Mon Sep 17 00:00:00 2001
From: Richard Zhu <hongxing.zhu@nxp.com>
Date: Thu, 13 Oct 2022 09:46:59 +0800
Subject: [PATCH 0055/4122] dt-binding: phy: Add i.MX8MP PCIe PHY binding

Add i.MX8MP PCIe PHY binding.
On i.MX8MM, the initialized default value of PERST bit(BIT3) of
SRC_PCIEPHY_RCR is 1b'1.
But i.MX8MP has one inversed default value 1b'0 of PERST bit.

And the PERST bit should be kept 1b'1 after power and clocks are stable.
So add one more PERST explicitly for i.MX8MP PCIe PHY.

Signed-off-by: Richard Zhu <hongxing.zhu@nxp.com>
Tested-by: Marek Vasut <marex@denx.de>
Tested-by: Richard Leitner <richard.leitner@skidata.com>
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/1665625622-20551-2-git-send-email-hongxing.zhu@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../bindings/phy/fsl,imx8-pcie-phy.yaml          | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml
index 0af765ba27932..182a219387b0e 100644
--- a/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/fsl,imx8-pcie-phy.yaml
@@ -16,6 +16,7 @@ properties:
   compatible:
     enum:
       - fsl,imx8mm-pcie-phy
+      - fsl,imx8mp-pcie-phy
 
   reg:
     maxItems: 1
@@ -28,11 +29,16 @@ properties:
       - const: ref
 
   resets:
-    maxItems: 1
+    minItems: 1
+    maxItems: 2
 
   reset-names:
-    items:
-      - const: pciephy
+    oneOf:
+      - items:          # for iMX8MM
+          - const: pciephy
+      - items:          # for IMX8MP
+          - const: pciephy
+          - const: perst
 
   fsl,refclk-pad-mode:
     description: |
@@ -60,6 +66,10 @@ properties:
     description: A boolean property indicating the CLKREQ# signal is
       not supported in the board design (optional)
 
+  power-domains:
+    description: PCIe PHY  power domain (optional).
+    maxItems: 1
+
 required:
   - "#phy-cells"
   - compatible
-- 
GitLab


From e9e7dca53bf5a5bddf70c87157660a29cdcdd2d8 Mon Sep 17 00:00:00 2001
From: Richard Zhu <hongxing.zhu@nxp.com>
Date: Thu, 13 Oct 2022 09:47:00 +0800
Subject: [PATCH 0056/4122] phy: freescale: imx8m-pcie: Refine register
 definitions

No function changes, refine PHY register definitions.
- Keep align with other CMN PHY registers, refine the definitions of
  PHY_CMN_REG75.
- Remove two BIT definitions that are not used at all.

Signed-off-by: Richard Zhu <hongxing.zhu@nxp.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Marek Vasut <marex@denx.de>
Tested-by: Richard Leitner <richard.leitner@skidata.com>
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Link: https://lore.kernel.org/r/1665625622-20551-3-git-send-email-hongxing.zhu@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
index c93286483b425..3c8c255499cda 100644
--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
@@ -31,12 +31,10 @@
 #define IMX8MM_PCIE_PHY_CMN_REG065	0x194
 #define  ANA_AUX_RX_TERM		(BIT(7) | BIT(4))
 #define  ANA_AUX_TX_LVL			GENMASK(3, 0)
-#define IMX8MM_PCIE_PHY_CMN_REG75	0x1D4
-#define  PCIE_PHY_CMN_REG75_PLL_DONE	0x3
+#define IMX8MM_PCIE_PHY_CMN_REG075	0x1D4
+#define  ANA_PLL_DONE			0x3
 #define PCIE_PHY_TRSV_REG5		0x414
-#define  PCIE_PHY_TRSV_REG5_GEN1_DEEMP	0x2D
 #define PCIE_PHY_TRSV_REG6		0x418
-#define  PCIE_PHY_TRSV_REG6_GEN2_DEEMP	0xF
 
 #define IMX8MM_GPR_PCIE_REF_CLK_SEL	GENMASK(25, 24)
 #define IMX8MM_GPR_PCIE_REF_CLK_PLL	FIELD_PREP(IMX8MM_GPR_PCIE_REF_CLK_SEL, 0x3)
@@ -131,9 +129,8 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
 	reset_control_deassert(imx8_phy->reset);
 
 	/* Polling to check the phy is ready or not. */
-	ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG75,
-				 val, val == PCIE_PHY_CMN_REG75_PLL_DONE,
-				 10, 20000);
+	ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075,
+				 val, val == ANA_PLL_DONE, 10, 20000);
 	return ret;
 }
 
-- 
GitLab


From ca679c49c4463595499a053ba94328acb574fffa Mon Sep 17 00:00:00 2001
From: Richard Zhu <hongxing.zhu@nxp.com>
Date: Thu, 13 Oct 2022 09:47:01 +0800
Subject: [PATCH 0057/4122] phy: freescale: imx8m-pcie: Refine i.MX8MM PCIe PHY
 driver

To make it more flexible and easy to expand. Refine i.MX8MM PCIe PHY
driver.
- Use gpr compatible string to avoid the codes duplications when add
  another platform PCIe PHY support.
- Re-arrange the codes to let it more flexible and easy to expand.
No functional change. Re-arrange the TX tuning, since internal registers
can be wrote through APB interface before assertion of CMN_RST.

Signed-off-by: Richard Zhu <hongxing.zhu@nxp.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Marek Vasut <marex@denx.de>
Tested-by: Richard Leitner <richard.leitner@skidata.com>
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Link: https://lore.kernel.org/r/1665625622-20551-4-git-send-email-hongxing.zhu@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 106 +++++++++++++--------
 1 file changed, 66 insertions(+), 40 deletions(-)

diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
index 3c8c255499cda..3e494612db3c0 100644
--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
@@ -11,6 +11,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx7-iomuxc-gpr.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/regmap.h>
@@ -45,6 +46,15 @@
 #define IMX8MM_GPR_PCIE_SSC_EN		BIT(16)
 #define IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE	BIT(9)
 
+enum imx8_pcie_phy_type {
+	IMX8MM,
+};
+
+struct imx8_pcie_phy_drvdata {
+	const	char			*gpr;
+	enum	imx8_pcie_phy_type	variant;
+};
+
 struct imx8_pcie_phy {
 	void __iomem		*base;
 	struct clk		*clk;
@@ -55,6 +65,7 @@ struct imx8_pcie_phy {
 	u32			tx_deemph_gen1;
 	u32			tx_deemph_gen2;
 	bool			clkreq_unused;
+	const struct imx8_pcie_phy_drvdata	*drvdata;
 };
 
 static int imx8_pcie_phy_power_on(struct phy *phy)
@@ -66,31 +77,17 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
 	reset_control_assert(imx8_phy->reset);
 
 	pad_mode = imx8_phy->refclk_pad_mode;
-	/* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */
-	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
-			   IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE,
-			   imx8_phy->clkreq_unused ?
-			   0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE);
-	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
-			   IMX8MM_GPR_PCIE_AUX_EN,
-			   IMX8MM_GPR_PCIE_AUX_EN);
-	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
-			   IMX8MM_GPR_PCIE_POWER_OFF, 0);
-	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
-			   IMX8MM_GPR_PCIE_SSC_EN, 0);
-
-	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
-			   IMX8MM_GPR_PCIE_REF_CLK_SEL,
-			   pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ?
-			   IMX8MM_GPR_PCIE_REF_CLK_EXT :
-			   IMX8MM_GPR_PCIE_REF_CLK_PLL);
-	usleep_range(100, 200);
-
-	/* Do the PHY common block reset */
-	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
-			   IMX8MM_GPR_PCIE_CMN_RST,
-			   IMX8MM_GPR_PCIE_CMN_RST);
-	usleep_range(200, 500);
+	switch (imx8_phy->drvdata->variant) {
+	case IMX8MM:
+		/* Tune PHY de-emphasis setting to pass PCIe compliance. */
+		if (imx8_phy->tx_deemph_gen1)
+			writel(imx8_phy->tx_deemph_gen1,
+			       imx8_phy->base + PCIE_PHY_TRSV_REG5);
+		if (imx8_phy->tx_deemph_gen2)
+			writel(imx8_phy->tx_deemph_gen2,
+			       imx8_phy->base + PCIE_PHY_TRSV_REG6);
+		break;
+	}
 
 	if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ||
 	    pad_mode == IMX8_PCIE_REFCLK_PAD_UNUSED) {
@@ -118,15 +115,37 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
 		       imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG065);
 	}
 
-	/* Tune PHY de-emphasis setting to pass PCIe compliance. */
-	if (imx8_phy->tx_deemph_gen1)
-		writel(imx8_phy->tx_deemph_gen1,
-		       imx8_phy->base + PCIE_PHY_TRSV_REG5);
-	if (imx8_phy->tx_deemph_gen2)
-		writel(imx8_phy->tx_deemph_gen2,
-		       imx8_phy->base + PCIE_PHY_TRSV_REG6);
+	/* Set AUX_EN_OVERRIDE 1'b0, when the CLKREQ# isn't hooked */
+	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+			   IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE,
+			   imx8_phy->clkreq_unused ?
+			   0 : IMX8MM_GPR_PCIE_AUX_EN_OVERRIDE);
+	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+			   IMX8MM_GPR_PCIE_AUX_EN,
+			   IMX8MM_GPR_PCIE_AUX_EN);
+	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+			   IMX8MM_GPR_PCIE_POWER_OFF, 0);
+	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+			   IMX8MM_GPR_PCIE_SSC_EN, 0);
 
-	reset_control_deassert(imx8_phy->reset);
+	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+			   IMX8MM_GPR_PCIE_REF_CLK_SEL,
+			   pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ?
+			   IMX8MM_GPR_PCIE_REF_CLK_EXT :
+			   IMX8MM_GPR_PCIE_REF_CLK_PLL);
+	usleep_range(100, 200);
+
+	/* Do the PHY common block reset */
+	regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+			   IMX8MM_GPR_PCIE_CMN_RST,
+			   IMX8MM_GPR_PCIE_CMN_RST);
+
+	switch (imx8_phy->drvdata->variant) {
+	case IMX8MM:
+		reset_control_deassert(imx8_phy->reset);
+		usleep_range(200, 500);
+		break;
+	}
 
 	/* Polling to check the phy is ready or not. */
 	ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075,
@@ -157,6 +176,17 @@ static const struct phy_ops imx8_pcie_phy_ops = {
 	.owner		= THIS_MODULE,
 };
 
+static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = {
+	.gpr = "fsl,imx8mm-iomuxc-gpr",
+	.variant = IMX8MM,
+};
+
+static const struct of_device_id imx8_pcie_phy_of_match[] = {
+	{.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match);
+
 static int imx8_pcie_phy_probe(struct platform_device *pdev)
 {
 	struct phy_provider *phy_provider;
@@ -169,6 +199,8 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev)
 	if (!imx8_phy)
 		return -ENOMEM;
 
+	imx8_phy->drvdata = of_device_get_match_data(dev);
+
 	/* get PHY refclk pad mode */
 	of_property_read_u32(np, "fsl,refclk-pad-mode",
 			     &imx8_phy->refclk_pad_mode);
@@ -194,7 +226,7 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev)
 
 	/* Grab GPR config register range */
 	imx8_phy->iomuxc_gpr =
-		 syscon_regmap_lookup_by_compatible("fsl,imx6q-iomuxc-gpr");
+		 syscon_regmap_lookup_by_compatible(imx8_phy->drvdata->gpr);
 	if (IS_ERR(imx8_phy->iomuxc_gpr)) {
 		dev_err(dev, "unable to find iomuxc registers\n");
 		return PTR_ERR(imx8_phy->iomuxc_gpr);
@@ -222,12 +254,6 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev)
 	return PTR_ERR_OR_ZERO(phy_provider);
 }
 
-static const struct of_device_id imx8_pcie_phy_of_match[] = {
-	{.compatible = "fsl,imx8mm-pcie-phy",},
-	{ },
-};
-MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match);
-
 static struct platform_driver imx8_pcie_phy_driver = {
 	.probe	= imx8_pcie_phy_probe,
 	.driver = {
-- 
GitLab


From dce9edff16ee8df20e791e82e0704c4667cc3908 Mon Sep 17 00:00:00 2001
From: Richard Zhu <hongxing.zhu@nxp.com>
Date: Thu, 13 Oct 2022 09:47:02 +0800
Subject: [PATCH 0058/4122] phy: freescale: imx8m-pcie: Add i.MX8MP PCIe PHY
 support

Add i.MX8MP PCIe PHY support.

Signed-off-by: Richard Zhu <hongxing.zhu@nxp.com>
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Tested-by: Marek Vasut <marex@denx.de>
Tested-by: Richard Leitner <richard.leitner@skidata.com>
Tested-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Reviewed-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Link: https://lore.kernel.org/r/1665625622-20551-5-git-send-email-hongxing.zhu@nxp.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 25 ++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
index 3e494612db3c0..7585e8080b77d 100644
--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
@@ -48,6 +48,7 @@
 
 enum imx8_pcie_phy_type {
 	IMX8MM,
+	IMX8MP,
 };
 
 struct imx8_pcie_phy_drvdata {
@@ -60,6 +61,7 @@ struct imx8_pcie_phy {
 	struct clk		*clk;
 	struct phy		*phy;
 	struct regmap		*iomuxc_gpr;
+	struct reset_control	*perst;
 	struct reset_control	*reset;
 	u32			refclk_pad_mode;
 	u32			tx_deemph_gen1;
@@ -74,11 +76,11 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
 	u32 val, pad_mode;
 	struct imx8_pcie_phy *imx8_phy = phy_get_drvdata(phy);
 
-	reset_control_assert(imx8_phy->reset);
-
 	pad_mode = imx8_phy->refclk_pad_mode;
 	switch (imx8_phy->drvdata->variant) {
 	case IMX8MM:
+		reset_control_assert(imx8_phy->reset);
+
 		/* Tune PHY de-emphasis setting to pass PCIe compliance. */
 		if (imx8_phy->tx_deemph_gen1)
 			writel(imx8_phy->tx_deemph_gen1,
@@ -87,6 +89,8 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
 			writel(imx8_phy->tx_deemph_gen2,
 			       imx8_phy->base + PCIE_PHY_TRSV_REG6);
 		break;
+	case IMX8MP: /* Do nothing. */
+		break;
 	}
 
 	if (pad_mode == IMX8_PCIE_REFCLK_PAD_INPUT ||
@@ -141,6 +145,9 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
 			   IMX8MM_GPR_PCIE_CMN_RST);
 
 	switch (imx8_phy->drvdata->variant) {
+	case IMX8MP:
+		reset_control_deassert(imx8_phy->perst);
+		fallthrough;
 	case IMX8MM:
 		reset_control_deassert(imx8_phy->reset);
 		usleep_range(200, 500);
@@ -181,8 +188,14 @@ static const struct imx8_pcie_phy_drvdata imx8mm_drvdata = {
 	.variant = IMX8MM,
 };
 
+static const struct imx8_pcie_phy_drvdata imx8mp_drvdata = {
+	.gpr = "fsl,imx8mp-iomuxc-gpr",
+	.variant = IMX8MP,
+};
+
 static const struct of_device_id imx8_pcie_phy_of_match[] = {
 	{.compatible = "fsl,imx8mm-pcie-phy", .data = &imx8mm_drvdata, },
+	{.compatible = "fsl,imx8mp-pcie-phy", .data = &imx8mp_drvdata, },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, imx8_pcie_phy_of_match);
@@ -238,6 +251,14 @@ static int imx8_pcie_phy_probe(struct platform_device *pdev)
 		return PTR_ERR(imx8_phy->reset);
 	}
 
+	if (imx8_phy->drvdata->variant == IMX8MP) {
+		imx8_phy->perst =
+			devm_reset_control_get_exclusive(dev, "perst");
+		if (IS_ERR(imx8_phy->perst))
+			dev_err_probe(dev, PTR_ERR(imx8_phy->perst),
+				      "Failed to get PCIE PHY PERST control\n");
+	}
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	imx8_phy->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(imx8_phy->base))
-- 
GitLab


From b01d622d76134e9401970ffd3fbbb9a7051f976a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Tue, 20 Sep 2022 14:11:54 +0200
Subject: [PATCH 0059/4122] phy: marvell: phy-mvebu-a3700-comphy: Reset COMPHY
 registers before USB 3.0 power on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Turris MOX board with older ARM Trusted Firmware version v1.5 is not able
to detect any USB 3.0 device connected to USB-A port on Mox-A module after
commit 0a6fc70d76bd ("phy: marvell: phy-mvebu-a3700-comphy: Remove broken
reset support"). On the other hand USB 2.0 devices connected to the same
USB-A port are working fine.

It looks as if the older firmware configures COMPHY registers for USB 3.0
somehow incompatibly for kernel driver. Experiments show that resetting
COMPHY registers via setting SFT_RST auto-clearing bit in COMPHY_SFT_RESET
register fixes this issue.

Reset the COMPHY in mvebu_a3700_comphy_usb3_power_on() function as a first
step after selecting COMPHY lane and USB 3.0 function. With this change
Turris MOX board can successfully detect USB 3.0 devices again.

Before the above mentioned commit this reset was implemented in PHY reset
method, so this is the reason why there was no issue with older firmware
version then.

Fixes: 0a6fc70d76bd ("phy: marvell: phy-mvebu-a3700-comphy: Remove broken reset support")
Reported-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Pali Rohár <pali@kernel.org>
Tested-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Link: https://lore.kernel.org/r/20220920121154.30115-1-pali@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/marvell/phy-mvebu-a3700-comphy.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
index 67712c77d806f..d641b345afa35 100644
--- a/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
+++ b/drivers/phy/marvell/phy-mvebu-a3700-comphy.c
@@ -826,6 +826,9 @@ mvebu_a3700_comphy_usb3_power_on(struct mvebu_a3700_comphy_lane *lane)
 	if (ret)
 		return ret;
 
+	/* COMPHY register reset (cleared automatically) */
+	comphy_lane_reg_set(lane, COMPHY_SFT_RESET, SFT_RST, SFT_RST);
+
 	/*
 	 * 0. Set PHY OTG Control(0x5d034), bit 4, Power up OTG module The
 	 * register belong to UTMI module, so it is set in UTMI phy driver.
-- 
GitLab


From 2566ad8ec418934c213cb50fd2084ffd896a2fea Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 27 Sep 2022 12:22:02 +0300
Subject: [PATCH 0060/4122] phy: qcom-qmp-pcie: split register tables into
 common and extra parts

SM8250 configuration tables are split into two parts: the common one and
the PHY-specific tables. Make this split more formal. Rather than having
a blind renamed copy of all QMP table fields, add separate struct
qmp_phy_cfg_tables and add two instances of this structure to the struct
qmp_phy_cfg. Later on this will be used to support different PHY modes
(RC vs EP).

Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20220927092207.161501-2-dmitry.baryshkov@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 406 +++++++++++++----------
 1 file changed, 222 insertions(+), 184 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 5be5348fbb26b..ae0d7b49dfa36 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1300,31 +1300,30 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e),
 };
 
+struct qmp_phy_cfg_tables {
+	const struct qmp_phy_init_tbl *serdes;
+	int serdes_num;
+	const struct qmp_phy_init_tbl *tx;
+	int tx_num;
+	const struct qmp_phy_init_tbl *rx;
+	int rx_num;
+	const struct qmp_phy_init_tbl *pcs;
+	int pcs_num;
+	const struct qmp_phy_init_tbl *pcs_misc;
+	int pcs_misc_num;
+};
+
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
 	int lanes;
 
-	/* Init sequence for PHY blocks - serdes, tx, rx, pcs */
-	const struct qmp_phy_init_tbl *serdes_tbl;
-	int serdes_tbl_num;
-	const struct qmp_phy_init_tbl *serdes_tbl_sec;
-	int serdes_tbl_num_sec;
-	const struct qmp_phy_init_tbl *tx_tbl;
-	int tx_tbl_num;
-	const struct qmp_phy_init_tbl *tx_tbl_sec;
-	int tx_tbl_num_sec;
-	const struct qmp_phy_init_tbl *rx_tbl;
-	int rx_tbl_num;
-	const struct qmp_phy_init_tbl *rx_tbl_sec;
-	int rx_tbl_num_sec;
-	const struct qmp_phy_init_tbl *pcs_tbl;
-	int pcs_tbl_num;
-	const struct qmp_phy_init_tbl *pcs_tbl_sec;
-	int pcs_tbl_num_sec;
-	const struct qmp_phy_init_tbl *pcs_misc_tbl;
-	int pcs_misc_tbl_num;
-	const struct qmp_phy_init_tbl *pcs_misc_tbl_sec;
-	int pcs_misc_tbl_num_sec;
+	/* Main init sequence for PHY blocks - serdes, tx, rx, pcs */
+	const struct qmp_phy_cfg_tables tables;
+	/*
+	 * Additional init sequence for PHY blocks, providing additional
+	 * register programming. Unless required it can be left omitted.
+	 */
+	const struct qmp_phy_cfg_tables *tables_rc;
 
 	/* clock ids to be requested */
 	const char * const *clk_list;
@@ -1459,14 +1458,16 @@ static const char * const sdm845_pciephy_reset_l[] = {
 static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= ipq8074_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(ipq8074_pcie_serdes_tbl),
-	.tx_tbl			= ipq8074_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(ipq8074_pcie_tx_tbl),
-	.rx_tbl			= ipq8074_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(ipq8074_pcie_rx_tbl),
-	.pcs_tbl		= ipq8074_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(ipq8074_pcie_pcs_tbl),
+	.tables = {
+		.serdes		= ipq8074_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(ipq8074_pcie_serdes_tbl),
+		.tx		= ipq8074_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(ipq8074_pcie_tx_tbl),
+		.rx		= ipq8074_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(ipq8074_pcie_rx_tbl),
+		.pcs		= ipq8074_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(ipq8074_pcie_pcs_tbl),
+	},
 	.clk_list		= ipq8074_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(ipq8074_pciephy_clk_l),
 	.reset_list		= ipq8074_pciephy_reset_l,
@@ -1487,14 +1488,16 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= ipq8074_pcie_gen3_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl),
-	.tx_tbl			= ipq8074_pcie_gen3_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(ipq8074_pcie_gen3_tx_tbl),
-	.rx_tbl			= ipq8074_pcie_gen3_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl),
-	.pcs_tbl		= ipq8074_pcie_gen3_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl),
+	.tables = {
+		.serdes		= ipq8074_pcie_gen3_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl),
+		.tx		= ipq8074_pcie_gen3_tx_tbl,
+		.tx_num		= ARRAY_SIZE(ipq8074_pcie_gen3_tx_tbl),
+		.rx		= ipq8074_pcie_gen3_rx_tbl,
+		.rx_num		= ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl),
+		.pcs		= ipq8074_pcie_gen3_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl),
+	},
 	.clk_list		= ipq8074_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(ipq8074_pciephy_clk_l),
 	.reset_list		= ipq8074_pciephy_reset_l,
@@ -1516,16 +1519,18 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= ipq6018_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(ipq6018_pcie_serdes_tbl),
-	.tx_tbl			= ipq6018_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(ipq6018_pcie_tx_tbl),
-	.rx_tbl			= ipq6018_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(ipq6018_pcie_rx_tbl),
-	.pcs_tbl		= ipq6018_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(ipq6018_pcie_pcs_tbl),
-	.pcs_misc_tbl		= ipq6018_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(ipq6018_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= ipq6018_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(ipq6018_pcie_serdes_tbl),
+		.tx		= ipq6018_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(ipq6018_pcie_tx_tbl),
+		.rx		= ipq6018_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(ipq6018_pcie_rx_tbl),
+		.pcs		= ipq6018_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(ipq6018_pcie_pcs_tbl),
+		.pcs_misc	= ipq6018_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(ipq6018_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= ipq8074_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(ipq8074_pciephy_clk_l),
 	.reset_list		= ipq8074_pciephy_reset_l,
@@ -1545,16 +1550,18 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= sdm845_qmp_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl),
-	.tx_tbl			= sdm845_qmp_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sdm845_qmp_pcie_tx_tbl),
-	.rx_tbl			= sdm845_qmp_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sdm845_qmp_pcie_rx_tbl),
-	.pcs_tbl		= sdm845_qmp_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sdm845_qmp_pcie_pcs_tbl),
-	.pcs_misc_tbl		= sdm845_qmp_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(sdm845_qmp_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= sdm845_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl),
+		.tx		= sdm845_qmp_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sdm845_qmp_pcie_tx_tbl),
+		.rx		= sdm845_qmp_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sdm845_qmp_pcie_rx_tbl),
+		.pcs		= sdm845_qmp_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sdm845_qmp_pcie_pcs_tbl),
+		.pcs_misc	= sdm845_qmp_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sdm845_qmp_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1575,14 +1582,16 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
 static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= sdm845_qhp_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl),
-	.tx_tbl			= sdm845_qhp_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sdm845_qhp_pcie_tx_tbl),
-	.rx_tbl			= sdm845_qhp_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sdm845_qhp_pcie_rx_tbl),
-	.pcs_tbl		= sdm845_qhp_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sdm845_qhp_pcie_pcs_tbl),
+	.tables = {
+		.serdes		= sdm845_qhp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl),
+		.tx		= sdm845_qhp_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sdm845_qhp_pcie_tx_tbl),
+		.rx		= sdm845_qhp_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sdm845_qhp_pcie_rx_tbl),
+		.pcs		= sdm845_qhp_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sdm845_qhp_pcie_pcs_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1603,24 +1612,28 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= sm8250_qmp_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl),
-	.serdes_tbl_sec		= sm8250_qmp_gen3x1_pcie_serdes_tbl,
-	.serdes_tbl_num_sec	= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl),
-	.tx_tbl			= sm8250_qmp_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl),
-	.rx_tbl			= sm8250_qmp_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl),
-	.rx_tbl_sec		= sm8250_qmp_gen3x1_pcie_rx_tbl,
-	.rx_tbl_num_sec		= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_rx_tbl),
-	.pcs_tbl		= sm8250_qmp_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl),
-	.pcs_tbl_sec		= sm8250_qmp_gen3x1_pcie_pcs_tbl,
-	.pcs_tbl_num_sec		= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_tbl),
-	.pcs_misc_tbl		= sm8250_qmp_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl),
-	.pcs_misc_tbl_sec		= sm8250_qmp_gen3x1_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num_sec	= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= sm8250_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl),
+		.tx		= sm8250_qmp_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl),
+		.rx		= sm8250_qmp_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl),
+		.pcs		= sm8250_qmp_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl),
+		.pcs_misc	= sm8250_qmp_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl),
+	},
+	.tables_rc = &(const struct qmp_phy_cfg_tables) {
+		.serdes		= sm8250_qmp_gen3x1_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl),
+		.rx		= sm8250_qmp_gen3x1_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_rx_tbl),
+		.pcs		= sm8250_qmp_gen3x1_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_tbl),
+		.pcs_misc	= sm8250_qmp_gen3x1_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1641,24 +1654,28 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 	.lanes			= 2,
 
-	.serdes_tbl		= sm8250_qmp_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl),
-	.tx_tbl			= sm8250_qmp_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl),
-	.tx_tbl_sec		= sm8250_qmp_gen3x2_pcie_tx_tbl,
-	.tx_tbl_num_sec		= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl),
-	.rx_tbl			= sm8250_qmp_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl),
-	.rx_tbl_sec		= sm8250_qmp_gen3x2_pcie_rx_tbl,
-	.rx_tbl_num_sec		= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_rx_tbl),
-	.pcs_tbl		= sm8250_qmp_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl),
-	.pcs_tbl_sec		= sm8250_qmp_gen3x2_pcie_pcs_tbl,
-	.pcs_tbl_num_sec		= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_tbl),
-	.pcs_misc_tbl		= sm8250_qmp_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl),
-	.pcs_misc_tbl_sec		= sm8250_qmp_gen3x2_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num_sec	= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= sm8250_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl),
+		.tx		= sm8250_qmp_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sm8250_qmp_pcie_tx_tbl),
+		.rx		= sm8250_qmp_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sm8250_qmp_pcie_rx_tbl),
+		.pcs		= sm8250_qmp_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_tbl),
+		.pcs_misc	= sm8250_qmp_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl),
+	},
+	.tables_rc = &(const struct qmp_phy_cfg_tables) {
+		.tx		= sm8250_qmp_gen3x2_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl),
+		.rx		= sm8250_qmp_gen3x2_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_rx_tbl),
+		.pcs		= sm8250_qmp_gen3x2_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_tbl),
+		.pcs_misc	= sm8250_qmp_gen3x2_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1679,14 +1696,16 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= msm8998_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(msm8998_pcie_serdes_tbl),
-	.tx_tbl			= msm8998_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(msm8998_pcie_tx_tbl),
-	.rx_tbl			= msm8998_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(msm8998_pcie_rx_tbl),
-	.pcs_tbl		= msm8998_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(msm8998_pcie_pcs_tbl),
+	.tables = {
+		.serdes		= msm8998_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(msm8998_pcie_serdes_tbl),
+		.tx		= msm8998_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(msm8998_pcie_tx_tbl),
+		.rx		= msm8998_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(msm8998_pcie_rx_tbl),
+		.pcs		= msm8998_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(msm8998_pcie_pcs_tbl),
+	},
 	.clk_list		= msm8996_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(msm8996_phy_clk_l),
 	.reset_list		= ipq8074_pciephy_reset_l,
@@ -1703,16 +1722,18 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
 static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= sc8180x_qmp_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl),
-	.tx_tbl			= sc8180x_qmp_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl),
-	.rx_tbl			= sc8180x_qmp_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sc8180x_qmp_pcie_rx_tbl),
-	.pcs_tbl		= sc8180x_qmp_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sc8180x_qmp_pcie_pcs_tbl),
-	.pcs_misc_tbl		= sc8180x_qmp_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(sc8180x_qmp_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= sc8180x_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl),
+		.tx		= sc8180x_qmp_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sc8180x_qmp_pcie_tx_tbl),
+		.rx		= sc8180x_qmp_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sc8180x_qmp_pcie_rx_tbl),
+		.pcs		= sc8180x_qmp_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sc8180x_qmp_pcie_pcs_tbl),
+		.pcs_misc	= sc8180x_qmp_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sc8180x_qmp_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1732,16 +1753,18 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 	.lanes			= 2,
 
-	.serdes_tbl		= sdx55_qmp_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl),
-	.tx_tbl			= sdx55_qmp_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sdx55_qmp_pcie_tx_tbl),
-	.rx_tbl			= sdx55_qmp_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sdx55_qmp_pcie_rx_tbl),
-	.pcs_tbl		= sdx55_qmp_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sdx55_qmp_pcie_pcs_tbl),
-	.pcs_misc_tbl		= sdx55_qmp_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(sdx55_qmp_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= sdx55_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl),
+		.tx		= sdx55_qmp_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sdx55_qmp_pcie_tx_tbl),
+		.rx		= sdx55_qmp_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sdx55_qmp_pcie_rx_tbl),
+		.pcs		= sdx55_qmp_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sdx55_qmp_pcie_pcs_tbl),
+		.pcs_misc	= sdx55_qmp_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sdx55_qmp_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1762,16 +1785,18 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
 	.lanes			= 1,
 
-	.serdes_tbl		= sm8450_qmp_gen3x1_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl),
-	.tx_tbl			= sm8450_qmp_gen3x1_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_tx_tbl),
-	.rx_tbl			= sm8450_qmp_gen3x1_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_rx_tbl),
-	.pcs_tbl		= sm8450_qmp_gen3x1_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_tbl),
-	.pcs_misc_tbl		= sm8450_qmp_gen3x1_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= sm8450_qmp_gen3x1_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl),
+		.tx		= sm8450_qmp_gen3x1_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_tx_tbl),
+		.rx		= sm8450_qmp_gen3x1_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_rx_tbl),
+		.pcs		= sm8450_qmp_gen3x1_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_tbl),
+		.pcs_misc	= sm8450_qmp_gen3x1_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1792,16 +1817,18 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 	.lanes			= 2,
 
-	.serdes_tbl		= sm8450_qmp_gen4x2_pcie_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl),
-	.tx_tbl			= sm8450_qmp_gen4x2_pcie_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_tx_tbl),
-	.rx_tbl			= sm8450_qmp_gen4x2_pcie_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rx_tbl),
-	.pcs_tbl		= sm8450_qmp_gen4x2_pcie_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_tbl),
-	.pcs_misc_tbl		= sm8450_qmp_gen4x2_pcie_pcs_misc_tbl,
-	.pcs_misc_tbl_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl),
+	.tables = {
+		.serdes		= sm8450_qmp_gen4x2_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl),
+		.tx		= sm8450_qmp_gen4x2_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_tx_tbl),
+		.rx		= sm8450_qmp_gen4x2_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rx_tbl),
+		.pcs		= sm8450_qmp_gen4x2_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_tbl),
+		.pcs_misc	= sm8450_qmp_gen4x2_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl),
+	},
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
@@ -1850,17 +1877,49 @@ static void qmp_pcie_configure(void __iomem *base,
 	qmp_pcie_configure_lane(base, regs, tbl, num, 0xff);
 }
 
-static int qmp_pcie_serdes_init(struct qmp_phy *qphy)
+static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
 {
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	void __iomem *serdes = qphy->serdes;
-	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
-	int serdes_tbl_num = cfg->serdes_tbl_num;
 
-	qmp_pcie_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num);
-	qmp_pcie_configure(serdes, cfg->regs, cfg->serdes_tbl_sec, cfg->serdes_tbl_num_sec);
+	if (!tables)
+		return;
 
-	return 0;
+	qmp_pcie_configure(serdes, cfg->regs, tables->serdes, tables->serdes_num);
+}
+
+static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
+{
+	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	void __iomem *tx = qphy->tx;
+	void __iomem *rx = qphy->rx;
+
+	if (!tables)
+		return;
+
+	qmp_pcie_configure_lane(tx, cfg->regs, tables->tx, tables->tx_num, 1);
+
+	if (cfg->lanes >= 2)
+		qmp_pcie_configure_lane(qphy->tx2, cfg->regs, tables->tx, tables->tx_num, 2);
+
+	qmp_pcie_configure_lane(rx, cfg->regs, tables->rx, tables->rx_num, 1);
+	if (cfg->lanes >= 2)
+		qmp_pcie_configure_lane(qphy->rx2, cfg->regs, tables->rx, tables->rx_num, 2);
+}
+
+static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
+{
+	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	void __iomem *pcs = qphy->pcs;
+	void __iomem *pcs_misc = qphy->pcs_misc;
+
+	if (!tables)
+		return;
+
+	qmp_pcie_configure(pcs, cfg->regs,
+			   tables->pcs, tables->pcs_num);
+	qmp_pcie_configure(pcs_misc, cfg->regs,
+			   tables->pcs_misc, tables->pcs_misc_num);
 }
 
 static int qmp_pcie_init(struct phy *phy)
@@ -1932,15 +1991,13 @@ static int qmp_pcie_power_on(struct phy *phy)
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	struct qcom_qmp *qmp = qphy->qmp;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *tx = qphy->tx;
-	void __iomem *rx = qphy->rx;
 	void __iomem *pcs = qphy->pcs;
-	void __iomem *pcs_misc = qphy->pcs_misc;
 	void __iomem *status;
 	unsigned int mask, val, ready;
 	int ret;
 
-	qmp_pcie_serdes_init(qphy);
+	qmp_pcie_serdes_init(qphy, &cfg->tables);
+	qmp_pcie_serdes_init(qphy, cfg->tables_rc);
 
 	ret = clk_prepare_enable(qphy->pipe_clk);
 	if (ret) {
@@ -1949,31 +2006,11 @@ static int qmp_pcie_power_on(struct phy *phy)
 	}
 
 	/* Tx, Rx, and PCS configurations */
-	qmp_pcie_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1);
-	qmp_pcie_configure_lane(tx, cfg->regs, cfg->tx_tbl_sec, cfg->tx_tbl_num_sec, 1);
-
-	if (cfg->lanes >= 2) {
-		qmp_pcie_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl,
-					cfg->tx_tbl_num, 2);
-		qmp_pcie_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl_sec,
-					cfg->tx_tbl_num_sec, 2);
-	}
-
-	qmp_pcie_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1);
-	qmp_pcie_configure_lane(rx, cfg->regs, cfg->rx_tbl_sec, cfg->rx_tbl_num_sec, 1);
-
-	if (cfg->lanes >= 2) {
-		qmp_pcie_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl,
-					cfg->rx_tbl_num, 2);
-		qmp_pcie_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl_sec,
-					cfg->rx_tbl_num_sec, 2);
-	}
-
-	qmp_pcie_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
-	qmp_pcie_configure(pcs, cfg->regs, cfg->pcs_tbl_sec, cfg->pcs_tbl_num_sec);
+	qmp_pcie_lanes_init(qphy, &cfg->tables);
+	qmp_pcie_lanes_init(qphy, cfg->tables_rc);
 
-	qmp_pcie_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl, cfg->pcs_misc_tbl_num);
-	qmp_pcie_configure(pcs_misc, cfg->regs, cfg->pcs_misc_tbl_sec, cfg->pcs_misc_tbl_num_sec);
+	qmp_pcie_pcs_init(qphy, &cfg->tables);
+	qmp_pcie_pcs_init(qphy, cfg->tables_rc);
 
 	/*
 	 * Pull out PHY from POWER DOWN state.
@@ -2240,7 +2277,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id,
 		qphy->pcs_misc = qphy->pcs + 0x400;
 
 	if (IS_ERR(qphy->pcs_misc)) {
-		if (cfg->pcs_misc_tbl || cfg->pcs_misc_tbl_sec)
+		if (cfg->tables.pcs_misc ||
+		    (cfg->tables_rc && cfg->tables_rc->pcs_misc))
 			return PTR_ERR(qphy->pcs_misc);
 	}
 
-- 
GitLab


From 11bf53a38c82baef349b4efc6a84f069dab7085a Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 27 Sep 2022 12:22:03 +0300
Subject: [PATCH 0061/4122] phy: qcom-qmp-pcie: support separate tables for EP
 mode

The PCIe QMP PHY requires different programming sequences when being
used for the RC (Root Complex) or for the EP (End Point) modes. Allow
selecting the submode and thus selecting a set of PHY programming
tables.

Since the RC and EP modes share common some common init sequence, the
common sequence is kept in the main table and the sequence differences
are pushed to the extra tables.

Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20220927092207.161501-3-dmitry.baryshkov@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 46 ++++++++++++++++++++----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index ae0d7b49dfa36..ba01338d93acd 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -14,6 +14,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_address.h>
+#include <linux/phy/pcie.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
@@ -1320,10 +1321,14 @@ struct qmp_phy_cfg {
 	/* Main init sequence for PHY blocks - serdes, tx, rx, pcs */
 	const struct qmp_phy_cfg_tables tables;
 	/*
-	 * Additional init sequence for PHY blocks, providing additional
-	 * register programming. Unless required it can be left omitted.
+	 * Additional init sequences for PHY blocks, providing additional
+	 * register programming. They are used for providing separate sequences
+	 * for the Root Complex and End Point use cases.
+	 *
+	 * If EP mode is not supported, both tables can be left unset.
 	 */
 	const struct qmp_phy_cfg_tables *tables_rc;
+	const struct qmp_phy_cfg_tables *tables_ep;
 
 	/* clock ids to be requested */
 	const char * const *clk_list;
@@ -1367,6 +1372,7 @@ struct qmp_phy_cfg {
  * @pcs_misc: iomapped memory space for lane's pcs_misc
  * @pipe_clk: pipe clock
  * @qmp: QMP phy to which this lane belongs
+ * @mode: currently selected PHY mode
  */
 struct qmp_phy {
 	struct phy *phy;
@@ -1380,6 +1386,7 @@ struct qmp_phy {
 	void __iomem *pcs_misc;
 	struct clk *pipe_clk;
 	struct qcom_qmp *qmp;
+	int mode;
 };
 
 /**
@@ -1991,13 +1998,19 @@ static int qmp_pcie_power_on(struct phy *phy)
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	struct qcom_qmp *qmp = qphy->qmp;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	const struct qmp_phy_cfg_tables *mode_tables;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *status;
 	unsigned int mask, val, ready;
 	int ret;
 
+	if (qphy->mode == PHY_MODE_PCIE_RC)
+		mode_tables = cfg->tables_rc;
+	else
+		mode_tables = cfg->tables_ep;
+
 	qmp_pcie_serdes_init(qphy, &cfg->tables);
-	qmp_pcie_serdes_init(qphy, cfg->tables_rc);
+	qmp_pcie_serdes_init(qphy, mode_tables);
 
 	ret = clk_prepare_enable(qphy->pipe_clk);
 	if (ret) {
@@ -2007,10 +2020,10 @@ static int qmp_pcie_power_on(struct phy *phy)
 
 	/* Tx, Rx, and PCS configurations */
 	qmp_pcie_lanes_init(qphy, &cfg->tables);
-	qmp_pcie_lanes_init(qphy, cfg->tables_rc);
+	qmp_pcie_lanes_init(qphy, mode_tables);
 
 	qmp_pcie_pcs_init(qphy, &cfg->tables);
-	qmp_pcie_pcs_init(qphy, cfg->tables_rc);
+	qmp_pcie_pcs_init(qphy, mode_tables);
 
 	/*
 	 * Pull out PHY from POWER DOWN state.
@@ -2097,6 +2110,23 @@ static int qmp_pcie_disable(struct phy *phy)
 	return qmp_pcie_exit(phy);
 }
 
+static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode)
+{
+	struct qmp_phy *qphy = phy_get_drvdata(phy);
+
+	switch (submode) {
+	case PHY_MODE_PCIE_RC:
+	case PHY_MODE_PCIE_EP:
+		qphy->mode = submode;
+		break;
+	default:
+		dev_err(&phy->dev, "Unsupported submode %d\n", submode);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
 	struct qcom_qmp *qmp = dev_get_drvdata(dev);
@@ -2220,6 +2250,7 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np)
 static const struct phy_ops qmp_pcie_ops = {
 	.power_on	= qmp_pcie_enable,
 	.power_off	= qmp_pcie_disable,
+	.set_mode	= qmp_pcie_set_mode,
 	.owner		= THIS_MODULE,
 };
 
@@ -2235,6 +2266,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id,
 	if (!qphy)
 		return -ENOMEM;
 
+	qphy->mode = PHY_MODE_PCIE_RC;
+
 	qphy->cfg = cfg;
 	qphy->serdes = serdes;
 	/*
@@ -2278,7 +2311,8 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id,
 
 	if (IS_ERR(qphy->pcs_misc)) {
 		if (cfg->tables.pcs_misc ||
-		    (cfg->tables_rc && cfg->tables_rc->pcs_misc))
+		    (cfg->tables_rc && cfg->tables_rc->pcs_misc) ||
+		    (cfg->tables_ep && cfg->tables_ep->pcs_misc))
 			return PTR_ERR(qphy->pcs_misc);
 	}
 
-- 
GitLab


From f5682f13b7ab0bbdffd11934afe4b5c011d5be74 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 27 Sep 2022 12:22:04 +0300
Subject: [PATCH 0062/4122] phy: qcom-qmp-pcie: Support SM8450 PCIe1 PHY in EP
 mode

Add support for using PCIe1 (gen4x2) in EP mode on SM8450. The tables to
program are mostly common with the RC mode tables, so only register
difference are split into separate RC and EP tables.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20220927092207.161501-4-dmitry.baryshkov@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c      | 78 +++++++++++++++----
 .../qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h    |  1 +
 2 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index ba01338d93acd..f3f75eda01a6d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1185,15 +1185,29 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen3x1_pcie_pcs_misc_tbl[] = {
 };
 
 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x46),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_CFG, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x12),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MISC1, 0x88),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_CONFIG, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MODE, 0x14),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_DC_LEVEL_CTRL, 0x0f),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_serdes_tbl[] = {
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x97),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x0c),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16),
@@ -1201,8 +1215,6 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = {
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x46),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_CFG, 0x04),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14),
@@ -1215,17 +1227,8 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_serdes_tbl[] = {
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0x55),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0x55),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x05),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x12),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE0, 0x0a),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x04),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MISC1, 0x88),
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORE_CLK_EN, 0x20),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_CONFIG, 0x06),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CMN_MODE, 0x14),
-	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_DC_LEVEL_CTRL, 0x0f),
 };
 
 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_tx_tbl[] = {
@@ -1293,14 +1296,44 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_tbl[] = {
 };
 
 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
-	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
-	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5, 0x02),
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_EQ_CONFIG1, 0x16),
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3, 0x28),
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN, 0x2e),
 };
 
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BG_TIMER, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYS_CLK_CTRL, 0x07),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x27),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x17),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x19),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x03),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x04),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x09),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x19),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x28),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN0_MODE0, 0xfb),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN1_MODE0, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN0_MODE1, 0xfb),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_INTEGLOOP_GAIN1_MODE1, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORE_CLK_EN, 0x60),
+};
+
+static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08),
+};
+
 struct qmp_phy_cfg_tables {
 	const struct qmp_phy_init_tbl *serdes;
 	int serdes_num;
@@ -1836,6 +1869,21 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 		.pcs_misc	= sm8450_qmp_gen4x2_pcie_pcs_misc_tbl,
 		.pcs_misc_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl),
 	},
+
+	.tables_rc = &(const struct qmp_phy_cfg_tables) {
+		.serdes		= sm8450_qmp_gen4x2_pcie_rc_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_serdes_tbl),
+		.pcs_misc	= sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl),
+	},
+
+	.tables_ep = &(const struct qmp_phy_cfg_tables) {
+		.serdes		= sm8450_qmp_gen4x2_pcie_ep_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_serdes_tbl),
+		.pcs_misc	= sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl),
+	},
+
 	.clk_list		= sdm845_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(sdm845_pciephy_clk_l),
 	.reset_list		= sdm845_pciephy_reset_l,
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h
index 1eedf50cf9cbc..c9fa90b454756 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h
@@ -8,6 +8,7 @@
 
 /* Only for QMP V5_20 PHY - PCIe PCS registers */
 #define QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE	0x01c
+#define QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5	0x084
 #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS		0x090
 #define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1			0x0a0
 #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5		0x108
-- 
GitLab


From f90747d1b641aad244cca7d6aa20aa25f33ae8e4 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 27 Sep 2022 12:22:05 +0300
Subject: [PATCH 0063/4122] PCI: qcom: Setup PHY to work in RC mode

Call phy_set_mode_ext() to notify the PHY driver that the PHY is being
used in the RC mode.

Reviewed-by: Jingoo Han <jingoohan1@gmail.com>
Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Link: https://lore.kernel.org/r/20220927092207.161501-5-dmitry.baryshkov@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index f711acacaeaf8..7db94a22238d1 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -22,6 +22,7 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
+#include <linux/phy/pcie.h>
 #include <linux/phy/phy.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
@@ -1497,6 +1498,10 @@ static int qcom_pcie_host_init(struct dw_pcie_rp *pp)
 	if (ret)
 		return ret;
 
+	ret = phy_set_mode_ext(pcie->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_RC);
+	if (ret)
+		goto err_deinit;
+
 	ret = phy_power_on(pcie->phy);
 	if (ret)
 		goto err_deinit;
-- 
GitLab


From a84ed1919fb3fc767ae3aad13bbff8ea8eaceedd Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Tue, 27 Sep 2022 12:22:06 +0300
Subject: [PATCH 0064/4122] PCI: qcom-ep: Setup PHY to work in EP mode

Call phy_set_mode_ext() to notify the PHY driver that the PHY is being
used in the EP mode.

Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Reviewed-by: Jingoo Han <jingoohan1@gmail.com>
Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Acked-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Link: https://lore.kernel.org/r/20220927092207.161501-6-dmitry.baryshkov@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/pci/controller/dwc/pcie-qcom-ep.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/pci/controller/dwc/pcie-qcom-ep.c b/drivers/pci/controller/dwc/pcie-qcom-ep.c
index 6d0d1b759ca24..19b32839ea261 100644
--- a/drivers/pci/controller/dwc/pcie-qcom-ep.c
+++ b/drivers/pci/controller/dwc/pcie-qcom-ep.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
 #include <linux/mfd/syscon.h>
+#include <linux/phy/pcie.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/pm_domain.h>
@@ -268,6 +269,10 @@ static int qcom_pcie_enable_resources(struct qcom_pcie_ep *pcie_ep)
 	if (ret)
 		goto err_disable_clk;
 
+	ret = phy_set_mode_ext(pcie_ep->phy, PHY_MODE_PCIE, PHY_MODE_PCIE_EP);
+	if (ret)
+		goto err_phy_exit;
+
 	ret = phy_power_on(pcie_ep->phy);
 	if (ret)
 		goto err_phy_exit;
-- 
GitLab


From 8d3bf72497a8def5dc75e10a2229f1c692598b97 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:03 +0200
Subject: [PATCH 0065/4122] phy: qcom-qmp: fix obsolete lane comments

All QMP drivers but the MSM8996 and combo ones handle exactly one PHY
and the corresponding memory resources are not per-lane, but per PHY.

Update the obsolete comments.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c        | 2 +-
 drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 2 +-
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c         | 2 +-
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c          | 2 +-
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c          | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 9807c4d935cdb..8a2a35c0855b4 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2669,7 +2669,7 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
 	qphy->cfg = cfg;
 	qphy->serdes = serdes;
 	/*
-	 * Get memory resources for each phy lane:
+	 * Get memory resources for each PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 461f0b5d464a8..707ec81c7a2ae 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -777,7 +777,7 @@ static int qmp_pcie_msm8996_create(struct device *dev, struct device_node *np, i
 	qphy->cfg = cfg;
 	qphy->serdes = serdes;
 	/*
-	 * Get memory resources for each phy lane:
+	 * Get memory resources for each PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 */
 	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index f3f75eda01a6d..e0408c423ac6d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2319,7 +2319,7 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id,
 	qphy->cfg = cfg;
 	qphy->serdes = serdes;
 	/*
-	 * Get memory resources for each phy lane:
+	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index c08d34ad1313e..db5642e1f7154 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -1094,7 +1094,7 @@ static int qmp_ufs_create(struct device *dev, struct device_node *np, int id,
 	qphy->cfg = cfg;
 	qphy->serdes = serdes;
 	/*
-	 * Get memory resources for each phy lane:
+	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index b84c0d4b57541..965e486ab87df 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2581,7 +2581,7 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id,
 	qphy->cfg = cfg;
 	qphy->serdes = serdes;
 	/*
-	 * Get memory resources for each phy lane:
+	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
-- 
GitLab


From f823346de8b1fa44bbab3ef62d40e9616332c7ee Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:04 +0200
Subject: [PATCH 0066/4122] phy: qcom-qmp-combo: drop unused UFS reset

Drop the unused UFS reset code which isn't used since the QMP driver
split.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 8a2a35c0855b4..c21512b9ab528 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -944,7 +944,6 @@ struct qmp_phy_dp_clks {
  * @phys: array of per-lane phy descriptors
  * @phy_mutex: mutex lock for PHY common block initialization
  * @init_count: phy common block initialization count
- * @ufs_reset: optional UFS PHY reset handle
  */
 struct qcom_qmp {
 	struct device *dev;
@@ -958,8 +957,6 @@ struct qcom_qmp {
 
 	struct mutex phy_mutex;
 	int init_count;
-
-	struct reset_control *ufs_reset;
 };
 
 static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy);
@@ -2027,8 +2024,6 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy)
 		return 0;
 	}
 
-	reset_control_assert(qmp->ufs_reset);
-
 	reset_control_bulk_assert(cfg->num_resets, qmp->resets);
 
 	clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks);
@@ -2103,10 +2098,6 @@ static int qmp_combo_power_on(struct phy *phy)
 	else
 		qmp_combo_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
-	ret = reset_control_deassert(qmp->ufs_reset);
-	if (ret)
-		goto err_disable_pipe_clk;
-
 	if (cfg->has_pwrdn_delay)
 		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
 
-- 
GitLab


From 4567bb1799d253ceb81ba9c9837ae13a86e4b50a Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:05 +0200
Subject: [PATCH 0067/4122] phy: qcom-qmp-pcie: drop unused common-block
 registers

Drop the common-block register defines that are unused since the QMP
driver split.

Reviewed-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index e0408c423ac6d..bdbd18b89a8cd 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -78,11 +78,6 @@ struct qmp_phy_init_tbl {
 
 /* set of registers with offsets different per-PHY */
 enum qphy_reg_layout {
-	/* Common block control registers */
-	QPHY_COM_SW_RESET,
-	QPHY_COM_POWER_DOWN_CONTROL,
-	QPHY_COM_START_CONTROL,
-	QPHY_COM_PCS_READY_STATUS,
 	/* PCS registers */
 	QPHY_SW_RESET,
 	QPHY_START_CTRL,
@@ -100,10 +95,6 @@ static const unsigned int ipq_pciephy_gen3_regs_layout[QPHY_LAYOUT_SIZE] = {
 };
 
 static const unsigned int pciephy_regs_layout[QPHY_LAYOUT_SIZE] = {
-	[QPHY_COM_SW_RESET]		= 0x400,
-	[QPHY_COM_POWER_DOWN_CONTROL]	= 0x404,
-	[QPHY_COM_START_CONTROL]	= 0x408,
-	[QPHY_COM_PCS_READY_STATUS]	= 0x448,
 	[QPHY_SW_RESET]			= 0x00,
 	[QPHY_START_CTRL]		= 0x08,
 	[QPHY_PCS_STATUS]		= 0x174,
-- 
GitLab


From 6d5b1e2067aef151747b4ec1cd927d44b61e4293 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:06 +0200
Subject: [PATCH 0068/4122] phy: qcom-qmp-pcie: clean up power-down handling

Always define the POWER_DOWN_CONTROL register instead of falling back to
the v2 offset during power on and power off.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index bdbd18b89a8cd..1105d439828c7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -98,18 +98,21 @@ static const unsigned int pciephy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_SW_RESET]			= 0x00,
 	[QPHY_START_CTRL]		= 0x08,
 	[QPHY_PCS_STATUS]		= 0x174,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int sdm845_qmp_pciephy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_SW_RESET]			= 0x00,
 	[QPHY_START_CTRL]		= 0x08,
 	[QPHY_PCS_STATUS]		= 0x174,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int sdm845_qhp_pciephy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_SW_RESET]			= 0x00,
 	[QPHY_START_CTRL]		= 0x08,
 	[QPHY_PCS_STATUS]		= 0x2ac,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int sm8250_pcie_regs_layout[QPHY_LAYOUT_SIZE] = {
@@ -1999,13 +2002,8 @@ static int qmp_pcie_init(struct phy *phy)
 	if (ret)
 		goto err_assert_reset;
 
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
-		qphy_setbits(pcs,
-				cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-				cfg->pwrdn_ctrl);
-	else
-		qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-				cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
 
 	return 0;
 
@@ -2112,13 +2110,8 @@ static int qmp_pcie_power_off(struct phy *phy)
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	/* Put PHY into POWER DOWN state: active low */
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) {
-		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			     cfg->pwrdn_ctrl);
-	} else {
-		qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-				cfg->pwrdn_ctrl);
-	}
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
 
 	return 0;
 }
-- 
GitLab


From 5b68d95c3fc72b4a89b9c7549e1ef638a01a3e15 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:07 +0200
Subject: [PATCH 0069/4122] phy: qcom-qmp-pcie: move power-down update

Move the power-down-control register update that powers on the PHY to
the power-on handler so that it matches the power-off handler.

Note that the power-on handler is currently always called directly
after init.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 1105d439828c7..b42c5e185228c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1976,7 +1976,6 @@ static int qmp_pcie_init(struct phy *phy)
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	struct qcom_qmp *qmp = qphy->qmp;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs = qphy->pcs;
 	int ret;
 
 	/* turn on regulator supplies */
@@ -2002,9 +2001,6 @@ static int qmp_pcie_init(struct phy *phy)
 	if (ret)
 		goto err_assert_reset;
 
-	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			cfg->pwrdn_ctrl);
-
 	return 0;
 
 err_assert_reset:
@@ -2041,6 +2037,9 @@ static int qmp_pcie_power_on(struct phy *phy)
 	unsigned int mask, val, ready;
 	int ret;
 
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
+
 	if (qphy->mode == PHY_MODE_PCIE_RC)
 		mode_tables = cfg->tables_rc;
 	else
-- 
GitLab


From 4d3701f94f274ac67cddd1e87a2311a2a40c0138 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:08 +0200
Subject: [PATCH 0070/4122] phy: qcom-qmp-pcie-msm8996: clean up power-down
 handling

This driver uses v2 registers only so drop the unnecessary
POWER_DOWN_CONTROL override.

Note that this register is already hard-coded when powering on the PHY.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 707ec81c7a2ae..5fdd85a1dc3ef 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -91,7 +91,6 @@ enum qphy_reg_layout {
 	QPHY_SW_RESET,
 	QPHY_START_CTRL,
 	QPHY_PCS_STATUS,
-	QPHY_PCS_POWER_DOWN_CONTROL,
 	/* Keep last to ensure regs_layout arrays are properly initialized */
 	QPHY_LAYOUT_SIZE
 };
@@ -591,13 +590,8 @@ static int qmp_pcie_msm8996_power_off(struct phy *phy)
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	/* Put PHY into POWER DOWN state: active low */
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) {
-		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			     cfg->pwrdn_ctrl);
-	} else {
-		qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-				cfg->pwrdn_ctrl);
-	}
+	qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
+			cfg->pwrdn_ctrl);
 
 	return 0;
 }
-- 
GitLab


From 2e52ddf045a08fcae8dc4c88d10aa01252dd4165 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:09 +0200
Subject: [PATCH 0071/4122] phy: qcom-qmp-combo: clean up power-down handling

Always define the POWER_DOWN_CONTROL register instead of falling back to
the v2 (and v3) offset during power on and power off.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 19 +++++--------------
 1 file changed, 5 insertions(+), 14 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index c21512b9ab528..7b434e2ee640e 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -121,6 +121,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_SW_RESET]			= 0x00,
 	[QPHY_START_CTRL]		= 0x08,
 	[QPHY_PCS_STATUS]		= 0x174,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 	[QPHY_PCS_AUTONOMOUS_MODE_CTRL]	= 0x0d8,
 	[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR]  = 0x0dc,
 	[QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170,
@@ -1991,13 +1992,8 @@ static int qmp_combo_com_init(struct qmp_phy *qphy)
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03);
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
-		qphy_setbits(pcs,
-				cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-				cfg->pwrdn_ctrl);
-	else
-		qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-				cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
 
 	mutex_unlock(&qmp->phy_mutex);
 
@@ -2144,13 +2140,8 @@ static int qmp_combo_power_off(struct phy *phy)
 		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 		/* Put PHY into POWER DOWN state: active low */
-		if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) {
-			qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-				     cfg->pwrdn_ctrl);
-		} else {
-			qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-					cfg->pwrdn_ctrl);
-		}
+		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+				cfg->pwrdn_ctrl);
 	}
 
 	return 0;
-- 
GitLab


From 2d3068cf8d9aa80cfbe2dd4226abbf425c26f8b7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:10 +0200
Subject: [PATCH 0072/4122] phy: qcom-qmp-ufs: clean up power-down handling

Always define the POWER_DOWN_CONTROL register instead of falling back to
the v2 (and v4) offset during power on and power off.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 22 ++++++++--------------
 1 file changed, 8 insertions(+), 14 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index db5642e1f7154..e28c45ab74ea2 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -89,22 +89,26 @@ enum qphy_reg_layout {
 static const unsigned int msm8996_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_START_CTRL]		= 0x00,
 	[QPHY_PCS_READY_STATUS]		= 0x168,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int sdm845_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_START_CTRL]		= 0x00,
 	[QPHY_PCS_READY_STATUS]		= 0x160,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int sm6115_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_START_CTRL]		= 0x00,
 	[QPHY_PCS_READY_STATUS]		= 0x168,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int sm8150_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_START_CTRL]		= QPHY_V4_PCS_UFS_PHY_START,
 	[QPHY_PCS_READY_STATUS]		= QPHY_V4_PCS_UFS_READY_STATUS,
 	[QPHY_SW_RESET]			= QPHY_V4_PCS_UFS_SW_RESET,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= QPHY_V4_PCS_UFS_POWER_DOWN_CONTROL,
 };
 
 static const struct qmp_phy_init_tbl msm8996_ufs_serdes_tbl[] = {
@@ -856,13 +860,8 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy)
 	if (ret)
 		goto err_disable_regulators;
 
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
-		qphy_setbits(pcs,
-			     cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			     cfg->pwrdn_ctrl);
-	else
-		qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-			     cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
 
 	return 0;
 
@@ -996,13 +995,8 @@ static int qmp_ufs_power_off(struct phy *phy)
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	/* Put PHY into POWER DOWN state: active low */
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) {
-		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			     cfg->pwrdn_ctrl);
-	} else {
-		qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-				cfg->pwrdn_ctrl);
-	}
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
 
 	return 0;
 }
-- 
GitLab


From 645d3d04702401e002928b934b830bd25be9e277 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:11 +0200
Subject: [PATCH 0073/4122] phy: qcom-qmp-usb: clean up power-down handling

Always define the POWER_DOWN_CONTROL register instead of falling back to
the v2 (and v3) offset during power on and power off.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 965e486ab87df..b0b13fb6cb59e 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -126,6 +126,7 @@ static const unsigned int usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_PCS_AUTONOMOUS_MODE_CTRL]	= 0x0d4,
 	[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR]  = 0x0d8,
 	[QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x178,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
@@ -135,6 +136,7 @@ static const unsigned int qmp_v3_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
 	[QPHY_PCS_AUTONOMOUS_MODE_CTRL]	= 0x0d8,
 	[QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR]  = 0x0dc,
 	[QPHY_PCS_LFPS_RXTERM_IRQ_STATUS] = 0x170,
+	[QPHY_PCS_POWER_DOWN_CONTROL]	= 0x04,
 };
 
 static const unsigned int qmp_v4_usb3phy_regs_layout[QPHY_LAYOUT_SIZE] = {
@@ -2164,13 +2166,8 @@ static int qmp_usb_init(struct phy *phy)
 		qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 	}
 
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL])
-		qphy_setbits(pcs,
-			     cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			     cfg->pwrdn_ctrl);
-	else
-		qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-			     cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
 
 	return 0;
 
@@ -2277,13 +2274,8 @@ static int qmp_usb_power_off(struct phy *phy)
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	/* Put PHY into POWER DOWN state: active low */
-	if (cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL]) {
-		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			     cfg->pwrdn_ctrl);
-	} else {
-		qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-				cfg->pwrdn_ctrl);
-	}
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			cfg->pwrdn_ctrl);
 
 	return 0;
 }
-- 
GitLab


From 5b76f5ec63e0bfd20d955fc9d09dc2cff7742bec Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:12 +0200
Subject: [PATCH 0074/4122] phy: qcom-qmp-pcie: clean up clock lists

Keep the clock lists together and sorted by symbol name.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index b42c5e185228c..cb2128e5a78d4 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1462,6 +1462,10 @@ static inline void qphy_clrbits(void __iomem *base, u32 offset, u32 val)
 }
 
 /* list of clocks required by phy */
+static const char * const ipq8074_pciephy_clk_l[] = {
+	"aux", "cfg_ahb",
+};
+
 static const char * const msm8996_phy_clk_l[] = {
 	"aux", "cfg_ahb", "ref",
 };
@@ -1476,10 +1480,6 @@ static const char * const qmp_phy_vreg_l[] = {
 	"vdda-phy", "vdda-pll",
 };
 
-static const char * const ipq8074_pciephy_clk_l[] = {
-	"aux", "cfg_ahb",
-};
-
 /* list of resets */
 static const char * const ipq8074_pciephy_reset_l[] = {
 	"phy", "common",
-- 
GitLab


From 2d93887cb4bac0a36ce9e146956f631ab7994680 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 17 Oct 2022 08:50:13 +0200
Subject: [PATCH 0075/4122] phy: qcom-qmp-pcie: drop bogus register update

Since commit 0d58280cf1e6 ("phy: Update PHY power control sequence") the
PHY is powered on before configuring the registers and only the MSM8996
PCIe PHY, which includes the POWER_DOWN_CONTROL register in its PCS
initialisation table, may possibly require a second update afterwards.

To make things worse, the POWER_DOWN_CONTROL register lies at a
different offset on more recent SoCs so that the second update, which
still used a hard-coded offset, would write to an unrelated register
(e.g. a revision-id register on SC8280XP).

As the MSM8996 PCIe PHY is now handled by a separate driver, simply drop
the bogus register update.

Fixes: e4d8b05ad5f9 ("phy: qcom-qmp: Use proper PWRDOWN offset for sm8150 USB") added support
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> #RB3
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221017065013.19647-12-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index cb2128e5a78d4..30838ae8f0272 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2061,12 +2061,6 @@ static int qmp_pcie_power_on(struct phy *phy)
 	qmp_pcie_pcs_init(qphy, &cfg->tables);
 	qmp_pcie_pcs_init(qphy, mode_tables);
 
-	/*
-	 * Pull out PHY from POWER DOWN state.
-	 * This is active low enable signal to power-down PHY.
-	 */
-	qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl);
-
 	if (cfg->has_pwrdn_delay)
 		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
 
-- 
GitLab


From bf08ce132cd069afc45635e1ffeb6adb0523cc60 Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Wed, 12 Oct 2022 16:25:23 +0200
Subject: [PATCH 0076/4122] drivers/gpio: use simple i2c probe

All these drivers have an i2c probe function which doesn't use the
"struct i2c_device_id *id" parameter, so they can trivially be
converted to the "probe_new" style of probe with a single argument.

This is part of an ongoing transition to single-argument i2c probe
functions. Old-style probe functions involve a call to i2c_match_id:
in drivers/i2c/i2c-core-base.c,

         /*
          * When there are no more users of probe(),
          * rename probe_new to probe.
          */
         if (driver->probe_new)
                 status = driver->probe_new(client);
         else if (driver->probe)
                 status = driver->probe(client,
                                        i2c_match_id(driver->id_table, client));
         else
                 status = -EINVAL;

Drivers which don't need the second parameter can be declared using
probe_new instead, avoiding the call to i2c_match_id. Drivers which do
can still be converted to probe_new-style, calling i2c_match_id
themselves (as is done currently for of_match_id).

This change was done using the following Coccinelle script, and fixed
up for whitespace changes:

@ rule1 @
identifier fn;
identifier client, id;
@@

- static int fn(struct i2c_client *client, const struct i2c_device_id *id)
+ static int fn(struct i2c_client *client)
{
...when != id
}

@ rule2 depends on rule1 @
identifier rule1.fn;
identifier driver;
@@

struct i2c_driver driver = {
-       .probe
+       .probe_new
                =
(
                   fn
|
-                  &fn
+                  fn
)
                ,
};

Signed-off-by: Stephen Kitt <steve@sk2.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-gw-pld.c   | 5 ++---
 drivers/gpio/gpio-max7300.c  | 5 ++---
 drivers/gpio/gpio-tpic2810.c | 5 ++---
 drivers/gpio/gpio-ts4900.c   | 5 ++---
 4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/gpio/gpio-gw-pld.c b/drivers/gpio/gpio-gw-pld.c
index 2109803ffb386..5057fa9ad6108 100644
--- a/drivers/gpio/gpio-gw-pld.c
+++ b/drivers/gpio/gpio-gw-pld.c
@@ -67,8 +67,7 @@ static void gw_pld_set8(struct gpio_chip *gc, unsigned offset, int value)
 	gw_pld_output8(gc, offset, value);
 }
 
-static int gw_pld_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int gw_pld_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 	struct gw_pld *gw;
@@ -126,7 +125,7 @@ static struct i2c_driver gw_pld_driver = {
 		.name = "gw_pld",
 		.of_match_table = gw_pld_dt_ids,
 	},
-	.probe = gw_pld_probe,
+	.probe_new = gw_pld_probe,
 	.id_table = gw_pld_id,
 };
 module_i2c_driver(gw_pld_driver);
diff --git a/drivers/gpio/gpio-max7300.c b/drivers/gpio/gpio-max7300.c
index 43da381a4d7e7..cf482f4f00982 100644
--- a/drivers/gpio/gpio-max7300.c
+++ b/drivers/gpio/gpio-max7300.c
@@ -28,8 +28,7 @@ static int max7300_i2c_read(struct device *dev, unsigned int reg)
 	return i2c_smbus_read_byte_data(client, reg);
 }
 
-static int max7300_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int max7300_probe(struct i2c_client *client)
 {
 	struct max7301 *ts;
 
@@ -63,7 +62,7 @@ static struct i2c_driver max7300_driver = {
 	.driver = {
 		.name = "max7300",
 	},
-	.probe = max7300_probe,
+	.probe_new = max7300_probe,
 	.remove = max7300_remove,
 	.id_table = max7300_id,
 };
diff --git a/drivers/gpio/gpio-tpic2810.c b/drivers/gpio/gpio-tpic2810.c
index d642c35cb97c0..349c5fbd9b027 100644
--- a/drivers/gpio/gpio-tpic2810.c
+++ b/drivers/gpio/gpio-tpic2810.c
@@ -98,8 +98,7 @@ static const struct of_device_id tpic2810_of_match_table[] = {
 };
 MODULE_DEVICE_TABLE(of, tpic2810_of_match_table);
 
-static int tpic2810_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int tpic2810_probe(struct i2c_client *client)
 {
 	struct tpic2810 *gpio;
 	int ret;
@@ -144,7 +143,7 @@ static struct i2c_driver tpic2810_driver = {
 		.name = "tpic2810",
 		.of_match_table = tpic2810_of_match_table,
 	},
-	.probe = tpic2810_probe,
+	.probe_new = tpic2810_probe,
 	.remove = tpic2810_remove,
 	.id_table = tpic2810_id_table,
 };
diff --git a/drivers/gpio/gpio-ts4900.c b/drivers/gpio/gpio-ts4900.c
index 416725c26e949..43e8b66e04f78 100644
--- a/drivers/gpio/gpio-ts4900.c
+++ b/drivers/gpio/gpio-ts4900.c
@@ -136,8 +136,7 @@ static const struct of_device_id ts4900_gpio_of_match_table[] = {
 };
 MODULE_DEVICE_TABLE(of, ts4900_gpio_of_match_table);
 
-static int ts4900_gpio_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ts4900_gpio_probe(struct i2c_client *client)
 {
 	struct ts4900_gpio_priv *priv;
 	u32 ngpio;
@@ -186,7 +185,7 @@ static struct i2c_driver ts4900_gpio_driver = {
 		.name = "ts4900-gpio",
 		.of_match_table = ts4900_gpio_of_match_table,
 	},
-	.probe = ts4900_gpio_probe,
+	.probe_new = ts4900_gpio_probe,
 	.id_table = ts4900_gpio_id_table,
 };
 module_i2c_driver(ts4900_gpio_driver);
-- 
GitLab


From 317627a4a19e2a6f8d60e3e2eefe6dfd87059d79 Mon Sep 17 00:00:00 2001
From: Davide Ciminaghi <ciminaghi@gnudd.com>
Date: Fri, 2 Sep 2022 14:42:01 +0200
Subject: [PATCH 0077/4122] gpio: Remove sta2x11 GPIO driver

The Connext chip has 4 gpio cells looking very similar to those of the
Nomadik, whose gpio/pinctrl driver (already featuring devicetree support)
will be used instead of the sta2x11 specific one.

Signed-off-by: Davide Ciminaghi <ciminaghi@gnudd.com>
Acked-by: Giancarlo Asnaghi <giancarlo.asnaghi@st.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig        |   8 -
 drivers/gpio/Makefile       |   1 -
 drivers/gpio/gpio-sta2x11.c | 411 ------------------------------------
 3 files changed, 420 deletions(-)
 delete mode 100644 drivers/gpio/gpio-sta2x11.c

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index a01af11806164..e034f752e7cef 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -600,14 +600,6 @@ config GPIO_SPRD
 	help
 	  Say yes here to support Spreadtrum GPIO device.
 
-config GPIO_STA2X11
-	bool "STA2x11/ConneXt GPIO support"
-	depends on MFD_STA2X11
-	select GENERIC_IRQ_CHIP
-	help
-	  Say yes here to support the STA2x11/ConneXt GPIO device.
-	  The GPIO module has 128 GPIO pins with alternate functions.
-
 config GPIO_STP_XWAY
 	bool "XWAY STP GPIOs"
 	depends on SOC_XWAY || COMPILE_TEST
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 29e3beb6548cb..84fae267e8ebf 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -140,7 +140,6 @@ obj-$(CONFIG_GPIO_SL28CPLD)		+= gpio-sl28cpld.o
 obj-$(CONFIG_GPIO_SODAVILLE)		+= gpio-sodaville.o
 obj-$(CONFIG_GPIO_SPEAR_SPICS)		+= gpio-spear-spics.o
 obj-$(CONFIG_GPIO_SPRD)			+= gpio-sprd.o
-obj-$(CONFIG_GPIO_STA2X11)		+= gpio-sta2x11.o
 obj-$(CONFIG_GPIO_STMPE)		+= gpio-stmpe.o
 obj-$(CONFIG_GPIO_STP_XWAY)		+= gpio-stp-xway.o
 obj-$(CONFIG_GPIO_SYSCON)		+= gpio-syscon.o
diff --git a/drivers/gpio/gpio-sta2x11.c b/drivers/gpio/gpio-sta2x11.c
deleted file mode 100644
index e07cca0f8d353..0000000000000
--- a/drivers/gpio/gpio-sta2x11.c
+++ /dev/null
@@ -1,411 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * STMicroelectronics ConneXt (STA2X11) GPIO driver
- *
- * Copyright 2012 ST Microelectronics (Alessandro Rubini)
- * Based on gpio-ml-ioh.c, Copyright 2010 OKI Semiconductors Ltd.
- * Also based on previous sta2x11 work, Copyright 2011 Wind River Systems, Inc.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/gpio/driver.h>
-#include <linux/bitops.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/pci.h>
-#include <linux/platform_device.h>
-#include <linux/mfd/sta2x11-mfd.h>
-
-struct gsta_regs {
-	u32 dat;		/* 0x00 */
-	u32 dats;
-	u32 datc;
-	u32 pdis;
-	u32 dir;		/* 0x10 */
-	u32 dirs;
-	u32 dirc;
-	u32 unused_1c;
-	u32 afsela;		/* 0x20 */
-	u32 unused_24[7];
-	u32 rimsc;		/* 0x40 */
-	u32 fimsc;
-	u32 is;
-	u32 ic;
-};
-
-struct gsta_gpio {
-	spinlock_t			lock;
-	struct device			*dev;
-	void __iomem			*reg_base;
-	struct gsta_regs __iomem	*regs[GSTA_NR_BLOCKS];
-	struct gpio_chip		gpio;
-	int				irq_base;
-	/* FIXME: save the whole config here (AF, ...) */
-	unsigned			irq_type[GSTA_NR_GPIO];
-};
-
-/*
- * gpio methods
- */
-
-static void gsta_gpio_set(struct gpio_chip *gpio, unsigned nr, int val)
-{
-	struct gsta_gpio *chip = gpiochip_get_data(gpio);
-	struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK];
-	u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK);
-
-	if (val)
-		writel(bit, &regs->dats);
-	else
-		writel(bit, &regs->datc);
-}
-
-static int gsta_gpio_get(struct gpio_chip *gpio, unsigned nr)
-{
-	struct gsta_gpio *chip = gpiochip_get_data(gpio);
-	struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK];
-	u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK);
-
-	return !!(readl(&regs->dat) & bit);
-}
-
-static int gsta_gpio_direction_output(struct gpio_chip *gpio, unsigned nr,
-				      int val)
-{
-	struct gsta_gpio *chip = gpiochip_get_data(gpio);
-	struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK];
-	u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK);
-
-	writel(bit, &regs->dirs);
-	/* Data register after direction, otherwise pullup/down is selected */
-	if (val)
-		writel(bit, &regs->dats);
-	else
-		writel(bit, &regs->datc);
-	return 0;
-}
-
-static int gsta_gpio_direction_input(struct gpio_chip *gpio, unsigned nr)
-{
-	struct gsta_gpio *chip = gpiochip_get_data(gpio);
-	struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK];
-	u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK);
-
-	writel(bit, &regs->dirc);
-	return 0;
-}
-
-static int gsta_gpio_to_irq(struct gpio_chip *gpio, unsigned offset)
-{
-	struct gsta_gpio *chip = gpiochip_get_data(gpio);
-	return chip->irq_base + offset;
-}
-
-static void gsta_gpio_setup(struct gsta_gpio *chip) /* called from probe */
-{
-	struct gpio_chip *gpio = &chip->gpio;
-
-	/*
-	 * ARCH_NR_GPIOS is currently 256 and dynamic allocation starts
-	 * from the end. However, for compatibility, we need the first
-	 * ConneXt device to start from gpio 0: it's the main chipset
-	 * on most boards so documents and drivers assume gpio0..gpio127
-	 */
-	static int gpio_base;
-
-	gpio->label = dev_name(chip->dev);
-	gpio->owner = THIS_MODULE;
-	gpio->direction_input = gsta_gpio_direction_input;
-	gpio->get = gsta_gpio_get;
-	gpio->direction_output = gsta_gpio_direction_output;
-	gpio->set = gsta_gpio_set;
-	gpio->dbg_show = NULL;
-	gpio->base = gpio_base;
-	gpio->ngpio = GSTA_NR_GPIO;
-	gpio->can_sleep = false;
-	gpio->to_irq = gsta_gpio_to_irq;
-
-	/*
-	 * After the first device, turn to dynamic gpio numbers.
-	 * For example, with ARCH_NR_GPIOS = 256 we can fit two cards
-	 */
-	if (!gpio_base)
-		gpio_base = -1;
-}
-
-/*
- * Special method: alternate functions and pullup/pulldown. This is only
- * invoked on startup to configure gpio's according to platform data.
- * FIXME : this functionality shall be managed (and exported to other drivers)
- * via the pin control subsystem.
- */
-static void gsta_set_config(struct gsta_gpio *chip, int nr, unsigned cfg)
-{
-	struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK];
-	unsigned long flags;
-	u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK);
-	u32 val;
-	int err = 0;
-
-	pr_info("%s: %p %i %i\n", __func__, chip, nr, cfg);
-
-	if (cfg == PINMUX_TYPE_NONE)
-		return;
-
-	/* Alternate function or not? */
-	spin_lock_irqsave(&chip->lock, flags);
-	val = readl(&regs->afsela);
-	if (cfg == PINMUX_TYPE_FUNCTION)
-		val |= bit;
-	else
-		val &= ~bit;
-	writel(val | bit, &regs->afsela);
-	if (cfg == PINMUX_TYPE_FUNCTION) {
-		spin_unlock_irqrestore(&chip->lock, flags);
-		return;
-	}
-
-	/* not alternate function: set details */
-	switch (cfg) {
-	case PINMUX_TYPE_OUTPUT_LOW:
-		writel(bit, &regs->dirs);
-		writel(bit, &regs->datc);
-		break;
-	case PINMUX_TYPE_OUTPUT_HIGH:
-		writel(bit, &regs->dirs);
-		writel(bit, &regs->dats);
-		break;
-	case PINMUX_TYPE_INPUT:
-		writel(bit, &regs->dirc);
-		val = readl(&regs->pdis) | bit;
-		writel(val, &regs->pdis);
-		break;
-	case PINMUX_TYPE_INPUT_PULLUP:
-		writel(bit, &regs->dirc);
-		val = readl(&regs->pdis) & ~bit;
-		writel(val, &regs->pdis);
-		writel(bit, &regs->dats);
-		break;
-	case PINMUX_TYPE_INPUT_PULLDOWN:
-		writel(bit, &regs->dirc);
-		val = readl(&regs->pdis) & ~bit;
-		writel(val, &regs->pdis);
-		writel(bit, &regs->datc);
-		break;
-	default:
-		err = 1;
-	}
-	spin_unlock_irqrestore(&chip->lock, flags);
-	if (err)
-		pr_err("%s: chip %p, pin %i, cfg %i is invalid\n",
-		       __func__, chip, nr, cfg);
-}
-
-/*
- * Irq methods
- */
-
-static void gsta_irq_disable(struct irq_data *data)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
-	struct gsta_gpio *chip = gc->private;
-	int nr = data->irq - chip->irq_base;
-	struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK];
-	u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK);
-	u32 val;
-	unsigned long flags;
-
-	spin_lock_irqsave(&chip->lock, flags);
-	if (chip->irq_type[nr] & IRQ_TYPE_EDGE_RISING) {
-		val = readl(&regs->rimsc) & ~bit;
-		writel(val, &regs->rimsc);
-	}
-	if (chip->irq_type[nr] & IRQ_TYPE_EDGE_FALLING) {
-		val = readl(&regs->fimsc) & ~bit;
-		writel(val, &regs->fimsc);
-	}
-	spin_unlock_irqrestore(&chip->lock, flags);
-	return;
-}
-
-static void gsta_irq_enable(struct irq_data *data)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(data);
-	struct gsta_gpio *chip = gc->private;
-	int nr = data->irq - chip->irq_base;
-	struct gsta_regs __iomem *regs = chip->regs[nr / GSTA_GPIO_PER_BLOCK];
-	u32 bit = BIT(nr % GSTA_GPIO_PER_BLOCK);
-	u32 val;
-	int type;
-	unsigned long flags;
-
-	type = chip->irq_type[nr];
-
-	spin_lock_irqsave(&chip->lock, flags);
-	val = readl(&regs->rimsc);
-	if (type & IRQ_TYPE_EDGE_RISING)
-		writel(val | bit, &regs->rimsc);
-	else
-		writel(val & ~bit, &regs->rimsc);
-	val = readl(&regs->rimsc);
-	if (type & IRQ_TYPE_EDGE_FALLING)
-		writel(val | bit, &regs->fimsc);
-	else
-		writel(val & ~bit, &regs->fimsc);
-	spin_unlock_irqrestore(&chip->lock, flags);
-	return;
-}
-
-static int gsta_irq_type(struct irq_data *d, unsigned int type)
-{
-	struct irq_chip_generic *gc = irq_data_get_irq_chip_data(d);
-	struct gsta_gpio *chip = gc->private;
-	int nr = d->irq - chip->irq_base;
-
-	/* We only support edge interrupts */
-	if (!(type & (IRQ_TYPE_EDGE_RISING | IRQ_TYPE_EDGE_FALLING))) {
-		pr_debug("%s: unsupported type 0x%x\n", __func__, type);
-		return -EINVAL;
-	}
-
-	chip->irq_type[nr] = type; /* used for enable/disable */
-
-	gsta_irq_enable(d);
-	return 0;
-}
-
-static irqreturn_t gsta_gpio_handler(int irq, void *dev_id)
-{
-	struct gsta_gpio *chip = dev_id;
-	struct gsta_regs __iomem *regs;
-	u32 is;
-	int i, nr, base;
-	irqreturn_t ret = IRQ_NONE;
-
-	for (i = 0; i < GSTA_NR_BLOCKS; i++) {
-		regs = chip->regs[i];
-		base = chip->irq_base + i * GSTA_GPIO_PER_BLOCK;
-		while ((is = readl(&regs->is))) {
-			nr = __ffs(is);
-			irq = base + nr;
-			generic_handle_irq(irq);
-			writel(1 << nr, &regs->ic);
-			ret = IRQ_HANDLED;
-		}
-	}
-	return ret;
-}
-
-static int gsta_alloc_irq_chip(struct gsta_gpio *chip)
-{
-	struct irq_chip_generic *gc;
-	struct irq_chip_type *ct;
-	int rv;
-
-	gc = devm_irq_alloc_generic_chip(chip->dev, KBUILD_MODNAME, 1,
-					 chip->irq_base,
-					 chip->reg_base, handle_simple_irq);
-	if (!gc)
-		return -ENOMEM;
-
-	gc->private = chip;
-	ct = gc->chip_types;
-
-	ct->chip.irq_set_type = gsta_irq_type;
-	ct->chip.irq_disable = gsta_irq_disable;
-	ct->chip.irq_enable = gsta_irq_enable;
-
-	/* FIXME: this makes at most 32 interrupts. Request 0 by now */
-	rv = devm_irq_setup_generic_chip(chip->dev, gc,
-					 0 /* IRQ_MSK(GSTA_GPIO_PER_BLOCK) */,
-					 0, IRQ_NOREQUEST | IRQ_NOPROBE, 0);
-	if (rv)
-		return rv;
-
-	/* Set up all 128 interrupts: code from setup_generic_chip */
-	{
-		struct irq_chip_type *ct = gc->chip_types;
-		int i, j;
-		for (j = 0; j < GSTA_NR_GPIO; j++) {
-			i = chip->irq_base + j;
-			irq_set_chip_and_handler(i, &ct->chip, ct->handler);
-			irq_set_chip_data(i, gc);
-			irq_clear_status_flags(i, IRQ_NOREQUEST | IRQ_NOPROBE);
-		}
-		gc->irq_cnt = i - gc->irq_base;
-	}
-
-	return 0;
-}
-
-/* The platform device used here is instantiated by the MFD device */
-static int gsta_probe(struct platform_device *dev)
-{
-	int i, err;
-	struct pci_dev *pdev;
-	struct sta2x11_gpio_pdata *gpio_pdata;
-	struct gsta_gpio *chip;
-
-	pdev = *(struct pci_dev **)dev_get_platdata(&dev->dev);
-	gpio_pdata = dev_get_platdata(&pdev->dev);
-
-	if (gpio_pdata == NULL)
-		dev_err(&dev->dev, "no gpio config\n");
-	pr_debug("gpio config: %p\n", gpio_pdata);
-
-	chip = devm_kzalloc(&dev->dev, sizeof(*chip), GFP_KERNEL);
-	if (!chip)
-		return -ENOMEM;
-	chip->dev = &dev->dev;
-	chip->reg_base = devm_platform_ioremap_resource(dev, 0);
-	if (IS_ERR(chip->reg_base))
-		return PTR_ERR(chip->reg_base);
-
-	for (i = 0; i < GSTA_NR_BLOCKS; i++) {
-		chip->regs[i] = chip->reg_base + i * 4096;
-		/* disable all irqs */
-		writel(0, &chip->regs[i]->rimsc);
-		writel(0, &chip->regs[i]->fimsc);
-		writel(~0, &chip->regs[i]->ic);
-	}
-	spin_lock_init(&chip->lock);
-	gsta_gpio_setup(chip);
-	if (gpio_pdata)
-		for (i = 0; i < GSTA_NR_GPIO; i++)
-			gsta_set_config(chip, i, gpio_pdata->pinconfig[i]);
-
-	/* 384 was used in previous code: be compatible for other drivers */
-	err = devm_irq_alloc_descs(&dev->dev, -1, 384,
-				   GSTA_NR_GPIO, NUMA_NO_NODE);
-	if (err < 0) {
-		dev_warn(&dev->dev, "sta2x11 gpio: Can't get irq base (%i)\n",
-			 -err);
-		return err;
-	}
-	chip->irq_base = err;
-
-	err = gsta_alloc_irq_chip(chip);
-	if (err)
-		return err;
-
-	err = devm_request_irq(&dev->dev, pdev->irq, gsta_gpio_handler,
-			       IRQF_SHARED, KBUILD_MODNAME, chip);
-	if (err < 0) {
-		dev_err(&dev->dev, "sta2x11 gpio: Can't request irq (%i)\n",
-			-err);
-		return err;
-	}
-
-	return devm_gpiochip_add_data(&dev->dev, &chip->gpio, chip);
-}
-
-static struct platform_driver sta2x11_gpio_platform_driver = {
-	.driver = {
-		.name	= "sta2x11-gpio",
-		.suppress_bind_attrs = true,
-	},
-	.probe = gsta_probe,
-};
-builtin_platform_driver(sta2x11_gpio_platform_driver);
-- 
GitLab


From 95b39792c6646322e0684f1a1aa395ee82b6f3fb Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:02 +0200
Subject: [PATCH 0078/4122] gpio: aggregator: Stop using ARCH_NR_GPIOS

ARCH_NR_GPIOS is used locally in aggr_parse() as the maximum number
of GPIOs to be aggregated together by the driver since
commit ec75039d5550 ("gpio: aggregator: Use bitmap_parselist() for
parsing GPIO offsets").

Don't rely on the total possible number of GPIOs in the system but
define a local arbitrary macro for that, set to 512 which should be
large enough as it is also the default value for ARCH_NR_GPIOS.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-aggregator.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-aggregator.c b/drivers/gpio/gpio-aggregator.c
index 0cb2664085cf8..6d17d262ad917 100644
--- a/drivers/gpio/gpio-aggregator.c
+++ b/drivers/gpio/gpio-aggregator.c
@@ -23,6 +23,7 @@
 #include <linux/spinlock.h>
 #include <linux/string.h>
 
+#define AGGREGATOR_MAX_GPIOS 512
 
 /*
  * GPIO Aggregator sysfs interface
@@ -64,7 +65,7 @@ static int aggr_parse(struct gpio_aggregator *aggr)
 	unsigned int i, n = 0;
 	int error = 0;
 
-	bitmap = bitmap_alloc(ARCH_NR_GPIOS, GFP_KERNEL);
+	bitmap = bitmap_alloc(AGGREGATOR_MAX_GPIOS, GFP_KERNEL);
 	if (!bitmap)
 		return -ENOMEM;
 
@@ -84,13 +85,13 @@ static int aggr_parse(struct gpio_aggregator *aggr)
 		}
 
 		/* GPIO chip + offset(s) */
-		error = bitmap_parselist(offsets, bitmap, ARCH_NR_GPIOS);
+		error = bitmap_parselist(offsets, bitmap, AGGREGATOR_MAX_GPIOS);
 		if (error) {
 			pr_err("Cannot parse %s: %d\n", offsets, error);
 			goto free_bitmap;
 		}
 
-		for_each_set_bit(i, bitmap, ARCH_NR_GPIOS) {
+		for_each_set_bit(i, bitmap, AGGREGATOR_MAX_GPIOS) {
 			error = aggr_add_gpio(aggr, name, i, &n);
 			if (error)
 				goto free_bitmap;
-- 
GitLab


From 95e827a1b0b7c8334d24da7b4a2d17ec5aa7374c Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:03 +0200
Subject: [PATCH 0079/4122] gpio: davinci: Stop using ARCH_NR_GPIOS

Since commit 14e85c0e69d5 ("gpio: remove gpio_descs global array")
there is no global limitation anymore on the number of GPIOs in
the system so don't clamp the number of GPIOs with ARCH_NR_GPIOS.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-davinci.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 59c4c48d8296b..1018860c83c27 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -217,9 +217,6 @@ static int davinci_gpio_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	if (WARN_ON(ARCH_NR_GPIOS < ngpio))
-		ngpio = ARCH_NR_GPIOS;
-
 	/*
 	 * If there are unbanked interrupts then the number of
 	 * interrupts is equal to number of gpios else all are banked so
-- 
GitLab


From 502df79b860563d79143be7a1453c2b3224cd836 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:04 +0200
Subject: [PATCH 0080/4122] gpiolib: Warn on drivers still using static
 gpiobase allocation

In the preparation of getting completely rid of static gpiobase
allocation in the future, emit a warning in drivers still doing so.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 4756ea08894f6..5c64d1a412c7a 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -715,6 +715,9 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 		 * a poison instead.
 		 */
 		gc->base = base;
+	} else {
+		dev_warn(&gdev->dev,
+			 "Static allocation of GPIO base is deprecated, use dynamic allocation.\n");
 	}
 	gdev->base = base;
 
-- 
GitLab


From 7b61212f2a07a5afd213c8876e52b5c9946441e2 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:05 +0200
Subject: [PATCH 0081/4122] gpiolib: Get rid of ARCH_NR_GPIOS

Since commit 14e85c0e69d5 ("gpio: remove gpio_descs global array")
there is no limitation on the number of GPIOs that can be allocated
in the system since the allocation is fully dynamic.

ARCH_NR_GPIOS is today only used in order to provide downwards
gpiobase allocation from that value, while static allocation is
performed upwards from 0. However that has the disadvantage of
limiting the number of GPIOs that can be registered in the system.

To overcome this limitation without requiring each and every
platform to provide its 'best-guess' maximum number, rework the
allocation to allocate upwards, allowing approx 2 millions of
GPIOs.

In order to still allow static allocation for legacy drivers, define
GPIO_DYNAMIC_BASE with the value 512 as the start for dynamic
allocation. The 512 value is chosen because it is the end of
the current default range so all current static allocations are
expected to be below that value. Of course that's just a rough
estimate based on the default value, but assuming static
allocations come first, even if there are more static allocations
it should fit under the 512 value.

In the future, it is expected that all static allocations go away
and then dynamic allocation will be patched to start at 0.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 arch/arm/include/asm/gpio.h |  1 -
 drivers/gpio/gpiolib.c      | 10 +++----
 include/asm-generic/gpio.h  | 55 ++++++++++++++-----------------------
 3 files changed, 26 insertions(+), 40 deletions(-)

diff --git a/arch/arm/include/asm/gpio.h b/arch/arm/include/asm/gpio.h
index f3bb8a2bf788e..4ebbb58f06ea6 100644
--- a/arch/arm/include/asm/gpio.h
+++ b/arch/arm/include/asm/gpio.h
@@ -2,7 +2,6 @@
 #ifndef _ARCH_ARM_GPIO_H
 #define _ARCH_ARM_GPIO_H
 
-/* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */
 #include <asm-generic/gpio.h>
 
 /* The trivial gpiolib dispatchers */
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 5c64d1a412c7a..e8faedca6b14f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -183,14 +183,14 @@ EXPORT_SYMBOL_GPL(gpiod_to_chip);
 static int gpiochip_find_base(int ngpio)
 {
 	struct gpio_device *gdev;
-	int base = ARCH_NR_GPIOS - ngpio;
+	int base = GPIO_DYNAMIC_BASE;
 
-	list_for_each_entry_reverse(gdev, &gpio_devices, list) {
+	list_for_each_entry(gdev, &gpio_devices, list) {
 		/* found a free space? */
-		if (gdev->base + gdev->ngpio <= base)
+		if (gdev->base >= base + ngpio)
 			break;
-		/* nope, check the space right before the chip */
-		base = gdev->base - ngpio;
+		/* nope, check the space right after the chip */
+		base = gdev->base + gdev->ngpio;
 	}
 
 	if (gpio_is_valid(base)) {
diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h
index aea9aee1f3e97..a7752cf152ce4 100644
--- a/include/asm-generic/gpio.h
+++ b/include/asm-generic/gpio.h
@@ -11,40 +11,18 @@
 #include <linux/gpio/driver.h>
 #include <linux/gpio/consumer.h>
 
-/* Platforms may implement their GPIO interface with library code,
+/*
+ * Platforms may implement their GPIO interface with library code,
  * at a small performance cost for non-inlined operations and some
  * extra memory (for code and for per-GPIO table entries).
- *
- * While the GPIO programming interface defines valid GPIO numbers
- * to be in the range 0..MAX_INT, this library restricts them to the
- * smaller range 0..ARCH_NR_GPIOS-1.
- *
- * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of
- * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is
- * actually an estimate of a board-specific value.
  */
 
-#ifndef ARCH_NR_GPIOS
-#if defined(CONFIG_ARCH_NR_GPIO) && CONFIG_ARCH_NR_GPIO > 0
-#define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO
-#else
-#define ARCH_NR_GPIOS		512
-#endif
-#endif
-
 /*
- * "valid" GPIO numbers are nonnegative and may be passed to
- * setup routines like gpio_request().  only some valid numbers
- * can successfully be requested and used.
- *
- * Invalid GPIO numbers are useful for indicating no-such-GPIO in
- * platform data and other tables.
+ * At the end we want all GPIOs to be dynamically allocated from 0.
+ * However, some legacy drivers still perform fixed allocation.
+ * Until they are all fixed, leave 0-512 space for them.
  */
-
-static inline bool gpio_is_valid(int number)
-{
-	return number >= 0 && number < ARCH_NR_GPIOS;
-}
+#define GPIO_DYNAMIC_BASE	512
 
 struct device;
 struct gpio;
@@ -140,12 +118,6 @@ static inline void gpio_unexport(unsigned gpio)
 
 #include <linux/kernel.h>
 
-static inline bool gpio_is_valid(int number)
-{
-	/* only non-negative numbers are valid */
-	return number >= 0;
-}
-
 /* platforms that don't directly support access to GPIOs through I2C, SPI,
  * or other blocking infrastructure can use these wrappers.
  */
@@ -169,4 +141,19 @@ static inline void gpio_set_value_cansleep(unsigned gpio, int value)
 
 #endif /* !CONFIG_GPIOLIB */
 
+/*
+ * "valid" GPIO numbers are nonnegative and may be passed to
+ * setup routines like gpio_request().  only some valid numbers
+ * can successfully be requested and used.
+ *
+ * Invalid GPIO numbers are useful for indicating no-such-GPIO in
+ * platform data and other tables.
+ */
+
+static inline bool gpio_is_valid(int number)
+{
+	/* only non-negative numbers are valid */
+	return number >= 0;
+}
+
 #endif /* _ASM_GENERIC_GPIO_H */
-- 
GitLab


From f2b470f036770805ebd20fb5cfe800395c7215af Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:06 +0200
Subject: [PATCH 0082/4122] Documentation: gpio: Remove text about
 ARCH_NR_GPIOS

ARCH_NR_GPIOS have been removed, clean up the documentation.

After this patch, the only place when ARCH_NR_GPIOS remains is in
translations/zh_CN/gpio.txt and translations/zh_TW/gpio.txt.
I don't have the skills to update that, anyway those two files are
already out of sync as they are still mentionning ARCH_REQUIRE_GPIOLIB
which was removed by commit 65053e1a7743 ("gpio: delete
ARCH_[WANTS_OPTIONAL|REQUIRE]_GPIOLIB")

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 Documentation/driver-api/gpio/legacy.rst | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/Documentation/driver-api/gpio/legacy.rst b/Documentation/driver-api/gpio/legacy.rst
index 9b12eeb89170f..e17910cc32719 100644
--- a/Documentation/driver-api/gpio/legacy.rst
+++ b/Documentation/driver-api/gpio/legacy.rst
@@ -558,11 +558,6 @@ Platform Support
 To force-enable this framework, a platform's Kconfig will "select" GPIOLIB,
 else it is up to the user to configure support for GPIO.
 
-It may also provide a custom value for ARCH_NR_GPIOS, so that it better
-reflects the number of GPIOs in actual use on that platform, without
-wasting static table space.  (It should count both built-in/SoC GPIOs and
-also ones on GPIO expanders.
-
 If neither of these options are selected, the platform does not support
 GPIOs through GPIO-lib and the code cannot be enabled by the user.
 
-- 
GitLab


From f71806d8dc6c053b5a5344536380c5b2bb82b3fc Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:07 +0200
Subject: [PATCH 0083/4122] x86: Remove CONFIG_ARCH_NR_GPIO

CONFIG_ARCH_NR_GPIO is not used anymore, remove it.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 arch/x86/Kconfig | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6d1879ef933a2..a3288f7e7b2bd 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -357,11 +357,6 @@ config ARCH_HAS_CPU_RELAX
 config ARCH_HIBERNATION_POSSIBLE
 	def_bool y
 
-config ARCH_NR_GPIO
-	int
-	default 1024 if X86_64
-	default 512
-
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
-- 
GitLab


From 8937944f4ee4f5763de8784ccdb068d93d9b0f3e Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:08 +0200
Subject: [PATCH 0084/4122] arm: Remove CONFIG_ARCH_NR_GPIO

CONFIG_ARCH_NR_GPIO is not used anymore, remove it.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 arch/arm/Kconfig | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a08c9d092a332..b03fd451122ee 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -1155,27 +1155,6 @@ config ARM_PSCI
 	  0022A ("Power State Coordination Interface System Software on
 	  ARM processors").
 
-# The GPIO number here must be sorted by descending number. In case of
-# a multiplatform kernel, we just want the highest value required by the
-# selected platforms.
-config ARCH_NR_GPIO
-	int
-	default 2048 if ARCH_INTEL_SOCFPGA
-	default 1024 if ARCH_BRCMSTB || ARCH_RENESAS || ARCH_TEGRA || \
-		ARCH_ZYNQ || ARCH_ASPEED
-	default 512 if ARCH_EXYNOS || ARCH_KEYSTONE || SOC_OMAP5 || \
-		SOC_DRA7XX || ARCH_S3C24XX || ARCH_S3C64XX || ARCH_S5PV210
-	default 416 if ARCH_SUNXI
-	default 392 if ARCH_U8500
-	default 352 if ARCH_VT8500
-	default 288 if ARCH_ROCKCHIP
-	default 264 if MACH_H4700
-	default 0
-	help
-	  Maximum number of GPIOs in the system.
-
-	  If unsure, leave the default value.
-
 config HZ_FIXED
 	int
 	default 128 if SOC_AT91RM9200
-- 
GitLab


From f5a681d238885f238a5f06fcfda625a90d87a327 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Sep 2022 14:42:09 +0200
Subject: [PATCH 0085/4122] arm64: Remove CONFIG_ARCH_NR_GPIO

CONFIG_ARCH_NR_GPIO is not used anymore, remove it.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 arch/arm64/Kconfig | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 505c8a1ccbe0c..a0c36763d9547 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -2145,18 +2145,6 @@ config STACKPROTECTOR_PER_TASK
 	def_bool y
 	depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_SYSREG
 
-# The GPIO number here must be sorted by descending number. In case of
-# a multiplatform kernel, we just want the highest value required by the
-# selected platforms.
-config ARCH_NR_GPIO
-        int
-        default 2048 if ARCH_APPLE
-        default 0
-        help
-          Maximum number of GPIOs in the system.
-
-          If unsure, leave the default value.
-
 endmenu # "Kernel Features"
 
 menu "Boot options"
-- 
GitLab


From b5636d45aae42aa345b4c7918bdef245ed63da68 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:41 +0200
Subject: [PATCH 0086/4122] x86/cpu: Remove segment load from
 switch_to_new_gdt()

On 32bit FS and on 64bit GS segments are already set up correctly, but
load_percpu_segment() still sets [FG]S after switching from the early GDT
to the direct GDT.

For 32bit the segment load has no side effects, but on 64bit it causes
GSBASE to become 0, which means that any per CPU access before GSBASE is
set to the new value is going to fault. That's the reason why the whole
file containing this code has stackprotector removed.

But that's a pointless exercise for both 32 and 64 bit as the relevant
segment selector is already correct. Loading the new GDT does not change
that.

Remove the segment loads and add comments.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.097052006@infradead.org
---
 arch/x86/include/asm/processor.h |  1 -
 arch/x86/kernel/cpu/common.c     | 47 +++++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 67c9d73b31faa..e21ec970d41ab 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -670,7 +670,6 @@ extern struct desc_ptr		early_gdt_descr;
 extern void switch_to_new_gdt(int);
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
-extern void load_percpu_segment(int);
 extern void cpu_init(void);
 extern void cpu_init_secondary(void);
 extern void cpu_init_exception_handling(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3e508f2390983..c09abee6f4d5b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -701,16 +701,6 @@ static const char *table_lookup_model(struct cpuinfo_x86 *c)
 __u32 cpu_caps_cleared[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
 __u32 cpu_caps_set[NCAPINTS + NBUGINTS] __aligned(sizeof(unsigned long));
 
-void load_percpu_segment(int cpu)
-{
-#ifdef CONFIG_X86_32
-	loadsegment(fs, __KERNEL_PERCPU);
-#else
-	__loadsegment_simple(gs, 0);
-	wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
-#endif
-}
-
 #ifdef CONFIG_X86_32
 /* The 32-bit entry code needs to find cpu_entry_area. */
 DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
@@ -738,16 +728,41 @@ void load_fixmap_gdt(int cpu)
 }
 EXPORT_SYMBOL_GPL(load_fixmap_gdt);
 
-/*
- * Current gdt points %fs at the "master" per-cpu area: after this,
- * it's on the real one.
+/**
+ * switch_to_new_gdt - Switch form early GDT to the direct one
+ * @cpu:	The CPU number for which this is invoked
+ *
+ * Invoked during early boot to switch from early GDT and early per CPU
+ * (%fs on 32bit, GS_BASE on 64bit) to the direct GDT and the runtime per
+ * CPU area.
  */
 void switch_to_new_gdt(int cpu)
 {
-	/* Load the original GDT */
 	load_direct_gdt(cpu);
-	/* Reload the per-cpu base */
-	load_percpu_segment(cpu);
+
+#ifdef CONFIG_X86_64
+	/*
+	 * No need to load %gs. It is already correct.
+	 *
+	 * Writing %gs on 64bit would zero GSBASE which would make any per
+	 * CPU operation up to the point of the wrmsrl() fault.
+	 *
+	 * Set GSBASE to the new offset. Until the wrmsrl() happens the
+	 * early mapping is still valid. That means the GSBASE update will
+	 * lose any prior per CPU data which was not copied over in
+	 * setup_per_cpu_areas().
+	 */
+	wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
+#else
+	/*
+	 * %fs is already set to __KERNEL_PERCPU, but after switching GDT
+	 * it is required to load FS again so that the 'hidden' part is
+	 * updated from the new GDT. Up to this point the early per CPU
+	 * translation is active. Any content of the early per CPU data
+	 * which was not copied over in setup_per_cpu_areas() is lost.
+	 */
+	loadsegment(fs, __KERNEL_PERCPU);
+#endif
 }
 
 static const struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
-- 
GitLab


From 1f19e2d50baf6515991844eaa8a84a0b0037da70 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:42 +0200
Subject: [PATCH 0087/4122] x86/cpu: Get rid of redundant switch_to_new_gdt()
 invocations

The only place where switch_to_new_gdt() is required is early boot to
switch from the early GDT to the direct GDT. Any other invocation is
completely redundant because it does not change anything.

Secondary CPUs come out of the ASM code with GDT and GSBASE correctly set
up. The same is true for XEN_PV.

Remove all the voodoo invocations which are left overs from the ancient
past, rename the function to switch_gdt_and_percpu_base() and mark it init.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.198076128@infradead.org
---
 arch/x86/include/asm/processor.h |  2 +-
 arch/x86/kernel/cpu/common.c     | 17 ++++++-----------
 arch/x86/kernel/setup_percpu.c   |  2 +-
 arch/x86/kernel/smpboot.c        |  6 +++++-
 arch/x86/xen/enlighten_pv.c      |  2 +-
 5 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index e21ec970d41ab..c660700ecfc61 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -667,7 +667,7 @@ extern int sysenter_setup(void);
 /* Defined in head.S */
 extern struct desc_ptr		early_gdt_descr;
 
-extern void switch_to_new_gdt(int);
+extern void switch_gdt_and_percpu_base(int);
 extern void load_direct_gdt(int);
 extern void load_fixmap_gdt(int);
 extern void cpu_init(void);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index c09abee6f4d5b..f51928dd275ad 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -729,14 +729,15 @@ void load_fixmap_gdt(int cpu)
 EXPORT_SYMBOL_GPL(load_fixmap_gdt);
 
 /**
- * switch_to_new_gdt - Switch form early GDT to the direct one
+ * switch_gdt_and_percpu_base - Switch to direct GDT and runtime per CPU base
  * @cpu:	The CPU number for which this is invoked
  *
- * Invoked during early boot to switch from early GDT and early per CPU
- * (%fs on 32bit, GS_BASE on 64bit) to the direct GDT and the runtime per
- * CPU area.
+ * Invoked during early boot to switch from early GDT and early per CPU to
+ * the direct GDT and the runtime per CPU area. On 32-bit the percpu base
+ * switch is implicit by loading the direct GDT. On 64bit this requires
+ * to update GSBASE.
  */
-void switch_to_new_gdt(int cpu)
+void __init switch_gdt_and_percpu_base(int cpu)
 {
 	load_direct_gdt(cpu);
 
@@ -2263,12 +2264,6 @@ void cpu_init(void)
 	    boot_cpu_has(X86_FEATURE_TSC) || boot_cpu_has(X86_FEATURE_DE))
 		cr4_clear_bits(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
-	/*
-	 * Initialize the per-CPU GDT with the boot GDT,
-	 * and set up the GDT descriptor:
-	 */
-	switch_to_new_gdt(cpu);
-
 	if (IS_ENABLED(CONFIG_X86_64)) {
 		loadsegment(fs, 0);
 		memset(cur->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 49325caa7307d..555089a5b446d 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -211,7 +211,7 @@ void __init setup_per_cpu_areas(void)
 		 * area.  Reload any changed state for the boot CPU.
 		 */
 		if (!cpu)
-			switch_to_new_gdt(cpu);
+			switch_gdt_and_percpu_base(cpu);
 	}
 
 	/* indicate the early static arrays will soon be gone */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 3f3ea0287f694..ce8728d2e5efa 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1453,7 +1453,11 @@ void arch_thaw_secondary_cpus_end(void)
 void __init native_smp_prepare_boot_cpu(void)
 {
 	int me = smp_processor_id();
-	switch_to_new_gdt(me);
+
+	/* SMP handles this from setup_per_cpu_areas() */
+	if (!IS_ENABLED(CONFIG_SMP))
+		switch_gdt_and_percpu_base(me);
+
 	/* already set me in cpu_online_mask in boot_cpu_init() */
 	cpumask_set_cpu(me, cpu_callout_mask);
 	cpu_set_state_online(me);
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index f82857e488152..9b892079581ba 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -1209,7 +1209,7 @@ static void __init xen_setup_gdt(int cpu)
 	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry_boot;
 	pv_ops.cpu.load_gdt = xen_load_gdt_boot;
 
-	switch_to_new_gdt(cpu);
+	switch_gdt_and_percpu_base(cpu);
 
 	pv_ops.cpu.write_gdt_entry = xen_write_gdt_entry;
 	pv_ops.cpu.load_gdt = xen_load_gdt;
-- 
GitLab


From 2cb15faaedeb67f52f2ddc32b5ca152acfc422c2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:43 +0200
Subject: [PATCH 0088/4122] x86/cpu: Re-enable stackprotector

Commit 5416c2663517 ("x86: make sure load_percpu_segment has no
stackprotector") disabled the stackprotector for cpu/common.c because of
load_percpu_segment(). Back then the boot stack canary was initialized very
early in start_kernel(). Switching the per CPU area by loading the GDT
caused the stackprotector to fail with paravirt enabled kernels as the
GSBASE was not updated yet. In hindsight a wrong change because it would
have been sufficient to ensure that the canary is the same in both per CPU
areas.

Commit d55535232c3d ("random: move rand_initialize() earlier") moved the
stack canary initialization to a later point in the init sequence. As a
consequence the per CPU stack canary is 0 when switching the per CPU areas,
so there is no requirement anymore to exclude this file.

Add a comment to load_percpu_segment().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.303010511@infradead.org
---
 arch/x86/kernel/cpu/Makefile | 3 ---
 arch/x86/kernel/cpu/common.c | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index f10a921ee7565..d7e3ceaf75c16 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,9 +17,6 @@ KMSAN_SANITIZE_common.o := n
 # As above, instrumenting secondary CPU boot code causes boot hangs.
 KCSAN_SANITIZE_common.o := n
 
-# Make sure load_percpu_segment has no stackprotector
-CFLAGS_common.o		:= -fno-stack-protector
-
 obj-y			:= cacheinfo.o scattered.o topology.o
 obj-y			+= common.o
 obj-y			+= rdrand.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f51928dd275ad..8e873181759a2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -752,6 +752,9 @@ void __init switch_gdt_and_percpu_base(int cpu)
 	 * early mapping is still valid. That means the GSBASE update will
 	 * lose any prior per CPU data which was not copied over in
 	 * setup_per_cpu_areas().
+	 *
+	 * This works even with stackprotector enabled because the
+	 * per CPU stack canary is 0 in both per CPU areas.
 	 */
 	wrmsrl(MSR_GS_BASE, cpu_kernelmode_gs_base(cpu));
 #else
-- 
GitLab


From 4c4eb3ecc91f4fee6d6bf7cfbc1e21f2e38d19ff Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:44 +0200
Subject: [PATCH 0089/4122] x86/modules: Set VM_FLUSH_RESET_PERMS in
 module_alloc()

Instead of resetting permissions all over the place when freeing module
memory tell the vmalloc code to do so. Avoids the exercise for the next
upcoming user.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.406703869@infradead.org
---
 arch/x86/kernel/ftrace.c       | 2 --
 arch/x86/kernel/kprobes/core.c | 1 -
 arch/x86/kernel/module.c       | 9 +++++----
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index bd165004776d9..00eac455a3a1f 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -413,8 +413,6 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	/* ALLOC_TRAMP flags lets us know we created it */
 	ops->flags |= FTRACE_OPS_FL_ALLOC_TRAMP;
 
-	set_vm_flush_reset_perms(trampoline);
-
 	if (likely(system_state != SYSTEM_BOOTING))
 		set_memory_ro((unsigned long)trampoline, npages);
 	set_memory_x((unsigned long)trampoline, npages);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index eb8bc82846b99..01b8d956aa76d 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -414,7 +414,6 @@ void *alloc_insn_page(void)
 	if (!page)
 		return NULL;
 
-	set_vm_flush_reset_perms(page);
 	/*
 	 * First make the page read-only, and only then make it executable to
 	 * prevent it from being W+X in between.
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index c032edcd3d95e..43f0112772192 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -74,10 +74,11 @@ void *module_alloc(unsigned long size)
 		return NULL;
 
 	p = __vmalloc_node_range(size, MODULE_ALIGN,
-				    MODULES_VADDR + get_module_load_offset(),
-				    MODULES_END, gfp_mask,
-				    PAGE_KERNEL, VM_DEFER_KMEMLEAK, NUMA_NO_NODE,
-				    __builtin_return_address(0));
+				 MODULES_VADDR + get_module_load_offset(),
+				 MODULES_END, gfp_mask, PAGE_KERNEL,
+				 VM_FLUSH_RESET_PERMS | VM_DEFER_KMEMLEAK,
+				 NUMA_NO_NODE, __builtin_return_address(0));
+
 	if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) {
 		vfree(p);
 		return NULL;
-- 
GitLab


From b26d66f8dace32c46ce58147002964ce8cdfde5f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:45 +0200
Subject: [PATCH 0090/4122] x86/vdso: Ensure all kernel code is seen by objtool

extable.c is kernel code and not part of the VDSO

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.512144110@infradead.org
---
 arch/x86/entry/vdso/Makefile | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 3e88b9df8c8f1..3ef611044c8f6 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -33,11 +33,12 @@ vobjs32-y += vdso32/vclock_gettime.o
 vobjs-$(CONFIG_X86_SGX)	+= vsgx.o
 
 # files to link into kernel
-obj-y				+= vma.o extable.o
-KASAN_SANITIZE_vma.o		:= y
-UBSAN_SANITIZE_vma.o		:= y
-KCSAN_SANITIZE_vma.o		:= y
-OBJECT_FILES_NON_STANDARD_vma.o	:= n
+obj-y					+= vma.o extable.o
+KASAN_SANITIZE_vma.o			:= y
+UBSAN_SANITIZE_vma.o			:= y
+KCSAN_SANITIZE_vma.o			:= y
+OBJECT_FILES_NON_STANDARD_vma.o		:= n
+OBJECT_FILES_NON_STANDARD_extable.o	:= n
 
 # vDSO images to build
 vdso_img-$(VDSO64-y)		+= 64
-- 
GitLab


From 24a9c543d2114d416f84e386c2fa90089bd97e4c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:46 +0200
Subject: [PATCH 0091/4122] x86: Sanitize linker script

The section ordering in the text section is more than suboptimal:

    ALIGN_ENTRY_TEXT_BEGIN
    ENTRY_TEXT
    ALIGN_ENTRY_TEXT_END
    SOFTIRQENTRY_TEXT
    STATIC_CALL_TEXT
    INDIRECT_THUNK_TEXT

ENTRY_TEXT is in a seperate PMD so it can be mapped into the cpu entry area
when KPTI is enabled. That means the sections after it are also in a
seperate PMD. That's wasteful especially as the indirect thunk text is a
hotpath on retpoline enabled systems and the static call text is fairly hot
on 32bit.

Move the entry text section last so that the other sections share a PMD
with the text before it. This is obviously just best effort and not
guaranteed when the previous text is just at a PMD boundary.

The text section placement needs an overhaul in general. There is e.g. no
point to have debugfs, sysfs, cpuhotplug and other rarely used functions
next to hot path text.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.614728935@infradead.org
---
 arch/x86/kernel/vmlinux.lds.S | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 15f29053cec46..0e9fc080c417d 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -132,18 +132,19 @@ SECTIONS
 		CPUIDLE_TEXT
 		LOCK_TEXT
 		KPROBES_TEXT
-		ALIGN_ENTRY_TEXT_BEGIN
-		ENTRY_TEXT
-		ALIGN_ENTRY_TEXT_END
 		SOFTIRQENTRY_TEXT
-		STATIC_CALL_TEXT
-		*(.gnu.warning)
-
 #ifdef CONFIG_RETPOLINE
 		__indirect_thunk_start = .;
 		*(.text.__x86.*)
 		__indirect_thunk_end = .;
 #endif
+		STATIC_CALL_TEXT
+
+		ALIGN_ENTRY_TEXT_BEGIN
+		ENTRY_TEXT
+		ALIGN_ENTRY_TEXT_END
+		*(.gnu.warning)
+
 	} :text =0xcccc
 
 	/* End of text section, which should occupy whole number of pages */
-- 
GitLab


From d49a0626216b95cd4bf696f6acf55f39a16ab0bb Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:10:47 +0200
Subject: [PATCH 0092/4122] arch: Introduce CONFIG_FUNCTION_ALIGNMENT

Generic function-alignment infrastructure.

Architectures can select FUNCTION_ALIGNMENT_xxB symbols; the
FUNCTION_ALIGNMENT symbol is then set to the largest such selected
size, 0 otherwise.

From this the -falign-functions compiler argument and __ALIGN macro
are set.

This incorporates the DEBUG_FORCE_FUNCTION_ALIGN_64B knob and future
alignment requirements for x86_64 (later in this series) into a single
place.

NOTE: also removes the 0x90 filler byte from the generic __ALIGN
      primitive, that value makes no sense outside of x86.

NOTE: .balign 0 reverts to a no-op.

Requested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.719248727@infradead.org
---
 Makefile                           |  4 ++--
 arch/Kconfig                       | 24 ++++++++++++++++++++++++
 arch/ia64/Kconfig                  |  1 +
 arch/ia64/Makefile                 |  2 +-
 arch/x86/Kconfig                   |  2 ++
 arch/x86/boot/compressed/head_64.S |  8 ++++++++
 arch/x86/include/asm/linkage.h     |  4 +---
 include/asm-generic/vmlinux.lds.h  |  4 ++--
 include/linux/linkage.h            |  4 ++--
 lib/Kconfig.debug                  |  1 +
 10 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index f41ec8c8426ba..141e1bcc06715 100644
--- a/Makefile
+++ b/Makefile
@@ -1004,8 +1004,8 @@ KBUILD_CFLAGS	+= $(CC_FLAGS_CFI)
 export CC_FLAGS_CFI
 endif
 
-ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_64B
-KBUILD_CFLAGS += -falign-functions=64
+ifneq ($(CONFIG_FUNCTION_ALIGNMENT),0)
+KBUILD_CFLAGS += -falign-functions=$(CONFIG_FUNCTION_ALIGNMENT)
 endif
 
 # arch Makefile may override CC so keep this after arch Makefile is included
diff --git a/arch/Kconfig b/arch/Kconfig
index 8f138e580d1ae..4025802538029 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1428,4 +1428,28 @@ source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
 
+config FUNCTION_ALIGNMENT_4B
+	bool
+
+config FUNCTION_ALIGNMENT_8B
+	bool
+
+config FUNCTION_ALIGNMENT_16B
+	bool
+
+config FUNCTION_ALIGNMENT_32B
+	bool
+
+config FUNCTION_ALIGNMENT_64B
+	bool
+
+config FUNCTION_ALIGNMENT
+	int
+	default 64 if FUNCTION_ALIGNMENT_64B
+	default 32 if FUNCTION_ALIGNMENT_32B
+	default 16 if FUNCTION_ALIGNMENT_16B
+	default 8 if FUNCTION_ALIGNMENT_8B
+	default 4 if FUNCTION_ALIGNMENT_4B
+	default 0
+
 endmenu
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index c6e06cdc738f0..d7e4a24e8644c 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -63,6 +63,7 @@ config IA64
 	select NUMA if !FLATMEM
 	select PCI_MSI_ARCH_FALLBACKS if PCI_MSI
 	select ZONE_DMA32
+	select FUNCTION_ALIGNMENT_32B
 	default y
 	help
 	  The Itanium Processor Family is Intel's 64-bit successor to
diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile
index 56c4bb276b6ed..d553ab7022fe4 100644
--- a/arch/ia64/Makefile
+++ b/arch/ia64/Makefile
@@ -23,7 +23,7 @@ KBUILD_AFLAGS_KERNEL := -mconstant-gp
 EXTRA		:=
 
 cflags-y	:= -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \
-		   -falign-functions=32 -frename-registers -fno-optimize-sibling-calls
+		   -frename-registers -fno-optimize-sibling-calls
 KBUILD_CFLAGS_KERNEL := -mconstant-gp
 
 GAS_STATUS	= $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)")
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6d1879ef933a2..f408fa87ed947 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -290,6 +290,8 @@ config X86
 	select X86_FEATURE_NAMES		if PROC_FS
 	select PROC_PID_ARCH_STATUS		if PROC_FS
 	select HAVE_ARCH_NODE_DEV_GROUP		if X86_SGX
+	select FUNCTION_ALIGNMENT_16B		if X86_64 || X86_ALIGNMENT_16
+	select FUNCTION_ALIGNMENT_4B
 	imply IMA_SECURE_AND_OR_TRUSTED_BOOT    if EFI
 	select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
 
diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index d33f060900d23..190b803eb7871 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -37,6 +37,14 @@
 #include <asm/trapnr.h>
 #include "pgtable.h"
 
+/*
+ * Fix alignment at 16 bytes. Following CONFIG_FUNCTION_ALIGNMENT will result
+ * in assembly errors due to trying to move .org backward due to the excessive
+ * alignment.
+ */
+#undef __ALIGN
+#define __ALIGN		.balign	16, 0x90
+
 /*
  * Locally defined symbols should be marked hidden:
  */
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index f484d656d34ee..9ee0e28517423 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -14,10 +14,8 @@
 
 #ifdef __ASSEMBLY__
 
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_ALIGNMENT_16)
-#define __ALIGN		.p2align 4, 0x90
+#define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
 #define __ALIGN_STR	__stringify(__ALIGN)
-#endif
 
 #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
 #define RET	jmp __x86_return_thunk
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c15de165ec8ff..335b5711a7ede 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -81,8 +81,8 @@
 #define RO_EXCEPTION_TABLE
 #endif
 
-/* Align . to a 8 byte boundary equals to maximum function alignment. */
-#define ALIGN_FUNCTION()  . = ALIGN(8)
+/* Align . function alignment. */
+#define ALIGN_FUNCTION()  . = ALIGN(CONFIG_FUNCTION_ALIGNMENT)
 
 /*
  * LD_DEAD_CODE_DATA_ELIMINATION option enables -fdata-sections, which
diff --git a/include/linux/linkage.h b/include/linux/linkage.h
index 1feab6136b5b5..5c8865bb59d91 100644
--- a/include/linux/linkage.h
+++ b/include/linux/linkage.h
@@ -69,8 +69,8 @@
 #endif
 
 #ifndef __ALIGN
-#define __ALIGN		.align 4,0x90
-#define __ALIGN_STR	".align 4,0x90"
+#define __ALIGN			.balign CONFIG_FUNCTION_ALIGNMENT
+#define __ALIGN_STR		__stringify(__ALIGN)
 #endif
 
 #ifdef __ASSEMBLY__
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3fc7abffc7aa2..e90dc67385347 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -467,6 +467,7 @@ config SECTION_MISMATCH_WARN_ONLY
 config DEBUG_FORCE_FUNCTION_ALIGN_64B
 	bool "Force all function address 64B aligned"
 	depends on EXPERT && (X86_64 || ARM64 || PPC32 || PPC64 || ARC)
+	select FUNCTION_ALIGNMENT_64B
 	help
 	  There are cases that a commit from one domain changes the function
 	  address alignment of other domains, and cause magic performance
-- 
GitLab


From 8eb5d34e77c63fde8af21c691bcf6e3cd87f7829 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:48 +0200
Subject: [PATCH 0093/4122] x86/asm: Differentiate between code and function
 alignment

Create SYM_F_ALIGN to differentiate alignment requirements between
SYM_CODE and SYM_FUNC.

This distinction is useful later when adding padding in front of
functions; IOW this allows following the compiler's
patchable-function-entry option.

[peterz: Changelog]

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.824822743@infradead.org
---
 arch/x86/include/asm/linkage.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 9ee0e28517423..c2d6e2733b111 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -12,11 +12,15 @@
 #define asmlinkage CPP_ASMLINKAGE __attribute__((regparm(0)))
 #endif /* CONFIG_X86_32 */
 
-#ifdef __ASSEMBLY__
-
 #define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
 #define __ALIGN_STR	__stringify(__ALIGN)
 
+#define ASM_FUNC_ALIGN		__ALIGN_STR
+#define __FUNC_ALIGN		__ALIGN
+#define SYM_F_ALIGN		__FUNC_ALIGN
+
+#ifdef __ASSEMBLY__
+
 #if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
 #define RET	jmp __x86_return_thunk
 #else /* CONFIG_RETPOLINE */
@@ -55,7 +59,7 @@
 
 /* SYM_FUNC_START -- use for global functions */
 #define SYM_FUNC_START(name)				\
-	SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN)	\
+	SYM_START(name, SYM_L_GLOBAL, SYM_F_ALIGN)	\
 	ENDBR
 
 /* SYM_FUNC_START_NOALIGN -- use for global functions, w/o alignment */
@@ -65,7 +69,7 @@
 
 /* SYM_FUNC_START_LOCAL -- use for local functions */
 #define SYM_FUNC_START_LOCAL(name)			\
-	SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN)	\
+	SYM_START(name, SYM_L_LOCAL, SYM_F_ALIGN)	\
 	ENDBR
 
 /* SYM_FUNC_START_LOCAL_NOALIGN -- use for local functions, w/o alignment */
@@ -75,7 +79,7 @@
 
 /* SYM_FUNC_START_WEAK -- use for weak functions */
 #define SYM_FUNC_START_WEAK(name)			\
-	SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN)	\
+	SYM_START(name, SYM_L_WEAK, SYM_F_ALIGN)	\
 	ENDBR
 
 /* SYM_FUNC_START_WEAK_NOALIGN -- use for weak functions, w/o alignment */
-- 
GitLab


From 1934dc9a8a92fda36ab80cfd55edab1708dcdf9a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:10:49 +0200
Subject: [PATCH 0094/4122] x86/error_inject: Align function properly

Ensure inline asm functions are consistently aligned with compiler
generated and SYM_FUNC_START*() functions.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111143.930201368@infradead.org
---
 arch/x86/lib/error-inject.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/lib/error-inject.c b/arch/x86/lib/error-inject.c
index 1e3de0769b810..b5a6d83106bc2 100644
--- a/arch/x86/lib/error-inject.c
+++ b/arch/x86/lib/error-inject.c
@@ -11,6 +11,7 @@ asm(
 	".text\n"
 	".type just_return_func, @function\n"
 	".globl just_return_func\n"
+	ASM_FUNC_ALIGN
 	"just_return_func:\n"
 		ANNOTATE_NOENDBR
 		ASM_RET
-- 
GitLab


From 1d293758e548aa6ff65e4dd3f5a9bc2a34b38ce3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:50 +0200
Subject: [PATCH 0095/4122] x86/paravirt: Properly align PV functions

Ensure inline asm functions are consistently aligned with compiler
generated and SYM_FUNC_START*() functions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220915111144.038540008@infradead.org
---
 arch/x86/include/asm/paravirt.h           | 1 +
 arch/x86/include/asm/qspinlock_paravirt.h | 2 +-
 arch/x86/kernel/kvm.c                     | 1 +
 arch/x86/kernel/paravirt.c                | 2 ++
 4 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2a0b8dd4ec335..1be66c15ecbd7 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -665,6 +665,7 @@ bool __raw_callee_save___native_vcpu_is_preempted(long cpu);
 	asm(".pushsection " section ", \"ax\";"				\
 	    ".globl " PV_THUNK_NAME(func) ";"				\
 	    ".type " PV_THUNK_NAME(func) ", @function;"			\
+	    ASM_FUNC_ALIGN						\
 	    PV_THUNK_NAME(func) ":"					\
 	    ASM_ENDBR							\
 	    FRAME_BEGIN							\
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 60ece592b2207..082551b3c75ed 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -40,7 +40,7 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
 asm    (".pushsection .spinlock.text;"
 	".globl " PV_UNLOCK ";"
 	".type " PV_UNLOCK ", @function;"
-	".align 4,0x90;"
+	ASM_FUNC_ALIGN
 	PV_UNLOCK ": "
 	ASM_ENDBR
 	FRAME_BEGIN
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d4e48b4a438b2..95fb85bea1118 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -802,6 +802,7 @@ asm(
 ".pushsection .text;"
 ".global __raw_callee_save___kvm_vcpu_is_preempted;"
 ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
+ASM_FUNC_ALIGN
 "__raw_callee_save___kvm_vcpu_is_preempted:"
 ASM_ENDBR
 "movq	__per_cpu_offset(,%rdi,8), %rax;"
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 7ca2d46c08cc9..e244c49b52d7e 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -40,6 +40,7 @@
 extern void _paravirt_nop(void);
 asm (".pushsection .entry.text, \"ax\"\n"
      ".global _paravirt_nop\n"
+     ASM_FUNC_ALIGN
      "_paravirt_nop:\n\t"
      ASM_ENDBR
      ASM_RET
@@ -50,6 +51,7 @@ asm (".pushsection .entry.text, \"ax\"\n"
 /* stub always returning 0. */
 asm (".pushsection .entry.text, \"ax\"\n"
      ".global paravirt_ret0\n"
+     ASM_FUNC_ALIGN
      "paravirt_ret0:\n\t"
      ASM_ENDBR
      "xor %" _ASM_AX ", %" _ASM_AX ";\n\t"
-- 
GitLab


From 67e93ddd5d0b84ac17bddb13d98533e425282421 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:51 +0200
Subject: [PATCH 0096/4122] x86/entry: Align SYM_CODE_START() variants

Explicitly align a bunch of commonly called SYM_CODE_START() symbols.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.144068841@infradead.org
---
 arch/x86/entry/entry_64.S | 16 ++++++++++------
 arch/x86/entry/thunk_64.S |  4 ++--
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9953d966d1244..e635f962afb80 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -284,7 +284,8 @@ SYM_FUNC_END(__switch_to_asm)
  * r12: kernel thread arg
  */
 .pushsection .text, "ax"
-SYM_CODE_START(ret_from_fork)
+	__FUNC_ALIGN
+SYM_CODE_START_NOALIGN(ret_from_fork)
 	UNWIND_HINT_EMPTY
 	ANNOTATE_NOENDBR // copy_thread
 	movq	%rax, %rdi
@@ -600,13 +601,13 @@ SYM_CODE_END(\asmsym)
  * shared between 32 and 64 bit and emit the __irqentry_text_* markers
  * so the stacktrace boundary checks work.
  */
-	.align 16
+	__ALIGN
 	.globl __irqentry_text_start
 __irqentry_text_start:
 
 #include <asm/idtentry.h>
 
-	.align 16
+	__ALIGN
 	.globl __irqentry_text_end
 __irqentry_text_end:
 	ANNOTATE_NOENDBR
@@ -828,7 +829,8 @@ EXPORT_SYMBOL(asm_load_gs_index)
  *
  * C calling convention: exc_xen_hypervisor_callback(struct *pt_regs)
  */
-SYM_CODE_START_LOCAL(exc_xen_hypervisor_callback)
+	__FUNC_ALIGN
+SYM_CODE_START_LOCAL_NOALIGN(exc_xen_hypervisor_callback)
 
 /*
  * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will
@@ -856,7 +858,8 @@ SYM_CODE_END(exc_xen_hypervisor_callback)
  * We distinguish between categories by comparing each saved segment register
  * with its current contents: any discrepancy means we in category 1.
  */
-SYM_CODE_START(xen_failsafe_callback)
+	__FUNC_ALIGN
+SYM_CODE_START_NOALIGN(xen_failsafe_callback)
 	UNWIND_HINT_EMPTY
 	ENDBR
 	movl	%ds, %ecx
@@ -1516,7 +1519,8 @@ SYM_CODE_END(ignore_sysret)
 #endif
 
 .pushsection .text, "ax"
-SYM_CODE_START(rewind_stack_and_make_dead)
+	__FUNC_ALIGN
+SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
 	UNWIND_HINT_FUNC
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S
index f38b07d2768bb..5e37f41e5f14d 100644
--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -11,7 +11,7 @@
 
 	/* rdi:	arg1 ... normal C conventions. rax is saved/restored. */
 	.macro THUNK name, func
-SYM_FUNC_START_NOALIGN(\name)
+SYM_FUNC_START(\name)
 	pushq %rbp
 	movq %rsp, %rbp
 
@@ -36,7 +36,7 @@ SYM_FUNC_END(\name)
 	EXPORT_SYMBOL(preempt_schedule_thunk)
 	EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
 
-SYM_CODE_START_LOCAL_NOALIGN(__thunk_restore)
+SYM_CODE_START_LOCAL(__thunk_restore)
 	popq %r11
 	popq %r10
 	popq %r9
-- 
GitLab


From f6dabc817e1f0da8f4088734ad0b9814adad0bce Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:52 +0200
Subject: [PATCH 0097/4122] crypto: x86/camellia: Remove redundant alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Also, with having pushed the function alignment to 16 bytes, this
custom alignment is completely superfluous.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.248229966@infradead.org
---
 arch/x86/crypto/camellia-aesni-avx-asm_64.S  | 2 --
 arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 4 ----
 2 files changed, 6 deletions(-)

diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
index 2e1658ddbe1a9..4a30618281ec2 100644
--- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S
@@ -712,7 +712,6 @@ SYM_FUNC_END(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 .text
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -799,7 +798,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk16)
 	jmp .Lenc_done;
 SYM_FUNC_END(__camellia_enc_blk16)
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_dec_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 0e4e9abbf4de3..deaf62aa73a6b 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -221,7 +221,6 @@
  * Size optimization... with inlined roundsm32 binary would be over 5 times
  * larger and would only marginally faster.
  */
-.align 8
 SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 	roundsm32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
 		  %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, %ymm15,
@@ -229,7 +228,6 @@ SYM_FUNC_START_LOCAL(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_c
 	RET;
 SYM_FUNC_END(roundsm32_x0_x1_x2_x3_x4_x5_x6_x7_y0_y1_y2_y3_y4_y5_y6_y7_cd)
 
-.align 8
 SYM_FUNC_START_LOCAL(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 	roundsm32(%ymm4, %ymm5, %ymm6, %ymm7, %ymm0, %ymm1, %ymm2, %ymm3,
 		  %ymm12, %ymm13, %ymm14, %ymm15, %ymm8, %ymm9, %ymm10, %ymm11,
@@ -748,7 +746,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
 
 .text
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -835,7 +832,6 @@ SYM_FUNC_START_LOCAL(__camellia_enc_blk32)
 	jmp .Lenc_done;
 SYM_FUNC_END(__camellia_enc_blk32)
 
-.align 8
 SYM_FUNC_START_LOCAL(__camellia_dec_blk32)
 	/* input:
 	 *	%rdi: ctx, CTX
-- 
GitLab


From 88cdf02551f9aef9284d778ecd375c400555d900 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:53 +0200
Subject: [PATCH 0098/4122] crypto: x86/cast5: Remove redundant alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Also, with having pushed the function alignment to 16 bytes, this
custom alignment is completely superfluous.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.353555711@infradead.org
---
 arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
index b258af420c92c..0326a01503c3a 100644
--- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S
@@ -208,7 +208,6 @@
 
 .text
 
-.align 16
 SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
 	/* input:
 	 *	%rdi: ctx
@@ -282,7 +281,6 @@ SYM_FUNC_START_LOCAL(__cast5_enc_blk16)
 	RET;
 SYM_FUNC_END(__cast5_enc_blk16)
 
-.align 16
 SYM_FUNC_START_LOCAL(__cast5_dec_blk16)
 	/* input:
 	 *	%rdi: ctx
-- 
GitLab


From ba1b270c20dfb7f7b7a076b1a97ef4b7dcb539b5 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:54 +0200
Subject: [PATCH 0099/4122] crypto: x86/crct10dif-pcl: Remove redundant
 alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Also, with having pushed the function alignment to 16 bytes, this
custom alignment is completely superfluous.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.456602381@infradead.org
---
 arch/x86/crypto/crct10dif-pcl-asm_64.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S
index 721474abfb719..5286db5b8165e 100644
--- a/arch/x86/crypto/crct10dif-pcl-asm_64.S
+++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S
@@ -94,7 +94,6 @@
 #
 # Assumes len >= 16.
 #
-.align 16
 SYM_FUNC_START(crc_t10dif_pcl)
 
 	movdqa	.Lbswap_mask(%rip), BSWAP_MASK
-- 
GitLab


From 8b44221671ec45d725a4558ff7aa5ea90ecfc885 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:55 +0200
Subject: [PATCH 0100/4122] crypto: x86/serpent: Remove redundant alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Also, with having pushed the function alignment to 16 bytes, this
custom alignment is completely superfluous.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.558544791@infradead.org
---
 arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 2 --
 arch/x86/crypto/serpent-avx2-asm_64.S       | 2 --
 2 files changed, 4 deletions(-)

diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
index 82f2313f512b8..97e2836218518 100644
--- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S
@@ -550,7 +550,6 @@
 #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk8_avx)
 	RET;
 SYM_FUNC_END(__serpent_enc_blk8_avx)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_dec_blk8_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S
index 8ea34c9b93160..6d60c50593a9b 100644
--- a/arch/x86/crypto/serpent-avx2-asm_64.S
+++ b/arch/x86/crypto/serpent-avx2-asm_64.S
@@ -550,7 +550,6 @@
 #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -604,7 +603,6 @@ SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
 	RET;
 SYM_FUNC_END(__serpent_enc_blk16)
 
-.align 8
 SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
 	/* input:
 	 *	%rdi: ctx, CTX
-- 
GitLab


From c2a3ce6fdb122b12bc4cfffd28ecf8a9fb0d6736 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:56 +0200
Subject: [PATCH 0101/4122] crypto: x86/sha1: Remove custom alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.662580589@infradead.org
---
 arch/x86/crypto/sha1_ni_asm.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index 2f94ec0e763bf..cd943b2af2c41 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -92,7 +92,6 @@
  * numBlocks: Number of blocks to process
  */
 .text
-.align 32
 SYM_FUNC_START(sha1_ni_transform)
 	push		%rbp
 	mov		%rsp, %rbp
-- 
GitLab


From 3ba56d0b87113785413dfc5b9910d45001cc4eeb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:57 +0200
Subject: [PATCH 0102/4122] crypto: x86/sha256: Remove custom alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.766564176@infradead.org
---
 arch/x86/crypto/sha256-avx-asm.S   | 1 -
 arch/x86/crypto/sha256-avx2-asm.S  | 1 -
 arch/x86/crypto/sha256-ssse3-asm.S | 1 -
 arch/x86/crypto/sha256_ni_asm.S    | 1 -
 4 files changed, 4 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 3baa1ec390974..3649370690c5d 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -347,7 +347,6 @@ a = TMP_
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_avx)
-.align 32
 	pushq   %rbx
 	pushq   %r12
 	pushq   %r13
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 9bcdbc47b8b4b..c4c1dc5ee078d 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -524,7 +524,6 @@ STACK_SIZE	= _CTX      + _CTX_SIZE
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_rorx)
-.align 32
 	pushq	%rbx
 	pushq	%r12
 	pushq	%r13
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index c4a5db612c327..96b7dcdeaebe4 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -356,7 +356,6 @@ a = TMP_
 ########################################################################
 .text
 SYM_FUNC_START(sha256_transform_ssse3)
-.align 32
 	pushq   %rbx
 	pushq   %r12
 	pushq   %r13
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index 94d50dd27cb53..b3f1a1a12027e 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -96,7 +96,6 @@
  */
 
 .text
-.align 32
 SYM_FUNC_START(sha256_ni_transform)
 
 	shl		$6, NUM_BLKS		/*  convert to bytes */
-- 
GitLab


From 2f93238b87ddbbe1b050ec48ab5843fc61346adb Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:58 +0200
Subject: [PATCH 0103/4122] crypto: x86/sm[34]: Remove redundant alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Also, with having pushed the function alignment to 16 bytes, this
custom alignment is completely superfluous.

( this code couldn't seem to make up it's mind about what alignment it
  actually wanted, randomly mixing 8 and 16 bytes )

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.868540856@infradead.org
---
 arch/x86/crypto/sm3-avx-asm_64.S        | 1 -
 arch/x86/crypto/sm4-aesni-avx-asm_64.S  | 7 -------
 arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 6 ------
 3 files changed, 14 deletions(-)

diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S
index b12b9efb5ec51..b28d804ee10de 100644
--- a/arch/x86/crypto/sm3-avx-asm_64.S
+++ b/arch/x86/crypto/sm3-avx-asm_64.S
@@ -327,7 +327,6 @@
  * void sm3_transform_avx(struct sm3_state *state,
  *                        const u8 *data, int nblocks);
  */
-.align 16
 SYM_FUNC_START(sm3_transform_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
index 4767ab61ff489..e13c8537b2ec3 100644
--- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -139,13 +139,11 @@
 
 
 .text
-.align 16
 
 /*
  * void sm4_aesni_avx_crypt4(const u32 *rk, u8 *dst,
  *                           const u8 *src, int nblocks)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx_crypt4)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -249,7 +247,6 @@ SYM_FUNC_START(sm4_aesni_avx_crypt4)
 	RET;
 SYM_FUNC_END(sm4_aesni_avx_crypt4)
 
-.align 8
 SYM_FUNC_START_LOCAL(__sm4_crypt_blk8)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -363,7 +360,6 @@ SYM_FUNC_END(__sm4_crypt_blk8)
  * void sm4_aesni_avx_crypt8(const u32 *rk, u8 *dst,
  *                           const u8 *src, int nblocks)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx_crypt8)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -419,7 +415,6 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8)
  * void sm4_aesni_avx_ctr_enc_blk8(const u32 *rk, u8 *dst,
  *                                 const u8 *src, u8 *iv)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -494,7 +489,6 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
  * void sm4_aesni_avx_cbc_dec_blk8(const u32 *rk, u8 *dst,
  *                                 const u8 *src, u8 *iv)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -544,7 +538,6 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
  * void sm4_aesni_avx_cfb_dec_blk8(const u32 *rk, u8 *dst,
  *                                 const u8 *src, u8 *iv)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
 	/* input:
 	 *	%rdi: round key array, CTX
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
index 4732fe8bb65b6..2212705f7da6c 100644
--- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -153,9 +153,6 @@
 	.long 0xdeadbeef, 0xdeadbeef, 0xdeadbeef
 
 .text
-.align 16
-
-.align 8
 SYM_FUNC_START_LOCAL(__sm4_crypt_blk16)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -281,7 +278,6 @@ SYM_FUNC_END(__sm4_crypt_blk16)
  * void sm4_aesni_avx2_ctr_enc_blk16(const u32 *rk, u8 *dst,
  *                                   const u8 *src, u8 *iv)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -394,7 +390,6 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
  * void sm4_aesni_avx2_cbc_dec_blk16(const u32 *rk, u8 *dst,
  *                                   const u8 *src, u8 *iv)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
 	/* input:
 	 *	%rdi: round key array, CTX
@@ -448,7 +443,6 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
  * void sm4_aesni_avx2_cfb_dec_blk16(const u32 *rk, u8 *dst,
  *                                   const u8 *src, u8 *iv)
  */
-.align 8
 SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
 	/* input:
 	 *	%rdi: round key array, CTX
-- 
GitLab


From e2c9475e88f70820270ca5cc81a1ae2fda262278 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:10:59 +0200
Subject: [PATCH 0104/4122] crypto: twofish: Remove redundant alignments

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Also, with having pushed the function alignment to 16 bytes, this
custom alignment is completely superfluous.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111144.971229477@infradead.org
---
 arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
index 31f9b2ec3857d..12fde271cd3f6 100644
--- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
+++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S
@@ -228,7 +228,6 @@
 	vpxor		x2, wkey, x2; \
 	vpxor		x3, wkey, x3;
 
-.align 8
 SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
 	/* input:
 	 *	%rdi: ctx, CTX
@@ -270,7 +269,6 @@ SYM_FUNC_START_LOCAL(__twofish_enc_blk8)
 	RET;
 SYM_FUNC_END(__twofish_enc_blk8)
 
-.align 8
 SYM_FUNC_START_LOCAL(__twofish_dec_blk8)
 	/* input:
 	 *	%rdi: ctx, CTX
-- 
GitLab


From fdc9ee7e97aa2c1dfa7ebb092fffec40ffa59108 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:00 +0200
Subject: [PATCH 0105/4122] crypto: x86/poly1305: Remove custom function
 alignment

SYM_FUNC_START*() and friends already imply alignment, remove custom
alignment hacks to make code consistent. This prepares for future
function call ABI changes.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.073285765@infradead.org
---
 arch/x86/crypto/poly1305-x86_64-cryptogams.pl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
index 2077ce7a56479..b9abcd79c1f43 100644
--- a/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
+++ b/arch/x86/crypto/poly1305-x86_64-cryptogams.pl
@@ -108,7 +108,6 @@ if (!$kernel) {
 sub declare_function() {
 	my ($name, $align, $nargs) = @_;
 	if($kernel) {
-		$code .= ".align $align\n";
 		$code .= "SYM_FUNC_START($name)\n";
 		$code .= ".L$name:\n";
 	} else {
-- 
GitLab


From e57ef2ed97c1d078973298658a8096644a1e9e09 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:01 +0200
Subject: [PATCH 0106/4122] x86: Put hot per CPU variables into a struct

The layout of per-cpu variables is at the mercy of the compiler. This
can lead to random performance fluctuations from build to build.

Create a structure to hold some of the hottest per-cpu variables,
starting with current_task.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.179707194@infradead.org
---
 arch/x86/include/asm/current.h | 19 ++++++++++++++++---
 arch/x86/kernel/cpu/common.c   | 14 +++++---------
 arch/x86/kernel/process_32.c   |  2 +-
 arch/x86/kernel/process_64.c   |  2 +-
 arch/x86/kernel/smpboot.c      |  2 +-
 5 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 3e204e6140b51..63c42ac3cd86d 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -3,16 +3,29 @@
 #define _ASM_X86_CURRENT_H
 
 #include <linux/compiler.h>
-#include <asm/percpu.h>
 
 #ifndef __ASSEMBLY__
+
+#include <linux/cache.h>
+#include <asm/percpu.h>
+
 struct task_struct;
 
-DECLARE_PER_CPU(struct task_struct *, current_task);
+struct pcpu_hot {
+	union {
+		struct {
+			struct task_struct	*current_task;
+		};
+		u8	pad[64];
+	};
+};
+static_assert(sizeof(struct pcpu_hot) == 64);
+
+DECLARE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot);
 
 static __always_inline struct task_struct *get_current(void)
 {
-	return this_cpu_read_stable(current_task);
+	return this_cpu_read_stable(pcpu_hot.current_task);
 }
 
 #define current get_current()
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 8e873181759a2..52071539a14ca 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2012,18 +2012,16 @@ static __init int setup_clearcpuid(char *arg)
 }
 __setup("clearcpuid=", setup_clearcpuid);
 
+DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
+	.current_task	= &init_task,
+};
+EXPORT_PER_CPU_SYMBOL(pcpu_hot);
+
 #ifdef CONFIG_X86_64
 DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
 		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
 EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 
-/*
- * The following percpu variables are hot.  Align current_task to
- * cacheline size such that they fall in the same cacheline.
- */
-DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
-	&init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
 
 DEFINE_PER_CPU(void *, hardirq_stack_ptr);
 DEFINE_PER_CPU(bool, hardirq_stack_inuse);
@@ -2083,8 +2081,6 @@ void syscall_init(void)
 
 #else	/* CONFIG_X86_64 */
 
-DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
-EXPORT_PER_CPU_SYMBOL(current_task);
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
 
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2f314b170c9f0..807da45d84c72 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -207,7 +207,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (prev->gs | next->gs)
 		loadsegment(gs, next->gs);
 
-	this_cpu_write(current_task, next_p);
+	raw_cpu_write(pcpu_hot.current_task, next_p);
 
 	switch_fpu_finish();
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 6b3418bff3261..c4f6cacf6599d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -617,7 +617,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	/*
 	 * Switch the PDA and FPU contexts.
 	 */
-	this_cpu_write(current_task, next_p);
+	raw_cpu_write(pcpu_hot.current_task, next_p);
 	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
 	switch_fpu_finish();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index ce8728d2e5efa..05f315777691c 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1046,7 +1046,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 	/* Just in case we booted with a single CPU. */
 	alternatives_enable_smp();
 
-	per_cpu(current_task, cpu) = idle;
+	per_cpu(pcpu_hot.current_task, cpu) = idle;
 	cpu_init_stack_canary(cpu, idle);
 
 	/* Initialize the interrupt stack(s) */
-- 
GitLab


From 64701838bf0575ef8acb1ad2db5934e864f3e6c3 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:02 +0200
Subject: [PATCH 0107/4122] x86/percpu: Move preempt_count next to current_task

Add preempt_count to pcpu_hot, since it is once of the most used
per-cpu variables.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.284170644@infradead.org
---
 arch/x86/include/asm/current.h |  1 +
 arch/x86/include/asm/preempt.h | 27 ++++++++++++++-------------
 arch/x86/kernel/cpu/common.c   |  8 +-------
 3 files changed, 16 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 63c42ac3cd86d..0f4b46293c6c0 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -15,6 +15,7 @@ struct pcpu_hot {
 	union {
 		struct {
 			struct task_struct	*current_task;
+			int			preempt_count;
 		};
 		u8	pad[64];
 	};
diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h
index 5f6daea1ee248..2d13f25b1bd8f 100644
--- a/arch/x86/include/asm/preempt.h
+++ b/arch/x86/include/asm/preempt.h
@@ -4,11 +4,11 @@
 
 #include <asm/rmwcc.h>
 #include <asm/percpu.h>
+#include <asm/current.h>
+
 #include <linux/thread_info.h>
 #include <linux/static_call_types.h>
 
-DECLARE_PER_CPU(int, __preempt_count);
-
 /* We use the MSB mostly because its available */
 #define PREEMPT_NEED_RESCHED	0x80000000
 
@@ -24,7 +24,7 @@ DECLARE_PER_CPU(int, __preempt_count);
  */
 static __always_inline int preempt_count(void)
 {
-	return raw_cpu_read_4(__preempt_count) & ~PREEMPT_NEED_RESCHED;
+	return raw_cpu_read_4(pcpu_hot.preempt_count) & ~PREEMPT_NEED_RESCHED;
 }
 
 static __always_inline void preempt_count_set(int pc)
@@ -32,10 +32,10 @@ static __always_inline void preempt_count_set(int pc)
 	int old, new;
 
 	do {
-		old = raw_cpu_read_4(__preempt_count);
+		old = raw_cpu_read_4(pcpu_hot.preempt_count);
 		new = (old & PREEMPT_NEED_RESCHED) |
 			(pc & ~PREEMPT_NEED_RESCHED);
-	} while (raw_cpu_cmpxchg_4(__preempt_count, old, new) != old);
+	} while (raw_cpu_cmpxchg_4(pcpu_hot.preempt_count, old, new) != old);
 }
 
 /*
@@ -44,7 +44,7 @@ static __always_inline void preempt_count_set(int pc)
 #define init_task_preempt_count(p) do { } while (0)
 
 #define init_idle_preempt_count(p, cpu) do { \
-	per_cpu(__preempt_count, (cpu)) = PREEMPT_DISABLED; \
+	per_cpu(pcpu_hot.preempt_count, (cpu)) = PREEMPT_DISABLED; \
 } while (0)
 
 /*
@@ -58,17 +58,17 @@ static __always_inline void preempt_count_set(int pc)
 
 static __always_inline void set_preempt_need_resched(void)
 {
-	raw_cpu_and_4(__preempt_count, ~PREEMPT_NEED_RESCHED);
+	raw_cpu_and_4(pcpu_hot.preempt_count, ~PREEMPT_NEED_RESCHED);
 }
 
 static __always_inline void clear_preempt_need_resched(void)
 {
-	raw_cpu_or_4(__preempt_count, PREEMPT_NEED_RESCHED);
+	raw_cpu_or_4(pcpu_hot.preempt_count, PREEMPT_NEED_RESCHED);
 }
 
 static __always_inline bool test_preempt_need_resched(void)
 {
-	return !(raw_cpu_read_4(__preempt_count) & PREEMPT_NEED_RESCHED);
+	return !(raw_cpu_read_4(pcpu_hot.preempt_count) & PREEMPT_NEED_RESCHED);
 }
 
 /*
@@ -77,12 +77,12 @@ static __always_inline bool test_preempt_need_resched(void)
 
 static __always_inline void __preempt_count_add(int val)
 {
-	raw_cpu_add_4(__preempt_count, val);
+	raw_cpu_add_4(pcpu_hot.preempt_count, val);
 }
 
 static __always_inline void __preempt_count_sub(int val)
 {
-	raw_cpu_add_4(__preempt_count, -val);
+	raw_cpu_add_4(pcpu_hot.preempt_count, -val);
 }
 
 /*
@@ -92,7 +92,8 @@ static __always_inline void __preempt_count_sub(int val)
  */
 static __always_inline bool __preempt_count_dec_and_test(void)
 {
-	return GEN_UNARY_RMWcc("decl", __preempt_count, e, __percpu_arg([var]));
+	return GEN_UNARY_RMWcc("decl", pcpu_hot.preempt_count, e,
+			       __percpu_arg([var]));
 }
 
 /*
@@ -100,7 +101,7 @@ static __always_inline bool __preempt_count_dec_and_test(void)
  */
 static __always_inline bool should_resched(int preempt_offset)
 {
-	return unlikely(raw_cpu_read_4(__preempt_count) == preempt_offset);
+	return unlikely(raw_cpu_read_4(pcpu_hot.preempt_count) == preempt_offset);
 }
 
 #ifdef CONFIG_PREEMPTION
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 52071539a14ca..cafb6bd90d107 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2014,6 +2014,7 @@ __setup("clearcpuid=", setup_clearcpuid);
 
 DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
 	.current_task	= &init_task,
+	.preempt_count	= INIT_PREEMPT_COUNT,
 };
 EXPORT_PER_CPU_SYMBOL(pcpu_hot);
 
@@ -2022,13 +2023,9 @@ DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
 		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
 EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 
-
 DEFINE_PER_CPU(void *, hardirq_stack_ptr);
 DEFINE_PER_CPU(bool, hardirq_stack_inuse);
 
-DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
-EXPORT_PER_CPU_SYMBOL(__preempt_count);
-
 DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
 
 static void wrmsrl_cstar(unsigned long val)
@@ -2081,9 +2078,6 @@ void syscall_init(void)
 
 #else	/* CONFIG_X86_64 */
 
-DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
-EXPORT_PER_CPU_SYMBOL(__preempt_count);
-
 /*
  * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
  * the top of the kernel stack.  Use an extra percpu variable to track the
-- 
GitLab


From 7443b296e699e6922f5be243c8d2e316de8cacbe Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:03 +0200
Subject: [PATCH 0108/4122] x86/percpu: Move cpu_number next to current_task

Also add cpu_number to the pcpu_hot structure, it is often referenced
and this cacheline is there.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.387678283@infradead.org
---
 arch/x86/include/asm/current.h |  1 +
 arch/x86/include/asm/smp.h     | 12 +++++-------
 arch/x86/kernel/setup_percpu.c |  5 +----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0f4b46293c6c0..8ac6589e9a1b7 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -16,6 +16,7 @@ struct pcpu_hot {
 		struct {
 			struct task_struct	*current_task;
 			int			preempt_count;
+			int			cpu_number;
 		};
 		u8	pad[64];
 	};
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a73bced40e241..b4dbb20dab1a1 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -3,10 +3,10 @@
 #define _ASM_X86_SMP_H
 #ifndef __ASSEMBLY__
 #include <linux/cpumask.h>
-#include <asm/percpu.h>
 
-#include <asm/thread_info.h>
 #include <asm/cpumask.h>
+#include <asm/current.h>
+#include <asm/thread_info.h>
 
 extern int smp_num_siblings;
 extern unsigned int num_processors;
@@ -19,7 +19,6 @@ DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
 DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
 DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);
-DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
 
 DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
 DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
@@ -150,11 +149,10 @@ __visible void smp_call_function_single_interrupt(struct pt_regs *r);
 
 /*
  * This function is needed by all SMP systems. It must _always_ be valid
- * from the initial startup. We map APIC_BASE very early in page_setup(),
- * so this is correct in the x86 case.
+ * from the initial startup.
  */
-#define raw_smp_processor_id()  this_cpu_read(cpu_number)
-#define __smp_processor_id() __this_cpu_read(cpu_number)
+#define raw_smp_processor_id()  this_cpu_read(pcpu_hot.cpu_number)
+#define __smp_processor_id() __this_cpu_read(pcpu_hot.cpu_number)
 
 #ifdef CONFIG_X86_32
 extern int safe_smp_processor_id(void);
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 555089a5b446d..c2fc4c41c1644 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -23,9 +23,6 @@
 #include <asm/cpu.h>
 #include <asm/stackprotector.h>
 
-DEFINE_PER_CPU_READ_MOSTLY(int, cpu_number);
-EXPORT_PER_CPU_SYMBOL(cpu_number);
-
 #ifdef CONFIG_X86_64
 #define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
 #else
@@ -172,7 +169,7 @@ void __init setup_per_cpu_areas(void)
 	for_each_possible_cpu(cpu) {
 		per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
 		per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
-		per_cpu(cpu_number, cpu) = cpu;
+		per_cpu(pcpu_hot.cpu_number, cpu) = cpu;
 		setup_percpu_segment(cpu);
 		/*
 		 * Copy data used in early init routines from the
-- 
GitLab


From c063a217bc0726c2560138229de5673dbb253a02 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:04 +0200
Subject: [PATCH 0109/4122] x86/percpu: Move current_top_of_stack next to
 current_task

Extend the struct pcpu_hot cacheline with current_top_of_stack;
another very frequently used value.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.493038635@infradead.org
---
 arch/x86/entry/entry_32.S        |  4 ++--
 arch/x86/entry/entry_64.S        |  6 +++---
 arch/x86/entry/entry_64_compat.S |  6 +++---
 arch/x86/include/asm/current.h   |  1 +
 arch/x86/include/asm/processor.h |  4 +---
 arch/x86/kernel/asm-offsets.c    |  2 ++
 arch/x86/kernel/cpu/common.c     | 12 +-----------
 arch/x86/kernel/process_32.c     |  4 ++--
 arch/x86/kernel/process_64.c     |  2 +-
 arch/x86/kernel/smpboot.c        |  2 +-
 arch/x86/kernel/traps.c          |  4 ++--
 11 files changed, 19 insertions(+), 28 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index e309e71560389..91397f58ac300 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1181,7 +1181,7 @@ SYM_CODE_START(asm_exc_nmi)
 	 * is using the thread stack right now, so it's safe for us to use it.
 	 */
 	movl	%esp, %ebx
-	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esp
+	movl	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esp
 	call	exc_nmi
 	movl	%ebx, %esp
 
@@ -1243,7 +1243,7 @@ SYM_CODE_START(rewind_stack_and_make_dead)
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
 
-	movl	PER_CPU_VAR(cpu_current_top_of_stack), %esi
+	movl	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %esi
 	leal	-TOP_OF_KERNEL_STACK_PADDING-PTREGS_SIZE(%esi), %esp
 
 	call	make_task_dead
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index e635f962afb80..9249a45cf53fd 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -92,7 +92,7 @@ SYM_CODE_START(entry_SYSCALL_64)
 	/* tss.sp2 is scratch space. */
 	movq	%rsp, PER_CPU_VAR(cpu_tss_rw + TSS_sp2)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
-	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 
 SYM_INNER_LABEL(entry_SYSCALL_64_safe_stack, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
@@ -1209,7 +1209,7 @@ SYM_CODE_START(asm_exc_nmi)
 	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdx
 	movq	%rsp, %rdx
-	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 	UNWIND_HINT_IRET_REGS base=%rdx offset=8
 	pushq	5*8(%rdx)	/* pt_regs->ss */
 	pushq	4*8(%rdx)	/* pt_regs->rsp */
@@ -1525,7 +1525,7 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead)
 	/* Prevent any naive code from trying to unwind to our caller. */
 	xorl	%ebp, %ebp
 
-	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rax
+	movq	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rax
 	leaq	-PTREGS_SIZE(%rax), %rsp
 	UNWIND_HINT_REGS
 
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 4dd19819053a5..1dfee868d4a10 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -58,7 +58,7 @@ SYM_CODE_START(entry_SYSENTER_compat)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	popq	%rax
 
-	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 
 	/* Construct struct pt_regs on stack */
 	pushq	$__USER32_DS		/* pt_regs->ss */
@@ -191,7 +191,7 @@ SYM_CODE_START(entry_SYSCALL_compat)
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rsp
 
 	/* Switch to the kernel stack */
-	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 
 SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
@@ -332,7 +332,7 @@ SYM_CODE_START(entry_INT80_compat)
 	ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV
 
 	movq	%rsp, %rax
-	movq	PER_CPU_VAR(cpu_current_top_of_stack), %rsp
+	movq	PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp
 
 	pushq	5*8(%rax)		/* regs->ss */
 	pushq	4*8(%rax)		/* regs->rsp */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 8ac6589e9a1b7..2dd013128f1ef 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -17,6 +17,7 @@ struct pcpu_hot {
 			struct task_struct	*current_task;
 			int			preempt_count;
 			int			cpu_number;
+			unsigned long		top_of_stack;
 		};
 		u8	pad[64];
 	};
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c660700ecfc61..c345f3096c80b 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -426,8 +426,6 @@ struct irq_stack {
 	char		stack[IRQ_STACK_SIZE];
 } __aligned(IRQ_STACK_SIZE);
 
-DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
-
 #ifdef CONFIG_X86_64
 struct fixed_percpu_data {
 	/*
@@ -566,7 +564,7 @@ static __always_inline unsigned long current_top_of_stack(void)
 	 *  and around vm86 mode and sp0 on x86_64 is special because of the
 	 *  entry trampoline.
 	 */
-	return this_cpu_read_stable(cpu_current_top_of_stack);
+	return this_cpu_read_stable(pcpu_hot.top_of_stack);
 }
 
 static __always_inline bool on_thread_stack(void)
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index cb50589a7102f..a9824318e1c55 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -109,6 +109,8 @@ static void __used common(void)
 	OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
 	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
 
+	OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
+
 	if (IS_ENABLED(CONFIG_KVM_INTEL)) {
 		BLANK();
 		OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index cafb6bd90d107..408245c2eead8 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2015,6 +2015,7 @@ __setup("clearcpuid=", setup_clearcpuid);
 DEFINE_PER_CPU_ALIGNED(struct pcpu_hot, pcpu_hot) = {
 	.current_task	= &init_task,
 	.preempt_count	= INIT_PREEMPT_COUNT,
+	.top_of_stack	= TOP_OF_INIT_STACK,
 };
 EXPORT_PER_CPU_SYMBOL(pcpu_hot);
 
@@ -2026,8 +2027,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 DEFINE_PER_CPU(void *, hardirq_stack_ptr);
 DEFINE_PER_CPU(bool, hardirq_stack_inuse);
 
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) = TOP_OF_INIT_STACK;
-
 static void wrmsrl_cstar(unsigned long val)
 {
 	/*
@@ -2078,15 +2077,6 @@ void syscall_init(void)
 
 #else	/* CONFIG_X86_64 */
 
-/*
- * On x86_32, vm86 modifies tss.sp0, so sp0 isn't a reliable way to find
- * the top of the kernel stack.  Use an extra percpu variable to track the
- * top of the kernel stack directly.
- */
-DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
-	(unsigned long)&init_thread_union + THREAD_SIZE;
-EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
-
 #ifdef CONFIG_STACKPROTECTOR
 DEFINE_PER_CPU(unsigned long, __stack_chk_guard);
 EXPORT_PER_CPU_SYMBOL(__stack_chk_guard);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 807da45d84c72..470c128759eab 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -191,13 +191,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	arch_end_context_switch(next_p);
 
 	/*
-	 * Reload esp0 and cpu_current_top_of_stack.  This changes
+	 * Reload esp0 and pcpu_hot.top_of_stack.  This changes
 	 * current_thread_info().  Refresh the SYSENTER configuration in
 	 * case prev or next is vm86.
 	 */
 	update_task_stack(next_p);
 	refresh_sysenter_cs(next);
-	this_cpu_write(cpu_current_top_of_stack,
+	this_cpu_write(pcpu_hot.top_of_stack,
 		       (unsigned long)task_stack_page(next_p) +
 		       THREAD_SIZE);
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index c4f6cacf6599d..7f807e8bc9237 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -618,7 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	 * Switch the PDA and FPU contexts.
 	 */
 	raw_cpu_write(pcpu_hot.current_task, next_p);
-	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
+	raw_cpu_write(pcpu_hot.top_of_stack, task_top_of_stack(next_p));
 
 	switch_fpu_finish();
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 05f315777691c..87863a93e9181 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1056,7 +1056,7 @@ int common_cpu_up(unsigned int cpu, struct task_struct *idle)
 
 #ifdef CONFIG_X86_32
 	/* Stack for startup_32 can be just as for start_secondary onwards */
-	per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
+	per_cpu(pcpu_hot.top_of_stack, cpu) = task_top_of_stack(idle);
 #else
 	initial_gs = per_cpu_offset(cpu);
 #endif
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 178015a820f08..7ac19aba89830 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -851,7 +851,7 @@ DEFINE_IDTENTRY_RAW(exc_int3)
  */
 asmlinkage __visible noinstr struct pt_regs *sync_regs(struct pt_regs *eregs)
 {
-	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(cpu_current_top_of_stack) - 1;
+	struct pt_regs *regs = (struct pt_regs *)this_cpu_read(pcpu_hot.top_of_stack) - 1;
 	if (regs != eregs)
 		*regs = *eregs;
 	return regs;
@@ -869,7 +869,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
 	 * trust it and switch to the current kernel stack
 	 */
 	if (ip_within_syscall_gap(regs)) {
-		sp = this_cpu_read(cpu_current_top_of_stack);
+		sp = this_cpu_read(pcpu_hot.top_of_stack);
 		goto sync;
 	}
 
-- 
GitLab


From d7b6d709a76a4f4ef3108ac41e1b39eb80f5c084 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:05 +0200
Subject: [PATCH 0110/4122] x86/percpu: Move irq_stack variables next to
 current_task

Further extend struct pcpu_hot with the hard and soft irq stack
pointers.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.599170752@infradead.org
---
 arch/x86/include/asm/current.h   |  6 ++++++
 arch/x86/include/asm/irq_stack.h | 12 ++++++------
 arch/x86/include/asm/processor.h |  4 ----
 arch/x86/kernel/cpu/common.c     |  3 ---
 arch/x86/kernel/dumpstack_32.c   |  4 ++--
 arch/x86/kernel/dumpstack_64.c   |  2 +-
 arch/x86/kernel/irq_32.c         | 13 +++++--------
 arch/x86/kernel/irq_64.c         |  6 +++---
 arch/x86/kernel/process_64.c     |  2 +-
 9 files changed, 24 insertions(+), 28 deletions(-)

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 2dd013128f1ef..ac3090ddf34e8 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -18,6 +18,12 @@ struct pcpu_hot {
 			int			preempt_count;
 			int			cpu_number;
 			unsigned long		top_of_stack;
+			void			*hardirq_stack_ptr;
+#ifdef CONFIG_X86_64
+			bool			hardirq_stack_inuse;
+#else
+			void			*softirq_stack_ptr;
+#endif
 		};
 		u8	pad[64];
 	};
diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h
index 147cb8fdda92e..798183867d789 100644
--- a/arch/x86/include/asm/irq_stack.h
+++ b/arch/x86/include/asm/irq_stack.h
@@ -116,7 +116,7 @@
 	ASM_CALL_ARG2
 
 #define call_on_irqstack(func, asm_call, argconstr...)			\
-	call_on_stack(__this_cpu_read(hardirq_stack_ptr),		\
+	call_on_stack(__this_cpu_read(pcpu_hot.hardirq_stack_ptr),	\
 		      func, asm_call, argconstr)
 
 /* Macros to assert type correctness for run_*_on_irqstack macros */
@@ -135,7 +135,7 @@
 	 * User mode entry and interrupt on the irq stack do not	\
 	 * switch stacks. If from user mode the task stack is empty.	\
 	 */								\
-	if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) {	\
+	if (user_mode(regs) || __this_cpu_read(pcpu_hot.hardirq_stack_inuse)) { \
 		irq_enter_rcu();					\
 		func(c_args);						\
 		irq_exit_rcu();						\
@@ -146,9 +146,9 @@
 		 * places. Invoke the stack switch macro with the call	\
 		 * sequence which matches the above direct invocation.	\
 		 */							\
-		__this_cpu_write(hardirq_stack_inuse, true);		\
+		__this_cpu_write(pcpu_hot.hardirq_stack_inuse, true);	\
 		call_on_irqstack(func, asm_call, constr);		\
-		__this_cpu_write(hardirq_stack_inuse, false);		\
+		__this_cpu_write(pcpu_hot.hardirq_stack_inuse, false);	\
 	}								\
 }
 
@@ -212,9 +212,9 @@
  */
 #define do_softirq_own_stack()						\
 {									\
-	__this_cpu_write(hardirq_stack_inuse, true);			\
+	__this_cpu_write(pcpu_hot.hardirq_stack_inuse, true);		\
 	call_on_irqstack(__do_softirq, ASM_CALL_ARG0);			\
-	__this_cpu_write(hardirq_stack_inuse, false);			\
+	__this_cpu_write(pcpu_hot.hardirq_stack_inuse, false);		\
 }
 
 #endif
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c345f3096c80b..bdde68744eb3a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -448,8 +448,6 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu)
 	return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
 }
 
-DECLARE_PER_CPU(void *, hardirq_stack_ptr);
-DECLARE_PER_CPU(bool, hardirq_stack_inuse);
 extern asmlinkage void ignore_sysret(void);
 
 /* Save actual FS/GS selectors and bases to current->thread */
@@ -458,8 +456,6 @@ void current_save_fsgs(void);
 #ifdef CONFIG_STACKPROTECTOR
 DECLARE_PER_CPU(unsigned long, __stack_chk_guard);
 #endif
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
 #endif	/* !X86_64 */
 
 struct perf_event;
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 408245c2eead8..2bec4b4b2c50d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -2024,9 +2024,6 @@ DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
 		     fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
 EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
 
-DEFINE_PER_CPU(void *, hardirq_stack_ptr);
-DEFINE_PER_CPU(bool, hardirq_stack_inuse);
-
 static void wrmsrl_cstar(unsigned long val)
 {
 	/*
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 722fd712e1cf0..b4905d5173fd3 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -37,7 +37,7 @@ const char *stack_type_name(enum stack_type type)
 
 static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
+	unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
 	unsigned long *end   = begin + (THREAD_SIZE / sizeof(long));
 
 	/*
@@ -62,7 +62,7 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
 
 static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
 {
-	unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
+	unsigned long *begin = (unsigned long *)this_cpu_read(pcpu_hot.softirq_stack_ptr);
 	unsigned long *end   = begin + (THREAD_SIZE / sizeof(long));
 
 	/*
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 6c5defd6569a3..f05339fee7785 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -134,7 +134,7 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
 
 static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 {
-	unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
+	unsigned long *end = (unsigned long *)this_cpu_read(pcpu_hot.hardirq_stack_ptr);
 	unsigned long *begin;
 
 	/*
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 01833ebf5e8e3..dc1049c01f9b9 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -52,9 +52,6 @@ static inline int check_stack_overflow(void) { return 0; }
 static inline void print_stack_overflow(void) { }
 #endif
 
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
-
 static void call_on_stack(void *func, void *stack)
 {
 	asm volatile("xchgl	%%ebx,%%esp	\n"
@@ -77,7 +74,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
 	u32 *isp, *prev_esp, arg1;
 
 	curstk = (struct irq_stack *) current_stack();
-	irqstk = __this_cpu_read(hardirq_stack_ptr);
+	irqstk = __this_cpu_read(pcpu_hot.hardirq_stack_ptr);
 
 	/*
 	 * this is where we switch to the IRQ stack. However, if we are
@@ -115,7 +112,7 @@ int irq_init_percpu_irqstack(unsigned int cpu)
 	int node = cpu_to_node(cpu);
 	struct page *ph, *ps;
 
-	if (per_cpu(hardirq_stack_ptr, cpu))
+	if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
 		return 0;
 
 	ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
@@ -127,8 +124,8 @@ int irq_init_percpu_irqstack(unsigned int cpu)
 		return -ENOMEM;
 	}
 
-	per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
-	per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
+	per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = page_address(ph);
+	per_cpu(pcpu_hot.softirq_stack_ptr, cpu) = page_address(ps);
 	return 0;
 }
 
@@ -138,7 +135,7 @@ void do_softirq_own_stack(void)
 	struct irq_stack *irqstk;
 	u32 *isp, *prev_esp;
 
-	irqstk = __this_cpu_read(softirq_stack_ptr);
+	irqstk = __this_cpu_read(pcpu_hot.softirq_stack_ptr);
 
 	/* build the stack frame on the softirq stack */
 	isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1c0fb96b9e390..fe0c859873d17 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -50,7 +50,7 @@ static int map_irq_stack(unsigned int cpu)
 		return -ENOMEM;
 
 	/* Store actual TOS to avoid adjustment in the hotpath */
-	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
+	per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
 	return 0;
 }
 #else
@@ -63,14 +63,14 @@ static int map_irq_stack(unsigned int cpu)
 	void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
 
 	/* Store actual TOS to avoid adjustment in the hotpath */
-	per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
+	per_cpu(pcpu_hot.hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
 	return 0;
 }
 #endif
 
 int irq_init_percpu_irqstack(unsigned int cpu)
 {
-	if (per_cpu(hardirq_stack_ptr, cpu))
+	if (per_cpu(pcpu_hot.hardirq_stack_ptr, cpu))
 		return 0;
 	return map_irq_stack(cpu);
 }
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 7f807e8bc9237..1312de5b76aa4 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -563,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	int cpu = smp_processor_id();
 
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
-		     this_cpu_read(hardirq_stack_inuse));
+		     this_cpu_read(pcpu_hot.hardirq_stack_inuse));
 
 	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
 		switch_fpu_prepare(prev_fpu, cpu);
-- 
GitLab


From 7fcecafebed90d03f35bec6e147fc0b5f6e1bc71 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:06 +0200
Subject: [PATCH 0111/4122] x86/softirq: Move softirq pending next to current
 task

Another hot variable which is strict per CPU and benefits from
being in the same cache line.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.702133710@infradead.org
---
 arch/x86/include/asm/current.h | 1 +
 arch/x86/include/asm/hardirq.h | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index ac3090ddf34e8..b89aba077b84b 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -19,6 +19,7 @@ struct pcpu_hot {
 			int			cpu_number;
 			unsigned long		top_of_stack;
 			void			*hardirq_stack_ptr;
+			u16			softirq_pending;
 #ifdef CONFIG_X86_64
 			bool			hardirq_stack_inuse;
 #else
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 275e7fd20310f..66837b8c67f1a 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -3,9 +3,9 @@
 #define _ASM_X86_HARDIRQ_H
 
 #include <linux/threads.h>
+#include <asm/current.h>
 
 typedef struct {
-	u16	     __softirq_pending;
 #if IS_ENABLED(CONFIG_KVM_INTEL)
 	u8	     kvm_cpu_l1tf_flush_l1d;
 #endif
@@ -60,6 +60,7 @@ extern u64 arch_irq_stat_cpu(unsigned int cpu);
 extern u64 arch_irq_stat(void);
 #define arch_irq_stat		arch_irq_stat
 
+#define local_softirq_pending_ref       pcpu_hot.softirq_pending
 
 #if IS_ENABLED(CONFIG_KVM_INTEL)
 static inline void kvm_set_cpu_l1tf_flush_l1d(void)
-- 
GitLab


From 5b71ac8a2a3185da34a6556e791b533b48183a41 Mon Sep 17 00:00:00 2001
From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Date: Mon, 17 Oct 2022 16:41:06 +0200
Subject: [PATCH 0112/4122] x86: Fixup asm-offsets duplicate

It turns out that 'stack_canary_offset' is a variable name; shadowing
that with a #define is ripe of fail when the asm-offsets.h header gets
included. Rename the thing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/entry/entry_64.S        | 2 +-
 arch/x86/kernel/asm-offsets_64.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 9249a45cf53fd..5c578a7dfcd79 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -252,7 +252,7 @@ SYM_FUNC_START(__switch_to_asm)
 
 #ifdef CONFIG_STACKPROTECTOR
 	movq	TASK_stack_canary(%rsi), %rbx
-	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
+	movq	%rbx, PER_CPU_VAR(fixed_percpu_data) + FIXED_stack_canary
 #endif
 
 	/*
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 9b698215d2618..bb65371ea9df5 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -57,7 +57,7 @@ int main(void)
 	BLANK();
 
 #ifdef CONFIG_STACKPROTECTOR
-	DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
+	OFFSET(FIXED_stack_canary, fixed_percpu_data, stack_canary);
 	BLANK();
 #endif
 	return 0;
-- 
GitLab


From 61c6065ef7ec0447a280179d04b2d81c80c2f479 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:07 +0200
Subject: [PATCH 0113/4122] objtool: Allow !PC relative relocations

Objtool doesn't currently much like per-cpu usage in alternatives:

arch/x86/entry/entry_64.o: warning: objtool: .altinstr_replacement+0xf: unsupported relocation in alternatives section
  f:   65 c7 04 25 00 00 00 00 00 00 00 80     movl   $0x80000000,%gs:0x0      13: R_X86_64_32S        __x86_call_depth

Since the R_X86_64_32S relocation is location invariant (it's
computation doesn't include P - the address of the location itself),
it can be trivially allowed.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.806607235@infradead.org
---
 tools/objtool/arch/x86/decode.c      | 24 ++++++++++++++++++++++++
 tools/objtool/check.c                |  2 +-
 tools/objtool/include/objtool/arch.h |  2 ++
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 1c253b4b7ce00..f0943830add75 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -73,6 +73,30 @@ unsigned long arch_jump_destination(struct instruction *insn)
 	return insn->offset + insn->len + insn->immediate;
 }
 
+bool arch_pc_relative_reloc(struct reloc *reloc)
+{
+	/*
+	 * All relocation types where P (the address of the target)
+	 * is included in the computation.
+	 */
+	switch (reloc->type) {
+	case R_X86_64_PC8:
+	case R_X86_64_PC16:
+	case R_X86_64_PC32:
+	case R_X86_64_PC64:
+
+	case R_X86_64_PLT32:
+	case R_X86_64_GOTPC32:
+	case R_X86_64_GOTPCREL:
+		return true;
+
+	default:
+		break;
+	}
+
+	return false;
+}
+
 #define ADD_OP(op) \
 	if (!(op = calloc(1, sizeof(*op)))) \
 		return -1; \
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 43ec14c29a60c..7174bba144940 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1645,7 +1645,7 @@ static int handle_group_alt(struct objtool_file *file,
 		 * accordingly.
 		 */
 		alt_reloc = insn_reloc(file, insn);
-		if (alt_reloc &&
+		if (alt_reloc && arch_pc_relative_reloc(alt_reloc) &&
 		    !arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
 
 			WARN_FUNC("unsupported relocation in alternatives section",
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index beb2f3aa94ffc..fe2ea4b892c34 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -93,4 +93,6 @@ bool arch_is_rethunk(struct symbol *sym);
 
 int arch_rewrite_retpolines(struct objtool_file *file);
 
+bool arch_pc_relative_reloc(struct reloc *reloc);
+
 #endif /* _ARCH_H */
-- 
GitLab


From 6644ee846cb983437063da8fd24b7cae671fd019 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:08 +0200
Subject: [PATCH 0114/4122] objtool: Track init section

For future usage of .init.text exclusion track the init section in the
instruction decoder and use the result in retpoline validation.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111145.910334431@infradead.org
---
 tools/objtool/check.c               | 17 ++++++++++-------
 tools/objtool/include/objtool/elf.h |  2 +-
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 7174bba144940..bb7c8196bf57c 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -382,6 +382,15 @@ static int decode_instructions(struct objtool_file *file)
 		    !strncmp(sec->name, ".text.__x86.", 12))
 			sec->noinstr = true;
 
+		/*
+		 * .init.text code is ran before userspace and thus doesn't
+		 * strictly need retpolines, except for modules which are
+		 * loaded late, they very much do need retpoline in their
+		 * .init.text
+		 */
+		if (!strcmp(sec->name, ".init.text") && !opts.module)
+			sec->init = true;
+
 		for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
 			insn = malloc(sizeof(*insn));
 			if (!insn) {
@@ -3748,13 +3757,7 @@ static int validate_retpoline(struct objtool_file *file)
 		if (insn->retpoline_safe)
 			continue;
 
-		/*
-		 * .init.text code is ran before userspace and thus doesn't
-		 * strictly need retpolines, except for modules which are
-		 * loaded late, they very much do need retpoline in their
-		 * .init.text
-		 */
-		if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
+		if (insn->sec->init)
 			continue;
 
 		if (insn->type == INSN_RETURN) {
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 16f4067b82aea..baa808583c4fa 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -38,7 +38,7 @@ struct section {
 	Elf_Data *data;
 	char *name;
 	int idx;
-	bool changed, text, rodata, noinstr;
+	bool changed, text, rodata, noinstr, init;
 };
 
 struct symbol {
-- 
GitLab


From 00abd38408127a57861698a8bffba65849de6bbd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:09 +0200
Subject: [PATCH 0115/4122] objtool: Add .call_sites section

In preparation for call depth tracking provide a section which collects all
direct calls.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.016511961@infradead.org
---
 arch/x86/kernel/vmlinux.lds.S           |  7 ++++
 tools/objtool/check.c                   | 51 +++++++++++++++++++++++++
 tools/objtool/include/objtool/objtool.h |  1 +
 tools/objtool/objtool.c                 |  1 +
 4 files changed, 60 insertions(+)

diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 0e9fc080c417d..b69df9e013cca 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -291,6 +291,13 @@ SECTIONS
 		*(.return_sites)
 		__return_sites_end = .;
 	}
+
+	. = ALIGN(8);
+	.call_sites : AT(ADDR(.call_sites) - LOAD_OFFSET) {
+		__call_sites = .;
+		*(.call_sites)
+		__call_sites_end = .;
+	}
 #endif
 
 #ifdef CONFIG_X86_KERNEL_IBT
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index bb7c8196bf57c..f578e030e8bb9 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -902,6 +902,49 @@ static int create_mcount_loc_sections(struct objtool_file *file)
 	return 0;
 }
 
+static int create_direct_call_sections(struct objtool_file *file)
+{
+	struct instruction *insn;
+	struct section *sec;
+	unsigned int *loc;
+	int idx;
+
+	sec = find_section_by_name(file->elf, ".call_sites");
+	if (sec) {
+		INIT_LIST_HEAD(&file->call_list);
+		WARN("file already has .call_sites section, skipping");
+		return 0;
+	}
+
+	if (list_empty(&file->call_list))
+		return 0;
+
+	idx = 0;
+	list_for_each_entry(insn, &file->call_list, call_node)
+		idx++;
+
+	sec = elf_create_section(file->elf, ".call_sites", 0, sizeof(unsigned int), idx);
+	if (!sec)
+		return -1;
+
+	idx = 0;
+	list_for_each_entry(insn, &file->call_list, call_node) {
+
+		loc = (unsigned int *)sec->data->d_buf + idx;
+		memset(loc, 0, sizeof(unsigned int));
+
+		if (elf_add_reloc_to_insn(file->elf, sec,
+					  idx * sizeof(unsigned int),
+					  R_X86_64_PC32,
+					  insn->sec, insn->offset))
+			return -1;
+
+		idx++;
+	}
+
+	return 0;
+}
+
 /*
  * Warnings shouldn't be reported for ignored functions.
  */
@@ -1279,6 +1322,9 @@ static void annotate_call_site(struct objtool_file *file,
 		return;
 	}
 
+	if (insn->type == INSN_CALL && !insn->sec->init)
+		list_add_tail(&insn->call_node, &file->call_list);
+
 	if (!sibling && dead_end_function(file, sym))
 		insn->dead_end = true;
 }
@@ -4305,6 +4351,11 @@ int check(struct objtool_file *file)
 		if (ret < 0)
 			goto out;
 		warnings += ret;
+
+		ret = create_direct_call_sections(file);
+		if (ret < 0)
+			goto out;
+		warnings += ret;
 	}
 
 	if (opts.mcount) {
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index 7f2d1b0953333..6b40977bcdb1a 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -28,6 +28,7 @@ struct objtool_file {
 	struct list_head static_call_list;
 	struct list_head mcount_loc_list;
 	struct list_head endbr_list;
+	struct list_head call_list;
 	bool ignore_unreachables, hints, rodata;
 
 	unsigned int nr_endbr;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index a7ecc32e35125..6affd8067f837 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -106,6 +106,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
 	INIT_LIST_HEAD(&file.static_call_list);
 	INIT_LIST_HEAD(&file.mcount_loc_list);
 	INIT_LIST_HEAD(&file.endbr_list);
+	INIT_LIST_HEAD(&file.call_list);
 	file.ignore_unreachables = opts.no_unreachable;
 	file.hints = false;
 
-- 
GitLab


From 0c0a6d8934e2081df93ba0bfc0cf615cc9c06988 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:10 +0200
Subject: [PATCH 0116/4122] objtool: Add --hacks=skylake

Make the call/func sections selectable via the --hacks option.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.120821440@infradead.org
---
 scripts/Makefile.lib                    |  1 +
 tools/objtool/builtin-check.c           |  7 ++++++-
 tools/objtool/check.c                   | 10 ++++++----
 tools/objtool/include/objtool/builtin.h |  1 +
 4 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 3aa384cec76b8..85f02756dc9c1 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -254,6 +254,7 @@ objtool := $(objtree)/tools/objtool/objtool
 
 objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK)		+= --hacks=jump_label
 objtool-args-$(CONFIG_HAVE_NOINSTR_HACK)		+= --hacks=noinstr
+objtool-args-$(CONFIG_CALL_DEPTH_TRACKING)		+= --hacks=skylake
 objtool-args-$(CONFIG_X86_KERNEL_IBT)			+= --ibt
 objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL)	+= --mcount
 objtool-args-$(CONFIG_UNWINDER_ORC)			+= --orc
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 24fbe803a0d32..0a04f8ea4432d 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -57,12 +57,17 @@ static int parse_hacks(const struct option *opt, const char *str, int unset)
 		found = true;
 	}
 
+	if (!str || strstr(str, "skylake")) {
+		opts.hack_skylake = true;
+		found = true;
+	}
+
 	return found ? 0 : -1;
 }
 
 const struct option check_options[] = {
 	OPT_GROUP("Actions:"),
-	OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr", "patch toolchain bugs/limitations", parse_hacks),
+	OPT_CALLBACK_OPTARG('h', "hacks", NULL, NULL, "jump_label,noinstr,skylake", "patch toolchain bugs/limitations", parse_hacks),
 	OPT_BOOLEAN('i', "ibt", &opts.ibt, "validate and annotate IBT"),
 	OPT_BOOLEAN('m', "mcount", &opts.mcount, "annotate mcount/fentry calls for ftrace"),
 	OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index f578e030e8bb9..1461c8894fb70 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -4352,10 +4352,12 @@ int check(struct objtool_file *file)
 			goto out;
 		warnings += ret;
 
-		ret = create_direct_call_sections(file);
-		if (ret < 0)
-			goto out;
-		warnings += ret;
+		if (opts.hack_skylake) {
+			ret = create_direct_call_sections(file);
+			if (ret < 0)
+				goto out;
+			warnings += ret;
+		}
 	}
 
 	if (opts.mcount) {
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 42a52f1a0add5..22092a9f3cf6c 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -14,6 +14,7 @@ struct opts {
 	bool dump_orc;
 	bool hack_jump_label;
 	bool hack_noinstr;
+	bool hack_skylake;
 	bool ibt;
 	bool mcount;
 	bool noinstr;
-- 
GitLab


From 5da6aea375cde499fdfac3cde4f26df4a840eb9f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:12 +0200
Subject: [PATCH 0117/4122] objtool: Fix find_{symbol,func}_containing()

The current find_{symbol,func}_containing() functions are broken in
the face of overlapping symbols, exactly the case that is needed for a
new ibt/endbr supression.

Import interval_tree_generic.h into the tools tree and convert the
symbol tree to an interval tree to support proper range stabs.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.330203761@infradead.org
---
 tools/include/linux/interval_tree_generic.h | 187 ++++++++++++++++++++
 tools/objtool/elf.c                         |  93 +++++-----
 tools/objtool/include/objtool/elf.h         |   3 +-
 3 files changed, 229 insertions(+), 54 deletions(-)
 create mode 100644 tools/include/linux/interval_tree_generic.h

diff --git a/tools/include/linux/interval_tree_generic.h b/tools/include/linux/interval_tree_generic.h
new file mode 100644
index 0000000000000..aaa8a0767aa3a
--- /dev/null
+++ b/tools/include/linux/interval_tree_generic.h
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+  Interval Trees
+  (C) 2012  Michel Lespinasse <walken@google.com>
+
+
+  include/linux/interval_tree_generic.h
+*/
+
+#include <linux/rbtree_augmented.h>
+
+/*
+ * Template for implementing interval trees
+ *
+ * ITSTRUCT:   struct type of the interval tree nodes
+ * ITRB:       name of struct rb_node field within ITSTRUCT
+ * ITTYPE:     type of the interval endpoints
+ * ITSUBTREE:  name of ITTYPE field within ITSTRUCT holding last-in-subtree
+ * ITSTART(n): start endpoint of ITSTRUCT node n
+ * ITLAST(n):  last endpoint of ITSTRUCT node n
+ * ITSTATIC:   'static' or empty
+ * ITPREFIX:   prefix to use for the inline tree definitions
+ *
+ * Note - before using this, please consider if generic version
+ * (interval_tree.h) would work for you...
+ */
+
+#define INTERVAL_TREE_DEFINE(ITSTRUCT, ITRB, ITTYPE, ITSUBTREE,		      \
+			     ITSTART, ITLAST, ITSTATIC, ITPREFIX)	      \
+									      \
+/* Callbacks for augmented rbtree insert and remove */			      \
+									      \
+RB_DECLARE_CALLBACKS_MAX(static, ITPREFIX ## _augment,			      \
+			 ITSTRUCT, ITRB, ITTYPE, ITSUBTREE, ITLAST)	      \
+									      \
+/* Insert / remove interval nodes from the tree */			      \
+									      \
+ITSTATIC void ITPREFIX ## _insert(ITSTRUCT *node,			      \
+				  struct rb_root_cached *root)	 	      \
+{									      \
+	struct rb_node **link = &root->rb_root.rb_node, *rb_parent = NULL;    \
+	ITTYPE start = ITSTART(node), last = ITLAST(node);		      \
+	ITSTRUCT *parent;						      \
+	bool leftmost = true;						      \
+									      \
+	while (*link) {							      \
+		rb_parent = *link;					      \
+		parent = rb_entry(rb_parent, ITSTRUCT, ITRB);		      \
+		if (parent->ITSUBTREE < last)				      \
+			parent->ITSUBTREE = last;			      \
+		if (start < ITSTART(parent))				      \
+			link = &parent->ITRB.rb_left;			      \
+		else {							      \
+			link = &parent->ITRB.rb_right;			      \
+			leftmost = false;				      \
+		}							      \
+	}								      \
+									      \
+	node->ITSUBTREE = last;						      \
+	rb_link_node(&node->ITRB, rb_parent, link);			      \
+	rb_insert_augmented_cached(&node->ITRB, root,			      \
+				   leftmost, &ITPREFIX ## _augment);	      \
+}									      \
+									      \
+ITSTATIC void ITPREFIX ## _remove(ITSTRUCT *node,			      \
+				  struct rb_root_cached *root)		      \
+{									      \
+	rb_erase_augmented_cached(&node->ITRB, root, &ITPREFIX ## _augment);  \
+}									      \
+									      \
+/*									      \
+ * Iterate over intervals intersecting [start;last]			      \
+ *									      \
+ * Note that a node's interval intersects [start;last] iff:		      \
+ *   Cond1: ITSTART(node) <= last					      \
+ * and									      \
+ *   Cond2: start <= ITLAST(node)					      \
+ */									      \
+									      \
+static ITSTRUCT *							      \
+ITPREFIX ## _subtree_search(ITSTRUCT *node, ITTYPE start, ITTYPE last)	      \
+{									      \
+	while (true) {							      \
+		/*							      \
+		 * Loop invariant: start <= node->ITSUBTREE		      \
+		 * (Cond2 is satisfied by one of the subtree nodes)	      \
+		 */							      \
+		if (node->ITRB.rb_left) {				      \
+			ITSTRUCT *left = rb_entry(node->ITRB.rb_left,	      \
+						  ITSTRUCT, ITRB);	      \
+			if (start <= left->ITSUBTREE) {			      \
+				/*					      \
+				 * Some nodes in left subtree satisfy Cond2.  \
+				 * Iterate to find the leftmost such node N.  \
+				 * If it also satisfies Cond1, that's the     \
+				 * match we are looking for. Otherwise, there \
+				 * is no matching interval as nodes to the    \
+				 * right of N can't satisfy Cond1 either.     \
+				 */					      \
+				node = left;				      \
+				continue;				      \
+			}						      \
+		}							      \
+		if (ITSTART(node) <= last) {		/* Cond1 */	      \
+			if (start <= ITLAST(node))	/* Cond2 */	      \
+				return node;	/* node is leftmost match */  \
+			if (node->ITRB.rb_right) {			      \
+				node = rb_entry(node->ITRB.rb_right,	      \
+						ITSTRUCT, ITRB);	      \
+				if (start <= node->ITSUBTREE)		      \
+					continue;			      \
+			}						      \
+		}							      \
+		return NULL;	/* No match */				      \
+	}								      \
+}									      \
+									      \
+ITSTATIC ITSTRUCT *							      \
+ITPREFIX ## _iter_first(struct rb_root_cached *root,			      \
+			ITTYPE start, ITTYPE last)			      \
+{									      \
+	ITSTRUCT *node, *leftmost;					      \
+									      \
+	if (!root->rb_root.rb_node)					      \
+		return NULL;						      \
+									      \
+	/*								      \
+	 * Fastpath range intersection/overlap between A: [a0, a1] and	      \
+	 * B: [b0, b1] is given by:					      \
+	 *								      \
+	 *         a0 <= b1 && b0 <= a1					      \
+	 *								      \
+	 *  ... where A holds the lock range and B holds the smallest	      \
+	 * 'start' and largest 'last' in the tree. For the later, we	      \
+	 * rely on the root node, which by augmented interval tree	      \
+	 * property, holds the largest value in its last-in-subtree.	      \
+	 * This allows mitigating some of the tree walk overhead for	      \
+	 * for non-intersecting ranges, maintained and consulted in O(1).     \
+	 */								      \
+	node = rb_entry(root->rb_root.rb_node, ITSTRUCT, ITRB);		      \
+	if (node->ITSUBTREE < start)					      \
+		return NULL;						      \
+									      \
+	leftmost = rb_entry(root->rb_leftmost, ITSTRUCT, ITRB);		      \
+	if (ITSTART(leftmost) > last)					      \
+		return NULL;						      \
+									      \
+	return ITPREFIX ## _subtree_search(node, start, last);		      \
+}									      \
+									      \
+ITSTATIC ITSTRUCT *							      \
+ITPREFIX ## _iter_next(ITSTRUCT *node, ITTYPE start, ITTYPE last)	      \
+{									      \
+	struct rb_node *rb = node->ITRB.rb_right, *prev;		      \
+									      \
+	while (true) {							      \
+		/*							      \
+		 * Loop invariants:					      \
+		 *   Cond1: ITSTART(node) <= last			      \
+		 *   rb == node->ITRB.rb_right				      \
+		 *							      \
+		 * First, search right subtree if suitable		      \
+		 */							      \
+		if (rb) {						      \
+			ITSTRUCT *right = rb_entry(rb, ITSTRUCT, ITRB);	      \
+			if (start <= right->ITSUBTREE)			      \
+				return ITPREFIX ## _subtree_search(right,     \
+								start, last); \
+		}							      \
+									      \
+		/* Move up the tree until we come from a node's left child */ \
+		do {							      \
+			rb = rb_parent(&node->ITRB);			      \
+			if (!rb)					      \
+				return NULL;				      \
+			prev = &node->ITRB;				      \
+			node = rb_entry(rb, ITSTRUCT, ITRB);		      \
+			rb = node->ITRB.rb_right;			      \
+		} while (prev == rb);					      \
+									      \
+		/* Check if the node intersects [start;last] */		      \
+		if (last < ITSTART(node))		/* !Cond1 */	      \
+			return NULL;					      \
+		else if (start <= ITLAST(node))		/* Cond2 */	      \
+			return node;					      \
+	}								      \
+}
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 7e24b09b1163a..89b37cd4ab1dc 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -16,6 +16,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
+#include <linux/interval_tree_generic.h>
 #include <objtool/builtin.h>
 
 #include <objtool/elf.h>
@@ -50,38 +51,22 @@ static inline u32 str_hash(const char *str)
 	__elf_table(name); \
 })
 
-static bool symbol_to_offset(struct rb_node *a, const struct rb_node *b)
+static inline unsigned long __sym_start(struct symbol *s)
 {
-	struct symbol *sa = rb_entry(a, struct symbol, node);
-	struct symbol *sb = rb_entry(b, struct symbol, node);
-
-	if (sa->offset < sb->offset)
-		return true;
-	if (sa->offset > sb->offset)
-		return false;
-
-	if (sa->len < sb->len)
-		return true;
-	if (sa->len > sb->len)
-		return false;
-
-	sa->alias = sb;
-
-	return false;
+	return s->offset;
 }
 
-static int symbol_by_offset(const void *key, const struct rb_node *node)
+static inline unsigned long __sym_last(struct symbol *s)
 {
-	const struct symbol *s = rb_entry(node, struct symbol, node);
-	const unsigned long *o = key;
+	return s->offset + s->len - 1;
+}
 
-	if (*o < s->offset)
-		return -1;
-	if (*o >= s->offset + s->len)
-		return 1;
+INTERVAL_TREE_DEFINE(struct symbol, node, unsigned long, __subtree_last,
+		     __sym_start, __sym_last, static, __sym)
 
-	return 0;
-}
+#define __sym_for_each(_iter, _tree, _start, _end)			\
+	for (_iter = __sym_iter_first((_tree), (_start), (_end));	\
+	     _iter; _iter = __sym_iter_next(_iter, (_start), (_end)))
 
 struct symbol_hole {
 	unsigned long key;
@@ -147,13 +132,12 @@ static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
 
 struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
 {
-	struct rb_node *node;
-
-	rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
-		struct symbol *s = rb_entry(node, struct symbol, node);
+	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
+	struct symbol *iter;
 
-		if (s->offset == offset && s->type != STT_SECTION)
-			return s;
+	__sym_for_each(iter, tree, offset, offset) {
+		if (iter->offset == offset && iter->type != STT_SECTION)
+			return iter;
 	}
 
 	return NULL;
@@ -161,13 +145,12 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
 
 struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
 {
-	struct rb_node *node;
+	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
+	struct symbol *iter;
 
-	rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
-		struct symbol *s = rb_entry(node, struct symbol, node);
-
-		if (s->offset == offset && s->type == STT_FUNC)
-			return s;
+	__sym_for_each(iter, tree, offset, offset) {
+		if (iter->offset == offset && iter->type == STT_FUNC)
+			return iter;
 	}
 
 	return NULL;
@@ -175,13 +158,12 @@ struct symbol *find_func_by_offset(struct section *sec, unsigned long offset)
 
 struct symbol *find_symbol_containing(const struct section *sec, unsigned long offset)
 {
-	struct rb_node *node;
-
-	rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
-		struct symbol *s = rb_entry(node, struct symbol, node);
+	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
+	struct symbol *iter;
 
-		if (s->type != STT_SECTION)
-			return s;
+	__sym_for_each(iter, tree, offset, offset) {
+		if (iter->type != STT_SECTION)
+			return iter;
 	}
 
 	return NULL;
@@ -202,7 +184,7 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset)
 	/*
 	 * Find the rightmost symbol for which @offset is after it.
 	 */
-	n = rb_find(&hole, &sec->symbol_tree, symbol_hole_by_offset);
+	n = rb_find(&hole, &sec->symbol_tree.rb_root, symbol_hole_by_offset);
 
 	/* found a symbol that contains @offset */
 	if (n)
@@ -224,13 +206,12 @@ int find_symbol_hole_containing(const struct section *sec, unsigned long offset)
 
 struct symbol *find_func_containing(struct section *sec, unsigned long offset)
 {
-	struct rb_node *node;
-
-	rb_for_each(node, &offset, &sec->symbol_tree, symbol_by_offset) {
-		struct symbol *s = rb_entry(node, struct symbol, node);
+	struct rb_root_cached *tree = (struct rb_root_cached *)&sec->symbol_tree;
+	struct symbol *iter;
 
-		if (s->type == STT_FUNC)
-			return s;
+	__sym_for_each(iter, tree, offset, offset) {
+		if (iter->type == STT_FUNC)
+			return iter;
 	}
 
 	return NULL;
@@ -373,6 +354,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 {
 	struct list_head *entry;
 	struct rb_node *pnode;
+	struct symbol *iter;
 
 	INIT_LIST_HEAD(&sym->pv_target);
 	sym->alias = sym;
@@ -386,7 +368,12 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 	sym->offset = sym->sym.st_value;
 	sym->len = sym->sym.st_size;
 
-	rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
+	__sym_for_each(iter, &sym->sec->symbol_tree, sym->offset, sym->offset) {
+		if (iter->offset == sym->offset && iter->type == sym->type)
+			iter->alias = sym;
+	}
+
+	__sym_insert(sym, &sym->sec->symbol_tree);
 	pnode = rb_prev(&sym->node);
 	if (pnode)
 		entry = &rb_entry(pnode, struct symbol, node)->list;
@@ -401,7 +388,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 	 * can exist within a function, confusing the sorting.
 	 */
 	if (!sym->len)
-		rb_erase(&sym->node, &sym->sec->symbol_tree);
+		__sym_remove(sym, &sym->sec->symbol_tree);
 }
 
 static int read_symbols(struct elf *elf)
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index baa808583c4fa..d28533106b780 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -30,7 +30,7 @@ struct section {
 	struct hlist_node hash;
 	struct hlist_node name_hash;
 	GElf_Shdr sh;
-	struct rb_root symbol_tree;
+	struct rb_root_cached symbol_tree;
 	struct list_head symbol_list;
 	struct list_head reloc_list;
 	struct section *base, *reloc;
@@ -53,6 +53,7 @@ struct symbol {
 	unsigned char bind, type;
 	unsigned long offset;
 	unsigned int len;
+	unsigned long __subtree_last;
 	struct symbol *pfunc, *cfunc, *alias;
 	u8 uaccess_safe      : 1;
 	u8 static_call_tramp : 1;
-- 
GitLab


From 08ef8c40112b8cd157515cd532f65cb82c934a76 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:13 +0200
Subject: [PATCH 0118/4122] objtool: Allow symbol range comparisons for
 IBT/ENDBR

A semi common pattern is where code checks if a code address is
within a specific range. All text addresses require either ENDBR or
ANNOTATE_ENDBR, however the ANNOTATE_NOENDBR past the range is
unnatural.

Instead, suppress this warning when this is exactly at the end of a
symbol that itself starts with either ENDBR/ANNOTATE_ENDBR.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.434642471@infradead.org
---
 arch/x86/entry/entry_64_compat.S |  1 -
 tools/objtool/check.c            | 28 ++++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 1dfee868d4a10..bc45ea7d08ee7 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -128,7 +128,6 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL)
 	popfq
 	jmp	.Lsysenter_flags_fixed
 SYM_INNER_LABEL(__end_entry_SYSENTER_compat, SYM_L_GLOBAL)
-	ANNOTATE_NOENDBR // is_sysenter_singlestep
 SYM_CODE_END(entry_SYSENTER_compat)
 
 /*
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 1461c8894fb70..3f46f46ab85c3 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -4033,6 +4033,24 @@ static void mark_endbr_used(struct instruction *insn)
 		list_del_init(&insn->call_node);
 }
 
+static bool noendbr_range(struct objtool_file *file, struct instruction *insn)
+{
+	struct symbol *sym = find_symbol_containing(insn->sec, insn->offset-1);
+	struct instruction *first;
+
+	if (!sym)
+		return false;
+
+	first = find_insn(file, sym->sec, sym->offset);
+	if (!first)
+		return false;
+
+	if (first->type != INSN_ENDBR && !first->noendbr)
+		return false;
+
+	return insn->offset == sym->offset + sym->len;
+}
+
 static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn)
 {
 	struct instruction *dest;
@@ -4105,9 +4123,19 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
 			continue;
 		}
 
+		/*
+		 * Accept anything ANNOTATE_NOENDBR.
+		 */
 		if (dest->noendbr)
 			continue;
 
+		/*
+		 * Accept if this is the instruction after a symbol
+		 * that is (no)endbr -- typical code-range usage.
+		 */
+		if (noendbr_range(file, dest))
+			continue;
+
 		WARN_FUNC("relocation to !ENDBR: %s",
 			  insn->sec, insn->offset,
 			  offstr(dest->sec, dest->offset));
-- 
GitLab


From dbcdbdfdf137b49144204571f1a5e5dc01b8aaad Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 22 Sep 2022 22:03:50 +0200
Subject: [PATCH 0119/4122] objtool: Rework instruction -> symbol mapping

Currently insn->func contains a instruction -> symbol link for
STT_FUNC symbols. A NULL value is assumed to mean STT_NOTYPE.
However, there are also instructions not covered by any symbol at all.
This can happen due to __weak symbols for example.

Since the current scheme cannot differentiate between no symbol and
STT_NOTYPE symbol, change things around. Make insn->sym point to any
symbol type such that !insn->sym means no symbol and add a helper
insn_func() that check the sym->type to retain the old functionality.

This then prepares the way to add code that depends on the distinction
between STT_NOTYPE and no symbol at all.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c                 | 105 ++++++++++++++------------
 tools/objtool/include/objtool/check.h |  12 ++-
 2 files changed, 66 insertions(+), 51 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 3f46f46ab85c3..e532efb9b5ab3 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -62,12 +62,12 @@ static struct instruction *next_insn_same_func(struct objtool_file *file,
 					       struct instruction *insn)
 {
 	struct instruction *next = list_next_entry(insn, list);
-	struct symbol *func = insn->func;
+	struct symbol *func = insn_func(insn);
 
 	if (!func)
 		return NULL;
 
-	if (&next->list != &file->insn_list && next->func == func)
+	if (&next->list != &file->insn_list && insn_func(next) == func)
 		return next;
 
 	/* Check if we're already in the subfunction: */
@@ -83,7 +83,7 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 {
 	struct instruction *prev = list_prev_entry(insn, list);
 
-	if (&prev->list != &file->insn_list && prev->func == insn->func)
+	if (&prev->list != &file->insn_list && insn_func(prev) == insn_func(insn))
 		return prev;
 
 	return NULL;
@@ -133,7 +133,7 @@ static bool is_sibling_call(struct instruction *insn)
 	 * sibling call detection consistency between vmlinux.o and individual
 	 * objects.
 	 */
-	if (!insn->func)
+	if (!insn_func(insn))
 		return false;
 
 	/* An indirect jump is either a sibling call or a jump to a table. */
@@ -207,7 +207,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 		return false;
 
 	insn = find_insn(file, func->sec, func->offset);
-	if (!insn->func)
+	if (!insn_func(insn))
 		return false;
 
 	func_for_each_insn(file, func, insn) {
@@ -243,7 +243,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 				return false;
 			}
 
-			return __dead_end_function(file, dest->func, recursion+1);
+			return __dead_end_function(file, insn_func(dest), recursion+1);
 		}
 	}
 
@@ -427,7 +427,10 @@ static int decode_instructions(struct objtool_file *file)
 		}
 
 		list_for_each_entry(func, &sec->symbol_list, list) {
-			if (func->type != STT_FUNC || func->alias != func)
+			if (func->type != STT_NOTYPE && func->type != STT_FUNC)
+				continue;
+
+			if (func->return_thunk || func->alias != func)
 				continue;
 
 			if (!find_insn(file, sec, func->offset)) {
@@ -437,9 +440,11 @@ static int decode_instructions(struct objtool_file *file)
 			}
 
 			sym_for_each_insn(file, func, insn) {
-				insn->func = func;
-				if (insn->type == INSN_ENDBR && list_empty(&insn->call_node)) {
-					if (insn->offset == insn->func->offset) {
+				insn->sym = func;
+				if (func->type == STT_FUNC &&
+				    insn->type == INSN_ENDBR &&
+				    list_empty(&insn->call_node)) {
+					if (insn->offset == func->offset) {
 						list_add_tail(&insn->call_node, &file->endbr_list);
 						file->nr_endbr++;
 					} else {
@@ -1397,19 +1402,19 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn,
 
 static bool same_function(struct instruction *insn1, struct instruction *insn2)
 {
-	return insn1->func->pfunc == insn2->func->pfunc;
+	return insn_func(insn1)->pfunc == insn_func(insn2)->pfunc;
 }
 
 static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn)
 {
-	if (insn->offset == insn->func->offset)
+	if (insn->offset == insn_func(insn)->offset)
 		return true;
 
 	if (opts.ibt) {
 		struct instruction *prev = prev_insn_same_sym(file, insn);
 
 		if (prev && prev->type == INSN_ENDBR &&
-		    insn->offset == insn->func->offset + prev->len)
+		    insn->offset == insn_func(insn)->offset + prev->len)
 			return true;
 	}
 
@@ -1450,7 +1455,7 @@ static int add_jump_destinations(struct objtool_file *file)
 		} else if (reloc->sym->return_thunk) {
 			add_return_call(file, insn, true);
 			continue;
-		} else if (insn->func) {
+		} else if (insn_func(insn)) {
 			/*
 			 * External sibling call or internal sibling call with
 			 * STT_FUNC reloc.
@@ -1492,8 +1497,8 @@ static int add_jump_destinations(struct objtool_file *file)
 		/*
 		 * Cross-function jump.
 		 */
-		if (insn->func && jump_dest->func &&
-		    insn->func != jump_dest->func) {
+		if (insn_func(insn) && insn_func(jump_dest) &&
+		    insn_func(insn) != insn_func(jump_dest)) {
 
 			/*
 			 * For GCC 8+, create parent/child links for any cold
@@ -1510,10 +1515,10 @@ static int add_jump_destinations(struct objtool_file *file)
 			 * case where the parent function's only reference to a
 			 * subfunction is through a jump table.
 			 */
-			if (!strstr(insn->func->name, ".cold") &&
-			    strstr(jump_dest->func->name, ".cold")) {
-				insn->func->cfunc = jump_dest->func;
-				jump_dest->func->pfunc = insn->func;
+			if (!strstr(insn_func(insn)->name, ".cold") &&
+			    strstr(insn_func(jump_dest)->name, ".cold")) {
+				insn_func(insn)->cfunc = insn_func(jump_dest);
+				insn_func(jump_dest)->pfunc = insn_func(insn);
 
 			} else if (!same_function(insn, jump_dest) &&
 				   is_first_func_insn(file, jump_dest)) {
@@ -1521,7 +1526,7 @@ static int add_jump_destinations(struct objtool_file *file)
 				 * Internal sibling call without reloc or with
 				 * STT_SECTION reloc.
 				 */
-				add_call_dest(file, insn, jump_dest->func, true);
+				add_call_dest(file, insn, insn_func(jump_dest), true);
 				continue;
 			}
 		}
@@ -1572,7 +1577,7 @@ static int add_call_destinations(struct objtool_file *file)
 				return -1;
 			}
 
-			if (insn->func && insn->call_dest->type != STT_FUNC) {
+			if (insn_func(insn) && insn->call_dest->type != STT_FUNC) {
 				WARN_FUNC("unsupported call to non-function",
 					  insn->sec, insn->offset);
 				return -1;
@@ -1668,7 +1673,7 @@ static int handle_group_alt(struct objtool_file *file,
 		nop->offset = special_alt->new_off + special_alt->new_len;
 		nop->len = special_alt->orig_len - special_alt->new_len;
 		nop->type = INSN_NOP;
-		nop->func = orig_insn->func;
+		nop->sym = orig_insn->sym;
 		nop->alt_group = new_alt_group;
 		nop->ignore = orig_insn->ignore_alts;
 	}
@@ -1688,7 +1693,7 @@ static int handle_group_alt(struct objtool_file *file,
 		last_new_insn = insn;
 
 		insn->ignore = orig_insn->ignore_alts;
-		insn->func = orig_insn->func;
+		insn->sym = orig_insn->sym;
 		insn->alt_group = new_alt_group;
 
 		/*
@@ -1882,7 +1887,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 	struct reloc *reloc = table;
 	struct instruction *dest_insn;
 	struct alternative *alt;
-	struct symbol *pfunc = insn->func->pfunc;
+	struct symbol *pfunc = insn_func(insn)->pfunc;
 	unsigned int prev_offset = 0;
 
 	/*
@@ -1909,7 +1914,7 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
 			break;
 
 		/* Make sure the destination is in the same function: */
-		if (!dest_insn->func || dest_insn->func->pfunc != pfunc)
+		if (!insn_func(dest_insn) || insn_func(dest_insn)->pfunc != pfunc)
 			break;
 
 		alt = malloc(sizeof(*alt));
@@ -1949,7 +1954,7 @@ static struct reloc *find_jump_table(struct objtool_file *file,
 	 * it.
 	 */
 	for (;
-	     insn && insn->func && insn->func->pfunc == func;
+	     insn && insn_func(insn) && insn_func(insn)->pfunc == func;
 	     insn = insn->first_jump_src ?: prev_insn_same_sym(file, insn)) {
 
 		if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC)
@@ -1966,7 +1971,7 @@ static struct reloc *find_jump_table(struct objtool_file *file,
 		if (!table_reloc)
 			continue;
 		dest_insn = find_insn(file, table_reloc->sym->sec, table_reloc->addend);
-		if (!dest_insn || !dest_insn->func || dest_insn->func->pfunc != func)
+		if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
 			continue;
 
 		return table_reloc;
@@ -2415,6 +2420,13 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
+	/*
+	 * Must be before add_{jump_call}_destination.
+	 */
+	ret = classify_symbols(file);
+	if (ret)
+		return ret;
+
 	ret = decode_instructions(file);
 	if (ret)
 		return ret;
@@ -2433,13 +2445,6 @@ static int decode_sections(struct objtool_file *file)
 	if (ret)
 		return ret;
 
-	/*
-	 * Must be before add_{jump_call}_destination.
-	 */
-	ret = classify_symbols(file);
-	if (ret)
-		return ret;
-
 	/*
 	 * Must be before add_jump_destinations(), which depends on 'func'
 	 * being set for alternatives, to enable proper sibling call detection.
@@ -2648,7 +2653,7 @@ static int update_cfi_state(struct instruction *insn,
 
 	/* stack operations don't make sense with an undefined CFA */
 	if (cfa->base == CFI_UNDEFINED) {
-		if (insn->func) {
+		if (insn_func(insn)) {
 			WARN_FUNC("undefined stack state", insn->sec, insn->offset);
 			return -1;
 		}
@@ -2994,7 +2999,7 @@ static int update_cfi_state(struct instruction *insn,
 		}
 
 		/* detect when asm code uses rbp as a scratch register */
-		if (opts.stackval && insn->func && op->src.reg == CFI_BP &&
+		if (opts.stackval && insn_func(insn) && op->src.reg == CFI_BP &&
 		    cfa->base != CFI_BP)
 			cfi->bp_scratch = true;
 		break;
@@ -3390,13 +3395,13 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 	while (1) {
 		next_insn = next_insn_to_validate(file, insn);
 
-		if (func && insn->func && func != insn->func->pfunc) {
+		if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
 			/* Ignore KCFI type preambles, which always fall through */
 			if (!strncmp(func->name, "__cfi_", 6))
 				return 0;
 
 			WARN("%s() falls through to next function %s()",
-			     func->name, insn->func->name);
+			     func->name, insn_func(insn)->name);
 			return 1;
 		}
 
@@ -3638,7 +3643,7 @@ static int validate_unwind_hints(struct objtool_file *file, struct section *sec)
 
 	while (&insn->list != &file->insn_list && (!sec || insn->sec == sec)) {
 		if (insn->hint && !insn->visited && !insn->ignore) {
-			ret = validate_branch(file, insn->func, insn, state);
+			ret = validate_branch(file, insn_func(insn), insn, state);
 			if (ret && opts.backtrace)
 				BT_FUNC("<=== (hint)", insn);
 			warnings += ret;
@@ -3861,7 +3866,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	 * In this case we'll find a piece of code (whole function) that is not
 	 * covered by a !section symbol. Ignore them.
 	 */
-	if (opts.link && !insn->func) {
+	if (opts.link && !insn_func(insn)) {
 		int size = find_symbol_hole_containing(insn->sec, insn->offset);
 		unsigned long end = insn->offset + size;
 
@@ -3885,10 +3890,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 			/*
 			 * If this hole jumps to a .cold function, mark it ignore too.
 			 */
-			if (insn->jump_dest && insn->jump_dest->func &&
-			    strstr(insn->jump_dest->func->name, ".cold")) {
+			if (insn->jump_dest && insn_func(insn->jump_dest) &&
+			    strstr(insn_func(insn->jump_dest)->name, ".cold")) {
 				struct instruction *dest = insn->jump_dest;
-				func_for_each_insn(file, dest->func, dest)
+				func_for_each_insn(file, insn_func(dest), dest)
 					dest->ignore = true;
 			}
 		}
@@ -3896,10 +3901,10 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 		return false;
 	}
 
-	if (!insn->func)
+	if (!insn_func(insn))
 		return false;
 
-	if (insn->func->static_call_tramp)
+	if (insn_func(insn)->static_call_tramp)
 		return true;
 
 	/*
@@ -3930,7 +3935,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 
 		if (insn->type == INSN_JUMP_UNCONDITIONAL) {
 			if (insn->jump_dest &&
-			    insn->jump_dest->func == insn->func) {
+			    insn_func(insn->jump_dest) == insn_func(insn)) {
 				insn = insn->jump_dest;
 				continue;
 			}
@@ -3938,7 +3943,7 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 			break;
 		}
 
-		if (insn->offset + insn->len >= insn->func->offset + insn->func->len)
+		if (insn->offset + insn->len >= insn_func(insn)->offset + insn_func(insn)->len)
 			break;
 
 		insn = list_next_entry(insn, list);
@@ -3967,7 +3972,7 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
 
 	state->uaccess = sym->uaccess_safe;
 
-	ret = validate_branch(file, insn->func, insn, *state);
+	ret = validate_branch(file, insn_func(insn), insn, *state);
 	if (ret && opts.backtrace)
 		BT_FUNC("<=== (sym)", insn);
 	return ret;
@@ -4104,7 +4109,7 @@ static int validate_ibt_insn(struct objtool_file *file, struct instruction *insn
 			continue;
 		}
 
-		if (dest->func && dest->func == insn->func) {
+		if (insn_func(dest) && insn_func(dest) == insn_func(insn)) {
 			/*
 			 * Anything from->to self is either _THIS_IP_ or
 			 * IRET-to-self.
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index 036129cebeeef..acd7fae593484 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -67,11 +67,21 @@ struct instruction {
 	struct reloc *jump_table;
 	struct reloc *reloc;
 	struct list_head alts;
-	struct symbol *func;
+	struct symbol *sym;
 	struct list_head stack_ops;
 	struct cfi_state *cfi;
 };
 
+static inline struct symbol *insn_func(struct instruction *insn)
+{
+	struct symbol *sym = insn->sym;
+
+	if (sym && sym->type != STT_FUNC)
+		sym = NULL;
+
+	return sym;
+}
+
 #define VISITED_BRANCH		0x01
 #define VISITED_BRANCH_UACCESS	0x02
 #define VISITED_BRANCH_MASK	0x03
-- 
GitLab


From 5a9c361a416fc3a3301e859ff09587cc1b933eb8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 11 Jul 2022 11:49:50 +0200
Subject: [PATCH 0120/4122] objtool: Allow STT_NOTYPE -> STT_FUNC+0
 sibling-calls

Teach objtool about STT_NOTYPE -> STT_FUNC+0 sibling calls. Doing do
allows slightly simpler .S files.

There is a slight complication in that we specifically do not want to
allow sibling calls from symbol holes (previously covered by STT_WEAK
symbols) -- such things exist where a weak function has a .cold
subfunction for example.

Additionally, STT_NOTYPE tail-calls are allowed to happen with a
modified stack frame, they don't need to obey the normal rules after
all.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c | 74 +++++++++++++++++++++++++++----------------
 1 file changed, 47 insertions(+), 27 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e532efb9b5ab3..7936312e10c79 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -129,16 +129,13 @@ static bool is_jump_table_jump(struct instruction *insn)
 static bool is_sibling_call(struct instruction *insn)
 {
 	/*
-	 * Assume only ELF functions can make sibling calls.  This ensures
-	 * sibling call detection consistency between vmlinux.o and individual
-	 * objects.
+	 * Assume only STT_FUNC calls have jump-tables.
 	 */
-	if (!insn_func(insn))
-		return false;
-
-	/* An indirect jump is either a sibling call or a jump to a table. */
-	if (insn->type == INSN_JUMP_DYNAMIC)
-		return !is_jump_table_jump(insn);
+	if (insn_func(insn)) {
+		/* An indirect jump is either a sibling call or a jump to a table. */
+		if (insn->type == INSN_JUMP_DYNAMIC)
+			return !is_jump_table_jump(insn);
+	}
 
 	/* add_jump_destinations() sets insn->call_dest for sibling calls. */
 	return (is_static_jump(insn) && insn->call_dest);
@@ -1400,27 +1397,50 @@ static void add_return_call(struct objtool_file *file, struct instruction *insn,
 		list_add_tail(&insn->call_node, &file->return_thunk_list);
 }
 
-static bool same_function(struct instruction *insn1, struct instruction *insn2)
-{
-	return insn_func(insn1)->pfunc == insn_func(insn2)->pfunc;
-}
-
-static bool is_first_func_insn(struct objtool_file *file, struct instruction *insn)
+static bool is_first_func_insn(struct objtool_file *file,
+			       struct instruction *insn, struct symbol *sym)
 {
-	if (insn->offset == insn_func(insn)->offset)
+	if (insn->offset == sym->offset)
 		return true;
 
+	/* Allow direct CALL/JMP past ENDBR */
 	if (opts.ibt) {
 		struct instruction *prev = prev_insn_same_sym(file, insn);
 
 		if (prev && prev->type == INSN_ENDBR &&
-		    insn->offset == insn_func(insn)->offset + prev->len)
+		    insn->offset == sym->offset + prev->len)
 			return true;
 	}
 
 	return false;
 }
 
+/*
+ * A sibling call is a tail-call to another symbol -- to differentiate from a
+ * recursive tail-call which is to the same symbol.
+ */
+static bool jump_is_sibling_call(struct objtool_file *file,
+				 struct instruction *from, struct instruction *to)
+{
+	struct symbol *fs = from->sym;
+	struct symbol *ts = to->sym;
+
+	/* Not a sibling call if from/to a symbol hole */
+	if (!fs || !ts)
+		return false;
+
+	/* Not a sibling call if not targeting the start of a symbol. */
+	if (!is_first_func_insn(file, to, ts))
+		return false;
+
+	/* Disallow sibling calls into STT_NOTYPE */
+	if (ts->type == STT_NOTYPE)
+		return false;
+
+	/* Must not be self to be a sibling */
+	return fs->pfunc != ts->pfunc;
+}
+
 /*
  * Find the destination instructions for all jumps.
  */
@@ -1519,18 +1539,18 @@ static int add_jump_destinations(struct objtool_file *file)
 			    strstr(insn_func(jump_dest)->name, ".cold")) {
 				insn_func(insn)->cfunc = insn_func(jump_dest);
 				insn_func(jump_dest)->pfunc = insn_func(insn);
-
-			} else if (!same_function(insn, jump_dest) &&
-				   is_first_func_insn(file, jump_dest)) {
-				/*
-				 * Internal sibling call without reloc or with
-				 * STT_SECTION reloc.
-				 */
-				add_call_dest(file, insn, insn_func(jump_dest), true);
-				continue;
 			}
 		}
 
+		if (jump_is_sibling_call(file, insn, jump_dest)) {
+			/*
+			 * Internal sibling call without reloc or with
+			 * STT_SECTION reloc.
+			 */
+			add_call_dest(file, insn, insn_func(jump_dest), true);
+			continue;
+		}
+
 		insn->jump_dest = jump_dest;
 	}
 
@@ -3309,7 +3329,7 @@ static int validate_sibling_call(struct objtool_file *file,
 				 struct instruction *insn,
 				 struct insn_state *state)
 {
-	if (has_modified_stack_frame(insn, state)) {
+	if (insn_func(insn) && has_modified_stack_frame(insn, state)) {
 		WARN_FUNC("sibling call from callable instruction with modified stack frame",
 				insn->sec, insn->offset);
 		return 1;
-- 
GitLab


From ef79ed20e3ae9ee9ac2e0f3a4e12814893972e63 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:14 +0200
Subject: [PATCH 0121/4122] x86/entry: Make sync_regs() invocation a tail call

No point in having a call there. Spare the call/ret overhead.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.539578813@infradead.org
---
 arch/x86/entry/entry_64.S | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 5c578a7dfcd79..b24b84b3425fb 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -1062,11 +1062,8 @@ SYM_CODE_START_LOCAL(error_entry)
 	UNTRAIN_RET
 
 	leaq	8(%rsp), %rdi			/* arg0 = pt_regs pointer */
-.Lerror_entry_from_usermode_after_swapgs:
-
 	/* Put us onto the real thread stack. */
-	call	sync_regs
-	RET
+	jmp	sync_regs
 
 	/*
 	 * There are two places in the kernel that can potentially fault with
@@ -1124,7 +1121,7 @@ SYM_CODE_START_LOCAL(error_entry)
 	leaq	8(%rsp), %rdi			/* arg0 = pt_regs pointer */
 	call	fixup_bad_iret
 	mov	%rax, %rdi
-	jmp	.Lerror_entry_from_usermode_after_swapgs
+	jmp	sync_regs
 SYM_CODE_END(error_entry)
 
 SYM_CODE_START_LOCAL(error_return)
-- 
GitLab


From cb855971d717a2dd752241f66fedad9dc178388c Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:16 +0200
Subject: [PATCH 0122/4122] x86/putuser: Provide room for padding

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.746429822@infradead.org
---
 arch/x86/lib/putuser.S | 62 +++++++++++++++++++++++++++++++++---------
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index b7dfd60243b75..32125224fccaa 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -47,8 +47,6 @@ SYM_FUNC_START(__put_user_1)
 	LOAD_TASK_SIZE_MINUS_N(0)
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
-	ENDBR
 	ASM_STAC
 1:	movb %al,(%_ASM_CX)
 	xor %ecx,%ecx
@@ -56,54 +54,87 @@ SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
 	RET
 SYM_FUNC_END(__put_user_1)
 EXPORT_SYMBOL(__put_user_1)
+
+SYM_FUNC_START(__put_user_nocheck_1)
+	ENDBR
+	ASM_STAC
+2:	movb %al,(%_ASM_CX)
+	xor %ecx,%ecx
+	ASM_CLAC
+	RET
+SYM_FUNC_END(__put_user_nocheck_1)
 EXPORT_SYMBOL(__put_user_nocheck_1)
 
 SYM_FUNC_START(__put_user_2)
 	LOAD_TASK_SIZE_MINUS_N(1)
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
-	ENDBR
 	ASM_STAC
-2:	movw %ax,(%_ASM_CX)
+3:	movw %ax,(%_ASM_CX)
 	xor %ecx,%ecx
 	ASM_CLAC
 	RET
 SYM_FUNC_END(__put_user_2)
 EXPORT_SYMBOL(__put_user_2)
+
+SYM_FUNC_START(__put_user_nocheck_2)
+	ENDBR
+	ASM_STAC
+4:	movw %ax,(%_ASM_CX)
+	xor %ecx,%ecx
+	ASM_CLAC
+	RET
+SYM_FUNC_END(__put_user_nocheck_2)
 EXPORT_SYMBOL(__put_user_nocheck_2)
 
 SYM_FUNC_START(__put_user_4)
 	LOAD_TASK_SIZE_MINUS_N(3)
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
-	ENDBR
 	ASM_STAC
-3:	movl %eax,(%_ASM_CX)
+5:	movl %eax,(%_ASM_CX)
 	xor %ecx,%ecx
 	ASM_CLAC
 	RET
 SYM_FUNC_END(__put_user_4)
 EXPORT_SYMBOL(__put_user_4)
+
+SYM_FUNC_START(__put_user_nocheck_4)
+	ENDBR
+	ASM_STAC
+6:	movl %eax,(%_ASM_CX)
+	xor %ecx,%ecx
+	ASM_CLAC
+	RET
+SYM_FUNC_END(__put_user_nocheck_4)
 EXPORT_SYMBOL(__put_user_nocheck_4)
 
 SYM_FUNC_START(__put_user_8)
 	LOAD_TASK_SIZE_MINUS_N(7)
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
-SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
-	ENDBR
 	ASM_STAC
-4:	mov %_ASM_AX,(%_ASM_CX)
+7:	mov %_ASM_AX,(%_ASM_CX)
 #ifdef CONFIG_X86_32
-5:	movl %edx,4(%_ASM_CX)
+8:	movl %edx,4(%_ASM_CX)
 #endif
 	xor %ecx,%ecx
 	ASM_CLAC
 	RET
 SYM_FUNC_END(__put_user_8)
 EXPORT_SYMBOL(__put_user_8)
+
+SYM_FUNC_START(__put_user_nocheck_8)
+	ENDBR
+	ASM_STAC
+9:	mov %_ASM_AX,(%_ASM_CX)
+#ifdef CONFIG_X86_32
+10:	movl %edx,4(%_ASM_CX)
+#endif
+	xor %ecx,%ecx
+	ASM_CLAC
+	RET
+SYM_FUNC_END(__put_user_nocheck_8)
 EXPORT_SYMBOL(__put_user_nocheck_8)
 
 SYM_CODE_START_LOCAL(.Lbad_put_user_clac)
@@ -117,6 +148,11 @@ SYM_CODE_END(.Lbad_put_user_clac)
 	_ASM_EXTABLE_UA(2b, .Lbad_put_user_clac)
 	_ASM_EXTABLE_UA(3b, .Lbad_put_user_clac)
 	_ASM_EXTABLE_UA(4b, .Lbad_put_user_clac)
-#ifdef CONFIG_X86_32
 	_ASM_EXTABLE_UA(5b, .Lbad_put_user_clac)
+	_ASM_EXTABLE_UA(6b, .Lbad_put_user_clac)
+	_ASM_EXTABLE_UA(7b, .Lbad_put_user_clac)
+	_ASM_EXTABLE_UA(9b, .Lbad_put_user_clac)
+#ifdef CONFIG_X86_32
+	_ASM_EXTABLE_UA(8b, .Lbad_put_user_clac)
+	_ASM_EXTABLE_UA(10b, .Lbad_put_user_clac)
 #endif
-- 
GitLab


From 8f7c0d8b23c3f5f740a48db31ebadef28af17a22 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:17 +0200
Subject: [PATCH 0123/4122] x86/Kconfig: Add CONFIG_CALL_THUNKS

In preparation for mitigating the Intel SKL RSB underflow issue in
software, add a new configuration symbol which allows to build the
required call thunk infrastructure conditionally.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.849523555@infradead.org
---
 arch/x86/Kconfig | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f408fa87ed947..e18963e77cb1d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2446,6 +2446,14 @@ config CC_HAS_SLS
 config CC_HAS_RETURN_THUNK
 	def_bool $(cc-option,-mfunction-return=thunk-extern)
 
+config HAVE_CALL_THUNKS
+	def_bool y
+	depends on RETHUNK && OBJTOOL
+
+config CALL_THUNKS
+	def_bool n
+	select FUNCTION_ALIGNMENT_16B
+
 menuconfig SPECULATION_MITIGATIONS
 	bool "Mitigations for speculative execution vulnerabilities"
 	default y
-- 
GitLab


From bea75b33895f7f87f0c40023e36a2d087e87ffa1 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:18 +0200
Subject: [PATCH 0124/4122] x86/Kconfig: Introduce function padding

Now that all functions are 16 byte aligned, add 16 bytes of NOP
padding in front of each function. This prepares things for software
call stack tracking and kCFI/FineIBT.

This significantly increases kernel .text size, around 5.1% on a
x86_64-defconfig-ish build.

However, per the random access argument used for alignment, these 16
extra bytes are code that wouldn't be used. Performance measurements
back this up by showing no significant performance regressions.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111146.950884492@infradead.org
---
 arch/x86/Kconfig               | 20 ++++++++++++-
 arch/x86/Makefile              |  6 ++++
 arch/x86/entry/vdso/Makefile   |  3 +-
 arch/x86/include/asm/linkage.h | 51 ++++++++++++++++++++++++++++++++--
 include/linux/bpf.h            |  4 +++
 5 files changed, 79 insertions(+), 5 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e18963e77cb1d..e368fc0daa4ac 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2446,9 +2446,27 @@ config CC_HAS_SLS
 config CC_HAS_RETURN_THUNK
 	def_bool $(cc-option,-mfunction-return=thunk-extern)
 
+config CC_HAS_ENTRY_PADDING
+	def_bool $(cc-option,-fpatchable-function-entry=16,16)
+
+config FUNCTION_PADDING_CFI
+	int
+	default 59 if FUNCTION_ALIGNMENT_64B
+	default 27 if FUNCTION_ALIGNMENT_32B
+	default 11 if FUNCTION_ALIGNMENT_16B
+	default  3 if FUNCTION_ALIGNMENT_8B
+	default  0
+
+# Basically: FUNCTION_ALIGNMENT - 5*CFI_CLANG
+# except Kconfig can't do arithmetic :/
+config FUNCTION_PADDING_BYTES
+	int
+	default FUNCTION_PADDING_CFI if CFI_CLANG
+	default FUNCTION_ALIGNMENT
+
 config HAVE_CALL_THUNKS
 	def_bool y
-	depends on RETHUNK && OBJTOOL
+	depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
 
 config CALL_THUNKS
 	def_bool n
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 415a5d138de47..1640e005092b9 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -208,6 +208,12 @@ ifdef CONFIG_SLS
   KBUILD_CFLAGS += -mharden-sls=all
 endif
 
+ifdef CONFIG_CALL_THUNKS
+PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
+KBUILD_CFLAGS += $(PADDING_CFLAGS)
+export PADDING_CFLAGS
+endif
+
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
 
 ifdef CONFIG_LTO_CLANG
diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile
index 3ef611044c8f6..838613ac15b82 100644
--- a/arch/x86/entry/vdso/Makefile
+++ b/arch/x86/entry/vdso/Makefile
@@ -95,7 +95,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),)
 endif
 endif
 
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(PADDING_CFLAGS) $(CC_FLAGS_LTO) $(CC_FLAGS_CFI) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
 $(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
 
 #
@@ -158,6 +158,7 @@ KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_CFI),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(PADDING_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
 KBUILD_CFLAGS_32 += -fno-stack-protector
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index c2d6e2733b111..45e0df850645c 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -15,8 +15,19 @@
 #define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
 #define __ALIGN_STR	__stringify(__ALIGN)
 
-#define ASM_FUNC_ALIGN		__ALIGN_STR
-#define __FUNC_ALIGN		__ALIGN
+#if defined(CONFIG_CALL_THUNKS) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define FUNCTION_PADDING	.skip CONFIG_FUNCTION_ALIGNMENT, 0x90;
+#else
+#define FUNCTION_PADDING
+#endif
+
+#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO)
+# define __FUNC_ALIGN		__ALIGN; FUNCTION_PADDING
+#else
+# define __FUNC_ALIGN		__ALIGN
+#endif
+
+#define ASM_FUNC_ALIGN		__stringify(__FUNC_ALIGN)
 #define SYM_F_ALIGN		__FUNC_ALIGN
 
 #ifdef __ASSEMBLY__
@@ -45,11 +56,45 @@
 
 #endif /* __ASSEMBLY__ */
 
+/*
+ * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_THUNKS) the
+ * CFI symbol layout changes.
+ *
+ * Without CALL_THUNKS:
+ *
+ * 	.align	FUNCTION_ALIGNMENT
+ * __cfi_##name:
+ * 	.skip	FUNCTION_PADDING, 0x90
+ * 	.byte   0xb8
+ * 	.long	__kcfi_typeid_##name
+ * name:
+ *
+ * With CALL_THUNKS:
+ *
+ * 	.align FUNCTION_ALIGNMENT
+ * __cfi_##name:
+ * 	.byte	0xb8
+ * 	.long	__kcfi_typeid_##name
+ * 	.skip	FUNCTION_PADDING, 0x90
+ * name:
+ *
+ * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized.
+ */
+
+#ifdef CONFIG_CALL_THUNKS
+#define CFI_PRE_PADDING
+#define CFI_POST_PADDING	.skip	CONFIG_FUNCTION_PADDING_BYTES, 0x90;
+#else
+#define CFI_PRE_PADDING		.skip	CONFIG_FUNCTION_PADDING_BYTES, 0x90;
+#define CFI_POST_PADDING
+#endif
+
 #define __CFI_TYPE(name)					\
 	SYM_START(__cfi_##name, SYM_L_LOCAL, SYM_A_NONE)	\
-	.fill 11, 1, 0x90 ASM_NL				\
+	CFI_PRE_PADDING						\
 	.byte 0xb8 ASM_NL					\
 	.long __kcfi_typeid_##name ASM_NL			\
+	CFI_POST_PADDING					\
 	SYM_FUNC_END(__cfi_##name)
 
 /* SYM_TYPED_FUNC_START -- use for indirectly called globals, w/ CFI type */
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9e7d46d16032f..5296aea9b5b40 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -984,7 +984,11 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 }
 
 #ifdef CONFIG_X86_64
+#ifdef CONFIG_CALL_THUNKS
+#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5+CONFIG_FUNCTION_PADDING_BYTES,CONFIG_FUNCTION_PADDING_BYTES)))
+#else
 #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5)))
+#endif
 #else
 #define BPF_DISPATCHER_ATTRIBUTES
 #endif
-- 
GitLab


From 80e4c1cd42fff110bfdae8fce7ac4f22465f9664 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:19 +0200
Subject: [PATCH 0125/4122] x86/retbleed: Add X86_FEATURE_CALL_DEPTH

Intel SKL CPUs fall back to other predictors when the RSB underflows. The
only microcode mitigation is IBRS which is insanely expensive. It comes
with performance drops of up to 30% depending on the workload.

A way less expensive, but nevertheless horrible mitigation is to track the
call depth in software and overeagerly fill the RSB when returns underflow
the software counter.

Provide a configuration symbol and a CPU misfeature bit.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.056176424@infradead.org
---
 arch/x86/Kconfig                         | 19 +++++++++++++++++++
 arch/x86/include/asm/cpufeatures.h       |  1 +
 arch/x86/include/asm/disabled-features.h |  9 ++++++++-
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e368fc0daa4ac..6ae7fa4b8eb7d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2523,6 +2523,25 @@ config CPU_UNRET_ENTRY
 	help
 	  Compile the kernel with support for the retbleed=unret mitigation.
 
+config CALL_DEPTH_TRACKING
+	bool "Mitigate RSB underflow with call depth tracking"
+	depends on CPU_SUP_INTEL && HAVE_CALL_THUNKS
+	select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
+	select CALL_THUNKS
+	default y
+	help
+	  Compile the kernel with call depth tracking to mitigate the Intel
+	  SKL Return-Speculation-Buffer (RSB) underflow issue. The
+	  mitigation is off by default and needs to be enabled on the
+	  kernel command line via the retbleed=stuff option. For
+	  non-affected systems the overhead of this option is marginal as
+	  the call depth tracking is using run-time generated call thunks
+	  in a compiler generated padding area and call patching. This
+	  increases text size by ~5%. For non affected systems this space
+	  is unused. On affected SKL systems this results in a significant
+	  performance gain over the IBRS mitigation.
+
+
 config CPU_IBPB_ENTRY
 	bool "Enable IBPB on kernel entry"
 	depends on CPU_SUP_AMD && X86_64
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b71f4f2ecdd57..aefd0816a333f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -304,6 +304,7 @@
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
 #define X86_FEATURE_USE_IBPB_FW		(11*32+16) /* "" Use IBPB during runtime firmware calls */
 #define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
+#define X86_FEATURE_CALL_DEPTH		(11*32+18) /* "" Call depth tracking for RSB stuffing */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index 33d2cd04d2544..bbb03b25263e7 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -69,6 +69,12 @@
 # define DISABLE_UNRET		(1 << (X86_FEATURE_UNRET & 31))
 #endif
 
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+# define DISABLE_CALL_DEPTH_TRACKING	0
+#else
+# define DISABLE_CALL_DEPTH_TRACKING	(1 << (X86_FEATURE_CALL_DEPTH & 31))
+#endif
+
 #ifdef CONFIG_INTEL_IOMMU_SVM
 # define DISABLE_ENQCMD		0
 #else
@@ -101,7 +107,8 @@
 #define DISABLED_MASK8	(DISABLE_TDX_GUEST)
 #define DISABLED_MASK9	(DISABLE_SGX)
 #define DISABLED_MASK10	0
-#define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
+#define DISABLED_MASK11	(DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET| \
+			 DISABLE_CALL_DEPTH_TRACKING)
 #define DISABLED_MASK12	0
 #define DISABLED_MASK13	0
 #define DISABLED_MASK14	0
-- 
GitLab


From fe54d0793796ccdb213d8ea7bff0b49903b6afaa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:20 +0200
Subject: [PATCH 0126/4122] x86/alternatives: Provide text_poke_copy_locked()

The upcoming call thunk patching must hold text_mutex and needs access to
text_poke_copy(), which takes text_mutex.

Provide a _locked postfixed variant to expose the inner workings.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.159977224@infradead.org
---
 arch/x86/include/asm/text-patching.h |  1 +
 arch/x86/kernel/alternative.c        | 37 +++++++++++++++++-----------
 2 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index 1cc15528ce29b..f4b87f08f5c50 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -45,6 +45,7 @@ extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void text_poke_sync(void);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern void *text_poke_copy(void *addr, const void *opcode, size_t len);
+extern void *text_poke_copy_locked(void *addr, const void *opcode, size_t len, bool core_ok);
 extern void *text_poke_set(void *addr, int c, size_t len);
 extern int poke_int3_handler(struct pt_regs *regs);
 extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5cadcea035e04..fad3c0e4838ea 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1236,27 +1236,15 @@ void *text_poke_kgdb(void *addr, const void *opcode, size_t len)
 	return __text_poke(text_poke_memcpy, addr, opcode, len);
 }
 
-/**
- * text_poke_copy - Copy instructions into (an unused part of) RX memory
- * @addr: address to modify
- * @opcode: source of the copy
- * @len: length to copy, could be more than 2x PAGE_SIZE
- *
- * Not safe against concurrent execution; useful for JITs to dump
- * new code blocks into unused regions of RX memory. Can be used in
- * conjunction with synchronize_rcu_tasks() to wait for existing
- * execution to quiesce after having made sure no existing functions
- * pointers are live.
- */
-void *text_poke_copy(void *addr, const void *opcode, size_t len)
+void *text_poke_copy_locked(void *addr, const void *opcode, size_t len,
+			    bool core_ok)
 {
 	unsigned long start = (unsigned long)addr;
 	size_t patched = 0;
 
-	if (WARN_ON_ONCE(core_kernel_text(start)))
+	if (WARN_ON_ONCE(!core_ok && core_kernel_text(start)))
 		return NULL;
 
-	mutex_lock(&text_mutex);
 	while (patched < len) {
 		unsigned long ptr = start + patched;
 		size_t s;
@@ -1266,6 +1254,25 @@ void *text_poke_copy(void *addr, const void *opcode, size_t len)
 		__text_poke(text_poke_memcpy, (void *)ptr, opcode + patched, s);
 		patched += s;
 	}
+	return addr;
+}
+
+/**
+ * text_poke_copy - Copy instructions into (an unused part of) RX memory
+ * @addr: address to modify
+ * @opcode: source of the copy
+ * @len: length to copy, could be more than 2x PAGE_SIZE
+ *
+ * Not safe against concurrent execution; useful for JITs to dump
+ * new code blocks into unused regions of RX memory. Can be used in
+ * conjunction with synchronize_rcu_tasks() to wait for existing
+ * execution to quiesce after having made sure no existing functions
+ * pointers are live.
+ */
+void *text_poke_copy(void *addr, const void *opcode, size_t len)
+{
+	mutex_lock(&text_mutex);
+	addr = text_poke_copy_locked(addr, opcode, len, false);
 	mutex_unlock(&text_mutex);
 	return addr;
 }
-- 
GitLab


From c22cf380c79c4bb0e502b0343f57271b17626424 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:21 +0200
Subject: [PATCH 0127/4122] x86/entry: Make some entry symbols global

paranoid_entry(), error_entry() and xen_error_entry() have to be
exempted from call accounting by thunk patching because they are
before UNTRAIN_RET.

Expose them so they are available in the alternative code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.265598113@infradead.org
---
 arch/x86/entry/entry_64.S | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index b24b84b3425fb..4cc0125fdfdc2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -327,7 +327,8 @@ SYM_CODE_END(ret_from_fork)
 #endif
 .endm
 
-SYM_CODE_START_LOCAL(xen_error_entry)
+SYM_CODE_START(xen_error_entry)
+	ANNOTATE_NOENDBR
 	UNWIND_HINT_FUNC
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
@@ -906,7 +907,8 @@ SYM_CODE_END(xen_failsafe_callback)
  * R14 - old CR3
  * R15 - old SPEC_CTRL
  */
-SYM_CODE_START_LOCAL(paranoid_entry)
+SYM_CODE_START(paranoid_entry)
+	ANNOTATE_NOENDBR
 	UNWIND_HINT_FUNC
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
@@ -1041,7 +1043,8 @@ SYM_CODE_END(paranoid_exit)
 /*
  * Switch GS and CR3 if needed.
  */
-SYM_CODE_START_LOCAL(error_entry)
+SYM_CODE_START(error_entry)
+	ANNOTATE_NOENDBR
 	UNWIND_HINT_FUNC
 
 	PUSH_AND_CLEAR_REGS save_ret=1
-- 
GitLab


From 239f2e248ef12840178a3ed1a217f19b5fbfde26 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:22 +0200
Subject: [PATCH 0128/4122] x86/paravirt: Make struct paravirt_call_site
 unconditionally available

For the upcoming call thunk patching it's less ifdeffery when the data
structure is unconditionally available. The code can then be trivially
fenced off with IS_ENABLED().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.367853167@infradead.org
---
 arch/x86/include/asm/paravirt.h       |  4 ++--
 arch/x86/include/asm/paravirt_types.h | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 1be66c15ecbd7..2851bc2339d58 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -4,13 +4,13 @@
 /* Various instructions on x86 need to be replaced for
  * para-virtualization: those hooks are defined here. */
 
+#include <asm/paravirt_types.h>
+
 #ifdef CONFIG_PARAVIRT
 #include <asm/pgtable_types.h>
 #include <asm/asm.h>
 #include <asm/nospec-branch.h>
 
-#include <asm/paravirt_types.h>
-
 #ifndef __ASSEMBLY__
 #include <linux/bug.h>
 #include <linux/types.h>
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index f3d601574730c..e137d94121232 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -2,6 +2,17 @@
 #ifndef _ASM_X86_PARAVIRT_TYPES_H
 #define _ASM_X86_PARAVIRT_TYPES_H
 
+#ifndef __ASSEMBLY__
+/* These all sit in the .parainstructions section to tell us what to patch. */
+struct paravirt_patch_site {
+	u8 *instr;		/* original instructions */
+	u8 type;		/* type of this instruction */
+	u8 len;			/* length of original instruction */
+};
+#endif
+
+#ifdef CONFIG_PARAVIRT
+
 /* Bitmask of what can be clobbered: usually at least eax. */
 #define CLBR_EAX  (1 << 0)
 #define CLBR_ECX  (1 << 1)
@@ -593,16 +604,9 @@ unsigned long paravirt_ret0(void);
 
 #define paravirt_nop	((void *)_paravirt_nop)
 
-/* These all sit in the .parainstructions section to tell us what to patch. */
-struct paravirt_patch_site {
-	u8 *instr;		/* original instructions */
-	u8 type;		/* type of this instruction */
-	u8 len;			/* length of original instruction */
-};
-
 extern struct paravirt_patch_site __parainstructions[],
 	__parainstructions_end[];
 
 #endif	/* __ASSEMBLY__ */
-
+#endif  /* CONFIG_PARAVIRT */
 #endif	/* _ASM_X86_PARAVIRT_TYPES_H */
-- 
GitLab


From e81dc127ef69887c72735a3e3868930e2bf313ed Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:23 +0200
Subject: [PATCH 0129/4122] x86/callthunks: Add call patching for call depth
 tracking

Mitigating the Intel SKL RSB underflow issue in software requires to
track the call depth. That is every CALL and every RET need to be
intercepted and additional code injected.

The existing retbleed mitigations already include means of redirecting
RET to __x86_return_thunk; this can be re-purposed and RET can be
redirected to another function doing RET accounting.

CALL accounting will use the function padding introduced in prior
patches. For each CALL instruction, the destination symbol's padding
is rewritten to do the accounting and the CALL instruction is adjusted
to call into the padding.

This ensures only affected CPUs pay the overhead of this accounting.
Unaffected CPUs will leave the padding unused and have their 'JMP
__x86_return_thunk' replaced with an actual 'RET' instruction.

Objtool has been modified to supply a .call_sites section that lists
all the 'CALL' instructions. Additionally the paravirt instruction
sites are iterated since they will have been patched from an indirect
call to direct calls (or direct instructions in which case it'll be
ignored).

Module handling and the actual thunk code for SKL will be added in
subsequent steps.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.470877038@infradead.org
---
 arch/x86/Kconfig                     |  12 ++
 arch/x86/include/asm/alternative.h   |  12 ++
 arch/x86/kernel/Makefile             |   2 +
 arch/x86/kernel/alternative.c        |   6 +
 arch/x86/kernel/callthunks.c         | 251 +++++++++++++++++++++++++++
 arch/x86/kernel/head_64.S            |   1 +
 arch/x86/kernel/relocate_kernel_64.S |   5 +-
 arch/x86/kernel/vmlinux.lds.S        |   8 -
 8 files changed, 287 insertions(+), 10 deletions(-)
 create mode 100644 arch/x86/kernel/callthunks.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 6ae7fa4b8eb7d..a1dae9d5e3dab 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2541,6 +2541,18 @@ config CALL_DEPTH_TRACKING
 	  is unused. On affected SKL systems this results in a significant
 	  performance gain over the IBRS mitigation.
 
+config CALL_THUNKS_DEBUG
+	bool "Enable call thunks and call depth tracking debugging"
+	depends on CALL_DEPTH_TRACKING
+	select FUNCTION_ALIGNMENT_32B
+	default n
+	help
+	  Enable call/ret counters for imbalance detection and build in
+	  a noisy dmesg about callthunks generation and call patching for
+	  trouble shooting. The debug prints need to be enabled on the
+	  kernel command line with 'debug-callthunks'.
+	  Only enable this, when you are debugging call thunks as this
+	  creates a noticable runtime overhead. If unsure say N.
 
 config CPU_IBPB_ENTRY
 	bool "Enable IBPB on kernel entry"
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 9542c582d546b..6b7bbd0db2480 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -80,6 +80,18 @@ extern void apply_returns(s32 *start, s32 *end);
 extern void apply_ibt_endbr(s32 *start, s32 *end);
 
 struct module;
+struct paravirt_patch_site;
+
+struct callthunk_sites {
+	s32				*call_start, *call_end;
+	struct paravirt_patch_site	*pv_start, *pv_end;
+};
+
+#ifdef CONFIG_CALL_THUNKS
+extern void callthunks_patch_builtin_calls(void);
+#else
+static __always_inline void callthunks_patch_builtin_calls(void) {}
+#endif
 
 #ifdef CONFIG_SMP
 extern void alternatives_smp_module_add(struct module *mod, char *name,
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index f901658d9f7c0..c2739a5886fa2 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -143,6 +143,8 @@ obj-$(CONFIG_AMD_MEM_ENCRYPT)		+= sev.o
 
 obj-$(CONFIG_CFI_CLANG)			+= cfi.o
 
+obj-$(CONFIG_CALL_THUNKS)		+= callthunks.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index fad3c0e4838ea..963872d177075 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -947,6 +947,12 @@ void __init alternative_instructions(void)
 	 */
 	apply_alternatives(__alt_instructions, __alt_instructions_end);
 
+	/*
+	 * Now all calls are established. Apply the call thunks if
+	 * required.
+	 */
+	callthunks_patch_builtin_calls();
+
 	apply_ibt_endbr(__ibt_endbr_seal, __ibt_endbr_seal_end);
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
new file mode 100644
index 0000000000000..e5275d6e674d9
--- /dev/null
+++ b/arch/x86/kernel/callthunks.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define pr_fmt(fmt) "callthunks: " fmt
+
+#include <linux/kallsyms.h>
+#include <linux/memory.h>
+#include <linux/moduleloader.h>
+
+#include <asm/alternative.h>
+#include <asm/cpu.h>
+#include <asm/ftrace.h>
+#include <asm/insn.h>
+#include <asm/kexec.h>
+#include <asm/nospec-branch.h>
+#include <asm/paravirt.h>
+#include <asm/sections.h>
+#include <asm/switch_to.h>
+#include <asm/sync_core.h>
+#include <asm/text-patching.h>
+#include <asm/xen/hypercall.h>
+
+static int __initdata_or_module debug_callthunks;
+
+#define prdbg(fmt, args...)					\
+do {								\
+	if (debug_callthunks)					\
+		printk(KERN_DEBUG pr_fmt(fmt), ##args);		\
+} while(0)
+
+static int __init debug_thunks(char *str)
+{
+	debug_callthunks = 1;
+	return 1;
+}
+__setup("debug-callthunks", debug_thunks);
+
+extern s32 __call_sites[], __call_sites_end[];
+
+struct thunk_desc {
+	void		*template;
+	unsigned int	template_size;
+};
+
+struct core_text {
+	unsigned long	base;
+	unsigned long	end;
+	const char	*name;
+};
+
+static bool thunks_initialized __ro_after_init;
+
+static const struct core_text builtin_coretext = {
+	.base = (unsigned long)_text,
+	.end  = (unsigned long)_etext,
+	.name = "builtin",
+};
+
+static struct thunk_desc callthunk_desc __ro_after_init;
+
+extern void error_entry(void);
+extern void xen_error_entry(void);
+extern void paranoid_entry(void);
+
+static inline bool within_coretext(const struct core_text *ct, void *addr)
+{
+	unsigned long p = (unsigned long)addr;
+
+	return ct->base <= p && p < ct->end;
+}
+
+static inline bool within_module_coretext(void *addr)
+{
+	bool ret = false;
+
+#ifdef CONFIG_MODULES
+	struct module *mod;
+
+	preempt_disable();
+	mod = __module_address((unsigned long)addr);
+	if (mod && within_module_core((unsigned long)addr, mod))
+		ret = true;
+	preempt_enable();
+#endif
+	return ret;
+}
+
+static bool is_coretext(const struct core_text *ct, void *addr)
+{
+	if (ct && within_coretext(ct, addr))
+		return true;
+	if (within_coretext(&builtin_coretext, addr))
+		return true;
+	return within_module_coretext(addr);
+}
+
+static __init_or_module bool skip_addr(void *dest)
+{
+	if (dest == error_entry)
+		return true;
+	if (dest == paranoid_entry)
+		return true;
+	if (dest == xen_error_entry)
+		return true;
+	/* Does FILL_RSB... */
+	if (dest == __switch_to_asm)
+		return true;
+	/* Accounts directly */
+	if (dest == ret_from_fork)
+		return true;
+#ifdef CONFIG_HOTPLUG_CPU
+	if (dest == start_cpu0)
+		return true;
+#endif
+#ifdef CONFIG_FUNCTION_TRACER
+	if (dest == __fentry__)
+		return true;
+#endif
+#ifdef CONFIG_KEXEC_CORE
+	if (dest >= (void *)relocate_kernel &&
+	    dest < (void*)relocate_kernel + KEXEC_CONTROL_CODE_MAX_SIZE)
+		return true;
+#endif
+#ifdef CONFIG_XEN
+	if (dest >= (void *)hypercall_page &&
+	    dest < (void*)hypercall_page + PAGE_SIZE)
+		return true;
+#endif
+	return false;
+}
+
+static __init_or_module void *call_get_dest(void *addr)
+{
+	struct insn insn;
+	void *dest;
+	int ret;
+
+	ret = insn_decode_kernel(&insn, addr);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* Patched out call? */
+	if (insn.opcode.bytes[0] != CALL_INSN_OPCODE)
+		return NULL;
+
+	dest = addr + insn.length + insn.immediate.value;
+	if (skip_addr(dest))
+		return NULL;
+	return dest;
+}
+
+static const u8 nops[] = {
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+	0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90, 0x90,
+};
+
+static __init_or_module void *patch_dest(void *dest, bool direct)
+{
+	unsigned int tsize = callthunk_desc.template_size;
+	u8 *pad = dest - tsize;
+
+	/* Already patched? */
+	if (!bcmp(pad, callthunk_desc.template, tsize))
+		return pad;
+
+	/* Ensure there are nops */
+	if (bcmp(pad, nops, tsize)) {
+		pr_warn_once("Invalid padding area for %pS\n", dest);
+		return NULL;
+	}
+
+	if (direct)
+		memcpy(pad, callthunk_desc.template, tsize);
+	else
+		text_poke_copy_locked(pad, callthunk_desc.template, tsize, true);
+	return pad;
+}
+
+static __init_or_module void patch_call(void *addr, const struct core_text *ct)
+{
+	void *pad, *dest;
+	u8 bytes[8];
+
+	if (!within_coretext(ct, addr))
+		return;
+
+	dest = call_get_dest(addr);
+	if (!dest || WARN_ON_ONCE(IS_ERR(dest)))
+		return;
+
+	if (!is_coretext(ct, dest))
+		return;
+
+	pad = patch_dest(dest, within_coretext(ct, dest));
+	if (!pad)
+		return;
+
+	prdbg("Patch call at: %pS %px to %pS %px -> %px \n", addr, addr,
+		dest, dest, pad);
+	__text_gen_insn(bytes, CALL_INSN_OPCODE, addr, pad, CALL_INSN_SIZE);
+	text_poke_early(addr, bytes, CALL_INSN_SIZE);
+}
+
+static __init_or_module void
+patch_call_sites(s32 *start, s32 *end, const struct core_text *ct)
+{
+	s32 *s;
+
+	for (s = start; s < end; s++)
+		patch_call((void *)s + *s, ct);
+}
+
+static __init_or_module void
+patch_paravirt_call_sites(struct paravirt_patch_site *start,
+			  struct paravirt_patch_site *end,
+			  const struct core_text *ct)
+{
+	struct paravirt_patch_site *p;
+
+	for (p = start; p < end; p++)
+		patch_call(p->instr, ct);
+}
+
+static __init_or_module void
+callthunks_setup(struct callthunk_sites *cs, const struct core_text *ct)
+{
+	prdbg("Patching call sites %s\n", ct->name);
+	patch_call_sites(cs->call_start, cs->call_end, ct);
+	patch_paravirt_call_sites(cs->pv_start, cs->pv_end, ct);
+	prdbg("Patching call sites done%s\n", ct->name);
+}
+
+void __init callthunks_patch_builtin_calls(void)
+{
+	struct callthunk_sites cs = {
+		.call_start	= __call_sites,
+		.call_end	= __call_sites_end,
+		.pv_start	= __parainstructions,
+		.pv_end		= __parainstructions_end
+	};
+
+	if (!cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
+		return;
+
+	pr_info("Setting up call depth tracking\n");
+	mutex_lock(&text_mutex);
+	callthunks_setup(&cs, &builtin_coretext);
+	thunks_initialized = true;
+	mutex_unlock(&text_mutex);
+}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d860d437631b6..222efd4a09bc8 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -370,6 +370,7 @@ SYM_CODE_END(secondary_startup_64)
  * start_secondary() via .Ljump_to_C_code.
  */
 SYM_CODE_START(start_cpu0)
+	ANNOTATE_NOENDBR
 	UNWIND_HINT_EMPTY
 	movq	initial_stack(%rip), %rsp
 	jmp	.Ljump_to_C_code
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index 4809c0dc4eb0c..4a73351f87f88 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -41,6 +41,7 @@
 	.text
 	.align PAGE_SIZE
 	.code64
+SYM_CODE_START_NOALIGN(relocate_range)
 SYM_CODE_START_NOALIGN(relocate_kernel)
 	UNWIND_HINT_EMPTY
 	ANNOTATE_NOENDBR
@@ -312,5 +313,5 @@ SYM_CODE_START_LOCAL_NOALIGN(swap_pages)
 	int3
 SYM_CODE_END(swap_pages)
 
-	.globl kexec_control_code_size
-.set kexec_control_code_size, . - relocate_kernel
+	.skip KEXEC_CONTROL_CODE_MAX_SIZE - (. - relocate_kernel), 0xcc
+SYM_CODE_END(relocate_range);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index b69df9e013cca..49f3f86433c7d 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -501,11 +501,3 @@ INIT_PER_CPU(irq_stack_backing_store);
 #endif
 
 #endif /* CONFIG_X86_64 */
-
-#ifdef CONFIG_KEXEC_CORE
-#include <asm/kexec.h>
-
-. = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,
-           "kexec control code size is too big");
-#endif
-
-- 
GitLab


From eaf44c816ed8d1ef94c354e3ed47d53cd5a5cb13 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:24 +0200
Subject: [PATCH 0130/4122] x86/modules: Add call patching

As for the builtins create call thunks and patch the call sites to call the
thunk on Intel SKL CPUs for retbleed mitigation.

Note, that module init functions are ignored for sake of simplicity because
loading modules is not something which is done in high frequent loops and
the attacker has not really a handle on when this happens in order to
launch a matching attack. The depth tracking will still work for calls into
the builtins and because the call is not accounted it will underflow faster
and overstuff, but that's mitigated by the saturating counter and the side
effect is only temporary.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.575673066@infradead.org
---
 arch/x86/include/asm/alternative.h |  5 +++++
 arch/x86/kernel/callthunks.c       | 19 +++++++++++++++++++
 arch/x86/kernel/module.c           | 20 +++++++++++++++++++-
 3 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 6b7bbd0db2480..ef007fa33dc41 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -89,8 +89,13 @@ struct callthunk_sites {
 
 #ifdef CONFIG_CALL_THUNKS
 extern void callthunks_patch_builtin_calls(void);
+extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
+					  struct module *mod);
 #else
 static __always_inline void callthunks_patch_builtin_calls(void) {}
+static __always_inline void
+callthunks_patch_module_calls(struct callthunk_sites *sites,
+			      struct module *mod) {}
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index e5275d6e674d9..7b9d998ebd7dc 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -249,3 +249,22 @@ void __init callthunks_patch_builtin_calls(void)
 	thunks_initialized = true;
 	mutex_unlock(&text_mutex);
 }
+
+#ifdef CONFIG_MODULES
+void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
+					    struct module *mod)
+{
+	struct core_text ct = {
+		.base = (unsigned long)mod->core_layout.base,
+		.end  = (unsigned long)mod->core_layout.base + mod->core_layout.size,
+		.name = mod->name,
+	};
+
+	if (!thunks_initialized)
+		return;
+
+	mutex_lock(&text_mutex);
+	callthunks_setup(cs, &ct);
+	mutex_unlock(&text_mutex);
+}
+#endif /* CONFIG_MODULES */
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 43f0112772192..2fb9de2cef40e 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -254,7 +254,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 {
 	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
 		*para = NULL, *orc = NULL, *orc_ip = NULL,
-		*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
+		*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
+		*calls = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -274,6 +275,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 			retpolines = s;
 		if (!strcmp(".return_sites", secstrings + s->sh_name))
 			returns = s;
+		if (!strcmp(".call_sites", secstrings + s->sh_name))
+			calls = s;
 		if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
 			ibt_endbr = s;
 	}
@@ -299,6 +302,21 @@ int module_finalize(const Elf_Ehdr *hdr,
 		void *aseg = (void *)alt->sh_addr;
 		apply_alternatives(aseg, aseg + alt->sh_size);
 	}
+	if (calls || para) {
+		struct callthunk_sites cs = {};
+
+		if (calls) {
+			cs.call_start = (void *)calls->sh_addr;
+			cs.call_end = (void *)calls->sh_addr + calls->sh_size;
+		}
+
+		if (para) {
+			cs.pv_start = (void *)para->sh_addr;
+			cs.pv_end = (void *)para->sh_addr + para->sh_size;
+		}
+
+		callthunks_patch_module_calls(&cs, me);
+	}
 	if (ibt_endbr) {
 		void *iseg = (void *)ibt_endbr->sh_addr;
 		apply_ibt_endbr(iseg, iseg + ibt_endbr->sh_size);
-- 
GitLab


From 770ae1b709528a6a173b5c7b183818ee9b45e376 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:25 +0200
Subject: [PATCH 0131/4122] x86/returnthunk: Allow different return thunks

In preparation for call depth tracking on Intel SKL CPUs, make it possible
to patch in a SKL specific return thunk.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.680469665@infradead.org
---
 arch/x86/include/asm/nospec-branch.h |  6 ++++++
 arch/x86/kernel/alternative.c        | 17 +++++++++++++----
 arch/x86/kernel/ftrace.c             |  2 +-
 arch/x86/kernel/static_call.c        |  2 +-
 arch/x86/net/bpf_jit_comp.c          |  2 +-
 5 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index c936ce9f0c47c..f10ca334dd752 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -208,6 +208,12 @@ extern void __x86_return_thunk(void);
 extern void zen_untrain_ret(void);
 extern void entry_ibpb(void);
 
+#ifdef CONFIG_CALL_THUNKS
+extern void (*x86_return_thunk)(void);
+#else
+#define x86_return_thunk	(&__x86_return_thunk)
+#endif
+
 #ifdef CONFIG_RETPOLINE
 
 #define GEN(reg) \
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 963872d177075..04d1e3d35b0e7 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -518,6 +518,11 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end)
 }
 
 #ifdef CONFIG_RETHUNK
+
+#ifdef CONFIG_CALL_THUNKS
+void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk;
+#endif
+
 /*
  * Rewrite the compiler generated return thunk tail-calls.
  *
@@ -533,14 +538,18 @@ static int patch_return(void *addr, struct insn *insn, u8 *bytes)
 {
 	int i = 0;
 
-	if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
-		return -1;
+	if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+		if (x86_return_thunk == __x86_return_thunk)
+			return -1;
 
-	bytes[i++] = RET_INSN_OPCODE;
+		i = JMP32_INSN_SIZE;
+		__text_gen_insn(bytes, JMP32_INSN_OPCODE, addr, x86_return_thunk, i);
+	} else {
+		bytes[i++] = RET_INSN_OPCODE;
+	}
 
 	for (; i < insn->length;)
 		bytes[i++] = INT3_INSN_OPCODE;
-
 	return i;
 }
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 00eac455a3a1f..4ac6692d5ef82 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -359,7 +359,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 
 	ip = trampoline + size;
 	if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
-		__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
+		__text_gen_insn(ip, JMP32_INSN_OPCODE, ip, x86_return_thunk, JMP32_INSN_SIZE);
 	else
 		memcpy(ip, retq, sizeof(retq));
 
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index aaaba85d6d7ff..5d3844a98373e 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -52,7 +52,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
 
 	case RET:
 		if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
-			code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
+			code = text_gen_insn(JMP32_INSN_OPCODE, insn, x86_return_thunk);
 		else
 			code = &retinsn;
 		break;
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 99620428ad785..0df391ecd4d83 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -432,7 +432,7 @@ static void emit_return(u8 **pprog, u8 *ip)
 	u8 *prog = *pprog;
 
 	if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
-		emit_jump(&prog, &__x86_return_thunk, ip);
+		emit_jump(&prog, x86_return_thunk, ip);
 	} else {
 		EMIT1(0xC3);		/* ret */
 		if (IS_ENABLED(CONFIG_SLS))
-- 
GitLab


From 52354973573cc260ff2fc661cb28ff8eaa7b879b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:26 +0200
Subject: [PATCH 0132/4122] x86/asm: Provide ALTERNATIVE_3

Fairly straight forward adaptation/extention of ALTERNATIVE_2.

Required for call depth tracking.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.787711192@infradead.org
---
 arch/x86/include/asm/alternative.h | 33 +++++++++++++++++++++++++++---
 1 file changed, 30 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index ef007fa33dc41..4c416b21bac80 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -364,6 +364,7 @@ static inline int alternatives_text_reserved(void *start, void *end)
 #define old_len			141b-140b
 #define new_len1		144f-143f
 #define new_len2		145f-144f
+#define new_len3		146f-145f
 
 /*
  * gas compatible max based on the idea from:
@@ -371,7 +372,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
  *
  * The additional "-" is needed because gas uses a "true" value of -1.
  */
-#define alt_max_short(a, b)	((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+#define alt_max_2(a, b)		((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+#define alt_max_3(a, b, c)	(alt_max_2(alt_max_2(a, b), c))
 
 
 /*
@@ -383,8 +385,8 @@ static inline int alternatives_text_reserved(void *start, void *end)
 140:
 	\oldinstr
 141:
-	.skip -((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
-		(alt_max_short(new_len1, new_len2) - (old_len)),0x90
+	.skip -((alt_max_2(new_len1, new_len2) - (old_len)) > 0) * \
+		(alt_max_2(new_len1, new_len2) - (old_len)),0x90
 142:
 
 	.pushsection .altinstructions,"a"
@@ -401,6 +403,31 @@ static inline int alternatives_text_reserved(void *start, void *end)
 	.popsection
 .endm
 
+.macro ALTERNATIVE_3 oldinstr, newinstr1, feature1, newinstr2, feature2, newinstr3, feature3
+140:
+	\oldinstr
+141:
+	.skip -((alt_max_3(new_len1, new_len2, new_len3) - (old_len)) > 0) * \
+		(alt_max_3(new_len1, new_len2, new_len3) - (old_len)),0x90
+142:
+
+	.pushsection .altinstructions,"a"
+	altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f
+	altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f
+	altinstruction_entry 140b,145f,\feature3,142b-140b,146f-145f
+	.popsection
+
+	.pushsection .altinstr_replacement,"ax"
+143:
+	\newinstr1
+144:
+	\newinstr2
+145:
+	\newinstr3
+146:
+	.popsection
+.endm
+
 /* If @feature is set, patch in @newinstr_yes, otherwise @newinstr_no. */
 #define ALTERNATIVE_TERNARY(oldinstr, feature, newinstr_yes, newinstr_no) \
 	ALTERNATIVE_2 oldinstr, newinstr_no, X86_FEATURE_ALWAYS,	\
-- 
GitLab


From 5d8213864ade86b48fc492584ea86d65a62f892e Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:27 +0200
Subject: [PATCH 0133/4122] x86/retbleed: Add SKL return thunk

To address the Intel SKL RSB underflow issue in software it's required to
do call depth tracking.

Provide a return thunk for call depth tracking on Intel SKL CPUs.

The tracking does not use a counter. It uses uses arithmetic shift
right on call entry and logical shift left on return.

The depth tracking variable is initialized to 0x8000.... when the call
depth is zero. The arithmetic shift right sign extends the MSB and
saturates after the 12th call. The shift count is 5 so the tracking covers
12 nested calls. On return the variable is shifted left logically so it
becomes zero again.

 CALL	 	   	RET
 0: 0x8000000000000000	0x0000000000000000
 1: 0xfc00000000000000	0xf000000000000000
...
11: 0xfffffffffffffff8	0xfffffffffffffc00
12: 0xffffffffffffffff	0xffffffffffffffe0

After a return buffer fill the depth is credited 12 calls before the next
stuffing has to take place.

There is a inaccuracy for situations like this:

   10 calls
    5 returns
    3 calls
    4 returns
    3 calls
    ....

The shift count might cause this to be off by one in either direction, but
there is still a cushion vs. the RSB depth. The algorithm does not claim to
be perfect, but it should obfuscate the problem enough to make exploitation
extremly difficult.

The theory behind this is:

RSB is a stack with depth 16 which is filled on every call. On the return
path speculation "pops" entries to speculate down the call chain. Once the
speculative RSB is empty it switches to other predictors, e.g. the Branch
History Buffer, which can be mistrained by user space and misguide the
speculation path to a gadget.

Call depth tracking is designed to break this speculation path by stuffing
speculation trap calls into the RSB which are never getting a corresponding
return executed. This stalls the prediction path until it gets resteered,

The assumption is that stuffing at the 12th return is sufficient to break
the speculation before it hits the underflow and the fallback to the other
predictors. Testing confirms that it works. Johannes, one of the retbleed
researchers. tried to attack this approach but failed.

There is obviously no scientific proof that this will withstand future
research progress, but all we can do right now is to speculate about it.

The SAR/SHL usage was suggested by Andi Kleen.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.890071690@infradead.org
---
 arch/x86/entry/entry_64.S            |  10 ++-
 arch/x86/include/asm/current.h       |   3 +
 arch/x86/include/asm/nospec-branch.h | 121 +++++++++++++++++++++++++--
 arch/x86/kernel/asm-offsets.c        |   3 +
 arch/x86/kvm/svm/vmenter.S           |   1 +
 arch/x86/lib/retpoline.S             |  31 +++++++
 6 files changed, 159 insertions(+), 10 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4cc0125fdfdc2..15739a2c09833 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -288,6 +288,7 @@ SYM_FUNC_END(__switch_to_asm)
 SYM_CODE_START_NOALIGN(ret_from_fork)
 	UNWIND_HINT_EMPTY
 	ANNOTATE_NOENDBR // copy_thread
+	CALL_DEPTH_ACCOUNT
 	movq	%rax, %rdi
 	call	schedule_tail			/* rdi: 'prev' task parameter */
 
@@ -332,7 +333,7 @@ SYM_CODE_START(xen_error_entry)
 	UNWIND_HINT_FUNC
 	PUSH_AND_CLEAR_REGS save_ret=1
 	ENCODE_FRAME_POINTER 8
-	UNTRAIN_RET
+	UNTRAIN_RET_FROM_CALL
 	RET
 SYM_CODE_END(xen_error_entry)
 
@@ -977,7 +978,7 @@ SYM_CODE_START(paranoid_entry)
 	 * CR3 above, keep the old value in a callee saved register.
 	 */
 	IBRS_ENTER save_reg=%r15
-	UNTRAIN_RET
+	UNTRAIN_RET_FROM_CALL
 
 	RET
 SYM_CODE_END(paranoid_entry)
@@ -1062,7 +1063,7 @@ SYM_CODE_START(error_entry)
 	/* We have user CR3.  Change to kernel CR3. */
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	IBRS_ENTER
-	UNTRAIN_RET
+	UNTRAIN_RET_FROM_CALL
 
 	leaq	8(%rsp), %rdi			/* arg0 = pt_regs pointer */
 	/* Put us onto the real thread stack. */
@@ -1097,6 +1098,7 @@ SYM_CODE_START(error_entry)
 	 */
 .Lerror_entry_done_lfence:
 	FENCE_SWAPGS_KERNEL_ENTRY
+	CALL_DEPTH_ACCOUNT
 	leaq	8(%rsp), %rax			/* return pt_regs pointer */
 	ANNOTATE_UNRET_END
 	RET
@@ -1115,7 +1117,7 @@ SYM_CODE_START(error_entry)
 	FENCE_SWAPGS_USER_ENTRY
 	SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
 	IBRS_ENTER
-	UNTRAIN_RET
+	UNTRAIN_RET_FROM_CALL
 
 	/*
 	 * Pretend that the exception came from user mode: set up pt_regs
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index b89aba077b84b..a1168e7b69e5b 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -17,6 +17,9 @@ struct pcpu_hot {
 			struct task_struct	*current_task;
 			int			preempt_count;
 			int			cpu_number;
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+			u64			call_depth;
+#endif
 			unsigned long		top_of_stack;
 			void			*hardirq_stack_ptr;
 			u16			softirq_pending;
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f10ca334dd752..d4be826a22824 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -12,8 +12,83 @@
 #include <asm/msr-index.h>
 #include <asm/unwind_hints.h>
 #include <asm/percpu.h>
+#include <asm/current.h>
 
-#define RETPOLINE_THUNK_SIZE	32
+/*
+ * Call depth tracking for Intel SKL CPUs to address the RSB underflow
+ * issue in software.
+ *
+ * The tracking does not use a counter. It uses uses arithmetic shift
+ * right on call entry and logical shift left on return.
+ *
+ * The depth tracking variable is initialized to 0x8000.... when the call
+ * depth is zero. The arithmetic shift right sign extends the MSB and
+ * saturates after the 12th call. The shift count is 5 for both directions
+ * so the tracking covers 12 nested calls.
+ *
+ *  Call
+ *  0: 0x8000000000000000	0x0000000000000000
+ *  1: 0xfc00000000000000	0xf000000000000000
+ * ...
+ * 11: 0xfffffffffffffff8	0xfffffffffffffc00
+ * 12: 0xffffffffffffffff	0xffffffffffffffe0
+ *
+ * After a return buffer fill the depth is credited 12 calls before the
+ * next stuffing has to take place.
+ *
+ * There is a inaccuracy for situations like this:
+ *
+ *  10 calls
+ *   5 returns
+ *   3 calls
+ *   4 returns
+ *   3 calls
+ *   ....
+ *
+ * The shift count might cause this to be off by one in either direction,
+ * but there is still a cushion vs. the RSB depth. The algorithm does not
+ * claim to be perfect and it can be speculated around by the CPU, but it
+ * is considered that it obfuscates the problem enough to make exploitation
+ * extremly difficult.
+ */
+#define RET_DEPTH_SHIFT			5
+#define RSB_RET_STUFF_LOOPS		16
+#define RET_DEPTH_INIT			0x8000000000000000ULL
+#define RET_DEPTH_INIT_FROM_CALL	0xfc00000000000000ULL
+#define RET_DEPTH_CREDIT		0xffffffffffffffffULL
+
+#if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
+
+#include <asm/asm-offsets.h>
+
+#define CREDIT_CALL_DEPTH					\
+	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#define ASM_CREDIT_CALL_DEPTH					\
+	movq	$-1, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#define RESET_CALL_DEPTH					\
+	mov	$0x80, %rax;					\
+	shl	$56, %rax;					\
+	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#define RESET_CALL_DEPTH_FROM_CALL				\
+	mov	$0xfc, %rax;					\
+	shl	$56, %rax;					\
+	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#define INCREMENT_CALL_DEPTH					\
+	sarq	$5, %gs:pcpu_hot + X86_call_depth;
+
+#define ASM_INCREMENT_CALL_DEPTH				\
+	sarq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+
+#else
+#define CREDIT_CALL_DEPTH
+#define RESET_CALL_DEPTH
+#define INCREMENT_CALL_DEPTH
+#define RESET_CALL_DEPTH_FROM_CALL
+#endif
 
 /*
  * Fill the CPU return stack buffer.
@@ -32,6 +107,7 @@
  * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
  */
 
+#define RETPOLINE_THUNK_SIZE	32
 #define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
 
 /*
@@ -60,7 +136,8 @@
 	dec	reg;					\
 	jnz	771b;					\
 	/* barrier for jnz misprediction */		\
-	lfence;
+	lfence;						\
+	ASM_CREDIT_CALL_DEPTH
 #else
 /*
  * i386 doesn't unconditionally have LFENCE, as such it can't
@@ -185,11 +262,32 @@
  * where we have a stack but before any RET instruction.
  */
 .macro UNTRAIN_RET
-#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+	defined(CONFIG_X86_FEATURE_CALL_DEPTH)
 	ANNOTATE_UNRET_END
-	ALTERNATIVE_2 "",						\
-	              CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,		\
-		      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
+	ALTERNATIVE_3 "",						\
+		      CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,		\
+		      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,	\
+		      __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
+.macro UNTRAIN_RET_FROM_CALL
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
+	defined(CONFIG_X86_FEATURE_CALL_DEPTH)
+	ANNOTATE_UNRET_END
+	ALTERNATIVE_3 "",						\
+		      CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,		\
+		      "call entry_ibpb", X86_FEATURE_ENTRY_IBPB,	\
+		      __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH
+#endif
+.endm
+
+
+.macro CALL_DEPTH_ACCOUNT
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+	ALTERNATIVE "",							\
+		    __stringify(ASM_INCREMENT_CALL_DEPTH), X86_FEATURE_CALL_DEPTH
 #endif
 .endm
 
@@ -214,6 +312,17 @@ extern void (*x86_return_thunk)(void);
 #define x86_return_thunk	(&__x86_return_thunk)
 #endif
 
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+extern void __x86_return_skl(void);
+
+static inline void x86_set_skl_return_thunk(void)
+{
+	x86_return_thunk = &__x86_return_skl;
+}
+#else
+static inline void x86_set_skl_return_thunk(void) {}
+#endif
+
 #ifdef CONFIG_RETPOLINE
 
 #define GEN(reg) \
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index a9824318e1c55..13afdbbee349e 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -110,6 +110,9 @@ static void __used common(void)
 	OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
 
 	OFFSET(X86_top_of_stack, pcpu_hot, top_of_stack);
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+	OFFSET(X86_call_depth, pcpu_hot, call_depth);
+#endif
 
 	if (IS_ENABLED(CONFIG_KVM_INTEL)) {
 		BLANK();
diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S
index 723f8534986c3..09eacf19d7185 100644
--- a/arch/x86/kvm/svm/vmenter.S
+++ b/arch/x86/kvm/svm/vmenter.S
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/linkage.h>
 #include <asm/asm.h>
+#include <asm/asm-offsets.h>
 #include <asm/bitsperlong.h>
 #include <asm/kvm_vcpu_regs.h>
 #include <asm/nospec-branch.h>
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 073289a55f849..1e79eccc1d698 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -5,9 +5,11 @@
 #include <asm/dwarf2.h>
 #include <asm/cpufeatures.h>
 #include <asm/alternative.h>
+#include <asm/asm-offsets.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
 #include <asm/unwind_hints.h>
+#include <asm/percpu.h>
 #include <asm/frame.h>
 
 	.section .text.__x86.indirect_thunk
@@ -140,3 +142,32 @@ __EXPORT_THUNK(zen_untrain_ret)
 EXPORT_SYMBOL(__x86_return_thunk)
 
 #endif /* CONFIG_RETHUNK */
+
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+
+	.align 64
+SYM_FUNC_START(__x86_return_skl)
+	ANNOTATE_NOENDBR
+	/* Keep the hotpath in a 16byte I-fetch */
+	shlq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
+	jz	1f
+	ANNOTATE_UNRET_SAFE
+	ret
+	int3
+1:
+	.rept	16
+	ANNOTATE_INTRA_FUNCTION_CALL
+	call	2f
+	int3
+2:
+	.endr
+	add	$(8*16), %rsp
+
+	CREDIT_CALL_DEPTH
+
+	ANNOTATE_UNRET_SAFE
+	ret
+	int3
+SYM_FUNC_END(__x86_return_skl)
+
+#endif /* CONFIG_CALL_DEPTH_TRACKING */
-- 
GitLab


From 3b6c1747da48ff40ab746b0e860cffe83619f5c5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:28 +0200
Subject: [PATCH 0134/4122] x86/retpoline: Add SKL retthunk retpolines

Ensure that retpolines do the proper call accounting so that the return
accounting works correctly.

Specifically; retpolines are used to replace both 'jmp *%reg' and
'call *%reg', however these two cases do not have the same accounting
requirements. Therefore split things up and provide two different
retpoline arrays for SKL.

The 'jmp *%reg' case needs no accounting, the
__x86_indirect_jump_thunk_array[] covers this. The retpoline is
changed to not use the return thunk; it's a simple call;ret construct.

[ strictly speaking it should do:
	andq $(~0x1f), PER_CPU_VAR(__x86_call_depth)
  but we can argue this can be covered by the fuzz we already have
  in the accounting depth (12) vs the RSB depth (16) ]

The 'call *%reg' case does need accounting, the
__x86_indirect_call_thunk_array[] covers this. Again, this retpoline
avoids the use of the return-thunk, in this case to avoid double
accounting.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111147.996634749@infradead.org
---
 arch/x86/include/asm/nospec-branch.h | 12 +++++
 arch/x86/kernel/alternative.c        | 59 +++++++++++++++++++++--
 arch/x86/lib/retpoline.S             | 71 ++++++++++++++++++++++++----
 arch/x86/net/bpf_jit_comp.c          |  5 +-
 4 files changed, 135 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index d4be826a22824..06ba7caa0cad0 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -301,6 +301,8 @@
 
 typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
 extern retpoline_thunk_t __x86_indirect_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_call_thunk_array[];
+extern retpoline_thunk_t __x86_indirect_jump_thunk_array[];
 
 extern void __x86_return_thunk(void);
 extern void zen_untrain_ret(void);
@@ -330,6 +332,16 @@ static inline void x86_set_skl_return_thunk(void) {}
 #include <asm/GEN-for-each-reg.h>
 #undef GEN
 
+#define GEN(reg)						\
+	extern retpoline_thunk_t __x86_indirect_call_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+#define GEN(reg)						\
+	extern retpoline_thunk_t __x86_indirect_jump_thunk_ ## reg;
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
 #ifdef CONFIG_X86_64
 
 /*
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 04d1e3d35b0e7..19221d77dc275 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -377,6 +377,56 @@ static int emit_indirect(int op, int reg, u8 *bytes)
 	return i;
 }
 
+static inline bool is_jcc32(struct insn *insn)
+{
+	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
+	return insn->opcode.bytes[0] == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80;
+}
+
+static int emit_call_track_retpoline(void *addr, struct insn *insn, int reg, u8 *bytes)
+{
+	u8 op = insn->opcode.bytes[0];
+	int i = 0;
+
+	/*
+	 * Clang does 'weird' Jcc __x86_indirect_thunk_r11 conditional
+	 * tail-calls. Deal with them.
+	 */
+	if (is_jcc32(insn)) {
+		bytes[i++] = op;
+		op = insn->opcode.bytes[1];
+		goto clang_jcc;
+	}
+
+	if (insn->length == 6)
+		bytes[i++] = 0x2e; /* CS-prefix */
+
+	switch (op) {
+	case CALL_INSN_OPCODE:
+		__text_gen_insn(bytes+i, op, addr+i,
+				__x86_indirect_call_thunk_array[reg],
+				CALL_INSN_SIZE);
+		i += CALL_INSN_SIZE;
+		break;
+
+	case JMP32_INSN_OPCODE:
+clang_jcc:
+		__text_gen_insn(bytes+i, op, addr+i,
+				__x86_indirect_jump_thunk_array[reg],
+				JMP32_INSN_SIZE);
+		i += JMP32_INSN_SIZE;
+		break;
+
+	default:
+		WARN("%pS %px %*ph\n", addr, addr, 6, addr);
+		return -1;
+	}
+
+	WARN_ON_ONCE(i != insn->length);
+
+	return i;
+}
+
 /*
  * Rewrite the compiler generated retpoline thunk calls.
  *
@@ -409,8 +459,12 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
 	BUG_ON(reg == 4);
 
 	if (cpu_feature_enabled(X86_FEATURE_RETPOLINE) &&
-	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE))
+	    !cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+		if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
+			return emit_call_track_retpoline(addr, insn, reg, bytes);
+
 		return -1;
+	}
 
 	op = insn->opcode.bytes[0];
 
@@ -427,8 +481,7 @@ static int patch_retpoline(void *addr, struct insn *insn, u8 *bytes)
 	 *   [ NOP ]
 	 * 1:
 	 */
-	/* Jcc.d32 second opcode byte is in the range: 0x80-0x8f */
-	if (op == 0x0f && (insn->opcode.bytes[1] & 0xf0) == 0x80) {
+	if (is_jcc32(insn)) {
 		cc = insn->opcode.bytes[1] & 0xf;
 		cc ^= 1; /* invert condition */
 
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 1e79eccc1d698..e00206077ae91 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -14,17 +14,18 @@
 
 	.section .text.__x86.indirect_thunk
 
-.macro RETPOLINE reg
+
+.macro POLINE reg
 	ANNOTATE_INTRA_FUNCTION_CALL
 	call    .Ldo_rop_\@
-.Lspec_trap_\@:
-	UNWIND_HINT_EMPTY
-	pause
-	lfence
-	jmp .Lspec_trap_\@
+	int3
 .Ldo_rop_\@:
 	mov     %\reg, (%_ASM_SP)
 	UNWIND_HINT_FUNC
+.endm
+
+.macro RETPOLINE reg
+	POLINE \reg
 	RET
 .endm
 
@@ -54,7 +55,6 @@ SYM_INNER_LABEL(__x86_indirect_thunk_\reg, SYM_L_GLOBAL)
  */
 
 #define __EXPORT_THUNK(sym)	_ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym)
-#define EXPORT_THUNK(reg)	__EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
 
 	.align RETPOLINE_THUNK_SIZE
 SYM_CODE_START(__x86_indirect_thunk_array)
@@ -66,10 +66,65 @@ SYM_CODE_START(__x86_indirect_thunk_array)
 	.align RETPOLINE_THUNK_SIZE
 SYM_CODE_END(__x86_indirect_thunk_array)
 
-#define GEN(reg) EXPORT_THUNK(reg)
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+#ifdef CONFIG_CALL_DEPTH_TRACKING
+.macro CALL_THUNK reg
+	.align RETPOLINE_THUNK_SIZE
+
+SYM_INNER_LABEL(__x86_indirect_call_thunk_\reg, SYM_L_GLOBAL)
+	UNWIND_HINT_EMPTY
+	ANNOTATE_NOENDBR
+
+	CALL_DEPTH_ACCOUNT
+	POLINE \reg
+	ANNOTATE_UNRET_SAFE
+	ret
+	int3
+.endm
+
+	.align RETPOLINE_THUNK_SIZE
+SYM_CODE_START(__x86_indirect_call_thunk_array)
+
+#define GEN(reg) CALL_THUNK reg
 #include <asm/GEN-for-each-reg.h>
 #undef GEN
 
+	.align RETPOLINE_THUNK_SIZE
+SYM_CODE_END(__x86_indirect_call_thunk_array)
+
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_call_thunk_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+.macro JUMP_THUNK reg
+	.align RETPOLINE_THUNK_SIZE
+
+SYM_INNER_LABEL(__x86_indirect_jump_thunk_\reg, SYM_L_GLOBAL)
+	UNWIND_HINT_EMPTY
+	ANNOTATE_NOENDBR
+	POLINE \reg
+	ANNOTATE_UNRET_SAFE
+	ret
+	int3
+.endm
+
+	.align RETPOLINE_THUNK_SIZE
+SYM_CODE_START(__x86_indirect_jump_thunk_array)
+
+#define GEN(reg) JUMP_THUNK reg
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+
+	.align RETPOLINE_THUNK_SIZE
+SYM_CODE_END(__x86_indirect_jump_thunk_array)
+
+#define GEN(reg) __EXPORT_THUNK(__x86_indirect_jump_thunk_ ## reg)
+#include <asm/GEN-for-each-reg.h>
+#undef GEN
+#endif
 /*
  * This function name is magical and is used by -mfunction-return=thunk-extern
  * for the compiler to generate JMPs to it.
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 0df391ecd4d83..ad8cb7f15ab8a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -417,7 +417,10 @@ static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
 		EMIT2(0xFF, 0xE0 + reg);
 	} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
 		OPTIMIZER_HIDE_VAR(reg);
-		emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+		if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
+			emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
+		else
+			emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
 	} else {
 		EMIT2(0xFF, 0xE0 + reg);	/* jmp *%\reg */
 		if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS))
-- 
GitLab


From bbaceb189a21d7245e8063701fe10985396028f9 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:29 +0200
Subject: [PATCH 0135/4122] x86/retbleed: Add SKL call thunk

Add the actual SKL call thunk for call depth accounting.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.101125588@infradead.org
---
 arch/x86/kernel/callthunks.c | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 7b9d998ebd7dc..01f6f6b5a93cd 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -7,6 +7,7 @@
 #include <linux/moduleloader.h>
 
 #include <asm/alternative.h>
+#include <asm/asm-offsets.h>
 #include <asm/cpu.h>
 #include <asm/ftrace.h>
 #include <asm/insn.h>
@@ -55,7 +56,21 @@ static const struct core_text builtin_coretext = {
 	.name = "builtin",
 };
 
-static struct thunk_desc callthunk_desc __ro_after_init;
+asm (
+	".pushsection .rodata				\n"
+	".global skl_call_thunk_template		\n"
+	"skl_call_thunk_template:			\n"
+		__stringify(INCREMENT_CALL_DEPTH)"	\n"
+	".global skl_call_thunk_tail			\n"
+	"skl_call_thunk_tail:				\n"
+	".popsection					\n"
+);
+
+extern u8 skl_call_thunk_template[];
+extern u8 skl_call_thunk_tail[];
+
+#define SKL_TMPL_SIZE \
+	((unsigned int)(skl_call_thunk_tail - skl_call_thunk_template))
 
 extern void error_entry(void);
 extern void xen_error_entry(void);
@@ -157,11 +172,11 @@ static const u8 nops[] = {
 
 static __init_or_module void *patch_dest(void *dest, bool direct)
 {
-	unsigned int tsize = callthunk_desc.template_size;
+	unsigned int tsize = SKL_TMPL_SIZE;
 	u8 *pad = dest - tsize;
 
 	/* Already patched? */
-	if (!bcmp(pad, callthunk_desc.template, tsize))
+	if (!bcmp(pad, skl_call_thunk_template, tsize))
 		return pad;
 
 	/* Ensure there are nops */
@@ -171,9 +186,9 @@ static __init_or_module void *patch_dest(void *dest, bool direct)
 	}
 
 	if (direct)
-		memcpy(pad, callthunk_desc.template, tsize);
+		memcpy(pad, skl_call_thunk_template, tsize);
 	else
-		text_poke_copy_locked(pad, callthunk_desc.template, tsize, true);
+		text_poke_copy_locked(pad, skl_call_thunk_template, tsize, true);
 	return pad;
 }
 
-- 
GitLab


From f5c1bb2afe93396d41c5cbdcb909b08a75b8dde4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:30 +0200
Subject: [PATCH 0136/4122] x86/calldepth: Add ret/call counting for debug

Add a debuigfs mechanism to validate the accounting, e.g. vs. call/ret
balance and to gather statistics about the stuffing to call ratio.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.204285506@infradead.org
---
 arch/x86/include/asm/nospec-branch.h | 36 ++++++++++++++++---
 arch/x86/kernel/callthunks.c         | 53 ++++++++++++++++++++++++++++
 arch/x86/lib/retpoline.S             |  7 +++-
 3 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 06ba7caa0cad0..4771147c7c5a6 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -57,6 +57,22 @@
 #define RET_DEPTH_INIT_FROM_CALL	0xfc00000000000000ULL
 #define RET_DEPTH_CREDIT		0xffffffffffffffffULL
 
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+# define CALL_THUNKS_DEBUG_INC_CALLS				\
+	incq	%gs:__x86_call_count;
+# define CALL_THUNKS_DEBUG_INC_RETS				\
+	incq	%gs:__x86_ret_count;
+# define CALL_THUNKS_DEBUG_INC_STUFFS				\
+	incq	%gs:__x86_stuffs_count;
+# define CALL_THUNKS_DEBUG_INC_CTXSW				\
+	incq	%gs:__x86_ctxsw_count;
+#else
+# define CALL_THUNKS_DEBUG_INC_CALLS
+# define CALL_THUNKS_DEBUG_INC_RETS
+# define CALL_THUNKS_DEBUG_INC_STUFFS
+# define CALL_THUNKS_DEBUG_INC_CTXSW
+#endif
+
 #if defined(CONFIG_CALL_DEPTH_TRACKING) && !defined(COMPILE_OFFSETS)
 
 #include <asm/asm-offsets.h>
@@ -75,18 +91,23 @@
 #define RESET_CALL_DEPTH_FROM_CALL				\
 	mov	$0xfc, %rax;					\
 	shl	$56, %rax;					\
-	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+	movq	%rax, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
+	CALL_THUNKS_DEBUG_INC_CALLS
 
 #define INCREMENT_CALL_DEPTH					\
-	sarq	$5, %gs:pcpu_hot + X86_call_depth;
+	sarq	$5, %gs:pcpu_hot + X86_call_depth;		\
+	CALL_THUNKS_DEBUG_INC_CALLS
 
 #define ASM_INCREMENT_CALL_DEPTH				\
-	sarq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth);
+	sarq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth);	\
+	CALL_THUNKS_DEBUG_INC_CALLS
 
 #else
 #define CREDIT_CALL_DEPTH
+#define ASM_CREDIT_CALL_DEPTH
 #define RESET_CALL_DEPTH
 #define INCREMENT_CALL_DEPTH
+#define ASM_INCREMENT_CALL_DEPTH
 #define RESET_CALL_DEPTH_FROM_CALL
 #endif
 
@@ -137,7 +158,8 @@
 	jnz	771b;					\
 	/* barrier for jnz misprediction */		\
 	lfence;						\
-	ASM_CREDIT_CALL_DEPTH
+	ASM_CREDIT_CALL_DEPTH				\
+	CALL_THUNKS_DEBUG_INC_CTXSW
 #else
 /*
  * i386 doesn't unconditionally have LFENCE, as such it can't
@@ -321,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void)
 {
 	x86_return_thunk = &__x86_return_skl;
 }
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+DECLARE_PER_CPU(u64, __x86_call_count);
+DECLARE_PER_CPU(u64, __x86_ret_count);
+DECLARE_PER_CPU(u64, __x86_stuffs_count);
+DECLARE_PER_CPU(u64, __x86_ctxsw_count);
+#endif
 #else
 static inline void x86_set_skl_return_thunk(void) {}
 #endif
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 01f6f6b5a93cd..dfe7ffff88b93 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -2,6 +2,7 @@
 
 #define pr_fmt(fmt) "callthunks: " fmt
 
+#include <linux/debugfs.h>
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
 #include <linux/moduleloader.h>
@@ -35,6 +36,15 @@ static int __init debug_thunks(char *str)
 }
 __setup("debug-callthunks", debug_thunks);
 
+#ifdef CONFIG_CALL_THUNKS_DEBUG
+DEFINE_PER_CPU(u64, __x86_call_count);
+DEFINE_PER_CPU(u64, __x86_ret_count);
+DEFINE_PER_CPU(u64, __x86_stuffs_count);
+DEFINE_PER_CPU(u64, __x86_ctxsw_count);
+EXPORT_SYMBOL_GPL(__x86_ctxsw_count);
+EXPORT_SYMBOL_GPL(__x86_call_count);
+#endif
+
 extern s32 __call_sites[], __call_sites_end[];
 
 struct thunk_desc {
@@ -283,3 +293,46 @@ void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
 	mutex_unlock(&text_mutex);
 }
 #endif /* CONFIG_MODULES */
+
+#if defined(CONFIG_CALL_THUNKS_DEBUG) && defined(CONFIG_DEBUG_FS)
+static int callthunks_debug_show(struct seq_file *m, void *p)
+{
+	unsigned long cpu = (unsigned long)m->private;
+
+	seq_printf(m, "C: %16llu R: %16llu S: %16llu X: %16llu\n,",
+		   per_cpu(__x86_call_count, cpu),
+		   per_cpu(__x86_ret_count, cpu),
+		   per_cpu(__x86_stuffs_count, cpu),
+		   per_cpu(__x86_ctxsw_count, cpu));
+	return 0;
+}
+
+static int callthunks_debug_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, callthunks_debug_show, inode->i_private);
+}
+
+static const struct file_operations dfs_ops = {
+	.open		= callthunks_debug_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
+static int __init callthunks_debugfs_init(void)
+{
+	struct dentry *dir;
+	unsigned long cpu;
+
+	dir = debugfs_create_dir("callthunks", NULL);
+	for_each_possible_cpu(cpu) {
+		void *arg = (void *)cpu;
+		char name [10];
+
+		sprintf(name, "cpu%lu", cpu);
+		debugfs_create_file(name, 0644, dir, arg, &dfs_ops);
+	}
+	return 0;
+}
+__initcall(callthunks_debugfs_init);
+#endif
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index e00206077ae91..5f61c65322bea 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -203,13 +203,18 @@ EXPORT_SYMBOL(__x86_return_thunk)
 	.align 64
 SYM_FUNC_START(__x86_return_skl)
 	ANNOTATE_NOENDBR
-	/* Keep the hotpath in a 16byte I-fetch */
+	/*
+	 * Keep the hotpath in a 16byte I-fetch for the non-debug
+	 * case.
+	 */
+	CALL_THUNKS_DEBUG_INC_RETS
 	shlq	$5, PER_CPU_VAR(pcpu_hot + X86_call_depth)
 	jz	1f
 	ANNOTATE_UNRET_SAFE
 	ret
 	int3
 1:
+	CALL_THUNKS_DEBUG_INC_STUFFS
 	.rept	16
 	ANNOTATE_INTRA_FUNCTION_CALL
 	call	2f
-- 
GitLab


From 7825451fa4dc04660f1f53d236e4302161d0ebd1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:31 +0200
Subject: [PATCH 0137/4122] static_call: Add call depth tracking support

When indirect calls are switched to direct calls then it has to be ensured
that the call target is not the function, but the call thunk when call
depth tracking is enabled. But static calls are available before call
thunks have been set up.

Ensure a second run through the static call patching code after call thunks
have been created. When call thunks are not enabled this has no side
effects.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.306100465@infradead.org
---
 arch/x86/include/asm/alternative.h |  5 +++++
 arch/x86/kernel/callthunks.c       | 18 ++++++++++++++++++
 arch/x86/kernel/static_call.c      |  1 +
 include/linux/static_call.h        |  2 ++
 kernel/static_call_inline.c        | 23 ++++++++++++++++++-----
 5 files changed, 44 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4c416b21bac80..07ac25793a3f4 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -91,11 +91,16 @@ struct callthunk_sites {
 extern void callthunks_patch_builtin_calls(void);
 extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
 					  struct module *mod);
+extern void *callthunks_translate_call_dest(void *dest);
 #else
 static __always_inline void callthunks_patch_builtin_calls(void) {}
 static __always_inline void
 callthunks_patch_module_calls(struct callthunk_sites *sites,
 			      struct module *mod) {}
+static __always_inline void *callthunks_translate_call_dest(void *dest)
+{
+	return dest;
+}
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index dfe7ffff88b93..071003605a865 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -6,6 +6,7 @@
 #include <linux/kallsyms.h>
 #include <linux/memory.h>
 #include <linux/moduleloader.h>
+#include <linux/static_call.h>
 
 #include <asm/alternative.h>
 #include <asm/asm-offsets.h>
@@ -271,10 +272,27 @@ void __init callthunks_patch_builtin_calls(void)
 	pr_info("Setting up call depth tracking\n");
 	mutex_lock(&text_mutex);
 	callthunks_setup(&cs, &builtin_coretext);
+	static_call_force_reinit();
 	thunks_initialized = true;
 	mutex_unlock(&text_mutex);
 }
 
+void *callthunks_translate_call_dest(void *dest)
+{
+	void *target;
+
+	lockdep_assert_held(&text_mutex);
+
+	if (!thunks_initialized || skip_addr(dest))
+		return dest;
+
+	if (!is_coretext(NULL, dest))
+		return dest;
+
+	target = patch_dest(dest, false);
+	return target ? : dest;
+}
+
 #ifdef CONFIG_MODULES
 void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
 					    struct module *mod)
diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c
index 5d3844a98373e..2ebc338980bcd 100644
--- a/arch/x86/kernel/static_call.c
+++ b/arch/x86/kernel/static_call.c
@@ -34,6 +34,7 @@ static void __ref __static_call_transform(void *insn, enum insn_type type,
 
 	switch (type) {
 	case CALL:
+		func = callthunks_translate_call_dest(func);
 		code = text_gen_insn(CALL_INSN_OPCODE, insn, func);
 		if (func == &__static_call_return0) {
 			emulate = code;
diff --git a/include/linux/static_call.h b/include/linux/static_call.h
index df53bed9d71f1..141e6b176a1b3 100644
--- a/include/linux/static_call.h
+++ b/include/linux/static_call.h
@@ -162,6 +162,8 @@ extern void arch_static_call_transform(void *site, void *tramp, void *func, bool
 
 extern int __init static_call_init(void);
 
+extern void static_call_force_reinit(void);
+
 struct static_call_mod {
 	struct static_call_mod *next;
 	struct module *mod; /* for vmlinux, mod == NULL */
diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index dc5665b628140..639397b5491ca 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -15,7 +15,18 @@ extern struct static_call_site __start_static_call_sites[],
 extern struct static_call_tramp_key __start_static_call_tramp_key[],
 				    __stop_static_call_tramp_key[];
 
-static bool static_call_initialized;
+static int static_call_initialized;
+
+/*
+ * Must be called before early_initcall() to be effective.
+ */
+void static_call_force_reinit(void)
+{
+	if (WARN_ON_ONCE(!static_call_initialized))
+		return;
+
+	static_call_initialized++;
+}
 
 /* mutex to protect key modules/sites */
 static DEFINE_MUTEX(static_call_mutex);
@@ -475,7 +486,8 @@ int __init static_call_init(void)
 {
 	int ret;
 
-	if (static_call_initialized)
+	/* See static_call_force_reinit(). */
+	if (static_call_initialized == 1)
 		return 0;
 
 	cpus_read_lock();
@@ -490,11 +502,12 @@ int __init static_call_init(void)
 		BUG();
 	}
 
-	static_call_initialized = true;
-
 #ifdef CONFIG_MODULES
-	register_module_notifier(&static_call_module_nb);
+	if (!static_call_initialized)
+		register_module_notifier(&static_call_module_nb);
 #endif
+
+	static_call_initialized = 1;
 	return 0;
 }
 early_initcall(static_call_init);
-- 
GitLab


From f1389181622a08d6f1c71407a64df36b809d632b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:32 +0200
Subject: [PATCH 0138/4122] kallsyms: Take callthunks into account

Since the pre-symbol function padding is an integral part of the
symbol make kallsyms report it as part of the symbol by reporting it
as sym-x instead of prev_sym+y.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.409656012@infradead.org
---
 kernel/kallsyms.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 60c20f301a6ba..cc244c02b4cf6 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -293,6 +293,12 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	return low;
 }
 
+#ifdef CONFIG_FUNCTION_PADDING_BYTES
+#define PADDING_BYTES	CONFIG_FUNCTION_PADDING_BYTES
+#else
+#define PADDING_BYTES	0
+#endif
+
 /*
  * Lookup an address but don't bother to find any names.
  */
@@ -300,13 +306,25 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
 				unsigned long *offset)
 {
 	char namebuf[KSYM_NAME_LEN];
+	int ret;
+
+	addr += PADDING_BYTES;
 
 	if (is_ksym_addr(addr)) {
 		get_symbol_pos(addr, symbolsize, offset);
-		return 1;
+		ret = 1;
+		goto found;
+	}
+
+	ret = !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf);
+	if (!ret) {
+		ret = !!__bpf_address_lookup(addr, symbolsize,
+					     offset, namebuf);
 	}
-	return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) ||
-	       !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
+found:
+	if (ret && offset)
+		*offset -= PADDING_BYTES;
+	return ret;
 }
 
 static const char *kallsyms_lookup_buildid(unsigned long addr,
@@ -319,6 +337,8 @@ static const char *kallsyms_lookup_buildid(unsigned long addr,
 	namebuf[KSYM_NAME_LEN - 1] = 0;
 	namebuf[0] = 0;
 
+	addr += PADDING_BYTES;
+
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
 
@@ -348,6 +368,8 @@ static const char *kallsyms_lookup_buildid(unsigned long addr,
 
 found:
 	cleanup_symbol_name(namebuf);
+	if (ret && offset)
+		*offset -= PADDING_BYTES;
 	return ret;
 }
 
@@ -374,6 +396,8 @@ int lookup_symbol_name(unsigned long addr, char *symname)
 	symname[0] = '\0';
 	symname[KSYM_NAME_LEN - 1] = '\0';
 
+	addr += PADDING_BYTES;
+
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
 
@@ -401,6 +425,8 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 	name[0] = '\0';
 	name[KSYM_NAME_LEN - 1] = '\0';
 
+	addr += PADDING_BYTES;
+
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
 
@@ -417,6 +443,8 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 		return res;
 
 found:
+	if (offset)
+		*offset -= PADDING_BYTES;
 	cleanup_symbol_name(name);
 	return 0;
 }
@@ -442,8 +470,15 @@ static int __sprint_symbol(char *buffer, unsigned long address,
 	len = strlen(buffer);
 	offset -= symbol_offset;
 
-	if (add_offset)
-		len += sprintf(buffer + len, "+%#lx/%#lx", offset, size);
+	if (add_offset) {
+		char s = '+';
+
+		if ((long)offset < 0) {
+			s = '-';
+			offset = 0UL - offset;
+		}
+		len += sprintf(buffer + len, "%c%#lx/%#lx", s, offset, size);
+	}
 
 	if (modname) {
 		len += sprintf(buffer + len, " [%s", modname);
-- 
GitLab


From 396e0b8e09e86440c2119d12c2101110d3cd5bf9 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:33 +0200
Subject: [PATCH 0139/4122] x86/orc: Make it callthunk aware

Callthunks addresses on the stack would confuse the ORC unwinder. Handle
them correctly and tell ORC to proceed further down the stack.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.511637628@infradead.org
---
 arch/x86/include/asm/alternative.h |  5 +++++
 arch/x86/kernel/callthunks.c       | 13 +++++++++++++
 arch/x86/kernel/unwind_orc.c       | 21 ++++++++++++++++++++-
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 07ac25793a3f4..4b8cd256c95ec 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -92,6 +92,7 @@ extern void callthunks_patch_builtin_calls(void);
 extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
 					  struct module *mod);
 extern void *callthunks_translate_call_dest(void *dest);
+extern bool is_callthunk(void *addr);
 #else
 static __always_inline void callthunks_patch_builtin_calls(void) {}
 static __always_inline void
@@ -101,6 +102,10 @@ static __always_inline void *callthunks_translate_call_dest(void *dest)
 {
 	return dest;
 }
+static __always_inline bool is_callthunk(void *addr)
+{
+	return false;
+}
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 071003605a865..7f9788194eb5a 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -293,6 +293,19 @@ void *callthunks_translate_call_dest(void *dest)
 	return target ? : dest;
 }
 
+bool is_callthunk(void *addr)
+{
+	unsigned int tmpl_size = SKL_TMPL_SIZE;
+	void *tmpl = skl_call_thunk_template;
+	unsigned long dest;
+
+	dest = roundup((unsigned long)addr, CONFIG_FUNCTION_ALIGNMENT);
+	if (!thunks_initialized || skip_addr((void *)dest))
+		return false;
+
+	return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
+}
+
 #ifdef CONFIG_MODULES
 void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
 					    struct module *mod)
diff --git a/arch/x86/kernel/unwind_orc.c b/arch/x86/kernel/unwind_orc.c
index 0ea57da929407..cfac2b54b37bc 100644
--- a/arch/x86/kernel/unwind_orc.c
+++ b/arch/x86/kernel/unwind_orc.c
@@ -136,6 +136,21 @@ static struct orc_entry null_orc_entry = {
 	.type = UNWIND_HINT_TYPE_CALL
 };
 
+#ifdef CONFIG_CALL_THUNKS
+static struct orc_entry *orc_callthunk_find(unsigned long ip)
+{
+	if (!is_callthunk((void *)ip))
+		return NULL;
+
+	return &null_orc_entry;
+}
+#else
+static struct orc_entry *orc_callthunk_find(unsigned long ip)
+{
+	return NULL;
+}
+#endif
+
 /* Fake frame pointer entry -- used as a fallback for generated code */
 static struct orc_entry orc_fp_entry = {
 	.type		= UNWIND_HINT_TYPE_CALL,
@@ -189,7 +204,11 @@ static struct orc_entry *orc_find(unsigned long ip)
 	if (orc)
 		return orc;
 
-	return orc_ftrace_find(ip);
+	orc =  orc_ftrace_find(ip);
+	if (orc)
+		return orc;
+
+	return orc_callthunk_find(ip);
 }
 
 #ifdef CONFIG_MODULES
-- 
GitLab


From b2e9dfe54be4d023124d588d6f03d16a9c0d2507 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:34 +0200
Subject: [PATCH 0140/4122] x86/bpf: Emit call depth accounting if required

Ensure that calls in BPF jitted programs are emitting call depth accounting
when enabled to keep the call/return balanced. The return thunk jump is
already injected due to the earlier retbleed mitigations.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.615413406@infradead.org
---
 arch/x86/include/asm/alternative.h |  6 ++++++
 arch/x86/kernel/callthunks.c       | 19 ++++++++++++++++++
 arch/x86/net/bpf_jit_comp.c        | 32 +++++++++++++++++++++---------
 3 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 4b8cd256c95ec..664c0779375c1 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -93,6 +93,7 @@ extern void callthunks_patch_module_calls(struct callthunk_sites *sites,
 					  struct module *mod);
 extern void *callthunks_translate_call_dest(void *dest);
 extern bool is_callthunk(void *addr);
+extern int x86_call_depth_emit_accounting(u8 **pprog, void *func);
 #else
 static __always_inline void callthunks_patch_builtin_calls(void) {}
 static __always_inline void
@@ -106,6 +107,11 @@ static __always_inline bool is_callthunk(void *addr)
 {
 	return false;
 }
+static __always_inline int x86_call_depth_emit_accounting(u8 **pprog,
+							  void *func)
+{
+	return 0;
+}
 #endif
 
 #ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index 7f9788194eb5a..a03d646b5e692 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -306,6 +306,25 @@ bool is_callthunk(void *addr)
 	return !bcmp((void *)(dest - tmpl_size), tmpl, tmpl_size);
 }
 
+#ifdef CONFIG_BPF_JIT
+int x86_call_depth_emit_accounting(u8 **pprog, void *func)
+{
+	unsigned int tmpl_size = SKL_TMPL_SIZE;
+	void *tmpl = skl_call_thunk_template;
+
+	if (!thunks_initialized)
+		return 0;
+
+	/* Is function call target a thunk? */
+	if (is_callthunk(func))
+		return 0;
+
+	memcpy(*pprog, tmpl, tmpl_size);
+	*pprog += tmpl_size;
+	return tmpl_size;
+}
+#endif
+
 #ifdef CONFIG_MODULES
 void noinline callthunks_patch_module_calls(struct callthunk_sites *cs,
 					    struct module *mod)
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index ad8cb7f15ab8a..a6b46740ea300 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -340,6 +340,13 @@ static int emit_call(u8 **pprog, void *func, void *ip)
 	return emit_patch(pprog, func, ip, 0xE8);
 }
 
+static int emit_rsb_call(u8 **pprog, void *func, void *ip)
+{
+	OPTIMIZER_HIDE_VAR(func);
+	x86_call_depth_emit_accounting(pprog, func);
+	return emit_patch(pprog, func, ip, 0xE8);
+}
+
 static int emit_jump(u8 **pprog, void *func, void *ip)
 {
 	return emit_patch(pprog, func, ip, 0xE9);
@@ -1436,19 +1443,26 @@ st:			if (is_imm8(insn->off))
 			break;
 
 			/* call */
-		case BPF_JMP | BPF_CALL:
+		case BPF_JMP | BPF_CALL: {
+			int offs;
+
 			func = (u8 *) __bpf_call_base + imm32;
 			if (tail_call_reachable) {
 				/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
 				EMIT3_off32(0x48, 0x8B, 0x85,
 					    -round_up(bpf_prog->aux->stack_depth, 8) - 8);
-				if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
+				if (!imm32)
 					return -EINVAL;
+				offs = 7 + x86_call_depth_emit_accounting(&prog, func);
 			} else {
-				if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
+				if (!imm32)
 					return -EINVAL;
+				offs = x86_call_depth_emit_accounting(&prog, func);
 			}
+			if (emit_call(&prog, func, image + addrs[i - 1] + offs))
+				return -EINVAL;
 			break;
+		}
 
 		case BPF_JMP | BPF_TAIL_CALL:
 			if (imm32)
@@ -1854,7 +1868,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	/* arg2: lea rsi, [rbp - ctx_cookie_off] */
 	EMIT4(0x48, 0x8D, 0x75, -run_ctx_off);
 
-	if (emit_call(&prog, enter, prog))
+	if (emit_rsb_call(&prog, enter, prog))
 		return -EINVAL;
 	/* remember prog start time returned by __bpf_prog_enter */
 	emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
@@ -1875,7 +1889,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 			       (long) p->insnsi >> 32,
 			       (u32) (long) p->insnsi);
 	/* call JITed bpf program or interpreter */
-	if (emit_call(&prog, p->bpf_func, prog))
+	if (emit_rsb_call(&prog, p->bpf_func, prog))
 		return -EINVAL;
 
 	/*
@@ -1899,7 +1913,7 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
 	emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
 	/* arg3: lea rdx, [rbp - run_ctx_off] */
 	EMIT4(0x48, 0x8D, 0x55, -run_ctx_off);
-	if (emit_call(&prog, exit, prog))
+	if (emit_rsb_call(&prog, exit, prog))
 		return -EINVAL;
 
 	*pprog = prog;
@@ -2147,7 +2161,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	if (flags & BPF_TRAMP_F_CALL_ORIG) {
 		/* arg1: mov rdi, im */
 		emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
-		if (emit_call(&prog, __bpf_tramp_enter, prog)) {
+		if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
@@ -2179,7 +2193,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 			EMIT2(0xff, 0xd0); /* call *rax */
 		} else {
 			/* call original function */
-			if (emit_call(&prog, orig_call, prog)) {
+			if (emit_rsb_call(&prog, orig_call, prog)) {
 				ret = -EINVAL;
 				goto cleanup;
 			}
@@ -2223,7 +2237,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 		im->ip_epilogue = prog;
 		/* arg1: mov rdi, im */
 		emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
-		if (emit_call(&prog, __bpf_tramp_exit, prog)) {
+		if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) {
 			ret = -EINVAL;
 			goto cleanup;
 		}
-- 
GitLab


From eac828eaef295cd0cc8b58f55fa5c8401fdc2370 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:35 +0200
Subject: [PATCH 0141/4122] x86/ftrace: Remove ftrace_epilogue()

Remove the weird jumps to RET and simply use RET.

This then promotes ftrace_stub() to a real function; which becomes
important for kcfi.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.719080593@infradead.org
---
 arch/x86/kernel/ftrace_64.S | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index dfeb227de5617..a90c55a6b4817 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -172,20 +172,14 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
 	 */
 SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
-
-	jmp ftrace_epilogue
+	RET
 SYM_FUNC_END(ftrace_caller);
 STACK_FRAME_NON_STANDARD_FP(ftrace_caller)
 
-SYM_FUNC_START(ftrace_epilogue)
-/*
- * This is weak to keep gas from relaxing the jumps.
- */
-SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+SYM_FUNC_START(ftrace_stub)
 	UNWIND_HINT_FUNC
-	ENDBR
 	RET
-SYM_FUNC_END(ftrace_epilogue)
+SYM_FUNC_END(ftrace_stub)
 
 SYM_FUNC_START(ftrace_regs_caller)
 	/* Save the current flags before any operations that can change them */
@@ -262,14 +256,11 @@ SYM_INNER_LABEL(ftrace_regs_caller_jmp, SYM_L_GLOBAL)
 	popfq
 
 	/*
-	 * As this jmp to ftrace_epilogue can be a short jump
-	 * it must not be copied into the trampoline.
-	 * The trampoline will add the code to jump
-	 * to the return.
+	 * The trampoline will add the return.
 	 */
 SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
-	jmp ftrace_epilogue
+	RET
 
 	/* Swap the flags with orig_rax */
 1:	movq MCOUNT_REG_SIZE(%rsp), %rdi
@@ -280,7 +271,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
 	/* Restore flags */
 	popfq
 	UNWIND_HINT_FUNC
-	jmp	ftrace_epilogue
+	RET
 
 SYM_FUNC_END(ftrace_regs_caller)
 STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
-- 
GitLab


From 36b64f101219dd9e6e4f0ea880b64e8a90da547b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:36 +0200
Subject: [PATCH 0142/4122] x86/ftrace: Rebalance RSB

ftrace_regs_caller() uses a PUSH;RET pattern to tail-call into a
direct-call function, this unbalances the RSB, fix that.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.823216933@infradead.org
---
 arch/x86/kernel/ftrace_64.S | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index a90c55a6b4817..b5b54f58957e7 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -271,6 +271,17 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
 	/* Restore flags */
 	popfq
 	UNWIND_HINT_FUNC
+
+	/*
+	 * The above left an extra return value on the stack; effectively
+	 * doing a tail-call without using a register. This PUSH;RET
+	 * pattern unbalances the RSB, inject a pointless CALL to rebalance.
+	 */
+	ANNOTATE_INTRA_FUNCTION_CALL
+	CALL .Ldo_rebalance
+	int3
+.Ldo_rebalance:
+	add $8, %rsp
 	RET
 
 SYM_FUNC_END(ftrace_regs_caller)
-- 
GitLab


From ee3e2469b3463d28ca4cde20e0283319ac6a562d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 15 Sep 2022 13:11:37 +0200
Subject: [PATCH 0143/4122] x86/ftrace: Make it call depth tracking aware

Since ftrace has trampolines, don't use thunks for the __fentry__ site
but instead require that every function called from there includes
accounting. This very much includes all the direct-call functions.

Additionally, ftrace uses ROP tricks in two places:

 - return_to_handler(), and
 - ftrace_regs_caller() when pt_regs->orig_ax is set by a direct-call.

return_to_handler() already uses a retpoline to replace an
indirect-jump to defeat IBT, since this is a jump-type retpoline, make
sure there is no accounting done and ALTERNATIVE the RET into a ret.

ftrace_regs_caller() does much the same and gets the same treatment.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111148.927545073@infradead.org
---
 arch/x86/include/asm/nospec-branch.h        |  9 +++++++++
 arch/x86/kernel/callthunks.c                |  2 +-
 arch/x86/kernel/ftrace.c                    | 16 +++++++++++----
 arch/x86/kernel/ftrace_64.S                 | 22 +++++++++++++++++++--
 arch/x86/net/bpf_jit_comp.c                 |  6 ++++++
 kernel/trace/trace_selftest.c               |  9 ++++++++-
 samples/ftrace/ftrace-direct-modify.c       |  3 +++
 samples/ftrace/ftrace-direct-multi-modify.c |  3 +++
 samples/ftrace/ftrace-direct-multi.c        |  2 ++
 samples/ftrace/ftrace-direct-too.c          |  2 ++
 samples/ftrace/ftrace-direct.c              |  2 ++
 11 files changed, 68 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 4771147c7c5a6..82580adbca4b2 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -343,6 +343,12 @@ static inline void x86_set_skl_return_thunk(void)
 {
 	x86_return_thunk = &__x86_return_skl;
 }
+
+#define CALL_DEPTH_ACCOUNT					\
+	ALTERNATIVE("",						\
+		    __stringify(INCREMENT_CALL_DEPTH),		\
+		    X86_FEATURE_CALL_DEPTH)
+
 #ifdef CONFIG_CALL_THUNKS_DEBUG
 DECLARE_PER_CPU(u64, __x86_call_count);
 DECLARE_PER_CPU(u64, __x86_ret_count);
@@ -351,6 +357,9 @@ DECLARE_PER_CPU(u64, __x86_ctxsw_count);
 #endif
 #else
 static inline void x86_set_skl_return_thunk(void) {}
+
+#define CALL_DEPTH_ACCOUNT ""
+
 #endif
 
 #ifdef CONFIG_RETPOLINE
diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c
index a03d646b5e692..7d2c75ec9a8cd 100644
--- a/arch/x86/kernel/callthunks.c
+++ b/arch/x86/kernel/callthunks.c
@@ -316,7 +316,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func)
 		return 0;
 
 	/* Is function call target a thunk? */
-	if (is_callthunk(func))
+	if (func && is_callthunk(func))
 		return 0;
 
 	memcpy(*pprog, tmpl, tmpl_size);
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 4ac6692d5ef82..cf15ef5aecff2 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -69,6 +69,10 @@ static const char *ftrace_nop_replace(void)
 
 static const char *ftrace_call_replace(unsigned long ip, unsigned long addr)
 {
+	/*
+	 * No need to translate into a callthunk. The trampoline does
+	 * the depth accounting itself.
+	 */
 	return text_gen_insn(CALL_INSN_OPCODE, (void *)ip, (void *)addr);
 }
 
@@ -317,7 +321,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	unsigned long size;
 	unsigned long *ptr;
 	void *trampoline;
-	void *ip;
+	void *ip, *dest;
 	/* 48 8b 15 <offset> is movq <offset>(%rip), %rdx */
 	unsigned const char op_ref[] = { 0x48, 0x8b, 0x15 };
 	unsigned const char retq[] = { RET_INSN_OPCODE, INT3_INSN_OPCODE };
@@ -404,10 +408,14 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 	/* put in the call to the function */
 	mutex_lock(&text_mutex);
 	call_offset -= start_offset;
+	/*
+	 * No need to translate into a callthunk. The trampoline does
+	 * the depth accounting before the call already.
+	 */
+	dest = ftrace_ops_get_func(ops);
 	memcpy(trampoline + call_offset,
-	       text_gen_insn(CALL_INSN_OPCODE,
-			     trampoline + call_offset,
-			     ftrace_ops_get_func(ops)), CALL_INSN_SIZE);
+	       text_gen_insn(CALL_INSN_OPCODE, trampoline + call_offset, dest),
+	       CALL_INSN_SIZE);
 	mutex_unlock(&text_mutex);
 
 	/* ALLOC_TRAMP flags lets us know we created it */
diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S
index b5b54f58957e7..6a7e6d666a12b 100644
--- a/arch/x86/kernel/ftrace_64.S
+++ b/arch/x86/kernel/ftrace_64.S
@@ -4,6 +4,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/asm-offsets.h>
 #include <asm/ptrace.h>
 #include <asm/ftrace.h>
 #include <asm/export.h>
@@ -132,6 +133,7 @@
 #ifdef CONFIG_DYNAMIC_FTRACE
 
 SYM_FUNC_START(__fentry__)
+	CALL_DEPTH_ACCOUNT
 	RET
 SYM_FUNC_END(__fentry__)
 EXPORT_SYMBOL(__fentry__)
@@ -140,6 +142,8 @@ SYM_FUNC_START(ftrace_caller)
 	/* save_mcount_regs fills in first two parameters */
 	save_mcount_regs
 
+	CALL_DEPTH_ACCOUNT
+
 	/* Stack - skipping return address of ftrace_caller */
 	leaq MCOUNT_REG_SIZE+8(%rsp), %rcx
 	movq %rcx, RSP(%rsp)
@@ -155,6 +159,9 @@ SYM_INNER_LABEL(ftrace_caller_op_ptr, SYM_L_GLOBAL)
 	/* Only ops with REGS flag set should have CS register set */
 	movq $0, CS(%rsp)
 
+	/* Account for the function call below */
+	CALL_DEPTH_ACCOUNT
+
 SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
 	call ftrace_stub
@@ -189,6 +196,8 @@ SYM_FUNC_START(ftrace_regs_caller)
 	save_mcount_regs 8
 	/* save_mcount_regs fills in first two parameters */
 
+	CALL_DEPTH_ACCOUNT
+
 SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
 	/* Load the ftrace_ops into the 3rd parameter */
@@ -219,6 +228,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_op_ptr, SYM_L_GLOBAL)
 	/* regs go into 4th parameter */
 	leaq (%rsp), %rcx
 
+	/* Account for the function call below */
+	CALL_DEPTH_ACCOUNT
+
 SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL)
 	ANNOTATE_NOENDBR
 	call ftrace_stub
@@ -282,7 +294,9 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
 	int3
 .Ldo_rebalance:
 	add $8, %rsp
-	RET
+	ALTERNATIVE __stringify(RET), \
+		    __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
+		    X86_FEATURE_CALL_DEPTH
 
 SYM_FUNC_END(ftrace_regs_caller)
 STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
@@ -291,6 +305,8 @@ STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
 #else /* ! CONFIG_DYNAMIC_FTRACE */
 
 SYM_FUNC_START(__fentry__)
+	CALL_DEPTH_ACCOUNT
+
 	cmpq $ftrace_stub, ftrace_trace_function
 	jnz trace
 
@@ -347,6 +363,8 @@ SYM_CODE_START(return_to_handler)
 	int3
 .Ldo_rop:
 	mov %rdi, (%rsp)
-	RET
+	ALTERNATIVE __stringify(RET), \
+		    __stringify(ANNOTATE_UNRET_SAFE; ret; int3), \
+		    X86_FEATURE_CALL_DEPTH
 SYM_CODE_END(return_to_handler)
 #endif
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a6b46740ea300..f46b62029d91e 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -12,6 +12,7 @@
 #include <linux/memory.h>
 #include <linux/sort.h>
 #include <asm/extable.h>
+#include <asm/ftrace.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
@@ -2135,6 +2136,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
 	prog = image;
 
 	EMIT_ENDBR();
+	/*
+	 * This is the direct-call trampoline, as such it needs accounting
+	 * for the __fentry__ call.
+	 */
+	x86_call_depth_emit_accounting(&prog, NULL);
 	EMIT1(0x55);		 /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
 	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index a2d301f58ceda..ff0536cea9682 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -785,7 +785,14 @@ static struct fgraph_ops fgraph_ops __initdata  = {
 };
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS
-noinline __noclone static void trace_direct_tramp(void) { }
+#ifndef CALL_DEPTH_ACCOUNT
+#define CALL_DEPTH_ACCOUNT ""
+#endif
+
+noinline __noclone static void trace_direct_tramp(void)
+{
+	asm(CALL_DEPTH_ACCOUNT);
+}
 #endif
 
 /*
diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c
index 39146fa83e20b..de5a0f67f320c 100644
--- a/samples/ftrace/ftrace-direct-modify.c
+++ b/samples/ftrace/ftrace-direct-modify.c
@@ -3,6 +3,7 @@
 #include <linux/kthread.h>
 #include <linux/ftrace.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-branch.h>
 
 extern void my_direct_func1(void);
 extern void my_direct_func2(void);
@@ -34,6 +35,7 @@ asm (
 	ASM_ENDBR
 "	pushq %rbp\n"
 "	movq %rsp, %rbp\n"
+	CALL_DEPTH_ACCOUNT
 "	call my_direct_func1\n"
 "	leave\n"
 "	.size		my_tramp1, .-my_tramp1\n"
@@ -45,6 +47,7 @@ asm (
 	ASM_ENDBR
 "	pushq %rbp\n"
 "	movq %rsp, %rbp\n"
+	CALL_DEPTH_ACCOUNT
 "	call my_direct_func2\n"
 "	leave\n"
 	ASM_RET
diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c
index 65aa94d96f4e3..d52370cad0b6e 100644
--- a/samples/ftrace/ftrace-direct-multi-modify.c
+++ b/samples/ftrace/ftrace-direct-multi-modify.c
@@ -3,6 +3,7 @@
 #include <linux/kthread.h>
 #include <linux/ftrace.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-branch.h>
 
 extern void my_direct_func1(unsigned long ip);
 extern void my_direct_func2(unsigned long ip);
@@ -32,6 +33,7 @@ asm (
 	ASM_ENDBR
 "	pushq %rbp\n"
 "	movq %rsp, %rbp\n"
+	CALL_DEPTH_ACCOUNT
 "	pushq %rdi\n"
 "	movq 8(%rbp), %rdi\n"
 "	call my_direct_func1\n"
@@ -46,6 +48,7 @@ asm (
 	ASM_ENDBR
 "	pushq %rbp\n"
 "	movq %rsp, %rbp\n"
+	CALL_DEPTH_ACCOUNT
 "	pushq %rdi\n"
 "	movq 8(%rbp), %rdi\n"
 "	call my_direct_func2\n"
diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c
index 41ded7c615c7f..ec1088922517d 100644
--- a/samples/ftrace/ftrace-direct-multi.c
+++ b/samples/ftrace/ftrace-direct-multi.c
@@ -5,6 +5,7 @@
 #include <linux/ftrace.h>
 #include <linux/sched/stat.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-branch.h>
 
 extern void my_direct_func(unsigned long ip);
 
@@ -27,6 +28,7 @@ asm (
 	ASM_ENDBR
 "	pushq %rbp\n"
 "	movq %rsp, %rbp\n"
+	CALL_DEPTH_ACCOUNT
 "	pushq %rdi\n"
 "	movq 8(%rbp), %rdi\n"
 "	call my_direct_func\n"
diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c
index 6690468c5cc2d..e13fb59a2b47a 100644
--- a/samples/ftrace/ftrace-direct-too.c
+++ b/samples/ftrace/ftrace-direct-too.c
@@ -4,6 +4,7 @@
 #include <linux/mm.h> /* for handle_mm_fault() */
 #include <linux/ftrace.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-branch.h>
 
 extern void my_direct_func(struct vm_area_struct *vma,
 			   unsigned long address, unsigned int flags);
@@ -29,6 +30,7 @@ asm (
 	ASM_ENDBR
 "	pushq %rbp\n"
 "	movq %rsp, %rbp\n"
+	CALL_DEPTH_ACCOUNT
 "	pushq %rdi\n"
 "	pushq %rsi\n"
 "	pushq %rdx\n"
diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c
index e8f1e440b9b8a..1f769d0db20f3 100644
--- a/samples/ftrace/ftrace-direct.c
+++ b/samples/ftrace/ftrace-direct.c
@@ -4,6 +4,7 @@
 #include <linux/sched.h> /* for wake_up_process() */
 #include <linux/ftrace.h>
 #include <asm/asm-offsets.h>
+#include <asm/nospec-branch.h>
 
 extern void my_direct_func(struct task_struct *p);
 
@@ -26,6 +27,7 @@ asm (
 	ASM_ENDBR
 "	pushq %rbp\n"
 "	movq %rsp, %rbp\n"
+	CALL_DEPTH_ACCOUNT
 "	pushq %rdi\n"
 "	call my_direct_func\n"
 "	popq %rdi\n"
-- 
GitLab


From d82a0345cf218f5050f5ad913e1ae6c579105731 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Thu, 15 Sep 2022 13:11:38 +0200
Subject: [PATCH 0144/4122] x86/retbleed: Add call depth tracking mitigation

The fully secure mitigation for RSB underflow on Intel SKL CPUs is IBRS,
which inflicts up to 30% penalty for pathological syscall heavy work loads.

Software based call depth tracking and RSB refill is not perfect, but
reduces the attack surface massively. The penalty for the pathological case
is about 8% which is still annoying but definitely more palatable than IBRS.

Add a retbleed=stuff command line option to enable the call depth tracking
and software refill of the RSB.

This gives admins a choice. IBeeRS are safe and cause headaches, call depth
tracking is considered to be s(t)ufficiently safe.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220915111149.029587352@infradead.org
---
 arch/x86/kernel/cpu/bugs.c | 32 ++++++++++++++++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index da7c361f47e0d..e6c23ead16170 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -787,6 +787,7 @@ enum retbleed_mitigation {
 	RETBLEED_MITIGATION_IBPB,
 	RETBLEED_MITIGATION_IBRS,
 	RETBLEED_MITIGATION_EIBRS,
+	RETBLEED_MITIGATION_STUFF,
 };
 
 enum retbleed_mitigation_cmd {
@@ -794,6 +795,7 @@ enum retbleed_mitigation_cmd {
 	RETBLEED_CMD_AUTO,
 	RETBLEED_CMD_UNRET,
 	RETBLEED_CMD_IBPB,
+	RETBLEED_CMD_STUFF,
 };
 
 static const char * const retbleed_strings[] = {
@@ -802,6 +804,7 @@ static const char * const retbleed_strings[] = {
 	[RETBLEED_MITIGATION_IBPB]	= "Mitigation: IBPB",
 	[RETBLEED_MITIGATION_IBRS]	= "Mitigation: IBRS",
 	[RETBLEED_MITIGATION_EIBRS]	= "Mitigation: Enhanced IBRS",
+	[RETBLEED_MITIGATION_STUFF]	= "Mitigation: Stuffing",
 };
 
 static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
@@ -831,6 +834,8 @@ static int __init retbleed_parse_cmdline(char *str)
 			retbleed_cmd = RETBLEED_CMD_UNRET;
 		} else if (!strcmp(str, "ibpb")) {
 			retbleed_cmd = RETBLEED_CMD_IBPB;
+		} else if (!strcmp(str, "stuff")) {
+			retbleed_cmd = RETBLEED_CMD_STUFF;
 		} else if (!strcmp(str, "nosmt")) {
 			retbleed_nosmt = true;
 		} else {
@@ -879,6 +884,21 @@ static void __init retbleed_select_mitigation(void)
 		}
 		break;
 
+	case RETBLEED_CMD_STUFF:
+		if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING) &&
+		    spectre_v2_enabled == SPECTRE_V2_RETPOLINE) {
+			retbleed_mitigation = RETBLEED_MITIGATION_STUFF;
+
+		} else {
+			if (IS_ENABLED(CONFIG_CALL_DEPTH_TRACKING))
+				pr_err("WARNING: retbleed=stuff depends on spectre_v2=retpoline\n");
+			else
+				pr_err("WARNING: kernel not compiled with CALL_DEPTH_TRACKING.\n");
+
+			goto do_cmd_auto;
+		}
+		break;
+
 do_cmd_auto:
 	case RETBLEED_CMD_AUTO:
 	default:
@@ -916,6 +936,12 @@ do_cmd_auto:
 		mitigate_smt = true;
 		break;
 
+	case RETBLEED_MITIGATION_STUFF:
+		setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+		setup_force_cpu_cap(X86_FEATURE_CALL_DEPTH);
+		x86_set_skl_return_thunk();
+		break;
+
 	default:
 		break;
 	}
@@ -926,7 +952,7 @@ do_cmd_auto:
 
 	/*
 	 * Let IBRS trump all on Intel without affecting the effects of the
-	 * retbleed= cmdline option.
+	 * retbleed= cmdline option except for call depth based stuffing
 	 */
 	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
 		switch (spectre_v2_enabled) {
@@ -939,7 +965,8 @@ do_cmd_auto:
 			retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
 			break;
 		default:
-			pr_err(RETBLEED_INTEL_MSG);
+			if (retbleed_mitigation != RETBLEED_MITIGATION_STUFF)
+				pr_err(RETBLEED_INTEL_MSG);
 		}
 	}
 
@@ -1413,6 +1440,7 @@ static void __init spectre_v2_select_mitigation(void)
 		if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
 		    boot_cpu_has_bug(X86_BUG_RETBLEED) &&
 		    retbleed_cmd != RETBLEED_CMD_OFF &&
+		    retbleed_cmd != RETBLEED_CMD_STUFF &&
 		    boot_cpu_has(X86_FEATURE_IBRS) &&
 		    boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
 			mode = SPECTRE_V2_IBRS;
-- 
GitLab


From 5c9a92dec3235b0c1d51e92860f8014753161593 Mon Sep 17 00:00:00 2001
From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Date: Mon, 17 Oct 2022 16:41:20 +0200
Subject: [PATCH 0145/4122] x86/bugs: Add retbleed=force

Debug aid, allows running retbleed=force,stuff on non-affected uarchs

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/kernel/cpu/bugs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e6c23ead16170..b307b83e22bef 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -838,6 +838,8 @@ static int __init retbleed_parse_cmdline(char *str)
 			retbleed_cmd = RETBLEED_CMD_STUFF;
 		} else if (!strcmp(str, "nosmt")) {
 			retbleed_nosmt = true;
+		} else if (!strcmp(str, "force")) {
+			setup_force_cpu_bug(X86_BUG_RETBLEED);
 		} else {
 			pr_err("Ignoring unknown retbleed option (%s).", str);
 		}
-- 
GitLab


From 44112922674b94a7d699dfff6307fc830018df7c Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 17 Oct 2022 17:20:28 +0800
Subject: [PATCH 0146/4122] scsi: libsas: Add sas_ata_device_link_abort()

Similar to how AHCI handles NCQ errors in ahci_error_intr() ->
ata_port_abort() -> ata_do_link_abort(), add an NCQ error handler for LLDDs
to call to initiate a link abort.

This will mark all outstanding QCs as failed and kick-off EH.

Note:

A "force reset" argument is added for drivers which require the ATA error
handling to always reset the device.

A driver may require this feature for when SATA device per-SCSI cmnd
resources are only released during reset for ATA EH. As such, we need an
option to force reset to be done, regardless of what any EH autopsy
decides.

The SATA device FIS fields are set to indicate a device error from
ata_eh_analyze_tf().

Suggested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Suggested-by: Niklas Cassel <niklas.cassel@wdc.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-2-git-send-email-john.garry@huawei.com
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Niklas Cassel <niklas.cassel@wdc.com> # pm80xx
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_ata.c | 15 +++++++++++++++
 include/scsi/sas_ata.h        |  6 ++++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index d35c9296f7388..61f64d54e67da 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -861,6 +861,21 @@ void sas_ata_wait_eh(struct domain_device *dev)
 	ata_port_wait_eh(ap);
 }
 
+void sas_ata_device_link_abort(struct domain_device *device, bool force_reset)
+{
+	struct ata_port *ap = device->sata_dev.ap;
+	struct ata_link *link = &ap->link;
+
+	device->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */
+	device->sata_dev.fis[3] = ATA_ABORTED; /* tf error */
+
+	link->eh_info.err_mask |= AC_ERR_DEV;
+	if (force_reset)
+		link->eh_info.action |= ATA_EH_RESET;
+	ata_link_abort(link);
+}
+EXPORT_SYMBOL_GPL(sas_ata_device_link_abort);
+
 int sas_execute_ata_cmd(struct domain_device *device, u8 *fis, int force_phy_id)
 {
 	struct sas_tmf_task tmf_task = {};
diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h
index a1df4f9d57a31..e47f0aec0722f 100644
--- a/include/scsi/sas_ata.h
+++ b/include/scsi/sas_ata.h
@@ -32,6 +32,7 @@ void sas_probe_sata(struct asd_sas_port *port);
 void sas_suspend_sata(struct asd_sas_port *port);
 void sas_resume_sata(struct asd_sas_port *port);
 void sas_ata_end_eh(struct ata_port *ap);
+void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset);
 int sas_execute_ata_cmd(struct domain_device *device, u8 *fis,
 			int force_phy_id);
 int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline);
@@ -87,6 +88,11 @@ static inline void sas_ata_end_eh(struct ata_port *ap)
 {
 }
 
+static inline void sas_ata_device_link_abort(struct domain_device *dev,
+					     bool force_reset)
+{
+}
+
 static inline int sas_execute_ata_cmd(struct domain_device *device, u8 *fis,
 				      int force_phy_id)
 {
-- 
GitLab


From 4b329abc91800d23941ac773e69b322a13981ecb Mon Sep 17 00:00:00 2001
From: Xingui Yang <yangxingui@huawei.com>
Date: Mon, 17 Oct 2022 17:20:29 +0800
Subject: [PATCH 0147/4122] scsi: hisi_sas: Move slot variable definition in
 hisi_sas_abort_task()

Each branch currently defines a slot variable independently, and it is
neater to move it to the function head.

Signed-off-by: Xingui Yang <yangxingui@huawei.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-3-git-send-email-john.garry@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 699b07abb6b0b..8303aa5eaf259 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1547,6 +1547,7 @@ static int hisi_sas_abort_task(struct sas_task *task)
 	struct hisi_sas_internal_abort_data internal_abort_data = { false };
 	struct domain_device *device = task->dev;
 	struct hisi_sas_device *sas_dev = device->lldd_dev;
+	struct hisi_sas_slot *slot = task->lldd_task;
 	struct hisi_hba *hisi_hba;
 	struct device *dev;
 	int rc = TMF_RESP_FUNC_FAILED;
@@ -1560,7 +1561,6 @@ static int hisi_sas_abort_task(struct sas_task *task)
 
 	spin_lock_irqsave(&task->task_state_lock, flags);
 	if (task->task_state_flags & SAS_TASK_STATE_DONE) {
-		struct hisi_sas_slot *slot = task->lldd_task;
 		struct hisi_sas_cq *cq;
 
 		if (slot) {
@@ -1578,8 +1578,7 @@ static int hisi_sas_abort_task(struct sas_task *task)
 	task->task_state_flags |= SAS_TASK_STATE_ABORTED;
 	spin_unlock_irqrestore(&task->task_state_lock, flags);
 
-	if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SSP) {
-		struct hisi_sas_slot *slot = task->lldd_task;
+	if (slot && task->task_proto & SAS_PROTOCOL_SSP) {
 		u16 tag = slot->idx;
 		int rc2;
 
@@ -1613,9 +1612,8 @@ static int hisi_sas_abort_task(struct sas_task *task)
 			hisi_sas_dereg_device(hisi_hba, device);
 			rc = hisi_sas_softreset_ata_disk(device);
 		}
-	} else if (task->lldd_task && task->task_proto & SAS_PROTOCOL_SMP) {
+	} else if (slot && task->task_proto & SAS_PROTOCOL_SMP) {
 		/* SMP */
-		struct hisi_sas_slot *slot = task->lldd_task;
 		u32 tag = slot->idx;
 		struct hisi_sas_cq *cq = &hisi_hba->cq[slot->dlvry_queue];
 
-- 
GitLab


From 930d97dabdd56681aef752a35475f0212a171741 Mon Sep 17 00:00:00 2001
From: Xingui Yang <yangxingui@huawei.com>
Date: Mon, 17 Oct 2022 17:20:30 +0800
Subject: [PATCH 0148/4122] scsi: hisi_sas: Add SATA_DISK_ERR bit handling for
 v3 hw

When CQ header dw3 SATA_DISK_ERR is set it means this SATA disk is in error
state and the current IPTT is invalid. An invalid IPTT does not correspond
to any slot.

In this scenario, new I/Os that delivered to disk will be rejected by the
controller and all I/Os remaining in the disk should be aborted, which we
add here with the sas_ata_device_link_abort() call.

In hisi_sas_abort_task() we don't want to issue a soft reset as it may
cause info to be lost in the target disk for the ATA EH autopsy. In this
case, just release resources - the disk won't return other I/Os normally
after NCQ Error, so this is safe.

Signed-off-by: Xingui Yang <yangxingui@huawei.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-4-git-send-email-john.garry@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas.h       |  1 +
 drivers/scsi/hisi_sas/hisi_sas_main.c  | 18 +++++++++-
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 48 ++++++++++++++++++++++++--
 3 files changed, 64 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h
index 9aebf4a26b132..6f8a52a1b8087 100644
--- a/drivers/scsi/hisi_sas/hisi_sas.h
+++ b/drivers/scsi/hisi_sas/hisi_sas.h
@@ -104,6 +104,7 @@ enum {
 enum dev_status {
 	HISI_SAS_DEV_INIT,
 	HISI_SAS_DEV_NORMAL,
+	HISI_SAS_DEV_NCQ_ERR,
 };
 
 enum {
diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 8303aa5eaf259..4c37ae9eb6b60 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1604,13 +1604,26 @@ static int hisi_sas_abort_task(struct sas_task *task)
 	} else if (task->task_proto & SAS_PROTOCOL_SATA ||
 		task->task_proto & SAS_PROTOCOL_STP) {
 		if (task->dev->dev_type == SAS_SATA_DEV) {
+			struct ata_queued_cmd *qc = task->uldd_task;
+
 			rc = hisi_sas_internal_task_abort_dev(sas_dev, false);
 			if (rc < 0) {
 				dev_err(dev, "abort task: internal abort failed\n");
 				goto out;
 			}
 			hisi_sas_dereg_device(hisi_hba, device);
-			rc = hisi_sas_softreset_ata_disk(device);
+
+			/*
+			 * If an ATA internal command times out in ATA EH, it
+			 * need to execute soft reset, so check the scsicmd
+			 */
+			if ((sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR) &&
+			    qc && qc->scsicmd) {
+				hisi_sas_do_release_task(hisi_hba, task, slot);
+				rc = TMF_RESP_FUNC_COMPLETE;
+			} else {
+				rc = hisi_sas_softreset_ata_disk(device);
+			}
 		}
 	} else if (slot && task->task_proto & SAS_PROTOCOL_SMP) {
 		/* SMP */
@@ -1727,6 +1740,9 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device)
 	struct device *dev = hisi_hba->dev;
 	int rc;
 
+	if (sas_dev->dev_status == HISI_SAS_DEV_NCQ_ERR)
+		sas_dev->dev_status = HISI_SAS_DEV_NORMAL;
+
 	rc = hisi_sas_internal_task_abort_dev(sas_dev, false);
 	if (rc < 0) {
 		dev_err(dev, "I_T nexus reset: internal abort (%d)\n", rc);
diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index d56b4bfd27678..0ae8a60aaf936 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -404,6 +404,11 @@
 #define CMPLT_HDR_CMPLT_MSK		(0x3 << CMPLT_HDR_CMPLT_OFF)
 #define CMPLT_HDR_ERROR_PHASE_OFF   2
 #define CMPLT_HDR_ERROR_PHASE_MSK   (0xff << CMPLT_HDR_ERROR_PHASE_OFF)
+/* bit[9:2] Error Phase */
+#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE_OFF	\
+					8
+#define ERR_PHASE_RESPONSE_FRAME_REV_STAGE_MSK	\
+	(0x1 << ERR_PHASE_RESPONSE_FRAME_REV_STAGE_OFF)
 #define CMPLT_HDR_RSPNS_XFRD_OFF	10
 #define CMPLT_HDR_RSPNS_XFRD_MSK	(0x1 << CMPLT_HDR_RSPNS_XFRD_OFF)
 #define CMPLT_HDR_RSPNS_GOOD_OFF	11
@@ -423,8 +428,15 @@
 #define CMPLT_HDR_DEV_ID_OFF		16
 #define CMPLT_HDR_DEV_ID_MSK		(0xffff << CMPLT_HDR_DEV_ID_OFF)
 /* dw3 */
+#define CMPLT_HDR_SATA_DISK_ERR_OFF	16
+#define CMPLT_HDR_SATA_DISK_ERR_MSK	(0x1 << CMPLT_HDR_SATA_DISK_ERR_OFF)
 #define CMPLT_HDR_IO_IN_TARGET_OFF	17
 #define CMPLT_HDR_IO_IN_TARGET_MSK	(0x1 << CMPLT_HDR_IO_IN_TARGET_OFF)
+/* bit[23:18] ERR_FIS_ATA_STATUS */
+#define FIS_ATA_STATUS_ERR_OFF		18
+#define FIS_ATA_STATUS_ERR_MSK		(0x1 << FIS_ATA_STATUS_ERR_OFF)
+#define FIS_TYPE_SDB_OFF		31
+#define FIS_TYPE_SDB_MSK		(0x1 << FIS_TYPE_SDB_OFF)
 
 /* ITCT header */
 /* qw0 */
@@ -2148,6 +2160,18 @@ static irqreturn_t fatal_axi_int_v3_hw(int irq_no, void *p)
 	return IRQ_HANDLED;
 }
 
+static bool is_ncq_err_v3_hw(struct hisi_sas_complete_v3_hdr *complete_hdr)
+{
+	u32 dw0, dw3;
+
+	dw0 = le32_to_cpu(complete_hdr->dw0);
+	dw3 = le32_to_cpu(complete_hdr->dw3);
+
+	return (dw0 & ERR_PHASE_RESPONSE_FRAME_REV_STAGE_MSK) &&
+	       (dw3 & FIS_TYPE_SDB_MSK) &&
+	       (dw3 & FIS_ATA_STATUS_ERR_MSK);
+}
+
 static bool
 slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
 	       struct hisi_sas_slot *slot)
@@ -2381,14 +2405,34 @@ static irqreturn_t  cq_thread_v3_hw(int irq_no, void *p)
 	while (rd_point != wr_point) {
 		struct hisi_sas_complete_v3_hdr *complete_hdr;
 		struct device *dev = hisi_hba->dev;
-		u32 dw1;
+		u32 dw0, dw1, dw3;
 		int iptt;
 
 		complete_hdr = &complete_queue[rd_point];
+		dw0 = le32_to_cpu(complete_hdr->dw0);
 		dw1 = le32_to_cpu(complete_hdr->dw1);
+		dw3 = le32_to_cpu(complete_hdr->dw3);
 
 		iptt = dw1 & CMPLT_HDR_IPTT_MSK;
-		if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) {
+		if (unlikely((dw0 & CMPLT_HDR_CMPLT_MSK) == 0x3) &&
+			     (dw3 & CMPLT_HDR_SATA_DISK_ERR_MSK)) {
+			int device_id = (dw1 & CMPLT_HDR_DEV_ID_MSK) >>
+					CMPLT_HDR_DEV_ID_OFF;
+			struct hisi_sas_itct *itct =
+				&hisi_hba->itct[device_id];
+			struct hisi_sas_device *sas_dev =
+				&hisi_hba->devices[device_id];
+			struct domain_device *device = sas_dev->sas_device;
+
+			dev_err(dev, "erroneous completion disk err dev id=%d sas_addr=0x%llx CQ hdr: 0x%x 0x%x 0x%x 0x%x\n",
+				device_id, itct->sas_addr, dw0, dw1,
+				complete_hdr->act, dw3);
+
+			if (is_ncq_err_v3_hw(complete_hdr))
+				sas_dev->dev_status = HISI_SAS_DEV_NCQ_ERR;
+
+			sas_ata_device_link_abort(device, true);
+		} else if (likely(iptt < HISI_SAS_COMMAND_ENTRIES_V3_HW)) {
 			slot = &hisi_hba->slot_info[iptt];
 			slot->cmplt_queue_slot = rd_point;
 			slot->cmplt_queue = queue;
-- 
GitLab


From 4ef4f1a6155571d3d53583a4e8e7ccbbec220b8a Mon Sep 17 00:00:00 2001
From: Xingui Yang <yangxingui@huawei.com>
Date: Mon, 17 Oct 2022 17:20:31 +0800
Subject: [PATCH 0149/4122] scsi: hisi_sas: Modify v3 HW SATA disk error state
 completion processing

When an NCQ error occurs, the controller will abnormally complete the I/Os
that are newly delivered to disk, and bit8 in CQ dw3 will be set which
indicates that the SATA disk is in error state. The current processing flow
is to set ts->stat to SAS_OPEN_REJECT and then sas_ata_task_done() will set
FIS stat to ATA_ERR. After analyzing the I/O by ata_eh_analyze_tf(),
err_mask will set to AC_ERR_HSM. If media error occurs for four times
within 10 minutes and the chip rejects new I/Os for four times, NCQ will be
disabled due to excessive errors, which is undesirable.

Therefore, use sas_task_abort() to handle abnormally completed I/Os when
SATA disk is in error state, as these abnormally completed I/Os are already
processed by sas_ata_device_link_abort() and qc->flag are set to
ATA_QCFLAG_FAILED. If sas_task_abort() is used, qc->err_mask will not be
modified in EH. Unlike the current process flow, it will not increase the
count of ECAT_TOUT_HSM and not turn off NCQ. Like other I/Os on the disk
that do not have an error but do not return after the NCQ error, they are
retried after the EH.

Signed-off-by: Xingui Yang <yangxingui@huawei.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-5-git-send-email-john.garry@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
index 0ae8a60aaf936..0c3fcb8078062 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c
@@ -428,6 +428,8 @@
 #define CMPLT_HDR_DEV_ID_OFF		16
 #define CMPLT_HDR_DEV_ID_MSK		(0xffff << CMPLT_HDR_DEV_ID_OFF)
 /* dw3 */
+#define SATA_DISK_IN_ERROR_STATUS_OFF	8
+#define SATA_DISK_IN_ERROR_STATUS_MSK	(0x1 << SATA_DISK_IN_ERROR_STATUS_OFF)
 #define CMPLT_HDR_SATA_DISK_ERR_OFF	16
 #define CMPLT_HDR_SATA_DISK_ERR_MSK	(0x1 << CMPLT_HDR_SATA_DISK_ERR_OFF)
 #define CMPLT_HDR_IO_IN_TARGET_OFF	17
@@ -2219,7 +2221,8 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task,
 		} else if (dma_rx_err_type & RX_DATA_LEN_UNDERFLOW_MSK) {
 			ts->residual = trans_tx_fail_type;
 			ts->stat = SAS_DATA_UNDERRUN;
-		} else if (dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) {
+		} else if ((dw3 & CMPLT_HDR_IO_IN_TARGET_MSK) ||
+			   (dw3 & SATA_DISK_IN_ERROR_STATUS_MSK)) {
 			ts->stat = SAS_PHY_DOWN;
 			slot->abort = 1;
 		} else {
-- 
GitLab


From 0b639decf65160b1afd9993019be37d7869c0340 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 17 Oct 2022 17:20:32 +0800
Subject: [PATCH 0150/4122] scsi: pm8001: Modify task abort handling for SATA
 task

When we try to abort a SATA task, the CCB of the task which we are trying
to avoid may still complete. In this case, we should not touch the task
associated with that CCB as we can race with libsas freeing the last later
in sas_eh_handle_sas_errors() -> sas_eh_finish_cmd() for when
TASK_IS_ABORTED is returned from sas_scsi_find_task()

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-6-git-send-email-john.garry@huawei.com
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Niklas Cassel <niklas.cassel@wdc.com> # pm80xx
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_hwi.c | 15 +++++++++++++--
 drivers/scsi/pm8001/pm8001_sas.c |  8 ++++++++
 drivers/scsi/pm8001/pm80xx_hwi.c | 14 ++++++++++----
 3 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index 628b08ba6770b..c0adc3a9d1967 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -2295,7 +2295,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		if (t->dev && (t->dev->lldd_dev))
 			pm8001_dev = t->dev->lldd_dev;
 	} else {
-		pm8001_dbg(pm8001_ha, FAIL, "task null\n");
+		pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n",
+			   ccb->ccb_tag);
+		pm8001_ccb_free(pm8001_ha, ccb);
 		return;
 	}
 
@@ -2675,8 +2677,17 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	pm8001_dev = ccb->device;
 	if (event)
 		pm8001_dbg(pm8001_ha, FAIL, "sata IO status 0x%x\n", event);
-	if (unlikely(!t || !t->lldd_task || !t->dev))
+
+	if (unlikely(!t)) {
+		pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n",
+			   ccb->ccb_tag);
+		pm8001_ccb_free(pm8001_ha, ccb);
 		return;
+	}
+
+	if (unlikely(!t->lldd_task || !t->dev))
+		return;
+
 	ts = &t->task_status;
 	pm8001_dbg(pm8001_ha, DEVIO,
 		   "port_id:0x%x, device_id:0x%x, tag:0x%x, event:0x%x\n",
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 8e3f2f9ddaacd..d5ec29f69be31 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -983,6 +983,7 @@ int pm8001_query_task(struct sas_task *task)
 /*  mandatory SAM-3, still need free task/ccb info, abort the specified task */
 int pm8001_abort_task(struct sas_task *task)
 {
+	struct pm8001_ccb_info *ccb = task->lldd_task;
 	unsigned long flags;
 	u32 tag;
 	struct domain_device *dev ;
@@ -1113,6 +1114,13 @@ int pm8001_abort_task(struct sas_task *task)
 				pm8001_dev, DS_OPERATIONAL);
 			wait_for_completion(&completion);
 		} else {
+			/*
+			 * Ensure that if we see a completion for the ccb
+			 * associated with the task which we are trying to
+			 * abort then we should not touch the sas_task as it
+			 * may race with libsas freeing it when return here.
+			 */
+			ccb->task = NULL;
 			ret = sas_execute_internal_abort_single(dev, tag, 0, NULL);
 		}
 		rc = TMF_RESP_FUNC_COMPLETE;
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index f8b8624458f73..dd0e06983cd38 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -2396,7 +2396,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
 		if (t->dev && (t->dev->lldd_dev))
 			pm8001_dev = t->dev->lldd_dev;
 	} else {
-		pm8001_dbg(pm8001_ha, FAIL, "task null\n");
+		pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n",
+			   ccb->ccb_tag);
+		pm8001_ccb_free(pm8001_ha, ccb);
 		return;
 	}
 
@@ -2813,12 +2815,16 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
 	ccb = &pm8001_ha->ccb_info[tag];
 	t = ccb->task;
 	pm8001_dev = ccb->device;
-
-	if (unlikely(!t || !t->lldd_task || !t->dev)) {
-		pm8001_dbg(pm8001_ha, FAIL, "task or dev null\n");
+	if (unlikely(!t)) {
+		pm8001_dbg(pm8001_ha, FAIL, "task null, freeing CCB tag %d\n",
+			   ccb->ccb_tag);
+		pm8001_ccb_free(pm8001_ha, ccb);
 		return;
 	}
 
+	if (unlikely(!t->lldd_task || !t->dev))
+		return;
+
 	ts = &t->task_status;
 	pm8001_dbg(pm8001_ha, IOERR, "port_id:0x%x, tag:0x%x, event:0x%x\n",
 		   port_id, tag, event);
-- 
GitLab


From 811be570a9a8df96b4fd43ff00837b947bbaf49b Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 17 Oct 2022 17:20:33 +0800
Subject: [PATCH 0151/4122] scsi: pm8001: Use sas_ata_device_link_abort() to
 handle NCQ errors

In commit c6b9ef5779c3 ("[SCSI] pm80xx: NCQ error handling changes") the
driver had support added to handle NCQ errors but much of what is done in
this handling is duplicated from the libata EH.

In that named commit we handle in 2x main steps:

 a. Issue read log ext10 to examine and clear the errors

 b. Issue SATA_ABORT all command

Indeed, in libata EH, we do similar to above:

 a. ata_do_eh() -> ata_eh_autopsy() -> ata_eh_link_autopsy() ->
    ata_eh_analyze_ncq_error() -> ata_eh_read_log_10h()

 b. ata_do_eh() -> ata_eh_recover() which will issue a device soft reset
    or hard reset

Since there is so much duplication, use sas_ata_device_link_abort() which
will abort all pending IOs and kick of ATA EH which will do the steps,
above.

However we will not follow the advisory to send the SATA_ABORT all command
after the autopsy in read log ext10. Indeed, in libsas EH, we already send
a per-task SATA_ABORT command, and this is prior to the ATA EH kicking in
and issuing the read log ext10 in the recovery process. I judge that this
is ok as the SATA_ABORT command does not actually send any protocol on the
link to abort I/O on the other side, so would not change any state on the
disk (for the read log ext10 command).

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-7-git-send-email-john.garry@huawei.com
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Niklas Cassel <niklas.cassel@wdc.com> # pm80xx
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_hwi.c | 171 +++----------------------------
 drivers/scsi/pm8001/pm8001_sas.c |   6 --
 drivers/scsi/pm8001/pm8001_sas.h |   5 -
 drivers/scsi/pm8001/pm80xx_hwi.c | 163 ++---------------------------
 4 files changed, 19 insertions(+), 326 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_hwi.c b/drivers/scsi/pm8001/pm8001_hwi.c
index c0adc3a9d1967..ec1a9ab618146 100644
--- a/drivers/scsi/pm8001/pm8001_hwi.c
+++ b/drivers/scsi/pm8001/pm8001_hwi.c
@@ -1724,7 +1724,14 @@ void pm8001_work_fn(struct work_struct *work)
 				pm8001_free_dev(pm8001_dev);
 			}
 		}
-	}	break;
+	}
+	break;
+	case IO_XFER_ERROR_ABORTED_NCQ_MODE:
+	{
+		dev = pm8001_dev->sas_device;
+		sas_ata_device_link_abort(dev, false);
+	}
+	break;
 	}
 	kfree(pw);
 }
@@ -1748,110 +1755,6 @@ int pm8001_handle_event(struct pm8001_hba_info *pm8001_ha, void *data,
 	return ret;
 }
 
-static void pm8001_send_abort_all(struct pm8001_hba_info *pm8001_ha,
-		struct pm8001_device *pm8001_ha_dev)
-{
-	struct pm8001_ccb_info *ccb;
-	struct sas_task *task;
-	struct task_abort_req task_abort;
-	u32 opc = OPC_INB_SATA_ABORT;
-	int ret;
-
-	pm8001_ha_dev->id |= NCQ_ABORT_ALL_FLAG;
-	pm8001_ha_dev->id &= ~NCQ_READ_LOG_FLAG;
-
-	task = sas_alloc_slow_task(GFP_ATOMIC);
-	if (!task) {
-		pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n");
-		return;
-	}
-
-	task->task_done = pm8001_task_done;
-
-	ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task);
-	if (!ccb) {
-		sas_free_task(task);
-		return;
-	}
-
-	memset(&task_abort, 0, sizeof(task_abort));
-	task_abort.abort_all = cpu_to_le32(1);
-	task_abort.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
-	task_abort.tag = cpu_to_le32(ccb->ccb_tag);
-
-	ret = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &task_abort,
-				   sizeof(task_abort), 0);
-	if (ret) {
-		sas_free_task(task);
-		pm8001_ccb_free(pm8001_ha, ccb);
-	}
-}
-
-static void pm8001_send_read_log(struct pm8001_hba_info *pm8001_ha,
-		struct pm8001_device *pm8001_ha_dev)
-{
-	struct sata_start_req sata_cmd;
-	int res;
-	struct pm8001_ccb_info *ccb;
-	struct sas_task *task = NULL;
-	struct host_to_dev_fis fis;
-	struct domain_device *dev;
-	u32 opc = OPC_INB_SATA_HOST_OPSTART;
-
-	task = sas_alloc_slow_task(GFP_ATOMIC);
-	if (!task) {
-		pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task !!!\n");
-		return;
-	}
-	task->task_done = pm8001_task_done;
-
-	/*
-	 * Allocate domain device by ourselves as libsas is not going to
-	 * provide any.
-	 */
-	dev = kzalloc(sizeof(struct domain_device), GFP_ATOMIC);
-	if (!dev) {
-		sas_free_task(task);
-		pm8001_dbg(pm8001_ha, FAIL,
-			   "Domain device cannot be allocated\n");
-		return;
-	}
-	task->dev = dev;
-	task->dev->lldd_dev = pm8001_ha_dev;
-
-	ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task);
-	if (!ccb) {
-		sas_free_task(task);
-		kfree(dev);
-		return;
-	}
-
-	pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG;
-	pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG;
-
-	/* construct read log FIS */
-	memset(&fis, 0, sizeof(struct host_to_dev_fis));
-	fis.fis_type = 0x27;
-	fis.flags = 0x80;
-	fis.command = ATA_CMD_READ_LOG_EXT;
-	fis.lbal = 0x10;
-	fis.sector_count = 0x1;
-
-	memset(&sata_cmd, 0, sizeof(sata_cmd));
-	sata_cmd.tag = cpu_to_le32(ccb->ccb_tag);
-	sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
-	sata_cmd.ncqtag_atap_dir_m = cpu_to_le32((0x1 << 7) | (0x5 << 9));
-	memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis));
-
-	res = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &sata_cmd,
-				   sizeof(sata_cmd), 0);
-	if (res) {
-		sas_free_task(task);
-		pm8001_ccb_free(pm8001_ha, ccb);
-		kfree(dev);
-	}
-}
-
 /**
  * mpi_ssp_completion- process the event that FW response to the SSP request.
  * @pm8001_ha: our hba card information
@@ -2301,8 +2204,7 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		return;
 	}
 
-	if ((pm8001_dev && !(pm8001_dev->id & NCQ_READ_LOG_FLAG))
-		&& unlikely(!t || !t->lldd_task || !t->dev)) {
+	if (pm8001_dev && unlikely(!t || !t->lldd_task || !t->dev)) {
 		pm8001_dbg(pm8001_ha, FAIL, "task or dev null\n");
 		return;
 	}
@@ -2360,15 +2262,6 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha, void *piomb)
 		if (param == 0) {
 			ts->resp = SAS_TASK_COMPLETE;
 			ts->stat = SAS_SAM_STAT_GOOD;
-			/* check if response is for SEND READ LOG */
-			if (pm8001_dev &&
-			    (pm8001_dev->id & NCQ_READ_LOG_FLAG)) {
-				pm8001_send_abort_all(pm8001_ha, pm8001_dev);
-				/* Free the tag */
-				pm8001_tag_free(pm8001_ha, tag);
-				sas_free_task(t);
-				return;
-			}
 		} else {
 			u8 len;
 			ts->resp = SAS_TASK_COMPLETE;
@@ -2666,9 +2559,10 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	if (event == IO_XFER_ERROR_ABORTED_NCQ_MODE) {
 		/* find device using device id */
 		pm8001_dev = pm8001_find_dev(pm8001_ha, dev_id);
-		/* send read log extension */
 		if (pm8001_dev)
-			pm8001_send_read_log(pm8001_ha, pm8001_dev);
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_XFER_ERROR_ABORTED_NCQ_MODE);
 		return;
 	}
 
@@ -3649,12 +3543,7 @@ int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	pm8001_ccb_task_free(pm8001_ha, ccb);
 	mb();
 
-	if (pm8001_dev->id & NCQ_ABORT_ALL_FLAG) {
-		sas_free_task(t);
-		pm8001_dev->id &= ~NCQ_ABORT_ALL_FLAG;
-	} else {
-		t->task_done(t);
-	}
+	t->task_done(t);
 
 	return 0;
 }
@@ -4206,7 +4095,6 @@ static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 	u64 phys_addr;
 	u32 ATAP = 0x0;
 	u32 dir;
-	unsigned long flags;
 	u32  opc = OPC_INB_SATA_HOST_OPSTART;
 
 	memset(&sata_cmd, 0, sizeof(sata_cmd));
@@ -4261,39 +4149,6 @@ static int pm8001_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 		sata_cmd.esgl = 0;
 	}
 
-	/* Check for read log for failed drive and return */
-	if (sata_cmd.sata_fis.command == 0x2f) {
-		if (((pm8001_ha_dev->id & NCQ_READ_LOG_FLAG) ||
-			(pm8001_ha_dev->id & NCQ_ABORT_ALL_FLAG) ||
-			(pm8001_ha_dev->id & NCQ_2ND_RLE_FLAG))) {
-			struct task_status_struct *ts;
-
-			pm8001_ha_dev->id &= 0xDFFFFFFF;
-			ts = &task->task_status;
-
-			spin_lock_irqsave(&task->task_state_lock, flags);
-			ts->resp = SAS_TASK_COMPLETE;
-			ts->stat = SAS_SAM_STAT_GOOD;
-			task->task_state_flags &= ~SAS_TASK_STATE_PENDING;
-			task->task_state_flags |= SAS_TASK_STATE_DONE;
-			if (unlikely((task->task_state_flags &
-					SAS_TASK_STATE_ABORTED))) {
-				spin_unlock_irqrestore(&task->task_state_lock,
-							flags);
-				pm8001_dbg(pm8001_ha, FAIL,
-					   "task 0x%p resp 0x%x  stat 0x%x but aborted by upper layer\n",
-					   task, ts->resp,
-					   ts->stat);
-				pm8001_ccb_task_free(pm8001_ha, ccb);
-			} else {
-				spin_unlock_irqrestore(&task->task_state_lock,
-							flags);
-				pm8001_ccb_task_free_done(pm8001_ha, ccb);
-				return 0;
-			}
-		}
-	}
-
 	return pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &sata_cmd,
 				    sizeof(sata_cmd), 0);
 }
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index d5ec29f69be31..2d84ae95a1f94 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -687,12 +687,6 @@ int pm8001_dev_found(struct domain_device *dev)
 	return pm8001_dev_found_notify(dev);
 }
 
-void pm8001_task_done(struct sas_task *task)
-{
-	del_timer(&task->slow_task->timer);
-	complete(&task->slow_task->completion);
-}
-
 #define PM8001_TASK_TIMEOUT 20
 
 /**
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index b08f52673889c..16a753d5e8a7d 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -579,10 +579,6 @@ struct pm8001_fw_image_header {
 #define FLASH_UPDATE_DNLD_NOT_SUPPORTED		0x10
 #define FLASH_UPDATE_DISABLED			0x11
 
-#define	NCQ_READ_LOG_FLAG			0x80000000
-#define	NCQ_ABORT_ALL_FLAG			0x40000000
-#define	NCQ_2ND_RLE_FLAG			0x20000000
-
 /* Device states */
 #define DS_OPERATIONAL				0x01
 #define DS_PORT_IN_RESET			0x02
@@ -709,7 +705,6 @@ int pm8001_mpi_fw_flash_update_resp(struct pm8001_hba_info *pm8001_ha,
 int pm8001_mpi_general_event(struct pm8001_hba_info *pm8001_ha, void *piomb);
 int pm8001_mpi_task_abort_resp(struct pm8001_hba_info *pm8001_ha, void *piomb);
 struct sas_task *pm8001_alloc_task(void);
-void pm8001_task_done(struct sas_task *task);
 void pm8001_free_task(struct sas_task *task);
 void pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag);
 struct pm8001_device *pm8001_find_dev(struct pm8001_hba_info *pm8001_ha,
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index dd0e06983cd38..4484c498bcb62 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -1778,113 +1778,6 @@ pm80xx_chip_interrupt_disable(struct pm8001_hba_info *pm8001_ha, u8 vec)
 	pm80xx_chip_intx_interrupt_disable(pm8001_ha);
 }
 
-static void pm80xx_send_abort_all(struct pm8001_hba_info *pm8001_ha,
-		struct pm8001_device *pm8001_ha_dev)
-{
-	struct pm8001_ccb_info *ccb;
-	struct sas_task *task;
-	struct task_abort_req task_abort;
-	u32 opc = OPC_INB_SATA_ABORT;
-	int ret;
-
-	pm8001_ha_dev->id |= NCQ_ABORT_ALL_FLAG;
-	pm8001_ha_dev->id &= ~NCQ_READ_LOG_FLAG;
-
-	task = sas_alloc_slow_task(GFP_ATOMIC);
-	if (!task) {
-		pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task\n");
-		return;
-	}
-	task->task_done = pm8001_task_done;
-
-	ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task);
-	if (!ccb) {
-		sas_free_task(task);
-		return;
-	}
-
-	memset(&task_abort, 0, sizeof(task_abort));
-	task_abort.abort_all = cpu_to_le32(1);
-	task_abort.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
-	task_abort.tag = cpu_to_le32(ccb->ccb_tag);
-
-	ret = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &task_abort,
-				   sizeof(task_abort), 0);
-	pm8001_dbg(pm8001_ha, FAIL, "Executing abort task end\n");
-	if (ret) {
-		sas_free_task(task);
-		pm8001_ccb_free(pm8001_ha, ccb);
-	}
-}
-
-static void pm80xx_send_read_log(struct pm8001_hba_info *pm8001_ha,
-		struct pm8001_device *pm8001_ha_dev)
-{
-	struct sata_start_req sata_cmd;
-	int res;
-	struct pm8001_ccb_info *ccb;
-	struct sas_task *task = NULL;
-	struct host_to_dev_fis fis;
-	struct domain_device *dev;
-	u32 opc = OPC_INB_SATA_HOST_OPSTART;
-
-	task = sas_alloc_slow_task(GFP_ATOMIC);
-	if (!task) {
-		pm8001_dbg(pm8001_ha, FAIL, "cannot allocate task !!!\n");
-		return;
-	}
-	task->task_done = pm8001_task_done;
-
-	/*
-	 * Allocate domain device by ourselves as libsas is not going to
-	 * provide any.
-	 */
-	dev = kzalloc(sizeof(struct domain_device), GFP_ATOMIC);
-	if (!dev) {
-		sas_free_task(task);
-		pm8001_dbg(pm8001_ha, FAIL,
-			   "Domain device cannot be allocated\n");
-		return;
-	}
-
-	task->dev = dev;
-	task->dev->lldd_dev = pm8001_ha_dev;
-
-	ccb = pm8001_ccb_alloc(pm8001_ha, pm8001_ha_dev, task);
-	if (!ccb) {
-		sas_free_task(task);
-		kfree(dev);
-		return;
-	}
-
-	pm8001_ha_dev->id |= NCQ_READ_LOG_FLAG;
-	pm8001_ha_dev->id |= NCQ_2ND_RLE_FLAG;
-
-	memset(&sata_cmd, 0, sizeof(sata_cmd));
-
-	/* construct read log FIS */
-	memset(&fis, 0, sizeof(struct host_to_dev_fis));
-	fis.fis_type = 0x27;
-	fis.flags = 0x80;
-	fis.command = ATA_CMD_READ_LOG_EXT;
-	fis.lbal = 0x10;
-	fis.sector_count = 0x1;
-
-	sata_cmd.tag = cpu_to_le32(ccb->ccb_tag);
-	sata_cmd.device_id = cpu_to_le32(pm8001_ha_dev->device_id);
-	sata_cmd.ncqtag_atap_dir_m_dad = cpu_to_le32(((0x1 << 7) | (0x5 << 9)));
-	memcpy(&sata_cmd.sata_fis, &fis, sizeof(struct host_to_dev_fis));
-
-	res = pm8001_mpi_build_cmd(pm8001_ha, 0, opc, &sata_cmd,
-				   sizeof(sata_cmd), 0);
-	pm8001_dbg(pm8001_ha, FAIL, "Executing read log end\n");
-	if (res) {
-		sas_free_task(task);
-		pm8001_ccb_free(pm8001_ha, ccb);
-		kfree(dev);
-	}
-}
-
 /**
  * mpi_ssp_completion - process the event that FW response to the SSP request.
  * @pm8001_ha: our hba card information
@@ -2402,11 +2295,9 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
 		return;
 	}
 
-	if ((pm8001_dev && !(pm8001_dev->id & NCQ_READ_LOG_FLAG))
-		&& unlikely(!t || !t->lldd_task || !t->dev)) {
-		pm8001_dbg(pm8001_ha, FAIL, "task or dev null\n");
+
+	if (pm8001_dev && unlikely(!t->lldd_task || !t->dev))
 		return;
-	}
 
 	ts = &t->task_status;
 
@@ -2463,15 +2354,6 @@ mpi_sata_completion(struct pm8001_hba_info *pm8001_ha,
 		if (param == 0) {
 			ts->resp = SAS_TASK_COMPLETE;
 			ts->stat = SAS_SAM_STAT_GOOD;
-			/* check if response is for SEND READ LOG */
-			if (pm8001_dev &&
-			    (pm8001_dev->id & NCQ_READ_LOG_FLAG)) {
-				pm80xx_send_abort_all(pm8001_ha, pm8001_dev);
-				/* Free the tag */
-				pm8001_tag_free(pm8001_ha, tag);
-				sas_free_task(t);
-				return;
-			}
 		} else {
 			u8 len;
 			ts->resp = SAS_TASK_COMPLETE;
@@ -2806,9 +2688,11 @@ static void mpi_sata_event(struct pm8001_hba_info *pm8001_ha,
 	if (event == IO_XFER_ERROR_ABORTED_NCQ_MODE) {
 		/* find device using device id */
 		pm8001_dev = pm8001_find_dev(pm8001_ha, dev_id);
-		/* send read log extension */
+		/* send read log extension by aborting the link - libata does what we want */
 		if (pm8001_dev)
-			pm80xx_send_read_log(pm8001_ha, pm8001_dev);
+			pm8001_handle_event(pm8001_ha,
+				pm8001_dev,
+				IO_XFER_ERROR_ABORTED_NCQ_MODE);
 		return;
 	}
 
@@ -4556,7 +4440,6 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 	u32 end_addr_high, end_addr_low;
 	u32 ATAP = 0x0;
 	u32 dir;
-	unsigned long flags;
 	u32 opc = OPC_INB_SATA_HOST_OPSTART;
 	memset(&sata_cmd, 0, sizeof(sata_cmd));
 
@@ -4735,40 +4618,6 @@ static int pm80xx_chip_sata_req(struct pm8001_hba_info *pm8001_ha,
 				     (task->ata_task.atapi_packet[15] << 24)));
 	}
 
-	/* Check for read log for failed drive and return */
-	if (sata_cmd.sata_fis.command == 0x2f) {
-		if (pm8001_ha_dev && ((pm8001_ha_dev->id & NCQ_READ_LOG_FLAG) ||
-			(pm8001_ha_dev->id & NCQ_ABORT_ALL_FLAG) ||
-			(pm8001_ha_dev->id & NCQ_2ND_RLE_FLAG))) {
-			struct task_status_struct *ts;
-
-			pm8001_ha_dev->id &= 0xDFFFFFFF;
-			ts = &task->task_status;
-
-			spin_lock_irqsave(&task->task_state_lock, flags);
-			ts->resp = SAS_TASK_COMPLETE;
-			ts->stat = SAS_SAM_STAT_GOOD;
-			task->task_state_flags &= ~SAS_TASK_STATE_PENDING;
-			task->task_state_flags |= SAS_TASK_STATE_DONE;
-			if (unlikely((task->task_state_flags &
-					SAS_TASK_STATE_ABORTED))) {
-				spin_unlock_irqrestore(&task->task_state_lock,
-							flags);
-				pm8001_dbg(pm8001_ha, FAIL,
-					   "task 0x%p resp 0x%x  stat 0x%x but aborted by upper layer\n",
-					   task, ts->resp,
-					   ts->stat);
-				pm8001_ccb_task_free(pm8001_ha, ccb);
-				return 0;
-			} else {
-				spin_unlock_irqrestore(&task->task_state_lock,
-							flags);
-				pm8001_ccb_task_free_done(pm8001_ha, ccb);
-				atomic_dec(&pm8001_ha_dev->running_req);
-				return 0;
-			}
-		}
-	}
 	trace_pm80xx_request_issue(pm8001_ha->id,
 				ccb->device ? ccb->device->attached_phy : PM8001_MAX_PHYS,
 				ccb->ccb_tag, opc,
-- 
GitLab


From 8e8d43642f2f9bbed9e7823c6e5b6fd7c7fbc3dc Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 17 Oct 2022 17:20:34 +0800
Subject: [PATCH 0152/4122] scsi: libsas: Make sas_{alloc, alloc_slow,
 free}_task() private

We have no users outside libsas any longer, so make sas_alloc_task(),
sas_alloc_slow_task(), and sas_free_task() private.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-8-git-send-email-john.garry@huawei.com
Tested-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Tested-by: Niklas Cassel <niklas.cassel@wdc.com> # pm80xx
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_init.c     | 3 ---
 drivers/scsi/libsas/sas_internal.h | 4 ++++
 include/scsi/libsas.h              | 4 ----
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/libsas/sas_init.c b/drivers/scsi/libsas/sas_init.c
index e4f77072a58d2..f2c05ebeb72ff 100644
--- a/drivers/scsi/libsas/sas_init.c
+++ b/drivers/scsi/libsas/sas_init.c
@@ -35,7 +35,6 @@ struct sas_task *sas_alloc_task(gfp_t flags)
 
 	return task;
 }
-EXPORT_SYMBOL_GPL(sas_alloc_task);
 
 struct sas_task *sas_alloc_slow_task(gfp_t flags)
 {
@@ -56,7 +55,6 @@ struct sas_task *sas_alloc_slow_task(gfp_t flags)
 
 	return task;
 }
-EXPORT_SYMBOL_GPL(sas_alloc_slow_task);
 
 void sas_free_task(struct sas_task *task)
 {
@@ -65,7 +63,6 @@ void sas_free_task(struct sas_task *task)
 		kmem_cache_free(sas_task_cache, task);
 	}
 }
-EXPORT_SYMBOL_GPL(sas_free_task);
 
 /*------------ SAS addr hash -----------*/
 void sas_hash_addr(u8 *hashed, const u8 *sas_addr)
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 8d0ad3abc7b5c..b54bcf3c9a9d6 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -52,6 +52,10 @@ void sas_unregister_phys(struct sas_ha_struct *sas_ha);
 struct asd_sas_event *sas_alloc_event(struct asd_sas_phy *phy, gfp_t gfp_flags);
 void sas_free_event(struct asd_sas_event *event);
 
+struct sas_task *sas_alloc_task(gfp_t flags);
+struct sas_task *sas_alloc_slow_task(gfp_t flags);
+void sas_free_task(struct sas_task *task);
+
 int  sas_register_ports(struct sas_ha_struct *sas_ha);
 void sas_unregister_ports(struct sas_ha_struct *sas_ha);
 
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index 2dbead74a2afe..f86b56bf78337 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -639,10 +639,6 @@ struct sas_task_slow {
 #define SAS_TASK_STATE_ABORTED      4
 #define SAS_TASK_NEED_DEV_RESET     8
 
-extern struct sas_task *sas_alloc_task(gfp_t flags);
-extern struct sas_task *sas_alloc_slow_task(gfp_t flags);
-extern void sas_free_task(struct sas_task *task);
-
 static inline bool sas_is_internal_abort(struct sas_task *task)
 {
 	return task->task_proto == SAS_PROTOCOL_INTERNAL_ABORT;
-- 
GitLab


From cc22efbec0110181725b1f5f6778155a2e352522 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Mon, 17 Oct 2022 17:20:35 +0800
Subject: [PATCH 0153/4122] scsi: libsas: Update SATA dev FIS in
 sas_ata_task_done()

In sas_ata_task_done(), for commands which complete with error we set the
SATA dev FIS status field with ATA_ERR. In ata_eh_analyze_tf() this would
be interpreted as a HSM error. Set ATA_DRDY, which will lead libata to
judge as a device error, which is a safer bet.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1665998435-199946-9-git-send-email-john.garry@huawei.com
Reviewed-by: Niklas Cassel <niklas.cassel@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_ata.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 61f64d54e67da..78e6046fb55ad 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -139,8 +139,8 @@ static void sas_ata_task_done(struct sas_task *task)
 				qc->flags |= ATA_QCFLAG_FAILED;
 			}
 
-			dev->sata_dev.fis[3] = 0x04; /* status err */
-			dev->sata_dev.fis[2] = ATA_ERR;
+			dev->sata_dev.fis[2] = ATA_ERR | ATA_DRDY; /* tf status */
+			dev->sata_dev.fis[3] = ATA_ABORTED; /* tf error */
 		}
 	}
 
-- 
GitLab


From b6da92356cd6106dd9e7e8e168e3b7df4fe37d5d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:11 -0700
Subject: [PATCH 0154/4122] scsi: esas2r: Initialize two host template members
 implicitly

Prepare for removing the 'proc_dir' and 'present' members from the SCSI
host template by implicitly initializing 'present' and 'emulated' in
'driver_template'.

Reviewed-by: John Garry <john.garry@huawei.com>
Cc: Bradley Grove <linuxdrivers@attotech.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-2-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/esas2r/esas2r_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index 7a4eadad23d70..27f6e7ccded8a 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -248,8 +248,6 @@ static struct scsi_host_template driver_template = {
 	.sg_tablesize			= SG_CHUNK_SIZE,
 	.cmd_per_lun			=
 		ESAS2R_DEFAULT_CMD_PER_LUN,
-	.present			= 0,
-	.emulated			= 0,
 	.proc_name			= ESAS2R_DRVR_NAME,
 	.change_queue_depth		= scsi_change_queue_depth,
 	.max_sectors			= 0xFFFF,
-- 
GitLab


From 77916da7e4a0975bd2b93e5214295e3318886cdb Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:12 -0700
Subject: [PATCH 0155/4122] scsi: esas2r: Introduce scsi_template_proc_dir()

Prepare for removing the 'proc_dir' and 'present' members from the SCSI
host template. This commit does not change any functionality.

Reviewed-by: John Garry <john.garry@huawei.com>
Cc: Bradley Grove <linuxdrivers@attotech.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-3-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/esas2r/esas2r_main.c | 17 +++++++++++------
 drivers/scsi/scsi_proc.c          | 11 +++++++++++
 include/scsi/scsi_host.h          |  6 ++++++
 3 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/esas2r/esas2r_main.c b/drivers/scsi/esas2r/esas2r_main.c
index 27f6e7ccded8a..d7a2c49ff5eeb 100644
--- a/drivers/scsi/esas2r/esas2r_main.c
+++ b/drivers/scsi/esas2r/esas2r_main.c
@@ -635,10 +635,13 @@ static void __exit esas2r_exit(void)
 	esas2r_log(ESAS2R_LOG_INFO, "%s called", __func__);
 
 	if (esas2r_proc_major > 0) {
+		struct proc_dir_entry *proc_dir;
+
 		esas2r_log(ESAS2R_LOG_INFO, "unregister proc");
 
-		remove_proc_entry(ATTONODE_NAME,
-				  esas2r_proc_host->hostt->proc_dir);
+		proc_dir = scsi_template_proc_dir(esas2r_proc_host->hostt);
+		if (proc_dir)
+			remove_proc_entry(ATTONODE_NAME, proc_dir);
 		unregister_chrdev(esas2r_proc_major, ESAS2R_DRVR_NAME);
 
 		esas2r_proc_major = 0;
@@ -728,11 +731,13 @@ const char *esas2r_info(struct Scsi_Host *sh)
 			       esas2r_proc_major);
 
 		if (esas2r_proc_major > 0) {
-			struct proc_dir_entry *pde;
+			struct proc_dir_entry *proc_dir;
+			struct proc_dir_entry *pde = NULL;
 
-			pde = proc_create(ATTONODE_NAME, 0,
-					  sh->hostt->proc_dir,
-					  &esas2r_proc_ops);
+			proc_dir = scsi_template_proc_dir(sh->hostt);
+			if (proc_dir)
+				pde = proc_create(ATTONODE_NAME, 0, proc_dir,
+						  &esas2r_proc_ops);
 
 			if (!pde) {
 				esas2r_log_dev(ESAS2R_LOG_WARN,
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 95aee1ad13834..456b430972887 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -83,6 +83,17 @@ static int proc_scsi_host_open(struct inode *inode, struct file *file)
 				4 * PAGE_SIZE);
 }
 
+/**
+ * scsi_template_proc_dir() - returns the procfs dir for a SCSI host template
+ * @sht: SCSI host template pointer.
+ */
+struct proc_dir_entry *
+scsi_template_proc_dir(const struct scsi_host_template *sht)
+{
+	return sht->proc_dir;
+}
+EXPORT_SYMBOL_GPL(scsi_template_proc_dir);
+
 static const struct proc_ops proc_scsi_ops = {
 	.proc_open	= proc_scsi_host_open,
 	.proc_release	= single_release,
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index fcf25f1642a3a..3854ffcb0b3ef 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -751,6 +751,12 @@ extern struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *, int);
 extern int __must_check scsi_add_host_with_dma(struct Scsi_Host *,
 					       struct device *,
 					       struct device *);
+#if defined(CONFIG_SCSI_PROC_FS)
+struct proc_dir_entry *
+scsi_template_proc_dir(const struct scsi_host_template *sht);
+#else
+#define scsi_template_proc_dir(sht) NULL
+#endif
 extern void scsi_scan_host(struct Scsi_Host *);
 extern void scsi_rescan_device(struct device *);
 extern void scsi_remove_host(struct Scsi_Host *);
-- 
GitLab


From ecca3f9b16366e601a6748bf31e9fe227812248f Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:13 -0700
Subject: [PATCH 0156/4122] scsi: core: Fail host creation if creating the proc
 directory fails

Users expect that the contents of /proc/scsi is in sync with the contents
of /sys/class/scsi_host. Hence fail host creation if creating the proc
directory fails.

Suggested-by: John Garry <john.garry@huawei.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-4-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hosts.c     |  3 ++-
 drivers/scsi/scsi_priv.h |  4 ++--
 drivers/scsi/scsi_proc.c | 13 +++++++++----
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 9857dba09c951..12346e2297fdb 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -519,7 +519,8 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 			     "failed to create tmf workq\n");
 		goto fail;
 	}
-	scsi_proc_hostdir_add(shost->hostt);
+	if (scsi_proc_hostdir_add(shost->hostt) < 0)
+		goto fail;
 	return shost;
  fail:
 	/*
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index c52de9a973e46..494f48e03e90c 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -111,14 +111,14 @@ extern void scsi_evt_thread(struct work_struct *work);
 
 /* scsi_proc.c */
 #ifdef CONFIG_SCSI_PROC_FS
-extern void scsi_proc_hostdir_add(struct scsi_host_template *);
+extern int scsi_proc_hostdir_add(struct scsi_host_template *);
 extern void scsi_proc_hostdir_rm(struct scsi_host_template *);
 extern void scsi_proc_host_add(struct Scsi_Host *);
 extern void scsi_proc_host_rm(struct Scsi_Host *);
 extern int scsi_init_procfs(void);
 extern void scsi_exit_procfs(void);
 #else
-# define scsi_proc_hostdir_add(sht)	do { } while (0)
+# define scsi_proc_hostdir_add(sht)	0
 # define scsi_proc_hostdir_rm(sht)	do { } while (0)
 # define scsi_proc_host_add(shost)	do { } while (0)
 # define scsi_proc_host_rm(shost)	do { } while (0)
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 456b430972887..1b09cea2a7523 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -108,20 +108,25 @@ static const struct proc_ops proc_scsi_ops = {
  *
  * Sets sht->proc_dir to the new directory.
  */
-
-void scsi_proc_hostdir_add(struct scsi_host_template *sht)
+int scsi_proc_hostdir_add(struct scsi_host_template *sht)
 {
+	int ret = 0;
+
 	if (!sht->show_info)
-		return;
+		return 0;
 
 	mutex_lock(&global_host_template_mutex);
 	if (!sht->present++) {
 		sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi);
-        	if (!sht->proc_dir)
+		if (!sht->proc_dir) {
 			printk(KERN_ERR "%s: proc_mkdir failed for %s\n",
 			       __func__, sht->proc_name);
+			ret = -ENOMEM;
+		}
 	}
 	mutex_unlock(&global_host_template_mutex);
+
+	return ret;
 }
 
 /**
-- 
GitLab


From 036abd6140078b4125f60e731f28e15de708f87d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:14 -0700
Subject: [PATCH 0157/4122] scsi: core: Introduce a new list for SCSI proc
 directory entries

Instead of using scsi_host_template members to track the SCSI proc
directory entries, track these entries in a list. This changes the time
needed for looking up the proc dir pointer from O(1) into O(n). This is
considered acceptable since the number of SCSI host adapter types per host
is usually small (less than ten).

This change has been tested by attaching two USB storage devices to a qemu
host:

$ grep -aH . /proc/scsi/usb-storage/*
/proc/scsi/usb-storage/7:   Host scsi7: usb-storage
/proc/scsi/usb-storage/7:       Vendor: QEMU
/proc/scsi/usb-storage/7:      Product: QEMU USB HARDDRIVE
/proc/scsi/usb-storage/7:Serial Number: 1-0000:00:02.1:00.0-6
/proc/scsi/usb-storage/7:     Protocol: Transparent SCSI
/proc/scsi/usb-storage/7:    Transport: Bulk
/proc/scsi/usb-storage/7:       Quirks: SANE_SENSE
/proc/scsi/usb-storage/8:   Host scsi8: usb-storage
/proc/scsi/usb-storage/8:       Vendor: QEMU
/proc/scsi/usb-storage/8:      Product: QEMU USB HARDDRIVE
/proc/scsi/usb-storage/8:Serial Number: 1-0000:00:02.1:00.0-7
/proc/scsi/usb-storage/8:     Protocol: Transparent SCSI
/proc/scsi/usb-storage/8:    Transport: Bulk
/proc/scsi/usb-storage/8:       Quirks: SANE_SENSE

This commit prepares for constifying most SCSI host templates.

Reviewed-by: John Garry <john.garry@huawei.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-5-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_priv.h |   4 +-
 drivers/scsi/scsi_proc.c | 121 ++++++++++++++++++++++++++++++++-------
 include/scsi/scsi_host.h |  12 ----
 3 files changed, 102 insertions(+), 35 deletions(-)

diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 494f48e03e90c..96284a0e13fea 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -111,8 +111,8 @@ extern void scsi_evt_thread(struct work_struct *work);
 
 /* scsi_proc.c */
 #ifdef CONFIG_SCSI_PROC_FS
-extern int scsi_proc_hostdir_add(struct scsi_host_template *);
-extern void scsi_proc_hostdir_rm(struct scsi_host_template *);
+extern int scsi_proc_hostdir_add(const struct scsi_host_template *);
+extern void scsi_proc_hostdir_rm(const struct scsi_host_template *);
 extern void scsi_proc_host_add(struct Scsi_Host *);
 extern void scsi_proc_host_rm(struct Scsi_Host *);
 extern int scsi_init_procfs(void);
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 1b09cea2a7523..4a6eb1741be0d 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -43,8 +43,23 @@
 
 static struct proc_dir_entry *proc_scsi;
 
-/* Protect sht->present and sht->proc_dir */
+/* Protects scsi_proc_list */
 static DEFINE_MUTEX(global_host_template_mutex);
+static LIST_HEAD(scsi_proc_list);
+
+/**
+ * struct scsi_proc_entry - (host template, SCSI proc dir) association
+ * @entry: entry in scsi_proc_list.
+ * @sht: SCSI host template associated with the procfs directory.
+ * @proc_dir: procfs directory associated with the SCSI host template.
+ * @present: Number of SCSI hosts instantiated for @sht.
+ */
+struct scsi_proc_entry {
+	struct list_head	entry;
+	const struct scsi_host_template *sht;
+	struct proc_dir_entry	*proc_dir;
+	unsigned int		present;
+};
 
 static ssize_t proc_scsi_host_write(struct file *file, const char __user *buf,
                            size_t count, loff_t *ppos)
@@ -83,6 +98,32 @@ static int proc_scsi_host_open(struct inode *inode, struct file *file)
 				4 * PAGE_SIZE);
 }
 
+static struct scsi_proc_entry *
+__scsi_lookup_proc_entry(const struct scsi_host_template *sht)
+{
+	struct scsi_proc_entry *e;
+
+	lockdep_assert_held(&global_host_template_mutex);
+
+	list_for_each_entry(e, &scsi_proc_list, entry)
+		if (e->sht == sht)
+			return e;
+
+	return NULL;
+}
+
+static struct scsi_proc_entry *
+scsi_lookup_proc_entry(const struct scsi_host_template *sht)
+{
+	struct scsi_proc_entry *e;
+
+	mutex_lock(&global_host_template_mutex);
+	e = __scsi_lookup_proc_entry(sht);
+	mutex_unlock(&global_host_template_mutex);
+
+	return e;
+}
+
 /**
  * scsi_template_proc_dir() - returns the procfs dir for a SCSI host template
  * @sht: SCSI host template pointer.
@@ -90,7 +131,9 @@ static int proc_scsi_host_open(struct inode *inode, struct file *file)
 struct proc_dir_entry *
 scsi_template_proc_dir(const struct scsi_host_template *sht)
 {
-	return sht->proc_dir;
+	struct scsi_proc_entry *e = scsi_lookup_proc_entry(sht);
+
+	return e ? e->proc_dir : NULL;
 }
 EXPORT_SYMBOL_GPL(scsi_template_proc_dir);
 
@@ -108,24 +151,41 @@ static const struct proc_ops proc_scsi_ops = {
  *
  * Sets sht->proc_dir to the new directory.
  */
-int scsi_proc_hostdir_add(struct scsi_host_template *sht)
+int scsi_proc_hostdir_add(const struct scsi_host_template *sht)
 {
-	int ret = 0;
+	struct scsi_proc_entry *e;
+	int ret;
 
 	if (!sht->show_info)
 		return 0;
 
 	mutex_lock(&global_host_template_mutex);
-	if (!sht->present++) {
-		sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi);
-		if (!sht->proc_dir) {
-			printk(KERN_ERR "%s: proc_mkdir failed for %s\n",
-			       __func__, sht->proc_name);
+	e = __scsi_lookup_proc_entry(sht);
+	if (!e) {
+		e = kzalloc(sizeof(*e), GFP_KERNEL);
+		if (!e) {
 			ret = -ENOMEM;
+			goto unlock;
 		}
 	}
+	if (e->present++)
+		goto success;
+	e->proc_dir = proc_mkdir(sht->proc_name, proc_scsi);
+	if (!e->proc_dir) {
+		printk(KERN_ERR "%s: proc_mkdir failed for %s\n", __func__,
+		       sht->proc_name);
+		ret = -ENOMEM;
+		goto unlock;
+	}
+	e->sht = sht;
+	list_add_tail(&e->entry, &scsi_proc_list);
+success:
+	e = NULL;
+	ret = 0;
+unlock:
 	mutex_unlock(&global_host_template_mutex);
 
+	kfree(e);
 	return ret;
 }
 
@@ -133,15 +193,19 @@ int scsi_proc_hostdir_add(struct scsi_host_template *sht)
  * scsi_proc_hostdir_rm - remove directory in /proc for a scsi host
  * @sht: owner of directory
  */
-void scsi_proc_hostdir_rm(struct scsi_host_template *sht)
+void scsi_proc_hostdir_rm(const struct scsi_host_template *sht)
 {
+	struct scsi_proc_entry *e;
+
 	if (!sht->show_info)
 		return;
 
 	mutex_lock(&global_host_template_mutex);
-	if (!--sht->present && sht->proc_dir) {
+	e = __scsi_lookup_proc_entry(sht);
+	if (e && !--e->present) {
 		remove_proc_entry(sht->proc_name, proc_scsi);
-		sht->proc_dir = NULL;
+		list_del(&e->entry);
+		kfree(e);
 	}
 	mutex_unlock(&global_host_template_mutex);
 }
@@ -153,20 +217,29 @@ void scsi_proc_hostdir_rm(struct scsi_host_template *sht)
  */
 void scsi_proc_host_add(struct Scsi_Host *shost)
 {
-	struct scsi_host_template *sht = shost->hostt;
+	const struct scsi_host_template *sht = shost->hostt;
+	struct scsi_proc_entry *e;
 	struct proc_dir_entry *p;
 	char name[10];
 
-	if (!sht->proc_dir)
+	if (!sht->show_info)
 		return;
 
+	e = scsi_lookup_proc_entry(sht);
+	if (!e)
+		goto err;
+
 	sprintf(name,"%d", shost->host_no);
-	p = proc_create_data(name, S_IRUGO | S_IWUSR,
-		sht->proc_dir, &proc_scsi_ops, shost);
+	p = proc_create_data(name, S_IRUGO | S_IWUSR, e->proc_dir,
+			     &proc_scsi_ops, shost);
 	if (!p)
-		printk(KERN_ERR "%s: Failed to register host %d in"
-		       "%s\n", __func__, shost->host_no,
-		       sht->proc_name);
+		goto err;
+	return;
+
+err:
+	shost_printk(KERN_ERR, shost,
+		     "%s: Failed to register host (%s failed)\n", __func__,
+		     e ? "proc_create_data()" : "scsi_proc_hostdir_add()");
 }
 
 /**
@@ -175,13 +248,19 @@ void scsi_proc_host_add(struct Scsi_Host *shost)
  */
 void scsi_proc_host_rm(struct Scsi_Host *shost)
 {
+	const struct scsi_host_template *sht = shost->hostt;
+	struct scsi_proc_entry *e;
 	char name[10];
 
-	if (!shost->hostt->proc_dir)
+	if (!sht->show_info)
+		return;
+
+	e = scsi_lookup_proc_entry(sht);
+	if (!e)
 		return;
 
 	sprintf(name,"%d", shost->host_no);
-	remove_proc_entry(name, shost->hostt->proc_dir);
+	remove_proc_entry(name, e->proc_dir);
 }
 /**
  * proc_print_scsidevice - return data about this host
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 3854ffcb0b3ef..e71436183c0d5 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -357,12 +357,6 @@ struct scsi_host_template {
 	 */
 	const char *proc_name;
 
-	/*
-	 * Used to store the procfs directory if a driver implements the
-	 * show_info method.
-	 */
-	struct proc_dir_entry *proc_dir;
-
 	/*
 	 * This determines if we will use a non-interrupt driven
 	 * or an interrupt driven scheme.  It is set to the maximum number
@@ -423,12 +417,6 @@ struct scsi_host_template {
 	 */
 	short cmd_per_lun;
 
-	/*
-	 * present contains counter indicating how many boards of this
-	 * type were found when we did the scan.
-	 */
-	unsigned char present;
-
 	/* If use block layer to manage tags, this is tag allocation policy */
 	int tag_alloc_policy;
 
-- 
GitLab


From d460f624059266c2e7f0280bdd3ae806d4b75211 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:15 -0700
Subject: [PATCH 0158/4122] scsi: core: Rework scsi_single_lun_run()

Use __starget_for_each_device() instead of open-coding
starget_for_each_device(). Run the queues asynchronously instead of
synchronously.

This commit removes code that calls scsi_device_put() from atomic context.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: John Garry <john.garry@huawei.com>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-6-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 32 +++++++++++++++-----------------
 1 file changed, 15 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 8b89fab7c4206..fa96d3cfdfa30 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -307,6 +307,18 @@ static void scsi_kick_queue(struct request_queue *q)
 	blk_mq_run_hw_queues(q, false);
 }
 
+/*
+ * Kick the queue of SCSI device @sdev if @sdev != current_sdev. Called with
+ * interrupts disabled.
+ */
+static void scsi_kick_sdev_queue(struct scsi_device *sdev, void *data)
+{
+	struct scsi_device *current_sdev = data;
+
+	if (sdev != current_sdev)
+		blk_mq_run_hw_queues(sdev->request_queue, true);
+}
+
 /*
  * Called for single_lun devices on IO completion. Clear starget_sdev_user,
  * and call blk_run_queue for all the scsi_devices on the target -
@@ -317,7 +329,6 @@ static void scsi_kick_queue(struct request_queue *q)
 static void scsi_single_lun_run(struct scsi_device *current_sdev)
 {
 	struct Scsi_Host *shost = current_sdev->host;
-	struct scsi_device *sdev, *tmp;
 	struct scsi_target *starget = scsi_target(current_sdev);
 	unsigned long flags;
 
@@ -334,22 +345,9 @@ static void scsi_single_lun_run(struct scsi_device *current_sdev)
 	scsi_kick_queue(current_sdev->request_queue);
 
 	spin_lock_irqsave(shost->host_lock, flags);
-	if (starget->starget_sdev_user)
-		goto out;
-	list_for_each_entry_safe(sdev, tmp, &starget->devices,
-			same_target_siblings) {
-		if (sdev == current_sdev)
-			continue;
-		if (scsi_device_get(sdev))
-			continue;
-
-		spin_unlock_irqrestore(shost->host_lock, flags);
-		scsi_kick_queue(sdev->request_queue);
-		spin_lock_irqsave(shost->host_lock, flags);
-
-		scsi_device_put(sdev);
-	}
- out:
+	if (!starget->starget_sdev_user)
+		__starget_for_each_device(starget, current_sdev,
+					  scsi_kick_sdev_queue);
 	spin_unlock_irqrestore(shost->host_lock, flags);
 }
 
-- 
GitLab


From 6d1aa3b0589bdd17a46ed74fbd2c2d0fc59038ff Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:16 -0700
Subject: [PATCH 0159/4122] scsi: ufs: Simplify ufshcd_set_dev_pwr_mode()

Simplify the code for incrementing the SCSI device reference count in
ufshcd_set_dev_pwr_mode(). This commit removes one scsi_device_put() call
that happens from atomic context.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-7-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 7256e6c43ca68..c8f0fe7400051 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8752,15 +8752,10 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	sdp = hba->ufs_device_wlun;
-	if (sdp) {
+	if (sdp && scsi_device_online(sdp))
 		ret = scsi_device_get(sdp);
-		if (!ret && !scsi_device_online(sdp)) {
-			ret = -ENODEV;
-			scsi_device_put(sdp);
-		}
-	} else {
+	else
 		ret = -ENODEV;
-	}
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	if (ret)
-- 
GitLab


From 195fae206ef20a29b09f281b6db8ea30fafaa908 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:17 -0700
Subject: [PATCH 0160/4122] scsi: core: Remove the put_device() call from
 scsi_device_get()

scsi_device_get() may be called from atomic context, e.g. by
shost_for_each_device(). A later commit will allow put_device() to sleep
for SCSI devices. Hence remove the put_device() call from
scsi_device_get().

According to Rusty Russell's "Module Refcount and Stuff mini-FAQ", calling
module_put() from atomic context is allowed since considerable time. See
also https://lkml.org/lkml/2002/11/18/330.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-8-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index c59eac7a32f2a..9feb0323bc44a 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -563,14 +563,14 @@ int scsi_device_get(struct scsi_device *sdev)
 {
 	if (sdev->sdev_state == SDEV_DEL || sdev->sdev_state == SDEV_CANCEL)
 		goto fail;
-	if (!get_device(&sdev->sdev_gendev))
-		goto fail;
 	if (!try_module_get(sdev->host->hostt->module))
-		goto fail_put_device;
+		goto fail;
+	if (!get_device(&sdev->sdev_gendev))
+		goto fail_put_module;
 	return 0;
 
-fail_put_device:
-	put_device(&sdev->sdev_gendev);
+fail_put_module:
+	module_put(sdev->host->hostt->module);
 fail:
 	return -ENXIO;
 }
-- 
GitLab


From f93ed747e2c7e6bfbf309291879b33b0d0231a7d Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 14 Oct 2022 17:24:18 -0700
Subject: [PATCH 0161/4122] scsi: core: Release SCSI devices synchronously

All upstream scsi_device_put() calls happen from thread context. Hence
simplify scsi_device_put() by always calling the release function
synchronously. This commit prepares for constifying the SCSI host template
by removing an assignment that clears the module pointer in the SCSI host
template.

scsi_device_dev_release_usercontext() was introduced in 2006 via
commit 65110b216895 ("[SCSI] fix wrong context bugs in SCSI").

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: John Garry <john.garry@huawei.com>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221015002418.30955-9-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi.c        |  2 ++
 drivers/scsi/scsi_sysfs.c  | 22 ++--------------------
 include/scsi/scsi_device.h |  1 -
 3 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index 9feb0323bc44a..1426b9b03612f 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -588,6 +588,8 @@ void scsi_device_put(struct scsi_device *sdev)
 {
 	struct module *mod = sdev->host->hostt->module;
 
+	might_sleep();
+
 	put_device(&sdev->sdev_gendev);
 	module_put(mod);
 }
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index c95177ca6ed26..f2a345cc0f8af 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -441,20 +441,15 @@ static void scsi_device_cls_release(struct device *class_dev)
 	put_device(&sdev->sdev_gendev);
 }
 
-static void scsi_device_dev_release_usercontext(struct work_struct *work)
+static void scsi_device_dev_release(struct device *dev)
 {
-	struct scsi_device *sdev;
+	struct scsi_device *sdev = to_scsi_device(dev);
 	struct device *parent;
 	struct list_head *this, *tmp;
 	struct scsi_vpd *vpd_pg80 = NULL, *vpd_pg83 = NULL;
 	struct scsi_vpd *vpd_pg0 = NULL, *vpd_pg89 = NULL;
 	struct scsi_vpd *vpd_pgb0 = NULL, *vpd_pgb1 = NULL, *vpd_pgb2 = NULL;
 	unsigned long flags;
-	struct module *mod;
-
-	sdev = container_of(work, struct scsi_device, ew.work);
-
-	mod = sdev->host->hostt->module;
 
 	scsi_dh_release_device(sdev);
 
@@ -518,19 +513,6 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 
 	if (parent)
 		put_device(parent);
-	module_put(mod);
-}
-
-static void scsi_device_dev_release(struct device *dev)
-{
-	struct scsi_device *sdp = to_scsi_device(dev);
-
-	/* Set module pointer as NULL in case of module unloading */
-	if (!try_module_get(sdp->host->hostt->module))
-		sdp->host->hostt->module = NULL;
-
-	execute_in_process_context(scsi_device_dev_release_usercontext,
-				   &sdp->ew);
 }
 
 static struct class sdev_class = {
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index c36656d8ac6c7..24bdbf7999ab4 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -236,7 +236,6 @@ struct scsi_device {
 	struct device		sdev_gendev,
 				sdev_dev;
 
-	struct execute_work	ew; /* used to get process context on put */
 	struct work_struct	requeue_work;
 
 	struct scsi_device_handler *handler;
-- 
GitLab


From 773792e4e704ca1c47e3d9bc6ed5be2a00a22ad5 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:23 +0800
Subject: [PATCH 0162/4122] scsi: libsas: Introduce SAS address comparison
 helpers

SAS address comparison is widely used in libsas. However they are all
opencoded and to avoid the line spill over 80 columns, are mostly split
into multi-lines. Introduce some helpers to prepare for some refactoring.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-2-yanaijie@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_internal.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index b54bcf3c9a9d6..6cf190ade35e2 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -115,6 +115,23 @@ static inline void sas_smp_host_handler(struct bsg_job *job,
 }
 #endif
 
+static inline bool sas_phy_match_dev_addr(struct domain_device *dev,
+					 struct ex_phy *phy)
+{
+	return SAS_ADDR(dev->sas_addr) == SAS_ADDR(phy->attached_sas_addr);
+}
+
+static inline bool sas_phy_match_port_addr(struct asd_sas_port *port,
+					   struct ex_phy *phy)
+{
+	return SAS_ADDR(port->sas_addr) == SAS_ADDR(phy->attached_sas_addr);
+}
+
+static inline bool sas_phy_addr_match(struct ex_phy *p1, struct ex_phy *p2)
+{
+	return  SAS_ADDR(p1->attached_sas_addr) == SAS_ADDR(p2->attached_sas_addr);
+}
+
 static inline void sas_fail_probe(struct domain_device *dev, const char *func, int err)
 {
 	pr_warn("%s: for %s device %016llx returned %d\n",
-- 
GitLab


From 2d08f329a4f2eace6b041d60132f441fc8e0b616 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:24 +0800
Subject: [PATCH 0163/4122] scsi: libsas: Introduce sas_find_attached_phy_id()
 helper

LLDDs are all implementing their own attached phy ID finding code.  Factor
it out to libsas.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-3-yanaijie@huawei.com
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: John Garry <john.garry@huawei.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_expander.c | 16 ++++++++++++++++
 include/scsi/libsas.h              |  2 ++
 2 files changed, 18 insertions(+)

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 5ce2518301040..7ffb429463353 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2107,6 +2107,22 @@ int sas_ex_revalidate_domain(struct domain_device *port_dev)
 	return res;
 }
 
+int sas_find_attached_phy_id(struct expander_device *ex_dev,
+			     struct domain_device *dev)
+{
+	struct ex_phy *phy;
+	int phy_id;
+
+	for (phy_id = 0; phy_id < ex_dev->num_phys; phy_id++) {
+		phy = &ex_dev->ex_phy[phy_id];
+		if (sas_phy_match_dev_addr(dev, phy))
+			return phy_id;
+	}
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(sas_find_attached_phy_id);
+
 void sas_smp_handler(struct bsg_job *job, struct Scsi_Host *shost,
 		struct sas_rphy *rphy)
 {
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index f86b56bf78337..ec6c9ecd8d12a 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -746,6 +746,8 @@ int sas_clear_task_set(struct domain_device *dev, u8 *lun);
 int sas_lu_reset(struct domain_device *dev, u8 *lun);
 int sas_query_task(struct sas_task *task, u16 tag);
 int sas_abort_task(struct sas_task *task, u16 tag);
+int sas_find_attached_phy_id(struct expander_device *ex_dev,
+			     struct domain_device *dev);
 
 void sas_notify_port_event(struct asd_sas_phy *phy, enum port_event event,
 			   gfp_t gfp_flags);
-- 
GitLab


From ec64858657a8c393e2ae956d37c23bf94aee8200 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:25 +0800
Subject: [PATCH 0164/4122] scsi: pm8001: Use sas_find_attached_phy_id()
 instead of open coding it

The attached phy id finding is open coded. Replace it with
sas_find_attached_phy_id(). To keep things consistent, the return value of
pm8001_dev_found_notify() is also changed to -ENODEV after calling
sas_find_attathed_phy_id() failed.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-4-yanaijie@huawei.com
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_sas.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 2d84ae95a1f94..51230b8271492 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -645,22 +645,16 @@ static int pm8001_dev_found_notify(struct domain_device *dev)
 	pm8001_device->dcompletion = &completion;
 	if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
 		int phy_id;
-		struct ex_phy *phy;
-		for (phy_id = 0; phy_id < parent_dev->ex_dev.num_phys;
-		phy_id++) {
-			phy = &parent_dev->ex_dev.ex_phy[phy_id];
-			if (SAS_ADDR(phy->attached_sas_addr)
-				== SAS_ADDR(dev->sas_addr)) {
-				pm8001_device->attached_phy = phy_id;
-				break;
-			}
-		}
-		if (phy_id == parent_dev->ex_dev.num_phys) {
+
+		phy_id = sas_find_attached_phy_id(&parent_dev->ex_dev, dev);
+		if (phy_id < 0) {
 			pm8001_dbg(pm8001_ha, FAIL,
 				   "Error: no attached dev:%016llx at ex:%016llx.\n",
 				   SAS_ADDR(dev->sas_addr),
 				   SAS_ADDR(parent_dev->sas_addr));
-			res = -1;
+			res = phy_id;
+		} else {
+			pm8001_device->attached_phy = phy_id;
 		}
 	} else {
 		if (dev->dev_type == SAS_SATA_DEV) {
-- 
GitLab


From 178c39d94ac2cf9524ff797d90dcdf96b110fb27 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:26 +0800
Subject: [PATCH 0165/4122] scsi: mvsas: Use sas_find_attached_phy_id() instead
 of open coding it

The attached phy finding is open coded. Replace it with
sas_find_attached_phy_id(). To keep things consistent, the return value of
mvs_dev_found_notify() is also changed to -ENODEV after calling
sas_find_attathed_phy_id() failed.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-5-yanaijie@huawei.com
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mvsas/mv_sas.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index a6867dae0e7c2..bf7d4995b2574 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -1190,23 +1190,16 @@ static int mvs_dev_found_notify(struct domain_device *dev, int lock)
 	mvi_device->sas_device = dev;
 	if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
 		int phy_id;
-		u8 phy_num = parent_dev->ex_dev.num_phys;
-		struct ex_phy *phy;
-		for (phy_id = 0; phy_id < phy_num; phy_id++) {
-			phy = &parent_dev->ex_dev.ex_phy[phy_id];
-			if (SAS_ADDR(phy->attached_sas_addr) ==
-				SAS_ADDR(dev->sas_addr)) {
-				mvi_device->attached_phy = phy_id;
-				break;
-			}
-		}
 
-		if (phy_id == phy_num) {
+		phy_id = sas_find_attached_phy_id(&parent_dev->ex_dev, dev);
+		if (phy_id < 0) {
 			mv_printk("Error: no attached dev:%016llx"
 				"at ex:%016llx.\n",
 				SAS_ADDR(dev->sas_addr),
 				SAS_ADDR(parent_dev->sas_addr));
-			res = -1;
+			res = phy_id;
+		} else {
+			mvi_device->attached_phy = phy_id;
 		}
 	}
 
-- 
GitLab


From f0ed7bd5d9137b8e736e44ce353620ec19ee6242 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:27 +0800
Subject: [PATCH 0166/4122] scsi: hisi_sas: Use sas_find_attathed_phy_id()
 instead of open coding it

The attached phy finding is open coded. Replace it with
sas_find_attached_phy_id(). To keep things consistent, the return value of
hisi_sas_dev_found() is also changed to -ENODEV after calling
sas_find_attathed_phy_id() failed.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-6-yanaijie@huawei.com
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Acked-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 4c37ae9eb6b60..10813836a7288 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -792,22 +792,14 @@ static int hisi_sas_dev_found(struct domain_device *device)
 
 	if (parent_dev && dev_is_expander(parent_dev->dev_type)) {
 		int phy_no;
-		u8 phy_num = parent_dev->ex_dev.num_phys;
-		struct ex_phy *phy;
 
-		for (phy_no = 0; phy_no < phy_num; phy_no++) {
-			phy = &parent_dev->ex_dev.ex_phy[phy_no];
-			if (SAS_ADDR(phy->attached_sas_addr) ==
-				SAS_ADDR(device->sas_addr))
-				break;
-		}
-
-		if (phy_no == phy_num) {
+		phy_no = sas_find_attached_phy_id(&parent_dev->ex_dev, device);
+		if (phy_no < 0) {
 			dev_info(dev, "dev found: no attached "
 				 "dev:%016llx at ex:%016llx\n",
 				 SAS_ADDR(device->sas_addr),
 				 SAS_ADDR(parent_dev->sas_addr));
-			rc = -EINVAL;
+			rc = phy_no;
 			goto err_out;
 		}
 	}
-- 
GitLab


From ad74d1dadbe9fc5ff7f80796f7cac0f126a5ea74 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:28 +0800
Subject: [PATCH 0167/4122] scsi: libsas: Use sas_phy_match_dev_addr() instead
 of open coding it

The SAS address comparison of domain device and expander phy is open
coded. Replace it with sas_phy_match_dev_addr().

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-7-yanaijie@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_expander.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 7ffb429463353..9d6330c55cbf5 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -738,9 +738,7 @@ static void sas_ex_get_linkrate(struct domain_device *parent,
 		    phy->phy_state == PHY_NOT_PRESENT)
 			continue;
 
-		if (SAS_ADDR(phy->attached_sas_addr) ==
-		    SAS_ADDR(child->sas_addr)) {
-
+		if (sas_phy_match_dev_addr(child, phy)) {
 			child->min_linkrate = min(parent->min_linkrate,
 						  phy->linkrate);
 			child->max_linkrate = max(parent->max_linkrate,
@@ -1012,8 +1010,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
 		sas_add_parent_port(dev, phy_id);
 		return 0;
 	}
-	if (dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) ==
-			    SAS_ADDR(dev->parent->sas_addr))) {
+	if (dev->parent && sas_phy_match_dev_addr(dev->parent, ex_phy)) {
 		sas_add_parent_port(dev, phy_id);
 		if (ex_phy->routing_attr == TABLE_ROUTING)
 			sas_configure_phy(dev, phy_id, dev->port->sas_addr, 1);
@@ -1312,7 +1309,7 @@ static int sas_check_parent_topology(struct domain_device *child)
 		    parent_phy->phy_state == PHY_NOT_PRESENT)
 			continue;
 
-		if (SAS_ADDR(parent_phy->attached_sas_addr) != SAS_ADDR(child->sas_addr))
+		if (!sas_phy_match_dev_addr(child, parent_phy))
 			continue;
 
 		child_phy = &child_ex->ex_phy[parent_phy->attached_phy_id];
@@ -1522,8 +1519,7 @@ static int sas_configure_parent(struct domain_device *parent,
 		struct ex_phy *phy = &ex_parent->ex_phy[i];
 
 		if ((phy->routing_attr == TABLE_ROUTING) &&
-		    (SAS_ADDR(phy->attached_sas_addr) ==
-		     SAS_ADDR(child->sas_addr))) {
+		    sas_phy_match_dev_addr(child, phy)) {
 			res = sas_configure_phy(parent, i, sas_addr, include);
 			if (res)
 				return res;
@@ -1858,8 +1854,7 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 	if (last) {
 		list_for_each_entry_safe(child, n,
 			&ex_dev->children, siblings) {
-			if (SAS_ADDR(child->sas_addr) ==
-			    SAS_ADDR(phy->attached_sas_addr)) {
+			if (sas_phy_match_dev_addr(child, phy)) {
 				set_bit(SAS_DEV_GONE, &child->state);
 				if (dev_is_expander(child->dev_type))
 					sas_unregister_ex_tree(parent->port, child);
@@ -1941,8 +1936,7 @@ static int sas_discover_new(struct domain_device *dev, int phy_id)
 	if (res)
 		return res;
 	list_for_each_entry(child, &dev->ex_dev.children, siblings) {
-		if (SAS_ADDR(child->sas_addr) ==
-		    SAS_ADDR(ex_phy->attached_sas_addr)) {
+		if (sas_phy_match_dev_addr(child, ex_phy)) {
 			if (dev_is_expander(child->dev_type))
 				res = sas_discover_bfs_by_root(child);
 			break;
-- 
GitLab


From bfa22905f3865469479f028770a352126ad0d2e8 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:29 +0800
Subject: [PATCH 0168/4122] scsi: libsas: Use sas_phy_addr_match() instead of
 open coding it

The SAS address comparison of expander phys is open coded. Replace it with
sas_phy_addr_match().

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-8-yanaijie@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_expander.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 9d6330c55cbf5..caa0b22867335 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -2058,8 +2058,7 @@ static int sas_rediscover(struct domain_device *dev, const int phy_id)
 
 			if (i == phy_id)
 				continue;
-			if (SAS_ADDR(phy->attached_sas_addr) ==
-			    SAS_ADDR(changed_phy->attached_sas_addr)) {
+			if (sas_phy_addr_match(phy, changed_phy)) {
 				last = false;
 				break;
 			}
-- 
GitLab


From 868a8824838f1f0d781e838fa36dbb2de6bc7fdd Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 28 Sep 2022 15:01:30 +0800
Subject: [PATCH 0169/4122] scsi: libsas: Use sas_phy_match_port_addr() instead
 of open coding it

The SAS address comparison of asd_sas_port and expander phy is open
coded. Replace it with sas_phy_match_port_addr().

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Link: https://lore.kernel.org/r/20220928070130.3657183-9-yanaijie@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_expander.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index caa0b22867335..2907ca5d0ed43 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1005,8 +1005,7 @@ static int sas_ex_discover_dev(struct domain_device *dev, int phy_id)
 	}
 
 	/* Parent and domain coherency */
-	if (!dev->parent && (SAS_ADDR(ex_phy->attached_sas_addr) ==
-			     SAS_ADDR(dev->port->sas_addr))) {
+	if (!dev->parent && sas_phy_match_port_addr(dev->port, ex_phy)) {
 		sas_add_parent_port(dev, phy_id);
 		return 0;
 	}
-- 
GitLab


From ce3a0a29fb9f36d1cd221fffa64a3c405308868f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sat, 8 Oct 2022 22:33:53 +0300
Subject: [PATCH 0170/4122] gpio: merrifield: Use str_enable_disable() helper

Use str_enable_disable() helper instead of open coding the same.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpio-merrifield.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-merrifield.c b/drivers/gpio/gpio-merrifield.c
index 72ac09a597029..92ea8411050dd 100644
--- a/drivers/gpio/gpio-merrifield.c
+++ b/drivers/gpio/gpio-merrifield.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/string_helpers.h>
 
 #define GCCR		0x000	/* controller configuration */
 #define GPLR		0x004	/* pin level r/o */
@@ -331,7 +332,7 @@ static int mrfld_irq_set_wake(struct irq_data *d, unsigned int on)
 
 	raw_spin_unlock_irqrestore(&priv->lock, flags);
 
-	dev_dbg(priv->dev, "%sable wake for gpio %u\n", on ? "en" : "dis", gpio);
+	dev_dbg(priv->dev, "%s wake for gpio %u\n", str_enable_disable(on), gpio);
 	return 0;
 }
 
-- 
GitLab


From 26312973bfbc1db24b157797776ee2b5b48f5c50 Mon Sep 17 00:00:00 2001
From: Deming Wang <wangdeming@inspur.com>
Date: Thu, 6 Oct 2022 12:14:56 -0400
Subject: [PATCH 0171/4122] IB/uverbs: fix the typo of optional

Fix the typo of optional in the function of UVERBS_HANDLER.

Signed-off-by: Deming Wang <wangdeming@inspur.com>
Link: https://lore.kernel.org/r/20221006161456.2998-1-wangdeming@inspur.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/uverbs_std_types_qp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/uverbs_std_types_qp.c b/drivers/infiniband/core/uverbs_std_types_qp.c
index dd1075466f61b..7b4773fa4bc0b 100644
--- a/drivers/infiniband/core/uverbs_std_types_qp.c
+++ b/drivers/infiniband/core/uverbs_std_types_qp.c
@@ -163,7 +163,7 @@ static int UVERBS_HANDLER(UVERBS_METHOD_QP_CREATE)(
 					UVERBS_ATTR_CREATE_QP_SRQ_HANDLE))
 				return -EINVAL;
 
-			/* send_cq is optinal */
+			/* send_cq is optional */
 			if (cap.max_send_wr) {
 				send_cq = uverbs_attr_get_obj(attrs,
 					UVERBS_ATTR_CREATE_QP_SEND_CQ_HANDLE);
-- 
GitLab


From 53c2d5b14a82f6e7f0f8089083972df20e66a354 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Sat, 1 Oct 2022 10:00:45 +0800
Subject: [PATCH 0172/4122] RDMA/core: return -EOPNOSUPP for ODP unsupported
 device

ib_reg_mr(3) which is used to register a MR with specific access flags
for specific HCA will set errno when something go wrong.
So, here we should return the specific -EOPNOTSUPP when the being
requested ODP access flag is unsupported by the HCA(such as RXE).

Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Link: https://lore.kernel.org/r/20221001020045.8324-1-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/ib_verbs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 975d6e9efbcb4..a1f4d53a4bb63 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -4334,7 +4334,7 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev,
 
 	if (flags & IB_ACCESS_ON_DEMAND &&
 	    !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
-		return -EINVAL;
+		return -EOPNOTSUPP;
 	return 0;
 }
 
-- 
GitLab


From 7ac7bfe746d8faddbd79abed526ee67f46d8867c Mon Sep 17 00:00:00 2001
From: Jiangshan Yi <yijiangshan@kylinos.cn>
Date: Sun, 9 Oct 2022 16:10:47 +0800
Subject: [PATCH 0173/4122] RDMA/opa_vnic: fix spelling typo in comment

Fix spelling typo in comment.

Reported-by: k2ci <kernel-bot@kylinos.cn>
Signed-off-by: Jiangshan Yi <yijiangshan@kylinos.cn>
Link: https://lore.kernel.org/r/20221009081047.2643471-1-13667453960@163.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 include/rdma/opa_vnic.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/rdma/opa_vnic.h b/include/rdma/opa_vnic.h
index f3d5377b217a6..d297f084001a5 100644
--- a/include/rdma/opa_vnic.h
+++ b/include/rdma/opa_vnic.h
@@ -51,7 +51,7 @@ static inline void *opa_vnic_dev_priv(const struct net_device *dev)
 	return oparn->dev_priv;
 }
 
-/* opa_vnic skb meta data structrue */
+/* opa_vnic skb meta data structure */
 struct opa_vnic_skb_mdata {
 	u8 vl;
 	u8 entropy;
-- 
GitLab


From acc7d94ab431fb6f34e41588f2784410c2d9008e Mon Sep 17 00:00:00 2001
From: Sergey Gorenko <sergeygo@nvidia.com>
Date: Sun, 16 Oct 2022 12:38:31 +0300
Subject: [PATCH 0174/4122] IB/iser: open code iser_conn_state_comp_exch

There is a single caller to iser_conn_state_comp_exch. Open code its
logic and remove it.

Acked-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Signed-off-by: Sergey Gorenko <sergeygo@nvidia.com>
Link: https://lore.kernel.org/r/20221016093833.12537-2-mgurtovoy@nvidia.com
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 20 ++------------------
 1 file changed, 2 insertions(+), 18 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a00ca117303a9..a73c30230ff9e 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -347,22 +347,6 @@ static void iser_device_try_release(struct iser_device *device)
 	mutex_unlock(&ig.device_list_mutex);
 }
 
-/*
- * Called with state mutex held
- */
-static int iser_conn_state_comp_exch(struct iser_conn *iser_conn,
-				     enum iser_conn_state comp,
-				     enum iser_conn_state exch)
-{
-	int ret;
-
-	ret = (iser_conn->state == comp);
-	if (ret)
-		iser_conn->state = exch;
-
-	return ret;
-}
-
 void iser_release_work(struct work_struct *work)
 {
 	struct iser_conn *iser_conn;
@@ -465,10 +449,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
 	int err = 0;
 
 	/* terminate the iser conn only if the conn state is UP */
-	if (!iser_conn_state_comp_exch(iser_conn, ISER_CONN_UP,
-				       ISER_CONN_TERMINATING))
+	if (iser_conn->state != ISER_CONN_UP)
 		return 0;
 
+	iser_conn->state = ISER_CONN_TERMINATING;
 	iser_info("iser_conn %p state %d\n", iser_conn, iser_conn->state);
 
 	/* suspend queuing of new iscsi commands */
-- 
GitLab


From a75243ae08d23272235cb26117464843538936b4 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Sun, 16 Oct 2022 12:38:32 +0300
Subject: [PATCH 0175/4122] IB/iser: add safety checks for state_mutex lock

In some cases, we need to make sure that state_mutex is taken. Use
lockdep_assert_held to warn us in case it doesn't while it should.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Link: https://lore.kernel.org/r/20221016093833.12537-3-mgurtovoy@nvidia.com
Reviewed-by: Sergey Gorenko <sergeygo@nvidia.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 26 ++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index a73c30230ff9e..f33e3a7f605d2 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -448,6 +448,8 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
 	int err = 0;
 
+	lockdep_assert_held(&iser_conn->state_mutex);
+
 	/* terminate the iser conn only if the conn state is UP */
 	if (iser_conn->state != ISER_CONN_UP)
 		return 0;
@@ -482,9 +484,10 @@ int iser_conn_terminate(struct iser_conn *iser_conn)
  */
 static void iser_connect_error(struct rdma_cm_id *cma_id)
 {
-	struct iser_conn *iser_conn;
+	struct iser_conn *iser_conn = cma_id->context;
+
+	lockdep_assert_held(&iser_conn->state_mutex);
 
-	iser_conn = cma_id->context;
 	iser_conn->state = ISER_CONN_TERMINATING;
 }
 
@@ -526,12 +529,13 @@ static void iser_calc_scsi_params(struct iser_conn *iser_conn,
  */
 static void iser_addr_handler(struct rdma_cm_id *cma_id)
 {
+	struct iser_conn *iser_conn = cma_id->context;
 	struct iser_device *device;
-	struct iser_conn *iser_conn;
 	struct ib_conn *ib_conn;
 	int    ret;
 
-	iser_conn = cma_id->context;
+	lockdep_assert_held(&iser_conn->state_mutex);
+
 	if (iser_conn->state != ISER_CONN_PENDING)
 		/* bailout */
 		return;
@@ -581,6 +585,8 @@ static void iser_route_handler(struct rdma_cm_id *cma_id)
 	struct ib_conn *ib_conn = &iser_conn->ib_conn;
 	struct ib_device *ib_dev = ib_conn->device->ib_device;
 
+	lockdep_assert_held(&iser_conn->state_mutex);
+
 	if (iser_conn->state != ISER_CONN_PENDING)
 		/* bailout */
 		return;
@@ -613,14 +619,18 @@ failure:
 	iser_connect_error(cma_id);
 }
 
+/*
+ * Called with state mutex held
+ */
 static void iser_connected_handler(struct rdma_cm_id *cma_id,
 				   const void *private_data)
 {
-	struct iser_conn *iser_conn;
+	struct iser_conn *iser_conn = cma_id->context;
 	struct ib_qp_attr attr;
 	struct ib_qp_init_attr init_attr;
 
-	iser_conn = cma_id->context;
+	lockdep_assert_held(&iser_conn->state_mutex);
+
 	if (iser_conn->state != ISER_CONN_PENDING)
 		/* bailout */
 		return;
@@ -654,11 +664,15 @@ static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
 	}
 }
 
+/*
+ * Called with state mutex held
+ */
 static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
 				 bool destroy)
 {
 	struct iser_conn *iser_conn = cma_id->context;
 
+	lockdep_assert_held(&iser_conn->state_mutex);
 	/*
 	 * We are not guaranteed that we visited disconnected_handler
 	 * by now, call it here to be safe that we handle CM drep
-- 
GitLab


From c1842f34fceef47d6285e558004f8e2d6ed91b91 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <mgurtovoy@nvidia.com>
Date: Sun, 16 Oct 2022 12:38:33 +0300
Subject: [PATCH 0176/4122] IB/iser: open code iser_disconnected_handler

There is a single caller to iser_disconnected_handler. Open code its
logic and remove it.

Signed-off-by: Max Gurtovoy <mgurtovoy@nvidia.com>
Link: https://lore.kernel.org/r/20221016093833.12537-4-mgurtovoy@nvidia.com
Reviewed-by: Sergey Gorenko <sergeygo@nvidia.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/iser/iser_verbs.c | 21 +++++++--------------
 1 file changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c
index f33e3a7f605d2..1b8eda0dae4e0 100644
--- a/drivers/infiniband/ulp/iser/iser_verbs.c
+++ b/drivers/infiniband/ulp/iser/iser_verbs.c
@@ -651,19 +651,6 @@ static void iser_connected_handler(struct rdma_cm_id *cma_id,
 	complete(&iser_conn->up_completion);
 }
 
-static void iser_disconnected_handler(struct rdma_cm_id *cma_id)
-{
-	struct iser_conn *iser_conn = cma_id->context;
-
-	if (iser_conn_terminate(iser_conn)) {
-		if (iser_conn->iscsi_conn)
-			iscsi_conn_failure(iser_conn->iscsi_conn,
-					   ISCSI_ERR_CONN_FAILED);
-		else
-			iser_err("iscsi_iser connection isn't bound\n");
-	}
-}
-
 /*
  * Called with state mutex held
  */
@@ -678,7 +665,13 @@ static void iser_cleanup_handler(struct rdma_cm_id *cma_id,
 	 * by now, call it here to be safe that we handle CM drep
 	 * and flush errors.
 	 */
-	iser_disconnected_handler(cma_id);
+	if (iser_conn_terminate(iser_conn)) {
+		if (iser_conn->iscsi_conn)
+			iscsi_conn_failure(iser_conn->iscsi_conn,
+					   ISCSI_ERR_CONN_FAILED);
+		else
+			iser_err("iscsi_iser connection isn't bound\n");
+	}
 	iser_free_ib_conn_res(iser_conn, destroy);
 	complete(&iser_conn->ib_completion);
 }
-- 
GitLab


From f74495761df10c25a98256d16ea7465191b6e2cd Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 18 Oct 2022 09:25:00 +0800
Subject: [PATCH 0177/4122] soundwire: dmi-quirks: add quirk variant for
 LAPBC710 NUC15

Some NUC15 LAPBC710 devices don't expose the same DMI information as
the Intel reference, add additional entry in the match table.

BugLink: https://github.com/thesofproject/linux/issues/3885
Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221018012500.1592994-1-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/dmi-quirks.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/soundwire/dmi-quirks.c b/drivers/soundwire/dmi-quirks.c
index f81cdd83ec26e..7969881f126dc 100644
--- a/drivers/soundwire/dmi-quirks.c
+++ b/drivers/soundwire/dmi-quirks.c
@@ -90,6 +90,14 @@ static const struct dmi_system_id adr_remap_quirk_table[] = {
 		},
 		.driver_data = (void *)intel_tgl_bios,
 	},
+	{
+		/* quirk used for NUC15 LAPBC710 skew */
+		.matches = {
+			DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
+			DMI_MATCH(DMI_BOARD_NAME, "LAPBC710"),
+		},
+		.driver_data = (void *)intel_tgl_bios,
+	},
 	{
 		.matches = {
 			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc"),
-- 
GitLab


From 3c6bd6fa83bb6c7a891891a8a32aea2820aadb06 Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 17 Oct 2022 20:57:37 -0400
Subject: [PATCH 0178/4122] dt-bindings: dma: qcom: gpi: add fallback
 compatible

The drivers are transitioning from matching against lists of specific
compatible strings to matching against smaller lists of more generic
compatible strings. Use the SDM845 compatible string as a fallback in
the schema to support this change.

Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221018005740.23952-2-mailingradian@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../devicetree/bindings/dma/qcom,gpi.yaml     | 20 +++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index eabf8a76d3a05..182b8573230d3 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -18,14 +18,18 @@ allOf:
 
 properties:
   compatible:
-    enum:
-      - qcom,sc7280-gpi-dma
-      - qcom,sdm845-gpi-dma
-      - qcom,sm6350-gpi-dma
-      - qcom,sm8150-gpi-dma
-      - qcom,sm8250-gpi-dma
-      - qcom,sm8350-gpi-dma
-      - qcom,sm8450-gpi-dma
+    oneOf:
+      - enum:
+          - qcom,sc7280-gpi-dma
+          - qcom,sdm845-gpi-dma
+          - qcom,sm6350-gpi-dma
+          - qcom,sm8350-gpi-dma
+          - qcom,sm8450-gpi-dma
+      - items:
+          - enum:
+              - qcom,sm8150-gpi-dma
+              - qcom,sm8250-gpi-dma
+          - const: qcom,sdm845-gpi-dma
 
   reg:
     maxItems: 1
-- 
GitLab


From 67fd570d734d35ef6b5e8ee5a3195a2aa843c2d8 Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 17 Oct 2022 20:57:38 -0400
Subject: [PATCH 0179/4122] dt-bindings: dma: qcom: gpi: add compatible for
 sdm670

The Snapdragon 670 uses GPI DMA for its GENI interface. Add a compatible
string for it in the documentation.

Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221018005740.23952-3-mailingradian@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index 182b8573230d3..6f7dcae944e4d 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -27,6 +27,7 @@ properties:
           - qcom,sm8450-gpi-dma
       - items:
           - enum:
+              - qcom,sdm670-gpi-dma
               - qcom,sm8150-gpi-dma
               - qcom,sm8250-gpi-dma
           - const: qcom,sdm845-gpi-dma
-- 
GitLab


From 8527721ee6bd596a211fa2a1bbaf939e994cb89c Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Mon, 17 Oct 2022 20:57:39 -0400
Subject: [PATCH 0180/4122] dmaengine: qcom: deprecate redundant of_device_id
 entries

The drivers are transitioning from matching against lists of specific
compatible strings to matching against smaller lists of more generic
compatible strings. Add a message that the compatible strings with an
ee_offset of 0 are deprecated except for the SDM845 compatible string.

Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221018005740.23952-4-mailingradian@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/qcom/gpi.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index 3f56514bbef8f..f8e19e6e6117d 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -2289,6 +2289,10 @@ static const struct of_device_id gpi_of_match[] = {
 	{ .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 },
 	{ .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 },
+	/*
+	 * Deprecated, devices with ee_offset = 0 should use sdm845-gpi-dma as
+	 * fallback and not need their own entries here.
+	 */
 	{ .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 },
-- 
GitLab


From dfad1e14b27734b204ea821977d43b16d1d1919f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 18 Oct 2022 19:03:48 -0400
Subject: [PATCH 0181/4122] dt-bindings: dma: qcom: gpi: Use sm6350 fallback

Several devices like SM6350, SM8150 and SC7280 are actually compatible,
so use one compatible fallback for all of them.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221018230352.1238479-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index 6f7dcae944e4d..0c28944988456 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -20,11 +20,14 @@ properties:
   compatible:
     oneOf:
       - enum:
-          - qcom,sc7280-gpi-dma
           - qcom,sdm845-gpi-dma
           - qcom,sm6350-gpi-dma
-          - qcom,sm8350-gpi-dma
-          - qcom,sm8450-gpi-dma
+      - items:
+          - enum:
+              - qcom,sc7280-gpi-dma
+              - qcom,sm8350-gpi-dma
+              - qcom,sm8450-gpi-dma
+          - const: qcom,sm6350-gpi-dma
       - items:
           - enum:
               - qcom,sdm670-gpi-dma
-- 
GitLab


From 88bc8ac63db045e74b2ea7015c51384c05b09ae5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 18 Oct 2022 19:03:49 -0400
Subject: [PATCH 0182/4122] dmaengine: qcom: gpi: Document preferred SM6350
 binding

Devices with ee offset of 0x10000 should rather bind with SM6350
compatible, so the list will not unnecessarily grow for compatible
devices.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Richard Acayan <mailingradian@gmail.com>
Link: https://lore.kernel.org/r/20221018230352.1238479-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/qcom/gpi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/qcom/gpi.c b/drivers/dma/qcom/gpi.c
index f8e19e6e6117d..061add8322951 100644
--- a/drivers/dma/qcom/gpi.c
+++ b/drivers/dma/qcom/gpi.c
@@ -2286,13 +2286,14 @@ static int gpi_probe(struct platform_device *pdev)
 }
 
 static const struct of_device_id gpi_of_match[] = {
-	{ .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 },
 	{ .compatible = "qcom,sdm845-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm6350-gpi-dma", .data = (void *)0x10000 },
 	/*
-	 * Deprecated, devices with ee_offset = 0 should use sdm845-gpi-dma as
-	 * fallback and not need their own entries here.
+	 * Do not grow the list for compatible devices. Instead use
+	 * qcom,sdm845-gpi-dma (for ee_offset = 0x0) or qcom,sm6350-gpi-dma
+	 * (for ee_offset = 0x10000).
 	 */
+	{ .compatible = "qcom,sc7280-gpi-dma", .data = (void *)0x10000 },
 	{ .compatible = "qcom,sm8150-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm8250-gpi-dma", .data = (void *)0x0 },
 	{ .compatible = "qcom,sm8350-gpi-dma", .data = (void *)0x10000 },
-- 
GitLab


From 4967a7803c341361a8bf67ace206bca8b390dc22 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 4 Oct 2022 17:09:13 +0100
Subject: [PATCH 0183/4122] dmaengine: ioat: Fix spelling mistake "idel" ->
 "idle"

There is a spelling mistake in the module description. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Acked-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/20221004160913.154739-1-colin.i.king@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ioat/dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index e2070df6cad28..79d244011093c 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -33,7 +33,7 @@ MODULE_PARM_DESC(completion_timeout,
 static int idle_timeout = 2000;
 module_param(idle_timeout, int, 0644);
 MODULE_PARM_DESC(idle_timeout,
-		"set ioat idel timeout [msec] (default 2000 [msec])");
+		"set ioat idle timeout [msec] (default 2000 [msec])");
 
 #define IDLE_TIMEOUT msecs_to_jiffies(idle_timeout)
 #define COMPLETION_TIMEOUT msecs_to_jiffies(completion_timeout)
-- 
GitLab


From beb6f6493853d862490f0d5b99910caa358dd3d4 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@baylibre.com>
Date: Thu, 29 Sep 2022 16:48:18 -0700
Subject: [PATCH 0184/4122] of/irq: export of_msi_get_domain

Export of_mis_get_domain to enable it for users from outside.

Signed-off-by: Matthias Brugger <mbrugger@suse.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Peter Ujfalusi <peter.ujfalusi@ti.com>
Link: https://lore.kernel.org/r/20200122104723.16955-1-peter.ujfalusi@ti.com
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Acked-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Link: https://lore.kernel.org/r/20220929234820.940048-2-khilman@baylibre.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/of/irq.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 2bac44f09554b..e9bf5236ed892 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -730,6 +730,7 @@ struct irq_domain *of_msi_get_domain(struct device *dev,
 
 	return NULL;
 }
+EXPORT_SYMBOL_GPL(of_msi_get_domain);
 
 /**
  * of_msi_configure - Set the msi_domain field of a device
-- 
GitLab


From 56b0a668cb35c5f04ef98ffc22b297f116fe7108 Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@baylibre.com>
Date: Thu, 29 Sep 2022 16:48:19 -0700
Subject: [PATCH 0185/4122] dmaengine: ti: convert k3-udma to module

Currently k3-udma driver is built as separate platform drivers with a
shared probe and identical code path, just differnet platform data.

To enable to build as module, convert the separate platform driver
into a single module_platform_driver with the data selection done via
compatible string and of_match.  The separate of_match tables are also
combined into a single table to avoid the multiple calls to
of_match_node()

Since all modern TI platforms using this are DT enabled, the removal
of separate platform_drivers should have no functional change.

Acked-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Link: https://lore.kernel.org/r/20220929234820.940048-3-khilman@baylibre.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/Kconfig        |  4 ++--
 drivers/dma/ti/k3-udma-glue.c |  5 ++++-
 drivers/dma/ti/k3-udma.c      | 40 +++++------------------------------
 3 files changed, 11 insertions(+), 38 deletions(-)

diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
index 79618fac119a7..f196be3b222f9 100644
--- a/drivers/dma/ti/Kconfig
+++ b/drivers/dma/ti/Kconfig
@@ -35,7 +35,7 @@ config DMA_OMAP
 	  DMA engine is found on OMAP and DRA7xx parts.
 
 config TI_K3_UDMA
-	bool "Texas Instruments UDMA support"
+	tristate "Texas Instruments UDMA support"
 	depends on ARCH_K3
 	depends on TI_SCI_PROTOCOL
 	depends on TI_SCI_INTA_IRQCHIP
@@ -48,7 +48,7 @@ config TI_K3_UDMA
 	  DMA engine is used in AM65x and j721e.
 
 config TI_K3_UDMA_GLUE_LAYER
-	bool "Texas Instruments UDMA Glue layer for non DMAengine users"
+	tristate "Texas Instruments UDMA Glue layer for non DMAengine users"
 	depends on ARCH_K3
 	depends on TI_K3_UDMA
 	help
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
index 4fdd9f06b7235..c29de4695ae7a 100644
--- a/drivers/dma/ti/k3-udma-glue.c
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -6,6 +6,7 @@
  *
  */
 
+#include <linux/module.h>
 #include <linux/atomic.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
@@ -1433,4 +1434,6 @@ static int __init k3_udma_glue_class_init(void)
 {
 	return class_register(&k3_udma_glue_devclass);
 }
-arch_initcall(k3_udma_glue_class_init);
+
+module_init(k3_udma_glue_class_init);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 7b5081989b3d6..ce8b80bb34d7d 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/dmaengine.h>
 #include <linux/dma-mapping.h>
@@ -4335,18 +4336,10 @@ static const struct of_device_id udma_of_match[] = {
 		.compatible = "ti,j721e-navss-mcu-udmap",
 		.data = &j721e_mcu_data,
 	},
-	{ /* Sentinel */ },
-};
-
-static const struct of_device_id bcdma_of_match[] = {
 	{
 		.compatible = "ti,am64-dmss-bcdma",
 		.data = &am64_bcdma_data,
 	},
-	{ /* Sentinel */ },
-};
-
-static const struct of_device_id pktdma_of_match[] = {
 	{
 		.compatible = "ti,am64-dmss-pktdma",
 		.data = &am64_pktdma_data,
@@ -5271,14 +5264,9 @@ static int udma_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	match = of_match_node(udma_of_match, dev->of_node);
-	if (!match)
-		match = of_match_node(bcdma_of_match, dev->of_node);
 	if (!match) {
-		match = of_match_node(pktdma_of_match, dev->of_node);
-		if (!match) {
-			dev_err(dev, "No compatible match found\n");
-			return -ENODEV;
-		}
+		dev_err(dev, "No compatible match found\n");
+		return -ENODEV;
 	}
 	ud->match_data = match->data;
 
@@ -5511,27 +5499,9 @@ static struct platform_driver udma_driver = {
 	},
 	.probe		= udma_probe,
 };
-builtin_platform_driver(udma_driver);
 
-static struct platform_driver bcdma_driver = {
-	.driver = {
-		.name	= "ti-bcdma",
-		.of_match_table = bcdma_of_match,
-		.suppress_bind_attrs = true,
-	},
-	.probe		= udma_probe,
-};
-builtin_platform_driver(bcdma_driver);
-
-static struct platform_driver pktdma_driver = {
-	.driver = {
-		.name	= "ti-pktdma",
-		.of_match_table = pktdma_of_match,
-		.suppress_bind_attrs = true,
-	},
-	.probe		= udma_probe,
-};
-builtin_platform_driver(pktdma_driver);
+module_platform_driver(udma_driver);
+MODULE_LICENSE("GPL v2");
 
 /* Private interfaces to UDMA */
 #include "k3-udma-private.c"
-- 
GitLab


From d15aae73a9f6c321167b9120f263df7dbc08d2ba Mon Sep 17 00:00:00 2001
From: Kevin Hilman <khilman@baylibre.com>
Date: Thu, 29 Sep 2022 16:48:20 -0700
Subject: [PATCH 0186/4122] dmaengine: ti: convert PSIL to be buildable as
 module

Combine all the SoC specific files into a single lib that can be
built-in or built as a module.

Acked-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Signed-off-by: Kevin Hilman <khilman@baylibre.com>
Link: https://lore.kernel.org/r/20220929234820.940048-4-khilman@baylibre.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/ti/Kconfig   |  3 ++-
 drivers/dma/ti/Makefile  | 15 ++++++++-------
 drivers/dma/ti/k3-psil.c |  2 ++
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/dma/ti/Kconfig b/drivers/dma/ti/Kconfig
index f196be3b222f9..2adc2cca10e92 100644
--- a/drivers/dma/ti/Kconfig
+++ b/drivers/dma/ti/Kconfig
@@ -56,7 +56,8 @@ config TI_K3_UDMA_GLUE_LAYER
 	  If unsure, say N.
 
 config TI_K3_PSIL
-	bool
+       tristate
+       default TI_K3_UDMA
 
 config TI_DMA_CROSSBAR
 	bool
diff --git a/drivers/dma/ti/Makefile b/drivers/dma/ti/Makefile
index d3a303f0d7c62..b53d05b11ca5e 100644
--- a/drivers/dma/ti/Makefile
+++ b/drivers/dma/ti/Makefile
@@ -4,11 +4,12 @@ obj-$(CONFIG_TI_EDMA) += edma.o
 obj-$(CONFIG_DMA_OMAP) += omap-dma.o
 obj-$(CONFIG_TI_K3_UDMA) += k3-udma.o
 obj-$(CONFIG_TI_K3_UDMA_GLUE_LAYER) += k3-udma-glue.o
-obj-$(CONFIG_TI_K3_PSIL) += k3-psil.o \
-			    k3-psil-am654.o \
-			    k3-psil-j721e.o \
-			    k3-psil-j7200.o \
-			    k3-psil-am64.o \
-			    k3-psil-j721s2.o \
-			    k3-psil-am62.o
+k3-psil-lib-objs := k3-psil.o \
+		    k3-psil-am654.o \
+		    k3-psil-j721e.o \
+		    k3-psil-j7200.o \
+		    k3-psil-am64.o \
+		    k3-psil-j721s2.o \
+		    k3-psil-am62.o
+obj-$(CONFIG_TI_K3_PSIL) += k3-psil-lib.o
 obj-$(CONFIG_TI_DMA_CROSSBAR) += dma-crossbar.o
diff --git a/drivers/dma/ti/k3-psil.c b/drivers/dma/ti/k3-psil.c
index 761a384093d20..8b6533a1eeeb9 100644
--- a/drivers/dma/ti/k3-psil.c
+++ b/drivers/dma/ti/k3-psil.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/device.h>
 #include <linux/init.h>
 #include <linux/mutex.h>
@@ -101,3 +102,4 @@ int psil_set_new_ep_config(struct device *dev, const char *name,
 	return 0;
 }
 EXPORT_SYMBOL_GPL(psil_set_new_ep_config);
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 97c4cf380ff2d5a58ff13b9ac415ad998f623510 Mon Sep 17 00:00:00 2001
From: Siarhei Volkau <lis8215@gmail.com>
Date: Wed, 19 Oct 2022 09:39:33 +0300
Subject: [PATCH 0187/4122] dt-bindings: ingenic: Add support for the JZ4755
 dmaengine

Update documentation prior to adding driver changes.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Link: https://lore.kernel.org/r/20221019063934.3278444-2-lis8215@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/ingenic,dma.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
index 3b0b3b919af8c..e42b8ce948db1 100644
--- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
+++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
@@ -18,6 +18,7 @@ properties:
       - enum:
           - ingenic,jz4740-dma
           - ingenic,jz4725b-dma
+          - ingenic,jz4755-dma
           - ingenic,jz4760-dma
           - ingenic,jz4760-bdma
           - ingenic,jz4760-mdma
-- 
GitLab


From 042427ea0e415ea25468605f1b562f4ecec43541 Mon Sep 17 00:00:00 2001
From: Siarhei Volkau <lis8215@gmail.com>
Date: Wed, 19 Oct 2022 09:39:34 +0300
Subject: [PATCH 0188/4122] dmaengine: JZ4780: Add support for the JZ4755.

The JZ4755 has 4 DMA channels per DMA unit, two idential DMA units.

The JZ4755 has the similar DMA engine to JZ4725b and it has the
same bug as JZ4725b, see commit a40c94be2336.
At least the JZ_SOC_DATA_BREAK_LINKS flag make it work much better,
although not ideal.

Reviewed-by: Paul Cercueil <paul@crapouillou.net>
Tested-by: Siarhei Volkau <lis8215@gmail.com>
Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Link: https://lore.kernel.org/r/20221019063934.3278444-3-lis8215@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/dma-jz4780.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/dma/dma-jz4780.c b/drivers/dma/dma-jz4780.c
index 2a483802d9eeb..9c1a6e9a9c030 100644
--- a/drivers/dma/dma-jz4780.c
+++ b/drivers/dma/dma-jz4780.c
@@ -1038,6 +1038,13 @@ static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
 		 JZ_SOC_DATA_BREAK_LINKS,
 };
 
+static const struct jz4780_dma_soc_data jz4755_dma_soc_data = {
+	.nb_channels = 4,
+	.transfer_ord_max = 5,
+	.flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC |
+		 JZ_SOC_DATA_BREAK_LINKS,
+};
+
 static const struct jz4780_dma_soc_data jz4760_dma_soc_data = {
 	.nb_channels = 5,
 	.transfer_ord_max = 6,
@@ -1101,6 +1108,7 @@ static const struct jz4780_dma_soc_data x1830_dma_soc_data = {
 static const struct of_device_id jz4780_dma_dt_match[] = {
 	{ .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data },
 	{ .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data },
+	{ .compatible = "ingenic,jz4755-dma", .data = &jz4755_dma_soc_data },
 	{ .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data },
 	{ .compatible = "ingenic,jz4760-mdma", .data = &jz4760_mdma_soc_data },
 	{ .compatible = "ingenic,jz4760-bdma", .data = &jz4760_bdma_soc_data },
-- 
GitLab


From c3b63380f52a5cc945c092259c3545fb4915719d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 14 Oct 2022 18:12:50 +0200
Subject: [PATCH 0189/4122] dmaengine: idma64: Make idma64_remove() return void
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function idma64_remove() returns zero unconditionally. Make it
return void.

This is a preparation for making platform remove callbacks return void.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221014161250.468687-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idma64.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/dma/idma64.c b/drivers/dma/idma64.c
index f4c07ad3be15b..c33087c5cd021 100644
--- a/drivers/dma/idma64.c
+++ b/drivers/dma/idma64.c
@@ -600,7 +600,7 @@ static int idma64_probe(struct idma64_chip *chip)
 	return 0;
 }
 
-static int idma64_remove(struct idma64_chip *chip)
+static void idma64_remove(struct idma64_chip *chip)
 {
 	struct idma64 *idma64 = chip->idma64;
 	unsigned short i;
@@ -618,8 +618,6 @@ static int idma64_remove(struct idma64_chip *chip)
 
 		tasklet_kill(&idma64c->vchan.task);
 	}
-
-	return 0;
 }
 
 /* ---------------------------------------------------------------------- */
@@ -664,7 +662,9 @@ static int idma64_platform_remove(struct platform_device *pdev)
 {
 	struct idma64_chip *chip = platform_get_drvdata(pdev);
 
-	return idma64_remove(chip);
+	idma64_remove(chip);
+
+	return 0;
 }
 
 static int __maybe_unused idma64_pm_suspend(struct device *dev)
-- 
GitLab


From 91123b37e8a99cc489d5bdcfebd1c25f29382504 Mon Sep 17 00:00:00 2001
From: Xiaochen Shen <xiaochen.shen@intel.com>
Date: Sat, 1 Oct 2022 04:15:28 +0800
Subject: [PATCH 0190/4122] dmaengine: idxd: Make max batch size attributes in
 sysfs invisible for Intel IAA

In current code, dev.max_batch_size and wq.max_batch_size attributes in
sysfs are exposed to user to show or update the values.

>From Intel IAA spec [1], Intel IAA does not support batch processing. So
these sysfs attributes should not be supported on IAA device.

Fix this issue by making the attributes of max_batch_size invisible in
sysfs through is_visible() filter when the device is IAA.

Add description in the ABI documentation to mention that the attributes
are not visible when the device does not support batch.

[1]: https://cdrdv2.intel.com/v1/dl/getContent/721858

Fixes: e7184b159dd3 ("dmaengine: idxd: add support for configurable max wq batch size")
Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver")
Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Link: https://lore.kernel.org/r/20220930201528.18621-3-xiaochen.shen@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../ABI/stable/sysfs-driver-dma-idxd          |  2 ++
 drivers/dma/idxd/sysfs.c                      | 32 +++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index 8e2c2c405db22..69e2d9155e0d9 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -22,6 +22,7 @@ Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The largest number of work descriptors in a batch.
+                It's not visible when the device does not support batch.
 
 What:           /sys/bus/dsa/devices/dsa<m>/max_work_queues_size
 Date:           Oct 25, 2019
@@ -205,6 +206,7 @@ KernelVersion:	5.10.0
 Contact:	dmaengine@vger.kernel.org
 Description:	The max batch size for this workqueue. Cannot exceed device
 		max batch size. Configurable parameter.
+		It's not visible when the device does not support batch.
 
 What:		/sys/bus/dsa/devices/wq<m>.<n>/ats_disable
 Date:		Nov 13, 2020
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index bdaccf9e04363..f30aad90537bc 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -1233,6 +1233,14 @@ static bool idxd_wq_attr_op_config_invisible(struct attribute *attr,
 	       !idxd->hw.wq_cap.op_config;
 }
 
+static bool idxd_wq_attr_max_batch_size_invisible(struct attribute *attr,
+						  struct idxd_device *idxd)
+{
+	/* Intel IAA does not support batch processing, make it invisible */
+	return attr == &dev_attr_wq_max_batch_size.attr &&
+	       idxd->data->type == IDXD_TYPE_IAX;
+}
+
 static umode_t idxd_wq_attr_visible(struct kobject *kobj,
 				    struct attribute *attr, int n)
 {
@@ -1243,6 +1251,9 @@ static umode_t idxd_wq_attr_visible(struct kobject *kobj,
 	if (idxd_wq_attr_op_config_invisible(attr, idxd))
 		return 0;
 
+	if (idxd_wq_attr_max_batch_size_invisible(attr, idxd))
+		return 0;
+
 	return attr->mode;
 }
 
@@ -1533,6 +1544,26 @@ static ssize_t cmd_status_store(struct device *dev, struct device_attribute *att
 }
 static DEVICE_ATTR_RW(cmd_status);
 
+static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr,
+						      struct idxd_device *idxd)
+{
+	/* Intel IAA does not support batch processing, make it invisible */
+	return attr == &dev_attr_max_batch_size.attr &&
+	       idxd->data->type == IDXD_TYPE_IAX;
+}
+
+static umode_t idxd_device_attr_visible(struct kobject *kobj,
+					struct attribute *attr, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct idxd_device *idxd = confdev_to_idxd(dev);
+
+	if (idxd_device_attr_max_batch_size_invisible(attr, idxd))
+		return 0;
+
+	return attr->mode;
+}
+
 static struct attribute *idxd_device_attributes[] = {
 	&dev_attr_version.attr,
 	&dev_attr_max_groups.attr,
@@ -1560,6 +1591,7 @@ static struct attribute *idxd_device_attributes[] = {
 
 static const struct attribute_group idxd_device_attribute_group = {
 	.attrs = idxd_device_attributes,
+	.is_visible = idxd_device_attr_visible,
 };
 
 static const struct attribute_group *idxd_attribute_groups[] = {
-- 
GitLab


From 568aa6dd641f63166bb60d769e256789b3ac42d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Povi=C5=A1er?= <povik+lin@cutebit.org>
Date: Wed, 19 Oct 2022 15:23:24 +0200
Subject: [PATCH 0191/4122] dmaengine: apple-admac: Allocate cache SRAM to
 channels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There's a previously unknown part of the controller interface: We have
to assign SRAM carveouts to channels to store their in-flight samples
in. So, obtain the size of the SRAM from a read-only register and divide
it into 2K blocks for allocation to channels. The FIFO depths we
configure will always fit into 2K.

(This fixes audio artifacts during simultaneous playback/capture on
multiple channels -- which looking back is fully accounted for by having
had the caches in the DMA controller overlap in memory.)

Fixes: b127315d9a78 ("dmaengine: apple-admac: Add Apple ADMAC driver")
Signed-off-by: Martin Povišer <povik+lin@cutebit.org>
Link: https://lore.kernel.org/r/20221019132324.8585-2-povik+lin@cutebit.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/apple-admac.c | 102 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 101 insertions(+), 1 deletion(-)

diff --git a/drivers/dma/apple-admac.c b/drivers/dma/apple-admac.c
index 317ca76ccafd2..8f27445994354 100644
--- a/drivers/dma/apple-admac.c
+++ b/drivers/dma/apple-admac.c
@@ -21,6 +21,12 @@
 #define NCHANNELS_MAX	64
 #define IRQ_NOUTPUTS	4
 
+/*
+ * For allocation purposes we split the cache
+ * memory into blocks of fixed size (given in bytes).
+ */
+#define SRAM_BLOCK	2048
+
 #define RING_WRITE_SLOT		GENMASK(1, 0)
 #define RING_READ_SLOT		GENMASK(5, 4)
 #define RING_FULL		BIT(9)
@@ -36,6 +42,9 @@
 #define REG_TX_STOP		0x0004
 #define REG_RX_START		0x0008
 #define REG_RX_STOP		0x000c
+#define REG_IMPRINT		0x0090
+#define REG_TX_SRAM_SIZE	0x0094
+#define REG_RX_SRAM_SIZE	0x0098
 
 #define REG_CHAN_CTL(ch)	(0x8000 + (ch) * 0x200)
 #define REG_CHAN_CTL_RST_RINGS	BIT(0)
@@ -53,7 +62,9 @@
 #define BUS_WIDTH_FRAME_2_WORDS	0x10
 #define BUS_WIDTH_FRAME_4_WORDS	0x20
 
-#define CHAN_BUFSIZE		0x8000
+#define REG_CHAN_SRAM_CARVEOUT(ch)	(0x8050 + (ch) * 0x200)
+#define CHAN_SRAM_CARVEOUT_SIZE		GENMASK(31, 16)
+#define CHAN_SRAM_CARVEOUT_BASE		GENMASK(15, 0)
 
 #define REG_CHAN_FIFOCTL(ch)	(0x8054 + (ch) * 0x200)
 #define CHAN_FIFOCTL_LIMIT	GENMASK(31, 16)
@@ -76,6 +87,8 @@ struct admac_chan {
 	struct dma_chan chan;
 	struct tasklet_struct tasklet;
 
+	u32 carveout;
+
 	spinlock_t lock;
 	struct admac_tx *current_tx;
 	int nperiod_acks;
@@ -92,12 +105,24 @@ struct admac_chan {
 	struct list_head to_free;
 };
 
+struct admac_sram {
+	u32 size;
+	/*
+	 * SRAM_CARVEOUT has 16-bit fields, so the SRAM cannot be larger than
+	 * 64K and a 32-bit bitfield over 2K blocks covers it.
+	 */
+	u32 allocated;
+};
+
 struct admac_data {
 	struct dma_device dma;
 	struct device *dev;
 	__iomem void *base;
 	struct reset_control *rstc;
 
+	struct mutex cache_alloc_lock;
+	struct admac_sram txcache, rxcache;
+
 	int irq;
 	int irq_index;
 	int nchannels;
@@ -118,6 +143,60 @@ struct admac_tx {
 	struct list_head node;
 };
 
+static int admac_alloc_sram_carveout(struct admac_data *ad,
+				     enum dma_transfer_direction dir,
+				     u32 *out)
+{
+	struct admac_sram *sram;
+	int i, ret = 0, nblocks;
+
+	if (dir == DMA_MEM_TO_DEV)
+		sram = &ad->txcache;
+	else
+		sram = &ad->rxcache;
+
+	mutex_lock(&ad->cache_alloc_lock);
+
+	nblocks = sram->size / SRAM_BLOCK;
+	for (i = 0; i < nblocks; i++)
+		if (!(sram->allocated & BIT(i)))
+			break;
+
+	if (i < nblocks) {
+		*out = FIELD_PREP(CHAN_SRAM_CARVEOUT_BASE, i * SRAM_BLOCK) |
+			FIELD_PREP(CHAN_SRAM_CARVEOUT_SIZE, SRAM_BLOCK);
+		sram->allocated |= BIT(i);
+	} else {
+		ret = -EBUSY;
+	}
+
+	mutex_unlock(&ad->cache_alloc_lock);
+
+	return ret;
+}
+
+static void admac_free_sram_carveout(struct admac_data *ad,
+				     enum dma_transfer_direction dir,
+				     u32 carveout)
+{
+	struct admac_sram *sram;
+	u32 base = FIELD_GET(CHAN_SRAM_CARVEOUT_BASE, carveout);
+	int i;
+
+	if (dir == DMA_MEM_TO_DEV)
+		sram = &ad->txcache;
+	else
+		sram = &ad->rxcache;
+
+	if (WARN_ON(base >= sram->size))
+		return;
+
+	mutex_lock(&ad->cache_alloc_lock);
+	i = base / SRAM_BLOCK;
+	sram->allocated &= ~BIT(i);
+	mutex_unlock(&ad->cache_alloc_lock);
+}
+
 static void admac_modify(struct admac_data *ad, int reg, u32 mask, u32 val)
 {
 	void __iomem *addr = ad->base + reg;
@@ -466,15 +545,28 @@ static void admac_synchronize(struct dma_chan *chan)
 static int admac_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct admac_chan *adchan = to_admac_chan(chan);
+	struct admac_data *ad = adchan->host;
+	int ret;
 
 	dma_cookie_init(&adchan->chan);
+	ret = admac_alloc_sram_carveout(ad, admac_chan_direction(adchan->no),
+					&adchan->carveout);
+	if (ret < 0)
+		return ret;
+
+	writel_relaxed(adchan->carveout,
+		       ad->base + REG_CHAN_SRAM_CARVEOUT(adchan->no));
 	return 0;
 }
 
 static void admac_free_chan_resources(struct dma_chan *chan)
 {
+	struct admac_chan *adchan = to_admac_chan(chan);
+
 	admac_terminate_all(chan);
 	admac_synchronize(chan);
+	admac_free_sram_carveout(adchan->host, admac_chan_direction(adchan->no),
+				 adchan->carveout);
 }
 
 static struct dma_chan *admac_dma_of_xlate(struct of_phandle_args *dma_spec,
@@ -712,6 +804,7 @@ static int admac_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ad);
 	ad->dev = &pdev->dev;
 	ad->nchannels = nchannels;
+	mutex_init(&ad->cache_alloc_lock);
 
 	/*
 	 * The controller has 4 IRQ outputs. Try them all until
@@ -801,6 +894,13 @@ static int admac_probe(struct platform_device *pdev)
 		goto free_irq;
 	}
 
+	ad->txcache.size = readl_relaxed(ad->base + REG_TX_SRAM_SIZE);
+	ad->rxcache.size = readl_relaxed(ad->base + REG_RX_SRAM_SIZE);
+
+	dev_info(&pdev->dev, "Audio DMA Controller\n");
+	dev_info(&pdev->dev, "imprint %x TX cache %u RX cache %u\n",
+		 readl_relaxed(ad->base + REG_IMPRINT), ad->txcache.size, ad->rxcache.size);
+
 	return 0;
 
 free_irq:
-- 
GitLab


From cd0ab43ec91a6114ea309e9e72382fdb184e7b9a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Oct 2022 17:03:32 +0200
Subject: [PATCH 0192/4122] dmaengine: remove iop-adma driver

The iop32x platform was removed, so this driver is no longer
needed.

Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/r/20221019150410.3851944-10-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig    |    8 -
 drivers/dma/Makefile   |    1 -
 drivers/dma/iop-adma.c | 1554 ----------------------------------------
 drivers/dma/iop-adma.h |  914 -----------------------
 4 files changed, 2477 deletions(-)
 delete mode 100644 drivers/dma/iop-adma.c
 delete mode 100644 drivers/dma/iop-adma.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 7524b62a8870a..b73fc89ba8776 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -357,14 +357,6 @@ config INTEL_IOATDMA
 
 	  If unsure, say N.
 
-config INTEL_IOP_ADMA
-	tristate "Intel IOP32x ADMA support"
-	depends on ARCH_IOP32X || COMPILE_TEST
-	select DMA_ENGINE
-	select ASYNC_TX_ENABLE_CHANNEL_SWITCH
-	help
-	  Enable support for the Intel(R) IOP Series RAID engines.
-
 config K3_DMA
 	tristate "Hisilicon K3 DMA support"
 	depends on ARCH_HI3xxx || ARCH_HISI || COMPILE_TEST
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 10f7d42410017..5b55ada052a7f 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -44,7 +44,6 @@ obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_INTEL_IDMA64) += idma64.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
 obj-y += idxd/
-obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
 obj-$(CONFIG_MILBEAUT_HDMAC) += milbeaut-hdmac.o
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
deleted file mode 100644
index 310b899d581fa..0000000000000
--- a/drivers/dma/iop-adma.c
+++ /dev/null
@@ -1,1554 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * offload engine driver for the Intel Xscale series of i/o processors
- * Copyright © 2006, Intel Corporation.
- */
-
-/*
- * This driver supports the asynchrounous DMA copy and RAID engines available
- * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/dma-mapping.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/platform_device.h>
-#include <linux/prefetch.h>
-#include <linux/memory.h>
-#include <linux/ioport.h>
-#include <linux/raid/pq.h>
-#include <linux/slab.h>
-
-#include "iop-adma.h"
-#include "dmaengine.h"
-
-#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
-#define to_iop_adma_device(dev) \
-	container_of(dev, struct iop_adma_device, common)
-#define tx_to_iop_adma_slot(tx) \
-	container_of(tx, struct iop_adma_desc_slot, async_tx)
-
-/**
- * iop_adma_free_slots - flags descriptor slots for reuse
- * @slot: Slot to free
- * Caller must hold &iop_chan->lock while calling this function
- */
-static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
-{
-	int stride = slot->slots_per_op;
-
-	while (stride--) {
-		slot->slots_per_op = 0;
-		slot = list_entry(slot->slot_node.next,
-				struct iop_adma_desc_slot,
-				slot_node);
-	}
-}
-
-static dma_cookie_t
-iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
-	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
-{
-	struct dma_async_tx_descriptor *tx = &desc->async_tx;
-
-	BUG_ON(tx->cookie < 0);
-	if (tx->cookie > 0) {
-		cookie = tx->cookie;
-		tx->cookie = 0;
-
-		/* call the callback (must not sleep or submit new
-		 * operations to this channel)
-		 */
-		dmaengine_desc_get_callback_invoke(tx, NULL);
-
-		dma_descriptor_unmap(tx);
-		if (desc->group_head)
-			desc->group_head = NULL;
-	}
-
-	/* run dependent operations */
-	dma_run_dependencies(tx);
-
-	return cookie;
-}
-
-static int
-iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
-	struct iop_adma_chan *iop_chan)
-{
-	/* the client is allowed to attach dependent operations
-	 * until 'ack' is set
-	 */
-	if (!async_tx_test_ack(&desc->async_tx))
-		return 0;
-
-	/* leave the last descriptor in the chain
-	 * so we can append to it
-	 */
-	if (desc->chain_node.next == &iop_chan->chain)
-		return 1;
-
-	dev_dbg(iop_chan->device->common.dev,
-		"\tfree slot: %d slots_per_op: %d\n",
-		desc->idx, desc->slots_per_op);
-
-	list_del(&desc->chain_node);
-	iop_adma_free_slots(desc);
-
-	return 0;
-}
-
-static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
-{
-	struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
-	dma_cookie_t cookie = 0;
-	u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
-	int busy = iop_chan_is_busy(iop_chan);
-	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
-
-	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
-	/* free completed slots from the chain starting with
-	 * the oldest descriptor
-	 */
-	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
-					chain_node) {
-		pr_debug("\tcookie: %d slot: %d busy: %d "
-			"this_desc: %pad next_desc: %#llx ack: %d\n",
-			iter->async_tx.cookie, iter->idx, busy,
-			&iter->async_tx.phys, (u64)iop_desc_get_next_desc(iter),
-			async_tx_test_ack(&iter->async_tx));
-		prefetch(_iter);
-		prefetch(&_iter->async_tx);
-
-		/* do not advance past the current descriptor loaded into the
-		 * hardware channel, subsequent descriptors are either in
-		 * process or have not been submitted
-		 */
-		if (seen_current)
-			break;
-
-		/* stop the search if we reach the current descriptor and the
-		 * channel is busy, or if it appears that the current descriptor
-		 * needs to be re-read (i.e. has been appended to)
-		 */
-		if (iter->async_tx.phys == current_desc) {
-			BUG_ON(seen_current++);
-			if (busy || iop_desc_get_next_desc(iter))
-				break;
-		}
-
-		/* detect the start of a group transaction */
-		if (!slot_cnt && !slots_per_op) {
-			slot_cnt = iter->slot_cnt;
-			slots_per_op = iter->slots_per_op;
-			if (slot_cnt <= slots_per_op) {
-				slot_cnt = 0;
-				slots_per_op = 0;
-			}
-		}
-
-		if (slot_cnt) {
-			pr_debug("\tgroup++\n");
-			if (!grp_start)
-				grp_start = iter;
-			slot_cnt -= slots_per_op;
-		}
-
-		/* all the members of a group are complete */
-		if (slots_per_op != 0 && slot_cnt == 0) {
-			struct iop_adma_desc_slot *grp_iter, *_grp_iter;
-			int end_of_chain = 0;
-			pr_debug("\tgroup end\n");
-
-			/* collect the total results */
-			if (grp_start->xor_check_result) {
-				u32 zero_sum_result = 0;
-				slot_cnt = grp_start->slot_cnt;
-				grp_iter = grp_start;
-
-				list_for_each_entry_from(grp_iter,
-					&iop_chan->chain, chain_node) {
-					zero_sum_result |=
-					    iop_desc_get_zero_result(grp_iter);
-					pr_debug("\titer%d result: %d\n",
-					    grp_iter->idx, zero_sum_result);
-					slot_cnt -= slots_per_op;
-					if (slot_cnt == 0)
-						break;
-				}
-				pr_debug("\tgrp_start->xor_check_result: %p\n",
-					grp_start->xor_check_result);
-				*grp_start->xor_check_result = zero_sum_result;
-			}
-
-			/* clean up the group */
-			slot_cnt = grp_start->slot_cnt;
-			grp_iter = grp_start;
-			list_for_each_entry_safe_from(grp_iter, _grp_iter,
-				&iop_chan->chain, chain_node) {
-				cookie = iop_adma_run_tx_complete_actions(
-					grp_iter, iop_chan, cookie);
-
-				slot_cnt -= slots_per_op;
-				end_of_chain = iop_adma_clean_slot(grp_iter,
-					iop_chan);
-
-				if (slot_cnt == 0 || end_of_chain)
-					break;
-			}
-
-			/* the group should be complete at this point */
-			BUG_ON(slot_cnt);
-
-			slots_per_op = 0;
-			grp_start = NULL;
-			if (end_of_chain)
-				break;
-			else
-				continue;
-		} else if (slots_per_op) /* wait for group completion */
-			continue;
-
-		/* write back zero sum results (single descriptor case) */
-		if (iter->xor_check_result && iter->async_tx.cookie)
-			*iter->xor_check_result =
-				iop_desc_get_zero_result(iter);
-
-		cookie = iop_adma_run_tx_complete_actions(
-					iter, iop_chan, cookie);
-
-		if (iop_adma_clean_slot(iter, iop_chan))
-			break;
-	}
-
-	if (cookie > 0) {
-		iop_chan->common.completed_cookie = cookie;
-		pr_debug("\tcompleted cookie %d\n", cookie);
-	}
-}
-
-static void
-iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
-{
-	spin_lock_bh(&iop_chan->lock);
-	__iop_adma_slot_cleanup(iop_chan);
-	spin_unlock_bh(&iop_chan->lock);
-}
-
-static void iop_adma_tasklet(struct tasklet_struct *t)
-{
-	struct iop_adma_chan *iop_chan = from_tasklet(iop_chan, t,
-						      irq_tasklet);
-
-	/* lockdep will flag depedency submissions as potentially
-	 * recursive locking, this is not the case as a dependency
-	 * submission will never recurse a channels submit routine.
-	 * There are checks in async_tx.c to prevent this.
-	 */
-	spin_lock_nested(&iop_chan->lock, SINGLE_DEPTH_NESTING);
-	__iop_adma_slot_cleanup(iop_chan);
-	spin_unlock(&iop_chan->lock);
-}
-
-static struct iop_adma_desc_slot *
-iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
-			int slots_per_op)
-{
-	struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
-	LIST_HEAD(chain);
-	int slots_found, retry = 0;
-
-	/* start search from the last allocated descrtiptor
-	 * if a contiguous allocation can not be found start searching
-	 * from the beginning of the list
-	 */
-retry:
-	slots_found = 0;
-	if (retry == 0)
-		iter = iop_chan->last_used;
-	else
-		iter = list_entry(&iop_chan->all_slots,
-			struct iop_adma_desc_slot,
-			slot_node);
-
-	list_for_each_entry_safe_continue(
-		iter, _iter, &iop_chan->all_slots, slot_node) {
-		prefetch(_iter);
-		prefetch(&_iter->async_tx);
-		if (iter->slots_per_op) {
-			/* give up after finding the first busy slot
-			 * on the second pass through the list
-			 */
-			if (retry)
-				break;
-
-			slots_found = 0;
-			continue;
-		}
-
-		/* start the allocation if the slot is correctly aligned */
-		if (!slots_found++) {
-			if (iop_desc_is_aligned(iter, slots_per_op))
-				alloc_start = iter;
-			else {
-				slots_found = 0;
-				continue;
-			}
-		}
-
-		if (slots_found == num_slots) {
-			struct iop_adma_desc_slot *alloc_tail = NULL;
-			struct iop_adma_desc_slot *last_used = NULL;
-			iter = alloc_start;
-			while (num_slots) {
-				int i;
-				dev_dbg(iop_chan->device->common.dev,
-					"allocated slot: %d "
-					"(desc %p phys: %#llx) slots_per_op %d\n",
-					iter->idx, iter->hw_desc,
-					(u64)iter->async_tx.phys, slots_per_op);
-
-				/* pre-ack all but the last descriptor */
-				if (num_slots != slots_per_op)
-					async_tx_ack(&iter->async_tx);
-
-				list_add_tail(&iter->chain_node, &chain);
-				alloc_tail = iter;
-				iter->async_tx.cookie = 0;
-				iter->slot_cnt = num_slots;
-				iter->xor_check_result = NULL;
-				for (i = 0; i < slots_per_op; i++) {
-					iter->slots_per_op = slots_per_op - i;
-					last_used = iter;
-					iter = list_entry(iter->slot_node.next,
-						struct iop_adma_desc_slot,
-						slot_node);
-				}
-				num_slots -= slots_per_op;
-			}
-			alloc_tail->group_head = alloc_start;
-			alloc_tail->async_tx.cookie = -EBUSY;
-			list_splice(&chain, &alloc_tail->tx_list);
-			iop_chan->last_used = last_used;
-			iop_desc_clear_next_desc(alloc_start);
-			iop_desc_clear_next_desc(alloc_tail);
-			return alloc_tail;
-		}
-	}
-	if (!retry++)
-		goto retry;
-
-	/* perform direct reclaim if the allocation fails */
-	__iop_adma_slot_cleanup(iop_chan);
-
-	return NULL;
-}
-
-static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
-{
-	dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
-		iop_chan->pending);
-
-	if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
-		iop_chan->pending = 0;
-		iop_chan_append(iop_chan);
-	}
-}
-
-static dma_cookie_t
-iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
-	struct iop_adma_desc_slot *grp_start, *old_chain_tail;
-	int slot_cnt;
-	dma_cookie_t cookie;
-	dma_addr_t next_dma;
-
-	grp_start = sw_desc->group_head;
-	slot_cnt = grp_start->slot_cnt;
-
-	spin_lock_bh(&iop_chan->lock);
-	cookie = dma_cookie_assign(tx);
-
-	old_chain_tail = list_entry(iop_chan->chain.prev,
-		struct iop_adma_desc_slot, chain_node);
-	list_splice_init(&sw_desc->tx_list,
-			 &old_chain_tail->chain_node);
-
-	/* fix up the hardware chain */
-	next_dma = grp_start->async_tx.phys;
-	iop_desc_set_next_desc(old_chain_tail, next_dma);
-	BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */
-
-	/* check for pre-chained descriptors */
-	iop_paranoia(iop_desc_get_next_desc(sw_desc));
-
-	/* increment the pending count by the number of slots
-	 * memcpy operations have a 1:1 (slot:operation) relation
-	 * other operations are heavier and will pop the threshold
-	 * more often.
-	 */
-	iop_chan->pending += slot_cnt;
-	iop_adma_check_threshold(iop_chan);
-	spin_unlock_bh(&iop_chan->lock);
-
-	dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
-		__func__, sw_desc->async_tx.cookie, sw_desc->idx);
-
-	return cookie;
-}
-
-static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
-static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
-
-/**
- * iop_adma_alloc_chan_resources -  returns the number of allocated descriptors
- * @chan: allocate descriptor resources for this channel
- *
- * Note: We keep the slots for 1 operation on iop_chan->chain at all times.  To
- * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
- * greater than 2x the number slots needed to satisfy a device->max_xor
- * request.
- * */
-static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
-{
-	char *hw_desc;
-	dma_addr_t dma_desc;
-	int idx;
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *slot = NULL;
-	int init = iop_chan->slots_allocated ? 0 : 1;
-	struct iop_adma_platform_data *plat_data =
-		dev_get_platdata(&iop_chan->device->pdev->dev);
-	int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
-
-	/* Allocate descriptor slots */
-	do {
-		idx = iop_chan->slots_allocated;
-		if (idx == num_descs_in_pool)
-			break;
-
-		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
-		if (!slot) {
-			printk(KERN_INFO "IOP ADMA Channel only initialized"
-				" %d descriptor slots", idx);
-			break;
-		}
-		hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
-		slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
-
-		dma_async_tx_descriptor_init(&slot->async_tx, chan);
-		slot->async_tx.tx_submit = iop_adma_tx_submit;
-		INIT_LIST_HEAD(&slot->tx_list);
-		INIT_LIST_HEAD(&slot->chain_node);
-		INIT_LIST_HEAD(&slot->slot_node);
-		dma_desc = iop_chan->device->dma_desc_pool;
-		slot->async_tx.phys = dma_desc + idx * IOP_ADMA_SLOT_SIZE;
-		slot->idx = idx;
-
-		spin_lock_bh(&iop_chan->lock);
-		iop_chan->slots_allocated++;
-		list_add_tail(&slot->slot_node, &iop_chan->all_slots);
-		spin_unlock_bh(&iop_chan->lock);
-	} while (iop_chan->slots_allocated < num_descs_in_pool);
-
-	if (idx && !iop_chan->last_used)
-		iop_chan->last_used = list_entry(iop_chan->all_slots.next,
-					struct iop_adma_desc_slot,
-					slot_node);
-
-	dev_dbg(iop_chan->device->common.dev,
-		"allocated %d descriptor slots last_used: %p\n",
-		iop_chan->slots_allocated, iop_chan->last_used);
-
-	/* initialize the channel and the chain with a null operation */
-	if (init) {
-		if (dma_has_cap(DMA_MEMCPY,
-			iop_chan->device->common.cap_mask))
-			iop_chan_start_null_memcpy(iop_chan);
-		else if (dma_has_cap(DMA_XOR,
-			iop_chan->device->common.cap_mask))
-			iop_chan_start_null_xor(iop_chan);
-		else
-			BUG();
-	}
-
-	return (idx > 0) ? idx : -ENOMEM;
-}
-
-static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *sw_desc, *grp_start;
-	int slot_cnt, slots_per_op;
-
-	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		grp_start = sw_desc->group_head;
-		iop_desc_init_interrupt(grp_start, iop_chan);
-		sw_desc->async_tx.flags = flags;
-	}
-	spin_unlock_bh(&iop_chan->lock);
-
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
-			 dma_addr_t dma_src, size_t len, unsigned long flags)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *sw_desc, *grp_start;
-	int slot_cnt, slots_per_op;
-
-	if (unlikely(!len))
-		return NULL;
-	BUG_ON(len > IOP_ADMA_MAX_BYTE_COUNT);
-
-	dev_dbg(iop_chan->device->common.dev, "%s len: %zu\n",
-		__func__, len);
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		grp_start = sw_desc->group_head;
-		iop_desc_init_memcpy(grp_start, flags);
-		iop_desc_set_byte_count(grp_start, iop_chan, len);
-		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
-		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
-		sw_desc->async_tx.flags = flags;
-	}
-	spin_unlock_bh(&iop_chan->lock);
-
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
-		      dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
-		      unsigned long flags)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *sw_desc, *grp_start;
-	int slot_cnt, slots_per_op;
-
-	if (unlikely(!len))
-		return NULL;
-	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
-
-	dev_dbg(iop_chan->device->common.dev,
-		"%s src_cnt: %d len: %zu flags: %lx\n",
-		__func__, src_cnt, len, flags);
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		grp_start = sw_desc->group_head;
-		iop_desc_init_xor(grp_start, src_cnt, flags);
-		iop_desc_set_byte_count(grp_start, iop_chan, len);
-		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
-		sw_desc->async_tx.flags = flags;
-		while (src_cnt--)
-			iop_desc_set_xor_src_addr(grp_start, src_cnt,
-						  dma_src[src_cnt]);
-	}
-	spin_unlock_bh(&iop_chan->lock);
-
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src,
-			  unsigned int src_cnt, size_t len, u32 *result,
-			  unsigned long flags)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *sw_desc, *grp_start;
-	int slot_cnt, slots_per_op;
-
-	if (unlikely(!len))
-		return NULL;
-
-	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n",
-		__func__, src_cnt, len);
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		grp_start = sw_desc->group_head;
-		iop_desc_init_zero_sum(grp_start, src_cnt, flags);
-		iop_desc_set_zero_sum_byte_count(grp_start, len);
-		grp_start->xor_check_result = result;
-		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
-			__func__, grp_start->xor_check_result);
-		sw_desc->async_tx.flags = flags;
-		while (src_cnt--)
-			iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
-						       dma_src[src_cnt]);
-	}
-	spin_unlock_bh(&iop_chan->lock);
-
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src,
-		     unsigned int src_cnt, const unsigned char *scf, size_t len,
-		     unsigned long flags)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *sw_desc, *g;
-	int slot_cnt, slots_per_op;
-	int continue_srcs;
-
-	if (unlikely(!len))
-		return NULL;
-	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
-
-	dev_dbg(iop_chan->device->common.dev,
-		"%s src_cnt: %d len: %zu flags: %lx\n",
-		__func__, src_cnt, len, flags);
-
-	if (dmaf_p_disabled_continue(flags))
-		continue_srcs = 1+src_cnt;
-	else if (dmaf_continue(flags))
-		continue_srcs = 3+src_cnt;
-	else
-		continue_srcs = 0+src_cnt;
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_pq_slot_count(len, continue_srcs, &slots_per_op);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		int i;
-
-		g = sw_desc->group_head;
-		iop_desc_set_byte_count(g, iop_chan, len);
-
-		/* even if P is disabled its destination address (bits
-		 * [3:0]) must match Q.  It is ok if P points to an
-		 * invalid address, it won't be written.
-		 */
-		if (flags & DMA_PREP_PQ_DISABLE_P)
-			dst[0] = dst[1] & 0x7;
-
-		iop_desc_set_pq_addr(g, dst);
-		sw_desc->async_tx.flags = flags;
-		for (i = 0; i < src_cnt; i++)
-			iop_desc_set_pq_src_addr(g, i, src[i], scf[i]);
-
-		/* if we are continuing a previous operation factor in
-		 * the old p and q values, see the comment for dma_maxpq
-		 * in include/linux/dmaengine.h
-		 */
-		if (dmaf_p_disabled_continue(flags))
-			iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
-		else if (dmaf_continue(flags)) {
-			iop_desc_set_pq_src_addr(g, i++, dst[0], 0);
-			iop_desc_set_pq_src_addr(g, i++, dst[1], 1);
-			iop_desc_set_pq_src_addr(g, i++, dst[1], 0);
-		}
-		iop_desc_init_pq(g, i, flags);
-	}
-	spin_unlock_bh(&iop_chan->lock);
-
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-static struct dma_async_tx_descriptor *
-iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src,
-			 unsigned int src_cnt, const unsigned char *scf,
-			 size_t len, enum sum_check_flags *pqres,
-			 unsigned long flags)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *sw_desc, *g;
-	int slot_cnt, slots_per_op;
-
-	if (unlikely(!len))
-		return NULL;
-	BUG_ON(len > IOP_ADMA_XOR_MAX_BYTE_COUNT);
-
-	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %zu\n",
-		__func__, src_cnt, len);
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_pq_zero_sum_slot_count(len, src_cnt + 2, &slots_per_op);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		/* for validate operations p and q are tagged onto the
-		 * end of the source list
-		 */
-		int pq_idx = src_cnt;
-
-		g = sw_desc->group_head;
-		iop_desc_init_pq_zero_sum(g, src_cnt+2, flags);
-		iop_desc_set_pq_zero_sum_byte_count(g, len);
-		g->pq_check_result = pqres;
-		pr_debug("\t%s: g->pq_check_result: %p\n",
-			__func__, g->pq_check_result);
-		sw_desc->async_tx.flags = flags;
-		while (src_cnt--)
-			iop_desc_set_pq_zero_sum_src_addr(g, src_cnt,
-							  src[src_cnt],
-							  scf[src_cnt]);
-		iop_desc_set_pq_zero_sum_addr(g, pq_idx, src);
-	}
-	spin_unlock_bh(&iop_chan->lock);
-
-	return sw_desc ? &sw_desc->async_tx : NULL;
-}
-
-static void iop_adma_free_chan_resources(struct dma_chan *chan)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	struct iop_adma_desc_slot *iter, *_iter;
-	int in_use_descs = 0;
-
-	iop_adma_slot_cleanup(iop_chan);
-
-	spin_lock_bh(&iop_chan->lock);
-	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
-					chain_node) {
-		in_use_descs++;
-		list_del(&iter->chain_node);
-	}
-	list_for_each_entry_safe_reverse(
-		iter, _iter, &iop_chan->all_slots, slot_node) {
-		list_del(&iter->slot_node);
-		kfree(iter);
-		iop_chan->slots_allocated--;
-	}
-	iop_chan->last_used = NULL;
-
-	dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
-		__func__, iop_chan->slots_allocated);
-	spin_unlock_bh(&iop_chan->lock);
-
-	/* one is ok since we left it on there on purpose */
-	if (in_use_descs > 1)
-		printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
-			in_use_descs - 1);
-}
-
-/**
- * iop_adma_status - poll the status of an ADMA transaction
- * @chan: ADMA channel handle
- * @cookie: ADMA transaction identifier
- * @txstate: a holder for the current state of the channel or NULL
- */
-static enum dma_status iop_adma_status(struct dma_chan *chan,
-					dma_cookie_t cookie,
-					struct dma_tx_state *txstate)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-	int ret;
-
-	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_COMPLETE)
-		return ret;
-
-	iop_adma_slot_cleanup(iop_chan);
-
-	return dma_cookie_status(chan, cookie, txstate);
-}
-
-static irqreturn_t iop_adma_eot_handler(int irq, void *data)
-{
-	struct iop_adma_chan *chan = data;
-
-	dev_dbg(chan->device->common.dev, "%s\n", __func__);
-
-	tasklet_schedule(&chan->irq_tasklet);
-
-	iop_adma_device_clear_eot_status(chan);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
-{
-	struct iop_adma_chan *chan = data;
-
-	dev_dbg(chan->device->common.dev, "%s\n", __func__);
-
-	tasklet_schedule(&chan->irq_tasklet);
-
-	iop_adma_device_clear_eoc_status(chan);
-
-	return IRQ_HANDLED;
-}
-
-static irqreturn_t iop_adma_err_handler(int irq, void *data)
-{
-	struct iop_adma_chan *chan = data;
-	unsigned long status = iop_chan_get_status(chan);
-
-	dev_err(chan->device->common.dev,
-		"error ( %s%s%s%s%s%s%s)\n",
-		iop_is_err_int_parity(status, chan) ? "int_parity " : "",
-		iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
-		iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
-		iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
-		iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
-		iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
-		iop_is_err_split_tx(status, chan) ? "split_tx " : "");
-
-	iop_adma_device_clear_err_status(chan);
-
-	BUG();
-
-	return IRQ_HANDLED;
-}
-
-static void iop_adma_issue_pending(struct dma_chan *chan)
-{
-	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
-
-	if (iop_chan->pending) {
-		iop_chan->pending = 0;
-		iop_chan_append(iop_chan);
-	}
-}
-
-/*
- * Perform a transaction to verify the HW works.
- */
-#define IOP_ADMA_TEST_SIZE 2000
-
-static int iop_adma_memcpy_self_test(struct iop_adma_device *device)
-{
-	int i;
-	void *src, *dest;
-	dma_addr_t src_dma, dest_dma;
-	struct dma_chan *dma_chan;
-	dma_cookie_t cookie;
-	struct dma_async_tx_descriptor *tx;
-	int err = 0;
-	struct iop_adma_chan *iop_chan;
-
-	dev_dbg(device->common.dev, "%s\n", __func__);
-
-	src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
-	if (!src)
-		return -ENOMEM;
-	dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
-	if (!dest) {
-		kfree(src);
-		return -ENOMEM;
-	}
-
-	/* Fill in src buffer */
-	for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
-		((u8 *) src)[i] = (u8)i;
-
-	/* Start copy, using first DMA channel */
-	dma_chan = container_of(device->common.channels.next,
-				struct dma_chan,
-				device_node);
-	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
-		err = -ENODEV;
-		goto out;
-	}
-
-	dest_dma = dma_map_single(dma_chan->device->dev, dest,
-				IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
-	src_dma = dma_map_single(dma_chan->device->dev, src,
-				IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
-	tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
-				      IOP_ADMA_TEST_SIZE,
-				      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-
-	cookie = iop_adma_tx_submit(tx);
-	iop_adma_issue_pending(dma_chan);
-	msleep(1);
-
-	if (iop_adma_status(dma_chan, cookie, NULL) !=
-			DMA_COMPLETE) {
-		dev_err(dma_chan->device->dev,
-			"Self-test copy timed out, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	iop_chan = to_iop_adma_chan(dma_chan);
-	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
-		IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
-	if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
-		dev_err(dma_chan->device->dev,
-			"Self-test copy failed compare, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-free_resources:
-	iop_adma_free_chan_resources(dma_chan);
-out:
-	kfree(src);
-	kfree(dest);
-	return err;
-}
-
-#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
-static int
-iop_adma_xor_val_self_test(struct iop_adma_device *device)
-{
-	int i, src_idx;
-	struct page *dest;
-	struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
-	struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
-	dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
-	dma_addr_t dest_dma;
-	struct dma_async_tx_descriptor *tx;
-	struct dma_chan *dma_chan;
-	dma_cookie_t cookie;
-	u8 cmp_byte = 0;
-	u32 cmp_word;
-	u32 zero_sum_result;
-	int err = 0;
-	struct iop_adma_chan *iop_chan;
-
-	dev_dbg(device->common.dev, "%s\n", __func__);
-
-	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
-		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
-		if (!xor_srcs[src_idx]) {
-			while (src_idx--)
-				__free_page(xor_srcs[src_idx]);
-			return -ENOMEM;
-		}
-	}
-
-	dest = alloc_page(GFP_KERNEL);
-	if (!dest) {
-		while (src_idx--)
-			__free_page(xor_srcs[src_idx]);
-		return -ENOMEM;
-	}
-
-	/* Fill in src buffers */
-	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
-		u8 *ptr = page_address(xor_srcs[src_idx]);
-		for (i = 0; i < PAGE_SIZE; i++)
-			ptr[i] = (1 << src_idx);
-	}
-
-	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
-		cmp_byte ^= (u8) (1 << src_idx);
-
-	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
-			(cmp_byte << 8) | cmp_byte;
-
-	memset(page_address(dest), 0, PAGE_SIZE);
-
-	dma_chan = container_of(device->common.channels.next,
-				struct dma_chan,
-				device_node);
-	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
-		err = -ENODEV;
-		goto out;
-	}
-
-	/* test xor */
-	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
-				PAGE_SIZE, DMA_FROM_DEVICE);
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
-		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
-					   0, PAGE_SIZE, DMA_TO_DEVICE);
-	tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
-				   IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE,
-				   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-
-	cookie = iop_adma_tx_submit(tx);
-	iop_adma_issue_pending(dma_chan);
-	msleep(8);
-
-	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_COMPLETE) {
-		dev_err(dma_chan->device->dev,
-			"Self-test xor timed out, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	iop_chan = to_iop_adma_chan(dma_chan);
-	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
-		PAGE_SIZE, DMA_FROM_DEVICE);
-	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
-		u32 *ptr = page_address(dest);
-		if (ptr[i] != cmp_word) {
-			dev_err(dma_chan->device->dev,
-				"Self-test xor failed compare, disabling\n");
-			err = -ENODEV;
-			goto free_resources;
-		}
-	}
-	dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
-		PAGE_SIZE, DMA_TO_DEVICE);
-
-	/* skip zero sum if the capability is not present */
-	if (!dma_has_cap(DMA_XOR_VAL, dma_chan->device->cap_mask))
-		goto free_resources;
-
-	/* zero sum the sources with the destintation page */
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
-		zero_sum_srcs[i] = xor_srcs[i];
-	zero_sum_srcs[i] = dest;
-
-	zero_sum_result = 1;
-
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
-		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
-					   zero_sum_srcs[i], 0, PAGE_SIZE,
-					   DMA_TO_DEVICE);
-	tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
-				       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
-				       &zero_sum_result,
-				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-
-	cookie = iop_adma_tx_submit(tx);
-	iop_adma_issue_pending(dma_chan);
-	msleep(8);
-
-	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
-		dev_err(dma_chan->device->dev,
-			"Self-test zero sum timed out, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	if (zero_sum_result != 0) {
-		dev_err(dma_chan->device->dev,
-			"Self-test zero sum failed compare, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* test for non-zero parity sum */
-	zero_sum_result = 0;
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
-		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
-					   zero_sum_srcs[i], 0, PAGE_SIZE,
-					   DMA_TO_DEVICE);
-	tx = iop_adma_prep_dma_xor_val(dma_chan, dma_srcs,
-				       IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
-				       &zero_sum_result,
-				       DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
-
-	cookie = iop_adma_tx_submit(tx);
-	iop_adma_issue_pending(dma_chan);
-	msleep(8);
-
-	if (iop_adma_status(dma_chan, cookie, NULL) != DMA_COMPLETE) {
-		dev_err(dma_chan->device->dev,
-			"Self-test non-zero sum timed out, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	if (zero_sum_result != 1) {
-		dev_err(dma_chan->device->dev,
-			"Self-test non-zero sum failed compare, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-free_resources:
-	iop_adma_free_chan_resources(dma_chan);
-out:
-	src_idx = IOP_ADMA_NUM_SRC_TEST;
-	while (src_idx--)
-		__free_page(xor_srcs[src_idx]);
-	__free_page(dest);
-	return err;
-}
-
-#ifdef CONFIG_RAID6_PQ
-static int
-iop_adma_pq_zero_sum_self_test(struct iop_adma_device *device)
-{
-	/* combined sources, software pq results, and extra hw pq results */
-	struct page *pq[IOP_ADMA_NUM_SRC_TEST+2+2];
-	/* ptr to the extra hw pq buffers defined above */
-	struct page **pq_hw = &pq[IOP_ADMA_NUM_SRC_TEST+2];
-	/* address conversion buffers (dma_map / page_address) */
-	void *pq_sw[IOP_ADMA_NUM_SRC_TEST+2];
-	dma_addr_t pq_src[IOP_ADMA_NUM_SRC_TEST+2];
-	dma_addr_t *pq_dest = &pq_src[IOP_ADMA_NUM_SRC_TEST];
-
-	int i;
-	struct dma_async_tx_descriptor *tx;
-	struct dma_chan *dma_chan;
-	dma_cookie_t cookie;
-	u32 zero_sum_result;
-	int err = 0;
-	struct device *dev;
-
-	dev_dbg(device->common.dev, "%s\n", __func__);
-
-	for (i = 0; i < ARRAY_SIZE(pq); i++) {
-		pq[i] = alloc_page(GFP_KERNEL);
-		if (!pq[i]) {
-			while (i--)
-				__free_page(pq[i]);
-			return -ENOMEM;
-		}
-	}
-
-	/* Fill in src buffers */
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) {
-		pq_sw[i] = page_address(pq[i]);
-		memset(pq_sw[i], 0x11111111 * (1<<i), PAGE_SIZE);
-	}
-	pq_sw[i] = page_address(pq[i]);
-	pq_sw[i+1] = page_address(pq[i+1]);
-
-	dma_chan = container_of(device->common.channels.next,
-				struct dma_chan,
-				device_node);
-	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
-		err = -ENODEV;
-		goto out;
-	}
-
-	dev = dma_chan->device->dev;
-
-	/* initialize the dests */
-	memset(page_address(pq_hw[0]), 0 , PAGE_SIZE);
-	memset(page_address(pq_hw[1]), 0 , PAGE_SIZE);
-
-	/* test pq */
-	pq_dest[0] = dma_map_page(dev, pq_hw[0], 0, PAGE_SIZE, DMA_FROM_DEVICE);
-	pq_dest[1] = dma_map_page(dev, pq_hw[1], 0, PAGE_SIZE, DMA_FROM_DEVICE);
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
-		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
-					 DMA_TO_DEVICE);
-
-	tx = iop_adma_prep_dma_pq(dma_chan, pq_dest, pq_src,
-				  IOP_ADMA_NUM_SRC_TEST, (u8 *)raid6_gfexp,
-				  PAGE_SIZE,
-				  DMA_PREP_INTERRUPT |
-				  DMA_CTRL_ACK);
-
-	cookie = iop_adma_tx_submit(tx);
-	iop_adma_issue_pending(dma_chan);
-	msleep(8);
-
-	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_COMPLETE) {
-		dev_err(dev, "Self-test pq timed out, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	raid6_call.gen_syndrome(IOP_ADMA_NUM_SRC_TEST+2, PAGE_SIZE, pq_sw);
-
-	if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST],
-		   page_address(pq_hw[0]), PAGE_SIZE) != 0) {
-		dev_err(dev, "Self-test p failed compare, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-	if (memcmp(pq_sw[IOP_ADMA_NUM_SRC_TEST+1],
-		   page_address(pq_hw[1]), PAGE_SIZE) != 0) {
-		dev_err(dev, "Self-test q failed compare, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* test correct zero sum using the software generated pq values */
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
-		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
-					 DMA_TO_DEVICE);
-
-	zero_sum_result = ~0;
-	tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
-				      pq_src, IOP_ADMA_NUM_SRC_TEST,
-				      raid6_gfexp, PAGE_SIZE, &zero_sum_result,
-				      DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
-
-	cookie = iop_adma_tx_submit(tx);
-	iop_adma_issue_pending(dma_chan);
-	msleep(8);
-
-	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_COMPLETE) {
-		dev_err(dev, "Self-test pq-zero-sum timed out, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	if (zero_sum_result != 0) {
-		dev_err(dev, "Self-test pq-zero-sum failed to validate: %x\n",
-			zero_sum_result);
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	/* test incorrect zero sum */
-	i = IOP_ADMA_NUM_SRC_TEST;
-	memset(pq_sw[i] + 100, 0, 100);
-	memset(pq_sw[i+1] + 200, 0, 200);
-	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 2; i++)
-		pq_src[i] = dma_map_page(dev, pq[i], 0, PAGE_SIZE,
-					 DMA_TO_DEVICE);
-
-	zero_sum_result = 0;
-	tx = iop_adma_prep_dma_pq_val(dma_chan, &pq_src[IOP_ADMA_NUM_SRC_TEST],
-				      pq_src, IOP_ADMA_NUM_SRC_TEST,
-				      raid6_gfexp, PAGE_SIZE, &zero_sum_result,
-				      DMA_PREP_INTERRUPT|DMA_CTRL_ACK);
-
-	cookie = iop_adma_tx_submit(tx);
-	iop_adma_issue_pending(dma_chan);
-	msleep(8);
-
-	if (iop_adma_status(dma_chan, cookie, NULL) !=
-		DMA_COMPLETE) {
-		dev_err(dev, "Self-test !pq-zero-sum timed out, disabling\n");
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-	if (zero_sum_result != (SUM_CHECK_P_RESULT | SUM_CHECK_Q_RESULT)) {
-		dev_err(dev, "Self-test !pq-zero-sum failed to validate: %x\n",
-			zero_sum_result);
-		err = -ENODEV;
-		goto free_resources;
-	}
-
-free_resources:
-	iop_adma_free_chan_resources(dma_chan);
-out:
-	i = ARRAY_SIZE(pq);
-	while (i--)
-		__free_page(pq[i]);
-	return err;
-}
-#endif
-
-static int iop_adma_remove(struct platform_device *dev)
-{
-	struct iop_adma_device *device = platform_get_drvdata(dev);
-	struct dma_chan *chan, *_chan;
-	struct iop_adma_chan *iop_chan;
-	struct iop_adma_platform_data *plat_data = dev_get_platdata(&dev->dev);
-
-	dma_async_device_unregister(&device->common);
-
-	dma_free_coherent(&dev->dev, plat_data->pool_size,
-			device->dma_desc_pool_virt, device->dma_desc_pool);
-
-	list_for_each_entry_safe(chan, _chan, &device->common.channels,
-				device_node) {
-		iop_chan = to_iop_adma_chan(chan);
-		list_del(&chan->device_node);
-		kfree(iop_chan);
-	}
-	kfree(device);
-
-	return 0;
-}
-
-static int iop_adma_probe(struct platform_device *pdev)
-{
-	struct resource *res;
-	int ret = 0, i;
-	struct iop_adma_device *adev;
-	struct iop_adma_chan *iop_chan;
-	struct dma_device *dma_dev;
-	struct iop_adma_platform_data *plat_data = dev_get_platdata(&pdev->dev);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res)
-		return -ENODEV;
-
-	if (!devm_request_mem_region(&pdev->dev, res->start,
-				resource_size(res), pdev->name))
-		return -EBUSY;
-
-	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
-	if (!adev)
-		return -ENOMEM;
-	dma_dev = &adev->common;
-
-	/* allocate coherent memory for hardware descriptors
-	 * note: writecombine gives slightly better performance, but
-	 * requires that we explicitly flush the writes
-	 */
-	adev->dma_desc_pool_virt = dma_alloc_wc(&pdev->dev,
-						plat_data->pool_size,
-						&adev->dma_desc_pool,
-						GFP_KERNEL);
-	if (!adev->dma_desc_pool_virt) {
-		ret = -ENOMEM;
-		goto err_free_adev;
-	}
-
-	dev_dbg(&pdev->dev, "%s: allocated descriptor pool virt %p phys %pad\n",
-		__func__, adev->dma_desc_pool_virt, &adev->dma_desc_pool);
-
-	adev->id = plat_data->hw_id;
-
-	/* discover transaction capabilites from the platform data */
-	dma_dev->cap_mask = plat_data->cap_mask;
-
-	adev->pdev = pdev;
-	platform_set_drvdata(pdev, adev);
-
-	INIT_LIST_HEAD(&dma_dev->channels);
-
-	/* set base routines */
-	dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
-	dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
-	dma_dev->device_tx_status = iop_adma_status;
-	dma_dev->device_issue_pending = iop_adma_issue_pending;
-	dma_dev->dev = &pdev->dev;
-
-	/* set prep routines based on capability */
-	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
-		dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
-	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
-		dma_dev->max_xor = iop_adma_get_max_xor();
-		dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
-	}
-	if (dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask))
-		dma_dev->device_prep_dma_xor_val =
-			iop_adma_prep_dma_xor_val;
-	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask)) {
-		dma_set_maxpq(dma_dev, iop_adma_get_max_pq(), 0);
-		dma_dev->device_prep_dma_pq = iop_adma_prep_dma_pq;
-	}
-	if (dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask))
-		dma_dev->device_prep_dma_pq_val =
-			iop_adma_prep_dma_pq_val;
-	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
-		dma_dev->device_prep_dma_interrupt =
-			iop_adma_prep_dma_interrupt;
-
-	iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
-	if (!iop_chan) {
-		ret = -ENOMEM;
-		goto err_free_dma;
-	}
-	iop_chan->device = adev;
-
-	iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
-					resource_size(res));
-	if (!iop_chan->mmr_base) {
-		ret = -ENOMEM;
-		goto err_free_iop_chan;
-	}
-	tasklet_setup(&iop_chan->irq_tasklet, iop_adma_tasklet);
-
-	/* clear errors before enabling interrupts */
-	iop_adma_device_clear_err_status(iop_chan);
-
-	for (i = 0; i < 3; i++) {
-		static const irq_handler_t handler[] = {
-			iop_adma_eot_handler,
-			iop_adma_eoc_handler,
-			iop_adma_err_handler
-		};
-		int irq = platform_get_irq(pdev, i);
-		if (irq < 0) {
-			ret = -ENXIO;
-			goto err_free_iop_chan;
-		} else {
-			ret = devm_request_irq(&pdev->dev, irq,
-					handler[i], 0, pdev->name, iop_chan);
-			if (ret)
-				goto err_free_iop_chan;
-		}
-	}
-
-	spin_lock_init(&iop_chan->lock);
-	INIT_LIST_HEAD(&iop_chan->chain);
-	INIT_LIST_HEAD(&iop_chan->all_slots);
-	iop_chan->common.device = dma_dev;
-	dma_cookie_init(&iop_chan->common);
-	list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
-
-	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
-		ret = iop_adma_memcpy_self_test(adev);
-		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
-		if (ret)
-			goto err_free_iop_chan;
-	}
-
-	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
-		ret = iop_adma_xor_val_self_test(adev);
-		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
-		if (ret)
-			goto err_free_iop_chan;
-	}
-
-	if (dma_has_cap(DMA_PQ, dma_dev->cap_mask) &&
-	    dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask)) {
-		#ifdef CONFIG_RAID6_PQ
-		ret = iop_adma_pq_zero_sum_self_test(adev);
-		dev_dbg(&pdev->dev, "pq self test returned %d\n", ret);
-		#else
-		/* can not test raid6, so do not publish capability */
-		dma_cap_clear(DMA_PQ, dma_dev->cap_mask);
-		dma_cap_clear(DMA_PQ_VAL, dma_dev->cap_mask);
-		ret = 0;
-		#endif
-		if (ret)
-			goto err_free_iop_chan;
-	}
-
-	dev_info(&pdev->dev, "Intel(R) IOP: ( %s%s%s%s%s%s)\n",
-		 dma_has_cap(DMA_PQ, dma_dev->cap_mask) ? "pq " : "",
-		 dma_has_cap(DMA_PQ_VAL, dma_dev->cap_mask) ? "pq_val " : "",
-		 dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
-		 dma_has_cap(DMA_XOR_VAL, dma_dev->cap_mask) ? "xor_val " : "",
-		 dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
-		 dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
-
-	dma_async_device_register(dma_dev);
-	goto out;
-
- err_free_iop_chan:
-	kfree(iop_chan);
- err_free_dma:
-	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
-			adev->dma_desc_pool_virt, adev->dma_desc_pool);
- err_free_adev:
-	kfree(adev);
- out:
-	return ret;
-}
-
-static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
-{
-	struct iop_adma_desc_slot *sw_desc, *grp_start;
-	dma_cookie_t cookie;
-	int slot_cnt, slots_per_op;
-
-	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		grp_start = sw_desc->group_head;
-
-		list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
-		async_tx_ack(&sw_desc->async_tx);
-		iop_desc_init_memcpy(grp_start, 0);
-		iop_desc_set_byte_count(grp_start, iop_chan, 0);
-		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
-		iop_desc_set_memcpy_src_addr(grp_start, 0);
-
-		cookie = dma_cookie_assign(&sw_desc->async_tx);
-
-		/* initialize the completed cookie to be less than
-		 * the most recently used cookie
-		 */
-		iop_chan->common.completed_cookie = cookie - 1;
-
-		/* channel should not be busy */
-		BUG_ON(iop_chan_is_busy(iop_chan));
-
-		/* clear any prior error-status bits */
-		iop_adma_device_clear_err_status(iop_chan);
-
-		/* disable operation */
-		iop_chan_disable(iop_chan);
-
-		/* set the descriptor address */
-		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
-
-		/* 1/ don't add pre-chained descriptors
-		 * 2/ dummy read to flush next_desc write
-		 */
-		BUG_ON(iop_desc_get_next_desc(sw_desc));
-
-		/* run the descriptor */
-		iop_chan_enable(iop_chan);
-	} else
-		dev_err(iop_chan->device->common.dev,
-			"failed to allocate null descriptor\n");
-	spin_unlock_bh(&iop_chan->lock);
-}
-
-static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
-{
-	struct iop_adma_desc_slot *sw_desc, *grp_start;
-	dma_cookie_t cookie;
-	int slot_cnt, slots_per_op;
-
-	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
-
-	spin_lock_bh(&iop_chan->lock);
-	slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
-	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
-	if (sw_desc) {
-		grp_start = sw_desc->group_head;
-		list_splice_init(&sw_desc->tx_list, &iop_chan->chain);
-		async_tx_ack(&sw_desc->async_tx);
-		iop_desc_init_null_xor(grp_start, 2, 0);
-		iop_desc_set_byte_count(grp_start, iop_chan, 0);
-		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
-		iop_desc_set_xor_src_addr(grp_start, 0, 0);
-		iop_desc_set_xor_src_addr(grp_start, 1, 0);
-
-		cookie = dma_cookie_assign(&sw_desc->async_tx);
-
-		/* initialize the completed cookie to be less than
-		 * the most recently used cookie
-		 */
-		iop_chan->common.completed_cookie = cookie - 1;
-
-		/* channel should not be busy */
-		BUG_ON(iop_chan_is_busy(iop_chan));
-
-		/* clear any prior error-status bits */
-		iop_adma_device_clear_err_status(iop_chan);
-
-		/* disable operation */
-		iop_chan_disable(iop_chan);
-
-		/* set the descriptor address */
-		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
-
-		/* 1/ don't add pre-chained descriptors
-		 * 2/ dummy read to flush next_desc write
-		 */
-		BUG_ON(iop_desc_get_next_desc(sw_desc));
-
-		/* run the descriptor */
-		iop_chan_enable(iop_chan);
-	} else
-		dev_err(iop_chan->device->common.dev,
-			"failed to allocate null descriptor\n");
-	spin_unlock_bh(&iop_chan->lock);
-}
-
-static struct platform_driver iop_adma_driver = {
-	.probe		= iop_adma_probe,
-	.remove		= iop_adma_remove,
-	.driver		= {
-		.name	= "iop-adma",
-	},
-};
-
-module_platform_driver(iop_adma_driver);
-
-MODULE_AUTHOR("Intel Corporation");
-MODULE_DESCRIPTION("IOP ADMA Engine Driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:iop-adma");
diff --git a/drivers/dma/iop-adma.h b/drivers/dma/iop-adma.h
deleted file mode 100644
index d44eabb6f5ebd..0000000000000
--- a/drivers/dma/iop-adma.h
+++ /dev/null
@@ -1,914 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright © 2006, Intel Corporation.
- */
-#ifndef _ADMA_H
-#define _ADMA_H
-#include <linux/types.h>
-#include <linux/io.h>
-#include <linux/platform_data/dma-iop32x.h>
-
-/* Memory copy units */
-#define DMA_CCR(chan)		(chan->mmr_base + 0x0)
-#define DMA_CSR(chan)		(chan->mmr_base + 0x4)
-#define DMA_DAR(chan)		(chan->mmr_base + 0xc)
-#define DMA_NDAR(chan)		(chan->mmr_base + 0x10)
-#define DMA_PADR(chan)		(chan->mmr_base + 0x14)
-#define DMA_PUADR(chan)	(chan->mmr_base + 0x18)
-#define DMA_LADR(chan)		(chan->mmr_base + 0x1c)
-#define DMA_BCR(chan)		(chan->mmr_base + 0x20)
-#define DMA_DCR(chan)		(chan->mmr_base + 0x24)
-
-/* Application accelerator unit  */
-#define AAU_ACR(chan)		(chan->mmr_base + 0x0)
-#define AAU_ASR(chan)		(chan->mmr_base + 0x4)
-#define AAU_ADAR(chan)		(chan->mmr_base + 0x8)
-#define AAU_ANDAR(chan)	(chan->mmr_base + 0xc)
-#define AAU_SAR(src, chan)	(chan->mmr_base + (0x10 + ((src) << 2)))
-#define AAU_DAR(chan)		(chan->mmr_base + 0x20)
-#define AAU_ABCR(chan)		(chan->mmr_base + 0x24)
-#define AAU_ADCR(chan)		(chan->mmr_base + 0x28)
-#define AAU_SAR_EDCR(src_edc)	(chan->mmr_base + (0x02c + ((src_edc-4) << 2)))
-#define AAU_EDCR0_IDX	8
-#define AAU_EDCR1_IDX	17
-#define AAU_EDCR2_IDX	26
-
-struct iop3xx_aau_desc_ctrl {
-	unsigned int int_en:1;
-	unsigned int blk1_cmd_ctrl:3;
-	unsigned int blk2_cmd_ctrl:3;
-	unsigned int blk3_cmd_ctrl:3;
-	unsigned int blk4_cmd_ctrl:3;
-	unsigned int blk5_cmd_ctrl:3;
-	unsigned int blk6_cmd_ctrl:3;
-	unsigned int blk7_cmd_ctrl:3;
-	unsigned int blk8_cmd_ctrl:3;
-	unsigned int blk_ctrl:2;
-	unsigned int dual_xor_en:1;
-	unsigned int tx_complete:1;
-	unsigned int zero_result_err:1;
-	unsigned int zero_result_en:1;
-	unsigned int dest_write_en:1;
-};
-
-struct iop3xx_aau_e_desc_ctrl {
-	unsigned int reserved:1;
-	unsigned int blk1_cmd_ctrl:3;
-	unsigned int blk2_cmd_ctrl:3;
-	unsigned int blk3_cmd_ctrl:3;
-	unsigned int blk4_cmd_ctrl:3;
-	unsigned int blk5_cmd_ctrl:3;
-	unsigned int blk6_cmd_ctrl:3;
-	unsigned int blk7_cmd_ctrl:3;
-	unsigned int blk8_cmd_ctrl:3;
-	unsigned int reserved2:7;
-};
-
-struct iop3xx_dma_desc_ctrl {
-	unsigned int pci_transaction:4;
-	unsigned int int_en:1;
-	unsigned int dac_cycle_en:1;
-	unsigned int mem_to_mem_en:1;
-	unsigned int crc_data_tx_en:1;
-	unsigned int crc_gen_en:1;
-	unsigned int crc_seed_dis:1;
-	unsigned int reserved:21;
-	unsigned int crc_tx_complete:1;
-};
-
-struct iop3xx_desc_dma {
-	u32 next_desc;
-	union {
-		u32 pci_src_addr;
-		u32 pci_dest_addr;
-		u32 src_addr;
-	};
-	union {
-		u32 upper_pci_src_addr;
-		u32 upper_pci_dest_addr;
-	};
-	union {
-		u32 local_pci_src_addr;
-		u32 local_pci_dest_addr;
-		u32 dest_addr;
-	};
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_dma_desc_ctrl desc_ctrl_field;
-	};
-	u32 crc_addr;
-};
-
-struct iop3xx_desc_aau {
-	u32 next_desc;
-	u32 src[4];
-	u32 dest_addr;
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
-	};
-	union {
-		u32 src_addr;
-		u32 e_desc_ctrl;
-		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
-	} src_edc[31];
-};
-
-struct iop3xx_aau_gfmr {
-	unsigned int gfmr1:8;
-	unsigned int gfmr2:8;
-	unsigned int gfmr3:8;
-	unsigned int gfmr4:8;
-};
-
-struct iop3xx_desc_pq_xor {
-	u32 next_desc;
-	u32 src[3];
-	union {
-		u32 data_mult1;
-		struct iop3xx_aau_gfmr data_mult1_field;
-	};
-	u32 dest_addr;
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
-	};
-	union {
-		u32 src_addr;
-		u32 e_desc_ctrl;
-		struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field;
-		u32 data_multiplier;
-		struct iop3xx_aau_gfmr data_mult_field;
-		u32 reserved;
-	} src_edc_gfmr[19];
-};
-
-struct iop3xx_desc_dual_xor {
-	u32 next_desc;
-	u32 src0_addr;
-	u32 src1_addr;
-	u32 h_src_addr;
-	u32 d_src_addr;
-	u32 h_dest_addr;
-	u32 byte_count;
-	union {
-		u32 desc_ctrl;
-		struct iop3xx_aau_desc_ctrl desc_ctrl_field;
-	};
-	u32 d_dest_addr;
-};
-
-union iop3xx_desc {
-	struct iop3xx_desc_aau *aau;
-	struct iop3xx_desc_dma *dma;
-	struct iop3xx_desc_pq_xor *pq_xor;
-	struct iop3xx_desc_dual_xor *dual_xor;
-	void *ptr;
-};
-
-/* No support for p+q operations */
-static inline int
-iop_chan_pq_slot_count(size_t len, int src_cnt, int *slots_per_op)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt,
-		  unsigned long flags)
-{
-	BUG();
-}
-
-static inline void
-iop_desc_set_pq_addr(struct iop_adma_desc_slot *desc, dma_addr_t *addr)
-{
-	BUG();
-}
-
-static inline void
-iop_desc_set_pq_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
-			 dma_addr_t addr, unsigned char coef)
-{
-	BUG();
-}
-
-static inline int
-iop_chan_pq_zero_sum_slot_count(size_t len, int src_cnt, int *slots_per_op)
-{
-	BUG();
-	return 0;
-}
-
-static inline void
-iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
-			  unsigned long flags)
-{
-	BUG();
-}
-
-static inline void
-iop_desc_set_pq_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
-{
-	BUG();
-}
-
-#define iop_desc_set_pq_zero_sum_src_addr iop_desc_set_pq_src_addr
-
-static inline void
-iop_desc_set_pq_zero_sum_addr(struct iop_adma_desc_slot *desc, int pq_idx,
-			      dma_addr_t *src)
-{
-	BUG();
-}
-
-static inline int iop_adma_get_max_xor(void)
-{
-	return 32;
-}
-
-static inline int iop_adma_get_max_pq(void)
-{
-	BUG();
-	return 0;
-}
-
-static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan)
-{
-	int id = chan->device->id;
-
-	switch (id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return __raw_readl(DMA_DAR(chan));
-	case AAU_ID:
-		return __raw_readl(AAU_ADAR(chan));
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan,
-						u32 next_desc_addr)
-{
-	int id = chan->device->id;
-
-	switch (id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		__raw_writel(next_desc_addr, DMA_NDAR(chan));
-		break;
-	case AAU_ID:
-		__raw_writel(next_desc_addr, AAU_ANDAR(chan));
-		break;
-	}
-
-}
-
-#define IOP_ADMA_STATUS_BUSY (1 << 10)
-#define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024)
-#define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)
-#define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024)
-
-static inline int iop_chan_is_busy(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-	return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0;
-}
-
-static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc,
-					int num_slots)
-{
-	/* num_slots will only ever be 1, 2, 4, or 8 */
-	return (desc->idx & (num_slots - 1)) ? 0 : 1;
-}
-
-/* to do: support large (i.e. > hw max) buffer sizes */
-static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op)
-{
-	*slots_per_op = 1;
-	return 1;
-}
-
-/* to do: support large (i.e. > hw max) buffer sizes */
-static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op)
-{
-	*slots_per_op = 1;
-	return 1;
-}
-
-static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt,
-					int *slots_per_op)
-{
-	static const char slot_count_table[] = {
-						1, 1, 1, 1, /* 01 - 04 */
-						2, 2, 2, 2, /* 05 - 08 */
-						4, 4, 4, 4, /* 09 - 12 */
-						4, 4, 4, 4, /* 13 - 16 */
-						8, 8, 8, 8, /* 17 - 20 */
-						8, 8, 8, 8, /* 21 - 24 */
-						8, 8, 8, 8, /* 25 - 28 */
-						8, 8, 8, 8, /* 29 - 32 */
-					      };
-	*slots_per_op = slot_count_table[src_cnt - 1];
-	return *slots_per_op;
-}
-
-static inline int
-iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return iop_chan_memcpy_slot_count(0, slots_per_op);
-	case AAU_ID:
-		return iop3xx_aau_xor_slot_count(0, 2, slots_per_op);
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-static inline int iop_chan_xor_slot_count(size_t len, int src_cnt,
-						int *slots_per_op)
-{
-	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
-
-	if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT)
-		return slot_cnt;
-
-	len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
-	while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) {
-		len -= IOP_ADMA_XOR_MAX_BYTE_COUNT;
-		slot_cnt += *slots_per_op;
-	}
-
-	slot_cnt += *slots_per_op;
-
-	return slot_cnt;
-}
-
-/* zero sum on iop3xx is limited to 1k at a time so it requires multiple
- * descriptors
- */
-static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt,
-						int *slots_per_op)
-{
-	int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op);
-
-	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT)
-		return slot_cnt;
-
-	len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-	while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
-		len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-		slot_cnt += *slots_per_op;
-	}
-
-	slot_cnt += *slots_per_op;
-
-	return slot_cnt;
-}
-
-static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return hw_desc.dma->byte_count;
-	case AAU_ID:
-		return hw_desc.aau->byte_count;
-	default:
-		BUG();
-	}
-	return 0;
-}
-
-/* translate the src_idx to a descriptor word index */
-static inline int __desc_idx(int src_idx)
-{
-	static const int desc_idx_table[] = { 0, 0, 0, 0,
-					      0, 1, 2, 3,
-					      5, 6, 7, 8,
-					      9, 10, 11, 12,
-					      14, 15, 16, 17,
-					      18, 19, 20, 21,
-					      23, 24, 25, 26,
-					      27, 28, 29, 30,
-					    };
-
-	return desc_idx_table[src_idx];
-}
-
-static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan,
-					int src_idx)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return hw_desc.dma->src_addr;
-	case AAU_ID:
-		break;
-	default:
-		BUG();
-	}
-
-	if (src_idx < 4)
-		return hw_desc.aau->src[src_idx];
-	else
-		return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr;
-}
-
-static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc,
-					int src_idx, dma_addr_t addr)
-{
-	if (src_idx < 4)
-		hw_desc->src[src_idx] = addr;
-	else
-		hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr;
-}
-
-static inline void
-iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, unsigned long flags)
-{
-	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop3xx_dma_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	u_desc_ctrl.field.mem_to_mem_en = 1;
-	u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-	hw_desc->upper_pci_src_addr = 0;
-	hw_desc->crc_addr = 0;
-}
-
-static inline void
-iop_desc_init_memset(struct iop_adma_desc_slot *desc, unsigned long flags)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */
-	u_desc_ctrl.field.dest_write_en = 1;
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-}
-
-static inline u32
-iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt,
-		     unsigned long flags)
-{
-	int i, shift;
-	u32 edcr;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	switch (src_cnt) {
-	case 25 ... 32:
-		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		edcr = 0;
-		shift = 1;
-		for (i = 24; i < src_cnt; i++) {
-			edcr |= (1 << shift);
-			shift += 3;
-		}
-		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr;
-		src_cnt = 24;
-		fallthrough;
-	case 17 ... 24:
-		if (!u_desc_ctrl.field.blk_ctrl) {
-			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
-			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		}
-		edcr = 0;
-		shift = 1;
-		for (i = 16; i < src_cnt; i++) {
-			edcr |= (1 << shift);
-			shift += 3;
-		}
-		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr;
-		src_cnt = 16;
-		fallthrough;
-	case 9 ... 16:
-		if (!u_desc_ctrl.field.blk_ctrl)
-			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
-		edcr = 0;
-		shift = 1;
-		for (i = 8; i < src_cnt; i++) {
-			edcr |= (1 << shift);
-			shift += 3;
-		}
-		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr;
-		src_cnt = 8;
-		fallthrough;
-	case 2 ... 8:
-		shift = 1;
-		for (i = 0; i < src_cnt; i++) {
-			u_desc_ctrl.value |= (1 << shift);
-			shift += 3;
-		}
-
-		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
-			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
-	}
-
-	u_desc_ctrl.field.dest_write_en = 1;
-	u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-
-	return u_desc_ctrl.value;
-}
-
-static inline void
-iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt,
-		  unsigned long flags)
-{
-	iop3xx_desc_init_xor(desc->hw_desc, src_cnt, flags);
-}
-
-/* return the number of operations */
-static inline int
-iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt,
-		       unsigned long flags)
-{
-	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
-	struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-	int i, j;
-
-	hw_desc = desc->hw_desc;
-
-	for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0;
-		i += slots_per_op, j++) {
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, flags);
-		u_desc_ctrl.field.dest_write_en = 0;
-		u_desc_ctrl.field.zero_result_en = 1;
-		u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-		iter->desc_ctrl = u_desc_ctrl.value;
-
-		/* for the subsequent descriptors preserve the store queue
-		 * and chain them together
-		 */
-		if (i) {
-			prev_hw_desc =
-				iop_hw_desc_slot_idx(hw_desc, i - slots_per_op);
-			prev_hw_desc->next_desc =
-				(u32) (desc->async_tx.phys + (i << 5));
-		}
-	}
-
-	return j;
-}
-
-static inline void
-iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt,
-		       unsigned long flags)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	union {
-		u32 value;
-		struct iop3xx_aau_desc_ctrl field;
-	} u_desc_ctrl;
-
-	u_desc_ctrl.value = 0;
-	switch (src_cnt) {
-	case 25 ... 32:
-		u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
-		fallthrough;
-	case 17 ... 24:
-		if (!u_desc_ctrl.field.blk_ctrl) {
-			hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0;
-			u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */
-		}
-		hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0;
-		fallthrough;
-	case 9 ... 16:
-		if (!u_desc_ctrl.field.blk_ctrl)
-			u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */
-		hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0;
-		fallthrough;
-	case 1 ... 8:
-		if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4)
-			u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */
-	}
-
-	u_desc_ctrl.field.dest_write_en = 0;
-	u_desc_ctrl.field.int_en = flags & DMA_PREP_INTERRUPT;
-	hw_desc->desc_ctrl = u_desc_ctrl.value;
-}
-
-static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan,
-					u32 byte_count)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		hw_desc.dma->byte_count = byte_count;
-		break;
-	case AAU_ID:
-		hw_desc.aau->byte_count = byte_count;
-		break;
-	default:
-		BUG();
-	}
-}
-
-static inline void
-iop_desc_init_interrupt(struct iop_adma_desc_slot *desc,
-			struct iop_adma_chan *chan)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		iop_desc_init_memcpy(desc, 1);
-		hw_desc.dma->byte_count = 0;
-		hw_desc.dma->dest_addr = 0;
-		hw_desc.dma->src_addr = 0;
-		break;
-	case AAU_ID:
-		iop_desc_init_null_xor(desc, 2, 1);
-		hw_desc.aau->byte_count = 0;
-		hw_desc.aau->dest_addr = 0;
-		hw_desc.aau->src[0] = 0;
-		hw_desc.aau->src[1] = 0;
-		break;
-	default:
-		BUG();
-	}
-}
-
-static inline void
-iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len)
-{
-	int slots_per_op = desc->slots_per_op;
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
-	int i = 0;
-
-	if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
-		hw_desc->byte_count = len;
-	} else {
-		do {
-			iter = iop_hw_desc_slot_idx(hw_desc, i);
-			iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-			len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT;
-			i += slots_per_op;
-		} while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT);
-
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		iter->byte_count = len;
-	}
-}
-
-static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc,
-					struct iop_adma_chan *chan,
-					dma_addr_t addr)
-{
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		hw_desc.dma->dest_addr = addr;
-		break;
-	case AAU_ID:
-		hw_desc.aau->dest_addr = addr;
-		break;
-	default:
-		BUG();
-	}
-}
-
-static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc,
-					dma_addr_t addr)
-{
-	struct iop3xx_desc_dma *hw_desc = desc->hw_desc;
-	hw_desc->src_addr = addr;
-}
-
-static inline void
-iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx,
-				dma_addr_t addr)
-{
-
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
-	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
-	int i;
-
-	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
-		i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) {
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
-	}
-}
-
-static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc,
-					int src_idx, dma_addr_t addr)
-{
-
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter;
-	int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op;
-	int i;
-
-	for (i = 0; (slot_cnt -= slots_per_op) >= 0;
-		i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) {
-		iter = iop_hw_desc_slot_idx(hw_desc, i);
-		iop3xx_aau_desc_set_src_addr(iter, src_idx, addr);
-	}
-}
-
-static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc,
-					u32 next_desc_addr)
-{
-	/* hw_desc->next_desc is the same location for all channels */
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-
-	iop_paranoia(hw_desc.dma->next_desc);
-	hw_desc.dma->next_desc = next_desc_addr;
-}
-
-static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc)
-{
-	/* hw_desc->next_desc is the same location for all channels */
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-	return hw_desc.dma->next_desc;
-}
-
-static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc)
-{
-	/* hw_desc->next_desc is the same location for all channels */
-	union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, };
-	hw_desc.dma->next_desc = 0;
-}
-
-static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc,
-						u32 val)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	hw_desc->src[0] = val;
-}
-
-static inline enum sum_check_flags
-iop_desc_get_zero_result(struct iop_adma_desc_slot *desc)
-{
-	struct iop3xx_desc_aau *hw_desc = desc->hw_desc;
-	struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field;
-
-	iop_paranoia(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en));
-	return desc_ctrl.zero_result_err << SUM_CHECK_P;
-}
-
-static inline void iop_chan_append(struct iop_adma_chan *chan)
-{
-	u32 dma_chan_ctrl;
-
-	dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
-	dma_chan_ctrl |= 0x2;
-	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
-}
-
-static inline u32 iop_chan_get_status(struct iop_adma_chan *chan)
-{
-	return __raw_readl(DMA_CSR(chan));
-}
-
-static inline void iop_chan_disable(struct iop_adma_chan *chan)
-{
-	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
-	dma_chan_ctrl &= ~1;
-	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
-}
-
-static inline void iop_chan_enable(struct iop_adma_chan *chan)
-{
-	u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan));
-
-	dma_chan_ctrl |= 1;
-	__raw_writel(dma_chan_ctrl, DMA_CCR(chan));
-}
-
-static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-	status &= (1 << 9);
-	__raw_writel(status, DMA_CSR(chan));
-}
-
-static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-	status &= (1 << 8);
-	__raw_writel(status, DMA_CSR(chan));
-}
-
-static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan)
-{
-	u32 status = __raw_readl(DMA_CSR(chan));
-
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1);
-		break;
-	case AAU_ID:
-		status &= (1 << 5);
-		break;
-	default:
-		BUG();
-	}
-
-	__raw_writel(status, DMA_CSR(chan));
-}
-
-static inline int
-iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan)
-{
-	return 0;
-}
-
-static inline int
-iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan)
-{
-	return 0;
-}
-
-static inline int
-iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	return 0;
-}
-
-static inline int
-iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	return test_bit(5, &status);
-}
-
-static inline int
-iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return test_bit(2, &status);
-	default:
-		return 0;
-	}
-}
-
-static inline int
-iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return test_bit(3, &status);
-	default:
-		return 0;
-	}
-}
-
-static inline int
-iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan)
-{
-	switch (chan->device->id) {
-	case DMA0_ID:
-	case DMA1_ID:
-		return test_bit(1, &status);
-	default:
-		return 0;
-	}
-}
-#endif /* _ADMA_H */
-- 
GitLab


From 7ebe49b76a001b10b007193b1771f33e6cbc4f3f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 16 Oct 2022 12:41:26 +0200
Subject: [PATCH 0193/4122] driver core: allow kobj_to_dev() to take a const
 pointer

If a const * to a kobject is passed to kobj_to_dev(), we want to return
back a const * to a device as the driver core shouldn't be modifying a
constant structure.  But when dealing with container_of() the pointer
const attribute is cast away, so we need to manually handle this by
determining the type of the pointer passed in to know the type of the
pointer to pass out.

Luckily _Generic can do this type of magic, and as the kernel now
supports C11 it is availble to us to handle this type of build-time type
detection.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221016104126.1259809-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 424b55df02727..023ea50b19162 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -680,11 +680,27 @@ struct device_link {
 	bool supplier_preactivated; /* Owned by consumer probe. */
 };
 
-static inline struct device *kobj_to_dev(struct kobject *kobj)
+static inline struct device *__kobj_to_dev(struct kobject *kobj)
 {
 	return container_of(kobj, struct device, kobj);
 }
 
+static inline const struct device *__kobj_to_dev_const(const struct kobject *kobj)
+{
+	return container_of(kobj, const struct device, kobj);
+}
+
+/*
+ * container_of() will happily take a const * and spit back a non-const * as it
+ * is just doing pointer math.  But we want to be a bit more careful in the
+ * driver code, so manually force any const * of a kobject to also be a const *
+ * to a device.
+ */
+#define kobj_to_dev(kobj)					\
+	_Generic((kobj),					\
+		 const struct kobject *: __kobj_to_dev_const,	\
+		 struct kobject *: __kobj_to_dev)(kobj)
+
 /**
  * device_iommu_mapped - Returns true when the device DMA is translated
  *			 by an IOMMU
-- 
GitLab


From 593efa4091f5f05c224f8b7fd204d18dbff97e31 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sun, 16 Oct 2022 12:41:55 +0200
Subject: [PATCH 0194/4122] USB: allow some usb functions to take a const
 pointer.

The functions to_usb_interface(), to_usb_device, and
interface_to_usbdev() sometimes would like to take a const * and return
a const * back.  As we are doing pointer math, a call to container_of()
loses the const-ness of a pointer, so use a _Generic() macro to pick the
proper inline function to call instead.

Link: https://lore.kernel.org/r/20221016104155.1260201-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 55 ++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 52 insertions(+), 3 deletions(-)

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 9ff1ad4dfad12..3a55131e0ad4a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -258,7 +258,27 @@ struct usb_interface {
 	struct device *usb_dev;
 	struct work_struct reset_ws;	/* for resets in atomic context */
 };
-#define	to_usb_interface(d) container_of(d, struct usb_interface, dev)
+
+static inline struct usb_interface *__to_usb_interface(struct device *d)
+{
+	return container_of(d, struct usb_interface, dev);
+}
+
+static inline const struct usb_interface *__to_usb_interface_const(const struct device *d)
+{
+	return container_of(d, struct usb_interface, dev);
+}
+
+/*
+ * container_of() will happily take a const * and spit back a non-const * as it
+ * is just doing pointer math.  But we want to be a bit more careful in the USB
+ * driver code, so manually force any const * of a device to also be a const *
+ * to a usb_device.
+ */
+#define to_usb_interface(dev)						\
+	_Generic((dev),							\
+		 const struct device *: __to_usb_interface_const,	\
+		 struct device *: __to_usb_interface)(dev)
 
 static inline void *usb_get_intfdata(struct usb_interface *intf)
 {
@@ -709,12 +729,41 @@ struct usb_device {
 	u16 hub_delay;
 	unsigned use_generic_driver:1;
 };
-#define	to_usb_device(d) container_of(d, struct usb_device, dev)
 
-static inline struct usb_device *interface_to_usbdev(struct usb_interface *intf)
+static inline struct usb_device *__to_usb_device(struct device *d)
+{
+	return container_of(d, struct usb_device, dev);
+}
+
+static inline const struct usb_device *__to_usb_device_const(const struct device *d)
+{
+	return container_of(d, struct usb_device, dev);
+}
+
+/*
+ * container_of() will happily take a const * and spit back a non-const * as it
+ * is just doing pointer math.  But we want to be a bit more careful in the USB
+ * driver code, so manually force any const * of a device to also be a const *
+ * to a usb_device.
+ */
+#define to_usb_device(dev)					\
+	_Generic((dev),						\
+		 const struct device *: __to_usb_device_const,	\
+		 struct device *: __to_usb_device)(dev)
+
+static inline struct usb_device *__intf_to_usbdev(struct usb_interface *intf)
 {
 	return to_usb_device(intf->dev.parent);
 }
+static inline const struct usb_device *__intf_to_usbdev_const(const struct usb_interface *intf)
+{
+	return to_usb_device((const struct device *)intf->dev.parent);
+}
+
+#define interface_to_usbdev(intf)					\
+	_Generic((intf),						\
+		 const struct usb_interface *: __intf_to_usbdev_const,	\
+		 struct usb_interface *: __intf_to_usbdev)(intf)
 
 extern struct usb_device *usb_get_dev(struct usb_device *dev);
 extern void usb_put_dev(struct usb_device *dev);
-- 
GitLab


From 5033ac5c580cb22245a0c2b9e53d508e8fdd50d8 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 1 Oct 2022 18:51:28 +0200
Subject: [PATCH 0195/4122] USB: make devnode() callback in usb_class_driver
 take a const *

With the changes to the driver core to make more pointers const, the USB
subsystem also needs to be modified to take a const * for the devnode
callback so that the driver core's constant pointer will also be
properly propagated.

Cc: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Cc: Juergen Stuber <starblue@users.sourceforge.net>
Reviewed-by: Johan Hovold <johan@kernel.org>
Acked-by: Pete Zaitcev <zaitcev@redhat.com>
Reviewed-by: Jiri Kosina <jkosina@suse.cz>
Link: https://lore.kernel.org/r/20221001165128.2688526-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hid/usbhid/hiddev.c     | 2 +-
 drivers/usb/class/usblp.c       | 2 +-
 drivers/usb/misc/iowarrior.c    | 2 +-
 drivers/usb/misc/legousbtower.c | 2 +-
 include/linux/usb.h             | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c
index 2fb2991dbe4c3..59cf3ddfdf78c 100644
--- a/drivers/hid/usbhid/hiddev.c
+++ b/drivers/hid/usbhid/hiddev.c
@@ -857,7 +857,7 @@ static const struct file_operations hiddev_fops = {
 	.llseek		= noop_llseek,
 };
 
-static char *hiddev_devnode(struct device *dev, umode_t *mode)
+static char *hiddev_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index f27b4aecff3d4..5a2e43331064e 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -1090,7 +1090,7 @@ static const struct file_operations usblp_fops = {
 	.llseek =	noop_llseek,
 };
 
-static char *usblp_devnode(struct device *dev, umode_t *mode)
+static char *usblp_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/drivers/usb/misc/iowarrior.c b/drivers/usb/misc/iowarrior.c
index 988a8c02e7e24..f9427a67789cf 100644
--- a/drivers/usb/misc/iowarrior.c
+++ b/drivers/usb/misc/iowarrior.c
@@ -717,7 +717,7 @@ static const struct file_operations iowarrior_fops = {
 	.llseek = noop_llseek,
 };
 
-static char *iowarrior_devnode(struct device *dev, umode_t *mode)
+static char *iowarrior_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index 1c9e09138c109..379cf01a6e96e 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -245,7 +245,7 @@ static const struct file_operations tower_fops = {
 	.llseek =	tower_llseek,
 };
 
-static char *legousbtower_devnode(struct device *dev, umode_t *mode)
+static char *legousbtower_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "usb/%s", dev_name(dev));
 }
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3a55131e0ad4a..4b463a5e4ba22 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1321,7 +1321,7 @@ struct usb_device_driver {
  */
 struct usb_class_driver {
 	char *name;
-	char *(*devnode)(struct device *dev, umode_t *mode);
+	char *(*devnode)(const struct device *dev, umode_t *mode);
 	const struct file_operations *fops;
 	int minor_base;
 };
-- 
GitLab


From 326c3753a6358ffab607749ea0aa95d1d0ad79b0 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:02 -0700
Subject: [PATCH 0196/4122] gpiolib: of: add a quirk for legacy names in
 Mediatek mt2701-cs42448

The driver is using non-standard "i2s1-in-sel-gpio1" and
"i2s1-in-sel-gpio2" names to describe its gpios. In preparation to
converting to the standard naming (i2s1-in-sel-gpios) and switching the
driver to gpiod API add a quirk to gpiolib to keep compatibility with
existing DTSes.

Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 0e4e1291604d6..cef4f66341256 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -488,6 +488,38 @@ static struct gpio_desc *of_find_usb_gpio(struct device_node *np,
 	return of_get_named_gpiod_flags(np, con_id, idx, of_flags);
 }
 
+static struct gpio_desc *of_find_mt2701_gpio(struct device_node *np,
+					     const char *con_id,
+					     unsigned int idx,
+					     enum of_gpio_flags *of_flags)
+{
+	struct gpio_desc *desc;
+	const char *legacy_id;
+
+	if (!IS_ENABLED(CONFIG_SND_SOC_MT2701_CS42448))
+		return ERR_PTR(-ENOENT);
+
+	if (!of_device_is_compatible(np, "mediatek,mt2701-cs42448-machine"))
+		return ERR_PTR(-ENOENT);
+
+	if (!con_id || strcmp(con_id, "i2s1-in-sel"))
+		return ERR_PTR(-ENOENT);
+
+	if (idx == 0)
+		legacy_id = "i2s1-in-sel-gpio1";
+	else if (idx == 1)
+		legacy_id = "i2s1-in-sel-gpio2";
+	else
+		return ERR_PTR(-ENOENT);
+
+	desc = of_get_named_gpiod_flags(np, legacy_id, 0, of_flags);
+	if (!gpiod_not_found(desc))
+		pr_info("%s is using legacy gpio name '%s' instead of '%s-gpios'\n",
+			of_node_full_name(np), legacy_id, con_id);
+
+	return desc;
+}
+
 typedef struct gpio_desc *(*of_find_gpio_quirk)(struct device_node *np,
 						const char *con_id,
 						unsigned int idx,
@@ -498,6 +530,7 @@ static const of_find_gpio_quirk of_find_gpio_quirks[] = {
 	of_find_regulator_gpio,
 	of_find_arizona_gpio,
 	of_find_usb_gpio,
+	of_find_mt2701_gpio,
 	NULL
 };
 
-- 
GitLab


From b311c5cba779a87e85525d351965bbd2c18111de Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:03 -0700
Subject: [PATCH 0197/4122] gpiolib: of: consolidate simple renames into a
 single quirk

This consolidates all quirks doing simple renames (either allowing
suffix-less names or trivial renames, when index changes are not
required) into a single quirk.

Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 183 +++++++++++++++-----------------------
 1 file changed, 71 insertions(+), 112 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index cef4f66341256..63c6fa3086f3c 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -365,127 +365,90 @@ struct gpio_desc *gpiod_get_from_of_node(const struct device_node *node,
 }
 EXPORT_SYMBOL_GPL(gpiod_get_from_of_node);
 
-/*
- * The SPI GPIO bindings happened before we managed to establish that GPIO
- * properties should be named "foo-gpios" so we have this special kludge for
- * them.
- */
-static struct gpio_desc *of_find_spi_gpio(struct device_node *np,
-					  const char *con_id,
-					  unsigned int idx,
-					  enum of_gpio_flags *of_flags)
-{
-	char prop_name[32]; /* 32 is max size of property name */
-
-	/*
-	 * Hopefully the compiler stubs the rest of the function if this
-	 * is false.
-	 */
-	if (!IS_ENABLED(CONFIG_SPI_MASTER))
-		return ERR_PTR(-ENOENT);
-
-	/* Allow this specifically for "spi-gpio" devices */
-	if (!of_device_is_compatible(np, "spi-gpio") || !con_id)
-		return ERR_PTR(-ENOENT);
-
-	/* Will be "gpio-sck", "gpio-mosi" or "gpio-miso" */
-	snprintf(prop_name, sizeof(prop_name), "%s-%s", "gpio", con_id);
-
-	return of_get_named_gpiod_flags(np, prop_name, idx, of_flags);
-}
-
-/*
- * The old Freescale bindings use simply "gpios" as name for the chip select
- * lines rather than "cs-gpios" like all other SPI hardware. Account for this
- * with a special quirk.
- */
-static struct gpio_desc *of_find_spi_cs_gpio(struct device_node *np,
+static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 					     const char *con_id,
 					     unsigned int idx,
 					     enum of_gpio_flags *of_flags)
 {
-	if (!IS_ENABLED(CONFIG_SPI_MASTER))
-		return ERR_PTR(-ENOENT);
-
-	/* Allow this specifically for Freescale and PPC devices */
-	if (!of_device_is_compatible(np, "fsl,spi") &&
-	    !of_device_is_compatible(np, "aeroflexgaisler,spictrl") &&
-	    !of_device_is_compatible(np, "ibm,ppc4xx-spi"))
-		return ERR_PTR(-ENOENT);
-	/* Allow only if asking for "cs-gpios" */
-	if (!con_id || strcmp(con_id, "cs"))
-		return ERR_PTR(-ENOENT);
+	static const struct of_rename_gpio {
+		const char *con_id;
+		const char *legacy_id;	/* NULL - same as con_id */
+		/*
+		 * Compatible string can be set to NULL in case where
+		 * matching to a particular compatible is not practical,
+		 * but it should only be done for gpio names that have
+		 * vendor prefix to reduce risk of false positives.
+		 * Addition of such entries is strongly discouraged.
+		 */
+		const char *compatible;
+	} gpios[] = {
+#if IS_ENABLED(CONFIG_MFD_ARIZONA)
+		{ "wlf,reset",	NULL,		NULL },
+#endif
+#if IS_ENABLED(CONFIG_REGULATOR)
+		/*
+		 * Some regulator bindings happened before we managed to
+		 * establish that GPIO properties should be named
+		 * "foo-gpios" so we have this special kludge for them.
+		 */
+		{ "wlf,ldoena",  NULL,		NULL }, /* Arizona */
+		{ "wlf,ldo1ena", NULL,		NULL }, /* WM8994 */
+		{ "wlf,ldo2ena", NULL,		NULL }, /* WM8994 */
+#endif
+#if IS_ENABLED(CONFIG_SPI_MASTER)
 
-	/*
-	 * While all other SPI controllers use "cs-gpios" the Freescale
-	 * uses just "gpios" so translate to that when "cs-gpios" is
-	 * requested.
-	 */
-	return of_get_named_gpiod_flags(np, "gpios", idx, of_flags);
-}
+		/*
+		 * The SPI GPIO bindings happened before we managed to
+		 * establish that GPIO properties should be named
+		 * "foo-gpios" so we have this special kludge for them.
+		 */
+		{ "miso",	"gpio-miso",	"spi-gpio" },
+		{ "mosi",	"gpio-mosi",	"spi-gpio" },
+		{ "sck",	"gpio-sck",	"spi-gpio" },
 
-/*
- * Some regulator bindings happened before we managed to establish that GPIO
- * properties should be named "foo-gpios" so we have this special kludge for
- * them.
- */
-static struct gpio_desc *of_find_regulator_gpio(struct device_node *np,
-						const char *con_id,
-						unsigned int idx,
-						enum of_gpio_flags *of_flags)
-{
-	/* These are the connection IDs we accept as legacy GPIO phandles */
-	const char *whitelist[] = {
-		"wlf,ldoena", /* Arizona */
-		"wlf,ldo1ena", /* WM8994 */
-		"wlf,ldo2ena", /* WM8994 */
+		/*
+		 * The old Freescale bindings use simply "gpios" as name
+		 * for the chip select lines rather than "cs-gpios" like
+		 * all other SPI hardware. Allow this specifically for
+		 * Freescale and PPC devices.
+		 */
+		{ "cs",		"gpios",	"fsl,spi" },
+		{ "cs",		"gpios",	"aeroflexgaisler,spictrl" },
+		{ "cs",		"gpios",	"ibm,ppc4xx-spi" },
+#endif
+#if IS_ENABLED(CONFIG_TYPEC_FUSB302)
+		/*
+		 * Fairchild FUSB302 host is using undocumented "fcs,int_n"
+		 * property without the compulsory "-gpios" suffix.
+		 */
+		{ "fcs,int_n",	NULL,		"fcs,fusb302" },
+#endif
 	};
-	int i;
-
-	if (!IS_ENABLED(CONFIG_REGULATOR))
-		return ERR_PTR(-ENOENT);
+	struct gpio_desc *desc;
+	const char *legacy_id;
+	unsigned int i;
 
 	if (!con_id)
 		return ERR_PTR(-ENOENT);
 
-	i = match_string(whitelist, ARRAY_SIZE(whitelist), con_id);
-	if (i < 0)
-		return ERR_PTR(-ENOENT);
-
-	return of_get_named_gpiod_flags(np, con_id, idx, of_flags);
-}
-
-static struct gpio_desc *of_find_arizona_gpio(struct device_node *np,
-					      const char *con_id,
-					      unsigned int idx,
-					      enum of_gpio_flags *of_flags)
-{
-	if (!IS_ENABLED(CONFIG_MFD_ARIZONA))
-		return ERR_PTR(-ENOENT);
-
-	if (!con_id || strcmp(con_id, "wlf,reset"))
-		return ERR_PTR(-ENOENT);
-
-	return of_get_named_gpiod_flags(np, con_id, idx, of_flags);
-}
+	for (i = 0; i < ARRAY_SIZE(gpios); i++) {
+		if (strcmp(con_id, gpios[i].con_id))
+			continue;
 
-static struct gpio_desc *of_find_usb_gpio(struct device_node *np,
-					  const char *con_id,
-					  unsigned int idx,
-					  enum of_gpio_flags *of_flags)
-{
-	/*
-	 * Currently this USB quirk is only for the Fairchild FUSB302 host
-	 * which is using an undocumented DT GPIO line named "fcs,int_n"
-	 * without the compulsory "-gpios" suffix.
-	 */
-	if (!IS_ENABLED(CONFIG_TYPEC_FUSB302))
-		return ERR_PTR(-ENOENT);
+		if (gpios[i].compatible &&
+		    !of_device_is_compatible(np, gpios[i].compatible))
+			continue;
 
-	if (!con_id || strcmp(con_id, "fcs,int_n"))
-		return ERR_PTR(-ENOENT);
+		legacy_id = gpios[i].legacy_id ?: gpios[i].con_id;
+		desc = of_get_named_gpiod_flags(np, legacy_id, idx, of_flags);
+		if (!gpiod_not_found(desc)) {
+			pr_info("%s uses legacy gpio name '%s' instead of '%s-gpios'\n",
+				of_node_full_name(np), legacy_id, con_id);
+			return desc;
+		}
+	}
 
-	return of_get_named_gpiod_flags(np, con_id, idx, of_flags);
+	return ERR_PTR(-ENOENT);
 }
 
 static struct gpio_desc *of_find_mt2701_gpio(struct device_node *np,
@@ -525,11 +488,7 @@ typedef struct gpio_desc *(*of_find_gpio_quirk)(struct device_node *np,
 						unsigned int idx,
 						enum of_gpio_flags *of_flags);
 static const of_find_gpio_quirk of_find_gpio_quirks[] = {
-	of_find_spi_gpio,
-	of_find_spi_cs_gpio,
-	of_find_regulator_gpio,
-	of_find_arizona_gpio,
-	of_find_usb_gpio,
+	of_find_gpio_rename,
 	of_find_mt2701_gpio,
 	NULL
 };
-- 
GitLab


From 307c593ba5f915e308fd23a2daae7e9a5209b604 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:04 -0700
Subject: [PATCH 0198/4122] gpiolib: of: tighten selection of gpio renaming
 quirks

Tighten selection of legacy gpio renaming quirks so that they only
considered on more relevant configurations.

Suggested-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 63c6fa3086f3c..7d4bbf6484bc7 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -385,18 +385,21 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 #if IS_ENABLED(CONFIG_MFD_ARIZONA)
 		{ "wlf,reset",	NULL,		NULL },
 #endif
-#if IS_ENABLED(CONFIG_REGULATOR)
+
 		/*
 		 * Some regulator bindings happened before we managed to
 		 * establish that GPIO properties should be named
 		 * "foo-gpios" so we have this special kludge for them.
 		 */
+#if IS_ENABLED(CONFIG_REGULATOR_ARIZONA_LDO1)
 		{ "wlf,ldoena",  NULL,		NULL }, /* Arizona */
+#endif
+#if IS_ENABLED(CONFIG_REGULATOR_WM8994)
 		{ "wlf,ldo1ena", NULL,		NULL }, /* WM8994 */
 		{ "wlf,ldo2ena", NULL,		NULL }, /* WM8994 */
 #endif
-#if IS_ENABLED(CONFIG_SPI_MASTER)
 
+#if IS_ENABLED(CONFIG_SPI_GPIO)
 		/*
 		 * The SPI GPIO bindings happened before we managed to
 		 * establish that GPIO properties should be named
@@ -405,6 +408,7 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 		{ "miso",	"gpio-miso",	"spi-gpio" },
 		{ "mosi",	"gpio-mosi",	"spi-gpio" },
 		{ "sck",	"gpio-sck",	"spi-gpio" },
+#endif
 
 		/*
 		 * The old Freescale bindings use simply "gpios" as name
@@ -412,10 +416,14 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 		 * all other SPI hardware. Allow this specifically for
 		 * Freescale and PPC devices.
 		 */
+#if IS_ENABLED(CONFIG_SPI_FSL_SPI)
 		{ "cs",		"gpios",	"fsl,spi" },
 		{ "cs",		"gpios",	"aeroflexgaisler,spictrl" },
+#endif
+#if IS_ENABLED(CONFIG_SPI_PPC4xx)
 		{ "cs",		"gpios",	"ibm,ppc4xx-spi" },
 #endif
+
 #if IS_ENABLED(CONFIG_TYPEC_FUSB302)
 		/*
 		 * Fairchild FUSB302 host is using undocumented "fcs,int_n"
-- 
GitLab


From fbbbcd177a27508a47c5136b31de5cf4c8d0ab1c Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:05 -0700
Subject: [PATCH 0199/4122] gpiolib: of: add quirk for locating reset lines
 with legacy bindings

Some legacy mappings used "gpio[s]-reset" instead of "reset-gpios",
add a quirk so that gpiod API will still work on unmodified DTSes.

Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 7d4bbf6484bc7..2b5d1b3095c7b 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -382,9 +382,18 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 		 */
 		const char *compatible;
 	} gpios[] = {
+#if !IS_ENABLED(CONFIG_LCD_HX8357)
+		/* Himax LCD controllers used "gpios-reset" */
+		{ "reset",	"gpios-reset",	"himax,hx8357" },
+		{ "reset",	"gpios-reset",	"himax,hx8369" },
+#endif
 #if IS_ENABLED(CONFIG_MFD_ARIZONA)
 		{ "wlf,reset",	NULL,		NULL },
 #endif
+#if !IS_ENABLED(CONFIG_PCI_LANTIQ)
+		/* MIPS Lantiq PCI */
+		{ "reset",	"gpios-reset",	"lantiq,pci-xway" },
+#endif
 
 		/*
 		 * Some regulator bindings happened before we managed to
@@ -399,6 +408,13 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 		{ "wlf,ldo2ena", NULL,		NULL }, /* WM8994 */
 #endif
 
+#if IS_ENABLED(CONFIG_SND_SOC_TLV320AIC3X)
+		{ "reset",	"gpio-reset",	"ti,tlv320aic3x" },
+		{ "reset",	"gpio-reset",	"ti,tlv320aic33" },
+		{ "reset",	"gpio-reset",	"ti,tlv320aic3007" },
+		{ "reset",	"gpio-reset",	"ti,tlv320aic3104" },
+		{ "reset",	"gpio-reset",	"ti,tlv320aic3106" },
+#endif
 #if IS_ENABLED(CONFIG_SPI_GPIO)
 		/*
 		 * The SPI GPIO bindings happened before we managed to
-- 
GitLab


From 9c2cc7171e08eef52110d272fdf2225d6dcd81b6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:06 -0700
Subject: [PATCH 0200/4122] gpiolib: of: add a quirk for reset line for Marvell
 NFC controller

The controller is using non-standard "reset-n-io" name for its reset
gpio property, whereas gpiod API expects "<name>-gpios". Add a quirk
so that gpiod API will still work on unmodified DTSes.

Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 2b5d1b3095c7b..a9cedc39a2459 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -390,6 +390,16 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 #if IS_ENABLED(CONFIG_MFD_ARIZONA)
 		{ "wlf,reset",	NULL,		NULL },
 #endif
+#if IS_ENABLED(CONFIG_NFC_MRVL_I2C)
+		{ "reset",	"reset-n-io",	"marvell,nfc-i2c" },
+#endif
+#if IS_ENABLED(CONFIG_NFC_MRVL_SPI)
+		{ "reset",	"reset-n-io",	"marvell,nfc-spi" },
+#endif
+#if IS_ENABLED(CONFIG_NFC_MRVL_UART)
+		{ "reset",	"reset-n-io",	"marvell,nfc-uart" },
+		{ "reset",	"reset-n-io",	"mrvl,nfc-uart" },
+#endif
 #if !IS_ENABLED(CONFIG_PCI_LANTIQ)
 		/* MIPS Lantiq PCI */
 		{ "reset",	"gpios-reset",	"lantiq,pci-xway" },
-- 
GitLab


From 944004eb56dc977ad5f882ca4338f45396052317 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:07 -0700
Subject: [PATCH 0201/4122] gpiolib: of: add a quirk for reset line for Cirrus
 CS42L56 codec

The controller is using non-standard "cirrus,gpio-nreset" name for its
reset gpio property, whereas gpiod API expects "<name>-gpios".
Add a quirk so that gpiod API will still work on unmodified DTSes.

Reviewed-by: Daniel Thompson <daniel.thompson@linaro.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index a9cedc39a2459..ffdbac2eeaa6f 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -418,6 +418,9 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 		{ "wlf,ldo2ena", NULL,		NULL }, /* WM8994 */
 #endif
 
+#if IS_ENABLED(CONFIG_SND_SOC_CS42L56)
+		{ "reset",	"cirrus,gpio-nreset",	"cirrus,cs42l56" },
+#endif
 #if IS_ENABLED(CONFIG_SND_SOC_TLV320AIC3X)
 		{ "reset",	"gpio-reset",	"ti,tlv320aic3x" },
 		{ "reset",	"gpio-reset",	"ti,tlv320aic33" },
-- 
GitLab


From eaf1a29665cda1c767cac0d523828892bd77a842 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:08 -0700
Subject: [PATCH 0202/4122] gpiolib: of: add a quirk for legacy names in MOXA
 ART RTC

The driver is using non-standard "gpio-rtc-data", "gpio-rtc-sclk", and
"gpio-rtc-reset" names for properties describing its gpios. In
preparation to converting to the standard naming ("rtc-*-gpios") and
switching the driver to gpiod API add a quirk to gpiolib to keep
compatibility with existing DTSes.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index ffdbac2eeaa6f..d22498c72a678 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -390,6 +390,11 @@ static struct gpio_desc *of_find_gpio_rename(struct device_node *np,
 #if IS_ENABLED(CONFIG_MFD_ARIZONA)
 		{ "wlf,reset",	NULL,		NULL },
 #endif
+#if IS_ENABLED(CONFIG_RTC_DRV_MOXART)
+		{ "rtc-data",	"gpio-rtc-data",	"moxa,moxart-rtc" },
+		{ "rtc-sclk",	"gpio-rtc-sclk",	"moxa,moxart-rtc" },
+		{ "rtc-reset",	"gpio-rtc-reset",	"moxa,moxart-rtc" },
+#endif
 #if IS_ENABLED(CONFIG_NFC_MRVL_I2C)
 		{ "reset",	"reset-n-io",	"marvell,nfc-i2c" },
 #endif
-- 
GitLab


From aa1d058d48f292aa138e33ad12b7b4d18b5407cd Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 18 Oct 2022 10:32:42 +0800
Subject: [PATCH 0203/4122] kernfs: dont take i_lock on inode attr read

The kernfs write lock is held when the kernfs node inode attributes
are updated. Therefore, when either kernfs_iop_getattr() or
kernfs_iop_permission() are called the kernfs node inode attributes
won't change.

Consequently concurrent kernfs_refresh_inode() calls always copy the
same values from the kernfs node.

So there's no need to take the inode i_lock to get consistent values
for generic_fillattr() and generic_permission(), the kernfs read lock
is sufficient.

Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Ian Kent <raven@themaw.net>
Link: https://lore.kernel.org/r/166606036215.13363.1288735296954908554.stgit@donald.themaw.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/inode.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 3d783d80f5daa..74f3453f4639f 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -190,10 +190,8 @@ int kernfs_iop_getattr(struct user_namespace *mnt_userns,
 	struct kernfs_root *root = kernfs_root(kn);
 
 	down_read(&root->kernfs_rwsem);
-	spin_lock(&inode->i_lock);
 	kernfs_refresh_inode(kn, inode);
 	generic_fillattr(&init_user_ns, inode, stat);
-	spin_unlock(&inode->i_lock);
 	up_read(&root->kernfs_rwsem);
 
 	return 0;
@@ -288,10 +286,8 @@ int kernfs_iop_permission(struct user_namespace *mnt_userns,
 	root = kernfs_root(kn);
 
 	down_read(&root->kernfs_rwsem);
-	spin_lock(&inode->i_lock);
 	kernfs_refresh_inode(kn, inode);
 	ret = generic_permission(&init_user_ns, inode, mask);
-	spin_unlock(&inode->i_lock);
 	up_read(&root->kernfs_rwsem);
 
 	return ret;
-- 
GitLab


From 92b57842f43014e6ca81ddf6d5d59e9ddf762e12 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Tue, 18 Oct 2022 10:32:49 +0800
Subject: [PATCH 0204/4122] kernfs: dont take i_lock on revalidate

In kernfs_dop_revalidate() when the passed in dentry is negative the
dentry directory is checked to see if it has changed and if so the
negative dentry is discarded so it can refreshed. During this check
the dentry inode i_lock is taken to mitigate against a possible
concurrent rename.

But if it's racing with a rename, becuase the dentry is negative, it
can't be the source it must be the target and it must be going to do
a d_move() otherwise the rename will return an error.

In this case the parent dentry of the target will not change, it will
be the same over the d_move(), only the source dentry parent may change
so the inode i_lock isn't needed.

Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Ian Kent <raven@themaw.net>
Link: https://lore.kernel.org/r/166606036967.13363.9336408133975631967.stgit@donald.themaw.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/dir.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 3990f3e270cb6..6acd9c3d4cff1 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -1073,20 +1073,30 @@ static int kernfs_dop_revalidate(struct dentry *dentry, unsigned int flags)
 
 		/* If the kernfs parent node has changed discard and
 		 * proceed to ->lookup.
+		 *
+		 * There's nothing special needed here when getting the
+		 * dentry parent, even if a concurrent rename is in
+		 * progress. That's because the dentry is negative so
+		 * it can only be the target of the rename and it will
+		 * be doing a d_move() not a replace. Consequently the
+		 * dentry d_parent won't change over the d_move().
+		 *
+		 * Also kernfs negative dentries transitioning from
+		 * negative to positive during revalidate won't happen
+		 * because they are invalidated on containing directory
+		 * changes and the lookup re-done so that a new positive
+		 * dentry can be properly created.
 		 */
-		spin_lock(&dentry->d_lock);
+		root = kernfs_root_from_sb(dentry->d_sb);
+		down_read(&root->kernfs_rwsem);
 		parent = kernfs_dentry_node(dentry->d_parent);
 		if (parent) {
-			spin_unlock(&dentry->d_lock);
-			root = kernfs_root(parent);
-			down_read(&root->kernfs_rwsem);
 			if (kernfs_dir_changed(parent, dentry)) {
 				up_read(&root->kernfs_rwsem);
 				return 0;
 			}
-			up_read(&root->kernfs_rwsem);
-		} else
-			spin_unlock(&dentry->d_lock);
+		}
+		up_read(&root->kernfs_rwsem);
 
 		/* The kernfs parent node hasn't changed, leave the
 		 * dentry negative and return success.
-- 
GitLab


From e3186e36925fc18384492491ebcf3da749780a30 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:09 -0700
Subject: [PATCH 0205/4122] gpiolib: of: factor out code overriding gpio line
 polarity

There are several instances where we use a separate property to
override polarity specified in gpio property. Factor it out into
a separate function.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 48 +++++++++++++++++++++++----------------
 1 file changed, 28 insertions(+), 20 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index d22498c72a678..6faf0dc7bc31b 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -130,6 +130,28 @@ bool of_gpio_need_valid_mask(const struct gpio_chip *gc)
 	return false;
 }
 
+/*
+ * Overrides stated polarity of a gpio line and warns when there is a
+ * discrepancy.
+ */
+static void of_gpio_quirk_polarity(const struct device_node *np,
+				   bool active_high,
+				   enum of_gpio_flags *flags)
+{
+	if (active_high) {
+		if (*flags & OF_GPIO_ACTIVE_LOW) {
+			pr_warn("%s GPIO handle specifies active low - ignored\n",
+				of_node_full_name(np));
+			*flags &= ~OF_GPIO_ACTIVE_LOW;
+		}
+	} else {
+		if (!(*flags & OF_GPIO_ACTIVE_LOW))
+			pr_info("%s enforce active low on GPIO handle\n",
+				of_node_full_name(np));
+		*flags |= OF_GPIO_ACTIVE_LOW;
+	}
+}
+
 static void of_gpio_flags_quirks(const struct device_node *np,
 				 const char *propname,
 				 enum of_gpio_flags *flags,
@@ -145,7 +167,7 @@ static void of_gpio_flags_quirks(const struct device_node *np,
 	     (!(strcmp(propname, "enable-gpio") &&
 		strcmp(propname, "enable-gpios")) &&
 	      of_device_is_compatible(np, "regulator-gpio")))) {
-		bool active_low = !of_property_read_bool(np,
+		bool active_high = of_property_read_bool(np,
 							 "enable-active-high");
 		/*
 		 * The regulator GPIO handles are specified such that the
@@ -153,13 +175,7 @@ static void of_gpio_flags_quirks(const struct device_node *np,
 		 * the polarity of the GPIO line. Any phandle flags must
 		 * be actively ignored.
 		 */
-		if ((*flags & OF_GPIO_ACTIVE_LOW) && !active_low) {
-			pr_warn("%s GPIO handle specifies active low - ignored\n",
-				of_node_full_name(np));
-			*flags &= ~OF_GPIO_ACTIVE_LOW;
-		}
-		if (active_low)
-			*flags |= OF_GPIO_ACTIVE_LOW;
+		of_gpio_quirk_polarity(np, active_high, flags);
 	}
 	/*
 	 * Legacy open drain handling for fixed voltage regulators.
@@ -200,18 +216,10 @@ static void of_gpio_flags_quirks(const struct device_node *np,
 				 * conflict and the "spi-cs-high" flag will
 				 * take precedence.
 				 */
-				if (of_property_read_bool(child, "spi-cs-high")) {
-					if (*flags & OF_GPIO_ACTIVE_LOW) {
-						pr_warn("%s GPIO handle specifies active low - ignored\n",
-							of_node_full_name(child));
-						*flags &= ~OF_GPIO_ACTIVE_LOW;
-					}
-				} else {
-					if (!(*flags & OF_GPIO_ACTIVE_LOW))
-						pr_info("%s enforce active low on chipselect handle\n",
-							of_node_full_name(child));
-					*flags |= OF_GPIO_ACTIVE_LOW;
-				}
+				bool active_high = of_property_read_bool(child,
+								"spi-cs-high");
+				of_gpio_quirk_polarity(child, active_high,
+						       flags);
 				of_node_put(child);
 				break;
 			}
-- 
GitLab


From b02c85c9458cdd15e2c43413d7d2541a468cde57 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:10 -0700
Subject: [PATCH 0206/4122] gpiolib: of: add quirk for phy reset polarity for
 Freescale Ethernet

Bindings for Freescale Fast Ethernet Controller use a separate
property "phy-reset-active-high" to specify polarity of its phy
gpio line. To allow converting the driver to gpiod API we need
to add this quirk to gpiolib.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 6faf0dc7bc31b..c2a55ffb2b203 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -231,6 +231,33 @@ static void of_gpio_flags_quirks(const struct device_node *np,
 	    !strcmp(propname, "snps,reset-gpio") &&
 	    of_property_read_bool(np, "snps,reset-active-low"))
 		*flags |= OF_GPIO_ACTIVE_LOW;
+
+	/*
+	 * Freescale Fast Ethernet Controller uses a separate property to
+	 * describe polarity of the phy reset line.
+	 */
+	if (IS_ENABLED(CONFIG_FEC)) {
+		static const char * const fec_devices[] = {
+			"fsl,imx25-fec",
+			"fsl,imx27-fec",
+			"fsl,imx28-fec",
+			"fsl,imx6q-fec",
+			"fsl,mvf600-fec",
+			"fsl,imx6sx-fec",
+			"fsl,imx6ul-fec",
+			"fsl,imx8mq-fec",
+			"fsl,imx8qm-fec",
+			"fsl,s32v234-fec",
+			NULL
+		};
+
+		if (!strcmp(propname, "phy-reset-gpios") &&
+		    of_device_compatible_match(np, fec_devices)) {
+			bool active_high = of_property_read_bool(np,
+						"phy-reset-active-high");
+			of_gpio_quirk_polarity(np, active_high, flags);
+		}
+	}
 }
 
 /**
-- 
GitLab


From 99d18d42c942854a073191714a311dc2420ec7d3 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 17 Oct 2022 22:41:11 -0700
Subject: [PATCH 0207/4122] gpiolib: of: add a quirk for reset line polarity
 for Himax LCDs

Existing DTS that use legacy (non-standard) property name for the reset
line "gpios-reset" also specify incorrect polarity (0 which maps to
"active high"). Add a quirk to force polarity to "active low" so that
once driver is converted to gpiod API that pays attention to line
polarity it will work properly.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index c2a55ffb2b203..52616848a37c4 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -152,11 +152,47 @@ static void of_gpio_quirk_polarity(const struct device_node *np,
 	}
 }
 
+/*
+ * This quirk does static polarity overrides in cases where existing
+ * DTS specified incorrect polarity.
+ */
+static void of_gpio_try_fixup_polarity(const struct device_node *np,
+				       const char *propname,
+				       enum of_gpio_flags *flags)
+{
+	static const struct {
+		const char *compatible;
+		const char *propname;
+		bool active_high;
+	} gpios[] = {
+#if !IS_ENABLED(CONFIG_LCD_HX8357)
+		/*
+		 * Himax LCD controllers used incorrectly named
+		 * "gpios-reset" property and also specified wrong
+		 * polarity.
+		 */
+		{ "himax,hx8357",	"gpios-reset",	false },
+		{ "himax,hx8369",	"gpios-reset",	false },
+#endif
+	};
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(gpios); i++) {
+		if (of_device_is_compatible(np, gpios[i].compatible) &&
+		    !strcmp(propname, gpios[i].propname)) {
+			of_gpio_quirk_polarity(np, gpios[i].active_high, flags);
+			break;
+		}
+	}
+}
+
 static void of_gpio_flags_quirks(const struct device_node *np,
 				 const char *propname,
 				 enum of_gpio_flags *flags,
 				 int index)
 {
+	of_gpio_try_fixup_polarity(np, propname, flags);
+
 	/*
 	 * Some GPIO fixed regulator quirks.
 	 * Note that active low is the default.
-- 
GitLab


From dbf53a29b28b277fa952a000245b558536c6bdd7 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <bp@alien8.de>
Date: Wed, 19 Oct 2022 18:59:45 +0200
Subject: [PATCH 0208/4122] x86/paravirt: Fix a !PARAVIRT build warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix

  ./include/trace/events/xen.h:28:31: warning: ‘enum paravirt_lazy_mode’ \
    declared inside parameter list will not be visible outside of this definition or declaration

which turns into a build error:

  ./include/trace/events/xen.h:28:50: error: parameter 1 (‘mode’) has incomplete type
     28 |                 TP_PROTO(enum paravirt_lazy_mode mode), \

due to enum paravirt_lazy_mode being visible only under CONFIG_PARAVIRT.
Just pull it up where it is unconditionally visible.

Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/Y1AtAXM8YjtBm2cj@zn.tnic
---
 arch/x86/include/asm/paravirt_types.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index e137d94121232..27c692791b7ea 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -9,6 +9,13 @@ struct paravirt_patch_site {
 	u8 type;		/* type of this instruction */
 	u8 len;			/* length of original instruction */
 };
+
+/* Lazy mode for batching updates / context switch */
+enum paravirt_lazy_mode {
+	PARAVIRT_LAZY_NONE,
+	PARAVIRT_LAZY_MMU,
+	PARAVIRT_LAZY_CPU,
+};
 #endif
 
 #ifdef CONFIG_PARAVIRT
@@ -582,13 +589,6 @@ int paravirt_disable_iospace(void);
 	__PVOP_VCALL(op, PVOP_CALL_ARG1(arg1), PVOP_CALL_ARG2(arg2),	\
 		     PVOP_CALL_ARG3(arg3), PVOP_CALL_ARG4(arg4))
 
-/* Lazy mode for batching updates / context switch */
-enum paravirt_lazy_mode {
-	PARAVIRT_LAZY_NONE,
-	PARAVIRT_LAZY_MMU,
-	PARAVIRT_LAZY_CPU,
-};
-
 enum paravirt_lazy_mode paravirt_get_lazy_mode(void);
 void paravirt_start_context_switch(struct task_struct *prev);
 void paravirt_end_context_switch(struct task_struct *next);
-- 
GitLab


From 0c04b83d95e02c9def6e2db49fc2cab618faf949 Mon Sep 17 00:00:00 2001
From: Tanjuate Brunostar <tanjubrunostar0@gmail.com>
Date: Tue, 18 Oct 2022 12:01:54 +0000
Subject: [PATCH 0209/4122] staging: rts5208: split long line of code

Fix checkpatch warning by splitting up a long line of code, improving
code readability

Signed-off-by: Tanjuate Brunostar <tanjubrunostar0@gmail.com>
Acked-by: Julia Lawall <julia.lawall@inria.fr>
Link: https://lore.kernel.org/r/Y06Vsr7JVvpPem5T@elroy-temp-vm.gaiao0uenmiufjlowqgp5yxwdh.gvxx.internal.cloudapp.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rts5208/sd.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rts5208/sd.c b/drivers/staging/rts5208/sd.c
index d1fafd530c80e..4b7122add51aa 100644
--- a/drivers/staging/rts5208/sd.c
+++ b/drivers/staging/rts5208/sd.c
@@ -4506,7 +4506,8 @@ int sd_execute_write_data(struct scsi_cmnd *srb, struct rtsx_chip *chip)
 				if (CHK_SD(sd_card)) {
 					retval = reset_sd(chip);
 					if (retval != STATUS_SUCCESS) {
-						sd_card->sd_lock_status &= ~(SD_UNLOCK_POW_ON | SD_SDR_RST);
+						sd_card->sd_lock_status &=
+							~(SD_UNLOCK_POW_ON | SD_SDR_RST);
 						goto sd_execute_write_cmd_failed;
 					}
 				}
-- 
GitLab


From fd22186003a98daaab1ea08aaa177c041fdaa649 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Mon, 17 Oct 2022 12:02:30 +0530
Subject: [PATCH 0210/4122] staging: iio: frequency: ad9834: merge unnecessary
 split lines

Improve code readability by merging unnecessary split lines that are
well within the code-style guidelines post merge.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Acked-by: Julia Lawall <julia.lawall@inria.fr>
Link: https://lore.kernel.org/r/Y0z2/qFe3kW96MTs@debian-BULLSEYE-live-builder-AMD64
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/frequency/ad9834.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/iio/frequency/ad9834.c b/drivers/staging/iio/frequency/ad9834.c
index 2b4267a87e65e..285df0e489a62 100644
--- a/drivers/staging/iio/frequency/ad9834.c
+++ b/drivers/staging/iio/frequency/ad9834.c
@@ -331,11 +331,9 @@ static IIO_DEV_ATTR_PHASE(0, 1, 0200, NULL, ad9834_write, AD9834_REG_PHASE1);
 static IIO_DEV_ATTR_PHASESYMBOL(0, 0200, NULL, ad9834_write, AD9834_PSEL);
 static IIO_CONST_ATTR_PHASE_SCALE(0, "0.0015339808"); /* 2PI/2^12 rad*/
 
-static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL,
-	ad9834_write, AD9834_PIN_SW);
+static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL, ad9834_write, AD9834_PIN_SW);
 static IIO_DEV_ATTR_OUT_ENABLE(0, 0200, NULL, ad9834_write, AD9834_RESET);
-static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL,
-	ad9834_write, AD9834_OPBITEN);
+static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL, ad9834_write, AD9834_OPBITEN);
 static IIO_DEV_ATTR_OUT_WAVETYPE(0, 0, ad9834_store_wavetype, 0);
 static IIO_DEV_ATTR_OUT_WAVETYPE(0, 1, ad9834_store_wavetype, 1);
 
-- 
GitLab


From febdb8eea98b1c506cf8624531a334e507c4a771 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 1 Oct 2022 19:06:18 +0200
Subject: [PATCH 0211/4122] staging: r8188eu: remove bLedLinkBlinkInProgress

Remove the bLedLinkBlinkInProgress component from struct led_priv. Its
only use is to block requests for "link blinking" when this blinking
pattern is already active.

The "link blinking" pattern is a continuous blinking pattern (as opposed
to other patterns where we blink N times), it's no problem to restart this
pattern when it's already running.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221001170618.444444-1-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c    | 14 --------------
 drivers/staging/r8188eu/include/rtw_led.h |  1 -
 2 files changed, 15 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index 1e316e6358ea2..2527c252c3e95 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -26,7 +26,6 @@ static void ResetLedStatus(struct led_priv *pLed)
 
 	pLed->BlinkTimes = 0; /*  Number of times to toggle led state for blinking. */
 
-	pLed->bLedLinkBlinkInProgress = false;
 	pLed->bLedScanBlinkInProgress = false;
 }
 
@@ -99,7 +98,6 @@ static void blink_work(struct work_struct *work)
 		pLed->BlinkTimes--;
 		if (pLed->BlinkTimes == 0) {
 			if (check_fwstate(pmlmepriv, _FW_LINKED)) {
-				pLed->bLedLinkBlinkInProgress = true;
 				pLed->CurrLedState = LED_BLINK_NORMAL;
 				schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL);
 			} else {
@@ -115,7 +113,6 @@ static void blink_work(struct work_struct *work)
 		pLed->BlinkTimes--;
 		if (pLed->BlinkTimes == 0) {
 			if (check_fwstate(pmlmepriv, _FW_LINKED)) {
-				pLed->bLedLinkBlinkInProgress = true;
 				pLed->CurrLedState = LED_BLINK_NORMAL;
 				schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL);
 			} else {
@@ -132,7 +129,6 @@ static void blink_work(struct work_struct *work)
 		break;
 	case LED_BLINK_WPS_STOP:	/* WPS success */
 		if (!pLed->bLedOn) {
-			pLed->bLedLinkBlinkInProgress = true;
 			pLed->CurrLedState = LED_BLINK_NORMAL;
 			schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL);
 
@@ -189,23 +185,18 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 
 		cancel_delayed_work(&pLed->blink_work);
 
-		pLed->bLedLinkBlinkInProgress = false;
 		pLed->bLedBlinkInProgress = false;
 
 		pLed->CurrLedState = LED_BLINK_SLOWLY;
 		schedule_delayed_work(&pLed->blink_work, LED_BLINK_NO_LINK_INTVL);
 		break;
 	case LED_CTL_LINK:
-		if (!pLed->bLedLinkBlinkInProgress)
-			return;
-
 		if (pLed->CurrLedState == LED_BLINK_SCAN || IS_LED_WPS_BLINKING(pLed))
 			return;
 
 		cancel_delayed_work(&pLed->blink_work);
 
 		pLed->bLedBlinkInProgress = false;
-		pLed->bLedLinkBlinkInProgress = true;
 
 		pLed->CurrLedState = LED_BLINK_NORMAL;
 		schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL);
@@ -222,7 +213,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 
 		cancel_delayed_work(&pLed->blink_work);
 
-		pLed->bLedLinkBlinkInProgress = false;
 		pLed->bLedBlinkInProgress = false;
 		pLed->bLedScanBlinkInProgress = true;
 
@@ -240,7 +230,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 
 		cancel_delayed_work(&pLed->blink_work);
 
-		pLed->bLedLinkBlinkInProgress = false;
 		pLed->bLedBlinkInProgress = true;
 
 		pLed->CurrLedState = LED_BLINK_TXRX;
@@ -253,7 +242,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 
 		cancel_delayed_work(&pLed->blink_work);
 
-		pLed->bLedLinkBlinkInProgress = false;
 		pLed->bLedBlinkInProgress = false;
 		pLed->bLedScanBlinkInProgress = false;
 		pLed->bLedWPSBlinkInProgress = true;
@@ -263,7 +251,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 	case LED_CTL_STOP_WPS:
 		cancel_delayed_work(&pLed->blink_work);
 
-		pLed->bLedLinkBlinkInProgress = false;
 		pLed->bLedBlinkInProgress = false;
 		pLed->bLedScanBlinkInProgress = false;
 		pLed->bLedWPSBlinkInProgress = true;
@@ -283,7 +270,6 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 		break;
 	case LED_CTL_POWER_OFF:
 		pLed->CurrLedState = RTW_LED_OFF;
-		pLed->bLedLinkBlinkInProgress = false;
 		pLed->bLedBlinkInProgress = false;
 		pLed->bLedWPSBlinkInProgress = false;
 		pLed->bLedScanBlinkInProgress = false;
diff --git a/drivers/staging/r8188eu/include/rtw_led.h b/drivers/staging/r8188eu/include/rtw_led.h
index 8520f022a67f2..f57dcf6c8b24e 100644
--- a/drivers/staging/r8188eu/include/rtw_led.h
+++ b/drivers/staging/r8188eu/include/rtw_led.h
@@ -47,7 +47,6 @@ struct led_priv {
 
 	u32 BlinkTimes; /*  Number of times to toggle led state for blinking. */
 
-	bool bLedLinkBlinkInProgress;
 	bool bLedScanBlinkInProgress;
 	struct delayed_work blink_work;
 };
-- 
GitLab


From 85eba7ac39f7e86a2b473aae9d664f9ef47cad2c Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:40:29 +0200
Subject: [PATCH 0212/4122] staging: rtl8192e: Remove unchanged variable
 bFwCtrlLPS

bFwCtrlLPS is just once initialized and never changed. The evaluation will
always have the same result. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/d5b5cfd26648180d082f38085a807c932e87703c.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c |  1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_ps.c   | 15 ++++-----------
 drivers/staging/rtl8192e/rtllib.h            |  3 ---
 3 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 89bc989cffbae..10b79003c6339 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -889,7 +889,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rtllib->PowerSaveControl.bInactivePs = true;
 	priv->rtllib->PowerSaveControl.bIPSModeBackup = false;
 	priv->rtllib->PowerSaveControl.bLeisurePs = true;
-	priv->rtllib->PowerSaveControl.bFwCtrlLPS = false;
 	priv->rtllib->LPSDelayCnt = 0;
 	priv->rtllib->sta_sleep = LPS_IS_WAKE;
 	priv->rtllib->rf_power_state = rf_on;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
index 8c00b111ddb2b..49c50ec21d04f 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
@@ -229,11 +229,8 @@ void rtl92e_leisure_ps_enter(struct net_device *dev)
 		if (pPSC->LpsIdleCount >= RT_CHECK_FOR_HANG_PERIOD) {
 
 			if (priv->rtllib->ps == RTLLIB_PS_DISABLED) {
-				if (!pPSC->bFwCtrlLPS) {
-					if (priv->rtllib->SetFwCmdHandler)
-						priv->rtllib->SetFwCmdHandler(
-							dev, FW_CMD_LPS_ENTER);
-				}
+				if (priv->rtllib->SetFwCmdHandler)
+					priv->rtllib->SetFwCmdHandler(dev, FW_CMD_LPS_ENTER);
 				_rtl92e_ps_set_mode(dev, RTLLIB_PS_MBCAST |
 							 RTLLIB_PS_UNICAST);
 			}
@@ -251,12 +248,8 @@ void rtl92e_leisure_ps_leave(struct net_device *dev)
 	if (pPSC->bLeisurePs) {
 		if (priv->rtllib->ps != RTLLIB_PS_DISABLED) {
 			_rtl92e_ps_set_mode(dev, RTLLIB_PS_DISABLED);
-
-			if (!pPSC->bFwCtrlLPS) {
-				if (priv->rtllib->SetFwCmdHandler)
-					priv->rtllib->SetFwCmdHandler(dev,
-							 FW_CMD_LPS_LEAVE);
-			}
+			if (priv->rtllib->SetFwCmdHandler)
+				priv->rtllib->SetFwCmdHandler(dev, FW_CMD_LPS_LEAVE);
 		}
 	}
 }
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index 3c72ed2a30a41..40bea71bcb22c 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1264,9 +1264,6 @@ struct rt_pwr_save_ctrl {
 
 	u32				CurPsLevel;
 	u32				RegRfPsLevel;
-
-	bool				bFwCtrlLPS;
-
 };
 
 #define RT_RF_CHANGE_SOURCE u32
-- 
GitLab


From e77996261369bb5b202d3739270fcc5bc50c5c91 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:40:36 +0200
Subject: [PATCH 0213/4122] staging: rtl8192e: Remove unchanged variable
 bInactivePs

bInactivePs is just once initialized and never changed. The evaluation
will always have the same result. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/9f46eebf8220a06a1889eaf2d6bac74dd08cfd1f.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_cam.c  | 18 ++++---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c |  1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_ps.c   | 49 +++++++++-----------
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c   | 48 +++++++++----------
 drivers/staging/rtl8192e/rtllib.h            |  2 -
 5 files changed, 51 insertions(+), 67 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
index 41faeb4b9b9b4..8c3ce6cc2541a 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
@@ -81,17 +81,15 @@ void rtl92e_set_key(struct net_device *dev, u8 EntryNo, u8 KeyIndex,
 	enum rt_rf_power_state rt_state;
 
 	rt_state = priv->rtllib->rf_power_state;
-	if (priv->rtllib->PowerSaveControl.bInactivePs) {
-		if (rt_state == rf_off) {
-			if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) {
-				netdev_warn(dev, "%s(): RF is OFF.\n",
-					    __func__);
-				return;
-			}
-			mutex_lock(&priv->rtllib->ips_mutex);
-			rtl92e_ips_leave(dev);
-			mutex_unlock(&priv->rtllib->ips_mutex);
+	if (rt_state == rf_off) {
+		if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) {
+			netdev_warn(dev, "%s(): RF is OFF.\n",
+				    __func__);
+			return;
 		}
+		mutex_lock(&priv->rtllib->ips_mutex);
+		rtl92e_ips_leave(dev);
+		mutex_unlock(&priv->rtllib->ips_mutex);
 	}
 	priv->rtllib->is_set_key = true;
 	if (EntryNo >= TOTAL_CAM_ENTRY) {
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 10b79003c6339..7c0fd2bce923a 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -886,7 +886,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rf_change_in_progress = false;
 	priv->bHwRfOffAction = 0;
 	priv->SetRFPowerStateInProgress = false;
-	priv->rtllib->PowerSaveControl.bInactivePs = true;
 	priv->rtllib->PowerSaveControl.bIPSModeBackup = false;
 	priv->rtllib->PowerSaveControl.bLeisurePs = true;
 	priv->rtllib->LPSDelayCnt = 0;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
index 49c50ec21d04f..fba86ef730b53 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
@@ -116,16 +116,14 @@ void rtl92e_ips_enter(struct net_device *dev)
 					&(priv->rtllib->PowerSaveControl);
 	enum rt_rf_power_state rt_state;
 
-	if (pPSC->bInactivePs) {
-		rt_state = priv->rtllib->rf_power_state;
-		if (rt_state == rf_on && !pPSC->bSwRfProcessing &&
-			(priv->rtllib->state != RTLLIB_LINKED) &&
-			(priv->rtllib->iw_mode != IW_MODE_MASTER)) {
-			pPSC->eInactivePowerState = rf_off;
-			priv->isRFOff = true;
-			priv->bInPowerSaveMode = true;
-			_rtl92e_ps_update_rf_state(dev);
-		}
+	rt_state = priv->rtllib->rf_power_state;
+	if (rt_state == rf_on && !pPSC->bSwRfProcessing &&
+		(priv->rtllib->state != RTLLIB_LINKED) &&
+		(priv->rtllib->iw_mode != IW_MODE_MASTER)) {
+		pPSC->eInactivePowerState = rf_off;
+		priv->isRFOff = true;
+		priv->bInPowerSaveMode = true;
+		_rtl92e_ps_update_rf_state(dev);
 	}
 }
 
@@ -136,14 +134,12 @@ void rtl92e_ips_leave(struct net_device *dev)
 					&(priv->rtllib->PowerSaveControl);
 	enum rt_rf_power_state rt_state;
 
-	if (pPSC->bInactivePs) {
-		rt_state = priv->rtllib->rf_power_state;
-		if (rt_state != rf_on  && !pPSC->bSwRfProcessing &&
-		    priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) {
-			pPSC->eInactivePowerState = rf_on;
-			priv->bInPowerSaveMode = false;
-			_rtl92e_ps_update_rf_state(dev);
-		}
+	rt_state = priv->rtllib->rf_power_state;
+	if (rt_state != rf_on  && !pPSC->bSwRfProcessing &&
+	    priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) {
+		pPSC->eInactivePowerState = rf_on;
+		priv->bInPowerSaveMode = false;
+		_rtl92e_ps_update_rf_state(dev);
 	}
 }
 
@@ -165,18 +161,15 @@ void rtl92e_rtllib_ips_leave_wq(struct net_device *dev)
 	enum rt_rf_power_state rt_state;
 
 	rt_state = priv->rtllib->rf_power_state;
-
-	if (priv->rtllib->PowerSaveControl.bInactivePs) {
-		if (rt_state == rf_off) {
-			if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) {
-				netdev_warn(dev, "%s(): RF is OFF.\n",
-					    __func__);
-				return;
-			}
-			netdev_info(dev, "=========>%s(): rtl92e_ips_leave\n",
+	if (rt_state == rf_off) {
+		if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_IPS) {
+			netdev_warn(dev, "%s(): RF is OFF.\n",
 				    __func__);
-			schedule_work(&priv->rtllib->ips_leave_wq);
+			return;
 		}
+		netdev_info(dev, "=========>%s(): rtl92e_ips_leave\n",
+			    __func__);
+		schedule_work(&priv->rtllib->ips_leave_wq);
 	}
 }
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index 4920cb49e3811..12eea4fcb9dde 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -252,22 +252,20 @@ static int _rtl92e_wx_set_mode(struct net_device *dev,
 	mutex_lock(&priv->wx_mutex);
 	if (wrqu->mode == IW_MODE_ADHOC || wrqu->mode == IW_MODE_MONITOR ||
 	    ieee->bNetPromiscuousMode) {
-		if (priv->rtllib->PowerSaveControl.bInactivePs) {
-			if (rt_state == rf_off) {
-				if (priv->rtllib->rf_off_reason >
-				    RF_CHANGE_BY_IPS) {
-					netdev_warn(dev, "%s(): RF is OFF.\n",
-						    __func__);
-					mutex_unlock(&priv->wx_mutex);
-					return -1;
-				}
-				netdev_info(dev,
-					    "=========>%s(): rtl92e_ips_leave\n",
+		if (rt_state == rf_off) {
+			if (priv->rtllib->rf_off_reason >
+			    RF_CHANGE_BY_IPS) {
+				netdev_warn(dev, "%s(): RF is OFF.\n",
 					    __func__);
-				mutex_lock(&priv->rtllib->ips_mutex);
-				rtl92e_ips_leave(dev);
-				mutex_unlock(&priv->rtllib->ips_mutex);
+				mutex_unlock(&priv->wx_mutex);
+				return -1;
 			}
+			netdev_info(dev,
+				    "=========>%s(): rtl92e_ips_leave\n",
+				    __func__);
+			mutex_lock(&priv->rtllib->ips_mutex);
+			rtl92e_ips_leave(dev);
+			mutex_unlock(&priv->rtllib->ips_mutex);
 		}
 	}
 	ret = rtllib_wx_set_mode(priv->rtllib, a, wrqu, b);
@@ -414,19 +412,17 @@ static int _rtl92e_wx_set_scan(struct net_device *dev,
 	priv->rtllib->FirstIe_InScan = true;
 
 	if (priv->rtllib->state != RTLLIB_LINKED) {
-		if (priv->rtllib->PowerSaveControl.bInactivePs) {
-			if (rt_state == rf_off) {
-				if (priv->rtllib->rf_off_reason >
-				    RF_CHANGE_BY_IPS) {
-					netdev_warn(dev, "%s(): RF is OFF.\n",
-						    __func__);
-					mutex_unlock(&priv->wx_mutex);
-					return -1;
-				}
-				mutex_lock(&priv->rtllib->ips_mutex);
-				rtl92e_ips_leave(dev);
-				mutex_unlock(&priv->rtllib->ips_mutex);
+		if (rt_state == rf_off) {
+			if (priv->rtllib->rf_off_reason >
+			    RF_CHANGE_BY_IPS) {
+				netdev_warn(dev, "%s(): RF is OFF.\n",
+					    __func__);
+				mutex_unlock(&priv->wx_mutex);
+				return -1;
 			}
+			mutex_lock(&priv->rtllib->ips_mutex);
+			rtl92e_ips_leave(dev);
+			mutex_unlock(&priv->rtllib->ips_mutex);
 		}
 		rtllib_stop_scan(priv->rtllib);
 		if (priv->rtllib->LedControlHandler)
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index 40bea71bcb22c..d23d1c3ce39b8 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1250,8 +1250,6 @@ enum rt_rf_power_state {
 };
 
 struct rt_pwr_save_ctrl {
-
-	bool				bInactivePs;
 	bool				bIPSModeBackup;
 	bool				bSwRfProcessing;
 	enum rt_rf_power_state eInactivePowerState;
-- 
GitLab


From 3700c365b6b204eb6d76dac7312fc3bb21faa350 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:40:49 +0200
Subject: [PATCH 0214/4122] staging: rtl8192e: Remove unused variable
 bIPSModeBackup

bIPSModeBackup is just once initialized and never used. Remove resulting
dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/657035f84d266fd5c6f96e9b530a96c2ab4ff900.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 -
 drivers/staging/rtl8192e/rtllib.h            | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 7c0fd2bce923a..cbb65cfea83ac 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -886,7 +886,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rf_change_in_progress = false;
 	priv->bHwRfOffAction = 0;
 	priv->SetRFPowerStateInProgress = false;
-	priv->rtllib->PowerSaveControl.bIPSModeBackup = false;
 	priv->rtllib->PowerSaveControl.bLeisurePs = true;
 	priv->rtllib->LPSDelayCnt = 0;
 	priv->rtllib->sta_sleep = LPS_IS_WAKE;
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index d23d1c3ce39b8..e0fa87b12cebb 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1250,7 +1250,6 @@ enum rt_rf_power_state {
 };
 
 struct rt_pwr_save_ctrl {
-	bool				bIPSModeBackup;
 	bool				bSwRfProcessing;
 	enum rt_rf_power_state eInactivePowerState;
 	enum ips_callback_function ReturnPoint;
-- 
GitLab


From 697541a0e53d16a12b00ffa4c9d204b275e68053 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:40:56 +0200
Subject: [PATCH 0215/4122] staging: rtl8192e: Remove unused variable
 bInPowerSaveMode

bInPowerSaveMode is just once initialized and changed but never evaluated.
Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/be3ebecd88b85ba1d87b9b3fbe02f2e78e6a669e.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_ps.c   | 2 --
 3 files changed, 4 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index cbb65cfea83ac..db55b9b65192c 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -881,7 +881,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->hw_radio_off = false;
 	priv->RegRfOff = false;
 	priv->isRFOff = false;
-	priv->bInPowerSaveMode = false;
 	priv->rtllib->rf_off_reason = 0;
 	priv->rf_change_in_progress = false;
 	priv->bHwRfOffAction = 0;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 7021f9c435d96..28733dc2038b1 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -475,7 +475,6 @@ struct r8192_priv {
 
 	bool RegRfOff;
 	bool isRFOff;
-	bool bInPowerSaveMode;
 	u8 bHwRfOffAction;
 
 	bool rf_change_in_progress;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
index fba86ef730b53..1ced5bcebdabb 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
@@ -122,7 +122,6 @@ void rtl92e_ips_enter(struct net_device *dev)
 		(priv->rtllib->iw_mode != IW_MODE_MASTER)) {
 		pPSC->eInactivePowerState = rf_off;
 		priv->isRFOff = true;
-		priv->bInPowerSaveMode = true;
 		_rtl92e_ps_update_rf_state(dev);
 	}
 }
@@ -138,7 +137,6 @@ void rtl92e_ips_leave(struct net_device *dev)
 	if (rt_state != rf_on  && !pPSC->bSwRfProcessing &&
 	    priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) {
 		pPSC->eInactivePowerState = rf_on;
-		priv->bInPowerSaveMode = false;
 		_rtl92e_ps_update_rf_state(dev);
 	}
 }
-- 
GitLab


From 11dc999d77e4c76deb353d9923e783cdc16c3a7d Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:41:04 +0200
Subject: [PATCH 0216/4122] staging: rtl8192e: Remove unused variable isRFOff

isRFOff is just once initialized and changed but never evaluated. Remove
resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/53242638126775f6698fdcfc49ac552a4e08578b.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_ps.c   | 1 -
 3 files changed, 3 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index db55b9b65192c..3cddc9a86f28f 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -880,7 +880,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rtllib->wx_set_enc = 0;
 	priv->hw_radio_off = false;
 	priv->RegRfOff = false;
-	priv->isRFOff = false;
 	priv->rtllib->rf_off_reason = 0;
 	priv->rf_change_in_progress = false;
 	priv->bHwRfOffAction = 0;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 28733dc2038b1..23dccd6079ea5 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -474,7 +474,6 @@ struct r8192_priv {
 	u16 ChannelPlan;
 
 	bool RegRfOff;
-	bool isRFOff;
 	u8 bHwRfOffAction;
 
 	bool rf_change_in_progress;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
index 1ced5bcebdabb..1501f7be8eee9 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
@@ -121,7 +121,6 @@ void rtl92e_ips_enter(struct net_device *dev)
 		(priv->rtllib->state != RTLLIB_LINKED) &&
 		(priv->rtllib->iw_mode != IW_MODE_MASTER)) {
 		pPSC->eInactivePowerState = rf_off;
-		priv->isRFOff = true;
 		_rtl92e_ps_update_rf_state(dev);
 	}
 }
-- 
GitLab


From 20401e6c6a59f56d9235756168f620e7bffbae26 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:41:10 +0200
Subject: [PATCH 0217/4122] staging: rtl8192e: Remove unchanged variable
 RegRfOff

RegRfOff is just once initialized with false and then set to false again.
All evaluations will result in false. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/fa306d364b43fee7b81f5289309e93bb6fccdba5.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 7 +------
 drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h   | 2 --
 4 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 18e4e5d84878a..f2cbec6eb08f2 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -624,9 +624,6 @@ start:
 	}
 	priv->pFirmware->status = FW_STATUS_0_INIT;
 
-	if (priv->RegRfOff)
-		priv->rtllib->rf_power_state = rf_off;
-
 	ulRegRead = rtl92e_readl(dev, CPU_GEN);
 	if (priv->pFirmware->status == FW_STATUS_0_INIT)
 		ulRegRead |= CPU_GEN_SYSTEM_RESET;
@@ -756,9 +753,7 @@ start:
 
 	rtl92e_writeb(dev, 0x87, 0x0);
 
-	if (priv->RegRfOff) {
-		rtl92e_set_rf_state(dev, rf_off, RF_CHANGE_BY_SW);
-	} else if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_PS) {
+	if (priv->rtllib->rf_off_reason > RF_CHANGE_BY_PS) {
 		rtl92e_set_rf_state(dev, rf_off, priv->rtllib->rf_off_reason);
 	} else if (priv->rtllib->rf_off_reason >= RF_CHANGE_BY_IPS) {
 		rtl92e_set_rf_state(dev, rf_off, priv->rtllib->rf_off_reason);
diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
index 1b592258e6406..a44dffa76a39e 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
@@ -1325,7 +1325,6 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 
 				do {
 					InitilizeCount--;
-					priv->RegRfOff = false;
 					rtstatus = rtl92e_enable_nic(dev);
 				} while (!rtstatus && (InitilizeCount > 0));
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 3cddc9a86f28f..e77a73df9bc57 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -879,7 +879,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->RxCounter = 0;
 	priv->rtllib->wx_set_enc = 0;
 	priv->hw_radio_off = false;
-	priv->RegRfOff = false;
 	priv->rtllib->rf_off_reason = 0;
 	priv->rf_change_in_progress = false;
 	priv->bHwRfOffAction = 0;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 23dccd6079ea5..7d716fa129953 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -472,8 +472,6 @@ struct r8192_priv {
 
 	u16 RegChannelPlan;
 	u16 ChannelPlan;
-
-	bool RegRfOff;
 	u8 bHwRfOffAction;
 
 	bool rf_change_in_progress;
-- 
GitLab


From ec437736505f4ba1152730d8c0ec3d0167bb358b Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:41:18 +0200
Subject: [PATCH 0218/4122] staging: rtl8192e: Remove unchanged variable
 bDisableNormalResetCheck

bDisableNormalResetCheck is just once initialized with false. All
evaluations will result in !false. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/026313f17cf708bf8fa0661f83b2c2b515e55b12.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 6 ++----
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 -
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index e77a73df9bc57..9dcae5d71fb99 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -871,7 +871,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->ScanDelay = 50;
 	priv->ResetProgress = RESET_TYPE_NORESET;
 	priv->bForcedSilentReset = false;
-	priv->bDisableNormalResetCheck = false;
 	priv->force_reset = false;
 	memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32);
 
@@ -1403,13 +1402,12 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 	}
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
-	if (!priv->bDisableNormalResetCheck && ResetType == RESET_TYPE_NORMAL) {
+	if (ResetType == RESET_TYPE_NORMAL) {
 		priv->ResetProgress = RESET_TYPE_NORMAL;
 		return;
 	}
 
-	if (((priv->force_reset) || (!priv->bDisableNormalResetCheck &&
-	      ResetType == RESET_TYPE_SILENT)))
+	if ((priv->force_reset || ResetType == RESET_TYPE_SILENT))
 		_rtl92e_if_silent_reset(dev);
 	priv->force_reset = false;
 	priv->bForcedSilentReset = false;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 7d716fa129953..8a24037a93ec6 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -541,7 +541,6 @@ struct r8192_priv {
 
 	enum reset_type ResetProgress;
 	bool		bForcedSilentReset;
-	bool		bDisableNormalResetCheck;
 	u16		TxCounter;
 	u16		RxCounter;
 	bool		bResetInProgress;
-- 
GitLab


From eda244c081e60a576eecaf635205c4fa5dc9032f Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:41:24 +0200
Subject: [PATCH 0219/4122] staging: rtl8192e: Remove unused variable
 bForcedSilentReset

bForcedSilentReset is never evaluated. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/10504e628eae110d73cd43050e7cc5801ce7f17b.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 4 ----
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 -
 2 files changed, 5 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 9dcae5d71fb99..ae0fcbbee2bb5 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -870,7 +870,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->CckPwEnl = 6;
 	priv->ScanDelay = 50;
 	priv->ResetProgress = RESET_TYPE_NORESET;
-	priv->bForcedSilentReset = false;
 	priv->force_reset = false;
 	memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32);
 
@@ -1240,8 +1239,6 @@ RESET_START:
 END:
 		priv->ResetProgress = RESET_TYPE_NORESET;
 		priv->reset_count++;
-
-		priv->bForcedSilentReset = false;
 		priv->bResetInProgress = false;
 
 		rtl92e_writeb(dev, UFWP, 1);
@@ -1410,7 +1407,6 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 	if ((priv->force_reset || ResetType == RESET_TYPE_SILENT))
 		_rtl92e_if_silent_reset(dev);
 	priv->force_reset = false;
-	priv->bForcedSilentReset = false;
 	priv->bResetInProgress = false;
 }
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 8a24037a93ec6..763ed761bb38d 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -540,7 +540,6 @@ struct r8192_priv {
 	u32		reset_count;
 
 	enum reset_type ResetProgress;
-	bool		bForcedSilentReset;
 	u16		TxCounter;
 	u16		RxCounter;
 	bool		bResetInProgress;
-- 
GitLab


From 93057f8354462708e9721ad6ff6a2294a5ae8a98 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:41:31 +0200
Subject: [PATCH 0220/4122] staging: rtl8192e: Remove unused variable ScanDelay

ScanDelay is never evaluated. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/13d0b993d077490c65768f2b11a631a447f8e86f.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h   | 2 --
 3 files changed, 4 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index f2cbec6eb08f2..c3dcaa27fd2ea 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -503,7 +503,6 @@ static void _rtl92e_read_eeprom_info(struct net_device *dev)
 			priv->ChannelPlan = 0x0;
 		break;
 	case EEPROM_CID_Nettronix:
-		priv->ScanDelay = 100;
 		priv->CustomerID = RT_CID_Nettronix;
 		break;
 	case EEPROM_CID_Pronet:
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index ae0fcbbee2bb5..f07f0fc690a37 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -868,7 +868,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rfa_txpowertrackingindex = 0;
 	priv->rfc_txpowertrackingindex = 0;
 	priv->CckPwEnl = 6;
-	priv->ScanDelay = 50;
 	priv->ResetProgress = RESET_TYPE_NORESET;
 	priv->force_reset = false;
 	memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32);
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 763ed761bb38d..c536131ecd5d4 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -410,8 +410,6 @@ struct r8192_priv {
 	short	chan;
 	short	sens;
 	short	max_sens;
-
-	u8 ScanDelay;
 	bool ps_force;
 
 	u32 irq_mask[2];
-- 
GitLab


From 753def95543cf1a01f5487c0ae8cce3ee5180d77 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sat, 1 Oct 2022 11:41:38 +0200
Subject: [PATCH 0221/4122] staging: rtl8192e: Remove unused variable
 bDriverIsGoingToUnload

bDriverIsGoingToUnload is never evaluated. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/48a84cbe359f159cc9c296b261256a405ee3884e.1664616227.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 3 ---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 -
 2 files changed, 4 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index f07f0fc690a37..213aac943ef76 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -686,7 +686,6 @@ static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset)
 					(&priv->rtllib->PowerSaveControl);
 	bool init_status;
 
-	priv->bDriverIsGoingToUnload = false;
 	priv->bdisable_nic = false;
 
 	priv->up = 1;
@@ -735,7 +734,6 @@ static int _rtl92e_sta_down(struct net_device *dev, bool shutdownrf)
 	if (priv->rtllib->state == RTLLIB_LINKED)
 		rtl92e_leisure_ps_leave(dev);
 
-	priv->bDriverIsGoingToUnload = true;
 	priv->up = 0;
 	priv->rtllib->ieee_up = 0;
 	priv->bfirst_after_down = true;
@@ -838,7 +836,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->polling_timer_on = 0;
 	priv->up_first_time = 1;
 	priv->blinked_ingpio = false;
-	priv->bDriverIsGoingToUnload = false;
 	priv->being_init_adapter = false;
 	priv->initialized_at_probe = false;
 	priv->bdisable_nic = false;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index c536131ecd5d4..1ae3c77e2fefc 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -311,7 +311,6 @@ struct r8192_priv {
 	bool		bfirst_after_down;
 	bool		initialized_at_probe;
 	bool		being_init_adapter;
-	bool		bDriverIsGoingToUnload;
 
 	int		irq;
 	short	irq_enabled;
-- 
GitLab


From 11247c998b00298e9b621f3121380494146c40ff Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 2 Oct 2022 09:48:23 +0200
Subject: [PATCH 0222/4122] staging: r8188eu: merge odm_types.h into other
 headers

The macros SET_TX_DESC_ANTSEL_{A,B,C}_88E are used in odm_RTL8188E.c.
Move them from odm_types.h to odm_RTL8188E.h.

ODM_CE is used in places where also ODM_ITRF_USB is used in the code.
Move the ODM_CE define to the header that defines ODM_ITRF_USB. While
at it remove an extra space between '#' and 'define ODM_ITRF_USB'.

The haeder odm_types.h is now empty and we can remove it.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221002074827.8566-2-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/odm.h          |  3 ++-
 drivers/staging/r8188eu/include/odm_RTL8188E.h |  7 +++++++
 drivers/staging/r8188eu/include/odm_types.h    | 16 ----------------
 drivers/staging/r8188eu/include/rtl8188e_hal.h |  1 -
 4 files changed, 9 insertions(+), 18 deletions(-)
 delete mode 100644 drivers/staging/r8188eu/include/odm_types.h

diff --git a/drivers/staging/r8188eu/include/odm.h b/drivers/staging/r8188eu/include/odm.h
index f131e17167bfb..89b01dd614ba9 100644
--- a/drivers/staging/r8188eu/include/odm.h
+++ b/drivers/staging/r8188eu/include/odm.h
@@ -119,7 +119,8 @@ enum odm_ability_def {
 	ODM_BB_PWR_TRA			= BIT(8),
 };
 
-# define ODM_ITRF_USB 0x2
+#define ODM_ITRF_USB	0x2
+#define ODM_CE		0x04
 
 /*  ODM_CMNINFO_WM_MODE */
 enum odm_wireless_mode {
diff --git a/drivers/staging/r8188eu/include/odm_RTL8188E.h b/drivers/staging/r8188eu/include/odm_RTL8188E.h
index 3c6471f1a893f..4f16af2485913 100644
--- a/drivers/staging/r8188eu/include/odm_RTL8188E.h
+++ b/drivers/staging/r8188eu/include/odm_RTL8188E.h
@@ -11,6 +11,13 @@
 #define	MAIN_ANT_CGCS_RX	0
 #define	AUX_ANT_CGCS_RX	1
 
+#define SET_TX_DESC_ANTSEL_A_88E(__ptxdesc, __value)			\
+	le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(24))
+#define SET_TX_DESC_ANTSEL_B_88E(__ptxdesc, __value)			\
+	le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(25))
+#define SET_TX_DESC_ANTSEL_C_88E(__ptxdesc, __value)			\
+	le32p_replace_bits((__le32 *)(__ptxdesc + 28), __value, BIT(29))
+
 void ODM_AntennaDiversityInit_88E(struct odm_dm_struct *pDM_Odm);
 
 void ODM_AntennaDiversity_88E(struct odm_dm_struct *pDM_Odm);
diff --git a/drivers/staging/r8188eu/include/odm_types.h b/drivers/staging/r8188eu/include/odm_types.h
deleted file mode 100644
index 76302df4b330b..0000000000000
--- a/drivers/staging/r8188eu/include/odm_types.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
-/* Copyright(c) 2007 - 2011 Realtek Corporation. */
-
-#ifndef __ODM_TYPES_H__
-#define __ODM_TYPES_H__
-
-#define	ODM_CE			0x04	/* BIT(2) */
-
-#define SET_TX_DESC_ANTSEL_A_88E(__ptxdesc, __value)			\
-	le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(24))
-#define SET_TX_DESC_ANTSEL_B_88E(__ptxdesc, __value)			\
-	le32p_replace_bits((__le32 *)(__ptxdesc + 8), __value, BIT(25))
-#define SET_TX_DESC_ANTSEL_C_88E(__ptxdesc, __value)			\
-	le32p_replace_bits((__le32 *)(__ptxdesc + 28), __value, BIT(29))
-
-#endif /*  __ODM_TYPES_H__ */
diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h
index ed4091e7cc7e6..a1e88e6d5c0c7 100644
--- a/drivers/staging/r8188eu/include/rtl8188e_hal.h
+++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h
@@ -14,7 +14,6 @@
 #include "rtl8188e_xmit.h"
 #include "rtl8188e_cmd.h"
 #include "rtw_efuse.h"
-#include "odm_types.h"
 #include "odm.h"
 #include "odm_HWConfig.h"
 #include "odm_RegDefine11N.h"
-- 
GitLab


From ef2a2422ab86d99e0bfd940c38c60b77c9ad9954 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 2 Oct 2022 09:48:24 +0200
Subject: [PATCH 0223/4122] staging: r8188eu: convert rtw_init_evt_priv() to
 common error logic

Convert the function rtw_init_evt_priv() to common kernel error logic.
Return 0 on success and negative value on failure. This is part of
getting rid of returning _SUCCESS and _FAIL which uses inverted error
logic and is used all over the driver.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221002074827.8566-3-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_cmd.c    | 8 +++-----
 drivers/staging/r8188eu/include/rtw_cmd.h | 2 +-
 drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c
index 3fadace33de65..88b939e42f5a1 100644
--- a/drivers/staging/r8188eu/core/rtw_cmd.c
+++ b/drivers/staging/r8188eu/core/rtw_cmd.c
@@ -90,10 +90,8 @@ u32	rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
 	return _SUCCESS;
 }
 
-u32 rtw_init_evt_priv(struct evt_priv *pevtpriv)
+int rtw_init_evt_priv(struct evt_priv *pevtpriv)
 {
-	u32 res = _SUCCESS;
-
 	/* allocate DMA-able/Non-Page memory for cmd_buf and rsp_buf */
 	atomic_set(&pevtpriv->event_seq, 0);
 
@@ -101,9 +99,9 @@ u32 rtw_init_evt_priv(struct evt_priv *pevtpriv)
 	pevtpriv->c2h_wk_alive = false;
 	pevtpriv->c2h_queue = rtw_cbuf_alloc(C2H_QUEUE_MAX_LEN + 1);
 	if (!pevtpriv->c2h_queue)
-		res = _FAIL;
+		return -ENOMEM;
 
-	return res;
+	return 0;
 }
 
 void rtw_free_cmd_priv(struct	cmd_priv *pcmdpriv)
diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index 9a76aa85de940..c6c48e1fb9618 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -85,7 +85,7 @@ int rtw_cmd_thread(void *context);
 u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv);
 void rtw_free_cmd_priv(struct cmd_priv *pcmdpriv);
 
-u32 rtw_init_evt_priv(struct evt_priv *pevtpriv);
+int rtw_init_evt_priv(struct evt_priv *pevtpriv);
 void rtw_free_evt_priv(struct evt_priv *pevtpriv);
 void rtw_evt_notify_isr(struct evt_priv *pevtpriv);
 u8 p2p_protocol_wk_cmd(struct adapter *padapter, int intCmdType);
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index 6a45315d01a29..dd4c89d7390dd 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -468,7 +468,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter)
 
 	padapter->cmdpriv.padapter = padapter;
 
-	if ((rtw_init_evt_priv(&padapter->evtpriv)) == _FAIL) {
+	if (rtw_init_evt_priv(&padapter->evtpriv)) {
 		dev_err(dvobj_to_dev(padapter->dvobj), "rtw_init_evt_priv failed\n");
 		goto free_cmd_priv;
 	}
-- 
GitLab


From d4fda24757678311ff0a219bd150a3c3aeb6a2f8 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 2 Oct 2022 09:48:25 +0200
Subject: [PATCH 0224/4122] staging: r8188eu: convert rtw_init_cmd_priv() to
 common error logic

Convert the function rtw_init_cmd_priv() to common kernel error logic.
Return 0 on success and negative value on failure. This is part of
getting rid of returning _SUCCESS and _FAIL which uses inverted error
logic and is used all over the driver.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221002074827.8566-4-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_cmd.c    | 8 ++++----
 drivers/staging/r8188eu/include/rtw_cmd.h | 2 +-
 drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c
index 88b939e42f5a1..fdc0c71e28a45 100644
--- a/drivers/staging/r8188eu/core/rtw_cmd.c
+++ b/drivers/staging/r8188eu/core/rtw_cmd.c
@@ -54,7 +54,7 @@ exit:
 	return _SUCCESS;
 }
 
-u32	rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
+int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
 {
 	init_completion(&pcmdpriv->enqueue_cmd);
 	/* sema_init(&(pcmdpriv->cmd_done_sema), 0); */
@@ -71,7 +71,7 @@ u32	rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
 					      GFP_KERNEL);
 
 	if (!pcmdpriv->cmd_allocated_buf)
-		return _FAIL;
+		return -ENOMEM;
 
 	pcmdpriv->cmd_buf = pcmdpriv->cmd_allocated_buf  +  CMDBUFF_ALIGN_SZ - ((size_t)(pcmdpriv->cmd_allocated_buf) & (CMDBUFF_ALIGN_SZ - 1));
 
@@ -79,7 +79,7 @@ u32	rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
 
 	if (!pcmdpriv->rsp_allocated_buf) {
 		kfree(pcmdpriv->cmd_allocated_buf);
-		return _FAIL;
+		return -ENOMEM;
 	}
 
 	pcmdpriv->rsp_buf = pcmdpriv->rsp_allocated_buf  +  4 - ((size_t)(pcmdpriv->rsp_allocated_buf) & 3);
@@ -87,7 +87,7 @@ u32	rtw_init_cmd_priv(struct cmd_priv *pcmdpriv)
 	pcmdpriv->cmd_done_cnt = 0;
 	pcmdpriv->rsp_cnt = 0;
 
-	return _SUCCESS;
+	return 0;
 }
 
 int rtw_init_evt_priv(struct evt_priv *pevtpriv)
diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index c6c48e1fb9618..8bbfeb42604fa 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -82,7 +82,7 @@ void rtw_free_cmd_obj(struct cmd_obj *pcmd);
 
 int rtw_cmd_thread(void *context);
 
-u32 rtw_init_cmd_priv(struct cmd_priv *pcmdpriv);
+int rtw_init_cmd_priv(struct cmd_priv *pcmdpriv);
 void rtw_free_cmd_priv(struct cmd_priv *pcmdpriv);
 
 int rtw_init_evt_priv(struct evt_priv *pevtpriv);
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index dd4c89d7390dd..490e0c7dc0345 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -461,7 +461,7 @@ u8 rtw_reset_drv_sw(struct adapter *padapter)
 
 u8 rtw_init_drv_sw(struct adapter *padapter)
 {
-	if ((rtw_init_cmd_priv(&padapter->cmdpriv)) == _FAIL) {
+	if (rtw_init_cmd_priv(&padapter->cmdpriv)) {
 		dev_err(dvobj_to_dev(padapter->dvobj), "rtw_init_cmd_priv failed\n");
 		return _FAIL;
 	}
-- 
GitLab


From 40b3f62227d46d21eab71832e331e3aa740b1b34 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 2 Oct 2022 09:48:26 +0200
Subject: [PATCH 0225/4122] staging: r8188eu: convert rtw_init_mlme_priv() to
 common error logic

Convert the function rtw_init_mlme_priv() to common kernel error logic.
Return 0 on success and negative value on failure. This is part of
getting rid of returning _SUCCESS and _FAIL which uses inverted error
logic and is used all over the driver.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221002074827.8566-5-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme.c   | 12 ++++--------
 drivers/staging/r8188eu/os_dep/os_intfs.c |  2 +-
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme.c b/drivers/staging/r8188eu/core/rtw_mlme.c
index 5ca03d6cac32a..1f69e5c57d5d5 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme.c
@@ -224,7 +224,6 @@ int rtw_init_mlme_priv(struct adapter *padapter)/* struct	mlme_priv *pmlmepriv)
 	u8	*pbuf;
 	struct wlan_network	*pnetwork;
 	struct mlme_priv		*pmlmepriv = &padapter->mlmepriv;
-	int	res = _SUCCESS;
 
 	/*  We don't need to memset padapter->XXX to zero, because adapter is allocated by vzalloc(). */
 
@@ -245,10 +244,9 @@ int rtw_init_mlme_priv(struct adapter *padapter)/* struct	mlme_priv *pmlmepriv)
 
 	pbuf = vzalloc(MAX_BSS_CNT * (sizeof(struct wlan_network)));
 
-	if (!pbuf) {
-		res = _FAIL;
-		goto exit;
-	}
+	if (!pbuf)
+		return -ENOMEM;
+
 	pmlmepriv->free_bss_buf = pbuf;
 
 	pnetwork = (struct wlan_network *)pbuf;
@@ -265,9 +263,7 @@ int rtw_init_mlme_priv(struct adapter *padapter)/* struct	mlme_priv *pmlmepriv)
 
 	rtw_init_mlme_timer(padapter);
 
-exit:
-
-	return res;
+	return 0;
 }
 
 void rtw_free_mlme_priv(struct mlme_priv *pmlmepriv)
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index 490e0c7dc0345..d8b8a5291e402 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -473,7 +473,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter)
 		goto free_cmd_priv;
 	}
 
-	if (rtw_init_mlme_priv(padapter) == _FAIL) {
+	if (rtw_init_mlme_priv(padapter)) {
 		dev_err(dvobj_to_dev(padapter->dvobj), "rtw_init_mlme_priv failed\n");
 		goto free_evt_priv;
 	}
-- 
GitLab


From fd692ab4c511a4dd5685e9e34e4ee69ddc362675 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 2 Oct 2022 09:48:27 +0200
Subject: [PATCH 0226/4122] staging: r8188eu: convert _rtw_init_sta_priv() to
 common error logic

Convert the function _rtw_init_sta_priv() to common kernel error logic.
Return 0 on success and negative value on failure. This is part of
getting rid of returning _SUCCESS and _FAIL which uses inverted error
logic and is used all over the driver.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221002074827.8566-6-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_sta_mgt.c | 6 +++---
 drivers/staging/r8188eu/include/sta_info.h | 2 +-
 drivers/staging/r8188eu/os_dep/os_intfs.c  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_sta_mgt.c b/drivers/staging/r8188eu/core/rtw_sta_mgt.c
index 98eeb16cab6c8..bbde5c03f9ae0 100644
--- a/drivers/staging/r8188eu/core/rtw_sta_mgt.c
+++ b/drivers/staging/r8188eu/core/rtw_sta_mgt.c
@@ -45,7 +45,7 @@ static void _rtw_init_stainfo(struct sta_info *psta)
 	psta->keep_alive_trycnt = 0;
 }
 
-u32	_rtw_init_sta_priv(struct	sta_priv *pstapriv)
+int _rtw_init_sta_priv(struct sta_priv *pstapriv)
 {
 	struct sta_info *psta;
 	s32 i;
@@ -53,7 +53,7 @@ u32	_rtw_init_sta_priv(struct	sta_priv *pstapriv)
 	pstapriv->pallocated_stainfo_buf = vzalloc(sizeof(struct sta_info) * NUM_STA + 4);
 
 	if (!pstapriv->pallocated_stainfo_buf)
-		return _FAIL;
+		return -ENOMEM;
 
 	pstapriv->pstainfo_buf = pstapriv->pallocated_stainfo_buf + 4 -
 		((size_t)(pstapriv->pallocated_stainfo_buf) & 3);
@@ -93,7 +93,7 @@ u32	_rtw_init_sta_priv(struct	sta_priv *pstapriv)
 	pstapriv->expire_to = 3; /*  3*2 = 6 sec */
 	pstapriv->max_num_sta = NUM_STA;
 
-	return _SUCCESS;
+	return 0;
 }
 
 inline int rtw_stainfo_offset(struct sta_priv *stapriv, struct sta_info *sta)
diff --git a/drivers/staging/r8188eu/include/sta_info.h b/drivers/staging/r8188eu/include/sta_info.h
index 4112c837bcefb..f76e086b5701f 100644
--- a/drivers/staging/r8188eu/include/sta_info.h
+++ b/drivers/staging/r8188eu/include/sta_info.h
@@ -295,7 +295,7 @@ static inline u32 wifi_mac_hash(u8 *mac)
 	return x;
 }
 
-extern u32	_rtw_init_sta_priv(struct sta_priv *pstapriv);
+extern int _rtw_init_sta_priv(struct sta_priv *pstapriv);
 extern void _rtw_free_sta_priv(struct sta_priv *pstapriv);
 
 #define stainfo_offset_valid(offset) (offset < NUM_STA && offset >= 0)
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index d8b8a5291e402..e43ef7e5ba708 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -494,7 +494,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter)
 		goto free_xmit_priv;
 	}
 
-	if (_rtw_init_sta_priv(&padapter->stapriv) == _FAIL) {
+	if (_rtw_init_sta_priv(&padapter->stapriv)) {
 		dev_err(dvobj_to_dev(padapter->dvobj), "_rtw_init_sta_priv failed\n");
 		goto free_recv_priv;
 	}
-- 
GitLab


From a609750c8bf0e29060862f2f232f775129c31593 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 2 Oct 2022 16:35:43 +0200
Subject: [PATCH 0227/4122] staging: r8188eu: convert rtw_reset_drv_sw() to
 void

The function rtw_reset_drv_sw() always returns _SUCCESS. None of its
callers use the return value. Convert the return type of that function
from u8 to void. This is part of getting rid of _FAIL / _SUCCESS.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221002143544.7974-2-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/osdep_intf.h | 2 +-
 drivers/staging/r8188eu/os_dep/os_intfs.c    | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/include/osdep_intf.h b/drivers/staging/r8188eu/include/osdep_intf.h
index 36511c4695461..ce8e1bc4a61ca 100644
--- a/drivers/staging/r8188eu/include/osdep_intf.h
+++ b/drivers/staging/r8188eu/include/osdep_intf.h
@@ -44,7 +44,7 @@ int netdev_close(struct net_device *pnetdev);
 
 u8 rtw_init_drv_sw(struct adapter *padapter);
 u8 rtw_free_drv_sw(struct adapter *padapter);
-u8 rtw_reset_drv_sw(struct adapter *padapter);
+void rtw_reset_drv_sw(struct adapter *padapter);
 
 u32 rtw_start_drv_threads(struct adapter *padapter);
 void rtw_stop_drv_threads (struct adapter *padapter);
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index e43ef7e5ba708..2983e665bd5c4 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -433,7 +433,7 @@ static void rtw_init_default_value(struct adapter *padapter)
 	padapter->bShowGetP2PState = 1;
 }
 
-u8 rtw_reset_drv_sw(struct adapter *padapter)
+void rtw_reset_drv_sw(struct adapter *padapter)
 {
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 
@@ -455,8 +455,6 @@ u8 rtw_reset_drv_sw(struct adapter *padapter)
 	padapter->mlmeextpriv.sitesurvey_res.state = SCAN_DISABLE;
 
 	rtw_set_signal_stat_timer(&padapter->recvpriv);
-
-	return _SUCCESS;
 }
 
 u8 rtw_init_drv_sw(struct adapter *padapter)
-- 
GitLab


From c55f29446d175049700bdfb41a8d30fa67640d15 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 2 Oct 2022 16:35:44 +0200
Subject: [PATCH 0228/4122] staging: r8188eu: convert rtw_free_drv_sw() to void

The function rtw_free_drv_sw() always returns _SUCCESS. None of its
callers use the return value. Convert the return type of that function
from u8 to void. This is part of getting rid of _FAIL / _SUCCESS.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221002143544.7974-3-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/osdep_intf.h | 2 +-
 drivers/staging/r8188eu/os_dep/os_intfs.c    | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/include/osdep_intf.h b/drivers/staging/r8188eu/include/osdep_intf.h
index ce8e1bc4a61ca..0f7d74a3ff6df 100644
--- a/drivers/staging/r8188eu/include/osdep_intf.h
+++ b/drivers/staging/r8188eu/include/osdep_intf.h
@@ -43,7 +43,7 @@ int netdev_open(struct net_device *pnetdev);
 int netdev_close(struct net_device *pnetdev);
 
 u8 rtw_init_drv_sw(struct adapter *padapter);
-u8 rtw_free_drv_sw(struct adapter *padapter);
+void rtw_free_drv_sw(struct adapter *padapter);
 void rtw_reset_drv_sw(struct adapter *padapter);
 
 u32 rtw_start_drv_threads(struct adapter *padapter);
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index 2983e665bd5c4..38e324754c8fa 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -548,7 +548,7 @@ void rtw_cancel_all_timer(struct adapter *padapter)
 	_cancel_timer_ex(&padapter->recvpriv.signal_stat_timer);
 }
 
-u8 rtw_free_drv_sw(struct adapter *padapter)
+void rtw_free_drv_sw(struct adapter *padapter)
 {
 	/* we can call rtw_p2p_enable here, but: */
 	/*  1. rtw_p2p_enable may have IO operation */
@@ -585,8 +585,6 @@ u8 rtw_free_drv_sw(struct adapter *padapter)
 	/*  clear pbuddystruct adapter to avoid access wrong pointer. */
 	if (padapter->pbuddy_adapter)
 		padapter->pbuddy_adapter->pbuddy_adapter = NULL;
-
-	return _SUCCESS;
 }
 
 void netdev_br_init(struct net_device *netdev)
-- 
GitLab


From c21899dcab317de9007e65dbf218400782ee064c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 4 Oct 2022 16:09:44 +0100
Subject: [PATCH 0229/4122] staging: rtl8192u: Fix spelling mistake athros ->
 Atheros and fix grammer

There is a spellig mistake, correct it and fix capital letter on the
proper noun. Also fix the grammar.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221004150944.148157-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
index b58e75932ecd5..f142d09869901 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
@@ -1806,7 +1806,7 @@ int ieee80211_parse_info_param(struct ieee80211_device *ieee,
 				info_element->data[0] == 0x00 &&
 				info_element->data[1] == 0x13 &&
 				info_element->data[2] == 0x74)) {
-				netdev_dbg(ieee->dev, "========> athros AP is exist\n");
+				netdev_dbg(ieee->dev, "========> Atheros AP exists\n");
 				network->atheros_cap_exist = true;
 			} else
 				network->atheros_cap_exist = false;
-- 
GitLab


From 0fd4d8b7e87e472d54fdb4912703c1c09d12ac70 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 4 Oct 2022 16:35:39 +0100
Subject: [PATCH 0230/4122] staging: rtl8723bs: Remove redundant initialization
 of variable efuseValue

The variable efuseValue is being initialized with a value that is never
read. The variable is being re-assigned later on. The initialization is
redundant and can be removed.

Cleans up warning:
drivers/staging/rtl8723bs/core/rtw_efuse.c:285:6: warning: variable
'efuseValue' set but not used [-Wunused-but-set-variable]

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221004153539.150867-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_efuse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_efuse.c b/drivers/staging/rtl8723bs/core/rtw_efuse.c
index 06e727ce9cc29..eb848f9bbf2c3 100644
--- a/drivers/staging/rtl8723bs/core/rtw_efuse.c
+++ b/drivers/staging/rtl8723bs/core/rtw_efuse.c
@@ -282,7 +282,7 @@ u8 efuse_OneByteWrite(struct adapter *padapter, u16 addr, u8 data, bool bPseudoT
 {
 	u8 tmpidx = 0;
 	u8 bResult = false;
-	u32 efuseValue = 0;
+	u32 efuseValue;
 
 	if (bPseudoTest)
 		return Efuse_Write1ByteToFakeContent(addr, data);
-- 
GitLab


From dcf478ab1566ceb6c7796b3830bd340e50bb3f73 Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Wed, 19 Oct 2022 12:31:10 -0700
Subject: [PATCH 0231/4122] staging: rtl8723bs: Removed extra tabs in
 conditional statements

checkpatch found extra tabs in two conditional statements in
rtw_ieee80211.c. Should be one tab instead of two.

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/Y1BQfiwOXzAZpCCa@marshmallow
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_ieee80211.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c
index 3d8a64f694481..30e7457a9c312 100644
--- a/drivers/staging/rtl8723bs/core/rtw_ieee80211.c
+++ b/drivers/staging/rtl8723bs/core/rtw_ieee80211.c
@@ -1063,18 +1063,18 @@ void rtw_get_bcn_info(struct wlan_network *pnetwork)
 	/* parsing HT_CAP_IE */
 	p = rtw_get_ie(pnetwork->network.ies + _FIXED_IE_LENGTH_, WLAN_EID_HT_CAPABILITY, &len, pnetwork->network.ie_length - _FIXED_IE_LENGTH_);
 	if (p && len > 0) {
-			pht_cap = (struct ieee80211_ht_cap *)(p + 2);
-			pnetwork->bcn_info.ht_cap_info = le16_to_cpu(pht_cap->cap_info);
+		pht_cap = (struct ieee80211_ht_cap *)(p + 2);
+		pnetwork->bcn_info.ht_cap_info = le16_to_cpu(pht_cap->cap_info);
 	} else {
-			pnetwork->bcn_info.ht_cap_info = 0;
+		pnetwork->bcn_info.ht_cap_info = 0;
 	}
 	/* parsing HT_INFO_IE */
 	p = rtw_get_ie(pnetwork->network.ies + _FIXED_IE_LENGTH_, WLAN_EID_HT_OPERATION, &len, pnetwork->network.ie_length - _FIXED_IE_LENGTH_);
 	if (p && len > 0) {
-			pht_info = (struct HT_info_element *)(p + 2);
-			pnetwork->bcn_info.ht_info_infos_0 = pht_info->infos[0];
+		pht_info = (struct HT_info_element *)(p + 2);
+		pnetwork->bcn_info.ht_info_infos_0 = pht_info->infos[0];
 	} else {
-			pnetwork->bcn_info.ht_info_infos_0 = 0;
+		pnetwork->bcn_info.ht_info_infos_0 = 0;
 	}
 }
 
-- 
GitLab


From 1850f1598d4a991289628487648e17f2631e37df Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Wed, 19 Oct 2022 19:10:52 -0700
Subject: [PATCH 0232/4122] staging: rtl8723bs: align block comment stars

Align '*' on each line of block comment in rtw_ioctl_set.
Issue found by checkpatch.

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/3d9738edd0992b72bf8fc8a05706a490772b5317.1666230736.git.eperi1024@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
index 8c11daff2d590..f15dbf450ff4d 100644
--- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
+++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
@@ -462,11 +462,11 @@ exit:
 }
 
 /*
-* rtw_get_cur_max_rate -
-* @adapter: pointer to struct adapter structure
-*
-* Return 0 or 100Kbps
-*/
+ * rtw_get_cur_max_rate -
+ * @adapter: pointer to struct adapter structure
+ *
+ * Return 0 or 100Kbps
+ */
 u16 rtw_get_cur_max_rate(struct adapter *adapter)
 {
 	int	i = 0;
-- 
GitLab


From d9e57cc302f33fcbcd12fe9cca6213d520e0537c Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Wed, 19 Oct 2022 19:10:53 -0700
Subject: [PATCH 0233/4122] staging: rtl8723bs: remove unnecessary parenthesis

Remove extra parenthesis in conditional statement in rtw_ioctl_set.
Issue found by checkpatch.

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/a08a440eabddd8e78d045ca9898a415d81f6f6d7.1666230736.git.eperi1024@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
index f15dbf450ff4d..8c7daab141db4 100644
--- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
+++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
@@ -159,7 +159,7 @@ u8 rtw_set_802_11_ssid(struct adapter *padapter, struct ndis_802_11_ssid *ssid)
 	if (check_fwstate(pmlmepriv, _FW_LINKED|WIFI_ADHOC_MASTER_STATE) == true) {
 		if ((pmlmepriv->assoc_ssid.ssid_length == ssid->ssid_length) &&
 		    (!memcmp(&pmlmepriv->assoc_ssid.ssid, ssid->ssid, ssid->ssid_length))) {
-			if ((check_fwstate(pmlmepriv, WIFI_STATION_STATE) == false)) {
+			if (check_fwstate(pmlmepriv, WIFI_STATION_STATE) == false) {
 				if (rtw_is_same_ibss(padapter, pnetwork) == false) {
 					/* if in WIFI_ADHOC_MASTER_STATE | WIFI_ADHOC_STATE, create bss or rejoin again */
 					rtw_disassoc_cmd(padapter, 0, true);
-- 
GitLab


From b860ce04ac409386e4261931525ac3b3ac1758e1 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 7 Oct 2022 21:29:41 +0100
Subject: [PATCH 0234/4122] staging: sm750fb: Kconfig: Fix spelling mistake
 "accelearion" -> "acceleration"

There is a spelling mistake in a Kconfig description. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221007202941.2756304-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/sm750fb/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/sm750fb/Kconfig b/drivers/staging/sm750fb/Kconfig
index 8c0d8a873d5b0..2101a6605efc3 100644
--- a/drivers/staging/sm750fb/Kconfig
+++ b/drivers/staging/sm750fb/Kconfig
@@ -8,7 +8,7 @@ config FB_SM750
 	select FB_CFB_IMAGEBLIT
 	help
 	  Frame buffer driver for the Silicon Motion SM750 chip
-	  with 2D accelearion and dual head support.
+	  with 2D acceleration and dual head support.
 
 	  This driver is also available as a module. The module will be
 	  called sm750fb. If you want to compile it as a module, say M
-- 
GitLab


From 6c2fb5dfb640ebc8b40a98b6c4337e9fb9f87ee3 Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.m.cvetic@gmail.com>
Date: Mon, 10 Oct 2022 20:02:51 +0100
Subject: [PATCH 0235/4122] staging: rtl8192e: Remove single statement braces

Remove braces around single line statement, to resolve checkpatch.pl
warnings "braces {} are not necessary for single statement blocks"

Signed-off-by: Dragan Cvetic <dragan.m.cvetic@gmail.com>
Link: https://lore.kernel.org/r/20221010190252.12402-1-dragan.m.cvetic@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
index 702551056227d..d7bfaf68291c2 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
@@ -267,10 +267,8 @@ static void _rtl92e_dm_check_ac_dc_power(struct net_device *dev)
 			"PATH=/usr/bin:/bin",
 			 NULL};
 
-	if (priv->ResetProgress == RESET_TYPE_SILENT) {
+	if (priv->ResetProgress == RESET_TYPE_SILENT)
 		return;
-	}
-
 	if (priv->rtllib->state != RTLLIB_LINKED)
 		return;
 	call_usermodehelper(ac_dc_script, argv, envp, UMH_WAIT_PROC);
@@ -330,9 +328,8 @@ static void _rtl92e_dm_check_rate_adaptive(struct net_device *dev)
 	bool bshort_gi_enabled = false;
 	static u8 ping_rssi_state;
 
-	if (!priv->up) {
+	if (!priv->up)
 		return;
-	}
 
 	if (pra->rate_adaptive_disabled)
 		return;
@@ -777,9 +774,8 @@ static void _rtl92e_dm_tx_power_tracking_cb_thermal(struct net_device *dev)
 		tmpRegA = rtl92e_get_bb_reg(dev, rOFDM0_XATxIQImbalance,
 					    bMaskDWord);
 		for (i = 0; i < OFDM_Table_Length; i++) {
-			if (tmpRegA == OFDMSwingTable[i]) {
+			if (tmpRegA == OFDMSwingTable[i])
 				priv->OFDM_index[0] = i;
-			}
 		}
 
 		TempCCk = rtl92e_get_bb_reg(dev, rCCK0_TxFilter1, bMaskByte2);
@@ -1066,9 +1062,8 @@ void rtl92e_dm_restore_state(struct net_device *dev)
 	u32	reg_ratr = priv->rate_adaptive.last_ratr;
 	u32 ratr_value;
 
-	if (!priv->up) {
+	if (!priv->up)
 		return;
-	}
 
 	if (priv->rate_adaptive.rate_adaptive_disabled)
 		return;
-- 
GitLab


From 2122a86d426381069f279d9e6a71323cfc0da39f Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.m.cvetic@gmail.com>
Date: Mon, 10 Oct 2022 20:04:55 +0100
Subject: [PATCH 0236/4122] staging: rtl8192e: Rename CurSTAConnectState and
 PreSTAConnectState

Rename variable CurSTAConnectState to cur_sta_connect_state,
PreSTAConnectState to pre_sta_connect_state to avoid CamelCase
which is not accepted by checkpatch.

Signed-off-by: Dragan Cvetic <dragan.m.cvetic@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/20221010190457.13199-2-dragan.m.cvetic@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 22 +++++++++++-----------
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.h |  4 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
index d7bfaf68291c2..0572ce40290e8 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
@@ -1138,8 +1138,8 @@ static void _rtl92e_dm_dig_init(struct net_device *dev)
 
 	dm_digtable.dig_state		= DM_STA_DIG_MAX;
 	dm_digtable.dig_highpwr_state	= DM_STA_DIG_MAX;
-	dm_digtable.CurSTAConnectState = DIG_STA_DISCONNECT;
-	dm_digtable.PreSTAConnectState = DIG_STA_DISCONNECT;
+	dm_digtable.cur_sta_connect_state = DIG_STA_DISCONNECT;
+	dm_digtable.pre_sta_connect_state = DIG_STA_DISCONNECT;
 
 	dm_digtable.rssi_low_thresh	= DM_DIG_THRESH_LOW;
 	dm_digtable.rssi_high_thresh	= DM_DIG_THRESH_HIGH;
@@ -1207,9 +1207,9 @@ static void _rtl92e_dm_ctrl_initgain_byrssi_driver(struct net_device *dev)
 	}
 
 	if (priv->rtllib->state == RTLLIB_LINKED)
-		dm_digtable.CurSTAConnectState = DIG_STA_CONNECT;
+		dm_digtable.cur_sta_connect_state = DIG_STA_CONNECT;
 	else
-		dm_digtable.CurSTAConnectState = DIG_STA_DISCONNECT;
+		dm_digtable.cur_sta_connect_state = DIG_STA_DISCONNECT;
 
 
 	dm_digtable.rssi_val = priv->undecorated_smoothed_pwdb;
@@ -1218,7 +1218,7 @@ static void _rtl92e_dm_ctrl_initgain_byrssi_driver(struct net_device *dev)
 	_rtl92e_dm_cs_ratio(dev);
 	if (dm_digtable.dig_algorithm_switch)
 		dm_digtable.dig_algorithm_switch = 0;
-	dm_digtable.PreSTAConnectState = dm_digtable.CurSTAConnectState;
+	dm_digtable.pre_sta_connect_state = dm_digtable.cur_sta_connect_state;
 
 }
 
@@ -1368,8 +1368,8 @@ static void _rtl92e_dm_initial_gain(struct net_device *dev)
 		return;
 	}
 
-	if (dm_digtable.PreSTAConnectState == dm_digtable.CurSTAConnectState) {
-		if (dm_digtable.CurSTAConnectState == DIG_STA_CONNECT) {
+	if (dm_digtable.pre_sta_connect_state == dm_digtable.cur_sta_connect_state) {
+		if (dm_digtable.cur_sta_connect_state == DIG_STA_CONNECT) {
 			long gain_range = dm_digtable.rssi_val + 10 -
 					  dm_digtable.backoff_val;
 			gain_range = clamp_t(long, gain_range,
@@ -1419,8 +1419,8 @@ static void _rtl92e_dm_pd_th(struct net_device *dev)
 		reset_cnt = 0;
 	}
 
-	if (dm_digtable.PreSTAConnectState == dm_digtable.CurSTAConnectState) {
-		if (dm_digtable.CurSTAConnectState == DIG_STA_CONNECT) {
+	if (dm_digtable.pre_sta_connect_state == dm_digtable.cur_sta_connect_state) {
+		if (dm_digtable.cur_sta_connect_state == DIG_STA_CONNECT) {
 			if (dm_digtable.rssi_val >=
 			    dm_digtable.rssi_high_power_highthresh)
 				dm_digtable.curpd_thstate =
@@ -1487,8 +1487,8 @@ static void _rtl92e_dm_cs_ratio(struct net_device *dev)
 		reset_cnt = 0;
 	}
 
-	if (dm_digtable.PreSTAConnectState == dm_digtable.CurSTAConnectState) {
-		if (dm_digtable.CurSTAConnectState == DIG_STA_CONNECT) {
+	if (dm_digtable.pre_sta_connect_state == dm_digtable.cur_sta_connect_state) {
+		if (dm_digtable.cur_sta_connect_state == DIG_STA_CONNECT) {
 			if (dm_digtable.rssi_val <= dm_digtable.rssi_low_thresh)
 				dm_digtable.curcs_ratio_state = DIG_CS_RATIO_LOWER;
 			else if (dm_digtable.rssi_val >= dm_digtable.rssi_high_thresh)
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h
index 51e295d389a83..89c58e38c1f7d 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h
@@ -66,8 +66,8 @@ struct dig_t {
 
 	u8		dig_state;
 	u8		dig_highpwr_state;
-	u8		CurSTAConnectState;
-	u8		PreSTAConnectState;
+	u8		cur_sta_connect_state;
+	u8		pre_sta_connect_state;
 
 	u8		curpd_thstate;
 	u8		prepd_thstate;
-- 
GitLab


From 9adc341cdcf63a4343fdb8c45aa46cafcd8dffae Mon Sep 17 00:00:00 2001
From: Dragan Cvetic <dragan.m.cvetic@gmail.com>
Date: Mon, 10 Oct 2022 20:04:57 +0100
Subject: [PATCH 0237/4122] staging: rtl8192e: Rename Op, Length and Value

Rename variable Op to op, Length to length and Value to value to avoid
CamelCase which is not accepted by checkpatch.

Signed-off-by: Dragan Cvetic <dragan.m.cvetic@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/20221010190457.13199-4-dragan.m.cvetic@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 6 +++---
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
index 0572ce40290e8..c9e495538e2c7 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
@@ -628,9 +628,9 @@ static void _rtl92e_dm_tx_power_tracking_callback_tssi(struct net_device *dev)
 
 	for (j = 0; j <= 30; j++) {
 
-		tx_cmd.Op		= TXCMD_SET_TX_PWR_TRACKING;
-		tx_cmd.Length	= 4;
-		tx_cmd.Value		= Value;
+		tx_cmd.op	= TXCMD_SET_TX_PWR_TRACKING;
+		tx_cmd.length	= 4;
+		tx_cmd.value	= Value;
 		rtl92e_send_cmd_pkt(dev, DESC_PACKET_TYPE_NORMAL, (u8 *)&tx_cmd,
 				    sizeof(struct dcmd_txcmd));
 		mdelay(1);
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h
index 89c58e38c1f7d..1d4d7d98a8599 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.h
@@ -152,9 +152,9 @@ enum dm_cck_rx_path_method {
 
 
 struct dcmd_txcmd {
-	u32	Op;
-	u32	Length;
-	u32	Value;
+	u32	op;
+	u32	length;
+	u32	value;
 };
 /*------------------------------Define structure----------------------------*/
 
-- 
GitLab


From 04a5673391a5c56831f09b11a7735035e4a1ea62 Mon Sep 17 00:00:00 2001
From: Rui Li <me@lirui.org>
Date: Wed, 12 Oct 2022 22:36:33 +0800
Subject: [PATCH 0238/4122] staging: rtl8192e: remove unnecessary braces for
 single statement blocks

This commit cleans up checkpatch warning as follows:
braces {} are not necessary for single statement blocks

Signed-off-by: Rui Li <me@lirui.org>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/166558541522.9.15423282339326993462.68459319@lirui.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 3 +--
 drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c | 9 +++------
 drivers/staging/rtl8192e/rtllib_softmac_wx.c   | 3 +--
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index c3dcaa27fd2ea..210b7ecc273ab 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -1106,9 +1106,8 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 	if (cb_desc->bHwSec) {
 		static u8 tmp;
 
-		if (!tmp) {
+		if (!tmp)
 			tmp = 1;
-		}
 		switch (priv->rtllib->pairwise_key_type) {
 		case KEY_TYPE_WEP40:
 		case KEY_TYPE_WEP104:
diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
index a44dffa76a39e..58da2dab55bd6 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
@@ -522,9 +522,8 @@ static bool _rtl92e_bb_config_para_file(struct net_device *dev)
 		rtStatus  = rtl92e_check_bb_and_rf(dev,
 						   (enum hw90_block)eCheckItem,
 						   (enum rf90_radio_path)0);
-		if (!rtStatus) {
+		if (!rtStatus)
 			return rtStatus;
-		}
 	}
 	rtl92e_set_bb_reg(dev, rFPGA0_RFMOD, bCCKEn|bOFDMEn, 0x0);
 	_rtl92e_phy_config_bb(dev, BaseBand_Config_PHY_REG);
@@ -1378,9 +1377,8 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 					i++;
 				}
 
-				if (i >= MAX_DOZE_WAITING_TIMES_9x) {
+				if (i >= MAX_DOZE_WAITING_TIMES_9x)
 					break;
-				}
 			}
 			rtl92e_set_rf_off(dev);
 			break;
@@ -1397,9 +1395,8 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 					i++;
 				}
 
-				if (i >= MAX_DOZE_WAITING_TIMES_9x) {
+				if (i >= MAX_DOZE_WAITING_TIMES_9x)
 					break;
-				}
 			}
 
 			if (pPSC->RegRfPsLevel & RT_RF_OFF_LEVL_HALT_NIC &&
diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
index f9589c5b62bac..fdf867a5dd7a2 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
@@ -571,9 +571,8 @@ int rtllib_wx_set_power(struct rtllib_device *ieee,
 		ieee->ps = RTLLIB_PS_DISABLED;
 		goto exit;
 	}
-	if (wrqu->power.flags & IW_POWER_TIMEOUT) {
+	if (wrqu->power.flags & IW_POWER_TIMEOUT)
 		ieee->ps_timeout = wrqu->power.value / 1000;
-	}
 
 	if (wrqu->power.flags & IW_POWER_PERIOD)
 		ieee->ps_period = wrqu->power.value / 1000;
-- 
GitLab


From 06b764cf60877bdd6604c173963501e16236ccdf Mon Sep 17 00:00:00 2001
From: Rigel Di Scala <zedr@zedr.com>
Date: Wed, 12 Oct 2022 21:16:12 +0000
Subject: [PATCH 0239/4122] Staging: rtl8192e: fix a brace style issue

Fixed a coding style issue affecting a conditional if statement.

Signed-off-by: Rigel Di Scala <zedr@zedr.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/20221012211612.75871-1-zedr@zedr.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_BAProc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c
index 19d13b3fcecf7..e932ad1a9e962 100644
--- a/drivers/staging/rtl8192e/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c
@@ -180,11 +180,10 @@ static void rtllib_send_ADDBAReq(struct rtllib_device *ieee, u8 *dst,
 
 	skb = rtllib_ADDBA(ieee, dst, pBA, 0, ACT_ADDBAREQ);
 
-	if (skb) {
+	if (skb)
 		softmac_mgmt_xmit(skb, ieee);
-	} else {
+	else
 		netdev_dbg(ieee->dev, "Failed to generate ADDBAReq packet.\n");
-	}
 }
 
 static void rtllib_send_ADDBARsp(struct rtllib_device *ieee, u8 *dst,
-- 
GitLab


From c5997186452ae29b535fd6cb65b4f5534b52ea25 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:32 +0200
Subject: [PATCH 0240/4122] staging: r8188eu: replace one GetAddr3Ptr call

Define a struct ieee80211_mgmt for the message that we process in
OnDeAuth. Use this struct to read the bssid.

This patch removes one GetAddr3Ptr call, getting us a tiny step closer to
removing GetAddr3Ptr.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 07905e2ae8e05..0c4b3b99150d7 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1457,6 +1457,7 @@ report_assoc_result:
 
 unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 {
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	unsigned short	reason;
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
@@ -1464,8 +1465,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 	u8 *pframe = precv_frame->rx_data;
 	struct wifidirect_info *pwdinfo = &padapter->wdinfo;
 
-	/* check A3 */
-	if (!(!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN)))
+	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
 		return _SUCCESS;
 
 	if (pwdinfo->rx_invitereq_info.scan_op_ch_only) {
-- 
GitLab


From f54ded554f04cc0e7f5edcc571d9c09581f67312 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:33 +0200
Subject: [PATCH 0241/4122] staging: r8188eu: get reason code from mgmt struct

Read the deauth reson code from the newly added mgmt structure instead of
calculating the offset ourselves.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 0c4b3b99150d7..5c59fc91ecaec 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1473,7 +1473,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 		_set_timer(&pwdinfo->reset_ch_sitesurvey, 10);
 	}
 
-	reason = le16_to_cpu(*(__le16 *)(pframe + WLAN_HDR_A3_LEN));
+	reason = le16_to_cpu(mgmt->u.disassoc.reason_code);
 
 	if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) {
 		struct sta_info *psta;
-- 
GitLab


From 8ea03e32f51475f10ebf430fece7c3f8b8fa476b Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:34 +0200
Subject: [PATCH 0242/4122] staging: r8188eu: clarify the bBusyTraffic
 assignment

bBusyTraffic is set only if we're not in WIFI_AP_STATE, i.e. in the else
branch. If we were not in WIFI_AP_STATE, we'd go into the if branch and
return _SUCCESS before making it to the bBusyTraffic assignment.

Move the assignment into the else branch to make this clearer.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 5c59fc91ecaec..fd2daeca71128 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1515,8 +1515,9 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 
 		if (!ignore_received_deauth)
 			receive_disconnect(padapter, GetAddr3Ptr(pframe), reason);
+
+		pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 	}
-	pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 	return _SUCCESS;
 }
 
-- 
GitLab


From 15697b04e42e399f1c74d484f130859df756b1ae Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:35 +0200
Subject: [PATCH 0243/4122] staging: r8188eu: use sa instead of Addr2

For management frames, Addr2 is the Source Address (SA).

Use sa from the mgmt structure and remove the GetAddr2Ptr call.
GetAddr2Ptr is a driver-specific function that we should eventually
remove.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index fd2daeca71128..732ada6ab9325 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1479,7 +1479,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 		struct sta_info *psta;
 		struct sta_priv *pstapriv = &padapter->stapriv;
 
-		psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe));
+		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
 		if (psta) {
 			u8 updated = 0;
 
-- 
GitLab


From 094fbfbac3570345dbcb946dd2785bad03b680cf Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:36 +0200
Subject: [PATCH 0244/4122] staging: r8188eu: get bssid from mgmt struct

For management frames, Addr3 is the BSSID. Read it from the mgmt
structure instead of calling GetAddr3Ptr.

The pframe variable is now unused and can be removed.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 732ada6ab9325..742976c38cd56 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1462,7 +1462,6 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
-	u8 *pframe = precv_frame->rx_data;
 	struct wifidirect_info *pwdinfo = &padapter->wdinfo;
 
 	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
@@ -1514,7 +1513,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 		}
 
 		if (!ignore_received_deauth)
-			receive_disconnect(padapter, GetAddr3Ptr(pframe), reason);
+			receive_disconnect(padapter, mgmt->bssid, reason);
 
 		pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 	}
-- 
GitLab


From ece8119070de777c7a1685211c8056a2ce6d3df8 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:37 +0200
Subject: [PATCH 0245/4122] staging: r8188eu: exit for deauth from unknown
 station

If we receive a deauth message from an unknown station, we can drop this
message and exit immediately. Reorder the code to make this clearer, don't
wrap everything in an if statement.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-7-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 22 ++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 742976c38cd56..40df0f9982f4a 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1477,21 +1477,21 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 	if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) {
 		struct sta_info *psta;
 		struct sta_priv *pstapriv = &padapter->stapriv;
+		u8 updated = 0;
 
 		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
-		if (psta) {
-			u8 updated = 0;
-
-			spin_lock_bh(&pstapriv->asoc_list_lock);
-			if (!list_empty(&psta->asoc_list)) {
-				list_del_init(&psta->asoc_list);
-				pstapriv->asoc_list_cnt--;
-				updated = ap_free_sta(padapter, psta, false, reason);
-			}
-			spin_unlock_bh(&pstapriv->asoc_list_lock);
+		if (!psta)
+			return _SUCCESS;
 
-			associated_clients_update(padapter, updated);
+		spin_lock_bh(&pstapriv->asoc_list_lock);
+		if (!list_empty(&psta->asoc_list)) {
+			list_del_init(&psta->asoc_list);
+			pstapriv->asoc_list_cnt--;
+			updated = ap_free_sta(padapter, psta, false, reason);
 		}
+		spin_unlock_bh(&pstapriv->asoc_list_lock);
+
+		associated_clients_update(padapter, updated);
 
 		return _SUCCESS;
 	} else {
-- 
GitLab


From 6325d858c48d2fefe1c93da36f74b506d7abf71d Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:38 +0200
Subject: [PATCH 0246/4122] staging: r8188eu: remove unnecessary return

Remove the return statement at the end of the if branch. We can continue
to the final return after the if-else.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-8-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 40df0f9982f4a..465f51bce0e30 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1492,8 +1492,6 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 		spin_unlock_bh(&pstapriv->asoc_list_lock);
 
 		associated_clients_update(padapter, updated);
-
-		return _SUCCESS;
 	} else {
 		int	ignore_received_deauth = 0;
 
-- 
GitLab


From 000848a511fc77b26864f2d95c8efa853e6bf82f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:39 +0200
Subject: [PATCH 0247/4122] staging: r8188eu: summarize two flags checks

Summarize the two statements to check if either WIFI_FW_AUTH_STATE or
WIFI_FW_ASSOC_STATE is set.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-9-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 465f51bce0e30..09ffecc5b2b3a 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1500,8 +1500,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 		 *	However, the Win8.1 with BRCM Wi-Fi will send the deauth with reason code 6 to us after receieving our deauth.
 		 *	Added the following code to avoid this case.
 		 */
-		if ((pmlmeinfo->state & WIFI_FW_AUTH_STATE) ||
-		    (pmlmeinfo->state & WIFI_FW_ASSOC_STATE)) {
+		if (pmlmeinfo->state & (WIFI_FW_AUTH_STATE | WIFI_FW_ASSOC_STATE)) {
 			if (reason == WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA) {
 				ignore_received_deauth = 1;
 			} else if (reason == WLAN_REASON_PREV_AUTH_NOT_VALID) {
-- 
GitLab


From e2c532aadd86fd51ebde566e6af74b1eb1be89e0 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:24:40 +0200
Subject: [PATCH 0248/4122] staging: r8188eu: ignore_received_deauth is a
 boolean

The ignore_received_deauth is in fact a boolean variable. Change its type
to bool and use true, false for its values.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Acked-by: Pavel Skripkin <paskripkin@gmail.com>
Link: https://lore.kernel.org/r/20221015152440.232281-10-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 09ffecc5b2b3a..fda446b6779c3 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1493,7 +1493,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 
 		associated_clients_update(padapter, updated);
 	} else {
-		int	ignore_received_deauth = 0;
+		bool ignore_received_deauth = false;
 
 		/* Before sending the auth frame to start the STA/GC mode connection with AP/GO,
 		 *	we will send the deauth first.
@@ -1502,10 +1502,10 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 		 */
 		if (pmlmeinfo->state & (WIFI_FW_AUTH_STATE | WIFI_FW_ASSOC_STATE)) {
 			if (reason == WLAN_REASON_CLASS2_FRAME_FROM_NONAUTH_STA) {
-				ignore_received_deauth = 1;
+				ignore_received_deauth = true;
 			} else if (reason == WLAN_REASON_PREV_AUTH_NOT_VALID) {
 				// TODO: 802.11r
-				ignore_received_deauth = 1;
+				ignore_received_deauth = true;
 			}
 		}
 
-- 
GitLab


From 92b81816d5f867c2b1017970786e0f50882a33f7 Mon Sep 17 00:00:00 2001
From: Anjandev Momi <anjan@momi.ca>
Date: Fri, 14 Oct 2022 01:18:37 -0700
Subject: [PATCH 0249/4122] Staging: rtl8192e: remove unnecessary parentheses

This patch removes the following CHECK generated by checkpatch.pl:

	./drivers/staging/rtl8192e/rtl819x_BAProc.c:116: CHECK: Unnecessary
	parentheses around pBA->ba_start_seq_ctrl
	./drivers/staging/rtl8192e/rtl819x_BAProc.c:261: CHECK: Unnecessary
	parentheses around '&pTS'
	./drivers/staging/rtl8192e/rtl819x_BAProc.c:346: CHECK: Unnecessary
	parentheses around '&pTS'

Signed-off-by: Anjandev Momi <anjan@momi.ca>
Link: https://lore.kernel.org/r/20221014081839.23902-2-anjan@momi.ca
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_BAProc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c
index e932ad1a9e962..f208ee1b29555 100644
--- a/drivers/staging/rtl8192e/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c
@@ -111,7 +111,7 @@ static struct sk_buff *rtllib_ADDBA(struct rtllib_device *ieee, u8 *Dst,
 	tag += 2;
 
 	if (type == ACT_ADDBAREQ) {
-		memcpy(tag, (u8 *)&(pBA->ba_start_seq_ctrl), 2);
+		memcpy(tag, (u8 *)&pBA->ba_start_seq_ctrl, 2);
 		tag += 2;
 	}
 
@@ -253,7 +253,7 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb)
 			    ieee->pHTInfo->bCurrentHTSupport);
 		goto OnADDBAReq_Fail;
 	}
-	if (!GetTs(ieee, (struct ts_common_info **)(&pTS), dst,
+	if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst,
 	    (u8)(pBaParamSet->field.tid), RX_DIR, true)) {
 		rc = ADDBA_STATUS_REFUSED;
 		netdev_warn(ieee->dev, "%s(): can't get TS\n", __func__);
@@ -337,7 +337,7 @@ int rtllib_rx_ADDBARsp(struct rtllib_device *ieee, struct sk_buff *skb)
 		goto OnADDBARsp_Reject;
 	}
 
-	if (!GetTs(ieee, (struct ts_common_info **)(&pTS), dst,
+	if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst,
 		   (u8)(pBaParamSet->field.tid), TX_DIR, false)) {
 		netdev_warn(ieee->dev, "%s(): can't get TS\n", __func__);
 		ReasonCode = DELBA_REASON_UNKNOWN_BA;
-- 
GitLab


From a079a4b2c9bc40b00cad01a1babf209acab1eea2 Mon Sep 17 00:00:00 2001
From: Anjandev Momi <anjan@momi.ca>
Date: Fri, 14 Oct 2022 01:18:38 -0700
Subject: [PATCH 0250/4122] Staging: rtl8192e: remove multiple blank lines

This patch removes the following checks generated by checkpatch.pl:

	./drivers/staging/rtl8192e/rtl819x_BAProc.c:164: CHECK: Please
	don't use multiple blank lines
	./drivers/staging/rtl8192e/rtl819x_BAProc.c:383: CHECK: Please
	don't use multiple blank lines

Signed-off-by: Anjandev Momi <anjan@momi.ca>
Link: https://lore.kernel.org/r/20221014081839.23902-3-anjan@momi.ca
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_BAProc.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c
index f208ee1b29555..94131a5983f1e 100644
--- a/drivers/staging/rtl8192e/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c
@@ -159,7 +159,6 @@ static struct sk_buff *rtllib_DELBA(struct rtllib_device *ieee, u8 *dst,
 	*tag++ = ACT_CAT_BA;
 	*tag++ = ACT_DELBA;
 
-
 	put_unaligned_le16(DelbaParamSet.short_data, tag);
 	tag += 2;
 
@@ -374,7 +373,6 @@ int rtllib_rx_ADDBARsp(struct rtllib_device *ieee, struct sk_buff *skb)
 			goto OnADDBARsp_Reject;
 		}
 
-
 		pAdmittedBA->dialog_token = *pDialogToken;
 		pAdmittedBA->ba_timeout_value = *pBaTimeoutVal;
 		pAdmittedBA->ba_start_seq_ctrl = pPendingBA->ba_start_seq_ctrl;
-- 
GitLab


From 2f618d1167b59dc7270be86f89819cf3417c192f Mon Sep 17 00:00:00 2001
From: Anjandev Momi <anjan@momi.ca>
Date: Fri, 14 Oct 2022 01:18:39 -0700
Subject: [PATCH 0251/4122] Staging: rtl8192e: make alignment match open
 parenthesis

This patch removes the following checks generated by checkpatch.pl:

	./drivers/staging/rtl8192e/rtl819x_BAProc.c:261: CHECK:
	Alignment should match open parenthesis
	./drivers/staging/rtl8192e/rtl819x_BAProc.c:284: CHECK:
	Alignment should match open parenthesis
	./drivers/staging/rtl8192e/rtl819x_BAProc.c:421: CHECK:
	Alignment should match open parenthesis
	./drivers/staging/rtl8192e/rtl819x_BAProc.c:441: CHECK:
	Alignment should match open parenthesis

Signed-off-by: Anjandev Momi <anjan@momi.ca>
Link: https://lore.kernel.org/r/20221014081839.23902-4-anjan@momi.ca
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_BAProc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c
index 94131a5983f1e..a7aa772778d39 100644
--- a/drivers/staging/rtl8192e/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c
@@ -253,7 +253,7 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb)
 		goto OnADDBAReq_Fail;
 	}
 	if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst,
-	    (u8)(pBaParamSet->field.tid), RX_DIR, true)) {
+		   (u8)(pBaParamSet->field.tid), RX_DIR, true)) {
 		rc = ADDBA_STATUS_REFUSED;
 		netdev_warn(ieee->dev, "%s(): can't get TS\n", __func__);
 		goto OnADDBAReq_Fail;
@@ -412,7 +412,7 @@ int rtllib_rx_DELBA(struct rtllib_device *ieee, struct sk_buff *skb)
 	}
 
 	if (!ieee->current_network.qos_data.active ||
-		!ieee->pHTInfo->bCurrentHTSupport) {
+	    !ieee->pHTInfo->bCurrentHTSupport) {
 		netdev_warn(ieee->dev,
 			    "received DELBA while QOS or HT is not supported(%d, %d)\n",
 			    ieee->current_network. qos_data.active,
@@ -432,7 +432,7 @@ int rtllib_rx_DELBA(struct rtllib_device *ieee, struct sk_buff *skb)
 		struct rx_ts_record *pRxTs;
 
 		if (!GetTs(ieee, (struct ts_common_info **)&pRxTs, dst,
-		    (u8)pDelBaParamSet->field.tid, RX_DIR, false)) {
+			   (u8)pDelBaParamSet->field.tid, RX_DIR, false)) {
 			netdev_warn(ieee->dev,
 				    "%s(): can't get TS for RXTS. dst:%pM TID:%d\n",
 				    __func__, dst,
-- 
GitLab


From 98703e4264afe64cbf2c5a4b6589d6e2a3068833 Mon Sep 17 00:00:00 2001
From: Anjandev Momi <anjan@momi.ca>
Date: Fri, 14 Oct 2022 01:18:40 -0700
Subject: [PATCH 0252/4122] Staging: rtl8192e: add blank line after function
 declaration

This patch removes the following check generated by checkpatch.pl

	./drivers/staging/rtl8192e/rtl819x_BAProc.c:65: CHECK: Please
	use a blank line after function/struct/union/enum declarations

Signed-off-by: Anjandev Momi <anjan@momi.ca>
Link: https://lore.kernel.org/r/20221014081839.23902-5-anjan@momi.ca
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_BAProc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c
index a7aa772778d39..7c0369319f97f 100644
--- a/drivers/staging/rtl8192e/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c
@@ -62,6 +62,7 @@ void ResetBaEntry(struct ba_record *pBA)
 	pBA->dialog_token		  = 0;
 	pBA->ba_start_seq_ctrl.short_data = 0;
 }
+
 static struct sk_buff *rtllib_ADDBA(struct rtllib_device *ieee, u8 *Dst,
 				    struct ba_record *pBA,
 				    u16 StatusCode, u8 type)
-- 
GitLab


From 12c6223fc1804fd9295dc50d358294539b4a4184 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:06 +0200
Subject: [PATCH 0253/4122] staging: r8188eu: fix led register settings

Using an InterTech DMG-02 dongle, the led remains on when the system goes
into standby mode. After wakeup, it's no longer possible to control the
led.

It turned out that the register settings to enable or disable the led were
not correct. They worked for some dongles like the Edimax V2 but not for
others like the InterTech DMG-02.

This patch fixes the register settings. Bit 3 in the led_cfg2 register
controls the led status, bit 5 must always be set to be able to control
the led, bit 6 has no influence on the led. Setting the mac_pinmux_cfg
register is not necessary.

These settings were tested with Edimax V2 and InterTech DMG-02.

Cc: stable@vger.kernel.org
Fixes: 8cd574e6af54 ("staging: r8188eu: introduce new hal dir for RTL8188eu driver")
Suggested-by: Michael Straube <straube.linux@gmail.com>
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Michael Straube <straube.linux@gmail.com> # InterTech DMG-02,
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index 2527c252c3e95..5b214488571bb 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -31,40 +31,19 @@ static void ResetLedStatus(struct led_priv *pLed)
 
 static void SwLedOn(struct adapter *padapter, struct led_priv *pLed)
 {
-	u8	LedCfg;
-	int res;
-
 	if (padapter->bDriverStopped)
 		return;
 
-	res = rtw_read8(padapter, REG_LEDCFG2, &LedCfg);
-	if (res)
-		return;
-
-	rtw_write8(padapter, REG_LEDCFG2, (LedCfg & 0xf0) | BIT(5) | BIT(6)); /*  SW control led0 on. */
+	rtw_write8(padapter, REG_LEDCFG2, BIT(5)); /*  SW control led0 on. */
 	pLed->bLedOn = true;
 }
 
 static void SwLedOff(struct adapter *padapter, struct led_priv *pLed)
 {
-	u8	LedCfg;
-	int res;
-
 	if (padapter->bDriverStopped)
 		goto exit;
 
-	res = rtw_read8(padapter, REG_LEDCFG2, &LedCfg);/* 0x4E */
-	if (res)
-		goto exit;
-
-	LedCfg &= 0x90; /*  Set to software control. */
-	rtw_write8(padapter, REG_LEDCFG2, (LedCfg | BIT(3)));
-	res = rtw_read8(padapter, REG_MAC_PINMUX_CFG, &LedCfg);
-	if (res)
-		goto exit;
-
-	LedCfg &= 0xFE;
-	rtw_write8(padapter, REG_MAC_PINMUX_CFG, LedCfg);
+	rtw_write8(padapter, REG_LEDCFG2, BIT(5) | BIT(3));
 exit:
 	pLed->bLedOn = false;
 }
-- 
GitLab


From e5931b7ba5a9ccfea6e36f9f07eac89091782e9b Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:07 +0200
Subject: [PATCH 0254/4122] staging: r8188eu: handle rtw_write8 errors in
 SwLedOn

Check the status returned by rtw_write8. Update bLedOn only if we could
update the REG_LEDCFG2 register.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index 5b214488571bb..4f1cad890cae7 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -34,7 +34,9 @@ static void SwLedOn(struct adapter *padapter, struct led_priv *pLed)
 	if (padapter->bDriverStopped)
 		return;
 
-	rtw_write8(padapter, REG_LEDCFG2, BIT(5)); /*  SW control led0 on. */
+	if (rtw_write8(padapter, REG_LEDCFG2, BIT(5)) != _SUCCESS)
+		return;
+
 	pLed->bLedOn = true;
 }
 
-- 
GitLab


From ef032c4fa43655715a39378d23cf3cc943cfdafa Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:08 +0200
Subject: [PATCH 0255/4122] staging: r8188eu: fix status updates in SwLedOff

Update bLedOn only if we could update the REG_LEDCFG2 register.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index 4f1cad890cae7..38433296d327f 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -43,10 +43,11 @@ static void SwLedOn(struct adapter *padapter, struct led_priv *pLed)
 static void SwLedOff(struct adapter *padapter, struct led_priv *pLed)
 {
 	if (padapter->bDriverStopped)
-		goto exit;
+		return;
+
+	if (rtw_write8(padapter, REG_LEDCFG2, BIT(5) | BIT(3)) != _SUCCESS)
+		return;
 
-	rtw_write8(padapter, REG_LEDCFG2, BIT(5) | BIT(3));
-exit:
 	pLed->bLedOn = false;
 }
 
-- 
GitLab


From c16a98833a9979c15febdc60a3ab5d00c72c1b5e Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:09 +0200
Subject: [PATCH 0256/4122] staging: r8188eu: SwLedOn needs no padapter
 parameter

Remove the padapter parameter from the SwLedOn function. padapter can be
derived from the pLed parameter.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index 38433296d327f..aa8f41edfade8 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -29,8 +29,10 @@ static void ResetLedStatus(struct led_priv *pLed)
 	pLed->bLedScanBlinkInProgress = false;
 }
 
-static void SwLedOn(struct adapter *padapter, struct led_priv *pLed)
+static void SwLedOn(struct led_priv *pLed)
 {
+	struct adapter *padapter = container_of(pLed, struct adapter, ledpriv);
+
 	if (padapter->bDriverStopped)
 		return;
 
@@ -67,7 +69,7 @@ static void blink_work(struct work_struct *work)
 	if (pLed->bLedOn)
 		SwLedOff(padapter, pLed);
 	else
-		SwLedOn(padapter, pLed);
+		SwLedOn(pLed);
 
 	switch (pLed->CurrLedState) {
 	case LED_BLINK_SLOWLY:
-- 
GitLab


From 728a14bf8f1e095f6edddcf15f70c9307ec1f0ca Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:10 +0200
Subject: [PATCH 0257/4122] staging: r8188eu: SwLedOff needs no padapter
 parameter

Remove the padapter parameter from the SwLedOff function. padapter can
be derived from the pLed parameter.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index aa8f41edfade8..56f043d8ff38e 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -42,8 +42,10 @@ static void SwLedOn(struct led_priv *pLed)
 	pLed->bLedOn = true;
 }
 
-static void SwLedOff(struct adapter *padapter, struct led_priv *pLed)
+static void SwLedOff(struct led_priv *pLed)
 {
+	struct adapter *padapter = container_of(pLed, struct adapter, ledpriv);
+
 	if (padapter->bDriverStopped)
 		return;
 
@@ -61,13 +63,13 @@ static void blink_work(struct work_struct *work)
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 
 	if (padapter->pwrctrlpriv.rf_pwrstate != rf_on) {
-		SwLedOff(padapter, pLed);
+		SwLedOff(pLed);
 		ResetLedStatus(pLed);
 		return;
 	}
 
 	if (pLed->bLedOn)
-		SwLedOff(padapter, pLed);
+		SwLedOff(pLed);
 	else
 		SwLedOn(pLed);
 
@@ -141,7 +143,7 @@ void rtl8188eu_DeInitSwLeds(struct adapter *padapter)
 
 	cancel_delayed_work_sync(&ledpriv->blink_work);
 	ResetLedStatus(ledpriv);
-	SwLedOff(padapter, ledpriv);
+	SwLedOff(ledpriv);
 }
 
 void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
@@ -258,7 +260,7 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 		pLed->bLedWPSBlinkInProgress = false;
 		pLed->bLedScanBlinkInProgress = false;
 		cancel_delayed_work(&pLed->blink_work);
-		SwLedOff(padapter, pLed);
+		SwLedOff(pLed);
 		break;
 	default:
 		break;
-- 
GitLab


From 4fc4de550eb0fb47514ee0f37f9a0abd4d4bf2c9 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:11 +0200
Subject: [PATCH 0258/4122] staging: r8188eu: remove two unused defines

The C2H_MEM_SZ and FREE_CMDOBJ_SZ defines are not used by the r8188eu
driver. Remove them.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-7-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/rtw_cmd.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index 8bbfeb42604fa..0cb054909bc65 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -8,13 +8,9 @@
 #include "rtw_rf.h"
 #include "rtw_led.h"
 
-#define C2H_MEM_SZ (16*1024)
-
 #include "osdep_service.h"
 #include "ieee80211.h" /*  <ieee80211/ieee80211.h> */
 
-#define FREE_CMDOBJ_SZ	128
-
 #define MAX_CMDSZ	1024
 #define MAX_RSPSZ	512
 #define MAX_EVTSZ	1024
-- 
GitLab


From 5ecf2bb6b1f9659e79e99465c327bb97234bfdaf Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:12 +0200
Subject: [PATCH 0259/4122] staging: r8188eu: don't include rtw_led.h from
 rtw_cmd.h

The rtw_cmd.h does not need any definitions from the led layer, there's
no reason to include rtw_led.h.

When I tried to remove this component

struct led_priv {
   struct adapter *padapter;
   ...

I saw compiler errors because of this chain of include files:
drv_types.h -> rtw_cmd.h -> rtw_led.h

rtw_led.h uses struct adapter before it sees the definiton near the end
of drv_types.h. (It seems that a simple struct adapter * prevents this
problem.)

The best option for fixing this issue is to not include rtw_led.h in
rtw_cmd.h.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-8-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/rtw_cmd.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index 0cb054909bc65..ee9218b1d7a94 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -6,7 +6,6 @@
 
 #include "wlan_bssdef.h"
 #include "rtw_rf.h"
-#include "rtw_led.h"
 
 #include "osdep_service.h"
 #include "ieee80211.h" /*  <ieee80211/ieee80211.h> */
-- 
GitLab


From 1188cfa646ba2a7c295fe0cfac24529aec28ff2f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:13 +0200
Subject: [PATCH 0260/4122] staging: r8188eu: remove padapter from struct
 led_priv

The only struct led_priv that's used in the r8188eu driver in embedded in
the driver's global struct adapter. We can use container_of to access the
"outer" structure, there's no need to store a pointer to it.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-9-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c    | 3 +--
 drivers/staging/r8188eu/include/rtw_led.h | 2 --
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index 56f043d8ff38e..2dbd7b5ffdd03 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -59,7 +59,7 @@ static void blink_work(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct led_priv *pLed = container_of(dwork, struct led_priv, blink_work);
-	struct adapter *padapter = pLed->padapter;
+	struct adapter *padapter = container_of(pLed, struct adapter, ledpriv);
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 
 	if (padapter->pwrctrlpriv.rf_pwrstate != rf_on) {
@@ -132,7 +132,6 @@ void rtl8188eu_InitSwLeds(struct adapter *padapter)
 {
 	struct led_priv *pledpriv = &padapter->ledpriv;
 
-	pledpriv->padapter = padapter;
 	ResetLedStatus(pledpriv);
 	INIT_DELAYED_WORK(&pledpriv->blink_work, blink_work);
 }
diff --git a/drivers/staging/r8188eu/include/rtw_led.h b/drivers/staging/r8188eu/include/rtw_led.h
index f57dcf6c8b24e..ea5f5edd90131 100644
--- a/drivers/staging/r8188eu/include/rtw_led.h
+++ b/drivers/staging/r8188eu/include/rtw_led.h
@@ -33,8 +33,6 @@ enum LED_STATE_871x {
 };
 
 struct led_priv {
-	struct adapter *padapter;
-
 	bool bRegUseLed;
 
 	enum LED_STATE_871x	CurrLedState; /*  Current LED state. */
-- 
GitLab


From 126647d113dc6adf06cc4997ef8026f1972c34f7 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:14 +0200
Subject: [PATCH 0261/4122] staging: r8188eu: set two more state variables

Set two more state variables in the blink worker when scan blinking and
tx/rx blinking are finished.

bLedBlinkInProgress is true during tx/rx blinking, bLedScanBlinkInProgress
is true during scan blinking. If we doing neither of the two, we may
safely set both variables to false.

This change makes the scan and tx/rx cases almost identical, we are now
ready to summarize the two cases.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-10-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index 2dbd7b5ffdd03..f8bd183fba1e5 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -90,6 +90,7 @@ static void blink_work(struct work_struct *work)
 				pLed->CurrLedState = LED_BLINK_SLOWLY;
 				schedule_delayed_work(&pLed->blink_work, LED_BLINK_NO_LINK_INTVL);
 			}
+			pLed->bLedBlinkInProgress = false;
 			pLed->bLedScanBlinkInProgress = false;
 		} else {
 			schedule_delayed_work(&pLed->blink_work, LED_BLINK_SCAN_INTVL);
@@ -106,6 +107,7 @@ static void blink_work(struct work_struct *work)
 				schedule_delayed_work(&pLed->blink_work, LED_BLINK_NO_LINK_INTVL);
 			}
 			pLed->bLedBlinkInProgress = false;
+			pLed->bLedScanBlinkInProgress = false;
 		} else {
 			schedule_delayed_work(&pLed->blink_work, LED_BLINK_FASTER_INTVL);
 		}
-- 
GitLab


From e1445e7b003b2b2eace1145f58670200ddadd100 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 15 Oct 2022 17:11:15 +0200
Subject: [PATCH 0262/4122] staging: r8188eu: summarize tx/rx and scan blinking

Summarize the code for tx/rx blinking and for scan blinking in blink_work.
The only difference is the delay for scheduling the next worker.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221015151115.232095-11-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index f8bd183fba1e5..ce8de2eb7845f 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -81,21 +81,6 @@ static void blink_work(struct work_struct *work)
 		schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL);
 		break;
 	case LED_BLINK_SCAN:
-		pLed->BlinkTimes--;
-		if (pLed->BlinkTimes == 0) {
-			if (check_fwstate(pmlmepriv, _FW_LINKED)) {
-				pLed->CurrLedState = LED_BLINK_NORMAL;
-				schedule_delayed_work(&pLed->blink_work, LED_BLINK_LINK_INTVL);
-			} else {
-				pLed->CurrLedState = LED_BLINK_SLOWLY;
-				schedule_delayed_work(&pLed->blink_work, LED_BLINK_NO_LINK_INTVL);
-			}
-			pLed->bLedBlinkInProgress = false;
-			pLed->bLedScanBlinkInProgress = false;
-		} else {
-			schedule_delayed_work(&pLed->blink_work, LED_BLINK_SCAN_INTVL);
-		}
-		break;
 	case LED_BLINK_TXRX:
 		pLed->BlinkTimes--;
 		if (pLed->BlinkTimes == 0) {
@@ -109,7 +94,9 @@ static void blink_work(struct work_struct *work)
 			pLed->bLedBlinkInProgress = false;
 			pLed->bLedScanBlinkInProgress = false;
 		} else {
-			schedule_delayed_work(&pLed->blink_work, LED_BLINK_FASTER_INTVL);
+			schedule_delayed_work(&pLed->blink_work,
+					      pLed->CurrLedState == LED_BLINK_SCAN ?
+					      LED_BLINK_SCAN_INTVL : LED_BLINK_FASTER_INTVL);
 		}
 		break;
 	case LED_BLINK_WPS:
-- 
GitLab


From 31760f04cae2019b6eb4617af4409d8eaf0ac7ac Mon Sep 17 00:00:00 2001
From: Danijel Korent <danijel.korent@gmail.com>
Date: Sat, 15 Oct 2022 18:50:23 +0200
Subject: [PATCH 0263/4122] staging: rtl8192e: Added spaces around operators in
 rtl_cam.c/rtl_eeprom.c

Fixed "spaces preferred around operator" type of problems reported by
checkpatch

Signed-off-by: Danijel Korent <danijel.korent@gmail.com>
Link: https://lore.kernel.org/r/20221015165023.487200-1-danijel.korent@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_cam.c   | 23 +++++++++----------
 .../staging/rtl8192e/rtl8192e/rtl_eeprom.c    |  2 +-
 2 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
index 8c3ce6cc2541a..9d8d4837e6b20 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
@@ -17,7 +17,7 @@ void rtl92e_cam_reset(struct net_device *dev)
 {
 	u32 ulcommand = 0;
 
-	ulcommand |= BIT31|BIT30;
+	ulcommand |= BIT31 | BIT30;
 	rtl92e_writel(dev, RWCAM, ulcommand);
 }
 
@@ -40,7 +40,6 @@ void rtl92e_enable_hw_security_config(struct net_device *dev)
 		SECR_value |= SCR_TxUseDK;
 	}
 
-
 	ieee->hwsec_active = 1;
 	if ((ieee->pHTInfo->iot_action & HT_IOT_ACT_PURE_N_MODE) || !hwwep) {
 		ieee->hwsec_active = 0;
@@ -98,33 +97,33 @@ void rtl92e_set_key(struct net_device *dev, u8 EntryNo, u8 KeyIndex,
 	}
 
 	if (DefaultKey)
-		usConfig |= BIT15 | (KeyType<<2);
+		usConfig |= BIT15 | (KeyType << 2);
 	else
-		usConfig |= BIT15 | (KeyType<<2) | KeyIndex;
+		usConfig |= BIT15 | (KeyType << 2) | KeyIndex;
 
 
 	for (i = 0; i < CAM_CONTENT_COUNT; i++) {
 		TargetCommand  = i + CAM_CONTENT_COUNT * EntryNo;
-		TargetCommand |= BIT31|BIT16;
+		TargetCommand |= BIT31 | BIT16;
 
 		if (i == 0) {
-			TargetContent = (u32)(*(MacAddr+0)) << 16 |
-				(u32)(*(MacAddr+1)) << 24 |
+			TargetContent = (u32)(*(MacAddr + 0)) << 16 |
+				(u32)(*(MacAddr + 1)) << 24 |
 				(u32)usConfig;
 
 			rtl92e_writel(dev, WCAMI, TargetContent);
 			rtl92e_writel(dev, RWCAM, TargetCommand);
 		} else if (i == 1) {
-			TargetContent = (u32)(*(MacAddr+2)) |
-				(u32)(*(MacAddr+3)) <<  8 |
-				(u32)(*(MacAddr+4)) << 16 |
-				(u32)(*(MacAddr+5)) << 24;
+			TargetContent = (u32)(*(MacAddr + 2)) |
+				(u32)(*(MacAddr + 3)) <<  8 |
+				(u32)(*(MacAddr + 4)) << 16 |
+				(u32)(*(MacAddr + 5)) << 24;
 			rtl92e_writel(dev, WCAMI, TargetContent);
 			rtl92e_writel(dev, RWCAM, TargetCommand);
 		} else {
 			if (KeyContent != NULL) {
 				rtl92e_writel(dev, WCAMI,
-					      (u32)(*(KeyContent+i-2)));
+					      (u32)(*(KeyContent + i - 2)));
 				rtl92e_writel(dev, RWCAM, TargetCommand);
 				udelay(100);
 			}
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c b/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c
index 59532ed2156d3..db57c655c6953 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_eeprom.c
@@ -79,6 +79,6 @@ u32 rtl92e_eeprom_read(struct net_device *dev, u32 addr)
 		ret = _rtl92e_eeprom_xfer(dev, (addr & 0x3F) | (0x6 << 6), 9);
 
 	rtl92e_writeb(dev, EPROM_CMD,
-		      (EPROM_CMD_NORMAL<<EPROM_CMD_OPERATING_MODE_SHIFT));
+		      (EPROM_CMD_NORMAL << EPROM_CMD_OPERATING_MODE_SHIFT));
 	return ret;
 }
-- 
GitLab


From 404971602898ab3517b077be08f34eda4af0a482 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 20 Oct 2022 14:16:09 +0100
Subject: [PATCH 0264/4122] staging: octeon: remove redundant variable
 total_freed

The variable total_freed is accumulating skb_to_free however it is not
being used after this. The use of total_freed is redundant and hence
the variable can be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221020131609.1546667-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/octeon/ethernet-tx.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/octeon/ethernet-tx.c b/drivers/staging/octeon/ethernet-tx.c
index a36e36701c743..bbf33b88bb7c2 100644
--- a/drivers/staging/octeon/ethernet-tx.c
+++ b/drivers/staging/octeon/ethernet-tx.c
@@ -73,7 +73,6 @@ static void cvm_oct_free_tx_skbs(struct net_device *dev)
 {
 	int skb_to_free;
 	int qos, queues_per_port;
-	int total_freed = 0;
 	int total_remaining = 0;
 	unsigned long flags;
 	struct octeon_ethernet *priv = netdev_priv(dev);
@@ -87,7 +86,6 @@ static void cvm_oct_free_tx_skbs(struct net_device *dev)
 						       MAX_SKB_TO_FREE);
 		skb_to_free = cvm_oct_adjust_skb_to_free(skb_to_free,
 							 priv->fau + qos * 4);
-		total_freed += skb_to_free;
 		if (skb_to_free > 0) {
 			struct sk_buff *to_free_list = NULL;
 
-- 
GitLab


From 45e6319bd5f2154d8b8c9f1eaa4ac030ba0d330c Mon Sep 17 00:00:00 2001
From: Zhiqi Song <songzhiqi1@huawei.com>
Date: Sat, 24 Sep 2022 15:38:31 +0800
Subject: [PATCH 0265/4122] crypto: hisilicon/hpre - fix resource leak in
 remove process

In hpre_remove(), when the disable operation of qm sriov failed,
the following logic should continue to be executed to release the
remaining resources that have been allocated, instead of returning
directly, otherwise there will be resource leakage.

Signed-off-by: Zhiqi Song <songzhiqi1@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 471e5ca720f57..baf1faec7046f 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -1437,18 +1437,12 @@ err_with_qm_init:
 static void hpre_remove(struct pci_dev *pdev)
 {
 	struct hisi_qm *qm = pci_get_drvdata(pdev);
-	int ret;
 
 	hisi_qm_pm_uninit(qm);
 	hisi_qm_wait_task_finish(qm, &hpre_devices);
 	hisi_qm_alg_unregister(qm, &hpre_devices);
-	if (qm->fun_type == QM_HW_PF && qm->vfs_num) {
-		ret = hisi_qm_sriov_disable(pdev, true);
-		if (ret) {
-			pci_err(pdev, "Disable SRIOV fail!\n");
-			return;
-		}
-	}
+	if (qm->fun_type == QM_HW_PF && qm->vfs_num)
+		hisi_qm_sriov_disable(pdev, true);
 
 	hpre_debugfs_exit(qm);
 	hisi_qm_stop(qm, QM_NORMAL);
-- 
GitLab


From 7001141d34e550854425afa76e960513cf150a62 Mon Sep 17 00:00:00 2001
From: Yicong Yang <yangyicong@hisilicon.com>
Date: Sat, 24 Sep 2022 17:34:24 +0800
Subject: [PATCH 0266/4122] crypto: hisilicon/qm - drop unnecessary
 IS_ENABLE(CONFIG_NUMA) check

dev_to_node() can handle the case when CONFIG_NUMA is not set, so the
check of CONFIG_NUMA is redundant and can be removed.

Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 8b387de69d229..9a38e170fb1d1 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4277,16 +4277,14 @@ static int hisi_qm_sort_devices(int node, struct list_head *head,
 	struct hisi_qm *qm;
 	struct list_head *n;
 	struct device *dev;
-	int dev_node = 0;
+	int dev_node;
 
 	list_for_each_entry(qm, &qm_list->list, list) {
 		dev = &qm->pdev->dev;
 
-		if (IS_ENABLED(CONFIG_NUMA)) {
-			dev_node = dev_to_node(dev);
-			if (dev_node < 0)
-				dev_node = 0;
-		}
+		dev_node = dev_to_node(dev);
+		if (dev_node < 0)
+			dev_node = 0;
 
 		res = kzalloc(sizeof(*res), GFP_KERNEL);
 		if (!res)
-- 
GitLab


From f57e292897cac13b6ddee078aea21173b234ecb7 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 24 Sep 2022 18:14:42 +0800
Subject: [PATCH 0267/4122] crypto: hisilicon/qm - fix incorrect parameters
 usage

In qm_get_xqc_depth(), parameters low_bits and high_bits save
the values of the corresponding bits. However, the values saved by the
two parameters are opposite. As a result, the values returned to the
callers are incorrect.

Fixes: 129a9f340172 ("crypto: hisilicon/qm - get qp num and depth from hardware registers")
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 9a38e170fb1d1..01c083e2c4bd3 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -909,8 +909,8 @@ static void qm_get_xqc_depth(struct hisi_qm *qm, u16 *low_bits,
 	u32 depth;
 
 	depth = hisi_qm_get_hw_info(qm, qm_basic_info, type, qm->cap_ver);
-	*high_bits = depth & QM_XQ_DEPTH_MASK;
-	*low_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK;
+	*low_bits = depth & QM_XQ_DEPTH_MASK;
+	*high_bits = (depth >> QM_XQ_DEPTH_SHIFT) & QM_XQ_DEPTH_MASK;
 }
 
 static u32 qm_get_irq_num(struct hisi_qm *qm)
-- 
GitLab


From 94adb03fd58bbe355e3d7a9d0f701889313e4a51 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 24 Sep 2022 18:34:45 +0800
Subject: [PATCH 0268/4122] crypto: hisilicon/sec - enabling clock gating of
 the address prefetch module

Change the value of clock gating register to 0x7fff to enable
clock gating of the address prefetch module. When the device is
idle, the clock is turned off to save power.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 3705412bac5f1..6eb8a16ba0a70 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -55,7 +55,7 @@
 #define SEC_CONTROL_REG		0x301200
 #define SEC_DYNAMIC_GATE_REG		0x30121c
 #define SEC_CORE_AUTO_GATE		0x30212c
-#define SEC_DYNAMIC_GATE_EN		0x7bff
+#define SEC_DYNAMIC_GATE_EN		0x7fff
 #define SEC_CORE_AUTO_GATE_EN		GENMASK(3, 0)
 #define SEC_CLK_GATE_ENABLE		BIT(3)
 #define SEC_CLK_GATE_DISABLE		(~BIT(3))
-- 
GitLab


From ee1537fe3dd89860d0336563891f6cac707d0cb5 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 24 Sep 2022 19:04:31 +0800
Subject: [PATCH 0269/4122] crypto: hisilicon/qm - re-enable communicate
 interrupt before notifying PF

After the device is reset, the VF needs to re-enable communication
interrupt before the VF sends restart complete message to the PF.
If the interrupt is re-enabled after the VF notifies the PF, the PF
may fail to send messages to the VF after receiving VF's restart
complete message.

Fixes: 760fe22cf5e9 ("crypto: hisilicon/qm - update reset flow")
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 01c083e2c4bd3..e3edb176d9765 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -5723,6 +5723,7 @@ static void qm_pf_reset_vf_done(struct hisi_qm *qm)
 		cmd = QM_VF_START_FAIL;
 	}
 
+	qm_cmd_init(qm);
 	ret = qm_ping_pf(qm, cmd);
 	if (ret)
 		dev_warn(&pdev->dev, "PF responds timeout in reset done!\n");
@@ -5784,7 +5785,6 @@ static void qm_pf_reset_vf_process(struct hisi_qm *qm,
 		goto err_get_status;
 
 	qm_pf_reset_vf_done(qm);
-	qm_cmd_init(qm);
 
 	dev_info(dev, "device reset done.\n");
 
-- 
GitLab


From ad981647dbe1ea91071b9783dd62d74e22c6d955 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Mon, 26 Sep 2022 17:14:21 +0800
Subject: [PATCH 0270/4122] crypto: ccm - use local variables instead of
 indirect references

The variable odata has been introduced into the function scope as
a variable and should be used directly.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/ccm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/ccm.c b/crypto/ccm.c
index 6b815ece51c6a..30dbae72728f7 100644
--- a/crypto/ccm.c
+++ b/crypto/ccm.c
@@ -218,7 +218,7 @@ static int crypto_ccm_auth(struct aead_request *req, struct scatterlist *plain,
 		cryptlen += ilen;
 	}
 
-	ahash_request_set_crypt(ahreq, plain, pctx->odata, cryptlen);
+	ahash_request_set_crypt(ahreq, plain, odata, cryptlen);
 	err = crypto_ahash_finup(ahreq);
 out:
 	return err;
-- 
GitLab


From f30fe6314698d107edbb9db50bc3c3443a30ec80 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Mon, 26 Sep 2022 17:14:40 +0800
Subject: [PATCH 0271/4122] crypto: scatterwalk - remove duplicate function
 declarations

scatterwalk_map() is an inline function already defined in the
header file, it is necessary to delete the re-declaration at the
same location, which was left out in the header file by an
earlier modification.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/scatterwalk.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/crypto/scatterwalk.h b/include/crypto/scatterwalk.h
index ccdb05f68a75c..f2c42b4111b1b 100644
--- a/include/crypto/scatterwalk.h
+++ b/include/crypto/scatterwalk.h
@@ -93,7 +93,6 @@ static inline void scatterwalk_done(struct scatter_walk *walk, int out,
 
 void scatterwalk_copychunks(void *buf, struct scatter_walk *walk,
 			    size_t nbytes, int out);
-void *scatterwalk_map(struct scatter_walk *walk);
 
 void scatterwalk_map_and_copy(void *buf, struct scatterlist *sg,
 			      unsigned int start, unsigned int nbytes, int out);
-- 
GitLab


From 237f9eceb2f3c888c1a26a4209243607d3cc0c7c Mon Sep 17 00:00:00 2001
From: ruanjinjie <ruanjinjie@huawei.com>
Date: Mon, 26 Sep 2022 17:27:11 +0800
Subject: [PATCH 0272/4122] crypto: ccp - Add __init/__exit annotations to
 module init/exit funcs

Add missing __init/__exit annotations to module init/exit funcs

Signed-off-by: ruanjinjie <ruanjinjie@huawei.com>
Acked-by: John Allen <john.allen@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 5976530c00a8a..3321810273053 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -400,7 +400,7 @@ static void ccp_unregister_algs(void)
 	}
 }
 
-static int ccp_crypto_init(void)
+static int __init ccp_crypto_init(void)
 {
 	int ret;
 
@@ -421,7 +421,7 @@ static int ccp_crypto_init(void)
 	return ret;
 }
 
-static void ccp_crypto_exit(void)
+static void __exit ccp_crypto_exit(void)
 {
 	ccp_unregister_algs();
 }
-- 
GitLab


From 224f3a050e495a7c3c1bcee2c613d0996bc661dc Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Mon, 26 Sep 2022 16:45:45 -0500
Subject: [PATCH 0273/4122] crypto: talitos - Replace zero-length arrays with
 DECLARE_FLEX_ARRAY() helper

Zero-length arrays are deprecated and we are moving towards adopting
C99 flexible-array members, instead. So, replace zero-length arrays
declarations in anonymous union with the new DECLARE_FLEX_ARRAY()
helper macro.

This helper allows for flexible-array members in unions.

Link: https://github.com/KSPP/linux/issues/193
Link: https://github.com/KSPP/linux/issues/216
Link: https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/talitos.h b/drivers/crypto/talitos.h
index 32825119e8805..1a93ee355929d 100644
--- a/drivers/crypto/talitos.h
+++ b/drivers/crypto/talitos.h
@@ -65,8 +65,8 @@ struct talitos_edesc {
 	dma_addr_t dma_link_tbl;
 	struct talitos_desc desc;
 	union {
-		struct talitos_ptr link_tbl[0];
-		u8 buf[0];
+		DECLARE_FLEX_ARRAY(struct talitos_ptr, link_tbl);
+		DECLARE_FLEX_ARRAY(u8, buf);
 	};
 };
 
-- 
GitLab


From 22044d9b04b593831d8e16ba7aafabf4e75964f5 Mon Sep 17 00:00:00 2001
From: Peter Harliman Liem <pliem@maxlinear.com>
Date: Tue, 27 Sep 2022 11:10:08 +0800
Subject: [PATCH 0274/4122] crypto: inside-secure - Expand soc data structure

Currently platform data is assigned directly to
version string(instead of struct). To make it more
scalable, we move it to use data struct instead.
This allows customization for individual platforms other
than version string.

Signed-off-by: Peter Harliman Liem <pliem@maxlinear.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel.c | 44 +++++++++++++++++--------
 drivers/crypto/inside-secure/safexcel.h |  6 +++-
 2 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index ad0d8c4a71ac1..8f4872470529c 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -410,10 +410,10 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
 	int i, j, ret = 0, pe;
 	int ipuesz, ifppsz, minifw = 0;
 
-	if (priv->version == EIP197D_MRVL)
+	if (priv->data->version == EIP197D_MRVL)
 		dir = "eip197d";
-	else if (priv->version == EIP197B_MRVL ||
-		 priv->version == EIP197_DEVBRD)
+	else if (priv->data->version == EIP197B_MRVL ||
+		 priv->data->version == EIP197_DEVBRD)
 		dir = "eip197b";
 	else
 		return -ENODEV;
@@ -423,7 +423,7 @@ retry_fw:
 		snprintf(fw_path, 37, "inside-secure/%s/%s", dir, fw_name[i]);
 		ret = firmware_request_nowarn(&fw[i], fw_path, priv->dev);
 		if (ret) {
-			if (minifw || priv->version != EIP197B_MRVL)
+			if (minifw || priv->data->version != EIP197B_MRVL)
 				goto release_fw;
 
 			/* Fallback to the old firmware location for the
@@ -1597,7 +1597,7 @@ static int safexcel_probe_generic(void *pdev,
 
 	safexcel_configure(priv);
 
-	if (IS_ENABLED(CONFIG_PCI) && priv->version == EIP197_DEVBRD) {
+	if (IS_ENABLED(CONFIG_PCI) && priv->data->version == EIP197_DEVBRD) {
 		/*
 		 * Request MSI vectors for global + 1 per ring -
 		 * or just 1 for older dev images
@@ -1731,7 +1731,7 @@ static int safexcel_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	priv->dev = dev;
-	priv->version = (enum safexcel_eip_version)of_device_get_match_data(dev);
+	priv->data = (struct safexcel_priv_data *)of_device_get_match_data(dev);
 
 	platform_set_drvdata(pdev, priv);
 
@@ -1806,27 +1806,43 @@ static int safexcel_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct safexcel_priv_data eip97ies_mrvl_data = {
+	.version = EIP97IES_MRVL,
+};
+
+static const struct safexcel_priv_data eip197b_mrvl_data = {
+	.version = EIP197B_MRVL,
+};
+
+static const struct safexcel_priv_data eip197d_mrvl_data = {
+	.version = EIP197D_MRVL,
+};
+
+static const struct safexcel_priv_data eip197_devbrd_data = {
+	.version = EIP197_DEVBRD,
+};
+
 static const struct of_device_id safexcel_of_match_table[] = {
 	{
 		.compatible = "inside-secure,safexcel-eip97ies",
-		.data = (void *)EIP97IES_MRVL,
+		.data = &eip97ies_mrvl_data,
 	},
 	{
 		.compatible = "inside-secure,safexcel-eip197b",
-		.data = (void *)EIP197B_MRVL,
+		.data = &eip197b_mrvl_data,
 	},
 	{
 		.compatible = "inside-secure,safexcel-eip197d",
-		.data = (void *)EIP197D_MRVL,
+		.data = &eip197d_mrvl_data,
 	},
 	/* For backward compatibility and intended for generic use */
 	{
 		.compatible = "inside-secure,safexcel-eip97",
-		.data = (void *)EIP97IES_MRVL,
+		.data = &eip97ies_mrvl_data,
 	},
 	{
 		.compatible = "inside-secure,safexcel-eip197",
-		.data = (void *)EIP197B_MRVL,
+		.data = &eip197b_mrvl_data,
 	},
 	{},
 };
@@ -1862,7 +1878,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 
 	priv->dev = dev;
-	priv->version = (enum safexcel_eip_version)ent->driver_data;
+	priv->data = (struct safexcel_priv_data *)ent->driver_data;
 
 	pci_set_drvdata(pdev, priv);
 
@@ -1881,7 +1897,7 @@ static int safexcel_pci_probe(struct pci_dev *pdev,
 	}
 	priv->base = pcim_iomap_table(pdev)[0];
 
-	if (priv->version == EIP197_DEVBRD) {
+	if (priv->data->version == EIP197_DEVBRD) {
 		dev_dbg(dev, "Device identified as FPGA based development board - applying HW reset\n");
 
 		rc = pcim_iomap_regions(pdev, 4, "crypto_safexcel");
@@ -1949,7 +1965,7 @@ static const struct pci_device_id safexcel_pci_ids[] = {
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_XILINX, 0x9038,
 			       0x16ae, 0xc522),
-		.driver_data = EIP197_DEVBRD,
+		.driver_data = (kernel_ulong_t)&eip197_devbrd_data,
 	},
 	{},
 };
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index 797ff91512e0d..e8da8b30a3923 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -733,6 +733,10 @@ enum safexcel_eip_version {
 	EIP197_DEVBRD
 };
 
+struct safexcel_priv_data {
+	enum safexcel_eip_version version;
+};
+
 /* Priority we use for advertising our algorithms */
 #define SAFEXCEL_CRA_PRIORITY		300
 
@@ -815,7 +819,7 @@ struct safexcel_crypto_priv {
 	struct clk *reg_clk;
 	struct safexcel_config config;
 
-	enum safexcel_eip_version version;
+	struct safexcel_priv_data *data;
 	struct safexcel_register_offsets offsets;
 	struct safexcel_hwconfig hwconfig;
 	u32 flags;
-- 
GitLab


From 594ed3d245d3e2d0760f30724e02ecf1604b2c01 Mon Sep 17 00:00:00 2001
From: Peter Harliman Liem <pliem@maxlinear.com>
Date: Tue, 27 Sep 2022 11:10:09 +0800
Subject: [PATCH 0275/4122] crypto: inside-secure - Add fw_little_endian option

This is to add fw_little_endian option, which can
be used for platform which firmware is using little-endian
(instead of big-endian).

Signed-off-by: Peter Harliman Liem <pliem@maxlinear.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel.c | 14 ++++++++++----
 drivers/crypto/inside-secure/safexcel.h |  1 +
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 8f4872470529c..4d6d64ff9a0f2 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -316,14 +316,20 @@ static void eip197_init_firmware(struct safexcel_crypto_priv *priv)
 static int eip197_write_firmware(struct safexcel_crypto_priv *priv,
 				  const struct firmware *fw)
 {
-	const __be32 *data = (const __be32 *)fw->data;
+	u32 val;
 	int i;
 
 	/* Write the firmware */
-	for (i = 0; i < fw->size / sizeof(u32); i++)
-		writel(be32_to_cpu(data[i]),
+	for (i = 0; i < fw->size / sizeof(u32); i++) {
+		if (priv->data->fw_little_endian)
+			val = le32_to_cpu(((const __le32 *)fw->data)[i]);
+		else
+			val = be32_to_cpu(((const __be32 *)fw->data)[i]);
+
+		writel(val,
 		       priv->base + EIP197_CLASSIFICATION_RAMS +
-		       i * sizeof(__be32));
+		       i * sizeof(val));
+	}
 
 	/* Exclude final 2 NOPs from size */
 	return i - EIP197_FW_TERMINAL_NOPS;
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index e8da8b30a3923..f049293870b48 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -735,6 +735,7 @@ enum safexcel_eip_version {
 
 struct safexcel_priv_data {
 	enum safexcel_eip_version version;
+	bool fw_little_endian;
 };
 
 /* Priority we use for advertising our algorithms */
-- 
GitLab


From 36dd88b1c09c78f993cb11dcc5f4211d78a10e5f Mon Sep 17 00:00:00 2001
From: Peter Harliman Liem <pliem@maxlinear.com>
Date: Tue, 27 Sep 2022 11:10:10 +0800
Subject: [PATCH 0276/4122] crypto: inside-secure - Add MaxLinear platform

This is to add MaxLinear platform into compatible id.
Firmware endianness option is added since MaxLinear
firmware is in little endian format.

Signed-off-by: Peter Harliman Liem <pliem@maxlinear.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel.c | 11 +++++++++++
 drivers/crypto/inside-secure/safexcel.h |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index 4d6d64ff9a0f2..ae6110376e21c 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -421,6 +421,8 @@ static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
 	else if (priv->data->version == EIP197B_MRVL ||
 		 priv->data->version == EIP197_DEVBRD)
 		dir = "eip197b";
+	else if (priv->data->version == EIP197C_MXL)
+		dir = "eip197c";
 	else
 		return -ENODEV;
 
@@ -1828,6 +1830,11 @@ static const struct safexcel_priv_data eip197_devbrd_data = {
 	.version = EIP197_DEVBRD,
 };
 
+static const struct safexcel_priv_data eip197c_mxl_data = {
+	.version = EIP197C_MXL,
+	.fw_little_endian = true,
+};
+
 static const struct of_device_id safexcel_of_match_table[] = {
 	{
 		.compatible = "inside-secure,safexcel-eip97ies",
@@ -1841,6 +1848,10 @@ static const struct of_device_id safexcel_of_match_table[] = {
 		.compatible = "inside-secure,safexcel-eip197d",
 		.data = &eip197d_mrvl_data,
 	},
+	{
+		.compatible = "inside-secure,safexcel-eip197c-mxl",
+		.data = &eip197c_mxl_data,
+	},
 	/* For backward compatibility and intended for generic use */
 	{
 		.compatible = "inside-secure,safexcel-eip97",
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index f049293870b48..6c2fc662f64ff 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -730,7 +730,8 @@ enum safexcel_eip_version {
 	EIP97IES_MRVL,
 	EIP197B_MRVL,
 	EIP197D_MRVL,
-	EIP197_DEVBRD
+	EIP197_DEVBRD,
+	EIP197C_MXL,
 };
 
 struct safexcel_priv_data {
-- 
GitLab


From 839b8ae2fc10f205317bcc32c9de18456756e1f5 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 08:55:55 +0000
Subject: [PATCH 0277/4122] crypto: sun8i-ss - use dma_addr instead u32

The DMA address need to be stored in a dma_addr_t

Fixes: 359e893e8af4 ("crypto: sun8i-ss - rework handling of IV")
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
index 910d6751644cf..902f6be057ec6 100644
--- a/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
+++ b/drivers/crypto/allwinner/sun8i-ss/sun8i-ss-cipher.c
@@ -124,7 +124,7 @@ static int sun8i_ss_setup_ivs(struct skcipher_request *areq)
 	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
 	struct sun8i_ss_flow *sf = &ss->flows[rctx->flow];
 	int i = 0;
-	u32 a;
+	dma_addr_t a;
 	int err;
 
 	rctx->ivlen = ivsize;
-- 
GitLab


From 375de984a3cb691a0bbaf9756e8595e0b54e27e0 Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Tue, 27 Sep 2022 13:39:55 +0000
Subject: [PATCH 0278/4122] crypto: ccp - Remove unused struct ccp_crypto_cpu

After commit bc3854476f36("crypto: ccp - Use a single queue for proper ordering
of tfm requests"), no one use struct ccp_crypto_cpu, so remove it.

Signed-off-by: Yuan Can <yuancan@huawei.com>
Acked-by: John Allen <john.allen@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-main.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 3321810273053..dd86d2650bea4 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -78,13 +78,6 @@ struct ccp_crypto_cmd {
 	int ret;
 };
 
-struct ccp_crypto_cpu {
-	struct work_struct work;
-	struct completion completion;
-	struct ccp_crypto_cmd *crypto_cmd;
-	int err;
-};
-
 static inline bool ccp_crypto_success(int err)
 {
 	if (err && (err != -EINPROGRESS) && (err != -EBUSY))
-- 
GitLab


From 094528b6a5a755b1195a01e10b13597d67d1a0e6 Mon Sep 17 00:00:00 2001
From: Natalia Petrova <n.petrova@fintech.ru>
Date: Wed, 28 Sep 2022 13:25:05 +0300
Subject: [PATCH 0279/4122] crypto: nitrox - avoid double free on error path in
 nitrox_sriov_init()

If alloc_workqueue() fails in nitrox_mbox_init() it deallocates
ndev->iov.vfdev and returns error code, but then nitrox_sriov_init()
calls nitrox_sriov_cleanup() where ndev->iov.vfdev is deallocated
again.

Fix this by nulling ndev->iov.vfdev after the first deallocation.

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Fixes: 9e5de3e06e54 ("crypto: cavium/nitrox - Add mailbox...")
Signed-off-by: Natalia Petrova <n.petrova@fintech.ru>
Signed-off-by: Alexey Khoroshilov <khoroshilov@ispras.ru>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/nitrox/nitrox_mbx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index 9e7308e39b304..d4e06999af9b7 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -195,6 +195,7 @@ int nitrox_mbox_init(struct nitrox_device *ndev)
 	ndev->iov.pf2vf_wq = alloc_workqueue("nitrox_pf2vf", 0, 0);
 	if (!ndev->iov.pf2vf_wq) {
 		kfree(ndev->iov.vfdev);
+		ndev->iov.vfdev = NULL;
 		return -ENOMEM;
 	}
 	/* enable pf2vf mailbox interrupts */
-- 
GitLab


From 10da230a4df1dfe32a58eb09246f5ffe82346f27 Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Wed, 28 Sep 2022 13:45:05 -0500
Subject: [PATCH 0280/4122] crypto: ccp - Add support for TEE for PCI ID 0x14CA

SoCs containing 0x14CA are present both in datacenter parts that
support SEV as well as client parts that support TEE.

Cc: stable@vger.kernel.org # 5.15+
Tested-by: Rijo-john Thomas <Rijo-john.Thomas@amd.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Acked-by: Tom Lendacky <thomas.lendacky@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/sp-pci.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index 792d6da7f0c07..084d052fddccb 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -381,6 +381,15 @@ static const struct psp_vdata pspv3 = {
 	.inten_reg		= 0x10690,
 	.intsts_reg		= 0x10694,
 };
+
+static const struct psp_vdata pspv4 = {
+	.sev			= &sevv2,
+	.tee			= &teev1,
+	.feature_reg		= 0x109fc,
+	.inten_reg		= 0x10690,
+	.intsts_reg		= 0x10694,
+};
+
 #endif
 
 static const struct sp_dev_vdata dev_vdata[] = {
@@ -426,7 +435,7 @@ static const struct sp_dev_vdata dev_vdata[] = {
 	{	/* 5 */
 		.bar = 2,
 #ifdef CONFIG_CRYPTO_DEV_SP_PSP
-		.psp_vdata = &pspv2,
+		.psp_vdata = &pspv4,
 #endif
 	},
 	{	/* 6 */
-- 
GitLab


From be7f5ef9ff4bbe99e4fcdf63057a993be178af46 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 28 Sep 2022 23:24:43 +0100
Subject: [PATCH 0281/4122] crypto: stm32 - Fix spelling mistake "wite" ->
 "write"

There are a couple of spelling mistakes in dev_err messages. Fix them.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Acked-by: nicolas.toromanoff@foss.st.com
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-cryp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index 59ef541123ae6..59638dfce573b 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -1400,7 +1400,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
 	/* wait end of process */
 	err = stm32_cryp_wait_output(cryp);
 	if (err) {
-		dev_err(cryp->dev, "Timeout (wite ccm padded data)\n");
+		dev_err(cryp->dev, "Timeout (write ccm padded data)\n");
 		return stm32_cryp_finish_req(cryp, err);
 	}
 
@@ -1440,7 +1440,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
 	/* h) wait for completion */
 	err = stm32_cryp_wait_busy(cryp);
 	if (err)
-		dev_err(cryp->dev, "Timeout (wite ccm padded data)\n");
+		dev_err(cryp->dev, "Timeout (write ccm padded data)\n");
 
 	/* i) run the he normal Final phase */
 	stm32_cryp_finish_req(cryp, err);
-- 
GitLab


From 518a198f41d6539dc025f0e4fe2785f9031fa1eb Mon Sep 17 00:00:00 2001
From: Tomer Maimon <tmaimon77@gmail.com>
Date: Thu, 29 Sep 2022 16:31:10 +0300
Subject: [PATCH 0282/4122] dt-bindings: rng: nuvoton,npcm-rng: Add npcm845
 compatible string

Add a compatible string for Nuvoton BMC NPCM845 RNG.

Signed-off-by: Tomer Maimon <tmaimon77@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml
index abd134c9d4009..e8e4ab1e5b95d 100644
--- a/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/nuvoton,npcm-rng.yaml
@@ -16,7 +16,9 @@ maintainers:
 
 properties:
   compatible:
-    const: nuvoton,npcm750-rng
+    enum:
+      - nuvoton,npcm750-rng
+      - nuvoton,npcm845-rng
 
   reg:
     maxItems: 1
-- 
GitLab


From f07b3e87fe62984db66fd4179ae7e960e4fc43e8 Mon Sep 17 00:00:00 2001
From: Tomer Maimon <tmaimon77@gmail.com>
Date: Thu, 29 Sep 2022 16:31:11 +0300
Subject: [PATCH 0283/4122] hwrng: npcm - Add NPCM8XX support

Adding RNG NPCM8XX support to NPCM RNG driver.
RNG NPCM8XX uses a different clock prescaler.

As part of adding NPCM8XX support:
- Add NPCM8XX specific compatible string.
- Add data to handle architecture specific clock prescaler.

Signed-off-by: Tomer Maimon <tmaimon77@gmail.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/npcm-rng.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c
index 1ec5f267a6566..5bf7f370f9859 100644
--- a/drivers/char/hw_random/npcm-rng.c
+++ b/drivers/char/hw_random/npcm-rng.c
@@ -13,11 +13,13 @@
 #include <linux/delay.h>
 #include <linux/of_irq.h>
 #include <linux/pm_runtime.h>
+#include <linux/of_device.h>
 
 #define NPCM_RNGCS_REG		0x00	/* Control and status register */
 #define NPCM_RNGD_REG		0x04	/* Data register */
 #define NPCM_RNGMODE_REG	0x08	/* Mode register */
 
+#define NPCM_RNG_CLK_SET_62_5MHZ	BIT(2) /* 60-80 MHz */
 #define NPCM_RNG_CLK_SET_25MHZ	GENMASK(4, 3) /* 20-25 MHz */
 #define NPCM_RNG_DATA_VALID	BIT(1)
 #define NPCM_RNG_ENABLE		BIT(0)
@@ -31,14 +33,14 @@
 struct npcm_rng {
 	void __iomem *base;
 	struct hwrng rng;
+	u32 clkp;
 };
 
 static int npcm_rng_init(struct hwrng *rng)
 {
 	struct npcm_rng *priv = to_npcm_rng(rng);
 
-	writel(NPCM_RNG_CLK_SET_25MHZ | NPCM_RNG_ENABLE,
-	       priv->base + NPCM_RNGCS_REG);
+	writel(priv->clkp | NPCM_RNG_ENABLE, priv->base + NPCM_RNGCS_REG);
 
 	return 0;
 }
@@ -47,7 +49,7 @@ static void npcm_rng_cleanup(struct hwrng *rng)
 {
 	struct npcm_rng *priv = to_npcm_rng(rng);
 
-	writel(NPCM_RNG_CLK_SET_25MHZ, priv->base + NPCM_RNGCS_REG);
+	writel(priv->clkp, priv->base + NPCM_RNGCS_REG);
 }
 
 static int npcm_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -110,6 +112,7 @@ static int npcm_rng_probe(struct platform_device *pdev)
 	priv->rng.read = npcm_rng_read;
 	priv->rng.priv = (unsigned long)&pdev->dev;
 	priv->rng.quality = 1000;
+	priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev);
 
 	writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG);
 
@@ -162,7 +165,10 @@ static const struct dev_pm_ops npcm_rng_pm_ops = {
 };
 
 static const struct of_device_id rng_dt_id[] __maybe_unused = {
-	{ .compatible = "nuvoton,npcm750-rng",  },
+	{ .compatible = "nuvoton,npcm750-rng",
+		.data = (void *)NPCM_RNG_CLK_SET_25MHZ },
+	{ .compatible = "nuvoton,npcm845-rng",
+		.data = (void *)NPCM_RNG_CLK_SET_62_5MHZ },
 	{},
 };
 MODULE_DEVICE_TABLE(of, rng_dt_id);
-- 
GitLab


From 46beeade05c6a7673873ba0a7b6396cd3a3b3473 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 30 Sep 2022 14:09:34 +0800
Subject: [PATCH 0284/4122] crypto: ixp4xx - Fix sparse warnings

This fixes a number of trivial sparse warnings in ixp4xx.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Tested-by: Corentin Labbe <clabbe.montjoie@gmail.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ixp4xx_crypto.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index d39a386b31ac7..984b3cc0237ca 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -420,7 +420,7 @@ static void one_packet(dma_addr_t phys)
 		break;
 	case CTL_FLAG_GEN_REVAES:
 		ctx = crypto_tfm_ctx(crypt->data.tfm);
-		*(u32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
+		*(__be32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
 		if (atomic_dec_and_test(&ctx->configuring))
 			complete(&ctx->completion);
 		break;
@@ -720,7 +720,7 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
 	crypt->init_len = init_len;
 	crypt->ctl_flags |= CTL_FLAG_GEN_ICV;
 
-	buf->next = 0;
+	buf->next = NULL;
 	buf->buf_len = HMAC_PAD_BLOCKLEN;
 	buf->pkt_len = 0;
 	buf->phys_addr = pad_phys;
@@ -751,7 +751,7 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize
 #ifndef __ARMEB__
 	cfgword ^= 0xAA000000; /* change the "byte swap" flags */
 #endif
-	*(u32 *)cinfo = cpu_to_be32(cfgword);
+	*(__be32 *)cinfo = cpu_to_be32(cfgword);
 	cinfo += sizeof(cfgword);
 
 	/* write ICV to cryptinfo */
@@ -788,7 +788,7 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
 	if (!crypt)
 		return -EAGAIN;
 
-	*(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
+	*(__be32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
 
 	crypt->data.tfm = tfm;
 	crypt->crypt_offs = 0;
@@ -846,7 +846,7 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt, const u8 *key,
 			return err;
 	}
 	/* write cfg word to cryptinfo */
-	*(u32 *)cinfo = cpu_to_be32(cipher_cfg);
+	*(__be32 *)cinfo = cpu_to_be32(cipher_cfg);
 	cinfo += sizeof(cipher_cfg);
 
 	/* write cipher key to cryptinfo */
-- 
GitLab


From 65c92cbb3f2365627a10cf97560d51e88fb4e588 Mon Sep 17 00:00:00 2001
From: Robert Elliott <elliott@hpe.com>
Date: Fri, 30 Sep 2022 16:40:14 -0500
Subject: [PATCH 0285/4122] crypto: tcrypt - fix return value for multiple
 subtests

When a test mode invokes multiple tests (e.g., mode 0 invokes modes
1 through 199, and mode 3 tests three block cipher modes with des),
don't keep accumulating the return values with ret += tcrypt_test(),
which results in a bogus value if more than one report a nonzero
value (e.g., two reporting -2 (-ENOENT) end up reporting -4 (-EINTR)).
Instead, keep track of the minimum return value reported by any
subtest.

Fixes: 4e033a6bc70f ("crypto: tcrypt - Do not exit on success in fips mode")
Signed-off-by: Robert Elliott <elliott@hpe.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 256 ++++++++++++++++++++++++------------------------
 1 file changed, 128 insertions(+), 128 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index a82679b576bb4..3f7dc94a63e0b 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1471,387 +1471,387 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		}
 
 		for (i = 1; i < 200; i++)
-			ret += do_test(NULL, 0, 0, i, num_mb);
+			ret = min(ret, do_test(NULL, 0, 0, i, num_mb));
 		break;
 
 	case 1:
-		ret += tcrypt_test("md5");
+		ret = min(ret, tcrypt_test("md5"));
 		break;
 
 	case 2:
-		ret += tcrypt_test("sha1");
+		ret = min(ret, tcrypt_test("sha1"));
 		break;
 
 	case 3:
-		ret += tcrypt_test("ecb(des)");
-		ret += tcrypt_test("cbc(des)");
-		ret += tcrypt_test("ctr(des)");
+		ret = min(ret, tcrypt_test("ecb(des)"));
+		ret = min(ret, tcrypt_test("cbc(des)"));
+		ret = min(ret, tcrypt_test("ctr(des)"));
 		break;
 
 	case 4:
-		ret += tcrypt_test("ecb(des3_ede)");
-		ret += tcrypt_test("cbc(des3_ede)");
-		ret += tcrypt_test("ctr(des3_ede)");
+		ret = min(ret, tcrypt_test("ecb(des3_ede)"));
+		ret = min(ret, tcrypt_test("cbc(des3_ede)"));
+		ret = min(ret, tcrypt_test("ctr(des3_ede)"));
 		break;
 
 	case 5:
-		ret += tcrypt_test("md4");
+		ret = min(ret, tcrypt_test("md4"));
 		break;
 
 	case 6:
-		ret += tcrypt_test("sha256");
+		ret = min(ret, tcrypt_test("sha256"));
 		break;
 
 	case 7:
-		ret += tcrypt_test("ecb(blowfish)");
-		ret += tcrypt_test("cbc(blowfish)");
-		ret += tcrypt_test("ctr(blowfish)");
+		ret = min(ret, tcrypt_test("ecb(blowfish)"));
+		ret = min(ret, tcrypt_test("cbc(blowfish)"));
+		ret = min(ret, tcrypt_test("ctr(blowfish)"));
 		break;
 
 	case 8:
-		ret += tcrypt_test("ecb(twofish)");
-		ret += tcrypt_test("cbc(twofish)");
-		ret += tcrypt_test("ctr(twofish)");
-		ret += tcrypt_test("lrw(twofish)");
-		ret += tcrypt_test("xts(twofish)");
+		ret = min(ret, tcrypt_test("ecb(twofish)"));
+		ret = min(ret, tcrypt_test("cbc(twofish)"));
+		ret = min(ret, tcrypt_test("ctr(twofish)"));
+		ret = min(ret, tcrypt_test("lrw(twofish)"));
+		ret = min(ret, tcrypt_test("xts(twofish)"));
 		break;
 
 	case 9:
-		ret += tcrypt_test("ecb(serpent)");
-		ret += tcrypt_test("cbc(serpent)");
-		ret += tcrypt_test("ctr(serpent)");
-		ret += tcrypt_test("lrw(serpent)");
-		ret += tcrypt_test("xts(serpent)");
+		ret = min(ret, tcrypt_test("ecb(serpent)"));
+		ret = min(ret, tcrypt_test("cbc(serpent)"));
+		ret = min(ret, tcrypt_test("ctr(serpent)"));
+		ret = min(ret, tcrypt_test("lrw(serpent)"));
+		ret = min(ret, tcrypt_test("xts(serpent)"));
 		break;
 
 	case 10:
-		ret += tcrypt_test("ecb(aes)");
-		ret += tcrypt_test("cbc(aes)");
-		ret += tcrypt_test("lrw(aes)");
-		ret += tcrypt_test("xts(aes)");
-		ret += tcrypt_test("ctr(aes)");
-		ret += tcrypt_test("rfc3686(ctr(aes))");
-		ret += tcrypt_test("ofb(aes)");
-		ret += tcrypt_test("cfb(aes)");
-		ret += tcrypt_test("xctr(aes)");
+		ret = min(ret, tcrypt_test("ecb(aes)"));
+		ret = min(ret, tcrypt_test("cbc(aes)"));
+		ret = min(ret, tcrypt_test("lrw(aes)"));
+		ret = min(ret, tcrypt_test("xts(aes)"));
+		ret = min(ret, tcrypt_test("ctr(aes)"));
+		ret = min(ret, tcrypt_test("rfc3686(ctr(aes))"));
+		ret = min(ret, tcrypt_test("ofb(aes)"));
+		ret = min(ret, tcrypt_test("cfb(aes)"));
+		ret = min(ret, tcrypt_test("xctr(aes)"));
 		break;
 
 	case 11:
-		ret += tcrypt_test("sha384");
+		ret = min(ret, tcrypt_test("sha384"));
 		break;
 
 	case 12:
-		ret += tcrypt_test("sha512");
+		ret = min(ret, tcrypt_test("sha512"));
 		break;
 
 	case 13:
-		ret += tcrypt_test("deflate");
+		ret = min(ret, tcrypt_test("deflate"));
 		break;
 
 	case 14:
-		ret += tcrypt_test("ecb(cast5)");
-		ret += tcrypt_test("cbc(cast5)");
-		ret += tcrypt_test("ctr(cast5)");
+		ret = min(ret, tcrypt_test("ecb(cast5)"));
+		ret = min(ret, tcrypt_test("cbc(cast5)"));
+		ret = min(ret, tcrypt_test("ctr(cast5)"));
 		break;
 
 	case 15:
-		ret += tcrypt_test("ecb(cast6)");
-		ret += tcrypt_test("cbc(cast6)");
-		ret += tcrypt_test("ctr(cast6)");
-		ret += tcrypt_test("lrw(cast6)");
-		ret += tcrypt_test("xts(cast6)");
+		ret = min(ret, tcrypt_test("ecb(cast6)"));
+		ret = min(ret, tcrypt_test("cbc(cast6)"));
+		ret = min(ret, tcrypt_test("ctr(cast6)"));
+		ret = min(ret, tcrypt_test("lrw(cast6)"));
+		ret = min(ret, tcrypt_test("xts(cast6)"));
 		break;
 
 	case 16:
-		ret += tcrypt_test("ecb(arc4)");
+		ret = min(ret, tcrypt_test("ecb(arc4)"));
 		break;
 
 	case 17:
-		ret += tcrypt_test("michael_mic");
+		ret = min(ret, tcrypt_test("michael_mic"));
 		break;
 
 	case 18:
-		ret += tcrypt_test("crc32c");
+		ret = min(ret, tcrypt_test("crc32c"));
 		break;
 
 	case 19:
-		ret += tcrypt_test("ecb(tea)");
+		ret = min(ret, tcrypt_test("ecb(tea)"));
 		break;
 
 	case 20:
-		ret += tcrypt_test("ecb(xtea)");
+		ret = min(ret, tcrypt_test("ecb(xtea)"));
 		break;
 
 	case 21:
-		ret += tcrypt_test("ecb(khazad)");
+		ret = min(ret, tcrypt_test("ecb(khazad)"));
 		break;
 
 	case 22:
-		ret += tcrypt_test("wp512");
+		ret = min(ret, tcrypt_test("wp512"));
 		break;
 
 	case 23:
-		ret += tcrypt_test("wp384");
+		ret = min(ret, tcrypt_test("wp384"));
 		break;
 
 	case 24:
-		ret += tcrypt_test("wp256");
+		ret = min(ret, tcrypt_test("wp256"));
 		break;
 
 	case 26:
-		ret += tcrypt_test("ecb(anubis)");
-		ret += tcrypt_test("cbc(anubis)");
+		ret = min(ret, tcrypt_test("ecb(anubis)"));
+		ret = min(ret, tcrypt_test("cbc(anubis)"));
 		break;
 
 	case 30:
-		ret += tcrypt_test("ecb(xeta)");
+		ret = min(ret, tcrypt_test("ecb(xeta)"));
 		break;
 
 	case 31:
-		ret += tcrypt_test("pcbc(fcrypt)");
+		ret = min(ret, tcrypt_test("pcbc(fcrypt)"));
 		break;
 
 	case 32:
-		ret += tcrypt_test("ecb(camellia)");
-		ret += tcrypt_test("cbc(camellia)");
-		ret += tcrypt_test("ctr(camellia)");
-		ret += tcrypt_test("lrw(camellia)");
-		ret += tcrypt_test("xts(camellia)");
+		ret = min(ret, tcrypt_test("ecb(camellia)"));
+		ret = min(ret, tcrypt_test("cbc(camellia)"));
+		ret = min(ret, tcrypt_test("ctr(camellia)"));
+		ret = min(ret, tcrypt_test("lrw(camellia)"));
+		ret = min(ret, tcrypt_test("xts(camellia)"));
 		break;
 
 	case 33:
-		ret += tcrypt_test("sha224");
+		ret = min(ret, tcrypt_test("sha224"));
 		break;
 
 	case 35:
-		ret += tcrypt_test("gcm(aes)");
+		ret = min(ret, tcrypt_test("gcm(aes)"));
 		break;
 
 	case 36:
-		ret += tcrypt_test("lzo");
+		ret = min(ret, tcrypt_test("lzo"));
 		break;
 
 	case 37:
-		ret += tcrypt_test("ccm(aes)");
+		ret = min(ret, tcrypt_test("ccm(aes)"));
 		break;
 
 	case 38:
-		ret += tcrypt_test("cts(cbc(aes))");
+		ret = min(ret, tcrypt_test("cts(cbc(aes))"));
 		break;
 
         case 39:
-		ret += tcrypt_test("xxhash64");
+		ret = min(ret, tcrypt_test("xxhash64"));
 		break;
 
         case 40:
-		ret += tcrypt_test("rmd160");
+		ret = min(ret, tcrypt_test("rmd160"));
 		break;
 
 	case 42:
-		ret += tcrypt_test("blake2b-512");
+		ret = min(ret, tcrypt_test("blake2b-512"));
 		break;
 
 	case 43:
-		ret += tcrypt_test("ecb(seed)");
+		ret = min(ret, tcrypt_test("ecb(seed)"));
 		break;
 
 	case 45:
-		ret += tcrypt_test("rfc4309(ccm(aes))");
+		ret = min(ret, tcrypt_test("rfc4309(ccm(aes))"));
 		break;
 
 	case 46:
-		ret += tcrypt_test("ghash");
+		ret = min(ret, tcrypt_test("ghash"));
 		break;
 
 	case 47:
-		ret += tcrypt_test("crct10dif");
+		ret = min(ret, tcrypt_test("crct10dif"));
 		break;
 
 	case 48:
-		ret += tcrypt_test("sha3-224");
+		ret = min(ret, tcrypt_test("sha3-224"));
 		break;
 
 	case 49:
-		ret += tcrypt_test("sha3-256");
+		ret = min(ret, tcrypt_test("sha3-256"));
 		break;
 
 	case 50:
-		ret += tcrypt_test("sha3-384");
+		ret = min(ret, tcrypt_test("sha3-384"));
 		break;
 
 	case 51:
-		ret += tcrypt_test("sha3-512");
+		ret = min(ret, tcrypt_test("sha3-512"));
 		break;
 
 	case 52:
-		ret += tcrypt_test("sm3");
+		ret = min(ret, tcrypt_test("sm3"));
 		break;
 
 	case 53:
-		ret += tcrypt_test("streebog256");
+		ret = min(ret, tcrypt_test("streebog256"));
 		break;
 
 	case 54:
-		ret += tcrypt_test("streebog512");
+		ret = min(ret, tcrypt_test("streebog512"));
 		break;
 
 	case 55:
-		ret += tcrypt_test("gcm(sm4)");
+		ret = min(ret, tcrypt_test("gcm(sm4)"));
 		break;
 
 	case 56:
-		ret += tcrypt_test("ccm(sm4)");
+		ret = min(ret, tcrypt_test("ccm(sm4)"));
 		break;
 
 	case 57:
-		ret += tcrypt_test("polyval");
+		ret = min(ret, tcrypt_test("polyval"));
 		break;
 
 	case 58:
-		ret += tcrypt_test("gcm(aria)");
+		ret = min(ret, tcrypt_test("gcm(aria)"));
 		break;
 
 	case 100:
-		ret += tcrypt_test("hmac(md5)");
+		ret = min(ret, tcrypt_test("hmac(md5)"));
 		break;
 
 	case 101:
-		ret += tcrypt_test("hmac(sha1)");
+		ret = min(ret, tcrypt_test("hmac(sha1)"));
 		break;
 
 	case 102:
-		ret += tcrypt_test("hmac(sha256)");
+		ret = min(ret, tcrypt_test("hmac(sha256)"));
 		break;
 
 	case 103:
-		ret += tcrypt_test("hmac(sha384)");
+		ret = min(ret, tcrypt_test("hmac(sha384)"));
 		break;
 
 	case 104:
-		ret += tcrypt_test("hmac(sha512)");
+		ret = min(ret, tcrypt_test("hmac(sha512)"));
 		break;
 
 	case 105:
-		ret += tcrypt_test("hmac(sha224)");
+		ret = min(ret, tcrypt_test("hmac(sha224)"));
 		break;
 
 	case 106:
-		ret += tcrypt_test("xcbc(aes)");
+		ret = min(ret, tcrypt_test("xcbc(aes)"));
 		break;
 
 	case 108:
-		ret += tcrypt_test("hmac(rmd160)");
+		ret = min(ret, tcrypt_test("hmac(rmd160)"));
 		break;
 
 	case 109:
-		ret += tcrypt_test("vmac64(aes)");
+		ret = min(ret, tcrypt_test("vmac64(aes)"));
 		break;
 
 	case 111:
-		ret += tcrypt_test("hmac(sha3-224)");
+		ret = min(ret, tcrypt_test("hmac(sha3-224)"));
 		break;
 
 	case 112:
-		ret += tcrypt_test("hmac(sha3-256)");
+		ret = min(ret, tcrypt_test("hmac(sha3-256)"));
 		break;
 
 	case 113:
-		ret += tcrypt_test("hmac(sha3-384)");
+		ret = min(ret, tcrypt_test("hmac(sha3-384)"));
 		break;
 
 	case 114:
-		ret += tcrypt_test("hmac(sha3-512)");
+		ret = min(ret, tcrypt_test("hmac(sha3-512)"));
 		break;
 
 	case 115:
-		ret += tcrypt_test("hmac(streebog256)");
+		ret = min(ret, tcrypt_test("hmac(streebog256)"));
 		break;
 
 	case 116:
-		ret += tcrypt_test("hmac(streebog512)");
+		ret = min(ret, tcrypt_test("hmac(streebog512)"));
 		break;
 
 	case 150:
-		ret += tcrypt_test("ansi_cprng");
+		ret = min(ret, tcrypt_test("ansi_cprng"));
 		break;
 
 	case 151:
-		ret += tcrypt_test("rfc4106(gcm(aes))");
+		ret = min(ret, tcrypt_test("rfc4106(gcm(aes))"));
 		break;
 
 	case 152:
-		ret += tcrypt_test("rfc4543(gcm(aes))");
+		ret = min(ret, tcrypt_test("rfc4543(gcm(aes))"));
 		break;
 
 	case 153:
-		ret += tcrypt_test("cmac(aes)");
+		ret = min(ret, tcrypt_test("cmac(aes)"));
 		break;
 
 	case 154:
-		ret += tcrypt_test("cmac(des3_ede)");
+		ret = min(ret, tcrypt_test("cmac(des3_ede)"));
 		break;
 
 	case 155:
-		ret += tcrypt_test("authenc(hmac(sha1),cbc(aes))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(aes))"));
 		break;
 
 	case 156:
-		ret += tcrypt_test("authenc(hmac(md5),ecb(cipher_null))");
+		ret = min(ret, tcrypt_test("authenc(hmac(md5),ecb(cipher_null))"));
 		break;
 
 	case 157:
-		ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"));
 		break;
 
 	case 158:
-		ret += tcrypt_test("cbcmac(sm4)");
+		ret = min(ret, tcrypt_test("cbcmac(sm4)"));
 		break;
 
 	case 159:
-		ret += tcrypt_test("cmac(sm4)");
+		ret = min(ret, tcrypt_test("cmac(sm4)"));
 		break;
 
 	case 181:
-		ret += tcrypt_test("authenc(hmac(sha1),cbc(des))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des))"));
 		break;
 	case 182:
-		ret += tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des3_ede))"));
 		break;
 	case 183:
-		ret += tcrypt_test("authenc(hmac(sha224),cbc(des))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des))"));
 		break;
 	case 184:
-		ret += tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha224),cbc(des3_ede))"));
 		break;
 	case 185:
-		ret += tcrypt_test("authenc(hmac(sha256),cbc(des))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des))"));
 		break;
 	case 186:
-		ret += tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha256),cbc(des3_ede))"));
 		break;
 	case 187:
-		ret += tcrypt_test("authenc(hmac(sha384),cbc(des))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des))"));
 		break;
 	case 188:
-		ret += tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha384),cbc(des3_ede))"));
 		break;
 	case 189:
-		ret += tcrypt_test("authenc(hmac(sha512),cbc(des))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des))"));
 		break;
 	case 190:
-		ret += tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))");
+		ret = min(ret, tcrypt_test("authenc(hmac(sha512),cbc(des3_ede))"));
 		break;
 	case 191:
-		ret += tcrypt_test("ecb(sm4)");
-		ret += tcrypt_test("cbc(sm4)");
-		ret += tcrypt_test("cfb(sm4)");
-		ret += tcrypt_test("ctr(sm4)");
+		ret = min(ret, tcrypt_test("ecb(sm4)"));
+		ret = min(ret, tcrypt_test("cbc(sm4)"));
+		ret = min(ret, tcrypt_test("cfb(sm4)"));
+		ret = min(ret, tcrypt_test("ctr(sm4)"));
 		break;
 	case 192:
-		ret += tcrypt_test("ecb(aria)");
-		ret += tcrypt_test("cbc(aria)");
-		ret += tcrypt_test("cfb(aria)");
-		ret += tcrypt_test("ctr(aria)");
+		ret = min(ret, tcrypt_test("ecb(aria)"));
+		ret = min(ret, tcrypt_test("cbc(aria)"));
+		ret = min(ret, tcrypt_test("cfb(aria)"));
+		ret = min(ret, tcrypt_test("ctr(aria)"));
 		break;
 	case 200:
 		test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0,
-- 
GitLab


From 76a4e874593543a2dff91d249c95bac728df2774 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Thu, 6 Oct 2022 04:34:19 +0000
Subject: [PATCH 0286/4122] crypto: n2 - add missing hash statesize

Add missing statesize to hash templates.
This is mandatory otherwise no algorithms can be registered as the core
requires statesize to be set.

CC: stable@kernel.org # 4.3+
Reported-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Tested-by: Rolf Eike Beer <eike-kernel@sf-tec.de>
Fixes: 0a625fd2abaa ("crypto: n2 - Add Niagara2 crypto driver")
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/n2_core.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 31e24df18877f..20d0dcd50344b 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -1229,6 +1229,7 @@ struct n2_hash_tmpl {
 	const u8	*hash_init;
 	u8		hw_op_hashsz;
 	u8		digest_size;
+	u8		statesize;
 	u8		block_size;
 	u8		auth_type;
 	u8		hmac_type;
@@ -1260,6 +1261,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
 	  .hmac_type	= AUTH_TYPE_HMAC_MD5,
 	  .hw_op_hashsz	= MD5_DIGEST_SIZE,
 	  .digest_size	= MD5_DIGEST_SIZE,
+	  .statesize	= sizeof(struct md5_state),
 	  .block_size	= MD5_HMAC_BLOCK_SIZE },
 	{ .name		= "sha1",
 	  .hash_zero	= sha1_zero_message_hash,
@@ -1268,6 +1270,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
 	  .hmac_type	= AUTH_TYPE_HMAC_SHA1,
 	  .hw_op_hashsz	= SHA1_DIGEST_SIZE,
 	  .digest_size	= SHA1_DIGEST_SIZE,
+	  .statesize	= sizeof(struct sha1_state),
 	  .block_size	= SHA1_BLOCK_SIZE },
 	{ .name		= "sha256",
 	  .hash_zero	= sha256_zero_message_hash,
@@ -1276,6 +1279,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
 	  .hmac_type	= AUTH_TYPE_HMAC_SHA256,
 	  .hw_op_hashsz	= SHA256_DIGEST_SIZE,
 	  .digest_size	= SHA256_DIGEST_SIZE,
+	  .statesize	= sizeof(struct sha256_state),
 	  .block_size	= SHA256_BLOCK_SIZE },
 	{ .name		= "sha224",
 	  .hash_zero	= sha224_zero_message_hash,
@@ -1284,6 +1288,7 @@ static const struct n2_hash_tmpl hash_tmpls[] = {
 	  .hmac_type	= AUTH_TYPE_RESERVED,
 	  .hw_op_hashsz	= SHA256_DIGEST_SIZE,
 	  .digest_size	= SHA224_DIGEST_SIZE,
+	  .statesize	= sizeof(struct sha256_state),
 	  .block_size	= SHA224_BLOCK_SIZE },
 };
 #define NUM_HASH_TMPLS ARRAY_SIZE(hash_tmpls)
@@ -1424,6 +1429,7 @@ static int __n2_register_one_ahash(const struct n2_hash_tmpl *tmpl)
 
 	halg = &ahash->halg;
 	halg->digestsize = tmpl->digest_size;
+	halg->statesize = tmpl->statesize;
 
 	base = &halg->base;
 	snprintf(base->cra_name, CRYPTO_MAX_ALG_NAME, "%s", tmpl->name);
-- 
GitLab


From f1da27b7c4191f78ed81d3dabf64c769f896296c Mon Sep 17 00:00:00 2001
From: "Mingming.Su" <Mingming.Su@mediatek.com>
Date: Sat, 8 Oct 2022 18:45:53 +0200
Subject: [PATCH 0287/4122] hwrng: mtk - add mt7986 support

1. Add trng compatible name for MT7986
2. Fix mtk_rng_wait_ready() function

Signed-off-by: Mingming.Su <Mingming.Su@mediatek.com>
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/mtk-rng.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/mtk-rng.c b/drivers/char/hw_random/mtk-rng.c
index 6c00ea0085553..aa993753ab120 100644
--- a/drivers/char/hw_random/mtk-rng.c
+++ b/drivers/char/hw_random/mtk-rng.c
@@ -22,7 +22,7 @@
 #define RNG_AUTOSUSPEND_TIMEOUT		100
 
 #define USEC_POLL			2
-#define TIMEOUT_POLL			20
+#define TIMEOUT_POLL			60
 
 #define RNG_CTRL			0x00
 #define RNG_EN				BIT(0)
@@ -77,7 +77,7 @@ static bool mtk_rng_wait_ready(struct hwrng *rng, bool wait)
 		readl_poll_timeout_atomic(priv->base + RNG_CTRL, ready,
 					  ready & RNG_READY, USEC_POLL,
 					  TIMEOUT_POLL);
-	return !!ready;
+	return !!(ready & RNG_READY);
 }
 
 static int mtk_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
@@ -179,6 +179,7 @@ static const struct dev_pm_ops mtk_rng_pm_ops = {
 #endif	/* CONFIG_PM */
 
 static const struct of_device_id mtk_rng_match[] = {
+	{ .compatible = "mediatek,mt7986-rng" },
 	{ .compatible = "mediatek,mt7623-rng" },
 	{},
 };
-- 
GitLab


From 854e25a6d653b76007c142b7edbaba81a8789a7f Mon Sep 17 00:00:00 2001
From: jianchunfu <jianchunfu@cmss.chinamobile.com>
Date: Sun, 9 Oct 2022 17:52:54 +0800
Subject: [PATCH 0288/4122] crypto: talitos - Use the defined variable to clean
 code

Use the defined variable "dev" to make the code cleaner.

Signed-off-by: jianchunfu <jianchunfu@cmss.chinamobile.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/talitos.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c9ad6c2130901..71db6450b6aa4 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -1999,7 +1999,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 		/* Buffer up to one whole block */
 		nents = sg_nents_for_len(areq->src, nbytes);
 		if (nents < 0) {
-			dev_err(ctx->dev, "Invalid number of src SG.\n");
+			dev_err(dev, "Invalid number of src SG.\n");
 			return nents;
 		}
 		sg_copy_to_buffer(areq->src, nents,
@@ -2040,7 +2040,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 			offset = nbytes_to_hash - req_ctx->nbuf;
 		nents = sg_nents_for_len(areq->src, offset);
 		if (nents < 0) {
-			dev_err(ctx->dev, "Invalid number of src SG.\n");
+			dev_err(dev, "Invalid number of src SG.\n");
 			return nents;
 		}
 		sg_copy_to_buffer(areq->src, nents,
@@ -2054,7 +2054,7 @@ static int ahash_process_req(struct ahash_request *areq, unsigned int nbytes)
 	if (to_hash_later) {
 		nents = sg_nents_for_len(areq->src, nbytes);
 		if (nents < 0) {
-			dev_err(ctx->dev, "Invalid number of src SG.\n");
+			dev_err(dev, "Invalid number of src SG.\n");
 			return nents;
 		}
 		sg_pcopy_to_buffer(areq->src, nents,
-- 
GitLab


From 7e11a4fc84dcc9746936c46d9a88489a365fea45 Mon Sep 17 00:00:00 2001
From: Tomas Marek <tomas.marek@elrest.cz>
Date: Wed, 12 Oct 2022 18:09:23 +0200
Subject: [PATCH 0289/4122] hwrng: stm32 - fix number of returned bytes on read

The stm32_rng_read() function uses `retval` variable as a counter of
generated random bytes. However, the same variable is used to store
a result of the polling function in case the driver is waiting until
the TRNG is ready. The TRNG generates random numbers by 16B. One
loop read 4B. So, the function calls the polling every 16B, i.e.
every 4th loop. The `retval` counter is reset on poll call and only
number of bytes read after the last poll call is returned to the
caller. The remaining sampled random bytes (for example 48 out of
64 in case 64 bytes are read) are not used.

Use different variable to store the polling function result and
do not overwrite `retval` counter.

Cc: Oleg Karfich <oleg.karfich@wago.com>
Signed-off-by: Tomas Marek <tomas.marek@elrest.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/stm32-rng.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index bc22178f83e83..8eaacefd498bb 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -49,11 +49,13 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 		/* Manage timeout which is based on timer and take */
 		/* care of initial delay time when enabling rng	*/
 		if (!sr && wait) {
-			retval = readl_relaxed_poll_timeout_atomic(priv->base
+			int ret;
+
+			ret = readl_relaxed_poll_timeout_atomic(priv->base
 								   + RNG_SR,
 								   sr, sr,
 								   10, 50000);
-			if (retval)
+			if (ret)
 				dev_err((struct device *)priv->rng.priv,
 					"%s: timeout %x!\n", __func__, sr);
 		}
-- 
GitLab


From e64f57e8cd5abe167cdf453869d6274608480519 Mon Sep 17 00:00:00 2001
From: Tomas Marek <tomas.marek@elrest.cz>
Date: Wed, 12 Oct 2022 18:09:24 +0200
Subject: [PATCH 0290/4122] hwrng: stm32 - fix read of the last word

The stm32_rng_read() function samples TRNG by 4 bytes until at
least 5 bytes are free in the input buffer. The last four bytes
are never read. For example, 60 bytes are returned in case the
input buffer size is 64 bytes.

Read until at least 4 bytes are free in the input buffer. Fill
the buffer entirely in case the buffer size is divisible by 4.

Cc: Oleg Karfich <oleg.karfich@wago.com>
Signed-off-by: Tomas Marek <tomas.marek@elrest.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/stm32-rng.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index 8eaacefd498bb..366edda4848b6 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -44,7 +44,7 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 
 	pm_runtime_get_sync((struct device *) priv->rng.priv);
 
-	while (max > sizeof(u32)) {
+	while (max >= sizeof(u32)) {
 		sr = readl_relaxed(priv->base + RNG_SR);
 		/* Manage timeout which is based on timer and take */
 		/* care of initial delay time when enabling rng	*/
-- 
GitLab


From 38bcb51f81af17a6d40fc135e565fc1fb8aa8e9d Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Mon, 17 Oct 2022 16:10:53 +0800
Subject: [PATCH 0291/4122] leds-pca955x: Remove the unused function
 pca95xx_num_led_regs()

The function pca95xx_num_led_regs() is defined in the leds-pca955x.c
file, but not called elsewhere, so delete this unused function.

drivers/leds/leds-pca955x.c:149:19: warning: unused function 'pca95xx_num_led_regs'.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2411
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/leds-pca955x.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/leds/leds-pca955x.c b/drivers/leds/leds-pca955x.c
index 81aaf21212d7d..33ec4543fb4f8 100644
--- a/drivers/leds/leds-pca955x.c
+++ b/drivers/leds/leds-pca955x.c
@@ -145,12 +145,6 @@ static inline int pca95xx_num_input_regs(int bits)
 	return (bits + 7) / 8;
 }
 
-/* 4 bits per LED selector register */
-static inline int pca95xx_num_led_regs(int bits)
-{
-	return (bits + 3)  / 4;
-}
-
 /*
  * Return an LED selector register value based on an existing one, with
  * the appropriate 2-bit state value set for the given LED number (0-3).
-- 
GitLab


From fb0f4051ee8e0ae89697e417f1a547e715acc824 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 20 Oct 2022 22:07:31 +0100
Subject: [PATCH 0292/4122] leds: lp55xx: remove variable j

The variable j being incremented but it is never referenced,
it is redundant and can be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/leds-lp55xx-common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c
index 9fdfc1b9a1a0c..ca2e28fb843f1 100644
--- a/drivers/leds/leds-lp55xx-common.c
+++ b/drivers/leds/leds-lp55xx-common.c
@@ -166,7 +166,7 @@ static int lp55xx_init_led(struct lp55xx_led *led,
 	struct mc_subled *mc_led_info;
 	struct led_classdev *led_cdev;
 	char name[32];
-	int i, j = 0;
+	int i;
 	int ret;
 
 	if (chan >= max_channel) {
@@ -201,7 +201,6 @@ static int lp55xx_init_led(struct lp55xx_led *led,
 				pdata->led_config[chan].color_id[i];
 			mc_led_info[i].channel =
 					pdata->led_config[chan].output_num[i];
-			j++;
 		}
 
 		led->mc_cdev.subled_info = mc_led_info;
-- 
GitLab


From 621a323c3a7e23b364deaddf769e731f2da6ff03 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Tue, 18 Oct 2022 16:12:35 +0800
Subject: [PATCH 0293/4122] scsi: target: Remove the unused function
 transport_lba_64_ext()

The function transport_lba_64_ext() is defined in the target_core_sbc.c
file, but not called elsewhere, so remove this unused function.

drivers/target/target_core_sbc.c:276:34: warning: unused function 'transport_lba_64_ext'.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2427
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/20221018081235.124662-1-jiapeng.chong@linux.alibaba.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_sbc.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 1e3216de1e04d..1cd41e3834bb0 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -270,14 +270,6 @@ static inline unsigned long long transport_lba_64(unsigned char *cdb)
 	return get_unaligned_be64(&cdb[2]);
 }
 
-/*
- * For VARIABLE_LENGTH_CDB w/ 32 byte extended CDBs
- */
-static inline unsigned long long transport_lba_64_ext(unsigned char *cdb)
-{
-	return get_unaligned_be64(&cdb[12]);
-}
-
 static sense_reason_t
 sbc_setup_write_same(struct se_cmd *cmd, unsigned char flags, struct sbc_ops *ops)
 {
-- 
GitLab


From a9ee3f840646e2ec419c734e592ffe997195435e Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 18 Oct 2022 19:15:57 +0800
Subject: [PATCH 0294/4122] scsi: libsas: Add sas_task_find_rq()

blk-mq already provides a unique tag per request. Some libsas LLDDs - like
hisi_sas - already use this tag as the unique per-I/O HW tag.

Add a common function to provide the request associated with a sas_task for
all libsas LLDDs.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666091763-11023-2-git-send-email-john.garry@huawei.com
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/libsas.h | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index ec6c9ecd8d12a..1aee3d0ebbb2e 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -644,6 +644,24 @@ static inline bool sas_is_internal_abort(struct sas_task *task)
 	return task->task_proto == SAS_PROTOCOL_INTERNAL_ABORT;
 }
 
+static inline struct request *sas_task_find_rq(struct sas_task *task)
+{
+	struct scsi_cmnd *scmd;
+
+	if (task->task_proto & SAS_PROTOCOL_STP_ALL) {
+		struct ata_queued_cmd *qc = task->uldd_task;
+
+		scmd = qc ? qc->scsicmd : NULL;
+	} else {
+		scmd = task->uldd_task;
+	}
+
+	if (!scmd)
+		return NULL;
+
+	return scsi_cmd_to_rq(scmd);
+}
+
 struct sas_domain_function_template {
 	/* The class calls these to notify the LLDD of an event. */
 	void (*lldd_port_formed)(struct asd_sas_phy *);
-- 
GitLab


From 295fd2330a91f295522ad2b7fe2109833ae32e33 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 18 Oct 2022 19:15:58 +0800
Subject: [PATCH 0295/4122] scsi: hisi_sas: Use sas_task_find_rq()

Use sas_task_find_rq() to lookup the request per task for its driver tag.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666091763-11023-3-git-send-email-john.garry@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 26 ++++++++------------------
 1 file changed, 8 insertions(+), 18 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 10813836a7288..26e474b0f53f2 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -177,13 +177,13 @@ static void hisi_sas_slot_index_set(struct hisi_hba *hisi_hba, int slot_idx)
 }
 
 static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba,
-				     struct scsi_cmnd *scsi_cmnd)
+				     struct request *rq)
 {
 	int index;
 	void *bitmap = hisi_hba->slot_index_tags;
 
-	if (scsi_cmnd)
-		return scsi_cmd_to_rq(scsi_cmnd)->tag;
+	if (rq)
+		return rq->tag;
 
 	spin_lock(&hisi_hba->lock);
 	index = find_next_zero_bit(bitmap, hisi_hba->slot_index_count,
@@ -461,11 +461,11 @@ static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags)
 	struct asd_sas_port *sas_port = device->port;
 	struct hisi_sas_device *sas_dev = device->lldd_dev;
 	bool internal_abort = sas_is_internal_abort(task);
-	struct scsi_cmnd *scmd = NULL;
 	struct hisi_sas_dq *dq = NULL;
 	struct hisi_sas_port *port;
 	struct hisi_hba *hisi_hba;
 	struct hisi_sas_slot *slot;
+	struct request *rq = NULL;
 	struct device *dev;
 	int rc;
 
@@ -520,22 +520,12 @@ static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags)
 				return -ECOMM;
 		}
 
-		if (task->uldd_task) {
-			struct ata_queued_cmd *qc;
-
-			if (dev_is_sata(device)) {
-				qc = task->uldd_task;
-				scmd = qc->scsicmd;
-			} else {
-				scmd = task->uldd_task;
-			}
-		}
-
-		if (scmd) {
+		rq = sas_task_find_rq(task);
+		if (rq) {
 			unsigned int dq_index;
 			u32 blk_tag;
 
-			blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+			blk_tag = blk_mq_unique_tag(rq);
 			dq_index = blk_mq_unique_tag_to_hwq(blk_tag);
 			dq = &hisi_hba->dq[dq_index];
 		} else {
@@ -580,7 +570,7 @@ static int hisi_sas_queue_command(struct sas_task *task, gfp_t gfp_flags)
 	if (!internal_abort && hisi_hba->hw->slot_index_alloc)
 		rc = hisi_hba->hw->slot_index_alloc(hisi_hba, device);
 	else
-		rc = hisi_sas_slot_index_alloc(hisi_hba, scmd);
+		rc = hisi_sas_slot_index_alloc(hisi_hba, rq);
 
 	if (rc < 0)
 		goto err_out_dif_dma_unmap;
-- 
GitLab


From f7d190a94e35a2784af8871e275b86e68ff8034a Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 18 Oct 2022 19:15:59 +0800
Subject: [PATCH 0296/4122] scsi: hisi_sas: Put reserved tags in lower region
 of tagset

To be consistent with blk-mq, put the reserved tags in the lower region of
the tagset. Eventually we hope to get rid of all this reserved tag
management.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666091763-11023-4-git-send-email-john.garry@huawei.com
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 26e474b0f53f2..54860d2524667 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -183,16 +183,16 @@ static int hisi_sas_slot_index_alloc(struct hisi_hba *hisi_hba,
 	void *bitmap = hisi_hba->slot_index_tags;
 
 	if (rq)
-		return rq->tag;
+		return rq->tag + HISI_SAS_RESERVED_IPTT;
 
 	spin_lock(&hisi_hba->lock);
-	index = find_next_zero_bit(bitmap, hisi_hba->slot_index_count,
+	index = find_next_zero_bit(bitmap, HISI_SAS_RESERVED_IPTT,
 				   hisi_hba->last_slot_index + 1);
-	if (index >= hisi_hba->slot_index_count) {
+	if (index >= HISI_SAS_RESERVED_IPTT) {
 		index = find_next_zero_bit(bitmap,
-				hisi_hba->slot_index_count,
-				HISI_SAS_UNRESERVED_IPTT);
-		if (index >= hisi_hba->slot_index_count) {
+				HISI_SAS_RESERVED_IPTT,
+				0);
+		if (index >= HISI_SAS_RESERVED_IPTT) {
 			spin_unlock(&hisi_hba->lock);
 			return -SAS_QUEUE_FULL;
 		}
@@ -2216,7 +2216,7 @@ int hisi_sas_alloc(struct hisi_hba *hisi_hba)
 	if (!hisi_hba->sata_breakpoint)
 		goto err_out;
 
-	hisi_hba->last_slot_index = HISI_SAS_UNRESERVED_IPTT;
+	hisi_hba->last_slot_index = 0;
 
 	hisi_hba->wq = create_singlethread_workqueue(dev_name(dev));
 	if (!hisi_hba->wq) {
-- 
GitLab


From 1baa70d36403aa572453eee9fdd4f637455ecaaf Mon Sep 17 00:00:00 2001
From: Igor Pylypiv <ipylypiv@google.com>
Date: Tue, 18 Oct 2022 19:16:00 +0800
Subject: [PATCH 0297/4122] scsi: pm8001: Remove pm8001_tag_init()

In commit 5a141315ed7c ("scsi: pm80xx: Increase the number of outstanding
I/O supported to 1024") the pm8001_ha->tags allocation was moved into
pm8001_init_ccb_tag(). This changed the execution order of allocation.
pm8001_tag_init() used to be called after the pm8001_ha->tags allocation
and now it is called before the allocation.

Before:

pm8001_pci_probe()
`--> pm8001_pci_alloc()
     `--> pm8001_alloc()
          `--> pm8001_ha->tags = kzalloc(...)
          `--> pm8001_tag_init(pm8001_ha); // OK: tags are allocated

After:

pm8001_pci_probe()
`--> pm8001_pci_alloc()
|    `--> pm8001_alloc()
|         `--> pm8001_tag_init(pm8001_ha); // NOK: tags are not allocated
|
`--> pm8001_init_ccb_tag()
     `-->  pm8001_ha->tags = kzalloc(...) // today it is bitmap_zalloc()

Since pm8001_ha->tags_num is zero when pm8001_tag_init() is called it does
nothing. Tags memory is allocated with bitmap_zalloc() so there is no need
to manually clear each bit with pm8001_tag_free().

Reviewed-by: Changyuan Lyu <changyuanl@google.com>
Signed-off-by: Igor Pylypiv <ipylypiv@google.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666091763-11023-5-git-send-email-john.garry@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_init.c | 2 --
 drivers/scsi/pm8001/pm8001_sas.c  | 7 -------
 drivers/scsi/pm8001/pm8001_sas.h  | 1 -
 3 files changed, 10 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 2ff2fac1e403d..040a8280f23bc 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -436,8 +436,6 @@ static int pm8001_alloc(struct pm8001_hba_info *pm8001_ha,
 		atomic_set(&pm8001_ha->devices[i].running_req, 0);
 	}
 	pm8001_ha->flags = PM8001F_INIT_TIME;
-	/* Initialize tags */
-	pm8001_tag_init(pm8001_ha);
 	return 0;
 
 err_out_nodev:
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 51230b8271492..c9fa3328f3fac 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -96,13 +96,6 @@ int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out)
 	return 0;
 }
 
-void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha)
-{
-	int i;
-	for (i = 0; i < pm8001_ha->tags_num; ++i)
-		pm8001_tag_free(pm8001_ha, i);
-}
-
 /**
  * pm8001_mem_alloc - allocate memory for pm8001.
  * @pdev: pci device.
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index 16a753d5e8a7d..ecb98bc5a8d0e 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -632,7 +632,6 @@ extern struct workqueue_struct *pm8001_wq;
 
 /******************** function prototype *********************/
 int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out);
-void pm8001_tag_init(struct pm8001_hba_info *pm8001_ha);
 u32 pm8001_get_ncq_tag(struct sas_task *task, u32 *tag);
 void pm8001_ccb_task_free(struct pm8001_hba_info *pm8001_ha,
 			  struct pm8001_ccb_info *ccb);
-- 
GitLab


From 6472cfb418a0ba783a469deeb6586fb2f133c268 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 18 Oct 2022 19:16:01 +0800
Subject: [PATCH 0298/4122] scsi: pm8001: Use sas_task_find_rq() for tagging

The request associated with a SCSI command coming from the block layer has
a unique tag, so use that when possible for getting a CCB.

Unfortunately we don't support reserved commands in the SCSI midlayer yet,
so in the interim continue to manage those tags internally (along with
tags for private commands).

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666091763-11023-6-git-send-email-john.garry@huawei.com
Reviewed-by: Jack Wang <jinpu.wang@ionos.com>
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_init.c | 12 ++++--------
 drivers/scsi/pm8001/pm8001_sas.c  | 13 +++++++++----
 drivers/scsi/pm8001/pm8001_sas.h  | 11 ++++++++---
 drivers/scsi/pm8001/pm80xx_hwi.c  | 19 +++----------------
 4 files changed, 24 insertions(+), 31 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_init.c b/drivers/scsi/pm8001/pm8001_init.c
index 040a8280f23bc..a1df61205b20b 100644
--- a/drivers/scsi/pm8001/pm8001_init.c
+++ b/drivers/scsi/pm8001/pm8001_init.c
@@ -196,7 +196,7 @@ static void pm8001_free(struct pm8001_hba_info *pm8001_ha)
 	}
 	PM8001_CHIP_DISP->chip_iounmap(pm8001_ha);
 	flush_workqueue(pm8001_wq);
-	bitmap_free(pm8001_ha->tags);
+	bitmap_free(pm8001_ha->rsvd_tags);
 	kfree(pm8001_ha);
 }
 
@@ -1208,18 +1208,15 @@ static int pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha)
 	struct Scsi_Host *shost = pm8001_ha->shost;
 	struct device *dev = pm8001_ha->dev;
 	u32 max_out_io, ccb_count;
-	u32 can_queue;
 	int i;
 
 	max_out_io = pm8001_ha->main_cfg_tbl.pm80xx_tbl.max_out_io;
 	ccb_count = min_t(int, PM8001_MAX_CCB, max_out_io);
 
-	/* Update to the scsi host*/
-	can_queue = ccb_count - PM8001_RESERVE_SLOT;
-	shost->can_queue = can_queue;
+	shost->can_queue = ccb_count - PM8001_RESERVE_SLOT;
 
-	pm8001_ha->tags = bitmap_zalloc(ccb_count, GFP_KERNEL);
-	if (!pm8001_ha->tags)
+	pm8001_ha->rsvd_tags = bitmap_zalloc(PM8001_RESERVE_SLOT, GFP_KERNEL);
+	if (!pm8001_ha->rsvd_tags)
 		goto err_out;
 
 	/* Memory region for ccb_info*/
@@ -1244,7 +1241,6 @@ static int pm8001_init_ccb_tag(struct pm8001_hba_info *pm8001_ha)
 		pm8001_ha->ccb_info[i].task = NULL;
 		pm8001_ha->ccb_info[i].ccb_tag = PM8001_INVALID_TAG;
 		pm8001_ha->ccb_info[i].device = NULL;
-		++pm8001_ha->tags_num;
 	}
 
 	return 0;
diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index c9fa3328f3fac..2359e827c9e61 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -65,9 +65,12 @@ static int pm8001_find_tag(struct sas_task *task, u32 *tag)
   */
 void pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag)
 {
-	void *bitmap = pm8001_ha->tags;
+	void *bitmap = pm8001_ha->rsvd_tags;
 	unsigned long flags;
 
+	if (tag >= PM8001_RESERVE_SLOT)
+		return;
+
 	spin_lock_irqsave(&pm8001_ha->bitmap_lock, flags);
 	__clear_bit(tag, bitmap);
 	spin_unlock_irqrestore(&pm8001_ha->bitmap_lock, flags);
@@ -80,18 +83,20 @@ void pm8001_tag_free(struct pm8001_hba_info *pm8001_ha, u32 tag)
   */
 int pm8001_tag_alloc(struct pm8001_hba_info *pm8001_ha, u32 *tag_out)
 {
-	void *bitmap = pm8001_ha->tags;
+	void *bitmap = pm8001_ha->rsvd_tags;
 	unsigned long flags;
 	unsigned int tag;
 
 	spin_lock_irqsave(&pm8001_ha->bitmap_lock, flags);
-	tag = find_first_zero_bit(bitmap, pm8001_ha->tags_num);
-	if (tag >= pm8001_ha->tags_num) {
+	tag = find_first_zero_bit(bitmap, PM8001_RESERVE_SLOT);
+	if (tag >= PM8001_RESERVE_SLOT) {
 		spin_unlock_irqrestore(&pm8001_ha->bitmap_lock, flags);
 		return -SAS_QUEUE_FULL;
 	}
 	__set_bit(tag, bitmap);
 	spin_unlock_irqrestore(&pm8001_ha->bitmap_lock, flags);
+
+	/* reserved tags are in the lower region of the tagset */
 	*tag_out = tag;
 	return 0;
 }
diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index ecb98bc5a8d0e..cf5f1b091959f 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -510,8 +510,7 @@ struct pm8001_hba_info {
 	u32			chip_id;
 	const struct pm8001_chip_info	*chip;
 	struct completion	*nvmd_completion;
-	int			tags_num;
-	unsigned long		*tags;
+	unsigned long		*rsvd_tags;
 	struct pm8001_phy	phy[PM8001_MAX_PHYS];
 	struct pm8001_port	port[PM8001_MAX_PHYS];
 	u32			id;
@@ -736,9 +735,15 @@ pm8001_ccb_alloc(struct pm8001_hba_info *pm8001_ha,
 		 struct pm8001_device *dev, struct sas_task *task)
 {
 	struct pm8001_ccb_info *ccb;
+	struct request *rq = NULL;
 	u32 tag;
 
-	if (pm8001_tag_alloc(pm8001_ha, &tag)) {
+	if (task)
+		rq = sas_task_find_rq(task);
+
+	if (rq) {
+		tag = rq->tag + PM8001_RESERVE_SLOT;
+	} else if (pm8001_tag_alloc(pm8001_ha, &tag)) {
 		pm8001_dbg(pm8001_ha, FAIL, "Failed to allocate a tag\n");
 		return NULL;
 	}
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index 4484c498bcb62..bc71db442dd9c 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -4247,25 +4247,12 @@ static int check_enc_sat_cmd(struct sas_task *task)
 
 static u32 pm80xx_chip_get_q_index(struct sas_task *task)
 {
-	struct scsi_cmnd *scmd = NULL;
-	u32 blk_tag;
+	struct request *rq = sas_task_find_rq(task);
 
-	if (task->uldd_task) {
-		struct ata_queued_cmd *qc;
-
-		if (dev_is_sata(task->dev)) {
-			qc = task->uldd_task;
-			scmd = qc->scsicmd;
-		} else {
-			scmd = task->uldd_task;
-		}
-	}
-
-	if (!scmd)
+	if (!rq)
 		return 0;
 
-	blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
-	return blk_mq_unique_tag_to_hwq(blk_tag);
+	return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(rq));
 }
 
 /**
-- 
GitLab


From ffc9f9bf3f14876d019f67ef17d41138802529a8 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 18 Oct 2022 19:16:02 +0800
Subject: [PATCH 0299/4122] scsi: mvsas: Delete mvs_tag_init()

All mvs_tag_init() does is zero the tag bitmap, but this is already done
with the kzalloc() call to alloc the tags, so delete this unneeded
function.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666091763-11023-7-git-send-email-john.garry@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mvsas/mv_init.c | 2 --
 drivers/scsi/mvsas/mv_sas.c  | 7 -------
 drivers/scsi/mvsas/mv_sas.h  | 1 -
 3 files changed, 10 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index 2fde496fff5f7..c85fb812ad437 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -286,8 +286,6 @@ static int mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost)
 	}
 	mvi->tags_num = slot_nr;
 
-	/* Initialize tags */
-	mvs_tag_init(mvi);
 	return 0;
 err_out:
 	return 1;
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index bf7d4995b2574..3aed5e3e0c8cd 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -51,13 +51,6 @@ inline int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
 	return 0;
 }
 
-void mvs_tag_init(struct mvs_info *mvi)
-{
-	int i;
-	for (i = 0; i < mvi->tags_num; ++i)
-		mvs_tag_clear(mvi, i);
-}
-
 static struct mvs_info *mvs_find_dev_mvi(struct domain_device *dev)
 {
 	unsigned long i = 0, j = 0, hi = 0;
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index 509d8f32a04ff..fe57665bdb505 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -428,7 +428,6 @@ void mvs_tag_clear(struct mvs_info *mvi, u32 tag);
 void mvs_tag_free(struct mvs_info *mvi, u32 tag);
 void mvs_tag_set(struct mvs_info *mvi, unsigned int tag);
 int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out);
-void mvs_tag_init(struct mvs_info *mvi);
 void mvs_iounmap(void __iomem *regs);
 int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex);
 void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard);
-- 
GitLab


From 2acf97f199f9eba8321390325519e9b6bff60108 Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Tue, 18 Oct 2022 19:16:03 +0800
Subject: [PATCH 0300/4122] scsi: mvsas: Use sas_task_find_rq() for tagging

The request associated with a SCSI command coming from the block layer has
a unique tag, so use that when possible for getting a slot.

Unfortunately we don't support reserved commands in the SCSI midlayer yet.
As such, SMP tasks - as an example - will not have a request associated, so
in the interim continue to manage those tags for that type of sas_task
internally.

We reserve an arbitrary 4 tags for these internal tags. Indeed, we already
decrement MVS_RSVD_SLOTS by 2 for the shost can_queue when flag
MVF_FLAG_SOC is set. This change was made in commit 20b09c2992fe ("[SCSI]
mvsas: add support for 94xx; layout change; bug fixes"), but what those 2
slots are used for is not obvious.

Also make the tag management functions static, where possible.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666091763-11023-8-git-send-email-john.garry@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mvsas/mv_defs.h |  1 +
 drivers/scsi/mvsas/mv_init.c |  9 +++++----
 drivers/scsi/mvsas/mv_sas.c  | 35 ++++++++++++++++++++++-------------
 drivers/scsi/mvsas/mv_sas.h  |  7 +------
 4 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/scsi/mvsas/mv_defs.h b/drivers/scsi/mvsas/mv_defs.h
index 7123a2efbf583..8ef174cd4d374 100644
--- a/drivers/scsi/mvsas/mv_defs.h
+++ b/drivers/scsi/mvsas/mv_defs.h
@@ -40,6 +40,7 @@ enum driver_configuration {
 	MVS_ATA_CMD_SZ		= 96,	/* SATA command table buffer size */
 	MVS_OAF_SZ		= 64,	/* Open address frame buffer size */
 	MVS_QUEUE_SIZE		= 64,	/* Support Queue depth */
+	MVS_RSVD_SLOTS		= 4,
 	MVS_SOC_CAN_QUEUE	= MVS_SOC_SLOTS - 2,
 };
 
diff --git a/drivers/scsi/mvsas/mv_init.c b/drivers/scsi/mvsas/mv_init.c
index c85fb812ad437..cfe84473a5153 100644
--- a/drivers/scsi/mvsas/mv_init.c
+++ b/drivers/scsi/mvsas/mv_init.c
@@ -142,7 +142,7 @@ static void mvs_free(struct mvs_info *mvi)
 		scsi_host_put(mvi->shost);
 	list_for_each_entry(mwq, &mvi->wq_list, entry)
 		cancel_delayed_work(&mwq->work_q);
-	kfree(mvi->tags);
+	kfree(mvi->rsvd_tags);
 	kfree(mvi);
 }
 
@@ -284,7 +284,6 @@ static int mvs_alloc(struct mvs_info *mvi, struct Scsi_Host *shost)
 			printk(KERN_DEBUG "failed to create dma pool %s.\n", pool_name);
 			goto err_out;
 	}
-	mvi->tags_num = slot_nr;
 
 	return 0;
 err_out:
@@ -367,8 +366,8 @@ static struct mvs_info *mvs_pci_alloc(struct pci_dev *pdev,
 	mvi->sas = sha;
 	mvi->shost = shost;
 
-	mvi->tags = kzalloc(MVS_CHIP_SLOT_SZ>>3, GFP_KERNEL);
-	if (!mvi->tags)
+	mvi->rsvd_tags = bitmap_zalloc(MVS_RSVD_SLOTS, GFP_KERNEL);
+	if (!mvi->rsvd_tags)
 		goto err_out;
 
 	if (MVS_CHIP_DISP->chip_ioremap(mvi))
@@ -469,6 +468,8 @@ static void  mvs_post_sas_ha_init(struct Scsi_Host *shost,
 	else
 		can_queue = MVS_CHIP_SLOT_SZ;
 
+	can_queue -= MVS_RSVD_SLOTS;
+
 	shost->sg_tablesize = min_t(u16, SG_ALL, MVS_MAX_SG);
 	shost->can_queue = can_queue;
 	mvi->shost->cmd_per_lun = MVS_QUEUE_SIZE;
diff --git a/drivers/scsi/mvsas/mv_sas.c b/drivers/scsi/mvsas/mv_sas.c
index 3aed5e3e0c8cd..9978c424214cd 100644
--- a/drivers/scsi/mvsas/mv_sas.c
+++ b/drivers/scsi/mvsas/mv_sas.c
@@ -20,31 +20,34 @@ static int mvs_find_tag(struct mvs_info *mvi, struct sas_task *task, u32 *tag)
 	return 0;
 }
 
-void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
+static void mvs_tag_clear(struct mvs_info *mvi, u32 tag)
 {
-	void *bitmap = mvi->tags;
+	void *bitmap = mvi->rsvd_tags;
 	clear_bit(tag, bitmap);
 }
 
-void mvs_tag_free(struct mvs_info *mvi, u32 tag)
+static void mvs_tag_free(struct mvs_info *mvi, u32 tag)
 {
+	if (tag >= MVS_RSVD_SLOTS)
+		return;
+
 	mvs_tag_clear(mvi, tag);
 }
 
-void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
+static void mvs_tag_set(struct mvs_info *mvi, unsigned int tag)
 {
-	void *bitmap = mvi->tags;
+	void *bitmap = mvi->rsvd_tags;
 	set_bit(tag, bitmap);
 }
 
-inline int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
+static int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out)
 {
 	unsigned int index, tag;
-	void *bitmap = mvi->tags;
+	void *bitmap = mvi->rsvd_tags;
 
-	index = find_first_zero_bit(bitmap, mvi->tags_num);
+	index = find_first_zero_bit(bitmap, MVS_RSVD_SLOTS);
 	tag = index;
-	if (tag >= mvi->tags_num)
+	if (tag >= MVS_RSVD_SLOTS)
 		return -SAS_QUEUE_FULL;
 	mvs_tag_set(mvi, tag);
 	*tag_out = tag;
@@ -696,6 +699,7 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf
 	struct mvs_task_exec_info tei;
 	struct mvs_slot_info *slot;
 	u32 tag = 0xdeadbeef, n_elem = 0;
+	struct request *rq;
 	int rc = 0;
 
 	if (!dev->port) {
@@ -760,9 +764,14 @@ static int mvs_task_prep(struct sas_task *task, struct mvs_info *mvi, int is_tmf
 		n_elem = task->num_scatter;
 	}
 
-	rc = mvs_tag_alloc(mvi, &tag);
-	if (rc)
-		goto err_out;
+	rq = sas_task_find_rq(task);
+	if (rq) {
+		tag = rq->tag + MVS_RSVD_SLOTS;
+	} else {
+		rc = mvs_tag_alloc(mvi, &tag);
+		if (rc)
+			goto err_out;
+	}
 
 	slot = &mvi->slot_info[tag];
 
@@ -857,7 +866,7 @@ int mvs_queue_command(struct sas_task *task, gfp_t gfp_flags)
 static void mvs_slot_free(struct mvs_info *mvi, u32 rx_desc)
 {
 	u32 slot_idx = rx_desc & RXQ_SLOT_MASK;
-	mvs_tag_clear(mvi, slot_idx);
+	mvs_tag_free(mvi, slot_idx);
 }
 
 static void mvs_slot_task_free(struct mvs_info *mvi, struct sas_task *task,
diff --git a/drivers/scsi/mvsas/mv_sas.h b/drivers/scsi/mvsas/mv_sas.h
index fe57665bdb505..68df771e29759 100644
--- a/drivers/scsi/mvsas/mv_sas.h
+++ b/drivers/scsi/mvsas/mv_sas.h
@@ -370,8 +370,7 @@ struct mvs_info {
 	u32 chip_id;
 	const struct mvs_chip_info *chip;
 
-	int tags_num;
-	unsigned long *tags;
+	unsigned long *rsvd_tags;
 	/* further per-slot information */
 	struct mvs_phy phy[MVS_MAX_PHYS];
 	struct mvs_port port[MVS_MAX_PHYS];
@@ -424,10 +423,6 @@ struct mvs_task_exec_info {
 
 /******************** function prototype *********************/
 void mvs_get_sas_addr(void *buf, u32 buflen);
-void mvs_tag_clear(struct mvs_info *mvi, u32 tag);
-void mvs_tag_free(struct mvs_info *mvi, u32 tag);
-void mvs_tag_set(struct mvs_info *mvi, unsigned int tag);
-int mvs_tag_alloc(struct mvs_info *mvi, u32 *tag_out);
 void mvs_iounmap(void __iomem *regs);
 int mvs_ioremap(struct mvs_info *mvi, int bar, int bar_ex);
 void mvs_phys_reset(struct mvs_info *mvi, u32 phy_mask, int hard);
-- 
GitLab


From 5f62639dc2b668d8fa3bd3d4a92cf6e51b7574c6 Mon Sep 17 00:00:00 2001
From: Igor Pylypiv <ipylypiv@google.com>
Date: Fri, 7 Oct 2022 16:07:51 -0700
Subject: [PATCH 0301/4122] scsi: pm80xx: Remove unused reset_in_progress flag
 logic

The reset_in_progress flag was never set.

Signed-off-by: Igor Pylypiv <ipylypiv@google.com>
Link: https://lore.kernel.org/r/20221007230751.309363-1-ipylypiv@google.com
Reviewed-by: Andrew Konecki <awkonecki@google.com>
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_sas.h | 1 -
 drivers/scsi/pm8001/pm80xx_hwi.c | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/drivers/scsi/pm8001/pm8001_sas.h b/drivers/scsi/pm8001/pm8001_sas.h
index cf5f1b091959f..dc1f4d958e03d 100644
--- a/drivers/scsi/pm8001/pm8001_sas.h
+++ b/drivers/scsi/pm8001/pm8001_sas.h
@@ -534,7 +534,6 @@ struct pm8001_hba_info {
 	bool			controller_fatal_error;
 	const struct firmware 	*fw_image;
 	struct isr_param irq_vector[PM8001_MAX_MSIX_VEC];
-	u32			reset_in_progress;
 	u32			non_fatal_count;
 	u32			non_fatal_read_length;
 	u32 max_q_num;
diff --git a/drivers/scsi/pm8001/pm80xx_hwi.c b/drivers/scsi/pm8001/pm80xx_hwi.c
index bc71db442dd9c..9584cadc42013 100644
--- a/drivers/scsi/pm8001/pm80xx_hwi.c
+++ b/drivers/scsi/pm8001/pm80xx_hwi.c
@@ -3440,10 +3440,6 @@ static int mpi_hw_event(struct pm8001_hba_info *pm8001_ha, void *piomb)
 	case HW_EVENT_PHY_DOWN:
 		pm8001_dbg(pm8001_ha, MSG, "HW_EVENT_PHY_DOWN\n");
 		hw_event_phy_down(pm8001_ha, piomb);
-		if (pm8001_ha->reset_in_progress) {
-			pm8001_dbg(pm8001_ha, MSG, "Reset in progress\n");
-			return 0;
-		}
 		phy->phy_attached = 0;
 		phy->phy_state = PHY_LINK_DISABLE;
 		break;
-- 
GitLab


From e6f8a22ff4a14aeada44c8f78dfb7503f2ca318f Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Sun, 9 Oct 2022 14:02:49 +0800
Subject: [PATCH 0302/4122] scsi: qedf: Remove set but unused variable 'page'

The variable page is not used in the function, so delete it.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2348
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Link: https://lore.kernel.org/r/20221009060249.40178-1-jiapeng.chong@linux.alibaba.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qedf/qedf_main.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/scsi/qedf/qedf_main.c b/drivers/scsi/qedf/qedf_main.c
index e045c6e250902..35e16600fc637 100644
--- a/drivers/scsi/qedf/qedf_main.c
+++ b/drivers/scsi/qedf/qedf_main.c
@@ -2951,7 +2951,6 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf)
 	int i;
 	struct scsi_bd *pbl;
 	u64 *list;
-	dma_addr_t page;
 
 	/* Alloc dma memory for BDQ buffers */
 	for (i = 0; i < QEDF_BDQ_SIZE; i++) {
@@ -3012,11 +3011,9 @@ static int qedf_alloc_bdq(struct qedf_ctx *qedf)
 	qedf->bdq_pbl_list_num_entries = qedf->bdq_pbl_mem_size /
 	    QEDF_PAGE_SIZE;
 	list = (u64 *)qedf->bdq_pbl_list;
-	page = qedf->bdq_pbl_list_dma;
 	for (i = 0; i < qedf->bdq_pbl_list_num_entries; i++) {
 		*list = qedf->bdq_pbl_dma;
 		list++;
-		page += QEDF_PAGE_SIZE;
 	}
 
 	return 0;
-- 
GitLab


From 4fc66e7b16adf054e8dc7a5cd189085b8f545091 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Mon, 17 Oct 2022 09:43:19 -0700
Subject: [PATCH 0303/4122] scsi: lpfc: Set sli4_param's cmf option to zero
 when CMF is turned off

Add missed clearing of phba->sli4_hba.pc_sli4_params.cmf when CMF is turned
off.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221017164323.14536-1-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 99d06dc7ddf6b..768294b9bc0b0 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -8354,6 +8354,7 @@ no_cmf:
 			phba->cgn_i = NULL;
 			/* Ensure CGN Mode is off */
 			phba->cmf_active_mode = LPFC_CFG_OFF;
+			sli4_params->cmf = 0;
 			return 0;
 		}
 	}
-- 
GitLab


From c44e50f4a0ec00c2298f31f91bc2c3e9bbd81c7e Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Mon, 17 Oct 2022 09:43:20 -0700
Subject: [PATCH 0304/4122] scsi: lpfc: Fix hard lockup when reading the
 rx_monitor from debugfs

During I/O and simultaneous cat of /sys/kernel/debug/lpfc/fnX/rx_monitor, a
hard lockup similar to the call trace below may occur.

The spin_lock_bh in lpfc_rx_monitor_report is not protecting from timer
interrupts as expected, so change the strength of the spin lock to _irq.

Kernel panic - not syncing: Hard LOCKUP
CPU: 3 PID: 110402 Comm: cat Kdump: loaded

exception RIP: native_queued_spin_lock_slowpath+91

[IRQ stack]
 native_queued_spin_lock_slowpath at ffffffffb814e30b
 _raw_spin_lock at ffffffffb89a667a
 lpfc_rx_monitor_record at ffffffffc0a73a36 [lpfc]
 lpfc_cmf_timer at ffffffffc0abbc67 [lpfc]
 __hrtimer_run_queues at ffffffffb8184250
 hrtimer_interrupt at ffffffffb8184ab0
 smp_apic_timer_interrupt at ffffffffb8a026ba
 apic_timer_interrupt at ffffffffb8a01c4f
[End of IRQ stack]

 apic_timer_interrupt at ffffffffb8a01c4f
 lpfc_rx_monitor_report at ffffffffc0a73c80 [lpfc]
 lpfc_rx_monitor_read at ffffffffc0addde1 [lpfc]
 full_proxy_read at ffffffffb83e7fc3
 vfs_read at ffffffffb833fe71
 ksys_read at ffffffffb83402af
 do_syscall_64 at ffffffffb800430b
 entry_SYSCALL_64_after_hwframe at ffffffffb8a000ad

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221017164323.14536-2-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 768294b9bc0b0..86ba45ac91c82 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -8150,10 +8150,10 @@ u32 lpfc_rx_monitor_report(struct lpfc_hba *phba,
 					"IO_cnt", "Info", "BWutil(ms)");
 	}
 
-	/* Needs to be _bh because record is called from timer interrupt
+	/* Needs to be _irq because record is called from timer interrupt
 	 * context
 	 */
-	spin_lock_bh(ring_lock);
+	spin_lock_irq(ring_lock);
 	while (*head_idx != *tail_idx) {
 		entry = &ring[*head_idx];
 
@@ -8197,7 +8197,7 @@ u32 lpfc_rx_monitor_report(struct lpfc_hba *phba,
 		if (cnt >= max_read_entries)
 			break;
 	}
-	spin_unlock_bh(ring_lock);
+	spin_unlock_irq(ring_lock);
 
 	return cnt;
 }
-- 
GitLab


From eaf660e4282ba11239704b2b89ae94feae2010e0 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Mon, 17 Oct 2022 09:43:21 -0700
Subject: [PATCH 0305/4122] scsi: lpfc: Log when congestion management limits
 are in effect

When bandwidth reduces from or recovers back to 100% due to congestion
management, log the event.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221017164323.14536-3-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 86ba45ac91c82..d25afc9dde142 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1848,6 +1848,18 @@ lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 				  phba->cmf_link_byte_count);
 		bwpcent = div64_u64(bw * 100 + slop,
 				    phba->cmf_link_byte_count);
+
+		if (phba->cmf_max_bytes_per_interval < bw &&
+		    bwpcent > 95)
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6208 Congestion bandwidth "
+					"limits removed\n");
+		else if ((phba->cmf_max_bytes_per_interval > bw) &&
+			 ((bwpcent + pcent) <= 100) && ((bwpcent + pcent) > 95))
+			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
+					"6209 Congestion bandwidth "
+					"limits in effect\n");
+
 		if (asig) {
 			lpfc_printf_log(phba, KERN_INFO, LOG_CGN_MGMT,
 					"6237 BW Threshold %lld%% (%lld): "
-- 
GitLab


From 479b0917e4477f49df2e3be454aac3cfa5dec171 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Mon, 17 Oct 2022 09:43:22 -0700
Subject: [PATCH 0306/4122] scsi: lpfc: Create a sysfs entry called
 lpfc_xcvr_data for transceiver info

The DUMP_MEMORY mailbox command is implemented for page A0 and A2 to
retrieve transceiver information from firmware.

The mailbox command output is then formatted to print raw data values for
userspace to parse via sysfs.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221017164323.14536-4-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_attr.c | 118 +++++++++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_crtn.h |   3 +
 drivers/scsi/lpfc/lpfc_els.c  | 128 ++++++++++++++++++++++++++++++++++
 drivers/scsi/lpfc/lpfc_hw4.h  |   5 +-
 4 files changed, 252 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index ef1481326fd7d..030ad1d59cbd6 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -1877,6 +1877,122 @@ lpfc_set_trunking(struct lpfc_hba *phba, char *buff_out)
 	return 0;
 }
 
+static ssize_t
+lpfc_xcvr_data_show(struct device *dev, struct device_attribute *attr,
+		    char *buf)
+{
+	struct Scsi_Host  *shost = class_to_shost(dev);
+	struct lpfc_vport *vport = (struct lpfc_vport *)shost->hostdata;
+	struct lpfc_hba   *phba = vport->phba;
+	int rc;
+	int len = 0;
+	struct lpfc_rdp_context	*rdp_context;
+	u16 temperature;
+	u16 rx_power;
+	u16 tx_bias;
+	u16 tx_power;
+	u16 vcc;
+	char chbuf[128];
+	u16 wavelength = 0;
+	struct sff_trasnceiver_codes_byte7 *trasn_code_byte7;
+
+	/* Get transceiver information */
+	rdp_context = kmalloc(sizeof(*rdp_context), GFP_KERNEL);
+
+	rc = lpfc_get_sfp_info_wait(phba, rdp_context);
+	if (rc) {
+		len = scnprintf(buf, PAGE_SIZE - len, "SFP info NA:\n");
+		goto out_free_rdp;
+	}
+
+	strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_NAME], 16);
+	chbuf[16] = 0;
+
+	len = scnprintf(buf, PAGE_SIZE - len, "VendorName:\t%s\n", chbuf);
+	len += scnprintf(buf + len, PAGE_SIZE - len,
+			 "VendorOUI:\t%02x-%02x-%02x\n",
+			 (uint8_t)rdp_context->page_a0[SSF_VENDOR_OUI],
+			 (uint8_t)rdp_context->page_a0[SSF_VENDOR_OUI + 1],
+			 (uint8_t)rdp_context->page_a0[SSF_VENDOR_OUI + 2]);
+	strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_PN], 16);
+	chbuf[16] = 0;
+	len += scnprintf(buf + len, PAGE_SIZE - len, "VendorPN:\t%s\n", chbuf);
+	strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_SN], 16);
+	chbuf[16] = 0;
+	len += scnprintf(buf + len, PAGE_SIZE - len, "VendorSN:\t%s\n", chbuf);
+	strncpy(chbuf, &rdp_context->page_a0[SSF_VENDOR_REV], 4);
+	chbuf[4] = 0;
+	len += scnprintf(buf + len, PAGE_SIZE - len, "VendorRev:\t%s\n", chbuf);
+	strncpy(chbuf, &rdp_context->page_a0[SSF_DATE_CODE], 8);
+	chbuf[8] = 0;
+	len += scnprintf(buf + len, PAGE_SIZE - len, "DateCode:\t%s\n", chbuf);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "Identifier:\t%xh\n",
+			 (uint8_t)rdp_context->page_a0[SSF_IDENTIFIER]);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "ExtIdentifier:\t%xh\n",
+			 (uint8_t)rdp_context->page_a0[SSF_EXT_IDENTIFIER]);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "Connector:\t%xh\n",
+			 (uint8_t)rdp_context->page_a0[SSF_CONNECTOR]);
+	wavelength = (rdp_context->page_a0[SSF_WAVELENGTH_B1] << 8) |
+		      rdp_context->page_a0[SSF_WAVELENGTH_B0];
+
+	len += scnprintf(buf + len, PAGE_SIZE - len, "Wavelength:\t%d nm\n",
+			 wavelength);
+	trasn_code_byte7 = (struct sff_trasnceiver_codes_byte7 *)
+			&rdp_context->page_a0[SSF_TRANSCEIVER_CODE_B7];
+
+	len += scnprintf(buf + len, PAGE_SIZE - len, "Speeds: \t");
+		if (*(uint8_t *)trasn_code_byte7 == 0) {
+			len += scnprintf(buf + len, PAGE_SIZE - len,
+					 "Unknown\n");
+		} else {
+			if (trasn_code_byte7->fc_sp_100MB)
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						 "1 ");
+			if (trasn_code_byte7->fc_sp_200mb)
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						 "2 ");
+			if (trasn_code_byte7->fc_sp_400MB)
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						 "4 ");
+			if (trasn_code_byte7->fc_sp_800MB)
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						 "8 ");
+			if (trasn_code_byte7->fc_sp_1600MB)
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						 "16 ");
+			if (trasn_code_byte7->fc_sp_3200MB)
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						 "32 ");
+			if (trasn_code_byte7->speed_chk_ecc)
+				len += scnprintf(buf + len, PAGE_SIZE - len,
+						 "64 ");
+			len += scnprintf(buf + len, PAGE_SIZE - len, "GB\n");
+		}
+	temperature = (rdp_context->page_a2[SFF_TEMPERATURE_B1] << 8 |
+		       rdp_context->page_a2[SFF_TEMPERATURE_B0]);
+	vcc = (rdp_context->page_a2[SFF_VCC_B1] << 8 |
+	       rdp_context->page_a2[SFF_VCC_B0]);
+	tx_power = (rdp_context->page_a2[SFF_TXPOWER_B1] << 8 |
+		    rdp_context->page_a2[SFF_TXPOWER_B0]);
+	tx_bias = (rdp_context->page_a2[SFF_TX_BIAS_CURRENT_B1] << 8 |
+		   rdp_context->page_a2[SFF_TX_BIAS_CURRENT_B0]);
+	rx_power = (rdp_context->page_a2[SFF_RXPOWER_B1] << 8 |
+		    rdp_context->page_a2[SFF_RXPOWER_B0]);
+
+	len += scnprintf(buf + len, PAGE_SIZE - len,
+			 "Temperature:\tx%04x C\n", temperature);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "Vcc:\t\tx%04x V\n", vcc);
+	len += scnprintf(buf + len, PAGE_SIZE - len,
+			 "TxBiasCurrent:\tx%04x mA\n", tx_bias);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "TxPower:\tx%04x mW\n",
+			 tx_power);
+	len += scnprintf(buf + len, PAGE_SIZE - len, "RxPower:\tx%04x mW\n",
+			 rx_power);
+out_free_rdp:
+	kfree(rdp_context);
+	return len;
+}
+
 /**
  * lpfc_board_mode_show - Return the state of the board
  * @dev: class device that is converted into a Scsi_host.
@@ -2810,6 +2926,7 @@ static DEVICE_ATTR_RO(lpfc_drvr_version);
 static DEVICE_ATTR_RO(lpfc_enable_fip);
 static DEVICE_ATTR(board_mode, S_IRUGO | S_IWUSR,
 		   lpfc_board_mode_show, lpfc_board_mode_store);
+static DEVICE_ATTR_RO(lpfc_xcvr_data);
 static DEVICE_ATTR(issue_reset, S_IWUSR, NULL, lpfc_issue_reset);
 static DEVICE_ATTR(max_vpi, S_IRUGO, lpfc_max_vpi_show, NULL);
 static DEVICE_ATTR(used_vpi, S_IRUGO, lpfc_used_vpi_show, NULL);
@@ -5906,6 +6023,7 @@ static struct attribute *lpfc_hba_attrs[] = {
 	&dev_attr_lpfc_fcp_wait_abts_rsp.attr,
 	&dev_attr_nport_evt_cnt.attr,
 	&dev_attr_board_mode.attr,
+	&dev_attr_lpfc_xcvr_data.attr,
 	&dev_attr_max_vpi.attr,
 	&dev_attr_used_vpi.attr,
 	&dev_attr_max_rpi.attr,
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index d2d207791056c..8928f016d09e2 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -687,3 +687,6 @@ int lpfc_issue_els_qfpa(struct lpfc_vport *vport);
 
 void lpfc_sli_rpi_release(struct lpfc_vport *vport,
 			  struct lpfc_nodelist *ndlp);
+
+int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
+			   struct lpfc_rdp_context *rdp_context);
diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 863b2125fed6c..2b03210264bb0 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -7190,6 +7190,134 @@ rdp_fail:
 	return 1;
 }
 
+int lpfc_get_sfp_info_wait(struct lpfc_hba *phba,
+			   struct lpfc_rdp_context *rdp_context)
+{
+	LPFC_MBOXQ_t *mbox = NULL;
+	int rc;
+	struct lpfc_dmabuf *mp;
+	struct lpfc_dmabuf *mpsave;
+	void *virt;
+	MAILBOX_t *mb;
+
+	mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
+	if (!mbox) {
+		lpfc_printf_log(phba, KERN_WARNING, LOG_MBOX | LOG_ELS,
+				"7205 failed to allocate mailbox memory");
+		return 1;
+	}
+
+	if (lpfc_sli4_dump_page_a0(phba, mbox))
+		goto sfp_fail;
+	mp = mbox->ctx_buf;
+	mpsave = mp;
+	virt = mp->virt;
+	if (phba->sli_rev < LPFC_SLI_REV4) {
+		mb = &mbox->u.mb;
+		mb->un.varDmp.cv = 1;
+		mb->un.varDmp.co = 1;
+		mb->un.varWords[2] = 0;
+		mb->un.varWords[3] = DMP_SFF_PAGE_A0_SIZE / 4;
+		mb->un.varWords[4] = 0;
+		mb->un.varWords[5] = 0;
+		mb->un.varWords[6] = 0;
+		mb->un.varWords[7] = 0;
+		mb->un.varWords[8] = 0;
+		mb->un.varWords[9] = 0;
+		mb->un.varWords[10] = 0;
+		mbox->in_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
+		mbox->out_ext_byte_len = DMP_SFF_PAGE_A0_SIZE;
+		mbox->mbox_offset_word = 5;
+		mbox->ctx_buf = virt;
+	} else {
+		bf_set(lpfc_mbx_memory_dump_type3_length,
+		       &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A0_SIZE);
+		mbox->u.mqe.un.mem_dump_type3.addr_lo = putPaddrLow(mp->phys);
+		mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
+	}
+	mbox->vport = phba->pport;
+	mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+
+	rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
+	if (rc == MBX_NOT_FINISHED) {
+		rc = 1;
+		goto error;
+	}
+
+	if (phba->sli_rev == LPFC_SLI_REV4)
+		mp = (struct lpfc_dmabuf *)(mbox->ctx_buf);
+	else
+		mp = mpsave;
+
+	if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) {
+		rc = 1;
+		goto error;
+	}
+
+	lpfc_sli_bemem_bcopy(mp->virt, &rdp_context->page_a0,
+			     DMP_SFF_PAGE_A0_SIZE);
+
+	memset(mbox, 0, sizeof(*mbox));
+	memset(mp->virt, 0, DMP_SFF_PAGE_A2_SIZE);
+	INIT_LIST_HEAD(&mp->list);
+
+	/* save address for completion */
+	mbox->ctx_buf = mp;
+	mbox->vport = phba->pport;
+
+	bf_set(lpfc_mqe_command, &mbox->u.mqe, MBX_DUMP_MEMORY);
+	bf_set(lpfc_mbx_memory_dump_type3_type,
+	       &mbox->u.mqe.un.mem_dump_type3, DMP_LMSD);
+	bf_set(lpfc_mbx_memory_dump_type3_link,
+	       &mbox->u.mqe.un.mem_dump_type3, phba->sli4_hba.physical_port);
+	bf_set(lpfc_mbx_memory_dump_type3_page_no,
+	       &mbox->u.mqe.un.mem_dump_type3, DMP_PAGE_A2);
+	if (phba->sli_rev < LPFC_SLI_REV4) {
+		mb = &mbox->u.mb;
+		mb->un.varDmp.cv = 1;
+		mb->un.varDmp.co = 1;
+		mb->un.varWords[2] = 0;
+		mb->un.varWords[3] = DMP_SFF_PAGE_A2_SIZE / 4;
+		mb->un.varWords[4] = 0;
+		mb->un.varWords[5] = 0;
+		mb->un.varWords[6] = 0;
+		mb->un.varWords[7] = 0;
+		mb->un.varWords[8] = 0;
+		mb->un.varWords[9] = 0;
+		mb->un.varWords[10] = 0;
+		mbox->in_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
+		mbox->out_ext_byte_len = DMP_SFF_PAGE_A2_SIZE;
+		mbox->mbox_offset_word = 5;
+		mbox->ctx_buf = virt;
+	} else {
+		bf_set(lpfc_mbx_memory_dump_type3_length,
+		       &mbox->u.mqe.un.mem_dump_type3, DMP_SFF_PAGE_A2_SIZE);
+		mbox->u.mqe.un.mem_dump_type3.addr_lo = putPaddrLow(mp->phys);
+		mbox->u.mqe.un.mem_dump_type3.addr_hi = putPaddrHigh(mp->phys);
+	}
+
+	mbox->ctx_ndlp = (struct lpfc_rdp_context *)rdp_context;
+	rc = lpfc_sli_issue_mbox_wait(phba, mbox, 30);
+	if (bf_get(lpfc_mqe_status, &mbox->u.mqe)) {
+		rc = 1;
+		goto error;
+	}
+	rc = 0;
+
+	lpfc_sli_bemem_bcopy(mp->virt, &rdp_context->page_a2,
+			     DMP_SFF_PAGE_A2_SIZE);
+
+error:
+	mbox->ctx_buf = mpsave;
+	lpfc_mbox_rsrc_cleanup(phba, mbox, MBOX_THD_UNLOCKED);
+
+	return rc;
+
+sfp_fail:
+	mempool_free(mbox, phba->mbox_mem_pool);
+	return 1;
+}
+
 /*
  * lpfc_els_rcv_rdp - Process an unsolicited RDP ELS.
  * @vport: pointer to a host virtual N_Port data structure.
diff --git a/drivers/scsi/lpfc/lpfc_hw4.h b/drivers/scsi/lpfc/lpfc_hw4.h
index 5288fc69908a6..fb3504dbb8993 100644
--- a/drivers/scsi/lpfc/lpfc_hw4.h
+++ b/drivers/scsi/lpfc/lpfc_hw4.h
@@ -3162,7 +3162,8 @@ struct lpfc_mbx_memory_dump_type3 {
 #define SFF_LENGTH_COPPER		18
 #define SSF_LENGTH_50UM_OM3		19
 #define SSF_VENDOR_NAME			20
-#define SSF_VENDOR_OUI			36
+#define SSF_TRANSCEIVER2		36
+#define SSF_VENDOR_OUI			37
 #define SSF_VENDOR_PN			40
 #define SSF_VENDOR_REV			56
 #define SSF_WAVELENGTH_B1		60
@@ -3281,7 +3282,7 @@ struct sff_trasnceiver_codes_byte6 {
 
 struct sff_trasnceiver_codes_byte7 {
 	uint8_t fc_sp_100MB:1;   /*  100 MB/sec */
-	uint8_t reserve:1;
+	uint8_t speed_chk_ecc:1;
 	uint8_t fc_sp_200mb:1;   /*  200 MB/sec */
 	uint8_t fc_sp_3200MB:1;  /* 3200 MB/sec */
 	uint8_t fc_sp_400MB:1;   /*  400 MB/sec */
-- 
GitLab


From 24b3e45ca9c53185baec34488efcb75bbe162f7a Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Mon, 17 Oct 2022 09:43:23 -0700
Subject: [PATCH 0307/4122] scsi: lpfc: Update lpfc version to 14.2.0.8

Update lpfc version to 14.2.0.8

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221017164323.14536-5-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 192d5630a44da..378eba7b09d9f 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "14.2.0.7"
+#define LPFC_DRIVER_VERSION "14.2.0.8"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
-- 
GitLab


From 978b7922d3dca672b41bb4b8ce6c06ab77112741 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:49 -0700
Subject: [PATCH 0308/4122] scsi: core: Fix a race between scsi_done() and
 scsi_timeout()

If there is a race between scsi_done() and scsi_timeout() and if
scsi_timeout() loses the race, scsi_timeout() should not reset the request
timer. Hence change the return value for this case from BLK_EH_RESET_TIMER
into BLK_EH_DONE.

Although the block layer holds a reference on a request (req->ref) while
calling a timeout handler, restarting the timer (blk_add_timer()) while a
request is being completed is racy.

Reviewed-by: Mike Christie <michael.christie@oracle.com>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Hannes Reinecke <hare@suse.de>
Reported-by: Adrian Hunter <adrian.hunter@intel.com>
Fixes: 15f73f5b3e59 ("blk-mq: move failure injection out of blk_mq_complete_request")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-2-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_error.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 6995c89792300..02520f9123066 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -343,19 +343,11 @@ enum blk_eh_timer_return scsi_timeout(struct request *req)
 
 	if (rtn == BLK_EH_DONE) {
 		/*
-		 * Set the command to complete first in order to prevent a real
-		 * completion from releasing the command while error handling
-		 * is using it. If the command was already completed, then the
-		 * lower level driver beat the timeout handler, and it is safe
-		 * to return without escalating error recovery.
-		 *
-		 * If timeout handling lost the race to a real completion, the
-		 * block layer may ignore that due to a fake timeout injection,
-		 * so return RESET_TIMER to allow error handling another shot
-		 * at this command.
+		 * If scsi_done() has already set SCMD_STATE_COMPLETE, do not
+		 * modify *scmd.
 		 */
 		if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
-			return BLK_EH_RESET_TIMER;
+			return BLK_EH_DONE;
 		if (scsi_abort_command(scmd) != SUCCESS) {
 			set_host_byte(scmd, DID_TIME_OUT);
 			scsi_eh_scmd_add(scmd);
-- 
GitLab


From dee7121e8c0a3ce41af2b02d516f54eaec32abcd Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:50 -0700
Subject: [PATCH 0309/4122] scsi: core: Change the return type of
 .eh_timed_out()

Commit 6600593cbd93 ("block: rename BLK_EH_NOT_HANDLED to BLK_EH_DONE")
made it impossible for .eh_timed_out() implementations to call
scsi_done() without causing a crash.

Restore support for SCSI timeout handlers to call scsi_done() as follows:

 * Change all .eh_timed_out() handlers as follows:

   - Change the return type into enum scsi_timeout_action.
   - Change BLK_EH_RESET_TIMER into SCSI_EH_RESET_TIMER.
   - Change BLK_EH_DONE into SCSI_EH_NOT_HANDLED.

 * In scsi_timeout(), convert the SCSI_EH_* values into BLK_EH_* values.

Reviewed-by: Lee Duncan <lduncan@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-3-bvanassche@acm.org
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 Documentation/scsi/scsi_eh.rst            |  7 +++--
 drivers/message/fusion/mptsas.c           |  8 +++---
 drivers/scsi/libiscsi.c                   | 26 +++++++++---------
 drivers/scsi/megaraid/megaraid_sas_base.c |  7 +++--
 drivers/scsi/mvumi.c                      |  4 +--
 drivers/scsi/qla4xxx/ql4_os.c             |  8 +++---
 drivers/scsi/scsi_error.c                 | 33 +++++++++++++----------
 drivers/scsi/scsi_transport_fc.c          |  7 +++--
 drivers/scsi/scsi_transport_srp.c         |  8 +++---
 drivers/scsi/storvsc_drv.c                |  4 +--
 drivers/scsi/virtio_scsi.c                |  4 +--
 include/scsi/libiscsi.h                   |  2 +-
 include/scsi/scsi_host.h                  | 14 +++++++++-
 include/scsi/scsi_transport_fc.h          |  2 +-
 include/scsi/scsi_transport_srp.h         |  2 +-
 15 files changed, 77 insertions(+), 59 deletions(-)

diff --git a/Documentation/scsi/scsi_eh.rst b/Documentation/scsi/scsi_eh.rst
index bad624fab8231..104d09e9af099 100644
--- a/Documentation/scsi/scsi_eh.rst
+++ b/Documentation/scsi/scsi_eh.rst
@@ -92,14 +92,17 @@ The timeout handler is scsi_timeout().  When a timeout occurs, this function
  1. invokes optional hostt->eh_timed_out() callback.  Return value can
     be one of
 
-    - BLK_EH_RESET_TIMER
+    - SCSI_EH_RESET_TIMER
 	This indicates that more time is required to finish the
 	command.  Timer is restarted.
 
-    - BLK_EH_DONE
+    - SCSI_EH_NOT_HANDLED
         eh_timed_out() callback did not handle the command.
 	Step #2 is taken.
 
+    - SCSI_EH_DONE
+        eh_timed_out() completed the command.
+
  2. scsi_abort_command() is invoked to schedule an asynchronous abort which may
     issue a retry scmd->allowed + 1 times.  Asynchronous aborts are not invoked
     for commands for which the SCSI_EH_ABORT_SCHEDULED flag is set (this
diff --git a/drivers/message/fusion/mptsas.c b/drivers/message/fusion/mptsas.c
index 34901bcd1ce81..88fe4a860ae51 100644
--- a/drivers/message/fusion/mptsas.c
+++ b/drivers/message/fusion/mptsas.c
@@ -1952,12 +1952,12 @@ mptsas_qcmd(struct Scsi_Host *shost, struct scsi_cmnd *SCpnt)
  *	@sc: scsi command that the midlayer is about to time out
  *
  **/
-static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc)
+static enum scsi_timeout_action mptsas_eh_timed_out(struct scsi_cmnd *sc)
 {
 	MPT_SCSI_HOST *hd;
 	MPT_ADAPTER   *ioc;
 	VirtDevice    *vdevice;
-	enum blk_eh_timer_return rc = BLK_EH_DONE;
+	enum scsi_timeout_action rc = SCSI_EH_NOT_HANDLED;
 
 	hd = shost_priv(sc->device->host);
 	if (hd == NULL) {
@@ -1980,7 +1980,7 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc)
 		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT ": %s: ioc is in reset,"
 		    "SML need to reset the timer (sc=%p)\n",
 		    ioc->name, __func__, sc));
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 	}
 	vdevice = sc->device->hostdata;
 	if (vdevice && vdevice->vtarget && (vdevice->vtarget->inDMD
@@ -1988,7 +1988,7 @@ static enum blk_eh_timer_return mptsas_eh_timed_out(struct scsi_cmnd *sc)
 		dtmprintk(ioc, printk(MYIOC_s_WARN_FMT ": %s: target removed "
 		    "or in device removal delay (sc=%p)\n",
 		    ioc->name, __func__, sc));
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 		goto done;
 	}
 
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index d95f4bcdeb2ec..ef2fc860257e6 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -2071,9 +2071,9 @@ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
 		return 0;
 }
 
-enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
+enum scsi_timeout_action iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 {
-	enum blk_eh_timer_return rc = BLK_EH_DONE;
+	enum scsi_timeout_action rc = SCSI_EH_NOT_HANDLED;
 	struct iscsi_task *task = NULL, *running_task;
 	struct iscsi_cls_session *cls_session;
 	struct iscsi_session *session;
@@ -2093,7 +2093,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 		 * Raced with completion. Blk layer has taken ownership
 		 * so let timeout code complete it now.
 		 */
-		rc = BLK_EH_DONE;
+		rc = SCSI_EH_NOT_HANDLED;
 		spin_unlock(&session->back_lock);
 		goto done;
 	}
@@ -2102,7 +2102,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 		 * Racing with the completion path right now, so give it more
 		 * time so that path can complete it like normal.
 		 */
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 		task = NULL;
 		spin_unlock(&session->back_lock);
 		goto done;
@@ -2120,21 +2120,21 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 		if (unlikely(system_state != SYSTEM_RUNNING)) {
 			sc->result = DID_NO_CONNECT << 16;
 			ISCSI_DBG_EH(session, "sc on shutdown, handled\n");
-			rc = BLK_EH_DONE;
+			rc = SCSI_EH_NOT_HANDLED;
 			goto done;
 		}
 		/*
 		 * We are probably in the middle of iscsi recovery so let
 		 * that complete and handle the error.
 		 */
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 		goto done;
 	}
 
 	conn = session->leadconn;
 	if (!conn) {
 		/* In the middle of shuting down */
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 		goto done;
 	}
 
@@ -2151,7 +2151,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 			     "Last data xfer at %lu. Last timeout was at "
 			     "%lu\n.", task->last_xfer, task->last_timeout);
 		task->have_checked_conn = false;
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 		goto done;
 	}
 
@@ -2162,7 +2162,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 	 * and can let the iscsi eh handle it
 	 */
 	if (iscsi_has_ping_timed_out(conn)) {
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 		goto done;
 	}
 
@@ -2200,7 +2200,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 				     task->last_xfer, running_task->last_xfer,
 				     task->last_timeout);
 			spin_unlock(&session->back_lock);
-			rc = BLK_EH_RESET_TIMER;
+			rc = SCSI_EH_RESET_TIMER;
 			goto done;
 		}
 	}
@@ -2216,14 +2216,14 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 	 */
 	if (READ_ONCE(conn->ping_task)) {
 		task->have_checked_conn = true;
-		rc = BLK_EH_RESET_TIMER;
+		rc = SCSI_EH_RESET_TIMER;
 		goto done;
 	}
 
 	/* Make sure there is a transport check done */
 	iscsi_send_nopout(conn, NULL);
 	task->have_checked_conn = true;
-	rc = BLK_EH_RESET_TIMER;
+	rc = SCSI_EH_RESET_TIMER;
 
 done:
 	spin_unlock_bh(&session->frwd_lock);
@@ -2232,7 +2232,7 @@ done:
 		task->last_timeout = jiffies;
 		iscsi_put_task(task);
 	}
-	ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
+	ISCSI_DBG_EH(session, "return %s\n", rc == SCSI_EH_RESET_TIMER ?
 		     "timer reset" : "shutdown or nh");
 	return rc;
 }
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 9be4ba61a076c..6940043a91ae9 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -2927,15 +2927,14 @@ static int megasas_generic_reset(struct scsi_cmnd *scmd)
  * Sets the FW busy flag and reduces the host->can_queue if the
  * cmd has not been completed within the timeout period.
  */
-static enum
-blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
+static enum scsi_timeout_action megasas_reset_timer(struct scsi_cmnd *scmd)
 {
 	struct megasas_instance *instance;
 	unsigned long flags;
 
 	if (time_after(jiffies, scmd->jiffies_at_alloc +
 				(scmd_timeout * 2) * HZ)) {
-		return BLK_EH_DONE;
+		return SCSI_EH_NOT_HANDLED;
 	}
 
 	instance = (struct megasas_instance *)scmd->device->host->hostdata;
@@ -2949,7 +2948,7 @@ blk_eh_timer_return megasas_reset_timer(struct scsi_cmnd *scmd)
 
 		spin_unlock_irqrestore(instance->host->host_lock, flags);
 	}
-	return BLK_EH_RESET_TIMER;
+	return SCSI_EH_RESET_TIMER;
 }
 
 /**
diff --git a/drivers/scsi/mvumi.c b/drivers/scsi/mvumi.c
index 05d3ce9b72dba..b3dcb89186186 100644
--- a/drivers/scsi/mvumi.c
+++ b/drivers/scsi/mvumi.c
@@ -2109,7 +2109,7 @@ out_return_cmd:
 	return 0;
 }
 
-static enum blk_eh_timer_return mvumi_timed_out(struct scsi_cmnd *scmd)
+static enum scsi_timeout_action mvumi_timed_out(struct scsi_cmnd *scmd)
 {
 	struct mvumi_cmd *cmd = mvumi_priv(scmd)->cmd_priv;
 	struct Scsi_Host *host = scmd->device->host;
@@ -2137,7 +2137,7 @@ static enum blk_eh_timer_return mvumi_timed_out(struct scsi_cmnd *scmd)
 	mvumi_return_cmd(mhba, cmd);
 	spin_unlock_irqrestore(mhba->shost->host_lock, flags);
 
-	return BLK_EH_DONE;
+	return SCSI_EH_NOT_HANDLED;
 }
 
 static int
diff --git a/drivers/scsi/qla4xxx/ql4_os.c b/drivers/scsi/qla4xxx/ql4_os.c
index 9e849f6b0d0f7..005502125b270 100644
--- a/drivers/scsi/qla4xxx/ql4_os.c
+++ b/drivers/scsi/qla4xxx/ql4_os.c
@@ -116,7 +116,7 @@ static int qla4xxx_iface_set_param(struct Scsi_Host *shost, void *data,
 static int qla4xxx_get_iface_param(struct iscsi_iface *iface,
 				   enum iscsi_param_type param_type,
 				   int param, char *buf);
-static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc);
+static enum scsi_timeout_action qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc);
 static struct iscsi_endpoint *qla4xxx_ep_connect(struct Scsi_Host *shost,
 						 struct sockaddr *dst_addr,
 						 int non_blocking);
@@ -1871,17 +1871,17 @@ exit_get_stats:
 	return;
 }
 
-static enum blk_eh_timer_return qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc)
+static enum scsi_timeout_action qla4xxx_eh_cmd_timed_out(struct scsi_cmnd *sc)
 {
 	struct iscsi_cls_session *session;
 	unsigned long flags;
-	enum blk_eh_timer_return ret = BLK_EH_DONE;
+	enum scsi_timeout_action ret = SCSI_EH_NOT_HANDLED;
 
 	session = starget_to_session(scsi_target(sc->device));
 
 	spin_lock_irqsave(&session->lock, flags);
 	if (session->state == ISCSI_SESSION_FAILED)
-		ret = BLK_EH_RESET_TIMER;
+		ret = SCSI_EH_RESET_TIMER;
 	spin_unlock_irqrestore(&session->lock, flags);
 
 	return ret;
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 02520f9123066..be2a70c5ac6dc 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -328,7 +328,6 @@ void scsi_eh_scmd_add(struct scsi_cmnd *scmd)
 enum blk_eh_timer_return scsi_timeout(struct request *req)
 {
 	struct scsi_cmnd *scmd = blk_mq_rq_to_pdu(req);
-	enum blk_eh_timer_return rtn = BLK_EH_DONE;
 	struct Scsi_Host *host = scmd->device->host;
 
 	trace_scsi_dispatch_cmd_timeout(scmd);
@@ -338,23 +337,29 @@ enum blk_eh_timer_return scsi_timeout(struct request *req)
 	if (host->eh_deadline != -1 && !host->last_reset)
 		host->last_reset = jiffies;
 
-	if (host->hostt->eh_timed_out)
-		rtn = host->hostt->eh_timed_out(scmd);
-
-	if (rtn == BLK_EH_DONE) {
-		/*
-		 * If scsi_done() has already set SCMD_STATE_COMPLETE, do not
-		 * modify *scmd.
-		 */
-		if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
+	if (host->hostt->eh_timed_out) {
+		switch (host->hostt->eh_timed_out(scmd)) {
+		case SCSI_EH_DONE:
 			return BLK_EH_DONE;
-		if (scsi_abort_command(scmd) != SUCCESS) {
-			set_host_byte(scmd, DID_TIME_OUT);
-			scsi_eh_scmd_add(scmd);
+		case SCSI_EH_RESET_TIMER:
+			return BLK_EH_RESET_TIMER;
+		case SCSI_EH_NOT_HANDLED:
+			break;
 		}
 	}
 
-	return rtn;
+	/*
+	 * If scsi_done() has already set SCMD_STATE_COMPLETE, do not modify
+	 * *scmd.
+	 */
+	if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
+		return BLK_EH_DONE;
+	if (scsi_abort_command(scmd) != SUCCESS) {
+		set_host_byte(scmd, DID_TIME_OUT);
+		scsi_eh_scmd_add(scmd);
+	}
+
+	return BLK_EH_DONE;
 }
 
 /**
diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 8934160c4a33b..0965f8a7134f0 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2530,15 +2530,14 @@ static int fc_vport_match(struct attribute_container *cont,
  * Notes:
  *	This routine assumes no locks are held on entry.
  */
-enum blk_eh_timer_return
-fc_eh_timed_out(struct scsi_cmnd *scmd)
+enum scsi_timeout_action fc_eh_timed_out(struct scsi_cmnd *scmd)
 {
 	struct fc_rport *rport = starget_to_rport(scsi_target(scmd->device));
 
 	if (rport->port_state == FC_PORTSTATE_BLOCKED)
-		return BLK_EH_RESET_TIMER;
+		return SCSI_EH_RESET_TIMER;
 
-	return BLK_EH_DONE;
+	return SCSI_EH_NOT_HANDLED;
 }
 EXPORT_SYMBOL(fc_eh_timed_out);
 
diff --git a/drivers/scsi/scsi_transport_srp.c b/drivers/scsi/scsi_transport_srp.c
index 98a34ed10f1a0..87d0fb8dc5032 100644
--- a/drivers/scsi/scsi_transport_srp.c
+++ b/drivers/scsi/scsi_transport_srp.c
@@ -594,13 +594,13 @@ EXPORT_SYMBOL(srp_reconnect_rport);
  * @scmd: SCSI command.
  *
  * If a timeout occurs while an rport is in the blocked state, ask the SCSI
- * EH to continue waiting (BLK_EH_RESET_TIMER). Otherwise let the SCSI core
- * handle the timeout (BLK_EH_DONE).
+ * EH to continue waiting (SCSI_EH_RESET_TIMER). Otherwise let the SCSI core
+ * handle the timeout (SCSI_EH_NOT_HANDLED).
  *
  * Note: This function is called from soft-IRQ context and with the request
  * queue lock held.
  */
-enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd)
+enum scsi_timeout_action srp_timed_out(struct scsi_cmnd *scmd)
 {
 	struct scsi_device *sdev = scmd->device;
 	struct Scsi_Host *shost = sdev->host;
@@ -611,7 +611,7 @@ enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd)
 	return rport && rport->fast_io_fail_tmo < 0 &&
 		rport->dev_loss_tmo < 0 &&
 		i->f->reset_timer_if_blocked && scsi_device_blocked(sdev) ?
-		BLK_EH_RESET_TIMER : BLK_EH_DONE;
+		SCSI_EH_RESET_TIMER : SCSI_EH_NOT_HANDLED;
 }
 EXPORT_SYMBOL(srp_timed_out);
 
diff --git a/drivers/scsi/storvsc_drv.c b/drivers/scsi/storvsc_drv.c
index bc46721aa01cb..a84194d823471 100644
--- a/drivers/scsi/storvsc_drv.c
+++ b/drivers/scsi/storvsc_drv.c
@@ -1652,13 +1652,13 @@ static int storvsc_host_reset_handler(struct scsi_cmnd *scmnd)
  * be unbounded on Azure.  Reset the timer unconditionally to give the host a
  * chance to perform EH.
  */
-static enum blk_eh_timer_return storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
+static enum scsi_timeout_action storvsc_eh_timed_out(struct scsi_cmnd *scmnd)
 {
 #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS)
 	if (scmnd->device->host->transportt == fc_transport_template)
 		return fc_eh_timed_out(scmnd);
 #endif
-	return BLK_EH_RESET_TIMER;
+	return SCSI_EH_RESET_TIMER;
 }
 
 static bool storvsc_scsi_cmd_ok(struct scsi_cmnd *scmnd)
diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index 2a79ab16134b1..d07d24c06b546 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -731,9 +731,9 @@ static void virtscsi_commit_rqs(struct Scsi_Host *shost, u16 hwq)
  * latencies might be higher than on bare metal.  Reset the timer
  * unconditionally to give the host a chance to perform EH.
  */
-static enum blk_eh_timer_return virtscsi_eh_timed_out(struct scsi_cmnd *scmnd)
+static enum scsi_timeout_action virtscsi_eh_timed_out(struct scsi_cmnd *scmnd)
 {
-	return BLK_EH_RESET_TIMER;
+	return SCSI_EH_RESET_TIMER;
 }
 
 static struct scsi_host_template virtscsi_host_template = {
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
index 654cc3918c948..695eebc6f2c89 100644
--- a/include/scsi/libiscsi.h
+++ b/include/scsi/libiscsi.h
@@ -393,7 +393,7 @@ extern int iscsi_eh_recover_target(struct scsi_cmnd *sc);
 extern int iscsi_eh_session_reset(struct scsi_cmnd *sc);
 extern int iscsi_eh_device_reset(struct scsi_cmnd *sc);
 extern int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc);
-extern enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc);
+extern enum scsi_timeout_action iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc);
 
 /*
  * iSCSI host helpers.
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index e71436183c0d5..587cc767bb677 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -27,6 +27,18 @@ struct scsi_transport_template;
 #define MODE_INITIATOR 0x01
 #define MODE_TARGET 0x02
 
+/**
+ * enum scsi_timeout_action - How to handle a command that timed out.
+ * @SCSI_EH_DONE: The command has already been completed.
+ * @SCSI_EH_RESET_TIMER: Reset the timer and continue waiting for completion.
+ * @SCSI_EH_NOT_HANDLED: The command has not yet finished. Abort the command.
+ */
+enum scsi_timeout_action {
+	SCSI_EH_DONE,
+	SCSI_EH_RESET_TIMER,
+	SCSI_EH_NOT_HANDLED,
+};
+
 struct scsi_host_template {
 	/*
 	 * Put fields referenced in IO submission path together in
@@ -331,7 +343,7 @@ struct scsi_host_template {
 	 *
 	 * Status: OPTIONAL
 	 */
-	enum blk_eh_timer_return (*eh_timed_out)(struct scsi_cmnd *);
+	enum scsi_timeout_action (*eh_timed_out)(struct scsi_cmnd *);
 	/*
 	 * Optional routine that allows the transport to decide if a cmd
 	 * is retryable. Return true if the transport is in a state the
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index e80a7c542c881..3dcda19d35202 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -862,7 +862,7 @@ struct fc_vport *fc_vport_create(struct Scsi_Host *shost, int channel,
 int fc_vport_terminate(struct fc_vport *vport);
 int fc_block_rport(struct fc_rport *rport);
 int fc_block_scsi_eh(struct scsi_cmnd *cmnd);
-enum blk_eh_timer_return fc_eh_timed_out(struct scsi_cmnd *scmd);
+enum scsi_timeout_action fc_eh_timed_out(struct scsi_cmnd *scmd);
 bool fc_eh_should_retry_cmd(struct scsi_cmnd *scmd);
 
 static inline struct Scsi_Host *fc_bsg_to_shost(struct bsg_job *job)
diff --git a/include/scsi/scsi_transport_srp.h b/include/scsi/scsi_transport_srp.h
index d22df12584f99..dfc78aa112ad3 100644
--- a/include/scsi/scsi_transport_srp.h
+++ b/include/scsi/scsi_transport_srp.h
@@ -118,7 +118,7 @@ extern int srp_reconnect_rport(struct srp_rport *rport);
 extern void srp_start_tl_fail_timers(struct srp_rport *rport);
 extern void srp_remove_host(struct Scsi_Host *);
 extern void srp_stop_rport_timers(struct srp_rport *rport);
-enum blk_eh_timer_return srp_timed_out(struct scsi_cmnd *scmd);
+enum scsi_timeout_action srp_timed_out(struct scsi_cmnd *scmd);
 
 /**
  * srp_chkready() - evaluate the transport layer state before I/O
-- 
GitLab


From 310bcaef6d7ed1626bba95dd9b5c5acd189c0e35 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:51 -0700
Subject: [PATCH 0310/4122] scsi: core: Support failing requests while
 recovering

The current behavior for SCSI commands submitted while error recovery is
ongoing is to retry command submission after error recovery has finished.
See also the scsi_host_in_recovery() check in scsi_host_queue_ready(). Add
support for failing SCSI commands while host recovery is in progress. This
functionality will be used to fix a deadlock in the UFS driver.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: John Garry <john.garry@huawei.com>
Cc: Mike Christie <michael.christie@oracle.com>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-4-bvanassche@acm.org
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c  | 8 +++++---
 include/scsi/scsi_cmnd.h | 3 ++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index fa96d3cfdfa30..ec890865abae0 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1341,9 +1341,6 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
 				   struct scsi_device *sdev,
 				   struct scsi_cmnd *cmd)
 {
-	if (scsi_host_in_recovery(shost))
-		return 0;
-
 	if (atomic_read(&shost->host_blocked) > 0) {
 		if (scsi_host_busy(shost) > 0)
 			goto starved;
@@ -1732,6 +1729,11 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 	ret = BLK_STS_RESOURCE;
 	if (!scsi_target_queue_ready(shost, sdev))
 		goto out_put_budget;
+	if (unlikely(scsi_host_in_recovery(shost))) {
+		if (cmd->flags & SCMD_FAIL_IF_RECOVERING)
+			ret = BLK_STS_OFFLINE;
+		goto out_dec_target_busy;
+	}
 	if (!scsi_host_queue_ready(q, shost, sdev, cmd))
 		goto out_dec_target_busy;
 
diff --git a/include/scsi/scsi_cmnd.h b/include/scsi/scsi_cmnd.h
index 7d3622db38edc..c2cb5f69635c6 100644
--- a/include/scsi/scsi_cmnd.h
+++ b/include/scsi/scsi_cmnd.h
@@ -52,8 +52,9 @@ struct scsi_pointer {
 #define SCMD_TAGGED		(1 << 0)
 #define SCMD_INITIALIZED	(1 << 1)
 #define SCMD_LAST		(1 << 2)
+#define SCMD_FAIL_IF_RECOVERING	(1 << 4)
 /* flags preserved across unprep / reprep */
-#define SCMD_PRESERVED_FLAGS	(SCMD_INITIALIZED)
+#define SCMD_PRESERVED_FLAGS	(SCMD_INITIALIZED | SCMD_FAIL_IF_RECOVERING)
 
 /* for scmd->state */
 #define SCMD_STATE_COMPLETE	0
-- 
GitLab


From 1626c7bba1c42499d6753bd919803158e5792f08 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:52 -0700
Subject: [PATCH 0311/4122] scsi: ufs: Remove an outdated comment

Although the host lock had to be held by ufshcd_clk_scaling_start_busy()
callers when that function was introduced, that is no longer the case
today. Hence remove the comment that claims that callers of this function
must hold the host lock.

Reviewed-by: Bean Huo <beanhuo@micron.com>
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-5-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index c8f0fe7400051..bdee494381cab 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -2013,7 +2013,6 @@ static void ufshcd_exit_clk_gating(struct ufs_hba *hba)
 	destroy_workqueue(hba->clk_gating.clk_gating_workq);
 }
 
-/* Must be called with host lock acquired */
 static void ufshcd_clk_scaling_start_busy(struct ufs_hba *hba)
 {
 	bool queue_resume_work = false;
-- 
GitLab


From 836d322d73cb08486ecc50787695175a135e62ba Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:53 -0700
Subject: [PATCH 0312/4122] scsi: ufs: Use 'else' in ufshcd_set_dev_pwr_mode()

Convert if (ret) { ... } if (!ret) { ... } into
if (ret) { ... } else { ... }.

Reviewed-by: Bean Huo <beanhuo@micron.com>
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-6-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index bdee494381cab..db1997e99da2d 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8797,10 +8797,9 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 				scsi_print_sense_hdr(sdp, NULL, &sshdr);
 			ret = -EIO;
 		}
-	}
-
-	if (!ret)
+	} else {
 		hba->curr_dev_pwr_mode = pwr_mode;
+	}
 
 	scsi_device_put(sdp);
 	hba->host->eh_noresume = 0;
-- 
GitLab


From dcd5b7637c6d442d957f73780a03047413ed3a10 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:54 -0700
Subject: [PATCH 0313/4122] scsi: ufs: Reduce the START STOP UNIT timeout

Reduce the START STOP UNIT command timeout to one second since on Android
devices a kernel panic is triggered if an attempt to suspend the system
takes more than 20 seconds. One second should be enough for the START STOP
UNIT command since this command completes in less than a millisecond for
the UFS devices I have access to.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-7-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index db1997e99da2d..f83a0045a129a 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8746,8 +8746,6 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 	struct scsi_device *sdp;
 	unsigned long flags;
 	int ret, retries;
-	unsigned long deadline;
-	int32_t remaining;
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	sdp = hba->ufs_device_wlun;
@@ -8775,14 +8773,9 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 	 * callbacks hence set the RQF_PM flag so that it doesn't resume the
 	 * already suspended childs.
 	 */
-	deadline = jiffies + 10 * HZ;
 	for (retries = 3; retries > 0; --retries) {
-		ret = -ETIMEDOUT;
-		remaining = deadline - jiffies;
-		if (remaining <= 0)
-			break;
 		ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
-				   remaining / HZ, 0, 0, RQF_PM, NULL);
+				   HZ, 0, 0, RQF_PM, NULL);
 		if (!scsi_status_is_check_condition(ret) ||
 				!scsi_sense_valid(&sshdr) ||
 				sshdr.sense_key != UNIT_ATTENTION)
-- 
GitLab


From 579a4e9dbd53978cad8df88dc612837cdd210ce0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:55 -0700
Subject: [PATCH 0314/4122] scsi: ufs: Try harder to change the power mode

Instead of only retrying the START STOP UNIT command if a unit attention is
reported, repeat it if any SCSI error is reported by the device or if the
command timed out.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-8-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index f83a0045a129a..84ca17d29898e 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8776,9 +8776,11 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 	for (retries = 3; retries > 0; --retries) {
 		ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
 				   HZ, 0, 0, RQF_PM, NULL);
-		if (!scsi_status_is_check_condition(ret) ||
-				!scsi_sense_valid(&sshdr) ||
-				sshdr.sense_key != UNIT_ATTENTION)
+		/*
+		 * scsi_execute() only returns a negative value if the request
+		 * queue is dying.
+		 */
+		if (ret <= 0)
 			break;
 	}
 	if (ret) {
-- 
GitLab


From 1a547cbc6fdd07992f915a614a3f7ba3fccef8fb Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:56 -0700
Subject: [PATCH 0315/4122] scsi: ufs: Track system suspend / resume activity

Add a new boolean variable that tracks whether the system is suspending,
suspended or resuming. This information will be used in a later commit to
fix a deadlock between the SCSI error handler and the suspend code.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-9-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 2 ++
 include/ufs/ufshcd.h      | 5 ++++-
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 84ca17d29898e..2a32bcc93d2e7 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -9247,6 +9247,7 @@ static int ufshcd_wl_suspend(struct device *dev)
 
 	hba = shost_priv(sdev->host);
 	down(&hba->host_sem);
+	hba->system_suspending = true;
 
 	if (pm_runtime_suspended(dev))
 		goto out;
@@ -9288,6 +9289,7 @@ out:
 		hba->curr_dev_pwr_mode, hba->uic_link_state);
 	if (!ret)
 		hba->is_sys_suspended = false;
+	hba->system_suspending = false;
 	up(&hba->host_sem);
 	return ret;
 }
diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 9f28349ebcff5..96538eb3a6c01 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -802,7 +802,9 @@ struct ufs_hba_monitor {
  * @caps: bitmask with information about UFS controller capabilities
  * @devfreq: frequency scaling information owned by the devfreq core
  * @clk_scaling: frequency scaling information owned by the UFS driver
- * @is_sys_suspended: whether or not the entire system has been suspended
+ * @system_suspending: system suspend has been started and system resume has
+ *	not yet finished.
+ * @is_sys_suspended: UFS device has been suspended because of system suspend
  * @urgent_bkops_lvl: keeps track of urgent bkops level for device
  * @is_urgent_bkops_lvl_checked: keeps track if the urgent bkops level for
  *  device is known or not.
@@ -943,6 +945,7 @@ struct ufs_hba {
 
 	struct devfreq *devfreq;
 	struct ufs_clk_scaling clk_scaling;
+	bool system_suspending;
 	bool is_sys_suspended;
 
 	enum bkops_status urgent_bkops_lvl;
-- 
GitLab


From 6a354a7e740ee779d8595bb3c555d415433f2b19 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:57 -0700
Subject: [PATCH 0316/4122] scsi: ufs: Introduce the function
 ufshcd_execute_start_stop()

Open-code scsi_execute() because a later patch will modify scmd->flags and
because scsi_execute() does not support setting scmd->flags. No
functionality is changed.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-10-bvanassche@acm.org
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 2a32bcc93d2e7..c5ccc7ba583ba 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8729,6 +8729,39 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
 	}
 }
 
+static int ufshcd_execute_start_stop(struct scsi_device *sdev,
+				     enum ufs_dev_pwr_mode pwr_mode,
+				     struct scsi_sense_hdr *sshdr)
+{
+	unsigned char cdb[6] = { START_STOP, 0, 0, 0, pwr_mode << 4, 0 };
+	struct request *req;
+	struct scsi_cmnd *scmd;
+	int ret;
+
+	req = scsi_alloc_request(sdev->request_queue, REQ_OP_DRV_IN,
+				 BLK_MQ_REQ_PM);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	scmd = blk_mq_rq_to_pdu(req);
+	scmd->cmd_len = COMMAND_SIZE(cdb[0]);
+	memcpy(scmd->cmnd, cdb, scmd->cmd_len);
+	scmd->allowed = 0/*retries*/;
+	req->timeout = 1 * HZ;
+	req->rq_flags |= RQF_PM | RQF_QUIET;
+
+	blk_execute_rq(req, /*at_head=*/true);
+
+	if (sshdr)
+		scsi_normalize_sense(scmd->sense_buffer, scmd->sense_len,
+				     sshdr);
+	ret = scmd->result;
+
+	blk_mq_free_request(req);
+
+	return ret;
+}
+
 /**
  * ufshcd_set_dev_pwr_mode - sends START STOP UNIT command to set device
  *			     power mode
@@ -8741,7 +8774,6 @@ static void ufshcd_hba_exit(struct ufs_hba *hba)
 static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 				     enum ufs_dev_pwr_mode pwr_mode)
 {
-	unsigned char cmd[6] = { START_STOP };
 	struct scsi_sense_hdr sshdr;
 	struct scsi_device *sdp;
 	unsigned long flags;
@@ -8766,16 +8798,13 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 	 */
 	hba->host->eh_noresume = 1;
 
-	cmd[4] = pwr_mode << 4;
-
 	/*
 	 * Current function would be generally called from the power management
 	 * callbacks hence set the RQF_PM flag so that it doesn't resume the
 	 * already suspended childs.
 	 */
 	for (retries = 3; retries > 0; --retries) {
-		ret = scsi_execute(sdp, cmd, DMA_NONE, NULL, 0, NULL, &sshdr,
-				   HZ, 0, 0, RQF_PM, NULL);
+		ret = ufshcd_execute_start_stop(sdp, pwr_mode, &sshdr);
 		/*
 		 * scsi_execute() only returns a negative value if the request
 		 * queue is dying.
-- 
GitLab


From 7029e2151a7c6a5c60b35996d026528e7d51aae3 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Tue, 18 Oct 2022 13:29:58 -0700
Subject: [PATCH 0317/4122] scsi: ufs: Fix a deadlock between PM and the SCSI
 error handler

The following deadlock has been observed on multiple test setups:

 * ufshcd_wl_suspend() is waiting for blk_execute_rq(START STOP UNIT) to
   complete while ufshcd_wl_suspend() holds host_sem.

 * The SCSI error handler is activated, changes the host state to
   SHOST_RECOVERY, ufshcd_eh_host_reset_handler() and ufshcd_err_handler()
   are called and the latter function tries to obtain host_sem.

This is a deadlock because blk_execute_rq() can't execute SCSI commands
while the host is in the SHOST_RECOVERY state and because the error handler
cannot make progress because host_sem is held by another thread.

Fix this deadlock as follows:

 * Fail attempts to suspend the system while the SCSI error handler is in
   progress by setting the SCMD_FAIL_IF_RECOVERING flag for START STOP UNIT
   commands.

 * If the system is suspending and a START STOP UNIT command times out,
   handle the SCSI command timeout from inside the context of the SCSI
   timeout handler instead of activating the SCSI error handler.

The runtime power management code is not affected by this deadlock since
hba->host_sem is not touched by the runtime power management functions in
the UFS driver.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221018202958.1902564-11-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index c5ccc7ba583ba..b2203dd79e8c3 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8292,6 +8292,28 @@ out:
 	}
 }
 
+static enum scsi_timeout_action ufshcd_eh_timed_out(struct scsi_cmnd *scmd)
+{
+	struct ufs_hba *hba = shost_priv(scmd->device->host);
+
+	if (!hba->system_suspending) {
+		/* Activate the error handler in the SCSI core. */
+		return SCSI_EH_NOT_HANDLED;
+	}
+
+	/*
+	 * If we get here we know that no TMFs are outstanding and also that
+	 * the only pending command is a START STOP UNIT command. Handle the
+	 * timeout of that command directly to prevent a deadlock between
+	 * ufshcd_set_dev_pwr_mode() and ufshcd_err_handler().
+	 */
+	ufshcd_link_recovery(hba);
+	dev_info(hba->dev, "%s() finished; outstanding_tasks = %#lx.\n",
+		 __func__, hba->outstanding_tasks);
+
+	return hba->outstanding_reqs ? SCSI_EH_RESET_TIMER : SCSI_EH_DONE;
+}
+
 static const struct attribute_group *ufshcd_driver_groups[] = {
 	&ufs_sysfs_unit_descriptor_group,
 	&ufs_sysfs_lun_attributes_group,
@@ -8326,6 +8348,7 @@ static struct scsi_host_template ufshcd_driver_template = {
 	.eh_abort_handler	= ufshcd_abort,
 	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
 	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
+	.eh_timed_out		= ufshcd_eh_timed_out,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
@@ -8747,6 +8770,7 @@ static int ufshcd_execute_start_stop(struct scsi_device *sdev,
 	scmd->cmd_len = COMMAND_SIZE(cdb[0]);
 	memcpy(scmd->cmnd, cdb, scmd->cmd_len);
 	scmd->allowed = 0/*retries*/;
+	scmd->flags |= SCMD_FAIL_IF_RECOVERING;
 	req->timeout = 1 * HZ;
 	req->rq_flags |= RQF_PM | RQF_QUIET;
 
-- 
GitLab


From b203c67ebe752c8f2a2babf5e58d244c82680922 Mon Sep 17 00:00:00 2001
From: Tong Tiangen <tongtiangen@huawei.com>
Date: Sat, 22 Oct 2022 01:43:40 +0000
Subject: [PATCH 0318/4122] csky: add arch support current_stack_pointer

To follow the existing per-arch conventions, using "current_stack_pointer"
to set sp.

This will let it be used in non-arch places(like HARDENED_USERCOPY).

Refer to the implementation of riscv commit fdecfea09328 ("riscv: Rename
"sp_in_global" to "current_stack_pointer"").

Link: https://lore.kernel.org/lkml/20220224060411.1855683-1-keescook@chromium.org/
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/csky/Kconfig                 | 1 +
 arch/csky/include/asm/processor.h | 2 ++
 arch/csky/kernel/stacktrace.c     | 6 ++----
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index adee6ab36862e..2236b5c0c2136 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -9,6 +9,7 @@ config CSKY
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_INLINE_READ_LOCK if !PREEMPTION
 	select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION
 	select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPTION
diff --git a/arch/csky/include/asm/processor.h b/arch/csky/include/asm/processor.h
index 63ad71fab30d7..ea75d72dea869 100644
--- a/arch/csky/include/asm/processor.h
+++ b/arch/csky/include/asm/processor.h
@@ -84,4 +84,6 @@ unsigned long __get_wchan(struct task_struct *p);
 
 #define cpu_relax() barrier()
 
+register unsigned long current_stack_pointer __asm__("sp");
+
 #endif /* __ASM_CSKY_PROCESSOR_H */
diff --git a/arch/csky/kernel/stacktrace.c b/arch/csky/kernel/stacktrace.c
index 9f78f5d215117..27ecd63e321bb 100644
--- a/arch/csky/kernel/stacktrace.c
+++ b/arch/csky/kernel/stacktrace.c
@@ -23,10 +23,9 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 		sp = user_stack_pointer(regs);
 		pc = instruction_pointer(regs);
 	} else if (task == NULL || task == current) {
-		const register unsigned long current_sp __asm__ ("sp");
 		const register unsigned long current_fp __asm__ ("r8");
 		fp = current_fp;
-		sp = current_sp;
+		sp = current_stack_pointer;
 		pc = (unsigned long)walk_stackframe;
 	} else {
 		/* task blocked in __switch_to */
@@ -68,8 +67,7 @@ static void notrace walk_stackframe(struct task_struct *task,
 		sp = user_stack_pointer(regs);
 		pc = instruction_pointer(regs);
 	} else if (task == NULL || task == current) {
-		const register unsigned long current_sp __asm__ ("sp");
-		sp = current_sp;
+		sp = current_stack_pointer;
 		pc = (unsigned long)walk_stackframe;
 	} else {
 		/* task blocked in __switch_to */
-- 
GitLab


From ce0ba954805e0783ceb7304d4fb357a02038e231 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 7 Oct 2022 21:16:48 +0100
Subject: [PATCH 0319/4122] csky: Kconfig: Fix spelling mistake "Meory" ->
 "Memory"

There is a spelling mistake in a Kconfig option description. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
---
 arch/csky/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 2236b5c0c2136..e0ecd1cc81a94 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -270,7 +270,7 @@ menuconfig HAVE_TCM
 	bool "Tightly-Coupled/Sram Memory"
 	depends on !COMPILE_TEST
 	help
-	  The implementation are not only used by TCM (Tightly-Coupled Meory)
+	  The implementation are not only used by TCM (Tightly-Coupled Memory)
 	  but also used by sram on SOC bus. It follow existed linux tcm
 	  software interface, so that old tcm application codes could be
 	  re-used directly.
-- 
GitLab


From 33a0a1e3b3d17445832177981dc7a1c6a5b009f8 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 1 Oct 2022 18:53:15 +0200
Subject: [PATCH 0320/4122] kobject: modify kobject_get_path() to take a const
 *

kobject_get_path() does not modify the kobject passed to it, so make the
pointer constant.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221001165315.2690141-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h |  2 +-
 lib/kobject.c           | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 57fb972fea05b..592f9785b058a 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -115,7 +115,7 @@ extern void kobject_put(struct kobject *kobj);
 extern const void *kobject_namespace(struct kobject *kobj);
 extern void kobject_get_ownership(struct kobject *kobj,
 				  kuid_t *uid, kgid_t *gid);
-extern char *kobject_get_path(struct kobject *kobj, gfp_t flag);
+extern char *kobject_get_path(const struct kobject *kobj, gfp_t flag);
 
 struct kobj_type {
 	void (*release)(struct kobject *kobj);
diff --git a/lib/kobject.c b/lib/kobject.c
index a0b2dbfcfa233..0380ec889a6af 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -94,10 +94,10 @@ static int create_dir(struct kobject *kobj)
 	return 0;
 }
 
-static int get_kobj_path_length(struct kobject *kobj)
+static int get_kobj_path_length(const struct kobject *kobj)
 {
 	int length = 1;
-	struct kobject *parent = kobj;
+	const struct kobject *parent = kobj;
 
 	/* walk up the ancestors until we hit the one pointing to the
 	 * root.
@@ -112,9 +112,9 @@ static int get_kobj_path_length(struct kobject *kobj)
 	return length;
 }
 
-static void fill_kobj_path(struct kobject *kobj, char *path, int length)
+static void fill_kobj_path(const struct kobject *kobj, char *path, int length)
 {
-	struct kobject *parent;
+	const struct kobject *parent;
 
 	--length;
 	for (parent = kobj; parent; parent = parent->parent) {
@@ -136,7 +136,7 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length)
  *
  * Return: The newly allocated memory, caller must free with kfree().
  */
-char *kobject_get_path(struct kobject *kobj, gfp_t gfp_mask)
+char *kobject_get_path(const struct kobject *kobj, gfp_t gfp_mask)
 {
 	char *path;
 	int len;
-- 
GitLab


From 3d24903a6dd27ab817b4c6c24bee245ff06f7c8e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Fri, 21 Oct 2022 09:23:10 +0200
Subject: [PATCH 0321/4122] kobject: make get_ktype() take a const pointer

get_ktype() does not modify the structure passed to it, so mark the
parameter as being const to allow other const structures to be passed to
it in the future.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221021072310.3931690-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 592f9785b058a..fc40fc81aeb1d 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -198,7 +198,7 @@ static inline void kset_put(struct kset *k)
 	kobject_put(&k->kobj);
 }
 
-static inline const struct kobj_type *get_ktype(struct kobject *kobj)
+static inline const struct kobj_type *get_ktype(const struct kobject *kobj)
 {
 	return kobj->ktype;
 }
-- 
GitLab


From acd50e52a7f2e75b3109523d2f114fa46a4362c0 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 21 Oct 2022 19:09:50 +0100
Subject: [PATCH 0322/4122] staging: rtl8712: Remove variable xcnt

The variable xcnt being incremented but it is never referenced,
it is redundant and can be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221021180950.29139-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/rtl8712_xmit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8712/rtl8712_xmit.c b/drivers/staging/rtl8712/rtl8712_xmit.c
index 84a22eba7ebfd..4cb01f590673e 100644
--- a/drivers/staging/rtl8712/rtl8712_xmit.c
+++ b/drivers/staging/rtl8712/rtl8712_xmit.c
@@ -601,7 +601,7 @@ int r8712_xmitframe_complete(struct _adapter *padapter,
 #ifdef CONFIG_R8712_TX_AGGR
 	struct xmit_frame *p2ndxmitframe = NULL;
 #else
-	int res = _SUCCESS, xcnt = 0;
+	int res = _SUCCESS;
 #endif
 
 	phwxmits = pxmitpriv->hwxmits;
@@ -673,7 +673,6 @@ int r8712_xmitframe_complete(struct _adapter *padapter,
 			dump_xframe(padapter, pxmitframe);
 		else
 			r8712_free_xmitframe_ex(pxmitpriv, pxmitframe);
-		xcnt++;
 #endif
 
 	} else { /* pxmitframe == NULL && p2ndxmitframe == NULL */
-- 
GitLab


From 02119c5d84e327483595687821f9c38c88d2d929 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Sun, 16 Oct 2022 12:07:40 +0100
Subject: [PATCH 0323/4122] staging: rtl8192u: Provide a TODO file for this
 driver

Provide a TODO file that lists the tasks that should be carried out in
order to move this driver off drivers/staging. It's missing from original
addition of this driver.

Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Link: https://lore.kernel.org/r/20221016110743.1448067-1-pbrobinson@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192u/TODO | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 drivers/staging/rtl8192u/TODO

diff --git a/drivers/staging/rtl8192u/TODO b/drivers/staging/rtl8192u/TODO
new file mode 100644
index 0000000000000..ab9d5d145b3bc
--- /dev/null
+++ b/drivers/staging/rtl8192u/TODO
@@ -0,0 +1,16 @@
+To-do list:
+
+* Correct the coding style according to Linux guidelines; please read the document
+  at https://www.kernel.org/doc/html/latest/process/coding-style.html.
+* Remove unnecessary debugging/printing macros; for those that are still needed
+  use the proper kernel API (pr_debug(), dev_dbg(), netdev_dbg()).
+* Remove dead code such as unusued functions, variables, fields, etc..
+* Use in-kernel API and remove unnecessary wrappers where possible.
+* Fix bugs due to code that sleeps in atomic context.
+* Remove the HAL layer and migrate its functionality into the relevant parts of
+  the driver.
+* Switch to use LIB80211.
+* Switch to use MAC80211.
+* Switch to use CFG80211.
+* Improve the error handling of various functions, particularly those that use
+  existing kernel APIs.
-- 
GitLab


From f124c1751844c4592110a292403f342c50a89344 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Sun, 16 Oct 2022 12:07:41 +0100
Subject: [PATCH 0324/4122] staging: rtl8192e: Update the TODO file for this
 driver

The driver directory where it was proposed to move to has changed,
while add it add some more pieces this driver to update.

Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Link: https://lore.kernel.org/r/20221016110743.1448067-2-pbrobinson@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/TODO | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/TODO b/drivers/staging/rtl8192e/TODO
index d51f159d1adff..7221ae65d63eb 100644
--- a/drivers/staging/rtl8192e/TODO
+++ b/drivers/staging/rtl8192e/TODO
@@ -1,2 +1,18 @@
-* merge into drivers/net/wireless/rtllib/rtl8192e
+To-do list:
+
+* merge into drivers/net/wireless/realtek/rtlwifi/rtl8192*
 * clean up function naming
+* Correct the coding style according to Linux guidelines; please read the document
+  at https://www.kernel.org/doc/html/latest/process/coding-style.html.
+* Remove unnecessary debugging/printing macros; for those that are still needed
+  use the proper kernel API (pr_debug(), dev_dbg(), netdev_dbg()).
+* Remove dead code such as unusued functions, variables, fields, etc..
+* Use in-kernel API and remove unnecessary wrappers where possible.
+* Fix bugs due to code that sleeps in atomic context.
+* Remove the HAL layer and migrate its functionality into the relevant parts of
+  the driver.
+* Switch to use LIB80211.
+* Switch to use MAC80211.
+* Switch to use CFG80211.
+* Improve the error handling of various functions, particularly those that use
+  existing kernel APIs.
-- 
GitLab


From ce51c0c7fae3390f070e6b515e3a57f6bfdfd18e Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Sun, 16 Oct 2022 12:07:42 +0100
Subject: [PATCH 0325/4122] staging: ks7010: Update the TODO file for this
 driver

Add move to mac80211 from wext to the todo for this driver.

Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Link: https://lore.kernel.org/r/20221016110743.1448067-3-pbrobinson@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/ks7010/TODO | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/staging/ks7010/TODO b/drivers/staging/ks7010/TODO
index ab6f39175d992..80c97543b9772 100644
--- a/drivers/staging/ks7010/TODO
+++ b/drivers/staging/ks7010/TODO
@@ -27,6 +27,9 @@ Now the TODOs:
 - fix the 'card removal' event when card is inserted when booting
 - check what other upstream wireless mechanisms can be used instead of the
   custom ones here
+- Switch to use LIB80211.
+- Switch to use MAC80211.
+- Switch to use CFG80211.
 
 Please send any patches to:
 Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-- 
GitLab


From 1f3e0b41919208be71f869a753609d6c11e12e85 Mon Sep 17 00:00:00 2001
From: Peter Robinson <pbrobinson@gmail.com>
Date: Sun, 16 Oct 2022 12:07:43 +0100
Subject: [PATCH 0326/4122] staging: wlan-ng: Provide a TODO file for this
 driver

Provide a TODO file that lists the tasks that should be carried out in
order to move this driver off drivers/staging. It's missing from original
addition of this driver.

Signed-off-by: Peter Robinson <pbrobinson@gmail.com>
Link: https://lore.kernel.org/r/20221016110743.1448067-4-pbrobinson@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/TODO | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)
 create mode 100644 drivers/staging/wlan-ng/TODO

diff --git a/drivers/staging/wlan-ng/TODO b/drivers/staging/wlan-ng/TODO
new file mode 100644
index 0000000000000..ab9d5d145b3bc
--- /dev/null
+++ b/drivers/staging/wlan-ng/TODO
@@ -0,0 +1,16 @@
+To-do list:
+
+* Correct the coding style according to Linux guidelines; please read the document
+  at https://www.kernel.org/doc/html/latest/process/coding-style.html.
+* Remove unnecessary debugging/printing macros; for those that are still needed
+  use the proper kernel API (pr_debug(), dev_dbg(), netdev_dbg()).
+* Remove dead code such as unusued functions, variables, fields, etc..
+* Use in-kernel API and remove unnecessary wrappers where possible.
+* Fix bugs due to code that sleeps in atomic context.
+* Remove the HAL layer and migrate its functionality into the relevant parts of
+  the driver.
+* Switch to use LIB80211.
+* Switch to use MAC80211.
+* Switch to use CFG80211.
+* Improve the error handling of various functions, particularly those that use
+  existing kernel APIs.
-- 
GitLab


From ea679d8e032b0affc65c9d56b7bad3ef144366e1 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Thu, 20 Oct 2022 22:50:00 +0530
Subject: [PATCH 0327/4122] staging: most: dim2: correct misleading struct type
 name

Correct the misleading struct type name dim_ch_state_t to dim_ch_state
since this not a typedef but a normal structure declaration.

Suggested-by: Julia Lawall <julia.lawall@inria.fr>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y1GDQO+06fD24Pf/@debian-BULLSEYE-live-builder-AMD64
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/most/dim2/dim2.c | 4 ++--
 drivers/staging/most/dim2/hal.c  | 4 ++--
 drivers/staging/most/dim2/hal.h  | 6 +++---
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/most/dim2/dim2.c b/drivers/staging/most/dim2/dim2.c
index 97dff82b7a5fb..7a5f80e637a0b 100644
--- a/drivers/staging/most/dim2/dim2.c
+++ b/drivers/staging/most/dim2/dim2.c
@@ -161,7 +161,7 @@ static int try_start_dim_transfer(struct hdm_channel *hdm_ch)
 	struct list_head *head = &hdm_ch->pending_list;
 	struct mbo *mbo;
 	unsigned long flags;
-	struct dim_ch_state_t st;
+	struct dim_ch_state st;
 
 	BUG_ON(!hdm_ch);
 	BUG_ON(!hdm_ch->is_initialized);
@@ -259,7 +259,7 @@ static void retrieve_netinfo(struct dim2_hdm *dev, struct mbo *mbo)
 static void service_done_flag(struct dim2_hdm *dev, int ch_idx)
 {
 	struct hdm_channel *hdm_ch = dev->hch + ch_idx;
-	struct dim_ch_state_t st;
+	struct dim_ch_state st;
 	struct list_head *head;
 	struct mbo *mbo;
 	int done_buffers;
diff --git a/drivers/staging/most/dim2/hal.c b/drivers/staging/most/dim2/hal.c
index 65282c2768628..a5d40b5b138ab 100644
--- a/drivers/staging/most/dim2/hal.c
+++ b/drivers/staging/most/dim2/hal.c
@@ -943,8 +943,8 @@ u8 dim_service_channel(struct dim_channel *ch)
 	return channel_service(ch);
 }
 
-struct dim_ch_state_t *dim_get_channel_state(struct dim_channel *ch,
-					     struct dim_ch_state_t *state_ptr)
+struct dim_ch_state *dim_get_channel_state(struct dim_channel *ch,
+					   struct dim_ch_state *state_ptr)
 {
 	if (!ch || !state_ptr)
 		return NULL;
diff --git a/drivers/staging/most/dim2/hal.h b/drivers/staging/most/dim2/hal.h
index 20531449acab1..ef10a8741c10b 100644
--- a/drivers/staging/most/dim2/hal.h
+++ b/drivers/staging/most/dim2/hal.h
@@ -27,7 +27,7 @@ enum mlb_clk_speed {
 	CLK_8192FS = 7,
 };
 
-struct dim_ch_state_t {
+struct dim_ch_state {
 	bool ready; /* Shows readiness to enqueue next buffer */
 	u16 done_buffers; /* Number of completed buffers */
 };
@@ -87,8 +87,8 @@ void dim_service_ahb_int_irq(struct dim_channel *const *channels);
 
 u8 dim_service_channel(struct dim_channel *ch);
 
-struct dim_ch_state_t *dim_get_channel_state(struct dim_channel *ch,
-					     struct dim_ch_state_t *state_ptr);
+struct dim_ch_state *dim_get_channel_state(struct dim_channel *ch,
+					   struct dim_ch_state *state_ptr);
 
 u16 dim_dbr_space(struct dim_channel *ch);
 
-- 
GitLab


From 6cc7b783f8295677a19e88c12e47021c06c3b6ef Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 02:56:47 +0530
Subject: [PATCH 0328/4122] staging: r8188eu: use Linux kernel variable naming
 convention

Follow the Linux Kernel coding style variable naming convention instead
of using camelCase style. Issue reported by checkpatch script for
these variables:
        tagLen, tagType, networkAddr, ipAddr, macAddr

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/a107c527e9032c22a62e93ff12d5fae625e70212.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_br_ext.c | 112 +++++++++++-----------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c
index 4c5f30792a46c..e509b8454e25b 100644
--- a/drivers/staging/r8188eu/core/rtw_br_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_br_ext.c
@@ -50,17 +50,17 @@
 static unsigned char *__nat25_find_pppoe_tag(struct pppoe_hdr *ph, unsigned short type)
 {
 	unsigned char *cur_ptr, *start_ptr;
-	unsigned short tagLen, tagType;
+	unsigned short tag_len, tag_type;
 
 	start_ptr = (unsigned char *)ph->tag;
 	cur_ptr = (unsigned char *)ph->tag;
 	while ((cur_ptr - start_ptr) < ntohs(ph->length)) {
 		/*  prevent un-alignment access */
-		tagType = (unsigned short)((cur_ptr[0] << 8) + cur_ptr[1]);
-		tagLen  = (unsigned short)((cur_ptr[2] << 8) + cur_ptr[3]);
-		if (tagType == type)
+		tag_type = (unsigned short)((cur_ptr[0] << 8) + cur_ptr[1]);
+		tag_len  = (unsigned short)((cur_ptr[2] << 8) + cur_ptr[3]);
+		if (tag_type == type)
 			return cur_ptr;
-		cur_ptr = cur_ptr + TAG_HDR_LEN + tagLen;
+		cur_ptr = cur_ptr + TAG_HDR_LEN + tag_len;
 	}
 	return NULL;
 }
@@ -111,32 +111,32 @@ static int  __nat25_has_expired(struct nat25_network_db_entry *fdb)
 	return 0;
 }
 
-static void __nat25_generate_ipv4_network_addr(unsigned char *networkAddr,
-				unsigned int *ipAddr)
+static void __nat25_generate_ipv4_network_addr(unsigned char *addr,
+				unsigned int *ip_addr)
 {
-	memset(networkAddr, 0, MAX_NETWORK_ADDR_LEN);
+	memset(addr, 0, MAX_NETWORK_ADDR_LEN);
 
-	networkAddr[0] = NAT25_IPV4;
-	memcpy(networkAddr + 7, (unsigned char *)ipAddr, 4);
+	addr[0] = NAT25_IPV4;
+	memcpy(addr + 7, (unsigned char *)ip_addr, 4);
 }
 
-static void __nat25_generate_pppoe_network_addr(unsigned char *networkAddr,
+static void __nat25_generate_pppoe_network_addr(unsigned char *addr,
 				unsigned char *ac_mac, __be16 *sid)
 {
-	memset(networkAddr, 0, MAX_NETWORK_ADDR_LEN);
+	memset(addr, 0, MAX_NETWORK_ADDR_LEN);
 
-	networkAddr[0] = NAT25_PPPOE;
-	memcpy(networkAddr + 1, (unsigned char *)sid, 2);
-	memcpy(networkAddr + 3, (unsigned char *)ac_mac, 6);
+	addr[0] = NAT25_PPPOE;
+	memcpy(addr + 1, (unsigned char *)sid, 2);
+	memcpy(addr + 3, (unsigned char *)ac_mac, 6);
 }
 
-static  void __nat25_generate_ipv6_network_addr(unsigned char *networkAddr,
-				unsigned int *ipAddr)
+static  void __nat25_generate_ipv6_network_addr(unsigned char *addr,
+				unsigned int *ip_addr)
 {
-	memset(networkAddr, 0, MAX_NETWORK_ADDR_LEN);
+	memset(addr, 0, MAX_NETWORK_ADDR_LEN);
 
-	networkAddr[0] = NAT25_IPV6;
-	memcpy(networkAddr + 1, (unsigned char *)ipAddr, 16);
+	addr[0] = NAT25_IPV6;
+	memcpy(addr + 1, (unsigned char *)ip_addr, 16);
 }
 
 static unsigned char *scan_tlv(unsigned char *data, int len, unsigned char tag, unsigned char len8b)
@@ -200,40 +200,40 @@ static int update_nd_link_layer_addr(unsigned char *data, int len, unsigned char
 	return 0;
 }
 
-static int __nat25_network_hash(unsigned char *networkAddr)
+static int __nat25_network_hash(unsigned char *addr)
 {
-	if (networkAddr[0] == NAT25_IPV4) {
+	if (addr[0] == NAT25_IPV4) {
 		unsigned long x;
 
-		x = networkAddr[7] ^ networkAddr[8] ^ networkAddr[9] ^ networkAddr[10];
+		x = addr[7] ^ addr[8] ^ addr[9] ^ addr[10];
 
 		return x & (NAT25_HASH_SIZE - 1);
-	} else if (networkAddr[0] == NAT25_IPX) {
+	} else if (addr[0] == NAT25_IPX) {
 		unsigned long x;
 
-		x = networkAddr[1] ^ networkAddr[2] ^ networkAddr[3] ^ networkAddr[4] ^ networkAddr[5] ^
-			networkAddr[6] ^ networkAddr[7] ^ networkAddr[8] ^ networkAddr[9] ^ networkAddr[10];
+		x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^
+			addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10];
 
 		return x & (NAT25_HASH_SIZE - 1);
-	} else if (networkAddr[0] == NAT25_APPLE) {
+	} else if (addr[0] == NAT25_APPLE) {
 		unsigned long x;
 
-		x = networkAddr[1] ^ networkAddr[2] ^ networkAddr[3];
+		x = addr[1] ^ addr[2] ^ addr[3];
 
 		return x & (NAT25_HASH_SIZE - 1);
-	} else if (networkAddr[0] == NAT25_PPPOE) {
+	} else if (addr[0] == NAT25_PPPOE) {
 		unsigned long x;
 
-		x = networkAddr[0] ^ networkAddr[1] ^ networkAddr[2] ^ networkAddr[3] ^ networkAddr[4] ^ networkAddr[5] ^ networkAddr[6] ^ networkAddr[7] ^ networkAddr[8];
+		x = addr[0] ^ addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ addr[6] ^ addr[7] ^ addr[8];
 
 		return x & (NAT25_HASH_SIZE - 1);
-	} else if (networkAddr[0] == NAT25_IPV6) {
+	} else if (addr[0] == NAT25_IPV6) {
 		unsigned long x;
 
-		x = networkAddr[1] ^ networkAddr[2] ^ networkAddr[3] ^ networkAddr[4] ^ networkAddr[5] ^
-			networkAddr[6] ^ networkAddr[7] ^ networkAddr[8] ^ networkAddr[9] ^ networkAddr[10] ^
-			networkAddr[11] ^ networkAddr[12] ^ networkAddr[13] ^ networkAddr[14] ^ networkAddr[15] ^
-			networkAddr[16];
+		x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^
+			addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10] ^
+			addr[11] ^ addr[12] ^ addr[13] ^ addr[14] ^ addr[15] ^
+			addr[16];
 
 		return x & (NAT25_HASH_SIZE - 1);
 	} else {
@@ -241,7 +241,7 @@ static int __nat25_network_hash(unsigned char *networkAddr)
 		int i;
 
 		for (i = 0; i < MAX_NETWORK_ADDR_LEN; i++)
-			x ^= networkAddr[i];
+			x ^= addr[i];
 
 		return x & (NAT25_HASH_SIZE - 1);
 	}
@@ -269,17 +269,17 @@ static void __network_hash_unlink(struct nat25_network_db_entry *ent)
 }
 
 static void __nat25_db_network_insert(struct adapter *priv,
-				unsigned char *macAddr, unsigned char *networkAddr)
+				unsigned char *mac_addr, unsigned char *addr)
 {
 	struct nat25_network_db_entry *db;
 	int hash;
 
 	spin_lock_bh(&priv->br_ext_lock);
-	hash = __nat25_network_hash(networkAddr);
+	hash = __nat25_network_hash(addr);
 	db = priv->nethash[hash];
 	while (db) {
-		if (!memcmp(db->networkAddr, networkAddr, MAX_NETWORK_ADDR_LEN)) {
-			memcpy(db->macAddr, macAddr, ETH_ALEN);
+		if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN)) {
+			memcpy(db->macAddr, mac_addr, ETH_ALEN);
 			db->ageing_timer = jiffies;
 			spin_unlock_bh(&priv->br_ext_lock);
 			return;
@@ -291,8 +291,8 @@ static void __nat25_db_network_insert(struct adapter *priv,
 		spin_unlock_bh(&priv->br_ext_lock);
 		return;
 	}
-	memcpy(db->networkAddr, networkAddr, MAX_NETWORK_ADDR_LEN);
-	memcpy(db->macAddr, macAddr, ETH_ALEN);
+	memcpy(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN);
+	memcpy(db->macAddr, mac_addr, ETH_ALEN);
 	atomic_set(&db->use_count, 1);
 	db->ageing_timer = jiffies;
 
@@ -366,7 +366,7 @@ void nat25_db_expire(struct adapter *priv)
 int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method)
 {
 	unsigned short protocol;
-	unsigned char networkAddr[MAX_NETWORK_ADDR_LEN];
+	unsigned char addr[MAX_NETWORK_ADDR_LEN];
 	unsigned int tmp;
 
 	if (!skb)
@@ -395,9 +395,9 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method)
 			if (iph->saddr == 0)
 				return 0;
 			tmp = be32_to_cpu(iph->saddr);
-			__nat25_generate_ipv4_network_addr(networkAddr, &tmp);
+			__nat25_generate_ipv4_network_addr(addr, &tmp);
 			/* record source IP address and , source mac address into db */
-			__nat25_db_network_insert(priv, skb->data + ETH_ALEN, networkAddr);
+			__nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr);
 			return 0;
 		default:
 			return -1;
@@ -421,8 +421,8 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method)
 			memcpy(arp_ptr, GET_MY_HWADDR(priv), ETH_ALEN);
 			arp_ptr += arp->ar_hln;
 			sender = (unsigned int *)arp_ptr;
-			__nat25_generate_ipv4_network_addr(networkAddr, sender);
-			__nat25_db_network_insert(priv, skb->data + ETH_ALEN, networkAddr);
+			__nat25_generate_ipv4_network_addr(addr, sender);
+			__nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr);
 			return 0;
 		default:
 			return -1;
@@ -495,9 +495,9 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method)
 					return -1;
 				}
 			} else {	/*  session phase */
-				__nat25_generate_pppoe_network_addr(networkAddr, skb->data, &ph->sid);
+				__nat25_generate_pppoe_network_addr(addr, skb->data, &ph->sid);
 
-				__nat25_db_network_insert(priv, skb->data + ETH_ALEN, networkAddr);
+				__nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr);
 
 				if (!priv->ethBrExtInfo.addPPPoETag &&
 				    priv->pppoe_connection_in_progress &&
@@ -548,8 +548,8 @@ int nat25_db_handle(struct adapter *priv, struct sk_buff *skb, int method)
 			return -1;
 		case NAT25_INSERT:
 			if (memcmp(&iph->saddr, "\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0\x0", 16)) {
-				__nat25_generate_ipv6_network_addr(networkAddr, (unsigned int *)&iph->saddr);
-				__nat25_db_network_insert(priv, skb->data + ETH_ALEN, networkAddr);
+				__nat25_generate_ipv6_network_addr(addr, (unsigned int *)&iph->saddr);
+				__nat25_db_network_insert(priv, skb->data + ETH_ALEN, addr);
 
 				if (iph->nexthdr == IPPROTO_ICMPV6 &&
 						skb->len > (ETH_HLEN +  sizeof(*iph) + 4)) {
@@ -639,17 +639,17 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb)
 	}
 }
 
-void *scdb_findEntry(struct adapter *priv, unsigned char *ipAddr)
+void *scdb_findEntry(struct adapter *priv, unsigned char *ip_addr)
 {
-	unsigned char networkAddr[MAX_NETWORK_ADDR_LEN];
+	unsigned char addr[MAX_NETWORK_ADDR_LEN];
 	struct nat25_network_db_entry *db;
 	int hash;
 
-	__nat25_generate_ipv4_network_addr(networkAddr, (unsigned int *)ipAddr);
-	hash = __nat25_network_hash(networkAddr);
+	__nat25_generate_ipv4_network_addr(addr, (unsigned int *)ip_addr);
+	hash = __nat25_network_hash(addr);
 	db = priv->nethash[hash];
 	while (db) {
-		if (!memcmp(db->networkAddr, networkAddr, MAX_NETWORK_ADDR_LEN)) {
+		if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN)) {
 			return (void *)db;
 		}
 
-- 
GitLab


From 9d76dae6c86e7945b51054cd477c5b8f0bcb3a87 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 02:57:16 +0530
Subject: [PATCH 0329/4122] staging: r8188eu: reformat long computation lines

Reformat long running computation instructions to improve code readability.
Address checkpatch script complaints like:
	CHECK: line length of 171 exceeds 100 columns

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/e07506ef1dc4ac1d3f8b076a8182628bd0e5cec0.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_br_ext.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c
index e509b8454e25b..d4059f0fc3629 100644
--- a/drivers/staging/r8188eu/core/rtw_br_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_br_ext.c
@@ -212,7 +212,7 @@ static int __nat25_network_hash(unsigned char *addr)
 		unsigned long x;
 
 		x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^
-			addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10];
+		    addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10];
 
 		return x & (NAT25_HASH_SIZE - 1);
 	} else if (addr[0] == NAT25_APPLE) {
@@ -224,16 +224,16 @@ static int __nat25_network_hash(unsigned char *addr)
 	} else if (addr[0] == NAT25_PPPOE) {
 		unsigned long x;
 
-		x = addr[0] ^ addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ addr[6] ^ addr[7] ^ addr[8];
+		x = addr[0] ^ addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^
+		    addr[5] ^ addr[6] ^ addr[7] ^ addr[8];
 
 		return x & (NAT25_HASH_SIZE - 1);
 	} else if (addr[0] == NAT25_IPV6) {
 		unsigned long x;
 
-		x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^
-			addr[6] ^ addr[7] ^ addr[8] ^ addr[9] ^ addr[10] ^
-			addr[11] ^ addr[12] ^ addr[13] ^ addr[14] ^ addr[15] ^
-			addr[16];
+		x = addr[1] ^ addr[2] ^ addr[3] ^ addr[4] ^ addr[5] ^ addr[6] ^
+		    addr[7] ^ addr[8] ^ addr[9] ^ addr[10] ^ addr[11] ^ addr[12] ^
+		    addr[13] ^ addr[14] ^ addr[15] ^ addr[16];
 
 		return x & (NAT25_HASH_SIZE - 1);
 	} else {
-- 
GitLab


From 3c480b32431272e0a4a78ee1f20bba96deaddd72 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 02:57:47 +0530
Subject: [PATCH 0330/4122] staging: r8188eu: remove {} for single statement
 blocks

As per the Linux kernel coding-style guidelines, there is no need to
use {} for single statement blocks. Issue flagged by checkpatch script.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/a50460e1507621b29a7901cc4ff9501b172417db.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_br_ext.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c
index d4059f0fc3629..b418cbc307b37 100644
--- a/drivers/staging/r8188eu/core/rtw_br_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_br_ext.c
@@ -649,9 +649,8 @@ void *scdb_findEntry(struct adapter *priv, unsigned char *ip_addr)
 	hash = __nat25_network_hash(addr);
 	db = priv->nethash[hash];
 	while (db) {
-		if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN)) {
+		if (!memcmp(db->networkAddr, addr, MAX_NETWORK_ADDR_LEN))
 			return (void *)db;
-		}
 
 		db = db->next_hash;
 	}
-- 
GitLab


From 227041c5d88aace2a1c95d6741438015ac1d9560 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 02:58:39 +0530
Subject: [PATCH 0331/4122] staging: r8188eu: use htons macro instead of
 __constant_htons

Macro "htons" is more efficient and clearer. It should be used for
constants instead of the __constant_htons macro. Resolves following
checkpatch script complaint:
        WARNING: __constant_htons should be htons

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/b46adfbdce0362ed0dbe0fc957ef2f47a93c24bb.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_br_ext.c |  6 +++---
 drivers/staging/r8188eu/core/rtw_xmit.c   | 14 +++++++-------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c
index b418cbc307b37..a23f7df373ede 100644
--- a/drivers/staging/r8188eu/core/rtw_br_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_br_ext.c
@@ -606,14 +606,14 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb)
 	if (!priv->ethBrExtInfo.dhcp_bcst_disable) {
 		__be16 protocol = *((__be16 *)(skb->data + 2 * ETH_ALEN));
 
-		if (protocol == __constant_htons(ETH_P_IP)) { /*  IP */
+		if (protocol == htons(ETH_P_IP)) { /*  IP */
 			struct iphdr *iph = (struct iphdr *)(skb->data + ETH_HLEN);
 
 			if (iph->protocol == IPPROTO_UDP) { /*  UDP */
 				struct udphdr *udph = (struct udphdr *)((size_t)iph + (iph->ihl << 2));
 
-				if ((udph->source == __constant_htons(CLIENT_PORT)) &&
-				    (udph->dest == __constant_htons(SERVER_PORT))) { /*  DHCP request */
+				if ((udph->source == htons(CLIENT_PORT)) &&
+				    (udph->dest == htons(SERVER_PORT))) { /*  DHCP request */
 					struct dhcpMessage *dhcph =
 						(struct dhcpMessage *)((size_t)udph + sizeof(struct udphdr));
 					u32 cookie = be32_to_cpu((__be32)dhcph->cookie);
diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c
index 873d2c5c36344..4f82204283282 100644
--- a/drivers/staging/r8188eu/core/rtw_xmit.c
+++ b/drivers/staging/r8188eu/core/rtw_xmit.c
@@ -1622,14 +1622,14 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb)
 	spin_lock_bh(&padapter->br_ext_lock);
 	if (!(skb->data[0] & 1) && br_port &&
 	    memcmp(skb->data + ETH_ALEN, padapter->br_mac, ETH_ALEN) &&
-	    *((__be16 *)(skb->data + ETH_ALEN * 2)) != __constant_htons(ETH_P_8021Q) &&
-	    *((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_IP) &&
+	    *((__be16 *)(skb->data + ETH_ALEN * 2)) != htons(ETH_P_8021Q) &&
+	    *((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_IP) &&
 	    !memcmp(padapter->scdb_mac, skb->data + ETH_ALEN, ETH_ALEN) && padapter->scdb_entry) {
 		memcpy(skb->data + ETH_ALEN, GET_MY_HWADDR(padapter), ETH_ALEN);
 		padapter->scdb_entry->ageing_timer = jiffies;
 		spin_unlock_bh(&padapter->br_ext_lock);
 	} else {
-		if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_8021Q)) {
+		if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_8021Q)) {
 			is_vlan_tag = 1;
 			vlan_hdr = *((unsigned short *)(skb->data + ETH_ALEN * 2 + 2));
 			for (i = 0; i < 6; i++)
@@ -1637,10 +1637,10 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb)
 			skb_pull(skb, 4);
 		}
 		if (!memcmp(skb->data + ETH_ALEN, padapter->br_mac, ETH_ALEN) &&
-		    (*((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_IP)))
+		    (*((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_IP)))
 			memcpy(padapter->br_ip, skb->data + WLAN_ETHHDR_LEN + 12, 4);
 
-		if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == __constant_htons(ETH_P_IP)) {
+		if (*((__be16 *)(skb->data + ETH_ALEN * 2)) == htons(ETH_P_IP)) {
 			if (memcmp(padapter->scdb_mac, skb->data + ETH_ALEN, ETH_ALEN)) {
 				padapter->scdb_entry = (struct nat25_network_db_entry *)scdb_findEntry(padapter,
 							skb->data + WLAN_ETHHDR_LEN + 12);
@@ -1669,7 +1669,7 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb)
 					skb_push(skb, 4);
 					for (i = 0; i < 6; i++)
 						*((unsigned short *)(skb->data + i * 2)) = *((unsigned short *)(skb->data + 4 + i * 2));
-					*((__be16 *)(skb->data + ETH_ALEN * 2)) = __constant_htons(ETH_P_8021Q);
+					*((__be16 *)(skb->data + ETH_ALEN * 2)) = htons(ETH_P_8021Q);
 					*((unsigned short *)(skb->data + ETH_ALEN * 2 + 2)) = vlan_hdr;
 				}
 
@@ -1708,7 +1708,7 @@ static int rtw_br_client_tx(struct adapter *padapter, struct sk_buff **pskb)
 			skb_push(skb, 4);
 			for (i = 0; i < 6; i++)
 				*((unsigned short *)(skb->data + i * 2)) = *((unsigned short *)(skb->data + 4 + i * 2));
-			*((__be16 *)(skb->data + ETH_ALEN * 2)) = __constant_htons(ETH_P_8021Q);
+			*((__be16 *)(skb->data + ETH_ALEN * 2)) = htons(ETH_P_8021Q);
 			*((unsigned short *)(skb->data + ETH_ALEN * 2 + 2)) = vlan_hdr;
 		}
 	}
-- 
GitLab


From 0f2635b3ff1057d50bfe4a011c5706117e97e114 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 02:59:37 +0530
Subject: [PATCH 0332/4122] staging: r8188eu: correct misspelled words in
 comments

Fix spelling mistakes in code comments across the driver.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/00be5f2a97b0c899279bd8f9cd27634186b77b9d.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_ioctl_set.c  |  2 +-
 drivers/staging/r8188eu/core/rtw_mlme_ext.c   | 36 +++++++++----------
 drivers/staging/r8188eu/core/rtw_recv.c       |  8 ++---
 drivers/staging/r8188eu/hal/HalPhyRf_8188e.c  |  2 +-
 drivers/staging/r8188eu/hal/odm_RTL8188E.c    |  2 +-
 .../staging/r8188eu/hal/rtl8188e_hal_init.c   |  2 +-
 drivers/staging/r8188eu/hal/rtl8188e_phycfg.c | 16 ++++-----
 .../staging/r8188eu/include/Hal8188EPhyReg.h  |  4 +--
 .../staging/r8188eu/include/rtl8188e_hal.h    |  2 +-
 .../staging/r8188eu/include/rtl8188e_spec.h   |  6 ++--
 drivers/staging/r8188eu/include/rtw_cmd.h     |  4 +--
 drivers/staging/r8188eu/include/rtw_recv.h    |  4 +--
 drivers/staging/r8188eu/include/rtw_xmit.h    |  2 +-
 drivers/staging/r8188eu/include/wifi.h        | 12 +++----
 drivers/staging/r8188eu/os_dep/ioctl_linux.c  |  6 ++--
 15 files changed, 54 insertions(+), 54 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_ioctl_set.c b/drivers/staging/r8188eu/core/rtw_ioctl_set.c
index 55e6b0f41dc32..7864318266598 100644
--- a/drivers/staging/r8188eu/core/rtw_ioctl_set.c
+++ b/drivers/staging/r8188eu/core/rtw_ioctl_set.c
@@ -287,7 +287,7 @@ u8 rtw_set_802_11_infrastructure_mode(struct adapter *padapter,
 
 		if ((*pold_state == Ndis802_11Infrastructure) || (*pold_state == Ndis802_11IBSS)) {
 			if (check_fwstate(pmlmepriv, _FW_LINKED))
-				rtw_indicate_disconnect(padapter); /* will clr Linked_state; before this function, we must have chked whether  issue dis-assoc_cmd or not */
+				rtw_indicate_disconnect(padapter); /* will clr Linked_state; before this function, we must have checked whether issue dis-assoc_cmd or not */
 	       }
 
 		*pold_state = networktype;
diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index fda446b6779c3..d146b94307b80 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -137,7 +137,7 @@ static struct rt_channel_plan_map	RTW_ChannelPlanMap[RT_CHANNEL_DOMAIN_MAX] = {
 	{0x03},	/* 0x41, RT_CHANNEL_DOMAIN_GLOBAL_DOAMIN_2G */
 };
 
-static struct rt_channel_plan_map RTW_CHANNEL_PLAN_MAP_REALTEK_DEFINE = {0x03}; /* use the conbination for max channel numbers */
+static struct rt_channel_plan_map RTW_CHANNEL_PLAN_MAP_REALTEK_DEFINE = {0x03}; /* use the combination for max channel numbers */
 
 /*
  * Search the @param channel_num in given @param channel_set
@@ -1751,7 +1751,7 @@ void issue_p2p_GO_request(struct adapter *padapter, u8 *raddr)
 	p2pie[p2pielen++] = 0x09;	/*	WFA P2P v1.0 */
 
 	/*	Commented by Albert 20110306 */
-	/*	According to the P2P Specification, the group negoitation request frame should contain 9 P2P attributes */
+	/*	According to the P2P Specification, the group negotiation request frame should contain 9 P2P attributes */
 	/*	1. P2P Capability */
 	/*	2. Group Owner Intent */
 	/*	3. Configuration Timeout */
@@ -2106,7 +2106,7 @@ static void issue_p2p_GO_response(struct adapter *padapter, u8 *raddr, u8 *frame
 	p2pie[p2pielen++] = 0x09;	/*	WFA P2P v1.0 */
 
 	/*	Commented by Albert 20100908 */
-	/*	According to the P2P Specification, the group negoitation response frame should contain 9 P2P attributes */
+	/*	According to the P2P Specification, the group negotiation response frame should contain 9 P2P attributes */
 	/*	1. Status */
 	/*	2. P2P Capability */
 	/*	3. Group Owner Intent */
@@ -2402,7 +2402,7 @@ static void issue_p2p_GO_confirm(struct adapter *padapter, u8 *raddr, u8 result)
 	p2pie[p2pielen++] = 0x09;	/*	WFA P2P v1.0 */
 
 	/*	Commented by Albert 20110306 */
-	/*	According to the P2P Specification, the group negoitation request frame should contain 5 P2P attributes */
+	/*	According to the P2P Specification, the group negotiation request frame should contain 5 P2P attributes */
 	/*	1. Status */
 	/*	2. P2P Capability */
 	/*	3. Operating Channel */
@@ -4010,7 +4010,7 @@ struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv)
 
 /****************************************************************************
 
-Following are some TX fuctions for WiFi MLME
+Following are some TX functions for WiFi MLME
 
 *****************************************************************************/
 
@@ -4612,7 +4612,7 @@ exit:
 	return ret;
 }
 
-/*  if psta == NULL, indiate we are station(client) now... */
+/*  if psta == NULL, indicate we are station (client) now... */
 void issue_auth(struct adapter *padapter, struct sta_info *psta, unsigned short status)
 {
 	struct xmit_frame *pmgntframe;
@@ -5011,7 +5011,7 @@ void issue_assocreq(struct adapter *padapter)
 				if (!padapter->registrypriv.wifi_spec) {
 					/* Commented by Kurt 20110629 */
 					/* In some older APs, WPS handshake */
-					/* would be fail if we append vender extensions informations to AP */
+					/* would be fail if we append vendor extension information to AP */
 					if (!memcmp(pIE->data, WPS_OUI, 4))
 						pIE->Length = 14;
 				}
@@ -5166,7 +5166,7 @@ exit:
 		kfree(pmlmepriv->assoc_req);
 }
 
-/* when wait_ack is ture, this function shoule be called at process context */
+/* when wait_ack is true, this function should be called at process context */
 static int _issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int wait_ack)
 {
 	int ret = _FAIL;
@@ -5235,7 +5235,7 @@ exit:
 	return ret;
 }
 
-/* when wait_ms > 0 , this function shoule be called at process context */
+/* when wait_ms > 0, this function should be called at process context */
 /* da == NULL for station mode */
 int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int power_mode, int try_cnt, int wait_ms)
 {
@@ -5244,7 +5244,7 @@ int issue_nulldata(struct adapter *padapter, unsigned char *da, unsigned int pow
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
 
-	/* da == NULL, assum it's null data for sta to ap*/
+	/* da == NULL, assume it's null data for sta to ap*/
 	if (!da)
 		da = get_my_bssid(&pmlmeinfo->network);
 
@@ -5268,7 +5268,7 @@ exit:
 	return ret;
 }
 
-/* when wait_ack is ture, this function shoule be called at process context */
+/* when wait_ack is true, this function should be called at process context */
 static int _issue_qos_nulldata(struct adapter *padapter, unsigned char *da, u16 tid, int wait_ack)
 {
 	int ret = _FAIL;
@@ -5341,7 +5341,7 @@ exit:
 	return ret;
 }
 
-/* when wait_ms > 0 , this function shoule be called at process context */
+/* when wait_ms > 0 , this function should be called at process context */
 /* da == NULL for station mode */
 int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, u16 tid, int try_cnt, int wait_ms)
 {
@@ -5350,7 +5350,7 @@ int issue_qos_nulldata(struct adapter *padapter, unsigned char *da, u16 tid, int
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
 
-	/* da == NULL, assum it's null data for sta to ap*/
+	/* da == NULL, assume it's null data for sta to ap*/
 	if (!da)
 		da = get_my_bssid(&pmlmeinfo->network);
 
@@ -5882,7 +5882,7 @@ static void rtw_set_opmode(struct adapter *adapter, u8 mode)
 
 /****************************************************************************
 
-Following are some utitity fuctions for WiFi MLME
+Following are some utility functions for WiFi MLME
 
 *****************************************************************************/
 
@@ -6055,7 +6055,7 @@ void site_survey(struct adapter *padapter)
 		} else {
 			/*  20100721:Interrupt scan operation here. */
 			/*  For SW antenna diversity before link, it needs to switch to another antenna and scan again. */
-			/*  It compares the scan result and select beter one to do connection. */
+			/*  It compares the scan result and selects a better one to do connection. */
 			if (AntDivBeforeLink8188E(padapter)) {
 				pmlmeext->sitesurvey_res.bss_cnt = 0;
 				pmlmeext->sitesurvey_res.channel_idx = -1;
@@ -6325,7 +6325,7 @@ void start_create_ibss(struct adapter *padapter)
 	/* update wireless mode */
 	update_wireless_mode(padapter);
 
-	/* udpate capability */
+	/* update capability */
 	caps = rtw_get_capability((struct wlan_bssid_ex *)pnetwork);
 	update_capinfo(padapter, caps);
 	if (caps & cap_IBSS) {/* adhoc master */
@@ -6375,7 +6375,7 @@ void start_clnt_join(struct adapter *padapter)
 	/* update wireless mode */
 	update_wireless_mode(padapter);
 
-	/* udpate capability */
+	/* update capability */
 	caps = rtw_get_capability((struct wlan_bssid_ex *)pnetwork);
 	update_capinfo(padapter, caps);
 	if (caps & cap_ESS) {
@@ -6969,7 +6969,7 @@ void mlmeext_joinbss_event_callback(struct adapter *padapter, int join_res)
 	/* BCN interval */
 	rtw_write16(padapter, REG_BCN_INTERVAL, pmlmeinfo->bcn_interval);
 
-	/* udpate capability */
+	/* update capability */
 	update_capinfo(padapter, pmlmeinfo->capability);
 
 	/* WMM, Update EDCA param */
diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c
index bb5c3b3888e08..4b68a543f68b7 100644
--- a/drivers/staging/r8188eu/core/rtw_recv.c
+++ b/drivers/staging/r8188eu/core/rtw_recv.c
@@ -972,7 +972,7 @@ static void validate_recv_ctrl_frame(struct adapter *padapter,
 			if (psta->sleepq_len == 0) {
 				pstapriv->tim_bitmap &= ~BIT(psta->aid);
 
-				/* upate BCN for TIM IE */
+				/* update BCN for TIM IE */
 				/* update_BCNTIM(padapter); */
 				update_beacon(padapter, _TIM_IE_, NULL, false);
 			}
@@ -986,7 +986,7 @@ static void validate_recv_ctrl_frame(struct adapter *padapter,
 
 				pstapriv->tim_bitmap &= ~BIT(psta->aid);
 
-				/* upate BCN for TIM IE */
+				/* update BCN for TIM IE */
 				/* update_BCNTIM(padapter); */
 				update_beacon(padapter, _TIM_IE_, NULL, false);
 			}
@@ -1984,13 +1984,13 @@ static void rtw_signal_stat_timer_hdl(struct timer_list *t)
 	} else {
 		if (recvpriv->signal_strength_data.update_req == 0) {/*  update_req is clear, means we got rx */
 			avg_signal_strength = recvpriv->signal_strength_data.avg_val;
-			/*  after avg_vals are accquired, we can re-stat the signal values */
+			/*  after avg_vals are acquired, we can re-stat the signal values */
 			recvpriv->signal_strength_data.update_req = 1;
 		}
 
 		if (recvpriv->signal_qual_data.update_req == 0) {/*  update_req is clear, means we got rx */
 			avg_signal_qual = recvpriv->signal_qual_data.avg_val;
-			/*  after avg_vals are accquired, we can re-stat the signal values */
+			/*  after avg_vals are acquired, we can re-stat the signal values */
 			recvpriv->signal_qual_data.update_req = 1;
 		}
 
diff --git a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
index 525deab10820b..60cdfcf80daad 100644
--- a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
+++ b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
@@ -69,7 +69,7 @@ void ODM_TxPwrTrackAdjust88E(struct odm_dm_struct *dm_odm, u8 Type,/*  0 = OFDM,
 /*-----------------------------------------------------------------------------
  * Function:	odm_TxPwrTrackSetPwr88E()
  *
- * Overview:	88E change all channel tx power accordign to flag.
+ * Overview:	88E change all channel tx power according to flag.
  *				OFDM & CCK are all different.
  *
  * Input:		NONE
diff --git a/drivers/staging/r8188eu/hal/odm_RTL8188E.c b/drivers/staging/r8188eu/hal/odm_RTL8188E.c
index c8a3c521bd607..dd9c8291f025d 100644
--- a/drivers/staging/r8188eu/hal/odm_RTL8188E.c
+++ b/drivers/staging/r8188eu/hal/odm_RTL8188E.c
@@ -194,7 +194,7 @@ static void odm_HWAntDiv(struct odm_dm_struct *dm_odm)
 	for (i = 0; i < ODM_ASSOCIATE_ENTRY_NUM; i++) {
 		pEntry = dm_odm->pODM_StaInfo[i];
 		if (IS_STA_VALID(pEntry)) {
-			/* 2 Caculate RSSI per Antenna */
+			/* 2 Calculate RSSI per Antenna */
 			Main_RSSI = (dm_fat_tbl->MainAnt_Cnt[i] != 0) ? (dm_fat_tbl->MainAnt_Sum[i] / dm_fat_tbl->MainAnt_Cnt[i]) : 0;
 			Aux_RSSI = (dm_fat_tbl->AuxAnt_Cnt[i] != 0) ? (dm_fat_tbl->AuxAnt_Sum[i] / dm_fat_tbl->AuxAnt_Cnt[i]) : 0;
 			TargetAnt = (Main_RSSI >= Aux_RSSI) ? MAIN_ANT : AUX_ANT;
diff --git a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
index 158260547f2b2..cc29963f4b491 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
@@ -355,7 +355,7 @@ void rtl8188e_EfusePowerSwitch(struct adapter *pAdapter, u8 PwrState)
 	if (PwrState) {
 		rtw_write8(pAdapter, REG_EFUSE_ACCESS, EFUSE_ACCESS_ON);
 
-		/*  1.2V Power: From VDDON with Power Cut(0x0000h[15]), defualt valid */
+		/*  1.2V Power: From VDDON with Power Cut(0x0000h[15]), default valid */
 		res = rtw_read16(pAdapter, REG_SYS_ISO_CTRL, &tmpV16);
 		if (res)
 			return;
diff --git a/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c b/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c
index 532c63bce0bf0..b7f3c7a670fb4 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_phycfg.c
@@ -23,7 +23,7 @@ static u32 phy_calculate_bit_shift(u32 bitmask)
 /**
 * Function:	PHY_QueryBBReg
 *
-* OverView:	Read "sepcific bits" from BB register
+* Overview:	Read "sepcific bits" from BB register
 *
 * Input:
 *			struct adapter *Adapter,
@@ -56,7 +56,7 @@ rtl8188e_PHY_QueryBBReg(
 /**
 * Function:	PHY_SetBBReg
 *
-* OverView:	Write "Specific bits" to BB register (page 8~)
+* Overview:	Write "Specific bits" to BB register (page 8~)
 *
 * Input:
 *			struct adapter *Adapter,
@@ -94,7 +94,7 @@ void rtl8188e_PHY_SetBBReg(struct adapter *Adapter, u32 RegAddr, u32 BitMask, u3
 /**
 * Function:	phy_RFSerialRead
 *
-* OverView:	Read regster from RF chips
+* Overview:	Read register from RF chips
 *
 * Input:
 *			struct adapter *Adapter,
@@ -160,7 +160,7 @@ phy_RFSerialRead(
 /**
 * Function:	phy_RFSerialWrite
 *
-* OverView:	Write data to RF register (page 8~)
+* Overview:	Write data to RF register (page 8~)
 *
 * Input:
 *			struct adapter *Adapter,
@@ -235,7 +235,7 @@ phy_RFSerialWrite(
 /**
 * Function:	PHY_QueryRFReg
 *
-* OverView:	Query "Specific bits" to RF register (page 8~)
+* Overview:	Query "Specific bits" to RF register (page 8~)
 *
 * Input:
 *			struct adapter *Adapter,
@@ -261,7 +261,7 @@ u32 rtl8188e_PHY_QueryRFReg(struct adapter *Adapter, u32 RegAddr, u32 BitMask)
 /**
 * Function:	PHY_SetRFReg
 *
-* OverView:	Write "Specific bits" to RF register (page 8~)
+* Overview:	Write "Specific bits" to RF register (page 8~)
 *
 * Input:
 *			struct adapter *Adapter,
@@ -335,7 +335,7 @@ s32 PHY_MACConfig8188E(struct adapter *Adapter)
 /**
 * Function:	phy_InitBBRFRegisterDefinition
 *
-* OverView:	Initialize Register definition offset for Radio Path A/B/C/D
+* Overview:	Initialize Register definition offset for Radio Path A/B/C/D
 *
 * Input:
 *			struct adapter *Adapter,
@@ -363,7 +363,7 @@ phy_InitBBRFRegisterDefinition(
 	/*  RF Interface (Output and)  Enable */
 	pHalData->PHYRegDef.rfintfe = rFPGA0_XA_RFInterfaceOE; /*  16 MSBs if read 32-bit from 0x860 (16-bit for 0x862) */
 
-	/* Addr of LSSI. Wirte RF register by driver */
+	/* Addr of LSSI. Write RF register by driver */
 	pHalData->PHYRegDef.rf3wireOffset = rFPGA0_XA_LSSIParameter; /* LSSI Parameter */
 
 	/*  RF parameter */
diff --git a/drivers/staging/r8188eu/include/Hal8188EPhyReg.h b/drivers/staging/r8188eu/include/Hal8188EPhyReg.h
index 8b8c75a1f1490..da2329be4474a 100644
--- a/drivers/staging/r8188eu/include/Hal8188EPhyReg.h
+++ b/drivers/staging/r8188eu/include/Hal8188EPhyReg.h
@@ -92,7 +92,7 @@
 #define	rFPGA0_AdDaClockEn		0x888
 #define	rFPGA0_AnalogParameter4		0x88c
 
-#define	rFPGA0_XA_LSSIReadBack		0x8a0	/*  Tranceiver LSSI Readback */
+#define	rFPGA0_XA_LSSIReadBack		0x8a0	/*  Transceiver LSSI Readback */
 #define	rFPGA0_XB_LSSIReadBack		0x8a4
 #define	rFPGA0_XC_LSSIReadBack		0x8a8
 #define	rFPGA0_XD_LSSIReadBack		0x8ac
@@ -167,7 +167,7 @@
 
 /* RxIQ DC offset, Rx digital filter, DC notch filter */
 #define	rOFDM0_XARxAFE			0xc10
-#define	rOFDM0_XARxIQImbalance		0xc14  /* RxIQ imblance matrix */
+#define	rOFDM0_XARxIQImbalance		0xc14  /* RxIQ imbalance matrix */
 #define	rOFDM0_XBRxAFE			0xc18
 #define	rOFDM0_XBRxIQImbalance		0xc1c
 #define	rOFDM0_XCRxAFE			0xc20
diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h
index a1e88e6d5c0c7..69faaaa7d3730 100644
--- a/drivers/staging/r8188eu/include/rtl8188e_hal.h
+++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h
@@ -87,7 +87,7 @@ struct txpowerinfo24g {
 /*  9bytes + 1byt + 5bytes and pre 1byte. */
 /*  For worst case: */
 /*  | 2byte|----8bytes----|1byte|--7bytes--| 92D */
-/*  PG data exclude header, dummy 7 bytes frome CP test and reserved 1byte. */
+/*  PG data exclude header, dummy 7 bytes from CP test and reserved 1byte. */
 #define		EFUSE_OOB_PROTECT_BYTES_88E	18
 
 #define EFUSE_PROTECT_BYTES_BANK	16
diff --git a/drivers/staging/r8188eu/include/rtl8188e_spec.h b/drivers/staging/r8188eu/include/rtl8188e_spec.h
index e34619140e33c..e34ecdc096886 100644
--- a/drivers/staging/r8188eu/include/rtl8188e_spec.h
+++ b/drivers/staging/r8188eu/include/rtl8188e_spec.h
@@ -4,7 +4,7 @@
 #ifndef __RTL8188E_SPEC_H__
 #define __RTL8188E_SPEC_H__
 
-/*        8192C Regsiter offset definition */
+/*        8192C Register offset definition */
 
 #define		HAL_PS_TIMER_INT_DELAY	50	/*   50 microseconds */
 #define		HAL_92C_NAV_UPPER_UNIT	128	/*  micro-second */
@@ -674,7 +674,7 @@ Current IOREG MAP
 
 #define REG_USB_HRPWM			0xFE58
 #define REG_USB_HCPWM			0xFE57
-/*        8192C Regsiter Bit and Content definition */
+/*        8192C Register Bit and Content definition */
 /* 	0x0000h ~ 0x00FFh	System Configuration */
 
 /* 2 SYS_ISO_CTRL */
@@ -1135,7 +1135,7 @@ Current IOREG MAP
 #define EEPROM_Default_CrystalCap_88E		0x20
 #define	EEPROM_Default_ThermalMeter_88E		0x18
 
-/* New EFUSE deafult value */
+/* New EFUSE default value */
 #define		EEPROM_DEFAULT_24G_INDEX	0x2D
 #define		EEPROM_DEFAULT_24G_HT20_DIFF	0X02
 #define		EEPROM_DEFAULT_24G_OFDM_DIFF	0X04
diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index ee9218b1d7a94..2896a732780b1 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -450,7 +450,7 @@ struct Tx_Beacon_param
 
 	mac[0] == 0
 	==> CMD mode, return H2C_SUCCESS.
-	The following condition must be ture under CMD mode
+	The following condition must be true under CMD mode
 		mac[1] == mac[4], mac[2] == mac[3], mac[0]=mac[5]= 0;
 		s0 == 0x1234, s1 == 0xabcd, w0 == 0x78563412, w1 == 0x5aa5def7;
 		s2 == (b1 << 8 | b0);
@@ -503,7 +503,7 @@ struct drvextra_cmd_parm {
 	unsigned char *pbuf;
 };
 
-/*------------------- Below are used for RF/BB tunning ---------------------*/
+/*------------------- Below are used for RF/BB tuning ---------------------*/
 
 struct	setantenna_parm {
 	u8	tx_antset;
diff --git a/drivers/staging/r8188eu/include/rtw_recv.h b/drivers/staging/r8188eu/include/rtw_recv.h
index 7768b0c5988c9..12026431a3d29 100644
--- a/drivers/staging/r8188eu/include/rtw_recv.h
+++ b/drivers/staging/r8188eu/include/rtw_recv.h
@@ -92,7 +92,7 @@ struct rx_pkt_attrib {
 	u8	privacy; /* in frame_ctrl field */
 	u8	bdecrypted;
 	u8	encrypt; /* when 0 indicate no encrypt. when non-zero,
-			  * indicate the encrypt algorith */
+			  * indicate the encrypt algorithm */
 	u8	iv_len;
 	u8	icv_len;
 	u8	crc_err;
@@ -175,7 +175,7 @@ struct recv_priv {
 	u8 *precv_buf;    /*  4 alignment */
 	struct __queue free_recv_buf_queue;
 	u32	free_recv_buf_queue_cnt;
-	/* For display the phy informatiom */
+	/* For display the phy information */
 	u8 is_signal_dbg;	/*  for debug */
 	u8 signal_strength_dbg;	/*  for debug */
 	s8 rssi;
diff --git a/drivers/staging/r8188eu/include/rtw_xmit.h b/drivers/staging/r8188eu/include/rtw_xmit.h
index 82efcd54af3f9..cff065554608d 100644
--- a/drivers/staging/r8188eu/include/rtw_xmit.h
+++ b/drivers/staging/r8188eu/include/rtw_xmit.h
@@ -116,7 +116,7 @@ struct pkt_attrib {
 	u32	last_txcmdsz;
 	u8	nr_frags;
 	u8	encrypt;	/* when 0 indicate no encrypt. when non-zero,
-				 * indicate the encrypt algorith */
+				 * indicate the encrypt algorithm */
 	u8	iv_len;
 	u8	icv_len;
 	u8	iv[18];
diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h
index 0254310bdf440..381385a7e1189 100644
--- a/drivers/staging/r8188eu/include/wifi.h
+++ b/drivers/staging/r8188eu/include/wifi.h
@@ -701,7 +701,7 @@ struct ADDBA_request {
 
 #define	P2P_WILDCARD_SSID_LEN			7
 
-/* default value, used when: (1)p2p disabed or (2)p2p enabled
+/* default value, used when: (1)p2p disabled or (2)p2p enabled
  * but only do 1 scan phase */
 #define	P2P_FINDPHASE_EX_NONE		0
 /*  used when p2p enabled and want to do 1 scan phase and
@@ -766,11 +766,11 @@ enum P2P_STATE {
 	P2P_STATE_TX_PROVISION_DIS_REQ = 6,
 	P2P_STATE_RX_PROVISION_DIS_RSP = 7,
 	P2P_STATE_RX_PROVISION_DIS_REQ = 8,
-	/* Doing the group owner negoitation handshake */
+	/* Doing the group owner negotiation handshake */
 	P2P_STATE_GONEGO_ING = 9,
-	/* finish the group negoitation handshake with success */
+	/* finish the group negotiation handshake with success */
 	P2P_STATE_GONEGO_OK = 10,
-	/* finish the group negoitation handshake with failure */
+	/* finish the group negotiation handshake with failure */
 	P2P_STATE_GONEGO_FAIL = 11,
 	/* receiving the P2P Inviation request and match with the profile. */
 	P2P_STATE_RECV_INVITE_REQ_MATCH = 12,
@@ -790,9 +790,9 @@ enum P2P_STATE {
 	P2P_STATE_RECV_INVITE_REQ_JOIN = 19,
 	/* recveing the P2P Inviation response with failure */
 	P2P_STATE_RX_INVITE_RESP_FAIL = 20,
-	/* receiving p2p negoitation response with information is not available */
+	/* receiving p2p negotiation response with information is not available */
 	P2P_STATE_RX_INFOR_NOREADY = 21,
-	/* sending p2p negoitation response with information is not available */
+	/* sending p2p negotiation response with information is not available */
 	P2P_STATE_TX_INFOR_NOREADY = 22,
 };
 
diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index 2de2e1e32738a..8516e253bb031 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -2647,7 +2647,7 @@ static int rtw_p2p_connect(struct net_device *dev,
 	u32 peer_channel = 0;
 
 	/*	Commented by Albert 20110304 */
-	/*	The input data contains two informations. */
+	/*	The input data contains two information. */
 	/*	1. First information is the MAC address which wants to formate with */
 	/*	2. Second information is the WPS PINCode or "pbc" string for push button method */
 	/*	Format: 00:E0:4C:00:00:05 */
@@ -2721,7 +2721,7 @@ static void rtw_p2p_invite_req(struct net_device *dev,
 	uint p2pielen = 0, attr_contentlen = 0;
 	struct tx_invite_req_info *pinvite_req_info = &pwdinfo->invitereq_info;
 
-	/*	The input data contains two informations. */
+	/*	The input data contains two information items. */
 	/*	1. First information is the P2P device address which you want to send to. */
 	/*	2. Second information is the group id which combines with GO's mac address, space and GO's ssid. */
 	/*	Command line sample: iwpriv wlan0 p2p_set invite ="00:11:22:33:44:55 00:E0:4C:00:00:05 DIRECT-xy" */
@@ -2845,7 +2845,7 @@ static void rtw_p2p_prov_disc(struct net_device *dev,
 	u8 *p2pie;
 	uint p2pielen = 0, attr_contentlen = 0;
 
-	/*	The input data contains two informations. */
+	/*	The input data contains two information items. */
 	/*	1. First information is the MAC address which wants to issue the provisioning discovery request frame. */
 	/*	2. Second information is the WPS configuration method which wants to discovery */
 	/*	Format: 00:E0:4C:00:00:05_display */
-- 
GitLab


From 4b66ec6961f170cf782fb66520c36f2e950c7601 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 03:00:13 +0530
Subject: [PATCH 0333/4122] staging: r8188eu: Add space between function &
 macro parameters

Space required between function and macro parameters to improve code
readability. This Linux kernel coding style guideline resolves following
error reported by checkpatch script:
	ERROR: space required after that ',' (ctx:VxV)

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/ce200b3a986628f943dfb0c4e412276793e59bbc.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/r8188eu/include/osdep_service.h   |  4 +--
 .../staging/r8188eu/include/rtl8188e_hal.h    |  2 +-
 drivers/staging/r8188eu/include/rtw_cmd.h     | 18 ++++++------
 drivers/staging/r8188eu/include/rtw_io.h      | 28 +++++++++----------
 4 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/drivers/staging/r8188eu/include/osdep_service.h b/drivers/staging/r8188eu/include/osdep_service.h
index 72990a1cdc661..ec2631455f08c 100644
--- a/drivers/staging/r8188eu/include/osdep_service.h
+++ b/drivers/staging/r8188eu/include/osdep_service.h
@@ -53,7 +53,7 @@ static inline struct list_head *get_list_head(struct __queue *queue)
 	return (&(queue->queue));
 }
 
-static inline void _set_timer(struct timer_list *ptimer,u32 delay_time)
+static inline void _set_timer(struct timer_list *ptimer, u32 delay_time)
 {
 	mod_timer(ptimer, jiffies + msecs_to_jiffies(delay_time));
 }
@@ -108,7 +108,7 @@ void rtw_free_netdev(struct net_device *netdev);
 #define FUNC_ADPT_FMT "%s(%s)"
 #define FUNC_ADPT_ARG(adapter) __func__, adapter->pnetdev->name
 
-#define rtw_signal_process(pid, sig) kill_pid(find_vpid((pid)),(sig), 1)
+#define rtw_signal_process(pid, sig) kill_pid(find_vpid((pid)), (sig), 1)
 
 /* Macros for handling unaligned memory accesses */
 
diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h
index 69faaaa7d3730..d9681f9b39153 100644
--- a/drivers/staging/r8188eu/include/rtl8188e_hal.h
+++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h
@@ -164,7 +164,7 @@ void Hal_ReadTxPowerInfo88E(struct adapter *padapter, u8 *hwinfo,
 
 void rtl8188e_EfuseParseChnlPlan(struct adapter *padapter, u8 *hwinfo,
 				 bool AutoLoadFail);
-void Hal_ReadAntennaDiversity88E(struct adapter *pAdapter,u8 *PROMContent,
+void Hal_ReadAntennaDiversity88E(struct adapter *pAdapter, u8 *PROMContent,
 				 bool AutoLoadFail);
 void Hal_ReadThermalMeter_88E(struct adapter *	dapter, u8 *PROMContent,
 			      bool AutoloadFail);
diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index 2896a732780b1..b4d4ac056e91b 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -725,17 +725,17 @@ Result:
 #define H2C_CMD_OVERFLOW	0x06
 #define H2C_RESERVED		0x07
 
-u8 rtw_sitesurvey_cmd(struct adapter  *padapter, struct ndis_802_11_ssid *ssid, int ssid_num);
-u8 rtw_createbss_cmd(struct adapter  *padapter);
+u8 rtw_sitesurvey_cmd(struct adapter *padapter, struct ndis_802_11_ssid *ssid, int ssid_num);
+u8 rtw_createbss_cmd(struct adapter *padapter);
 u8 rtw_setstakey_cmd(struct adapter *padapter, u8 *psta, u8 unicast_key);
 u8 rtw_clearstakey_cmd(struct adapter *padapter, u8 *psta, u8 entry, u8 enqueue);
-u8 rtw_joinbss_cmd(struct adapter  *padapter, struct wlan_network* pnetwork);
+u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network* pnetwork);
 u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueue);
-u8 rtw_setopmode_cmd(struct adapter  *padapter, enum ndis_802_11_network_infra networktype);
-u8 rtw_setdatarate_cmd(struct adapter  *padapter, u8 *rateset);
-u8 rtw_setrfintfs_cmd(struct adapter  *padapter, u8 mode);
+u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infra networktype);
+u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset);
+u8 rtw_setrfintfs_cmd(struct adapter *padapter, u8 mode);
 
-u8 rtw_gettssi_cmd(struct adapter  *padapter, u8 offset,u8 *pval);
+u8 rtw_gettssi_cmd(struct adapter *padapter, u8 offset, u8 *pval);
 u8 rtw_setfwdig_cmd(struct adapter*padapter, u8 type);
 u8 rtw_setfwra_cmd(struct adapter*padapter, u8 type);
 
@@ -746,10 +746,10 @@ u8 rtw_dynamic_chk_wk_cmd(struct adapter *adapter);
 u8 rtw_lps_ctrl_wk_cmd(struct adapter*padapter, u8 lps_ctrl_type, u8 enqueue);
 u8 rtw_rpt_timer_cfg_cmd(struct adapter*padapter, u16 minRptTime);
 
- u8 rtw_antenna_select_cmd(struct adapter*padapter, u8 antenna,u8 enqueue);
+u8 rtw_antenna_select_cmd(struct adapter*padapter, u8 antenna, u8 enqueue);
 u8 rtw_ps_cmd(struct adapter*padapter);
 
-u8 rtw_chk_hi_queue_cmd(struct adapter*padapter);
+u8 rtw_chk_hi_queue_cmd(struct adapter *padapter);
 
 u8 rtw_set_chplan_cmd(struct adapter *padapter, u8 chplan);
 
diff --git a/drivers/staging/r8188eu/include/rtw_io.h b/drivers/staging/r8188eu/include/rtw_io.h
index 925c7967ac04e..87fcf6c94ff32 100644
--- a/drivers/staging/r8188eu/include/rtw_io.h
+++ b/drivers/staging/r8188eu/include/rtw_io.h
@@ -209,7 +209,7 @@ struct io_priv {
 };
 
 uint ioreq_flush(struct adapter *adapter, struct io_queue *ioqueue);
-void sync_ioreq_enqueue(struct io_req *preq,struct io_queue *ioqueue);
+void sync_ioreq_enqueue(struct io_req *preq, struct io_queue *ioqueue);
 uint sync_ioreq_flush(struct adapter *adapter, struct io_queue *ioqueue);
 uint free_ioreq(struct io_req *preq, struct io_queue *pio_queue);
 struct io_req *alloc_ioreq(struct io_queue *pio_q);
@@ -285,18 +285,18 @@ void bus_sync_io(struct io_queue *pio_q);
 u32 _ioreq2rwmem(struct io_queue *pio_q);
 void dev_power_down(struct adapter *Adapter, u8 bpwrup);
 
-#define PlatformEFIOWrite1Byte(_a,_b,_c)		\
-	rtw_write8(_a,_b,_c)
-#define PlatformEFIOWrite2Byte(_a,_b,_c)		\
-	rtw_write16(_a,_b,_c)
-#define PlatformEFIOWrite4Byte(_a,_b,_c)		\
-	rtw_write32(_a,_b,_c)
-
-#define PlatformEFIORead1Byte(_a,_b)		\
-		rtw_read8(_a,_b)
-#define PlatformEFIORead2Byte(_a,_b)		\
-		rtw_read16(_a,_b)
-#define PlatformEFIORead4Byte(_a,_b)		\
-		rtw_read32(_a,_b)
+#define PlatformEFIOWrite1Byte(_a, _b, _c)		\
+	rtw_write8(_a, _b, _c)
+#define PlatformEFIOWrite2Byte(_a, _b, _c)		\
+	rtw_write16(_a, _b, _c)
+#define PlatformEFIOWrite4Byte(_a, _b, _c)		\
+	rtw_write32(_a, _b, _c)
+
+#define PlatformEFIORead1Byte(_a, _b)		\
+		rtw_read8(_a, _b)
+#define PlatformEFIORead2Byte(_a, _b)		\
+		rtw_read16(_a, _b)
+#define PlatformEFIORead4Byte(_a, _b)		\
+		rtw_read32(_a, _b)
 
 #endif	/* _RTL8711_IO_H_ */
-- 
GitLab


From 2bf279885c596c7680426cacd0fe137db0e00a85 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 03:00:35 +0530
Subject: [PATCH 0334/4122] staging: r8188eu: Associate pointer symbol with
 parameter name

The pointer symbol '*' should be associated with the function parameter
name and not its type. This improves code readability and adheres to the
coding-style guidelines. Address following checkpatch reported error:
	ERROR: "foo *   bar" should be "foo *bar"

While in there, update parameter name at one place to match other function
declarations.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/d946b69bfdfb44baae3a130e412ed2e217a710a7.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/rtl8188e_hal.h  |  2 +-
 drivers/staging/r8188eu/include/rtw_cmd.h       | 16 ++++++++--------
 drivers/staging/r8188eu/include/rtw_ioctl_set.h |  4 ++--
 drivers/staging/r8188eu/include/rtw_mlme.h      |  2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h  |  8 ++++----
 5 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/r8188eu/include/rtl8188e_hal.h b/drivers/staging/r8188eu/include/rtl8188e_hal.h
index d9681f9b39153..feeb37c228977 100644
--- a/drivers/staging/r8188eu/include/rtl8188e_hal.h
+++ b/drivers/staging/r8188eu/include/rtl8188e_hal.h
@@ -166,7 +166,7 @@ void rtl8188e_EfuseParseChnlPlan(struct adapter *padapter, u8 *hwinfo,
 				 bool AutoLoadFail);
 void Hal_ReadAntennaDiversity88E(struct adapter *pAdapter, u8 *PROMContent,
 				 bool AutoLoadFail);
-void Hal_ReadThermalMeter_88E(struct adapter *	dapter, u8 *PROMContent,
+void Hal_ReadThermalMeter_88E(struct adapter *padapter, u8 *PROMContent,
 			      bool AutoloadFail);
 void Hal_EfuseParseXtal_8188E(struct adapter *pAdapter, u8 *hwinfo,
 			      bool AutoLoadFail);
diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index b4d4ac056e91b..98c9a7b677198 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -729,25 +729,25 @@ u8 rtw_sitesurvey_cmd(struct adapter *padapter, struct ndis_802_11_ssid *ssid, i
 u8 rtw_createbss_cmd(struct adapter *padapter);
 u8 rtw_setstakey_cmd(struct adapter *padapter, u8 *psta, u8 unicast_key);
 u8 rtw_clearstakey_cmd(struct adapter *padapter, u8 *psta, u8 entry, u8 enqueue);
-u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network* pnetwork);
+u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network *pnetwork);
 u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueue);
 u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infra networktype);
 u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset);
 u8 rtw_setrfintfs_cmd(struct adapter *padapter, u8 mode);
 
 u8 rtw_gettssi_cmd(struct adapter *padapter, u8 offset, u8 *pval);
-u8 rtw_setfwdig_cmd(struct adapter*padapter, u8 type);
-u8 rtw_setfwra_cmd(struct adapter*padapter, u8 type);
+u8 rtw_setfwdig_cmd(struct adapter *padapter, u8 type);
+u8 rtw_setfwra_cmd(struct adapter *padapter, u8 type);
 
-u8 rtw_addbareq_cmd(struct adapter*padapter, u8 tid, u8 *addr);
+u8 rtw_addbareq_cmd(struct adapter *padapter, u8 tid, u8 *addr);
 
 u8 rtw_dynamic_chk_wk_cmd(struct adapter *adapter);
 
-u8 rtw_lps_ctrl_wk_cmd(struct adapter*padapter, u8 lps_ctrl_type, u8 enqueue);
-u8 rtw_rpt_timer_cfg_cmd(struct adapter*padapter, u16 minRptTime);
+u8 rtw_lps_ctrl_wk_cmd(struct adapter *padapter, u8 lps_ctrl_type, u8 enqueue);
+u8 rtw_rpt_timer_cfg_cmd(struct adapter *padapter, u16 minRptTime);
 
-u8 rtw_antenna_select_cmd(struct adapter*padapter, u8 antenna, u8 enqueue);
-u8 rtw_ps_cmd(struct adapter*padapter);
+u8 rtw_antenna_select_cmd(struct adapter *padapter, u8 antenna, u8 enqueue);
+u8 rtw_ps_cmd(struct adapter *padapter);
 
 u8 rtw_chk_hi_queue_cmd(struct adapter *padapter);
 
diff --git a/drivers/staging/r8188eu/include/rtw_ioctl_set.h b/drivers/staging/r8188eu/include/rtw_ioctl_set.h
index 7365079c704f2..abe460d6504d0 100644
--- a/drivers/staging/r8188eu/include/rtw_ioctl_set.h
+++ b/drivers/staging/r8188eu/include/rtw_ioctl_set.h
@@ -10,10 +10,10 @@ typedef u8 NDIS_802_11_PMKID_VALUE[16];
 
 u8 rtw_set_802_11_authentication_mode(struct adapter *adapt,
 				      enum ndis_802_11_auth_mode authmode);
-u8 rtw_set_802_11_bssid(struct adapter*adapter, u8 *bssid);
+u8 rtw_set_802_11_bssid(struct adapter *adapter, u8 *bssid);
 u8 rtw_set_802_11_add_wep(struct adapter *adapter, struct ndis_802_11_wep *wep);
 u8 rtw_set_802_11_disassociate(struct adapter *adapter);
-u8 rtw_set_802_11_bssid_list_scan(struct adapter*adapter,
+u8 rtw_set_802_11_bssid_list_scan(struct adapter *adapter,
 				  struct ndis_802_11_ssid *pssid,
 				  int ssid_max_num);
 u8 rtw_set_802_11_infrastructure_mode(struct adapter *adapter,
diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h
index b69989cbab21d..7658f864136e8 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme.h
@@ -547,7 +547,7 @@ void _rtw_free_network(struct mlme_priv *pmlmepriv,
 void _rtw_free_network_nolock(struct mlme_priv *pmlmepriv,
 			      struct wlan_network *pnetwork);
 
-struct wlan_network* _rtw_find_network(struct __queue *scanned_queue, u8 *addr);
+struct wlan_network *_rtw_find_network(struct __queue *scanned_queue, u8 *addr);
 
 void _rtw_free_network_queue(struct adapter *padapter, u8 isfreeall);
 
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index b322d0848db9b..be470f913a94a 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -479,11 +479,11 @@ void report_survey_event(struct adapter *padapter, struct recv_frame *precv_fram
 void report_surveydone_event(struct adapter *padapter);
 void report_del_sta_event(struct adapter *padapter,
 			  unsigned char *addr, unsigned short reason);
-void report_add_sta_event(struct adapter *padapter, unsigned char* addr,
+void report_add_sta_event(struct adapter *padapter, unsigned char *addr,
 			  int cam_idx);
 
 void beacon_timing_control(struct adapter *padapter);
-extern u8 set_tx_beacon_cmd(struct adapter*padapter);
+extern u8 set_tx_beacon_cmd(struct adapter *padapter);
 unsigned int setup_beacon_frame(struct adapter *padapter,
 				unsigned char *beacon_frame);
 void update_mgnt_tx_rate(struct adapter *padapter, u8 rate);
@@ -502,7 +502,7 @@ void issue_p2p_GO_request(struct adapter *padapter, u8 *raddr);
 void issue_probereq_p2p(struct adapter *padapter, u8 *da);
 void issue_p2p_invitation_response(struct adapter *padapter, u8 *raddr,
 				   u8 dialogToken, u8 success);
-void issue_p2p_invitation_request(struct adapter *padapter, u8* raddr);
+void issue_p2p_invitation_request(struct adapter *padapter, u8 *raddr);
 void issue_beacon(struct adapter *padapter, int timeout_ms);
 void issue_probersp(struct adapter *padapter, unsigned char *da,
 		    u8 is_valid_p2p_probereq);
@@ -514,7 +514,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta,
 void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid,
 		    u8 *da);
 s32 issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid,
-		      u8* da, int try_cnt, int wait_ms);
+		      u8 *da, int try_cnt, int wait_ms);
 int issue_nulldata(struct adapter *padapter, unsigned char *da,
 		   unsigned int power_mode, int try_cnt, int wait_ms);
 int issue_qos_nulldata(struct adapter *padapter, unsigned char *da,
-- 
GitLab


From 285e8d027ecccb4623331c62896d225371502262 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 03:01:05 +0530
Subject: [PATCH 0335/4122] staging: r8188eu: replace leading spaces by tabs

Spaces are prohibited as per the Linux coding style guidelines. Replace
those by tabs wherever possible to improve code alignment. Error
reported by checkpatch script.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/c32b702c61ea3367d60f0a4c2443093d6ce45a69.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/rtw_cmd.h      | 10 +++++-----
 drivers/staging/r8188eu/include/rtw_mlme.h     |  2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index 98c9a7b677198..45734d32fec60 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -587,14 +587,14 @@ struct setratable_parm {
 };
 
 struct getratable_parm {
-                uint rsvd;
+	uint rsvd;
 };
 
 struct getratable_rsp {
-        u8 ss_ForceUp[NumRates];
-        u8 ss_ULevel[NumRates];
-        u8 ss_DLevel[NumRates];
-        u8 count_judge[NumRates];
+	u8 ss_ForceUp[NumRates];
+	u8 ss_ULevel[NumRates];
+	u8 ss_DLevel[NumRates];
+	u8 count_judge[NumRates];
 };
 
 /* to get TX,RX retry count */
diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h
index 7658f864136e8..ebf7168a7ef9c 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme.h
@@ -528,7 +528,7 @@ void rtw_indicate_scan_done(struct adapter *padapter);
 int rtw_restruct_sec_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie,
 			uint in_len);
 int rtw_restruct_wmm_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie,
-		        uint in_len, uint initial_out_len);
+			uint in_len, uint initial_out_len);
 void rtw_init_registrypriv_dev_network(struct adapter *adapter);
 
 void rtw_update_registrypriv_dev_network(struct adapter *adapter);
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index be470f913a94a..413b94e387446 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -449,7 +449,7 @@ void ERP_IE_handler(struct adapter *padapter, struct ndis_802_11_var_ie *pIE);
 void VCS_update(struct adapter *padapter, struct sta_info *psta);
 
 void update_beacon_info(struct adapter *padapter, u8 *pframe, uint len,
-		        struct sta_info *psta);
+			struct sta_info *psta);
 int rtw_check_bcn_info(struct adapter  *Adapter, u8 *pframe, u32 packet_len);
 void update_IOT_info(struct adapter *padapter);
 void update_capinfo(struct adapter *adapter, u16 updatecap);
-- 
GitLab


From d119c18fa41dd455c6237e0b3dbb266de6a3c065 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 03:01:37 +0530
Subject: [PATCH 0336/4122] staging: r8188eu: Put '{" on the symbol declaration
 line

Open braces '{" should be placed on the line of symbol declaration as
per the coding-style guidelines. Improves readability and matches with
style used in rest of the code. Issue reported by checkpatch script.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/375f742936493b562bd4dfba90eb75bd8ab84f8a.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/rtw_cmd.h | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index 45734d32fec60..a740a9a101d88 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -440,8 +440,7 @@ struct getrfintfs_parm {
 	u8	rfintfs;
 };
 
-struct Tx_Beacon_param
-{
+struct Tx_Beacon_param {
 	struct wlan_bssid_ex network;
 };
 
@@ -677,26 +676,22 @@ struct set_ch_parm {
 };
 
 /*H2C Handler index: 59 */
-struct SetChannelPlan_param
-{
+struct SetChannelPlan_param {
 	u8 channel_plan;
 };
 
 /*H2C Handler index: 60 */
-struct LedBlink_param
-{
+struct LedBlink_param {
 	struct LED_871x *pLed;
 };
 
 /*H2C Handler index: 61 */
-struct SetChannelSwitch_param
-{
+struct SetChannelSwitch_param {
 	u8 new_ch_no;
 };
 
 /*H2C Handler index: 62 */
-struct TDLSoption_param
-{
+struct TDLSoption_param {
 	u8 addr[ETH_ALEN];
 	u8 option;
 };
@@ -854,8 +849,7 @@ enum rtw_h2c_cmd {
 #define _SetRFReg_CMD_		_Write_RFREG_CMD_
 
 #ifdef _RTW_CMD_C_
-static struct _cmd_callback	rtw_cmd_callback[] =
-{
+static struct _cmd_callback	rtw_cmd_callback[] = {
 	{GEN_CMD_CODE(_Read_MACREG), NULL}, /*0*/
 	{GEN_CMD_CODE(_Write_MACREG), NULL},
 	{GEN_CMD_CODE(_Read_BBREG), &rtw_getbbrfreg_cmdrsp_callback},
-- 
GitLab


From c61c8480e3a59d469d4ff7d6ed9c88d43ba1722e Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 03:02:12 +0530
Subject: [PATCH 0337/4122] staging: r8188eu: Correct missing or extra space in
 the statements

Properly spacing out code statements/instructions improves code
readability. Add missing or remove extra space as necessary according
to the Linux Kernel coding-style guidelines. Following errors reported
by checkpatch script for inconsistent code spacing:
	ERROR: space prohibited before that close parenthesis ')'
	ERROR: space prohibited before that ',' (ctx:WxW)
	CHECK: spaces preferred around that '&' (ctx:VxV)

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/4559d1a406b9f32379ec01cfadacea13a11803ac.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/r8188eu/include/rtl8188e_spec.h    | 12 ++++++------
 drivers/staging/r8188eu/include/rtw_mlme.h     | 18 +++++++++---------
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  2 +-
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/r8188eu/include/rtl8188e_spec.h b/drivers/staging/r8188eu/include/rtl8188e_spec.h
index e34ecdc096886..3fa3b3e5dd646 100644
--- a/drivers/staging/r8188eu/include/rtl8188e_spec.h
+++ b/drivers/staging/r8188eu/include/rtl8188e_spec.h
@@ -900,12 +900,12 @@ Current IOREG MAP
 #define HQSEL_HIQ			BIT(5)
 
 /*  For normal driver, 0x10C */
-#define _TXDMA_HIQ_MAP(x)		(((x)&0x3) << 14)
-#define _TXDMA_MGQ_MAP(x)		(((x)&0x3) << 12)
-#define _TXDMA_BKQ_MAP(x)		(((x)&0x3) << 10)
-#define _TXDMA_BEQ_MAP(x)		(((x)&0x3) << 8 )
-#define _TXDMA_VIQ_MAP(x)		(((x)&0x3) << 6 )
-#define _TXDMA_VOQ_MAP(x)		(((x)&0x3) << 4 )
+#define _TXDMA_HIQ_MAP(x)		(((x) & 0x3) << 14)
+#define _TXDMA_MGQ_MAP(x)		(((x) & 0x3) << 12)
+#define _TXDMA_BKQ_MAP(x)		(((x) & 0x3) << 10)
+#define _TXDMA_BEQ_MAP(x)		(((x) & 0x3) << 8)
+#define _TXDMA_VIQ_MAP(x)		(((x) & 0x3) << 6)
+#define _TXDMA_VOQ_MAP(x)		(((x) & 0x3) << 4)
 
 #define QUEUE_LOW			1
 #define QUEUE_NORMAL			2
diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h
index ebf7168a7ef9c..ca539c652f26d 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme.h
@@ -101,17 +101,17 @@ struct rt_link_detect {
 
 struct profile_info {
 	u8	ssidlen;
-	u8	ssid[ WLAN_SSID_MAXLEN ];
-	u8	peermac[ ETH_ALEN ];
+	u8	ssid[WLAN_SSID_MAXLEN];
+	u8	peermac[ETH_ALEN];
 };
 
 struct tx_invite_req_info {
 	u8	token;
 	u8	benable;
-	u8	go_ssid[ WLAN_SSID_MAXLEN ];
+	u8	go_ssid[WLAN_SSID_MAXLEN];
 	u8	ssidlen;
-	u8	go_bssid[ ETH_ALEN ];
-	u8	peer_macaddr[ ETH_ALEN ];
+	u8	go_bssid[ETH_ALEN];
+	u8	peer_macaddr[ETH_ALEN];
 	u8	operating_ch;	/* This information will be set by using the
 				 * p2p_set op_ch=x */
 	u8	peer_ch;	/* The listen channel for peer P2P device */
@@ -154,9 +154,9 @@ struct tx_nego_req_info {
 };
 
 struct group_id_info {
-	u8	go_device_addr[ ETH_ALEN ];	/* The GO's device address of
+	u8	go_device_addr[ETH_ALEN];	/* The GO's device address of
 						 * this P2P group */
-	u8	ssid[ WLAN_SSID_MAXLEN ];	/* The SSID of this P2P group */
+	u8	ssid[WLAN_SSID_MAXLEN];	/* The SSID of this P2P group */
 };
 
 struct scan_limit_info {
@@ -459,7 +459,7 @@ static inline void set_fwstate(struct mlme_priv *pmlmepriv, int state)
 {
 	pmlmepriv->fw_state |= state;
 	/* FOR HW integration */
-	if (_FW_UNDER_SURVEY==state)
+	if (_FW_UNDER_SURVEY == state)
 		pmlmepriv->bScanInProcess = true;
 }
 
@@ -467,7 +467,7 @@ static inline void _clr_fwstate_(struct mlme_priv *pmlmepriv, int state)
 {
 	pmlmepriv->fw_state &= ~state;
 	/* FOR HW integration */
-	if (_FW_UNDER_SURVEY==state)
+	if (_FW_UNDER_SURVEY == state)
 		pmlmepriv->bScanInProcess = false;
 }
 
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 413b94e387446..66aa8b497aa1d 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -729,7 +729,7 @@ enum rtw_c2h_event {
 	GEN_EVT_CODE(_Survey),	 /*8*/
 	GEN_EVT_CODE(_SurveyDone),	 /*9*/
 
-	GEN_EVT_CODE(_JoinBss) , /*10*/
+	GEN_EVT_CODE(_JoinBss), /*10*/
 	GEN_EVT_CODE(_AddSTA),
 	GEN_EVT_CODE(_DelSTA),
 	GEN_EVT_CODE(_AtimDone),
-- 
GitLab


From 325d0d2c9d1187143eb0a3d5a6f16308a986bdb4 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Fri, 21 Oct 2022 03:02:35 +0530
Subject: [PATCH 0338/4122] staging: r8188eu: Remove unused macros

Simple variants of macros PlatformEFIOWrite and PlatformEFIORead are
defined but never used. As they do not appear to be designed for anything
significant, we can remove them to avoid unexpected usage.

Suggested-by: Julia Lawall <julia.lawall@inria.fr>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/efaf637a14b6f7fdd0178e2aecf8abf17e6922f6.1666299151.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/rtw_io.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/staging/r8188eu/include/rtw_io.h b/drivers/staging/r8188eu/include/rtw_io.h
index 87fcf6c94ff32..e9744694204bc 100644
--- a/drivers/staging/r8188eu/include/rtw_io.h
+++ b/drivers/staging/r8188eu/include/rtw_io.h
@@ -285,18 +285,4 @@ void bus_sync_io(struct io_queue *pio_q);
 u32 _ioreq2rwmem(struct io_queue *pio_q);
 void dev_power_down(struct adapter *Adapter, u8 bpwrup);
 
-#define PlatformEFIOWrite1Byte(_a, _b, _c)		\
-	rtw_write8(_a, _b, _c)
-#define PlatformEFIOWrite2Byte(_a, _b, _c)		\
-	rtw_write16(_a, _b, _c)
-#define PlatformEFIOWrite4Byte(_a, _b, _c)		\
-	rtw_write32(_a, _b, _c)
-
-#define PlatformEFIORead1Byte(_a, _b)		\
-		rtw_read8(_a, _b)
-#define PlatformEFIORead2Byte(_a, _b)		\
-		rtw_read16(_a, _b)
-#define PlatformEFIORead4Byte(_a, _b)		\
-		rtw_read32(_a, _b)
-
 #endif	/* _RTL8711_IO_H_ */
-- 
GitLab


From 302baa63e7553e7bf57df2f98d82855461d81bbe Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Fri, 21 Oct 2022 12:54:55 -0700
Subject: [PATCH 0339/4122] staging: rtl8723bs: remove tab in variable
 definition

Remove unnecessary tab in variable definition in rtw_ioctl_set. Issue
found by checkpatch.

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/ee1bcccb23b3d24eb87d0b08bfa817b4af692dc5.1666380274.git.eperi1024@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
index 8c7daab141db4..ede7d29308444 100644
--- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
+++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
@@ -367,7 +367,7 @@ u8 rtw_set_802_11_disassociate(struct adapter *padapter)
 
 u8 rtw_set_802_11_bssid_list_scan(struct adapter *padapter, struct ndis_802_11_ssid *pssid, int ssid_max_num)
 {
-	struct	mlme_priv 	*pmlmepriv = &padapter->mlmepriv;
+	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	u8 res = true;
 
 	if (!padapter) {
-- 
GitLab


From 83df6f3f2690533bf094c18bef39a016311b7a46 Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Fri, 21 Oct 2022 12:54:56 -0700
Subject: [PATCH 0340/4122] staging: rtl8723bs: add newline after variable
 declaration

Fix checkpatch style warning by adding newline after variable
declaration in rtw_ioctl_set

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/76e1bfd210d79e6d3f7cc09233621c8b741b2370.1666380274.git.eperi1024@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
index ede7d29308444..47323160e72d8 100644
--- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
+++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
@@ -78,6 +78,7 @@ u8 rtw_do_join(struct adapter *padapter)
 		goto exit;
 	} else {
 		int select_ret;
+
 		spin_unlock_bh(&(pmlmepriv->scanned_queue.lock));
 		select_ret = rtw_select_and_join_from_scanned_queue(pmlmepriv);
 		if (select_ret == _SUCCESS) {
-- 
GitLab


From 19cbe487f55fd04bc0b83bc34b6175f9dc6ab4e4 Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Fri, 21 Oct 2022 12:54:57 -0700
Subject: [PATCH 0341/4122] staging: rtl8723bs: use tab instead of spaces for
 indent

Replace spaces with tab for indent and correct alignment for closing
brace in rtw_ioctl_set. Issue found by checkpatch.

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/0516cac046c617b55718fddb2aac3a50d543d84c.1666380274.git.eperi1024@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_ioctl_set.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
index 47323160e72d8..3b44f0dd5b0ae 100644
--- a/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
+++ b/drivers/staging/rtl8723bs/core/rtw_ioctl_set.c
@@ -312,7 +312,7 @@ u8 rtw_set_802_11_infrastructure_mode(struct adapter *padapter,
 		if ((*pold_state == Ndis802_11Infrastructure) || (*pold_state == Ndis802_11IBSS)) {
 			if (check_fwstate(pmlmepriv, _FW_LINKED) == true)
 				rtw_indicate_disconnect(padapter); /* will clr Linked_state; before this function, we must have checked whether issue dis-assoc_cmd or not */
-	       }
+		}
 
 		*pold_state = networktype;
 
-- 
GitLab


From 57d91e039070faa938622d28093ba1d031e29b3b Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Oct 2022 12:44:42 +0100
Subject: [PATCH 0342/4122] leds: max8997: Don't error if there is no pdata

The driver works just fine if no platform data is supplied.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/leds-max8997.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/leds/leds-max8997.c b/drivers/leds/leds-max8997.c
index c0bddb33888d7..c8d7f55c9dec2 100644
--- a/drivers/leds/leds-max8997.c
+++ b/drivers/leds/leds-max8997.c
@@ -238,11 +238,6 @@ static int max8997_led_probe(struct platform_device *pdev)
 	char name[20];
 	int ret = 0;
 
-	if (pdata == NULL) {
-		dev_err(&pdev->dev, "no platform data\n");
-		return -ENODEV;
-	}
-
 	led = devm_kzalloc(&pdev->dev, sizeof(*led), GFP_KERNEL);
 	if (led == NULL)
 		return -ENOMEM;
@@ -258,7 +253,7 @@ static int max8997_led_probe(struct platform_device *pdev)
 	led->iodev = iodev;
 
 	/* initialize mode and brightness according to platform_data */
-	if (pdata->led_pdata) {
+	if (pdata && pdata->led_pdata) {
 		u8 mode = 0, brightness = 0;
 
 		mode = pdata->led_pdata->mode[led->id];
-- 
GitLab


From 3031993b3474794ecb71b6f969a3e60e4bda9d8a Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Fri, 21 Oct 2022 22:19:40 +0300
Subject: [PATCH 0343/4122] led: qcom-lpg: Fix sleeping in atomic

lpg_brighness_set() function can sleep, while led's brightness_set()
callback must be non-blocking. Change LPG driver to use
brightness_set_blocking() instead.

BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 0, name: swapper/0
preempt_count: 101, expected: 0
INFO: lockdep is turned off.
CPU: 0 PID: 0 Comm: swapper/0 Tainted: G        W          6.1.0-rc1-00014-gbe99b089c6fc-dirty #85
Hardware name: Qualcomm Technologies, Inc. DB820c (DT)
Call trace:
 dump_backtrace.part.0+0xe4/0xf0
 show_stack+0x18/0x40
 dump_stack_lvl+0x88/0xb4
 dump_stack+0x18/0x34
 __might_resched+0x170/0x254
 __might_sleep+0x48/0x9c
 __mutex_lock+0x4c/0x400
 mutex_lock_nested+0x2c/0x40
 lpg_brightness_single_set+0x40/0x90
 led_set_brightness_nosleep+0x34/0x60
 led_heartbeat_function+0x80/0x170
 call_timer_fn+0xb8/0x340
 __run_timers.part.0+0x20c/0x254
 run_timer_softirq+0x3c/0x7c
 _stext+0x14c/0x578
 ____do_softirq+0x10/0x20
 call_on_irq_stack+0x2c/0x5c
 do_softirq_own_stack+0x1c/0x30
 __irq_exit_rcu+0x164/0x170
 irq_exit_rcu+0x10/0x40
 el1_interrupt+0x38/0x50
 el1h_64_irq_handler+0x18/0x2c
 el1h_64_irq+0x64/0x68
 cpuidle_enter_state+0xc8/0x380
 cpuidle_enter+0x38/0x50
 do_idle+0x244/0x2d0
 cpu_startup_entry+0x24/0x30
 rest_init+0x128/0x1a0
 arch_post_acpi_subsys_init+0x0/0x18
 start_kernel+0x6f4/0x734
 __primary_switched+0xbc/0xc4

Fixes: 24e2d05d1b68 ("leds: Add driver for Qualcomm LPG")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/rgb/leds-qcom-lpg.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/leds/rgb/leds-qcom-lpg.c b/drivers/leds/rgb/leds-qcom-lpg.c
index 02f51cc618376..c1a56259226fb 100644
--- a/drivers/leds/rgb/leds-qcom-lpg.c
+++ b/drivers/leds/rgb/leds-qcom-lpg.c
@@ -602,8 +602,8 @@ static void lpg_brightness_set(struct lpg_led *led, struct led_classdev *cdev,
 		lpg_lut_sync(lpg, lut_mask);
 }
 
-static void lpg_brightness_single_set(struct led_classdev *cdev,
-				      enum led_brightness value)
+static int lpg_brightness_single_set(struct led_classdev *cdev,
+				     enum led_brightness value)
 {
 	struct lpg_led *led = container_of(cdev, struct lpg_led, cdev);
 	struct mc_subled info;
@@ -614,10 +614,12 @@ static void lpg_brightness_single_set(struct led_classdev *cdev,
 	lpg_brightness_set(led, cdev, &info);
 
 	mutex_unlock(&led->lpg->lock);
+
+	return 0;
 }
 
-static void lpg_brightness_mc_set(struct led_classdev *cdev,
-				  enum led_brightness value)
+static int lpg_brightness_mc_set(struct led_classdev *cdev,
+				 enum led_brightness value)
 {
 	struct led_classdev_mc *mc = lcdev_to_mccdev(cdev);
 	struct lpg_led *led = container_of(mc, struct lpg_led, mcdev);
@@ -628,6 +630,8 @@ static void lpg_brightness_mc_set(struct led_classdev *cdev,
 	lpg_brightness_set(led, cdev, mc->subled_info);
 
 	mutex_unlock(&led->lpg->lock);
+
+	return 0;
 }
 
 static int lpg_blink_set(struct lpg_led *led,
@@ -1118,7 +1122,7 @@ static int lpg_add_led(struct lpg *lpg, struct device_node *np)
 		led->mcdev.num_colors = num_channels;
 
 		cdev = &led->mcdev.led_cdev;
-		cdev->brightness_set = lpg_brightness_mc_set;
+		cdev->brightness_set_blocking = lpg_brightness_mc_set;
 		cdev->blink_set = lpg_blink_mc_set;
 
 		/* Register pattern accessors only if we have a LUT block */
@@ -1132,7 +1136,7 @@ static int lpg_add_led(struct lpg *lpg, struct device_node *np)
 			return ret;
 
 		cdev = &led->cdev;
-		cdev->brightness_set = lpg_brightness_single_set;
+		cdev->brightness_set_blocking = lpg_brightness_single_set;
 		cdev->blink_set = lpg_blink_single_set;
 
 		/* Register pattern accessors only if we have a LUT block */
@@ -1151,7 +1155,7 @@ static int lpg_add_led(struct lpg *lpg, struct device_node *np)
 	else
 		cdev->brightness = LED_OFF;
 
-	cdev->brightness_set(cdev, cdev->brightness);
+	cdev->brightness_set_blocking(cdev, cdev->brightness);
 
 	init_data.fwnode = of_fwnode_handle(np);
 
-- 
GitLab


From 5f52a8ba7e91215c3d046d298fb328d1b9f7897d Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 21 Oct 2022 15:54:28 -0500
Subject: [PATCH 0344/4122] dt-bindings: leds: Add 'cpuX' to
 'linux,default-trigger'

Add 'cpu' and 'cpuN' to possible values for 'linux,default-trigger'.
There's 45 cases of them in upstream dts files.

Signed-off-by: Rob Herring <robh@kernel.org>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 Documentation/devicetree/bindings/leds/common.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/leds/common.yaml b/Documentation/devicetree/bindings/leds/common.yaml
index 3c14a98430e19..f5c57a580078e 100644
--- a/Documentation/devicetree/bindings/leds/common.yaml
+++ b/Documentation/devicetree/bindings/leds/common.yaml
@@ -100,6 +100,7 @@ properties:
           - pattern
         # LED is triggered by SD/MMC activity
       - pattern: "^mmc[0-9]+$"
+      - pattern: "^cpu[0-9]*$"
 
   led-pattern:
     description: |
-- 
GitLab


From 375ac0b2c1f11d8274999e91546d69e3140c6c6a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 20 Oct 2022 14:06:49 +0100
Subject: [PATCH 0345/4122] usb: ftdi-elan: remove variable err_count

Variable err_count is just being incremented and it's never used
anywhere else. The variable and the increment are redundant so
remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221020130649.1546112-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/ftdi-elan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index b2f980409d0b5..33b35788bd0ba 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -1956,7 +1956,6 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi)
 	int long_stop = 10;
 	int retry_on_timeout = 5;
 	int retry_on_empty = 10;
-	int err_count = 0;
 	retval = ftdi_elan_flush_input_fifo(ftdi);
 	if (retval)
 		return retval;
@@ -2051,7 +2050,6 @@ static int ftdi_elan_synchronize(struct usb_ftdi *ftdi)
 					continue;
 				}
 			} else {
-				err_count += 1;
 				dev_err(&ftdi->udev->dev, "error = %d\n",
 					retval);
 				if (read_stop-- > 0) {
-- 
GitLab


From 13cc02f115d010d078851fac7f347890e62c097d Mon Sep 17 00:00:00 2001
From: Jules Irenge <jbi.octave@gmail.com>
Date: Sat, 1 Oct 2022 15:56:33 +0100
Subject: [PATCH 0346/4122] usbip: vudc: Convert snprintf() to sysfs_emit()

Coccinnelle reports a warning
Warning: Use scnprintf or sprintf

Following the advice on kernel documentation
https://www.kernel.org/doc/html/latest/filesystems/sysfs.html

For show(device *...) functions we should only use sysfs_emit() or sysfs_emit_at()
especially when formatting the value to be returned to user space.
Convert snprintf() to sysfs_emit()

Signed-off-by: Jules Irenge <jules.irenge@postgrad.manchester.ac.uk>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/YzhVIaNGdM33pcts@octinomon
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/vudc_sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/usbip/vudc_sysfs.c b/drivers/usb/usbip/vudc_sysfs.c
index c95e6b2bfd32a..907a43a008964 100644
--- a/drivers/usb/usbip/vudc_sysfs.c
+++ b/drivers/usb/usbip/vudc_sysfs.c
@@ -242,7 +242,7 @@ static ssize_t usbip_status_show(struct device *dev,
 	status = udc->ud.status;
 	spin_unlock_irq(&udc->ud.lock);
 
-	return snprintf(out, PAGE_SIZE, "%d\n", status);
+	return sysfs_emit(out, "%d\n", status);
 }
 static DEVICE_ATTR_RO(usbip_status);
 
-- 
GitLab


From 27ef01e381c777521084724248c5736cd1cdda63 Mon Sep 17 00:00:00 2001
From: Xuezhi Zhang <zhangxuezhi1@coolpad.com>
Date: Fri, 14 Oct 2022 19:06:06 +0800
Subject: [PATCH 0347/4122] usbip: convert sysfs snprintf to sysfs_emit

Follow the advice of the Documentation/filesystems/sysfs.rst
and show() should only use sysfs_emit() or sysfs_emit_at()
when formatting the value to be returned to user space.

Signed-off-by: Xuezhi Zhang <zhangxuezhi1@coolpad.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20221014110606.599352-1-zhangxuezhi3@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/stub_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index 3c6d452e3bf40..f92047d860f07 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -30,7 +30,7 @@ static ssize_t usbip_status_show(struct device *dev,
 	status = sdev->ud.status;
 	spin_unlock_irq(&sdev->ud.lock);
 
-	return snprintf(buf, PAGE_SIZE, "%d\n", status);
+	return sysfs_emit(buf, "%d\n", status);
 }
 static DEVICE_ATTR_RO(usbip_status);
 
-- 
GitLab


From 90732f1769165dcf0778d723ad188f6441a930f5 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <dzm91@hust.edu.cn>
Date: Sun, 9 Oct 2022 15:23:05 +0800
Subject: [PATCH 0348/4122] usb: cdns3: adjust the partial logic of
 cdnsp_pci_remove

In cdnsp_pci_remove, if pci_is_enabled returns true, it will
call cdns_remove; else it will call kfree. Then both control flow
goes to pci_dev_put.

Adjust this logic by modifying it to an if else.

Signed-off-by: Dongliang Mu <dzm91@hust.edu.cn>
Acked-by: Pawel Laszczak <pawell@cadence.com>
Link: https://lore.kernel.org/r/20221009072305.1593707-1-dzm91@hust.edu.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/cdns3/cdnsp-pci.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/cdns3/cdnsp-pci.c b/drivers/usb/cdns3/cdnsp-pci.c
index fe8a114c586cc..efd54ed918b97 100644
--- a/drivers/usb/cdns3/cdnsp-pci.c
+++ b/drivers/usb/cdns3/cdnsp-pci.c
@@ -192,14 +192,12 @@ static void cdnsp_pci_remove(struct pci_dev *pdev)
 	if (pci_dev_run_wake(pdev))
 		pm_runtime_get_noresume(&pdev->dev);
 
-	if (!pci_is_enabled(func)) {
+	if (pci_is_enabled(func)) {
+		cdns_remove(cdnsp);
+	} else {
 		kfree(cdnsp);
-		goto pci_put;
 	}
 
-	cdns_remove(cdnsp);
-
-pci_put:
 	pci_dev_put(func);
 }
 
-- 
GitLab


From 4e74b483a3ce87e173634ba238a84b7fe404061b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 7 Oct 2022 21:32:10 +0100
Subject: [PATCH 0349/4122] USB: host: Kconfig: Fix spelling mistake "firwmare"
 -> "firmware"

There is a spelling mistake in a Kconfig description. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221007203210.2756505-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 247568bc17a2b..8e8db71021a54 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -47,7 +47,7 @@ config USB_XHCI_PCI_RENESAS
 	tristate "Support for additional Renesas xHCI controller with firmware"
 	help
 	  Say 'Y' to enable the support for the Renesas xHCI controller with
-	  firmware. Make sure you have the firwmare for the device and
+	  firmware. Make sure you have the firmware for the device and
 	  installed on your system for this device to work.
 	  If unsure, say 'N'.
 
-- 
GitLab


From 61dd457c0188c0deef68c2b919c0a2defe5db388 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 5 Oct 2022 12:55:55 +0200
Subject: [PATCH 0350/4122] dt-bindings: usb: dwc2: Add some missing Lantiq
 variants

These IP block variants appear in various vendor trees and
are distinct variants which needs to be handled.

Cc: devicetree@vger.kernel.org
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221005105555.2665485-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/dwc2.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml
index dc4988c0009cf..1ab85489a3f8f 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.yaml
+++ b/Documentation/devicetree/bindings/usb/dwc2.yaml
@@ -43,7 +43,10 @@ properties:
           - const: rockchip,rk3066-usb
           - const: snps,dwc2
       - const: lantiq,arx100-usb
+      - const: lantiq,ase-usb
+      - const: lantiq,danube-usb
       - const: lantiq,xrx200-usb
+      - const: lantiq,xrx300-usb
       - items:
           - enum:
               - amlogic,meson8-usb
-- 
GitLab


From 9b6447e04bc2a4d06f2ef74a583848c573a25dbc Mon Sep 17 00:00:00 2001
From: Jose Ignacio Tornos Martinez <jtornosm@redhat.com>
Date: Mon, 3 Oct 2022 11:10:16 +0200
Subject: [PATCH 0351/4122] USB: usbip: missing lock in stub down

Missing lock in sysfs operation when we want to close the connection in order
to check the status and send the down event in a safe way.

Signed-off-by: Jose Ignacio Tornos Martinez <jtornosm@redhat.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20221003091016.641900-1-jtornosm@redhat.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/stub_dev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c
index f92047d860f07..9c6954aad6c88 100644
--- a/drivers/usb/usbip/stub_dev.c
+++ b/drivers/usb/usbip/stub_dev.c
@@ -118,6 +118,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
 	} else {
 		dev_info(dev, "stub down\n");
 
+		mutex_lock(&sdev->ud.sysfs_lock);
+
 		spin_lock_irq(&sdev->ud.lock);
 		if (sdev->ud.status != SDEV_ST_USED)
 			goto err;
-- 
GitLab


From d182bf156c4cb8b08ce4a75e82b3357b14a4382d Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Tue, 11 Oct 2022 09:53:48 +0200
Subject: [PATCH 0352/4122] usb: gadget: uvc: default the ctrl request
 interface offsets

For the userspace it is needed to distinguish between requests for the
control or streaming interface. The userspace would have to parse the
configfs to know which interface index it has to compare the ctrl
requests against. Since the interface numbers are not fixed, e.g. for
composite gadgets, the interface offset depends on the setup.

The kernel has this information when handing over the ctrl request to
the userspace. This patch removes the offset from the interface numbers
and expose the default interface defines in the uapi g_uvc.h.

Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
Link: https://lore.kernel.org/r/20221011075348.1786897-1-m.grzeschik@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_uvc.c | 15 ++++++++++++---
 include/uapi/linux/usb/g_uvc.h      |  3 +++
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index 6e196e06181ec..6e131624011a5 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -39,9 +39,6 @@ MODULE_PARM_DESC(trace, "Trace level bitmask");
 
 /* string IDs are assigned dynamically */
 
-#define UVC_STRING_CONTROL_IDX			0
-#define UVC_STRING_STREAMING_IDX		1
-
 static struct usb_string uvc_en_us_strings[] = {
 	/* [UVC_STRING_CONTROL_IDX].s = DYNAMIC, */
 	[UVC_STRING_STREAMING_IDX].s = "Video Streaming",
@@ -228,6 +225,8 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
 	struct uvc_device *uvc = to_uvc(f);
 	struct v4l2_event v4l2_event;
 	struct uvc_event *uvc_event = (void *)&v4l2_event.u.data;
+	unsigned int interface = le16_to_cpu(ctrl->wIndex) & 0xff;
+	struct usb_ctrlrequest *mctrl;
 
 	if ((ctrl->bRequestType & USB_TYPE_MASK) != USB_TYPE_CLASS) {
 		uvcg_info(f, "invalid request type\n");
@@ -248,6 +247,16 @@ uvc_function_setup(struct usb_function *f, const struct usb_ctrlrequest *ctrl)
 	memset(&v4l2_event, 0, sizeof(v4l2_event));
 	v4l2_event.type = UVC_EVENT_SETUP;
 	memcpy(&uvc_event->req, ctrl, sizeof(uvc_event->req));
+
+	/* check for the interface number, fixup the interface number in
+	 * the ctrl request so the userspace doesn't have to bother with
+	 * offset and configfs parsing
+	 */
+	mctrl = &uvc_event->req;
+	mctrl->wIndex &= ~cpu_to_le16(0xff);
+	if (interface == uvc->streaming_intf)
+		mctrl->wIndex = cpu_to_le16(UVC_STRING_STREAMING_IDX);
+
 	v4l2_event_queue(&uvc->vdev, &v4l2_event);
 
 	return 0;
diff --git a/include/uapi/linux/usb/g_uvc.h b/include/uapi/linux/usb/g_uvc.h
index 652f169a019e7..8d7824dde1b2f 100644
--- a/include/uapi/linux/usb/g_uvc.h
+++ b/include/uapi/linux/usb/g_uvc.h
@@ -21,6 +21,9 @@
 #define UVC_EVENT_DATA			(V4L2_EVENT_PRIVATE_START + 5)
 #define UVC_EVENT_LAST			(V4L2_EVENT_PRIVATE_START + 5)
 
+#define UVC_STRING_CONTROL_IDX			0
+#define UVC_STRING_STREAMING_IDX		1
+
 struct uvc_request_data {
 	__s32 length;
 	__u8 data[60];
-- 
GitLab


From a84436a987e7f4ee8eeb62a8a5abcfc60b356d16 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 19 Oct 2022 17:55:52 +0300
Subject: [PATCH 0353/4122] usb: typec: retimer: Use device type for matching

Device name is not reliable so using the type instead in
retimer_fwnode_match().

This will also introduce is_typec_retimer() helper, and
remove the static keyword from the retimer device type. That
will make it accessible also in the main typec class.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221019145552.32493-1-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/retimer.c | 16 ++--------------
 drivers/usb/typec/retimer.h |  4 ++++
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/usb/typec/retimer.c b/drivers/usb/typec/retimer.c
index ee94dbbe47453..3a4146ea6e7cf 100644
--- a/drivers/usb/typec/retimer.c
+++ b/drivers/usb/typec/retimer.c
@@ -17,21 +17,9 @@
 #include "class.h"
 #include "retimer.h"
 
-static bool dev_name_ends_with(struct device *dev, const char *suffix)
-{
-	const char *name = dev_name(dev);
-	const int name_len = strlen(name);
-	const int suffix_len = strlen(suffix);
-
-	if (suffix_len > name_len)
-		return false;
-
-	return strcmp(name + (name_len - suffix_len), suffix) == 0;
-}
-
 static int retimer_fwnode_match(struct device *dev, const void *fwnode)
 {
-	return device_match_fwnode(dev, fwnode) && dev_name_ends_with(dev, "-retimer");
+	return is_typec_retimer(dev) && device_match_fwnode(dev, fwnode);
 }
 
 static void *typec_retimer_match(struct fwnode_handle *fwnode, const char *id, void *data)
@@ -97,7 +85,7 @@ static void typec_retimer_release(struct device *dev)
 	kfree(to_typec_retimer(dev));
 }
 
-static const struct device_type typec_retimer_dev_type = {
+const struct device_type typec_retimer_dev_type = {
 	.name = "typec_retimer",
 	.release = typec_retimer_release,
 };
diff --git a/drivers/usb/typec/retimer.h b/drivers/usb/typec/retimer.h
index fa15951d48468..e34bd23323be5 100644
--- a/drivers/usb/typec/retimer.h
+++ b/drivers/usb/typec/retimer.h
@@ -12,4 +12,8 @@ struct typec_retimer {
 
 #define to_typec_retimer(_dev_) container_of(_dev_, struct typec_retimer, dev)
 
+const struct device_type typec_retimer_dev_type;
+
+#define is_typec_retimer(dev) ((dev)->type == &typec_retimer_dev_type)
+
 #endif /* __USB_TYPEC_RETIMER__ */
-- 
GitLab


From 32fee1df51109a117eb5063e950c372278688098 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Oct 2022 17:29:31 +0200
Subject: [PATCH 0354/4122] usb: musb: remove unused davinci support

The musb-davinci driver was only used on dm644x, which got removed
in linux-6.0. The only remaining davinci machines are da8xx
devicetree based and do not use this hardware.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Link: https://lore.kernel.org/r/20221019152947.3857217-6-arnd@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/Kconfig    |   12 -
 drivers/usb/musb/Makefile   |    2 -
 drivers/usb/musb/cppi_dma.c | 1547 -----------------------------------
 drivers/usb/musb/davinci.c  |  606 --------------
 drivers/usb/musb/davinci.h  |  103 ---
 5 files changed, 2270 deletions(-)
 delete mode 100644 drivers/usb/musb/cppi_dma.c
 delete mode 100644 drivers/usb/musb/davinci.c
 delete mode 100644 drivers/usb/musb/davinci.h

diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 6c8f7763e75e4..f9eec666103cb 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -70,12 +70,6 @@ config USB_MUSB_SUNXI
 	select GENERIC_PHY
 	select SUNXI_SRAM
 
-config USB_MUSB_DAVINCI
-	tristate "DaVinci"
-	depends on ARCH_DAVINCI_DMx
-	depends on NOP_USB_XCEIV
-	depends on BROKEN
-
 config USB_MUSB_DA8XX
 	tristate "DA8xx/OMAP-L1x"
 	depends on ARCH_DAVINCI_DA8XX
@@ -161,12 +155,6 @@ config USB_INVENTRA_DMA
 	help
 	  Enable DMA transfers using Mentor's engine.
 
-config USB_TI_CPPI_DMA
-	bool 'TI CPPI (Davinci)'
-	depends on USB_MUSB_DAVINCI
-	help
-	  Enable DMA transfers when TI CPPI DMA is available.
-
 config USB_TI_CPPI41_DMA
 	bool 'TI CPPI 4.1'
 	depends on (ARCH_OMAP || ARCH_DAVINCI_DA8XX) && DMADEVICES
diff --git a/drivers/usb/musb/Makefile b/drivers/usb/musb/Makefile
index 51dd54a8de497..44a9e27b2157a 100644
--- a/drivers/usb/musb/Makefile
+++ b/drivers/usb/musb/Makefile
@@ -19,7 +19,6 @@ obj-$(CONFIG_USB_MUSB_OMAP2PLUS)		+= omap2430.o
 obj-$(CONFIG_USB_MUSB_AM35X)			+= am35x.o
 obj-$(CONFIG_USB_MUSB_DSPS)			+= musb_dsps.o
 obj-$(CONFIG_USB_MUSB_TUSB6010)			+= tusb6010.o
-obj-$(CONFIG_USB_MUSB_DAVINCI)			+= davinci.o
 obj-$(CONFIG_USB_MUSB_DA8XX)			+= da8xx.o
 obj-$(CONFIG_USB_MUSB_UX500)			+= ux500.o
 obj-$(CONFIG_USB_MUSB_JZ4740)			+= jz4740.o
@@ -33,7 +32,6 @@ obj-$(CONFIG_USB_MUSB_POLARFIRE_SOC)		+= mpfs.o
 # though PIO is always there to back up DMA, and for ep0
 
 musb_hdrc-$(CONFIG_USB_INVENTRA_DMA)		+= musbhsdma.o
-musb_hdrc-$(CONFIG_USB_TI_CPPI_DMA)		+= cppi_dma.o
 musb_hdrc-$(CONFIG_USB_TUSB_OMAP_DMA)		+= tusb6010_omap.o
 musb_hdrc-$(CONFIG_USB_UX500_DMA)		+= ux500_dma.o
 musb_hdrc-$(CONFIG_USB_TI_CPPI41_DMA)		+= musb_cppi41.o
diff --git a/drivers/usb/musb/cppi_dma.c b/drivers/usb/musb/cppi_dma.c
deleted file mode 100644
index edb5b63d70634..0000000000000
--- a/drivers/usb/musb/cppi_dma.c
+++ /dev/null
@@ -1,1547 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2005-2006 by Texas Instruments
- *
- * This file implements a DMA  interface using TI's CPPI DMA.
- * For now it's DaVinci-only, but CPPI isn't specific to DaVinci or USB.
- * The TUSB6020, using VLYNQ, has CPPI that looks much like DaVinci.
- */
-
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/usb.h>
-
-#include "musb_core.h"
-#include "musb_debug.h"
-#include "cppi_dma.h"
-#include "davinci.h"
-
-
-/* CPPI DMA status 7-mar-2006:
- *
- * - See musb_{host,gadget}.c for more info
- *
- * - Correct RX DMA generally forces the engine into irq-per-packet mode,
- *   which can easily saturate the CPU under non-mass-storage loads.
- *
- * NOTES 24-aug-2006 (2.6.18-rc4):
- *
- * - peripheral RXDMA wedged in a test with packets of length 512/512/1.
- *   evidently after the 1 byte packet was received and acked, the queue
- *   of BDs got garbaged so it wouldn't empty the fifo.  (rxcsr 0x2003,
- *   and RX DMA0: 4 left, 80000000 8feff880, 8feff860 8feff860; 8f321401
- *   004001ff 00000001 .. 8feff860)  Host was just getting NAKed on tx
- *   of its next (512 byte) packet.  IRQ issues?
- *
- * REVISIT:  the "transfer DMA" glue between CPPI and USB fifos will
- * evidently also directly update the RX and TX CSRs ... so audit all
- * host and peripheral side DMA code to avoid CSR access after DMA has
- * been started.
- */
-
-/* REVISIT now we can avoid preallocating these descriptors; or
- * more simply, switch to a global freelist not per-channel ones.
- * Note: at full speed, 64 descriptors == 4K bulk data.
- */
-#define NUM_TXCHAN_BD       64
-#define NUM_RXCHAN_BD       64
-
-static inline void cpu_drain_writebuffer(void)
-{
-	wmb();
-#ifdef	CONFIG_CPU_ARM926T
-	/* REVISIT this "should not be needed",
-	 * but lack of it sure seemed to hurt ...
-	 */
-	asm("mcr p15, 0, r0, c7, c10, 4 @ drain write buffer\n");
-#endif
-}
-
-static inline struct cppi_descriptor *cppi_bd_alloc(struct cppi_channel *c)
-{
-	struct cppi_descriptor	*bd = c->freelist;
-
-	if (bd)
-		c->freelist = bd->next;
-	return bd;
-}
-
-static inline void
-cppi_bd_free(struct cppi_channel *c, struct cppi_descriptor *bd)
-{
-	if (!bd)
-		return;
-	bd->next = c->freelist;
-	c->freelist = bd;
-}
-
-/*
- *  Start DMA controller
- *
- *  Initialize the DMA controller as necessary.
- */
-
-/* zero out entire rx state RAM entry for the channel */
-static void cppi_reset_rx(struct cppi_rx_stateram __iomem *rx)
-{
-	musb_writel(&rx->rx_skipbytes, 0, 0);
-	musb_writel(&rx->rx_head, 0, 0);
-	musb_writel(&rx->rx_sop, 0, 0);
-	musb_writel(&rx->rx_current, 0, 0);
-	musb_writel(&rx->rx_buf_current, 0, 0);
-	musb_writel(&rx->rx_len_len, 0, 0);
-	musb_writel(&rx->rx_cnt_cnt, 0, 0);
-}
-
-/* zero out entire tx state RAM entry for the channel */
-static void cppi_reset_tx(struct cppi_tx_stateram __iomem *tx, u32 ptr)
-{
-	musb_writel(&tx->tx_head, 0, 0);
-	musb_writel(&tx->tx_buf, 0, 0);
-	musb_writel(&tx->tx_current, 0, 0);
-	musb_writel(&tx->tx_buf_current, 0, 0);
-	musb_writel(&tx->tx_info, 0, 0);
-	musb_writel(&tx->tx_rem_len, 0, 0);
-	/* musb_writel(&tx->tx_dummy, 0, 0); */
-	musb_writel(&tx->tx_complete, 0, ptr);
-}
-
-static void cppi_pool_init(struct cppi *cppi, struct cppi_channel *c)
-{
-	int	j;
-
-	/* initialize channel fields */
-	c->head = NULL;
-	c->tail = NULL;
-	c->last_processed = NULL;
-	c->channel.status = MUSB_DMA_STATUS_UNKNOWN;
-	c->controller = cppi;
-	c->is_rndis = 0;
-	c->freelist = NULL;
-
-	/* build the BD Free list for the channel */
-	for (j = 0; j < NUM_TXCHAN_BD + 1; j++) {
-		struct cppi_descriptor	*bd;
-		dma_addr_t		dma;
-
-		bd = dma_pool_alloc(cppi->pool, GFP_KERNEL, &dma);
-		bd->dma = dma;
-		cppi_bd_free(c, bd);
-	}
-}
-
-static int cppi_channel_abort(struct dma_channel *);
-
-static void cppi_pool_free(struct cppi_channel *c)
-{
-	struct cppi		*cppi = c->controller;
-	struct cppi_descriptor	*bd;
-
-	(void) cppi_channel_abort(&c->channel);
-	c->channel.status = MUSB_DMA_STATUS_UNKNOWN;
-	c->controller = NULL;
-
-	/* free all its bds */
-	bd = c->last_processed;
-	do {
-		if (bd)
-			dma_pool_free(cppi->pool, bd, bd->dma);
-		bd = cppi_bd_alloc(c);
-	} while (bd);
-	c->last_processed = NULL;
-}
-
-static void cppi_controller_start(struct cppi *controller)
-{
-	void __iomem	*tibase;
-	int		i;
-
-	/* do whatever is necessary to start controller */
-	for (i = 0; i < ARRAY_SIZE(controller->tx); i++) {
-		controller->tx[i].transmit = true;
-		controller->tx[i].index = i;
-	}
-	for (i = 0; i < ARRAY_SIZE(controller->rx); i++) {
-		controller->rx[i].transmit = false;
-		controller->rx[i].index = i;
-	}
-
-	/* setup BD list on a per channel basis */
-	for (i = 0; i < ARRAY_SIZE(controller->tx); i++)
-		cppi_pool_init(controller, controller->tx + i);
-	for (i = 0; i < ARRAY_SIZE(controller->rx); i++)
-		cppi_pool_init(controller, controller->rx + i);
-
-	tibase =  controller->tibase;
-	INIT_LIST_HEAD(&controller->tx_complete);
-
-	/* initialise tx/rx channel head pointers to zero */
-	for (i = 0; i < ARRAY_SIZE(controller->tx); i++) {
-		struct cppi_channel	*tx_ch = controller->tx + i;
-		struct cppi_tx_stateram __iomem *tx;
-
-		INIT_LIST_HEAD(&tx_ch->tx_complete);
-
-		tx = tibase + DAVINCI_TXCPPI_STATERAM_OFFSET(i);
-		tx_ch->state_ram = tx;
-		cppi_reset_tx(tx, 0);
-	}
-	for (i = 0; i < ARRAY_SIZE(controller->rx); i++) {
-		struct cppi_channel	*rx_ch = controller->rx + i;
-		struct cppi_rx_stateram __iomem *rx;
-
-		INIT_LIST_HEAD(&rx_ch->tx_complete);
-
-		rx = tibase + DAVINCI_RXCPPI_STATERAM_OFFSET(i);
-		rx_ch->state_ram = rx;
-		cppi_reset_rx(rx);
-	}
-
-	/* enable individual cppi channels */
-	musb_writel(tibase, DAVINCI_TXCPPI_INTENAB_REG,
-			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
-	musb_writel(tibase, DAVINCI_RXCPPI_INTENAB_REG,
-			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
-
-	/* enable tx/rx CPPI control */
-	musb_writel(tibase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);
-	musb_writel(tibase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);
-
-	/* disable RNDIS mode, also host rx RNDIS autorequest */
-	musb_writel(tibase, DAVINCI_RNDIS_REG, 0);
-	musb_writel(tibase, DAVINCI_AUTOREQ_REG, 0);
-}
-
-/*
- *  Stop DMA controller
- *
- *  De-Init the DMA controller as necessary.
- */
-
-static void cppi_controller_stop(struct cppi *controller)
-{
-	void __iomem		*tibase;
-	int			i;
-	struct musb		*musb;
-
-	musb = controller->controller.musb;
-
-	tibase = controller->tibase;
-	/* DISABLE INDIVIDUAL CHANNEL Interrupts */
-	musb_writel(tibase, DAVINCI_TXCPPI_INTCLR_REG,
-			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
-	musb_writel(tibase, DAVINCI_RXCPPI_INTCLR_REG,
-			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
-
-	musb_dbg(musb, "Tearing down RX and TX Channels");
-	for (i = 0; i < ARRAY_SIZE(controller->tx); i++) {
-		/* FIXME restructure of txdma to use bds like rxdma */
-		controller->tx[i].last_processed = NULL;
-		cppi_pool_free(controller->tx + i);
-	}
-	for (i = 0; i < ARRAY_SIZE(controller->rx); i++)
-		cppi_pool_free(controller->rx + i);
-
-	/* in Tx Case proper teardown is supported. We resort to disabling
-	 * Tx/Rx CPPI after cleanup of Tx channels. Before TX teardown is
-	 * complete TX CPPI cannot be disabled.
-	 */
-	/*disable tx/rx cppi */
-	musb_writel(tibase, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE);
-	musb_writel(tibase, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_DISABLE);
-}
-
-/* While dma channel is allocated, we only want the core irqs active
- * for fault reports, otherwise we'd get irqs that we don't care about.
- * Except for TX irqs, where dma done != fifo empty and reusable ...
- *
- * NOTE: docs don't say either way, but irq masking **enables** irqs.
- *
- * REVISIT same issue applies to pure PIO usage too, and non-cppi dma...
- */
-static inline void core_rxirq_disable(void __iomem *tibase, unsigned epnum)
-{
-	musb_writel(tibase, DAVINCI_USB_INT_MASK_CLR_REG, 1 << (epnum + 8));
-}
-
-static inline void core_rxirq_enable(void __iomem *tibase, unsigned epnum)
-{
-	musb_writel(tibase, DAVINCI_USB_INT_MASK_SET_REG, 1 << (epnum + 8));
-}
-
-
-/*
- * Allocate a CPPI Channel for DMA.  With CPPI, channels are bound to
- * each transfer direction of a non-control endpoint, so allocating
- * (and deallocating) is mostly a way to notice bad housekeeping on
- * the software side.  We assume the irqs are always active.
- */
-static struct dma_channel *
-cppi_channel_allocate(struct dma_controller *c,
-		struct musb_hw_ep *ep, u8 transmit)
-{
-	struct cppi		*controller;
-	u8			index;
-	struct cppi_channel	*cppi_ch;
-	void __iomem		*tibase;
-	struct musb		*musb;
-
-	controller = container_of(c, struct cppi, controller);
-	tibase = controller->tibase;
-	musb = c->musb;
-
-	/* ep0 doesn't use DMA; remember cppi indices are 0..N-1 */
-	index = ep->epnum - 1;
-
-	/* return the corresponding CPPI Channel Handle, and
-	 * probably disable the non-CPPI irq until we need it.
-	 */
-	if (transmit) {
-		if (index >= ARRAY_SIZE(controller->tx)) {
-			musb_dbg(musb, "no %cX%d CPPI channel", 'T', index);
-			return NULL;
-		}
-		cppi_ch = controller->tx + index;
-	} else {
-		if (index >= ARRAY_SIZE(controller->rx)) {
-			musb_dbg(musb, "no %cX%d CPPI channel", 'R', index);
-			return NULL;
-		}
-		cppi_ch = controller->rx + index;
-		core_rxirq_disable(tibase, ep->epnum);
-	}
-
-	/* REVISIT make this an error later once the same driver code works
-	 * with the other DMA engine too
-	 */
-	if (cppi_ch->hw_ep)
-		musb_dbg(musb, "re-allocating DMA%d %cX channel %p",
-				index, transmit ? 'T' : 'R', cppi_ch);
-	cppi_ch->hw_ep = ep;
-	cppi_ch->channel.status = MUSB_DMA_STATUS_FREE;
-	cppi_ch->channel.max_len = 0x7fffffff;
-
-	musb_dbg(musb, "Allocate CPPI%d %cX", index, transmit ? 'T' : 'R');
-	return &cppi_ch->channel;
-}
-
-/* Release a CPPI Channel.  */
-static void cppi_channel_release(struct dma_channel *channel)
-{
-	struct cppi_channel	*c;
-	void __iomem		*tibase;
-
-	/* REVISIT:  for paranoia, check state and abort if needed... */
-
-	c = container_of(channel, struct cppi_channel, channel);
-	tibase = c->controller->tibase;
-	if (!c->hw_ep)
-		musb_dbg(c->controller->controller.musb,
-			"releasing idle DMA channel %p", c);
-	else if (!c->transmit)
-		core_rxirq_enable(tibase, c->index + 1);
-
-	/* for now, leave its cppi IRQ enabled (we won't trigger it) */
-	c->hw_ep = NULL;
-	channel->status = MUSB_DMA_STATUS_UNKNOWN;
-}
-
-/* Context: controller irqlocked */
-static void
-cppi_dump_rx(int level, struct cppi_channel *c, const char *tag)
-{
-	void __iomem			*base = c->controller->mregs;
-	struct cppi_rx_stateram __iomem	*rx = c->state_ram;
-
-	musb_ep_select(base, c->index + 1);
-
-	musb_dbg(c->controller->controller.musb,
-		"RX DMA%d%s: %d left, csr %04x, "
-		"%08x H%08x S%08x C%08x, "
-		"B%08x L%08x %08x .. %08x",
-		c->index, tag,
-		musb_readl(c->controller->tibase,
-			DAVINCI_RXCPPI_BUFCNT0_REG + 4 * c->index),
-		musb_readw(c->hw_ep->regs, MUSB_RXCSR),
-
-		musb_readl(&rx->rx_skipbytes, 0),
-		musb_readl(&rx->rx_head, 0),
-		musb_readl(&rx->rx_sop, 0),
-		musb_readl(&rx->rx_current, 0),
-
-		musb_readl(&rx->rx_buf_current, 0),
-		musb_readl(&rx->rx_len_len, 0),
-		musb_readl(&rx->rx_cnt_cnt, 0),
-		musb_readl(&rx->rx_complete, 0)
-		);
-}
-
-/* Context: controller irqlocked */
-static void
-cppi_dump_tx(int level, struct cppi_channel *c, const char *tag)
-{
-	void __iomem			*base = c->controller->mregs;
-	struct cppi_tx_stateram __iomem	*tx = c->state_ram;
-
-	musb_ep_select(base, c->index + 1);
-
-	musb_dbg(c->controller->controller.musb,
-		"TX DMA%d%s: csr %04x, "
-		"H%08x S%08x C%08x %08x, "
-		"F%08x L%08x .. %08x",
-		c->index, tag,
-		musb_readw(c->hw_ep->regs, MUSB_TXCSR),
-
-		musb_readl(&tx->tx_head, 0),
-		musb_readl(&tx->tx_buf, 0),
-		musb_readl(&tx->tx_current, 0),
-		musb_readl(&tx->tx_buf_current, 0),
-
-		musb_readl(&tx->tx_info, 0),
-		musb_readl(&tx->tx_rem_len, 0),
-		/* dummy/unused word 6 */
-		musb_readl(&tx->tx_complete, 0)
-		);
-}
-
-/* Context: controller irqlocked */
-static inline void
-cppi_rndis_update(struct cppi_channel *c, int is_rx,
-		void __iomem *tibase, int is_rndis)
-{
-	/* we may need to change the rndis flag for this cppi channel */
-	if (c->is_rndis != is_rndis) {
-		u32	value = musb_readl(tibase, DAVINCI_RNDIS_REG);
-		u32	temp = 1 << (c->index);
-
-		if (is_rx)
-			temp <<= 16;
-		if (is_rndis)
-			value |= temp;
-		else
-			value &= ~temp;
-		musb_writel(tibase, DAVINCI_RNDIS_REG, value);
-		c->is_rndis = is_rndis;
-	}
-}
-
-static void cppi_dump_rxbd(const char *tag, struct cppi_descriptor *bd)
-{
-	pr_debug("RXBD/%s %08x: "
-			"nxt %08x buf %08x off.blen %08x opt.plen %08x\n",
-			tag, bd->dma,
-			bd->hw_next, bd->hw_bufp, bd->hw_off_len,
-			bd->hw_options);
-}
-
-static void cppi_dump_rxq(int level, const char *tag, struct cppi_channel *rx)
-{
-	struct cppi_descriptor	*bd;
-
-	cppi_dump_rx(level, rx, tag);
-	if (rx->last_processed)
-		cppi_dump_rxbd("last", rx->last_processed);
-	for (bd = rx->head; bd; bd = bd->next)
-		cppi_dump_rxbd("active", bd);
-}
-
-
-/* NOTE:  DaVinci autoreq is ignored except for host side "RNDIS" mode RX;
- * so we won't ever use it (see "CPPI RX Woes" below).
- */
-static inline int cppi_autoreq_update(struct cppi_channel *rx,
-		void __iomem *tibase, int onepacket, unsigned n_bds)
-{
-	u32	val;
-
-#ifdef	RNDIS_RX_IS_USABLE
-	u32	tmp;
-	/* assert(is_host_active(musb)) */
-
-	/* start from "AutoReq never" */
-	tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
-	val = tmp & ~((0x3) << (rx->index * 2));
-
-	/* HCD arranged reqpkt for packet #1.  we arrange int
-	 * for all but the last one, maybe in two segments.
-	 */
-	if (!onepacket) {
-#if 0
-		/* use two segments, autoreq "all" then the last "never" */
-		val |= ((0x3) << (rx->index * 2));
-		n_bds--;
-#else
-		/* one segment, autoreq "all-but-last" */
-		val |= ((0x1) << (rx->index * 2));
-#endif
-	}
-
-	if (val != tmp) {
-		int n = 100;
-
-		/* make sure that autoreq is updated before continuing */
-		musb_writel(tibase, DAVINCI_AUTOREQ_REG, val);
-		do {
-			tmp = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
-			if (tmp == val)
-				break;
-			cpu_relax();
-		} while (n-- > 0);
-	}
-#endif
-
-	/* REQPKT is turned off after each segment */
-	if (n_bds && rx->channel.actual_len) {
-		void __iomem	*regs = rx->hw_ep->regs;
-
-		val = musb_readw(regs, MUSB_RXCSR);
-		if (!(val & MUSB_RXCSR_H_REQPKT)) {
-			val |= MUSB_RXCSR_H_REQPKT | MUSB_RXCSR_H_WZC_BITS;
-			musb_writew(regs, MUSB_RXCSR, val);
-			/* flush writebuffer */
-			val = musb_readw(regs, MUSB_RXCSR);
-		}
-	}
-	return n_bds;
-}
-
-
-/* Buffer enqueuing Logic:
- *
- *  - RX builds new queues each time, to help handle routine "early
- *    termination" cases (faults, including errors and short reads)
- *    more correctly.
- *
- *  - for now, TX reuses the same queue of BDs every time
- *
- * REVISIT long term, we want a normal dynamic model.
- * ... the goal will be to append to the
- * existing queue, processing completed "dma buffers" (segments) on the fly.
- *
- * Otherwise we force an IRQ latency between requests, which slows us a lot
- * (especially in "transparent" dma).  Unfortunately that model seems to be
- * inherent in the DMA model from the Mentor code, except in the rare case
- * of transfers big enough (~128+ KB) that we could append "middle" segments
- * in the TX paths.  (RX can't do this, see below.)
- *
- * That's true even in the CPPI- friendly iso case, where most urbs have
- * several small segments provided in a group and where the "packet at a time"
- * "transparent" DMA model is always correct, even on the RX side.
- */
-
-/*
- * CPPI TX:
- * ========
- * TX is a lot more reasonable than RX; it doesn't need to run in
- * irq-per-packet mode very often.  RNDIS mode seems to behave too
- * (except how it handles the exactly-N-packets case).  Building a
- * txdma queue with multiple requests (urb or usb_request) looks
- * like it would work ... but fault handling would need much testing.
- *
- * The main issue with TX mode RNDIS relates to transfer lengths that
- * are an exact multiple of the packet length.  It appears that there's
- * a hiccup in that case (maybe the DMA completes before the ZLP gets
- * written?) boiling down to not being able to rely on CPPI writing any
- * terminating zero length packet before the next transfer is written.
- * So that's punted to PIO; better yet, gadget drivers can avoid it.
- *
- * Plus, there's allegedly an undocumented constraint that rndis transfer
- * length be a multiple of 64 bytes ... but the chip doesn't act that
- * way, and we really don't _want_ that behavior anyway.
- *
- * On TX, "transparent" mode works ... although experiments have shown
- * problems trying to use the SOP/EOP bits in different USB packets.
- *
- * REVISIT try to handle terminating zero length packets using CPPI
- * instead of doing it by PIO after an IRQ.  (Meanwhile, make Ethernet
- * links avoid that issue by forcing them to avoid zlps.)
- */
-static void
-cppi_next_tx_segment(struct musb *musb, struct cppi_channel *tx)
-{
-	unsigned		maxpacket = tx->maxpacket;
-	dma_addr_t		addr = tx->buf_dma + tx->offset;
-	size_t			length = tx->buf_len - tx->offset;
-	struct cppi_descriptor	*bd;
-	unsigned		n_bds;
-	unsigned		i;
-	struct cppi_tx_stateram	__iomem *tx_ram = tx->state_ram;
-	int			rndis;
-
-	/* TX can use the CPPI "rndis" mode, where we can probably fit this
-	 * transfer in one BD and one IRQ.  The only time we would NOT want
-	 * to use it is when hardware constraints prevent it, or if we'd
-	 * trigger the "send a ZLP?" confusion.
-	 */
-	rndis = (maxpacket & 0x3f) == 0
-		&& length > maxpacket
-		&& length < 0xffff
-		&& (length % maxpacket) != 0;
-
-	if (rndis) {
-		maxpacket = length;
-		n_bds = 1;
-	} else {
-		if (length)
-			n_bds = DIV_ROUND_UP(length, maxpacket);
-		else
-			n_bds = 1;
-		n_bds = min(n_bds, (unsigned) NUM_TXCHAN_BD);
-		length = min(n_bds * maxpacket, length);
-	}
-
-	musb_dbg(musb, "TX DMA%d, pktSz %d %s bds %d dma 0x%llx len %u",
-			tx->index,
-			maxpacket,
-			rndis ? "rndis" : "transparent",
-			n_bds,
-			(unsigned long long)addr, length);
-
-	cppi_rndis_update(tx, 0, musb->ctrl_base, rndis);
-
-	/* assuming here that channel_program is called during
-	 * transfer initiation ... current code maintains state
-	 * for one outstanding request only (no queues, not even
-	 * the implicit ones of an iso urb).
-	 */
-
-	bd = tx->freelist;
-	tx->head = bd;
-	tx->last_processed = NULL;
-
-	/* FIXME use BD pool like RX side does, and just queue
-	 * the minimum number for this request.
-	 */
-
-	/* Prepare queue of BDs first, then hand it to hardware.
-	 * All BDs except maybe the last should be of full packet
-	 * size; for RNDIS there _is_ only that last packet.
-	 */
-	for (i = 0; i < n_bds; ) {
-		if (++i < n_bds && bd->next)
-			bd->hw_next = bd->next->dma;
-		else
-			bd->hw_next = 0;
-
-		bd->hw_bufp = tx->buf_dma + tx->offset;
-
-		/* FIXME set EOP only on the last packet,
-		 * SOP only on the first ... avoid IRQs
-		 */
-		if ((tx->offset + maxpacket) <= tx->buf_len) {
-			tx->offset += maxpacket;
-			bd->hw_off_len = maxpacket;
-			bd->hw_options = CPPI_SOP_SET | CPPI_EOP_SET
-				| CPPI_OWN_SET | maxpacket;
-		} else {
-			/* only this one may be a partial USB Packet */
-			u32		partial_len;
-
-			partial_len = tx->buf_len - tx->offset;
-			tx->offset = tx->buf_len;
-			bd->hw_off_len = partial_len;
-
-			bd->hw_options = CPPI_SOP_SET | CPPI_EOP_SET
-				| CPPI_OWN_SET | partial_len;
-			if (partial_len == 0)
-				bd->hw_options |= CPPI_ZERO_SET;
-		}
-
-		musb_dbg(musb, "TXBD %p: nxt %08x buf %08x len %04x opt %08x",
-				bd, bd->hw_next, bd->hw_bufp,
-				bd->hw_off_len, bd->hw_options);
-
-		/* update the last BD enqueued to the list */
-		tx->tail = bd;
-		bd = bd->next;
-	}
-
-	/* BDs live in DMA-coherent memory, but writes might be pending */
-	cpu_drain_writebuffer();
-
-	/* Write to the HeadPtr in state RAM to trigger */
-	musb_writel(&tx_ram->tx_head, 0, (u32)tx->freelist->dma);
-
-	cppi_dump_tx(5, tx, "/S");
-}
-
-/*
- * CPPI RX Woes:
- * =============
- * Consider a 1KB bulk RX buffer in two scenarios:  (a) it's fed two 300 byte
- * packets back-to-back, and (b) it's fed two 512 byte packets back-to-back.
- * (Full speed transfers have similar scenarios.)
- *
- * The correct behavior for Linux is that (a) fills the buffer with 300 bytes,
- * and the next packet goes into a buffer that's queued later; while (b) fills
- * the buffer with 1024 bytes.  How to do that with CPPI?
- *
- * - RX queues in "rndis" mode -- one single BD -- handle (a) correctly, but
- *   (b) loses **BADLY** because nothing (!) happens when that second packet
- *   fills the buffer, much less when a third one arrives.  (Which makes this
- *   not a "true" RNDIS mode.  In the RNDIS protocol short-packet termination
- *   is optional, and it's fine if peripherals -- not hosts! -- pad messages
- *   out to end-of-buffer.  Standard PCI host controller DMA descriptors
- *   implement that mode by default ... which is no accident.)
- *
- * - RX queues in "transparent" mode -- two BDs with 512 bytes each -- have
- *   converse problems:  (b) is handled right, but (a) loses badly.  CPPI RX
- *   ignores SOP/EOP markings and processes both of those BDs; so both packets
- *   are loaded into the buffer (with a 212 byte gap between them), and the next
- *   buffer queued will NOT get its 300 bytes of data. (It seems like SOP/EOP
- *   are intended as outputs for RX queues, not inputs...)
- *
- * - A variant of "transparent" mode -- one BD at a time -- is the only way to
- *   reliably make both cases work, with software handling both cases correctly
- *   and at the significant penalty of needing an IRQ per packet.  (The lack of
- *   I/O overlap can be slightly ameliorated by enabling double buffering.)
- *
- * So how to get rid of IRQ-per-packet?  The transparent multi-BD case could
- * be used in special cases like mass storage, which sets URB_SHORT_NOT_OK
- * (or maybe its peripheral side counterpart) to flag (a) scenarios as errors
- * with guaranteed driver level fault recovery and scrubbing out what's left
- * of that garbaged datastream.
- *
- * But there seems to be no way to identify the cases where CPPI RNDIS mode
- * is appropriate -- which do NOT include RNDIS host drivers, but do include
- * the CDC Ethernet driver! -- and the documentation is incomplete/wrong.
- * So we can't _ever_ use RX RNDIS mode ... except by using a heuristic
- * that applies best on the peripheral side (and which could fail rudely).
- *
- * Leaving only "transparent" mode; we avoid multi-bd modes in almost all
- * cases other than mass storage class.  Otherwise we're correct but slow,
- * since CPPI penalizes our need for a "true RNDIS" default mode.
- */
-
-
-/* Heuristic, intended to kick in for ethernet/rndis peripheral ONLY
- *
- * IFF
- *  (a)	peripheral mode ... since rndis peripherals could pad their
- *	writes to hosts, causing i/o failure; or we'd have to cope with
- *	a largely unknowable variety of host side protocol variants
- *  (b)	and short reads are NOT errors ... since full reads would
- *	cause those same i/o failures
- *  (c)	and read length is
- *	- less than 64KB (max per cppi descriptor)
- *	- not a multiple of 4096 (g_zero default, full reads typical)
- *	- N (>1) packets long, ditto (full reads not EXPECTED)
- * THEN
- *   try rx rndis mode
- *
- * Cost of heuristic failing:  RXDMA wedges at the end of transfers that
- * fill out the whole buffer.  Buggy host side usb network drivers could
- * trigger that, but "in the field" such bugs seem to be all but unknown.
- *
- * So this module parameter lets the heuristic be disabled.  When using
- * gadgetfs, the heuristic will probably need to be disabled.
- */
-static bool cppi_rx_rndis = 1;
-
-module_param(cppi_rx_rndis, bool, 0);
-MODULE_PARM_DESC(cppi_rx_rndis, "enable/disable RX RNDIS heuristic");
-
-
-/**
- * cppi_next_rx_segment - dma read for the next chunk of a buffer
- * @musb: the controller
- * @rx: dma channel
- * @onepacket: true unless caller treats short reads as errors, and
- *	performs fault recovery above usbcore.
- * Context: controller irqlocked
- *
- * See above notes about why we can't use multi-BD RX queues except in
- * rare cases (mass storage class), and can never use the hardware "rndis"
- * mode (since it's not a "true" RNDIS mode) with complete safety..
- *
- * It's ESSENTIAL that callers specify "onepacket" mode unless they kick in
- * code to recover from corrupted datastreams after each short transfer.
- */
-static void
-cppi_next_rx_segment(struct musb *musb, struct cppi_channel *rx, int onepacket)
-{
-	unsigned		maxpacket = rx->maxpacket;
-	dma_addr_t		addr = rx->buf_dma + rx->offset;
-	size_t			length = rx->buf_len - rx->offset;
-	struct cppi_descriptor	*bd, *tail;
-	unsigned		n_bds;
-	unsigned		i;
-	void __iomem		*tibase = musb->ctrl_base;
-	int			is_rndis = 0;
-	struct cppi_rx_stateram	__iomem *rx_ram = rx->state_ram;
-	struct cppi_descriptor	*d;
-
-	if (onepacket) {
-		/* almost every USB driver, host or peripheral side */
-		n_bds = 1;
-
-		/* maybe apply the heuristic above */
-		if (cppi_rx_rndis
-				&& is_peripheral_active(musb)
-				&& length > maxpacket
-				&& (length & ~0xffff) == 0
-				&& (length & 0x0fff) != 0
-				&& (length & (maxpacket - 1)) == 0) {
-			maxpacket = length;
-			is_rndis = 1;
-		}
-	} else {
-		/* virtually nothing except mass storage class */
-		if (length > 0xffff) {
-			n_bds = 0xffff / maxpacket;
-			length = n_bds * maxpacket;
-		} else {
-			n_bds = DIV_ROUND_UP(length, maxpacket);
-		}
-		if (n_bds == 1)
-			onepacket = 1;
-		else
-			n_bds = min(n_bds, (unsigned) NUM_RXCHAN_BD);
-	}
-
-	/* In host mode, autorequest logic can generate some IN tokens; it's
-	 * tricky since we can't leave REQPKT set in RXCSR after the transfer
-	 * finishes. So:  multipacket transfers involve two or more segments.
-	 * And always at least two IRQs ... RNDIS mode is not an option.
-	 */
-	if (is_host_active(musb))
-		n_bds = cppi_autoreq_update(rx, tibase, onepacket, n_bds);
-
-	cppi_rndis_update(rx, 1, musb->ctrl_base, is_rndis);
-
-	length = min(n_bds * maxpacket, length);
-
-	musb_dbg(musb, "RX DMA%d seg, maxp %d %s bds %d (cnt %d) "
-			"dma 0x%llx len %u %u/%u",
-			rx->index, maxpacket,
-			onepacket
-				? (is_rndis ? "rndis" : "onepacket")
-				: "multipacket",
-			n_bds,
-			musb_readl(tibase,
-				DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4))
-					& 0xffff,
-			(unsigned long long)addr, length,
-			rx->channel.actual_len, rx->buf_len);
-
-	/* only queue one segment at a time, since the hardware prevents
-	 * correct queue shutdown after unexpected short packets
-	 */
-	bd = cppi_bd_alloc(rx);
-	rx->head = bd;
-
-	/* Build BDs for all packets in this segment */
-	for (i = 0, tail = NULL; bd && i < n_bds; i++, tail = bd) {
-		u32	bd_len;
-
-		if (i) {
-			bd = cppi_bd_alloc(rx);
-			if (!bd)
-				break;
-			tail->next = bd;
-			tail->hw_next = bd->dma;
-		}
-		bd->hw_next = 0;
-
-		/* all but the last packet will be maxpacket size */
-		if (maxpacket < length)
-			bd_len = maxpacket;
-		else
-			bd_len = length;
-
-		bd->hw_bufp = addr;
-		addr += bd_len;
-		rx->offset += bd_len;
-
-		bd->hw_off_len = (0 /*offset*/ << 16) + bd_len;
-		bd->buflen = bd_len;
-
-		bd->hw_options = CPPI_OWN_SET | (i == 0 ? length : 0);
-		length -= bd_len;
-	}
-
-	/* we always expect at least one reusable BD! */
-	if (!tail) {
-		WARNING("rx dma%d -- no BDs? need %d\n", rx->index, n_bds);
-		return;
-	} else if (i < n_bds)
-		WARNING("rx dma%d -- only %d of %d BDs\n", rx->index, i, n_bds);
-
-	tail->next = NULL;
-	tail->hw_next = 0;
-
-	bd = rx->head;
-	rx->tail = tail;
-
-	/* short reads and other faults should terminate this entire
-	 * dma segment.  we want one "dma packet" per dma segment, not
-	 * one per USB packet, terminating the whole queue at once...
-	 * NOTE that current hardware seems to ignore SOP and EOP.
-	 */
-	bd->hw_options |= CPPI_SOP_SET;
-	tail->hw_options |= CPPI_EOP_SET;
-
-	for (d = rx->head; d; d = d->next)
-		cppi_dump_rxbd("S", d);
-
-	/* in case the preceding transfer left some state... */
-	tail = rx->last_processed;
-	if (tail) {
-		tail->next = bd;
-		tail->hw_next = bd->dma;
-	}
-
-	core_rxirq_enable(tibase, rx->index + 1);
-
-	/* BDs live in DMA-coherent memory, but writes might be pending */
-	cpu_drain_writebuffer();
-
-	/* REVISIT specs say to write this AFTER the BUFCNT register
-	 * below ... but that loses badly.
-	 */
-	musb_writel(&rx_ram->rx_head, 0, bd->dma);
-
-	/* bufferCount must be at least 3, and zeroes on completion
-	 * unless it underflows below zero, or stops at two, or keeps
-	 * growing ... grr.
-	 */
-	i = musb_readl(tibase,
-			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4))
-			& 0xffff;
-
-	if (!i)
-		musb_writel(tibase,
-			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4),
-			n_bds + 2);
-	else if (n_bds > (i - 3))
-		musb_writel(tibase,
-			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4),
-			n_bds - (i - 3));
-
-	i = musb_readl(tibase,
-			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4))
-			& 0xffff;
-	if (i < (2 + n_bds)) {
-		musb_dbg(musb, "bufcnt%d underrun - %d (for %d)",
-					rx->index, i, n_bds);
-		musb_writel(tibase,
-			DAVINCI_RXCPPI_BUFCNT0_REG + (rx->index * 4),
-			n_bds + 2);
-	}
-
-	cppi_dump_rx(4, rx, "/S");
-}
-
-/**
- * cppi_channel_program - program channel for data transfer
- * @ch: the channel
- * @maxpacket: max packet size
- * @mode: For RX, 1 unless the usb protocol driver promised to treat
- *	all short reads as errors and kick in high level fault recovery.
- *	For TX, ignored because of RNDIS mode races/glitches.
- * @dma_addr: dma address of buffer
- * @len: length of buffer
- * Context: controller irqlocked
- */
-static int cppi_channel_program(struct dma_channel *ch,
-		u16 maxpacket, u8 mode,
-		dma_addr_t dma_addr, u32 len)
-{
-	struct cppi_channel	*cppi_ch;
-	struct cppi		*controller;
-	struct musb		*musb;
-
-	cppi_ch = container_of(ch, struct cppi_channel, channel);
-	controller = cppi_ch->controller;
-	musb = controller->controller.musb;
-
-	switch (ch->status) {
-	case MUSB_DMA_STATUS_BUS_ABORT:
-	case MUSB_DMA_STATUS_CORE_ABORT:
-		/* fault irq handler should have handled cleanup */
-		WARNING("%cX DMA%d not cleaned up after abort!\n",
-				cppi_ch->transmit ? 'T' : 'R',
-				cppi_ch->index);
-		/* WARN_ON(1); */
-		break;
-	case MUSB_DMA_STATUS_BUSY:
-		WARNING("program active channel?  %cX DMA%d\n",
-				cppi_ch->transmit ? 'T' : 'R',
-				cppi_ch->index);
-		/* WARN_ON(1); */
-		break;
-	case MUSB_DMA_STATUS_UNKNOWN:
-		musb_dbg(musb, "%cX DMA%d not allocated!",
-				cppi_ch->transmit ? 'T' : 'R',
-				cppi_ch->index);
-		fallthrough;
-	case MUSB_DMA_STATUS_FREE:
-		break;
-	}
-
-	ch->status = MUSB_DMA_STATUS_BUSY;
-
-	/* set transfer parameters, then queue up its first segment */
-	cppi_ch->buf_dma = dma_addr;
-	cppi_ch->offset = 0;
-	cppi_ch->maxpacket = maxpacket;
-	cppi_ch->buf_len = len;
-	cppi_ch->channel.actual_len = 0;
-
-	/* TX channel? or RX? */
-	if (cppi_ch->transmit)
-		cppi_next_tx_segment(musb, cppi_ch);
-	else
-		cppi_next_rx_segment(musb, cppi_ch, mode);
-
-	return true;
-}
-
-static bool cppi_rx_scan(struct cppi *cppi, unsigned ch)
-{
-	struct cppi_channel		*rx = &cppi->rx[ch];
-	struct cppi_rx_stateram __iomem	*state = rx->state_ram;
-	struct cppi_descriptor		*bd;
-	struct cppi_descriptor		*last = rx->last_processed;
-	bool				completed = false;
-	bool				acked = false;
-	int				i;
-	dma_addr_t			safe2ack;
-	void __iomem			*regs = rx->hw_ep->regs;
-	struct musb			*musb = cppi->controller.musb;
-
-	cppi_dump_rx(6, rx, "/K");
-
-	bd = last ? last->next : rx->head;
-	if (!bd)
-		return false;
-
-	/* run through all completed BDs */
-	for (i = 0, safe2ack = musb_readl(&state->rx_complete, 0);
-			(safe2ack || completed) && bd && i < NUM_RXCHAN_BD;
-			i++, bd = bd->next) {
-		u16	len;
-
-		/* catch latest BD writes from CPPI */
-		rmb();
-		if (!completed && (bd->hw_options & CPPI_OWN_SET))
-			break;
-
-		musb_dbg(musb, "C/RXBD %llx: nxt %08x buf %08x "
-			"off.len %08x opt.len %08x (%d)",
-			(unsigned long long)bd->dma, bd->hw_next, bd->hw_bufp,
-			bd->hw_off_len, bd->hw_options,
-			rx->channel.actual_len);
-
-		/* actual packet received length */
-		if ((bd->hw_options & CPPI_SOP_SET) && !completed)
-			len = bd->hw_off_len & CPPI_RECV_PKTLEN_MASK;
-		else
-			len = 0;
-
-		if (bd->hw_options & CPPI_EOQ_MASK)
-			completed = true;
-
-		if (!completed && len < bd->buflen) {
-			/* NOTE:  when we get a short packet, RXCSR_H_REQPKT
-			 * must have been cleared, and no more DMA packets may
-			 * active be in the queue... TI docs didn't say, but
-			 * CPPI ignores those BDs even though OWN is still set.
-			 */
-			completed = true;
-			musb_dbg(musb, "rx short %d/%d (%d)",
-					len, bd->buflen,
-					rx->channel.actual_len);
-		}
-
-		/* If we got here, we expect to ack at least one BD; meanwhile
-		 * CPPI may completing other BDs while we scan this list...
-		 *
-		 * RACE: we can notice OWN cleared before CPPI raises the
-		 * matching irq by writing that BD as the completion pointer.
-		 * In such cases, stop scanning and wait for the irq, avoiding
-		 * lost acks and states where BD ownership is unclear.
-		 */
-		if (bd->dma == safe2ack) {
-			musb_writel(&state->rx_complete, 0, safe2ack);
-			safe2ack = musb_readl(&state->rx_complete, 0);
-			acked = true;
-			if (bd->dma == safe2ack)
-				safe2ack = 0;
-		}
-
-		rx->channel.actual_len += len;
-
-		cppi_bd_free(rx, last);
-		last = bd;
-
-		/* stop scanning on end-of-segment */
-		if (bd->hw_next == 0)
-			completed = true;
-	}
-	rx->last_processed = last;
-
-	/* dma abort, lost ack, or ... */
-	if (!acked && last) {
-		int	csr;
-
-		if (safe2ack == 0 || safe2ack == rx->last_processed->dma)
-			musb_writel(&state->rx_complete, 0, safe2ack);
-		if (safe2ack == 0) {
-			cppi_bd_free(rx, last);
-			rx->last_processed = NULL;
-
-			/* if we land here on the host side, H_REQPKT will
-			 * be clear and we need to restart the queue...
-			 */
-			WARN_ON(rx->head);
-		}
-		musb_ep_select(cppi->mregs, rx->index + 1);
-		csr = musb_readw(regs, MUSB_RXCSR);
-		if (csr & MUSB_RXCSR_DMAENAB) {
-			musb_dbg(musb, "list%d %p/%p, last %llx%s, csr %04x",
-				rx->index,
-				rx->head, rx->tail,
-				rx->last_processed
-					? (unsigned long long)
-						rx->last_processed->dma
-					: 0,
-				completed ? ", completed" : "",
-				csr);
-			cppi_dump_rxq(4, "/what?", rx);
-		}
-	}
-	if (!completed) {
-		int	csr;
-
-		rx->head = bd;
-
-		/* REVISIT seems like "autoreq all but EOP" doesn't...
-		 * setting it here "should" be racey, but seems to work
-		 */
-		csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR);
-		if (is_host_active(cppi->controller.musb)
-				&& bd
-				&& !(csr & MUSB_RXCSR_H_REQPKT)) {
-			csr |= MUSB_RXCSR_H_REQPKT;
-			musb_writew(regs, MUSB_RXCSR,
-					MUSB_RXCSR_H_WZC_BITS | csr);
-			csr = musb_readw(rx->hw_ep->regs, MUSB_RXCSR);
-		}
-	} else {
-		rx->head = NULL;
-		rx->tail = NULL;
-	}
-
-	cppi_dump_rx(6, rx, completed ? "/completed" : "/cleaned");
-	return completed;
-}
-
-irqreturn_t cppi_interrupt(int irq, void *dev_id)
-{
-	struct musb		*musb = dev_id;
-	struct cppi		*cppi;
-	void __iomem		*tibase;
-	struct musb_hw_ep	*hw_ep = NULL;
-	u32			rx, tx;
-	int			i, index;
-	unsigned long		flags;
-
-	cppi = container_of(musb->dma_controller, struct cppi, controller);
-	if (cppi->irq)
-		spin_lock_irqsave(&musb->lock, flags);
-
-	tibase = musb->ctrl_base;
-
-	tx = musb_readl(tibase, DAVINCI_TXCPPI_MASKED_REG);
-	rx = musb_readl(tibase, DAVINCI_RXCPPI_MASKED_REG);
-
-	if (!tx && !rx) {
-		if (cppi->irq)
-			spin_unlock_irqrestore(&musb->lock, flags);
-		return IRQ_NONE;
-	}
-
-	musb_dbg(musb, "CPPI IRQ Tx%x Rx%x", tx, rx);
-
-	/* process TX channels */
-	for (index = 0; tx; tx = tx >> 1, index++) {
-		struct cppi_channel		*tx_ch;
-		struct cppi_tx_stateram __iomem	*tx_ram;
-		bool				completed = false;
-		struct cppi_descriptor		*bd;
-
-		if (!(tx & 1))
-			continue;
-
-		tx_ch = cppi->tx + index;
-		tx_ram = tx_ch->state_ram;
-
-		/* FIXME  need a cppi_tx_scan() routine, which
-		 * can also be called from abort code
-		 */
-
-		cppi_dump_tx(5, tx_ch, "/E");
-
-		bd = tx_ch->head;
-
-		/*
-		 * If Head is null then this could mean that a abort interrupt
-		 * that needs to be acknowledged.
-		 */
-		if (NULL == bd) {
-			musb_dbg(musb, "null BD");
-			musb_writel(&tx_ram->tx_complete, 0, 0);
-			continue;
-		}
-
-		/* run through all completed BDs */
-		for (i = 0; !completed && bd && i < NUM_TXCHAN_BD;
-				i++, bd = bd->next) {
-			u16	len;
-
-			/* catch latest BD writes from CPPI */
-			rmb();
-			if (bd->hw_options & CPPI_OWN_SET)
-				break;
-
-			musb_dbg(musb, "C/TXBD %p n %x b %x off %x opt %x",
-					bd, bd->hw_next, bd->hw_bufp,
-					bd->hw_off_len, bd->hw_options);
-
-			len = bd->hw_off_len & CPPI_BUFFER_LEN_MASK;
-			tx_ch->channel.actual_len += len;
-
-			tx_ch->last_processed = bd;
-
-			/* write completion register to acknowledge
-			 * processing of completed BDs, and possibly
-			 * release the IRQ; EOQ might not be set ...
-			 *
-			 * REVISIT use the same ack strategy as rx
-			 *
-			 * REVISIT have observed bit 18 set; huh??
-			 */
-			/* if ((bd->hw_options & CPPI_EOQ_MASK)) */
-				musb_writel(&tx_ram->tx_complete, 0, bd->dma);
-
-			/* stop scanning on end-of-segment */
-			if (bd->hw_next == 0)
-				completed = true;
-		}
-
-		/* on end of segment, maybe go to next one */
-		if (completed) {
-			/* cppi_dump_tx(4, tx_ch, "/complete"); */
-
-			/* transfer more, or report completion */
-			if (tx_ch->offset >= tx_ch->buf_len) {
-				tx_ch->head = NULL;
-				tx_ch->tail = NULL;
-				tx_ch->channel.status = MUSB_DMA_STATUS_FREE;
-
-				hw_ep = tx_ch->hw_ep;
-
-				musb_dma_completion(musb, index + 1, 1);
-
-			} else {
-				/* Bigger transfer than we could fit in
-				 * that first batch of descriptors...
-				 */
-				cppi_next_tx_segment(musb, tx_ch);
-			}
-		} else
-			tx_ch->head = bd;
-	}
-
-	/* Start processing the RX block */
-	for (index = 0; rx; rx = rx >> 1, index++) {
-
-		if (rx & 1) {
-			struct cppi_channel		*rx_ch;
-
-			rx_ch = cppi->rx + index;
-
-			/* let incomplete dma segments finish */
-			if (!cppi_rx_scan(cppi, index))
-				continue;
-
-			/* start another dma segment if needed */
-			if (rx_ch->channel.actual_len != rx_ch->buf_len
-					&& rx_ch->channel.actual_len
-						== rx_ch->offset) {
-				cppi_next_rx_segment(musb, rx_ch, 1);
-				continue;
-			}
-
-			/* all segments completed! */
-			rx_ch->channel.status = MUSB_DMA_STATUS_FREE;
-
-			hw_ep = rx_ch->hw_ep;
-
-			core_rxirq_disable(tibase, index + 1);
-			musb_dma_completion(musb, index + 1, 0);
-		}
-	}
-
-	/* write to CPPI EOI register to re-enable interrupts */
-	musb_writel(tibase, DAVINCI_CPPI_EOI_REG, 0);
-
-	if (cppi->irq)
-		spin_unlock_irqrestore(&musb->lock, flags);
-
-	return IRQ_HANDLED;
-}
-EXPORT_SYMBOL_GPL(cppi_interrupt);
-
-/* Instantiate a software object representing a DMA controller. */
-struct dma_controller *
-cppi_dma_controller_create(struct musb *musb, void __iomem *mregs)
-{
-	struct cppi		*controller;
-	struct device		*dev = musb->controller;
-	struct platform_device	*pdev = to_platform_device(dev);
-	int			irq = platform_get_irq_byname(pdev, "dma");
-
-	controller = kzalloc(sizeof *controller, GFP_KERNEL);
-	if (!controller)
-		return NULL;
-
-	controller->mregs = mregs;
-	controller->tibase = mregs - DAVINCI_BASE_OFFSET;
-
-	controller->controller.musb = musb;
-	controller->controller.channel_alloc = cppi_channel_allocate;
-	controller->controller.channel_release = cppi_channel_release;
-	controller->controller.channel_program = cppi_channel_program;
-	controller->controller.channel_abort = cppi_channel_abort;
-
-	/* NOTE: allocating from on-chip SRAM would give the least
-	 * contention for memory access, if that ever matters here.
-	 */
-
-	/* setup BufferPool */
-	controller->pool = dma_pool_create("cppi",
-			controller->controller.musb->controller,
-			sizeof(struct cppi_descriptor),
-			CPPI_DESCRIPTOR_ALIGN, 0);
-	if (!controller->pool) {
-		kfree(controller);
-		return NULL;
-	}
-
-	if (irq > 0) {
-		if (request_irq(irq, cppi_interrupt, 0, "cppi-dma", musb)) {
-			dev_err(dev, "request_irq %d failed!\n", irq);
-			musb_dma_controller_destroy(&controller->controller);
-			return NULL;
-		}
-		controller->irq = irq;
-	}
-
-	cppi_controller_start(controller);
-	return &controller->controller;
-}
-EXPORT_SYMBOL_GPL(cppi_dma_controller_create);
-
-/*
- *  Destroy a previously-instantiated DMA controller.
- */
-void cppi_dma_controller_destroy(struct dma_controller *c)
-{
-	struct cppi	*cppi;
-
-	cppi = container_of(c, struct cppi, controller);
-
-	cppi_controller_stop(cppi);
-
-	if (cppi->irq)
-		free_irq(cppi->irq, cppi->controller.musb);
-
-	/* assert:  caller stopped the controller first */
-	dma_pool_destroy(cppi->pool);
-
-	kfree(cppi);
-}
-EXPORT_SYMBOL_GPL(cppi_dma_controller_destroy);
-
-/*
- * Context: controller irqlocked, endpoint selected
- */
-static int cppi_channel_abort(struct dma_channel *channel)
-{
-	struct cppi_channel	*cppi_ch;
-	struct cppi		*controller;
-	void __iomem		*mbase;
-	void __iomem		*tibase;
-	void __iomem		*regs;
-	u32			value;
-	struct cppi_descriptor	*queue;
-
-	cppi_ch = container_of(channel, struct cppi_channel, channel);
-
-	controller = cppi_ch->controller;
-
-	switch (channel->status) {
-	case MUSB_DMA_STATUS_BUS_ABORT:
-	case MUSB_DMA_STATUS_CORE_ABORT:
-		/* from RX or TX fault irq handler */
-	case MUSB_DMA_STATUS_BUSY:
-		/* the hardware needs shutting down */
-		regs = cppi_ch->hw_ep->regs;
-		break;
-	case MUSB_DMA_STATUS_UNKNOWN:
-	case MUSB_DMA_STATUS_FREE:
-		return 0;
-	default:
-		return -EINVAL;
-	}
-
-	if (!cppi_ch->transmit && cppi_ch->head)
-		cppi_dump_rxq(3, "/abort", cppi_ch);
-
-	mbase = controller->mregs;
-	tibase = controller->tibase;
-
-	queue = cppi_ch->head;
-	cppi_ch->head = NULL;
-	cppi_ch->tail = NULL;
-
-	/* REVISIT should rely on caller having done this,
-	 * and caller should rely on us not changing it.
-	 * peripheral code is safe ... check host too.
-	 */
-	musb_ep_select(mbase, cppi_ch->index + 1);
-
-	if (cppi_ch->transmit) {
-		struct cppi_tx_stateram __iomem *tx_ram;
-		/* REVISIT put timeouts on these controller handshakes */
-
-		cppi_dump_tx(6, cppi_ch, " (teardown)");
-
-		/* teardown DMA engine then usb core */
-		do {
-			value = musb_readl(tibase, DAVINCI_TXCPPI_TEAR_REG);
-		} while (!(value & CPPI_TEAR_READY));
-		musb_writel(tibase, DAVINCI_TXCPPI_TEAR_REG, cppi_ch->index);
-
-		tx_ram = cppi_ch->state_ram;
-		do {
-			value = musb_readl(&tx_ram->tx_complete, 0);
-		} while (0xFFFFFFFC != value);
-
-		/* FIXME clean up the transfer state ... here?
-		 * the completion routine should get called with
-		 * an appropriate status code.
-		 */
-
-		value = musb_readw(regs, MUSB_TXCSR);
-		value &= ~MUSB_TXCSR_DMAENAB;
-		value |= MUSB_TXCSR_FLUSHFIFO;
-		musb_writew(regs, MUSB_TXCSR, value);
-		musb_writew(regs, MUSB_TXCSR, value);
-
-		/*
-		 * 1. Write to completion Ptr value 0x1(bit 0 set)
-		 *    (write back mode)
-		 * 2. Wait for abort interrupt and then put the channel in
-		 *    compare mode by writing 1 to the tx_complete register.
-		 */
-		cppi_reset_tx(tx_ram, 1);
-		cppi_ch->head = NULL;
-		musb_writel(&tx_ram->tx_complete, 0, 1);
-		cppi_dump_tx(5, cppi_ch, " (done teardown)");
-
-		/* REVISIT tx side _should_ clean up the same way
-		 * as the RX side ... this does no cleanup at all!
-		 */
-
-	} else /* RX */ {
-		u16			csr;
-
-		/* NOTE: docs don't guarantee any of this works ...  we
-		 * expect that if the usb core stops telling the cppi core
-		 * to pull more data from it, then it'll be safe to flush
-		 * current RX DMA state iff any pending fifo transfer is done.
-		 */
-
-		core_rxirq_disable(tibase, cppi_ch->index + 1);
-
-		/* for host, ensure ReqPkt is never set again */
-		if (is_host_active(cppi_ch->controller->controller.musb)) {
-			value = musb_readl(tibase, DAVINCI_AUTOREQ_REG);
-			value &= ~((0x3) << (cppi_ch->index * 2));
-			musb_writel(tibase, DAVINCI_AUTOREQ_REG, value);
-		}
-
-		csr = musb_readw(regs, MUSB_RXCSR);
-
-		/* for host, clear (just) ReqPkt at end of current packet(s) */
-		if (is_host_active(cppi_ch->controller->controller.musb)) {
-			csr |= MUSB_RXCSR_H_WZC_BITS;
-			csr &= ~MUSB_RXCSR_H_REQPKT;
-		} else
-			csr |= MUSB_RXCSR_P_WZC_BITS;
-
-		/* clear dma enable */
-		csr &= ~(MUSB_RXCSR_DMAENAB);
-		musb_writew(regs, MUSB_RXCSR, csr);
-		csr = musb_readw(regs, MUSB_RXCSR);
-
-		/* Quiesce: wait for current dma to finish (if not cleanup).
-		 * We can't use bit zero of stateram->rx_sop, since that
-		 * refers to an entire "DMA packet" not just emptying the
-		 * current fifo.  Most segments need multiple usb packets.
-		 */
-		if (channel->status == MUSB_DMA_STATUS_BUSY)
-			udelay(50);
-
-		/* scan the current list, reporting any data that was
-		 * transferred and acking any IRQ
-		 */
-		cppi_rx_scan(controller, cppi_ch->index);
-
-		/* clobber the existing state once it's idle
-		 *
-		 * NOTE:  arguably, we should also wait for all the other
-		 * RX channels to quiesce (how??) and then temporarily
-		 * disable RXCPPI_CTRL_REG ... but it seems that we can
-		 * rely on the controller restarting from state ram, with
-		 * only RXCPPI_BUFCNT state being bogus.  BUFCNT will
-		 * correct itself after the next DMA transfer though.
-		 *
-		 * REVISIT does using rndis mode change that?
-		 */
-		cppi_reset_rx(cppi_ch->state_ram);
-
-		/* next DMA request _should_ load cppi head ptr */
-
-		/* ... we don't "free" that list, only mutate it in place.  */
-		cppi_dump_rx(5, cppi_ch, " (done abort)");
-
-		/* clean up previously pending bds */
-		cppi_bd_free(cppi_ch, cppi_ch->last_processed);
-		cppi_ch->last_processed = NULL;
-
-		while (queue) {
-			struct cppi_descriptor	*tmp = queue->next;
-
-			cppi_bd_free(cppi_ch, queue);
-			queue = tmp;
-		}
-	}
-
-	channel->status = MUSB_DMA_STATUS_FREE;
-	cppi_ch->buf_dma = 0;
-	cppi_ch->offset = 0;
-	cppi_ch->buf_len = 0;
-	cppi_ch->maxpacket = 0;
-	return 0;
-}
-
-/* TBD Queries:
- *
- * Power Management ... probably turn off cppi during suspend, restart;
- * check state ram?  Clocking is presumably shared with usb core.
- */
diff --git a/drivers/usb/musb/davinci.c b/drivers/usb/musb/davinci.c
deleted file mode 100644
index 7044355263943..0000000000000
--- a/drivers/usb/musb/davinci.c
+++ /dev/null
@@ -1,606 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2005-2006 by Texas Instruments
- *
- * This file is part of the Inventra Controller Driver for Linux.
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/sched.h>
-#include <linux/list.h>
-#include <linux/delay.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/gpio/consumer.h>
-#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
-#include <linux/usb/usb_phy_generic.h>
-
-#include <mach/cputype.h>
-#include <mach/hardware.h>
-
-#include <asm/mach-types.h>
-
-#include "musb_core.h"
-
-#include "davinci.h"
-#include "cppi_dma.h"
-
-
-#define USB_PHY_CTRL	IO_ADDRESS(USBPHY_CTL_PADDR)
-#define DM355_DEEPSLEEP	IO_ADDRESS(DM355_DEEPSLEEP_PADDR)
-
-struct davinci_glue {
-	struct device		*dev;
-	struct platform_device	*musb;
-	struct clk		*clk;
-	bool			vbus_state;
-	struct gpio_desc	*vbus;
-	struct work_struct	vbus_work;
-};
-
-/* REVISIT (PM) we should be able to keep the PHY in low power mode most
- * of the time (24 MHZ oscillator and PLL off, etc) by setting POWER.D0
- * and, when in host mode, autosuspending idle root ports... PHYPLLON
- * (overriding SUSPENDM?) then likely needs to stay off.
- */
-
-static inline void phy_on(void)
-{
-	u32	phy_ctrl = __raw_readl(USB_PHY_CTRL);
-
-	/* power everything up; start the on-chip PHY and its PLL */
-	phy_ctrl &= ~(USBPHY_OSCPDWN | USBPHY_OTGPDWN | USBPHY_PHYPDWN);
-	phy_ctrl |= USBPHY_SESNDEN | USBPHY_VBDTCTEN | USBPHY_PHYPLLON;
-	__raw_writel(phy_ctrl, USB_PHY_CTRL);
-
-	/* wait for PLL to lock before proceeding */
-	while ((__raw_readl(USB_PHY_CTRL) & USBPHY_PHYCLKGD) == 0)
-		cpu_relax();
-}
-
-static inline void phy_off(void)
-{
-	u32	phy_ctrl = __raw_readl(USB_PHY_CTRL);
-
-	/* powerdown the on-chip PHY, its PLL, and the OTG block */
-	phy_ctrl &= ~(USBPHY_SESNDEN | USBPHY_VBDTCTEN | USBPHY_PHYPLLON);
-	phy_ctrl |= USBPHY_OSCPDWN | USBPHY_OTGPDWN | USBPHY_PHYPDWN;
-	__raw_writel(phy_ctrl, USB_PHY_CTRL);
-}
-
-static int dma_off = 1;
-
-static void davinci_musb_enable(struct musb *musb)
-{
-	u32	tmp, old, val;
-
-	/* workaround:  setup irqs through both register sets */
-	tmp = (musb->epmask & DAVINCI_USB_TX_ENDPTS_MASK)
-			<< DAVINCI_USB_TXINT_SHIFT;
-	musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_SET_REG, tmp);
-	old = tmp;
-	tmp = (musb->epmask & (0xfffe & DAVINCI_USB_RX_ENDPTS_MASK))
-			<< DAVINCI_USB_RXINT_SHIFT;
-	musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_SET_REG, tmp);
-	tmp |= old;
-
-	val = ~MUSB_INTR_SOF;
-	tmp |= ((val & 0x01ff) << DAVINCI_USB_USBINT_SHIFT);
-	musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_SET_REG, tmp);
-
-	if (is_dma_capable() && !dma_off)
-		printk(KERN_WARNING "%s %s: dma not reactivated\n",
-				__FILE__, __func__);
-	else
-		dma_off = 0;
-
-	/* force a DRVVBUS irq so we can start polling for ID change */
-	musb_writel(musb->ctrl_base, DAVINCI_USB_INT_SET_REG,
-			DAVINCI_INTR_DRVVBUS << DAVINCI_USB_USBINT_SHIFT);
-}
-
-/*
- * Disable the HDRC and flush interrupts
- */
-static void davinci_musb_disable(struct musb *musb)
-{
-	/* because we don't set CTRLR.UINT, "important" to:
-	 *  - not read/write INTRUSB/INTRUSBE
-	 *  - (except during initial setup, as workaround)
-	 *  - use INTSETR/INTCLRR instead
-	 */
-	musb_writel(musb->ctrl_base, DAVINCI_USB_INT_MASK_CLR_REG,
-			  DAVINCI_USB_USBINT_MASK
-			| DAVINCI_USB_TXINT_MASK
-			| DAVINCI_USB_RXINT_MASK);
-	musb_writel(musb->ctrl_base, DAVINCI_USB_EOI_REG, 0);
-
-	if (is_dma_capable() && !dma_off)
-		WARNING("dma still active\n");
-}
-
-
-#define	portstate(stmt)		stmt
-
-/*
- * VBUS SWITCHING IS BOARD-SPECIFIC ... at least for the DM6446 EVM,
- * which doesn't wire DRVVBUS to the FET that switches it.  Unclear
- * if that's a problem with the DM6446 chip or just with that board.
- *
- * In either case, the DM355 EVM automates DRVVBUS the normal way,
- * when J10 is out, and TI documents it as handling OTG.
- */
-
-/* I2C operations are always synchronous, and require a task context.
- * With unloaded systems, using the shared workqueue seems to suffice
- * to satisfy the 100msec A_WAIT_VRISE timeout...
- */
-static void evm_deferred_drvvbus(struct work_struct *work)
-{
-	struct davinci_glue *glue = container_of(work, struct davinci_glue,
-						 vbus_work);
-
-	gpiod_set_value_cansleep(glue->vbus, glue->vbus_state);
-	glue->vbus_state = !glue->vbus_state;
-}
-
-static void davinci_musb_source_power(struct musb *musb, int is_on,
-				      int immediate)
-{
-	struct davinci_glue *glue = dev_get_drvdata(musb->controller->parent);
-
-	/* This GPIO handling is entirely optional */
-	if (!glue->vbus)
-		return;
-
-	if (is_on)
-		is_on = 1;
-
-	if (glue->vbus_state == is_on)
-		return;
-	/* 0/1 vs "-1 == unknown/init" */
-	glue->vbus_state = !is_on;
-
-	if (machine_is_davinci_evm()) {
-		if (immediate)
-			gpiod_set_value_cansleep(glue->vbus, glue->vbus_state);
-		else
-			schedule_work(&glue->vbus_work);
-	}
-	if (immediate)
-		glue->vbus_state = is_on;
-}
-
-static void davinci_musb_set_vbus(struct musb *musb, int is_on)
-{
-	WARN_ON(is_on && is_peripheral_active(musb));
-	davinci_musb_source_power(musb, is_on, 0);
-}
-
-
-#define	POLL_SECONDS	2
-
-static void otg_timer(struct timer_list *t)
-{
-	struct musb		*musb = from_timer(musb, t, dev_timer);
-	void __iomem		*mregs = musb->mregs;
-	u8			devctl;
-	unsigned long		flags;
-
-	/* We poll because DaVinci's won't expose several OTG-critical
-	* status change events (from the transceiver) otherwise.
-	 */
-	devctl = musb_readb(mregs, MUSB_DEVCTL);
-	dev_dbg(musb->controller, "poll devctl %02x (%s)\n", devctl,
-		usb_otg_state_string(musb->xceiv->otg->state));
-
-	spin_lock_irqsave(&musb->lock, flags);
-	switch (musb->xceiv->otg->state) {
-	case OTG_STATE_A_WAIT_VFALL:
-		/* Wait till VBUS falls below SessionEnd (~0.2V); the 1.3 RTL
-		 * seems to mis-handle session "start" otherwise (or in our
-		 * case "recover"), in routine "VBUS was valid by the time
-		 * VBUSERR got reported during enumeration" cases.
-		 */
-		if (devctl & MUSB_DEVCTL_VBUS) {
-			mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ);
-			break;
-		}
-		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
-		musb_writel(musb->ctrl_base, DAVINCI_USB_INT_SET_REG,
-			MUSB_INTR_VBUSERROR << DAVINCI_USB_USBINT_SHIFT);
-		break;
-	case OTG_STATE_B_IDLE:
-		/*
-		 * There's no ID-changed IRQ, so we have no good way to tell
-		 * when to switch to the A-Default state machine (by setting
-		 * the DEVCTL.SESSION flag).
-		 *
-		 * Workaround:  whenever we're in B_IDLE, try setting the
-		 * session flag every few seconds.  If it works, ID was
-		 * grounded and we're now in the A-Default state machine.
-		 *
-		 * NOTE setting the session flag is _supposed_ to trigger
-		 * SRP, but clearly it doesn't.
-		 */
-		musb_writeb(mregs, MUSB_DEVCTL,
-				devctl | MUSB_DEVCTL_SESSION);
-		devctl = musb_readb(mregs, MUSB_DEVCTL);
-		if (devctl & MUSB_DEVCTL_BDEVICE)
-			mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ);
-		else
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-		break;
-	default:
-		break;
-	}
-	spin_unlock_irqrestore(&musb->lock, flags);
-}
-
-static irqreturn_t davinci_musb_interrupt(int irq, void *__hci)
-{
-	unsigned long	flags;
-	irqreturn_t	retval = IRQ_NONE;
-	struct musb	*musb = __hci;
-	struct usb_otg	*otg = musb->xceiv->otg;
-	void __iomem	*tibase = musb->ctrl_base;
-	struct cppi	*cppi;
-	u32		tmp;
-
-	spin_lock_irqsave(&musb->lock, flags);
-
-	/* NOTE: DaVinci shadows the Mentor IRQs.  Don't manage them through
-	 * the Mentor registers (except for setup), use the TI ones and EOI.
-	 *
-	 * Docs describe irq "vector" registers associated with the CPPI and
-	 * USB EOI registers.  These hold a bitmask corresponding to the
-	 * current IRQ, not an irq handler address.  Would using those bits
-	 * resolve some of the races observed in this dispatch code??
-	 */
-
-	/* CPPI interrupts share the same IRQ line, but have their own
-	 * mask, state, "vector", and EOI registers.
-	 */
-	cppi = container_of(musb->dma_controller, struct cppi, controller);
-	if (is_cppi_enabled(musb) && musb->dma_controller && !cppi->irq)
-		retval = cppi_interrupt(irq, __hci);
-
-	/* ack and handle non-CPPI interrupts */
-	tmp = musb_readl(tibase, DAVINCI_USB_INT_SRC_MASKED_REG);
-	musb_writel(tibase, DAVINCI_USB_INT_SRC_CLR_REG, tmp);
-	dev_dbg(musb->controller, "IRQ %08x\n", tmp);
-
-	musb->int_rx = (tmp & DAVINCI_USB_RXINT_MASK)
-			>> DAVINCI_USB_RXINT_SHIFT;
-	musb->int_tx = (tmp & DAVINCI_USB_TXINT_MASK)
-			>> DAVINCI_USB_TXINT_SHIFT;
-	musb->int_usb = (tmp & DAVINCI_USB_USBINT_MASK)
-			>> DAVINCI_USB_USBINT_SHIFT;
-
-	/* DRVVBUS irqs are the only proxy we have (a very poor one!) for
-	 * DaVinci's missing ID change IRQ.  We need an ID change IRQ to
-	 * switch appropriately between halves of the OTG state machine.
-	 * Managing DEVCTL.SESSION per Mentor docs requires we know its
-	 * value, but DEVCTL.BDEVICE is invalid without DEVCTL.SESSION set.
-	 * Also, DRVVBUS pulses for SRP (but not at 5V) ...
-	 */
-	if (tmp & (DAVINCI_INTR_DRVVBUS << DAVINCI_USB_USBINT_SHIFT)) {
-		int	drvvbus = musb_readl(tibase, DAVINCI_USB_STAT_REG);
-		void __iomem *mregs = musb->mregs;
-		u8	devctl = musb_readb(mregs, MUSB_DEVCTL);
-		int	err = musb->int_usb & MUSB_INTR_VBUSERROR;
-
-		err = musb->int_usb & MUSB_INTR_VBUSERROR;
-		if (err) {
-			/* The Mentor core doesn't debounce VBUS as needed
-			 * to cope with device connect current spikes. This
-			 * means it's not uncommon for bus-powered devices
-			 * to get VBUS errors during enumeration.
-			 *
-			 * This is a workaround, but newer RTL from Mentor
-			 * seems to allow a better one: "re"starting sessions
-			 * without waiting (on EVM, a **long** time) for VBUS
-			 * to stop registering in devctl.
-			 */
-			musb->int_usb &= ~MUSB_INTR_VBUSERROR;
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VFALL;
-			mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ);
-			WARNING("VBUS error workaround (delay coming)\n");
-		} else if (drvvbus) {
-			MUSB_HST_MODE(musb);
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
-			portstate(musb->port1_status |= USB_PORT_STAT_POWER);
-			del_timer(&musb->dev_timer);
-		} else {
-			musb->is_active = 0;
-			MUSB_DEV_MODE(musb);
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-			portstate(musb->port1_status &= ~USB_PORT_STAT_POWER);
-		}
-
-		/* NOTE:  this must complete poweron within 100 msec
-		 * (OTG_TIME_A_WAIT_VRISE) but we don't check for that.
-		 */
-		davinci_musb_source_power(musb, drvvbus, 0);
-		dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n",
-				drvvbus ? "on" : "off",
-				usb_otg_state_string(musb->xceiv->otg->state),
-				err ? " ERROR" : "",
-				devctl);
-		retval = IRQ_HANDLED;
-	}
-
-	if (musb->int_tx || musb->int_rx || musb->int_usb)
-		retval |= musb_interrupt(musb);
-
-	/* irq stays asserted until EOI is written */
-	musb_writel(tibase, DAVINCI_USB_EOI_REG, 0);
-
-	/* poll for ID change */
-	if (musb->xceiv->otg->state == OTG_STATE_B_IDLE)
-		mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ);
-
-	spin_unlock_irqrestore(&musb->lock, flags);
-
-	return retval;
-}
-
-static int davinci_musb_set_mode(struct musb *musb, u8 mode)
-{
-	/* EVM can't do this (right?) */
-	return -EIO;
-}
-
-static int davinci_musb_init(struct musb *musb)
-{
-	void __iomem	*tibase = musb->ctrl_base;
-	u32		revision;
-	int 		ret = -ENODEV;
-
-	musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
-	if (IS_ERR_OR_NULL(musb->xceiv)) {
-		ret = -EPROBE_DEFER;
-		goto unregister;
-	}
-
-	musb->mregs += DAVINCI_BASE_OFFSET;
-
-	/* returns zero if e.g. not clocked */
-	revision = musb_readl(tibase, DAVINCI_USB_VERSION_REG);
-	if (revision == 0)
-		goto fail;
-
-	timer_setup(&musb->dev_timer, otg_timer, 0);
-
-	davinci_musb_source_power(musb, 0, 1);
-
-	/* dm355 EVM swaps D+/D- for signal integrity, and
-	 * is clocked from the main 24 MHz crystal.
-	 */
-	if (machine_is_davinci_dm355_evm()) {
-		u32	phy_ctrl = __raw_readl(USB_PHY_CTRL);
-
-		phy_ctrl &= ~(3 << 9);
-		phy_ctrl |= USBPHY_DATAPOL;
-		__raw_writel(phy_ctrl, USB_PHY_CTRL);
-	}
-
-	/* On dm355, the default-A state machine needs DRVVBUS control.
-	 * If we won't be a host, there's no need to turn it on.
-	 */
-	if (cpu_is_davinci_dm355()) {
-		u32	deepsleep = __raw_readl(DM355_DEEPSLEEP);
-
-		deepsleep &= ~DRVVBUS_FORCE;
-		__raw_writel(deepsleep, DM355_DEEPSLEEP);
-	}
-
-	/* reset the controller */
-	musb_writel(tibase, DAVINCI_USB_CTRL_REG, 0x1);
-
-	/* start the on-chip PHY and its PLL */
-	phy_on();
-
-	msleep(5);
-
-	/* NOTE:  irqs are in mixed mode, not bypass to pure-musb */
-	pr_debug("DaVinci OTG revision %08x phy %03x control %02x\n",
-		revision, __raw_readl(USB_PHY_CTRL),
-		musb_readb(tibase, DAVINCI_USB_CTRL_REG));
-
-	musb->isr = davinci_musb_interrupt;
-	return 0;
-
-fail:
-	usb_put_phy(musb->xceiv);
-unregister:
-	usb_phy_generic_unregister();
-	return ret;
-}
-
-static int davinci_musb_exit(struct musb *musb)
-{
-	int	maxdelay = 30;
-	u8	devctl, warn = 0;
-
-	del_timer_sync(&musb->dev_timer);
-
-	/* force VBUS off */
-	if (cpu_is_davinci_dm355()) {
-		u32	deepsleep = __raw_readl(DM355_DEEPSLEEP);
-
-		deepsleep &= ~DRVVBUS_FORCE;
-		deepsleep |= DRVVBUS_OVERRIDE;
-		__raw_writel(deepsleep, DM355_DEEPSLEEP);
-	}
-
-	davinci_musb_source_power(musb, 0 /*off*/, 1);
-
-	/*
-	 * delay, to avoid problems with module reload.
-	 * if there's no peripheral connected, this can take a
-	 * long time to fall, especially on EVM with huge C133.
-	 */
-	do {
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (!(devctl & MUSB_DEVCTL_VBUS))
-			break;
-		if ((devctl & MUSB_DEVCTL_VBUS) != warn) {
-			warn = devctl & MUSB_DEVCTL_VBUS;
-			dev_dbg(musb->controller, "VBUS %d\n",
-				warn >> MUSB_DEVCTL_VBUS_SHIFT);
-		}
-		msleep(1000);
-		maxdelay--;
-	} while (maxdelay > 0);
-
-	/* in OTG mode, another host might be connected */
-	if (devctl & MUSB_DEVCTL_VBUS)
-		dev_dbg(musb->controller, "VBUS off timeout (devctl %02x)\n", devctl);
-
-	phy_off();
-
-	usb_put_phy(musb->xceiv);
-
-	return 0;
-}
-
-static const struct musb_platform_ops davinci_ops = {
-	.quirks		= MUSB_DMA_CPPI,
-	.init		= davinci_musb_init,
-	.exit		= davinci_musb_exit,
-
-#ifdef CONFIG_USB_TI_CPPI_DMA
-	.dma_init	= cppi_dma_controller_create,
-	.dma_exit	= cppi_dma_controller_destroy,
-#endif
-	.enable		= davinci_musb_enable,
-	.disable	= davinci_musb_disable,
-
-	.set_mode	= davinci_musb_set_mode,
-
-	.set_vbus	= davinci_musb_set_vbus,
-};
-
-static const struct platform_device_info davinci_dev_info = {
-	.name		= "musb-hdrc",
-	.id		= PLATFORM_DEVID_AUTO,
-	.dma_mask	= DMA_BIT_MASK(32),
-};
-
-static int davinci_probe(struct platform_device *pdev)
-{
-	struct resource			musb_resources[3];
-	struct musb_hdrc_platform_data	*pdata = dev_get_platdata(&pdev->dev);
-	struct platform_device		*musb;
-	struct davinci_glue		*glue;
-	struct platform_device_info	pinfo;
-	struct clk			*clk;
-
-	int				ret = -ENOMEM;
-
-	glue = devm_kzalloc(&pdev->dev, sizeof(*glue), GFP_KERNEL);
-	if (!glue)
-		goto err0;
-
-	clk = devm_clk_get(&pdev->dev, "usb");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		ret = PTR_ERR(clk);
-		goto err0;
-	}
-
-	ret = clk_enable(clk);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to enable clock\n");
-		goto err0;
-	}
-
-	glue->dev			= &pdev->dev;
-	glue->clk			= clk;
-
-	pdata->platform_ops		= &davinci_ops;
-
-	glue->vbus = devm_gpiod_get_optional(&pdev->dev, NULL, GPIOD_OUT_LOW);
-	if (IS_ERR(glue->vbus)) {
-		ret = PTR_ERR(glue->vbus);
-		goto err0;
-	} else {
-		glue->vbus_state = -1;
-		INIT_WORK(&glue->vbus_work, evm_deferred_drvvbus);
-	}
-
-	usb_phy_generic_register();
-	platform_set_drvdata(pdev, glue);
-
-	memset(musb_resources, 0x00, sizeof(*musb_resources) *
-			ARRAY_SIZE(musb_resources));
-
-	musb_resources[0].name = pdev->resource[0].name;
-	musb_resources[0].start = pdev->resource[0].start;
-	musb_resources[0].end = pdev->resource[0].end;
-	musb_resources[0].flags = pdev->resource[0].flags;
-
-	musb_resources[1].name = pdev->resource[1].name;
-	musb_resources[1].start = pdev->resource[1].start;
-	musb_resources[1].end = pdev->resource[1].end;
-	musb_resources[1].flags = pdev->resource[1].flags;
-
-	/*
-	 * For DM6467 3 resources are passed. A placeholder for the 3rd
-	 * resource is always there, so it's safe to always copy it...
-	 */
-	musb_resources[2].name = pdev->resource[2].name;
-	musb_resources[2].start = pdev->resource[2].start;
-	musb_resources[2].end = pdev->resource[2].end;
-	musb_resources[2].flags = pdev->resource[2].flags;
-
-	pinfo = davinci_dev_info;
-	pinfo.parent = &pdev->dev;
-	pinfo.res = musb_resources;
-	pinfo.num_res = ARRAY_SIZE(musb_resources);
-	pinfo.data = pdata;
-	pinfo.size_data = sizeof(*pdata);
-
-	glue->musb = musb = platform_device_register_full(&pinfo);
-	if (IS_ERR(musb)) {
-		ret = PTR_ERR(musb);
-		dev_err(&pdev->dev, "failed to register musb device: %d\n", ret);
-		goto err1;
-	}
-
-	return 0;
-
-err1:
-	clk_disable(clk);
-
-err0:
-	return ret;
-}
-
-static int davinci_remove(struct platform_device *pdev)
-{
-	struct davinci_glue		*glue = platform_get_drvdata(pdev);
-
-	platform_device_unregister(glue->musb);
-	usb_phy_generic_unregister();
-	clk_disable(glue->clk);
-
-	return 0;
-}
-
-static struct platform_driver davinci_driver = {
-	.probe		= davinci_probe,
-	.remove		= davinci_remove,
-	.driver		= {
-		.name	= "musb-davinci",
-	},
-};
-
-MODULE_DESCRIPTION("DaVinci MUSB Glue Layer");
-MODULE_AUTHOR("Felipe Balbi <balbi@ti.com>");
-MODULE_LICENSE("GPL v2");
-module_platform_driver(davinci_driver);
diff --git a/drivers/usb/musb/davinci.h b/drivers/usb/musb/davinci.h
deleted file mode 100644
index c8e67d15b510b..0000000000000
--- a/drivers/usb/musb/davinci.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2005-2006 by Texas Instruments
- */
-
-#ifndef __MUSB_HDRDF_H__
-#define __MUSB_HDRDF_H__
-
-/*
- * DaVinci-specific definitions
- */
-
-/* Integrated highspeed/otg PHY */
-#define USBPHY_CTL_PADDR	0x01c40034
-#define USBPHY_DATAPOL		BIT(11)	/* (dm355) switch D+/D- */
-#define USBPHY_PHYCLKGD		BIT(8)
-#define USBPHY_SESNDEN		BIT(7)	/* v(sess_end) comparator */
-#define USBPHY_VBDTCTEN		BIT(6)	/* v(bus) comparator */
-#define USBPHY_VBUSSENS		BIT(5)	/* (dm355,ro) is vbus > 0.5V */
-#define USBPHY_PHYPLLON		BIT(4)	/* override pll suspend */
-#define USBPHY_CLKO1SEL		BIT(3)
-#define USBPHY_OSCPDWN		BIT(2)
-#define USBPHY_OTGPDWN		BIT(1)
-#define USBPHY_PHYPDWN		BIT(0)
-
-#define DM355_DEEPSLEEP_PADDR	0x01c40048
-#define DRVVBUS_FORCE		BIT(2)
-#define DRVVBUS_OVERRIDE	BIT(1)
-
-/* For now include usb OTG module registers here */
-#define DAVINCI_USB_VERSION_REG		0x00
-#define DAVINCI_USB_CTRL_REG		0x04
-#define DAVINCI_USB_STAT_REG		0x08
-#define DAVINCI_RNDIS_REG		0x10
-#define DAVINCI_AUTOREQ_REG		0x14
-#define DAVINCI_USB_INT_SOURCE_REG	0x20
-#define DAVINCI_USB_INT_SET_REG		0x24
-#define DAVINCI_USB_INT_SRC_CLR_REG	0x28
-#define DAVINCI_USB_INT_MASK_REG	0x2c
-#define DAVINCI_USB_INT_MASK_SET_REG	0x30
-#define DAVINCI_USB_INT_MASK_CLR_REG	0x34
-#define DAVINCI_USB_INT_SRC_MASKED_REG	0x38
-#define DAVINCI_USB_EOI_REG		0x3c
-#define DAVINCI_USB_EOI_INTVEC		0x40
-
-/* BEGIN CPPI-generic (?) */
-
-/* CPPI related registers */
-#define DAVINCI_TXCPPI_CTRL_REG		0x80
-#define DAVINCI_TXCPPI_TEAR_REG		0x84
-#define DAVINCI_CPPI_EOI_REG		0x88
-#define DAVINCI_CPPI_INTVEC_REG		0x8c
-#define DAVINCI_TXCPPI_MASKED_REG	0x90
-#define DAVINCI_TXCPPI_RAW_REG		0x94
-#define DAVINCI_TXCPPI_INTENAB_REG	0x98
-#define DAVINCI_TXCPPI_INTCLR_REG	0x9c
-
-#define DAVINCI_RXCPPI_CTRL_REG		0xC0
-#define DAVINCI_RXCPPI_MASKED_REG	0xD0
-#define DAVINCI_RXCPPI_RAW_REG		0xD4
-#define DAVINCI_RXCPPI_INTENAB_REG	0xD8
-#define DAVINCI_RXCPPI_INTCLR_REG	0xDC
-
-#define DAVINCI_RXCPPI_BUFCNT0_REG	0xE0
-#define DAVINCI_RXCPPI_BUFCNT1_REG	0xE4
-#define DAVINCI_RXCPPI_BUFCNT2_REG	0xE8
-#define DAVINCI_RXCPPI_BUFCNT3_REG	0xEC
-
-/* CPPI state RAM entries */
-#define DAVINCI_CPPI_STATERAM_BASE_OFFSET   0x100
-
-#define DAVINCI_TXCPPI_STATERAM_OFFSET(chnum) \
-	(DAVINCI_CPPI_STATERAM_BASE_OFFSET +       ((chnum) * 0x40))
-#define DAVINCI_RXCPPI_STATERAM_OFFSET(chnum) \
-	(DAVINCI_CPPI_STATERAM_BASE_OFFSET + 0x20 + ((chnum) * 0x40))
-
-/* CPPI masks */
-#define DAVINCI_DMA_CTRL_ENABLE		1
-#define DAVINCI_DMA_CTRL_DISABLE	0
-
-#define DAVINCI_DMA_ALL_CHANNELS_ENABLE	0xF
-#define DAVINCI_DMA_ALL_CHANNELS_DISABLE 0xF
-
-/* END CPPI-generic (?) */
-
-#define DAVINCI_USB_TX_ENDPTS_MASK	0x1f		/* ep0 + 4 tx */
-#define DAVINCI_USB_RX_ENDPTS_MASK	0x1e		/* 4 rx */
-
-#define DAVINCI_USB_USBINT_SHIFT	16
-#define DAVINCI_USB_TXINT_SHIFT		0
-#define DAVINCI_USB_RXINT_SHIFT		8
-
-#define DAVINCI_INTR_DRVVBUS		0x0100
-
-#define DAVINCI_USB_USBINT_MASK		0x01ff0000	/* 8 Mentor, DRVVBUS */
-#define DAVINCI_USB_TXINT_MASK \
-	(DAVINCI_USB_TX_ENDPTS_MASK << DAVINCI_USB_TXINT_SHIFT)
-#define DAVINCI_USB_RXINT_MASK \
-	(DAVINCI_USB_RX_ENDPTS_MASK << DAVINCI_USB_RXINT_SHIFT)
-
-#define DAVINCI_BASE_OFFSET		0x400
-
-#endif	/* __MUSB_HDRDF_H__ */
-- 
GitLab


From 55f223b8b408cbfd85fb1c5b74ab85ccab319a69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 17 Oct 2022 21:59:14 +0200
Subject: [PATCH 0355/4122] usb: dwc2: platform: Improve error reporting for
 problems during .remove()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Returning an error value in a platform driver's remove callback results in
a generic error message being emitted by the driver core, but otherwise it
doesn't make a difference. The device goes away anyhow.

For each case where ret is non-zero the driver already emits an error
message, so suppress the generic error message by returning zero
unconditionally. (Side note: The return value handling was unreliable
anyhow as the value returned by dwc2_exit_hibernation() was overwritten
anyhow if hsotg->in_ppd was non-zero.)

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Minas Harutyunyan <hminas@synopsys.com>
Link: https://lore.kernel.org/r/20221017195914.1426297-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/platform.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index ec4ace0107f5f..262c13b6362ac 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -321,7 +321,7 @@ static int dwc2_driver_remove(struct platform_device *dev)
 	reset_control_assert(hsotg->reset);
 	reset_control_assert(hsotg->reset_ecc);
 
-	return ret;
+	return 0;
 }
 
 /**
-- 
GitLab


From b295d484b97081feba72b071ffcb72fb4638ccfd Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Oct 2022 12:21:25 +0300
Subject: [PATCH 0356/4122] device property: Allow const parameter to
 dev_fwnode()

It's not fully correct to take a const parameter pointer to a struct
and return a non-const pointer to a member of that struct.

Instead, introduce a const version of the dev_fwnode() API which takes
and returns const pointers and use it where it's applicable.

With this, convert dev_fwnode() to be a macro wrapper on top of const
and non-const APIs that chooses one based on the type.

Suggested-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Fixes: aade55c86033 ("device property: Add const qualifier to device_get_match_data() parameter")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://lore.kernel.org/r/20221004092129.19412-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/property.c  | 11 +++++++++--
 include/linux/property.h |  7 ++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 4d6278a848686..d77302d285662 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -17,12 +17,19 @@
 #include <linux/property.h>
 #include <linux/phy.h>
 
-struct fwnode_handle *dev_fwnode(const struct device *dev)
+struct fwnode_handle *__dev_fwnode(struct device *dev)
 {
 	return IS_ENABLED(CONFIG_OF) && dev->of_node ?
 		of_fwnode_handle(dev->of_node) : dev->fwnode;
 }
-EXPORT_SYMBOL_GPL(dev_fwnode);
+EXPORT_SYMBOL_GPL(__dev_fwnode);
+
+const struct fwnode_handle *__dev_fwnode_const(const struct device *dev)
+{
+	return IS_ENABLED(CONFIG_OF) && dev->of_node ?
+		of_fwnode_handle(dev->of_node) : dev->fwnode;
+}
+EXPORT_SYMBOL_GPL(__dev_fwnode_const);
 
 /**
  * device_property_present - check if a property of a device is present
diff --git a/include/linux/property.h b/include/linux/property.h
index 117cc200c656d..587b5b666b5bb 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -32,7 +32,12 @@ enum dev_dma_attr {
 	DEV_DMA_COHERENT,
 };
 
-struct fwnode_handle *dev_fwnode(const struct device *dev);
+const struct fwnode_handle *__dev_fwnode_const(const struct device *dev);
+struct fwnode_handle *__dev_fwnode(struct device *dev);
+#define dev_fwnode(dev)							\
+	_Generic((dev),							\
+		 const struct device *: __dev_fwnode_const,	\
+		 struct device *: __dev_fwnode)(dev)
 
 bool device_property_present(struct device *dev, const char *propname);
 int device_property_read_u8_array(struct device *dev, const char *propname,
-- 
GitLab


From 23ead33bc6ed62abc9adc5fe27b9911e2ef5d209 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Oct 2022 12:21:26 +0300
Subject: [PATCH 0357/4122] device property: Constify fwnode connection match
 APIs

The fwnode and device parameters are not altered in the fwnode
connection match APIs, constify them.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://lore.kernel.org/r/20221004092129.19412-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/property.c     | 8 ++++----
 drivers/usb/roles/class.c   | 2 +-
 drivers/usb/typec/mux.c     | 8 ++++----
 drivers/usb/typec/retimer.c | 2 +-
 include/linux/property.h    | 8 ++++----
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index d77302d285662..58b8158add5c8 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -1213,7 +1213,7 @@ const void *device_get_match_data(const struct device *dev)
 }
 EXPORT_SYMBOL_GPL(device_get_match_data);
 
-static unsigned int fwnode_graph_devcon_matches(struct fwnode_handle *fwnode,
+static unsigned int fwnode_graph_devcon_matches(const struct fwnode_handle *fwnode,
 						const char *con_id, void *data,
 						devcon_match_fn_t match,
 						void **matches,
@@ -1247,7 +1247,7 @@ static unsigned int fwnode_graph_devcon_matches(struct fwnode_handle *fwnode,
 	return count;
 }
 
-static unsigned int fwnode_devcon_matches(struct fwnode_handle *fwnode,
+static unsigned int fwnode_devcon_matches(const struct fwnode_handle *fwnode,
 					  const char *con_id, void *data,
 					  devcon_match_fn_t match,
 					  void **matches,
@@ -1289,7 +1289,7 @@ static unsigned int fwnode_devcon_matches(struct fwnode_handle *fwnode,
  * device node. @match will be used to convert the connection description to
  * data the caller is expecting to be returned.
  */
-void *fwnode_connection_find_match(struct fwnode_handle *fwnode,
+void *fwnode_connection_find_match(const struct fwnode_handle *fwnode,
 				   const char *con_id, void *data,
 				   devcon_match_fn_t match)
 {
@@ -1326,7 +1326,7 @@ EXPORT_SYMBOL_GPL(fwnode_connection_find_match);
  *
  * Return: Number of matches resolved, or negative errno.
  */
-int fwnode_connection_find_matches(struct fwnode_handle *fwnode,
+int fwnode_connection_find_matches(const struct fwnode_handle *fwnode,
 				   const char *con_id, void *data,
 				   devcon_match_fn_t match,
 				   void **matches, unsigned int matches_len)
diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index dfaed7eee94fc..a3575a5a18ce3 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -87,7 +87,7 @@ enum usb_role usb_role_switch_get_role(struct usb_role_switch *sw)
 }
 EXPORT_SYMBOL_GPL(usb_role_switch_get_role);
 
-static void *usb_role_switch_match(struct fwnode_handle *fwnode, const char *id,
+static void *usb_role_switch_match(const struct fwnode_handle *fwnode, const char *id,
 				   void *data)
 {
 	struct device *dev;
diff --git a/drivers/usb/typec/mux.c b/drivers/usb/typec/mux.c
index 941735c731619..c7177ddd4f127 100644
--- a/drivers/usb/typec/mux.c
+++ b/drivers/usb/typec/mux.c
@@ -32,8 +32,8 @@ static int switch_fwnode_match(struct device *dev, const void *fwnode)
 	return device_match_fwnode(dev, fwnode);
 }
 
-static void *typec_switch_match(struct fwnode_handle *fwnode, const char *id,
-				void *data)
+static void *typec_switch_match(const struct fwnode_handle *fwnode,
+				const char *id, void *data)
 {
 	struct device *dev;
 
@@ -262,8 +262,8 @@ static int mux_fwnode_match(struct device *dev, const void *fwnode)
 	return device_match_fwnode(dev, fwnode);
 }
 
-static void *typec_mux_match(struct fwnode_handle *fwnode, const char *id,
-			     void *data)
+static void *typec_mux_match(const struct fwnode_handle *fwnode,
+			     const char *id, void *data)
 {
 	const struct typec_altmode_desc *desc = data;
 	struct device *dev;
diff --git a/drivers/usb/typec/retimer.c b/drivers/usb/typec/retimer.c
index ee94dbbe47453..8e1055783fe22 100644
--- a/drivers/usb/typec/retimer.c
+++ b/drivers/usb/typec/retimer.c
@@ -34,7 +34,7 @@ static int retimer_fwnode_match(struct device *dev, const void *fwnode)
 	return device_match_fwnode(dev, fwnode) && dev_name_ends_with(dev, "-retimer");
 }
 
-static void *typec_retimer_match(struct fwnode_handle *fwnode, const char *id, void *data)
+static void *typec_retimer_match(const struct fwnode_handle *fwnode, const char *id, void *data)
 {
 	struct device *dev;
 
diff --git a/include/linux/property.h b/include/linux/property.h
index 587b5b666b5bb..8d82775a901a9 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -442,21 +442,21 @@ unsigned int fwnode_graph_get_endpoint_count(struct fwnode_handle *fwnode,
 int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 				struct fwnode_endpoint *endpoint);
 
-typedef void *(*devcon_match_fn_t)(struct fwnode_handle *fwnode, const char *id,
+typedef void *(*devcon_match_fn_t)(const struct fwnode_handle *fwnode, const char *id,
 				   void *data);
 
-void *fwnode_connection_find_match(struct fwnode_handle *fwnode,
+void *fwnode_connection_find_match(const struct fwnode_handle *fwnode,
 				   const char *con_id, void *data,
 				   devcon_match_fn_t match);
 
-static inline void *device_connection_find_match(struct device *dev,
+static inline void *device_connection_find_match(const struct device *dev,
 						 const char *con_id, void *data,
 						 devcon_match_fn_t match)
 {
 	return fwnode_connection_find_match(dev_fwnode(dev), con_id, data, match);
 }
 
-int fwnode_connection_find_matches(struct fwnode_handle *fwnode,
+int fwnode_connection_find_matches(const struct fwnode_handle *fwnode,
 				   const char *con_id, void *data,
 				   devcon_match_fn_t match,
 				   void **matches, unsigned int matches_len);
-- 
GitLab


From a1bfed6094ac6868c43aaa43d021bf562cd93d07 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Oct 2022 12:21:27 +0300
Subject: [PATCH 0358/4122] device property: Constify parameter in
 fwnode_graph_is_endpoint()

Constify parameter in fwnode_graph_is_endpoint() since it doesn't
alter anything related to it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://lore.kernel.org/r/20221004092129.19412-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/property.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/property.h b/include/linux/property.h
index 8d82775a901a9..7abb1792044f7 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -410,7 +410,7 @@ struct fwnode_handle *fwnode_graph_get_remote_port(
 struct fwnode_handle *fwnode_graph_get_remote_endpoint(
 	const struct fwnode_handle *fwnode);
 
-static inline bool fwnode_graph_is_endpoint(struct fwnode_handle *fwnode)
+static inline bool fwnode_graph_is_endpoint(const struct fwnode_handle *fwnode)
 {
 	return fwnode_property_present(fwnode, "remote-endpoint");
 }
-- 
GitLab


From 7952cd2b8213f20a1752634c25dfd215da537722 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Oct 2022 12:21:28 +0300
Subject: [PATCH 0359/4122] device property: Constify device child node APIs

The device parameter is not altered in the device child node APIs,
constify them.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://lore.kernel.org/r/20221004092129.19412-5-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/property.c  |  6 +++---
 include/linux/property.h | 12 ++++++------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index 58b8158add5c8..d3ea5f82978f3 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -763,7 +763,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_next_available_child_node);
  * @dev: Device to find the next child node for.
  * @child: Handle to one of the device's child nodes or a null handle.
  */
-struct fwnode_handle *device_get_next_child_node(struct device *dev,
+struct fwnode_handle *device_get_next_child_node(const struct device *dev,
 						 struct fwnode_handle *child)
 {
 	const struct fwnode_handle *fwnode = dev_fwnode(dev);
@@ -800,7 +800,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_named_child_node);
  * @dev: Device to find the named child node for.
  * @childname: String to match child node name against.
  */
-struct fwnode_handle *device_get_named_child_node(struct device *dev,
+struct fwnode_handle *device_get_named_child_node(const struct device *dev,
 						  const char *childname)
 {
 	return fwnode_get_named_child_node(dev_fwnode(dev), childname);
@@ -859,7 +859,7 @@ EXPORT_SYMBOL_GPL(fwnode_device_is_available);
  * device_get_child_node_count - return the number of child nodes for device
  * @dev: Device to cound the child nodes for
  */
-unsigned int device_get_child_node_count(struct device *dev)
+unsigned int device_get_child_node_count(const struct device *dev)
 {
 	struct fwnode_handle *child;
 	unsigned int count = 0;
diff --git a/include/linux/property.h b/include/linux/property.h
index 7abb1792044f7..472689e53ade5 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -114,16 +114,16 @@ struct fwnode_handle *fwnode_get_next_available_child_node(
 	for (child = fwnode_get_next_available_child_node(fwnode, NULL); child;\
 	     child = fwnode_get_next_available_child_node(fwnode, child))
 
-struct fwnode_handle *device_get_next_child_node(
-	struct device *dev, struct fwnode_handle *child);
+struct fwnode_handle *device_get_next_child_node(const struct device *dev,
+						 struct fwnode_handle *child);
 
 #define device_for_each_child_node(dev, child)				\
 	for (child = device_get_next_child_node(dev, NULL); child;	\
 	     child = device_get_next_child_node(dev, child))
 
-struct fwnode_handle *fwnode_get_named_child_node(
-	const struct fwnode_handle *fwnode, const char *childname);
-struct fwnode_handle *device_get_named_child_node(struct device *dev,
+struct fwnode_handle *fwnode_get_named_child_node(const struct fwnode_handle *fwnode,
+						  const char *childname);
+struct fwnode_handle *device_get_named_child_node(const struct device *dev,
 						  const char *childname);
 
 struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode);
@@ -132,7 +132,7 @@ void fwnode_handle_put(struct fwnode_handle *fwnode);
 int fwnode_irq_get(const struct fwnode_handle *fwnode, unsigned int index);
 int fwnode_irq_get_byname(const struct fwnode_handle *fwnode, const char *name);
 
-unsigned int device_get_child_node_count(struct device *dev);
+unsigned int device_get_child_node_count(const struct device *dev);
 
 static inline bool device_property_read_bool(struct device *dev,
 					     const char *propname)
-- 
GitLab


From 59789f3418dd3c0a187490d49e900a59a5c8d732 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 4 Oct 2022 12:21:29 +0300
Subject: [PATCH 0360/4122] device property: Constify parameter in
 device_dma_supported() and device_get_dma_attr()

Constify parameter in device_dma_supported() and device_get_dma_attr()
since they don't alter anything related to it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://lore.kernel.org/r/20221004092129.19412-6-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/property.c  | 4 ++--
 include/linux/property.h | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index d3ea5f82978f3..68f61d3e38577 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -871,13 +871,13 @@ unsigned int device_get_child_node_count(const struct device *dev)
 }
 EXPORT_SYMBOL_GPL(device_get_child_node_count);
 
-bool device_dma_supported(struct device *dev)
+bool device_dma_supported(const struct device *dev)
 {
 	return fwnode_call_bool_op(dev_fwnode(dev), device_dma_supported);
 }
 EXPORT_SYMBOL_GPL(device_dma_supported);
 
-enum dev_dma_attr device_get_dma_attr(struct device *dev)
+enum dev_dma_attr device_get_dma_attr(const struct device *dev)
 {
 	if (!fwnode_has_op(dev_fwnode(dev), device_get_dma_attr))
 		return DEV_DMA_NOT_SUPPORTED;
diff --git a/include/linux/property.h b/include/linux/property.h
index 472689e53ade5..83674f968a8fd 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -388,9 +388,8 @@ property_entries_dup(const struct property_entry *properties);
 
 void property_entries_free(const struct property_entry *properties);
 
-bool device_dma_supported(struct device *dev);
-
-enum dev_dma_attr device_get_dma_attr(struct device *dev);
+bool device_dma_supported(const struct device *dev);
+enum dev_dma_attr device_get_dma_attr(const struct device *dev);
 
 const void *device_get_match_data(const struct device *dev);
 
-- 
GitLab


From 87fa05b6db47403fa4fbe3a8ce8fa619f7c8667e Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sat, 8 Oct 2022 22:45:01 +0300
Subject: [PATCH 0361/4122] thunderbolt: Use str_enabled_disabled() helper

Use str_enabled_disabled() helper instead of open coding the same.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/thunderbolt/switch.c  | 5 +++--
 drivers/thunderbolt/xdomain.c | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/thunderbolt/switch.c b/drivers/thunderbolt/switch.c
index 60da5c23ccaf4..363d712aa3643 100644
--- a/drivers/thunderbolt/switch.c
+++ b/drivers/thunderbolt/switch.c
@@ -8,12 +8,13 @@
 
 #include <linux/delay.h>
 #include <linux/idr.h>
+#include <linux/module.h>
 #include <linux/nvmem-provider.h>
 #include <linux/pm_runtime.h>
 #include <linux/sched/signal.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
-#include <linux/module.h>
+#include <linux/string_helpers.h>
 
 #include "tb.h"
 
@@ -644,7 +645,7 @@ static int __tb_port_enable(struct tb_port *port, bool enable)
 	if (ret)
 		return ret;
 
-	tb_port_dbg(port, "lane %sabled\n", enable ? "en" : "dis");
+	tb_port_dbg(port, "lane %s\n", str_enabled_disabled(enable));
 	return 0;
 }
 
diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
index f00b2f62d8e3c..ddd8fd2d06f86 100644
--- a/drivers/thunderbolt/xdomain.c
+++ b/drivers/thunderbolt/xdomain.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
 #include <linux/prandom.h>
+#include <linux/string_helpers.h>
 #include <linux/utsname.h>
 #include <linux/uuid.h>
 #include <linux/workqueue.h>
@@ -1344,7 +1345,7 @@ static int tb_xdomain_bond_lanes_uuid_high(struct tb_xdomain *xd)
 	tb_port_update_credits(port);
 	tb_xdomain_update_link_attributes(xd);
 
-	dev_dbg(&xd->dev, "lane bonding %sabled\n", width == 2 ? "en" : "dis");
+	dev_dbg(&xd->dev, "lane bonding %s\n", str_enabled_disabled(width == 2));
 	return 0;
 }
 
-- 
GitLab


From b9589c417fedab6b963cf084ef305665166f5326 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 19 Oct 2022 23:57:09 +0100
Subject: [PATCH 0362/4122] thunderbolt: Remove redundant assignment to
 variable len

The variable len is assigned a value that is never read. It is re-assigned
a new value in the following do-while loop and never referenced after
the loop. The assignment is redundant and can be removed.

Cleans up clang scan build warning:
drivers/thunderbolt/xdomain.c:344:2: warning: Value stored to 'len' is
never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/thunderbolt/xdomain.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/thunderbolt/xdomain.c b/drivers/thunderbolt/xdomain.c
index ddd8fd2d06f86..cfa83486c9dad 100644
--- a/drivers/thunderbolt/xdomain.c
+++ b/drivers/thunderbolt/xdomain.c
@@ -342,7 +342,6 @@ static int tb_xdp_properties_request(struct tb_ctl *ctl, u64 route,
 	memcpy(&req.src_uuid, src_uuid, sizeof(*src_uuid));
 	memcpy(&req.dst_uuid, dst_uuid, sizeof(*dst_uuid));
 
-	len = 0;
 	data_len = 0;
 
 	do {
-- 
GitLab


From 32c6fefb291bf84c5a4dbc7d52b56a1605ed9aae Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Wed, 12 Oct 2022 15:27:54 +0200
Subject: [PATCH 0363/4122] usb: phy: generic: make vcc regulator optional

phy-generic uses the existance of the property "vcc-supply" to see if a
regulator is optional or not. Use devm_regulator_get_optional() instead
which exists for this purpose. Using devm_regulator_get_optional()
avoids "supply vcc not found, using dummy regulator" messages.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20221012132754.292151-1-s.hauer@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-generic.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 3dc5c04e7cbf9..8ed9327cc4a52 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -209,7 +209,7 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
 	int err = 0;
 
 	u32 clk_rate = 0;
-	bool needs_vcc = false, needs_clk = false;
+	bool needs_clk = false;
 
 	if (dev->of_node) {
 		struct device_node *node = dev->of_node;
@@ -217,7 +217,6 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
 		if (of_property_read_u32(node, "clock-frequency", &clk_rate))
 			clk_rate = 0;
 
-		needs_vcc = of_property_read_bool(node, "vcc-supply");
 		needs_clk = of_property_read_bool(node, "clocks");
 	}
 	nop->gpiod_reset = devm_gpiod_get_optional(dev, "reset",
@@ -257,13 +256,10 @@ int usb_phy_gen_create_phy(struct device *dev, struct usb_phy_generic *nop)
 		}
 	}
 
-	nop->vcc = devm_regulator_get(dev, "vcc");
-	if (IS_ERR(nop->vcc)) {
-		dev_dbg(dev, "Error getting vcc regulator: %ld\n",
-					PTR_ERR(nop->vcc));
-		if (needs_vcc)
-			return -EPROBE_DEFER;
-	}
+	nop->vcc = devm_regulator_get_optional(dev, "vcc");
+	if (IS_ERR(nop->vcc) && PTR_ERR(nop->vcc) != -ENODEV)
+		return dev_err_probe(dev, PTR_ERR(nop->vcc),
+				     "could not get vcc regulator\n");
 
 	nop->vbus_draw = devm_regulator_get_exclusive(dev, "vbus");
 	if (PTR_ERR(nop->vbus_draw) == -ENODEV)
-- 
GitLab


From e1b5d2bed67c60c30d01a89df32152d74cfc8e63 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Sun, 9 Oct 2022 23:53:36 +0800
Subject: [PATCH 0364/4122] usb: chipidea: core: handle usb role switch in a
 common way

Currently, ci_usb_role_switch_set() may be called before system resume
stage when suspended. Worse yet, ci_hdrc device may stay at RPM_ACTIVE
state which will cause pm_runtime_get_sync() fail to resume the device.
In this case, role-switch may unable to complete transition process due
to not exit from lpm state or due to lack some means after system resume.

Same as ci_cable_notifier(), usb_role_switch could handle its events based
on ci_hdrc_cable mechanism.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Link: https://lore.kernel.org/r/20221009155336.766960-1-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/core.c | 55 ++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 35 deletions(-)

diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 6330fa9117926..ae90fee75a32d 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -608,49 +608,32 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw,
 				  enum usb_role role)
 {
 	struct ci_hdrc *ci = usb_role_switch_get_drvdata(sw);
-	struct ci_hdrc_cable *cable = NULL;
-	enum usb_role current_role = ci_role_to_usb_role(ci);
-	enum ci_role ci_role = usb_role_to_ci_role(role);
-	unsigned long flags;
-
-	if ((ci_role != CI_ROLE_END && !ci->roles[ci_role]) ||
-	    (current_role == role))
-		return 0;
+	struct ci_hdrc_cable *cable;
 
-	pm_runtime_get_sync(ci->dev);
-	/* Stop current role */
-	spin_lock_irqsave(&ci->lock, flags);
-	if (current_role == USB_ROLE_DEVICE)
+	if (role == USB_ROLE_HOST) {
+		cable = &ci->platdata->id_extcon;
+		cable->changed = true;
+		cable->connected = true;
 		cable = &ci->platdata->vbus_extcon;
-	else if (current_role == USB_ROLE_HOST)
+		cable->changed = true;
+		cable->connected = false;
+	} else if (role == USB_ROLE_DEVICE) {
 		cable = &ci->platdata->id_extcon;
-
-	if (cable) {
 		cable->changed = true;
 		cable->connected = false;
-		ci_irq(ci);
-		spin_unlock_irqrestore(&ci->lock, flags);
-		if (ci->wq && role != USB_ROLE_NONE)
-			flush_workqueue(ci->wq);
-		spin_lock_irqsave(&ci->lock, flags);
-	}
-
-	cable = NULL;
-
-	/* Start target role */
-	if (role == USB_ROLE_DEVICE)
 		cable = &ci->platdata->vbus_extcon;
-	else if (role == USB_ROLE_HOST)
-		cable = &ci->platdata->id_extcon;
-
-	if (cable) {
 		cable->changed = true;
 		cable->connected = true;
-		ci_irq(ci);
+	} else {
+		cable = &ci->platdata->id_extcon;
+		cable->changed = true;
+		cable->connected = false;
+		cable = &ci->platdata->vbus_extcon;
+		cable->changed = true;
+		cable->connected = false;
 	}
-	spin_unlock_irqrestore(&ci->lock, flags);
-	pm_runtime_put_sync(ci->dev);
 
+	ci_irq(ci);
 	return 0;
 }
 
@@ -1305,11 +1288,13 @@ static void ci_extcon_wakeup_int(struct ci_hdrc *ci)
 	cable_id = &ci->platdata->id_extcon;
 	cable_vbus = &ci->platdata->vbus_extcon;
 
-	if (!IS_ERR(cable_id->edev) && ci->is_otg &&
+	if ((!IS_ERR(cable_id->edev) || !IS_ERR(ci->role_switch))
+		&& ci->is_otg &&
 		(otgsc & OTGSC_IDIE) && (otgsc & OTGSC_IDIS))
 		ci_irq(ci);
 
-	if (!IS_ERR(cable_vbus->edev) && ci->is_otg &&
+	if ((!IS_ERR(cable_vbus->edev) || !IS_ERR(ci->role_switch))
+		&& ci->is_otg &&
 		(otgsc & OTGSC_BSVIE) && (otgsc & OTGSC_BSVIS))
 		ci_irq(ci);
 }
-- 
GitLab


From caa7b74493f9c903fb6cd4bdec295bcae0507cc6 Mon Sep 17 00:00:00 2001
From: Peng Fan <peng.fan@nxp.com>
Date: Fri, 14 Oct 2022 17:55:50 +0800
Subject: [PATCH 0365/4122] dt-bindings: phy: imx8mq-usb: add power-domains
 property

Add optional power-domains property for usb phy.

Signed-off-by: Peng Fan <peng.fan@nxp.com>
Acked-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221014095550.2125018-1-peng.fan@oss.nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
index 2936f3510a6ad..5ba9570ad7bf2 100644
--- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
@@ -28,6 +28,9 @@ properties:
     items:
       - const: phy
 
+  power-domains:
+    maxItems: 1
+
   vbus-supply:
     description:
       A phandle to the regulator for USB VBUS.
-- 
GitLab


From 74494b33211d067427db25824cd8b53fa0eab1ef Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Thu, 13 Oct 2022 23:14:35 +0800
Subject: [PATCH 0366/4122] usb: chipidea: core: add controller resume support
 when controller is powered off

For some SoCs, the controler's power will be off during the system
suspend, and it needs some recovery operation to let the system back
to workable. We add this support in this patch.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Acked-by: Peter Chen <peter.chen@kernel.org>
Link: https://lore.kernel.org/r/20221013151442.3262951-2-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/core.c | 80 ++++++++++++++++++++++++++++---------
 drivers/usb/chipidea/otg.c  |  2 +-
 drivers/usb/chipidea/otg.h  |  1 +
 3 files changed, 63 insertions(+), 20 deletions(-)

diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index ae90fee75a32d..80267b973c26a 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -637,6 +637,49 @@ static int ci_usb_role_switch_set(struct usb_role_switch *sw,
 	return 0;
 }
 
+static enum ci_role ci_get_role(struct ci_hdrc *ci)
+{
+	enum ci_role role;
+
+	if (ci->roles[CI_ROLE_HOST] && ci->roles[CI_ROLE_GADGET]) {
+		if (ci->is_otg) {
+			role = ci_otg_role(ci);
+			hw_write_otgsc(ci, OTGSC_IDIE, OTGSC_IDIE);
+		} else {
+			/*
+			 * If the controller is not OTG capable, but support
+			 * role switch, the defalt role is gadget, and the
+			 * user can switch it through debugfs.
+			 */
+			role = CI_ROLE_GADGET;
+		}
+	} else {
+		role = ci->roles[CI_ROLE_HOST] ? CI_ROLE_HOST
+					: CI_ROLE_GADGET;
+	}
+
+	return role;
+}
+
+static void ci_handle_power_lost(struct ci_hdrc *ci)
+{
+	enum ci_role role;
+
+	disable_irq_nosync(ci->irq);
+	if (!ci_otg_is_fsm_mode(ci)) {
+		role = ci_get_role(ci);
+
+		if (ci->role != role) {
+			ci_handle_id_switch(ci);
+		} else if (role == CI_ROLE_GADGET) {
+			if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
+				usb_gadget_vbus_connect(&ci->gadget);
+		}
+	}
+
+	enable_irq(ci->irq);
+}
+
 static struct usb_role_switch_desc ci_role_switch = {
 	.set = ci_usb_role_switch_set,
 	.get = ci_usb_role_switch_get,
@@ -1134,25 +1177,7 @@ static int ci_hdrc_probe(struct platform_device *pdev)
 		}
 	}
 
-	if (ci->roles[CI_ROLE_HOST] && ci->roles[CI_ROLE_GADGET]) {
-		if (ci->is_otg) {
-			ci->role = ci_otg_role(ci);
-			/* Enable ID change irq */
-			hw_write_otgsc(ci, OTGSC_IDIE, OTGSC_IDIE);
-		} else {
-			/*
-			 * If the controller is not OTG capable, but support
-			 * role switch, the defalt role is gadget, and the
-			 * user can switch it through debugfs.
-			 */
-			ci->role = CI_ROLE_GADGET;
-		}
-	} else {
-		ci->role = ci->roles[CI_ROLE_HOST]
-			? CI_ROLE_HOST
-			: CI_ROLE_GADGET;
-	}
-
+	ci->role = ci_get_role(ci);
 	if (!ci_otg_is_fsm_mode(ci)) {
 		/* only update vbus status for peripheral */
 		if (ci->role == CI_ROLE_GADGET) {
@@ -1374,8 +1399,16 @@ static int ci_suspend(struct device *dev)
 static int ci_resume(struct device *dev)
 {
 	struct ci_hdrc *ci = dev_get_drvdata(dev);
+	bool power_lost;
 	int ret;
 
+	/* Since ASYNCLISTADDR (host mode) and ENDPTLISTADDR (device
+	 * mode) share the same register address. We can check if
+	 * controller resume from power lost based on this address
+	 * due to this register will be reset after power lost.
+	 */
+	power_lost = !hw_read(ci, OP_ENDPTLISTADDR, ~0);
+
 	if (device_may_wakeup(dev))
 		disable_irq_wake(ci->irq);
 
@@ -1383,6 +1416,15 @@ static int ci_resume(struct device *dev)
 	if (ret)
 		return ret;
 
+	if (power_lost) {
+		/* shutdown and re-init for phy */
+		ci_usb_phy_exit(ci);
+		ci_usb_phy_init(ci);
+	}
+
+	if (power_lost)
+		ci_handle_power_lost(ci);
+
 	if (ci->supports_runtime_pm) {
 		pm_runtime_disable(dev);
 		pm_runtime_set_active(dev);
diff --git a/drivers/usb/chipidea/otg.c b/drivers/usb/chipidea/otg.c
index 7b53274ef9664..622c3b68aa1e6 100644
--- a/drivers/usb/chipidea/otg.c
+++ b/drivers/usb/chipidea/otg.c
@@ -165,7 +165,7 @@ static int hw_wait_vbus_lower_bsv(struct ci_hdrc *ci)
 	return 0;
 }
 
-static void ci_handle_id_switch(struct ci_hdrc *ci)
+void ci_handle_id_switch(struct ci_hdrc *ci)
 {
 	enum ci_role role = ci_otg_role(ci);
 
diff --git a/drivers/usb/chipidea/otg.h b/drivers/usb/chipidea/otg.h
index 5e7a6e571dd2f..87629b81e03ea 100644
--- a/drivers/usb/chipidea/otg.h
+++ b/drivers/usb/chipidea/otg.h
@@ -14,6 +14,7 @@ int ci_hdrc_otg_init(struct ci_hdrc *ci);
 void ci_hdrc_otg_destroy(struct ci_hdrc *ci);
 enum ci_role ci_otg_role(struct ci_hdrc *ci);
 void ci_handle_vbus_change(struct ci_hdrc *ci);
+void ci_handle_id_switch(struct ci_hdrc *ci);
 static inline void ci_otg_queue_work(struct ci_hdrc *ci)
 {
 	disable_irq_nosync(ci->irq);
-- 
GitLab


From 450857c6058f092167f17bad97a2cc9c2a39b9a0 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Thu, 13 Oct 2022 23:14:36 +0800
Subject: [PATCH 0367/4122] usb: chipidea: core: handle suspend/resume for each
 role

There may be a need to handle suspend/resume per role. This patch
will add this support.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Acked-by: Peter Chen <peter.chen@kernel.org>
Link: https://lore.kernel.org/r/20221013151442.3262951-3-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/ci.h   | 4 ++++
 drivers/usb/chipidea/core.c | 8 ++++++++
 2 files changed, 12 insertions(+)

diff --git a/drivers/usb/chipidea/ci.h b/drivers/usb/chipidea/ci.h
index a4a3be0499109..005c67cb3afb7 100644
--- a/drivers/usb/chipidea/ci.h
+++ b/drivers/usb/chipidea/ci.h
@@ -127,12 +127,16 @@ enum ci_revision {
  * struct ci_role_driver - host/gadget role driver
  * @start: start this role
  * @stop: stop this role
+ * @suspend: system suspend handler for this role
+ * @resume: system resume handler for this role
  * @irq: irq handler for this role
  * @name: role name string (host/gadget)
  */
 struct ci_role_driver {
 	int		(*start)(struct ci_hdrc *);
 	void		(*stop)(struct ci_hdrc *);
+	void		(*suspend)(struct ci_hdrc *ci);
+	void		(*resume)(struct ci_hdrc *ci, bool power_lost);
 	irqreturn_t	(*irq)(struct ci_hdrc *);
 	const char	*name;
 };
diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 80267b973c26a..2b170b434d012 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -1383,6 +1383,10 @@ static int ci_suspend(struct device *dev)
 		return 0;
 	}
 
+	/* Extra routine per role before system suspend */
+	if (ci->role != CI_ROLE_END && ci_role(ci)->suspend)
+		ci_role(ci)->suspend(ci);
+
 	if (device_may_wakeup(dev)) {
 		if (ci_otg_is_fsm_mode(ci))
 			ci_otg_fsm_suspend_for_srp(ci);
@@ -1422,6 +1426,10 @@ static int ci_resume(struct device *dev)
 		ci_usb_phy_init(ci);
 	}
 
+	/* Extra routine per role after system resume */
+	if (ci->role != CI_ROLE_END && ci_role(ci)->resume)
+		ci_role(ci)->resume(ci, power_lost);
+
 	if (power_lost)
 		ci_handle_power_lost(ci);
 
-- 
GitLab


From 2f64d6a6cdfbd992e8a8c481ebf79bfa9a71325b Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Thu, 13 Oct 2022 23:14:37 +0800
Subject: [PATCH 0368/4122] usb: chipidea: host: add suspend/resume support for
 host controller

The controller's power may be powered off during system suspend. This
will add suspend/resume support when the controller suffers power lost.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Link: https://lore.kernel.org/r/20221013151442.3262951-4-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/host.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/usb/chipidea/host.c b/drivers/usb/chipidea/host.c
index bc3634a54c6b7..ebe7400243b12 100644
--- a/drivers/usb/chipidea/host.c
+++ b/drivers/usb/chipidea/host.c
@@ -459,6 +459,18 @@ static void ci_hdrc_unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
 	ci_hdrc_free_dma_aligned_buffer(urb);
 }
 
+#ifdef CONFIG_PM_SLEEP
+static void ci_hdrc_host_suspend(struct ci_hdrc *ci)
+{
+	ehci_suspend(ci->hcd, device_may_wakeup(ci->dev));
+}
+
+static void ci_hdrc_host_resume(struct ci_hdrc *ci, bool power_lost)
+{
+	ehci_resume(ci->hcd, power_lost);
+}
+#endif
+
 int ci_hdrc_host_init(struct ci_hdrc *ci)
 {
 	struct ci_role_driver *rdrv;
@@ -472,6 +484,10 @@ int ci_hdrc_host_init(struct ci_hdrc *ci)
 
 	rdrv->start	= host_start;
 	rdrv->stop	= host_stop;
+#ifdef CONFIG_PM_SLEEP
+	rdrv->suspend	= ci_hdrc_host_suspend;
+	rdrv->resume	= ci_hdrc_host_resume;
+#endif
 	rdrv->irq	= host_irq;
 	rdrv->name	= "host";
 	ci->roles[CI_ROLE_HOST] = rdrv;
-- 
GitLab


From 235ffc17d0146d806f6ad8c094c24ff4878f2edb Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Thu, 13 Oct 2022 23:14:38 +0800
Subject: [PATCH 0369/4122] usb: chipidea: udc: add suspend/resume support for
 device controller

The controller's power may be powered off during system suspend. This
will add suspend/resume support when the controller suffers power lost.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Acked-by: Peter Chen <peter.chen@kernel.org>
Link: https://lore.kernel.org/r/20221013151442.3262951-5-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/udc.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/usb/chipidea/udc.c b/drivers/usb/chipidea/udc.c
index 8c3e3a635ac2d..54c09245ad055 100644
--- a/drivers/usb/chipidea/udc.c
+++ b/drivers/usb/chipidea/udc.c
@@ -2181,6 +2181,34 @@ static void udc_id_switch_for_host(struct ci_hdrc *ci)
 				     ci->platdata->pins_default);
 }
 
+#ifdef CONFIG_PM_SLEEP
+static void udc_suspend(struct ci_hdrc *ci)
+{
+	/*
+	 * Set OP_ENDPTLISTADDR to be non-zero for
+	 * checking if controller resume from power lost
+	 * in non-host mode.
+	 */
+	if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0)
+		hw_write(ci, OP_ENDPTLISTADDR, ~0, ~0);
+}
+
+static void udc_resume(struct ci_hdrc *ci, bool power_lost)
+{
+	if (power_lost) {
+		if (ci->is_otg)
+			hw_write_otgsc(ci, OTGSC_BSVIS | OTGSC_BSVIE,
+					OTGSC_BSVIS | OTGSC_BSVIE);
+		if (ci->vbus_active)
+			usb_gadget_vbus_disconnect(&ci->gadget);
+	}
+
+	/* Restore value 0 if it was set for power lost check */
+	if (hw_read(ci, OP_ENDPTLISTADDR, ~0) == 0xFFFFFFFF)
+		hw_write(ci, OP_ENDPTLISTADDR, ~0, 0);
+}
+#endif
+
 /**
  * ci_hdrc_gadget_init - initialize device related bits
  * @ci: the controller
@@ -2201,6 +2229,10 @@ int ci_hdrc_gadget_init(struct ci_hdrc *ci)
 
 	rdrv->start	= udc_id_switch_for_device;
 	rdrv->stop	= udc_id_switch_for_host;
+#ifdef CONFIG_PM_SLEEP
+	rdrv->suspend	= udc_suspend;
+	rdrv->resume	= udc_resume;
+#endif
 	rdrv->irq	= udc_irq;
 	rdrv->name	= "gadget";
 
-- 
GitLab


From b332d6d5c804085ac26d2e7e1a953b59b49644f3 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Thu, 13 Oct 2022 23:14:39 +0800
Subject: [PATCH 0370/4122] usb: chipidea: usbmisc: group usbmisc operations
 for PM

As there maybe more APIs of usbmisc for suspend and resume, group
them into imx_usbmisc_suspend/resume. Besides, introduced .power_lost_check
API, so that proper resume operations can be performed in power lost case.

Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/20221013151442.3262951-6-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/ci_hdrc_imx.c |  49 ++++--------
 drivers/usb/chipidea/ci_hdrc_imx.h |   4 +-
 drivers/usb/chipidea/usbmisc_imx.c | 119 +++++++++++++++++++++--------
 3 files changed, 106 insertions(+), 66 deletions(-)

diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index 9ffcecd3058c1..923f5c00a1d96 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -527,16 +527,19 @@ static void ci_hdrc_imx_shutdown(struct platform_device *pdev)
 	ci_hdrc_imx_remove(pdev);
 }
 
-static int __maybe_unused imx_controller_suspend(struct device *dev)
+static int __maybe_unused imx_controller_suspend(struct device *dev,
+						 pm_message_t msg)
 {
 	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
 	int ret = 0;
 
 	dev_dbg(dev, "at %s\n", __func__);
 
-	ret = imx_usbmisc_hsic_set_clk(data->usbmisc_data, false);
+	ret = imx_usbmisc_suspend(data->usbmisc_data,
+				  PMSG_IS_AUTO(msg) || device_may_wakeup(dev));
 	if (ret) {
-		dev_err(dev, "usbmisc hsic_set_clk failed, ret=%d\n", ret);
+		dev_err(dev,
+			"usbmisc suspend failed, ret=%d\n", ret);
 		return ret;
 	}
 
@@ -549,7 +552,8 @@ static int __maybe_unused imx_controller_suspend(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused imx_controller_resume(struct device *dev)
+static int __maybe_unused imx_controller_resume(struct device *dev,
+						pm_message_t msg)
 {
 	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
 	int ret = 0;
@@ -570,22 +574,15 @@ static int __maybe_unused imx_controller_resume(struct device *dev)
 
 	data->in_lpm = false;
 
-	ret = imx_usbmisc_set_wakeup(data->usbmisc_data, false);
+	ret = imx_usbmisc_resume(data->usbmisc_data,
+				 PMSG_IS_AUTO(msg) || device_may_wakeup(dev));
 	if (ret) {
-		dev_err(dev, "usbmisc set_wakeup failed, ret=%d\n", ret);
+		dev_err(dev, "usbmisc resume failed, ret=%d\n", ret);
 		goto clk_disable;
 	}
 
-	ret = imx_usbmisc_hsic_set_clk(data->usbmisc_data, true);
-	if (ret) {
-		dev_err(dev, "usbmisc hsic_set_clk failed, ret=%d\n", ret);
-		goto hsic_set_clk_fail;
-	}
-
 	return 0;
 
-hsic_set_clk_fail:
-	imx_usbmisc_set_wakeup(data->usbmisc_data, true);
 clk_disable:
 	imx_disable_unprepare_clks(dev);
 	return ret;
@@ -601,16 +598,7 @@ static int __maybe_unused ci_hdrc_imx_suspend(struct device *dev)
 		/* The core's suspend doesn't run */
 		return 0;
 
-	if (device_may_wakeup(dev)) {
-		ret = imx_usbmisc_set_wakeup(data->usbmisc_data, true);
-		if (ret) {
-			dev_err(dev, "usbmisc set_wakeup failed, ret=%d\n",
-					ret);
-			return ret;
-		}
-	}
-
-	ret = imx_controller_suspend(dev);
+	ret = imx_controller_suspend(dev, PMSG_SUSPEND);
 	if (ret)
 		return ret;
 
@@ -624,7 +612,7 @@ static int __maybe_unused ci_hdrc_imx_resume(struct device *dev)
 	int ret;
 
 	pinctrl_pm_select_default_state(dev);
-	ret = imx_controller_resume(dev);
+	ret = imx_controller_resume(dev, PMSG_RESUME);
 	if (!ret && data->supports_runtime_pm) {
 		pm_runtime_disable(dev);
 		pm_runtime_set_active(dev);
@@ -637,25 +625,18 @@ static int __maybe_unused ci_hdrc_imx_resume(struct device *dev)
 static int __maybe_unused ci_hdrc_imx_runtime_suspend(struct device *dev)
 {
 	struct ci_hdrc_imx_data *data = dev_get_drvdata(dev);
-	int ret;
 
 	if (data->in_lpm) {
 		WARN_ON(1);
 		return 0;
 	}
 
-	ret = imx_usbmisc_set_wakeup(data->usbmisc_data, true);
-	if (ret) {
-		dev_err(dev, "usbmisc set_wakeup failed, ret=%d\n", ret);
-		return ret;
-	}
-
-	return imx_controller_suspend(dev);
+	return imx_controller_suspend(dev, PMSG_AUTO_SUSPEND);
 }
 
 static int __maybe_unused ci_hdrc_imx_runtime_resume(struct device *dev)
 {
-	return imx_controller_resume(dev);
+	return imx_controller_resume(dev, PMSG_AUTO_RESUME);
 }
 
 static const struct dev_pm_ops ci_hdrc_imx_pm_ops = {
diff --git a/drivers/usb/chipidea/ci_hdrc_imx.h b/drivers/usb/chipidea/ci_hdrc_imx.h
index 7daccb9c5006a..7135b9a5d9138 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.h
+++ b/drivers/usb/chipidea/ci_hdrc_imx.h
@@ -32,9 +32,9 @@ struct imx_usbmisc_data {
 
 int imx_usbmisc_init(struct imx_usbmisc_data *data);
 int imx_usbmisc_init_post(struct imx_usbmisc_data *data);
-int imx_usbmisc_set_wakeup(struct imx_usbmisc_data *data, bool enabled);
 int imx_usbmisc_hsic_set_connect(struct imx_usbmisc_data *data);
-int imx_usbmisc_hsic_set_clk(struct imx_usbmisc_data *data, bool on);
 int imx_usbmisc_charger_detection(struct imx_usbmisc_data *data, bool connect);
+int imx_usbmisc_suspend(struct imx_usbmisc_data *data, bool wakeup);
+int imx_usbmisc_resume(struct imx_usbmisc_data *data, bool wakeup);
 
 #endif /* __DRIVER_USB_CHIPIDEA_CI_HDRC_IMX_H */
diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index bac0f5458cab9..aa815f6d3fe9a 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -150,6 +150,8 @@ struct usbmisc_ops {
 	int (*hsic_set_clk)(struct imx_usbmisc_data *data, bool enabled);
 	/* usb charger detection */
 	int (*charger_detection)(struct imx_usbmisc_data *data);
+	/* It's called when system resume from usb power lost */
+	int (*power_lost_check)(struct imx_usbmisc_data *data);
 };
 
 struct imx_usbmisc {
@@ -1009,30 +1011,29 @@ EXPORT_SYMBOL_GPL(imx_usbmisc_init);
 int imx_usbmisc_init_post(struct imx_usbmisc_data *data)
 {
 	struct imx_usbmisc *usbmisc;
+	int ret = 0;
 
 	if (!data)
 		return 0;
 
 	usbmisc = dev_get_drvdata(data->dev);
-	if (!usbmisc->ops->post)
-		return 0;
-	return usbmisc->ops->post(data);
-}
-EXPORT_SYMBOL_GPL(imx_usbmisc_init_post);
-
-int imx_usbmisc_set_wakeup(struct imx_usbmisc_data *data, bool enabled)
-{
-	struct imx_usbmisc *usbmisc;
+	if (usbmisc->ops->post)
+		ret = usbmisc->ops->post(data);
+	if (ret) {
+		dev_err(data->dev, "post init failed, ret=%d\n", ret);
+		return ret;
+	}
 
-	if (!data)
-		return 0;
+	if (usbmisc->ops->set_wakeup)
+		ret = usbmisc->ops->set_wakeup(data, false);
+	if (ret) {
+		dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret);
+		return ret;
+	}
 
-	usbmisc = dev_get_drvdata(data->dev);
-	if (!usbmisc->ops->set_wakeup)
-		return 0;
-	return usbmisc->ops->set_wakeup(data, enabled);
+	return 0;
 }
-EXPORT_SYMBOL_GPL(imx_usbmisc_set_wakeup);
+EXPORT_SYMBOL_GPL(imx_usbmisc_init_post);
 
 int imx_usbmisc_hsic_set_connect(struct imx_usbmisc_data *data)
 {
@@ -1048,20 +1049,6 @@ int imx_usbmisc_hsic_set_connect(struct imx_usbmisc_data *data)
 }
 EXPORT_SYMBOL_GPL(imx_usbmisc_hsic_set_connect);
 
-int imx_usbmisc_hsic_set_clk(struct imx_usbmisc_data *data, bool on)
-{
-	struct imx_usbmisc *usbmisc;
-
-	if (!data)
-		return 0;
-
-	usbmisc = dev_get_drvdata(data->dev);
-	if (!usbmisc->ops->hsic_set_clk || !data->hsic)
-		return 0;
-	return usbmisc->ops->hsic_set_clk(data, on);
-}
-EXPORT_SYMBOL_GPL(imx_usbmisc_hsic_set_clk);
-
 int imx_usbmisc_charger_detection(struct imx_usbmisc_data *data, bool connect)
 {
 	struct imx_usbmisc *usbmisc;
@@ -1094,6 +1081,78 @@ int imx_usbmisc_charger_detection(struct imx_usbmisc_data *data, bool connect)
 }
 EXPORT_SYMBOL_GPL(imx_usbmisc_charger_detection);
 
+int imx_usbmisc_suspend(struct imx_usbmisc_data *data, bool wakeup)
+{
+	struct imx_usbmisc *usbmisc;
+	int ret = 0;
+
+	if (!data)
+		return 0;
+
+	usbmisc = dev_get_drvdata(data->dev);
+
+	if (wakeup && usbmisc->ops->set_wakeup)
+		ret = usbmisc->ops->set_wakeup(data, true);
+	if (ret) {
+		dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret);
+		return ret;
+	}
+
+	if (usbmisc->ops->hsic_set_clk && data->hsic)
+		ret = usbmisc->ops->hsic_set_clk(data, false);
+	if (ret) {
+		dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret);
+		return ret;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(imx_usbmisc_suspend);
+
+int imx_usbmisc_resume(struct imx_usbmisc_data *data, bool wakeup)
+{
+	struct imx_usbmisc *usbmisc;
+	int ret = 0;
+
+	if (!data)
+		return 0;
+
+	usbmisc = dev_get_drvdata(data->dev);
+
+	if (usbmisc->ops->power_lost_check)
+		ret = usbmisc->ops->power_lost_check(data);
+	if (ret > 0) {
+		/* re-init if resume from power lost */
+		ret = imx_usbmisc_init(data);
+		if (ret) {
+			dev_err(data->dev, "re-init failed, ret=%d\n", ret);
+			return ret;
+		}
+	}
+
+	if (wakeup && usbmisc->ops->set_wakeup)
+		ret = usbmisc->ops->set_wakeup(data, false);
+	if (ret) {
+		dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret);
+		return ret;
+	}
+
+	if (usbmisc->ops->hsic_set_clk && data->hsic)
+		ret = usbmisc->ops->hsic_set_clk(data, true);
+	if (ret) {
+		dev_err(data->dev, "set_wakeup failed, ret=%d\n", ret);
+		goto hsic_set_clk_fail;
+	}
+
+	return 0;
+
+hsic_set_clk_fail:
+	if (wakeup && usbmisc->ops->set_wakeup)
+		usbmisc->ops->set_wakeup(data, true);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(imx_usbmisc_resume);
+
 static const struct of_device_id usbmisc_imx_dt_ids[] = {
 	{
 		.compatible = "fsl,imx25-usbmisc",
-- 
GitLab


From 04ff4d31af40e268c6cde78814be465a6412212d Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Thu, 13 Oct 2022 23:14:40 +0800
Subject: [PATCH 0371/4122] usb: chipidea: usbmisc: add power lost check for
 imx6sx

imx6sx mega off can shutdown domain power supply if none of peripheral
in this domain is registered as wakeup source, this patch add related
codes to check if power is lost.

Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/20221013151442.3262951-7-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/usbmisc_imx.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index aa815f6d3fe9a..7bfbfc83cfe3c 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -939,6 +939,25 @@ static int usbmisc_imx7ulp_init(struct imx_usbmisc_data *data)
 	return 0;
 }
 
+static int usbmisc_imx6sx_power_lost_check(struct imx_usbmisc_data *data)
+{
+	struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&usbmisc->lock, flags);
+	val = readl(usbmisc->base + data->index * 4);
+	spin_unlock_irqrestore(&usbmisc->lock, flags);
+	/*
+	 * Here use a power on reset value to judge
+	 * if the controller experienced a power lost
+	 */
+	if (val == 0x30001000)
+		return 1;
+	else
+		return 0;
+}
+
 static const struct usbmisc_ops imx25_usbmisc_ops = {
 	.init = usbmisc_imx25_init,
 	.post = usbmisc_imx25_post,
@@ -972,6 +991,7 @@ static const struct usbmisc_ops imx6sx_usbmisc_ops = {
 	.init = usbmisc_imx6sx_init,
 	.hsic_set_connect = usbmisc_imx6_hsic_set_connect,
 	.hsic_set_clk = usbmisc_imx6_hsic_set_clk,
+	.power_lost_check = usbmisc_imx6sx_power_lost_check,
 };
 
 static const struct usbmisc_ops imx7d_usbmisc_ops = {
-- 
GitLab


From 604ceaa9e9fc223c2cc8d6cf0fc02022a3d14a68 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Thu, 13 Oct 2022 23:14:41 +0800
Subject: [PATCH 0372/4122] usb: chipidea: usbmisc: add power lost check for
 imx7d

imx7d can shutdown domain power supply if none of peripheral in this
domain is registered as wakeup source, this patch add related codes to
check if power is lost.

Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/20221013151442.3262951-8-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/usbmisc_imx.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index 7bfbfc83cfe3c..cc17dcd97856c 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -939,6 +939,25 @@ static int usbmisc_imx7ulp_init(struct imx_usbmisc_data *data)
 	return 0;
 }
 
+static int usbmisc_imx7d_power_lost_check(struct imx_usbmisc_data *data)
+{
+	struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&usbmisc->lock, flags);
+	val = readl(usbmisc->base);
+	spin_unlock_irqrestore(&usbmisc->lock, flags);
+	/*
+	 * Here use a power on reset value to judge
+	 * if the controller experienced a power lost
+	 */
+	if (val == 0x30001000)
+		return 1;
+	else
+		return 0;
+}
+
 static int usbmisc_imx6sx_power_lost_check(struct imx_usbmisc_data *data)
 {
 	struct imx_usbmisc *usbmisc = dev_get_drvdata(data->dev);
@@ -998,6 +1017,7 @@ static const struct usbmisc_ops imx7d_usbmisc_ops = {
 	.init = usbmisc_imx7d_init,
 	.set_wakeup = usbmisc_imx7d_set_wakeup,
 	.charger_detection = imx7d_charger_detection,
+	.power_lost_check = usbmisc_imx7d_power_lost_check,
 };
 
 static const struct usbmisc_ops imx7ulp_usbmisc_ops = {
-- 
GitLab


From 8127cac0f393abaddf5747bcc7e7ccf6668117fe Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Thu, 13 Oct 2022 23:14:42 +0800
Subject: [PATCH 0373/4122] usb: chipidea: usbmisc: add power lost check for
 imx7ulp

imx7ulp can shutdown domain power supply if none of peripheral in this
domain is registered as wakeup source, this patch add related power lost
check API.

Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/20221013151442.3262951-9-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/usbmisc_imx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/chipidea/usbmisc_imx.c b/drivers/usb/chipidea/usbmisc_imx.c
index cc17dcd97856c..acdb13316cd0d 100644
--- a/drivers/usb/chipidea/usbmisc_imx.c
+++ b/drivers/usb/chipidea/usbmisc_imx.c
@@ -1025,6 +1025,7 @@ static const struct usbmisc_ops imx7ulp_usbmisc_ops = {
 	.set_wakeup = usbmisc_imx7d_set_wakeup,
 	.hsic_set_connect = usbmisc_imx6_hsic_set_connect,
 	.hsic_set_clk = usbmisc_imx6_hsic_set_clk,
+	.power_lost_check = usbmisc_imx7d_power_lost_check,
 };
 
 static inline bool is_imx53_usbmisc(struct imx_usbmisc_data *data)
-- 
GitLab


From bf0563502ecdbb97efb65790b37dd50afbe2d9b2 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Sat, 22 Oct 2022 01:03:42 +0530
Subject: [PATCH 0374/4122] staging: wlan-ng: remove commented debug printk
 messages

printk messages are added for program flow tracing and are left
commented. These commented log messages should be removed as they
are no more useful for program execution.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y1L0FiKvrM9jjZG9@debian-BULLSEYE-live-builder-AMD64
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211netdev.c | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211netdev.c b/drivers/staging/wlan-ng/p80211netdev.c
index e04fc666d218e..6bef419e8ad0c 100644
--- a/drivers/staging/wlan-ng/p80211netdev.c
+++ b/drivers/staging/wlan-ng/p80211netdev.c
@@ -881,55 +881,42 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc)
 		wlandev->rx.mgmt++;
 		switch (fstype) {
 		case WLAN_FSTYPE_ASSOCREQ:
-			/* printk("assocreq"); */
 			wlandev->rx.assocreq++;
 			break;
 		case WLAN_FSTYPE_ASSOCRESP:
-			/* printk("assocresp"); */
 			wlandev->rx.assocresp++;
 			break;
 		case WLAN_FSTYPE_REASSOCREQ:
-			/* printk("reassocreq"); */
 			wlandev->rx.reassocreq++;
 			break;
 		case WLAN_FSTYPE_REASSOCRESP:
-			/* printk("reassocresp"); */
 			wlandev->rx.reassocresp++;
 			break;
 		case WLAN_FSTYPE_PROBEREQ:
-			/* printk("probereq"); */
 			wlandev->rx.probereq++;
 			break;
 		case WLAN_FSTYPE_PROBERESP:
-			/* printk("proberesp"); */
 			wlandev->rx.proberesp++;
 			break;
 		case WLAN_FSTYPE_BEACON:
-			/* printk("beacon"); */
 			wlandev->rx.beacon++;
 			break;
 		case WLAN_FSTYPE_ATIM:
-			/* printk("atim"); */
 			wlandev->rx.atim++;
 			break;
 		case WLAN_FSTYPE_DISASSOC:
-			/* printk("disassoc"); */
 			wlandev->rx.disassoc++;
 			break;
 		case WLAN_FSTYPE_AUTHEN:
-			/* printk("authen"); */
 			wlandev->rx.authen++;
 			break;
 		case WLAN_FSTYPE_DEAUTHEN:
-			/* printk("deauthen"); */
 			wlandev->rx.deauthen++;
 			break;
 		default:
-			/* printk("unknown"); */
 			wlandev->rx.mgmt_unknown++;
 			break;
 		}
-		/* printk("\n"); */
 		drop = 2;
 		break;
 
@@ -943,35 +930,27 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc)
 		wlandev->rx.ctl++;
 		switch (fstype) {
 		case WLAN_FSTYPE_PSPOLL:
-			/* printk("pspoll"); */
 			wlandev->rx.pspoll++;
 			break;
 		case WLAN_FSTYPE_RTS:
-			/* printk("rts"); */
 			wlandev->rx.rts++;
 			break;
 		case WLAN_FSTYPE_CTS:
-			/* printk("cts"); */
 			wlandev->rx.cts++;
 			break;
 		case WLAN_FSTYPE_ACK:
-			/* printk("ack"); */
 			wlandev->rx.ack++;
 			break;
 		case WLAN_FSTYPE_CFEND:
-			/* printk("cfend"); */
 			wlandev->rx.cfend++;
 			break;
 		case WLAN_FSTYPE_CFENDCFACK:
-			/* printk("cfendcfack"); */
 			wlandev->rx.cfendcfack++;
 			break;
 		default:
-			/* printk("unknown"); */
 			wlandev->rx.ctl_unknown++;
 			break;
 		}
-		/* printk("\n"); */
 		drop = 2;
 		break;
 
@@ -1007,7 +986,6 @@ static int p80211_rx_typedrop(struct wlandevice *wlandev, u16 fc)
 			wlandev->rx.cfack_cfpoll++;
 			break;
 		default:
-			/* printk("unknown"); */
 			wlandev->rx.data_unknown++;
 			break;
 		}
-- 
GitLab


From a6f100aa64831f64496e26d55c6337cc7ceb8bfc Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Sun, 23 Oct 2022 16:41:51 +0700
Subject: [PATCH 0375/4122] Staging: rtl8192e: rtl819x_HTProc: fixed missing
 blank space

Added a missing blank space as per the Linux kernel coding-style
regulations. The issue was flagged by the checkpatch script as a
warning.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/e6635103e3cf2426220767955b99d2e2b62a7329.1666502177.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_HTProc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index ef3dca51cf99b..b763cf0ba3567 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -70,6 +70,7 @@ static u8 LINKSYS_MARVELL_4400N[3] = {0x00, 0x14, 0xa4};
 void HTUpdateDefaultSetting(struct rtllib_device *ieee)
 {
 	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+
 	pHTInfo->bRegShortGI20MHz = 1;
 	pHTInfo->bRegShortGI40MHz = 1;
 
-- 
GitLab


From 354989f7dc1e54abcb3c0e7b47444e03cb060504 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Sun, 23 Oct 2022 16:44:13 +0700
Subject: [PATCH 0376/4122] Staging: rtl8192e: rtl819x_HTProc: fixed alignment
 matching open parenthesis

Aligned multiple statements to match open parenthesis as per Linux kernel
coding-style regulations. The issues were flagged by the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/98c9e764a4447ab550e5615c48f6a98bf6656b0d.1666502177.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_HTProc.c | 25 ++++++++++++-----------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index b763cf0ba3567..a701be8c29237 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -152,8 +152,8 @@ bool IsHTHalfNmodeAPs(struct rtllib_device *ieee)
 	    (net->ralink_cap_exist))
 		retValue = true;
 	else if (!memcmp(net->bssid, UNKNOWN_BORADCOM, 3) ||
-		!memcmp(net->bssid, LINKSYSWRT330_LINKSYSWRT300_BROADCOM, 3) ||
-		!memcmp(net->bssid, LINKSYSWRT350_LINKSYSWRT150_BROADCOM, 3) ||
+		 !memcmp(net->bssid, LINKSYSWRT330_LINKSYSWRT300_BROADCOM, 3) ||
+		 !memcmp(net->bssid, LINKSYSWRT350_LINKSYSWRT150_BROADCOM, 3) ||
 		(net->broadcom_cap_exist))
 		retValue = true;
 	else if (net->bssht.bd_rt2rt_aggregation)
@@ -540,7 +540,7 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 			     pPeerHTCap, sizeof(struct ht_capab_ele));
 #endif
 	HTSetConnectBwMode(ieee, (enum ht_channel_width)(pPeerHTCap->ChlWidth),
-			  (enum ht_extchnl_offset)(pPeerHTInfo->ExtChlOffset));
+			   (enum ht_extchnl_offset)(pPeerHTInfo->ExtChlOffset));
 	pHTInfo->cur_tx_bw40mhz = ((pPeerHTInfo->RecommemdedTxWidth == 1) ?
 				 true : false);
 
@@ -566,9 +566,9 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 
 	pHTInfo->bCurrentAMPDUEnable = pHTInfo->bAMPDUEnable;
 	if (ieee->rtllib_ap_sec_type &&
-	   (ieee->rtllib_ap_sec_type(ieee) & (SEC_ALG_WEP | SEC_ALG_TKIP))) {
+	    (ieee->rtllib_ap_sec_type(ieee) & (SEC_ALG_WEP | SEC_ALG_TKIP))) {
 		if ((pHTInfo->IOTPeer == HT_IOT_PEER_ATHEROS) ||
-				(pHTInfo->IOTPeer == HT_IOT_PEER_UNKNOWN))
+		    (pHTInfo->IOTPeer == HT_IOT_PEER_UNKNOWN))
 			pHTInfo->bCurrentAMPDUEnable = false;
 	}
 
@@ -617,7 +617,8 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 	else
 		pMcsFilter = MCS_FILTER_ALL;
 	ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee,
-				   ieee->dot11HTOperationalRateSet, pMcsFilter);
+						       ieee->dot11HTOperationalRateSet,
+						       pMcsFilter);
 	ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate;
 
 	pHTInfo->current_op_mode = pPeerHTInfo->OptMode;
@@ -644,13 +645,13 @@ void HTInitializeHTInfo(struct rtllib_device *ieee)
 	pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor;
 
 	memset((void *)(&(pHTInfo->SelfHTCap)), 0,
-		sizeof(pHTInfo->SelfHTCap));
+	       sizeof(pHTInfo->SelfHTCap));
 	memset((void *)(&(pHTInfo->SelfHTInfo)), 0,
-		sizeof(pHTInfo->SelfHTInfo));
+	       sizeof(pHTInfo->SelfHTInfo));
 	memset((void *)(&(pHTInfo->PeerHTCapBuf)), 0,
-		sizeof(pHTInfo->PeerHTCapBuf));
+	       sizeof(pHTInfo->PeerHTCapBuf));
 	memset((void *)(&(pHTInfo->PeerHTInfoBuf)), 0,
-		sizeof(pHTInfo->PeerHTInfoBuf));
+	       sizeof(pHTInfo->PeerHTInfoBuf));
 
 	pHTInfo->sw_bw_in_progress = false;
 
@@ -803,8 +804,8 @@ void HTUseDefaultSetting(struct rtllib_device *ieee)
 		HTFilterMCSRate(ieee, ieee->Regdot11TxHTOperationalRateSet,
 				ieee->dot11HTOperationalRateSet);
 		ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee,
-					   ieee->dot11HTOperationalRateSet,
-					   MCS_FILTER_ALL);
+							       ieee->dot11HTOperationalRateSet,
+							       MCS_FILTER_ALL);
 		ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate;
 
 	} else {
-- 
GitLab


From 4a1fc310e739d63d2d28952e358b33adde0d87e4 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Sun, 23 Oct 2022 16:45:37 +0700
Subject: [PATCH 0377/4122] Staging: rtl8192e: rtl819x_HTProc: fixed
 unnecessary parentheses

Fixed multiple unnecessary parentheses as per the Linux kernel
coding-style regulations. The issues were flagged by the
checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/d2168b90726dda2f02279a3483b53b8d9b34cb30.1666502177.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_HTProc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index a701be8c29237..62aa8e893c34c 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -285,7 +285,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap,
 		u8	EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33};
 
 		memcpy(posHTCap, EWC11NHTCap, sizeof(EWC11NHTCap));
-		pCapELE = (struct ht_capab_ele *)&(posHTCap[4]);
+		pCapELE = (struct ht_capab_ele *)&posHTCap[4];
 		*len = 30 + 2;
 	} else {
 		pCapELE = (struct ht_capab_ele *)posHTCap;
@@ -644,13 +644,13 @@ void HTInitializeHTInfo(struct rtllib_device *ieee)
 	pHTInfo->current_mpdu_density = pHTInfo->MPDU_Density;
 	pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor;
 
-	memset((void *)(&(pHTInfo->SelfHTCap)), 0,
+	memset((void *)(&pHTInfo->SelfHTCap), 0,
 	       sizeof(pHTInfo->SelfHTCap));
-	memset((void *)(&(pHTInfo->SelfHTInfo)), 0,
+	memset((void *)(&pHTInfo->SelfHTInfo), 0,
 	       sizeof(pHTInfo->SelfHTInfo));
-	memset((void *)(&(pHTInfo->PeerHTCapBuf)), 0,
+	memset((void *)(&pHTInfo->PeerHTCapBuf), 0,
 	       sizeof(pHTInfo->PeerHTCapBuf));
-	memset((void *)(&(pHTInfo->PeerHTInfoBuf)), 0,
+	memset((void *)(&pHTInfo->PeerHTInfoBuf), 0,
 	       sizeof(pHTInfo->PeerHTInfoBuf));
 
 	pHTInfo->sw_bw_in_progress = false;
@@ -666,7 +666,7 @@ void HTInitializeHTInfo(struct rtllib_device *ieee)
 	pHTInfo->iot_ra_func = 0;
 
 	{
-		u8 *RegHTSuppRateSets = &(ieee->RegHTSuppRateSet[0]);
+		u8 *RegHTSuppRateSets = &ieee->RegHTSuppRateSet[0];
 
 		RegHTSuppRateSets[0] = 0xFF;
 		RegHTSuppRateSets[1] = 0xFF;
-- 
GitLab


From d0b9f28f0da2808b339bb290b539518e69e48ac9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 21 Oct 2022 18:26:11 +0100
Subject: [PATCH 0378/4122] RDMA/qib: Remove not-used variable n

The variable n being incremented but it is never referenced,
it is redundant and can be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221021172611.26763-1-colin.i.king@gmail.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/qib/qib_tx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_tx.c b/drivers/infiniband/hw/qib/qib_tx.c
index 6a8148851f21d..1325110237cd9 100644
--- a/drivers/infiniband/hw/qib/qib_tx.c
+++ b/drivers/infiniband/hw/qib/qib_tx.c
@@ -82,7 +82,6 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
 	struct qib_devdata *dd = rcd->dd;
 	unsigned i;
 	unsigned last;
-	unsigned n = 0;
 
 	last = rcd->pio_base + rcd->piocnt;
 	/*
@@ -102,10 +101,8 @@ int qib_disarm_piobufs_ifneeded(struct qib_ctxtdata *rcd)
 	}
 	spin_lock_irq(&dd->pioavail_lock);
 	for (i = rcd->pio_base; i < last; i++) {
-		if (__test_and_clear_bit(i, dd->pio_need_disarm)) {
-			n++;
+		if (__test_and_clear_bit(i, dd->pio_need_disarm))
 			dd->f_sendctrl(rcd->ppd, QIB_SENDCTRL_DISARM_BUF(i));
-		}
 	}
 	spin_unlock_irq(&dd->pioavail_lock);
 	return 0;
-- 
GitLab


From 5dc1b37d75e7136588480712e7173169b3d4164f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 21 Oct 2022 18:35:04 +0100
Subject: [PATCH 0379/4122] RDMA/qib: Remove not-used variable freeze_cnt

The variable freeze_cnt being incremented but it is never referenced,
it is redundant and can be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221021173504.27546-1-colin.i.king@gmail.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/qib/qib_iba6120.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/qib/qib_iba6120.c b/drivers/infiniband/hw/qib/qib_iba6120.c
index aea571943768b..23a81edf3f7aa 100644
--- a/drivers/infiniband/hw/qib/qib_iba6120.c
+++ b/drivers/infiniband/hw/qib/qib_iba6120.c
@@ -799,12 +799,9 @@ static void qib_handle_6120_hwerrors(struct qib_devdata *dd, char *msg,
 			hwerrs &= ~TXE_PIO_PARITY;
 		}
 
-		if (!hwerrs) {
-			static u32 freeze_cnt;
-
-			freeze_cnt++;
+		if (!hwerrs)
 			qib_6120_clear_freeze(dd);
-		} else
+		else
 			isfatal = 1;
 	}
 
-- 
GitLab


From 2d5206c4629dfe74a51bb9c54758095139f5d01d Mon Sep 17 00:00:00 2001
From: wangjianli <wangjianli@cdjrlc.com>
Date: Sat, 22 Oct 2022 13:59:05 +0800
Subject: [PATCH 0380/4122] RDMA/qib: fix repeated words in comments

Delete the redundant word 'the'.

Signed-off-by: wangjianli <wangjianli@cdjrlc.com>
Link: https://lore.kernel.org/r/20221022055905.49176-1-wangjianli@cdjrlc.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/qib/qib_user_sdma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_user_sdma.c b/drivers/infiniband/hw/qib/qib_user_sdma.c
index bf2f30d67949d..9fe03d6ffac1a 100644
--- a/drivers/infiniband/hw/qib/qib_user_sdma.c
+++ b/drivers/infiniband/hw/qib/qib_user_sdma.c
@@ -851,7 +851,7 @@ static int qib_user_sdma_queue_pkts(const struct qib_devdata *dd,
 		}
 
 		/*
-		 * This assignment is a bit strange.  it's because the
+		 * This assignment is a bit strange.  it's because
 		 * the pbc counts the number of 32 bit words in the full
 		 * packet _except_ the first word of the pbc itself...
 		 */
-- 
GitLab


From c4bb733234b0ffd939030bb592b691ac19519455 Mon Sep 17 00:00:00 2001
From: wangjianli <wangjianli@cdjrlc.com>
Date: Sat, 22 Oct 2022 14:00:30 +0800
Subject: [PATCH 0381/4122] RDMA/core: fix repeated words in comments

Delete the redundant word 'the'.

Signed-off-by: wangjianli <wangjianli@cdjrlc.com>
Link: https://lore.kernel.org/r/20221022060030.50900-1-wangjianli@cdjrlc.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c
index 4084d05a45102..2e91d88793265 100644
--- a/drivers/infiniband/core/cache.c
+++ b/drivers/infiniband/core/cache.c
@@ -1422,7 +1422,7 @@ int rdma_read_gid_l2_fields(const struct ib_gid_attr *attr,
 			*vlan_id = vlan_dev_vlan_id(ndev);
 		} else {
 			/* If the netdev is upper device and if it's lower
-			 * device is vlan device, consider vlan id of the
+			 * device is vlan device, consider vlan id of
 			 * the lower vlan device for this gid entry.
 			 */
 			netdev_walk_all_lower_dev_rcu(attr->ndev,
-- 
GitLab


From 65bf03427cee48258a227431577dc56bf70461f3 Mon Sep 17 00:00:00 2001
From: wangjianli <wangjianli@cdjrlc.com>
Date: Sat, 22 Oct 2022 13:52:57 +0800
Subject: [PATCH 0382/4122] RDMA/qedr: fix repeated words in comments

Delete the redundant word 'the'.

Signed-off-by: wangjianli <wangjianli@cdjrlc.com>
Link: https://lore.kernel.org/r/20221022055257.42905-1-wangjianli@cdjrlc.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/qedr/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c
index 5152f10d2e6de..5e7069b76d465 100644
--- a/drivers/infiniband/hw/qedr/main.c
+++ b/drivers/infiniband/hw/qedr/main.c
@@ -472,7 +472,7 @@ static irqreturn_t qedr_irq_handler(int irq, void *handle)
 		/* The CQ's CNQ notification counter is checked before
 		 * destroying the CQ in a busy-wait loop that waits for all of
 		 * the CQ's CNQ interrupts to be processed. It is increased
-		 * here, only after the completion handler, to ensure that the
+		 * here, only after the completion handler, to ensure that
 		 * the handler is not running when the CQ is destroyed.
 		 */
 		cq->cnq_notif++;
-- 
GitLab


From 71d236399160ad9beaae7267b93d2d487e8f19a0 Mon Sep 17 00:00:00 2001
From: "yangx.jy@fujitsu.com" <yangx.jy@fujitsu.com>
Date: Fri, 21 Oct 2022 13:45:17 +0000
Subject: [PATCH 0383/4122] RDMA/rxe: Remove the member 'type' of struct rxe_mr

The member 'type' is included in both struct rxe_mr and struct ib_mr
so remove the duplicate one of struct rxe_mr.

Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Link: https://lore.kernel.org/r/20221021134513.17730-1-yangx.jy@fujitsu.com
Reviewed-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/sw/rxe/rxe_mr.c    | 16 ++++++++--------
 drivers/infiniband/sw/rxe/rxe_verbs.h |  1 -
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 502e9ada99b30..d4f10c2d1aa79 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -26,7 +26,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 {
 
 
-	switch (mr->type) {
+	switch (mr->ibmr.type) {
 	case IB_MR_TYPE_DMA:
 		return 0;
 
@@ -39,7 +39,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 
 	default:
 		pr_warn("%s: mr type (%d) not supported\n",
-			__func__, mr->type);
+			__func__, mr->ibmr.type);
 		return -EFAULT;
 	}
 }
@@ -109,7 +109,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
 
 	mr->access = access;
 	mr->state = RXE_MR_STATE_VALID;
-	mr->type = IB_MR_TYPE_DMA;
+	mr->ibmr.type = IB_MR_TYPE_DMA;
 }
 
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
@@ -178,7 +178,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 	mr->access = access;
 	mr->offset = ib_umem_offset(umem);
 	mr->state = RXE_MR_STATE_VALID;
-	mr->type = IB_MR_TYPE_USER;
+	mr->ibmr.type = IB_MR_TYPE_USER;
 
 	return 0;
 
@@ -205,7 +205,7 @@ int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
 
 	mr->max_buf = max_pages;
 	mr->state = RXE_MR_STATE_FREE;
-	mr->type = IB_MR_TYPE_MEM_REG;
+	mr->ibmr.type = IB_MR_TYPE_MEM_REG;
 
 	return 0;
 
@@ -304,7 +304,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 	if (length == 0)
 		return 0;
 
-	if (mr->type == IB_MR_TYPE_DMA) {
+	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
 		u8 *src, *dest;
 
 		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
@@ -547,8 +547,8 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
 		goto err_drop_ref;
 	}
 
-	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
-		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
+	if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
+		pr_warn("%s: mr type (%d) is wrong\n", __func__, mr->ibmr.type);
 		ret = -EINVAL;
 		goto err_drop_ref;
 	}
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 5f5cbfcb35695..22a299b0a9f0a 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -304,7 +304,6 @@ struct rxe_mr {
 	u32			lkey;
 	u32			rkey;
 	enum rxe_mr_state	state;
-	enum ib_mr_type		type;
 	u32			offset;
 	int			access;
 
-- 
GitLab


From 894c792e3e24c2c15d8aac15aa89ec144468e1b0 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Thu, 13 Oct 2022 14:46:36 -0700
Subject: [PATCH 0384/4122] MAINTAINERS: git://github -> https://github.com for
 terrelln

Github deprecated the git:// links about a year ago, so let's move to
the https:// URLs instead.

Reported-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index e04d944005ba8..4a2b7a9325dc2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -22800,7 +22800,7 @@ ZSTD
 M:	Nick Terrell <terrelln@fb.com>
 S:	Maintained
 B:	https://github.com/facebook/zstd/issues
-T:	git git://github.com/terrelln/linux.git
+T:	git https://github.com/terrelln/linux.git
 F:	include/linux/zstd*
 F:	lib/zstd/
 F:	lib/decompress_unzstd.c
-- 
GitLab


From 7486f5c6e7b197400678f1bb603ac9e4027fb830 Mon Sep 17 00:00:00 2001
From: Jilin Yuan <yuanjilin@cdjrlc.com>
Date: Fri, 2 Sep 2022 09:32:12 +0800
Subject: [PATCH 0385/4122] lib: zstd: fix repeated words in comments

Delete the redundant word 'the'.

Signed-off-by: Jilin Yuan <yuanjilin@cdjrlc.com>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/compress/zstd_compress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c
index a4e916008b3a7..73fff4c60149d 100644
--- a/lib/zstd/compress/zstd_compress.c
+++ b/lib/zstd/compress/zstd_compress.c
@@ -4441,7 +4441,7 @@ static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
                                     size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
     size_t offsetBound;
     U32 windowSize = 1 << windowLog;
-    /* posInSrc represents the amount of data the the decoder would decode up to this point.
+    /* posInSrc represents the amount of data the decoder would decode up to this point.
      * As long as the amount of data decoded is less than or equal to window size, offsets may be
      * larger than the total length of output decoded in order to reference the dict, even larger than
      * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
-- 
GitLab


From 19d7df98472851e1d2d11e00c177988d0f49683d Mon Sep 17 00:00:00 2001
From: Xin Gao <gaoxin@cdjrlc.com>
Date: Mon, 17 Oct 2022 15:18:59 -0700
Subject: [PATCH 0386/4122] lib: zstd: Fix comment typo

The double `when' is duplicated in line 999, remove one.

Signed-off-by: Xin Gao <gaoxin@cdjrlc.com>
Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/decompress/zstd_decompress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
index b4d81d84479ac..6928e85f9d193 100644
--- a/lib/zstd/decompress/zstd_decompress.c
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -996,7 +996,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
 size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx) { return dctx->expected; }
 
 /*
- * Similar to ZSTD_nextSrcSizeToDecompress(), but when when a block input can be streamed,
+ * Similar to ZSTD_nextSrcSizeToDecompress(), but when a block input can be streamed,
  * we allow taking a partial block as the input. Currently only raw uncompressed blocks can
  * be streamed.
  *
-- 
GitLab


From 4782c725c1538aa9ef894ae4a3938db40be7f02c Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Fri, 14 Oct 2022 14:47:04 -0700
Subject: [PATCH 0387/4122] zstd: Move zstd-common module exports to
 zstd_common_module.c

The zstd codebase is imported from the upstream zstd repo, and is over-written on
every update. Upstream keeps the kernel specific code separate from the main
library. So the module definition is moved into the zstd_common_module.c file.
This matches the pattern followed by the zstd-compress and zstd-decompress files.

I've done build and boot testing on x86-64, i386, and aarch64. I've
verified that zstd built both as modules and built-in build and boot.

Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 lib/zstd/Makefile                |  1 +
 lib/zstd/common/entropy_common.c |  4 ----
 lib/zstd/common/zstd_common.c    | 10 ----------
 lib/zstd/zstd_common_module.c    | 32 ++++++++++++++++++++++++++++++++
 4 files changed, 33 insertions(+), 14 deletions(-)
 create mode 100644 lib/zstd/zstd_common_module.c

diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
index 440bd0007ae20..20f08c644b71a 100644
--- a/lib/zstd/Makefile
+++ b/lib/zstd/Makefile
@@ -35,6 +35,7 @@ zstd_decompress-y := \
 		decompress/zstd_decompress_block.o \
 
 zstd_common-y := \
+		zstd_common_module.o \
 		common/debug.o \
 		common/entropy_common.o \
 		common/error_private.o \
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
index a311808c0d566..6353249de614c 100644
--- a/lib/zstd/common/entropy_common.c
+++ b/lib/zstd/common/entropy_common.c
@@ -15,7 +15,6 @@
 /* *************************************
 *  Dependencies
 ***************************************/
-#include <linux/module.h>
 #include "mem.h"
 #include "error_private.h"       /* ERR_*, ERROR */
 #define FSE_STATIC_LINKING_ONLY  /* FSE_MIN_TABLELOG */
@@ -240,7 +239,6 @@ size_t FSE_readNCount(
 {
     return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
 }
-EXPORT_SYMBOL_GPL(FSE_readNCount);
 
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
@@ -256,7 +254,6 @@ size_t HUF_readStats(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     U32 wksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
     return HUF_readStats_wksp(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, wksp, sizeof(wksp), /* bmi2 */ 0);
 }
-EXPORT_SYMBOL_GPL(HUF_readStats);
 
 FORCE_INLINE_TEMPLATE size_t
 HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
@@ -357,4 +354,3 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     (void)bmi2;
     return HUF_readStats_body_default(huffWeight, hwSize, rankStats, nbSymbolsPtr, tableLogPtr, src, srcSize, workSpace, wkspSize);
 }
-EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
diff --git a/lib/zstd/common/zstd_common.c b/lib/zstd/common/zstd_common.c
index 0f1f63be25d9f..3d7e35b309b5d 100644
--- a/lib/zstd/common/zstd_common.c
+++ b/lib/zstd/common/zstd_common.c
@@ -13,7 +13,6 @@
 /*-*************************************
 *  Dependencies
 ***************************************/
-#include <linux/module.h>
 #define ZSTD_DEPS_NEED_MALLOC
 #include "zstd_deps.h"   /* ZSTD_malloc, ZSTD_calloc, ZSTD_free, ZSTD_memset */
 #include "error_private.h"
@@ -36,17 +35,14 @@ const char* ZSTD_versionString(void) { return ZSTD_VERSION_STRING; }
  *  tells if a return value is an error code
  *  symbol is required for external callers */
 unsigned ZSTD_isError(size_t code) { return ERR_isError(code); }
-EXPORT_SYMBOL_GPL(ZSTD_isError);
 
 /*! ZSTD_getErrorName() :
  *  provides error code string from function result (useful for debugging) */
 const char* ZSTD_getErrorName(size_t code) { return ERR_getErrorName(code); }
-EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
 
 /*! ZSTD_getError() :
  *  convert a `size_t` function result into a proper ZSTD_errorCode enum */
 ZSTD_ErrorCode ZSTD_getErrorCode(size_t code) { return ERR_getErrorCode(code); }
-EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
 
 /*! ZSTD_getErrorString() :
  *  provides error code string from enum */
@@ -63,7 +59,6 @@ void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem)
         return customMem.customAlloc(customMem.opaque, size);
     return ZSTD_malloc(size);
 }
-EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
 
 void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
 {
@@ -76,7 +71,6 @@ void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem)
     }
     return ZSTD_calloc(1, size);
 }
-EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
 
 void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
 {
@@ -87,7 +81,3 @@ void ZSTD_customFree(void* ptr, ZSTD_customMem customMem)
             ZSTD_free(ptr);
     }
 }
-EXPORT_SYMBOL_GPL(ZSTD_customFree);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Zstd Common");
diff --git a/lib/zstd/zstd_common_module.c b/lib/zstd/zstd_common_module.c
new file mode 100644
index 0000000000000..22686e367e6f0
--- /dev/null
+++ b/lib/zstd/zstd_common_module.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0+ OR BSD-3-Clause
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#include <linux/module.h>
+
+#include "common/huf.h"
+#include "common/fse.h"
+#include "common/zstd_internal.h"
+
+// Export symbols shared by compress and decompress into a common module
+
+#undef ZSTD_isError   /* defined within zstd_internal.h */
+EXPORT_SYMBOL_GPL(FSE_readNCount);
+EXPORT_SYMBOL_GPL(HUF_readStats);
+EXPORT_SYMBOL_GPL(HUF_readStats_wksp);
+EXPORT_SYMBOL_GPL(ZSTD_isError);
+EXPORT_SYMBOL_GPL(ZSTD_getErrorName);
+EXPORT_SYMBOL_GPL(ZSTD_getErrorCode);
+EXPORT_SYMBOL_GPL(ZSTD_customMalloc);
+EXPORT_SYMBOL_GPL(ZSTD_customCalloc);
+EXPORT_SYMBOL_GPL(ZSTD_customFree);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Zstd Common");
-- 
GitLab


From 2aa14b1ab2c41a4fe41efae80d58bb77da91f19f Mon Sep 17 00:00:00 2001
From: Nick Terrell <terrelln@fb.com>
Date: Mon, 17 Oct 2022 13:32:37 -0700
Subject: [PATCH 0388/4122] zstd: import usptream v1.5.2

Updates the kernel's zstd library to v1.5.2, the latest zstd release.
The upstream tag it is updated to is `v1.5.2-kernel`, which contains
several cherry-picked commits on top of the v1.5.2 release which are
required for the kernel update. I will create this tag once the PR is
ready to merge, until then reference the temporary upstream branch
`v1.5.2-kernel-cherrypicks`.

I plan to submit this patch as part of the v6.2 merge window.

I've done basic build testing & testing on x86-64, i386, and aarch64.
I'm merging these patches into my `zstd-next` branch, which is pulled
into `linux-next` for further testing.

I've benchmarked BtrFS with zstd compression on a x86-64 machine, and
saw these results. Decompression speed is a small win across the board.
The lower compression levels 1-4 see both compression speed and
compression ratio wins. The higher compression levels see a small
compression speed loss and about neutral ratio. I expect the lower
compression levels to be used much more heavily than the high
compression levels, so this should be a net win.

Level	CTime	DTime	Ratio
1	-2.95%	-1.1%	-0.7%
3	-3.5%	-1.2%	-0.5%
5	+3.7%	-1.0%	+0.0%
7	+3.2%	-0.9%	+0.0%
9	-4.3%	-0.8%	+0.1%

Signed-off-by: Nick Terrell <terrelln@fb.com>
---
 include/linux/zstd_lib.h                      |  475 ++--
 lib/zstd/common/bitstream.h                   |    9 +
 lib/zstd/common/compiler.h                    |   67 +-
 lib/zstd/common/entropy_common.c              |    7 +-
 lib/zstd/common/error_private.h               |   81 +-
 lib/zstd/common/fse.h                         |    3 +-
 lib/zstd/common/fse_decompress.c              |    2 +-
 lib/zstd/common/huf.h                         |   46 +-
 lib/zstd/common/mem.h                         |    2 +
 lib/zstd/common/portability_macros.h          |   93 +
 lib/zstd/common/zstd_internal.h               |  175 +-
 lib/zstd/compress/clevels.h                   |  132 ++
 lib/zstd/compress/fse_compress.c              |   83 +-
 lib/zstd/compress/huf_compress.c              |  644 +++++-
 lib/zstd/compress/zstd_compress.c             | 1998 +++++++++++++----
 lib/zstd/compress/zstd_compress_internal.h    |  375 +++-
 lib/zstd/compress/zstd_compress_literals.c    |    9 +-
 lib/zstd/compress/zstd_compress_literals.h    |    4 +-
 lib/zstd/compress/zstd_compress_sequences.c   |   31 +-
 lib/zstd/compress/zstd_compress_superblock.c  |  295 +--
 lib/zstd/compress/zstd_cwksp.h                |  225 +-
 lib/zstd/compress/zstd_double_fast.c          |  413 +++-
 lib/zstd/compress/zstd_fast.c                 |  441 ++--
 lib/zstd/compress/zstd_lazy.c                 | 1352 ++++++++---
 lib/zstd/compress/zstd_lazy.h                 |   38 +
 lib/zstd/compress/zstd_ldm.c                  |   76 +-
 lib/zstd/compress/zstd_ldm.h                  |    1 +
 lib/zstd/compress/zstd_ldm_geartab.h          |    5 +-
 lib/zstd/compress/zstd_opt.c                  |  402 ++--
 lib/zstd/decompress/huf_decompress.c          |  912 ++++++--
 lib/zstd/decompress/zstd_decompress.c         |   78 +-
 lib/zstd/decompress/zstd_decompress_block.c   | 1022 +++++++--
 lib/zstd/decompress/zstd_decompress_block.h   |   10 +-
 .../decompress/zstd_decompress_internal.h     |   38 +-
 lib/zstd/decompress_sources.h                 |    6 +
 lib/zstd/zstd_compress_module.c               |    6 +-
 36 files changed, 6951 insertions(+), 2605 deletions(-)
 create mode 100644 lib/zstd/common/portability_macros.h
 create mode 100644 lib/zstd/compress/clevels.h

diff --git a/include/linux/zstd_lib.h b/include/linux/zstd_lib.h
index 6b91758b61af9..79d55465d5c1d 100644
--- a/include/linux/zstd_lib.h
+++ b/include/linux/zstd_lib.h
@@ -17,8 +17,16 @@
 
 
 /* =====   ZSTDLIB_API : control library symbols visibility   ===== */
-#define ZSTDLIB_VISIBILITY 
-#define ZSTDLIB_API ZSTDLIB_VISIBILITY
+#ifndef ZSTDLIB_VISIBLE
+#  if (__GNUC__ >= 4) && !defined(__MINGW32__)
+#    define ZSTDLIB_VISIBLE __attribute__ ((visibility ("default")))
+#    define ZSTDLIB_HIDDEN __attribute__ ((visibility ("hidden")))
+#  else
+#    define ZSTDLIB_VISIBLE
+#    define ZSTDLIB_HIDDEN
+#  endif
+#endif
+#define ZSTDLIB_API ZSTDLIB_VISIBLE
 
 
 /* *****************************************************************************
@@ -56,8 +64,8 @@
 
 /*------   Version   ------*/
 #define ZSTD_VERSION_MAJOR    1
-#define ZSTD_VERSION_MINOR    4
-#define ZSTD_VERSION_RELEASE  10
+#define ZSTD_VERSION_MINOR    5
+#define ZSTD_VERSION_RELEASE  2
 #define ZSTD_VERSION_NUMBER  (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
 
 /*! ZSTD_versionNumber() :
@@ -94,7 +102,6 @@ ZSTDLIB_API const char* ZSTD_versionString(void);
 #define ZSTD_BLOCKSIZE_MAX     (1<<ZSTD_BLOCKSIZELOG_MAX)
 
 
-
 /* *************************************
 *  Simple API
 ***************************************/
@@ -151,7 +158,7 @@ ZSTDLIB_API unsigned long long ZSTD_getFrameContentSize(const void *src, size_t
  * @return : decompressed size of `src` frame content _if known and not empty_, 0 otherwise. */
 ZSTDLIB_API unsigned long long ZSTD_getDecompressedSize(const void* src, size_t srcSize);
 
-/*! ZSTD_findFrameCompressedSize() :
+/*! ZSTD_findFrameCompressedSize() : Requires v1.4.0+
  * `src` should point to the start of a ZSTD frame or skippable frame.
  * `srcSize` must be >= first frame size
  * @return : the compressed size of the first frame starting at `src`,
@@ -165,8 +172,9 @@ ZSTDLIB_API size_t ZSTD_findFrameCompressedSize(const void* src, size_t srcSize)
 ZSTDLIB_API size_t      ZSTD_compressBound(size_t srcSize); /*!< maximum compressed size in worst case single-pass scenario */
 ZSTDLIB_API unsigned    ZSTD_isError(size_t code);          /*!< tells if a `size_t` function result is an error code */
 ZSTDLIB_API const char* ZSTD_getErrorName(size_t code);     /*!< provides readable string from an error code */
-ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed */
+ZSTDLIB_API int         ZSTD_minCLevel(void);               /*!< minimum negative compression level allowed, requires v1.4.0+ */
 ZSTDLIB_API int         ZSTD_maxCLevel(void);               /*!< maximum compression level available */
+ZSTDLIB_API int         ZSTD_defaultCLevel(void);           /*!< default compression level, specified by ZSTD_CLEVEL_DEFAULT, requires v1.5.0+ */
 
 
 /* *************************************
@@ -219,9 +227,9 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
                                  const void* src, size_t srcSize);
 
 
-/* *************************************
-*  Advanced compression API
-***************************************/
+/* *******************************************
+*  Advanced compression API (Requires v1.4.0+)
+**********************************************/
 
 /* API design :
  *   Parameters are pushed one by one into an existing context,
@@ -232,7 +240,7 @@ ZSTDLIB_API size_t ZSTD_decompressDCtx(ZSTD_DCtx* dctx,
  *
  *   It's possible to reset all parameters to "default" using ZSTD_CCtx_reset().
  *
- *   This API supercedes all other "advanced" API entry points in the experimental section.
+ *   This API supersedes all other "advanced" API entry points in the experimental section.
  *   In the future, we expect to remove from experimental API entry points which are redundant with this API.
  */
 
@@ -251,7 +259,6 @@ typedef enum { ZSTD_fast=1,
                          Only the order (from fast to strong) is guaranteed */
 } ZSTD_strategy;
 
-
 typedef enum {
 
     /* compression parameters
@@ -317,7 +324,6 @@ typedef enum {
                               * The higher the value of selected strategy, the more complex it is,
                               * resulting in stronger and slower compression.
                               * Special: value 0 means "use default strategy". */
-
     /* LDM mode parameters */
     ZSTD_c_enableLongDistanceMatching=160, /* Enable long distance matching.
                                      * This parameter is designed to improve compression ratio
@@ -374,7 +380,7 @@ typedef enum {
     ZSTD_c_jobSize=401,      /* Size of a compression job. This value is enforced only when nbWorkers >= 1.
                               * Each compression job is completed in parallel, so this value can indirectly impact the nb of active threads.
                               * 0 means default, which is dynamically determined based on compression parameters.
-                              * Job size must be a minimum of overlap size, or 1 MB, whichever is largest.
+                              * Job size must be a minimum of overlap size, or ZSTDMT_JOBSIZE_MIN (= 512 KB), whichever is largest.
                               * The minimum size is automatically and transparently enforced. */
     ZSTD_c_overlapLog=402,   /* Control the overlap size, as a fraction of window size.
                               * The overlap size is an amount of data reloaded from previous job at the beginning of a new job.
@@ -404,6 +410,8 @@ typedef enum {
      * ZSTD_c_stableOutBuffer
      * ZSTD_c_blockDelimiters
      * ZSTD_c_validateSequences
+     * ZSTD_c_useBlockSplitter
+     * ZSTD_c_useRowMatchFinder
      * Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
      * note : never ever use experimentalParam? names directly;
      *        also, the enums values themselves are unstable and can still change.
@@ -419,7 +427,10 @@ typedef enum {
      ZSTD_c_experimentalParam9=1006,
      ZSTD_c_experimentalParam10=1007,
      ZSTD_c_experimentalParam11=1008,
-     ZSTD_c_experimentalParam12=1009
+     ZSTD_c_experimentalParam12=1009,
+     ZSTD_c_experimentalParam13=1010,
+     ZSTD_c_experimentalParam14=1011,
+     ZSTD_c_experimentalParam15=1012
 } ZSTD_cParameter;
 
 typedef struct {
@@ -504,9 +515,9 @@ ZSTDLIB_API size_t ZSTD_compress2( ZSTD_CCtx* cctx,
                              const void* src, size_t srcSize);
 
 
-/* *************************************
-*  Advanced decompression API
-***************************************/
+/* *********************************************
+*  Advanced decompression API (Requires v1.4.0+)
+************************************************/
 
 /* The advanced API pushes parameters one by one into an existing DCtx context.
  * Parameters are sticky, and remain valid for all following frames
@@ -668,7 +679,7 @@ typedef enum {
                         : note : multithreaded compression will block to flush as much output as possible. */
 } ZSTD_EndDirective;
 
-/*! ZSTD_compressStream2() :
+/*! ZSTD_compressStream2() : Requires v1.4.0+
  *  Behaves about the same as ZSTD_compressStream, with additional control on end directive.
  *  - Compression parameters are pushed into CCtx before starting compression, using ZSTD_CCtx_set*()
  *  - Compression parameters cannot be changed once compression is started (save a list of exceptions in multi-threading mode)
@@ -714,11 +725,11 @@ ZSTDLIB_API size_t ZSTD_CStreamOutSize(void);   /*< recommended size for output
 
 
 /* *****************************************************************************
- * This following is a legacy streaming API.
+ * This following is a legacy streaming API, available since v1.0+ .
  * It can be replaced by ZSTD_CCtx_reset() and ZSTD_compressStream2().
  * It is redundant, but remains fully supported.
- * Advanced parameters and dictionary compression can only be used through the
- * new API.
+ * Streaming in combination with advanced parameters and dictionary compression
+ * can only be used through the new API.
  ******************************************************************************/
 
 /*!
@@ -796,7 +807,7 @@ ZSTDLIB_API size_t ZSTD_DStreamOutSize(void);   /*!< recommended size for output
 /*! ZSTD_compress_usingDict() :
  *  Compression at an explicit compression level using a Dictionary.
  *  A dictionary can be any arbitrary data segment (also called a prefix),
- *  or a buffer with specified information (see dictBuilder/zdict.h).
+ *  or a buffer with specified information (see zdict.h).
  *  Note : This function loads the dictionary, resulting in significant startup delay.
  *         It's intended for a dictionary used only once.
  *  Note 2 : When `dict == NULL || dictSize < 8` no dictionary is used. */
@@ -879,19 +890,25 @@ ZSTDLIB_API size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
  *  Dictionary helper functions
  *******************************/
 
-/*! ZSTD_getDictID_fromDict() :
+/*! ZSTD_getDictID_fromDict() : Requires v1.4.0+
  *  Provides the dictID stored within dictionary.
  *  if @return == 0, the dictionary is not conformant with Zstandard specification.
  *  It can still be loaded, but as a content-only dictionary. */
 ZSTDLIB_API unsigned ZSTD_getDictID_fromDict(const void* dict, size_t dictSize);
 
-/*! ZSTD_getDictID_fromDDict() :
+/*! ZSTD_getDictID_fromCDict() : Requires v1.5.0+
+ *  Provides the dictID of the dictionary loaded into `cdict`.
+ *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
+ *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
+ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+
+/*! ZSTD_getDictID_fromDDict() : Requires v1.4.0+
  *  Provides the dictID of the dictionary loaded into `ddict`.
  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
 ZSTDLIB_API unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict);
 
-/*! ZSTD_getDictID_fromFrame() :
+/*! ZSTD_getDictID_fromFrame() : Requires v1.4.0+
  *  Provides the dictID required to decompressed the frame stored within `src`.
  *  If @return == 0, the dictID could not be decoded.
  *  This could for one of the following reasons :
@@ -905,16 +922,16 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
 
 
 /* *****************************************************************************
- * Advanced dictionary and prefix API
+ * Advanced dictionary and prefix API (Requires v1.4.0+)
  *
  * This API allows dictionaries to be used with ZSTD_compress2(),
- * ZSTD_compressStream2(), and ZSTD_decompress(). Dictionaries are sticky, and
+ * ZSTD_compressStream2(), and ZSTD_decompressDCtx(). Dictionaries are sticky, and
  * only reset with the context is reset with ZSTD_reset_parameters or
  * ZSTD_reset_session_and_parameters. Prefixes are single-use.
  ******************************************************************************/
 
 
-/*! ZSTD_CCtx_loadDictionary() :
+/*! ZSTD_CCtx_loadDictionary() : Requires v1.4.0+
  *  Create an internal CDict from `dict` buffer.
  *  Decompression will have to use same dictionary.
  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
@@ -933,7 +950,7 @@ ZSTDLIB_API unsigned ZSTD_getDictID_fromFrame(const void* src, size_t srcSize);
  *           to precisely select how dictionary content must be interpreted. */
 ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 
-/*! ZSTD_CCtx_refCDict() :
+/*! ZSTD_CCtx_refCDict() : Requires v1.4.0+
  *  Reference a prepared dictionary, to be used for all next compressed frames.
  *  Note that compression parameters are enforced from within CDict,
  *  and supersede any compression parameter previously set within CCtx.
@@ -947,7 +964,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, s
  *  Note 2 : CDict is just referenced, its lifetime must outlive its usage within CCtx. */
 ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
 
-/*! ZSTD_CCtx_refPrefix() :
+/*! ZSTD_CCtx_refPrefix() : Requires v1.4.0+
  *  Reference a prefix (single-usage dictionary) for next compressed frame.
  *  A prefix is **only used once**. Tables are discarded at end of frame (ZSTD_e_end).
  *  Decompression will need same prefix to properly regenerate data.
@@ -968,7 +985,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict);
 ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
                                  const void* prefix, size_t prefixSize);
 
-/*! ZSTD_DCtx_loadDictionary() :
+/*! ZSTD_DCtx_loadDictionary() : Requires v1.4.0+
  *  Create an internal DDict from dict buffer,
  *  to be used to decompress next frames.
  *  The dictionary remains valid for all future frames, until explicitly invalidated.
@@ -985,7 +1002,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix(ZSTD_CCtx* cctx,
  */
 ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 
-/*! ZSTD_DCtx_refDDict() :
+/*! ZSTD_DCtx_refDDict() : Requires v1.4.0+
  *  Reference a prepared dictionary, to be used to decompress next frames.
  *  The dictionary remains active for decompression of future frames using same DCtx.
  *
@@ -1003,7 +1020,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary(ZSTD_DCtx* dctx, const void* dict, s
  */
 ZSTDLIB_API size_t ZSTD_DCtx_refDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
 
-/*! ZSTD_DCtx_refPrefix() :
+/*! ZSTD_DCtx_refPrefix() : Requires v1.4.0+
  *  Reference a prefix (single-usage dictionary) to decompress next frame.
  *  This is the reverse operation of ZSTD_CCtx_refPrefix(),
  *  and must use the same prefix as the one used during compression.
@@ -1024,7 +1041,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix(ZSTD_DCtx* dctx,
 
 /* ===   Memory management   === */
 
-/*! ZSTD_sizeof_*() :
+/*! ZSTD_sizeof_*() : Requires v1.4.0+
  *  These functions give the _current_ memory usage of selected object.
  *  Note that object memory usage can evolve (increase or decrease) over time. */
 ZSTDLIB_API size_t ZSTD_sizeof_CCtx(const ZSTD_CCtx* cctx);
@@ -1049,6 +1066,29 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 #if !defined(ZSTD_H_ZSTD_STATIC_LINKING_ONLY)
 #define ZSTD_H_ZSTD_STATIC_LINKING_ONLY
 
+/* This can be overridden externally to hide static symbols. */
+#ifndef ZSTDLIB_STATIC_API
+#define ZSTDLIB_STATIC_API ZSTDLIB_VISIBLE
+#endif
+
+/* Deprecation warnings :
+ * Should these warnings be a problem, it is generally possible to disable them,
+ * typically with -Wno-deprecated-declarations for gcc or _CRT_SECURE_NO_WARNINGS in Visual.
+ * Otherwise, it's also possible to define ZSTD_DISABLE_DEPRECATE_WARNINGS.
+ */
+#ifdef ZSTD_DISABLE_DEPRECATE_WARNINGS
+#  define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API  /* disable deprecation warnings */
+#else
+#  if (defined(GNUC) && (GNUC > 4 || (GNUC == 4 && GNUC_MINOR >= 5))) || defined(__clang__)
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated(message)))
+#  elif (__GNUC__ >= 3)
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API __attribute__((deprecated))
+#  else
+#    pragma message("WARNING: You need to implement ZSTD_DEPRECATED for this compiler")
+#    define ZSTD_DEPRECATED(message) ZSTDLIB_STATIC_API
+#  endif
+#endif /* ZSTD_DISABLE_DEPRECATE_WARNINGS */
+
 /* **************************************************************************************
  *   experimental API (static linking only)
  ****************************************************************************************
@@ -1111,9 +1151,6 @@ ZSTDLIB_API size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict);
 #define ZSTD_SRCSIZEHINT_MIN        0
 #define ZSTD_SRCSIZEHINT_MAX        INT_MAX
 
-/* internal */
-#define ZSTD_HASHLOG3_MAX           17
-
 
 /* ---  Advanced types  --- */
 
@@ -1255,6 +1292,15 @@ typedef enum {
   ZSTD_lcm_uncompressed = 2   /*< Always emit uncompressed literals. */
 } ZSTD_literalCompressionMode_e;
 
+typedef enum {
+  /* Note: This enum controls features which are conditionally beneficial. Zstd typically will make a final
+   * decision on whether or not to enable the feature (ZSTD_ps_auto), but setting the switch to ZSTD_ps_enable
+   * or ZSTD_ps_disable allow for a force enable/disable the feature.
+   */
+  ZSTD_ps_auto = 0,         /* Let the library automatically determine whether the feature shall be enabled */
+  ZSTD_ps_enable = 1,       /* Force-enable the feature */
+  ZSTD_ps_disable = 2       /* Do not use the feature */
+} ZSTD_paramSwitch_e;
 
 /* *************************************
 *  Frame size functions
@@ -1281,7 +1327,7 @@ typedef enum {
  *   note 5 : ZSTD_findDecompressedSize handles multiple frames, and so it must traverse the input to
  *            read each contained frame header.  This is fast as most of the data is skipped,
  *            however it does mean that all frame data must be present and valid. */
-ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t srcSize);
 
 /*! ZSTD_decompressBound() :
  *  `src` should point to the start of a series of ZSTD encoded and/or skippable frames
@@ -1296,13 +1342,13 @@ ZSTDLIB_API unsigned long long ZSTD_findDecompressedSize(const void* src, size_t
  *  note 3  : when the decompressed size field isn't available, the upper-bound for that frame is calculated by:
  *              upper-bound = # blocks * min(128 KB, Window_Size)
  */
-ZSTDLIB_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API unsigned long long ZSTD_decompressBound(const void* src, size_t srcSize);
 
 /*! ZSTD_frameHeaderSize() :
  *  srcSize must be >= ZSTD_FRAMEHEADERSIZE_PREFIX.
  * @return : size of the Frame Header,
  *           or an error code (if srcSize is too small) */
-ZSTDLIB_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_frameHeaderSize(const void* src, size_t srcSize);
 
 typedef enum {
   ZSTD_sf_noBlockDelimiters = 0,         /* Representation of ZSTD_Sequence has no block delimiters, sequences only */
@@ -1325,7 +1371,7 @@ typedef enum {
  * @return : number of sequences generated
  */
 
-ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
+ZSTDLIB_STATIC_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
                                           size_t outSeqsSize, const void* src, size_t srcSize);
 
 /*! ZSTD_mergeBlockDelimiters() :
@@ -1339,7 +1385,7 @@ ZSTDLIB_API size_t ZSTD_generateSequences(ZSTD_CCtx* zc, ZSTD_Sequence* outSeqs,
  * setting of ZSTD_c_blockDelimiters as ZSTD_sf_noBlockDelimiters
  * @return : number of sequences left after merging
  */
-ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
+ZSTDLIB_STATIC_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t seqsSize);
 
 /*! ZSTD_compressSequences() :
  * Compress an array of ZSTD_Sequence, generated from the original source buffer, into dst.
@@ -1369,7 +1415,7 @@ ZSTDLIB_API size_t ZSTD_mergeBlockDelimiters(ZSTD_Sequence* sequences, size_t se
  *         and cannot emit an RLE block that disagrees with the repcode history
  * @return : final compressed size or a ZSTD error.
  */
-ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
+ZSTDLIB_STATIC_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstSize,
                                   const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
                                   const void* src, size_t srcSize);
 
@@ -1387,9 +1433,29 @@ ZSTDLIB_API size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size
  *
  * @return : number of bytes written or a ZSTD error.
  */
-ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
+ZSTDLIB_STATIC_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
                                             const void* src, size_t srcSize, unsigned magicVariant);
 
+/*! ZSTD_readSkippableFrame() :
+ * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+ * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.  This can be NULL if the caller is not interested
+ * in the magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
+                                            const void* src, size_t srcSize);
+
+/*! ZSTD_isSkippableFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
+ */
+ZSTDLIB_API unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size);
+
+
 
 /* *************************************
 *  Memory management
@@ -1418,10 +1484,10 @@ ZSTDLIB_API size_t ZSTD_writeSkippableFrame(void* dst, size_t dstCapacity,
  *  Note 2 : only single-threaded compression is supported.
  *  ZSTD_estimateCCtxSize_usingCCtxParams() will return an error code if ZSTD_c_nbWorkers is >= 1.
  */
-ZSTDLIB_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
-ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
-ZSTDLIB_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDCtxSize(void);
 
 /*! ZSTD_estimateCStreamSize() :
  *  ZSTD_estimateCStreamSize() will provide a budget large enough for any compression level up to selected one.
@@ -1436,20 +1502,20 @@ ZSTDLIB_API size_t ZSTD_estimateDCtxSize(void);
  *  Note : if streaming is init with function ZSTD_init?Stream_usingDict(),
  *         an internal ?Dict will be created, which additional size is not estimated here.
  *         In this case, get total size by adding ZSTD_estimate?DictSize */
-ZSTDLIB_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
-ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
-ZSTDLIB_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
-ZSTDLIB_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
-ZSTDLIB_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize(int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize(size_t windowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDStreamSize_fromFrame(const void* src, size_t srcSize);
 
 /*! ZSTD_estimate?DictSize() :
  *  ZSTD_estimateCDictSize() will bet that src size is relatively "small", and content is copied, like ZSTD_createCDict().
  *  ZSTD_estimateCDictSize_advanced() makes it possible to control compression parameters precisely, like ZSTD_createCDict_advanced().
  *  Note : dictionaries created by reference (`ZSTD_dlm_byRef`) are logically smaller.
  */
-ZSTDLIB_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
-ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize(size_t dictSize, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateCDictSize_advanced(size_t dictSize, ZSTD_compressionParameters cParams, ZSTD_dictLoadMethod_e dictLoadMethod);
+ZSTDLIB_STATIC_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod);
 
 /*! ZSTD_initStatic*() :
  *  Initialize an object using a pre-allocated fixed-size buffer.
@@ -1472,20 +1538,20 @@ ZSTDLIB_API size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e
  *  Limitation 2 : static cctx currently not compatible with multi-threading.
  *  Limitation 3 : static dctx is incompatible with legacy support.
  */
-ZSTDLIB_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
-ZSTDLIB_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticCCtx() */
+ZSTDLIB_STATIC_API ZSTD_CCtx*    ZSTD_initStaticCCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_initStaticCStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticCCtx() */
 
-ZSTDLIB_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
-ZSTDLIB_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticDCtx() */
+ZSTDLIB_STATIC_API ZSTD_DCtx*    ZSTD_initStaticDCtx(void* workspace, size_t workspaceSize);
+ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_initStaticDStream(void* workspace, size_t workspaceSize);    /*< same as ZSTD_initStaticDCtx() */
 
-ZSTDLIB_API const ZSTD_CDict* ZSTD_initStaticCDict(
+ZSTDLIB_STATIC_API const ZSTD_CDict* ZSTD_initStaticCDict(
                                         void* workspace, size_t workspaceSize,
                                         const void* dict, size_t dictSize,
                                         ZSTD_dictLoadMethod_e dictLoadMethod,
                                         ZSTD_dictContentType_e dictContentType,
                                         ZSTD_compressionParameters cParams);
 
-ZSTDLIB_API const ZSTD_DDict* ZSTD_initStaticDDict(
+ZSTDLIB_STATIC_API const ZSTD_DDict* ZSTD_initStaticDDict(
                                         void* workspace, size_t workspaceSize,
                                         const void* dict, size_t dictSize,
                                         ZSTD_dictLoadMethod_e dictLoadMethod,
@@ -1504,44 +1570,44 @@ static
 __attribute__((__unused__))
 ZSTD_customMem const ZSTD_defaultCMem = { NULL, NULL, NULL };  /*< this constant defers to stdlib's functions */
 
-ZSTDLIB_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
-ZSTDLIB_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_STATIC_API ZSTD_CCtx*    ZSTD_createCCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_STATIC_API ZSTD_CStream* ZSTD_createCStream_advanced(ZSTD_customMem customMem);
+ZSTDLIB_STATIC_API ZSTD_DCtx*    ZSTD_createDCtx_advanced(ZSTD_customMem customMem);
+ZSTDLIB_STATIC_API ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem);
 
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
+ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced(const void* dict, size_t dictSize,
                                                   ZSTD_dictLoadMethod_e dictLoadMethod,
                                                   ZSTD_dictContentType_e dictContentType,
                                                   ZSTD_compressionParameters cParams,
                                                   ZSTD_customMem customMem);
 
-/* ! Thread pool :
- * These prototypes make it possible to share a thread pool among multiple compression contexts.
- * This can limit resources for applications with multiple threads where each one uses
- * a threaded compression mode (via ZSTD_c_nbWorkers parameter).
- * ZSTD_createThreadPool creates a new thread pool with a given number of threads.
- * Note that the lifetime of such pool must exist while being used.
- * ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
- * to use an internal thread pool).
- * ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
+/*! Thread pool :
+ *  These prototypes make it possible to share a thread pool among multiple compression contexts.
+ *  This can limit resources for applications with multiple threads where each one uses
+ *  a threaded compression mode (via ZSTD_c_nbWorkers parameter).
+ *  ZSTD_createThreadPool creates a new thread pool with a given number of threads.
+ *  Note that the lifetime of such pool must exist while being used.
+ *  ZSTD_CCtx_refThreadPool assigns a thread pool to a context (use NULL argument value
+ *  to use an internal thread pool).
+ *  ZSTD_freeThreadPool frees a thread pool, accepts NULL pointer.
  */
 typedef struct POOL_ctx_s ZSTD_threadPool;
-ZSTDLIB_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
-ZSTDLIB_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
-ZSTDLIB_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
+ZSTDLIB_STATIC_API ZSTD_threadPool* ZSTD_createThreadPool(size_t numThreads);
+ZSTDLIB_STATIC_API void ZSTD_freeThreadPool (ZSTD_threadPool* pool);  /* accept NULL pointer */
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refThreadPool(ZSTD_CCtx* cctx, ZSTD_threadPool* pool);
 
 
 /*
  * This API is temporary and is expected to change or disappear in the future!
  */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_advanced2(
     const void* dict, size_t dictSize,
     ZSTD_dictLoadMethod_e dictLoadMethod,
     ZSTD_dictContentType_e dictContentType,
     const ZSTD_CCtx_params* cctxParams,
     ZSTD_customMem customMem);
 
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
+ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_advanced(
     const void* dict, size_t dictSize,
     ZSTD_dictLoadMethod_e dictLoadMethod,
     ZSTD_dictContentType_e dictContentType,
@@ -1558,28 +1624,22 @@ ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_advanced(
  *  As a consequence, `dictBuffer` **must** outlive CDict,
  *  and its content must remain unmodified throughout the lifetime of CDict.
  *  note: equivalent to ZSTD_createCDict_advanced(), with dictLoadMethod==ZSTD_dlm_byRef */
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
-
-/*! ZSTD_getDictID_fromCDict() :
- *  Provides the dictID of the dictionary loaded into `cdict`.
- *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
- *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
-ZSTDLIB_API unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict);
+ZSTDLIB_STATIC_API ZSTD_CDict* ZSTD_createCDict_byReference(const void* dictBuffer, size_t dictSize, int compressionLevel);
 
 /*! ZSTD_getCParams() :
  * @return ZSTD_compressionParameters structure for a selected compression level and estimated srcSize.
  * `estimatedSrcSize` value is optional, select 0 if not known */
-ZSTDLIB_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_getCParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 
 /*! ZSTD_getParams() :
  *  same as ZSTD_getCParams(), but @return a full `ZSTD_parameters` object instead of sub-component `ZSTD_compressionParameters`.
  *  All fields of `ZSTD_frameParameters` are set to default : contentSize=1, checksum=0, noDictID=0 */
-ZSTDLIB_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
+ZSTDLIB_STATIC_API ZSTD_parameters ZSTD_getParams(int compressionLevel, unsigned long long estimatedSrcSize, size_t dictSize);
 
 /*! ZSTD_checkCParams() :
  *  Ensure param values remain within authorized range.
  * @return 0 on success, or an error code (can be checked with ZSTD_isError()) */
-ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
+ZSTDLIB_STATIC_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
 
 /*! ZSTD_adjustCParams() :
  *  optimize params for a given `srcSize` and `dictSize`.
@@ -1587,23 +1647,25 @@ ZSTDLIB_API size_t ZSTD_checkCParams(ZSTD_compressionParameters params);
  * `dictSize` must be `0` when there is no dictionary.
  *  cPar can be invalid : all parameters will be clamped within valid range in the @return struct.
  *  This function never fails (wide contract) */
-ZSTDLIB_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
+ZSTDLIB_STATIC_API ZSTD_compressionParameters ZSTD_adjustCParams(ZSTD_compressionParameters cPar, unsigned long long srcSize, size_t dictSize);
 
 /*! ZSTD_compress_advanced() :
  *  Note : this function is now DEPRECATED.
  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_setParameter() and other parameter setters.
- *  This prototype will be marked as deprecated and generate compilation warning on reaching v1.5.x */
-ZSTDLIB_API size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
+ *  This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2")
+size_t ZSTD_compress_advanced(ZSTD_CCtx* cctx,
                                           void* dst, size_t dstCapacity,
                                     const void* src, size_t srcSize,
                                     const void* dict,size_t dictSize,
                                           ZSTD_parameters params);
 
 /*! ZSTD_compress_usingCDict_advanced() :
- *  Note : this function is now REDUNDANT.
+ *  Note : this function is now DEPRECATED.
  *         It can be replaced by ZSTD_compress2(), in combination with ZSTD_CCtx_loadDictionary() and other parameter setters.
- *  This prototype will be marked as deprecated and generate compilation warning in some future version */
-ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+ *  This prototype will generate compilation warnings. */
+ZSTD_DEPRECATED("use ZSTD_compress2 with ZSTD_CCtx_loadDictionary")
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
                                               void* dst, size_t dstCapacity,
                                         const void* src, size_t srcSize,
                                         const ZSTD_CDict* cdict,
@@ -1613,18 +1675,18 @@ ZSTDLIB_API size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
 /*! ZSTD_CCtx_loadDictionary_byReference() :
  *  Same as ZSTD_CCtx_loadDictionary(), but dictionary content is referenced, instead of being copied into CCtx.
  *  It saves some memory, but also requires that `dict` outlives its usage within `cctx` */
-ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_byReference(ZSTD_CCtx* cctx, const void* dict, size_t dictSize);
 
 /*! ZSTD_CCtx_loadDictionary_advanced() :
  *  Same as ZSTD_CCtx_loadDictionary(), but gives finer control over
  *  how to load the dictionary (by copy ? by reference ?)
  *  and how to interpret it (automatic ? force raw mode ? full mode only ?) */
-ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_loadDictionary_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 
 /*! ZSTD_CCtx_refPrefix_advanced() :
  *  Same as ZSTD_CCtx_refPrefix(), but gives finer control over
  *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
-ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 
 /* ===   experimental parameters   === */
 /* these parameters can be used with ZSTD_setParameter()
@@ -1663,9 +1725,15 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
  * See the comments on that enum for an explanation of the feature. */
 #define ZSTD_c_forceAttachDict ZSTD_c_experimentalParam4
 
-/* Controls how the literals are compressed (default is auto).
- * The value must be of type ZSTD_literalCompressionMode_e.
- * See ZSTD_literalCompressionMode_t enum definition for details.
+/* Controlled with ZSTD_paramSwitch_e enum.
+ * Default is ZSTD_ps_auto.
+ * Set to ZSTD_ps_disable to never compress literals.
+ * Set to ZSTD_ps_enable to always compress literals. (Note: uncompressed literals
+ * may still be emitted if huffman is not beneficial to use.)
+ *
+ * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
+ * literals compression based on the compression parameters - specifically,
+ * negative compression levels do not use literal compression.
  */
 #define ZSTD_c_literalCompressionMode ZSTD_c_experimentalParam5
 
@@ -1728,7 +1796,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
  *
  * Note that this means that the CDict tables can no longer be copied into the
  * CCtx, so the dict attachment mode ZSTD_dictForceCopy will no longer be
- * useable. The dictionary can only be attached or reloaded.
+ * usable. The dictionary can only be attached or reloaded.
  *
  * In general, you should expect compression to be faster--sometimes very much
  * so--and CDict creation to be slightly slower. Eventually, we will probably
@@ -1817,12 +1885,55 @@ ZSTDLIB_API size_t ZSTD_CCtx_refPrefix_advanced(ZSTD_CCtx* cctx, const void* pre
  */
 #define ZSTD_c_validateSequences ZSTD_c_experimentalParam12
 
+/* ZSTD_c_useBlockSplitter
+ * Controlled with ZSTD_paramSwitch_e enum.
+ * Default is ZSTD_ps_auto.
+ * Set to ZSTD_ps_disable to never use block splitter.
+ * Set to ZSTD_ps_enable to always use block splitter.
+ *
+ * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
+ * block splitting based on the compression parameters.
+ */
+#define ZSTD_c_useBlockSplitter ZSTD_c_experimentalParam13
+
+/* ZSTD_c_useRowMatchFinder
+ * Controlled with ZSTD_paramSwitch_e enum.
+ * Default is ZSTD_ps_auto.
+ * Set to ZSTD_ps_disable to never use row-based matchfinder.
+ * Set to ZSTD_ps_enable to force usage of row-based matchfinder.
+ *
+ * By default, in ZSTD_ps_auto, the library will decide at runtime whether to use
+ * the row-based matchfinder based on support for SIMD instructions and the window log.
+ * Note that this only pertains to compression strategies: greedy, lazy, and lazy2
+ */
+#define ZSTD_c_useRowMatchFinder ZSTD_c_experimentalParam14
+
+/* ZSTD_c_deterministicRefPrefix
+ * Default is 0 == disabled. Set to 1 to enable.
+ *
+ * Zstd produces different results for prefix compression when the prefix is
+ * directly adjacent to the data about to be compressed vs. when it isn't.
+ * This is because zstd detects that the two buffers are contiguous and it can
+ * use a more efficient match finding algorithm. However, this produces different
+ * results than when the two buffers are non-contiguous. This flag forces zstd
+ * to always load the prefix in non-contiguous mode, even if it happens to be
+ * adjacent to the data, to guarantee determinism.
+ *
+ * If you really care about determinism when using a dictionary or prefix,
+ * like when doing delta compression, you should select this option. It comes
+ * at a speed penalty of about ~2.5% if the dictionary and data happened to be
+ * contiguous, and is free if they weren't contiguous. We don't expect that
+ * intentionally making the dictionary and data contiguous will be worth the
+ * cost to memcpy() the data.
+ */
+#define ZSTD_c_deterministicRefPrefix ZSTD_c_experimentalParam15
+
 /*! ZSTD_CCtx_getParameter() :
  *  Get the requested compression parameter value, selected by enum ZSTD_cParameter,
  *  and store it into int* value.
  * @return : 0, or an error code (which can be tested with ZSTD_isError()).
  */
-ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter param, int* value);
 
 
 /*! ZSTD_CCtx_params :
@@ -1842,27 +1953,27 @@ ZSTDLIB_API size_t ZSTD_CCtx_getParameter(const ZSTD_CCtx* cctx, ZSTD_cParameter
  *  This can be used with ZSTD_estimateCCtxSize_advanced_usingCCtxParams()
  *  for static allocation of CCtx for single-threaded compression.
  */
-ZSTDLIB_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
-ZSTDLIB_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
+ZSTDLIB_STATIC_API ZSTD_CCtx_params* ZSTD_createCCtxParams(void);
+ZSTDLIB_STATIC_API size_t ZSTD_freeCCtxParams(ZSTD_CCtx_params* params);  /* accept NULL pointer */
 
 /*! ZSTD_CCtxParams_reset() :
  *  Reset params to default values.
  */
-ZSTDLIB_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_reset(ZSTD_CCtx_params* params);
 
 /*! ZSTD_CCtxParams_init() :
  *  Initializes the compression parameters of cctxParams according to
  *  compression level. All other parameters are reset to their default values.
  */
-ZSTDLIB_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init(ZSTD_CCtx_params* cctxParams, int compressionLevel);
 
 /*! ZSTD_CCtxParams_init_advanced() :
  *  Initializes the compression and frame parameters of cctxParams according to
  *  params. All other parameters are reset to their default values.
  */
-ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params);
 
-/*! ZSTD_CCtxParams_setParameter() :
+/*! ZSTD_CCtxParams_setParameter() : Requires v1.4.0+
  *  Similar to ZSTD_CCtx_setParameter.
  *  Set one compression parameter, selected by enum ZSTD_cParameter.
  *  Parameters must be applied to a ZSTD_CCtx using
@@ -1870,14 +1981,14 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, Z
  * @result : a code representing success or failure (which can be tested with
  *           ZSTD_isError()).
  */
-ZSTDLIB_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* params, ZSTD_cParameter param, int value);
 
 /*! ZSTD_CCtxParams_getParameter() :
  * Similar to ZSTD_CCtx_getParameter.
  * Get the requested value of one compression parameter, selected by enum ZSTD_cParameter.
  * @result : 0, or an error code (which can be tested with ZSTD_isError()).
  */
-ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
+ZSTDLIB_STATIC_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params, ZSTD_cParameter param, int* value);
 
 /*! ZSTD_CCtx_setParametersUsingCCtxParams() :
  *  Apply a set of ZSTD_CCtx_params to the compression context.
@@ -1886,7 +1997,7 @@ ZSTDLIB_API size_t ZSTD_CCtxParams_getParameter(const ZSTD_CCtx_params* params,
  *    if nbWorkers>=1, new parameters will be picked up at next job,
  *       with a few restrictions (windowLog, pledgedSrcSize, nbWorkers, jobSize, and overlapLog are not updated).
  */
-ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
+ZSTDLIB_STATIC_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
         ZSTD_CCtx* cctx, const ZSTD_CCtx_params* params);
 
 /*! ZSTD_compressStream2_simpleArgs() :
@@ -1895,7 +2006,7 @@ ZSTDLIB_API size_t ZSTD_CCtx_setParametersUsingCCtxParams(
  *  This variant might be helpful for binders from dynamic languages
  *  which have troubles handling structures containing memory pointers.
  */
-ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
+ZSTDLIB_STATIC_API size_t ZSTD_compressStream2_simpleArgs (
                             ZSTD_CCtx* cctx,
                             void* dst, size_t dstCapacity, size_t* dstPos,
                       const void* src, size_t srcSize, size_t* srcPos,
@@ -1911,33 +2022,33 @@ ZSTDLIB_API size_t ZSTD_compressStream2_simpleArgs (
  *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
  *  Note 2 : Legacy Frame Identifiers are considered valid only if Legacy Support is enabled.
  *  Note 3 : Skippable Frame Identifiers are considered valid. */
-ZSTDLIB_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
+ZSTDLIB_STATIC_API unsigned ZSTD_isFrame(const void* buffer, size_t size);
 
 /*! ZSTD_createDDict_byReference() :
  *  Create a digested dictionary, ready to start decompression operation without startup delay.
  *  Dictionary content is referenced, and therefore stays in dictBuffer.
  *  It is important that dictBuffer outlives DDict,
  *  it must remain read accessible throughout the lifetime of DDict */
-ZSTDLIB_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
+ZSTDLIB_STATIC_API ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize);
 
 /*! ZSTD_DCtx_loadDictionary_byReference() :
  *  Same as ZSTD_DCtx_loadDictionary(),
  *  but references `dict` content instead of copying it into `dctx`.
  *  This saves memory if `dict` remains around.,
  *  However, it's imperative that `dict` remains accessible (and unmodified) while being used, so it must outlive decompression. */
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_byReference(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
 
 /*! ZSTD_DCtx_loadDictionary_advanced() :
  *  Same as ZSTD_DCtx_loadDictionary(),
  *  but gives direct control over
  *  how to load the dictionary (by copy ? by reference ?)
  *  and how to interpret it (automatic ? force raw mode ? full mode only ?). */
-ZSTDLIB_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_loadDictionary_advanced(ZSTD_DCtx* dctx, const void* dict, size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod, ZSTD_dictContentType_e dictContentType);
 
 /*! ZSTD_DCtx_refPrefix_advanced() :
  *  Same as ZSTD_DCtx_refPrefix(), but gives finer control over
  *  how to interpret prefix content (automatic ? force raw mode (default) ? full mode only ?) */
-ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* prefix, size_t prefixSize, ZSTD_dictContentType_e dictContentType);
 
 /*! ZSTD_DCtx_setMaxWindowSize() :
  *  Refuses allocating internal buffers for frames requiring a window size larger than provided limit.
@@ -1946,14 +2057,14 @@ ZSTDLIB_API size_t ZSTD_DCtx_refPrefix_advanced(ZSTD_DCtx* dctx, const void* pre
  *  By default, a decompression context accepts all window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT)
  * @return : 0, or an error code (which can be tested using ZSTD_isError()).
  */
-ZSTDLIB_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_setMaxWindowSize(ZSTD_DCtx* dctx, size_t maxWindowSize);
 
 /*! ZSTD_DCtx_getParameter() :
  *  Get the requested decompression parameter value, selected by enum ZSTD_dParameter,
  *  and store it into int* value.
  * @return : 0, or an error code (which can be tested with ZSTD_isError()).
  */
-ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
+ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value);
 
 /* ZSTD_d_format
  * experimental parameter,
@@ -2028,11 +2139,13 @@ ZSTDLIB_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param
 
 
 /*! ZSTD_DCtx_setFormat() :
+ *  This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().
  *  Instruct the decoder context about what kind of data to decode next.
  *  This instruction is mandatory to decode data without a fully-formed header,
  *  such ZSTD_f_zstd1_magicless for example.
  * @return : 0, or an error code (which can be tested using ZSTD_isError()). */
-ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
+ZSTD_DEPRECATED("use ZSTD_DCtx_setParameter() instead")
+size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
 
 /*! ZSTD_decompressStream_simpleArgs() :
  *  Same as ZSTD_decompressStream(),
@@ -2040,7 +2153,7 @@ ZSTDLIB_API size_t ZSTD_DCtx_setFormat(ZSTD_DCtx* dctx, ZSTD_format_e format);
  *  This can be helpful for binders from dynamic languages
  *  which have troubles handling structures containing memory pointers.
  */
-ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
+ZSTDLIB_STATIC_API size_t ZSTD_decompressStream_simpleArgs (
                             ZSTD_DCtx* dctx,
                             void* dst, size_t dstCapacity, size_t* dstPos,
                       const void* src, size_t srcSize, size_t* srcPos);
@@ -2056,7 +2169,7 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
 /*=====   Advanced Streaming compression functions  =====*/
 
 /*! ZSTD_initCStream_srcSize() :
- * This function is deprecated, and equivalent to:
+ * This function is DEPRECATED, and equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_refCDict(zcs, NULL); // clear the dictionary (if any)
  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
@@ -2065,15 +2178,15 @@ ZSTDLIB_API size_t ZSTD_decompressStream_simpleArgs (
  * pledgedSrcSize must be correct. If it is not known at init time, use
  * ZSTD_CONTENTSIZE_UNKNOWN. Note that, for compatibility with older programs,
  * "0" also disables frame content size field. It may be enabled in the future.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
                          int compressionLevel,
                          unsigned long long pledgedSrcSize);
 
 /*! ZSTD_initCStream_usingDict() :
- * This function is deprecated, and is equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_setParameter(zcs, ZSTD_c_compressionLevel, compressionLevel);
  *     ZSTD_CCtx_loadDictionary(zcs, dict, dictSize);
@@ -2082,15 +2195,15 @@ ZSTD_initCStream_srcSize(ZSTD_CStream* zcs,
  * dict == NULL or dictSize < 8, in which case no dict is used.
  * Note: dict is loaded with ZSTD_dct_auto (treated as a full zstd dictionary if
  * it begins with ZSTD_MAGIC_DICTIONARY, else as raw content) and ZSTD_dlm_byCopy.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
                      const void* dict, size_t dictSize,
                            int compressionLevel);
 
 /*! ZSTD_initCStream_advanced() :
- * This function is deprecated, and is approximately equivalent to:
+ * This function is DEPRECATED, and is approximately equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     // Pseudocode: Set each zstd parameter and leave the rest as-is.
  *     for ((param, value) : params) {
@@ -2102,23 +2215,24 @@ ZSTD_initCStream_usingDict(ZSTD_CStream* zcs,
  * dict is loaded with ZSTD_dct_auto and ZSTD_dlm_byCopy.
  * pledgedSrcSize must be correct.
  * If srcSize is not known at init time, use value ZSTD_CONTENTSIZE_UNKNOWN.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_advanced(ZSTD_CStream* zcs,
                     const void* dict, size_t dictSize,
                           ZSTD_parameters params,
                           unsigned long long pledgedSrcSize);
 
 /*! ZSTD_initCStream_usingCDict() :
- * This function is deprecated, and equivalent to:
+ * This function is DEPRECATED, and equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_refCDict(zcs, cdict);
  *
  * note : cdict will just be referenced, and must outlive compression session
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDict* cdict);
 
 /*! ZSTD_initCStream_usingCDict_advanced() :
  *   This function is DEPRECATED, and is approximately equivalent to:
@@ -2133,18 +2247,21 @@ ZSTDLIB_API size_t ZSTD_initCStream_usingCDict(ZSTD_CStream* zcs, const ZSTD_CDi
  * same as ZSTD_initCStream_usingCDict(), with control over frame parameters.
  * pledgedSrcSize must be correct. If srcSize is not known at init time, use
  * value ZSTD_CONTENTSIZE_UNKNOWN.
- * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ * This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t
-ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset and ZSTD_CCtx_refCDict, see zstd.h for detailed instructions")
+size_t ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
                                const ZSTD_CDict* cdict,
                                      ZSTD_frameParameters fParams,
                                      unsigned long long pledgedSrcSize);
 
 /*! ZSTD_resetCStream() :
- * This function is deprecated, and is equivalent to:
+ * This function is DEPRECATED, and is equivalent to:
  *     ZSTD_CCtx_reset(zcs, ZSTD_reset_session_only);
  *     ZSTD_CCtx_setPledgedSrcSize(zcs, pledgedSrcSize);
+ * Note: ZSTD_resetCStream() interprets pledgedSrcSize == 0 as ZSTD_CONTENTSIZE_UNKNOWN, but
+ *       ZSTD_CCtx_setPledgedSrcSize() does not do the same, so ZSTD_CONTENTSIZE_UNKNOWN must be
+ *       explicitly specified.
  *
  *  start a new frame, using same parameters from previous frame.
  *  This is typically useful to skip dictionary loading stage, since it will re-use it in-place.
@@ -2154,9 +2271,10 @@ ZSTD_initCStream_usingCDict_advanced(ZSTD_CStream* zcs,
  *  For the time being, pledgedSrcSize==0 is interpreted as "srcSize unknown" for compatibility with older programs,
  *  but it will change to mean "empty" in future version, so use macro ZSTD_CONTENTSIZE_UNKNOWN instead.
  * @return : 0, or an error code (which can be tested using ZSTD_isError())
- *  Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
+ *  This prototype will generate compilation warnings.
  */
-ZSTDLIB_API size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
+ZSTD_DEPRECATED("use ZSTD_CCtx_reset, see zstd.h for detailed instructions")
+size_t ZSTD_resetCStream(ZSTD_CStream* zcs, unsigned long long pledgedSrcSize);
 
 
 typedef struct {
@@ -2174,7 +2292,7 @@ typedef struct {
  * Note : (ingested - consumed) is amount of input data buffered internally, not yet compressed.
  * Aggregates progression inside active worker threads.
  */
-ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
+ZSTDLIB_STATIC_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx);
 
 /*! ZSTD_toFlushNow() :
  *  Tell how many bytes are ready to be flushed immediately.
@@ -2189,7 +2307,7 @@ ZSTDLIB_API ZSTD_frameProgression ZSTD_getFrameProgression(const ZSTD_CCtx* cctx
  *    therefore flush speed is limited by production speed of oldest job
  *    irrespective of the speed of concurrent (and newer) jobs.
  */
-ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
+ZSTDLIB_STATIC_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
 
 
 /*=====   Advanced Streaming decompression functions  =====*/
@@ -2203,7 +2321,7 @@ ZSTDLIB_API size_t ZSTD_toFlushNow(ZSTD_CCtx* cctx);
  * note: no dictionary will be used if dict == NULL or dictSize < 8
  * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
  */
-ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
+ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dict, size_t dictSize);
 
 /*!
  * This function is deprecated, and is equivalent to:
@@ -2214,7 +2332,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDict(ZSTD_DStream* zds, const void* dic
  * note : ddict is referenced, it must outlive decompression session
  * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
  */
-ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
+ZSTDLIB_STATIC_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDict* ddict);
 
 /*!
  * This function is deprecated, and is equivalent to:
@@ -2224,7 +2342,7 @@ ZSTDLIB_API size_t ZSTD_initDStream_usingDDict(ZSTD_DStream* zds, const ZSTD_DDi
  * re-use decompression parameters from previous init; saves dictionary loading
  * Note : this prototype will be marked as deprecated and generate compilation warnings on reaching v1.5.x
  */
-ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
+ZSTDLIB_STATIC_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
 
 
 /* *******************************************************************
@@ -2243,8 +2361,7 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
   ZSTD_CCtx object can be re-used multiple times within successive compression operations.
 
   Start by initializing a context.
-  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression,
-  or ZSTD_compressBegin_advanced(), for finer parameter control.
+  Use ZSTD_compressBegin(), or ZSTD_compressBegin_usingDict() for dictionary compression.
   It's also possible to duplicate a reference context which has already been initialized, using ZSTD_copyCCtx()
 
   Then, consume your input using ZSTD_compressContinue().
@@ -2267,17 +2384,19 @@ ZSTDLIB_API size_t ZSTD_resetDStream(ZSTD_DStream* zds);
 */
 
 /*=====   Buffer-less streaming compression functions  =====*/
-ZSTDLIB_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
-ZSTDLIB_API size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
-ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */
-ZSTDLIB_API size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
-ZSTDLIB_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
-
-ZSTDLIB_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-
-
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin(ZSTD_CCtx* cctx, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingDict(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, int compressionLevel);
+ZSTDLIB_STATIC_API size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict); /*< note: fails if cdict==NULL */
+ZSTDLIB_STATIC_API size_t ZSTD_copyCCtx(ZSTD_CCtx* cctx, const ZSTD_CCtx* preparedCCtx, unsigned long long pledgedSrcSize); /*<  note: if pledgedSrcSize is not known, use ZSTD_CONTENTSIZE_UNKNOWN */
+
+ZSTDLIB_STATIC_API size_t ZSTD_compressContinue(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_compressEnd(ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+
+/* The ZSTD_compressBegin_advanced() and ZSTD_compressBegin_usingCDict_advanced() are now DEPRECATED and will generate a compiler warning */
+ZSTD_DEPRECATED("use advanced API to access custom parameters")
+size_t ZSTD_compressBegin_advanced(ZSTD_CCtx* cctx, const void* dict, size_t dictSize, ZSTD_parameters params, unsigned long long pledgedSrcSize); /*< pledgedSrcSize : If srcSize is not known at init time, use ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTD_DEPRECATED("use advanced API to access custom parameters")
+size_t ZSTD_compressBegin_usingCDict_advanced(ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict, ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize);   /* compression parameters are already set within cdict. pledgedSrcSize must be correct. If srcSize is not known, use macro ZSTD_CONTENTSIZE_UNKNOWN */
 /*
   Buffer-less streaming decompression (synchronous mode)
 
@@ -2368,24 +2487,24 @@ typedef struct {
  * @return : 0, `zfhPtr` is correctly filled,
  *          >0, `srcSize` is too small, value is wanted `srcSize` amount,
  *           or an error code, which can be tested using ZSTD_isError() */
-ZSTDLIB_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize);   /*< doesn't consume input */
 /*! ZSTD_getFrameHeader_advanced() :
  *  same as ZSTD_getFrameHeader(),
  *  with added capability to select a format (like ZSTD_f_zstd1_magicless) */
-ZSTDLIB_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
-ZSTDLIB_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
+ZSTDLIB_STATIC_API size_t ZSTD_getFrameHeader_advanced(ZSTD_frameHeader* zfhPtr, const void* src, size_t srcSize, ZSTD_format_e format);
+ZSTDLIB_STATIC_API size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize);  /*< when frame content size is not known, pass in frameContentSize == ZSTD_CONTENTSIZE_UNKNOWN */
 
-ZSTDLIB_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
-ZSTDLIB_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
+ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx);
+ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDict(ZSTD_DCtx* dctx, const void* dict, size_t dictSize);
+ZSTDLIB_STATIC_API size_t ZSTD_decompressBegin_usingDDict(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict);
 
-ZSTDLIB_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
-ZSTDLIB_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_nextSrcSizeToDecompress(ZSTD_DCtx* dctx);
+ZSTDLIB_STATIC_API size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
 /* misc */
-ZSTDLIB_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
+ZSTDLIB_STATIC_API void   ZSTD_copyDCtx(ZSTD_DCtx* dctx, const ZSTD_DCtx* preparedDCtx);
 typedef enum { ZSTDnit_frameHeader, ZSTDnit_blockHeader, ZSTDnit_block, ZSTDnit_lastBlock, ZSTDnit_checksum, ZSTDnit_skippableFrame } ZSTD_nextInputType_e;
-ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
+ZSTDLIB_STATIC_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
 
 
@@ -2422,10 +2541,10 @@ ZSTDLIB_API ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx);
 */
 
 /*=====   Raw zstd block functions  =====*/
-ZSTDLIB_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
-ZSTDLIB_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
-ZSTDLIB_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
+ZSTDLIB_STATIC_API size_t ZSTD_getBlockSize   (const ZSTD_CCtx* cctx);
+ZSTDLIB_STATIC_API size_t ZSTD_compressBlock  (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);
+ZSTDLIB_STATIC_API size_t ZSTD_insertBlock    (ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);  /*< insert uncompressed block into `dctx` history. Useful for multi-blocks decompression. */
 
 
 #endif   /* ZSTD_H_ZSTD_STATIC_LINKING_ONLY */
diff --git a/lib/zstd/common/bitstream.h b/lib/zstd/common/bitstream.h
index 28248abe8612a..feef3a1b1d600 100644
--- a/lib/zstd/common/bitstream.h
+++ b/lib/zstd/common/bitstream.h
@@ -313,7 +313,16 @@ MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getMiddleBits(size_t bitContainer, U32 c
     U32 const regMask = sizeof(bitContainer)*8 - 1;
     /* if start > regMask, bitstream is corrupted, and result is undefined */
     assert(nbBits < BIT_MASK_SIZE);
+    /* x86 transform & ((1 << nbBits) - 1) to bzhi instruction, it is better
+     * than accessing memory. When bmi2 instruction is not present, we consider
+     * such cpus old (pre-Haswell, 2013) and their performance is not of that
+     * importance.
+     */
+#if defined(__x86_64__) || defined(_M_X86)
+    return (bitContainer >> (start & regMask)) & ((((U64)1) << nbBits) - 1);
+#else
     return (bitContainer >> (start & regMask)) & BIT_mask[nbBits];
+#endif
 }
 
 MEM_STATIC FORCE_INLINE_ATTR size_t BIT_getLowerBits(size_t bitContainer, U32 const nbBits)
diff --git a/lib/zstd/common/compiler.h b/lib/zstd/common/compiler.h
index f5a9c70a228a2..c42d39faf9bd8 100644
--- a/lib/zstd/common/compiler.h
+++ b/lib/zstd/common/compiler.h
@@ -11,6 +11,8 @@
 #ifndef ZSTD_COMPILER_H
 #define ZSTD_COMPILER_H
 
+#include "portability_macros.h"
+
 /*-*******************************************************
 *  Compiler specifics
 *********************************************************/
@@ -34,7 +36,7 @@
 
 /*
   On MSVC qsort requires that functions passed into it use the __cdecl calling conversion(CC).
-  This explictly marks such functions as __cdecl so that the code will still compile
+  This explicitly marks such functions as __cdecl so that the code will still compile
   if a CC other than __cdecl has been made the default.
 */
 #define WIN_CDECL
@@ -70,25 +72,13 @@
 
 
 /* target attribute */
-#ifndef __has_attribute
-  #define __has_attribute(x) 0  /* Compatibility with non-clang compilers. */
-#endif
 #define TARGET_ATTRIBUTE(target) __attribute__((__target__(target)))
 
-/* Enable runtime BMI2 dispatch based on the CPU.
- * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+/* Target attribute for BMI2 dynamic dispatch.
+ * Enable lzcnt, bmi, and bmi2.
+ * We test for bmi1 & bmi2. lzcnt is included in bmi1.
  */
-#ifndef DYNAMIC_BMI2
-  #if ((defined(__clang__) && __has_attribute(__target__)) \
-      || (defined(__GNUC__) \
-          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
-      && (defined(__x86_64__) || defined(_M_X86)) \
-      && !defined(__BMI2__)
-  #  define DYNAMIC_BMI2 1
-  #else
-  #  define DYNAMIC_BMI2 0
-  #endif
-#endif
+#define BMI2_TARGET_ATTRIBUTE TARGET_ATTRIBUTE("lzcnt,bmi,bmi2")
 
 /* prefetch
  * can be disabled, by declaring NO_PREFETCH build macro */
@@ -115,8 +105,9 @@
 }
 
 /* vectorization
- * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax */
-#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__)
+ * older GCC (pre gcc-4.3 picked as the cutoff) uses a different syntax,
+ * and some compilers, like Intel ICC and MCST LCC, do not support it at all. */
+#if !defined(__INTEL_COMPILER) && !defined(__clang__) && defined(__GNUC__) && !defined(__LCC__)
 #  if (__GNUC__ == 4 && __GNUC_MINOR__ > 3) || (__GNUC__ >= 5)
 #    define DONT_VECTORIZE __attribute__((optimize("no-tree-vectorize")))
 #  else
@@ -134,20 +125,18 @@
 #define LIKELY(x) (__builtin_expect((x), 1))
 #define UNLIKELY(x) (__builtin_expect((x), 0))
 
+#if __has_builtin(__builtin_unreachable) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)))
+#  define ZSTD_UNREACHABLE { assert(0), __builtin_unreachable(); }
+#else
+#  define ZSTD_UNREACHABLE { assert(0); }
+#endif
+
 /* disable warnings */
 
 /*Like DYNAMIC_BMI2 but for compile time determination of BMI2 support*/
 
 
-/* compat. with non-clang compilers */
-#ifndef __has_builtin
-#  define __has_builtin(x) 0
-#endif
-
-/* compat. with non-clang compilers */
-#ifndef __has_feature
-#  define __has_feature(x) 0
-#endif
+/* compile time determination of SIMD support */
 
 /* C-language Attributes are added in C23. */
 #if defined(__STDC_VERSION__) && (__STDC_VERSION__ > 201710L) && defined(__has_c_attribute)
@@ -168,10 +157,28 @@
  */
 #define ZSTD_FALLTHROUGH fallthrough
 
-/* detects whether we are being compiled under msan */
+/*-**************************************************************
+*  Alignment check
+*****************************************************************/
+
+/* this test was initially positioned in mem.h,
+ * but this file is removed (or replaced) for linux kernel
+ * so it's now hosted in compiler.h,
+ * which remains valid for both user & kernel spaces.
+ */
+
+#ifndef ZSTD_ALIGNOF
+/* covers gcc, clang & MSVC */
+/* note : this section must come first, before C11,
+ * due to a limitation in the kernel source generator */
+#  define ZSTD_ALIGNOF(T) __alignof(T)
+
+#endif /* ZSTD_ALIGNOF */
 
+/*-**************************************************************
+*  Sanitizer
+*****************************************************************/
 
-/* detects whether we are being compiled under asan */
 
 
 #endif /* ZSTD_COMPILER_H */
diff --git a/lib/zstd/common/entropy_common.c b/lib/zstd/common/entropy_common.c
index 6353249de614c..fef67056f0524 100644
--- a/lib/zstd/common/entropy_common.c
+++ b/lib/zstd/common/entropy_common.c
@@ -212,7 +212,7 @@ static size_t FSE_readNCount_body_default(
 }
 
 #if DYNAMIC_BMI2
-TARGET_ATTRIBUTE("bmi2") static size_t FSE_readNCount_body_bmi2(
+BMI2_TARGET_ATTRIBUTE static size_t FSE_readNCount_body_bmi2(
         short* normalizedCounter, unsigned* maxSVPtr, unsigned* tableLogPtr,
         const void* headerBuffer, size_t hbSize)
 {
@@ -240,6 +240,7 @@ size_t FSE_readNCount(
     return FSE_readNCount_bmi2(normalizedCounter, maxSVPtr, tableLogPtr, headerBuffer, hbSize, /* bmi2 */ 0);
 }
 
+
 /*! HUF_readStats() :
     Read compact Huffman tree, saved by HUF_writeCTable().
     `huffWeight` is destination buffer.
@@ -293,7 +294,7 @@ HUF_readStats_body(BYTE* huffWeight, size_t hwSize, U32* rankStats,
     ZSTD_memset(rankStats, 0, (HUF_TABLELOG_MAX + 1) * sizeof(U32));
     weightTotal = 0;
     {   U32 n; for (n=0; n<oSize; n++) {
-            if (huffWeight[n] >= HUF_TABLELOG_MAX) return ERROR(corruption_detected);
+            if (huffWeight[n] > HUF_TABLELOG_MAX) return ERROR(corruption_detected);
             rankStats[huffWeight[n]]++;
             weightTotal += (1 << huffWeight[n]) >> 1;
     }   }
@@ -331,7 +332,7 @@ static size_t HUF_readStats_body_default(BYTE* huffWeight, size_t hwSize, U32* r
 }
 
 #if DYNAMIC_BMI2
-static TARGET_ATTRIBUTE("bmi2") size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
+static BMI2_TARGET_ATTRIBUTE size_t HUF_readStats_body_bmi2(BYTE* huffWeight, size_t hwSize, U32* rankStats,
                      U32* nbSymbolsPtr, U32* tableLogPtr,
                      const void* src, size_t srcSize,
                      void* workSpace, size_t wkspSize)
diff --git a/lib/zstd/common/error_private.h b/lib/zstd/common/error_private.h
index d14e686adf951..ca5101e542faa 100644
--- a/lib/zstd/common/error_private.h
+++ b/lib/zstd/common/error_private.h
@@ -18,8 +18,10 @@
 /* ****************************************
 *  Dependencies
 ******************************************/
-#include "zstd_deps.h"    /* size_t */
 #include <linux/zstd_errors.h>  /* enum list */
+#include "compiler.h"
+#include "debug.h"
+#include "zstd_deps.h"       /* size_t */
 
 
 /* ****************************************
@@ -62,5 +64,82 @@ ERR_STATIC const char* ERR_getErrorName(size_t code)
     return ERR_getErrorString(ERR_getErrorCode(code));
 }
 
+/*
+ * Ignore: this is an internal helper.
+ *
+ * This is a helper function to help force C99-correctness during compilation.
+ * Under strict compilation modes, variadic macro arguments can't be empty.
+ * However, variadic function arguments can be. Using a function therefore lets
+ * us statically check that at least one (string) argument was passed,
+ * independent of the compilation flags.
+ */
+static INLINE_KEYWORD UNUSED_ATTR
+void _force_has_format_string(const char *format, ...) {
+  (void)format;
+}
+
+/*
+ * Ignore: this is an internal helper.
+ *
+ * We want to force this function invocation to be syntactically correct, but
+ * we don't want to force runtime evaluation of its arguments.
+ */
+#define _FORCE_HAS_FORMAT_STRING(...) \
+  if (0) { \
+    _force_has_format_string(__VA_ARGS__); \
+  }
+
+#define ERR_QUOTE(str) #str
+
+/*
+ * Return the specified error if the condition evaluates to true.
+ *
+ * In debug modes, prints additional information.
+ * In order to do that (particularly, printing the conditional that failed),
+ * this can't just wrap RETURN_ERROR().
+ */
+#define RETURN_ERROR_IF(cond, err, ...) \
+  if (cond) { \
+    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(cond), ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  }
+
+/*
+ * Unconditionally return the specified error.
+ *
+ * In debug modes, prints additional information.
+ */
+#define RETURN_ERROR(err, ...) \
+  do { \
+    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
+           __FILE__, __LINE__, ERR_QUOTE(ERROR(err))); \
+    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+    RAWLOG(3, ": " __VA_ARGS__); \
+    RAWLOG(3, "\n"); \
+    return ERROR(err); \
+  } while(0);
+
+/*
+ * If the provided expression evaluates to an error code, returns that error code.
+ *
+ * In debug modes, prints additional information.
+ */
+#define FORWARD_IF_ERROR(err, ...) \
+  do { \
+    size_t const err_code = (err); \
+    if (ERR_isError(err_code)) { \
+      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
+             __FILE__, __LINE__, ERR_QUOTE(err), ERR_getErrorName(err_code)); \
+      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
+      RAWLOG(3, ": " __VA_ARGS__); \
+      RAWLOG(3, "\n"); \
+      return err_code; \
+    } \
+  } while(0);
+
 
 #endif /* ERROR_H_MODULE */
diff --git a/lib/zstd/common/fse.h b/lib/zstd/common/fse.h
index 0bb174c2c3677..4507043b2287c 100644
--- a/lib/zstd/common/fse.h
+++ b/lib/zstd/common/fse.h
@@ -333,8 +333,9 @@ size_t FSE_buildCTable_rle (FSE_CTable* ct, unsigned char symbolValue);
 /* FSE_buildCTable_wksp() :
  * Same as FSE_buildCTable(), but using an externally allocated scratch buffer (`workSpace`).
  * `wkspSize` must be >= `FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog)` of `unsigned`.
+ * See FSE_buildCTable_wksp() for breakdown of workspace usage.
  */
-#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (maxSymbolValue + 2 + (1ull << (tableLog - 2)))
+#define FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog) (((maxSymbolValue + 2) + (1ull << (tableLog)))/2 + sizeof(U64)/sizeof(U32) /* additional 8 bytes for potential table overwrite */)
 #define FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) (sizeof(unsigned) * FSE_BUILD_CTABLE_WORKSPACE_SIZE_U32(maxSymbolValue, tableLog))
 size_t FSE_buildCTable_wksp(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);
 
diff --git a/lib/zstd/common/fse_decompress.c b/lib/zstd/common/fse_decompress.c
index 2c8bbe3e4c148..a0d06095be83d 100644
--- a/lib/zstd/common/fse_decompress.c
+++ b/lib/zstd/common/fse_decompress.c
@@ -365,7 +365,7 @@ static size_t FSE_decompress_wksp_body_default(void* dst, size_t dstCapacity, co
 }
 
 #if DYNAMIC_BMI2
-TARGET_ATTRIBUTE("bmi2") static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
+BMI2_TARGET_ATTRIBUTE static size_t FSE_decompress_wksp_body_bmi2(void* dst, size_t dstCapacity, const void* cSrc, size_t cSrcSize, unsigned maxLog, void* workSpace, size_t wkspSize)
 {
     return FSE_decompress_wksp_body(dst, dstCapacity, cSrc, cSrcSize, maxLog, workSpace, wkspSize, 1);
 }
diff --git a/lib/zstd/common/huf.h b/lib/zstd/common/huf.h
index 88c5586646aa5..5042ff8703087 100644
--- a/lib/zstd/common/huf.h
+++ b/lib/zstd/common/huf.h
@@ -86,9 +86,9 @@ HUF_PUBLIC_API size_t HUF_compress2 (void* dst, size_t dstCapacity,
 
 /* HUF_compress4X_wksp() :
  *  Same as HUF_compress2(), but uses externally allocated `workSpace`.
- * `workspace` must have minimum alignment of 4, and be at least as large as HUF_WORKSPACE_SIZE */
-#define HUF_WORKSPACE_SIZE ((6 << 10) + 256)
-#define HUF_WORKSPACE_SIZE_U32 (HUF_WORKSPACE_SIZE / sizeof(U32))
+ * `workspace` must be at least as large as HUF_WORKSPACE_SIZE */
+#define HUF_WORKSPACE_SIZE ((8 << 10) + 512 /* sorting scratch space */)
+#define HUF_WORKSPACE_SIZE_U64 (HUF_WORKSPACE_SIZE / sizeof(U64))
 HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
                                      const void* src, size_t srcSize,
                                      unsigned maxSymbolValue, unsigned tableLog,
@@ -113,11 +113,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 
 
 /* *** Constants *** */
-#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_ABSOLUTEMAX_TABLELOG */
+#define HUF_TABLELOG_MAX      12      /* max runtime value of tableLog (due to static allocation); can be modified up to HUF_TABLELOG_ABSOLUTEMAX */
 #define HUF_TABLELOG_DEFAULT  11      /* default tableLog value when none specified */
 #define HUF_SYMBOLVALUE_MAX  255
 
-#define HUF_TABLELOG_ABSOLUTEMAX  15  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
+#define HUF_TABLELOG_ABSOLUTEMAX  12  /* absolute limit of HUF_MAX_TABLELOG. Beyond that value, code does not work */
 #if (HUF_TABLELOG_MAX > HUF_TABLELOG_ABSOLUTEMAX)
 #  error "HUF_TABLELOG_MAX is too large !"
 #endif
@@ -133,15 +133,11 @@ HUF_PUBLIC_API size_t HUF_compress4X_wksp (void* dst, size_t dstCapacity,
 
 /* static allocation of HUF's Compression Table */
 /* this is a private definition, just exposed for allocation and strict aliasing purpose. never EVER access its members directly */
-struct HUF_CElt_s {
-  U16  val;
-  BYTE nbBits;
-};   /* typedef'd to HUF_CElt */
-typedef struct HUF_CElt_s HUF_CElt;   /* consider it an incomplete type */
-#define HUF_CTABLE_SIZE_U32(maxSymbolValue)   ((maxSymbolValue)+1)   /* Use tables of U32, for proper alignment */
-#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_U32(maxSymbolValue) * sizeof(U32))
+typedef size_t HUF_CElt;   /* consider it an incomplete type */
+#define HUF_CTABLE_SIZE_ST(maxSymbolValue)   ((maxSymbolValue)+2)   /* Use tables of size_t, for proper alignment */
+#define HUF_CTABLE_SIZE(maxSymbolValue)       (HUF_CTABLE_SIZE_ST(maxSymbolValue) * sizeof(size_t))
 #define HUF_CREATE_STATIC_CTABLE(name, maxSymbolValue) \
-    HUF_CElt name[HUF_CTABLE_SIZE_U32(maxSymbolValue)] /* no final ; */
+    HUF_CElt name[HUF_CTABLE_SIZE_ST(maxSymbolValue)] /* no final ; */
 
 /* static allocation of HUF's DTable */
 typedef U32 HUF_DTable;
@@ -191,6 +187,7 @@ size_t HUF_buildCTable (HUF_CElt* CTable, const unsigned* count, unsigned maxSym
 size_t HUF_writeCTable (void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog);
 size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize, const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog, void* workspace, size_t workspaceSize);
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue);
 
@@ -203,12 +200,13 @@ typedef enum {
  *  Same as HUF_compress4X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
  *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid. */
+ *  If preferRepeat then the old table will always be used if valid.
+ *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t HUF_compress4X_repeat(void* dst, size_t dstSize,
                        const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned tableLog,
                        void* workSpace, size_t wkspSize,    /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
 
 /* HUF_buildCTable_wksp() :
  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@@ -246,11 +244,10 @@ size_t HUF_readStats_wksp(BYTE* huffWeight, size_t hwSize,
  *  Loading a CTable saved with HUF_writeCTable() */
 size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void* src, size_t srcSize, unsigned *hasZeroWeights);
 
-/* HUF_getNbBits() :
+/* HUF_getNbBitsFromCTable() :
  *  Read nbBits from CTable symbolTable, for symbol `symbolValue` presumed <= HUF_SYMBOLVALUE_MAX
- *  Note 1 : is not inlined, as HUF_CElt definition is private
- *  Note 2 : const void* used, so that it can provide a statically allocated table as argument (which uses type U32) */
-U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue);
+ *  Note 1 : is not inlined, as HUF_CElt definition is private */
+U32 HUF_getNbBitsFromCTable(const HUF_CElt* symbolTable, U32 symbolValue);
 
 /*
  * HUF_decompress() does the following:
@@ -302,18 +299,20 @@ size_t HUF_decompress4X2_usingDTable(void* dst, size_t maxDstSize, const void* c
 /* ====================== */
 
 size_t HUF_compress1X (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
-size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U32 unsigned */
+size_t HUF_compress1X_wksp (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog, void* workSpace, size_t wkspSize);  /*< `workSpace` must be a table of at least HUF_WORKSPACE_SIZE_U64 U64 */
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable);
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2);
 /* HUF_compress1X_repeat() :
  *  Same as HUF_compress1X_wksp(), but considers using hufTable if *repeat != HUF_repeat_none.
  *  If it uses hufTable it does not modify hufTable or repeat.
  *  If it doesn't, it sets *repeat = HUF_repeat_none, and it sets hufTable to the table used.
- *  If preferRepeat then the old table will always be used if valid. */
+ *  If preferRepeat then the old table will always be used if valid.
+ *  If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t HUF_compress1X_repeat(void* dst, size_t dstSize,
                        const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned tableLog,
                        void* workSpace, size_t wkspSize,   /*< `workSpace` must be aligned on 4-bytes boundaries, `wkspSize` must be >= HUF_WORKSPACE_SIZE */
-                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2);
+                       HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible);
 
 size_t HUF_decompress1X1 (void* dst, size_t dstSize, const void* cSrc, size_t cSrcSize);   /* single-symbol decoder */
 #ifndef HUF_FORCE_DECOMPRESS_X1
@@ -351,6 +350,9 @@ size_t HUF_decompress4X_hufOnly_wksp_bmi2(HUF_DTable* dctx, void* dst, size_t ds
 #ifndef HUF_FORCE_DECOMPRESS_X2
 size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
 #endif
+#ifndef HUF_FORCE_DECOMPRESS_X1
+size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t srcSize, void* workSpace, size_t wkspSize, int bmi2);
+#endif
 
 #endif /* HUF_STATIC_LINKING_ONLY */
 
diff --git a/lib/zstd/common/mem.h b/lib/zstd/common/mem.h
index dcdd586a9fd91..1d9cc03924ca9 100644
--- a/lib/zstd/common/mem.h
+++ b/lib/zstd/common/mem.h
@@ -30,6 +30,8 @@
 *  Basic Types
 *****************************************************************/
 typedef uint8_t  BYTE;
+typedef uint8_t  U8;
+typedef int8_t   S8;
 typedef uint16_t U16;
 typedef int16_t  S16;
 typedef uint32_t U32;
diff --git a/lib/zstd/common/portability_macros.h b/lib/zstd/common/portability_macros.h
new file mode 100644
index 0000000000000..0e3b2c0a527db
--- /dev/null
+++ b/lib/zstd/common/portability_macros.h
@@ -0,0 +1,93 @@
+/*
+ * Copyright (c) Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_PORTABILITY_MACROS_H
+#define ZSTD_PORTABILITY_MACROS_H
+
+/*
+ * This header file contains macro defintions to support portability.
+ * This header is shared between C and ASM code, so it MUST only
+ * contain macro definitions. It MUST not contain any C code.
+ *
+ * This header ONLY defines macros to detect platforms/feature support.
+ *
+ */
+
+
+/* compat. with non-clang compilers */
+#ifndef __has_attribute
+  #define __has_attribute(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_builtin
+#  define __has_builtin(x) 0
+#endif
+
+/* compat. with non-clang compilers */
+#ifndef __has_feature
+#  define __has_feature(x) 0
+#endif
+
+/* detects whether we are being compiled under msan */
+
+/* detects whether we are being compiled under asan */
+
+/* detects whether we are being compiled under dfsan */
+
+/* Mark the internal assembly functions as hidden  */
+#ifdef __ELF__
+# define ZSTD_HIDE_ASM_FUNCTION(func) .hidden func
+#else
+# define ZSTD_HIDE_ASM_FUNCTION(func)
+#endif
+
+/* Enable runtime BMI2 dispatch based on the CPU.
+ * Enabled for clang & gcc >=4.8 on x86 when BMI2 isn't enabled by default.
+ */
+#ifndef DYNAMIC_BMI2
+  #if ((defined(__clang__) && __has_attribute(__target__)) \
+      || (defined(__GNUC__) \
+          && (__GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))) \
+      && (defined(__x86_64__) || defined(_M_X64)) \
+      && !defined(__BMI2__)
+  #  define DYNAMIC_BMI2 1
+  #else
+  #  define DYNAMIC_BMI2 0
+  #endif
+#endif
+
+/*
+ * Only enable assembly for GNUC comptabile compilers,
+ * because other platforms may not support GAS assembly syntax.
+ *
+ * Only enable assembly for Linux / MacOS, other platforms may
+ * work, but they haven't been tested. This could likely be
+ * extended to BSD systems.
+ *
+ * Disable assembly when MSAN is enabled, because MSAN requires
+ * 100% of code to be instrumented to work.
+ */
+#define ZSTD_ASM_SUPPORTED 1
+
+/*
+ * Determines whether we should enable assembly for x86-64
+ * with BMI2.
+ *
+ * Enable if all of the following conditions hold:
+ * - ASM hasn't been explicitly disabled by defining ZSTD_DISABLE_ASM
+ * - Assembly is supported
+ * - We are compiling for x86-64 and either:
+ *   - DYNAMIC_BMI2 is enabled
+ *   - BMI2 is supported at compile time
+ */
+#define ZSTD_ENABLE_ASM_X86_64_BMI2 0
+
+#endif /* ZSTD_PORTABILITY_MACROS_H */
diff --git a/lib/zstd/common/zstd_internal.h b/lib/zstd/common/zstd_internal.h
index fc6f3a9b40c07..93305d9b41bba 100644
--- a/lib/zstd/common/zstd_internal.h
+++ b/lib/zstd/common/zstd_internal.h
@@ -20,6 +20,7 @@
 *  Dependencies
 ***************************************/
 #include "compiler.h"
+#include "cpu.h"
 #include "mem.h"
 #include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
 #include "error_private.h"
@@ -47,81 +48,7 @@
 #undef MAX
 #define MIN(a,b) ((a)<(b) ? (a) : (b))
 #define MAX(a,b) ((a)>(b) ? (a) : (b))
-
-/*
- * Ignore: this is an internal helper.
- *
- * This is a helper function to help force C99-correctness during compilation.
- * Under strict compilation modes, variadic macro arguments can't be empty.
- * However, variadic function arguments can be. Using a function therefore lets
- * us statically check that at least one (string) argument was passed,
- * independent of the compilation flags.
- */
-static INLINE_KEYWORD UNUSED_ATTR
-void _force_has_format_string(const char *format, ...) {
-  (void)format;
-}
-
-/*
- * Ignore: this is an internal helper.
- *
- * We want to force this function invocation to be syntactically correct, but
- * we don't want to force runtime evaluation of its arguments.
- */
-#define _FORCE_HAS_FORMAT_STRING(...) \
-  if (0) { \
-    _force_has_format_string(__VA_ARGS__); \
-  }
-
-/*
- * Return the specified error if the condition evaluates to true.
- *
- * In debug modes, prints additional information.
- * In order to do that (particularly, printing the conditional that failed),
- * this can't just wrap RETURN_ERROR().
- */
-#define RETURN_ERROR_IF(cond, err, ...) \
-  if (cond) { \
-    RAWLOG(3, "%s:%d: ERROR!: check %s failed, returning %s", \
-           __FILE__, __LINE__, ZSTD_QUOTE(cond), ZSTD_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  }
-
-/*
- * Unconditionally return the specified error.
- *
- * In debug modes, prints additional information.
- */
-#define RETURN_ERROR(err, ...) \
-  do { \
-    RAWLOG(3, "%s:%d: ERROR!: unconditional check failed, returning %s", \
-           __FILE__, __LINE__, ZSTD_QUOTE(ERROR(err))); \
-    _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-    RAWLOG(3, ": " __VA_ARGS__); \
-    RAWLOG(3, "\n"); \
-    return ERROR(err); \
-  } while(0);
-
-/*
- * If the provided expression evaluates to an error code, returns that error code.
- *
- * In debug modes, prints additional information.
- */
-#define FORWARD_IF_ERROR(err, ...) \
-  do { \
-    size_t const err_code = (err); \
-    if (ERR_isError(err_code)) { \
-      RAWLOG(3, "%s:%d: ERROR!: forwarding error in %s: %s", \
-             __FILE__, __LINE__, ZSTD_QUOTE(err), ERR_getErrorName(err_code)); \
-      _FORCE_HAS_FORMAT_STRING(__VA_ARGS__); \
-      RAWLOG(3, ": " __VA_ARGS__); \
-      RAWLOG(3, "\n"); \
-      return err_code; \
-    } \
-  } while(0);
+#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))
 
 
 /*-*************************************
@@ -130,7 +57,6 @@ void _force_has_format_string(const char *format, ...) {
 #define ZSTD_OPT_NUM    (1<<12)
 
 #define ZSTD_REP_NUM      3                 /* number of repcodes */
-#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
 static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };
 
 #define KB *(1 <<10)
@@ -182,7 +108,7 @@ typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingTy
 /* Each table cannot take more than #symbols * FSELog bits */
 #define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
 
-static UNUSED_ATTR const U32 LL_bits[MaxLL+1] = {
+static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      1, 1, 1, 1, 2, 2, 3, 3,
@@ -199,7 +125,7 @@ static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
 #define LL_DEFAULTNORMLOG 6  /* for static allocation */
 static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;
 
-static UNUSED_ATTR const U32 ML_bits[MaxML+1] = {
+static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
      0, 0, 0, 0, 0, 0, 0, 0,
@@ -234,12 +160,31 @@ static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;
 *  Shared functions to include for inlining
 *********************************************/
 static void ZSTD_copy8(void* dst, const void* src) {
+#if defined(ZSTD_ARCH_ARM_NEON)
+    vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
+#else
     ZSTD_memcpy(dst, src, 8);
+#endif
 }
-
 #define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }
+
+/* Need to use memmove here since the literal buffer can now be located within
+   the dst buffer. In circumstances where the op "catches up" to where the
+   literal buffer is, there can be partial overlaps in this call on the final
+   copy if the literal is being shifted by less than 16 bytes. */
 static void ZSTD_copy16(void* dst, const void* src) {
-    ZSTD_memcpy(dst, src, 16);
+#if defined(ZSTD_ARCH_ARM_NEON)
+    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
+#elif defined(ZSTD_ARCH_X86_SSE2)
+    _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
+#elif defined(__clang__)
+    ZSTD_memmove(dst, src, 16);
+#else
+    /* ZSTD_memmove is not inlined properly by gcc */
+    BYTE copy16_buf[16];
+    ZSTD_memcpy(copy16_buf, src, 16);
+    ZSTD_memcpy(dst, copy16_buf, 16);
+#endif
 }
 #define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
 
@@ -267,8 +212,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
     BYTE* op = (BYTE*)dst;
     BYTE* const oend = op + length;
 
-    assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
-
     if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
         /* Handle short offset copies. */
         do {
@@ -331,11 +274,18 @@ typedef enum {
 *  Private declarations
 *********************************************/
 typedef struct seqDef_s {
-    U32 offset;         /* Offset code of the sequence */
+    U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
     U16 litLength;
-    U16 matchLength;
+    U16 mlBase;    /* mlBase == matchLength - MINMATCH */
 } seqDef;
 
+/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
+typedef enum {
+    ZSTD_llt_none = 0,             /* no longLengthType */
+    ZSTD_llt_literalLength = 1,    /* represents a long literal */
+    ZSTD_llt_matchLength = 2       /* represents a long match */
+} ZSTD_longLengthType_e;
+
 typedef struct {
     seqDef* sequencesStart;
     seqDef* sequences;      /* ptr to end of sequences */
@@ -347,12 +297,12 @@ typedef struct {
     size_t maxNbSeq;
     size_t maxNbLit;
 
-    /* longLengthPos and longLengthID to allow us to represent either a single litLength or matchLength
+    /* longLengthPos and longLengthType to allow us to represent either a single litLength or matchLength
      * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
      * the existing value of the litLength or matchLength by 0x10000.
      */
-    U32   longLengthID;   /* 0 == no longLength; 1 == Represent the long literal; 2 == Represent the long match; */
-    U32   longLengthPos;  /* Index of the sequence to apply long length modification to */
+    ZSTD_longLengthType_e   longLengthType;
+    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
 } seqStore_t;
 
 typedef struct {
@@ -362,18 +312,18 @@ typedef struct {
 
 /*
  * Returns the ZSTD_sequenceLength for the given sequences. It handles the decoding of long sequences
- * indicated by longLengthPos and longLengthID, and adds MINMATCH back to matchLength.
+ * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
  */
 MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
 {
     ZSTD_sequenceLength seqLen;
     seqLen.litLength = seq->litLength;
-    seqLen.matchLength = seq->matchLength + MINMATCH;
+    seqLen.matchLength = seq->mlBase + MINMATCH;
     if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
-        if (seqStore->longLengthID == 1) {
+        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
             seqLen.litLength += 0xFFFF;
         }
-        if (seqStore->longLengthID == 2) {
+        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
             seqLen.matchLength += 0xFFFF;
         }
     }
@@ -419,6 +369,41 @@ MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus
     }
 }
 
+/*
+ * Counts the number of trailing zeros of a `size_t`.
+ * Most compilers should support CTZ as a builtin. A backup
+ * implementation is provided if the builtin isn't supported, but
+ * it may not be terribly efficient.
+ */
+MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
+{
+    if (MEM_64bits()) {
+#       if (__GNUC__ >= 4)
+            return __builtin_ctzll((U64)val);
+#       else
+            static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19,
+                                                      4, 25, 14, 28,  9, 34, 20, 56,
+                                                      5, 17, 26, 54, 15, 41, 29, 43,
+                                                      10, 31, 38, 35, 21, 45, 49, 57,
+                                                      63,  6, 12, 18, 24, 27, 33, 55,
+                                                      16, 53, 40, 42, 30, 37, 44, 48,
+                                                      62, 11, 23, 32, 52, 39, 36, 47,
+                                                      61, 22, 51, 46, 60, 50, 59, 58 };
+            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
+#       endif
+    } else { /* 32 bits */
+#       if (__GNUC__ >= 3)
+            return __builtin_ctz((U32)val);
+#       else
+            static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3,
+                                                     30, 22, 20, 15, 25, 17,  4,  8,
+                                                     31, 27, 13, 23, 21, 19, 16,  7,
+                                                     26, 12, 18,  6, 11,  5, 10,  9 };
+            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
+#       endif
+    }
+}
+
 
 /* ZSTD_invalidateRepCodes() :
  * ensures next compression will not use repcodes from previous block.
@@ -445,6 +430,14 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
 size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                        const void* src, size_t srcSize);
 
+/*
+ * @returns true iff the CPU supports dynamic BMI2 dispatch.
+ */
+MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
+{
+    ZSTD_cpuid_t cpuid = ZSTD_cpuid();
+    return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
+}
 
 
 #endif   /* ZSTD_CCOMMON_H_MODULE */
diff --git a/lib/zstd/compress/clevels.h b/lib/zstd/compress/clevels.h
new file mode 100644
index 0000000000000..d9a76112ec3af
--- /dev/null
+++ b/lib/zstd/compress/clevels.h
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) Yann Collet, Facebook, Inc.
+ * All rights reserved.
+ *
+ * This source code is licensed under both the BSD-style license (found in the
+ * LICENSE file in the root directory of this source tree) and the GPLv2 (found
+ * in the COPYING file in the root directory of this source tree).
+ * You may select, at your option, one of the above-listed licenses.
+ */
+
+#ifndef ZSTD_CLEVELS_H
+#define ZSTD_CLEVELS_H
+
+#define ZSTD_STATIC_LINKING_ONLY  /* ZSTD_compressionParameters  */
+#include <linux/zstd.h>
+
+/*-=====  Pre-defined compression levels  =====-*/
+
+#define ZSTD_MAX_CLEVEL     22
+
+__attribute__((__unused__))
+
+static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
+{   /* "default" - for any srcSize > 256 KB */
+    /* W,  C,  H,  S,  L, TL, strat */
+    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
+    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
+    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
+    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
+    { 21, 18, 19,  3,  5,  2, ZSTD_greedy  },  /* level  5 */
+    { 21, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6 */
+    { 21, 19, 20,  4,  5,  8, ZSTD_lazy    },  /* level  7 */
+    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  8 */
+    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
+    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 10 */
+    { 22, 21, 22,  6,  5, 16, ZSTD_lazy2   },  /* level 11 */
+    { 22, 22, 23,  6,  5, 32, ZSTD_lazy2   },  /* level 12 */
+    { 22, 22, 22,  4,  5, 32, ZSTD_btlazy2 },  /* level 13 */
+    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
+    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
+    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
+    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
+    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
+    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
+    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
+    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
+    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
+},
+{   /* for srcSize <= 256 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
+    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 18, 16, 17,  3,  5,  2, ZSTD_greedy  },  /* level  4.*/
+    { 18, 17, 18,  5,  5,  2, ZSTD_greedy  },  /* level  5.*/
+    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
+    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
+    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
+    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
+    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
+    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
+    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 128 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
+    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
+    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
+    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
+    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
+    { 17, 16, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
+    { 17, 16, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 17, 16, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
+    { 17, 16, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
+    { 17, 16, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
+    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
+    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
+    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
+    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
+    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
+    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
+    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
+    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
+    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+{   /* for srcSize <= 16 KB */
+    /* W,  C,  H,  S,  L,  T, strat */
+    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
+    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
+    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
+    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
+    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
+    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
+    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
+    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
+    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
+    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
+    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
+    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
+    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
+    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
+    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
+    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
+    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
+    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
+    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
+    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
+    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
+    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
+},
+};
+
+
+
+#endif  /* ZSTD_CLEVELS_H */
diff --git a/lib/zstd/compress/fse_compress.c b/lib/zstd/compress/fse_compress.c
index 436985b620e51..ec5b1ca6d71af 100644
--- a/lib/zstd/compress/fse_compress.c
+++ b/lib/zstd/compress/fse_compress.c
@@ -75,13 +75,14 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     void* const FSCT = ((U32*)ptr) + 1 /* header */ + (tableLog ? tableSize>>1 : 1) ;
     FSE_symbolCompressionTransform* const symbolTT = (FSE_symbolCompressionTransform*) (FSCT);
     U32 const step = FSE_TABLESTEP(tableSize);
+    U32 const maxSV1 = maxSymbolValue+1;
 
-    U32* cumul = (U32*)workSpace;
-    FSE_FUNCTION_TYPE* tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSymbolValue + 2));
+    U16* cumul = (U16*)workSpace;   /* size = maxSV1 */
+    FSE_FUNCTION_TYPE* const tableSymbol = (FSE_FUNCTION_TYPE*)(cumul + (maxSV1+1));  /* size = tableSize */
 
     U32 highThreshold = tableSize-1;
 
-    if ((size_t)workSpace & 3) return ERROR(GENERIC); /* Must be 4 byte aligned */
+    assert(((size_t)workSpace & 1) == 0);  /* Must be 2 bytes-aligned */
     if (FSE_BUILD_CTABLE_WORKSPACE_SIZE(maxSymbolValue, tableLog) > wkspSize) return ERROR(tableLog_tooLarge);
     /* CTable header */
     tableU16[-2] = (U16) tableLog;
@@ -98,20 +99,61 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
     /* symbol start positions */
     {   U32 u;
         cumul[0] = 0;
-        for (u=1; u <= maxSymbolValue+1; u++) {
+        for (u=1; u <= maxSV1; u++) {
             if (normalizedCounter[u-1]==-1) {  /* Low proba symbol */
                 cumul[u] = cumul[u-1] + 1;
                 tableSymbol[highThreshold--] = (FSE_FUNCTION_TYPE)(u-1);
             } else {
-                cumul[u] = cumul[u-1] + normalizedCounter[u-1];
+                assert(normalizedCounter[u-1] >= 0);
+                cumul[u] = cumul[u-1] + (U16)normalizedCounter[u-1];
+                assert(cumul[u] >= cumul[u-1]);  /* no overflow */
         }   }
-        cumul[maxSymbolValue+1] = tableSize+1;
+        cumul[maxSV1] = (U16)(tableSize+1);
     }
 
     /* Spread symbols */
-    {   U32 position = 0;
+    if (highThreshold == tableSize - 1) {
+        /* Case for no low prob count symbols. Lay down 8 bytes at a time
+         * to reduce branch misses since we are operating on a small block
+         */
+        BYTE* const spread = tableSymbol + tableSize; /* size = tableSize + 8 (may write beyond tableSize) */
+        {   U64 const add = 0x0101010101010101ull;
+            size_t pos = 0;
+            U64 sv = 0;
+            U32 s;
+            for (s=0; s<maxSV1; ++s, sv += add) {
+                int i;
+                int const n = normalizedCounter[s];
+                MEM_write64(spread + pos, sv);
+                for (i = 8; i < n; i += 8) {
+                    MEM_write64(spread + pos + i, sv);
+                }
+                assert(n>=0);
+                pos += (size_t)n;
+            }
+        }
+        /* Spread symbols across the table. Lack of lowprob symbols means that
+         * we don't need variable sized inner loop, so we can unroll the loop and
+         * reduce branch misses.
+         */
+        {   size_t position = 0;
+            size_t s;
+            size_t const unroll = 2; /* Experimentally determined optimal unroll */
+            assert(tableSize % unroll == 0); /* FSE_MIN_TABLELOG is 5 */
+            for (s = 0; s < (size_t)tableSize; s += unroll) {
+                size_t u;
+                for (u = 0; u < unroll; ++u) {
+                    size_t const uPosition = (position + (u * step)) & tableMask;
+                    tableSymbol[uPosition] = spread[s + u];
+                }
+                position = (position + (unroll * step)) & tableMask;
+            }
+            assert(position == 0);   /* Must have initialized all positions */
+        }
+    } else {
+        U32 position = 0;
         U32 symbol;
-        for (symbol=0; symbol<=maxSymbolValue; symbol++) {
+        for (symbol=0; symbol<maxSV1; symbol++) {
             int nbOccurrences;
             int const freq = normalizedCounter[symbol];
             for (nbOccurrences=0; nbOccurrences<freq; nbOccurrences++) {
@@ -120,7 +162,6 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                 while (position > highThreshold)
                     position = (position + step) & tableMask;   /* Low proba area */
         }   }
-
         assert(position==0);  /* Must have initialized all positions */
     }
 
@@ -144,16 +185,17 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
             case -1:
             case  1:
                 symbolTT[s].deltaNbBits = (tableLog << 16) - (1<<tableLog);
-                symbolTT[s].deltaFindState = total - 1;
+                assert(total <= INT_MAX);
+                symbolTT[s].deltaFindState = (int)(total - 1);
                 total ++;
                 break;
             default :
-                {
-                    U32 const maxBitsOut = tableLog - BIT_highbit32 (normalizedCounter[s]-1);
-                    U32 const minStatePlus = normalizedCounter[s] << maxBitsOut;
+                assert(normalizedCounter[s] > 1);
+                {   U32 const maxBitsOut = tableLog - BIT_highbit32 ((U32)normalizedCounter[s]-1);
+                    U32 const minStatePlus = (U32)normalizedCounter[s] << maxBitsOut;
                     symbolTT[s].deltaNbBits = (maxBitsOut << 16) - minStatePlus;
-                    symbolTT[s].deltaFindState = total - normalizedCounter[s];
-                    total +=  normalizedCounter[s];
+                    symbolTT[s].deltaFindState = (int)(total - (unsigned)normalizedCounter[s]);
+                    total +=  (unsigned)normalizedCounter[s];
     }   }   }   }
 
 #if 0  /* debug : symbol costs */
@@ -164,8 +206,7 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
                 symbol, normalizedCounter[symbol],
                 FSE_getMaxNbBits(symbolTT, symbol),
                 (double)FSE_bitCost(symbolTT, tableLog, symbol, 8) / 256);
-        }
-    }
+    }   }
 #endif
 
     return 0;
@@ -173,16 +214,18 @@ size_t FSE_buildCTable_wksp(FSE_CTable* ct,
 
 
-
 #ifndef FSE_COMMONDEFS_ONLY
 
-
 /*-**************************************************************
 *  FSE NCount encoding
 ****************************************************************/
 size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog)
 {
-    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog) >> 3) + 3;
+    size_t const maxHeaderSize = (((maxSymbolValue+1) * tableLog
+                                   + 4 /* bitCount initialized at 4 */
+                                   + 2 /* first two symbols may use one additional bit each */) / 8)
+                                    + 1 /* round up to whole nb bytes */
+                                    + 2 /* additional two bytes for bitstream flush */;
     return maxSymbolValue ? maxHeaderSize : FSE_NCOUNTBOUND;  /* maxSymbolValue==0 ? use default */
 }
 
diff --git a/lib/zstd/compress/huf_compress.c b/lib/zstd/compress/huf_compress.c
index f76a526bfa54b..74ef0db476210 100644
--- a/lib/zstd/compress/huf_compress.c
+++ b/lib/zstd/compress/huf_compress.c
@@ -50,6 +50,28 @@ unsigned HUF_optimalTableLog(unsigned maxTableLog, size_t srcSize, unsigned maxS
 /* *******************************************************
 *  HUF : Huffman block compression
 *********************************************************/
+#define HUF_WORKSPACE_MAX_ALIGNMENT 8
+
+static void* HUF_alignUpWorkspace(void* workspace, size_t* workspaceSizePtr, size_t align)
+{
+    size_t const mask = align - 1;
+    size_t const rem = (size_t)workspace & mask;
+    size_t const add = (align - rem) & mask;
+    BYTE* const aligned = (BYTE*)workspace + add;
+    assert((align & (align - 1)) == 0); /* pow 2 */
+    assert(align <= HUF_WORKSPACE_MAX_ALIGNMENT);
+    if (*workspaceSizePtr >= add) {
+        assert(add < align);
+        assert(((size_t)aligned & mask) == 0);
+        *workspaceSizePtr -= add;
+        return aligned;
+    } else {
+        *workspaceSizePtr = 0;
+        return NULL;
+    }
+}
+
+
 /* HUF_compressWeights() :
  * Same as FSE_compress(), but dedicated to huff0's weights compression.
  * The use case needs much less stack memory.
@@ -72,7 +94,7 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
 
     unsigned maxSymbolValue = HUF_TABLELOG_MAX;
     U32 tableLog = MAX_FSE_TABLELOG_FOR_HUFF_HEADER;
-    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)workspace;
+    HUF_CompressWeightsWksp* wksp = (HUF_CompressWeightsWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
 
     if (workspaceSize < sizeof(HUF_CompressWeightsWksp)) return ERROR(GENERIC);
 
@@ -103,6 +125,40 @@ static size_t HUF_compressWeights(void* dst, size_t dstSize, const void* weightT
     return (size_t)(op-ostart);
 }
 
+static size_t HUF_getNbBits(HUF_CElt elt)
+{
+    return elt & 0xFF;
+}
+
+static size_t HUF_getNbBitsFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static size_t HUF_getValue(HUF_CElt elt)
+{
+    return elt & ~0xFF;
+}
+
+static size_t HUF_getValueFast(HUF_CElt elt)
+{
+    return elt;
+}
+
+static void HUF_setNbBits(HUF_CElt* elt, size_t nbBits)
+{
+    assert(nbBits <= HUF_TABLELOG_ABSOLUTEMAX);
+    *elt = nbBits;
+}
+
+static void HUF_setValue(HUF_CElt* elt, size_t value)
+{
+    size_t const nbBits = HUF_getNbBits(*elt);
+    if (nbBits > 0) {
+        assert((value >> nbBits) == 0);
+        *elt |= value << (sizeof(HUF_CElt) * 8 - nbBits);
+    }
+}
 
 typedef struct {
     HUF_CompressWeightsWksp wksp;
@@ -114,9 +170,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
                             const HUF_CElt* CTable, unsigned maxSymbolValue, unsigned huffLog,
                             void* workspace, size_t workspaceSize)
 {
+    HUF_CElt const* const ct = CTable + 1;
     BYTE* op = (BYTE*)dst;
     U32 n;
-    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)workspace;
+    HUF_WriteCTableWksp* wksp = (HUF_WriteCTableWksp*)HUF_alignUpWorkspace(workspace, &workspaceSize, ZSTD_ALIGNOF(U32));
 
     /* check conditions */
     if (workspaceSize < sizeof(HUF_WriteCTableWksp)) return ERROR(GENERIC);
@@ -127,9 +184,10 @@ size_t HUF_writeCTable_wksp(void* dst, size_t maxDstSize,
     for (n=1; n<huffLog+1; n++)
         wksp->bitsToWeight[n] = (BYTE)(huffLog + 1 - n);
     for (n=0; n<maxSymbolValue; n++)
-        wksp->huffWeight[n] = wksp->bitsToWeight[CTable[n].nbBits];
+        wksp->huffWeight[n] = wksp->bitsToWeight[HUF_getNbBits(ct[n])];
 
     /* attempt weights compression by FSE */
+    if (maxDstSize < 1) return ERROR(dstSize_tooSmall);
     {   CHECK_V_F(hSize, HUF_compressWeights(op+1, maxDstSize-1, wksp->huffWeight, maxSymbolValue, &wksp->wksp, sizeof(wksp->wksp)) );
         if ((hSize>1) & (hSize < maxSymbolValue/2)) {   /* FSE compressed */
             op[0] = (BYTE)hSize;
@@ -163,6 +221,7 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];   /* large enough for values from 0 to 16 */
     U32 tableLog = 0;
     U32 nbSymbols = 0;
+    HUF_CElt* const ct = CTable + 1;
 
     /* get symbol weights */
     CHECK_V_F(readSize, HUF_readStats(huffWeight, HUF_SYMBOLVALUE_MAX+1, rankVal, &nbSymbols, &tableLog, src, srcSize));
@@ -172,6 +231,8 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     if (tableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     if (nbSymbols > *maxSymbolValuePtr+1) return ERROR(maxSymbolValue_tooSmall);
 
+    CTable[0] = tableLog;
+
     /* Prepare base value per rank */
     {   U32 n, nextRankStart = 0;
         for (n=1; n<=tableLog; n++) {
@@ -183,13 +244,13 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
     /* fill nbBits */
     {   U32 n; for (n=0; n<nbSymbols; n++) {
             const U32 w = huffWeight[n];
-            CTable[n].nbBits = (BYTE)(tableLog + 1 - w) & -(w != 0);
+            HUF_setNbBits(ct + n, (BYTE)(tableLog + 1 - w) & -(w != 0));
     }   }
 
     /* fill val */
     {   U16 nbPerRank[HUF_TABLELOG_MAX+2]  = {0};  /* support w=0=>n=tableLog+1 */
         U16 valPerRank[HUF_TABLELOG_MAX+2] = {0};
-        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[CTable[n].nbBits]++; }
+        { U32 n; for (n=0; n<nbSymbols; n++) nbPerRank[HUF_getNbBits(ct[n])]++; }
         /* determine stating value per rank */
         valPerRank[tableLog+1] = 0;   /* for w==0 */
         {   U16 min = 0;
@@ -199,18 +260,18 @@ size_t HUF_readCTable (HUF_CElt* CTable, unsigned* maxSymbolValuePtr, const void
                 min >>= 1;
         }   }
         /* assign value within rank, symbol order */
-        { U32 n; for (n=0; n<nbSymbols; n++) CTable[n].val = valPerRank[CTable[n].nbBits]++; }
+        { U32 n; for (n=0; n<nbSymbols; n++) HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++); }
     }
 
     *maxSymbolValuePtr = nbSymbols - 1;
     return readSize;
 }
 
-U32 HUF_getNbBits(const void* symbolTable, U32 symbolValue)
+U32 HUF_getNbBitsFromCTable(HUF_CElt const* CTable, U32 symbolValue)
 {
-    const HUF_CElt* table = (const HUF_CElt*)symbolTable;
+    const HUF_CElt* ct = CTable + 1;
     assert(symbolValue <= HUF_SYMBOLVALUE_MAX);
-    return table[symbolValue].nbBits;
+    return (U32)HUF_getNbBits(ct[symbolValue]);
 }
 
 
@@ -364,22 +425,118 @@ static U32 HUF_setMaxHeight(nodeElt* huffNode, U32 lastNonNull, U32 maxNbBits)
 }
 
 typedef struct {
-    U32 base;
-    U32 curr;
+    U16 base;
+    U16 curr;
 } rankPos;
 
 typedef nodeElt huffNodeTable[HUF_CTABLE_WORKSPACE_SIZE_U32];
 
-#define RANK_POSITION_TABLE_SIZE 32
+/* Number of buckets available for HUF_sort() */
+#define RANK_POSITION_TABLE_SIZE 192
 
 typedef struct {
   huffNodeTable huffNodeTbl;
   rankPos rankPosition[RANK_POSITION_TABLE_SIZE];
 } HUF_buildCTable_wksp_tables;
 
+/* RANK_POSITION_DISTINCT_COUNT_CUTOFF == Cutoff point in HUF_sort() buckets for which we use log2 bucketing.
+ * Strategy is to use as many buckets as possible for representing distinct
+ * counts while using the remainder to represent all "large" counts.
+ *
+ * To satisfy this requirement for 192 buckets, we can do the following:
+ * Let buckets 0-166 represent distinct counts of [0, 166]
+ * Let buckets 166 to 192 represent all remaining counts up to RANK_POSITION_MAX_COUNT_LOG using log2 bucketing.
+ */
+#define RANK_POSITION_MAX_COUNT_LOG 32
+#define RANK_POSITION_LOG_BUCKETS_BEGIN (RANK_POSITION_TABLE_SIZE - 1) - RANK_POSITION_MAX_COUNT_LOG - 1 /* == 158 */
+#define RANK_POSITION_DISTINCT_COUNT_CUTOFF RANK_POSITION_LOG_BUCKETS_BEGIN + BIT_highbit32(RANK_POSITION_LOG_BUCKETS_BEGIN) /* == 166 */
+
+/* Return the appropriate bucket index for a given count. See definition of
+ * RANK_POSITION_DISTINCT_COUNT_CUTOFF for explanation of bucketing strategy.
+ */
+static U32 HUF_getIndex(U32 const count) {
+    return (count < RANK_POSITION_DISTINCT_COUNT_CUTOFF)
+        ? count
+        : BIT_highbit32(count) + RANK_POSITION_LOG_BUCKETS_BEGIN;
+}
+
+/* Helper swap function for HUF_quickSortPartition() */
+static void HUF_swapNodes(nodeElt* a, nodeElt* b) {
+	nodeElt tmp = *a;
+	*a = *b;
+	*b = tmp;
+}
+
+/* Returns 0 if the huffNode array is not sorted by descending count */
+MEM_STATIC int HUF_isSorted(nodeElt huffNode[], U32 const maxSymbolValue1) {
+    U32 i;
+    for (i = 1; i < maxSymbolValue1; ++i) {
+        if (huffNode[i].count > huffNode[i-1].count) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+/* Insertion sort by descending order */
+HINT_INLINE void HUF_insertionSort(nodeElt huffNode[], int const low, int const high) {
+    int i;
+    int const size = high-low+1;
+    huffNode += low;
+    for (i = 1; i < size; ++i) {
+        nodeElt const key = huffNode[i];
+        int j = i - 1;
+        while (j >= 0 && huffNode[j].count < key.count) {
+            huffNode[j + 1] = huffNode[j];
+            j--;
+        }
+        huffNode[j + 1] = key;
+    }
+}
+
+/* Pivot helper function for quicksort. */
+static int HUF_quickSortPartition(nodeElt arr[], int const low, int const high) {
+    /* Simply select rightmost element as pivot. "Better" selectors like
+     * median-of-three don't experimentally appear to have any benefit.
+     */
+    U32 const pivot = arr[high].count;
+    int i = low - 1;
+    int j = low;
+    for ( ; j < high; j++) {
+        if (arr[j].count > pivot) {
+            i++;
+            HUF_swapNodes(&arr[i], &arr[j]);
+        }
+    }
+    HUF_swapNodes(&arr[i + 1], &arr[high]);
+    return i + 1;
+}
+
+/* Classic quicksort by descending with partially iterative calls
+ * to reduce worst case callstack size.
+ */
+static void HUF_simpleQuickSort(nodeElt arr[], int low, int high) {
+    int const kInsertionSortThreshold = 8;
+    if (high - low < kInsertionSortThreshold) {
+        HUF_insertionSort(arr, low, high);
+        return;
+    }
+    while (low < high) {
+        int const idx = HUF_quickSortPartition(arr, low, high);
+        if (idx - low < high - idx) {
+            HUF_simpleQuickSort(arr, low, idx - 1);
+            low = idx + 1;
+        } else {
+            HUF_simpleQuickSort(arr, idx + 1, high);
+            high = idx - 1;
+        }
+    }
+}
+
 /*
  * HUF_sort():
  * Sorts the symbols [0, maxSymbolValue] by count[symbol] in decreasing order.
+ * This is a typical bucket sorting strategy that uses either quicksort or insertion sort to sort each bucket.
  *
  * @param[out] huffNode       Sorted symbols by decreasing count. Only members `.count` and `.byte` are filled.
  *                            Must have (maxSymbolValue + 1) entries.
@@ -387,44 +544,52 @@ typedef struct {
  * @param[in]  maxSymbolValue Maximum symbol value.
  * @param      rankPosition   This is a scratch workspace. Must have RANK_POSITION_TABLE_SIZE entries.
  */
-static void HUF_sort(nodeElt* huffNode, const unsigned* count, U32 maxSymbolValue, rankPos* rankPosition)
-{
-    int n;
-    int const maxSymbolValue1 = (int)maxSymbolValue + 1;
+static void HUF_sort(nodeElt huffNode[], const unsigned count[], U32 const maxSymbolValue, rankPos rankPosition[]) {
+    U32 n;
+    U32 const maxSymbolValue1 = maxSymbolValue+1;
 
     /* Compute base and set curr to base.
-     * For symbol s let lowerRank = BIT_highbit32(count[n]+1) and rank = lowerRank + 1.
-     * Then 2^lowerRank <= count[n]+1 <= 2^rank.
+     * For symbol s let lowerRank = HUF_getIndex(count[n]) and rank = lowerRank + 1.
+     * See HUF_getIndex to see bucketing strategy.
      * We attribute each symbol to lowerRank's base value, because we want to know where
      * each rank begins in the output, so for rank R we want to count ranks R+1 and above.
      */
     ZSTD_memset(rankPosition, 0, sizeof(*rankPosition) * RANK_POSITION_TABLE_SIZE);
     for (n = 0; n < maxSymbolValue1; ++n) {
-        U32 lowerRank = BIT_highbit32(count[n] + 1);
+        U32 lowerRank = HUF_getIndex(count[n]);
+        assert(lowerRank < RANK_POSITION_TABLE_SIZE - 1);
         rankPosition[lowerRank].base++;
     }
+
     assert(rankPosition[RANK_POSITION_TABLE_SIZE - 1].base == 0);
+    /* Set up the rankPosition table */
     for (n = RANK_POSITION_TABLE_SIZE - 1; n > 0; --n) {
         rankPosition[n-1].base += rankPosition[n].base;
         rankPosition[n-1].curr = rankPosition[n-1].base;
     }
-    /* Sort */
+
+    /* Insert each symbol into their appropriate bucket, setting up rankPosition table. */
     for (n = 0; n < maxSymbolValue1; ++n) {
         U32 const c = count[n];
-        U32 const r = BIT_highbit32(c+1) + 1;
-        U32 pos = rankPosition[r].curr++;
-        /* Insert into the correct position in the rank.
-         * We have at most 256 symbols, so this insertion should be fine.
-         */
-        while ((pos > rankPosition[r].base) && (c > huffNode[pos-1].count)) {
-            huffNode[pos] = huffNode[pos-1];
-            pos--;
-        }
+        U32 const r = HUF_getIndex(c) + 1;
+        U32 const pos = rankPosition[r].curr++;
+        assert(pos < maxSymbolValue1);
         huffNode[pos].count = c;
         huffNode[pos].byte  = (BYTE)n;
     }
-}
 
+    /* Sort each bucket. */
+    for (n = RANK_POSITION_DISTINCT_COUNT_CUTOFF; n < RANK_POSITION_TABLE_SIZE - 1; ++n) {
+        U32 const bucketSize = rankPosition[n].curr-rankPosition[n].base;
+        U32 const bucketStartIdx = rankPosition[n].base;
+        if (bucketSize > 1) {
+            assert(bucketStartIdx < maxSymbolValue1);
+            HUF_simpleQuickSort(huffNode + bucketStartIdx, 0, bucketSize-1);
+        }
+    }
+
+    assert(HUF_isSorted(huffNode, maxSymbolValue1));
+}
 
 /* HUF_buildCTable_wksp() :
  *  Same as HUF_buildCTable(), but using externally allocated scratch buffer.
@@ -487,6 +652,7 @@ static int HUF_buildTree(nodeElt* huffNode, U32 maxSymbolValue)
  */
 static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, int nonNullRank, U32 maxSymbolValue, U32 maxNbBits)
 {
+    HUF_CElt* const ct = CTable + 1;
     /* fill result into ctable (val, nbBits) */
     int n;
     U16 nbPerRank[HUF_TABLELOG_MAX+1] = {0};
@@ -502,20 +668,20 @@ static void HUF_buildCTableFromTree(HUF_CElt* CTable, nodeElt const* huffNode, i
             min >>= 1;
     }   }
     for (n=0; n<alphabetSize; n++)
-        CTable[huffNode[n].byte].nbBits = huffNode[n].nbBits;   /* push nbBits per symbol, symbol order */
+        HUF_setNbBits(ct + huffNode[n].byte, huffNode[n].nbBits);   /* push nbBits per symbol, symbol order */
     for (n=0; n<alphabetSize; n++)
-        CTable[n].val = valPerRank[CTable[n].nbBits]++;   /* assign value within rank, symbol order */
+        HUF_setValue(ct + n, valPerRank[HUF_getNbBits(ct[n])]++);   /* assign value within rank, symbol order */
+    CTable[0] = maxNbBits;
 }
 
-size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
+size_t HUF_buildCTable_wksp (HUF_CElt* CTable, const unsigned* count, U32 maxSymbolValue, U32 maxNbBits, void* workSpace, size_t wkspSize)
 {
-    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)workSpace;
+    HUF_buildCTable_wksp_tables* const wksp_tables = (HUF_buildCTable_wksp_tables*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(U32));
     nodeElt* const huffNode0 = wksp_tables->huffNodeTbl;
     nodeElt* const huffNode = huffNode0+1;
     int nonNullRank;
 
     /* safety checks */
-    if (((size_t)workSpace & 3) != 0) return ERROR(GENERIC);  /* must be aligned on 4-bytes boundaries */
     if (wkspSize < sizeof(HUF_buildCTable_wksp_tables))
       return ERROR(workSpace_tooSmall);
     if (maxNbBits == 0) maxNbBits = HUF_TABLELOG_DEFAULT;
@@ -533,99 +699,334 @@ size_t HUF_buildCTable_wksp (HUF_CElt* tree, const unsigned* count, U32 maxSymbo
     maxNbBits = HUF_setMaxHeight(huffNode, (U32)nonNullRank, maxNbBits);
     if (maxNbBits > HUF_TABLELOG_MAX) return ERROR(GENERIC);   /* check fit into table */
 
-    HUF_buildCTableFromTree(tree, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
+    HUF_buildCTableFromTree(CTable, huffNode, nonNullRank, maxSymbolValue, maxNbBits);
 
     return maxNbBits;
 }
 
 size_t HUF_estimateCompressedSize(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue)
 {
+    HUF_CElt const* ct = CTable + 1;
     size_t nbBits = 0;
     int s;
     for (s = 0; s <= (int)maxSymbolValue; ++s) {
-        nbBits += CTable[s].nbBits * count[s];
+        nbBits += HUF_getNbBits(ct[s]) * count[s];
     }
     return nbBits >> 3;
 }
 
 int HUF_validateCTable(const HUF_CElt* CTable, const unsigned* count, unsigned maxSymbolValue) {
+  HUF_CElt const* ct = CTable + 1;
   int bad = 0;
   int s;
   for (s = 0; s <= (int)maxSymbolValue; ++s) {
-    bad |= (count[s] != 0) & (CTable[s].nbBits == 0);
+    bad |= (count[s] != 0) & (HUF_getNbBits(ct[s]) == 0);
   }
   return !bad;
 }
 
 size_t HUF_compressBound(size_t size) { return HUF_COMPRESSBOUND(size); }
 
+/* HUF_CStream_t:
+ * Huffman uses its own BIT_CStream_t implementation.
+ * There are three major differences from BIT_CStream_t:
+ *   1. HUF_addBits() takes a HUF_CElt (size_t) which is
+ *      the pair (nbBits, value) in the format:
+ *      format:
+ *        - Bits [0, 4)            = nbBits
+ *        - Bits [4, 64 - nbBits)  = 0
+ *        - Bits [64 - nbBits, 64) = value
+ *   2. The bitContainer is built from the upper bits and
+ *      right shifted. E.g. to add a new value of N bits
+ *      you right shift the bitContainer by N, then or in
+ *      the new value into the N upper bits.
+ *   3. The bitstream has two bit containers. You can add
+ *      bits to the second container and merge them into
+ *      the first container.
+ */
+
+#define HUF_BITS_IN_CONTAINER (sizeof(size_t) * 8)
+
+typedef struct {
+    size_t bitContainer[2];
+    size_t bitPos[2];
+
+    BYTE* startPtr;
+    BYTE* ptr;
+    BYTE* endPtr;
+} HUF_CStream_t;
+
+/*! HUF_initCStream():
+ * Initializes the bitstream.
+ * @returns 0 or an error code.
+ */
+static size_t HUF_initCStream(HUF_CStream_t* bitC,
+                                  void* startPtr, size_t dstCapacity)
+{
+    ZSTD_memset(bitC, 0, sizeof(*bitC));
+    bitC->startPtr = (BYTE*)startPtr;
+    bitC->ptr = bitC->startPtr;
+    bitC->endPtr = bitC->startPtr + dstCapacity - sizeof(bitC->bitContainer[0]);
+    if (dstCapacity <= sizeof(bitC->bitContainer[0])) return ERROR(dstSize_tooSmall);
+    return 0;
+}
+
+/*! HUF_addBits():
+ * Adds the symbol stored in HUF_CElt elt to the bitstream.
+ *
+ * @param elt   The element we're adding. This is a (nbBits, value) pair.
+ *              See the HUF_CStream_t docs for the format.
+ * @param idx   Insert into the bitstream at this idx.
+ * @param kFast This is a template parameter. If the bitstream is guaranteed
+ *              to have at least 4 unused bits after this call it may be 1,
+ *              otherwise it must be 0. HUF_addBits() is faster when fast is set.
+ */
+FORCE_INLINE_TEMPLATE void HUF_addBits(HUF_CStream_t* bitC, HUF_CElt elt, int idx, int kFast)
+{
+    assert(idx <= 1);
+    assert(HUF_getNbBits(elt) <= HUF_TABLELOG_ABSOLUTEMAX);
+    /* This is efficient on x86-64 with BMI2 because shrx
+     * only reads the low 6 bits of the register. The compiler
+     * knows this and elides the mask. When fast is set,
+     * every operation can use the same value loaded from elt.
+     */
+    bitC->bitContainer[idx] >>= HUF_getNbBits(elt);
+    bitC->bitContainer[idx] |= kFast ? HUF_getValueFast(elt) : HUF_getValue(elt);
+    /* We only read the low 8 bits of bitC->bitPos[idx] so it
+     * doesn't matter that the high bits have noise from the value.
+     */
+    bitC->bitPos[idx] += HUF_getNbBitsFast(elt);
+    assert((bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+    /* The last 4-bits of elt are dirty if fast is set,
+     * so we must not be overwriting bits that have already been
+     * inserted into the bit container.
+     */
+#if DEBUGLEVEL >= 1
+    {
+        size_t const nbBits = HUF_getNbBits(elt);
+        size_t const dirtyBits = nbBits == 0 ? 0 : BIT_highbit32((U32)nbBits) + 1;
+        (void)dirtyBits;
+        /* Middle bits are 0. */
+        assert(((elt >> dirtyBits) << (dirtyBits + nbBits)) == 0);
+        /* We didn't overwrite any bits in the bit container. */
+        assert(!kFast || (bitC->bitPos[idx] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+        (void)dirtyBits;
+    }
+#endif
+}
+
+FORCE_INLINE_TEMPLATE void HUF_zeroIndex1(HUF_CStream_t* bitC)
+{
+    bitC->bitContainer[1] = 0;
+    bitC->bitPos[1] = 0;
+}
+
+/*! HUF_mergeIndex1() :
+ * Merges the bit container @ index 1 into the bit container @ index 0
+ * and zeros the bit container @ index 1.
+ */
+FORCE_INLINE_TEMPLATE void HUF_mergeIndex1(HUF_CStream_t* bitC)
+{
+    assert((bitC->bitPos[1] & 0xFF) < HUF_BITS_IN_CONTAINER);
+    bitC->bitContainer[0] >>= (bitC->bitPos[1] & 0xFF);
+    bitC->bitContainer[0] |= bitC->bitContainer[1];
+    bitC->bitPos[0] += bitC->bitPos[1];
+    assert((bitC->bitPos[0] & 0xFF) <= HUF_BITS_IN_CONTAINER);
+}
+
+/*! HUF_flushBits() :
+* Flushes the bits in the bit container @ index 0.
+*
+* @post bitPos will be < 8.
+* @param kFast If kFast is set then we must know a-priori that
+*              the bit container will not overflow.
+*/
+FORCE_INLINE_TEMPLATE void HUF_flushBits(HUF_CStream_t* bitC, int kFast)
+{
+    /* The upper bits of bitPos are noisy, so we must mask by 0xFF. */
+    size_t const nbBits = bitC->bitPos[0] & 0xFF;
+    size_t const nbBytes = nbBits >> 3;
+    /* The top nbBits bits of bitContainer are the ones we need. */
+    size_t const bitContainer = bitC->bitContainer[0] >> (HUF_BITS_IN_CONTAINER - nbBits);
+    /* Mask bitPos to account for the bytes we consumed. */
+    bitC->bitPos[0] &= 7;
+    assert(nbBits > 0);
+    assert(nbBits <= sizeof(bitC->bitContainer[0]) * 8);
+    assert(bitC->ptr <= bitC->endPtr);
+    MEM_writeLEST(bitC->ptr, bitContainer);
+    bitC->ptr += nbBytes;
+    assert(!kFast || bitC->ptr <= bitC->endPtr);
+    if (!kFast && bitC->ptr > bitC->endPtr) bitC->ptr = bitC->endPtr;
+    /* bitContainer doesn't need to be modified because the leftover
+     * bits are already the top bitPos bits. And we don't care about
+     * noise in the lower values.
+     */
+}
+
+/*! HUF_endMark()
+ * @returns The Huffman stream end mark: A 1-bit value = 1.
+ */
+static HUF_CElt HUF_endMark(void)
+{
+    HUF_CElt endMark;
+    HUF_setNbBits(&endMark, 1);
+    HUF_setValue(&endMark, 1);
+    return endMark;
+}
+
+/*! HUF_closeCStream() :
+ *  @return Size of CStream, in bytes,
+ *          or 0 if it could not fit into dstBuffer */
+static size_t HUF_closeCStream(HUF_CStream_t* bitC)
+{
+    HUF_addBits(bitC, HUF_endMark(), /* idx */ 0, /* kFast */ 0);
+    HUF_flushBits(bitC, /* kFast */ 0);
+    {
+        size_t const nbBits = bitC->bitPos[0] & 0xFF;
+        if (bitC->ptr >= bitC->endPtr) return 0; /* overflow detected */
+        return (bitC->ptr - bitC->startPtr) + (nbBits > 0);
+    }
+}
+
 FORCE_INLINE_TEMPLATE void
-HUF_encodeSymbol(BIT_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable)
+HUF_encodeSymbol(HUF_CStream_t* bitCPtr, U32 symbol, const HUF_CElt* CTable, int idx, int fast)
 {
-    BIT_addBitsFast(bitCPtr, CTable[symbol].val, CTable[symbol].nbBits);
+    HUF_addBits(bitCPtr, CTable[symbol], idx, fast);
 }
 
-#define HUF_FLUSHBITS(s)  BIT_flushBits(s)
+FORCE_INLINE_TEMPLATE void
+HUF_compress1X_usingCTable_internal_body_loop(HUF_CStream_t* bitC,
+                                   const BYTE* ip, size_t srcSize,
+                                   const HUF_CElt* ct,
+                                   int kUnroll, int kFastFlush, int kLastFast)
+{
+    /* Join to kUnroll */
+    int n = (int)srcSize;
+    int rem = n % kUnroll;
+    if (rem > 0) {
+        for (; rem > 0; --rem) {
+            HUF_encodeSymbol(bitC, ip[--n], ct, 0, /* fast */ 0);
+        }
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n % kUnroll == 0);
+
+    /* Join to 2 * kUnroll */
+    if (n % (2 * kUnroll)) {
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, 0, 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, 0, kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        n -= kUnroll;
+    }
+    assert(n % (2 * kUnroll) == 0);
+
+    for (; n>0; n-= 2 * kUnroll) {
+        /* Encode kUnroll symbols into the bitstream @ index 0. */
+        int u;
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - u], ct, /* idx */ 0, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll], ct, /* idx */ 0, /* fast */ kLastFast);
+        HUF_flushBits(bitC, kFastFlush);
+        /* Encode kUnroll symbols into the bitstream @ index 1.
+         * This allows us to start filling the bit container
+         * without any data dependencies.
+         */
+        HUF_zeroIndex1(bitC);
+        for (u = 1; u < kUnroll; ++u) {
+            HUF_encodeSymbol(bitC, ip[n - kUnroll - u], ct, /* idx */ 1, /* fast */ 1);
+        }
+        HUF_encodeSymbol(bitC, ip[n - kUnroll - kUnroll], ct, /* idx */ 1, /* fast */ kLastFast);
+        /* Merge bitstream @ index 1 into the bitstream @ index 0 */
+        HUF_mergeIndex1(bitC);
+        HUF_flushBits(bitC, kFastFlush);
+    }
+    assert(n == 0);
+
+}
 
-#define HUF_FLUSHBITS_1(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*2+7) HUF_FLUSHBITS(stream)
+/*
+ * Returns a tight upper bound on the output space needed by Huffman
+ * with 8 bytes buffer to handle over-writes. If the output is at least
+ * this large we don't need to do bounds checks during Huffman encoding.
+ */
+static size_t HUF_tightCompressBound(size_t srcSize, size_t tableLog)
+{
+    return ((srcSize * tableLog) >> 3) + 8;
+}
 
-#define HUF_FLUSHBITS_2(stream) \
-    if (sizeof((stream)->bitContainer)*8 < HUF_TABLELOG_MAX*4+7) HUF_FLUSHBITS(stream)
 
 FORCE_INLINE_TEMPLATE size_t
 HUF_compress1X_usingCTable_internal_body(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable)
 {
+    U32 const tableLog = (U32)CTable[0];
+    HUF_CElt const* ct = CTable + 1;
     const BYTE* ip = (const BYTE*) src;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
-    size_t n;
-    BIT_CStream_t bitC;
+    HUF_CStream_t bitC;
 
     /* init */
     if (dstSize < 8) return 0;   /* not enough space to compress */
-    { size_t const initErr = BIT_initCStream(&bitC, op, (size_t)(oend-op));
+    { size_t const initErr = HUF_initCStream(&bitC, op, (size_t)(oend-op));
       if (HUF_isError(initErr)) return 0; }
 
-    n = srcSize & ~3;  /* join to mod 4 */
-    switch (srcSize & 3)
-    {
-        case 3:
-            HUF_encodeSymbol(&bitC, ip[n+ 2], CTable);
-            HUF_FLUSHBITS_2(&bitC);
-            ZSTD_FALLTHROUGH;
-        case 2:
-            HUF_encodeSymbol(&bitC, ip[n+ 1], CTable);
-            HUF_FLUSHBITS_1(&bitC);
-            ZSTD_FALLTHROUGH;
-        case 1:
-            HUF_encodeSymbol(&bitC, ip[n+ 0], CTable);
-            HUF_FLUSHBITS(&bitC);
-            ZSTD_FALLTHROUGH;
-        case 0: ZSTD_FALLTHROUGH;
-        default: break;
-    }
-
-    for (; n>0; n-=4) {  /* note : n&3==0 at this stage */
-        HUF_encodeSymbol(&bitC, ip[n- 1], CTable);
-        HUF_FLUSHBITS_1(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 2], CTable);
-        HUF_FLUSHBITS_2(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 3], CTable);
-        HUF_FLUSHBITS_1(&bitC);
-        HUF_encodeSymbol(&bitC, ip[n- 4], CTable);
-        HUF_FLUSHBITS(&bitC);
-    }
-
-    return BIT_closeCStream(&bitC);
+    if (dstSize < HUF_tightCompressBound(srcSize, (size_t)tableLog) || tableLog > 11)
+        HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ MEM_32bits() ? 2 : 4, /* kFast */ 0, /* kLastFast */ 0);
+    else {
+        if (MEM_32bits()) {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10: ZSTD_FALLTHROUGH;
+            case 9: ZSTD_FALLTHROUGH;
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 2, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 7: ZSTD_FALLTHROUGH;
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 3, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        } else {
+            switch (tableLog) {
+            case 11:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 10:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 5, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            case 9:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 6, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 8:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 7, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 7:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 8, /* kFastFlush */ 1, /* kLastFast */ 0);
+                break;
+            case 6: ZSTD_FALLTHROUGH;
+            default:
+                HUF_compress1X_usingCTable_internal_body_loop(&bitC, ip, srcSize, ct, /* kUnroll */ 9, /* kFastFlush */ 1, /* kLastFast */ 1);
+                break;
+            }
+        }
+    }
+    assert(bitC.ptr <= bitC.endPtr);
+
+    return HUF_closeCStream(&bitC);
 }
 
 #if DYNAMIC_BMI2
 
-static TARGET_ATTRIBUTE("bmi2") size_t
+static BMI2_TARGET_ATTRIBUTE size_t
 HUF_compress1X_usingCTable_internal_bmi2(void* dst, size_t dstSize,
                                    const void* src, size_t srcSize,
                                    const HUF_CElt* CTable)
@@ -667,9 +1068,13 @@ HUF_compress1X_usingCTable_internal(void* dst, size_t dstSize,
 
 size_t HUF_compress1X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+    return HUF_compress1X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
 }
 
+size_t HUF_compress1X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress1X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
+}
 
 static size_t
 HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
@@ -689,8 +1094,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
 
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart, (U16)cSize);
         op += cSize;
     }
@@ -698,8 +1102,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart+2, (U16)cSize);
         op += cSize;
     }
@@ -707,8 +1110,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     ip += segmentSize;
     assert(op <= oend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, segmentSize, CTable, bmi2) );
-        if (cSize==0) return 0;
-        assert(cSize <= 65535);
+        if (cSize == 0 || cSize > 65535) return 0;
         MEM_writeLE16(ostart+4, (U16)cSize);
         op += cSize;
     }
@@ -717,7 +1119,7 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
     assert(op <= oend);
     assert(ip <= iend);
     {   CHECK_V_F(cSize, HUF_compress1X_usingCTable_internal(op, (size_t)(oend-op), ip, (size_t)(iend-ip), CTable, bmi2) );
-        if (cSize==0) return 0;
+        if (cSize == 0 || cSize > 65535) return 0;
         op += cSize;
     }
 
@@ -726,7 +1128,12 @@ HUF_compress4X_usingCTable_internal(void* dst, size_t dstSize,
 
 size_t HUF_compress4X_usingCTable(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable)
 {
-    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+    return HUF_compress4X_usingCTable_bmi2(dst, dstSize, src, srcSize, CTable, /* bmi2 */ 0);
+}
+
+size_t HUF_compress4X_usingCTable_bmi2(void* dst, size_t dstSize, const void* src, size_t srcSize, const HUF_CElt* CTable, int bmi2)
+{
+    return HUF_compress4X_usingCTable_internal(dst, dstSize, src, srcSize, CTable, bmi2);
 }
 
 typedef enum { HUF_singleStream, HUF_fourStreams } HUF_nbStreams_e;
@@ -750,35 +1157,38 @@ static size_t HUF_compressCTable_internal(
 
 typedef struct {
     unsigned count[HUF_SYMBOLVALUE_MAX + 1];
-    HUF_CElt CTable[HUF_SYMBOLVALUE_MAX + 1];
+    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(HUF_SYMBOLVALUE_MAX)];
     union {
         HUF_buildCTable_wksp_tables buildCTable_wksp;
         HUF_WriteCTableWksp writeCTable_wksp;
+        U32 hist_wksp[HIST_WKSP_SIZE_U32];
     } wksps;
 } HUF_compress_tables_t;
 
+#define SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE 4096
+#define SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO 10  /* Must be >= 2 */
+
 /* HUF_compress_internal() :
  * `workSpace_align4` must be aligned on 4-bytes boundaries,
- * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U32 unsigned */
+ * and occupies the same space as a table of HUF_WORKSPACE_SIZE_U64 unsigned */
 static size_t
 HUF_compress_internal (void* dst, size_t dstSize,
                  const void* src, size_t srcSize,
                        unsigned maxSymbolValue, unsigned huffLog,
                        HUF_nbStreams_e nbStreams,
-                       void* workSpace_align4, size_t wkspSize,
+                       void* workSpace, size_t wkspSize,
                        HUF_CElt* oldHufTable, HUF_repeat* repeat, int preferRepeat,
-                 const int bmi2)
+                 const int bmi2, unsigned suspectUncompressible)
 {
-    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)workSpace_align4;
+    HUF_compress_tables_t* const table = (HUF_compress_tables_t*)HUF_alignUpWorkspace(workSpace, &wkspSize, ZSTD_ALIGNOF(size_t));
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstSize;
     BYTE* op = ostart;
 
-    HUF_STATIC_ASSERT(sizeof(*table) <= HUF_WORKSPACE_SIZE);
-    assert(((size_t)workSpace_align4 & 3) == 0);   /* must be aligned on 4-bytes boundaries */
+    HUF_STATIC_ASSERT(sizeof(*table) + HUF_WORKSPACE_MAX_ALIGNMENT <= HUF_WORKSPACE_SIZE);
 
     /* checks & inits */
-    if (wkspSize < HUF_WORKSPACE_SIZE) return ERROR(workSpace_tooSmall);
+    if (wkspSize < sizeof(*table)) return ERROR(workSpace_tooSmall);
     if (!srcSize) return 0;  /* Uncompressed */
     if (!dstSize) return 0;  /* cannot fit anything within dst budget */
     if (srcSize > HUF_BLOCKSIZE_MAX) return ERROR(srcSize_wrong);   /* current block size limit */
@@ -794,8 +1204,23 @@ HUF_compress_internal (void* dst, size_t dstSize,
                                            nbStreams, oldHufTable, bmi2);
     }
 
+    /* If uncompressible data is suspected, do a smaller sampling first */
+    DEBUG_STATIC_ASSERT(SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO >= 2);
+    if (suspectUncompressible && srcSize >= (SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE * SUSPECT_INCOMPRESSIBLE_SAMPLE_RATIO)) {
+        size_t largestTotal = 0;
+        {   unsigned maxSymbolValueBegin = maxSymbolValue;
+            CHECK_V_F(largestBegin, HIST_count_simple (table->count, &maxSymbolValueBegin, (const BYTE*)src, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
+            largestTotal += largestBegin;
+        }
+        {   unsigned maxSymbolValueEnd = maxSymbolValue;
+            CHECK_V_F(largestEnd, HIST_count_simple (table->count, &maxSymbolValueEnd, (const BYTE*)src + srcSize - SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE, SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) );
+            largestTotal += largestEnd;
+        }
+        if (largestTotal <= ((2 * SUSPECT_INCOMPRESSIBLE_SAMPLE_SIZE) >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
+    }
+
     /* Scan input and build symbol stats */
-    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, workSpace_align4, wkspSize) );
+    {   CHECK_V_F(largest, HIST_count_wksp (table->count, &maxSymbolValue, (const BYTE*)src, srcSize, table->wksps.hist_wksp, sizeof(table->wksps.hist_wksp)) );
         if (largest == srcSize) { *ostart = ((const BYTE*)src)[0]; return 1; }   /* single symbol, rle */
         if (largest <= (srcSize >> 7)+4) return 0;   /* heuristic : probably not compressible enough */
     }
@@ -820,9 +1245,12 @@ HUF_compress_internal (void* dst, size_t dstSize,
                                             &table->wksps.buildCTable_wksp, sizeof(table->wksps.buildCTable_wksp));
         CHECK_F(maxBits);
         huffLog = (U32)maxBits;
-        /* Zero unused symbols in CTable, so we can check it for validity */
-        ZSTD_memset(table->CTable + (maxSymbolValue + 1), 0,
-               sizeof(table->CTable) - ((maxSymbolValue + 1) * sizeof(HUF_CElt)));
+    }
+    /* Zero unused symbols in CTable, so we can check it for validity */
+    {
+        size_t const ctableSize = HUF_CTABLE_SIZE_ST(maxSymbolValue);
+        size_t const unusedSize = sizeof(table->CTable) - ctableSize * sizeof(HUF_CElt);
+        ZSTD_memset(table->CTable + ctableSize, 0, unusedSize);
     }
 
     /* Write table description header */
@@ -859,19 +1287,20 @@ size_t HUF_compress1X_wksp (void* dst, size_t dstSize,
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_singleStream,
                                  workSpace, wkspSize,
-                                 NULL, NULL, 0, 0 /*bmi2*/);
+                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
 }
 
 size_t HUF_compress1X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat,
+                      int bmi2, unsigned suspectUncompressible)
 {
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_singleStream,
                                  workSpace, wkspSize, hufTable,
-                                 repeat, preferRepeat, bmi2);
+                                 repeat, preferRepeat, bmi2, suspectUncompressible);
 }
 
 /* HUF_compress4X_repeat():
@@ -885,21 +1314,22 @@ size_t HUF_compress4X_wksp (void* dst, size_t dstSize,
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_fourStreams,
                                  workSpace, wkspSize,
-                                 NULL, NULL, 0, 0 /*bmi2*/);
+                                 NULL, NULL, 0, 0 /*bmi2*/, 0);
 }
 
 /* HUF_compress4X_repeat():
  * compress input using 4 streams.
+ * consider skipping quickly
  * re-use an existing huffman compression table */
 size_t HUF_compress4X_repeat (void* dst, size_t dstSize,
                       const void* src, size_t srcSize,
                       unsigned maxSymbolValue, unsigned huffLog,
                       void* workSpace, size_t wkspSize,
-                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2)
+                      HUF_CElt* hufTable, HUF_repeat* repeat, int preferRepeat, int bmi2, unsigned suspectUncompressible)
 {
     return HUF_compress_internal(dst, dstSize, src, srcSize,
                                  maxSymbolValue, huffLog, HUF_fourStreams,
                                  workSpace, wkspSize,
-                                 hufTable, repeat, preferRepeat, bmi2);
+                                 hufTable, repeat, preferRepeat, bmi2, suspectUncompressible);
 }
 
diff --git a/lib/zstd/compress/zstd_compress.c b/lib/zstd/compress/zstd_compress.c
index 73fff4c60149d..f620cafca633b 100644
--- a/lib/zstd/compress/zstd_compress.c
+++ b/lib/zstd/compress/zstd_compress.c
@@ -12,7 +12,6 @@
 *  Dependencies
 ***************************************/
 #include "../common/zstd_deps.h"  /* INT_MAX, ZSTD_memset, ZSTD_memcpy */
-#include "../common/cpu.h"
 #include "../common/mem.h"
 #include "hist.h"           /* HIST_countFast_wksp */
 #define FSE_STATIC_LINKING_ONLY   /* FSE_encodeSymbol */
@@ -39,6 +38,18 @@
  * Note that functions with explicit context such as ZSTD_compressCCtx() are unaffected.
  */
 
+/*!
+ * ZSTD_HASHLOG3_MAX :
+ * Maximum size of the hash table dedicated to find 3-bytes matches,
+ * in log format, aka 17 => 1 << 17 == 128Ki positions.
+ * This structure is only used in zstd_opt.
+ * Since allocation is centralized for all strategies, it has to be known here.
+ * The actual (selected) size of the hash table is then stored in ZSTD_matchState_t.hashLog3,
+ * so that zstd_opt.c doesn't need to know about this constant.
+ */
+#ifndef ZSTD_HASHLOG3_MAX
+#  define ZSTD_HASHLOG3_MAX 17
+#endif
 
 /*-*************************************
 *  Helper functions
@@ -69,6 +80,10 @@ struct ZSTD_CDict_s {
     ZSTD_customMem customMem;
     U32 dictID;
     int compressionLevel; /* 0 indicates that advanced API was used to select CDict params */
+    ZSTD_paramSwitch_e useRowMatchFinder; /* Indicates whether the CDict was created with params that would use
+                                           * row-based matchfinder. Unless the cdict is reloaded, we will use
+                                           * the same greedy/lazy matchfinder at compression time.
+                                           */
 };  /* typedef'd to ZSTD_CDict within "zstd.h" */
 
 ZSTD_CCtx* ZSTD_createCCtx(void)
@@ -81,7 +96,7 @@ static void ZSTD_initCCtx(ZSTD_CCtx* cctx, ZSTD_customMem memManager)
     assert(cctx != NULL);
     ZSTD_memset(cctx, 0, sizeof(*cctx));
     cctx->customMem = memManager;
-    cctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+    cctx->bmi2 = ZSTD_cpuSupportsBmi2();
     {   size_t const err = ZSTD_CCtx_reset(cctx, ZSTD_reset_parameters);
         assert(!ZSTD_isError(err));
         (void)err;
@@ -192,12 +207,64 @@ size_t ZSTD_sizeof_CStream(const ZSTD_CStream* zcs)
 /* private API call, for dictBuilder only */
 const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx) { return &(ctx->seqStore); }
 
+/* Returns true if the strategy supports using a row based matchfinder */
+static int ZSTD_rowMatchFinderSupported(const ZSTD_strategy strategy) {
+    return (strategy >= ZSTD_greedy && strategy <= ZSTD_lazy2);
+}
+
+/* Returns true if the strategy and useRowMatchFinder mode indicate that we will use the row based matchfinder
+ * for this compression.
+ */
+static int ZSTD_rowMatchFinderUsed(const ZSTD_strategy strategy, const ZSTD_paramSwitch_e mode) {
+    assert(mode != ZSTD_ps_auto);
+    return ZSTD_rowMatchFinderSupported(strategy) && (mode == ZSTD_ps_enable);
+}
+
+/* Returns row matchfinder usage given an initial mode and cParams */
+static ZSTD_paramSwitch_e ZSTD_resolveRowMatchFinderMode(ZSTD_paramSwitch_e mode,
+                                                         const ZSTD_compressionParameters* const cParams) {
+#if defined(ZSTD_ARCH_X86_SSE2) || defined(ZSTD_ARCH_ARM_NEON)
+    int const kHasSIMD128 = 1;
+#else
+    int const kHasSIMD128 = 0;
+#endif
+    if (mode != ZSTD_ps_auto) return mode; /* if requested enabled, but no SIMD, we still will use row matchfinder */
+    mode = ZSTD_ps_disable;
+    if (!ZSTD_rowMatchFinderSupported(cParams->strategy)) return mode;
+    if (kHasSIMD128) {
+        if (cParams->windowLog > 14) mode = ZSTD_ps_enable;
+    } else {
+        if (cParams->windowLog > 17) mode = ZSTD_ps_enable;
+    }
+    return mode;
+}
+
+/* Returns block splitter usage (generally speaking, when using slower/stronger compression modes) */
+static ZSTD_paramSwitch_e ZSTD_resolveBlockSplitterMode(ZSTD_paramSwitch_e mode,
+                                                        const ZSTD_compressionParameters* const cParams) {
+    if (mode != ZSTD_ps_auto) return mode;
+    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 17) ? ZSTD_ps_enable : ZSTD_ps_disable;
+}
+
+/* Returns 1 if the arguments indicate that we should allocate a chainTable, 0 otherwise */
+static int ZSTD_allocateChainTable(const ZSTD_strategy strategy,
+                                   const ZSTD_paramSwitch_e useRowMatchFinder,
+                                   const U32 forDDSDict) {
+    assert(useRowMatchFinder != ZSTD_ps_auto);
+    /* We always should allocate a chaintable if we are allocating a matchstate for a DDS dictionary matchstate.
+     * We do not allocate a chaintable if we are using ZSTD_fast, or are using the row-based matchfinder.
+     */
+    return forDDSDict || ((strategy != ZSTD_fast) && !ZSTD_rowMatchFinderUsed(strategy, useRowMatchFinder));
+}
+
 /* Returns 1 if compression parameters are such that we should
  * enable long distance matching (wlog >= 27, strategy >= btopt).
  * Returns 0 otherwise.
  */
-static U32 ZSTD_CParams_shouldEnableLdm(const ZSTD_compressionParameters* const cParams) {
-    return cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27;
+static ZSTD_paramSwitch_e ZSTD_resolveEnableLdm(ZSTD_paramSwitch_e mode,
+                                 const ZSTD_compressionParameters* const cParams) {
+    if (mode != ZSTD_ps_auto) return mode;
+    return (cParams->strategy >= ZSTD_btopt && cParams->windowLog >= 27) ? ZSTD_ps_enable : ZSTD_ps_disable;
 }
 
 static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
@@ -208,15 +275,15 @@ static ZSTD_CCtx_params ZSTD_makeCCtxParamsFromCParams(
     ZSTD_CCtxParams_init(&cctxParams, ZSTD_CLEVEL_DEFAULT);
     cctxParams.cParams = cParams;
 
-    if (ZSTD_CParams_shouldEnableLdm(&cParams)) {
-        DEBUGLOG(4, "ZSTD_makeCCtxParamsFromCParams(): Including LDM into cctx params");
-        cctxParams.ldmParams.enableLdm = 1;
-        /* LDM is enabled by default for optimal parser and window size >= 128MB */
+    /* Adjust advanced params according to cParams */
+    cctxParams.ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams.ldmParams.enableLdm, &cParams);
+    if (cctxParams.ldmParams.enableLdm == ZSTD_ps_enable) {
         ZSTD_ldm_adjustParameters(&cctxParams.ldmParams, &cParams);
         assert(cctxParams.ldmParams.hashLog >= cctxParams.ldmParams.bucketSizeLog);
         assert(cctxParams.ldmParams.hashRateLog < 32);
     }
-
+    cctxParams.useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams.useBlockSplitter, &cParams);
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
     assert(!ZSTD_checkCParams(cParams));
     return cctxParams;
 }
@@ -275,6 +342,11 @@ static void ZSTD_CCtxParams_init_internal(ZSTD_CCtx_params* cctxParams, ZSTD_par
      * But, set it for tracing anyway.
      */
     cctxParams->compressionLevel = compressionLevel;
+    cctxParams->useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams->useRowMatchFinder, &params->cParams);
+    cctxParams->useBlockSplitter = ZSTD_resolveBlockSplitterMode(cctxParams->useBlockSplitter, &params->cParams);
+    cctxParams->ldmParams.enableLdm = ZSTD_resolveEnableLdm(cctxParams->ldmParams.enableLdm, &params->cParams);
+    DEBUGLOG(4, "ZSTD_CCtxParams_init_internal: useRowMatchFinder=%d, useBlockSplitter=%d ldm=%d",
+                cctxParams->useRowMatchFinder, cctxParams->useBlockSplitter, cctxParams->ldmParams.enableLdm);
 }
 
 size_t ZSTD_CCtxParams_init_advanced(ZSTD_CCtx_params* cctxParams, ZSTD_parameters params)
@@ -431,9 +503,9 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         return bounds;
 
     case ZSTD_c_literalCompressionMode:
-        ZSTD_STATIC_ASSERT(ZSTD_lcm_auto < ZSTD_lcm_huffman && ZSTD_lcm_huffman < ZSTD_lcm_uncompressed);
-        bounds.lowerBound = ZSTD_lcm_auto;
-        bounds.upperBound = ZSTD_lcm_uncompressed;
+        ZSTD_STATIC_ASSERT(ZSTD_ps_auto < ZSTD_ps_enable && ZSTD_ps_enable < ZSTD_ps_disable);
+        bounds.lowerBound = (int)ZSTD_ps_auto;
+        bounds.upperBound = (int)ZSTD_ps_disable;
         return bounds;
 
     case ZSTD_c_targetCBlockSize:
@@ -462,6 +534,21 @@ ZSTD_bounds ZSTD_cParam_getBounds(ZSTD_cParameter param)
         bounds.upperBound = 1;
         return bounds;
 
+    case ZSTD_c_useBlockSplitter:
+        bounds.lowerBound = (int)ZSTD_ps_auto;
+        bounds.upperBound = (int)ZSTD_ps_disable;
+        return bounds;
+
+    case ZSTD_c_useRowMatchFinder:
+        bounds.lowerBound = (int)ZSTD_ps_auto;
+        bounds.upperBound = (int)ZSTD_ps_disable;
+        return bounds;
+
+    case ZSTD_c_deterministicRefPrefix:
+        bounds.lowerBound = 0;
+        bounds.upperBound = 1;
+        return bounds;
+
     default:
         bounds.error = ERROR(parameter_unsupported);
         return bounds;
@@ -523,6 +610,9 @@ static int ZSTD_isUpdateAuthorized(ZSTD_cParameter param)
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_useBlockSplitter:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
     default:
         return 0;
     }
@@ -575,6 +665,9 @@ size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, int value)
     case ZSTD_c_stableOutBuffer:
     case ZSTD_c_blockDelimiters:
     case ZSTD_c_validateSequences:
+    case ZSTD_c_useBlockSplitter:
+    case ZSTD_c_useRowMatchFinder:
+    case ZSTD_c_deterministicRefPrefix:
         break;
 
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
@@ -672,7 +765,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
     }
 
     case ZSTD_c_literalCompressionMode : {
-        const ZSTD_literalCompressionMode_e lcm = (ZSTD_literalCompressionMode_e)value;
+        const ZSTD_paramSwitch_e lcm = (ZSTD_paramSwitch_e)value;
         BOUNDCHECK(ZSTD_c_literalCompressionMode, lcm);
         CCtxParams->literalCompressionMode = lcm;
         return CCtxParams->literalCompressionMode;
@@ -699,7 +792,7 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         return CCtxParams->enableDedicatedDictSearch;
 
     case ZSTD_c_enableLongDistanceMatching :
-        CCtxParams->ldmParams.enableLdm = (value!=0);
+        CCtxParams->ldmParams.enableLdm = (ZSTD_paramSwitch_e)value;
         return CCtxParams->ldmParams.enableLdm;
 
     case ZSTD_c_ldmHashLog :
@@ -758,6 +851,21 @@ size_t ZSTD_CCtxParams_setParameter(ZSTD_CCtx_params* CCtxParams,
         CCtxParams->validateSequences = value;
         return CCtxParams->validateSequences;
 
+    case ZSTD_c_useBlockSplitter:
+        BOUNDCHECK(ZSTD_c_useBlockSplitter, value);
+        CCtxParams->useBlockSplitter = (ZSTD_paramSwitch_e)value;
+        return CCtxParams->useBlockSplitter;
+
+    case ZSTD_c_useRowMatchFinder:
+        BOUNDCHECK(ZSTD_c_useRowMatchFinder, value);
+        CCtxParams->useRowMatchFinder = (ZSTD_paramSwitch_e)value;
+        return CCtxParams->useRowMatchFinder;
+
+    case ZSTD_c_deterministicRefPrefix:
+        BOUNDCHECK(ZSTD_c_deterministicRefPrefix, value);
+        CCtxParams->deterministicRefPrefix = !!value;
+        return CCtxParams->deterministicRefPrefix;
+
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
 }
@@ -863,6 +971,15 @@ size_t ZSTD_CCtxParams_getParameter(
     case ZSTD_c_validateSequences :
         *value = (int)CCtxParams->validateSequences;
         break;
+    case ZSTD_c_useBlockSplitter :
+        *value = (int)CCtxParams->useBlockSplitter;
+        break;
+    case ZSTD_c_useRowMatchFinder :
+        *value = (int)CCtxParams->useRowMatchFinder;
+        break;
+    case ZSTD_c_deterministicRefPrefix:
+        *value = (int)CCtxParams->deterministicRefPrefix;
+        break;
     default: RETURN_ERROR(parameter_unsupported, "unknown parameter");
     }
     return 0;
@@ -889,7 +1006,7 @@ size_t ZSTD_CCtx_setParametersUsingCCtxParams(
     return 0;
 }
 
-ZSTDLIB_API size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
+size_t ZSTD_CCtx_setPledgedSrcSize(ZSTD_CCtx* cctx, unsigned long long pledgedSrcSize)
 {
     DEBUGLOG(4, "ZSTD_CCtx_setPledgedSrcSize to %u bytes", (U32)pledgedSrcSize);
     RETURN_ERROR_IF(cctx->streamStage != zcss_init, stage_wrong,
@@ -969,14 +1086,14 @@ size_t ZSTD_CCtx_loadDictionary_advanced(
     return 0;
 }
 
-ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary_byReference(
+size_t ZSTD_CCtx_loadDictionary_byReference(
       ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
 {
     return ZSTD_CCtx_loadDictionary_advanced(
             cctx, dict, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto);
 }
 
-ZSTDLIB_API size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
+size_t ZSTD_CCtx_loadDictionary(ZSTD_CCtx* cctx, const void* dict, size_t dictSize)
 {
     return ZSTD_CCtx_loadDictionary_advanced(
             cctx, dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto);
@@ -1146,7 +1263,7 @@ ZSTD_adjustCParams_internal(ZSTD_compressionParameters cPar,
         break;
     case ZSTD_cpm_createCDict:
         /* Assume a small source size when creating a dictionary
-         * with an unkown source size.
+         * with an unknown source size.
          */
         if (dictSize && srcSize == ZSTD_CONTENTSIZE_UNKNOWN)
             srcSize = minSrcSize;
@@ -1220,7 +1337,7 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
       srcSizeHint = CCtxParams->srcSizeHint;
     }
     cParams = ZSTD_getCParams_internal(CCtxParams->compressionLevel, srcSizeHint, dictSize, mode);
-    if (CCtxParams->ldmParams.enableLdm) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
+    if (CCtxParams->ldmParams.enableLdm == ZSTD_ps_enable) cParams.windowLog = ZSTD_LDM_DEFAULT_WINDOW_LOG;
     ZSTD_overrideCParams(&cParams, &CCtxParams->cParams);
     assert(!ZSTD_checkCParams(cParams));
     /* srcSizeHint == 0 means 0 */
@@ -1229,9 +1346,14 @@ ZSTD_compressionParameters ZSTD_getCParamsFromCCtxParams(
 
 static size_t
 ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
+                       const ZSTD_paramSwitch_e useRowMatchFinder,
+                       const U32 enableDedicatedDictSearch,
                        const U32 forCCtx)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* chain table size should be 0 for fast or row-hash strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder, enableDedicatedDictSearch && !forCCtx)
+                                ? ((size_t)1 << cParams->chainLog)
+                                : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = (forCCtx && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
@@ -1241,43 +1363,53 @@ ZSTD_sizeof_matchState(const ZSTD_compressionParameters* const cParams,
                             + hSize * sizeof(U32)
                             + h3Size * sizeof(U32);
     size_t const optPotentialSpace =
-        ZSTD_cwksp_alloc_size((MaxML+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxLL+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((MaxOff+1) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((1<<Litbits) * sizeof(U32))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
-      + ZSTD_cwksp_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+        ZSTD_cwksp_aligned_alloc_size((MaxML+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxLL+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((MaxOff+1) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((1<<Litbits) * sizeof(U32))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_match_t))
+      + ZSTD_cwksp_aligned_alloc_size((ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
+    size_t const lazyAdditionalSpace = ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)
+                                            ? ZSTD_cwksp_aligned_alloc_size(hSize*sizeof(U16))
+                                            : 0;
     size_t const optSpace = (forCCtx && (cParams->strategy >= ZSTD_btopt))
                                 ? optPotentialSpace
                                 : 0;
+    size_t const slackSpace = ZSTD_cwksp_slack_space_required();
+
+    /* tables are guaranteed to be sized in multiples of 64 bytes (or 16 uint32_t) */
+    ZSTD_STATIC_ASSERT(ZSTD_HASHLOG_MIN >= 4 && ZSTD_WINDOWLOG_MIN >= 4 && ZSTD_CHAINLOG_MIN >= 4);
+    assert(useRowMatchFinder != ZSTD_ps_auto);
+
     DEBUGLOG(4, "chainSize: %u - hSize: %u - h3Size: %u",
                 (U32)chainSize, (U32)hSize, (U32)h3Size);
-    return tableSpace + optSpace;
+    return tableSpace + optSpace + slackSpace + lazyAdditionalSpace;
 }
 
 static size_t ZSTD_estimateCCtxSize_usingCCtxParams_internal(
         const ZSTD_compressionParameters* cParams,
         const ldmParams_t* ldmParams,
         const int isStatic,
+        const ZSTD_paramSwitch_e useRowMatchFinder,
         const size_t buffInSize,
         const size_t buffOutSize,
         const U64 pledgedSrcSize)
 {
-    size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << cParams->windowLog), pledgedSrcSize));
+    size_t const windowSize = (size_t) BOUNDED(1ULL, 1ULL << cParams->windowLog, pledgedSrcSize);
     size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
     U32    const divider = (cParams->minMatch==3) ? 3 : 4;
     size_t const maxNbSeq = blockSize / divider;
     size_t const tokenSpace = ZSTD_cwksp_alloc_size(WILDCOPY_OVERLENGTH + blockSize)
-                            + ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(seqDef))
+                            + ZSTD_cwksp_aligned_alloc_size(maxNbSeq * sizeof(seqDef))
                             + 3 * ZSTD_cwksp_alloc_size(maxNbSeq * sizeof(BYTE));
     size_t const entropySpace = ZSTD_cwksp_alloc_size(ENTROPY_WORKSPACE_SIZE);
     size_t const blockStateSpace = 2 * ZSTD_cwksp_alloc_size(sizeof(ZSTD_compressedBlockState_t));
-    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, /* forCCtx */ 1);
+    size_t const matchStateSize = ZSTD_sizeof_matchState(cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 0, /* forCCtx */ 1);
 
     size_t const ldmSpace = ZSTD_ldm_getTableSize(*ldmParams);
     size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(*ldmParams, blockSize);
-    size_t const ldmSeqSpace = ldmParams->enableLdm ?
-        ZSTD_cwksp_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
+    size_t const ldmSeqSpace = ldmParams->enableLdm == ZSTD_ps_enable ?
+        ZSTD_cwksp_aligned_alloc_size(maxNbLdmSeq * sizeof(rawSeq)) : 0;
 
 
     size_t const bufferSpace = ZSTD_cwksp_alloc_size(buffInSize)
@@ -1303,19 +1435,32 @@ size_t ZSTD_estimateCCtxSize_usingCCtxParams(const ZSTD_CCtx_params* params)
 {
     ZSTD_compressionParameters const cParams =
                 ZSTD_getCParamsFromCCtxParams(params, ZSTD_CONTENTSIZE_UNKNOWN, 0, ZSTD_cpm_noAttachDict);
+    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder,
+                                                                               &cParams);
 
     RETURN_ERROR_IF(params->nbWorkers > 0, GENERIC, "Estimate CCtx size is supported for single-threaded compression only.");
     /* estimateCCtxSize is for one-shot compression. So no buffers should
      * be needed. However, we still allocate two 0-sized buffers, which can
      * take space under ASAN. */
     return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-        &cParams, &params->ldmParams, 1, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
+        &cParams, &params->ldmParams, 1, useRowMatchFinder, 0, 0, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
 size_t ZSTD_estimateCCtxSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCCtxSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_ps_disable;
+        noRowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_ps_enable;
+        rowCCtxSize = ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCCtxSize_usingCCtxParams(&initialParams);
+    }
 }
 
 static size_t ZSTD_estimateCCtxSize_internal(int compressionLevel)
@@ -1355,17 +1500,29 @@ size_t ZSTD_estimateCStreamSize_usingCCtxParams(const ZSTD_CCtx_params* params)
         size_t const outBuffSize = (params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
+        ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params->useRowMatchFinder, &params->cParams);
 
         return ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-            &cParams, &params->ldmParams, 1, inBuffSize, outBuffSize,
+            &cParams, &params->ldmParams, 1, useRowMatchFinder, inBuffSize, outBuffSize,
             ZSTD_CONTENTSIZE_UNKNOWN);
     }
 }
 
 size_t ZSTD_estimateCStreamSize_usingCParams(ZSTD_compressionParameters cParams)
 {
-    ZSTD_CCtx_params const params = ZSTD_makeCCtxParamsFromCParams(cParams);
-    return ZSTD_estimateCStreamSize_usingCCtxParams(&params);
+    ZSTD_CCtx_params initialParams = ZSTD_makeCCtxParamsFromCParams(cParams);
+    if (ZSTD_rowMatchFinderSupported(cParams.strategy)) {
+        /* Pick bigger of not using and using row-based matchfinder for greedy and lazy strategies */
+        size_t noRowCCtxSize;
+        size_t rowCCtxSize;
+        initialParams.useRowMatchFinder = ZSTD_ps_disable;
+        noRowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        initialParams.useRowMatchFinder = ZSTD_ps_enable;
+        rowCCtxSize = ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+        return MAX(noRowCCtxSize, rowCCtxSize);
+    } else {
+        return ZSTD_estimateCStreamSize_usingCCtxParams(&initialParams);
+    }
 }
 
 static size_t ZSTD_estimateCStreamSize_internal(int compressionLevel)
@@ -1480,20 +1637,27 @@ typedef enum {
     ZSTD_resetTarget_CCtx
 } ZSTD_resetTarget_e;
 
+
 static size_t
 ZSTD_reset_matchState(ZSTD_matchState_t* ms,
                       ZSTD_cwksp* ws,
                 const ZSTD_compressionParameters* cParams,
+                const ZSTD_paramSwitch_e useRowMatchFinder,
                 const ZSTD_compResetPolicy_e crp,
                 const ZSTD_indexResetPolicy_e forceResetIndex,
                 const ZSTD_resetTarget_e forWho)
 {
-    size_t const chainSize = (cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cParams->chainLog);
+    /* disable chain table allocation for fast or row-based strategies */
+    size_t const chainSize = ZSTD_allocateChainTable(cParams->strategy, useRowMatchFinder,
+                                                     ms->dedicatedDictSearch && (forWho == ZSTD_resetTarget_CDict))
+                                ? ((size_t)1 << cParams->chainLog)
+                                : 0;
     size_t const hSize = ((size_t)1) << cParams->hashLog;
     U32    const hashLog3 = ((forWho == ZSTD_resetTarget_CCtx) && cParams->minMatch==3) ? MIN(ZSTD_HASHLOG3_MAX, cParams->windowLog) : 0;
     size_t const h3Size = hashLog3 ? ((size_t)1) << hashLog3 : 0;
 
     DEBUGLOG(4, "reset indices : %u", forceResetIndex == ZSTDirp_reset);
+    assert(useRowMatchFinder != ZSTD_ps_auto);
     if (forceResetIndex == ZSTDirp_reset) {
         ZSTD_window_init(&ms->window);
         ZSTD_cwksp_mark_tables_dirty(ws);
@@ -1532,11 +1696,23 @@ ZSTD_reset_matchState(ZSTD_matchState_t* ms,
         ms->opt.priceTable = (ZSTD_optimal_t*)ZSTD_cwksp_reserve_aligned(ws, (ZSTD_OPT_NUM+1) * sizeof(ZSTD_optimal_t));
     }
 
+    if (ZSTD_rowMatchFinderUsed(cParams->strategy, useRowMatchFinder)) {
+        {   /* Row match finder needs an additional table of hashes ("tags") */
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ms->tagTable = (U16*)ZSTD_cwksp_reserve_aligned(ws, tagTableSize);
+            if (ms->tagTable) ZSTD_memset(ms->tagTable, 0, tagTableSize);
+        }
+        {   /* Switch to 32-entry rows if searchLog is 5 (or more) */
+            U32 const rowLog = BOUNDED(4, cParams->searchLog, 6);
+            assert(cParams->hashLog >= rowLog);
+            ms->rowHashLog = cParams->hashLog - rowLog;
+        }
+    }
+
     ms->cParams = *cParams;
 
     RETURN_ERROR_IF(ZSTD_cwksp_reserve_failed(ws), memory_allocation,
                     "failed a workspace allocation in ZSTD_reset_matchState");
-
     return 0;
 }
 
@@ -1553,61 +1729,87 @@ static int ZSTD_indexTooCloseToMax(ZSTD_window_t w)
     return (size_t)(w.nextSrc - w.base) > (ZSTD_CURRENT_MAX - ZSTD_INDEXOVERFLOW_MARGIN);
 }
 
+/* ZSTD_dictTooBig():
+ * When dictionaries are larger than ZSTD_CHUNKSIZE_MAX they can't be loaded in
+ * one go generically. So we ensure that in that case we reset the tables to zero,
+ * so that we can load as much of the dictionary as possible.
+ */
+static int ZSTD_dictTooBig(size_t const loadedDictSize)
+{
+    return loadedDictSize > ZSTD_CHUNKSIZE_MAX;
+}
+
 /*! ZSTD_resetCCtx_internal() :
-    note : `params` are assumed fully validated at this stage */
+ * @param loadedDictSize The size of the dictionary to be loaded
+ * into the context, if any. If no dictionary is used, or the
+ * dictionary is being attached / copied, then pass 0.
+ * note : `params` are assumed fully validated at this stage.
+ */
 static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
-                                      ZSTD_CCtx_params params,
+                                      ZSTD_CCtx_params const* params,
                                       U64 const pledgedSrcSize,
+                                      size_t const loadedDictSize,
                                       ZSTD_compResetPolicy_e const crp,
                                       ZSTD_buffered_policy_e const zbuff)
 {
     ZSTD_cwksp* const ws = &zc->workspace;
-    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u",
-                (U32)pledgedSrcSize, params.cParams.windowLog);
-    assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
+    DEBUGLOG(4, "ZSTD_resetCCtx_internal: pledgedSrcSize=%u, wlog=%u, useRowMatchFinder=%d useBlockSplitter=%d",
+                (U32)pledgedSrcSize, params->cParams.windowLog, (int)params->useRowMatchFinder, (int)params->useBlockSplitter);
+    assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
 
     zc->isFirstBlock = 1;
 
-    if (params.ldmParams.enableLdm) {
+    /* Set applied params early so we can modify them for LDM,
+     * and point params at the applied params.
+     */
+    zc->appliedParams = *params;
+    params = &zc->appliedParams;
+
+    assert(params->useRowMatchFinder != ZSTD_ps_auto);
+    assert(params->useBlockSplitter != ZSTD_ps_auto);
+    assert(params->ldmParams.enableLdm != ZSTD_ps_auto);
+    if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
         /* Adjust long distance matching parameters */
-        ZSTD_ldm_adjustParameters(&params.ldmParams, &params.cParams);
-        assert(params.ldmParams.hashLog >= params.ldmParams.bucketSizeLog);
-        assert(params.ldmParams.hashRateLog < 32);
+        ZSTD_ldm_adjustParameters(&zc->appliedParams.ldmParams, &params->cParams);
+        assert(params->ldmParams.hashLog >= params->ldmParams.bucketSizeLog);
+        assert(params->ldmParams.hashRateLog < 32);
     }
 
-    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params.cParams.windowLog), pledgedSrcSize));
+    {   size_t const windowSize = MAX(1, (size_t)MIN(((U64)1 << params->cParams.windowLog), pledgedSrcSize));
         size_t const blockSize = MIN(ZSTD_BLOCKSIZE_MAX, windowSize);
-        U32    const divider = (params.cParams.minMatch==3) ? 3 : 4;
+        U32    const divider = (params->cParams.minMatch==3) ? 3 : 4;
         size_t const maxNbSeq = blockSize / divider;
-        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params.outBufferMode == ZSTD_bm_buffered)
+        size_t const buffOutSize = (zbuff == ZSTDb_buffered && params->outBufferMode == ZSTD_bm_buffered)
                 ? ZSTD_compressBound(blockSize) + 1
                 : 0;
-        size_t const buffInSize = (zbuff == ZSTDb_buffered && params.inBufferMode == ZSTD_bm_buffered)
+        size_t const buffInSize = (zbuff == ZSTDb_buffered && params->inBufferMode == ZSTD_bm_buffered)
                 ? windowSize + blockSize
                 : 0;
-        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params.ldmParams, blockSize);
+        size_t const maxNbLdmSeq = ZSTD_ldm_getMaxNbSeq(params->ldmParams, blockSize);
 
         int const indexTooClose = ZSTD_indexTooCloseToMax(zc->blockState.matchState.window);
+        int const dictTooBig = ZSTD_dictTooBig(loadedDictSize);
         ZSTD_indexResetPolicy_e needsIndexReset =
-            (!indexTooClose && zc->initialized) ? ZSTDirp_continue : ZSTDirp_reset;
+            (indexTooClose || dictTooBig || !zc->initialized) ? ZSTDirp_reset : ZSTDirp_continue;
 
         size_t const neededSpace =
             ZSTD_estimateCCtxSize_usingCCtxParams_internal(
-                &params.cParams, &params.ldmParams, zc->staticSize != 0,
+                &params->cParams, &params->ldmParams, zc->staticSize != 0, params->useRowMatchFinder,
                 buffInSize, buffOutSize, pledgedSrcSize);
+        int resizeWorkspace;
+
         FORWARD_IF_ERROR(neededSpace, "cctx size estimate failed!");
 
         if (!zc->staticSize) ZSTD_cwksp_bump_oversized_duration(ws, 0);
 
-        /* Check if workspace is large enough, alloc a new one if needed */
-        {
+        {   /* Check if workspace is large enough, alloc a new one if needed */
             int const workspaceTooSmall = ZSTD_cwksp_sizeof(ws) < neededSpace;
             int const workspaceWasteful = ZSTD_cwksp_check_wasteful(ws, neededSpace);
-
+            resizeWorkspace = workspaceTooSmall || workspaceWasteful;
             DEBUGLOG(4, "Need %zu B workspace", neededSpace);
             DEBUGLOG(4, "windowSize: %zu - blockSize: %zu", windowSize, blockSize);
 
-            if (workspaceTooSmall || workspaceWasteful) {
+            if (resizeWorkspace) {
                 DEBUGLOG(4, "Resize workspaceSize from %zuKB to %zuKB",
                             ZSTD_cwksp_sizeof(ws) >> 10,
                             neededSpace >> 10);
@@ -1629,14 +1831,13 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
                 zc->blockState.nextCBlock = (ZSTD_compressedBlockState_t*) ZSTD_cwksp_reserve_object(ws, sizeof(ZSTD_compressedBlockState_t));
                 RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate nextCBlock");
                 zc->entropyWorkspace = (U32*) ZSTD_cwksp_reserve_object(ws, ENTROPY_WORKSPACE_SIZE);
-                RETURN_ERROR_IF(zc->blockState.nextCBlock == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
+                RETURN_ERROR_IF(zc->entropyWorkspace == NULL, memory_allocation, "couldn't allocate entropyWorkspace");
         }   }
 
         ZSTD_cwksp_clear(ws);
 
         /* init params */
-        zc->appliedParams = params;
-        zc->blockState.matchState.cParams = params.cParams;
+        zc->blockState.matchState.cParams = params->cParams;
         zc->pledgedSrcSizePlusOne = pledgedSrcSize+1;
         zc->consumedSrcSize = 0;
         zc->producedCSize = 0;
@@ -1667,11 +1868,11 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         zc->outBuff = (char*)ZSTD_cwksp_reserve_buffer(ws, buffOutSize);
 
         /* ldm bucketOffsets table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
             /* TODO: avoid memset? */
             size_t const numBuckets =
-                  ((size_t)1) << (params.ldmParams.hashLog -
-                                  params.ldmParams.bucketSizeLog);
+                  ((size_t)1) << (params->ldmParams.hashLog -
+                                  params->ldmParams.bucketSizeLog);
             zc->ldmState.bucketOffsets = ZSTD_cwksp_reserve_buffer(ws, numBuckets);
             ZSTD_memset(zc->ldmState.bucketOffsets, 0, numBuckets);
         }
@@ -1687,32 +1888,28 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
         FORWARD_IF_ERROR(ZSTD_reset_matchState(
             &zc->blockState.matchState,
             ws,
-            &params.cParams,
+            &params->cParams,
+            params->useRowMatchFinder,
             crp,
             needsIndexReset,
             ZSTD_resetTarget_CCtx), "");
 
         /* ldm hash table */
-        if (params.ldmParams.enableLdm) {
+        if (params->ldmParams.enableLdm == ZSTD_ps_enable) {
             /* TODO: avoid memset? */
-            size_t const ldmHSize = ((size_t)1) << params.ldmParams.hashLog;
+            size_t const ldmHSize = ((size_t)1) << params->ldmParams.hashLog;
             zc->ldmState.hashTable = (ldmEntry_t*)ZSTD_cwksp_reserve_aligned(ws, ldmHSize * sizeof(ldmEntry_t));
             ZSTD_memset(zc->ldmState.hashTable, 0, ldmHSize * sizeof(ldmEntry_t));
             zc->ldmSequences = (rawSeq*)ZSTD_cwksp_reserve_aligned(ws, maxNbLdmSeq * sizeof(rawSeq));
             zc->maxNbLdmSequences = maxNbLdmSeq;
 
             ZSTD_window_init(&zc->ldmState.window);
-            ZSTD_window_clear(&zc->ldmState.window);
             zc->ldmState.loadedDictEnd = 0;
         }
 
-        /* Due to alignment, when reusing a workspace, we can actually consume
-         * up to 3 extra bytes for alignment. See the comments in zstd_cwksp.h
-         */
-        assert(ZSTD_cwksp_used(ws) >= neededSpace &&
-               ZSTD_cwksp_used(ws) <= neededSpace + 3);
-
         DEBUGLOG(3, "wksp: finished allocating, %zd bytes remain available", ZSTD_cwksp_available_space(ws));
+        assert(ZSTD_cwksp_estimated_space_within_bounds(ws, neededSpace, resizeWorkspace));
+
         zc->initialized = 1;
 
         return 0;
@@ -1768,6 +1965,8 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
                         U64 pledgedSrcSize,
                         ZSTD_buffered_policy_e zbuff)
 {
+    DEBUGLOG(4, "ZSTD_resetCCtx_byAttachingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
     {
         ZSTD_compressionParameters adjusted_cdict_cParams = cdict->matchState.cParams;
         unsigned const windowLog = params.cParams.windowLog;
@@ -1783,7 +1982,9 @@ ZSTD_resetCCtx_byAttachingCDict(ZSTD_CCtx* cctx,
         params.cParams = ZSTD_adjustCParams_internal(adjusted_cdict_cParams, pledgedSrcSize,
                                                      cdict->dictContentSize, ZSTD_cpm_attachDict);
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder;    /* cdict overrides */
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_makeClean, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == adjusted_cdict_cParams.strategy);
     }
@@ -1827,15 +2028,17 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
     const ZSTD_compressionParameters *cdict_cParams = &cdict->matchState.cParams;
 
     assert(!cdict->matchState.dedicatedDictSearch);
-
-    DEBUGLOG(4, "copying dictionary into context");
+    DEBUGLOG(4, "ZSTD_resetCCtx_byCopyingCDict() pledgedSrcSize=%llu",
+                (unsigned long long)pledgedSrcSize);
 
     {   unsigned const windowLog = params.cParams.windowLog;
         assert(windowLog != 0);
         /* Copy only compression parameters related to tables. */
         params.cParams = *cdict_cParams;
         params.cParams.windowLog = windowLog;
-        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+        params.useRowMatchFinder = cdict->useRowMatchFinder;
+        FORWARD_IF_ERROR(ZSTD_resetCCtx_internal(cctx, &params, pledgedSrcSize,
+                                                 /* loadedDictSize */ 0,
                                                  ZSTDcrp_leaveDirty, zbuff), "");
         assert(cctx->appliedParams.cParams.strategy == cdict_cParams->strategy);
         assert(cctx->appliedParams.cParams.hashLog == cdict_cParams->hashLog);
@@ -1843,17 +2046,30 @@ static size_t ZSTD_resetCCtx_byCopyingCDict(ZSTD_CCtx* cctx,
     }
 
     ZSTD_cwksp_mark_tables_dirty(&cctx->workspace);
+    assert(params.useRowMatchFinder != ZSTD_ps_auto);
 
     /* copy tables */
-    {   size_t const chainSize = (cdict_cParams->strategy == ZSTD_fast) ? 0 : ((size_t)1 << cdict_cParams->chainLog);
+    {   size_t const chainSize = ZSTD_allocateChainTable(cdict_cParams->strategy, cdict->useRowMatchFinder, 0 /* DDS guaranteed disabled */)
+                                                            ? ((size_t)1 << cdict_cParams->chainLog)
+                                                            : 0;
         size_t const hSize =  (size_t)1 << cdict_cParams->hashLog;
 
         ZSTD_memcpy(cctx->blockState.matchState.hashTable,
                cdict->matchState.hashTable,
                hSize * sizeof(U32));
-        ZSTD_memcpy(cctx->blockState.matchState.chainTable,
+        /* Do not copy cdict's chainTable if cctx has parameters such that it would not use chainTable */
+        if (ZSTD_allocateChainTable(cctx->appliedParams.cParams.strategy, cctx->appliedParams.useRowMatchFinder, 0 /* forDDSDict */)) {
+            ZSTD_memcpy(cctx->blockState.matchState.chainTable,
                cdict->matchState.chainTable,
                chainSize * sizeof(U32));
+        }
+        /* copy tag table */
+        if (ZSTD_rowMatchFinderUsed(cdict_cParams->strategy, cdict->useRowMatchFinder)) {
+            size_t const tagTableSize = hSize*sizeof(U16);
+            ZSTD_memcpy(cctx->blockState.matchState.tagTable,
+                cdict->matchState.tagTable,
+                tagTableSize);
+        }
     }
 
     /* Zero the hashTable3, since the cdict never fills it */
@@ -1917,16 +2133,22 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
                             U64 pledgedSrcSize,
                             ZSTD_buffered_policy_e zbuff)
 {
-    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     RETURN_ERROR_IF(srcCCtx->stage!=ZSTDcs_init, stage_wrong,
                     "Can't copy a ctx that's not in init stage.");
-
+    DEBUGLOG(5, "ZSTD_copyCCtx_internal");
     ZSTD_memcpy(&dstCCtx->customMem, &srcCCtx->customMem, sizeof(ZSTD_customMem));
     {   ZSTD_CCtx_params params = dstCCtx->requestedParams;
         /* Copy only compression parameters related to tables. */
         params.cParams = srcCCtx->appliedParams.cParams;
+        assert(srcCCtx->appliedParams.useRowMatchFinder != ZSTD_ps_auto);
+        assert(srcCCtx->appliedParams.useBlockSplitter != ZSTD_ps_auto);
+        assert(srcCCtx->appliedParams.ldmParams.enableLdm != ZSTD_ps_auto);
+        params.useRowMatchFinder = srcCCtx->appliedParams.useRowMatchFinder;
+        params.useBlockSplitter = srcCCtx->appliedParams.useBlockSplitter;
+        params.ldmParams = srcCCtx->appliedParams.ldmParams;
         params.fParams = fParams;
-        ZSTD_resetCCtx_internal(dstCCtx, params, pledgedSrcSize,
+        ZSTD_resetCCtx_internal(dstCCtx, &params, pledgedSrcSize,
+                                /* loadedDictSize */ 0,
                                 ZSTDcrp_leaveDirty, zbuff);
         assert(dstCCtx->appliedParams.cParams.windowLog == srcCCtx->appliedParams.cParams.windowLog);
         assert(dstCCtx->appliedParams.cParams.strategy == srcCCtx->appliedParams.cParams.strategy);
@@ -1938,7 +2160,11 @@ static size_t ZSTD_copyCCtx_internal(ZSTD_CCtx* dstCCtx,
     ZSTD_cwksp_mark_tables_dirty(&dstCCtx->workspace);
 
     /* copy tables */
-    {   size_t const chainSize = (srcCCtx->appliedParams.cParams.strategy == ZSTD_fast) ? 0 : ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog);
+    {   size_t const chainSize = ZSTD_allocateChainTable(srcCCtx->appliedParams.cParams.strategy,
+                                                         srcCCtx->appliedParams.useRowMatchFinder,
+                                                         0 /* forDDSDict */)
+                                    ? ((size_t)1 << srcCCtx->appliedParams.cParams.chainLog)
+                                    : 0;
         size_t const hSize =  (size_t)1 << srcCCtx->appliedParams.cParams.hashLog;
         int const h3log = srcCCtx->blockState.matchState.hashLog3;
         size_t const h3Size = h3log ? ((size_t)1 << h3log) : 0;
@@ -2005,6 +2231,8 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
     int const nbRows = (int)size / ZSTD_ROWSIZE;
     int cellNb = 0;
     int rowNb;
+    /* Protect special index values < ZSTD_WINDOW_START_INDEX. */
+    U32 const reducerThreshold = reducerValue + ZSTD_WINDOW_START_INDEX;
     assert((size & (ZSTD_ROWSIZE-1)) == 0);  /* multiple of ZSTD_ROWSIZE */
     assert(size < (1U<<31));   /* can be casted to int */
 
@@ -2012,12 +2240,17 @@ ZSTD_reduceTable_internal (U32* const table, U32 const size, U32 const reducerVa
     for (rowNb=0 ; rowNb < nbRows ; rowNb++) {
         int column;
         for (column=0; column<ZSTD_ROWSIZE; column++) {
-            if (preserveMark) {
-                U32 const adder = (table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) ? reducerValue : 0;
-                table[cellNb] += adder;
+            U32 newVal;
+            if (preserveMark && table[cellNb] == ZSTD_DUBT_UNSORTED_MARK) {
+                /* This write is pointless, but is required(?) for the compiler
+                 * to auto-vectorize the loop. */
+                newVal = ZSTD_DUBT_UNSORTED_MARK;
+            } else if (table[cellNb] < reducerThreshold) {
+                newVal = 0;
+            } else {
+                newVal = table[cellNb] - reducerValue;
             }
-            if (table[cellNb] < reducerValue) table[cellNb] = 0;
-            else table[cellNb] -= reducerValue;
+            table[cellNb] = newVal;
             cellNb++;
     }   }
 }
@@ -2040,7 +2273,7 @@ static void ZSTD_reduceIndex (ZSTD_matchState_t* ms, ZSTD_CCtx_params const* par
         ZSTD_reduceTable(ms->hashTable, hSize, reducerValue);
     }
 
-    if (params->cParams.strategy != ZSTD_fast) {
+    if (ZSTD_allocateChainTable(params->cParams.strategy, params->useRowMatchFinder, (U32)ms->dedicatedDictSearch)) {
         U32 const chainSize = (U32)1 << params->cParams.chainLog;
         if (params->cParams.strategy == ZSTD_btlazy2)
             ZSTD_reduceTable_btlazy2(ms->chainTable, chainSize, reducerValue);
@@ -2072,14 +2305,14 @@ void ZSTD_seqToCodes(const seqStore_t* seqStorePtr)
     assert(nbSeq <= seqStorePtr->maxNbSeq);
     for (u=0; u<nbSeq; u++) {
         U32 const llv = sequences[u].litLength;
-        U32 const mlv = sequences[u].matchLength;
+        U32 const mlv = sequences[u].mlBase;
         llCodeTable[u] = (BYTE)ZSTD_LLcode(llv);
-        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offset);
+        ofCodeTable[u] = (BYTE)ZSTD_highbit32(sequences[u].offBase);
         mlCodeTable[u] = (BYTE)ZSTD_MLcode(mlv);
     }
-    if (seqStorePtr->longLengthID==1)
+    if (seqStorePtr->longLengthType==ZSTD_llt_literalLength)
         llCodeTable[seqStorePtr->longLengthPos] = MaxLL;
-    if (seqStorePtr->longLengthID==2)
+    if (seqStorePtr->longLengthType==ZSTD_llt_matchLength)
         mlCodeTable[seqStorePtr->longLengthPos] = MaxML;
 }
 
@@ -2093,10 +2326,161 @@ static int ZSTD_useTargetCBlockSize(const ZSTD_CCtx_params* cctxParams)
     return (cctxParams->targetCBlockSize != 0);
 }
 
-/* ZSTD_entropyCompressSequences_internal():
- * actually compresses both literals and sequences */
+/* ZSTD_blockSplitterEnabled():
+ * Returns if block splitting param is being used
+ * If used, compression will do best effort to split a block in order to improve compression ratio.
+ * At the time this function is called, the parameter must be finalized.
+ * Returns 1 if true, 0 otherwise. */
+static int ZSTD_blockSplitterEnabled(ZSTD_CCtx_params* cctxParams)
+{
+    DEBUGLOG(5, "ZSTD_blockSplitterEnabled (useBlockSplitter=%d)", cctxParams->useBlockSplitter);
+    assert(cctxParams->useBlockSplitter != ZSTD_ps_auto);
+    return (cctxParams->useBlockSplitter == ZSTD_ps_enable);
+}
+
+/* Type returned by ZSTD_buildSequencesStatistics containing finalized symbol encoding types
+ * and size of the sequences statistics
+ */
+typedef struct {
+    U32 LLtype;
+    U32 Offtype;
+    U32 MLtype;
+    size_t size;
+    size_t lastCountSize; /* Accounts for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+} ZSTD_symbolEncodingTypeStats_t;
+
+/* ZSTD_buildSequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t, or a zstd error code in the `size` field.
+ * Modifies `nextEntropy` to have the appropriate values as a side effect.
+ * nbSeq must be greater than 0.
+ *
+ * entropyWkspSize must be of size at least ENTROPY_WORKSPACE_SIZE - (MaxSeq + 1)*sizeof(U32)
+ */
+static ZSTD_symbolEncodingTypeStats_t
+ZSTD_buildSequencesStatistics(seqStore_t* seqStorePtr, size_t nbSeq,
+                        const ZSTD_fseCTables_t* prevEntropy, ZSTD_fseCTables_t* nextEntropy,
+                              BYTE* dst, const BYTE* const dstEnd,
+                              ZSTD_strategy strategy, unsigned* countWorkspace,
+                              void* entropyWorkspace, size_t entropyWkspSize) {
+    BYTE* const ostart = dst;
+    const BYTE* const oend = dstEnd;
+    BYTE* op = ostart;
+    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
+    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
+    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
+    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
+    const BYTE* const llCodeTable = seqStorePtr->llCode;
+    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
+    ZSTD_symbolEncodingTypeStats_t stats;
+
+    stats.lastCountSize = 0;
+    /* convert length/distances into codes */
+    ZSTD_seqToCodes(seqStorePtr);
+    assert(op <= oend);
+    assert(nbSeq != 0); /* ZSTD_selectEncodingType() divides by nbSeq */
+    /* build CTable for Literal Lengths */
+    {   unsigned max = MaxLL;
+        size_t const mostFrequent = HIST_countFast_wksp(countWorkspace, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building LL table");
+        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
+        stats.LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        LLFSELog, prevEntropy->litlengthCTable,
+                                        LL_defaultNorm, LL_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(set_basic < set_compressed && set_rle < set_compressed);
+        assert(!(stats.LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_LitLength, LLFSELog, (symbolEncodingType_e)stats.LLtype,
+                countWorkspace, max, llCodeTable, nbSeq,
+                LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                prevEntropy->litlengthCTable,
+                sizeof(prevEntropy->litlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for LitLens failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.LLtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for Offsets */
+    {   unsigned max = MaxOff;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            countWorkspace, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
+        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
+        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
+        DEBUGLOG(5, "Building OF table");
+        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
+        stats.Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        OffFSELog, prevEntropy->offcodeCTable,
+                                        OF_defaultNorm, OF_defaultNormLog,
+                                        defaultPolicy, strategy);
+        assert(!(stats.Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)stats.Offtype,
+                countWorkspace, max, ofCodeTable, nbSeq,
+                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                prevEntropy->offcodeCTable,
+                sizeof(prevEntropy->offcodeCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for Offsets failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.Offtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    /* build CTable for MatchLengths */
+    {   unsigned max = MaxML;
+        size_t const mostFrequent = HIST_countFast_wksp(
+            countWorkspace, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
+        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
+        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
+        stats.MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
+                                        countWorkspace, max, mostFrequent, nbSeq,
+                                        MLFSELog, prevEntropy->matchlengthCTable,
+                                        ML_defaultNorm, ML_defaultNormLog,
+                                        ZSTD_defaultAllowed, strategy);
+        assert(!(stats.MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
+        {   size_t const countSize = ZSTD_buildCTable(
+                op, (size_t)(oend - op),
+                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)stats.MLtype,
+                countWorkspace, max, mlCodeTable, nbSeq,
+                ML_defaultNorm, ML_defaultNormLog, MaxML,
+                prevEntropy->matchlengthCTable,
+                sizeof(prevEntropy->matchlengthCTable),
+                entropyWorkspace, entropyWkspSize);
+            if (ZSTD_isError(countSize)) {
+                DEBUGLOG(3, "ZSTD_buildCTable for MatchLengths failed");
+                stats.size = countSize;
+                return stats;
+            }
+            if (stats.MLtype == set_compressed)
+                stats.lastCountSize = countSize;
+            op += countSize;
+            assert(op <= oend);
+    }   }
+    stats.size = (size_t)(op-ostart);
+    return stats;
+}
+
+/* ZSTD_entropyCompressSeqStore_internal():
+ * compresses both literals and sequences
+ * Returns compressed size of block, or a zstd error.
+ */
+#define SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO 20
 MEM_STATIC size_t
-ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSeqStore_internal(seqStore_t* seqStorePtr,
                           const ZSTD_entropyCTables_t* prevEntropy,
                                 ZSTD_entropyCTables_t* nextEntropy,
                           const ZSTD_CCtx_params* cctxParams,
@@ -2110,36 +2494,38 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
     FSE_CTable* CTable_LitLength = nextEntropy->fse.litlengthCTable;
     FSE_CTable* CTable_OffsetBits = nextEntropy->fse.offcodeCTable;
     FSE_CTable* CTable_MatchLength = nextEntropy->fse.matchlengthCTable;
-    U32 LLtype, Offtype, MLtype;   /* compressed, raw or rle */
     const seqDef* const sequences = seqStorePtr->sequencesStart;
+    const size_t nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
     const BYTE* const ofCodeTable = seqStorePtr->ofCode;
     const BYTE* const llCodeTable = seqStorePtr->llCode;
     const BYTE* const mlCodeTable = seqStorePtr->mlCode;
     BYTE* const ostart = (BYTE*)dst;
     BYTE* const oend = ostart + dstCapacity;
     BYTE* op = ostart;
-    size_t const nbSeq = (size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
-    BYTE* seqHead;
-    BYTE* lastNCount = NULL;
+    size_t lastCountSize;
 
     entropyWorkspace = count + (MaxSeq + 1);
     entropyWkspSize -= (MaxSeq + 1) * sizeof(*count);
 
-    DEBUGLOG(4, "ZSTD_entropyCompressSequences_internal (nbSeq=%zu)", nbSeq);
+    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore_internal (nbSeq=%zu)", nbSeq);
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(entropyWkspSize >= HUF_WORKSPACE_SIZE);
 
     /* Compress literals */
     {   const BYTE* const literals = seqStorePtr->litStart;
+        size_t const numSequences = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+        size_t const numLiterals = seqStorePtr->lit - seqStorePtr->litStart;
+        /* Base suspicion of uncompressibility on ratio of literals to sequences */
+        unsigned const suspectUncompressible = (numSequences == 0) || (numLiterals / numSequences >= SUSPECT_UNCOMPRESSIBLE_LITERAL_RATIO);
         size_t const litSize = (size_t)(seqStorePtr->lit - literals);
         size_t const cSize = ZSTD_compressLiterals(
                                     &prevEntropy->huf, &nextEntropy->huf,
                                     cctxParams->cParams.strategy,
-                                    ZSTD_disableLiteralsCompression(cctxParams),
+                                    ZSTD_literalsCompressionIsDisabled(cctxParams),
                                     op, dstCapacity,
                                     literals, litSize,
                                     entropyWorkspace, entropyWkspSize,
-                                    bmi2);
+                                    bmi2, suspectUncompressible);
         FORWARD_IF_ERROR(cSize, "ZSTD_compressLiterals failed");
         assert(cSize <= dstCapacity);
         op += cSize;
@@ -2165,95 +2551,20 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
         ZSTD_memcpy(&nextEntropy->fse, &prevEntropy->fse, sizeof(prevEntropy->fse));
         return (size_t)(op - ostart);
     }
-
-    /* seqHead : flags for FSE encoding type */
-    seqHead = op++;
-    assert(op <= oend);
-
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    /* build CTable for Literal Lengths */
-    {   unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(count, &max, llCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->fse.litlength_repeatMode = prevEntropy->fse.litlength_repeatMode;
-        LLtype = ZSTD_selectEncodingType(&nextEntropy->fse.litlength_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->fse.litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(LLtype < set_compressed && nextEntropy->fse.litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                count, max, llCodeTable, nbSeq,
-                LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                prevEntropy->fse.litlengthCTable,
-                sizeof(prevEntropy->fse.litlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
-            if (LLtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for Offsets */
-    {   unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            count, &max, ofCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->fse.offcode_repeatMode = prevEntropy->fse.offcode_repeatMode;
-        Offtype = ZSTD_selectEncodingType(&nextEntropy->fse.offcode_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->fse.offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(Offtype < set_compressed && nextEntropy->fse.offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                count, max, ofCodeTable, nbSeq,
-                OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                prevEntropy->fse.offcodeCTable,
-                sizeof(prevEntropy->fse.offcodeCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
-            if (Offtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-    /* build CTable for MatchLengths */
-    {   unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(
-            count, &max, mlCodeTable, nbSeq, entropyWorkspace, entropyWkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->fse.matchlength_repeatMode = prevEntropy->fse.matchlength_repeatMode;
-        MLtype = ZSTD_selectEncodingType(&nextEntropy->fse.matchlength_repeatMode,
-                                        count, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->fse.matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(MLtype < set_compressed && nextEntropy->fse.matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(
-                op, (size_t)(oend - op),
-                CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                count, max, mlCodeTable, nbSeq,
-                ML_defaultNorm, ML_defaultNormLog, MaxML,
-                prevEntropy->fse.matchlengthCTable,
-                sizeof(prevEntropy->fse.matchlengthCTable),
-                entropyWorkspace, entropyWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
-            if (MLtype == set_compressed)
-                lastNCount = op;
-            op += countSize;
-            assert(op <= oend);
-    }   }
-
-    *seqHead = (BYTE)((LLtype<<6) + (Offtype<<4) + (MLtype<<2));
+    {
+        ZSTD_symbolEncodingTypeStats_t stats;
+        BYTE* seqHead = op++;
+        /* build stats for sequences */
+        stats = ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+                                             &prevEntropy->fse, &nextEntropy->fse,
+                                              op, oend,
+                                              strategy, count,
+                                              entropyWorkspace, entropyWkspSize);
+        FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+        *seqHead = (BYTE)((stats.LLtype<<6) + (stats.Offtype<<4) + (stats.MLtype<<2));
+        lastCountSize = stats.lastCountSize;
+        op += stats.size;
+    }
 
     {   size_t const bitstreamSize = ZSTD_encodeSequences(
                                         op, (size_t)(oend - op),
@@ -2273,9 +2584,9 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
          * In this exceedingly rare case, we will simply emit an uncompressed
          * block, since it isn't worth optimizing.
          */
-        if (lastNCount && (op - lastNCount) < 4) {
-            /* NCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
-            assert(op - lastNCount == 3);
+        if (lastCountSize && (lastCountSize + bitstreamSize) < 4) {
+            /* lastCountSize >= 2 && bitstreamSize > 0 ==> lastCountSize == 3 */
+            assert(lastCountSize + bitstreamSize == 3);
             DEBUGLOG(5, "Avoiding bug in zstd decoder in versions <= 1.3.4 by "
                         "emitting an uncompressed block.");
             return 0;
@@ -2287,7 +2598,7 @@ ZSTD_entropyCompressSequences_internal(seqStore_t* seqStorePtr,
 }
 
 MEM_STATIC size_t
-ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
+ZSTD_entropyCompressSeqStore(seqStore_t* seqStorePtr,
                        const ZSTD_entropyCTables_t* prevEntropy,
                              ZSTD_entropyCTables_t* nextEntropy,
                        const ZSTD_CCtx_params* cctxParams,
@@ -2296,7 +2607,7 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
                              void* entropyWorkspace, size_t entropyWkspSize,
                              int bmi2)
 {
-    size_t const cSize = ZSTD_entropyCompressSequences_internal(
+    size_t const cSize = ZSTD_entropyCompressSeqStore_internal(
                             seqStorePtr, prevEntropy, nextEntropy, cctxParams,
                             dst, dstCapacity,
                             entropyWorkspace, entropyWkspSize, bmi2);
@@ -2306,20 +2617,20 @@ ZSTD_entropyCompressSequences(seqStore_t* seqStorePtr,
      */
     if ((cSize == ERROR(dstSize_tooSmall)) & (srcSize <= dstCapacity))
         return 0;  /* block not compressed */
-    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSequences_internal failed");
+    FORWARD_IF_ERROR(cSize, "ZSTD_entropyCompressSeqStore_internal failed");
 
     /* Check compressibility */
     {   size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, cctxParams->cParams.strategy);
         if (cSize >= maxCSize) return 0;  /* block not compressed */
     }
-    DEBUGLOG(4, "ZSTD_entropyCompressSequences() cSize: %zu\n", cSize);
+    DEBUGLOG(4, "ZSTD_entropyCompressSeqStore() cSize: %zu", cSize);
     return cSize;
 }
 
 /* ZSTD_selectBlockCompressor() :
  * Not static, but internal use only (used by long distance matcher)
  * assumption : strat is a valid strategy */
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode)
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e useRowMatchFinder, ZSTD_dictMode_e dictMode)
 {
     static const ZSTD_blockCompressor blockCompressor[4][ZSTD_STRATEGY_MAX+1] = {
         { ZSTD_compressBlock_fast  /* default for 0 */,
@@ -2367,7 +2678,28 @@ ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMo
     ZSTD_STATIC_ASSERT((unsigned)ZSTD_fast == 1);
 
     assert(ZSTD_cParam_withinBounds(ZSTD_c_strategy, strat));
-    selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    DEBUGLOG(4, "Selected block compressor: dictMode=%d strat=%d rowMatchfinder=%d", (int)dictMode, (int)strat, (int)useRowMatchFinder);
+    if (ZSTD_rowMatchFinderUsed(strat, useRowMatchFinder)) {
+        static const ZSTD_blockCompressor rowBasedBlockCompressors[4][3] = {
+            { ZSTD_compressBlock_greedy_row,
+            ZSTD_compressBlock_lazy_row,
+            ZSTD_compressBlock_lazy2_row },
+            { ZSTD_compressBlock_greedy_extDict_row,
+            ZSTD_compressBlock_lazy_extDict_row,
+            ZSTD_compressBlock_lazy2_extDict_row },
+            { ZSTD_compressBlock_greedy_dictMatchState_row,
+            ZSTD_compressBlock_lazy_dictMatchState_row,
+            ZSTD_compressBlock_lazy2_dictMatchState_row },
+            { ZSTD_compressBlock_greedy_dedicatedDictSearch_row,
+            ZSTD_compressBlock_lazy_dedicatedDictSearch_row,
+            ZSTD_compressBlock_lazy2_dedicatedDictSearch_row }
+        };
+        DEBUGLOG(4, "Selecting a row-based matchfinder");
+        assert(useRowMatchFinder != ZSTD_ps_auto);
+        selectedCompressor = rowBasedBlockCompressors[(int)dictMode][(int)strat - (int)ZSTD_greedy];
+    } else {
+        selectedCompressor = blockCompressor[(int)dictMode][(int)strat];
+    }
     assert(selectedCompressor != NULL);
     return selectedCompressor;
 }
@@ -2383,7 +2715,7 @@ void ZSTD_resetSeqStore(seqStore_t* ssPtr)
 {
     ssPtr->lit = ssPtr->litStart;
     ssPtr->sequences = ssPtr->sequencesStart;
-    ssPtr->longLengthID = 0;
+    ssPtr->longLengthType = ZSTD_llt_none;
 }
 
 typedef enum { ZSTDbss_compress, ZSTDbss_noCompress } ZSTD_buildSeqStore_e;
@@ -2430,15 +2762,16 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                 zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
         }
         if (zc->externSeqStore.pos < zc->externSeqStore.size) {
-            assert(!zc->appliedParams.ldmParams.enableLdm);
+            assert(zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_disable);
             /* Updates ldmSeqStore.pos */
             lastLLSize =
                 ZSTD_ldm_blockCompress(&zc->externSeqStore,
                                        ms, &zc->seqStore,
                                        zc->blockState.nextCBlock->rep,
+                                       zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
-        } else if (zc->appliedParams.ldmParams.enableLdm) {
+        } else if (zc->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
             rawSeqStore_t ldmSeqStore = kNullRawSeqStore;
 
             ldmSeqStore.seq = zc->ldmSequences;
@@ -2452,10 +2785,13 @@ static size_t ZSTD_buildSeqStore(ZSTD_CCtx* zc, const void* src, size_t srcSize)
                 ZSTD_ldm_blockCompress(&ldmSeqStore,
                                        ms, &zc->seqStore,
                                        zc->blockState.nextCBlock->rep,
+                                       zc->appliedParams.useRowMatchFinder,
                                        src, srcSize);
             assert(ldmSeqStore.pos == ldmSeqStore.size);
         } else {   /* not long range mode */
-            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, dictMode);
+            ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy,
+                                                                                    zc->appliedParams.useRowMatchFinder,
+                                                                                    dictMode);
             ms->ldmSeqStore = NULL;
             lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, src, srcSize);
         }
@@ -2483,22 +2819,22 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
     assert(zc->seqCollector.maxSequences >= seqStoreSeqSize + 1);
     ZSTD_memcpy(updatedRepcodes.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
     for (i = 0; i < seqStoreSeqSize; ++i) {
-        U32 rawOffset = seqStoreSeqs[i].offset - ZSTD_REP_NUM;
+        U32 rawOffset = seqStoreSeqs[i].offBase - ZSTD_REP_NUM;
         outSeqs[i].litLength = seqStoreSeqs[i].litLength;
-        outSeqs[i].matchLength = seqStoreSeqs[i].matchLength + MINMATCH;
+        outSeqs[i].matchLength = seqStoreSeqs[i].mlBase + MINMATCH;
         outSeqs[i].rep = 0;
 
         if (i == seqStore->longLengthPos) {
-            if (seqStore->longLengthID == 1) {
+            if (seqStore->longLengthType == ZSTD_llt_literalLength) {
                 outSeqs[i].litLength += 0x10000;
-            } else if (seqStore->longLengthID == 2) {
+            } else if (seqStore->longLengthType == ZSTD_llt_matchLength) {
                 outSeqs[i].matchLength += 0x10000;
             }
         }
 
-        if (seqStoreSeqs[i].offset <= ZSTD_REP_NUM) {
+        if (seqStoreSeqs[i].offBase <= ZSTD_REP_NUM) {
             /* Derive the correct offset corresponding to a repcode */
-            outSeqs[i].rep = seqStoreSeqs[i].offset;
+            outSeqs[i].rep = seqStoreSeqs[i].offBase;
             if (outSeqs[i].litLength != 0) {
                 rawOffset = updatedRepcodes.rep[outSeqs[i].rep - 1];
             } else {
@@ -2512,9 +2848,9 @@ static void ZSTD_copyBlockSequences(ZSTD_CCtx* zc)
         outSeqs[i].offset = rawOffset;
         /* seqStoreSeqs[i].offset == offCode+1, and ZSTD_updateRep() expects offCode
            so we provide seqStoreSeqs[i].offset - 1 */
-        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep,
-                                         seqStoreSeqs[i].offset - 1,
-                                         seqStoreSeqs[i].litLength == 0);
+        ZSTD_updateRep(updatedRepcodes.rep,
+                       seqStoreSeqs[i].offBase - 1,
+                       seqStoreSeqs[i].litLength == 0);
         literalsRead += outSeqs[i].litLength;
     }
     /* Insert last literals (if any exist) in the block as a sequence with ml == off == 0.
@@ -2602,16 +2938,740 @@ static int ZSTD_maybeRLE(seqStore_t const* seqStore)
     return nbSeqs < 4 && nbLits < 10;
 }
 
-static void ZSTD_confirmRepcodesAndEntropyTables(ZSTD_CCtx* zc)
+static void ZSTD_blockState_confirmRepcodesAndEntropyTables(ZSTD_blockState_t* const bs)
+{
+    ZSTD_compressedBlockState_t* const tmp = bs->prevCBlock;
+    bs->prevCBlock = bs->nextCBlock;
+    bs->nextCBlock = tmp;
+}
+
+/* Writes the block header */
+static void writeBlockHeader(void* op, size_t cSize, size_t blockSize, U32 lastBlock) {
+    U32 const cBlockHeader = cSize == 1 ?
+                        lastBlock + (((U32)bt_rle)<<1) + (U32)(blockSize << 3) :
+                        lastBlock + (((U32)bt_compressed)<<1) + (U32)(cSize << 3);
+    MEM_writeLE24(op, cBlockHeader);
+    DEBUGLOG(3, "writeBlockHeader: cSize: %zu blockSize: %zu lastBlock: %u", cSize, blockSize, lastBlock);
+}
+
+/* ZSTD_buildBlockEntropyStats_literals() :
+ *  Builds entropy for the literals.
+ *  Stores literals block type (raw, rle, compressed, repeat) and
+ *  huffman description table to hufMetadata.
+ *  Requires ENTROPY_WORKSPACE_SIZE workspace
+ *  @return : size of huffman description table or error code */
+static size_t ZSTD_buildBlockEntropyStats_literals(void* const src, size_t srcSize,
+                                            const ZSTD_hufCTables_t* prevHuf,
+                                                  ZSTD_hufCTables_t* nextHuf,
+                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                  const int literalsCompressionIsDisabled,
+                                                  void* workspace, size_t wkspSize)
+{
+    BYTE* const wkspStart = (BYTE*)workspace;
+    BYTE* const wkspEnd = wkspStart + wkspSize;
+    BYTE* const countWkspStart = wkspStart;
+    unsigned* const countWksp = (unsigned*)workspace;
+    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
+    BYTE* const nodeWksp = countWkspStart + countWkspSize;
+    const size_t nodeWkspSize = wkspEnd-nodeWksp;
+    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    unsigned huffLog = HUF_TABLELOG_DEFAULT;
+    HUF_repeat repeat = prevHuf->repeatMode;
+    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_literals (srcSize=%zu)", srcSize);
+
+    /* Prepare nextEntropy assuming reusing the existing table */
+    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+
+    if (literalsCompressionIsDisabled) {
+        DEBUGLOG(5, "set_basic - disabled");
+        hufMetadata->hType = set_basic;
+        return 0;
+    }
+
+    /* small ? don't even attempt compression (speed opt) */
+#ifndef COMPRESS_LITERALS_SIZE_MIN
+#define COMPRESS_LITERALS_SIZE_MIN 63
+#endif
+    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
+        if (srcSize <= minLitSize) {
+            DEBUGLOG(5, "set_basic - too small");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Scan input and build symbol stats */
+    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
+        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
+        if (largest == srcSize) {
+            DEBUGLOG(5, "set_rle");
+            hufMetadata->hType = set_rle;
+            return 0;
+        }
+        if (largest <= (srcSize >> 7)+4) {
+            DEBUGLOG(5, "set_basic - no gain");
+            hufMetadata->hType = set_basic;
+            return 0;
+        }
+    }
+
+    /* Validate the previous Huffman table */
+    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
+        repeat = HUF_repeat_none;
+    }
+
+    /* Build Huffman Tree */
+    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
+    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
+    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
+                                                    maxSymbolValue, huffLog,
+                                                    nodeWksp, nodeWkspSize);
+        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
+        huffLog = (U32)maxBits;
+        {   /* Build and write the CTable */
+            size_t const newCSize = HUF_estimateCompressedSize(
+                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
+            size_t const hSize = HUF_writeCTable_wksp(
+                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
+                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
+                    nodeWksp, nodeWkspSize);
+            /* Check against repeating the previous CTable */
+            if (repeat != HUF_repeat_none) {
+                size_t const oldCSize = HUF_estimateCompressedSize(
+                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
+                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
+                    DEBUGLOG(5, "set_repeat - smaller");
+                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                    hufMetadata->hType = set_repeat;
+                    return 0;
+                }
+            }
+            if (newCSize + hSize >= srcSize) {
+                DEBUGLOG(5, "set_basic - no gains");
+                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
+                hufMetadata->hType = set_basic;
+                return 0;
+            }
+            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
+            hufMetadata->hType = set_compressed;
+            nextHuf->repeatMode = HUF_repeat_check;
+            return hSize;
+        }
+    }
+}
+
+
+/* ZSTD_buildDummySequencesStatistics():
+ * Returns a ZSTD_symbolEncodingTypeStats_t with all encoding types as set_basic,
+ * and updates nextEntropy to the appropriate repeatMode.
+ */
+static ZSTD_symbolEncodingTypeStats_t
+ZSTD_buildDummySequencesStatistics(ZSTD_fseCTables_t* nextEntropy) {
+    ZSTD_symbolEncodingTypeStats_t stats = {set_basic, set_basic, set_basic, 0, 0};
+    nextEntropy->litlength_repeatMode = FSE_repeat_none;
+    nextEntropy->offcode_repeatMode = FSE_repeat_none;
+    nextEntropy->matchlength_repeatMode = FSE_repeat_none;
+    return stats;
+}
+
+/* ZSTD_buildBlockEntropyStats_sequences() :
+ *  Builds entropy for the sequences.
+ *  Stores symbol compression modes and fse table to fseMetadata.
+ *  Requires ENTROPY_WORKSPACE_SIZE wksp.
+ *  @return : size of fse tables or error code */
+static size_t ZSTD_buildBlockEntropyStats_sequences(seqStore_t* seqStorePtr,
+                                              const ZSTD_fseCTables_t* prevEntropy,
+                                                    ZSTD_fseCTables_t* nextEntropy,
+                                              const ZSTD_CCtx_params* cctxParams,
+                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                    void* workspace, size_t wkspSize)
+{
+    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
+    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
+    BYTE* const ostart = fseMetadata->fseTablesBuffer;
+    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
+    BYTE* op = ostart;
+    unsigned* countWorkspace = (unsigned*)workspace;
+    unsigned* entropyWorkspace = countWorkspace + (MaxSeq + 1);
+    size_t entropyWorkspaceSize = wkspSize - (MaxSeq + 1) * sizeof(*countWorkspace);
+    ZSTD_symbolEncodingTypeStats_t stats;
+
+    DEBUGLOG(5, "ZSTD_buildBlockEntropyStats_sequences (nbSeq=%zu)", nbSeq);
+    stats = nbSeq != 0 ? ZSTD_buildSequencesStatistics(seqStorePtr, nbSeq,
+                                          prevEntropy, nextEntropy, op, oend,
+                                          strategy, countWorkspace,
+                                          entropyWorkspace, entropyWorkspaceSize)
+                       : ZSTD_buildDummySequencesStatistics(nextEntropy);
+    FORWARD_IF_ERROR(stats.size, "ZSTD_buildSequencesStatistics failed!");
+    fseMetadata->llType = (symbolEncodingType_e) stats.LLtype;
+    fseMetadata->ofType = (symbolEncodingType_e) stats.Offtype;
+    fseMetadata->mlType = (symbolEncodingType_e) stats.MLtype;
+    fseMetadata->lastCountSize = stats.lastCountSize;
+    return stats.size;
+}
+
+
+/* ZSTD_buildBlockEntropyStats() :
+ *  Builds entropy for the block.
+ *  Requires workspace size ENTROPY_WORKSPACE_SIZE
+ *
+ *  @return : 0 on success or error code
+ */
+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+                             const ZSTD_entropyCTables_t* prevEntropy,
+                                   ZSTD_entropyCTables_t* nextEntropy,
+                             const ZSTD_CCtx_params* cctxParams,
+                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                   void* workspace, size_t wkspSize)
+{
+    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
+    entropyMetadata->hufMetadata.hufDesSize =
+        ZSTD_buildBlockEntropyStats_literals(seqStorePtr->litStart, litSize,
+                                            &prevEntropy->huf, &nextEntropy->huf,
+                                            &entropyMetadata->hufMetadata,
+                                            ZSTD_literalsCompressionIsDisabled(cctxParams),
+                                            workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildBlockEntropyStats_literals failed");
+    entropyMetadata->fseMetadata.fseTablesSize =
+        ZSTD_buildBlockEntropyStats_sequences(seqStorePtr,
+                                              &prevEntropy->fse, &nextEntropy->fse,
+                                              cctxParams,
+                                              &entropyMetadata->fseMetadata,
+                                              workspace, wkspSize);
+    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildBlockEntropyStats_sequences failed");
+    return 0;
+}
+
+/* Returns the size estimate for the literals section (header + content) of a block */
+static size_t ZSTD_estimateBlockSize_literal(const BYTE* literals, size_t litSize,
+                                                const ZSTD_hufCTables_t* huf,
+                                                const ZSTD_hufCTablesMetadata_t* hufMetadata,
+                                                void* workspace, size_t wkspSize,
+                                                int writeEntropy)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    unsigned maxSymbolValue = HUF_SYMBOLVALUE_MAX;
+    size_t literalSectionHeaderSize = 3 + (litSize >= 1 KB) + (litSize >= 16 KB);
+    U32 singleStream = litSize < 256;
+
+    if (hufMetadata->hType == set_basic) return litSize;
+    else if (hufMetadata->hType == set_rle) return 1;
+    else if (hufMetadata->hType == set_compressed || hufMetadata->hType == set_repeat) {
+        size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)literals, litSize, workspace, wkspSize);
+        if (ZSTD_isError(largest)) return litSize;
+        {   size_t cLitSizeEstimate = HUF_estimateCompressedSize((const HUF_CElt*)huf->CTable, countWksp, maxSymbolValue);
+            if (writeEntropy) cLitSizeEstimate += hufMetadata->hufDesSize;
+            if (!singleStream) cLitSizeEstimate += 6; /* multi-stream huffman uses 6-byte jump table */
+            return cLitSizeEstimate + literalSectionHeaderSize;
+    }   }
+    assert(0); /* impossible */
+    return 0;
+}
+
+/* Returns the size estimate for the FSE-compressed symbols (of, ml, ll) of a block */
+static size_t ZSTD_estimateBlockSize_symbolType(symbolEncodingType_e type,
+                        const BYTE* codeTable, size_t nbSeq, unsigned maxCode,
+                        const FSE_CTable* fseCTable,
+                        const U8* additionalBits,
+                        short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
+                        void* workspace, size_t wkspSize)
+{
+    unsigned* const countWksp = (unsigned*)workspace;
+    const BYTE* ctp = codeTable;
+    const BYTE* const ctStart = ctp;
+    const BYTE* const ctEnd = ctStart + nbSeq;
+    size_t cSymbolTypeSizeEstimateInBits = 0;
+    unsigned max = maxCode;
+
+    HIST_countFast_wksp(countWksp, &max, codeTable, nbSeq, workspace, wkspSize);  /* can't fail */
+    if (type == set_basic) {
+        /* We selected this encoding type, so it must be valid. */
+        assert(max <= defaultMax);
+        (void)defaultMax;
+        cSymbolTypeSizeEstimateInBits = ZSTD_crossEntropyCost(defaultNorm, defaultNormLog, countWksp, max);
+    } else if (type == set_rle) {
+        cSymbolTypeSizeEstimateInBits = 0;
+    } else if (type == set_compressed || type == set_repeat) {
+        cSymbolTypeSizeEstimateInBits = ZSTD_fseBitCost(fseCTable, countWksp, max);
+    }
+    if (ZSTD_isError(cSymbolTypeSizeEstimateInBits)) {
+        return nbSeq * 10;
+    }
+    while (ctp < ctEnd) {
+        if (additionalBits) cSymbolTypeSizeEstimateInBits += additionalBits[*ctp];
+        else cSymbolTypeSizeEstimateInBits += *ctp; /* for offset, offset code is also the number of additional bits */
+        ctp++;
+    }
+    return cSymbolTypeSizeEstimateInBits >> 3;
+}
+
+/* Returns the size estimate for the sequences section (header + content) of a block */
+static size_t ZSTD_estimateBlockSize_sequences(const BYTE* ofCodeTable,
+                                                  const BYTE* llCodeTable,
+                                                  const BYTE* mlCodeTable,
+                                                  size_t nbSeq,
+                                                  const ZSTD_fseCTables_t* fseTables,
+                                                  const ZSTD_fseCTablesMetadata_t* fseMetadata,
+                                                  void* workspace, size_t wkspSize,
+                                                  int writeEntropy)
+{
+    size_t sequencesSectionHeaderSize = 1 /* seqHead */ + 1 /* min seqSize size */ + (nbSeq >= 128) + (nbSeq >= LONGNBSEQ);
+    size_t cSeqSizeEstimate = 0;
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, nbSeq, MaxOff,
+                                         fseTables->offcodeCTable, NULL,
+                                         OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->llType, llCodeTable, nbSeq, MaxLL,
+                                         fseTables->litlengthCTable, LL_bits,
+                                         LL_defaultNorm, LL_defaultNormLog, MaxLL,
+                                         workspace, wkspSize);
+    cSeqSizeEstimate += ZSTD_estimateBlockSize_symbolType(fseMetadata->mlType, mlCodeTable, nbSeq, MaxML,
+                                         fseTables->matchlengthCTable, ML_bits,
+                                         ML_defaultNorm, ML_defaultNormLog, MaxML,
+                                         workspace, wkspSize);
+    if (writeEntropy) cSeqSizeEstimate += fseMetadata->fseTablesSize;
+    return cSeqSizeEstimate + sequencesSectionHeaderSize;
+}
+
+/* Returns the size estimate for a given stream of literals, of, ll, ml */
+static size_t ZSTD_estimateBlockSize(const BYTE* literals, size_t litSize,
+                                     const BYTE* ofCodeTable,
+                                     const BYTE* llCodeTable,
+                                     const BYTE* mlCodeTable,
+                                     size_t nbSeq,
+                                     const ZSTD_entropyCTables_t* entropy,
+                                     const ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                     void* workspace, size_t wkspSize,
+                                     int writeLitEntropy, int writeSeqEntropy) {
+    size_t const literalsSize = ZSTD_estimateBlockSize_literal(literals, litSize,
+                                                         &entropy->huf, &entropyMetadata->hufMetadata,
+                                                         workspace, wkspSize, writeLitEntropy);
+    size_t const seqSize = ZSTD_estimateBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable,
+                                                         nbSeq, &entropy->fse, &entropyMetadata->fseMetadata,
+                                                         workspace, wkspSize, writeSeqEntropy);
+    return seqSize + literalsSize + ZSTD_blockHeaderSize;
+}
+
+/* Builds entropy statistics and uses them for blocksize estimation.
+ *
+ * Returns the estimated compressed size of the seqStore, or a zstd error.
+ */
+static size_t ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(seqStore_t* seqStore, ZSTD_CCtx* zc) {
+    ZSTD_entropyCTablesMetadata_t* entropyMetadata = &zc->blockSplitCtx.entropyMetadata;
+    DEBUGLOG(6, "ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize()");
+    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(seqStore,
+                    &zc->blockState.prevCBlock->entropy,
+                    &zc->blockState.nextCBlock->entropy,
+                    &zc->appliedParams,
+                    entropyMetadata,
+                    zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */), "");
+    return ZSTD_estimateBlockSize(seqStore->litStart, (size_t)(seqStore->lit - seqStore->litStart),
+                    seqStore->ofCode, seqStore->llCode, seqStore->mlCode,
+                    (size_t)(seqStore->sequences - seqStore->sequencesStart),
+                    &zc->blockState.nextCBlock->entropy, entropyMetadata, zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE,
+                    (int)(entropyMetadata->hufMetadata.hType == set_compressed), 1);
+}
+
+/* Returns literals bytes represented in a seqStore */
+static size_t ZSTD_countSeqStoreLiteralsBytes(const seqStore_t* const seqStore) {
+    size_t literalsBytes = 0;
+    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        seqDef seq = seqStore->sequencesStart[i];
+        literalsBytes += seq.litLength;
+        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_literalLength) {
+            literalsBytes += 0x10000;
+        }
+    }
+    return literalsBytes;
+}
+
+/* Returns match bytes represented in a seqStore */
+static size_t ZSTD_countSeqStoreMatchBytes(const seqStore_t* const seqStore) {
+    size_t matchBytes = 0;
+    size_t const nbSeqs = seqStore->sequences - seqStore->sequencesStart;
+    size_t i;
+    for (i = 0; i < nbSeqs; ++i) {
+        seqDef seq = seqStore->sequencesStart[i];
+        matchBytes += seq.mlBase + MINMATCH;
+        if (i == seqStore->longLengthPos && seqStore->longLengthType == ZSTD_llt_matchLength) {
+            matchBytes += 0x10000;
+        }
+    }
+    return matchBytes;
+}
+
+/* Derives the seqStore that is a chunk of the originalSeqStore from [startIdx, endIdx).
+ * Stores the result in resultSeqStore.
+ */
+static void ZSTD_deriveSeqStoreChunk(seqStore_t* resultSeqStore,
+                               const seqStore_t* originalSeqStore,
+                                     size_t startIdx, size_t endIdx) {
+    BYTE* const litEnd = originalSeqStore->lit;
+    size_t literalsBytes;
+    size_t literalsBytesPreceding = 0;
+
+    *resultSeqStore = *originalSeqStore;
+    if (startIdx > 0) {
+        resultSeqStore->sequences = originalSeqStore->sequencesStart + startIdx;
+        literalsBytesPreceding = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+    }
+
+    /* Move longLengthPos into the correct position if necessary */
+    if (originalSeqStore->longLengthType != ZSTD_llt_none) {
+        if (originalSeqStore->longLengthPos < startIdx || originalSeqStore->longLengthPos > endIdx) {
+            resultSeqStore->longLengthType = ZSTD_llt_none;
+        } else {
+            resultSeqStore->longLengthPos -= (U32)startIdx;
+        }
+    }
+    resultSeqStore->sequencesStart = originalSeqStore->sequencesStart + startIdx;
+    resultSeqStore->sequences = originalSeqStore->sequencesStart + endIdx;
+    literalsBytes = ZSTD_countSeqStoreLiteralsBytes(resultSeqStore);
+    resultSeqStore->litStart += literalsBytesPreceding;
+    if (endIdx == (size_t)(originalSeqStore->sequences - originalSeqStore->sequencesStart)) {
+        /* This accounts for possible last literals if the derived chunk reaches the end of the block */
+        resultSeqStore->lit = litEnd;
+    } else {
+        resultSeqStore->lit = resultSeqStore->litStart+literalsBytes;
+    }
+    resultSeqStore->llCode += startIdx;
+    resultSeqStore->mlCode += startIdx;
+    resultSeqStore->ofCode += startIdx;
+}
+
+/*
+ * Returns the raw offset represented by the combination of offCode, ll0, and repcode history.
+ * offCode must represent a repcode in the numeric representation of ZSTD_storeSeq().
+ */
+static U32
+ZSTD_resolveRepcodeToRawOffset(const U32 rep[ZSTD_REP_NUM], const U32 offCode, const U32 ll0)
+{
+    U32 const adjustedOffCode = STORED_REPCODE(offCode) - 1 + ll0;  /* [ 0 - 3 ] */
+    assert(STORED_IS_REPCODE(offCode));
+    if (adjustedOffCode == ZSTD_REP_NUM) {
+        /* litlength == 0 and offCode == 2 implies selection of first repcode - 1 */
+        assert(rep[0] > 0);
+        return rep[0] - 1;
+    }
+    return rep[adjustedOffCode];
+}
+
+/*
+ * ZSTD_seqStore_resolveOffCodes() reconciles any possible divergences in offset history that may arise
+ * due to emission of RLE/raw blocks that disturb the offset history,
+ * and replaces any repcodes within the seqStore that may be invalid.
+ *
+ * dRepcodes are updated as would be on the decompression side.
+ * cRepcodes are updated exactly in accordance with the seqStore.
+ *
+ * Note : this function assumes seq->offBase respects the following numbering scheme :
+ *        0 : invalid
+ *        1-3 : repcode 1-3
+ *        4+ : real_offset+3
+ */
+static void ZSTD_seqStore_resolveOffCodes(repcodes_t* const dRepcodes, repcodes_t* const cRepcodes,
+                                          seqStore_t* const seqStore, U32 const nbSeq) {
+    U32 idx = 0;
+    for (; idx < nbSeq; ++idx) {
+        seqDef* const seq = seqStore->sequencesStart + idx;
+        U32 const ll0 = (seq->litLength == 0);
+        U32 const offCode = OFFBASE_TO_STORED(seq->offBase);
+        assert(seq->offBase > 0);
+        if (STORED_IS_REPCODE(offCode)) {
+            U32 const dRawOffset = ZSTD_resolveRepcodeToRawOffset(dRepcodes->rep, offCode, ll0);
+            U32 const cRawOffset = ZSTD_resolveRepcodeToRawOffset(cRepcodes->rep, offCode, ll0);
+            /* Adjust simulated decompression repcode history if we come across a mismatch. Replace
+             * the repcode with the offset it actually references, determined by the compression
+             * repcode history.
+             */
+            if (dRawOffset != cRawOffset) {
+                seq->offBase = cRawOffset + ZSTD_REP_NUM;
+            }
+        }
+        /* Compression repcode history is always updated with values directly from the unmodified seqStore.
+         * Decompression repcode history may use modified seq->offset value taken from compression repcode history.
+         */
+        ZSTD_updateRep(dRepcodes->rep, OFFBASE_TO_STORED(seq->offBase), ll0);
+        ZSTD_updateRep(cRepcodes->rep, offCode, ll0);
+    }
+}
+
+/* ZSTD_compressSeqStore_singleBlock():
+ * Compresses a seqStore into a block with a block header, into the buffer dst.
+ *
+ * Returns the total size of that block (including header) or a ZSTD error code.
+ */
+static size_t
+ZSTD_compressSeqStore_singleBlock(ZSTD_CCtx* zc, seqStore_t* const seqStore,
+                                  repcodes_t* const dRep, repcodes_t* const cRep,
+                                  void* dst, size_t dstCapacity,
+                                  const void* src, size_t srcSize,
+                                  U32 lastBlock, U32 isPartition)
 {
-    ZSTD_compressedBlockState_t* const tmp = zc->blockState.prevCBlock;
-    zc->blockState.prevCBlock = zc->blockState.nextCBlock;
-    zc->blockState.nextCBlock = tmp;
+    const U32 rleMaxLength = 25;
+    BYTE* op = (BYTE*)dst;
+    const BYTE* ip = (const BYTE*)src;
+    size_t cSize;
+    size_t cSeqsSize;
+
+    /* In case of an RLE or raw block, the simulated decompression repcode history must be reset */
+    repcodes_t const dRepOriginal = *dRep;
+    DEBUGLOG(5, "ZSTD_compressSeqStore_singleBlock");
+    if (isPartition)
+        ZSTD_seqStore_resolveOffCodes(dRep, cRep, seqStore, (U32)(seqStore->sequences - seqStore->sequencesStart));
+
+    RETURN_ERROR_IF(dstCapacity < ZSTD_blockHeaderSize, dstSize_tooSmall, "Block header doesn't fit");
+    cSeqsSize = ZSTD_entropyCompressSeqStore(seqStore,
+                &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
+                &zc->appliedParams,
+                op + ZSTD_blockHeaderSize, dstCapacity - ZSTD_blockHeaderSize,
+                srcSize,
+                zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
+                zc->bmi2);
+    FORWARD_IF_ERROR(cSeqsSize, "ZSTD_entropyCompressSeqStore failed!");
+
+    if (!zc->isFirstBlock &&
+        cSeqsSize < rleMaxLength &&
+        ZSTD_isRLE((BYTE const*)src, srcSize)) {
+        /* We don't want to emit our first block as a RLE even if it qualifies because
+        * doing so will cause the decoder (cli only) to throw a "should consume all input error."
+        * This is only an issue for zstd <= v1.4.3
+        */
+        cSeqsSize = 1;
+    }
+
+    if (zc->seqCollector.collectSequences) {
+        ZSTD_copyBlockSequences(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+        return 0;
+    }
+
+    if (cSeqsSize == 0) {
+        cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+        FORWARD_IF_ERROR(cSize, "Nocompress block failed");
+        DEBUGLOG(4, "Writing out nocompress block, size: %zu", cSize);
+        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+    } else if (cSeqsSize == 1) {
+        cSize = ZSTD_rleCompressBlock(op, dstCapacity, *ip, srcSize, lastBlock);
+        FORWARD_IF_ERROR(cSize, "RLE compress block failed");
+        DEBUGLOG(4, "Writing out RLE block, size: %zu", cSize);
+        *dRep = dRepOriginal; /* reset simulated decompression repcode history */
+    } else {
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
+        writeBlockHeader(op, cSeqsSize, srcSize, lastBlock);
+        cSize = ZSTD_blockHeaderSize + cSeqsSize;
+        DEBUGLOG(4, "Writing out compressed block, size: %zu", cSize);
+    }
+
+    if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+        zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+
+    return cSize;
+}
+
+/* Struct to keep track of where we are in our recursive calls. */
+typedef struct {
+    U32* splitLocations;    /* Array of split indices */
+    size_t idx;             /* The current index within splitLocations being worked on */
+} seqStoreSplits;
+
+#define MIN_SEQUENCES_BLOCK_SPLITTING 300
+
+/* Helper function to perform the recursive search for block splits.
+ * Estimates the cost of seqStore prior to split, and estimates the cost of splitting the sequences in half.
+ * If advantageous to split, then we recurse down the two sub-blocks. If not, or if an error occurred in estimation, then
+ * we do not recurse.
+ *
+ * Note: The recursion depth is capped by a heuristic minimum number of sequences, defined by MIN_SEQUENCES_BLOCK_SPLITTING.
+ * In theory, this means the absolute largest recursion depth is 10 == log2(maxNbSeqInBlock/MIN_SEQUENCES_BLOCK_SPLITTING).
+ * In practice, recursion depth usually doesn't go beyond 4.
+ *
+ * Furthermore, the number of splits is capped by ZSTD_MAX_NB_BLOCK_SPLITS. At ZSTD_MAX_NB_BLOCK_SPLITS == 196 with the current existing blockSize
+ * maximum of 128 KB, this value is actually impossible to reach.
+ */
+static void
+ZSTD_deriveBlockSplitsHelper(seqStoreSplits* splits, size_t startIdx, size_t endIdx,
+                             ZSTD_CCtx* zc, const seqStore_t* origSeqStore)
+{
+    seqStore_t* fullSeqStoreChunk = &zc->blockSplitCtx.fullSeqStoreChunk;
+    seqStore_t* firstHalfSeqStore = &zc->blockSplitCtx.firstHalfSeqStore;
+    seqStore_t* secondHalfSeqStore = &zc->blockSplitCtx.secondHalfSeqStore;
+    size_t estimatedOriginalSize;
+    size_t estimatedFirstHalfSize;
+    size_t estimatedSecondHalfSize;
+    size_t midIdx = (startIdx + endIdx)/2;
+
+    if (endIdx - startIdx < MIN_SEQUENCES_BLOCK_SPLITTING || splits->idx >= ZSTD_MAX_NB_BLOCK_SPLITS) {
+        DEBUGLOG(6, "ZSTD_deriveBlockSplitsHelper: Too few sequences");
+        return;
+    }
+    DEBUGLOG(4, "ZSTD_deriveBlockSplitsHelper: startIdx=%zu endIdx=%zu", startIdx, endIdx);
+    ZSTD_deriveSeqStoreChunk(fullSeqStoreChunk, origSeqStore, startIdx, endIdx);
+    ZSTD_deriveSeqStoreChunk(firstHalfSeqStore, origSeqStore, startIdx, midIdx);
+    ZSTD_deriveSeqStoreChunk(secondHalfSeqStore, origSeqStore, midIdx, endIdx);
+    estimatedOriginalSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(fullSeqStoreChunk, zc);
+    estimatedFirstHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(firstHalfSeqStore, zc);
+    estimatedSecondHalfSize = ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(secondHalfSeqStore, zc);
+    DEBUGLOG(4, "Estimated original block size: %zu -- First half split: %zu -- Second half split: %zu",
+             estimatedOriginalSize, estimatedFirstHalfSize, estimatedSecondHalfSize);
+    if (ZSTD_isError(estimatedOriginalSize) || ZSTD_isError(estimatedFirstHalfSize) || ZSTD_isError(estimatedSecondHalfSize)) {
+        return;
+    }
+    if (estimatedFirstHalfSize + estimatedSecondHalfSize < estimatedOriginalSize) {
+        ZSTD_deriveBlockSplitsHelper(splits, startIdx, midIdx, zc, origSeqStore);
+        splits->splitLocations[splits->idx] = (U32)midIdx;
+        splits->idx++;
+        ZSTD_deriveBlockSplitsHelper(splits, midIdx, endIdx, zc, origSeqStore);
+    }
+}
+
+/* Base recursive function. Populates a table with intra-block partition indices that can improve compression ratio.
+ *
+ * Returns the number of splits made (which equals the size of the partition table - 1).
+ */
+static size_t ZSTD_deriveBlockSplits(ZSTD_CCtx* zc, U32 partitions[], U32 nbSeq) {
+    seqStoreSplits splits = {partitions, 0};
+    if (nbSeq <= 4) {
+        DEBUGLOG(4, "ZSTD_deriveBlockSplits: Too few sequences to split");
+        /* Refuse to try and split anything with less than 4 sequences */
+        return 0;
+    }
+    ZSTD_deriveBlockSplitsHelper(&splits, 0, nbSeq, zc, &zc->seqStore);
+    splits.splitLocations[splits.idx] = nbSeq;
+    DEBUGLOG(5, "ZSTD_deriveBlockSplits: final nb partitions: %zu", splits.idx+1);
+    return splits.idx;
+}
+
+/* ZSTD_compressBlock_splitBlock():
+ * Attempts to split a given block into multiple blocks to improve compression ratio.
+ *
+ * Returns combined size of all blocks (which includes headers), or a ZSTD error code.
+ */
+static size_t
+ZSTD_compressBlock_splitBlock_internal(ZSTD_CCtx* zc, void* dst, size_t dstCapacity,
+                                       const void* src, size_t blockSize, U32 lastBlock, U32 nbSeq)
+{
+    size_t cSize = 0;
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    size_t i = 0;
+    size_t srcBytesTotal = 0;
+    U32* partitions = zc->blockSplitCtx.partitions; /* size == ZSTD_MAX_NB_BLOCK_SPLITS */
+    seqStore_t* nextSeqStore = &zc->blockSplitCtx.nextSeqStore;
+    seqStore_t* currSeqStore = &zc->blockSplitCtx.currSeqStore;
+    size_t numSplits = ZSTD_deriveBlockSplits(zc, partitions, nbSeq);
+
+    /* If a block is split and some partitions are emitted as RLE/uncompressed, then repcode history
+     * may become invalid. In order to reconcile potentially invalid repcodes, we keep track of two
+     * separate repcode histories that simulate repcode history on compression and decompression side,
+     * and use the histories to determine whether we must replace a particular repcode with its raw offset.
+     *
+     * 1) cRep gets updated for each partition, regardless of whether the block was emitted as uncompressed
+     *    or RLE. This allows us to retrieve the offset value that an invalid repcode references within
+     *    a nocompress/RLE block.
+     * 2) dRep gets updated only for compressed partitions, and when a repcode gets replaced, will use
+     *    the replacement offset value rather than the original repcode to update the repcode history.
+     *    dRep also will be the final repcode history sent to the next block.
+     *
+     * See ZSTD_seqStore_resolveOffCodes() for more details.
+     */
+    repcodes_t dRep;
+    repcodes_t cRep;
+    ZSTD_memcpy(dRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    ZSTD_memcpy(cRep.rep, zc->blockState.prevCBlock->rep, sizeof(repcodes_t));
+    ZSTD_memset(nextSeqStore, 0, sizeof(seqStore_t));
+
+    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock_internal (dstCapacity=%u, dictLimit=%u, nextToUpdate=%u)",
+                (unsigned)dstCapacity, (unsigned)zc->blockState.matchState.window.dictLimit,
+                (unsigned)zc->blockState.matchState.nextToUpdate);
+
+    if (numSplits == 0) {
+        size_t cSizeSingleBlock = ZSTD_compressSeqStore_singleBlock(zc, &zc->seqStore,
+                                                                   &dRep, &cRep,
+                                                                    op, dstCapacity,
+                                                                    ip, blockSize,
+                                                                    lastBlock, 0 /* isPartition */);
+        FORWARD_IF_ERROR(cSizeSingleBlock, "Compressing single block from splitBlock_internal() failed!");
+        DEBUGLOG(5, "ZSTD_compressBlock_splitBlock_internal: No splits");
+        assert(cSizeSingleBlock <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+        return cSizeSingleBlock;
+    }
+
+    ZSTD_deriveSeqStoreChunk(currSeqStore, &zc->seqStore, 0, partitions[0]);
+    for (i = 0; i <= numSplits; ++i) {
+        size_t srcBytes;
+        size_t cSizeChunk;
+        U32 const lastPartition = (i == numSplits);
+        U32 lastBlockEntireSrc = 0;
+
+        srcBytes = ZSTD_countSeqStoreLiteralsBytes(currSeqStore) + ZSTD_countSeqStoreMatchBytes(currSeqStore);
+        srcBytesTotal += srcBytes;
+        if (lastPartition) {
+            /* This is the final partition, need to account for possible last literals */
+            srcBytes += blockSize - srcBytesTotal;
+            lastBlockEntireSrc = lastBlock;
+        } else {
+            ZSTD_deriveSeqStoreChunk(nextSeqStore, &zc->seqStore, partitions[i], partitions[i+1]);
+        }
+
+        cSizeChunk = ZSTD_compressSeqStore_singleBlock(zc, currSeqStore,
+                                                      &dRep, &cRep,
+                                                       op, dstCapacity,
+                                                       ip, srcBytes,
+                                                       lastBlockEntireSrc, 1 /* isPartition */);
+        DEBUGLOG(5, "Estimated size: %zu actual size: %zu", ZSTD_buildEntropyStatisticsAndEstimateSubBlockSize(currSeqStore, zc), cSizeChunk);
+        FORWARD_IF_ERROR(cSizeChunk, "Compressing chunk failed!");
+
+        ip += srcBytes;
+        op += cSizeChunk;
+        dstCapacity -= cSizeChunk;
+        cSize += cSizeChunk;
+        *currSeqStore = *nextSeqStore;
+        assert(cSizeChunk <= ZSTD_BLOCKSIZE_MAX + ZSTD_blockHeaderSize);
+    }
+    /* cRep and dRep may have diverged during the compression. If so, we use the dRep repcodes
+     * for the next block.
+     */
+    ZSTD_memcpy(zc->blockState.prevCBlock->rep, dRep.rep, sizeof(repcodes_t));
+    return cSize;
+}
+
+static size_t
+ZSTD_compressBlock_splitBlock(ZSTD_CCtx* zc,
+                              void* dst, size_t dstCapacity,
+                              const void* src, size_t srcSize, U32 lastBlock)
+{
+    const BYTE* ip = (const BYTE*)src;
+    BYTE* op = (BYTE*)dst;
+    U32 nbSeq;
+    size_t cSize;
+    DEBUGLOG(4, "ZSTD_compressBlock_splitBlock");
+    assert(zc->appliedParams.useBlockSplitter == ZSTD_ps_enable);
+
+    {   const size_t bss = ZSTD_buildSeqStore(zc, src, srcSize);
+        FORWARD_IF_ERROR(bss, "ZSTD_buildSeqStore failed");
+        if (bss == ZSTDbss_noCompress) {
+            if (zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
+                zc->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
+            cSize = ZSTD_noCompressBlock(op, dstCapacity, ip, srcSize, lastBlock);
+            FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed");
+            DEBUGLOG(4, "ZSTD_compressBlock_splitBlock: Nocompress block");
+            return cSize;
+        }
+        nbSeq = (U32)(zc->seqStore.sequences - zc->seqStore.sequencesStart);
+    }
+
+    cSize = ZSTD_compressBlock_splitBlock_internal(zc, dst, dstCapacity, src, srcSize, lastBlock, nbSeq);
+    FORWARD_IF_ERROR(cSize, "Splitting blocks failed!");
+    return cSize;
 }
 
-static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
-                                        void* dst, size_t dstCapacity,
-                                        const void* src, size_t srcSize, U32 frame)
+static size_t
+ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
+                            void* dst, size_t dstCapacity,
+                            const void* src, size_t srcSize, U32 frame)
 {
     /* This the upper bound for the length of an rle block.
      * This isn't the actual upper bound. Finding the real threshold
@@ -2632,12 +3692,12 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
     if (zc->seqCollector.collectSequences) {
         ZSTD_copyBlockSequences(zc);
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
         return 0;
     }
 
     /* encode sequences and literals */
-    cSize = ZSTD_entropyCompressSequences(&zc->seqStore,
+    cSize = ZSTD_entropyCompressSeqStore(&zc->seqStore,
             &zc->blockState.prevCBlock->entropy, &zc->blockState.nextCBlock->entropy,
             &zc->appliedParams,
             dst, dstCapacity,
@@ -2645,12 +3705,6 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
             zc->entropyWorkspace, ENTROPY_WORKSPACE_SIZE /* statically allocated in resetCCtx */,
             zc->bmi2);
 
-    if (zc->seqCollector.collectSequences) {
-        ZSTD_copyBlockSequences(zc);
-        return 0;
-    }
-
-
     if (frame &&
         /* We don't want to emit our first block as a RLE even if it qualifies because
          * doing so will cause the decoder (cli only) to throw a "should consume all input error."
@@ -2666,7 +3720,7 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
 
 out:
     if (!ZSTD_isError(cSize) && cSize > 1) {
-        ZSTD_confirmRepcodesAndEntropyTables(zc);
+        ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
     }
     /* We check that dictionaries have offset codes available for the first
      * block. After the first block, the offcode table might not have large
@@ -2719,7 +3773,7 @@ static size_t ZSTD_compressBlock_targetCBlockSize_body(ZSTD_CCtx* zc,
                 size_t const maxCSize = srcSize - ZSTD_minGain(srcSize, zc->appliedParams.cParams.strategy);
                 FORWARD_IF_ERROR(cSize, "ZSTD_compressSuperBlock failed");
                 if (cSize != 0 && cSize < maxCSize + ZSTD_blockHeaderSize) {
-                    ZSTD_confirmRepcodesAndEntropyTables(zc);
+                    ZSTD_blockState_confirmRepcodesAndEntropyTables(&zc->blockState);
                     return cSize;
                 }
             }
@@ -2759,9 +3813,9 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
                                          void const* ip,
                                          void const* iend)
 {
-    if (ZSTD_window_needOverflowCorrection(ms->window, iend)) {
-        U32 const maxDist = (U32)1 << params->cParams.windowLog;
-        U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const cycleLog = ZSTD_cycleLog(params->cParams.chainLog, params->cParams.strategy);
+    U32 const maxDist = (U32)1 << params->cParams.windowLog;
+    if (ZSTD_window_needOverflowCorrection(ms->window, cycleLog, maxDist, ms->loadedDictEnd, ip, iend)) {
         U32 const correction = ZSTD_window_correctOverflow(&ms->window, cycleLog, maxDist, ip);
         ZSTD_STATIC_ASSERT(ZSTD_CHAINLOG_MAX <= 30);
         ZSTD_STATIC_ASSERT(ZSTD_WINDOWLOG_MAX_32 <= 30);
@@ -2784,7 +3838,7 @@ static void ZSTD_overflowCorrectIfNeeded(ZSTD_matchState_t* ms,
 *   Frame is supposed already started (header already produced)
 *   @return : compressed size, or an error code
 */
-static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
+static size_t ZSTD_compress_frameChunk(ZSTD_CCtx* cctx,
                                      void* dst, size_t dstCapacity,
                                const void* src, size_t srcSize,
                                      U32 lastFrameChunk)
@@ -2814,6 +3868,7 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
         ZSTD_overflowCorrectIfNeeded(
             ms, &cctx->workspace, &cctx->appliedParams, ip, ip + blockSize);
         ZSTD_checkDictValidity(&ms->window, ip + blockSize, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
+        ZSTD_window_enforceMaxDist(&ms->window, ip, maxDist, &ms->loadedDictEnd, &ms->dictMatchState);
 
         /* Ensure hash/chain table insertion resumes no sooner than lowlimit */
         if (ms->nextToUpdate < ms->window.lowLimit) ms->nextToUpdate = ms->window.lowLimit;
@@ -2824,6 +3879,10 @@ static size_t ZSTD_compress_frameChunk (ZSTD_CCtx* cctx,
                 FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_targetCBlockSize failed");
                 assert(cSize > 0);
                 assert(cSize <= blockSize + ZSTD_blockHeaderSize);
+            } else if (ZSTD_blockSplitterEnabled(&cctx->appliedParams)) {
+                cSize = ZSTD_compressBlock_splitBlock(cctx, op, dstCapacity, ip, blockSize, lastBlock);
+                FORWARD_IF_ERROR(cSize, "ZSTD_compressBlock_splitBlock failed");
+                assert(cSize > 0 || cctx->seqCollector.collectSequences == 1);
             } else {
                 cSize = ZSTD_compressBlock_internal(cctx,
                                         op+ZSTD_blockHeaderSize, dstCapacity-ZSTD_blockHeaderSize,
@@ -2946,7 +4005,7 @@ size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSe
 {
     RETURN_ERROR_IF(cctx->stage != ZSTDcs_init, stage_wrong,
                     "wrong cctx stage");
-    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm,
+    RETURN_ERROR_IF(cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable,
                     parameter_unsupported,
                     "incompatible with ldm");
     cctx->externSeqStore.seq = seq;
@@ -2983,11 +4042,12 @@ static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
 
     if (!srcSize) return fhSize;  /* do not generate an empty block if no input */
 
-    if (!ZSTD_window_update(&ms->window, src, srcSize)) {
+    if (!ZSTD_window_update(&ms->window, src, srcSize, ms->forceNonContiguous)) {
+        ms->forceNonContiguous = 0;
         ms->nextToUpdate = ms->window.dictLimit;
     }
-    if (cctx->appliedParams.ldmParams.enableLdm) {
-        ZSTD_window_update(&cctx->ldmState.window, src, srcSize);
+    if (cctx->appliedParams.ldmParams.enableLdm == ZSTD_ps_enable) {
+        ZSTD_window_update(&cctx->ldmState.window, src, srcSize, /* forceNonContiguous */ 0);
     }
 
     if (!frame) {
@@ -3055,63 +4115,86 @@ static size_t ZSTD_loadDictionaryContent(ZSTD_matchState_t* ms,
 {
     const BYTE* ip = (const BYTE*) src;
     const BYTE* const iend = ip + srcSize;
+    int const loadLdmDict = params->ldmParams.enableLdm == ZSTD_ps_enable && ls != NULL;
 
-    ZSTD_window_update(&ms->window, src, srcSize);
+    /* Assert that we the ms params match the params we're being given */
+    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
+
+    if (srcSize > ZSTD_CHUNKSIZE_MAX) {
+        /* Allow the dictionary to set indices up to exactly ZSTD_CURRENT_MAX.
+         * Dictionaries right at the edge will immediately trigger overflow
+         * correction, but I don't want to insert extra constraints here.
+         */
+        U32 const maxDictSize = ZSTD_CURRENT_MAX - 1;
+        /* We must have cleared our windows when our source is this large. */
+        assert(ZSTD_window_isEmpty(ms->window));
+        if (loadLdmDict)
+            assert(ZSTD_window_isEmpty(ls->window));
+        /* If the dictionary is too large, only load the suffix of the dictionary. */
+        if (srcSize > maxDictSize) {
+            ip = iend - maxDictSize;
+            src = ip;
+            srcSize = maxDictSize;
+        }
+    }
+
+    DEBUGLOG(4, "ZSTD_loadDictionaryContent(): useRowMatchFinder=%d", (int)params->useRowMatchFinder);
+    ZSTD_window_update(&ms->window, src, srcSize, /* forceNonContiguous */ 0);
     ms->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ms->window.base);
+    ms->forceNonContiguous = params->deterministicRefPrefix;
 
-    if (params->ldmParams.enableLdm && ls != NULL) {
-        ZSTD_window_update(&ls->window, src, srcSize);
+    if (loadLdmDict) {
+        ZSTD_window_update(&ls->window, src, srcSize, /* forceNonContiguous */ 0);
         ls->loadedDictEnd = params->forceWindow ? 0 : (U32)(iend - ls->window.base);
     }
 
-    /* Assert that we the ms params match the params we're being given */
-    ZSTD_assertEqualCParams(params->cParams, ms->cParams);
-
     if (srcSize <= HASH_READ_SIZE) return 0;
 
-    while (iend - ip > HASH_READ_SIZE) {
-        size_t const remaining = (size_t)(iend - ip);
-        size_t const chunk = MIN(remaining, ZSTD_CHUNKSIZE_MAX);
-        const BYTE* const ichunk = ip + chunk;
-
-        ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, ichunk);
+    ZSTD_overflowCorrectIfNeeded(ms, ws, params, ip, iend);
 
-        if (params->ldmParams.enableLdm && ls != NULL)
-            ZSTD_ldm_fillHashTable(ls, (const BYTE*)src, (const BYTE*)src + srcSize, &params->ldmParams);
+    if (loadLdmDict)
+        ZSTD_ldm_fillHashTable(ls, ip, iend, &params->ldmParams);
 
-        switch(params->cParams.strategy)
-        {
-        case ZSTD_fast:
-            ZSTD_fillHashTable(ms, ichunk, dtlm);
-            break;
-        case ZSTD_dfast:
-            ZSTD_fillDoubleHashTable(ms, ichunk, dtlm);
-            break;
+    switch(params->cParams.strategy)
+    {
+    case ZSTD_fast:
+        ZSTD_fillHashTable(ms, iend, dtlm);
+        break;
+    case ZSTD_dfast:
+        ZSTD_fillDoubleHashTable(ms, iend, dtlm);
+        break;
 
-        case ZSTD_greedy:
-        case ZSTD_lazy:
-        case ZSTD_lazy2:
-            if (chunk >= HASH_READ_SIZE && ms->dedicatedDictSearch) {
-                assert(chunk == remaining); /* must load everything in one go */
-                ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, ichunk-HASH_READ_SIZE);
-            } else if (chunk >= HASH_READ_SIZE) {
-                ZSTD_insertAndFindFirstIndex(ms, ichunk-HASH_READ_SIZE);
+    case ZSTD_greedy:
+    case ZSTD_lazy:
+    case ZSTD_lazy2:
+        assert(srcSize >= HASH_READ_SIZE);
+        if (ms->dedicatedDictSearch) {
+            assert(ms->chainTable != NULL);
+            ZSTD_dedicatedDictSearch_lazy_loadDictionary(ms, iend-HASH_READ_SIZE);
+        } else {
+            assert(params->useRowMatchFinder != ZSTD_ps_auto);
+            if (params->useRowMatchFinder == ZSTD_ps_enable) {
+                size_t const tagTableSize = ((size_t)1 << params->cParams.hashLog) * sizeof(U16);
+                ZSTD_memset(ms->tagTable, 0, tagTableSize);
+                ZSTD_row_update(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using row-based hash table for lazy dict");
+            } else {
+                ZSTD_insertAndFindFirstIndex(ms, iend-HASH_READ_SIZE);
+                DEBUGLOG(4, "Using chain-based hash table for lazy dict");
             }
-            break;
-
-        case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
-        case ZSTD_btopt:
-        case ZSTD_btultra:
-        case ZSTD_btultra2:
-            if (chunk >= HASH_READ_SIZE)
-                ZSTD_updateTree(ms, ichunk-HASH_READ_SIZE, ichunk);
-            break;
-
-        default:
-            assert(0);  /* not possible : not a valid strategy id */
         }
+        break;
+
+    case ZSTD_btlazy2:   /* we want the dictionary table fully sorted */
+    case ZSTD_btopt:
+    case ZSTD_btultra:
+    case ZSTD_btultra2:
+        assert(srcSize >= HASH_READ_SIZE);
+        ZSTD_updateTree(ms, iend-HASH_READ_SIZE, iend);
+        break;
 
-        ip = ichunk;
+    default:
+        assert(0);  /* not possible : not a valid strategy id */
     }
 
     ms->nextToUpdate = (U32)(iend - ms->window.base);
@@ -3250,7 +4333,6 @@ static size_t ZSTD_loadZstdDictionary(ZSTD_compressedBlockState_t* bs,
     const BYTE* const dictEnd = dictPtr + dictSize;
     size_t dictID;
     size_t eSize;
-
     ZSTD_STATIC_ASSERT(HUF_WORKSPACE_SIZE >= (1<<MAX(MLFSELog,LLFSELog)));
     assert(dictSize >= 8);
     assert(MEM_readLE32(dictPtr) == ZSTD_MAGIC_DICTIONARY);
@@ -3321,6 +4403,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
                                     const ZSTD_CCtx_params* params, U64 pledgedSrcSize,
                                     ZSTD_buffered_policy_e zbuff)
 {
+    size_t const dictContentSize = cdict ? cdict->dictContentSize : dictSize;
     DEBUGLOG(4, "ZSTD_compressBegin_internal: wlog=%u", params->cParams.windowLog);
     /* params are supposed to be fully validated at this point */
     assert(!ZSTD_isError(ZSTD_checkCParams(params->cParams)));
@@ -3335,7 +4418,8 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
         return ZSTD_resetCCtx_usingCDict(cctx, cdict, params, pledgedSrcSize, zbuff);
     }
 
-    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, *params, pledgedSrcSize,
+    FORWARD_IF_ERROR( ZSTD_resetCCtx_internal(cctx, params, pledgedSrcSize,
+                                     dictContentSize,
                                      ZSTDcrp_makeClean, zbuff) , "");
     {   size_t const dictID = cdict ?
                 ZSTD_compress_insertDictionary(
@@ -3350,7 +4434,7 @@ static size_t ZSTD_compressBegin_internal(ZSTD_CCtx* cctx,
         FORWARD_IF_ERROR(dictID, "ZSTD_compress_insertDictionary failed");
         assert(dictID <= UINT_MAX);
         cctx->dictID = (U32)dictID;
-        cctx->dictContentSize = cdict ? cdict->dictContentSize : dictSize;
+        cctx->dictContentSize = dictContentSize;
     }
     return 0;
 }
@@ -3485,15 +4569,14 @@ size_t ZSTD_compress_advanced (ZSTD_CCtx* cctx,
                          const void* dict,size_t dictSize,
                                ZSTD_parameters params)
 {
-    ZSTD_CCtx_params cctxParams;
     DEBUGLOG(4, "ZSTD_compress_advanced");
     FORWARD_IF_ERROR(ZSTD_checkCParams(params.cParams), "");
-    ZSTD_CCtxParams_init_internal(&cctxParams, &params, ZSTD_NO_CLEVEL);
+    ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, ZSTD_NO_CLEVEL);
     return ZSTD_compress_advanced_internal(cctx,
                                            dst, dstCapacity,
                                            src, srcSize,
                                            dict, dictSize,
-                                           &cctxParams);
+                                           &cctx->simpleApiParams);
 }
 
 /* Internal */
@@ -3517,14 +4600,13 @@ size_t ZSTD_compress_usingDict(ZSTD_CCtx* cctx,
                          const void* dict, size_t dictSize,
                                int compressionLevel)
 {
-    ZSTD_CCtx_params cctxParams;
     {
         ZSTD_parameters const params = ZSTD_getParams_internal(compressionLevel, srcSize, dict ? dictSize : 0, ZSTD_cpm_noAttachDict);
         assert(params.fParams.contentSizeFlag == 1);
-        ZSTD_CCtxParams_init_internal(&cctxParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
+        ZSTD_CCtxParams_init_internal(&cctx->simpleApiParams, &params, (compressionLevel == 0) ? ZSTD_CLEVEL_DEFAULT: compressionLevel);
     }
     DEBUGLOG(4, "ZSTD_compress_usingDict (srcSize=%u)", (unsigned)srcSize);
-    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctxParams);
+    return ZSTD_compress_advanced_internal(cctx, dst, dstCapacity, src, srcSize, dict, dictSize, &cctx->simpleApiParams);
 }
 
 size_t ZSTD_compressCCtx(ZSTD_CCtx* cctx,
@@ -3561,7 +4643,10 @@ size_t ZSTD_estimateCDictSize_advanced(
     DEBUGLOG(5, "sizeof(ZSTD_CDict) : %u", (unsigned)sizeof(ZSTD_CDict));
     return ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
          + ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE)
-         + ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0)
+         /* enableDedicatedDictSearch == 1 ensures that CDict estimation will not be too small
+          * in case we are using DDS with row-hash. */
+         + ZSTD_sizeof_matchState(&cParams, ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams),
+                                  /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0)
          + (dictLoadMethod == ZSTD_dlm_byRef ? 0
             : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void *))));
 }
@@ -3592,9 +4677,6 @@ static size_t ZSTD_initCDict_internal(
     assert(!ZSTD_checkCParams(params.cParams));
     cdict->matchState.cParams = params.cParams;
     cdict->matchState.dedicatedDictSearch = params.enableDedicatedDictSearch;
-    if (cdict->matchState.dedicatedDictSearch && dictSize > ZSTD_CHUNKSIZE_MAX) {
-        cdict->matchState.dedicatedDictSearch = 0;
-    }
     if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dictBuffer) || (!dictSize)) {
         cdict->dictContent = dictBuffer;
     } else {
@@ -3615,6 +4697,7 @@ static size_t ZSTD_initCDict_internal(
         &cdict->matchState,
         &cdict->workspace,
         &params.cParams,
+        params.useRowMatchFinder,
         ZSTDcrp_makeClean,
         ZSTDirp_reset,
         ZSTD_resetTarget_CDict), "");
@@ -3638,14 +4721,17 @@ static size_t ZSTD_initCDict_internal(
 
 static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
                                       ZSTD_dictLoadMethod_e dictLoadMethod,
-                                      ZSTD_compressionParameters cParams, ZSTD_customMem customMem)
+                                      ZSTD_compressionParameters cParams,
+                                      ZSTD_paramSwitch_e useRowMatchFinder,
+                                      U32 enableDedicatedDictSearch,
+                                      ZSTD_customMem customMem)
 {
     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
     {   size_t const workspaceSize =
             ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict)) +
             ZSTD_cwksp_alloc_size(HUF_WORKSPACE_SIZE) +
-            ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0) +
+            ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, enableDedicatedDictSearch, /* forCCtx */ 0) +
             (dictLoadMethod == ZSTD_dlm_byRef ? 0
              : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))));
         void* const workspace = ZSTD_customMalloc(workspaceSize, customMem);
@@ -3664,7 +4750,7 @@ static ZSTD_CDict* ZSTD_createCDict_advanced_internal(size_t dictSize,
         ZSTD_cwksp_move(&cdict->workspace, &ws);
         cdict->customMem = customMem;
         cdict->compressionLevel = ZSTD_NO_CLEVEL; /* signals advanced API usage */
-
+        cdict->useRowMatchFinder = useRowMatchFinder;
         return cdict;
     }
 }
@@ -3686,7 +4772,7 @@ ZSTD_CDict* ZSTD_createCDict_advanced(const void* dictBuffer, size_t dictSize,
         &cctxParams, customMem);
 }
 
-ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
+ZSTD_CDict* ZSTD_createCDict_advanced2(
         const void* dict, size_t dictSize,
         ZSTD_dictLoadMethod_e dictLoadMethod,
         ZSTD_dictContentType_e dictContentType,
@@ -3716,10 +4802,13 @@ ZSTDLIB_API ZSTD_CDict* ZSTD_createCDict_advanced2(
             &cctxParams, ZSTD_CONTENTSIZE_UNKNOWN, dictSize, ZSTD_cpm_createCDict);
     }
 
+    DEBUGLOG(3, "ZSTD_createCDict_advanced2: DDS: %u", cctxParams.enableDedicatedDictSearch);
     cctxParams.cParams = cParams;
+    cctxParams.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(cctxParams.useRowMatchFinder, &cParams);
 
     cdict = ZSTD_createCDict_advanced_internal(dictSize,
                         dictLoadMethod, cctxParams.cParams,
+                        cctxParams.useRowMatchFinder, cctxParams.enableDedicatedDictSearch,
                         customMem);
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
@@ -3788,7 +4877,9 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
                                  ZSTD_dictContentType_e dictContentType,
                                  ZSTD_compressionParameters cParams)
 {
-    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, /* forCCtx */ 0);
+    ZSTD_paramSwitch_e const useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(ZSTD_ps_auto, &cParams);
+    /* enableDedicatedDictSearch == 1 ensures matchstate is not too small in case this CDict will be used for DDS + row hash */
+    size_t const matchStateSize = ZSTD_sizeof_matchState(&cParams, useRowMatchFinder, /* enableDedicatedDictSearch */ 1, /* forCCtx */ 0);
     size_t const neededSize = ZSTD_cwksp_alloc_size(sizeof(ZSTD_CDict))
                             + (dictLoadMethod == ZSTD_dlm_byRef ? 0
                                : ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(dictSize, sizeof(void*))))
@@ -3813,6 +4904,8 @@ const ZSTD_CDict* ZSTD_initStaticCDict(
 
     ZSTD_CCtxParams_init(&params, 0);
     params.cParams = cParams;
+    params.useRowMatchFinder = useRowMatchFinder;
+    cdict->useRowMatchFinder = useRowMatchFinder;
 
     if (ZSTD_isError( ZSTD_initCDict_internal(cdict,
                                               dict, dictSize,
@@ -3839,15 +4932,15 @@ unsigned ZSTD_getDictID_fromCDict(const ZSTD_CDict* cdict)
     return cdict->dictID;
 }
 
-
-/* ZSTD_compressBegin_usingCDict_advanced() :
- * cdict must be != NULL */
-size_t ZSTD_compressBegin_usingCDict_advanced(
+/* ZSTD_compressBegin_usingCDict_internal() :
+ * Implementation of various ZSTD_compressBegin_usingCDict* functions.
+ */
+static size_t ZSTD_compressBegin_usingCDict_internal(
     ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
     ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
 {
     ZSTD_CCtx_params cctxParams;
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_advanced");
+    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict_internal");
     RETURN_ERROR_IF(cdict==NULL, dictionary_wrong, "NULL pointer!");
     /* Initialize the cctxParams from the cdict */
     {
@@ -3879,25 +4972,48 @@ size_t ZSTD_compressBegin_usingCDict_advanced(
                                         ZSTDb_not_buffered);
 }
 
+
+/* ZSTD_compressBegin_usingCDict_advanced() :
+ * This function is DEPRECATED.
+ * cdict must be != NULL */
+size_t ZSTD_compressBegin_usingCDict_advanced(
+    ZSTD_CCtx* const cctx, const ZSTD_CDict* const cdict,
+    ZSTD_frameParameters const fParams, unsigned long long const pledgedSrcSize)
+{
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, pledgedSrcSize);
+}
+
 /* ZSTD_compressBegin_usingCDict() :
- * pledgedSrcSize=0 means "unknown"
- * if pledgedSrcSize>0, it will enable contentSizeFlag */
+ * cdict must be != NULL */
 size_t ZSTD_compressBegin_usingCDict(ZSTD_CCtx* cctx, const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 0 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    DEBUGLOG(4, "ZSTD_compressBegin_usingCDict : dictIDFlag == %u", !fParams.noDictIDFlag);
-    return ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
+    return ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, ZSTD_CONTENTSIZE_UNKNOWN);
 }
 
-size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+/*! ZSTD_compress_usingCDict_internal():
+ * Implementation of various ZSTD_compress_usingCDict* functions.
+ */
+static size_t ZSTD_compress_usingCDict_internal(ZSTD_CCtx* cctx,
                                 void* dst, size_t dstCapacity,
                                 const void* src, size_t srcSize,
                                 const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
 {
-    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_advanced(cctx, cdict, fParams, srcSize), "");   /* will check if cdict != NULL */
+    FORWARD_IF_ERROR(ZSTD_compressBegin_usingCDict_internal(cctx, cdict, fParams, srcSize), ""); /* will check if cdict != NULL */
     return ZSTD_compressEnd(cctx, dst, dstCapacity, src, srcSize);
 }
 
+/*! ZSTD_compress_usingCDict_advanced():
+ * This function is DEPRECATED.
+ */
+size_t ZSTD_compress_usingCDict_advanced(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                                const void* src, size_t srcSize,
+                                const ZSTD_CDict* cdict, ZSTD_frameParameters fParams)
+{
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+}
+
 /*! ZSTD_compress_usingCDict() :
  *  Compression using a digested Dictionary.
  *  Faster startup than ZSTD_compress_usingDict(), recommended when same dictionary is used multiple times.
@@ -3909,7 +5025,7 @@ size_t ZSTD_compress_usingCDict(ZSTD_CCtx* cctx,
                                 const ZSTD_CDict* cdict)
 {
     ZSTD_frameParameters const fParams = { 1 /*content*/, 0 /*checksum*/, 0 /*noDictID*/ };
-    return ZSTD_compress_usingCDict_advanced(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
+    return ZSTD_compress_usingCDict_internal(cctx, dst, dstCapacity, src, srcSize, cdict, fParams);
 }
 
 
@@ -4313,8 +5429,13 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
     FORWARD_IF_ERROR( ZSTD_initLocalDict(cctx) , ""); /* Init the local dict if present. */
     ZSTD_memset(&cctx->prefixDict, 0, sizeof(cctx->prefixDict));   /* single usage */
     assert(prefixDict.dict==NULL || cctx->cdict==NULL);    /* only one can be set */
-    if (cctx->cdict)
-        params.compressionLevel = cctx->cdict->compressionLevel; /* let cdict take priority in terms of compression level */
+    if (cctx->cdict && !cctx->localDict.cdict) {
+        /* Let the cdict's compression level take priority over the requested params.
+         * But do not take the cdict's compression level if the "cdict" is actually a localDict
+         * generated from ZSTD_initLocalDict().
+         */
+        params.compressionLevel = cctx->cdict->compressionLevel;
+    }
     DEBUGLOG(4, "ZSTD_compressStream2 : transparent init stage");
     if (endOp == ZSTD_e_end) cctx->pledgedSrcSizePlusOne = inSize + 1;  /* auto-fix pledgedSrcSize */
     {
@@ -4327,11 +5448,9 @@ static size_t ZSTD_CCtx_init_compressStream2(ZSTD_CCtx* cctx,
                 dictSize, mode);
     }
 
-    if (ZSTD_CParams_shouldEnableLdm(&params.cParams)) {
-        /* Enable LDM by default for optimal parser and window size >= 128MB */
-        DEBUGLOG(4, "LDM enabled by default (window size >= 128MB, strategy >= btopt)");
-        params.ldmParams.enableLdm = 1;
-    }
+    params.useBlockSplitter = ZSTD_resolveBlockSplitterMode(params.useBlockSplitter, &params.cParams);
+    params.ldmParams.enableLdm = ZSTD_resolveEnableLdm(params.ldmParams.enableLdm, &params.cParams);
+    params.useRowMatchFinder = ZSTD_resolveRowMatchFinderMode(params.useRowMatchFinder, &params.cParams);
 
     {   U64 const pledgedSrcSize = cctx->pledgedSrcSizePlusOne - 1;
         assert(!ZSTD_isError(ZSTD_checkCParams(params.cParams)));
@@ -4436,39 +5555,39 @@ typedef struct {
     size_t posInSrc;        /* Number of bytes given by sequences provided so far */
 } ZSTD_sequencePosition;
 
-/* Returns a ZSTD error code if sequence is not valid */
-static size_t ZSTD_validateSequence(U32 offCode, U32 matchLength,
-                                    size_t posInSrc, U32 windowLog, size_t dictSize, U32 minMatch) {
-    size_t offsetBound;
-    U32 windowSize = 1 << windowLog;
+/* ZSTD_validateSequence() :
+ * @offCode : is presumed to follow format required by ZSTD_storeSeq()
+ * @returns a ZSTD error code if sequence is not valid
+ */
+static size_t
+ZSTD_validateSequence(U32 offCode, U32 matchLength,
+                      size_t posInSrc, U32 windowLog, size_t dictSize)
+{
+    U32 const windowSize = 1 << windowLog;
     /* posInSrc represents the amount of data the decoder would decode up to this point.
      * As long as the amount of data decoded is less than or equal to window size, offsets may be
      * larger than the total length of output decoded in order to reference the dict, even larger than
      * window size. After output surpasses windowSize, we're limited to windowSize offsets again.
      */
-    offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
-    RETURN_ERROR_IF(offCode > offsetBound + ZSTD_REP_MOVE, corruption_detected, "Offset too large!");
-    RETURN_ERROR_IF(matchLength < minMatch, corruption_detected, "Matchlength too small");
+    size_t const offsetBound = posInSrc > windowSize ? (size_t)windowSize : posInSrc + (size_t)dictSize;
+    RETURN_ERROR_IF(offCode > STORE_OFFSET(offsetBound), corruption_detected, "Offset too large!");
+    RETURN_ERROR_IF(matchLength < MINMATCH, corruption_detected, "Matchlength too small");
     return 0;
 }
 
 /* Returns an offset code, given a sequence's raw offset, the ongoing repcode array, and whether litLength == 0 */
-static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0) {
-    U32 offCode = rawOffset + ZSTD_REP_MOVE;
-    U32 repCode = 0;
+static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32 ll0)
+{
+    U32 offCode = STORE_OFFSET(rawOffset);
 
     if (!ll0 && rawOffset == rep[0]) {
-        repCode = 1;
+        offCode = STORE_REPCODE_1;
     } else if (rawOffset == rep[1]) {
-        repCode = 2 - ll0;
+        offCode = STORE_REPCODE(2 - ll0);
     } else if (rawOffset == rep[2]) {
-        repCode = 3 - ll0;
+        offCode = STORE_REPCODE(3 - ll0);
     } else if (ll0 && rawOffset == rep[0] - 1) {
-        repCode = 3;
-    }
-    if (repCode) {
-        /* ZSTD_storeSeq expects a number in the range [0, 2] to represent a repcode */
-        offCode = repCode - 1;
+        offCode = STORE_REPCODE_3;
     }
     return offCode;
 }
@@ -4476,18 +5595,17 @@ static U32 ZSTD_finalizeOffCode(U32 rawOffset, const U32 rep[ZSTD_REP_NUM], U32
 /* Returns 0 on success, and a ZSTD_error otherwise. This function scans through an array of
  * ZSTD_Sequence, storing the sequences it finds, until it reaches a block delimiter.
  */
-static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                                             const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                                             const void* src, size_t blockSize) {
+static size_t
+ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx,
+                                              ZSTD_sequencePosition* seqPos,
+                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                        const void* src, size_t blockSize)
+{
     U32 idx = seqPos->idx;
     BYTE const* ip = (BYTE const*)(src);
     const BYTE* const iend = ip + blockSize;
     repcodes_t updatedRepcodes;
     U32 dictSize;
-    U32 litLength;
-    U32 matchLength;
-    U32 ll0;
-    U32 offCode;
 
     if (cctx->cdict) {
         dictSize = (U32)cctx->cdict->dictContentSize;
@@ -4498,23 +5616,22 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
     }
     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
     for (; (inSeqs[idx].matchLength != 0 || inSeqs[idx].offset != 0) && idx < inSeqsSize; ++idx) {
-        litLength = inSeqs[idx].litLength;
-        matchLength = inSeqs[idx].matchLength;
-        ll0 = litLength == 0;
-        offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
-        updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+        U32 const litLength = inSeqs[idx].litLength;
+        U32 const ll0 = (litLength == 0);
+        U32 const matchLength = inSeqs[idx].matchLength;
+        U32 const offCode = ZSTD_finalizeOffCode(inSeqs[idx].offset, updatedRepcodes.rep, ll0);
+        ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
 
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                cctx->appliedParams.cParams.windowLog, dictSize,
-                                                cctx->appliedParams.cParams.minMatch),
+                                                cctx->appliedParams.cParams.windowLog, dictSize),
                                                 "Sequence validation failed");
         }
         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
         ip += matchLength + litLength;
     }
     ZSTD_memcpy(cctx->blockState.nextCBlock->rep, updatedRepcodes.rep, sizeof(repcodes_t));
@@ -4541,9 +5658,11 @@ static size_t ZSTD_copySequencesToSeqStoreExplicitBlockDelim(ZSTD_CCtx* cctx, ZS
  * avoid splitting a match, or to avoid splitting a match such that it would produce a match
  * smaller than MINMATCH. In this case, we return the number of bytes that we didn't read from this block.
  */
-static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
-                                                       const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
-                                                       const void* src, size_t blockSize) {
+static size_t
+ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
+                                   const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
+                                   const void* src, size_t blockSize)
+{
     U32 idx = seqPos->idx;
     U32 startPosInSequence = seqPos->posInSequence;
     U32 endPosInSequence = seqPos->posInSequence + (U32)blockSize;
@@ -4553,10 +5672,6 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
     repcodes_t updatedRepcodes;
     U32 bytesAdjustment = 0;
     U32 finalMatchSplit = 0;
-    U32 litLength;
-    U32 matchLength;
-    U32 rawOffset;
-    U32 offCode;
 
     if (cctx->cdict) {
         dictSize = cctx->cdict->dictContentSize;
@@ -4570,9 +5685,10 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
     ZSTD_memcpy(updatedRepcodes.rep, cctx->blockState.prevCBlock->rep, sizeof(repcodes_t));
     while (endPosInSequence && idx < inSeqsSize && !finalMatchSplit) {
         const ZSTD_Sequence currSeq = inSeqs[idx];
-        litLength = currSeq.litLength;
-        matchLength = currSeq.matchLength;
-        rawOffset = currSeq.offset;
+        U32 litLength = currSeq.litLength;
+        U32 matchLength = currSeq.matchLength;
+        U32 const rawOffset = currSeq.offset;
+        U32 offCode;
 
         /* Modify the sequence depending on where endPosInSequence lies */
         if (endPosInSequence >= currSeq.litLength + currSeq.matchLength) {
@@ -4625,22 +5741,21 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
             }
         }
         /* Check if this offset can be represented with a repcode */
-        {   U32 ll0 = (litLength == 0);
+        {   U32 const ll0 = (litLength == 0);
             offCode = ZSTD_finalizeOffCode(rawOffset, updatedRepcodes.rep, ll0);
-            updatedRepcodes = ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
+            ZSTD_updateRep(updatedRepcodes.rep, offCode, ll0);
         }
 
         if (cctx->appliedParams.validateSequences) {
             seqPos->posInSrc += litLength + matchLength;
             FORWARD_IF_ERROR(ZSTD_validateSequence(offCode, matchLength, seqPos->posInSrc,
-                                                   cctx->appliedParams.cParams.windowLog, dictSize,
-                                                   cctx->appliedParams.cParams.minMatch),
+                                                   cctx->appliedParams.cParams.windowLog, dictSize),
                                                    "Sequence validation failed");
         }
         DEBUGLOG(6, "Storing sequence: (of: %u, ml: %u, ll: %u)", offCode, matchLength, litLength);
         RETURN_ERROR_IF(idx - seqPos->idx > cctx->seqStore.maxNbSeq, memory_allocation,
                         "Not enough memory allocated. Try adjusting ZSTD_c_minMatch.");
-        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength - MINMATCH);
+        ZSTD_storeSeq(&cctx->seqStore, litLength, ip, iend, offCode, matchLength);
         ip += matchLength + litLength;
     }
     DEBUGLOG(5, "Ending seq: idx: %u (of: %u ml: %u ll: %u)", idx, inSeqs[idx].offset, inSeqs[idx].matchLength, inSeqs[idx].litLength);
@@ -4665,7 +5780,8 @@ static size_t ZSTD_copySequencesToSeqStoreNoBlockDelim(ZSTD_CCtx* cctx, ZSTD_seq
 typedef size_t (*ZSTD_sequenceCopier) (ZSTD_CCtx* cctx, ZSTD_sequencePosition* seqPos,
                                        const ZSTD_Sequence* const inSeqs, size_t inSeqsSize,
                                        const void* src, size_t blockSize);
-static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode) {
+static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
+{
     ZSTD_sequenceCopier sequenceCopier = NULL;
     assert(ZSTD_cParam_withinBounds(ZSTD_c_blockDelimiters, mode));
     if (mode == ZSTD_sf_explicitBlockDelimiters) {
@@ -4679,12 +5795,15 @@ static ZSTD_sequenceCopier ZSTD_selectSequenceCopier(ZSTD_sequenceFormat_e mode)
 
 /* Compress, block-by-block, all of the sequences given.
  *
- * Returns the cumulative size of all compressed blocks (including their headers), otherwise a ZSTD error.
+ * Returns the cumulative size of all compressed blocks (including their headers),
+ * otherwise a ZSTD error.
  */
-static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
-                                              void* dst, size_t dstCapacity,
-                                              const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                                              const void* src, size_t srcSize) {
+static size_t
+ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
+                                void* dst, size_t dstCapacity,
+                          const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
+                          const void* src, size_t srcSize)
+{
     size_t cSize = 0;
     U32 lastBlock;
     size_t blockSize;
@@ -4694,7 +5813,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
 
     BYTE const* ip = (BYTE const*)src;
     BYTE* op = (BYTE*)dst;
-    ZSTD_sequenceCopier sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
+    ZSTD_sequenceCopier const sequenceCopier = ZSTD_selectSequenceCopier(cctx->appliedParams.blockDelimiters);
 
     DEBUGLOG(4, "ZSTD_compressSequences_internal srcSize: %zu, inSeqsSize: %zu", srcSize, inSeqsSize);
     /* Special case: empty frame */
@@ -4732,7 +5851,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
             continue;
         }
 
-        compressedSeqsSize = ZSTD_entropyCompressSequences(&cctx->seqStore,
+        compressedSeqsSize = ZSTD_entropyCompressSeqStore(&cctx->seqStore,
                                 &cctx->blockState.prevCBlock->entropy, &cctx->blockState.nextCBlock->entropy,
                                 &cctx->appliedParams,
                                 op + ZSTD_blockHeaderSize /* Leave space for block header */, dstCapacity - ZSTD_blockHeaderSize,
@@ -4764,7 +5883,7 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
         } else {
             U32 cBlockHeader;
             /* Error checking and repcodes update */
-            ZSTD_confirmRepcodesAndEntropyTables(cctx);
+            ZSTD_blockState_confirmRepcodesAndEntropyTables(&cctx->blockState);
             if (cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode == FSE_repeat_valid)
                 cctx->blockState.prevCBlock->entropy.fse.offcode_repeatMode = FSE_repeat_check;
 
@@ -4794,7 +5913,8 @@ static size_t ZSTD_compressSequences_internal(ZSTD_CCtx* cctx,
 
 size_t ZSTD_compressSequences(ZSTD_CCtx* const cctx, void* dst, size_t dstCapacity,
                               const ZSTD_Sequence* inSeqs, size_t inSeqsSize,
-                              const void* src, size_t srcSize) {
+                              const void* src, size_t srcSize)
+{
     BYTE* op = (BYTE*)dst;
     size_t cSize = 0;
     size_t compressedBlocksSize = 0;
@@ -4861,117 +5981,11 @@ size_t ZSTD_endStream(ZSTD_CStream* zcs, ZSTD_outBuffer* output)
 
 
 /*-=====  Pre-defined compression levels  =====-*/
+#include "clevels.h"
 
-#define ZSTD_MAX_CLEVEL     22
 int ZSTD_maxCLevel(void) { return ZSTD_MAX_CLEVEL; }
 int ZSTD_minCLevel(void) { return (int)-ZSTD_TARGETLENGTH_MAX; }
-
-static const ZSTD_compressionParameters ZSTD_defaultCParameters[4][ZSTD_MAX_CLEVEL+1] = {
-{   /* "default" - for any srcSize > 256 KB */
-    /* W,  C,  H,  S,  L, TL, strat */
-    { 19, 12, 13,  1,  6,  1, ZSTD_fast    },  /* base for negative levels */
-    { 19, 13, 14,  1,  7,  0, ZSTD_fast    },  /* level  1 */
-    { 20, 15, 16,  1,  6,  0, ZSTD_fast    },  /* level  2 */
-    { 21, 16, 17,  1,  5,  0, ZSTD_dfast   },  /* level  3 */
-    { 21, 18, 18,  1,  5,  0, ZSTD_dfast   },  /* level  4 */
-    { 21, 18, 19,  2,  5,  2, ZSTD_greedy  },  /* level  5 */
-    { 21, 19, 19,  3,  5,  4, ZSTD_greedy  },  /* level  6 */
-    { 21, 19, 19,  3,  5,  8, ZSTD_lazy    },  /* level  7 */
-    { 21, 19, 19,  3,  5, 16, ZSTD_lazy2   },  /* level  8 */
-    { 21, 19, 20,  4,  5, 16, ZSTD_lazy2   },  /* level  9 */
-    { 22, 20, 21,  4,  5, 16, ZSTD_lazy2   },  /* level 10 */
-    { 22, 21, 22,  4,  5, 16, ZSTD_lazy2   },  /* level 11 */
-    { 22, 21, 22,  5,  5, 16, ZSTD_lazy2   },  /* level 12 */
-    { 22, 21, 22,  5,  5, 32, ZSTD_btlazy2 },  /* level 13 */
-    { 22, 22, 23,  5,  5, 32, ZSTD_btlazy2 },  /* level 14 */
-    { 22, 23, 23,  6,  5, 32, ZSTD_btlazy2 },  /* level 15 */
-    { 22, 22, 22,  5,  5, 48, ZSTD_btopt   },  /* level 16 */
-    { 23, 23, 22,  5,  4, 64, ZSTD_btopt   },  /* level 17 */
-    { 23, 23, 22,  6,  3, 64, ZSTD_btultra },  /* level 18 */
-    { 23, 24, 22,  7,  3,256, ZSTD_btultra2},  /* level 19 */
-    { 25, 25, 23,  7,  3,256, ZSTD_btultra2},  /* level 20 */
-    { 26, 26, 24,  7,  3,512, ZSTD_btultra2},  /* level 21 */
-    { 27, 27, 25,  9,  3,999, ZSTD_btultra2},  /* level 22 */
-},
-{   /* for srcSize <= 256 KB */
-    /* W,  C,  H,  S,  L,  T, strat */
-    { 18, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
-    { 18, 13, 14,  1,  6,  0, ZSTD_fast    },  /* level  1 */
-    { 18, 14, 14,  1,  5,  0, ZSTD_dfast   },  /* level  2 */
-    { 18, 16, 16,  1,  4,  0, ZSTD_dfast   },  /* level  3 */
-    { 18, 16, 17,  2,  5,  2, ZSTD_greedy  },  /* level  4.*/
-    { 18, 18, 18,  3,  5,  2, ZSTD_greedy  },  /* level  5.*/
-    { 18, 18, 19,  3,  5,  4, ZSTD_lazy    },  /* level  6.*/
-    { 18, 18, 19,  4,  4,  4, ZSTD_lazy    },  /* level  7 */
-    { 18, 18, 19,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
-    { 18, 18, 19,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
-    { 18, 18, 19,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
-    { 18, 18, 19,  5,  4, 12, ZSTD_btlazy2 },  /* level 11.*/
-    { 18, 19, 19,  7,  4, 12, ZSTD_btlazy2 },  /* level 12.*/
-    { 18, 18, 19,  4,  4, 16, ZSTD_btopt   },  /* level 13 */
-    { 18, 18, 19,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
-    { 18, 18, 19,  6,  3,128, ZSTD_btopt   },  /* level 15.*/
-    { 18, 19, 19,  6,  3,128, ZSTD_btultra },  /* level 16.*/
-    { 18, 19, 19,  8,  3,256, ZSTD_btultra },  /* level 17.*/
-    { 18, 19, 19,  6,  3,128, ZSTD_btultra2},  /* level 18.*/
-    { 18, 19, 19,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
-    { 18, 19, 19, 10,  3,512, ZSTD_btultra2},  /* level 20.*/
-    { 18, 19, 19, 12,  3,512, ZSTD_btultra2},  /* level 21.*/
-    { 18, 19, 19, 13,  3,999, ZSTD_btultra2},  /* level 22.*/
-},
-{   /* for srcSize <= 128 KB */
-    /* W,  C,  H,  S,  L,  T, strat */
-    { 17, 12, 12,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
-    { 17, 12, 13,  1,  6,  0, ZSTD_fast    },  /* level  1 */
-    { 17, 13, 15,  1,  5,  0, ZSTD_fast    },  /* level  2 */
-    { 17, 15, 16,  2,  5,  0, ZSTD_dfast   },  /* level  3 */
-    { 17, 17, 17,  2,  4,  0, ZSTD_dfast   },  /* level  4 */
-    { 17, 16, 17,  3,  4,  2, ZSTD_greedy  },  /* level  5 */
-    { 17, 17, 17,  3,  4,  4, ZSTD_lazy    },  /* level  6 */
-    { 17, 17, 17,  3,  4,  8, ZSTD_lazy2   },  /* level  7 */
-    { 17, 17, 17,  4,  4,  8, ZSTD_lazy2   },  /* level  8 */
-    { 17, 17, 17,  5,  4,  8, ZSTD_lazy2   },  /* level  9 */
-    { 17, 17, 17,  6,  4,  8, ZSTD_lazy2   },  /* level 10 */
-    { 17, 17, 17,  5,  4,  8, ZSTD_btlazy2 },  /* level 11 */
-    { 17, 18, 17,  7,  4, 12, ZSTD_btlazy2 },  /* level 12 */
-    { 17, 18, 17,  3,  4, 12, ZSTD_btopt   },  /* level 13.*/
-    { 17, 18, 17,  4,  3, 32, ZSTD_btopt   },  /* level 14.*/
-    { 17, 18, 17,  6,  3,256, ZSTD_btopt   },  /* level 15.*/
-    { 17, 18, 17,  6,  3,128, ZSTD_btultra },  /* level 16.*/
-    { 17, 18, 17,  8,  3,256, ZSTD_btultra },  /* level 17.*/
-    { 17, 18, 17, 10,  3,512, ZSTD_btultra },  /* level 18.*/
-    { 17, 18, 17,  5,  3,256, ZSTD_btultra2},  /* level 19.*/
-    { 17, 18, 17,  7,  3,512, ZSTD_btultra2},  /* level 20.*/
-    { 17, 18, 17,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
-    { 17, 18, 17, 11,  3,999, ZSTD_btultra2},  /* level 22.*/
-},
-{   /* for srcSize <= 16 KB */
-    /* W,  C,  H,  S,  L,  T, strat */
-    { 14, 12, 13,  1,  5,  1, ZSTD_fast    },  /* base for negative levels */
-    { 14, 14, 15,  1,  5,  0, ZSTD_fast    },  /* level  1 */
-    { 14, 14, 15,  1,  4,  0, ZSTD_fast    },  /* level  2 */
-    { 14, 14, 15,  2,  4,  0, ZSTD_dfast   },  /* level  3 */
-    { 14, 14, 14,  4,  4,  2, ZSTD_greedy  },  /* level  4 */
-    { 14, 14, 14,  3,  4,  4, ZSTD_lazy    },  /* level  5.*/
-    { 14, 14, 14,  4,  4,  8, ZSTD_lazy2   },  /* level  6 */
-    { 14, 14, 14,  6,  4,  8, ZSTD_lazy2   },  /* level  7 */
-    { 14, 14, 14,  8,  4,  8, ZSTD_lazy2   },  /* level  8.*/
-    { 14, 15, 14,  5,  4,  8, ZSTD_btlazy2 },  /* level  9.*/
-    { 14, 15, 14,  9,  4,  8, ZSTD_btlazy2 },  /* level 10.*/
-    { 14, 15, 14,  3,  4, 12, ZSTD_btopt   },  /* level 11.*/
-    { 14, 15, 14,  4,  3, 24, ZSTD_btopt   },  /* level 12.*/
-    { 14, 15, 14,  5,  3, 32, ZSTD_btultra },  /* level 13.*/
-    { 14, 15, 15,  6,  3, 64, ZSTD_btultra },  /* level 14.*/
-    { 14, 15, 15,  7,  3,256, ZSTD_btultra },  /* level 15.*/
-    { 14, 15, 15,  5,  3, 48, ZSTD_btultra2},  /* level 16.*/
-    { 14, 15, 15,  6,  3,128, ZSTD_btultra2},  /* level 17.*/
-    { 14, 15, 15,  7,  3,256, ZSTD_btultra2},  /* level 18.*/
-    { 14, 15, 15,  8,  3,256, ZSTD_btultra2},  /* level 19.*/
-    { 14, 15, 15,  8,  3,512, ZSTD_btultra2},  /* level 20.*/
-    { 14, 15, 15,  9,  3,512, ZSTD_btultra2},  /* level 21.*/
-    { 14, 15, 15, 10,  3,999, ZSTD_btultra2},  /* level 22.*/
-},
-};
+int ZSTD_defaultCLevel(void) { return ZSTD_CLEVEL_DEFAULT; }
 
 static ZSTD_compressionParameters ZSTD_dedicatedDictSearch_getCParams(int const compressionLevel, size_t const dictSize)
 {
@@ -4999,7 +6013,7 @@ static int ZSTD_dedicatedDictSearch_isSupported(
 {
     return (cParams->strategy >= ZSTD_greedy)
         && (cParams->strategy <= ZSTD_lazy2)
-        && (cParams->hashLog >= cParams->chainLog)
+        && (cParams->hashLog > cParams->chainLog)
         && (cParams->chainLog <= 24);
 }
 
@@ -5018,6 +6032,9 @@ static void ZSTD_dedicatedDictSearch_revertCParams(
         case ZSTD_lazy:
         case ZSTD_lazy2:
             cParams->hashLog -= ZSTD_LAZY_DDSS_BUCKET_LOG;
+            if (cParams->hashLog < ZSTD_HASHLOG_MIN) {
+                cParams->hashLog = ZSTD_HASHLOG_MIN;
+            }
             break;
         case ZSTD_btlazy2:
         case ZSTD_btopt:
@@ -5066,6 +6083,7 @@ static ZSTD_compressionParameters ZSTD_getCParams_internal(int compressionLevel,
     else row = compressionLevel;
 
     {   ZSTD_compressionParameters cp = ZSTD_defaultCParameters[tableID][row];
+        DEBUGLOG(5, "ZSTD_getCParams_internal selected tableID: %u row: %u strat: %u", tableID, row, (U32)cp.strategy);
         /* acceleration factor */
         if (compressionLevel < 0) {
             int const clampedCompressionLevel = MAX(ZSTD_minCLevel(), compressionLevel);
diff --git a/lib/zstd/compress/zstd_compress_internal.h b/lib/zstd/compress/zstd_compress_internal.h
index 685d2f996cc2a..71697a11ae305 100644
--- a/lib/zstd/compress/zstd_compress_internal.h
+++ b/lib/zstd/compress/zstd_compress_internal.h
@@ -57,7 +57,7 @@ typedef struct {
 } ZSTD_localDict;
 
 typedef struct {
-    HUF_CElt CTable[HUF_CTABLE_SIZE_U32(255)];
+    HUF_CElt CTable[HUF_CTABLE_SIZE_ST(255)];
     HUF_repeat repeatMode;
 } ZSTD_hufCTables_t;
 
@@ -75,8 +75,55 @@ typedef struct {
     ZSTD_fseCTables_t fse;
 } ZSTD_entropyCTables_t;
 
+/* *********************************************
+*  Entropy buffer statistics structs and funcs *
+***********************************************/
+/* ZSTD_hufCTablesMetadata_t :
+ *  Stores Literals Block Type for a super-block in hType, and
+ *  huffman tree description in hufDesBuffer.
+ *  hufDesSize refers to the size of huffman tree description in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_literals() */
 typedef struct {
-    U32 off;            /* Offset code (offset + ZSTD_REP_MOVE) for the match */
+    symbolEncodingType_e hType;
+    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
+    size_t hufDesSize;
+} ZSTD_hufCTablesMetadata_t;
+
+/* ZSTD_fseCTablesMetadata_t :
+ *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
+ *  fse tables in fseTablesBuffer.
+ *  fseTablesSize refers to the size of fse tables in bytes.
+ *  This metadata is populated in ZSTD_buildBlockEntropyStats_sequences() */
+typedef struct {
+    symbolEncodingType_e llType;
+    symbolEncodingType_e ofType;
+    symbolEncodingType_e mlType;
+    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
+    size_t fseTablesSize;
+    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_entropyCompressSeqStore_internal() */
+} ZSTD_fseCTablesMetadata_t;
+
+typedef struct {
+    ZSTD_hufCTablesMetadata_t hufMetadata;
+    ZSTD_fseCTablesMetadata_t fseMetadata;
+} ZSTD_entropyCTablesMetadata_t;
+
+/* ZSTD_buildBlockEntropyStats() :
+ *  Builds entropy for the block.
+ *  @return : 0 on success or error code */
+size_t ZSTD_buildBlockEntropyStats(seqStore_t* seqStorePtr,
+                             const ZSTD_entropyCTables_t* prevEntropy,
+                                   ZSTD_entropyCTables_t* nextEntropy,
+                             const ZSTD_CCtx_params* cctxParams,
+                                   ZSTD_entropyCTablesMetadata_t* entropyMetadata,
+                                   void* workspace, size_t wkspSize);
+
+/* *******************************
+*  Compression internals structs *
+*********************************/
+
+typedef struct {
+    U32 off;            /* Offset sumtype code for the match, using ZSTD_storeSeq() format */
     U32 len;            /* Raw length of match */
 } ZSTD_match_t;
 
@@ -126,7 +173,7 @@ typedef struct {
     U32  offCodeSumBasePrice;    /* to compare to log2(offreq)  */
     ZSTD_OptPrice_e priceType;   /* prices can be determined dynamically, or follow a pre-defined cost structure */
     const ZSTD_entropyCTables_t* symbolCosts;  /* pre-calculated dictionary statistics */
-    ZSTD_literalCompressionMode_e literalCompressionMode;
+    ZSTD_paramSwitch_e literalCompressionMode;
 } optState_t;
 
 typedef struct {
@@ -135,14 +182,23 @@ typedef struct {
 } ZSTD_compressedBlockState_t;
 
 typedef struct {
-    BYTE const* nextSrc;    /* next block here to continue on current prefix */
-    BYTE const* base;       /* All regular indexes relative to this position */
-    BYTE const* dictBase;   /* extDict indexes relative to this position */
-    U32 dictLimit;          /* below that point, need extDict */
-    U32 lowLimit;           /* below that point, no more valid data */
+    BYTE const* nextSrc;       /* next block here to continue on current prefix */
+    BYTE const* base;          /* All regular indexes relative to this position */
+    BYTE const* dictBase;      /* extDict indexes relative to this position */
+    U32 dictLimit;             /* below that point, need extDict */
+    U32 lowLimit;              /* below that point, no more valid data */
+    U32 nbOverflowCorrections; /* Number of times overflow correction has run since
+                                * ZSTD_window_init(). Useful for debugging coredumps
+                                * and for ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY.
+                                */
 } ZSTD_window_t;
 
+#define ZSTD_WINDOW_START_INDEX 2
+
 typedef struct ZSTD_matchState_t ZSTD_matchState_t;
+
+#define ZSTD_ROW_HASH_CACHE_SIZE 8       /* Size of prefetching hash cache for row-based matchfinder */
+
 struct ZSTD_matchState_t {
     ZSTD_window_t window;   /* State for window round buffer management */
     U32 loadedDictEnd;      /* index of end of dictionary, within context's referential.
@@ -154,9 +210,17 @@ struct ZSTD_matchState_t {
                              */
     U32 nextToUpdate;       /* index from which to continue table update */
     U32 hashLog3;           /* dispatch table for matches of len==3 : larger == faster, more memory */
+
+    U32 rowHashLog;                          /* For row-based matchfinder: Hashlog based on nb of rows in the hashTable.*/
+    U16* tagTable;                           /* For row-based matchFinder: A row-based table containing the hashes and head index. */
+    U32 hashCache[ZSTD_ROW_HASH_CACHE_SIZE]; /* For row-based matchFinder: a cache of hashes to improve speed */
+
     U32* hashTable;
     U32* hashTable3;
     U32* chainTable;
+
+    U32 forceNonContiguous; /* Non-zero if we should force non-contiguous load for the next window update. */
+
     int dedicatedDictSearch;  /* Indicates whether this matchState is using the
                                * dedicated dictionary search structure.
                                */
@@ -196,7 +260,7 @@ typedef struct {
 } ldmState_t;
 
 typedef struct {
-    U32 enableLdm;          /* 1 if enable long distance matching */
+    ZSTD_paramSwitch_e enableLdm; /* ZSTD_ps_enable to enable LDM. ZSTD_ps_auto by default */
     U32 hashLog;            /* Log size of hashTable */
     U32 bucketSizeLog;      /* Log bucket size for collision resolution, at most 8 */
     U32 minMatchLength;     /* Minimum match length */
@@ -227,7 +291,7 @@ struct ZSTD_CCtx_params_s {
                                 * There is no guarantee that hint is close to actual source size */
 
     ZSTD_dictAttachPref_e attachDictPref;
-    ZSTD_literalCompressionMode_e literalCompressionMode;
+    ZSTD_paramSwitch_e literalCompressionMode;
 
     /* Multithreading: used to pass parameters to mtctx */
     int nbWorkers;
@@ -249,6 +313,15 @@ struct ZSTD_CCtx_params_s {
     ZSTD_sequenceFormat_e blockDelimiters;
     int validateSequences;
 
+    /* Block splitting */
+    ZSTD_paramSwitch_e useBlockSplitter;
+
+    /* Param for deciding whether to use row-based matchfinder */
+    ZSTD_paramSwitch_e useRowMatchFinder;
+
+    /* Always load a dictionary in ext-dict mode (not prefix mode)? */
+    int deterministicRefPrefix;
+
     /* Internal use, for createCCtxParams() and freeCCtxParams() only */
     ZSTD_customMem customMem;
 };  /* typedef'd to ZSTD_CCtx_params within "zstd.h" */
@@ -266,12 +339,29 @@ typedef enum {
     ZSTDb_buffered
 } ZSTD_buffered_policy_e;
 
+/*
+ * Struct that contains all elements of block splitter that should be allocated
+ * in a wksp.
+ */
+#define ZSTD_MAX_NB_BLOCK_SPLITS 196
+typedef struct {
+    seqStore_t fullSeqStoreChunk;
+    seqStore_t firstHalfSeqStore;
+    seqStore_t secondHalfSeqStore;
+    seqStore_t currSeqStore;
+    seqStore_t nextSeqStore;
+
+    U32 partitions[ZSTD_MAX_NB_BLOCK_SPLITS];
+    ZSTD_entropyCTablesMetadata_t entropyMetadata;
+} ZSTD_blockSplitCtx;
+
 struct ZSTD_CCtx_s {
     ZSTD_compressionStage_e stage;
     int cParamsChanged;                  /* == 1 if cParams(except wlog) or compression level are changed in requestedParams. Triggers transmission of new params to ZSTDMT (if available) then reset to 0. */
     int bmi2;                            /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
     ZSTD_CCtx_params requestedParams;
     ZSTD_CCtx_params appliedParams;
+    ZSTD_CCtx_params simpleApiParams;    /* Param storage used by the simple API - not sticky. Must only be used in top-level simple API functions for storage. */
     U32   dictID;
     size_t dictContentSize;
 
@@ -296,7 +386,7 @@ struct ZSTD_CCtx_s {
     ZSTD_blockState_t blockState;
     U32* entropyWorkspace;  /* entropy workspace of ENTROPY_WORKSPACE_SIZE bytes */
 
-    /* Wether we are streaming or not */
+    /* Whether we are streaming or not */
     ZSTD_buffered_policy_e bufferedPolicy;
 
     /* streaming */
@@ -324,6 +414,9 @@ struct ZSTD_CCtx_s {
     /* Multi-threading */
 
     /* Tracing */
+
+    /* Workspace for block splitter */
+    ZSTD_blockSplitCtx blockSplitCtx;
 };
 
 typedef enum { ZSTD_dtlm_fast, ZSTD_dtlm_full } ZSTD_dictTableLoadMethod_e;
@@ -358,7 +451,7 @@ typedef enum {
 typedef size_t (*ZSTD_blockCompressor) (
         ZSTD_matchState_t* bs, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
-ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_dictMode_e dictMode);
+ZSTD_blockCompressor ZSTD_selectBlockCompressor(ZSTD_strategy strat, ZSTD_paramSwitch_e rowMatchfinderMode, ZSTD_dictMode_e dictMode);
 
 
 MEM_STATIC U32 ZSTD_LLcode(U32 litLength)
@@ -392,31 +485,6 @@ MEM_STATIC U32 ZSTD_MLcode(U32 mlBase)
     return (mlBase > 127) ? ZSTD_highbit32(mlBase) + ML_deltaCode : ML_Code[mlBase];
 }
 
-typedef struct repcodes_s {
-    U32 rep[3];
-} repcodes_t;
-
-MEM_STATIC repcodes_t ZSTD_updateRep(U32 const rep[3], U32 const offset, U32 const ll0)
-{
-    repcodes_t newReps;
-    if (offset >= ZSTD_REP_NUM) {  /* full offset */
-        newReps.rep[2] = rep[1];
-        newReps.rep[1] = rep[0];
-        newReps.rep[0] = offset - ZSTD_REP_MOVE;
-    } else {   /* repcode */
-        U32 const repCode = offset + ll0;
-        if (repCode > 0) {  /* note : if repCode==0, no change */
-            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
-            newReps.rep[2] = (repCode >= 2) ? rep[1] : rep[2];
-            newReps.rep[1] = rep[0];
-            newReps.rep[0] = currentOffset;
-        } else {   /* repCode == 0 */
-            ZSTD_memcpy(&newReps, rep, sizeof(newReps));
-        }
-    }
-    return newReps;
-}
-
 /* ZSTD_cParam_withinBounds:
  * @return 1 if value is within cParam bounds,
  * 0 otherwise */
@@ -465,17 +533,17 @@ MEM_STATIC size_t ZSTD_minGain(size_t srcSize, ZSTD_strategy strat)
     return (srcSize >> minlog) + 2;
 }
 
-MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParams)
+MEM_STATIC int ZSTD_literalsCompressionIsDisabled(const ZSTD_CCtx_params* cctxParams)
 {
     switch (cctxParams->literalCompressionMode) {
-    case ZSTD_lcm_huffman:
+    case ZSTD_ps_enable:
         return 0;
-    case ZSTD_lcm_uncompressed:
+    case ZSTD_ps_disable:
         return 1;
     default:
         assert(0 /* impossible: pre-validated */);
         ZSTD_FALLTHROUGH;
-    case ZSTD_lcm_auto:
+    case ZSTD_ps_auto:
         return (cctxParams->cParams.strategy == ZSTD_fast) && (cctxParams->cParams.targetLength > 0);
     }
 }
@@ -485,7 +553,9 @@ MEM_STATIC int ZSTD_disableLiteralsCompression(const ZSTD_CCtx_params* cctxParam
  *  Only called when the sequence ends past ilimit_w, so it only needs to be optimized for single
  *  large copies.
  */
-static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w) {
+static void
+ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const iend, BYTE const* ilimit_w)
+{
     assert(iend > ilimit_w);
     if (ip <= ilimit_w) {
         ZSTD_wildcopy(op, ip, ilimit_w - ip, ZSTD_no_overlap);
@@ -495,14 +565,30 @@ static void ZSTD_safecopyLiterals(BYTE* op, BYTE const* ip, BYTE const* const ie
     while (ip < iend) *op++ = *ip++;
 }
 
+#define ZSTD_REP_MOVE     (ZSTD_REP_NUM-1)
+#define STORE_REPCODE_1 STORE_REPCODE(1)
+#define STORE_REPCODE_2 STORE_REPCODE(2)
+#define STORE_REPCODE_3 STORE_REPCODE(3)
+#define STORE_REPCODE(r) (assert((r)>=1), assert((r)<=3), (r)-1)
+#define STORE_OFFSET(o)  (assert((o)>0), o + ZSTD_REP_MOVE)
+#define STORED_IS_OFFSET(o)  ((o) > ZSTD_REP_MOVE)
+#define STORED_IS_REPCODE(o) ((o) <= ZSTD_REP_MOVE)
+#define STORED_OFFSET(o)  (assert(STORED_IS_OFFSET(o)), (o)-ZSTD_REP_MOVE)
+#define STORED_REPCODE(o) (assert(STORED_IS_REPCODE(o)), (o)+1)  /* returns ID 1,2,3 */
+#define STORED_TO_OFFBASE(o) ((o)+1)
+#define OFFBASE_TO_STORED(o) ((o)-1)
+
 /*! ZSTD_storeSeq() :
- *  Store a sequence (litlen, litPtr, offCode and mlBase) into seqStore_t.
- *  `offCode` : distance to match + ZSTD_REP_MOVE (values <= ZSTD_REP_MOVE are repCodes).
- *  `mlBase` : matchLength - MINMATCH
+ *  Store a sequence (litlen, litPtr, offCode and matchLength) into seqStore_t.
+ *  @offBase_minus1 : Users should use employ macros STORE_REPCODE_X and STORE_OFFSET().
+ *  @matchLength : must be >= MINMATCH
  *  Allowed to overread literals up to litLimit.
 */
-HINT_INLINE UNUSED_ATTR
-void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, const BYTE* litLimit, U32 offCode, size_t mlBase)
+HINT_INLINE UNUSED_ATTR void
+ZSTD_storeSeq(seqStore_t* seqStorePtr,
+              size_t litLength, const BYTE* literals, const BYTE* litLimit,
+              U32 offBase_minus1,
+              size_t matchLength)
 {
     BYTE const* const litLimit_w = litLimit - WILDCOPY_OVERLENGTH;
     BYTE const* const litEnd = literals + litLength;
@@ -511,7 +597,7 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
     if (g_start==NULL) g_start = (const BYTE*)literals;  /* note : index only works for compression within a single segment */
     {   U32 const pos = (U32)((const BYTE*)literals - g_start);
         DEBUGLOG(6, "Cpos%7u :%3u literals, match%4u bytes at offCode%7u",
-               pos, (U32)litLength, (U32)mlBase+MINMATCH, (U32)offCode);
+               pos, (U32)litLength, (U32)matchLength, (U32)offBase_minus1);
     }
 #endif
     assert((size_t)(seqStorePtr->sequences - seqStorePtr->sequencesStart) < seqStorePtr->maxNbSeq);
@@ -535,26 +621,66 @@ void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* litera
 
     /* literal Length */
     if (litLength>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 1;
+        assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+        seqStorePtr->longLengthType = ZSTD_llt_literalLength;
         seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
     }
     seqStorePtr->sequences[0].litLength = (U16)litLength;
 
     /* match offset */
-    seqStorePtr->sequences[0].offset = offCode + 1;
+    seqStorePtr->sequences[0].offBase = STORED_TO_OFFBASE(offBase_minus1);
 
     /* match Length */
-    if (mlBase>0xFFFF) {
-        assert(seqStorePtr->longLengthID == 0); /* there can only be a single long length */
-        seqStorePtr->longLengthID = 2;
-        seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+    assert(matchLength >= MINMATCH);
+    {   size_t const mlBase = matchLength - MINMATCH;
+        if (mlBase>0xFFFF) {
+            assert(seqStorePtr->longLengthType == ZSTD_llt_none); /* there can only be a single long length */
+            seqStorePtr->longLengthType = ZSTD_llt_matchLength;
+            seqStorePtr->longLengthPos = (U32)(seqStorePtr->sequences - seqStorePtr->sequencesStart);
+        }
+        seqStorePtr->sequences[0].mlBase = (U16)mlBase;
     }
-    seqStorePtr->sequences[0].matchLength = (U16)mlBase;
 
     seqStorePtr->sequences++;
 }
 
+/* ZSTD_updateRep() :
+ * updates in-place @rep (array of repeat offsets)
+ * @offBase_minus1 : sum-type, with same numeric representation as ZSTD_storeSeq()
+ */
+MEM_STATIC void
+ZSTD_updateRep(U32 rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
+{
+    if (STORED_IS_OFFSET(offBase_minus1)) {  /* full offset */
+        rep[2] = rep[1];
+        rep[1] = rep[0];
+        rep[0] = STORED_OFFSET(offBase_minus1);
+    } else {   /* repcode */
+        U32 const repCode = STORED_REPCODE(offBase_minus1) - 1 + ll0;
+        if (repCode > 0) {  /* note : if repCode==0, no change */
+            U32 const currentOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
+            rep[2] = (repCode >= 2) ? rep[1] : rep[2];
+            rep[1] = rep[0];
+            rep[0] = currentOffset;
+        } else {   /* repCode == 0 */
+            /* nothing to do */
+        }
+    }
+}
+
+typedef struct repcodes_s {
+    U32 rep[3];
+} repcodes_t;
+
+MEM_STATIC repcodes_t
+ZSTD_newRep(U32 const rep[ZSTD_REP_NUM], U32 const offBase_minus1, U32 const ll0)
+{
+    repcodes_t newReps;
+    ZSTD_memcpy(&newReps, rep, sizeof(newReps));
+    ZSTD_updateRep(newReps.rep, offBase_minus1, ll0);
+    return newReps;
+}
+
 
 /*-*************************************
 *  Match length counter
@@ -778,6 +904,13 @@ MEM_STATIC void ZSTD_window_clear(ZSTD_window_t* window)
     window->dictLimit = end;
 }
 
+MEM_STATIC U32 ZSTD_window_isEmpty(ZSTD_window_t const window)
+{
+    return window.dictLimit == ZSTD_WINDOW_START_INDEX &&
+           window.lowLimit == ZSTD_WINDOW_START_INDEX &&
+           (window.nextSrc - window.base) == ZSTD_WINDOW_START_INDEX;
+}
+
 /*
  * ZSTD_window_hasExtDict():
  * Returns non-zero if the window has a non-empty extDict.
@@ -801,15 +934,71 @@ MEM_STATIC ZSTD_dictMode_e ZSTD_matchState_dictMode(const ZSTD_matchState_t *ms)
             ZSTD_noDict;
 }
 
+/* Defining this macro to non-zero tells zstd to run the overflow correction
+ * code much more frequently. This is very inefficient, and should only be
+ * used for tests and fuzzers.
+ */
+#ifndef ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY
+#  ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 1
+#  else
+#    define ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY 0
+#  endif
+#endif
+
+/*
+ * ZSTD_window_canOverflowCorrect():
+ * Returns non-zero if the indices are large enough for overflow correction
+ * to work correctly without impacting compression ratio.
+ */
+MEM_STATIC U32 ZSTD_window_canOverflowCorrect(ZSTD_window_t const window,
+                                              U32 cycleLog,
+                                              U32 maxDist,
+                                              U32 loadedDictEnd,
+                                              void const* src)
+{
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const curr = (U32)((BYTE const*)src - window.base);
+    U32 const minIndexToOverflowCorrect = cycleSize
+                                        + MAX(maxDist, cycleSize)
+                                        + ZSTD_WINDOW_START_INDEX;
+
+    /* Adjust the min index to backoff the overflow correction frequency,
+     * so we don't waste too much CPU in overflow correction. If this
+     * computation overflows we don't really care, we just need to make
+     * sure it is at least minIndexToOverflowCorrect.
+     */
+    U32 const adjustment = window.nbOverflowCorrections + 1;
+    U32 const adjustedIndex = MAX(minIndexToOverflowCorrect * adjustment,
+                                  minIndexToOverflowCorrect);
+    U32 const indexLargeEnough = curr > adjustedIndex;
+
+    /* Only overflow correct early if the dictionary is invalidated already,
+     * so we don't hurt compression ratio.
+     */
+    U32 const dictionaryInvalidated = curr > maxDist + loadedDictEnd;
+
+    return indexLargeEnough && dictionaryInvalidated;
+}
+
 /*
  * ZSTD_window_needOverflowCorrection():
  * Returns non-zero if the indices are getting too large and need overflow
  * protection.
  */
 MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
+                                                  U32 cycleLog,
+                                                  U32 maxDist,
+                                                  U32 loadedDictEnd,
+                                                  void const* src,
                                                   void const* srcEnd)
 {
     U32 const curr = (U32)((BYTE const*)srcEnd - window.base);
+    if (ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        if (ZSTD_window_canOverflowCorrect(window, cycleLog, maxDist, loadedDictEnd, src)) {
+            return 1;
+        }
+    }
     return curr > ZSTD_CURRENT_MAX;
 }
 
@@ -821,7 +1010,6 @@ MEM_STATIC U32 ZSTD_window_needOverflowCorrection(ZSTD_window_t const window,
  *
  * The least significant cycleLog bits of the indices must remain the same,
  * which may be 0. Every index up to maxDist in the past must be valid.
- * NOTE: (maxDist & cycleMask) must be zero.
  */
 MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
                                            U32 maxDist, void const* src)
@@ -845,32 +1033,52 @@ MEM_STATIC U32 ZSTD_window_correctOverflow(ZSTD_window_t* window, U32 cycleLog,
      * 3. (cctx->lowLimit + 1<<windowLog) < 1<<32:
      *    windowLog <= 31 ==> 3<<29 + 1<<windowLog < 7<<29 < 1<<32.
      */
-    U32 const cycleMask = (1U << cycleLog) - 1;
+    U32 const cycleSize = 1u << cycleLog;
+    U32 const cycleMask = cycleSize - 1;
     U32 const curr = (U32)((BYTE const*)src - window->base);
-    U32 const currentCycle0 = curr & cycleMask;
-    /* Exclude zero so that newCurrent - maxDist >= 1. */
-    U32 const currentCycle1 = currentCycle0 == 0 ? (1U << cycleLog) : currentCycle0;
-    U32 const newCurrent = currentCycle1 + maxDist;
+    U32 const currentCycle = curr & cycleMask;
+    /* Ensure newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX. */
+    U32 const currentCycleCorrection = currentCycle < ZSTD_WINDOW_START_INDEX
+                                     ? MAX(cycleSize, ZSTD_WINDOW_START_INDEX)
+                                     : 0;
+    U32 const newCurrent = currentCycle
+                         + currentCycleCorrection
+                         + MAX(maxDist, cycleSize);
     U32 const correction = curr - newCurrent;
-    assert((maxDist & cycleMask) == 0);
+    /* maxDist must be a power of two so that:
+     *   (newCurrent & cycleMask) == (curr & cycleMask)
+     * This is required to not corrupt the chains / binary tree.
+     */
+    assert((maxDist & (maxDist - 1)) == 0);
+    assert((curr & cycleMask) == (newCurrent & cycleMask));
     assert(curr > newCurrent);
-    /* Loose bound, should be around 1<<29 (see above) */
-    assert(correction > 1<<28);
+    if (!ZSTD_WINDOW_OVERFLOW_CORRECT_FREQUENTLY) {
+        /* Loose bound, should be around 1<<29 (see above) */
+        assert(correction > 1<<28);
+    }
 
     window->base += correction;
     window->dictBase += correction;
-    if (window->lowLimit <= correction) window->lowLimit = 1;
-    else window->lowLimit -= correction;
-    if (window->dictLimit <= correction) window->dictLimit = 1;
-    else window->dictLimit -= correction;
+    if (window->lowLimit < correction + ZSTD_WINDOW_START_INDEX) {
+        window->lowLimit = ZSTD_WINDOW_START_INDEX;
+    } else {
+        window->lowLimit -= correction;
+    }
+    if (window->dictLimit < correction + ZSTD_WINDOW_START_INDEX) {
+        window->dictLimit = ZSTD_WINDOW_START_INDEX;
+    } else {
+        window->dictLimit -= correction;
+    }
 
     /* Ensure we can still reference the full window. */
     assert(newCurrent >= maxDist);
-    assert(newCurrent - maxDist >= 1);
+    assert(newCurrent - maxDist >= ZSTD_WINDOW_START_INDEX);
     /* Ensure that lowLimit and dictLimit didn't underflow. */
     assert(window->lowLimit <= newCurrent);
     assert(window->dictLimit <= newCurrent);
 
+    ++window->nbOverflowCorrections;
+
     DEBUGLOG(4, "Correction of 0x%x bytes to lowLimit=0x%x", correction,
              window->lowLimit);
     return correction;
@@ -975,11 +1183,13 @@ ZSTD_checkDictValidity(const ZSTD_window_t* window,
 
 MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
     ZSTD_memset(window, 0, sizeof(*window));
-    window->base = (BYTE const*)"";
-    window->dictBase = (BYTE const*)"";
-    window->dictLimit = 1;    /* start from 1, so that 1st position is valid */
-    window->lowLimit = 1;     /* it ensures first and later CCtx usages compress the same */
-    window->nextSrc = window->base + 1;   /* see issue #1241 */
+    window->base = (BYTE const*)" ";
+    window->dictBase = (BYTE const*)" ";
+    ZSTD_STATIC_ASSERT(ZSTD_DUBT_UNSORTED_MARK < ZSTD_WINDOW_START_INDEX); /* Start above ZSTD_DUBT_UNSORTED_MARK */
+    window->dictLimit = ZSTD_WINDOW_START_INDEX;    /* start from >0, so that 1st position is valid */
+    window->lowLimit = ZSTD_WINDOW_START_INDEX;     /* it ensures first and later CCtx usages compress the same */
+    window->nextSrc = window->base + ZSTD_WINDOW_START_INDEX;   /* see issue #1241 */
+    window->nbOverflowCorrections = 0;
 }
 
 /*
@@ -990,7 +1200,8 @@ MEM_STATIC void ZSTD_window_init(ZSTD_window_t* window) {
  * Returns non-zero if the segment is contiguous.
  */
 MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
-                                  void const* src, size_t srcSize)
+                                  void const* src, size_t srcSize,
+                                  int forceNonContiguous)
 {
     BYTE const* const ip = (BYTE const*)src;
     U32 contiguous = 1;
@@ -1000,7 +1211,7 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
     assert(window->base != NULL);
     assert(window->dictBase != NULL);
     /* Check if blocks follow each other */
-    if (src != window->nextSrc) {
+    if (src != window->nextSrc || forceNonContiguous) {
         /* not contiguous */
         size_t const distanceFromBase = (size_t)(window->nextSrc - window->base);
         DEBUGLOG(5, "Non contiguous blocks, new segment starts at %u", window->dictLimit);
@@ -1030,15 +1241,15 @@ MEM_STATIC U32 ZSTD_window_update(ZSTD_window_t* window,
  */
 MEM_STATIC U32 ZSTD_getLowestMatchIndex(const ZSTD_matchState_t* ms, U32 curr, unsigned windowLog)
 {
-    U32    const maxDistance = 1U << windowLog;
-    U32    const lowestValid = ms->window.lowLimit;
-    U32    const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
-    U32    const isDictionary = (ms->loadedDictEnd != 0);
+    U32 const maxDistance = 1U << windowLog;
+    U32 const lowestValid = ms->window.lowLimit;
+    U32 const withinWindow = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    U32 const isDictionary = (ms->loadedDictEnd != 0);
     /* When using a dictionary the entire dictionary is valid if a single byte of the dictionary
      * is within the window. We invalidate the dictionary (and set loadedDictEnd to 0) when it isn't
      * valid for the entire block. So this check is sufficient to find the lowest valid match index.
      */
-    U32    const matchLowest = isDictionary ? lowestValid : withinWindow;
+    U32 const matchLowest = isDictionary ? lowestValid : withinWindow;
     return matchLowest;
 }
 
diff --git a/lib/zstd/compress/zstd_compress_literals.c b/lib/zstd/compress/zstd_compress_literals.c
index 655bcda4d1f1c..52b0a8059aba9 100644
--- a/lib/zstd/compress/zstd_compress_literals.c
+++ b/lib/zstd/compress/zstd_compress_literals.c
@@ -73,7 +73,8 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2)
+                        const int bmi2,
+                        unsigned suspectUncompressible)
 {
     size_t const minGain = ZSTD_minGain(srcSize, strategy);
     size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
@@ -105,11 +106,11 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
             HUF_compress1X_repeat(
                 ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                 HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2) :
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible) :
             HUF_compress4X_repeat(
                 ostart+lhSize, dstCapacity-lhSize, src, srcSize,
                 HUF_SYMBOLVALUE_MAX, HUF_TABLELOG_DEFAULT, entropyWorkspace, entropyWorkspaceSize,
-                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2);
+                (HUF_CElt*)nextHuf->CTable, &repeat, preferRepeat, bmi2, suspectUncompressible);
         if (repeat != HUF_repeat_none) {
             /* reused the existing table */
             DEBUGLOG(5, "Reusing previous huffman table");
@@ -117,7 +118,7 @@ size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
         }
     }
 
-    if ((cLitSize==0) | (cLitSize >= srcSize - minGain) | ERR_isError(cLitSize)) {
+    if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
         ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
         return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
     }
diff --git a/lib/zstd/compress/zstd_compress_literals.h b/lib/zstd/compress/zstd_compress_literals.h
index 9904c0cd30a0a..9775fb97cb702 100644
--- a/lib/zstd/compress/zstd_compress_literals.h
+++ b/lib/zstd/compress/zstd_compress_literals.h
@@ -18,12 +18,14 @@ size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src,
 
 size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize);
 
+/* If suspectUncompressible then some sampling checks will be run to potentially skip huffman coding */
 size_t ZSTD_compressLiterals (ZSTD_hufCTables_t const* prevHuf,
                               ZSTD_hufCTables_t* nextHuf,
                               ZSTD_strategy strategy, int disableLiteralCompression,
                               void* dst, size_t dstCapacity,
                         const void* src, size_t srcSize,
                               void* entropyWorkspace, size_t entropyWorkspaceSize,
-                        const int bmi2);
+                        const int bmi2,
+                        unsigned suspectUncompressible);
 
 #endif /* ZSTD_COMPRESS_LITERALS_H */
diff --git a/lib/zstd/compress/zstd_compress_sequences.c b/lib/zstd/compress/zstd_compress_sequences.c
index dcfcdc9cc5e8d..21ddc1b37acf8 100644
--- a/lib/zstd/compress/zstd_compress_sequences.c
+++ b/lib/zstd/compress/zstd_compress_sequences.c
@@ -85,6 +85,8 @@ static size_t ZSTD_entropyCost(unsigned const* count, unsigned const max, size_t
 {
     unsigned cost = 0;
     unsigned s;
+
+    assert(total > 0);
     for (s = 0; s <= max; ++s) {
         unsigned norm = (unsigned)((256 * count[s]) / total);
         if (count[s] != 0 && norm == 0)
@@ -273,10 +275,11 @@ ZSTD_buildCTable(void* dst, size_t dstCapacity,
         assert(nbSeq_1 > 1);
         assert(entropyWorkspaceSize >= sizeof(ZSTD_BuildCTableWksp));
         (void)entropyWorkspaceSize;
-        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "");
-        {   size_t const NCountSize = FSE_writeNCount(op, oend - op, wksp->norm, max, tableLog);   /* overflow protected */
+        FORWARD_IF_ERROR(FSE_normalizeCount(wksp->norm, tableLog, count, nbSeq_1, max, ZSTD_useLowProbCount(nbSeq_1)), "FSE_normalizeCount failed");
+        assert(oend >= op);
+        {   size_t const NCountSize = FSE_writeNCount(op, (size_t)(oend - op), wksp->norm, max, tableLog);   /* overflow protected */
             FORWARD_IF_ERROR(NCountSize, "FSE_writeNCount failed");
-            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "");
+            FORWARD_IF_ERROR(FSE_buildCTable_wksp(nextCTable, wksp->norm, max, tableLog, wksp->wksp, sizeof(wksp->wksp)), "FSE_buildCTable_wksp failed");
             return NCountSize;
         }
     }
@@ -310,19 +313,19 @@ ZSTD_encodeSequences_body(
     FSE_initCState2(&stateLitLength,   CTable_LitLength,   llCodeTable[nbSeq-1]);
     BIT_addBits(&blockStream, sequences[nbSeq-1].litLength, LL_bits[llCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
-    BIT_addBits(&blockStream, sequences[nbSeq-1].matchLength, ML_bits[mlCodeTable[nbSeq-1]]);
+    BIT_addBits(&blockStream, sequences[nbSeq-1].mlBase, ML_bits[mlCodeTable[nbSeq-1]]);
     if (MEM_32bits()) BIT_flushBits(&blockStream);
     if (longOffsets) {
         U32 const ofBits = ofCodeTable[nbSeq-1];
         unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
         if (extraBits) {
-            BIT_addBits(&blockStream, sequences[nbSeq-1].offset, extraBits);
+            BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, extraBits);
             BIT_flushBits(&blockStream);
         }
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset >> extraBits,
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase >> extraBits,
                     ofBits - extraBits);
     } else {
-        BIT_addBits(&blockStream, sequences[nbSeq-1].offset, ofCodeTable[nbSeq-1]);
+        BIT_addBits(&blockStream, sequences[nbSeq-1].offBase, ofCodeTable[nbSeq-1]);
     }
     BIT_flushBits(&blockStream);
 
@@ -336,8 +339,8 @@ ZSTD_encodeSequences_body(
             U32  const mlBits = ML_bits[mlCode];
             DEBUGLOG(6, "encoding: litlen:%2u - matchlen:%2u - offCode:%7u",
                         (unsigned)sequences[n].litLength,
-                        (unsigned)sequences[n].matchLength + MINMATCH,
-                        (unsigned)sequences[n].offset);
+                        (unsigned)sequences[n].mlBase + MINMATCH,
+                        (unsigned)sequences[n].offBase);
                                                                             /* 32b*/  /* 64b*/
                                                                             /* (7)*/  /* (7)*/
             FSE_encodeSymbol(&blockStream, &stateOffsetBits, ofCode);       /* 15 */  /* 15 */
@@ -348,18 +351,18 @@ ZSTD_encodeSequences_body(
                 BIT_flushBits(&blockStream);                                /* (7)*/
             BIT_addBits(&blockStream, sequences[n].litLength, llBits);
             if (MEM_32bits() && ((llBits+mlBits)>24)) BIT_flushBits(&blockStream);
-            BIT_addBits(&blockStream, sequences[n].matchLength, mlBits);
+            BIT_addBits(&blockStream, sequences[n].mlBase, mlBits);
             if (MEM_32bits() || (ofBits+mlBits+llBits > 56)) BIT_flushBits(&blockStream);
             if (longOffsets) {
                 unsigned const extraBits = ofBits - MIN(ofBits, STREAM_ACCUMULATOR_MIN-1);
                 if (extraBits) {
-                    BIT_addBits(&blockStream, sequences[n].offset, extraBits);
+                    BIT_addBits(&blockStream, sequences[n].offBase, extraBits);
                     BIT_flushBits(&blockStream);                            /* (7)*/
                 }
-                BIT_addBits(&blockStream, sequences[n].offset >> extraBits,
+                BIT_addBits(&blockStream, sequences[n].offBase >> extraBits,
                             ofBits - extraBits);                            /* 31 */
             } else {
-                BIT_addBits(&blockStream, sequences[n].offset, ofBits);     /* 31 */
+                BIT_addBits(&blockStream, sequences[n].offBase, ofBits);     /* 31 */
             }
             BIT_flushBits(&blockStream);                                    /* (7)*/
             DEBUGLOG(7, "remaining space : %i", (int)(blockStream.endPtr - blockStream.ptr));
@@ -396,7 +399,7 @@ ZSTD_encodeSequences_default(
 
 #if DYNAMIC_BMI2
 
-static TARGET_ATTRIBUTE("bmi2") size_t
+static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_encodeSequences_bmi2(
             void* dst, size_t dstCapacity,
             FSE_CTable const* CTable_MatchLength, BYTE const* mlCodeTable,
diff --git a/lib/zstd/compress/zstd_compress_superblock.c b/lib/zstd/compress/zstd_compress_superblock.c
index b0610b255653f..17d836cc84e8f 100644
--- a/lib/zstd/compress/zstd_compress_superblock.c
+++ b/lib/zstd/compress/zstd_compress_superblock.c
@@ -15,289 +15,10 @@
 
 #include "../common/zstd_internal.h"  /* ZSTD_getSequenceLength */
 #include "hist.h"                     /* HIST_countFast_wksp */
-#include "zstd_compress_internal.h"
+#include "zstd_compress_internal.h"   /* ZSTD_[huf|fse|entropy]CTablesMetadata_t */
 #include "zstd_compress_sequences.h"
 #include "zstd_compress_literals.h"
 
-/*-*************************************
-*  Superblock entropy buffer structs
-***************************************/
-/* ZSTD_hufCTablesMetadata_t :
- *  Stores Literals Block Type for a super-block in hType, and
- *  huffman tree description in hufDesBuffer.
- *  hufDesSize refers to the size of huffman tree description in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_literal() */
-typedef struct {
-    symbolEncodingType_e hType;
-    BYTE hufDesBuffer[ZSTD_MAX_HUF_HEADER_SIZE];
-    size_t hufDesSize;
-} ZSTD_hufCTablesMetadata_t;
-
-/* ZSTD_fseCTablesMetadata_t :
- *  Stores symbol compression modes for a super-block in {ll, ol, ml}Type, and
- *  fse tables in fseTablesBuffer.
- *  fseTablesSize refers to the size of fse tables in bytes.
- *  This metadata is populated in ZSTD_buildSuperBlockEntropy_sequences() */
-typedef struct {
-    symbolEncodingType_e llType;
-    symbolEncodingType_e ofType;
-    symbolEncodingType_e mlType;
-    BYTE fseTablesBuffer[ZSTD_MAX_FSE_HEADERS_SIZE];
-    size_t fseTablesSize;
-    size_t lastCountSize; /* This is to account for bug in 1.3.4. More detail in ZSTD_compressSubBlock_sequences() */
-} ZSTD_fseCTablesMetadata_t;
-
-typedef struct {
-    ZSTD_hufCTablesMetadata_t hufMetadata;
-    ZSTD_fseCTablesMetadata_t fseMetadata;
-} ZSTD_entropyCTablesMetadata_t;
-
-
-/* ZSTD_buildSuperBlockEntropy_literal() :
- *  Builds entropy for the super-block literals.
- *  Stores literals block type (raw, rle, compressed, repeat) and
- *  huffman description table to hufMetadata.
- *  @return : size of huffman description table or error code */
-static size_t ZSTD_buildSuperBlockEntropy_literal(void* const src, size_t srcSize,
-                                            const ZSTD_hufCTables_t* prevHuf,
-                                                  ZSTD_hufCTables_t* nextHuf,
-                                                  ZSTD_hufCTablesMetadata_t* hufMetadata,
-                                                  const int disableLiteralsCompression,
-                                                  void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (HUF_SYMBOLVALUE_MAX + 1) * sizeof(unsigned);
-    BYTE* const nodeWksp = countWkspStart + countWkspSize;
-    const size_t nodeWkspSize = wkspEnd-nodeWksp;
-    unsigned maxSymbolValue = 255;
-    unsigned huffLog = HUF_TABLELOG_DEFAULT;
-    HUF_repeat repeat = prevHuf->repeatMode;
-
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_literal (srcSize=%zu)", srcSize);
-
-    /* Prepare nextEntropy assuming reusing the existing table */
-    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-
-    if (disableLiteralsCompression) {
-        DEBUGLOG(5, "set_basic - disabled");
-        hufMetadata->hType = set_basic;
-        return 0;
-    }
-
-    /* small ? don't even attempt compression (speed opt) */
-#   define COMPRESS_LITERALS_SIZE_MIN 63
-    {   size_t const minLitSize = (prevHuf->repeatMode == HUF_repeat_valid) ? 6 : COMPRESS_LITERALS_SIZE_MIN;
-        if (srcSize <= minLitSize) {
-            DEBUGLOG(5, "set_basic - too small");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Scan input and build symbol stats */
-    {   size_t const largest = HIST_count_wksp (countWksp, &maxSymbolValue, (const BYTE*)src, srcSize, workspace, wkspSize);
-        FORWARD_IF_ERROR(largest, "HIST_count_wksp failed");
-        if (largest == srcSize) {
-            DEBUGLOG(5, "set_rle");
-            hufMetadata->hType = set_rle;
-            return 0;
-        }
-        if (largest <= (srcSize >> 7)+4) {
-            DEBUGLOG(5, "set_basic - no gain");
-            hufMetadata->hType = set_basic;
-            return 0;
-        }
-    }
-
-    /* Validate the previous Huffman table */
-    if (repeat == HUF_repeat_check && !HUF_validateCTable((HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue)) {
-        repeat = HUF_repeat_none;
-    }
-
-    /* Build Huffman Tree */
-    ZSTD_memset(nextHuf->CTable, 0, sizeof(nextHuf->CTable));
-    huffLog = HUF_optimalTableLog(huffLog, srcSize, maxSymbolValue);
-    {   size_t const maxBits = HUF_buildCTable_wksp((HUF_CElt*)nextHuf->CTable, countWksp,
-                                                    maxSymbolValue, huffLog,
-                                                    nodeWksp, nodeWkspSize);
-        FORWARD_IF_ERROR(maxBits, "HUF_buildCTable_wksp");
-        huffLog = (U32)maxBits;
-        {   /* Build and write the CTable */
-            size_t const newCSize = HUF_estimateCompressedSize(
-                    (HUF_CElt*)nextHuf->CTable, countWksp, maxSymbolValue);
-            size_t const hSize = HUF_writeCTable_wksp(
-                    hufMetadata->hufDesBuffer, sizeof(hufMetadata->hufDesBuffer),
-                    (HUF_CElt*)nextHuf->CTable, maxSymbolValue, huffLog,
-                    nodeWksp, nodeWkspSize);
-            /* Check against repeating the previous CTable */
-            if (repeat != HUF_repeat_none) {
-                size_t const oldCSize = HUF_estimateCompressedSize(
-                        (HUF_CElt const*)prevHuf->CTable, countWksp, maxSymbolValue);
-                if (oldCSize < srcSize && (oldCSize <= hSize + newCSize || hSize + 12 >= srcSize)) {
-                    DEBUGLOG(5, "set_repeat - smaller");
-                    ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                    hufMetadata->hType = set_repeat;
-                    return 0;
-                }
-            }
-            if (newCSize + hSize >= srcSize) {
-                DEBUGLOG(5, "set_basic - no gains");
-                ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
-                hufMetadata->hType = set_basic;
-                return 0;
-            }
-            DEBUGLOG(5, "set_compressed (hSize=%u)", (U32)hSize);
-            hufMetadata->hType = set_compressed;
-            nextHuf->repeatMode = HUF_repeat_check;
-            return hSize;
-        }
-    }
-}
-
-/* ZSTD_buildSuperBlockEntropy_sequences() :
- *  Builds entropy for the super-block sequences.
- *  Stores symbol compression modes and fse table to fseMetadata.
- *  @return : size of fse tables or error code */
-static size_t ZSTD_buildSuperBlockEntropy_sequences(seqStore_t* seqStorePtr,
-                                              const ZSTD_fseCTables_t* prevEntropy,
-                                                    ZSTD_fseCTables_t* nextEntropy,
-                                              const ZSTD_CCtx_params* cctxParams,
-                                                    ZSTD_fseCTablesMetadata_t* fseMetadata,
-                                                    void* workspace, size_t wkspSize)
-{
-    BYTE* const wkspStart = (BYTE*)workspace;
-    BYTE* const wkspEnd = wkspStart + wkspSize;
-    BYTE* const countWkspStart = wkspStart;
-    unsigned* const countWksp = (unsigned*)workspace;
-    const size_t countWkspSize = (MaxSeq + 1) * sizeof(unsigned);
-    BYTE* const cTableWksp = countWkspStart + countWkspSize;
-    const size_t cTableWkspSize = wkspEnd-cTableWksp;
-    ZSTD_strategy const strategy = cctxParams->cParams.strategy;
-    FSE_CTable* CTable_LitLength = nextEntropy->litlengthCTable;
-    FSE_CTable* CTable_OffsetBits = nextEntropy->offcodeCTable;
-    FSE_CTable* CTable_MatchLength = nextEntropy->matchlengthCTable;
-    const BYTE* const ofCodeTable = seqStorePtr->ofCode;
-    const BYTE* const llCodeTable = seqStorePtr->llCode;
-    const BYTE* const mlCodeTable = seqStorePtr->mlCode;
-    size_t const nbSeq = seqStorePtr->sequences - seqStorePtr->sequencesStart;
-    BYTE* const ostart = fseMetadata->fseTablesBuffer;
-    BYTE* const oend = ostart + sizeof(fseMetadata->fseTablesBuffer);
-    BYTE* op = ostart;
-
-    assert(cTableWkspSize >= (1 << MaxFSELog) * sizeof(FSE_FUNCTION_TYPE));
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy_sequences (nbSeq=%zu)", nbSeq);
-    ZSTD_memset(workspace, 0, wkspSize);
-
-    fseMetadata->lastCountSize = 0;
-    /* convert length/distances into codes */
-    ZSTD_seqToCodes(seqStorePtr);
-    /* build CTable for Literal Lengths */
-    {   U32 LLtype;
-        unsigned max = MaxLL;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, llCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        DEBUGLOG(5, "Building LL table");
-        nextEntropy->litlength_repeatMode = prevEntropy->litlength_repeatMode;
-        LLtype = ZSTD_selectEncodingType(&nextEntropy->litlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        LLFSELog, prevEntropy->litlengthCTable,
-                                        LL_defaultNorm, LL_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(set_basic < set_compressed && set_rle < set_compressed);
-        assert(!(LLtype < set_compressed && nextEntropy->litlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_LitLength, LLFSELog, (symbolEncodingType_e)LLtype,
-                                                    countWksp, max, llCodeTable, nbSeq, LL_defaultNorm, LL_defaultNormLog, MaxLL,
-                                                    prevEntropy->litlengthCTable, sizeof(prevEntropy->litlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for LitLens failed");
-            if (LLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->llType = (symbolEncodingType_e) LLtype;
-    }   }
-    /* build CTable for Offsets */
-    {   U32 Offtype;
-        unsigned max = MaxOff;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, ofCodeTable, nbSeq, workspace, wkspSize);  /* can't fail */
-        /* We can only use the basic table if max <= DefaultMaxOff, otherwise the offsets are too large */
-        ZSTD_defaultPolicy_e const defaultPolicy = (max <= DefaultMaxOff) ? ZSTD_defaultAllowed : ZSTD_defaultDisallowed;
-        DEBUGLOG(5, "Building OF table");
-        nextEntropy->offcode_repeatMode = prevEntropy->offcode_repeatMode;
-        Offtype = ZSTD_selectEncodingType(&nextEntropy->offcode_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        OffFSELog, prevEntropy->offcodeCTable,
-                                        OF_defaultNorm, OF_defaultNormLog,
-                                        defaultPolicy, strategy);
-        assert(!(Offtype < set_compressed && nextEntropy->offcode_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_OffsetBits, OffFSELog, (symbolEncodingType_e)Offtype,
-                                                    countWksp, max, ofCodeTable, nbSeq, OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
-                                                    prevEntropy->offcodeCTable, sizeof(prevEntropy->offcodeCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for Offsets failed");
-            if (Offtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->ofType = (symbolEncodingType_e) Offtype;
-    }   }
-    /* build CTable for MatchLengths */
-    {   U32 MLtype;
-        unsigned max = MaxML;
-        size_t const mostFrequent = HIST_countFast_wksp(countWksp, &max, mlCodeTable, nbSeq, workspace, wkspSize);   /* can't fail */
-        DEBUGLOG(5, "Building ML table (remaining space : %i)", (int)(oend-op));
-        nextEntropy->matchlength_repeatMode = prevEntropy->matchlength_repeatMode;
-        MLtype = ZSTD_selectEncodingType(&nextEntropy->matchlength_repeatMode,
-                                        countWksp, max, mostFrequent, nbSeq,
-                                        MLFSELog, prevEntropy->matchlengthCTable,
-                                        ML_defaultNorm, ML_defaultNormLog,
-                                        ZSTD_defaultAllowed, strategy);
-        assert(!(MLtype < set_compressed && nextEntropy->matchlength_repeatMode != FSE_repeat_none)); /* We don't copy tables */
-        {   size_t const countSize = ZSTD_buildCTable(op, oend - op, CTable_MatchLength, MLFSELog, (symbolEncodingType_e)MLtype,
-                                                    countWksp, max, mlCodeTable, nbSeq, ML_defaultNorm, ML_defaultNormLog, MaxML,
-                                                    prevEntropy->matchlengthCTable, sizeof(prevEntropy->matchlengthCTable),
-                                                    cTableWksp, cTableWkspSize);
-            FORWARD_IF_ERROR(countSize, "ZSTD_buildCTable for MatchLengths failed");
-            if (MLtype == set_compressed)
-                fseMetadata->lastCountSize = countSize;
-            op += countSize;
-            fseMetadata->mlType = (symbolEncodingType_e) MLtype;
-    }   }
-    assert((size_t) (op-ostart) <= sizeof(fseMetadata->fseTablesBuffer));
-    return op-ostart;
-}
-
-
-/* ZSTD_buildSuperBlockEntropy() :
- *  Builds entropy for the super-block.
- *  @return : 0 on success or error code */
-static size_t
-ZSTD_buildSuperBlockEntropy(seqStore_t* seqStorePtr,
-                      const ZSTD_entropyCTables_t* prevEntropy,
-                            ZSTD_entropyCTables_t* nextEntropy,
-                      const ZSTD_CCtx_params* cctxParams,
-                            ZSTD_entropyCTablesMetadata_t* entropyMetadata,
-                            void* workspace, size_t wkspSize)
-{
-    size_t const litSize = seqStorePtr->lit - seqStorePtr->litStart;
-    DEBUGLOG(5, "ZSTD_buildSuperBlockEntropy");
-    entropyMetadata->hufMetadata.hufDesSize =
-        ZSTD_buildSuperBlockEntropy_literal(seqStorePtr->litStart, litSize,
-                                            &prevEntropy->huf, &nextEntropy->huf,
-                                            &entropyMetadata->hufMetadata,
-                                            ZSTD_disableLiteralsCompression(cctxParams),
-                                            workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->hufMetadata.hufDesSize, "ZSTD_buildSuperBlockEntropy_literal failed");
-    entropyMetadata->fseMetadata.fseTablesSize =
-        ZSTD_buildSuperBlockEntropy_sequences(seqStorePtr,
-                                              &prevEntropy->fse, &nextEntropy->fse,
-                                              cctxParams,
-                                              &entropyMetadata->fseMetadata,
-                                              workspace, wkspSize);
-    FORWARD_IF_ERROR(entropyMetadata->fseMetadata.fseTablesSize, "ZSTD_buildSuperBlockEntropy_sequences failed");
-    return 0;
-}
-
 /* ZSTD_compressSubBlock_literal() :
  *  Compresses literals section for a sub-block.
  *  When we have to write the Huffman table we will sometimes choose a header
@@ -411,8 +132,7 @@ static size_t ZSTD_seqDecompressedSize(seqStore_t const* seqStore, const seqDef*
     const seqDef* sp = sstart;
     size_t matchLengthSum = 0;
     size_t litLengthSum = 0;
-    /* Only used by assert(), suppress unused variable warnings in production. */
-    (void)litLengthSum;
+    (void)(litLengthSum); /* suppress unused variable warning on some environments */
     while (send-sp > 0) {
         ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, sp);
         litLengthSum += seqLen.litLength;
@@ -605,7 +325,7 @@ static size_t ZSTD_estimateSubBlockSize_literal(const BYTE* literals, size_t lit
 static size_t ZSTD_estimateSubBlockSize_symbolType(symbolEncodingType_e type,
                         const BYTE* codeTable, unsigned maxCode,
                         size_t nbSeq, const FSE_CTable* fseCTable,
-                        const U32* additionalBits,
+                        const U8* additionalBits,
                         short const* defaultNorm, U32 defaultNormLog, U32 defaultMax,
                         void* workspace, size_t wkspSize)
 {
@@ -646,8 +366,9 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable,
                                                   void* workspace, size_t wkspSize,
                                                   int writeEntropy)
 {
-    size_t sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
+    size_t const sequencesSectionHeaderSize = 3; /* Use hard coded size of 3 bytes */
     size_t cSeqSizeEstimate = 0;
+    if (nbSeq == 0) return sequencesSectionHeaderSize;
     cSeqSizeEstimate += ZSTD_estimateSubBlockSize_symbolType(fseMetadata->ofType, ofCodeTable, MaxOff,
                                          nbSeq, fseTables->offcodeCTable, NULL,
                                          OF_defaultNorm, OF_defaultNormLog, DefaultMaxOff,
@@ -754,7 +475,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         /* I think there is an optimization opportunity here.
          * Calling ZSTD_estimateSubBlockSize for every sequence can be wasteful
          * since it recalculates estimate from scratch.
-         * For example, it would recount literal distribution and symbol codes everytime.
+         * For example, it would recount literal distribution and symbol codes every time.
          */
         cBlockSizeEstimate = ZSTD_estimateSubBlockSize(lp, litSize, ofCodePtr, llCodePtr, mlCodePtr, seqCount,
                                                        &nextCBlock->entropy, entropyMetadata,
@@ -818,7 +539,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
             repcodes_t rep;
             ZSTD_memcpy(&rep, prevCBlock->rep, sizeof(rep));
             for (seq = sstart; seq < sp; ++seq) {
-                rep = ZSTD_updateRep(rep.rep, seq->offset - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
+                ZSTD_updateRep(rep.rep, seq->offBase - 1, ZSTD_getSequenceLength(seqStorePtr, seq).litLength == 0);
             }
             ZSTD_memcpy(nextCBlock->rep, &rep, sizeof(rep));
         }
@@ -833,7 +554,7 @@ size_t ZSTD_compressSuperBlock(ZSTD_CCtx* zc,
                                unsigned lastBlock) {
     ZSTD_entropyCTablesMetadata_t entropyMetadata;
 
-    FORWARD_IF_ERROR(ZSTD_buildSuperBlockEntropy(&zc->seqStore,
+    FORWARD_IF_ERROR(ZSTD_buildBlockEntropyStats(&zc->seqStore,
           &zc->blockState.prevCBlock->entropy,
           &zc->blockState.nextCBlock->entropy,
           &zc->appliedParams,
diff --git a/lib/zstd/compress/zstd_cwksp.h b/lib/zstd/compress/zstd_cwksp.h
index 98e359adf5d44..349fc923c355a 100644
--- a/lib/zstd/compress/zstd_cwksp.h
+++ b/lib/zstd/compress/zstd_cwksp.h
@@ -32,6 +32,10 @@
 #define ZSTD_CWKSP_ASAN_REDZONE_SIZE 128
 #endif
 
+
+/* Set our tables and aligneds to align by 64 bytes */
+#define ZSTD_CWKSP_ALIGNMENT_BYTES 64
+
 /*-*************************************
 *  Structures
 ***************************************/
@@ -114,10 +118,11 @@ typedef enum {
  * - Tables: these are any of several different datastructures (hash tables,
  *   chain tables, binary trees) that all respect a common format: they are
  *   uint32_t arrays, all of whose values are between 0 and (nextSrc - base).
- *   Their sizes depend on the cparams.
+ *   Their sizes depend on the cparams. These tables are 64-byte aligned.
  *
  * - Aligned: these buffers are used for various purposes that require 4 byte
- *   alignment, but don't require any initialization before they're used.
+ *   alignment, but don't require any initialization before they're used. These
+ *   buffers are each aligned to 64 bytes.
  *
  * - Buffers: these buffers are used for various purposes that don't require
  *   any alignment or initialization before they're used. This means they can
@@ -130,8 +135,7 @@ typedef enum {
  *
  * 1. Objects
  * 2. Buffers
- * 3. Aligned
- * 4. Tables
+ * 3. Aligned/Tables
  *
  * Attempts to reserve objects of different types out of order will fail.
  */
@@ -184,6 +188,8 @@ MEM_STATIC size_t ZSTD_cwksp_align(size_t size, size_t const align) {
  * Since tables aren't currently redzoned, you don't need to call through this
  * to figure out how much space you need for the matchState tables. Everything
  * else is though.
+ *
+ * Do not use for sizing aligned buffers. Instead, use ZSTD_cwksp_aligned_alloc_size().
  */
 MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
     if (size == 0)
@@ -191,66 +197,139 @@ MEM_STATIC size_t ZSTD_cwksp_alloc_size(size_t size) {
     return size;
 }
 
-MEM_STATIC void ZSTD_cwksp_internal_advance_phase(
-        ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase) {
+/*
+ * Returns an adjusted alloc size that is the nearest larger multiple of 64 bytes.
+ * Used to determine the number of bytes required for a given "aligned".
+ */
+MEM_STATIC size_t ZSTD_cwksp_aligned_alloc_size(size_t size) {
+    return ZSTD_cwksp_alloc_size(ZSTD_cwksp_align(size, ZSTD_CWKSP_ALIGNMENT_BYTES));
+}
+
+/*
+ * Returns the amount of additional space the cwksp must allocate
+ * for internal purposes (currently only alignment).
+ */
+MEM_STATIC size_t ZSTD_cwksp_slack_space_required(void) {
+    /* For alignment, the wksp will always allocate an additional n_1=[1, 64] bytes
+     * to align the beginning of tables section, as well as another n_2=[0, 63] bytes
+     * to align the beginning of the aligned section.
+     *
+     * n_1 + n_2 == 64 bytes if the cwksp is freshly allocated, due to tables and
+     * aligneds being sized in multiples of 64 bytes.
+     */
+    size_t const slackSpace = ZSTD_CWKSP_ALIGNMENT_BYTES;
+    return slackSpace;
+}
+
+
+/*
+ * Return the number of additional bytes required to align a pointer to the given number of bytes.
+ * alignBytes must be a power of two.
+ */
+MEM_STATIC size_t ZSTD_cwksp_bytes_to_align_ptr(void* ptr, const size_t alignBytes) {
+    size_t const alignBytesMask = alignBytes - 1;
+    size_t const bytes = (alignBytes - ((size_t)ptr & (alignBytesMask))) & alignBytesMask;
+    assert((alignBytes & alignBytesMask) == 0);
+    assert(bytes != ZSTD_CWKSP_ALIGNMENT_BYTES);
+    return bytes;
+}
+
+/*
+ * Internal function. Do not use directly.
+ * Reserves the given number of bytes within the aligned/buffer segment of the wksp,
+ * which counts from the end of the wksp (as opposed to the object/table segment).
+ *
+ * Returns a pointer to the beginning of that space.
+ */
+MEM_STATIC void*
+ZSTD_cwksp_reserve_internal_buffer_space(ZSTD_cwksp* ws, size_t const bytes)
+{
+    void* const alloc = (BYTE*)ws->allocStart - bytes;
+    void* const bottom = ws->tableEnd;
+    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
+        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
+    ZSTD_cwksp_assert_internal_consistency(ws);
+    assert(alloc >= bottom);
+    if (alloc < bottom) {
+        DEBUGLOG(4, "cwksp: alloc failed!");
+        ws->allocFailed = 1;
+        return NULL;
+    }
+    /* the area is reserved from the end of wksp.
+     * If it overlaps with tableValidEnd, it voids guarantees on values' range */
+    if (alloc < ws->tableValidEnd) {
+        ws->tableValidEnd = alloc;
+    }
+    ws->allocStart = alloc;
+    return alloc;
+}
+
+/*
+ * Moves the cwksp to the next phase, and does any necessary allocations.
+ * cwksp initialization must necessarily go through each phase in order.
+ * Returns a 0 on success, or zstd error
+ */
+MEM_STATIC size_t
+ZSTD_cwksp_internal_advance_phase(ZSTD_cwksp* ws, ZSTD_cwksp_alloc_phase_e phase)
+{
     assert(phase >= ws->phase);
     if (phase > ws->phase) {
+        /* Going from allocating objects to allocating buffers */
         if (ws->phase < ZSTD_cwksp_alloc_buffers &&
                 phase >= ZSTD_cwksp_alloc_buffers) {
             ws->tableValidEnd = ws->objectEnd;
         }
+
+        /* Going from allocating buffers to allocating aligneds/tables */
         if (ws->phase < ZSTD_cwksp_alloc_aligned &&
                 phase >= ZSTD_cwksp_alloc_aligned) {
-            /* If unaligned allocations down from a too-large top have left us
-             * unaligned, we need to realign our alloc ptr. Technically, this
-             * can consume space that is unaccounted for in the neededSpace
-             * calculation. However, I believe this can only happen when the
-             * workspace is too large, and specifically when it is too large
-             * by a larger margin than the space that will be consumed. */
-            /* TODO: cleaner, compiler warning friendly way to do this??? */
-            ws->allocStart = (BYTE*)ws->allocStart - ((size_t)ws->allocStart & (sizeof(U32)-1));
-            if (ws->allocStart < ws->tableValidEnd) {
-                ws->tableValidEnd = ws->allocStart;
+            {   /* Align the start of the "aligned" to 64 bytes. Use [1, 64] bytes. */
+                size_t const bytesToAlign =
+                    ZSTD_CWKSP_ALIGNMENT_BYTES - ZSTD_cwksp_bytes_to_align_ptr(ws->allocStart, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                DEBUGLOG(5, "reserving aligned alignment addtl space: %zu", bytesToAlign);
+                ZSTD_STATIC_ASSERT((ZSTD_CWKSP_ALIGNMENT_BYTES & (ZSTD_CWKSP_ALIGNMENT_BYTES - 1)) == 0); /* power of 2 */
+                RETURN_ERROR_IF(!ZSTD_cwksp_reserve_internal_buffer_space(ws, bytesToAlign),
+                                memory_allocation, "aligned phase - alignment initial allocation failed!");
             }
-        }
+            {   /* Align the start of the tables to 64 bytes. Use [0, 63] bytes */
+                void* const alloc = ws->objectEnd;
+                size_t const bytesToAlign = ZSTD_cwksp_bytes_to_align_ptr(alloc, ZSTD_CWKSP_ALIGNMENT_BYTES);
+                void* const objectEnd = (BYTE*)alloc + bytesToAlign;
+                DEBUGLOG(5, "reserving table alignment addtl space: %zu", bytesToAlign);
+                RETURN_ERROR_IF(objectEnd > ws->workspaceEnd, memory_allocation,
+                                "table phase - alignment initial allocation failed!");
+                ws->objectEnd = objectEnd;
+                ws->tableEnd = objectEnd;  /* table area starts being empty */
+                if (ws->tableValidEnd < ws->tableEnd) {
+                    ws->tableValidEnd = ws->tableEnd;
+        }   }   }
         ws->phase = phase;
+        ZSTD_cwksp_assert_internal_consistency(ws);
     }
+    return 0;
 }
 
 /*
  * Returns whether this object/buffer/etc was allocated in this workspace.
  */
-MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr) {
+MEM_STATIC int ZSTD_cwksp_owns_buffer(const ZSTD_cwksp* ws, const void* ptr)
+{
     return (ptr != NULL) && (ws->workspace <= ptr) && (ptr <= ws->workspaceEnd);
 }
 
 /*
  * Internal function. Do not use directly.
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_internal(
-        ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase) {
+MEM_STATIC void*
+ZSTD_cwksp_reserve_internal(ZSTD_cwksp* ws, size_t bytes, ZSTD_cwksp_alloc_phase_e phase)
+{
     void* alloc;
-    void* bottom = ws->tableEnd;
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
-    alloc = (BYTE *)ws->allocStart - bytes;
-
-    if (bytes == 0)
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase)) || bytes == 0) {
         return NULL;
+    }
 
 
-    DEBUGLOG(5, "cwksp: reserving %p %zd bytes, %zd bytes remaining",
-        alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
-    ZSTD_cwksp_assert_internal_consistency(ws);
-    assert(alloc >= bottom);
-    if (alloc < bottom) {
-        DEBUGLOG(4, "cwksp: alloc failed!");
-        ws->allocFailed = 1;
-        return NULL;
-    }
-    if (alloc < ws->tableValidEnd) {
-        ws->tableValidEnd = alloc;
-    }
-    ws->allocStart = alloc;
+    alloc = ZSTD_cwksp_reserve_internal_buffer_space(ws, bytes);
 
 
     return alloc;
@@ -259,33 +338,44 @@ MEM_STATIC void* ZSTD_cwksp_reserve_internal(
 /*
  * Reserves and returns unaligned memory.
  */
-MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes) {
+MEM_STATIC BYTE* ZSTD_cwksp_reserve_buffer(ZSTD_cwksp* ws, size_t bytes)
+{
     return (BYTE*)ZSTD_cwksp_reserve_internal(ws, bytes, ZSTD_cwksp_alloc_buffers);
 }
 
 /*
- * Reserves and returns memory sized on and aligned on sizeof(unsigned).
+ * Reserves and returns memory sized on and aligned on ZSTD_CWKSP_ALIGNMENT_BYTES (64 bytes).
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes) {
-    assert((bytes & (sizeof(U32)-1)) == 0);
-    return ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, sizeof(U32)), ZSTD_cwksp_alloc_aligned);
+MEM_STATIC void* ZSTD_cwksp_reserve_aligned(ZSTD_cwksp* ws, size_t bytes)
+{
+    void* ptr = ZSTD_cwksp_reserve_internal(ws, ZSTD_cwksp_align(bytes, ZSTD_CWKSP_ALIGNMENT_BYTES),
+                                            ZSTD_cwksp_alloc_aligned);
+    assert(((size_t)ptr & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
+    return ptr;
 }
 
 /*
- * Aligned on sizeof(unsigned). These buffers have the special property that
+ * Aligned on 64 bytes. These buffers have the special property that
  * their values remain constrained, allowing us to re-use them without
  * memset()-ing them.
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
+MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes)
+{
     const ZSTD_cwksp_alloc_phase_e phase = ZSTD_cwksp_alloc_aligned;
-    void* alloc = ws->tableEnd;
-    void* end = (BYTE *)alloc + bytes;
-    void* top = ws->allocStart;
+    void* alloc;
+    void* end;
+    void* top;
+
+    if (ZSTD_isError(ZSTD_cwksp_internal_advance_phase(ws, phase))) {
+        return NULL;
+    }
+    alloc = ws->tableEnd;
+    end = (BYTE *)alloc + bytes;
+    top = ws->allocStart;
 
     DEBUGLOG(5, "cwksp: reserving %p table %zd bytes, %zd bytes remaining",
         alloc, bytes, ZSTD_cwksp_available_space(ws) - bytes);
     assert((bytes & (sizeof(U32)-1)) == 0);
-    ZSTD_cwksp_internal_advance_phase(ws, phase);
     ZSTD_cwksp_assert_internal_consistency(ws);
     assert(end <= top);
     if (end > top) {
@@ -296,27 +386,31 @@ MEM_STATIC void* ZSTD_cwksp_reserve_table(ZSTD_cwksp* ws, size_t bytes) {
     ws->tableEnd = end;
 
 
+    assert((bytes & (ZSTD_CWKSP_ALIGNMENT_BYTES-1)) == 0);
+    assert(((size_t)alloc & (ZSTD_CWKSP_ALIGNMENT_BYTES-1))== 0);
     return alloc;
 }
 
 /*
  * Aligned on sizeof(void*).
+ * Note : should happen only once, at workspace first initialization
  */
-MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
-    size_t roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
+MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes)
+{
+    size_t const roundedBytes = ZSTD_cwksp_align(bytes, sizeof(void*));
     void* alloc = ws->objectEnd;
     void* end = (BYTE*)alloc + roundedBytes;
 
 
-    DEBUGLOG(5,
+    DEBUGLOG(4,
         "cwksp: reserving %p object %zd bytes (rounded to %zd), %zd bytes remaining",
         alloc, bytes, roundedBytes, ZSTD_cwksp_available_space(ws) - roundedBytes);
-    assert(((size_t)alloc & (sizeof(void*)-1)) == 0);
-    assert((bytes & (sizeof(void*)-1)) == 0);
+    assert((size_t)alloc % ZSTD_ALIGNOF(void*) == 0);
+    assert(bytes % ZSTD_ALIGNOF(void*) == 0);
     ZSTD_cwksp_assert_internal_consistency(ws);
     /* we must be in the first phase, no advance is possible */
     if (ws->phase != ZSTD_cwksp_alloc_objects || end > ws->workspaceEnd) {
-        DEBUGLOG(4, "cwksp: object alloc failed!");
+        DEBUGLOG(3, "cwksp: object alloc failed!");
         ws->allocFailed = 1;
         return NULL;
     }
@@ -328,7 +422,8 @@ MEM_STATIC void* ZSTD_cwksp_reserve_object(ZSTD_cwksp* ws, size_t bytes) {
     return alloc;
 }
 
-MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws) {
+MEM_STATIC void ZSTD_cwksp_mark_tables_dirty(ZSTD_cwksp* ws)
+{
     DEBUGLOG(4, "cwksp: ZSTD_cwksp_mark_tables_dirty");
 
 
@@ -451,6 +546,24 @@ MEM_STATIC int ZSTD_cwksp_reserve_failed(const ZSTD_cwksp* ws) {
 *  Functions Checking Free Space
 ***************************************/
 
+/* ZSTD_alignmentSpaceWithinBounds() :
+ * Returns if the estimated space needed for a wksp is within an acceptable limit of the
+ * actual amount of space used.
+ */
+MEM_STATIC int ZSTD_cwksp_estimated_space_within_bounds(const ZSTD_cwksp* const ws,
+                                                        size_t const estimatedSpace, int resizedWorkspace) {
+    if (resizedWorkspace) {
+        /* Resized/newly allocated wksp should have exact bounds */
+        return ZSTD_cwksp_used(ws) == estimatedSpace;
+    } else {
+        /* Due to alignment, when reusing a workspace, we can actually consume 63 fewer or more bytes
+         * than estimatedSpace. See the comments in zstd_cwksp.h for details.
+         */
+        return (ZSTD_cwksp_used(ws) >= estimatedSpace - 63) && (ZSTD_cwksp_used(ws) <= estimatedSpace + 63);
+    }
+}
+
+
 MEM_STATIC size_t ZSTD_cwksp_available_space(ZSTD_cwksp* ws) {
     return (size_t)((BYTE*)ws->allocStart - (BYTE*)ws->tableEnd);
 }
diff --git a/lib/zstd/compress/zstd_double_fast.c b/lib/zstd/compress/zstd_double_fast.c
index b0424d23ac57f..76933dea2624e 100644
--- a/lib/zstd/compress/zstd_double_fast.c
+++ b/lib/zstd/compress/zstd_double_fast.c
@@ -48,10 +48,216 @@ void ZSTD_fillDoubleHashTable(ZSTD_matchState_t* ms,
 
 
 FORCE_INLINE_TEMPLATE
-size_t ZSTD_compressBlock_doubleFast_generic(
+size_t ZSTD_compressBlock_doubleFast_noDict_generic(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize, U32 const mls /* template */)
+{
+    ZSTD_compressionParameters const* cParams = &ms->cParams;
+    U32* const hashLong = ms->hashTable;
+    const U32 hBitsL = cParams->hashLog;
+    U32* const hashSmall = ms->chainTable;
+    const U32 hBitsS = cParams->chainLog;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const istart = (const BYTE*)src;
+    const BYTE* anchor = istart;
+    const U32 endIndex = (U32)((size_t)(istart - base) + srcSize);
+    /* presumes that, if there is a dictionary, it must be using Attach mode */
+    const U32 prefixLowestIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
+    const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const BYTE* const iend = istart + srcSize;
+    const BYTE* const ilimit = iend - HASH_READ_SIZE;
+    U32 offset_1=rep[0], offset_2=rep[1];
+    U32 offsetSaved = 0;
+
+    size_t mLength;
+    U32 offset;
+    U32 curr;
+
+    /* how many positions to search before increasing step size */
+    const size_t kStepIncr = 1 << kSearchStrength;
+    /* the position at which to increment the step size if no match is found */
+    const BYTE* nextStep;
+    size_t step; /* the current step size */
+
+    size_t hl0; /* the long hash at ip */
+    size_t hl1; /* the long hash at ip1 */
+
+    U32 idxl0; /* the long match index for ip */
+    U32 idxl1; /* the long match index for ip1 */
+
+    const BYTE* matchl0; /* the long match for ip */
+    const BYTE* matchs0; /* the short match for ip */
+    const BYTE* matchl1; /* the long match for ip1 */
+
+    const BYTE* ip = istart; /* the current position */
+    const BYTE* ip1; /* the next position */
+
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_noDict_generic");
+
+    /* init */
+    ip += ((ip - prefixLowest) == 0);
+    {
+        U32 const current = (U32)(ip - base);
+        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, current, cParams->windowLog);
+        U32 const maxRep = current - windowLow;
+        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
+        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+    }
+
+    /* Outer Loop: one iteration per match found and stored */
+    while (1) {
+        step = 1;
+        nextStep = ip + kStepIncr;
+        ip1 = ip + step;
+
+        if (ip1 > ilimit) {
+            goto _cleanup;
+        }
+
+        hl0 = ZSTD_hashPtr(ip, hBitsL, 8);
+        idxl0 = hashLong[hl0];
+        matchl0 = base + idxl0;
+
+        /* Inner Loop: one iteration per search / position */
+        do {
+            const size_t hs0 = ZSTD_hashPtr(ip, hBitsS, mls);
+            const U32 idxs0 = hashSmall[hs0];
+            curr = (U32)(ip-base);
+            matchs0 = base + idxs0;
+
+            hashLong[hl0] = hashSmall[hs0] = curr;   /* update hash tables */
+
+            /* check noDict repcode */
+            if ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1))) {
+                mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
+                ip++;
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
+                goto _match_stored;
+            }
+
+            hl1 = ZSTD_hashPtr(ip1, hBitsL, 8);
+
+            if (idxl0 > prefixLowestIndex) {
+                /* check prefix long match */
+                if (MEM_read64(matchl0) == MEM_read64(ip)) {
+                    mLength = ZSTD_count(ip+8, matchl0+8, iend) + 8;
+                    offset = (U32)(ip-matchl0);
+                    while (((ip>anchor) & (matchl0>prefixLowest)) && (ip[-1] == matchl0[-1])) { ip--; matchl0--; mLength++; } /* catch up */
+                    goto _match_found;
+                }
+            }
+
+            idxl1 = hashLong[hl1];
+            matchl1 = base + idxl1;
+
+            if (idxs0 > prefixLowestIndex) {
+                /* check prefix short match */
+                if (MEM_read32(matchs0) == MEM_read32(ip)) {
+                    goto _search_next_long;
+                }
+            }
+
+            if (ip1 >= nextStep) {
+                PREFETCH_L1(ip1 + 64);
+                PREFETCH_L1(ip1 + 128);
+                step++;
+                nextStep += kStepIncr;
+            }
+            ip = ip1;
+            ip1 += step;
+
+            hl0 = hl1;
+            idxl0 = idxl1;
+            matchl0 = matchl1;
+    #if defined(__aarch64__)
+            PREFETCH_L1(ip+256);
+    #endif
+        } while (ip1 <= ilimit);
+
+_cleanup:
+        /* save reps for next block */
+        rep[0] = offset_1 ? offset_1 : offsetSaved;
+        rep[1] = offset_2 ? offset_2 : offsetSaved;
+
+        /* Return the last literals size */
+        return (size_t)(iend - anchor);
+
+_search_next_long:
+
+        /* check prefix long +1 match */
+        if (idxl1 > prefixLowestIndex) {
+            if (MEM_read64(matchl1) == MEM_read64(ip1)) {
+                ip = ip1;
+                mLength = ZSTD_count(ip+8, matchl1+8, iend) + 8;
+                offset = (U32)(ip-matchl1);
+                while (((ip>anchor) & (matchl1>prefixLowest)) && (ip[-1] == matchl1[-1])) { ip--; matchl1--; mLength++; } /* catch up */
+                goto _match_found;
+            }
+        }
+
+        /* if no long +1 match, explore the short match we found */
+        mLength = ZSTD_count(ip+4, matchs0+4, iend) + 4;
+        offset = (U32)(ip - matchs0);
+        while (((ip>anchor) & (matchs0>prefixLowest)) && (ip[-1] == matchs0[-1])) { ip--; matchs0--; mLength++; } /* catch up */
+
+        /* fall-through */
+
+_match_found: /* requires ip, offset, mLength */
+        offset_2 = offset_1;
+        offset_1 = offset;
+
+        if (step < 4) {
+            /* It is unsafe to write this value back to the hashtable when ip1 is
+             * greater than or equal to the new ip we will have after we're done
+             * processing this match. Rather than perform that test directly
+             * (ip1 >= ip + mLength), which costs speed in practice, we do a simpler
+             * more predictable test. The minmatch even if we take a short match is
+             * 4 bytes, so as long as step, the distance between ip and ip1
+             * (initially) is less than 4, we know ip1 < new ip. */
+            hashLong[hl1] = (U32)(ip1 - base);
+        }
+
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
+
+_match_stored:
+        /* match found */
+        ip += mLength;
+        anchor = ip;
+
+        if (ip <= ilimit) {
+            /* Complementary insertion */
+            /* done after iLimit test, as candidates could be > iend-8 */
+            {   U32 const indexToInsert = curr+2;
+                hashLong[ZSTD_hashPtr(base+indexToInsert, hBitsL, 8)] = indexToInsert;
+                hashLong[ZSTD_hashPtr(ip-2, hBitsL, 8)] = (U32)(ip-2-base);
+                hashSmall[ZSTD_hashPtr(base+indexToInsert, hBitsS, mls)] = indexToInsert;
+                hashSmall[ZSTD_hashPtr(ip-1, hBitsS, mls)] = (U32)(ip-1-base);
+            }
+
+            /* check immediate repcode */
+            while ( (ip <= ilimit)
+                 && ( (offset_2>0)
+                    & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
+                /* store sequence */
+                size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
+                U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
+                hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
+                hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, rLength);
+                ip += rLength;
+                anchor = ip;
+                continue;   /* faster when present ... (?) */
+            }
+        }
+    }
+}
+
+
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_compressBlock_doubleFast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const mls /* template */, ZSTD_dictMode_e const dictMode)
+        U32 const mls /* template */)
 {
     ZSTD_compressionParameters const* cParams = &ms->cParams;
     U32* const hashLong = ms->hashTable;
@@ -72,54 +278,30 @@ size_t ZSTD_compressBlock_doubleFast_generic(
     U32 offsetSaved = 0;
 
     const ZSTD_matchState_t* const dms = ms->dictMatchState;
-    const ZSTD_compressionParameters* const dictCParams =
-                                     dictMode == ZSTD_dictMatchState ?
-                                     &dms->cParams : NULL;
-    const U32* const dictHashLong  = dictMode == ZSTD_dictMatchState ?
-                                     dms->hashTable : NULL;
-    const U32* const dictHashSmall = dictMode == ZSTD_dictMatchState ?
-                                     dms->chainTable : NULL;
-    const U32 dictStartIndex       = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.dictLimit : 0;
-    const BYTE* const dictBase     = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.base : NULL;
-    const BYTE* const dictStart    = dictMode == ZSTD_dictMatchState ?
-                                     dictBase + dictStartIndex : NULL;
-    const BYTE* const dictEnd      = dictMode == ZSTD_dictMatchState ?
-                                     dms->window.nextSrc : NULL;
-    const U32 dictIndexDelta       = dictMode == ZSTD_dictMatchState ?
-                                     prefixLowestIndex - (U32)(dictEnd - dictBase) :
-                                     0;
-    const U32 dictHBitsL           = dictMode == ZSTD_dictMatchState ?
-                                     dictCParams->hashLog : hBitsL;
-    const U32 dictHBitsS           = dictMode == ZSTD_dictMatchState ?
-                                     dictCParams->chainLog : hBitsS;
+    const ZSTD_compressionParameters* const dictCParams = &dms->cParams;
+    const U32* const dictHashLong  = dms->hashTable;
+    const U32* const dictHashSmall = dms->chainTable;
+    const U32 dictStartIndex       = dms->window.dictLimit;
+    const BYTE* const dictBase     = dms->window.base;
+    const BYTE* const dictStart    = dictBase + dictStartIndex;
+    const BYTE* const dictEnd      = dms->window.nextSrc;
+    const U32 dictIndexDelta       = prefixLowestIndex - (U32)(dictEnd - dictBase);
+    const U32 dictHBitsL           = dictCParams->hashLog;
+    const U32 dictHBitsS           = dictCParams->chainLog;
     const U32 dictAndPrefixLength  = (U32)((ip - prefixLowest) + (dictEnd - dictStart));
 
-    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_generic");
-
-    assert(dictMode == ZSTD_noDict || dictMode == ZSTD_dictMatchState);
+    DEBUGLOG(5, "ZSTD_compressBlock_doubleFast_dictMatchState_generic");
 
     /* if a dictionary is attached, it must be within window range */
-    if (dictMode == ZSTD_dictMatchState) {
-        assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
-    }
+    assert(ms->window.dictLimit + (1U << cParams->windowLog) >= endIndex);
 
     /* init */
     ip += (dictAndPrefixLength == 0);
-    if (dictMode == ZSTD_noDict) {
-        U32 const curr = (U32)(ip - base);
-        U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
-        U32 const maxRep = curr - windowLow;
-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
-    }
-    if (dictMode == ZSTD_dictMatchState) {
-        /* dictMatchState repCode checks don't currently handle repCode == 0
-         * disabling. */
-        assert(offset_1 <= dictAndPrefixLength);
-        assert(offset_2 <= dictAndPrefixLength);
-    }
+
+    /* dictMatchState repCode checks don't currently handle repCode == 0
+     * disabling. */
+    assert(offset_1 <= dictAndPrefixLength);
+    assert(offset_2 <= dictAndPrefixLength);
 
     /* Main Search Loop */
     while (ip < ilimit) {   /* < instead of <=, because repcode check at (ip+1) */
@@ -135,29 +317,18 @@ size_t ZSTD_compressBlock_doubleFast_generic(
         const BYTE* matchLong = base + matchIndexL;
         const BYTE* match = base + matchIndexS;
         const U32 repIndex = curr + 1 - offset_1;
-        const BYTE* repMatch = (dictMode == ZSTD_dictMatchState
-                            && repIndex < prefixLowestIndex) ?
+        const BYTE* repMatch = (repIndex < prefixLowestIndex) ?
                                dictBase + (repIndex - dictIndexDelta) :
                                base + repIndex;
         hashLong[h2] = hashSmall[h] = curr;   /* update hash tables */
 
-        /* check dictMatchState repcode */
-        if (dictMode == ZSTD_dictMatchState
-            && ((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
+        /* check repcode */
+        if (((U32)((prefixLowestIndex-1) - repIndex) >= 3 /* intentional underflow */)
             && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
-            goto _match_stored;
-        }
-
-        /* check noDict repcode */
-        if ( dictMode == ZSTD_noDict
-          && ((offset_1 > 0) & (MEM_read32(ip+1-offset_1) == MEM_read32(ip+1)))) {
-            mLength = ZSTD_count(ip+1+4, ip+1+4-offset_1, iend) + 4;
-            ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
             goto _match_stored;
         }
 
@@ -169,7 +340,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
                 while (((ip>anchor) & (matchLong>prefixLowest)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; } /* catch up */
                 goto _match_found;
             }
-        } else if (dictMode == ZSTD_dictMatchState) {
+        } else {
             /* check dictMatchState long match */
             U32 const dictMatchIndexL = dictHashLong[dictHL];
             const BYTE* dictMatchL = dictBase + dictMatchIndexL;
@@ -187,7 +358,7 @@ size_t ZSTD_compressBlock_doubleFast_generic(
             if (MEM_read32(match) == MEM_read32(ip)) {
                 goto _search_next_long;
             }
-        } else if (dictMode == ZSTD_dictMatchState) {
+        } else {
             /* check dictMatchState short match */
             U32 const dictMatchIndexS = dictHashSmall[dictHS];
             match = dictBase + dictMatchIndexS;
@@ -220,7 +391,7 @@ _search_next_long:
                     while (((ip>anchor) & (matchL3>prefixLowest)) && (ip[-1] == matchL3[-1])) { ip--; matchL3--; mLength++; } /* catch up */
                     goto _match_found;
                 }
-            } else if (dictMode == ZSTD_dictMatchState) {
+            } else {
                 /* check dict long +1 match */
                 U32 const dictMatchIndexL3 = dictHashLong[dictHLNext];
                 const BYTE* dictMatchL3 = dictBase + dictMatchIndexL3;
@@ -234,7 +405,7 @@ _search_next_long:
         }   }   }
 
         /* if no long +1 match, explore the short match we found */
-        if (dictMode == ZSTD_dictMatchState && matchIndexS < prefixLowestIndex) {
+        if (matchIndexS < prefixLowestIndex) {
             mLength = ZSTD_count_2segments(ip+4, match+4, iend, dictEnd, prefixLowest) + 4;
             offset = (U32)(curr - matchIndexS);
             while (((ip>anchor) & (match>dictStart)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
@@ -248,7 +419,7 @@ _match_found:
         offset_2 = offset_1;
         offset_1 = offset;
 
-        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+        ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
 _match_stored:
         /* match found */
@@ -266,43 +437,27 @@ _match_stored:
             }
 
             /* check immediate repcode */
-            if (dictMode == ZSTD_dictMatchState) {
-                while (ip <= ilimit) {
-                    U32 const current2 = (U32)(ip-base);
-                    U32 const repIndex2 = current2 - offset_2;
-                    const BYTE* repMatch2 = dictMode == ZSTD_dictMatchState
-                        && repIndex2 < prefixLowestIndex ?
-                            dictBase + repIndex2 - dictIndexDelta :
-                            base + repIndex2;
-                    if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
-                       && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
-                        const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
-                        size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
-                        U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                        ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
-                        hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
-                        hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
-                        ip += repLength2;
-                        anchor = ip;
-                        continue;
-                    }
-                    break;
-            }   }
-
-            if (dictMode == ZSTD_noDict) {
-                while ( (ip <= ilimit)
-                     && ( (offset_2>0)
-                        & (MEM_read32(ip) == MEM_read32(ip - offset_2)) )) {
-                    /* store sequence */
-                    size_t const rLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-                    U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff;  /* swap offset_2 <=> offset_1 */
-                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = (U32)(ip-base);
-                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = (U32)(ip-base);
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, rLength-MINMATCH);
-                    ip += rLength;
+            while (ip <= ilimit) {
+                U32 const current2 = (U32)(ip-base);
+                U32 const repIndex2 = current2 - offset_2;
+                const BYTE* repMatch2 = repIndex2 < prefixLowestIndex ?
+                        dictBase + repIndex2 - dictIndexDelta :
+                        base + repIndex2;
+                if ( ((U32)((prefixLowestIndex-1) - (U32)repIndex2) >= 3 /* intentional overflow */)
+                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
+                    const BYTE* const repEnd2 = repIndex2 < prefixLowestIndex ? dictEnd : iend;
+                    size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixLowest) + 4;
+                    U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
+                    hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
+                    hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
+                    ip += repLength2;
                     anchor = ip;
-                    continue;   /* faster when present ... (?) */
-        }   }   }
+                    continue;
+                }
+                break;
+            }
+        }
     }   /* while (ip < ilimit) */
 
     /* save reps for next block */
@@ -313,6 +468,24 @@ _match_stored:
     return (size_t)(iend - anchor);
 }
 
+#define ZSTD_GEN_DFAST_FN(dictMode, mls)                                                                 \
+    static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls(                                      \
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                          \
+            void const* src, size_t srcSize)                                                             \
+    {                                                                                                    \
+        return ZSTD_compressBlock_doubleFast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls); \
+    }
+
+ZSTD_GEN_DFAST_FN(noDict, 4)
+ZSTD_GEN_DFAST_FN(noDict, 5)
+ZSTD_GEN_DFAST_FN(noDict, 6)
+ZSTD_GEN_DFAST_FN(noDict, 7)
+
+ZSTD_GEN_DFAST_FN(dictMatchState, 4)
+ZSTD_GEN_DFAST_FN(dictMatchState, 5)
+ZSTD_GEN_DFAST_FN(dictMatchState, 6)
+ZSTD_GEN_DFAST_FN(dictMatchState, 7)
+
 
 size_t ZSTD_compressBlock_doubleFast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -323,13 +496,13 @@ size_t ZSTD_compressBlock_doubleFast(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
     }
 }
 
@@ -343,13 +516,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
+        return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
     }
 }
 
@@ -385,7 +558,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
 
     /* if extDict is invalidated due to maxDistance, switch to "regular" variant */
     if (prefixStartIndex == dictStartIndex)
-        return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
+        return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);
 
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -407,12 +580,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         hashSmall[hSmall] = hashLong[hLong] = curr;   /* update hash table */
 
         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (repIndex > dictStartIndex))
+            & (offset_1 <= curr+1 - dictStartIndex)) /* note: we are searching at curr+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
         } else {
             if ((matchLongIndex > dictStartIndex) && (MEM_read64(matchLong) == MEM_read64(ip))) {
                 const BYTE* const matchEnd = matchLongIndex < prefixStartIndex ? dictEnd : iend;
@@ -423,7 +596,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 while (((ip>anchor) & (matchLong>lowMatchPtr)) && (ip[-1] == matchLong[-1])) { ip--; matchLong--; mLength++; }   /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
             } else if ((matchIndex > dictStartIndex) && (MEM_read32(match) == MEM_read32(ip))) {
                 size_t const h3 = ZSTD_hashPtr(ip+1, hBitsL, 8);
@@ -448,7 +621,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 }
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
 
             } else {
                 ip += ((ip-anchor) >> kSearchStrength) + 1;
@@ -475,12 +648,12 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
                 if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3)   /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                    & (repIndex2 > dictStartIndex))
+                    & (offset_2 <= current2 - dictStartIndex))
                   && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashSmall[ZSTD_hashPtr(ip, hBitsS, mls)] = current2;
                     hashLong[ZSTD_hashPtr(ip, hBitsL, 8)] = current2;
                     ip += repLength2;
@@ -498,6 +671,10 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
     return (size_t)(iend - anchor);
 }
 
+ZSTD_GEN_DFAST_FN(extDict, 4)
+ZSTD_GEN_DFAST_FN(extDict, 5)
+ZSTD_GEN_DFAST_FN(extDict, 6)
+ZSTD_GEN_DFAST_FN(extDict, 7)
 
 size_t ZSTD_compressBlock_doubleFast_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -508,12 +685,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+        return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+        return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+        return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+        return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
     }
 }
diff --git a/lib/zstd/compress/zstd_fast.c b/lib/zstd/compress/zstd_fast.c
index 96b7d48e2868e..a752e6beab52e 100644
--- a/lib/zstd/compress/zstd_fast.c
+++ b/lib/zstd/compress/zstd_fast.c
@@ -43,145 +43,294 @@ void ZSTD_fillHashTable(ZSTD_matchState_t* ms,
 }
 
 
+/*
+ * If you squint hard enough (and ignore repcodes), the search operation at any
+ * given position is broken into 4 stages:
+ *
+ * 1. Hash   (map position to hash value via input read)
+ * 2. Lookup (map hash val to index via hashtable read)
+ * 3. Load   (map index to value at that position via input read)
+ * 4. Compare
+ *
+ * Each of these steps involves a memory read at an address which is computed
+ * from the previous step. This means these steps must be sequenced and their
+ * latencies are cumulative.
+ *
+ * Rather than do 1->2->3->4 sequentially for a single position before moving
+ * onto the next, this implementation interleaves these operations across the
+ * next few positions:
+ *
+ * R = Repcode Read & Compare
+ * H = Hash
+ * T = Table Lookup
+ * M = Match Read & Compare
+ *
+ * Pos | Time -->
+ * ----+-------------------
+ * N   | ... M
+ * N+1 | ...   TM
+ * N+2 |    R H   T M
+ * N+3 |         H    TM
+ * N+4 |           R H   T M
+ * N+5 |                H   ...
+ * N+6 |                  R ...
+ *
+ * This is very much analogous to the pipelining of execution in a CPU. And just
+ * like a CPU, we have to dump the pipeline when we find a match (i.e., take a
+ * branch).
+ *
+ * When this happens, we throw away our current state, and do the following prep
+ * to re-enter the loop:
+ *
+ * Pos | Time -->
+ * ----+-------------------
+ * N   | H T
+ * N+1 |  H
+ *
+ * This is also the work we do at the beginning to enter the loop initially.
+ */
 FORCE_INLINE_TEMPLATE size_t
-ZSTD_compressBlock_fast_generic(
+ZSTD_compressBlock_fast_noDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize,
-        U32 const mls)
+        U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
     U32 const hlog = cParams->hashLog;
     /* support stepSize of 0 */
-    size_t const stepSize = cParams->targetLength + !(cParams->targetLength) + 1;
+    size_t const stepSize = hasStep ? (cParams->targetLength + !(cParams->targetLength) + 1) : 2;
     const BYTE* const base = ms->window.base;
     const BYTE* const istart = (const BYTE*)src;
-    /* We check ip0 (ip + 0) and ip1 (ip + 1) each loop */
-    const BYTE* ip0 = istart;
-    const BYTE* ip1;
-    const BYTE* anchor = istart;
     const U32   endIndex = (U32)((size_t)(istart - base) + srcSize);
     const U32   prefixStartIndex = ZSTD_getLowestPrefixIndex(ms, endIndex, cParams->windowLog);
     const BYTE* const prefixStart = base + prefixStartIndex;
     const BYTE* const iend = istart + srcSize;
     const BYTE* const ilimit = iend - HASH_READ_SIZE;
-    U32 offset_1=rep[0], offset_2=rep[1];
+
+    const BYTE* anchor = istart;
+    const BYTE* ip0 = istart;
+    const BYTE* ip1;
+    const BYTE* ip2;
+    const BYTE* ip3;
+    U32 current0;
+
+    U32 rep_offset1 = rep[0];
+    U32 rep_offset2 = rep[1];
     U32 offsetSaved = 0;
 
-    /* init */
+    size_t hash0; /* hash for ip0 */
+    size_t hash1; /* hash for ip1 */
+    U32 idx; /* match idx for ip0 */
+    U32 mval; /* src value at match idx */
+
+    U32 offcode;
+    const BYTE* match0;
+    size_t mLength;
+
+    /* ip0 and ip1 are always adjacent. The targetLength skipping and
+     * uncompressibility acceleration is applied to every other position,
+     * matching the behavior of #1562. step therefore represents the gap
+     * between pairs of positions, from ip0 to ip2 or ip1 to ip3. */
+    size_t step;
+    const BYTE* nextStep;
+    const size_t kStepIncr = (1 << (kSearchStrength - 1));
+
     DEBUGLOG(5, "ZSTD_compressBlock_fast_generic");
     ip0 += (ip0 == prefixStart);
-    ip1 = ip0 + 1;
     {   U32 const curr = (U32)(ip0 - base);
         U32 const windowLow = ZSTD_getLowestPrefixIndex(ms, curr, cParams->windowLog);
         U32 const maxRep = curr - windowLow;
-        if (offset_2 > maxRep) offsetSaved = offset_2, offset_2 = 0;
-        if (offset_1 > maxRep) offsetSaved = offset_1, offset_1 = 0;
+        if (rep_offset2 > maxRep) offsetSaved = rep_offset2, rep_offset2 = 0;
+        if (rep_offset1 > maxRep) offsetSaved = rep_offset1, rep_offset1 = 0;
     }
 
-    /* Main Search Loop */
-#ifdef __INTEL_COMPILER
-    /* From intel 'The vector pragma indicates that the loop should be
-     * vectorized if it is legal to do so'. Can be used together with
-     * #pragma ivdep (but have opted to exclude that because intel
-     * warns against using it).*/
-    #pragma vector always
-#endif
-    while (ip1 < ilimit) {   /* < instead of <=, because check at ip0+2 */
-        size_t mLength;
-        BYTE const* ip2 = ip0 + 2;
-        size_t const h0 = ZSTD_hashPtr(ip0, hlog, mls);
-        U32 const val0 = MEM_read32(ip0);
-        size_t const h1 = ZSTD_hashPtr(ip1, hlog, mls);
-        U32 const val1 = MEM_read32(ip1);
-        U32 const current0 = (U32)(ip0-base);
-        U32 const current1 = (U32)(ip1-base);
-        U32 const matchIndex0 = hashTable[h0];
-        U32 const matchIndex1 = hashTable[h1];
-        BYTE const* repMatch = ip2 - offset_1;
-        const BYTE* match0 = base + matchIndex0;
-        const BYTE* match1 = base + matchIndex1;
-        U32 offcode;
-
-#if defined(__aarch64__)
-        PREFETCH_L1(ip0+256);
-#endif
-
-        hashTable[h0] = current0;   /* update hash table */
-        hashTable[h1] = current1;   /* update hash table */
-
-        assert(ip0 + 1 == ip1);
-
-        if ((offset_1 > 0) & (MEM_read32(repMatch) == MEM_read32(ip2))) {
-            mLength = (ip2[-1] == repMatch[-1]) ? 1 : 0;
-            ip0 = ip2 - mLength;
-            match0 = repMatch - mLength;
+    /* start each op */
+_start: /* Requires: ip0 */
+
+    step = stepSize;
+    nextStep = ip0 + kStepIncr;
+
+    /* calculate positions, ip0 - anchor == 0, so we skip step calc */
+    ip1 = ip0 + 1;
+    ip2 = ip0 + step;
+    ip3 = ip2 + 1;
+
+    if (ip3 >= ilimit) {
+        goto _cleanup;
+    }
+
+    hash0 = ZSTD_hashPtr(ip0, hlog, mls);
+    hash1 = ZSTD_hashPtr(ip1, hlog, mls);
+
+    idx = hashTable[hash0];
+
+    do {
+        /* load repcode match for ip[2]*/
+        const U32 rval = MEM_read32(ip2 - rep_offset1);
+
+        /* write back hash table entry */
+        current0 = (U32)(ip0 - base);
+        hashTable[hash0] = current0;
+
+        /* check repcode at ip[2] */
+        if ((MEM_read32(ip2) == rval) & (rep_offset1 > 0)) {
+            ip0 = ip2;
+            match0 = ip0 - rep_offset1;
+            mLength = ip0[-1] == match0[-1];
+            ip0 -= mLength;
+            match0 -= mLength;
+            offcode = STORE_REPCODE_1;
             mLength += 4;
-            offcode = 0;
             goto _match;
         }
-        if ((matchIndex0 > prefixStartIndex) && MEM_read32(match0) == val0) {
-            /* found a regular match */
-            goto _offset;
+
+        /* load match for ip[0] */
+        if (idx >= prefixStartIndex) {
+            mval = MEM_read32(base + idx);
+        } else {
+            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
         }
-        if ((matchIndex1 > prefixStartIndex) && MEM_read32(match1) == val1) {
-            /* found a regular match after one literal */
-            ip0 = ip1;
-            match0 = match1;
+
+        /* check match at ip[0] */
+        if (MEM_read32(ip0) == mval) {
+            /* found a match! */
             goto _offset;
         }
-        {   size_t const step = ((size_t)(ip0-anchor) >> (kSearchStrength - 1)) + stepSize;
-            assert(step >= 2);
-            ip0 += step;
-            ip1 += step;
-            continue;
+
+        /* lookup ip[1] */
+        idx = hashTable[hash1];
+
+        /* hash ip[2] */
+        hash0 = hash1;
+        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
+
+        /* advance to next positions */
+        ip0 = ip1;
+        ip1 = ip2;
+        ip2 = ip3;
+
+        /* write back hash table entry */
+        current0 = (U32)(ip0 - base);
+        hashTable[hash0] = current0;
+
+        /* load match for ip[0] */
+        if (idx >= prefixStartIndex) {
+            mval = MEM_read32(base + idx);
+        } else {
+            mval = MEM_read32(ip0) ^ 1; /* guaranteed to not match. */
         }
-_offset: /* Requires: ip0, match0 */
-        /* Compute the offset code */
-        offset_2 = offset_1;
-        offset_1 = (U32)(ip0-match0);
-        offcode = offset_1 + ZSTD_REP_MOVE;
-        mLength = 4;
-        /* Count the backwards match length */
-        while (((ip0>anchor) & (match0>prefixStart))
-             && (ip0[-1] == match0[-1])) { ip0--; match0--; mLength++; } /* catch up */
 
-_match: /* Requires: ip0, match0, offcode */
-        /* Count the forward length */
-        mLength += ZSTD_count(ip0+mLength, match0+mLength, iend);
-        ZSTD_storeSeq(seqStore, (size_t)(ip0-anchor), anchor, iend, offcode, mLength-MINMATCH);
-        /* match found */
-        ip0 += mLength;
-        anchor = ip0;
+        /* check match at ip[0] */
+        if (MEM_read32(ip0) == mval) {
+            /* found a match! */
+            goto _offset;
+        }
 
-        if (ip0 <= ilimit) {
-            /* Fill Table */
-            assert(base+current0+2 > istart);  /* check base overflow */
-            hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
-            hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
-
-            if (offset_2 > 0) { /* offset_2==0 means offset_2 is invalidated */
-                while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - offset_2)) ) {
-                    /* store sequence */
-                    size_t const rLength = ZSTD_count(ip0+4, ip0+4-offset_2, iend) + 4;
-                    { U32 const tmpOff = offset_2; offset_2 = offset_1; offset_1 = tmpOff; } /* swap offset_2 <=> offset_1 */
-                    hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
-                    ip0 += rLength;
-                    ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, 0 /*offCode*/, rLength-MINMATCH);
-                    anchor = ip0;
-                    continue;   /* faster when present (confirmed on gcc-8) ... (?) */
-        }   }   }
-        ip1 = ip0 + 1;
-    }
+        /* lookup ip[1] */
+        idx = hashTable[hash1];
+
+        /* hash ip[2] */
+        hash0 = hash1;
+        hash1 = ZSTD_hashPtr(ip2, hlog, mls);
+
+        /* advance to next positions */
+        ip0 = ip1;
+        ip1 = ip2;
+        ip2 = ip0 + step;
+        ip3 = ip1 + step;
+
+        /* calculate step */
+        if (ip2 >= nextStep) {
+            step++;
+            PREFETCH_L1(ip1 + 64);
+            PREFETCH_L1(ip1 + 128);
+            nextStep += kStepIncr;
+        }
+    } while (ip3 < ilimit);
+
+_cleanup:
+    /* Note that there are probably still a couple positions we could search.
+     * However, it seems to be a meaningful performance hit to try to search
+     * them. So let's not. */
 
     /* save reps for next block */
-    rep[0] = offset_1 ? offset_1 : offsetSaved;
-    rep[1] = offset_2 ? offset_2 : offsetSaved;
+    rep[0] = rep_offset1 ? rep_offset1 : offsetSaved;
+    rep[1] = rep_offset2 ? rep_offset2 : offsetSaved;
 
     /* Return the last literals size */
     return (size_t)(iend - anchor);
+
+_offset: /* Requires: ip0, idx */
+
+    /* Compute the offset code. */
+    match0 = base + idx;
+    rep_offset2 = rep_offset1;
+    rep_offset1 = (U32)(ip0-match0);
+    offcode = STORE_OFFSET(rep_offset1);
+    mLength = 4;
+
+    /* Count the backwards match length. */
+    while (((ip0>anchor) & (match0>prefixStart)) && (ip0[-1] == match0[-1])) {
+        ip0--;
+        match0--;
+        mLength++;
+    }
+
+_match: /* Requires: ip0, match0, offcode */
+
+    /* Count the forward length. */
+    mLength += ZSTD_count(ip0 + mLength, match0 + mLength, iend);
+
+    ZSTD_storeSeq(seqStore, (size_t)(ip0 - anchor), anchor, iend, offcode, mLength);
+
+    ip0 += mLength;
+    anchor = ip0;
+
+    /* write next hash table entry */
+    if (ip1 < ip0) {
+        hashTable[hash1] = (U32)(ip1 - base);
+    }
+
+    /* Fill table and check for immediate repcode. */
+    if (ip0 <= ilimit) {
+        /* Fill Table */
+        assert(base+current0+2 > istart);  /* check base overflow */
+        hashTable[ZSTD_hashPtr(base+current0+2, hlog, mls)] = current0+2;  /* here because current+2 could be > iend-8 */
+        hashTable[ZSTD_hashPtr(ip0-2, hlog, mls)] = (U32)(ip0-2-base);
+
+        if (rep_offset2 > 0) { /* rep_offset2==0 means rep_offset2 is invalidated */
+            while ( (ip0 <= ilimit) && (MEM_read32(ip0) == MEM_read32(ip0 - rep_offset2)) ) {
+                /* store sequence */
+                size_t const rLength = ZSTD_count(ip0+4, ip0+4-rep_offset2, iend) + 4;
+                { U32 const tmpOff = rep_offset2; rep_offset2 = rep_offset1; rep_offset1 = tmpOff; } /* swap rep_offset2 <=> rep_offset1 */
+                hashTable[ZSTD_hashPtr(ip0, hlog, mls)] = (U32)(ip0-base);
+                ip0 += rLength;
+                ZSTD_storeSeq(seqStore, 0 /*litLen*/, anchor, iend, STORE_REPCODE_1, rLength);
+                anchor = ip0;
+                continue;   /* faster when present (confirmed on gcc-8) ... (?) */
+    }   }   }
+
+    goto _start;
 }
 
+#define ZSTD_GEN_FAST_FN(dictMode, mls, step)                                                            \
+    static size_t ZSTD_compressBlock_fast_##dictMode##_##mls##_##step(                                      \
+            ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],                    \
+            void const* src, size_t srcSize)                                                       \
+    {                                                                                              \
+        return ZSTD_compressBlock_fast_##dictMode##_generic(ms, seqStore, rep, src, srcSize, mls, step); \
+    }
+
+ZSTD_GEN_FAST_FN(noDict, 4, 1)
+ZSTD_GEN_FAST_FN(noDict, 5, 1)
+ZSTD_GEN_FAST_FN(noDict, 6, 1)
+ZSTD_GEN_FAST_FN(noDict, 7, 1)
+
+ZSTD_GEN_FAST_FN(noDict, 4, 0)
+ZSTD_GEN_FAST_FN(noDict, 5, 0)
+ZSTD_GEN_FAST_FN(noDict, 6, 0)
+ZSTD_GEN_FAST_FN(noDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -189,24 +338,40 @@ size_t ZSTD_compressBlock_fast(
 {
     U32 const mls = ms->cParams.minMatch;
     assert(ms->dictMatchState == NULL);
-    switch(mls)
-    {
-    default: /* includes case 3 */
-    case 4 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
-    case 5 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
-    case 6 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
-    case 7 :
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
+    if (ms->cParams.targetLength > 1) {
+        switch(mls)
+        {
+        default: /* includes case 3 */
+        case 4 :
+            return ZSTD_compressBlock_fast_noDict_4_1(ms, seqStore, rep, src, srcSize);
+        case 5 :
+            return ZSTD_compressBlock_fast_noDict_5_1(ms, seqStore, rep, src, srcSize);
+        case 6 :
+            return ZSTD_compressBlock_fast_noDict_6_1(ms, seqStore, rep, src, srcSize);
+        case 7 :
+            return ZSTD_compressBlock_fast_noDict_7_1(ms, seqStore, rep, src, srcSize);
+        }
+    } else {
+        switch(mls)
+        {
+        default: /* includes case 3 */
+        case 4 :
+            return ZSTD_compressBlock_fast_noDict_4_0(ms, seqStore, rep, src, srcSize);
+        case 5 :
+            return ZSTD_compressBlock_fast_noDict_5_0(ms, seqStore, rep, src, srcSize);
+        case 6 :
+            return ZSTD_compressBlock_fast_noDict_6_0(ms, seqStore, rep, src, srcSize);
+        case 7 :
+            return ZSTD_compressBlock_fast_noDict_7_0(ms, seqStore, rep, src, srcSize);
+        }
+
     }
 }
 
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_fast_dictMatchState_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize, U32 const mls)
+        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
@@ -242,6 +407,8 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     assert(endIndex - prefixStartIndex <= maxDistance);
     (void)maxDistance; (void)endIndex;   /* these variables are not used when assert() is disabled */
 
+    (void)hasStep; /* not currently specialized on whether it's accelerated */
+
     /* ensure there will be no underflow
      * when translating a dict index into a local index */
     assert(prefixStartIndex >= (U32)(dictEnd - dictBase));
@@ -272,7 +439,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, mLength);
         } else if ( (matchIndex <= prefixStartIndex) ) {
             size_t const dictHash = ZSTD_hashPtr(ip, dictHLog, mls);
             U32 const dictMatchIndex = dictHashTable[dictHash];
@@ -292,7 +459,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                 } /* catch up */
                 offset_2 = offset_1;
                 offset_1 = offset;
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
             }
         } else if (MEM_read32(match) != MEM_read32(ip)) {
             /* it's not a match, and we're not going to check the dictionary */
@@ -307,7 +474,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                  && (ip[-1] == match[-1])) { ip--; match--; mLength++; } /* catch up */
             offset_2 = offset_1;
             offset_1 = offset;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
         }
 
         /* match found */
@@ -332,7 +499,7 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     U32 tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                     ip += repLength2;
                     anchor = ip;
@@ -351,6 +518,12 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
     return (size_t)(iend - anchor);
 }
 
+
+ZSTD_GEN_FAST_FN(dictMatchState, 4, 0)
+ZSTD_GEN_FAST_FN(dictMatchState, 5, 0)
+ZSTD_GEN_FAST_FN(dictMatchState, 6, 0)
+ZSTD_GEN_FAST_FN(dictMatchState, 7, 0)
+
 size_t ZSTD_compressBlock_fast_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize)
@@ -361,20 +534,20 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
+        return ZSTD_compressBlock_fast_dictMatchState_4_0(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
+        return ZSTD_compressBlock_fast_dictMatchState_5_0(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
+        return ZSTD_compressBlock_fast_dictMatchState_6_0(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
+        return ZSTD_compressBlock_fast_dictMatchState_7_0(ms, seqStore, rep, src, srcSize);
     }
 }
 
 
 static size_t ZSTD_compressBlock_fast_extDict_generic(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
-        void const* src, size_t srcSize, U32 const mls)
+        void const* src, size_t srcSize, U32 const mls, U32 const hasStep)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     U32* const hashTable = ms->hashTable;
@@ -398,11 +571,13 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
     const BYTE* const ilimit = iend - 8;
     U32 offset_1=rep[0], offset_2=rep[1];
 
+    (void)hasStep; /* not currently specialized on whether it's accelerated */
+
     DEBUGLOG(5, "ZSTD_compressBlock_fast_extDict_generic (offset_1=%u)", offset_1);
 
     /* switch to "regular" variant if extDict is invalidated due to maxDistance */
     if (prefixStartIndex == dictStartIndex)
-        return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
+        return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);
 
     /* Search Loop */
     while (ip < ilimit) {  /* < instead of <=, because (ip+1) */
@@ -416,14 +591,14 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
         const BYTE* const repMatch = repBase + repIndex;
         hashTable[h] = curr;   /* update hash table */
         DEBUGLOG(7, "offset_1 = %u , curr = %u", offset_1, curr);
-        assert(offset_1 <= curr +1);   /* check repIndex */
 
-        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+             & (offset_1 <= curr+1 - dictStartIndex) ) /* note: we are searching at curr+1 */
            && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
             ip++;
-            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, 0, rLength-MINMATCH);
+            ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_REPCODE_1, rLength);
             ip += rLength;
             anchor = ip;
         } else {
@@ -439,7 +614,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                 size_t mLength = ZSTD_count_2segments(ip+4, match+4, iend, matchEnd, prefixStart) + 4;
                 while (((ip>anchor) & (match>lowMatchPtr)) && (ip[-1] == match[-1])) { ip--; match--; mLength++; }   /* catch up */
                 offset_2 = offset_1; offset_1 = offset;  /* update offset history */
-                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, offset + ZSTD_REP_MOVE, mLength-MINMATCH);
+                ZSTD_storeSeq(seqStore, (size_t)(ip-anchor), anchor, iend, STORE_OFFSET(offset), mLength);
                 ip += mLength;
                 anchor = ip;
         }   }
@@ -453,12 +628,12 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
                 U32 const current2 = (U32)(ip-base);
                 U32 const repIndex2 = current2 - offset_2;
                 const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex))  /* intentional overflow */
+                if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 <= curr - dictStartIndex))  /* intentional overflow */
                    && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                     size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
                     { U32 const tmpOffset = offset_2; offset_2 = offset_1; offset_1 = tmpOffset; }  /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, 0 /*offcode*/, repLength2-MINMATCH);
+                    ZSTD_storeSeq(seqStore, 0 /*litlen*/, anchor, iend, STORE_REPCODE_1, repLength2);
                     hashTable[ZSTD_hashPtr(ip, hlog, mls)] = current2;
                     ip += repLength2;
                     anchor = ip;
@@ -475,6 +650,10 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
     return (size_t)(iend - anchor);
 }
 
+ZSTD_GEN_FAST_FN(extDict, 4, 0)
+ZSTD_GEN_FAST_FN(extDict, 5, 0)
+ZSTD_GEN_FAST_FN(extDict, 6, 0)
+ZSTD_GEN_FAST_FN(extDict, 7, 0)
 
 size_t ZSTD_compressBlock_fast_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -485,12 +664,12 @@ size_t ZSTD_compressBlock_fast_extDict(
     {
     default: /* includes case 3 */
     case 4 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
+        return ZSTD_compressBlock_fast_extDict_4_0(ms, seqStore, rep, src, srcSize);
     case 5 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
+        return ZSTD_compressBlock_fast_extDict_5_0(ms, seqStore, rep, src, srcSize);
     case 6 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
+        return ZSTD_compressBlock_fast_extDict_6_0(ms, seqStore, rep, src, srcSize);
     case 7 :
-        return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
+        return ZSTD_compressBlock_fast_extDict_7_0(ms, seqStore, rep, src, srcSize);
     }
 }
diff --git a/lib/zstd/compress/zstd_lazy.c b/lib/zstd/compress/zstd_lazy.c
index fb54d4e28a2bc..0298a01a7504a 100644
--- a/lib/zstd/compress/zstd_lazy.c
+++ b/lib/zstd/compress/zstd_lazy.c
@@ -61,7 +61,7 @@ ZSTD_updateDUBT(ZSTD_matchState_t* ms,
  *  assumption : curr >= btlow == (curr - btmask)
  *  doesn't fail */
 static void
-ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
+ZSTD_insertDUBT1(const ZSTD_matchState_t* ms,
                  U32 curr, const BYTE* inputEnd,
                  U32 nbCompares, U32 btLow,
                  const ZSTD_dictMode_e dictMode)
@@ -151,7 +151,7 @@ ZSTD_insertDUBT1(ZSTD_matchState_t* ms,
 
 static size_t
 ZSTD_DUBT_findBetterDictMatch (
-        ZSTD_matchState_t* ms,
+        const ZSTD_matchState_t* ms,
         const BYTE* const ip, const BYTE* const iend,
         size_t* offsetPtr,
         size_t bestLength,
@@ -197,8 +197,8 @@ ZSTD_DUBT_findBetterDictMatch (
             U32 matchIndex = dictMatchIndex + dictIndexDelta;
             if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) ) {
                 DEBUGLOG(9, "ZSTD_DUBT_findBetterDictMatch(%u) : found better match length %u -> %u and offsetCode %u -> %u (dictMatchIndex %u, matchIndex %u)",
-                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, ZSTD_REP_MOVE + curr - matchIndex, dictMatchIndex, matchIndex);
-                bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+                    curr, (U32)bestLength, (U32)matchLength, (U32)*offsetPtr, STORE_OFFSET(curr - matchIndex), dictMatchIndex, matchIndex);
+                bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
             }
             if (ip+matchLength == iend) {   /* reached end of input : ip[matchLength] is not valid, no way to know if it's larger or smaller than match */
                 break;   /* drop, to guarantee consistency (miss a little bit of compression) */
@@ -218,7 +218,7 @@ ZSTD_DUBT_findBetterDictMatch (
     }
 
     if (bestLength >= MINMATCH) {
-        U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+        U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
         DEBUGLOG(8, "ZSTD_DUBT_findBetterDictMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                     curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
     }
@@ -328,7 +328,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
                 if (matchLength > matchEndIdx - matchIndex)
                     matchEndIdx = matchIndex + (U32)matchLength;
                 if ( (4*(int)(matchLength-bestLength)) > (int)(ZSTD_highbit32(curr-matchIndex+1) - ZSTD_highbit32((U32)offsetPtr[0]+1)) )
-                    bestLength = matchLength, *offsetPtr = ZSTD_REP_MOVE + curr - matchIndex;
+                    bestLength = matchLength, *offsetPtr = STORE_OFFSET(curr - matchIndex);
                 if (ip+matchLength == iend) {   /* equal : no way to know if inf or sup */
                     if (dictMode == ZSTD_dictMatchState) {
                         nbCompares = 0; /* in addition to avoiding checking any
@@ -368,7 +368,7 @@ ZSTD_DUBT_findBestMatch(ZSTD_matchState_t* ms,
         assert(matchEndIdx > curr+8); /* ensure nextToUpdate is increased */
         ms->nextToUpdate = matchEndIdx - 8;   /* skip repetitive patterns */
         if (bestLength >= MINMATCH) {
-            U32 const mIndex = curr - ((U32)*offsetPtr - ZSTD_REP_MOVE); (void)mIndex;
+            U32 const mIndex = curr - (U32)STORED_OFFSET(*offsetPtr); (void)mIndex;
             DEBUGLOG(8, "ZSTD_DUBT_findBestMatch(%u) : found match of length %u and offsetCode %u (pos %u)",
                         curr, (U32)bestLength, (U32)*offsetPtr, mIndex);
         }
@@ -391,91 +391,9 @@ ZSTD_BtFindBestMatch( ZSTD_matchState_t* ms,
     return ZSTD_DUBT_findBestMatch(ms, ip, iLimit, offsetPtr, mls, dictMode);
 }
 
-
-static size_t
-ZSTD_BtFindBestMatch_selectMLS (  ZSTD_matchState_t* ms,
-                            const BYTE* ip, const BYTE* const iLimit,
-                                  size_t* offsetPtr)
-{
-    switch(ms->cParams.minMatch)
-    {
-    default : /* includes case 3 */
-    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
-    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
-    case 7 :
-    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
-    }
-}
-
-
-static size_t ZSTD_BtFindBestMatch_dictMatchState_selectMLS (
-                        ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr)
-{
-    switch(ms->cParams.minMatch)
-    {
-    default : /* includes case 3 */
-    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
-    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
-    case 7 :
-    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
-    }
-}
-
-
-static size_t ZSTD_BtFindBestMatch_extDict_selectMLS (
-                        ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr)
-{
-    switch(ms->cParams.minMatch)
-    {
-    default : /* includes case 3 */
-    case 4 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
-    case 5 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
-    case 7 :
-    case 6 : return ZSTD_BtFindBestMatch(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
-    }
-}
-
-
-
 /* *********************************
-*  Hash Chain
+* Dedicated dict search
 ***********************************/
-#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
-
-/* Update chains up to ip (excluded)
-   Assumption : always within prefix (i.e. not within extDict) */
-FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
-                        ZSTD_matchState_t* ms,
-                        const ZSTD_compressionParameters* const cParams,
-                        const BYTE* ip, U32 const mls)
-{
-    U32* const hashTable  = ms->hashTable;
-    const U32 hashLog = cParams->hashLog;
-    U32* const chainTable = ms->chainTable;
-    const U32 chainMask = (1 << cParams->chainLog) - 1;
-    const BYTE* const base = ms->window.base;
-    const U32 target = (U32)(ip - base);
-    U32 idx = ms->nextToUpdate;
-
-    while(idx < target) { /* catch up */
-        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
-        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
-        hashTable[h] = idx;
-        idx++;
-    }
-
-    ms->nextToUpdate = target;
-    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
-}
-
-U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
-}
 
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip)
 {
@@ -485,7 +403,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B
     U32* const chainTable = ms->chainTable;
     U32 const chainSize = 1 << ms->cParams.chainLog;
     U32 idx = ms->nextToUpdate;
-    U32 const minChain = chainSize < target ? target - chainSize : idx;
+    U32 const minChain = chainSize < target - idx ? target - chainSize : idx;
     U32 const bucketSize = 1 << ZSTD_LAZY_DDSS_BUCKET_LOG;
     U32 const cacheSize = bucketSize - 1;
     U32 const chainAttempts = (1 << ms->cParams.searchLog) - cacheSize;
@@ -499,13 +417,12 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B
     U32 const hashLog = ms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
     U32* const tmpHashTable = hashTable;
     U32* const tmpChainTable = hashTable + ((size_t)1 << hashLog);
-    U32 const tmpChainSize = ((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
+    U32 const tmpChainSize = (U32)((1 << ZSTD_LAZY_DDSS_BUCKET_LOG) - 1) << hashLog;
     U32 const tmpMinChain = tmpChainSize < target ? target - tmpChainSize : idx;
-
     U32 hashIdx;
 
     assert(ms->cParams.chainLog <= 24);
-    assert(ms->cParams.hashLog >= ms->cParams.chainLog);
+    assert(ms->cParams.hashLog > ms->cParams.chainLog);
     assert(idx != 0);
     assert(tmpMinChain <= minChain);
 
@@ -536,7 +453,7 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B
             if (count == cacheSize) {
                 for (count = 0; count < chainLimit;) {
                     if (i < minChain) {
-                        if (!i || countBeyondMinChain++ > cacheSize) {
+                        if (!i || ++countBeyondMinChain > cacheSize) {
                             /* only allow pulling `cacheSize` number of entries
                              * into the cache or chainTable beyond `minChain`,
                              * to replace the entries pulled out of the
@@ -592,10 +509,143 @@ void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const B
     ms->nextToUpdate = target;
 }
 
+/* Returns the longest match length found in the dedicated dict search structure.
+ * If none are longer than the argument ml, then ml will be returned.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_dedicatedDictSearch_lazy_search(size_t* offsetPtr, size_t ml, U32 nbAttempts,
+                                            const ZSTD_matchState_t* const dms,
+                                            const BYTE* const ip, const BYTE* const iLimit,
+                                            const BYTE* const prefixStart, const U32 curr,
+                                            const U32 dictLimit, const size_t ddsIdx) {
+    const U32 ddsLowestIndex  = dms->window.dictLimit;
+    const BYTE* const ddsBase = dms->window.base;
+    const BYTE* const ddsEnd  = dms->window.nextSrc;
+    const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
+    const U32 ddsIndexDelta   = dictLimit - ddsSize;
+    const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
+    const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
+    U32 ddsAttempt;
+    U32 matchIndex;
+
+    for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
+        PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
+    }
+
+    {
+        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+        U32 const chainIndex = chainPackedPointer >> 8;
+
+        PREFETCH_L1(&dms->chainTable[chainIndex]);
+    }
+
+    for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
+        size_t currentMl=0;
+        const BYTE* match;
+        matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
+        match = ddsBase + matchIndex;
+
+        if (!matchIndex) {
+            return ml;
+        }
+
+        /* guaranteed by table construction */
+        (void)ddsLowestIndex;
+        assert(matchIndex >= ddsLowestIndex);
+        assert(match+4 <= ddsEnd);
+        if (MEM_read32(match) == MEM_read32(ip)) {
+            /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+            currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+        }
+
+        /* save best solution */
+        if (currentMl > ml) {
+            ml = currentMl;
+            *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
+            if (ip+currentMl == iLimit) {
+                /* best possible, avoids read overflow on next attempt */
+                return ml;
+            }
+        }
+    }
+
+    {
+        U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
+        U32 chainIndex = chainPackedPointer >> 8;
+        U32 const chainLength = chainPackedPointer & 0xFF;
+        U32 const chainAttempts = nbAttempts - ddsAttempt;
+        U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
+        U32 chainAttempt;
+
+        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
+            PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
+        }
+
+        for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
+            size_t currentMl=0;
+            const BYTE* match;
+            matchIndex = dms->chainTable[chainIndex];
+            match = ddsBase + matchIndex;
+
+            /* guaranteed by table construction */
+            assert(matchIndex >= ddsLowestIndex);
+            assert(match+4 <= ddsEnd);
+            if (MEM_read32(match) == MEM_read32(ip)) {
+                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
+            }
+
+            /* save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = STORE_OFFSET(curr - (matchIndex + ddsIndexDelta));
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+        }
+    }
+    return ml;
+}
+
+
+/* *********************************
+*  Hash Chain
+***********************************/
+#define NEXT_IN_CHAIN(d, mask)   chainTable[(d) & (mask)]
+
+/* Update chains up to ip (excluded)
+   Assumption : always within prefix (i.e. not within extDict) */
+FORCE_INLINE_TEMPLATE U32 ZSTD_insertAndFindFirstIndex_internal(
+                        ZSTD_matchState_t* ms,
+                        const ZSTD_compressionParameters* const cParams,
+                        const BYTE* ip, U32 const mls)
+{
+    U32* const hashTable  = ms->hashTable;
+    const U32 hashLog = cParams->hashLog;
+    U32* const chainTable = ms->chainTable;
+    const U32 chainMask = (1 << cParams->chainLog) - 1;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    U32 idx = ms->nextToUpdate;
+
+    while(idx < target) { /* catch up */
+        size_t const h = ZSTD_hashPtr(base+idx, hashLog, mls);
+        NEXT_IN_CHAIN(idx, chainMask) = hashTable[h];
+        hashTable[h] = idx;
+        idx++;
+    }
+
+    ms->nextToUpdate = target;
+    return hashTable[ZSTD_hashPtr(ip, hashLog, mls)];
+}
+
+U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip) {
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    return ZSTD_insertAndFindFirstIndex_internal(ms, cParams, ip, ms->cParams.minMatch);
+}
 
 /* inlining is important to hardwire a hot branch (template emulation) */
 FORCE_INLINE_TEMPLATE
-size_t ZSTD_HcFindBestMatch_generic (
+size_t ZSTD_HcFindBestMatch(
                         ZSTD_matchState_t* ms,
                         const BYTE* const ip, const BYTE* const iLimit,
                         size_t* offsetPtr,
@@ -653,7 +703,7 @@ size_t ZSTD_HcFindBestMatch_generic (
         /* save best solution */
         if (currentMl > ml) {
             ml = currentMl;
-            *offsetPtr = curr - matchIndex + ZSTD_REP_MOVE;
+            *offsetPtr = STORE_OFFSET(curr - matchIndex);
             if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
         }
 
@@ -663,90 +713,8 @@ size_t ZSTD_HcFindBestMatch_generic (
 
     assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
     if (dictMode == ZSTD_dedicatedDictSearch) {
-        const U32 ddsLowestIndex  = dms->window.dictLimit;
-        const BYTE* const ddsBase = dms->window.base;
-        const BYTE* const ddsEnd  = dms->window.nextSrc;
-        const U32 ddsSize         = (U32)(ddsEnd - ddsBase);
-        const U32 ddsIndexDelta   = dictLimit - ddsSize;
-        const U32 bucketSize      = (1 << ZSTD_LAZY_DDSS_BUCKET_LOG);
-        const U32 bucketLimit     = nbAttempts < bucketSize - 1 ? nbAttempts : bucketSize - 1;
-        U32 ddsAttempt;
-
-        for (ddsAttempt = 0; ddsAttempt < bucketSize - 1; ddsAttempt++) {
-            PREFETCH_L1(ddsBase + dms->hashTable[ddsIdx + ddsAttempt]);
-        }
-
-        {
-            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-            U32 const chainIndex = chainPackedPointer >> 8;
-
-            PREFETCH_L1(&dms->chainTable[chainIndex]);
-        }
-
-        for (ddsAttempt = 0; ddsAttempt < bucketLimit; ddsAttempt++) {
-            size_t currentMl=0;
-            const BYTE* match;
-            matchIndex = dms->hashTable[ddsIdx + ddsAttempt];
-            match = ddsBase + matchIndex;
-
-            if (!matchIndex) {
-                return ml;
-            }
-
-            /* guaranteed by table construction */
-            (void)ddsLowestIndex;
-            assert(matchIndex >= ddsLowestIndex);
-            assert(match+4 <= ddsEnd);
-            if (MEM_read32(match) == MEM_read32(ip)) {
-                /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-            }
-
-            /* save best solution */
-            if (currentMl > ml) {
-                ml = currentMl;
-                *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
-                if (ip+currentMl == iLimit) {
-                    /* best possible, avoids read overflow on next attempt */
-                    return ml;
-                }
-            }
-        }
-
-        {
-            U32 const chainPackedPointer = dms->hashTable[ddsIdx + bucketSize - 1];
-            U32 chainIndex = chainPackedPointer >> 8;
-            U32 const chainLength = chainPackedPointer & 0xFF;
-            U32 const chainAttempts = nbAttempts - ddsAttempt;
-            U32 const chainLimit = chainAttempts > chainLength ? chainLength : chainAttempts;
-            U32 chainAttempt;
-
-            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++) {
-                PREFETCH_L1(ddsBase + dms->chainTable[chainIndex + chainAttempt]);
-            }
-
-            for (chainAttempt = 0 ; chainAttempt < chainLimit; chainAttempt++, chainIndex++) {
-                size_t currentMl=0;
-                const BYTE* match;
-                matchIndex = dms->chainTable[chainIndex];
-                match = ddsBase + matchIndex;
-
-                /* guaranteed by table construction */
-                assert(matchIndex >= ddsLowestIndex);
-                assert(match+4 <= ddsEnd);
-                if (MEM_read32(match) == MEM_read32(ip)) {
-                    /* assumption : matchIndex <= dictLimit-4 (by table construction) */
-                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, ddsEnd, prefixStart) + 4;
-                }
-
-                /* save best solution */
-                if (currentMl > ml) {
-                    ml = currentMl;
-                    *offsetPtr = curr - (matchIndex + ddsIndexDelta) + ZSTD_REP_MOVE;
-                    if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
-                }
-            }
-        }
+        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts, dms,
+                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
     } else if (dictMode == ZSTD_dictMatchState) {
         const U32* const dmsChainTable = dms->chainTable;
         const U32 dmsChainSize         = (1 << dms->cParams.chainLog);
@@ -770,7 +738,8 @@ size_t ZSTD_HcFindBestMatch_generic (
             /* save best solution */
             if (currentMl > ml) {
                 ml = currentMl;
-                *offsetPtr = curr - (matchIndex + dmsIndexDelta) + ZSTD_REP_MOVE;
+                assert(curr > matchIndex + dmsIndexDelta);
+                *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
                 if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
             }
 
@@ -783,75 +752,725 @@ size_t ZSTD_HcFindBestMatch_generic (
     return ml;
 }
 
+/* *********************************
+* (SIMD) Row-based matchfinder
+***********************************/
+/* Constants for row-based hash */
+#define ZSTD_ROW_HASH_TAG_OFFSET 16     /* byte offset of hashes in the match state's tagTable from the beginning of a row */
+#define ZSTD_ROW_HASH_TAG_BITS 8        /* nb bits to use for the tag */
+#define ZSTD_ROW_HASH_TAG_MASK ((1u << ZSTD_ROW_HASH_TAG_BITS) - 1)
+#define ZSTD_ROW_HASH_MAX_ENTRIES 64    /* absolute maximum number of entries per row, for all configurations */
+
+#define ZSTD_ROW_HASH_CACHE_MASK (ZSTD_ROW_HASH_CACHE_SIZE - 1)
 
-FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_selectMLS (
-                        ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr)
+typedef U64 ZSTD_VecMask;   /* Clarifies when we are interacting with a U64 representing a mask of matches */
+
+/* ZSTD_VecMask_next():
+ * Starting from the LSB, returns the idx of the next non-zero bit.
+ * Basically counting the nb of trailing zeroes.
+ */
+static U32 ZSTD_VecMask_next(ZSTD_VecMask val) {
+    assert(val != 0);
+#   if (defined(__GNUC__) && ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))))
+    if (sizeof(size_t) == 4) {
+        U32 mostSignificantWord = (U32)(val >> 32);
+        U32 leastSignificantWord = (U32)val;
+        if (leastSignificantWord == 0) {
+            return 32 + (U32)__builtin_ctz(mostSignificantWord);
+        } else {
+            return (U32)__builtin_ctz(leastSignificantWord);
+        }
+    } else {
+        return (U32)__builtin_ctzll(val);
+    }
+#   else
+    /* Software ctz version: http://aggregate.org/MAGIC/#Trailing%20Zero%20Count
+     * and: https://stackoverflow.com/questions/2709430/count-number-of-bits-in-a-64-bit-long-big-integer
+     */
+    val = ~val & (val - 1ULL); /* Lowest set bit mask */
+    val = val - ((val >> 1) & 0x5555555555555555);
+    val = (val & 0x3333333333333333ULL) + ((val >> 2) & 0x3333333333333333ULL);
+    return (U32)((((val + (val >> 4)) & 0xF0F0F0F0F0F0F0FULL) * 0x101010101010101ULL) >> 56);
+#   endif
+}
+
+/* ZSTD_rotateRight_*():
+ * Rotates a bitfield to the right by "count" bits.
+ * https://en.wikipedia.org/w/index.php?title=Circular_shift&oldid=991635599#Implementing_circular_shifts
+ */
+FORCE_INLINE_TEMPLATE
+U64 ZSTD_rotateRight_U64(U64 const value, U32 count) {
+    assert(count < 64);
+    count &= 0x3F; /* for fickle pattern recognition */
+    return (value >> count) | (U64)(value << ((0U - count) & 0x3F));
+}
+
+FORCE_INLINE_TEMPLATE
+U32 ZSTD_rotateRight_U32(U32 const value, U32 count) {
+    assert(count < 32);
+    count &= 0x1F; /* for fickle pattern recognition */
+    return (value >> count) | (U32)(value << ((0U - count) & 0x1F));
+}
+
+FORCE_INLINE_TEMPLATE
+U16 ZSTD_rotateRight_U16(U16 const value, U32 count) {
+    assert(count < 16);
+    count &= 0x0F; /* for fickle pattern recognition */
+    return (value >> count) | (U16)(value << ((0U - count) & 0x0F));
+}
+
+/* ZSTD_row_nextIndex():
+ * Returns the next index to insert at within a tagTable row, and updates the "head"
+ * value to reflect the update. Essentially cycles backwards from [0, {entries per row})
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextIndex(BYTE* const tagRow, U32 const rowMask) {
+  U32 const next = (*tagRow - 1) & rowMask;
+  *tagRow = (BYTE)next;
+  return next;
+}
+
+/* ZSTD_isAligned():
+ * Checks that a pointer is aligned to "align" bytes which must be a power of 2.
+ */
+MEM_STATIC int ZSTD_isAligned(void const* ptr, size_t align) {
+    assert((align & (align - 1)) == 0);
+    return (((size_t)ptr) & (align - 1)) == 0;
+}
+
+/* ZSTD_row_prefetch():
+ * Performs prefetching for the hashTable and tagTable at a given row.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_prefetch(U32 const* hashTable, U16 const* tagTable, U32 const relRow, U32 const rowLog) {
+    PREFETCH_L1(hashTable + relRow);
+    if (rowLog >= 5) {
+        PREFETCH_L1(hashTable + relRow + 16);
+        /* Note: prefetching more of the hash table does not appear to be beneficial for 128-entry rows */
+    }
+    PREFETCH_L1(tagTable + relRow);
+    if (rowLog == 6) {
+        PREFETCH_L1(tagTable + relRow + 32);
+    }
+    assert(rowLog == 4 || rowLog == 5 || rowLog == 6);
+    assert(ZSTD_isAligned(hashTable + relRow, 64));                 /* prefetched hash row always 64-byte aligned */
+    assert(ZSTD_isAligned(tagTable + relRow, (size_t)1 << rowLog)); /* prefetched tagRow sits on correct multiple of bytes (32,64,128) */
+}
+
+/* ZSTD_row_fillHashCache():
+ * Fill up the hash cache starting at idx, prefetching up to ZSTD_ROW_HASH_CACHE_SIZE entries,
+ * but not beyond iLimit.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_fillHashCache(ZSTD_matchState_t* ms, const BYTE* base,
+                                   U32 const rowLog, U32 const mls,
+                                   U32 idx, const BYTE* const iLimit)
 {
-    switch(ms->cParams.minMatch)
-    {
-    default : /* includes case 3 */
-    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_noDict);
-    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_noDict);
-    case 7 :
-    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_noDict);
+    U32 const* const hashTable = ms->hashTable;
+    U16 const* const tagTable = ms->tagTable;
+    U32 const hashLog = ms->rowHashLog;
+    U32 const maxElemsToPrefetch = (base + idx) > iLimit ? 0 : (U32)(iLimit - (base + idx) + 1);
+    U32 const lim = idx + MIN(ZSTD_ROW_HASH_CACHE_SIZE, maxElemsToPrefetch);
+
+    for (; idx < lim; ++idx) {
+        U32 const hash = (U32)ZSTD_hashPtr(base + idx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const row = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+        ms->hashCache[idx & ZSTD_ROW_HASH_CACHE_MASK] = hash;
     }
+
+    DEBUGLOG(6, "ZSTD_row_fillHashCache(): [%u %u %u %u %u %u %u %u]", ms->hashCache[0], ms->hashCache[1],
+                                                     ms->hashCache[2], ms->hashCache[3], ms->hashCache[4],
+                                                     ms->hashCache[5], ms->hashCache[6], ms->hashCache[7]);
 }
 
+/* ZSTD_row_nextCachedHash():
+ * Returns the hash of base + idx, and replaces the hash in the hash cache with the byte at
+ * base + idx + ZSTD_ROW_HASH_CACHE_SIZE. Also prefetches the appropriate rows from hashTable and tagTable.
+ */
+FORCE_INLINE_TEMPLATE U32 ZSTD_row_nextCachedHash(U32* cache, U32 const* hashTable,
+                                                  U16 const* tagTable, BYTE const* base,
+                                                  U32 idx, U32 const hashLog,
+                                                  U32 const rowLog, U32 const mls)
+{
+    U32 const newHash = (U32)ZSTD_hashPtr(base+idx+ZSTD_ROW_HASH_CACHE_SIZE, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+    U32 const row = (newHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+    ZSTD_row_prefetch(hashTable, tagTable, row, rowLog);
+    {   U32 const hash = cache[idx & ZSTD_ROW_HASH_CACHE_MASK];
+        cache[idx & ZSTD_ROW_HASH_CACHE_MASK] = newHash;
+        return hash;
+    }
+}
 
-static size_t ZSTD_HcFindBestMatch_dictMatchState_selectMLS (
-                        ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr)
+/* ZSTD_row_update_internalImpl():
+ * Updates the hash table with positions starting from updateStartIdx until updateEndIdx.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_update_internalImpl(ZSTD_matchState_t* ms,
+                                                        U32 updateStartIdx, U32 const updateEndIdx,
+                                                        U32 const mls, U32 const rowLog,
+                                                        U32 const rowMask, U32 const useCache)
 {
-    switch(ms->cParams.minMatch)
-    {
-    default : /* includes case 3 */
-    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dictMatchState);
-    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dictMatchState);
-    case 7 :
-    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dictMatchState);
+    U32* const hashTable = ms->hashTable;
+    U16* const tagTable = ms->tagTable;
+    U32 const hashLog = ms->rowHashLog;
+    const BYTE* const base = ms->window.base;
+
+    DEBUGLOG(6, "ZSTD_row_update_internalImpl(): updateStartIdx=%u, updateEndIdx=%u", updateStartIdx, updateEndIdx);
+    for (; updateStartIdx < updateEndIdx; ++updateStartIdx) {
+        U32 const hash = useCache ? ZSTD_row_nextCachedHash(ms->hashCache, hashTable, tagTable, base, updateStartIdx, hashLog, rowLog, mls)
+                                  : (U32)ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        U32* const row = hashTable + relRow;
+        BYTE* tagRow = (BYTE*)(tagTable + relRow);  /* Though tagTable is laid out as a table of U16, each tag is only 1 byte.
+                                                       Explicit cast allows us to get exact desired position within each row */
+        U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+
+        assert(hash == ZSTD_hashPtr(base + updateStartIdx, hashLog + ZSTD_ROW_HASH_TAG_BITS, mls));
+        ((BYTE*)tagRow)[pos + ZSTD_ROW_HASH_TAG_OFFSET] = hash & ZSTD_ROW_HASH_TAG_MASK;
+        row[pos] = updateStartIdx;
     }
 }
 
+/* ZSTD_row_update_internal():
+ * Inserts the byte at ip into the appropriate position in the hash table, and updates ms->nextToUpdate.
+ * Skips sections of long matches as is necessary.
+ */
+FORCE_INLINE_TEMPLATE void ZSTD_row_update_internal(ZSTD_matchState_t* ms, const BYTE* ip,
+                                                    U32 const mls, U32 const rowLog,
+                                                    U32 const rowMask, U32 const useCache)
+{
+    U32 idx = ms->nextToUpdate;
+    const BYTE* const base = ms->window.base;
+    const U32 target = (U32)(ip - base);
+    const U32 kSkipThreshold = 384;
+    const U32 kMaxMatchStartPositionsToUpdate = 96;
+    const U32 kMaxMatchEndPositionsToUpdate = 32;
+
+    if (useCache) {
+        /* Only skip positions when using hash cache, i.e.
+         * if we are loading a dict, don't skip anything.
+         * If we decide to skip, then we only update a set number
+         * of positions at the beginning and end of the match.
+         */
+        if (UNLIKELY(target - idx > kSkipThreshold)) {
+            U32 const bound = idx + kMaxMatchStartPositionsToUpdate;
+            ZSTD_row_update_internalImpl(ms, idx, bound, mls, rowLog, rowMask, useCache);
+            idx = target - kMaxMatchEndPositionsToUpdate;
+            ZSTD_row_fillHashCache(ms, base, rowLog, mls, idx, ip+1);
+        }
+    }
+    assert(target >= idx);
+    ZSTD_row_update_internalImpl(ms, idx, target, mls, rowLog, rowMask, useCache);
+    ms->nextToUpdate = target;
+}
 
-static size_t ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS (
-                        ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr)
+/* ZSTD_row_update():
+ * External wrapper for ZSTD_row_update_internal(). Used for filling the hashtable during dictionary
+ * processing.
+ */
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip) {
+    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
+    const U32 rowMask = (1u << rowLog) - 1;
+    const U32 mls = MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */);
+
+    DEBUGLOG(5, "ZSTD_row_update(), rowLog=%u", rowLog);
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 0 /* dont use cache */);
+}
+
+#if defined(ZSTD_ARCH_X86_SSE2)
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_row_getSSEMask(int nbChunks, const BYTE* const src, const BYTE tag, const U32 head)
 {
-    switch(ms->cParams.minMatch)
-    {
-    default : /* includes case 3 */
-    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_dedicatedDictSearch);
-    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_dedicatedDictSearch);
-    case 7 :
-    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_dedicatedDictSearch);
+    const __m128i comparisonMask = _mm_set1_epi8((char)tag);
+    int matches[4] = {0};
+    int i;
+    assert(nbChunks == 1 || nbChunks == 2 || nbChunks == 4);
+    for (i=0; i<nbChunks; i++) {
+        const __m128i chunk = _mm_loadu_si128((const __m128i*)(const void*)(src + 16*i));
+        const __m128i equalMask = _mm_cmpeq_epi8(chunk, comparisonMask);
+        matches[i] = _mm_movemask_epi8(equalMask);
     }
+    if (nbChunks == 1) return ZSTD_rotateRight_U16((U16)matches[0], head);
+    if (nbChunks == 2) return ZSTD_rotateRight_U32((U32)matches[1] << 16 | (U32)matches[0], head);
+    assert(nbChunks == 4);
+    return ZSTD_rotateRight_U64((U64)matches[3] << 48 | (U64)matches[2] << 32 | (U64)matches[1] << 16 | (U64)matches[0], head);
 }
+#endif
 
+/* Returns a ZSTD_VecMask (U32) that has the nth bit set to 1 if the newly-computed "tag" matches
+ * the hash at the nth position in a row of the tagTable.
+ * Each row is a circular buffer beginning at the value of "head". So we must rotate the "matches" bitfield
+ * to match up with the actual layout of the entries within the hashTable */
+FORCE_INLINE_TEMPLATE ZSTD_VecMask
+ZSTD_row_getMatchMask(const BYTE* const tagRow, const BYTE tag, const U32 head, const U32 rowEntries)
+{
+    const BYTE* const src = tagRow + ZSTD_ROW_HASH_TAG_OFFSET;
+    assert((rowEntries == 16) || (rowEntries == 32) || rowEntries == 64);
+    assert(rowEntries <= ZSTD_ROW_HASH_MAX_ENTRIES);
+
+#if defined(ZSTD_ARCH_X86_SSE2)
+
+    return ZSTD_row_getSSEMask(rowEntries / 16, src, tag, head);
+
+#else /* SW or NEON-LE */
+
+# if defined(ZSTD_ARCH_ARM_NEON)
+  /* This NEON path only works for little endian - otherwise use SWAR below */
+    if (MEM_isLittleEndian()) {
+        if (rowEntries == 16) {
+            const uint8x16_t chunk = vld1q_u8(src);
+            const uint16x8_t equalMask = vreinterpretq_u16_u8(vceqq_u8(chunk, vdupq_n_u8(tag)));
+            const uint16x8_t t0 = vshlq_n_u16(equalMask, 7);
+            const uint32x4_t t1 = vreinterpretq_u32_u16(vsriq_n_u16(t0, t0, 14));
+            const uint64x2_t t2 = vreinterpretq_u64_u32(vshrq_n_u32(t1, 14));
+            const uint8x16_t t3 = vreinterpretq_u8_u64(vsraq_n_u64(t2, t2, 28));
+            const U16 hi = (U16)vgetq_lane_u8(t3, 8);
+            const U16 lo = (U16)vgetq_lane_u8(t3, 0);
+            return ZSTD_rotateRight_U16((hi << 8) | lo, head);
+        } else if (rowEntries == 32) {
+            const uint16x8x2_t chunk = vld2q_u16((const U16*)(const void*)src);
+            const uint8x16_t chunk0 = vreinterpretq_u8_u16(chunk.val[0]);
+            const uint8x16_t chunk1 = vreinterpretq_u8_u16(chunk.val[1]);
+            const uint8x16_t equalMask0 = vceqq_u8(chunk0, vdupq_n_u8(tag));
+            const uint8x16_t equalMask1 = vceqq_u8(chunk1, vdupq_n_u8(tag));
+            const int8x8_t pack0 = vqmovn_s16(vreinterpretq_s16_u8(equalMask0));
+            const int8x8_t pack1 = vqmovn_s16(vreinterpretq_s16_u8(equalMask1));
+            const uint8x8_t t0 = vreinterpret_u8_s8(pack0);
+            const uint8x8_t t1 = vreinterpret_u8_s8(pack1);
+            const uint8x8_t t2 = vsri_n_u8(t1, t0, 2);
+            const uint8x8x2_t t3 = vuzp_u8(t2, t0);
+            const uint8x8_t t4 = vsri_n_u8(t3.val[1], t3.val[0], 4);
+            const U32 matches = vget_lane_u32(vreinterpret_u32_u8(t4), 0);
+            return ZSTD_rotateRight_U32(matches, head);
+        } else { /* rowEntries == 64 */
+            const uint8x16x4_t chunk = vld4q_u8(src);
+            const uint8x16_t dup = vdupq_n_u8(tag);
+            const uint8x16_t cmp0 = vceqq_u8(chunk.val[0], dup);
+            const uint8x16_t cmp1 = vceqq_u8(chunk.val[1], dup);
+            const uint8x16_t cmp2 = vceqq_u8(chunk.val[2], dup);
+            const uint8x16_t cmp3 = vceqq_u8(chunk.val[3], dup);
+
+            const uint8x16_t t0 = vsriq_n_u8(cmp1, cmp0, 1);
+            const uint8x16_t t1 = vsriq_n_u8(cmp3, cmp2, 1);
+            const uint8x16_t t2 = vsriq_n_u8(t1, t0, 2);
+            const uint8x16_t t3 = vsriq_n_u8(t2, t2, 4);
+            const uint8x8_t t4 = vshrn_n_u16(vreinterpretq_u16_u8(t3), 4);
+            const U64 matches = vget_lane_u64(vreinterpret_u64_u8(t4), 0);
+            return ZSTD_rotateRight_U64(matches, head);
+        }
+    }
+# endif /* ZSTD_ARCH_ARM_NEON */
+    /* SWAR */
+    {   const size_t chunkSize = sizeof(size_t);
+        const size_t shiftAmount = ((chunkSize * 8) - chunkSize);
+        const size_t xFF = ~((size_t)0);
+        const size_t x01 = xFF / 0xFF;
+        const size_t x80 = x01 << 7;
+        const size_t splatChar = tag * x01;
+        ZSTD_VecMask matches = 0;
+        int i = rowEntries - chunkSize;
+        assert((sizeof(size_t) == 4) || (sizeof(size_t) == 8));
+        if (MEM_isLittleEndian()) { /* runtime check so have two loops */
+            const size_t extractMagic = (xFF / 0x7F) >> chunkSize;
+            do {
+                size_t chunk = MEM_readST(&src[i]);
+                chunk ^= splatChar;
+                chunk = (((chunk | x80) - x01) | chunk) & x80;
+                matches <<= chunkSize;
+                matches |= (chunk * extractMagic) >> shiftAmount;
+                i -= chunkSize;
+            } while (i >= 0);
+        } else { /* big endian: reverse bits during extraction */
+            const size_t msb = xFF ^ (xFF >> 1);
+            const size_t extractMagic = (msb / 0x1FF) | msb;
+            do {
+                size_t chunk = MEM_readST(&src[i]);
+                chunk ^= splatChar;
+                chunk = (((chunk | x80) - x01) | chunk) & x80;
+                matches <<= chunkSize;
+                matches |= ((chunk >> 7) * extractMagic) >> shiftAmount;
+                i -= chunkSize;
+            } while (i >= 0);
+        }
+        matches = ~matches;
+        if (rowEntries == 16) {
+            return ZSTD_rotateRight_U16((U16)matches, head);
+        } else if (rowEntries == 32) {
+            return ZSTD_rotateRight_U32((U32)matches, head);
+        } else {
+            return ZSTD_rotateRight_U64((U64)matches, head);
+        }
+    }
+#endif
+}
 
-FORCE_INLINE_TEMPLATE size_t ZSTD_HcFindBestMatch_extDict_selectMLS (
+/* The high-level approach of the SIMD row based match finder is as follows:
+ * - Figure out where to insert the new entry:
+ *      - Generate a hash from a byte along with an additional 1-byte "short hash". The additional byte is our "tag"
+ *      - The hashTable is effectively split into groups or "rows" of 16 or 32 entries of U32, and the hash determines
+ *        which row to insert into.
+ *      - Determine the correct position within the row to insert the entry into. Each row of 16 or 32 can
+ *        be considered as a circular buffer with a "head" index that resides in the tagTable.
+ *      - Also insert the "tag" into the equivalent row and position in the tagTable.
+ *          - Note: The tagTable has 17 or 33 1-byte entries per row, due to 16 or 32 tags, and 1 "head" entry.
+ *                  The 17 or 33 entry rows are spaced out to occur every 32 or 64 bytes, respectively,
+ *                  for alignment/performance reasons, leaving some bytes unused.
+ * - Use SIMD to efficiently compare the tags in the tagTable to the 1-byte "short hash" and
+ *   generate a bitfield that we can cycle through to check the collisions in the hash table.
+ * - Pick the longest match.
+ */
+FORCE_INLINE_TEMPLATE
+size_t ZSTD_RowFindBestMatch(
                         ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* const iLimit,
-                        size_t* offsetPtr)
+                        const BYTE* const ip, const BYTE* const iLimit,
+                        size_t* offsetPtr,
+                        const U32 mls, const ZSTD_dictMode_e dictMode,
+                        const U32 rowLog)
 {
-    switch(ms->cParams.minMatch)
-    {
-    default : /* includes case 3 */
-    case 4 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 4, ZSTD_extDict);
-    case 5 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 5, ZSTD_extDict);
-    case 7 :
-    case 6 : return ZSTD_HcFindBestMatch_generic(ms, ip, iLimit, offsetPtr, 6, ZSTD_extDict);
+    U32* const hashTable = ms->hashTable;
+    U16* const tagTable = ms->tagTable;
+    U32* const hashCache = ms->hashCache;
+    const U32 hashLog = ms->rowHashLog;
+    const ZSTD_compressionParameters* const cParams = &ms->cParams;
+    const BYTE* const base = ms->window.base;
+    const BYTE* const dictBase = ms->window.dictBase;
+    const U32 dictLimit = ms->window.dictLimit;
+    const BYTE* const prefixStart = base + dictLimit;
+    const BYTE* const dictEnd = dictBase + dictLimit;
+    const U32 curr = (U32)(ip-base);
+    const U32 maxDistance = 1U << cParams->windowLog;
+    const U32 lowestValid = ms->window.lowLimit;
+    const U32 withinMaxDistance = (curr - lowestValid > maxDistance) ? curr - maxDistance : lowestValid;
+    const U32 isDictionary = (ms->loadedDictEnd != 0);
+    const U32 lowLimit = isDictionary ? lowestValid : withinMaxDistance;
+    const U32 rowEntries = (1U << rowLog);
+    const U32 rowMask = rowEntries - 1;
+    const U32 cappedSearchLog = MIN(cParams->searchLog, rowLog); /* nb of searches is capped at nb entries per row */
+    U32 nbAttempts = 1U << cappedSearchLog;
+    size_t ml=4-1;
+
+    /* DMS/DDS variables that may be referenced laster */
+    const ZSTD_matchState_t* const dms = ms->dictMatchState;
+
+    /* Initialize the following variables to satisfy static analyzer */
+    size_t ddsIdx = 0;
+    U32 ddsExtraAttempts = 0; /* cctx hash tables are limited in searches, but allow extra searches into DDS */
+    U32 dmsTag = 0;
+    U32* dmsRow = NULL;
+    BYTE* dmsTagRow = NULL;
+
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        const U32 ddsHashLog = dms->cParams.hashLog - ZSTD_LAZY_DDSS_BUCKET_LOG;
+        {   /* Prefetch DDS hashtable entry */
+            ddsIdx = ZSTD_hashPtr(ip, ddsHashLog, mls) << ZSTD_LAZY_DDSS_BUCKET_LOG;
+            PREFETCH_L1(&dms->hashTable[ddsIdx]);
+        }
+        ddsExtraAttempts = cParams->searchLog > rowLog ? 1U << (cParams->searchLog - rowLog) : 0;
+    }
+
+    if (dictMode == ZSTD_dictMatchState) {
+        /* Prefetch DMS rows */
+        U32* const dmsHashTable = dms->hashTable;
+        U16* const dmsTagTable = dms->tagTable;
+        U32 const dmsHash = (U32)ZSTD_hashPtr(ip, dms->rowHashLog + ZSTD_ROW_HASH_TAG_BITS, mls);
+        U32 const dmsRelRow = (dmsHash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        dmsTag = dmsHash & ZSTD_ROW_HASH_TAG_MASK;
+        dmsTagRow = (BYTE*)(dmsTagTable + dmsRelRow);
+        dmsRow = dmsHashTable + dmsRelRow;
+        ZSTD_row_prefetch(dmsHashTable, dmsTagTable, dmsRelRow, rowLog);
+    }
+
+    /* Update the hashTable and tagTable up to (but not including) ip */
+    ZSTD_row_update_internal(ms, ip, mls, rowLog, rowMask, 1 /* useCache */);
+    {   /* Get the hash for ip, compute the appropriate row */
+        U32 const hash = ZSTD_row_nextCachedHash(hashCache, hashTable, tagTable, base, curr, hashLog, rowLog, mls);
+        U32 const relRow = (hash >> ZSTD_ROW_HASH_TAG_BITS) << rowLog;
+        U32 const tag = hash & ZSTD_ROW_HASH_TAG_MASK;
+        U32* const row = hashTable + relRow;
+        BYTE* tagRow = (BYTE*)(tagTable + relRow);
+        U32 const head = *tagRow & rowMask;
+        U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
+        size_t numMatches = 0;
+        size_t currMatch = 0;
+        ZSTD_VecMask matches = ZSTD_row_getMatchMask(tagRow, (BYTE)tag, head, rowEntries);
+
+        /* Cycle through the matches and prefetch */
+        for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+            U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+            U32 const matchIndex = row[matchPos];
+            assert(numMatches < rowEntries);
+            if (matchIndex < lowLimit)
+                break;
+            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+                PREFETCH_L1(base + matchIndex);
+            } else {
+                PREFETCH_L1(dictBase + matchIndex);
+            }
+            matchBuffer[numMatches++] = matchIndex;
+        }
+
+        /* Speed opt: insert current byte into hashtable too. This allows us to avoid one iteration of the loop
+           in ZSTD_row_update_internal() at the next search. */
+        {
+            U32 const pos = ZSTD_row_nextIndex(tagRow, rowMask);
+            tagRow[pos + ZSTD_ROW_HASH_TAG_OFFSET] = (BYTE)tag;
+            row[pos] = ms->nextToUpdate++;
+        }
+
+        /* Return the longest match */
+        for (; currMatch < numMatches; ++currMatch) {
+            U32 const matchIndex = matchBuffer[currMatch];
+            size_t currentMl=0;
+            assert(matchIndex < curr);
+            assert(matchIndex >= lowLimit);
+
+            if ((dictMode != ZSTD_extDict) || matchIndex >= dictLimit) {
+                const BYTE* const match = base + matchIndex;
+                assert(matchIndex >= dictLimit);   /* ensures this is true if dictMode != ZSTD_extDict */
+                if (match[ml] == ip[ml])   /* potentially better */
+                    currentMl = ZSTD_count(ip, match, iLimit);
+            } else {
+                const BYTE* const match = dictBase + matchIndex;
+                assert(match+4 <= dictEnd);
+                if (MEM_read32(match) == MEM_read32(ip))   /* assumption : matchIndex <= dictLimit-4 (by table construction) */
+                    currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dictEnd, prefixStart) + 4;
+            }
+
+            /* Save best solution */
+            if (currentMl > ml) {
+                ml = currentMl;
+                *offsetPtr = STORE_OFFSET(curr - matchIndex);
+                if (ip+currentMl == iLimit) break; /* best possible, avoids read overflow on next attempt */
+            }
+        }
+    }
+
+    assert(nbAttempts <= (1U << ZSTD_SEARCHLOG_MAX)); /* Check we haven't underflowed. */
+    if (dictMode == ZSTD_dedicatedDictSearch) {
+        ml = ZSTD_dedicatedDictSearch_lazy_search(offsetPtr, ml, nbAttempts + ddsExtraAttempts, dms,
+                                                  ip, iLimit, prefixStart, curr, dictLimit, ddsIdx);
+    } else if (dictMode == ZSTD_dictMatchState) {
+        /* TODO: Measure and potentially add prefetching to DMS */
+        const U32 dmsLowestIndex       = dms->window.dictLimit;
+        const BYTE* const dmsBase      = dms->window.base;
+        const BYTE* const dmsEnd       = dms->window.nextSrc;
+        const U32 dmsSize              = (U32)(dmsEnd - dmsBase);
+        const U32 dmsIndexDelta        = dictLimit - dmsSize;
+
+        {   U32 const head = *dmsTagRow & rowMask;
+            U32 matchBuffer[ZSTD_ROW_HASH_MAX_ENTRIES];
+            size_t numMatches = 0;
+            size_t currMatch = 0;
+            ZSTD_VecMask matches = ZSTD_row_getMatchMask(dmsTagRow, (BYTE)dmsTag, head, rowEntries);
+
+            for (; (matches > 0) && (nbAttempts > 0); --nbAttempts, matches &= (matches - 1)) {
+                U32 const matchPos = (head + ZSTD_VecMask_next(matches)) & rowMask;
+                U32 const matchIndex = dmsRow[matchPos];
+                if (matchIndex < dmsLowestIndex)
+                    break;
+                PREFETCH_L1(dmsBase + matchIndex);
+                matchBuffer[numMatches++] = matchIndex;
+            }
+
+            /* Return the longest match */
+            for (; currMatch < numMatches; ++currMatch) {
+                U32 const matchIndex = matchBuffer[currMatch];
+                size_t currentMl=0;
+                assert(matchIndex >= dmsLowestIndex);
+                assert(matchIndex < curr);
+
+                {   const BYTE* const match = dmsBase + matchIndex;
+                    assert(match+4 <= dmsEnd);
+                    if (MEM_read32(match) == MEM_read32(ip))
+                        currentMl = ZSTD_count_2segments(ip+4, match+4, iLimit, dmsEnd, prefixStart) + 4;
+                }
+
+                if (currentMl > ml) {
+                    ml = currentMl;
+                    assert(curr > matchIndex + dmsIndexDelta);
+                    *offsetPtr = STORE_OFFSET(curr - (matchIndex + dmsIndexDelta));
+                    if (ip+currentMl == iLimit) break;
+                }
+            }
+        }
     }
+    return ml;
 }
 
 
+/*
+ * Generate search functions templated on (dictMode, mls, rowLog).
+ * These functions are outlined for code size & compilation time.
+ * ZSTD_searchMax() dispatches to the correct implementation function.
+ *
+ * TODO: The start of the search function involves loading and calculating a
+ * bunch of constants from the ZSTD_matchState_t. These computations could be
+ * done in an initialization function, and saved somewhere in the match state.
+ * Then we could pass a pointer to the saved state instead of the match state,
+ * and avoid duplicate computations.
+ *
+ * TODO: Move the match re-winding into searchMax. This improves compression
+ * ratio, and unlocks further simplifications with the next TODO.
+ *
+ * TODO: Try moving the repcode search into searchMax. After the re-winding
+ * and repcode search are in searchMax, there is no more logic in the match
+ * finder loop that requires knowledge about the dictMode. So we should be
+ * able to avoid force inlining it, and we can join the extDict loop with
+ * the single segment loop. It should go in searchMax instead of its own
+ * function to avoid having multiple virtual function calls per search.
+ */
+
+#define ZSTD_BT_SEARCH_FN(dictMode, mls) ZSTD_BtFindBestMatch_##dictMode##_##mls
+#define ZSTD_HC_SEARCH_FN(dictMode, mls) ZSTD_HcFindBestMatch_##dictMode##_##mls
+#define ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog) ZSTD_RowFindBestMatch_##dictMode##_##mls##_##rowLog
+
+#define ZSTD_SEARCH_FN_ATTRS FORCE_NOINLINE
+
+#define GEN_ZSTD_BT_SEARCH_FN(dictMode, mls)                                           \
+    ZSTD_SEARCH_FN_ATTRS size_t ZSTD_BT_SEARCH_FN(dictMode, mls)(                      \
+            ZSTD_matchState_t* ms,                                                     \
+            const BYTE* ip, const BYTE* const iLimit,                                  \
+            size_t* offBasePtr)                                                        \
+    {                                                                                  \
+        assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                           \
+        return ZSTD_BtFindBestMatch(ms, ip, iLimit, offBasePtr, mls, ZSTD_##dictMode); \
+    }                                                                                  \
+
+#define GEN_ZSTD_HC_SEARCH_FN(dictMode, mls)                                          \
+    ZSTD_SEARCH_FN_ATTRS size_t ZSTD_HC_SEARCH_FN(dictMode, mls)(                     \
+            ZSTD_matchState_t* ms,                                                    \
+            const BYTE* ip, const BYTE* const iLimit,                                 \
+            size_t* offsetPtr)                                                        \
+    {                                                                                 \
+        assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                          \
+        return ZSTD_HcFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode); \
+    }                                                                                 \
+
+#define GEN_ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)                                          \
+    ZSTD_SEARCH_FN_ATTRS size_t ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(                     \
+            ZSTD_matchState_t* ms,                                                             \
+            const BYTE* ip, const BYTE* const iLimit,                                          \
+            size_t* offsetPtr)                                                                 \
+    {                                                                                          \
+        assert(MAX(4, MIN(6, ms->cParams.minMatch)) == mls);                                   \
+        assert(MAX(4, MIN(6, ms->cParams.searchLog)) == rowLog);                               \
+        return ZSTD_RowFindBestMatch(ms, ip, iLimit, offsetPtr, mls, ZSTD_##dictMode, rowLog); \
+    }                                                                                          \
+
+#define ZSTD_FOR_EACH_ROWLOG(X, dictMode, mls) \
+    X(dictMode, mls, 4)                        \
+    X(dictMode, mls, 5)                        \
+    X(dictMode, mls, 6)
+
+#define ZSTD_FOR_EACH_MLS_ROWLOG(X, dictMode) \
+    ZSTD_FOR_EACH_ROWLOG(X, dictMode, 4)      \
+    ZSTD_FOR_EACH_ROWLOG(X, dictMode, 5)      \
+    ZSTD_FOR_EACH_ROWLOG(X, dictMode, 6)
+
+#define ZSTD_FOR_EACH_MLS(X, dictMode) \
+    X(dictMode, 4)                     \
+    X(dictMode, 5)                     \
+    X(dictMode, 6)
+
+#define ZSTD_FOR_EACH_DICT_MODE(X, ...) \
+    X(__VA_ARGS__, noDict)              \
+    X(__VA_ARGS__, extDict)             \
+    X(__VA_ARGS__, dictMatchState)      \
+    X(__VA_ARGS__, dedicatedDictSearch)
+
+/* Generate row search fns for each combination of (dictMode, mls, rowLog) */
+ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS_ROWLOG, GEN_ZSTD_ROW_SEARCH_FN)
+/* Generate binary Tree search fns for each combination of (dictMode, mls) */
+ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_BT_SEARCH_FN)
+/* Generate hash chain search fns for each combination of (dictMode, mls) */
+ZSTD_FOR_EACH_DICT_MODE(ZSTD_FOR_EACH_MLS, GEN_ZSTD_HC_SEARCH_FN)
+
+typedef enum { search_hashChain=0, search_binaryTree=1, search_rowHash=2 } searchMethod_e;
+
+#define GEN_ZSTD_CALL_BT_SEARCH_FN(dictMode, mls)                         \
+    case mls:                                                             \
+        return ZSTD_BT_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
+#define GEN_ZSTD_CALL_HC_SEARCH_FN(dictMode, mls)                         \
+    case mls:                                                             \
+        return ZSTD_HC_SEARCH_FN(dictMode, mls)(ms, ip, iend, offsetPtr);
+#define GEN_ZSTD_CALL_ROW_SEARCH_FN(dictMode, mls, rowLog)                         \
+    case rowLog:                                                                   \
+        return ZSTD_ROW_SEARCH_FN(dictMode, mls, rowLog)(ms, ip, iend, offsetPtr);
+
+#define ZSTD_SWITCH_MLS(X, dictMode)   \
+    switch (mls) {                     \
+        ZSTD_FOR_EACH_MLS(X, dictMode) \
+    }
+
+#define ZSTD_SWITCH_ROWLOG(dictMode, mls)                                    \
+    case mls:                                                                \
+        switch (rowLog) {                                                    \
+            ZSTD_FOR_EACH_ROWLOG(GEN_ZSTD_CALL_ROW_SEARCH_FN, dictMode, mls) \
+        }                                                                    \
+        ZSTD_UNREACHABLE;                                                    \
+        break;
+
+#define ZSTD_SWITCH_SEARCH_METHOD(dictMode)                       \
+    switch (searchMethod) {                                       \
+        case search_hashChain:                                    \
+            ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_HC_SEARCH_FN, dictMode) \
+            break;                                                \
+        case search_binaryTree:                                   \
+            ZSTD_SWITCH_MLS(GEN_ZSTD_CALL_BT_SEARCH_FN, dictMode) \
+            break;                                                \
+        case search_rowHash:                                      \
+            ZSTD_SWITCH_MLS(ZSTD_SWITCH_ROWLOG, dictMode)         \
+            break;                                                \
+    }                                                             \
+    ZSTD_UNREACHABLE;
+
+/*
+ * Searches for the longest match at @p ip.
+ * Dispatches to the correct implementation function based on the
+ * (searchMethod, dictMode, mls, rowLog). We use switch statements
+ * here instead of using an indirect function call through a function
+ * pointer because after Spectre and Meltdown mitigations, indirect
+ * function calls can be very costly, especially in the kernel.
+ *
+ * NOTE: dictMode and searchMethod should be templated, so those switch
+ * statements should be optimized out. Only the mls & rowLog switches
+ * should be left.
+ *
+ * @param ms The match state.
+ * @param ip The position to search at.
+ * @param iend The end of the input data.
+ * @param[out] offsetPtr Stores the match offset into this pointer.
+ * @param mls The minimum search length, in the range [4, 6].
+ * @param rowLog The row log (if applicable), in the range [4, 6].
+ * @param searchMethod The search method to use (templated).
+ * @param dictMode The dictMode (templated).
+ *
+ * @returns The length of the longest match found, or < mls if no match is found.
+ * If a match is found its offset is stored in @p offsetPtr.
+ */
+FORCE_INLINE_TEMPLATE size_t ZSTD_searchMax(
+    ZSTD_matchState_t* ms,
+    const BYTE* ip,
+    const BYTE* iend,
+    size_t* offsetPtr,
+    U32 const mls,
+    U32 const rowLog,
+    searchMethod_e const searchMethod,
+    ZSTD_dictMode_e const dictMode)
+{
+    if (dictMode == ZSTD_noDict) {
+        ZSTD_SWITCH_SEARCH_METHOD(noDict)
+    } else if (dictMode == ZSTD_extDict) {
+        ZSTD_SWITCH_SEARCH_METHOD(extDict)
+    } else if (dictMode == ZSTD_dictMatchState) {
+        ZSTD_SWITCH_SEARCH_METHOD(dictMatchState)
+    } else if (dictMode == ZSTD_dedicatedDictSearch) {
+        ZSTD_SWITCH_SEARCH_METHOD(dedicatedDictSearch)
+    }
+    ZSTD_UNREACHABLE;
+    return 0;
+}
+
 /* *******************************
 *  Common parser - lazy strategy
 *********************************/
-typedef enum { search_hashChain, search_binaryTree } searchMethod_e;
 
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_compressBlock_lazy_generic(
@@ -865,41 +1484,13 @@ ZSTD_compressBlock_lazy_generic(
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
     const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
+    const BYTE* const ilimit = (searchMethod == search_rowHash) ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
     const BYTE* const base = ms->window.base;
     const U32 prefixLowestIndex = ms->window.dictLimit;
     const BYTE* const prefixLowest = base + prefixLowestIndex;
+    const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
 
-    typedef size_t (*searchMax_f)(
-                        ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-
-    /*
-     * This table is indexed first by the four ZSTD_dictMode_e values, and then
-     * by the two searchMethod_e values. NULLs are placed for configurations
-     * that should never occur (extDict modes go to the other implementation
-     * below and there is no DDSS for binary tree search yet).
-     */
-    const searchMax_f searchFuncs[4][2] = {
-        {
-            ZSTD_HcFindBestMatch_selectMLS,
-            ZSTD_BtFindBestMatch_selectMLS
-        },
-        {
-            NULL,
-            NULL
-        },
-        {
-            ZSTD_HcFindBestMatch_dictMatchState_selectMLS,
-            ZSTD_BtFindBestMatch_dictMatchState_selectMLS
-        },
-        {
-            ZSTD_HcFindBestMatch_dedicatedDictSearch_selectMLS,
-            NULL
-        }
-    };
-
-    searchMax_f const searchMax = searchFuncs[dictMode][searchMethod == search_binaryTree];
     U32 offset_1 = rep[0], offset_2 = rep[1], savedOffset=0;
 
     const int isDMS = dictMode == ZSTD_dictMatchState;
@@ -915,11 +1506,7 @@ ZSTD_compressBlock_lazy_generic(
                                      0;
     const U32 dictAndPrefixLength = (U32)((ip - prefixLowest) + (dictEnd - dictLowest));
 
-    assert(searchMax != NULL);
-
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u)", (U32)dictMode);
-
-    /* init */
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_generic (dictMode=%u) (searchFunc=%u)", (U32)dictMode, (U32)searchMethod);
     ip += (dictAndPrefixLength == 0);
     if (dictMode == ZSTD_noDict) {
         U32 const curr = (U32)(ip - base);
@@ -935,6 +1522,12 @@ ZSTD_compressBlock_lazy_generic(
         assert(offset_2 <= dictAndPrefixLength);
     }
 
+    if (searchMethod == search_rowHash) {
+        ZSTD_row_fillHashCache(ms, base, rowLog,
+                            MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+                            ms->nextToUpdate, ilimit);
+    }
+
     /* Match Loop */
 #if defined(__x86_64__)
     /* I've measured random a 5% speed loss on levels 5 & 6 (greedy) when the
@@ -944,8 +1537,9 @@ ZSTD_compressBlock_lazy_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offset=0;
+        size_t offcode=STORE_REPCODE_1;
         const BYTE* start=ip+1;
+        DEBUGLOG(7, "search baseline (depth 0)");
 
         /* check repCode */
         if (isDxS) {
@@ -969,9 +1563,9 @@ ZSTD_compressBlock_lazy_generic(
 
         /* first search (depth 0) */
         {   size_t offsetFound = 999999999;
-            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, dictMode);
             if (ml2 > matchLength)
-                matchLength = ml2, start = ip, offset=offsetFound;
+                matchLength = ml2, start = ip, offcode=offsetFound;
         }
 
         if (matchLength < 4) {
@@ -982,14 +1576,15 @@ ZSTD_compressBlock_lazy_generic(
         /* let's try to find a better solution */
         if (depth>=1)
         while (ip<ilimit) {
+            DEBUGLOG(7, "search depth 1");
             ip ++;
             if ( (dictMode == ZSTD_noDict)
-              && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+              && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                 size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                 int const gain2 = (int)(mlRep * 3);
-                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                 if ((mlRep >= 4) && (gain2 > gain1))
-                    matchLength = mlRep, offset = 0, start = ip;
+                    matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
             }
             if (isDxS) {
                 const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1001,30 +1596,31 @@ ZSTD_compressBlock_lazy_generic(
                     const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                     size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                     int const gain2 = (int)(mlRep * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offset = 0, start = ip;
+                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                 }
             }
             {   size_t offset2=999999999;
-                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2, offset = offset2, start = ip;
+                    matchLength = ml2, offcode = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
 
             /* let's find an even better one */
             if ((depth==2) && (ip<ilimit)) {
+                DEBUGLOG(7, "search depth 2");
                 ip ++;
                 if ( (dictMode == ZSTD_noDict)
-                  && (offset) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
+                  && (offcode) && ((offset_1>0) & (MEM_read32(ip) == MEM_read32(ip - offset_1)))) {
                     size_t const mlRep = ZSTD_count(ip+4, ip+4-offset_1, iend) + 4;
                     int const gain2 = (int)(mlRep * 4);
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((mlRep >= 4) && (gain2 > gain1))
-                        matchLength = mlRep, offset = 0, start = ip;
+                        matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                 }
                 if (isDxS) {
                     const U32 repIndex = (U32)(ip - base) - offset_1;
@@ -1036,46 +1632,45 @@ ZSTD_compressBlock_lazy_generic(
                         const BYTE* repMatchEnd = repIndex < prefixLowestIndex ? dictEnd : iend;
                         size_t const mlRep = ZSTD_count_2segments(ip+4, repMatch+4, iend, repMatchEnd, prefixLowest) + 4;
                         int const gain2 = (int)(mlRep * 4);
-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                         if ((mlRep >= 4) && (gain2 > gain1))
-                            matchLength = mlRep, offset = 0, start = ip;
+                            matchLength = mlRep, offcode = STORE_REPCODE_1, start = ip;
                     }
                 }
                 {   size_t offset2=999999999;
-                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, dictMode);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
                     if ((ml2 >= 4) && (gain2 > gain1)) {
-                        matchLength = ml2, offset = offset2, start = ip;
+                        matchLength = ml2, offcode = offset2, start = ip;
                         continue;
             }   }   }
             break;  /* nothing found : store previous solution */
         }
 
         /* NOTE:
-         * start[-offset+ZSTD_REP_MOVE-1] is undefined behavior.
-         * (-offset+ZSTD_REP_MOVE-1) is unsigned, and is added to start, which
-         * overflows the pointer, which is undefined behavior.
+         * Pay attention that `start[-value]` can lead to strange undefined behavior
+         * notably if `value` is unsigned, resulting in a large positive `-value`.
          */
         /* catch up */
-        if (offset) {
+        if (STORED_IS_OFFSET(offcode)) {
             if (dictMode == ZSTD_noDict) {
-                while ( ((start > anchor) & (start - (offset-ZSTD_REP_MOVE) > prefixLowest))
-                     && (start[-1] == (start-(offset-ZSTD_REP_MOVE))[-1]) )  /* only search for offset within prefix */
+                while ( ((start > anchor) & (start - STORED_OFFSET(offcode) > prefixLowest))
+                     && (start[-1] == (start-STORED_OFFSET(offcode))[-1]) )  /* only search for offset within prefix */
                     { start--; matchLength++; }
             }
             if (isDxS) {
-                U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+                U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
                 const BYTE* match = (matchIndex < prefixLowestIndex) ? dictBase + matchIndex - dictIndexDelta : base + matchIndex;
                 const BYTE* const mStart = (matchIndex < prefixLowestIndex) ? dictLowest : prefixLowest;
                 while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
             }
-            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
         }
         /* store sequence */
 _storeSequence:
-        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+        {   size_t const litLength = (size_t)(start - anchor);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -1091,8 +1686,8 @@ _storeSequence:
                    && (MEM_read32(repMatch) == MEM_read32(ip)) ) {
                     const BYTE* const repEnd2 = repIndex < prefixLowestIndex ? dictEnd : iend;
                     matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd2, prefixLowest) + 4;
-                    offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset_2 <=> offset_1 */
-                    ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                    offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset_2 <=> offset_1 */
+                    ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                     ip += matchLength;
                     anchor = ip;
                     continue;
@@ -1106,8 +1701,8 @@ _storeSequence:
                  && (MEM_read32(ip) == MEM_read32(ip - offset_2)) ) {
                 /* store sequence */
                 matchLength = ZSTD_count(ip+4, ip+4-offset_2, iend) + 4;
-                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset; /* swap repcodes */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode; /* swap repcodes */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -1200,6 +1795,70 @@ size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
     return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_hashChain, 0, ZSTD_dedicatedDictSearch);
 }
 
+/* Row-based matchfinder */
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_noDict);
+}
+
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dictMatchState);
+}
+
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dictMatchState);
+}
+
+
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1, ZSTD_dedicatedDictSearch);
+}
+
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0, ZSTD_dedicatedDictSearch);
+}
 
 FORCE_INLINE_TEMPLATE
 size_t ZSTD_compressBlock_lazy_extDict_generic(
@@ -1212,7 +1871,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
     const BYTE* ip = istart;
     const BYTE* anchor = istart;
     const BYTE* const iend = istart + srcSize;
-    const BYTE* const ilimit = iend - 8;
+    const BYTE* const ilimit = searchMethod == search_rowHash ? iend - 8 - ZSTD_ROW_HASH_CACHE_SIZE : iend - 8;
     const BYTE* const base = ms->window.base;
     const U32 dictLimit = ms->window.dictLimit;
     const BYTE* const prefixStart = base + dictLimit;
@@ -1220,18 +1879,20 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
     const BYTE* const dictEnd  = dictBase + dictLimit;
     const BYTE* const dictStart  = dictBase + ms->window.lowLimit;
     const U32 windowLog = ms->cParams.windowLog;
-
-    typedef size_t (*searchMax_f)(
-                        ZSTD_matchState_t* ms,
-                        const BYTE* ip, const BYTE* iLimit, size_t* offsetPtr);
-    searchMax_f searchMax = searchMethod==search_binaryTree ? ZSTD_BtFindBestMatch_extDict_selectMLS : ZSTD_HcFindBestMatch_extDict_selectMLS;
+    const U32 mls = BOUNDED(4, ms->cParams.minMatch, 6);
+    const U32 rowLog = BOUNDED(4, ms->cParams.searchLog, 6);
 
     U32 offset_1 = rep[0], offset_2 = rep[1];
 
-    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic");
+    DEBUGLOG(5, "ZSTD_compressBlock_lazy_extDict_generic (searchFunc=%u)", (U32)searchMethod);
 
     /* init */
     ip += (ip == prefixStart);
+    if (searchMethod == search_rowHash) {
+        ZSTD_row_fillHashCache(ms, base, rowLog,
+                               MIN(ms->cParams.minMatch, 6 /* mls caps out at 6 */),
+                               ms->nextToUpdate, ilimit);
+    }
 
     /* Match Loop */
 #if defined(__x86_64__)
@@ -1242,7 +1903,7 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 #endif
     while (ip < ilimit) {
         size_t matchLength=0;
-        size_t offset=0;
+        size_t offcode=STORE_REPCODE_1;
         const BYTE* start=ip+1;
         U32 curr = (U32)(ip-base);
 
@@ -1251,7 +1912,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             const U32 repIndex = (U32)(curr+1 - offset_1);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))   /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+               & (offset_1 <= curr+1 - windowLow) ) /* note: we are searching at curr+1 */
             if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1261,9 +1923,9 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
 
         /* first search (depth 0) */
         {   size_t offsetFound = 999999999;
-            size_t const ml2 = searchMax(ms, ip, iend, &offsetFound);
+            size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offsetFound, mls, rowLog, searchMethod, ZSTD_extDict);
             if (ml2 > matchLength)
-                matchLength = ml2, start = ip, offset=offsetFound;
+                matchLength = ml2, start = ip, offcode=offsetFound;
         }
 
         if (matchLength < 4) {
@@ -1277,29 +1939,30 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             ip ++;
             curr++;
             /* check repCode */
-            if (offset) {
+            if (offcode) {
                 const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                 const U32 repIndex = (U32)(curr - offset_1);
                 const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                 const BYTE* const repMatch = repBase + repIndex;
-                if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+                   & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                 if (MEM_read32(ip) == MEM_read32(repMatch)) {
                     /* repcode detected */
                     const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                     size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                     int const gain2 = (int)(repLength * 3);
-                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)offset+1) + 1);
+                    int const gain1 = (int)(matchLength*3 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                     if ((repLength >= 4) && (gain2 > gain1))
-                        matchLength = repLength, offset = 0, start = ip;
+                        matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
             }   }
 
             /* search match, depth 1 */
             {   size_t offset2=999999999;
-                size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 4);
+                size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict);
+                int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 4);
                 if ((ml2 >= 4) && (gain2 > gain1)) {
-                    matchLength = ml2, offset = offset2, start = ip;
+                    matchLength = ml2, offcode = offset2, start = ip;
                     continue;   /* search a better one */
             }   }
 
@@ -1308,47 +1971,48 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
                 ip ++;
                 curr++;
                 /* check repCode */
-                if (offset) {
+                if (offcode) {
                     const U32 windowLow = ZSTD_getLowestMatchIndex(ms, curr, windowLog);
                     const U32 repIndex = (U32)(curr - offset_1);
                     const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
                     const BYTE* const repMatch = repBase + repIndex;
-                    if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+                    if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+                       & (offset_1 <= curr - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
                     if (MEM_read32(ip) == MEM_read32(repMatch)) {
                         /* repcode detected */
                         const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                         size_t const repLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
                         int const gain2 = (int)(repLength * 4);
-                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 1);
+                        int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 1);
                         if ((repLength >= 4) && (gain2 > gain1))
-                            matchLength = repLength, offset = 0, start = ip;
+                            matchLength = repLength, offcode = STORE_REPCODE_1, start = ip;
                 }   }
 
                 /* search match, depth 2 */
                 {   size_t offset2=999999999;
-                    size_t const ml2 = searchMax(ms, ip, iend, &offset2);
-                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)offset2+1));   /* raw approx */
-                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)offset+1) + 7);
+                    size_t const ml2 = ZSTD_searchMax(ms, ip, iend, &offset2, mls, rowLog, searchMethod, ZSTD_extDict);
+                    int const gain2 = (int)(ml2*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offset2)));   /* raw approx */
+                    int const gain1 = (int)(matchLength*4 - ZSTD_highbit32((U32)STORED_TO_OFFBASE(offcode)) + 7);
                     if ((ml2 >= 4) && (gain2 > gain1)) {
-                        matchLength = ml2, offset = offset2, start = ip;
+                        matchLength = ml2, offcode = offset2, start = ip;
                         continue;
             }   }   }
             break;  /* nothing found : store previous solution */
         }
 
         /* catch up */
-        if (offset) {
-            U32 const matchIndex = (U32)((start-base) - (offset - ZSTD_REP_MOVE));
+        if (STORED_IS_OFFSET(offcode)) {
+            U32 const matchIndex = (U32)((size_t)(start-base) - STORED_OFFSET(offcode));
             const BYTE* match = (matchIndex < dictLimit) ? dictBase + matchIndex : base + matchIndex;
             const BYTE* const mStart = (matchIndex < dictLimit) ? dictStart : prefixStart;
             while ((start>anchor) && (match>mStart) && (start[-1] == match[-1])) { start--; match--; matchLength++; }  /* catch up */
-            offset_2 = offset_1; offset_1 = (U32)(offset - ZSTD_REP_MOVE);
+            offset_2 = offset_1; offset_1 = (U32)STORED_OFFSET(offcode);
         }
 
         /* store sequence */
 _storeSequence:
-        {   size_t const litLength = start - anchor;
-            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offset, matchLength-MINMATCH);
+        {   size_t const litLength = (size_t)(start - anchor);
+            ZSTD_storeSeq(seqStore, litLength, anchor, iend, (U32)offcode, matchLength);
             anchor = ip = start + matchLength;
         }
 
@@ -1359,13 +2023,14 @@ _storeSequence:
             const U32 repIndex = repCurrent - offset_2;
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow))  /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments  */
+               & (offset_2 <= repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
                 matchLength = ZSTD_count_2segments(ip+4, repMatch+4, iend, repEnd, prefixStart) + 4;
-                offset = offset_2; offset_2 = offset_1; offset_1 = (U32)offset;   /* swap offset history */
-                ZSTD_storeSeq(seqStore, 0, anchor, iend, 0, matchLength-MINMATCH);
+                offcode = offset_2; offset_2 = offset_1; offset_1 = (U32)offcode;   /* swap offset history */
+                ZSTD_storeSeq(seqStore, 0, anchor, iend, STORE_REPCODE_1, matchLength);
                 ip += matchLength;
                 anchor = ip;
                 continue;   /* faster when present ... (?) */
@@ -1412,3 +2077,26 @@ size_t ZSTD_compressBlock_btlazy2_extDict(
 {
     return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_binaryTree, 2);
 }
+
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 0);
+}
+
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 1);
+}
+
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize)
+
+{
+    return ZSTD_compressBlock_lazy_extDict_generic(ms, seqStore, rep, src, srcSize, search_rowHash, 2);
+}
diff --git a/lib/zstd/compress/zstd_lazy.h b/lib/zstd/compress/zstd_lazy.h
index 2fc5a61821344..e5bdf4df8dde0 100644
--- a/lib/zstd/compress/zstd_lazy.h
+++ b/lib/zstd/compress/zstd_lazy.h
@@ -23,6 +23,7 @@
 #define ZSTD_LAZY_DDSS_BUCKET_LOG 2
 
 U32 ZSTD_insertAndFindFirstIndex(ZSTD_matchState_t* ms, const BYTE* ip);
+void ZSTD_row_update(ZSTD_matchState_t* const ms, const BYTE* ip);
 
 void ZSTD_dedicatedDictSearch_lazy_loadDictionary(ZSTD_matchState_t* ms, const BYTE* const ip);
 
@@ -40,6 +41,15 @@ size_t ZSTD_compressBlock_lazy(
 size_t ZSTD_compressBlock_greedy(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_btlazy2_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -53,6 +63,15 @@ size_t ZSTD_compressBlock_lazy_dictMatchState(
 size_t ZSTD_compressBlock_greedy_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dictMatchState_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -63,6 +82,15 @@ size_t ZSTD_compressBlock_lazy_dedicatedDictSearch(
 size_t ZSTD_compressBlock_greedy_dedicatedDictSearch(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_dedicatedDictSearch_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 
 size_t ZSTD_compressBlock_greedy_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
@@ -73,9 +101,19 @@ size_t ZSTD_compressBlock_lazy_extDict(
 size_t ZSTD_compressBlock_lazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_greedy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
+size_t ZSTD_compressBlock_lazy2_extDict_row(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        void const* src, size_t srcSize);
 size_t ZSTD_compressBlock_btlazy2_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         void const* src, size_t srcSize);
+        
 
 
 #endif /* ZSTD_LAZY_H */
diff --git a/lib/zstd/compress/zstd_ldm.c b/lib/zstd/compress/zstd_ldm.c
index 8ef7e88a5adde..dd86fc83e7dde 100644
--- a/lib/zstd/compress/zstd_ldm.c
+++ b/lib/zstd/compress/zstd_ldm.c
@@ -57,6 +57,33 @@ static void ZSTD_ldm_gear_init(ldmRollingHashState_t* state, ldmParams_t const*
     }
 }
 
+/* ZSTD_ldm_gear_reset()
+ * Feeds [data, data + minMatchLength) into the hash without registering any
+ * splits. This effectively resets the hash state. This is used when skipping
+ * over data, either at the beginning of a block, or skipping sections.
+ */
+static void ZSTD_ldm_gear_reset(ldmRollingHashState_t* state,
+                                BYTE const* data, size_t minMatchLength)
+{
+    U64 hash = state->rolling;
+    size_t n = 0;
+
+#define GEAR_ITER_ONCE() do {                                  \
+        hash = (hash << 1) + ZSTD_ldm_gearTab[data[n] & 0xff]; \
+        n += 1;                                                \
+    } while (0)
+    while (n + 3 < minMatchLength) {
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+        GEAR_ITER_ONCE();
+    }
+    while (n < minMatchLength) {
+        GEAR_ITER_ONCE();
+    }
+#undef GEAR_ITER_ONCE
+}
+
 /* ZSTD_ldm_gear_feed():
  *
  * Registers in the splits array all the split points found in the first
@@ -132,12 +159,12 @@ size_t ZSTD_ldm_getTableSize(ldmParams_t params)
     size_t const ldmBucketSize = ((size_t)1) << (params.hashLog - ldmBucketSizeLog);
     size_t const totalSize = ZSTD_cwksp_alloc_size(ldmBucketSize)
                            + ZSTD_cwksp_alloc_size(ldmHSize * sizeof(ldmEntry_t));
-    return params.enableLdm ? totalSize : 0;
+    return params.enableLdm == ZSTD_ps_enable ? totalSize : 0;
 }
 
 size_t ZSTD_ldm_getMaxNbSeq(ldmParams_t params, size_t maxChunkSize)
 {
-    return params.enableLdm ? (maxChunkSize / params.minMatchLength) : 0;
+    return params.enableLdm == ZSTD_ps_enable ? (maxChunkSize / params.minMatchLength) : 0;
 }
 
 /* ZSTD_ldm_getBucket() :
@@ -255,7 +282,7 @@ void ZSTD_ldm_fillHashTable(
     while (ip < iend) {
         size_t hashed;
         unsigned n;
-        
+
         numSplits = 0;
         hashed = ZSTD_ldm_gear_feed(&hashState, ip, iend - ip, splits, &numSplits);
 
@@ -327,16 +354,8 @@ static size_t ZSTD_ldm_generateSequences_internal(
 
     /* Initialize the rolling hash state with the first minMatchLength bytes */
     ZSTD_ldm_gear_init(&hashState, params);
-    {
-        size_t n = 0;
-
-        while (n < minMatchLength) {
-            numSplits = 0;
-            n += ZSTD_ldm_gear_feed(&hashState, ip + n, minMatchLength - n,
-                                    splits, &numSplits);
-        }
-        ip += minMatchLength;
-    }
+    ZSTD_ldm_gear_reset(&hashState, ip, minMatchLength);
+    ip += minMatchLength;
 
     while (ip < ilimit) {
         size_t hashed;
@@ -361,6 +380,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
         for (n = 0; n < numSplits; n++) {
             size_t forwardMatchLength = 0, backwardMatchLength = 0,
                    bestMatchLength = 0, mLength;
+            U32 offset;
             BYTE const* const split = candidates[n].split;
             U32 const checksum = candidates[n].checksum;
             U32 const hash = candidates[n].hash;
@@ -428,9 +448,9 @@ static size_t ZSTD_ldm_generateSequences_internal(
             }
 
             /* Match found */
+            offset = (U32)(split - base) - bestEntry->offset;
             mLength = forwardMatchLength + backwardMatchLength;
             {
-                U32 const offset = (U32)(split - base) - bestEntry->offset;
                 rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
 
                 /* Out of sequence storage */
@@ -447,6 +467,21 @@ static size_t ZSTD_ldm_generateSequences_internal(
             ZSTD_ldm_insertEntry(ldmState, hash, newEntry, *params);
 
             anchor = split + forwardMatchLength;
+
+            /* If we find a match that ends after the data that we've hashed
+             * then we have a repeating, overlapping, pattern. E.g. all zeros.
+             * If one repetition of the pattern matches our `stopMask` then all
+             * repetitions will. We don't need to insert them all into out table,
+             * only the first one. So skip over overlapping matches.
+             * This is a major speed boost (20x) for compressing a single byte
+             * repeated, when that byte ends up in the table.
+             */
+            if (anchor > ip + hashed) {
+                ZSTD_ldm_gear_reset(&hashState, anchor - minMatchLength, minMatchLength);
+                /* Continue the outer loop at anchor (ip + hashed == anchor). */
+                ip = anchor - hashed;
+                break;
+            }
         }
 
         ip += hashed;
@@ -500,7 +535,7 @@ size_t ZSTD_ldm_generateSequences(
 
         assert(chunkStart < iend);
         /* 1. Perform overflow correction if necessary. */
-        if (ZSTD_window_needOverflowCorrection(ldmState->window, chunkEnd)) {
+        if (ZSTD_window_needOverflowCorrection(ldmState->window, 0, maxDist, ldmState->loadedDictEnd, chunkStart, chunkEnd)) {
             U32 const ldmHSize = 1U << params->hashLog;
             U32 const correction = ZSTD_window_correctOverflow(
                 &ldmState->window, /* cycleLog */ 0, maxDist, chunkStart);
@@ -544,7 +579,9 @@ size_t ZSTD_ldm_generateSequences(
     return 0;
 }
 
-void ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch) {
+void
+ZSTD_ldm_skipSequences(rawSeqStore_t* rawSeqStore, size_t srcSize, U32 const minMatch)
+{
     while (srcSize > 0 && rawSeqStore->pos < rawSeqStore->size) {
         rawSeq* seq = rawSeqStore->seq + rawSeqStore->pos;
         if (srcSize <= seq->litLength) {
@@ -622,12 +659,13 @@ void ZSTD_ldm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
 
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
     ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+    ZSTD_paramSwitch_e useRowMatchFinder,
     void const* src, size_t srcSize)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
     unsigned const minMatch = cParams->minMatch;
     ZSTD_blockCompressor const blockCompressor =
-        ZSTD_selectBlockCompressor(cParams->strategy, ZSTD_matchState_dictMode(ms));
+        ZSTD_selectBlockCompressor(cParams->strategy, useRowMatchFinder, ZSTD_matchState_dictMode(ms));
     /* Input bounds */
     BYTE const* const istart = (BYTE const*)src;
     BYTE const* const iend = istart + srcSize;
@@ -673,8 +711,8 @@ size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             rep[0] = sequence.offset;
             /* Store the sequence */
             ZSTD_storeSeq(seqStore, newLitLength, ip - newLitLength, iend,
-                          sequence.offset + ZSTD_REP_MOVE,
-                          sequence.matchLength - MINMATCH);
+                          STORE_OFFSET(sequence.offset),
+                          sequence.matchLength);
             ip += sequence.matchLength;
         }
     }
diff --git a/lib/zstd/compress/zstd_ldm.h b/lib/zstd/compress/zstd_ldm.h
index 25b25270b72e2..fbc6a5e88fd7a 100644
--- a/lib/zstd/compress/zstd_ldm.h
+++ b/lib/zstd/compress/zstd_ldm.h
@@ -63,6 +63,7 @@ size_t ZSTD_ldm_generateSequences(
  */
 size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
             ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+            ZSTD_paramSwitch_e useRowMatchFinder,
             void const* src, size_t srcSize);
 
 /*
diff --git a/lib/zstd/compress/zstd_ldm_geartab.h b/lib/zstd/compress/zstd_ldm_geartab.h
index e5c24d856b0a8..647f865be2903 100644
--- a/lib/zstd/compress/zstd_ldm_geartab.h
+++ b/lib/zstd/compress/zstd_ldm_geartab.h
@@ -11,7 +11,10 @@
 #ifndef ZSTD_LDM_GEARTAB_H
 #define ZSTD_LDM_GEARTAB_H
 
-static U64 ZSTD_ldm_gearTab[256] = {
+#include "../common/compiler.h" /* UNUSED_ATTR */
+#include "../common/mem.h"      /* U64 */
+
+static UNUSED_ATTR const U64 ZSTD_ldm_gearTab[256] = {
     0xf5b8f72c5f77775c, 0x84935f266b7ac412, 0xb647ada9ca730ccc,
     0xb065bb4b114fb1de, 0x34584e7e8c3a9fd0, 0x4e97e17c6ae26b05,
     0x3a03d743bc99a604, 0xcecd042422c4044f, 0x76de76c58524259e,
diff --git a/lib/zstd/compress/zstd_opt.c b/lib/zstd/compress/zstd_opt.c
index dfc55e3e8119b..fd82acfda62f6 100644
--- a/lib/zstd/compress/zstd_opt.c
+++ b/lib/zstd/compress/zstd_opt.c
@@ -8,25 +8,12 @@
  * You may select, at your option, one of the above-listed licenses.
  */
 
-/*
- * Disable inlining for the optimal parser for the kernel build.
- * It is unlikely to be used in the kernel, and where it is used
- * latency shouldn't matter because it is very slow to begin with.
- * We prefer a ~180KB binary size win over faster optimal parsing.
- *
- * TODO(https://github.com/facebook/zstd/issues/2862):
- * Improve the code size of the optimal parser in general, so we
- * don't need this hack for the kernel build.
- */
-#define ZSTD_NO_INLINE 1
-
 #include "zstd_compress_internal.h"
 #include "hist.h"
 #include "zstd_opt.h"
 
 
 #define ZSTD_LITFREQ_ADD    2   /* scaling factor for litFreq, so that frequencies adapt faster to new stats */
-#define ZSTD_FREQ_DIV       4   /* log factor when using previous stats to init next stats */
 #define ZSTD_MAX_PRICE     (1<<30)
 
 #define ZSTD_PREDEF_THRESHOLD 1024   /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */
@@ -36,11 +23,11 @@
 *  Price functions for optimal parser
 ***************************************/
 
-#if 0    /* approximation at bit level */
+#if 0    /* approximation at bit level (for tests) */
 #  define BITCOST_ACCURACY 0
 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
-#  define WEIGHT(stat)  ((void)opt, ZSTD_bitWeight(stat))
-#elif 0  /* fractional bit accuracy */
+#  define WEIGHT(stat, opt) ((void)opt, ZSTD_bitWeight(stat))
+#elif 0  /* fractional bit accuracy (for tests) */
 #  define BITCOST_ACCURACY 8
 #  define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)
 #  define WEIGHT(stat,opt) ((void)opt, ZSTD_fracWeight(stat))
@@ -78,7 +65,7 @@ MEM_STATIC double ZSTD_fCost(U32 price)
 
 static int ZSTD_compressedLiterals(optState_t const* const optPtr)
 {
-    return optPtr->literalCompressionMode != ZSTD_lcm_uncompressed;
+    return optPtr->literalCompressionMode != ZSTD_ps_disable;
 }
 
 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
@@ -91,25 +78,46 @@ static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel)
 }
 
 
-/* ZSTD_downscaleStat() :
- * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus)
- * return the resulting sum of elements */
-static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus)
+static U32 sum_u32(const unsigned table[], size_t nbElts)
+{
+    size_t n;
+    U32 total = 0;
+    for (n=0; n<nbElts; n++) {
+        total += table[n];
+    }
+    return total;
+}
+
+static U32 ZSTD_downscaleStats(unsigned* table, U32 lastEltIndex, U32 shift)
 {
     U32 s, sum=0;
-    DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1);
-    assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31);
+    DEBUGLOG(5, "ZSTD_downscaleStats (nbElts=%u, shift=%u)", (unsigned)lastEltIndex+1, (unsigned)shift);
+    assert(shift < 30);
     for (s=0; s<lastEltIndex+1; s++) {
-        table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus));
+        table[s] = 1 + (table[s] >> shift);
         sum += table[s];
     }
     return sum;
 }
 
+/* ZSTD_scaleStats() :
+ * reduce all elements in table is sum too large
+ * return the resulting sum of elements */
+static U32 ZSTD_scaleStats(unsigned* table, U32 lastEltIndex, U32 logTarget)
+{
+    U32 const prevsum = sum_u32(table, lastEltIndex+1);
+    U32 const factor = prevsum >> logTarget;
+    DEBUGLOG(5, "ZSTD_scaleStats (nbElts=%u, target=%u)", (unsigned)lastEltIndex+1, (unsigned)logTarget);
+    assert(logTarget < 30);
+    if (factor <= 1) return prevsum;
+    return ZSTD_downscaleStats(table, lastEltIndex, ZSTD_highbit32(factor));
+}
+
 /* ZSTD_rescaleFreqs() :
  * if first block (detected by optPtr->litLengthSum == 0) : init statistics
  *    take hints from dictionary if there is one
- *    or init from zero, using src for literals stats, or flat 1 for match symbols
+ *    and init from zero if there is none,
+ *    using src for literals stats, and baseline stats for sequence symbols
  * otherwise downscale existing stats, to be used as seed for next block.
  */
 static void
@@ -138,7 +146,7 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
                 optPtr->litSum = 0;
                 for (lit=0; lit<=MaxLit; lit++) {
                     U32 const scaleLog = 11;   /* scale to 2K */
-                    U32 const bitCost = HUF_getNbBits(optPtr->symbolCosts->huf.CTable, lit);
+                    U32 const bitCost = HUF_getNbBitsFromCTable(optPtr->symbolCosts->huf.CTable, lit);
                     assert(bitCost <= scaleLog);
                     optPtr->litFreq[lit] = bitCost ? 1 << (scaleLog-bitCost) : 1 /*minimum to calculate cost*/;
                     optPtr->litSum += optPtr->litFreq[lit];
@@ -186,14 +194,19 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             if (compressedLiterals) {
                 unsigned lit = MaxLit;
                 HIST_count_simple(optPtr->litFreq, &lit, src, srcSize);   /* use raw first block to init statistics */
-                optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
+                optPtr->litSum = ZSTD_downscaleStats(optPtr->litFreq, MaxLit, 8);
             }
 
-            {   unsigned ll;
-                for (ll=0; ll<=MaxLL; ll++)
-                    optPtr->litLengthFreq[ll] = 1;
+            {   unsigned const baseLLfreqs[MaxLL+1] = {
+                    4, 2, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1
+                };
+                ZSTD_memcpy(optPtr->litLengthFreq, baseLLfreqs, sizeof(baseLLfreqs));
+                optPtr->litLengthSum = sum_u32(baseLLfreqs, MaxLL+1);
             }
-            optPtr->litLengthSum = MaxLL+1;
 
             {   unsigned ml;
                 for (ml=0; ml<=MaxML; ml++)
@@ -201,21 +214,26 @@ ZSTD_rescaleFreqs(optState_t* const optPtr,
             }
             optPtr->matchLengthSum = MaxML+1;
 
-            {   unsigned of;
-                for (of=0; of<=MaxOff; of++)
-                    optPtr->offCodeFreq[of] = 1;
+            {   unsigned const baseOFCfreqs[MaxOff+1] = {
+                    6, 2, 1, 1, 2, 3, 4, 4,
+                    4, 3, 2, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1,
+                    1, 1, 1, 1, 1, 1, 1, 1
+                };
+                ZSTD_memcpy(optPtr->offCodeFreq, baseOFCfreqs, sizeof(baseOFCfreqs));
+                optPtr->offCodeSum = sum_u32(baseOFCfreqs, MaxOff+1);
             }
-            optPtr->offCodeSum = MaxOff+1;
+
 
         }
 
     } else {   /* new block : re-use previous statistics, scaled down */
 
         if (compressedLiterals)
-            optPtr->litSum = ZSTD_downscaleStat(optPtr->litFreq, MaxLit, 1);
-        optPtr->litLengthSum = ZSTD_downscaleStat(optPtr->litLengthFreq, MaxLL, 0);
-        optPtr->matchLengthSum = ZSTD_downscaleStat(optPtr->matchLengthFreq, MaxML, 0);
-        optPtr->offCodeSum = ZSTD_downscaleStat(optPtr->offCodeFreq, MaxOff, 0);
+            optPtr->litSum = ZSTD_scaleStats(optPtr->litFreq, MaxLit, 12);
+        optPtr->litLengthSum = ZSTD_scaleStats(optPtr->litLengthFreq, MaxLL, 11);
+        optPtr->matchLengthSum = ZSTD_scaleStats(optPtr->matchLengthFreq, MaxML, 11);
+        optPtr->offCodeSum = ZSTD_scaleStats(optPtr->offCodeFreq, MaxOff, 11);
     }
 
     ZSTD_setBasePrices(optPtr, optLevel);
@@ -251,7 +269,16 @@ static U32 ZSTD_rawLiteralsCost(const BYTE* const literals, U32 const litLength,
  * cost of literalLength symbol */
 static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optPtr, int optLevel)
 {
-    if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel);
+    assert(litLength <= ZSTD_BLOCKSIZE_MAX);
+    if (optPtr->priceType == zop_predef)
+        return WEIGHT(litLength, optLevel);
+    /* We can't compute the litLength price for sizes >= ZSTD_BLOCKSIZE_MAX
+     * because it isn't representable in the zstd format. So instead just
+     * call it 1 bit more than ZSTD_BLOCKSIZE_MAX - 1. In this case the block
+     * would be all literals.
+     */
+    if (litLength == ZSTD_BLOCKSIZE_MAX)
+        return BITCOST_MULTIPLIER + ZSTD_litLengthPrice(ZSTD_BLOCKSIZE_MAX - 1, optPtr, optLevel);
 
     /* dynamic statistics */
     {   U32 const llCode = ZSTD_LLcode(litLength);
@@ -264,15 +291,17 @@ static U32 ZSTD_litLengthPrice(U32 const litLength, const optState_t* const optP
 /* ZSTD_getMatchPrice() :
  * Provides the cost of the match part (offset + matchLength) of a sequence
  * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence.
- * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */
+ * @offcode : expects a scale where 0,1,2 are repcodes 1-3, and 3+ are real_offsets+2
+ * @optLevel: when <2, favors small offset for decompression speed (improved cache efficiency)
+ */
 FORCE_INLINE_TEMPLATE U32
-ZSTD_getMatchPrice(U32 const offset,
+ZSTD_getMatchPrice(U32 const offcode,
                    U32 const matchLength,
              const optState_t* const optPtr,
                    int const optLevel)
 {
     U32 price;
-    U32 const offCode = ZSTD_highbit32(offset+1);
+    U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offcode));
     U32 const mlBase = matchLength - MINMATCH;
     assert(matchLength >= MINMATCH);
 
@@ -315,8 +344,8 @@ static void ZSTD_updateStats(optState_t* const optPtr,
         optPtr->litLengthSum++;
     }
 
-    /* match offset code (0-2=>repCode; 3+=>offset+2) */
-    {   U32 const offCode = ZSTD_highbit32(offsetCode+1);
+    /* offset code : expected to follow storeSeq() numeric representation */
+    {   U32 const offCode = ZSTD_highbit32(STORED_TO_OFFBASE(offsetCode));
         assert(offCode <= MaxOff);
         optPtr->offCodeFreq[offCode]++;
         optPtr->offCodeSum++;
@@ -350,7 +379,7 @@ MEM_STATIC U32 ZSTD_readMINMATCH(const void* memPtr, U32 length)
 
 /* Update hashTable3 up to ip (excluded)
    Assumption : always within prefix (i.e. not within extDict) */
-static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
+static U32 ZSTD_insertAndFindFirstIndexHash3 (const ZSTD_matchState_t* ms,
                                               U32* nextToUpdate3,
                                               const BYTE* const ip)
 {
@@ -376,11 +405,13 @@ static U32 ZSTD_insertAndFindFirstIndexHash3 (ZSTD_matchState_t* ms,
 *  Binary Tree search
 ***************************************/
 /* ZSTD_insertBt1() : add one or multiple positions to tree.
- *  ip : assumed <= iend-8 .
+ * @param ip assumed <= iend-8 .
+ * @param target The target of ZSTD_updateTree_internal() - we are filling to this position
  * @return : nb of positions added */
 static U32 ZSTD_insertBt1(
-                ZSTD_matchState_t* ms,
+                const ZSTD_matchState_t* ms,
                 const BYTE* const ip, const BYTE* const iend,
+                U32 const target,
                 U32 const mls, const int extDict)
 {
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
@@ -403,7 +434,10 @@ static U32 ZSTD_insertBt1(
     U32* smallerPtr = bt + 2*(curr&btMask);
     U32* largerPtr  = smallerPtr + 1;
     U32 dummy32;   /* to be nullified at the end */
-    U32 const windowLow = ms->window.lowLimit;
+    /* windowLow is based on target because
+     * we only need positions that will be in the window at the end of the tree update.
+     */
+    U32 const windowLow = ZSTD_getLowestMatchIndex(ms, target, cParams->windowLog);
     U32 matchEndIdx = curr+8+1;
     size_t bestLength = 8;
     U32 nbCompares = 1U << cParams->searchLog;
@@ -416,6 +450,7 @@ static U32 ZSTD_insertBt1(
 
     DEBUGLOG(8, "ZSTD_insertBt1 (%u)", curr);
 
+    assert(curr <= target);
     assert(ip <= iend-8);   /* required for h calculation */
     hashTable[h] = curr;   /* Update Hash Table */
 
@@ -504,7 +539,7 @@ void ZSTD_updateTree_internal(
                 idx, target, dictMode);
 
     while(idx < target) {
-        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict);
+        U32 const forward = ZSTD_insertBt1(ms, base+idx, iend, target, mls, dictMode == ZSTD_extDict);
         assert(idx < (U32)(idx + forward));
         idx += forward;
     }
@@ -609,7 +644,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 DEBUGLOG(8, "found repCode %u (ll0:%u, offset:%u) of length %u",
                             repCode, ll0, repOffset, repLen);
                 bestLength = repLen;
-                matches[mnum].off = repCode - ll0;
+                matches[mnum].off = STORE_REPCODE(repCode - ll0 + 1);  /* expect value between 1 and 3 */
                 matches[mnum].len = (U32)repLen;
                 mnum++;
                 if ( (repLen > sufficient_len)
@@ -638,7 +673,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 bestLength = mlen;
                 assert(curr > matchIndex3);
                 assert(mnum==0);  /* no prior solution */
-                matches[0].off = (curr - matchIndex3) + ZSTD_REP_MOVE;
+                matches[0].off = STORE_OFFSET(curr - matchIndex3);
                 matches[0].len = (U32)mlen;
                 mnum = 1;
                 if ( (mlen > sufficient_len) |
@@ -647,7 +682,7 @@ U32 ZSTD_insertBtAndGetAllMatches (
                     return 1;
         }   }   }
         /* no dictMatchState lookup: dicts don't have a populated HC3 table */
-    }
+    }  /* if (mls == 3) */
 
     hashTable[h] = curr;   /* Update Hash Table */
 
@@ -672,20 +707,19 @@ U32 ZSTD_insertBtAndGetAllMatches (
 
         if (matchLength > bestLength) {
             DEBUGLOG(8, "found match of length %u at distance %u (offCode=%u)",
-                    (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                    (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
             assert(matchEndIdx > matchIndex);
             if (matchLength > matchEndIdx - matchIndex)
                 matchEndIdx = matchIndex + (U32)matchLength;
             bestLength = matchLength;
-            matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+            matches[mnum].off = STORE_OFFSET(curr - matchIndex);
             matches[mnum].len = (U32)matchLength;
             mnum++;
             if ( (matchLength > ZSTD_OPT_NUM)
                | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                 if (dictMode == ZSTD_dictMatchState) nbCompares = 0; /* break should also skip searching dms */
                 break; /* drop, to preserve bt consistency (miss a little bit of compression) */
-            }
-        }
+        }   }
 
         if (match[matchLength] < ip[matchLength]) {
             /* match smaller than current */
@@ -721,18 +755,17 @@ U32 ZSTD_insertBtAndGetAllMatches (
             if (matchLength > bestLength) {
                 matchIndex = dictMatchIndex + dmsIndexDelta;
                 DEBUGLOG(8, "found dms match of length %u at distance %u (offCode=%u)",
-                        (U32)matchLength, curr - matchIndex, curr - matchIndex + ZSTD_REP_MOVE);
+                        (U32)matchLength, curr - matchIndex, STORE_OFFSET(curr - matchIndex));
                 if (matchLength > matchEndIdx - matchIndex)
                     matchEndIdx = matchIndex + (U32)matchLength;
                 bestLength = matchLength;
-                matches[mnum].off = (curr - matchIndex) + ZSTD_REP_MOVE;
+                matches[mnum].off = STORE_OFFSET(curr - matchIndex);
                 matches[mnum].len = (U32)matchLength;
                 mnum++;
                 if ( (matchLength > ZSTD_OPT_NUM)
                    | (ip+matchLength == iLimit) /* equal : no way to know if inf or sup */) {
                     break;   /* drop, to guarantee consistency (miss a little bit of compression) */
-                }
-            }
+            }   }
 
             if (dictMatchIndex <= dmsBtLow) { break; }   /* beyond tree size, stop the search */
             if (match[matchLength] < ip[matchLength]) {
@@ -742,39 +775,91 @@ U32 ZSTD_insertBtAndGetAllMatches (
                 /* match is larger than current */
                 commonLengthLarger = matchLength;
                 dictMatchIndex = nextPtr[0];
-            }
-        }
-    }
+    }   }   }  /* if (dictMode == ZSTD_dictMatchState) */
 
     assert(matchEndIdx > curr+8);
     ms->nextToUpdate = matchEndIdx - 8;  /* skip repetitive patterns */
     return mnum;
 }
 
-
-FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
-                        ZSTD_match_t* matches,   /* store result (match found, increasing size) in this table */
-                        ZSTD_matchState_t* ms,
-                        U32* nextToUpdate3,
-                        const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode,
-                        const U32 rep[ZSTD_REP_NUM],
-                        U32 const ll0,
-                        U32 const lengthToBeat)
+typedef U32 (*ZSTD_getAllMatchesFn)(
+    ZSTD_match_t*,
+    ZSTD_matchState_t*,
+    U32*,
+    const BYTE*,
+    const BYTE*,
+    const U32 rep[ZSTD_REP_NUM],
+    U32 const ll0,
+    U32 const lengthToBeat);
+
+FORCE_INLINE_TEMPLATE U32 ZSTD_btGetAllMatches_internal(
+        ZSTD_match_t* matches,
+        ZSTD_matchState_t* ms,
+        U32* nextToUpdate3,
+        const BYTE* ip,
+        const BYTE* const iHighLimit,
+        const U32 rep[ZSTD_REP_NUM],
+        U32 const ll0,
+        U32 const lengthToBeat,
+        const ZSTD_dictMode_e dictMode,
+        const U32 mls)
 {
-    const ZSTD_compressionParameters* const cParams = &ms->cParams;
-    U32 const matchLengthSearch = cParams->minMatch;
-    DEBUGLOG(8, "ZSTD_BtGetAllMatches");
-    if (ip < ms->window.base + ms->nextToUpdate) return 0;   /* skipped area */
-    ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode);
-    switch(matchLengthSearch)
-    {
-    case 3 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 3);
-    default :
-    case 4 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 4);
-    case 5 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 5);
-    case 7 :
-    case 6 : return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, 6);
+    assert(BOUNDED(3, ms->cParams.minMatch, 6) == mls);
+    DEBUGLOG(8, "ZSTD_BtGetAllMatches(dictMode=%d, mls=%u)", (int)dictMode, mls);
+    if (ip < ms->window.base + ms->nextToUpdate)
+        return 0;   /* skipped area */
+    ZSTD_updateTree_internal(ms, ip, iHighLimit, mls, dictMode);
+    return ZSTD_insertBtAndGetAllMatches(matches, ms, nextToUpdate3, ip, iHighLimit, dictMode, rep, ll0, lengthToBeat, mls);
+}
+
+#define ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls) ZSTD_btGetAllMatches_##dictMode##_##mls
+
+#define GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, mls)            \
+    static U32 ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, mls)(      \
+            ZSTD_match_t* matches,                             \
+            ZSTD_matchState_t* ms,                             \
+            U32* nextToUpdate3,                                \
+            const BYTE* ip,                                    \
+            const BYTE* const iHighLimit,                      \
+            const U32 rep[ZSTD_REP_NUM],                       \
+            U32 const ll0,                                     \
+            U32 const lengthToBeat)                            \
+    {                                                          \
+        return ZSTD_btGetAllMatches_internal(                  \
+                matches, ms, nextToUpdate3, ip, iHighLimit,    \
+                rep, ll0, lengthToBeat, ZSTD_##dictMode, mls); \
+    }
+
+#define GEN_ZSTD_BT_GET_ALL_MATCHES(dictMode)  \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 3)  \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 4)  \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 5)  \
+    GEN_ZSTD_BT_GET_ALL_MATCHES_(dictMode, 6)
+
+GEN_ZSTD_BT_GET_ALL_MATCHES(noDict)
+GEN_ZSTD_BT_GET_ALL_MATCHES(extDict)
+GEN_ZSTD_BT_GET_ALL_MATCHES(dictMatchState)
+
+#define ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMode)  \
+    {                                            \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 3), \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 4), \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 5), \
+        ZSTD_BT_GET_ALL_MATCHES_FN(dictMode, 6)  \
     }
+
+static ZSTD_getAllMatchesFn
+ZSTD_selectBtGetAllMatches(ZSTD_matchState_t const* ms, ZSTD_dictMode_e const dictMode)
+{
+    ZSTD_getAllMatchesFn const getAllMatchesFns[3][4] = {
+        ZSTD_BT_GET_ALL_MATCHES_ARRAY(noDict),
+        ZSTD_BT_GET_ALL_MATCHES_ARRAY(extDict),
+        ZSTD_BT_GET_ALL_MATCHES_ARRAY(dictMatchState)
+    };
+    U32 const mls = BOUNDED(3, ms->cParams.minMatch, 6);
+    assert((U32)dictMode < 3);
+    assert(mls - 3 < 4);
+    return getAllMatchesFns[(int)dictMode][mls - 3];
 }
 
 /* ***********************
@@ -783,16 +868,18 @@ FORCE_INLINE_TEMPLATE U32 ZSTD_BtGetAllMatches (
 
 /* Struct containing info needed to make decision about ldm inclusion */
 typedef struct {
-    rawSeqStore_t seqStore;         /* External match candidates store for this block */
-    U32 startPosInBlock;            /* Start position of the current match candidate */
-    U32 endPosInBlock;              /* End position of the current match candidate */
-    U32 offset;                     /* Offset of the match candidate */
+    rawSeqStore_t seqStore;   /* External match candidates store for this block */
+    U32 startPosInBlock;      /* Start position of the current match candidate */
+    U32 endPosInBlock;        /* End position of the current match candidate */
+    U32 offset;               /* Offset of the match candidate */
 } ZSTD_optLdm_t;
 
 /* ZSTD_optLdm_skipRawSeqStoreBytes():
- * Moves forward in rawSeqStore by nbBytes, which will update the fields 'pos' and 'posInSequence'.
+ * Moves forward in @rawSeqStore by @nbBytes,
+ * which will update the fields 'pos' and 'posInSequence'.
  */
-static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes) {
+static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t nbBytes)
+{
     U32 currPos = (U32)(rawSeqStore->posInSequence + nbBytes);
     while (currPos && rawSeqStore->pos < rawSeqStore->size) {
         rawSeq currSeq = rawSeqStore->seq[rawSeqStore->pos];
@@ -813,8 +900,10 @@ static void ZSTD_optLdm_skipRawSeqStoreBytes(rawSeqStore_t* rawSeqStore, size_t
  * Calculates the beginning and end of the next match in the current block.
  * Updates 'pos' and 'posInSequence' of the ldmSeqStore.
  */
-static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
-                                                   U32 blockBytesRemaining) {
+static void
+ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 currPosInBlock,
+                                       U32 blockBytesRemaining)
+{
     rawSeq currSeq;
     U32 currBlockEndPos;
     U32 literalsBytesRemaining;
@@ -826,8 +915,8 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
         optLdm->endPosInBlock = UINT_MAX;
         return;
     }
-    /* Calculate appropriate bytes left in matchLength and litLength after adjusting
-       based on ldmSeqStore->posInSequence */
+    /* Calculate appropriate bytes left in matchLength and litLength
+     * after adjusting based on ldmSeqStore->posInSequence */
     currSeq = optLdm->seqStore.seq[optLdm->seqStore.pos];
     assert(optLdm->seqStore.posInSequence <= currSeq.litLength + currSeq.matchLength);
     currBlockEndPos = currPosInBlock + blockBytesRemaining;
@@ -863,15 +952,16 @@ static void ZSTD_opt_getNextMatchAndUpdateSeqStore(ZSTD_optLdm_t* optLdm, U32 cu
 }
 
 /* ZSTD_optLdm_maybeAddMatch():
- * Adds a match if it's long enough, based on it's 'matchStartPosInBlock'
- * and 'matchEndPosInBlock', into 'matches'. Maintains the correct ordering of 'matches'
+ * Adds a match if it's long enough,
+ * based on it's 'matchStartPosInBlock' and 'matchEndPosInBlock',
+ * into 'matches'. Maintains the correct ordering of 'matches'.
  */
 static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
-                                      ZSTD_optLdm_t* optLdm, U32 currPosInBlock) {
-    U32 posDiff = currPosInBlock - optLdm->startPosInBlock;
+                                      const ZSTD_optLdm_t* optLdm, U32 currPosInBlock)
+{
+    U32 const posDiff = currPosInBlock - optLdm->startPosInBlock;
     /* Note: ZSTD_match_t actually contains offCode and matchLength (before subtracting MINMATCH) */
-    U32 candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
-    U32 candidateOffCode = optLdm->offset + ZSTD_REP_MOVE;
+    U32 const candidateMatchLength = optLdm->endPosInBlock - optLdm->startPosInBlock - posDiff;
 
     /* Ensure that current block position is not outside of the match */
     if (currPosInBlock < optLdm->startPosInBlock
@@ -881,6 +971,7 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
     }
 
     if (*nbMatches == 0 || ((candidateMatchLength > matches[*nbMatches-1].len) && *nbMatches < ZSTD_OPT_NUM)) {
+        U32 const candidateOffCode = STORE_OFFSET(optLdm->offset);
         DEBUGLOG(6, "ZSTD_optLdm_maybeAddMatch(): Adding ldm candidate match (offCode: %u matchLength %u) at block position=%u",
                  candidateOffCode, candidateMatchLength, currPosInBlock);
         matches[*nbMatches].len = candidateMatchLength;
@@ -892,8 +983,11 @@ static void ZSTD_optLdm_maybeAddMatch(ZSTD_match_t* matches, U32* nbMatches,
 /* ZSTD_optLdm_processMatchCandidate():
  * Wrapper function to update ldm seq store and call ldm functions as necessary.
  */
-static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_t* matches, U32* nbMatches,
-                                              U32 currPosInBlock, U32 remainingBytes) {
+static void
+ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm,
+                                  ZSTD_match_t* matches, U32* nbMatches,
+                                  U32 currPosInBlock, U32 remainingBytes)
+{
     if (optLdm->seqStore.size == 0 || optLdm->seqStore.pos >= optLdm->seqStore.size) {
         return;
     }
@@ -904,19 +998,19 @@ static void ZSTD_optLdm_processMatchCandidate(ZSTD_optLdm_t* optLdm, ZSTD_match_
              * at the end of a match from the ldm seq store, and will often be some bytes
              * over beyond matchEndPosInBlock. As such, we need to correct for these "overshoots"
              */
-            U32 posOvershoot = currPosInBlock - optLdm->endPosInBlock;
+            U32 const posOvershoot = currPosInBlock - optLdm->endPosInBlock;
             ZSTD_optLdm_skipRawSeqStoreBytes(&optLdm->seqStore, posOvershoot);
-        } 
+        }
         ZSTD_opt_getNextMatchAndUpdateSeqStore(optLdm, currPosInBlock, remainingBytes);
     }
     ZSTD_optLdm_maybeAddMatch(matches, nbMatches, optLdm, currPosInBlock);
 }
 
+
 /*-*******************************
 *  Optimal parser
 *********************************/
 
-
 static U32 ZSTD_totalLen(ZSTD_optimal_t sol)
 {
     return sol.litlen + sol.mlen;
@@ -957,6 +1051,8 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
     const BYTE* const prefixStart = base + ms->window.dictLimit;
     const ZSTD_compressionParameters* const cParams = &ms->cParams;
 
+    ZSTD_getAllMatchesFn getAllMatches = ZSTD_selectBtGetAllMatches(ms, dictMode);
+
     U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1);
     U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4;
     U32 nextToUpdate3 = ms->nextToUpdate;
@@ -984,7 +1080,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
         /* find first match */
         {   U32 const litlen = (U32)(ip - anchor);
             U32 const ll0 = !litlen;
-            U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, ip, iend, dictMode, rep, ll0, minMatch);
+            U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, ip, iend, rep, ll0, minMatch);
             ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
                                               (U32)(ip-istart), (U32)(iend - ip));
             if (!nbMatches) { ip++; continue; }
@@ -998,18 +1094,18 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
              * in every price. We include the literal length to avoid negative
              * prices when we subtract the previous literal length.
              */
-            opt[0].price = ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
+            opt[0].price = (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel);
 
             /* large match -> immediate encoding */
             {   U32 const maxML = matches[nbMatches-1].len;
-                U32 const maxOffset = matches[nbMatches-1].off;
+                U32 const maxOffcode = matches[nbMatches-1].off;
                 DEBUGLOG(6, "found %u matches of maxLength=%u and maxOffCode=%u at cPos=%u => start new series",
-                            nbMatches, maxML, maxOffset, (U32)(ip-prefixStart));
+                            nbMatches, maxML, maxOffcode, (U32)(ip-prefixStart));
 
                 if (maxML > sufficient_len) {
                     lastSequence.litlen = litlen;
                     lastSequence.mlen = maxML;
-                    lastSequence.off = maxOffset;
+                    lastSequence.off = maxOffcode;
                     DEBUGLOG(6, "large match (%u>%u), immediate encoding",
                                 maxML, sufficient_len);
                     cur = 0;
@@ -1018,24 +1114,25 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             }   }
 
             /* set prices for first matches starting position == 0 */
-            {   U32 const literalsPrice = opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
+            assert(opt[0].price >= 0);
+            {   U32 const literalsPrice = (U32)opt[0].price + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
                 U32 pos;
                 U32 matchNb;
                 for (pos = 1; pos < minMatch; pos++) {
                     opt[pos].price = ZSTD_MAX_PRICE;   /* mlen, litlen and price will be fixed during forward scanning */
                 }
                 for (matchNb = 0; matchNb < nbMatches; matchNb++) {
-                    U32 const offset = matches[matchNb].off;
+                    U32 const offcode = matches[matchNb].off;
                     U32 const end = matches[matchNb].len;
                     for ( ; pos <= end ; pos++ ) {
-                        U32 const matchPrice = ZSTD_getMatchPrice(offset, pos, optStatePtr, optLevel);
+                        U32 const matchPrice = ZSTD_getMatchPrice(offcode, pos, optStatePtr, optLevel);
                         U32 const sequencePrice = literalsPrice + matchPrice;
                         DEBUGLOG(7, "rPos:%u => set initial price : %.2f",
                                     pos, ZSTD_fCost(sequencePrice));
                         opt[pos].mlen = pos;
-                        opt[pos].off = offset;
+                        opt[pos].off = offcode;
                         opt[pos].litlen = litlen;
-                        opt[pos].price = sequencePrice;
+                        opt[pos].price = (int)sequencePrice;
                 }   }
                 last_pos = pos-1;
             }
@@ -1050,9 +1147,9 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             /* Fix current position with one literal if cheaper */
             {   U32 const litlen = (opt[cur-1].mlen == 0) ? opt[cur-1].litlen + 1 : 1;
                 int const price = opt[cur-1].price
-                                + ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
-                                + ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
-                                - ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
+                                + (int)ZSTD_rawLiteralsCost(ip+cur-1, 1, optStatePtr, optLevel)
+                                + (int)ZSTD_litLengthPrice(litlen, optStatePtr, optLevel)
+                                - (int)ZSTD_litLengthPrice(litlen-1, optStatePtr, optLevel);
                 assert(price < 1000000000); /* overflow check */
                 if (price <= opt[cur].price) {
                     DEBUGLOG(7, "cPos:%zi==rPos:%u : better price (%.2f<=%.2f) using literal (ll==%u) (hist:%u,%u,%u)",
@@ -1078,7 +1175,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
             assert(cur >= opt[cur].mlen);
             if (opt[cur].mlen != 0) {
                 U32 const prev = cur - opt[cur].mlen;
-                repcodes_t newReps = ZSTD_updateRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
+                repcodes_t const newReps = ZSTD_newRep(opt[prev].rep, opt[cur].off, opt[cur].litlen==0);
                 ZSTD_memcpy(opt[cur].rep, &newReps, sizeof(repcodes_t));
             } else {
                 ZSTD_memcpy(opt[cur].rep, opt[cur - 1].rep, sizeof(repcodes_t));
@@ -1095,11 +1192,12 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
                 continue;  /* skip unpromising positions; about ~+6% speed, -0.01 ratio */
             }
 
+            assert(opt[cur].price >= 0);
             {   U32 const ll0 = (opt[cur].mlen != 0);
                 U32 const litlen = (opt[cur].mlen == 0) ? opt[cur].litlen : 0;
-                U32 const previousPrice = opt[cur].price;
+                U32 const previousPrice = (U32)opt[cur].price;
                 U32 const basePrice = previousPrice + ZSTD_litLengthPrice(0, optStatePtr, optLevel);
-                U32 nbMatches = ZSTD_BtGetAllMatches(matches, ms, &nextToUpdate3, inr, iend, dictMode, opt[cur].rep, ll0, minMatch);
+                U32 nbMatches = getAllMatches(matches, ms, &nextToUpdate3, inr, iend, opt[cur].rep, ll0, minMatch);
                 U32 matchNb;
 
                 ZSTD_optLdm_processMatchCandidate(&optLdm, matches, &nbMatches,
@@ -1137,7 +1235,7 @@ ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms,
 
                     for (mlen = lastML; mlen >= startML; mlen--) {  /* scan downward */
                         U32 const pos = cur + mlen;
-                        int const price = basePrice + ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
+                        int const price = (int)basePrice + (int)ZSTD_getMatchPrice(offset, mlen, optStatePtr, optLevel);
 
                         if ((pos > last_pos) || (price < opt[pos].price)) {
                             DEBUGLOG(7, "rPos:%u (ml=%2u) => new better price (%.2f<%.2f)",
@@ -1167,7 +1265,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
          * update them while traversing the sequences.
          */
         if (lastSequence.mlen != 0) {
-            repcodes_t reps = ZSTD_updateRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
+            repcodes_t const reps = ZSTD_newRep(opt[cur].rep, lastSequence.off, lastSequence.litlen==0);
             ZSTD_memcpy(rep, &reps, sizeof(reps));
         } else {
             ZSTD_memcpy(rep, opt[cur].rep, sizeof(repcodes_t));
@@ -1211,7 +1309,7 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
 
                     assert(anchor + llen <= iend);
                     ZSTD_updateStats(optStatePtr, llen, anchor, offCode, mlen);
-                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen-MINMATCH);
+                    ZSTD_storeSeq(seqStore, llen, anchor, iend, offCode, mlen);
                     anchor += advance;
                     ip = anchor;
             }   }
@@ -1223,38 +1321,30 @@ _shortestPath:   /* cur, last_pos, best_mlen, best_off have to be set */
     return (size_t)(iend - anchor);
 }
 
+static size_t ZSTD_compressBlock_opt0(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /* optLevel */, dictMode);
+}
+
+static size_t ZSTD_compressBlock_opt2(
+        ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
+        const void* src, size_t srcSize, const ZSTD_dictMode_e dictMode)
+{
+    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /* optLevel */, dictMode);
+}
 
 size_t ZSTD_compressBlock_btopt(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btopt");
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 
 
-/* used in 2-pass strategy */
-static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus)
-{
-    U32 s, sum=0;
-    assert(ZSTD_FREQ_DIV+bonus >= 0);
-    for (s=0; s<lastEltIndex+1; s++) {
-        table[s] <<= ZSTD_FREQ_DIV+bonus;
-        table[s]--;
-        sum += table[s];
-    }
-    return sum;
-}
 
-/* used in 2-pass strategy */
-MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr)
-{
-    if (ZSTD_compressedLiterals(optPtr))
-        optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0);
-    optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0);
-    optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0);
-    optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0);
-}
 
 /* ZSTD_initStats_ultra():
  * make a first compression pass, just to seed stats with more accurate starting values.
@@ -1276,7 +1366,7 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
     assert(ms->window.dictLimit == ms->window.lowLimit);   /* no dictionary */
     assert(ms->window.dictLimit - ms->nextToUpdate <= 1);  /* no prefix (note: intentional overflow, defined as 2-complement) */
 
-    ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);   /* generate stats into ms->opt*/
+    ZSTD_compressBlock_opt2(ms, seqStore, tmpRep, src, srcSize, ZSTD_noDict);   /* generate stats into ms->opt*/
 
     /* invalidate first scan from history */
     ZSTD_resetSeqStore(seqStore);
@@ -1285,8 +1375,6 @@ ZSTD_initStats_ultra(ZSTD_matchState_t* ms,
     ms->window.lowLimit = ms->window.dictLimit;
     ms->nextToUpdate = ms->window.dictLimit;
 
-    /* re-inforce weight of collected statistics */
-    ZSTD_upscaleStats(&ms->opt);
 }
 
 size_t ZSTD_compressBlock_btultra(
@@ -1294,7 +1382,7 @@ size_t ZSTD_compressBlock_btultra(
         const void* src, size_t srcSize)
 {
     DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize);
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 
 size_t ZSTD_compressBlock_btultra2(
@@ -1322,35 +1410,35 @@ size_t ZSTD_compressBlock_btultra2(
         ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize);
     }
 
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_noDict);
 }
 
 size_t ZSTD_compressBlock_btopt_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
 size_t ZSTD_compressBlock_btultra_dictMatchState(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_dictMatchState);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_dictMatchState);
 }
 
 size_t ZSTD_compressBlock_btopt_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_extDict);
+    return ZSTD_compressBlock_opt0(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
 
 size_t ZSTD_compressBlock_btultra_extDict(
         ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
         const void* src, size_t srcSize)
 {
-    return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict);
+    return ZSTD_compressBlock_opt2(ms, seqStore, rep, src, srcSize, ZSTD_extDict);
 }
 
 /* note : no btultra2 variant for extDict nor dictMatchState,
diff --git a/lib/zstd/decompress/huf_decompress.c b/lib/zstd/decompress/huf_decompress.c
index 5105e59ac04a8..89b269a641c7e 100644
--- a/lib/zstd/decompress/huf_decompress.c
+++ b/lib/zstd/decompress/huf_decompress.c
@@ -22,6 +22,13 @@
 #define HUF_STATIC_LINKING_ONLY
 #include "../common/huf.h"
 #include "../common/error_private.h"
+#include "../common/zstd_internal.h"
+
+/* **************************************************************
+*  Constants
+****************************************************************/
+
+#define HUF_DECODER_FAST_TABLELOG 11
 
 /* **************************************************************
 *  Macros
@@ -36,6 +43,26 @@
 #error "Cannot force the use of the X1 and X2 decoders at the same time!"
 #endif
 
+#if ZSTD_ENABLE_ASM_X86_64_BMI2 && DYNAMIC_BMI2
+# define HUF_ASM_X86_64_BMI2_ATTRS BMI2_TARGET_ATTRIBUTE
+#else
+# define HUF_ASM_X86_64_BMI2_ATTRS
+#endif
+
+#define HUF_EXTERN_C
+#define HUF_ASM_DECL HUF_EXTERN_C
+
+#if DYNAMIC_BMI2 || (ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
+# define HUF_NEED_BMI2_FUNCTION 1
+#else
+# define HUF_NEED_BMI2_FUNCTION 0
+#endif
+
+#if !(ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__))
+# define HUF_NEED_DEFAULT_FUNCTION 1
+#else
+# define HUF_NEED_DEFAULT_FUNCTION 0
+#endif
 
 /* **************************************************************
 *  Error Management
@@ -65,7 +92,7 @@
         return fn##_body(dst, dstSize, cSrc, cSrcSize, DTable);             \
     }                                                                       \
                                                                             \
-    static TARGET_ATTRIBUTE("bmi2") size_t fn##_bmi2(                       \
+    static BMI2_TARGET_ATTRIBUTE size_t fn##_bmi2(                          \
                   void* dst,  size_t dstSize,                               \
             const void* cSrc, size_t cSrcSize,                              \
             const HUF_DTable* DTable)                                       \
@@ -107,13 +134,147 @@ static DTableDesc HUF_getDTableDesc(const HUF_DTable* table)
     return dtd;
 }
 
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
+static size_t HUF_initDStream(BYTE const* ip) {
+    BYTE const lastByte = ip[7];
+    size_t const bitsConsumed = lastByte ? 8 - BIT_highbit32(lastByte) : 0;
+    size_t const value = MEM_readLEST(ip) | 1;
+    assert(bitsConsumed <= 8);
+    return value << bitsConsumed;
+}
+typedef struct {
+    BYTE const* ip[4];
+    BYTE* op[4];
+    U64 bits[4];
+    void const* dt;
+    BYTE const* ilimit;
+    BYTE* oend;
+    BYTE const* iend[4];
+} HUF_DecompressAsmArgs;
+
+/*
+ * Initializes args for the asm decoding loop.
+ * @returns 0 on success
+ *          1 if the fallback implementation should be used.
+ *          Or an error code on failure.
+ */
+static size_t HUF_DecompressAsmArgs_init(HUF_DecompressAsmArgs* args, void* dst, size_t dstSize, void const* src, size_t srcSize, const HUF_DTable* DTable)
+{
+    void const* dt = DTable + 1;
+    U32 const dtLog = HUF_getDTableDesc(DTable).tableLog;
+
+    const BYTE* const ilimit = (const BYTE*)src + 6 + 8;
+
+    BYTE* const oend = (BYTE*)dst + dstSize;
+
+    /* The following condition is false on x32 platform,
+     * but HUF_asm is not compatible with this ABI */
+    if (!(MEM_isLittleEndian() && !MEM_32bits())) return 1;
+
+    /* strict minimum : jump table + 1 byte per stream */
+    if (srcSize < 10)
+        return ERROR(corruption_detected);
+
+    /* Must have at least 8 bytes per stream because we don't handle initializing smaller bit containers.
+     * If table log is not correct at this point, fallback to the old decoder.
+     * On small inputs we don't have enough data to trigger the fast loop, so use the old decoder.
+     */
+    if (dtLog != HUF_DECODER_FAST_TABLELOG)
+        return 1;
+
+    /* Read the jump table. */
+    {
+        const BYTE* const istart = (const BYTE*)src;
+        size_t const length1 = MEM_readLE16(istart);
+        size_t const length2 = MEM_readLE16(istart+2);
+        size_t const length3 = MEM_readLE16(istart+4);
+        size_t const length4 = srcSize - (length1 + length2 + length3 + 6);
+        args->iend[0] = istart + 6;  /* jumpTable */
+        args->iend[1] = args->iend[0] + length1;
+        args->iend[2] = args->iend[1] + length2;
+        args->iend[3] = args->iend[2] + length3;
+
+        /* HUF_initDStream() requires this, and this small of an input
+         * won't benefit from the ASM loop anyways.
+         * length1 must be >= 16 so that ip[0] >= ilimit before the loop
+         * starts.
+         */
+        if (length1 < 16 || length2 < 8 || length3 < 8 || length4 < 8)
+            return 1;
+        if (length4 > srcSize) return ERROR(corruption_detected);   /* overflow */
+    }
+    /* ip[] contains the position that is currently loaded into bits[]. */
+    args->ip[0] = args->iend[1] - sizeof(U64);
+    args->ip[1] = args->iend[2] - sizeof(U64);
+    args->ip[2] = args->iend[3] - sizeof(U64);
+    args->ip[3] = (BYTE const*)src + srcSize - sizeof(U64);
+
+    /* op[] contains the output pointers. */
+    args->op[0] = (BYTE*)dst;
+    args->op[1] = args->op[0] + (dstSize+3)/4;
+    args->op[2] = args->op[1] + (dstSize+3)/4;
+    args->op[3] = args->op[2] + (dstSize+3)/4;
+
+    /* No point to call the ASM loop for tiny outputs. */
+    if (args->op[3] >= oend)
+        return 1;
+
+    /* bits[] is the bit container.
+        * It is read from the MSB down to the LSB.
+        * It is shifted left as it is read, and zeros are
+        * shifted in. After the lowest valid bit a 1 is
+        * set, so that CountTrailingZeros(bits[]) can be used
+        * to count how many bits we've consumed.
+        */
+    args->bits[0] = HUF_initDStream(args->ip[0]);
+    args->bits[1] = HUF_initDStream(args->ip[1]);
+    args->bits[2] = HUF_initDStream(args->ip[2]);
+    args->bits[3] = HUF_initDStream(args->ip[3]);
+
+    /* If ip[] >= ilimit, it is guaranteed to be safe to
+        * reload bits[]. It may be beyond its section, but is
+        * guaranteed to be valid (>= istart).
+        */
+    args->ilimit = ilimit;
+
+    args->oend = oend;
+    args->dt = dt;
+
+    return 0;
+}
+
+static size_t HUF_initRemainingDStream(BIT_DStream_t* bit, HUF_DecompressAsmArgs const* args, int stream, BYTE* segmentEnd)
+{
+    /* Validate that we haven't overwritten. */
+    if (args->op[stream] > segmentEnd)
+        return ERROR(corruption_detected);
+    /* Validate that we haven't read beyond iend[].
+        * Note that ip[] may be < iend[] because the MSB is
+        * the next bit to read, and we may have consumed 100%
+        * of the stream, so down to iend[i] - 8 is valid.
+        */
+    if (args->ip[stream] < args->iend[stream] - 8)
+        return ERROR(corruption_detected);
+
+    /* Construct the BIT_DStream_t. */
+    bit->bitContainer = MEM_readLE64(args->ip[stream]);
+    bit->bitsConsumed = ZSTD_countTrailingZeros((size_t)args->bits[stream]);
+    bit->start = (const char*)args->iend[0];
+    bit->limitPtr = bit->start + sizeof(size_t);
+    bit->ptr = (const char*)args->ip[stream];
+
+    return 0;
+}
+#endif
+
 
 #ifndef HUF_FORCE_DECOMPRESS_X2
 
 /*-***************************/
 /*  single-symbol decoding   */
 /*-***************************/
-typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decoding */
+typedef struct { BYTE nbBits; BYTE byte; } HUF_DEltX1;   /* single-symbol decoding */
 
 /*
  * Packs 4 HUF_DEltX1 structs into a U64. This is used to lay down 4 entries at
@@ -122,14 +283,44 @@ typedef struct { BYTE byte; BYTE nbBits; } HUF_DEltX1;   /* single-symbol decodi
 static U64 HUF_DEltX1_set4(BYTE symbol, BYTE nbBits) {
     U64 D4;
     if (MEM_isLittleEndian()) {
-        D4 = symbol + (nbBits << 8);
-    } else {
         D4 = (symbol << 8) + nbBits;
+    } else {
+        D4 = symbol + (nbBits << 8);
     }
     D4 *= 0x0001000100010001ULL;
     return D4;
 }
 
+/*
+ * Increase the tableLog to targetTableLog and rescales the stats.
+ * If tableLog > targetTableLog this is a no-op.
+ * @returns New tableLog
+ */
+static U32 HUF_rescaleStats(BYTE* huffWeight, U32* rankVal, U32 nbSymbols, U32 tableLog, U32 targetTableLog)
+{
+    if (tableLog > targetTableLog)
+        return tableLog;
+    if (tableLog < targetTableLog) {
+        U32 const scale = targetTableLog - tableLog;
+        U32 s;
+        /* Increase the weight for all non-zero probability symbols by scale. */
+        for (s = 0; s < nbSymbols; ++s) {
+            huffWeight[s] += (BYTE)((huffWeight[s] == 0) ? 0 : scale);
+        }
+        /* Update rankVal to reflect the new weights.
+         * All weights except 0 get moved to weight + scale.
+         * Weights [1, scale] are empty.
+         */
+        for (s = targetTableLog; s > scale; --s) {
+            rankVal[s] = rankVal[s - scale];
+        }
+        for (s = scale; s > 0; --s) {
+            rankVal[s] = 0;
+        }
+    }
+    return targetTableLog;
+}
+
 typedef struct {
         U32 rankVal[HUF_TABLELOG_ABSOLUTEMAX + 1];
         U32 rankStart[HUF_TABLELOG_ABSOLUTEMAX + 1];
@@ -162,8 +353,12 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
     iSize = HUF_readStats_wksp(wksp->huffWeight, HUF_SYMBOLVALUE_MAX + 1, wksp->rankVal, &nbSymbols, &tableLog, src, srcSize, wksp->statsWksp, sizeof(wksp->statsWksp), bmi2);
     if (HUF_isError(iSize)) return iSize;
 
+
     /* Table header */
     {   DTableDesc dtd = HUF_getDTableDesc(DTable);
+        U32 const maxTableLog = dtd.maxTableLog + 1;
+        U32 const targetTableLog = MIN(maxTableLog, HUF_DECODER_FAST_TABLELOG);
+        tableLog = HUF_rescaleStats(wksp->huffWeight, wksp->rankVal, nbSymbols, tableLog, targetTableLog);
         if (tableLog > (U32)(dtd.maxTableLog+1)) return ERROR(tableLog_tooLarge);   /* DTable too small, Huffman tree cannot fit in */
         dtd.tableType = 0;
         dtd.tableLog = (BYTE)tableLog;
@@ -207,7 +402,7 @@ size_t HUF_readDTableX1_wksp_bmi2(HUF_DTable* DTable, const void* src, size_t sr
 
     /* fill DTable
      * We fill all entries of each weight in order.
-     * That way length is a constant for each iteration of the outter loop.
+     * That way length is a constant for each iteration of the outer loop.
      * We can switch based on the length to a different inner loop which is
      * optimized for that particular case.
      */
@@ -304,11 +499,15 @@ HUF_decodeStreamX1(BYTE* p, BIT_DStream_t* const bitDPtr, BYTE* const pEnd, cons
     BYTE* const pStart = p;
 
     /* up to 4 symbols at a time */
-    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
-        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
-        HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+    if ((pEnd - p) > 3) {
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-3)) {
+            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_1(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_2(p, bitDPtr);
+            HUF_DECODE_SYMBOLX1_0(p, bitDPtr);
+        }
+    } else {
+        BIT_reloadDStream(bitDPtr);
     }
 
     /* [0-3] symbols remaining */
@@ -388,33 +587,36 @@ HUF_decompress4X1_usingDTable_internal_body(
         U32 endSignal = 1;
 
         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
 
         /* up to 16 symbols per loop (4 symbols per stream) in 64-bit mode */
-        for ( ; (endSignal) & (op4 < olimit) ; ) {
-            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
-            HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
-            HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
-            HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
-            HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
-            HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
-            HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
-            HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
-            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+        if ((size_t)(oend - op4) >= sizeof(size_t)) {
+            for ( ; (endSignal) & (op4 < olimit) ; ) {
+                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+                HUF_DECODE_SYMBOLX1_1(op1, &bitD1);
+                HUF_DECODE_SYMBOLX1_1(op2, &bitD2);
+                HUF_DECODE_SYMBOLX1_1(op3, &bitD3);
+                HUF_DECODE_SYMBOLX1_1(op4, &bitD4);
+                HUF_DECODE_SYMBOLX1_2(op1, &bitD1);
+                HUF_DECODE_SYMBOLX1_2(op2, &bitD2);
+                HUF_DECODE_SYMBOLX1_2(op3, &bitD3);
+                HUF_DECODE_SYMBOLX1_2(op4, &bitD4);
+                HUF_DECODE_SYMBOLX1_0(op1, &bitD1);
+                HUF_DECODE_SYMBOLX1_0(op2, &bitD2);
+                HUF_DECODE_SYMBOLX1_0(op3, &bitD3);
+                HUF_DECODE_SYMBOLX1_0(op4, &bitD4);
+                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+            }
         }
 
         /* check corruption */
@@ -440,6 +642,79 @@ HUF_decompress4X1_usingDTable_internal_body(
     }
 }
 
+#if HUF_NEED_BMI2_FUNCTION
+static BMI2_TARGET_ATTRIBUTE
+size_t HUF_decompress4X1_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable) {
+    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if HUF_NEED_DEFAULT_FUNCTION
+static
+size_t HUF_decompress4X1_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable) {
+    return HUF_decompress4X1_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
+HUF_ASM_DECL void HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
+
+static HUF_ASM_X86_64_BMI2_ATTRS
+size_t
+HUF_decompress4X1_usingDTable_internal_bmi2_asm(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable)
+{
+    void const* dt = DTable + 1;
+    const BYTE* const iend = (const BYTE*)cSrc + 6;
+    BYTE* const oend = (BYTE*)dst + dstSize;
+    HUF_DecompressAsmArgs args;
+    {
+        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+        FORWARD_IF_ERROR(ret, "Failed to init asm args");
+        if (ret != 0)
+            return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+    }
+
+    assert(args.ip[0] >= args.ilimit);
+    HUF_decompress4X1_usingDTable_internal_bmi2_asm_loop(&args);
+
+    /* Our loop guarantees that ip[] >= ilimit and that we haven't
+    * overwritten any op[].
+    */
+    assert(args.ip[0] >= iend);
+    assert(args.ip[1] >= iend);
+    assert(args.ip[2] >= iend);
+    assert(args.ip[3] >= iend);
+    assert(args.op[3] <= oend);
+    (void)iend;
+
+    /* finish bit streams one by one. */
+    {
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* segmentEnd = (BYTE*)dst;
+        int i;
+        for (i = 0; i < 4; ++i) {
+            BIT_DStream_t bit;
+            if (segmentSize <= (size_t)(oend - segmentEnd))
+                segmentEnd += segmentSize;
+            else
+                segmentEnd = oend;
+            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
+            /* Decompress and validate that we've produced exactly the expected length. */
+            args.op[i] += HUF_decodeStreamX1(args.op[i], &bit, segmentEnd, (HUF_DEltX1 const*)dt, HUF_DECODER_FAST_TABLELOG);
+            if (args.op[i] != segmentEnd) return ERROR(corruption_detected);
+        }
+    }
+
+    /* decoded size */
+    return dstSize;
+}
+#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
 
 typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                                const void *cSrc,
@@ -447,8 +722,28 @@ typedef size_t (*HUF_decompress_usingDTable_t)(void *dst, size_t dstSize,
                                                const HUF_DTable *DTable);
 
 HUF_DGEN(HUF_decompress1X1_usingDTable_internal)
-HUF_DGEN(HUF_decompress4X1_usingDTable_internal)
 
+static size_t HUF_decompress4X1_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+# if ZSTD_ENABLE_ASM_X86_64_BMI2
+        return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+# else
+        return HUF_decompress4X1_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+# endif
+    }
+#else
+    (void)bmi2;
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
+    return HUF_decompress4X1_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+#else
+    return HUF_decompress4X1_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
+#endif
+}
 
 
 size_t HUF_decompress1X1_usingDTable(
@@ -518,106 +813,226 @@ size_t HUF_decompress4X1_DCtx_wksp(HUF_DTable* dctx, void* dst, size_t dstSize,
 /* *************************/
 
 typedef struct { U16 sequence; BYTE nbBits; BYTE length; } HUF_DEltX2;  /* double-symbols decoding */
-typedef struct { BYTE symbol; BYTE weight; } sortedSymbol_t;
+typedef struct { BYTE symbol; } sortedSymbol_t;
 typedef U32 rankValCol_t[HUF_TABLELOG_MAX + 1];
 typedef rankValCol_t rankVal_t[HUF_TABLELOG_MAX];
 
+/*
+ * Constructs a HUF_DEltX2 in a U32.
+ */
+static U32 HUF_buildDEltX2U32(U32 symbol, U32 nbBits, U32 baseSeq, int level)
+{
+    U32 seq;
+    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, sequence) == 0);
+    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, nbBits) == 2);
+    DEBUG_STATIC_ASSERT(offsetof(HUF_DEltX2, length) == 3);
+    DEBUG_STATIC_ASSERT(sizeof(HUF_DEltX2) == sizeof(U32));
+    if (MEM_isLittleEndian()) {
+        seq = level == 1 ? symbol : (baseSeq + (symbol << 8));
+        return seq + (nbBits << 16) + ((U32)level << 24);
+    } else {
+        seq = level == 1 ? (symbol << 8) : ((baseSeq << 8) + symbol);
+        return (seq << 16) + (nbBits << 8) + (U32)level;
+    }
+}
 
-/* HUF_fillDTableX2Level2() :
- * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
-static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 sizeLog, const U32 consumed,
-                           const U32* rankValOrigin, const int minWeight,
-                           const sortedSymbol_t* sortedSymbols, const U32 sortedListSize,
-                           U32 nbBitsBaseline, U16 baseSeq, U32* wksp, size_t wkspSize)
+/*
+ * Constructs a HUF_DEltX2.
+ */
+static HUF_DEltX2 HUF_buildDEltX2(U32 symbol, U32 nbBits, U32 baseSeq, int level)
 {
     HUF_DEltX2 DElt;
-    U32* rankVal = wksp;
+    U32 const val = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
+    DEBUG_STATIC_ASSERT(sizeof(DElt) == sizeof(val));
+    ZSTD_memcpy(&DElt, &val, sizeof(val));
+    return DElt;
+}
+
+/*
+ * Constructs 2 HUF_DEltX2s and packs them into a U64.
+ */
+static U64 HUF_buildDEltX2U64(U32 symbol, U32 nbBits, U16 baseSeq, int level)
+{
+    U32 DElt = HUF_buildDEltX2U32(symbol, nbBits, baseSeq, level);
+    return (U64)DElt + ((U64)DElt << 32);
+}
 
-    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
-    (void)wkspSize;
-    /* get pre-calculated rankVal */
-    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
+/*
+ * Fills the DTable rank with all the symbols from [begin, end) that are each
+ * nbBits long.
+ *
+ * @param DTableRank The start of the rank in the DTable.
+ * @param begin The first symbol to fill (inclusive).
+ * @param end The last symbol to fill (exclusive).
+ * @param nbBits Each symbol is nbBits long.
+ * @param tableLog The table log.
+ * @param baseSeq If level == 1 { 0 } else { the first level symbol }
+ * @param level The level in the table. Must be 1 or 2.
+ */
+static void HUF_fillDTableX2ForWeight(
+    HUF_DEltX2* DTableRank,
+    sortedSymbol_t const* begin, sortedSymbol_t const* end,
+    U32 nbBits, U32 tableLog,
+    U16 baseSeq, int const level)
+{
+    U32 const length = 1U << ((tableLog - nbBits) & 0x1F /* quiet static-analyzer */);
+    const sortedSymbol_t* ptr;
+    assert(level >= 1 && level <= 2);
+    switch (length) {
+    case 1:
+        for (ptr = begin; ptr != end; ++ptr) {
+            HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
+            *DTableRank++ = DElt;
+        }
+        break;
+    case 2:
+        for (ptr = begin; ptr != end; ++ptr) {
+            HUF_DEltX2 const DElt = HUF_buildDEltX2(ptr->symbol, nbBits, baseSeq, level);
+            DTableRank[0] = DElt;
+            DTableRank[1] = DElt;
+            DTableRank += 2;
+        }
+        break;
+    case 4:
+        for (ptr = begin; ptr != end; ++ptr) {
+            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
+            ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
+            ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
+            DTableRank += 4;
+        }
+        break;
+    case 8:
+        for (ptr = begin; ptr != end; ++ptr) {
+            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
+            ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
+            ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
+            ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
+            ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
+            DTableRank += 8;
+        }
+        break;
+    default:
+        for (ptr = begin; ptr != end; ++ptr) {
+            U64 const DEltX2 = HUF_buildDEltX2U64(ptr->symbol, nbBits, baseSeq, level);
+            HUF_DEltX2* const DTableRankEnd = DTableRank + length;
+            for (; DTableRank != DTableRankEnd; DTableRank += 8) {
+                ZSTD_memcpy(DTableRank + 0, &DEltX2, sizeof(DEltX2));
+                ZSTD_memcpy(DTableRank + 2, &DEltX2, sizeof(DEltX2));
+                ZSTD_memcpy(DTableRank + 4, &DEltX2, sizeof(DEltX2));
+                ZSTD_memcpy(DTableRank + 6, &DEltX2, sizeof(DEltX2));
+            }
+        }
+        break;
+    }
+}
 
-    /* fill skipped values */
+/* HUF_fillDTableX2Level2() :
+ * `rankValOrigin` must be a table of at least (HUF_TABLELOG_MAX + 1) U32 */
+static void HUF_fillDTableX2Level2(HUF_DEltX2* DTable, U32 targetLog, const U32 consumedBits,
+                           const U32* rankVal, const int minWeight, const int maxWeight1,
+                           const sortedSymbol_t* sortedSymbols, U32 const* rankStart,
+                           U32 nbBitsBaseline, U16 baseSeq)
+{
+    /* Fill skipped values (all positions up to rankVal[minWeight]).
+     * These are positions only get a single symbol because the combined weight
+     * is too large.
+     */
     if (minWeight>1) {
-        U32 i, skipSize = rankVal[minWeight];
-        MEM_writeLE16(&(DElt.sequence), baseSeq);
-        DElt.nbBits   = (BYTE)(consumed);
-        DElt.length   = 1;
-        for (i = 0; i < skipSize; i++)
-            DTable[i] = DElt;
+        U32 const length = 1U << ((targetLog - consumedBits) & 0x1F /* quiet static-analyzer */);
+        U64 const DEltX2 = HUF_buildDEltX2U64(baseSeq, consumedBits, /* baseSeq */ 0, /* level */ 1);
+        int const skipSize = rankVal[minWeight];
+        assert(length > 1);
+        assert((U32)skipSize < length);
+        switch (length) {
+        case 2:
+            assert(skipSize == 1);
+            ZSTD_memcpy(DTable, &DEltX2, sizeof(DEltX2));
+            break;
+        case 4:
+            assert(skipSize <= 4);
+            ZSTD_memcpy(DTable + 0, &DEltX2, sizeof(DEltX2));
+            ZSTD_memcpy(DTable + 2, &DEltX2, sizeof(DEltX2));
+            break;
+        default:
+            {
+                int i;
+                for (i = 0; i < skipSize; i += 8) {
+                    ZSTD_memcpy(DTable + i + 0, &DEltX2, sizeof(DEltX2));
+                    ZSTD_memcpy(DTable + i + 2, &DEltX2, sizeof(DEltX2));
+                    ZSTD_memcpy(DTable + i + 4, &DEltX2, sizeof(DEltX2));
+                    ZSTD_memcpy(DTable + i + 6, &DEltX2, sizeof(DEltX2));
+                }
+            }
+        }
     }
 
-    /* fill DTable */
-    {   U32 s; for (s=0; s<sortedListSize; s++) {   /* note : sortedSymbols already skipped */
-            const U32 symbol = sortedSymbols[s].symbol;
-            const U32 weight = sortedSymbols[s].weight;
-            const U32 nbBits = nbBitsBaseline - weight;
-            const U32 length = 1 << (sizeLog-nbBits);
-            const U32 start = rankVal[weight];
-            U32 i = start;
-            const U32 end = start + length;
-
-            MEM_writeLE16(&(DElt.sequence), (U16)(baseSeq + (symbol << 8)));
-            DElt.nbBits = (BYTE)(nbBits + consumed);
-            DElt.length = 2;
-            do { DTable[i++] = DElt; } while (i<end);   /* since length >= 1 */
-
-            rankVal[weight] += length;
-    }   }
+    /* Fill each of the second level symbols by weight. */
+    {
+        int w;
+        for (w = minWeight; w < maxWeight1; ++w) {
+            int const begin = rankStart[w];
+            int const end = rankStart[w+1];
+            U32 const nbBits = nbBitsBaseline - w;
+            U32 const totalBits = nbBits + consumedBits;
+            HUF_fillDTableX2ForWeight(
+                DTable + rankVal[w],
+                sortedSymbols + begin, sortedSymbols + end,
+                totalBits, targetLog,
+                baseSeq, /* level */ 2);
+        }
+    }
 }
 
-
 static void HUF_fillDTableX2(HUF_DEltX2* DTable, const U32 targetLog,
-                           const sortedSymbol_t* sortedList, const U32 sortedListSize,
+                           const sortedSymbol_t* sortedList,
                            const U32* rankStart, rankVal_t rankValOrigin, const U32 maxWeight,
-                           const U32 nbBitsBaseline, U32* wksp, size_t wkspSize)
+                           const U32 nbBitsBaseline)
 {
-    U32* rankVal = wksp;
+    U32* const rankVal = rankValOrigin[0];
     const int scaleLog = nbBitsBaseline - targetLog;   /* note : targetLog >= srcLog, hence scaleLog <= 1 */
     const U32 minBits  = nbBitsBaseline - maxWeight;
-    U32 s;
-
-    assert(wkspSize >= HUF_TABLELOG_MAX + 1);
-    wksp += HUF_TABLELOG_MAX + 1;
-    wkspSize -= HUF_TABLELOG_MAX + 1;
-
-    ZSTD_memcpy(rankVal, rankValOrigin, sizeof(U32) * (HUF_TABLELOG_MAX + 1));
-
-    /* fill DTable */
-    for (s=0; s<sortedListSize; s++) {
-        const U16 symbol = sortedList[s].symbol;
-        const U32 weight = sortedList[s].weight;
-        const U32 nbBits = nbBitsBaseline - weight;
-        const U32 start = rankVal[weight];
-        const U32 length = 1 << (targetLog-nbBits);
-
-        if (targetLog-nbBits >= minBits) {   /* enough room for a second symbol */
-            U32 sortedRank;
+    int w;
+    int const wEnd = (int)maxWeight + 1;
+
+    /* Fill DTable in order of weight. */
+    for (w = 1; w < wEnd; ++w) {
+        int const begin = (int)rankStart[w];
+        int const end = (int)rankStart[w+1];
+        U32 const nbBits = nbBitsBaseline - w;
+
+        if (targetLog-nbBits >= minBits) {
+            /* Enough room for a second symbol. */
+            int start = rankVal[w];
+            U32 const length = 1U << ((targetLog - nbBits) & 0x1F /* quiet static-analyzer */);
             int minWeight = nbBits + scaleLog;
+            int s;
             if (minWeight < 1) minWeight = 1;
-            sortedRank = rankStart[minWeight];
-            HUF_fillDTableX2Level2(DTable+start, targetLog-nbBits, nbBits,
-                           rankValOrigin[nbBits], minWeight,
-                           sortedList+sortedRank, sortedListSize-sortedRank,
-                           nbBitsBaseline, symbol, wksp, wkspSize);
+            /* Fill the DTable for every symbol of weight w.
+             * These symbols get at least 1 second symbol.
+             */
+            for (s = begin; s != end; ++s) {
+                HUF_fillDTableX2Level2(
+                    DTable + start, targetLog, nbBits,
+                    rankValOrigin[nbBits], minWeight, wEnd,
+                    sortedList, rankStart,
+                    nbBitsBaseline, sortedList[s].symbol);
+                start += length;
+            }
         } else {
-            HUF_DEltX2 DElt;
-            MEM_writeLE16(&(DElt.sequence), symbol);
-            DElt.nbBits = (BYTE)(nbBits);
-            DElt.length = 1;
-            {   U32 const end = start + length;
-                U32 u;
-                for (u = start; u < end; u++) DTable[u] = DElt;
-        }   }
-        rankVal[weight] += length;
+            /* Only a single symbol. */
+            HUF_fillDTableX2ForWeight(
+                DTable + rankVal[w],
+                sortedList + begin, sortedList + end,
+                nbBits, targetLog,
+                /* baseSeq */ 0, /* level */ 1);
+        }
     }
 }
 
 typedef struct {
     rankValCol_t rankVal[HUF_TABLELOG_MAX];
     U32 rankStats[HUF_TABLELOG_MAX + 1];
-    U32 rankStart0[HUF_TABLELOG_MAX + 2];
+    U32 rankStart0[HUF_TABLELOG_MAX + 3];
     sortedSymbol_t sortedSymbol[HUF_SYMBOLVALUE_MAX + 1];
     BYTE weightList[HUF_SYMBOLVALUE_MAX + 1];
     U32 calleeWksp[HUF_READ_STATS_WORKSPACE_SIZE_U32];
@@ -627,9 +1042,16 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
                        const void* src, size_t srcSize,
                              void* workSpace, size_t wkspSize)
 {
-    U32 tableLog, maxW, sizeOfSort, nbSymbols;
+    return HUF_readDTableX2_wksp_bmi2(DTable, src, srcSize, workSpace, wkspSize, /* bmi2 */ 0);
+}
+
+size_t HUF_readDTableX2_wksp_bmi2(HUF_DTable* DTable,
+                       const void* src, size_t srcSize,
+                             void* workSpace, size_t wkspSize, int bmi2)
+{
+    U32 tableLog, maxW, nbSymbols;
     DTableDesc dtd = HUF_getDTableDesc(DTable);
-    U32 const maxTableLog = dtd.maxTableLog;
+    U32 maxTableLog = dtd.maxTableLog;
     size_t iSize;
     void* dtPtr = DTable+1;   /* force compiler to avoid strict-aliasing */
     HUF_DEltX2* const dt = (HUF_DEltX2*)dtPtr;
@@ -647,11 +1069,12 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
     if (maxTableLog > HUF_TABLELOG_MAX) return ERROR(tableLog_tooLarge);
     /* ZSTD_memset(weightList, 0, sizeof(weightList)); */  /* is not necessary, even though some analyzer complain ... */
 
-    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), /* bmi2 */ 0);
+    iSize = HUF_readStats_wksp(wksp->weightList, HUF_SYMBOLVALUE_MAX + 1, wksp->rankStats, &nbSymbols, &tableLog, src, srcSize, wksp->calleeWksp, sizeof(wksp->calleeWksp), bmi2);
     if (HUF_isError(iSize)) return iSize;
 
     /* check result */
     if (tableLog > maxTableLog) return ERROR(tableLog_tooLarge);   /* DTable can't fit code depth */
+    if (tableLog <= HUF_DECODER_FAST_TABLELOG && maxTableLog > HUF_DECODER_FAST_TABLELOG) maxTableLog = HUF_DECODER_FAST_TABLELOG;
 
     /* find maxWeight */
     for (maxW = tableLog; wksp->rankStats[maxW]==0; maxW--) {}  /* necessarily finds a solution before 0 */
@@ -664,7 +1087,7 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
             rankStart[w] = curr;
         }
         rankStart[0] = nextRankStart;   /* put all 0w symbols at the end of sorted list*/
-        sizeOfSort = nextRankStart;
+        rankStart[maxW+1] = nextRankStart;
     }
 
     /* sort symbols by weight */
@@ -673,7 +1096,6 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
             U32 const w = wksp->weightList[s];
             U32 const r = rankStart[w]++;
             wksp->sortedSymbol[r].symbol = (BYTE)s;
-            wksp->sortedSymbol[r].weight = (BYTE)w;
         }
         rankStart[0] = 0;   /* forget 0w symbols; this is beginning of weight(1) */
     }
@@ -698,10 +1120,9 @@ size_t HUF_readDTableX2_wksp(HUF_DTable* DTable,
     }   }   }   }
 
     HUF_fillDTableX2(dt, maxTableLog,
-                   wksp->sortedSymbol, sizeOfSort,
+                   wksp->sortedSymbol,
                    wksp->rankStart0, wksp->rankVal, maxW,
-                   tableLog+1,
-                   wksp->calleeWksp, sizeof(wksp->calleeWksp) / sizeof(U32));
+                   tableLog+1);
 
     dtd.tableLog = (BYTE)maxTableLog;
     dtd.tableType = 1;
@@ -714,7 +1135,7 @@ FORCE_INLINE_TEMPLATE U32
 HUF_decodeSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    ZSTD_memcpy(op, dt+val, 2);
+    ZSTD_memcpy(op, &dt[val].sequence, 2);
     BIT_skipBits(DStream, dt[val].nbBits);
     return dt[val].length;
 }
@@ -723,15 +1144,17 @@ FORCE_INLINE_TEMPLATE U32
 HUF_decodeLastSymbolX2(void* op, BIT_DStream_t* DStream, const HUF_DEltX2* dt, const U32 dtLog)
 {
     size_t const val = BIT_lookBitsFast(DStream, dtLog);   /* note : dtLog >= 1 */
-    ZSTD_memcpy(op, dt+val, 1);
-    if (dt[val].length==1) BIT_skipBits(DStream, dt[val].nbBits);
-    else {
+    ZSTD_memcpy(op, &dt[val].sequence, 1);
+    if (dt[val].length==1) {
+        BIT_skipBits(DStream, dt[val].nbBits);
+    } else {
         if (DStream->bitsConsumed < (sizeof(DStream->bitContainer)*8)) {
             BIT_skipBits(DStream, dt[val].nbBits);
             if (DStream->bitsConsumed > (sizeof(DStream->bitContainer)*8))
                 /* ugly hack; works only because it's the last symbol. Note : can't easily extract nbBits from just this symbol */
                 DStream->bitsConsumed = (sizeof(DStream->bitContainer)*8);
-    }   }
+        }
+    }
     return 1;
 }
 
@@ -753,19 +1176,37 @@ HUF_decodeStreamX2(BYTE* p, BIT_DStream_t* bitDPtr, BYTE* const pEnd,
     BYTE* const pStart = p;
 
     /* up to 8 symbols at a time */
-    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
-        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
-        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    if ((size_t)(pEnd - p) >= sizeof(bitDPtr->bitContainer)) {
+        if (dtLog <= 11 && MEM_64bits()) {
+            /* up to 10 symbols at a time */
+            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-9)) {
+                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+            }
+        } else {
+            /* up to 8 symbols at a time */
+            while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p < pEnd-(sizeof(bitDPtr->bitContainer)-1))) {
+                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+                HUF_DECODE_SYMBOLX2_1(p, bitDPtr);
+                HUF_DECODE_SYMBOLX2_2(p, bitDPtr);
+                HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+            }
+        }
+    } else {
+        BIT_reloadDStream(bitDPtr);
     }
 
     /* closer to end : up to 2 symbols at a time */
-    while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
-        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
+    if ((size_t)(pEnd - p) >= 2) {
+        while ((BIT_reloadDStream(bitDPtr) == BIT_DStream_unfinished) & (p <= pEnd-2))
+            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);
 
-    while (p <= pEnd-2)
-        HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+        while (p <= pEnd-2)
+            HUF_DECODE_SYMBOLX2_0(p, bitDPtr);   /* no need to reload : reached the end of DStream */
+    }
 
     if (p < pEnd)
         p += HUF_decodeLastSymbolX2(p, bitDPtr, dt, dtLog);
@@ -799,7 +1240,6 @@ HUF_decompress1X2_usingDTable_internal_body(
     /* decoded size */
     return dstSize;
 }
-
 FORCE_INLINE_TEMPLATE size_t
 HUF_decompress4X2_usingDTable_internal_body(
           void* dst,  size_t dstSize,
@@ -841,57 +1281,60 @@ HUF_decompress4X2_usingDTable_internal_body(
         U32 const dtLog = dtd.tableLog;
 
         if (length4 > cSrcSize) return ERROR(corruption_detected);   /* overflow */
+        if (opStart4 > oend) return ERROR(corruption_detected);      /* overflow */
         CHECK_F( BIT_initDStream(&bitD1, istart1, length1) );
         CHECK_F( BIT_initDStream(&bitD2, istart2, length2) );
         CHECK_F( BIT_initDStream(&bitD3, istart3, length3) );
         CHECK_F( BIT_initDStream(&bitD4, istart4, length4) );
 
         /* 16-32 symbols per loop (4-8 symbols per stream) */
-        for ( ; (endSignal) & (op4 < olimit); ) {
+        if ((size_t)(oend - op4) >= sizeof(size_t)) {
+            for ( ; (endSignal) & (op4 < olimit); ) {
 #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
-            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-            endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
-            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-            endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
-            endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
+                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+                endSignal &= BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished;
+                endSignal &= BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished;
+                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+                endSignal &= BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished;
+                endSignal &= BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished;
 #else
-            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
-            HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
-            HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
-            HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
-            HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
-            HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
-            HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
-            HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
-            endSignal = (U32)LIKELY((U32)
-                        (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
-                      & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
-                      & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
-                      & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
+                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+                HUF_DECODE_SYMBOLX2_1(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_1(op2, &bitD2);
+                HUF_DECODE_SYMBOLX2_1(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_1(op4, &bitD4);
+                HUF_DECODE_SYMBOLX2_2(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_2(op2, &bitD2);
+                HUF_DECODE_SYMBOLX2_2(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_2(op4, &bitD4);
+                HUF_DECODE_SYMBOLX2_0(op1, &bitD1);
+                HUF_DECODE_SYMBOLX2_0(op2, &bitD2);
+                HUF_DECODE_SYMBOLX2_0(op3, &bitD3);
+                HUF_DECODE_SYMBOLX2_0(op4, &bitD4);
+                endSignal = (U32)LIKELY((U32)
+                            (BIT_reloadDStreamFast(&bitD1) == BIT_DStream_unfinished)
+                        & (BIT_reloadDStreamFast(&bitD2) == BIT_DStream_unfinished)
+                        & (BIT_reloadDStreamFast(&bitD3) == BIT_DStream_unfinished)
+                        & (BIT_reloadDStreamFast(&bitD4) == BIT_DStream_unfinished));
 #endif
+            }
         }
 
         /* check corruption */
@@ -915,8 +1358,99 @@ HUF_decompress4X2_usingDTable_internal_body(
     }
 }
 
+#if HUF_NEED_BMI2_FUNCTION
+static BMI2_TARGET_ATTRIBUTE
+size_t HUF_decompress4X2_usingDTable_internal_bmi2(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable) {
+    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if HUF_NEED_DEFAULT_FUNCTION
+static
+size_t HUF_decompress4X2_usingDTable_internal_default(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable) {
+    return HUF_decompress4X2_usingDTable_internal_body(dst, dstSize, cSrc, cSrcSize, DTable);
+}
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2
+
+HUF_ASM_DECL void HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(HUF_DecompressAsmArgs* args) ZSTDLIB_HIDDEN;
+
+static HUF_ASM_X86_64_BMI2_ATTRS size_t
+HUF_decompress4X2_usingDTable_internal_bmi2_asm(
+          void* dst,  size_t dstSize,
+    const void* cSrc, size_t cSrcSize,
+    const HUF_DTable* DTable) {
+    void const* dt = DTable + 1;
+    const BYTE* const iend = (const BYTE*)cSrc + 6;
+    BYTE* const oend = (BYTE*)dst + dstSize;
+    HUF_DecompressAsmArgs args;
+    {
+        size_t const ret = HUF_DecompressAsmArgs_init(&args, dst, dstSize, cSrc, cSrcSize, DTable);
+        FORWARD_IF_ERROR(ret, "Failed to init asm args");
+        if (ret != 0)
+            return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+    }
+
+    assert(args.ip[0] >= args.ilimit);
+    HUF_decompress4X2_usingDTable_internal_bmi2_asm_loop(&args);
+
+    /* note : op4 already verified within main loop */
+    assert(args.ip[0] >= iend);
+    assert(args.ip[1] >= iend);
+    assert(args.ip[2] >= iend);
+    assert(args.ip[3] >= iend);
+    assert(args.op[3] <= oend);
+    (void)iend;
+
+    /* finish bitStreams one by one */
+    {
+        size_t const segmentSize = (dstSize+3) / 4;
+        BYTE* segmentEnd = (BYTE*)dst;
+        int i;
+        for (i = 0; i < 4; ++i) {
+            BIT_DStream_t bit;
+            if (segmentSize <= (size_t)(oend - segmentEnd))
+                segmentEnd += segmentSize;
+            else
+                segmentEnd = oend;
+            FORWARD_IF_ERROR(HUF_initRemainingDStream(&bit, &args, i, segmentEnd), "corruption");
+            args.op[i] += HUF_decodeStreamX2(args.op[i], &bit, segmentEnd, (HUF_DEltX2 const*)dt, HUF_DECODER_FAST_TABLELOG);
+            if (args.op[i] != segmentEnd)
+                return ERROR(corruption_detected);
+        }
+    }
+
+    /* decoded size */
+    return dstSize;
+}
+#endif /* ZSTD_ENABLE_ASM_X86_64_BMI2 */
+
+static size_t HUF_decompress4X2_usingDTable_internal(void* dst, size_t dstSize, void const* cSrc,
+                    size_t cSrcSize, HUF_DTable const* DTable, int bmi2)
+{
+#if DYNAMIC_BMI2
+    if (bmi2) {
+# if ZSTD_ENABLE_ASM_X86_64_BMI2
+        return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+# else
+        return HUF_decompress4X2_usingDTable_internal_bmi2(dst, dstSize, cSrc, cSrcSize, DTable);
+# endif
+    }
+#else
+    (void)bmi2;
+#endif
+
+#if ZSTD_ENABLE_ASM_X86_64_BMI2 && defined(__BMI2__)
+    return HUF_decompress4X2_usingDTable_internal_bmi2_asm(dst, dstSize, cSrc, cSrcSize, DTable);
+#else
+    return HUF_decompress4X2_usingDTable_internal_default(dst, dstSize, cSrc, cSrcSize, DTable);
+#endif
+}
+
 HUF_DGEN(HUF_decompress1X2_usingDTable_internal)
-HUF_DGEN(HUF_decompress4X2_usingDTable_internal)
 
 size_t HUF_decompress1X2_usingDTable(
           void* dst,  size_t dstSize,
@@ -1025,25 +1559,25 @@ size_t HUF_decompress4X_usingDTable(void* dst, size_t maxDstSize,
 
 #if !defined(HUF_FORCE_DECOMPRESS_X1) && !defined(HUF_FORCE_DECOMPRESS_X2)
 typedef struct { U32 tableTime; U32 decode256Time; } algo_time_t;
-static const algo_time_t algoTime[16 /* Quantization */][3 /* single, double, quad */] =
+static const algo_time_t algoTime[16 /* Quantization */][2 /* single, double */] =
 {
     /* single, double, quad */
-    {{0,0}, {1,1}, {2,2}},  /* Q==0 : impossible */
-    {{0,0}, {1,1}, {2,2}},  /* Q==1 : impossible */
-    {{  38,130}, {1313, 74}, {2151, 38}},   /* Q == 2 : 12-18% */
-    {{ 448,128}, {1353, 74}, {2238, 41}},   /* Q == 3 : 18-25% */
-    {{ 556,128}, {1353, 74}, {2238, 47}},   /* Q == 4 : 25-32% */
-    {{ 714,128}, {1418, 74}, {2436, 53}},   /* Q == 5 : 32-38% */
-    {{ 883,128}, {1437, 74}, {2464, 61}},   /* Q == 6 : 38-44% */
-    {{ 897,128}, {1515, 75}, {2622, 68}},   /* Q == 7 : 44-50% */
-    {{ 926,128}, {1613, 75}, {2730, 75}},   /* Q == 8 : 50-56% */
-    {{ 947,128}, {1729, 77}, {3359, 77}},   /* Q == 9 : 56-62% */
-    {{1107,128}, {2083, 81}, {4006, 84}},   /* Q ==10 : 62-69% */
-    {{1177,128}, {2379, 87}, {4785, 88}},   /* Q ==11 : 69-75% */
-    {{1242,128}, {2415, 93}, {5155, 84}},   /* Q ==12 : 75-81% */
-    {{1349,128}, {2644,106}, {5260,106}},   /* Q ==13 : 81-87% */
-    {{1455,128}, {2422,124}, {4174,124}},   /* Q ==14 : 87-93% */
-    {{ 722,128}, {1891,145}, {1936,146}},   /* Q ==15 : 93-99% */
+    {{0,0}, {1,1}},  /* Q==0 : impossible */
+    {{0,0}, {1,1}},  /* Q==1 : impossible */
+    {{ 150,216}, { 381,119}},   /* Q == 2 : 12-18% */
+    {{ 170,205}, { 514,112}},   /* Q == 3 : 18-25% */
+    {{ 177,199}, { 539,110}},   /* Q == 4 : 25-32% */
+    {{ 197,194}, { 644,107}},   /* Q == 5 : 32-38% */
+    {{ 221,192}, { 735,107}},   /* Q == 6 : 38-44% */
+    {{ 256,189}, { 881,106}},   /* Q == 7 : 44-50% */
+    {{ 359,188}, {1167,109}},   /* Q == 8 : 50-56% */
+    {{ 582,187}, {1570,114}},   /* Q == 9 : 56-62% */
+    {{ 688,187}, {1712,122}},   /* Q ==10 : 62-69% */
+    {{ 825,186}, {1965,136}},   /* Q ==11 : 69-75% */
+    {{ 976,185}, {2131,150}},   /* Q ==12 : 75-81% */
+    {{1180,186}, {2070,175}},   /* Q ==13 : 81-87% */
+    {{1377,185}, {1731,202}},   /* Q ==14 : 87-93% */
+    {{1412,185}, {1695,202}},   /* Q ==15 : 93-99% */
 };
 #endif
 
@@ -1070,7 +1604,7 @@ U32 HUF_selectDecoder (size_t dstSize, size_t cSrcSize)
         U32 const D256 = (U32)(dstSize >> 8);
         U32 const DTime0 = algoTime[Q][0].tableTime + (algoTime[Q][0].decode256Time * D256);
         U32 DTime1 = algoTime[Q][1].tableTime + (algoTime[Q][1].decode256Time * D256);
-        DTime1 += DTime1 >> 3;  /* advantage to algorithm using less memory, to reduce cache eviction */
+        DTime1 += DTime1 >> 5;  /* small advantage to algorithm using less memory, to reduce cache eviction */
         return DTime1 < DTime0;
     }
 #endif
diff --git a/lib/zstd/decompress/zstd_decompress.c b/lib/zstd/decompress/zstd_decompress.c
index 6928e85f9d193..b9b935a9f5c0d 100644
--- a/lib/zstd/decompress/zstd_decompress.c
+++ b/lib/zstd/decompress/zstd_decompress.c
@@ -53,7 +53,6 @@
 *  Dependencies
 *********************************************************/
 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
-#include "../common/cpu.h"         /* bmi2 */
 #include "../common/mem.h"         /* low level memory routines */
 #define FSE_STATIC_LINKING_ONLY
 #include "../common/fse.h"
@@ -252,11 +251,11 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
     dctx->inBuffSize  = 0;
     dctx->outBuffSize = 0;
     dctx->streamStage = zdss_init;
-    dctx->legacyContext = NULL;
-    dctx->previousLegacyVersion = 0;
     dctx->noForwardProgress = 0;
     dctx->oversizedDuration = 0;
-    dctx->bmi2 = ZSTD_cpuid_bmi2(ZSTD_cpuid());
+#if DYNAMIC_BMI2
+    dctx->bmi2 = ZSTD_cpuSupportsBmi2();
+#endif
     dctx->ddictSet = NULL;
     ZSTD_DCtx_resetParameters(dctx);
 #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
@@ -277,8 +276,7 @@ ZSTD_DCtx* ZSTD_initStaticDCtx(void *workspace, size_t workspaceSize)
     return dctx;
 }
 
-ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
-{
+static ZSTD_DCtx* ZSTD_createDCtx_internal(ZSTD_customMem customMem) {
     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
 
     {   ZSTD_DCtx* const dctx = (ZSTD_DCtx*)ZSTD_customMalloc(sizeof(*dctx), customMem);
@@ -289,10 +287,15 @@ ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
     }
 }
 
+ZSTD_DCtx* ZSTD_createDCtx_advanced(ZSTD_customMem customMem)
+{
+    return ZSTD_createDCtx_internal(customMem);
+}
+
 ZSTD_DCtx* ZSTD_createDCtx(void)
 {
     DEBUGLOG(3, "ZSTD_createDCtx");
-    return ZSTD_createDCtx_advanced(ZSTD_defaultCMem);
+    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
 }
 
 static void ZSTD_clearDict(ZSTD_DCtx* dctx)
@@ -370,6 +373,19 @@ unsigned ZSTD_isFrame(const void* buffer, size_t size)
     return 0;
 }
 
+/*! ZSTD_isSkippableFrame() :
+ *  Tells if the content of `buffer` starts with a valid Frame Identifier for a skippable frame.
+ *  Note : Frame Identifier is 4 bytes. If `size < 4`, @return will always be 0.
+ */
+unsigned ZSTD_isSkippableFrame(const void* buffer, size_t size)
+{
+    if (size < ZSTD_FRAMEIDSIZE) return 0;
+    {   U32 const magic = MEM_readLE32(buffer);
+        if ((magic & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) return 1;
+    }
+    return 0;
+}
+
 /* ZSTD_frameHeaderSize_internal() :
  *  srcSize must be large enough to reach header size fields.
  *  note : only works for formats ZSTD_f_zstd1 and ZSTD_f_zstd1_magicless.
@@ -497,7 +513,6 @@ size_t ZSTD_getFrameHeader(ZSTD_frameHeader* zfhPtr, const void* src, size_t src
     return ZSTD_getFrameHeader_advanced(zfhPtr, src, srcSize, ZSTD_f_zstd1);
 }
 
-
 /* ZSTD_getFrameContentSize() :
  *  compatible with legacy mode
  * @return : decompressed size of the single frame pointed to be `src` if known, otherwise
@@ -532,6 +547,37 @@ static size_t readSkippableFrameSize(void const* src, size_t srcSize)
     }
 }
 
+/*! ZSTD_readSkippableFrame() :
+ * Retrieves a zstd skippable frame containing data given by src, and writes it to dst buffer.
+ *
+ * The parameter magicVariant will receive the magicVariant that was supplied when the frame was written,
+ * i.e. magicNumber - ZSTD_MAGIC_SKIPPABLE_START.  This can be NULL if the caller is not interested
+ * in the magicVariant.
+ *
+ * Returns an error if destination buffer is not large enough, or if the frame is not skippable.
+ *
+ * @return : number of bytes written or a ZSTD error.
+ */
+ZSTDLIB_API size_t ZSTD_readSkippableFrame(void* dst, size_t dstCapacity, unsigned* magicVariant,
+                                            const void* src, size_t srcSize)
+{
+    U32 const magicNumber = MEM_readLE32(src);
+    size_t skippableFrameSize = readSkippableFrameSize(src, srcSize);
+    size_t skippableContentSize = skippableFrameSize - ZSTD_SKIPPABLEHEADERSIZE;
+
+    /* check input validity */
+    RETURN_ERROR_IF(!ZSTD_isSkippableFrame(src, srcSize), frameParameter_unsupported, "");
+    RETURN_ERROR_IF(skippableFrameSize < ZSTD_SKIPPABLEHEADERSIZE || skippableFrameSize > srcSize, srcSize_wrong, "");
+    RETURN_ERROR_IF(skippableContentSize > dstCapacity, dstSize_tooSmall, "");
+
+    /* deliver payload */
+    if (skippableContentSize > 0  && dst != NULL)
+        ZSTD_memcpy(dst, (const BYTE *)src + ZSTD_SKIPPABLEHEADERSIZE, skippableContentSize);
+    if (magicVariant != NULL)
+        *magicVariant = magicNumber - ZSTD_MAGIC_SKIPPABLE_START;
+    return skippableContentSize;
+}
+
 /* ZSTD_findDecompressedSize() :
  *  compatible with legacy mode
  *  `srcSize` must be the exact length of some number of ZSTD compressed and/or
@@ -824,7 +870,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
         switch(blockProperties.blockType)
         {
         case bt_compressed:
-            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
+            decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
             break;
         case bt_raw :
             decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
@@ -976,7 +1022,7 @@ size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t sr
 {
 #if defined(ZSTD_HEAPMODE) && (ZSTD_HEAPMODE>=1)
     size_t regenSize;
-    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
+    ZSTD_DCtx* const dctx =  ZSTD_createDCtx_internal(ZSTD_defaultCMem);
     RETURN_ERROR_IF(dctx==NULL, memory_allocation, "NULL pointer!");
     regenSize = ZSTD_decompressDCtx(dctx, dst, dstCapacity, src, srcSize);
     ZSTD_freeDCtx(dctx);
@@ -1010,7 +1056,7 @@ static size_t ZSTD_nextSrcSizeToDecompressWithInputSize(ZSTD_DCtx* dctx, size_t
         return dctx->expected;
     if (dctx->bType != bt_raw)
         return dctx->expected;
-    return MIN(MAX(inputSize, 1), dctx->expected);
+    return BOUNDED(1, inputSize, dctx->expected);
 }
 
 ZSTD_nextInputType_e ZSTD_nextInputType(ZSTD_DCtx* dctx) {
@@ -1116,7 +1162,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
             {
             case bt_compressed:
                 DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
-                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
+                rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
                 dctx->expected = 0;  /* Streaming not supported */
                 break;
             case bt_raw :
@@ -1438,7 +1484,7 @@ size_t ZSTD_decompress_usingDDict(ZSTD_DCtx* dctx,
 ZSTD_DStream* ZSTD_createDStream(void)
 {
     DEBUGLOG(3, "ZSTD_createDStream");
-    return ZSTD_createDStream_advanced(ZSTD_defaultCMem);
+    return ZSTD_createDCtx_internal(ZSTD_defaultCMem);
 }
 
 ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
@@ -1448,7 +1494,7 @@ ZSTD_DStream* ZSTD_initStaticDStream(void *workspace, size_t workspaceSize)
 
 ZSTD_DStream* ZSTD_createDStream_advanced(ZSTD_customMem customMem)
 {
-    return ZSTD_createDCtx_advanced(customMem);
+    return ZSTD_createDCtx_internal(customMem);
 }
 
 size_t ZSTD_freeDStream(ZSTD_DStream* zds)
@@ -1708,7 +1754,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
 size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
 {
     size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
-    unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
+    /* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
+    unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
     unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
     size_t const minRBSize = (size_t) neededSize;
     RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
@@ -1842,7 +1889,6 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
             DEBUGLOG(5, "stage zdss_init => transparent reset ");
             zds->streamStage = zdss_loadHeader;
             zds->lhSize = zds->inPos = zds->outStart = zds->outEnd = 0;
-            zds->legacyVersion = 0;
             zds->hostageByte = 0;
             zds->expectedOutBuffer = *output;
             ZSTD_FALLTHROUGH;
diff --git a/lib/zstd/decompress/zstd_decompress_block.c b/lib/zstd/decompress/zstd_decompress_block.c
index 2d101d9a842ec..c1913b8e7c897 100644
--- a/lib/zstd/decompress/zstd_decompress_block.c
+++ b/lib/zstd/decompress/zstd_decompress_block.c
@@ -69,15 +69,56 @@ size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
     }
 }
 
+/* Allocate buffer for literals, either overlapping current dst, or split between dst and litExtraBuffer, or stored entirely within litExtraBuffer */
+static void ZSTD_allocateLiteralsBuffer(ZSTD_DCtx* dctx, void* const dst, const size_t dstCapacity, const size_t litSize,
+    const streaming_operation streaming, const size_t expectedWriteSize, const unsigned splitImmediately)
+{
+    if (streaming == not_streaming && dstCapacity > ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH + litSize + WILDCOPY_OVERLENGTH)
+    {
+        /* room for litbuffer to fit without read faulting */
+        dctx->litBuffer = (BYTE*)dst + ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH;
+        dctx->litBufferEnd = dctx->litBuffer + litSize;
+        dctx->litBufferLocation = ZSTD_in_dst;
+    }
+    else if (litSize > ZSTD_LITBUFFEREXTRASIZE)
+    {
+        /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+        if (splitImmediately) {
+            /* won't fit in litExtraBuffer, so it will be split between end of dst and extra buffer */
+            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+            dctx->litBufferEnd = dctx->litBuffer + litSize - ZSTD_LITBUFFEREXTRASIZE;
+        }
+        else {
+            /* initially this will be stored entirely in dst during huffman decoding, it will partially shifted to litExtraBuffer after */
+            dctx->litBuffer = (BYTE*)dst + expectedWriteSize - litSize;
+            dctx->litBufferEnd = (BYTE*)dst + expectedWriteSize;
+        }
+        dctx->litBufferLocation = ZSTD_split;
+    }
+    else
+    {
+        /* fits entirely within litExtraBuffer, so no split is necessary */
+        dctx->litBuffer = dctx->litExtraBuffer;
+        dctx->litBufferEnd = dctx->litBuffer + litSize;
+        dctx->litBufferLocation = ZSTD_not_in_dst;
+    }
+}
 
 /* Hidden declaration for fullbench */
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                          const void* src, size_t srcSize);
+                          const void* src, size_t srcSize,
+                          void* dst, size_t dstCapacity, const streaming_operation streaming);
 /*! ZSTD_decodeLiteralsBlock() :
+ * Where it is possible to do so without being stomped by the output during decompression, the literals block will be stored
+ * in the dstBuffer.  If there is room to do so, it will be stored in full in the excess dst space after where the current
+ * block will be output.  Otherwise it will be stored at the end of the current dst blockspace, with a small portion being
+ * stored in dctx->litExtraBuffer to help keep it "ahead" of the current output write.
+ *
  * @return : nb of bytes read from src (< srcSize )
  *  note : symbol not declared but exposed for fullbench */
 size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
-                          const void* src, size_t srcSize)   /* note : srcSize < BLOCKSIZE */
+                          const void* src, size_t srcSize,   /* note : srcSize < BLOCKSIZE */
+                          void* dst, size_t dstCapacity, const streaming_operation streaming)
 {
     DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
     RETURN_ERROR_IF(srcSize < MIN_CBLOCK_SIZE, corruption_detected, "");
@@ -99,6 +140,7 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 U32 const lhlCode = (istart[0] >> 2) & 3;
                 U32 const lhc = MEM_readLE32(istart);
                 size_t hufSuccess;
+                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                 switch(lhlCode)
                 {
                 case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -121,8 +163,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                     break;
                 }
+                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
                 RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
+                RETURN_ERROR_IF(expectedWriteSize < litSize , dstSize_tooSmall, "");
+                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 0);
 
                 /* prefetch huffman table if cold */
                 if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
@@ -133,11 +178,11 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     if (singleStream) {
                         hufSuccess = HUF_decompress1X_usingDTable_bmi2(
                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
-                            dctx->HUFptr, dctx->bmi2);
+                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                     } else {
                         hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                             dctx->litBuffer, litSize, istart+lhSize, litCSize,
-                            dctx->HUFptr, dctx->bmi2);
+                            dctx->HUFptr, ZSTD_DCtx_get_bmi2(dctx));
                     }
                 } else {
                     if (singleStream) {
@@ -150,15 +195,22 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                         hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
                             istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace), dctx->bmi2);
+                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
 #endif
                     } else {
                         hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
                             dctx->entropy.hufTable, dctx->litBuffer, litSize,
                             istart+lhSize, litCSize, dctx->workspace,
-                            sizeof(dctx->workspace), dctx->bmi2);
+                            sizeof(dctx->workspace), ZSTD_DCtx_get_bmi2(dctx));
                     }
                 }
+                if (dctx->litBufferLocation == ZSTD_split)
+                {
+                    ZSTD_memcpy(dctx->litExtraBuffer, dctx->litBufferEnd - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
+                    ZSTD_memmove(dctx->litBuffer + ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH, dctx->litBuffer, litSize - ZSTD_LITBUFFEREXTRASIZE);
+                    dctx->litBuffer += ZSTD_LITBUFFEREXTRASIZE - WILDCOPY_OVERLENGTH;
+                    dctx->litBufferEnd -= WILDCOPY_OVERLENGTH;
+                }
 
                 RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");
 
@@ -166,13 +218,13 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                 dctx->litSize = litSize;
                 dctx->litEntropy = 1;
                 if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
-                ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                 return litCSize + lhSize;
             }
 
         case set_basic:
             {   size_t litSize, lhSize;
                 U32 const lhlCode = ((istart[0]) >> 2) & 3;
+                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                 switch(lhlCode)
                 {
                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -189,23 +241,36 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     break;
                 }
 
+                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
+                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
                 if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                     RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
-                    ZSTD_memcpy(dctx->litBuffer, istart+lhSize, litSize);
+                    if (dctx->litBufferLocation == ZSTD_split)
+                    {
+                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize - ZSTD_LITBUFFEREXTRASIZE);
+                        ZSTD_memcpy(dctx->litExtraBuffer, istart + lhSize + litSize - ZSTD_LITBUFFEREXTRASIZE, ZSTD_LITBUFFEREXTRASIZE);
+                    }
+                    else
+                    {
+                        ZSTD_memcpy(dctx->litBuffer, istart + lhSize, litSize);
+                    }
                     dctx->litPtr = dctx->litBuffer;
                     dctx->litSize = litSize;
-                    ZSTD_memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                     return lhSize+litSize;
                 }
                 /* direct reference into compressed stream */
                 dctx->litPtr = istart+lhSize;
                 dctx->litSize = litSize;
+                dctx->litBufferEnd = dctx->litPtr + litSize;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
                 return lhSize+litSize;
             }
 
         case set_rle:
             {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
                 size_t litSize, lhSize;
+                size_t expectedWriteSize = MIN(ZSTD_BLOCKSIZE_MAX, dstCapacity);
                 switch(lhlCode)
                 {
                 case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
@@ -222,8 +287,19 @@ size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                     RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                     break;
                 }
+                RETURN_ERROR_IF(litSize > 0 && dst == NULL, dstSize_tooSmall, "NULL not handled");
                 RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
-                ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
+                RETURN_ERROR_IF(expectedWriteSize < litSize, dstSize_tooSmall, "");
+                ZSTD_allocateLiteralsBuffer(dctx, dst, dstCapacity, litSize, streaming, expectedWriteSize, 1);
+                if (dctx->litBufferLocation == ZSTD_split)
+                {
+                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize - ZSTD_LITBUFFEREXTRASIZE);
+                    ZSTD_memset(dctx->litExtraBuffer, istart[lhSize], ZSTD_LITBUFFEREXTRASIZE);
+                }
+                else
+                {
+                    ZSTD_memset(dctx->litBuffer, istart[lhSize], litSize);
+                }
                 dctx->litPtr = dctx->litBuffer;
                 dctx->litSize = litSize;
                 return lhSize+1;
@@ -343,7 +419,7 @@ static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
 };   /* ML_defaultDTable */
 
 
-static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
+static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U8 nbAddBits)
 {
     void* ptr = dt;
     ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
@@ -355,7 +431,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
     cell->nbBits = 0;
     cell->nextState = 0;
     assert(nbAddBits < 255);
-    cell->nbAdditionalBits = (BYTE)nbAddBits;
+    cell->nbAdditionalBits = nbAddBits;
     cell->baseValue = baseValue;
 }
 
@@ -367,7 +443,7 @@ static void ZSTD_buildSeqTable_rle(ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddB
 FORCE_INLINE_TEMPLATE
 void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U32* nbAdditionalBits,
+            const U32* baseValue, const U8* nbAdditionalBits,
             unsigned tableLog, void* wksp, size_t wkspSize)
 {
     ZSTD_seqSymbol* const tableDecode = dt+1;
@@ -478,7 +554,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
             tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
             tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
             assert(nbAdditionalBits[symbol] < 255);
-            tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
+            tableDecode[u].nbAdditionalBits = nbAdditionalBits[symbol];
             tableDecode[u].baseValue = baseValue[symbol];
         }
     }
@@ -487,7 +563,7 @@ void ZSTD_buildFSETable_body(ZSTD_seqSymbol* dt,
 /* Avoids the FORCE_INLINE of the _body() function. */
 static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U32* nbAdditionalBits,
+            const U32* baseValue, const U8* nbAdditionalBits,
             unsigned tableLog, void* wksp, size_t wkspSize)
 {
     ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
@@ -495,9 +571,9 @@ static void ZSTD_buildFSETable_body_default(ZSTD_seqSymbol* dt,
 }
 
 #if DYNAMIC_BMI2
-TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
+BMI2_TARGET_ATTRIBUTE static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U32* nbAdditionalBits,
+            const U32* baseValue, const U8* nbAdditionalBits,
             unsigned tableLog, void* wksp, size_t wkspSize)
 {
     ZSTD_buildFSETable_body(dt, normalizedCounter, maxSymbolValue,
@@ -507,7 +583,7 @@ TARGET_ATTRIBUTE("bmi2") static void ZSTD_buildFSETable_body_bmi2(ZSTD_seqSymbol
 
 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
             const short* normalizedCounter, unsigned maxSymbolValue,
-            const U32* baseValue, const U32* nbAdditionalBits,
+            const U32* baseValue, const U8* nbAdditionalBits,
             unsigned tableLog, void* wksp, size_t wkspSize, int bmi2)
 {
 #if DYNAMIC_BMI2
@@ -529,7 +605,7 @@ void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
 static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
                                  symbolEncodingType_e type, unsigned max, U32 maxLog,
                                  const void* src, size_t srcSize,
-                                 const U32* baseValue, const U32* nbAdditionalBits,
+                                 const U32* baseValue, const U8* nbAdditionalBits,
                                  const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
                                  int ddictIsCold, int nbSeq, U32* wksp, size_t wkspSize,
                                  int bmi2)
@@ -541,7 +617,7 @@ static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymb
         RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
         {   U32 const symbol = *(const BYTE*)src;
             U32 const baseline = baseValue[symbol];
-            U32 const nbBits = nbAdditionalBits[symbol];
+            U8 const nbBits = nbAdditionalBits[symbol];
             ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
         }
         *DTablePtr = DTableSpace;
@@ -620,7 +696,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       LL_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq,
                                                       dctx->workspace, sizeof(dctx->workspace),
-                                                      dctx->bmi2);
+                                                      ZSTD_DCtx_get_bmi2(dctx));
             RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += llhSize;
         }
@@ -632,7 +708,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       OF_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq,
                                                       dctx->workspace, sizeof(dctx->workspace),
-                                                      dctx->bmi2);
+                                                      ZSTD_DCtx_get_bmi2(dctx));
             RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += ofhSize;
         }
@@ -644,7 +720,7 @@ size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                                                       ML_defaultDTable, dctx->fseEntropy,
                                                       dctx->ddictIsCold, nbSeq,
                                                       dctx->workspace, sizeof(dctx->workspace),
-                                                      dctx->bmi2);
+                                                      ZSTD_DCtx_get_bmi2(dctx));
             RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
             ip += mlhSize;
         }
@@ -658,7 +734,6 @@ typedef struct {
     size_t litLength;
     size_t matchLength;
     size_t offset;
-    const BYTE* match;
 } seq_t;
 
 typedef struct {
@@ -672,9 +747,6 @@ typedef struct {
     ZSTD_fseState stateOffb;
     ZSTD_fseState stateML;
     size_t prevOffset[ZSTD_REP_NUM];
-    const BYTE* prefixStart;
-    const BYTE* dictEnd;
-    size_t pos;
 } seqState_t;
 
 /*! ZSTD_overlapCopy8() :
@@ -717,7 +789,7 @@ HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
  *         - ZSTD_overlap_src_before_dst: The src and dst may overlap and may be any distance apart.
  *           The src buffer must be before the dst buffer.
  */
-static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
+static void ZSTD_safecopy(BYTE* op, const BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
     ptrdiff_t const diff = op - ip;
     BYTE* const oend = op + length;
 
@@ -733,6 +805,7 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
         /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
         assert(length >= 8);
         ZSTD_overlapCopy8(&op, &ip, diff);
+        length -= 8;
         assert(op - ip >= 8);
         assert(op <= oend);
     }
@@ -747,8 +820,31 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
         assert(oend > oend_w);
         ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
         ip += oend_w - op;
-        op = oend_w;
+        op += oend_w - op;
+    }
+    /* Handle the leftovers. */
+    while (op < oend) *op++ = *ip++;
+}
+
+/* ZSTD_safecopyDstBeforeSrc():
+ * This version allows overlap with dst before src, or handles the non-overlap case with dst after src
+ * Kept separate from more common ZSTD_safecopy case to avoid performance impact to the safecopy common case */
+static void ZSTD_safecopyDstBeforeSrc(BYTE* op, BYTE const* ip, ptrdiff_t length) {
+    ptrdiff_t const diff = op - ip;
+    BYTE* const oend = op + length;
+
+    if (length < 8 || diff > -8) {
+        /* Handle short lengths, close overlaps, and dst not before src. */
+        while (op < oend) *op++ = *ip++;
+        return;
+    }
+
+    if (op <= oend - WILDCOPY_OVERLENGTH && diff < -WILDCOPY_VECLEN) {
+        ZSTD_wildcopy(op, ip, oend - WILDCOPY_OVERLENGTH - op, ZSTD_no_overlap);
+        ip += oend - WILDCOPY_OVERLENGTH - op;
+        op += oend - WILDCOPY_OVERLENGTH - op;
     }
+
     /* Handle the leftovers. */
     while (op < oend) *op++ = *ip++;
 }
@@ -763,9 +859,9 @@ static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_
  */
 FORCE_NOINLINE
 size_t ZSTD_execSequenceEnd(BYTE* op,
-                            BYTE* const oend, seq_t sequence,
-                            const BYTE** litPtr, const BYTE* const litLimit,
-                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+    BYTE* const oend, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -788,27 +884,76 @@ size_t ZSTD_execSequenceEnd(BYTE* op,
     if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
         /* offset beyond prefix */
         RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
-        match = dictEnd - (prefixStart-match);
+        match = dictEnd - (prefixStart - match);
         if (match + sequence.matchLength <= dictEnd) {
             ZSTD_memmove(oLitEnd, match, sequence.matchLength);
             return sequenceLength;
         }
         /* span extDict & currentPrefixSegment */
         {   size_t const length1 = dictEnd - match;
-            ZSTD_memmove(oLitEnd, match, length1);
-            op = oLitEnd + length1;
-            sequence.matchLength -= length1;
-            match = prefixStart;
-    }   }
+        ZSTD_memmove(oLitEnd, match, length1);
+        op = oLitEnd + length1;
+        sequence.matchLength -= length1;
+        match = prefixStart;
+        }
+    }
+    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
+    return sequenceLength;
+}
+
+/* ZSTD_execSequenceEndSplitLitBuffer():
+ * This version is intended to be used during instances where the litBuffer is still split.  It is kept separate to avoid performance impact for the good case.
+ */
+FORCE_NOINLINE
+size_t ZSTD_execSequenceEndSplitLitBuffer(BYTE* op,
+    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
+
+    /* bounds checks : careful of address space overflow in 32-bit mode */
+    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
+    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
+    assert(op < op + sequenceLength);
+    assert(oLitEnd < op + sequenceLength);
+
+    /* copy literals */
+    RETURN_ERROR_IF(op > *litPtr && op < *litPtr + sequence.litLength, dstSize_tooSmall, "output should not catch up to and overwrite literal buffer");
+    ZSTD_safecopyDstBeforeSrc(op, *litPtr, sequence.litLength);
+    op = oLitEnd;
+    *litPtr = iLitEnd;
+
+    /* copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix */
+        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
+        match = dictEnd - (prefixStart - match);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+        ZSTD_memmove(oLitEnd, match, length1);
+        op = oLitEnd + length1;
+        sequence.matchLength -= length1;
+        match = prefixStart;
+        }
+    }
     ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
     return sequenceLength;
 }
 
 HINT_INLINE
 size_t ZSTD_execSequence(BYTE* op,
-                         BYTE* const oend, seq_t sequence,
-                         const BYTE** litPtr, const BYTE* const litLimit,
-                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+    BYTE* const oend, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
 {
     BYTE* const oLitEnd = op + sequence.litLength;
     size_t const sequenceLength = sequence.litLength + sequence.matchLength;
@@ -817,6 +962,98 @@ size_t ZSTD_execSequence(BYTE* op,
     const BYTE* const iLitEnd = *litPtr + sequence.litLength;
     const BYTE* match = oLitEnd - sequence.offset;
 
+    assert(op != NULL /* Precondition */);
+    assert(oend_w < oend /* No underflow */);
+    /* Handle edge cases in a slow path:
+     *   - Read beyond end of literals
+     *   - Match end is within WILDCOPY_OVERLIMIT of oend
+     *   - 32-bit mode and the match length overflows
+     */
+    if (UNLIKELY(
+        iLitEnd > litLimit ||
+        oMatchEnd > oend_w ||
+        (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
+        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+
+    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
+    assert(op <= oLitEnd /* No overflow */);
+    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
+    assert(oMatchEnd <= oend /* No underflow */);
+    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
+    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
+    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);
+
+    /* Copy Literals:
+     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
+     * We likely don't need the full 32-byte wildcopy.
+     */
+    assert(WILDCOPY_OVERLENGTH >= 16);
+    ZSTD_copy16(op, (*litPtr));
+    if (UNLIKELY(sequence.litLength > 16)) {
+        ZSTD_wildcopy(op + 16, (*litPtr) + 16, sequence.litLength - 16, ZSTD_no_overlap);
+    }
+    op = oLitEnd;
+    *litPtr = iLitEnd;   /* update for next sequence */
+
+    /* Copy Match */
+    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
+        /* offset beyond prefix -> go into extDict */
+        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
+        match = dictEnd + (match - prefixStart);
+        if (match + sequence.matchLength <= dictEnd) {
+            ZSTD_memmove(oLitEnd, match, sequence.matchLength);
+            return sequenceLength;
+        }
+        /* span extDict & currentPrefixSegment */
+        {   size_t const length1 = dictEnd - match;
+        ZSTD_memmove(oLitEnd, match, length1);
+        op = oLitEnd + length1;
+        sequence.matchLength -= length1;
+        match = prefixStart;
+        }
+    }
+    /* Match within prefix of 1 or more bytes */
+    assert(op <= oMatchEnd);
+    assert(oMatchEnd <= oend_w);
+    assert(match >= prefixStart);
+    assert(sequence.matchLength >= 1);
+
+    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
+     * without overlap checking.
+     */
+    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
+        /* We bet on a full wildcopy for matches, since we expect matches to be
+         * longer than literals (in general). In silesia, ~10% of matches are longer
+         * than 16 bytes.
+         */
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
+        return sequenceLength;
+    }
+    assert(sequence.offset < WILDCOPY_VECLEN);
+
+    /* Copy 8 bytes and spread the offset to be >= 8. */
+    ZSTD_overlapCopy8(&op, &match, sequence.offset);
+
+    /* If the match length is > 8 bytes, then continue with the wildcopy. */
+    if (sequence.matchLength > 8) {
+        assert(op < oMatchEnd);
+        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength - 8, ZSTD_overlap_src_before_dst);
+    }
+    return sequenceLength;
+}
+
+HINT_INLINE
+size_t ZSTD_execSequenceSplitLitBuffer(BYTE* op,
+    BYTE* const oend, const BYTE* const oend_w, seq_t sequence,
+    const BYTE** litPtr, const BYTE* const litLimit,
+    const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
+{
+    BYTE* const oLitEnd = op + sequence.litLength;
+    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
+    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
+    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
+    const BYTE* match = oLitEnd - sequence.offset;
+
     assert(op != NULL /* Precondition */);
     assert(oend_w < oend /* No underflow */);
     /* Handle edge cases in a slow path:
@@ -828,7 +1065,7 @@ size_t ZSTD_execSequence(BYTE* op,
             iLitEnd > litLimit ||
             oMatchEnd > oend_w ||
             (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
-        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
+        return ZSTD_execSequenceEndSplitLitBuffer(op, oend, oend_w, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);
 
     /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
     assert(op <= oLitEnd /* No overflow */);
@@ -896,6 +1133,7 @@ size_t ZSTD_execSequence(BYTE* op,
     return sequenceLength;
 }
 
+
 static void
 ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
 {
@@ -909,20 +1147,10 @@ ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqS
 }
 
 FORCE_INLINE_TEMPLATE void
-ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
-{
-    ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
-    U32 const nbBits = DInfo.nbBits;
-    size_t const lowBits = BIT_readBits(bitD, nbBits);
-    DStatePtr->state = DInfo.nextState + lowBits;
-}
-
-FORCE_INLINE_TEMPLATE void
-ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
+ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, U16 nextState, U32 nbBits)
 {
-    U32 const nbBits = DInfo.nbBits;
     size_t const lowBits = BIT_readBits(bitD, nbBits);
-    DStatePtr->state = DInfo.nextState + lowBits;
+    DStatePtr->state = nextState + lowBits;
 }
 
 /* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
@@ -936,116 +1164,105 @@ ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD
         : 0)
 
 typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
-typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
 
 FORCE_INLINE_TEMPLATE seq_t
-ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
+ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets)
 {
     seq_t seq;
-    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
-    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
-    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
-    U32 const llBase = llDInfo.baseValue;
-    U32 const mlBase = mlDInfo.baseValue;
-    U32 const ofBase = ofDInfo.baseValue;
-    BYTE const llBits = llDInfo.nbAdditionalBits;
-    BYTE const mlBits = mlDInfo.nbAdditionalBits;
-    BYTE const ofBits = ofDInfo.nbAdditionalBits;
-    BYTE const totalBits = llBits+mlBits+ofBits;
-
-    /* sequence */
-    {   size_t offset;
-        if (ofBits > 1) {
-            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
-            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
-            assert(ofBits <= MaxOff);
-            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
-                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
-                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
-                BIT_reloadDStream(&seqState->DStream);
-                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
-                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
-            } else {
-                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
-                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
-            }
-            seqState->prevOffset[2] = seqState->prevOffset[1];
-            seqState->prevOffset[1] = seqState->prevOffset[0];
-            seqState->prevOffset[0] = offset;
-        } else {
-            U32 const ll0 = (llBase == 0);
-            if (LIKELY((ofBits == 0))) {
-                if (LIKELY(!ll0))
-                    offset = seqState->prevOffset[0];
-                else {
-                    offset = seqState->prevOffset[1];
-                    seqState->prevOffset[1] = seqState->prevOffset[0];
-                    seqState->prevOffset[0] = offset;
+    const ZSTD_seqSymbol* const llDInfo = seqState->stateLL.table + seqState->stateLL.state;
+    const ZSTD_seqSymbol* const mlDInfo = seqState->stateML.table + seqState->stateML.state;
+    const ZSTD_seqSymbol* const ofDInfo = seqState->stateOffb.table + seqState->stateOffb.state;
+    seq.matchLength = mlDInfo->baseValue;
+    seq.litLength = llDInfo->baseValue;
+    {   U32 const ofBase = ofDInfo->baseValue;
+        BYTE const llBits = llDInfo->nbAdditionalBits;
+        BYTE const mlBits = mlDInfo->nbAdditionalBits;
+        BYTE const ofBits = ofDInfo->nbAdditionalBits;
+        BYTE const totalBits = llBits+mlBits+ofBits;
+
+        U16 const llNext = llDInfo->nextState;
+        U16 const mlNext = mlDInfo->nextState;
+        U16 const ofNext = ofDInfo->nextState;
+        U32 const llnbBits = llDInfo->nbBits;
+        U32 const mlnbBits = mlDInfo->nbBits;
+        U32 const ofnbBits = ofDInfo->nbBits;
+        /*
+         * As gcc has better branch and block analyzers, sometimes it is only
+         * valuable to mark likelyness for clang, it gives around 3-4% of
+         * performance.
+         */
+
+        /* sequence */
+        {   size_t offset;
+    #if defined(__clang__)
+            if (LIKELY(ofBits > 1)) {
+    #else
+            if (ofBits > 1) {
+    #endif
+                ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
+                ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
+                assert(ofBits <= MaxOff);
+                if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
+                    U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
+                    offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
+                    BIT_reloadDStream(&seqState->DStream);
+                    if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
+                    assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
+                } else {
+                    offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <=  (ZSTD_WINDOWLOG_MAX-1) bits */
+                    if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
                 }
+                seqState->prevOffset[2] = seqState->prevOffset[1];
+                seqState->prevOffset[1] = seqState->prevOffset[0];
+                seqState->prevOffset[0] = offset;
             } else {
-                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
-                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
-                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
-                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
-                    seqState->prevOffset[1] = seqState->prevOffset[0];
-                    seqState->prevOffset[0] = offset = temp;
-        }   }   }
-        seq.offset = offset;
-    }
-
-    seq.matchLength = mlBase;
-    if (mlBits > 0)
-        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
-
-    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
-        BIT_reloadDStream(&seqState->DStream);
-    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
-        BIT_reloadDStream(&seqState->DStream);
-    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
-    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
-
-    seq.litLength = llBase;
-    if (llBits > 0)
-        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
-
-    if (MEM_32bits())
-        BIT_reloadDStream(&seqState->DStream);
-
-    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
-                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
-
-    if (prefetch == ZSTD_p_prefetch) {
-        size_t const pos = seqState->pos + seq.litLength;
-        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
-        seq.match = matchBase + pos - seq.offset;  /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
-                                                    * No consequence though : no memory access will occur, offset is only used for prefetching */
-        seqState->pos = pos + seq.matchLength;
-    }
-
-    /* ANS state update
-     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
-     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
-     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
-     * better option, so it is the default for other compilers. But, if you
-     * measure that it is worse, please put up a pull request.
-     */
-    {
-#if !defined(__clang__)
-        const int kUseUpdateFseState = 1;
-#else
-        const int kUseUpdateFseState = 0;
-#endif
-        if (kUseUpdateFseState) {
-            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
-            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
-            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
-        } else {
-            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
-            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
-            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
-            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
+                U32 const ll0 = (llDInfo->baseValue == 0);
+                if (LIKELY((ofBits == 0))) {
+                    offset = seqState->prevOffset[ll0];
+                    seqState->prevOffset[1] = seqState->prevOffset[!ll0];
+                    seqState->prevOffset[0] = offset;
+                } else {
+                    offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
+                    {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
+                        temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
+                        if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
+                        seqState->prevOffset[1] = seqState->prevOffset[0];
+                        seqState->prevOffset[0] = offset = temp;
+            }   }   }
+            seq.offset = offset;
         }
+
+    #if defined(__clang__)
+        if (UNLIKELY(mlBits > 0))
+    #else
+        if (mlBits > 0)
+    #endif
+            seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);
+
+        if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
+            BIT_reloadDStream(&seqState->DStream);
+        if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
+            BIT_reloadDStream(&seqState->DStream);
+        /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
+        ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);
+
+    #if defined(__clang__)
+        if (UNLIKELY(llBits > 0))
+    #else
+        if (llBits > 0)
+    #endif
+            seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);
+
+        if (MEM_32bits())
+            BIT_reloadDStream(&seqState->DStream);
+
+        DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
+                    (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
+
+        ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llNext, llnbBits);    /* <=  9 bits */
+        ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlNext, mlnbBits);    /* <=  9 bits */
+        if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
+        ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofNext, ofnbBits);  /* <=  8 bits */
     }
 
     return seq;
@@ -1098,9 +1315,11 @@ MEM_STATIC void ZSTD_assertValidSequence(
 #endif
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
+
+
 FORCE_INLINE_TEMPLATE size_t
 DONT_VECTORIZE
-ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
+ZSTD_decompressSequences_bodySplitLitBuffer( ZSTD_DCtx* dctx,
                                void* dst, size_t maxDstSize,
                          const void* seqStart, size_t seqSize, int nbSeq,
                          const ZSTD_longOffset_e isLongOffset,
@@ -1112,17 +1331,16 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
     BYTE* const oend = ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
-    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer");
     (void)frame;
 
     /* Regen sequences */
     if (nbSeq) {
         seqState_t seqState;
-        size_t error = 0;
         dctx->fseEntropy = 1;
         { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
         RETURN_ERROR_IF(
@@ -1138,70 +1356,255 @@ ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                 BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                 BIT_DStream_completed < BIT_DStream_overflow);
 
+        /* decompress without overrunning litPtr begins */
+        {
+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            /* Align the decompression loop to 32 + 16 bytes.
+                *
+                * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
+                * speed swings based on the alignment of the decompression loop. This
+                * performance swing is caused by parts of the decompression loop falling
+                * out of the DSB. The entire decompression loop should fit in the DSB,
+                * when it can't we get much worse performance. You can measure if you've
+                * hit the good case or the bad case with this perf command for some
+                * compressed file test.zst:
+                *
+                *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
+                *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
+                *
+                * If you see most cycles served out of the MITE you've hit the bad case.
+                * If you see most cycles served out of the DSB you've hit the good case.
+                * If it is pretty even then you may be in an okay case.
+                *
+                * This issue has been reproduced on the following CPUs:
+                *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
+                *               Use Instruments->Counters to get DSB/MITE cycles.
+                *               I never got performance swings, but I was able to
+                *               go from the good case of mostly DSB to half of the
+                *               cycles served from MITE.
+                *   - Coffeelake: Intel i9-9900k
+                *   - Coffeelake: Intel i7-9700k
+                *
+                * I haven't been able to reproduce the instability or DSB misses on any
+                * of the following CPUS:
+                *   - Haswell
+                *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
+                *   - Skylake
+                *
+                * Alignment is done for each of the three major decompression loops:
+                *   - ZSTD_decompressSequences_bodySplitLitBuffer - presplit section of the literal buffer
+                *   - ZSTD_decompressSequences_bodySplitLitBuffer - postsplit section of the literal buffer
+                *   - ZSTD_decompressSequences_body
+                * Alignment choices are made to minimize large swings on bad cases and influence on performance
+                * from changes external to this code, rather than to overoptimize on the current commit.
+                *
+                * If you are seeing performance stability this script can help test.
+                * It tests on 4 commits in zstd where I saw performance change.
+                *
+                *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
+                */
 #if defined(__x86_64__)
-        /* Align the decompression loop to 32 + 16 bytes.
-         *
-         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
-         * speed swings based on the alignment of the decompression loop. This
-         * performance swing is caused by parts of the decompression loop falling
-         * out of the DSB. The entire decompression loop should fit in the DSB,
-         * when it can't we get much worse performance. You can measure if you've
-         * hit the good case or the bad case with this perf command for some
-         * compressed file test.zst:
-         *
-         *   perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
-         *             -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
-         *
-         * If you see most cycles served out of the MITE you've hit the bad case.
-         * If you see most cycles served out of the DSB you've hit the good case.
-         * If it is pretty even then you may be in an okay case.
-         *
-         * I've been able to reproduce this issue on the following CPUs:
-         *   - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
-         *               Use Instruments->Counters to get DSB/MITE cycles.
-         *               I never got performance swings, but I was able to
-         *               go from the good case of mostly DSB to half of the
-         *               cycles served from MITE.
-         *   - Coffeelake: Intel i9-9900k
-         *
-         * I haven't been able to reproduce the instability or DSB misses on any
-         * of the following CPUS:
-         *   - Haswell
-         *   - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GH
-         *   - Skylake
-         *
-         * If you are seeing performance stability this script can help test.
-         * It tests on 4 commits in zstd where I saw performance change.
-         *
-         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
-         */
-        __asm__(".p2align 5");
-        __asm__("nop");
-        __asm__(".p2align 4");
+            __asm__(".p2align 6");
+#  if __GNUC__ >= 7
+	    /* good for gcc-7, gcc-9, and gcc-11 */
+            __asm__("nop");
+            __asm__(".p2align 5");
+            __asm__("nop");
+            __asm__(".p2align 4");
+#    if __GNUC__ == 8 || __GNUC__ == 10
+	    /* good for gcc-8 and gcc-10 */
+            __asm__("nop");
+            __asm__(".p2align 3");
+#    endif
+#  endif
+#endif
+
+            /* Handle the initial state where litBuffer is currently split between dst and litExtraBuffer */
+            for (; litPtr + sequence.litLength <= dctx->litBufferEnd; ) {
+                size_t const oneSeqSize = ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence.litLength - WILDCOPY_OVERLENGTH, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                    return oneSeqSize;
+                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                op += oneSeqSize;
+                if (UNLIKELY(!--nbSeq))
+                    break;
+                BIT_reloadDStream(&(seqState.DStream));
+                sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            }
+
+            /* If there are more sequences, they will need to read literals from litExtraBuffer; copy over the remainder from dst and update litPtr and litEnd */
+            if (nbSeq > 0) {
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequence.litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                {
+                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                    assert(!ZSTD_isError(oneSeqSize));
+                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                    if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                        return oneSeqSize;
+                    DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                    op += oneSeqSize;
+                    if (--nbSeq)
+                        BIT_reloadDStream(&(seqState.DStream));
+                }
+            }
+        }
+
+        if (nbSeq > 0) /* there is remaining lit from extra buffer */
+        {
+
+#if defined(__x86_64__)
+            __asm__(".p2align 6");
+            __asm__("nop");
+#  if __GNUC__ != 7
+            /* worse for gcc-7 better for gcc-8, gcc-9, and gcc-10 and clang */
+            __asm__(".p2align 4");
+            __asm__("nop");
+            __asm__(".p2align 3");
+#  elif __GNUC__ >= 11
+            __asm__(".p2align 3");
+#  else
+            __asm__(".p2align 5");
+            __asm__("nop");
+            __asm__(".p2align 3");
+#  endif
+#endif
+
+            for (; ; ) {
+                seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+                size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litBufferEnd, prefixStart, vBase, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
+#endif
+                if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                    return oneSeqSize;
+                DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
+                op += oneSeqSize;
+                if (UNLIKELY(!--nbSeq))
+                    break;
+                BIT_reloadDStream(&(seqState.DStream));
+            }
+        }
+
+        /* check if reached exact end */
+        DEBUGLOG(5, "ZSTD_decompressSequences_bodySplitLitBuffer: after decode loop, remaining nbSeq : %i", nbSeq);
+        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
+        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
+        /* save reps for next block */
+        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
+    }
+
+    /* last literal segment */
+    if (dctx->litBufferLocation == ZSTD_split)  /* split hasn't been reached yet, first get dst then copy litExtraBuffer */
+    {
+        size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+        litPtr = dctx->litExtraBuffer;
+        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+        dctx->litBufferLocation = ZSTD_not_in_dst;
+    }
+    {   size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memcpy(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+    }
+
+    return op-ostart;
+}
+
+FORCE_INLINE_TEMPLATE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequences_body(ZSTD_DCtx* dctx,
+    void* dst, size_t maxDstSize,
+    const void* seqStart, size_t seqSize, int nbSeq,
+    const ZSTD_longOffset_e isLongOffset,
+    const int frame)
+{
+    const BYTE* ip = (const BYTE*)seqStart;
+    const BYTE* const iend = ip + seqSize;
+    BYTE* const ostart = (BYTE*)dst;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_not_in_dst ? ostart + maxDstSize : dctx->litBuffer;
+    BYTE* op = ostart;
+    const BYTE* litPtr = dctx->litPtr;
+    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* const prefixStart = (const BYTE*)(dctx->prefixStart);
+    const BYTE* const vBase = (const BYTE*)(dctx->virtualStart);
+    const BYTE* const dictEnd = (const BYTE*)(dctx->dictEnd);
+    DEBUGLOG(5, "ZSTD_decompressSequences_body");
+    (void)frame;
+
+    /* Regen sequences */
+    if (nbSeq) {
+        seqState_t seqState;
+        dctx->fseEntropy = 1;
+        { U32 i; for (i = 0; i < ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
+        RETURN_ERROR_IF(
+            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend - ip)),
+            corruption_detected, "");
+        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
+        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
+        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
+        assert(dst != NULL);
+
+        ZSTD_STATIC_ASSERT(
+            BIT_DStream_unfinished < BIT_DStream_completed &&
+            BIT_DStream_endOfBuffer < BIT_DStream_completed &&
+            BIT_DStream_completed < BIT_DStream_overflow);
+
+#if defined(__x86_64__)
+            __asm__(".p2align 6");
+            __asm__("nop");
+#  if __GNUC__ >= 7
+            __asm__(".p2align 5");
+            __asm__("nop");
+            __asm__(".p2align 3");
+#  else
+            __asm__(".p2align 4");
+            __asm__("nop");
+            __asm__(".p2align 3");
+#  endif
 #endif
+
         for ( ; ; ) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
             size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
             assert(!ZSTD_isError(oneSeqSize));
             if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
 #endif
+            if (UNLIKELY(ZSTD_isError(oneSeqSize)))
+                return oneSeqSize;
             DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
-            BIT_reloadDStream(&(seqState.DStream));
             op += oneSeqSize;
-            /* gcc and clang both don't like early returns in this loop.
-             * Instead break and check for an error at the end of the loop.
-             */
-            if (UNLIKELY(ZSTD_isError(oneSeqSize))) {
-                error = oneSeqSize;
+            if (UNLIKELY(!--nbSeq))
                 break;
-            }
-            if (UNLIKELY(!--nbSeq)) break;
+            BIT_reloadDStream(&(seqState.DStream));
         }
 
         /* check if reached exact end */
         DEBUGLOG(5, "ZSTD_decompressSequences_body: after decode loop, remaining nbSeq : %i", nbSeq);
-        if (ZSTD_isError(error)) return error;
         RETURN_ERROR_IF(nbSeq, corruption_detected, "");
         RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
         /* save reps for next block */
@@ -1229,9 +1632,37 @@ ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
 {
     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
+
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer_default(ZSTD_DCtx* dctx,
+                                               void* dst, size_t maxDstSize,
+                                         const void* seqStart, size_t seqSize, int nbSeq,
+                                         const ZSTD_longOffset_e isLongOffset,
+                                         const int frame)
+{
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
+
+FORCE_INLINE_TEMPLATE size_t
+ZSTD_prefetchMatch(size_t prefetchPos, seq_t const sequence,
+                   const BYTE* const prefixStart, const BYTE* const dictEnd)
+{
+    prefetchPos += sequence.litLength;
+    {   const BYTE* const matchBase = (sequence.offset > prefetchPos) ? dictEnd : prefixStart;
+        const BYTE* const match = matchBase + prefetchPos - sequence.offset; /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
+                                                                              * No consequence though : memory address is only used for prefetching, not for dereferencing */
+        PREFETCH_L1(match); PREFETCH_L1(match+CACHELINE_SIZE);   /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+    }
+    return prefetchPos + sequence.matchLength;
+}
+
+/* This decoding function employs prefetching
+ * to reduce latency impact of cache misses.
+ * It's generally employed when block contains a significant portion of long-distance matches
+ * or when coupled with a "cold" dictionary */
 FORCE_INLINE_TEMPLATE size_t
 ZSTD_decompressSequencesLong_body(
                                ZSTD_DCtx* dctx,
@@ -1243,10 +1674,10 @@ ZSTD_decompressSequencesLong_body(
     const BYTE* ip = (const BYTE*)seqStart;
     const BYTE* const iend = ip + seqSize;
     BYTE* const ostart = (BYTE*)dst;
-    BYTE* const oend = ostart + maxDstSize;
+    BYTE* const oend = dctx->litBufferLocation == ZSTD_in_dst ? dctx->litBuffer : ostart + maxDstSize;
     BYTE* op = ostart;
     const BYTE* litPtr = dctx->litPtr;
-    const BYTE* const litEnd = litPtr + dctx->litSize;
+    const BYTE* litBufferEnd = dctx->litBufferEnd;
     const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
     const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
     const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
@@ -1254,18 +1685,17 @@ ZSTD_decompressSequencesLong_body(
 
     /* Regen sequences */
     if (nbSeq) {
-#define STORED_SEQS 4
+#define STORED_SEQS 8
 #define STORED_SEQS_MASK (STORED_SEQS-1)
-#define ADVANCED_SEQS 4
+#define ADVANCED_SEQS STORED_SEQS
         seq_t sequences[STORED_SEQS];
         int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
         seqState_t seqState;
         int seqNb;
+        size_t prefetchPos = (size_t)(op-prefixStart); /* track position relative to prefixStart */
+
         dctx->fseEntropy = 1;
         { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
-        seqState.prefixStart = prefixStart;
-        seqState.pos = (size_t)(op-prefixStart);
-        seqState.dictEnd = dictEnd;
         assert(dst != NULL);
         assert(iend >= ip);
         RETURN_ERROR_IF(
@@ -1277,36 +1707,100 @@ ZSTD_decompressSequencesLong_body(
 
         /* prepare in advance */
         for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
-            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
-            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
+            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+            sequences[seqNb] = sequence;
         }
         RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");
 
-        /* decode and decompress */
-        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
-            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
-            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+        /* decompress without stomping litBuffer */
+        for (; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb < nbSeq); seqNb++) {
+            seq_t sequence = ZSTD_decodeSequence(&seqState, isLongOffset);
+            size_t oneSeqSize;
+
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength > dctx->litBufferEnd)
+            {
+                /* lit buffer is reaching split point, empty out the first buffer and transition to litExtraBuffer */
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-            assert(!ZSTD_isError(oneSeqSize));
-            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
-            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
-            sequences[seqNb & STORED_SEQS_MASK] = sequence;
-            op += oneSeqSize;
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+                sequences[seqNb & STORED_SEQS_MASK] = sequence;
+                op += oneSeqSize;
+            }
+            else
+            {
+                /* lit buffer is either wholly contained in first or second split, or not split at all*/
+                oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK].litLength - WILDCOPY_OVERLENGTH, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+                    ZSTD_execSequence(op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb - ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+
+                prefetchPos = ZSTD_prefetchMatch(prefetchPos, sequence, prefixStart, dictEnd);
+                sequences[seqNb & STORED_SEQS_MASK] = sequence;
+                op += oneSeqSize;
+            }
         }
         RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
 
         /* finish queue */
         seqNb -= seqAdvance;
         for ( ; seqNb<nbSeq ; seqNb++) {
-            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
+            seq_t *sequence = &(sequences[seqNb&STORED_SEQS_MASK]);
+            if (dctx->litBufferLocation == ZSTD_split && litPtr + sequence->litLength > dctx->litBufferEnd)
+            {
+                const size_t leftoverLit = dctx->litBufferEnd - litPtr;
+                if (leftoverLit)
+                {
+                    RETURN_ERROR_IF(leftoverLit > (size_t)(oend - op), dstSize_tooSmall, "remaining lit must fit within dstBuffer");
+                    ZSTD_safecopyDstBeforeSrc(op, litPtr, leftoverLit);
+                    sequence->litLength -= leftoverLit;
+                    op += leftoverLit;
+                }
+                litPtr = dctx->litExtraBuffer;
+                litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+                dctx->litBufferLocation = ZSTD_not_in_dst;
+                {
+                    size_t const oneSeqSize = ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
 #if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
-            assert(!ZSTD_isError(oneSeqSize));
-            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+                    assert(!ZSTD_isError(oneSeqSize));
+                    if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
 #endif
-            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
-            op += oneSeqSize;
+                    if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                    op += oneSeqSize;
+                }
+            }
+            else
+            {
+                size_t const oneSeqSize = dctx->litBufferLocation == ZSTD_split ?
+                    ZSTD_execSequenceSplitLitBuffer(op, oend, litPtr + sequence->litLength - WILDCOPY_OVERLENGTH, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd) :
+                    ZSTD_execSequence(op, oend, *sequence, &litPtr, litBufferEnd, prefixStart, dictStart, dictEnd);
+#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
+                assert(!ZSTD_isError(oneSeqSize));
+                if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
+#endif
+                if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
+                op += oneSeqSize;
+            }
         }
 
         /* save reps for next block */
@@ -1314,10 +1808,21 @@ ZSTD_decompressSequencesLong_body(
     }
 
     /* last literal segment */
-    {   size_t const lastLLSize = litEnd - litPtr;
+    if (dctx->litBufferLocation == ZSTD_split)  /* first deplete literal buffer in dst, then copy litExtraBuffer */
+    {
+        size_t const lastLLSize = litBufferEnd - litPtr;
+        RETURN_ERROR_IF(lastLLSize > (size_t)(oend - op), dstSize_tooSmall, "");
+        if (op != NULL) {
+            ZSTD_memmove(op, litPtr, lastLLSize);
+            op += lastLLSize;
+        }
+        litPtr = dctx->litExtraBuffer;
+        litBufferEnd = dctx->litExtraBuffer + ZSTD_LITBUFFEREXTRASIZE;
+    }
+    {   size_t const lastLLSize = litBufferEnd - litPtr;
         RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
         if (op != NULL) {
-            ZSTD_memcpy(op, litPtr, lastLLSize);
+            ZSTD_memmove(op, litPtr, lastLLSize);
             op += lastLLSize;
         }
     }
@@ -1341,7 +1846,7 @@ ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
 #if DYNAMIC_BMI2
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
-static TARGET_ATTRIBUTE("bmi2") size_t
+static BMI2_TARGET_ATTRIBUTE size_t
 DONT_VECTORIZE
 ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
@@ -1351,10 +1856,20 @@ ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
 {
     return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
+static BMI2_TARGET_ATTRIBUTE size_t
+DONT_VECTORIZE
+ZSTD_decompressSequencesSplitLitBuffer_bmi2(ZSTD_DCtx* dctx,
+                                 void* dst, size_t maxDstSize,
+                           const void* seqStart, size_t seqSize, int nbSeq,
+                           const ZSTD_longOffset_e isLongOffset,
+                           const int frame)
+{
+    return ZSTD_decompressSequences_bodySplitLitBuffer(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
-static TARGET_ATTRIBUTE("bmi2") size_t
+static BMI2_TARGET_ATTRIBUTE size_t
 ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                  void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
@@ -1383,11 +1898,25 @@ ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
 {
     DEBUGLOG(5, "ZSTD_decompressSequences");
 #if DYNAMIC_BMI2
-    if (dctx->bmi2) {
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
         return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
     }
 #endif
-  return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+}
+static size_t
+ZSTD_decompressSequencesSplitLitBuffer(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
+                                 const void* seqStart, size_t seqSize, int nbSeq,
+                                 const ZSTD_longOffset_e isLongOffset,
+                                 const int frame)
+{
+    DEBUGLOG(5, "ZSTD_decompressSequencesSplitLitBuffer");
+#if DYNAMIC_BMI2
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
+        return ZSTD_decompressSequencesSplitLitBuffer_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
+    }
+#endif
+    return ZSTD_decompressSequencesSplitLitBuffer_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
 }
 #endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
 
@@ -1407,7 +1936,7 @@ ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
 {
     DEBUGLOG(5, "ZSTD_decompressSequencesLong");
 #if DYNAMIC_BMI2
-    if (dctx->bmi2) {
+    if (ZSTD_DCtx_get_bmi2(dctx)) {
         return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
     }
 #endif
@@ -1448,7 +1977,7 @@ ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
 size_t
 ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                               void* dst, size_t dstCapacity,
-                        const void* src, size_t srcSize, const int frame)
+                        const void* src, size_t srcSize, const int frame, const streaming_operation streaming)
 {   /* blockType == blockCompressed */
     const BYTE* ip = (const BYTE*)src;
     /* isLongOffset must be true if there are long offsets.
@@ -1463,7 +1992,7 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
     RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");
 
     /* Decode literals section */
-    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
+    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize, dst, dstCapacity, streaming);
         DEBUGLOG(5, "ZSTD_decodeLiteralsBlock : %u", (U32)litCSize);
         if (ZSTD_isError(litCSize)) return litCSize;
         ip += litCSize;
@@ -1511,7 +2040,10 @@ ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
 
 #ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
         /* else */
-        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+        if (dctx->litBufferLocation == ZSTD_split)
+            return ZSTD_decompressSequencesSplitLitBuffer(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
+        else
+            return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
 #endif
     }
 }
@@ -1534,7 +2066,7 @@ size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
 {
     size_t dSize;
     ZSTD_checkContinuity(dctx, dst, dstCapacity);
-    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
+    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0, not_streaming);
     dctx->previousDstEnd = (char*)dst + dSize;
     return dSize;
 }
diff --git a/lib/zstd/decompress/zstd_decompress_block.h b/lib/zstd/decompress/zstd_decompress_block.h
index e7f5f6689459f..3d2d57a5d25a7 100644
--- a/lib/zstd/decompress/zstd_decompress_block.h
+++ b/lib/zstd/decompress/zstd_decompress_block.h
@@ -33,6 +33,12 @@
  */
 
 
+ /* Streaming state is used to inform allocation of the literal buffer */
+typedef enum {
+    not_streaming = 0,
+    is_streaming = 1
+} streaming_operation;
+
 /* ZSTD_decompressBlock_internal() :
  * decompress block, starting at `src`,
  * into destination buffer `dst`.
@@ -41,7 +47,7 @@
  */
 size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                                void* dst, size_t dstCapacity,
-                         const void* src, size_t srcSize, const int frame);
+                         const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
 
 /* ZSTD_buildFSETable() :
  * generate FSE decoding table for one symbol (ll, ml or off)
@@ -54,7 +60,7 @@ size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
  */
 void ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
              const short* normalizedCounter, unsigned maxSymbolValue,
-             const U32* baseValue, const U32* nbAdditionalBits,
+             const U32* baseValue, const U8* nbAdditionalBits,
                    unsigned tableLog, void* wksp, size_t wkspSize,
                    int bmi2);
 
diff --git a/lib/zstd/decompress/zstd_decompress_internal.h b/lib/zstd/decompress/zstd_decompress_internal.h
index 4b9052f687558..98102edb6a832 100644
--- a/lib/zstd/decompress/zstd_decompress_internal.h
+++ b/lib/zstd/decompress/zstd_decompress_internal.h
@@ -20,7 +20,7 @@
  *  Dependencies
  *********************************************************/
 #include "../common/mem.h"             /* BYTE, U16, U32 */
-#include "../common/zstd_internal.h"   /* ZSTD_seqSymbol */
+#include "../common/zstd_internal.h"   /* constants : MaxLL, MaxML, MaxOff, LLFSELog, etc. */
 
 
@@ -40,7 +40,7 @@ static UNUSED_ATTR const U32 OF_base[MaxOff+1] = {
                  0xFFFD, 0x1FFFD, 0x3FFFD, 0x7FFFD, 0xFFFFD, 0x1FFFFD, 0x3FFFFD, 0x7FFFFD,
                  0xFFFFFD, 0x1FFFFFD, 0x3FFFFFD, 0x7FFFFFD, 0xFFFFFFD, 0x1FFFFFFD, 0x3FFFFFFD, 0x7FFFFFFD };
 
-static UNUSED_ATTR const U32 OF_bits[MaxOff+1] = {
+static UNUSED_ATTR const U8 OF_bits[MaxOff+1] = {
                      0,  1,  2,  3,  4,  5,  6,  7,
                      8,  9, 10, 11, 12, 13, 14, 15,
                     16, 17, 18, 19, 20, 21, 22, 23,
@@ -106,6 +106,22 @@ typedef struct {
     size_t ddictPtrCount;
 } ZSTD_DDictHashSet;
 
+#ifndef ZSTD_DECODER_INTERNAL_BUFFER
+#  define ZSTD_DECODER_INTERNAL_BUFFER  (1 << 16)
+#endif
+
+#define ZSTD_LBMIN 64
+#define ZSTD_LBMAX (128 << 10)
+
+/* extra buffer, compensates when dst is not large enough to store litBuffer */
+#define ZSTD_LITBUFFEREXTRASIZE  BOUNDED(ZSTD_LBMIN, ZSTD_DECODER_INTERNAL_BUFFER, ZSTD_LBMAX)
+
+typedef enum {
+    ZSTD_not_in_dst = 0,  /* Stored entirely within litExtraBuffer */
+    ZSTD_in_dst = 1,           /* Stored entirely within dst (in memory after current output write) */
+    ZSTD_split = 2            /* Split between litExtraBuffer and dst */
+} ZSTD_litLocation_e;
+
 struct ZSTD_DCtx_s
 {
     const ZSTD_seqSymbol* LLTptr;
@@ -136,7 +152,9 @@ struct ZSTD_DCtx_s
     size_t litSize;
     size_t rleSize;
     size_t staticSize;
+#if DYNAMIC_BMI2 != 0
     int bmi2;                     /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
+#endif
 
     /* dictionary */
     ZSTD_DDict* ddictLocal;
@@ -158,16 +176,16 @@ struct ZSTD_DCtx_s
     size_t outStart;
     size_t outEnd;
     size_t lhSize;
-    void* legacyContext;
-    U32 previousLegacyVersion;
-    U32 legacyVersion;
     U32 hostageByte;
     int noForwardProgress;
     ZSTD_bufferMode_e outBufferMode;
     ZSTD_outBuffer expectedOutBuffer;
 
     /* workspace */
-    BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
+    BYTE* litBuffer;
+    const BYTE* litBufferEnd;
+    ZSTD_litLocation_e litBufferLocation;
+    BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
     BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
 
     size_t oversizedDuration;
@@ -180,6 +198,14 @@ struct ZSTD_DCtx_s
     /* Tracing */
 };  /* typedef'd to ZSTD_DCtx within "zstd.h" */
 
+MEM_STATIC int ZSTD_DCtx_get_bmi2(const struct ZSTD_DCtx_s *dctx) {
+#if DYNAMIC_BMI2 != 0
+	return dctx->bmi2;
+#else
+    (void)dctx;
+	return 0;
+#endif
+}
 
 /*-*******************************************************
  *  Shared internal functions
diff --git a/lib/zstd/decompress_sources.h b/lib/zstd/decompress_sources.h
index 0fbec508f285e..a06ca187aab5f 100644
--- a/lib/zstd/decompress_sources.h
+++ b/lib/zstd/decompress_sources.h
@@ -16,6 +16,12 @@
  * decompression.
  */
 
+/*
+ * Disable the ASM Huffman implementation because we need to
+ * include all the sources.
+ */
+#define ZSTD_DISABLE_ASM 1
+
 #include "common/debug.c"
 #include "common/entropy_common.c"
 #include "common/error_private.c"
diff --git a/lib/zstd/zstd_compress_module.c b/lib/zstd/zstd_compress_module.c
index 65548a4bb9341..04e1b5c01d9b6 100644
--- a/lib/zstd/zstd_compress_module.c
+++ b/lib/zstd/zstd_compress_module.c
@@ -133,7 +133,11 @@ EXPORT_SYMBOL(zstd_init_cstream);
 size_t zstd_reset_cstream(zstd_cstream *cstream,
 	unsigned long long pledged_src_size)
 {
-	return ZSTD_resetCStream(cstream, pledged_src_size);
+	if (pledged_src_size == 0)
+		pledged_src_size = ZSTD_CONTENTSIZE_UNKNOWN;
+	ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_reset(cstream, ZSTD_reset_session_only) );
+	ZSTD_FORWARD_IF_ERR( ZSTD_CCtx_setPledgedSrcSize(cstream, pledged_src_size) );
+	return 0;
 }
 EXPORT_SYMBOL(zstd_reset_cstream);
 
-- 
GitLab


From a1ccd3d911382f68753033c6adcf69663c2a9fc5 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 19 Oct 2022 15:41:25 -0500
Subject: [PATCH 0389/4122] PCI/portdrv: Squash into portdrv.c

Squash portdrv_core.c and portdrv_pci.c into portdrv.c to make it easier to
find things.  The whole thing is less than 1000 lines, and it's a pain to
bounce back and forth between two files.

Several portdrv_core.c functions were non-static because they were
referenced from portdrv_pci.c.  Make them static since they're now all in
portdrv.c.

No functional change intended.

Link: https://lore.kernel.org/r/20221019204127.44463-2-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
 drivers/pci/pcie/Makefile                     |   2 +-
 .../pci/pcie/{portdrv_core.c => portdrv.c}    | 252 +++++++++++++++++-
 drivers/pci/pcie/portdrv.h                    |  10 -
 drivers/pci/pcie/portdrv_pci.c                | 252 ------------------
 4 files changed, 244 insertions(+), 272 deletions(-)
 rename drivers/pci/pcie/{portdrv_core.c => portdrv.c} (70%)
 delete mode 100644 drivers/pci/pcie/portdrv_pci.c

diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 5783a2f79e6a2..8de4ed5f98f14 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -2,7 +2,7 @@
 #
 # Makefile for PCI Express features and port driver
 
-pcieportdrv-y			:= portdrv_core.o portdrv_pci.o rcec.o
+pcieportdrv-y			:= portdrv.o rcec.o
 
 obj-$(CONFIG_PCIEPORTBUS)	+= pcieportdrv.o
 
diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv.c
similarity index 70%
rename from drivers/pci/pcie/portdrv_core.c
rename to drivers/pci/pcie/portdrv.c
index 1ac7fec47d6fb..0b4a1f9c2a6b8 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv.c
@@ -1,11 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * Purpose:	PCI Express Port Bus Driver's Core Functions
+ * Purpose:	PCI Express Port Bus Driver
  *
  * Copyright (C) 2004 Intel
  * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
  */
 
+#include <linux/dmi.h>
+#include <linux/init.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/kernel.h>
@@ -308,7 +310,7 @@ static int pcie_device_init(struct pci_dev *pdev, int service, int irq)
  * Allocate the port extension structure and register services associated with
  * the port.
  */
-int pcie_port_device_register(struct pci_dev *dev)
+static int pcie_port_device_register(struct pci_dev *dev)
 {
 	int status, capabilities, i, nr_service;
 	int irqs[PCIE_PORT_DEVICE_MAXSERVICES];
@@ -362,7 +364,7 @@ error_disable:
 
 typedef int (*pcie_callback_t)(struct pcie_device *);
 
-int pcie_port_device_iter(struct device *dev, void *data)
+static int pcie_port_device_iter(struct device *dev, void *data)
 {
 	struct pcie_port_service_driver *service_driver;
 	size_t offset = *(size_t *)data;
@@ -382,13 +384,13 @@ int pcie_port_device_iter(struct device *dev, void *data)
  * pcie_port_device_suspend - suspend port services associated with a PCIe port
  * @dev: PCI Express port to handle
  */
-int pcie_port_device_suspend(struct device *dev)
+static int pcie_port_device_suspend(struct device *dev)
 {
 	size_t off = offsetof(struct pcie_port_service_driver, suspend);
 	return device_for_each_child(dev, &off, pcie_port_device_iter);
 }
 
-int pcie_port_device_resume_noirq(struct device *dev)
+static int pcie_port_device_resume_noirq(struct device *dev)
 {
 	size_t off = offsetof(struct pcie_port_service_driver, resume_noirq);
 	return device_for_each_child(dev, &off, pcie_port_device_iter);
@@ -398,7 +400,7 @@ int pcie_port_device_resume_noirq(struct device *dev)
  * pcie_port_device_resume - resume port services associated with a PCIe port
  * @dev: PCI Express port to handle
  */
-int pcie_port_device_resume(struct device *dev)
+static int pcie_port_device_resume(struct device *dev)
 {
 	size_t off = offsetof(struct pcie_port_service_driver, resume);
 	return device_for_each_child(dev, &off, pcie_port_device_iter);
@@ -408,7 +410,7 @@ int pcie_port_device_resume(struct device *dev)
  * pcie_port_device_runtime_suspend - runtime suspend port services
  * @dev: PCI Express port to handle
  */
-int pcie_port_device_runtime_suspend(struct device *dev)
+static int pcie_port_device_runtime_suspend(struct device *dev)
 {
 	size_t off = offsetof(struct pcie_port_service_driver, runtime_suspend);
 	return device_for_each_child(dev, &off, pcie_port_device_iter);
@@ -418,7 +420,7 @@ int pcie_port_device_runtime_suspend(struct device *dev)
  * pcie_port_device_runtime_resume - runtime resume port services
  * @dev: PCI Express port to handle
  */
-int pcie_port_device_runtime_resume(struct device *dev)
+static int pcie_port_device_runtime_resume(struct device *dev)
 {
 	size_t off = offsetof(struct pcie_port_service_driver, runtime_resume);
 	return device_for_each_child(dev, &off, pcie_port_device_iter);
@@ -482,7 +484,7 @@ EXPORT_SYMBOL_GPL(pcie_port_find_device);
  * Remove PCI Express port service devices associated with given port and
  * disable MSI-X or MSI for the port.
  */
-void pcie_port_device_remove(struct pci_dev *dev)
+static void pcie_port_device_remove(struct pci_dev *dev)
 {
 	device_for_each_child(&dev->dev, NULL, remove_iter);
 	pci_free_irq_vectors(dev);
@@ -584,3 +586,235 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *drv)
 	driver_unregister(&drv->driver);
 }
 EXPORT_SYMBOL(pcie_port_service_unregister);
+
+/* If this switch is set, PCIe port native services should not be enabled. */
+bool pcie_ports_disabled;
+
+/*
+ * If the user specified "pcie_ports=native", use the PCIe services regardless
+ * of whether the platform has given us permission.  On ACPI systems, this
+ * means we ignore _OSC.
+ */
+bool pcie_ports_native;
+
+/*
+ * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe
+ * service even if the platform hasn't given us permission.
+ */
+bool pcie_ports_dpc_native;
+
+static int __init pcie_port_setup(char *str)
+{
+	if (!strncmp(str, "compat", 6))
+		pcie_ports_disabled = true;
+	else if (!strncmp(str, "native", 6))
+		pcie_ports_native = true;
+	else if (!strncmp(str, "dpc-native", 10))
+		pcie_ports_dpc_native = true;
+
+	return 1;
+}
+__setup("pcie_ports=", pcie_port_setup);
+
+/* global data */
+
+#ifdef CONFIG_PM
+static int pcie_port_runtime_suspend(struct device *dev)
+{
+	if (!to_pci_dev(dev)->bridge_d3)
+		return -EBUSY;
+
+	return pcie_port_device_runtime_suspend(dev);
+}
+
+static int pcie_port_runtime_idle(struct device *dev)
+{
+	/*
+	 * Assume the PCI core has set bridge_d3 whenever it thinks the port
+	 * should be good to go to D3.  Everything else, including moving
+	 * the port to D3, is handled by the PCI core.
+	 */
+	return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY;
+}
+
+static const struct dev_pm_ops pcie_portdrv_pm_ops = {
+	.suspend	= pcie_port_device_suspend,
+	.resume_noirq	= pcie_port_device_resume_noirq,
+	.resume		= pcie_port_device_resume,
+	.freeze		= pcie_port_device_suspend,
+	.thaw		= pcie_port_device_resume,
+	.poweroff	= pcie_port_device_suspend,
+	.restore_noirq	= pcie_port_device_resume_noirq,
+	.restore	= pcie_port_device_resume,
+	.runtime_suspend = pcie_port_runtime_suspend,
+	.runtime_resume	= pcie_port_device_runtime_resume,
+	.runtime_idle	= pcie_port_runtime_idle,
+};
+
+#define PCIE_PORTDRV_PM_OPS	(&pcie_portdrv_pm_ops)
+
+#else /* !PM */
+
+#define PCIE_PORTDRV_PM_OPS	NULL
+#endif /* !PM */
+
+/*
+ * pcie_portdrv_probe - Probe PCI-Express port devices
+ * @dev: PCI-Express port device being probed
+ *
+ * If detected invokes the pcie_port_device_register() method for
+ * this port device.
+ *
+ */
+static int pcie_portdrv_probe(struct pci_dev *dev,
+					const struct pci_device_id *id)
+{
+	int type = pci_pcie_type(dev);
+	int status;
+
+	if (!pci_is_pcie(dev) ||
+	    ((type != PCI_EXP_TYPE_ROOT_PORT) &&
+	     (type != PCI_EXP_TYPE_UPSTREAM) &&
+	     (type != PCI_EXP_TYPE_DOWNSTREAM) &&
+	     (type != PCI_EXP_TYPE_RC_EC)))
+		return -ENODEV;
+
+	if (type == PCI_EXP_TYPE_RC_EC)
+		pcie_link_rcec(dev);
+
+	status = pcie_port_device_register(dev);
+	if (status)
+		return status;
+
+	pci_save_state(dev);
+
+	dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE |
+					   DPM_FLAG_SMART_SUSPEND);
+
+	if (pci_bridge_d3_possible(dev)) {
+		/*
+		 * Keep the port resumed 100ms to make sure things like
+		 * config space accesses from userspace (lspci) will not
+		 * cause the port to repeatedly suspend and resume.
+		 */
+		pm_runtime_set_autosuspend_delay(&dev->dev, 100);
+		pm_runtime_use_autosuspend(&dev->dev);
+		pm_runtime_mark_last_busy(&dev->dev);
+		pm_runtime_put_autosuspend(&dev->dev);
+		pm_runtime_allow(&dev->dev);
+	}
+
+	return 0;
+}
+
+static void pcie_portdrv_remove(struct pci_dev *dev)
+{
+	if (pci_bridge_d3_possible(dev)) {
+		pm_runtime_forbid(&dev->dev);
+		pm_runtime_get_noresume(&dev->dev);
+		pm_runtime_dont_use_autosuspend(&dev->dev);
+	}
+
+	pcie_port_device_remove(dev);
+}
+
+static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
+					pci_channel_state_t error)
+{
+	if (error == pci_channel_io_frozen)
+		return PCI_ERS_RESULT_NEED_RESET;
+	return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
+{
+	size_t off = offsetof(struct pcie_port_service_driver, slot_reset);
+	device_for_each_child(&dev->dev, &off, pcie_port_device_iter);
+
+	pci_restore_state(dev);
+	pci_save_state(dev);
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev)
+{
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/*
+ * LINUX Device Driver Model
+ */
+static const struct pci_device_id port_pci_ids[] = {
+	/* handle any PCI-Express port */
+	{ PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) },
+	/* subtractive decode PCI-to-PCI bridge, class type is 060401h */
+	{ PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) },
+	/* handle any Root Complex Event Collector */
+	{ PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) },
+	{ },
+};
+
+static const struct pci_error_handlers pcie_portdrv_err_handler = {
+	.error_detected = pcie_portdrv_error_detected,
+	.slot_reset = pcie_portdrv_slot_reset,
+	.mmio_enabled = pcie_portdrv_mmio_enabled,
+};
+
+static struct pci_driver pcie_portdriver = {
+	.name		= "pcieport",
+	.id_table	= &port_pci_ids[0],
+
+	.probe		= pcie_portdrv_probe,
+	.remove		= pcie_portdrv_remove,
+	.shutdown	= pcie_portdrv_remove,
+
+	.err_handler	= &pcie_portdrv_err_handler,
+
+	.driver_managed_dma = true,
+
+	.driver.pm	= PCIE_PORTDRV_PM_OPS,
+};
+
+static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d)
+{
+	pr_notice("%s detected: will not use MSI for PCIe PME signaling\n",
+		  d->ident);
+	pcie_pme_disable_msi();
+	return 0;
+}
+
+static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = {
+	/*
+	 * Boxes that should not use MSI for PCIe PME signaling.
+	 */
+	{
+	 .callback = dmi_pcie_pme_disable_msi,
+	 .ident = "MSI Wind U-100",
+	 .matches = {
+		     DMI_MATCH(DMI_SYS_VENDOR,
+				"MICRO-STAR INTERNATIONAL CO., LTD"),
+		     DMI_MATCH(DMI_PRODUCT_NAME, "U-100"),
+		     },
+	 },
+	 {}
+};
+
+static void __init pcie_init_services(void)
+{
+	pcie_aer_init();
+	pcie_pme_init();
+	pcie_dpc_init();
+	pcie_hp_init();
+}
+
+static int __init pcie_portdrv_init(void)
+{
+	if (pcie_ports_disabled)
+		return -EACCES;
+
+	pcie_init_services();
+	dmi_check_system(pcie_portdrv_dmi_table);
+
+	return pci_register_driver(&pcie_portdriver);
+}
+device_initcall(pcie_portdrv_init);
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index 0ef4bf5f811d9..bf380bcea6a5b 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -108,16 +108,6 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *new);
 #define get_descriptor_id(type, service) (((type - 4) << 8) | service)
 
 extern struct bus_type pcie_port_bus_type;
-int pcie_port_device_register(struct pci_dev *dev);
-int pcie_port_device_iter(struct device *dev, void *data);
-#ifdef CONFIG_PM
-int pcie_port_device_suspend(struct device *dev);
-int pcie_port_device_resume_noirq(struct device *dev);
-int pcie_port_device_resume(struct device *dev);
-int pcie_port_device_runtime_suspend(struct device *dev);
-int pcie_port_device_runtime_resume(struct device *dev);
-#endif
-void pcie_port_device_remove(struct pci_dev *dev);
 
 struct pci_dev;
 
diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c
deleted file mode 100644
index 7f8788a970ae1..0000000000000
--- a/drivers/pci/pcie/portdrv_pci.c
+++ /dev/null
@@ -1,252 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Purpose:	PCI Express Port Bus Driver
- * Author:	Tom Nguyen <tom.l.nguyen@intel.com>
- *
- * Copyright (C) 2004 Intel
- * Copyright (C) Tom Long Nguyen (tom.l.nguyen@intel.com)
- */
-
-#include <linux/pci.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/pm.h>
-#include <linux/pm_runtime.h>
-#include <linux/init.h>
-#include <linux/aer.h>
-#include <linux/dmi.h>
-
-#include "../pci.h"
-#include "portdrv.h"
-
-/* If this switch is set, PCIe port native services should not be enabled. */
-bool pcie_ports_disabled;
-
-/*
- * If the user specified "pcie_ports=native", use the PCIe services regardless
- * of whether the platform has given us permission.  On ACPI systems, this
- * means we ignore _OSC.
- */
-bool pcie_ports_native;
-
-/*
- * If the user specified "pcie_ports=dpc-native", use the Linux DPC PCIe
- * service even if the platform hasn't given us permission.
- */
-bool pcie_ports_dpc_native;
-
-static int __init pcie_port_setup(char *str)
-{
-	if (!strncmp(str, "compat", 6))
-		pcie_ports_disabled = true;
-	else if (!strncmp(str, "native", 6))
-		pcie_ports_native = true;
-	else if (!strncmp(str, "dpc-native", 10))
-		pcie_ports_dpc_native = true;
-
-	return 1;
-}
-__setup("pcie_ports=", pcie_port_setup);
-
-/* global data */
-
-#ifdef CONFIG_PM
-static int pcie_port_runtime_suspend(struct device *dev)
-{
-	if (!to_pci_dev(dev)->bridge_d3)
-		return -EBUSY;
-
-	return pcie_port_device_runtime_suspend(dev);
-}
-
-static int pcie_port_runtime_idle(struct device *dev)
-{
-	/*
-	 * Assume the PCI core has set bridge_d3 whenever it thinks the port
-	 * should be good to go to D3.  Everything else, including moving
-	 * the port to D3, is handled by the PCI core.
-	 */
-	return to_pci_dev(dev)->bridge_d3 ? 0 : -EBUSY;
-}
-
-static const struct dev_pm_ops pcie_portdrv_pm_ops = {
-	.suspend	= pcie_port_device_suspend,
-	.resume_noirq	= pcie_port_device_resume_noirq,
-	.resume		= pcie_port_device_resume,
-	.freeze		= pcie_port_device_suspend,
-	.thaw		= pcie_port_device_resume,
-	.poweroff	= pcie_port_device_suspend,
-	.restore_noirq	= pcie_port_device_resume_noirq,
-	.restore	= pcie_port_device_resume,
-	.runtime_suspend = pcie_port_runtime_suspend,
-	.runtime_resume	= pcie_port_device_runtime_resume,
-	.runtime_idle	= pcie_port_runtime_idle,
-};
-
-#define PCIE_PORTDRV_PM_OPS	(&pcie_portdrv_pm_ops)
-
-#else /* !PM */
-
-#define PCIE_PORTDRV_PM_OPS	NULL
-#endif /* !PM */
-
-/*
- * pcie_portdrv_probe - Probe PCI-Express port devices
- * @dev: PCI-Express port device being probed
- *
- * If detected invokes the pcie_port_device_register() method for
- * this port device.
- *
- */
-static int pcie_portdrv_probe(struct pci_dev *dev,
-					const struct pci_device_id *id)
-{
-	int type = pci_pcie_type(dev);
-	int status;
-
-	if (!pci_is_pcie(dev) ||
-	    ((type != PCI_EXP_TYPE_ROOT_PORT) &&
-	     (type != PCI_EXP_TYPE_UPSTREAM) &&
-	     (type != PCI_EXP_TYPE_DOWNSTREAM) &&
-	     (type != PCI_EXP_TYPE_RC_EC)))
-		return -ENODEV;
-
-	if (type == PCI_EXP_TYPE_RC_EC)
-		pcie_link_rcec(dev);
-
-	status = pcie_port_device_register(dev);
-	if (status)
-		return status;
-
-	pci_save_state(dev);
-
-	dev_pm_set_driver_flags(&dev->dev, DPM_FLAG_NO_DIRECT_COMPLETE |
-					   DPM_FLAG_SMART_SUSPEND);
-
-	if (pci_bridge_d3_possible(dev)) {
-		/*
-		 * Keep the port resumed 100ms to make sure things like
-		 * config space accesses from userspace (lspci) will not
-		 * cause the port to repeatedly suspend and resume.
-		 */
-		pm_runtime_set_autosuspend_delay(&dev->dev, 100);
-		pm_runtime_use_autosuspend(&dev->dev);
-		pm_runtime_mark_last_busy(&dev->dev);
-		pm_runtime_put_autosuspend(&dev->dev);
-		pm_runtime_allow(&dev->dev);
-	}
-
-	return 0;
-}
-
-static void pcie_portdrv_remove(struct pci_dev *dev)
-{
-	if (pci_bridge_d3_possible(dev)) {
-		pm_runtime_forbid(&dev->dev);
-		pm_runtime_get_noresume(&dev->dev);
-		pm_runtime_dont_use_autosuspend(&dev->dev);
-	}
-
-	pcie_port_device_remove(dev);
-}
-
-static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
-					pci_channel_state_t error)
-{
-	if (error == pci_channel_io_frozen)
-		return PCI_ERS_RESULT_NEED_RESET;
-	return PCI_ERS_RESULT_CAN_RECOVER;
-}
-
-static pci_ers_result_t pcie_portdrv_slot_reset(struct pci_dev *dev)
-{
-	size_t off = offsetof(struct pcie_port_service_driver, slot_reset);
-	device_for_each_child(&dev->dev, &off, pcie_port_device_iter);
-
-	pci_restore_state(dev);
-	pci_save_state(dev);
-	return PCI_ERS_RESULT_RECOVERED;
-}
-
-static pci_ers_result_t pcie_portdrv_mmio_enabled(struct pci_dev *dev)
-{
-	return PCI_ERS_RESULT_RECOVERED;
-}
-
-/*
- * LINUX Device Driver Model
- */
-static const struct pci_device_id port_pci_ids[] = {
-	/* handle any PCI-Express port */
-	{ PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_NORMAL, ~0) },
-	/* subtractive decode PCI-to-PCI bridge, class type is 060401h */
-	{ PCI_DEVICE_CLASS(PCI_CLASS_BRIDGE_PCI_SUBTRACTIVE, ~0) },
-	/* handle any Root Complex Event Collector */
-	{ PCI_DEVICE_CLASS(((PCI_CLASS_SYSTEM_RCEC << 8) | 0x00), ~0) },
-	{ },
-};
-
-static const struct pci_error_handlers pcie_portdrv_err_handler = {
-	.error_detected = pcie_portdrv_error_detected,
-	.slot_reset = pcie_portdrv_slot_reset,
-	.mmio_enabled = pcie_portdrv_mmio_enabled,
-};
-
-static struct pci_driver pcie_portdriver = {
-	.name		= "pcieport",
-	.id_table	= &port_pci_ids[0],
-
-	.probe		= pcie_portdrv_probe,
-	.remove		= pcie_portdrv_remove,
-	.shutdown	= pcie_portdrv_remove,
-
-	.err_handler	= &pcie_portdrv_err_handler,
-
-	.driver_managed_dma = true,
-
-	.driver.pm	= PCIE_PORTDRV_PM_OPS,
-};
-
-static int __init dmi_pcie_pme_disable_msi(const struct dmi_system_id *d)
-{
-	pr_notice("%s detected: will not use MSI for PCIe PME signaling\n",
-		  d->ident);
-	pcie_pme_disable_msi();
-	return 0;
-}
-
-static const struct dmi_system_id pcie_portdrv_dmi_table[] __initconst = {
-	/*
-	 * Boxes that should not use MSI for PCIe PME signaling.
-	 */
-	{
-	 .callback = dmi_pcie_pme_disable_msi,
-	 .ident = "MSI Wind U-100",
-	 .matches = {
-		     DMI_MATCH(DMI_SYS_VENDOR,
-				"MICRO-STAR INTERNATIONAL CO., LTD"),
-		     DMI_MATCH(DMI_PRODUCT_NAME, "U-100"),
-		     },
-	 },
-	 {}
-};
-
-static void __init pcie_init_services(void)
-{
-	pcie_aer_init();
-	pcie_pme_init();
-	pcie_dpc_init();
-	pcie_hp_init();
-}
-
-static int __init pcie_portdrv_init(void)
-{
-	if (pcie_ports_disabled)
-		return -EACCES;
-
-	pcie_init_services();
-	dmi_check_system(pcie_portdrv_dmi_table);
-
-	return pci_register_driver(&pcie_portdriver);
-}
-device_initcall(pcie_portdrv_init);
-- 
GitLab


From 29f193feeea3e3af1c4650870f08ca896b83e1db Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 19 Oct 2022 15:41:26 -0500
Subject: [PATCH 0390/4122] PCI/portdrv: Move private things to portdrv.c

Previously several things used by portdrv_core.c and portdrv_pci.c were
shared by defining them in portdrv.h.  Now that portdrv_core.c and
portdrv_pci.c have been squashed, move things that can be private into
portdrv.c.  No functional change intended.

Link: https://lore.kernel.org/r/20221019204127.44463-3-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
 drivers/pci/pcie/portdrv.c | 9 +++++++++
 drivers/pci/pcie/portdrv.h | 9 ---------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
index 0b4a1f9c2a6b8..ae8da5b2e922c 100644
--- a/drivers/pci/pcie/portdrv.c
+++ b/drivers/pci/pcie/portdrv.c
@@ -21,6 +21,15 @@
 #include "../pci.h"
 #include "portdrv.h"
 
+/*
+ * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must
+ * be one of the first 32 MSI-X entries.  Per PCI r3.0, sec 6.8.3.1, MSI
+ * supports a maximum of 32 vectors per function.
+ */
+#define PCIE_PORT_MAX_MSI_ENTRIES	32
+
+#define get_descriptor_id(type, service) (((type - 4) << 8) | service)
+
 struct portdrv_service_data {
 	struct pcie_port_service_driver *drv;
 	struct device *dev;
diff --git a/drivers/pci/pcie/portdrv.h b/drivers/pci/pcie/portdrv.h
index bf380bcea6a5b..58a2b1a1cae4c 100644
--- a/drivers/pci/pcie/portdrv.h
+++ b/drivers/pci/pcie/portdrv.h
@@ -98,15 +98,6 @@ struct pcie_port_service_driver {
 int pcie_port_service_register(struct pcie_port_service_driver *new);
 void pcie_port_service_unregister(struct pcie_port_service_driver *new);
 
-/*
- * The PCIe Capability Interrupt Message Number (PCIe r3.1, sec 7.8.2) must
- * be one of the first 32 MSI-X entries.  Per PCI r3.0, sec 6.8.3.1, MSI
- * supports a maximum of 32 vectors per function.
- */
-#define PCIE_PORT_MAX_MSI_ENTRIES	32
-
-#define get_descriptor_id(type, service) (((type - 4) << 8) | service)
-
 extern struct bus_type pcie_port_bus_type;
 
 struct pci_dev;
-- 
GitLab


From 461a65d7d1a4f56b97c9115eda3e8619516f40fb Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 19 Oct 2022 15:41:27 -0500
Subject: [PATCH 0391/4122] PCI/portdrv: Unexport pcie_port_service_register(),
 pcie_port_service_unregister()

pcie_port_service_register() and pcie_port_service_unregister() are used
only by the pciehp, aer, dpc, and pme PCIe port service drivers, none of
which can be modules.  Unexport pcie_port_service_register() and
pcie_port_service_unregister().  No functional change intended.

Link: https://lore.kernel.org/r/20221019204127.44463-4-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
---
 drivers/pci/pcie/portdrv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
index ae8da5b2e922c..a6c4225505d53 100644
--- a/drivers/pci/pcie/portdrv.c
+++ b/drivers/pci/pcie/portdrv.c
@@ -584,7 +584,6 @@ int pcie_port_service_register(struct pcie_port_service_driver *new)
 
 	return driver_register(&new->driver);
 }
-EXPORT_SYMBOL(pcie_port_service_register);
 
 /**
  * pcie_port_service_unregister - unregister PCI Express port service driver
@@ -594,7 +593,6 @@ void pcie_port_service_unregister(struct pcie_port_service_driver *drv)
 {
 	driver_unregister(&drv->driver);
 }
-EXPORT_SYMBOL(pcie_port_service_unregister);
 
 /* If this switch is set, PCIe port native services should not be enabled. */
 bool pcie_ports_disabled;
-- 
GitLab


From 2f7a29debae2efef94b981377fa3622986cd57f5 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Mon, 26 Sep 2022 10:28:39 +0800
Subject: [PATCH 0392/4122] apparmor: remove useless static inline functions

Remove the following useless static inline functions:

1. label_is_visible() is a static function in
security/apparmor/label.c, and it's not used, aa_ns_visible()
can do the same things as it, so it's redundant.

2. is_deleted() is a static function in security/apparmor/file.c,
and it's not used since commit aebd873e8d3e ("apparmor: refactor
path name lookup and permission checks around labels"), so it's
redundant.

They are redundant, so remove them.

Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/file.c  | 13 -------------
 security/apparmor/label.c |  6 ------
 2 files changed, 19 deletions(-)

diff --git a/security/apparmor/file.c b/security/apparmor/file.c
index e7dc5ea389973..deb73480f0c65 100644
--- a/security/apparmor/file.c
+++ b/security/apparmor/file.c
@@ -141,19 +141,6 @@ int aa_audit_file(struct aa_profile *profile, struct aa_perms *perms,
 	return aa_audit(type, profile, &sa, file_audit_cb);
 }
 
-/**
- * is_deleted - test if a file has been completely unlinked
- * @dentry: dentry of file to test for deletion  (NOT NULL)
- *
- * Returns: true if deleted else false
- */
-static inline bool is_deleted(struct dentry *dentry)
-{
-	if (d_unlinked(dentry) && d_backing_inode(dentry)->i_nlink == 0)
-		return true;
-	return false;
-}
-
 static int path_name(const char *op, struct aa_label *label,
 		     const struct path *path, int flags, char *buffer,
 		     const char **name, struct path_cond *cond, u32 request)
diff --git a/security/apparmor/label.c b/security/apparmor/label.c
index aa4031628af55..8a2af96f4da57 100644
--- a/security/apparmor/label.c
+++ b/security/apparmor/label.c
@@ -1256,12 +1256,6 @@ out:
 	return label;
 }
 
-static inline bool label_is_visible(struct aa_profile *profile,
-				    struct aa_label *label)
-{
-	return aa_ns_visible(profile->ns, labels_ns(label), true);
-}
-
 /* match a profile and its associated ns component if needed
  * Assumes visibility test has already been done.
  * If a subns profile is not to be matched should be prescreened with
-- 
GitLab


From 1f2bc06a8dbff73957f433b22c6fd35fccfb47a4 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Mon, 26 Sep 2022 19:48:38 +0800
Subject: [PATCH 0393/4122] apparmor: fix obsoleted comments for
 aa_getprocattr() and audit_resource()

Update the comments for aa_getprocattr() and audit_resource(), the
args of them have beed changed since commit 76a1d263aba3 ("apparmor:
switch getprocattr to using label_print fns()").

Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/procattr.c | 11 +++++------
 security/apparmor/resource.c |  2 ++
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/security/apparmor/procattr.c b/security/apparmor/procattr.c
index 86ad26ef72ed4..197d41f9c32b7 100644
--- a/security/apparmor/procattr.c
+++ b/security/apparmor/procattr.c
@@ -17,14 +17,13 @@
 
 
 /**
- * aa_getprocattr - Return the profile information for @profile
- * @profile: the profile to print profile info about  (NOT NULL)
- * @string: Returns - string containing the profile info (NOT NULL)
+ * aa_getprocattr - Return the label information for @label
+ * @label: the label to print label info about  (NOT NULL)
+ * @string: Returns - string containing the label info (NOT NULL)
  *
- * Requires: profile != NULL
+ * Requires: label != NULL && string != NULL
  *
- * Creates a string containing the namespace_name://profile_name for
- * @profile.
+ * Creates a string containing the label information for @label.
  *
  * Returns: size of string placed in @string else error code on failure
  */
diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index ed543f4edfd9f..1b75d8343a8db 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -45,6 +45,8 @@ static void audit_cb(struct audit_buffer *ab, void *va)
  * @profile: profile being enforced  (NOT NULL)
  * @resource: rlimit being auditing
  * @value: value being set
+ * @peer: aa_albel of the task being set
+ * @info: info being auditing
  * @error: error value
  *
  * Returns: 0 or sa->error else other error code on failure
-- 
GitLab


From 58f89ce58bb4f5cf5963b20a19aaa2431b0412d8 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 3 Oct 2022 02:48:24 -0700
Subject: [PATCH 0394/4122] apparmor: refactor code that alloc null profiles

Bother unconfined and learning profiles use the null profile as their
base. Refactor so they are share a common base routine. This doesn't
save much atm but will be important when the feature set of the
parent is inherited.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/domain.c         | 12 ++++----
 security/apparmor/include/policy.h |  6 ++--
 security/apparmor/policy.c         | 47 ++++++++++++++++++++----------
 security/apparmor/policy_ns.c      |  6 +---
 4 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c
index d4b09f061aee2..b447bc13ea8e2 100644
--- a/security/apparmor/domain.c
+++ b/security/apparmor/domain.c
@@ -681,8 +681,8 @@ static struct aa_label *profile_transition(struct aa_profile *profile,
 		/* no exec permission - learning mode */
 		struct aa_profile *new_profile = NULL;
 
-		new_profile = aa_new_null_profile(profile, false, name,
-						  GFP_KERNEL);
+		new_profile = aa_new_learning_profile(profile, false, name,
+						      GFP_KERNEL);
 		if (!new_profile) {
 			error = -ENOMEM;
 			info = "could not create null profile";
@@ -1009,8 +1009,8 @@ static struct aa_label *build_change_hat(struct aa_profile *profile,
 	if (!hat) {
 		error = -ENOENT;
 		if (COMPLAIN_MODE(profile)) {
-			hat = aa_new_null_profile(profile, true, name,
-						  GFP_KERNEL);
+			hat = aa_new_learning_profile(profile, true, name,
+						      GFP_KERNEL);
 			if (!hat) {
 				info = "failed null profile create";
 				error = -ENOMEM;
@@ -1361,8 +1361,8 @@ int aa_change_profile(const char *fqname, int flags)
 		    !COMPLAIN_MODE(labels_profile(label)))
 			goto audit;
 		/* released below */
-		tprofile = aa_new_null_profile(labels_profile(label), false,
-					       fqname, GFP_KERNEL);
+		tprofile = aa_new_learning_profile(labels_profile(label), false,
+						   fqname, GFP_KERNEL);
 		if (!tprofile) {
 			info = "failed null profile create";
 			error = -ENOMEM;
diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h
index 5cadfb20df294..545f791cabdae 100644
--- a/security/apparmor/include/policy.h
+++ b/security/apparmor/include/policy.h
@@ -234,8 +234,10 @@ void aa_free_proxy_kref(struct kref *kref);
 struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp);
 struct aa_profile *aa_alloc_profile(const char *name, struct aa_proxy *proxy,
 				    gfp_t gfp);
-struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
-				       const char *base, gfp_t gfp);
+struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name,
+				 gfp_t gfp);
+struct aa_profile *aa_new_learning_profile(struct aa_profile *parent, bool hat,
+					   const char *base, gfp_t gfp);
 void aa_free_profile(struct aa_profile *profile);
 void aa_free_profile_kref(struct kref *kref);
 struct aa_profile *aa_find_child(struct aa_profile *parent, const char *name);
diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 6f4cc8bfe03de..c17ccedd35f1e 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -524,8 +524,36 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base,
 	return profile;
 }
 
+
+struct aa_profile *aa_alloc_null(struct aa_profile *parent, const char *name,
+				 gfp_t gfp)
+{
+	struct aa_profile *profile;
+	struct aa_ruleset *rules;
+
+	profile = aa_alloc_profile(name, NULL, gfp);
+	if (!profile)
+		return NULL;
+
+	/* TODO: ideally we should inherit abi from parent */
+	profile->label.flags |= FLAG_NULL;
+	rules = list_first_entry(&profile->rules, typeof(*rules), list);
+	rules->file.dfa = aa_get_dfa(nulldfa);
+	rules->policy.dfa = aa_get_dfa(nulldfa);
+
+	if (parent) {
+		profile->path_flags = parent->path_flags;
+
+		/* released on free_profile */
+		rcu_assign_pointer(profile->parent, aa_get_profile(parent));
+		profile->ns = aa_get_ns(parent->ns);
+	}
+
+	return profile;
+}
+
 /**
- * aa_new_null_profile - create or find a null-X learning profile
+ * aa_new_learning_profile - create or find a null-X learning profile
  * @parent: profile that caused this profile to be created (NOT NULL)
  * @hat: true if the null- learning profile is a hat
  * @base: name to base the null profile off of
@@ -542,10 +570,9 @@ struct aa_profile *aa_fqlookupn_profile(struct aa_label *base,
  *
  * Returns: new refcounted profile else NULL on failure
  */
-struct aa_profile *aa_new_null_profile(struct aa_profile *parent, bool hat,
-				       const char *base, gfp_t gfp)
+struct aa_profile *aa_new_learning_profile(struct aa_profile *parent, bool hat,
+					   const char *base, gfp_t gfp)
 {
-	struct aa_ruleset *rules;
 	struct aa_profile *p, *profile;
 	const char *bname;
 	char *name = NULL;
@@ -575,22 +602,12 @@ name:
 	if (profile)
 		goto out;
 
-	profile = aa_alloc_profile(name, NULL, gfp);
+	profile = aa_alloc_null(parent, name, gfp);
 	if (!profile)
 		goto fail;
-
 	profile->mode = APPARMOR_COMPLAIN;
-	profile->label.flags |= FLAG_NULL;
 	if (hat)
 		profile->label.flags |= FLAG_HAT;
-	profile->path_flags = parent->path_flags;
-
-	/* released on free_profile */
-	rcu_assign_pointer(profile->parent, aa_get_profile(parent));
-	profile->ns = aa_get_ns(parent->ns);
-	rules = list_first_entry(&profile->rules, typeof(*rules), list);
-	rules->file.dfa = aa_get_dfa(nulldfa);
-	rules->policy.dfa = aa_get_dfa(nulldfa);
 
 	mutex_lock_nested(&profile->ns->lock, profile->ns->level);
 	p = __find_child(&parent->base.profiles, bname);
diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index 121aa79bccaac..5c38563a6dcfd 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -83,18 +83,14 @@ const char *aa_ns_name(struct aa_ns *curr, struct aa_ns *view, bool subns)
 static struct aa_profile *alloc_unconfined(const char *name)
 {
 	struct aa_profile *profile;
-	struct aa_ruleset *rules;
 
-	profile = aa_alloc_profile(name, NULL, GFP_KERNEL);
+	profile = aa_alloc_null(NULL, name, GFP_KERNEL);
 	if (!profile)
 		return NULL;
 
 	profile->label.flags |= FLAG_IX_ON_NAME_ERROR |
 		FLAG_IMMUTIBLE | FLAG_NS_COUNT | FLAG_UNCONFINED;
 	profile->mode = APPARMOR_UNCONFINED;
-	rules = list_first_entry(&profile->rules, typeof(*rules), list);
-	rules->file.dfa = aa_get_dfa(nulldfa);
-	rules->policy.dfa = aa_get_dfa(nulldfa);
 
 	return profile;
 }
-- 
GitLab


From 5ebc548f4f54fe971d741d80cd108f1a45c9e88d Mon Sep 17 00:00:00 2001
From: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Date: Thu, 13 Oct 2022 10:47:23 +0900
Subject: [PATCH 0395/4122] RDMA/rxe: Make responder handle RDMA Read failures

Currently, responder can reply packets with invalid payloads if it fails
to copy messages to the packets. Add an error handling in read_reply() to
inform a requesting node of the failure.

Link: https://lore.kernel.org/r/20221013014724.3786212-1-matsuda-daisuke@fujitsu.com
Suggested-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index ed5a09e86417e..82b74e926e09f 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -809,10 +809,14 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	if (!skb)
 		return RESPST_ERR_RNR;
 
-	rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
-		    payload, RXE_FROM_MR_OBJ);
+	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
+			  payload, RXE_FROM_MR_OBJ);
 	if (mr)
 		rxe_put(mr);
+	if (err) {
+		kfree_skb(skb);
+		return RESPST_ERR_RKEY_VIOLATION;
+	}
 
 	if (bth_pad(&ack_pkt)) {
 		u8 *pad = payload_addr(&ack_pkt) + payload;
-- 
GitLab


From 5ac814e02ece516761d2e244cef93843df911ae0 Mon Sep 17 00:00:00 2001
From: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Date: Thu, 13 Oct 2022 10:47:24 +0900
Subject: [PATCH 0396/4122] RDMA/rxe: Handle remote errors in the midst of a
 Read reply sequence

Requesting nodes do not handle a reported error correctly if it is
generated in the middle of multi-packet Read responses, and the node tries
to resend the request endlessly. Let completer terminate the connection in
that case.

Link: https://lore.kernel.org/r/20221013014724.3786212-2-matsuda-daisuke@fujitsu.com
Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index fb0c008af78cc..c9170dd99f3aa 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -200,6 +200,10 @@ static inline enum comp_state check_psn(struct rxe_qp *qp,
 		 */
 		if (pkt->psn == wqe->last_psn)
 			return COMPST_COMP_ACK;
+		else if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE &&
+			 (qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST ||
+			  qp->comp.opcode == IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE))
+			return COMPST_CHECK_ACK;
 		else
 			return COMPST_DONE;
 	} else if ((diff > 0) && (wqe->mask & WR_ATOMIC_OR_READ_MASK)) {
@@ -228,6 +232,10 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_FIRST:
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
+		/* Check NAK code to handle a remote error */
+		if (pkt->opcode == IB_OPCODE_RC_ACKNOWLEDGE)
+			break;
+
 		if (pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE &&
 		    pkt->opcode != IB_OPCODE_RC_RDMA_READ_RESPONSE_LAST) {
 			/* read retries of partial data may restart from
-- 
GitLab


From 686d348476ee8006087cfcbef591e28f4f91bd8b Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Mon, 24 Oct 2022 03:31:54 +0000
Subject: [PATCH 0397/4122] RDMA/rxe: Remove unnecessary mr testing

Before the testing, we already passed it to rxe_mr_copy() where mr could
be dereferenced. so this checking is not needed.

The only way that mr is NULL is when it reaches below line 780 with
 'qp->resp.mr = NULL', which is not possible in Bob's explanation[1].

 778         if (res->state == rdatm_res_state_new) {
 779                 if (!res->replay) {
 780                         mr = qp->resp.mr;
 781                         qp->resp.mr = NULL;
 782                 } else {

[1] https://lore.kernel.org/lkml/30ff25c4-ce66-eac4-eaa2-64c0db203a19@gmail.com/

Link: https://lore.kernel.org/r/1666582315-2-1-git-send-email-lizhijian@fujitsu.com
CC: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Reviewed-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 82b74e926e09f..95d372db934d7 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -811,8 +811,7 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 
 	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
 			  payload, RXE_FROM_MR_OBJ);
-	if (mr)
-		rxe_put(mr);
+	rxe_put(mr);
 	if (err) {
 		kfree_skb(skb);
 		return RESPST_ERR_RKEY_VIOLATION;
-- 
GitLab


From 7872d4236f74ad6df4d2c9189b66f95157e24a9b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 26 Sep 2022 22:17:29 +0800
Subject: [PATCH 0398/4122] fpga: zynq: Switch to use dev_err_probe() helper

In the probe path, dev_err() can be replaced with dev_err_probe()
which will check if error code is -EPROBE_DEFER and prints the
error name. It also sets the defer probe reason which can be
checked later through debugfs. It's more simple in error path.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Xu Yilun <yilun.xu@intel.com>
Link: https://lore.kernel.org/r/20220926141729.2292311-1-yangyingliang@huawei.com
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
---
 drivers/fpga/zynq-fpga.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/fpga/zynq-fpga.c b/drivers/fpga/zynq-fpga.c
index 426aa34c6a0dc..ae0da361e6c62 100644
--- a/drivers/fpga/zynq-fpga.c
+++ b/drivers/fpga/zynq-fpga.c
@@ -582,11 +582,9 @@ static int zynq_fpga_probe(struct platform_device *pdev)
 		return priv->irq;
 
 	priv->clk = devm_clk_get(dev, "ref_clk");
-	if (IS_ERR(priv->clk)) {
-		if (PTR_ERR(priv->clk) != -EPROBE_DEFER)
-			dev_err(dev, "input clock not found\n");
-		return PTR_ERR(priv->clk);
-	}
+	if (IS_ERR(priv->clk))
+		return dev_err_probe(dev, PTR_ERR(priv->clk),
+				     "input clock not found\n");
 
 	err = clk_prepare_enable(priv->clk);
 	if (err) {
-- 
GitLab


From 463dd43bc976d1dfaba5cd46b2a10addf8d3b4de Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Date: Tue, 25 Oct 2022 08:39:46 +0300
Subject: [PATCH 0399/4122] fpga: lattice-sysconfig-spi: add Lattice sysCONFIG
 FPGA manager

Add support to the FPGA manager for programming Lattice ECP5 FPGA over
slave SPI sysCONFIG interface.

sysCONFIG interface core functionality is separate from both ECP5 and
SPI specifics, so support for other FPGAs with different port types can
be added in the future.

Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Acked-by: Xu Yilun <yilun.xu@intel.com>
Link: https://lore.kernel.org/r/20221025053947.2737-2-i.bornyakov@metrotek.ru
[yilun.xu@intel.com: remove redundant blank line after kmemdup]
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
---
 drivers/fpga/Kconfig                 |  11 +
 drivers/fpga/Makefile                |   2 +
 drivers/fpga/lattice-sysconfig-spi.c | 152 ++++++++++
 drivers/fpga/lattice-sysconfig.c     | 397 +++++++++++++++++++++++++++
 drivers/fpga/lattice-sysconfig.h     |  39 +++
 5 files changed, 601 insertions(+)
 create mode 100644 drivers/fpga/lattice-sysconfig-spi.c
 create mode 100644 drivers/fpga/lattice-sysconfig.c
 create mode 100644 drivers/fpga/lattice-sysconfig.h

diff --git a/drivers/fpga/Kconfig b/drivers/fpga/Kconfig
index 6c416955da532..d1a8107fdcb33 100644
--- a/drivers/fpga/Kconfig
+++ b/drivers/fpga/Kconfig
@@ -263,4 +263,15 @@ config FPGA_MGR_MICROCHIP_SPI
 	  programming over slave SPI interface with .dat formatted
 	  bitstream image.
 
+config FPGA_MGR_LATTICE_SYSCONFIG
+	tristate
+
+config FPGA_MGR_LATTICE_SYSCONFIG_SPI
+	tristate "Lattice sysCONFIG SPI FPGA manager"
+	depends on SPI
+	select FPGA_MGR_LATTICE_SYSCONFIG
+	help
+	  FPGA manager driver support for Lattice FPGAs programming over slave
+	  SPI sysCONFIG interface.
+
 endif # FPGA
diff --git a/drivers/fpga/Makefile b/drivers/fpga/Makefile
index 42ae8b58abce7..72e554b4d2f76 100644
--- a/drivers/fpga/Makefile
+++ b/drivers/fpga/Makefile
@@ -20,6 +20,8 @@ obj-$(CONFIG_FPGA_MGR_ZYNQ_FPGA)	+= zynq-fpga.o
 obj-$(CONFIG_FPGA_MGR_ZYNQMP_FPGA)	+= zynqmp-fpga.o
 obj-$(CONFIG_FPGA_MGR_VERSAL_FPGA)	+= versal-fpga.o
 obj-$(CONFIG_FPGA_MGR_MICROCHIP_SPI)	+= microchip-spi.o
+obj-$(CONFIG_FPGA_MGR_LATTICE_SYSCONFIG)	+= lattice-sysconfig.o
+obj-$(CONFIG_FPGA_MGR_LATTICE_SYSCONFIG_SPI)	+= lattice-sysconfig-spi.o
 obj-$(CONFIG_ALTERA_PR_IP_CORE)		+= altera-pr-ip-core.o
 obj-$(CONFIG_ALTERA_PR_IP_CORE_PLAT)	+= altera-pr-ip-core-plat.o
 
diff --git a/drivers/fpga/lattice-sysconfig-spi.c b/drivers/fpga/lattice-sysconfig-spi.c
new file mode 100644
index 0000000000000..2702b26b7f55e
--- /dev/null
+++ b/drivers/fpga/lattice-sysconfig-spi.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lattice FPGA programming over slave SPI sysCONFIG interface.
+ */
+
+#include <linux/spi/spi.h>
+
+#include "lattice-sysconfig.h"
+
+static const u32 ecp5_spi_max_speed_hz = 60000000;
+
+static int sysconfig_spi_cmd_transfer(struct sysconfig_priv *priv,
+				      const void *tx_buf, size_t tx_len,
+				      void *rx_buf, size_t rx_len)
+{
+	struct spi_device *spi = to_spi_device(priv->dev);
+
+	return spi_write_then_read(spi, tx_buf, tx_len, rx_buf, rx_len);
+}
+
+static int sysconfig_spi_bitstream_burst_init(struct sysconfig_priv *priv)
+{
+	const u8 lsc_bitstream_burst[] = SYSCONFIG_LSC_BITSTREAM_BURST;
+	struct spi_device *spi = to_spi_device(priv->dev);
+	struct spi_transfer xfer = {};
+	struct spi_message msg;
+	size_t buf_len;
+	void *buf;
+	int ret;
+
+	buf_len = sizeof(lsc_bitstream_burst);
+
+	buf = kmemdup(lsc_bitstream_burst, buf_len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	xfer.len = buf_len;
+	xfer.tx_buf = buf;
+	xfer.cs_change = 1;
+
+	spi_message_init_with_transfers(&msg, &xfer, 1);
+
+	/*
+	 * Lock SPI bus for exclusive usage until FPGA programming is done.
+	 * SPI bus will be released in sysconfig_spi_bitstream_burst_complete().
+	 */
+	spi_bus_lock(spi->controller);
+
+	ret = spi_sync_locked(spi, &msg);
+	if (ret)
+		spi_bus_unlock(spi->controller);
+
+	kfree(buf);
+
+	return ret;
+}
+
+static int sysconfig_spi_bitstream_burst_write(struct sysconfig_priv *priv,
+					       const char *buf, size_t len)
+{
+	struct spi_device *spi = to_spi_device(priv->dev);
+	struct spi_transfer xfer = {
+		.tx_buf = buf,
+		.len = len,
+		.cs_change = 1,
+	};
+	struct spi_message msg;
+
+	spi_message_init_with_transfers(&msg, &xfer, 1);
+
+	return spi_sync_locked(spi, &msg);
+}
+
+static int sysconfig_spi_bitstream_burst_complete(struct sysconfig_priv *priv)
+{
+	struct spi_device *spi = to_spi_device(priv->dev);
+
+	/* Bitstream burst write is done, release SPI bus */
+	spi_bus_unlock(spi->controller);
+
+	/* Toggle CS to finish bitstream write */
+	return spi_write(spi, NULL, 0);
+}
+
+static int sysconfig_spi_probe(struct spi_device *spi)
+{
+	const struct spi_device_id *dev_id;
+	struct device *dev = &spi->dev;
+	struct sysconfig_priv *priv;
+	const u32 *spi_max_speed;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	spi_max_speed = device_get_match_data(dev);
+	if (!spi_max_speed) {
+		dev_id = spi_get_device_id(spi);
+		if (!dev_id)
+			return -ENODEV;
+
+		spi_max_speed = (const u32 *)dev_id->driver_data;
+	}
+
+	if (!spi_max_speed)
+		return -EINVAL;
+
+	if (spi->max_speed_hz > *spi_max_speed) {
+		dev_err(dev, "SPI speed %u is too high, maximum speed is %u\n",
+			spi->max_speed_hz, *spi_max_speed);
+		return -EINVAL;
+	}
+
+	priv->dev = dev;
+	priv->command_transfer = sysconfig_spi_cmd_transfer;
+	priv->bitstream_burst_write_init = sysconfig_spi_bitstream_burst_init;
+	priv->bitstream_burst_write = sysconfig_spi_bitstream_burst_write;
+	priv->bitstream_burst_write_complete = sysconfig_spi_bitstream_burst_complete;
+
+	return sysconfig_probe(priv);
+}
+
+static const struct spi_device_id sysconfig_spi_ids[] = {
+	{
+		.name = "sysconfig-ecp5",
+		.driver_data = (kernel_ulong_t)&ecp5_spi_max_speed_hz,
+	}, {},
+};
+MODULE_DEVICE_TABLE(spi, sysconfig_spi_ids);
+
+#if IS_ENABLED(CONFIG_OF)
+static const struct of_device_id sysconfig_of_ids[] = {
+	{
+		.compatible = "lattice,sysconfig-ecp5",
+		.data = &ecp5_spi_max_speed_hz,
+	}, {},
+};
+MODULE_DEVICE_TABLE(of, sysconfig_of_ids);
+#endif /* IS_ENABLED(CONFIG_OF) */
+
+static struct spi_driver lattice_sysconfig_driver = {
+	.probe = sysconfig_spi_probe,
+	.id_table = sysconfig_spi_ids,
+	.driver = {
+		.name = "lattice_sysconfig_spi_fpga_mgr",
+		.of_match_table = of_match_ptr(sysconfig_of_ids),
+	},
+};
+module_spi_driver(lattice_sysconfig_driver);
+
+MODULE_DESCRIPTION("Lattice sysCONFIG Slave SPI FPGA Manager");
+MODULE_LICENSE("GPL");
diff --git a/drivers/fpga/lattice-sysconfig.c b/drivers/fpga/lattice-sysconfig.c
new file mode 100644
index 0000000000000..ba51a60f672fe
--- /dev/null
+++ b/drivers/fpga/lattice-sysconfig.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lattice FPGA sysCONFIG interface functions independent of port type.
+ */
+
+#include <linux/delay.h>
+#include <linux/fpga/fpga-mgr.h>
+#include <linux/gpio/consumer.h>
+#include <linux/iopoll.h>
+
+#include "lattice-sysconfig.h"
+
+static int sysconfig_cmd_write(struct sysconfig_priv *priv, const void *buf,
+			       size_t buf_len)
+{
+	return priv->command_transfer(priv, buf, buf_len, NULL, 0);
+}
+
+static int sysconfig_cmd_read(struct sysconfig_priv *priv, const void *tx_buf,
+			      size_t tx_len, void *rx_buf, size_t rx_len)
+{
+	return priv->command_transfer(priv, tx_buf, tx_len, rx_buf, rx_len);
+}
+
+static int sysconfig_read_busy(struct sysconfig_priv *priv)
+{
+	const u8 lsc_check_busy[] = SYSCONFIG_LSC_CHECK_BUSY;
+	u8 busy;
+	int ret;
+
+	ret = sysconfig_cmd_read(priv, lsc_check_busy, sizeof(lsc_check_busy),
+				 &busy, sizeof(busy));
+
+	return ret ? : busy;
+}
+
+static int sysconfig_poll_busy(struct sysconfig_priv *priv)
+{
+	int ret, busy;
+
+	ret = read_poll_timeout(sysconfig_read_busy, busy, busy <= 0,
+				SYSCONFIG_POLL_INTERVAL_US,
+				SYSCONFIG_POLL_BUSY_TIMEOUT_US, false, priv);
+
+	return ret ? : busy;
+}
+
+static int sysconfig_read_status(struct sysconfig_priv *priv, u32 *status)
+{
+	const u8 lsc_read_status[] = SYSCONFIG_LSC_READ_STATUS;
+	__be32 device_status;
+	int ret;
+
+	ret = sysconfig_cmd_read(priv, lsc_read_status, sizeof(lsc_read_status),
+				 &device_status, sizeof(device_status));
+	if (ret)
+		return ret;
+
+	*status = be32_to_cpu(device_status);
+
+	return 0;
+}
+
+static int sysconfig_poll_status(struct sysconfig_priv *priv, u32 *status)
+{
+	int ret = sysconfig_poll_busy(priv);
+
+	if (ret)
+		return ret;
+
+	return sysconfig_read_status(priv, status);
+}
+
+static int sysconfig_poll_gpio(struct gpio_desc *gpio, bool is_active)
+{
+	int ret, val;
+
+	ret = read_poll_timeout(gpiod_get_value, val,
+				val < 0 || !!val == is_active,
+				SYSCONFIG_POLL_INTERVAL_US,
+				SYSCONFIG_POLL_GPIO_TIMEOUT_US, false, gpio);
+
+	if (val < 0)
+		return val;
+
+	return ret;
+}
+
+static int sysconfig_gpio_refresh(struct sysconfig_priv *priv)
+{
+	struct gpio_desc *program = priv->program;
+	struct gpio_desc *init = priv->init;
+	struct gpio_desc *done = priv->done;
+	int ret;
+
+	/* Enter init mode */
+	gpiod_set_value(program, 1);
+
+	ret = sysconfig_poll_gpio(init, true);
+	if (!ret)
+		ret = sysconfig_poll_gpio(done, false);
+
+	if (ret)
+		return ret;
+
+	/* Enter program mode */
+	gpiod_set_value(program, 0);
+
+	return sysconfig_poll_gpio(init, false);
+}
+
+static int sysconfig_lsc_refresh(struct sysconfig_priv *priv)
+{
+	static const u8 lsc_refresh[] = SYSCONFIG_LSC_REFRESH;
+	int ret;
+
+	ret = sysconfig_cmd_write(priv, lsc_refresh, sizeof(lsc_refresh));
+	if (ret)
+		return ret;
+
+	usleep_range(4000, 8000);
+
+	return 0;
+}
+
+static int sysconfig_refresh(struct sysconfig_priv *priv)
+{
+	struct gpio_desc *program = priv->program;
+	struct gpio_desc *init = priv->init;
+	struct gpio_desc *done = priv->done;
+
+	if (program && init && done)
+		return sysconfig_gpio_refresh(priv);
+
+	return sysconfig_lsc_refresh(priv);
+}
+
+static int sysconfig_isc_enable(struct sysconfig_priv *priv)
+{
+	u8 isc_enable[] = SYSCONFIG_ISC_ENABLE;
+	u32 status;
+	int ret;
+
+	ret = sysconfig_cmd_write(priv, isc_enable, sizeof(isc_enable));
+	if (ret)
+		return ret;
+
+	ret = sysconfig_poll_status(priv, &status);
+	if (ret)
+		return ret;
+
+	if (status & SYSCONFIG_STATUS_FAIL)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int sysconfig_isc_erase(struct sysconfig_priv *priv)
+{
+	u8 isc_erase[] = SYSCONFIG_ISC_ERASE;
+	u32 status;
+	int ret;
+
+	ret = sysconfig_cmd_write(priv, isc_erase, sizeof(isc_erase));
+	if (ret)
+		return ret;
+
+	ret = sysconfig_poll_status(priv, &status);
+	if (ret)
+		return ret;
+
+	if (status & SYSCONFIG_STATUS_FAIL)
+		return -EFAULT;
+
+	return 0;
+}
+
+static int sysconfig_isc_init(struct sysconfig_priv *priv)
+{
+	int ret = sysconfig_isc_enable(priv);
+
+	if (ret)
+		return ret;
+
+	return sysconfig_isc_erase(priv);
+}
+
+static int sysconfig_lsc_init_addr(struct sysconfig_priv *priv)
+{
+	const u8 lsc_init_addr[] = SYSCONFIG_LSC_INIT_ADDR;
+
+	return sysconfig_cmd_write(priv, lsc_init_addr, sizeof(lsc_init_addr));
+}
+
+static int sysconfig_burst_write_init(struct sysconfig_priv *priv)
+{
+	return priv->bitstream_burst_write_init(priv);
+}
+
+static int sysconfig_burst_write_complete(struct sysconfig_priv *priv)
+{
+	return priv->bitstream_burst_write_complete(priv);
+}
+
+static int sysconfig_bitstream_burst_write(struct sysconfig_priv *priv,
+					   const char *buf, size_t count)
+{
+	int ret = priv->bitstream_burst_write(priv, buf, count);
+
+	if (ret)
+		sysconfig_burst_write_complete(priv);
+
+	return ret;
+}
+
+static int sysconfig_isc_disable(struct sysconfig_priv *priv)
+{
+	const u8 isc_disable[] = SYSCONFIG_ISC_DISABLE;
+
+	return sysconfig_cmd_write(priv, isc_disable, sizeof(isc_disable));
+}
+
+static void sysconfig_cleanup(struct sysconfig_priv *priv)
+{
+	sysconfig_isc_erase(priv);
+	sysconfig_refresh(priv);
+}
+
+static int sysconfig_isc_finish(struct sysconfig_priv *priv)
+{
+	struct gpio_desc *done_gpio = priv->done;
+	u32 status;
+	int ret;
+
+	if (done_gpio) {
+		ret = sysconfig_isc_disable(priv);
+		if (ret)
+			return ret;
+
+		return sysconfig_poll_gpio(done_gpio, true);
+	}
+
+	ret = sysconfig_poll_status(priv, &status);
+	if (ret)
+		return ret;
+
+	if ((status & SYSCONFIG_STATUS_DONE) &&
+	    !(status & SYSCONFIG_STATUS_BUSY) &&
+	    !(status & SYSCONFIG_STATUS_ERR))
+		return sysconfig_isc_disable(priv);
+
+	return -EFAULT;
+}
+
+static enum fpga_mgr_states sysconfig_ops_state(struct fpga_manager *mgr)
+{
+	struct sysconfig_priv *priv = mgr->priv;
+	struct gpio_desc *done = priv->done;
+	u32 status;
+	int ret;
+
+	if (done && (gpiod_get_value(done) > 0))
+		return FPGA_MGR_STATE_OPERATING;
+
+	ret = sysconfig_read_status(priv, &status);
+	if (!ret && (status & SYSCONFIG_STATUS_DONE))
+		return FPGA_MGR_STATE_OPERATING;
+
+	return FPGA_MGR_STATE_UNKNOWN;
+}
+
+static int sysconfig_ops_write_init(struct fpga_manager *mgr,
+				    struct fpga_image_info *info,
+				    const char *buf, size_t count)
+{
+	struct sysconfig_priv *priv = mgr->priv;
+	struct device *dev = &mgr->dev;
+	int ret;
+
+	if (info->flags & FPGA_MGR_PARTIAL_RECONFIG) {
+		dev_err(dev, "Partial reconfiguration is not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* Enter program mode */
+	ret = sysconfig_refresh(priv);
+	if (ret) {
+		dev_err(dev, "Failed to go to program mode\n");
+		return ret;
+	}
+
+	/* Enter ISC mode */
+	ret = sysconfig_isc_init(priv);
+	if (ret) {
+		dev_err(dev, "Failed to go to ISC mode\n");
+		return ret;
+	}
+
+	/* Initialize the Address Shift Register */
+	ret = sysconfig_lsc_init_addr(priv);
+	if (ret) {
+		dev_err(dev,
+			"Failed to initialize the Address Shift Register\n");
+		return ret;
+	}
+
+	/* Prepare for bitstream burst write */
+	ret = sysconfig_burst_write_init(priv);
+	if (ret)
+		dev_err(dev, "Failed to prepare for bitstream burst write\n");
+
+	return ret;
+}
+
+static int sysconfig_ops_write(struct fpga_manager *mgr, const char *buf,
+			       size_t count)
+{
+	return sysconfig_bitstream_burst_write(mgr->priv, buf, count);
+}
+
+static int sysconfig_ops_write_complete(struct fpga_manager *mgr,
+					struct fpga_image_info *info)
+{
+	struct sysconfig_priv *priv = mgr->priv;
+	struct device *dev = &mgr->dev;
+	int ret;
+
+	ret = sysconfig_burst_write_complete(priv);
+	if (!ret)
+		ret = sysconfig_poll_busy(priv);
+
+	if (ret) {
+		dev_err(dev, "Error while waiting bitstream write to finish\n");
+		goto fail;
+	}
+
+	ret = sysconfig_isc_finish(priv);
+
+fail:
+	if (ret)
+		sysconfig_cleanup(priv);
+
+	return ret;
+}
+
+static const struct fpga_manager_ops sysconfig_fpga_mgr_ops = {
+	.state = sysconfig_ops_state,
+	.write_init = sysconfig_ops_write_init,
+	.write = sysconfig_ops_write,
+	.write_complete = sysconfig_ops_write_complete,
+};
+
+int sysconfig_probe(struct sysconfig_priv *priv)
+{
+	struct gpio_desc *program, *init, *done;
+	struct device *dev = priv->dev;
+	struct fpga_manager *mgr;
+
+	if (!dev)
+		return -ENODEV;
+
+	if (!priv->command_transfer ||
+	    !priv->bitstream_burst_write_init ||
+	    !priv->bitstream_burst_write ||
+	    !priv->bitstream_burst_write_complete) {
+		dev_err(dev, "Essential callback is missing\n");
+		return -EINVAL;
+	}
+
+	program = devm_gpiod_get_optional(dev, "program", GPIOD_OUT_LOW);
+	if (IS_ERR(program))
+		return dev_err_probe(dev, PTR_ERR(program),
+				     "Failed to get PROGRAM GPIO\n");
+
+	init = devm_gpiod_get_optional(dev, "init", GPIOD_IN);
+	if (IS_ERR(init))
+		return dev_err_probe(dev, PTR_ERR(init),
+				     "Failed to get INIT GPIO\n");
+
+	done = devm_gpiod_get_optional(dev, "done", GPIOD_IN);
+	if (IS_ERR(done))
+		return dev_err_probe(dev, PTR_ERR(done),
+				     "Failed to get DONE GPIO\n");
+
+	priv->program = program;
+	priv->init = init;
+	priv->done = done;
+
+	mgr = devm_fpga_mgr_register(dev, "Lattice sysCONFIG FPGA Manager",
+				     &sysconfig_fpga_mgr_ops, priv);
+
+	return PTR_ERR_OR_ZERO(mgr);
+}
+EXPORT_SYMBOL(sysconfig_probe);
+
+MODULE_DESCRIPTION("Lattice sysCONFIG FPGA Manager Core");
+MODULE_LICENSE("GPL");
diff --git a/drivers/fpga/lattice-sysconfig.h b/drivers/fpga/lattice-sysconfig.h
new file mode 100644
index 0000000000000..df47d9a524f63
--- /dev/null
+++ b/drivers/fpga/lattice-sysconfig.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef	__LATTICE_SYSCONFIG_H
+#define	__LATTICE_SYSCONFIG_H
+
+#define	SYSCONFIG_ISC_ENABLE		{0xC6, 0x00, 0x00, 0x00}
+#define	SYSCONFIG_ISC_DISABLE		{0x26, 0x00, 0x00, 0x00}
+#define	SYSCONFIG_ISC_ERASE		{0x0E, 0x01, 0x00, 0x00}
+#define	SYSCONFIG_LSC_READ_STATUS	{0x3C, 0x00, 0x00, 0x00}
+#define	SYSCONFIG_LSC_CHECK_BUSY	{0xF0, 0x00, 0x00, 0x00}
+#define	SYSCONFIG_LSC_REFRESH		{0x79, 0x00, 0x00, 0x00}
+#define	SYSCONFIG_LSC_INIT_ADDR		{0x46, 0x00, 0x00, 0x00}
+#define	SYSCONFIG_LSC_BITSTREAM_BURST	{0x7a, 0x00, 0x00, 0x00}
+
+#define	SYSCONFIG_STATUS_DONE		BIT(8)
+#define	SYSCONFIG_STATUS_BUSY		BIT(12)
+#define	SYSCONFIG_STATUS_FAIL		BIT(13)
+#define	SYSCONFIG_STATUS_ERR		GENMASK(25, 23)
+
+#define	SYSCONFIG_POLL_INTERVAL_US	30
+#define	SYSCONFIG_POLL_BUSY_TIMEOUT_US	1000000
+#define	SYSCONFIG_POLL_GPIO_TIMEOUT_US	100000
+
+struct sysconfig_priv {
+	struct gpio_desc *program;
+	struct gpio_desc *init;
+	struct gpio_desc *done;
+	struct device *dev;
+	int (*command_transfer)(struct sysconfig_priv *priv, const void *tx_buf,
+				size_t tx_len, void *rx_buf, size_t rx_len);
+	int (*bitstream_burst_write_init)(struct sysconfig_priv *priv);
+	int (*bitstream_burst_write)(struct sysconfig_priv *priv,
+				     const char *tx_buf, size_t tx_len);
+	int (*bitstream_burst_write_complete)(struct sysconfig_priv *priv);
+};
+
+int sysconfig_probe(struct sysconfig_priv *priv);
+
+#endif /* __LATTICE_SYSCONFIG_H */
-- 
GitLab


From ee31d5038c06b56ea515f4fe490274628c0f80e1 Mon Sep 17 00:00:00 2001
From: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Date: Tue, 25 Oct 2022 08:39:47 +0300
Subject: [PATCH 0400/4122] dt-bindings: fpga: document Lattice sysCONFIG FPGA
 manager

Add Device Tree Binding doc for configuring Lattice ECP5 FPGA over
Slave SPI sysCONFIG interface.

Signed-off-by: Ivan Bornyakov <i.bornyakov@metrotek.ru>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Xu Yilun <yilun.xu@intel.com>
Link: https://lore.kernel.org/r/20221025053947.2737-3-i.bornyakov@metrotek.ru
Signed-off-by: Xu Yilun <yilun.xu@intel.com>
---
 .../bindings/fpga/lattice,sysconfig.yaml      | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml

diff --git a/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml
new file mode 100644
index 0000000000000..4fb05eb84e2af
--- /dev/null
+++ b/Documentation/devicetree/bindings/fpga/lattice,sysconfig.yaml
@@ -0,0 +1,81 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/fpga/lattice,sysconfig.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Lattice Slave SPI sysCONFIG FPGA manager
+
+maintainers:
+  - Ivan Bornyakov <i.bornyakov@metrotek.ru>
+
+description: |
+  Lattice sysCONFIG port, which is used for FPGA configuration, among others,
+  have Slave Serial Peripheral Interface. Only full reconfiguration is
+  supported.
+
+  Programming of ECP5 is done by writing uncompressed bitstream image in .bit
+  format into FPGA's SRAM configuration memory.
+
+properties:
+  compatible:
+    enum:
+      - lattice,sysconfig-ecp5
+
+  reg:
+    maxItems: 1
+
+  program-gpios:
+    description:
+      A GPIO line connected to PROGRAMN (active low) pin of the device.
+      Initiates configuration sequence.
+    maxItems: 1
+
+  init-gpios:
+    description:
+      A GPIO line connected to INITN (active low) pin of the device.
+      Indicates that the FPGA is ready to be configured.
+    maxItems: 1
+
+  done-gpios:
+    description:
+      A GPIO line connected to DONE (active high) pin of the device.
+      Indicates that the configuration sequence is complete.
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: lattice,sysconfig-ecp5
+    then:
+      properties:
+        spi-max-frequency:
+          maximum: 60000000
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        fpga-mgr@0 {
+            compatible = "lattice,sysconfig-ecp5";
+            reg = <0>;
+            spi-max-frequency = <20000000>;
+            program-gpios = <&gpio3 4 GPIO_ACTIVE_LOW>;
+            init-gpios = <&gpio3 3 GPIO_ACTIVE_LOW>;
+            done-gpios = <&gpio3 2 GPIO_ACTIVE_HIGH>;
+        };
+    };
-- 
GitLab


From 665b1856dc2399828d8ee07a18d4fd79868e729a Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Mon, 3 Oct 2022 06:06:26 -0700
Subject: [PATCH 0401/4122] apparmor: Fix loading of child before parent

Unfortunately it is possible for some userspace's to load children
profiles before the parent profile. This can even happen when the
child and the parent are in different load sets.

Fix this by creating a null place holder profile that grants no permissions
and can be replaced by the parent once it is loaded.

Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 87 ++++++++++++++++++++++++++++++++++----
 1 file changed, 78 insertions(+), 9 deletions(-)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index c17ccedd35f1e..66034cf96f4c8 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -423,6 +423,57 @@ static struct aa_policy *__lookup_parent(struct aa_ns *ns,
 	return &profile->base;
 }
 
+/**
+ * __create_missing_ancestors - create place holders for missing ancestores
+ * @ns: namespace to lookup profile in (NOT NULL)
+ * @hname: hierarchical profile name to find parent of (NOT NULL)
+ * @gfp: type of allocation.
+ *
+ * Returns: NULL on error, parent profile on success
+ *
+ * Requires: ns mutex lock held
+ *
+ * Returns: unrefcounted parent policy or NULL if error creating
+ *          place holder profiles.
+ */
+static struct aa_policy *__create_missing_ancestors(struct aa_ns *ns,
+						    const char *hname,
+						    gfp_t gfp)
+{
+	struct aa_policy *policy;
+	struct aa_profile *parent, *profile = NULL;
+	char *split;
+
+	AA_BUG(!ns);
+	AA_BUG(!hname);
+
+	policy = &ns->base;
+
+	for (split = strstr(hname, "//"); split;) {
+		parent = profile;
+		profile = __strn_find_child(&policy->profiles, hname,
+					    split - hname);
+		if (!profile) {
+			const char *name = kstrndup(hname, split - hname,
+						    gfp);
+			if (!name)
+				return NULL;
+			profile = aa_alloc_null(parent, name, gfp);
+			kfree(name);
+			if (!profile)
+				return NULL;
+			if (!parent)
+				profile->ns = aa_get_ns(ns);
+		}
+		policy = &profile->base;
+		hname = split + 2;
+		split = strstr(hname, "//");
+	}
+	if (!profile)
+		return &ns->base;
+	return &profile->base;
+}
+
 /**
  * __lookupn_profile - lookup the profile matching @hname
  * @base: base list to start looking up profile name from  (NOT NULL)
@@ -1032,6 +1083,7 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,
 	/* setup parent and ns info */
 	list_for_each_entry(ent, &lh, list) {
 		struct aa_policy *policy;
+		struct aa_profile *p;
 
 		if (aa_g_export_binary)
 			ent->new->rawdata = aa_get_loaddata(udata);
@@ -1056,21 +1108,38 @@ ssize_t aa_replace_profiles(struct aa_ns *policy_ns, struct aa_label *label,
 			continue;
 
 		/* no ref on policy only use inside lock */
+		p = NULL;
 		policy = __lookup_parent(ns, ent->new->base.hname);
 		if (!policy) {
-			struct aa_profile *p;
+			/* first check for parent in the load set */
 			p = __list_lookup_parent(&lh, ent->new);
 			if (!p) {
-				error = -ENOENT;
-				info = "parent does not exist";
-				goto fail_lock;
+				/*
+				 * fill in missing parent with null
+				 * profile that doesn't have
+				 * permissions. This allows for
+				 * individual profile loading where
+				 * the child is loaded before the
+				 * parent, and outside of the current
+				 * atomic set. This unfortunately can
+				 * happen with some userspaces.  The
+				 * null profile will be replaced once
+				 * the parent is loaded.
+				 */
+				policy = __create_missing_ancestors(ns,
+							ent->new->base.hname,
+							GFP_KERNEL);
+				if (!policy) {
+					error = -ENOENT;
+					info = "parent does not exist";
+					goto fail_lock;
+				}
 			}
-			rcu_assign_pointer(ent->new->parent, aa_get_profile(p));
-		} else if (policy != &ns->base) {
-			/* released on profile replacement or free_profile */
-			struct aa_profile *p = (struct aa_profile *) policy;
-			rcu_assign_pointer(ent->new->parent, aa_get_profile(p));
 		}
+		if (!p && policy != &ns->base)
+			/* released on profile replacement or free_profile */
+			p = (struct aa_profile *) policy;
+		rcu_assign_pointer(ent->new->parent, aa_get_profile(p));
 	}
 
 	/* create new fs entries for introspection if needed */
-- 
GitLab


From 64a27ba984342d6c5cf5facc278de5c5df1fd3ff Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Sat, 8 Oct 2022 14:34:09 +0800
Subject: [PATCH 0402/4122] AppArmor: Fix kernel-doc

security/apparmor/audit.c:93: warning: expecting prototype for audit_base(). Prototype was for audit_pre() instead.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2339
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/audit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/audit.c b/security/apparmor/audit.c
index 8dfdda98fbf13..5a7978aa4b19e 100644
--- a/security/apparmor/audit.c
+++ b/security/apparmor/audit.c
@@ -83,7 +83,7 @@ static const char *const aa_class_names[] = {
  */
 
 /**
- * audit_base - core AppArmor function.
+ * audit_pre() - core AppArmor function.
  * @ab: audit buffer to fill (NOT NULL)
  * @ca: audit structure containing data to audit (NOT NULL)
  *
-- 
GitLab


From 391f121150a5191c932e02775b6e29e59a3f5a94 Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Sat, 8 Oct 2022 14:34:10 +0800
Subject: [PATCH 0403/4122] LSM: Fix kernel-doc

security/apparmor/lsm.c:753: warning: expecting prototype for apparmor_bprm_committed_cred(). Prototype was for apparmor_bprm_committed_creds() instead.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2338
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lsm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 8e2b951c4988c..ca4d190a737d3 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -741,7 +741,7 @@ static void apparmor_bprm_committing_creds(struct linux_binprm *bprm)
 }
 
 /**
- * apparmor_bprm_committed_cred - do cleanup after new creds committed
+ * apparmor_bprm_committed_creds() - do cleanup after new creds committed
  * @bprm: binprm for the exec  (NOT NULL)
  */
 static void apparmor_bprm_committed_creds(struct linux_binprm *bprm)
-- 
GitLab


From a2217387c3ec09117b3b6eaa5ec8a0d7d347d4ba Mon Sep 17 00:00:00 2001
From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Date: Sat, 8 Oct 2022 14:34:11 +0800
Subject: [PATCH 0404/4122] AppArmor: Fix kernel-doc

security/apparmor/ipc.c:53: warning: expecting prototype for audit_cb(). Prototype was for audit_signal_cb() instead.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2337
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/ipc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/ipc.c b/security/apparmor/ipc.c
index 1d4099385bdf8..5acde746775f7 100644
--- a/security/apparmor/ipc.c
+++ b/security/apparmor/ipc.c
@@ -45,7 +45,7 @@ static const char *audit_signal_mask(u32 mask)
 }
 
 /**
- * audit_cb - call back for signal specific audit fields
+ * audit_signal_cb() - call back for signal specific audit fields
  * @ab: audit_buffer  (NOT NULL)
  * @va: audit struct to audit values of  (NOT NULL)
  */
-- 
GitLab


From 37923d4321b1e38170086da2c117f78f2b0f49c6 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Fri, 21 Oct 2022 08:46:04 +0800
Subject: [PATCH 0405/4122] apparmor: Use pointer to struct aa_label for
 lbs_cred

According to the implementations of cred_label() and set_cred_label(),
we should use pointer to struct aa_label for lbs_cred instead of struct
aa_task_ctx, this patch fixes it.

Fixes: bbd3662a8348 ("Infrastructure management of the cred security blob")
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/lsm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index ca4d190a737d3..25114735bc11c 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1198,10 +1198,10 @@ static int apparmor_inet_conn_request(const struct sock *sk, struct sk_buff *skb
 #endif
 
 /*
- * The cred blob is a pointer to, not an instance of, an aa_task_ctx.
+ * The cred blob is a pointer to, not an instance of, an aa_label.
  */
 struct lsm_blob_sizes apparmor_blob_sizes __lsm_ro_after_init = {
-	.lbs_cred = sizeof(struct aa_task_ctx *),
+	.lbs_cred = sizeof(struct aa_label *),
 	.lbs_file = sizeof(struct aa_file_ctx),
 	.lbs_task = sizeof(struct aa_task_ctx),
 };
-- 
GitLab


From d44c692350d9a376abab46aa0d70587951971068 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 14 Oct 2022 16:42:55 +0800
Subject: [PATCH 0406/4122] apparmor: Fix spelling of function name in comment
 block

'resouce' -> 'resource'

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2396
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/resource.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/resource.c b/security/apparmor/resource.c
index 1b75d8343a8db..e859481648962 100644
--- a/security/apparmor/resource.c
+++ b/security/apparmor/resource.c
@@ -68,7 +68,7 @@ static int audit_resource(struct aa_profile *profile, unsigned int resource,
 }
 
 /**
- * aa_map_resouce - map compiled policy resource to internal #
+ * aa_map_resource - map compiled policy resource to internal #
  * @resource: flattened policy resource number
  *
  * Returns: resource # for the current architecture.
-- 
GitLab


From 7dd426e33e2f9275ac03a306efdc89aa86515a52 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Tue, 25 Oct 2022 11:59:30 +0800
Subject: [PATCH 0407/4122] apparmor: fix a memleak in free_ruleset()

When the aa_profile is released, we will call free_ruleset to
release aa_ruleset, but we don't free the memory of aa_ruleset,
so there will be memleak, fix it.

unreferenced object 0xffff8881475df800 (size 1024):
  comm "apparmor_parser", pid 883, jiffies 4294899650 (age 9114.088s)
  hex dump (first 32 bytes):
    00 f8 5d 47 81 88 ff ff 00 f8 5d 47 81 88 ff ff  ..]G......]G....
    00 00 00 00 00 00 00 00 00 dc 65 47 81 88 ff ff  ..........eG....
  backtrace:
    [<00000000370e658e>] __kmem_cache_alloc_node+0x182/0x700
    [<00000000f2f5a6d2>] kmalloc_trace+0x2c/0x130
    [<00000000c5c905b3>] aa_alloc_profile+0x1bc/0x5c0
    [<00000000bc4fa72b>] unpack_profile+0x319/0x30c0
    [<00000000eab791e9>] aa_unpack+0x307/0x1450
    [<000000002c3a6ee1>] aa_replace_profiles+0x1b8/0x3790
    [<00000000d0c3fd54>] policy_update+0x35a/0x890
    [<00000000d04fed90>] profile_replace+0x1d1/0x260
    [<00000000cba0c0a7>] vfs_write+0x283/0xd10
    [<000000006bae64a5>] ksys_write+0x134/0x260
    [<00000000b2fd8f31>] __x64_sys_write+0x78/0xb0
    [<00000000f3c8a015>] do_syscall_64+0x5c/0x90
    [<00000000a242b1db>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: 217af7e2f4de ("apparmor: refactor profile rules and attachments")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c
index 66034cf96f4c8..51e8184e0fec1 100644
--- a/security/apparmor/policy.c
+++ b/security/apparmor/policy.c
@@ -215,6 +215,7 @@ static void free_ruleset(struct aa_ruleset *rules)
 	for (i = 0; i < rules->secmark_count; i++)
 		kfree_sensitive(rules->secmark[i].label);
 	kfree_sensitive(rules->secmark);
+	kfree_sensitive(rules);
 }
 
 struct aa_ruleset *aa_alloc_ruleset(gfp_t gfp)
-- 
GitLab


From 3265949f7cd36a724a35020202c618094be1cf28 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Fri, 21 Oct 2022 17:36:02 +0800
Subject: [PATCH 0408/4122] apparmor: Fix memleak issue in unpack_profile()

Before aa_alloc_profile(), it has allocated string for @*ns_name if @tmpns
is not NULL, so directly return -ENOMEM if aa_alloc_profile() failed will
cause a memleak issue, and even if aa_alloc_profile() succeed, in the
@fail_profile tag of aa_unpack(), it need to free @ns_name as well, this
patch fixes them.

Fixes: 736ec752d95e ("AppArmor: policy routines for loading and unpacking policy")
Fixes: 04dc715e24d0 ("apparmor: audit policy ns specified in policy load")
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c
index 2e028d540c6b9..1bf8cfb8700aa 100644
--- a/security/apparmor/policy_unpack.c
+++ b/security/apparmor/policy_unpack.c
@@ -858,8 +858,11 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name)
 	}
 
 	profile = aa_alloc_profile(name, NULL, GFP_KERNEL);
-	if (!profile)
-		return ERR_PTR(-ENOMEM);
+	if (!profile) {
+		info = "out of memory";
+		error = -ENOMEM;
+		goto fail;
+	}
 	rules = list_first_entry(&profile->rules, typeof(*rules), list);
 
 	/* profile renaming is optional */
@@ -1090,6 +1093,10 @@ fail:
 	if (error == 0)
 		/* default error covers most cases */
 		error = -EPROTO;
+	if (*ns_name) {
+		kfree(*ns_name);
+		*ns_name = NULL;
+	}
 	if (profile)
 		name = NULL;
 	else if (!name)
@@ -1392,6 +1399,7 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh,
 {
 	struct aa_load_ent *tmp, *ent;
 	struct aa_profile *profile = NULL;
+	char *ns_name = NULL;
 	int error;
 	struct aa_ext e = {
 		.start = udata->data,
@@ -1401,7 +1409,6 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh,
 
 	*ns = NULL;
 	while (e.pos < e.end) {
-		char *ns_name = NULL;
 		void *start;
 		error = verify_header(&e, e.pos == e.start, ns);
 		if (error)
@@ -1432,6 +1439,7 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh,
 
 		ent->new = profile;
 		ent->ns_name = ns_name;
+		ns_name = NULL;
 		list_add_tail(&ent->list, lh);
 	}
 	udata->abi = e.version & K_ABI_MASK;
@@ -1452,6 +1460,7 @@ int aa_unpack(struct aa_loaddata *udata, struct list_head *lh,
 	return 0;
 
 fail_profile:
+	kfree(ns_name);
 	aa_put_profile(profile);
 
 fail:
-- 
GitLab


From 6de0cb80e601df16f481a614daa0e84adbf0b552 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Mon, 24 Oct 2022 16:08:28 +0800
Subject: [PATCH 0409/4122] gpio: ftgpio010: use device name for gpiochip name
 & label

Currently, we use just the fixed string "FTGPIO010" as the gpiochip name
for ftgpio010 drivers. Because it's fixed, this means we cannot
distinguish multiple ftgpio010 devices present on a single system.

This change uses the dev_name() instead, which should be unique between
multiple instances.

Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-ftgpio010.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-ftgpio010.c b/drivers/gpio/gpio-ftgpio010.c
index f77a965f5780d..2728672ef9f83 100644
--- a/drivers/gpio/gpio-ftgpio010.c
+++ b/drivers/gpio/gpio-ftgpio010.c
@@ -277,7 +277,7 @@ static int ftgpio_gpio_probe(struct platform_device *pdev)
 		dev_err(dev, "unable to init generic GPIO\n");
 		goto dis_clk;
 	}
-	g->gc.label = "FTGPIO010";
+	g->gc.label = dev_name(dev);
 	g->gc.base = -1;
 	g->gc.parent = dev;
 	g->gc.owner = THIS_MODULE;
-- 
GitLab


From b9b1fc1ae1191243d3956888c65a280a9b2c847f Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Sun, 18 Sep 2022 12:50:43 -0400
Subject: [PATCH 0410/4122] gpio: idio-16: Introduce the ACCES IDIO-16 GPIO
 library module

Exposes consumer library functions to facilitate communication with
devices within the ACCES IDIO-16 family such as the 104-IDIO-16 and
the PCI-IDIO-16.

A CONFIG_GPIO_IDIO_16 Kconfig option is introduced by this patch.
Modules wanting access to these idio-16 library functions should select
this Kconfig option and import the GPIO_IDIO_16 symbol namespace.

Cc: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 MAINTAINERS                 |   7 ++
 drivers/gpio/Kconfig        |   9 +++
 drivers/gpio/Makefile       |   1 +
 drivers/gpio/gpio-idio-16.c | 146 ++++++++++++++++++++++++++++++++++++
 drivers/gpio/gpio-idio-16.h |  71 ++++++++++++++++++
 5 files changed, 234 insertions(+)
 create mode 100644 drivers/gpio/gpio-idio-16.c
 create mode 100644 drivers/gpio/gpio-idio-16.h

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..74efa0492c430 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -312,6 +312,13 @@ L:	linux-iio@vger.kernel.org
 S:	Maintained
 F:	drivers/counter/104-quad-8.c
 
+ACCES IDIO-16 GPIO LIBRARY
+M:	William Breathitt Gray <william.gray@linaro.org>
+L:	linux-gpio@vger.kernel.org
+S:	Maintained
+F:	drivers/gpio/gpio-idio-16.c
+F:	drivers/gpio/gpio-idio-16.h
+
 ACCES PCI-IDIO-16 GPIO DRIVER
 M:	William Breathitt Gray <william.gray@linaro.org>
 L:	linux-gpio@vger.kernel.org
diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index e034f752e7cef..3f8cf6e2165e4 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -109,6 +109,15 @@ config GPIO_REGMAP
 config GPIO_MAX730X
 	tristate
 
+config GPIO_IDIO_16
+	tristate
+	help
+	  Enables support for the idio-16 library functions. The idio-16 library
+	  provides functions to facilitate communication with devices within the
+	  ACCES IDIO-16 family such as the 104-IDIO-16 and the PCI-IDIO-16.
+
+	  If built as a module its name will be gpio-idio-16.
+
 menu "Memory mapped GPIO drivers"
 	depends on HAS_IOMEM
 
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 84fae267e8ebf..37a0b7ebda434 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_GPIO_HLWD)			+= gpio-hlwd.o
 obj-$(CONFIG_HTC_EGPIO)			+= gpio-htc-egpio.o
 obj-$(CONFIG_GPIO_I8255)		+= gpio-i8255.o
 obj-$(CONFIG_GPIO_ICH)			+= gpio-ich.o
+obj-$(CONFIG_GPIO_IDIO_16)		+= gpio-idio-16.o
 obj-$(CONFIG_GPIO_IDT3243X)		+= gpio-idt3243x.o
 obj-$(CONFIG_GPIO_IMX_SCU)		+= gpio-imx-scu.o
 obj-$(CONFIG_GPIO_IOP)			+= gpio-iop.o
diff --git a/drivers/gpio/gpio-idio-16.c b/drivers/gpio/gpio-idio-16.c
new file mode 100644
index 0000000000000..13315242d2209
--- /dev/null
+++ b/drivers/gpio/gpio-idio-16.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GPIO library for the ACCES IDIO-16 family
+ * Copyright (C) 2022 William Breathitt Gray
+ */
+#include <linux/bitmap.h>
+#include <linux/export.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "gpio-idio-16.h"
+
+#define DEFAULT_SYMBOL_NAMESPACE GPIO_IDIO_16
+
+/**
+ * idio_16_get - get signal value at signal offset
+ * @reg:	ACCES IDIO-16 device registers
+ * @state:	ACCES IDIO-16 device state
+ * @offset:	offset of signal to get
+ *
+ * Returns the signal value (0=low, 1=high) for the signal at @offset.
+ */
+int idio_16_get(struct idio_16 __iomem *const reg,
+		struct idio_16_state *const state, const unsigned long offset)
+{
+	const unsigned long mask = BIT(offset);
+
+	if (offset < IDIO_16_NOUT)
+		return test_bit(offset, state->out_state);
+
+	if (offset < 24)
+		return !!(ioread8(&reg->in0_7) & (mask >> IDIO_16_NOUT));
+
+	if (offset < 32)
+		return !!(ioread8(&reg->in8_15) & (mask >> 24));
+
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(idio_16_get);
+
+/**
+ * idio_16_get_multiple - get multiple signal values at multiple signal offsets
+ * @reg:	ACCES IDIO-16 device registers
+ * @state:	ACCES IDIO-16 device state
+ * @mask:	mask of signals to get
+ * @bits:	bitmap to store signal values
+ *
+ * Stores in @bits the values (0=low, 1=high) for the signals defined by @mask.
+ */
+void idio_16_get_multiple(struct idio_16 __iomem *const reg,
+			  struct idio_16_state *const state,
+			  const unsigned long *const mask,
+			  unsigned long *const bits)
+{
+	unsigned long flags;
+	const unsigned long out_mask = GENMASK(IDIO_16_NOUT - 1, 0);
+
+	spin_lock_irqsave(&state->lock, flags);
+
+	bitmap_replace(bits, bits, state->out_state, &out_mask, IDIO_16_NOUT);
+	if (*mask & GENMASK(23, 16))
+		bitmap_set_value8(bits, ioread8(&reg->in0_7), 16);
+	if (*mask & GENMASK(31, 24))
+		bitmap_set_value8(bits, ioread8(&reg->in8_15), 24);
+
+	spin_unlock_irqrestore(&state->lock, flags);
+}
+EXPORT_SYMBOL_GPL(idio_16_get_multiple);
+
+/**
+ * idio_16_set - set signal value at signal offset
+ * @reg:	ACCES IDIO-16 device registers
+ * @state:	ACCES IDIO-16 device state
+ * @offset:	offset of signal to set
+ * @value:	value of signal to set
+ *
+ * Assigns output @value for the signal at @offset.
+ */
+void idio_16_set(struct idio_16 __iomem *const reg,
+		 struct idio_16_state *const state, const unsigned long offset,
+		 const unsigned long value)
+{
+	unsigned long flags;
+
+	if (offset >= IDIO_16_NOUT)
+		return;
+
+	spin_lock_irqsave(&state->lock, flags);
+
+	__assign_bit(offset, state->out_state, value);
+	if (offset < 8)
+		iowrite8(bitmap_get_value8(state->out_state, 0), &reg->out0_7);
+	else
+		iowrite8(bitmap_get_value8(state->out_state, 8), &reg->out8_15);
+
+	spin_unlock_irqrestore(&state->lock, flags);
+}
+EXPORT_SYMBOL_GPL(idio_16_set);
+
+/**
+ * idio_16_set_multiple - set signal values at multiple signal offsets
+ * @reg:	ACCES IDIO-16 device registers
+ * @state:	ACCES IDIO-16 device state
+ * @mask:	mask of signals to set
+ * @bits:	bitmap of signal output values
+ *
+ * Assigns output values defined by @bits for the signals defined by @mask.
+ */
+void idio_16_set_multiple(struct idio_16 __iomem *const reg,
+			  struct idio_16_state *const state,
+			  const unsigned long *const mask,
+			  const unsigned long *const bits)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&state->lock, flags);
+
+	bitmap_replace(state->out_state, state->out_state, bits, mask,
+		       IDIO_16_NOUT);
+	if (*mask & GENMASK(7, 0))
+		iowrite8(bitmap_get_value8(state->out_state, 0), &reg->out0_7);
+	if (*mask & GENMASK(15, 8))
+		iowrite8(bitmap_get_value8(state->out_state, 8), &reg->out8_15);
+
+	spin_unlock_irqrestore(&state->lock, flags);
+}
+EXPORT_SYMBOL_GPL(idio_16_set_multiple);
+
+/**
+ * idio_16_state_init - initialize idio_16_state structure
+ * @state:	ACCES IDIO-16 device state
+ *
+ * Initializes the ACCES IDIO-16 device @state for use in idio-16 library
+ * functions.
+ */
+void idio_16_state_init(struct idio_16_state *const state)
+{
+	spin_lock_init(&state->lock);
+}
+EXPORT_SYMBOL_GPL(idio_16_state_init);
+
+MODULE_AUTHOR("William Breathitt Gray");
+MODULE_DESCRIPTION("ACCES IDIO-16 GPIO Library");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpio/gpio-idio-16.h b/drivers/gpio/gpio-idio-16.h
new file mode 100644
index 0000000000000..928f8251a2bd9
--- /dev/null
+++ b/drivers/gpio/gpio-idio-16.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2022 William Breathitt Gray */
+#ifndef _IDIO_16_H_
+#define _IDIO_16_H_
+
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+/**
+ * struct idio_16 - IDIO-16 registers structure
+ * @out0_7:	Read: FET Drive Outputs 0-7
+ *		Write: FET Drive Outputs 0-7
+ * @in0_7:	Read: Isolated Inputs 0-7
+ *		Write: Clear Interrupt
+ * @irq_ctl:	Read: Enable IRQ
+ *		Write: Disable IRQ
+ * @filter_ctl:	Read: Activate Input Filters 0-15
+ *		Write: Deactivate Input Filters 0-15
+ * @out8_15:	Read: FET Drive Outputs 8-15
+ *		Write: FET Drive Outputs 8-15
+ * @in8_15:	Read: Isolated Inputs 8-15
+ *		Write: Unused
+ * @irq_status:	Read: Interrupt status
+ *		Write: Unused
+ */
+struct idio_16 {
+	u8 out0_7;
+	u8 in0_7;
+	u8 irq_ctl;
+	u8 filter_ctl;
+	u8 out8_15;
+	u8 in8_15;
+	u8 irq_status;
+};
+
+#define IDIO_16_NOUT 16
+
+/**
+ * struct idio_16_state - IDIO-16 state structure
+ * @lock:	synchronization lock for accessing device state
+ * @out_state:	output signals state
+ */
+struct idio_16_state {
+	spinlock_t lock;
+	DECLARE_BITMAP(out_state, IDIO_16_NOUT);
+};
+
+/**
+ * idio_16_get_direction - get the I/O direction for a signal offset
+ * @offset:	offset of signal to get direction
+ *
+ * Returns the signal direction (0=output, 1=input) for the signal at @offset.
+ */
+static inline int idio_16_get_direction(const unsigned long offset)
+{
+	return (offset >= IDIO_16_NOUT) ? 1 : 0;
+}
+
+int idio_16_get(struct idio_16 __iomem *reg, struct idio_16_state *state,
+		unsigned long offset);
+void idio_16_get_multiple(struct idio_16 __iomem *reg,
+			  struct idio_16_state *state,
+			  const unsigned long *mask, unsigned long *bits);
+void idio_16_set(struct idio_16 __iomem *reg, struct idio_16_state *state,
+		 unsigned long offset, unsigned long value);
+void idio_16_set_multiple(struct idio_16 __iomem *reg,
+			  struct idio_16_state *state,
+			  const unsigned long *mask, const unsigned long *bits);
+void idio_16_state_init(struct idio_16_state *state);
+
+#endif /* _IDIO_16_H_ */
-- 
GitLab


From c4ec384cf726379e600764c7f2f7ad487280890a Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Tue, 25 Oct 2022 09:57:57 +0200
Subject: [PATCH 0411/4122] gpio: 104-idio-16: Utilize the idio-16 GPIO library

The ACCES 104-IDIO-16 device is part of the ACCES IDIO-16 family, so the
idio-16 GPIO library module is selected and utilized to consolidate
code.

Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig            |  1 +
 drivers/gpio/gpio-104-idio-16.c | 88 ++++++---------------------------
 2 files changed, 17 insertions(+), 72 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3f8cf6e2165e4..3b0a030ce79b0 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -858,6 +858,7 @@ config GPIO_104_IDIO_16
 	depends on PC104
 	select ISA_BUS_API
 	select GPIOLIB_IRQCHIP
+	select GPIO_IDIO_16
 	help
 	  Enables GPIO support for the ACCES 104-IDIO-16 family (104-IDIO-16,
 	  104-IDIO-16E, 104-IDO-16, 104-IDIO-8, 104-IDIO-8E, 104-IDO-8). The
diff --git a/drivers/gpio/gpio-104-idio-16.c b/drivers/gpio/gpio-104-idio-16.c
index 718bd54e2a255..098fbefdbe226 100644
--- a/drivers/gpio/gpio-104-idio-16.c
+++ b/drivers/gpio/gpio-104-idio-16.c
@@ -6,7 +6,7 @@
  * This driver supports the following ACCES devices: 104-IDIO-16,
  * 104-IDIO-16E, 104-IDO-16, 104-IDIO-8, 104-IDIO-8E, and 104-IDO-8.
  */
-#include <linux/bits.h>
+#include <linux/bitmap.h>
 #include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/gpio/driver.h>
@@ -21,6 +21,8 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
+#include "gpio-idio-16.h"
+
 #define IDIO_16_EXTENT 8
 #define MAX_NUM_IDIO_16 max_num_isa_dev(IDIO_16_EXTENT)
 
@@ -34,49 +36,26 @@ static unsigned int num_irq;
 module_param_hw_array(irq, uint, irq, &num_irq, 0);
 MODULE_PARM_DESC(irq, "ACCES 104-IDIO-16 interrupt line numbers");
 
-/**
- * struct idio_16_reg - device registers structure
- * @out0_7:	Read: N/A
- *		Write: FET Drive Outputs 0-7
- * @in0_7:	Read: Isolated Inputs 0-7
- *		Write: Clear Interrupt
- * @irq_ctl:	Read: Enable IRQ
- *		Write: Disable IRQ
- * @unused:	N/A
- * @out8_15:	Read: N/A
- *		Write: FET Drive Outputs 8-15
- * @in8_15:	Read: Isolated Inputs 8-15
- *		Write: N/A
- */
-struct idio_16_reg {
-	u8 out0_7;
-	u8 in0_7;
-	u8 irq_ctl;
-	u8 unused;
-	u8 out8_15;
-	u8 in8_15;
-};
-
 /**
  * struct idio_16_gpio - GPIO device private data structure
  * @chip:	instance of the gpio_chip
  * @lock:	synchronization lock to prevent I/O race conditions
  * @irq_mask:	I/O bits affected by interrupts
  * @reg:	I/O address offset for the device registers
- * @out_state:	output bits state
+ * @state:	ACCES IDIO-16 device state
  */
 struct idio_16_gpio {
 	struct gpio_chip chip;
 	raw_spinlock_t lock;
 	unsigned long irq_mask;
-	struct idio_16_reg __iomem *reg;
-	unsigned int out_state;
+	struct idio_16 __iomem *reg;
+	struct idio_16_state state;
 };
 
 static int idio_16_gpio_get_direction(struct gpio_chip *chip,
 				      unsigned int offset)
 {
-	if (offset > 15)
+	if (idio_16_get_direction(offset))
 		return GPIO_LINE_DIRECTION_IN;
 
 	return GPIO_LINE_DIRECTION_OUT;
@@ -98,15 +77,8 @@ static int idio_16_gpio_direction_output(struct gpio_chip *chip,
 static int idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	const unsigned int mask = BIT(offset-16);
 
-	if (offset < 16)
-		return -EINVAL;
-
-	if (offset < 24)
-		return !!(ioread8(&idio16gpio->reg->in0_7) & mask);
-
-	return !!(ioread8(&idio16gpio->reg->in8_15) & (mask>>8));
+	return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset);
 }
 
 static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
@@ -114,11 +86,7 @@ static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
 
-	*bits = 0;
-	if (*mask & GENMASK(23, 16))
-		*bits |= (unsigned long)ioread8(&idio16gpio->reg->in0_7) << 16;
-	if (*mask & GENMASK(31, 24))
-		*bits |= (unsigned long)ioread8(&idio16gpio->reg->in8_15) << 24;
+	idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
 
 	return 0;
 }
@@ -127,44 +95,16 @@ static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset,
 			     int value)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	const unsigned int mask = BIT(offset);
-	unsigned long flags;
 
-	if (offset > 15)
-		return;
-
-	raw_spin_lock_irqsave(&idio16gpio->lock, flags);
-
-	if (value)
-		idio16gpio->out_state |= mask;
-	else
-		idio16gpio->out_state &= ~mask;
-
-	if (offset > 7)
-		iowrite8(idio16gpio->out_state >> 8, &idio16gpio->reg->out8_15);
-	else
-		iowrite8(idio16gpio->out_state, &idio16gpio->reg->out0_7);
-
-	raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
+	idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value);
 }
 
 static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	unsigned long flags;
 
-	raw_spin_lock_irqsave(&idio16gpio->lock, flags);
-
-	idio16gpio->out_state &= ~*mask;
-	idio16gpio->out_state |= *mask & *bits;
-
-	if (*mask & 0xFF)
-		iowrite8(idio16gpio->out_state, &idio16gpio->reg->out0_7);
-	if ((*mask >> 8) & 0xFF)
-		iowrite8(idio16gpio->out_state >> 8, &idio16gpio->reg->out8_15);
-
-	raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
+	idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
 }
 
 static void idio_16_irq_ack(struct irq_data *data)
@@ -301,7 +241,10 @@ static int idio_16_probe(struct device *dev, unsigned int id)
 	idio16gpio->chip.get_multiple = idio_16_gpio_get_multiple;
 	idio16gpio->chip.set = idio_16_gpio_set;
 	idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple;
-	idio16gpio->out_state = 0xFFFF;
+
+	idio_16_state_init(&idio16gpio->state);
+	/* FET off states are represented by bit values of "1" */
+	bitmap_fill(idio16gpio->state.out_state, IDIO_16_NOUT);
 
 	girq = &idio16gpio->chip.irq;
 	gpio_irq_chip_set_chip(girq, &idio_16_irqchip);
@@ -343,3 +286,4 @@ module_isa_driver_with_irq(idio_16_driver, num_idio_16, num_irq);
 MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
 MODULE_DESCRIPTION("ACCES 104-IDIO-16 GPIO driver");
 MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(GPIO_IDIO_16);
-- 
GitLab


From e7f758fa9b7fda8b91f2e429b2be93ae0b88ac33 Mon Sep 17 00:00:00 2001
From: William Breathitt Gray <william.gray@linaro.org>
Date: Sun, 18 Sep 2022 12:50:45 -0400
Subject: [PATCH 0412/4122] gpio: pci-idio-16: Utilize the idio-16 GPIO library

The ACCES PCI-IDIO-16 device is part of the ACCES IDIO-16 family, so the
idio-16 GPIO library module is selected and utilized to consolidate
code.

Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig            |   1 +
 drivers/gpio/gpio-pci-idio-16.c | 119 ++++----------------------------
 2 files changed, 14 insertions(+), 106 deletions(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 3b0a030ce79b0..5a04990f03cc8 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1563,6 +1563,7 @@ config GPIO_PCH
 config GPIO_PCI_IDIO_16
 	tristate "ACCES PCI-IDIO-16 GPIO support"
 	select GPIOLIB_IRQCHIP
+	select GPIO_IDIO_16
 	help
 	  Enables GPIO support for the ACCES PCI-IDIO-16. An interrupt is
 	  generated when any of the inputs change state (low to high or high to
diff --git a/drivers/gpio/gpio-pci-idio-16.c b/drivers/gpio/gpio-pci-idio-16.c
index 71a13a394050f..a86ce748384bb 100644
--- a/drivers/gpio/gpio-pci-idio-16.c
+++ b/drivers/gpio/gpio-pci-idio-16.c
@@ -3,8 +3,7 @@
  * GPIO driver for the ACCES PCI-IDIO-16
  * Copyright (C) 2017 William Breathitt Gray
  */
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
+#include <linux/bits.h>
 #include <linux/device.h>
 #include <linux/errno.h>
 #include <linux/gpio/driver.h>
@@ -16,51 +15,28 @@
 #include <linux/spinlock.h>
 #include <linux/types.h>
 
-/**
- * struct idio_16_gpio_reg - GPIO device registers structure
- * @out0_7:	Read: FET Drive Outputs 0-7
- *		Write: FET Drive Outputs 0-7
- * @in0_7:	Read: Isolated Inputs 0-7
- *		Write: Clear Interrupt
- * @irq_ctl:	Read: Enable IRQ
- *		Write: Disable IRQ
- * @filter_ctl:	Read: Activate Input Filters 0-15
- *		Write: Deactivate Input Filters 0-15
- * @out8_15:	Read: FET Drive Outputs 8-15
- *		Write: FET Drive Outputs 8-15
- * @in8_15:	Read: Isolated Inputs 8-15
- *		Write: Unused
- * @irq_status:	Read: Interrupt status
- *		Write: Unused
- */
-struct idio_16_gpio_reg {
-	u8 out0_7;
-	u8 in0_7;
-	u8 irq_ctl;
-	u8 filter_ctl;
-	u8 out8_15;
-	u8 in8_15;
-	u8 irq_status;
-};
+#include "gpio-idio-16.h"
 
 /**
  * struct idio_16_gpio - GPIO device private data structure
  * @chip:	instance of the gpio_chip
  * @lock:	synchronization lock to prevent I/O race conditions
  * @reg:	I/O address offset for the GPIO device registers
+ * @state:	ACCES IDIO-16 device state
  * @irq_mask:	I/O bits affected by interrupts
  */
 struct idio_16_gpio {
 	struct gpio_chip chip;
 	raw_spinlock_t lock;
-	struct idio_16_gpio_reg __iomem *reg;
+	struct idio_16 __iomem *reg;
+	struct idio_16_state state;
 	unsigned long irq_mask;
 };
 
 static int idio_16_gpio_get_direction(struct gpio_chip *chip,
 	unsigned int offset)
 {
-	if (offset > 15)
+	if (idio_16_get_direction(offset))
 		return GPIO_LINE_DIRECTION_IN;
 
 	return GPIO_LINE_DIRECTION_OUT;
@@ -82,43 +58,16 @@ static int idio_16_gpio_direction_output(struct gpio_chip *chip,
 static int idio_16_gpio_get(struct gpio_chip *chip, unsigned int offset)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	unsigned long mask = BIT(offset);
-
-	if (offset < 8)
-		return !!(ioread8(&idio16gpio->reg->out0_7) & mask);
-
-	if (offset < 16)
-		return !!(ioread8(&idio16gpio->reg->out8_15) & (mask >> 8));
-
-	if (offset < 24)
-		return !!(ioread8(&idio16gpio->reg->in0_7) & (mask >> 16));
 
-	return !!(ioread8(&idio16gpio->reg->in8_15) & (mask >> 24));
+	return idio_16_get(idio16gpio->reg, &idio16gpio->state, offset);
 }
 
 static int idio_16_gpio_get_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	unsigned long offset;
-	unsigned long gpio_mask;
-	void __iomem *ports[] = {
-		&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
-		&idio16gpio->reg->in0_7, &idio16gpio->reg->in8_15,
-	};
-	void __iomem *port_addr;
-	unsigned long port_state;
-
-	/* clear bits array to a clean slate */
-	bitmap_zero(bits, chip->ngpio);
-
-	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
-		port_addr = ports[offset / 8];
-		port_state = ioread8(port_addr) & gpio_mask;
-
-		bitmap_set_value8(bits, port_state, offset);
-	}
 
+	idio_16_get_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
 	return 0;
 }
 
@@ -126,61 +75,16 @@ static void idio_16_gpio_set(struct gpio_chip *chip, unsigned int offset,
 	int value)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	unsigned int mask = BIT(offset);
-	void __iomem *base;
-	unsigned long flags;
-	unsigned int out_state;
-
-	if (offset > 15)
-		return;
-
-	if (offset > 7) {
-		mask >>= 8;
-		base = &idio16gpio->reg->out8_15;
-	} else
-		base = &idio16gpio->reg->out0_7;
-
-	raw_spin_lock_irqsave(&idio16gpio->lock, flags);
 
-	if (value)
-		out_state = ioread8(base) | mask;
-	else
-		out_state = ioread8(base) & ~mask;
-
-	iowrite8(out_state, base);
-
-	raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
+	idio_16_set(idio16gpio->reg, &idio16gpio->state, offset, value);
 }
 
 static void idio_16_gpio_set_multiple(struct gpio_chip *chip,
 	unsigned long *mask, unsigned long *bits)
 {
 	struct idio_16_gpio *const idio16gpio = gpiochip_get_data(chip);
-	unsigned long offset;
-	unsigned long gpio_mask;
-	void __iomem *ports[] = {
-		&idio16gpio->reg->out0_7, &idio16gpio->reg->out8_15,
-	};
-	size_t index;
-	void __iomem *port_addr;
-	unsigned long bitmask;
-	unsigned long flags;
-	unsigned long out_state;
 
-	for_each_set_clump8(offset, gpio_mask, mask, ARRAY_SIZE(ports) * 8) {
-		index = offset / 8;
-		port_addr = ports[index];
-
-		bitmask = bitmap_get_value8(bits, offset) & gpio_mask;
-
-		raw_spin_lock_irqsave(&idio16gpio->lock, flags);
-
-		out_state = ioread8(port_addr) & ~gpio_mask;
-		out_state |= bitmask;
-		iowrite8(out_state, port_addr);
-
-		raw_spin_unlock_irqrestore(&idio16gpio->lock, flags);
-	}
+	idio_16_set_multiple(idio16gpio->reg, &idio16gpio->state, mask, bits);
 }
 
 static void idio_16_irq_ack(struct irq_data *data)
@@ -335,6 +239,8 @@ static int idio_16_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	idio16gpio->chip.set = idio_16_gpio_set;
 	idio16gpio->chip.set_multiple = idio_16_gpio_set_multiple;
 
+	idio_16_state_init(&idio16gpio->state);
+
 	girq = &idio16gpio->chip.irq;
 	girq->chip = &idio_16_irqchip;
 	/* This will let us handle the parent IRQ in the driver */
@@ -379,3 +285,4 @@ module_pci_driver(idio_16_driver);
 MODULE_AUTHOR("William Breathitt Gray <vilhelm.gray@gmail.com>");
 MODULE_DESCRIPTION("ACCES PCI-IDIO-16 GPIO driver");
 MODULE_LICENSE("GPL v2");
+MODULE_IMPORT_NS(GPIO_IDIO_16);
-- 
GitLab


From 848dba781f1951636c966c9f3a6a41a5b2f8b572 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 24 Oct 2022 14:39:33 +0200
Subject: [PATCH 0413/4122] container_of: remove container_of_safe()

It came in from a staging driver that has been long removed from the
tree, and there are no in-kernel users of the macro, and it's very
dubious if anyone should ever use this thing, so just remove it
entirely.

Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221024123933.3331116-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/container_of.h | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/include/linux/container_of.h b/include/linux/container_of.h
index 2f4944b791b81..a6f242137b116 100644
--- a/include/linux/container_of.h
+++ b/include/linux/container_of.h
@@ -21,20 +21,4 @@
 		      "pointer type mismatch in container_of()");	\
 	((type *)(__mptr - offsetof(type, member))); })
 
-/**
- * container_of_safe - cast a member of a structure out to the containing structure
- * @ptr:	the pointer to the member.
- * @type:	the type of the container struct this is embedded in.
- * @member:	the name of the member within the struct.
- *
- * If IS_ERR_OR_NULL(ptr), ptr is returned unchanged.
- */
-#define container_of_safe(ptr, type, member) ({				\
-	void *__mptr = (void *)(ptr);					\
-	static_assert(__same_type(*(ptr), ((type *)0)->member) ||	\
-		      __same_type(*(ptr), void),			\
-		      "pointer type mismatch in container_of_safe()");	\
-	IS_ERR_OR_NULL(__mptr) ? ERR_CAST(__mptr) :			\
-		((type *)(__mptr - offsetof(type, member))); })
-
 #endif	/* _LINUX_CONTAINER_OF_H */
-- 
GitLab


From 7376e561fd2e017e9a53f975209777234b8b434e Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@linux.intel.com>
Date: Mon, 24 Oct 2022 14:16:27 +0300
Subject: [PATCH 0414/4122] linux/container_of.h: Warn about loss of constness

container_of() casts the original type to another which leads to the loss
of the const qualifier if it is not specified in the caller-provided type.
This easily leads to container_of() returning a non-const pointer to a
const struct which the C compiler does not warn about.

Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Link: https://lore.kernel.org/r/20221024111627.75183-1-sakari.ailus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/container_of.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/container_of.h b/include/linux/container_of.h
index a6f242137b116..2008e9f4058c6 100644
--- a/include/linux/container_of.h
+++ b/include/linux/container_of.h
@@ -13,6 +13,7 @@
  * @type:	the type of the container struct this is embedded in.
  * @member:	the name of the member within the struct.
  *
+ * WARNING: any const qualifier of @ptr is lost.
  */
 #define container_of(ptr, type, member) ({				\
 	void *__mptr = (void *)(ptr);					\
-- 
GitLab


From 2ae18cc2269fc2d05d36bf44a8daa4404fa11dde Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Mon, 24 Oct 2022 10:48:46 +0300
Subject: [PATCH 0415/4122] thunderbolt: ACPI: Use the helper
 fwnode_find_reference()

Replacing the direct fwnode_property_get_reference_args()
call will this wrapper function.

No functional changes intended.

Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/thunderbolt/acpi.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/thunderbolt/acpi.c b/drivers/thunderbolt/acpi.c
index 7a8adf5ad5a09..317e4f5fdb979 100644
--- a/drivers/thunderbolt/acpi.c
+++ b/drivers/thunderbolt/acpi.c
@@ -15,24 +15,20 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data,
 				    void **return_value)
 {
 	struct acpi_device *adev = acpi_fetch_acpi_dev(handle);
-	struct fwnode_reference_args args;
 	struct fwnode_handle *fwnode;
 	struct tb_nhi *nhi = data;
 	struct pci_dev *pdev;
 	struct device *dev;
-	int ret;
 
 	if (!adev)
 		return AE_OK;
 
-	fwnode = acpi_fwnode_handle(adev);
-	ret = fwnode_property_get_reference_args(fwnode, "usb4-host-interface",
-						 NULL, 0, 0, &args);
-	if (ret)
+	fwnode = fwnode_find_reference(acpi_fwnode_handle(adev), "usb4-host-interface", 0);
+	if (IS_ERR(fwnode))
 		return AE_OK;
 
 	/* It needs to reference this NHI */
-	if (dev_fwnode(&nhi->pdev->dev) != args.fwnode)
+	if (dev_fwnode(&nhi->pdev->dev) != fwnode)
 		goto out_put;
 
 	/*
@@ -100,7 +96,7 @@ static acpi_status tb_acpi_add_link(acpi_handle handle, u32 level, void *data,
 	}
 
 out_put:
-	fwnode_handle_put(args.fwnode);
+	fwnode_handle_put(fwnode);
 	return AE_OK;
 }
 
-- 
GitLab


From eac001bf4a5b7d857ec228cd18b4e3644a5ceeb9 Mon Sep 17 00:00:00 2001
From: Xiang Yang <xiangyang3@huawei.com>
Date: Thu, 20 Oct 2022 09:44:26 +0800
Subject: [PATCH 0416/4122] gpiolib: acpi: Use METHOD_NAME__AEI macro for
 acpi_walk_resources

Using the METHOD_NAME__AEI macro instead of using "_AEI" directly.

Signed-off-by: Xiang Yang <xiangyang3@huawei.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 drivers/gpio/gpiolib-acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index a7d2358736fe7..064ba5150fd4d 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -512,7 +512,7 @@ void acpi_gpiochip_request_interrupts(struct gpio_chip *chip)
 	if (ACPI_FAILURE(status))
 		return;
 
-	acpi_walk_resources(handle, "_AEI",
+	acpi_walk_resources(handle, METHOD_NAME__AEI,
 			    acpi_gpiochip_alloc_event, acpi_gpio);
 
 	mutex_lock(&acpi_gpio_deferred_req_irqs_lock);
-- 
GitLab


From 8d259847243d1e21a866e828c4ce90d759f3d17b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 20 Oct 2022 18:39:14 +0300
Subject: [PATCH 0417/4122] gpiolib: cdev: Fix typo in kernel doc for struct
 line

When eflags has been renamed to the edflags, the kernel doc change were
missed. Update kernel doc accordingly.

Fixes: b1a92e94560d ("gpiolib: cdev: consolidate edge detector configuration flags")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
---
 drivers/gpio/gpiolib-cdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 0cb6b468f364f..08606f32372ce 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -410,7 +410,7 @@ out_free_lh:
  * @desc: the GPIO descriptor for this line.
  * @req: the corresponding line request
  * @irq: the interrupt triggered in response to events on this GPIO
- * @eflags: the edge flags, GPIO_V2_LINE_FLAG_EDGE_RISING and/or
+ * @edflags: the edge flags, GPIO_V2_LINE_FLAG_EDGE_RISING and/or
  * GPIO_V2_LINE_FLAG_EDGE_FALLING, indicating the edge detection applied
  * @timestamp_ns: cache for the timestamp storing it between hardirq and
  * IRQ thread, used to bring the timestamp close to the actual event
-- 
GitLab


From 1662cea4623f75d8251adf07370bbaa958f0355d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 25 Oct 2022 15:15:49 +0800
Subject: [PATCH 0418/4122] kset: fix memory leak when kset_register() returns
 error

Inject fault while loading module, kset_register() may fail.
If it fails, the kset.kobj.name allocated by kobject_set_name()
which must be called before a call to kset_register() may be
leaked, since refcount of kobj was set in kset_init().

To mitigate this, we free the name in kset_register() when an
error is encountered, i.e. when kset_register() returns an error.

A kset may be embedded in a larger structure which may be dynamically
allocated in callers, it needs to be freed in ktype.release() or error
path in callers, in this case, we can not call kset_put() in kset_register(),
or it will cause double free, so just call kfree_const() to free the
name and set it to NULL to avoid accessing bad pointer in callers.

With this fix, the callers don't need care about freeing the name
and may call kset_put() if kset_register() fails.

Suggested-by: Luben Tuikov <luben.tuikov@amd.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: <luben.tuikov@amd.com>
Link: https://lore.kernel.org/r/20221025071549.1280528-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/kobject.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/lib/kobject.c b/lib/kobject.c
index 0380ec889a6af..ba1017cd67d16 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -834,6 +834,9 @@ EXPORT_SYMBOL_GPL(kobj_sysfs_ops);
 /**
  * kset_register() - Initialize and add a kset.
  * @k: kset.
+ *
+ * NOTE: On error, the kset.kobj.name allocated by() kobj_set_name()
+ * is freed, it can not be used any more.
  */
 int kset_register(struct kset *k)
 {
@@ -844,8 +847,12 @@ int kset_register(struct kset *k)
 
 	kset_init(k);
 	err = kobject_add_internal(&k->kobj);
-	if (err)
+	if (err) {
+		kfree_const(k->kobj.name);
+		/* Set it to NULL to avoid accessing bad pointer in callers. */
+		k->kobj.name = NULL;
 		return err;
+	}
 	kobject_uevent(&k->kobj, KOBJ_ADD);
 	return 0;
 }
-- 
GitLab


From a12960f970d3d47d1aefd4293738b878a6e7d024 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:01 +0200
Subject: [PATCH 0419/4122] staging: r8188eu: restructure mlme subfunction
 handling

Move some code around in rtw_mlme_ext.c to make it simpler.

mlme_sta_tbl is used only by mgt_dispatcher. Move the table inside the
function. Move mgt_dispatcher behind the handler functions. We can then
make the handler functions static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 116 ++++++++++----------
 1 file changed, 57 insertions(+), 59 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index d146b94307b80..3435610ca4116 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -12,24 +12,6 @@
 #include "../include/rtl8188e_xmit.h"
 #include "../include/rtl8188e_dm.h"
 
-/* response function for each management frame subtype, do not reorder */
-static mlme_handler mlme_sta_tbl[] = {
-	OnAssocReq,
-	OnAssocRsp,
-	OnAssocReq,
-	OnAssocRsp,
-	OnProbeReq,
-	OnProbeRsp,
-	NULL,
-	NULL,
-	OnBeacon,
-	NULL,
-	OnDisassoc,
-	OnAuthClient,
-	OnDeAuth,
-	OnAction,
-};
-
 static u8 null_addr[ETH_ALEN] = {0, 0, 0, 0, 0, 0};
 
 /**************************************************
@@ -393,47 +375,6 @@ void free_mlme_ext_priv(struct mlme_ext_priv *pmlmeext)
 	}
 }
 
-void mgt_dispatcher(struct adapter *padapter, struct recv_frame *precv_frame)
-{
-	int index;
-	mlme_handler fct;
-	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data;
-	struct sta_info *psta = rtw_get_stainfo(&padapter->stapriv, hdr->addr2);
-
-	if (!ieee80211_is_mgmt(hdr->frame_control))
-		return;
-
-	/* receive the frames that ra(a1) is my address or ra(a1) is bc address. */
-	if (memcmp(hdr->addr1, myid(&padapter->eeprompriv), ETH_ALEN) &&
-	    !is_broadcast_ether_addr(hdr->addr1))
-		return;
-
-	index = (le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
-	if (index >= ARRAY_SIZE(mlme_sta_tbl))
-		return;
-	fct = mlme_sta_tbl[index];
-
-	if (psta) {
-		if (ieee80211_has_retry(hdr->frame_control)) {
-			if (precv_frame->attrib.seq_num == psta->RxMgmtFrameSeqNum)
-				/* drop the duplicate management frame */
-				return;
-		}
-		psta->RxMgmtFrameSeqNum = precv_frame->attrib.seq_num;
-	}
-
-	if (ieee80211_is_auth(hdr->frame_control)) {
-		if (check_fwstate(pmlmepriv, WIFI_AP_STATE))
-			fct = OnAuth;
-		else
-			fct = OnAuthClient;
-	}
-
-	if (fct)
-		fct(padapter, precv_frame);
-}
-
 static u32 p2p_listen_state_process(struct adapter *padapter, unsigned char *da)
 {
 	bool response = true;
@@ -4008,6 +3949,63 @@ struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv)
 	return pmgntframe;
 }
 
+void mgt_dispatcher(struct adapter *padapter, struct recv_frame *precv_frame)
+{
+	mlme_handler mlme_sta_tbl[] = {
+		OnAssocReq,
+		OnAssocRsp,
+		OnAssocReq,
+		OnAssocRsp,
+		OnProbeReq,
+		OnProbeRsp,
+		NULL,
+		NULL,
+		OnBeacon,
+		NULL,
+		OnDisassoc,
+		OnAuthClient,
+		OnDeAuth,
+		OnAction,
+	};
+	int index;
+	mlme_handler fct;
+	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data;
+	struct sta_info *psta = rtw_get_stainfo(&padapter->stapriv, hdr->addr2);
+
+	if (!ieee80211_is_mgmt(hdr->frame_control))
+		return;
+
+	/* receive the frames that ra(a1) is my address or ra(a1) is bc address. */
+	if (memcmp(hdr->addr1, myid(&padapter->eeprompriv), ETH_ALEN) &&
+	    !is_broadcast_ether_addr(hdr->addr1))
+		return;
+
+	index = (le16_to_cpu(hdr->frame_control) & IEEE80211_FCTL_STYPE) >> 4;
+	if (index >= ARRAY_SIZE(mlme_sta_tbl))
+		return;
+	fct = mlme_sta_tbl[index];
+
+	if (psta) {
+		if (ieee80211_has_retry(hdr->frame_control)) {
+			if (precv_frame->attrib.seq_num == psta->RxMgmtFrameSeqNum)
+				/* drop the duplicate management frame */
+				return;
+		}
+		psta->RxMgmtFrameSeqNum = precv_frame->attrib.seq_num;
+	}
+
+	if (ieee80211_is_auth(hdr->frame_control)) {
+		if (check_fwstate(pmlmepriv, WIFI_AP_STATE))
+			fct = OnAuth;
+		else
+			fct = OnAuthClient;
+	}
+
+	if (fct)
+		fct(padapter, precv_frame);
+}
+
 /****************************************************************************
 
 Following are some TX functions for WiFi MLME
-- 
GitLab


From b8d4f50557d52fff6356e8a83d50283232eedb98 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:02 +0200
Subject: [PATCH 0420/4122] staging: r8188eu: make OnAssocReq static

OnAssocReq is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 3435610ca4116..576f56ad43838 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -911,7 +911,7 @@ static void UpdateBrateTblForSoftAP(u8 *bssrateset, u32 bssratelen)
 	}
 }
 
-unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	u16 capab_info;
 	struct rtw_ieee802_11_elems elems;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 66aa8b497aa1d..41e31cec9abe4 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnAssocReq(struct adapter *padapter,
-			struct recv_frame *precv_frame);
 unsigned int OnAssocRsp(struct adapter *padapter,
 			struct recv_frame *precv_frame);
 unsigned int OnProbeReq(struct adapter *padapter,
-- 
GitLab


From 411c3890cb0d385bcd8ddef90abac82a32274153 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:03 +0200
Subject: [PATCH 0421/4122] staging: r8188eu: make OnAssocRsp static

OnAssocRsp is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 576f56ad43838..a3cfab2627d54 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1319,7 +1319,7 @@ OnAssocReqFail:
 	return _FAIL;
 }
 
-unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	uint i;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 41e31cec9abe4..c2b1aa2f378bf 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnAssocRsp(struct adapter *padapter,
-			struct recv_frame *precv_frame);
 unsigned int OnProbeReq(struct adapter *padapter,
 			struct recv_frame *precv_frame);
 unsigned int OnProbeRsp(struct adapter *padapter,
-- 
GitLab


From 94941c42c24813da86128f47bec0949184798b62 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:04 +0200
Subject: [PATCH 0422/4122] staging: r8188eu: make OnProbeReq static

OnProbeReq is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index a3cfab2627d54..5e1e2970ccd9b 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -447,7 +447,7 @@ Following are the callback functions for each subtype of the management frames
 
 *****************************************************************************/
 
-unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	unsigned int	ielen;
 	unsigned char	*p;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index c2b1aa2f378bf..b24aae8108b60 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnProbeReq(struct adapter *padapter,
-			struct recv_frame *precv_frame);
 unsigned int OnProbeRsp(struct adapter *padapter,
 			struct recv_frame *precv_frame);
 unsigned int OnBeacon(struct adapter *padapter,
-- 
GitLab


From bd0bd67c6bd8e2e5f1a3741f3d323e58ea3fc7ff Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:05 +0200
Subject: [PATCH 0423/4122] staging: r8188eu: make OnProbeRsp static

OnProbeRsp is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 5e1e2970ccd9b..4ccc6c6e45e6c 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -515,7 +515,7 @@ _issue_probersp:
 	return _SUCCESS;
 }
 
-unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct wifidirect_info	*pwdinfo = &padapter->wdinfo;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index b24aae8108b60..52991ec7b3e84 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnProbeRsp(struct adapter *padapter,
-			struct recv_frame *precv_frame);
 unsigned int OnBeacon(struct adapter *padapter,
 		      struct recv_frame *precv_frame);
 unsigned int OnDisassoc(struct adapter *padapter,
-- 
GitLab


From 1aad70df18457dfbd4244dbe620f6f12429e32d5 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:06 +0200
Subject: [PATCH 0424/4122] staging: r8188eu: make OnBeacon static

OnBeacon is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-7-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 4ccc6c6e45e6c..1a279c812f5ff 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -561,7 +561,7 @@ static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *prec
 	return _SUCCESS;
 }
 
-unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	int cam_idx;
 	struct sta_info	*psta;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 52991ec7b3e84..b4868598eba85 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnBeacon(struct adapter *padapter,
-		      struct recv_frame *precv_frame);
 unsigned int OnDisassoc(struct adapter *padapter,
 			struct recv_frame *precv_frame);
 unsigned int OnAuth(struct adapter *padapter,
-- 
GitLab


From 8ef3cd4f527c76832fefa2bb9960b5afb5f0a1d0 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:07 +0200
Subject: [PATCH 0425/4122] staging: r8188eu: make OnDisassoc static

OnDisassoc is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-8-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 1a279c812f5ff..309376aba41fe 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1458,7 +1458,7 @@ unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 	return _SUCCESS;
 }
 
-unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	u16 reason;
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index b4868598eba85..c1bc554202a03 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnDisassoc(struct adapter *padapter,
-			struct recv_frame *precv_frame);
 unsigned int OnAuth(struct adapter *padapter,
 		    struct recv_frame *precv_frame);
 unsigned int OnAuthClient(struct adapter *padapter,
-- 
GitLab


From de20e195c221c808f7d9e2babe6f4cd9e7c92818 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:08 +0200
Subject: [PATCH 0426/4122] staging: r8188eu: make OnAuthClient static

OnAuthClient is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-9-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 309376aba41fe..a63146ccc6b42 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -800,7 +800,7 @@ auth_fail:
 	return _FAIL;
 }
 
-unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	unsigned int	seq, len, status, offset;
 	unsigned char	*p;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index c1bc554202a03..866d358f5f100 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter);
 
 unsigned int OnAuth(struct adapter *padapter,
 		    struct recv_frame *precv_frame);
-unsigned int OnAuthClient(struct adapter *padapter,
-			  struct recv_frame *precv_frame);
 unsigned int OnDeAuth(struct adapter *padapter,
 		      struct recv_frame *precv_frame);
 unsigned int OnAction(struct adapter *padapter,
-- 
GitLab


From 51877bf3ab4ccd0279fedbf97736791234ed0cfe Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:09 +0200
Subject: [PATCH 0427/4122] staging: r8188eu: make OnDeAuth static

OnDeAuth is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-10-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index a63146ccc6b42..df817d7594a24 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1396,7 +1396,7 @@ report_assoc_result:
 	return _SUCCESS;
 }
 
-unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	unsigned short	reason;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 866d358f5f100..14df835744783 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter);
 
 unsigned int OnAuth(struct adapter *padapter,
 		    struct recv_frame *precv_frame);
-unsigned int OnDeAuth(struct adapter *padapter,
-		      struct recv_frame *precv_frame);
 unsigned int OnAction(struct adapter *padapter,
 		      struct recv_frame *precv_frame);
 
-- 
GitLab


From 37552ad1fa2ed3924364e54f99f31ee56d902bcb Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:10 +0200
Subject: [PATCH 0428/4122] staging: r8188eu: make OnAction static

OnAction is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-11-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index df817d7594a24..0eb7f586d755c 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3910,7 +3910,7 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra
 	return _SUCCESS;
 }
 
-unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 14df835744783..f192733a5c539 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter);
 
 unsigned int OnAuth(struct adapter *padapter,
 		    struct recv_frame *precv_frame);
-unsigned int OnAction(struct adapter *padapter,
-		      struct recv_frame *precv_frame);
 
 unsigned int OnAction_back(struct adapter *padapter,
 			   struct recv_frame *precv_frame);
-- 
GitLab


From 19adbd9a2f26a64be96702f488ebed77b5334f79 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:11 +0200
Subject: [PATCH 0429/4122] staging: r8188eu: make OnAuth static

OnAuth is used only in rtw_mlme_ext.c. Make this function static.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-12-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 0eb7f586d755c..be34c3b21bd7c 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -651,7 +651,7 @@ _END_ONBEACON_:
 	return _SUCCESS;
 }
 
-unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame)
+static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	unsigned int	auth_mode, ie_len;
 	u16 seq;
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index f192733a5c539..f77e6dc81831f 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,9 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnAuth(struct adapter *padapter,
-		    struct recv_frame *precv_frame);
-
 unsigned int OnAction_back(struct adapter *padapter,
 			   struct recv_frame *precv_frame);
 unsigned int on_action_public(struct adapter *padapter,
-- 
GitLab


From 05aa6bf9c2546379f8ba57e2ffe8f88082d50a52 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:12 +0200
Subject: [PATCH 0430/4122] staging: r8188eu: change mlme handlers to void

The mlme handlers that are called from mgt_dispatcher return an error
code. mgt_dispatcher doesn't check this error code, we can remove it and
change the handler functions to void.

For now, make only the minimum changes to the handlers for removing the
error codes. If handlers can be simplified, that'll be done it separate
patches.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-13-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c   | 93 ++++++++-----------
 .../staging/r8188eu/include/rtw_mlme_ext.h    |  2 +-
 2 files changed, 41 insertions(+), 54 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index be34c3b21bd7c..d89f494d12af8 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -447,7 +447,7 @@ Following are the callback functions for each subtype of the management frames
 
 *****************************************************************************/
 
-static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnProbeReq(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	unsigned int	ielen;
 	unsigned char	*p;
@@ -481,17 +481,17 @@ static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *prec
 				report_survey_event(padapter, precv_frame);
 				p2p_listen_state_process(padapter,  get_sa(pframe));
 
-				return _SUCCESS;
+				return;
 			}
 		}
 	}
 
 	if (check_fwstate(pmlmepriv, WIFI_STATION_STATE))
-		return _SUCCESS;
+		return;
 
 	if (!check_fwstate(pmlmepriv, _FW_LINKED) &&
 	    !check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE | WIFI_AP_STATE))
-		return _SUCCESS;
+		return;
 
 	p = rtw_get_ie(pframe + WLAN_HDR_A3_LEN + _PROBEREQ_IE_OFFSET_, _SSID_IE_, (int *)&ielen,
 			len - WLAN_HDR_A3_LEN - _PROBEREQ_IE_OFFSET_);
@@ -503,7 +503,7 @@ static unsigned int OnProbeReq(struct adapter *padapter, struct recv_frame *prec
 
 		if ((ielen != 0 && memcmp((void *)(p + 2), (void *)cur->Ssid.Ssid, cur->Ssid.SsidLength)) ||
 		    (ielen == 0 && pmlmeinfo->hidden_ssid_mode))
-			return _SUCCESS;
+			return;
 
 _issue_probersp:
 
@@ -512,10 +512,9 @@ _issue_probersp:
 		    check_fwstate(pmlmepriv, WIFI_ADHOC_MASTER_STATE)))
 			issue_probersp(padapter, get_sa(pframe), is_valid_p2p_probereq);
 	}
-	return _SUCCESS;
 }
 
-static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct wifidirect_info	*pwdinfo = &padapter->wdinfo;
@@ -537,7 +536,7 @@ static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *prec
 				}
 			}
 		}
-		return _SUCCESS;
+		return;
 	} else if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_GONEGO_ING)) {
 		if (pwdinfo->nego_req_info.benable) {
 			if (!memcmp(pwdinfo->nego_req_info.peerDevAddr, GetAddr2Ptr(pframe), ETH_ALEN)) {
@@ -555,13 +554,11 @@ static unsigned int OnProbeRsp(struct adapter *padapter, struct recv_frame *prec
 	}
 	if (pmlmeext->sitesurvey_res.state == SCAN_PROCESS) {
 		report_survey_event(padapter, precv_frame);
-		return _SUCCESS;
+		return;
 	}
-
-	return _SUCCESS;
 }
 
-static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	int cam_idx;
 	struct sta_info	*psta;
@@ -576,7 +573,7 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_
 
 	if (pmlmeext->sitesurvey_res.state == SCAN_PROCESS) {
 		report_survey_event(padapter, precv_frame);
-		return _SUCCESS;
+		return;
 	}
 
 	if (!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) {
@@ -600,7 +597,7 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_
 			/* start auth */
 			start_clnt_auth(padapter);
 
-			return _SUCCESS;
+			return;
 		}
 
 		if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) {
@@ -610,7 +607,7 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_
 				if (!ret) {
 					receive_disconnect(padapter,
 							   pmlmeinfo->network.MacAddress, 0);
-					return _SUCCESS;
+					return;
 				}
 				/* update WMM, ERP in the beacon */
 				/* todo: the timer is used instead of the number of the beacon received */
@@ -629,12 +626,12 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_
 				/* allocate a new CAM entry for IBSS station */
 				cam_idx = allocate_fw_sta_entry(padapter);
 				if (cam_idx == NUM_STA)
-					goto _END_ONBEACON_;
+					return;
 
 				/* get supported rate */
 				if (update_sta_support_rate(padapter, (pframe + WLAN_HDR_A3_LEN + _BEACON_IE_OFFSET_), (len - WLAN_HDR_A3_LEN - _BEACON_IE_OFFSET_), cam_idx) == _FAIL) {
 					pmlmeinfo->FW_sta_info[cam_idx].status = 0;
-					goto _END_ONBEACON_;
+					return;
 				}
 
 				/* update TSF Value */
@@ -645,13 +642,9 @@ static unsigned int OnBeacon(struct adapter *padapter, struct recv_frame *precv_
 			}
 		}
 	}
-
-_END_ONBEACON_:
-
-	return _SUCCESS;
 }
 
-static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	unsigned int	auth_mode, ie_len;
 	u16 seq;
@@ -668,7 +661,7 @@ static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_fr
 	uint len = precv_frame->len;
 
 	if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE)
-		return _FAIL;
+		return;
 
 	sa = GetAddr2Ptr(pframe);
 
@@ -784,7 +777,7 @@ static unsigned int OnAuth(struct adapter *padapter, struct recv_frame *precv_fr
 	if (pstat->state & WIFI_FW_AUTH_SUCCESS)
 		pstat->auth_seq = 0;
 
-	return _SUCCESS;
+	return;
 
 auth_fail:
 
@@ -797,10 +790,9 @@ auth_fail:
 	memcpy(pstat->hwaddr, sa, 6);
 
 	issue_auth(padapter, pstat, (unsigned short)status);
-	return _FAIL;
 }
 
-static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	unsigned int	seq, len, status, offset;
 	unsigned char	*p;
@@ -812,10 +804,10 @@ static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *pr
 
 	/* check A1 matches or not */
 	if (memcmp(myid(&padapter->eeprompriv), get_da(pframe), ETH_ALEN))
-		return _SUCCESS;
+		return;
 
 	if (!(pmlmeinfo->state & WIFI_FW_AUTH_STATE))
-		return _SUCCESS;
+		return;
 
 	offset = (GetPrivacy(pframe)) ? 4 : 0;
 
@@ -848,7 +840,7 @@ static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *pr
 			issue_auth(padapter, NULL, 0);
 			set_link_timer(pmlmeext, REAUTH_TO);
 
-			return _SUCCESS;
+			return;
 		} else {
 			/*  open system */
 			go2asoc = 1;
@@ -865,10 +857,10 @@ static unsigned int OnAuthClient(struct adapter *padapter, struct recv_frame *pr
 
 	if (go2asoc) {
 		start_clnt_assoc(padapter);
-		return _SUCCESS;
+		return;
 	}
 authclnt_fail:
-	return _FAIL;
+	return;
 }
 
 static void UpdateBrateTbl(u8 *mbrate)
@@ -911,7 +903,7 @@ static void UpdateBrateTblForSoftAP(u8 *bssrateset, u32 bssratelen)
 	}
 }
 
-static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnAssocReq(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	u16 capab_info;
 	struct rtw_ieee802_11_elems elems;
@@ -937,7 +929,7 @@ static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *prec
 	u32 p2pielen = 0;
 
 	if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE)
-		return _FAIL;
+		return;
 
 	frame_type = GetFrameSubType(pframe);
 	if (frame_type == WIFI_ASSOCREQ)
@@ -946,7 +938,7 @@ static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *prec
 		ie_offset = _REASOCREQ_IE_OFFSET_;
 
 	if (pkt_len < IEEE80211_3ADDR_LEN + ie_offset)
-		return _FAIL;
+		return;
 
 	pstat = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe));
 	if (pstat == (struct sta_info *)NULL) {
@@ -1300,13 +1292,13 @@ static unsigned int OnAssocReq(struct adapter *padapter, struct recv_frame *prec
 		report_add_sta_event(padapter, pstat->hwaddr, pstat->aid);
 	}
 
-	return _SUCCESS;
+	return;
 
 asoc_class2_error:
 
 	issue_deauth(padapter, (void *)GetAddr2Ptr(pframe), status);
 
-	return _FAIL;
+	return;
 
 OnAssocReqFail:
 
@@ -1316,10 +1308,10 @@ OnAssocReqFail:
 	else
 		issue_asocrsp(padapter, status, pstat, WIFI_REASSOCRSP);
 
-	return _FAIL;
+	return;
 }
 
-static unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnAssocRsp(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	uint i;
@@ -1333,13 +1325,13 @@ static unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *prec
 
 	/* check A1 matches or not */
 	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
-		return _SUCCESS;
+		return;
 
 	if (!(pmlmeinfo->state & (WIFI_FW_AUTH_SUCCESS | WIFI_FW_ASSOC_STATE)))
-		return _SUCCESS;
+		return;
 
 	if (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)
-		return _SUCCESS;
+		return;
 
 	_cancel_timer_ex(&pmlmeext->link_timer);
 
@@ -1392,11 +1384,9 @@ static unsigned int OnAssocRsp(struct adapter *padapter, struct recv_frame *prec
 
 report_assoc_result:
 	report_join_res(padapter, res);
-
-	return _SUCCESS;
 }
 
-static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	unsigned short	reason;
@@ -1406,7 +1396,7 @@ static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_
 	struct wifidirect_info *pwdinfo = &padapter->wdinfo;
 
 	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
-		return _SUCCESS;
+		return;
 
 	if (pwdinfo->rx_invitereq_info.scan_op_ch_only) {
 		_cancel_timer_ex(&pwdinfo->reset_ch_sitesurvey);
@@ -1422,7 +1412,7 @@ static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_
 
 		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
 		if (!psta)
-			return _SUCCESS;
+			return;
 
 		spin_lock_bh(&pstapriv->asoc_list_lock);
 		if (!list_empty(&psta->asoc_list)) {
@@ -1455,10 +1445,9 @@ static unsigned int OnDeAuth(struct adapter *padapter, struct recv_frame *precv_
 
 		pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 	}
-	return _SUCCESS;
 }
 
-static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	u16 reason;
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
@@ -1469,7 +1458,7 @@ static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *prec
 
 	/* check A3 */
 	if (!(!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN)))
-		return _SUCCESS;
+		return;
 
 	if (pwdinfo->rx_invitereq_info.scan_op_ch_only) {
 		_cancel_timer_ex(&pwdinfo->reset_ch_sitesurvey);
@@ -1497,12 +1486,11 @@ static unsigned int OnDisassoc(struct adapter *padapter, struct recv_frame *prec
 			associated_clients_update(padapter, updated);
 		}
 
-		return _SUCCESS;
+		return;
 	} else {
 		receive_disconnect(padapter, GetAddr3Ptr(pframe), reason);
 	}
 	pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
-	return _SUCCESS;
 }
 
 unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame)
@@ -3910,7 +3898,7 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra
 	return _SUCCESS;
 }
 
-static unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 
@@ -3925,7 +3913,6 @@ static unsigned int OnAction(struct adapter *padapter, struct recv_frame *precv_
 		OnAction_p2p(padapter, precv_frame);
 		break;
 	}
-	return _SUCCESS;
 }
 
 struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv)
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index f77e6dc81831f..c8beaa927cba1 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -184,7 +184,7 @@ enum SCAN_STATE {
 	SCAN_STATE_MAX,
 };
 
-typedef unsigned int (*mlme_handler)(struct adapter *adapt, struct recv_frame *frame);
+typedef void (*mlme_handler)(struct adapter *adapt, struct recv_frame *frame);
 
 struct	ss_res {
 	int	state;
-- 
GitLab


From 2d64ae536ef0c78924fdd99e0bb3018d7b6b3178 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:13 +0200
Subject: [PATCH 0431/4122] staging: r8188eu: remove unnecessary label

Remove a label on OnAuthClient that just calls return. We can return
directly instead of jumping to this label.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-14-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index d89f494d12af8..e2e7c35019f2e 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -823,7 +823,7 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 		}
 
 		set_link_timer(pmlmeext, 1);
-		goto authclnt_fail;
+		return;
 	}
 
 	if (seq == 2) {
@@ -833,7 +833,7 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 				pkt_len - WLAN_HDR_A3_LEN - _AUTH_IE_OFFSET_);
 
 			if (!p)
-				goto authclnt_fail;
+				return;
 
 			memcpy((void *)(pmlmeinfo->chg_txt), (void *)(p + 2), len);
 			pmlmeinfo->auth_seq = 3;
@@ -849,18 +849,16 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 		if (pmlmeinfo->auth_algo == dot11AuthAlgrthm_Shared)
 			go2asoc = 1;
 		else
-			goto authclnt_fail;
+			return;
 	} else {
 		/*  this is also illegal */
-		goto authclnt_fail;
+		return;
 	}
 
 	if (go2asoc) {
 		start_clnt_assoc(padapter);
 		return;
 	}
-authclnt_fail:
-	return;
 }
 
 static void UpdateBrateTbl(u8 *mbrate)
-- 
GitLab


From 3cf90ead512362a1b2b4b9a0d48ec47ca26744c2 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:14 +0200
Subject: [PATCH 0432/4122] staging: r8188eu: remove unnecessary else branch

Remove an else branch in OnAuthClient that is not needed.

If we go into the else branch, go2asoc is 0. We can simply continue and
the last if condition will be false.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-15-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index e2e7c35019f2e..6ac3e41af992f 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -850,9 +850,6 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 			go2asoc = 1;
 		else
 			return;
-	} else {
-		/*  this is also illegal */
-		return;
 	}
 
 	if (go2asoc) {
-- 
GitLab


From cb2cff04d040eb02555c3c51a045fa31f4cc0e79 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:15 +0200
Subject: [PATCH 0433/4122] staging: r8188eu: remove unnecessary return

Remove a return statement at the end of a function.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-16-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 6ac3e41af992f..a93133a9ca567 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -852,10 +852,8 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 			return;
 	}
 
-	if (go2asoc) {
+	if (go2asoc)
 		start_clnt_assoc(padapter);
-		return;
-	}
 }
 
 static void UpdateBrateTbl(u8 *mbrate)
-- 
GitLab


From d667d36d689eb3b0cd1bd0d09b36cbad9625f10b Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:16 +0200
Subject: [PATCH 0434/4122] staging: r8188eu: remove an else branch

If we go into this else branch, go2asoc is 0. We can continue to the end
of the function. The final if condition will be false.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-17-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index a93133a9ca567..ba95d2391319b 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -848,8 +848,6 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 	} else if (seq == 4) {
 		if (pmlmeinfo->auth_algo == dot11AuthAlgrthm_Shared)
 			go2asoc = 1;
-		else
-			return;
 	}
 
 	if (go2asoc)
-- 
GitLab


From 8d5c6a1df1204280354bfb59316fd056e978a864 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 24 Oct 2022 10:14:17 +0200
Subject: [PATCH 0435/4122] staging: r8188eu: go2asoc is not needed

Remove the go2asoc variable in OnAuthClient and call start_clnt_assoc
directly. This makes the code a tiny bit shorter.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221024081417.66441-18-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index ba95d2391319b..254832a0177e7 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -796,7 +796,6 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 {
 	unsigned int	seq, len, status, offset;
 	unsigned char	*p;
-	unsigned int	go2asoc = 0;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
 	u8 *pframe = precv_frame->rx_data;
@@ -843,15 +842,12 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 			return;
 		} else {
 			/*  open system */
-			go2asoc = 1;
+			start_clnt_assoc(padapter);
 		}
 	} else if (seq == 4) {
 		if (pmlmeinfo->auth_algo == dot11AuthAlgrthm_Shared)
-			go2asoc = 1;
+			start_clnt_assoc(padapter);
 	}
-
-	if (go2asoc)
-		start_clnt_assoc(padapter);
 }
 
 static void UpdateBrateTbl(u8 *mbrate)
-- 
GitLab


From ec6d91016437fe8e5fdcb8cc60c14887e588998f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 23 Oct 2022 19:08:04 +0200
Subject: [PATCH 0436/4122] staging: r8188eu: use standard multicast addr check

Use is_multicast_ether_addr to check for a multicast address instead of
reimplementing this check in the driver.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221023170808.46233-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_sta_mgt.c | 2 +-
 drivers/staging/r8188eu/include/wifi.h     | 5 -----
 2 files changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_sta_mgt.c b/drivers/staging/r8188eu/core/rtw_sta_mgt.c
index bbde5c03f9ae0..51324e708697f 100644
--- a/drivers/staging/r8188eu/core/rtw_sta_mgt.c
+++ b/drivers/staging/r8188eu/core/rtw_sta_mgt.c
@@ -404,7 +404,7 @@ struct sta_info *rtw_get_stainfo(struct sta_priv *pstapriv, u8 *hwaddr)
 	if (!hwaddr)
 		return NULL;
 
-	if (IS_MCAST(hwaddr))
+	if (is_multicast_ether_addr(hwaddr))
 		addr = bc_addr;
 	else
 		addr = hwaddr;
diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h
index 381385a7e1189..fdb7c1bf3573c 100644
--- a/drivers/staging/r8188eu/include/wifi.h
+++ b/drivers/staging/r8188eu/include/wifi.h
@@ -236,11 +236,6 @@ enum WIFI_REG_DOMAIN {
 
 #define GetAddr4Ptr(pbuf)	((unsigned char *)((size_t)(pbuf) + 24))
 
-static inline bool IS_MCAST(unsigned char *da)
-{
-	return (*da) & 0x01;
-}
-
 static inline unsigned char *get_da(unsigned char *pframe)
 {
 	unsigned char	*da;
-- 
GitLab


From 2ce164e9b363ee845287b7e693a1f9e7429ecc08 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 23 Oct 2022 19:08:05 +0200
Subject: [PATCH 0437/4122] staging: r8188eu: don't set pcmd_obj components to
 0

pcmd_obj was allocated with kzalloc, its memory is filled with 0s.
There's no need to set rsp and rspsz to NULL or 0 again.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221023170808.46233-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 254832a0177e7..6c71fbfec32d1 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -6725,9 +6725,6 @@ void report_join_res(struct adapter *padapter, int res)
 	pcmd_obj->cmdsz = cmdsz;
 	pcmd_obj->parmbuf = pevtcmd;
 
-	pcmd_obj->rsp = NULL;
-	pcmd_obj->rspsz  = 0;
-
 	pc2h_evt_hdr = (struct C2HEvent_Header *)(pevtcmd);
 	pc2h_evt_hdr->len = sizeof(struct joinbss_event);
 	pc2h_evt_hdr->ID = GEN_EVT_CODE(_JoinBss);
-- 
GitLab


From 9e9e26190ee651ba868281ae16eab5dc1bcecf0e Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 23 Oct 2022 19:08:06 +0200
Subject: [PATCH 0438/4122] staging: r8188eu: NetworkTypeInUse is not in use

NetworkTypeInUse from struct wlan_bssid_ex is not used. Remove
NetworkTypeInUse itself, the code to set it and the enum for its
possible values.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221023170808.46233-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme.c       | 16 ----------------
 drivers/staging/r8188eu/core/rtw_mlme_ext.c   |  3 ---
 drivers/staging/r8188eu/include/wlan_bssdef.h |  9 ---------
 3 files changed, 28 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme.c b/drivers/staging/r8188eu/core/rtw_mlme.c
index 1f69e5c57d5d5..a47ae33454b3d 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme.c
@@ -1819,22 +1819,6 @@ void rtw_update_registrypriv_dev_network(struct adapter *adapter)
 
 	pdev_network->Rssi = 0;
 
-	switch (pregistrypriv->wireless_mode) {
-	case WIRELESS_11B:
-		pdev_network->NetworkTypeInUse = (Ndis802_11DS);
-		break;
-	case WIRELESS_11G:
-	case WIRELESS_11BG:
-	case WIRELESS_11_24N:
-	case WIRELESS_11G_24N:
-	case WIRELESS_11BG_24N:
-		pdev_network->NetworkTypeInUse = (Ndis802_11OFDM24);
-		break;
-	default:
-		/*  TODO */
-		break;
-	}
-
 	pdev_network->Configuration.DSConfig = (pregistrypriv->channel);
 
 	if (cur_network->network.InfrastructureMode == Ndis802_11IBSS)
diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 6c71fbfec32d1..fc37d17f8c7e9 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -6160,9 +6160,6 @@ u8 collect_bss_info(struct adapter *padapter, struct recv_frame *precv_frame, st
 		memcpy(bssid->SupportedRates + i, (p + 2), len);
 	}
 
-	/* todo: */
-	bssid->NetworkTypeInUse = Ndis802_11OFDM24;
-
 	if (bssid->IELength < 12)
 		return _FAIL;
 
diff --git a/drivers/staging/r8188eu/include/wlan_bssdef.h b/drivers/staging/r8188eu/include/wlan_bssdef.h
index 81bda91a4136b..831c465df5001 100644
--- a/drivers/staging/r8188eu/include/wlan_bssdef.h
+++ b/drivers/staging/r8188eu/include/wlan_bssdef.h
@@ -17,14 +17,6 @@ struct ndis_802_11_ssid {
 	u8  Ssid[32];
 };
 
-enum NDIS_802_11_NETWORK_TYPE {
-	Ndis802_11FH,
-	Ndis802_11DS,
-	Ndis802_11OFDM5,
-	Ndis802_11OFDM24,
-	Ndis802_11NetworkTypeMax    /*  dummy upper bound */
-};
-
 struct ndis_802_11_config_fh {
 	u32           Length;		/*  Length of structure */
 	u32           HopPattern;	/*  As defined by 802.11, MSB set */
@@ -233,7 +225,6 @@ struct wlan_bssid_ex {
 	struct ndis_802_11_ssid  Ssid;
 	u32  Privacy;
 	NDIS_802_11_RSSI  Rssi;/* in dBM,raw data ,get from PHY) */
-	enum  NDIS_802_11_NETWORK_TYPE  NetworkTypeInUse;
 	struct ndis_802_11_config  Configuration;
 	enum ndis_802_11_network_infra  InfrastructureMode;
 	unsigned char SupportedRates[NDIS_802_11_LENGTH_RATES_EX];
-- 
GitLab


From e329c18341b72c8386a1a25f8176ec541e66bf2b Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 23 Oct 2022 19:08:07 +0200
Subject: [PATCH 0439/4122] staging: r8188eu: remove wait_ack param from
 _issue_probereq_p2p

The only caller of _issue_probereq_p2p sets wait_ack = false. Remove the
wait_ack parameter and the code that would run only if wait_ack == true.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221023170808.46233-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index fc37d17f8c7e9..ffb708f242e6c 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3207,7 +3207,7 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da)
 	dump_mgntframe(padapter, pmgntframe);
 }
 
-static int _issue_probereq_p2p(struct adapter *padapter, u8 *da, int wait_ack)
+static int _issue_probereq_p2p(struct adapter *padapter, u8 *da)
 {
 	int ret = _FAIL;
 	struct xmit_frame		*pmgntframe;
@@ -3480,12 +3480,8 @@ static int _issue_probereq_p2p(struct adapter *padapter, u8 *da, int wait_ack)
 
 	pattrib->last_txcmdsz = pattrib->pktlen;
 
-	if (wait_ack) {
-		ret = dump_mgntframe_and_wait_ack(padapter, pmgntframe);
-	} else {
-		dump_mgntframe(padapter, pmgntframe);
-		ret = _SUCCESS;
-	}
+	dump_mgntframe(padapter, pmgntframe);
+	ret = _SUCCESS;
 
 exit:
 	return ret;
@@ -3493,7 +3489,7 @@ exit:
 
 inline void issue_probereq_p2p(struct adapter *adapter, u8 *da)
 {
-	_issue_probereq_p2p(adapter, da, false);
+	_issue_probereq_p2p(adapter, da);
 }
 
 static s32 rtw_action_public_decache(struct recv_frame *recv_frame, s32 token)
-- 
GitLab


From 5229004f800219e849969eaa8197dff3b55f4b02 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 23 Oct 2022 19:08:08 +0200
Subject: [PATCH 0440/4122] staging: r8188eu: bCardDisableWOHSM is write-only

bCardDisableWOHSM in struct adapter is only written but never read.
It can be removed.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221023170808.46233-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/drv_types.h | 1 -
 drivers/staging/r8188eu/os_dep/os_intfs.c   | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/drivers/staging/r8188eu/include/drv_types.h b/drivers/staging/r8188eu/include/drv_types.h
index 1bd0c8f3a3588..8fef5759c36ae 100644
--- a/drivers/staging/r8188eu/include/drv_types.h
+++ b/drivers/staging/r8188eu/include/drv_types.h
@@ -167,7 +167,6 @@ struct adapter {
 
 	s32	bDriverStopped;
 	s32	bSurpriseRemoved;
-	s32	bCardDisableWOHSM;
 
 	u8	hw_init_completed;
 	s8	signal_strength;
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index 38e324754c8fa..970f380bac961 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -620,7 +620,6 @@ static int _netdev_open(struct net_device *pnetdev)
 	if (!padapter->bup) {
 		padapter->bDriverStopped = false;
 		padapter->bSurpriseRemoved = false;
-		padapter->bCardDisableWOHSM = false;
 
 		status = rtw_hal_init(padapter);
 		if (status == _FAIL)
@@ -686,7 +685,6 @@ static int  ips_netdrv_open(struct adapter *padapter)
 
 	padapter->bDriverStopped = false;
 	padapter->bSurpriseRemoved = false;
-	padapter->bCardDisableWOHSM = false;
 
 	status = rtw_hal_init(padapter);
 	if (status == _FAIL)
@@ -718,13 +716,11 @@ int rtw_ips_pwr_up(struct adapter *padapter)
 
 void rtw_ips_pwr_down(struct adapter *padapter)
 {
-	padapter->bCardDisableWOHSM = true;
 	padapter->net_closed = true;
 
 	rtw_led_control(padapter, LED_CTL_POWER_OFF);
 
 	rtw_ips_dev_unload(padapter);
-	padapter->bCardDisableWOHSM = false;
 }
 
 static void rtw_fifo_cleanup(struct adapter *adapter)
-- 
GitLab


From db213ea614e0b65d4be9c1cb78d14465a2c5b146 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Mon, 24 Oct 2022 19:26:07 +0700
Subject: [PATCH 0441/4122] Staging: rtl8192e: rtllib_tx: fixed multiple blank
 lines

Multiple blank lines are condensed into a single blank line as per
the Linux kernel coding-style regulations. The issues were flagged
by the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/a3e044ff9e5b198af15ebc8b91fd048743881340.1666612946.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_tx.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index e307020580a05..abb5b57d142be 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -191,7 +191,6 @@ int rtllib_encrypt_fragment(struct rtllib_device *ieee, struct sk_buff *frag,
 	return 0;
 }
 
-
 void rtllib_txb_free(struct rtllib_txb *txb)
 {
 	if (unlikely(!txb))
@@ -475,7 +474,6 @@ NO_PROTECTION:
 	tcb_desc->bRTSBW	= false;
 }
 
-
 static void rtllib_txrate_selectmode(struct rtllib_device *ieee,
 				     struct cb_desc *tcb_desc)
 {
@@ -582,7 +580,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 		goto success;
 	}
 
-
 	if (likely(ieee->raw_tx == 0)) {
 		if (unlikely(skb->len < SNAP_SIZE + sizeof(u16))) {
 			netdev_warn(ieee->dev, "skb too small (%d).\n",
@@ -895,7 +892,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 				tcb_desc->bTxDisableRateFallBack = 1;
 			}
 
-
 			tcb_desc->RATRIndex = 7;
 			tcb_desc->bTxUseDriverAssingedRate = 1;
 		} else {
@@ -920,7 +916,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 					tcb_desc->bTxDisableRateFallBack = 1;
 				}
 
-
 				tcb_desc->RATRIndex = 7;
 				tcb_desc->bTxUseDriverAssingedRate = 1;
 				tcb_desc->bdhcp = 1;
-- 
GitLab


From 2de698578ff331f98021d53e626e46acc4a91b46 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Mon, 24 Oct 2022 19:26:59 +0700
Subject: [PATCH 0442/4122] Staging: rtl8192e: rtllib_tx: fixed alignment
 matching open parenthesis

Aligned multiple statements to match the open parenthesis on the
line before it as per the Linux kernel coding-style regulations.
The issues were flagged by the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/b9a984cf481018a8ef61e4a579cb307eaf1bdc56.1666612946.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_tx.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index abb5b57d142be..38dfdcf99fecc 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -290,7 +290,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee,
 		return;
 	if (pHTInfo->bCurrentAMPDUEnable) {
 		if (!GetTs(ieee, (struct ts_common_info **)(&pTxTs), hdr->addr1,
-		    skb->priority, TX_DIR, true)) {
+			   skb->priority, TX_DIR, true)) {
 			netdev_info(ieee->dev, "%s: can't get TS\n", __func__);
 			return;
 		}
@@ -306,7 +306,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee,
 			goto FORCED_AGG_SETTING;
 		} else if (!pTxTs->bUsingBa) {
 			if (SN_LESS(pTxTs->TxAdmittedBARecord.ba_start_seq_ctrl.field.seq_num,
-			   (pTxTs->TxCurSeq+1)%4096))
+				    (pTxTs->TxCurSeq+1)%4096))
 				pTxTs->bUsingBa = true;
 			else
 				goto FORCED_AGG_SETTING;
@@ -442,7 +442,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee,
 			u8 HTOpMode = pHTInfo->current_op_mode;
 
 			if ((pHTInfo->bCurBW40MHz && (HTOpMode == 2 ||
-			     HTOpMode == 3)) ||
+						      HTOpMode == 3)) ||
 			     (!pHTInfo->bCurBW40MHz && HTOpMode == 3)) {
 				tcb_desc->rts_rate = MGN_24M;
 				tcb_desc->bRTSEnable = true;
@@ -501,7 +501,7 @@ static u16 rtllib_query_seqnum(struct rtllib_device *ieee, struct sk_buff *skb,
 		struct tx_ts_record *pTS = NULL;
 
 		if (!GetTs(ieee, (struct ts_common_info **)(&pTS), dst,
-		    skb->priority, TX_DIR, true))
+			   skb->priority, TX_DIR, true))
 			return 0;
 		seqnum = pTS->TxCurSeq;
 		pTS->TxCurSeq = (pTS->TxCurSeq+1)%4096;
@@ -618,7 +618,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 					udp = (struct udphdr *)((u8 *)ip +
 					      (ip->ihl << 2));
 					if (((((u8 *)udp)[1] == 68) &&
-					   (((u8 *)udp)[3] == 67)) ||
+					     (((u8 *)udp)[3] == 67)) ||
 					   ((((u8 *)udp)[1] == 67) &&
 					   (((u8 *)udp)[3] == 68))) {
 						bdhcp = true;
@@ -712,11 +712,11 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 			/* in case we are a client verify acm is not set for this ac */
 			while (unlikely(ieee->wmm_acm & (0x01 << skb->priority))) {
 				netdev_info(ieee->dev, "skb->priority = %x\n",
-						skb->priority);
+					    skb->priority);
 				if (wme_downgrade_ac(skb))
 					break;
 				netdev_info(ieee->dev, "converted skb->priority = %x\n",
-					   skb->priority);
+					    skb->priority);
 			}
 
 			qos_ctl |= skb->priority;
@@ -813,7 +813,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 			if ((qos_activated) && (!bIsMulticast)) {
 				frag_hdr->seq_ctl =
 					 cpu_to_le16(rtllib_query_seqnum(ieee, skb_frag,
-							     header.addr1));
+									 header.addr1));
 				frag_hdr->seq_ctl =
 					 cpu_to_le16(le16_to_cpu(frag_hdr->seq_ctl)<<4 | i);
 			} else {
-- 
GitLab


From 5b773c5504205398ce7ebcf42c5092a7759b556a Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Mon, 24 Oct 2022 19:27:59 +0700
Subject: [PATCH 0443/4122] Staging: rtl8192e: rtllib_tx: added spaces around
 operators

Added multiple spaces around operators which needs it as per the
Linux kernel coding-style regulations. The issues were flagged by
the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/b8bf422436ff4b2d9b530c61ebf814582ab8836a.1666612946.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_tx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index 38dfdcf99fecc..fa998cd769176 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -306,7 +306,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee,
 			goto FORCED_AGG_SETTING;
 		} else if (!pTxTs->bUsingBa) {
 			if (SN_LESS(pTxTs->TxAdmittedBARecord.ba_start_seq_ctrl.field.seq_num,
-				    (pTxTs->TxCurSeq+1)%4096))
+				    (pTxTs->TxCurSeq + 1) % 4096))
 				pTxTs->bUsingBa = true;
 			else
 				goto FORCED_AGG_SETTING;
@@ -403,7 +403,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee,
 	if (tcb_desc->bBroadcast || tcb_desc->bMulticast)
 		return;
 
-	if (is_broadcast_ether_addr(skb->data+16))
+	if (is_broadcast_ether_addr(skb->data + 16))
 		return;
 
 	if (ieee->mode < IEEE_N_24G) {
@@ -504,7 +504,7 @@ static u16 rtllib_query_seqnum(struct rtllib_device *ieee, struct sk_buff *skb,
 			   skb->priority, TX_DIR, true))
 			return 0;
 		seqnum = pTS->TxCurSeq;
-		pTS->TxCurSeq = (pTS->TxCurSeq+1)%4096;
+		pTS->TxCurSeq = (pTS->TxCurSeq + 1) % 4096;
 		return seqnum;
 	}
 	return 0;
@@ -611,7 +611,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 		if (skb->len > 282) {
 			if (ether_type == ETH_P_IP) {
 				const struct iphdr *ip = (struct iphdr *)
-					((u8 *)skb->data+14);
+					((u8 *)skb->data + 14);
 				if (ip->protocol == IPPROTO_UDP) {
 					struct udphdr *udp;
 
@@ -815,10 +815,10 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 					 cpu_to_le16(rtllib_query_seqnum(ieee, skb_frag,
 									 header.addr1));
 				frag_hdr->seq_ctl =
-					 cpu_to_le16(le16_to_cpu(frag_hdr->seq_ctl)<<4 | i);
+					 cpu_to_le16(le16_to_cpu(frag_hdr->seq_ctl) << 4 | i);
 			} else {
 				frag_hdr->seq_ctl =
-					 cpu_to_le16(ieee->seq_ctrl[0]<<4 | i);
+					 cpu_to_le16(ieee->seq_ctrl[0] << 4 | i);
 			}
 			/* Put a SNAP header on the first fragment */
 			if (i == 0) {
-- 
GitLab


From 73fb5660fc300ecd9a611e1997c68bc81c8098db Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Mon, 24 Oct 2022 19:29:14 +0700
Subject: [PATCH 0444/4122] Staging: rtl8192e: rtllib_tx: fixed lines ending
 with an open parenthesis

Fixed several lines that end with an open parenthesis to not end
with it anymore, as per the Linux kernel coding-style regulations.
The issues were flagged by the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/a8e67d281b4e9ab1254e0b58294f566994ac7d9d.1666612946.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_tx.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index fa998cd769176..4647efb0f868b 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -802,8 +802,8 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 			 * MOREFRAGS bit to the frame control
 			 */
 			if (i != nr_frags - 1) {
-				frag_hdr->frame_ctl = cpu_to_le16(
-					fc | RTLLIB_FCTL_MOREFRAGS);
+				frag_hdr->frame_ctl = cpu_to_le16(fc |
+								  RTLLIB_FCTL_MOREFRAGS);
 				bytes = bytes_per_frag;
 
 			} else {
@@ -822,9 +822,9 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 			}
 			/* Put a SNAP header on the first fragment */
 			if (i == 0) {
-				rtllib_put_snap(
-					skb_put(skb_frag, SNAP_SIZE +
-					sizeof(u16)), ether_type);
+				rtllib_put_snap(skb_put(skb_frag,
+							SNAP_SIZE +
+							sizeof(u16)), ether_type);
 				bytes -= SNAP_SIZE + sizeof(u16);
 			}
 
-- 
GitLab


From 438b4ade12582c358a6e6ab762408923f1af2d44 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Mon, 24 Oct 2022 19:30:28 +0700
Subject: [PATCH 0445/4122] Staging: rtl8192e: rtllib_tx: removed unnecessary
 blank line before a close brace

Removed a blank line before a closing brace as it is not necessary
as per the Linux coding-style regulations. The issue was flagged by
the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/234a3e8d21080ae2a1f75127a5c1a0ed131db924.1666612946.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_tx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index 4647efb0f868b..101f441291451 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -954,7 +954,6 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 	netif_stop_queue(dev);
 	stats->tx_errors++;
 	return 1;
-
 }
 
 netdev_tx_t rtllib_xmit(struct sk_buff *skb, struct net_device *dev)
-- 
GitLab


From 6e006508f6e20a0f09dd9e64bc23c0592d92cc2d Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Mon, 24 Oct 2022 14:47:25 -0700
Subject: [PATCH 0446/4122] staging: rtl8723bs: Fix indentation in conditional
 statements

Remove/add tabs in block statements in rtl8723bs/core to fix checkpatch
warnings for suspect code indent for conditionals.

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/Y1cH7br3mMcT4Dm5@marshmallow
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_mlme.c     | 30 +++++++++----------
 drivers/staging/rtl8723bs/core/rtw_mlme_ext.c |  2 +-
 drivers/staging/rtl8723bs/core/rtw_recv.c     |  6 ++--
 drivers/staging/rtl8723bs/core/rtw_sta_mgt.c  |  2 +-
 4 files changed, 19 insertions(+), 21 deletions(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme.c b/drivers/staging/rtl8723bs/core/rtw_mlme.c
index 6498fd17e1d3e..c6fd6cf741ef6 100644
--- a/drivers/staging/rtl8723bs/core/rtw_mlme.c
+++ b/drivers/staging/rtl8723bs/core/rtw_mlme.c
@@ -389,7 +389,7 @@ int is_same_network(struct wlan_bssid_ex *src, struct wlan_bssid_ex *dst, u8 fea
 	__le16 tmps, tmpd;
 
 	if (rtw_bug_check(dst, src, &s_cap, &d_cap) == false)
-			return false;
+		return false;
 
 	memcpy((u8 *)&tmps, rtw_get_capability_from_ie(src->ies), 2);
 	memcpy((u8 *)&tmpd, rtw_get_capability_from_ie(dst->ies), 2);
@@ -669,7 +669,7 @@ int rtw_is_desired_network(struct adapter *adapter, struct wlan_network *pnetwor
 		uint ie_len = 0;
 
 		if ((desired_encmode == Ndis802_11EncryptionDisabled) && (privacy != 0))
-	    bselected = false;
+			bselected = false;
 
 		if (psecuritypriv->ndisauthtype == Ndis802_11AuthModeWPA2PSK) {
 			p = rtw_get_ie(pnetwork->network.ies + _BEACON_IE_OFFSET_, WLAN_EID_RSN, &ie_len, (pnetwork->network.ie_length - _BEACON_IE_OFFSET_));
@@ -795,7 +795,7 @@ void rtw_surveydone_event_callback(struct adapter	*adapter, u8 *pbuf)
 			pmlmepriv->to_join = false;
 			s_ret = rtw_select_and_join_from_scanned_queue(pmlmepriv);
 			if (s_ret == _SUCCESS) {
-			     _set_timer(&pmlmepriv->assoc_timer, MAX_JOIN_TIMEOUT);
+				_set_timer(&pmlmepriv->assoc_timer, MAX_JOIN_TIMEOUT);
 			} else if (s_ret == 2) {/* there is no need to wait for join */
 				_clr_fwstate_(pmlmepriv, _FW_UNDER_LINKING);
 				rtw_indicate_connect(adapter);
@@ -2010,8 +2010,8 @@ int rtw_restruct_wmm_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, uint in_
 
 		if (in_ie[i] == 0xDD && in_ie[i+2] == 0x00 && in_ie[i+3] == 0x50  && in_ie[i+4] == 0xF2 && in_ie[i+5] == 0x02 && i+5 < in_len) { /* WMM element ID and OUI */
 			for (j = i; j < i + 9; j++) {
-					out_ie[ielength] = in_ie[j];
-					ielength++;
+				out_ie[ielength] = in_ie[j];
+				ielength++;
 			}
 			out_ie[initial_out_len + 1] = 0x07;
 			out_ie[initial_out_len + 6] = 0x00;
@@ -2064,15 +2064,13 @@ static int rtw_append_pmkid(struct adapter *Adapter, int iEntry, u8 *ie, uint ie
 
 	if (ie[13] <= 20) {
 		/*  The RSN IE didn't include the PMK ID, append the PMK information */
-			ie[ie_len] = 1;
-			ie_len++;
-			ie[ie_len] = 0;	/* PMKID count = 0x0100 */
-			ie_len++;
-			memcpy(&ie[ie_len], &psecuritypriv->PMKIDList[iEntry].PMKID, 16);
-
-			ie_len += 16;
-			ie[13] += 18;/* PMKID length = 2+16 */
-
+		ie[ie_len] = 1;
+		ie_len++;
+		ie[ie_len] = 0;	/* PMKID count = 0x0100 */
+		ie_len++;
+		memcpy(&ie[ie_len], &psecuritypriv->PMKIDList[iEntry].PMKID, 16);
+		ie_len += 16;
+		ie[13] += 18;/* PMKID length = 2+16 */
 	}
 	return ie_len;
 }
@@ -2091,9 +2089,9 @@ signed int rtw_restruct_sec_ie(struct adapter *adapter, u8 *in_ie, u8 *out_ie, u
 	memcpy(out_ie, in_ie, 12);
 	ielength = 12;
 	if ((ndisauthmode == Ndis802_11AuthModeWPA) || (ndisauthmode == Ndis802_11AuthModeWPAPSK))
-			authmode = WLAN_EID_VENDOR_SPECIFIC;
+		authmode = WLAN_EID_VENDOR_SPECIFIC;
 	if ((ndisauthmode == Ndis802_11AuthModeWPA2) || (ndisauthmode == Ndis802_11AuthModeWPA2PSK))
-			authmode = WLAN_EID_RSN;
+		authmode = WLAN_EID_RSN;
 
 	if (check_fwstate(pmlmepriv, WIFI_UNDER_WPS)) {
 		memcpy(out_ie+ielength, psecuritypriv->wps_ie, psecuritypriv->wps_ie_len);
diff --git a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
index 8e74b4f47b947..1148c9829890c 100644
--- a/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
+++ b/drivers/staging/rtl8723bs/core/rtw_mlme_ext.c
@@ -5447,7 +5447,7 @@ u8 disconnect_hdl(struct adapter *padapter, unsigned char *pbuf)
 	u8 val8;
 
 	if (is_client_associated_to_ap(padapter))
-			issue_deauth_ex(padapter, pnetwork->mac_address, WLAN_REASON_DEAUTH_LEAVING, param->deauth_timeout_ms/100, 100);
+		issue_deauth_ex(padapter, pnetwork->mac_address, WLAN_REASON_DEAUTH_LEAVING, param->deauth_timeout_ms/100, 100);
 
 	if (((pmlmeinfo->state&0x03) == WIFI_FW_ADHOC_STATE) || ((pmlmeinfo->state&0x03) == WIFI_FW_AP_STATE)) {
 		/* Stop BCN */
diff --git a/drivers/staging/rtl8723bs/core/rtw_recv.c b/drivers/staging/rtl8723bs/core/rtw_recv.c
index 2825375bff942..7c7b6495965f5 100644
--- a/drivers/staging/rtl8723bs/core/rtw_recv.c
+++ b/drivers/staging/rtl8723bs/core/rtw_recv.c
@@ -161,7 +161,7 @@ int rtw_free_recvframe(union recv_frame *precvframe, struct __queue *pfree_recv_
 
 	if (padapter) {
 		if (pfree_recv_queue == &precvpriv->free_recv_queue)
-				precvpriv->free_recvframe_cnt++;
+			precvpriv->free_recvframe_cnt++;
 	}
 	spin_unlock_bh(&pfree_recv_queue->lock);
 	return _SUCCESS;
@@ -691,8 +691,8 @@ static signed int sta2sta_data_frame(struct adapter *adapter, union recv_frame *
 		if (bmcast) {
 			/*  For AP mode, if DA == MCAST, then BSSID should be also MCAST */
 			if (!IS_MCAST(pattrib->bssid)) {
-					ret = _FAIL;
-					goto exit;
+				ret = _FAIL;
+				goto exit;
 			}
 		} else { /*  not mc-frame */
 			/*  For AP mode, if DA is non-MCAST, then it must be BSSID, and bssid == BSSID */
diff --git a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
index beb11d89db186..c7de81f21bec0 100644
--- a/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
+++ b/drivers/staging/rtl8723bs/core/rtw_sta_mgt.c
@@ -551,7 +551,7 @@ u8 rtw_access_ctrl(struct adapter *padapter, u8 *mac_addr)
 	else if (pacl_list->mode == 2)/* deny unless in accept list */
 		res = match;
 	else
-		 res = true;
+		res = true;
 
 	return res;
 }
-- 
GitLab


From 5f803b22bd2befdb43dfbee69d90932edd3cc1d0 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Tue, 25 Oct 2022 17:44:02 +0700
Subject: [PATCH 0447/4122] staging: rtl8192e: rtllib_crypt_wep: multiple blank
 lines removal

Removal of multiple unnecessary blank lines in accordance with the
Linux kernel coding-style regulations. Said issues ware detected on
this file by the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/20221025104402.xvbfobi7sdnwlqs2@plymouth
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_crypt_wep.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_crypt_wep.c b/drivers/staging/rtl8192e/rtllib_crypt_wep.c
index 7790271a6a40d..062285e4d9398 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_wep.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_wep.c
@@ -27,7 +27,6 @@ struct prism2_wep_data {
 	struct arc4_ctx tx_ctx_arc4;
 };
 
-
 static void *prism2_wep_init(int keyidx)
 {
 	struct prism2_wep_data *priv;
@@ -46,7 +45,6 @@ static void *prism2_wep_init(int keyidx)
 	return priv;
 }
 
-
 static void prism2_wep_deinit(void *priv)
 {
 	kfree_sensitive(priv);
@@ -120,7 +118,6 @@ static int prism2_wep_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	return 0;
 }
 
-
 /* Perform WEP decryption on given struct buffer. Buffer includes whole WEP
  * part of the frame: IV (4 bytes), encrypted payload (including SNAP header),
  * ICV (4 bytes). len includes both IV and ICV.
@@ -180,7 +177,6 @@ static int prism2_wep_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	return 0;
 }
 
-
 static int prism2_wep_set_key(void *key, int len, u8 *seq, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
@@ -194,7 +190,6 @@ static int prism2_wep_set_key(void *key, int len, u8 *seq, void *priv)
 	return 0;
 }
 
-
 static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
@@ -207,7 +202,6 @@ static int prism2_wep_get_key(void *key, int len, u8 *seq, void *priv)
 	return wep->key_len;
 }
 
-
 static void prism2_wep_print_stats(struct seq_file *m, void *priv)
 {
 	struct prism2_wep_data *wep = priv;
@@ -231,13 +225,11 @@ static struct lib80211_crypto_ops rtllib_crypt_wep = {
 	.owner			= THIS_MODULE,
 };
 
-
 static int __init rtllib_crypto_wep_init(void)
 {
 	return lib80211_register_crypto_ops(&rtllib_crypt_wep);
 }
 
-
 static void __exit rtllib_crypto_wep_exit(void)
 {
 	lib80211_unregister_crypto_ops(&rtllib_crypt_wep);
-- 
GitLab


From 8cd75652300f957066b86e4ede5525c756fe7a42 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Tue, 25 Oct 2022 17:27:22 +0200
Subject: [PATCH 0448/4122] staging: r8188eu: remove unused macros from wifi.h

There are some unused macros in wifi.h. Remove them.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221025152722.14926-1-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/wifi.h | 27 --------------------------
 1 file changed, 27 deletions(-)

diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h
index fdb7c1bf3573c..6b50089cea294 100644
--- a/drivers/staging/r8188eu/include/wifi.h
+++ b/drivers/staging/r8188eu/include/wifi.h
@@ -140,7 +140,6 @@ enum WIFI_REG_DOMAIN {
 #define _PWRMGT_	BIT(12)
 #define _MORE_DATA_	BIT(13)
 #define _PRIVACY_	BIT(14)
-#define _ORDER_		BIT(15)
 
 #define SetToDs(pbuf)	\
 	*(__le16 *)(pbuf) |= cpu_to_le16(_TO_DS_)
@@ -187,17 +186,6 @@ enum WIFI_REG_DOMAIN {
 		*(__le16 *)(pbuf) |= cpu_to_le16(type); \
 	} while (0)
 
-#define GetTupleCache(pbuf)			\
-	(cpu_to_le16(*(unsigned short *)((size_t)(pbuf) + 22)))
-
-#define SetFragNum(pbuf, num) \
-	do {    \
-		*(unsigned short *)((size_t)(pbuf) + 22) = \
-			((*(unsigned short *)((size_t)(pbuf) + 22)) &	\
-			le16_to_cpu(~(0x000f))) | \
-			cpu_to_le16(0x0f & (num));     \
-	} while (0)
-
 #define SetSeqNum(pbuf, num) \
 	do {    \
 		*(__le16 *)((size_t)(pbuf) + 22) = \
@@ -221,13 +209,6 @@ enum WIFI_REG_DOMAIN {
 
 #define GetAMsdu(pbuf) (((le16_to_cpu(*(__le16 *)pbuf)) >> 7) & 0x1)
 
-#define SetAMsdu(pbuf, amsdu)	\
-	*(__le16 *)(pbuf) |= cpu_to_le16((amsdu & 1) << 7)
-
-#define GetTid(pbuf)	(le16_to_cpu(*(__le16 *)((size_t)(pbuf) +	\
-			(((GetToDs(pbuf)<<1) | GetFrDs(pbuf)) == 3 ?	\
-			30 : 24))) & 0x000f)
-
 #define GetAddr1Ptr(pbuf)	((unsigned char *)((size_t)(pbuf) + 4))
 
 #define GetAddr2Ptr(pbuf)	((unsigned char *)((size_t)(pbuf) + 10))
@@ -410,14 +391,6 @@ static inline unsigned char *get_hdr_bssid(unsigned char *pframe)
 				Below is the definition for 802.11n
 ------------------------------------------------------------------------------*/
 
-#define SetOrderBit(pbuf)	\
-	do	{	\
-		*(unsigned short *)(pbuf) |= cpu_to_le16(_ORDER_); \
-	} while (0)
-
-#define GetOrderBit(pbuf)			\
-	(((*(unsigned short *)(pbuf)) & le16_to_cpu(_ORDER_)) != 0)
-
 /**
  * struct rtw_ieee80211_bar - HT Block Ack Request
  *
-- 
GitLab


From c3db3c2fd9992c08f49aa93752d3c103c3a4f6aa Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@denx.de>
Date: Mon, 24 Oct 2022 19:30:12 +0200
Subject: [PATCH 0449/4122] f2fs: should put a page when checking the summary
 info

The commit introduces another bug.

Cc: stable@vger.kernel.org
Fixes: c6ad7fd16657e ("f2fs: fix to do sanity check on summary info")
Signed-off-by: Pavel Machek <pavel@denx.de>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 4546e01b2ee08..dab794225cce7 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1110,6 +1110,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	if (ofs_in_node >= max_addrs) {
 		f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u",
 			ofs_in_node, dni->ino, dni->nid, max_addrs);
+		f2fs_put_page(node_page, 1);
 		return false;
 	}
 
-- 
GitLab


From 14dc00a0e2dbea4b685ab9723ff511fcfd223c18 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 17 Oct 2022 17:52:05 -0700
Subject: [PATCH 0450/4122] f2fs: let's avoid to get cp_rwsem twice by
 f2fs_evict_inode by d_invalidate

f2fs_unlink
 -> f2fs_lock_op
 -> d_invalidate
  -> shrink_dentry_list
   -> iput_final
    -> f2fs_evict_inode
     -> f2fs_lock_op

Reviewed-by: Chao Yu <chao@kernel.org>
Tested-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index a389772fd212a..e104409c3a0e5 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -632,6 +632,8 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 		goto fail;
 	}
 	f2fs_delete_entry(de, page, dir, inode);
+	f2fs_unlock_op(sbi);
+
 #if IS_ENABLED(CONFIG_UNICODE)
 	/* VFS negative dentries are incompatible with Encoding and
 	 * Case-insensitiveness. Eventually we'll want avoid
@@ -642,8 +644,6 @@ static int f2fs_unlink(struct inode *dir, struct dentry *dentry)
 	if (IS_CASEFOLDED(dir))
 		d_invalidate(dentry);
 #endif
-	f2fs_unlock_op(sbi);
-
 	if (IS_DIRSYNC(dir))
 		f2fs_sync_fs(sbi->sb, 1);
 fail:
-- 
GitLab


From ae25e00ba84073450c07d8ffd2d74f914a027230 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Tue, 25 Oct 2022 18:32:49 +0300
Subject: [PATCH 0451/4122] x86/retpoline: Fix crash printing warning

The first argument of WARN() is a condition, so this will use "addr"
as the format string and possibly crash.

Fixes: 3b6c1747da48 ("x86/retpoline: Add SKL retthunk retpolines")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Link: https://lore.kernel.org/all/Y1gBoUZrRK5N%2FlCB@kili/
---
 arch/x86/kernel/alternative.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 19221d77dc275..b4ac4e58c0106 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -418,7 +418,7 @@ clang_jcc:
 		break;
 
 	default:
-		WARN("%pS %px %*ph\n", addr, addr, 6, addr);
+		WARN(1, "%pS %px %*ph\n", addr, addr, 6, addr);
 		return -1;
 	}
 
-- 
GitLab


From d233ab3c5c5ed4b3d2201bddb71dab5a2946c31b Mon Sep 17 00:00:00 2001
From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
Date: Sun, 25 Sep 2022 02:47:57 +0200
Subject: [PATCH 0452/4122] riscv/vdso: typo therefor

The adverbs 'therefor' and 'therefore' have different meaning.
As the meaning here is 'consequently' the spelling should be 'therefore'.

Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20220925004757.9089-1-heinrich.schuchardt@canonical.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/vdso.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/vdso.h b/arch/riscv/include/asm/vdso.h
index af981426fe0ff..a7644f46d0e56 100644
--- a/arch/riscv/include/asm/vdso.h
+++ b/arch/riscv/include/asm/vdso.h
@@ -10,7 +10,7 @@
 
 /*
  * All systems with an MMU have a VDSO, but systems without an MMU don't
- * support shared libraries and therefor don't have one.
+ * support shared libraries and therefore don't have one.
  */
 #ifdef CONFIG_MMU
 
-- 
GitLab


From 079f0c21ef6d79f80b19b64f5e0218d5a328c4cd Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Thu, 20 Oct 2022 16:31:55 +0200
Subject: [PATCH 0453/4122] s390/mm: gmap: sort out physical vs virtual
 pointers usage

Fix virtual vs physical address confusion (which currently are the same).

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20221020143159.294605-2-nrb@linux.ibm.com
Message-Id: <20221020143159.294605-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/mm/gmap.c | 147 +++++++++++++++++++++++---------------------
 1 file changed, 76 insertions(+), 71 deletions(-)

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 02d15c8dc92e9..2ccfcc8a3863e 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -72,7 +72,7 @@ static struct gmap *gmap_alloc(unsigned long limit)
 		goto out_free;
 	page->index = 0;
 	list_add(&page->lru, &gmap->crst_list);
-	table = (unsigned long *) page_to_phys(page);
+	table = page_to_virt(page);
 	crst_table_init(table, etype);
 	gmap->table = table;
 	gmap->asce = atype | _ASCE_TABLE_LENGTH |
@@ -311,12 +311,12 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 	page = alloc_pages(GFP_KERNEL_ACCOUNT, CRST_ALLOC_ORDER);
 	if (!page)
 		return -ENOMEM;
-	new = (unsigned long *) page_to_phys(page);
+	new = page_to_virt(page);
 	crst_table_init(new, init);
 	spin_lock(&gmap->guest_table_lock);
 	if (*table & _REGION_ENTRY_INVALID) {
 		list_add(&page->lru, &gmap->crst_list);
-		*table = (unsigned long) new | _REGION_ENTRY_LENGTH |
+		*table = __pa(new) | _REGION_ENTRY_LENGTH |
 			(*table & _REGION_ENTRY_TYPE_MASK);
 		page->index = gaddr;
 		page = NULL;
@@ -557,7 +557,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 		    gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY,
 				     gaddr & _REGION1_MASK))
 			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 	}
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) {
 		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -565,7 +565,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 		    gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY,
 				     gaddr & _REGION2_MASK))
 			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 	}
 	if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) {
 		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -573,7 +573,7 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
 		    gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY,
 				     gaddr & _REGION3_MASK))
 			return -ENOMEM;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 	}
 	table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
 	/* Walk the parent mm page table */
@@ -813,7 +813,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION2:
 		table += (gaddr & _REGION2_INDEX) >> _REGION2_SHIFT;
@@ -821,7 +821,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_REGION3:
 		table += (gaddr & _REGION3_INDEX) >> _REGION3_SHIFT;
@@ -829,7 +829,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+		table = __va(*table & _REGION_ENTRY_ORIGIN);
 		fallthrough;
 	case _ASCE_TYPE_SEGMENT:
 		table += (gaddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT;
@@ -837,7 +837,7 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap,
 			break;
 		if (*table & _REGION_ENTRY_INVALID)
 			return NULL;
-		table = (unsigned long *)(*table & _SEGMENT_ENTRY_ORIGIN);
+		table = __va(*table & _SEGMENT_ENTRY_ORIGIN);
 		table += (gaddr & _PAGE_INDEX) >> _PAGE_SHIFT;
 	}
 	return table;
@@ -1150,7 +1150,7 @@ int gmap_read_table(struct gmap *gmap, unsigned long gaddr, unsigned long *val)
 			if (pte_present(pte) && (pte_val(pte) & _PAGE_READ)) {
 				address = pte_val(pte) & PAGE_MASK;
 				address += gaddr & ~PAGE_MASK;
-				*val = *(unsigned long *) address;
+				*val = *(unsigned long *)__va(address);
 				set_pte(ptep, set_pte_bit(*ptep, __pgprot(_PAGE_YOUNG)));
 				/* Do *NOT* clear the _PAGE_INVALID bit! */
 				rc = 0;
@@ -1335,7 +1335,8 @@ static void __gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long sto, *ste, *pgt;
+	unsigned long *ste;
+	phys_addr_t sto, pgt;
 	struct page *page;
 
 	BUG_ON(!gmap_is_shadow(sg));
@@ -1343,13 +1344,13 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 	if (!ste || !(*ste & _SEGMENT_ENTRY_ORIGIN))
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _SEGMENT_SIZE - 1);
-	sto = (unsigned long) (ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
+	sto = __pa(ste - ((raddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT));
 	gmap_idte_one(sto | _ASCE_TYPE_SEGMENT, raddr);
-	pgt = (unsigned long *)(*ste & _SEGMENT_ENTRY_ORIGIN);
+	pgt = *ste & _SEGMENT_ENTRY_ORIGIN;
 	*ste = _SEGMENT_ENTRY_EMPTY;
-	__gmap_unshadow_pgt(sg, raddr, pgt);
+	__gmap_unshadow_pgt(sg, raddr, __va(pgt));
 	/* Free page table */
-	page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+	page = phys_to_page(pgt);
 	list_del(&page->lru);
 	page_table_free_pgste(page);
 }
@@ -1365,19 +1366,19 @@ static void gmap_unshadow_pgt(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
 				unsigned long *sgt)
 {
-	unsigned long *pgt;
 	struct page *page;
+	phys_addr_t pgt;
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
 		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
 			continue;
-		pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
+		pgt = sgt[i] & _REGION_ENTRY_ORIGIN;
 		sgt[i] = _SEGMENT_ENTRY_EMPTY;
-		__gmap_unshadow_pgt(sg, raddr, pgt);
+		__gmap_unshadow_pgt(sg, raddr, __va(pgt));
 		/* Free page table */
-		page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+		page = phys_to_page(pgt);
 		list_del(&page->lru);
 		page_table_free_pgste(page);
 	}
@@ -1392,7 +1393,8 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long r3o, *r3e, *sgt;
+	unsigned long r3o, *r3e;
+	phys_addr_t sgt;
 	struct page *page;
 
 	BUG_ON(!gmap_is_shadow(sg));
@@ -1401,12 +1403,12 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _REGION3_SIZE - 1);
 	r3o = (unsigned long) (r3e - ((raddr & _REGION3_INDEX) >> _REGION3_SHIFT));
-	gmap_idte_one(r3o | _ASCE_TYPE_REGION3, raddr);
-	sgt = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN);
+	gmap_idte_one(__pa(r3o) | _ASCE_TYPE_REGION3, raddr);
+	sgt = *r3e & _REGION_ENTRY_ORIGIN;
 	*r3e = _REGION3_ENTRY_EMPTY;
-	__gmap_unshadow_sgt(sg, raddr, sgt);
+	__gmap_unshadow_sgt(sg, raddr, __va(sgt));
 	/* Free segment table */
-	page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+	page = phys_to_page(sgt);
 	list_del(&page->lru);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
@@ -1422,19 +1424,19 @@ static void gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
 				unsigned long *r3t)
 {
-	unsigned long *sgt;
 	struct page *page;
+	phys_addr_t sgt;
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION3_SIZE) {
 		if (!(r3t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
-		sgt = (unsigned long *)(r3t[i] & _REGION_ENTRY_ORIGIN);
+		sgt = r3t[i] & _REGION_ENTRY_ORIGIN;
 		r3t[i] = _REGION3_ENTRY_EMPTY;
-		__gmap_unshadow_sgt(sg, raddr, sgt);
+		__gmap_unshadow_sgt(sg, raddr, __va(sgt));
 		/* Free segment table */
-		page = pfn_to_page(__pa(sgt) >> PAGE_SHIFT);
+		page = phys_to_page(sgt);
 		list_del(&page->lru);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
@@ -1449,7 +1451,8 @@ static void __gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long r2o, *r2e, *r3t;
+	unsigned long r2o, *r2e;
+	phys_addr_t r3t;
 	struct page *page;
 
 	BUG_ON(!gmap_is_shadow(sg));
@@ -1458,12 +1461,12 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _REGION2_SIZE - 1);
 	r2o = (unsigned long) (r2e - ((raddr & _REGION2_INDEX) >> _REGION2_SHIFT));
-	gmap_idte_one(r2o | _ASCE_TYPE_REGION2, raddr);
-	r3t = (unsigned long *)(*r2e & _REGION_ENTRY_ORIGIN);
+	gmap_idte_one(__pa(r2o) | _ASCE_TYPE_REGION2, raddr);
+	r3t = *r2e & _REGION_ENTRY_ORIGIN;
 	*r2e = _REGION2_ENTRY_EMPTY;
-	__gmap_unshadow_r3t(sg, raddr, r3t);
+	__gmap_unshadow_r3t(sg, raddr, __va(r3t));
 	/* Free region 3 table */
-	page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+	page = phys_to_page(r3t);
 	list_del(&page->lru);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
@@ -1479,7 +1482,7 @@ static void gmap_unshadow_r3t(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
 				unsigned long *r2t)
 {
-	unsigned long *r3t;
+	phys_addr_t r3t;
 	struct page *page;
 	int i;
 
@@ -1487,11 +1490,11 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION2_SIZE) {
 		if (!(r2t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
-		r3t = (unsigned long *)(r2t[i] & _REGION_ENTRY_ORIGIN);
+		r3t = r2t[i] & _REGION_ENTRY_ORIGIN;
 		r2t[i] = _REGION2_ENTRY_EMPTY;
-		__gmap_unshadow_r3t(sg, raddr, r3t);
+		__gmap_unshadow_r3t(sg, raddr, __va(r3t));
 		/* Free region 3 table */
-		page = pfn_to_page(__pa(r3t) >> PAGE_SHIFT);
+		page = phys_to_page(r3t);
 		list_del(&page->lru);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
@@ -1506,8 +1509,9 @@ static void __gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr,
  */
 static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 {
-	unsigned long r1o, *r1e, *r2t;
+	unsigned long r1o, *r1e;
 	struct page *page;
+	phys_addr_t r2t;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	r1e = gmap_table_walk(sg, raddr, 4); /* get region-1 pointer */
@@ -1515,12 +1519,12 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 		return;
 	gmap_call_notifier(sg, raddr, raddr + _REGION1_SIZE - 1);
 	r1o = (unsigned long) (r1e - ((raddr & _REGION1_INDEX) >> _REGION1_SHIFT));
-	gmap_idte_one(r1o | _ASCE_TYPE_REGION1, raddr);
-	r2t = (unsigned long *)(*r1e & _REGION_ENTRY_ORIGIN);
+	gmap_idte_one(__pa(r1o) | _ASCE_TYPE_REGION1, raddr);
+	r2t = *r1e & _REGION_ENTRY_ORIGIN;
 	*r1e = _REGION1_ENTRY_EMPTY;
-	__gmap_unshadow_r2t(sg, raddr, r2t);
+	__gmap_unshadow_r2t(sg, raddr, __va(r2t));
 	/* Free region 2 table */
-	page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+	page = phys_to_page(r2t);
 	list_del(&page->lru);
 	__free_pages(page, CRST_ALLOC_ORDER);
 }
@@ -1536,22 +1540,23 @@ static void gmap_unshadow_r2t(struct gmap *sg, unsigned long raddr)
 static void __gmap_unshadow_r1t(struct gmap *sg, unsigned long raddr,
 				unsigned long *r1t)
 {
-	unsigned long asce, *r2t;
+	unsigned long asce;
 	struct page *page;
+	phys_addr_t r2t;
 	int i;
 
 	BUG_ON(!gmap_is_shadow(sg));
-	asce = (unsigned long) r1t | _ASCE_TYPE_REGION1;
+	asce = __pa(r1t) | _ASCE_TYPE_REGION1;
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _REGION1_SIZE) {
 		if (!(r1t[i] & _REGION_ENTRY_ORIGIN))
 			continue;
-		r2t = (unsigned long *)(r1t[i] & _REGION_ENTRY_ORIGIN);
-		__gmap_unshadow_r2t(sg, raddr, r2t);
+		r2t = r1t[i] & _REGION_ENTRY_ORIGIN;
+		__gmap_unshadow_r2t(sg, raddr, __va(r2t));
 		/* Clear entry and flush translation r1t -> r2t */
 		gmap_idte_one(asce, raddr);
 		r1t[i] = _REGION1_ENTRY_EMPTY;
 		/* Free region 2 table */
-		page = pfn_to_page(__pa(r2t) >> PAGE_SHIFT);
+		page = phys_to_page(r2t);
 		list_del(&page->lru);
 		__free_pages(page, CRST_ALLOC_ORDER);
 	}
@@ -1573,7 +1578,7 @@ static void gmap_unshadow(struct gmap *sg)
 	sg->removed = 1;
 	gmap_call_notifier(sg, 0, -1UL);
 	gmap_flush_tlb(sg);
-	table = (unsigned long *)(sg->asce & _ASCE_ORIGIN);
+	table = __va(sg->asce & _ASCE_ORIGIN);
 	switch (sg->asce & _ASCE_TYPE_MASK) {
 	case _ASCE_TYPE_REGION1:
 		__gmap_unshadow_r1t(sg, 0, table);
@@ -1748,7 +1753,8 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 		    int fake)
 {
 	unsigned long raddr, origin, offset, len;
-	unsigned long *s_r2t, *table;
+	unsigned long *table;
+	phys_addr_t s_r2t;
 	struct page *page;
 	int rc;
 
@@ -1760,7 +1766,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 	page->index = r2t & _REGION_ENTRY_ORIGIN;
 	if (fake)
 		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_r2t = (unsigned long *) page_to_phys(page);
+	s_r2t = page_to_phys(page);
 	/* Install shadow region second table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 4); /* get region-1 pointer */
@@ -1775,9 +1781,9 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 		rc = -EAGAIN;		/* Race with shadow */
 		goto out_free;
 	}
-	crst_table_init(s_r2t, _REGION2_ENTRY_EMPTY);
+	crst_table_init(__va(s_r2t), _REGION2_ENTRY_EMPTY);
 	/* mark as invalid as long as the parent table is not protected */
-	*table = (unsigned long) s_r2t | _REGION_ENTRY_LENGTH |
+	*table = s_r2t | _REGION_ENTRY_LENGTH |
 		 _REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= (r2t & _REGION_ENTRY_PROTECT);
@@ -1798,8 +1804,7 @@ int gmap_shadow_r2t(struct gmap *sg, unsigned long saddr, unsigned long r2t,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 4);
-		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
-			      (unsigned long) s_r2t)
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r2t)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_REGION_ENTRY_INVALID;
@@ -1832,7 +1837,8 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 		    int fake)
 {
 	unsigned long raddr, origin, offset, len;
-	unsigned long *s_r3t, *table;
+	unsigned long *table;
+	phys_addr_t s_r3t;
 	struct page *page;
 	int rc;
 
@@ -1844,7 +1850,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 	page->index = r3t & _REGION_ENTRY_ORIGIN;
 	if (fake)
 		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_r3t = (unsigned long *) page_to_phys(page);
+	s_r3t = page_to_phys(page);
 	/* Install shadow region second table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 3); /* get region-2 pointer */
@@ -1859,9 +1865,9 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 		rc = -EAGAIN;		/* Race with shadow */
 		goto out_free;
 	}
-	crst_table_init(s_r3t, _REGION3_ENTRY_EMPTY);
+	crst_table_init(__va(s_r3t), _REGION3_ENTRY_EMPTY);
 	/* mark as invalid as long as the parent table is not protected */
-	*table = (unsigned long) s_r3t | _REGION_ENTRY_LENGTH |
+	*table = s_r3t | _REGION_ENTRY_LENGTH |
 		 _REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= (r3t & _REGION_ENTRY_PROTECT);
@@ -1882,8 +1888,7 @@ int gmap_shadow_r3t(struct gmap *sg, unsigned long saddr, unsigned long r3t,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 3);
-		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
-			      (unsigned long) s_r3t)
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_r3t)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_REGION_ENTRY_INVALID;
@@ -1916,7 +1921,8 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		    int fake)
 {
 	unsigned long raddr, origin, offset, len;
-	unsigned long *s_sgt, *table;
+	unsigned long *table;
+	phys_addr_t s_sgt;
 	struct page *page;
 	int rc;
 
@@ -1928,7 +1934,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 	page->index = sgt & _REGION_ENTRY_ORIGIN;
 	if (fake)
 		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_sgt = (unsigned long *) page_to_phys(page);
+	s_sgt = page_to_phys(page);
 	/* Install shadow region second table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 2); /* get region-3 pointer */
@@ -1943,9 +1949,9 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		rc = -EAGAIN;		/* Race with shadow */
 		goto out_free;
 	}
-	crst_table_init(s_sgt, _SEGMENT_ENTRY_EMPTY);
+	crst_table_init(__va(s_sgt), _SEGMENT_ENTRY_EMPTY);
 	/* mark as invalid as long as the parent table is not protected */
-	*table = (unsigned long) s_sgt | _REGION_ENTRY_LENGTH |
+	*table = s_sgt | _REGION_ENTRY_LENGTH |
 		 _REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
 	if (sg->edat_level >= 1)
 		*table |= sgt & _REGION_ENTRY_PROTECT;
@@ -1966,8 +1972,7 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 2);
-		if (!table || (*table & _REGION_ENTRY_ORIGIN) !=
-			      (unsigned long) s_sgt)
+		if (!table || (*table & _REGION_ENTRY_ORIGIN) != s_sgt)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_REGION_ENTRY_INVALID;
@@ -2040,8 +2045,9 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 		    int fake)
 {
 	unsigned long raddr, origin;
-	unsigned long *s_pgt, *table;
+	unsigned long *table;
 	struct page *page;
+	phys_addr_t s_pgt;
 	int rc;
 
 	BUG_ON(!gmap_is_shadow(sg) || (pgt & _SEGMENT_ENTRY_LARGE));
@@ -2052,7 +2058,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 	page->index = pgt & _SEGMENT_ENTRY_ORIGIN;
 	if (fake)
 		page->index |= GMAP_SHADOW_FAKE_TABLE;
-	s_pgt = (unsigned long *) page_to_phys(page);
+	s_pgt = page_to_phys(page);
 	/* Install shadow page table */
 	spin_lock(&sg->guest_table_lock);
 	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
@@ -2085,8 +2091,7 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 	spin_lock(&sg->guest_table_lock);
 	if (!rc) {
 		table = gmap_table_walk(sg, saddr, 1);
-		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) !=
-			      (unsigned long) s_pgt)
+		if (!table || (*table & _SEGMENT_ENTRY_ORIGIN) != s_pgt)
 			rc = -EAGAIN;		/* Race with unshadow */
 		else
 			*table &= ~_SEGMENT_ENTRY_INVALID;
-- 
GitLab


From 6b33e68ab30949f9657e2acc59766977ae63e1cc Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Thu, 20 Oct 2022 16:31:56 +0200
Subject: [PATCH 0454/4122] s390/entry: sort out physical vs virtual pointers
 usage in sie64a

Fix virtual vs physical address confusion (which currently are the
same).

sie_block is accessed in entry.S and passed it to hardware, which is why
both its physical and virtual address are needed. To avoid every caller
having to do the virtual-physical conversion, add a new function sie64a()
which converts the virtual address to physical.

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Alexander Gordeev <agordeev@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20221020143159.294605-3-nrb@linux.ibm.com
Message-Id: <20221020143159.294605-3-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h   |  8 +++++++-
 arch/s390/include/asm/stacktrace.h |  1 +
 arch/s390/kernel/asm-offsets.c     |  1 +
 arch/s390/kernel/entry.S           | 26 +++++++++++++++-----------
 4 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index b1e98a9ed152b..9a31d00e99b3c 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -1017,7 +1017,13 @@ void kvm_arch_crypto_clear_masks(struct kvm *kvm);
 void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
 			       unsigned long *aqm, unsigned long *adm);
 
-extern int sie64a(struct kvm_s390_sie_block *, u64 *);
+int __sie64a(phys_addr_t sie_block_phys, struct kvm_s390_sie_block *sie_block, u64 *rsa);
+
+static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa)
+{
+	return __sie64a(virt_to_phys(sie_block), sie_block, rsa);
+}
+
 extern char sie_exit;
 
 extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
diff --git a/arch/s390/include/asm/stacktrace.h b/arch/s390/include/asm/stacktrace.h
index b23c658dce77f..1802be5abb5dc 100644
--- a/arch/s390/include/asm/stacktrace.h
+++ b/arch/s390/include/asm/stacktrace.h
@@ -46,6 +46,7 @@ struct stack_frame {
 			unsigned long sie_savearea;
 			unsigned long sie_reason;
 			unsigned long sie_flags;
+			unsigned long sie_control_block_phys;
 		};
 	};
 	unsigned long gprs[10];
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index d8ce965c0a97c..3f8e760298c22 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -62,6 +62,7 @@ int main(void)
 	OFFSET(__SF_SIE_SAVEAREA, stack_frame, sie_savearea);
 	OFFSET(__SF_SIE_REASON, stack_frame, sie_reason);
 	OFFSET(__SF_SIE_FLAGS, stack_frame, sie_flags);
+	OFFSET(__SF_SIE_CONTROL_PHYS, stack_frame, sie_control_block_phys);
 	DEFINE(STACK_FRAME_OVERHEAD, sizeof(struct stack_frame));
 	BLANK();
 	/* idle data offsets */
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index d2a1f2f4f5b88..12e1773a94a47 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -225,18 +225,20 @@ ENDPROC(__switch_to)
 
 #if IS_ENABLED(CONFIG_KVM)
 /*
- * sie64a calling convention:
- * %r2 pointer to sie control block
- * %r3 guest register save area
+ * __sie64a calling convention:
+ * %r2 pointer to sie control block phys
+ * %r3 pointer to sie control block virt
+ * %r4 guest register save area
  */
-ENTRY(sie64a)
+ENTRY(__sie64a)
 	stmg	%r6,%r14,__SF_GPRS(%r15)	# save kernel registers
 	lg	%r12,__LC_CURRENT
-	stg	%r2,__SF_SIE_CONTROL(%r15)	# save control block pointer
-	stg	%r3,__SF_SIE_SAVEAREA(%r15)	# save guest register save area
+	stg	%r2,__SF_SIE_CONTROL_PHYS(%r15)	# save sie block physical..
+	stg	%r3,__SF_SIE_CONTROL(%r15)	# ...and virtual addresses
+	stg	%r4,__SF_SIE_SAVEAREA(%r15)	# save guest register save area
 	xc	__SF_SIE_REASON(8,%r15),__SF_SIE_REASON(%r15) # reason code = 0
 	mvc	__SF_SIE_FLAGS(8,%r15),__TI_flags(%r12) # copy thread flags
-	lmg	%r0,%r13,0(%r3)			# load guest gprs 0-13
+	lmg	%r0,%r13,0(%r4)			# load guest gprs 0-13
 	lg	%r14,__LC_GMAP			# get gmap pointer
 	ltgr	%r14,%r14
 	jz	.Lsie_gmap
@@ -248,6 +250,7 @@ ENTRY(sie64a)
 	jnz	.Lsie_skip
 	TSTMSK	__LC_CPU_FLAGS,_CIF_FPU
 	jo	.Lsie_skip			# exit if fp/vx regs changed
+	lg	%r14,__SF_SIE_CONTROL_PHYS(%r15)	# get sie block phys addr
 	BPEXIT	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
 .Lsie_entry:
 	sie	0(%r14)
@@ -258,13 +261,14 @@ ENTRY(sie64a)
 	BPOFF
 	BPENTER	__SF_SIE_FLAGS(%r15),(_TIF_ISOLATE_BP|_TIF_ISOLATE_BP_GUEST)
 .Lsie_skip:
+	lg	%r14,__SF_SIE_CONTROL(%r15)	# get control block pointer
 	ni	__SIE_PROG0C+3(%r14),0xfe	# no longer in SIE
 	lctlg	%c1,%c1,__LC_KERNEL_ASCE	# load primary asce
 .Lsie_done:
 # some program checks are suppressing. C code (e.g. do_protection_exception)
 # will rewind the PSW by the ILC, which is often 4 bytes in case of SIE. There
 # are some corner cases (e.g. runtime instrumentation) where ILC is unpredictable.
-# Other instructions between sie64a and .Lsie_done should not cause program
+# Other instructions between __sie64a and .Lsie_done should not cause program
 # interrupts. So lets use 3 nops as a landing pad for all possible rewinds.
 .Lrewind_pad6:
 	nopr	7
@@ -293,8 +297,8 @@ sie_exit:
 	EX_TABLE(.Lrewind_pad4,.Lsie_fault)
 	EX_TABLE(.Lrewind_pad2,.Lsie_fault)
 	EX_TABLE(sie_exit,.Lsie_fault)
-ENDPROC(sie64a)
-EXPORT_SYMBOL(sie64a)
+ENDPROC(__sie64a)
+EXPORT_SYMBOL(__sie64a)
 EXPORT_SYMBOL(sie_exit)
 #endif
 
@@ -373,7 +377,7 @@ ENTRY(pgm_check_handler)
 	j	3f			# -> fault in user space
 .Lpgm_skip_asce:
 #if IS_ENABLED(CONFIG_KVM)
-	# cleanup critical section for program checks in sie64a
+	# cleanup critical section for program checks in __sie64a
 	OUTSIDE	%r9,.Lsie_gmap,.Lsie_done,1f
 	SIEEXIT
 	lghi	%r10,_PIF_GUEST_FAULT
-- 
GitLab


From fe0ef00304639cae82df7c9ad6a15286bd5f876e Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Thu, 20 Oct 2022 16:31:57 +0200
Subject: [PATCH 0455/4122] KVM: s390: sort out physical vs virtual pointers
 usage

Fix virtual vs physical address confusion (which currently are the same).

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20221020143159.294605-4-nrb@linux.ibm.com
Message-Id: <20221020143159.294605-4-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |  1 +
 arch/s390/kvm/intercept.c        |  2 +-
 arch/s390/kvm/kvm-s390.c         | 44 ++++++++++++++++++--------------
 arch/s390/kvm/kvm-s390.h         |  5 ++--
 4 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 9a31d00e99b3c..931f978758997 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -276,6 +276,7 @@ struct kvm_s390_sie_block {
 #define ECB3_AES 0x04
 #define ECB3_RI  0x01
 	__u8    ecb3;			/* 0x0063 */
+#define ESCA_SCAOL_MASK ~0x3fU
 	__u32	scaol;			/* 0x0064 */
 	__u8	sdf;			/* 0x0068 */
 	__u8    epdx;			/* 0x0069 */
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 88112065d9411..b703b5202f257 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -217,7 +217,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu)
 		return 0;
 	if (current->thread.per_flags & PER_FLAG_NO_TE)
 		return 0;
-	itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba;
+	itdb = phys_to_virt(vcpu->arch.sie_block->itdba);
 	rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb));
 	if (rc)
 		return rc;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 45d4b8182b073..0f7ff0c9019f8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -3329,28 +3329,30 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu)
 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
 {
 	if (!kvm_s390_use_sca_entries()) {
-		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca);
 
 		/* we still need the basic sca for the ipte control */
-		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
-		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys;
 		return;
 	}
 	read_lock(&vcpu->kvm->arch.sca_lock);
 	if (vcpu->kvm->arch.use_esca) {
 		struct esca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(sca);
 
-		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
-		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
-		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
+		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK;
 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
 	} else {
 		struct bsca_block *sca = vcpu->kvm->arch.sca;
+		phys_addr_t sca_phys = virt_to_phys(sca);
 
-		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
-		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
-		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
+		sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block);
+		vcpu->arch.sie_block->scaoh = sca_phys >> 32;
+		vcpu->arch.sie_block->scaol = sca_phys;
 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
 	}
 	read_unlock(&vcpu->kvm->arch.sca_lock);
@@ -3381,6 +3383,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
 	struct kvm_vcpu *vcpu;
 	unsigned long vcpu_idx;
 	u32 scaol, scaoh;
+	phys_addr_t new_sca_phys;
 
 	if (kvm->arch.use_esca)
 		return 0;
@@ -3389,8 +3392,9 @@ static int sca_switch_to_extended(struct kvm *kvm)
 	if (!new_sca)
 		return -ENOMEM;
 
-	scaoh = (u32)((u64)(new_sca) >> 32);
-	scaol = (u32)(u64)(new_sca) & ~0x3fU;
+	new_sca_phys = virt_to_phys(new_sca);
+	scaoh = new_sca_phys >> 32;
+	scaol = new_sca_phys & ESCA_SCAOL_MASK;
 
 	kvm_s390_vcpu_block_all(kvm);
 	write_lock(&kvm->arch.sca_lock);
@@ -3610,15 +3614,18 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
 
 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
 {
-	free_page(vcpu->arch.sie_block->cbrlo);
+	free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo));
 	vcpu->arch.sie_block->cbrlo = 0;
 }
 
 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT);
-	if (!vcpu->arch.sie_block->cbrlo)
+	void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+
+	if (!cbrlo_page)
 		return -ENOMEM;
+
+	vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page);
 	return 0;
 }
 
@@ -3628,7 +3635,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
 
 	vcpu->arch.sie_block->ibc = model->ibc;
 	if (test_kvm_facility(vcpu->kvm, 7))
-		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
+		vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list);
 }
 
 static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
@@ -3685,9 +3692,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu)
 		VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u",
 			   vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id);
 	}
-	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
-					| SDNXC;
-	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
+	vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC;
+	vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb);
 
 	if (sclp.has_kss)
 		kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS);
@@ -3737,7 +3743,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 		return -ENOMEM;
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
-	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+	vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb);
 
 	/* the real guest size will always be smaller than msl */
 	vcpu->arch.sie_block->mso = 0;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index f6fd668f887e8..a60d1e5c44cd1 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -23,7 +23,8 @@
 /* Transactional Memory Execution related macros */
 #define IS_TE_ENABLED(vcpu)	((vcpu->arch.sie_block->ecb & ECB_TE))
 #define TDB_FORMAT1		1
-#define IS_ITDB_VALID(vcpu)	((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
+#define IS_ITDB_VALID(vcpu) \
+	((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1))
 
 extern debug_info_t *kvm_s390_dbf;
 extern debug_info_t *kvm_s390_dbf_uv;
@@ -233,7 +234,7 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots)
 
 static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
 {
-	u32 gd = (u32)(u64)kvm->arch.gisa_int.origin;
+	u32 gd = virt_to_phys(kvm->arch.gisa_int.origin);
 
 	if (gd && sclp.has_gisaf)
 		gd |= GISA_FORMAT1;
-- 
GitLab


From b99f4512197acc10f63b5fb462c088c2f62b5120 Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Thu, 20 Oct 2022 16:31:58 +0200
Subject: [PATCH 0456/4122] KVM: s390: sida: sort out physical vs virtual
 pointers usage

All callers of the sida_origin() macro actually expected a virtual
address, so rename it to sida_addr() and hand out a virtual address.

At some places, the macro wasn't used, potentially creating problems
if the sida size ever becomes nonzero (not currently the case), so let's
start using it everywhere now while at it.

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20221020143159.294605-5-nrb@linux.ibm.com
Message-Id: <20221020143159.294605-5-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h | 3 +--
 arch/s390/kvm/intercept.c        | 7 +++----
 arch/s390/kvm/kvm-s390.c         | 9 +++++----
 arch/s390/kvm/priv.c             | 3 +--
 arch/s390/kvm/pv.c               | 8 +++++---
 5 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 931f978758997..21f1339a41974 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -142,8 +142,7 @@ struct mcck_volatile_info {
 			   CR14_EXTERNAL_DAMAGE_SUBMASK)
 
 #define SIDAD_SIZE_MASK		0xff
-#define sida_origin(sie_block) \
-	((sie_block)->sidad & PAGE_MASK)
+#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK)
 #define sida_size(sie_block) \
 	((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE)
 
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index b703b5202f257..0ee02dae14b2b 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -409,8 +409,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
 out:
 	if (!cc) {
 		if (kvm_s390_pv_cpu_is_protected(vcpu)) {
-			memcpy((void *)(sida_origin(vcpu->arch.sie_block)),
-			       sctns, PAGE_SIZE);
+			memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE);
 		} else {
 			r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE);
 			if (r) {
@@ -464,7 +463,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
 
 static int handle_pv_spx(struct kvm_vcpu *vcpu)
 {
-	u32 pref = *(u32 *)vcpu->arch.sie_block->sidad;
+	u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block);
 
 	kvm_s390_set_prefix(vcpu, pref);
 	trace_kvm_s390_handle_prefix(vcpu, 1, pref);
@@ -497,7 +496,7 @@ static int handle_pv_sclp(struct kvm_vcpu *vcpu)
 
 static int handle_pv_uvc(struct kvm_vcpu *vcpu)
 {
-	struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad;
+	struct uv_cb_share *guest_uvcb = sida_addr(vcpu->arch.sie_block);
 	struct uv_cb_cts uvcb = {
 		.header.cmd	= UVC_CMD_UNPIN_PAGE_SHARED,
 		.header.len	= sizeof(uvcb),
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 0f7ff0c9019f8..bd6e0201bfe5f 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5167,6 +5167,7 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
 				  struct kvm_s390_mem_op *mop)
 {
 	void __user *uaddr = (void __user *)mop->buf;
+	void *sida_addr;
 	int r = 0;
 
 	if (mop->flags || !mop->size)
@@ -5178,16 +5179,16 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu,
 	if (!kvm_s390_pv_cpu_is_protected(vcpu))
 		return -EINVAL;
 
+	sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset;
+
 	switch (mop->op) {
 	case KVM_S390_MEMOP_SIDA_READ:
-		if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) +
-				 mop->sida_offset), mop->size))
+		if (copy_to_user(uaddr, sida_addr, mop->size))
 			r = -EFAULT;
 
 		break;
 	case KVM_S390_MEMOP_SIDA_WRITE:
-		if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) +
-				   mop->sida_offset), uaddr, mop->size))
+		if (copy_from_user(sida_addr, uaddr, mop->size))
 			r = -EFAULT;
 		break;
 	}
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 3335fa09b6f1d..9f8a192bd750f 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -924,8 +924,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 		return -EREMOTE;
 	}
 	if (kvm_s390_pv_cpu_is_protected(vcpu)) {
-		memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem,
-		       PAGE_SIZE);
+		memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE);
 		rc = 0;
 	} else {
 		rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE);
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 7cb7799a0acb4..c7435c37cdfea 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -44,7 +44,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 		free_pages(vcpu->arch.pv.stor_base,
 			   get_order(uv_info.guest_cpu_stor_len));
 
-	free_page(sida_origin(vcpu->arch.sie_block));
+	free_page((unsigned long)sida_addr(vcpu->arch.sie_block));
 	vcpu->arch.sie_block->pv_handle_cpu = 0;
 	vcpu->arch.sie_block->pv_handle_config = 0;
 	memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv));
@@ -66,6 +66,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 		.header.cmd = UVC_CMD_CREATE_SEC_CPU,
 		.header.len = sizeof(uvcb),
 	};
+	void *sida_addr;
 	int cc;
 
 	if (kvm_s390_pv_cpu_get_handle(vcpu))
@@ -83,12 +84,13 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
 
 	/* Alloc Secure Instruction Data Area Designation */
-	vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
-	if (!vcpu->arch.sie_block->sidad) {
+	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+	if (!sida_addr) {
 		free_pages(vcpu->arch.pv.stor_base,
 			   get_order(uv_info.guest_cpu_stor_len));
 		return -ENOMEM;
 	}
+	vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr);
 
 	cc = uv_call(0, (u64)&uvcb);
 	*rc = uvcb.header.rc;
-- 
GitLab


From 4435b79a366495a5cb43b792d9e7d69d489428cd Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Thu, 20 Oct 2022 16:31:59 +0200
Subject: [PATCH 0457/4122] KVM: s390: pv: sort out physical vs virtual
 pointers usage

Fix virtual vs physical address confusion (which currently are the same).

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20221020143159.294605-6-nrb@linux.ibm.com
Message-Id: <20221020143159.294605-6-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kvm/pv.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index c7435c37cdfea..48c4f57d5d76f 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -80,8 +80,8 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc)
 	/* Input */
 	uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm);
 	uvcb.num = vcpu->arch.sie_block->icpua;
-	uvcb.state_origin = (u64)vcpu->arch.sie_block;
-	uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base;
+	uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block);
+	uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base);
 
 	/* Alloc Secure Instruction Data Area Designation */
 	sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
@@ -228,8 +228,9 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 	uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */
 	uvcb.guest_stor_len = kvm->arch.pv.guest_len;
 	uvcb.guest_asce = kvm->arch.gmap->asce;
-	uvcb.guest_sca = (unsigned long)kvm->arch.sca;
-	uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base;
+	uvcb.guest_sca = virt_to_phys(kvm->arch.sca);
+	uvcb.conf_base_stor_origin =
+		virt_to_phys((void *)kvm->arch.pv.stor_base);
 	uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var;
 
 	cc = uv_call_sched(0, (u64)&uvcb);
-- 
GitLab


From 77b533411595668659ce5aaade4ca36c7aa2c488 Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Tue, 25 Oct 2022 10:20:39 +0200
Subject: [PATCH 0458/4122] KVM: s390: VSIE: sort out virtual/physical address
 in pin_guest_page

pin_guest_page() used page_to_virt() to calculate the hpa of the pinned
page. This currently works, because virtual and physical addresses are
the same. Use page_to_phys() instead to resolve the virtual-real address
confusion.

One caller of pin_guest_page() actually expected the hpa to be a hva, so
add the missing phys_to_virt() conversion here.

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Link: https://lore.kernel.org/r/20221025082039.117372-2-nrb@linux.ibm.com
Message-Id: <20221025082039.117372-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kvm/vsie.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 94138f8f0c1c3..0e9d020d70932 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -654,7 +654,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa)
 	page = gfn_to_page(kvm, gpa_to_gfn(gpa));
 	if (is_error_page(page))
 		return -EINVAL;
-	*hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK);
+	*hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK);
 	return 0;
 }
 
@@ -869,7 +869,7 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page,
 		WARN_ON_ONCE(rc);
 		return 1;
 	}
-	vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa;
+	vsie_page->scb_o = phys_to_virt(hpa);
 	return 0;
 }
 
-- 
GitLab


From 2a903ca922d007a0b40ca425ce55b5f0a0e01956 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Oct 2022 13:46:47 +0200
Subject: [PATCH 0459/4122] dt-bindings: gpio: Add gpio-latch binding document

This adds a binding for a GPIO multiplexer driver based on latches
connected to other GPIOs.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 .../devicetree/bindings/gpio/gpio-latch.yaml  | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/gpio-latch.yaml

diff --git a/Documentation/devicetree/bindings/gpio/gpio-latch.yaml b/Documentation/devicetree/bindings/gpio/gpio-latch.yaml
new file mode 100644
index 0000000000000..1ed82a2cebdaa
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/gpio-latch.yaml
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/gpio-latch.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO latch controller
+
+maintainers:
+  - Sascha Hauer <s.hauer@pengutronix.de>
+
+description: |
+  This binding describes a GPIO multiplexer based on latches connected to
+  other GPIOs, like this:
+
+  CLK0 ----------------------.        ,--------.
+  CLK1 -------------------.  `--------|>    #0 |
+                          |           |        |
+  OUT0 ----------------+--|-----------|D0    Q0|-----|<
+  OUT1 --------------+-|--|-----------|D1    Q1|-----|<
+  OUT2 ------------+-|-|--|-----------|D2    Q2|-----|<
+  OUT3 ----------+-|-|-|--|-----------|D3    Q3|-----|<
+  OUT4 --------+-|-|-|-|--|-----------|D4    Q4|-----|<
+  OUT5 ------+-|-|-|-|-|--|-----------|D5    Q5|-----|<
+  OUT6 ----+-|-|-|-|-|-|--|-----------|D6    Q6|-----|<
+  OUT7 --+-|-|-|-|-|-|-|--|-----------|D7    Q7|-----|<
+         | | | | | | | |  |           `--------'
+         | | | | | | | |  |
+         | | | | | | | |  |           ,--------.
+         | | | | | | | |  `-----------|>    #1 |
+         | | | | | | | |              |        |
+         | | | | | | | `--------------|D0    Q0|-----|<
+         | | | | | | `----------------|D1    Q1|-----|<
+         | | | | | `------------------|D2    Q2|-----|<
+         | | | | `--------------------|D3    Q3|-----|<
+         | | | `----------------------|D4    Q4|-----|<
+         | | `------------------------|D5    Q5|-----|<
+         | `--------------------------|D6    Q6|-----|<
+         `----------------------------|D7    Q7|-----|<
+                                      `--------'
+
+  The number of clk-gpios and latched-gpios is not fixed. The actual number
+  of number of latches and the number of inputs per latch is derived from
+  the number of GPIOs given in the corresponding device tree properties.
+
+properties:
+  compatible:
+    const: gpio-latch
+  "#gpio-cells":
+    const: 2
+
+  clk-gpios:
+    description: Array of GPIOs to be used to clock a latch
+
+  latched-gpios:
+    description: Array of GPIOs to be used as inputs per latch
+
+  setup-duration-ns:
+    description: Delay in nanoseconds to wait after the latch inputs have been
+      set up
+
+  clock-duration-ns:
+    description: Delay in nanoseconds to wait between clock output changes
+
+  gpio-controller: true
+
+  gpio-line-names: true
+
+required:
+  - compatible
+  - "#gpio-cells"
+  - gpio-controller
+  - clk-gpios
+  - latched-gpios
+
+additionalProperties: false
+
+examples:
+  - |
+    gpio-latch {
+        #gpio-cells = <2>;
+        pinctrl-names = "default";
+        pinctrl-0 = <&pinctrl_di_do_leds>;
+        compatible = "gpio-latch";
+        gpio-controller;
+        setup-duration-ns = <100>;
+        clock-duration-ns = <100>;
+
+        clk-gpios = <&gpio3 7 0>, <&gpio3 8 0>;
+        latched-gpios = <&gpio3 21 0>, <&gpio3 22 0>,
+                       <&gpio3 23 0>, <&gpio3 24 0>,
+                       <&gpio3 25 0>, <&gpio3 26 0>,
+                       <&gpio3 27 0>, <&gpio3 28 0>;
+    };
-- 
GitLab


From 1454a928b637bd169d99fc91a46b3b36cea76f9f Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 7 Oct 2022 13:46:46 +0200
Subject: [PATCH 0460/4122] gpio: Add gpio latch driver

This driver implements a GPIO multiplexer based on latches connected to
other GPIOs. A set of data GPIOs is connected to the data input of
multiple latches. The clock input of each latch is driven by another
set of GPIOs. With two 8-bit latches 10 GPIOs can be multiplexed into
16 GPIOs. GPOs might be a better term as in fact the multiplexed pins
are output only.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Serge Semin <fancer.lancer@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
[Bartosz: fixed the strange of_device_id formatting]
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig      |   6 ++
 drivers/gpio/Makefile     |   1 +
 drivers/gpio/gpio-latch.c | 219 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 226 insertions(+)
 create mode 100644 drivers/gpio/gpio-latch.c

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 5a04990f03cc8..8c756cb292140 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -1684,6 +1684,12 @@ config GPIO_AGGREGATOR
 	      industrial control context, to be operated from userspace using
 	      the GPIO chardev interface.
 
+config GPIO_LATCH
+	tristate "GPIO latch driver"
+	help
+	  Say yes here to enable a driver for GPIO multiplexers based on latches
+	  connected to other GPIOs.
+
 config GPIO_MOCKUP
 	tristate "GPIO Testing Driver"
 	select IRQ_SIM
diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 37a0b7ebda434..8629e9eaf79e1 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -76,6 +76,7 @@ obj-$(CONFIG_GPIO_IT87)			+= gpio-it87.o
 obj-$(CONFIG_GPIO_IXP4XX)		+= gpio-ixp4xx.o
 obj-$(CONFIG_GPIO_JANZ_TTL)		+= gpio-janz-ttl.o
 obj-$(CONFIG_GPIO_KEMPLD)		+= gpio-kempld.o
+obj-$(CONFIG_GPIO_LATCH)		+= gpio-latch.o
 obj-$(CONFIG_GPIO_LOGICVC)		+= gpio-logicvc.o
 obj-$(CONFIG_GPIO_LOONGSON1)		+= gpio-loongson1.o
 obj-$(CONFIG_GPIO_LOONGSON)		+= gpio-loongson.o
diff --git a/drivers/gpio/gpio-latch.c b/drivers/gpio/gpio-latch.c
new file mode 100644
index 0000000000000..d7c3b20c8482e
--- /dev/null
+++ b/drivers/gpio/gpio-latch.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GPIO latch driver
+ *
+ *  Copyright (C) 2022 Sascha Hauer <s.hauer@pengutronix.de>
+ *
+ * This driver implements a GPIO (or better GPO as there is no input)
+ * multiplexer based on latches like this:
+ *
+ * CLK0 ----------------------.        ,--------.
+ * CLK1 -------------------.  `--------|>    #0 |
+ *                         |           |        |
+ * OUT0 ----------------+--|-----------|D0    Q0|-----|<
+ * OUT1 --------------+-|--|-----------|D1    Q1|-----|<
+ * OUT2 ------------+-|-|--|-----------|D2    Q2|-----|<
+ * OUT3 ----------+-|-|-|--|-----------|D3    Q3|-----|<
+ * OUT4 --------+-|-|-|-|--|-----------|D4    Q4|-----|<
+ * OUT5 ------+-|-|-|-|-|--|-----------|D5    Q5|-----|<
+ * OUT6 ----+-|-|-|-|-|-|--|-----------|D6    Q6|-----|<
+ * OUT7 --+-|-|-|-|-|-|-|--|-----------|D7    Q7|-----|<
+ *        | | | | | | | |  |           `--------'
+ *        | | | | | | | |  |
+ *        | | | | | | | |  |           ,--------.
+ *        | | | | | | | |  `-----------|>    #1 |
+ *        | | | | | | | |              |        |
+ *        | | | | | | | `--------------|D0    Q0|-----|<
+ *        | | | | | | `----------------|D1    Q1|-----|<
+ *        | | | | | `------------------|D2    Q2|-----|<
+ *        | | | | `--------------------|D3    Q3|-----|<
+ *        | | | `----------------------|D4    Q4|-----|<
+ *        | | `------------------------|D5    Q5|-----|<
+ *        | `--------------------------|D6    Q6|-----|<
+ *        `----------------------------|D7    Q7|-----|<
+ *                                     `--------'
+ *
+ * The above is just an example. The actual number of number of latches and
+ * the number of inputs per latch is derived from the number of GPIOs given
+ * in the corresponding device tree properties.
+ */
+
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio/driver.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+
+#include "gpiolib.h"
+
+struct gpio_latch_priv {
+	struct gpio_chip gc;
+	struct gpio_descs *clk_gpios;
+	struct gpio_descs *latched_gpios;
+	int n_latched_gpios;
+	unsigned int setup_duration_ns;
+	unsigned int clock_duration_ns;
+	unsigned long *shadow;
+	/*
+	 * Depending on whether any of the underlying GPIOs may sleep we either
+	 * use a mutex or a spinlock to protect our shadow map.
+	 */
+	union {
+		struct mutex mutex; /* protects @shadow */
+		spinlock_t spinlock; /* protects @shadow */
+	};
+};
+
+static int gpio_latch_get_direction(struct gpio_chip *gc, unsigned int offset)
+{
+	return GPIO_LINE_DIRECTION_OUT;
+}
+
+static void gpio_latch_set_unlocked(struct gpio_latch_priv *priv,
+				    void (*set)(struct gpio_desc *desc, int value),
+				    unsigned int offset, bool val)
+{
+	int latch = offset / priv->n_latched_gpios;
+	int i;
+
+	assign_bit(offset, priv->shadow, val);
+
+	for (i = 0; i < priv->n_latched_gpios; i++)
+		set(priv->latched_gpios->desc[i],
+		    test_bit(latch * priv->n_latched_gpios + i, priv->shadow));
+
+	ndelay(priv->setup_duration_ns);
+	set(priv->clk_gpios->desc[latch], 1);
+	ndelay(priv->clock_duration_ns);
+	set(priv->clk_gpios->desc[latch], 0);
+}
+
+static void gpio_latch_set(struct gpio_chip *gc, unsigned int offset, int val)
+{
+	struct gpio_latch_priv *priv = gpiochip_get_data(gc);
+	unsigned long flags;
+
+	spin_lock_irqsave(&priv->spinlock, flags);
+
+	gpio_latch_set_unlocked(priv, gpiod_set_value, offset, val);
+
+	spin_unlock_irqrestore(&priv->spinlock, flags);
+}
+
+static void gpio_latch_set_can_sleep(struct gpio_chip *gc, unsigned int offset, int val)
+{
+	struct gpio_latch_priv *priv = gpiochip_get_data(gc);
+
+	mutex_lock(&priv->mutex);
+
+	gpio_latch_set_unlocked(priv, gpiod_set_value_cansleep, offset, val);
+
+	mutex_unlock(&priv->mutex);
+}
+
+static bool gpio_latch_can_sleep(struct gpio_latch_priv *priv, unsigned int n_latches)
+{
+	int i;
+
+	for (i = 0; i < n_latches; i++)
+		if (gpiod_cansleep(priv->clk_gpios->desc[i]))
+			return true;
+
+	for (i = 0; i < priv->n_latched_gpios; i++)
+		if (gpiod_cansleep(priv->latched_gpios->desc[i]))
+			return true;
+
+	return false;
+}
+
+/*
+ * Some value which is still acceptable to delay in atomic context.
+ * If we need to go higher we might have to switch to usleep_range(),
+ * but that cannot ne used in atomic context and the driver would have
+ * to be adjusted to support that.
+ */
+#define DURATION_NS_MAX 5000
+
+static int gpio_latch_probe(struct platform_device *pdev)
+{
+	struct gpio_latch_priv *priv;
+	unsigned int n_latches;
+	struct device_node *np = pdev->dev.of_node;
+
+	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->clk_gpios = devm_gpiod_get_array(&pdev->dev, "clk", GPIOD_OUT_LOW);
+	if (IS_ERR(priv->clk_gpios))
+		return PTR_ERR(priv->clk_gpios);
+
+	priv->latched_gpios = devm_gpiod_get_array(&pdev->dev, "latched", GPIOD_OUT_LOW);
+	if (IS_ERR(priv->latched_gpios))
+		return PTR_ERR(priv->latched_gpios);
+
+	n_latches = priv->clk_gpios->ndescs;
+	priv->n_latched_gpios = priv->latched_gpios->ndescs;
+
+	priv->shadow = devm_bitmap_zalloc(&pdev->dev, n_latches * priv->n_latched_gpios,
+					  GFP_KERNEL);
+	if (!priv->shadow)
+		return -ENOMEM;
+
+	if (gpio_latch_can_sleep(priv, n_latches)) {
+		priv->gc.can_sleep = true;
+		priv->gc.set = gpio_latch_set_can_sleep;
+		mutex_init(&priv->mutex);
+	} else {
+		priv->gc.can_sleep = false;
+		priv->gc.set = gpio_latch_set;
+		spin_lock_init(&priv->spinlock);
+	}
+
+	of_property_read_u32(np, "setup-duration-ns", &priv->setup_duration_ns);
+	if (priv->setup_duration_ns > DURATION_NS_MAX) {
+		dev_warn(&pdev->dev, "setup-duration-ns too high, limit to %d\n",
+			 DURATION_NS_MAX);
+		priv->setup_duration_ns = DURATION_NS_MAX;
+	}
+
+	of_property_read_u32(np, "clock-duration-ns", &priv->clock_duration_ns);
+	if (priv->clock_duration_ns > DURATION_NS_MAX) {
+		dev_warn(&pdev->dev, "clock-duration-ns too high, limit to %d\n",
+			 DURATION_NS_MAX);
+		priv->clock_duration_ns = DURATION_NS_MAX;
+	}
+
+	priv->gc.get_direction = gpio_latch_get_direction;
+	priv->gc.ngpio = n_latches * priv->n_latched_gpios;
+	priv->gc.owner = THIS_MODULE;
+	priv->gc.base = -1;
+	priv->gc.parent = &pdev->dev;
+
+	platform_set_drvdata(pdev, priv);
+
+	return devm_gpiochip_add_data(&pdev->dev, &priv->gc, priv);
+}
+
+static const struct of_device_id gpio_latch_ids[] = {
+	{
+		.compatible = "gpio-latch",
+	},
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, gpio_latch_ids);
+
+static struct platform_driver gpio_latch_driver = {
+	.driver	= {
+		.name		= "gpio-latch",
+		.of_match_table	= gpio_latch_ids,
+	},
+	.probe	= gpio_latch_probe,
+};
+module_platform_driver(gpio_latch_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
+MODULE_DESCRIPTION("GPIO latch driver");
-- 
GitLab


From b4e83d369015e3045418ca86984c3cd8dcf5a365 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 Oct 2022 20:06:00 +0300
Subject: [PATCH 0461/4122] gpio: exar: Allow IO port access

It's possible that PCI device can provide an IO port resource for
the device. regmap MMIO currently uses MMIO by default. With an
additional flag we enable support for IO port accesses.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: William Breathitt Gray <william.gray@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-exar.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpio/gpio-exar.c b/drivers/gpio/gpio-exar.c
index 482f678c893e5..df1bdaae441c5 100644
--- a/drivers/gpio/gpio-exar.c
+++ b/drivers/gpio/gpio-exar.c
@@ -141,6 +141,7 @@ static const struct regmap_config exar_regmap_config = {
 	.name		= "exar-gpio",
 	.reg_bits	= 16,
 	.val_bits	= 8,
+	.io_port	= true,
 };
 
 static int gpio_exar_probe(struct platform_device *pdev)
-- 
GitLab


From 94e9f9a23fe4b093cd5a8b292165fad840242b79 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:45 -0500
Subject: [PATCH 0462/4122] agp/efficeon: Convert to generic power management

Convert agpgart-efficeon from legacy PCI power management to the generic
power management framework.

Previously agpgart-efficeon used legacy PCI power management, which means
agp_efficeon_suspend() and agp_efficeon_resume() were responsible for both
device-specific things and generic PCI things like saving and restoring
config space and managing power state.

In this case, agp_efficeon_suspend() was empty, and agp_efficeon_resume()
already did only device-specific things, so simply convert it to take a
struct device * instead of a struct pci_dev *.

Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by
Vaibhav Gupta <vaibhavgupta40@gmail.com>.

Link: https://lore.kernel.org/r/20221025203852.681822-2-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/efficeon-agp.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/char/agp/efficeon-agp.c b/drivers/char/agp/efficeon-agp.c
index c53f0f9ef5b0b..f28d423192695 100644
--- a/drivers/char/agp/efficeon-agp.c
+++ b/drivers/char/agp/efficeon-agp.c
@@ -412,18 +412,11 @@ static void agp_efficeon_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
-#ifdef CONFIG_PM
-static int agp_efficeon_suspend(struct pci_dev *dev, pm_message_t state)
-{
-	return 0;
-}
-
-static int agp_efficeon_resume(struct pci_dev *pdev)
+static int agp_efficeon_resume(struct device *dev)
 {
 	printk(KERN_DEBUG PFX "agp_efficeon_resume()\n");
 	return efficeon_configure();
 }
-#endif
 
 static const struct pci_device_id agp_efficeon_pci_table[] = {
 	{
@@ -437,6 +430,8 @@ static const struct pci_device_id agp_efficeon_pci_table[] = {
 	{ }
 };
 
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_efficeon_pm_ops, NULL, agp_efficeon_resume);
+
 MODULE_DEVICE_TABLE(pci, agp_efficeon_pci_table);
 
 static struct pci_driver agp_efficeon_pci_driver = {
@@ -444,10 +439,7 @@ static struct pci_driver agp_efficeon_pci_driver = {
 	.id_table	= agp_efficeon_pci_table,
 	.probe		= agp_efficeon_probe,
 	.remove		= agp_efficeon_remove,
-#ifdef CONFIG_PM
-	.suspend	= agp_efficeon_suspend,
-	.resume		= agp_efficeon_resume,
-#endif
+	.driver.pm	= &agp_efficeon_pm_ops,
 };
 
 static int __init agp_efficeon_init(void)
-- 
GitLab


From 7f142022e6bfd2dd5ed998f7165e396bf5966513 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:46 -0500
Subject: [PATCH 0463/4122] agp/intel: Convert to generic power management

Convert agpgart-intel from legacy PCI power management to the generic power
management framework.

Previously agpgart-intel used legacy PCI power management, and
agp_intel_resume() was responsible for both device-specific things and
generic PCI things like saving and restoring config space and managing
power state.

In this case, agp_intel_suspend() was empty, and agp_intel_resume()
already did only device-specific things, so simply convert it to take a
struct device * instead of a struct pci_dev *.

Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by
Vaibhav Gupta <vaibhavgupta40@gmail.com>.

Link: https://lore.kernel.org/r/20221025203852.681822-3-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/intel-agp.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/char/agp/intel-agp.c b/drivers/char/agp/intel-agp.c
index 9e4f27a6cb5af..c518b3a9db04d 100644
--- a/drivers/char/agp/intel-agp.c
+++ b/drivers/char/agp/intel-agp.c
@@ -817,16 +817,15 @@ static void agp_intel_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
-#ifdef CONFIG_PM
-static int agp_intel_resume(struct pci_dev *pdev)
+static int agp_intel_resume(struct device *dev)
 {
+	struct pci_dev *pdev = to_pci_dev(dev);
 	struct agp_bridge_data *bridge = pci_get_drvdata(pdev);
 
 	bridge->driver->configure();
 
 	return 0;
 }
-#endif
 
 static const struct pci_device_id agp_intel_pci_table[] = {
 #define ID(x)						\
@@ -895,14 +894,14 @@ static const struct pci_device_id agp_intel_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, agp_intel_pci_table);
 
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_intel_pm_ops, NULL, agp_intel_resume);
+
 static struct pci_driver agp_intel_pci_driver = {
 	.name		= "agpgart-intel",
 	.id_table	= agp_intel_pci_table,
 	.probe		= agp_intel_probe,
 	.remove		= agp_intel_remove,
-#ifdef CONFIG_PM
-	.resume		= agp_intel_resume,
-#endif
+	.driver.pm	= &agp_intel_pm_ops,
 };
 
 static int __init agp_intel_init(void)
-- 
GitLab


From c78679d1fe43f9165b11c5ccd3f79c7108b066fe Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:47 -0500
Subject: [PATCH 0464/4122] agp/amd-k7: Convert to generic power management

Convert agpgart-amdk7 from legacy PCI power management to the generic power
management framework.

Previously agpgart-amdk7 used legacy PCI power management, and
agp_amdk7_suspend() and agp_amdk7_resume() were responsible for both
device-specific things and generic PCI things like saving and restoring
config space and managing power state:

  agp_amdk7_suspend
    pci_save_state                         <-- generic PCI
    pci_set_power_state                    <-- generic PCI

  agp_amdk7_resume
    pci_set_power_state(PCI_D0)            <-- generic PCI
    pci_restore_state                      <-- generic PCI
    amd_irongate_driver.configure          <-- device-specific

Convert to generic power management where the PCI bus PM methods do the
generic PCI things, and the driver needs only the device-specific part,
i.e.,

  suspend_devices_and_enter
    dpm_suspend_start(PMSG_SUSPEND)
      pci_pm_suspend                       # PCI bus .suspend() method
        agp_amdk7_suspend                  <-- not needed at all; removed
    suspend_enter
      dpm_suspend_noirq(PMSG_SUSPEND)
        pci_pm_suspend_noirq               # PCI bus .suspend_noirq() method
          pci_save_state                   <-- generic PCI
          pci_prepare_to_sleep             <-- generic PCI
            pci_set_power_state
    ...
    dpm_resume_end(PMSG_RESUME)
      pci_pm_resume                        # PCI bus .resume() method
        pci_restore_standard_config
          pci_set_power_state(PCI_D0)      <-- generic PCI
          pci_restore_state                <-- generic PCI
        agp_amdk7_resume                   # driver->pm->resume
          amd_irongate_driver.configure    <-- device-specific

Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by
Vaibhav Gupta <vaibhavgupta40@gmail.com>.

Link: https://lore.kernel.org/r/20221025203852.681822-4-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/amd-k7-agp.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/char/agp/amd-k7-agp.c b/drivers/char/agp/amd-k7-agp.c
index 2b2095542816b..55397ba765d23 100644
--- a/drivers/char/agp/amd-k7-agp.c
+++ b/drivers/char/agp/amd-k7-agp.c
@@ -488,26 +488,11 @@ static void agp_amdk7_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
-#ifdef CONFIG_PM
-
-static int agp_amdk7_suspend(struct pci_dev *pdev, pm_message_t state)
+static int agp_amdk7_resume(struct device *dev)
 {
-	pci_save_state(pdev);
-	pci_set_power_state(pdev, pci_choose_state(pdev, state));
-
-	return 0;
-}
-
-static int agp_amdk7_resume(struct pci_dev *pdev)
-{
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-
 	return amd_irongate_driver.configure();
 }
 
-#endif /* CONFIG_PM */
-
 /* must be the same order as name table above */
 static const struct pci_device_id agp_amdk7_pci_table[] = {
 	{
@@ -539,15 +524,14 @@ static const struct pci_device_id agp_amdk7_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, agp_amdk7_pci_table);
 
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_amdk7_pm_ops, NULL, agp_amdk7_resume);
+
 static struct pci_driver agp_amdk7_pci_driver = {
 	.name		= "agpgart-amdk7",
 	.id_table	= agp_amdk7_pci_table,
 	.probe		= agp_amdk7_probe,
 	.remove		= agp_amdk7_remove,
-#ifdef CONFIG_PM
-	.suspend	= agp_amdk7_suspend,
-	.resume		= agp_amdk7_resume,
-#endif
+	.driver.pm	= &agp_amdk7_pm_ops,
 };
 
 static int __init agp_amdk7_init(void)
-- 
GitLab


From 6a1274ea0e5dfb2eca85b0175820d7b5183c9cae Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:48 -0500
Subject: [PATCH 0465/4122] agp/ati: Convert to generic power management

Convert agpgart-ati from legacy PCI power management to the generic power
management framework.

Previously agpgart-ati used legacy PCI power management, and
agp_ati_suspend() and agp_ati_resume() were responsible for both
device-specific things and generic PCI things like saving and restoring
config space and managing power state:

  agp_ati_suspend
    pci_save_state                         <-- generic PCI
    pci_set_power_state(PCI_D3hot)         <-- generic PCI

  agp_ati_resume
    pci_set_power_state(PCI_D0)            <-- generic PCI
    pci_restore_state                      <-- generic PCI
    ati_configure                          <-- device-specific

With generic power management, the PCI bus PM methods do the generic PCI
things, and the driver needs only the device-specific part, i.e.,

  suspend_devices_and_enter
    dpm_suspend_start(PMSG_SUSPEND)
      pci_pm_suspend                       # PCI bus .suspend() method
        agp_ati_suspend                    <-- not needed at all; removed
    suspend_enter
      dpm_suspend_noirq(PMSG_SUSPEND)
        pci_pm_suspend_noirq               # PCI bus .suspend_noirq() method
          pci_save_state                   <-- generic PCI
          pci_prepare_to_sleep             <-- generic PCI
            pci_set_power_state
    ...
    dpm_resume_end(PMSG_RESUME)
      pci_pm_resume                        # PCI bus .resume() method
        pci_restore_standard_config
          pci_set_power_state(PCI_D0)      <-- generic PCI
          pci_restore_state                <-- generic PCI
        agp_ati_resume                     # driver->pm->resume
          ati_configure                    <-- device-specific

Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by
Vaibhav Gupta <vaibhavgupta40@gmail.com>.

Link: https://lore.kernel.org/r/20221025203852.681822-5-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/ati-agp.c | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/drivers/char/agp/ati-agp.c b/drivers/char/agp/ati-agp.c
index 6f5530482d833..3c1fce48aabe7 100644
--- a/drivers/char/agp/ati-agp.c
+++ b/drivers/char/agp/ati-agp.c
@@ -238,23 +238,10 @@ static int ati_configure(void)
 }
 
 
-#ifdef CONFIG_PM
-static int agp_ati_suspend(struct pci_dev *dev, pm_message_t state)
+static int agp_ati_resume(struct device *dev)
 {
-	pci_save_state(dev);
-	pci_set_power_state(dev, PCI_D3hot);
-
-	return 0;
-}
-
-static int agp_ati_resume(struct pci_dev *dev)
-{
-	pci_set_power_state(dev, PCI_D0);
-	pci_restore_state(dev);
-
 	return ati_configure();
 }
-#endif
 
 /*
  *Since we don't need contiguous memory we just try
@@ -559,15 +546,14 @@ static const struct pci_device_id agp_ati_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, agp_ati_pci_table);
 
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_ati_pm_ops, NULL, agp_ati_resume);
+
 static struct pci_driver agp_ati_pci_driver = {
 	.name		= "agpgart-ati",
 	.id_table	= agp_ati_pci_table,
 	.probe		= agp_ati_probe,
 	.remove		= agp_ati_remove,
-#ifdef CONFIG_PM
-	.suspend	= agp_ati_suspend,
-	.resume		= agp_ati_resume,
-#endif
+	.driver.pm	= &agp_ati_pm_ops,
 };
 
 static int __init agp_ati_init(void)
-- 
GitLab


From 11a8d8774e68e07385a5b10d9546598f57ace7da Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:49 -0500
Subject: [PATCH 0466/4122] agp/nvidia: Convert to generic power management

Convert agpgart-nvidia from legacy PCI power management to the generic
power management framework.

Previously agpgart-nvidia used legacy PCI power management, and
agp_nvidia_suspend() and agp_nvidia_resume() were responsible for both
device-specific things and generic PCI things:

  agp_nvidia_suspend
    pci_save_state                         <-- generic PCI
    pci_set_power_state(PCI_D3hot)         <-- generic PCI

  agp_nvidia_resume
    pci_set_power_state(PCI_D0)            <-- generic PCI
    pci_restore_state                      <-- generic PCI
    nvidia_configure                       <-- device-specific

Convert to generic power management where the PCI bus PM methods do the
generic PCI things, and the driver needs only the device-specific part,
i.e.,

  suspend_devices_and_enter
    dpm_suspend_start(PMSG_SUSPEND)
      pci_pm_suspend                       # PCI bus .suspend() method
        agp_nvidia_suspend                 <-- not needed at all; removed
    suspend_enter
      dpm_suspend_noirq(PMSG_SUSPEND)
        pci_pm_suspend_noirq               # PCI bus .suspend_noirq() method
          pci_save_state                   <-- generic PCI
          pci_prepare_to_sleep             <-- generic PCI
            pci_set_power_state
    ...
    dpm_resume_end(PMSG_RESUME)
      pci_pm_resume                        # PCI bus .resume() method
        pci_restore_standard_config
          pci_set_power_state(PCI_D0)      <-- generic PCI
          pci_restore_state                <-- generic PCI
        agp_nvidia_resume                  # driver->pm->resume
          nvidia_configure                 <-- device-specific

Based on 0aeddbd0cb07 ("via-agp: convert to generic power management") by
Vaibhav Gupta <vaibhavgupta40@gmail.com>.

Link: https://lore.kernel.org/r/20221025203852.681822-6-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/nvidia-agp.c | 24 ++++--------------------
 1 file changed, 4 insertions(+), 20 deletions(-)

diff --git a/drivers/char/agp/nvidia-agp.c b/drivers/char/agp/nvidia-agp.c
index 826dbd06f6bbb..dbcbc06cc2029 100644
--- a/drivers/char/agp/nvidia-agp.c
+++ b/drivers/char/agp/nvidia-agp.c
@@ -404,28 +404,13 @@ static void agp_nvidia_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
-#ifdef CONFIG_PM
-static int agp_nvidia_suspend(struct pci_dev *pdev, pm_message_t state)
+static int agp_nvidia_resume(struct device *dev)
 {
-	pci_save_state(pdev);
-	pci_set_power_state(pdev, PCI_D3hot);
-
-	return 0;
-}
-
-static int agp_nvidia_resume(struct pci_dev *pdev)
-{
-	/* set power state 0 and restore PCI space */
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-
 	/* reconfigure AGP hardware again */
 	nvidia_configure();
 
 	return 0;
 }
-#endif
-
 
 static const struct pci_device_id agp_nvidia_pci_table[] = {
 	{
@@ -449,15 +434,14 @@ static const struct pci_device_id agp_nvidia_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, agp_nvidia_pci_table);
 
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_nvidia_pm_ops, NULL, agp_nvidia_resume);
+
 static struct pci_driver agp_nvidia_pci_driver = {
 	.name		= "agpgart-nvidia",
 	.id_table	= agp_nvidia_pci_table,
 	.probe		= agp_nvidia_probe,
 	.remove		= agp_nvidia_remove,
-#ifdef CONFIG_PM
-	.suspend	= agp_nvidia_suspend,
-	.resume		= agp_nvidia_resume,
-#endif
+	.driver.pm	= &agp_nvidia_pm_ops,
 };
 
 static int __init agp_nvidia_init(void)
-- 
GitLab


From 8c1f82c710f18c5f51c3b43402cd8175d6655369 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:50 -0500
Subject: [PATCH 0467/4122] agp/amd64: Update to DEFINE_SIMPLE_DEV_PM_OPS()

As of 1a3c7bb08826 ("PM: core: Add new *_PM_OPS macros, deprecate old
ones"), SIMPLE_DEV_PM_OPS() is deprecated in favor of
DEFINE_SIMPLE_DEV_PM_OPS(), which has the advantage that the PM callbacks
don't need to be wrapped with #ifdef CONFIG_PM or tagged with
__maybe_unused.

Convert to DEFINE_SIMPLE_DEV_PM_OPS().  No functional change intended.

Link: https://lore.kernel.org/r/20221025203852.681822-7-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/amd64-agp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c
index 84a4aa9312cf8..ce8651436609f 100644
--- a/drivers/char/agp/amd64-agp.c
+++ b/drivers/char/agp/amd64-agp.c
@@ -588,9 +588,7 @@ static void agp_amd64_remove(struct pci_dev *pdev)
 	agp_bridges_found--;
 }
 
-#define agp_amd64_suspend NULL
-
-static int __maybe_unused agp_amd64_resume(struct device *dev)
+static int agp_amd64_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 
@@ -727,7 +725,7 @@ static const struct pci_device_id agp_amd64_pci_promisc_table[] = {
 	{ }
 };
 
-static SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, agp_amd64_suspend, agp_amd64_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_amd64_pm_ops, NULL, agp_amd64_resume);
 
 static struct pci_driver agp_amd64_pci_driver = {
 	.name		= "agpgart-amd64",
-- 
GitLab


From 746e926b9fe327ace4be187144913abd7cfc2f4a Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:51 -0500
Subject: [PATCH 0468/4122] agp/sis: Update to DEFINE_SIMPLE_DEV_PM_OPS()

As of 1a3c7bb08826 ("PM: core: Add new *_PM_OPS macros, deprecate old
ones"), SIMPLE_DEV_PM_OPS() is deprecated in favor of
DEFINE_SIMPLE_DEV_PM_OPS(), which has the advantage that the PM callbacks
don't need to be wrapped with #ifdef CONFIG_PM or tagged with
__maybe_unused.

Convert to DEFINE_SIMPLE_DEV_PM_OPS().  No functional change intended.

Link: https://lore.kernel.org/r/20221025203852.681822-8-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/sis-agp.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/char/agp/sis-agp.c b/drivers/char/agp/sis-agp.c
index f8a02f4bef1bf..484bb101c53b5 100644
--- a/drivers/char/agp/sis-agp.c
+++ b/drivers/char/agp/sis-agp.c
@@ -217,10 +217,7 @@ static void agp_sis_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
-#define agp_sis_suspend NULL
-
-static int __maybe_unused agp_sis_resume(
-	__attribute__((unused)) struct device *dev)
+static int agp_sis_resume(__attribute__((unused)) struct device *dev)
 {
 	return sis_driver.configure();
 }
@@ -407,7 +404,7 @@ static const struct pci_device_id agp_sis_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, agp_sis_pci_table);
 
-static SIMPLE_DEV_PM_OPS(agp_sis_pm_ops, agp_sis_suspend, agp_sis_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_sis_pm_ops, NULL, agp_sis_resume);
 
 static struct pci_driver agp_sis_pci_driver = {
 	.name		= "agpgart-sis",
-- 
GitLab


From 73fcd4520edb430684246448d096f8f17f107c97 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 15:38:52 -0500
Subject: [PATCH 0469/4122] agp/via: Update to DEFINE_SIMPLE_DEV_PM_OPS()

As of 1a3c7bb08826 ("PM: core: Add new *_PM_OPS macros, deprecate old
ones"), SIMPLE_DEV_PM_OPS() is deprecated in favor of
DEFINE_SIMPLE_DEV_PM_OPS(), which has the advantage that the PM callbacks
don't need to be wrapped with #ifdef CONFIG_PM or tagged with
__maybe_unused.

Convert to DEFINE_SIMPLE_DEV_PM_OPS().  No functional change intended.

Link: https://lore.kernel.org/r/20221025203852.681822-9-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Dave Airlie <airlied@redhat.com>
---
 drivers/char/agp/via-agp.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/char/agp/via-agp.c b/drivers/char/agp/via-agp.c
index b2f484f527fb7..bc5140af2dcbe 100644
--- a/drivers/char/agp/via-agp.c
+++ b/drivers/char/agp/via-agp.c
@@ -489,9 +489,7 @@ static void agp_via_remove(struct pci_dev *pdev)
 	agp_put_bridge(bridge);
 }
 
-#define agp_via_suspend NULL
-
-static int __maybe_unused agp_via_resume(struct device *dev)
+static int agp_via_resume(struct device *dev)
 {
 	struct agp_bridge_data *bridge = dev_get_drvdata(dev);
 
@@ -551,7 +549,7 @@ static const struct pci_device_id agp_via_pci_table[] = {
 
 MODULE_DEVICE_TABLE(pci, agp_via_pci_table);
 
-static SIMPLE_DEV_PM_OPS(agp_via_pm_ops, agp_via_suspend, agp_via_resume);
+static DEFINE_SIMPLE_DEV_PM_OPS(agp_via_pm_ops, NULL, agp_via_resume);
 
 static struct pci_driver agp_via_pci_driver = {
 	.name		= "agpgart-via",
-- 
GitLab


From 5984de0b41bf8f261e41b45c4fe64a32236e0c42 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Tue, 25 Oct 2022 14:35:02 -0500
Subject: [PATCH 0470/4122] PCI/PM: Remove unused 'state' parameter to
 pci_legacy_suspend_late()

1a1daf097e21 ("PCI/PM: Remove unused pci_driver.suspend_late() hook")
removed the legacy .suspend_late() hook, which was the only user of the
"state" parameter to pci_legacy_suspend_late(), but it neglected to remove
the parameter.

Remove the unused "state" parameter to pci_legacy_suspend_late().

Link: https://lore.kernel.org/r/20221025193502.669091-1-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/pci/pci-driver.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 107d77f3c8467..a2ceeacc33eb6 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -646,7 +646,7 @@ static int pci_legacy_suspend(struct device *dev, pm_message_t state)
 	return 0;
 }
 
-static int pci_legacy_suspend_late(struct device *dev, pm_message_t state)
+static int pci_legacy_suspend_late(struct device *dev)
 {
 	struct pci_dev *pci_dev = to_pci_dev(dev);
 
@@ -848,7 +848,7 @@ static int pci_pm_suspend_noirq(struct device *dev)
 		return 0;
 
 	if (pci_has_legacy_pm_support(pci_dev))
-		return pci_legacy_suspend_late(dev, PMSG_SUSPEND);
+		return pci_legacy_suspend_late(dev);
 
 	if (!pm) {
 		pci_save_state(pci_dev);
@@ -1060,7 +1060,7 @@ static int pci_pm_freeze_noirq(struct device *dev)
 	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
 	if (pci_has_legacy_pm_support(pci_dev))
-		return pci_legacy_suspend_late(dev, PMSG_FREEZE);
+		return pci_legacy_suspend_late(dev);
 
 	if (pm && pm->freeze_noirq) {
 		int error;
@@ -1179,7 +1179,7 @@ static int pci_pm_poweroff_noirq(struct device *dev)
 		return 0;
 
 	if (pci_has_legacy_pm_support(pci_dev))
-		return pci_legacy_suspend_late(dev, PMSG_HIBERNATE);
+		return pci_legacy_suspend_late(dev);
 
 	if (!pm) {
 		pci_fixup_device(pci_fixup_suspend_late, pci_dev);
-- 
GitLab


From b9b8782f8966a7f219ec2e2db3ffe5eeb23943ab Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
Date: Tue, 6 Sep 2022 13:34:16 +0300
Subject: [PATCH 0471/4122] scsi: target: core: Add support for RSOC command

Add support for REPORT SUPPORTED OPERATION CODES command according to SPC4.

Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Link: https://lore.kernel.org/r/20220906103421.22348-2-d.bogdanov@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_spc.c  | 200 ++++++++++++++++++++++++++++++
 include/scsi/scsi_proto.h         |   7 ++
 include/target/target_core_base.h |  12 ++
 3 files changed, 219 insertions(+)

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 7cca3b15472b3..afd5ea0344f36 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -1314,6 +1314,202 @@ spc_emulate_testunitready(struct se_cmd *cmd)
 	return 0;
 }
 
+
+static struct target_opcode_descriptor *tcm_supported_opcodes[] = {
+};
+
+static int
+spc_rsoc_encode_command_timeouts_descriptor(unsigned char *buf, u8 ctdp,
+				struct target_opcode_descriptor *descr)
+{
+	if (!ctdp)
+		return 0;
+
+	put_unaligned_be16(0xa, buf);
+	buf[3] = descr->specific_timeout;
+	put_unaligned_be32(descr->nominal_timeout, &buf[4]);
+	put_unaligned_be32(descr->recommended_timeout, &buf[8]);
+
+	return 12;
+}
+
+static int
+spc_rsoc_encode_command_descriptor(unsigned char *buf, u8 ctdp,
+				   struct target_opcode_descriptor *descr)
+{
+	int td_size = 0;
+
+	buf[0] = descr->opcode;
+
+	put_unaligned_be16(descr->service_action, &buf[2]);
+
+	buf[5] = (ctdp << 1) | descr->serv_action_valid;
+	put_unaligned_be16(descr->cdb_size, &buf[6]);
+
+	td_size = spc_rsoc_encode_command_timeouts_descriptor(&buf[8], ctdp,
+							      descr);
+
+	return 8 + td_size;
+}
+
+static int
+spc_rsoc_encode_one_command_descriptor(unsigned char *buf, u8 ctdp,
+				       struct target_opcode_descriptor *descr)
+{
+	int td_size = 0;
+
+	if (!descr) {
+		buf[1] = (ctdp << 7) | SCSI_SUPPORT_NOT_SUPPORTED;
+		return 2;
+	}
+
+	buf[1] = (ctdp << 7) | SCSI_SUPPORT_FULL;
+	put_unaligned_be16(descr->cdb_size, &buf[2]);
+	memcpy(&buf[4], descr->usage_bits, descr->cdb_size);
+
+	td_size = spc_rsoc_encode_command_timeouts_descriptor(
+			&buf[4 + descr->cdb_size], ctdp, descr);
+
+	return 4 + descr->cdb_size + td_size;
+}
+
+static sense_reason_t
+spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode)
+{
+	struct target_opcode_descriptor *descr;
+	struct se_session *sess = cmd->se_sess;
+	unsigned char *cdb = cmd->t_task_cdb;
+	u8 opts = cdb[2] & 0x3;
+	u8 requested_opcode;
+	u16 requested_sa;
+	int i;
+
+	requested_opcode = cdb[3];
+	requested_sa = ((u16)cdb[4]) << 8 | cdb[5];
+	*opcode = NULL;
+
+	if (opts > 3) {
+		pr_debug("TARGET_CORE[%s]: Invalid REPORT SUPPORTED OPERATION CODES"
+			" with unsupported REPORTING OPTIONS %#x for 0x%08llx from %s\n",
+			cmd->se_tfo->fabric_name, opts,
+			cmd->se_lun->unpacked_lun,
+			sess->se_node_acl->initiatorname);
+		return TCM_INVALID_CDB_FIELD;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tcm_supported_opcodes); i++) {
+		descr = tcm_supported_opcodes[i];
+		if (descr->opcode != requested_opcode)
+			continue;
+
+		switch (opts) {
+		case 0x1:
+			/*
+			 * If the REQUESTED OPERATION CODE field specifies an
+			 * operation code for which the device server implements
+			 * service actions, then the device server shall
+			 * terminate the command with CHECK CONDITION status,
+			 * with the sense key set to ILLEGAL REQUEST, and the
+			 * additional sense code set to INVALID FIELD IN CDB
+			 */
+			if (descr->serv_action_valid)
+				return TCM_INVALID_CDB_FIELD;
+			*opcode = descr;
+			break;
+		case 0x2:
+			/*
+			 * If the REQUESTED OPERATION CODE field specifies an
+			 * operation code for which the device server does not
+			 * implement service actions, then the device server
+			 * shall terminate the command with CHECK CONDITION
+			 * status, with the sense key set to ILLEGAL REQUEST,
+			 * and the additional sense code set to INVALID FIELD IN CDB.
+			 */
+			if (descr->serv_action_valid &&
+			    descr->service_action == requested_sa)
+				*opcode = descr;
+			else if (!descr->serv_action_valid)
+				return TCM_INVALID_CDB_FIELD;
+			break;
+		case 0x3:
+			/*
+			 * The command support data for the operation code and
+			 * service action a specified in the REQUESTED OPERATION
+			 * CODE field and REQUESTED SERVICE ACTION field shall
+			 * be returned in the one_command parameter data format.
+			 */
+			if (descr->service_action == requested_sa)
+				*opcode = descr;
+			break;
+		}
+	}
+	return 0;
+}
+
+static sense_reason_t
+spc_emulate_report_supp_op_codes(struct se_cmd *cmd)
+{
+	int descr_num = ARRAY_SIZE(tcm_supported_opcodes);
+	struct target_opcode_descriptor *descr = NULL;
+	unsigned char *cdb = cmd->t_task_cdb;
+	u8 rctd = (cdb[2] >> 7) & 0x1;
+	unsigned char *buf = NULL;
+	int response_length = 0;
+	u8 opts = cdb[2] & 0x3;
+	unsigned char *rbuf;
+	sense_reason_t ret = 0;
+	int i;
+
+	rbuf = transport_kmap_data_sg(cmd);
+	if (cmd->data_length && !rbuf) {
+		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+		goto out;
+	}
+
+	if (opts == 0)
+		response_length = 4 + (8 + rctd * 12) * descr_num;
+	else {
+		ret = spc_rsoc_get_descr(cmd, &descr);
+		if (ret)
+			goto out;
+
+		if (descr)
+			response_length = 4 + descr->cdb_size + rctd * 12;
+		else
+			response_length = 2;
+	}
+
+	buf = kzalloc(response_length, GFP_KERNEL);
+	if (!buf) {
+		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+		goto out;
+	}
+	response_length = 0;
+
+	if (opts == 0) {
+		response_length += 4;
+
+		for (i = 0; i < ARRAY_SIZE(tcm_supported_opcodes); i++) {
+			descr = tcm_supported_opcodes[i];
+			response_length += spc_rsoc_encode_command_descriptor(
+					&buf[response_length], rctd, descr);
+		}
+		put_unaligned_be32(response_length - 3, buf);
+	} else {
+		response_length = spc_rsoc_encode_one_command_descriptor(
+				&buf[response_length], rctd, descr);
+	}
+
+	memcpy(rbuf, buf, min_t(u32, response_length, cmd->data_length));
+out:
+	kfree(buf);
+	transport_kunmap_data_sg(cmd);
+
+	if (!ret)
+		target_complete_cmd_with_length(cmd, SAM_STAT_GOOD, response_length);
+	return ret;
+}
+
 sense_reason_t
 spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 {
@@ -1439,6 +1635,10 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
 				cmd->execute_cmd =
 					target_emulate_report_target_port_groups;
 			}
+			if ((cdb[1] & 0x1f) ==
+			    MI_REPORT_SUPPORTED_OPERATION_CODES)
+				cmd->execute_cmd =
+					spc_emulate_report_supp_op_codes;
 			*size = get_unaligned_be32(&cdb[6]);
 		} else {
 			/*
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index c03e35fc382cd..651b5183451c1 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -342,4 +342,11 @@ enum scsi_version_descriptor {
 	SCSI_VERSION_DESCRIPTOR_SRP	= 0x0940
 };
 
+enum scsi_support_opcode {
+	SCSI_SUPPORT_NO_INFO		= 0,
+	SCSI_SUPPORT_NOT_SUPPORTED	= 1,
+	SCSI_SUPPORT_FULL		= 3,
+	SCSI_SUPPORT_VENDOR		= 5,
+};
+
 #endif /* _SCSI_PROTO_H_ */
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 8c920456edd93..02a2d48d20b69 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -867,6 +867,18 @@ struct se_device {
 	struct se_device_queue	*queues;
 };
 
+struct target_opcode_descriptor {
+	u8			support:3;
+	u8			serv_action_valid:1;
+	u8			opcode;
+	u16			service_action;
+	u32			cdb_size;
+	u8			specific_timeout;
+	u16			nominal_timeout;
+	u16			recommended_timeout;
+	u8			usage_bits[];
+};
+
 struct se_hba {
 	u16			hba_tpgt;
 	u32			hba_id;
-- 
GitLab


From 0016e820716ff863a76e960cb91bd72373ac2e74 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
Date: Tue, 6 Sep 2022 13:34:17 +0300
Subject: [PATCH 0472/4122] scsi: target: core: Add list of opcodes for RSOC

Fill the strucures for supported opcodes and usage bits that are reported
in REPORT SUPPORTED OPERATION CODES command response.

Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Link: https://lore.kernel.org/r/20220906103421.22348-3-d.bogdanov@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_spc.c | 568 +++++++++++++++++++++++++++++++
 include/scsi/scsi_proto.h        |   3 +
 2 files changed, 571 insertions(+)

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index afd5ea0344f36..31cd6f31f6b1a 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -1314,8 +1314,576 @@ spc_emulate_testunitready(struct se_cmd *cmd)
 	return 0;
 }
 
+static struct target_opcode_descriptor tcm_opcode_read6 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = READ_6,
+	.cdb_size = 6,
+	.usage_bits = {READ_6, 0x1f, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_read10 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = READ_10,
+	.cdb_size = 10,
+	.usage_bits = {READ_10, 0xf8, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_read12 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = READ_12,
+	.cdb_size = 12,
+	.usage_bits = {READ_12, 0xf8, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_read16 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = READ_16,
+	.cdb_size = 16,
+	.usage_bits = {READ_16, 0xf8, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write6 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_6,
+	.cdb_size = 6,
+	.usage_bits = {WRITE_6, 0x1f, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write10 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_10,
+	.cdb_size = 10,
+	.usage_bits = {WRITE_10, 0xf8, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write_verify10 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_VERIFY,
+	.cdb_size = 10,
+	.usage_bits = {WRITE_VERIFY, 0xf0, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write12 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_12,
+	.cdb_size = 12,
+	.usage_bits = {WRITE_12, 0xf8, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write16 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_16,
+	.cdb_size = 16,
+	.usage_bits = {WRITE_16, 0xf8, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write_verify16 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_VERIFY_16,
+	.cdb_size = 16,
+	.usage_bits = {WRITE_VERIFY_16, 0xf0, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write_same32 = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = VARIABLE_LENGTH_CMD,
+	.service_action = WRITE_SAME_32,
+	.cdb_size = 32,
+	.usage_bits = {VARIABLE_LENGTH_CMD, SCSI_CONTROL_MASK, 0x00, 0x00,
+		       0x00, 0x00, SCSI_GROUP_NUMBER_MASK, 0x18,
+		       0x00, WRITE_SAME_32, 0xe8, 0x00,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0x00, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0x00,
+		       0xff, 0xff, 0xff, 0xff},
+};
+
+static struct target_opcode_descriptor tcm_opcode_compare_write = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = COMPARE_AND_WRITE,
+	.cdb_size = 16,
+	.usage_bits = {COMPARE_AND_WRITE, 0x18, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0x00, 0x00,
+		       0x00, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_read_capacity = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = READ_CAPACITY,
+	.cdb_size = 10,
+	.usage_bits = {READ_CAPACITY, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, 0x00,
+		       0x01, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_read_capacity16 = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = SERVICE_ACTION_IN_16,
+	.service_action = SAI_READ_CAPACITY_16,
+	.cdb_size = 16,
+	.usage_bits = {SERVICE_ACTION_IN_16, SAI_READ_CAPACITY_16, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_read_report_refferals = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = SERVICE_ACTION_IN_16,
+	.service_action = SAI_REPORT_REFERRALS,
+	.cdb_size = 16,
+	.usage_bits = {SERVICE_ACTION_IN_16, SAI_REPORT_REFERRALS, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_sync_cache = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = SYNCHRONIZE_CACHE,
+	.cdb_size = 10,
+	.usage_bits = {SYNCHRONIZE_CACHE, 0x02, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_sync_cache16 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = SYNCHRONIZE_CACHE_16,
+	.cdb_size = 16,
+	.usage_bits = {SYNCHRONIZE_CACHE_16, 0x02, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_unmap = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = UNMAP,
+	.cdb_size = 10,
+	.usage_bits = {UNMAP, 0x00, 0x00, 0x00,
+		       0x00, 0x00, SCSI_GROUP_NUMBER_MASK, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write_same = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_SAME,
+	.cdb_size = 10,
+	.usage_bits = {WRITE_SAME, 0xe8, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_write_same16 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = WRITE_SAME_16,
+	.cdb_size = 16,
+	.usage_bits = {WRITE_SAME_16, 0xe8, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_verify = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = VERIFY,
+	.cdb_size = 10,
+	.usage_bits = {VERIFY, 0x00, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_verify16 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = VERIFY_16,
+	.cdb_size = 16,
+	.usage_bits = {VERIFY_16, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_start_stop = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = START_STOP,
+	.cdb_size = 6,
+	.usage_bits = {START_STOP, 0x01, 0x00, 0x00,
+		       0x01, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_mode_select = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = MODE_SELECT,
+	.cdb_size = 6,
+	.usage_bits = {MODE_SELECT, 0x10, 0x00, 0x00,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_mode_select10 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = MODE_SELECT_10,
+	.cdb_size = 10,
+	.usage_bits = {MODE_SELECT_10, 0x10, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_mode_sense = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = MODE_SENSE,
+	.cdb_size = 6,
+	.usage_bits = {MODE_SENSE, 0x08, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_mode_sense10 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = MODE_SENSE_10,
+	.cdb_size = 10,
+	.usage_bits = {MODE_SENSE_10, 0x18, 0xff, 0xff,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pri_read_keys = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_IN,
+	.service_action = PRI_READ_KEYS,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_KEYS, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pri_read_resrv = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_IN,
+	.service_action = PRI_READ_RESERVATION,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_RESERVATION, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pri_read_caps = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_IN,
+	.service_action = PRI_REPORT_CAPABILITIES,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_IN, PRI_REPORT_CAPABILITIES, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pri_read_full_status = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_IN,
+	.service_action = PRI_READ_FULL_STATUS,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_FULL_STATUS, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_register = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_REGISTER,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER, 0xff, 0x00,
+		       0x00, 0xff, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_reserve = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_RESERVE,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RESERVE, 0xff, 0x00,
+		       0x00, 0xff, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_release = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_RELEASE,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RELEASE, 0xff, 0x00,
+		       0x00, 0xff, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_clear = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_CLEAR,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_CLEAR, 0xff, 0x00,
+		       0x00, 0xff, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_preempt = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_PREEMPT,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT, 0xff, 0x00,
+		       0x00, 0xff, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_preempt_abort = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_PREEMPT_AND_ABORT,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT_AND_ABORT, 0xff, 0x00,
+		       0x00, 0xff, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_reg_ign_exist = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_REGISTER_AND_IGNORE_EXISTING_KEY,
+	.cdb_size = 10,
+	.usage_bits = {
+		PERSISTENT_RESERVE_OUT, PRO_REGISTER_AND_IGNORE_EXISTING_KEY,
+		0xff, 0x00,
+		0x00, 0xff, 0xff, 0xff,
+		0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_pro_register_move = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = PERSISTENT_RESERVE_OUT,
+	.service_action = PRO_REGISTER_AND_MOVE,
+	.cdb_size = 10,
+	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER_AND_MOVE, 0xff, 0x00,
+		       0x00, 0xff, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_release = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = RELEASE,
+	.cdb_size = 6,
+	.usage_bits = {RELEASE, 0x00, 0x00, 0x00,
+		       0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_release10 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = RELEASE_10,
+	.cdb_size = 10,
+	.usage_bits = {RELEASE_10, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_reserve = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = RESERVE,
+	.cdb_size = 6,
+	.usage_bits = {RESERVE, 0x00, 0x00, 0x00,
+		       0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_reserve10 = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = RESERVE_10,
+	.cdb_size = 10,
+	.usage_bits = {RESERVE_10, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_request_sense = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = REQUEST_SENSE,
+	.cdb_size = 6,
+	.usage_bits = {REQUEST_SENSE, 0x00, 0x00, 0x00,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_inquiry = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = INQUIRY,
+	.cdb_size = 6,
+	.usage_bits = {INQUIRY, 0x01, 0xff, 0xff,
+		       0xff, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_extended_copy_lid1 = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = EXTENDED_COPY,
+	.cdb_size = 16,
+	.usage_bits = {EXTENDED_COPY, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_rcv_copy_res_op_params = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = RECEIVE_COPY_RESULTS,
+	.service_action = RCR_SA_OPERATING_PARAMETERS,
+	.cdb_size = 16,
+	.usage_bits = {RECEIVE_COPY_RESULTS, RCR_SA_OPERATING_PARAMETERS,
+		       0x00, 0x00,
+		       0x00, 0x00, 0x00, 0x00,
+		       0x00, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_report_luns = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = REPORT_LUNS,
+	.cdb_size = 12,
+	.usage_bits = {REPORT_LUNS, 0x00, 0xff, 0x00,
+		       0x00, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_test_unit_ready = {
+	.support = SCSI_SUPPORT_FULL,
+	.opcode = TEST_UNIT_READY,
+	.cdb_size = 6,
+	.usage_bits = {TEST_UNIT_READY, 0x00, 0x00, 0x00,
+		       0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_report_target_pgs = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = MAINTENANCE_IN,
+	.service_action = MI_REPORT_TARGET_PGS,
+	.cdb_size = 12,
+	.usage_bits = {MAINTENANCE_IN, 0xE0 | MI_REPORT_TARGET_PGS, 0x00, 0x00,
+		       0x00, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = MAINTENANCE_IN,
+	.service_action = MI_REPORT_SUPPORTED_OPERATION_CODES,
+	.cdb_size = 12,
+	.usage_bits = {MAINTENANCE_IN, MI_REPORT_SUPPORTED_OPERATION_CODES,
+		       0x87, 0xff,
+		       0xff, 0xff, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
+
+static struct target_opcode_descriptor tcm_opcode_set_tpg = {
+	.support = SCSI_SUPPORT_FULL,
+	.serv_action_valid = 1,
+	.opcode = MAINTENANCE_OUT,
+	.service_action = MO_SET_TARGET_PGS,
+	.cdb_size = 12,
+	.usage_bits = {MAINTENANCE_OUT, MO_SET_TARGET_PGS, 0x00, 0x00,
+		       0x00, 0x00, 0xff, 0xff,
+		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+};
 
 static struct target_opcode_descriptor *tcm_supported_opcodes[] = {
+	&tcm_opcode_read6,
+	&tcm_opcode_read10,
+	&tcm_opcode_read12,
+	&tcm_opcode_read16,
+	&tcm_opcode_write6,
+	&tcm_opcode_write10,
+	&tcm_opcode_write_verify10,
+	&tcm_opcode_write12,
+	&tcm_opcode_write16,
+	&tcm_opcode_write_verify16,
+	&tcm_opcode_write_same32,
+	&tcm_opcode_compare_write,
+	&tcm_opcode_read_capacity,
+	&tcm_opcode_read_capacity16,
+	&tcm_opcode_read_report_refferals,
+	&tcm_opcode_sync_cache,
+	&tcm_opcode_sync_cache16,
+	&tcm_opcode_unmap,
+	&tcm_opcode_write_same,
+	&tcm_opcode_write_same16,
+	&tcm_opcode_verify,
+	&tcm_opcode_verify16,
+	&tcm_opcode_start_stop,
+	&tcm_opcode_mode_select,
+	&tcm_opcode_mode_select10,
+	&tcm_opcode_mode_sense,
+	&tcm_opcode_mode_sense10,
+	&tcm_opcode_pri_read_keys,
+	&tcm_opcode_pri_read_resrv,
+	&tcm_opcode_pri_read_caps,
+	&tcm_opcode_pri_read_full_status,
+	&tcm_opcode_pro_register,
+	&tcm_opcode_pro_reserve,
+	&tcm_opcode_pro_release,
+	&tcm_opcode_pro_clear,
+	&tcm_opcode_pro_preempt,
+	&tcm_opcode_pro_preempt_abort,
+	&tcm_opcode_pro_reg_ign_exist,
+	&tcm_opcode_pro_register_move,
+	&tcm_opcode_release,
+	&tcm_opcode_release10,
+	&tcm_opcode_reserve,
+	&tcm_opcode_reserve10,
+	&tcm_opcode_request_sense,
+	&tcm_opcode_inquiry,
+	&tcm_opcode_extended_copy_lid1,
+	&tcm_opcode_rcv_copy_res_op_params,
+	&tcm_opcode_report_luns,
+	&tcm_opcode_test_unit_ready,
+	&tcm_opcode_report_target_pgs,
+	&tcm_opcode_report_supp_opcodes,
+	&tcm_opcode_set_tpg,
 };
 
 static int
diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h
index 651b5183451c1..cb722225b3bcd 100644
--- a/include/scsi/scsi_proto.h
+++ b/include/scsi/scsi_proto.h
@@ -349,4 +349,7 @@ enum scsi_support_opcode {
 	SCSI_SUPPORT_VENDOR		= 5,
 };
 
+#define SCSI_CONTROL_MASK 0
+#define SCSI_GROUP_NUMBER_MASK 0
+
 #endif /* _SCSI_PROTO_H_ */
-- 
GitLab


From 553b08d9b3a78aa602f818c0c94705774f018df0 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
Date: Tue, 6 Sep 2022 13:34:18 +0300
Subject: [PATCH 0473/4122] scsi: target: core: Dynamic opcode support in RSOC

Report supported opcodes depending on a dynamic device configuration.

Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Link: https://lore.kernel.org/r/20220906103421.22348-4-d.bogdanov@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_spc.c  | 120 ++++++++++++++++++++++++++++--
 include/target/target_core_base.h |   1 +
 2 files changed, 116 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 31cd6f31f6b1a..e1cf9c352fd3f 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -1405,6 +1405,15 @@ static struct target_opcode_descriptor tcm_opcode_write_verify16 = {
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
 };
 
+static bool tcm_is_ws_enabled(struct se_cmd *cmd)
+{
+	struct sbc_ops *ops = cmd->protocol_data;
+	struct se_device *dev = cmd->se_dev;
+
+	return (dev->dev_attrib.emulate_tpws && !!ops->execute_unmap) ||
+	       !!ops->execute_write_same;
+}
+
 static struct target_opcode_descriptor tcm_opcode_write_same32 = {
 	.support = SCSI_SUPPORT_FULL,
 	.serv_action_valid = 1,
@@ -1419,8 +1428,16 @@ static struct target_opcode_descriptor tcm_opcode_write_same32 = {
 		       0x00, 0x00, 0x00, 0x00,
 		       0x00, 0x00, 0x00, 0x00,
 		       0xff, 0xff, 0xff, 0xff},
+	.enabled = tcm_is_ws_enabled,
 };
 
+static bool tcm_is_caw_enabled(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+
+	return dev->dev_attrib.emulate_caw;
+}
+
 static struct target_opcode_descriptor tcm_opcode_compare_write = {
 	.support = SCSI_SUPPORT_FULL,
 	.opcode = COMPARE_AND_WRITE,
@@ -1429,6 +1446,7 @@ static struct target_opcode_descriptor tcm_opcode_compare_write = {
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, 0x00, 0x00,
 		       0x00, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_caw_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_read_capacity = {
@@ -1452,6 +1470,20 @@ static struct target_opcode_descriptor tcm_opcode_read_capacity16 = {
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
 };
 
+static bool tcm_is_rep_ref_enabled(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+
+	spin_lock(&dev->t10_alua.lba_map_lock);
+	if (list_empty(&dev->t10_alua.lba_map_list)) {
+		spin_unlock(&dev->t10_alua.lba_map_lock);
+		return false;
+	}
+	spin_unlock(&dev->t10_alua.lba_map_lock);
+	return true;
+
+}
+
 static struct target_opcode_descriptor tcm_opcode_read_report_refferals = {
 	.support = SCSI_SUPPORT_FULL,
 	.serv_action_valid = 1,
@@ -1462,6 +1494,7 @@ static struct target_opcode_descriptor tcm_opcode_read_report_refferals = {
 		       0x00, 0x00, 0x00, 0x00,
 		       0x00, 0x00, 0xff, 0xff,
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_rep_ref_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_sync_cache = {
@@ -1483,6 +1516,14 @@ static struct target_opcode_descriptor tcm_opcode_sync_cache16 = {
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
 };
 
+static bool tcm_is_unmap_enabled(struct se_cmd *cmd)
+{
+	struct sbc_ops *ops = cmd->protocol_data;
+	struct se_device *dev = cmd->se_dev;
+
+	return ops->execute_unmap && dev->dev_attrib.emulate_tpu;
+}
+
 static struct target_opcode_descriptor tcm_opcode_unmap = {
 	.support = SCSI_SUPPORT_FULL,
 	.opcode = UNMAP,
@@ -1490,6 +1531,7 @@ static struct target_opcode_descriptor tcm_opcode_unmap = {
 	.usage_bits = {UNMAP, 0x00, 0x00, 0x00,
 		       0x00, 0x00, SCSI_GROUP_NUMBER_MASK, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_unmap_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_write_same = {
@@ -1499,6 +1541,7 @@ static struct target_opcode_descriptor tcm_opcode_write_same = {
 	.usage_bits = {WRITE_SAME, 0xe8, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_ws_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_write_same16 = {
@@ -1509,6 +1552,7 @@ static struct target_opcode_descriptor tcm_opcode_write_same16 = {
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_ws_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_verify = {
@@ -1594,6 +1638,13 @@ static struct target_opcode_descriptor tcm_opcode_pri_read_resrv = {
 		       0xff, SCSI_CONTROL_MASK},
 };
 
+static bool tcm_is_pr_enabled(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+
+	return dev->dev_attrib.emulate_pr;
+}
+
 static struct target_opcode_descriptor tcm_opcode_pri_read_caps = {
 	.support = SCSI_SUPPORT_FULL,
 	.serv_action_valid = 1,
@@ -1603,6 +1654,7 @@ static struct target_opcode_descriptor tcm_opcode_pri_read_caps = {
 	.usage_bits = {PERSISTENT_RESERVE_IN, PRI_REPORT_CAPABILITIES, 0x00, 0x00,
 		       0x00, 0x00, 0x00, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pri_read_full_status = {
@@ -1614,6 +1666,7 @@ static struct target_opcode_descriptor tcm_opcode_pri_read_full_status = {
 	.usage_bits = {PERSISTENT_RESERVE_IN, PRI_READ_FULL_STATUS, 0x00, 0x00,
 		       0x00, 0x00, 0x00, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_register = {
@@ -1625,6 +1678,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_register = {
 	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER, 0xff, 0x00,
 		       0x00, 0xff, 0xff, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_reserve = {
@@ -1636,6 +1690,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_reserve = {
 	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RESERVE, 0xff, 0x00,
 		       0x00, 0xff, 0xff, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_release = {
@@ -1647,6 +1702,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_release = {
 	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_RELEASE, 0xff, 0x00,
 		       0x00, 0xff, 0xff, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_clear = {
@@ -1658,6 +1714,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_clear = {
 	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_CLEAR, 0xff, 0x00,
 		       0x00, 0xff, 0xff, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_preempt = {
@@ -1669,6 +1726,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_preempt = {
 	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT, 0xff, 0x00,
 		       0x00, 0xff, 0xff, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_preempt_abort = {
@@ -1680,6 +1738,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_preempt_abort = {
 	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_PREEMPT_AND_ABORT, 0xff, 0x00,
 		       0x00, 0xff, 0xff, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_reg_ign_exist = {
@@ -1693,6 +1752,7 @@ static struct target_opcode_descriptor tcm_opcode_pro_reg_ign_exist = {
 		0xff, 0x00,
 		0x00, 0xff, 0xff, 0xff,
 		0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_pro_register_move = {
@@ -1704,14 +1764,23 @@ static struct target_opcode_descriptor tcm_opcode_pro_register_move = {
 	.usage_bits = {PERSISTENT_RESERVE_OUT, PRO_REGISTER_AND_MOVE, 0xff, 0x00,
 		       0x00, 0xff, 0xff, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_pr_enabled,
 };
 
+static bool tcm_is_scsi2_reservations_enabled(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+
+	return dev->dev_attrib.emulate_pr;
+}
+
 static struct target_opcode_descriptor tcm_opcode_release = {
 	.support = SCSI_SUPPORT_FULL,
 	.opcode = RELEASE,
 	.cdb_size = 6,
 	.usage_bits = {RELEASE, 0x00, 0x00, 0x00,
 		       0x00, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_scsi2_reservations_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_release10 = {
@@ -1721,6 +1790,7 @@ static struct target_opcode_descriptor tcm_opcode_release10 = {
 	.usage_bits = {RELEASE_10, 0x00, 0x00, 0x00,
 		       0x00, 0x00, 0x00, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_scsi2_reservations_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_reserve = {
@@ -1729,6 +1799,7 @@ static struct target_opcode_descriptor tcm_opcode_reserve = {
 	.cdb_size = 6,
 	.usage_bits = {RESERVE, 0x00, 0x00, 0x00,
 		       0x00, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_scsi2_reservations_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_reserve10 = {
@@ -1738,6 +1809,7 @@ static struct target_opcode_descriptor tcm_opcode_reserve10 = {
 	.usage_bits = {RESERVE_10, 0x00, 0x00, 0x00,
 		       0x00, 0x00, 0x00, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_scsi2_reservations_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_request_sense = {
@@ -1756,6 +1828,13 @@ static struct target_opcode_descriptor tcm_opcode_inquiry = {
 		       0xff, SCSI_CONTROL_MASK},
 };
 
+static bool tcm_is_3pc_enabled(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+
+	return dev->dev_attrib.emulate_3pc;
+}
+
 static struct target_opcode_descriptor tcm_opcode_extended_copy_lid1 = {
 	.support = SCSI_SUPPORT_FULL,
 	.serv_action_valid = 1,
@@ -1765,6 +1844,7 @@ static struct target_opcode_descriptor tcm_opcode_extended_copy_lid1 = {
 		       0x00, 0x00, 0x00, 0x00,
 		       0x00, 0x00, 0xff, 0xff,
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_3pc_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_rcv_copy_res_op_params = {
@@ -1778,6 +1858,7 @@ static struct target_opcode_descriptor tcm_opcode_rcv_copy_res_op_params = {
 		       0x00, 0x00, 0x00, 0x00,
 		       0x00, 0x00, 0xff, 0xff,
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_3pc_enabled,
 };
 
 static struct target_opcode_descriptor tcm_opcode_report_luns = {
@@ -1820,6 +1901,26 @@ static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = {
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
 };
 
+static bool tcm_is_set_tpg_enabled(struct se_cmd *cmd)
+{
+	struct t10_alua_tg_pt_gp *l_tg_pt_gp;
+	struct se_lun *l_lun = cmd->se_lun;
+
+	rcu_read_lock();
+	l_tg_pt_gp = rcu_dereference(l_lun->lun_tg_pt_gp);
+	if (!l_tg_pt_gp) {
+		rcu_read_unlock();
+		return false;
+	}
+	if (!(l_tg_pt_gp->tg_pt_gp_alua_access_type & TPGS_EXPLICIT_ALUA)) {
+		rcu_read_unlock();
+		return false;
+	}
+	rcu_read_unlock();
+
+	return true;
+}
+
 static struct target_opcode_descriptor tcm_opcode_set_tpg = {
 	.support = SCSI_SUPPORT_FULL,
 	.serv_action_valid = 1,
@@ -1829,6 +1930,7 @@ static struct target_opcode_descriptor tcm_opcode_set_tpg = {
 	.usage_bits = {MAINTENANCE_OUT, MO_SET_TARGET_PGS, 0x00, 0x00,
 		       0x00, 0x00, 0xff, 0xff,
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+	.enabled = tcm_is_set_tpg_enabled,
 };
 
 static struct target_opcode_descriptor *tcm_supported_opcodes[] = {
@@ -1982,7 +2084,9 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode)
 			 */
 			if (descr->serv_action_valid)
 				return TCM_INVALID_CDB_FIELD;
-			*opcode = descr;
+
+			if (!descr->enabled || descr->enabled(cmd))
+				*opcode = descr;
 			break;
 		case 0x2:
 			/*
@@ -1994,9 +2098,10 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode)
 			 * and the additional sense code set to INVALID FIELD IN CDB.
 			 */
 			if (descr->serv_action_valid &&
-			    descr->service_action == requested_sa)
-				*opcode = descr;
-			else if (!descr->serv_action_valid)
+			    descr->service_action == requested_sa) {
+				if (!descr->enabled || descr->enabled(cmd))
+					*opcode = descr;
+			} else if (!descr->serv_action_valid)
 				return TCM_INVALID_CDB_FIELD;
 			break;
 		case 0x3:
@@ -2007,10 +2112,12 @@ spc_rsoc_get_descr(struct se_cmd *cmd, struct target_opcode_descriptor **opcode)
 			 * be returned in the one_command parameter data format.
 			 */
 			if (descr->service_action == requested_sa)
-				*opcode = descr;
+				if (!descr->enabled || descr->enabled(cmd))
+					*opcode = descr;
 			break;
 		}
 	}
+
 	return 0;
 }
 
@@ -2059,6 +2166,9 @@ spc_emulate_report_supp_op_codes(struct se_cmd *cmd)
 
 		for (i = 0; i < ARRAY_SIZE(tcm_supported_opcodes); i++) {
 			descr = tcm_supported_opcodes[i];
+			if (descr->enabled && !descr->enabled(cmd))
+				continue;
+
 			response_length += spc_rsoc_encode_command_descriptor(
 					&buf[response_length], rctd, descr);
 		}
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 02a2d48d20b69..7542a8de8fb5b 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -876,6 +876,7 @@ struct target_opcode_descriptor {
 	u8			specific_timeout;
 	u16			nominal_timeout;
 	u16			recommended_timeout;
+	bool			(*enabled)(struct se_cmd *cmd);
 	u8			usage_bits[];
 };
 
-- 
GitLab


From bd217b8c3a1f705f2d92d30974412fbd5f43271a Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
Date: Tue, 6 Sep 2022 13:34:19 +0300
Subject: [PATCH 0474/4122] scsi: target: core: Add emulate_rsoc attribute

Allow support for RSOC to be turned off via the emulate_rsoc attibute.
This is just for testing purposes.

Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Link: https://lore.kernel.org/r/20220906103421.22348-5-d.bogdanov@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_configfs.c | 20 ++++++++++++++++++++
 drivers/target/target_core_device.c   |  1 +
 drivers/target/target_core_spc.c      | 12 ++++++++++++
 include/target/target_core_base.h     |  3 +++
 4 files changed, 36 insertions(+)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 416514c5c7acd..533524299ed6b 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -547,6 +547,7 @@ DEF_CONFIGFS_ATTRIB_SHOW(unmap_granularity);
 DEF_CONFIGFS_ATTRIB_SHOW(unmap_granularity_alignment);
 DEF_CONFIGFS_ATTRIB_SHOW(unmap_zeroes_data);
 DEF_CONFIGFS_ATTRIB_SHOW(max_write_same_len);
+DEF_CONFIGFS_ATTRIB_SHOW(emulate_rsoc);
 
 #define DEF_CONFIGFS_ATTRIB_STORE_U32(_name)				\
 static ssize_t _name##_store(struct config_item *item, const char *page,\
@@ -1186,6 +1187,23 @@ static ssize_t pgr_support_store(struct config_item *item,
 	return count;
 }
 
+static ssize_t emulate_rsoc_store(struct config_item *item,
+		const char *page, size_t count)
+{
+	struct se_dev_attrib *da = to_attrib(item);
+	bool flag;
+	int ret;
+
+	ret = strtobool(page, &flag);
+	if (ret < 0)
+		return ret;
+
+	da->emulate_rsoc = flag;
+	pr_debug("dev[%p]: SE Device REPORT_SUPPORTED_OPERATION_CODES_EMULATION flag: %d\n",
+			da->da_dev, flag);
+	return count;
+}
+
 CONFIGFS_ATTR(, emulate_model_alias);
 CONFIGFS_ATTR(, emulate_dpo);
 CONFIGFS_ATTR(, emulate_fua_write);
@@ -1198,6 +1216,7 @@ CONFIGFS_ATTR(, emulate_tpws);
 CONFIGFS_ATTR(, emulate_caw);
 CONFIGFS_ATTR(, emulate_3pc);
 CONFIGFS_ATTR(, emulate_pr);
+CONFIGFS_ATTR(, emulate_rsoc);
 CONFIGFS_ATTR(, pi_prot_type);
 CONFIGFS_ATTR_RO(, hw_pi_prot_type);
 CONFIGFS_ATTR(, pi_prot_format);
@@ -1261,6 +1280,7 @@ struct configfs_attribute *sbc_attrib_attrs[] = {
 	&attr_max_write_same_len,
 	&attr_alua_support,
 	&attr_pgr_support,
+	&attr_emulate_rsoc,
 	NULL,
 };
 EXPORT_SYMBOL(sbc_attrib_attrs);
diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c
index b7f16ee8aa0e5..e7d202b574055 100644
--- a/drivers/target/target_core_device.c
+++ b/drivers/target/target_core_device.c
@@ -785,6 +785,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
 	dev->dev_attrib.emulate_caw = DA_EMULATE_CAW;
 	dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC;
 	dev->dev_attrib.emulate_pr = DA_EMULATE_PR;
+	dev->dev_attrib.emulate_rsoc = DA_EMULATE_RSOC;
 	dev->dev_attrib.pi_prot_type = TARGET_DIF_TYPE0_PROT;
 	dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS;
 	dev->dev_attrib.force_pr_aptpl = DA_FORCE_PR_APTPL;
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index e1cf9c352fd3f..91f03312a5eaf 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -1889,6 +1889,14 @@ static struct target_opcode_descriptor tcm_opcode_report_target_pgs = {
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
 };
 
+
+static bool spc_rsoc_enabled(struct se_cmd *cmd)
+{
+	struct se_device *dev = cmd->se_dev;
+
+	return dev->dev_attrib.emulate_rsoc;
+}
+
 static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = {
 	.support = SCSI_SUPPORT_FULL,
 	.serv_action_valid = 1,
@@ -1899,6 +1907,7 @@ static struct target_opcode_descriptor tcm_opcode_report_supp_opcodes = {
 		       0x87, 0xff,
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, 0x00, SCSI_CONTROL_MASK},
+	.enabled = spc_rsoc_enabled,
 };
 
 static bool tcm_is_set_tpg_enabled(struct se_cmd *cmd)
@@ -2135,6 +2144,9 @@ spc_emulate_report_supp_op_codes(struct se_cmd *cmd)
 	sense_reason_t ret = 0;
 	int i;
 
+	if (!cmd->se_dev->dev_attrib.emulate_rsoc)
+		return TCM_UNSUPPORTED_SCSI_OPCODE;
+
 	rbuf = transport_kmap_data_sg(cmd);
 	if (cmd->data_length && !rbuf) {
 		ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 7542a8de8fb5b..062ee8b6c4333 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -91,6 +91,8 @@
 #define DA_EMULATE_ALUA				0
 /* Emulate SCSI2 RESERVE/RELEASE and Persistent Reservations by default */
 #define DA_EMULATE_PR				1
+/* Emulation for REPORT SUPPORTED OPERATION CODES */
+#define DA_EMULATE_RSOC				1
 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */
 #define DA_ENFORCE_PR_ISIDS			1
 /* Force SPC-3 PR Activate Persistence across Target Power Loss */
@@ -690,6 +692,7 @@ struct se_dev_attrib {
 	bool		emulate_caw;
 	bool		emulate_3pc;
 	bool		emulate_pr;
+	bool		emulate_rsoc;
 	enum target_prot_type pi_prot_type;
 	enum target_prot_type hw_pi_prot_type;
 	bool		pi_prot_verify;
-- 
GitLab


From b8908e5e1d1de66e6905fbec7cdfbbf8ac3ecf9a Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
Date: Tue, 6 Sep 2022 13:34:20 +0300
Subject: [PATCH 0475/4122] scsi: target: core: Check emulate_3pc for RECEIVE
 COPY

RECEIVE COPY RESULTS is an opcode from 3rd party copy command set and shall
be rejected if emulate_3pc attribute is off like EXTENDED COPY.

Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Link: https://lore.kernel.org/r/20220906103421.22348-6-d.bogdanov@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_xcopy.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index 8713cda0c2fb5..edf5222082855 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -1009,8 +1009,14 @@ sense_reason_t target_do_receive_copy_results(struct se_cmd *se_cmd)
 {
 	unsigned char *cdb = &se_cmd->t_task_cdb[0];
 	int sa = (cdb[1] & 0x1f), list_id = cdb[2];
+	struct se_device *dev = se_cmd->se_dev;
 	sense_reason_t rc = TCM_NO_SENSE;
 
+	if (!dev->dev_attrib.emulate_3pc) {
+		pr_debug("Third-party copy operations explicitly disabled\n");
+		return TCM_UNSUPPORTED_SCSI_OPCODE;
+	}
+
 	pr_debug("Entering target_do_receive_copy_results: SA: 0x%02x, List ID:"
 		" 0x%02x, AL: %u\n", sa, list_id, se_cmd->data_length);
 
-- 
GitLab


From 415d82b4401150c32687e1b7cc68de621ad24663 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <d.bogdanov@yadro.com>
Date: Tue, 6 Sep 2022 13:34:21 +0300
Subject: [PATCH 0476/4122] scsi: target: core: Dynamically set DPO and FUA in
 usage_bits

libiscsi tests check the support of DPO & FUA bits in usage bits of RSOC
response.  This patch adds support for dynamic usage bits for each opcode.
Set support of DPO & FUA bits in usage_bits of RSOC response depending on
support DPOFUA in the backstore device.

Reviewed-by: Roman Bolshakov <r.bolshakov@yadro.com>
Reviewed-by: Konstantin Shelekhin <k.shelekhin@yadro.com>
Signed-off-by: Dmitry Bogdanov <d.bogdanov@yadro.com>
Link: https://lore.kernel.org/r/20220906103421.22348-7-d.bogdanov@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_spc.c  | 34 +++++++++++++++++++++++++++++--
 include/target/target_core_base.h |  2 ++
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 91f03312a5eaf..ffe02e1957334 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -1314,6 +1314,22 @@ spc_emulate_testunitready(struct se_cmd *cmd)
 	return 0;
 }
 
+static void set_dpofua_usage_bits(u8 *usage_bits, struct se_device *dev)
+{
+	if (!target_check_fua(dev))
+		usage_bits[1] &= ~0x18;
+	else
+		usage_bits[1] |= 0x18;
+}
+
+static void set_dpofua_usage_bits32(u8 *usage_bits, struct se_device *dev)
+{
+	if (!target_check_fua(dev))
+		usage_bits[10] &= ~0x18;
+	else
+		usage_bits[10] |= 0x18;
+}
+
 static struct target_opcode_descriptor tcm_opcode_read6 = {
 	.support = SCSI_SUPPORT_FULL,
 	.opcode = READ_6,
@@ -1329,6 +1345,7 @@ static struct target_opcode_descriptor tcm_opcode_read10 = {
 	.usage_bits = {READ_10, 0xf8, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_read12 = {
@@ -1338,6 +1355,7 @@ static struct target_opcode_descriptor tcm_opcode_read12 = {
 	.usage_bits = {READ_12, 0xf8, 0xff, 0xff,
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_read16 = {
@@ -1348,6 +1366,7 @@ static struct target_opcode_descriptor tcm_opcode_read16 = {
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_write6 = {
@@ -1365,6 +1384,7 @@ static struct target_opcode_descriptor tcm_opcode_write10 = {
 	.usage_bits = {WRITE_10, 0xf8, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_write_verify10 = {
@@ -1374,6 +1394,7 @@ static struct target_opcode_descriptor tcm_opcode_write_verify10 = {
 	.usage_bits = {WRITE_VERIFY, 0xf0, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, 0xff,
 		       0xff, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_write12 = {
@@ -1383,6 +1404,7 @@ static struct target_opcode_descriptor tcm_opcode_write12 = {
 	.usage_bits = {WRITE_12, 0xf8, 0xff, 0xff,
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_write16 = {
@@ -1393,6 +1415,7 @@ static struct target_opcode_descriptor tcm_opcode_write16 = {
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_write_verify16 = {
@@ -1403,6 +1426,7 @@ static struct target_opcode_descriptor tcm_opcode_write_verify16 = {
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, 0xff, 0xff,
 		       0xff, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static bool tcm_is_ws_enabled(struct se_cmd *cmd)
@@ -1429,6 +1453,7 @@ static struct target_opcode_descriptor tcm_opcode_write_same32 = {
 		       0x00, 0x00, 0x00, 0x00,
 		       0xff, 0xff, 0xff, 0xff},
 	.enabled = tcm_is_ws_enabled,
+	.update_usage_bits = set_dpofua_usage_bits32,
 };
 
 static bool tcm_is_caw_enabled(struct se_cmd *cmd)
@@ -1447,6 +1472,7 @@ static struct target_opcode_descriptor tcm_opcode_compare_write = {
 		       0xff, 0xff, 0x00, 0x00,
 		       0x00, 0xff, SCSI_GROUP_NUMBER_MASK, SCSI_CONTROL_MASK},
 	.enabled = tcm_is_caw_enabled,
+	.update_usage_bits = set_dpofua_usage_bits,
 };
 
 static struct target_opcode_descriptor tcm_opcode_read_capacity = {
@@ -2033,7 +2059,8 @@ spc_rsoc_encode_command_descriptor(unsigned char *buf, u8 ctdp,
 
 static int
 spc_rsoc_encode_one_command_descriptor(unsigned char *buf, u8 ctdp,
-				       struct target_opcode_descriptor *descr)
+				       struct target_opcode_descriptor *descr,
+				       struct se_device *dev)
 {
 	int td_size = 0;
 
@@ -2045,6 +2072,8 @@ spc_rsoc_encode_one_command_descriptor(unsigned char *buf, u8 ctdp,
 	buf[1] = (ctdp << 7) | SCSI_SUPPORT_FULL;
 	put_unaligned_be16(descr->cdb_size, &buf[2]);
 	memcpy(&buf[4], descr->usage_bits, descr->cdb_size);
+	if (descr->update_usage_bits)
+		descr->update_usage_bits(&buf[4], dev);
 
 	td_size = spc_rsoc_encode_command_timeouts_descriptor(
 			&buf[4 + descr->cdb_size], ctdp, descr);
@@ -2187,7 +2216,8 @@ spc_emulate_report_supp_op_codes(struct se_cmd *cmd)
 		put_unaligned_be32(response_length - 3, buf);
 	} else {
 		response_length = spc_rsoc_encode_one_command_descriptor(
-				&buf[response_length], rctd, descr);
+				&buf[response_length], rctd, descr,
+				cmd->se_dev);
 	}
 
 	memcpy(rbuf, buf, min_t(u32, response_length, cmd->data_length));
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 062ee8b6c4333..0c1e439809852 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -880,6 +880,8 @@ struct target_opcode_descriptor {
 	u16			nominal_timeout;
 	u16			recommended_timeout;
 	bool			(*enabled)(struct se_cmd *cmd);
+	void			(*update_usage_bits)(u8 *usage_bits,
+						     struct se_device *dev);
 	u8			usage_bits[];
 };
 
-- 
GitLab


From a301d487d7bde62de43671a1642f8f5a2e2cceef Mon Sep 17 00:00:00 2001
From: Peter Wang <peter.wang@mediatek.com>
Date: Mon, 24 Oct 2022 20:06:02 +0800
Subject: [PATCH 0477/4122] scsi: ufs: core: Print events for WLUN suspend and
 resume failures

WLUN suspend and resume events are currently not handled by
ufshcd_print_evt_hist(). Add the missing events.

Signed-off-by: Peter Wang <peter.wang@mediatek.com>
Link: https://lore.kernel.org/r/20221024120602.30019-1-peter.wang@mediatek.com
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Reviewed-by: Asutosh Das <quic_asutoshd@quicinc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index b2203dd79e8c3..008fc60392fc2 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -486,6 +486,9 @@ static void ufshcd_print_evt_hist(struct ufs_hba *hba)
 	ufshcd_print_evt(hba, UFS_EVT_RESUME_ERR, "resume_fail");
 	ufshcd_print_evt(hba, UFS_EVT_SUSPEND_ERR,
 			 "suspend_fail");
+	ufshcd_print_evt(hba, UFS_EVT_WL_RES_ERR, "wlun resume_fail");
+	ufshcd_print_evt(hba, UFS_EVT_WL_SUSP_ERR,
+			 "wlun suspend_fail");
 	ufshcd_print_evt(hba, UFS_EVT_DEV_RESET, "dev_reset");
 	ufshcd_print_evt(hba, UFS_EVT_HOST_RESET, "host_reset");
 	ufshcd_print_evt(hba, UFS_EVT_ABORT, "task_abort");
-- 
GitLab


From 25ad6f63e77eeafc3a9f17c92aadd66c56599fdc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 24 Oct 2022 15:11:01 +0100
Subject: [PATCH 0478/4122] scsi: pcmcia: nsp_cs: Remove unused variable i

Variable i is just being incremented and it's never used anywhere else. The
variable and the increment are redundant so remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221024141101.2161167-1-colin.i.king@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pcmcia/nsp_cs.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/scsi/pcmcia/nsp_cs.c b/drivers/scsi/pcmcia/nsp_cs.c
index 48acab03a8a00..a5a1406a2bdef 100644
--- a/drivers/scsi/pcmcia/nsp_cs.c
+++ b/drivers/scsi/pcmcia/nsp_cs.c
@@ -450,8 +450,6 @@ static int nsp_analyze_sdtr(struct scsi_cmnd *SCpnt)
 	sync_data	      *sync   = &(data->Sync[target]);
 	struct nsp_sync_table *sync_table;
 	unsigned int	       period, offset;
-	int		       i;
-
 
 	nsp_dbg(NSP_DEBUG_SYNC, "in");
 
@@ -466,7 +464,7 @@ static int nsp_analyze_sdtr(struct scsi_cmnd *SCpnt)
 		sync_table = nsp_sync_table_40M;
 	}
 
-	for ( i = 0; sync_table->max_period != 0; i++, sync_table++) {
+	for (; sync_table->max_period != 0; sync_table++) {
 		if ( period >= sync_table->min_period &&
 		     period <= sync_table->max_period	 ) {
 			break;
-- 
GitLab


From b43678ea5bbd92388339ecae47ed44955474f53b Mon Sep 17 00:00:00 2001
From: Bean Huo <beanhuo@micron.com>
Date: Wed, 26 Oct 2022 00:24:28 +0200
Subject: [PATCH 0479/4122] scsi: ufs: core: Revert "WB is only available on
 LUN #0 to #7"

Ccommit d3d9c4570285 ("scsi: ufs: Fix memory corruption by
ufshcd_read_desc_param()") has properly fixed stack overflow issue.

As a result, commit a2fca52ee640 ("scsi: ufs: WB is only available on LUN
#0 to #7") is no longer required. Revert it.

Cc: Jaegeuk Kim <jaegeuk@kernel.org>
Signed-off-by: Bean Huo <beanhuo@micron.com>
Link: https://lore.kernel.org/r/20221025222430.277768-2-beanhuo@iokpp.de
Reviewed-by: Arthur Simchaev <arthur.simchaev@wdc.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufs-sysfs.c   | 3 +--
 drivers/ufs/core/ufshcd-priv.h | 6 +-----
 drivers/ufs/core/ufshcd.c      | 2 +-
 3 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c
index 53aea56d1de13..eb6b278c4e790 100644
--- a/drivers/ufs/core/ufs-sysfs.c
+++ b/drivers/ufs/core/ufs-sysfs.c
@@ -1234,8 +1234,7 @@ static ssize_t _pname##_show(struct device *dev,			\
 	struct scsi_device *sdev = to_scsi_device(dev);			\
 	struct ufs_hba *hba = shost_priv(sdev->host);			\
 	u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun);			\
-	if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun,		\
-				_duname##_DESC_PARAM##_puname))		\
+	if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun))		\
 		return -EINVAL;						\
 	return ufs_sysfs_read_desc_param(hba, QUERY_DESC_IDN_##_duname,	\
 		lun, _duname##_DESC_PARAM##_puname, buf, _size);	\
diff --git a/drivers/ufs/core/ufshcd-priv.h b/drivers/ufs/core/ufshcd-priv.h
index f68ca33f6ac78..a9e8e1f5afe7a 100644
--- a/drivers/ufs/core/ufshcd-priv.h
+++ b/drivers/ufs/core/ufshcd-priv.h
@@ -293,16 +293,12 @@ static inline int ufshcd_rpm_put(struct ufs_hba *hba)
  * @lun: LU number to check
  * @return: true if the lun has a matching unit descriptor, false otherwise
  */
-static inline bool ufs_is_valid_unit_desc_lun(struct ufs_dev_info *dev_info,
-		u8 lun, u8 param_offset)
+static inline bool ufs_is_valid_unit_desc_lun(struct ufs_dev_info *dev_info, u8 lun)
 {
 	if (!dev_info || !dev_info->max_lu_supported) {
 		pr_err("Max General LU supported by UFS isn't initialized\n");
 		return false;
 	}
-	/* WB is available only for the logical unit from 0 to 7 */
-	if (param_offset == UNIT_DESC_PARAM_WB_BUF_ALLOC_UNITS)
-		return lun < UFS_UPIU_MAX_WB_LUN_ID;
 	return lun == UFS_UPIU_RPMB_WLUN || (lun < dev_info->max_lu_supported);
 }
 
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 008fc60392fc2..94ae0fb253584 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -3608,7 +3608,7 @@ static inline int ufshcd_read_unit_desc_param(struct ufs_hba *hba,
 	 * Unit descriptors are only available for general purpose LUs (LUN id
 	 * from 0 to 7) and RPMB Well known LU.
 	 */
-	if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun, param_offset))
+	if (!ufs_is_valid_unit_desc_lun(&hba->dev_info, lun))
 		return -EOPNOTSUPP;
 
 	return ufshcd_read_desc_param(hba, QUERY_DESC_IDN_UNIT, lun,
-- 
GitLab


From dca899bc02231214e25cffd3014cc77018dae942 Mon Sep 17 00:00:00 2001
From: Bean Huo <beanhuo@micron.com>
Date: Wed, 26 Oct 2022 00:24:29 +0200
Subject: [PATCH 0480/4122] scsi: ufs: core: Clean up ufshcd_slave_alloc()

Combine ufshcd_get_lu_power_on_wp_status() and ufshcd_set_queue_depth()
into one single ufshcd_lu_init(), so that we only need to read the LUN
descriptor once.

Signed-off-by: Bean Huo <beanhuo@micron.com>
Link: https://lore.kernel.org/r/20221025222430.277768-3-beanhuo@iokpp.de
Reviewed-by: Arthur Simchaev <arthur.simchaev@wdc.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 150 ++++++++++++++------------------------
 1 file changed, 53 insertions(+), 97 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 94ae0fb253584..ee73d70361331 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -4861,100 +4861,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
 	return err;
 }
 
-/**
- * ufshcd_set_queue_depth - set lun queue depth
- * @sdev: pointer to SCSI device
- *
- * Read bLUQueueDepth value and activate scsi tagged command
- * queueing. For WLUN, queue depth is set to 1. For best-effort
- * cases (bLUQueueDepth = 0) the queue depth is set to a maximum
- * value that host can queue.
- */
-static void ufshcd_set_queue_depth(struct scsi_device *sdev)
-{
-	int ret = 0;
-	u8 lun_qdepth;
-	struct ufs_hba *hba;
-
-	hba = shost_priv(sdev->host);
-
-	lun_qdepth = hba->nutrs;
-	ret = ufshcd_read_unit_desc_param(hba,
-					  ufshcd_scsi_to_upiu_lun(sdev->lun),
-					  UNIT_DESC_PARAM_LU_Q_DEPTH,
-					  &lun_qdepth,
-					  sizeof(lun_qdepth));
-
-	/* Some WLUN doesn't support unit descriptor */
-	if (ret == -EOPNOTSUPP)
-		lun_qdepth = 1;
-	else if (!lun_qdepth)
-		/* eventually, we can figure out the real queue depth */
-		lun_qdepth = hba->nutrs;
-	else
-		lun_qdepth = min_t(int, lun_qdepth, hba->nutrs);
-
-	dev_dbg(hba->dev, "%s: activate tcq with queue depth %d\n",
-			__func__, lun_qdepth);
-	scsi_change_queue_depth(sdev, lun_qdepth);
-}
-
-/*
- * ufshcd_get_lu_wp - returns the "b_lu_write_protect" from UNIT DESCRIPTOR
- * @hba: per-adapter instance
- * @lun: UFS device lun id
- * @b_lu_write_protect: pointer to buffer to hold the LU's write protect info
- *
- * Returns 0 in case of success and b_lu_write_protect status would be returned
- * @b_lu_write_protect parameter.
- * Returns -ENOTSUPP if reading b_lu_write_protect is not supported.
- * Returns -EINVAL in case of invalid parameters passed to this function.
- */
-static int ufshcd_get_lu_wp(struct ufs_hba *hba,
-			    u8 lun,
-			    u8 *b_lu_write_protect)
-{
-	int ret;
-
-	if (!b_lu_write_protect)
-		ret = -EINVAL;
-	/*
-	 * According to UFS device spec, RPMB LU can't be write
-	 * protected so skip reading bLUWriteProtect parameter for
-	 * it. For other W-LUs, UNIT DESCRIPTOR is not available.
-	 */
-	else if (lun >= hba->dev_info.max_lu_supported)
-		ret = -ENOTSUPP;
-	else
-		ret = ufshcd_read_unit_desc_param(hba,
-					  lun,
-					  UNIT_DESC_PARAM_LU_WR_PROTECT,
-					  b_lu_write_protect,
-					  sizeof(*b_lu_write_protect));
-	return ret;
-}
-
-/**
- * ufshcd_get_lu_power_on_wp_status - get LU's power on write protect
- * status
- * @hba: per-adapter instance
- * @sdev: pointer to SCSI device
- *
- */
-static inline void ufshcd_get_lu_power_on_wp_status(struct ufs_hba *hba,
-						    const struct scsi_device *sdev)
-{
-	if (hba->dev_info.f_power_on_wp_en &&
-	    !hba->dev_info.is_lu_power_on_wp) {
-		u8 b_lu_write_protect;
-
-		if (!ufshcd_get_lu_wp(hba, ufshcd_scsi_to_upiu_lun(sdev->lun),
-				      &b_lu_write_protect) &&
-		    (b_lu_write_protect == UFS_LU_POWER_ON_WP))
-			hba->dev_info.is_lu_power_on_wp = true;
-	}
-}
-
 /**
  * ufshcd_setup_links - associate link b/w device wlun and other luns
  * @sdev: pointer to SCSI device
@@ -4992,6 +4898,58 @@ static void ufshcd_setup_links(struct ufs_hba *hba, struct scsi_device *sdev)
 	}
 }
 
+/**
+ * ufshcd_lu_init - Initialize the relevant parameters of the LU
+ * @hba: per-adapter instance
+ * @sdev: pointer to SCSI device
+ */
+static void ufshcd_lu_init(struct ufs_hba *hba, struct scsi_device *sdev)
+{
+	int len = hba->desc_size[QUERY_DESC_IDN_UNIT];
+	u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun);
+	u8 lun_qdepth = hba->nutrs;
+	u8 *desc_buf;
+	int ret;
+
+	desc_buf = kzalloc(len, GFP_KERNEL);
+	if (!desc_buf)
+		goto set_qdepth;
+
+	ret = ufshcd_read_unit_desc_param(hba, lun, 0, desc_buf, len);
+	if (ret < 0) {
+		if (ret == -EOPNOTSUPP)
+			/* If LU doesn't support unit descriptor, its queue depth is set to 1 */
+			lun_qdepth = 1;
+		kfree(desc_buf);
+		goto set_qdepth;
+	}
+
+	if (desc_buf[UNIT_DESC_PARAM_LU_Q_DEPTH]) {
+		/*
+		 * In per-LU queueing architecture, bLUQueueDepth will not be 0, then we will
+		 * use the smaller between UFSHCI CAP.NUTRS and UFS LU bLUQueueDepth
+		 */
+		lun_qdepth = min_t(int, desc_buf[UNIT_DESC_PARAM_LU_Q_DEPTH], hba->nutrs);
+	}
+	/*
+	 * According to UFS device specification, the write protection mode is only supported by
+	 * normal LU, not supported by WLUN.
+	 */
+	if (hba->dev_info.f_power_on_wp_en && lun < hba->dev_info.max_lu_supported &&
+	    !hba->dev_info.is_lu_power_on_wp &&
+	    desc_buf[UNIT_DESC_PARAM_LU_WR_PROTECT] == UFS_LU_POWER_ON_WP)
+		hba->dev_info.is_lu_power_on_wp = true;
+
+	kfree(desc_buf);
+set_qdepth:
+	/*
+	 * For WLUNs that don't support unit descriptor, queue depth is set to 1. For LUs whose
+	 * bLUQueueDepth == 0, the queue depth is set to a maximum value that host can queue.
+	 */
+	dev_dbg(hba->dev, "Set LU %x queue depth %d\n", lun, lun_qdepth);
+	scsi_change_queue_depth(sdev, lun_qdepth);
+}
+
 /**
  * ufshcd_slave_alloc - handle initial SCSI device configurations
  * @sdev: pointer to SCSI device
@@ -5019,9 +4977,7 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
 	/* WRITE_SAME command is not supported */
 	sdev->no_write_same = 1;
 
-	ufshcd_set_queue_depth(sdev);
-
-	ufshcd_get_lu_power_on_wp_status(hba, sdev);
+	ufshcd_lu_init(hba, sdev);
 
 	ufshcd_setup_links(hba, sdev);
 
-- 
GitLab


From 9d266e792b0fb4c25448dc240a808667e0932ef2 Mon Sep 17 00:00:00 2001
From: Bean Huo <beanhuo@micron.com>
Date: Wed, 26 Oct 2022 00:24:30 +0200
Subject: [PATCH 0481/4122] scsi: ufs: core: Use is_visible to control UFS unit
 descriptor sysfs nodes

UFS Boot and Device W-LUs do not have unit descriptors and RPMB does not
support WB. Use is_visible() to control which nodes are visible and which
are not.

Signed-off-by: Bean Huo <beanhuo@micron.com>
Link: https://lore.kernel.org/r/20221025222430.277768-4-beanhuo@iokpp.de
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Arthur Simchaev <arthur.simchaev@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufs-sysfs.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/ufs/core/ufs-sysfs.c b/drivers/ufs/core/ufs-sysfs.c
index eb6b278c4e790..883f0e44b54e7 100644
--- a/drivers/ufs/core/ufs-sysfs.c
+++ b/drivers/ufs/core/ufs-sysfs.c
@@ -1285,9 +1285,27 @@ static struct attribute *ufs_sysfs_unit_descriptor[] = {
 	NULL,
 };
 
+static umode_t ufs_unit_descriptor_is_visible(struct kobject *kobj, struct attribute *attr, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+	struct scsi_device *sdev = to_scsi_device(dev);
+	u8 lun = ufshcd_scsi_to_upiu_lun(sdev->lun);
+	umode_t mode = attr->mode;
+
+	if (lun == UFS_UPIU_BOOT_WLUN || lun == UFS_UPIU_UFS_DEVICE_WLUN)
+		/* Boot and device WLUN have no unit descriptors */
+		mode = 0;
+	if (lun == UFS_UPIU_RPMB_WLUN && attr == &dev_attr_wb_buf_alloc_units.attr)
+		mode = 0;
+
+	return mode;
+}
+
+
 const struct attribute_group ufs_sysfs_unit_descriptor_group = {
 	.name = "unit_descriptor",
 	.attrs = ufs_sysfs_unit_descriptor,
+	.is_visible = ufs_unit_descriptor_is_visible,
 };
 
 static ssize_t dyn_cap_needed_attribute_show(struct device *dev,
-- 
GitLab


From 4481bdc677c1aa9b0138ee9234c8c2d14142b42e Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Wed, 26 Oct 2022 18:56:04 +0800
Subject: [PATCH 0482/4122] scsi: pm8001: Drop !task check in
 pm8001_abort_task()

In commit 0b639decf651 ("scsi: pm8001: Modify task abort handling for SATA
task"), code was introduced to dereference "task" pointer in
pm8001_abort_task(). However there was a pre-existing later check for
"!task", which spooked the kernel test robot.

Function pm8001_abort_task() should never be passed NULL for "task"
pointer, so remove that check. Also remove the "unlikely" hint, as this is
not fastpath code.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666781764-123090-1-git-send-email-john.garry@huawei.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/pm8001/pm8001_sas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/pm8001/pm8001_sas.c b/drivers/scsi/pm8001/pm8001_sas.c
index 2359e827c9e61..e5673c774f66d 100644
--- a/drivers/scsi/pm8001/pm8001_sas.c
+++ b/drivers/scsi/pm8001/pm8001_sas.c
@@ -979,7 +979,7 @@ int pm8001_abort_task(struct sas_task *task)
 	u32 phy_id, port_id;
 	struct sas_task_slow slow_task;
 
-	if (unlikely(!task || !task->lldd_task || !task->dev))
+	if (!task->lldd_task || !task->dev)
 		return TMF_RESP_FUNC_FAILED;
 
 	dev = task->dev;
-- 
GitLab


From e6629dcb00adeebcfeaee45b1c987a84eb3ce1ba Mon Sep 17 00:00:00 2001
From: John Garry <john.garry@huawei.com>
Date: Wed, 26 Oct 2022 19:33:28 +0800
Subject: [PATCH 0483/4122] scsi: MAINTAINERS: Make Xiang Chen HiSilicon SAS
 controller driver maintainer

I am soon leaving Huawei, so will no longer maintain this driver. However I
will stay active in upstream Linux storage domain.

Xiang Chen has worked on the driver for as long as I have and has good
knowledge of the driver, so should do a good job.

Signed-off-by: John Garry <john.garry@huawei.com>
Link: https://lore.kernel.org/r/1666784008-125519-1-git-send-email-john.garry@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..174de5e8d4c86 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9290,7 +9290,7 @@ F:	Documentation/devicetree/bindings/infiniband/hisilicon-hns-roce.txt
 F:	drivers/infiniband/hw/hns/
 
 HISILICON SAS Controller
-M:	John Garry <john.garry@huawei.com>
+M:	Xiang Chen <chenxiang66@hisilicon.com>
 S:	Supported
 W:	http://www.hisilicon.com
 F:	Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
-- 
GitLab


From f7ec74c14f24d78f054efd4ddbda0b4a174cf39f Mon Sep 17 00:00:00 2001
From: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
Date: Wed, 26 Oct 2022 20:45:41 +0530
Subject: [PATCH 0484/4122] dt-bindings: gpio: pca9570: Add compatible for
 slg7xl45106

This patch adds compatible string for the SLG7XL45106,
I2C GPO expander.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml b/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml
index 1acaa0a3d35af..48bf414aa50ea 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-pca9570.yaml
@@ -12,6 +12,7 @@ maintainers:
 properties:
   compatible:
     enum:
+      - dlg,slg7xl45106
       - nxp,pca9570
       - nxp,pca9571
 
-- 
GitLab


From b8a34582c7f7f22f82852f9d3cc192e050f892fd Mon Sep 17 00:00:00 2001
From: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
Date: Wed, 26 Oct 2022 20:45:42 +0530
Subject: [PATCH 0485/4122] gpio: pca9570: add a platform data structure

Add struct pca9570_platform_data for adding the platform data
structure. Also modify the existing structs for pca9570 and pca9571

Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pca9570.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c
index ab2a652964ec2..e8c2ddb1bcd8c 100644
--- a/drivers/gpio/gpio-pca9570.c
+++ b/drivers/gpio/gpio-pca9570.c
@@ -15,14 +15,26 @@
 #include <linux/mutex.h>
 #include <linux/property.h>
 
+/**
+ * struct pca9570_platform_data - GPIO platformdata
+ * @ngpio: no of gpios
+ * @command: Command to be sent
+ */
+struct pca9570_platform_data {
+	u16 ngpio;
+	u32 command;
+};
+
 /**
  * struct pca9570 - GPIO driver data
  * @chip: GPIO controller chip
+ * @p_data: GPIO controller platform data
  * @lock: Protects write sequences
  * @out: Buffer for device register
  */
 struct pca9570 {
 	struct gpio_chip chip;
+	const struct pca9570_platform_data *p_data;
 	struct mutex lock;
 	u8 out;
 };
@@ -106,7 +118,8 @@ static int pca9570_probe(struct i2c_client *client)
 	gpio->chip.get = pca9570_get;
 	gpio->chip.set = pca9570_set;
 	gpio->chip.base = -1;
-	gpio->chip.ngpio = (uintptr_t)device_get_match_data(&client->dev);
+	gpio->p_data = device_get_match_data(&client->dev);
+	gpio->chip.ngpio = gpio->p_data->ngpio;
 	gpio->chip.can_sleep = true;
 
 	mutex_init(&gpio->lock);
@@ -119,16 +132,24 @@ static int pca9570_probe(struct i2c_client *client)
 	return devm_gpiochip_add_data(&client->dev, &gpio->chip, gpio);
 }
 
+static const struct pca9570_platform_data pca9570_gpio = {
+	.ngpio = 4,
+};
+
+static const struct pca9570_platform_data pca9571_gpio = {
+	.ngpio = 8,
+};
+
 static const struct i2c_device_id pca9570_id_table[] = {
-	{ "pca9570", 4 },
-	{ "pca9571", 8 },
+	{ "pca9570", (kernel_ulong_t)&pca9570_gpio},
+	{ "pca9571", (kernel_ulong_t)&pca9571_gpio },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(i2c, pca9570_id_table);
 
 static const struct of_device_id pca9570_of_match_table[] = {
-	{ .compatible = "nxp,pca9570", .data = (void *)4 },
-	{ .compatible = "nxp,pca9571", .data = (void *)8 },
+	{ .compatible = "nxp,pca9570", .data = &pca9570_gpio },
+	{ .compatible = "nxp,pca9571", .data = &pca9571_gpio },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, pca9570_of_match_table);
-- 
GitLab


From fbb19fe17eaef7b6ba2e68dbf0600a97060f2909 Mon Sep 17 00:00:00 2001
From: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
Date: Wed, 26 Oct 2022 20:45:43 +0530
Subject: [PATCH 0486/4122] gpio: pca9570: add slg7xl45106 support

Dialog semiconductors SLG7XL45106 is an 8-bit I2C GPO expander.
The output port is controlled by a data byte with register
address. Add a compatible string for the same. Also update
the driver to write and read from it.

Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Shubhrajyoti Datta <shubhrajyoti.datta@amd.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pca9570.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/gpio-pca9570.c b/drivers/gpio/gpio-pca9570.c
index e8c2ddb1bcd8c..6c07a8811a7a5 100644
--- a/drivers/gpio/gpio-pca9570.c
+++ b/drivers/gpio/gpio-pca9570.c
@@ -15,6 +15,8 @@
 #include <linux/mutex.h>
 #include <linux/property.h>
 
+#define SLG7XL45106_GPO_REG	0xDB
+
 /**
  * struct pca9570_platform_data - GPIO platformdata
  * @ngpio: no of gpios
@@ -44,7 +46,11 @@ static int pca9570_read(struct pca9570 *gpio, u8 *value)
 	struct i2c_client *client = to_i2c_client(gpio->chip.parent);
 	int ret;
 
-	ret = i2c_smbus_read_byte(client);
+	if (gpio->p_data->command != 0)
+		ret = i2c_smbus_read_byte_data(client, gpio->p_data->command);
+	else
+		ret = i2c_smbus_read_byte(client);
+
 	if (ret < 0)
 		return ret;
 
@@ -56,6 +62,9 @@ static int pca9570_write(struct pca9570 *gpio, u8 value)
 {
 	struct i2c_client *client = to_i2c_client(gpio->chip.parent);
 
+	if (gpio->p_data->command != 0)
+		return i2c_smbus_write_byte_data(client, gpio->p_data->command, value);
+
 	return i2c_smbus_write_byte(client, value);
 }
 
@@ -140,14 +149,21 @@ static const struct pca9570_platform_data pca9571_gpio = {
 	.ngpio = 8,
 };
 
+static const struct pca9570_platform_data slg7xl45106_gpio = {
+	.ngpio = 8,
+	.command = SLG7XL45106_GPO_REG,
+};
+
 static const struct i2c_device_id pca9570_id_table[] = {
 	{ "pca9570", (kernel_ulong_t)&pca9570_gpio},
 	{ "pca9571", (kernel_ulong_t)&pca9571_gpio },
+	{ "slg7xl45106", (kernel_ulong_t)&slg7xl45106_gpio },
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(i2c, pca9570_id_table);
 
 static const struct of_device_id pca9570_of_match_table[] = {
+	{ .compatible = "dlg,slg7xl45106", .data = &slg7xl45106_gpio},
 	{ .compatible = "nxp,pca9570", .data = &pca9570_gpio },
 	{ .compatible = "nxp,pca9571", .data = &pca9571_gpio },
 	{ /* sentinel */ }
-- 
GitLab


From ba4ff1cb6cac8acca928ea41588cf84b18ffdedb Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@ti.com>
Date: Tue, 25 Oct 2022 01:19:08 -0700
Subject: [PATCH 0487/4122] dt-bindings: PCI: ti,j721e-pci-host: add interrupt
 controller definition

Add missing 'interrupt-controller' property and related subnodes to resolve
the following warning:

arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dtb: pcie@2910000: Unevaluated properties are not allowed ('interrupt-controller' was unexpected)
        From schema: Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml

Link: https://lore.kernel.org/r/20221025081909.404107-2-mranostay@ti.com
Signed-off-by: Matt Ranostay <mranostay@ti.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/pci/ti,j721e-pci-host.yaml  | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
index 2115d5a3f0e14..0f5914a22c14c 100644
--- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
+++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
@@ -76,6 +76,19 @@ properties:
 
   msi-map: true
 
+  interrupt-controller:
+    type: object
+    additionalProperties: false
+
+    properties:
+      interrupt-controller: true
+
+      '#interrupt-cells':
+        const: 1
+
+      interrupts:
+        maxItems: 1
+
 required:
   - compatible
   - reg
-- 
GitLab


From 598418e6035622c0dc735764f0f1b7293c0c7d48 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@ti.com>
Date: Tue, 25 Oct 2022 01:19:09 -0700
Subject: [PATCH 0488/4122] dt-bindings: PCI: ti,j721e-pci-*: Add missing
 interrupt properties

Both interrupts, and interrupt names weren't defined in both EP and host
yaml. Also define the only possible interrupt-name as link_state, and
maxItems of interrupts to one.

This patch resolves the following warning:

arch/arm64/boot/dts/ti/k3-j721s2-common-proc-board.dtb: pcie-ep@2910000: Unevaluated properties are not allowed ('interrupt-names', 'interrupts' were unexpected)
        From schema Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml

Link: https://lore.kernel.org/r/20221025081909.404107-3-mranostay@ti.com
Signed-off-by: Matt Ranostay <mranostay@ti.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml | 7 +++++++
 .../devicetree/bindings/pci/ti,j721e-pci-host.yaml         | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml
index aed437dac363a..10e6eabdff534 100644
--- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-ep.yaml
@@ -58,6 +58,13 @@ properties:
   dma-coherent:
     description: Indicates that the PCIe IP block can ensure the coherency
 
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    items:
+      - const: link_state
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
index 0f5914a22c14c..d9df7cd922f1f 100644
--- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
+++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
@@ -76,6 +76,13 @@ properties:
 
   msi-map: true
 
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    items:
+      - const: link_state
+
   interrupt-controller:
     type: object
     additionalProperties: false
-- 
GitLab


From 66110361281b2f7da0c8bd51eaf1f152f4236035 Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Mon, 26 Sep 2022 16:49:23 +0530
Subject: [PATCH 0489/4122] PCI: dwc: Fix n_fts[] array overrun

commit aeaa0bfe89654 ("PCI: dwc: Move N_FTS setup to common setup")
incorrectly uses pci->link_gen in deriving the index to the
n_fts[] array also introducing the issue of accessing beyond the
boundaries of array for greater than Gen-2 speeds. This change fixes
that issue.

Link: https://lore.kernel.org/r/20220926111923.22487-1-vidyas@nvidia.com
Fixes: aeaa0bfe8965 ("PCI: dwc: Move N_FTS setup to common setup")
Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Jingoo Han <jingoohan1@gmail.com>
---
 drivers/pci/controller/dwc/pcie-designware.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index c6725c519a479..9e4d96e5a3f5a 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -641,7 +641,7 @@ void dw_pcie_setup(struct dw_pcie *pci)
 	if (pci->n_fts[1]) {
 		val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
 		val &= ~PORT_LOGIC_N_FTS_MASK;
-		val |= pci->n_fts[pci->link_gen - 1];
+		val |= pci->n_fts[1];
 		dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
 	}
 
-- 
GitLab


From 4508d32ccced24c972bc4592104513e1ff8439b5 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Tue, 25 Oct 2022 10:37:13 +0300
Subject: [PATCH 0490/4122] RDMA/core: Fix order of nldev_exit call

Create symmetrical exit flow by calling to nldev_exit() after
call to rdma_nl_unregister(RDMA_NL_LS).

Fixes: 6c80b41abe22 ("RDMA/netlink: Add nldev initialization flows")
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/64e676774a53a406f4cde265d5a4cfd6b8e97df9.1666683334.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index ae60c73babcc5..3409c55ea88bf 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2843,8 +2843,8 @@ err:
 static void __exit ib_core_cleanup(void)
 {
 	roce_gid_mgmt_cleanup();
-	nldev_exit();
 	rdma_nl_unregister(RDMA_NL_LS);
+	nldev_exit();
 	unregister_pernet_device(&rdma_dev_net_ops);
 	unregister_blocking_lsm_notifier(&ibdev_lsm_nb);
 	ib_sa_cleanup();
-- 
GitLab


From e32e1e26c4098d8a866ce09fd26d8004da4ddf9e Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Mon, 19 Sep 2022 20:03:39 +0530
Subject: [PATCH 0491/4122] PCI: Add PCI_PTM_CAP_RES macro

Add macro defining Responder capable bit in Precision Time Measurement
capability register.

Link: https://lore.kernel.org/r/20220919143340.4527-2-vidyas@nvidia.com
Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Jingoo Han <jingoohan1@gmail.com>
---
 include/uapi/linux/pci_regs.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index 57b8e2ffb1dd3..1c3591c8e09ef 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -1058,6 +1058,7 @@
 /* Precision Time Measurement */
 #define PCI_PTM_CAP			0x04	    /* PTM Capability */
 #define  PCI_PTM_CAP_REQ		0x00000001  /* Requester capable */
+#define  PCI_PTM_CAP_RES		0x00000002  /* Responder capable */
 #define  PCI_PTM_CAP_ROOT		0x00000004  /* Root capable */
 #define  PCI_PTM_GRANULARITY_MASK	0x0000FF00  /* Clock granularity */
 #define PCI_PTM_CTRL			0x08	    /* PTM Control */
-- 
GitLab


From 442ae919e6ca77354551a7b8717746b44272e274 Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Mon, 19 Sep 2022 20:03:40 +0530
Subject: [PATCH 0492/4122] PCI: designware-ep: Disable PTM capabilities for EP
 mode

Dual mode DesignWare PCIe IP has PTM capability enabled (if supported) even
in the EP mode. The PCIe compliance for the EP mode expects PTM
capabilities (ROOT_CAPABLE, RES_CAPABLE, CLK_GRAN) be disabled.
Hence disable PTM for the EP mode.

Link: https://lore.kernel.org/r/20220919143340.4527-3-vidyas@nvidia.com
Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Jingoo Han <jingoohan1@gmail.com>
---
 .../pci/controller/dwc/pcie-designware-ep.c   | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 83ddb190292e4..efc6c6360e28f 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -643,7 +643,7 @@ static unsigned int dw_pcie_ep_find_ext_capability(struct dw_pcie *pci, int cap)
 int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep)
 {
 	struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
-	unsigned int offset;
+	unsigned int offset, ptm_cap_base;
 	unsigned int nbars;
 	u8 hdr_type;
 	u32 reg;
@@ -659,6 +659,7 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep)
 	}
 
 	offset = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_REBAR);
+	ptm_cap_base = dw_pcie_ep_find_ext_capability(pci, PCI_EXT_CAP_ID_PTM);
 
 	dw_pcie_dbi_ro_wr_en(pci);
 
@@ -671,6 +672,22 @@ int dw_pcie_ep_init_complete(struct dw_pcie_ep *ep)
 			dw_pcie_writel_dbi(pci, offset + PCI_REBAR_CAP, 0x0);
 	}
 
+	/*
+	 * PTM responder capability can be disabled only after disabling
+	 * PTM root capability.
+	 */
+	if (ptm_cap_base) {
+		dw_pcie_dbi_ro_wr_en(pci);
+		reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
+		reg &= ~PCI_PTM_CAP_ROOT;
+		dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
+
+		reg = dw_pcie_readl_dbi(pci, ptm_cap_base + PCI_PTM_CAP);
+		reg &= ~(PCI_PTM_CAP_RES | PCI_PTM_GRANULARITY_MASK);
+		dw_pcie_writel_dbi(pci, ptm_cap_base + PCI_PTM_CAP, reg);
+		dw_pcie_dbi_ro_wr_dis(pci);
+	}
+
 	dw_pcie_setup(pci);
 	dw_pcie_dbi_ro_wr_dis(pci);
 
-- 
GitLab


From 7711cbb4862aa00909a248f011ba3fa578bd1cf3 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Thu, 23 Jun 2022 09:38:17 +0900
Subject: [PATCH 0493/4122] PCI: endpoint: Fix WARN() when an endpoint driver
 is removed

Since there is no release callback defined for the PCI EPC device,
the below warning is thrown by driver core when a PCI endpoint driver is
removed:

  Device 'e65d0000.pcie-ep' does not have a release() function, it is broken and must be fixed. See Documentation/core-api/kobject.rst.
  WARNING: CPU: 0 PID: 139 at drivers/base/core.c:2232 device_release+0x78/0x8c

Hence, add the release callback and also move the kfree(epc) from
pci_epc_destroy() so that the epc memory is freed when all references are
dropped.

Link: https://lore.kernel.org/r/20220623003817.298173-1-yoshihiro.shimoda.uh@renesas.com
Tested-by: Vidya Sagar <vidyas@nvidia.com>
Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/pci/endpoint/pci-epc-core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/endpoint/pci-epc-core.c b/drivers/pci/endpoint/pci-epc-core.c
index 3bc9273d0a082..2542196e8c3d9 100644
--- a/drivers/pci/endpoint/pci-epc-core.c
+++ b/drivers/pci/endpoint/pci-epc-core.c
@@ -724,7 +724,6 @@ void pci_epc_destroy(struct pci_epc *epc)
 {
 	pci_ep_cfs_remove_epc_group(epc->group);
 	device_unregister(&epc->dev);
-	kfree(epc);
 }
 EXPORT_SYMBOL_GPL(pci_epc_destroy);
 
@@ -746,6 +745,11 @@ void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc)
 }
 EXPORT_SYMBOL_GPL(devm_pci_epc_destroy);
 
+static void pci_epc_release(struct device *dev)
+{
+	kfree(to_pci_epc(dev));
+}
+
 /**
  * __pci_epc_create() - create a new endpoint controller (EPC) device
  * @dev: device that is creating the new EPC
@@ -779,6 +783,7 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops,
 	device_initialize(&epc->dev);
 	epc->dev.class = pci_epc_class;
 	epc->dev.parent = dev;
+	epc->dev.release = pci_epc_release;
 	epc->ops = ops;
 
 	ret = dev_set_name(&epc->dev, "%s", dev_name(dev));
-- 
GitLab


From 16e3f40779659ff525364e5d9df369953fa7192b Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sun, 4 Sep 2022 23:30:53 -0700
Subject: [PATCH 0494/4122] PCI: tegra: Switch to using devm_fwnode_gpiod_get

[devm_]gpiod_get_from_of_node in drivers usage should be limited
so that gpiolib can be cleaned up; let's switch to the generic device
property API.

It may even help with handling secondary fwnodes when gpiolib is taught
to handle gpios described by swnodes.

Link: https://lore.kernel.org/r/20220903-gpiod_get_from_of_node-remove-v1-1-b29adfb27a6c@gmail.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
[lpieralisi@kernel.org: commit log]
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pci/controller/pci-tegra.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/controller/pci-tegra.c b/drivers/pci/controller/pci-tegra.c
index 24478ae5a345d..b6f77b102709d 100644
--- a/drivers/pci/controller/pci-tegra.c
+++ b/drivers/pci/controller/pci-tegra.c
@@ -2197,10 +2197,11 @@ static int tegra_pcie_parse_dt(struct tegra_pcie *pcie)
 		 * and in this case fall back to using AFI per port register
 		 * to toggle PERST# SFIO line.
 		 */
-		rp->reset_gpio = devm_gpiod_get_from_of_node(dev, port,
-							     "reset-gpios", 0,
-							     GPIOD_OUT_LOW,
-							     label);
+		rp->reset_gpio = devm_fwnode_gpiod_get(dev,
+						       of_fwnode_handle(port),
+						       "reset",
+						       GPIOD_OUT_LOW,
+						       label);
 		if (IS_ERR(rp->reset_gpio)) {
 			if (PTR_ERR(rp->reset_gpio) == -ENOENT) {
 				rp->reset_gpio = NULL;
-- 
GitLab


From 6acd25cc98ce0c9ee4fefdaf44fc8bca534b26e5 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Thu, 25 Aug 2022 18:01:01 +0900
Subject: [PATCH 0495/4122] PCI: pci-epf-test: Register notifier if only
 core_init_notifier is enabled

The pci_epf_test_notifier function should be installed also if only
core_init_notifier is enabled. Fix the current logic.

Link: https://lore.kernel.org/r/20220825090101.20474-1-hayashi.kunihiko@socionext.com
Fixes: 5e50ee27d4a5 ("PCI: pci-epf-test: Add support to defer core initialization")
Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Om Prakash Singh <omp@nvidia.com>
Acked-by: Kishon Vijay Abraham I <kishon@ti.com>
---
 drivers/pci/endpoint/functions/pci-epf-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-test.c b/drivers/pci/endpoint/functions/pci-epf-test.c
index 36b1801a061b7..55283d2379a6a 100644
--- a/drivers/pci/endpoint/functions/pci-epf-test.c
+++ b/drivers/pci/endpoint/functions/pci-epf-test.c
@@ -979,7 +979,7 @@ static int pci_epf_test_bind(struct pci_epf *epf)
 	if (ret)
 		epf_test->dma_supported = false;
 
-	if (linkup_notifier) {
+	if (linkup_notifier || core_init_notifier) {
 		epf->nb.notifier_call = pci_epf_test_notifier;
 		pci_epc_register_notifier(epc, &epf->nb);
 	} else {
-- 
GitLab


From 25c2e5911705c69d7b01c35ea0b32cbe6ada67cd Mon Sep 17 00:00:00 2001
From: Nikita Shubin <n.shubin@yadro.com>
Date: Mon, 15 Aug 2022 16:22:38 +0300
Subject: [PATCH 0496/4122] perf tools riscv: Add support for get_cpuid_str
 function

The get_cpuid_str function returns the string that contains values of
MVENDORID, MARCHID and MIMPID in hex format separated by coma.

The values themselves are taken from first cpu entry in "/proc/cpuid"
that contains "mvendorid", "marchid" and "mimpid".

Signed-off-by: Nikita Shubin <n.shubin@yadro.com>
Tested-by: Kautuk Consul <kconsul@ventanamicro.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anup Patel <anup@brainfault.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-riscv@lists.infradead.org
Cc: linux@yadro.com
Link: https://lore.kernel.org/r/20220815132251.25702-2-nikita.shubin@maquefel.me
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/riscv/util/Build    |   1 +
 tools/perf/arch/riscv/util/header.c | 104 ++++++++++++++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 tools/perf/arch/riscv/util/header.c

diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build
index 7d3050134ae0f..603dbb5ae4dc9 100644
--- a/tools/perf/arch/riscv/util/Build
+++ b/tools/perf/arch/riscv/util/Build
@@ -1,4 +1,5 @@
 perf-y += perf_regs.o
+perf-y += header.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c
new file mode 100644
index 0000000000000..4a41856938a88
--- /dev/null
+++ b/tools/perf/arch/riscv/util/header.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Implementation of get_cpuid().
+ *
+ * Author: Nikita Shubin <n.shubin@yadro.com>
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <api/fs/fs.h>
+#include <errno.h>
+#include "../../util/debug.h"
+#include "../../util/header.h"
+
+#define CPUINFO_MVEN	"mvendorid"
+#define CPUINFO_MARCH	"marchid"
+#define CPUINFO_MIMP	"mimpid"
+#define CPUINFO		"/proc/cpuinfo"
+
+static char *_get_field(const char *line)
+{
+	char *line2, *nl;
+
+	line2 = strrchr(line, ' ');
+	if (!line2)
+		return NULL;
+
+	line2++;
+	nl = strrchr(line, '\n');
+	if (!nl)
+		return NULL;
+
+	return strndup(line2, nl - line2);
+}
+
+static char *_get_cpuid(void)
+{
+	char *line = NULL;
+	char *mvendorid = NULL;
+	char *marchid = NULL;
+	char *mimpid = NULL;
+	char *cpuid = NULL;
+	int read;
+	unsigned long line_sz;
+	FILE *cpuinfo;
+
+	cpuinfo = fopen(CPUINFO, "r");
+	if (cpuinfo == NULL)
+		return cpuid;
+
+	while ((read = getline(&line, &line_sz, cpuinfo)) != -1) {
+		if (!strncmp(line, CPUINFO_MVEN, strlen(CPUINFO_MVEN))) {
+			mvendorid = _get_field(line);
+			if (!mvendorid)
+				goto free;
+		} else if (!strncmp(line, CPUINFO_MARCH, strlen(CPUINFO_MARCH))) {
+			marchid = _get_field(line);
+			if (!marchid)
+				goto free;
+		} else if (!strncmp(line, CPUINFO_MIMP, strlen(CPUINFO_MIMP))) {
+			mimpid = _get_field(line);
+			if (!mimpid)
+				goto free;
+
+			break;
+		}
+	}
+
+	if (!mvendorid || !marchid || !mimpid)
+		goto free;
+
+	if (asprintf(&cpuid, "%s-%s-%s", mvendorid, marchid, mimpid) < 0)
+		cpuid = NULL;
+
+free:
+	fclose(cpuinfo);
+	free(mvendorid);
+	free(marchid);
+	free(mimpid);
+
+	return cpuid;
+}
+
+int get_cpuid(char *buffer, size_t sz)
+{
+	char *cpuid = _get_cpuid();
+	int ret = 0;
+
+	if (sz < strlen(cpuid)) {
+		ret = -EINVAL;
+		goto free;
+	}
+
+	scnprintf(buffer, sz, "%s", cpuid);
+free:
+	free(cpuid);
+	return ret;
+}
+
+char *
+get_cpuid_str(struct perf_pmu *pmu __maybe_unused)
+{
+	return _get_cpuid();
+}
-- 
GitLab


From 8f0dcb4e7364af3e5b156e6a3fdac0860733eb86 Mon Sep 17 00:00:00 2001
From: Nikita Shubin <n.shubin@yadro.com>
Date: Mon, 15 Aug 2022 16:22:39 +0300
Subject: [PATCH 0497/4122] perf arch events: riscv sbi firmware std event
 files

Firmware events are defined by "RISC-V Supervisor Binary Interface
Specification", which means they should be always available as long as
firmware supports >= 0.3.0 SBI.

Expose them to arch std events, so they can be reused by particular PMU
bindings.

Signed-off-by: Nikita Shubin <n.shubin@yadro.com>
Tested-by: Kautuk Consul <kconsul@ventanamicro.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anup Patel <anup@brainfault.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-riscv@lists.infradead.org
Cc: linux@yadro.com
Link: https://lore.kernel.org/r/20220815132251.25702-3-nikita.shubin@maquefel.me
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/riscv/riscv-sbi-firmware.json        | 134 ++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json

diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
new file mode 100644
index 0000000000000..a9939823b14b5
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json
@@ -0,0 +1,134 @@
+[
+  {
+    "PublicDescription": "Misaligned load trap",
+    "ConfigCode": "0x8000000000000000",
+    "EventName": "FW_MISALIGNED_LOAD",
+    "BriefDescription": "Misaligned load trap event"
+  },
+  {
+    "PublicDescription": "Misaligned store trap",
+    "ConfigCode": "0x8000000000000001",
+    "EventName": "FW_MISALIGNED_STORE",
+    "BriefDescription": "Misaligned store trap event"
+  },
+  {
+    "PublicDescription": "Load access trap",
+    "ConfigCode": "0x8000000000000002",
+    "EventName": "FW_ACCESS_LOAD",
+    "BriefDescription": "Load access trap event"
+  },
+  {
+    "PublicDescription": "Store access trap",
+    "ConfigCode": "0x8000000000000003",
+    "EventName": "FW_ACCESS_STORE",
+    "BriefDescription": "Store access trap event"
+  },
+  {
+    "PublicDescription": "Illegal instruction trap",
+    "ConfigCode": "0x8000000000000004",
+    "EventName": "FW_ILLEGAL_INSN",
+    "BriefDescription": "Illegal instruction trap event"
+  },
+  {
+    "PublicDescription": "Set timer event",
+    "ConfigCode": "0x8000000000000005",
+    "EventName": "FW_SET_TIMER",
+    "BriefDescription": "Set timer event"
+  },
+  {
+    "PublicDescription": "Sent IPI to other HART event",
+    "ConfigCode": "0x8000000000000006",
+    "EventName": "FW_IPI_SENT",
+    "BriefDescription": "Sent IPI to other HART event"
+  },
+  {
+    "PublicDescription": "Received IPI from other HART event",
+    "ConfigCode": "0x8000000000000007",
+    "EventName": "FW_IPI_RECEIVED",
+    "BriefDescription": "Received IPI from other HART event"
+  },
+  {
+    "PublicDescription": "Sent FENCE.I request to other HART event",
+    "ConfigCode": "0x8000000000000008",
+    "EventName": "FW_FENCE_I_SENT",
+    "BriefDescription": "Sent FENCE.I request to other HART event"
+  },
+  {
+    "PublicDescription": "Received FENCE.I request from other HART event",
+    "ConfigCode": "0x8000000000000009",
+    "EventName": "FW_FENCE_I_RECEIVED",
+    "BriefDescription": "Received FENCE.I request from other HART event"
+  },
+  {
+    "PublicDescription": "Sent SFENCE.VMA request to other HART event",
+    "ConfigCode": "0x800000000000000a",
+    "EventName": "FW_SFENCE_VMA_SENT",
+    "BriefDescription": "Sent SFENCE.VMA request to other HART event"
+  },
+  {
+    "PublicDescription": "Received SFENCE.VMA request from other HART event",
+    "ConfigCode": "0x800000000000000b",
+    "EventName": "FW_SFENCE_VMA_RECEIVED",
+    "BriefDescription": "Received SFENCE.VMA request from other HART event"
+  },
+  {
+    "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event",
+    "ConfigCode": "0x800000000000000c",
+    "EventName": "FW_SFENCE_VMA_RECEIVED",
+    "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event"
+  },
+  {
+    "PublicDescription": "Received SFENCE.VMA with ASID request from other HART event",
+    "ConfigCode": "0x800000000000000d",
+    "EventName": "FW_SFENCE_VMA_ASID_RECEIVED",
+    "BriefDescription": "Received SFENCE.VMA with ASID request from other HART event"
+  },
+  {
+    "PublicDescription": "Sent HFENCE.GVMA request to other HART event",
+    "ConfigCode": "0x800000000000000e",
+    "EventName": "FW_HFENCE_GVMA_SENT",
+    "BriefDescription": "Sent HFENCE.GVMA request to other HART event"
+  },
+  {
+    "PublicDescription": "Received HFENCE.GVMA request from other HART event",
+    "ConfigCode": "0x800000000000000f",
+    "EventName": "FW_HFENCE_GVMA_RECEIVED",
+    "BriefDescription": "Received HFENCE.GVMA request from other HART event"
+  },
+  {
+    "PublicDescription": "Sent HFENCE.GVMA with VMID request to other HART event",
+    "ConfigCode": "0x8000000000000010",
+    "EventName": "FW_HFENCE_GVMA_VMID_SENT",
+    "BriefDescription": "Sent HFENCE.GVMA with VMID request to other HART event"
+  },
+  {
+    "PublicDescription": "Received HFENCE.GVMA with VMID request from other HART event",
+    "ConfigCode": "0x8000000000000011",
+    "EventName": "FW_HFENCE_GVMA_VMID_RECEIVED",
+    "BriefDescription": "Received HFENCE.GVMA with VMID request from other HART event"
+  },
+  {
+    "PublicDescription": "Sent HFENCE.VVMA request to other HART event",
+    "ConfigCode": "0x8000000000000012",
+    "EventName": "FW_HFENCE_VVMA_SENT",
+    "BriefDescription": "Sent HFENCE.VVMA request to other HART event"
+  },
+  {
+    "PublicDescription": "Received HFENCE.VVMA request from other HART event",
+    "ConfigCode": "0x8000000000000013",
+    "EventName": "FW_HFENCE_VVMA_RECEIVED",
+    "BriefDescription": "Received HFENCE.VVMA request from other HART event"
+  },
+  {
+    "PublicDescription": "Sent HFENCE.VVMA with ASID request to other HART event",
+    "ConfigCode": "0x8000000000000014",
+    "EventName": "FW_HFENCE_VVMA_ASID_SENT",
+    "BriefDescription": "Sent HFENCE.VVMA with ASID request to other HART event"
+  },
+  {
+    "PublicDescription": "Received HFENCE.VVMA with ASID request from other HART event",
+    "ConfigCode": "0x8000000000000015",
+    "EventName": "FW_HFENCE_VVMA_ASID_RECEIVED",
+    "BriefDescription": "Received HFENCE.VVMA with ASID request from other HART event"
+  }
+]
-- 
GitLab


From c4f769d4093d114f3c374ba06d6eef1fb763b56c Mon Sep 17 00:00:00 2001
From: Nikita Shubin <n.shubin@yadro.com>
Date: Mon, 15 Aug 2022 16:22:40 +0300
Subject: [PATCH 0498/4122] perf vendor events riscv: add Sifive U74 JSON file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch add the Sifive U74 JSON file.

Link: https://sifive.cdn.prismic.io/sifive/ad5577a0-9a00-45c9-a5d0-424a3d586060_u74_core_complex_manual_21G3.pdf
Derived-from-code-by: João Mário Domingos <joao.mario@tecnico.ulisboa.pt>
Signed-off-by: Nikita Shubin <n.shubin@yadro.com>
Tested-by: Kautuk Consul <kconsul@ventanamicro.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Cc: Albert Ou <aou@eecs.berkeley.edu>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Anup Patel <anup@brainfault.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-riscv@lists.infradead.org
Cc: linux@yadro.com
Link: https://lore.kernel.org/r/20220815132251.25702-4-nikita.shubin@maquefel.me
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/arch/riscv/mapfile.csv  | 17 ++++
 .../arch/riscv/sifive/u74/firmware.json       | 68 ++++++++++++++
 .../arch/riscv/sifive/u74/instructions.json   | 92 +++++++++++++++++++
 .../arch/riscv/sifive/u74/memory.json         | 32 +++++++
 .../arch/riscv/sifive/u74/microarch.json      | 57 ++++++++++++
 5 files changed, 266 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/riscv/mapfile.csv
 create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
 create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json
 create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json
 create mode 100644 tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json

diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv
new file mode 100644
index 0000000000000..c61b3d6ef6166
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv
@@ -0,0 +1,17 @@
+# Format:
+#	MVENDORID-MARCHID-MIMPID,Version,JSON/file/pathname,Type
+#
+# where
+#	MVENDORID	JEDEC code of the core provider
+#	MARCHID		base microarchitecture of the hart
+#	MIMPID		unique encoding of the version
+#			of the processor implementation
+#	Version could be used to track version of JSON file
+#		but currently unused.
+#	JSON/file/pathname is the path to JSON file, relative
+#		to tools/perf/pmu-events/arch/riscv/.
+#	Type is core, uncore etc
+#
+#
+#MVENDORID-MARCHID-MIMPID,Version,Filename,EventType
+0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
new file mode 100644
index 0000000000000..9b4a032186a7b
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json
@@ -0,0 +1,68 @@
+[
+  {
+    "ArchStdEvent": "FW_MISALIGNED_LOAD"
+  },
+  {
+    "ArchStdEvent": "FW_MISALIGNED_STORE"
+  },
+  {
+    "ArchStdEvent": "FW_ACCESS_LOAD"
+  },
+  {
+    "ArchStdEvent": "FW_ACCESS_STORE"
+  },
+  {
+    "ArchStdEvent": "FW_ILLEGAL_INSN"
+  },
+  {
+    "ArchStdEvent": "FW_SET_TIMER"
+  },
+  {
+    "ArchStdEvent": "FW_IPI_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_IPI_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_FENCE_I_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_FENCE_I_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT"
+  },
+  {
+    "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED"
+  }
+]
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json
new file mode 100644
index 0000000000000..5eab718c9256c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json
@@ -0,0 +1,92 @@
+[
+  {
+    "EventName": "EXCEPTION_TAKEN",
+    "EventCode": "0x0000100",
+    "BriefDescription": "Exception taken"
+  },
+  {
+    "EventName": "INTEGER_LOAD_RETIRED",
+    "EventCode": "0x0000200",
+    "BriefDescription": "Integer load instruction retired"
+  },
+  {
+    "EventName": "INTEGER_STORE_RETIRED",
+    "EventCode": "0x0000400",
+    "BriefDescription": "Integer store instruction retired"
+  },
+  {
+    "EventName": "ATOMIC_MEMORY_RETIRED",
+    "EventCode": "0x0000800",
+    "BriefDescription": "Atomic memory operation retired"
+  },
+  {
+    "EventName": "SYSTEM_INSTRUCTION_RETIRED",
+    "EventCode": "0x0001000",
+    "BriefDescription": "System instruction retired"
+  },
+  {
+    "EventName": "INTEGER_ARITHMETIC_RETIRED",
+    "EventCode": "0x0002000",
+    "BriefDescription": "Integer arithmetic instruction retired"
+  },
+  {
+    "EventName": "CONDITIONAL_BRANCH_RETIRED",
+    "EventCode": "0x0004000",
+    "BriefDescription": "Conditional branch retired"
+  },
+  {
+    "EventName": "JAL_INSTRUCTION_RETIRED",
+    "EventCode": "0x0008000",
+    "BriefDescription": "JAL instruction retired"
+  },
+  {
+    "EventName": "JALR_INSTRUCTION_RETIRED",
+    "EventCode": "0x0010000",
+    "BriefDescription": "JALR instruction retired"
+  },
+  {
+    "EventName": "INTEGER_MULTIPLICATION_RETIRED",
+    "EventCode": "0x0020000",
+    "BriefDescription": "Integer multiplication instruction retired"
+  },
+  {
+    "EventName": "INTEGER_DIVISION_RETIRED",
+    "EventCode": "0x0040000",
+    "BriefDescription": "Integer division instruction retired"
+  },
+  {
+    "EventName": "FP_LOAD_RETIRED",
+    "EventCode": "0x0080000",
+    "BriefDescription": "Floating-point load instruction retired"
+  },
+  {
+    "EventName": "FP_STORE_RETIRED",
+    "EventCode": "0x0100000",
+    "BriefDescription": "Floating-point store instruction retired"
+  },
+  {
+    "EventName": "FP_ADDITION_RETIRED",
+    "EventCode": "0x0200000",
+    "BriefDescription": "Floating-point addition retired"
+  },
+  {
+    "EventName": "FP_MULTIPLICATION_RETIRED",
+    "EventCode": "0x0400000",
+    "BriefDescription": "Floating-point multiplication retired"
+  },
+  {
+    "EventName": "FP_FUSEDMADD_RETIRED",
+    "EventCode": "0x0800000",
+    "BriefDescription": "Floating-point fused multiply-add retired"
+  },
+  {
+    "EventName": "FP_DIV_SQRT_RETIRED",
+    "EventCode": "0x1000000",
+    "BriefDescription": "Floating-point division or square-root retired"
+  },
+  {
+    "EventName": "OTHER_FP_RETIRED",
+    "EventCode": "0x2000000",
+    "BriefDescription": "Other floating-point instruction retired"
+  }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json
new file mode 100644
index 0000000000000..be1a46312ac36
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json
@@ -0,0 +1,32 @@
+[
+  {
+    "EventName": "ICACHE_RETIRED",
+    "EventCode": "0x0000102",
+    "BriefDescription": "Instruction cache miss"
+  },
+  {
+    "EventName": "DCACHE_MISS_MMIO_ACCESSES",
+    "EventCode": "0x0000202",
+    "BriefDescription": "Data cache miss or memory-mapped I/O access"
+  },
+  {
+    "EventName": "DCACHE_WRITEBACK",
+    "EventCode": "0x0000402",
+    "BriefDescription": "Data cache write-back"
+  },
+  {
+    "EventName": "INST_TLB_MISS",
+    "EventCode": "0x0000802",
+    "BriefDescription": "Instruction TLB miss"
+  },
+  {
+    "EventName": "DATA_TLB_MISS",
+    "EventCode": "0x0001002",
+    "BriefDescription": "Data TLB miss"
+  },
+  {
+    "EventName": "UTLB_MISS",
+    "EventCode": "0x0002002",
+    "BriefDescription": "UTLB miss"
+  }
+]
\ No newline at end of file
diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json
new file mode 100644
index 0000000000000..50ffa55418cb8
--- /dev/null
+++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json
@@ -0,0 +1,57 @@
+[
+  {
+    "EventName": "ADDRESSGEN_INTERLOCK",
+    "EventCode": "0x0000101",
+    "BriefDescription": "Address-generation interlock"
+  },
+  {
+    "EventName": "LONGLAT_INTERLOCK",
+    "EventCode": "0x0000201",
+    "BriefDescription": "Long-latency interlock"
+  },
+  {
+    "EventName": "CSR_READ_INTERLOCK",
+    "EventCode": "0x0000401",
+    "BriefDescription": "CSR read interlock"
+  },
+  {
+    "EventName": "ICACHE_ITIM_BUSY",
+    "EventCode": "0x0000801",
+    "BriefDescription": "Instruction cache/ITIM busy"
+  },
+  {
+    "EventName": "DCACHE_DTIM_BUSY",
+    "EventCode": "0x0001001",
+    "BriefDescription": "Data cache/DTIM busy"
+  },
+  {
+    "EventName": "BRANCH_DIRECTION_MISPREDICTION",
+    "EventCode": "0x0002001",
+    "BriefDescription": "Branch direction misprediction"
+  },
+  {
+    "EventName": "BRANCH_TARGET_MISPREDICTION",
+    "EventCode": "0x0004001",
+    "BriefDescription": "Branch/jump target misprediction"
+  },
+  {
+    "EventName": "PIPE_FLUSH_CSR_WRITE",
+    "EventCode": "0x0008001",
+    "BriefDescription": "Pipeline flush from CSR write"
+  },
+  {
+    "EventName": "PIPE_FLUSH_OTHER_EVENT",
+    "EventCode": "0x0010001",
+    "BriefDescription": "Pipeline flush from other event"
+  },
+  {
+    "EventName": "INTEGER_MULTIPLICATION_INTERLOCK",
+    "EventCode": "0x0020001",
+    "BriefDescription": "Integer multiplication interlock"
+  },
+  {
+    "EventName": "FP_INTERLOCK",
+    "EventCode": "0x0040001",
+    "BriefDescription": "Floating-point interlock"
+  }
+]
\ No newline at end of file
-- 
GitLab


From f7400262ea2192fc79b6f5a68242b3495b016981 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:08 -0700
Subject: [PATCH 0499/4122] perf tools: Save evsel->pmu in parse_events()

Now evsel has a pmu pointer, let's save the info and use it like in
evsel__find_pmu().  The missing feature check needs to be changed as the
pmu pointer can be set from the beginning.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c        | 20 ++++++++++----------
 tools/perf/util/parse-events.c |  1 +
 tools/perf/util/pmu.c          |  4 ++++
 3 files changed, 15 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 76605fde35078..b7140beca9700 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -467,6 +467,7 @@ struct evsel *evsel__clone(struct evsel *orig)
 	evsel->collect_stat = orig->collect_stat;
 	evsel->weak_group = orig->weak_group;
 	evsel->use_config_name = orig->use_config_name;
+	evsel->pmu = orig->pmu;
 
 	if (evsel__copy_config_terms(evsel, orig) < 0)
 		goto out_err;
@@ -1966,17 +1967,16 @@ bool evsel__detect_missing_features(struct evsel *evsel)
 		perf_missing_features.mmap2 = true;
 		pr_debug2_peo("switching off mmap2\n");
 		return true;
-	} else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) &&
-		   (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) {
-		if (evsel->pmu == NULL) {
+	} else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) {
+		if (evsel->pmu == NULL)
 			evsel->pmu = evsel__find_pmu(evsel);
-			if (evsel->pmu)
-				evsel->pmu->missing_features.exclude_guest = true;
-			else {
-				/* we cannot find PMU, disable attrs now */
-				evsel->core.attr.exclude_host = false;
-				evsel->core.attr.exclude_guest = false;
-			}
+
+		if (evsel->pmu)
+			evsel->pmu->missing_features.exclude_guest = true;
+		else {
+			/* we cannot find PMU, disable attrs now */
+			evsel->core.attr.exclude_host = false;
+			evsel->core.attr.exclude_guest = false;
 		}
 
 		if (evsel->exclude_GH) {
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5973f46c23755..6502cd679f577 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -266,6 +266,7 @@ __add_event(struct list_head *list, int *idx,
 	evsel->core.own_cpus = perf_cpu_map__get(cpus);
 	evsel->core.requires_cpu = pmu ? pmu->is_uncore : false;
 	evsel->auto_merge_stats = auto_merge_stats;
+	evsel->pmu = pmu;
 
 	if (name)
 		evsel->name = strdup(name);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 03284059175f7..6a86e6af0903c 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1065,11 +1065,15 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel)
 {
 	struct perf_pmu *pmu = NULL;
 
+	if (evsel->pmu)
+		return evsel->pmu;
+
 	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
 		if (pmu->type == evsel->core.attr.type)
 			break;
 	}
 
+	evsel->pmu = pmu;
 	return pmu;
 }
 
-- 
GitLab


From b86ac6796b6c1dea83b744812d36922c21f43323 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:09 -0700
Subject: [PATCH 0500/4122] perf tools: Use pmu info in evsel__is_hybrid()

If evsel has pmu, it can use pmu->is_hybrid directly.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index b7140beca9700..c9fef26d07027 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3131,6 +3131,9 @@ void evsel__zero_per_pkg(struct evsel *evsel)
 
 bool evsel__is_hybrid(struct evsel *evsel)
 {
+	if (evsel->pmu)
+		return evsel->pmu->is_hybrid;
+
 	return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
 }
 
-- 
GitLab


From 93d5e700156e03e66eb1bf2158ba3b8a8b354c71 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:10 -0700
Subject: [PATCH 0501/4122] perf stat: Use evsel__is_hybrid() more

In the stat-display code, it needs to check if the current evsel is
hybrid but it uses perf_pmu__has_hybrid() which can return true for
non-hybrid event too.  I think it's better to use evsel__is_hybrid().

Also remove a NULL check for the 'config' parameter in the
hybrid_merge() since it's called after config->no_merge check.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 20 ++++----------------
 1 file changed, 4 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 5c47ee9963a7c..4113aa86772f7 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -704,7 +704,7 @@ static void uniquify_event_name(struct evsel *counter)
 			counter->name = new_name;
 		}
 	} else {
-		if (perf_pmu__has_hybrid()) {
+		if (evsel__is_hybrid(counter)) {
 			ret = asprintf(&new_name, "%s/%s/",
 				       counter->pmu_name, counter->name);
 		} else {
@@ -744,26 +744,14 @@ static void collect_all_aliases(struct perf_stat_config *config, struct evsel *c
 	}
 }
 
-static bool is_uncore(struct evsel *evsel)
-{
-	struct perf_pmu *pmu = evsel__find_pmu(evsel);
-
-	return pmu && pmu->is_uncore;
-}
-
-static bool hybrid_uniquify(struct evsel *evsel)
-{
-	return perf_pmu__has_hybrid() && !is_uncore(evsel);
-}
-
 static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
 			 bool check)
 {
-	if (hybrid_uniquify(counter)) {
+	if (evsel__is_hybrid(counter)) {
 		if (check)
-			return config && config->hybrid_merge;
+			return config->hybrid_merge;
 		else
-			return config && !config->hybrid_merge;
+			return !config->hybrid_merge;
 	}
 
 	return false;
-- 
GitLab


From 375369abcdb774abadbc1c82d2e6c24f0f1f49a1 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:11 -0700
Subject: [PATCH 0502/4122] perf stat: Add aggr id for global mode

To make the code simpler, I'd like to use the same aggregation code for
the global mode.  We can simply add an id function to return cpu 0 and
use print_aggr().

No functional change intended.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 36 ++++++++++++++++++++++++++++++++++--
 tools/perf/util/cpumap.c  | 10 ++++++++++
 tools/perf/util/cpumap.h  |  6 +++++-
 3 files changed, 49 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 265b051579726..75d16e9705a4e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1330,6 +1330,12 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __
 	return aggr_cpu_id__node(cpu, /*data=*/NULL);
 }
 
+static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused,
+						struct perf_cpu cpu)
+{
+	return aggr_cpu_id__global(cpu, /*data=*/NULL);
+}
+
 static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
 					      aggr_get_id_t get_id, struct perf_cpu cpu)
 {
@@ -1366,6 +1372,12 @@ static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *co
 	return perf_stat__get_aggr(config, perf_stat__get_node, cpu);
 }
 
+static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config,
+						       struct perf_cpu cpu)
+{
+	return perf_stat__get_aggr(config, perf_stat__get_global, cpu);
+}
+
 static bool term_percore_set(void)
 {
 	struct evsel *counter;
@@ -1395,6 +1407,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
 
 		return NULL;
 	case AGGR_GLOBAL:
+		return aggr_cpu_id__global;
 	case AGGR_THREAD:
 	case AGGR_UNSET:
 	case AGGR_MAX:
@@ -1420,6 +1433,7 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
 		}
 		return NULL;
 	case AGGR_GLOBAL:
+		return perf_stat__get_global_cached;
 	case AGGR_THREAD:
 	case AGGR_UNSET:
 	case AGGR_MAX:
@@ -1535,6 +1549,16 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, vo
 	return id;
 }
 
+static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused,
+							   void *data __maybe_unused)
+{
+	struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+	/* it always aggregates to the cpu 0 */
+	id.cpu = (struct perf_cpu){ .cpu = 0 };
+	return id;
+}
+
 static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused,
 						     struct perf_cpu cpu)
 {
@@ -1558,6 +1582,12 @@ static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *conf
 	return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env);
 }
 
+static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused,
+						     struct perf_cpu cpu)
+{
+	return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
 static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
 {
 	switch (aggr_mode) {
@@ -1569,8 +1599,9 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
 		return perf_env__get_core_aggr_by_cpu;
 	case AGGR_NODE:
 		return perf_env__get_node_aggr_by_cpu;
-	case AGGR_NONE:
 	case AGGR_GLOBAL:
+		return perf_env__get_global_aggr_by_cpu;
+	case AGGR_NONE:
 	case AGGR_THREAD:
 	case AGGR_UNSET:
 	case AGGR_MAX:
@@ -1590,8 +1621,9 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
 		return perf_stat__get_core_file;
 	case AGGR_NODE:
 		return perf_stat__get_node_file;
-	case AGGR_NONE:
 	case AGGR_GLOBAL:
+		return perf_stat__get_global_file;
+	case AGGR_NONE:
 	case AGGR_THREAD:
 	case AGGR_UNSET:
 	case AGGR_MAX:
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 8486ca3bec75f..60209fe87456a 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -354,6 +354,16 @@ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unu
 	return id;
 }
 
+struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data __maybe_unused)
+{
+	struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+	/* it always aggregates to the cpu 0 */
+	cpu.cpu = 0;
+	id.cpu = cpu;
+	return id;
+}
+
 /* setup simple routines to easily access node numbers given a cpu number */
 static int get_max_num(char *path, int *max)
 {
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index 4a6d029576eeb..b2ff648bc4176 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -133,5 +133,9 @@ struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data);
  * cpu. The function signature is compatible with aggr_cpu_id_get_t.
  */
 struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data);
-
+/**
+ * aggr_cpu_id__global - Create an aggr_cpu_id for global aggregation.
+ * The function signature is compatible with aggr_cpu_id_get_t.
+ */
+struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data);
 #endif /* __PERF_CPUMAP_H */
-- 
GitLab


From 8938cfa7480282fb3c75548958b239444affcc50 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:12 -0700
Subject: [PATCH 0503/4122] perf stat: Add cpu aggr id for no aggregation mode

Likewise, add an aggr_id for cpu for none aggregation mode.  This is not
used actually yet but later code will use to unify the aggregation code.

No functional change intended.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 48 +++++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 75d16e9705a4e..b03b530fe9a6f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1336,6 +1336,12 @@ static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config
 	return aggr_cpu_id__global(cpu, /*data=*/NULL);
 }
 
+static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused,
+					     struct perf_cpu cpu)
+{
+	return aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+}
+
 static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
 					      aggr_get_id_t get_id, struct perf_cpu cpu)
 {
@@ -1378,6 +1384,12 @@ static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *
 	return perf_stat__get_aggr(config, perf_stat__get_global, cpu);
 }
 
+static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config,
+						    struct perf_cpu cpu)
+{
+	return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
+}
+
 static bool term_percore_set(void)
 {
 	struct evsel *counter;
@@ -1404,8 +1416,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
 	case AGGR_NONE:
 		if (term_percore_set())
 			return aggr_cpu_id__core;
-
-		return NULL;
+		return aggr_cpu_id__cpu;
 	case AGGR_GLOBAL:
 		return aggr_cpu_id__global;
 	case AGGR_THREAD:
@@ -1428,10 +1439,9 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
 	case AGGR_NODE:
 		return perf_stat__get_node_cached;
 	case AGGR_NONE:
-		if (term_percore_set()) {
+		if (term_percore_set())
 			return perf_stat__get_core_cached;
-		}
-		return NULL;
+		return perf_stat__get_cpu_cached;
 	case AGGR_GLOBAL:
 		return perf_stat__get_global_cached;
 	case AGGR_THREAD:
@@ -1541,6 +1551,26 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo
 	return id;
 }
 
+static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data)
+{
+	struct perf_env *env = data;
+	struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+	if (cpu.cpu != -1) {
+		/*
+		 * core_id is relative to socket and die,
+		 * we need a global id. So we set
+		 * socket, die id and core id
+		 */
+		id.socket = env->cpu[cpu.cpu].socket_id;
+		id.die = env->cpu[cpu.cpu].die_id;
+		id.core = env->cpu[cpu.cpu].core_id;
+		id.cpu = cpu;
+	}
+
+	return id;
+}
+
 static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data)
 {
 	struct aggr_cpu_id id = aggr_cpu_id__empty();
@@ -1576,6 +1606,12 @@ static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *conf
 	return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env);
 }
 
+static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused,
+						  struct perf_cpu cpu)
+{
+	return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env);
+}
+
 static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused,
 						   struct perf_cpu cpu)
 {
@@ -1602,6 +1638,7 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode)
 	case AGGR_GLOBAL:
 		return perf_env__get_global_aggr_by_cpu;
 	case AGGR_NONE:
+		return perf_env__get_cpu_aggr_by_cpu;
 	case AGGR_THREAD:
 	case AGGR_UNSET:
 	case AGGR_MAX:
@@ -1624,6 +1661,7 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode)
 	case AGGR_GLOBAL:
 		return perf_stat__get_global_file;
 	case AGGR_NONE:
+		return perf_stat__get_cpu_file;
 	case AGGR_THREAD:
 	case AGGR_UNSET:
 	case AGGR_MAX:
-- 
GitLab


From 505ac48ba759cd88d9fd40ec5354cda28e17377b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:13 -0700
Subject: [PATCH 0504/4122] perf stat: Add 'needs_sort' argument to
 cpu_aggr_map__new()

In case of no aggregation, it needs to keep the original (cpu) ordering
in the aggr_map so that it can be in sync with the cpu map.  This will
make the code easier to handle AGGR_NONE similar to others.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 7 +++++--
 tools/perf/util/cpumap.c  | 6 ++++--
 tools/perf/util/cpumap.h  | 2 +-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b03b530fe9a6f..9053fd4d15a77 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1458,8 +1458,9 @@ static int perf_stat_init_aggr_mode(void)
 	aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode);
 
 	if (get_id) {
+		bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
 		stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
-							 get_id, /*data=*/NULL);
+							 get_id, /*data=*/NULL, needs_sort);
 		if (!stat_config.aggr_map) {
 			pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
 			return -1;
@@ -1674,11 +1675,13 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
 {
 	struct perf_env *env = &st->session->header.env;
 	aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
+	bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
 
 	if (!get_id)
 		return 0;
 
-	stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env);
+	stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus,
+						 get_id, env, needs_sort);
 	if (!stat_config.aggr_map) {
 		pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]);
 		return -1;
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 60209fe87456a..6e3fcf523de93 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -234,7 +234,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer)
 
 struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
 				       aggr_cpu_id_get_t get_id,
-				       void *data)
+				       void *data, bool needs_sort)
 {
 	int idx;
 	struct perf_cpu cpu;
@@ -270,8 +270,10 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
 		if (trimmed_c)
 			c = trimmed_c;
 	}
+
 	/* ensure we process id in increasing order */
-	qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
+	if (needs_sort)
+		qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp);
 
 	return c;
 
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index b2ff648bc4176..da28b3146ef9f 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -97,7 +97,7 @@ typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data)
  */
 struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus,
 				       aggr_cpu_id_get_t get_id,
-				       void *data);
+				       void *data, bool needs_sort);
 
 bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b);
 bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a);
-- 
GitLab


From ca68b374d0409bea5cacd4c5a8e0fbb407922d38 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:14 -0700
Subject: [PATCH 0505/4122] perf stat: Add struct perf_stat_aggr to
 perf_stat_evsel

The perf_stat_aggr struct is to keep aggregated counter values and the
states according to the aggregation mode.  The number of entries is
depends on the mode and this is a preparation for the later use.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat.c | 34 +++++++++++++++++++++++++++-------
 tools/perf/util/stat.h | 19 +++++++++++++++++++
 2 files changed, 46 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 8ec8bb4a99129..c9d5aa295b541 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -133,15 +133,33 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
 static void evsel__reset_stat_priv(struct evsel *evsel)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
+	struct perf_stat_aggr *aggr = ps->aggr;
 
 	init_stats(&ps->res_stats);
+
+	if (aggr)
+		memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr);
 }
 
-static int evsel__alloc_stat_priv(struct evsel *evsel)
+
+static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr)
 {
-	evsel->stats = zalloc(sizeof(struct perf_stat_evsel));
-	if (evsel->stats == NULL)
+	struct perf_stat_evsel *ps;
+
+	ps = zalloc(sizeof(*ps));
+	if (ps == NULL)
 		return -ENOMEM;
+
+	if (nr_aggr) {
+		ps->nr_aggr = nr_aggr;
+		ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr));
+		if (ps->aggr == NULL) {
+			free(ps);
+			return -ENOMEM;
+		}
+	}
+
+	evsel->stats = ps;
 	perf_stat_evsel_id_init(evsel);
 	evsel__reset_stat_priv(evsel);
 	return 0;
@@ -151,8 +169,10 @@ static void evsel__free_stat_priv(struct evsel *evsel)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
 
-	if (ps)
+	if (ps) {
+		zfree(&ps->aggr);
 		zfree(&ps->group_data);
+	}
 	zfree(&evsel->stats);
 }
 
@@ -181,9 +201,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel)
 		perf_counts__reset(evsel->prev_raw_counts);
 }
 
-static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw)
+static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw)
 {
-	if (evsel__alloc_stat_priv(evsel) < 0 ||
+	if (evsel__alloc_stat_priv(evsel, nr_aggr) < 0 ||
 	    evsel__alloc_counts(evsel) < 0 ||
 	    (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0))
 		return -ENOMEM;
@@ -196,7 +216,7 @@ int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
 	struct evsel *evsel;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (evsel__alloc_stats(evsel, alloc_raw))
+		if (evsel__alloc_stats(evsel, 0, alloc_raw))
 			goto out_free;
 	}
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index b0899c6e002f5..42453513ffea8 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -8,6 +8,7 @@
 #include <sys/resource.h>
 #include "cpumap.h"
 #include "rblist.h"
+#include "counts.h"
 
 struct perf_cpu_map;
 struct perf_stat_config;
@@ -42,9 +43,27 @@ enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__MAX,
 };
 
+/* hold aggregated event info */
+struct perf_stat_aggr {
+	/* aggregated values */
+	struct perf_counts_values	counts;
+	/* number of entries (CPUs) aggregated */
+	int				nr;
+	/* whether any entry has failed to read/process event */
+	bool				failed;
+};
+
+/* per-evsel event stats */
 struct perf_stat_evsel {
+	/* used for repeated runs */
 	struct stats		 res_stats;
+	/* evsel id for quick check */
 	enum perf_stat_evsel_id	 id;
+	/* number of allocated 'aggr' */
+	int			 nr_aggr;
+	/* aggregated event values */
+	struct perf_stat_aggr	*aggr;
+	/* used for group read */
 	u64			*group_data;
 };
 
-- 
GitLab


From 1f297a6eb2bd90663518cbb6e9e2a3b2add34b73 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:15 -0700
Subject: [PATCH 0506/4122] perf stat: Allocate evsel->stats->aggr properly

The perf_stat_config.aggr_map should have a correct size of the
aggregation map.  Use it to allocate aggr_counts.

Also AGGR_NONE with per-core events can be tricky because it doesn't
aggreate basically but it needs to do so for per-core events only.
So only per-core evsels will have stats->aggr data.

Note that other caller of evlist__alloc_stat() might not have
stat_config or aggr_map.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-9-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c     | 4 ++--
 tools/perf/builtin-stat.c       | 6 +++---
 tools/perf/tests/parse-metric.c | 2 +-
 tools/perf/tests/pmu-events.c   | 2 +-
 tools/perf/util/stat.c          | 9 +++++++--
 tools/perf/util/stat.h          | 3 ++-
 6 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7ca238277d835..d7ec8c1af2936 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -2049,7 +2049,7 @@ static void perf_sample__fprint_metric(struct perf_script *script,
 	u64 val;
 
 	if (!evsel->stats)
-		evlist__alloc_stats(script->session->evlist, false);
+		evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false);
 	if (evsel_script(leader)->gnum++ == 0)
 		perf_stat__reset_shadow_stats();
 	val = sample->period * evsel->scale;
@@ -3632,7 +3632,7 @@ static int set_maps(struct perf_script *script)
 
 	perf_evlist__set_maps(&evlist->core, script->cpus, script->threads);
 
-	if (evlist__alloc_stats(evlist, true))
+	if (evlist__alloc_stats(&stat_config, evlist, /*alloc_raw=*/true))
 		return -ENOMEM;
 
 	script->allocated = true;
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9053fd4d15a77..92a8e4512f982 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2121,7 +2121,7 @@ static int set_maps(struct perf_stat *st)
 
 	perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads);
 
-	if (evlist__alloc_stats(evsel_list, true))
+	if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true))
 		return -ENOMEM;
 
 	st->maps_allocated = true;
@@ -2568,10 +2568,10 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 	}
 
-	if (evlist__alloc_stats(evsel_list, interval))
+	if (perf_stat_init_aggr_mode())
 		goto out;
 
-	if (perf_stat_init_aggr_mode())
+	if (evlist__alloc_stats(&stat_config, evsel_list, interval))
 		goto out;
 
 	/*
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index 68f5a2a03242a..21b7ac00d7985 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -103,7 +103,7 @@ static int __compute_metric(const char *name, struct value *vals,
 	if (err)
 		goto out;
 
-	err = evlist__alloc_stats(evlist, false);
+	err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false);
 	if (err)
 		goto out;
 
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index 097e05c796abf..5d0d3b239a68b 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -889,7 +889,7 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e
 		goto out_err;
 	}
 
-	err = evlist__alloc_stats(evlist, false);
+	err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false);
 	if (err)
 		goto out_err;
 	/*
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c9d5aa295b541..3741496285077 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -211,12 +211,17 @@ static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw)
 	return 0;
 }
 
-int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw)
+int evlist__alloc_stats(struct perf_stat_config *config,
+			struct evlist *evlist, bool alloc_raw)
 {
 	struct evsel *evsel;
+	int nr_aggr = 0;
+
+	if (config && config->aggr_map)
+		nr_aggr = config->aggr_map->nr;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (evsel__alloc_stats(evsel, 0, alloc_raw))
+		if (evsel__alloc_stats(evsel, nr_aggr, alloc_raw))
 			goto out_free;
 	}
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 42453513ffea8..0980875b9be11 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -267,7 +267,8 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config,
 				   struct runtime_stat *st);
 void perf_stat__collect_metric_expr(struct evlist *);
 
-int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw);
+int evlist__alloc_stats(struct perf_stat_config *config,
+			struct evlist *evlist, bool alloc_raw);
 void evlist__free_stats(struct evlist *evlist);
 void evlist__reset_stats(struct evlist *evlist);
 void evlist__reset_prev_raw_counts(struct evlist *evlist);
-- 
GitLab


From f976bc6b6bfc9b14eeaf9a8859191c8f85c253dc Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:16 -0700
Subject: [PATCH 0507/4122] perf stat: Aggregate events using
 evsel->stats->aggr

Add a logic to aggregate counter values to the new evsel->stats->aggr.
This is not used yet so shadow stats are not updated.  But later patch
will convert the existing code to use it.

With that, we don't need to handle AGGR_GLOBAL specially anymore.  It
can use the same logic with counts, prev_counts and aggr_counts.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-10-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c                     |  3 --
 tools/perf/util/evsel.c                       |  9 +---
 .../scripting-engines/trace-event-python.c    |  6 ---
 tools/perf/util/stat.c                        | 46 ++++++++++++++++---
 4 files changed, 41 insertions(+), 23 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 92a8e4512f982..abede56d79b6f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -963,9 +963,6 @@ try_again_reset:
 		init_stats(&walltime_nsecs_stats);
 		update_stats(&walltime_nsecs_stats, t1 - t0);
 
-		if (stat_config.aggr_mode == AGGR_GLOBAL)
-			evlist__save_aggr_prev_raw_counts(evsel_list);
-
 		evlist__copy_prev_raw_counts(evsel_list);
 		evlist__reset_prev_raw_counts(evsel_list);
 		perf_stat__reset_shadow_per_stat(&rt_stat);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c9fef26d07027..cdde5b5f8ad2b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1526,13 +1526,8 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread,
 	if (!evsel->prev_raw_counts)
 		return;
 
-	if (cpu_map_idx == -1) {
-		tmp = evsel->prev_raw_counts->aggr;
-		evsel->prev_raw_counts->aggr = *count;
-	} else {
-		tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
-		*perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
-	}
+	tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread);
+	*perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count;
 
 	count->val = count->val - tmp.val;
 	count->ena = count->ena - tmp.ena;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 1f2040f36d4e9..7bc8559dce6af 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1653,12 +1653,6 @@ static void python_process_stat(struct perf_stat_config *config,
 	struct perf_cpu_map *cpus = counter->core.cpus;
 	int cpu, thread;
 
-	if (config->aggr_mode == AGGR_GLOBAL) {
-		process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp,
-			     &counter->counts->aggr);
-		return;
-	}
-
 	for (thread = 0; thread < threads->nr; thread++) {
 		for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
 			process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 3741496285077..99874254809dc 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -387,6 +387,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		       struct perf_counts_values *count)
 {
 	struct perf_counts_values *aggr = &evsel->counts->aggr;
+	struct perf_stat_evsel *ps = evsel->stats;
 	static struct perf_counts_values zero;
 	bool skip = false;
 
@@ -398,6 +399,44 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 	if (skip)
 		count = &zero;
 
+	if (!evsel->snapshot)
+		evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
+	perf_counts_values__scale(count, config->scale, NULL);
+
+	if (ps->aggr) {
+		struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
+		struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu);
+		struct perf_stat_aggr *ps_aggr;
+		int i;
+
+		for (i = 0; i < ps->nr_aggr; i++) {
+			if (!aggr_cpu_id__equal(&aggr_id, &config->aggr_map->map[i]))
+				continue;
+
+			ps_aggr = &ps->aggr[i];
+			ps_aggr->nr++;
+
+			/*
+			 * When any result is bad, make them all to give
+			 * consistent output in interval mode.
+			 */
+			if (count->ena == 0 || count->run == 0 ||
+			    evsel->counts->scaled == -1) {
+				ps_aggr->counts.val = 0;
+				ps_aggr->counts.ena = 0;
+				ps_aggr->counts.run = 0;
+				ps_aggr->failed = true;
+			}
+
+			if (!ps_aggr->failed) {
+				ps_aggr->counts.val += count->val;
+				ps_aggr->counts.ena += count->ena;
+				ps_aggr->counts.run += count->run;
+			}
+			break;
+		}
+	}
+
 	switch (config->aggr_mode) {
 	case AGGR_THREAD:
 	case AGGR_CORE:
@@ -405,9 +444,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 	case AGGR_SOCKET:
 	case AGGR_NODE:
 	case AGGR_NONE:
-		if (!evsel->snapshot)
-			evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
-		perf_counts_values__scale(count, config->scale, NULL);
 		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
 			perf_stat__update_shadow_stats(evsel, count->val,
 						       cpu_map_idx, &rt_stat);
@@ -469,10 +505,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	if (config->aggr_mode != AGGR_GLOBAL)
 		return 0;
 
-	if (!counter->snapshot)
-		evsel__compute_deltas(counter, -1, -1, aggr);
-	perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled);
-
 	update_stats(&ps->res_stats, *count);
 
 	if (verbose > 0) {
-- 
GitLab


From 049aba09e2156dd2ff1a61bf1b8738b0fc864c9d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:17 -0700
Subject: [PATCH 0508/4122] perf stat: Factor out evsel__count_has_error()

It's possible to have 0 enabled/running time for some per-task or per-cgroup
events since it's not scheduled on any CPU.  Treating the whole event as
failed would not work in this case.  Thinking again, the code only existed
when any CPU-level aggregation is enabled (like per-socket, per-core, ...).

To make it clearer, factor out the condition check into the new
evsel__count_has_error() function and add some comments.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-11-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 99874254809dc..dc075d5a0f72a 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -381,6 +381,25 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals,
 	return ret;
 }
 
+static bool evsel__count_has_error(struct evsel *evsel,
+				   struct perf_counts_values *count,
+				   struct perf_stat_config *config)
+{
+	/* the evsel was failed already */
+	if (evsel->err || evsel->counts->scaled == -1)
+		return true;
+
+	/* this is meaningful for CPU aggregation modes only */
+	if (config->aggr_mode == AGGR_GLOBAL)
+		return false;
+
+	/* it's considered ok when it actually ran */
+	if (count->ena != 0 && count->run != 0)
+		return false;
+
+	return true;
+}
+
 static int
 process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		       int cpu_map_idx, int thread,
@@ -420,8 +439,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 			 * When any result is bad, make them all to give
 			 * consistent output in interval mode.
 			 */
-			if (count->ena == 0 || count->run == 0 ||
-			    evsel->counts->scaled == -1) {
+			if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) {
 				ps_aggr->counts.val = 0;
 				ps_aggr->counts.ena = 0;
 				ps_aggr->counts.run = 0;
-- 
GitLab


From 050059e1b1affc080ede9fe691768e2383eb6367 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:18 -0700
Subject: [PATCH 0509/4122] perf stat: Aggregate per-thread stats using
 evsel->stats->aggr

Per-thread aggregation doesn't use the CPU numbers but the logic should
be the same.  Initialize cpu_aggr_map separately for AGGR_THREAD and use
thread map idx to aggregate counter values.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 31 +++++++++++++++++++++++++++++++
 tools/perf/util/stat.c    | 24 ++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index abede56d79b6f..6777fef0d56c3 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1465,6 +1465,21 @@ static int perf_stat_init_aggr_mode(void)
 		stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode);
 	}
 
+	if (stat_config.aggr_mode == AGGR_THREAD) {
+		nr = perf_thread_map__nr(evsel_list->core.threads);
+		stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
+		if (stat_config.aggr_map == NULL)
+			return -ENOMEM;
+
+		for (int s = 0; s < nr; s++) {
+			struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+			id.thread_idx = s;
+			stat_config.aggr_map->map[s] = id;
+		}
+		return 0;
+	}
+
 	/*
 	 * The evsel_list->cpus is the base we operate on,
 	 * taking the highest cpu number to be the size of
@@ -1674,6 +1689,22 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st)
 	aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode);
 	bool needs_sort = stat_config.aggr_mode != AGGR_NONE;
 
+	if (stat_config.aggr_mode == AGGR_THREAD) {
+		int nr = perf_thread_map__nr(evsel_list->core.threads);
+
+		stat_config.aggr_map = cpu_aggr_map__empty_new(nr);
+		if (stat_config.aggr_map == NULL)
+			return -ENOMEM;
+
+		for (int s = 0; s < nr; s++) {
+			struct aggr_cpu_id id = aggr_cpu_id__empty();
+
+			id.thread_idx = s;
+			stat_config.aggr_map->map[s] = id;
+		}
+		return 0;
+	}
+
 	if (!get_id)
 		return 0;
 
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index dc075d5a0f72a..5b04c9d161568 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -422,6 +422,24 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		evsel__compute_deltas(evsel, cpu_map_idx, thread, count);
 	perf_counts_values__scale(count, config->scale, NULL);
 
+	if (config->aggr_mode == AGGR_THREAD) {
+		struct perf_counts_values *aggr_counts = &ps->aggr[thread].counts;
+
+		/*
+		 * Skip value 0 when enabling --per-thread globally,
+		 * otherwise too many 0 output.
+		 */
+		if (count->val == 0 && config->system_wide)
+			return 0;
+
+		ps->aggr[thread].nr++;
+
+		aggr_counts->val += count->val;
+		aggr_counts->ena += count->ena;
+		aggr_counts->run += count->run;
+		goto update;
+	}
+
 	if (ps->aggr) {
 		struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx);
 		struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu);
@@ -436,8 +454,9 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 			ps_aggr->nr++;
 
 			/*
-			 * When any result is bad, make them all to give
-			 * consistent output in interval mode.
+			 * When any result is bad, make them all to give consistent output
+			 * in interval mode.  But per-task counters can have 0 enabled time
+			 * when some tasks are idle.
 			 */
 			if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) {
 				ps_aggr->counts.val = 0;
@@ -455,6 +474,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		}
 	}
 
+update:
 	switch (config->aggr_mode) {
 	case AGGR_THREAD:
 	case AGGR_CORE:
-- 
GitLab


From ae7e6492ee54e18cc3d6ed4bb5ca857726a7e9c3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:19 -0700
Subject: [PATCH 0510/4122] perf stat: Allocate aggr counts for recorded data

In the process_stat_config_event() it sets the aggr_mode that means the
earlier evlist__alloc_stats() cannot allocate the aggr counts due to the
missing aggr_mode.

Do it after setting the aggr_map using evlist__alloc_aggr_stats().

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-13-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 20 +++++++++++++++-----
 tools/perf/util/stat.c    | 39 +++++++++++++++++++++++++++++++--------
 tools/perf/util/stat.h    |  2 ++
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6777fef0d56c3..2a6a5d0c5563a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1342,7 +1342,11 @@ static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __m
 static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config,
 					      aggr_get_id_t get_id, struct perf_cpu cpu)
 {
-	struct aggr_cpu_id id = aggr_cpu_id__empty();
+	struct aggr_cpu_id id;
+
+	/* per-process mode - should use global aggr mode */
+	if (cpu.cpu == -1)
+		return get_id(config, cpu);
 
 	if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu]))
 		config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu);
@@ -2125,17 +2129,23 @@ int process_stat_config_event(struct perf_session *session,
 	if (perf_cpu_map__empty(st->cpus)) {
 		if (st->aggr_mode != AGGR_UNSET)
 			pr_warning("warning: processing task data, aggregation mode not set\n");
-		return 0;
-	}
-
-	if (st->aggr_mode != AGGR_UNSET)
+	} else if (st->aggr_mode != AGGR_UNSET) {
 		stat_config.aggr_mode = st->aggr_mode;
+	}
 
 	if (perf_stat.data.is_pipe)
 		perf_stat_init_aggr_mode();
 	else
 		perf_stat_init_aggr_mode_file(st);
 
+	if (stat_config.aggr_map) {
+		int nr_aggr = stat_config.aggr_map->nr;
+
+		if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) {
+			pr_err("cannot allocate aggr counts\n");
+			return -1;
+		}
+	}
 	return 0;
 }
 
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 5b04c9d161568..1b9048115a185 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -141,6 +141,31 @@ static void evsel__reset_stat_priv(struct evsel *evsel)
 		memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr);
 }
 
+static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+
+	if (ps == NULL)
+		return 0;
+
+	ps->nr_aggr = nr_aggr;
+	ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr));
+	if (ps->aggr == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (evsel__alloc_aggr_stats(evsel, nr_aggr) < 0)
+			return -1;
+	}
+	return 0;
+}
 
 static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr)
 {
@@ -150,16 +175,14 @@ static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr)
 	if (ps == NULL)
 		return -ENOMEM;
 
-	if (nr_aggr) {
-		ps->nr_aggr = nr_aggr;
-		ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr));
-		if (ps->aggr == NULL) {
-			free(ps);
-			return -ENOMEM;
-		}
+	evsel->stats = ps;
+
+	if (nr_aggr && evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) {
+		evsel->stats = NULL;
+		free(ps);
+		return -ENOMEM;
 	}
 
-	evsel->stats = ps;
 	perf_stat_evsel_id_init(evsel);
 	evsel__reset_stat_priv(evsel);
 	return 0;
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 0980875b9be11..4c00f814bd79a 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -275,6 +275,8 @@ void evlist__reset_prev_raw_counts(struct evlist *evlist);
 void evlist__copy_prev_raw_counts(struct evlist *evlist);
 void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
 
+int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr);
+
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter);
 struct perf_tool;
-- 
GitLab


From 8f97963e09761c239522649921d7b1c57ff2debb Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:20 -0700
Subject: [PATCH 0511/4122] perf stat: Reset aggr counts for each interval

The evsel->stats->aggr->count should be reset for interval processing
since we want to use the values directly for display.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-14-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c |  3 +++
 tools/perf/util/stat.c    | 20 +++++++++++++++++---
 tools/perf/util/stat.h    |  1 +
 3 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2a6a5d0c5563a..bff28a199dfd7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -492,6 +492,8 @@ static void process_interval(void)
 	diff_timespec(&rs, &ts, &ref_time);
 
 	perf_stat__reset_shadow_per_stat(&rt_stat);
+	evlist__reset_aggr_stats(evsel_list);
+
 	read_counters(&rs);
 
 	if (STAT_RECORD) {
@@ -965,6 +967,7 @@ try_again_reset:
 
 		evlist__copy_prev_raw_counts(evsel_list);
 		evlist__reset_prev_raw_counts(evsel_list);
+		evlist__reset_aggr_stats(evsel_list);
 		perf_stat__reset_shadow_per_stat(&rt_stat);
 	} else {
 		update_stats(&walltime_nsecs_stats, t1 - t0);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 1b9048115a185..a4066f0d36379 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -130,17 +130,23 @@ static void perf_stat_evsel_id_init(struct evsel *evsel)
 	}
 }
 
-static void evsel__reset_stat_priv(struct evsel *evsel)
+static void evsel__reset_aggr_stats(struct evsel *evsel)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
 	struct perf_stat_aggr *aggr = ps->aggr;
 
-	init_stats(&ps->res_stats);
-
 	if (aggr)
 		memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr);
 }
 
+static void evsel__reset_stat_priv(struct evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+
+	init_stats(&ps->res_stats);
+	evsel__reset_aggr_stats(evsel);
+}
+
 static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr)
 {
 	struct perf_stat_evsel *ps = evsel->stats;
@@ -276,6 +282,14 @@ void evlist__reset_stats(struct evlist *evlist)
 	}
 }
 
+void evlist__reset_aggr_stats(struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__reset_aggr_stats(evsel);
+}
+
 void evlist__reset_prev_raw_counts(struct evlist *evlist)
 {
 	struct evsel *evsel;
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 4c00f814bd79a..809f9f0aff0c3 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -276,6 +276,7 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist);
 void evlist__save_aggr_prev_raw_counts(struct evlist *evlist);
 
 int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr);
+void evlist__reset_aggr_stats(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter);
-- 
GitLab


From 8962cbec5a0672b3966e9c48ac22e207d56e13ca Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:21 -0700
Subject: [PATCH 0512/4122] perf stat: Split process_counters() to share it
 with process_stat_round_event()

It'd do more processing with aggregation.  Let's split the function so that it
can be shared with by process_stat_round_event() too.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-15-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index bff28a199dfd7..838d29590bed9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -465,15 +465,19 @@ static int read_bpf_map_counters(void)
 	return 0;
 }
 
-static void read_counters(struct timespec *rs)
+static int read_counters(struct timespec *rs)
 {
-	struct evsel *counter;
-
 	if (!stat_config.stop_read_counter) {
 		if (read_bpf_map_counters() ||
 		    read_affinity_counters(rs))
-			return;
+			return -1;
 	}
+	return 0;
+}
+
+static void process_counters(void)
+{
+	struct evsel *counter;
 
 	evlist__for_each_entry(evsel_list, counter) {
 		if (counter->err)
@@ -494,7 +498,8 @@ static void process_interval(void)
 	perf_stat__reset_shadow_per_stat(&rt_stat);
 	evlist__reset_aggr_stats(evsel_list);
 
-	read_counters(&rs);
+	if (read_counters(&rs) == 0)
+		process_counters();
 
 	if (STAT_RECORD) {
 		if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL))
@@ -980,7 +985,8 @@ try_again_reset:
 	 * avoid arbitrary skew, we must read all counters before closing any
 	 * group leaders.
 	 */
-	read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
+	if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0)
+		process_counters();
 
 	/*
 	 * We need to keep evsel_list alive, because it's processed
@@ -2099,13 +2105,11 @@ static int process_stat_round_event(struct perf_session *session,
 				    union perf_event *event)
 {
 	struct perf_record_stat_round *stat_round = &event->stat_round;
-	struct evsel *counter;
 	struct timespec tsh, *ts = NULL;
 	const char **argv = session->header.env.cmdline_argv;
 	int argc = session->header.env.nr_cmdline;
 
-	evlist__for_each_entry(evsel_list, counter)
-		perf_stat_process_counter(&stat_config, counter);
+	process_counters();
 
 	if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL)
 		update_stats(&walltime_nsecs_stats, stat_round->time);
-- 
GitLab


From 942c5593393d9418bf521e77fa1eab47599efc4d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:22 -0700
Subject: [PATCH 0513/4122] perf stat: Add perf_stat_merge_counters()

The perf_stat_merge_counters() is to aggregate the same events in different
PMUs like in case of uncore or hybrid.  The same logic is in the stat-display
routines but I think it should be handled when it processes the event counters.

As it works on the aggr_counters, it doesn't change the output yet.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-16-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c |  2 +
 tools/perf/util/stat.c    | 96 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/stat.h    |  2 +
 3 files changed, 100 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 838d29590bed9..371d6e8969422 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -486,6 +486,8 @@ static void process_counters(void)
 			pr_warning("failed to process counter %s\n", counter->name);
 		counter->err = 0;
 	}
+
+	perf_stat_merge_counters(&stat_config, evsel_list);
 }
 
 static void process_interval(void)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index a4066f0d36379..aff1e7390585c 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -595,6 +595,102 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	return 0;
 }
 
+static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias)
+{
+	struct perf_stat_evsel *ps_a = evsel->stats;
+	struct perf_stat_evsel *ps_b = alias->stats;
+	int i;
+
+	if (ps_a->aggr == NULL && ps_b->aggr == NULL)
+		return 0;
+
+	if (ps_a->nr_aggr != ps_b->nr_aggr) {
+		pr_err("Unmatched aggregation mode between aliases\n");
+		return -1;
+	}
+
+	for (i = 0; i < ps_a->nr_aggr; i++) {
+		struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts;
+		struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts;
+
+		/* NB: don't increase aggr.nr for aliases */
+
+		aggr_counts_a->val += aggr_counts_b->val;
+		aggr_counts_a->ena += aggr_counts_b->ena;
+		aggr_counts_a->run += aggr_counts_b->run;
+	}
+
+	return 0;
+}
+/* events should have the same name, scale, unit, cgroup but on different PMUs */
+static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b)
+{
+	if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b)))
+		return false;
+
+	if (evsel_a->scale != evsel_b->scale)
+		return false;
+
+	if (evsel_a->cgrp != evsel_b->cgrp)
+		return false;
+
+	if (strcmp(evsel_a->unit, evsel_b->unit))
+		return false;
+
+	if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b))
+		return false;
+
+	return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name);
+}
+
+static void evsel__merge_aliases(struct evsel *evsel)
+{
+	struct evlist *evlist = evsel->evlist;
+	struct evsel *alias;
+
+	alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node);
+	list_for_each_entry_continue(alias, &evlist->core.entries, core.node) {
+		/* Merge the same events on different PMUs. */
+		if (evsel__is_alias(evsel, alias)) {
+			evsel__merge_aggr_counters(evsel, alias);
+			alias->merged_stat = true;
+		}
+	}
+}
+
+static bool evsel__should_merge_hybrid(struct evsel *evsel, struct perf_stat_config *config)
+{
+	struct perf_pmu *pmu;
+
+	if (!config->hybrid_merge)
+		return false;
+
+	pmu = evsel__find_pmu(evsel);
+	return pmu && pmu->is_hybrid;
+}
+
+static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config)
+{
+	/* this evsel is already merged */
+	if (evsel->merged_stat)
+		return;
+
+	if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config))
+		evsel__merge_aliases(evsel);
+}
+
+/* merge the same uncore and hybrid events if requested */
+void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	if (config->no_merge)
+		return;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__merge_stats(evsel, config);
+}
+
 int perf_event__process_stat_event(struct perf_session *session,
 				   union perf_event *event)
 {
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 809f9f0aff0c3..728bbc823b0d2 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -280,6 +280,8 @@ void evlist__reset_aggr_stats(struct evlist *evlist);
 
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter);
+void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist);
+
 struct perf_tool;
 union perf_event;
 struct perf_session;
-- 
GitLab


From 1d6d2bea5b97359ff09a8d793674aab3e5f79023 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:23 -0700
Subject: [PATCH 0514/4122] perf stat: Add perf_stat_process_percore()

The perf_stat_process_percore() is to aggregate counts for an event per-core
even if the aggr_mode is AGGR_NONE.  This is enabled when user requested it
on the command line.

To handle that, it keeps the per-cpu counts at first.  And then it aggregates
the counts that have the same core id in the aggr->counts and updates the
values for each cpu back.

Later, per-core events will skip one of the CPUs unless percore-show-thread
option is given.  In that case, it can simply print all cpu stats with the
updated (per-core) values.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-17-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c |  1 +
 tools/perf/util/stat.c    | 71 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/stat.h    |  3 ++
 3 files changed, 75 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 371d6e8969422..d6a006e41da09 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -488,6 +488,7 @@ static void process_counters(void)
 	}
 
 	perf_stat_merge_counters(&stat_config, evsel_list);
+	perf_stat_process_percore(&stat_config, evsel_list);
 }
 
 static void process_interval(void)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index aff1e7390585c..26c48ef7ca920 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -691,6 +691,77 @@ void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *ev
 		evsel__merge_stats(evsel, config);
 }
 
+static void evsel__update_percore_stats(struct evsel *evsel, struct aggr_cpu_id *core_id)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+	struct perf_counts_values counts = { 0, };
+	struct aggr_cpu_id id;
+	struct perf_cpu cpu;
+	int idx;
+
+	/* collect per-core counts */
+	perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+		struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+		id = aggr_cpu_id__core(cpu, NULL);
+		if (!aggr_cpu_id__equal(core_id, &id))
+			continue;
+
+		counts.val += aggr->counts.val;
+		counts.ena += aggr->counts.ena;
+		counts.run += aggr->counts.run;
+	}
+
+	/* update aggregated per-core counts for each CPU */
+	perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+		struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+		id = aggr_cpu_id__core(cpu, NULL);
+		if (!aggr_cpu_id__equal(core_id, &id))
+			continue;
+
+		aggr->counts.val = counts.val;
+		aggr->counts.ena = counts.ena;
+		aggr->counts.run = counts.run;
+
+		aggr->used = true;
+	}
+}
+
+/* we have an aggr_map for cpu, but want to aggregate the counters per-core */
+static void evsel__process_percore(struct evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+	struct aggr_cpu_id core_id;
+	struct perf_cpu cpu;
+	int idx;
+
+	if (!evsel->percore)
+		return;
+
+	perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) {
+		struct perf_stat_aggr *aggr = &ps->aggr[idx];
+
+		if (aggr->used)
+			continue;
+
+		core_id = aggr_cpu_id__core(cpu, NULL);
+		evsel__update_percore_stats(evsel, &core_id);
+	}
+}
+
+/* process cpu stats on per-core events */
+void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	if (config->aggr_mode != AGGR_NONE)
+		return;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__process_percore(evsel);
+}
+
 int perf_event__process_stat_event(struct perf_session *session,
 				   union perf_event *event)
 {
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 728bbc823b0d2..d23f8743e4422 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -51,6 +51,8 @@ struct perf_stat_aggr {
 	int				nr;
 	/* whether any entry has failed to read/process event */
 	bool				failed;
+	/* to mark this data is processed already */
+	bool				used;
 };
 
 /* per-evsel event stats */
@@ -281,6 +283,7 @@ void evlist__reset_aggr_stats(struct evlist *evlist);
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter);
 void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist);
+void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist);
 
 struct perf_tool;
 union perf_event;
-- 
GitLab


From 88f1d3512c947ad8e396e50acfce5ee55133df0a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:24 -0700
Subject: [PATCH 0515/4122] perf stat: Add perf_stat_process_shadow_stats()

This function updates the shadow stats using the aggregated counts
uniformly since it uses the aggr_counts for the every aggr mode.

It'd have duplicate shadow stats for each items for now since the
display routines will update them once again.  But that'd be fine
as it shows the average values and it'd be gone eventually.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-18-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c |  1 +
 tools/perf/util/stat.c    | 50 ++++++++++++++++++++-------------------
 tools/perf/util/stat.h    |  1 +
 3 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d6a006e41da09..d7c52cef70a3a 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -489,6 +489,7 @@ static void process_counters(void)
 
 	perf_stat_merge_counters(&stat_config, evsel_list);
 	perf_stat_process_percore(&stat_config, evsel_list);
+	perf_stat_process_shadow_stats(&stat_config, evsel_list);
 }
 
 static void process_interval(void)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 26c48ef7ca920..c0955a0427ab3 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -474,7 +474,7 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		aggr_counts->val += count->val;
 		aggr_counts->ena += count->ena;
 		aggr_counts->run += count->run;
-		goto update;
+		return 0;
 	}
 
 	if (ps->aggr) {
@@ -511,32 +511,10 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		}
 	}
 
-update:
-	switch (config->aggr_mode) {
-	case AGGR_THREAD:
-	case AGGR_CORE:
-	case AGGR_DIE:
-	case AGGR_SOCKET:
-	case AGGR_NODE:
-	case AGGR_NONE:
-		if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) {
-			perf_stat__update_shadow_stats(evsel, count->val,
-						       cpu_map_idx, &rt_stat);
-		}
-
-		if (config->aggr_mode == AGGR_THREAD) {
-			perf_stat__update_shadow_stats(evsel, count->val,
-						       thread, &rt_stat);
-		}
-		break;
-	case AGGR_GLOBAL:
+	if (config->aggr_mode == AGGR_GLOBAL) {
 		aggr->val += count->val;
 		aggr->ena += count->ena;
 		aggr->run += count->run;
-	case AGGR_UNSET:
-	case AGGR_MAX:
-	default:
-		break;
 	}
 
 	return 0;
@@ -762,6 +740,30 @@ void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *e
 		evsel__process_percore(evsel);
 }
 
+static void evsel__update_shadow_stats(struct evsel *evsel)
+{
+	struct perf_stat_evsel *ps = evsel->stats;
+	int i;
+
+	if (ps->aggr == NULL)
+		return;
+
+	for (i = 0; i < ps->nr_aggr; i++) {
+		struct perf_counts_values *aggr_counts = &ps->aggr[i].counts;
+
+		perf_stat__update_shadow_stats(evsel, aggr_counts->val, i, &rt_stat);
+	}
+}
+
+void perf_stat_process_shadow_stats(struct perf_stat_config *config __maybe_unused,
+				    struct evlist *evlist)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(evlist, evsel)
+		evsel__update_shadow_stats(evsel);
+}
+
 int perf_event__process_stat_event(struct perf_session *session,
 				   union perf_event *event)
 {
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index d23f8743e4422..3d413ba8c68a6 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -284,6 +284,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter);
 void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist);
 void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist);
+void perf_stat_process_shadow_stats(struct perf_stat_config *config, struct evlist *evlist);
 
 struct perf_tool;
 union perf_event;
-- 
GitLab


From 91f85f98da7ab8c32105f42dd03884c01ec4498f Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:25 -0700
Subject: [PATCH 0516/4122] perf stat: Display event stats using aggr counts

Now aggr counts are ready for use.  Convert the display routines to use
the aggr counts and update the shadow stat with them.  It doesn't need
to aggregate counts or collect aliases anymore during the display.  Get
rid of now unused struct perf_aggr_thread_value.

Note that there's a difference in the display order among the aggr mode.
For per-core/die/socket/node aggregation, it shows relevant events in
the same unit together, whereas global/thread/no aggregation it shows
the same events for different units together.  So it still uses separate
codes to display them due to the ordering.

One more thing to note is that it breaks per-core event display for now.
The next patch will fix it to have identical output as of now.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-19-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 421 ++++-----------------------------
 tools/perf/util/stat.c         |   5 -
 tools/perf/util/stat.h         |   9 -
 3 files changed, 49 insertions(+), 386 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 4113aa86772f7..bfae2784609c8 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -442,31 +442,6 @@ static void print_metric_header(struct perf_stat_config *config,
 		fprintf(os->fh, "%*s ", config->metric_only_len, unit);
 }
 
-static int first_shadow_map_idx(struct perf_stat_config *config,
-				struct evsel *evsel, const struct aggr_cpu_id *id)
-{
-	struct perf_cpu_map *cpus = evsel__cpus(evsel);
-	struct perf_cpu cpu;
-	int idx;
-
-	if (config->aggr_mode == AGGR_NONE)
-		return perf_cpu_map__idx(cpus, id->cpu);
-
-	if (config->aggr_mode == AGGR_THREAD)
-		return id->thread_idx;
-
-	if (!config->aggr_get_id)
-		return 0;
-
-	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
-		struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu);
-
-		if (aggr_cpu_id__equal(&cpu_id, id))
-			return idx;
-	}
-	return 0;
-}
-
 static void abs_printout(struct perf_stat_config *config,
 			 struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
 {
@@ -537,7 +512,7 @@ static bool is_mixed_hw_group(struct evsel *counter)
 static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
 		     struct evsel *counter, double uval,
 		     char *prefix, u64 run, u64 ena, double noise,
-		     struct runtime_stat *st)
+		     struct runtime_stat *st, int map_idx)
 {
 	struct perf_stat_output_ctx out;
 	struct outstate os = {
@@ -648,8 +623,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 		print_running(config, run, ena);
 	}
 
-	perf_stat__print_shadow_stats(config, counter, uval,
-				first_shadow_map_idx(config, counter, &id),
+	perf_stat__print_shadow_stats(config, counter, uval, map_idx,
 				&out, &config->metric_events, st);
 	if (!config->csv_output && !config->metric_only && !config->json_output) {
 		print_noise(config, counter, noise);
@@ -657,34 +631,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 	}
 }
 
-static void aggr_update_shadow(struct perf_stat_config *config,
-			       struct evlist *evlist)
-{
-	int idx, s;
-	struct perf_cpu cpu;
-	struct aggr_cpu_id s2, id;
-	u64 val;
-	struct evsel *counter;
-	struct perf_cpu_map *cpus;
-
-	for (s = 0; s < config->aggr_map->nr; s++) {
-		id = config->aggr_map->map[s];
-		evlist__for_each_entry(evlist, counter) {
-			cpus = evsel__cpus(counter);
-			val = 0;
-			perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
-				s2 = config->aggr_get_id(config, cpu);
-				if (!aggr_cpu_id__equal(&s2, &id))
-					continue;
-				val += perf_counts(counter->counts, idx, 0)->val;
-			}
-			perf_stat__update_shadow_stats(counter, val,
-					first_shadow_map_idx(config, counter, &id),
-					&rt_stat);
-		}
-	}
-}
-
 static void uniquify_event_name(struct evsel *counter)
 {
 	char *new_name;
@@ -721,137 +667,51 @@ static void uniquify_event_name(struct evsel *counter)
 	counter->uniquified_name = true;
 }
 
-static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter,
-			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
-				       bool first),
-			    void *data)
+static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config)
 {
-	struct evlist *evlist = counter->evlist;
-	struct evsel *alias;
-
-	alias = list_prepare_entry(counter, &(evlist->core.entries), core.node);
-	list_for_each_entry_continue (alias, &evlist->core.entries, core.node) {
-		/* Merge events with the same name, etc. but on different PMUs. */
-		if (!strcmp(evsel__name(alias), evsel__name(counter)) &&
-			alias->scale == counter->scale &&
-			alias->cgrp == counter->cgrp &&
-			!strcmp(alias->unit, counter->unit) &&
-			evsel__is_clock(alias) == evsel__is_clock(counter) &&
-			strcmp(alias->pmu_name, counter->pmu_name)) {
-			alias->merged_stat = true;
-			cb(config, alias, data, false);
-		}
-	}
+	return evsel__is_hybrid(evsel) && !config->hybrid_merge;
 }
 
-static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config,
-			 bool check)
+static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter)
 {
-	if (evsel__is_hybrid(counter)) {
-		if (check)
-			return config->hybrid_merge;
-		else
-			return !config->hybrid_merge;
-	}
-
-	return false;
-}
-
-static bool collect_data(struct perf_stat_config *config, struct evsel *counter,
-			    void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data,
-				       bool first),
-			    void *data)
-{
-	if (counter->merged_stat)
-		return false;
-	cb(config, counter, data, true);
-	if (config->no_merge || hybrid_merge(counter, config, false))
+	if (config->no_merge || hybrid_uniquify(counter, config))
 		uniquify_event_name(counter);
-	else if (counter->auto_merge_stats || hybrid_merge(counter, config, true))
-		collect_all_aliases(config, counter, cb, data);
-	return true;
-}
-
-struct aggr_data {
-	u64 ena, run, val;
-	struct aggr_cpu_id id;
-	int nr;
-	int cpu_map_idx;
-};
-
-static void aggr_cb(struct perf_stat_config *config,
-		    struct evsel *counter, void *data, bool first)
-{
-	struct aggr_data *ad = data;
-	int idx;
-	struct perf_cpu cpu;
-	struct perf_cpu_map *cpus;
-	struct aggr_cpu_id s2;
-
-	cpus = evsel__cpus(counter);
-	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
-		struct perf_counts_values *counts;
-
-		s2 = config->aggr_get_id(config, cpu);
-		if (!aggr_cpu_id__equal(&s2, &ad->id))
-			continue;
-		if (first)
-			ad->nr++;
-		counts = perf_counts(counter->counts, idx, 0);
-		/*
-		 * When any result is bad, make them all to give
-		 * consistent output in interval mode.
-		 */
-		if (counts->ena == 0 || counts->run == 0 ||
-		    counter->counts->scaled == -1) {
-			ad->ena = 0;
-			ad->run = 0;
-			break;
-		}
-		ad->val += counts->val;
-		ad->ena += counts->ena;
-		ad->run += counts->run;
-	}
 }
 
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
 				   char *prefix, bool metric_only,
-				   bool *first, struct perf_cpu cpu)
+				   bool *first)
 {
-	struct aggr_data ad;
 	FILE *output = config->output;
 	u64 ena, run, val;
-	int nr;
-	struct aggr_cpu_id id;
 	double uval;
+	struct perf_stat_evsel *ps = counter->stats;
+	struct perf_stat_aggr *aggr = &ps->aggr[s];
+	struct aggr_cpu_id id = config->aggr_map->map[s];
+	double avg = aggr->counts.val;
 
-	ad.id = id = config->aggr_map->map[s];
-	ad.val = ad.ena = ad.run = 0;
-	ad.nr = 0;
-	if (!collect_data(config, counter, aggr_cb, &ad))
+	if (counter->supported && aggr->nr == 0)
 		return;
 
-	if (perf_pmu__has_hybrid() && ad.ena == 0)
-		return;
+	uniquify_counter(config, counter);
+
+	val = aggr->counts.val;
+	ena = aggr->counts.ena;
+	run = aggr->counts.run;
 
-	nr = ad.nr;
-	ena = ad.ena;
-	run = ad.run;
-	val = ad.val;
 	if (*first && metric_only) {
 		*first = false;
-		aggr_printout(config, counter, id, nr);
+		aggr_printout(config, counter, id, aggr->nr);
 	}
 	if (prefix && !metric_only)
 		fprintf(output, "%s", prefix);
 
 	uval = val * counter->scale;
-	if (cpu.cpu != -1)
-		id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
 
-	printout(config, id, nr, counter, uval,
-		 prefix, run, ena, 1.0, &rt_stat);
+	printout(config, id, aggr->nr, counter, uval,
+		 prefix, run, ena, avg, &rt_stat, s);
+
 	if (!metric_only)
 		fputc('\n', output);
 }
@@ -869,8 +729,6 @@ static void print_aggr(struct perf_stat_config *config,
 	if (!config->aggr_map || !config->aggr_get_id)
 		return;
 
-	aggr_update_shadow(config, evlist);
-
 	/*
 	 * With metric_only everything is on a single line.
 	 * Without each counter has its own line.
@@ -881,188 +739,36 @@ static void print_aggr(struct perf_stat_config *config,
 
 		first = true;
 		evlist__for_each_entry(evlist, counter) {
+			if (counter->merged_stat)
+				continue;
+
 			print_counter_aggrdata(config, counter, s,
-					prefix, metric_only,
-					&first, (struct perf_cpu){ .cpu = -1 });
+					       prefix, metric_only,
+					       &first);
 		}
 		if (metric_only)
 			fputc('\n', output);
 	}
 }
 
-static int cmp_val(const void *a, const void *b)
-{
-	return ((struct perf_aggr_thread_value *)b)->val -
-		((struct perf_aggr_thread_value *)a)->val;
-}
-
-static struct perf_aggr_thread_value *sort_aggr_thread(
-					struct evsel *counter,
-					int *ret,
-					struct target *_target)
-{
-	int nthreads = perf_thread_map__nr(counter->core.threads);
-	int i = 0;
-	double uval;
-	struct perf_aggr_thread_value *buf;
-
-	buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value));
-	if (!buf)
-		return NULL;
-
-	for (int thread = 0; thread < nthreads; thread++) {
-		int idx;
-		u64 ena = 0, run = 0, val = 0;
-
-		perf_cpu_map__for_each_idx(idx, evsel__cpus(counter)) {
-			struct perf_counts_values *counts =
-				perf_counts(counter->counts, idx, thread);
-
-			val += counts->val;
-			ena += counts->ena;
-			run += counts->run;
-		}
-
-		uval = val * counter->scale;
-
-		/*
-		 * Skip value 0 when enabling --per-thread globally,
-		 * otherwise too many 0 output.
-		 */
-		if (uval == 0.0 && target__has_per_thread(_target))
-			continue;
-
-		buf[i].counter = counter;
-		buf[i].id = aggr_cpu_id__empty();
-		buf[i].id.thread_idx = thread;
-		buf[i].uval = uval;
-		buf[i].val = val;
-		buf[i].run = run;
-		buf[i].ena = ena;
-		i++;
-	}
-
-	qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val);
-
-	if (ret)
-		*ret = i;
-
-	return buf;
-}
-
-static void print_aggr_thread(struct perf_stat_config *config,
-			      struct target *_target,
-			      struct evsel *counter, char *prefix)
-{
-	FILE *output = config->output;
-	int thread, sorted_threads;
-	struct aggr_cpu_id id;
-	struct perf_aggr_thread_value *buf;
-
-	buf = sort_aggr_thread(counter, &sorted_threads, _target);
-	if (!buf) {
-		perror("cannot sort aggr thread");
-		return;
-	}
-
-	for (thread = 0; thread < sorted_threads; thread++) {
-		if (prefix)
-			fprintf(output, "%s", prefix);
-
-		id = buf[thread].id;
-		printout(config, id, 0, buf[thread].counter, buf[thread].uval,
-			 prefix, buf[thread].run, buf[thread].ena, 1.0,
-			 &rt_stat);
-		fputc('\n', output);
-	}
-
-	free(buf);
-}
-
-struct caggr_data {
-	double avg, avg_enabled, avg_running;
-};
-
-static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused,
-			    struct evsel *counter, void *data,
-			    bool first __maybe_unused)
-{
-	struct caggr_data *cd = data;
-	struct perf_counts_values *aggr = &counter->counts->aggr;
-
-	cd->avg += aggr->val;
-	cd->avg_enabled += aggr->ena;
-	cd->avg_running += aggr->run;
-}
-
-/*
- * Print out the results of a single counter:
- * aggregated counts in system-wide mode
- */
-static void print_counter_aggr(struct perf_stat_config *config,
-			       struct evsel *counter, char *prefix)
-{
-	bool metric_only = config->metric_only;
-	FILE *output = config->output;
-	double uval;
-	struct caggr_data cd = { .avg = 0.0 };
-
-	if (!collect_data(config, counter, counter_aggr_cb, &cd))
-		return;
-
-	if (prefix && !metric_only)
-		fprintf(output, "%s", prefix);
-
-	uval = cd.avg * counter->scale;
-	printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running,
-		 cd.avg_enabled, cd.avg, &rt_stat);
-	if (!metric_only)
-		fprintf(output, "\n");
-}
-
-static void counter_cb(struct perf_stat_config *config __maybe_unused,
-		       struct evsel *counter, void *data,
-		       bool first __maybe_unused)
-{
-	struct aggr_data *ad = data;
-
-	ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val;
-	ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena;
-	ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run;
-}
-
-/*
- * Print out the results of a single counter:
- * does not use aggregated count in system-wide
- */
 static void print_counter(struct perf_stat_config *config,
 			  struct evsel *counter, char *prefix)
 {
-	FILE *output = config->output;
-	u64 ena, run, val;
-	double uval;
-	int idx;
-	struct perf_cpu cpu;
-	struct aggr_cpu_id id;
-
-	perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) {
-		struct aggr_data ad = { .cpu_map_idx = idx };
-
-		if (!collect_data(config, counter, counter_cb, &ad))
-			return;
-		val = ad.val;
-		ena = ad.ena;
-		run = ad.run;
+	bool metric_only = config->metric_only;
+	bool first = false;
+	int s;
 
-		if (prefix)
-			fprintf(output, "%s", prefix);
+	/* AGGR_THREAD doesn't have config->aggr_get_id */
+	if (!config->aggr_map)
+		return;
 
-		uval = val * counter->scale;
-		id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
-		printout(config, id, 0, counter, uval, prefix,
-			 run, ena, 1.0, &rt_stat);
+	if (counter->merged_stat)
+		return;
 
-		fputc('\n', output);
+	for (s = 0; s < config->aggr_map->nr; s++) {
+		print_counter_aggrdata(config, counter, s,
+				       prefix, metric_only,
+				       &first);
 	}
 }
 
@@ -1081,6 +787,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			u64 ena, run, val;
 			double uval;
 			struct aggr_cpu_id id;
+			struct perf_stat_evsel *ps = counter->stats;
 			int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
 
 			if (counter_idx < 0)
@@ -1093,13 +800,13 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 				aggr_printout(config, counter, id, 0);
 				first = false;
 			}
-			val = perf_counts(counter->counts, counter_idx, 0)->val;
-			ena = perf_counts(counter->counts, counter_idx, 0)->ena;
-			run = perf_counts(counter->counts, counter_idx, 0)->run;
+			val = ps->aggr[counter_idx].counts.val;
+			ena = ps->aggr[counter_idx].counts.ena;
+			run = ps->aggr[counter_idx].counts.run;
 
 			uval = val * counter->scale;
 			printout(config, id, 0, counter, uval, prefix,
-				 run, ena, 1.0, &rt_stat);
+				 run, ena, 1.0, &rt_stat, counter_idx);
 		}
 		if (!first)
 			fputc('\n', config->output);
@@ -1135,8 +842,8 @@ static void print_metric_headers(struct perf_stat_config *config,
 	};
 	bool first = true;
 
-		if (config->json_output && !config->interval)
-			fprintf(config->output, "{");
+	if (config->json_output && !config->interval)
+		fprintf(config->output, "{");
 
 	if (prefix && !config->json_output)
 		fprintf(config->output, "%s", prefix);
@@ -1379,31 +1086,6 @@ static void print_footer(struct perf_stat_config *config)
 			"the same PMU. Try reorganizing the group.\n");
 }
 
-static void print_percore_thread(struct perf_stat_config *config,
-				 struct evsel *counter, char *prefix)
-{
-	int s;
-	struct aggr_cpu_id s2, id;
-	struct perf_cpu_map *cpus;
-	bool first = true;
-	int idx;
-	struct perf_cpu cpu;
-
-	cpus = evsel__cpus(counter);
-	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {
-		s2 = config->aggr_get_id(config, cpu);
-		for (s = 0; s < config->aggr_map->nr; s++) {
-			id = config->aggr_map->map[s];
-			if (aggr_cpu_id__equal(&s2, &id))
-				break;
-		}
-
-		print_counter_aggrdata(config, counter, s,
-				       prefix, false,
-				       &first, cpu);
-	}
-}
-
 static void print_percore(struct perf_stat_config *config,
 			  struct evsel *counter, char *prefix)
 {
@@ -1416,15 +1098,14 @@ static void print_percore(struct perf_stat_config *config,
 		return;
 
 	if (config->percore_show_thread)
-		return print_percore_thread(config, counter, prefix);
+		return print_counter(config, counter, prefix);
 
 	for (s = 0; s < config->aggr_map->nr; s++) {
 		if (prefix && metric_only)
 			fprintf(output, "%s", prefix);
 
 		print_counter_aggrdata(config, counter, s,
-				prefix, metric_only,
-				&first, (struct perf_cpu){ .cpu = -1 });
+				       prefix, metric_only, &first);
 	}
 
 	if (metric_only)
@@ -1469,17 +1150,13 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 		print_aggr(config, evlist, prefix);
 		break;
 	case AGGR_THREAD:
-		evlist__for_each_entry(evlist, counter) {
-			print_aggr_thread(config, _target, counter, prefix);
-		}
-		break;
 	case AGGR_GLOBAL:
 		if (config->iostat_run)
 			iostat_print_counters(evlist, config, ts, prefix = buf,
-					      print_counter_aggr);
+					      print_counter);
 		else {
 			evlist__for_each_entry(evlist, counter) {
-				print_counter_aggr(config, counter, prefix);
+				print_counter(config, counter, prefix);
 			}
 			if (metric_only)
 				fputc('\n', config->output);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index c0955a0427ab3..0316557adce9e 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -565,11 +565,6 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 			evsel__name(counter), count[0], count[1], count[2]);
 	}
 
-	/*
-	 * Save the full runtime - to allow normalization during printout:
-	 */
-	perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat);
-
 	return 0;
 }
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 3d413ba8c68a6..382a1ab92ce18 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -224,15 +224,6 @@ static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rus
 struct evsel;
 struct evlist;
 
-struct perf_aggr_thread_value {
-	struct evsel *counter;
-	struct aggr_cpu_id id;
-	double uval;
-	u64 val;
-	u64 run;
-	u64 ena;
-};
-
 bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id);
 
 #define perf_stat_evsel__is(evsel, id) \
-- 
GitLab


From cec94d69636a023302d484ec036158b792001d3c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:26 -0700
Subject: [PATCH 0517/4122] perf stat: Display percore events properly

The recent change in the perf stat broke the percore event display.
Note that the aggr counts are already processed so that the every
sibling thread in the same core will get the per-core counter values.

Check percore evsels and skip the sibling threads in the display.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-20-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c      | 16 ----------------
 tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++++--
 2 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d7c52cef70a3a..9d35a33389766 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1404,18 +1404,6 @@ static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *con
 	return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu);
 }
 
-static bool term_percore_set(void)
-{
-	struct evsel *counter;
-
-	evlist__for_each_entry(evsel_list, counter) {
-		if (counter->percore)
-			return true;
-	}
-
-	return false;
-}
-
 static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
 {
 	switch (aggr_mode) {
@@ -1428,8 +1416,6 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode)
 	case AGGR_NODE:
 		return aggr_cpu_id__node;
 	case AGGR_NONE:
-		if (term_percore_set())
-			return aggr_cpu_id__core;
 		return aggr_cpu_id__cpu;
 	case AGGR_GLOBAL:
 		return aggr_cpu_id__global;
@@ -1453,8 +1439,6 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode)
 	case AGGR_NODE:
 		return perf_stat__get_node_cached;
 	case AGGR_NONE:
-		if (term_percore_set())
-			return perf_stat__get_core_cached;
 		return perf_stat__get_cpu_cached;
 	case AGGR_GLOBAL:
 		return perf_stat__get_global_cached;
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bfae2784609c8..657434cd29eea 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1091,7 +1091,8 @@ static void print_percore(struct perf_stat_config *config,
 {
 	bool metric_only = config->metric_only;
 	FILE *output = config->output;
-	int s;
+	struct cpu_aggr_map *core_map;
+	int s, c, i;
 	bool first = true;
 
 	if (!config->aggr_map || !config->aggr_get_id)
@@ -1100,13 +1101,35 @@ static void print_percore(struct perf_stat_config *config,
 	if (config->percore_show_thread)
 		return print_counter(config, counter, prefix);
 
-	for (s = 0; s < config->aggr_map->nr; s++) {
+	core_map = cpu_aggr_map__empty_new(config->aggr_map->nr);
+	if (core_map == NULL) {
+		fprintf(output, "Cannot allocate per-core aggr map for display\n");
+		return;
+	}
+
+	for (s = 0, c = 0; s < config->aggr_map->nr; s++) {
+		struct perf_cpu curr_cpu = config->aggr_map->map[s].cpu;
+		struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL);
+		bool found = false;
+
+		for (i = 0; i < c; i++) {
+			if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) {
+				found = true;
+				break;
+			}
+		}
+		if (found)
+			continue;
+
 		if (prefix && metric_only)
 			fprintf(output, "%s", prefix);
 
 		print_counter_aggrdata(config, counter, s,
 				       prefix, metric_only, &first);
+
+		core_map->map[c++] = core_id;
 	}
+	free(core_map);
 
 	if (metric_only)
 		fputc('\n', output);
-- 
GitLab


From 8b76a3188b85724f345ea5d03ff206cc9bbe6d72 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 17 Oct 2022 19:02:27 -0700
Subject: [PATCH 0518/4122] perf stat: Remove unused perf_counts.aggr field

The aggr field in the struct perf_counts is to keep the aggregated value
in the AGGR_GLOBAL for the old code.  But it's not used anymore.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221018020227.85905-21-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/counts.c |  1 -
 tools/perf/util/counts.h |  1 -
 tools/perf/util/stat.c   | 39 ++++++---------------------------------
 3 files changed, 6 insertions(+), 35 deletions(-)

diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c
index 7a447d9184588..11cd85b278a63 100644
--- a/tools/perf/util/counts.c
+++ b/tools/perf/util/counts.c
@@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts)
 {
 	xyarray__reset(counts->loaded);
 	xyarray__reset(counts->values);
-	memset(&counts->aggr, 0, sizeof(struct perf_counts_values));
 }
 
 void evsel__reset_counts(struct evsel *evsel)
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 5de275194f2b1..42760242e0df0 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -11,7 +11,6 @@ struct evsel;
 
 struct perf_counts {
 	s8			  scaled;
-	struct perf_counts_values aggr;
 	struct xyarray		  *values;
 	struct xyarray		  *loaded;
 };
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 0316557adce9e..3a432a949d469 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -308,8 +308,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel)
 				*perf_counts(evsel->prev_raw_counts, idx, thread);
 		}
 	}
-
-	evsel->counts->aggr = evsel->prev_raw_counts->aggr;
 }
 
 void evlist__copy_prev_raw_counts(struct evlist *evlist)
@@ -320,26 +318,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist)
 		evsel__copy_prev_raw_counts(evsel);
 }
 
-void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
-{
-	struct evsel *evsel;
-
-	/*
-	 * To collect the overall statistics for interval mode,
-	 * we copy the counts from evsel->prev_raw_counts to
-	 * evsel->counts. The perf_stat_process_counter creates
-	 * aggr values from per cpu values, but the per cpu values
-	 * are 0 for AGGR_GLOBAL. So we use a trick that saves the
-	 * previous aggr value to the first member of perf_counts,
-	 * then aggr calculation in process_counter_values can work
-	 * correctly.
-	 */
-	evlist__for_each_entry(evlist, evsel) {
-		*perf_counts(evsel->prev_raw_counts, 0, 0) =
-			evsel->prev_raw_counts->aggr;
-	}
-}
-
 static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
 {
 	uint64_t *key = (uint64_t *) __key;
@@ -442,7 +420,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		       int cpu_map_idx, int thread,
 		       struct perf_counts_values *count)
 {
-	struct perf_counts_values *aggr = &evsel->counts->aggr;
 	struct perf_stat_evsel *ps = evsel->stats;
 	static struct perf_counts_values zero;
 	bool skip = false;
@@ -511,12 +488,6 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel,
 		}
 	}
 
-	if (config->aggr_mode == AGGR_GLOBAL) {
-		aggr->val += count->val;
-		aggr->ena += count->ena;
-		aggr->run += count->run;
-	}
-
 	return 0;
 }
 
@@ -541,13 +512,10 @@ static int process_counter_maps(struct perf_stat_config *config,
 int perf_stat_process_counter(struct perf_stat_config *config,
 			      struct evsel *counter)
 {
-	struct perf_counts_values *aggr = &counter->counts->aggr;
 	struct perf_stat_evsel *ps = counter->stats;
-	u64 *count = counter->counts->aggr.values;
+	u64 *count;
 	int ret;
 
-	aggr->val = aggr->ena = aggr->run = 0;
-
 	if (counter->per_pkg)
 		evsel__zero_per_pkg(counter);
 
@@ -558,6 +526,11 @@ int perf_stat_process_counter(struct perf_stat_config *config,
 	if (config->aggr_mode != AGGR_GLOBAL)
 		return 0;
 
+	/*
+	 * GLOBAL aggregation mode only has a single aggr counts,
+	 * so we can use ps->aggr[0] as the actual output.
+	 */
+	count = ps->aggr[0].counts.values;
 	update_stats(&ps->res_stats, *count);
 
 	if (verbose > 0) {
-- 
GitLab


From a87edbec35725ced484f6b8275f1246ed7194329 Mon Sep 17 00:00:00 2001
From: Yang Jihong <yangjihong1@huawei.com>
Date: Sat, 22 Oct 2022 17:27:34 +0800
Subject: [PATCH 0519/4122] perf daemon: Complete list of supported subcommand
 in help message

perf daemon supports start, signal, stop and ping subcommands, complete it

Before:

  # perf daemon -h
   Usage: perf daemon start [<options>]
      or: perf daemon [<options>]

      -v, --verbose         be more verbose
      -x, --field-separator[=<field separator>]
                            print counts with custom separator
          --base <directory>
                            base directory
          --config <config file>
                            config file path

After:

  # perf daemon -h

   Usage: perf daemon {start|signal|stop|ping} [<options>]
      or: perf daemon [<options>]

      -v, --verbose         be more verbose
      -x, --field-separator[=<field separator>]
                            print counts with custom separator
          --base <directory>
                            base directory
          --config <config file>
                            config file path

Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221022092735.114967-3-yangjihong1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-daemon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 6cb3f6cc36d0a..3ce0c960ccc9b 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -100,7 +100,7 @@ static struct daemon __daemon = {
 };
 
 static const char * const daemon_usage[] = {
-	"perf daemon start [<options>]",
+	"perf daemon {start|signal|stop|ping} [<options>]",
 	"perf daemon [<options>]",
 	NULL
 };
-- 
GitLab


From 0cef66a98420f1210be697ed47bd7417ed428847 Mon Sep 17 00:00:00 2001
From: Yang Jihong <yangjihong1@huawei.com>
Date: Sat, 22 Oct 2022 17:27:35 +0800
Subject: [PATCH 0520/4122] perf config: Add missing newline on pr_warning()
 call in home_perfconfig()

Add missing newline on pr_warning() call in home_perfconfig().

Before:
  # perf record
  File /home/yangjihong/.perfconfig not owned by current user or root, ignoring it.Couldn't synthesize bpf events.

After:
  # perf record
  File /home/yangjihong/.perfconfig not owned by current user or root, ignoring it.
  Couldn't synthesize bpf events.

Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221022092735.114967-4-yangjihong1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/config.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c
index 3f2ae19a1dd40..658170b8dcef7 100644
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@@ -556,7 +556,7 @@ static char *home_perfconfig(void)
 
 	config = strdup(mkpath("%s/.perfconfig", home));
 	if (config == NULL) {
-		pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home);
+		pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home);
 		return NULL;
 	}
 
@@ -564,7 +564,7 @@ static char *home_perfconfig(void)
 		goto out_free;
 
 	if (st.st_uid && (st.st_uid != geteuid())) {
-		pr_warning("File %s not owned by current user or root, ignoring it.", config);
+		pr_warning("File %s not owned by current user or root, ignoring it.\n", config);
 		goto out_free;
 	}
 
-- 
GitLab


From 743ef218c2fbe63502615a2044977041ee068322 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 25 Oct 2022 17:25:18 -0300
Subject: [PATCH 0521/4122] perf unwind arm64: Remove needless event.h &
 thread.h includes

To reduce compile time and header dependency chains just add forward
declarations for pointer types and include linux/types.h for u64.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/arm64-frame-pointer-unwind-support.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h
index 32af9ce94398e..42d3a45490f50 100644
--- a/tools/perf/util/arm64-frame-pointer-unwind-support.h
+++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h
@@ -2,8 +2,10 @@
 #ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
 #define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H
 
-#include "event.h"
-#include "thread.h"
+#include <linux/types.h>
+
+struct perf_sample;
+struct thread;
 
 u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx);
 
-- 
GitLab


From 6bc13cab5798bd9b049694983ae5702666d24e83 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 25 Oct 2022 17:32:19 -0300
Subject: [PATCH 0522/4122] perf arch x86: Add missing stdlib.h to get free()
 prototype

It was getting indirectly, out of luck, add it.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/util/event.c | 1 +
 tools/perf/arch/x86/util/tsc.c   | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index e670f35475815..55ff6aec10fdd 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -2,6 +2,7 @@
 #include <linux/types.h>
 #include <linux/string.h>
 #include <linux/zalloc.h>
+#include <stdlib.h>
 
 #include "../../../util/event.h"
 #include "../../../util/synthetic-events.h"
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index eb2b5195bd020..9b99f48b923cf 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -2,6 +2,7 @@
 #include <linux/types.h>
 #include <math.h>
 #include <string.h>
+#include <stdlib.h>
 
 #include "../../../util/debug.h"
 #include "../../../util/tsc.h"
-- 
GitLab


From 06bf28cbc63287c69fe834b527127a56b65de2d7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 25 Oct 2022 17:33:04 -0300
Subject: [PATCH 0523/4122] perf scripting python: Add missing util/perf_regs.h
 include to get perf_reg_name() prototype

It was getting it via event.h, that doesn't need that include anymore
and will drop it.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/scripting-engines/trace-event-python.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 7bc8559dce6af..1985d1a42a224 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -52,6 +52,7 @@
 #include "print_binary.h"
 #include "stat.h"
 #include "mem-events.h"
+#include "util/perf_regs.h"
 
 #if PY_MAJOR_VERSION < 3
 #define _PyUnicode_FromString(arg) \
-- 
GitLab


From b15cf900d11d4db2c2dac544a27f3e1217bdd0d4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 25 Oct 2022 17:34:44 -0300
Subject: [PATCH 0524/4122] perf event: Drop perf_regs.h include, not needed
 anymore

Since commit c897899752478d4c ("perf tools: Prevent out-of-bounds access
to registers") the util/event.h header doesn't use anything from
util/perf_regs.h, so drop it to untangle the header dependency tree a
bit, speeding up compilation.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/event.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 12eae69170225..65495f6945b4d 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -12,8 +12,6 @@
 #include <perf/event.h>
 #include <linux/types.h>
 
-#include "perf_regs.h"
-
 struct dso;
 struct machine;
 struct perf_event_attr;
-- 
GitLab


From ad7ad6b5ddf63b436a5344fb686887f2d8b7cf3d Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 20 Oct 2022 18:25:09 +0300
Subject: [PATCH 0525/4122] perf scripts python: intel-pt-events.py: Add
 ability interleave output

Intel PT timestamps are not provided for every branch, let alone every
instruction, so there can be many samples with the same timestamp. With
per-cpu contexts, decoding is done for each CPU in turn, which can make it
difficult to see what is happening on different CPUs at the same time.
Currently the interleaving from perf script --itrace=i0ns is quite coarse
grained. There are often long stretches executing on one CPU and nothing on
another.

Some people are interested in seeing what happened on multiple CPUs before
a crash to debug races etc.

To improve perf script interleaving for parallel execution, the
intel-pt-events.py script has been enhanced to enable interleaving the
output with the same timestamp from different CPUs. It is understood that
interleaving is not perfect or causal.

Add parameter --interleave [<n>] to interleave sample output for the same
timestamp so that no more than n samples for a CPU are displayed in a row.
'n' defaults to 4. Note this only affects the order of output, and only
when the timestamp is the same.

Example:

  $ perf script intel-pt-events.py --insn-trace --interleave 3
  ...
  bash  2267/2267  [004]  9323.692625625  563caa3c86f0  jz 0x563caa3c89c7        run_pending_traps+0x30 (/usr/bin/bash)   IPC: 1.52 (38/25)
  bash  2267/2267  [004]  9323.692625625  563caa3c89c7  movq  0x118(%rsp), %rax  run_pending_traps+0x307 (/usr/bin/bash)
  bash  2267/2267  [004]  9323.692625625  563caa3c89cf  subq  %fs:0x28, %rax     run_pending_traps+0x30f (/usr/bin/bash)
  bash  2270/2270  [007]  9323.692625625  55dc58cabf02  jz 0x55dc58cabf48        unquoted_glob_pattern_p+0x102 (/usr/bin/bash)   IPC: 1.56 (25/16)
  bash  2270/2270  [007]  9323.692625625  55dc58cabf04  cmp $0x5d, %al           unquoted_glob_pattern_p+0x104 (/usr/bin/bash)
  bash  2270/2270  [007]  9323.692625625  55dc58cabf06  jnz 0x55dc58cabf10       unquoted_glob_pattern_p+0x106 (/usr/bin/bash)
  bash  2264/2264  [001]  9323.692625625  7fd556a4376c  jbe 0x7fd556a43ac8       round_and_return+0x3fc (/usr/lib/x86_64-linux-gnu/libc.so.6)   IPC: 4.30 (43/10)
  bash  2264/2264  [001]  9323.692625625  7fd556a43772  and $0x8, %edx           round_and_return+0x402 (/usr/lib/x86_64-linux-gnu/libc.so.6)
  bash  2264/2264  [001]  9323.692625625  7fd556a43775  jnz 0x7fd556a43ac8       round_and_return+0x405 (/usr/lib/x86_64-linux-gnu/libc.so.6)
  bash  2267/2267  [004]  9323.692625625  563caa3c89d8  jnz 0x563caa3c8b11       run_pending_traps+0x318 (/usr/bin/bash)
  bash  2267/2267  [004]  9323.692625625  563caa3c89de  add $0x128, %rsp         run_pending_traps+0x31e (/usr/bin/bash)
  bash  2267/2267  [004]  9323.692625625  563caa3c89e5  popq  %rbx               run_pending_traps+0x325 (/usr/bin/bash)
  ...

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20221020152509.5298-1-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-intel-pt.txt   | 12 +++-
 tools/perf/scripts/python/intel-pt-events.py | 65 +++++++++++++++++++-
 2 files changed, 74 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt
index 92464a5d7eafd..7b6ccd2fa3bf1 100644
--- a/tools/perf/Documentation/perf-intel-pt.txt
+++ b/tools/perf/Documentation/perf-intel-pt.txt
@@ -189,8 +189,16 @@ There is also script intel-pt-events.py which provides an example of how to
 unpack the raw data for power events and PTWRITE. The script also displays
 branches, and supports 2 additional modes selected by option:
 
- --insn-trace - instruction trace
- --src-trace - source trace
+ - --insn-trace - instruction trace
+ - --src-trace - source trace
+
+The intel-pt-events.py script also has options:
+
+ - --all-switch-events - display all switch events, not only the last consecutive.
+ - --interleave [<n>] - interleave sample output for the same timestamp so that
+ no more than n samples for a CPU are displayed in a row. 'n' defaults to 4.
+ Note this only affects the order of output, and only when the timestamp is the
+ same.
 
 As mentioned above, it is easy to capture too much data.  One way to limit the
 data captured is to use 'snapshot' mode which is explained further below.
diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py
index 6be7fd8fd6158..08862a2582f44 100644
--- a/tools/perf/scripts/python/intel-pt-events.py
+++ b/tools/perf/scripts/python/intel-pt-events.py
@@ -13,10 +13,12 @@
 
 from __future__ import print_function
 
+import io
 import os
 import sys
 import struct
 import argparse
+import contextlib
 
 from libxed import LibXED
 from ctypes import create_string_buffer, addressof
@@ -39,6 +41,11 @@ glb_src			= False
 glb_source_file_name	= None
 glb_line_number		= None
 glb_dso			= None
+glb_stash_dict		= {}
+glb_output		= None
+glb_output_pos		= 0
+glb_cpu			= -1
+glb_time		= 0
 
 def get_optional_null(perf_dict, field):
 	if field in perf_dict:
@@ -70,6 +77,7 @@ def trace_begin():
 	ap.add_argument("--insn-trace", action='store_true')
 	ap.add_argument("--src-trace", action='store_true')
 	ap.add_argument("--all-switch-events", action='store_true')
+	ap.add_argument("--interleave", type=int, nargs='?', const=4, default=0)
 	global glb_args
 	global glb_insn
 	global glb_src
@@ -94,11 +102,39 @@ def trace_begin():
 	perf_set_itrace_options(perf_script_context, itrace)
 
 def trace_end():
+	if glb_args.interleave:
+		flush_stashed_output()
 	print("End")
 
 def trace_unhandled(event_name, context, event_fields_dict):
 		print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]))
 
+def stash_output():
+	global glb_stash_dict
+	global glb_output_pos
+	output_str = glb_output.getvalue()[glb_output_pos:]
+	n = len(output_str)
+	if n:
+		glb_output_pos += n
+		if glb_cpu not in glb_stash_dict:
+			glb_stash_dict[glb_cpu] = []
+		glb_stash_dict[glb_cpu].append(output_str)
+
+def flush_stashed_output():
+	global glb_stash_dict
+	while glb_stash_dict:
+		cpus = list(glb_stash_dict.keys())
+		# Output at most glb_args.interleave output strings per cpu
+		for cpu in cpus:
+			items = glb_stash_dict[cpu]
+			countdown = glb_args.interleave
+			while len(items) and countdown:
+				sys.stdout.write(items[0])
+				del items[0]
+				countdown -= 1
+			if not items:
+				del glb_stash_dict[cpu]
+
 def print_ptwrite(raw_buf):
 	data = struct.unpack_from("<IQ", raw_buf)
 	flags = data[0]
@@ -375,15 +411,40 @@ def do_process_event(param_dict):
 		print_common_start(comm, sample, name)
 		print_common_ip(param_dict, sample, symbol, dso)
 
+def interleave_events(param_dict):
+	global glb_cpu
+	global glb_time
+	global glb_output
+	global glb_output_pos
+
+	sample  = param_dict["sample"]
+	glb_cpu = sample["cpu"]
+	ts      = sample["time"]
+
+	if glb_time != ts:
+		glb_time = ts
+		flush_stashed_output()
+
+	glb_output_pos = 0
+	with contextlib.redirect_stdout(io.StringIO()) as glb_output:
+		do_process_event(param_dict)
+
+	stash_output()
+
 def process_event(param_dict):
 	try:
-		do_process_event(param_dict)
+		if glb_args.interleave:
+			interleave_events(param_dict)
+		else:
+			do_process_event(param_dict)
 	except broken_pipe_exception:
 		# Stop python printing broken pipe errors and traceback
 		sys.stdout = open(os.devnull, 'w')
 		sys.exit(1)
 
 def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
+	if glb_args.interleave:
+		flush_stashed_output()
 	if len(x) >= 2 and x[0]:
 		machine_pid = x[0]
 		vcpu = x[1]
@@ -403,6 +464,8 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x):
 		sys.exit(1)
 
 def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x):
+	if glb_args.interleave:
+		flush_stashed_output()
 	if out:
 		out_str = "Switch out "
 	else:
-- 
GitLab


From 439dbef2a94e825241160e1bbd050d01e5728608 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:36 -0700
Subject: [PATCH 0526/4122] perf test: Do not use instructions:u explicitly

I think it's to support non-root user tests.  But perf record can handle
the case and fall back to a software event (cpu-clock).  Practically this
would affect when it's run on a VM, but it seems no reason to prevent running
the test in the guest.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 301f95427159d..747c33a1ec454 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -21,18 +21,18 @@ trap trap_cleanup exit term int
 
 test_per_thread() {
   echo "Basic --per-thread mode test"
-  if ! perf record -e instructions:u -o ${perfdata} --quiet true 2> /dev/null
+  if ! perf record -o /dev/null --quiet true 2> /dev/null
   then
-    echo "Per-thread record [Skipped instructions:u not supported]"
+    echo "Per-thread record [Skipped event not supported]"
     if [ $err -ne 1 ]
     then
       err=2
     fi
     return
   fi
-  if ! perf record -e instructions:u --per-thread -o ${perfdata} true 2> /dev/null
+  if ! perf record --per-thread -o ${perfdata} true 2> /dev/null
   then
-    echo "Per-thread record of instructions:u [Failed]"
+    echo "Per-thread record [Failed record]"
     err=1
     return
   fi
@@ -49,7 +49,7 @@ test_register_capture() {
   echo "Register capture test"
   if ! perf list | egrep -q 'br_inst_retired.near_call'
   then
-    echo "Register capture test [Skipped missing instruction]"
+    echo "Register capture test [Skipped missing event]"
     if [ $err -ne 1 ]
     then
       err=2
-- 
GitLab


From 9e455f4f29e3aeab29b6c1f7d086ddc43fc1a502 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:37 -0700
Subject: [PATCH 0527/4122] perf test: Fix shellcheck issues in the record test

Basically there are 3 issues:

 1. quote shell expansion
 2. do not use egrep
 3. use upper case letters for signal names

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 747c33a1ec454..4640714628092 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -8,16 +8,16 @@ err=0
 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 
 cleanup() {
-  rm -f ${perfdata}
-  rm -f ${perfdata}.old
-  trap - exit term int
+  rm -f "${perfdata}"
+  rm -f "${perfdata}".old
+  trap - EXIT TERM INT
 }
 
 trap_cleanup() {
   cleanup
   exit 1
 }
-trap trap_cleanup exit term int
+trap trap_cleanup EXIT TERM INT
 
 test_per_thread() {
   echo "Basic --per-thread mode test"
@@ -30,13 +30,13 @@ test_per_thread() {
     fi
     return
   fi
-  if ! perf record --per-thread -o ${perfdata} true 2> /dev/null
+  if ! perf record --per-thread -o "${perfdata}" true 2> /dev/null
   then
     echo "Per-thread record [Failed record]"
     err=1
     return
   fi
-  if ! perf report -i ${perfdata} -q | egrep -q true
+  if ! perf report -i "${perfdata}" -q | grep -q true
   then
     echo "Per-thread record [Failed missing output]"
     err=1
@@ -47,7 +47,7 @@ test_per_thread() {
 
 test_register_capture() {
   echo "Register capture test"
-  if ! perf list | egrep -q 'br_inst_retired.near_call'
+  if ! perf list | grep -q 'br_inst_retired.near_call'
   then
     echo "Register capture test [Skipped missing event]"
     if [ $err -ne 1 ]
@@ -56,7 +56,7 @@ test_register_capture() {
     fi
     return
   fi
-  if ! perf record --intr-regs=\? 2>&1 | egrep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
+  if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
   then
     echo "Register capture test [Skipped missing registers]"
     return
@@ -64,7 +64,7 @@ test_register_capture() {
   if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
     -c 1000 --per-thread true 2> /dev/null \
     | perf script -F ip,sym,iregs -i - 2> /dev/null \
-    | egrep -q "DI:"
+    | grep -q "DI:"
   then
     echo "Register capture test [Failed missing output]"
     err=1
-- 
GitLab


From 4321ad4ee98b7325d6133e1d5b7fa25bcbdeb57e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:38 -0700
Subject: [PATCH 0528/4122] perf test: Use a test program in 'perf record'
 tests

If the system has cc it could build a test program with two threads
and then use it for more detailed testing.  Also it accepts an option
to run a thread forever to ensure multi-thread runs.

If cc is not found, it falls back to use the default value 'true'.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 64 ++++++++++++++++++++++++++++++--
 1 file changed, 60 insertions(+), 4 deletions(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 4640714628092..9529814812390 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -6,10 +6,17 @@ set -e
 
 err=0
 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
+testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX)
+testsym="test_loop"
 
 cleanup() {
   rm -f "${perfdata}"
   rm -f "${perfdata}".old
+
+  if [ "${testprog}" != "true" ]; then
+    rm -f "${testprog}"
+  fi
+
   trap - EXIT TERM INT
 }
 
@@ -19,9 +26,56 @@ trap_cleanup() {
 }
 trap trap_cleanup EXIT TERM INT
 
+build_test_program() {
+  if ! [ -x "$(command -v cc)" ]; then
+    # No CC found. Fall back to 'true'
+    testprog=true
+    testsym=true
+    return
+  fi
+
+  echo "Build a test program"
+  cat <<EOF | cc -o ${testprog} -xc - -pthread
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+void test_loop(void) {
+  volatile int count = 1000000;
+
+  while (count--)
+    continue;
+}
+
+void *thfunc(void *arg) {
+  int forever = *(int *)arg;
+
+  do {
+    test_loop();
+  } while (forever);
+
+  return NULL;
+}
+
+int main(int argc, char *argv[]) {
+  pthread_t th;
+  int forever = 0;
+
+  if (argc > 1)
+    forever = atoi(argv[1]);
+
+  pthread_create(&th, NULL, thfunc, &forever);
+  test_loop();
+  pthread_join(th, NULL);
+
+  return 0;
+}
+EOF
+}
+
 test_per_thread() {
   echo "Basic --per-thread mode test"
-  if ! perf record -o /dev/null --quiet true 2> /dev/null
+  if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null
   then
     echo "Per-thread record [Skipped event not supported]"
     if [ $err -ne 1 ]
@@ -30,13 +84,13 @@ test_per_thread() {
     fi
     return
   fi
-  if ! perf record --per-thread -o "${perfdata}" true 2> /dev/null
+  if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null
   then
     echo "Per-thread record [Failed record]"
     err=1
     return
   fi
-  if ! perf report -i "${perfdata}" -q | grep -q true
+  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
   then
     echo "Per-thread record [Failed missing output]"
     err=1
@@ -62,7 +116,7 @@ test_register_capture() {
     return
   fi
   if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
-    -c 1000 --per-thread true 2> /dev/null \
+    -c 1000 --per-thread ${testprog} 2> /dev/null \
     | perf script -F ip,sym,iregs -i - 2> /dev/null \
     | grep -q "DI:"
   then
@@ -73,6 +127,8 @@ test_register_capture() {
   echo "Register capture test [Success]"
 }
 
+build_test_program
+
 test_per_thread
 test_register_capture
 
-- 
GitLab


From 6b7e02ab1262141cfebd05b68410fa297f557961 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:39 -0700
Subject: [PATCH 0529/4122] perf test: Wait for a new thread when testing
 --per-thread record

Just running the target program is not enough to test multi-thread
target because it'd be racy perf vs target startup.  I used the
initial delay but it cannot guarantee for perf to see the thread.

Instead, use wait_for_threads helper from shell/lib/waiting.sh to make
sure it starts the sibling thread first.  Then perf record can use -p
option to profile the target process.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 9529814812390..d1640d1daf2e3 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -4,6 +4,9 @@
 
 set -e
 
+shelldir=$(dirname "$0")
+. "${shelldir}"/lib/waiting.sh
+
 err=0
 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX)
@@ -96,6 +99,30 @@ test_per_thread() {
     err=1
     return
   fi
+
+  # run the test program in background (forever)
+  ${testprog} 1 &
+  TESTPID=$!
+
+  rm -f "${perfdata}"
+
+  wait_for_threads ${TESTPID} 2
+  perf record -p "${TESTPID}" --per-thread -o "${perfdata}" sleep 1 2> /dev/null
+  kill ${TESTPID}
+
+  if [ ! -e "${perfdata}" ]
+  then
+    echo "Per-thread record [Failed record -p]"
+    err=1
+    return
+  fi
+  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+  then
+    echo "Per-thread record [Failed -p missing output]"
+    err=1
+    return
+  fi
+
   echo "Basic --per-thread mode test [Success]"
 }
 
-- 
GitLab


From 2cadf2c7b99a980455500c34571a540300c49c72 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:40 -0700
Subject: [PATCH 0530/4122] perf test: Add system-wide mode in 'perf record'
 tests

Add system wide recording test with the same pattern.  It'd skip the
test when it fails to run 'perf record'.

For system-wide mode, it needs to avoid build-id collection and
synthesis because the test only cares about the test program and kernel
would generate the necessary events as the process starts.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index d1640d1daf2e3..345764afb7456 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -154,10 +154,31 @@ test_register_capture() {
   echo "Register capture test [Success]"
 }
 
+test_system_wide() {
+  echo "Basic --system-wide mode test"
+  if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null
+  then
+    echo "System-wide record [Skipped not supported]"
+    if [ $err -ne 1 ]
+    then
+      err=2
+    fi
+    return
+  fi
+  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+  then
+    echo "System-wide record [Failed missing output]"
+    err=1
+    return
+  fi
+  echo "Basic --system-wide mode test [Success]"
+}
+
 build_test_program
 
 test_per_thread
 test_register_capture
+test_system_wide
 
 cleanup
 exit $err
-- 
GitLab


From c8c935677487ba6f7dd18d48e39ab30bbb4cb5d9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:41 -0700
Subject: [PATCH 0531/4122] perf test: Add target workload test in 'perf
 record' tests

Add a subtest which profiles the given workload on the command line.

As it's a minimal requirement, the test should run ok so it doesn't skip
the test even if it failed to run the 'perf record' command.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 345764afb7456..c59d1459c9603 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -174,11 +174,29 @@ test_system_wide() {
   echo "Basic --system-wide mode test [Success]"
 }
 
+test_workload() {
+  echo "Basic target workload test"
+  if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null
+  then
+    echo "Workload record [Failed record]"
+    err=1
+    return
+  fi
+  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+  then
+    echo "Workload record [Failed missing output]"
+    err=1
+    return
+  fi
+  echo "Basic target workload test [Success]"
+}
+
 build_test_program
 
 test_per_thread
 test_register_capture
 test_system_wide
+test_workload
 
 cleanup
 exit $err
-- 
GitLab


From 7f4ed3f0b1fc951bcb91a78bf68ac45aac13ec0b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:42 -0700
Subject: [PATCH 0532/4122] perf test: Test record with --threads option

The --threads option changed the 'perf record' behavior significantly,
so it'd be nice if we test it separately.  Add --threads options with
different argument in each test supported and check the result.

Also update the cleanup routine because threads recording produces data
in a directory.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index c59d1459c9603..01aa9531b3694 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -13,8 +13,8 @@ testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX)
 testsym="test_loop"
 
 cleanup() {
-  rm -f "${perfdata}"
-  rm -f "${perfdata}".old
+  rm -rf "${perfdata}"
+  rm -rf "${perfdata}".old
 
   if [ "${testprog}" != "true" ]; then
     rm -f "${testprog}"
@@ -171,6 +171,19 @@ test_system_wide() {
     err=1
     return
   fi
+  if ! perf record -aB --synth=no -e cpu-clock,cs --threads=cpu \
+    -o "${perfdata}" ${testprog} 2> /dev/null
+  then
+    echo "System-wide record [Failed record --threads option]"
+    err=1
+    return
+  fi
+  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+  then
+    echo "System-wide record [Failed --threads missing output]"
+    err=1
+    return
+  fi
   echo "Basic --system-wide mode test [Success]"
 }
 
@@ -188,6 +201,19 @@ test_workload() {
     err=1
     return
   fi
+  if ! perf record -e cpu-clock,cs --threads=package \
+    -o "${perfdata}" ${testprog} 2> /dev/null
+  then
+    echo "Workload record [Failed record --threads option]"
+    err=1
+    return
+  fi
+  if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
+  then
+    echo "Workload record [Failed --threads missing output]"
+    err=1
+    return
+  fi
   echo "Basic target workload test [Success]"
 }
 
-- 
GitLab


From 8b380e6afd124d18cd51a43d2505e4eb05e2ba09 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 20 Oct 2022 10:26:43 -0700
Subject: [PATCH 0533/4122] perf test: Do not set TEST_SKIP for record subtests

It now has 4 sub tests and at least one of them should run.

But once the TEST_SKIP (= 2) return value is set, it won't be
overwritten unless there's a failure.  I think we should return success
when one or more tests are skipped but the remaining subtests are
passed.

So update the test code not to set the err variable when it skips
the test.

Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221020172643.3458767-9-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 01aa9531b3694..e93b3a8871fe3 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -81,10 +81,6 @@ test_per_thread() {
   if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null
   then
     echo "Per-thread record [Skipped event not supported]"
-    if [ $err -ne 1 ]
-    then
-      err=2
-    fi
     return
   fi
   if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null
@@ -131,10 +127,6 @@ test_register_capture() {
   if ! perf list | grep -q 'br_inst_retired.near_call'
   then
     echo "Register capture test [Skipped missing event]"
-    if [ $err -ne 1 ]
-    then
-      err=2
-    fi
     return
   fi
   if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15'
@@ -159,10 +151,6 @@ test_system_wide() {
   if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null
   then
     echo "System-wide record [Skipped not supported]"
-    if [ $err -ne 1 ]
-    then
-      err=2
-    fi
     return
   fi
   if ! perf report -i "${perfdata}" -q | grep -q "${testsym}"
-- 
GitLab


From 65319890c32db29fb56b41f84265a2c7029943f4 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Tue, 18 Oct 2022 10:41:35 +0100
Subject: [PATCH 0534/4122] perf tools: Fix "kernel lock contention analysis"
 test by not printing warnings in quiet mode

Especially when CONFIG_LOCKDEP and other debug configs are enabled,
Perf can print the following warning when running the "kernel lock
contention analysis" test:

  Warning:
  Processed 1378918 events and lost 4 chunks!

  Check IO/CPU overload!

  Warning:
  Processed 4593325 samples and lost 70.00%!

The test already supplies -q to run in quiet mode, so extend quiet mode
to perf_stdio__warning() and also ui__warning() for consistency.

This fixes the following failure due to the extra lines counted:

  perf test "lock cont" -vvv

  82: kernel lock contention analysis test                            :
  --- start ---
  test child forked, pid 3125
  Testing perf lock record and perf lock contention
  [Fail] Recorded result count is not 1: 9
  test child finished with -1
  ---- end ----
  kernel lock contention analysis test: FAILED!

Fixes: ec685de25b6718f8 ("perf test: Add kernel lock contention test")
Signed-off-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221018094137.783081-2-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/util.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/ui/util.c b/tools/perf/ui/util.c
index 689b27c34246c..1d38ddf01b604 100644
--- a/tools/perf/ui/util.c
+++ b/tools/perf/ui/util.c
@@ -15,6 +15,9 @@ static int perf_stdio__error(const char *format, va_list args)
 
 static int perf_stdio__warning(const char *format, va_list args)
 {
+	if (quiet)
+		return 0;
+
 	fprintf(stderr, "Warning:\n");
 	vfprintf(stderr, format, args);
 	return 0;
@@ -45,6 +48,8 @@ int ui__warning(const char *format, ...)
 {
 	int ret;
 	va_list args;
+	if (quiet)
+		return 0;
 
 	va_start(args, format);
 	ret = perf_eops->warning(format, args);
-- 
GitLab


From a527c2c1e2d43e9f145f5d0c5d6ac0bdf5220e22 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Tue, 18 Oct 2022 10:41:36 +0100
Subject: [PATCH 0535/4122] perf tools: Make quiet mode consistent between
 tools

Use the global quiet variable everywhere so that all tools hide warnings
in quiet mode and update the documentation to reflect this.

'perf probe' claimed that errors are not printed in quiet mode but I
don't see this so remove it from the docs.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221018094137.783081-3-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-annotate.txt | 2 +-
 tools/perf/Documentation/perf-diff.txt     | 2 +-
 tools/perf/Documentation/perf-lock.txt     | 2 +-
 tools/perf/Documentation/perf-probe.txt    | 2 +-
 tools/perf/Documentation/perf-record.txt   | 2 +-
 tools/perf/Documentation/perf-report.txt   | 2 +-
 tools/perf/Documentation/perf-stat.txt     | 4 ++--
 tools/perf/bench/numa.c                    | 9 +++++----
 tools/perf/builtin-annotate.c              | 2 +-
 tools/perf/builtin-diff.c                  | 2 +-
 tools/perf/builtin-lock.c                  | 2 +-
 tools/perf/builtin-probe.c                 | 7 +++----
 tools/perf/builtin-record.c                | 2 +-
 tools/perf/builtin-report.c                | 2 +-
 tools/perf/builtin-stat.c                  | 8 ++++----
 tools/perf/util/stat.h                     | 1 -
 16 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index 18fcc52809fbf..980fe2c292752 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -41,7 +41,7 @@ OPTIONS
 
 -q::
 --quiet::
-	Do not show any message.  (Suppress -v)
+	Do not show any warnings or messages.  (Suppress -v)
 
 -n::
 --show-nr-samples::
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index be65bd55ab2aa..f3067a4af2940 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -75,7 +75,7 @@ OPTIONS
 
 -q::
 --quiet::
-	Do not show any message.  (Suppress -v)
+	Do not show any warnings or messages.  (Suppress -v)
 
 -f::
 --force::
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 3b1e16563b795..4958a1ffa1cca 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -42,7 +42,7 @@ COMMON OPTIONS
 
 -q::
 --quiet::
-	Do not show any message. (Suppress -v)
+	Do not show any warnings or messages. (Suppress -v)
 
 -D::
 --dump-raw-trace::
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 080981d38d7ba..7f8e8ba3a7872 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -57,7 +57,7 @@ OPTIONS
 
 -q::
 --quiet::
-	Be quiet (do not show any messages including errors).
+	Do not show any warnings or messages.
 	Can not use with -v.
 
 -a::
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index e41ae950fdc3b..9ea6d44aca58c 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -282,7 +282,7 @@ OPTIONS
 
 -q::
 --quiet::
-	Don't print any message, useful for scripting.
+	Don't print any warnings or messages, useful for scripting.
 
 -v::
 --verbose::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 4533db2ee56bb..4fa509b159489 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -27,7 +27,7 @@ OPTIONS
 
 -q::
 --quiet::
-	Do not show any message.  (Suppress -v)
+	Do not show any warnings or messages.  (Suppress -v)
 
 -n::
 --show-nr-samples::
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index d7ff1867feda6..18abdc1dce055 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -354,8 +354,8 @@ forbids the event merging logic from sharing events between groups and
 may be used to increase accuracy in this case.
 
 --quiet::
-Don't print output. This is useful with perf stat record below to only
-write data to the perf.data file.
+Don't print output, warnings or messages. This is useful with perf stat
+record below to only write data to the perf.data file.
 
 STAT RECORD
 -----------
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index e78dedf9e682c..9717c6c17433c 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -16,6 +16,7 @@
 #include <sched.h>
 #include <stdio.h>
 #include <assert.h>
+#include <debug.h>
 #include <malloc.h>
 #include <signal.h>
 #include <stdlib.h>
@@ -116,7 +117,6 @@ struct params {
 	long			bytes_thread;
 
 	int			nr_tasks;
-	bool			show_quiet;
 
 	bool			show_convergence;
 	bool			measure_convergence;
@@ -197,7 +197,8 @@ static const struct option options[] = {
 	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, "
 		    "convergence is reached when each process (all its threads) is running on a single NUMA node."),
 	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
-	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
+	OPT_BOOLEAN('q', "quiet"	, &quiet,
+		    "quiet mode (do not show any warnings or messages)"),
 	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
 
 	/* Special option string parsing callbacks: */
@@ -1474,7 +1475,7 @@ static int init(void)
 	/* char array in count_process_nodes(): */
 	BUG_ON(g->p.nr_nodes < 0);
 
-	if (g->p.show_quiet && !g->p.show_details)
+	if (quiet && !g->p.show_details)
 		g->p.show_details = -1;
 
 	/* Some memory should be specified: */
@@ -1553,7 +1554,7 @@ static void print_res(const char *name, double val,
 	if (!name)
 		name = "main,";
 
-	if (!g->p.show_quiet)
+	if (!quiet)
 		printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
 	else
 		printf(" %14.3f %s\n", val, txt_long);
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index f839e69492e80..517d928c00e3f 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -525,7 +525,7 @@ int cmd_annotate(int argc, const char **argv)
 	OPT_BOOLEAN('f', "force", &data.force, "don't complain, do it"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "do now show any warnings or messages"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 #ifdef HAVE_GTK2_SUPPORT
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index d925096dd7f02..ed07cc6cca56c 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -1260,7 +1260,7 @@ static const char * const diff_usage[] = {
 static const struct option options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
 	OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
 		    "Show only items with match in baseline"),
 	OPT_CALLBACK('c', "compute", &compute,
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 9722d4ab2e557..66520712a1675 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1869,7 +1869,7 @@ int cmd_lock(int argc, const char **argv)
 		   "file", "vmlinux pathname"),
 	OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name,
 		   "file", "kallsyms pathname"),
-	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
 	OPT_END()
 	};
 
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index f62298f5db3b4..2ae50fc9e5976 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -40,7 +40,6 @@ static struct {
 	int command;	/* Command short_name */
 	bool list_events;
 	bool uprobes;
-	bool quiet;
 	bool target_used;
 	int nevents;
 	struct perf_probe_event events[MAX_PROBES];
@@ -514,8 +513,8 @@ __cmd_probe(int argc, const char **argv)
 	struct option options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show parsed arguments, etc)"),
-	OPT_BOOLEAN('q', "quiet", &params.quiet,
-		    "be quiet (do not show any messages)"),
+	OPT_BOOLEAN('q', "quiet", &quiet,
+		    "be quiet (do not show any warnings or messages)"),
 	OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
 			     "list up probe events",
 			     opt_set_filter_with_command, DEFAULT_LIST_FILTER),
@@ -634,7 +633,7 @@ __cmd_probe(int argc, const char **argv)
 	if (ret)
 		return ret;
 
-	if (params.quiet) {
+	if (quiet) {
 		if (verbose != 0) {
 			pr_err("  Error: -v and -q are exclusive.\n");
 			return -EINVAL;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e128b855dddec..59f3d98a0196d 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3388,7 +3388,7 @@ static struct option __record_options[] = {
 		     &record_parse_callchain_opt),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show counter open errors, etc)"),
-	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"),
 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 		    "per thread counts"),
 	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 8361890176c23..b6d77d3da64f6 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1222,7 +1222,7 @@ int cmd_report(int argc, const char **argv)
 		    "input file name"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
-	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"),
+	OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
 	OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9d35a33389766..e52601a54b26d 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1033,7 +1033,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	/* Do not print anything if we record to the pipe. */
 	if (STAT_RECORD && perf_stat.data.is_pipe)
 		return;
-	if (stat_config.quiet)
+	if (quiet)
 		return;
 
 	evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
@@ -1283,8 +1283,8 @@ static struct option stat_options[] = {
 		       "print summary for interval mode"),
 	OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary,
 		       "don't print 'summary' for CSV summary output"),
-	OPT_BOOLEAN(0, "quiet", &stat_config.quiet,
-			"don't print output (useful with record)"),
+	OPT_BOOLEAN(0, "quiet", &quiet,
+			"don't print any output, messages or warnings (useful with record)"),
 	OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type",
 		     "Only enable events on applying cpu with this type "
 		     "for hybrid platform (e.g. core or atom)",
@@ -2383,7 +2383,7 @@ int cmd_stat(int argc, const char **argv)
 		goto out;
 	}
 
-	if (!output && !stat_config.quiet) {
+	if (!output && !quiet) {
 		struct timespec tm;
 		mode = append_file ? "a" : "w";
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 382a1ab92ce18..499c3bf813336 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -160,7 +160,6 @@ struct perf_stat_config {
 	bool			 metric_no_group;
 	bool			 metric_no_merge;
 	bool			 stop_read_counter;
-	bool			 quiet;
 	bool			 iostat_run;
 	char			 *user_requested_cpu_list;
 	bool			 system_wide;
-- 
GitLab


From cff624146450bd25a1acea0439b7654167e0f722 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Oct 2022 13:50:17 -0300
Subject: [PATCH 0536/4122] perf bpf: No need to include compiler.h when
 HAVE_LIBBPF_SUPPORT is true

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_map.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h
index d6abd5e47af8d..c2f7c13cba23f 100644
--- a/tools/perf/util/bpf_map.h
+++ b/tools/perf/util/bpf_map.h
@@ -3,7 +3,6 @@
 #define __PERF_BPF_MAP_H 1
 
 #include <stdio.h>
-#include <linux/compiler.h>
 struct bpf_map;
 
 #ifdef HAVE_LIBBPF_SUPPORT
@@ -12,6 +11,8 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp);
 
 #else
 
+#include <linux/compiler.h>
+
 static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused)
 {
 	return 0;
-- 
GitLab


From 5e9c68ea777594a2d63fa44c0509782e90821707 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Wed, 12 Oct 2022 01:18:40 +0200
Subject: [PATCH 0537/4122] RISC-V: Cache SBI vendor values

sbi_get_mvendorid(), sbi_get_marchid() and sbi_get_mimpid() might get
called multiple times, though the values of these CSRs should not change
during the runtime of a specific machine.

Though the values can be different depending on which hart of the system
they get called. So hook into the newly introduced cpuinfo struct to allow
retrieving these cached values via new functions.

Also use arch_initcall for the cpuinfo setup instead, as that now clearly
is "architecture specific initialization" and also makes these information
available slightly earlier.

[caching vendor ids]

Suggested-by: Atish Patra <atishp@atishpatra.org>
[using cpuinfo struct as cache]
Suggested-by: Anup Patel <apatel@ventanamicro.com>
Link: https://lore.kernel.org/all/20221011231841.2951264-2-heiko@sntech.de/
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/sbi.h |  5 +++++
 arch/riscv/kernel/cpu.c      | 30 +++++++++++++++++++++++++++---
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h
index 2a0ef738695ed..4ca7fbacff424 100644
--- a/arch/riscv/include/asm/sbi.h
+++ b/arch/riscv/include/asm/sbi.h
@@ -327,4 +327,9 @@ int sbi_err_map_linux_errno(int err);
 static inline int sbi_remote_fence_i(const struct cpumask *cpu_mask) { return -1; }
 static inline void sbi_init(void) {}
 #endif /* CONFIG_RISCV_SBI */
+
+unsigned long riscv_cached_mvendorid(unsigned int cpu_id);
+unsigned long riscv_cached_marchid(unsigned int cpu_id);
+unsigned long riscv_cached_mimpid(unsigned int cpu_id);
+
 #endif /* _ASM_RISCV_SBI_H */
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index fa427bdcf773d..bf9dd6764bad2 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -70,8 +70,6 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid)
 	return -1;
 }
 
-#ifdef CONFIG_PROC_FS
-
 struct riscv_cpuinfo {
 	unsigned long mvendorid;
 	unsigned long marchid;
@@ -79,6 +77,30 @@ struct riscv_cpuinfo {
 };
 static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
+unsigned long riscv_cached_mvendorid(unsigned int cpu_id)
+{
+	struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id);
+
+	return ci->mvendorid;
+}
+EXPORT_SYMBOL(riscv_cached_mvendorid);
+
+unsigned long riscv_cached_marchid(unsigned int cpu_id)
+{
+	struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id);
+
+	return ci->marchid;
+}
+EXPORT_SYMBOL(riscv_cached_marchid);
+
+unsigned long riscv_cached_mimpid(unsigned int cpu_id)
+{
+	struct riscv_cpuinfo *ci = per_cpu_ptr(&riscv_cpuinfo, cpu_id);
+
+	return ci->mimpid;
+}
+EXPORT_SYMBOL(riscv_cached_mimpid);
+
 static int riscv_cpuinfo_starting(unsigned int cpu)
 {
 	struct riscv_cpuinfo *ci = this_cpu_ptr(&riscv_cpuinfo);
@@ -113,7 +135,9 @@ static int __init riscv_cpuinfo_init(void)
 
 	return 0;
 }
-device_initcall(riscv_cpuinfo_init);
+arch_initcall(riscv_cpuinfo_init);
+
+#ifdef CONFIG_PROC_FS
 
 #define __RISCV_ISA_EXT_DATA(UPROP, EXTID) \
 	{							\
-- 
GitLab


From 65e9fb081877a18c432c6ff344937b7277c044b5 Mon Sep 17 00:00:00 2001
From: Heiko Stuebner <heiko@sntech.de>
Date: Wed, 12 Oct 2022 01:18:41 +0200
Subject: [PATCH 0538/4122] drivers/perf: riscv_pmu_sbi: add support for PMU
 variant on T-Head C9xx cores

With the T-HEAD C9XX cores being designed before or during the ratification
to the SSCOFPMF extension, it implements functionality very similar but
not equal to it.

It implements overflow handling and also some privilege-mode filtering.
While SSCOFPMF supports this for all modes, the C9XX only implements the
filtering for M-mode and S-mode but not user-mode.

So add some adaptions to allow the C9XX to still handle
its PMU through the regular SBI PMU interface instead of defining new
interfaces or drivers.

To work properly, this requires a matching change in SBI, though the actual
interface between kernel and SBI does not change.

The main differences are a the overflow CSR and irq number.

As the reading of the overflow-csr is in the hot-path during irq handling,
use an errata and alternatives to not introduce new conditionals there.

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/all/20221011231841.2951264-2-heiko@sntech.de/
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig.erratas           | 13 +++++++++++
 arch/riscv/errata/thead/errata.c     | 19 ++++++++++++++++
 arch/riscv/include/asm/errata_list.h | 16 ++++++++++++-
 drivers/perf/riscv_pmu_sbi.c         | 34 ++++++++++++++++++++--------
 4 files changed, 71 insertions(+), 11 deletions(-)

diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas
index f3623df23b5fc..69621ae6d647a 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.erratas
@@ -66,4 +66,17 @@ config ERRATA_THEAD_CMO
 
 	  If you don't know what to do here, say "Y".
 
+config ERRATA_THEAD_PMU
+	bool "Apply T-Head PMU errata"
+	depends on ERRATA_THEAD && RISCV_PMU_SBI
+	default y
+	help
+	  The T-Head C9xx cores implement a PMU overflow extension very
+	  similar to the core SSCOFPMF extension.
+
+	  This will apply the overflow errata to handle the non-standard
+	  behaviour via the regular SBI PMU driver and interface.
+
+	  If you don't know what to do here, say "Y".
+
 endmenu # "CPU errata selection"
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 21546937db39b..fac5742d1c1e6 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -47,6 +47,22 @@ static bool errata_probe_cmo(unsigned int stage,
 	return true;
 }
 
+static bool errata_probe_pmu(unsigned int stage,
+			     unsigned long arch_id, unsigned long impid)
+{
+	if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PMU))
+		return false;
+
+	/* target-c9xx cores report arch_id and impid as 0 */
+	if (arch_id != 0 || impid != 0)
+		return false;
+
+	if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
+		return false;
+
+	return true;
+}
+
 static u32 thead_errata_probe(unsigned int stage,
 			      unsigned long archid, unsigned long impid)
 {
@@ -58,6 +74,9 @@ static u32 thead_errata_probe(unsigned int stage,
 	if (errata_probe_cmo(stage, archid, impid))
 		cpu_req_errata |= BIT(ERRATA_THEAD_CMO);
 
+	if (errata_probe_pmu(stage, archid, impid))
+		cpu_req_errata |= BIT(ERRATA_THEAD_PMU);
+
 	return cpu_req_errata;
 }
 
diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h
index 19a771085781a..4180312d2a701 100644
--- a/arch/riscv/include/asm/errata_list.h
+++ b/arch/riscv/include/asm/errata_list.h
@@ -6,6 +6,7 @@
 #define ASM_ERRATA_LIST_H
 
 #include <asm/alternative.h>
+#include <asm/csr.h>
 #include <asm/vendorid_list.h>
 
 #ifdef CONFIG_ERRATA_SIFIVE
@@ -17,7 +18,8 @@
 #ifdef CONFIG_ERRATA_THEAD
 #define	ERRATA_THEAD_PBMT 0
 #define	ERRATA_THEAD_CMO 1
-#define	ERRATA_THEAD_NUMBER 2
+#define	ERRATA_THEAD_PMU 2
+#define	ERRATA_THEAD_NUMBER 3
 #endif
 
 #define	CPUFEATURE_SVPBMT 0
@@ -142,6 +144,18 @@ asm volatile(ALTERNATIVE_2(						\
 	    "r"((unsigned long)(_start) + (_size))			\
 	: "a0")
 
+#define THEAD_C9XX_RV_IRQ_PMU			17
+#define THEAD_C9XX_CSR_SCOUNTEROF		0x5c5
+
+#define ALT_SBI_PMU_OVERFLOW(__ovl)					\
+asm volatile(ALTERNATIVE(						\
+	"csrr %0, " __stringify(CSR_SSCOUNTOVF),			\
+	"csrr %0, " __stringify(THEAD_C9XX_CSR_SCOUNTEROF),		\
+		THEAD_VENDOR_ID, ERRATA_THEAD_PMU,			\
+		CONFIG_ERRATA_THEAD_PMU)				\
+	: "=r" (__ovl) :						\
+	: "memory")
+
 #endif /* __ASSEMBLY__ */
 
 #endif
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 3852c18362f53..f6507efe2a584 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -20,6 +20,7 @@
 #include <linux/cpu_pm.h>
 #include <linux/sched/clock.h>
 
+#include <asm/errata_list.h>
 #include <asm/sbi.h>
 #include <asm/hwcap.h>
 
@@ -47,6 +48,8 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
  * per_cpu in case of harts with different pmu counters
  */
 static union sbi_pmu_ctr_info *pmu_ctr_list;
+static bool riscv_pmu_use_irq;
+static unsigned int riscv_pmu_irq_num;
 static unsigned int riscv_pmu_irq;
 
 struct sbi_pmu_event_data {
@@ -580,7 +583,7 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
 	fidx = find_first_bit(cpu_hw_evt->used_hw_ctrs, RISCV_MAX_COUNTERS);
 	event = cpu_hw_evt->events[fidx];
 	if (!event) {
-		csr_clear(CSR_SIP, SIP_LCOFIP);
+		csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
 		return IRQ_NONE;
 	}
 
@@ -588,13 +591,13 @@ static irqreturn_t pmu_sbi_ovf_handler(int irq, void *dev)
 	pmu_sbi_stop_hw_ctrs(pmu);
 
 	/* Overflow status register should only be read after counter are stopped */
-	overflow = csr_read(CSR_SSCOUNTOVF);
+	ALT_SBI_PMU_OVERFLOW(overflow);
 
 	/*
 	 * Overflow interrupt pending bit should only be cleared after stopping
 	 * all the counters to avoid any race condition.
 	 */
-	csr_clear(CSR_SIP, SIP_LCOFIP);
+	csr_clear(CSR_SIP, BIT(riscv_pmu_irq_num));
 
 	/* No overflow bit is set */
 	if (!overflow)
@@ -661,10 +664,10 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
 	/* Stop all the counters so that they can be enabled from perf */
 	pmu_sbi_stop_all(pmu);
 
-	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+	if (riscv_pmu_use_irq) {
 		cpu_hw_evt->irq = riscv_pmu_irq;
-		csr_clear(CSR_IP, BIT(RV_IRQ_PMU));
-		csr_set(CSR_IE, BIT(RV_IRQ_PMU));
+		csr_clear(CSR_IP, BIT(riscv_pmu_irq_num));
+		csr_set(CSR_IE, BIT(riscv_pmu_irq_num));
 		enable_percpu_irq(riscv_pmu_irq, IRQ_TYPE_NONE);
 	}
 
@@ -673,9 +676,9 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
 
 static int pmu_sbi_dying_cpu(unsigned int cpu, struct hlist_node *node)
 {
-	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+	if (riscv_pmu_use_irq) {
 		disable_percpu_irq(riscv_pmu_irq);
-		csr_clear(CSR_IE, BIT(RV_IRQ_PMU));
+		csr_clear(CSR_IE, BIT(riscv_pmu_irq_num));
 	}
 
 	/* Disable all counters access for user mode now */
@@ -691,7 +694,18 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
 	struct device_node *cpu, *child;
 	struct irq_domain *domain = NULL;
 
-	if (!riscv_isa_extension_available(NULL, SSCOFPMF))
+	if (riscv_isa_extension_available(NULL, SSCOFPMF)) {
+		riscv_pmu_irq_num = RV_IRQ_PMU;
+		riscv_pmu_use_irq = true;
+	} else if (IS_ENABLED(CONFIG_ERRATA_THEAD_PMU) &&
+		   riscv_cached_mvendorid(0) == THEAD_VENDOR_ID &&
+		   riscv_cached_marchid(0) == 0 &&
+		   riscv_cached_mimpid(0) == 0) {
+		riscv_pmu_irq_num = THEAD_C9XX_RV_IRQ_PMU;
+		riscv_pmu_use_irq = true;
+	}
+
+	if (!riscv_pmu_use_irq)
 		return -EOPNOTSUPP;
 
 	for_each_of_cpu_node(cpu) {
@@ -713,7 +727,7 @@ static int pmu_sbi_setup_irqs(struct riscv_pmu *pmu, struct platform_device *pde
 		return -ENODEV;
 	}
 
-	riscv_pmu_irq = irq_create_mapping(domain, RV_IRQ_PMU);
+	riscv_pmu_irq = irq_create_mapping(domain, riscv_pmu_irq_num);
 	if (!riscv_pmu_irq) {
 		pr_err("Failed to map PMU interrupt for node\n");
 		return -ENODEV;
-- 
GitLab


From 2348e6bf44213c5f447ff698e43c089185241ed7 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Tue, 18 Oct 2022 22:12:00 +0800
Subject: [PATCH 0539/4122] riscv: remove special treatment for the link order
 of head.o

arch/riscv/kernel/head.o does not need any special treatment - the only
requirement is the ".head.text" section must be placed before the
normal ".text" section.

The linker script does the right thing to do. The build system does
not need to manipulate the link order of head.o.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20221018141200.1040-1-jszhang@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 scripts/head-object-list.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/scripts/head-object-list.txt b/scripts/head-object-list.txt
index b16326a92c458..105ea7ac47511 100644
--- a/scripts/head-object-list.txt
+++ b/scripts/head-object-list.txt
@@ -39,7 +39,6 @@ arch/powerpc/kernel/entry_64.o
 arch/powerpc/kernel/fpu.o
 arch/powerpc/kernel/vector.o
 arch/powerpc/kernel/prom_init.o
-arch/riscv/kernel/head.o
 arch/s390/kernel/head64.o
 arch/sh/kernel/head_32.o
 arch/sparc/kernel/head_32.o
-- 
GitLab


From 28fc4e9077ce59ab28c89c20dc6be5154473218f Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Tue, 18 Oct 2022 10:45:32 +0800
Subject: [PATCH 0540/4122] f2fs: Fix the race condition of resize flag between
 resizefs

Because the set/clear SBI_IS_RESIZEFS flag not between any locks,
In the following case:
  thread1			thread2
   ->ioctl(resizefs)
    ->set RESIZEFS flag		 ->ioctl(resizefs)
    ...                   	  ->set RESIZEFS flag
    ->clear RESIZEFS flag
    				  ->resizefs stream
				    # No RESIZEFS flag in the stream

Also before freeze_super, the resizefs not started, we should not set
the SBI_IS_RESIZEFS flag.

So move the set/clear SBI_IS_RESIZEFS flag between the cp_mutex and
gc_lock.

Fixes: b4b10061ef98 ("f2fs: refactor resize_fs to avoid meta updates in progress")
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index dab794225cce7..7b4be412cec06 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -2134,8 +2134,6 @@ out_unlock:
 	if (err)
 		return err;
 
-	set_sbi_flag(sbi, SBI_IS_RESIZEFS);
-
 	freeze_super(sbi->sb);
 	f2fs_down_write(&sbi->gc_lock);
 	f2fs_down_write(&sbi->cp_global_sem);
@@ -2151,6 +2149,7 @@ out_unlock:
 	if (err)
 		goto out_err;
 
+	set_sbi_flag(sbi, SBI_IS_RESIZEFS);
 	err = free_segment_range(sbi, secs, false);
 	if (err)
 		goto recover_out;
@@ -2174,6 +2173,7 @@ out_unlock:
 		f2fs_commit_super(sbi, false);
 	}
 recover_out:
+	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
 	if (err) {
 		set_sbi_flag(sbi, SBI_NEED_FSCK);
 		f2fs_err(sbi, "resize_fs failed, should run fsck to repair!");
@@ -2186,6 +2186,5 @@ out_err:
 	f2fs_up_write(&sbi->cp_global_sem);
 	f2fs_up_write(&sbi->gc_lock);
 	thaw_super(sbi->sb);
-	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
 	return err;
 }
-- 
GitLab


From 299c481fa5c121f892420d97f1123a853b7f1079 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:39 +0000
Subject: [PATCH 0541/4122] crypto: rockchip - use dev_err for error message
 about interrupt

Interrupt is mandatory so the message should be printed as error.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 35d73061d1569..45cc5f766788c 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -371,8 +371,7 @@ static int rk_crypto_probe(struct platform_device *pdev)
 
 	crypto_info->irq = platform_get_irq(pdev, 0);
 	if (crypto_info->irq < 0) {
-		dev_warn(crypto_info->dev,
-			 "control Interrupt is not available.\n");
+		dev_err(&pdev->dev, "control Interrupt is not available.\n");
 		err = crypto_info->irq;
 		goto err_crypto;
 	}
-- 
GitLab


From 8ccd9c8cd1d1618f5e073c86ffcfe15f292eefe6 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:40 +0000
Subject: [PATCH 0542/4122] crypto: rockchip - do not use uninitialized
 variable

crypto_info->dev is not yet set, so use pdev->dev instead.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 45cc5f766788c..21d3f14585843 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -381,7 +381,7 @@ static int rk_crypto_probe(struct platform_device *pdev)
 			       "rk-crypto", pdev);
 
 	if (err) {
-		dev_err(crypto_info->dev, "irq request failed.\n");
+		dev_err(&pdev->dev, "irq request failed.\n");
 		goto err_crypto;
 	}
 
-- 
GitLab


From c50ef1411c8cbad0c7db100c477126076b6e3348 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:41 +0000
Subject: [PATCH 0543/4122] crypto: rockchip - do not do custom power
 management

The clock enable/disable at tfm init/exit is fragile,
if 2 tfm are init in the same time and one is removed just after,
it will leave the hardware uncloked even if a user remains.

Instead simply enable clocks at probe time.
We will do PM later.

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c          | 4 ++--
 drivers/crypto/rockchip/rk3288_crypto.h          | 2 --
 drivers/crypto/rockchip/rk3288_crypto_ahash.c    | 3 +--
 drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 5 +++--
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 21d3f14585843..4cff49b829838 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -394,8 +394,7 @@ static int rk_crypto_probe(struct platform_device *pdev)
 		     rk_crypto_done_task_cb, (unsigned long)crypto_info);
 	crypto_init_queue(&crypto_info->queue, 50);
 
-	crypto_info->enable_clk = rk_crypto_enable_clk;
-	crypto_info->disable_clk = rk_crypto_disable_clk;
+	rk_crypto_enable_clk(crypto_info);
 	crypto_info->load_data = rk_load_data;
 	crypto_info->unload_data = rk_unload_data;
 	crypto_info->enqueue = rk_crypto_enqueue;
@@ -422,6 +421,7 @@ static int rk_crypto_remove(struct platform_device *pdev)
 	struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev);
 
 	rk_crypto_unregister();
+	rk_crypto_disable_clk(crypto_tmp);
 	tasklet_kill(&crypto_tmp->done_task);
 	tasklet_kill(&crypto_tmp->queue_task);
 	return 0;
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 97278c2574ff9..2fa7131e40604 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -220,8 +220,6 @@ struct rk_crypto_info {
 	int (*start)(struct rk_crypto_info *dev);
 	int (*update)(struct rk_crypto_info *dev);
 	void (*complete)(struct crypto_async_request *base, int err);
-	int (*enable_clk)(struct rk_crypto_info *dev);
-	void (*disable_clk)(struct rk_crypto_info *dev);
 	int (*load_data)(struct rk_crypto_info *dev,
 			 struct scatterlist *sg_src,
 			 struct scatterlist *sg_dst);
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index ed03058497bc2..49017d1fb5104 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -301,7 +301,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 				 sizeof(struct rk_ahash_rctx) +
 				 crypto_ahash_reqsize(tctx->fallback_tfm));
 
-	return tctx->dev->enable_clk(tctx->dev);
+	return 0;
 }
 
 static void rk_cra_hash_exit(struct crypto_tfm *tfm)
@@ -309,7 +309,6 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm)
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
 
 	free_page((unsigned long)tctx->dev->addr_vir);
-	return tctx->dev->disable_clk(tctx->dev);
 }
 
 struct rk_crypto_tmp rk_ahash_sha1 = {
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 5bbf0d2722e11..8c44a19eab751 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -388,8 +388,10 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 	ctx->dev->update = rk_ablk_rx;
 	ctx->dev->complete = rk_crypto_complete;
 	ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL);
+	if (!ctx->dev->addr_vir)
+		return -ENOMEM;
 
-	return ctx->dev->addr_vir ? ctx->dev->enable_clk(ctx->dev) : -ENOMEM;
+	return 0;
 }
 
 static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
@@ -397,7 +399,6 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	free_page((unsigned long)ctx->dev->addr_vir);
-	ctx->dev->disable_clk(ctx->dev);
 }
 
 struct rk_crypto_tmp rk_ecb_aes_alg = {
-- 
GitLab


From 6d11c9387865723fd779be00ae37a4588e60133d Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:42 +0000
Subject: [PATCH 0544/4122] crypto: rockchip - fix privete/private typo

This fix a simple typo on private word.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 2fa7131e40604..656d6795d4004 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -235,7 +235,7 @@ struct rk_ahash_ctx {
 	struct crypto_ahash		*fallback_tfm;
 };
 
-/* the privete variable of hash for fallback */
+/* the private variable of hash for fallback */
 struct rk_ahash_rctx {
 	struct ahash_request		fallback_req;
 	u32				mode;
-- 
GitLab


From 87e356c4966444866186f68f05832fdcc0f351a3 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:43 +0000
Subject: [PATCH 0545/4122] crypto: rockchip - do not store mode globally

Storing the mode globally does not work if 2 requests are handled in the
same time.
We should store it in a request context.

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.h       |  5 +-
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 58 ++++++++++++-------
 2 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 656d6795d4004..c919d9a43a082 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -245,10 +245,13 @@ struct rk_ahash_rctx {
 struct rk_cipher_ctx {
 	struct rk_crypto_info		*dev;
 	unsigned int			keylen;
-	u32				mode;
 	u8				iv[AES_BLOCK_SIZE];
 };
 
+struct rk_cipher_rctx {
+	u32				mode;
+};
+
 enum alg_type {
 	ALG_TYPE_HASH,
 	ALG_TYPE_CIPHER,
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 8c44a19eab751..bbd0bf52bf07e 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -76,9 +76,10 @@ static int rk_aes_ecb_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_AES_ECB_MODE;
+	rctx->mode = RK_CRYPTO_AES_ECB_MODE;
 	return rk_handle_req(dev, req);
 }
 
@@ -86,9 +87,10 @@ static int rk_aes_ecb_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
+	rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -96,9 +98,10 @@ static int rk_aes_cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_AES_CBC_MODE;
+	rctx->mode = RK_CRYPTO_AES_CBC_MODE;
 	return rk_handle_req(dev, req);
 }
 
@@ -106,9 +109,10 @@ static int rk_aes_cbc_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
+	rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -116,9 +120,10 @@ static int rk_des_ecb_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = 0;
+	rctx->mode = 0;
 	return rk_handle_req(dev, req);
 }
 
@@ -126,9 +131,10 @@ static int rk_des_ecb_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_DEC;
+	rctx->mode = RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -136,9 +142,10 @@ static int rk_des_cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
+	rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
 	return rk_handle_req(dev, req);
 }
 
@@ -146,9 +153,10 @@ static int rk_des_cbc_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
+	rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -156,9 +164,10 @@ static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_TDES_SELECT;
+	rctx->mode = RK_CRYPTO_TDES_SELECT;
 	return rk_handle_req(dev, req);
 }
 
@@ -166,9 +175,10 @@ static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
+	rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
 
@@ -176,9 +186,10 @@ static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
+	rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
 	return rk_handle_req(dev, req);
 }
 
@@ -186,9 +197,10 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *dev = ctx->dev;
 
-	ctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
+	rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
 		    RK_CRYPTO_DEC;
 	return rk_handle_req(dev, req);
 }
@@ -199,6 +211,7 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 		skcipher_request_cast(dev->async_req);
 	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
 	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
 	u32 ivsize, block, conf_reg = 0;
 
@@ -206,22 +219,22 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 	ivsize = crypto_skcipher_ivsize(cipher);
 
 	if (block == DES_BLOCK_SIZE) {
-		ctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
+		rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
 			     RK_CRYPTO_TDES_BYTESWAP_KEY |
 			     RK_CRYPTO_TDES_BYTESWAP_IV;
-		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, ctx->mode);
+		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode);
 		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize);
 		conf_reg = RK_CRYPTO_DESSEL;
 	} else {
-		ctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
+		rctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
 			     RK_CRYPTO_AES_KEY_CHANGE |
 			     RK_CRYPTO_AES_BYTESWAP_KEY |
 			     RK_CRYPTO_AES_BYTESWAP_IV;
 		if (ctx->keylen == AES_KEYSIZE_192)
-			ctx->mode |= RK_CRYPTO_AES_192BIT_key;
+			rctx->mode |= RK_CRYPTO_AES_192BIT_key;
 		else if (ctx->keylen == AES_KEYSIZE_256)
-			ctx->mode |= RK_CRYPTO_AES_256BIT_key;
-		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, ctx->mode);
+			rctx->mode |= RK_CRYPTO_AES_256BIT_key;
+		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode);
 		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize);
 	}
 	conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
@@ -246,6 +259,7 @@ static int rk_set_data_start(struct rk_crypto_info *dev)
 	struct skcipher_request *req =
 		skcipher_request_cast(dev->async_req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u32 ivsize = crypto_skcipher_ivsize(tfm);
 	u8 *src_last_blk = page_address(sg_page(dev->sg_src)) +
@@ -254,7 +268,7 @@ static int rk_set_data_start(struct rk_crypto_info *dev)
 	/* Store the iv that need to be updated in chain mode.
 	 * And update the IV buffer to contain the next IV for decryption mode.
 	 */
-	if (ctx->mode & RK_CRYPTO_DEC) {
+	if (rctx->mode & RK_CRYPTO_DEC) {
 		memcpy(ctx->iv, src_last_blk, ivsize);
 		sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv,
 				   ivsize, dev->total - ivsize);
@@ -294,11 +308,12 @@ static void rk_iv_copyback(struct rk_crypto_info *dev)
 	struct skcipher_request *req =
 		skcipher_request_cast(dev->async_req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u32 ivsize = crypto_skcipher_ivsize(tfm);
 
 	/* Update the IV buffer to contain the next IV for encryption mode. */
-	if (!(ctx->mode & RK_CRYPTO_DEC)) {
+	if (!(rctx->mode & RK_CRYPTO_DEC)) {
 		if (dev->aligned) {
 			memcpy(req->iv, sg_virt(dev->sg_dst) +
 				dev->sg_dst->length - ivsize, ivsize);
@@ -314,11 +329,12 @@ static void rk_update_iv(struct rk_crypto_info *dev)
 	struct skcipher_request *req =
 		skcipher_request_cast(dev->async_req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	u32 ivsize = crypto_skcipher_ivsize(tfm);
 	u8 *new_iv = NULL;
 
-	if (ctx->mode & RK_CRYPTO_DEC) {
+	if (rctx->mode & RK_CRYPTO_DEC) {
 		new_iv = ctx->iv;
 	} else {
 		new_iv = page_address(sg_page(dev->sg_dst)) +
-- 
GitLab


From 68ef8af09a1a912a5ed2cfaa4cca7606f52cef90 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:44 +0000
Subject: [PATCH 0546/4122] crypto: rockchip - add fallback for cipher

The hardware does not handle 0 size length request, let's add a
fallback.
Furthermore fallback will be used for all unaligned case the hardware
cannot handle.

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig                        |  4 +
 drivers/crypto/rockchip/rk3288_crypto.h       |  2 +
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 97 ++++++++++++++++---
 3 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 55e75fbb658ee..113b35f695980 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -669,6 +669,10 @@ config CRYPTO_DEV_IMGTEC_HASH
 config CRYPTO_DEV_ROCKCHIP
 	tristate "Rockchip's Cryptographic Engine driver"
 	depends on OF && ARCH_ROCKCHIP
+	depends on PM
+	select CRYPTO_ECB
+	select CRYPTO_CBC
+	select CRYPTO_DES
 	select CRYPTO_AES
 	select CRYPTO_LIB_DES
 	select CRYPTO_MD5
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index c919d9a43a082..8b1e15d8ddc67 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -246,10 +246,12 @@ struct rk_cipher_ctx {
 	struct rk_crypto_info		*dev;
 	unsigned int			keylen;
 	u8				iv[AES_BLOCK_SIZE];
+	struct crypto_skcipher *fallback_tfm;
 };
 
 struct rk_cipher_rctx {
 	u32				mode;
+	struct skcipher_request fallback_req;   // keep at the end
 };
 
 enum alg_type {
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index bbd0bf52bf07e..eac5bba66e257 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -13,6 +13,63 @@
 
 #define RK_CRYPTO_DEC			BIT(0)
 
+static int rk_cipher_need_fallback(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	unsigned int bs = crypto_skcipher_blocksize(tfm);
+	struct scatterlist *sgs, *sgd;
+	unsigned int stodo, dtodo, len;
+
+	if (!req->cryptlen)
+		return true;
+
+	len = req->cryptlen;
+	sgs = req->src;
+	sgd = req->dst;
+	while (sgs && sgd) {
+		if (!IS_ALIGNED(sgs->offset, sizeof(u32))) {
+			return true;
+		}
+		if (!IS_ALIGNED(sgd->offset, sizeof(u32))) {
+			return true;
+		}
+		stodo = min(len, sgs->length);
+		if (stodo % bs) {
+			return true;
+		}
+		dtodo = min(len, sgd->length);
+		if (dtodo % bs) {
+			return true;
+		}
+		if (stodo != dtodo) {
+			return true;
+		}
+		len -= stodo;
+		sgs = sg_next(sgs);
+		sgd = sg_next(sgd);
+	}
+	return false;
+}
+
+static int rk_cipher_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
+	int err;
+
+	skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
+	skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
+				      areq->base.complete, areq->base.data);
+	skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+	if (rctx->mode & RK_CRYPTO_DEC)
+		err = crypto_skcipher_decrypt(&rctx->fallback_req);
+	else
+		err = crypto_skcipher_encrypt(&rctx->fallback_req);
+	return err;
+}
+
 static void rk_crypto_complete(struct crypto_async_request *base, int err)
 {
 	if (base->complete)
@@ -22,10 +79,10 @@ static void rk_crypto_complete(struct crypto_async_request *base, int err)
 static int rk_handle_req(struct rk_crypto_info *dev,
 			 struct skcipher_request *req)
 {
-	if (!IS_ALIGNED(req->cryptlen, dev->align_size))
-		return -EINVAL;
-	else
-		return dev->enqueue(dev, &req->base);
+	if (rk_cipher_need_fallback(req))
+		return rk_cipher_fallback(req);
+
+	return dev->enqueue(dev, &req->base);
 }
 
 static int rk_aes_setkey(struct crypto_skcipher *cipher,
@@ -39,7 +96,8 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher,
 		return -EINVAL;
 	ctx->keylen = keylen;
 	memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen);
-	return 0;
+
+	return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
 }
 
 static int rk_des_setkey(struct crypto_skcipher *cipher,
@@ -54,7 +112,8 @@ static int rk_des_setkey(struct crypto_skcipher *cipher,
 
 	ctx->keylen = keylen;
 	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
-	return 0;
+
+	return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
 }
 
 static int rk_tdes_setkey(struct crypto_skcipher *cipher,
@@ -69,7 +128,7 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher,
 
 	ctx->keylen = keylen;
 	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
-	return 0;
+	return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
 }
 
 static int rk_aes_ecb_encrypt(struct skcipher_request *req)
@@ -394,6 +453,7 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 {
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	const char *name = crypto_tfm_alg_name(&tfm->base);
 	struct rk_crypto_tmp *algt;
 
 	algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
@@ -407,6 +467,16 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 	if (!ctx->dev->addr_vir)
 		return -ENOMEM;
 
+	ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback_tfm)) {
+		dev_err(ctx->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(ctx->fallback_tfm));
+		return PTR_ERR(ctx->fallback_tfm);
+	}
+
+	tfm->reqsize = sizeof(struct rk_cipher_rctx) +
+		crypto_skcipher_reqsize(ctx->fallback_tfm);
+
 	return 0;
 }
 
@@ -415,6 +485,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	free_page((unsigned long)ctx->dev->addr_vir);
+	crypto_free_skcipher(ctx->fallback_tfm);
 }
 
 struct rk_crypto_tmp rk_ecb_aes_alg = {
@@ -423,7 +494,7 @@ struct rk_crypto_tmp rk_ecb_aes_alg = {
 		.base.cra_name		= "ecb(aes)",
 		.base.cra_driver_name	= "ecb-aes-rk",
 		.base.cra_priority	= 300,
-		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 		.base.cra_blocksize	= AES_BLOCK_SIZE,
 		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
 		.base.cra_alignmask	= 0x0f,
@@ -445,7 +516,7 @@ struct rk_crypto_tmp rk_cbc_aes_alg = {
 		.base.cra_name		= "cbc(aes)",
 		.base.cra_driver_name	= "cbc-aes-rk",
 		.base.cra_priority	= 300,
-		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 		.base.cra_blocksize	= AES_BLOCK_SIZE,
 		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
 		.base.cra_alignmask	= 0x0f,
@@ -468,7 +539,7 @@ struct rk_crypto_tmp rk_ecb_des_alg = {
 		.base.cra_name		= "ecb(des)",
 		.base.cra_driver_name	= "ecb-des-rk",
 		.base.cra_priority	= 300,
-		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 		.base.cra_blocksize	= DES_BLOCK_SIZE,
 		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
 		.base.cra_alignmask	= 0x07,
@@ -490,7 +561,7 @@ struct rk_crypto_tmp rk_cbc_des_alg = {
 		.base.cra_name		= "cbc(des)",
 		.base.cra_driver_name	= "cbc-des-rk",
 		.base.cra_priority	= 300,
-		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 		.base.cra_blocksize	= DES_BLOCK_SIZE,
 		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
 		.base.cra_alignmask	= 0x07,
@@ -513,7 +584,7 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
 		.base.cra_name		= "ecb(des3_ede)",
 		.base.cra_driver_name	= "ecb-des3-ede-rk",
 		.base.cra_priority	= 300,
-		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 		.base.cra_blocksize	= DES_BLOCK_SIZE,
 		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
 		.base.cra_alignmask	= 0x07,
@@ -535,7 +606,7 @@ struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
 		.base.cra_name		= "cbc(des3_ede)",
 		.base.cra_driver_name	= "cbc-des3-ede-rk",
 		.base.cra_priority	= 300,
-		.base.cra_flags		= CRYPTO_ALG_ASYNC,
+		.base.cra_flags		= CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
 		.base.cra_blocksize	= DES_BLOCK_SIZE,
 		.base.cra_ctxsize	= sizeof(struct rk_cipher_ctx),
 		.base.cra_alignmask	= 0x07,
-- 
GitLab


From 816600485cb597b3ff7d6806a95a78512839f775 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:45 +0000
Subject: [PATCH 0547/4122] crypto: rockchip - add fallback for ahash

Adds a fallback for all case hardware cannot handle.

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 49017d1fb5104..16009bb0bf160 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -16,6 +16,40 @@
  * so we put the fixed hash out when met zero message.
  */
 
+static bool rk_ahash_need_fallback(struct ahash_request *req)
+{
+	struct scatterlist *sg;
+
+	sg = req->src;
+	while (sg) {
+		if (!IS_ALIGNED(sg->offset, sizeof(u32))) {
+			return true;
+		}
+		if (sg->length % 4) {
+			return true;
+		}
+		sg = sg_next(sg);
+	}
+	return false;
+}
+
+static int rk_ahash_digest_fb(struct ahash_request *areq)
+{
+	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm);
+
+	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
+	rctx->fallback_req.base.flags = areq->base.flags &
+					CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	rctx->fallback_req.nbytes = areq->nbytes;
+	rctx->fallback_req.src = areq->src;
+	rctx->fallback_req.result = areq->result;
+
+	return crypto_ahash_digest(&rctx->fallback_req);
+}
+
 static int zero_message_process(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
@@ -167,6 +201,9 @@ static int rk_ahash_digest(struct ahash_request *req)
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
 	struct rk_crypto_info *dev = tctx->dev;
 
+	if (rk_ahash_need_fallback(req))
+		return rk_ahash_digest_fb(req);
+
 	if (!req->nbytes)
 		return zero_message_process(req);
 	else
@@ -309,6 +346,7 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm)
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
 
 	free_page((unsigned long)tctx->dev->addr_vir);
+	crypto_free_ahash(tctx->fallback_tfm);
 }
 
 struct rk_crypto_tmp rk_ahash_sha1 = {
-- 
GitLab


From d6b23ccef82816050c2fd458c9dabfa0e0af09b9 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:46 +0000
Subject: [PATCH 0548/4122] crypto: rockchip - better handle cipher key

The key should not be set in hardware too much in advance, this will
fail it 2 TFM with different keys generate alternative requests.
The key should be stored and used just before doing cipher operations.

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.h          |  1 +
 drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 8b1e15d8ddc67..540b81a14b9be 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -245,6 +245,7 @@ struct rk_ahash_rctx {
 struct rk_cipher_ctx {
 	struct rk_crypto_info		*dev;
 	unsigned int			keylen;
+	u8				key[AES_MAX_KEY_SIZE];
 	u8				iv[AES_BLOCK_SIZE];
 	struct crypto_skcipher *fallback_tfm;
 };
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index eac5bba66e257..1ef94f8db2c5c 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -95,7 +95,7 @@ static int rk_aes_setkey(struct crypto_skcipher *cipher,
 	    keylen != AES_KEYSIZE_256)
 		return -EINVAL;
 	ctx->keylen = keylen;
-	memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, key, keylen);
+	memcpy(ctx->key, key, keylen);
 
 	return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
 }
@@ -111,7 +111,7 @@ static int rk_des_setkey(struct crypto_skcipher *cipher,
 		return err;
 
 	ctx->keylen = keylen;
-	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
+	memcpy(ctx->key, key, keylen);
 
 	return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
 }
@@ -127,7 +127,8 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher,
 		return err;
 
 	ctx->keylen = keylen;
-	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
+	memcpy(ctx->key, key, keylen);
+
 	return crypto_skcipher_setkey(ctx->fallback_tfm, key, keylen);
 }
 
@@ -283,6 +284,7 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 			     RK_CRYPTO_TDES_BYTESWAP_IV;
 		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode);
 		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize);
+		memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen);
 		conf_reg = RK_CRYPTO_DESSEL;
 	} else {
 		rctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
@@ -295,6 +297,7 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 			rctx->mode |= RK_CRYPTO_AES_256BIT_key;
 		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode);
 		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize);
+		memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen);
 	}
 	conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
 		    RK_CRYPTO_BYTESWAP_BRFIFO;
@@ -484,6 +487,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
 {
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
+	memzero_explicit(ctx->key, ctx->keylen);
 	free_page((unsigned long)ctx->dev->addr_vir);
 	crypto_free_skcipher(ctx->fallback_tfm);
 }
-- 
GitLab


From bb3c7b73363c9a149b12b74c44ae94b73a8fddf8 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:47 +0000
Subject: [PATCH 0549/4122] crypto: rockchip - remove non-aligned handling

Now driver have fallback for un-aligned cases, remove all code handling
those cases.

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       | 69 +++++--------------
 drivers/crypto/rockchip/rk3288_crypto.h       |  4 --
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 22 ++----
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 39 +++--------
 4 files changed, 31 insertions(+), 103 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 4cff49b829838..b3db096e2ec29 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -88,63 +88,26 @@ static int rk_load_data(struct rk_crypto_info *dev,
 {
 	unsigned int count;
 
-	dev->aligned = dev->aligned ?
-		check_alignment(sg_src, sg_dst, dev->align_size) :
-		dev->aligned;
-	if (dev->aligned) {
-		count = min(dev->left_bytes, sg_src->length);
-		dev->left_bytes -= count;
-
-		if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) {
-			dev_err(dev->dev, "[%s:%d] dma_map_sg(src)  error\n",
+	count = min(dev->left_bytes, sg_src->length);
+	dev->left_bytes -= count;
+
+	if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) {
+		dev_err(dev->dev, "[%s:%d] dma_map_sg(src)  error\n",
 				__func__, __LINE__);
-			return -EINVAL;
-		}
-		dev->addr_in = sg_dma_address(sg_src);
+		return -EINVAL;
+	}
+	dev->addr_in = sg_dma_address(sg_src);
 
-		if (sg_dst) {
-			if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) {
-				dev_err(dev->dev,
+	if (sg_dst) {
+		if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) {
+			dev_err(dev->dev,
 					"[%s:%d] dma_map_sg(dst)  error\n",
 					__func__, __LINE__);
-				dma_unmap_sg(dev->dev, sg_src, 1,
-					     DMA_TO_DEVICE);
-				return -EINVAL;
-			}
-			dev->addr_out = sg_dma_address(sg_dst);
-		}
-	} else {
-		count = (dev->left_bytes > PAGE_SIZE) ?
-			PAGE_SIZE : dev->left_bytes;
-
-		if (!sg_pcopy_to_buffer(dev->first, dev->src_nents,
-					dev->addr_vir, count,
-					dev->total - dev->left_bytes)) {
-			dev_err(dev->dev, "[%s:%d] pcopy err\n",
-				__func__, __LINE__);
+			dma_unmap_sg(dev->dev, sg_src, 1,
+					DMA_TO_DEVICE);
 			return -EINVAL;
 		}
-		dev->left_bytes -= count;
-		sg_init_one(&dev->sg_tmp, dev->addr_vir, count);
-		if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1, DMA_TO_DEVICE)) {
-			dev_err(dev->dev, "[%s:%d] dma_map_sg(sg_tmp)  error\n",
-				__func__, __LINE__);
-			return -ENOMEM;
-		}
-		dev->addr_in = sg_dma_address(&dev->sg_tmp);
-
-		if (sg_dst) {
-			if (!dma_map_sg(dev->dev, &dev->sg_tmp, 1,
-					DMA_FROM_DEVICE)) {
-				dev_err(dev->dev,
-					"[%s:%d] dma_map_sg(sg_tmp)  error\n",
-					__func__, __LINE__);
-				dma_unmap_sg(dev->dev, &dev->sg_tmp, 1,
-					     DMA_TO_DEVICE);
-				return -ENOMEM;
-			}
-			dev->addr_out = sg_dma_address(&dev->sg_tmp);
-		}
+		dev->addr_out = sg_dma_address(sg_dst);
 	}
 	dev->count = count;
 	return 0;
@@ -154,11 +117,11 @@ static void rk_unload_data(struct rk_crypto_info *dev)
 {
 	struct scatterlist *sg_in, *sg_out;
 
-	sg_in = dev->aligned ? dev->sg_src : &dev->sg_tmp;
+	sg_in = dev->sg_src;
 	dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE);
 
 	if (dev->sg_dst) {
-		sg_out = dev->aligned ? dev->sg_dst : &dev->sg_tmp;
+		sg_out = dev->sg_dst;
 		dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE);
 	}
 }
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 540b81a14b9be..a7de5738f6dc4 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -204,12 +204,8 @@ struct rk_crypto_info {
 	/* the public variable */
 	struct scatterlist		*sg_src;
 	struct scatterlist		*sg_dst;
-	struct scatterlist		sg_tmp;
 	struct scatterlist		*first;
 	unsigned int			left_bytes;
-	void				*addr_vir;
-	int				aligned;
-	int				align_size;
 	size_t				src_nents;
 	size_t				dst_nents;
 	unsigned int			total;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 16009bb0bf160..c762e462eb570 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -236,8 +236,6 @@ static int rk_ahash_start(struct rk_crypto_info *dev)
 
 	dev->total = req->nbytes;
 	dev->left_bytes = req->nbytes;
-	dev->aligned = 0;
-	dev->align_size = 4;
 	dev->sg_dst = NULL;
 	dev->sg_src = req->src;
 	dev->first = req->src;
@@ -272,15 +270,13 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
 
 	dev->unload_data(dev);
 	if (dev->left_bytes) {
-		if (dev->aligned) {
-			if (sg_is_last(dev->sg_src)) {
-				dev_warn(dev->dev, "[%s:%d], Lack of data\n",
-					 __func__, __LINE__);
-				err = -ENOMEM;
-				goto out_rx;
-			}
-			dev->sg_src = sg_next(dev->sg_src);
+		if (sg_is_last(dev->sg_src)) {
+			dev_warn(dev->dev, "[%s:%d], Lack of data\n",
+					__func__, __LINE__);
+			err = -ENOMEM;
+			goto out_rx;
 		}
+		dev->sg_src = sg_next(dev->sg_src);
 		err = rk_ahash_set_data_start(dev);
 	} else {
 		/*
@@ -318,11 +314,6 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 	algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 
 	tctx->dev = algt->dev;
-	tctx->dev->addr_vir = (void *)__get_free_page(GFP_KERNEL);
-	if (!tctx->dev->addr_vir) {
-		dev_err(tctx->dev->dev, "failed to kmalloc for addr_vir\n");
-		return -ENOMEM;
-	}
 	tctx->dev->start = rk_ahash_start;
 	tctx->dev->update = rk_ahash_crypto_rx;
 	tctx->dev->complete = rk_ahash_crypto_complete;
@@ -345,7 +336,6 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm)
 {
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
 
-	free_page((unsigned long)tctx->dev->addr_vir);
 	crypto_free_ahash(tctx->fallback_tfm);
 }
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 1ef94f8db2c5c..d067b7f091659 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -356,7 +356,6 @@ static int rk_ablk_start(struct rk_crypto_info *dev)
 	dev->src_nents = sg_nents(req->src);
 	dev->sg_dst = req->dst;
 	dev->dst_nents = sg_nents(req->dst);
-	dev->aligned = 1;
 
 	spin_lock_irqsave(&dev->lock, flags);
 	rk_ablk_hw_init(dev);
@@ -376,13 +375,9 @@ static void rk_iv_copyback(struct rk_crypto_info *dev)
 
 	/* Update the IV buffer to contain the next IV for encryption mode. */
 	if (!(rctx->mode & RK_CRYPTO_DEC)) {
-		if (dev->aligned) {
-			memcpy(req->iv, sg_virt(dev->sg_dst) +
-				dev->sg_dst->length - ivsize, ivsize);
-		} else {
-			memcpy(req->iv, dev->addr_vir +
-				dev->count - ivsize, ivsize);
-		}
+		memcpy(req->iv,
+		       sg_virt(dev->sg_dst) + dev->sg_dst->length - ivsize,
+		       ivsize);
 	}
 }
 
@@ -420,27 +415,16 @@ static int rk_ablk_rx(struct rk_crypto_info *dev)
 		skcipher_request_cast(dev->async_req);
 
 	dev->unload_data(dev);
-	if (!dev->aligned) {
-		if (!sg_pcopy_from_buffer(req->dst, dev->dst_nents,
-					  dev->addr_vir, dev->count,
-					  dev->total - dev->left_bytes -
-					  dev->count)) {
-			err = -EINVAL;
-			goto out_rx;
-		}
-	}
 	if (dev->left_bytes) {
 		rk_update_iv(dev);
-		if (dev->aligned) {
-			if (sg_is_last(dev->sg_src)) {
-				dev_err(dev->dev, "[%s:%d] Lack of data\n",
+		if (sg_is_last(dev->sg_src)) {
+			dev_err(dev->dev, "[%s:%d] Lack of data\n",
 					__func__, __LINE__);
-				err = -ENOMEM;
-				goto out_rx;
-			}
-			dev->sg_src = sg_next(dev->sg_src);
-			dev->sg_dst = sg_next(dev->sg_dst);
+			err = -ENOMEM;
+			goto out_rx;
 		}
+		dev->sg_src = sg_next(dev->sg_src);
+		dev->sg_dst = sg_next(dev->sg_dst);
 		err = rk_set_data_start(dev);
 	} else {
 		rk_iv_copyback(dev);
@@ -462,13 +446,9 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 	algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 
 	ctx->dev = algt->dev;
-	ctx->dev->align_size = crypto_tfm_alg_alignmask(crypto_skcipher_tfm(tfm)) + 1;
 	ctx->dev->start = rk_ablk_start;
 	ctx->dev->update = rk_ablk_rx;
 	ctx->dev->complete = rk_crypto_complete;
-	ctx->dev->addr_vir = (char *)__get_free_page(GFP_KERNEL);
-	if (!ctx->dev->addr_vir)
-		return -ENOMEM;
 
 	ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ctx->fallback_tfm)) {
@@ -488,7 +468,6 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
 	memzero_explicit(ctx->key, ctx->keylen);
-	free_page((unsigned long)ctx->dev->addr_vir);
 	crypto_free_skcipher(ctx->fallback_tfm);
 }
 
-- 
GitLab


From 57d67c6e8219b2a034c16d6149e30fb40fd39935 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:48 +0000
Subject: [PATCH 0550/4122] crypto: rockchip - rework by using crypto_engine

Instead of doing manual queue management, let's use the crypto/engine
for that.
In the same time, rework the requests handling to be easier to
understand (and fix all bugs related to them).

Fixes: ce0183cb6464b ("crypto: rockchip - switch to skcipher API")
Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig                        |   1 +
 drivers/crypto/rockchip/rk3288_crypto.c       | 152 +----------
 drivers/crypto/rockchip/rk3288_crypto.h       |  39 +--
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 144 +++++-----
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 250 +++++++++---------
 5 files changed, 221 insertions(+), 365 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 113b35f695980..c30b5a39c2ac2 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -674,6 +674,7 @@ config CRYPTO_DEV_ROCKCHIP
 	select CRYPTO_CBC
 	select CRYPTO_DES
 	select CRYPTO_AES
+	select CRYPTO_ENGINE
 	select CRYPTO_LIB_DES
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index b3db096e2ec29..1afb65eee6c9f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -65,149 +65,24 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev)
 	clk_disable_unprepare(dev->sclk);
 }
 
-static int check_alignment(struct scatterlist *sg_src,
-			   struct scatterlist *sg_dst,
-			   int align_mask)
-{
-	int in, out, align;
-
-	in = IS_ALIGNED((uint32_t)sg_src->offset, 4) &&
-	     IS_ALIGNED((uint32_t)sg_src->length, align_mask);
-	if (!sg_dst)
-		return in;
-	out = IS_ALIGNED((uint32_t)sg_dst->offset, 4) &&
-	      IS_ALIGNED((uint32_t)sg_dst->length, align_mask);
-	align = in && out;
-
-	return (align && (sg_src->length == sg_dst->length));
-}
-
-static int rk_load_data(struct rk_crypto_info *dev,
-			struct scatterlist *sg_src,
-			struct scatterlist *sg_dst)
-{
-	unsigned int count;
-
-	count = min(dev->left_bytes, sg_src->length);
-	dev->left_bytes -= count;
-
-	if (!dma_map_sg(dev->dev, sg_src, 1, DMA_TO_DEVICE)) {
-		dev_err(dev->dev, "[%s:%d] dma_map_sg(src)  error\n",
-				__func__, __LINE__);
-		return -EINVAL;
-	}
-	dev->addr_in = sg_dma_address(sg_src);
-
-	if (sg_dst) {
-		if (!dma_map_sg(dev->dev, sg_dst, 1, DMA_FROM_DEVICE)) {
-			dev_err(dev->dev,
-					"[%s:%d] dma_map_sg(dst)  error\n",
-					__func__, __LINE__);
-			dma_unmap_sg(dev->dev, sg_src, 1,
-					DMA_TO_DEVICE);
-			return -EINVAL;
-		}
-		dev->addr_out = sg_dma_address(sg_dst);
-	}
-	dev->count = count;
-	return 0;
-}
-
-static void rk_unload_data(struct rk_crypto_info *dev)
-{
-	struct scatterlist *sg_in, *sg_out;
-
-	sg_in = dev->sg_src;
-	dma_unmap_sg(dev->dev, sg_in, 1, DMA_TO_DEVICE);
-
-	if (dev->sg_dst) {
-		sg_out = dev->sg_dst;
-		dma_unmap_sg(dev->dev, sg_out, 1, DMA_FROM_DEVICE);
-	}
-}
-
 static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
 {
 	struct rk_crypto_info *dev  = platform_get_drvdata(dev_id);
 	u32 interrupt_status;
 
-	spin_lock(&dev->lock);
 	interrupt_status = CRYPTO_READ(dev, RK_CRYPTO_INTSTS);
 	CRYPTO_WRITE(dev, RK_CRYPTO_INTSTS, interrupt_status);
 
+	dev->status = 1;
 	if (interrupt_status & 0x0a) {
 		dev_warn(dev->dev, "DMA Error\n");
-		dev->err = -EFAULT;
+		dev->status = 0;
 	}
-	tasklet_schedule(&dev->done_task);
+	complete(&dev->complete);
 
-	spin_unlock(&dev->lock);
 	return IRQ_HANDLED;
 }
 
-static int rk_crypto_enqueue(struct rk_crypto_info *dev,
-			      struct crypto_async_request *async_req)
-{
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&dev->lock, flags);
-	ret = crypto_enqueue_request(&dev->queue, async_req);
-	if (dev->busy) {
-		spin_unlock_irqrestore(&dev->lock, flags);
-		return ret;
-	}
-	dev->busy = true;
-	spin_unlock_irqrestore(&dev->lock, flags);
-	tasklet_schedule(&dev->queue_task);
-
-	return ret;
-}
-
-static void rk_crypto_queue_task_cb(unsigned long data)
-{
-	struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
-	struct crypto_async_request *async_req, *backlog;
-	unsigned long flags;
-	int err = 0;
-
-	dev->err = 0;
-	spin_lock_irqsave(&dev->lock, flags);
-	backlog   = crypto_get_backlog(&dev->queue);
-	async_req = crypto_dequeue_request(&dev->queue);
-
-	if (!async_req) {
-		dev->busy = false;
-		spin_unlock_irqrestore(&dev->lock, flags);
-		return;
-	}
-	spin_unlock_irqrestore(&dev->lock, flags);
-
-	if (backlog) {
-		backlog->complete(backlog, -EINPROGRESS);
-		backlog = NULL;
-	}
-
-	dev->async_req = async_req;
-	err = dev->start(dev);
-	if (err)
-		dev->complete(dev->async_req, err);
-}
-
-static void rk_crypto_done_task_cb(unsigned long data)
-{
-	struct rk_crypto_info *dev = (struct rk_crypto_info *)data;
-
-	if (dev->err) {
-		dev->complete(dev->async_req, dev->err);
-		return;
-	}
-
-	dev->err = dev->update(dev);
-	if (dev->err)
-		dev->complete(dev->async_req, dev->err);
-}
-
 static struct rk_crypto_tmp *rk_cipher_algs[] = {
 	&rk_ecb_aes_alg,
 	&rk_cbc_aes_alg,
@@ -300,8 +175,6 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	if (err)
 		goto err_crypto;
 
-	spin_lock_init(&crypto_info->lock);
-
 	crypto_info->reg = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(crypto_info->reg)) {
 		err = PTR_ERR(crypto_info->reg);
@@ -351,17 +224,11 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	crypto_info->dev = &pdev->dev;
 	platform_set_drvdata(pdev, crypto_info);
 
-	tasklet_init(&crypto_info->queue_task,
-		     rk_crypto_queue_task_cb, (unsigned long)crypto_info);
-	tasklet_init(&crypto_info->done_task,
-		     rk_crypto_done_task_cb, (unsigned long)crypto_info);
-	crypto_init_queue(&crypto_info->queue, 50);
+	crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true);
+	crypto_engine_start(crypto_info->engine);
+	init_completion(&crypto_info->complete);
 
 	rk_crypto_enable_clk(crypto_info);
-	crypto_info->load_data = rk_load_data;
-	crypto_info->unload_data = rk_unload_data;
-	crypto_info->enqueue = rk_crypto_enqueue;
-	crypto_info->busy = false;
 
 	err = rk_crypto_register(crypto_info);
 	if (err) {
@@ -373,9 +240,9 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	return 0;
 
 err_register_alg:
-	tasklet_kill(&crypto_info->queue_task);
-	tasklet_kill(&crypto_info->done_task);
+	crypto_engine_exit(crypto_info->engine);
 err_crypto:
+	dev_err(dev, "Crypto Accelerator not successfully registered\n");
 	return err;
 }
 
@@ -385,8 +252,7 @@ static int rk_crypto_remove(struct platform_device *pdev)
 
 	rk_crypto_unregister();
 	rk_crypto_disable_clk(crypto_tmp);
-	tasklet_kill(&crypto_tmp->done_task);
-	tasklet_kill(&crypto_tmp->queue_task);
+	crypto_engine_exit(crypto_tmp->engine);
 	return 0;
 }
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index a7de5738f6dc4..65ed645e01687 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -5,9 +5,11 @@
 #include <crypto/aes.h>
 #include <crypto/internal/des.h>
 #include <crypto/algapi.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
+#include <crypto/engine.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 
@@ -193,39 +195,15 @@ struct rk_crypto_info {
 	struct reset_control		*rst;
 	void __iomem			*reg;
 	int				irq;
-	struct crypto_queue		queue;
-	struct tasklet_struct		queue_task;
-	struct tasklet_struct		done_task;
-	struct crypto_async_request	*async_req;
-	int 				err;
-	/* device lock */
-	spinlock_t			lock;
-
-	/* the public variable */
-	struct scatterlist		*sg_src;
-	struct scatterlist		*sg_dst;
-	struct scatterlist		*first;
-	unsigned int			left_bytes;
-	size_t				src_nents;
-	size_t				dst_nents;
-	unsigned int			total;
-	unsigned int			count;
-	dma_addr_t			addr_in;
-	dma_addr_t			addr_out;
-	bool				busy;
-	int (*start)(struct rk_crypto_info *dev);
-	int (*update)(struct rk_crypto_info *dev);
-	void (*complete)(struct crypto_async_request *base, int err);
-	int (*load_data)(struct rk_crypto_info *dev,
-			 struct scatterlist *sg_src,
-			 struct scatterlist *sg_dst);
-	void (*unload_data)(struct rk_crypto_info *dev);
-	int (*enqueue)(struct rk_crypto_info *dev,
-		       struct crypto_async_request *async_req);
+
+	struct crypto_engine *engine;
+	struct completion complete;
+	int status;
 };
 
 /* the private variable of hash */
 struct rk_ahash_ctx {
+	struct crypto_engine_ctx enginectx;
 	struct rk_crypto_info		*dev;
 	/* for fallback */
 	struct crypto_ahash		*fallback_tfm;
@@ -235,10 +213,12 @@ struct rk_ahash_ctx {
 struct rk_ahash_rctx {
 	struct ahash_request		fallback_req;
 	u32				mode;
+	int nrsg;
 };
 
 /* the private variable of cipher */
 struct rk_cipher_ctx {
+	struct crypto_engine_ctx enginectx;
 	struct rk_crypto_info		*dev;
 	unsigned int			keylen;
 	u8				key[AES_MAX_KEY_SIZE];
@@ -247,6 +227,7 @@ struct rk_cipher_ctx {
 };
 
 struct rk_cipher_rctx {
+	u8 backup_iv[AES_BLOCK_SIZE];
 	u32				mode;
 	struct skcipher_request fallback_req;   // keep at the end
 };
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index c762e462eb570..edd40e16a3f0a 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -9,6 +9,7 @@
  * Some ideas are from marvell/cesa.c and s5p-sss.c driver.
  */
 #include <linux/device.h>
+#include <asm/unaligned.h>
 #include "rk3288_crypto.h"
 
 /*
@@ -72,16 +73,12 @@ static int zero_message_process(struct ahash_request *req)
 	return 0;
 }
 
-static void rk_ahash_crypto_complete(struct crypto_async_request *base, int err)
+static void rk_ahash_reg_init(struct ahash_request *req)
 {
-	if (base->complete)
-		base->complete(base, err);
-}
-
-static void rk_ahash_reg_init(struct rk_crypto_info *dev)
-{
-	struct ahash_request *req = ahash_request_cast(dev->async_req);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
+	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct rk_crypto_info *dev = tctx->dev;
 	int reg_status;
 
 	reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) |
@@ -108,7 +105,7 @@ static void rk_ahash_reg_init(struct rk_crypto_info *dev)
 					  RK_CRYPTO_BYTESWAP_BRFIFO |
 					  RK_CRYPTO_BYTESWAP_BTFIFO);
 
-	CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, dev->total);
+	CRYPTO_WRITE(dev, RK_CRYPTO_HASH_MSG_LEN, req->nbytes);
 }
 
 static int rk_ahash_init(struct ahash_request *req)
@@ -206,44 +203,59 @@ static int rk_ahash_digest(struct ahash_request *req)
 
 	if (!req->nbytes)
 		return zero_message_process(req);
-	else
-		return dev->enqueue(dev, &req->base);
+
+	return crypto_transfer_hash_request_to_engine(dev->engine, req);
 }
 
-static void crypto_ahash_dma_start(struct rk_crypto_info *dev)
+static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg)
 {
-	CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, dev->addr_in);
-	CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, (dev->count + 3) / 4);
+	CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAS, sg_dma_address(sg));
+	CRYPTO_WRITE(dev, RK_CRYPTO_HRDMAL, sg_dma_len(sg) / 4);
 	CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_HASH_START |
 					  (RK_CRYPTO_HASH_START << 16));
 }
 
-static int rk_ahash_set_data_start(struct rk_crypto_info *dev)
+static int rk_hash_prepare(struct crypto_engine *engine, void *breq)
+{
+	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+	int ret;
+
+	ret = dma_map_sg(tctx->dev->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
+	if (ret <= 0)
+		return -EINVAL;
+
+	rctx->nrsg = ret;
+
+	return 0;
+}
+
+static int rk_hash_unprepare(struct crypto_engine *engine, void *breq)
 {
-	int err;
+	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
 
-	err = dev->load_data(dev, dev->sg_src, NULL);
-	if (!err)
-		crypto_ahash_dma_start(dev);
-	return err;
+	dma_unmap_sg(tctx->dev->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE);
+	return 0;
 }
 
-static int rk_ahash_start(struct rk_crypto_info *dev)
+static int rk_hash_run(struct crypto_engine *engine, void *breq)
 {
-	struct ahash_request *req = ahash_request_cast(dev->async_req);
-	struct crypto_ahash *tfm;
-	struct rk_ahash_rctx *rctx;
-
-	dev->total = req->nbytes;
-	dev->left_bytes = req->nbytes;
-	dev->sg_dst = NULL;
-	dev->sg_src = req->src;
-	dev->first = req->src;
-	dev->src_nents = sg_nents(req->src);
-	rctx = ahash_request_ctx(req);
+	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
+	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
+	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
+	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct scatterlist *sg = areq->src;
+	int err = 0;
+	int i;
+	u32 v;
+
 	rctx->mode = 0;
 
-	tfm = crypto_ahash_reqtfm(req);
 	switch (crypto_ahash_digestsize(tfm)) {
 	case SHA1_DIGEST_SIZE:
 		rctx->mode = RK_CRYPTO_HASH_SHA1;
@@ -255,30 +267,26 @@ static int rk_ahash_start(struct rk_crypto_info *dev)
 		rctx->mode = RK_CRYPTO_HASH_MD5;
 		break;
 	default:
-		return -EINVAL;
+		err =  -EINVAL;
+		goto theend;
 	}
 
-	rk_ahash_reg_init(dev);
-	return rk_ahash_set_data_start(dev);
-}
+	rk_ahash_reg_init(areq);
 
-static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
-{
-	int err = 0;
-	struct ahash_request *req = ahash_request_cast(dev->async_req);
-	struct crypto_ahash *tfm;
-
-	dev->unload_data(dev);
-	if (dev->left_bytes) {
-		if (sg_is_last(dev->sg_src)) {
-			dev_warn(dev->dev, "[%s:%d], Lack of data\n",
-					__func__, __LINE__);
-			err = -ENOMEM;
-			goto out_rx;
+	while (sg) {
+		reinit_completion(&tctx->dev->complete);
+		tctx->dev->status = 0;
+		crypto_ahash_dma_start(tctx->dev, sg);
+		wait_for_completion_interruptible_timeout(&tctx->dev->complete,
+							  msecs_to_jiffies(2000));
+		if (!tctx->dev->status) {
+			dev_err(tctx->dev->dev, "DMA timeout\n");
+			err = -EFAULT;
+			goto theend;
 		}
-		dev->sg_src = sg_next(dev->sg_src);
-		err = rk_ahash_set_data_start(dev);
-	} else {
+		sg = sg_next(sg);
+	}
+
 		/*
 		 * it will take some time to process date after last dma
 		 * transmission.
@@ -289,18 +297,20 @@ static int rk_ahash_crypto_rx(struct rk_crypto_info *dev)
 		 * efficiency, and make it response quickly when dma
 		 * complete.
 		 */
-		while (!CRYPTO_READ(dev, RK_CRYPTO_HASH_STS))
-			udelay(10);
-
-		tfm = crypto_ahash_reqtfm(req);
-		memcpy_fromio(req->result, dev->reg + RK_CRYPTO_HASH_DOUT_0,
-			      crypto_ahash_digestsize(tfm));
-		dev->complete(dev->async_req, 0);
-		tasklet_schedule(&dev->queue_task);
+	while (!CRYPTO_READ(tctx->dev, RK_CRYPTO_HASH_STS))
+		udelay(10);
+
+	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) {
+		v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4);
+		put_unaligned_le32(v, areq->result + i * 4);
 	}
 
-out_rx:
-	return err;
+theend:
+	local_bh_disable();
+	crypto_finalize_hash_request(engine, breq, err);
+	local_bh_enable();
+
+	return 0;
 }
 
 static int rk_cra_hash_init(struct crypto_tfm *tfm)
@@ -314,9 +324,6 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 	algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 
 	tctx->dev = algt->dev;
-	tctx->dev->start = rk_ahash_start;
-	tctx->dev->update = rk_ahash_crypto_rx;
-	tctx->dev->complete = rk_ahash_crypto_complete;
 
 	/* for fallback */
 	tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0,
@@ -325,10 +332,15 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 		dev_err(tctx->dev->dev, "Could not load fallback driver.\n");
 		return PTR_ERR(tctx->fallback_tfm);
 	}
+
 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
 				 sizeof(struct rk_ahash_rctx) +
 				 crypto_ahash_reqsize(tctx->fallback_tfm));
 
+	tctx->enginectx.op.do_one_request = rk_hash_run;
+	tctx->enginectx.op.prepare_request = rk_hash_prepare;
+	tctx->enginectx.op.unprepare_request = rk_hash_unprepare;
+
 	return 0;
 }
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index d067b7f091659..67a7e05d5ae31 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -9,6 +9,7 @@
  * Some ideas are from marvell-cesa.c and s5p-sss.c driver.
  */
 #include <linux/device.h>
+#include <crypto/scatterwalk.h>
 #include "rk3288_crypto.h"
 
 #define RK_CRYPTO_DEC			BIT(0)
@@ -70,19 +71,15 @@ static int rk_cipher_fallback(struct skcipher_request *areq)
 	return err;
 }
 
-static void rk_crypto_complete(struct crypto_async_request *base, int err)
-{
-	if (base->complete)
-		base->complete(base, err);
-}
-
 static int rk_handle_req(struct rk_crypto_info *dev,
 			 struct skcipher_request *req)
 {
+	struct crypto_engine *engine = dev->engine;
+
 	if (rk_cipher_need_fallback(req))
 		return rk_cipher_fallback(req);
 
-	return dev->enqueue(dev, &req->base);
+	return crypto_transfer_skcipher_request_to_engine(engine, req);
 }
 
 static int rk_aes_setkey(struct crypto_skcipher *cipher,
@@ -265,25 +262,21 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req)
 	return rk_handle_req(dev, req);
 }
 
-static void rk_ablk_hw_init(struct rk_crypto_info *dev)
+static void rk_ablk_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req)
 {
-	struct skcipher_request *req =
-		skcipher_request_cast(dev->async_req);
 	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
 	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(cipher);
-	u32 ivsize, block, conf_reg = 0;
+	u32 block, conf_reg = 0;
 
 	block = crypto_tfm_alg_blocksize(tfm);
-	ivsize = crypto_skcipher_ivsize(cipher);
 
 	if (block == DES_BLOCK_SIZE) {
 		rctx->mode |= RK_CRYPTO_TDES_FIFO_MODE |
 			     RK_CRYPTO_TDES_BYTESWAP_KEY |
 			     RK_CRYPTO_TDES_BYTESWAP_IV;
 		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode);
-		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, req->iv, ivsize);
 		memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen);
 		conf_reg = RK_CRYPTO_DESSEL;
 	} else {
@@ -296,7 +289,6 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 		else if (ctx->keylen == AES_KEYSIZE_256)
 			rctx->mode |= RK_CRYPTO_AES_256BIT_key;
 		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode);
-		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, req->iv, ivsize);
 		memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen);
 	}
 	conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
@@ -306,133 +298,138 @@ static void rk_ablk_hw_init(struct rk_crypto_info *dev)
 		     RK_CRYPTO_BCDMA_ERR_ENA | RK_CRYPTO_BCDMA_DONE_ENA);
 }
 
-static void crypto_dma_start(struct rk_crypto_info *dev)
+static void crypto_dma_start(struct rk_crypto_info *dev,
+			     struct scatterlist *sgs,
+			     struct scatterlist *sgd, unsigned int todo)
 {
-	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, dev->addr_in);
-	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, dev->count / 4);
-	CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, dev->addr_out);
+	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAS, sg_dma_address(sgs));
+	CRYPTO_WRITE(dev, RK_CRYPTO_BRDMAL, todo);
+	CRYPTO_WRITE(dev, RK_CRYPTO_BTDMAS, sg_dma_address(sgd));
 	CRYPTO_WRITE(dev, RK_CRYPTO_CTRL, RK_CRYPTO_BLOCK_START |
 		     _SBF(RK_CRYPTO_BLOCK_START, 16));
 }
 
-static int rk_set_data_start(struct rk_crypto_info *dev)
+static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 {
-	int err;
-	struct skcipher_request *req =
-		skcipher_request_cast(dev->async_req);
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 ivsize = crypto_skcipher_ivsize(tfm);
-	u8 *src_last_blk = page_address(sg_page(dev->sg_src)) +
-		dev->sg_src->offset + dev->sg_src->length - ivsize;
-
-	/* Store the iv that need to be updated in chain mode.
-	 * And update the IV buffer to contain the next IV for decryption mode.
-	 */
-	if (rctx->mode & RK_CRYPTO_DEC) {
-		memcpy(ctx->iv, src_last_blk, ivsize);
-		sg_pcopy_to_buffer(dev->first, dev->src_nents, req->iv,
-				   ivsize, dev->total - ivsize);
-	}
-
-	err = dev->load_data(dev, dev->sg_src, dev->sg_dst);
-	if (!err)
-		crypto_dma_start(dev);
-	return err;
-}
-
-static int rk_ablk_start(struct rk_crypto_info *dev)
-{
-	struct skcipher_request *req =
-		skcipher_request_cast(dev->async_req);
-	unsigned long flags;
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
+	struct scatterlist *sgs, *sgd;
 	int err = 0;
+	int ivsize = crypto_skcipher_ivsize(tfm);
+	int offset;
+	u8 iv[AES_BLOCK_SIZE];
+	u8 biv[AES_BLOCK_SIZE];
+	u8 *ivtouse = areq->iv;
+	unsigned int len = areq->cryptlen;
+	unsigned int todo;
+
+	ivsize = crypto_skcipher_ivsize(tfm);
+	if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
+		if (rctx->mode & RK_CRYPTO_DEC) {
+			offset = areq->cryptlen - ivsize;
+			scatterwalk_map_and_copy(rctx->backup_iv, areq->src,
+						 offset, ivsize, 0);
+		}
+	}
 
-	dev->left_bytes = req->cryptlen;
-	dev->total = req->cryptlen;
-	dev->sg_src = req->src;
-	dev->first = req->src;
-	dev->src_nents = sg_nents(req->src);
-	dev->sg_dst = req->dst;
-	dev->dst_nents = sg_nents(req->dst);
-
-	spin_lock_irqsave(&dev->lock, flags);
-	rk_ablk_hw_init(dev);
-	err = rk_set_data_start(dev);
-	spin_unlock_irqrestore(&dev->lock, flags);
-	return err;
-}
-
-static void rk_iv_copyback(struct rk_crypto_info *dev)
-{
-	struct skcipher_request *req =
-		skcipher_request_cast(dev->async_req);
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 ivsize = crypto_skcipher_ivsize(tfm);
+	sgs = areq->src;
+	sgd = areq->dst;
 
-	/* Update the IV buffer to contain the next IV for encryption mode. */
-	if (!(rctx->mode & RK_CRYPTO_DEC)) {
-		memcpy(req->iv,
-		       sg_virt(dev->sg_dst) + dev->sg_dst->length - ivsize,
-		       ivsize);
+	while (sgs && sgd && len) {
+		if (!sgs->length) {
+			sgs = sg_next(sgs);
+			sgd = sg_next(sgd);
+			continue;
+		}
+		if (rctx->mode & RK_CRYPTO_DEC) {
+			/* we backup last block of source to be used as IV at next step */
+			offset = sgs->length - ivsize;
+			scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0);
+		}
+		if (sgs == sgd) {
+			err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
+			if (err <= 0) {
+				err = -EINVAL;
+				goto theend_iv;
+			}
+		} else {
+			err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
+			if (err <= 0) {
+				err = -EINVAL;
+				goto theend_iv;
+			}
+			err = dma_map_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
+			if (err <= 0) {
+				err = -EINVAL;
+				goto theend_sgs;
+			}
+		}
+		err = 0;
+		rk_ablk_hw_init(ctx->dev, areq);
+		if (ivsize) {
+			if (ivsize == DES_BLOCK_SIZE)
+				memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize);
+			else
+				memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize);
+		}
+		reinit_completion(&ctx->dev->complete);
+		ctx->dev->status = 0;
+
+		todo = min(sg_dma_len(sgs), len);
+		len -= todo;
+		crypto_dma_start(ctx->dev, sgs, sgd, todo / 4);
+		wait_for_completion_interruptible_timeout(&ctx->dev->complete,
+							  msecs_to_jiffies(2000));
+		if (!ctx->dev->status) {
+			dev_err(ctx->dev->dev, "DMA timeout\n");
+			err = -EFAULT;
+			goto theend;
+		}
+		if (sgs == sgd) {
+			dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
+		} else {
+			dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
+			dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
+		}
+		if (rctx->mode & RK_CRYPTO_DEC) {
+			memcpy(iv, biv, ivsize);
+			ivtouse = iv;
+		} else {
+			offset = sgd->length - ivsize;
+			scatterwalk_map_and_copy(iv, sgd, offset, ivsize, 0);
+			ivtouse = iv;
+		}
+		sgs = sg_next(sgs);
+		sgd = sg_next(sgd);
 	}
-}
-
-static void rk_update_iv(struct rk_crypto_info *dev)
-{
-	struct skcipher_request *req =
-		skcipher_request_cast(dev->async_req);
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	u32 ivsize = crypto_skcipher_ivsize(tfm);
-	u8 *new_iv = NULL;
 
-	if (rctx->mode & RK_CRYPTO_DEC) {
-		new_iv = ctx->iv;
-	} else {
-		new_iv = page_address(sg_page(dev->sg_dst)) +
-			 dev->sg_dst->offset + dev->sg_dst->length - ivsize;
+	if (areq->iv && ivsize > 0) {
+		offset = areq->cryptlen - ivsize;
+		if (rctx->mode & RK_CRYPTO_DEC) {
+			memcpy(areq->iv, rctx->backup_iv, ivsize);
+			memzero_explicit(rctx->backup_iv, ivsize);
+		} else {
+			scatterwalk_map_and_copy(areq->iv, areq->dst, offset,
+						 ivsize, 0);
+		}
 	}
 
-	if (ivsize == DES_BLOCK_SIZE)
-		memcpy_toio(dev->reg + RK_CRYPTO_TDES_IV_0, new_iv, ivsize);
-	else if (ivsize == AES_BLOCK_SIZE)
-		memcpy_toio(dev->reg + RK_CRYPTO_AES_IV_0, new_iv, ivsize);
-}
+theend:
+	local_bh_disable();
+	crypto_finalize_skcipher_request(engine, areq, err);
+	local_bh_enable();
+	return 0;
 
-/* return:
- *	true	some err was occurred
- *	fault	no err, continue
- */
-static int rk_ablk_rx(struct rk_crypto_info *dev)
-{
-	int err = 0;
-	struct skcipher_request *req =
-		skcipher_request_cast(dev->async_req);
-
-	dev->unload_data(dev);
-	if (dev->left_bytes) {
-		rk_update_iv(dev);
-		if (sg_is_last(dev->sg_src)) {
-			dev_err(dev->dev, "[%s:%d] Lack of data\n",
-					__func__, __LINE__);
-			err = -ENOMEM;
-			goto out_rx;
-		}
-		dev->sg_src = sg_next(dev->sg_src);
-		dev->sg_dst = sg_next(dev->sg_dst);
-		err = rk_set_data_start(dev);
+theend_sgs:
+	if (sgs == sgd) {
+		dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
 	} else {
-		rk_iv_copyback(dev);
-		/* here show the calculation is over without any err */
-		dev->complete(dev->async_req, 0);
-		tasklet_schedule(&dev->queue_task);
+		dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
+		dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
 	}
-out_rx:
+theend_iv:
 	return err;
 }
 
@@ -446,9 +443,6 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 	algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 
 	ctx->dev = algt->dev;
-	ctx->dev->start = rk_ablk_start;
-	ctx->dev->update = rk_ablk_rx;
-	ctx->dev->complete = rk_crypto_complete;
 
 	ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ctx->fallback_tfm)) {
@@ -460,6 +454,8 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 	tfm->reqsize = sizeof(struct rk_cipher_rctx) +
 		crypto_skcipher_reqsize(ctx->fallback_tfm);
 
+	ctx->enginectx.op.do_one_request = rk_cipher_run;
+
 	return 0;
 }
 
-- 
GitLab


From 6d55c4a206d29006c733b5083ba5da8391abbdbd Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:49 +0000
Subject: [PATCH 0551/4122] crypto: rockchip - rewrite type

Instead of using a custom type for classify algorithms, let's just use
already defined ones.
And let's made a bit more verbose about what is registered.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       | 26 +++++++++++++------
 drivers/crypto/rockchip/rk3288_crypto.h       |  7 +----
 drivers/crypto/rockchip/rk3288_crypto_ahash.c |  6 ++---
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 12 ++++-----
 4 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 1afb65eee6c9f..8f9664acc78d3 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -102,12 +102,22 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 
 	for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
 		rk_cipher_algs[i]->dev = crypto_info;
-		if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
-			err = crypto_register_skcipher(
-					&rk_cipher_algs[i]->alg.skcipher);
-		else
-			err = crypto_register_ahash(
-					&rk_cipher_algs[i]->alg.hash);
+		switch (rk_cipher_algs[i]->type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			dev_info(crypto_info->dev, "Register %s as %s\n",
+				 rk_cipher_algs[i]->alg.skcipher.base.cra_name,
+				 rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name);
+			err = crypto_register_skcipher(&rk_cipher_algs[i]->alg.skcipher);
+			break;
+		case CRYPTO_ALG_TYPE_AHASH:
+			dev_info(crypto_info->dev, "Register %s as %s\n",
+				 rk_cipher_algs[i]->alg.hash.halg.base.cra_name,
+				 rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name);
+			err = crypto_register_ahash(&rk_cipher_algs[i]->alg.hash);
+			break;
+		default:
+			dev_err(crypto_info->dev, "unknown algorithm\n");
+		}
 		if (err)
 			goto err_cipher_algs;
 	}
@@ -115,7 +125,7 @@ static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 
 err_cipher_algs:
 	for (k = 0; k < i; k++) {
-		if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
+		if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER)
 			crypto_unregister_skcipher(&rk_cipher_algs[k]->alg.skcipher);
 		else
 			crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
@@ -128,7 +138,7 @@ static void rk_crypto_unregister(void)
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
-		if (rk_cipher_algs[i]->type == ALG_TYPE_CIPHER)
+		if (rk_cipher_algs[i]->type == CRYPTO_ALG_TYPE_SKCIPHER)
 			crypto_unregister_skcipher(&rk_cipher_algs[i]->alg.skcipher);
 		else
 			crypto_unregister_ahash(&rk_cipher_algs[i]->alg.hash);
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 65ed645e01687..d924ea17402a7 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -232,18 +232,13 @@ struct rk_cipher_rctx {
 	struct skcipher_request fallback_req;   // keep at the end
 };
 
-enum alg_type {
-	ALG_TYPE_HASH,
-	ALG_TYPE_CIPHER,
-};
-
 struct rk_crypto_tmp {
+	u32 type;
 	struct rk_crypto_info		*dev;
 	union {
 		struct skcipher_alg	skcipher;
 		struct ahash_alg	hash;
 	} alg;
-	enum alg_type			type;
 };
 
 extern struct rk_crypto_tmp rk_ecb_aes_alg;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index edd40e16a3f0a..d08e2438d3567 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -352,7 +352,7 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm)
 }
 
 struct rk_crypto_tmp rk_ahash_sha1 = {
-	.type = ALG_TYPE_HASH,
+	.type = CRYPTO_ALG_TYPE_AHASH,
 	.alg.hash = {
 		.init = rk_ahash_init,
 		.update = rk_ahash_update,
@@ -382,7 +382,7 @@ struct rk_crypto_tmp rk_ahash_sha1 = {
 };
 
 struct rk_crypto_tmp rk_ahash_sha256 = {
-	.type = ALG_TYPE_HASH,
+	.type = CRYPTO_ALG_TYPE_AHASH,
 	.alg.hash = {
 		.init = rk_ahash_init,
 		.update = rk_ahash_update,
@@ -412,7 +412,7 @@ struct rk_crypto_tmp rk_ahash_sha256 = {
 };
 
 struct rk_crypto_tmp rk_ahash_md5 = {
-	.type = ALG_TYPE_HASH,
+	.type = CRYPTO_ALG_TYPE_AHASH,
 	.alg.hash = {
 		.init = rk_ahash_init,
 		.update = rk_ahash_update,
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 67a7e05d5ae31..1ed297f5d8098 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -468,7 +468,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
 }
 
 struct rk_crypto_tmp rk_ecb_aes_alg = {
-	.type = ALG_TYPE_CIPHER,
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
 		.base.cra_name		= "ecb(aes)",
 		.base.cra_driver_name	= "ecb-aes-rk",
@@ -490,7 +490,7 @@ struct rk_crypto_tmp rk_ecb_aes_alg = {
 };
 
 struct rk_crypto_tmp rk_cbc_aes_alg = {
-	.type = ALG_TYPE_CIPHER,
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
 		.base.cra_name		= "cbc(aes)",
 		.base.cra_driver_name	= "cbc-aes-rk",
@@ -513,7 +513,7 @@ struct rk_crypto_tmp rk_cbc_aes_alg = {
 };
 
 struct rk_crypto_tmp rk_ecb_des_alg = {
-	.type = ALG_TYPE_CIPHER,
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
 		.base.cra_name		= "ecb(des)",
 		.base.cra_driver_name	= "ecb-des-rk",
@@ -535,7 +535,7 @@ struct rk_crypto_tmp rk_ecb_des_alg = {
 };
 
 struct rk_crypto_tmp rk_cbc_des_alg = {
-	.type = ALG_TYPE_CIPHER,
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
 		.base.cra_name		= "cbc(des)",
 		.base.cra_driver_name	= "cbc-des-rk",
@@ -558,7 +558,7 @@ struct rk_crypto_tmp rk_cbc_des_alg = {
 };
 
 struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
-	.type = ALG_TYPE_CIPHER,
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
 		.base.cra_name		= "ecb(des3_ede)",
 		.base.cra_driver_name	= "ecb-des3-ede-rk",
@@ -580,7 +580,7 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
 };
 
 struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
-	.type = ALG_TYPE_CIPHER,
+	.type = CRYPTO_ALG_TYPE_SKCIPHER,
 	.alg.skcipher = {
 		.base.cra_name		= "cbc(des3_ede)",
 		.base.cra_driver_name	= "cbc-des3-ede-rk",
-- 
GitLab


From 48d904d428b68080abd9161148ca2ab1331124a4 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:50 +0000
Subject: [PATCH 0552/4122] crypto: rockchip - add debugfs

This patch enable to access usage stats for each algorithm.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig                        | 10 ++++
 drivers/crypto/rockchip/rk3288_crypto.c       | 47 +++++++++++++++++++
 drivers/crypto/rockchip/rk3288_crypto.h       | 11 +++++
 drivers/crypto/rockchip/rk3288_crypto_ahash.c |  8 ++++
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 15 ++++++
 5 files changed, 91 insertions(+)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index c30b5a39c2ac2..2947888d3b828 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -686,6 +686,16 @@ config CRYPTO_DEV_ROCKCHIP
 	  This driver interfaces with the hardware crypto accelerator.
 	  Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode.
 
+config CRYPTO_DEV_ROCKCHIP_DEBUG
+	bool "Enable Rockchip crypto stats"
+	depends on CRYPTO_DEV_ROCKCHIP
+	depends on DEBUG_FS
+	help
+	  Say y to enable Rockchip crypto debug stats.
+	  This will create /sys/kernel/debug/rk3288_crypto/stats for displaying
+	  the number of requests per algorithm and other internal stats.
+
+
 config CRYPTO_DEV_ZYNQMP_AES
 	tristate "Support for Xilinx ZynqMP AES hw accelerator"
 	depends on ZYNQMP_FIRMWARE || COMPILE_TEST
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 8f9664acc78d3..3e1b4f3b2422e 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -95,6 +95,41 @@ static struct rk_crypto_tmp *rk_cipher_algs[] = {
 	&rk_ahash_md5,
 };
 
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
+		if (!rk_cipher_algs[i]->dev)
+			continue;
+		switch (rk_cipher_algs[i]->type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
+				   rk_cipher_algs[i]->alg.skcipher.base.cra_driver_name,
+				   rk_cipher_algs[i]->alg.skcipher.base.cra_name,
+				   rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb);
+			seq_printf(seq, "\tfallback due to length: %lu\n",
+				   rk_cipher_algs[i]->stat_fb_len);
+			seq_printf(seq, "\tfallback due to alignment: %lu\n",
+				   rk_cipher_algs[i]->stat_fb_align);
+			seq_printf(seq, "\tfallback due to SGs: %lu\n",
+				   rk_cipher_algs[i]->stat_fb_sgdiff);
+			break;
+		case CRYPTO_ALG_TYPE_AHASH:
+			seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
+				   rk_cipher_algs[i]->alg.hash.halg.base.cra_driver_name,
+				   rk_cipher_algs[i]->alg.hash.halg.base.cra_name,
+				   rk_cipher_algs[i]->stat_req, rk_cipher_algs[i]->stat_fb);
+			break;
+		}
+	}
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs);
+#endif
+
 static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 {
 	unsigned int i, k;
@@ -246,6 +281,15 @@ static int rk_crypto_probe(struct platform_device *pdev)
 		goto err_register_alg;
 	}
 
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+	/* Ignore error of debugfs */
+	crypto_info->dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL);
+	crypto_info->dbgfs_stats = debugfs_create_file("stats", 0444,
+						       crypto_info->dbgfs_dir,
+						       crypto_info,
+						       &rk_crypto_debugfs_fops);
+#endif
+
 	dev_info(dev, "Crypto Accelerator successfully registered\n");
 	return 0;
 
@@ -260,6 +304,9 @@ static int rk_crypto_remove(struct platform_device *pdev)
 {
 	struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev);
 
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+	debugfs_remove_recursive(crypto_tmp->dbgfs_dir);
+#endif
 	rk_crypto_unregister();
 	rk_crypto_disable_clk(crypto_tmp);
 	crypto_engine_exit(crypto_tmp->engine);
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index d924ea17402a7..945a8184bbadd 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -7,6 +7,7 @@
 #include <crypto/algapi.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <crypto/engine.h>
@@ -199,6 +200,10 @@ struct rk_crypto_info {
 	struct crypto_engine *engine;
 	struct completion complete;
 	int status;
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+	struct dentry *dbgfs_dir;
+	struct dentry *dbgfs_stats;
+#endif
 };
 
 /* the private variable of hash */
@@ -239,6 +244,12 @@ struct rk_crypto_tmp {
 		struct skcipher_alg	skcipher;
 		struct ahash_alg	hash;
 	} alg;
+	unsigned long stat_req;
+	unsigned long stat_fb;
+	unsigned long stat_fb_len;
+	unsigned long stat_fb_sglen;
+	unsigned long stat_fb_align;
+	unsigned long stat_fb_sgdiff;
 };
 
 extern struct rk_crypto_tmp rk_ecb_aes_alg;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index d08e2438d3567..8856c6226be60 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -39,6 +39,10 @@ static int rk_ahash_digest_fb(struct ahash_request *areq)
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_ctx *tfmctx = crypto_ahash_ctx(tfm);
+	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
+
+	algt->stat_fb++;
 
 	ahash_request_set_tfm(&rctx->fallback_req, tfmctx->fallback_tfm);
 	rctx->fallback_req.base.flags = areq->base.flags &
@@ -249,6 +253,8 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
 	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 	struct scatterlist *sg = areq->src;
 	int err = 0;
 	int i;
@@ -256,6 +262,8 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 
 	rctx->mode = 0;
 
+	algt->stat_req++;
+
 	switch (crypto_ahash_digestsize(tfm)) {
 	case SHA1_DIGEST_SIZE:
 		rctx->mode = RK_CRYPTO_HASH_SHA1;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 1ed297f5d8098..91b8a4c574da2 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -18,6 +18,8 @@ static int rk_cipher_need_fallback(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	unsigned int bs = crypto_skcipher_blocksize(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 	struct scatterlist *sgs, *sgd;
 	unsigned int stodo, dtodo, len;
 
@@ -29,20 +31,25 @@ static int rk_cipher_need_fallback(struct skcipher_request *req)
 	sgd = req->dst;
 	while (sgs && sgd) {
 		if (!IS_ALIGNED(sgs->offset, sizeof(u32))) {
+			algt->stat_fb_align++;
 			return true;
 		}
 		if (!IS_ALIGNED(sgd->offset, sizeof(u32))) {
+			algt->stat_fb_align++;
 			return true;
 		}
 		stodo = min(len, sgs->length);
 		if (stodo % bs) {
+			algt->stat_fb_len++;
 			return true;
 		}
 		dtodo = min(len, sgd->length);
 		if (dtodo % bs) {
+			algt->stat_fb_len++;
 			return true;
 		}
 		if (stodo != dtodo) {
+			algt->stat_fb_sgdiff++;
 			return true;
 		}
 		len -= stodo;
@@ -57,8 +64,12 @@ static int rk_cipher_fallback(struct skcipher_request *areq)
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
 	struct rk_cipher_ctx *op = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 	int err;
 
+	algt->stat_fb++;
+
 	skcipher_request_set_tfm(&rctx->fallback_req, op->fallback_tfm);
 	skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
 				      areq->base.complete, areq->base.data);
@@ -324,6 +335,10 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 	u8 *ivtouse = areq->iv;
 	unsigned int len = areq->cryptlen;
 	unsigned int todo;
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+
+	algt->stat_req++;
 
 	ivsize = crypto_skcipher_ivsize(tfm);
 	if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
-- 
GitLab


From a216be3964c15661579005012b1f0d7d20a1f265 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:51 +0000
Subject: [PATCH 0553/4122] crypto: rockchip - introduce PM

Add runtime PM support for rockchip crypto.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       | 51 ++++++++++++++++++-
 drivers/crypto/rockchip/rk3288_crypto.h       |  1 +
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 10 ++++
 .../crypto/rockchip/rk3288_crypto_skcipher.c  |  9 ++++
 4 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 3e1b4f3b2422e..d9258b9e71b3c 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -65,6 +65,48 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev)
 	clk_disable_unprepare(dev->sclk);
 }
 
+/*
+ * Power management strategy: The device is suspended unless a TFM exists for
+ * one of the algorithms proposed by this driver.
+ */
+static int rk_crypto_pm_suspend(struct device *dev)
+{
+	struct rk_crypto_info *rkdev = dev_get_drvdata(dev);
+
+	rk_crypto_disable_clk(rkdev);
+	return 0;
+}
+
+static int rk_crypto_pm_resume(struct device *dev)
+{
+	struct rk_crypto_info *rkdev = dev_get_drvdata(dev);
+
+	return rk_crypto_enable_clk(rkdev);
+}
+
+static const struct dev_pm_ops rk_crypto_pm_ops = {
+	SET_RUNTIME_PM_OPS(rk_crypto_pm_suspend, rk_crypto_pm_resume, NULL)
+};
+
+static int rk_crypto_pm_init(struct rk_crypto_info *rkdev)
+{
+	int err;
+
+	pm_runtime_use_autosuspend(rkdev->dev);
+	pm_runtime_set_autosuspend_delay(rkdev->dev, 2000);
+
+	err = pm_runtime_set_suspended(rkdev->dev);
+	if (err)
+		return err;
+	pm_runtime_enable(rkdev->dev);
+	return err;
+}
+
+static void rk_crypto_pm_exit(struct rk_crypto_info *rkdev)
+{
+	pm_runtime_disable(rkdev->dev);
+}
+
 static irqreturn_t rk_crypto_irq_handle(int irq, void *dev_id)
 {
 	struct rk_crypto_info *dev  = platform_get_drvdata(dev_id);
@@ -273,7 +315,9 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	crypto_engine_start(crypto_info->engine);
 	init_completion(&crypto_info->complete);
 
-	rk_crypto_enable_clk(crypto_info);
+	err = rk_crypto_pm_init(crypto_info);
+	if (err)
+		goto err_pm;
 
 	err = rk_crypto_register(crypto_info);
 	if (err) {
@@ -294,6 +338,8 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	return 0;
 
 err_register_alg:
+	rk_crypto_pm_exit(crypto_info);
+err_pm:
 	crypto_engine_exit(crypto_info->engine);
 err_crypto:
 	dev_err(dev, "Crypto Accelerator not successfully registered\n");
@@ -308,7 +354,7 @@ static int rk_crypto_remove(struct platform_device *pdev)
 	debugfs_remove_recursive(crypto_tmp->dbgfs_dir);
 #endif
 	rk_crypto_unregister();
-	rk_crypto_disable_clk(crypto_tmp);
+	rk_crypto_pm_exit(crypto_tmp);
 	crypto_engine_exit(crypto_tmp->engine);
 	return 0;
 }
@@ -318,6 +364,7 @@ static struct platform_driver crypto_driver = {
 	.remove		= rk_crypto_remove,
 	.driver		= {
 		.name	= "rk3288-crypto",
+		.pm		= &rk_crypto_pm_ops,
 		.of_match_table	= crypto_of_id_table,
 	},
 };
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 945a8184bbadd..ddbb9246ce164 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -9,6 +9,7 @@
 #include <linux/interrupt.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
+#include <linux/pm_runtime.h>
 #include <linux/scatterlist.h>
 #include <crypto/engine.h>
 #include <crypto/internal/hash.h>
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 8856c6226be60..137013bd44102 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -328,6 +328,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
 
 	const char *alg_name = crypto_tfm_alg_name(tfm);
+	int err;
 
 	algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 
@@ -349,7 +350,15 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 	tctx->enginectx.op.prepare_request = rk_hash_prepare;
 	tctx->enginectx.op.unprepare_request = rk_hash_unprepare;
 
+	err = pm_runtime_resume_and_get(tctx->dev->dev);
+	if (err < 0)
+		goto error_pm;
+
 	return 0;
+error_pm:
+	crypto_free_ahash(tctx->fallback_tfm);
+
+	return err;
 }
 
 static void rk_cra_hash_exit(struct crypto_tfm *tfm)
@@ -357,6 +366,7 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm)
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
 
 	crypto_free_ahash(tctx->fallback_tfm);
+	pm_runtime_put_autosuspend(tctx->dev->dev);
 }
 
 struct rk_crypto_tmp rk_ahash_sha1 = {
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 91b8a4c574da2..3bdb304aa7948 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -454,6 +454,7 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	const char *name = crypto_tfm_alg_name(&tfm->base);
 	struct rk_crypto_tmp *algt;
+	int err;
 
 	algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 
@@ -471,7 +472,14 @@ static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
 
 	ctx->enginectx.op.do_one_request = rk_cipher_run;
 
+	err = pm_runtime_resume_and_get(ctx->dev->dev);
+	if (err < 0)
+		goto error_pm;
+
 	return 0;
+error_pm:
+	crypto_free_skcipher(ctx->fallback_tfm);
+	return err;
 }
 
 static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
@@ -480,6 +488,7 @@ static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
 
 	memzero_explicit(ctx->key, ctx->keylen);
 	crypto_free_skcipher(ctx->fallback_tfm);
+	pm_runtime_put_autosuspend(ctx->dev->dev);
 }
 
 struct rk_crypto_tmp rk_ecb_aes_alg = {
-- 
GitLab


From 6f61192549d0214f8d9d1e1d3152e450658ed1e9 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:52 +0000
Subject: [PATCH 0554/4122] crypto: rockchip - handle reset also in PM

reset could be handled by PM functions.
We keep the initial reset pulse to be sure the hw is a know device state
after probe.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index d9258b9e71b3c..399829ef92e0f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -74,14 +74,23 @@ static int rk_crypto_pm_suspend(struct device *dev)
 	struct rk_crypto_info *rkdev = dev_get_drvdata(dev);
 
 	rk_crypto_disable_clk(rkdev);
+	reset_control_assert(rkdev->rst);
+
 	return 0;
 }
 
 static int rk_crypto_pm_resume(struct device *dev)
 {
 	struct rk_crypto_info *rkdev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = rk_crypto_enable_clk(rkdev);
+	if (ret)
+		return ret;
+
+	reset_control_deassert(rkdev->rst);
+	return 0;
 
-	return rk_crypto_enable_clk(rkdev);
 }
 
 static const struct dev_pm_ops rk_crypto_pm_ops = {
@@ -222,13 +231,6 @@ static void rk_crypto_unregister(void)
 	}
 }
 
-static void rk_crypto_action(void *data)
-{
-	struct rk_crypto_info *crypto_info = data;
-
-	reset_control_assert(crypto_info->rst);
-}
-
 static const struct of_device_id crypto_of_id_table[] = {
 	{ .compatible = "rockchip,rk3288-crypto" },
 	{}
@@ -258,10 +260,6 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	usleep_range(10, 20);
 	reset_control_deassert(crypto_info->rst);
 
-	err = devm_add_action_or_reset(dev, rk_crypto_action, crypto_info);
-	if (err)
-		goto err_crypto;
-
 	crypto_info->reg = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(crypto_info->reg)) {
 		err = PTR_ERR(crypto_info->reg);
-- 
GitLab


From 3a6fd464f48ad35d8cf15d81fd92094132dc862a Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:53 +0000
Subject: [PATCH 0555/4122] crypto: rockchip - use clk_bulk to simplify clock
 management

rk3328 does not have the same clock names than rk3288, instead of using a complex
clock management, let's use clk_bulk to simplify their handling.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 66 ++++---------------------
 drivers/crypto/rockchip/rk3288_crypto.h |  6 +--
 2 files changed, 11 insertions(+), 61 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 399829ef92e0f..a635029ac71d0 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -22,47 +22,16 @@ static int rk_crypto_enable_clk(struct rk_crypto_info *dev)
 {
 	int err;
 
-	err = clk_prepare_enable(dev->sclk);
-	if (err) {
-		dev_err(dev->dev, "[%s:%d], Couldn't enable clock sclk\n",
-			__func__, __LINE__);
-		goto err_return;
-	}
-	err = clk_prepare_enable(dev->aclk);
-	if (err) {
-		dev_err(dev->dev, "[%s:%d], Couldn't enable clock aclk\n",
-			__func__, __LINE__);
-		goto err_aclk;
-	}
-	err = clk_prepare_enable(dev->hclk);
-	if (err) {
-		dev_err(dev->dev, "[%s:%d], Couldn't enable clock hclk\n",
-			__func__, __LINE__);
-		goto err_hclk;
-	}
-	err = clk_prepare_enable(dev->dmaclk);
-	if (err) {
-		dev_err(dev->dev, "[%s:%d], Couldn't enable clock dmaclk\n",
-			__func__, __LINE__);
-		goto err_dmaclk;
-	}
-	return err;
-err_dmaclk:
-	clk_disable_unprepare(dev->hclk);
-err_hclk:
-	clk_disable_unprepare(dev->aclk);
-err_aclk:
-	clk_disable_unprepare(dev->sclk);
-err_return:
+	err = clk_bulk_prepare_enable(dev->num_clks, dev->clks);
+	if (err)
+		dev_err(dev->dev, "Could not enable clock clks\n");
+
 	return err;
 }
 
 static void rk_crypto_disable_clk(struct rk_crypto_info *dev)
 {
-	clk_disable_unprepare(dev->dmaclk);
-	clk_disable_unprepare(dev->hclk);
-	clk_disable_unprepare(dev->aclk);
-	clk_disable_unprepare(dev->sclk);
+	clk_bulk_disable_unprepare(dev->num_clks, dev->clks);
 }
 
 /*
@@ -266,27 +235,10 @@ static int rk_crypto_probe(struct platform_device *pdev)
 		goto err_crypto;
 	}
 
-	crypto_info->aclk = devm_clk_get(&pdev->dev, "aclk");
-	if (IS_ERR(crypto_info->aclk)) {
-		err = PTR_ERR(crypto_info->aclk);
-		goto err_crypto;
-	}
-
-	crypto_info->hclk = devm_clk_get(&pdev->dev, "hclk");
-	if (IS_ERR(crypto_info->hclk)) {
-		err = PTR_ERR(crypto_info->hclk);
-		goto err_crypto;
-	}
-
-	crypto_info->sclk = devm_clk_get(&pdev->dev, "sclk");
-	if (IS_ERR(crypto_info->sclk)) {
-		err = PTR_ERR(crypto_info->sclk);
-		goto err_crypto;
-	}
-
-	crypto_info->dmaclk = devm_clk_get(&pdev->dev, "apb_pclk");
-	if (IS_ERR(crypto_info->dmaclk)) {
-		err = PTR_ERR(crypto_info->dmaclk);
+	crypto_info->num_clks = devm_clk_bulk_get_all(&pdev->dev,
+						      &crypto_info->clks);
+	if (crypto_info->num_clks < 3) {
+		err = -EINVAL;
 		goto err_crypto;
 	}
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index ddbb9246ce164..28bf09fe1c1d1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -190,10 +190,8 @@
 
 struct rk_crypto_info {
 	struct device			*dev;
-	struct clk			*aclk;
-	struct clk			*hclk;
-	struct clk			*sclk;
-	struct clk			*dmaclk;
+	struct clk_bulk_data		*clks;
+	int num_clks;
 	struct reset_control		*rst;
 	void __iomem			*reg;
 	int				irq;
-- 
GitLab


From e803188400d32d28ecfbef0878c289e3c7026723 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:54 +0000
Subject: [PATCH 0556/4122] crypto: rockchip - add myself as maintainer

Nobody is set as maintainer of rockchip crypto, I propose to do it as I
have already reworked lot of this code.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 MAINTAINERS | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..3489126acd1f6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17753,6 +17753,13 @@ F:	Documentation/ABI/*/sysfs-driver-hid-roccat*
 F:	drivers/hid/hid-roccat*
 F:	include/linux/hid-roccat*
 
+ROCKCHIP CRYPTO DRIVERS
+M:	Corentin Labbe <clabbe@baylibre.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
+F:	drivers/crypto/rockchip/
+
 ROCKCHIP I2S TDM DRIVER
 M:	Nicolas Frattaroli <frattaroli.nicolas@gmail.com>
 L:	linux-rockchip@lists.infradead.org
-- 
GitLab


From 37bc22159c456ad43fb852fc6ed60f4081df25df Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:55 +0000
Subject: [PATCH 0557/4122] crypto: rockchip - use read_poll_timeout

Use read_poll_timeout instead of open coding it.
In the same time, fix indentation of related comment.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 137013bd44102..1fbab86c92384 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -10,6 +10,7 @@
  */
 #include <linux/device.h>
 #include <asm/unaligned.h>
+#include <linux/iopoll.h>
 #include "rk3288_crypto.h"
 
 /*
@@ -295,18 +296,17 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 		sg = sg_next(sg);
 	}
 
-		/*
-		 * it will take some time to process date after last dma
-		 * transmission.
-		 *
-		 * waiting time is relative with the last date len,
-		 * so cannot set a fixed time here.
-		 * 10us makes system not call here frequently wasting
-		 * efficiency, and make it response quickly when dma
-		 * complete.
-		 */
-	while (!CRYPTO_READ(tctx->dev, RK_CRYPTO_HASH_STS))
-		udelay(10);
+	/*
+	 * it will take some time to process date after last dma
+	 * transmission.
+	 *
+	 * waiting time is relative with the last date len,
+	 * so cannot set a fixed time here.
+	 * 10us makes system not call here frequently wasting
+	 * efficiency, and make it response quickly when dma
+	 * complete.
+	 */
+	readl_poll_timeout(tctx->dev->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000);
 
 	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) {
 		v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4);
-- 
GitLab


From 456698746b40008eb0924eb7e9ec908330948b2d Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:56 +0000
Subject: [PATCH 0558/4122] crypto: rockchip - fix style issue

This patch fixes some warning reported by checkpatch

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 1fbab86c92384..fae779d73c840 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -336,7 +336,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 
 	/* for fallback */
 	tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0,
-					       CRYPTO_ALG_NEED_FALLBACK);
+						CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(tctx->fallback_tfm)) {
 		dev_err(tctx->dev->dev, "Could not load fallback driver.\n");
 		return PTR_ERR(tctx->fallback_tfm);
@@ -394,8 +394,8 @@ struct rk_crypto_tmp rk_ahash_sha1 = {
 				  .cra_init = rk_cra_hash_init,
 				  .cra_exit = rk_cra_hash_exit,
 				  .cra_module = THIS_MODULE,
-				  }
-			 }
+			}
+		}
 	}
 };
 
@@ -424,8 +424,8 @@ struct rk_crypto_tmp rk_ahash_sha256 = {
 				  .cra_init = rk_cra_hash_init,
 				  .cra_exit = rk_cra_hash_exit,
 				  .cra_module = THIS_MODULE,
-				  }
-			 }
+			}
+		}
 	}
 };
 
@@ -454,7 +454,7 @@ struct rk_crypto_tmp rk_ahash_md5 = {
 				  .cra_init = rk_cra_hash_init,
 				  .cra_exit = rk_cra_hash_exit,
 				  .cra_module = THIS_MODULE,
-				  }
 			}
+		}
 	}
 };
-- 
GitLab


From e65e90101329de0fe304e2df057f68c5f0fa4748 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:57 +0000
Subject: [PATCH 0559/4122] crypto: rockchip - add support for rk3328

The rk3328 could be used as-is by the rockchip driver.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index a635029ac71d0..c92559b83f7dd 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -202,6 +202,7 @@ static void rk_crypto_unregister(void)
 
 static const struct of_device_id crypto_of_id_table[] = {
 	{ .compatible = "rockchip,rk3288-crypto" },
+	{ .compatible = "rockchip,rk3328-crypto" },
 	{}
 };
 MODULE_DEVICE_TABLE(of, crypto_of_id_table);
-- 
GitLab


From a7fa0644dd0b91fab97398de7ea4672a6526261f Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:58 +0000
Subject: [PATCH 0560/4122] crypto: rockchip - rename ablk functions to cipher

Some functions have still ablk in their name even if there are
not handling ablk_cipher anymore.
So let's rename them.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 32 +++++++++----------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 3bdb304aa7948..d60c206e717d5 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -273,7 +273,7 @@ static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req)
 	return rk_handle_req(dev, req);
 }
 
-static void rk_ablk_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req)
+static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req)
 {
 	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(req);
 	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
@@ -382,7 +382,7 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 			}
 		}
 		err = 0;
-		rk_ablk_hw_init(ctx->dev, areq);
+		rk_cipher_hw_init(ctx->dev, areq);
 		if (ivsize) {
 			if (ivsize == DES_BLOCK_SIZE)
 				memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize);
@@ -448,7 +448,7 @@ theend_iv:
 	return err;
 }
 
-static int rk_ablk_init_tfm(struct crypto_skcipher *tfm)
+static int rk_cipher_tfm_init(struct crypto_skcipher *tfm)
 {
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
@@ -482,7 +482,7 @@ error_pm:
 	return err;
 }
 
-static void rk_ablk_exit_tfm(struct crypto_skcipher *tfm)
+static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm)
 {
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
@@ -503,8 +503,8 @@ struct rk_crypto_tmp rk_ecb_aes_alg = {
 		.base.cra_alignmask	= 0x0f,
 		.base.cra_module	= THIS_MODULE,
 
-		.init			= rk_ablk_init_tfm,
-		.exit			= rk_ablk_exit_tfm,
+		.init			= rk_cipher_tfm_init,
+		.exit			= rk_cipher_tfm_exit,
 		.min_keysize		= AES_MIN_KEY_SIZE,
 		.max_keysize		= AES_MAX_KEY_SIZE,
 		.setkey			= rk_aes_setkey,
@@ -525,8 +525,8 @@ struct rk_crypto_tmp rk_cbc_aes_alg = {
 		.base.cra_alignmask	= 0x0f,
 		.base.cra_module	= THIS_MODULE,
 
-		.init			= rk_ablk_init_tfm,
-		.exit			= rk_ablk_exit_tfm,
+		.init			= rk_cipher_tfm_init,
+		.exit			= rk_cipher_tfm_exit,
 		.min_keysize		= AES_MIN_KEY_SIZE,
 		.max_keysize		= AES_MAX_KEY_SIZE,
 		.ivsize			= AES_BLOCK_SIZE,
@@ -548,8 +548,8 @@ struct rk_crypto_tmp rk_ecb_des_alg = {
 		.base.cra_alignmask	= 0x07,
 		.base.cra_module	= THIS_MODULE,
 
-		.init			= rk_ablk_init_tfm,
-		.exit			= rk_ablk_exit_tfm,
+		.init			= rk_cipher_tfm_init,
+		.exit			= rk_cipher_tfm_exit,
 		.min_keysize		= DES_KEY_SIZE,
 		.max_keysize		= DES_KEY_SIZE,
 		.setkey			= rk_des_setkey,
@@ -570,8 +570,8 @@ struct rk_crypto_tmp rk_cbc_des_alg = {
 		.base.cra_alignmask	= 0x07,
 		.base.cra_module	= THIS_MODULE,
 
-		.init			= rk_ablk_init_tfm,
-		.exit			= rk_ablk_exit_tfm,
+		.init			= rk_cipher_tfm_init,
+		.exit			= rk_cipher_tfm_exit,
 		.min_keysize		= DES_KEY_SIZE,
 		.max_keysize		= DES_KEY_SIZE,
 		.ivsize			= DES_BLOCK_SIZE,
@@ -593,8 +593,8 @@ struct rk_crypto_tmp rk_ecb_des3_ede_alg = {
 		.base.cra_alignmask	= 0x07,
 		.base.cra_module	= THIS_MODULE,
 
-		.init			= rk_ablk_init_tfm,
-		.exit			= rk_ablk_exit_tfm,
+		.init			= rk_cipher_tfm_init,
+		.exit			= rk_cipher_tfm_exit,
 		.min_keysize		= DES3_EDE_KEY_SIZE,
 		.max_keysize		= DES3_EDE_KEY_SIZE,
 		.setkey			= rk_tdes_setkey,
@@ -615,8 +615,8 @@ struct rk_crypto_tmp rk_cbc_des3_ede_alg = {
 		.base.cra_alignmask	= 0x07,
 		.base.cra_module	= THIS_MODULE,
 
-		.init			= rk_ablk_init_tfm,
-		.exit			= rk_ablk_exit_tfm,
+		.init			= rk_cipher_tfm_init,
+		.exit			= rk_cipher_tfm_exit,
 		.min_keysize		= DES3_EDE_KEY_SIZE,
 		.max_keysize		= DES3_EDE_KEY_SIZE,
 		.ivsize			= DES_BLOCK_SIZE,
-- 
GitLab


From 2e3b149578c30275db9c3501c1d9dec36d16622a Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:54:59 +0000
Subject: [PATCH 0561/4122] crypto: rockchip - rework rk_handle_req function

This patch rework the rk_handle_req(), simply removing the
rk_crypto_info parameter.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 68 +++++--------------
 1 file changed, 17 insertions(+), 51 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index d60c206e717d5..3187869c4c68e 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -82,10 +82,12 @@ static int rk_cipher_fallback(struct skcipher_request *areq)
 	return err;
 }
 
-static int rk_handle_req(struct rk_crypto_info *dev,
-			 struct skcipher_request *req)
+static int rk_cipher_handle_req(struct skcipher_request *req)
 {
-	struct crypto_engine *engine = dev->engine;
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct rk_cipher_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct rk_crypto_info *rkc = tctx->dev;
+	struct crypto_engine *engine = rkc->engine;
 
 	if (rk_cipher_need_fallback(req))
 		return rk_cipher_fallback(req);
@@ -142,135 +144,99 @@ static int rk_tdes_setkey(struct crypto_skcipher *cipher,
 
 static int rk_aes_ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_AES_ECB_MODE;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_aes_ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_AES_ECB_MODE | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_aes_cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_AES_CBC_MODE;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_aes_cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_AES_CBC_MODE | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des_ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = 0;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des_ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des_cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des_cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_TDES_CHAINMODE_CBC | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des3_ede_ecb_encrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_TDES_SELECT;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des3_ede_ecb_decrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des3_ede_cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static int rk_des3_ede_cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *dev = ctx->dev;
 
 	rctx->mode = RK_CRYPTO_TDES_SELECT | RK_CRYPTO_TDES_CHAINMODE_CBC |
 		    RK_CRYPTO_DEC;
-	return rk_handle_req(dev, req);
+	return rk_cipher_handle_req(req);
 }
 
 static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_request *req)
-- 
GitLab


From c018c7a9dd198ce965ca4d10c7b083849bc533be Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:00 +0000
Subject: [PATCH 0562/4122] crypto: rockchip - use a rk_crypto_info variable
 instead of lot of indirection

Instead of using lot of ctx->dev->xx indirections, use an intermediate
variable for rk_crypto_info.
This will help later, when 2 different rk_crypto_info would be used.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 23 +++++++-----
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 37 ++++++++++---------
 2 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index fae779d73c840..636dbcde0ca3f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -226,9 +226,10 @@ static int rk_hash_prepare(struct crypto_engine *engine, void *breq)
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
 	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct rk_crypto_info *rkc = tctx->dev;
 	int ret;
 
-	ret = dma_map_sg(tctx->dev->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
+	ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
 	if (ret <= 0)
 		return -EINVAL;
 
@@ -243,8 +244,9 @@ static int rk_hash_unprepare(struct crypto_engine *engine, void *breq)
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
 	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
+	struct rk_crypto_info *rkc = tctx->dev;
 
-	dma_unmap_sg(tctx->dev->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE);
+	dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE);
 	return 0;
 }
 
@@ -257,6 +259,7 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
 	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 	struct scatterlist *sg = areq->src;
+	struct rk_crypto_info *rkc = tctx->dev;
 	int err = 0;
 	int i;
 	u32 v;
@@ -283,13 +286,13 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	rk_ahash_reg_init(areq);
 
 	while (sg) {
-		reinit_completion(&tctx->dev->complete);
-		tctx->dev->status = 0;
-		crypto_ahash_dma_start(tctx->dev, sg);
-		wait_for_completion_interruptible_timeout(&tctx->dev->complete,
+		reinit_completion(&rkc->complete);
+		rkc->status = 0;
+		crypto_ahash_dma_start(rkc, sg);
+		wait_for_completion_interruptible_timeout(&rkc->complete,
 							  msecs_to_jiffies(2000));
-		if (!tctx->dev->status) {
-			dev_err(tctx->dev->dev, "DMA timeout\n");
+		if (!rkc->status) {
+			dev_err(rkc->dev, "DMA timeout\n");
 			err = -EFAULT;
 			goto theend;
 		}
@@ -306,10 +309,10 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	 * efficiency, and make it response quickly when dma
 	 * complete.
 	 */
-	readl_poll_timeout(tctx->dev->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000);
+	readl_poll_timeout(rkc->reg + RK_CRYPTO_HASH_STS, v, v == 0, 10, 1000);
 
 	for (i = 0; i < crypto_ahash_digestsize(tfm) / 4; i++) {
-		v = readl(tctx->dev->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4);
+		v = readl(rkc->reg + RK_CRYPTO_HASH_DOUT_0 + i * 4);
 		put_unaligned_le32(v, areq->result + i * 4);
 	}
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 3187869c4c68e..6a1bea98fded1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -303,6 +303,7 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 	unsigned int todo;
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
+	struct rk_crypto_info *rkc = ctx->dev;
 
 	algt->stat_req++;
 
@@ -330,49 +331,49 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 			scatterwalk_map_and_copy(biv, sgs, offset, ivsize, 0);
 		}
 		if (sgs == sgd) {
-			err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
+			err = dma_map_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL);
 			if (err <= 0) {
 				err = -EINVAL;
 				goto theend_iv;
 			}
 		} else {
-			err = dma_map_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
+			err = dma_map_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE);
 			if (err <= 0) {
 				err = -EINVAL;
 				goto theend_iv;
 			}
-			err = dma_map_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
+			err = dma_map_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE);
 			if (err <= 0) {
 				err = -EINVAL;
 				goto theend_sgs;
 			}
 		}
 		err = 0;
-		rk_cipher_hw_init(ctx->dev, areq);
+		rk_cipher_hw_init(rkc, areq);
 		if (ivsize) {
 			if (ivsize == DES_BLOCK_SIZE)
-				memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize);
+				memcpy_toio(rkc->reg + RK_CRYPTO_TDES_IV_0, ivtouse, ivsize);
 			else
-				memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize);
+				memcpy_toio(rkc->reg + RK_CRYPTO_AES_IV_0, ivtouse, ivsize);
 		}
-		reinit_completion(&ctx->dev->complete);
-		ctx->dev->status = 0;
+		reinit_completion(&rkc->complete);
+		rkc->status = 0;
 
 		todo = min(sg_dma_len(sgs), len);
 		len -= todo;
-		crypto_dma_start(ctx->dev, sgs, sgd, todo / 4);
-		wait_for_completion_interruptible_timeout(&ctx->dev->complete,
+		crypto_dma_start(rkc, sgs, sgd, todo / 4);
+		wait_for_completion_interruptible_timeout(&rkc->complete,
 							  msecs_to_jiffies(2000));
-		if (!ctx->dev->status) {
-			dev_err(ctx->dev->dev, "DMA timeout\n");
+		if (!rkc->status) {
+			dev_err(rkc->dev, "DMA timeout\n");
 			err = -EFAULT;
 			goto theend;
 		}
 		if (sgs == sgd) {
-			dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
+			dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL);
 		} else {
-			dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
-			dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
+			dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE);
+			dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE);
 		}
 		if (rctx->mode & RK_CRYPTO_DEC) {
 			memcpy(iv, biv, ivsize);
@@ -405,10 +406,10 @@ theend:
 
 theend_sgs:
 	if (sgs == sgd) {
-		dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_BIDIRECTIONAL);
+		dma_unmap_sg(rkc->dev, sgs, 1, DMA_BIDIRECTIONAL);
 	} else {
-		dma_unmap_sg(ctx->dev->dev, sgs, 1, DMA_TO_DEVICE);
-		dma_unmap_sg(ctx->dev->dev, sgd, 1, DMA_FROM_DEVICE);
+		dma_unmap_sg(rkc->dev, sgs, 1, DMA_TO_DEVICE);
+		dma_unmap_sg(rkc->dev, sgd, 1, DMA_FROM_DEVICE);
 	}
 theend_iv:
 	return err;
-- 
GitLab


From ea389be9857721252367fd2cf81bc8068e060693 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:01 +0000
Subject: [PATCH 0563/4122] crypto: rockchip - use the rk_crypto_info given as
 parameter

Instead of using the crypto_info from TFM ctx, use the one given as parameter.

Reviewed-by: John Keeping <john@metanate.com>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_skcipher.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 6a1bea98fded1..cf0dfb6029d82 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -254,7 +254,7 @@ static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_reques
 			     RK_CRYPTO_TDES_BYTESWAP_KEY |
 			     RK_CRYPTO_TDES_BYTESWAP_IV;
 		CRYPTO_WRITE(dev, RK_CRYPTO_TDES_CTRL, rctx->mode);
-		memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen);
+		memcpy_toio(dev->reg + RK_CRYPTO_TDES_KEY1_0, ctx->key, ctx->keylen);
 		conf_reg = RK_CRYPTO_DESSEL;
 	} else {
 		rctx->mode |= RK_CRYPTO_AES_FIFO_MODE |
@@ -266,7 +266,7 @@ static void rk_cipher_hw_init(struct rk_crypto_info *dev, struct skcipher_reques
 		else if (ctx->keylen == AES_KEYSIZE_256)
 			rctx->mode |= RK_CRYPTO_AES_256BIT_key;
 		CRYPTO_WRITE(dev, RK_CRYPTO_AES_CTRL, rctx->mode);
-		memcpy_toio(ctx->dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen);
+		memcpy_toio(dev->reg + RK_CRYPTO_AES_KEY_0, ctx->key, ctx->keylen);
 	}
 	conf_reg |= RK_CRYPTO_BYTESWAP_BTFIFO |
 		    RK_CRYPTO_BYTESWAP_BRFIFO;
-- 
GitLab


From 81aaf680e85207d6521b250b2a80ba7c91cc9cbe Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:02 +0000
Subject: [PATCH 0564/4122] dt-bindings: crypto: convert rockchip-crypto to
 YAML

Convert rockchip-crypto to YAML.

Reviewed-by: John Keeping <john@metanate.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/rockchip,rk3288-crypto.yaml        | 64 +++++++++++++++++++
 .../bindings/crypto/rockchip-crypto.txt       | 28 --------
 2 files changed, 64 insertions(+), 28 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
 delete mode 100644 Documentation/devicetree/bindings/crypto/rockchip-crypto.txt

diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
new file mode 100644
index 0000000000000..8a219d439d02a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/rockchip,rk3288-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip Electronics Security Accelerator
+
+maintainers:
+  - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+  compatible:
+    enum:
+      - rockchip,rk3288-crypto
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+
+  clock-names:
+    items:
+      - const: aclk
+      - const: hclk
+      - const: sclk
+      - const: apb_pclk
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    items:
+      - const: crypto-rst
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/rk3288-cru.h>
+    crypto@ff8a0000 {
+      compatible = "rockchip,rk3288-crypto";
+      reg = <0xff8a0000 0x4000>;
+      interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
+      clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
+               <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
+      clock-names = "aclk", "hclk", "sclk", "apb_pclk";
+      resets = <&cru SRST_CRYPTO>;
+      reset-names = "crypto-rst";
+    };
diff --git a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt b/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt
deleted file mode 100644
index 5e2ba385b8c9b..0000000000000
--- a/Documentation/devicetree/bindings/crypto/rockchip-crypto.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-Rockchip Electronics And Security Accelerator
-
-Required properties:
-- compatible: Should be "rockchip,rk3288-crypto"
-- reg: Base physical address of the engine and length of memory mapped
-       region
-- interrupts: Interrupt number
-- clocks: Reference to the clocks about crypto
-- clock-names: "aclk" used to clock data
-	       "hclk" used to clock data
-	       "sclk" used to clock crypto accelerator
-	       "apb_pclk" used to clock dma
-- resets: Must contain an entry for each entry in reset-names.
-	  See ../reset/reset.txt for details.
-- reset-names: Must include the name "crypto-rst".
-
-Examples:
-
-	crypto: cypto-controller@ff8a0000 {
-		compatible = "rockchip,rk3288-crypto";
-		reg = <0xff8a0000 0x4000>;
-		interrupts = <GIC_SPI 48 IRQ_TYPE_LEVEL_HIGH>;
-		clocks = <&cru ACLK_CRYPTO>, <&cru HCLK_CRYPTO>,
-			 <&cru SCLK_CRYPTO>, <&cru ACLK_DMAC1>;
-		clock-names = "aclk", "hclk", "sclk", "apb_pclk";
-		resets = <&cru SRST_CRYPTO>;
-		reset-names = "crypto-rst";
-	};
-- 
GitLab


From d1b5749687618d969c0be6428174a18a7e94ebd2 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:03 +0000
Subject: [PATCH 0565/4122] dt-bindings: crypto: rockchip: add new compatible

Since driver support new compatible, we need to update the driver bindings.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/rockchip,rk3288-crypto.yaml        | 79 +++++++++++++++++--
 1 file changed, 71 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
index 8a219d439d02a..f1a9da8bff7a7 100644
--- a/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
+++ b/Documentation/devicetree/bindings/crypto/rockchip,rk3288-crypto.yaml
@@ -13,6 +13,8 @@ properties:
   compatible:
     enum:
       - rockchip,rk3288-crypto
+      - rockchip,rk3328-crypto
+      - rockchip,rk3399-crypto
 
   reg:
     maxItems: 1
@@ -21,21 +23,82 @@ properties:
     maxItems: 1
 
   clocks:
+    minItems: 3
     maxItems: 4
 
   clock-names:
-    items:
-      - const: aclk
-      - const: hclk
-      - const: sclk
-      - const: apb_pclk
+    minItems: 3
+    maxItems: 4
 
   resets:
-    maxItems: 1
+    minItems: 1
+    maxItems: 3
 
   reset-names:
-    items:
-      - const: crypto-rst
+    minItems: 1
+    maxItems: 3
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: rockchip,rk3288-crypto
+    then:
+      properties:
+        clocks:
+          minItems: 4
+        clock-names:
+          items:
+            - const: aclk
+            - const: hclk
+            - const: sclk
+            - const: apb_pclk
+        resets:
+          maxItems: 1
+        reset-names:
+          items:
+            - const: crypto-rst
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: rockchip,rk3328-crypto
+    then:
+      properties:
+        clocks:
+          maxItems: 3
+        clock-names:
+          items:
+            - const: hclk_master
+            - const: hclk_slave
+            - const: sclk
+        resets:
+          maxItems: 1
+        reset-names:
+          items:
+            - const: crypto-rst
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: rockchip,rk3399-crypto
+    then:
+      properties:
+        clocks:
+          maxItems: 3
+        clock-names:
+          items:
+            - const: hclk_master
+            - const: hclk_slave
+            - const: sclk
+        resets:
+          minItems: 3
+        reset-names:
+          items:
+            - const: master
+            - const: slave
+            - const: crypto-rst
 
 required:
   - compatible
-- 
GitLab


From 2d3c756adcd7a7ee15b6a55cf01b363e3f134e79 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:07 +0000
Subject: [PATCH 0566/4122] crypto: rockchip - store crypto_info in request
 context

The crypto_info to use must be stored in the request context.
This will help when 2 crypto_info will be available on rk3399.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.h          |  2 ++
 drivers/crypto/rockchip/rk3288_crypto_ahash.c    | 14 ++++++--------
 drivers/crypto/rockchip/rk3288_crypto_skcipher.c |  6 ++++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 28bf09fe1c1d1..ff9fc25972ebe 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -215,6 +215,7 @@ struct rk_ahash_ctx {
 
 /* the private variable of hash for fallback */
 struct rk_ahash_rctx {
+	struct rk_crypto_info		*dev;
 	struct ahash_request		fallback_req;
 	u32				mode;
 	int nrsg;
@@ -231,6 +232,7 @@ struct rk_cipher_ctx {
 };
 
 struct rk_cipher_rctx {
+	struct rk_crypto_info		*dev;
 	u8 backup_iv[AES_BLOCK_SIZE];
 	u32				mode;
 	struct skcipher_request fallback_req;   // keep at the end
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 636dbcde0ca3f..d1bf68cb390d1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -200,6 +200,7 @@ static int rk_ahash_export(struct ahash_request *req, void *out)
 
 static int rk_ahash_digest(struct ahash_request *req)
 {
+	struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
 	struct rk_crypto_info *dev = tctx->dev;
 
@@ -209,6 +210,8 @@ static int rk_ahash_digest(struct ahash_request *req)
 	if (!req->nbytes)
 		return zero_message_process(req);
 
+	rctx->dev = dev;
+
 	return crypto_transfer_hash_request_to_engine(dev->engine, req);
 }
 
@@ -223,10 +226,8 @@ static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlis
 static int rk_hash_prepare(struct crypto_engine *engine, void *breq)
 {
 	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
-	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
-	struct rk_crypto_info *rkc = tctx->dev;
+	struct rk_crypto_info *rkc = rctx->dev;
 	int ret;
 
 	ret = dma_map_sg(rkc->dev, areq->src, sg_nents(areq->src), DMA_TO_DEVICE);
@@ -241,10 +242,8 @@ static int rk_hash_prepare(struct crypto_engine *engine, void *breq)
 static int rk_hash_unprepare(struct crypto_engine *engine, void *breq)
 {
 	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
-	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
-	struct rk_crypto_info *rkc = tctx->dev;
+	struct rk_crypto_info *rkc = rctx->dev;
 
 	dma_unmap_sg(rkc->dev, areq->src, rctx->nrsg, DMA_TO_DEVICE);
 	return 0;
@@ -255,11 +254,10 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	struct ahash_request *areq = container_of(breq, struct ahash_request, base);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(areq);
-	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
 	struct ahash_alg *alg = __crypto_ahash_alg(tfm->base.__crt_alg);
 	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 	struct scatterlist *sg = areq->src;
-	struct rk_crypto_info *rkc = tctx->dev;
+	struct rk_crypto_info *rkc = rctx->dev;
 	int err = 0;
 	int i;
 	u32 v;
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index cf0dfb6029d82..0b1c90ababb71 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -86,12 +86,15 @@ static int rk_cipher_handle_req(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct rk_cipher_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
 	struct rk_crypto_info *rkc = tctx->dev;
 	struct crypto_engine *engine = rkc->engine;
 
 	if (rk_cipher_need_fallback(req))
 		return rk_cipher_fallback(req);
 
+	rctx->dev = rkc;
+
 	return crypto_transfer_skcipher_request_to_engine(engine, req);
 }
 
@@ -290,7 +293,6 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 {
 	struct skcipher_request *areq = container_of(async_req, struct skcipher_request, base);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
-	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(areq);
 	struct scatterlist *sgs, *sgd;
 	int err = 0;
@@ -303,7 +305,7 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 	unsigned int todo;
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
-	struct rk_crypto_info *rkc = ctx->dev;
+	struct rk_crypto_info *rkc = rctx->dev;
 
 	algt->stat_req++;
 
-- 
GitLab


From e220e6719438f7a99fe0a73e6e126481380202fa Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:08 +0000
Subject: [PATCH 0567/4122] crypto: rockchip - Check for clocks numbers and
 their frequencies

Add the number of clocks needed for each compatible.
Rockchip's datasheet give maximum frequencies for some clocks, so add
checks for verifying they are within limits. Let's start with rk3288 for
clock frequency check, other will came later.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 75 +++++++++++++++++++++----
 drivers/crypto/rockchip/rk3288_crypto.h | 16 +++++-
 2 files changed, 79 insertions(+), 12 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index c92559b83f7dd..232dc625d6e50 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -14,10 +14,58 @@
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/clk.h>
 #include <linux/crypto.h>
 #include <linux/reset.h>
 
+static const struct rk_variant rk3288_variant = {
+	.num_clks = 4,
+	.rkclks = {
+		{ "sclk", 150000000},
+	}
+};
+
+static const struct rk_variant rk3328_variant = {
+	.num_clks = 3,
+};
+
+static int rk_crypto_get_clks(struct rk_crypto_info *dev)
+{
+	int i, j, err;
+	unsigned long cr;
+
+	dev->num_clks = devm_clk_bulk_get_all(dev->dev, &dev->clks);
+	if (dev->num_clks < dev->variant->num_clks) {
+		dev_err(dev->dev, "Missing clocks, got %d instead of %d\n",
+			dev->num_clks, dev->variant->num_clks);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < dev->num_clks; i++) {
+		cr = clk_get_rate(dev->clks[i].clk);
+		for (j = 0; j < ARRAY_SIZE(dev->variant->rkclks); j++) {
+			if (dev->variant->rkclks[j].max == 0)
+				continue;
+			if (strcmp(dev->variant->rkclks[j].name, dev->clks[i].id))
+				continue;
+			if (cr > dev->variant->rkclks[j].max) {
+				err = clk_set_rate(dev->clks[i].clk,
+						   dev->variant->rkclks[j].max);
+				if (err)
+					dev_err(dev->dev, "Fail downclocking %s from %lu to %lu\n",
+						dev->variant->rkclks[j].name, cr,
+						dev->variant->rkclks[j].max);
+				else
+					dev_info(dev->dev, "Downclocking %s from %lu to %lu\n",
+						 dev->variant->rkclks[j].name, cr,
+						 dev->variant->rkclks[j].max);
+			}
+		}
+	}
+	return 0;
+}
+
 static int rk_crypto_enable_clk(struct rk_crypto_info *dev)
 {
 	int err;
@@ -201,8 +249,12 @@ static void rk_crypto_unregister(void)
 }
 
 static const struct of_device_id crypto_of_id_table[] = {
-	{ .compatible = "rockchip,rk3288-crypto" },
-	{ .compatible = "rockchip,rk3328-crypto" },
+	{ .compatible = "rockchip,rk3288-crypto",
+	  .data = &rk3288_variant,
+	},
+	{ .compatible = "rockchip,rk3328-crypto",
+	  .data = &rk3328_variant,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, crypto_of_id_table);
@@ -220,6 +272,15 @@ static int rk_crypto_probe(struct platform_device *pdev)
 		goto err_crypto;
 	}
 
+	crypto_info->dev = &pdev->dev;
+	platform_set_drvdata(pdev, crypto_info);
+
+	crypto_info->variant = of_device_get_match_data(&pdev->dev);
+	if (!crypto_info->variant) {
+		dev_err(&pdev->dev, "Missing variant\n");
+		return -EINVAL;
+	}
+
 	crypto_info->rst = devm_reset_control_get(dev, "crypto-rst");
 	if (IS_ERR(crypto_info->rst)) {
 		err = PTR_ERR(crypto_info->rst);
@@ -236,12 +297,9 @@ static int rk_crypto_probe(struct platform_device *pdev)
 		goto err_crypto;
 	}
 
-	crypto_info->num_clks = devm_clk_bulk_get_all(&pdev->dev,
-						      &crypto_info->clks);
-	if (crypto_info->num_clks < 3) {
-		err = -EINVAL;
+	err = rk_crypto_get_clks(crypto_info);
+	if (err)
 		goto err_crypto;
-	}
 
 	crypto_info->irq = platform_get_irq(pdev, 0);
 	if (crypto_info->irq < 0) {
@@ -259,9 +317,6 @@ static int rk_crypto_probe(struct platform_device *pdev)
 		goto err_crypto;
 	}
 
-	crypto_info->dev = &pdev->dev;
-	platform_set_drvdata(pdev, crypto_info);
-
 	crypto_info->engine = crypto_engine_alloc_init(&pdev->dev, true);
 	crypto_engine_start(crypto_info->engine);
 	init_completion(&crypto_info->complete);
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index ff9fc25972ebe..ac979d67ced98 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -188,14 +188,26 @@
 #define CRYPTO_WRITE(dev, offset, val)	  \
 		writel_relaxed((val), ((dev)->reg + (offset)))
 
+#define RK_MAX_CLKS 4
+
+struct rk_clks {
+	const char *name;
+	unsigned long max;
+};
+
+struct rk_variant {
+	int num_clks;
+	struct rk_clks rkclks[RK_MAX_CLKS];
+};
+
 struct rk_crypto_info {
 	struct device			*dev;
 	struct clk_bulk_data		*clks;
-	int num_clks;
+	int				num_clks;
 	struct reset_control		*rst;
 	void __iomem			*reg;
 	int				irq;
-
+	const struct rk_variant *variant;
 	struct crypto_engine *engine;
 	struct completion complete;
 	int status;
-- 
GitLab


From 0d31b14c9e4178a129a1aa5e491e4da1489c07de Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:09 +0000
Subject: [PATCH 0568/4122] crypto: rockchip - rk_ahash_reg_init use
 crypto_info from parameter

rk_ahash_reg_init() use crypto_info from TFM context, since we will
remove it, let's take if from parameters.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index d1bf68cb390d1..30f78256c9551 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -78,12 +78,10 @@ static int zero_message_process(struct ahash_request *req)
 	return 0;
 }
 
-static void rk_ahash_reg_init(struct ahash_request *req)
+static void rk_ahash_reg_init(struct ahash_request *req,
+			      struct rk_crypto_info *dev)
 {
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
-	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct rk_ahash_ctx *tctx = crypto_ahash_ctx(tfm);
-	struct rk_crypto_info *dev = tctx->dev;
 	int reg_status;
 
 	reg_status = CRYPTO_READ(dev, RK_CRYPTO_CTRL) |
@@ -281,7 +279,7 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 		goto theend;
 	}
 
-	rk_ahash_reg_init(areq);
+	rk_ahash_reg_init(areq, rkc);
 
 	while (sg) {
 		reinit_completion(&rkc->complete);
-- 
GitLab


From c5a1e104c35e5134b6048f1e03960a6ac9c42935 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:10 +0000
Subject: [PATCH 0569/4122] crypto: rockchip - permit to have more than one
 reset

The RK3399 has 3 resets, so the driver to handle multiple resets.
This is done by using devm_reset_control_array_get_exclusive().

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 232dc625d6e50..d96f375423d59 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -281,7 +281,7 @@ static int rk_crypto_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	crypto_info->rst = devm_reset_control_get(dev, "crypto-rst");
+	crypto_info->rst = devm_reset_control_array_get_exclusive(dev);
 	if (IS_ERR(crypto_info->rst)) {
 		err = PTR_ERR(crypto_info->rst);
 		goto err_crypto;
-- 
GitLab


From 9dcd71c863a6f6476378d076d3e9189c854d49fd Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Tue, 27 Sep 2022 07:55:11 +0000
Subject: [PATCH 0570/4122] crypto: rockchip - Add support for RK3399

The RK3399 has 2 rk3288 compatible crypto device named crypto0 and
crypto1. The only difference is lack of RSA in crypto1.

We need to add driver support for 2 parallel instance as only one need
to register crypto algorithms.
Then the driver will round robin each request on each device.

For avoiding complexity (device bringup after a TFM is created), PM is
modified to be handled per request.
Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c       | 92 +++++++++++++++----
 drivers/crypto/rockchip/rk3288_crypto.h       | 25 +++--
 drivers/crypto/rockchip/rk3288_crypto_ahash.c | 37 ++++----
 .../crypto/rockchip/rk3288_crypto_skcipher.c  | 37 ++++----
 4 files changed, 123 insertions(+), 68 deletions(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index d96f375423d59..6217e73ba4c41 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -19,6 +19,23 @@
 #include <linux/crypto.h>
 #include <linux/reset.h>
 
+static struct rockchip_ip rocklist = {
+	.dev_list = LIST_HEAD_INIT(rocklist.dev_list),
+	.lock = __SPIN_LOCK_UNLOCKED(rocklist.lock),
+};
+
+struct rk_crypto_info *get_rk_crypto(void)
+{
+	struct rk_crypto_info *first;
+
+	spin_lock(&rocklist.lock);
+	first = list_first_entry_or_null(&rocklist.dev_list,
+					 struct rk_crypto_info, list);
+	list_rotate_left(&rocklist.dev_list);
+	spin_unlock(&rocklist.lock);
+	return first;
+}
+
 static const struct rk_variant rk3288_variant = {
 	.num_clks = 4,
 	.rkclks = {
@@ -30,6 +47,10 @@ static const struct rk_variant rk3328_variant = {
 	.num_clks = 3,
 };
 
+static const struct rk_variant rk3399_variant = {
+	.num_clks = 3,
+};
+
 static int rk_crypto_get_clks(struct rk_crypto_info *dev)
 {
 	int i, j, err;
@@ -83,8 +104,8 @@ static void rk_crypto_disable_clk(struct rk_crypto_info *dev)
 }
 
 /*
- * Power management strategy: The device is suspended unless a TFM exists for
- * one of the algorithms proposed by this driver.
+ * Power management strategy: The device is suspended until a request
+ * is handled. For avoiding suspend/resume yoyo, the autosuspend is set to 2s.
  */
 static int rk_crypto_pm_suspend(struct device *dev)
 {
@@ -166,8 +187,17 @@ static struct rk_crypto_tmp *rk_cipher_algs[] = {
 #ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
 static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
 {
+	struct rk_crypto_info *dd;
 	unsigned int i;
 
+	spin_lock(&rocklist.lock);
+	list_for_each_entry(dd, &rocklist.dev_list, list) {
+		seq_printf(seq, "%s %s requests: %lu\n",
+			   dev_driver_string(dd->dev), dev_name(dd->dev),
+			   dd->nreq);
+	}
+	spin_unlock(&rocklist.lock);
+
 	for (i = 0; i < ARRAY_SIZE(rk_cipher_algs); i++) {
 		if (!rk_cipher_algs[i]->dev)
 			continue;
@@ -198,6 +228,18 @@ static int rk_crypto_debugfs_show(struct seq_file *seq, void *v)
 DEFINE_SHOW_ATTRIBUTE(rk_crypto_debugfs);
 #endif
 
+static void register_debugfs(struct rk_crypto_info *crypto_info)
+{
+#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
+	/* Ignore error of debugfs */
+	rocklist.dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL);
+	rocklist.dbgfs_stats = debugfs_create_file("stats", 0444,
+						   rocklist.dbgfs_dir,
+						   &rocklist,
+						   &rk_crypto_debugfs_fops);
+#endif
+}
+
 static int rk_crypto_register(struct rk_crypto_info *crypto_info)
 {
 	unsigned int i, k;
@@ -255,6 +297,9 @@ static const struct of_device_id crypto_of_id_table[] = {
 	{ .compatible = "rockchip,rk3328-crypto",
 	  .data = &rk3328_variant,
 	},
+	{ .compatible = "rockchip,rk3399-crypto",
+	  .data = &rk3399_variant,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, crypto_of_id_table);
@@ -262,7 +307,7 @@ MODULE_DEVICE_TABLE(of, crypto_of_id_table);
 static int rk_crypto_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct rk_crypto_info *crypto_info;
+	struct rk_crypto_info *crypto_info, *first;
 	int err = 0;
 
 	crypto_info = devm_kzalloc(&pdev->dev,
@@ -325,22 +370,22 @@ static int rk_crypto_probe(struct platform_device *pdev)
 	if (err)
 		goto err_pm;
 
-	err = rk_crypto_register(crypto_info);
-	if (err) {
-		dev_err(dev, "err in register alg");
-		goto err_register_alg;
-	}
+	spin_lock(&rocklist.lock);
+	first = list_first_entry_or_null(&rocklist.dev_list,
+					 struct rk_crypto_info, list);
+	list_add_tail(&crypto_info->list, &rocklist.dev_list);
+	spin_unlock(&rocklist.lock);
+
+	if (!first) {
+		err = rk_crypto_register(crypto_info);
+		if (err) {
+			dev_err(dev, "Fail to register crypto algorithms");
+			goto err_register_alg;
+		}
 
-#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
-	/* Ignore error of debugfs */
-	crypto_info->dbgfs_dir = debugfs_create_dir("rk3288_crypto", NULL);
-	crypto_info->dbgfs_stats = debugfs_create_file("stats", 0444,
-						       crypto_info->dbgfs_dir,
-						       crypto_info,
-						       &rk_crypto_debugfs_fops);
-#endif
+		register_debugfs(crypto_info);
+	}
 
-	dev_info(dev, "Crypto Accelerator successfully registered\n");
 	return 0;
 
 err_register_alg:
@@ -355,11 +400,20 @@ err_crypto:
 static int rk_crypto_remove(struct platform_device *pdev)
 {
 	struct rk_crypto_info *crypto_tmp = platform_get_drvdata(pdev);
+	struct rk_crypto_info *first;
+
+	spin_lock_bh(&rocklist.lock);
+	list_del(&crypto_tmp->list);
+	first = list_first_entry_or_null(&rocklist.dev_list,
+					 struct rk_crypto_info, list);
+	spin_unlock_bh(&rocklist.lock);
 
+	if (!first) {
 #ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
-	debugfs_remove_recursive(crypto_tmp->dbgfs_dir);
+		debugfs_remove_recursive(rocklist.dbgfs_dir);
 #endif
-	rk_crypto_unregister();
+		rk_crypto_unregister();
+	}
 	rk_crypto_pm_exit(crypto_tmp);
 	crypto_engine_exit(crypto_tmp->engine);
 	return 0;
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index ac979d67ced98..b2695258cade9 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -190,6 +190,20 @@
 
 #define RK_MAX_CLKS 4
 
+/*
+ * struct rockchip_ip - struct for managing a list of RK crypto instance
+ * @dev_list:		Used for doing a list of rk_crypto_info
+ * @lock:		Control access to dev_list
+ * @dbgfs_dir:		Debugfs dentry for statistic directory
+ * @dbgfs_stats:	Debugfs dentry for statistic counters
+ */
+struct rockchip_ip {
+	struct list_head	dev_list;
+	spinlock_t		lock; /* Control access to dev_list */
+	struct dentry		*dbgfs_dir;
+	struct dentry		*dbgfs_stats;
+};
+
 struct rk_clks {
 	const char *name;
 	unsigned long max;
@@ -201,6 +215,7 @@ struct rk_variant {
 };
 
 struct rk_crypto_info {
+	struct list_head		list;
 	struct device			*dev;
 	struct clk_bulk_data		*clks;
 	int				num_clks;
@@ -208,19 +223,15 @@ struct rk_crypto_info {
 	void __iomem			*reg;
 	int				irq;
 	const struct rk_variant *variant;
+	unsigned long nreq;
 	struct crypto_engine *engine;
 	struct completion complete;
 	int status;
-#ifdef CONFIG_CRYPTO_DEV_ROCKCHIP_DEBUG
-	struct dentry *dbgfs_dir;
-	struct dentry *dbgfs_stats;
-#endif
 };
 
 /* the private variable of hash */
 struct rk_ahash_ctx {
 	struct crypto_engine_ctx enginectx;
-	struct rk_crypto_info		*dev;
 	/* for fallback */
 	struct crypto_ahash		*fallback_tfm;
 };
@@ -236,7 +247,6 @@ struct rk_ahash_rctx {
 /* the private variable of cipher */
 struct rk_cipher_ctx {
 	struct crypto_engine_ctx enginectx;
-	struct rk_crypto_info		*dev;
 	unsigned int			keylen;
 	u8				key[AES_MAX_KEY_SIZE];
 	u8				iv[AES_BLOCK_SIZE];
@@ -252,7 +262,7 @@ struct rk_cipher_rctx {
 
 struct rk_crypto_tmp {
 	u32 type;
-	struct rk_crypto_info		*dev;
+	struct rk_crypto_info           *dev;
 	union {
 		struct skcipher_alg	skcipher;
 		struct ahash_alg	hash;
@@ -276,4 +286,5 @@ extern struct rk_crypto_tmp rk_ahash_sha1;
 extern struct rk_crypto_tmp rk_ahash_sha256;
 extern struct rk_crypto_tmp rk_ahash_md5;
 
+struct rk_crypto_info *get_rk_crypto(void);
 #endif
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ahash.c b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
index 30f78256c9551..a78ff3dcd0b1f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ahash.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ahash.c
@@ -199,8 +199,8 @@ static int rk_ahash_export(struct ahash_request *req, void *out)
 static int rk_ahash_digest(struct ahash_request *req)
 {
 	struct rk_ahash_rctx *rctx = ahash_request_ctx(req);
-	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
-	struct rk_crypto_info *dev = tctx->dev;
+	struct rk_crypto_info *dev;
+	struct crypto_engine *engine;
 
 	if (rk_ahash_need_fallback(req))
 		return rk_ahash_digest_fb(req);
@@ -208,9 +208,12 @@ static int rk_ahash_digest(struct ahash_request *req)
 	if (!req->nbytes)
 		return zero_message_process(req);
 
+	dev = get_rk_crypto();
+
 	rctx->dev = dev;
+	engine = dev->engine;
 
-	return crypto_transfer_hash_request_to_engine(dev->engine, req);
+	return crypto_transfer_hash_request_to_engine(engine, req);
 }
 
 static void crypto_ahash_dma_start(struct rk_crypto_info *dev, struct scatterlist *sg)
@@ -260,9 +263,14 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	int i;
 	u32 v;
 
+	err = pm_runtime_resume_and_get(rkc->dev);
+	if (err)
+		return err;
+
 	rctx->mode = 0;
 
 	algt->stat_req++;
+	rkc->nreq++;
 
 	switch (crypto_ahash_digestsize(tfm)) {
 	case SHA1_DIGEST_SIZE:
@@ -313,6 +321,8 @@ static int rk_hash_run(struct crypto_engine *engine, void *breq)
 	}
 
 theend:
+	pm_runtime_put_autosuspend(rkc->dev);
+
 	local_bh_disable();
 	crypto_finalize_hash_request(engine, breq, err);
 	local_bh_enable();
@@ -323,21 +333,15 @@ theend:
 static int rk_cra_hash_init(struct crypto_tfm *tfm)
 {
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
-	struct rk_crypto_tmp *algt;
-	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
-
 	const char *alg_name = crypto_tfm_alg_name(tfm);
-	int err;
-
-	algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
-
-	tctx->dev = algt->dev;
+	struct ahash_alg *alg = __crypto_ahash_alg(tfm->__crt_alg);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.hash);
 
 	/* for fallback */
 	tctx->fallback_tfm = crypto_alloc_ahash(alg_name, 0,
 						CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(tctx->fallback_tfm)) {
-		dev_err(tctx->dev->dev, "Could not load fallback driver.\n");
+		dev_err(algt->dev->dev, "Could not load fallback driver.\n");
 		return PTR_ERR(tctx->fallback_tfm);
 	}
 
@@ -349,15 +353,7 @@ static int rk_cra_hash_init(struct crypto_tfm *tfm)
 	tctx->enginectx.op.prepare_request = rk_hash_prepare;
 	tctx->enginectx.op.unprepare_request = rk_hash_unprepare;
 
-	err = pm_runtime_resume_and_get(tctx->dev->dev);
-	if (err < 0)
-		goto error_pm;
-
 	return 0;
-error_pm:
-	crypto_free_ahash(tctx->fallback_tfm);
-
-	return err;
 }
 
 static void rk_cra_hash_exit(struct crypto_tfm *tfm)
@@ -365,7 +361,6 @@ static void rk_cra_hash_exit(struct crypto_tfm *tfm)
 	struct rk_ahash_ctx *tctx = crypto_tfm_ctx(tfm);
 
 	crypto_free_ahash(tctx->fallback_tfm);
-	pm_runtime_put_autosuspend(tctx->dev->dev);
 }
 
 struct rk_crypto_tmp rk_ahash_sha1 = {
diff --git a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
index 0b1c90ababb71..59069457582bf 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_skcipher.c
@@ -17,11 +17,11 @@
 static int rk_cipher_need_fallback(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	unsigned int bs = crypto_skcipher_blocksize(tfm);
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 	struct scatterlist *sgs, *sgd;
 	unsigned int stodo, dtodo, len;
+	unsigned int bs = crypto_skcipher_blocksize(tfm);
 
 	if (!req->cryptlen)
 		return true;
@@ -84,15 +84,16 @@ static int rk_cipher_fallback(struct skcipher_request *areq)
 
 static int rk_cipher_handle_req(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct rk_cipher_ctx *tctx = crypto_skcipher_ctx(tfm);
 	struct rk_cipher_rctx *rctx = skcipher_request_ctx(req);
-	struct rk_crypto_info *rkc = tctx->dev;
-	struct crypto_engine *engine = rkc->engine;
+	struct rk_crypto_info *rkc;
+	struct crypto_engine *engine;
 
 	if (rk_cipher_need_fallback(req))
 		return rk_cipher_fallback(req);
 
+	rkc = get_rk_crypto();
+
+	engine = rkc->engine;
 	rctx->dev = rkc;
 
 	return crypto_transfer_skcipher_request_to_engine(engine, req);
@@ -307,7 +308,12 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 	struct rk_crypto_info *rkc = rctx->dev;
 
+	err = pm_runtime_resume_and_get(rkc->dev);
+	if (err)
+		return err;
+
 	algt->stat_req++;
+	rkc->nreq++;
 
 	ivsize = crypto_skcipher_ivsize(tfm);
 	if (areq->iv && crypto_skcipher_ivsize(tfm) > 0) {
@@ -401,6 +407,8 @@ static int rk_cipher_run(struct crypto_engine *engine, void *async_req)
 	}
 
 theend:
+	pm_runtime_put_autosuspend(rkc->dev);
+
 	local_bh_disable();
 	crypto_finalize_skcipher_request(engine, areq, err);
 	local_bh_enable();
@@ -420,18 +428,13 @@ theend_iv:
 static int rk_cipher_tfm_init(struct crypto_skcipher *tfm)
 {
 	struct rk_cipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	const char *name = crypto_tfm_alg_name(&tfm->base);
-	struct rk_crypto_tmp *algt;
-	int err;
-
-	algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
-
-	ctx->dev = algt->dev;
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct rk_crypto_tmp *algt = container_of(alg, struct rk_crypto_tmp, alg.skcipher);
 
 	ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
 	if (IS_ERR(ctx->fallback_tfm)) {
-		dev_err(ctx->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+		dev_err(algt->dev->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
 			name, PTR_ERR(ctx->fallback_tfm));
 		return PTR_ERR(ctx->fallback_tfm);
 	}
@@ -441,14 +444,7 @@ static int rk_cipher_tfm_init(struct crypto_skcipher *tfm)
 
 	ctx->enginectx.op.do_one_request = rk_cipher_run;
 
-	err = pm_runtime_resume_and_get(ctx->dev->dev);
-	if (err < 0)
-		goto error_pm;
-
 	return 0;
-error_pm:
-	crypto_free_skcipher(ctx->fallback_tfm);
-	return err;
 }
 
 static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm)
@@ -457,7 +453,6 @@ static void rk_cipher_tfm_exit(struct crypto_skcipher *tfm)
 
 	memzero_explicit(ctx->key, ctx->keylen);
 	crypto_free_skcipher(ctx->fallback_tfm);
-	pm_runtime_put_autosuspend(ctx->dev->dev);
 }
 
 struct rk_crypto_tmp rk_ecb_aes_alg = {
-- 
GitLab


From 7984ceb134bf31aa9a597f10ed52d831d5aede14 Mon Sep 17 00:00:00 2001
From: Frederick Lawler <fred@cloudflare.com>
Date: Mon, 17 Oct 2022 14:25:00 -0500
Subject: [PATCH 0571/4122] crypto: af_alg - Support symmetric encryption via
 keyring keys

We want to leverage keyring to store sensitive keys, and then use those
keys for symmetric encryption via the crypto API. Among the key types we
wish to support are: user, logon, encrypted, and trusted.

User key types are already able to have their data copied to user space,
but logon does not support this. Further, trusted and encrypted keys will
return their encrypted data back to user space on read, which does not
make them ideal for symmetric encryption.

To support symmetric encryption for these key types, add a new
ALG_SET_KEY_BY_KEY_SERIAL setsockopt() option to the crypto API. This
allows users to pass a key_serial_t to the crypto API to perform
symmetric encryption. The behavior is the same as ALG_SET_KEY, but
the crypto key data is copied in kernel space from a keyring key,
which allows for the support of logon, encrypted, and trusted key types.

Keyring keys must have the KEY_(POS|USR|GRP|OTH)_SEARCH permission set
to leverage this feature. This follows the asymmetric_key type where key
lookup calls eventually lead to keyring_search_rcu() without the
KEYRING_SEARCH_NO_CHECK_PERM flag set.

Signed-off-by: Frederick Lawler <fred@cloudflare.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/userspace-if.rst |  15 ++-
 crypto/af_alg.c                       | 135 +++++++++++++++++++++++++-
 include/uapi/linux/if_alg.h           |   1 +
 3 files changed, 147 insertions(+), 4 deletions(-)

diff --git a/Documentation/crypto/userspace-if.rst b/Documentation/crypto/userspace-if.rst
index b45dabbf69d68..f80f243e227e6 100644
--- a/Documentation/crypto/userspace-if.rst
+++ b/Documentation/crypto/userspace-if.rst
@@ -131,9 +131,9 @@ from the kernel crypto API. If the buffer is too small for the message
 digest, the flag MSG_TRUNC is set by the kernel.
 
 In order to set a message digest key, the calling application must use
-the setsockopt() option of ALG_SET_KEY. If the key is not set the HMAC
-operation is performed without the initial HMAC state change caused by
-the key.
+the setsockopt() option of ALG_SET_KEY or ALG_SET_KEY_BY_KEY_SERIAL. If the
+key is not set the HMAC operation is performed without the initial HMAC state
+change caused by the key.
 
 Symmetric Cipher API
 --------------------
@@ -382,6 +382,15 @@ mentioned optname:
 
    -  the RNG cipher type to provide the seed
 
+- ALG_SET_KEY_BY_KEY_SERIAL -- Setting the key via keyring key_serial_t.
+   This operation behaves the same as ALG_SET_KEY. The decrypted
+   data is copied from a keyring key, and uses that data as the
+   key for symmetric encryption.
+
+   The passed in key_serial_t must have the KEY_(POS|USR|GRP|OTH)_SEARCH
+   permission set, otherwise -EPERM is returned. Supports key types: user,
+   logon, encrypted, and trusted.
+
 -  ALG_SET_AEAD_AUTHSIZE -- Setting the authentication tag size for
    AEAD ciphers. For a encryption operation, the authentication tag of
    the given size will be generated. For a decryption operation, the
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index e893c0f6c8799..0a4fa2a429e22 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -12,6 +12,8 @@
 #include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/key.h>
+#include <linux/key-type.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/net.h>
@@ -19,6 +21,10 @@
 #include <linux/sched.h>
 #include <linux/sched/signal.h>
 #include <linux/security.h>
+#include <linux/string.h>
+#include <keys/user-type.h>
+#include <keys/trusted-type.h>
+#include <keys/encrypted-type.h>
 
 struct alg_type_list {
 	const struct af_alg_type *type;
@@ -222,6 +228,129 @@ out:
 	return err;
 }
 
+#ifdef CONFIG_KEYS
+
+static const u8 *key_data_ptr_user(const struct key *key,
+				   unsigned int *datalen)
+{
+	const struct user_key_payload *ukp;
+
+	ukp = user_key_payload_locked(key);
+	if (IS_ERR_OR_NULL(ukp))
+		return ERR_PTR(-EKEYREVOKED);
+
+	*datalen = key->datalen;
+
+	return ukp->data;
+}
+
+static const u8 *key_data_ptr_encrypted(const struct key *key,
+					unsigned int *datalen)
+{
+	const struct encrypted_key_payload *ekp;
+
+	ekp = dereference_key_locked(key);
+	if (IS_ERR_OR_NULL(ekp))
+		return ERR_PTR(-EKEYREVOKED);
+
+	*datalen = ekp->decrypted_datalen;
+
+	return ekp->decrypted_data;
+}
+
+static const u8 *key_data_ptr_trusted(const struct key *key,
+				      unsigned int *datalen)
+{
+	const struct trusted_key_payload *tkp;
+
+	tkp = dereference_key_locked(key);
+	if (IS_ERR_OR_NULL(tkp))
+		return ERR_PTR(-EKEYREVOKED);
+
+	*datalen = tkp->key_len;
+
+	return tkp->key;
+}
+
+static struct key *lookup_key(key_serial_t serial)
+{
+	key_ref_t key_ref;
+
+	key_ref = lookup_user_key(serial, 0, KEY_NEED_SEARCH);
+	if (IS_ERR(key_ref))
+		return ERR_CAST(key_ref);
+
+	return key_ref_to_ptr(key_ref);
+}
+
+static int alg_setkey_by_key_serial(struct alg_sock *ask, sockptr_t optval,
+				    unsigned int optlen)
+{
+	const struct af_alg_type *type = ask->type;
+	u8 *key_data = NULL;
+	unsigned int key_datalen;
+	key_serial_t serial;
+	struct key *key;
+	const u8 *ret;
+	int err;
+
+	if (optlen != sizeof(serial))
+		return -EINVAL;
+
+	if (copy_from_sockptr(&serial, optval, optlen))
+		return -EFAULT;
+
+	key = lookup_key(serial);
+	if (IS_ERR(key))
+		return PTR_ERR(key);
+
+	down_read(&key->sem);
+
+	ret = ERR_PTR(-ENOPROTOOPT);
+	if (!strcmp(key->type->name, "user") ||
+	    !strcmp(key->type->name, "logon")) {
+		ret = key_data_ptr_user(key, &key_datalen);
+	} else if (IS_REACHABLE(CONFIG_ENCRYPTED_KEYS) &&
+			   !strcmp(key->type->name, "encrypted")) {
+		ret = key_data_ptr_encrypted(key, &key_datalen);
+	} else if (IS_REACHABLE(CONFIG_TRUSTED_KEYS) &&
+			   !strcmp(key->type->name, "trusted")) {
+		ret = key_data_ptr_trusted(key, &key_datalen);
+	}
+
+	if (IS_ERR(ret)) {
+		up_read(&key->sem);
+		return PTR_ERR(ret);
+	}
+
+	key_data = sock_kmalloc(&ask->sk, key_datalen, GFP_KERNEL);
+	if (!key_data) {
+		up_read(&key->sem);
+		return -ENOMEM;
+	}
+
+	memcpy(key_data, ret, key_datalen);
+
+	up_read(&key->sem);
+
+	err = type->setkey(ask->private, key_data, key_datalen);
+
+	sock_kzfree_s(&ask->sk, key_data, key_datalen);
+
+	return err;
+}
+
+#else
+
+static inline int alg_setkey_by_key_serial(struct alg_sock *ask,
+					   sockptr_t optval,
+					   unsigned int optlen)
+{
+	return -ENOPROTOOPT;
+}
+
+#endif
+
 static int alg_setsockopt(struct socket *sock, int level, int optname,
 			  sockptr_t optval, unsigned int optlen)
 {
@@ -242,12 +371,16 @@ static int alg_setsockopt(struct socket *sock, int level, int optname,
 
 	switch (optname) {
 	case ALG_SET_KEY:
+	case ALG_SET_KEY_BY_KEY_SERIAL:
 		if (sock->state == SS_CONNECTED)
 			goto unlock;
 		if (!type->setkey)
 			goto unlock;
 
-		err = alg_setkey(sk, optval, optlen);
+		if (optname == ALG_SET_KEY_BY_KEY_SERIAL)
+			err = alg_setkey_by_key_serial(ask, optval, optlen);
+		else
+			err = alg_setkey(sk, optval, optlen);
 		break;
 	case ALG_SET_AEAD_AUTHSIZE:
 		if (sock->state == SS_CONNECTED)
diff --git a/include/uapi/linux/if_alg.h b/include/uapi/linux/if_alg.h
index 578b18aab8215..0824fbc026a1c 100644
--- a/include/uapi/linux/if_alg.h
+++ b/include/uapi/linux/if_alg.h
@@ -52,6 +52,7 @@ struct af_alg_iv {
 #define ALG_SET_AEAD_ASSOCLEN		4
 #define ALG_SET_AEAD_AUTHSIZE		5
 #define ALG_SET_DRBG_ENTROPY		6
+#define ALG_SET_KEY_BY_KEY_SERIAL	7
 
 /* Operations */
 #define ALG_OP_DECRYPT			0
-- 
GitLab


From 3efe90af4c0c46c58dba1b306de142827153d9c0 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Oct 2022 01:17:44 +0000
Subject: [PATCH 0572/4122] crypto: hisilicon/qm - increase the memory of local
 variables

Increase the buffer to prevent stack overflow by fuzz test. The maximum
length of the qos configuration buffer is 256 bytes. Currently, the value
of the 'val buffer' is only 32 bytes. The sscanf does not check the dest
memory length. So the 'val buffer' may stack overflow.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index e3edb176d9765..5d79e9f0e7e14 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -250,7 +250,6 @@
 #define QM_QOS_MIN_CIR_B		100
 #define QM_QOS_MAX_CIR_U		6
 #define QM_QOS_MAX_CIR_S		11
-#define QM_QOS_VAL_MAX_LEN		32
 #define QM_DFX_BASE		0x0100000
 #define QM_DFX_STATE1		0x0104000
 #define QM_DFX_STATE2		0x01040C8
@@ -4612,7 +4611,7 @@ static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf,
 			       unsigned int *fun_index)
 {
 	char tbuf_bdf[QM_DBG_READ_LEN] = {0};
-	char val_buf[QM_QOS_VAL_MAX_LEN] = {0};
+	char val_buf[QM_DBG_READ_LEN] = {0};
 	u32 tmp1, device, function;
 	int ret, bus;
 
-- 
GitLab


From 22d7a6c39cabab811f42cb2daed2343c87b0aca5 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Oct 2022 01:17:45 +0000
Subject: [PATCH 0573/4122] crypto: hisilicon/qm - add pci bdf number check

The pci bdf number check is added for qos written by using the pci api.
Directly get the devfn by pci_dev, so delete some redundant code.
And use the kstrtoul instead of sscanf to simplify code.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 37 ++++++++++++-----------------------
 1 file changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 5d79e9f0e7e14..80eeb966cf891 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4589,49 +4589,36 @@ err_put_dfx_access:
 	return ret;
 }
 
-static ssize_t qm_qos_value_init(const char *buf, unsigned long *val)
-{
-	int buflen = strlen(buf);
-	int ret, i;
-
-	for (i = 0; i < buflen; i++) {
-		if (!isdigit(buf[i]))
-			return -EINVAL;
-	}
-
-	ret = sscanf(buf, "%lu", val);
-	if (ret != QM_QOS_VAL_NUM)
-		return -EINVAL;
-
-	return 0;
-}
-
 static ssize_t qm_get_qos_value(struct hisi_qm *qm, const char *buf,
 			       unsigned long *val,
 			       unsigned int *fun_index)
 {
+	struct bus_type *bus_type = qm->pdev->dev.bus;
 	char tbuf_bdf[QM_DBG_READ_LEN] = {0};
 	char val_buf[QM_DBG_READ_LEN] = {0};
-	u32 tmp1, device, function;
-	int ret, bus;
+	struct pci_dev *pdev;
+	struct device *dev;
+	int ret;
 
 	ret = sscanf(buf, "%s %s", tbuf_bdf, val_buf);
 	if (ret != QM_QOS_PARAM_NUM)
 		return -EINVAL;
 
-	ret = qm_qos_value_init(val_buf, val);
+	ret = kstrtoul(val_buf, 10, val);
 	if (ret || *val == 0 || *val > QM_QOS_MAX_VAL) {
 		pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n");
 		return -EINVAL;
 	}
 
-	ret = sscanf(tbuf_bdf, "%u:%x:%u.%u", &tmp1, &bus, &device, &function);
-	if (ret != QM_QOS_BDF_PARAM_NUM) {
-		pci_err(qm->pdev, "input pci bdf value is error!\n");
-		return -EINVAL;
+	dev = bus_find_device_by_name(bus_type, NULL, tbuf_bdf);
+	if (!dev) {
+		pci_err(qm->pdev, "input pci bdf number is error!\n");
+		return -ENODEV;
 	}
 
-	*fun_index = PCI_DEVFN(device, function);
+	pdev = container_of(dev, struct pci_dev, dev);
+
+	*fun_index = pdev->devfn;
 
 	return 0;
 }
-- 
GitLab


From 8f82f4ae8946d665f1e38da8e2b39b929d2435b1 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 22 Oct 2022 01:17:46 +0000
Subject: [PATCH 0574/4122] crypto: hisilicon/qm - delete redundancy check

Because the permission on the VF debugfs file is "0444". So
the VF function checking is redundant in qos writing api.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 80eeb966cf891..363a02810a164 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -4632,9 +4632,6 @@ static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
 	unsigned long val;
 	int len, ret;
 
-	if (qm->fun_type == QM_HW_VF)
-		return -EINVAL;
-
 	if (*pos != 0)
 		return 0;
 
-- 
GitLab


From d6e9aa6e1ea872d1bbdf08ac78245cf8efeda19c Mon Sep 17 00:00:00 2001
From: wangjianli <wangjianli@cdjrlc.com>
Date: Sat, 22 Oct 2022 13:38:02 +0800
Subject: [PATCH 0575/4122] crypto: octeontx - fix repeated words in comments

Delete the redundant word 'the'.

Signed-off-by: wangjianli <wangjianli@cdjrlc.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
index 205eacac4a348..f8aedafdfdc5f 100644
--- a/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
+++ b/drivers/crypto/marvell/octeontx/otx_cpt_hw_types.h
@@ -534,7 +534,7 @@ union otx_cptx_vqx_misc_ena_w1s {
  * Word0
  *  reserved_20_63:44 [63:20] Reserved.
  *  dbell_cnt:20 [19:0](R/W/H) Number of instruction queue 64-bit words to add
- *	to the CPT instruction doorbell count. Readback value is the the
+ *	to the CPT instruction doorbell count. Readback value is the
  *	current number of pending doorbell requests. If counter overflows
  *	CPT()_VQ()_MISC_INT[DBELL_DOVF] is set. To reset the count back to
  *	zero, write one to clear CPT()_VQ()_MISC_INT_ENA_W1C[DBELL_DOVF],
-- 
GitLab


From b39301ee1f268c89bd2a8eae257b7d2f50308598 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Wed, 26 Oct 2022 12:02:07 +0100
Subject: [PATCH 0576/4122] soundwire: qcom: remove unused SWRM_SPECIAL_CMD_ID

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221026110210.6575-4-srinivas.kandagatla@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/qcom.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index b33d5db494a54..185d75ee81ee6 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -104,7 +104,6 @@
 #define SWRM_REG_VAL_PACK(data, dev, id, reg)	\
 			((reg) | ((id) << 16) | ((dev) << 20) | ((data) << 24))
 
-#define SWRM_SPECIAL_CMD_ID	0xF
 #define MAX_FREQ_NUM		1
 #define TIMEOUT_MS		100
 #define QCOM_SWRM_MAX_RD_LEN	0x1
-- 
GitLab


From 1cdbfd4c9dc95d9b1e6bcbeba71cfdc70732b50e Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Wed, 26 Oct 2022 12:02:08 +0100
Subject: [PATCH 0577/4122] soundwire: qcom: make reset optional for v1.6
 controller

On Some Qualcomm SOCs like sc8280xp which uses v1.6 soundwire controller
reset is not mandatory, so make this an optional one.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221026110210.6575-5-srinivas.kandagatla@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/qcom.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index 185d75ee81ee6..49e6d745e0fa5 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -1321,8 +1321,8 @@ static int qcom_swrm_probe(struct platform_device *pdev)
 	}
 
 	if (data->sw_clk_gate_required) {
-		ctrl->audio_cgcr = devm_reset_control_get_exclusive(dev, "swr_audio_cgcr");
-		if (IS_ERR_OR_NULL(ctrl->audio_cgcr)) {
+		ctrl->audio_cgcr = devm_reset_control_get_optional_exclusive(dev, "swr_audio_cgcr");
+		if (IS_ERR(ctrl->audio_cgcr)) {
 			dev_err(dev, "Failed to get cgcr reset ctrl required for SW gating\n");
 			ret = PTR_ERR(ctrl->audio_cgcr);
 			goto err_init;
-- 
GitLab


From df73f66c7dd4474a05e07f911427043bc32cff31 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Wed, 26 Oct 2022 12:02:09 +0100
Subject: [PATCH 0578/4122] dt-bindings: soundwire: qcom: add v1.7.0 support

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221026110210.6575-6-srinivas.kandagatla@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/soundwire/qcom,sdw.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
index c85c25779e3fc..e0faed8dceac7 100644
--- a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
+++ b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
@@ -13,6 +13,7 @@ board specific bus parameters.
 			"qcom,soundwire-v1.5.0"
 			"qcom,soundwire-v1.5.1"
 			"qcom,soundwire-v1.6.0"
+			"qcom,soundwire-v1.7.0"
 - reg:
 	Usage: required
 	Value type: <prop-encoded-array>
-- 
GitLab


From cf43cd33b67a291fadcd16b1ad2f435bd2e60749 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Date: Wed, 26 Oct 2022 12:02:10 +0100
Subject: [PATCH 0579/4122] soundwire: qcom: add support for v1.7 Soundwire
 Controller

This patch add support for v1.7 SoundWire Controller which has
support for Multi-EE (Execution Environment), resulting in a
new register and extending field in BUS_CTRL register.

With these updates v1.7.0 is fully supported.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221026110210.6575-7-srinivas.kandagatla@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/qcom.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
index 49e6d745e0fa5..ce00a5cf980c7 100644
--- a/drivers/soundwire/qcom.c
+++ b/drivers/soundwire/qcom.c
@@ -25,6 +25,8 @@
 
 #define SWRM_COMP_SW_RESET					0x008
 #define SWRM_COMP_STATUS					0x014
+#define SWRM_LINK_MANAGER_EE					0x018
+#define SWRM_EE_CPU						1
 #define SWRM_FRM_GEN_ENABLED					BIT(0)
 #define SWRM_COMP_HW_VERSION					0x00
 #define SWRM_COMP_CFG_ADDR					0x04
@@ -684,7 +686,14 @@ static int qcom_swrm_init(struct qcom_swrm_ctrl *ctrl)
 	u32p_replace_bits(&val, SWRM_DEF_CMD_NO_PINGS, SWRM_MCP_CFG_MAX_NUM_OF_CMD_NO_PINGS_BMSK);
 	ctrl->reg_write(ctrl, SWRM_MCP_CFG_ADDR, val);
 
-	ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START);
+	if (ctrl->version >= 0x01070000) {
+		ctrl->reg_write(ctrl, SWRM_LINK_MANAGER_EE, SWRM_EE_CPU);
+		ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL,
+				SWRM_MCP_BUS_CLK_START << SWRM_EE_CPU);
+	} else {
+		ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START);
+	}
+
 	/* Configure number of retries of a read/write cmd */
 	if (ctrl->version > 0x01050001) {
 		/* Only for versions >= 1.5.1 */
@@ -1509,7 +1518,13 @@ static int __maybe_unused swrm_runtime_resume(struct device *dev)
 	} else {
 		reset_control_reset(ctrl->audio_cgcr);
 
-		ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START);
+		if (ctrl->version >= 0x01070000) {
+			ctrl->reg_write(ctrl, SWRM_LINK_MANAGER_EE, SWRM_EE_CPU);
+			ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL,
+					SWRM_MCP_BUS_CLK_START << SWRM_EE_CPU);
+		} else {
+			ctrl->reg_write(ctrl, SWRM_MCP_BUS_CTRL, SWRM_MCP_BUS_CLK_START);
+		}
 		ctrl->reg_write(ctrl, SWRM_INTERRUPT_CLEAR,
 			SWRM_INTERRUPT_STATUS_MASTER_CLASH_DET);
 
@@ -1573,6 +1588,7 @@ static const struct of_device_id qcom_swrm_of_match[] = {
 	{ .compatible = "qcom,soundwire-v1.3.0", .data = &swrm_v1_3_data },
 	{ .compatible = "qcom,soundwire-v1.5.1", .data = &swrm_v1_5_data },
 	{ .compatible = "qcom,soundwire-v1.6.0", .data = &swrm_v1_6_data },
+	{ .compatible = "qcom,soundwire-v1.7.0", .data = &swrm_v1_5_data },
 	{/* sentinel */},
 };
 
-- 
GitLab


From 28d74fc36a3e667b51a437fbf6c45264a0c8f2db Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:28 +0200
Subject: [PATCH 0580/4122] phy: qcom-qmp: drop regulator error message

Regulator core already logs an error message in case requesting a
regulator fails so drop the mostly redundant error message from probe.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c        | 3 +--
 drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 3 +--
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c         | 3 +--
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c          | 3 +--
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c          | 3 +--
 5 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 7b434e2ee640e..998c8f80ccd80 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2816,8 +2816,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 
 	ret = qmp_combo_vreg_init(dev, cfg);
 	if (ret)
-		return dev_err_probe(dev, ret,
-				     "failed to get regulator supplies\n");
+		return ret;
 
 	num = of_get_available_child_count(dev->of_node);
 	/* do we have a rogue child node ? */
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 5fdd85a1dc3ef..45c0e2958bf63 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -869,8 +869,7 @@ static int qmp_pcie_msm8996_probe(struct platform_device *pdev)
 
 	ret = qmp_pcie_msm8996_vreg_init(dev, cfg);
 	if (ret)
-		return dev_err_probe(dev, ret,
-				     "failed to get regulator supplies\n");
+		return ret;
 
 	num = of_get_available_child_count(dev->of_node);
 	/* do we have a rogue child node ? */
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 30838ae8f0272..dc7f8ba413b9d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2445,8 +2445,7 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 
 	ret = qmp_pcie_vreg_init(dev, cfg);
 	if (ret)
-		return dev_err_probe(dev, ret,
-				     "failed to get regulator supplies\n");
+		return ret;
 
 	num = of_get_available_child_count(dev->of_node);
 	/* do we have a rogue child node ? */
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index e28c45ab74ea2..566365fbfe1ae 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -1210,8 +1210,7 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 
 	ret = qmp_ufs_vreg_init(dev, cfg);
 	if (ret)
-		return dev_err_probe(dev, ret,
-				     "failed to get regulator supplies\n");
+		return ret;
 
 	num = of_get_available_child_count(dev->of_node);
 	/* do we have a rogue child node ? */
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index b0b13fb6cb59e..a0b97fd5d0a53 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2746,8 +2746,7 @@ static int qmp_usb_probe(struct platform_device *pdev)
 
 	ret = qmp_usb_vreg_init(dev, cfg);
 	if (ret)
-		return dev_err_probe(dev, ret,
-				     "failed to get regulator supplies\n");
+		return ret;
 
 	num = of_get_available_child_count(dev->of_node);
 	/* do we have a rogue child node ? */
-- 
GitLab


From 17302d3630030db09947241451f3d984bc0d3144 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:29 +0200
Subject: [PATCH 0581/4122] phy: qcom-qmp: drop superfluous comments

Drop some unnecessary or incorrect comments.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c        | 4 ----
 drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 3 ---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c         | 3 ---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c          | 3 ---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c          | 5 -----
 5 files changed, 18 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 998c8f80ccd80..3889dcf73c599 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1949,7 +1949,6 @@ static int qmp_combo_com_init(struct qmp_phy *qphy)
 		return 0;
 	}
 
-	/* turn on regulator supplies */
 	ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
 	if (ret) {
 		dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
@@ -2779,7 +2778,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	qmp->dev = dev;
 	dev_set_drvdata(dev, qmp);
 
-	/* Get the specific init parameters of QMP phy */
 	combo_cfg = of_device_get_match_data(dev);
 	if (!combo_cfg)
 		return -EINVAL;
@@ -2787,7 +2785,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	usb_cfg = combo_cfg->usb_cfg;
 	cfg = usb_cfg; /* Setup clks and regulators */
 
-	/* per PHY serdes; usually located at base address */
 	usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(serdes))
 		return PTR_ERR(serdes);
@@ -2796,7 +2793,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (IS_ERR(qmp->dp_com))
 		return PTR_ERR(qmp->dp_com);
 
-	/* Only two serdes for combo PHY */
 	dp_serdes = devm_platform_ioremap_resource(pdev, 2);
 	if (IS_ERR(dp_serdes))
 		return PTR_ERR(dp_serdes);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 45c0e2958bf63..8b74948eb467d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -420,7 +420,6 @@ static int qmp_pcie_msm8996_com_init(struct qmp_phy *qphy)
 		return 0;
 	}
 
-	/* turn on regulator supplies */
 	ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
 	if (ret) {
 		dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
@@ -845,12 +844,10 @@ static int qmp_pcie_msm8996_probe(struct platform_device *pdev)
 	qmp->dev = dev;
 	dev_set_drvdata(dev, qmp);
 
-	/* Get the specific init parameters of QMP phy */
 	cfg = of_device_get_match_data(dev);
 	if (!cfg)
 		return -EINVAL;
 
-	/* per PHY serdes; usually located at base address */
 	serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(serdes))
 		return PTR_ERR(serdes);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index dc7f8ba413b9d..de04d8dd53504 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1978,7 +1978,6 @@ static int qmp_pcie_init(struct phy *phy)
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	int ret;
 
-	/* turn on regulator supplies */
 	ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
 	if (ret) {
 		dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
@@ -2425,12 +2424,10 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	qmp->dev = dev;
 	dev_set_drvdata(dev, qmp);
 
-	/* Get the specific init parameters of QMP phy */
 	cfg = of_device_get_match_data(dev);
 	if (!cfg)
 		return -EINVAL;
 
-	/* per PHY serdes; usually located at base address */
 	serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(serdes))
 		return PTR_ERR(serdes);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 566365fbfe1ae..ab69f648ee387 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -849,7 +849,6 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy)
 	void __iomem *pcs = qphy->pcs;
 	int ret;
 
-	/* turn on regulator supplies */
 	ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
 	if (ret) {
 		dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
@@ -1194,12 +1193,10 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	qmp->dev = dev;
 	dev_set_drvdata(dev, qmp);
 
-	/* Get the specific init parameters of QMP phy */
 	cfg = of_device_get_match_data(dev);
 	if (!cfg)
 		return -EINVAL;
 
-	/* per PHY serdes; usually located at base address */
 	serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(serdes))
 		return PTR_ERR(serdes);
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index a0b97fd5d0a53..2c5e4041bcf9d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2120,7 +2120,6 @@ static int qmp_usb_init(struct phy *phy)
 	void __iomem *dp_com = qmp->dp_com;
 	int ret;
 
-	/* turn on regulator supplies */
 	ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
 	if (ret) {
 		dev_err(qmp->dev, "failed to enable regulators, err=%d\n", ret);
@@ -2229,7 +2228,6 @@ static int qmp_usb_power_on(struct phy *phy)
 					cfg->rx_tbl, cfg->rx_tbl_num, 2);
 	}
 
-	/* Configure link rate, swing, etc. */
 	qmp_usb_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	if (cfg->has_pwrdn_delay)
@@ -2719,17 +2717,14 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	qmp->dev = dev;
 	dev_set_drvdata(dev, qmp);
 
-	/* Get the specific init parameters of QMP phy */
 	cfg = of_device_get_match_data(dev);
 	if (!cfg)
 		return -EINVAL;
 
-	/* per PHY serdes; usually located at base address */
 	serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(serdes))
 		return PTR_ERR(serdes);
 
-	/* per PHY dp_com; if PHY has dp_com control block */
 	if (cfg->has_phy_dp_com_ctrl) {
 		qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
 		if (IS_ERR(qmp->dp_com))
-- 
GitLab


From ccf6f83b1b0bbdcde1ec7c0a35dde014f7101507 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:30 +0200
Subject: [PATCH 0582/4122] phy: qcom-qmp-combo: drop unused in-layout
 configuration

The QMP combo PHY driver does not use the "in-layout" configuration
macro to configure registers that are typically accessed using
"regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused
feature.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 54 ++++++-----------------
 1 file changed, 14 insertions(+), 40 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 3889dcf73c599..84380852ba5b7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -69,11 +69,6 @@
 struct qmp_phy_init_tbl {
 	unsigned int offset;
 	unsigned int val;
-	/*
-	 * register part of layout ?
-	 * if yes, then offset gives index in the reg-layout
-	 */
-	bool in_layout;
 	/*
 	 * mask of lanes for which this register is written
 	 * for cases when second lane needs different values
@@ -88,14 +83,6 @@ struct qmp_phy_init_tbl {
 		.lane_mask = 0xff,	\
 	}
 
-#define QMP_PHY_INIT_CFG_L(o, v)	\
-	{				\
-		.offset = o,		\
-		.val = v,		\
-		.in_layout = true,	\
-		.lane_mask = 0xff,	\
-	}
-
 #define QMP_PHY_INIT_CFG_LANE(o, v, l)	\
 	{				\
 		.offset = o,		\
@@ -1346,7 +1333,6 @@ static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = {
 };
 
 static void qmp_combo_configure_lane(void __iomem *base,
-					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
 					int num,
 					u8 lane_mask)
@@ -1361,19 +1347,15 @@ static void qmp_combo_configure_lane(void __iomem *base,
 		if (!(t->lane_mask & lane_mask))
 			continue;
 
-		if (t->in_layout)
-			writel(t->val, base + regs[t->offset]);
-		else
-			writel(t->val, base + t->offset);
+		writel(t->val, base + t->offset);
 	}
 }
 
 static void qmp_combo_configure(void __iomem *base,
-				   const unsigned int *regs,
 				   const struct qmp_phy_init_tbl tbl[],
 				   int num)
 {
-	qmp_combo_configure_lane(base, regs, tbl, num, 0xff);
+	qmp_combo_configure_lane(base, tbl, num, 0xff);
 }
 
 static int qmp_combo_serdes_init(struct qmp_phy *qphy)
@@ -1384,28 +1366,24 @@ static int qmp_combo_serdes_init(struct qmp_phy *qphy)
 	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
 	int serdes_tbl_num = cfg->serdes_tbl_num;
 
-	qmp_combo_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num);
+	qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num);
 
 	if (cfg->type == PHY_TYPE_DP) {
 		switch (dp_opts->link_rate) {
 		case 1620:
-			qmp_combo_configure(serdes, cfg->regs,
-					       cfg->serdes_tbl_rbr,
+			qmp_combo_configure(serdes, cfg->serdes_tbl_rbr,
 					       cfg->serdes_tbl_rbr_num);
 			break;
 		case 2700:
-			qmp_combo_configure(serdes, cfg->regs,
-					       cfg->serdes_tbl_hbr,
+			qmp_combo_configure(serdes, cfg->serdes_tbl_hbr,
 					       cfg->serdes_tbl_hbr_num);
 			break;
 		case 5400:
-			qmp_combo_configure(serdes, cfg->regs,
-					       cfg->serdes_tbl_hbr2,
+			qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2,
 					       cfg->serdes_tbl_hbr2_num);
 			break;
 		case 8100:
-			qmp_combo_configure(serdes, cfg->regs,
-					       cfg->serdes_tbl_hbr3,
+			qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3,
 					       cfg->serdes_tbl_hbr3_num);
 			break;
 		default:
@@ -2069,29 +2047,25 @@ static int qmp_combo_power_on(struct phy *phy)
 	}
 
 	/* Tx, Rx, and PCS configurations */
-	qmp_combo_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+	qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
 
-	if (cfg->lanes >= 2) {
-		qmp_combo_configure_lane(qphy->tx2, cfg->regs, cfg->tx_tbl,
-					 cfg->tx_tbl_num, 2);
-	}
+	if (cfg->lanes >= 2)
+		qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
 	/* Configure special DP tx tunings */
 	if (cfg->type == PHY_TYPE_DP)
 		cfg->configure_dp_tx(qphy);
 
-	qmp_combo_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1);
+	qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
-	if (cfg->lanes >= 2) {
-		qmp_combo_configure_lane(qphy->rx2, cfg->regs, cfg->rx_tbl,
-					 cfg->rx_tbl_num, 2);
-	}
+	if (cfg->lanes >= 2)
+		qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
 	/* Configure link rate, swing, etc. */
 	if (cfg->type == PHY_TYPE_DP)
 		cfg->configure_dp_phy(qphy);
 	else
-		qmp_combo_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+		qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	if (cfg->has_pwrdn_delay)
 		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
-- 
GitLab


From f2175762b4ed90048b739b32a739a2df790d4e13 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:31 +0200
Subject: [PATCH 0583/4122] phy: qcom-qmp-pcie: drop redundant ipq8074 power on

The PCS initialisation table for IPQ8074 includes updates of the reset
and start-control registers which is already handled explicitly by the
driver during power on.

Drop the redundant register write from the IPQ8074 configuration table
and along with it the now unused "in-layout" configuration macro and
code.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 42 +++++-------------------
 1 file changed, 9 insertions(+), 33 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index de04d8dd53504..fa8bc6aeedf14 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -42,11 +42,6 @@
 struct qmp_phy_init_tbl {
 	unsigned int offset;
 	unsigned int val;
-	/*
-	 * register part of layout ?
-	 * if yes, then offset gives index in the reg-layout
-	 */
-	bool in_layout;
 	/*
 	 * mask of lanes for which this register is written
 	 * for cases when second lane needs different values
@@ -61,14 +56,6 @@ struct qmp_phy_init_tbl {
 		.lane_mask = 0xff,	\
 	}
 
-#define QMP_PHY_INIT_CFG_L(o, v)	\
-	{				\
-		.offset = o,		\
-		.val = v,		\
-		.in_layout = true,	\
-		.lane_mask = 0xff,	\
-	}
-
 #define QMP_PHY_INIT_CFG_LANE(o, v, l)	\
 	{				\
 		.offset = o,		\
@@ -388,8 +375,6 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V2_PCS_RX_SIGDET_LVL, 0x99),
 	QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M6DB_V0, 0x15),
 	QMP_PHY_INIT_CFG(QPHY_V2_PCS_TXDEEMPH_M3P5DB_V0, 0xe),
-	QMP_PHY_INIT_CFG_L(QPHY_SW_RESET, 0x0),
-	QMP_PHY_INIT_CFG_L(QPHY_START_CTRL, 0x3),
 };
 
 static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_serdes_tbl[] = {
@@ -1896,7 +1881,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 };
 
 static void qmp_pcie_configure_lane(void __iomem *base,
-					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
 					int num,
 					u8 lane_mask)
@@ -1911,30 +1895,25 @@ static void qmp_pcie_configure_lane(void __iomem *base,
 		if (!(t->lane_mask & lane_mask))
 			continue;
 
-		if (t->in_layout)
-			writel(t->val, base + regs[t->offset]);
-		else
-			writel(t->val, base + t->offset);
+		writel(t->val, base + t->offset);
 	}
 }
 
 static void qmp_pcie_configure(void __iomem *base,
-					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
 					int num)
 {
-	qmp_pcie_configure_lane(base, regs, tbl, num, 0xff);
+	qmp_pcie_configure_lane(base, tbl, num, 0xff);
 }
 
 static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	void __iomem *serdes = qphy->serdes;
 
 	if (!tables)
 		return;
 
-	qmp_pcie_configure(serdes, cfg->regs, tables->serdes, tables->serdes_num);
+	qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num);
 }
 
 static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
@@ -1946,29 +1925,26 @@ static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_t
 	if (!tables)
 		return;
 
-	qmp_pcie_configure_lane(tx, cfg->regs, tables->tx, tables->tx_num, 1);
+	qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_pcie_configure_lane(qphy->tx2, cfg->regs, tables->tx, tables->tx_num, 2);
+		qmp_pcie_configure_lane(qphy->tx2, tables->tx, tables->tx_num, 2);
 
-	qmp_pcie_configure_lane(rx, cfg->regs, tables->rx, tables->rx_num, 1);
+	qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1);
 	if (cfg->lanes >= 2)
-		qmp_pcie_configure_lane(qphy->rx2, cfg->regs, tables->rx, tables->rx_num, 2);
+		qmp_pcie_configure_lane(qphy->rx2, tables->rx, tables->rx_num, 2);
 }
 
 static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *pcs_misc = qphy->pcs_misc;
 
 	if (!tables)
 		return;
 
-	qmp_pcie_configure(pcs, cfg->regs,
-			   tables->pcs, tables->pcs_num);
-	qmp_pcie_configure(pcs_misc, cfg->regs,
-			   tables->pcs_misc, tables->pcs_misc_num);
+	qmp_pcie_configure(pcs, tables->pcs, tables->pcs_num);
+	qmp_pcie_configure(pcs_misc, tables->pcs_misc, tables->pcs_misc_num);
 }
 
 static int qmp_pcie_init(struct phy *phy)
-- 
GitLab


From d3ef88635e318a7cc7e2fc26a58b4e8b56c9fb9b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:32 +0200
Subject: [PATCH 0584/4122] phy: qcom-qmp-pcie-msm8996: drop unused in-layout
 configuration

The MSM8996 QMP PCIe PHY driver no longer uses the "in-layout"
configuration macro to configure registers that are typically accessed
using "regs_layout" arrays (e.g. QPHY_START_CTRL) so drop this unused
feature.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c  | 34 ++++---------------
 1 file changed, 6 insertions(+), 28 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 8b74948eb467d..31ac405d3785a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -46,11 +46,6 @@
 struct qmp_phy_init_tbl {
 	unsigned int offset;
 	unsigned int val;
-	/*
-	 * register part of layout ?
-	 * if yes, then offset gives index in the reg-layout
-	 */
-	bool in_layout;
 	/*
 	 * mask of lanes for which this register is written
 	 * for cases when second lane needs different values
@@ -65,14 +60,6 @@ struct qmp_phy_init_tbl {
 		.lane_mask = 0xff,	\
 	}
 
-#define QMP_PHY_INIT_CFG_L(o, v)	\
-	{				\
-		.offset = o,		\
-		.val = v,		\
-		.in_layout = true,	\
-		.lane_mask = 0xff,	\
-	}
-
 #define QMP_PHY_INIT_CFG_LANE(o, v, l)	\
 	{				\
 		.offset = o,		\
@@ -346,7 +333,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = {
 };
 
 static void qmp_pcie_msm8996_configure_lane(void __iomem *base,
-					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
 					int num,
 					u8 lane_mask)
@@ -361,19 +347,15 @@ static void qmp_pcie_msm8996_configure_lane(void __iomem *base,
 		if (!(t->lane_mask & lane_mask))
 			continue;
 
-		if (t->in_layout)
-			writel(t->val, base + regs[t->offset]);
-		else
-			writel(t->val, base + t->offset);
+		writel(t->val, base + t->offset);
 	}
 }
 
 static void qmp_pcie_msm8996_configure(void __iomem *base,
-				   const unsigned int *regs,
 				   const struct qmp_phy_init_tbl tbl[],
 				   int num)
 {
-	qmp_pcie_msm8996_configure_lane(base, regs, tbl, num, 0xff);
+	qmp_pcie_msm8996_configure_lane(base, tbl, num, 0xff);
 }
 
 static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy)
@@ -387,7 +369,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy)
 	unsigned int mask, val;
 	int ret;
 
-	qmp_pcie_msm8996_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num);
+	qmp_pcie_msm8996_configure(serdes, serdes_tbl, serdes_tbl_num);
 
 	qphy_clrbits(serdes, cfg->regs[QPHY_COM_SW_RESET], SW_RESET);
 	qphy_setbits(serdes, cfg->regs[QPHY_COM_START_CONTROL],
@@ -531,13 +513,9 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
 	}
 
 	/* Tx, Rx, and PCS configurations */
-	qmp_pcie_msm8996_configure_lane(tx, cfg->regs, cfg->tx_tbl,
-					cfg->tx_tbl_num, 1);
-
-	qmp_pcie_msm8996_configure_lane(rx, cfg->regs, cfg->rx_tbl,
-					cfg->rx_tbl_num, 1);
-
-	qmp_pcie_msm8996_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+	qmp_pcie_msm8996_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+	qmp_pcie_msm8996_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
+	qmp_pcie_msm8996_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	/*
 	 * Pull out PHY from POWER DOWN state.
-- 
GitLab


From 91496846a9e863f7caa2db4a828844746b6f6b32 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:33 +0200
Subject: [PATCH 0585/4122] phy: qcom-qmp-ufs: drop unused in-layout
 configuration

The QMP UFS PHY driver does not use the "in-layout" configuration macro
to configure registers that are typically accessed using "regs_layout"
arrays (e.g. QPHY_START_CTRL) so drop this unused feature.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 42 ++++++-------------------
 1 file changed, 10 insertions(+), 32 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index ab69f648ee387..02931b82132f7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -41,11 +41,6 @@
 struct qmp_phy_init_tbl {
 	unsigned int offset;
 	unsigned int val;
-	/*
-	 * register part of layout ?
-	 * if yes, then offset gives index in the reg-layout
-	 */
-	bool in_layout;
 	/*
 	 * mask of lanes for which this register is written
 	 * for cases when second lane needs different values
@@ -60,14 +55,6 @@ struct qmp_phy_init_tbl {
 		.lane_mask = 0xff,	\
 	}
 
-#define QMP_PHY_INIT_CFG_L(o, v)	\
-	{				\
-		.offset = o,		\
-		.val = v,		\
-		.in_layout = true,	\
-		.lane_mask = 0xff,	\
-	}
-
 #define QMP_PHY_INIT_CFG_LANE(o, v, l)	\
 	{				\
 		.offset = o,		\
@@ -800,7 +787,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
 };
 
 static void qmp_ufs_configure_lane(void __iomem *base,
-					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
 					int num,
 					u8 lane_mask)
@@ -815,19 +801,15 @@ static void qmp_ufs_configure_lane(void __iomem *base,
 		if (!(t->lane_mask & lane_mask))
 			continue;
 
-		if (t->in_layout)
-			writel(t->val, base + regs[t->offset]);
-		else
-			writel(t->val, base + t->offset);
+		writel(t->val, base + t->offset);
 	}
 }
 
 static void qmp_ufs_configure(void __iomem *base,
-				   const unsigned int *regs,
 				   const struct qmp_phy_init_tbl tbl[],
 				   int num)
 {
-	qmp_ufs_configure_lane(base, regs, tbl, num, 0xff);
+	qmp_ufs_configure_lane(base, tbl, num, 0xff);
 }
 
 static int qmp_ufs_serdes_init(struct qmp_phy *qphy)
@@ -837,7 +819,7 @@ static int qmp_ufs_serdes_init(struct qmp_phy *qphy)
 	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
 	int serdes_tbl_num = cfg->serdes_tbl_num;
 
-	qmp_ufs_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num);
+	qmp_ufs_configure(serdes, serdes_tbl, serdes_tbl_num);
 
 	return 0;
 }
@@ -941,21 +923,17 @@ static int qmp_ufs_power_on(struct phy *phy)
 	qmp_ufs_serdes_init(qphy);
 
 	/* Tx, Rx, and PCS configurations */
-	qmp_ufs_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+	qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
 
-	if (cfg->lanes >= 2) {
-		qmp_ufs_configure_lane(qphy->tx2, cfg->regs,
-					cfg->tx_tbl, cfg->tx_tbl_num, 2);
-	}
+	if (cfg->lanes >= 2)
+		qmp_ufs_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
-	qmp_ufs_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1);
+	qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
-	if (cfg->lanes >= 2) {
-		qmp_ufs_configure_lane(qphy->rx2, cfg->regs,
-					cfg->rx_tbl, cfg->rx_tbl_num, 2);
-	}
+	if (cfg->lanes >= 2)
+		qmp_ufs_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
-	qmp_ufs_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+	qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	ret = reset_control_deassert(qmp->ufs_reset);
 	if (ret)
-- 
GitLab


From 9d452c3ac257d36740580e5ce2b899bfca99fd62 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:34 +0200
Subject: [PATCH 0586/4122] phy: qcom-qmp-usb: drop unused in-layout
 configuration

The QMP USB PHY driver does not use the "in-layout" configuration macro
to configure registers that are typically accessed using "regs_layout"
arrays (e.g. QPHY_START_CTRL) so drop this unused feature.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012081241.18273-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 42 ++++++-------------------
 1 file changed, 10 insertions(+), 32 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 2c5e4041bcf9d..3aab9ea900783 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -69,11 +69,6 @@
 struct qmp_phy_init_tbl {
 	unsigned int offset;
 	unsigned int val;
-	/*
-	 * register part of layout ?
-	 * if yes, then offset gives index in the reg-layout
-	 */
-	bool in_layout;
 	/*
 	 * mask of lanes for which this register is written
 	 * for cases when second lane needs different values
@@ -88,14 +83,6 @@ struct qmp_phy_init_tbl {
 		.lane_mask = 0xff,	\
 	}
 
-#define QMP_PHY_INIT_CFG_L(o, v)	\
-	{				\
-		.offset = o,		\
-		.val = v,		\
-		.in_layout = true,	\
-		.lane_mask = 0xff,	\
-	}
-
 #define QMP_PHY_INIT_CFG_LANE(o, v, l)	\
 	{				\
 		.offset = o,		\
@@ -2069,7 +2056,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
 };
 
 static void qmp_usb_configure_lane(void __iomem *base,
-					const unsigned int *regs,
 					const struct qmp_phy_init_tbl tbl[],
 					int num,
 					u8 lane_mask)
@@ -2084,19 +2070,15 @@ static void qmp_usb_configure_lane(void __iomem *base,
 		if (!(t->lane_mask & lane_mask))
 			continue;
 
-		if (t->in_layout)
-			writel(t->val, base + regs[t->offset]);
-		else
-			writel(t->val, base + t->offset);
+		writel(t->val, base + t->offset);
 	}
 }
 
 static void qmp_usb_configure(void __iomem *base,
-				   const unsigned int *regs,
 				   const struct qmp_phy_init_tbl tbl[],
 				   int num)
 {
-	qmp_usb_configure_lane(base, regs, tbl, num, 0xff);
+	qmp_usb_configure_lane(base, tbl, num, 0xff);
 }
 
 static int qmp_usb_serdes_init(struct qmp_phy *qphy)
@@ -2106,7 +2088,7 @@ static int qmp_usb_serdes_init(struct qmp_phy *qphy)
 	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
 	int serdes_tbl_num = cfg->serdes_tbl_num;
 
-	qmp_usb_configure(serdes, cfg->regs, serdes_tbl, serdes_tbl_num);
+	qmp_usb_configure(serdes, serdes_tbl, serdes_tbl_num);
 
 	return 0;
 }
@@ -2214,21 +2196,17 @@ static int qmp_usb_power_on(struct phy *phy)
 	}
 
 	/* Tx, Rx, and PCS configurations */
-	qmp_usb_configure_lane(tx, cfg->regs, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+	qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
 
-	if (cfg->lanes >= 2) {
-		qmp_usb_configure_lane(qphy->tx2, cfg->regs,
-					cfg->tx_tbl, cfg->tx_tbl_num, 2);
-	}
+	if (cfg->lanes >= 2)
+		qmp_usb_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
-	qmp_usb_configure_lane(rx, cfg->regs, cfg->rx_tbl, cfg->rx_tbl_num, 1);
+	qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
-	if (cfg->lanes >= 2) {
-		qmp_usb_configure_lane(qphy->rx2, cfg->regs,
-					cfg->rx_tbl, cfg->rx_tbl_num, 2);
-	}
+	if (cfg->lanes >= 2)
+		qmp_usb_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
-	qmp_usb_configure(pcs, cfg->regs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+	qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	if (cfg->has_pwrdn_delay)
 		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
-- 
GitLab


From e71906144b432135b483e228d65be59fbb44c310 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:35 +0200
Subject: [PATCH 0587/4122] phy: qcom-qmp-pcie: drop power-down delay config

The power-down delay was included in the first version of the QMP driver
as an optional delay after powering on the PHY (using
POWER_DOWN_CONTROL) and just before starting it. Later changes modified
this sequence by powering on before initialising the PHY, but the
optional delay stayed where it was (i.e. before starting the PHY).

The vendor driver does not use a delay before starting the PHY and this
is likely not needed on any platform unless there is a corresponding
delay in the vendor kernel init sequence tables (i.e. in devicetree).

Let's keep the delay for now, but drop the redundant delay period
configuration while increasing the unnecessarily low timer slack
somewhat.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 27 +-----------------------
 1 file changed, 1 insertion(+), 26 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index fa8bc6aeedf14..315de484f8759 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1362,9 +1362,6 @@ struct qmp_phy_cfg {
 
 	/* true, if PHY needs delay after POWER_DOWN */
 	bool has_pwrdn_delay;
-	/* power_down delay in usec */
-	int pwrdn_delay_min;
-	int pwrdn_delay_max;
 
 	/* QMP PHY pipe clock interface rate */
 	unsigned long pipe_clock_rate;
@@ -1500,8 +1497,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
@@ -1529,8 +1524,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 
 	.pipe_clock_rate	= 250000000,
 };
@@ -1562,8 +1555,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
@@ -1594,8 +1585,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
@@ -1624,8 +1613,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
@@ -1666,8 +1653,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
@@ -1708,8 +1693,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
@@ -1765,8 +1748,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
@@ -1797,8 +1778,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 	.phy_status		= PHYSTATUS_4_20,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
@@ -1829,8 +1808,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
@@ -1876,8 +1853,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 	.phy_status		= PHYSTATUS_4_20,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= 995,		/* us */
-	.pwrdn_delay_max	= 1005,		/* us */
 };
 
 static void qmp_pcie_configure_lane(void __iomem *base,
@@ -2037,7 +2012,7 @@ static int qmp_pcie_power_on(struct phy *phy)
 	qmp_pcie_pcs_init(qphy, mode_tables);
 
 	if (cfg->has_pwrdn_delay)
-		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
+		usleep_range(1000, 1200);
 
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
-- 
GitLab


From 51bd33069f80705aba5f4725287bc5688ca6d92a Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:36 +0200
Subject: [PATCH 0588/4122] phy: qcom-qmp-pcie: replace power-down delay

The power-down delay was included in the first version of the QMP driver
as an optional delay after powering on the PHY (using
POWER_DOWN_CONTROL) and just before starting it. Later changes modified
this sequence by powering on before initialising the PHY, but the
optional delay stayed where it was (i.e. before starting the PHY).

The vendor driver does not use a delay before starting the PHY and this
is likely not needed on any platform unless there is a corresponding
delay in the vendor kernel init sequence tables (i.e. in devicetree).

But as the vendor kernel do have a 1 ms delay *after* starting the PHY
and before starting to poll the status it is possible that later
contributors have simply not noticed that the mainline power-down delay
is not equivalent.

As the current delay before even starting the PHY is pretty much
pointless and likely a mistake, move the delay after starting the PHY
which avoids a few iterations of polling and speeds up startup by 1 ms
(the poll loop otherwise takes about 1.8 ms).

Note that MSM8998 has never used a power-down delay so add a flag to
skip the delay in case starting the PHY is faster on MSM8998. This can
be removed after someone takes a measurement.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012081241.18273-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 33 +++++-------------------
 1 file changed, 6 insertions(+), 27 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 315de484f8759..151baa63e8e87 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1360,8 +1360,7 @@ struct qmp_phy_cfg {
 	/* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */
 	unsigned int phy_status;
 
-	/* true, if PHY needs delay after POWER_DOWN */
-	bool has_pwrdn_delay;
+	bool skip_start_delay;
 
 	/* QMP PHY pipe clock interface rate */
 	unsigned long pipe_clock_rate;
@@ -1495,8 +1494,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
@@ -1523,8 +1520,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 
-	.has_pwrdn_delay	= true,
-
 	.pipe_clock_rate	= 250000000,
 };
 
@@ -1553,8 +1548,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
@@ -1583,8 +1576,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
 	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
@@ -1611,8 +1602,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
 	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
@@ -1651,8 +1640,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
@@ -1691,8 +1678,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
@@ -1719,6 +1704,8 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
 	.start_ctrl             = SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
+
+	.skip_start_delay	= true,
 };
 
 static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
@@ -1746,8 +1733,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 
 	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
@@ -1776,8 +1761,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN,
 	.phy_status		= PHYSTATUS_4_20,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
@@ -1806,8 +1789,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
 	.start_ctrl             = SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
 };
 
 static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
@@ -1851,8 +1832,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 	.start_ctrl             = SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS_4_20,
-
-	.has_pwrdn_delay	= true,
 };
 
 static void qmp_pcie_configure_lane(void __iomem *base,
@@ -2011,15 +1990,15 @@ static int qmp_pcie_power_on(struct phy *phy)
 	qmp_pcie_pcs_init(qphy, &cfg->tables);
 	qmp_pcie_pcs_init(qphy, mode_tables);
 
-	if (cfg->has_pwrdn_delay)
-		usleep_range(1000, 1200);
-
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* start SerDes and Phy-Coding-Sublayer */
 	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
+	if (!cfg->skip_start_delay)
+		usleep_range(1000, 1200);
+
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
 	mask = cfg->phy_status;
 	ready = 0;
-- 
GitLab


From abc0841666b9ab6568229e6b9816505c987d8a59 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:37 +0200
Subject: [PATCH 0589/4122] phy: qcom-qmp-pcie-msm8996: drop power-down delay
 config

The power-down delay was included in the first version of the QMP driver
for MSM8996 as an optional delay after powering on the PHY (using
POWER_DOWN_CONTROL) and just before starting it. Later changes modified
this sequence by powering on before initialising the PHY, but the
optional delay stayed where it was (i.e. before starting the PHY).

The vendor driver does not use a delay before starting the PHY and this
is likely not needed on any platform unless there is a corresponding
delay in the vendor kernel init sequence tables (i.e. in devicetree).

Let's keep the delay for now, but drop the redundant configuration
options while increasing the unnecessarily low timer slack somewhat.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 31ac405d3785a..899be7bd4d92d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -41,7 +41,7 @@
 
 #define PHY_INIT_COMPLETE_TIMEOUT		10000
 #define POWER_DOWN_DELAY_US_MIN			10
-#define POWER_DOWN_DELAY_US_MAX			11
+#define POWER_DOWN_DELAY_US_MAX			20
 
 struct qmp_phy_init_tbl {
 	unsigned int offset;
@@ -203,12 +203,6 @@ struct qmp_phy_cfg {
 	unsigned int mask_com_pcs_ready;
 	/* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */
 	unsigned int phy_status;
-
-	/* true, if PHY needs delay after POWER_DOWN */
-	bool has_pwrdn_delay;
-	/* power_down delay in usec */
-	int pwrdn_delay_min;
-	int pwrdn_delay_max;
 };
 
 /**
@@ -326,10 +320,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = {
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.mask_com_pcs_ready	= PCS_READY,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static void qmp_pcie_msm8996_configure_lane(void __iomem *base,
@@ -523,8 +513,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
 	 */
 	qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl);
 
-	if (cfg->has_pwrdn_delay)
-		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
+	usleep_range(POWER_DOWN_DELAY_US_MIN, POWER_DOWN_DELAY_US_MAX);
 
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
-- 
GitLab


From d71eb7083e5eea8ddddab52e9b57a9783603a95f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:38 +0200
Subject: [PATCH 0590/4122] phy: qcom-qmp-combo: drop sc8280xp power-down delay

The SC8280XP combo PHY does not need a delay before starting the USB PHY
(which is what the has_pwrdn_delay config option really controls) so
drop the unnecessary delay.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012081241.18273-12-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 84380852ba5b7..a8e09333072e9 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1210,10 +1210,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = {
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
-- 
GitLab


From acfee73b635bca04e8a942a162bae0c0cd84b796 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:39 +0200
Subject: [PATCH 0591/4122] phy: qcom-qmp-combo: drop power-down delay config

The power-down delay was included in the first version of the QMP driver
as an optional delay after powering on the PHY (using
POWER_DOWN_CONTROL) and just before starting it. Later changes modified
this sequence by powering on before initialising the PHY, but the
optional delay stayed where it was (i.e. before starting the PHY).

The vendor driver does not use a delay before starting the PHY and this
is likely not needed on any platform unless there is a corresponding
delay in the vendor kernel init sequence tables (i.e. in devicetree).

Let's keep the delay for now, but drop the redundant delay period
configuration while increasing the unnecessarily low timer slack
somewhat.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-13-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 16 +---------------
 1 file changed, 1 insertion(+), 15 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index a8e09333072e9..82055d3a3536b 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -63,8 +63,6 @@
 #define CLAMP_EN				BIT(0) /* enables i/o clamp_n */
 
 #define PHY_INIT_COMPLETE_TIMEOUT		10000
-#define POWER_DOWN_DELAY_US_MIN			10
-#define POWER_DOWN_DELAY_US_MAX			11
 
 struct qmp_phy_init_tbl {
 	unsigned int offset;
@@ -860,9 +858,6 @@ struct qmp_phy_cfg {
 
 	/* true, if PHY needs delay after POWER_DOWN */
 	bool has_pwrdn_delay;
-	/* power_down delay in usec */
-	int pwrdn_delay_min;
-	int pwrdn_delay_max;
 
 	/* Offset from PCS to PCS_USB region */
 	unsigned int pcs_usb_offset;
@@ -1031,8 +1026,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
@@ -1102,8 +1095,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = {
@@ -1138,10 +1129,7 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 	.pwrdn_ctrl		= SW_PWRDN,
 	.phy_status		= PHYSTATUS,
 
-
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
@@ -1282,8 +1270,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
@@ -2064,7 +2050,7 @@ static int qmp_combo_power_on(struct phy *phy)
 		qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	if (cfg->has_pwrdn_delay)
-		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
+		usleep_range(10, 20);
 
 	if (cfg->type != PHY_TYPE_DP) {
 		/* Pull PHY out of reset state */
-- 
GitLab


From 898ab85d6b1e8f6271d180c47ef8a024dea9e357 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:40 +0200
Subject: [PATCH 0592/4122] phy: qcom-qmp-usb: drop sc8280xp power-down delay

The SC8280XP PHY does not need a delay before starting the PHY (which is
what the has_pwrdn_delay config option really controls) so drop the
unnecessary delay.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012081241.18273-14-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 3aab9ea900783..57dda1ecefe69 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1718,10 +1718,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
 	.phy_status		= PHYSTATUS,
-
-	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
-- 
GitLab


From 38cd167d1fc6b5bf038229b1fa02bb1f551a564f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:12:41 +0200
Subject: [PATCH 0593/4122] phy: qcom-qmp-usb: drop power-down delay config

The power-down delay was included in the first version of the QMP driver
as an optional delay after powering on the PHY (using
POWER_DOWN_CONTROL) and just before starting it. Later changes modified
this sequence by powering on before initialising the PHY, but the
optional delay stayed where it was (i.e. before starting the PHY).

The vendor driver does not use a delay before starting the PHY and this
is likely not needed on any platform unless there is a corresponding
delay in the vendor kernel init sequence tables (i.e. in devicetree).

Let's keep the delay for now, but drop the redundant delay period
configuration while increasing the unnecessarily low timer slack
somewhat.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012081241.18273-15-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 35 +------------------------
 1 file changed, 1 insertion(+), 34 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 57dda1ecefe69..751f628710eb5 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -63,8 +63,6 @@
 #define CLAMP_EN				BIT(0) /* enables i/o clamp_n */
 
 #define PHY_INIT_COMPLETE_TIMEOUT		10000
-#define POWER_DOWN_DELAY_US_MIN			10
-#define POWER_DOWN_DELAY_US_MAX			11
 
 struct qmp_phy_init_tbl {
 	unsigned int offset;
@@ -1452,9 +1450,6 @@ struct qmp_phy_cfg {
 
 	/* true, if PHY needs delay after POWER_DOWN */
 	bool has_pwrdn_delay;
-	/* power_down delay in usec */
-	int pwrdn_delay_min;
-	int pwrdn_delay_max;
 
 	/* true, if PHY has a separate DP_COM control block */
 	bool has_phy_dp_com_ctrl;
@@ -1660,9 +1655,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
-
 	.has_phy_dp_com_ctrl	= true,
 };
 
@@ -1690,9 +1682,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
-
 	.has_phy_dp_com_ctrl	= true,
 };
 
@@ -1744,8 +1733,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg msm8998_usb3phy_cfg = {
@@ -1798,11 +1785,7 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 	.pwrdn_ctrl		= SW_PWRDN,
 	.phy_status		= PHYSTATUS,
 
-
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
-
 	.has_phy_dp_com_ctrl	= true,
 };
 
@@ -1833,8 +1816,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
@@ -1864,9 +1845,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
-
 	.has_phy_dp_com_ctrl	= true,
 };
 
@@ -1897,8 +1875,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
@@ -1928,8 +1904,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = {
@@ -1959,8 +1933,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg sm8350_usb3phy_cfg = {
@@ -1990,9 +1962,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
-
 	.has_phy_dp_com_ctrl	= true,
 };
 
@@ -2023,8 +1992,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
 	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
-	.pwrdn_delay_min	= POWER_DOWN_DELAY_US_MIN,
-	.pwrdn_delay_max	= POWER_DOWN_DELAY_US_MAX,
 };
 
 static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
@@ -2205,7 +2172,7 @@ static int qmp_usb_power_on(struct phy *phy)
 	qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	if (cfg->has_pwrdn_delay)
-		usleep_range(cfg->pwrdn_delay_min, cfg->pwrdn_delay_max);
+		usleep_range(10, 20);
 
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
-- 
GitLab


From 0983529d7513e5417a5010f70582e1040c404551 Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Fri, 14 Oct 2022 00:08:41 +0530
Subject: [PATCH 0594/4122] phy: tegra: p2u: Set ENABLE_L2_EXIT_RATE_CHANGE in
 calibration

Set ENABLE_L2_EXIT_RATE_CHANGE register bit to request UPHY PLL rate change
to Gen1 during initialization. This helps in the below surprise link down
cases,
  - Surprise link down happens at Gen3/Gen4 link speed.
  - Surprise link down happens and external REFCLK is cut off, which causes
UPHY PLL rate to deviate to an invalid rate.

Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
Link: https://lore.kernel.org/r/20221013183854.21087-9-vidyas@nvidia.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/tegra/phy-tegra194-p2u.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/phy/tegra/phy-tegra194-p2u.c b/drivers/phy/tegra/phy-tegra194-p2u.c
index 1415ca71de382..633e6b7472759 100644
--- a/drivers/phy/tegra/phy-tegra194-p2u.c
+++ b/drivers/phy/tegra/phy-tegra194-p2u.c
@@ -15,6 +15,7 @@
 #include <linux/phy/phy.h>
 
 #define P2U_CONTROL_CMN			0x74
+#define P2U_CONTROL_CMN_ENABLE_L2_EXIT_RATE_CHANGE		BIT(13)
 #define P2U_CONTROL_CMN_SKP_SIZE_PROTECTION_EN			BIT(20)
 
 #define P2U_PERIODIC_EQ_CTRL_GEN3	0xc0
@@ -85,8 +86,21 @@ static int tegra_p2u_power_on(struct phy *x)
 	return 0;
 }
 
+static int tegra_p2u_calibrate(struct phy *x)
+{
+	struct tegra_p2u *phy = phy_get_drvdata(x);
+	u32 val;
+
+	val = p2u_readl(phy, P2U_CONTROL_CMN);
+	val |= P2U_CONTROL_CMN_ENABLE_L2_EXIT_RATE_CHANGE;
+	p2u_writel(phy, val, P2U_CONTROL_CMN);
+
+	return 0;
+}
+
 static const struct phy_ops ops = {
 	.power_on = tegra_p2u_power_on,
+	.calibrate = tegra_p2u_calibrate,
 	.owner = THIS_MODULE,
 };
 
-- 
GitLab


From 4a9eac5ae2200f1b208dd33738777f89f93dc0fe Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:43 +0200
Subject: [PATCH 0595/4122] phy: qcom-qmp-pcie: fix sc8180x initialisation

The phy_status mask was never set for SC8180X which meant that the
driver would not wait for the PHY to be initialised during power-on and
would never detect PHY initialisation timeouts.

Fixes: f839f14e24f2 ("phy: qcom-qmp: Add sc8180x PCIe support")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-1-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 151baa63e8e87..a7677b61f96e9 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1733,6 +1733,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 
 	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
+	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
-- 
GitLab


From 94b7288eadf6e2c09e6280c65a9d07cca01bf434 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:44 +0200
Subject: [PATCH 0596/4122] phy: qcom-qmp-pcie: fix ipq8074-gen3 initialisation

The phy_status mask was never set for IPQ8074 (gen3) which meant that
the driver would not wait for the PHY to be initialised during power-on
and would never detect PHY initialisation timeouts.

Fixes: 334fad185415 ("phy: qcom-qmp-pcie: add IPQ8074 PCIe Gen3 QMP PHY support")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index a7677b61f96e9..f1e94b879d318 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1519,6 +1519,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
+	.phy_status		= PHYSTATUS,
 
 	.pipe_clock_rate	= 250000000,
 };
-- 
GitLab


From 30518b19895789aa9101474af2ee0f62cd882d5e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:45 +0200
Subject: [PATCH 0597/4122] phy: qcom-qmp-pcie: fix ipq6018 initialisation

The phy_status mask was never set for IPQ6018 which meant that the
driver would not wait for the PHY to be initialised during power-on and
would never detect PHY initialisation timeouts.

Fixes: 520264db3bf9 ("phy: qcom-qmp: add QMP V2 PCIe PHY support for ipq60xx")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index f1e94b879d318..9b866ed19ddc8 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1549,6 +1549,7 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
+	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
-- 
GitLab


From 2577ba8c39dafe4320e1eb206b732e08bf871c83 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:46 +0200
Subject: [PATCH 0598/4122] phy: qcom-qmp-pcie: clean up status polling

Clean up the status polling by dropping the ready bit mask which is no
longer needed since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 9b866ed19ddc8..4af6b9e50d16f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1966,7 +1966,7 @@ static int qmp_pcie_power_on(struct phy *phy)
 	const struct qmp_phy_cfg_tables *mode_tables;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *status;
-	unsigned int mask, val, ready;
+	unsigned int mask, val;
 	int ret;
 
 	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
@@ -2004,9 +2004,7 @@ static int qmp_pcie_power_on(struct phy *phy)
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
 	mask = cfg->phy_status;
-	ready = 0;
-
-	ret = readl_poll_timeout(status, val, (val & mask) == ready, 10,
+	ret = readl_poll_timeout(status, val, !(val & mask), 10,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From 5cbeb75a36aa28353e51f5e6926e19449a4f3389 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:47 +0200
Subject: [PATCH 0599/4122] phy: qcom-qmp-pcie: increase status polling period

It typically takes between one and two milliseconds for the PHY to
become ready after starting it. Increase the tight 3--10 us polling
period to the more reasonable 51--200 us.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012085002.24099-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 4af6b9e50d16f..d3e7e673114fb 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2004,7 +2004,7 @@ static int qmp_pcie_power_on(struct phy *phy)
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
 	mask = cfg->phy_status;
-	ret = readl_poll_timeout(status, val, !(val & mask), 10,
+	ret = readl_poll_timeout(status, val, !(val & mask), 200,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From 1a3ae97c2490b2217810a17ab4f47552e9f6f70f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:48 +0200
Subject: [PATCH 0600/4122] phy: qcom-qmp-pcie-msm8996: clean up ready and
 status polling

Clean up the PHY ready and status polling by dropping the configuration
masks which are no longer needed since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c  | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index 899be7bd4d92d..b9260c8746bf0 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -35,7 +35,6 @@
 #define PLL_READY_GATE_EN			BIT(3)
 /* QPHY_PCS_STATUS bit */
 #define PHYSTATUS				BIT(6)
-#define PHYSTATUS_4_20				BIT(7)
 /* QPHY_COM_PCS_READY_STATUS bit */
 #define PCS_READY				BIT(0)
 
@@ -200,9 +199,6 @@ struct qmp_phy_cfg {
 
 	unsigned int start_ctrl;
 	unsigned int pwrdn_ctrl;
-	unsigned int mask_com_pcs_ready;
-	/* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */
-	unsigned int phy_status;
 };
 
 /**
@@ -318,8 +314,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = {
 
 	.start_ctrl		= PCS_START | PLL_READY_GATE_EN,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
-	.mask_com_pcs_ready	= PCS_READY,
-	.phy_status		= PHYSTATUS,
 };
 
 static void qmp_pcie_msm8996_configure_lane(void __iomem *base,
@@ -356,7 +350,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy)
 	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
 	int serdes_tbl_num = cfg->serdes_tbl_num;
 	void __iomem *status;
-	unsigned int mask, val;
+	unsigned int val;
 	int ret;
 
 	qmp_pcie_msm8996_configure(serdes, serdes_tbl, serdes_tbl_num);
@@ -366,9 +360,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy)
 		     SERDES_START | PCS_START);
 
 	status = serdes + cfg->regs[QPHY_COM_PCS_READY_STATUS];
-	mask = cfg->mask_com_pcs_ready;
-
-	ret = readl_poll_timeout(status, val, (val & mask), 10,
+	ret = readl_poll_timeout(status, val, (val & PCS_READY), 10,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev,
@@ -484,7 +476,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
 	void __iomem *rx = qphy->rx;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *status;
-	unsigned int mask, val, ready;
+	unsigned int val;
 	int ret;
 
 	qmp_pcie_msm8996_serdes_init(qphy);
@@ -522,10 +514,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
 	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
-	mask = cfg->phy_status;
-	ready = 0;
-
-	ret = readl_poll_timeout(status, val, (val & mask) == ready, 10,
+	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From 3894f6d03c4e96665232fbe2e04589f1228cbb0c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:49 +0200
Subject: [PATCH 0601/4122] phy: qcom-qmp-pcie-msm8996: increase status polling
 period

It typically takes between one and two milliseconds for the PHY to
become ready after starting it. Increase the tight 3--10 us polling
period to the more reasonable 51--200 us.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012085002.24099-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index b9260c8746bf0..cd8fafe4c2956 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -360,7 +360,7 @@ static int qmp_pcie_msm8996_serdes_init(struct qmp_phy *qphy)
 		     SERDES_START | PCS_START);
 
 	status = serdes + cfg->regs[QPHY_COM_PCS_READY_STATUS];
-	ret = readl_poll_timeout(status, val, (val & PCS_READY), 10,
+	ret = readl_poll_timeout(status, val, (val & PCS_READY), 200,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev,
@@ -514,7 +514,7 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
 	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
-	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10,
+	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From f7075f4905e79e340b0e5f0f097c8ce896be8bb3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:50 +0200
Subject: [PATCH 0602/4122] phy: qcom-qmp-combo: clean up status polling

Clean up the PHY status polling by dropping the configuration mask which
is no longer needed since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 82055d3a3536b..e312cad6d9c21 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -853,8 +853,6 @@ struct qmp_phy_cfg {
 
 	unsigned int start_ctrl;
 	unsigned int pwrdn_ctrl;
-	/* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */
-	unsigned int phy_status;
 
 	/* true, if PHY needs delay after POWER_DOWN */
 	bool has_pwrdn_delay;
@@ -1023,7 +1021,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1092,7 +1089,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1127,7 +1123,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1197,7 +1192,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
@@ -1267,7 +1261,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -2017,7 +2010,7 @@ static int qmp_combo_power_on(struct phy *phy)
 	void __iomem *rx = qphy->rx;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *status;
-	unsigned int mask, val, ready;
+	unsigned int val;
 	int ret;
 
 	qmp_combo_serdes_init(qphy);
@@ -2059,10 +2052,7 @@ static int qmp_combo_power_on(struct phy *phy)
 		qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 		status = pcs + cfg->regs[QPHY_PCS_STATUS];
-		mask = cfg->phy_status;
-		ready = 0;
-
-		ret = readl_poll_timeout(status, val, (val & mask) == ready, 10,
+		ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10,
 					 PHY_INIT_COMPLETE_TIMEOUT);
 		if (ret) {
 			dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From 0c1154d69511a030b8ebb2b873095a7fa851e189 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:51 +0200
Subject: [PATCH 0603/4122] phy: qcom-qmp-combo: increase status polling period

It typically takes between one and two milliseconds for the PHY to
become ready after starting it. Increase the tight 3--10 us polling
period to the more reasonable 51--200 us.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012085002.24099-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index e312cad6d9c21..0071c73ac1c83 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2052,7 +2052,7 @@ static int qmp_combo_power_on(struct phy *phy)
 		qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 		status = pcs + cfg->regs[QPHY_PCS_STATUS];
-		ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10,
+		ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
 					 PHY_INIT_COMPLETE_TIMEOUT);
 		if (ret) {
 			dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From c8f5c188156b87c115f27d44004428ede2e262f8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:52 +0200
Subject: [PATCH 0604/4122] phy: qcom-qmp-ufs: drop unused phy-status config

Drop the unused phy-status configuration mask which has never been used
for UFS PHYs.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 02931b82132f7..1c7d8fc9b94aa 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -31,8 +31,6 @@
 /* QPHY_START_CONTROL bits */
 #define SERDES_START				BIT(0)
 #define PCS_START				BIT(1)
-/* QPHY_PCS_STATUS bit */
-#define PHYSTATUS				BIT(6)
 /* QPHY_PCS_READY_STATUS bit */
 #define PCS_READY				BIT(0)
 
@@ -548,8 +546,6 @@ struct qmp_phy_cfg {
 
 	unsigned int start_ctrl;
 	unsigned int pwrdn_ctrl;
-	/* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */
-	unsigned int phy_status;
 
 	/* true, if PCS block has no separate SW_RESET register */
 	bool no_pcs_sw_reset;
@@ -668,7 +664,6 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = {
 
 	.start_ctrl		= SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.no_pcs_sw_reset	= true,
 };
@@ -692,7 +687,6 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = {
 
 	.start_ctrl		= SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.no_pcs_sw_reset	= true,
 };
@@ -739,7 +733,6 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = {
 
 	.start_ctrl		= SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
@@ -761,7 +754,6 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
 
 	.start_ctrl		= SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
@@ -783,7 +775,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
 
 	.start_ctrl		= SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static void qmp_ufs_configure_lane(void __iomem *base,
-- 
GitLab


From 2f561b687cf47e289b7e068881ad87530c1f1435 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:53 +0200
Subject: [PATCH 0605/4122] phy: qcom-qmp-ufs: clean up ready polling

Clean up the PHY ready polling by dropping the mask variables which are
no longer needed since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 1c7d8fc9b94aa..8380904cf26c2 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -908,7 +908,7 @@ static int qmp_ufs_power_on(struct phy *phy)
 	void __iomem *rx = qphy->rx;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *status;
-	unsigned int mask, val, ready;
+	unsigned int val;
 	int ret;
 
 	qmp_ufs_serdes_init(qphy);
@@ -937,10 +937,7 @@ static int qmp_ufs_power_on(struct phy *phy)
 	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	status = pcs + cfg->regs[QPHY_PCS_READY_STATUS];
-	mask = PCS_READY;
-	ready = PCS_READY;
-
-	ret = readl_poll_timeout(status, val, (val & mask) == ready, 10,
+	ret = readl_poll_timeout(status, val, (val & PCS_READY), 10,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From 7516edbfaf708e5b987f1b9f23aa7336dd4a812d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:54 +0200
Subject: [PATCH 0606/4122] phy: qcom-qmp-ufs: increase ready polling period

It typically takes between one and two milliseconds for the PHY to
become ready after starting it. Increase the tight 3--10 us polling
period to the more reasonable 51--200 us.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012085002.24099-12-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 8380904cf26c2..1a51f803928b7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -937,7 +937,7 @@ static int qmp_ufs_power_on(struct phy *phy)
 	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	status = pcs + cfg->regs[QPHY_PCS_READY_STATUS];
-	ret = readl_poll_timeout(status, val, (val & PCS_READY), 10,
+	ret = readl_poll_timeout(status, val, (val & PCS_READY), 200,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From f5ef85adece529a6cd1e7563081c41038923a9ed Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:55 +0200
Subject: [PATCH 0607/4122] phy: qcom-qmp-usb: clean up status polling

Clean up the PHY status polling by dropping the configuration mask which
is no longer needed since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-13-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 751f628710eb5..840b67167581e 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1445,8 +1445,6 @@ struct qmp_phy_cfg {
 
 	unsigned int start_ctrl;
 	unsigned int pwrdn_ctrl;
-	/* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */
-	unsigned int phy_status;
 
 	/* true, if PHY needs delay after POWER_DOWN */
 	bool has_pwrdn_delay;
@@ -1604,7 +1602,6 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
@@ -1628,7 +1625,6 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
@@ -1652,7 +1648,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
@@ -1679,7 +1674,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
@@ -1706,7 +1700,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
@@ -1730,7 +1723,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1756,7 +1748,6 @@ static const struct qmp_phy_cfg msm8998_usb3phy_cfg = {
 
 	.start_ctrl             = SERDES_START | PCS_START,
 	.pwrdn_ctrl             = SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
@@ -1783,7 +1774,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
@@ -1813,7 +1803,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1842,7 +1831,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
@@ -1872,7 +1860,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1901,7 +1888,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1930,7 +1916,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -1959,7 +1944,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
@@ -1989,7 +1973,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 
 	.has_pwrdn_delay	= true,
 };
@@ -2015,7 +1998,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
 
 	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN,
-	.phy_status		= PHYSTATUS,
 };
 
 static void qmp_usb_configure_lane(void __iomem *base,
@@ -2147,7 +2129,7 @@ static int qmp_usb_power_on(struct phy *phy)
 	void __iomem *rx = qphy->rx;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *status;
-	unsigned int mask, val, ready;
+	unsigned int val;
 	int ret;
 
 	qmp_usb_serdes_init(qphy);
@@ -2181,10 +2163,7 @@ static int qmp_usb_power_on(struct phy *phy)
 	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
-	mask = cfg->phy_status;
-	ready = 0;
-
-	ret = readl_poll_timeout(status, val, (val & mask) == ready, 10,
+	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From 7612890b9df8f3f4f9b4fd39d988a5afd97aa3e7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:56 +0200
Subject: [PATCH 0608/4122] phy: qcom-qmp-usb: increase status polling period

It typically takes between one and two milliseconds for the PHY to
become ready after starting it. Increase the tight 3--10 us polling
period to the more reasonable 51--200 us.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221012085002.24099-14-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 840b67167581e..0bd9291e6a7b5 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2163,7 +2163,7 @@ static int qmp_usb_power_on(struct phy *phy)
 	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
-	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 10,
+	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
 				 PHY_INIT_COMPLETE_TIMEOUT);
 	if (ret) {
 		dev_err(qmp->dev, "phy initialization timed-out\n");
-- 
GitLab


From d4b81490fe44429203ae6e55df8a556e5b77c88e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:57 +0200
Subject: [PATCH 0609/4122] phy: qcom-qmp-combo: drop start and pwrdn-ctrl
 abstraction

All USB PHYs need to start and stop the SerDes and PCS so drop the
start-ctrl abstraction which is no longer needed since the QMP driver
split.

Similarly, drop the pwrdn-ctrl abstraction which also is not needed
since the split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-15-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 29 +++++------------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 0071c73ac1c83..2fab8d5ec0f1d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -851,9 +851,6 @@ struct qmp_phy_cfg {
 	/* array of registers with different offsets */
 	const unsigned int *regs;
 
-	unsigned int start_ctrl;
-	unsigned int pwrdn_ctrl;
-
 	/* true, if PHY needs delay after POWER_DOWN */
 	bool has_pwrdn_delay;
 
@@ -1019,9 +1016,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1087,9 +1081,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1121,9 +1112,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x300,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1189,9 +1177,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x300,
-
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
@@ -1259,9 +1244,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x300,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1944,8 +1926,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy)
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03);
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 
-	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN);
 
 	mutex_unlock(&qmp->phy_mutex);
 
@@ -2049,7 +2030,8 @@ static int qmp_combo_power_on(struct phy *phy)
 		/* Pull PHY out of reset state */
 		qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 		/* start SerDes and Phy-Coding-Sublayer */
-		qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+		qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL],
+				SERDES_START | PCS_START);
 
 		status = pcs + cfg->regs[QPHY_PCS_STATUS];
 		ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
@@ -2082,11 +2064,12 @@ static int qmp_combo_power_off(struct phy *phy)
 		qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 		/* stop SerDes and Phy-Coding-Sublayer */
-		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+				SERDES_START | PCS_START);
 
 		/* Put PHY into POWER DOWN state: active low */
 		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-				cfg->pwrdn_ctrl);
+				SW_PWRDN);
 	}
 
 	return 0;
-- 
GitLab


From 5806b87dea8fc1b65a542ef93cbe5f6114157a74 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:58 +0200
Subject: [PATCH 0610/4122] phy: qcom-qmp-pcie: drop start-ctrl abstraction

All PCIe PHYs need to start and stop the SerDes and PCS so drop the
start-ctrl abstraction which is no longer needed since the QMP driver
split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-16-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index d3e7e673114fb..5534a4ad02435 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1355,7 +1355,6 @@ struct qmp_phy_cfg {
 	/* array of registers with different offsets */
 	const unsigned int *regs;
 
-	unsigned int start_ctrl;
 	unsigned int pwrdn_ctrl;
 	/* bit offset of PHYSTATUS in QPHY_PCS_STATUS register */
 	unsigned int phy_status;
@@ -1491,7 +1490,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 	.num_vregs		= 0,
 	.regs			= pciephy_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1517,7 +1515,6 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 	.num_vregs		= 0,
 	.regs			= ipq_pciephy_gen3_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 
@@ -1547,7 +1544,6 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 	.num_vregs		= 0,
 	.regs			= ipq_pciephy_gen3_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1575,7 +1571,6 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sdm845_qmp_pciephy_regs_layout,
 
-	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1601,7 +1596,6 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sdm845_qhp_pciephy_regs_layout,
 
-	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1639,7 +1633,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8250_pcie_regs_layout,
 
-	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1677,7 +1670,6 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8250_pcie_regs_layout,
 
-	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1703,7 +1695,6 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= pciephy_regs_layout,
 
-	.start_ctrl             = SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 
@@ -1733,7 +1724,6 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8250_pcie_regs_layout,
 
-	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1761,7 +1751,6 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8250_pcie_regs_layout,
 
-	.start_ctrl		= PCS_START | SERDES_START,
 	.pwrdn_ctrl		= SW_PWRDN,
 	.phy_status		= PHYSTATUS_4_20,
 };
@@ -1789,7 +1778,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8250_pcie_regs_layout,
 
-	.start_ctrl             = SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS,
 };
@@ -1832,7 +1820,6 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8250_pcie_regs_layout,
 
-	.start_ctrl             = SERDES_START | PCS_START,
 	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 	.phy_status		= PHYSTATUS_4_20,
 };
@@ -1997,7 +1984,7 @@ static int qmp_pcie_power_on(struct phy *phy)
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* start SerDes and Phy-Coding-Sublayer */
-	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START);
 
 	if (!cfg->skip_start_delay)
 		usleep_range(1000, 1200);
@@ -2030,7 +2017,8 @@ static int qmp_pcie_power_off(struct phy *phy)
 	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes and Phy-Coding-Sublayer */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+			SERDES_START | PCS_START);
 
 	/* Put PHY into POWER DOWN state: active low */
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-- 
GitLab


From 73ad6a9dd51799afd104edc2bf2016a347a717fe Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:49:59 +0200
Subject: [PATCH 0611/4122] phy: qcom-qmp-pcie: add config sanity checks

The driver expects every configuration to set the pwrdn_ctrl and
phy_status masks. Add some probe WARN_ON_ONCE() to probe to catch any
new driver support that fails to provide them.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-17-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 5534a4ad02435..7c81667dd9680 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2347,6 +2347,9 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	if (!cfg)
 		return -EINVAL;
 
+	WARN_ON_ONCE(!cfg->pwrdn_ctrl);
+	WARN_ON_ONCE(!cfg->phy_status);
+
 	serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(serdes))
 		return PTR_ERR(serdes);
-- 
GitLab


From 3d3db6f024e70255899a32323e20561c8c6f5850 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:50:00 +0200
Subject: [PATCH 0612/4122] phy: qcom-qmp-pcie-msm8996: drop start and
 pwrdn-ctrl abstraction

Drop the start and pwrdn-ctrl abstractions which are no longer needed
since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-18-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c    | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index cd8fafe4c2956..ff198d846fd2d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -196,9 +196,6 @@ struct qmp_phy_cfg {
 
 	/* array of registers with different offsets */
 	const unsigned int *regs;
-
-	unsigned int start_ctrl;
-	unsigned int pwrdn_ctrl;
 };
 
 /**
@@ -311,9 +308,6 @@ static const struct qmp_phy_cfg msm8996_pciephy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= pciephy_regs_layout,
-
-	.start_ctrl		= PCS_START | PLL_READY_GATE_EN,
-	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
 };
 
 static void qmp_pcie_msm8996_configure_lane(void __iomem *base,
@@ -503,7 +497,8 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
 	 * Pull out PHY from POWER DOWN state.
 	 * This is active low enable signal to power-down PHY.
 	 */
-	qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL, cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
+			SW_PWRDN | REFCLK_DRV_DSBL);
 
 	usleep_range(POWER_DOWN_DELAY_US_MIN, POWER_DOWN_DELAY_US_MAX);
 
@@ -511,7 +506,8 @@ static int qmp_pcie_msm8996_power_on(struct phy *phy)
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* start SerDes and Phy-Coding-Sublayer */
-	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL],
+			PCS_START | PLL_READY_GATE_EN);
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
 	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
@@ -542,11 +538,12 @@ static int qmp_pcie_msm8996_power_off(struct phy *phy)
 	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes and Phy-Coding-Sublayer */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+			SERDES_START | PCS_START);
 
 	/* Put PHY into POWER DOWN state: active low */
 	qphy_clrbits(qphy->pcs, QPHY_V2_PCS_POWER_DOWN_CONTROL,
-			cfg->pwrdn_ctrl);
+			SW_PWRDN | REFCLK_DRV_DSBL);
 
 	return 0;
 }
-- 
GitLab


From cb4a982fa94a106c3e5d7d9f596375ae442a71ba Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:50:01 +0200
Subject: [PATCH 0613/4122] phy: qcom-qmp-ufs: drop start and pwrdn-ctrl
 abstraction

Drop the start and pwrdn-ctrl abstractions which are no longer needed
since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-19-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 35 +++++--------------------
 1 file changed, 7 insertions(+), 28 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 1a51f803928b7..9b58d742af3b7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -544,9 +544,6 @@ struct qmp_phy_cfg {
 	/* array of registers with different offsets */
 	const unsigned int *regs;
 
-	unsigned int start_ctrl;
-	unsigned int pwrdn_ctrl;
-
 	/* true, if PCS block has no separate SW_RESET register */
 	bool no_pcs_sw_reset;
 };
@@ -662,9 +659,6 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = {
 
 	.regs			= msm8996_ufsphy_regs_layout,
 
-	.start_ctrl		= SERDES_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.no_pcs_sw_reset	= true,
 };
 
@@ -685,9 +679,6 @@ static const struct qmp_phy_cfg sdm845_ufsphy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sdm845_ufsphy_regs_layout,
 
-	.start_ctrl		= SERDES_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.no_pcs_sw_reset	= true,
 };
 
@@ -708,9 +699,6 @@ static const struct qmp_phy_cfg sm6115_ufsphy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm6115_ufsphy_regs_layout,
 
-	.start_ctrl		= SERDES_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.no_pcs_sw_reset	= true,
 };
 
@@ -730,9 +718,6 @@ static const struct qmp_phy_cfg sm8150_ufsphy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8150_ufsphy_regs_layout,
-
-	.start_ctrl		= SERDES_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
@@ -751,9 +736,6 @@ static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8150_ufsphy_regs_layout,
-
-	.start_ctrl		= SERDES_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
@@ -772,9 +754,6 @@ static const struct qmp_phy_cfg sm8450_ufsphy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= sm8150_ufsphy_regs_layout,
-
-	.start_ctrl		= SERDES_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static void qmp_ufs_configure_lane(void __iomem *base,
@@ -832,8 +811,7 @@ static int qmp_ufs_com_init(struct qmp_phy *qphy)
 	if (ret)
 		goto err_disable_regulators;
 
-	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN);
 
 	return 0;
 
@@ -933,8 +911,9 @@ static int qmp_ufs_power_on(struct phy *phy)
 	/* Pull PHY out of reset state */
 	if (!cfg->no_pcs_sw_reset)
 		qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
-	/* start SerDes and Phy-Coding-Sublayer */
-	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+
+	/* start SerDes */
+	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START);
 
 	status = pcs + cfg->regs[QPHY_PCS_READY_STATUS];
 	ret = readl_poll_timeout(status, val, (val & PCS_READY), 200,
@@ -956,12 +935,12 @@ static int qmp_ufs_power_off(struct phy *phy)
 	if (!cfg->no_pcs_sw_reset)
 		qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
-	/* stop SerDes and Phy-Coding-Sublayer */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+	/* stop SerDes */
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START);
 
 	/* Put PHY into POWER DOWN state: active low */
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			cfg->pwrdn_ctrl);
+			SW_PWRDN);
 
 	return 0;
 }
-- 
GitLab


From 47b009db545ae90f0b50149029a6b8137685f524 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 12 Oct 2022 10:50:02 +0200
Subject: [PATCH 0614/4122] phy: qcom-qmp-usb: drop start and pwrdn-ctrl
 abstraction

Drop the start and pwrdn-ctrl abstractions which are no longer needed
since the QMP driver split.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221012085002.24099-20-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 61 ++-----------------------
 1 file changed, 5 insertions(+), 56 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 0bd9291e6a7b5..d0c433197080f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1443,9 +1443,6 @@ struct qmp_phy_cfg {
 	/* array of registers with different offsets */
 	const unsigned int *regs;
 
-	unsigned int start_ctrl;
-	unsigned int pwrdn_ctrl;
-
 	/* true, if PHY needs delay after POWER_DOWN */
 	bool has_pwrdn_delay;
 
@@ -1599,9 +1596,6 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= usb3phy_regs_layout,
-
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
@@ -1622,9 +1616,6 @@ static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= usb3phy_regs_layout,
-
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
@@ -1646,9 +1637,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3phy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
 };
@@ -1672,9 +1660,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
 };
@@ -1697,9 +1682,6 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v4_usb3phy_regs_layout,
-
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
@@ -1721,9 +1703,6 @@ static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1745,9 +1724,6 @@ static const struct qmp_phy_cfg msm8998_usb3phy_cfg = {
 	.vreg_list              = qmp_phy_vreg_l,
 	.num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs                   = qmp_v3_usb3phy_regs_layout,
-
-	.start_ctrl             = SERDES_START | PCS_START,
-	.pwrdn_ctrl             = SW_PWRDN,
 };
 
 static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
@@ -1772,9 +1748,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x300,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
 };
@@ -1801,9 +1774,6 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x600,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1829,9 +1799,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x300,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
 };
@@ -1858,9 +1825,6 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x600,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1886,9 +1850,6 @@ static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x600,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1914,9 +1875,6 @@ static const struct qmp_phy_cfg sdx65_usb3_uniphy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x1000,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1942,9 +1900,6 @@ static const struct qmp_phy_cfg sm8350_usb3phy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x300,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 	.has_phy_dp_com_ctrl	= true,
 };
@@ -1971,9 +1926,6 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x1000,
 
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
-
 	.has_pwrdn_delay	= true,
 };
 
@@ -1995,9 +1947,6 @@ static const struct qmp_phy_cfg qcm2290_usb3phy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qcm2290_usb3phy_regs_layout,
-
-	.start_ctrl		= SERDES_START | PCS_START,
-	.pwrdn_ctrl		= SW_PWRDN,
 };
 
 static void qmp_usb_configure_lane(void __iomem *base,
@@ -2092,8 +2041,7 @@ static int qmp_usb_init(struct phy *phy)
 		qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 	}
 
-	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			cfg->pwrdn_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN);
 
 	return 0;
 
@@ -2160,7 +2108,7 @@ static int qmp_usb_power_on(struct phy *phy)
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* start SerDes and Phy-Coding-Sublayer */
-	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START);
 
 	status = pcs + cfg->regs[QPHY_PCS_STATUS];
 	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
@@ -2189,11 +2137,12 @@ static int qmp_usb_power_off(struct phy *phy)
 	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes and Phy-Coding-Sublayer */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], cfg->start_ctrl);
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+			SERDES_START | PCS_START);
 
 	/* Put PHY into POWER DOWN state: active low */
 	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-			cfg->pwrdn_ctrl);
+			SW_PWRDN);
 
 	return 0;
 }
-- 
GitLab


From 922adfd59efd337059f8445a8d8968552b06ed4e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Thu, 29 Sep 2022 22:00:17 +0300
Subject: [PATCH 0615/4122] phy: qcom-qmp-usb: correct registers layout for
 IPQ8074 USB3 PHY

According to the kernel 4.4 sources from NHSS.QSDK.9.0.2 and according
to hardware docs, the PHY registers layout used for IPQ8074 USB3 PHY is
incorrect. This platform uses offset 0x174 for the PCS_STATUS register,
0xd8 for PCS_AUTONOMOUS_MODE_CTRL, etc.

Correct the PHY registers layout.

Fixes: 94a407cc17a4 ("phy: qcom-qmp: create copies of QMP PHY driver")
Fixes: 507156f5a99f ("phy: qcom-qmp: Add USB QMP PHY support for IPQ8074")
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Kathiravan T<quic_kathirav@quicinc.com>
Link: https://lore.kernel.org/r/20220929190017.529207-1-dmitry.baryshkov@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index d0c433197080f..cd167508f5281 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1595,7 +1595,7 @@ static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= usb3phy_regs_layout,
+	.regs			= qmp_v3_usb3phy_regs_layout,
 };
 
 static const struct qmp_phy_cfg msm8996_usb3phy_cfg = {
-- 
GitLab


From d907774ed5aab5a33ef4106ea3830e673196313b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:32 +0200
Subject: [PATCH 0616/4122] phy: qcom-qmp-ufs: move device-id table

Move the device-id table below probe() and next to the driver structure
to keep the driver callback functions grouped together.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 78 ++++++++++++-------------
 1 file changed, 39 insertions(+), 39 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 9b58d742af3b7..acb8efa1d7584 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -1081,45 +1081,6 @@ static int qmp_ufs_create(struct device *dev, struct device_node *np, int id,
 	return 0;
 }
 
-static const struct of_device_id qmp_ufs_of_match_table[] = {
-	{
-		.compatible = "qcom,msm8996-qmp-ufs-phy",
-		.data = &msm8996_ufs_cfg,
-	}, {
-		.compatible = "qcom,msm8998-qmp-ufs-phy",
-		.data = &sdm845_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sc8180x-qmp-ufs-phy",
-		.data = &sm8150_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sc8280xp-qmp-ufs-phy",
-		.data = &sm8350_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sdm845-qmp-ufs-phy",
-		.data = &sdm845_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sm6115-qmp-ufs-phy",
-		.data = &sm6115_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sm6350-qmp-ufs-phy",
-		.data = &sdm845_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sm8150-qmp-ufs-phy",
-		.data = &sm8150_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sm8250-qmp-ufs-phy",
-		.data = &sm8150_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sm8350-qmp-ufs-phy",
-		.data = &sm8350_ufsphy_cfg,
-	}, {
-		.compatible = "qcom,sm8450-qmp-ufs-phy",
-		.data = &sm8450_ufsphy_cfg,
-	},
-	{ },
-};
-MODULE_DEVICE_TABLE(of, qmp_ufs_of_match_table);
-
 static int qmp_ufs_probe(struct platform_device *pdev)
 {
 	struct qcom_qmp *qmp;
@@ -1185,6 +1146,45 @@ err_node_put:
 	return ret;
 }
 
+static const struct of_device_id qmp_ufs_of_match_table[] = {
+	{
+		.compatible = "qcom,msm8996-qmp-ufs-phy",
+		.data = &msm8996_ufs_cfg,
+	}, {
+		.compatible = "qcom,msm8998-qmp-ufs-phy",
+		.data = &sdm845_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sc8180x-qmp-ufs-phy",
+		.data = &sm8150_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sc8280xp-qmp-ufs-phy",
+		.data = &sm8350_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sdm845-qmp-ufs-phy",
+		.data = &sdm845_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sm6115-qmp-ufs-phy",
+		.data = &sm6115_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sm6350-qmp-ufs-phy",
+		.data = &sdm845_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sm8150-qmp-ufs-phy",
+		.data = &sm8150_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sm8250-qmp-ufs-phy",
+		.data = &sm8150_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sm8350-qmp-ufs-phy",
+		.data = &sm8350_ufsphy_cfg,
+	}, {
+		.compatible = "qcom,sm8450-qmp-ufs-phy",
+		.data = &sm8450_ufsphy_cfg,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, qmp_ufs_of_match_table);
+
 static struct platform_driver qmp_ufs_driver = {
 	.probe		= qmp_ufs_probe,
 	.driver = {
-- 
GitLab


From a36032db30deb2235bc18a8f1088c9a801bf66b0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:33 +0200
Subject: [PATCH 0617/4122] phy: qcom-qmp-ufs: merge driver data

The UFS QMP PHY driver only manages a single PHY so merge the old
qcom_qmp and qmp_phy structures and drop the PHY array.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 171 +++++++++---------------
 1 file changed, 63 insertions(+), 108 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index acb8efa1d7584..b4c3b3d97f523 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -548,54 +548,24 @@ struct qmp_phy_cfg {
 	bool no_pcs_sw_reset;
 };
 
-/**
- * struct qmp_phy - per-lane phy descriptor
- *
- * @phy: generic phy
- * @cfg: phy specific configuration
- * @serdes: iomapped memory space for phy's serdes (i.e. PLL)
- * @tx: iomapped memory space for lane's tx
- * @rx: iomapped memory space for lane's rx
- * @pcs: iomapped memory space for lane's pcs
- * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs)
- * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs)
- * @pcs_misc: iomapped memory space for lane's pcs_misc
- * @qmp: QMP phy to which this lane belongs
- */
-struct qmp_phy {
-	struct phy *phy;
+struct qmp_ufs {
+	struct device *dev;
+
 	const struct qmp_phy_cfg *cfg;
+
 	void __iomem *serdes;
+	void __iomem *pcs;
+	void __iomem *pcs_misc;
 	void __iomem *tx;
 	void __iomem *rx;
-	void __iomem *pcs;
 	void __iomem *tx2;
 	void __iomem *rx2;
-	void __iomem *pcs_misc;
-	struct qcom_qmp *qmp;
-};
-
-/**
- * struct qcom_qmp - structure holding QMP phy block attributes
- *
- * @dev: device
- *
- * @clks: array of clocks required by phy
- * @resets: array of resets required by phy
- * @vregs: regulator supplies bulk data
- *
- * @phys: array of per-lane phy descriptors
- * @ufs_reset: optional UFS PHY reset handle
- */
-struct qcom_qmp {
-	struct device *dev;
 
 	struct clk_bulk_data *clks;
 	struct regulator_bulk_data *vregs;
-
-	struct qmp_phy **phys;
-
 	struct reset_control *ufs_reset;
+
+	struct phy *phy;
 };
 
 static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
@@ -782,10 +752,10 @@ static void qmp_ufs_configure(void __iomem *base,
 	qmp_ufs_configure_lane(base, tbl, num, 0xff);
 }
 
-static int qmp_ufs_serdes_init(struct qmp_phy *qphy)
+static int qmp_ufs_serdes_init(struct qmp_ufs *qmp)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *serdes = qphy->serdes;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *serdes = qmp->serdes;
 	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
 	int serdes_tbl_num = cfg->serdes_tbl_num;
 
@@ -794,11 +764,10 @@ static int qmp_ufs_serdes_init(struct qmp_phy *qphy)
 	return 0;
 }
 
-static int qmp_ufs_com_init(struct qmp_phy *qphy)
+static int qmp_ufs_com_init(struct qmp_ufs *qmp)
 {
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs = qphy->pcs;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *pcs = qmp->pcs;
 	int ret;
 
 	ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
@@ -821,10 +790,9 @@ err_disable_regulators:
 	return ret;
 }
 
-static int qmp_ufs_com_exit(struct qmp_phy *qphy)
+static int qmp_ufs_com_exit(struct qmp_ufs *qmp)
 {
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
 	reset_control_assert(qmp->ufs_reset);
 
@@ -837,9 +805,8 @@ static int qmp_ufs_com_exit(struct qmp_phy *qphy)
 
 static int qmp_ufs_init(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_ufs *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	int ret;
 	dev_vdbg(qmp->dev, "Initializing QMP phy\n");
 
@@ -870,7 +837,7 @@ static int qmp_ufs_init(struct phy *phy)
 			return ret;
 	}
 
-	ret = qmp_ufs_com_init(qphy);
+	ret = qmp_ufs_com_init(qmp);
 	if (ret)
 		return ret;
 
@@ -879,28 +846,27 @@ static int qmp_ufs_init(struct phy *phy)
 
 static int qmp_ufs_power_on(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *tx = qphy->tx;
-	void __iomem *rx = qphy->rx;
-	void __iomem *pcs = qphy->pcs;
+	struct qmp_ufs *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *tx = qmp->tx;
+	void __iomem *rx = qmp->rx;
+	void __iomem *pcs = qmp->pcs;
 	void __iomem *status;
 	unsigned int val;
 	int ret;
 
-	qmp_ufs_serdes_init(qphy);
+	qmp_ufs_serdes_init(qmp);
 
 	/* Tx, Rx, and PCS configurations */
 	qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_ufs_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+		qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
 	qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_ufs_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
+		qmp_ufs_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
 	qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
@@ -928,18 +894,18 @@ static int qmp_ufs_power_on(struct phy *phy)
 
 static int qmp_ufs_power_off(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_ufs *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
 	/* PHY reset */
 	if (!cfg->no_pcs_sw_reset)
-		qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+		qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START);
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL], SERDES_START);
 
 	/* Put PHY into POWER DOWN state: active low */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			SW_PWRDN);
 
 	return 0;
@@ -947,9 +913,9 @@ static int qmp_ufs_power_off(struct phy *phy)
 
 static int qmp_ufs_exit(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_ufs *qmp = phy_get_drvdata(phy);
 
-	qmp_ufs_com_exit(qphy);
+	qmp_ufs_com_exit(qmp);
 
 	return 0;
 }
@@ -981,7 +947,7 @@ static int qmp_ufs_disable(struct phy *phy)
 
 static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_ufs *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_vregs;
 	int i;
 
@@ -997,7 +963,7 @@ static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 
 static int qmp_ufs_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_ufs *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_clks;
 	int i;
 
@@ -1017,78 +983,71 @@ static const struct phy_ops qcom_qmp_ufs_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int qmp_ufs_create(struct device *dev, struct device_node *np, int id,
+static int qmp_ufs_create(struct device *dev, struct device_node *np,
 			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_ufs *qmp = dev_get_drvdata(dev);
 	struct phy *generic_phy;
-	struct qmp_phy *qphy;
 	int ret;
 
-	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
-	if (!qphy)
-		return -ENOMEM;
-
-	qphy->cfg = cfg;
-	qphy->serdes = serdes;
+	qmp->cfg = cfg;
+	qmp->serdes = serdes;
 	/*
 	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
 	 */
-	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
-	if (IS_ERR(qphy->tx))
-		return PTR_ERR(qphy->tx);
+	qmp->tx = devm_of_iomap(dev, np, 0, NULL);
+	if (IS_ERR(qmp->tx))
+		return PTR_ERR(qmp->tx);
 
-	qphy->rx = devm_of_iomap(dev, np, 1, NULL);
-	if (IS_ERR(qphy->rx))
-		return PTR_ERR(qphy->rx);
+	qmp->rx = devm_of_iomap(dev, np, 1, NULL);
+	if (IS_ERR(qmp->rx))
+		return PTR_ERR(qmp->rx);
 
-	qphy->pcs = devm_of_iomap(dev, np, 2, NULL);
-	if (IS_ERR(qphy->pcs))
-		return PTR_ERR(qphy->pcs);
+	qmp->pcs = devm_of_iomap(dev, np, 2, NULL);
+	if (IS_ERR(qmp->pcs))
+		return PTR_ERR(qmp->pcs);
 
 	if (cfg->lanes >= 2) {
-		qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
-		if (IS_ERR(qphy->tx2))
-			return PTR_ERR(qphy->tx2);
+		qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
+		if (IS_ERR(qmp->tx2))
+			return PTR_ERR(qmp->tx2);
 
-		qphy->rx2 = devm_of_iomap(dev, np, 4, NULL);
-		if (IS_ERR(qphy->rx2))
-			return PTR_ERR(qphy->rx2);
+		qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
+		if (IS_ERR(qmp->rx2))
+			return PTR_ERR(qmp->rx2);
 
-		qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
+		qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
 	} else {
-		qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
+		qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
 	}
 
-	if (IS_ERR(qphy->pcs_misc))
+	if (IS_ERR(qmp->pcs_misc))
 		dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
 
 	generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_ops);
 	if (IS_ERR(generic_phy)) {
 		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create qphy %d\n", ret);
+		dev_err(dev, "failed to create PHY: %d\n", ret);
 		return ret;
 	}
 
-	qphy->phy = generic_phy;
-	qphy->qmp = qmp;
-	qmp->phys[id] = qphy;
-	phy_set_drvdata(generic_phy, qphy);
+	qmp->phy = generic_phy;
+	phy_set_drvdata(generic_phy, qmp);
 
 	return 0;
 }
 
 static int qmp_ufs_probe(struct platform_device *pdev)
 {
-	struct qcom_qmp *qmp;
 	struct device *dev = &pdev->dev;
 	struct device_node *child;
 	struct phy_provider *phy_provider;
 	void __iomem *serdes;
 	const struct qmp_phy_cfg *cfg = NULL;
+	struct qmp_ufs *qmp;
 	int num, id;
 	int ret;
 
@@ -1120,14 +1079,10 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	if (num > 1)
 		return -EINVAL;
 
-	qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL);
-	if (!qmp->phys)
-		return -ENOMEM;
-
 	id = 0;
 	for_each_available_child_of_node(dev->of_node, child) {
 		/* Create per-lane phy */
-		ret = qmp_ufs_create(dev, child, id, serdes, cfg);
+		ret = qmp_ufs_create(dev, child, serdes, cfg);
 		if (ret) {
 			dev_err(dev, "failed to create lane%d phy, %d\n",
 				id, ret);
-- 
GitLab


From cb2c3d2ee46fe56144b74a22504e023aa59835aa Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:34 +0200
Subject: [PATCH 0618/4122] phy: qcom-qmp-ufs: clean up device-tree parsing

Since the QMP driver split there will be at most a single child node so
drop the obsolete iteration construct.

While at it, drop the verbose error logging that would have been
printed also on probe deferrals.

Note that there's no need to check if there are additional child nodes
(the kernel is not a devicetree validator), but let's return an error if
there are no child nodes at all for now.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index b4c3b3d97f523..25744b3576f3e 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -1048,7 +1048,6 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	void __iomem *serdes;
 	const struct qmp_phy_cfg *cfg = NULL;
 	struct qmp_ufs *qmp;
-	int num, id;
 	int ret;
 
 	qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL);
@@ -1074,23 +1073,15 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	num = of_get_available_child_count(dev->of_node);
-	/* do we have a rogue child node ? */
-	if (num > 1)
+	child = of_get_next_available_child(dev->of_node, NULL);
+	if (!child)
 		return -EINVAL;
 
-	id = 0;
-	for_each_available_child_of_node(dev->of_node, child) {
-		/* Create per-lane phy */
-		ret = qmp_ufs_create(dev, child, serdes, cfg);
-		if (ret) {
-			dev_err(dev, "failed to create lane%d phy, %d\n",
-				id, ret);
-			goto err_node_put;
-		}
+	ret = qmp_ufs_create(dev, child, serdes, cfg);
+	if (ret)
+		goto err_node_put;
 
-		id++;
-	}
+	of_node_put(child);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
-- 
GitLab


From 018dfc99aef2f487f95e07fcbd600e02d290ba18 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:35 +0200
Subject: [PATCH 0619/4122] phy: qcom-qmp-ufs: clean up probe initialisation

Stop abusing the driver data pointer and instead pass the driver state
structure directly to the initialisation helpers during probe.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 37 ++++++++++++-------------
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 25744b3576f3e..057f9a3fdd6d1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -945,9 +945,10 @@ static int qmp_ufs_disable(struct phy *phy)
 	return qmp_ufs_exit(phy);
 }
 
-static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_ufs_vreg_init(struct qmp_ufs *qmp)
 {
-	struct qmp_ufs *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_vregs;
 	int i;
 
@@ -961,9 +962,10 @@ static int qmp_ufs_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 	return devm_regulator_bulk_get(dev, num, qmp->vregs);
 }
 
-static int qmp_ufs_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_ufs_clk_init(struct qmp_ufs *qmp)
 {
-	struct qmp_ufs *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_clks;
 	int i;
 
@@ -983,15 +985,13 @@ static const struct phy_ops qcom_qmp_ufs_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int qmp_ufs_create(struct device *dev, struct device_node *np,
-			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
+static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np)
 {
-	struct qmp_ufs *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	struct phy *generic_phy;
 	int ret;
 
-	qmp->cfg = cfg;
-	qmp->serdes = serdes;
 	/*
 	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
@@ -1045,8 +1045,6 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *child;
 	struct phy_provider *phy_provider;
-	void __iomem *serdes;
-	const struct qmp_phy_cfg *cfg = NULL;
 	struct qmp_ufs *qmp;
 	int ret;
 
@@ -1055,21 +1053,20 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	qmp->dev = dev;
-	dev_set_drvdata(dev, qmp);
 
-	cfg = of_device_get_match_data(dev);
-	if (!cfg)
+	qmp->cfg = of_device_get_match_data(dev);
+	if (!qmp->cfg)
 		return -EINVAL;
 
-	serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(serdes))
-		return PTR_ERR(serdes);
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
 
-	ret = qmp_ufs_clk_init(dev, cfg);
+	ret = qmp_ufs_clk_init(qmp);
 	if (ret)
 		return ret;
 
-	ret = qmp_ufs_vreg_init(dev, cfg);
+	ret = qmp_ufs_vreg_init(qmp);
 	if (ret)
 		return ret;
 
@@ -1077,7 +1074,7 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	if (!child)
 		return -EINVAL;
 
-	ret = qmp_ufs_create(dev, child, serdes, cfg);
+	ret = qmp_ufs_create(qmp, child);
 	if (ret)
 		goto err_node_put;
 
-- 
GitLab


From b98e44e608bcb4a2c235b50cc48144e7043595b5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:36 +0200
Subject: [PATCH 0620/4122] phy: qcom-qmp-ufs: rename PHY ops structure

Rename the PHY operation structure so that it has a "phy_ops" suffix and
move it next to the implementation.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 057f9a3fdd6d1..90583b87efd1c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -945,6 +945,12 @@ static int qmp_ufs_disable(struct phy *phy)
 	return qmp_ufs_exit(phy);
 }
 
+static const struct phy_ops qcom_qmp_ufs_phy_ops = {
+	.power_on	= qmp_ufs_enable,
+	.power_off	= qmp_ufs_disable,
+	.owner		= THIS_MODULE,
+};
+
 static int qmp_ufs_vreg_init(struct qmp_ufs *qmp)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -979,12 +985,6 @@ static int qmp_ufs_clk_init(struct qmp_ufs *qmp)
 	return devm_clk_bulk_get(dev, num, qmp->clks);
 }
 
-static const struct phy_ops qcom_qmp_ufs_ops = {
-	.power_on	= qmp_ufs_enable,
-	.power_off	= qmp_ufs_disable,
-	.owner		= THIS_MODULE,
-};
-
 static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -1027,7 +1027,7 @@ static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np)
 	if (IS_ERR(qmp->pcs_misc))
 		dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
 
-	generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_ops);
+	generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops);
 	if (IS_ERR(generic_phy)) {
 		ret = PTR_ERR(generic_phy);
 		dev_err(dev, "failed to create PHY: %d\n", ret);
-- 
GitLab


From e0a0c761d2203955585b2c93126d6a712726c368 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:37 +0200
Subject: [PATCH 0621/4122] phy: qcom-qmp-ufs: clean up PHY init

Clean up the PHY initialisation somewhat programming both tx and rx for
the second lane after the first lane.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 90583b87efd1c..fa703e856eb78 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -859,14 +859,12 @@ static int qmp_ufs_power_on(struct phy *phy)
 
 	/* Tx, Rx, and PCS configurations */
 	qmp_ufs_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
-
-	if (cfg->lanes >= 2)
-		qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
-
 	qmp_ufs_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
-	if (cfg->lanes >= 2)
+	if (cfg->lanes >= 2) {
+		qmp_ufs_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 		qmp_ufs_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
+	}
 
 	qmp_ufs_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
-- 
GitLab


From 54293c08f2c01efff4a8c1c61290e5f8e34df2df Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:38 +0200
Subject: [PATCH 0622/4122] dt-bindings: phy: qcom,qmp-ufs: rename current
 bindings

The current QMP UFS PHY bindings are based on the original MSM8996 PCIe
PHY binding which provided multiple PHYs per IP block and these in turn
were described by child nodes.

The QMP UFS PHY block only provide a single PHY and the remnant child
node does not really reflect the hardware.

The original MSM8996 binding also ended up describing the individual
register blocks as belonging to either the wrapper node or the PHY child
nodes.

This is an unnecessary level of detail which has lead to problems when
later IP blocks using different register layouts have been forced to fit
the original mould rather than updating the binding. The bindings are
arguable also incomplete as they only the describe register blocks used
by the current Linux drivers.

In preparation for adding new bindings for SC8280XP which further
bindings can be based on, rename the current bindings after MSM8996 and
add a reference to the SC8280XP bindings.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 ...qcom,qmp-ufs-phy.yaml => qcom,msm8996-qmp-ufs-phy.yaml} | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
 rename Documentation/devicetree/bindings/phy/{qcom,qmp-ufs-phy.yaml => qcom,msm8996-qmp-ufs-phy.yaml} (95%)

diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml
similarity index 95%
rename from Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml
rename to Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml
index 815c375d0f7bc..438f9606414ac 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qmp-ufs-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/phy/qcom,qmp-ufs-phy.yaml#
+$id: http://devicetree.org/schemas/phy/qcom,msm8996-qmp-ufs-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm QMP PHY controller (UFS)
+title: Qualcomm QMP PHY controller (UFS, MSM8996)
 
 maintainers:
   - Vinod Koul <vkoul@kernel.org>
@@ -13,6 +13,9 @@ description:
   QMP PHY controller supports physical layer functionality for a number of
   controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB.
 
+  Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see
+  qcom,sc8280xp-qmp-ufs-phy.yaml.
+
 properties:
   compatible:
     enum:
-- 
GitLab


From 7741f31ae44568f9c32046aaf4c6c41a51359f6d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:39 +0200
Subject: [PATCH 0623/4122] dt-bindings: phy: qcom,qmp-ufs: fix sc8280xp
 binding

The current QMP UFS PHY bindings are based on the original MSM8996 PCIe
PHY binding which provided multiple PHYs per IP block and these in turn
were described by child nodes.

The QMP UFS PHY block only provide a single PHY and the remnant child
node does not really reflect the hardware.

The original MSM8996 binding also ended up describing the individual
register blocks as belonging to either the wrapper node or the PHY child
nodes.

This is an unnecessary level of detail which has lead to problems when
later IP blocks using different register layouts have been forced to fit
the original mould rather than updating the binding. The bindings are
arguable also incomplete as they only the describe register blocks used
by the current Linux drivers.

Add a new binding for the UFS QMP PHYs found on SC8280XP which further
bindings can be based on.

Note that the current binding is simply removed instead of being
deprecated as it was only recently merged and support for SC8280XP is
still under development.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/qcom,msm8996-qmp-ufs-phy.yaml         | 10 +--
 .../phy/qcom,sc8280xp-qmp-ufs-phy.yaml        | 83 +++++++++++++++++++
 2 files changed, 87 insertions(+), 6 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml

diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml
index 438f9606414ac..be41acbd3b6c3 100644
--- a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-ufs-phy.yaml
@@ -22,7 +22,6 @@ properties:
       - qcom,msm8996-qmp-ufs-phy
       - qcom,msm8998-qmp-ufs-phy
       - qcom,sc8180x-qmp-ufs-phy
-      - qcom,sc8280xp-qmp-ufs-phy
       - qcom,sdm845-qmp-ufs-phy
       - qcom,sm6115-qmp-ufs-phy
       - qcom,sm6350-qmp-ufs-phy
@@ -122,7 +121,6 @@ allOf:
             enum:
               - qcom,msm8998-qmp-ufs-phy
               - qcom,sc8180x-qmp-ufs-phy
-              - qcom,sc8280xp-qmp-ufs-phy
               - qcom,sdm845-qmp-ufs-phy
               - qcom,sm6115-qmp-ufs-phy
               - qcom,sm6350-qmp-ufs-phy
@@ -159,7 +157,6 @@ allOf:
           contains:
             enum:
               - qcom,msm8998-qmp-ufs-phy
-              - qcom,sc8280xp-qmp-ufs-phy
               - qcom,sdm845-qmp-ufs-phy
               - qcom,sm6350-qmp-ufs-phy
               - qcom,sm8150-qmp-ufs-phy
@@ -214,11 +211,12 @@ allOf:
 
 examples:
   - |
-    #include <dt-bindings/clock/qcom,gcc-sc8280xp.h>
+    #include <dt-bindings/clock/qcom,gcc-sm8250.h>
     #include <dt-bindings/clock/qcom,rpmh.h>
+
     phy-wrapper@1d87000 {
-        compatible = "qcom,sc8280xp-qmp-ufs-phy";
-        reg = <0x01d87000 0xe10>;
+        compatible = "qcom,sm8250-qmp-ufs-phy";
+        reg = <0x01d87000 0x1c0>;
         #address-cells = <1>;
         #size-cells = <1>;
         ranges = <0x0 0x01d87000 0x1000>;
diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml
new file mode 100644
index 0000000000000..dde86a19f7926
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-ufs-phy.yaml
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-ufs-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QMP PHY controller (UFS, SC8280XP)
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+
+description:
+  The QMP PHY controller supports physical layer functionality for a number of
+  controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sc8280xp-qmp-ufs-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 2
+
+  clock-names:
+    items:
+      - const: ref
+      - const: ref_aux
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    items:
+      - const: ufsphy
+
+  vdda-phy-supply: true
+
+  vdda-pll-supply: true
+
+  "#phy-cells":
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - power-domains
+  - resets
+  - reset-names
+  - vdda-phy-supply
+  - vdda-pll-supply
+  - "#phy-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-sc8280xp.h>
+
+    ufs_mem_phy: phy@1d87000 {
+        compatible = "qcom,sc8280xp-qmp-ufs-phy";
+        reg = <0x01d87000 0x1000>;
+
+        clocks = <&gcc GCC_UFS_REF_CLKREF_CLK>, <&gcc GCC_UFS_PHY_PHY_AUX_CLK>;
+        clock-names = "ref", "ref_aux";
+
+        power-domains = <&gcc UFS_PHY_GDSC>;
+
+        resets = <&ufs_mem_hc 0>;
+        reset-names = "ufsphy";
+
+        vdda-phy-supply = <&vreg_l6b>;
+        vdda-pll-supply = <&vreg_l3b>;
+
+        #phy-cells = <0>;
+    };
-- 
GitLab


From c64d39b403d8dc751ea6a56f97962f93f811fed4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:40 +0200
Subject: [PATCH 0624/4122] phy: qcom-qmp-ufs: restructure PHY creation

In preparation for supporting devicetree bindings which do not use a
child node, move the PHY creation to probe() proper and parse the serdes
resource in what is now the legacy devicetree helper.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 34 ++++++++++++-------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index fa703e856eb78..bf5c1a6b9ca4f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -983,12 +983,15 @@ static int qmp_ufs_clk_init(struct qmp_ufs *qmp)
 	return devm_clk_bulk_get(dev, num, qmp->clks);
 }
 
-static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np)
+static int qmp_ufs_parse_dt_legacy(struct qmp_ufs *qmp, struct device_node *np)
 {
+	struct platform_device *pdev = to_platform_device(qmp->dev);
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	struct device *dev = qmp->dev;
-	struct phy *generic_phy;
-	int ret;
+
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
 
 	/*
 	 * Get memory resources for the PHY:
@@ -1025,16 +1028,6 @@ static int qmp_ufs_create(struct qmp_ufs *qmp, struct device_node *np)
 	if (IS_ERR(qmp->pcs_misc))
 		dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
 
-	generic_phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops);
-	if (IS_ERR(generic_phy)) {
-		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create PHY: %d\n", ret);
-		return ret;
-	}
-
-	qmp->phy = generic_phy;
-	phy_set_drvdata(generic_phy, qmp);
-
 	return 0;
 }
 
@@ -1056,10 +1049,6 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	if (!qmp->cfg)
 		return -EINVAL;
 
-	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(qmp->serdes))
-		return PTR_ERR(qmp->serdes);
-
 	ret = qmp_ufs_clk_init(qmp);
 	if (ret)
 		return ret;
@@ -1072,10 +1061,19 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	if (!child)
 		return -EINVAL;
 
-	ret = qmp_ufs_create(qmp, child);
+	ret = qmp_ufs_parse_dt_legacy(qmp, child);
 	if (ret)
 		goto err_node_put;
 
+	qmp->phy = devm_phy_create(dev, child, &qcom_qmp_ufs_phy_ops);
+	if (IS_ERR(qmp->phy)) {
+		ret = PTR_ERR(qmp->phy);
+		dev_err(dev, "failed to create PHY: %d\n", ret);
+		goto err_node_put;
+	}
+
+	phy_set_drvdata(qmp->phy, qmp);
+
 	of_node_put(child);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
-- 
GitLab


From 0e089bb8b31f7651d364723122af7ba7be7b98a9 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 24 Oct 2022 11:00:41 +0200
Subject: [PATCH 0625/4122] phy: qcom-qmp-ufs: add support for updated sc8280xp
 binding

Add support for the new SC8280XP binding.

Note that the binding does not try to describe every register subregion
and instead the driver holds the corresponding offsets.

Also note that the driver will continue to accept the old binding, at
least for the time being.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221024090041.19574-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c | 90 ++++++++++++++++++++++---
 1 file changed, 80 insertions(+), 10 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index bf5c1a6b9ca4f..189103d1bd180 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -520,10 +520,21 @@ static const struct qmp_phy_init_tbl sm8350_ufsphy_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_MULTI_LANE_CTRL1, 0x02),
 };
 
+struct qmp_ufs_offsets {
+	u16 serdes;
+	u16 pcs;
+	u16 tx;
+	u16 rx;
+	u16 tx2;
+	u16 rx2;
+};
+
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
 	int lanes;
 
+	const struct qmp_ufs_offsets *offsets;
+
 	/* Init sequence for PHY blocks - serdes, tx, rx, pcs */
 	const struct qmp_phy_init_tbl *serdes_tbl;
 	int serdes_tbl_num;
@@ -611,6 +622,15 @@ static const char * const qmp_phy_vreg_l[] = {
 	"vdda-phy", "vdda-pll",
 };
 
+static const struct qmp_ufs_offsets qmp_ufs_offsets_v5 = {
+	.serdes		= 0,
+	.pcs		= 0xc00,
+	.tx		= 0x400,
+	.rx		= 0x600,
+	.tx2		= 0x800,
+	.rx2		= 0xa00,
+};
+
 static const struct qmp_phy_cfg msm8996_ufs_cfg = {
 	.lanes			= 1,
 
@@ -632,6 +652,26 @@ static const struct qmp_phy_cfg msm8996_ufs_cfg = {
 	.no_pcs_sw_reset	= true,
 };
 
+static const struct qmp_phy_cfg sc8280xp_ufsphy_cfg = {
+	.lanes			= 2,
+
+	.offsets		= &qmp_ufs_offsets_v5,
+
+	.serdes_tbl		= sm8350_ufsphy_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_serdes_tbl),
+	.tx_tbl			= sm8350_ufsphy_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_tx_tbl),
+	.rx_tbl			= sm8350_ufsphy_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_rx_tbl),
+	.pcs_tbl		= sm8350_ufsphy_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(sm8350_ufsphy_pcs_tbl),
+	.clk_list		= sdm845_ufs_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(sdm845_ufs_phy_clk_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= sm8150_ufsphy_regs_layout,
+};
+
 static const struct qmp_phy_cfg sdm845_ufsphy_cfg = {
 	.lanes			= 2,
 
@@ -1031,11 +1071,38 @@ static int qmp_ufs_parse_dt_legacy(struct qmp_ufs *qmp, struct device_node *np)
 	return 0;
 }
 
+static int qmp_ufs_parse_dt(struct qmp_ufs *qmp)
+{
+	struct platform_device *pdev = to_platform_device(qmp->dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	const struct qmp_ufs_offsets *offs = cfg->offsets;
+	void __iomem *base;
+
+	if (!offs)
+		return -EINVAL;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	qmp->serdes = base + offs->serdes;
+	qmp->pcs = base + offs->pcs;
+	qmp->tx = base + offs->tx;
+	qmp->rx = base + offs->rx;
+
+	if (cfg->lanes >= 2) {
+		qmp->tx2 = base + offs->tx2;
+		qmp->rx2 = base + offs->rx2;
+	}
+
+	return 0;
+}
+
 static int qmp_ufs_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *child;
 	struct phy_provider *phy_provider;
+	struct device_node *np;
 	struct qmp_ufs *qmp;
 	int ret;
 
@@ -1057,15 +1124,18 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	child = of_get_next_available_child(dev->of_node, NULL);
-	if (!child)
-		return -EINVAL;
-
-	ret = qmp_ufs_parse_dt_legacy(qmp, child);
+	/* Check for legacy binding with child node. */
+	np = of_get_next_available_child(dev->of_node, NULL);
+	if (np) {
+		ret = qmp_ufs_parse_dt_legacy(qmp, np);
+	} else {
+		np = of_node_get(dev->of_node);
+		ret = qmp_ufs_parse_dt(qmp);
+	}
 	if (ret)
 		goto err_node_put;
 
-	qmp->phy = devm_phy_create(dev, child, &qcom_qmp_ufs_phy_ops);
+	qmp->phy = devm_phy_create(dev, np, &qcom_qmp_ufs_phy_ops);
 	if (IS_ERR(qmp->phy)) {
 		ret = PTR_ERR(qmp->phy);
 		dev_err(dev, "failed to create PHY: %d\n", ret);
@@ -1074,14 +1144,14 @@ static int qmp_ufs_probe(struct platform_device *pdev)
 
 	phy_set_drvdata(qmp->phy, qmp);
 
-	of_node_put(child);
+	of_node_put(np);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
 	return PTR_ERR_OR_ZERO(phy_provider);
 
 err_node_put:
-	of_node_put(child);
+	of_node_put(np);
 	return ret;
 }
 
@@ -1097,7 +1167,7 @@ static const struct of_device_id qmp_ufs_of_match_table[] = {
 		.data = &sm8150_ufsphy_cfg,
 	}, {
 		.compatible = "qcom,sc8280xp-qmp-ufs-phy",
-		.data = &sm8350_ufsphy_cfg,
+		.data = &sc8280xp_ufsphy_cfg,
 	}, {
 		.compatible = "qcom,sdm845-qmp-ufs-phy",
 		.data = &sdm845_ufsphy_cfg,
-- 
GitLab


From 2e5632aef677cc2e22cbc76704526b51e29b3a7b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 5 Oct 2022 17:13:48 +0100
Subject: [PATCH 0626/4122] power: supply: lp8788: make const array name static

Don't populate the read-only array name on the stack but instead make
it static. Since the data and the pointers don't change also add in
a missing const. Also makes the object code a little smaller.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/lp8788-charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/lp8788-charger.c b/drivers/power/supply/lp8788-charger.c
index 56c57529c2287..f5f47a0aa1e3d 100644
--- a/drivers/power/supply/lp8788-charger.c
+++ b/drivers/power/supply/lp8788-charger.c
@@ -520,7 +520,7 @@ err_free_irq:
 static int lp8788_irq_register(struct platform_device *pdev,
 				struct lp8788_charger *pchg)
 {
-	const char *name[] = {
+	static const char * const name[] = {
 		LP8788_CHG_IRQ, LP8788_PRSW_IRQ, LP8788_BATT_IRQ
 	};
 	int i;
-- 
GitLab


From 3f5b93f4077b37d576b6cdbcb636f443aa09eb03 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 23 Sep 2022 22:36:03 +0200
Subject: [PATCH 0627/4122] dt-bindings: power: reset: restart-handler: add
 common schema

Add common schema for restart and shutdown handlers, so they all use
same meaning of "priority" field.  The Linux drivers already have this
property and some systems want to customize it per-board in DTS.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../bindings/power/reset/gpio-restart.yaml    | 13 ++------
 .../bindings/power/reset/restart-handler.yaml | 30 +++++++++++++++++++
 2 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/power/reset/restart-handler.yaml

diff --git a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
index a72d5c7215161..d3d18e0f5db38 100644
--- a/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
+++ b/Documentation/devicetree/bindings/power/reset/gpio-restart.yaml
@@ -25,6 +25,9 @@ description: >
   inactive-delay, the GPIO is driven active again.  After a delay specified by wait-delay, the
   restart handler completes allowing other restart handlers to be attempted.
 
+allOf:
+  - $ref: restart-handler.yaml#
+
 properties:
   compatible:
     const: gpio-restart
@@ -41,16 +44,6 @@ properties:
       in its inactive state.
 
   priority:
-    $ref: /schemas/types.yaml#/definitions/uint32
-    description: |
-      A priority ranging from 0 to 255 (default 129) according to the following guidelines:
-
-        0:   Restart handler of last resort, with limited restart capabilities.
-        128: Default restart handler; use if no other restart handler is expected to be available,
-             and/or if restart functionality is sufficient to restart the entire system.
-        255: Highest priority restart handler, will preempt all other restart handlers.
-    minimum: 0
-    maximum: 255
     default: 129
 
   active-delay:
diff --git a/Documentation/devicetree/bindings/power/reset/restart-handler.yaml b/Documentation/devicetree/bindings/power/reset/restart-handler.yaml
new file mode 100644
index 0000000000000..1f9a2aac53c08
--- /dev/null
+++ b/Documentation/devicetree/bindings/power/reset/restart-handler.yaml
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/restart-handler.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Restart and shutdown handler generic binding
+
+maintainers:
+  - Sebastian Reichel <sre@kernel.org>
+
+description:
+  Restart and shutdown handler device is responsible for powering off the
+  system, e.g. my cutting off the power.  System might have several restart
+  handlers, which usually are tried from most precise to last resort.
+
+properties:
+  priority:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      A priority ranging from 0 to 255 according to the following guidelines::
+        0::   Restart handler of last resort, with limited restart capabilities.
+        128:: Typical, default restart handler; use if no other restart handler
+              is expected to be available, and/or if restart functionality is
+              sufficient to restart the entire system.
+        255:: Highest priority restart handler, will preempt all other restart handlers.
+    minimum: 0
+    maximum: 255
+
+additionalProperties: true
-- 
GitLab


From 03b33d4ac41e4d0bb3e4654f21e06caba4ccb400 Mon Sep 17 00:00:00 2001
From: wangjianli <wangjianli@cdjrlc.com>
Date: Sat, 22 Oct 2022 13:45:44 +0800
Subject: [PATCH 0628/4122] power/supply: fix repeated words in comments

Delete the redundant word 'the'.

Signed-off-by: wangjianli <wangjianli@cdjrlc.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/ab8500_charger.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c
index c19c50442761d..cf45a2c26b7f3 100644
--- a/drivers/power/supply/ab8500_charger.c
+++ b/drivers/power/supply/ab8500_charger.c
@@ -1940,7 +1940,7 @@ static int ab8500_charger_get_ext_psy_data(struct device *dev, void *data)
  *
  * Due to a asic bug it is necessary to lower the input current to the vbus
  * charger when charging with at some specific levels. This issue is only valid
- * for below a certain battery voltage. This function makes sure that the
+ * for below a certain battery voltage. This function makes sure that
  * the allowed current limit isn't exceeded.
  */
 static void ab8500_charger_check_vbat_work(struct work_struct *work)
-- 
GitLab


From 17c13c724b143c835fe3a9109daab524dff3d06f Mon Sep 17 00:00:00 2001
From: Maarten Zanders <maarten.zanders@mind.be>
Date: Fri, 28 Oct 2022 12:56:43 +0200
Subject: [PATCH 0629/4122] leds: lp5523: fix out-of-bounds bug in
 lp5523_selftest()

When not all LED channels of the led chip are configured, the
sysfs selftest functionality gives erroneous results and tries to
test all channels of the chip.
There is a potential for LED overcurrent conditions since the
test current will be set to values from out-of-bound regions.

It is wrong to use pdata->led_config[i].led_current to skip absent
channels as led_config[] only contains the configured LED channels.

Instead of iterating over all the physical channels of the device,
loop over the available LED configurations and use led->chan_nr to
access the correct i2c registers. Keep the zero-check for the LED
current as existing users might depend on this to disable a channel.

Reported-by: Arne Staessen <a.staessen@televic.com>
Signed-off-by: Maarten Zanders <maarten.zanders@mind.be>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/leds-lp5523.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/leds/leds-lp5523.c b/drivers/leds/leds-lp5523.c
index 369d40b0b65b8..e08e3de1428df 100644
--- a/drivers/leds/leds-lp5523.c
+++ b/drivers/leds/leds-lp5523.c
@@ -581,8 +581,8 @@ static ssize_t lp5523_selftest(struct device *dev,
 	struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
 	struct lp55xx_chip *chip = led->chip;
 	struct lp55xx_platform_data *pdata = chip->pdata;
-	int i, ret, pos = 0;
-	u8 status, adc, vdd;
+	int ret, pos = 0;
+	u8 status, adc, vdd, i;
 
 	mutex_lock(&chip->lock);
 
@@ -612,20 +612,21 @@ static ssize_t lp5523_selftest(struct device *dev,
 
 	vdd--;	/* There may be some fluctuation in measurement */
 
-	for (i = 0; i < LP5523_MAX_LEDS; i++) {
-		/* Skip non-existing channels */
+	for (i = 0; i < pdata->num_channels; i++) {
+		/* Skip disabled channels */
 		if (pdata->led_config[i].led_current == 0)
 			continue;
 
 		/* Set default current */
-		lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + i,
+		lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + led->chan_nr,
 			pdata->led_config[i].led_current);
 
-		lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + i, 0xff);
+		lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + led->chan_nr,
+			     0xff);
 		/* let current stabilize 2 - 4ms before measurements start */
 		usleep_range(2000, 4000);
 		lp55xx_write(chip, LP5523_REG_LED_TEST_CTRL,
-			     LP5523_EN_LEDTEST | i);
+			     LP5523_EN_LEDTEST | led->chan_nr);
 		/* ADC conversion time is 2.7 ms typically */
 		usleep_range(3000, 6000);
 		ret = lp55xx_read(chip, LP5523_REG_STATUS, &status);
@@ -633,20 +634,22 @@ static ssize_t lp5523_selftest(struct device *dev,
 			goto fail;
 
 		if (!(status & LP5523_LEDTEST_DONE))
-			usleep_range(3000, 6000);/* Was not ready. Wait. */
+			usleep_range(3000, 6000); /* Was not ready. Wait. */
 
 		ret = lp55xx_read(chip, LP5523_REG_LED_TEST_ADC, &adc);
 		if (ret < 0)
 			goto fail;
 
 		if (adc >= vdd || adc < LP5523_ADC_SHORTCIRC_LIM)
-			pos += sprintf(buf + pos, "LED %d FAIL\n", i);
+			pos += sprintf(buf + pos, "LED %d FAIL\n",
+				       led->chan_nr);
 
-		lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + i, 0x00);
+		lp55xx_write(chip, LP5523_REG_LED_PWM_BASE + led->chan_nr,
+			     0x00);
 
 		/* Restore current */
-		lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + i,
-			led->led_current);
+		lp55xx_write(chip, LP5523_REG_LED_CURRENT_BASE + led->chan_nr,
+			     led->led_current);
 		led++;
 	}
 	if (pos == 0)
-- 
GitLab


From 05e88ebb9ecfe9631ccc6483a79b0eabf554da60 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Fri, 21 Oct 2022 15:01:02 -0500
Subject: [PATCH 0630/4122] RDMA/rxe: Remove redundant header files

Remove unneeded include files.

Link: https://lore.kernel.org/r/20221021200118.2163-2-rpearsonhpe@gmail.com
Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_task.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index ec2b7de1c4972..3fbaba9eec396 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -4,10 +4,6 @@
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
  */
 
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/hardirq.h>
-
 #include "rxe.h"
 
 int __rxe_do_task(struct rxe_task *task)
-- 
GitLab


From 98a54f170617746b5d09b18b23b295efc7a42a5e Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Fri, 21 Oct 2022 15:01:03 -0500
Subject: [PATCH 0631/4122] RDMA/rxe: Remove init of task locks from rxe_qp.c

The calls to spin_lock_init() for the tasklet spinlocks in
rxe_qp_init_misc() are redundant since they are intiialized in
rxe_init_task().  This patch removes them.

Link: https://lore.kernel.org/r/20221021200118.2163-3-rpearsonhpe@gmail.com
Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_qp.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index a62bab88415cb..57c3f05ad15b1 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -172,10 +172,6 @@ static void rxe_qp_init_misc(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	spin_lock_init(&qp->state_lock);
 
-	spin_lock_init(&qp->req.task.state_lock);
-	spin_lock_init(&qp->resp.task.state_lock);
-	spin_lock_init(&qp->comp.task.state_lock);
-
 	spin_lock_init(&qp->sq.sq_lock);
 	spin_lock_init(&qp->rq.producer_lock);
 	spin_lock_init(&qp->rq.consumer_lock);
-- 
GitLab


From de669ae8af49ceed0eed44f5b3d51dc62affc5e4 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Fri, 21 Oct 2022 15:01:04 -0500
Subject: [PATCH 0632/4122] RDMA/rxe: Removed unused name from rxe_task struct

The name field in struct rxe_task is never used. This patch removes it.

Link: https://lore.kernel.org/r/20221021200118.2163-4-rpearsonhpe@gmail.com
Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_qp.c   | 9 +++------
 drivers/infiniband/sw/rxe/rxe_task.c | 4 +---
 drivers/infiniband/sw/rxe/rxe_task.h | 4 +---
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 57c3f05ad15b1..03bd9f3e9956a 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -238,10 +238,8 @@ static int rxe_qp_init_req(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	skb_queue_head_init(&qp->req_pkts);
 
-	rxe_init_task(&qp->req.task, qp,
-		      rxe_requester, "req");
-	rxe_init_task(&qp->comp.task, qp,
-		      rxe_completer, "comp");
+	rxe_init_task(&qp->req.task, qp, rxe_requester);
+	rxe_init_task(&qp->comp.task, qp, rxe_completer);
 
 	qp->qp_timeout_jiffies = 0; /* Can't be set for UD/UC in modify_qp */
 	if (init->qp_type == IB_QPT_RC) {
@@ -288,8 +286,7 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	skb_queue_head_init(&qp->resp_pkts);
 
-	rxe_init_task(&qp->resp.task, qp,
-		      rxe_responder, "resp");
+	rxe_init_task(&qp->resp.task, qp, rxe_responder);
 
 	qp->resp.opcode		= OPCODE_NONE;
 	qp->resp.msn		= 0;
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 3fbaba9eec396..0cbba455fefd8 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -90,12 +90,10 @@ void rxe_do_task(struct tasklet_struct *t)
 	task->ret = ret;
 }
 
-int rxe_init_task(struct rxe_task *task,
-		  void *arg, int (*func)(void *), char *name)
+int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
 {
 	task->arg	= arg;
 	task->func	= func;
-	snprintf(task->name, sizeof(task->name), "%s", name);
 	task->destroyed	= false;
 
 	tasklet_setup(&task->tasklet, rxe_do_task);
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 7f612a1c68a7b..b3dfd970d1dc6 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -25,7 +25,6 @@ struct rxe_task {
 	void			*arg;
 	int			(*func)(void *arg);
 	int			ret;
-	char			name[16];
 	bool			destroyed;
 };
 
@@ -34,8 +33,7 @@ struct rxe_task {
  *	arg  => parameter to pass to fcn
  *	func => function to call until it returns != 0
  */
-int rxe_init_task(struct rxe_task *task,
-		  void *arg, int (*func)(void *), char *name);
+int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *));
 
 /* cleanup task */
 void rxe_cleanup_task(struct rxe_task *task);
-- 
GitLab


From dccb23f6c312e4480fe32ccbc2afac1a5cac7e5e Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Fri, 21 Oct 2022 15:01:05 -0500
Subject: [PATCH 0633/4122] RDMA/rxe: Split rxe_run_task() into two subroutines

Split rxe_run_task(task, sched) into rxe_run_task(task) and
rxe_sched_task(task).

Link: https://lore.kernel.org/r/20221021200118.2163-5-rpearsonhpe@gmail.com
Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c  | 19 +++++++++++--------
 drivers/infiniband/sw/rxe/rxe_net.c   |  4 ++--
 drivers/infiniband/sw/rxe/rxe_qp.c    | 10 +++++-----
 drivers/infiniband/sw/rxe/rxe_req.c   | 10 +++++-----
 drivers/infiniband/sw/rxe/rxe_resp.c  |  5 ++++-
 drivers/infiniband/sw/rxe/rxe_task.c  | 15 ++++++++++-----
 drivers/infiniband/sw/rxe/rxe_task.h  |  7 +++----
 drivers/infiniband/sw/rxe/rxe_verbs.c |  8 ++++----
 8 files changed, 44 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index c9170dd99f3aa..66f392810c860 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -118,7 +118,7 @@ void retransmit_timer(struct timer_list *t)
 
 	if (qp->valid) {
 		qp->comp.timeout = 1;
-		rxe_run_task(&qp->comp.task, 1);
+		rxe_sched_task(&qp->comp.task);
 	}
 }
 
@@ -132,7 +132,10 @@ void rxe_comp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
 	if (must_sched != 0)
 		rxe_counter_inc(SKB_TO_PKT(skb)->rxe, RXE_CNT_COMPLETER_SCHED);
 
-	rxe_run_task(&qp->comp.task, must_sched);
+	if (must_sched)
+		rxe_sched_task(&qp->comp.task);
+	else
+		rxe_run_task(&qp->comp.task);
 }
 
 static inline enum comp_state get_wqe(struct rxe_qp *qp,
@@ -313,7 +316,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 					qp->comp.psn = pkt->psn;
 					if (qp->req.wait_psn) {
 						qp->req.wait_psn = 0;
-						rxe_run_task(&qp->req.task, 0);
+						rxe_run_task(&qp->req.task);
 					}
 				}
 				return COMPST_ERROR_RETRY;
@@ -460,7 +463,7 @@ static void do_complete(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	 */
 	if (qp->req.wait_fence) {
 		qp->req.wait_fence = 0;
-		rxe_run_task(&qp->req.task, 0);
+		rxe_run_task(&qp->req.task);
 	}
 }
 
@@ -474,7 +477,7 @@ static inline enum comp_state complete_ack(struct rxe_qp *qp,
 		if (qp->req.need_rd_atomic) {
 			qp->comp.timeout_retry = 0;
 			qp->req.need_rd_atomic = 0;
-			rxe_run_task(&qp->req.task, 0);
+			rxe_run_task(&qp->req.task);
 		}
 	}
 
@@ -520,7 +523,7 @@ static inline enum comp_state complete_wqe(struct rxe_qp *qp,
 
 		if (qp->req.wait_psn) {
 			qp->req.wait_psn = 0;
-			rxe_run_task(&qp->req.task, 1);
+			rxe_sched_task(&qp->req.task);
 		}
 	}
 
@@ -654,7 +657,7 @@ int rxe_completer(void *arg)
 
 			if (qp->req.wait_psn) {
 				qp->req.wait_psn = 0;
-				rxe_run_task(&qp->req.task, 1);
+				rxe_sched_task(&qp->req.task);
 			}
 
 			state = COMPST_DONE;
@@ -722,7 +725,7 @@ int rxe_completer(void *arg)
 							RXE_CNT_COMP_RETRY);
 					qp->req.need_retry = 1;
 					qp->comp.started_retry = 1;
-					rxe_run_task(&qp->req.task, 0);
+					rxe_run_task(&qp->req.task);
 				}
 				goto done;
 
diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index 35f327b9d4b8e..c36cad9c7a664 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -345,7 +345,7 @@ static void rxe_skb_tx_dtor(struct sk_buff *skb)
 
 	if (unlikely(qp->need_req_skb &&
 		     skb_out < RXE_INFLIGHT_SKBS_PER_QP_LOW))
-		rxe_run_task(&qp->req.task, 1);
+		rxe_sched_task(&qp->req.task);
 
 	rxe_put(qp);
 }
@@ -429,7 +429,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 	if ((qp_type(qp) != IB_QPT_RC) &&
 	    (pkt->mask & RXE_END_MASK)) {
 		pkt->wqe->state = wqe_state_done;
-		rxe_run_task(&qp->comp.task, 1);
+		rxe_sched_task(&qp->comp.task);
 	}
 
 	rxe_counter_inc(rxe, RXE_CNT_SENT_PKTS);
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 03bd9f3e9956a..3f6d62a80bea9 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -536,10 +536,10 @@ static void rxe_qp_drain(struct rxe_qp *qp)
 		if (qp->req.state != QP_STATE_DRAINED) {
 			qp->req.state = QP_STATE_DRAIN;
 			if (qp_type(qp) == IB_QPT_RC)
-				rxe_run_task(&qp->comp.task, 1);
+				rxe_sched_task(&qp->comp.task);
 			else
 				__rxe_do_task(&qp->comp.task);
-			rxe_run_task(&qp->req.task, 1);
+			rxe_sched_task(&qp->req.task);
 		}
 	}
 }
@@ -553,13 +553,13 @@ void rxe_qp_error(struct rxe_qp *qp)
 	qp->attr.qp_state = IB_QPS_ERR;
 
 	/* drain work and packet queues */
-	rxe_run_task(&qp->resp.task, 1);
+	rxe_sched_task(&qp->resp.task);
 
 	if (qp_type(qp) == IB_QPT_RC)
-		rxe_run_task(&qp->comp.task, 1);
+		rxe_sched_task(&qp->comp.task);
 	else
 		__rxe_do_task(&qp->comp.task);
-	rxe_run_task(&qp->req.task, 1);
+	rxe_sched_task(&qp->req.task);
 }
 
 /* called by the modify qp verb */
diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index f637712079705..41f1d84f0acbf 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -105,7 +105,7 @@ void rnr_nak_timer(struct timer_list *t)
 	/* request a send queue retry */
 	qp->req.need_retry = 1;
 	qp->req.wait_for_rnr_timer = 0;
-	rxe_run_task(&qp->req.task, 1);
+	rxe_sched_task(&qp->req.task);
 }
 
 static struct rxe_send_wqe *req_next_wqe(struct rxe_qp *qp)
@@ -608,7 +608,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 	 * which can lead to a deadlock. So go ahead and complete
 	 * it now.
 	 */
-	rxe_run_task(&qp->comp.task, 1);
+	rxe_sched_task(&qp->comp.task);
 
 	return 0;
 }
@@ -733,7 +733,7 @@ int rxe_requester(void *arg)
 						       qp->req.wqe_index);
 			wqe->state = wqe_state_done;
 			wqe->status = IB_WC_SUCCESS;
-			rxe_run_task(&qp->comp.task, 0);
+			rxe_run_task(&qp->comp.task);
 			goto done;
 		}
 		payload = mtu;
@@ -795,7 +795,7 @@ int rxe_requester(void *arg)
 		rollback_state(wqe, qp, &rollback_wqe, rollback_psn);
 
 		if (err == -EAGAIN) {
-			rxe_run_task(&qp->req.task, 1);
+			rxe_sched_task(&qp->req.task);
 			goto exit;
 		}
 
@@ -817,7 +817,7 @@ err:
 	qp->req.wqe_index = queue_next_index(qp->sq.queue, qp->req.wqe_index);
 	wqe->state = wqe_state_error;
 	qp->req.state = QP_STATE_ERROR;
-	rxe_run_task(&qp->comp.task, 0);
+	rxe_run_task(&qp->comp.task);
 exit:
 	ret = -EAGAIN;
 out:
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 95d372db934d7..c32bc12cc82f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -91,7 +91,10 @@ void rxe_resp_queue_pkt(struct rxe_qp *qp, struct sk_buff *skb)
 	must_sched = (pkt->opcode == IB_OPCODE_RC_RDMA_READ_REQUEST) ||
 			(skb_queue_len(&qp->req_pkts) > 1);
 
-	rxe_run_task(&qp->resp.task, must_sched);
+	if (must_sched)
+		rxe_sched_task(&qp->resp.task);
+	else
+		rxe_run_task(&qp->resp.task);
 }
 
 static inline enum resp_states get_req(struct rxe_qp *qp,
diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 0cbba455fefd8..442b7348acdc3 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -123,15 +123,20 @@ void rxe_cleanup_task(struct rxe_task *task)
 	tasklet_kill(&task->tasklet);
 }
 
-void rxe_run_task(struct rxe_task *task, int sched)
+void rxe_run_task(struct rxe_task *task)
 {
 	if (task->destroyed)
 		return;
 
-	if (sched)
-		tasklet_schedule(&task->tasklet);
-	else
-		rxe_do_task(&task->tasklet);
+	rxe_do_task(&task->tasklet);
+}
+
+void rxe_sched_task(struct rxe_task *task)
+{
+	if (task->destroyed)
+		return;
+
+	tasklet_schedule(&task->tasklet);
 }
 
 void rxe_disable_task(struct rxe_task *task)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index b3dfd970d1dc6..590b1c1d7e7ca 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -52,10 +52,9 @@ int __rxe_do_task(struct rxe_task *task);
  */
 void rxe_do_task(struct tasklet_struct *t);
 
-/* run a task, else schedule it to run as a tasklet, The decision
- * to run or schedule tasklet is based on the parameter sched.
- */
-void rxe_run_task(struct rxe_task *task, int sched);
+void rxe_run_task(struct rxe_task *task);
+
+void rxe_sched_task(struct rxe_task *task);
 
 /* keep a task from scheduling */
 void rxe_disable_task(struct rxe_task *task);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 88825edc7dce1..f2f82efbaf6d6 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -695,9 +695,9 @@ static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr,
 		wr = next;
 	}
 
-	rxe_run_task(&qp->req.task, 1);
+	rxe_sched_task(&qp->req.task);
 	if (unlikely(qp->req.state == QP_STATE_ERROR))
-		rxe_run_task(&qp->comp.task, 1);
+		rxe_sched_task(&qp->comp.task);
 
 	return err;
 }
@@ -719,7 +719,7 @@ static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
 
 	if (qp->is_user) {
 		/* Utilize process context to do protocol processing */
-		rxe_run_task(&qp->req.task, 0);
+		rxe_run_task(&qp->req.task);
 		return 0;
 	} else
 		return rxe_post_send_kernel(qp, wr, bad_wr);
@@ -759,7 +759,7 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 	spin_unlock_irqrestore(&rq->producer_lock, flags);
 
 	if (qp->resp.state == QP_STATE_ERROR)
-		rxe_run_task(&qp->resp.task, 1);
+		rxe_sched_task(&qp->resp.task);
 
 err1:
 	return err;
-- 
GitLab


From dcef28528cce82a82134abd393aa0f38f2edf77e Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Fri, 21 Oct 2022 15:01:06 -0500
Subject: [PATCH 0634/4122] RDMA/rxe: Make rxe_do_task static

The subroutine rxe_do_task() is only called in rxe_task.c. This patch
makes it static and renames it do_task().

Link: https://lore.kernel.org/r/20221021200118.2163-6-rpearsonhpe@gmail.com
Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_task.c | 6 +++---
 drivers/infiniband/sw/rxe/rxe_task.h | 8 --------
 2 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 442b7348acdc3..fb953f5195b87 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -24,7 +24,7 @@ int __rxe_do_task(struct rxe_task *task)
  * a second caller finds the task already running
  * but looks just after the last call to func
  */
-void rxe_do_task(struct tasklet_struct *t)
+static void do_task(struct tasklet_struct *t)
 {
 	int cont;
 	int ret;
@@ -96,7 +96,7 @@ int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
 	task->func	= func;
 	task->destroyed	= false;
 
-	tasklet_setup(&task->tasklet, rxe_do_task);
+	tasklet_setup(&task->tasklet, do_task);
 
 	task->state = TASK_STATE_START;
 	spin_lock_init(&task->state_lock);
@@ -128,7 +128,7 @@ void rxe_run_task(struct rxe_task *task)
 	if (task->destroyed)
 		return;
 
-	rxe_do_task(&task->tasklet);
+	do_task(&task->tasklet);
 }
 
 void rxe_sched_task(struct rxe_task *task)
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 590b1c1d7e7ca..99e0173e5c46d 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -44,14 +44,6 @@ void rxe_cleanup_task(struct rxe_task *task);
  */
 int __rxe_do_task(struct rxe_task *task);
 
-/*
- * common function called by any of the main tasklets
- * If there is any chance that there is additional
- * work to do someone must reschedule the task before
- * leaving
- */
-void rxe_do_task(struct tasklet_struct *t);
-
 void rxe_run_task(struct rxe_task *task);
 
 void rxe_sched_task(struct rxe_task *task);
-- 
GitLab


From 63a18baef2653f59a7c5b990283628bd54d062fd Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Fri, 21 Oct 2022 15:01:07 -0500
Subject: [PATCH 0635/4122] RDMA/rxe: Rename task->state_lock to task->lock

Rename task-state_lock to task->lock

Link: https://lore.kernel.org/r/20221021200118.2163-7-rpearsonhpe@gmail.com
Signed-off-by: Ian Ziemba <ian.ziemba@hpe.com>
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_task.c | 18 +++++++++---------
 drivers/infiniband/sw/rxe/rxe_task.h |  2 +-
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index fb953f5195b87..0208d833a41b9 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -31,22 +31,22 @@ static void do_task(struct tasklet_struct *t)
 	struct rxe_task *task = from_tasklet(task, t, tasklet);
 	unsigned int iterations = RXE_MAX_ITERATIONS;
 
-	spin_lock_bh(&task->state_lock);
+	spin_lock_bh(&task->lock);
 	switch (task->state) {
 	case TASK_STATE_START:
 		task->state = TASK_STATE_BUSY;
-		spin_unlock_bh(&task->state_lock);
+		spin_unlock_bh(&task->lock);
 		break;
 
 	case TASK_STATE_BUSY:
 		task->state = TASK_STATE_ARMED;
 		fallthrough;
 	case TASK_STATE_ARMED:
-		spin_unlock_bh(&task->state_lock);
+		spin_unlock_bh(&task->lock);
 		return;
 
 	default:
-		spin_unlock_bh(&task->state_lock);
+		spin_unlock_bh(&task->lock);
 		pr_warn("%s failed with bad state %d\n", __func__, task->state);
 		return;
 	}
@@ -55,7 +55,7 @@ static void do_task(struct tasklet_struct *t)
 		cont = 0;
 		ret = task->func(task->arg);
 
-		spin_lock_bh(&task->state_lock);
+		spin_lock_bh(&task->lock);
 		switch (task->state) {
 		case TASK_STATE_BUSY:
 			if (ret) {
@@ -84,7 +84,7 @@ static void do_task(struct tasklet_struct *t)
 			pr_warn("%s failed with bad state %d\n", __func__,
 				task->state);
 		}
-		spin_unlock_bh(&task->state_lock);
+		spin_unlock_bh(&task->lock);
 	} while (cont);
 
 	task->ret = ret;
@@ -99,7 +99,7 @@ int rxe_init_task(struct rxe_task *task, void *arg, int (*func)(void *))
 	tasklet_setup(&task->tasklet, do_task);
 
 	task->state = TASK_STATE_START;
-	spin_lock_init(&task->state_lock);
+	spin_lock_init(&task->lock);
 
 	return 0;
 }
@@ -115,9 +115,9 @@ void rxe_cleanup_task(struct rxe_task *task)
 	task->destroyed = true;
 
 	do {
-		spin_lock_bh(&task->state_lock);
+		spin_lock_bh(&task->lock);
 		idle = (task->state == TASK_STATE_START);
-		spin_unlock_bh(&task->state_lock);
+		spin_unlock_bh(&task->lock);
 	} while (!idle);
 
 	tasklet_kill(&task->tasklet);
diff --git a/drivers/infiniband/sw/rxe/rxe_task.h b/drivers/infiniband/sw/rxe/rxe_task.h
index 99e0173e5c46d..7b88129702ac6 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.h
+++ b/drivers/infiniband/sw/rxe/rxe_task.h
@@ -21,7 +21,7 @@ enum {
 struct rxe_task {
 	struct tasklet_struct	tasklet;
 	int			state;
-	spinlock_t		state_lock; /* spinlock for task state */
+	spinlock_t		lock;
 	void			*arg;
 	int			(*func)(void *arg);
 	int			ret;
-- 
GitLab


From 46db0ba12be6cfd043df1d09b5831cb84ce2307f Mon Sep 17 00:00:00 2001
From: Qiang Yu <quic_qianyu@quicinc.com>
Date: Mon, 10 Oct 2022 12:32:04 +0800
Subject: [PATCH 0636/4122] bus: mhi: host: Use mhi_soc_reset() API in place of
 register write

Currently, a direct register write is used when ramdump collection
in panic path occurs. Replace that with new mhi_soc_reset() API
such that a controller defined reset() function is exercised if
one is present and the regular SOC reset is done if it is not.

Signed-off-by: Qiang Yu <quic_qianyu@quicinc.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://lore.kernel.org/r/1665376324-34258-1-git-send-email-quic_qianyu@quicinc.com
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/bus/mhi/host/boot.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/bus/mhi/host/boot.c b/drivers/bus/mhi/host/boot.c
index 26d0eddb14771..1c69feee17030 100644
--- a/drivers/bus/mhi/host/boot.c
+++ b/drivers/bus/mhi/host/boot.c
@@ -118,9 +118,7 @@ static int __mhi_download_rddm_in_panic(struct mhi_controller *mhi_cntrl)
 			/* Hardware reset so force device to enter RDDM */
 			dev_dbg(dev,
 				"Did not enter RDDM, do a host req reset\n");
-			mhi_write_reg(mhi_cntrl, mhi_cntrl->regs,
-				      MHI_SOC_RESET_REQ_OFFSET,
-				      MHI_SOC_RESET_REQ);
+			mhi_soc_reset(mhi_cntrl);
 			udelay(delayus);
 		}
 
-- 
GitLab


From 869a99907faea6d1835b0bd0d0422ae3519c6ea9 Mon Sep 17 00:00:00 2001
From: Qiang Yu <quic_qianyu@quicinc.com>
Date: Sun, 16 Oct 2022 11:05:32 +0800
Subject: [PATCH 0637/4122] bus: mhi: host: Fix race between channel
 preparation and M0 event

There is a race condition where mhi_prepare_channel() updates the
read and write pointers as the base address and in parallel, if
an M0 transition occurs, the tasklet goes ahead and rings
doorbells for all channels with a delta in TRE rings assuming
they are already enabled. This causes a null pointer access. Fix
it by adding a channel enabled check before ringing channel
doorbells.

Cc: stable@vger.kernel.org # 5.19
Fixes: a6e2e3522f29 "bus: mhi: core: Add support for PM state transitions"
Signed-off-by: Qiang Yu <quic_qianyu@quicinc.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://lore.kernel.org/r/1665889532-13634-1-git-send-email-quic_qianyu@quicinc.com
[mani: CCed stable list]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/bus/mhi/host/pm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c
index 4a42186ff1112..083459028a4b8 100644
--- a/drivers/bus/mhi/host/pm.c
+++ b/drivers/bus/mhi/host/pm.c
@@ -301,7 +301,8 @@ int mhi_pm_m0_transition(struct mhi_controller *mhi_cntrl)
 		read_lock_irq(&mhi_chan->lock);
 
 		/* Only ring DB if ring is not empty */
-		if (tre_ring->base && tre_ring->wp  != tre_ring->rp)
+		if (tre_ring->base && tre_ring->wp  != tre_ring->rp &&
+		    mhi_chan->ch_state == MHI_CH_STATE_ENABLED)
 			mhi_ring_chan_db(mhi_cntrl, mhi_chan);
 		read_unlock_irq(&mhi_chan->lock);
 	}
-- 
GitLab


From 2d5253a096c6057bbf7caa5520856dcdf7eca8bb Mon Sep 17 00:00:00 2001
From: Fabio Porcedda <fabio.porcedda@gmail.com>
Date: Fri, 16 Sep 2022 16:43:29 +0200
Subject: [PATCH 0638/4122] bus: mhi: host: pci_generic: Add a secondary AT
 port to Telit FN990

Add a secondary AT port using one of OEM reserved channel.

Signed-off-by: Fabio Porcedda <fabio.porcedda@gmail.com>
Reviewed-by: Loic Poulain <loic.poulain@linaro.org>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://lore.kernel.org/r/20220916144329.243368-3-fabio.porcedda@gmail.com
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/bus/mhi/host/pci_generic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
index caa4ce28cf9e5..c4259cb2d2899 100644
--- a/drivers/bus/mhi/host/pci_generic.c
+++ b/drivers/bus/mhi/host/pci_generic.c
@@ -340,6 +340,8 @@ static const struct mhi_channel_config mhi_foxconn_sdx55_channels[] = {
 	MHI_CHANNEL_CONFIG_DL(13, "MBIM", 32, 0),
 	MHI_CHANNEL_CONFIG_UL(32, "DUN", 32, 0),
 	MHI_CHANNEL_CONFIG_DL(33, "DUN", 32, 0),
+	MHI_CHANNEL_CONFIG_UL(92, "DUN2", 32, 1),
+	MHI_CHANNEL_CONFIG_DL(93, "DUN2", 32, 1),
 	MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0_MBIM", 128, 2),
 	MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0_MBIM", 128, 3),
 };
-- 
GitLab


From 875ab4a8d9a7e559c4aaad28f5886d39923301b7 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 27 Sep 2022 13:53:27 +0800
Subject: [PATCH 0639/4122] RDMA/rxe: Make sure requested access is a subset of
 {mr,mw}->access

We should reject the requests with access flags that is not registered by
MR/MW. For example, lookup_mr() should return NULL when requested access
is 0x03 and mr->access is 0x01.

Link: https://lore.kernel.org/r/20220927055337.22630-2-lizhijian@fujitsu.com
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 2 +-
 drivers/infiniband/sw/rxe/rxe_mw.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index d4f10c2d1aa79..014c27bba049c 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -511,7 +511,7 @@ struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
 
 	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
 		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
-		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
+		     mr_pd(mr) != pd || ((access & mr->access) != access) ||
 		     mr->state != RXE_MR_STATE_VALID)) {
 		rxe_put(mr);
 		mr = NULL;
diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 902b7df7aaedb..8df1c9066ed89 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -293,8 +293,7 @@ struct rxe_mw *rxe_lookup_mw(struct rxe_qp *qp, int access, u32 rkey)
 
 	if (unlikely((mw->rkey != rkey) || rxe_mw_pd(mw) != pd ||
 		     (mw->ibmw.type == IB_MW_TYPE_2 && mw->qp != qp) ||
-		     (mw->length == 0) ||
-		     (access && !(access & mw->access)) ||
+		     (mw->length == 0) || ((access & mw->access) != access) ||
 		     mw->state != RXE_MW_STATE_VALID)) {
 		rxe_put(mw);
 		return NULL;
-- 
GitLab


From b071850ef62e36b2fc2ec81863f07be857151409 Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 27 Oct 2022 07:31:33 +0000
Subject: [PATCH 0640/4122] RDMA/rxe: Remove the duplicate assignment of
 mr->map_shift

mr->map_shift is set to ilog2(RXE_BUF_PER_MAP) in both rxe_mr_init() and
rxe_mr_alloc() so remove the duplicate one in rxe_mr_init().

Link: https://lore.kernel.org/r/1666855893-145-1-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 014c27bba049c..bc081002bddcb 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -62,7 +62,6 @@ static void rxe_mr_init(int access, struct rxe_mr *mr)
 	mr->rkey = mr->ibmr.rkey = rkey;
 
 	mr->state = RXE_MR_STATE_INVALID;
-	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
 }
 
 static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
-- 
GitLab


From 692373d186205dfb1b56f35f22702412d94d9420 Mon Sep 17 00:00:00 2001
From: Yunsheng Lin <linyunsheng@huawei.com>
Date: Fri, 28 Oct 2022 15:50:53 +0800
Subject: [PATCH 0641/4122] RDMA/rxe: cleanup some error handling in
 rxe_verbs.c

Instead of 'goto and return', just return directly to
simplify the error handling, and avoid some unnecessary
return value check.

Link: https://lore.kernel.org/r/20221028075053.3990467-1-xuhaoyue1@hisilicon.com
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_verbs.c | 80 ++++++++-------------------
 1 file changed, 23 insertions(+), 57 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index f2f82efbaf6d6..bcdfdadaebbc8 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -238,7 +238,6 @@ static int rxe_destroy_ah(struct ib_ah *ibah, u32 flags)
 
 static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 {
-	int err;
 	int i;
 	u32 length;
 	struct rxe_recv_wqe *recv_wqe;
@@ -246,15 +245,11 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 	int full;
 
 	full = queue_full(rq->queue, QUEUE_TYPE_TO_DRIVER);
-	if (unlikely(full)) {
-		err = -ENOMEM;
-		goto err1;
-	}
+	if (unlikely(full))
+		return -ENOMEM;
 
-	if (unlikely(num_sge > rq->max_sge)) {
-		err = -EINVAL;
-		goto err1;
-	}
+	if (unlikely(num_sge > rq->max_sge))
+		return -EINVAL;
 
 	length = 0;
 	for (i = 0; i < num_sge; i++)
@@ -275,9 +270,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr)
 	queue_advance_producer(rq->queue, QUEUE_TYPE_TO_DRIVER);
 
 	return 0;
-
-err1:
-	return err;
 }
 
 static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init,
@@ -343,10 +335,7 @@ static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr,
 	if (err)
 		return err;
 
-	err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
-	if (err)
-		return err;
-	return 0;
+	return rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata);
 }
 
 static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr)
@@ -453,11 +442,11 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 
 	err = rxe_qp_chk_attr(rxe, qp, attr, mask);
 	if (err)
-		goto err1;
+		return err;
 
 	err = rxe_qp_from_attr(qp, attr, mask, udata);
 	if (err)
-		goto err1;
+		return err;
 
 	if ((mask & IB_QP_AV) && (attr->ah_attr.ah_flags & IB_AH_GRH))
 		qp->src_port = rdma_get_udp_sport(attr->ah_attr.grh.flow_label,
@@ -465,9 +454,6 @@ static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 						  qp->attr.dest_qp_num);
 
 	return 0;
-
-err1:
-	return err;
 }
 
 static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
@@ -501,24 +487,21 @@ static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr,
 	struct rxe_sq *sq = &qp->sq;
 
 	if (unlikely(num_sge > sq->max_sge))
-		goto err1;
+		return -EINVAL;
 
 	if (unlikely(mask & WR_ATOMIC_MASK)) {
 		if (length < 8)
-			goto err1;
+			return -EINVAL;
 
 		if (atomic_wr(ibwr)->remote_addr & 0x7)
-			goto err1;
+			return -EINVAL;
 	}
 
 	if (unlikely((ibwr->send_flags & IB_SEND_INLINE) &&
 		     (length > sq->max_inline)))
-		goto err1;
+		return -EINVAL;
 
 	return 0;
-
-err1:
-	return -EINVAL;
 }
 
 static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr,
@@ -735,14 +718,12 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 
 	if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) {
 		*bad_wr = wr;
-		err = -EINVAL;
-		goto err1;
+		return -EINVAL;
 	}
 
 	if (unlikely(qp->srq)) {
 		*bad_wr = wr;
-		err = -EINVAL;
-		goto err1;
+		return -EINVAL;
 	}
 
 	spin_lock_irqsave(&rq->producer_lock, flags);
@@ -761,7 +742,6 @@ static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
 	if (qp->resp.state == QP_STATE_ERROR)
 		rxe_sched_task(&qp->resp.task);
 
-err1:
 	return err;
 }
 
@@ -826,16 +806,9 @@ static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata)
 
 	err = rxe_cq_chk_attr(rxe, cq, cqe, 0);
 	if (err)
-		goto err1;
-
-	err = rxe_cq_resize_queue(cq, cqe, uresp, udata);
-	if (err)
-		goto err1;
-
-	return 0;
+		return err;
 
-err1:
-	return err;
+	return rxe_cq_resize_queue(cq, cqe, uresp, udata);
 }
 
 static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
@@ -921,26 +894,22 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 	struct rxe_mr *mr;
 
 	mr = rxe_alloc(&rxe->mr_pool);
-	if (!mr) {
-		err = -ENOMEM;
-		goto err2;
-	}
-
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
 
 	rxe_get(pd);
 	mr->ibmr.pd = ibpd;
 
 	err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
 	if (err)
-		goto err3;
+		goto err1;
 
 	rxe_finalize(mr);
 
 	return &mr->ibmr;
 
-err3:
+err1:
 	rxe_cleanup(mr);
-err2:
 	return ERR_PTR(err);
 }
 
@@ -956,25 +925,22 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 		return ERR_PTR(-EINVAL);
 
 	mr = rxe_alloc(&rxe->mr_pool);
-	if (!mr) {
-		err = -ENOMEM;
-		goto err1;
-	}
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
 
 	rxe_get(pd);
 	mr->ibmr.pd = ibpd;
 
 	err = rxe_mr_init_fast(max_num_sg, mr);
 	if (err)
-		goto err2;
+		goto err1;
 
 	rxe_finalize(mr);
 
 	return &mr->ibmr;
 
-err2:
-	rxe_cleanup(mr);
 err1:
+	rxe_cleanup(mr);
 	return ERR_PTR(err);
 }
 
-- 
GitLab


From 1b52861f0e04da43013f88dd56464b5719a974e3 Mon Sep 17 00:00:00 2001
From: Jinyu Tang <tjytimi@163.com>
Date: Sun, 9 Oct 2022 21:45:03 +0800
Subject: [PATCH 0642/4122] riscv: support update_mmu_tlb()

Add macro definition to support update_mmu_tlb() for riscv,
this function is from commit:7df676974359 ("mm/memory.c:Update
local TLB if PTE entry exists").

update_mmu_tlb() is used when a thread notice that other cpu thread
has handled the fault and changed the PTE. For MIPS, it's worth to
do that,this cpu thread will trap in tlb fault again otherwise.

For RISCV, it's also better to flush local tlb than do nothing in
update_mmu_tlb(). There are two kinds of page fault that have
update_mmu_tlb() inside:

1.page fault which PTE is NOT none, only protection check error,
like write protection fault. If updata_mmu_tlb() is empty, after
finsh page fault this time and re-execute, cpu will find address
but protection checked error in tlb again. So this will cause
another page fault. PTE in memory is good now,so update_mmu_cache()
in handle_pte_fault() will be executed. If updata_mmu_tlb() is not
empty flush local tlb, cpu won't find this address in tlb next time,
and get entry in physical memory, so it won't cause another page
fault.

2.page fault which PTE is none or swapped.
For this case, this cpu thread won't cause another page fault,cpu
will have tlb miss when re-execute, and get entry in memory
directly. But "set pte in phycial memory and flush local tlb" is
pratice in Linux, it's better to flush local tlb if it find entry
in phycial memory has changed.

Maybe it's same for other ARCH which can't detect PTE changed and
update it in local tlb automatically.

Signed-off-by: Jinyu Tang <tjytimi@163.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20221009134503.18783-1-tjytimi@163.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/pgtable.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ec936910a96e..c61ae83aadee8 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -418,6 +418,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	local_flush_tlb_page(address);
 }
 
+#define __HAVE_ARCH_UPDATE_MMU_TLB
+#define update_mmu_tlb update_mmu_cache
+
 static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 		unsigned long address, pmd_t *pmdp)
 {
-- 
GitLab


From 3558927fc2b2fd0af309648f4071035e08719866 Mon Sep 17 00:00:00 2001
From: Cleo John <waterdev@galaxycrow.de>
Date: Mon, 10 Oct 2022 20:28:48 +0200
Subject: [PATCH 0643/4122] riscv: fix styling in ucontext header

Change the two comments in ucontext.h by getting them up to
the coding style proposed by torvalds.

Signed-off-by: Cleo John <waterdev@galaxycrow.de>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221010182848.GA28029@watet-ms7b87
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/uapi/asm/ucontext.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/include/uapi/asm/ucontext.h b/arch/riscv/include/uapi/asm/ucontext.h
index 44eb993950e5e..516bd0bb0da5c 100644
--- a/arch/riscv/include/uapi/asm/ucontext.h
+++ b/arch/riscv/include/uapi/asm/ucontext.h
@@ -15,19 +15,23 @@ struct ucontext {
 	struct ucontext	 *uc_link;
 	stack_t		  uc_stack;
 	sigset_t	  uc_sigmask;
-	/* There's some padding here to allow sigset_t to be expanded in the
+	/*
+	 * There's some padding here to allow sigset_t to be expanded in the
 	 * future.  Though this is unlikely, other architectures put uc_sigmask
 	 * at the end of this structure and explicitly state it can be
-	 * expanded, so we didn't want to box ourselves in here. */
+	 * expanded, so we didn't want to box ourselves in here.
+	 */
 	__u8		  __unused[1024 / 8 - sizeof(sigset_t)];
-	/* We can't put uc_sigmask at the end of this structure because we need
+	/*
+	 * We can't put uc_sigmask at the end of this structure because we need
 	 * to be able to expand sigcontext in the future.  For example, the
 	 * vector ISA extension will almost certainly add ISA state.  We want
 	 * to ensure all user-visible ISA state can be saved and restored via a
 	 * ucontext, so we're putting this at the end in order to allow for
 	 * infinite extensibility.  Since we know this will be extended and we
 	 * assume sigset_t won't be extended an extreme amount, we're
-	 * prioritizing this. */
+	 * prioritizing this.
+	 */
 	struct sigcontext uc_mcontext;
 };
 
-- 
GitLab


From 03699f271de1f4df6369cd379506539cd7d590d3 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 2 Sep 2022 14:33:44 -0700
Subject: [PATCH 0644/4122] string: Rewrite and add more kern-doc for the
 str*() functions

While there were varying degrees of kern-doc for various str*()-family
functions, many needed updating and clarification, or to just be
entirely written. Update (and relocate) existing kern-doc and add missing
functions, sadly shaking my head at how many times I have written "Do
not use this function". Include the results in the core kernel API doc.

Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Andy Shevchenko <andy@kernel.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: linux-hardening@vger.kernel.org
Tested-by: Akira Yokosawa <akiyks@gmail.com>
Link: https://lore.kernel.org/lkml/9b0cf584-01b3-3013-b800-1ef59fe82476@gmail.com
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/core-api/kernel-api.rst |   3 +
 include/linux/fortify-string.h        | 133 ++++++++++++++++++++++++--
 lib/string.c                          |  82 ----------------
 scripts/kernel-doc                    |   6 +-
 4 files changed, 131 insertions(+), 93 deletions(-)

diff --git a/Documentation/core-api/kernel-api.rst b/Documentation/core-api/kernel-api.rst
index 06f4ab1226979..0d0c4f87057cb 100644
--- a/Documentation/core-api/kernel-api.rst
+++ b/Documentation/core-api/kernel-api.rst
@@ -36,6 +36,9 @@ String Conversions
 String Manipulation
 -------------------
 
+.. kernel-doc:: include/linux/fortify-string.h
+   :internal:
+
 .. kernel-doc:: lib/string.c
    :export:
 
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 0f00a551939a4..e5b39b1cc2fc1 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -106,13 +106,13 @@ extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size)
  * Instead, please choose an alternative, so that the expectation
  * of @p's contents is unambiguous:
  *
- * +--------------------+-----------------+------------+
- * | @p needs to be:    | padded to @size | not padded |
- * +====================+=================+============+
- * |     NUL-terminated | strscpy_pad()   | strscpy()  |
- * +--------------------+-----------------+------------+
- * | not NUL-terminated | strtomem_pad()  | strtomem() |
- * +--------------------+-----------------+------------+
+ * +--------------------+--------------------+------------+
+ * | **p** needs to be: | padded to **size** | not padded |
+ * +====================+====================+============+
+ * |     NUL-terminated | strscpy_pad()      | strscpy()  |
+ * +--------------------+--------------------+------------+
+ * | not NUL-terminated | strtomem_pad()     | strtomem() |
+ * +--------------------+--------------------+------------+
  *
  * Note strscpy*()'s differing return values for detecting truncation,
  * and strtomem*()'s expectation that the destination is marked with
@@ -131,6 +131,21 @@ char *strncpy(char * const POS p, const char *q, __kernel_size_t size)
 	return __underlying_strncpy(p, q, size);
 }
 
+/**
+ * strcat - Append a string to an existing string
+ *
+ * @p: pointer to NUL-terminated string to append to
+ * @q: pointer to NUL-terminated source string to append from
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * read and write overflows, this is only possible when the
+ * destination buffer size is known to the compiler. Prefer
+ * building the string with formatting, via scnprintf() or similar.
+ * At the very least, use strncat().
+ *
+ * Returns @p.
+ *
+ */
 __FORTIFY_INLINE __diagnose_as(__builtin_strcat, 1, 2)
 char *strcat(char * const POS p, const char *q)
 {
@@ -144,6 +159,16 @@ char *strcat(char * const POS p, const char *q)
 }
 
 extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+/**
+ * strnlen - Return bounded count of characters in a NUL-terminated string
+ *
+ * @p: pointer to NUL-terminated string to count.
+ * @maxlen: maximum number of characters to count.
+ *
+ * Returns number of characters in @p (NOT including the final NUL), or
+ * @maxlen, if no NUL has been found up to there.
+ *
+ */
 __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size_t maxlen)
 {
 	size_t p_size = __member_size(p);
@@ -169,6 +194,19 @@ __FORTIFY_INLINE __kernel_size_t strnlen(const char * const POS p, __kernel_size
  * possible for strlen() to be used on compile-time strings for use in
  * static initializers (i.e. as a constant expression).
  */
+/**
+ * strlen - Return count of characters in a NUL-terminated string
+ *
+ * @p: pointer to NUL-terminated string to count.
+ *
+ * Do not use this function unless the string length is known at
+ * compile-time. When @p is unterminated, this function may crash
+ * or return unexpected counts that could lead to memory content
+ * exposures. Prefer strnlen().
+ *
+ * Returns number of characters in @p (NOT including the final NUL).
+ *
+ */
 #define strlen(p)							\
 	__builtin_choose_expr(__is_constexpr(__builtin_strlen(p)),	\
 		__builtin_strlen(p), __fortify_strlen(p))
@@ -187,8 +225,26 @@ __kernel_size_t __fortify_strlen(const char * const POS p)
 	return ret;
 }
 
-/* defined after fortified strlen to reuse it */
+/* Defined after fortified strlen() to reuse it. */
 extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+/**
+ * strlcpy - Copy a string into another string buffer
+ *
+ * @p: pointer to destination of copy
+ * @q: pointer to NUL-terminated source string to copy
+ * @size: maximum number of bytes to write at @p
+ *
+ * If strlen(@q) >= @size, the copy of @q will be truncated at
+ * @size - 1 bytes. @p will always be NUL-terminated.
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * over-reads when calculating strlen(@q), it is still possible.
+ * Prefer strscpy(), though note its different return values for
+ * detecting truncation.
+ *
+ * Returns total number of bytes written to @p, including terminating NUL.
+ *
+ */
 __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, size_t size)
 {
 	size_t p_size = __member_size(p);
@@ -214,8 +270,32 @@ __FORTIFY_INLINE size_t strlcpy(char * const POS p, const char * const POS q, si
 	return q_len;
 }
 
-/* defined after fortified strnlen to reuse it */
+/* Defined after fortified strnlen() to reuse it. */
 extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
+/**
+ * strscpy - Copy a C-string into a sized buffer
+ *
+ * @p: Where to copy the string to
+ * @q: Where to copy the string from
+ * @size: Size of destination buffer
+ *
+ * Copy the source string @p, or as much of it as fits, into the destination
+ * @q buffer. The behavior is undefined if the string buffers overlap. The
+ * destination @p buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * Preferred to strlcpy() since the API doesn't require reading memory
+ * from the source @q string beyond the specified @size bytes, and since
+ * the return value is easier to error-check than strlcpy()'s.
+ * In addition, the implementation is robust to the string changing out
+ * from underneath it, unlike the current strlcpy() implementation.
+ *
+ * Preferred to strncpy() since it always returns a valid string, and
+ * doesn't unnecessarily force the tail of the destination buffer to be
+ * zero padded. If padding is desired please use strscpy_pad().
+ *
+ * Returns the number of characters copied in @p (not including the
+ * trailing %NUL) or -E2BIG if @size is 0 or the copy of @q was truncated.
+ */
 __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, size_t size)
 {
 	size_t len;
@@ -261,7 +341,26 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s
 	return __real_strscpy(p, q, len);
 }
 
-/* defined after fortified strlen and strnlen to reuse them */
+/**
+ * strncat - Append a string to an existing string
+ *
+ * @p: pointer to NUL-terminated string to append to
+ * @q: pointer to source string to append from
+ * @count: Maximum bytes to read from @q
+ *
+ * Appends at most @count bytes from @q (stopping at the first
+ * NUL byte) after the NUL-terminated string at @p. @p will be
+ * NUL-terminated.
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * read and write overflows, this is only possible when the sizes
+ * of @p and @q are known to the compiler. Prefer building the
+ * string with formatting, via scnprintf() or similar.
+ *
+ * Returns @p.
+ *
+ */
+/* Defined after fortified strlen() and strnlen() to reuse them. */
 __FORTIFY_INLINE __diagnose_as(__builtin_strncat, 1, 2, 3)
 char *strncat(char * const POS p, const char * const POS q, __kernel_size_t count)
 {
@@ -572,6 +671,20 @@ __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp
 	return __real_kmemdup(p, size, gfp);
 }
 
+/**
+ * strcpy - Copy a string into another string buffer
+ *
+ * @p: pointer to destination of copy
+ * @q: pointer to NUL-terminated source string to copy
+ *
+ * Do not use this function. While FORTIFY_SOURCE tries to avoid
+ * overflows, this is only possible when the sizes of @q and @p are
+ * known to the compiler. Prefer strscpy(), though note its different
+ * return values for detecting truncation.
+ *
+ * Returns @p.
+ *
+ */
 /* Defined after fortified strlen to reuse it. */
 __FORTIFY_INLINE __diagnose_as(__builtin_strcpy, 1, 2)
 char *strcpy(char * const POS p, const char * const POS q)
diff --git a/lib/string.c b/lib/string.c
index 3371d26a0e390..4fb566ea610ff 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -76,11 +76,6 @@ EXPORT_SYMBOL(strcasecmp);
 #endif
 
 #ifndef __HAVE_ARCH_STRCPY
-/**
- * strcpy - Copy a %NUL terminated string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- */
 char *strcpy(char *dest, const char *src)
 {
 	char *tmp = dest;
@@ -93,19 +88,6 @@ EXPORT_SYMBOL(strcpy);
 #endif
 
 #ifndef __HAVE_ARCH_STRNCPY
-/**
- * strncpy - Copy a length-limited, C-string
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @count: The maximum number of bytes to copy
- *
- * The result is not %NUL-terminated if the source exceeds
- * @count bytes.
- *
- * In the case where the length of @src is less than  that  of
- * count, the remainder of @dest will be padded with %NUL.
- *
- */
 char *strncpy(char *dest, const char *src, size_t count)
 {
 	char *tmp = dest;
@@ -122,17 +104,6 @@ EXPORT_SYMBOL(strncpy);
 #endif
 
 #ifndef __HAVE_ARCH_STRLCPY
-/**
- * strlcpy - Copy a C-string into a sized buffer
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @size: size of destination buffer
- *
- * Compatible with ``*BSD``: the result is always a valid
- * NUL-terminated string that fits in the buffer (unless,
- * of course, the buffer size is zero). It does not pad
- * out the result like strncpy() does.
- */
 size_t strlcpy(char *dest, const char *src, size_t size)
 {
 	size_t ret = strlen(src);
@@ -148,30 +119,6 @@ EXPORT_SYMBOL(strlcpy);
 #endif
 
 #ifndef __HAVE_ARCH_STRSCPY
-/**
- * strscpy - Copy a C-string into a sized buffer
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @count: Size of destination buffer
- *
- * Copy the string, or as much of it as fits, into the dest buffer.  The
- * behavior is undefined if the string buffers overlap.  The destination
- * buffer is always NUL terminated, unless it's zero-sized.
- *
- * Preferred to strlcpy() since the API doesn't require reading memory
- * from the src string beyond the specified "count" bytes, and since
- * the return value is easier to error-check than strlcpy()'s.
- * In addition, the implementation is robust to the string changing out
- * from underneath it, unlike the current strlcpy() implementation.
- *
- * Preferred to strncpy() since it always returns a valid string, and
- * doesn't unnecessarily force the tail of the destination buffer to be
- * zeroed.  If zeroing is desired please use strscpy_pad().
- *
- * Returns:
- * * The number of characters copied (not including the trailing %NUL)
- * * -E2BIG if count is 0 or @src was truncated.
- */
 ssize_t strscpy(char *dest, const char *src, size_t count)
 {
 	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
@@ -266,11 +213,6 @@ char *stpcpy(char *__restrict__ dest, const char *__restrict__ src)
 EXPORT_SYMBOL(stpcpy);
 
 #ifndef __HAVE_ARCH_STRCAT
-/**
- * strcat - Append one %NUL-terminated string to another
- * @dest: The string to be appended to
- * @src: The string to append to it
- */
 char *strcat(char *dest, const char *src)
 {
 	char *tmp = dest;
@@ -285,15 +227,6 @@ EXPORT_SYMBOL(strcat);
 #endif
 
 #ifndef __HAVE_ARCH_STRNCAT
-/**
- * strncat - Append a length-limited, C-string to another
- * @dest: The string to be appended to
- * @src: The string to append to it
- * @count: The maximum numbers of bytes to copy
- *
- * Note that in contrast to strncpy(), strncat() ensures the result is
- * terminated.
- */
 char *strncat(char *dest, const char *src, size_t count)
 {
 	char *tmp = dest;
@@ -314,12 +247,6 @@ EXPORT_SYMBOL(strncat);
 #endif
 
 #ifndef __HAVE_ARCH_STRLCAT
-/**
- * strlcat - Append a length-limited, C-string to another
- * @dest: The string to be appended to
- * @src: The string to append to it
- * @count: The size of the destination buffer.
- */
 size_t strlcat(char *dest, const char *src, size_t count)
 {
 	size_t dsize = strlen(dest);
@@ -484,10 +411,6 @@ EXPORT_SYMBOL(strnchr);
 #endif
 
 #ifndef __HAVE_ARCH_STRLEN
-/**
- * strlen - Find the length of a string
- * @s: The string to be sized
- */
 size_t strlen(const char *s)
 {
 	const char *sc;
@@ -500,11 +423,6 @@ EXPORT_SYMBOL(strlen);
 #endif
 
 #ifndef __HAVE_ARCH_STRNLEN
-/**
- * strnlen - Find the length of a length-limited string
- * @s: The string to be sized
- * @count: The maximum number of bytes to search
- */
 size_t strnlen(const char *s, size_t count)
 {
 	const char *sc;
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index aea04365bc69d..adbc4d3077702 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1448,6 +1448,8 @@ sub create_parameterlist($$$$) {
     foreach my $arg (split($splitter, $args)) {
 	# strip comments
 	$arg =~ s/\/\*.*\*\///;
+	# ignore argument attributes
+	$arg =~ s/\sPOS0?\s/ /;
 	# strip leading/trailing spaces
 	$arg =~ s/^\s*//;
 	$arg =~ s/\s*$//;
@@ -1657,6 +1659,7 @@ sub dump_function($$) {
     $prototype =~ s/^__inline +//;
     $prototype =~ s/^__always_inline +//;
     $prototype =~ s/^noinline +//;
+    $prototype =~ s/^__FORTIFY_INLINE +//;
     $prototype =~ s/__init +//;
     $prototype =~ s/__init_or_module +//;
     $prototype =~ s/__deprecated +//;
@@ -1666,7 +1669,8 @@ sub dump_function($$) {
     $prototype =~ s/__weak +//;
     $prototype =~ s/__sched +//;
     $prototype =~ s/__printf\s*\(\s*\d*\s*,\s*\d*\s*\) +//;
-    $prototype =~ s/__alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
+    $prototype =~ s/__(?:re)?alloc_size\s*\(\s*\d+\s*(?:,\s*\d+\s*)?\) +//;
+    $prototype =~ s/__diagnose_as\s*\(\s*\S+\s*(?:,\s*\d+\s*)*\) +//;
     my $define = $prototype =~ s/^#\s*define\s+//; #ak added
     $prototype =~ s/__attribute_const__ +//;
     $prototype =~ s/__attribute__\s*\(\(
-- 
GitLab


From 96fce387d58fa8eae6e8d9b1ecdfbc18292d7a68 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 28 Sep 2022 14:17:05 -0700
Subject: [PATCH 0645/4122] kunit/memcpy: Add dynamic size and window tests

The "side effects" memmove() test accidentally found[1] a corner case in
the recent refactoring of the i386 assembly memmove(), but missed another
corner case. Instead of hoping to get lucky next time, implement much
more complete tests of memcpy() and memmove() -- especially the moving
window overlap for memmove() -- which catches all the issues encountered
and should catch anything new.

[1] https://lore.kernel.org/lkml/CAKwvOdkaKTa2aiA90VzFrChNQM6O_ro+b7VWs=op70jx-DKaXA@mail.gmail.com

Cc: Nick Desaulniers <ndesaulniers@google.com>
Tested-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 MAINTAINERS        |   1 +
 lib/memcpy_kunit.c | 205 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 206 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..9dd8d74c4df09 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8044,6 +8044,7 @@ S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
 F:	include/linux/fortify-string.h
 F:	lib/fortify_kunit.c
+F:	lib/memcpy_kunit.c
 F:	lib/test_fortify/*
 F:	scripts/test_fortify.sh
 K:	\b__NO_FORTIFY\b
diff --git a/lib/memcpy_kunit.c b/lib/memcpy_kunit.c
index 2b5cc70ac53fc..c4a7107edd43a 100644
--- a/lib/memcpy_kunit.c
+++ b/lib/memcpy_kunit.c
@@ -270,6 +270,208 @@ static void memset_test(struct kunit *test)
 #undef TEST_OP
 }
 
+static u8 large_src[1024];
+static u8 large_dst[2048];
+static const u8 large_zero[2048];
+
+static void set_random_nonzero(struct kunit *test, u8 *byte)
+{
+	int failed_rng = 0;
+
+	while (*byte == 0) {
+		get_random_bytes(byte, 1);
+		KUNIT_ASSERT_LT_MSG(test, failed_rng++, 100,
+				    "Is the RNG broken?");
+	}
+}
+
+static void init_large(struct kunit *test)
+{
+
+	/* Get many bit patterns. */
+	get_random_bytes(large_src, ARRAY_SIZE(large_src));
+
+	/* Make sure we have non-zero edges. */
+	set_random_nonzero(test, &large_src[0]);
+	set_random_nonzero(test, &large_src[ARRAY_SIZE(large_src) - 1]);
+
+	/* Explicitly zero the entire destination. */
+	memset(large_dst, 0, ARRAY_SIZE(large_dst));
+}
+
+/*
+ * Instead of an indirect function call for "copy" or a giant macro,
+ * use a bool to pick memcpy or memmove.
+ */
+static void copy_large_test(struct kunit *test, bool use_memmove)
+{
+	init_large(test);
+
+	/* Copy a growing number of non-overlapping bytes ... */
+	for (int bytes = 1; bytes <= ARRAY_SIZE(large_src); bytes++) {
+		/* Over a shifting destination window ... */
+		for (int offset = 0; offset < ARRAY_SIZE(large_src); offset++) {
+			int right_zero_pos = offset + bytes;
+			int right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos;
+
+			/* Copy! */
+			if (use_memmove)
+				memmove(large_dst + offset, large_src, bytes);
+			else
+				memcpy(large_dst + offset, large_src, bytes);
+
+			/* Did we touch anything before the copy area? */
+			KUNIT_ASSERT_EQ_MSG(test,
+				memcmp(large_dst, large_zero, offset), 0,
+				"with size %d at offset %d", bytes, offset);
+			/* Did we touch anything after the copy area? */
+			KUNIT_ASSERT_EQ_MSG(test,
+				memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0,
+				"with size %d at offset %d", bytes, offset);
+
+			/* Are we byte-for-byte exact across the copy? */
+			KUNIT_ASSERT_EQ_MSG(test,
+				memcmp(large_dst + offset, large_src, bytes), 0,
+				"with size %d at offset %d", bytes, offset);
+
+			/* Zero out what we copied for the next cycle. */
+			memset(large_dst + offset, 0, bytes);
+		}
+		/* Avoid stall warnings if this loop gets slow. */
+		cond_resched();
+	}
+}
+
+static void memcpy_large_test(struct kunit *test)
+{
+	copy_large_test(test, false);
+}
+
+static void memmove_large_test(struct kunit *test)
+{
+	copy_large_test(test, true);
+}
+
+/*
+ * On the assumption that boundary conditions are going to be the most
+ * sensitive, instead of taking a full step (inc) each iteration,
+ * take single index steps for at least the first "inc"-many indexes
+ * from the "start" and at least the last "inc"-many indexes before
+ * the "end". When in the middle, take full "inc"-wide steps. For
+ * example, calling next_step(idx, 1, 15, 3) with idx starting at 0
+ * would see the following pattern: 1 2 3 4 7 10 11 12 13 14 15.
+ */
+static int next_step(int idx, int start, int end, int inc)
+{
+	start += inc;
+	end -= inc;
+
+	if (idx < start || idx + inc > end)
+		inc = 1;
+	return idx + inc;
+}
+
+static void inner_loop(struct kunit *test, int bytes, int d_off, int s_off)
+{
+	int left_zero_pos, left_zero_size;
+	int right_zero_pos, right_zero_size;
+	int src_pos, src_orig_pos, src_size;
+	int pos;
+
+	/* Place the source in the destination buffer. */
+	memcpy(&large_dst[s_off], large_src, bytes);
+
+	/* Copy to destination offset. */
+	memmove(&large_dst[d_off], &large_dst[s_off], bytes);
+
+	/* Make sure destination entirely matches. */
+	KUNIT_ASSERT_EQ_MSG(test, memcmp(&large_dst[d_off], large_src, bytes), 0,
+		"with size %d at src offset %d and dest offset %d",
+		bytes, s_off, d_off);
+
+	/* Calculate the expected zero spans. */
+	if (s_off < d_off) {
+		left_zero_pos = 0;
+		left_zero_size = s_off;
+
+		right_zero_pos = d_off + bytes;
+		right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos;
+
+		src_pos = s_off;
+		src_orig_pos = 0;
+		src_size = d_off - s_off;
+	} else {
+		left_zero_pos = 0;
+		left_zero_size = d_off;
+
+		right_zero_pos = s_off + bytes;
+		right_zero_size = ARRAY_SIZE(large_dst) - right_zero_pos;
+
+		src_pos = d_off + bytes;
+		src_orig_pos = src_pos - s_off;
+		src_size = right_zero_pos - src_pos;
+	}
+
+	/* Check non-overlapping source is unchanged.*/
+	KUNIT_ASSERT_EQ_MSG(test,
+		memcmp(&large_dst[src_pos], &large_src[src_orig_pos], src_size), 0,
+		"with size %d at src offset %d and dest offset %d",
+		bytes, s_off, d_off);
+
+	/* Check leading buffer contents are zero. */
+	KUNIT_ASSERT_EQ_MSG(test,
+		memcmp(&large_dst[left_zero_pos], large_zero, left_zero_size), 0,
+		"with size %d at src offset %d and dest offset %d",
+		bytes, s_off, d_off);
+	/* Check trailing buffer contents are zero. */
+	KUNIT_ASSERT_EQ_MSG(test,
+		memcmp(&large_dst[right_zero_pos], large_zero, right_zero_size), 0,
+		"with size %d at src offset %d and dest offset %d",
+		bytes, s_off, d_off);
+
+	/* Zero out everything not already zeroed.*/
+	pos = left_zero_pos + left_zero_size;
+	memset(&large_dst[pos], 0, right_zero_pos - pos);
+}
+
+static void memmove_overlap_test(struct kunit *test)
+{
+	/*
+	 * Running all possible offset and overlap combinations takes a
+	 * very long time. Instead, only check up to 128 bytes offset
+	 * into the destination buffer (which should result in crossing
+	 * cachelines), with a step size of 1 through 7 to try to skip some
+	 * redundancy.
+	 */
+	static const int offset_max = 128; /* less than ARRAY_SIZE(large_src); */
+	static const int bytes_step = 7;
+	static const int window_step = 7;
+
+	static const int bytes_start = 1;
+	static const int bytes_end = ARRAY_SIZE(large_src) + 1;
+
+	init_large(test);
+
+	/* Copy a growing number of overlapping bytes ... */
+	for (int bytes = bytes_start; bytes < bytes_end;
+	     bytes = next_step(bytes, bytes_start, bytes_end, bytes_step)) {
+
+		/* Over a shifting destination window ... */
+		for (int d_off = 0; d_off < offset_max; d_off++) {
+			int s_start = max(d_off - bytes, 0);
+			int s_end = min_t(int, d_off + bytes, ARRAY_SIZE(large_src));
+
+			/* Over a shifting source window ... */
+			for (int s_off = s_start; s_off < s_end;
+			     s_off = next_step(s_off, s_start, s_end, window_step))
+				inner_loop(test, bytes, d_off, s_off);
+
+			/* Avoid stall warnings. */
+			cond_resched();
+		}
+	}
+}
+
 static void strtomem_test(struct kunit *test)
 {
 	static const char input[sizeof(unsigned long)] = "hi";
@@ -325,7 +527,10 @@ static void strtomem_test(struct kunit *test)
 static struct kunit_case memcpy_test_cases[] = {
 	KUNIT_CASE(memset_test),
 	KUNIT_CASE(memcpy_test),
+	KUNIT_CASE(memcpy_large_test),
 	KUNIT_CASE(memmove_test),
+	KUNIT_CASE(memmove_large_test),
+	KUNIT_CASE(memmove_overlap_test),
 	KUNIT_CASE(strtomem_test),
 	{}
 };
-- 
GitLab


From ef1ca2102e9c546a507ed43994f5dd022f7a80d3 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 14 Oct 2022 19:24:21 +0200
Subject: [PATCH 0646/4122] power: supply: bq25890: Document
 POWER_SUPPLY_PROP_CURRENT_NOW

Document that POWER_SUPPLY_PROP_CURRENT_NOW really does refer to ADC-sampled
immediate battery charge current I_BAT , since the meaning is not clear with
all the currents which might be measured by charger chips.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 6020b58c641d2..1298d5720aa4b 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -588,7 +588,14 @@ static int bq25890_power_supply_get_property(struct power_supply *psy,
 		val->intval = 2304000 + ret * 20000;
 		break;
 
-	case POWER_SUPPLY_PROP_CURRENT_NOW:
+	case POWER_SUPPLY_PROP_CURRENT_NOW:	/* I_BAT now */
+		/*
+		 * This is ADC-sampled immediate charge current supplied
+		 * from charger to battery. The property name is confusing,
+		 * for clarification refer to:
+		 * Documentation/ABI/testing/sysfs-class-power
+		 * /sys/class/power_supply/<supply_name>/current_now
+		 */
 		ret = bq25890_field_read(bq, F_ICHGR); /* read measured value */
 		if (ret < 0)
 			return ret;
-- 
GitLab


From 8327a8abd4d7c842d81108b8361c199d0631d173 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 14 Oct 2022 19:24:22 +0200
Subject: [PATCH 0647/4122] power: supply: bq25890: Clean up
 POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT

Clean up misuse of POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT and
POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX and document what
exactly each value means.

The POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT content is newly read
back from hardware, while POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX
is reported as the maximum value set in DT.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 57 ++++++++++++++++++--------
 1 file changed, 41 insertions(+), 16 deletions(-)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 1298d5720aa4b..5924b036b1588 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -529,22 +529,6 @@ static int bq25890_power_supply_get_property(struct power_supply *psy,
 			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
 		break;
 
-	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:
-		val->intval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG);
-
-		/* When temperature is too low, charge current is decreased */
-		if (bq->state.ntc_fault == NTC_FAULT_COOL) {
-			ret = bq25890_field_read(bq, F_JEITA_ISET);
-			if (ret < 0)
-				return ret;
-
-			if (ret)
-				val->intval /= 5;
-			else
-				val->intval /= 2;
-		}
-		break;
-
 	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
 		if (!state.online) {
 			val->intval = 0;
@@ -604,6 +588,46 @@ static int bq25890_power_supply_get_property(struct power_supply *psy,
 		val->intval = ret * -50000;
 		break;
 
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:	/* I_BAT user limit */
+		/*
+		 * This is user-configured constant charge current supplied
+		 * from charger to battery in first phase of charging, when
+		 * battery voltage is below constant charge voltage.
+		 *
+		 * This value reflects the current hardware setting.
+		 *
+		 * The POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX is the
+		 * maximum value of this property.
+		 */
+		ret = bq25890_field_read(bq, F_ICHG);
+		if (ret < 0)
+			return ret;
+		val->intval = bq25890_find_val(ret, TBL_ICHG);
+
+		/* When temperature is too low, charge current is decreased */
+		if (bq->state.ntc_fault == NTC_FAULT_COOL) {
+			ret = bq25890_field_read(bq, F_JEITA_ISET);
+			if (ret < 0)
+				return ret;
+
+			if (ret)
+				val->intval /= 5;
+			else
+				val->intval /= 2;
+		}
+		break;
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX:	/* I_BAT max */
+		/*
+		 * This is maximum allowed constant charge current supplied
+		 * from charger to battery in first phase of charging, when
+		 * battery voltage is below constant charge voltage.
+		 *
+		 * This value is constant for each battery and set from DT.
+		 */
+		val->intval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG);
+		break;
+
 	case POWER_SUPPLY_PROP_TEMP:
 		ret = bq25890_field_read(bq, F_TSPCT);
 		if (ret < 0)
@@ -887,6 +911,7 @@ static const enum power_supply_property bq25890_power_supply_props[] = {
 	POWER_SUPPLY_PROP_CHARGE_TYPE,
 	POWER_SUPPLY_PROP_ONLINE,
 	POWER_SUPPLY_PROP_HEALTH,
+	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE,
 	POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX,
-- 
GitLab


From 7c85237519738ca4808b4fa79c2d448770f8d33f Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 14 Oct 2022 19:24:23 +0200
Subject: [PATCH 0648/4122] power: supply: bq25890: Clean up
 POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE

Clean up misuse of POWER_SUPPLY_PROP_VOLTAGE,
POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX
and POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE
and document what exactly each value means.

The POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE content is newly read
back from hardware, while POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX
is reported as the maximum value set in DT.

The POWER_SUPPLY_PROP_VOLTAGE is newly used to report immediate value
of battery voltage V_BAT, which is what this property was intended to
report and which has been thus far misused to report the charger chip
output voltage V_SYS.

The V_SYS is no longer reported as there is currently no suitable
property to report V_SYS. V_SYS reporting will be reinstated in
subsequent patch.

Signed-off-by: Marek Vasut <marex@denx.de>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 72 ++++++++++++++++----------
 1 file changed, 45 insertions(+), 27 deletions(-)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 5924b036b1588..050eef2571e8e 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -529,24 +529,6 @@ static int bq25890_power_supply_get_property(struct power_supply *psy,
 			val->intval = POWER_SUPPLY_HEALTH_UNSPEC_FAILURE;
 		break;
 
-	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
-		if (!state.online) {
-			val->intval = 0;
-			break;
-		}
-
-		ret = bq25890_field_read(bq, F_BATV); /* read measured value */
-		if (ret < 0)
-			return ret;
-
-		/* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */
-		val->intval = 2304000 + ret * 20000;
-		break;
-
-	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:
-		val->intval = bq25890_find_val(bq->init_data.vreg, TBL_VREG);
-		break;
-
 	case POWER_SUPPLY_PROP_PRECHARGE_CURRENT:
 		val->intval = bq25890_find_val(bq->init_data.iprechg, TBL_ITERM);
 		break;
@@ -563,15 +545,6 @@ static int bq25890_power_supply_get_property(struct power_supply *psy,
 		val->intval = bq25890_find_val(ret, TBL_IINLIM);
 		break;
 
-	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
-		ret = bq25890_field_read(bq, F_SYSV); /* read measured value */
-		if (ret < 0)
-			return ret;
-
-		/* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */
-		val->intval = 2304000 + ret * 20000;
-		break;
-
 	case POWER_SUPPLY_PROP_CURRENT_NOW:	/* I_BAT now */
 		/*
 		 * This is ADC-sampled immediate charge current supplied
@@ -628,6 +601,51 @@ static int bq25890_power_supply_get_property(struct power_supply *psy,
 		val->intval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG);
 		break;
 
+	case POWER_SUPPLY_PROP_VOLTAGE_NOW:	/* V_BAT now */
+		/*
+		 * This is ADC-sampled immediate charge voltage supplied
+		 * from charger to battery. The property name is confusing,
+		 * for clarification refer to:
+		 * Documentation/ABI/testing/sysfs-class-power
+		 * /sys/class/power_supply/<supply_name>/voltage_now
+		 */
+		ret = bq25890_field_read(bq, F_BATV); /* read measured value */
+		if (ret < 0)
+			return ret;
+
+		/* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */
+		val->intval = 2304000 + ret * 20000;
+		break;
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:	/* V_BAT user limit */
+		/*
+		 * This is user-configured constant charge voltage supplied
+		 * from charger to battery in second phase of charging, when
+		 * battery voltage reached constant charge voltage.
+		 *
+		 * This value reflects the current hardware setting.
+		 *
+		 * The POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX is the
+		 * maximum value of this property.
+		 */
+		ret = bq25890_field_read(bq, F_VREG);
+		if (ret < 0)
+			return ret;
+
+		val->intval = bq25890_find_val(ret, TBL_VREG);
+		break;
+
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX:	/* V_BAT max */
+		/*
+		 * This is maximum allowed constant charge voltage supplied
+		 * from charger to battery in second phase of charging, when
+		 * battery voltage reached constant charge voltage.
+		 *
+		 * This value is constant for each battery and set from DT.
+		 */
+		val->intval = bq25890_find_val(bq->init_data.vreg, TBL_VREG);
+		break;
+
 	case POWER_SUPPLY_PROP_TEMP:
 		ret = bq25890_field_read(bq, F_TSPCT);
 		if (ret < 0)
-- 
GitLab


From b63e60ebb3fb071a96667307b1e129ccac76ce6b Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 14 Oct 2022 19:24:24 +0200
Subject: [PATCH 0649/4122] power: supply: bq25890: Add support for setting
 user charge current and voltage limit

Let user set battery charge current and voltage limit via sysfs. This is
useful in case the user space needs to reduce charge current to keep the
system within thermal limits. The maximum charge current and voltage are
still limited to "ti,charge-current" and "ti,battery-regulation-voltage"
values to avoid damaging the hardware in case too high values are set by
user space.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 050eef2571e8e..95803157ac4af 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -667,9 +667,18 @@ static int bq25890_power_supply_set_property(struct power_supply *psy,
 					     const union power_supply_propval *val)
 {
 	struct bq25890_device *bq = power_supply_get_drvdata(psy);
+	int maxval;
 	u8 lval;
 
 	switch (psp) {
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+		maxval = bq25890_find_val(bq->init_data.ichg, TBL_ICHG);
+		lval = bq25890_find_idx(min(val->intval, maxval), TBL_ICHG);
+		return bq25890_field_write(bq, F_ICHG, lval);
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
+		maxval = bq25890_find_val(bq->init_data.vreg, TBL_VREG);
+		lval = bq25890_find_idx(min(val->intval, maxval), TBL_VREG);
+		return bq25890_field_write(bq, F_VREG, lval);
 	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
 		lval = bq25890_find_idx(val->intval, TBL_IINLIM);
 		return bq25890_field_write(bq, F_IINLIM, lval);
@@ -682,6 +691,8 @@ static int bq25890_power_supply_property_is_writeable(struct power_supply *psy,
 						      enum power_supply_property psp)
 {
 	switch (psp) {
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_CURRENT:
+	case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE:
 	case POWER_SUPPLY_PROP_INPUT_CURRENT_LIMIT:
 		return true;
 	default:
-- 
GitLab


From 5f5c10ecaf3fdeba9b2b0af5301977420c2c4df0 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 14 Oct 2022 19:24:25 +0200
Subject: [PATCH 0650/4122] power: supply: bq25890: Factor out regulator
 registration code

Pull the regulator registration code into separate function, so it can
be extended to register more regulators later. Currently this is only
moving ifdeffery into one place and other preparatory changes. The
dev_err_probe() output string is changed to explicitly list vbus
regulator failure, so that once more regulators are registered, it
would be clear which one failed.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 51 ++++++++++++++++++--------
 1 file changed, 35 insertions(+), 16 deletions(-)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 95803157ac4af..dad98b782a2f8 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -1110,6 +1110,36 @@ static const struct regulator_desc bq25890_vbus_desc = {
 	.fixed_uV = 5000000,
 	.n_voltages = 1,
 };
+
+static int bq25890_register_regulator(struct bq25890_device *bq)
+{
+	struct bq25890_platform_data *pdata = dev_get_platdata(bq->dev);
+	struct regulator_config cfg = {
+		.dev = bq->dev,
+		.driver_data = bq,
+	};
+	struct regulator_dev *reg;
+
+	if (!IS_ERR_OR_NULL(bq->usb_phy))
+		return 0;
+
+	if (pdata)
+		cfg.init_data = pdata->regulator_init_data;
+
+	reg = devm_regulator_register(bq->dev, &bq25890_vbus_desc, &cfg);
+	if (IS_ERR(reg)) {
+		return dev_err_probe(bq->dev, PTR_ERR(reg),
+				     "registering vbus regulator");
+	}
+
+	return 0;
+}
+#else
+static inline int
+bq25890_register_regulator(struct bq25890_device *bq)
+{
+	return 0;
+}
 #endif
 
 static int bq25890_get_chip_version(struct bq25890_device *bq)
@@ -1305,27 +1335,16 @@ static int bq25890_probe(struct i2c_client *client,
 
 	/* OTG reporting */
 	bq->usb_phy = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
+
+	ret = bq25890_register_regulator(bq);
+	if (ret)
+		return ret;
+
 	if (!IS_ERR_OR_NULL(bq->usb_phy)) {
 		INIT_WORK(&bq->usb_work, bq25890_usb_work);
 		bq->usb_nb.notifier_call = bq25890_usb_notifier;
 		usb_register_notifier(bq->usb_phy, &bq->usb_nb);
 	}
-#ifdef CONFIG_REGULATOR
-	else {
-		struct bq25890_platform_data *pdata = dev_get_platdata(dev);
-		struct regulator_config cfg = { };
-		struct regulator_dev *reg;
-
-		cfg.dev = dev;
-		cfg.driver_data = bq;
-		if (pdata)
-			cfg.init_data = pdata->regulator_init_data;
-
-		reg = devm_regulator_register(dev, &bq25890_vbus_desc, &cfg);
-		if (IS_ERR(reg))
-			return dev_err_probe(dev, PTR_ERR(reg), "registering regulator");
-	}
-#endif
 
 	ret = bq25890_power_supply_init(bq);
 	if (ret < 0) {
-- 
GitLab


From 85052e90007bd9e11123bd691a8131089178a4f8 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 14 Oct 2022 19:24:26 +0200
Subject: [PATCH 0651/4122] power: supply: bq25890: Add get_voltage support to
 Vbus regulator

The chip is capable of reporting Vbus voltage, add .get_voltage
implementation to Vbus regulator to report current Vbus voltage.
This requires for the Vbus regulator to be registered always
instead of the current state where the regulator is registered
only in case USB PHY is not found.

Do not provide Vbus regulator enable/disable ops in case USB PHY
is present, as they would race with USB PHY notifier which is also
used to toggle OTG boost mode.

Signed-off-by: Marek Vasut <marex@denx.de>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index dad98b782a2f8..ad5811304f88a 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -1095,10 +1095,18 @@ static int bq25890_vbus_is_enabled(struct regulator_dev *rdev)
 	return bq25890_field_read(bq, F_OTG_CFG);
 }
 
+static int bq25890_vbus_get_voltage(struct regulator_dev *rdev)
+{
+	struct bq25890_device *bq = rdev_get_drvdata(rdev);
+
+	return bq25890_get_vbus_voltage(bq);
+}
+
 static const struct regulator_ops bq25890_vbus_ops = {
 	.enable = bq25890_vbus_enable,
 	.disable = bq25890_vbus_disable,
 	.is_enabled = bq25890_vbus_is_enabled,
+	.get_voltage = bq25890_vbus_get_voltage,
 };
 
 static const struct regulator_desc bq25890_vbus_desc = {
@@ -1107,8 +1115,6 @@ static const struct regulator_desc bq25890_vbus_desc = {
 	.type = REGULATOR_VOLTAGE,
 	.owner = THIS_MODULE,
 	.ops = &bq25890_vbus_ops,
-	.fixed_uV = 5000000,
-	.n_voltages = 1,
 };
 
 static int bq25890_register_regulator(struct bq25890_device *bq)
@@ -1120,9 +1126,6 @@ static int bq25890_register_regulator(struct bq25890_device *bq)
 	};
 	struct regulator_dev *reg;
 
-	if (!IS_ERR_OR_NULL(bq->usb_phy))
-		return 0;
-
 	if (pdata)
 		cfg.init_data = pdata->regulator_init_data;
 
-- 
GitLab


From 14a3d159abf8f6013d40723856283705253e7e9a Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 14 Oct 2022 19:24:27 +0200
Subject: [PATCH 0652/4122] power: supply: bq25890: Add Vsys regulator

The chip is capable of reporting Vsys voltage supplied to the system.
Add regulator which represents the Vsys supply. This can be used e.g.
as a supply for system PMIC input.

Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 32 ++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index ad5811304f88a..f0362dcb935e9 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -1102,6 +1102,20 @@ static int bq25890_vbus_get_voltage(struct regulator_dev *rdev)
 	return bq25890_get_vbus_voltage(bq);
 }
 
+static int bq25890_vsys_get_voltage(struct regulator_dev *rdev)
+{
+	struct bq25890_device *bq = rdev_get_drvdata(rdev);
+	int ret;
+
+	/* Should be some output voltage ? */
+	ret = bq25890_field_read(bq, F_SYSV); /* read measured value */
+	if (ret < 0)
+		return ret;
+
+	/* converted_val = 2.304V + ADC_val * 20mV (table 10.3.15) */
+	return 2304000 + ret * 20000;
+}
+
 static const struct regulator_ops bq25890_vbus_ops = {
 	.enable = bq25890_vbus_enable,
 	.disable = bq25890_vbus_disable,
@@ -1117,6 +1131,18 @@ static const struct regulator_desc bq25890_vbus_desc = {
 	.ops = &bq25890_vbus_ops,
 };
 
+static const struct regulator_ops bq25890_vsys_ops = {
+	.get_voltage = bq25890_vsys_get_voltage,
+};
+
+static const struct regulator_desc bq25890_vsys_desc = {
+	.name = "vsys",
+	.of_match = "vsys",
+	.type = REGULATOR_VOLTAGE,
+	.owner = THIS_MODULE,
+	.ops = &bq25890_vsys_ops,
+};
+
 static int bq25890_register_regulator(struct bq25890_device *bq)
 {
 	struct bq25890_platform_data *pdata = dev_get_platdata(bq->dev);
@@ -1135,6 +1161,12 @@ static int bq25890_register_regulator(struct bq25890_device *bq)
 				     "registering vbus regulator");
 	}
 
+	reg = devm_regulator_register(bq->dev, &bq25890_vsys_desc, &cfg);
+	if (IS_ERR(reg)) {
+		return dev_err_probe(bq->dev, PTR_ERR(reg),
+				     "registering vsys regulator");
+	}
+
 	return 0;
 }
 #else
-- 
GitLab


From 310f541a027b1d5dc68f44f176cde618e6ee9691 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Wed, 12 Oct 2022 20:00:37 +0800
Subject: [PATCH 0653/4122] riscv: Enable HAVE_ARCH_HUGE_VMAP for 64BIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This sets the HAVE_ARCH_HUGE_VMAP option, and defines the required page
table functions. With this feature, ioremap area will be mapped with
huge page granularity according to its actual size. This feature can be
disabled by kernel parameter "nohugeiomap".

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: Björn Töpel <bjorn@kernel.org>
Tested-by: Björn Töpel <bjorn@kernel.org>
Link: https://lore.kernel.org/r/20221012120038.1034354-2-liushixin2@huawei.com
[Palmer: minor formatting]
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 .../features/vm/huge-vmap/arch-support.txt    |  2 +-
 arch/riscv/Kconfig                            |  1 +
 arch/riscv/include/asm/vmalloc.h              | 18 ++++
 arch/riscv/mm/Makefile                        |  1 +
 arch/riscv/mm/pgtable.c                       | 83 +++++++++++++++++++
 5 files changed, 104 insertions(+), 1 deletion(-)
 create mode 100644 arch/riscv/mm/pgtable.c

diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt
index 13b4940e0c3a7..7274a4b15bcc8 100644
--- a/Documentation/features/vm/huge-vmap/arch-support.txt
+++ b/Documentation/features/vm/huge-vmap/arch-support.txt
@@ -21,7 +21,7 @@
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: | TODO |
     |          sh: | TODO |
     |       sparc: | TODO |
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6b48a3ae98439..db2082dd456d6 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -72,6 +72,7 @@ config RISCV
 	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
 	select HAVE_ARCH_KASAN if MMU && 64BIT
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
index ff9abc00d1394..48da5371f1e9a 100644
--- a/arch/riscv/include/asm/vmalloc.h
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -1,4 +1,22 @@
 #ifndef _ASM_RISCV_VMALLOC_H
 #define _ASM_RISCV_VMALLOC_H
 
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+
+#define IOREMAP_MAX_ORDER (PUD_SHIFT)
+
+#define arch_vmap_pud_supported arch_vmap_pud_supported
+static inline bool arch_vmap_pud_supported(pgprot_t prot)
+{
+	return true;
+}
+
+#define arch_vmap_pmd_supported arch_vmap_pmd_supported
+static inline bool arch_vmap_pmd_supported(pgprot_t prot)
+{
+	return true;
+}
+
+#endif
+
 #endif /* _ASM_RISCV_VMALLOC_H */
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index d76aabf4b94d6..ce7f121ad2dc5 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -13,6 +13,7 @@ obj-y += extable.o
 obj-$(CONFIG_MMU) += fault.o pageattr.o
 obj-y += cacheflush.o
 obj-y += context.o
+obj-y += pgtable.o
 
 ifeq ($(CONFIG_MMU),y)
 obj-$(CONFIG_SMP) += tlbflush.o
diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c
new file mode 100644
index 0000000000000..6645ead1a7c16
--- /dev/null
+++ b/arch/riscv/mm/pgtable.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <asm/pgalloc.h>
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include <linux/pgtable.h>
+
+#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot)
+{
+	return 0;
+}
+
+void p4d_clear_huge(p4d_t *p4d)
+{
+}
+
+int pud_set_huge(pud_t *pud, phys_addr_t phys, pgprot_t prot)
+{
+	pud_t new_pud = pfn_pud(__phys_to_pfn(phys), prot);
+
+	set_pud(pud, new_pud);
+	return 1;
+}
+
+int pud_clear_huge(pud_t *pud)
+{
+	if (!pud_leaf(READ_ONCE(*pud)))
+		return 0;
+	pud_clear(pud);
+	return 1;
+}
+
+int pud_free_pmd_page(pud_t *pud, unsigned long addr)
+{
+	pmd_t *pmd = pud_pgtable(*pud);
+	int i;
+
+	pud_clear(pud);
+
+	flush_tlb_kernel_range(addr, addr + PUD_SIZE);
+
+	for (i = 0; i < PTRS_PER_PMD; i++) {
+		if (!pmd_none(pmd[i])) {
+			pte_t *pte = (pte_t *)pmd_page_vaddr(pmd[i]);
+
+			pte_free_kernel(NULL, pte);
+		}
+	}
+
+	pmd_free(NULL, pmd);
+
+	return 1;
+}
+
+int pmd_set_huge(pmd_t *pmd, phys_addr_t phys, pgprot_t prot)
+{
+	pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), prot);
+
+	set_pmd(pmd, new_pmd);
+	return 1;
+}
+
+int pmd_clear_huge(pmd_t *pmd)
+{
+	if (!pmd_leaf(READ_ONCE(*pmd)))
+		return 0;
+	pmd_clear(pmd);
+	return 1;
+}
+
+int pmd_free_pte_page(pmd_t *pmd, unsigned long addr)
+{
+	pte_t *pte = (pte_t *)pmd_page_vaddr(*pmd);
+
+	pmd_clear(pmd);
+
+	flush_tlb_kernel_range(addr, addr + PMD_SIZE);
+	pte_free_kernel(NULL, pte);
+	return 1;
+}
+
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-- 
GitLab


From be79afc740b5a1b2048cd67580cdb9d76d7e6cc2 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Wed, 12 Oct 2022 20:00:38 +0800
Subject: [PATCH 0654/4122] riscv: Enable HAVE_ARCH_HUGE_VMALLOC for 64BIT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After we support HAVE_ARCH_HUGE_VMAP, we can now enable
HAVE_ARCH_HUGE_VMALLOC too. This feature has been used in kvmalloc and
alloc_large_system_hash for now. This feature can be disabled by kernel
parameters "nohugevmalloc".

Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: Björn Töpel <bjorn@kernel.org>
Tested-by: Björn Töpel <bjorn@kernel.org>
Link: https://lore.kernel.org/r/20221012120038.1034354-3-liushixin2@huawei.com
[Palmer: minor formatting]
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index db2082dd456d6..7cd981f96f48b 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -72,6 +72,7 @@ config RISCV
 	select GENERIC_VDSO_TIME_NS if HAVE_GENERIC_VDSO
 	select HARDIRQS_SW_RESEND
 	select HAVE_ARCH_AUDITSYSCALL
+	select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
 	select HAVE_ARCH_HUGE_VMAP if MMU && 64BIT && !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL if !XIP_KERNEL
 	select HAVE_ARCH_JUMP_LABEL_RELATIVE if !XIP_KERNEL
-- 
GitLab


From 1d96c542b29129abbee819f355a1facbee07626e Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Fri, 28 Oct 2022 23:09:32 +0530
Subject: [PATCH 0655/4122] MAINTAINERS: Remove Hemant from MHI bus

Hemant moved out of Qualcomm and expressed his wish to not continue doing
any reviews for MHI patches. So let's remove him from MAINTAINERS file.

Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..ad9279218885d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13395,7 +13395,6 @@ F:	arch/arm64/boot/dts/marvell/armada-3720-uDPU.dts
 
 MHI BUS
 M:	Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
-R:	Hemant Kumar <quic_hemantk@quicinc.com>
 L:	mhi@lists.linux.dev
 L:	linux-arm-msm@vger.kernel.org
 S:	Maintained
-- 
GitLab


From 2fe5dc3dcc2920669217e3696ec15fec6630a92f Mon Sep 17 00:00:00 2001
From: Emily Peri <eperi1024@gmail.com>
Date: Fri, 28 Oct 2022 11:39:19 -0700
Subject: [PATCH 0656/4122] staging: rtl8723bs: replace ternary statement with
 min_t macro

Ternary statements that pick the min of two values can be replaced by
the macro min_t(). This improves readability, since its quicker to
understand min_t(type, x, y) than x < y ? x : y. Issue found by
coccicheck.

Signed-off-by: Emily Peri <eperi1024@gmail.com>
Link: https://lore.kernel.org/r/Y1wh1zYMAbbKSrGB@marshmallow
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index 6aeb169c6ebf0..54004f846cf0f 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -1551,7 +1551,7 @@ static int rtw_cfg80211_set_wpa_ie(struct adapter *padapter, u8 *pie, size_t iel
 
 		wps_ie = rtw_get_wps_ie(buf, ielen, NULL, &wps_ielen);
 		if (wps_ie && wps_ielen > 0) {
-			padapter->securitypriv.wps_ie_len = wps_ielen < MAX_WPS_IE_LEN ? wps_ielen : MAX_WPS_IE_LEN;
+			padapter->securitypriv.wps_ie_len = min_t(uint, wps_ielen, MAX_WPS_IE_LEN);
 			memcpy(padapter->securitypriv.wps_ie, wps_ie, padapter->securitypriv.wps_ie_len);
 			set_fwstate(&padapter->mlmepriv, WIFI_UNDER_WPS);
 		} else {
-- 
GitLab


From 737143dc87dfd3cb600a46092387c9caa814ce88 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 30 Oct 2022 09:10:20 +0100
Subject: [PATCH 0657/4122] staging: r8188eu: use min() instead of ternary
 operator

Replace a ternary operator usage with the min() macro to improve
readability.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221030081020.8533-1-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/os_dep/ioctl_linux.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index 8516e253bb031..22aab3f986b4d 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -3568,7 +3568,7 @@ static int rtw_wx_set_priv(struct net_device *dev,
 
 		if ((_VENDOR_SPECIFIC_IE_ == probereq_wpsie[0]) &&
 		    (!memcmp(&probereq_wpsie[2], wps_oui, 4))) {
-			cp_sz = probereq_wpsie_len > MAX_WPS_IE_LEN ? MAX_WPS_IE_LEN : probereq_wpsie_len;
+			cp_sz = min(probereq_wpsie_len, MAX_WPS_IE_LEN);
 
 			pmlmepriv->wps_probe_req_ie_len = 0;
 			kfree(pmlmepriv->wps_probe_req_ie);
-- 
GitLab


From cbcab58a7cbf51e0e1e8e5dec0bd44be48e966db Mon Sep 17 00:00:00 2001
From: Kang Minchul <tegongkang@gmail.com>
Date: Wed, 26 Oct 2022 02:06:18 +0900
Subject: [PATCH 0658/4122] staging: r8188eu: remove unnecessary variable in
 ioctl_linux

Returning value 0 directly instead of storing it in variable ret.
This commit can prevent cocci warning as follows:

  Unneeded variable: "ret". Return "0" on line 3030

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Link: https://lore.kernel.org/r/20221025170621.271903-2-tegongkang@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/os_dep/ioctl_linux.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index 22aab3f986b4d..f3b3d74685390 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -2979,8 +2979,6 @@ static int rtw_p2p_set(struct net_device *dev,
 			       struct iw_request_info *info,
 			       union iwreq_data *wrqu, char *extra)
 {
-	int ret = 0;
-
 	if (!memcmp(extra, "enable =", 7)) {
 		rtw_wext_p2p_enable(dev, info, wrqu, &extra[7]);
 	} else if (!memcmp(extra, "setDN =", 6)) {
@@ -3027,7 +3025,7 @@ static int rtw_p2p_set(struct net_device *dev,
 		rtw_p2p_set_persistent(dev, info, wrqu, &extra[11]);
 	}
 
-	return ret;
+	return 0;
 }
 
 static int rtw_p2p_get2(struct net_device *dev,
-- 
GitLab


From f67469fe658a484f8c000bfcf66a191417bd6e30 Mon Sep 17 00:00:00 2001
From: Kang Minchul <tegongkang@gmail.com>
Date: Wed, 26 Oct 2022 02:06:19 +0900
Subject: [PATCH 0659/4122] staging: r8188eu: remove unnecessary vaiable in
 rtw_recv

Return _SUCCESS directly instead of storing it in a variable.
This can prevent cocci warning as follows:

  Unneeded variable: "ret". Return "_SUCCESS" on line 1516

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Link: https://lore.kernel.org/r/20221025170621.271903-3-tegongkang@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_recv.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c
index 4b68a543f68b7..94f85cd7038dc 100644
--- a/drivers/staging/r8188eu/core/rtw_recv.c
+++ b/drivers/staging/r8188eu/core/rtw_recv.c
@@ -1415,7 +1415,6 @@ static int amsdu_to_msdu(struct adapter *padapter, struct recv_frame *prframe)
 
 	struct recv_priv *precvpriv = &padapter->recvpriv;
 	struct __queue *pfree_recv_queue = &precvpriv->free_recv_queue;
-	int	ret = _SUCCESS;
 
 	nr_subframes = 0;
 
@@ -1513,7 +1512,7 @@ exit:
 	prframe->len = 0;
 	rtw_free_recvframe(prframe, pfree_recv_queue);/* free this recv_frame */
 
-	return ret;
+	return _SUCCESS;
 }
 
 static bool check_indicate_seq(struct recv_reorder_ctrl *preorder_ctrl, u16 seq_num)
-- 
GitLab


From 0a7bf6a948b2a772ebed3915b6c47189d63dc246 Mon Sep 17 00:00:00 2001
From: Kang Minchul <tegongkang@gmail.com>
Date: Wed, 26 Oct 2022 02:06:20 +0900
Subject: [PATCH 0660/4122] staging: r8188eu: remove unnecessary variable in
 rtl8188eu_xmit

Return 0 directly instead of storing it in a variable.
This can prevent cocci warning as follows:

  Unneeded variable: "pull". Return "0" on line 298

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Link: https://lore.kernel.org/r/20221025170621.271903-4-tegongkang@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/rtl8188eu_xmit.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c b/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c
index 8e4a5acc0b188..6d1f56d1f9d72 100644
--- a/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c
+++ b/drivers/staging/r8188eu/hal/rtl8188eu_xmit.c
@@ -149,7 +149,6 @@ static void fill_txdesc_phy(struct pkt_attrib *pattrib, __le32 *pdw)
 
 static s32 update_txdesc(struct xmit_frame *pxmitframe, u8 *pmem, s32 sz, u8 bagg_pkt)
 {
-	int	pull = 0;
 	uint	qsel;
 	u8 data_rate, pwr_status, offset;
 	struct adapter		*adapt = pxmitframe->padapter;
@@ -295,7 +294,7 @@ static s32 update_txdesc(struct xmit_frame *pxmitframe, u8 *pmem, s32 sz, u8 bag
 	ODM_SetTxAntByTxInfo_88E(&haldata->odmpriv, pmem, pattrib->mac_id);
 
 	rtl8188eu_cal_txdesc_chksum(ptxdesc);
-	return pull;
+	return 0;
 }
 
 /* for non-agg data frame or  management frame */
-- 
GitLab


From 95571b8db71e586c8071670fde0c6db5e4ced353 Mon Sep 17 00:00:00 2001
From: Kang Minchul <tegongkang@gmail.com>
Date: Wed, 26 Oct 2022 02:06:21 +0900
Subject: [PATCH 0661/4122] staging: r8188eu: make rtw_sta_flush to void

Make function rtw_sta_flush to void in order to
prevent cocci warning as follows:

  Unneeded variable: "ret". Return "0" on line 1031

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Link: https://lore.kernel.org/r/20221025170621.271903-5-tegongkang@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_ap.c    | 7 ++-----
 drivers/staging/r8188eu/include/rtw_ap.h | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_ap.c b/drivers/staging/r8188eu/core/rtw_ap.c
index 24eb8dce9bfeb..e0ca4b6e17cca 100644
--- a/drivers/staging/r8188eu/core/rtw_ap.c
+++ b/drivers/staging/r8188eu/core/rtw_ap.c
@@ -1017,10 +1017,9 @@ u8 ap_free_sta(struct adapter *padapter, struct sta_info *psta,
 	return beacon_updated;
 }
 
-int rtw_sta_flush(struct adapter *padapter)
+void rtw_sta_flush(struct adapter *padapter)
 {
 	struct list_head *phead, *plist;
-	int ret = 0;
 	struct sta_info *psta = NULL;
 	struct sta_priv *pstapriv = &padapter->stapriv;
 	struct mlme_ext_priv *pmlmeext = &padapter->mlmeextpriv;
@@ -1028,7 +1027,7 @@ int rtw_sta_flush(struct adapter *padapter)
 	u8 bc_addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 
 	if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE)
-		return ret;
+		return;
 
 	spin_lock_bh(&pstapriv->asoc_list_lock);
 	phead = &pstapriv->asoc_list;
@@ -1050,8 +1049,6 @@ int rtw_sta_flush(struct adapter *padapter)
 	issue_deauth(padapter, bc_addr, WLAN_REASON_DEAUTH_LEAVING);
 
 	associated_clients_update(padapter, true);
-
-	return ret;
 }
 
 /* called > TSR LEVEL for USB or SDIO Interface*/
diff --git a/drivers/staging/r8188eu/include/rtw_ap.h b/drivers/staging/r8188eu/include/rtw_ap.h
index 8b4134eb30956..89b02c97e041d 100644
--- a/drivers/staging/r8188eu/include/rtw_ap.h
+++ b/drivers/staging/r8188eu/include/rtw_ap.h
@@ -26,7 +26,7 @@ u8 bss_cap_update_on_sta_leave(struct adapter *padapter, struct sta_info *psta);
 void sta_info_update(struct adapter *padapter, struct sta_info *psta);
 u8 ap_free_sta(struct adapter *padapter, struct sta_info *psta,
 	       bool active, u16 reason);
-int rtw_sta_flush(struct adapter *padapter);
+void rtw_sta_flush(struct adapter *padapter);
 void start_ap_mode(struct adapter *padapter);
 void stop_ap_mode(struct adapter *padapter);
 void update_bmc_sta(struct adapter *padapter);
-- 
GitLab


From f369953d9cd99a3977859a4bbad58897bc3da153 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Thu, 27 Oct 2022 20:56:27 +0700
Subject: [PATCH 0662/4122] staging: rtl8192e: rtllib_module: remove
 unnecessary parentheses

This patch is intended to remove unnecessary parentheses in the
rtllib_module.c file following the Linux kernel coding-style
regulations. The modification is recommended by the checkpatch script.

Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/20221027135627.vzc3woeuhrivozqz@plymouth
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_module.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtllib_module.c b/drivers/staging/rtl8192e/rtllib_module.c
index 41697ef55dbd2..ce8b73f437a3d 100644
--- a/drivers/staging/rtl8192e/rtllib_module.c
+++ b/drivers/staging/rtl8192e/rtllib_module.c
@@ -107,7 +107,7 @@ struct net_device *alloc_rtllib(int sizeof_priv)
 	spin_lock_init(&ieee->lock);
 	spin_lock_init(&ieee->wpax_suitlist_lock);
 	spin_lock_init(&ieee->reorder_spinlock);
-	atomic_set(&(ieee->atm_swbw), 0);
+	atomic_set(&ieee->atm_swbw, 0);
 
 	/* SAM FIXME */
 	lib80211_crypt_info_init(&ieee->crypt_info, "RTLLIB", &ieee->lock);
-- 
GitLab


From 607732b42ce6aad76bbca3de8ea853c03e374958 Mon Sep 17 00:00:00 2001
From: Yogesh Hegde <yogi.kernel@gmail.com>
Date: Sun, 30 Oct 2022 19:02:04 +0530
Subject: [PATCH 0663/4122] staging: rtl8192e: Rename variables rateIndex and
 rateBitmap to avoid CamelCase

Rename variables
* rateIndex to rate_index
* rateBitmap to rate_bitmap
to avoid CamelCase which is not accepted by checkpatch.pl .

Signed-off-by: Yogesh Hegde <yogi.kernel@gmail.com>
Link: https://lore.kernel.org/r/20221030133204.GA416592@zephyrus
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
index c9e495538e2c7..767c746fc73d9 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
@@ -2121,8 +2121,8 @@ static void _rtl92e_dm_end_sw_fsync(struct net_device *dev)
 static void _rtl92e_dm_start_sw_fsync(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	u32			rateIndex;
-	u32			rateBitmap;
+	u32 rate_index;
+	u32 rate_bitmap;
 
 	priv->rate_record = 0;
 	priv->ContinueDiffCount = 0;
@@ -2136,12 +2136,12 @@ static void _rtl92e_dm_start_sw_fsync(struct net_device *dev)
 		priv->rtllib->fsync_firstdiff_ratethreshold = 200;
 		priv->rtllib->fsync_seconddiff_ratethreshold = 200;
 	}
-	for (rateIndex = 0; rateIndex <= 27; rateIndex++) {
-		rateBitmap  = 1 << rateIndex;
-		if (priv->rtllib->fsync_rate_bitmap & rateBitmap)
+	for (rate_index = 0; rate_index <= 27; rate_index++) {
+		rate_bitmap  = 1 << rate_index;
+		if (priv->rtllib->fsync_rate_bitmap & rate_bitmap)
 			priv->rate_record +=
 				 priv->stats.received_rate_histogram[1]
-				[rateIndex];
+				[rate_index];
 	}
 	if (timer_pending(&priv->fsync_timer))
 		del_timer_sync(&priv->fsync_timer);
-- 
GitLab


From 130d7c481008703b4b103c9322e5fdfac130cc55 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Wed, 26 Oct 2022 19:39:28 +0700
Subject: [PATCH 0664/4122] staging: rtl8192e: rtllib_crypt_tkip: multiple
 blank lines removal

Removed multiple unnecessary blank lines in accordance with the
Linux kernel coding-style regulations. The issues were reported by
the checkpatch script.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/f2103758c23f37b61fcbe14f8ed0da8d6b31f5c6.1666787061.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 24 --------------------
 1 file changed, 24 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
index 8bc95651e384f..468f28f48ebaa 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
@@ -91,7 +91,6 @@ fail:
 	return NULL;
 }
 
-
 static void rtllib_tkip_deinit(void *priv)
 {
 	struct rtllib_tkip_data *_priv = priv;
@@ -103,49 +102,41 @@ static void rtllib_tkip_deinit(void *priv)
 	kfree_sensitive(priv);
 }
 
-
 static inline u16 RotR1(u16 val)
 {
 	return (val >> 1) | (val << 15);
 }
 
-
 static inline u8 Lo8(u16 val)
 {
 	return val & 0xff;
 }
 
-
 static inline u8 Hi8(u16 val)
 {
 	return val >> 8;
 }
 
-
 static inline u16 Lo16(u32 val)
 {
 	return val & 0xffff;
 }
 
-
 static inline u16 Hi16(u32 val)
 {
 	return val >> 16;
 }
 
-
 static inline u16 Mk16(u8 hi, u8 lo)
 {
 	return lo | (hi << 8);
 }
 
-
 static inline u16 Mk16_le(u16 *v)
 {
 	return *v;
 }
 
-
 static const u16 Sbox[256] = {
 	0xC6A5, 0xF884, 0xEE99, 0xF68D, 0xFF0D, 0xD6BD, 0xDEB1, 0x9154,
 	0x6050, 0x0203, 0xCEA9, 0x567D, 0xE719, 0xB562, 0x4DE6, 0xEC9A,
@@ -181,17 +172,14 @@ static const u16 Sbox[256] = {
 	0x82C3, 0x29B0, 0x5A77, 0x1E11, 0x7BCB, 0xA8FC, 0x6DD6, 0x2C3A,
 };
 
-
 static inline u16 _S_(u16 v)
 {
 	u16 t = Sbox[Hi8(v)];
 	return Sbox[Lo8(v)] ^ ((t << 8) | (t >> 8));
 }
 
-
 #define PHASE1_LOOP_COUNT 8
 
-
 static void tkip_mixing_phase1(u16 *TTAK, const u8 *TK, const u8 *TA, u32 IV32)
 {
 	int i, j;
@@ -213,7 +201,6 @@ static void tkip_mixing_phase1(u16 *TTAK, const u8 *TK, const u8 *TA, u32 IV32)
 	}
 }
 
-
 static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK,
 			       u16 IV16)
 {
@@ -263,7 +250,6 @@ static void tkip_mixing_phase2(u8 *WEPSeed, const u8 *TK, const u16 *TTAK,
 #endif
 }
 
-
 static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct rtllib_tkip_data *tkey = priv;
@@ -293,7 +279,6 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	} else
 		tkey->tx_phase1_done = 1;
 
-
 	len = skb->len - hdr_len;
 	pos = skb_push(skb, 8);
 	memmove(pos, pos + 8, hdr_len);
@@ -337,7 +322,6 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		return ret;
 	return 0;
 
-
 }
 
 static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
@@ -453,7 +437,6 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	return keyidx;
 }
 
-
 static int michael_mic(struct crypto_shash *tfm_michael, u8 *key, u8 *hdr,
 		       u8 *data, size_t data_len, u8 *mic)
 {
@@ -511,7 +494,6 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 *hdr)
 	hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */
 }
 
-
 static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv)
 {
 	struct rtllib_tkip_data *tkey = priv;
@@ -539,7 +521,6 @@ static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv)
 	return 0;
 }
 
-
 static void rtllib_michael_mic_failure(struct net_device *dev,
 				       struct rtllib_hdr_4addr *hdr,
 				       int keyidx)
@@ -609,7 +590,6 @@ static int rtllib_michael_mic_verify(struct sk_buff *skb, int keyidx,
 	return 0;
 }
 
-
 static int rtllib_tkip_set_key(void *key, int len, u8 *seq, void *priv)
 {
 	struct rtllib_tkip_data *tkey = priv;
@@ -640,7 +620,6 @@ static int rtllib_tkip_set_key(void *key, int len, u8 *seq, void *priv)
 	return 0;
 }
 
-
 static int rtllib_tkip_get_key(void *key, int len, u8 *seq, void *priv)
 {
 	struct rtllib_tkip_data *tkey = priv;
@@ -671,7 +650,6 @@ static int rtllib_tkip_get_key(void *key, int len, u8 *seq, void *priv)
 	return TKIP_KEY_LEN;
 }
 
-
 static void rtllib_tkip_print_stats(struct seq_file *m, void *priv)
 {
 	struct rtllib_tkip_data *tkip = priv;
@@ -713,13 +691,11 @@ static struct lib80211_crypto_ops rtllib_crypt_tkip = {
 	.owner			= THIS_MODULE,
 };
 
-
 static int __init rtllib_crypto_tkip_init(void)
 {
 	return lib80211_register_crypto_ops(&rtllib_crypt_tkip);
 }
 
-
 static void __exit rtllib_crypto_tkip_exit(void)
 {
 	lib80211_unregister_crypto_ops(&rtllib_crypt_tkip);
-- 
GitLab


From 6360fc223387179bb943d8865b853d0bb273dcf4 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Wed, 26 Oct 2022 19:40:15 +0700
Subject: [PATCH 0665/4122] staging: rtl8192e: rtllib_crypt_tkip: blank line
 before close brace removal

Removed multiple blank lines that are not necessary before a closing
brace. The issues were found with the checkpatch script and were
dealt with in accordance with the Linux kernel coding-style guidelines.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/1ba11754f1b1e39b1525a837b8493ba8434d5e3b.1666787061.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
index 468f28f48ebaa..40930c9b00179 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
@@ -321,7 +321,6 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	if (!tcb_desc->bHwSec)
 		return ret;
 	return 0;
-
 }
 
 static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
@@ -420,7 +419,6 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 			tkey->dot11RSNAStatsTKIPICVErrors++;
 			return -5;
 		}
-
 	}
 
 	/* Update real counters only after Michael MIC verification has
-- 
GitLab


From dcbdcfca138ae185d7013e25ff02d9defd392884 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Wed, 26 Oct 2022 19:41:08 +0700
Subject: [PATCH 0666/4122] staging: rtl8192e: rtllib_crypt_tkip: fixed
 alignment matching open parentheses

Aligned multiple lines to be at the same indentation of open
parentheses before it in accordance with the Linux kernel
coding-style regulations. The issues were found by running the
checkpatch script on the file.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/4253b94d6b7d94713afb02fa63d0a98686e77cc1.1666787061.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
index 40930c9b00179..6c98f43f967e2 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
@@ -271,7 +271,7 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 	if (!tcb_desc->bHwSec) {
 		if (!tkey->tx_phase1_done) {
 			tkip_mixing_phase1(tkey->tx_ttak, tkey->key, hdr->addr2,
-					tkey->tx_iv32);
+					   tkey->tx_iv32);
 			tkey->tx_phase1_done = 1;
 		}
 		tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak,
@@ -372,8 +372,8 @@ static int rtllib_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv)
 
 	if (!tcb_desc->bHwSec || (skb->cb[0] == 1)) {
 		if ((iv32 < tkey->rx_iv32 ||
-		    (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) &&
-		    tkey->initialized) {
+		     (iv32 == tkey->rx_iv32 && iv16 <= tkey->rx_iv16)) &&
+		     tkey->initialized) {
 			if (net_ratelimit()) {
 				netdev_dbg(skb->dev,
 					   "Replay detected: STA= %pM previous TSC %08x%04x received TSC %08x%04x\n",
@@ -513,7 +513,7 @@ static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv)
 		tkey->tx_hdr[12] = *(skb->data + hdr_len - 2) & 0x07;
 	pos = skb_put(skb, 8);
 	if (michael_mic(tkey->tx_tfm_michael, &tkey->key[16], tkey->tx_hdr,
-	    skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
+			skb->data + hdr_len, skb->len - 8 - hdr_len, pos))
 		return -1;
 
 	return 0;
-- 
GitLab


From 8d1dcc729ada8e604442190e2887ff85d37ea6ca Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Wed, 26 Oct 2022 19:41:56 +0700
Subject: [PATCH 0667/4122] staging: rtl8192e: rtllib_crypt_tkip: fixes on
 unbalanced braces

Added braces around needed arms of statements which needs them
in accordance with the Linux kernel coding-style regulations.
The issues were found with the help of the checkpatch script.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/641d8360c5f86b54efc96d7f8ef70be1371db480.1666787061.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
index 6c98f43f967e2..d1b86de76eb20 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
@@ -276,8 +276,9 @@ static int rtllib_tkip_encrypt(struct sk_buff *skb, int hdr_len, void *priv)
 		}
 		tkip_mixing_phase2(rc4key, tkey->key, tkey->tx_ttak,
 				   tkey->tx_iv16);
-	} else
+	} else {
 		tkey->tx_phase1_done = 1;
+	}
 
 	len = skb->len - hdr_len;
 	pos = skb_push(skb, 8);
@@ -610,10 +611,11 @@ static int rtllib_tkip_set_key(void *key, int len, u8 *seq, void *priv)
 				(seq[3] << 8) | seq[2];
 			tkey->rx_iv16 = (seq[1] << 8) | seq[0];
 		}
-	} else if (len == 0)
+	} else if (len == 0) {
 		tkey->key_set = 0;
-	else
+	} else {
 		return -1;
+	}
 
 	return 0;
 }
-- 
GitLab


From 1f610736f7f40efbcac07a1c100ef703c74d30c1 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Wed, 26 Oct 2022 19:42:55 +0700
Subject: [PATCH 0668/4122] staging: rtl8192e: rtllib_crypt_tkip: split
 multiple assignments

Split a multiple assignments statement to individual assignments
on different lines in accordance with the Linux kernel coding-style
regulations. Also repositioned comments on it and the statement
before for increased legibility. The multiple assignments issue
was found by the checkpatch script, with the comments legibility
issue were through direct observation.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/a9ab257d0042afd3b3231eefe4f58c0c3ac7649f.1666787061.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
index d1b86de76eb20..b8a52b9f9d070 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
@@ -488,9 +488,13 @@ static void michael_mic_hdr(struct sk_buff *skb, u8 *hdr)
 		break;
 	}
 
-	hdr[12] = 0; /* priority */
+	/* priority */
+	hdr[12] = 0;
 
-	hdr[13] = hdr[14] = hdr[15] = 0; /* reserved */
+	/* reserved */
+	hdr[13] = 0;
+	hdr[14] = 0;
+	hdr[15] = 0;
 }
 
 static int rtllib_michael_mic_add(struct sk_buff *skb, int hdr_len, void *priv)
-- 
GitLab


From 1838742b1d4e6d89d9448fba2f3340b0aaaede73 Mon Sep 17 00:00:00 2001
From: Aaron Lawrence <t4rmin@zohomail.com>
Date: Wed, 26 Oct 2022 19:43:51 +0700
Subject: [PATCH 0669/4122] staging: rtl8192e: rtllib_crypt_tkip: rewritten
 comparison to NULL

Rewritten a comparison to NULL with a negation operator in
accordance with the Linux kernel coding-style regulations. The fix
was directly recommended by the checkpatch script.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Aaron Lawrence <t4rmin@zohomail.com>
Link: https://lore.kernel.org/r/ca33296630627020694f4b653580f689a8a3d1c7.1666787061.git.t4rmin@zohomail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_crypt_tkip.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
index b8a52b9f9d070..9fdfcc017ee69 100644
--- a/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
+++ b/drivers/staging/rtl8192e/rtllib_crypt_tkip.c
@@ -62,7 +62,7 @@ static void *rtllib_tkip_init(int key_idx)
 		return NULL;
 
 	priv = kzalloc(sizeof(*priv), GFP_ATOMIC);
-	if (priv == NULL)
+	if (!priv)
 		goto fail;
 	priv->key_idx = key_idx;
 
-- 
GitLab


From 1a048cde49046f37f91d340d25d7622f8f87a994 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 29 Oct 2022 19:10:10 +0200
Subject: [PATCH 0670/4122] staging: r8188eu: replace get_da with
 ieee80211_get_DA

Replace a call to the driver-specific get_da function with
ieee80211_get_DA from ieee80211.h.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221029171011.1572091-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index ffb708f242e6c..127dbc4e8b9ac 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -798,11 +798,12 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 	unsigned char	*p;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data;
 	u8 *pframe = precv_frame->rx_data;
 	uint pkt_len = precv_frame->len;
 
 	/* check A1 matches or not */
-	if (memcmp(myid(&padapter->eeprompriv), get_da(pframe), ETH_ALEN))
+	if (memcmp(myid(&padapter->eeprompriv), ieee80211_get_DA(hdr), ETH_ALEN))
 		return;
 
 	if (!(pmlmeinfo->state & WIFI_FW_AUTH_STATE))
-- 
GitLab


From dbc97f832aa798c16453eeb4eb5fe74b5c998223 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 29 Oct 2022 19:10:11 +0200
Subject: [PATCH 0671/4122] staging: r8188eu: remove get_da

Replace the last get_da call with ieee80211_get_DA and remove the get_da
function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221029171011.1572091-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c |  3 ++-
 drivers/staging/r8188eu/include/wifi.h        | 22 -------------------
 2 files changed, 2 insertions(+), 23 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
index dff0cba751df4..f01ae71bcdb11 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
@@ -108,6 +108,7 @@ void update_recvframe_attrib_88e(struct recv_frame *precvframe, struct recv_stat
  */
 void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat *pphy_status)
 {
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precvframe->rx_data;
 	struct adapter *padapter = precvframe->adapter;
 	struct rx_pkt_attrib *pattrib = &precvframe->attrib;
 	struct hal_data_8188e *pHalData = &padapter->haldata;
@@ -125,7 +126,7 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat
 		 get_bssid(&padapter->mlmepriv), ETH_ALEN));
 
 	pkt_info.bPacketToSelf = pkt_info.bPacketMatchBSSID &&
-				 (!memcmp(get_da(wlanhdr),
+				 (!memcmp(ieee80211_get_DA(hdr),
 				  myid(&padapter->eeprompriv), ETH_ALEN));
 
 	pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID && ieee80211_is_beacon(fc);
diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h
index 6b50089cea294..92a584a8b6c07 100644
--- a/drivers/staging/r8188eu/include/wifi.h
+++ b/drivers/staging/r8188eu/include/wifi.h
@@ -217,28 +217,6 @@ enum WIFI_REG_DOMAIN {
 
 #define GetAddr4Ptr(pbuf)	((unsigned char *)((size_t)(pbuf) + 24))
 
-static inline unsigned char *get_da(unsigned char *pframe)
-{
-	unsigned char	*da;
-	unsigned int to_fr_ds = (GetToDs(pframe) << 1) | GetFrDs(pframe);
-
-	switch (to_fr_ds) {
-	case 0x00:	/*  ToDs=0, FromDs=0 */
-		da = GetAddr1Ptr(pframe);
-		break;
-	case 0x01:	/*  ToDs=0, FromDs=1 */
-		da = GetAddr1Ptr(pframe);
-		break;
-	case 0x02:	/*  ToDs=1, FromDs=0 */
-		da = GetAddr3Ptr(pframe);
-		break;
-	default:	/*  ToDs=1, FromDs=1 */
-		da = GetAddr3Ptr(pframe);
-		break;
-	}
-	return da;
-}
-
 static inline unsigned char *get_sa(unsigned char *pframe)
 {
 	unsigned char	*sa;
-- 
GitLab


From c5a7eecdcdb6c9d3a907ee66db2b5cffc8bdb4e1 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcaov@gmail.com>
Date: Tue, 25 Oct 2022 11:12:23 +0200
Subject: [PATCH 0672/4122] Revert "staging: r8712u: Tracking kmemleak false
 positives."

This reverts commit 5d3da4a20a271e3cf5496a50cbb8118aa019374f.

This commit annotated false positive for kmemleak. The reasoning is that
the buffers are freed when the driver is unloaded. However, there is
actually potential memory leak when probe fails.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Nam Cao <namcaov@gmail.com>
Link: https://lore.kernel.org/r/26ce206b2c40c7db48c146aa6105789db9dfcc1a.1666688642.git.namcaov@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/rtl871x_recv.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/rtl8712/rtl871x_recv.c b/drivers/staging/rtl8712/rtl871x_recv.c
index de9a568eaffaa..4db7eed64a034 100644
--- a/drivers/staging/rtl8712/rtl871x_recv.c
+++ b/drivers/staging/rtl8712/rtl871x_recv.c
@@ -17,9 +17,7 @@
 #define _RTL871X_RECV_C_
 
 #include <linux/ip.h>
-#include <linux/slab.h>
 #include <linux/if_ether.h>
-#include <linux/kmemleak.h>
 #include <linux/etherdevice.h>
 #include <linux/ieee80211.h>
 #include <net/cfg80211.h>
@@ -61,7 +59,6 @@ void _r8712_init_recv_priv(struct recv_priv *precvpriv,
 				GFP_ATOMIC);
 	if (!precvpriv->pallocated_frame_buf)
 		return;
-	kmemleak_not_leak(precvpriv->pallocated_frame_buf);
 	precvpriv->precv_frame_buf = precvpriv->pallocated_frame_buf +
 				    RXFRAME_ALIGN_SZ -
 				    ((addr_t)(precvpriv->pallocated_frame_buf) &
-- 
GitLab


From 63b5e50571ce4d87cd6c1c4d99de74c9d86fd0d8 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcaov@gmail.com>
Date: Tue, 25 Oct 2022 11:12:24 +0200
Subject: [PATCH 0673/4122] staging: rtl8712: check for alloc fail in
 _r8712_init_recv_priv()

The function _r8712_init_recv_priv() and also r8712_init_recv_priv()
just returns silently if they fail to allocate memory. Change their
return type to int and add necessary checks and handling if they return
-ENOMEM

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Nam Cao <namcaov@gmail.com>
Link: https://lore.kernel.org/r/506ac35a667e511db568b06b86834fd0ceeba453.1666688642.git.namcaov@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/os_intfs.c     |  4 +++-
 drivers/staging/rtl8712/recv_osdep.h   |  8 ++++----
 drivers/staging/rtl8712/rtl8712_recv.c |  7 ++++---
 drivers/staging/rtl8712/rtl871x_recv.c | 13 +++++++++----
 4 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c
index 003e972051240..12adb470d216c 100644
--- a/drivers/staging/rtl8712/os_intfs.c
+++ b/drivers/staging/rtl8712/os_intfs.c
@@ -309,7 +309,9 @@ int r8712_init_drv_sw(struct _adapter *padapter)
 	if (ret)
 		return ret;
 	_r8712_init_xmit_priv(&padapter->xmitpriv, padapter);
-	_r8712_init_recv_priv(&padapter->recvpriv, padapter);
+	ret = _r8712_init_recv_priv(&padapter->recvpriv, padapter);
+	if (ret)
+		return ret;
 	memset((unsigned char *)&padapter->securitypriv, 0,
 	       sizeof(struct security_priv));
 	timer_setup(&padapter->securitypriv.tkip_timer,
diff --git a/drivers/staging/rtl8712/recv_osdep.h b/drivers/staging/rtl8712/recv_osdep.h
index d8c1fa74f5440..fbe3f28685064 100644
--- a/drivers/staging/rtl8712/recv_osdep.h
+++ b/drivers/staging/rtl8712/recv_osdep.h
@@ -18,15 +18,15 @@
 #include "drv_types.h"
 #include <linux/skbuff.h>
 
-void _r8712_init_recv_priv(struct recv_priv *precvpriv,
-			   struct _adapter *padapter);
+int _r8712_init_recv_priv(struct recv_priv *precvpriv,
+			  struct _adapter *padapter);
 void _r8712_free_recv_priv(struct recv_priv *precvpriv);
 void r8712_recv_entry(union recv_frame *precv_frame);
 void r8712_recv_indicatepkt(struct _adapter *adapter,
 			    union recv_frame *precv_frame);
 void r8712_handle_tkip_mic_err(struct _adapter *padapter, u8 bgroup);
-void r8712_init_recv_priv(struct recv_priv *precvpriv,
-			  struct _adapter *padapter);
+int r8712_init_recv_priv(struct recv_priv *precvpriv,
+			 struct _adapter *padapter);
 void r8712_free_recv_priv(struct recv_priv *precvpriv);
 void r8712_os_recv_resource_alloc(struct _adapter *padapter,
 				  union recv_frame *precvframe);
diff --git a/drivers/staging/rtl8712/rtl8712_recv.c b/drivers/staging/rtl8712/rtl8712_recv.c
index 7f1fdd0585511..7da014ab0723e 100644
--- a/drivers/staging/rtl8712/rtl8712_recv.c
+++ b/drivers/staging/rtl8712/rtl8712_recv.c
@@ -30,8 +30,8 @@
 
 static void recv_tasklet(struct tasklet_struct *t);
 
-void r8712_init_recv_priv(struct recv_priv *precvpriv,
-			  struct _adapter *padapter)
+int r8712_init_recv_priv(struct recv_priv *precvpriv,
+			 struct _adapter *padapter)
 {
 	int i;
 	struct recv_buf *precvbuf;
@@ -44,7 +44,7 @@ void r8712_init_recv_priv(struct recv_priv *precvpriv,
 	precvpriv->pallocated_recv_buf =
 		kzalloc(NR_RECVBUFF * sizeof(struct recv_buf) + 4, GFP_ATOMIC);
 	if (!precvpriv->pallocated_recv_buf)
-		return;
+		return -ENOMEM;
 	precvpriv->precv_buf = precvpriv->pallocated_recv_buf + 4 -
 			      ((addr_t)(precvpriv->pallocated_recv_buf) & 3);
 	precvbuf = (struct recv_buf *)precvpriv->precv_buf;
@@ -75,6 +75,7 @@ void r8712_init_recv_priv(struct recv_priv *precvpriv,
 		}
 		pskb = NULL;
 	}
+	return 0;
 }
 
 void r8712_free_recv_priv(struct recv_priv *precvpriv)
diff --git a/drivers/staging/rtl8712/rtl871x_recv.c b/drivers/staging/rtl8712/rtl871x_recv.c
index 4db7eed64a034..8a3566214af72 100644
--- a/drivers/staging/rtl8712/rtl871x_recv.c
+++ b/drivers/staging/rtl8712/rtl871x_recv.c
@@ -42,9 +42,10 @@ void _r8712_init_sta_recv_priv(struct sta_recv_priv *psta_recvpriv)
 	_init_queue(&psta_recvpriv->defrag_q);
 }
 
-void _r8712_init_recv_priv(struct recv_priv *precvpriv,
-			   struct _adapter *padapter)
+int _r8712_init_recv_priv(struct recv_priv *precvpriv,
+			  struct _adapter *padapter)
 {
+	int ret;
 	sint i;
 	union recv_frame *precvframe;
 
@@ -58,7 +59,7 @@ void _r8712_init_recv_priv(struct recv_priv *precvpriv,
 				sizeof(union recv_frame) + RXFRAME_ALIGN_SZ,
 				GFP_ATOMIC);
 	if (!precvpriv->pallocated_frame_buf)
-		return;
+		return -ENOMEM;
 	precvpriv->precv_frame_buf = precvpriv->pallocated_frame_buf +
 				    RXFRAME_ALIGN_SZ -
 				    ((addr_t)(precvpriv->pallocated_frame_buf) &
@@ -73,7 +74,11 @@ void _r8712_init_recv_priv(struct recv_priv *precvpriv,
 		precvframe++;
 	}
 	precvpriv->rx_pending_cnt = 1;
-	r8712_init_recv_priv(precvpriv, padapter);
+	ret = r8712_init_recv_priv(precvpriv, padapter);
+	if (ret)
+		kfree(precvpriv->pallocated_frame_buf);
+
+	return ret;
 }
 
 void _r8712_free_recv_priv(struct recv_priv *precvpriv)
-- 
GitLab


From 242443430dd8ef932a56cf483080a46f2948dd65 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcaov@gmail.com>
Date: Tue, 25 Oct 2022 11:12:25 +0200
Subject: [PATCH 0674/4122] staging: rtl8712: check for return value of
 _r8712_init_xmit_priv()

The return value of _r8712_init_xmit_priv() is never checked and the driver
always continue execution as if all is well. This will cause problems
if, for example, buffers cannot be allocated and the driver continue and
use those buffers.

Check for return value of _r8712_init_xmit_priv() and return error (if any)
during probing.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Nam Cao <namcaov@gmail.com>
Link: https://lore.kernel.org/r/b550803561acf26af71f2377215c28b94435a644.1666688642.git.namcaov@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/os_intfs.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c
index 12adb470d216c..205b7d66a40ab 100644
--- a/drivers/staging/rtl8712/os_intfs.c
+++ b/drivers/staging/rtl8712/os_intfs.c
@@ -308,7 +308,9 @@ int r8712_init_drv_sw(struct _adapter *padapter)
 	ret = r8712_init_mlme_priv(padapter);
 	if (ret)
 		return ret;
-	_r8712_init_xmit_priv(&padapter->xmitpriv, padapter);
+	ret = _r8712_init_xmit_priv(&padapter->xmitpriv, padapter);
+	if (ret)
+		return ret;
 	ret = _r8712_init_recv_priv(&padapter->recvpriv, padapter);
 	if (ret)
 		return ret;
-- 
GitLab


From 336ccc31cda111c830332d4b56def49c9c1d48e6 Mon Sep 17 00:00:00 2001
From: Nam Cao <namcaov@gmail.com>
Date: Tue, 25 Oct 2022 11:12:26 +0200
Subject: [PATCH 0675/4122] staging: rtl8712: fix potential memory leak

In r8712_init_drv_sw(), whenever any function call returns error, it is
returned immediately without properly cleaning up the other successfully
executed functions. This can cause memory leak.

Instead of return immediately, free all the allocated buffers first.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Signed-off-by: Nam Cao <namcaov@gmail.com>
Link: https://lore.kernel.org/r/0a3414b12031f6cdcba81a8725e91eb9567ff34f.1666688642.git.namcaov@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8712/os_intfs.c | 23 ++++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/rtl8712/os_intfs.c b/drivers/staging/rtl8712/os_intfs.c
index 205b7d66a40ab..a2f3645be0cc8 100644
--- a/drivers/staging/rtl8712/os_intfs.c
+++ b/drivers/staging/rtl8712/os_intfs.c
@@ -304,29 +304,42 @@ int r8712_init_drv_sw(struct _adapter *padapter)
 	padapter->cmdpriv.padapter = padapter;
 	ret = r8712_init_evt_priv(&padapter->evtpriv);
 	if (ret)
-		return ret;
+		goto free_cmd;
 	ret = r8712_init_mlme_priv(padapter);
 	if (ret)
-		return ret;
+		goto free_evt;
 	ret = _r8712_init_xmit_priv(&padapter->xmitpriv, padapter);
 	if (ret)
-		return ret;
+		goto free_mlme;
 	ret = _r8712_init_recv_priv(&padapter->recvpriv, padapter);
 	if (ret)
-		return ret;
+		goto free_xmit;
 	memset((unsigned char *)&padapter->securitypriv, 0,
 	       sizeof(struct security_priv));
 	timer_setup(&padapter->securitypriv.tkip_timer,
 		    r8712_use_tkipkey_handler, 0);
 	ret = _r8712_init_sta_priv(&padapter->stapriv);
 	if (ret)
-		return ret;
+		goto free_recv;
 	padapter->stapriv.padapter = padapter;
 	r8712_init_bcmc_stainfo(padapter);
 	r8712_init_pwrctrl_priv(padapter);
 	mp871xinit(padapter);
 	init_default_value(padapter);
 	r8712_InitSwLeds(padapter);
+
+	return 0;
+
+free_recv:
+	_r8712_free_recv_priv(&padapter->recvpriv);
+free_xmit:
+	_free_xmit_priv(&padapter->xmitpriv);
+free_mlme:
+	r8712_free_mlme_priv(&padapter->mlmepriv);
+free_evt:
+	r8712_free_evt_priv(&padapter->evtpriv);
+free_cmd:
+	r8712_free_cmd_priv(&padapter->cmdpriv);
 	return ret;
 }
 
-- 
GitLab


From 5c4fb46e9116bd7cf87bd76b417a865626e13c30 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:14 +0100
Subject: [PATCH 0676/4122] staging: r8188eu: replace a GetAddr1Ptr call

Define a struct ieee80211_mgmt and use it to read the destination address.

This replaces one call to the driver-specific GetAddr1Ptr function, which
should eventually be removed.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 127dbc4e8b9ac..5a366688a3f7d 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3815,13 +3815,14 @@ exit:
 
 unsigned int on_action_public(struct adapter *padapter, struct recv_frame *precv_frame)
 {
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	unsigned int ret = _FAIL;
 	u8 *pframe = precv_frame->rx_data;
 	u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr);
 	u8 category, action;
 
 	/* check RA matches or not */
-	if (memcmp(myid(&padapter->eeprompriv), GetAddr1Ptr(pframe), ETH_ALEN))
+	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
 		goto exit;
 
 	category = frame_body[0];
-- 
GitLab


From 5998e3192274da5fd9521b1019cc62b5ea2283b7 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:15 +0100
Subject: [PATCH 0677/4122] staging: r8188eu: remove duplicate category check

The caller of on_action_public has already checked the action category. We
can remove the check in on_action_public.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 5a366688a3f7d..7d4f208d161bb 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3819,16 +3819,12 @@ unsigned int on_action_public(struct adapter *padapter, struct recv_frame *precv
 	unsigned int ret = _FAIL;
 	u8 *pframe = precv_frame->rx_data;
 	u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr);
-	u8 category, action;
+	u8 action;
 
 	/* check RA matches or not */
 	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
 		goto exit;
 
-	category = frame_body[0];
-	if (category != WLAN_CATEGORY_PUBLIC)
-		goto exit;
-
 	action = frame_body[1];
 	switch (action) {
 	case ACT_PUBLIC_VENDOR:
-- 
GitLab


From 721d7f496a0602d061bd127bd88de7e70181f521 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:16 +0100
Subject: [PATCH 0678/4122] staging: r8188eu: make on_action_public static void

The on_action_public function is called only by OnAction. This function
also lives in rtw_mlme_ext.c and does not check the return value from
on_action_public.

We can make on_action_public a static void function.

The ret variable is no longer needed if we don't return a value. It can
be removed.

Reported-by: kernel test robot <lkp@intel.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 12 ++++--------
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  2 --
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 7d4f208d161bb..88600f62ffb4b 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3813,30 +3813,26 @@ exit:
 	return ret;
 }
 
-unsigned int on_action_public(struct adapter *padapter, struct recv_frame *precv_frame)
+static void on_action_public(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
-	unsigned int ret = _FAIL;
 	u8 *pframe = precv_frame->rx_data;
 	u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr);
 	u8 action;
 
 	/* check RA matches or not */
 	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
-		goto exit;
+		return;
 
 	action = frame_body[1];
 	switch (action) {
 	case ACT_PUBLIC_VENDOR:
-		ret = on_action_public_vendor(precv_frame);
+		on_action_public_vendor(precv_frame);
 		break;
 	default:
-		ret = on_action_public_default(precv_frame);
+		on_action_public_default(precv_frame);
 		break;
 	}
-
-exit:
-	return ret;
 }
 
 unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame)
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index c8beaa927cba1..ec2e9352011be 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -538,8 +538,6 @@ void start_create_ibss(struct adapter *padapter);
 
 unsigned int OnAction_back(struct adapter *padapter,
 			   struct recv_frame *precv_frame);
-unsigned int on_action_public(struct adapter *padapter,
-			      struct recv_frame *precv_frame);
 unsigned int OnAction_p2p(struct adapter *padapter,
 			  struct recv_frame *precv_frame);
 
-- 
GitLab


From b22b8618ab3cf011a5536e30e3fb99b32c911ecb Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:17 +0100
Subject: [PATCH 0679/4122] staging: r8188eu: make OnAction_back static void

OnAction_back is called only by OnAction, its return value is not checked.
We can make it a static void function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 13 +++++--------
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  2 --
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 88600f62ffb4b..779c022b1c506 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1481,7 +1481,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 	pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 }
 
-unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	struct sta_info *psta = NULL;
@@ -1494,21 +1494,20 @@ unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_fr
 	struct sta_priv *pstapriv = &padapter->stapriv;
 	/* check RA matches or not */
 	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */
-		return _SUCCESS;
+		return;
 
 	if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE)
 		if (!(pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS))
-			return _SUCCESS;
+			return;
 
 	psta = rtw_get_stainfo(pstapriv, mgmt->sa);
-
 	if (!psta)
-		return _SUCCESS;
+		return;
 
 	frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr));
 
 	if (!pmlmeinfo->HT_enable)
-		return _SUCCESS;
+		return;
 	/* All union members start with an action code, it's ok to use addba_req. */
 	switch (mgmt->u.action.u.addba_req.action_code) {
 	case WLAN_ACTION_ADDBA_REQ:
@@ -1550,8 +1549,6 @@ unsigned int OnAction_back(struct adapter *padapter, struct recv_frame *precv_fr
 	default:
 		break;
 	}
-
-	return _SUCCESS;
 }
 
 static int get_reg_classes_full_count(struct p2p_channels *channel_list)
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index ec2e9352011be..4ccdce1ad9be3 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,8 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnAction_back(struct adapter *padapter,
-			   struct recv_frame *precv_frame);
 unsigned int OnAction_p2p(struct adapter *padapter,
 			  struct recv_frame *precv_frame);
 
-- 
GitLab


From aa415931f9682fd4ac71ac34ffa5c157cef2b680 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:18 +0100
Subject: [PATCH 0680/4122] staging: r8188eu: make OnAction_p2p static void

OnAction_p2p is called only by OnAction, its return value is not checked.
We can make it a static void function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 9 ++++-----
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 3 ---
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 779c022b1c506..f1054192bfb19 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3832,7 +3832,7 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_
 	}
 }
 
-unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame)
+static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	u8 *frame_body;
 	u8 category, OUI_Subtype;
@@ -3842,16 +3842,16 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra
 
 	/* check RA matches or not */
 	if (memcmp(myid(&padapter->eeprompriv), GetAddr1Ptr(pframe), ETH_ALEN))/* for if1, sta/ap mode */
-		return _SUCCESS;
+		return;
 
 	frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr));
 
 	category = frame_body[0];
 	if (category != RTW_WLAN_CATEGORY_P2P)
-		return _SUCCESS;
+		return;
 
 	if (be32_to_cpu(*((__be32 *)(frame_body + 1))) != P2POUI)
-		return _SUCCESS;
+		return;
 
 	len -= sizeof(struct ieee80211_hdr_3addr);
 	OUI_Subtype = frame_body[5];
@@ -3869,7 +3869,6 @@ unsigned int OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fra
 	default:
 		break;
 	}
-	return _SUCCESS;
 }
 
 static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 4ccdce1ad9be3..ce5b57e23e53b 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -536,9 +536,6 @@ void start_clnt_auth(struct adapter *padapter);
 void start_clnt_join(struct adapter *padapter);
 void start_create_ibss(struct adapter *padapter);
 
-unsigned int OnAction_p2p(struct adapter *padapter,
-			  struct recv_frame *precv_frame);
-
 void mlmeext_joinbss_event_callback(struct adapter *padapter, int join_res);
 void mlmeext_sta_del_event_callback(struct adapter *padapter);
 void mlmeext_sta_add_event_callback(struct adapter *padapter,
-- 
GitLab


From b31b29788f103c2720898af2d499d8b53c1eb980 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:19 +0100
Subject: [PATCH 0681/4122] staging: r8188eu: remove category check in
 OnAction_p2p

The caller of OnAction_p2p has already checked the action category. We can
remove the check in OnAction_p2p.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-7-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index f1054192bfb19..efcb2f3b6d3f7 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3835,7 +3835,7 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_
 static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	u8 *frame_body;
-	u8 category, OUI_Subtype;
+	u8 OUI_Subtype;
 	u8 *pframe = precv_frame->rx_data;
 	uint len = precv_frame->len;
 	struct	wifidirect_info	*pwdinfo = &padapter->wdinfo;
@@ -3846,10 +3846,6 @@ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fram
 
 	frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr));
 
-	category = frame_body[0];
-	if (category != RTW_WLAN_CATEGORY_P2P)
-		return;
-
 	if (be32_to_cpu(*((__be32 *)(frame_body + 1))) != P2POUI)
 		return;
 
-- 
GitLab


From e246bf42ef82ac25d8a7ad45ddd07fbf161f7e7f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:20 +0100
Subject: [PATCH 0682/4122] staging: r8188eu: replace switch-case with if

OnAction_p2p has a switch-case statement where only a single case is
handled. Use if instead, this makes the code shorter and easier to read.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-8-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index efcb2f3b6d3f7..64d01da9c8140 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3852,19 +3852,8 @@ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fram
 	len -= sizeof(struct ieee80211_hdr_3addr);
 	OUI_Subtype = frame_body[5];
 
-	switch (OUI_Subtype) {
-	case P2P_NOTICE_OF_ABSENCE:
-		break;
-	case P2P_PRESENCE_REQUEST:
+	if (OUI_Subtype == P2P_PRESENCE_REQUEST)
 		process_p2p_presence_req(pwdinfo, pframe, len);
-		break;
-	case P2P_PRESENCE_RESPONSE:
-		break;
-	case P2P_GO_DISC_REQUEST:
-		break;
-	default:
-		break;
-	}
 }
 
 static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
-- 
GitLab


From fc47cb05cf19017c3eb87da9b82e20f57b595ad3 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:21 +0100
Subject: [PATCH 0683/4122] staging: r8188eu: replace GetAddr1Ptr call in
 OnAction_p2p

Define a struct ieee80211_mgmt in OnAction_p2p and use it to check the
destination address. This replaces a call to the driver-specific
GetAddr1Ptr function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-9-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 64d01da9c8140..6d95d3bc23e6c 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3834,6 +3834,7 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_
 
 static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame)
 {
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	u8 *frame_body;
 	u8 OUI_Subtype;
 	u8 *pframe = precv_frame->rx_data;
@@ -3841,7 +3842,7 @@ static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_fram
 	struct	wifidirect_info	*pwdinfo = &padapter->wdinfo;
 
 	/* check RA matches or not */
-	if (memcmp(myid(&padapter->eeprompriv), GetAddr1Ptr(pframe), ETH_ALEN))/* for if1, sta/ap mode */
+	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */
 		return;
 
 	frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr));
-- 
GitLab


From a05159da5a4e49fd4b34cb00fb33badeb4dc9ec3 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:22 +0100
Subject: [PATCH 0684/4122] staging: r8188eu: clean up on_action_public

Use the struct mgmt to read the action_code. This is much simpler than
parsing the message ourselves.

Add a comment about reading the action code. All members of the action
enum start with an action_code byte. It does not matter which member we
use.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-10-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 6d95d3bc23e6c..b3cef3504ad30 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3813,23 +3813,16 @@ exit:
 static void on_action_public(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
-	u8 *pframe = precv_frame->rx_data;
-	u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr);
-	u8 action;
 
 	/* check RA matches or not */
 	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
 		return;
 
-	action = frame_body[1];
-	switch (action) {
-	case ACT_PUBLIC_VENDOR:
+	/* All members of the action enum start with action_code. */
+	if (mgmt->u.action.u.s1g.action_code == WLAN_PUB_ACTION_VENDOR_SPECIFIC)
 		on_action_public_vendor(precv_frame);
-		break;
-	default:
+	else
 		on_action_public_default(precv_frame);
-		break;
-	}
 }
 
 static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame)
-- 
GitLab


From a399a3b05209dc759b20c6d6e4aa331f2e60bc8a Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:23 +0100
Subject: [PATCH 0685/4122] staging: r8188eu: remove return value from
 on_action_public_vendor

The only caller of on_action_public_vendor does not check the return
value. We can make it a void function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-11-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index b3cef3504ad30..b395457a6a60a 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3779,17 +3779,13 @@ static unsigned int on_action_public_p2p(struct recv_frame *precv_frame)
 	return _SUCCESS;
 }
 
-static unsigned int on_action_public_vendor(struct recv_frame *precv_frame)
+static void on_action_public_vendor(struct recv_frame *precv_frame)
 {
-	unsigned int ret = _FAIL;
 	u8 *pframe = precv_frame->rx_data;
 	u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr);
 
-	if (!memcmp(frame_body + 2, P2P_OUI, 4)) {
-		ret = on_action_public_p2p(precv_frame);
-	}
-
-	return ret;
+	if (!memcmp(frame_body + 2, P2P_OUI, 4))
+		on_action_public_p2p(precv_frame);
 }
 
 static unsigned int on_action_public_default(struct recv_frame *precv_frame)
-- 
GitLab


From 8161a8335821bb6944eef2de8b750544d59c49c2 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:24 +0100
Subject: [PATCH 0686/4122] staging: r8188eu: remove return value from
 on_action_public_default

The only caller of on_action_public_default does not check the return
value. We can make it a void function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-12-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index b395457a6a60a..f5923792f0675 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3788,22 +3788,15 @@ static void on_action_public_vendor(struct recv_frame *precv_frame)
 		on_action_public_p2p(precv_frame);
 }
 
-static unsigned int on_action_public_default(struct recv_frame *precv_frame)
+static void on_action_public_default(struct recv_frame *precv_frame)
 {
-	unsigned int ret = _FAIL;
 	u8 *pframe = precv_frame->rx_data;
 	u8 *frame_body = pframe + sizeof(struct ieee80211_hdr_3addr);
 	u8 token;
 
 	token = frame_body[2];
 
-	if (rtw_action_public_decache(precv_frame, token) == _FAIL)
-		goto exit;
-
-	ret = _SUCCESS;
-
-exit:
-	return ret;
+	rtw_action_public_decache(precv_frame, token);
 }
 
 static void on_action_public(struct adapter *padapter, struct recv_frame *precv_frame)
-- 
GitLab


From 9001c5029dded946e7862e5785278f7c1d9dfe55 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:25 +0100
Subject: [PATCH 0687/4122] staging: r8188eu: rtw_action_public_decache's token
 is a u8

Both callers of rtw_action_public_decache pass a u8 value for the token
parameter. We can change token from s32 to u8 and remove the code for
token < 0.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-13-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index f5923792f0675..93f3d387e92dd 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3490,7 +3490,7 @@ inline void issue_probereq_p2p(struct adapter *adapter, u8 *da)
 	_issue_probereq_p2p(adapter, da);
 }
 
-static s32 rtw_action_public_decache(struct recv_frame *recv_frame, s32 token)
+static s32 rtw_action_public_decache(struct recv_frame *recv_frame, u8 token)
 {
 	struct adapter *adapter = recv_frame->adapter;
 	struct mlme_ext_priv *mlmeext = &adapter->mlmeextpriv;
@@ -3499,21 +3499,13 @@ static s32 rtw_action_public_decache(struct recv_frame *recv_frame, s32 token)
 		(recv_frame->attrib.frag_num & 0xf);
 
 	if (GetRetry(frame)) {
-		if (token >= 0) {
-			if ((seq_ctrl == mlmeext->action_public_rxseq) &&
-			    (token == mlmeext->action_public_dialog_token))
-				return _FAIL;
-		} else {
-			if (seq_ctrl == mlmeext->action_public_rxseq)
-				return _FAIL;
-		}
+		if ((seq_ctrl == mlmeext->action_public_rxseq) &&
+		    (token == mlmeext->action_public_dialog_token))
+			return _FAIL;
 	}
 
 	mlmeext->action_public_rxseq = seq_ctrl;
-
-	if (token >= 0)
-		mlmeext->action_public_dialog_token = token;
-
+	mlmeext->action_public_dialog_token = token;
 	return _SUCCESS;
 }
 
-- 
GitLab


From ae85931f8f483ed58f7c90c99c75afd8482e0399 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 30 Oct 2022 18:33:26 +0100
Subject: [PATCH 0688/4122] staging: r8188eu: check destination address in
 OnAction

All subfunctions of OnAction check if the destination address matches the
local interface's address. It's simpler to move this check to OnAction.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221030173326.1588647-14-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 93f3d387e92dd..e985fc5fc575e 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1492,9 +1492,6 @@ static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_fra
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
 	u8 *pframe = precv_frame->rx_data;
 	struct sta_priv *pstapriv = &padapter->stapriv;
-	/* check RA matches or not */
-	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */
-		return;
 
 	if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE)
 		if (!(pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS))
@@ -3795,10 +3792,6 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 
-	/* check RA matches or not */
-	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
-		return;
-
 	/* All members of the action enum start with action_code. */
 	if (mgmt->u.action.u.s1g.action_code == WLAN_PUB_ACTION_VENDOR_SPECIFIC)
 		on_action_public_vendor(precv_frame);
@@ -3808,17 +3801,12 @@ static void on_action_public(struct adapter *padapter, struct recv_frame *precv_
 
 static void OnAction_p2p(struct adapter *padapter, struct recv_frame *precv_frame)
 {
-	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	u8 *frame_body;
 	u8 OUI_Subtype;
 	u8 *pframe = precv_frame->rx_data;
 	uint len = precv_frame->len;
 	struct	wifidirect_info	*pwdinfo = &padapter->wdinfo;
 
-	/* check RA matches or not */
-	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))/* for if1, sta/ap mode */
-		return;
-
 	frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr));
 
 	if (be32_to_cpu(*((__be32 *)(frame_body + 1))) != P2POUI)
@@ -3835,6 +3823,9 @@ static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 
+	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
+		return;
+
 	switch (mgmt->u.action.category) {
 	case WLAN_CATEGORY_BACK:
 		OnAction_back(padapter, precv_frame);
-- 
GitLab


From 085bdaa6eb1476ec054164bdc4001bc3916ff5cb Mon Sep 17 00:00:00 2001
From: Shaoqin Huang <shaoqin.huang@intel.com>
Date: Tue, 11 Oct 2022 14:21:20 +0800
Subject: [PATCH 0689/4122] memblock test: Add test to memblock_add() 129th
 region

Add 129th region into the memblock, and this will trigger the
memblock_double_array() function, this needs valid memory regions. So
using dummy_physical_memory_init() to allocate a large enough memory
region, and split it into a large enough memory which can be choosed by
memblock_double_array(), and the left memory will be split into small
memory region, and add them into the memblock. It make sure the
memblock_double_array() will always choose the valid memory region that
is allocated by the dummy_physical_memory_init().
So memblock_double_array() must success.

Another thing should be done is to restore the memory.regions after
memblock_double_array(), due to now the memory.regions is pointing to a
memory region allocated by dummy_physical_memory_init(). And it will
affect the subsequent tests if we don't restore the memory region. So
simply record the origin region, and restore it after the test.

Signed-off-by: Shaoqin Huang <shaoqin.huang@intel.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011062128.49359-2-shaoqin.huang@intel.com
---
 tools/testing/memblock/tests/basic_api.c | 93 ++++++++++++++++++++++++
 tools/testing/memblock/tests/common.c    |  7 +-
 tools/testing/memblock/tests/common.h    |  6 +-
 3 files changed, 103 insertions(+), 3 deletions(-)

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index a13a57ba0815f..4d61a4b474be3 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -423,6 +423,98 @@ static int memblock_add_near_max_check(void)
 	return 0;
 }
 
+/*
+ * A test that trying to add the 129th memory block.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as
+ * memory.regions.
+ */
+static int memblock_add_many_check(void)
+{
+	int i;
+	void *orig_region;
+	struct region r = {
+		.base = SZ_16K,
+		.size = SZ_16K,
+	};
+	phys_addr_t new_memory_regions_size;
+	phys_addr_t base, size = SZ_64;
+	phys_addr_t gap_size = SZ_64;
+
+	PREFIX_PUSH();
+
+	reset_memblock_regions();
+	memblock_allow_resize();
+
+	dummy_physical_memory_init();
+	/*
+	 * We allocated enough memory by using dummy_physical_memory_init(), and
+	 * split it into small block. First we split a large enough memory block
+	 * as the memory region which will be choosed by memblock_double_array().
+	 */
+	base = PAGE_ALIGN(dummy_physical_memory_base());
+	new_memory_regions_size = PAGE_ALIGN(INIT_MEMBLOCK_REGIONS * 2 *
+					     sizeof(struct memblock_region));
+	memblock_add(base, new_memory_regions_size);
+
+	/* This is the base of small memory block. */
+	base += new_memory_regions_size + gap_size;
+
+	orig_region = memblock.memory.regions;
+
+	for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) {
+		/*
+		 * Add these small block to fulfill the memblock. We keep a
+		 * gap between the nearby memory to avoid being merged.
+		 */
+		memblock_add(base, size);
+		base += size + gap_size;
+
+		ASSERT_EQ(memblock.memory.cnt, i + 2);
+		ASSERT_EQ(memblock.memory.total_size, new_memory_regions_size +
+						      (i + 1) * size);
+	}
+
+	/*
+	 * At there, memblock_double_array() has been succeed, check if it
+	 * update the memory.max.
+	 */
+	ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2);
+
+	/* memblock_double_array() will reserve the memory it used. Check it. */
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, new_memory_regions_size);
+
+	/*
+	 * Now memblock_double_array() works fine. Let's check after the
+	 * double_array(), the memblock_add() still works as normal.
+	 */
+	memblock_add(r.base, r.size);
+	ASSERT_EQ(memblock.memory.regions[0].base, r.base);
+	ASSERT_EQ(memblock.memory.regions[0].size, r.size);
+
+	ASSERT_EQ(memblock.memory.cnt, INIT_MEMBLOCK_REGIONS + 2);
+	ASSERT_EQ(memblock.memory.total_size, INIT_MEMBLOCK_REGIONS * size +
+					      new_memory_regions_size +
+					      r.size);
+	ASSERT_EQ(memblock.memory.max, INIT_MEMBLOCK_REGIONS * 2);
+
+	dummy_physical_memory_cleanup();
+
+	/*
+	 * The current memory.regions is occupying a range of memory that
+	 * allocated from dummy_physical_memory_init(). After free the memory,
+	 * we must not use it. So restore the origin memory region to make sure
+	 * the tests can run as normal and not affected by the double array.
+	 */
+	memblock.memory.regions = orig_region;
+	memblock.memory.cnt = INIT_MEMBLOCK_REGIONS;
+
+	test_pass_pop();
+
+	return 0;
+}
+
 static int memblock_add_checks(void)
 {
 	prefix_reset();
@@ -438,6 +530,7 @@ static int memblock_add_checks(void)
 	memblock_add_twice_check();
 	memblock_add_between_check();
 	memblock_add_near_max_check();
+	memblock_add_many_check();
 
 	prefix_pop();
 
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
index 3f795047bbe18..f43b6f414983d 100644
--- a/tools/testing/memblock/tests/common.c
+++ b/tools/testing/memblock/tests/common.c
@@ -5,8 +5,6 @@
 #include <linux/memory_hotplug.h>
 #include <linux/build_bug.h>
 
-#define INIT_MEMBLOCK_REGIONS			128
-#define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
 #define PREFIXES_MAX				15
 #define DELIM					": "
 #define BASIS					10000
@@ -115,6 +113,11 @@ void dummy_physical_memory_cleanup(void)
 	free(memory_block.base);
 }
 
+phys_addr_t dummy_physical_memory_base(void)
+{
+	return (phys_addr_t)memory_block.base;
+}
+
 static void usage(const char *prog)
 {
 	BUILD_BUG_ON(ARRAY_SIZE(help_opts) != ARRAY_SIZE(long_opts) - 1);
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
index d6bbbe63bfc36..cc82b85151b61 100644
--- a/tools/testing/memblock/tests/common.h
+++ b/tools/testing/memblock/tests/common.h
@@ -10,9 +10,12 @@
 #include <linux/printk.h>
 #include <../selftests/kselftest.h>
 
-#define MEM_SIZE		SZ_16K
+#define MEM_SIZE		SZ_32K
 #define NUMA_NODES		8
 
+#define INIT_MEMBLOCK_REGIONS			128
+#define INIT_MEMBLOCK_RESERVED_REGIONS		INIT_MEMBLOCK_REGIONS
+
 enum test_flags {
 	/* No special request. */
 	TEST_F_NONE = 0x0,
@@ -124,6 +127,7 @@ void setup_memblock(void);
 void setup_numa_memblock(const unsigned int node_fracs[]);
 void dummy_physical_memory_init(void);
 void dummy_physical_memory_cleanup(void);
+phys_addr_t dummy_physical_memory_base(void);
 void parse_args(int argc, char **argv);
 
 void test_fail(void);
-- 
GitLab


From 5b27dd7968b9c916da4af48d0310f94152744f8e Mon Sep 17 00:00:00 2001
From: Shaoqin Huang <shaoqin.huang@intel.com>
Date: Tue, 11 Oct 2022 14:21:21 +0800
Subject: [PATCH 0690/4122] memblock test: Add test to memblock_reserve() 129th
 region

Reserve 129th region in the memblock, and this will trigger the
memblock_double_array() function, this needs valid memory regions. So
using dummy_physical_memory_init() to allocate a valid memory region.
At the same time, reserve 128 faked memory region, and make sure these
reserved region not intersect with the valid memory region. So
memblock_double_array() will choose the valid memory region, and it will
success.

Also need to restore the reserved.regions after memblock_double_array(),
to make sure the subsequent tests can run as normal.

Signed-off-by: Shaoqin Huang <shaoqin.huang@intel.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011062128.49359-3-shaoqin.huang@intel.com
---
 tools/testing/memblock/tests/basic_api.c | 91 ++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 4d61a4b474be3..411647094cc37 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -892,6 +892,96 @@ static int memblock_reserve_near_max_check(void)
 	return 0;
 }
 
+/*
+ * A test that trying to reserve the 129th memory block.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as
+ * reserved.regions.
+ */
+static int memblock_reserve_many_check(void)
+{
+	int i;
+	void *orig_region;
+	struct region r = {
+		.base = SZ_16K,
+		.size = SZ_16K,
+	};
+	phys_addr_t memory_base = SZ_128K;
+	phys_addr_t new_reserved_regions_size;
+
+	PREFIX_PUSH();
+
+	reset_memblock_regions();
+	memblock_allow_resize();
+
+	/* Add a valid memory region used by double_array(). */
+	dummy_physical_memory_init();
+	memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+
+	for (i = 0; i < INIT_MEMBLOCK_REGIONS; i++) {
+		/* Reserve some fakes memory region to fulfill the memblock. */
+		memblock_reserve(memory_base, MEM_SIZE);
+
+		ASSERT_EQ(memblock.reserved.cnt, i + 1);
+		ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE);
+
+		/* Keep the gap so these memory region will not be merged. */
+		memory_base += MEM_SIZE * 2;
+	}
+
+	orig_region = memblock.reserved.regions;
+
+	/* This reserve the 129 memory_region, and makes it double array. */
+	memblock_reserve(memory_base, MEM_SIZE);
+
+	/*
+	 * This is the memory region size used by the doubled reserved.regions,
+	 * and it has been reserved due to it has been used. The size is used to
+	 * calculate the total_size that the memblock.reserved have now.
+	 */
+	new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+					sizeof(struct memblock_region));
+	/*
+	 * The double_array() will find a free memory region as the new
+	 * reserved.regions, and the used memory region will be reserved, so
+	 * there will be one more region exist in the reserved memblock. And the
+	 * one more reserved region's size is new_reserved_regions_size.
+	 */
+	ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+	ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+						new_reserved_regions_size);
+	ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+	/*
+	 * Now memblock_double_array() works fine. Let's check after the
+	 * double_array(), the memblock_reserve() still works as normal.
+	 */
+	memblock_reserve(r.base, r.size);
+	ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+	ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+	ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+	ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+						new_reserved_regions_size +
+						r.size);
+	ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+	dummy_physical_memory_cleanup();
+
+	/*
+	 * The current reserved.regions is occupying a range of memory that
+	 * allocated from dummy_physical_memory_init(). After free the memory,
+	 * we must not use it. So restore the origin memory region to make sure
+	 * the tests can run as normal and not affected by the double array.
+	 */
+	memblock.reserved.regions = orig_region;
+	memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+
+	test_pass_pop();
+
+	return 0;
+}
+
 static int memblock_reserve_checks(void)
 {
 	prefix_reset();
@@ -906,6 +996,7 @@ static int memblock_reserve_checks(void)
 	memblock_reserve_twice_check();
 	memblock_reserve_between_check();
 	memblock_reserve_near_max_check();
+	memblock_reserve_many_check();
 
 	prefix_pop();
 
-- 
GitLab


From 62a56c540797681a5b50a4c06bf638f79b6013bc Mon Sep 17 00:00:00 2001
From: Shaoqin Huang <shaoqin.huang@intel.com>
Date: Tue, 11 Oct 2022 14:21:22 +0800
Subject: [PATCH 0691/4122] memblock test: Update TODO list

Remove the completed items from TODO list.

Signed-off-by: Shaoqin Huang <shaoqin.huang@intel.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20221011062128.49359-4-shaoqin.huang@intel.com
---
 tools/testing/memblock/TODO | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO
index 33044c634ea7f..503cc96fcdc3a 100644
--- a/tools/testing/memblock/TODO
+++ b/tools/testing/memblock/TODO
@@ -1,17 +1,10 @@
 TODO
 =====
 
-1. Add tests trying to memblock_add() or memblock_reserve() 129th region.
-   This will trigger memblock_double_array(), make sure it succeeds.
-   *Important:* These tests require valid memory ranges, use dummy physical
-                memory block from common.c to implement them. It is also very
-                likely that the current MEM_SIZE won't be enough for these
-                test cases. Use realloc to adjust the size accordingly.
-
-2. Add test cases using this functions (implement them for both directions):
+1. Add test cases using this functions (implement them for both directions):
    + memblock_alloc_raw()
    + memblock_alloc_exact_nid_raw()
    + memblock_alloc_try_nid_raw()
 
-3. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
+2. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
    for the new region
-- 
GitLab


From 3c728e079d83f581a1f8b7755f6e26087b15c4fb Mon Sep 17 00:00:00 2001
From: Tamas Zsoldos <tamas.zsoldos@arm.com>
Date: Tue, 5 Jul 2022 16:59:35 +0200
Subject: [PATCH 0692/4122] coresight: etm4x: add CPU hotplug support for
 probing

etm4x devices cannot be successfully probed when their CPU is offline.
For example, when booting with maxcpus=n, ETM probing will fail on
CPUs >n, and the probing won't be reattempted once the CPUs come
online. This will leave those CPUs unable to make use of ETM.

This change adds a mechanism to delay the probing if the corresponding
CPU is offline, and to try it again when the CPU comes online.

Signed-off-by: Tamas Zsoldos <tamas.zsoldos@arm.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20220705145935.24679-1-tamas.zsoldos@arm.com
---
 .../coresight/coresight-etm4x-core.c          | 153 +++++++++++++-----
 1 file changed, 113 insertions(+), 40 deletions(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 80fefaba58eeb..9dc0d7f57565a 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -66,10 +66,13 @@ static enum cpuhp_state hp_online;
 
 struct etm4_init_arg {
 	unsigned int		pid;
-	struct etmv4_drvdata	*drvdata;
+	struct device		*dev;
 	struct csdev_access	*csa;
 };
 
+static DEFINE_PER_CPU(struct etm4_init_arg *, delayed_probe);
+static int etm4_probe_cpu(unsigned int cpu);
+
 /*
  * Check if TRCSSPCICRn(i) is implemented for a given instance.
  *
@@ -1085,7 +1088,7 @@ static void etm4_init_arch_data(void *info)
 	struct csdev_access *csa;
 	int i;
 
-	drvdata = init_arg->drvdata;
+	drvdata = dev_get_drvdata(init_arg->dev);
 	csa = init_arg->csa;
 
 	/*
@@ -1528,7 +1531,7 @@ void etm4_config_trace_mode(struct etmv4_config *config)
 static int etm4_online_cpu(unsigned int cpu)
 {
 	if (!etmdrvdata[cpu])
-		return 0;
+		return etm4_probe_cpu(cpu);
 
 	if (etmdrvdata[cpu]->boot_enable && !etmdrvdata[cpu]->sticky_enable)
 		coresight_enable(etmdrvdata[cpu]->csdev);
@@ -1904,48 +1907,20 @@ static void etm4_pm_clear(void)
 	}
 }
 
-static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
+static int etm4_add_coresight_dev(struct etm4_init_arg *init_arg)
 {
 	int ret;
 	struct coresight_platform_data *pdata = NULL;
-	struct etmv4_drvdata *drvdata;
+	struct device *dev = init_arg->dev;
+	struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
 	struct coresight_desc desc = { 0 };
-	struct etm4_init_arg init_arg = { 0 };
 	u8 major, minor;
 	char *type_name;
 
-	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
 	if (!drvdata)
-		return -ENOMEM;
-
-	dev_set_drvdata(dev, drvdata);
-
-	if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
-		pm_save_enable = coresight_loses_context_with_cpu(dev) ?
-			       PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
-
-	if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
-		drvdata->save_state = devm_kmalloc(dev,
-				sizeof(struct etmv4_save_state), GFP_KERNEL);
-		if (!drvdata->save_state)
-			return -ENOMEM;
-	}
-
-	drvdata->base = base;
-
-	spin_lock_init(&drvdata->spinlock);
-
-	drvdata->cpu = coresight_get_cpu(dev);
-	if (drvdata->cpu < 0)
-		return drvdata->cpu;
-
-	init_arg.drvdata = drvdata;
-	init_arg.csa = &desc.access;
-	init_arg.pid = etm_pid;
+		return -EINVAL;
 
-	if (smp_call_function_single(drvdata->cpu,
-				etm4_init_arch_data,  &init_arg, 1))
-		dev_err(dev, "ETM arch init failed\n");
+	desc.access = *init_arg->csa;
 
 	if (!drvdata->arch)
 		return -EINVAL;
@@ -2016,6 +1991,68 @@ static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
 	return 0;
 }
 
+static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
+{
+	struct etmv4_drvdata *drvdata;
+	struct csdev_access access = { 0 };
+	struct etm4_init_arg init_arg = { 0 };
+	struct etm4_init_arg *delayed;
+
+	drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
+	if (!drvdata)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, drvdata);
+
+	if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
+		pm_save_enable = coresight_loses_context_with_cpu(dev) ?
+			       PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
+
+	if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
+		drvdata->save_state = devm_kmalloc(dev,
+				sizeof(struct etmv4_save_state), GFP_KERNEL);
+		if (!drvdata->save_state)
+			return -ENOMEM;
+	}
+
+	drvdata->base = base;
+
+	spin_lock_init(&drvdata->spinlock);
+
+	drvdata->cpu = coresight_get_cpu(dev);
+	if (drvdata->cpu < 0)
+		return drvdata->cpu;
+
+	init_arg.dev = dev;
+	init_arg.csa = &access;
+	init_arg.pid = etm_pid;
+
+	/*
+	 * Serialize against CPUHP callbacks to avoid race condition
+	 * between the smp call and saving the delayed probe.
+	 */
+	cpus_read_lock();
+	if (smp_call_function_single(drvdata->cpu,
+				etm4_init_arch_data,  &init_arg, 1)) {
+		/* The CPU was offline, try again once it comes online. */
+		delayed = devm_kmalloc(dev, sizeof(*delayed), GFP_KERNEL);
+		if (!delayed) {
+			cpus_read_unlock();
+			return -ENOMEM;
+		}
+
+		*delayed = init_arg;
+
+		per_cpu(delayed_probe, drvdata->cpu) = delayed;
+
+		cpus_read_unlock();
+		return 0;
+	}
+	cpus_read_unlock();
+
+	return etm4_add_coresight_dev(&init_arg);
+}
+
 static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id)
 {
 	void __iomem *base;
@@ -2054,6 +2091,35 @@ static int etm4_probe_platform_dev(struct platform_device *pdev)
 	return ret;
 }
 
+static int etm4_probe_cpu(unsigned int cpu)
+{
+	int ret;
+	struct etm4_init_arg init_arg;
+	struct csdev_access access = { 0 };
+	struct etm4_init_arg *iap = *this_cpu_ptr(&delayed_probe);
+
+	if (!iap)
+		return 0;
+
+	init_arg = *iap;
+	devm_kfree(init_arg.dev, iap);
+	*this_cpu_ptr(&delayed_probe) = NULL;
+
+	ret = pm_runtime_resume_and_get(init_arg.dev);
+	if (ret < 0) {
+		dev_err(init_arg.dev, "Failed to get PM runtime!\n");
+		return 0;
+	}
+
+	init_arg.csa = &access;
+	etm4_init_arch_data(&init_arg);
+
+	etm4_add_coresight_dev(&init_arg);
+
+	pm_runtime_put(init_arg.dev);
+	return 0;
+}
+
 static struct amba_cs_uci_id uci_id_etm4[] = {
 	{
 		/*  ETMv4 UCI data */
@@ -2068,16 +2134,20 @@ static void clear_etmdrvdata(void *info)
 	int cpu = *(int *)info;
 
 	etmdrvdata[cpu] = NULL;
+	per_cpu(delayed_probe, cpu) = NULL;
 }
 
 static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
 {
-	etm_perf_symlink(drvdata->csdev, false);
+	bool had_delayed_probe;
 	/*
 	 * Taking hotplug lock here to avoid racing between etm4_remove_dev()
 	 * and CPU hotplug call backs.
 	 */
 	cpus_read_lock();
+
+	had_delayed_probe = per_cpu(delayed_probe, drvdata->cpu);
+
 	/*
 	 * The readers for etmdrvdata[] are CPU hotplug call backs
 	 * and PM notification call backs. Change etmdrvdata[i] on
@@ -2085,12 +2155,15 @@ static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
 	 * inside one call back function.
 	 */
 	if (smp_call_function_single(drvdata->cpu, clear_etmdrvdata, &drvdata->cpu, 1))
-		etmdrvdata[drvdata->cpu] = NULL;
+		clear_etmdrvdata(&drvdata->cpu);
 
 	cpus_read_unlock();
 
-	cscfg_unregister_csdev(drvdata->csdev);
-	coresight_unregister(drvdata->csdev);
+	if (!had_delayed_probe) {
+		etm_perf_symlink(drvdata->csdev, false);
+		cscfg_unregister_csdev(drvdata->csdev);
+		coresight_unregister(drvdata->csdev);
+	}
 
 	return 0;
 }
-- 
GitLab


From 8d0d129e94d4518fd17c13b4991ff10b7f4cd85a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Oct 2022 13:52:17 -0300
Subject: [PATCH 0693/4122] perf bpf: No need to include headers just use
 forward declarations

In the bpf-prologue.h header we are just using pointers, so no need to
include headers for that, just provide forward declarations for those
types.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf-prologue.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/bpf-prologue.h b/tools/perf/util/bpf-prologue.h
index c50c7358009f2..66dcf751ef654 100644
--- a/tools/perf/util/bpf-prologue.h
+++ b/tools/perf/util/bpf-prologue.h
@@ -6,9 +6,8 @@
 #ifndef __BPF_PROLOGUE_H
 #define __BPF_PROLOGUE_H
 
-#include <linux/compiler.h>
-#include <linux/filter.h>
-#include "probe-event.h"
+struct probe_trace_arg;
+struct bpf_insn;
 
 #define BPF_PROLOGUE_MAX_ARGS 3
 #define BPF_PROLOGUE_START_ARG_REG BPF_REG_3
@@ -19,6 +18,7 @@ int bpf__gen_prologue(struct probe_trace_arg *args, int nargs,
 		      struct bpf_insn *new_prog, size_t *new_cnt,
 		      size_t cnt_space);
 #else
+#include <linux/compiler.h>
 #include <errno.h>
 
 static inline int
-- 
GitLab


From 08043330167f1e21abe60ff7e124ed87d4fd029d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Oct 2022 17:02:23 -0300
Subject: [PATCH 0694/4122] perf branch: Remove some needless headers, add a
 needed one

map_symbol.h is needed because we have structs that contains 'struct
addr_map_symbol', so add it, remove the others.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/branch.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index f838b23db1804..94f36a187fe73 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -7,11 +7,9 @@
  * detected in at least musl libc, used in Alpine Linux. -acme
  */
 #include <stdio.h>
-#include <stdint.h>
-#include <linux/compiler.h>
-#include <linux/stddef.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
+#include "util/map_symbol.h"
 #include "event.h"
 
 struct branch_flags {
-- 
GitLab


From 9823147da6c893d9295949e5ed982a8630deb6db Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 26 Oct 2022 17:24:27 -0300
Subject: [PATCH 0695/4122] perf tools: Move 'struct perf_sample' to a separate
 header file to disentangle headers

Some places were including event.h just to get 'struct perf_sample',
move it to a separate place so that we speed up a bit the build.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm/util/unwind-libdw.c       |   2 +-
 tools/perf/arch/arm64/util/machine.c          |   1 +
 tools/perf/arch/arm64/util/unwind-libdw.c     |   2 +-
 tools/perf/arch/powerpc/util/event.c          |   1 +
 tools/perf/arch/powerpc/util/unwind-libdw.c   |   2 +-
 tools/perf/arch/s390/util/unwind-libdw.c      |   1 +
 tools/perf/arch/x86/tests/sample-parsing.c    |   1 +
 tools/perf/arch/x86/util/event.c              |   1 +
 tools/perf/arch/x86/util/unwind-libdw.c       |   2 +-
 tools/perf/bench/inject-buildid.c             |   2 +-
 tools/perf/builtin-mem.c                      |   1 +
 tools/perf/tests/dlfilter-test.c              |   1 +
 tools/perf/tests/mmap-basic.c                 |   1 +
 tools/perf/tests/openat-syscall-tp-fields.c   |   1 +
 tools/perf/tests/parse-no-sample-id-all.c     |   1 +
 tools/perf/tests/perf-record.c                |   1 +
 tools/perf/tests/perf-time-to-tsc.c           |   1 +
 tools/perf/tests/sw-clock.c                   |   1 +
 tools/perf/tests/switch-tracking.c            |   1 +
 tools/perf/util/amd-sample-raw.c              |   1 +
 tools/perf/util/auxtrace.c                    |   1 +
 tools/perf/util/branch.h                      |   2 +-
 tools/perf/util/data-convert-bt.c             |   1 +
 tools/perf/util/event.h                       | 111 +----------------
 tools/perf/util/evlist.c                      |   1 +
 .../intel-pt-decoder/intel-pt-insn-decoder.c  |   1 +
 tools/perf/util/perf_regs.c                   |   2 +-
 tools/perf/util/s390-cpumsf.c                 |   1 +
 tools/perf/util/s390-sample-raw.c             |   1 +
 tools/perf/util/sample.h                      | 117 ++++++++++++++++++
 tools/perf/util/trace-event-scripting.c       |   2 +-
 31 files changed, 147 insertions(+), 118 deletions(-)
 create mode 100644 tools/perf/util/sample.h

diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
index b7692cb0c7339..1834a0cd9ce3a 100644
--- a/tools/perf/arch/arm/util/unwind-libdw.c
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -2,7 +2,7 @@
 #include <elfutils/libdwfl.h>
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "../../../util/sample.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c
index 41c1596e52071..235a0a1e1ec7b 100644
--- a/tools/perf/arch/arm64/util/machine.c
+++ b/tools/perf/arch/arm64/util/machine.c
@@ -7,6 +7,7 @@
 #include "symbol.h"
 #include "callchain.h"
 #include "record.h"
+#include "util/perf_regs.h"
 
 void arch__add_leaf_frame_record_opts(struct record_opts *opts)
 {
diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c
index a509416296495..09385081bb03c 100644
--- a/tools/perf/arch/arm64/util/unwind-libdw.c
+++ b/tools/perf/arch/arm64/util/unwind-libdw.c
@@ -2,7 +2,7 @@
 #include <elfutils/libdwfl.h>
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "../../../util/sample.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c
index cf430a4c55b9a..77d8cc2b5691f 100644
--- a/tools/perf/arch/powerpc/util/event.c
+++ b/tools/perf/arch/powerpc/util/event.c
@@ -9,6 +9,7 @@
 #include "../../../util/tool.h"
 #include "../../../util/map.h"
 #include "../../../util/debug.h"
+#include "../../../util/sample.h"
 
 void arch_perf_parse_sample_weight(struct perf_sample *data,
 				   const __u64 *array, u64 type)
diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c
index 7b2d96ec28e39..e616642c754c4 100644
--- a/tools/perf/arch/powerpc/util/unwind-libdw.c
+++ b/tools/perf/arch/powerpc/util/unwind-libdw.c
@@ -3,7 +3,7 @@
 #include <linux/kernel.h>
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "../../../util/sample.h"
 
 /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils.  */
 static const int special_regs[3][2] = {
diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c
index 387c698cdd1be..7d92452d52877 100644
--- a/tools/perf/arch/s390/util/unwind-libdw.c
+++ b/tools/perf/arch/s390/util/unwind-libdw.c
@@ -3,6 +3,7 @@
 #include "../../util/unwind-libdw.h"
 #include "../../util/perf_regs.h"
 #include "../../util/event.h"
+#include "../../util/sample.h"
 #include "dwarf-regs-table.h"
 
 
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
index bfbd3662b69e8..690c7c07e90d1 100644
--- a/tools/perf/arch/x86/tests/sample-parsing.c
+++ b/tools/perf/arch/x86/tests/sample-parsing.c
@@ -10,6 +10,7 @@
 #include "event.h"
 #include "evsel.h"
 #include "debug.h"
+#include "util/sample.h"
 #include "util/synthetic-events.h"
 
 #include "tests/tests.h"
diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c
index 55ff6aec10fdd..a3acefe6d0c65 100644
--- a/tools/perf/arch/x86/util/event.c
+++ b/tools/perf/arch/x86/util/event.c
@@ -10,6 +10,7 @@
 #include "../../../util/tool.h"
 #include "../../../util/map.h"
 #include "../../../util/debug.h"
+#include "util/sample.h"
 
 #if defined(__x86_64__)
 
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
index eea2bf87232b2..ef71e8bf80bf5 100644
--- a/tools/perf/arch/x86/util/unwind-libdw.c
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -2,7 +2,7 @@
 #include <elfutils/libdwfl.h>
 #include "../../../util/unwind-libdw.h"
 #include "../../../util/perf_regs.h"
-#include "../../../util/event.h"
+#include "util/sample.h"
 
 bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
 {
diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c
index 17672790f1231..4561bda0ce6ad 100644
--- a/tools/perf/bench/inject-buildid.c
+++ b/tools/perf/bench/inject-buildid.c
@@ -19,10 +19,10 @@
 #include "util/data.h"
 #include "util/stat.h"
 #include "util/debug.h"
-#include "util/event.h"
 #include "util/symbol.h"
 #include "util/session.h"
 #include "util/build-id.h"
+#include "util/sample.h"
 #include "util/synthetic-events.h"
 
 #define MMAP_DEV_MAJOR  8
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 923fb8316fdae..dedd612eae5e0 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -20,6 +20,7 @@
 #include "util/symbol.h"
 #include "util/pmu.h"
 #include "util/pmu-hybrid.h"
+#include "util/sample.h"
 #include "util/string2.h"
 #include <linux/err.h>
 
diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c
index 84352d55347d5..99aa72e425e4d 100644
--- a/tools/perf/tests/dlfilter-test.c
+++ b/tools/perf/tests/dlfilter-test.c
@@ -33,6 +33,7 @@
 #include "archinsn.h"
 #include "dlfilter.h"
 #include "tests.h"
+#include "util/sample.h"
 
 #define MAP_START 0x400000
 
diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 8322fc2295fa0..6377906c13183 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -10,6 +10,7 @@
 #include "thread_map.h"
 #include "tests.h"
 #include "util/mmap.h"
+#include "util/sample.h"
 #include <linux/err.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
index a7b2800652e4c..888df8eca9812 100644
--- a/tools/perf/tests/openat-syscall-tp-fields.c
+++ b/tools/perf/tests/openat-syscall-tp-fields.c
@@ -14,6 +14,7 @@
 #include "util/mmap.h"
 #include <errno.h>
 #include <perf/mmap.h>
+#include "util/sample.h"
 
 #ifndef O_DIRECTORY
 #define O_DIRECTORY    00200000
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
index d62e31595ab21..202f0a9a6796b 100644
--- a/tools/perf/tests/parse-no-sample-id-all.c
+++ b/tools/perf/tests/parse-no-sample-id-all.c
@@ -8,6 +8,7 @@
 #include "evlist.h"
 #include "header.h"
 #include "debug.h"
+#include "util/sample.h"
 
 static int process_event(struct evlist **pevlist, union perf_event *event)
 {
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index 7aa946aa886de..d82539e2ae648 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -11,6 +11,7 @@
 #include "record.h"
 #include "tests.h"
 #include "util/mmap.h"
+#include "util/sample.h"
 
 static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp)
 {
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index c3aaa1ddff29f..efcd71c2738af 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -20,6 +20,7 @@
 #include "tsc.h"
 #include "mmap.h"
 #include "tests.h"
+#include "util/sample.h"
 
 /*
  * Except x86_64/i386 and Arm64, other archs don't support TSC in perf.  Just
diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c
index 9cd6fec375eeb..4d7493fa01059 100644
--- a/tools/perf/tests/sw-clock.c
+++ b/tools/perf/tests/sw-clock.c
@@ -13,6 +13,7 @@
 #include "util/evlist.h"
 #include "util/cpumap.h"
 #include "util/mmap.h"
+#include "util/sample.h"
 #include "util/thread_map.h"
 #include <perf/evlist.h>
 #include <perf/mmap.h>
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index 87f565c7f650d..b3bd14b025a89 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -19,6 +19,7 @@
 #include "record.h"
 #include "tests.h"
 #include "util/mmap.h"
+#include "util/sample.h"
 #include "pmu.h"
 
 static int spin_sleep(void)
diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c
index 2383058686444..b0e70ce9d87a4 100644
--- a/tools/perf/util/amd-sample-raw.c
+++ b/tools/perf/util/amd-sample-raw.c
@@ -16,6 +16,7 @@
 #include "evlist.h"
 #include "sample-raw.h"
 #include "pmu-events/pmu-events.h"
+#include "util/sample.h"
 
 static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type;
 static bool zen4_ibs_extensions;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 46ada5ec3f9a2..265d20cc126b1 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -59,6 +59,7 @@
 #include <linux/ctype.h>
 #include "symbol/kallsyms.h"
 #include <internal/lib.h>
+#include "util/sample.h"
 
 /*
  * Make a group from 'leader' to 'last', requiring that the events were not
diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index 94f36a187fe73..d6017c9b1872d 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -10,7 +10,7 @@
 #include <linux/perf_event.h>
 #include <linux/types.h>
 #include "util/map_symbol.h"
-#include "event.h"
+#include "util/sample.h"
 
 struct branch_flags {
 	union {
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 9e0aee276df8e..c65cdaf6975ed 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -34,6 +34,7 @@
 #include <linux/time64.h>
 #include "util.h"
 #include "clockid.h"
+#include "util/sample.h"
 
 #define pr_N(n, fmt, ...) \
 	eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 65495f6945b4d..8b71ac1af81be 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -15,6 +15,7 @@
 struct dso;
 struct machine;
 struct perf_event_attr;
+struct perf_sample;
 
 #ifdef __LP64__
 /*
@@ -42,61 +43,6 @@ struct perf_event_attr;
 /* perf sample has 16 bits size limit */
 #define PERF_SAMPLE_MAX_SIZE (1 << 16)
 
-/* number of register is bound by the number of bits in regs_dump::mask (64) */
-#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
-
-struct regs_dump {
-	u64 abi;
-	u64 mask;
-	u64 *regs;
-
-	/* Cached values/mask filled by first register access. */
-	u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
-	u64 cache_mask;
-};
-
-struct stack_dump {
-	u16 offset;
-	u64 size;
-	char *data;
-};
-
-struct sample_read_value {
-	u64 value;
-	u64 id;   /* only if PERF_FORMAT_ID */
-	u64 lost; /* only if PERF_FORMAT_LOST */
-};
-
-struct sample_read {
-	u64 time_enabled;
-	u64 time_running;
-	union {
-		struct {
-			u64 nr;
-			struct sample_read_value *values;
-		} group;
-		struct sample_read_value one;
-	};
-};
-
-static inline size_t sample_read_value_size(u64 read_format)
-{
-	/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
-	if (read_format & PERF_FORMAT_LOST)
-		return sizeof(struct sample_read_value);
-	else
-		return offsetof(struct sample_read_value, lost);
-}
-
-static inline struct sample_read_value *
-next_sample_read_value(struct sample_read_value *v, u64 read_format)
-{
-	return (void *)v + sample_read_value_size(read_format);
-}
-
-#define sample_read_group__for_each(v, nr, rf)		\
-	for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++)
-
 struct ip_callchain {
 	u64 nr;
 	u64 ips[];
@@ -138,52 +84,6 @@ enum {
 	PERF_IP_FLAG_VMENTRY		|\
 	PERF_IP_FLAG_VMEXIT)
 
-#define MAX_INSN 16
-
-struct aux_sample {
-	u64 size;
-	void *data;
-};
-
-struct perf_sample {
-	u64 ip;
-	u32 pid, tid;
-	u64 time;
-	u64 addr;
-	u64 id;
-	u64 stream_id;
-	u64 period;
-	u64 weight;
-	u64 transaction;
-	u64 insn_cnt;
-	u64 cyc_cnt;
-	u32 cpu;
-	u32 raw_size;
-	u64 data_src;
-	u64 phys_addr;
-	u64 data_page_size;
-	u64 code_page_size;
-	u64 cgroup;
-	u32 flags;
-	u32 machine_pid;
-	u32 vcpu;
-	u16 insn_len;
-	u8  cpumode;
-	u16 misc;
-	u16 ins_lat;
-	u16 p_stage_cyc;
-	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
-	char insn[MAX_INSN];
-	void *raw_data;
-	struct ip_callchain *callchain;
-	struct branch_stack *branch_stack;
-	struct regs_dump  user_regs;
-	struct regs_dump  intr_regs;
-	struct stack_dump user_stack;
-	struct sample_read read;
-	struct aux_sample aux_sample;
-};
-
 #define PERF_MEM_DATA_SRC_NONE \
 	(PERF_MEM_S(OP, NA) |\
 	 PERF_MEM_S(LVL, NA) |\
@@ -342,15 +242,6 @@ struct perf_synth_intel_iflag_chg {
 	u64	branch_ip; /* If via_branch */
 };
 
-/*
- * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
- * 8-byte alignment.
- */
-static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
-{
-	return sample->raw_data - 4;
-}
-
 static inline void *perf_synth__raw_data(void *p)
 {
 	return p + 4;
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6612b00949e70..112850d629cb8 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -29,6 +29,7 @@
 #include "util/evsel_fprintf.h"
 #include "util/evlist-hybrid.h"
 #include "util/pmu.h"
+#include "util/sample.h"
 #include <signal.h>
 #include <unistd.h>
 #include <sched.h>
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 1376077183f72..22308dd930101 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -18,6 +18,7 @@
 
 #include "intel-pt-insn-decoder.h"
 #include "dump-insn.h"
+#include "util/sample.h"
 
 #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
 #error Instruction buffer size too small
diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c
index 872dd3d387821..57a567ee2ceaa 100644
--- a/tools/perf/util/perf_regs.c
+++ b/tools/perf/util/perf_regs.c
@@ -2,7 +2,7 @@
 #include <errno.h>
 #include <string.h>
 #include "perf_regs.h"
-#include "event.h"
+#include "util/sample.h"
 
 int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused,
 				 char **new_op __maybe_unused)
diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c
index f3fdad28a852c..6fe478b0b61b3 100644
--- a/tools/perf/util/s390-cpumsf.c
+++ b/tools/perf/util/s390-cpumsf.c
@@ -163,6 +163,7 @@
 #include "s390-cpumsf-kernel.h"
 #include "s390-cpumcf-kernel.h"
 #include "config.h"
+#include "util/sample.h"
 
 struct s390_cpumsf {
 	struct auxtrace		auxtrace;
diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c
index 9a631d97471c3..c10b891dbad6b 100644
--- a/tools/perf/util/s390-sample-raw.c
+++ b/tools/perf/util/s390-sample-raw.c
@@ -28,6 +28,7 @@
 #include "sample-raw.h"
 #include "s390-cpumcf-kernel.h"
 #include "pmu-events/pmu-events.h"
+#include "util/sample.h"
 
 static size_t ctrset_size(struct cf_ctrset_entry *set)
 {
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
new file mode 100644
index 0000000000000..60ec79d4eea4f
--- /dev/null
+++ b/tools/perf/util/sample.h
@@ -0,0 +1,117 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_SAMPLE_H
+#define __PERF_SAMPLE_H
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+/* number of register is bound by the number of bits in regs_dump::mask (64) */
+#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64))
+
+struct regs_dump {
+	u64 abi;
+	u64 mask;
+	u64 *regs;
+
+	/* Cached values/mask filled by first register access. */
+	u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE];
+	u64 cache_mask;
+};
+
+struct stack_dump {
+	u16 offset;
+	u64 size;
+	char *data;
+};
+
+struct sample_read_value {
+	u64 value;
+	u64 id;   /* only if PERF_FORMAT_ID */
+	u64 lost; /* only if PERF_FORMAT_LOST */
+};
+
+struct sample_read {
+	u64 time_enabled;
+	u64 time_running;
+	union {
+		struct {
+			u64 nr;
+			struct sample_read_value *values;
+		} group;
+		struct sample_read_value one;
+	};
+};
+
+static inline size_t sample_read_value_size(u64 read_format)
+{
+	/* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */
+	if (read_format & PERF_FORMAT_LOST)
+		return sizeof(struct sample_read_value);
+	else
+		return offsetof(struct sample_read_value, lost);
+}
+
+static inline struct sample_read_value *next_sample_read_value(struct sample_read_value *v, u64 read_format)
+{
+	return (void *)v + sample_read_value_size(read_format);
+}
+
+#define sample_read_group__for_each(v, nr, rf) \
+	for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++)
+
+#define MAX_INSN 16
+
+struct aux_sample {
+	u64 size;
+	void *data;
+};
+
+struct perf_sample {
+	u64 ip;
+	u32 pid, tid;
+	u64 time;
+	u64 addr;
+	u64 id;
+	u64 stream_id;
+	u64 period;
+	u64 weight;
+	u64 transaction;
+	u64 insn_cnt;
+	u64 cyc_cnt;
+	u32 cpu;
+	u32 raw_size;
+	u64 data_src;
+	u64 phys_addr;
+	u64 data_page_size;
+	u64 code_page_size;
+	u64 cgroup;
+	u32 flags;
+	u32 machine_pid;
+	u32 vcpu;
+	u16 insn_len;
+	u8  cpumode;
+	u16 misc;
+	u16 ins_lat;
+	u16 p_stage_cyc;
+	bool no_hw_idx;		/* No hw_idx collected in branch_stack */
+	char insn[MAX_INSN];
+	void *raw_data;
+	struct ip_callchain *callchain;
+	struct branch_stack *branch_stack;
+	struct regs_dump  user_regs;
+	struct regs_dump  intr_regs;
+	struct stack_dump user_stack;
+	struct sample_read read;
+	struct aux_sample aux_sample;
+};
+
+/*
+ * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get
+ * 8-byte alignment.
+ */
+static inline void *perf_sample__synth_ptr(struct perf_sample *sample)
+{
+	return sample->raw_data - 4;
+}
+
+#endif /* __PERF_SAMPLE_H */
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 7172ca05265f3..636a010d929b9 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -12,9 +12,9 @@
 
 #include "debug.h"
 #include "trace-event.h"
-#include "event.h"
 #include "evsel.h"
 #include <linux/zalloc.h>
+#include "util/sample.h"
 
 struct scripting_context *scripting_context;
 
-- 
GitLab


From 628d69995e66343266475e6afc76192f5878b605 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Oct 2022 16:49:58 -0300
Subject: [PATCH 0696/4122] perf kwork: Remove includes not needed in kwork.h

Leave just some forward declarations for pointers, move the includes to
where they are really needed.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kwork.c  |  6 ++++++
 tools/perf/util/bpf_kwork.c |  3 +++
 tools/perf/util/kwork.h     | 12 ++++++------
 3 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index fb8c63656ad89..4ffbf5908070d 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -6,10 +6,15 @@
  */
 
 #include "builtin.h"
+#include "perf.h"
 
 #include "util/data.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
+#include "util/header.h"
 #include "util/kwork.h"
 #include "util/debug.h"
+#include "util/session.h"
 #include "util/symbol.h"
 #include "util/thread.h"
 #include "util/string2.h"
@@ -21,6 +26,7 @@
 
 #include <errno.h>
 #include <inttypes.h>
+#include <signal.h>
 #include <linux/err.h>
 #include <linux/time64.h>
 #include <linux/zalloc.h>
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
index b629dd679d3fa..6eb2c78fd7f4a 100644
--- a/tools/perf/util/bpf_kwork.c
+++ b/tools/perf/util/bpf_kwork.c
@@ -7,15 +7,18 @@
 
 #include <time.h>
 #include <fcntl.h>
+#include <signal.h>
 #include <stdio.h>
 #include <unistd.h>
 
 #include <linux/time64.h>
 
 #include "util/debug.h"
+#include "util/evsel.h"
 #include "util/kwork.h"
 
 #include <bpf/bpf.h>
+#include <perf/cpumap.h>
 
 #include "util/bpf_skel/kwork_trace.skel.h"
 
diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h
index 320c0a6d2e086..53b7327550b86 100644
--- a/tools/perf/util/kwork.h
+++ b/tools/perf/util/kwork.h
@@ -1,16 +1,16 @@
 #ifndef PERF_UTIL_KWORK_H
 #define PERF_UTIL_KWORK_H
 
-#include "perf.h"
-
 #include "util/tool.h"
-#include "util/event.h"
-#include "util/evlist.h"
-#include "util/session.h"
 #include "util/time-utils.h"
 
-#include <linux/list.h>
 #include <linux/bitmap.h>
+#include <linux/list.h>
+#include <linux/rbtree.h>
+#include <linux/types.h>
+
+struct perf_sample;
+struct perf_session;
 
 enum kwork_class_type {
 	KWORK_CLASS_IRQ,
-- 
GitLab


From 7e5c6f2c1aa2daa0d8aca657377450529f381fe6 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Oct 2022 16:54:46 -0300
Subject: [PATCH 0697/4122] perf machine: Move machine__resolve() from event.h

Its a machine method, so move it to machine.h, this way some places that
were using event.h just to get this prototype may stop doing so and
speed up building and disentanble the header dependency graph.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/event.h   | 3 ---
 tools/perf/util/machine.h | 3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8b71ac1af81be..ea5bd1f62b0fe 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -337,9 +337,6 @@ int perf_event__process(struct perf_tool *tool,
 
 struct addr_location;
 
-int machine__resolve(struct machine *machine, struct addr_location *al,
-		     struct perf_sample *sample);
-
 void addr_location__put(struct addr_location *al);
 
 struct thread;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 74935dfaa9375..6267c1d6f2321 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -305,4 +305,7 @@ int machine__create_extra_kernel_map(struct machine *machine,
 int machine__map_x86_64_entry_trampolines(struct machine *machine,
 					  struct dso *kernel);
 
+int machine__resolve(struct machine *machine, struct addr_location *al,
+		     struct perf_sample *sample);
+
 #endif /* __PERF_MACHINE_H */
-- 
GitLab


From d1e633e4cdc0d06cec82d4772c025f13c3b25a6c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Oct 2022 16:54:46 -0300
Subject: [PATCH 0698/4122] perf symbol: Move addr_location__put() from event.h

Its a addr_location method, so move it to symbol.h, where 'struct
addr_location' is, this way some places that were using event.h just to
get this prototype may stop doing so and speed up building and
disentanble the header dependency graph.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/event.h  | 3 ---
 tools/perf/util/symbol.h | 2 ++
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index ea5bd1f62b0fe..bc6c1e2206cf6 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -336,9 +336,6 @@ int perf_event__process(struct perf_tool *tool,
 			struct machine *machine);
 
 struct addr_location;
-
-void addr_location__put(struct addr_location *al);
-
 struct thread;
 
 bool is_bts_event(struct perf_event_attr *attr);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 0b893dcc8ea68..e297de14184c5 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -132,6 +132,8 @@ struct addr_location {
 	s32	      socket;
 };
 
+void addr_location__put(struct addr_location *al);
+
 int dso__load(struct dso *dso, struct map *map);
 int dso__load_vmlinux(struct dso *dso, struct map *map,
 		      const char *vmlinux, bool vmlinux_allocated);
-- 
GitLab


From cde5671268faf6419026ebd5e0f1783b3a84cf39 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Oct 2022 16:54:46 -0300
Subject: [PATCH 0699/4122] perf thread: Move thread__resolve() from event.h

Its a thread method, so move it to thread.h, this way some places that
were using event.h just to get this prototype may stop doing so and
speed up building and disentanble the header dependency graph.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/event.h  | 5 -----
 tools/perf/util/thread.h | 3 +++
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index bc6c1e2206cf6..6663a676eadcf 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -335,13 +335,8 @@ int perf_event__process(struct perf_tool *tool,
 			struct perf_sample *sample,
 			struct machine *machine);
 
-struct addr_location;
-struct thread;
-
 bool is_bts_event(struct perf_event_attr *attr);
 bool sample_addr_correlates_sym(struct perf_event_attr *attr);
-void thread__resolve(struct thread *thread, struct addr_location *al,
-		     struct perf_sample *sample);
 
 const char *perf_event__name(unsigned int id);
 
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 241f300d7d6ef..395c626699a9c 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -158,4 +158,7 @@ static inline bool thread__is_filtered(struct thread *thread)
 
 void thread__free_stitch_list(struct thread *thread);
 
+void thread__resolve(struct thread *thread, struct addr_location *al,
+		     struct perf_sample *sample);
+
 #endif	/* __PERF_THREAD_H */
-- 
GitLab


From fd8d5a3b076c033f5589186ac49d76e74b39f97f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Oct 2022 17:02:57 -0300
Subject: [PATCH 0700/4122] perf tests: Add missing event.h include

It uses things like perf_event__name() but were not including event.h,
where its prototype lives, fix it.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/mmap-basic.c  | 1 +
 tools/perf/tests/perf-record.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 6377906c13183..e68ca62297566 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -5,6 +5,7 @@
 #include <perf/cpumap.h>
 
 #include "debug.h"
+#include "event.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c
index d82539e2ae648..1c4feec1adff1 100644
--- a/tools/perf/tests/perf-record.c
+++ b/tools/perf/tests/perf-record.c
@@ -5,6 +5,7 @@
 
 #include <sched.h>
 #include <perf/mmap.h>
+#include "event.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "debug.h"
-- 
GitLab


From 606f70ab7f5ace535514a5fa6f9ad62ae6e515eb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Oct 2022 17:42:52 -0300
Subject: [PATCH 0701/4122] perf mmap: Remove several unneeded includes from
 util/mmap.h

Those headers are not needed in util/mmap.h, remove them.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/mmap.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h
index cd4ccec7f3617..f944c3cd5efa0 100644
--- a/tools/perf/util/mmap.h
+++ b/tools/perf/util/mmap.h
@@ -2,18 +2,13 @@
 #define __PERF_MMAP_H 1
 
 #include <internal/mmap.h>
-#include <linux/compiler.h>
-#include <linux/refcount.h>
 #include <linux/types.h>
-#include <linux/ring_buffer.h>
 #include <linux/bitops.h>
 #include <perf/cpumap.h>
-#include <stdbool.h>
 #ifdef HAVE_AIO_SUPPORT
 #include <aio.h>
 #endif
 #include "auxtrace.h"
-#include "event.h"
 #include "util/compress.h"
 
 struct aiocb;
-- 
GitLab


From 2e5a738abc69601a4ea4a3544ec29cab9189eaae Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 27 Oct 2022 17:44:07 -0300
Subject: [PATCH 0702/4122] perf evlist: Add missing util/event.h header

Needed to get the event_attr_init() and perf_event_paranoid() prototypes
that were being obtained indirectly, by sheer luck.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 112850d629cb8..fbf3192bced97 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -24,6 +24,7 @@
 #include "../perf.h"
 #include "asm/bug.h"
 #include "bpf-event.h"
+#include "util/event.h"
 #include "util/string2.h"
 #include "util/perf_api_probe.h"
 #include "util/evsel_fprintf.h"
-- 
GitLab


From 146edff3d7ed135269dae8abef3219083c45b21e Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 12 Oct 2022 18:12:05 -0700
Subject: [PATCH 0703/4122] perf test: Parse events workaround for dash/minus

Skip an event configuration for event names with a dash/minus in them.
Events with a dash/minus in their name cause parsing issues as legacy
encoding of events would use a dash/minus as a separator.

The parser separates events with dashes into prefixes and suffixes and
then recombines them. Unfortunately if an event has part of its name
that matches a legacy token then the recombining fails.

This is seen for branch-brs where branch is a legacy token. branch-brs
was introduced to sysfs in:

  https://lore.kernel.org/all/20220322221517.2510440-5-eranian@google.com/

The failure is shown below as well as the workaround to use a config
where the dash/minus isn't treated specially:

```
  $ perf stat -e branch-brs true
  event syntax error: 'branch-brs'
                             \___ parser error

  $ perf stat -e cpu/branch-brs/ true

   Performance counter stats for 'true':

              46,179      cpu/branch-brs/
```

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20221013011205.3151391-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/parse-events.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 459afdb256a1c..3440dd2616b06 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -2237,6 +2237,19 @@ static int test__pmu_events(struct test_suite *test __maybe_unused, int subtest
 			pr_debug("Test PMU event failed for '%s'", name);
 			ret = combine_test_results(ret, test_ret);
 		}
+		/*
+		 * Names containing '-' are recognized as prefixes and suffixes
+		 * due to '-' being a legacy PMU separator. This fails when the
+		 * prefix or suffix collides with an existing legacy token. For
+		 * example, branch-brs has a prefix (branch) that collides with
+		 * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix
+		 * isn't expected after this. As event names in the config
+		 * slashes are allowed a '-' in the name we check this works
+		 * above.
+		 */
+		if (strchr(ent->d_name, '-'))
+			continue;
+
 		snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name);
 		e.name  = name;
 		e.check = test__checkevent_pmu_events_mix;
-- 
GitLab


From 005ef2b41b119caf65ac611109ce7ae68d857b46 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 28 Oct 2022 11:01:25 -0700
Subject: [PATCH 0704/4122] perf lock contention: Fix memory sanitizer issue

The msan reported a use-of-uninitialized-value warning for the struct
lock_contention_data in lock_contention_read().  While it'd be filled
by bpf_map_lookup_elem(), let's just initialize it to silence the
warning.

  ==12524==WARNING: MemorySanitizer: use-of-uninitialized-value
  #0 0x562b0f16b1cd in lock_contention_read  util/bpf_lock_contention.c:139:7
  #1 0x562b0ef65ec6 in __cmd_contention  builtin-lock.c:1737:3
  #2 0x562b0ef65ec6 in cmd_lock  builtin-lock.c:1992:8
  #3 0x562b0ee7f50b in run_builtin  perf.c:322:11
  #4 0x562b0ee7efc1 in handle_internal_command  perf.c:376:8
  #5 0x562b0ee7e1e9 in run_argv  perf.c:420:2
  #6 0x562b0ee7e1e9 in main  perf.c:550:3
  #7 0x7f065f10e632 in __libc_start_main (/usr/lib64/libc.so.6+0x61632)
  #8 0x562b0edf2fa9 in _start (perf+0xfa9)
  SUMMARY: MemorySanitizer: use-of-uninitialized-value (perf+0xe15160) in lock_contention_read

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221028180128.3311491-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_lock_contention.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index fc4d613cb979a..06466da792e41 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -110,7 +110,7 @@ int lock_contention_read(struct lock_contention *con)
 {
 	int fd, stack;
 	s32 prev_key, key;
-	struct lock_contention_data data;
+	struct lock_contention_data data = {};
 	struct lock_stat *st;
 	struct machine *machine = con->machine;
 	u64 stack_trace[con->max_stack];
-- 
GitLab


From 0a277b6226703812ba90543d5bb49476e03f816e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 28 Oct 2022 11:01:26 -0700
Subject: [PATCH 0705/4122] perf lock contention: Check --max-stack option

The --max-stack option is used to allocate the BPF stack map and stack
trace array in the userspace.  Check the value properly before using.
Practically it cannot be greater than the sysctl_perf_event_max_stack.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221028180128.3311491-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-lock.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 66520712a1675..6f79175365a80 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -24,6 +24,7 @@
 #include "util/data.h"
 #include "util/string2.h"
 #include "util/map.h"
+#include "util/util.h"
 
 #include <sys/types.h>
 #include <sys/prctl.h>
@@ -1858,6 +1859,29 @@ static int parse_map_entry(const struct option *opt, const char *str,
 	return 0;
 }
 
+static int parse_max_stack(const struct option *opt, const char *str,
+			   int unset __maybe_unused)
+{
+	unsigned long *len = (unsigned long *)opt->value;
+	long val;
+	char *endptr;
+
+	errno = 0;
+	val = strtol(str, &endptr, 0);
+	if (*endptr != '\0' || errno != 0) {
+		pr_err("invalid max stack depth: %s\n", str);
+		return -1;
+	}
+
+	if (val < 0 || val > sysctl__max_stack()) {
+		pr_err("invalid max stack depth: %ld\n", val);
+		return -1;
+	}
+
+	*len = val;
+	return 0;
+}
+
 int cmd_lock(int argc, const char **argv)
 {
 	const struct option lock_options[] = {
@@ -1913,9 +1937,9 @@ int cmd_lock(int argc, const char **argv)
 		   "Trace on existing thread id (exclusive to --pid)"),
 	OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num",
 		     "Max number of BPF map entries", parse_map_entry),
-	OPT_INTEGER(0, "max-stack", &max_stack_depth,
-		    "Set the maximum stack depth when collecting lock contention, "
-		    "Default: " __stringify(CONTENTION_STACK_DEPTH)),
+	OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num",
+		     "Set the maximum stack depth when collecting lopck contention, "
+		     "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack),
 	OPT_INTEGER(0, "stack-skip", &stack_skip,
 		    "Set the number of stack depth to skip when finding a lock caller, "
 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
-- 
GitLab


From 9e9c5f3cf912c3e068b6c24d77f6f07ac242395b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 28 Oct 2022 11:01:27 -0700
Subject: [PATCH 0706/4122] perf lock contention: Avoid variable length arrays

The msan also warns about the use of VLA for stack_trace variable.
We can dynamically allocate instead.  While at it, simplify the error
handle a bit (and fix bugs).

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221028180128.3311491-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_lock_contention.c | 41 ++++++++++++++++++---------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 06466da792e41..0deec1178778e 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -108,28 +108,36 @@ int lock_contention_stop(void)
 
 int lock_contention_read(struct lock_contention *con)
 {
-	int fd, stack;
+	int fd, stack, err = 0;
 	s32 prev_key, key;
 	struct lock_contention_data data = {};
-	struct lock_stat *st;
+	struct lock_stat *st = NULL;
 	struct machine *machine = con->machine;
-	u64 stack_trace[con->max_stack];
+	u64 *stack_trace;
+	size_t stack_size = con->max_stack * sizeof(*stack_trace);
 
 	fd = bpf_map__fd(skel->maps.lock_stat);
 	stack = bpf_map__fd(skel->maps.stacks);
 
 	con->lost = skel->bss->lost;
 
+	stack_trace = zalloc(stack_size);
+	if (stack_trace == NULL)
+		return -1;
+
 	prev_key = 0;
 	while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
 		struct map *kmap;
 		struct symbol *sym;
 		int idx = 0;
 
+		/* to handle errors in the loop body */
+		err = -1;
+
 		bpf_map_lookup_elem(fd, &key, &data);
 		st = zalloc(sizeof(*st));
 		if (st == NULL)
-			return -1;
+			break;
 
 		st->nr_contended = data.count;
 		st->wait_time_total = data.total_time;
@@ -163,25 +171,32 @@ int lock_contention_read(struct lock_contention *con)
 				st->name = strdup(sym->name);
 
 			if (ret < 0 || st->name == NULL)
-				return -1;
+				break;
 		} else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) {
-			free(st);
-			return -1;
+			break;
 		}
 
 		if (verbose) {
-			st->callstack = memdup(stack_trace, sizeof(stack_trace));
-			if (st->callstack == NULL) {
-				free(st);
-				return -1;
-			}
+			st->callstack = memdup(stack_trace, stack_size);
+			if (st->callstack == NULL)
+				break;
 		}
 
 		hlist_add_head(&st->hash_entry, con->result);
 		prev_key = key;
+
+		/* we're fine now, reset the values */
+		st = NULL;
+		err = 0;
 	}
 
-	return 0;
+	free(stack_trace);
+	if (st) {
+		free(st->name);
+		free(st);
+	}
+
+	return err;
 }
 
 int lock_contention_finish(void)
-- 
GitLab


From c940fa276b5a4210255e0cb9766f06e38443303a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 28 Oct 2022 11:01:28 -0700
Subject: [PATCH 0707/4122] perf lock contention: Increase default stack skip
 to 4

In most configurations, it works well with skipping 4 entries by
default.  If some systems still have 3 BPF internal stack frames,
the next frame should be in a lock function which will be skipped
later when it tries to find a caller.  So increasing to 4 won't
affect such systems too.

With --stack-skip=0, I can see something like this:

    24     49.84 us      7.41 us      2.08 us        mutex   bpf_prog_e1b85959d520446c_contention_begin+0x12e
                    0xffffffffc045040e  bpf_prog_e1b85959d520446c_contention_begin+0x12e
                    0xffffffffc045040e  bpf_prog_e1b85959d520446c_contention_begin+0x12e
                    0xffffffff82ea2071  bpf_trace_run2+0x51
                    0xffffffff82de775b  __bpf_trace_contention_begin+0xb
                    0xffffffff82c02045  __mutex_lock+0x245
                    0xffffffff82c019e3  __mutex_lock_slowpath+0x13
                    0xffffffff82c019c0  mutex_lock+0x20
                    0xffffffff830a083c  kernfs_iop_permission+0x2c

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221028180128.3311491-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/lock-contention.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index b8cb8830b7bc5..e3c061b1795ba 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -91,7 +91,7 @@ struct thread_stat {
  * Number of stack trace entries to skip when finding callers.
  * The first few entries belong to the locking implementation itself.
  */
-#define CONTENTION_STACK_SKIP  3
+#define CONTENTION_STACK_SKIP  4
 
 /*
  * flags for lock:contention_begin
-- 
GitLab


From 98c148cd5c73a4cf1d78319e309a79eef50b4220 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Mon, 31 Oct 2022 16:18:08 +0100
Subject: [PATCH 0708/4122] power: supply: core: repair kernel-doc for
 power_supply_vbat2ri()

The function power_supply_vbat2ri() does not have a parameter called table,
despite it being mentioned in the kernel-doc comment. The table is actually
obtained from the info parameter, the battery information container. Hence,
./scripts/kernel-doc -none drivers/power/supply/power_supply_core.c warns
about this excess function parameter.

Adjust the kernel-doc comment for power_supply_vbat2ri() for make W=1
happiness.

Fixes: e9e7d165b4b0 ("power: supply: Support VBAT-to-Ri lookup tables")
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/power_supply_core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 4b5fb172fa994..9035e349bf53c 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -870,7 +870,6 @@ EXPORT_SYMBOL_GPL(power_supply_temp2resist_simple);
  * power_supply_vbat2ri() - find the battery internal resistance
  * from the battery voltage
  * @info: The battery information container
- * @table: Pointer to battery resistance temperature table
  * @vbat_uv: The battery voltage in microvolt
  * @charging: If we are charging (true) or not (false)
  *
-- 
GitLab


From 9a265e04b99dd2e9c2fac26875e120c777f14f43 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 31 Oct 2022 09:36:21 +0200
Subject: [PATCH 0709/4122] power: supply: bd99954: Use LINEAR_RANGE()

Do minor clean-up by using newly inroduced LINEAR_RANGE() initialization
macro.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bd99954-charger.c | 84 ++++----------------------
 1 file changed, 13 insertions(+), 71 deletions(-)

diff --git a/drivers/power/supply/bd99954-charger.c b/drivers/power/supply/bd99954-charger.c
index 96e93e1b80949..250362e15c98b 100644
--- a/drivers/power/supply/bd99954-charger.c
+++ b/drivers/power/supply/bd99954-charger.c
@@ -768,27 +768,13 @@ static const struct power_supply_desc bd9995x_power_supply_desc = {
  * Describe the setting in linear_range table.
  */
 static const struct linear_range input_current_limit_ranges[] = {
-	{
-		.min = 0,
-		.step = 32000,
-		.min_sel = 0x0,
-		.max_sel = 0x1ff,
-	},
+	LINEAR_RANGE(0, 0x0, 0x1ff, 32000),
 };
 
 /* Possible trickle, pre-charging and termination current values */
 static const struct linear_range charging_current_ranges[] = {
-	{
-		.min = 0,
-		.step = 64000,
-		.min_sel = 0x0,
-		.max_sel = 0x10,
-	}, {
-		.min = 1024000,
-		.step = 0,
-		.min_sel = 0x11,
-		.max_sel = 0x1f,
-	},
+	LINEAR_RANGE(0, 0x0, 0x10, 64000),
+	LINEAR_RANGE(1024000, 0x11, 0x1f, 0),
 };
 
 /*
@@ -796,72 +782,28 @@ static const struct linear_range charging_current_ranges[] = {
  * and battery over voltage protection have same possible values
  */
 static const struct linear_range charge_voltage_regulation_ranges[] = {
-	{
-		.min = 2560000,
-		.step = 0,
-		.min_sel = 0,
-		.max_sel = 0xA0,
-	}, {
-		.min = 2560000,
-		.step = 16000,
-		.min_sel = 0xA0,
-		.max_sel = 0x4B0,
-	}, {
-		.min = 19200000,
-		.step = 0,
-		.min_sel = 0x4B0,
-		.max_sel = 0x7FF,
-	},
+	LINEAR_RANGE(2560000, 0, 0xA0, 0),
+	LINEAR_RANGE(2560000, 0xA0, 0x4B0, 16000),
+	LINEAR_RANGE(19200000, 0x4B0, 0x7FF, 0),
 };
 
 /* Possible VSYS voltage regulation values */
 static const struct linear_range vsys_voltage_regulation_ranges[] = {
-	{
-		.min = 2560000,
-		.step = 0,
-		.min_sel = 0,
-		.max_sel = 0x28,
-	}, {
-		.min = 2560000,
-		.step = 64000,
-		.min_sel = 0x28,
-		.max_sel = 0x12C,
-	}, {
-		.min = 19200000,
-		.step = 0,
-		.min_sel = 0x12C,
-		.max_sel = 0x1FF,
-	},
+	LINEAR_RANGE(2560000, 0, 0x28, 0),
+	LINEAR_RANGE(2560000, 0x28, 0x12C, 64000),
+	LINEAR_RANGE(19200000, 0x12C, 0x1FF, 0),
 };
 
 /* Possible settings for switching from trickle to pre-charging limits */
 static const struct linear_range trickle_to_pre_threshold_ranges[] = {
-	{
-		.min = 2048000,
-		.step = 0,
-		.min_sel = 0,
-		.max_sel = 0x20,
-	}, {
-		.min = 2048000,
-		.step = 64000,
-		.min_sel = 0x20,
-		.max_sel = 0x12C,
-	}, {
-		.min = 19200000,
-		.step = 0,
-		.min_sel = 0x12C,
-		.max_sel = 0x1FF
-	}
+	LINEAR_RANGE(2048000, 0, 0x20, 0),
+	LINEAR_RANGE(2048000, 0x20, 0x12C, 64000),
+	LINEAR_RANGE(19200000, 0x12C, 0x1FF, 0),
 };
 
 /* Possible current values for fast-charging constant current phase */
 static const struct linear_range fast_charge_current_ranges[] = {
-	{
-		.min = 0,
-		.step = 64000,
-		.min_sel = 0,
-		.max_sel = 0xFF,
-	}
+	LINEAR_RANGE(0, 0, 0xFF, 64000),
 };
 
 struct battery_init {
-- 
GitLab


From eae063f64714c2ab92eda900031c5e34a5597201 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 31 Oct 2022 11:25:00 +0200
Subject: [PATCH 0710/4122] power: supply: mt6360: Use LINEAR_RANGE_IDX()

Do minor clean-up by using the newly inroduced LINEAR_RANGE_IDX()
initialization macro.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/mt6360_charger.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/power/supply/mt6360_charger.c b/drivers/power/supply/mt6360_charger.c
index 3abaa72e06683..92e48e3a48536 100644
--- a/drivers/power/supply/mt6360_charger.c
+++ b/drivers/power/supply/mt6360_charger.c
@@ -113,16 +113,13 @@ enum {
 	MT6360_RANGE_MAX,
 };
 
-#define MT6360_LINEAR_RANGE(idx, _min, _min_sel, _max_sel, _step) \
-	[idx] = REGULATOR_LINEAR_RANGE(_min, _min_sel, _max_sel, _step)
-
 static const struct linear_range mt6360_chg_range[MT6360_RANGE_MAX] = {
-	MT6360_LINEAR_RANGE(MT6360_RANGE_VMIVR, 3900000, 0, 0x5F, 100000),
-	MT6360_LINEAR_RANGE(MT6360_RANGE_ICHG, 100000, 0, 0x31, 100000),
-	MT6360_LINEAR_RANGE(MT6360_RANGE_VOREG, 3900000, 0, 0x51, 10000),
-	MT6360_LINEAR_RANGE(MT6360_RANGE_AICR, 100000, 0, 0x3F, 50000),
-	MT6360_LINEAR_RANGE(MT6360_RANGE_IPREC, 100000, 0, 0x0F, 50000),
-	MT6360_LINEAR_RANGE(MT6360_RANGE_IEOC, 100000, 0, 0x0F, 50000),
+	LINEAR_RANGE_IDX(MT6360_RANGE_VMIVR, 3900000, 0, 0x5F, 100000),
+	LINEAR_RANGE_IDX(MT6360_RANGE_ICHG, 100000, 0, 0x31, 100000),
+	LINEAR_RANGE_IDX(MT6360_RANGE_VOREG, 3900000, 0, 0x51, 10000),
+	LINEAR_RANGE_IDX(MT6360_RANGE_AICR, 100000, 0, 0x3F, 50000),
+	LINEAR_RANGE_IDX(MT6360_RANGE_IPREC, 100000, 0, 0x0F, 50000),
+	LINEAR_RANGE_IDX(MT6360_RANGE_IEOC, 100000, 0, 0x0F, 50000),
 };
 
 struct mt6360_chg_info {
-- 
GitLab


From e9e6fa49dbab6d84c676666f3fe7d360497fd65b Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Fri, 28 Oct 2022 20:33:20 +0800
Subject: [PATCH 0711/4122] apparmor: Fix memleak in alloc_ns()

After changes in commit a1bd627b46d1 ("apparmor: share profile name on
replacement"), the hname member of struct aa_policy is not valid slab
object, but a subset of that, it can not be freed by kfree_sensitive(),
use aa_policy_destroy() to fix it.

Fixes: a1bd627b46d1 ("apparmor: share profile name on replacement")
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_ns.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/security/apparmor/policy_ns.c b/security/apparmor/policy_ns.c
index 5c38563a6dcfd..fd5b7afbcb487 100644
--- a/security/apparmor/policy_ns.c
+++ b/security/apparmor/policy_ns.c
@@ -132,7 +132,7 @@ static struct aa_ns *alloc_ns(const char *prefix, const char *name)
 	return ns;
 
 fail_unconfined:
-	kfree_sensitive(ns->base.hname);
+	aa_policy_destroy(&ns->base);
 fail_ns:
 	kfree_sensitive(ns);
 	return NULL;
-- 
GitLab


From 969864efae78eb51b0baa1d14b2dfe08151b5874 Mon Sep 17 00:00:00 2001
From: Raju Rangoju <Raju.Rangoju@amd.com>
Date: Tue, 25 Oct 2022 23:41:24 +0530
Subject: [PATCH 0712/4122] i2c: amd-mp2: use msix/msi if the hardware supports

Use msix or msi interrupts if the hardware supports it. Else, fallback to
legacy interrupts.

Co-developed-by: Basavaraj Natikar <basavaraj.natikar@amd.com>
Signed-off-by: Basavaraj Natikar <basavaraj.natikar@amd.com>
Signed-off-by: Raju Rangoju <Raju.Rangoju@amd.com>
Acked-by: Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-amd-mp2-pci.c | 30 +++++++++++++++++++---------
 drivers/i2c/busses/i2c-amd-mp2.h     |  1 +
 2 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/i2c/busses/i2c-amd-mp2-pci.c b/drivers/i2c/busses/i2c-amd-mp2-pci.c
index f57077a7448d1..1431653009496 100644
--- a/drivers/i2c/busses/i2c-amd-mp2-pci.c
+++ b/drivers/i2c/busses/i2c-amd-mp2-pci.c
@@ -288,7 +288,7 @@ static void amd_mp2_clear_reg(struct amd_mp2_dev *privdata)
 static int amd_mp2_pci_init(struct amd_mp2_dev *privdata,
 			    struct pci_dev *pci_dev)
 {
-	int rc;
+	int irq_flag = 0, rc;
 
 	pci_set_drvdata(pci_dev, privdata);
 
@@ -311,17 +311,29 @@ static int amd_mp2_pci_init(struct amd_mp2_dev *privdata,
 	if (rc)
 		goto err_dma_mask;
 
-	/* Set up intx irq */
+	/* request and enable interrupt */
 	writel(0, privdata->mmio + AMD_P2C_MSG_INTEN);
-	pci_intx(pci_dev, 1);
-	rc = devm_request_irq(&pci_dev->dev, pci_dev->irq, amd_mp2_irq_isr,
-			      IRQF_SHARED, dev_name(&pci_dev->dev), privdata);
-	if (rc)
-		pci_err(pci_dev, "Failure requesting irq %i: %d\n",
-			pci_dev->irq, rc);
+	rc = pci_alloc_irq_vectors(pci_dev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (rc < 0) {
+		dev_err(&pci_dev->dev, "Failed to allocate single IRQ err=%d\n", rc);
+		goto err_dma_mask;
+	}
+
+	privdata->dev_irq = pci_irq_vector(pci_dev, 0);
+	if (!pci_dev->msix_enabled && !pci_dev->msi_enabled)
+		irq_flag = IRQF_SHARED;
+
+	rc = devm_request_irq(&pci_dev->dev, privdata->dev_irq,
+			      amd_mp2_irq_isr, irq_flag, dev_name(&pci_dev->dev), privdata);
+	if (rc) {
+		pci_err(pci_dev, "Failure requesting irq %i: %d\n", privdata->dev_irq, rc);
+		goto free_irq_vectors;
+	}
 
 	return rc;
 
+free_irq_vectors:
+	free_irq(privdata->dev_irq, privdata);
 err_dma_mask:
 	pci_clear_master(pci_dev);
 err_pci_enable:
@@ -364,7 +376,7 @@ static void amd_mp2_pci_remove(struct pci_dev *pci_dev)
 	pm_runtime_forbid(&pci_dev->dev);
 	pm_runtime_get_noresume(&pci_dev->dev);
 
-	pci_intx(pci_dev, 0);
+	free_irq(privdata->dev_irq, privdata);
 	pci_clear_master(pci_dev);
 
 	amd_mp2_clear_reg(privdata);
diff --git a/drivers/i2c/busses/i2c-amd-mp2.h b/drivers/i2c/busses/i2c-amd-mp2.h
index ddecd0c886560..018a42de8b1e3 100644
--- a/drivers/i2c/busses/i2c-amd-mp2.h
+++ b/drivers/i2c/busses/i2c-amd-mp2.h
@@ -183,6 +183,7 @@ struct amd_mp2_dev {
 	struct mutex c2p_lock;
 	u8 c2p_lock_busid;
 	unsigned int probed;
+	int dev_irq;
 };
 
 /* PCIe communication driver */
-- 
GitLab


From bb2617f0f2abbd8c622b2401e5e4984a4eef895b Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Sun, 9 Oct 2022 12:16:30 +0200
Subject: [PATCH 0713/4122] dt-bindings: i2c: update bindings for mt7986 soc

Add i2c compatible for MT7986 SOC.

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml
index 4e730fb7be567..421563bf576cd 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-mt65xx.yaml
@@ -23,6 +23,7 @@ properties:
       - const: mediatek,mt6577-i2c
       - const: mediatek,mt6589-i2c
       - const: mediatek,mt7622-i2c
+      - const: mediatek,mt7986-i2c
       - const: mediatek,mt8168-i2c
       - const: mediatek,mt8173-i2c
       - const: mediatek,mt8183-i2c
-- 
GitLab


From e0b7afc0eba88e8270860a573fe4ea28fe2d467c Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Sun, 9 Oct 2022 12:16:31 +0200
Subject: [PATCH 0714/4122] i2c: mediatek: add mt7986 support

Add i2c support for MT7986 SoC.

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-mt65xx.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/i2c/busses/i2c-mt65xx.c b/drivers/i2c/busses/i2c-mt65xx.c
index fc7bfd98156ba..d80e59340d97a 100644
--- a/drivers/i2c/busses/i2c-mt65xx.c
+++ b/drivers/i2c/busses/i2c-mt65xx.c
@@ -431,6 +431,19 @@ static const struct mtk_i2c_compatible mt8168_compat = {
 	.max_dma_support = 33,
 };
 
+static const struct mtk_i2c_compatible mt7986_compat = {
+	.quirks = &mt7622_i2c_quirks,
+	.regs = mt_i2c_regs_v1,
+	.pmic_i2c = 0,
+	.dcm = 1,
+	.auto_restart = 1,
+	.aux_len_reg = 1,
+	.timing_adjust = 0,
+	.dma_sync = 1,
+	.ltiming_adjust = 0,
+	.max_dma_support = 32,
+};
+
 static const struct mtk_i2c_compatible mt8173_compat = {
 	.regs = mt_i2c_regs_v1,
 	.pmic_i2c = 0,
@@ -503,6 +516,7 @@ static const struct of_device_id mtk_i2c_of_match[] = {
 	{ .compatible = "mediatek,mt6577-i2c", .data = &mt6577_compat },
 	{ .compatible = "mediatek,mt6589-i2c", .data = &mt6589_compat },
 	{ .compatible = "mediatek,mt7622-i2c", .data = &mt7622_compat },
+	{ .compatible = "mediatek,mt7986-i2c", .data = &mt7986_compat },
 	{ .compatible = "mediatek,mt8168-i2c", .data = &mt8168_compat },
 	{ .compatible = "mediatek,mt8173-i2c", .data = &mt8173_compat },
 	{ .compatible = "mediatek,mt8183-i2c", .data = &mt8183_compat },
-- 
GitLab


From a826b6e9e467ed378a2c50c4a03cb863ab681198 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sat, 8 Oct 2022 14:59:23 +0200
Subject: [PATCH 0715/4122] i2c: npcm7xx: Group bank 0/1 registers together for
 readability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The unlabelled registers NPCM_I2CCTL4 to NPCM_I2CSCLHT overlap with the
bank 1 registers below, and they are accessed after selecting bank 0, so
they clearly belong to bank 0.

Move them together with the other bank 0 registers, and move the
unrelated definition of npcm_i2caddr down to keep the banked registers
in one piece.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Reviewed-by: Tali Perry <tali.perry1@gmail.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-npcm7xx.c | 31 +++++++++++++++----------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
index 0c365b57d9572..9a7a2d0bf5765 100644
--- a/drivers/i2c/busses/i2c-npcm7xx.c
+++ b/drivers/i2c/busses/i2c-npcm7xx.c
@@ -106,7 +106,7 @@ enum i2c_addr {
 #define NPCM_I2CCST3			0x19
 #define I2C_VER				0x1F
 
-/*BANK0 regs*/
+/* BANK 0 regs */
 #define NPCM_I2CADDR3			0x10
 #define NPCM_I2CADDR7			0x11
 #define NPCM_I2CADDR4			0x12
@@ -115,6 +115,20 @@ enum i2c_addr {
 #define NPCM_I2CADDR9			0x15
 #define NPCM_I2CADDR6			0x16
 #define NPCM_I2CADDR10			0x17
+#define NPCM_I2CCTL4			0x1A
+#define NPCM_I2CCTL5			0x1B
+#define NPCM_I2CSCLLT			0x1C /* SCL Low Time */
+#define NPCM_I2CFIF_CTL			0x1D /* FIFO Control */
+#define NPCM_I2CSCLHT			0x1E /* SCL High Time */
+
+/* BANK 1 regs */
+#define NPCM_I2CFIF_CTS			0x10 /* Both FIFOs Control and Status */
+#define NPCM_I2CTXF_CTL			0x12 /* Tx-FIFO Control */
+#define NPCM_I2CT_OUT			0x14 /* Bus T.O. */
+#define NPCM_I2CPEC			0x16 /* PEC Data */
+#define NPCM_I2CTXF_STS			0x1A /* Tx-FIFO Status */
+#define NPCM_I2CRXF_STS			0x1C /* Rx-FIFO Status */
+#define NPCM_I2CRXF_CTL			0x1E /* Rx-FIFO Control */
 
 #if IS_ENABLED(CONFIG_I2C_SLAVE)
 /*
@@ -131,21 +145,6 @@ static const int npcm_i2caddr[I2C_NUM_OWN_ADDR] = {
 };
 #endif
 
-#define NPCM_I2CCTL4			0x1A
-#define NPCM_I2CCTL5			0x1B
-#define NPCM_I2CSCLLT			0x1C /* SCL Low Time */
-#define NPCM_I2CFIF_CTL			0x1D /* FIFO Control */
-#define NPCM_I2CSCLHT			0x1E /* SCL High Time */
-
-/* BANK 1 regs */
-#define NPCM_I2CFIF_CTS			0x10 /* Both FIFOs Control and Status */
-#define NPCM_I2CTXF_CTL			0x12 /* Tx-FIFO Control */
-#define NPCM_I2CT_OUT			0x14 /* Bus T.O. */
-#define NPCM_I2CPEC			0x16 /* PEC Data */
-#define NPCM_I2CTXF_STS			0x1A /* Tx-FIFO Status */
-#define NPCM_I2CRXF_STS			0x1C /* Rx-FIFO Status */
-#define NPCM_I2CRXF_CTL			0x1E /* Rx-FIFO Control */
-
 /* NPCM_I2CST reg fields */
 #define NPCM_I2CST_XMIT			BIT(0)
 #define NPCM_I2CST_MASTER		BIT(1)
-- 
GitLab


From 3ca8217dc450f7a50f90d2ad97787e38088af8e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Sat, 8 Oct 2022 14:59:24 +0200
Subject: [PATCH 0716/4122] i2c: npcm7xx: Annotate register field definitions
 with longer names
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

To make the code easier to understand, add longer names to the
definitions of register fields. These longer names are based on source
code published by DELL/AESS for WPCM450, but should apply just as well
to NPCM7xx and NPCM8xx.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Reviewed-by: Tali Perry <tali.perry1@gmail.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-npcm7xx.c | 56 ++++++++++++++++----------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/i2c/busses/i2c-npcm7xx.c b/drivers/i2c/busses/i2c-npcm7xx.c
index 9a7a2d0bf5765..bbc7359e67f74 100644
--- a/drivers/i2c/busses/i2c-npcm7xx.c
+++ b/drivers/i2c/busses/i2c-npcm7xx.c
@@ -146,50 +146,50 @@ static const int npcm_i2caddr[I2C_NUM_OWN_ADDR] = {
 #endif
 
 /* NPCM_I2CST reg fields */
-#define NPCM_I2CST_XMIT			BIT(0)
-#define NPCM_I2CST_MASTER		BIT(1)
-#define NPCM_I2CST_NMATCH		BIT(2)
-#define NPCM_I2CST_STASTR		BIT(3)
-#define NPCM_I2CST_NEGACK		BIT(4)
-#define NPCM_I2CST_BER			BIT(5)
-#define NPCM_I2CST_SDAST		BIT(6)
-#define NPCM_I2CST_SLVSTP		BIT(7)
+#define NPCM_I2CST_XMIT			BIT(0)	/* Transmit mode */
+#define NPCM_I2CST_MASTER		BIT(1)	/* Master mode */
+#define NPCM_I2CST_NMATCH		BIT(2)	/* New match */
+#define NPCM_I2CST_STASTR		BIT(3)	/* Stall after start */
+#define NPCM_I2CST_NEGACK		BIT(4)	/* Negative ACK */
+#define NPCM_I2CST_BER			BIT(5)	/* Bus error */
+#define NPCM_I2CST_SDAST		BIT(6)	/* SDA status */
+#define NPCM_I2CST_SLVSTP		BIT(7)	/* Slave stop */
 
 /* NPCM_I2CCST reg fields */
-#define NPCM_I2CCST_BUSY		BIT(0)
-#define NPCM_I2CCST_BB			BIT(1)
-#define NPCM_I2CCST_MATCH		BIT(2)
-#define NPCM_I2CCST_GCMATCH		BIT(3)
-#define NPCM_I2CCST_TSDA		BIT(4)
-#define NPCM_I2CCST_TGSCL		BIT(5)
-#define NPCM_I2CCST_MATCHAF		BIT(6)
-#define NPCM_I2CCST_ARPMATCH		BIT(7)
+#define NPCM_I2CCST_BUSY		BIT(0)	/* Busy */
+#define NPCM_I2CCST_BB			BIT(1)	/* Bus busy */
+#define NPCM_I2CCST_MATCH		BIT(2)	/* Address match */
+#define NPCM_I2CCST_GCMATCH		BIT(3)	/* Global call match */
+#define NPCM_I2CCST_TSDA		BIT(4)	/* Test SDA line */
+#define NPCM_I2CCST_TGSCL		BIT(5)	/* Toggle SCL line */
+#define NPCM_I2CCST_MATCHAF		BIT(6)	/* Match address field */
+#define NPCM_I2CCST_ARPMATCH		BIT(7)	/* ARP address match */
 
 /* NPCM_I2CCTL1 reg fields */
-#define NPCM_I2CCTL1_START		BIT(0)
-#define NPCM_I2CCTL1_STOP		BIT(1)
-#define NPCM_I2CCTL1_INTEN		BIT(2)
+#define NPCM_I2CCTL1_START		BIT(0)	/* Generate start condition */
+#define NPCM_I2CCTL1_STOP		BIT(1)	/* Generate stop condition */
+#define NPCM_I2CCTL1_INTEN		BIT(2)	/* Interrupt enable */
 #define NPCM_I2CCTL1_EOBINTE		BIT(3)
 #define NPCM_I2CCTL1_ACK		BIT(4)
-#define NPCM_I2CCTL1_GCMEN		BIT(5)
-#define NPCM_I2CCTL1_NMINTE		BIT(6)
-#define NPCM_I2CCTL1_STASTRE		BIT(7)
+#define NPCM_I2CCTL1_GCMEN		BIT(5)	/* Global call match enable */
+#define NPCM_I2CCTL1_NMINTE		BIT(6)	/* New match interrupt enable */
+#define NPCM_I2CCTL1_STASTRE		BIT(7)	/* Stall after start enable */
 
 /* RW1S fields (inside a RW reg): */
 #define NPCM_I2CCTL1_RWS   \
 	(NPCM_I2CCTL1_START | NPCM_I2CCTL1_STOP | NPCM_I2CCTL1_ACK)
 
 /* npcm_i2caddr reg fields */
-#define NPCM_I2CADDR_A			GENMASK(6, 0)
-#define NPCM_I2CADDR_SAEN		BIT(7)
+#define NPCM_I2CADDR_A			GENMASK(6, 0)	/* Address */
+#define NPCM_I2CADDR_SAEN		BIT(7)		/* Slave address enable */
 
 /* NPCM_I2CCTL2 reg fields */
-#define I2CCTL2_ENABLE			BIT(0)
-#define I2CCTL2_SCLFRQ6_0		GENMASK(7, 1)
+#define I2CCTL2_ENABLE			BIT(0)		/* Module enable */
+#define I2CCTL2_SCLFRQ6_0		GENMASK(7, 1)	/* Bits 0:6 of frequency divisor */
 
 /* NPCM_I2CCTL3 reg fields */
-#define I2CCTL3_SCLFRQ8_7		GENMASK(1, 0)
-#define I2CCTL3_ARPMEN			BIT(2)
+#define I2CCTL3_SCLFRQ8_7		GENMASK(1, 0)	/* Bits 7:8 of frequency divisor */
+#define I2CCTL3_ARPMEN			BIT(2)	/* ARP match enable */
 #define I2CCTL3_IDL_START		BIT(3)
 #define I2CCTL3_400K_MODE		BIT(4)
 #define I2CCTL3_BNK_SEL			BIT(5)
-- 
GitLab


From b1f37ef655cf372f96015bf54abdb76a91aff27e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 31 Oct 2022 11:10:56 +0100
Subject: [PATCH 0717/4122] x86: Unconfuse CONFIG_ and X86_FEATURE_ namespaces

Lukas reported someone fat fingered the CONFIG_ symbol; fix er up.

Fixes: 5d8213864ade ("x86/retbleed: Add SKL return thunk")
Reported-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/Y1+fL4qQEIGZEEKB@hirez.programming.kicks-ass.net
---
 arch/x86/include/asm/nospec-branch.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 82580adbca4b2..3ab90f23e7f7b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -285,7 +285,7 @@
  */
 .macro UNTRAIN_RET
 #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-	defined(CONFIG_X86_FEATURE_CALL_DEPTH)
+	defined(CONFIG_CALL_DEPTH_TRACKING)
 	ANNOTATE_UNRET_END
 	ALTERNATIVE_3 "",						\
 		      CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,		\
@@ -296,7 +296,7 @@
 
 .macro UNTRAIN_RET_FROM_CALL
 #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \
-	defined(CONFIG_X86_FEATURE_CALL_DEPTH)
+	defined(CONFIG_CALL_DEPTH_TRACKING)
 	ANNOTATE_UNRET_END
 	ALTERNATIVE_3 "",						\
 		      CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET,		\
-- 
GitLab


From 5ebddd7c4951c50142bcb239d4c6a82eff15759e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 28 Oct 2022 15:26:51 +0200
Subject: [PATCH 0718/4122] kallsyms: Revert "Take callthunks into account"

This is a full revert of commit:

  f1389181622a ("kallsyms: Take callthunks into account")

The commit assumes a number of things that are not quite right.
Notably it assumes every symbol has PADDING_BYTES in front of it that
are not claimed by another symbol.

This is not true; even when compiled with:

  -fpatchable-function-entry=${PADDING_BYTES},${PADDING_BYTES}

Notably things like .cold subfunctions do not need to adhere to this
change in ABI. It it also not true when build with CFI_CLANG, which
claims these PADDING_BYTES in the __cfi_##name symbol.

Once the prefix bytes are not consistent and or otherwise claimed the
approach this patch takes goes out the window and kallsym resolution
will report invalid symbol names.

Therefore revert this to make room for another approach.

Reported-by: Reported-by: kernel test robot <yujie.liu@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Link: https://lore.kernel.org/r/202210241614.2ae4c1f5-yujie.liu@intel.com
Link: https://lkml.kernel.org/r/20221028194453.330970755@infradead.org
---
 kernel/kallsyms.c | 45 +++++----------------------------------------
 1 file changed, 5 insertions(+), 40 deletions(-)

diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index cc244c02b4cf6..60c20f301a6ba 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -293,12 +293,6 @@ static unsigned long get_symbol_pos(unsigned long addr,
 	return low;
 }
 
-#ifdef CONFIG_FUNCTION_PADDING_BYTES
-#define PADDING_BYTES	CONFIG_FUNCTION_PADDING_BYTES
-#else
-#define PADDING_BYTES	0
-#endif
-
 /*
  * Lookup an address but don't bother to find any names.
  */
@@ -306,25 +300,13 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize,
 				unsigned long *offset)
 {
 	char namebuf[KSYM_NAME_LEN];
-	int ret;
-
-	addr += PADDING_BYTES;
 
 	if (is_ksym_addr(addr)) {
 		get_symbol_pos(addr, symbolsize, offset);
-		ret = 1;
-		goto found;
-	}
-
-	ret = !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf);
-	if (!ret) {
-		ret = !!__bpf_address_lookup(addr, symbolsize,
-					     offset, namebuf);
+		return 1;
 	}
-found:
-	if (ret && offset)
-		*offset -= PADDING_BYTES;
-	return ret;
+	return !!module_address_lookup(addr, symbolsize, offset, NULL, NULL, namebuf) ||
+	       !!__bpf_address_lookup(addr, symbolsize, offset, namebuf);
 }
 
 static const char *kallsyms_lookup_buildid(unsigned long addr,
@@ -337,8 +319,6 @@ static const char *kallsyms_lookup_buildid(unsigned long addr,
 	namebuf[KSYM_NAME_LEN - 1] = 0;
 	namebuf[0] = 0;
 
-	addr += PADDING_BYTES;
-
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
 
@@ -368,8 +348,6 @@ static const char *kallsyms_lookup_buildid(unsigned long addr,
 
 found:
 	cleanup_symbol_name(namebuf);
-	if (ret && offset)
-		*offset -= PADDING_BYTES;
 	return ret;
 }
 
@@ -396,8 +374,6 @@ int lookup_symbol_name(unsigned long addr, char *symname)
 	symname[0] = '\0';
 	symname[KSYM_NAME_LEN - 1] = '\0';
 
-	addr += PADDING_BYTES;
-
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
 
@@ -425,8 +401,6 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 	name[0] = '\0';
 	name[KSYM_NAME_LEN - 1] = '\0';
 
-	addr += PADDING_BYTES;
-
 	if (is_ksym_addr(addr)) {
 		unsigned long pos;
 
@@ -443,8 +417,6 @@ int lookup_symbol_attrs(unsigned long addr, unsigned long *size,
 		return res;
 
 found:
-	if (offset)
-		*offset -= PADDING_BYTES;
 	cleanup_symbol_name(name);
 	return 0;
 }
@@ -470,15 +442,8 @@ static int __sprint_symbol(char *buffer, unsigned long address,
 	len = strlen(buffer);
 	offset -= symbol_offset;
 
-	if (add_offset) {
-		char s = '+';
-
-		if ((long)offset < 0) {
-			s = '-';
-			offset = 0UL - offset;
-		}
-		len += sprintf(buffer + len, "%c%#lx/%#lx", s, offset, size);
-	}
+	if (add_offset)
+		len += sprintf(buffer + len, "+%#lx/%#lx", offset, size);
 
 	if (modname) {
 		len += sprintf(buffer + len, " [%s", modname);
-- 
GitLab


From 4c91be8e926c6b3734d59b9348e305431484d42b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 28 Oct 2022 15:49:26 +0200
Subject: [PATCH 0719/4122] objtool: Slice up elf_create_section_symbol()

In order to facilitate creation of more symbol types, slice up
elf_create_section_symbol() to extract a generic helper that deals
with adding ELF symbols.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Link: https://lkml.kernel.org/r/20221028194453.396634875@infradead.org
---
 tools/objtool/elf.c | 56 ++++++++++++++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 21 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 89b37cd4ab1dc..3ad89d963e593 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -717,11 +717,11 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
 }
 
 static struct symbol *
-elf_create_section_symbol(struct elf *elf, struct section *sec)
+__elf_create_symbol(struct elf *elf, struct symbol *sym)
 {
 	struct section *symtab, *symtab_shndx;
 	Elf32_Word first_non_local, new_idx;
-	struct symbol *sym, *old;
+	struct symbol *old;
 
 	symtab = find_section_by_name(elf, ".symtab");
 	if (symtab) {
@@ -731,27 +731,16 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
 		return NULL;
 	}
 
-	sym = calloc(1, sizeof(*sym));
-	if (!sym) {
-		perror("malloc");
-		return NULL;
-	}
-
-	sym->name = sec->name;
-	sym->sec = sec;
+	new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
 
-	// st_name 0
-	sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
-	// st_other 0
-	// st_value 0
-	// st_size 0
+	if (GELF_ST_BIND(sym->sym.st_info) != STB_LOCAL)
+		goto non_local;
 
 	/*
 	 * Move the first global symbol, as per sh_info, into a new, higher
 	 * symbol index. This fees up a spot for a new local symbol.
 	 */
 	first_non_local = symtab->sh.sh_info;
-	new_idx = symtab->sh.sh_size / symtab->sh.sh_entsize;
 	old = find_symbol_by_index(elf, first_non_local);
 	if (old) {
 		old->idx = new_idx;
@@ -769,18 +758,43 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
 		new_idx = first_non_local;
 	}
 
+	/*
+	 * Either way, we will add a LOCAL symbol.
+	 */
+	symtab->sh.sh_info += 1;
+
+non_local:
 	sym->idx = new_idx;
 	if (elf_update_symbol(elf, symtab, symtab_shndx, sym)) {
 		WARN("elf_update_symbol");
 		return NULL;
 	}
 
-	/*
-	 * Either way, we added a LOCAL symbol.
-	 */
-	symtab->sh.sh_info += 1;
+	return sym;
+}
+
+static struct symbol *
+elf_create_section_symbol(struct elf *elf, struct section *sec)
+{
+	struct symbol *sym = calloc(1, sizeof(*sym));
+
+	if (!sym) {
+		perror("malloc");
+		return NULL;
+	}
 
-	elf_add_symbol(elf, sym);
+	sym->name = sec->name;
+	sym->sec = sec;
+
+	// st_name 0
+	sym->sym.st_info = GELF_ST_INFO(STB_LOCAL, STT_SECTION);
+	// st_other 0
+	// st_value 0
+	// st_size 0
+
+	sym = __elf_create_symbol(elf, sym);
+	if (sym)
+		elf_add_symbol(elf, sym);
 
 	return sym;
 }
-- 
GitLab


From 13f60e80e15dd0657c90bcca372ba045630ed9de Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 28 Oct 2022 20:29:51 +0200
Subject: [PATCH 0720/4122] objtool: Avoid O(bloody terrible) behaviour -- an
 ode to libelf

Due to how gelf_update_sym*() requires an Elf_Data pointer, and how
libelf keeps Elf_Data in a linked list per section,
elf_update_symbol() ends up having to iterate this list on each
update to find the correct Elf_Data for the index'ed symbol.

By allocating one Elf_Data per new symbol, the list grows per new
symbol, giving an effective O(n^2) insertion time. This is obviously
bloody terrible.

Therefore over-allocate the Elf_Data when an extention is needed.
Except it turns out libelf disregards Elf_Scn::sh_size in favour of
the sum of Elf_Data::d_size. IOW it will happily write out all the
unused space and fill it with:

  0000000000000000     0 NOTYPE  LOCAL  DEFAULT  UND

entries (aka zeros). Which obviously violates the STB_LOCAL placement
rule, and is a general pain in the backside for not being the desired
behaviour.

Manually fix-up the Elf_Data size to avoid this problem before calling
elf_update().

This significantly improves performance when adding a significant
number of symbols.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Link: https://lkml.kernel.org/r/20221028194453.461658986@infradead.org
---
 tools/objtool/elf.c                 | 89 +++++++++++++++++++++++++++--
 tools/objtool/include/objtool/elf.h |  2 +-
 2 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 3ad89d963e593..36dc78796f58d 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -634,6 +634,12 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
 
 		/* end-of-list */
 		if (!symtab_data) {
+			/*
+			 * Over-allocate to avoid O(n^2) symbol creation
+			 * behaviour.  The down side is that libelf doesn't
+			 * like this; see elf_truncate_section() for the fixup.
+			 */
+			int num = max(1U, sym->idx/3);
 			void *buf;
 
 			if (idx) {
@@ -647,28 +653,34 @@ static int elf_update_symbol(struct elf *elf, struct section *symtab,
 			if (t)
 				shndx_data = elf_newdata(t);
 
-			buf = calloc(1, entsize);
+			buf = calloc(num, entsize);
 			if (!buf) {
 				WARN("malloc");
 				return -1;
 			}
 
 			symtab_data->d_buf = buf;
-			symtab_data->d_size = entsize;
+			symtab_data->d_size = num * entsize;
 			symtab_data->d_align = 1;
 			symtab_data->d_type = ELF_T_SYM;
 
-			symtab->sh.sh_size += entsize;
 			symtab->changed = true;
+			symtab->truncate = true;
 
 			if (t) {
-				shndx_data->d_buf = &sym->sec->idx;
-				shndx_data->d_size = sizeof(Elf32_Word);
+				buf = calloc(num, sizeof(Elf32_Word));
+				if (!buf) {
+					WARN("malloc");
+					return -1;
+				}
+
+				shndx_data->d_buf = buf;
+				shndx_data->d_size = num * sizeof(Elf32_Word);
 				shndx_data->d_align = sizeof(Elf32_Word);
 				shndx_data->d_type = ELF_T_WORD;
 
-				symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
 				symtab_shndx->changed = true;
+				symtab_shndx->truncate = true;
 			}
 
 			break;
@@ -770,6 +782,14 @@ non_local:
 		return NULL;
 	}
 
+	symtab->sh.sh_size += symtab->sh.sh_entsize;
+	symtab->changed = true;
+
+	if (symtab_shndx) {
+		symtab_shndx->sh.sh_size += sizeof(Elf32_Word);
+		symtab_shndx->changed = true;
+	}
+
 	return sym;
 }
 
@@ -1286,6 +1306,60 @@ int elf_write_reloc(struct elf *elf, struct reloc *reloc)
 	return 0;
 }
 
+/*
+ * When Elf_Scn::sh_size is smaller than the combined Elf_Data::d_size
+ * do you:
+ *
+ *   A) adhere to the section header and truncate the data, or
+ *   B) ignore the section header and write out all the data you've got?
+ *
+ * Yes, libelf sucks and we need to manually truncate if we over-allocate data.
+ */
+static int elf_truncate_section(struct elf *elf, struct section *sec)
+{
+	u64 size = sec->sh.sh_size;
+	bool truncated = false;
+	Elf_Data *data = NULL;
+	Elf_Scn *s;
+
+	s = elf_getscn(elf->elf, sec->idx);
+	if (!s) {
+		WARN_ELF("elf_getscn");
+		return -1;
+	}
+
+	for (;;) {
+		/* get next data descriptor for the relevant section */
+		data = elf_getdata(s, data);
+
+		if (!data) {
+			if (size) {
+				WARN("end of section data but non-zero size left\n");
+				return -1;
+			}
+			return 0;
+		}
+
+		if (truncated) {
+			/* when we remove symbols */
+			WARN("truncated; but more data\n");
+			return -1;
+		}
+
+		if (!data->d_size) {
+			WARN("zero size data");
+			return -1;
+		}
+
+		if (data->d_size > size) {
+			truncated = true;
+			data->d_size = size;
+		}
+
+		size -= data->d_size;
+	}
+}
+
 int elf_write(struct elf *elf)
 {
 	struct section *sec;
@@ -1296,6 +1370,9 @@ int elf_write(struct elf *elf)
 
 	/* Update changed relocation sections and section headers: */
 	list_for_each_entry(sec, &elf->sections, list) {
+		if (sec->truncate)
+			elf_truncate_section(elf, sec);
+
 		if (sec->changed) {
 			s = elf_getscn(elf->elf, sec->idx);
 			if (!s) {
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index d28533106b780..9e96a613c50f7 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -38,7 +38,7 @@ struct section {
 	Elf_Data *data;
 	char *name;
 	int idx;
-	bool changed, text, rodata, noinstr, init;
+	bool changed, text, rodata, noinstr, init, truncate;
 };
 
 struct symbol {
-- 
GitLab


From 9f2899fe36a623885d8576604cb582328ad32b3c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 28 Oct 2022 15:50:42 +0200
Subject: [PATCH 0721/4122] objtool: Add option to generate prefix symbols

When code is compiled with:

  -fpatchable-function-entry=${PADDING_BYTES},${PADDING_BYTES}

functions will have PADDING_BYTES of NOP in front of them. Unwinders
and other things that symbolize code locations will typically
attribute these bytes to the preceding function.

Given that these bytes nominally belong to the following symbol this
mis-attribution is confusing.

Inspired by the fact that CFI_CLANG emits __cfi_##name symbols to
claim these bytes, allow objtool to emit __pfx_##name symbols to do
the same.

Therefore add the objtool --prefix=N argument, to conditionally place
a __pfx_##name symbol at N bytes ahead of symbol 'name' when: all
these preceding bytes are NOP and name-N is an instruction boundary.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Link: https://lkml.kernel.org/r/20221028194453.526899822@infradead.org
---
 tools/objtool/builtin-check.c           |  1 +
 tools/objtool/check.c                   | 33 ++++++++++++++++++++++++-
 tools/objtool/elf.c                     | 31 +++++++++++++++++++++++
 tools/objtool/include/objtool/builtin.h |  1 +
 tools/objtool/include/objtool/elf.h     |  2 ++
 5 files changed, 67 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 0a04f8ea4432d..95fcecee60ce5 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -75,6 +75,7 @@ const struct option check_options[] = {
 	OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
 	OPT_BOOLEAN(0,   "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
 	OPT_BOOLEAN(0,   "unret", &opts.unret, "validate entry unret placement"),
+	OPT_INTEGER(0,   "prefix", &opts.prefix, "generate prefix symbols"),
 	OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
 	OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
 	OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 7936312e10c79..27f35f5f831ab 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -3417,7 +3417,8 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
 		if (func && insn_func(insn) && func != insn_func(insn)->pfunc) {
 			/* Ignore KCFI type preambles, which always fall through */
-			if (!strncmp(func->name, "__cfi_", 6))
+			if (!strncmp(func->name, "__cfi_", 6) ||
+			    !strncmp(func->name, "__pfx_", 6))
 				return 0;
 
 			WARN("%s() falls through to next function %s()",
@@ -3972,6 +3973,34 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
 	return false;
 }
 
+static int add_prefix_symbol(struct objtool_file *file, struct symbol *func,
+			     struct instruction *insn)
+{
+	if (!opts.prefix)
+		return 0;
+
+	for (;;) {
+		struct instruction *prev = list_prev_entry(insn, list);
+		u64 offset;
+
+		if (&prev->list == &file->insn_list)
+			break;
+
+		if (prev->type != INSN_NOP)
+			break;
+
+		offset = func->offset - prev->offset;
+		if (offset >= opts.prefix) {
+			if (offset == opts.prefix)
+				elf_create_prefix_symbol(file->elf, func, opts.prefix);
+			break;
+		}
+		insn = prev;
+	}
+
+	return 0;
+}
+
 static int validate_symbol(struct objtool_file *file, struct section *sec,
 			   struct symbol *sym, struct insn_state *state)
 {
@@ -3990,6 +4019,8 @@ static int validate_symbol(struct objtool_file *file, struct section *sec,
 	if (!insn || insn->ignore || insn->visited)
 		return 0;
 
+	add_prefix_symbol(file, sym, insn);
+
 	state->uaccess = sym->uaccess_safe;
 
 	ret = validate_branch(file, insn_func(insn), insn, *state);
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 36dc78796f58d..3d636d12d679f 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -819,6 +819,37 @@ elf_create_section_symbol(struct elf *elf, struct section *sec)
 	return sym;
 }
 
+static int elf_add_string(struct elf *elf, struct section *strtab, char *str);
+
+struct symbol *
+elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size)
+{
+	struct symbol *sym = calloc(1, sizeof(*sym));
+	size_t namelen = strlen(orig->name) + sizeof("__pfx_");
+	char *name = malloc(namelen);
+
+	if (!sym || !name) {
+		perror("malloc");
+		return NULL;
+	}
+
+	snprintf(name, namelen, "__pfx_%s", orig->name);
+
+	sym->name = name;
+	sym->sec = orig->sec;
+
+	sym->sym.st_name = elf_add_string(elf, NULL, name);
+	sym->sym.st_info = orig->sym.st_info;
+	sym->sym.st_value = orig->sym.st_value - size;
+	sym->sym.st_size = size;
+
+	sym = __elf_create_symbol(elf, sym);
+	if (sym)
+		elf_add_symbol(elf, sym);
+
+	return sym;
+}
+
 int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
 			  unsigned long offset, unsigned int type,
 			  struct section *insn_sec, unsigned long insn_off)
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 22092a9f3cf6c..f341b620dead4 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -26,6 +26,7 @@ struct opts {
 	bool stackval;
 	bool static_call;
 	bool uaccess;
+	int prefix;
 
 	/* options: */
 	bool backtrace;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 9e96a613c50f7..b6974e3173aa5 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -146,6 +146,8 @@ static inline bool has_multiple_files(struct elf *elf)
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
 
+struct symbol *elf_create_prefix_symbol(struct elf *elf, struct symbol *orig, long size);
+
 int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
 		  unsigned int type, struct symbol *sym, s64 addend);
 int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
-- 
GitLab


From b341b20d648bb7e9a3307c33163e7399f0913e66 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 28 Oct 2022 21:08:19 +0200
Subject: [PATCH 0722/4122] x86: Add prefix symbols for function padding

When code is compiled with:

  -fpatchable-function-entry=${PADDING_BYTES},${PADDING_BYTES}

functions will have PADDING_BYTES of NOP in front of them. Unwinders
and other things that symbolize code locations will typically
attribute these bytes to the preceding function.

Given that these bytes nominally belong to the following symbol this
mis-attribution is confusing.

Inspired by the fact that CFI_CLANG emits __cfi_##name symbols to
claim these bytes, use objtool to emit __pfx_##name symbols to do
the same when CFI_CLANG is not used.

This then shows the callthunk for symbol 'name' as:

  __pfx_##name+0x6/0x10

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Link: https://lkml.kernel.org/r/20221028194453.592512209@infradead.org
---
 arch/x86/Kconfig     | 4 ++++
 scripts/Makefile.lib | 1 +
 2 files changed, 5 insertions(+)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b52ad13f0f449..32818aa1dca49 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2471,6 +2471,10 @@ config CALL_THUNKS
 	def_bool n
 	select FUNCTION_ALIGNMENT_16B
 
+config PREFIX_SYMBOLS
+	def_bool y
+	depends on CALL_THUNKS && !CFI_CLANG
+
 menuconfig SPECULATION_MITIGATIONS
 	bool "Mitigations for speculative execution vulnerabilities"
 	default y
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 85f02756dc9c1..2e03bcbf2b9b7 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -265,6 +265,7 @@ objtool-args-$(CONFIG_STACK_VALIDATION)			+= --stackval
 objtool-args-$(CONFIG_HAVE_STATIC_CALL_INLINE)		+= --static-call
 objtool-args-$(CONFIG_HAVE_UACCESS_VALIDATION)		+= --uaccess
 objtool-args-$(CONFIG_GCOV_KERNEL)			+= --no-unreachable
+objtool-args-$(CONFIG_PREFIX_SYMBOLS)			+= --prefix=$(CONFIG_FUNCTION_PADDING_BYTES)
 
 objtool-args = $(objtool-args-y)					\
 	$(if $(delay-objtool), --link)					\
-- 
GitLab


From 9a479f766be1dd777e12e3e57b6ee4c3028a40a5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 27 Oct 2022 11:28:13 +0200
Subject: [PATCH 0723/4122] objtool: Add --cfi to generate the .cfi_sites
 section

Add the location of all __cfi_##name symbols (as generated by kCFI) to
a section such that we might re-write things at kernel boot.

Notably; boot time re-hashing and FineIBT are the intended use of
this.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221027092842.568039454@infradead.org
---
 tools/objtool/builtin-check.c           |  1 +
 tools/objtool/check.c                   | 69 +++++++++++++++++++++++++
 tools/objtool/include/objtool/builtin.h |  1 +
 3 files changed, 71 insertions(+)

diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 95fcecee60ce5..868e3e363786f 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -80,6 +80,7 @@ const struct option check_options[] = {
 	OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
 	OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
 	OPT_BOOLEAN('u', "uaccess", &opts.uaccess, "validate uaccess rules for SMAP"),
+	OPT_BOOLEAN(0  , "cfi", &opts.cfi, "annotate kernel control flow integrity (kCFI) function preambles"),
 	OPT_CALLBACK_OPTARG(0, "dump", NULL, NULL, "orc", "dump metadata", parse_dump),
 
 	OPT_GROUP("Options:"),
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 27f35f5f831ab..55066c4935702 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -861,6 +861,68 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
 	return 0;
 }
 
+static int create_cfi_sections(struct objtool_file *file)
+{
+	struct section *sec, *s;
+	struct symbol *sym;
+	unsigned int *loc;
+	int idx;
+
+	sec = find_section_by_name(file->elf, ".cfi_sites");
+	if (sec) {
+		INIT_LIST_HEAD(&file->call_list);
+		WARN("file already has .cfi_sites section, skipping");
+		return 0;
+	}
+
+	idx = 0;
+	for_each_sec(file, s) {
+		if (!s->text)
+			continue;
+
+		list_for_each_entry(sym, &s->symbol_list, list) {
+			if (sym->type != STT_FUNC)
+				continue;
+
+			if (strncmp(sym->name, "__cfi_", 6))
+				continue;
+
+			idx++;
+		}
+	}
+
+	sec = elf_create_section(file->elf, ".cfi_sites", 0, sizeof(unsigned int), idx);
+	if (!sec)
+		return -1;
+
+	idx = 0;
+	for_each_sec(file, s) {
+		if (!s->text)
+			continue;
+
+		list_for_each_entry(sym, &s->symbol_list, list) {
+			if (sym->type != STT_FUNC)
+				continue;
+
+			if (strncmp(sym->name, "__cfi_", 6))
+				continue;
+
+			loc = (unsigned int *)sec->data->d_buf + idx;
+			memset(loc, 0, sizeof(unsigned int));
+
+			if (elf_add_reloc_to_insn(file->elf, sec,
+						  idx * sizeof(unsigned int),
+						  R_X86_64_PC32,
+						  s, sym->offset))
+				return -1;
+
+			idx++;
+		}
+	}
+
+	return 0;
+}
+
 static int create_mcount_loc_sections(struct objtool_file *file)
 {
 	struct section *sec;
@@ -4430,6 +4492,13 @@ int check(struct objtool_file *file)
 		warnings += ret;
 	}
 
+	if (opts.cfi) {
+		ret = create_cfi_sections(file);
+		if (ret < 0)
+			goto out;
+		warnings += ret;
+	}
+
 	if (opts.rethunk) {
 		ret = create_return_sites_sections(file);
 		if (ret < 0)
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index f341b620dead4..c44ff39df80c6 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -27,6 +27,7 @@ struct opts {
 	bool static_call;
 	bool uaccess;
 	int prefix;
+	bool cfi;
 
 	/* options: */
 	bool backtrace;
-- 
GitLab


From 931ab63664f02b17d2213ef36b83e1e50190a0aa Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 27 Oct 2022 11:28:14 +0200
Subject: [PATCH 0724/4122] x86/ibt: Implement FineIBT

Implement an alternative CFI scheme that merges both the fine-grained
nature of kCFI but also takes full advantage of the coarse grained
hardware CFI as provided by IBT.

To contrast:

  kCFI is a pure software CFI scheme and relies on being able to read
text -- specifically the instruction *before* the target symbol, and
does the hash validation *before* doing the call (otherwise control
flow is compromised already).

  FineIBT is a software and hardware hybrid scheme; by ensuring every
branch target starts with a hash validation it is possible to place
the hash validation after the branch. This has several advantages:

   o the (hash) load is avoided; no memop; no RX requirement.

   o IBT WAIT-FOR-ENDBR state is a speculation stop; by placing
     the hash validation in the immediate instruction after
     the branch target there is a minimal speculation window
     and the whole is a viable defence against SpectreBHB.

   o Kees feels obliged to mention it is slightly more vulnerable
     when the attacker can write code.

Obviously this patch relies on kCFI, but additionally it also relies
on the padding from the call-depth-tracking patches. It uses this
padding to place the hash-validation while the call-sites are
re-written to modify the indirect target to be 16 bytes in front of
the original target, thus hitting this new preamble.

Notably, there is no hardware that needs call-depth-tracking (Skylake)
and supports IBT (Tigerlake and onwards).

Suggested-by: Joao Moreira (Intel) <joao@overdrivepizza.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221027092842.634714496@infradead.org
---
 arch/um/kernel/um_arch.c           |   5 +
 arch/x86/Kconfig                   |  14 +-
 arch/x86/Makefile                  |   2 +-
 arch/x86/include/asm/alternative.h |   2 +
 arch/x86/include/asm/linkage.h     |   6 +-
 arch/x86/kernel/alternative.c      | 253 +++++++++++++++++++++++++++--
 arch/x86/kernel/cpu/common.c       |   1 +
 arch/x86/kernel/module.c           |  20 ++-
 arch/x86/kernel/vmlinux.lds.S      |   9 +
 include/linux/bpf.h                |   2 +-
 scripts/Makefile.lib               |   1 +
 11 files changed, 294 insertions(+), 21 deletions(-)

diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
index 8adf8e89b2558..786b44dc20c98 100644
--- a/arch/um/kernel/um_arch.c
+++ b/arch/um/kernel/um_arch.c
@@ -444,6 +444,11 @@ void apply_returns(s32 *start, s32 *end)
 {
 }
 
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+		   s32 *start_cfi, s32 *end_cfi)
+{
+}
+
 void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
 {
 }
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 32818aa1dca49..479ee63898f57 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2463,17 +2463,27 @@ config FUNCTION_PADDING_BYTES
 	default FUNCTION_PADDING_CFI if CFI_CLANG
 	default FUNCTION_ALIGNMENT
 
+config CALL_PADDING
+	def_bool n
+	depends on CC_HAS_ENTRY_PADDING && OBJTOOL
+	select FUNCTION_ALIGNMENT_16B
+
+config FINEIBT
+	def_bool y
+	depends on X86_KERNEL_IBT && CFI_CLANG && RETPOLINE
+	select CALL_PADDING
+
 config HAVE_CALL_THUNKS
 	def_bool y
 	depends on CC_HAS_ENTRY_PADDING && RETHUNK && OBJTOOL
 
 config CALL_THUNKS
 	def_bool n
-	select FUNCTION_ALIGNMENT_16B
+	select CALL_PADDING
 
 config PREFIX_SYMBOLS
 	def_bool y
-	depends on CALL_THUNKS && !CFI_CLANG
+	depends on CALL_PADDING && !CFI_CLANG
 
 menuconfig SPECULATION_MITIGATIONS
 	bool "Mitigations for speculative execution vulnerabilities"
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 1640e005092b9..a3a07df8a6095 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -208,7 +208,7 @@ ifdef CONFIG_SLS
   KBUILD_CFLAGS += -mharden-sls=all
 endif
 
-ifdef CONFIG_CALL_THUNKS
+ifdef CONFIG_CALL_PADDING
 PADDING_CFLAGS := -fpatchable-function-entry=$(CONFIG_FUNCTION_PADDING_BYTES),$(CONFIG_FUNCTION_PADDING_BYTES)
 KBUILD_CFLAGS += $(PADDING_CFLAGS)
 export PADDING_CFLAGS
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 664c0779375c1..7659217f4d492 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -78,6 +78,8 @@ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
 extern void apply_retpolines(s32 *start, s32 *end);
 extern void apply_returns(s32 *start, s32 *end);
 extern void apply_ibt_endbr(s32 *start, s32 *end);
+extern void apply_fineibt(s32 *start_retpoline, s32 *end_retpoine,
+			  s32 *start_cfi, s32 *end_cfi);
 
 struct module;
 struct paravirt_patch_site;
diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h
index 45e0df850645c..dd9b8118f7848 100644
--- a/arch/x86/include/asm/linkage.h
+++ b/arch/x86/include/asm/linkage.h
@@ -15,7 +15,7 @@
 #define __ALIGN		.balign CONFIG_FUNCTION_ALIGNMENT, 0x90;
 #define __ALIGN_STR	__stringify(__ALIGN)
 
-#if defined(CONFIG_CALL_THUNKS) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#if defined(CONFIG_CALL_PADDING) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
 #define FUNCTION_PADDING	.skip CONFIG_FUNCTION_ALIGNMENT, 0x90;
 #else
 #define FUNCTION_PADDING
@@ -57,7 +57,7 @@
 #endif /* __ASSEMBLY__ */
 
 /*
- * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_THUNKS) the
+ * Depending on -fpatchable-function-entry=N,N usage (CONFIG_CALL_PADDING) the
  * CFI symbol layout changes.
  *
  * Without CALL_THUNKS:
@@ -81,7 +81,7 @@
  * In both cases the whole thing is FUNCTION_ALIGNMENT aligned and sized.
  */
 
-#ifdef CONFIG_CALL_THUNKS
+#ifdef CONFIG_CALL_PADDING
 #define CFI_PRE_PADDING
 #define CFI_POST_PADDING	.skip	CONFIG_FUNCTION_PADDING_BYTES, 0x90;
 #else
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index b4ac4e58c0106..91b0e63a6238a 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -116,6 +116,7 @@ static void __init_or_module add_nops(void *insns, unsigned int len)
 
 extern s32 __retpoline_sites[], __retpoline_sites_end[];
 extern s32 __return_sites[], __return_sites_end[];
+extern s32 __cfi_sites[], __cfi_sites_end[];
 extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
 extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
 extern s32 __smp_locks[], __smp_locks_end[];
@@ -656,6 +657,28 @@ void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
 
 #ifdef CONFIG_X86_KERNEL_IBT
 
+static void poison_endbr(void *addr, bool warn)
+{
+	u32 endbr, poison = gen_endbr_poison();
+
+	if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
+		return;
+
+	if (!is_endbr(endbr)) {
+		WARN_ON_ONCE(warn);
+		return;
+	}
+
+	DPRINTK("ENDBR at: %pS (%px)", addr, addr);
+
+	/*
+	 * When we have IBT, the lack of ENDBR will trigger #CP
+	 */
+	DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
+	DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
+	text_poke_early(addr, &poison, 4);
+}
+
 /*
  * Generated by: objtool --ibt
  */
@@ -664,31 +687,232 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end)
 	s32 *s;
 
 	for (s = start; s < end; s++) {
-		u32 endbr, poison = gen_endbr_poison();
 		void *addr = (void *)s + *s;
 
-		if (WARN_ON_ONCE(get_kernel_nofault(endbr, addr)))
-			continue;
+		poison_endbr(addr, true);
+		if (IS_ENABLED(CONFIG_FINEIBT))
+			poison_endbr(addr - 16, false);
+	}
+}
+
+#else
+
+void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { }
+
+#endif /* CONFIG_X86_KERNEL_IBT */
+
+#ifdef CONFIG_FINEIBT
+/*
+ * kCFI						FineIBT
+ *
+ * __cfi_\func:					__cfi_\func:
+ *	movl   $0x12345678,%eax		// 5	     endbr64			// 4
+ *	nop					     subl   $0x12345678,%r10d   // 7
+ *	nop					     jz     1f			// 2
+ *	nop					     ud2			// 2
+ *	nop					1:   nop			// 1
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *	nop
+ *
+ *
+ * caller:					caller:
+ *	movl	$(-0x12345678),%r10d	 // 6	     movl   $0x12345678,%r10d	// 6
+ *	addl	$-15(%r11),%r10d	 // 4	     sub    $16,%r11		// 4
+ *	je	1f			 // 2	     nop4			// 4
+ *	ud2				 // 2
+ * 1:	call	__x86_indirect_thunk_r11 // 5	     call   *%r11; nop2;	// 5
+ *
+ */
+
+asm(	".pushsection .rodata			\n"
+	"fineibt_preamble_start:		\n"
+	"	endbr64				\n"
+	"	subl	$0x12345678, %r10d	\n"
+	"	je	fineibt_preamble_end	\n"
+	"	ud2				\n"
+	"	nop				\n"
+	"fineibt_preamble_end:			\n"
+	".popsection\n"
+);
+
+extern u8 fineibt_preamble_start[];
+extern u8 fineibt_preamble_end[];
+
+#define fineibt_preamble_size (fineibt_preamble_end - fineibt_preamble_start)
+#define fineibt_preamble_hash 7
+
+asm(	".pushsection .rodata			\n"
+	"fineibt_caller_start:			\n"
+	"	movl	$0x12345678, %r10d	\n"
+	"	sub	$16, %r11		\n"
+	ASM_NOP4
+	"fineibt_caller_end:			\n"
+	".popsection				\n"
+);
+
+extern u8 fineibt_caller_start[];
+extern u8 fineibt_caller_end[];
+
+#define fineibt_caller_size (fineibt_caller_end - fineibt_caller_start)
+#define fineibt_caller_hash 2
+
+#define fineibt_caller_jmp (fineibt_caller_size - 2)
+
+static u32 decode_preamble_hash(void *addr)
+{
+	u8 *p = addr;
+
+	/* b8 78 56 34 12          mov    $0x12345678,%eax */
+	if (p[0] == 0xb8)
+		return *(u32 *)(addr + 1);
+
+	return 0; /* invalid hash value */
+}
+
+static u32 decode_caller_hash(void *addr)
+{
+	u8 *p = addr;
+
+	/* 41 ba 78 56 34 12       mov    $0x12345678,%r10d */
+	if (p[0] == 0x41 && p[1] == 0xba)
+		return -*(u32 *)(addr + 2);
+
+	/* e8 0c 78 56 34 12	   jmp.d8  +12 */
+	if (p[0] == JMP8_INSN_OPCODE && p[1] == fineibt_caller_jmp)
+		return -*(u32 *)(addr + 2);
+
+	return 0; /* invalid hash value */
+}
+
+/* .retpoline_sites */
+static int cfi_disable_callers(s32 *start, s32 *end)
+{
+	/*
+	 * Disable kCFI by patching in a JMP.d8, this leaves the hash immediate
+	 * in tact for later usage. Also see decode_caller_hash() and
+	 * cfi_rewrite_callers().
+	 */
+	const u8 jmp[] = { JMP8_INSN_OPCODE, fineibt_caller_jmp };
+	s32 *s;
 
-		if (WARN_ON_ONCE(!is_endbr(endbr)))
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		addr -= fineibt_caller_size;
+		hash = decode_caller_hash(addr);
+		if (!hash) /* nocfi callers */
 			continue;
 
-		DPRINTK("ENDBR at: %pS (%px)", addr, addr);
+		text_poke_early(addr, jmp, 2);
+	}
 
-		/*
-		 * When we have IBT, the lack of ENDBR will trigger #CP
-		 */
-		DUMP_BYTES(((u8*)addr), 4, "%px: orig: ", addr);
-		DUMP_BYTES(((u8*)&poison), 4, "%px: repl: ", addr);
-		text_poke_early(addr, &poison, 4);
+	return 0;
+}
+
+/* .cfi_sites */
+static int cfi_rewrite_preamble(s32 *start, s32 *end)
+{
+	s32 *s;
+
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		hash = decode_preamble_hash(addr);
+		if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
+			 addr, addr, 5, addr))
+			return -EINVAL;
+
+		text_poke_early(addr, fineibt_preamble_start, fineibt_preamble_size);
+		WARN_ON(*(u32 *)(addr + fineibt_preamble_hash) != 0x12345678);
+		text_poke_early(addr + fineibt_preamble_hash, &hash, 4);
 	}
+
+	return 0;
+}
+
+/* .retpoline_sites */
+static int cfi_rewrite_callers(s32 *start, s32 *end)
+{
+	s32 *s;
+
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		addr -= fineibt_caller_size;
+		hash = decode_caller_hash(addr);
+		if (hash) {
+			text_poke_early(addr, fineibt_caller_start, fineibt_caller_size);
+			WARN_ON(*(u32 *)(addr + fineibt_caller_hash) != 0x12345678);
+			text_poke_early(addr + fineibt_caller_hash, &hash, 4);
+		}
+		/* rely on apply_retpolines() */
+	}
+
+	return 0;
+}
+
+static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+			    s32 *start_cfi, s32 *end_cfi, bool builtin)
+{
+	int ret;
+
+	if (WARN_ONCE(fineibt_preamble_size != 16,
+		      "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
+		return;
+
+	if (!HAS_KERNEL_IBT || !cpu_feature_enabled(X86_FEATURE_IBT))
+		return;
+
+	/*
+	 * Rewrite the callers to not use the __cfi_ stubs, such that we might
+	 * rewrite them. This disables all CFI. If this succeeds but any of the
+	 * later stages fails, we're without CFI.
+	 */
+	ret = cfi_disable_callers(start_retpoline, end_retpoline);
+	if (ret)
+		goto err;
+
+	ret = cfi_rewrite_preamble(start_cfi, end_cfi);
+	if (ret)
+		goto err;
+
+	ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
+	if (ret)
+		goto err;
+
+	if (builtin)
+		pr_info("Using FineIBT CFI\n");
+
+	return;
+
+err:
+	pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
 }
 
 #else
 
-void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { }
+static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+			    s32 *start_cfi, s32 *end_cfi, bool builtin)
+{
+}
 
-#endif /* CONFIG_X86_KERNEL_IBT */
+#endif
+
+void apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
+		   s32 *start_cfi, s32 *end_cfi)
+{
+	return __apply_fineibt(start_retpoline, end_retpoline,
+			       start_cfi, end_cfi,
+			       /* .builtin = */ false);
+}
 
 #ifdef CONFIG_SMP
 static void alternatives_smp_lock(const s32 *start, const s32 *end,
@@ -996,6 +1220,9 @@ void __init alternative_instructions(void)
 	 */
 	apply_paravirt(__parainstructions, __parainstructions_end);
 
+	__apply_fineibt(__retpoline_sites, __retpoline_sites_end,
+			__cfi_sites, __cfi_sites_end, true);
+
 	/*
 	 * Rewrite the retpolines, must be done before alternatives since
 	 * those can rewrite the retpoline thunks.
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 2bec4b4b2c50d..423a760fa9de2 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -609,6 +609,7 @@ static __always_inline void setup_cet(struct cpuinfo_x86 *c)
 
 	if (!ibt_selftest()) {
 		pr_err("IBT selftest: Failed!\n");
+		wrmsrl(MSR_IA32_S_CET, 0);
 		setup_clear_cpu_cap(X86_FEATURE_IBT);
 		return;
 	}
diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c
index 2fb9de2cef40e..0142982e94c5a 100644
--- a/arch/x86/kernel/module.c
+++ b/arch/x86/kernel/module.c
@@ -255,7 +255,7 @@ int module_finalize(const Elf_Ehdr *hdr,
 	const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
 		*para = NULL, *orc = NULL, *orc_ip = NULL,
 		*retpolines = NULL, *returns = NULL, *ibt_endbr = NULL,
-		*calls = NULL;
+		*calls = NULL, *cfi = NULL;
 	char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
 
 	for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
@@ -277,6 +277,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 			returns = s;
 		if (!strcmp(".call_sites", secstrings + s->sh_name))
 			calls = s;
+		if (!strcmp(".cfi_sites", secstrings + s->sh_name))
+			cfi = s;
 		if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
 			ibt_endbr = s;
 	}
@@ -289,6 +291,22 @@ int module_finalize(const Elf_Ehdr *hdr,
 		void *pseg = (void *)para->sh_addr;
 		apply_paravirt(pseg, pseg + para->sh_size);
 	}
+	if (retpolines || cfi) {
+		void *rseg = NULL, *cseg = NULL;
+		unsigned int rsize = 0, csize = 0;
+
+		if (retpolines) {
+			rseg = (void *)retpolines->sh_addr;
+			rsize = retpolines->sh_size;
+		}
+
+		if (cfi) {
+			cseg = (void *)cfi->sh_addr;
+			csize = cfi->sh_size;
+		}
+
+		apply_fineibt(rseg, rseg + rsize, cseg, cseg + csize);
+	}
 	if (retpolines) {
 		void *rseg = (void *)retpolines->sh_addr;
 		apply_retpolines(rseg, rseg + retpolines->sh_size);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 49f3f86433c7d..2e0ee14229bff 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -309,6 +309,15 @@ SECTIONS
 	}
 #endif
 
+#ifdef CONFIG_FINEIBT
+	. = ALIGN(8);
+	.cfi_sites : AT(ADDR(.cfi_sites) - LOAD_OFFSET) {
+		__cfi_sites = .;
+		*(.cfi_sites)
+		__cfi_sites_end = .;
+	}
+#endif
+
 	/*
 	 * struct alt_inst entries. From the header (alternative.h):
 	 * "Alternative instructions for different CPU types or capabilities"
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5296aea9b5b40..923a3d5080474 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -984,7 +984,7 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
 }
 
 #ifdef CONFIG_X86_64
-#ifdef CONFIG_CALL_THUNKS
+#ifdef CONFIG_CALL_PADDING
 #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5+CONFIG_FUNCTION_PADDING_BYTES,CONFIG_FUNCTION_PADDING_BYTES)))
 #else
 #define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5)))
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 2e03bcbf2b9b7..2b2fab705a633 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -256,6 +256,7 @@ objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK)		+= --hacks=jump_label
 objtool-args-$(CONFIG_HAVE_NOINSTR_HACK)		+= --hacks=noinstr
 objtool-args-$(CONFIG_CALL_DEPTH_TRACKING)		+= --hacks=skylake
 objtool-args-$(CONFIG_X86_KERNEL_IBT)			+= --ibt
+objtool-args-$(CONFIG_FINEIBT)				+= --cfi
 objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL)	+= --mcount
 objtool-args-$(CONFIG_UNWINDER_ORC)			+= --orc
 objtool-args-$(CONFIG_RETPOLINE)			+= --retpoline
-- 
GitLab


From 082c4c815252ea333b0f3a51e336df60c2314fe2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 27 Oct 2022 11:28:15 +0200
Subject: [PATCH 0725/4122] x86/cfi: Boot time selection of CFI scheme

Add the "cfi=" boot parameter to allow people to select a CFI scheme
at boot time. Mostly useful for development / debugging.

Requested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221027092842.699804264@infradead.org
---
 arch/x86/kernel/alternative.c | 99 ++++++++++++++++++++++++++++-------
 1 file changed, 81 insertions(+), 18 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 91b0e63a6238a..9d3b58748ca53 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -702,6 +702,47 @@ void __init_or_module noinline apply_ibt_endbr(s32 *start, s32 *end) { }
 #endif /* CONFIG_X86_KERNEL_IBT */
 
 #ifdef CONFIG_FINEIBT
+
+enum cfi_mode {
+	CFI_DEFAULT,
+	CFI_OFF,
+	CFI_KCFI,
+	CFI_FINEIBT,
+};
+
+static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
+
+static __init int cfi_parse_cmdline(char *str)
+{
+	if (!str)
+		return -EINVAL;
+
+	while (str) {
+		char *next = strchr(str, ',');
+		if (next) {
+			*next = 0;
+			next++;
+		}
+
+		if (!strcmp(str, "auto")) {
+			cfi_mode = CFI_DEFAULT;
+		} else if (!strcmp(str, "off")) {
+			cfi_mode = CFI_OFF;
+		} else if (!strcmp(str, "kcfi")) {
+			cfi_mode = CFI_KCFI;
+		} else if (!strcmp(str, "fineibt")) {
+			cfi_mode = CFI_FINEIBT;
+		} else {
+			pr_err("Ignoring unknown cfi option (%s).", str);
+		}
+
+		str = next;
+	}
+
+	return 0;
+}
+early_param("cfi", cfi_parse_cmdline);
+
 /*
  * kCFI						FineIBT
  *
@@ -868,30 +909,52 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
 		      "FineIBT preamble wrong size: %ld", fineibt_preamble_size))
 		return;
 
-	if (!HAS_KERNEL_IBT || !cpu_feature_enabled(X86_FEATURE_IBT))
+	if (cfi_mode == CFI_DEFAULT) {
+		cfi_mode = CFI_KCFI;
+		if (HAS_KERNEL_IBT && cpu_feature_enabled(X86_FEATURE_IBT))
+			cfi_mode = CFI_FINEIBT;
+	}
+
+	switch (cfi_mode) {
+	case CFI_OFF:
+		ret = cfi_disable_callers(start_retpoline, end_retpoline);
+		if (ret)
+			goto err;
+
+		if (builtin)
+			pr_info("Disabling CFI\n");
 		return;
 
-	/*
-	 * Rewrite the callers to not use the __cfi_ stubs, such that we might
-	 * rewrite them. This disables all CFI. If this succeeds but any of the
-	 * later stages fails, we're without CFI.
-	 */
-	ret = cfi_disable_callers(start_retpoline, end_retpoline);
-	if (ret)
-		goto err;
+	case CFI_KCFI:
+		if (builtin)
+			pr_info("Using kCFI\n");
+		return;
 
-	ret = cfi_rewrite_preamble(start_cfi, end_cfi);
-	if (ret)
-		goto err;
+	case CFI_FINEIBT:
+		/*
+		 * Rewrite the callers to not use the __cfi_ stubs, such that we might
+		 * rewrite them. This disables all CFI. If this succeeds but any of the
+		 * later stages fails, we're without CFI.
+		 */
+		ret = cfi_disable_callers(start_retpoline, end_retpoline);
+		if (ret)
+			goto err;
+
+		ret = cfi_rewrite_preamble(start_cfi, end_cfi);
+		if (ret)
+			goto err;
 
-	ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
-	if (ret)
-		goto err;
+		ret = cfi_rewrite_callers(start_retpoline, end_retpoline);
+		if (ret)
+			goto err;
 
-	if (builtin)
-		pr_info("Using FineIBT CFI\n");
+		if (builtin)
+			pr_info("Using FineIBT CFI\n");
+		return;
 
-	return;
+	default:
+		break;
+	}
 
 err:
 	pr_err("Something went horribly wrong trying to rewrite the CFI implementation.\n");
-- 
GitLab


From 0c3e806ec0f9771fa1f34c60499097d9260a8bb7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 27 Oct 2022 11:28:16 +0200
Subject: [PATCH 0726/4122] x86/cfi: Add boot time hash randomization

In order to avoid known hashes (from knowing the boot image),
randomize the CFI hashes with a per-boot random seed.

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221027092842.765195516@infradead.org
---
 arch/x86/kernel/alternative.c | 120 ++++++++++++++++++++++++++++++----
 1 file changed, 108 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 9d3b58748ca53..aa7f791585c5c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -711,6 +711,24 @@ enum cfi_mode {
 };
 
 static enum cfi_mode cfi_mode __ro_after_init = CFI_DEFAULT;
+static bool cfi_rand __ro_after_init = true;
+static u32  cfi_seed __ro_after_init;
+
+/*
+ * Re-hash the CFI hash with a boot-time seed while making sure the result is
+ * not a valid ENDBR instruction.
+ */
+static u32 cfi_rehash(u32 hash)
+{
+	hash ^= cfi_seed;
+	while (unlikely(is_endbr(hash) || is_endbr(-hash))) {
+		bool lsb = hash & 1;
+		hash >>= 1;
+		if (lsb)
+			hash ^= 0x80200003;
+	}
+	return hash;
+}
 
 static __init int cfi_parse_cmdline(char *str)
 {
@@ -728,10 +746,13 @@ static __init int cfi_parse_cmdline(char *str)
 			cfi_mode = CFI_DEFAULT;
 		} else if (!strcmp(str, "off")) {
 			cfi_mode = CFI_OFF;
+			cfi_rand = false;
 		} else if (!strcmp(str, "kcfi")) {
 			cfi_mode = CFI_KCFI;
 		} else if (!strcmp(str, "fineibt")) {
 			cfi_mode = CFI_FINEIBT;
+		} else if (!strcmp(str, "norand")) {
+			cfi_rand = false;
 		} else {
 			pr_err("Ignoring unknown cfi option (%s).", str);
 		}
@@ -856,7 +877,50 @@ static int cfi_disable_callers(s32 *start, s32 *end)
 	return 0;
 }
 
+static int cfi_enable_callers(s32 *start, s32 *end)
+{
+	/*
+	 * Re-enable kCFI, undo what cfi_disable_callers() did.
+	 */
+	const u8 mov[] = { 0x41, 0xba };
+	s32 *s;
+
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		addr -= fineibt_caller_size;
+		hash = decode_caller_hash(addr);
+		if (!hash) /* nocfi callers */
+			continue;
+
+		text_poke_early(addr, mov, 2);
+	}
+
+	return 0;
+}
+
 /* .cfi_sites */
+static int cfi_rand_preamble(s32 *start, s32 *end)
+{
+	s32 *s;
+
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		hash = decode_preamble_hash(addr);
+		if (WARN(!hash, "no CFI hash found at: %pS %px %*ph\n",
+			 addr, addr, 5, addr))
+			return -EINVAL;
+
+		hash = cfi_rehash(hash);
+		text_poke_early(addr + 1, &hash, 4);
+	}
+
+	return 0;
+}
+
 static int cfi_rewrite_preamble(s32 *start, s32 *end)
 {
 	s32 *s;
@@ -879,6 +943,25 @@ static int cfi_rewrite_preamble(s32 *start, s32 *end)
 }
 
 /* .retpoline_sites */
+static int cfi_rand_callers(s32 *start, s32 *end)
+{
+	s32 *s;
+
+	for (s = start; s < end; s++) {
+		void *addr = (void *)s + *s;
+		u32 hash;
+
+		addr -= fineibt_caller_size;
+		hash = decode_caller_hash(addr);
+		if (hash) {
+			hash = -cfi_rehash(hash);
+			text_poke_early(addr + 2, &hash, 4);
+		}
+	}
+
+	return 0;
+}
+
 static int cfi_rewrite_callers(s32 *start, s32 *end)
 {
 	s32 *s;
@@ -915,31 +998,44 @@ static void __apply_fineibt(s32 *start_retpoline, s32 *end_retpoline,
 			cfi_mode = CFI_FINEIBT;
 	}
 
-	switch (cfi_mode) {
-	case CFI_OFF:
-		ret = cfi_disable_callers(start_retpoline, end_retpoline);
+	/*
+	 * Rewrite the callers to not use the __cfi_ stubs, such that we might
+	 * rewrite them. This disables all CFI. If this succeeds but any of the
+	 * later stages fails, we're without CFI.
+	 */
+	ret = cfi_disable_callers(start_retpoline, end_retpoline);
+	if (ret)
+		goto err;
+
+	if (cfi_rand) {
+		if (builtin)
+			cfi_seed = get_random_u32();
+
+		ret = cfi_rand_preamble(start_cfi, end_cfi);
 		if (ret)
 			goto err;
 
+		ret = cfi_rand_callers(start_retpoline, end_retpoline);
+		if (ret)
+			goto err;
+	}
+
+	switch (cfi_mode) {
+	case CFI_OFF:
 		if (builtin)
 			pr_info("Disabling CFI\n");
 		return;
 
 	case CFI_KCFI:
+		ret = cfi_enable_callers(start_retpoline, end_retpoline);
+		if (ret)
+			goto err;
+
 		if (builtin)
 			pr_info("Using kCFI\n");
 		return;
 
 	case CFI_FINEIBT:
-		/*
-		 * Rewrite the callers to not use the __cfi_ stubs, such that we might
-		 * rewrite them. This disables all CFI. If this succeeds but any of the
-		 * later stages fails, we're without CFI.
-		 */
-		ret = cfi_disable_callers(start_retpoline, end_retpoline);
-		if (ret)
-			goto err;
-
 		ret = cfi_rewrite_preamble(start_cfi, end_cfi);
 		if (ret)
 			goto err;
-- 
GitLab


From 9e4a617757273a86b560c1ece40c48e4940a3c79 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 29 Sep 2022 02:24:53 -0700
Subject: [PATCH 0727/4122] string: Add __realloc_size hint to kmemdup()

Add __realloc_size() hint to kmemdup() so the compiler can reason about
the length of the returned buffer. (These must not use __alloc_size,
since those include __malloc which says the contents aren't defined[1]).

[1] https://lore.kernel.org/linux-hardening/d199c2af-06af-8a50-a6a1-00eefa0b67b4@prevas.dk/

Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Cc: Guenter Roeck <linux@roeck-us.net>
Cc: Andy Shevchenko <andriy.shevchenko@intel.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/fortify-string.h | 3 ++-
 include/linux/string.h         | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index e5b39b1cc2fc1..49782f63f015a 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -659,7 +659,8 @@ __FORTIFY_INLINE void *memchr_inv(const void * const POS0 p, int c, size_t size)
 	return __real_memchr_inv(p, c, size);
 }
 
-extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup)
+								    __realloc_size(2);
 __FORTIFY_INLINE void *kmemdup(const void * const POS0 p, size_t size, gfp_t gfp)
 {
 	size_t p_size = __struct_size(p);
diff --git a/include/linux/string.h b/include/linux/string.h
index cf7607b321027..db28802ab0a65 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -176,7 +176,7 @@ extern void kfree_const(const void *x);
 extern char *kstrdup(const char *s, gfp_t gfp) __malloc;
 extern const char *kstrdup_const(const char *s, gfp_t gfp);
 extern char *kstrndup(const char *s, size_t len, gfp_t gfp);
-extern void *kmemdup(const void *src, size_t len, gfp_t gfp);
+extern void *kmemdup(const void *src, size_t len, gfp_t gfp) __realloc_size(2);
 extern char *kmemdup_nul(const char *s, size_t len, gfp_t gfp);
 
 extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
-- 
GitLab


From 41eefc46a3a4682976afb5f8c4b9734ed6bfd406 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 2 Oct 2022 09:51:46 -0700
Subject: [PATCH 0728/4122] string: Convert strscpy() self-test to KUnit

Convert the strscpy() self-test to a KUnit test.

Cc: David Gow <davidgow@google.com>
Cc: Tobin C. Harding <tobin@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/lkml/Y072ZMk/hNkfwqMv@dev-arch.thelio-3990X
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 MAINTAINERS         |   1 +
 lib/Kconfig.debug   |   8 ++-
 lib/Makefile        |   2 +-
 lib/strscpy_kunit.c | 129 +++++++++++++++++++++++++++++++++++++
 lib/test_strscpy.c  | 150 --------------------------------------------
 5 files changed, 136 insertions(+), 154 deletions(-)
 create mode 100644 lib/strscpy_kunit.c
 delete mode 100644 lib/test_strscpy.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 9dd8d74c4df09..232d78340d79e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8045,6 +8045,7 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/har
 F:	include/linux/fortify-string.h
 F:	lib/fortify_kunit.c
 F:	lib/memcpy_kunit.c
+F:	lib/strscpy_kunit.c
 F:	lib/test_fortify/*
 F:	scripts/test_fortify.sh
 K:	\b__NO_FORTIFY\b
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3fc7abffc7aa2..e0a4d52e434c3 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2215,9 +2215,6 @@ config STRING_SELFTEST
 config TEST_STRING_HELPERS
 	tristate "Test functions located in the string_helpers module at runtime"
 
-config TEST_STRSCPY
-	tristate "Test strscpy*() family of functions at runtime"
-
 config TEST_KSTRTOX
 	tristate "Test kstrto*() family of functions at runtime"
 
@@ -2583,6 +2580,11 @@ config HW_BREAKPOINT_KUNIT_TEST
 
 	  If unsure, say N.
 
+config STRSCPY_KUNIT_TEST
+	tristate "Test strscpy*() family of functions at runtime" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 161d6a724ff71..1905e5c268495 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -82,7 +82,6 @@ obj-$(CONFIG_TEST_DYNAMIC_DEBUG) += test_dynamic_debug.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_SCANF) += test_scanf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
-obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
@@ -380,6 +379,7 @@ obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
 CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
 obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
 obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o
+obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o
 
 obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
 
diff --git a/lib/strscpy_kunit.c b/lib/strscpy_kunit.c
new file mode 100644
index 0000000000000..98523f828d3a2
--- /dev/null
+++ b/lib/strscpy_kunit.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Kernel module for testing 'strscpy' family of functions.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <kunit/test.h>
+#include <linux/string.h>
+
+/*
+ * tc() - Run a specific test case.
+ * @src: Source string, argument to strscpy_pad()
+ * @count: Size of destination buffer, argument to strscpy_pad()
+ * @expected: Expected return value from call to strscpy_pad()
+ * @terminator: 1 if there should be a terminating null byte 0 otherwise.
+ * @chars: Number of characters from the src string expected to be
+ *         written to the dst buffer.
+ * @pad: Number of pad characters expected (in the tail of dst buffer).
+ *       (@pad does not include the null terminator byte.)
+ *
+ * Calls strscpy_pad() and verifies the return value and state of the
+ * destination buffer after the call returns.
+ */
+static void tc(struct kunit *test, char *src, int count, int expected,
+	       int chars, int terminator, int pad)
+{
+	int nr_bytes_poison;
+	int max_expected;
+	int max_count;
+	int written;
+	char buf[6];
+	int index, i;
+	const char POISON = 'z';
+
+	KUNIT_ASSERT_TRUE_MSG(test, src != NULL,
+			      "null source string not supported");
+
+	memset(buf, POISON, sizeof(buf));
+	/* Future proofing test suite, validate args */
+	max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */
+	max_expected = count - 1;    /* Space for the null */
+
+	KUNIT_ASSERT_LE_MSG(test, count, max_count,
+		"count (%d) is too big (%d) ... aborting", count, max_count);
+	KUNIT_EXPECT_LE_MSG(test, expected, max_expected,
+		"expected (%d) is bigger than can possibly be returned (%d)",
+		expected, max_expected);
+
+	written = strscpy_pad(buf, src, count);
+	KUNIT_ASSERT_EQ(test, written, expected);
+
+	if (count && written == -E2BIG) {
+		KUNIT_ASSERT_EQ_MSG(test, 0, strncmp(buf, src, count - 1),
+			"buffer state invalid for -E2BIG");
+		KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0',
+			"too big string is not null terminated correctly");
+	}
+
+	for (i = 0; i < chars; i++)
+		KUNIT_ASSERT_EQ_MSG(test, buf[i], src[i],
+			"buf[i]==%c != src[i]==%c", buf[i], src[i]);
+
+	if (terminator)
+		KUNIT_ASSERT_EQ_MSG(test, buf[count - 1], '\0',
+			"string is not null terminated correctly");
+
+	for (i = 0; i < pad; i++) {
+		index = chars + terminator + i;
+		KUNIT_ASSERT_EQ_MSG(test, buf[index], '\0',
+			"padding missing at index: %d", i);
+	}
+
+	nr_bytes_poison = sizeof(buf) - chars - terminator - pad;
+	for (i = 0; i < nr_bytes_poison; i++) {
+		index = sizeof(buf) - 1 - i; /* Check from the end back */
+		KUNIT_ASSERT_EQ_MSG(test, buf[index], POISON,
+			"poison value missing at index: %d", i);
+	}
+}
+
+static void strscpy_test(struct kunit *test)
+{
+	/*
+	 * tc() uses a destination buffer of size 6 and needs at
+	 * least 2 characters spare (one for null and one to check for
+	 * overflow).  This means we should only call tc() with
+	 * strings up to a maximum of 4 characters long and 'count'
+	 * should not exceed 4.  To test with longer strings increase
+	 * the buffer size in tc().
+	 */
+
+	/* tc(test, src, count, expected, chars, terminator, pad) */
+	tc(test, "a", 0, -E2BIG, 0, 0, 0);
+	tc(test, "",  0, -E2BIG, 0, 0, 0);
+
+	tc(test, "a", 1, -E2BIG, 0, 1, 0);
+	tc(test, "",  1, 0,	 0, 1, 0);
+
+	tc(test, "ab", 2, -E2BIG, 1, 1, 0);
+	tc(test, "a",  2, 1,	  1, 1, 0);
+	tc(test, "",   2, 0,	  0, 1, 1);
+
+	tc(test, "abc", 3, -E2BIG, 2, 1, 0);
+	tc(test, "ab",  3, 2,	   2, 1, 0);
+	tc(test, "a",   3, 1,	   1, 1, 1);
+	tc(test, "",    3, 0,	   0, 1, 2);
+
+	tc(test, "abcd", 4, -E2BIG, 3, 1, 0);
+	tc(test, "abc",  4, 3,	    3, 1, 0);
+	tc(test, "ab",   4, 2,	    2, 1, 1);
+	tc(test, "a",    4, 1,	    1, 1, 2);
+	tc(test, "",     4, 0,	    0, 1, 3);
+}
+
+static struct kunit_case strscpy_test_cases[] = {
+	KUNIT_CASE(strscpy_test),
+	{}
+};
+
+static struct kunit_suite strscpy_test_suite = {
+	.name = "strscpy",
+	.test_cases = strscpy_test_cases,
+};
+
+kunit_test_suite(strscpy_test_suite);
+
+MODULE_AUTHOR("Tobin C. Harding <tobin@kernel.org>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_strscpy.c b/lib/test_strscpy.c
deleted file mode 100644
index a827f94601f5d..0000000000000
--- a/lib/test_strscpy.c
+++ /dev/null
@@ -1,150 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/string.h>
-
-#include "../tools/testing/selftests/kselftest_module.h"
-
-/*
- * Kernel module for testing 'strscpy' family of functions.
- */
-
-KSTM_MODULE_GLOBALS();
-
-/*
- * tc() - Run a specific test case.
- * @src: Source string, argument to strscpy_pad()
- * @count: Size of destination buffer, argument to strscpy_pad()
- * @expected: Expected return value from call to strscpy_pad()
- * @terminator: 1 if there should be a terminating null byte 0 otherwise.
- * @chars: Number of characters from the src string expected to be
- *         written to the dst buffer.
- * @pad: Number of pad characters expected (in the tail of dst buffer).
- *       (@pad does not include the null terminator byte.)
- *
- * Calls strscpy_pad() and verifies the return value and state of the
- * destination buffer after the call returns.
- */
-static int __init tc(char *src, int count, int expected,
-		     int chars, int terminator, int pad)
-{
-	int nr_bytes_poison;
-	int max_expected;
-	int max_count;
-	int written;
-	char buf[6];
-	int index, i;
-	const char POISON = 'z';
-
-	total_tests++;
-
-	if (!src) {
-		pr_err("null source string not supported\n");
-		return -1;
-	}
-
-	memset(buf, POISON, sizeof(buf));
-	/* Future proofing test suite, validate args */
-	max_count = sizeof(buf) - 2; /* Space for null and to verify overflow */
-	max_expected = count - 1;    /* Space for the null */
-	if (count > max_count) {
-		pr_err("count (%d) is too big (%d) ... aborting", count, max_count);
-		return -1;
-	}
-	if (expected > max_expected) {
-		pr_warn("expected (%d) is bigger than can possibly be returned (%d)",
-			expected, max_expected);
-	}
-
-	written = strscpy_pad(buf, src, count);
-	if ((written) != (expected)) {
-		pr_err("%d != %d (written, expected)\n", written, expected);
-		goto fail;
-	}
-
-	if (count && written == -E2BIG) {
-		if (strncmp(buf, src, count - 1) != 0) {
-			pr_err("buffer state invalid for -E2BIG\n");
-			goto fail;
-		}
-		if (buf[count - 1] != '\0') {
-			pr_err("too big string is not null terminated correctly\n");
-			goto fail;
-		}
-	}
-
-	for (i = 0; i < chars; i++) {
-		if (buf[i] != src[i]) {
-			pr_err("buf[i]==%c != src[i]==%c\n", buf[i], src[i]);
-			goto fail;
-		}
-	}
-
-	if (terminator) {
-		if (buf[count - 1] != '\0') {
-			pr_err("string is not null terminated correctly\n");
-			goto fail;
-		}
-	}
-
-	for (i = 0; i < pad; i++) {
-		index = chars + terminator + i;
-		if (buf[index] != '\0') {
-			pr_err("padding missing at index: %d\n", i);
-			goto fail;
-		}
-	}
-
-	nr_bytes_poison = sizeof(buf) - chars - terminator - pad;
-	for (i = 0; i < nr_bytes_poison; i++) {
-		index = sizeof(buf) - 1 - i; /* Check from the end back */
-		if (buf[index] != POISON) {
-			pr_err("poison value missing at index: %d\n", i);
-			goto fail;
-		}
-	}
-
-	return 0;
-fail:
-	failed_tests++;
-	return -1;
-}
-
-static void __init selftest(void)
-{
-	/*
-	 * tc() uses a destination buffer of size 6 and needs at
-	 * least 2 characters spare (one for null and one to check for
-	 * overflow).  This means we should only call tc() with
-	 * strings up to a maximum of 4 characters long and 'count'
-	 * should not exceed 4.  To test with longer strings increase
-	 * the buffer size in tc().
-	 */
-
-	/* tc(src, count, expected, chars, terminator, pad) */
-	KSTM_CHECK_ZERO(tc("a", 0, -E2BIG, 0, 0, 0));
-	KSTM_CHECK_ZERO(tc("", 0, -E2BIG, 0, 0, 0));
-
-	KSTM_CHECK_ZERO(tc("a", 1, -E2BIG, 0, 1, 0));
-	KSTM_CHECK_ZERO(tc("", 1, 0, 0, 1, 0));
-
-	KSTM_CHECK_ZERO(tc("ab", 2, -E2BIG, 1, 1, 0));
-	KSTM_CHECK_ZERO(tc("a", 2, 1, 1, 1, 0));
-	KSTM_CHECK_ZERO(tc("", 2, 0, 0, 1, 1));
-
-	KSTM_CHECK_ZERO(tc("abc", 3, -E2BIG, 2, 1, 0));
-	KSTM_CHECK_ZERO(tc("ab", 3, 2, 2, 1, 0));
-	KSTM_CHECK_ZERO(tc("a", 3, 1, 1, 1, 1));
-	KSTM_CHECK_ZERO(tc("", 3, 0, 0, 1, 2));
-
-	KSTM_CHECK_ZERO(tc("abcd", 4, -E2BIG, 3, 1, 0));
-	KSTM_CHECK_ZERO(tc("abc", 4, 3, 3, 1, 0));
-	KSTM_CHECK_ZERO(tc("ab", 4, 2, 2, 1, 1));
-	KSTM_CHECK_ZERO(tc("a", 4, 1, 1, 1, 2));
-	KSTM_CHECK_ZERO(tc("", 4, 0, 0, 1, 3));
-}
-
-KSTM_MODULE_LOADERS(test_strscpy);
-MODULE_AUTHOR("Tobin C. Harding <tobin@kernel.org>");
-MODULE_LICENSE("GPL");
-- 
GitLab


From 62e1cbfc5d795381a0f237ae7ee229a92d51cf9e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 2 Oct 2022 09:17:03 -0700
Subject: [PATCH 0729/4122] fortify: Short-circuit known-safe calls to
 strscpy()

Replacing compile-time safe calls of strcpy()-related functions with
strscpy() was always calling the full strscpy() logic when a builtin
would be better. For example:

	char buf[16];
	strcpy(buf, "yes");

would reduce to __builtin_memcpy(buf, "yes", 4), but not if it was:

	strscpy(buf, yes, sizeof(buf));

Fix this by checking if all sizes are known at compile-time.

Cc: linux-hardening@vger.kernel.org
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/fortify-string.h | 10 ++++++++++
 lib/strscpy_kunit.c            | 13 +++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 49782f63f015a..32a66d4b30ca3 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -314,6 +314,16 @@ __FORTIFY_INLINE ssize_t strscpy(char * const POS p, const char * const POS q, s
 	if (__compiletime_lessthan(p_size, size))
 		__write_overflow();
 
+	/* Short-circuit for compile-time known-safe lengths. */
+	if (__compiletime_lessthan(p_size, SIZE_MAX)) {
+		len = __compiletime_strlen(q);
+
+		if (len < SIZE_MAX && __compiletime_lessthan(len, size)) {
+			__underlying_memcpy(p, q, len + 1);
+			return len;
+		}
+	}
+
 	/*
 	 * This call protects from read overflow, because len will default to q
 	 * length if it smaller than size.
diff --git a/lib/strscpy_kunit.c b/lib/strscpy_kunit.c
index 98523f828d3a2..a6b6344354ed5 100644
--- a/lib/strscpy_kunit.c
+++ b/lib/strscpy_kunit.c
@@ -81,6 +81,8 @@ static void tc(struct kunit *test, char *src, int count, int expected,
 
 static void strscpy_test(struct kunit *test)
 {
+	char dest[8];
+
 	/*
 	 * tc() uses a destination buffer of size 6 and needs at
 	 * least 2 characters spare (one for null and one to check for
@@ -111,6 +113,17 @@ static void strscpy_test(struct kunit *test)
 	tc(test, "ab",   4, 2,	    2, 1, 1);
 	tc(test, "a",    4, 1,	    1, 1, 2);
 	tc(test, "",     4, 0,	    0, 1, 3);
+
+	/* Compile-time-known source strings. */
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "", ARRAY_SIZE(dest)), 0);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "", 3), 0);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "", 1), 0);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "", 0), -E2BIG);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", ARRAY_SIZE(dest)), 5);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 3), -E2BIG);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 1), -E2BIG);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "Fixed", 0), -E2BIG);
+	KUNIT_EXPECT_EQ(test, strscpy(dest, "This is too long", ARRAY_SIZE(dest)), -E2BIG);
 }
 
 static struct kunit_case strscpy_test_cases[] = {
-- 
GitLab


From fb3d88ab354b3b07e805aba9d67cbb43d23dc70e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 2 Oct 2022 19:45:23 -0700
Subject: [PATCH 0730/4122] siphash: Convert selftest to KUnit

Convert the siphash self-test to KUnit so it will be included in "all
KUnit tests" coverage, and can be run individually still:

$ ./tools/testing/kunit/kunit.py run siphash
...
[02:58:45] Starting KUnit Kernel (1/1)...
[02:58:45] ============================================================
[02:58:45] =================== siphash (1 subtest) ====================
[02:58:45] [PASSED] siphash_test
[02:58:45] ===================== [PASSED] siphash =====================
[02:58:45] ============================================================
[02:58:45] Testing complete. Ran 1 tests: passed: 1
[02:58:45] Elapsed time: 21.421s total, 4.306s configuring, 16.947s building, 0.148s running

Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Cc: Yury Norov <yury.norov@gmail.com>
Cc: Sander Vanheule <sander@svanheule.net>
Acked-by: "Jason A. Donenfeld" <Jason@zx2c4.com>
Link: https://lore.kernel.org/lkml/CAHmME9r+9MPH6zk3Vn=buEMSbQiWMFryqqzerKarmjYk+tHLJA@mail.gmail.com
Tested-by: David Gow <davidgow@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 MAINTAINERS                             |   2 +-
 lib/Kconfig.debug                       |  20 +--
 lib/Makefile                            |   2 +-
 lib/{test_siphash.c => siphash_kunit.c} | 165 ++++++++++--------------
 4 files changed, 83 insertions(+), 106 deletions(-)
 rename lib/{test_siphash.c => siphash_kunit.c} (60%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 232d78340d79e..1cd80c1137212 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18864,7 +18864,7 @@ M:	Jason A. Donenfeld <Jason@zx2c4.com>
 S:	Maintained
 F:	include/linux/siphash.h
 F:	lib/siphash.c
-F:	lib/test_siphash.c
+F:	lib/siphash_kunit.c
 
 SIS 190 ETHERNET DRIVER
 M:	Francois Romieu <romieu@fr.zoreil.com>
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e0a4d52e434c3..50cc1c4efaddb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2244,15 +2244,6 @@ config TEST_RHASHTABLE
 
 	  If unsure, say N.
 
-config TEST_SIPHASH
-	tristate "Perform selftest on siphash functions"
-	help
-	  Enable this option to test the kernel's siphash (<linux/siphash.h>) hash
-	  functions on boot (or module load).
-
-	  This is intended to help people writing architecture-specific
-	  optimized versions.  If unsure, say N.
-
 config TEST_IDA
 	tristate "Perform selftest on IDA functions"
 
@@ -2585,6 +2576,17 @@ config STRSCPY_KUNIT_TEST
 	depends on KUNIT
 	default KUNIT_ALL_TESTS
 
+config SIPHASH_KUNIT_TEST
+	tristate "Perform selftest on siphash functions" if !KUNIT_ALL_TESTS
+	depends on KUNIT
+	default KUNIT_ALL_TESTS
+	help
+	  Enable this option to test the kernel's siphash (<linux/siphash.h>) hash
+	  functions on boot (or module load).
+
+	  This is intended to help people writing architecture-specific
+	  optimized versions.  If unsure, say N.
+
 config TEST_UDELAY
 	tristate "udelay test driver"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index 1905e5c268495..77c7951c8cf09 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -62,7 +62,6 @@ obj-$(CONFIG_TEST_BITOPS) += test_bitops.o
 CFLAGS_test_bitops.o += -Werror
 obj-$(CONFIG_CPUMASK_KUNIT_TEST) += cpumask_kunit.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
-obj-$(CONFIG_TEST_SIPHASH) += test_siphash.o
 obj-$(CONFIG_HASH_KUNIT_TEST) += test_hash.o
 obj-$(CONFIG_TEST_IDA) += test_ida.o
 obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
@@ -380,6 +379,7 @@ CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
 obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
 obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o
 obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o
+obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
 
 obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o
 
diff --git a/lib/test_siphash.c b/lib/siphash_kunit.c
similarity index 60%
rename from lib/test_siphash.c
rename to lib/siphash_kunit.c
index a96788d0141d4..a3c697e8be357 100644
--- a/lib/test_siphash.c
+++ b/lib/siphash_kunit.c
@@ -13,6 +13,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <kunit/test.h>
 #include <linux/siphash.h>
 #include <linux/kernel.h>
 #include <linux/string.h>
@@ -109,114 +110,88 @@ static const u32 test_vectors_hsiphash[64] = {
 };
 #endif
 
-static int __init siphash_test_init(void)
+#define chk(hash, vector, fmt...)			\
+	KUNIT_EXPECT_EQ_MSG(test, hash, vector, fmt)
+
+static void siphash_test(struct kunit *test)
 {
 	u8 in[64] __aligned(SIPHASH_ALIGNMENT);
 	u8 in_unaligned[65] __aligned(SIPHASH_ALIGNMENT);
 	u8 i;
-	int ret = 0;
 
 	for (i = 0; i < 64; ++i) {
 		in[i] = i;
 		in_unaligned[i + 1] = i;
-		if (siphash(in, i, &test_key_siphash) !=
-						test_vectors_siphash[i]) {
-			pr_info("siphash self-test aligned %u: FAIL\n", i + 1);
-			ret = -EINVAL;
-		}
-		if (siphash(in_unaligned + 1, i, &test_key_siphash) !=
-						test_vectors_siphash[i]) {
-			pr_info("siphash self-test unaligned %u: FAIL\n", i + 1);
-			ret = -EINVAL;
-		}
-		if (hsiphash(in, i, &test_key_hsiphash) !=
-						test_vectors_hsiphash[i]) {
-			pr_info("hsiphash self-test aligned %u: FAIL\n", i + 1);
-			ret = -EINVAL;
-		}
-		if (hsiphash(in_unaligned + 1, i, &test_key_hsiphash) !=
-						test_vectors_hsiphash[i]) {
-			pr_info("hsiphash self-test unaligned %u: FAIL\n", i + 1);
-			ret = -EINVAL;
-		}
-	}
-	if (siphash_1u64(0x0706050403020100ULL, &test_key_siphash) !=
-						test_vectors_siphash[8]) {
-		pr_info("siphash self-test 1u64: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
-			 &test_key_siphash) != test_vectors_siphash[16]) {
-		pr_info("siphash self-test 2u64: FAIL\n");
-		ret = -EINVAL;
+		chk(siphash(in, i, &test_key_siphash),
+		    test_vectors_siphash[i],
+		    "siphash self-test aligned %u: FAIL", i + 1);
+		chk(siphash(in_unaligned + 1, i, &test_key_siphash),
+		    test_vectors_siphash[i],
+		    "siphash self-test unaligned %u: FAIL", i + 1);
+		chk(hsiphash(in, i, &test_key_hsiphash),
+		    test_vectors_hsiphash[i],
+		    "hsiphash self-test aligned %u: FAIL", i + 1);
+		chk(hsiphash(in_unaligned + 1, i, &test_key_hsiphash),
+		    test_vectors_hsiphash[i],
+		    "hsiphash self-test unaligned %u: FAIL", i + 1);
 	}
-	if (siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
-			 0x1716151413121110ULL, &test_key_siphash) !=
-						test_vectors_siphash[24]) {
-		pr_info("siphash self-test 3u64: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+	chk(siphash_1u64(0x0706050403020100ULL, &test_key_siphash),
+	    test_vectors_siphash[8],
+	    "siphash self-test 1u64: FAIL");
+	chk(siphash_2u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+			 &test_key_siphash),
+	    test_vectors_siphash[16],
+	    "siphash self-test 2u64: FAIL");
+	chk(siphash_3u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
+			 0x1716151413121110ULL, &test_key_siphash),
+	    test_vectors_siphash[24],
+	    "siphash self-test 3u64: FAIL");
+	chk(siphash_4u64(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL,
 			 0x1716151413121110ULL, 0x1f1e1d1c1b1a1918ULL,
-			 &test_key_siphash) != test_vectors_siphash[32]) {
-		pr_info("siphash self-test 4u64: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (siphash_1u32(0x03020100U, &test_key_siphash) !=
-						test_vectors_siphash[4]) {
-		pr_info("siphash self-test 1u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash) !=
-						test_vectors_siphash[8]) {
-		pr_info("siphash self-test 2u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (siphash_3u32(0x03020100U, 0x07060504U,
-			 0x0b0a0908U, &test_key_siphash) !=
-						test_vectors_siphash[12]) {
-		pr_info("siphash self-test 3u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (siphash_4u32(0x03020100U, 0x07060504U,
-			 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash) !=
-						test_vectors_siphash[16]) {
-		pr_info("siphash self-test 4u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (hsiphash_1u32(0x03020100U, &test_key_hsiphash) !=
-						test_vectors_hsiphash[4]) {
-		pr_info("hsiphash self-test 1u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash) !=
-						test_vectors_hsiphash[8]) {
-		pr_info("hsiphash self-test 2u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (hsiphash_3u32(0x03020100U, 0x07060504U,
-			  0x0b0a0908U, &test_key_hsiphash) !=
-						test_vectors_hsiphash[12]) {
-		pr_info("hsiphash self-test 3u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (hsiphash_4u32(0x03020100U, 0x07060504U,
-			  0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash) !=
-						test_vectors_hsiphash[16]) {
-		pr_info("hsiphash self-test 4u32: FAIL\n");
-		ret = -EINVAL;
-	}
-	if (!ret)
-		pr_info("self-tests: pass\n");
-	return ret;
+			 &test_key_siphash),
+	    test_vectors_siphash[32],
+	    "siphash self-test 4u64: FAIL");
+	chk(siphash_1u32(0x03020100U, &test_key_siphash),
+	    test_vectors_siphash[4],
+	    "siphash self-test 1u32: FAIL");
+	chk(siphash_2u32(0x03020100U, 0x07060504U, &test_key_siphash),
+	    test_vectors_siphash[8],
+	    "siphash self-test 2u32: FAIL");
+	chk(siphash_3u32(0x03020100U, 0x07060504U,
+			 0x0b0a0908U, &test_key_siphash),
+	    test_vectors_siphash[12],
+	    "siphash self-test 3u32: FAIL");
+	chk(siphash_4u32(0x03020100U, 0x07060504U,
+			 0x0b0a0908U, 0x0f0e0d0cU, &test_key_siphash),
+	    test_vectors_siphash[16],
+	    "siphash self-test 4u32: FAIL");
+	chk(hsiphash_1u32(0x03020100U, &test_key_hsiphash),
+	    test_vectors_hsiphash[4],
+	    "hsiphash self-test 1u32: FAIL");
+	chk(hsiphash_2u32(0x03020100U, 0x07060504U, &test_key_hsiphash),
+	    test_vectors_hsiphash[8],
+	    "hsiphash self-test 2u32: FAIL");
+	chk(hsiphash_3u32(0x03020100U, 0x07060504U,
+			  0x0b0a0908U, &test_key_hsiphash),
+	    test_vectors_hsiphash[12],
+	    "hsiphash self-test 3u32: FAIL");
+	chk(hsiphash_4u32(0x03020100U, 0x07060504U,
+			  0x0b0a0908U, 0x0f0e0d0cU, &test_key_hsiphash),
+	    test_vectors_hsiphash[16],
+	    "hsiphash self-test 4u32: FAIL");
 }
 
-static void __exit siphash_test_exit(void)
-{
-}
+static struct kunit_case siphash_test_cases[] = {
+	KUNIT_CASE(siphash_test),
+	{}
+};
+
+static struct kunit_suite siphash_test_suite = {
+	.name = "siphash",
+	.test_cases = siphash_test_cases,
+};
 
-module_init(siphash_test_init);
-module_exit(siphash_test_exit);
+kunit_test_suite(siphash_test_suite);
 
 MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
 MODULE_LICENSE("Dual BSD/GPL");
-- 
GitLab


From e9a40e1585d792751d3a122392695e5a53032809 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 25 Oct 2022 16:05:18 -0700
Subject: [PATCH 0731/4122] fortify: Do not cast to "unsigned char"

Do not cast to "unsigned char", as this needlessly creates type problems
when attempting builds without -Wno-pointer-sign[1]. The intent of the
cast is to drop possible "const" types.

[1] https://lore.kernel.org/lkml/CAHk-=wgz3Uba8w7kdXhsqR1qvfemYL+OFQdefJnkeqXG8qZ_pA@mail.gmail.com/

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 3009f891bb9f ("fortify: Allow strlen() and strnlen() to pass compile-time known lengths")
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/fortify-string.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 32a66d4b30ca3..aa31f54f8b573 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -18,7 +18,7 @@ void __write_overflow_field(size_t avail, size_t wanted) __compiletime_warning("
 
 #define __compiletime_strlen(p)					\
 ({								\
-	unsigned char *__p = (unsigned char *)(p);		\
+	char *__p = (char *)(p);				\
 	size_t __ret = SIZE_MAX;				\
 	size_t __p_size = __member_size(p);			\
 	if (__p_size != SIZE_MAX &&				\
-- 
GitLab


From 5a17f040fa332e71a45ca9ff02d6979d9176a423 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 26 Oct 2022 16:31:11 -0700
Subject: [PATCH 0732/4122] cred: Do not default to init_cred in
 prepare_kernel_cred()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A common exploit pattern for ROP attacks is to abuse prepare_kernel_cred()
in order to construct escalated privileges[1]. Instead of providing a
short-hand argument (NULL) to the "daemon" argument to indicate using
init_cred as the base cred, require that "daemon" is always set to
an actual task. Replace all existing callers that were passing NULL
with &init_task.

Future attacks will need to have sufficiently powerful read/write
primitives to have found an appropriately privileged task and written it
to the ROP stack as an argument to succeed, which is similarly difficult
to the prior effort needed to escalate privileges before struct cred
existed: locate the current cred and overwrite the uid member.

This has the added benefit of meaning that prepare_kernel_cred() can no
longer exceed the privileges of the init task, which may have changed from
the original init_cred (e.g. dropping capabilities from the bounding set).

[1] https://google.com/search?q=commit_creds(prepare_kernel_cred(0))

Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Steve French <sfrench@samba.org>
Cc: Ronnie Sahlberg <lsahlber@redhat.com>
Cc: Shyam Prasad N <sprasad@microsoft.com>
Cc: Tom Talpey <tom@talpey.com>
Cc: Namjae Jeon <linkinjeon@kernel.org>
Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna@kernel.org>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: "Michal Koutný" <mkoutny@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: linux-cifs@vger.kernel.org
Cc: samba-technical@lists.samba.org
Cc: linux-nfs@vger.kernel.org
Cc: netdev@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Russ Weight <russell.h.weight@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Link: https://lore.kernel.org/r/20221026232943.never.775-kees@kernel.org
---
 drivers/base/firmware_loader/main.c    |  2 +-
 fs/cifs/cifs_spnego.c                  |  2 +-
 fs/cifs/cifsacl.c                      |  2 +-
 fs/ksmbd/smb_common.c                  |  2 +-
 fs/nfs/flexfilelayout/flexfilelayout.c |  4 ++--
 fs/nfs/nfs4idmap.c                     |  2 +-
 fs/nfsd/nfs4callback.c                 |  2 +-
 kernel/cred.c                          | 15 +++++++--------
 net/dns_resolver/dns_key.c             |  2 +-
 9 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c
index 7c3590fd97c28..017c4cdb219eb 100644
--- a/drivers/base/firmware_loader/main.c
+++ b/drivers/base/firmware_loader/main.c
@@ -821,7 +821,7 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
 	 * called by a driver when serving an unrelated request from userland, we use
 	 * the kernel credentials to read the file.
 	 */
-	kern_cred = prepare_kernel_cred(NULL);
+	kern_cred = prepare_kernel_cred(&init_task);
 	if (!kern_cred) {
 		ret = -ENOMEM;
 		goto out;
diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 342717bf1dc28..6f3285f1dfee5 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -189,7 +189,7 @@ init_cifs_spnego(void)
 	 * spnego upcalls.
 	 */
 
-	cred = prepare_kernel_cred(NULL);
+	cred = prepare_kernel_cred(&init_task);
 	if (!cred)
 		return -ENOMEM;
 
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index fa480d62f3137..574de2b225ae6 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -465,7 +465,7 @@ init_cifs_idmap(void)
 	 * this is used to prevent malicious redirections from being installed
 	 * with add_key().
 	 */
-	cred = prepare_kernel_cred(NULL);
+	cred = prepare_kernel_cred(&init_task);
 	if (!cred)
 		return -ENOMEM;
 
diff --git a/fs/ksmbd/smb_common.c b/fs/ksmbd/smb_common.c
index d96da872d70a1..2a4fbbd55b91f 100644
--- a/fs/ksmbd/smb_common.c
+++ b/fs/ksmbd/smb_common.c
@@ -623,7 +623,7 @@ int ksmbd_override_fsids(struct ksmbd_work *work)
 	if (share->force_gid != KSMBD_SHARE_INVALID_GID)
 		gid = share->force_gid;
 
-	cred = prepare_kernel_cred(NULL);
+	cred = prepare_kernel_cred(&init_task);
 	if (!cred)
 		return -ENOMEM;
 
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 1ec79ccf89ad2..7deb3cd76abe4 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -493,10 +493,10 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
 		gid = make_kgid(&init_user_ns, id);
 
 		if (gfp_flags & __GFP_FS)
-			kcred = prepare_kernel_cred(NULL);
+			kcred = prepare_kernel_cred(&init_task);
 		else {
 			unsigned int nofs_flags = memalloc_nofs_save();
-			kcred = prepare_kernel_cred(NULL);
+			kcred = prepare_kernel_cred(&init_task);
 			memalloc_nofs_restore(nofs_flags);
 		}
 		rc = -ENOMEM;
diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index e3fdd2f45b01f..25a7c771cfd89 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -203,7 +203,7 @@ int nfs_idmap_init(void)
 	printk(KERN_NOTICE "NFS: Registering the %s key type\n",
 		key_type_id_resolver.name);
 
-	cred = prepare_kernel_cred(NULL);
+	cred = prepare_kernel_cred(&init_task);
 	if (!cred)
 		return -ENOMEM;
 
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index f0e69edf5f0f1..4a9e8d17e56aa 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -870,7 +870,7 @@ static const struct cred *get_backchannel_cred(struct nfs4_client *clp, struct r
 	} else {
 		struct cred *kcred;
 
-		kcred = prepare_kernel_cred(NULL);
+		kcred = prepare_kernel_cred(&init_task);
 		if (!kcred)
 			return NULL;
 
diff --git a/kernel/cred.c b/kernel/cred.c
index e10c15f51c1fe..811ad654abd18 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -701,9 +701,9 @@ void __init cred_init(void)
  * override a task's own credentials so that work can be done on behalf of that
  * task that requires a different subjective context.
  *
- * @daemon is used to provide a base for the security record, but can be NULL.
- * If @daemon is supplied, then the security data will be derived from that;
- * otherwise they'll be set to 0 and no groups, full capabilities and no keys.
+ * @daemon is used to provide a base cred, with the security data derived from
+ * that; if this is "&init_task", they'll be set to 0, no groups, full
+ * capabilities, and no keys.
  *
  * The caller may change these controls afterwards if desired.
  *
@@ -714,17 +714,16 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon)
 	const struct cred *old;
 	struct cred *new;
 
+	if (WARN_ON_ONCE(!daemon))
+		return NULL;
+
 	new = kmem_cache_alloc(cred_jar, GFP_KERNEL);
 	if (!new)
 		return NULL;
 
 	kdebug("prepare_kernel_cred() alloc %p", new);
 
-	if (daemon)
-		old = get_task_cred(daemon);
-	else
-		old = get_cred(&init_cred);
-
+	old = get_task_cred(daemon);
 	validate_creds(old);
 
 	*new = *old;
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index 3aced951d5ab8..01e54b46ae0b9 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -337,7 +337,7 @@ static int __init init_dns_resolver(void)
 	 * this is used to prevent malicious redirections from being installed
 	 * with add_key().
 	 */
-	cred = prepare_kernel_cred(NULL);
+	cred = prepare_kernel_cred(&init_task);
 	if (!cred)
 		return -ENOMEM;
 
-- 
GitLab


From e1789d7c752ed001cf1a4bbbd624f70a7dd3c6db Mon Sep 17 00:00:00 2001
From: Xin Li <xin3.li@intel.com>
Date: Tue, 25 Oct 2022 00:30:23 -0700
Subject: [PATCH 0733/4122] kbuild: upgrade the orphan section warning to an
 error if CONFIG_WERROR is set

Andrew Cooper suggested upgrading the orphan section warning to a hard link
error. However Nathan Chancellor said outright turning the warning into an
error with no escape hatch might be too aggressive, as we have had these
warnings triggered by new compiler generated sections, and suggested turning
orphan sections into an error only if CONFIG_WERROR is set. Kees Cook echoed
and emphasized that the mandate from Linus is that we should avoid breaking
builds. It wrecks bisection, it causes problems across compiler versions, etc.

Thus upgrade the orphan section warning to a hard link error only if
CONFIG_WERROR is set.

Suggested-by: Andrew Cooper <andrew.cooper3@citrix.com>
Suggested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Xin Li <xin3.li@intel.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221025073023.16137-2-xin3.li@intel.com
---
 Makefile                          |  2 +-
 arch/arm/boot/compressed/Makefile |  2 +-
 arch/arm64/kernel/vdso/Makefile   |  2 +-
 arch/arm64/kernel/vdso32/Makefile |  2 +-
 arch/x86/boot/compressed/Makefile |  2 +-
 init/Kconfig                      | 15 ++++++++++++---
 6 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/Makefile b/Makefile
index f41ec8c8426ba..9a496bef3170c 100644
--- a/Makefile
+++ b/Makefile
@@ -1118,7 +1118,7 @@ endif
 # We never want expected sections to be placed heuristically by the
 # linker. All sections should be explicitly named in the linker script.
 ifdef CONFIG_LD_ORPHAN_WARN
-LDFLAGS_vmlinux += --orphan-handling=warn
+LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
 endif
 
 # Align the bit size of userspace programs with the kernel
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 41bcbb460fac4..53cadc3aaff11 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -123,7 +123,7 @@ LDFLAGS_vmlinux += --no-undefined
 LDFLAGS_vmlinux += -X
 # Report orphan sections
 ifdef CONFIG_LD_ORPHAN_WARN
-LDFLAGS_vmlinux += --orphan-handling=warn
+LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
 endif
 # Next argument is a linker script
 LDFLAGS_vmlinux += -T
diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index 619e2dc7ee14c..beaf9586338f5 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -27,7 +27,7 @@ ldflags-y := -shared -soname=linux-vdso.so.1 --hash-style=sysv	\
 	     -Bsymbolic --build-id=sha1 -n $(btildflags-y)
 
 ifdef CONFIG_LD_ORPHAN_WARN
-  ldflags-y += --orphan-handling=warn
+  ldflags-y += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
 endif
 
 ldflags-y += -T
diff --git a/arch/arm64/kernel/vdso32/Makefile b/arch/arm64/kernel/vdso32/Makefile
index 36c8f66cad251..f59bd1a4ead6b 100644
--- a/arch/arm64/kernel/vdso32/Makefile
+++ b/arch/arm64/kernel/vdso32/Makefile
@@ -104,7 +104,7 @@ VDSO_AFLAGS += -D__ASSEMBLY__
 VDSO_LDFLAGS += -Bsymbolic --no-undefined -soname=linux-vdso.so.1
 VDSO_LDFLAGS += -z max-page-size=4096 -z common-page-size=4096
 VDSO_LDFLAGS += -shared --hash-style=sysv --build-id=sha1
-VDSO_LDFLAGS += --orphan-handling=warn
+VDSO_LDFLAGS += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
 
 
 # Borrow vdsomunge.c from the arm vDSO
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 3a261abb6d158..66b8a8cb5a0f8 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -68,7 +68,7 @@ KBUILD_LDFLAGS += $(call ld-option,--no-ld-generated-unwind-info)
 # address by the bootloader.
 LDFLAGS_vmlinux := -pie $(call ld-option, --no-dynamic-linker)
 ifdef CONFIG_LD_ORPHAN_WARN
-LDFLAGS_vmlinux += --orphan-handling=warn
+LDFLAGS_vmlinux += --orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL)
 endif
 LDFLAGS_vmlinux += -z noexecstack
 ifeq ($(CONFIG_LD_IS_BFD),y)
diff --git a/init/Kconfig b/init/Kconfig
index 694f7c160c9c1..bb1225ef04e76 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -159,10 +159,12 @@ config WERROR
 	help
 	  A kernel build should not cause any compiler warnings, and this
 	  enables the '-Werror' (for C) and '-Dwarnings' (for Rust) flags
-	  to enforce that rule by default.
+	  to enforce that rule by default. Certain warnings from other tools
+	  such as the linker may be upgraded to errors with this option as
+	  well.
 
-	  However, if you have a new (or very old) compiler with odd and
-	  unusual warnings, or you have some architecture with problems,
+	  However, if you have a new (or very old) compiler or linker with odd
+	  and unusual warnings, or you have some architecture with problems,
 	  you may need to disable this config option in order to
 	  successfully build the kernel.
 
@@ -1454,6 +1456,13 @@ config LD_ORPHAN_WARN
 	def_bool y
 	depends on ARCH_WANT_LD_ORPHAN_WARN
 	depends on $(ld-option,--orphan-handling=warn)
+	depends on $(ld-option,--orphan-handling=error)
+
+config LD_ORPHAN_WARN_LEVEL
+        string
+        depends on LD_ORPHAN_WARN
+        default "error" if WERROR
+        default "warn"
 
 config SYSCTL
 	bool
-- 
GitLab


From cd536db050993f7c220a6cfb01de5356032b6f8e Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 18 Oct 2022 02:10:11 -0700
Subject: [PATCH 0734/4122] dma-buf: Proactively round up to kmalloc bucket
 size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of discovering the kmalloc bucket size _after_ allocation, round
up proactively so the allocation is explicitly made for the full size,
allowing the compiler to correctly reason about the resulting size of
the buffer through the existing __alloc_size() hint.

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linaro-mm-sig@lists.linaro.org
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221018090858.never.941-kees@kernel.org
---
 drivers/dma-buf/dma-resv.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index e3885c90a3acb..1c76aed8e2626 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -98,12 +98,17 @@ static void dma_resv_list_set(struct dma_resv_list *list,
 static struct dma_resv_list *dma_resv_list_alloc(unsigned int max_fences)
 {
 	struct dma_resv_list *list;
+	size_t size;
 
-	list = kmalloc(struct_size(list, table, max_fences), GFP_KERNEL);
+	/* Round up to the next kmalloc bucket size. */
+	size = kmalloc_size_roundup(struct_size(list, table, max_fences));
+
+	list = kmalloc(size, GFP_KERNEL);
 	if (!list)
 		return NULL;
 
-	list->max_fences = (ksize(list) - offsetof(typeof(*list), table)) /
+	/* Given the resulting bucket size, recalculated max_fences. */
+	list->max_fences = (size - offsetof(typeof(*list), table)) /
 		sizeof(*list->table);
 
 	return list;
-- 
GitLab


From 905889bc6c842d18f369bf2834cf7219f32709ae Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 23 Sep 2022 13:28:13 -0700
Subject: [PATCH 0735/4122] btrfs: send: Proactively round up to kmalloc bucket
 size

Instead of discovering the kmalloc bucket size _after_ allocation, round
up proactively so the allocation is explicitly made for the full size,
allowing the compiler to correctly reason about the resulting size of
the buffer through the existing __alloc_size() hint.

Cc: Chris Mason <clm@fb.com>
Cc: Josef Bacik <josef@toxicpanda.com>
Cc: linux-btrfs@vger.kernel.org
Acked-by: David Sterba <dsterba@suse.com>
Link: https://lore.kernel.org/lkml/20220922133014.GI32411@suse.cz
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20220923202822.2667581-8-keescook@chromium.org
---
 fs/btrfs/send.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 4ef4167072b89..f53e8049473dd 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -438,6 +438,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
 	path_len = p->end - p->start;
 	old_buf_len = p->buf_len;
 
+	/*
+	 * Allocate to the next largest kmalloc bucket size, to let
+	 * the fast path happen most of the time.
+	 */
+	len = kmalloc_size_roundup(len);
 	/*
 	 * First time the inline_buf does not suffice
 	 */
@@ -451,11 +456,7 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
 	if (!tmp_buf)
 		return -ENOMEM;
 	p->buf = tmp_buf;
-	/*
-	 * The real size of the buffer is bigger, this will let the fast path
-	 * happen most of the time
-	 */
-	p->buf_len = ksize(p->buf);
+	p->buf_len = len;
 
 	if (p->reversed) {
 		tmp_buf = p->buf + old_buf_len - path_len - 1;
-- 
GitLab


From 6dd142d9013ca82155d0c069434c60a0d5755ec0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 20 Sep 2022 14:13:05 -0700
Subject: [PATCH 0736/4122] coredump: Proactively round up to kmalloc bucket
 size

Instead of discovering the kmalloc bucket size _after_ allocation, round
up proactively so the allocation is explicitly made for the full size,
allowing the compiler to correctly reason about the resulting size of
the buffer through the existing __alloc_size() hint.

Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 fs/coredump.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/coredump.c b/fs/coredump.c
index 7bad7785e8e67..97eaee3252515 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -68,7 +68,10 @@ struct core_name {
 
 static int expand_corename(struct core_name *cn, int size)
 {
-	char *corename = krealloc(cn->corename, size, GFP_KERNEL);
+	char *corename;
+
+	size = kmalloc_size_roundup(size);
+	corename = krealloc(cn->corename, size, GFP_KERNEL);
 
 	if (!corename)
 		return -ENOMEM;
@@ -76,7 +79,7 @@ static int expand_corename(struct core_name *cn, int size)
 	if (size > core_name_size) /* racy but harmless */
 		core_name_size = size;
 
-	cn->size = ksize(corename);
+	cn->size = size;
 	cn->corename = corename;
 	return 0;
 }
-- 
GitLab


From 79218fd0b38bb05e8dcb80a49342836274046432 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Mon, 17 Oct 2022 16:00:45 -0700
Subject: [PATCH 0737/4122] iommu/amd: Drop unnecessary checks in
 amd_iommu_attach_device()

The same checks are done in amd_iommu_probe_device(). If any of them fails
there, then the device won't get a group, so there's no way for it to even
reach amd_iommu_attach_device anymore.

Link: https://lore.kernel.org/r/c054654a81f2b675c73108fe4bf10e45335a721a.1666042872.git.nicolinc@nvidia.com
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/amd/iommu.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d3b39d0416fa3..45299eb7e8e30 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2155,21 +2155,13 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
 static int amd_iommu_attach_device(struct iommu_domain *dom,
 				   struct device *dev)
 {
+	struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
 	struct protection_domain *domain = to_pdomain(dom);
-	struct iommu_dev_data *dev_data;
-	struct amd_iommu *iommu;
+	struct amd_iommu *iommu = rlookup_amd_iommu(dev);
 	int ret;
 
-	if (!check_device(dev))
-		return -EINVAL;
-
-	dev_data = dev_iommu_priv_get(dev);
 	dev_data->defer_attach = false;
 
-	iommu = rlookup_amd_iommu(dev);
-	if (!iommu)
-		return -EINVAL;
-
 	if (dev_data->domain)
 		detach_device(dev);
 
-- 
GitLab


From 00208852d351ca6e4a8b9ff0c5376fa3a8ed8eaa Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Mon, 17 Oct 2022 16:01:22 -0700
Subject: [PATCH 0738/4122] iommu: Add return value rules to attach_dev op and
 APIs

Cases like VFIO wish to attach a device to an existing domain that was
not allocated specifically from the device. This raises a condition
where the IOMMU driver can fail the domain attach because the domain and
device are incompatible with each other.

This is a soft failure that can be resolved by using a different domain.

Provide a dedicated errno EINVAL from the IOMMU driver during attach that
the reason why the attach failed is because of domain incompatibility.

VFIO can use this to know that the attach is a soft failure and it should
continue searching. Otherwise, the attach will be a hard failure and VFIO
will return the code to userspace.

Update kdocs to add rules of return value to the attach_dev op and APIs.

Link: https://lore.kernel.org/r/bd56d93c18621104a0fa1b0de31e9b760b81b769.1666042872.git.nicolinc@nvidia.com
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommu.c | 24 ++++++++++++++++++++++++
 include/linux/iommu.h | 12 ++++++++++++
 2 files changed, 36 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 65a3b3d886dc0..972731f0b328a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1949,6 +1949,18 @@ static int __iommu_attach_device(struct iommu_domain *domain,
 	return ret;
 }
 
+/**
+ * iommu_attach_device - Attach an IOMMU domain to a device
+ * @domain: IOMMU domain to attach
+ * @dev: Device that will be attached
+ *
+ * Returns 0 on success and error code on failure
+ *
+ * Note that EINVAL can be treated as a soft failure, indicating
+ * that certain configuration of the domain is incompatible with
+ * the device. In this case attaching a different domain to the
+ * device may succeed.
+ */
 int iommu_attach_device(struct iommu_domain *domain, struct device *dev)
 {
 	struct iommu_group *group;
@@ -2075,6 +2087,18 @@ static int __iommu_attach_group(struct iommu_domain *domain,
 	return ret;
 }
 
+/**
+ * iommu_attach_group - Attach an IOMMU domain to an IOMMU group
+ * @domain: IOMMU domain to attach
+ * @group: IOMMU group that will be attached
+ *
+ * Returns 0 on success and error code on failure
+ *
+ * Note that EINVAL can be treated as a soft failure, indicating
+ * that certain configuration of the domain is incompatible with
+ * the group. In this case attaching a different domain to the
+ * group may succeed.
+ */
 int iommu_attach_group(struct iommu_domain *domain, struct iommu_group *group)
 {
 	int ret;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3c9da1f8979e3..857898d102b36 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -266,6 +266,18 @@ struct iommu_ops {
 /**
  * struct iommu_domain_ops - domain specific operations
  * @attach_dev: attach an iommu domain to a device
+ *  Return:
+ * * 0		- success
+ * * EINVAL	- can indicate that device and domain are incompatible due to
+ *		  some previous configuration of the domain, in which case the
+ *		  driver shouldn't log an error, since it is legitimate for a
+ *		  caller to test reuse of existing domains. Otherwise, it may
+ *		  still represent some other fundamental problem
+ * * ENOMEM	- out of memory
+ * * ENOSPC	- non-ENOMEM type of resource allocation failures
+ * * EBUSY	- device is attached to a domain and cannot be changed
+ * * ENODEV	- device specific errors, not able to be attached
+ * * <others>	- treated as ENODEV by the caller. Use is discouraged
  * @detach_dev: detach an iommu domain from a device
  * @map: map a physically contiguous memory region to an iommu domain
  * @map_pages: map a physically contiguous set of pages of the same size to
-- 
GitLab


From bd7ebb7719356d750b1b4d671535922bae43fb3b Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Mon, 17 Oct 2022 16:02:13 -0700
Subject: [PATCH 0739/4122] iommu: Regulate EINVAL in ->attach_dev callback
 functions

Following the new rules in include/linux/iommu.h kdocs, EINVAL now can be
used to indicate that domain and device are incompatible by a caller that
treats it as a soft failure and tries attaching to another domain.

On the other hand, there are ->attach_dev callback functions returning it
for obvious device-specific errors. They will result in some inefficiency
in the caller handling routine.

Update these places to corresponding errnos following the new rules.

Link: https://lore.kernel.org/r/5924c03bea637f05feb2a20d624bae086b555ec5.1666042872.git.nicolinc@nvidia.com
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/fsl_pamu.c        | 2 +-
 drivers/iommu/fsl_pamu_domain.c | 4 ++--
 drivers/iommu/intel/pasid.c     | 6 ++++--
 drivers/iommu/mtk_iommu.c       | 2 +-
 drivers/iommu/omap-iommu.c      | 4 ++--
 drivers/iommu/virtio-iommu.c    | 2 +-
 6 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index 0d03f837a5d4e..2eb3211c81677 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -211,7 +211,7 @@ int pamu_config_ppaace(int liodn, u32 omi, u32 stashid, int prot)
 		ppaace->op_encode.index_ot.omi = omi;
 	} else if (~omi != 0) {
 		pr_debug("bad operation mapping index: %d\n", omi);
-		return -EINVAL;
+		return -ENODEV;
 	}
 
 	/* configure stash id */
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
index fa20f4b03e12d..4408ac3c49b61 100644
--- a/drivers/iommu/fsl_pamu_domain.c
+++ b/drivers/iommu/fsl_pamu_domain.c
@@ -258,7 +258,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
 	liodn = of_get_property(dev->of_node, "fsl,liodn", &len);
 	if (!liodn) {
 		pr_debug("missing fsl,liodn property at %pOF\n", dev->of_node);
-		return -EINVAL;
+		return -ENODEV;
 	}
 
 	spin_lock_irqsave(&dma_domain->domain_lock, flags);
@@ -267,7 +267,7 @@ static int fsl_pamu_attach_device(struct iommu_domain *domain,
 		if (liodn[i] >= PAACE_NUMBER_ENTRIES) {
 			pr_debug("Invalid liodn %d, attach device failed for %pOF\n",
 				 liodn[i], dev->of_node);
-			ret = -EINVAL;
+			ret = -ENODEV;
 			break;
 		}
 
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index c30ddac40ee5f..95d73f19ab619 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -101,8 +101,10 @@ int intel_pasid_alloc_table(struct device *dev)
 
 	might_sleep();
 	info = dev_iommu_priv_get(dev);
-	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
-		return -EINVAL;
+	if (WARN_ON(!info || !dev_is_pci(dev)))
+		return -ENODEV;
+	if (WARN_ON(info->pasid_table))
+		return -EEXIST;
 
 	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
 	if (!pasid_table)
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 2ab2ecfe01f80..eda441d0c6b68 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -609,7 +609,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom,
 	dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
 	if (!dom->iop) {
 		dev_err(data->dev, "Failed to alloc io pgtable\n");
-		return -EINVAL;
+		return -ENOMEM;
 	}
 
 	/* Update our support page sizes bitmap */
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 07ee2600113c2..3f153f9e0ac59 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1414,7 +1414,7 @@ static int omap_iommu_attach_init(struct device *dev,
 
 	odomain->num_iommus = omap_iommu_count(dev);
 	if (!odomain->num_iommus)
-		return -EINVAL;
+		return -ENODEV;
 
 	odomain->iommus = kcalloc(odomain->num_iommus, sizeof(*iommu),
 				  GFP_ATOMIC);
@@ -1464,7 +1464,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 
 	if (!arch_data || !arch_data->iommu_dev) {
 		dev_err(dev, "device doesn't have an associated iommu\n");
-		return -EINVAL;
+		return -ENODEV;
 	}
 
 	spin_lock(&omap_domain->lock);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 8b1b5c270e502..0b64e7f64e68c 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -670,7 +670,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
 		dev_err(vdev->dev,
 			"granule 0x%lx larger than system page size 0x%lx\n",
 			viommu_page_size, PAGE_SIZE);
-		return -EINVAL;
+		return -ENODEV;
 	}
 
 	ret = ida_alloc_range(&viommu->domain_ids, viommu->first_domain,
-- 
GitLab


From f4a14773579302e5f0c4bf80b03f0db7ce67f2ce Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Mon, 17 Oct 2022 16:02:21 -0700
Subject: [PATCH 0740/4122] iommu: Use EINVAL for incompatible device/domain in
 ->attach_dev

Following the new rules in include/linux/iommu.h kdocs, update all drivers
->attach_dev callback functions to return EINVAL in the failure paths that
are related to domain incompatibility.

Also, drop adjacent error prints to prevent a kernel log spam.

Link: https://lore.kernel.org/r/f52a07f7320da94afe575c9631340d0019a203a7.1666042873.git.nicolinc@nvidia.com
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 11 +----------
 drivers/iommu/arm/arm-smmu/arm-smmu.c       |  3 ---
 drivers/iommu/arm/arm-smmu/qcom_iommu.c     |  7 +------
 drivers/iommu/intel/iommu.c                 | 10 +++-------
 drivers/iommu/ipmmu-vmsa.c                  |  2 --
 drivers/iommu/omap-iommu.c                  |  2 +-
 drivers/iommu/sprd-iommu.c                  |  4 +---
 drivers/iommu/tegra-gart.c                  |  2 +-
 drivers/iommu/virtio-iommu.c                |  3 +--
 9 files changed, 9 insertions(+), 35 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 6d5df91c5c465..8b0a1e476d44d 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2430,23 +2430,14 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 			goto out_unlock;
 		}
 	} else if (smmu_domain->smmu != smmu) {
-		dev_err(dev,
-			"cannot attach to SMMU %s (upstream of %s)\n",
-			dev_name(smmu_domain->smmu->dev),
-			dev_name(smmu->dev));
-		ret = -ENXIO;
+		ret = -EINVAL;
 		goto out_unlock;
 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
 		   master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
-		dev_err(dev,
-			"cannot attach to incompatible domain (%u SSID bits != %u)\n",
-			smmu_domain->s1_cfg.s1cdmax, master->ssid_bits);
 		ret = -EINVAL;
 		goto out_unlock;
 	} else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
 		   smmu_domain->stall_enabled != master->stall_enabled) {
-		dev_err(dev, "cannot attach to stall-%s domain\n",
-			smmu_domain->stall_enabled ? "enabled" : "disabled");
 		ret = -EINVAL;
 		goto out_unlock;
 	}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c
index 30dab1418e3ff..719fbca1fe52a 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c
@@ -1150,9 +1150,6 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	 * different SMMUs.
 	 */
 	if (smmu_domain->smmu != smmu) {
-		dev_err(dev,
-			"cannot attach to SMMU %s whilst already attached to domain on SMMU %s\n",
-			dev_name(smmu_domain->smmu->dev), dev_name(smmu->dev));
 		ret = -EINVAL;
 		goto rpm_put;
 	}
diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index 3869c3ecda8cd..bfd7b51eb5dbf 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -381,13 +381,8 @@ static int qcom_iommu_attach_dev(struct iommu_domain *domain, struct device *dev
 	 * Sanity check the domain. We don't support domains across
 	 * different IOMMUs.
 	 */
-	if (qcom_domain->iommu != qcom_iommu) {
-		dev_err(dev, "cannot attach to IOMMU %s while already "
-			"attached to domain on IOMMU %s\n",
-			dev_name(qcom_domain->iommu->dev),
-			dev_name(qcom_iommu->dev));
+	if (qcom_domain->iommu != qcom_iommu)
 		return -EINVAL;
-	}
 
 	return 0;
 }
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 48cdcd0a5cf34..6f1a59206d2e0 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4194,19 +4194,15 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 		return -ENODEV;
 
 	if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
-		return -EOPNOTSUPP;
+		return -EINVAL;
 
 	/* check if this iommu agaw is sufficient for max mapped address */
 	addr_width = agaw_to_width(iommu->agaw);
 	if (addr_width > cap_mgaw(iommu->cap))
 		addr_width = cap_mgaw(iommu->cap);
 
-	if (dmar_domain->max_addr > (1LL << addr_width)) {
-		dev_err(dev, "%s: iommu width (%d) is not "
-		        "sufficient for the mapped address (%llx)\n",
-		        __func__, addr_width, dmar_domain->max_addr);
-		return -EFAULT;
-	}
+	if (dmar_domain->max_addr > (1LL << addr_width))
+		return -EINVAL;
 	dmar_domain->gaw = addr_width;
 
 	/*
diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 3b30c0752274f..22230cc15dcd1 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -628,8 +628,6 @@ static int ipmmu_attach_device(struct iommu_domain *io_domain,
 		 * Something is wrong, we can't attach two devices using
 		 * different IOMMUs to the same domain.
 		 */
-		dev_err(dev, "Can't attach IPMMU %s to domain on IPMMU %s\n",
-			dev_name(mmu->dev), dev_name(domain->mmu->dev));
 		ret = -EINVAL;
 	} else
 		dev_info(dev, "Reusing IPMMU context %u\n", domain->context_id);
diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c
index 3f153f9e0ac59..2fd7702c67096 100644
--- a/drivers/iommu/omap-iommu.c
+++ b/drivers/iommu/omap-iommu.c
@@ -1472,7 +1472,7 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 	/* only a single client device can be attached to a domain */
 	if (omap_domain->dev) {
 		dev_err(dev, "iommu domain is already attached\n");
-		ret = -EBUSY;
+		ret = -EINVAL;
 		goto out;
 	}
 
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index fadd2c907222b..e027933755989 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -237,10 +237,8 @@ static int sprd_iommu_attach_device(struct iommu_domain *domain,
 	struct sprd_iommu_domain *dom = to_sprd_domain(domain);
 	size_t pgt_size = sprd_iommu_pgt_size(domain);
 
-	if (dom->sdev) {
-		pr_err("There's already a device attached to this domain.\n");
+	if (dom->sdev)
 		return -EINVAL;
-	}
 
 	dom->pgt_va = dma_alloc_coherent(sdev->dev, pgt_size, &dom->pgt_pa, GFP_KERNEL);
 	if (!dom->pgt_va)
diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c
index e5ca3cf1a9496..ed53279d1106c 100644
--- a/drivers/iommu/tegra-gart.c
+++ b/drivers/iommu/tegra-gart.c
@@ -112,7 +112,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain,
 	spin_lock(&gart->dom_lock);
 
 	if (gart->active_domain && gart->active_domain != domain) {
-		ret = -EBUSY;
+		ret = -EINVAL;
 	} else if (dev_iommu_priv_get(dev) != domain) {
 		dev_iommu_priv_set(dev, domain);
 		gart->active_domain = domain;
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 0b64e7f64e68c..8226b4da43507 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -734,8 +734,7 @@ static int viommu_attach_dev(struct iommu_domain *domain, struct device *dev)
 		 */
 		ret = viommu_domain_finalise(vdev, domain);
 	} else if (vdomain->viommu != vdev->viommu) {
-		dev_err(dev, "cannot attach to foreign vIOMMU\n");
-		ret = -EXDEV;
+		ret = -EINVAL;
 	}
 	mutex_unlock(&vdomain->mutex);
 
-- 
GitLab


From 04cee82e04d2aff3d177ef0021ecdff228daf7b8 Mon Sep 17 00:00:00 2001
From: Nicolin Chen <nicolinc@nvidia.com>
Date: Mon, 17 Oct 2022 16:02:36 -0700
Subject: [PATCH 0741/4122] iommu: Propagate return value in ->attach_dev
 callback functions

The mtk_iommu and virtio drivers have places in the ->attach_dev callback
functions that return hardcode errnos instead of the returned values, but
callers of these ->attach_dv callback functions may care. Propagate them
directly without the extra conversions.

Link: https://lore.kernel.org/r/ca8c5a447b87002334f83325f28823008b4ce420.1666042873.git.nicolinc@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yong Wu <yong.wu@mediatek.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/mtk_iommu.c    | 2 +-
 drivers/iommu/virtio-iommu.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index eda441d0c6b68..b383c8327f9cb 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -668,7 +668,7 @@ static int mtk_iommu_attach_device(struct iommu_domain *domain,
 		ret = mtk_iommu_domain_finalise(dom, frstdata, region_id);
 		if (ret) {
 			mutex_unlock(&dom->mutex);
-			return -ENODEV;
+			return ret;
 		}
 		dom->bank = &data->bank[bankid];
 	}
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 8226b4da43507..5b8fe9bfa9a5b 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -697,7 +697,7 @@ static int viommu_domain_finalise(struct viommu_endpoint *vdev,
 		if (ret) {
 			ida_free(&viommu->domain_ids, vdomain->id);
 			vdomain->viommu = NULL;
-			return -EOPNOTSUPP;
+			return ret;
 		}
 	}
 
-- 
GitLab


From 91586ce0d39a05f88795aa8814fb99b1387236b3 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 21 Oct 2022 10:34:22 +0800
Subject: [PATCH 0742/4122] f2fs: fix to invalidate dcc->f2fs_issue_discard in
 error path

Syzbot reports a NULL pointer dereference issue as below:

 __refcount_add include/linux/refcount.h:193 [inline]
 __refcount_inc include/linux/refcount.h:250 [inline]
 refcount_inc include/linux/refcount.h:267 [inline]
 get_task_struct include/linux/sched/task.h:110 [inline]
 kthread_stop+0x34/0x1c0 kernel/kthread.c:703
 f2fs_stop_discard_thread+0x3c/0x5c fs/f2fs/segment.c:1638
 kill_f2fs_super+0x5c/0x194 fs/f2fs/super.c:4522
 deactivate_locked_super+0x70/0xe8 fs/super.c:332
 deactivate_super+0xd0/0xd4 fs/super.c:363
 cleanup_mnt+0x1f8/0x234 fs/namespace.c:1186
 __cleanup_mnt+0x20/0x30 fs/namespace.c:1193
 task_work_run+0xc4/0x14c kernel/task_work.c:177
 exit_task_work include/linux/task_work.h:38 [inline]
 do_exit+0x26c/0xbe0 kernel/exit.c:795
 do_group_exit+0x60/0xe8 kernel/exit.c:925
 __do_sys_exit_group kernel/exit.c:936 [inline]
 __se_sys_exit_group kernel/exit.c:934 [inline]
 __wake_up_parent+0x0/0x40 kernel/exit.c:934
 __invoke_syscall arch/arm64/kernel/syscall.c:38 [inline]
 invoke_syscall arch/arm64/kernel/syscall.c:52 [inline]
 el0_svc_common+0x138/0x220 arch/arm64/kernel/syscall.c:142
 do_el0_svc+0x48/0x164 arch/arm64/kernel/syscall.c:206
 el0_svc+0x58/0x150 arch/arm64/kernel/entry-common.c:636
 el0t_64_sync_handler+0x84/0xf0 arch/arm64/kernel/entry-common.c:654
 el0t_64_sync+0x18c/0x190 arch/arm64/kernel/entry.S:581

The root cause of this issue is in error path of f2fs_start_discard_thread(),
it missed to invalidate dcc->f2fs_issue_discard, later kthread_stop() may
access invalid pointer.

Fixes: 4d67490498ac ("f2fs: Don't create discard thread when device doesn't support realtime discard")
Reported-by: syzbot+035a381ea1afb63f098d@syzkaller.appspotmail.com
Reported-by: syzbot+729c925c2d9fc495ddee@syzkaller.appspotmail.com
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index acf3d3fa43635..7a4f7c88b8b9f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2025,8 +2025,10 @@ int f2fs_start_discard_thread(struct f2fs_sb_info *sbi)
 
 	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
 				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
-	if (IS_ERR(dcc->f2fs_issue_discard))
+	if (IS_ERR(dcc->f2fs_issue_discard)) {
 		err = PTR_ERR(dcc->f2fs_issue_discard);
+		dcc->f2fs_issue_discard = NULL;
+	}
 
 	return err;
 }
-- 
GitLab


From 18792e64c86dd7e34ba28e4f61faba472b7bf5fc Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Thu, 6 Oct 2022 23:09:28 +0800
Subject: [PATCH 0743/4122] f2fs: support fault injection for
 f2fs_is_valid_blkaddr()

This patch supports to inject fault into f2fs_is_valid_blkaddr() to
simulate accessing inconsistent data/meta block addressses from caller.

Usage:
a) echo 262144 > /sys/fs/f2fs/<dev>/inject_type or
b) mount -o fault_type=262144 <dev> <mountpoint>

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.rst | 1 +
 fs/f2fs/checkpoint.c               | 5 +++++
 fs/f2fs/f2fs.h                     | 1 +
 fs/f2fs/super.c                    | 1 +
 4 files changed, 8 insertions(+)

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 17df9a02ccff1..b797e8ec96ede 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -199,6 +199,7 @@ fault_type=%d		 Support configuring fault injection type, should be
 			 FAULT_SLAB_ALLOC	  0x000008000
 			 FAULT_DQUOT_INIT	  0x000010000
 			 FAULT_LOCK_OP		  0x000020000
+			 FAULT_BLKADDR		  0x000040000
 			 ===================	  ===========
 mode=%s			 Control block allocation mode which supports "adaptive"
 			 and "lfs". In "lfs" mode, there should be no random
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 0c82dae082aa9..c00694a502227 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -171,6 +171,11 @@ static bool __is_bitmap_valid(struct f2fs_sb_info *sbi, block_t blkaddr,
 bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
 					block_t blkaddr, int type)
 {
+	if (time_to_inject(sbi, FAULT_BLKADDR)) {
+		f2fs_show_injection_info(sbi, FAULT_BLKADDR);
+		return false;
+	}
+
 	switch (type) {
 	case META_NAT:
 		break;
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e6355a5683b75..f57cb49dc3830 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -60,6 +60,7 @@ enum {
 	FAULT_SLAB_ALLOC,
 	FAULT_DQUOT_INIT,
 	FAULT_LOCK_OP,
+	FAULT_BLKADDR,
 	FAULT_MAX,
 };
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 3834ead046200..df26fbe2bf586 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -61,6 +61,7 @@ const char *f2fs_fault_name[FAULT_MAX] = {
 	[FAULT_SLAB_ALLOC]	= "slab alloc",
 	[FAULT_DQUOT_INIT]	= "dquot initialize",
 	[FAULT_LOCK_OP]		= "lock_op",
+	[FAULT_BLKADDR]		= "invalid blkaddr",
 };
 
 void f2fs_build_fault_attr(struct f2fs_sb_info *sbi, unsigned int rate,
-- 
GitLab


From 3688cbe39b7a9ef3feb73234fb351de33fd1da52 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 11:08:31 +0800
Subject: [PATCH 0744/4122] f2fs: remove batched_trim_sections node

commit 377224c47118("f2fs: don't split checkpoint in fstrim") obsolete
batch mode and related sysfs entry.

Since this testing sysfs node has been deprecated for a long time, let's
remove it.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h  | 3 ---
 fs/f2fs/sysfs.c | 5 -----
 2 files changed, 8 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f57cb49dc3830..e990870bdab92 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1063,9 +1063,6 @@ struct f2fs_sm_info {
 	/* a threshold to reclaim prefree segments */
 	unsigned int rec_prefree_segments;
 
-	/* for batched trimming */
-	unsigned int trim_sections;		/* # of sections to trim */
-
 	struct list_head sit_entry_set;	/* sit entry set list */
 
 	unsigned int ipu_policy;	/* in-place-update policy */
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index df27afd71ef48..926b7a844362e 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -488,9 +488,6 @@ out:
 			return -EINVAL;
 	}
 
-	if (!strcmp(a->attr.name, "trim_sections"))
-		return -EINVAL;
-
 	if (!strcmp(a->attr.name, "gc_urgent")) {
 		if (t == 0) {
 			sbi->gc_mode = GC_NORMAL;
@@ -790,7 +787,6 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
-F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
@@ -919,7 +915,6 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(max_discard_issue_time),
 	ATTR_LIST(discard_granularity),
 	ATTR_LIST(pending_discard),
-	ATTR_LIST(batched_trim_sections),
 	ATTR_LIST(ipu_policy),
 	ATTR_LIST(min_ipu_util),
 	ATTR_LIST(min_fsync_blocks),
-- 
GitLab


From 6359a1aaca527311b7145ec6eb16890a5ddf5214 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 14:50:24 +0800
Subject: [PATCH 0745/4122] f2fs: fix gc mode when gc_urgent_high_remaining is
 1

Under the current logic, when gc_urgent_high_remaining is set to 1,
the mode will be switched to normal at the beginning, instead of
running in gc_urgent mode.

Let's switch the gc mode back to normal when the gc ends.

Fixes: 265576181b4a ("f2fs: remove gc_urgent_high_limited for cleanup")
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 7b4be412cec06..d2e9c280773f0 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -96,16 +96,6 @@ static int gc_thread_func(void *data)
 		 * invalidated soon after by user update or deletion.
 		 * So, I'd like to wait some time to collect dirty segments.
 		 */
-		if (sbi->gc_mode == GC_URGENT_HIGH) {
-			spin_lock(&sbi->gc_urgent_high_lock);
-			if (sbi->gc_urgent_high_remaining) {
-				sbi->gc_urgent_high_remaining--;
-				if (!sbi->gc_urgent_high_remaining)
-					sbi->gc_mode = GC_NORMAL;
-			}
-			spin_unlock(&sbi->gc_urgent_high_lock);
-		}
-
 		if (sbi->gc_mode == GC_URGENT_HIGH ||
 				sbi->gc_mode == GC_URGENT_MID) {
 			wait_ms = gc_th->urgent_sleep_time;
@@ -162,6 +152,15 @@ do_gc:
 		/* balancing f2fs's metadata periodically */
 		f2fs_balance_fs_bg(sbi, true);
 next:
+		if (sbi->gc_mode == GC_URGENT_HIGH) {
+			spin_lock(&sbi->gc_urgent_high_lock);
+			if (sbi->gc_urgent_high_remaining) {
+				sbi->gc_urgent_high_remaining--;
+				if (!sbi->gc_urgent_high_remaining)
+					sbi->gc_mode = GC_NORMAL;
+			}
+			spin_unlock(&sbi->gc_urgent_high_lock);
+		}
 		sb_end_write(sbi->sb);
 
 	} while (!kthread_should_stop());
-- 
GitLab


From 44b9d01f2ee32884a7de270394b0fb7f75f87dba Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 16:05:26 +0800
Subject: [PATCH 0746/4122] f2fs: cleanup in f2fs_create_flush_cmd_control()

Just cleanup for readable, no functional changes.

Suggested-by: Chao Yu <chao@kernel.org>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 7a4f7c88b8b9f..0df47ad80efba 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -620,12 +620,12 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
 {
 	dev_t dev = sbi->sb->s_bdev->bd_dev;
 	struct flush_cmd_control *fcc;
-	int err = 0;
+	int err;
 
 	if (SM_I(sbi)->fcc_info) {
 		fcc = SM_I(sbi)->fcc_info;
 		if (fcc->f2fs_issue_flush)
-			return err;
+			return 0;
 		goto init_thread;
 	}
 
@@ -638,7 +638,7 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
 	init_llist_head(&fcc->issue_list);
 	SM_I(sbi)->fcc_info = fcc;
 	if (!test_opt(sbi, FLUSH_MERGE))
-		return err;
+		return 0;
 
 init_thread:
 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
@@ -650,7 +650,7 @@ init_thread:
 		return err;
 	}
 
-	return err;
+	return 0;
 }
 
 void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
-- 
GitLab


From b5f1a218ae5e4339130d6e733f0e63d623e09a2c Mon Sep 17 00:00:00 2001
From: Dongdong Zhang <zhangdongdong1@oppo.com>
Date: Tue, 25 Oct 2022 17:40:36 +0800
Subject: [PATCH 0747/4122] f2fs: fix normal discard process

In the DPOLICY_BG mode, there is a conflict between
the two conditions "i + 1 < dpolicy->granularity" and
"i < DEFAULT_DISCARD_GRANULARITY". If i = 15, the first
condition is false, it will enter the second condition
and dispatch all small granularity discards in function
 __issue_discard_cmd_orderly. The restrictive effect
of the first condition to small discards will be
invalidated. These two conditions should align.

Fixes: 20ee4382322c ("f2fs: issue small discard by LBA order")
Signed-off-by: Dongdong Zhang <zhangdongdong1@oppo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0df47ad80efba..38f6a2bcb158b 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1448,7 +1448,7 @@ retry:
 		if (i + 1 < dpolicy->granularity)
 			break;
 
-		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+		if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
 			return __issue_discard_cmd_orderly(sbi, dpolicy);
 
 		pend_list = &dcc->pend_list[i];
-- 
GitLab


From 6047de5482c33d5f912cdc907336fde9ebc5714e Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 01:54:01 +0800
Subject: [PATCH 0748/4122] f2fs: add barrier mount option

This patch adds a mount option, barrier, in f2fs.
The barrier option is the opposite of nobarrier.
If this option is set, cache_flush commands are allowed to be issued.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.rst | 2 ++
 fs/f2fs/super.c                    | 7 +++++++
 2 files changed, 9 insertions(+)

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index b797e8ec96ede..6e67c5e6c7c37 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -154,6 +154,8 @@ nobarrier		 This option can be used if underlying storage guarantees
 			 If this option is set, no cache_flush commands are issued
 			 but f2fs still guarantees the write ordering of all the
 			 data writes.
+barrier		 If this option is set, cache_flush commands are allowed to be
+			 issued.
 fastboot		 This option is used when a system wants to reduce mount
 			 time as much as possible, even though normal performance
 			 can be sacrificed.
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index df26fbe2bf586..a247027711d89 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -111,6 +111,7 @@ enum {
 	Opt_noinline_dentry,
 	Opt_flush_merge,
 	Opt_noflush_merge,
+	Opt_barrier,
 	Opt_nobarrier,
 	Opt_fastboot,
 	Opt_extent_cache,
@@ -187,6 +188,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_noinline_dentry, "noinline_dentry"},
 	{Opt_flush_merge, "flush_merge"},
 	{Opt_noflush_merge, "noflush_merge"},
+	{Opt_barrier, "barrier"},
 	{Opt_nobarrier, "nobarrier"},
 	{Opt_fastboot, "fastboot"},
 	{Opt_extent_cache, "extent_cache"},
@@ -807,6 +809,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 		case Opt_nobarrier:
 			set_opt(sbi, NOBARRIER);
 			break;
+		case Opt_barrier:
+			clear_opt(sbi, NOBARRIER);
+			break;
 		case Opt_fastboot:
 			set_opt(sbi, FASTBOOT);
 			break;
@@ -1940,6 +1945,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_puts(seq, ",flush_merge");
 	if (test_opt(sbi, NOBARRIER))
 		seq_puts(seq, ",nobarrier");
+	else
+		seq_puts(seq, ",barrier");
 	if (test_opt(sbi, FASTBOOT))
 		seq_puts(seq, ",fastboot");
 	if (test_opt(sbi, EXTENT_CACHE))
-- 
GitLab


From a995627e6dd81d4485d40ce64880017a080d71e6 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Mon, 24 Oct 2022 16:00:35 -0700
Subject: [PATCH 0749/4122] f2fs: allow to set compression for inlined file

The below commit disallows to set compression on empty created file which
has a inline_data. Let's fix it.

Fixes: 7165841d578e ("f2fs: fix to check inline_data during compressed inode conversion")
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 82cda12582272..f96bbfa8b3991 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1915,6 +1915,10 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
 			if (!f2fs_disable_compressed_file(inode))
 				return -EINVAL;
 		} else {
+			/* try to convert inline_data to support compression */
+			int err = f2fs_convert_inline_inode(inode);
+			if (err)
+				return err;
 			if (!f2fs_may_compress(inode))
 				return -EINVAL;
 			if (S_ISREG(inode->i_mode) && F2FS_HAS_BLOCKS(inode))
-- 
GitLab


From c46867e9b9b8e0cdd6a5212c2b5ae616583a3bfd Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 16:32:26 +0800
Subject: [PATCH 0750/4122] f2fs: introduce max_ordered_discard sysfs node

The current max_ordered_discard is a fixed value, change it to be
configurable through the sys node.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  6 ++++++
 fs/f2fs/f2fs.h                          |  3 +++
 fs/f2fs/segment.c                       |  3 ++-
 fs/f2fs/sysfs.c                         | 11 +++++++++++
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 483639fb727b2..53f70eadec96a 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -99,6 +99,12 @@ Description:	Controls the issue rate of discard commands that consist of small
 		checkpoint is triggered, and issued during the checkpoint.
 		By default, it is disabled with 0.
 
+What:		/sys/fs/f2fs/<disk>/max_ordered_discard
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	Controls the maximum ordered discard, the unit size is one block(4KB).
+		Set it to 16 by default.
+
 What:		/sys/fs/f2fs/<disk>/max_discard_request
 Date:		December 2021
 Contact:	"Konstantin Vyshetsky" <vkon@google.com>
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index e990870bdab92..fa8dc00dfb2b8 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -331,6 +331,8 @@ struct discard_entry {
 
 /* default discard granularity of inner discard thread, unit: block count */
 #define DEFAULT_DISCARD_GRANULARITY		16
+/* default maximum discard granularity of ordered discard, unit: block count */
+#define DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY	16
 
 /* max discard pend list number */
 #define MAX_PLIST_NUM		512
@@ -410,6 +412,7 @@ struct discard_cmd_control {
 	unsigned int mid_discard_issue_time;	/* mid. interval between discard issue */
 	unsigned int max_discard_issue_time;	/* max. interval between discard issue */
 	unsigned int discard_granularity;	/* discard granularity */
+	unsigned int max_ordered_discard;	/* maximum discard granularity issued by lba order */
 	unsigned int undiscard_blks;		/* # of undiscard blocks */
 	unsigned int next_pos;			/* next discard position */
 	atomic_t issued_discard;		/* # of issued discard */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 38f6a2bcb158b..c470b443615f1 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1448,7 +1448,7 @@ retry:
 		if (i + 1 < dpolicy->granularity)
 			break;
 
-		if (i + 1 < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
+		if (i + 1 < dcc->max_ordered_discard && dpolicy->ordered)
 			return __issue_discard_cmd_orderly(sbi, dpolicy);
 
 		pend_list = &dcc->pend_list[i];
@@ -2048,6 +2048,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 		return -ENOMEM;
 
 	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
+	dcc->max_ordered_discard = DEFAULT_MAX_ORDERED_DISCARD_GRANULARITY;
 	if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SEGMENT)
 		dcc->discard_granularity = sbi->blocks_per_seg;
 	else if (F2FS_OPTION(sbi).discard_unit == DISCARD_UNIT_SECTION)
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 926b7a844362e..8095345ebdad5 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -483,6 +483,15 @@ out:
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "max_ordered_discard")) {
+		if (t == 0 || t > MAX_PLIST_NUM)
+			return -EINVAL;
+		if (!f2fs_block_unit_discard(sbi))
+			return -EINVAL;
+		*ui = t;
+		return count;
+	}
+
 	if (!strcmp(a->attr.name, "migration_granularity")) {
 		if (t == 0 || t > sbi->segs_per_sec)
 			return -EINVAL;
@@ -786,6 +795,7 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy);
 F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
@@ -914,6 +924,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(mid_discard_issue_time),
 	ATTR_LIST(max_discard_issue_time),
 	ATTR_LIST(discard_granularity),
+	ATTR_LIST(max_ordered_discard),
 	ATTR_LIST(pending_discard),
 	ATTR_LIST(ipu_policy),
 	ATTR_LIST(min_ipu_util),
-- 
GitLab


From a5029a57a2f3f2e2711f4ac2d876c3c83d1758fe Mon Sep 17 00:00:00 2001
From: Keoseong Park <keosung.park@samsung.com>
Date: Thu, 27 Oct 2022 20:01:05 +0900
Subject: [PATCH 0751/4122] f2fs: Fix typo in comments

Change "truncateion" to "truncation".

Signed-off-by: Keoseong Park <keosung.park@samsung.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f96bbfa8b3991..c605a4f2bce21 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -571,7 +571,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 	raw_node = F2FS_NODE(dn->node_page);
 	addr = blkaddr_in_node(raw_node) + base + ofs;
 
-	/* Assumption: truncateion starts with cluster */
+	/* Assumption: truncation starts with cluster */
 	for (; count > 0; count--, addr++, dn->ofs_in_node++, cluster_index++) {
 		block_t blkaddr = le32_to_cpu(*addr);
 
-- 
GitLab


From 0db18eec0d9a7ee525209e31e3ac2f673545b12f Mon Sep 17 00:00:00 2001
From: Mukesh Ojha <quic_mojha@quicinc.com>
Date: Thu, 27 Oct 2022 14:42:40 +0530
Subject: [PATCH 0752/4122] f2fs: fix the assign logic of iocb

commit 18ae8d12991b ("f2fs: show more DIO information in tracepoint")
introduces iocb field in 'f2fs_direct_IO_enter' trace event
And it only assigns the pointer and later it accesses its field
in trace print log.

Unable to handle kernel paging request at virtual address ffffffc04cef3d30
Mem abort info:
ESR = 0x96000007
EC = 0x25: DABT (current EL), IL = 32 bits

 pc : trace_raw_output_f2fs_direct_IO_enter+0x54/0xa4
 lr : trace_raw_output_f2fs_direct_IO_enter+0x2c/0xa4
 sp : ffffffc0443cbbd0
 x29: ffffffc0443cbbf0 x28: ffffff8935b120d0 x27: ffffff8935b12108
 x26: ffffff8935b120f0 x25: ffffff8935b12100 x24: ffffff8935b110c0
 x23: ffffff8935b10000 x22: ffffff88859a936c x21: ffffff88859a936c
 x20: ffffff8935b110c0 x19: ffffff8935b10000 x18: ffffffc03b195060
 x17: ffffff8935b11e76 x16: 00000000000000cc x15: ffffffef855c4f2c
 x14: 0000000000000001 x13: 000000000000004e x12: ffff0000ffffff00
 x11: ffffffef86c350d0 x10: 00000000000010c0 x9 : 000000000fe0002c
 x8 : ffffffc04cef3d28 x7 : 7f7f7f7f7f7f7f7f x6 : 0000000002000000
 x5 : ffffff8935b11e9a x4 : 0000000000006250 x3 : ffff0a00ffffff04
 x2 : 0000000000000002 x1 : ffffffef86a0a31f x0 : ffffff8935b10000
 Call trace:
  trace_raw_output_f2fs_direct_IO_enter+0x54/0xa4
  print_trace_fmt+0x9c/0x138
  print_trace_line+0x154/0x254
  tracing_read_pipe+0x21c/0x380
  vfs_read+0x108/0x3ac
  ksys_read+0x7c/0xec
  __arm64_sys_read+0x20/0x30
  invoke_syscall+0x60/0x150
  el0_svc_common.llvm.1237943816091755067+0xb8/0xf8
  do_el0_svc+0x28/0xa0

Fix it by copying the required variables for printing and while at
it fix the similar issue at some other places in the same file.

Fixes: bd984c03097b ("f2fs: show more DIO information in tracepoint")
Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/trace/events/f2fs.h | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index c6b372401c278..ff57e7f9914cc 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -322,7 +322,7 @@ TRACE_EVENT(f2fs_unlink_enter,
 		__field(ino_t,	ino)
 		__field(loff_t,	size)
 		__field(blkcnt_t, blocks)
-		__field(const char *,	name)
+		__string(name,  dentry->d_name.name)
 	),
 
 	TP_fast_assign(
@@ -330,7 +330,7 @@ TRACE_EVENT(f2fs_unlink_enter,
 		__entry->ino	= dir->i_ino;
 		__entry->size	= dir->i_size;
 		__entry->blocks	= dir->i_blocks;
-		__entry->name	= dentry->d_name.name;
+		__assign_str(name, dentry->d_name.name);
 	),
 
 	TP_printk("dev = (%d,%d), dir ino = %lu, i_size = %lld, "
@@ -338,7 +338,7 @@ TRACE_EVENT(f2fs_unlink_enter,
 		show_dev_ino(__entry),
 		__entry->size,
 		(unsigned long long)__entry->blocks,
-		__entry->name)
+		__get_str(name))
 );
 
 DEFINE_EVENT(f2fs__inode_exit, f2fs_unlink_exit,
@@ -940,25 +940,29 @@ TRACE_EVENT(f2fs_direct_IO_enter,
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(ino_t,	ino)
-		__field(struct kiocb *,	iocb)
+		__field(loff_t,	ki_pos)
+		__field(int,	ki_flags)
+		__field(u16,	ki_ioprio)
 		__field(unsigned long,	len)
 		__field(int,	rw)
 	),
 
 	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->iocb	= iocb;
-		__entry->len	= len;
-		__entry->rw	= rw;
+		__entry->dev		= inode->i_sb->s_dev;
+		__entry->ino		= inode->i_ino;
+		__entry->ki_pos		= iocb->ki_pos;
+		__entry->ki_flags	= iocb->ki_flags;
+		__entry->ki_ioprio	= iocb->ki_ioprio;
+		__entry->len		= len;
+		__entry->rw		= rw;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu pos = %lld len = %lu ki_flags = %x ki_ioprio = %x rw = %d",
 		show_dev_ino(__entry),
-		__entry->iocb->ki_pos,
+		__entry->ki_pos,
 		__entry->len,
-		__entry->iocb->ki_flags,
-		__entry->iocb->ki_ioprio,
+		__entry->ki_flags,
+		__entry->ki_ioprio,
 		__entry->rw)
 );
 
@@ -1407,19 +1411,19 @@ TRACE_EVENT(f2fs_write_checkpoint,
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(int,	reason)
-		__field(char *,	msg)
+		__string(dest_msg, msg)
 	),
 
 	TP_fast_assign(
 		__entry->dev		= sb->s_dev;
 		__entry->reason		= reason;
-		__entry->msg		= msg;
+		__assign_str(dest_msg, msg);
 	),
 
 	TP_printk("dev = (%d,%d), checkpoint for %s, state = %s",
 		show_dev(__entry->dev),
 		show_cpreason(__entry->reason),
-		__entry->msg)
+		__get_str(dest_msg))
 );
 
 DECLARE_EVENT_CLASS(f2fs_discard,
-- 
GitLab


From 195623f2d8e9361eaddec071ad298998ec0590ba Mon Sep 17 00:00:00 2001
From: Mukesh Ojha <quic_mojha@quicinc.com>
Date: Thu, 27 Oct 2022 14:42:41 +0530
Subject: [PATCH 0753/4122] f2fs: fix the msg data type

Data type of msg in f2fs_write_checkpoint trace should
be const char * instead of char *.

Signed-off-by: Mukesh Ojha <quic_mojha@quicinc.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 include/trace/events/f2fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index ff57e7f9914cc..7fbfce498472f 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -1404,7 +1404,7 @@ TRACE_EVENT(f2fs_readpages,
 
 TRACE_EVENT(f2fs_write_checkpoint,
 
-	TP_PROTO(struct super_block *sb, int reason, char *msg),
+	TP_PROTO(struct super_block *sb, int reason, const char *msg),
 
 	TP_ARGS(sb, reason, msg),
 
-- 
GitLab


From 146dbcbf17a6d07169e75224d949cc2670de2e20 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 27 Oct 2022 18:24:46 +0800
Subject: [PATCH 0754/4122] f2fs: fix return val in f2fs_start_ckpt_thread()

Return PTR_ERR(cprc->f2fs_issue_ckpt) instead of -ENOMEM;

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/checkpoint.c |  4 +++-
 fs/f2fs/gc.c         | 15 +++++++--------
 fs/f2fs/segment.c    |  4 ++--
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index c00694a502227..56f7d0d6a8b2f 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -1902,8 +1902,10 @@ int f2fs_start_ckpt_thread(struct f2fs_sb_info *sbi)
 	cprc->f2fs_issue_ckpt = kthread_run(issue_checkpoint_thread, sbi,
 			"f2fs_ckpt-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(cprc->f2fs_issue_ckpt)) {
+		int err = PTR_ERR(cprc->f2fs_issue_ckpt);
+
 		cprc->f2fs_issue_ckpt = NULL;
-		return -ENOMEM;
+		return err;
 	}
 
 	set_task_ioprio(cprc->f2fs_issue_ckpt, cprc->ckpt_thread_ioprio);
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d2e9c280773f0..15f56859966cc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -171,13 +171,10 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 {
 	struct f2fs_gc_kthread *gc_th;
 	dev_t dev = sbi->sb->s_bdev->bd_dev;
-	int err = 0;
 
 	gc_th = f2fs_kmalloc(sbi, sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
-	if (!gc_th) {
-		err = -ENOMEM;
-		goto out;
-	}
+	if (!gc_th)
+		return -ENOMEM;
 
 	gc_th->urgent_sleep_time = DEF_GC_THREAD_URGENT_SLEEP_TIME;
 	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
@@ -192,12 +189,14 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
 	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
 			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(gc_th->f2fs_gc_task)) {
-		err = PTR_ERR(gc_th->f2fs_gc_task);
+		int err = PTR_ERR(gc_th->f2fs_gc_task);
+
 		kfree(gc_th);
 		sbi->gc_thread = NULL;
+		return err;
 	}
-out:
-	return err;
+
+	return 0;
 }
 
 void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c470b443615f1..c4270cd6eaab7 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -620,7 +620,6 @@ int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
 {
 	dev_t dev = sbi->sb->s_bdev->bd_dev;
 	struct flush_cmd_control *fcc;
-	int err;
 
 	if (SM_I(sbi)->fcc_info) {
 		fcc = SM_I(sbi)->fcc_info;
@@ -644,7 +643,8 @@ init_thread:
 	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
 				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
 	if (IS_ERR(fcc->f2fs_issue_flush)) {
-		err = PTR_ERR(fcc->f2fs_issue_flush);
+		int err = PTR_ERR(fcc->f2fs_issue_flush);
+
 		kfree(fcc);
 		SM_I(sbi)->fcc_info = NULL;
 		return err;
-- 
GitLab


From 7b02b2201893a71b881026cf574902019ab00db5 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Fri, 28 Oct 2022 17:30:26 +0800
Subject: [PATCH 0755/4122] f2fs: fix to destroy sbi->post_read_wq in error
 path of f2fs_fill_super()

In error path of f2fs_fill_super(), this patch fixes to call
f2fs_destroy_post_read_wq() once if we fail in f2fs_start_ckpt_thread().

Fixes: 261eeb9c1585 ("f2fs: introduce checkpoint_merge mount option")
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a247027711d89..e6365f040171e 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4531,9 +4531,9 @@ free_nm:
 	f2fs_destroy_node_manager(sbi);
 free_sm:
 	f2fs_destroy_segment_manager(sbi);
-	f2fs_destroy_post_read_wq(sbi);
 stop_ckpt_thread:
 	f2fs_stop_ckpt_thread(sbi);
+	f2fs_destroy_post_read_wq(sbi);
 free_devices:
 	destroy_device_list(sbi);
 	kvfree(sbi->ckpt);
-- 
GitLab


From a3951cd199a5d26138532d4e55af41262237632e Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 11:32:16 +0800
Subject: [PATCH 0756/4122] f2fs: introduce gc_mode sysfs node

Revert "f2fs: make gc_urgent and gc_segment_mode sysfs node readable".

Add a gc_mode sysfs node to show the current gc_mode as a string.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  6 ++++++
 fs/f2fs/f2fs.h                          |  1 +
 fs/f2fs/sysfs.c                         | 15 +++++++++------
 3 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 53f70eadec96a..ef2b3572ba181 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -640,3 +640,9 @@ Date:		July 2022
 Contact:	"Daeho Jeong" <daehojeong@google.com>
 Description:	Show the accumulated total revoked atomic write block count after boot.
 		If you write "0" here, you can initialize to "0".
+
+What:		/sys/fs/f2fs/<disk>/gc_mode
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	Show the current gc_mode as a string.
+		This is a read-only entry.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index fa8dc00dfb2b8..662b27c19de10 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1319,6 +1319,7 @@ enum {
 	MAX_TIME,
 };
 
+/* Note that you need to keep synchronization with this gc_mode_names array */
 enum {
 	GC_NORMAL,
 	GC_IDLE_CB,
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 8095345ebdad5..1fbd41c483282 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -143,6 +143,12 @@ static ssize_t pending_discard_show(struct f2fs_attr *a,
 				&SM_I(sbi)->dcc_info->discard_cmd_cnt));
 }
 
+static ssize_t gc_mode_show(struct f2fs_attr *a,
+		struct f2fs_sb_info *sbi, char *buf)
+{
+	return sysfs_emit(buf, "%s\n", gc_mode_names[sbi->gc_mode]);
+}
+
 static ssize_t features_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
@@ -332,13 +338,8 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
 		return sysfs_emit(buf, "%u\n", sbi->compr_new_inode);
 #endif
 
-	if (!strcmp(a->attr.name, "gc_urgent"))
-		return sysfs_emit(buf, "%s\n",
-				gc_mode_names[sbi->gc_mode]);
-
 	if (!strcmp(a->attr.name, "gc_segment_mode"))
-		return sysfs_emit(buf, "%s\n",
-				gc_mode_names[sbi->gc_segment_mode]);
+		return sysfs_emit(buf, "%u\n", sbi->gc_segment_mode);
 
 	if (!strcmp(a->attr.name, "gc_reclaimed_segments")) {
 		return sysfs_emit(buf, "%u\n",
@@ -844,6 +845,7 @@ F2FS_GENERAL_RO_ATTR(encoding);
 F2FS_GENERAL_RO_ATTR(mounted_time_sec);
 F2FS_GENERAL_RO_ATTR(main_blkaddr);
 F2FS_GENERAL_RO_ATTR(pending_discard);
+F2FS_GENERAL_RO_ATTR(gc_mode);
 #ifdef CONFIG_F2FS_STAT_FS
 F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_foreground_calls, cp_count);
 F2FS_STAT_ATTR(STAT_INFO, f2fs_stat_info, cp_background_calls, bg_cp_count);
@@ -926,6 +928,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(discard_granularity),
 	ATTR_LIST(max_ordered_discard),
 	ATTR_LIST(pending_discard),
+	ATTR_LIST(gc_mode),
 	ATTR_LIST(ipu_policy),
 	ATTR_LIST(min_ipu_util),
 	ATTR_LIST(min_fsync_blocks),
-- 
GitLab


From 23ddc81b087c8bd9a73272afa076e204dc2e5410 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 28 Oct 2022 09:49:53 -0700
Subject: [PATCH 0757/4122] f2fs: use sysfs_emit instead of sprintf

Let's use sysfs_emit.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/sysfs.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 1fbd41c483282..af23ed6121b00 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -95,28 +95,28 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
 static ssize_t dirty_segments_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%llu\n",
+	return sysfs_emit(buf, "%llu\n",
 			(unsigned long long)(dirty_segments(sbi)));
 }
 
 static ssize_t free_segments_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%llu\n",
+	return sysfs_emit(buf, "%llu\n",
 			(unsigned long long)(free_segments(sbi)));
 }
 
 static ssize_t ovp_segments_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%llu\n",
+	return sysfs_emit(buf, "%llu\n",
 			(unsigned long long)(overprovision_segments(sbi)));
 }
 
 static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%llu\n",
+	return sysfs_emit(buf, "%llu\n",
 			(unsigned long long)(sbi->kbytes_written +
 			((f2fs_get_sectors_written(sbi) -
 				sbi->sectors_written_start) >> 1)));
@@ -125,13 +125,13 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
 static ssize_t sb_status_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%lx\n", sbi->s_flag);
+	return sysfs_emit(buf, "%lx\n", sbi->s_flag);
 }
 
 static ssize_t cp_status_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags));
+	return sysfs_emit(buf, "%x\n", le32_to_cpu(F2FS_CKPT(sbi)->ckpt_flags));
 }
 
 static ssize_t pending_discard_show(struct f2fs_attr *a,
@@ -139,7 +139,7 @@ static ssize_t pending_discard_show(struct f2fs_attr *a,
 {
 	if (!SM_I(sbi)->dcc_info)
 		return -EINVAL;
-	return sprintf(buf, "%llu\n", (unsigned long long)atomic_read(
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)atomic_read(
 				&SM_I(sbi)->dcc_info->discard_cmd_cnt));
 }
 
@@ -205,7 +205,7 @@ static ssize_t features_show(struct f2fs_attr *a,
 static ssize_t current_reserved_blocks_show(struct f2fs_attr *a,
 					struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%u\n", sbi->current_reserved_blocks);
+	return sysfs_emit(buf, "%u\n", sbi->current_reserved_blocks);
 }
 
 static ssize_t unusable_show(struct f2fs_attr *a,
@@ -217,7 +217,7 @@ static ssize_t unusable_show(struct f2fs_attr *a,
 		unusable = sbi->unusable_block_count;
 	else
 		unusable = f2fs_get_unusable_blocks(sbi);
-	return sprintf(buf, "%llu\n", (unsigned long long)unusable);
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)unusable);
 }
 
 static ssize_t encoding_show(struct f2fs_attr *a,
@@ -232,13 +232,13 @@ static ssize_t encoding_show(struct f2fs_attr *a,
 			(sb->s_encoding->version >> 8) & 0xff,
 			sb->s_encoding->version & 0xff);
 #endif
-	return sprintf(buf, "(none)");
+	return sysfs_emit(buf, "(none)");
 }
 
 static ssize_t mounted_time_sec_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "%llu", SIT_I(sbi)->mounted_time);
+	return sysfs_emit(buf, "%llu", SIT_I(sbi)->mounted_time);
 }
 
 #ifdef CONFIG_F2FS_STAT_FS
@@ -247,7 +247,7 @@ static ssize_t moved_blocks_foreground_show(struct f2fs_attr *a,
 {
 	struct f2fs_stat_info *si = F2FS_STAT(sbi);
 
-	return sprintf(buf, "%llu\n",
+	return sysfs_emit(buf, "%llu\n",
 		(unsigned long long)(si->tot_blks -
 			(si->bg_data_blks + si->bg_node_blks)));
 }
@@ -257,7 +257,7 @@ static ssize_t moved_blocks_background_show(struct f2fs_attr *a,
 {
 	struct f2fs_stat_info *si = F2FS_STAT(sbi);
 
-	return sprintf(buf, "%llu\n",
+	return sysfs_emit(buf, "%llu\n",
 		(unsigned long long)(si->bg_data_blks + si->bg_node_blks));
 }
 
@@ -268,7 +268,7 @@ static ssize_t avg_vblocks_show(struct f2fs_attr *a,
 
 	si->dirty_count = dirty_segments(sbi);
 	f2fs_update_sit_info(sbi);
-	return sprintf(buf, "%llu\n", (unsigned long long)(si->avg_vblocks));
+	return sysfs_emit(buf, "%llu\n", (unsigned long long)(si->avg_vblocks));
 }
 #endif
 
@@ -363,7 +363,7 @@ static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
 
 	ui = (unsigned int *)(ptr + a->offset);
 
-	return sprintf(buf, "%u\n", *ui);
+	return sysfs_emit(buf, "%u\n", *ui);
 }
 
 static ssize_t __sbi_store(struct f2fs_attr *a,
@@ -728,7 +728,7 @@ static void f2fs_sb_release(struct kobject *kobj)
 static ssize_t f2fs_feature_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sprintf(buf, "supported\n");
+	return sysfs_emit(buf, "supported\n");
 }
 
 #define F2FS_FEATURE_RO_ATTR(_name)				\
@@ -741,8 +741,8 @@ static ssize_t f2fs_sb_feature_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
 	if (F2FS_HAS_FEATURE(sbi, a->id))
-		return sprintf(buf, "supported\n");
-	return sprintf(buf, "unsupported\n");
+		return sysfs_emit(buf, "supported\n");
+	return sysfs_emit(buf, "unsupported\n");
 }
 
 #define F2FS_SB_FEATURE_RO_ATTR(_name, _feat)			\
-- 
GitLab


From eebd36a408bb6fc5d7adbb4b8c6db993d0a850f8 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 28 Oct 2022 10:07:13 -0700
Subject: [PATCH 0758/4122] f2fs: add missing bracket in doc

Let's add missing <>.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index ef2b3572ba181..a6a60268dcc5a 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -241,7 +241,7 @@ Description:	Shows total written kbytes issued to disk.
 What:		/sys/fs/f2fs/<disk>/features
 Date:		July 2017
 Contact:	"Jaegeuk Kim" <jaegeuk@kernel.org>
-Description:	<deprecated: should use /sys/fs/f2fs/<disk>/feature_list/
+Description:	<deprecated: should use /sys/fs/f2fs/<disk>/feature_list/>
 		Shows all enabled features in current device.
 		Supported features:
 		encryption, blkzoned, extra_attr, projquota, inode_checksum,
-- 
GitLab


From e5a0db6a9e2eafe50e3ebc73a8285ae561e7d850 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 14:50:25 +0800
Subject: [PATCH 0759/4122] f2fs: replace gc_urgent_high_remaining with
 gc_remaining_trials

The user can set the trial count limit for GC urgent and
idle mode with replaced gc_remaining_trials.. If GC thread gets
to the limit, the mode will turn back to GC normal mode finally.

It was applied only to GC_URGENT, while this patch expands it for
GC_IDLE.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  8 ++++----
 fs/f2fs/f2fs.h                          |  5 +++--
 fs/f2fs/gc.c                            | 12 ++++++------
 fs/f2fs/super.c                         |  2 +-
 fs/f2fs/sysfs.c                         | 12 ++++++------
 5 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index a6a60268dcc5a..24e7cb77f265b 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -598,10 +598,10 @@ Description:	With "mode=fragment:block" mount options, we can scatter block allo
 		in the length of 1..<max_fragment_hole> by turns. This value can be set
 		between 1..512 and the default value is 4.
 
-What:		/sys/fs/f2fs/<disk>/gc_urgent_high_remaining
-Date:		December 2021
-Contact:	"Daeho Jeong" <daehojeong@google.com>
-Description:	You can set the trial count limit for GC urgent high mode with this value.
+What:		/sys/fs/f2fs/<disk>/gc_remaining_trials
+Date:		October 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	You can set the trial count limit for GC urgent and idle mode with this value.
 		If GC thread gets to the limit, the mode will turn back to GC normal mode.
 		By default, the value is zero, which means there is no limit like before.
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 662b27c19de10..04ef4cce3d7f4 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1736,8 +1736,9 @@ struct f2fs_sb_info {
 	unsigned int cur_victim_sec;		/* current victim section num */
 	unsigned int gc_mode;			/* current GC state */
 	unsigned int next_victim_seg[2];	/* next segment in victim section */
-	spinlock_t gc_urgent_high_lock;
-	unsigned int gc_urgent_high_remaining;	/* remaining trial count for GC_URGENT_HIGH */
+	spinlock_t gc_remaining_trials_lock;
+	/* remaining trial count for GC_URGENT_* and GC_IDLE_* */
+	unsigned int gc_remaining_trials;
 
 	/* for skip statistic */
 	unsigned long long skipped_gc_rwsem;		/* FG_GC only */
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 15f56859966cc..6466db75af5d2 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -152,14 +152,14 @@ do_gc:
 		/* balancing f2fs's metadata periodically */
 		f2fs_balance_fs_bg(sbi, true);
 next:
-		if (sbi->gc_mode == GC_URGENT_HIGH) {
-			spin_lock(&sbi->gc_urgent_high_lock);
-			if (sbi->gc_urgent_high_remaining) {
-				sbi->gc_urgent_high_remaining--;
-				if (!sbi->gc_urgent_high_remaining)
+		if (sbi->gc_mode != GC_NORMAL) {
+			spin_lock(&sbi->gc_remaining_trials_lock);
+			if (sbi->gc_remaining_trials) {
+				sbi->gc_remaining_trials--;
+				if (!sbi->gc_remaining_trials)
 					sbi->gc_mode = GC_NORMAL;
 			}
-			spin_unlock(&sbi->gc_urgent_high_lock);
+			spin_unlock(&sbi->gc_remaining_trials_lock);
 		}
 		sb_end_write(sbi->sb);
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index e6365f040171e..a43d8a46a6e56 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -3624,7 +3624,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 	sbi->seq_file_ra_mul = MIN_RA_MUL;
 	sbi->max_fragment_chunk = DEF_FRAGMENT_SIZE;
 	sbi->max_fragment_hole = DEF_FRAGMENT_SIZE;
-	spin_lock_init(&sbi->gc_urgent_high_lock);
+	spin_lock_init(&sbi->gc_remaining_trials_lock);
 	atomic64_set(&sbi->current_atomic_write, 0);
 
 	sbi->dir_level = DEF_DIR_LEVEL;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index af23ed6121b00..032c03e09580d 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -538,10 +538,10 @@ out:
 		return count;
 	}
 
-	if (!strcmp(a->attr.name, "gc_urgent_high_remaining")) {
-		spin_lock(&sbi->gc_urgent_high_lock);
-		sbi->gc_urgent_high_remaining = t;
-		spin_unlock(&sbi->gc_urgent_high_lock);
+	if (!strcmp(a->attr.name, "gc_remaining_trials")) {
+		spin_lock(&sbi->gc_remaining_trials_lock);
+		sbi->gc_remaining_trials = t;
+		spin_unlock(&sbi->gc_remaining_trials_lock);
 
 		return count;
 	}
@@ -832,7 +832,7 @@ F2FS_RW_ATTR(FAULT_INFO_TYPE, f2fs_fault_info, inject_type, inject_type);
 #endif
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, data_io_flag, data_io_flag);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, node_io_flag, node_io_flag);
-F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_urgent_high_remaining, gc_urgent_high_remaining);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_remaining_trials, gc_remaining_trials);
 F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
 F2FS_GENERAL_RO_ATTR(dirty_segments);
 F2FS_GENERAL_RO_ATTR(free_segments);
@@ -961,7 +961,7 @@ static struct attribute *f2fs_attrs[] = {
 #endif
 	ATTR_LIST(data_io_flag),
 	ATTR_LIST(node_io_flag),
-	ATTR_LIST(gc_urgent_high_remaining),
+	ATTR_LIST(gc_remaining_trials),
 	ATTR_LIST(ckpt_thread_ioprio),
 	ATTR_LIST(dirty_segments),
 	ATTR_LIST(free_segments),
-- 
GitLab


From 3b21b794b5797d35f4fad930b53b1cd881c12dd3 Mon Sep 17 00:00:00 2001
From: "wangkailong@jari.cn" <wangkailong@jari.cn>
Date: Sat, 29 Oct 2022 22:49:30 +0800
Subject: [PATCH 0760/4122] f2fs: replace ternary operator with max()

Fix the following coccicheck warning:

./fs/f2fs/segment.c:877:24-25: WARNING opportunity for max()

Signed-off-by: KaiLong Wang <wangkailong@jari.cn>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c4270cd6eaab7..aa4be7f25963d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -856,7 +856,7 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
 	}
 	mutex_unlock(&dirty_i->seglist_lock);
 
-	unusable = holes[DATA] > holes[NODE] ? holes[DATA] : holes[NODE];
+	unusable = max(holes[DATA], holes[NODE]);
 	if (unusable > ovp_holes)
 		return unusable - ovp_holes;
 	return 0;
-- 
GitLab


From f6c64dc32ab91b4c37fa2a255d2270f4ff0b95ba Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Sat, 29 Oct 2022 09:25:05 +0800
Subject: [PATCH 0761/4122] apparmor: Add __init annotation to
 aa_{setup/teardown}_dfa_engine()

The aa_setup_dfa_engine() and aa_teardown_dfa_engine() is only called in
apparmor_init(), so let us add __init annotation to them.

Fixes: 11c236b89d7c ("apparmor: add a default null dfa")
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/match.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/match.c b/security/apparmor/match.c
index 5095c26ca6836..b97ef5e1db732 100644
--- a/security/apparmor/match.c
+++ b/security/apparmor/match.c
@@ -31,7 +31,7 @@ static char stacksplitdfa_src[] = {
 };
 struct aa_dfa *stacksplitdfa;
 
-int aa_setup_dfa_engine(void)
+int __init aa_setup_dfa_engine(void)
 {
 	int error;
 
@@ -59,7 +59,7 @@ int aa_setup_dfa_engine(void)
 	return 0;
 }
 
-void aa_teardown_dfa_engine(void)
+void __init aa_teardown_dfa_engine(void)
 {
 	aa_put_dfa(stacksplitdfa);
 	aa_put_dfa(nulldfa);
-- 
GitLab


From 4295c60bbe9e63e35d330546eeaa1d2b62dae303 Mon Sep 17 00:00:00 2001
From: John Johansen <john.johansen@canonical.com>
Date: Tue, 1 Nov 2022 05:40:40 -0700
Subject: [PATCH 0762/4122] apparmor: Fix uninitialized symbol 'array_size' in
 policy_unpack_test.c

Make sure array_size is initialized in the kunit test to get rid of
compiler warnings. This will also make sure the following tests fail
consistently if the first test fails.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: John Johansen <john.johansen@canonical.com>
---
 security/apparmor/policy_unpack_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/security/apparmor/policy_unpack_test.c b/security/apparmor/policy_unpack_test.c
index b214f6ea8a725..7465da42492d1 100644
--- a/security/apparmor/policy_unpack_test.c
+++ b/security/apparmor/policy_unpack_test.c
@@ -140,7 +140,7 @@ static void policy_unpack_test_inbounds_when_out_of_bounds(struct kunit *test)
 static void policy_unpack_test_unpack_array_with_null_name(struct kunit *test)
 {
 	struct policy_unpack_fixture *puf = test->priv;
-	u16 array_size;
+	u16 array_size = 0;
 
 	puf->e->pos += TEST_ARRAY_BUF_OFFSET;
 
@@ -155,7 +155,7 @@ static void policy_unpack_test_unpack_array_with_name(struct kunit *test)
 {
 	struct policy_unpack_fixture *puf = test->priv;
 	const char name[] = TEST_ARRAY_NAME;
-	u16 array_size;
+	u16 array_size = 0;
 
 	puf->e->pos += TEST_NAMED_ARRAY_BUF_OFFSET;
 
-- 
GitLab


From d780a47c2de9a761bb3d754a24e76495b3f0d55f Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Wed, 5 Oct 2022 18:13:45 +0100
Subject: [PATCH 0763/4122] serial: sifive: select by default if SOC_SIFIVE

With the aim of dropping direct selects of drivers from Kconfig.socs,
default the SiFive serial drivers to the value of SOC_SIFIVE.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221005171348.167476-3-conor@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 434f83168546c..94457b54125b6 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -958,6 +958,7 @@ config SERIAL_OMAP_CONSOLE
 config SERIAL_SIFIVE
 	tristate "SiFive UART support"
 	depends on OF
+	default SOC_SIFIVE
 	select SERIAL_CORE
 	help
 	  Select this option if you are building a kernel for a device that
@@ -967,6 +968,7 @@ config SERIAL_SIFIVE
 config SERIAL_SIFIVE_CONSOLE
 	bool "Console on SiFive UART"
 	depends on SERIAL_SIFIVE=y
+	default SOC_SIFIVE
 	select SERIAL_CORE_CONSOLE
 	select SERIAL_EARLYCON
 	help
-- 
GitLab


From f58a16043a2154661aafe8287cea24e6ae51a9d9 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Wed, 5 Oct 2022 18:13:46 +0100
Subject: [PATCH 0764/4122] serial: sifive: select by default if SOC_CANAAN

With the aim of dropping direct selects of drivers from Kconfig.socs,
default the SiFive serial drivers to enabled if SOC_CANAAN.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221005171348.167476-4-conor@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 94457b54125b6..c55b947f3cdbb 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -958,7 +958,7 @@ config SERIAL_OMAP_CONSOLE
 config SERIAL_SIFIVE
 	tristate "SiFive UART support"
 	depends on OF
-	default SOC_SIFIVE
+	default SOC_SIFIVE || SOC_CANAAN
 	select SERIAL_CORE
 	help
 	  Select this option if you are building a kernel for a device that
@@ -968,7 +968,7 @@ config SERIAL_SIFIVE
 config SERIAL_SIFIVE_CONSOLE
 	bool "Console on SiFive UART"
 	depends on SERIAL_SIFIVE=y
-	default SOC_SIFIVE
+	default SOC_SIFIVE || SOC_CANAAN
 	select SERIAL_CORE_CONSOLE
 	select SERIAL_EARLYCON
 	help
-- 
GitLab


From 72da688b457d738b943016dabc603efb1be5f4e1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 Oct 2022 07:20:52 +0200
Subject: [PATCH 0765/4122] tty: evh_bytechan: Replace NO_IRQ by 0

NO_IRQ is used to check the return of irq_of_parse_and_map().

On some architecture NO_IRQ is 0, on other architectures it is -1.

irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ.

So use 0 instead of using NO_IRQ.

Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Acked-by: Laurentiu Tudor <laurentiu.tudor@nxp.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Link: https://lore.kernel.org/r/23f608ca57e7e19bc7060d3e563de383e0b2b337.1665033575.git.christophe.leroy@csgroup.eu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/ehv_bytechan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
index 19d32cb6af84e..8595483f46977 100644
--- a/drivers/tty/ehv_bytechan.c
+++ b/drivers/tty/ehv_bytechan.c
@@ -118,7 +118,7 @@ static int find_console_handle(void)
 		return 0;
 
 	stdout_irq = irq_of_parse_and_map(np, 0);
-	if (stdout_irq == NO_IRQ) {
+	if (!stdout_irq) {
 		pr_err("ehv-bc: no 'interrupts' property in %pOF node\n", np);
 		return 0;
 	}
@@ -696,7 +696,7 @@ static int ehv_bc_tty_probe(struct platform_device *pdev)
 
 	bc->rx_irq = irq_of_parse_and_map(np, 0);
 	bc->tx_irq = irq_of_parse_and_map(np, 1);
-	if ((bc->rx_irq == NO_IRQ) || (bc->tx_irq == NO_IRQ)) {
+	if (!bc->rx_irq || !bc->tx_irq) {
 		dev_err(&pdev->dev, "no 'interrupts' property in %pOFn node\n",
 			np);
 		ret = -ENODEV;
-- 
GitLab


From e5c3ddd394a409021377b5454a5bd1efa1ba8447 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 31 Oct 2022 21:54:10 +0100
Subject: [PATCH 0766/4122] staging: r8188eu: use ether_addr_equal for address
 comparison

We can use ether_addr_equal instead of memcmp in
update_recvframe_phyinfo_88e for comparing the incoming frame's
destination address with our local address.

Both struct ieee80211_hdr and struct eeprom_priv's mac_addr component are
2-byte aligned.

Suggested-by: Joe Perches <joe@perches.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221031205412.124871-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
index f01ae71bcdb11..10bb2e6029846 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
@@ -126,8 +126,8 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat
 		 get_bssid(&padapter->mlmepriv), ETH_ALEN));
 
 	pkt_info.bPacketToSelf = pkt_info.bPacketMatchBSSID &&
-				 (!memcmp(ieee80211_get_DA(hdr),
-				  myid(&padapter->eeprompriv), ETH_ALEN));
+				 ether_addr_equal(ieee80211_get_DA(hdr),
+						  myid(&padapter->eeprompriv));
 
 	pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID && ieee80211_is_beacon(fc);
 	if (pkt_info.bPacketBeacon) {
-- 
GitLab


From 97cc476fc6fbfabb989d508df09208c4a598e149 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 31 Oct 2022 21:54:11 +0100
Subject: [PATCH 0767/4122] staging: r8188eu: use hdr->frame_control instead of
 fc

We can remove the fc variable in update_recvframe_phyinfo_88e and use
hdr->frame_control instead.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221031205412.124871-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
index 10bb2e6029846..4e9d8c8285c4f 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
@@ -114,13 +114,12 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat
 	struct hal_data_8188e *pHalData = &padapter->haldata;
 	struct phy_info *pPHYInfo  = &pattrib->phy_info;
 	u8 *wlanhdr = precvframe->rx_data;
-	__le16 fc = *(__le16 *)wlanhdr;
 	struct odm_per_pkt_info	pkt_info;
 	u8 *sa = NULL;
 	struct sta_priv *pstapriv;
 	struct sta_info *psta;
 
-	pkt_info.bPacketMatchBSSID = ((!ieee80211_is_ctl(fc)) &&
+	pkt_info.bPacketMatchBSSID = ((!ieee80211_is_ctl(hdr->frame_control)) &&
 		!pattrib->icv_err && !pattrib->crc_err &&
 		!memcmp(get_hdr_bssid(wlanhdr),
 		 get_bssid(&padapter->mlmepriv), ETH_ALEN));
@@ -129,7 +128,8 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat
 				 ether_addr_equal(ieee80211_get_DA(hdr),
 						  myid(&padapter->eeprompriv));
 
-	pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID && ieee80211_is_beacon(fc);
+	pkt_info.bPacketBeacon = pkt_info.bPacketMatchBSSID &&
+				 ieee80211_is_beacon(hdr->frame_control);
 	if (pkt_info.bPacketBeacon) {
 		if (check_fwstate(&padapter->mlmepriv, WIFI_STATION_STATE))
 			sa = padapter->mlmepriv.cur_network.network.MacAddress;
-- 
GitLab


From aa69ca7d6d6cd2ed2209b4b6c102f2fec3e97012 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 31 Oct 2022 21:54:12 +0100
Subject: [PATCH 0768/4122] staging: r8188eu: use ieee80211_get_SA

Use ieee80211_get_SA in update_recvframe_phyinfo_88e instead of the
driver-specific get_sa function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221031205412.124871-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
index 4e9d8c8285c4f..9a61eef8550bd 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
@@ -135,7 +135,7 @@ void update_recvframe_phyinfo_88e(struct recv_frame *precvframe, struct phy_stat
 			sa = padapter->mlmepriv.cur_network.network.MacAddress;
 		/* to do Ad-hoc */
 	} else {
-		sa = get_sa(wlanhdr);
+		sa = ieee80211_get_SA(hdr);
 	}
 
 	pstapriv = &padapter->stapriv;
-- 
GitLab


From 3032eb4690d5d5bac28d0e92cc501962a42ca4be Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Mon, 31 Oct 2022 16:37:43 +0100
Subject: [PATCH 0769/4122] staging: r8188eu: replace ternary operator with
 min, max, abs macros

Replace some ternary operators with the min(), max() or abs() macros
to improve readability.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221031153743.8801-1-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_xmit.c      | 3 +--
 drivers/staging/r8188eu/hal/HalPhyRf_8188e.c | 2 +-
 drivers/staging/r8188eu/hal/odm_RTL8188E.c   | 4 ++--
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c
index 4f82204283282..bd6c1a401c590 100644
--- a/drivers/staging/r8188eu/core/rtw_xmit.c
+++ b/drivers/staging/r8188eu/core/rtw_xmit.c
@@ -476,8 +476,7 @@ static uint rtw_pktfile_read(struct pkt_file *pfile, u8 *rmem, uint rlen)
 {
 	uint len;
 
-	len = rtw_remainder_len(pfile);
-	len = (rlen > len) ? len : rlen;
+	len = min(rtw_remainder_len(pfile), rlen);
 
 	if (rmem)
 		skb_copy_bits(pfile->pkt, pfile->buf_len - pfile->pkt_len, rmem, len);
diff --git a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
index 60cdfcf80daad..622f95d3f2edf 100644
--- a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
+++ b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
@@ -583,7 +583,7 @@ static bool phy_SimularityCompare_8188E(
 			tmp2 = resulta[c2][i];
 		}
 
-		diff = (tmp1 > tmp2) ? (tmp1 - tmp2) : (tmp2 - tmp1);
+		diff = abs(tmp1 - tmp2);
 
 		if (diff > MAX_TOLERANCE) {
 			if ((i == 2 || i == 6) && !sim_bitmap) {
diff --git a/drivers/staging/r8188eu/hal/odm_RTL8188E.c b/drivers/staging/r8188eu/hal/odm_RTL8188E.c
index dd9c8291f025d..f3f4074d43167 100644
--- a/drivers/staging/r8188eu/hal/odm_RTL8188E.c
+++ b/drivers/staging/r8188eu/hal/odm_RTL8188E.c
@@ -199,7 +199,7 @@ static void odm_HWAntDiv(struct odm_dm_struct *dm_odm)
 			Aux_RSSI = (dm_fat_tbl->AuxAnt_Cnt[i] != 0) ? (dm_fat_tbl->AuxAnt_Sum[i] / dm_fat_tbl->AuxAnt_Cnt[i]) : 0;
 			TargetAnt = (Main_RSSI >= Aux_RSSI) ? MAIN_ANT : AUX_ANT;
 			/* 2 Select MaxRSSI for DIG */
-			LocalMaxRSSI = (Main_RSSI > Aux_RSSI) ? Main_RSSI : Aux_RSSI;
+			LocalMaxRSSI = max(Main_RSSI, Aux_RSSI);
 			if ((LocalMaxRSSI > AntDivMaxRSSI) && (LocalMaxRSSI < 40))
 				AntDivMaxRSSI = LocalMaxRSSI;
 			if (LocalMaxRSSI > MaxRSSI)
@@ -211,7 +211,7 @@ static void odm_HWAntDiv(struct odm_dm_struct *dm_odm)
 			else if ((dm_fat_tbl->RxIdleAnt == AUX_ANT) && (Aux_RSSI == 0))
 				Aux_RSSI = Main_RSSI;
 
-			LocalMinRSSI = (Main_RSSI > Aux_RSSI) ? Aux_RSSI : Main_RSSI;
+			LocalMinRSSI = min(Main_RSSI, Aux_RSSI);
 			if (LocalMinRSSI < MinRSSI) {
 				MinRSSI = LocalMinRSSI;
 				RxIdleAnt = TargetAnt;
-- 
GitLab


From a653e32aaddb11cc4f159558c4cf7dfa2c5a0222 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 31 Oct 2022 21:51:41 +0100
Subject: [PATCH 0770/4122] staging: r8188eu: use ether_addr_equal in OnAction

Use ether_addr_equal to compare two mac addresses in OnAction.

Both struct ieee80211_mgmt and struct eeprom_priv's mac_addr component
are 2-byte aligned.

Suggested-by: Joe Perches <joe@perches.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221031205140.124682-1-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index e985fc5fc575e..6679d4037d6bd 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3823,7 +3823,7 @@ static void OnAction(struct adapter *padapter, struct recv_frame *precv_frame)
 {
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 
-	if (memcmp(myid(&padapter->eeprompriv), mgmt->da, ETH_ALEN))
+	if (!ether_addr_equal(myid(&padapter->eeprompriv), mgmt->da))
 		return;
 
 	switch (mgmt->u.action.category) {
-- 
GitLab


From 3b45e2e139c1e71b17a97ae887ba1caadd8e8ad2 Mon Sep 17 00:00:00 2001
From: Tanjuate Brunostar <tanjubrunostar0@gmail.com>
Date: Tue, 1 Nov 2022 09:55:33 +0000
Subject: [PATCH 0771/4122] staging: vt6655: change variable name wTimeStampOff

Remove the use of Hungarian notation, which is not used in the Linux
kernel.

Signed-off-by: Tanjuate Brunostar <tanjubrunostar0@gmail.com>
Link: https://lore.kernel.org/r/Y2DtFRdhCiyNF2kF@elroy-temp-vm.gaiao0uenmiufjlowqgp5yxwdh.gvxx.internal.cloudapp.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vt6655/rxtx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/vt6655/rxtx.c b/drivers/staging/vt6655/rxtx.c
index 5bdb5176772c0..baa3f6f1508f7 100644
--- a/drivers/staging/vt6655/rxtx.c
+++ b/drivers/staging/vt6655/rxtx.c
@@ -54,7 +54,7 @@
  */
 #define CRITICAL_PACKET_LEN      256
 
-static const unsigned short wTimeStampOff[2][MAX_RATE] = {
+static const unsigned short time_stamp_off[2][MAX_RATE] = {
 	{384, 288, 226, 209, 54, 43, 37, 31, 28, 25, 24, 23}, /* Long Preamble */
 	{384, 192, 130, 113, 54, 43, 37, 31, 28, 25, 24, 23}, /* Short Preamble */
 };
@@ -142,7 +142,7 @@ s_uFillDataHead(
 
 static __le16 vnt_time_stamp_off(struct vnt_private *priv, u16 rate)
 {
-	return cpu_to_le16(wTimeStampOff[priv->preamble_type % 2]
+	return cpu_to_le16(time_stamp_off[priv->preamble_type % 2]
 							[rate % MAX_RATE]);
 }
 
-- 
GitLab


From 3cbb8d0d6d42958c146624a102d1a0e59a820b65 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 1 Nov 2022 09:01:11 +0100
Subject: [PATCH 0772/4122] staging: octeon: cvmx_ptr_to_phys() should return
 physaddr_t
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On 32-bit without physical address extensions (e.g. sh-allmodconfig):

    drivers/staging/octeon/ethernet-mem.c: In function ‘cvm_oct_free_hw_memory’:
    ./arch/sh/include/asm/io.h:239:32: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
      239 | #define phys_to_virt(address) ((void *)(address))
	  |                                ^
    drivers/staging/octeon/ethernet-mem.c:123:18: note: in expansion of macro ‘phys_to_virt’
      123 |    fpa = (char *)phys_to_virt(cvmx_ptr_to_phys(fpa));
	  |                  ^~~~~~~~~~~~

Fix this by making cvmx_ptr_to_phys() return physaddr_t instead of
uint64_t.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Link: https://lore.kernel.org/r/20221101080111.750748-1-geert@linux-m68k.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/mips/include/asm/octeon/cvmx.h   | 4 ++--
 drivers/staging/octeon/octeon-stubs.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/mips/include/asm/octeon/cvmx.h b/arch/mips/include/asm/octeon/cvmx.h
index 25854abc95f8a..72e775bf31e60 100644
--- a/arch/mips/include/asm/octeon/cvmx.h
+++ b/arch/mips/include/asm/octeon/cvmx.h
@@ -154,13 +154,13 @@ static inline uint64_t cvmx_build_bits(uint64_t high_bit,
 
 /**
  * Convert a memory pointer (void*) into a hardware compatible
- * memory address (uint64_t). Octeon hardware widgets don't
+ * memory address (phys_addr_t). Octeon hardware widgets don't
  * understand logical addresses.
  *
  * @ptr:    C style memory pointer
  * Returns Hardware physical address
  */
-static inline uint64_t cvmx_ptr_to_phys(void *ptr)
+static inline phys_addr_t cvmx_ptr_to_phys(void *ptr)
 {
 	if (sizeof(void *) == 8) {
 		/*
diff --git a/drivers/staging/octeon/octeon-stubs.h b/drivers/staging/octeon/octeon-stubs.h
index 3f8e5713b8a85..7a02e59e283fb 100644
--- a/drivers/staging/octeon/octeon-stubs.h
+++ b/drivers/staging/octeon/octeon-stubs.h
@@ -1212,7 +1212,7 @@ static inline void *cvmx_phys_to_ptr(uint64_t physical_address)
 	return (void *)(uintptr_t)(physical_address);
 }
 
-static inline uint64_t cvmx_ptr_to_phys(void *ptr)
+static inline phys_addr_t cvmx_ptr_to_phys(void *ptr)
 {
 	return (unsigned long)ptr;
 }
-- 
GitLab


From dba2628c8ec5c2821103090a6ba946eab9da0c1f Mon Sep 17 00:00:00 2001
From: Gabhyun Kim <kimgaby415@gmail.com>
Date: Tue, 1 Nov 2022 15:26:36 +0900
Subject: [PATCH 0773/4122] staging: rtl8192e: Remove line breaks to match
 coding style

Remove redundant line break in function definition to
correct coding style.

Signed-off-by: Gabhyun Kim <kimgaby415@gmail.com>
Link: https://lore.kernel.org/r/20221101062636.GA3257@ubuntu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_softmac.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 1a3ca3e57623a..1253de4818053 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -148,8 +148,7 @@ static void init_mgmt_queue(struct rtllib_device *ieee)
 }
 
 
-u8
-MgntQuery_TxRateExcludeCCKRates(struct rtllib_device *ieee)
+u8 MgntQuery_TxRateExcludeCCKRates(struct rtllib_device *ieee)
 {
 	u16	i;
 	u8	QueryRate = 0;
-- 
GitLab


From 8b550eb637b483866f8d4662fffc9518ad9e7eca Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Mon, 31 Oct 2022 23:25:39 +0530
Subject: [PATCH 0774/4122] staging: rtl8192u: remove unnecessary function
 implementation

This driver is a single standalone driver and does not have any loading
dependencies on another associated drivers. The build results in one
single .ko object. The current implementation of the function
ieee80211_tkip_null simply returns back to the caller without any useful
instruction executions. It does not lead to auto-loading of any other
associated modules as the initial design implementation appears to be.

Hence the call to ieee80211_tkip_null() and its implementation is
unnecessary and should be removed.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/6f5a4313fc21365bf733c25385aef79554ffb253.1667237959.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192u/ieee80211/ieee80211.h            | 3 ---
 drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c | 6 ------
 drivers/staging/rtl8192u/ieee80211/ieee80211_module.c     | 3 ---
 3 files changed, 12 deletions(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
index 9cd4b18967459..00c07455cbb3b 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
@@ -232,8 +232,6 @@ struct cb_desc {
 
 #define ieee80211_ccmp_null		ieee80211_ccmp_null_rsl
 
-#define ieee80211_tkip_null		ieee80211_tkip_null_rsl
-
 #define free_ieee80211			free_ieee80211_rsl
 #define alloc_ieee80211			alloc_ieee80211_rsl
 
@@ -2256,7 +2254,6 @@ void ieee80211_ps_tx_ack(struct ieee80211_device *ieee, short success);
 void softmac_mgmt_xmit(struct sk_buff *skb, struct ieee80211_device *ieee);
 
 /* ieee80211_crypt_ccmp&tkip&wep.c */
-void ieee80211_tkip_null(void);
 
 int ieee80211_crypto_init(void);
 void ieee80211_crypto_deinit(void);
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
index 7b120b8cb9824..9bfd24ad46b69 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_crypt_tkip.c
@@ -716,9 +716,3 @@ void ieee80211_crypto_tkip_exit(void)
 {
 	ieee80211_unregister_crypto_ops(&ieee80211_crypt_tkip);
 }
-
-void ieee80211_tkip_null(void)
-{
-//    printk("============>%s()\n", __func__);
-	return;
-}
diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
index b94fe9b449b6c..3f93939bc4ee0 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_module.c
@@ -159,9 +159,6 @@ struct net_device *alloc_ieee80211(int sizeof_priv)
 		ieee->last_packet_time[i] = 0;
 	}
 
-/* These function were added to load crypte module autoly */
-	ieee80211_tkip_null();
-
 	return dev;
 
  failed:
-- 
GitLab


From 1a6d64701922b2ff05b240dc0d69f3feda5e2690 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Tue, 1 Nov 2022 20:14:57 +0100
Subject: [PATCH 0775/4122] staging: r8188eu: remove extern from function
 prototypes

Declaring function prototypes extern is redundant and triggers
checkpatch warnings. Remove all extern from function prototypes.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221101191458.8619-2-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/r8188eu/include/osdep_service.h    |  2 +-
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  4 ++--
 drivers/staging/r8188eu/include/sta_info.h     | 18 +++++++++---------
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/include/osdep_service.h b/drivers/staging/r8188eu/include/osdep_service.h
index ec2631455f08c..f8ed04f32cae3 100644
--- a/drivers/staging/r8188eu/include/osdep_service.h
+++ b/drivers/staging/r8188eu/include/osdep_service.h
@@ -66,7 +66,7 @@ static inline int rtw_netif_queue_stopped(struct net_device *pnetdev)
 		netif_tx_queue_stopped(netdev_get_tx_queue(pnetdev, 3));
 }
 
-extern int RTW_STATUS_CODE(int error_code);
+int RTW_STATUS_CODE(int error_code);
 
 void *rtw_malloc2d(int h, int w, int size);
 
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index ce5b57e23e53b..e234a3b9af6fa 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -388,7 +388,7 @@ struct mlme_ext_priv {
 void init_mlme_ext_priv(struct adapter *adapter);
 int init_hw_mlme_ext(struct adapter *padapter);
 void free_mlme_ext_priv (struct mlme_ext_priv *pmlmeext);
-extern struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv);
+struct xmit_frame *alloc_mgtxmitframe(struct xmit_priv *pxmitpriv);
 
 unsigned char networktype_to_raid(unsigned char network_type);
 u8 judge_network_type(struct adapter *padapter, unsigned char *rate, int len);
@@ -483,7 +483,7 @@ void report_add_sta_event(struct adapter *padapter, unsigned char *addr,
 			  int cam_idx);
 
 void beacon_timing_control(struct adapter *padapter);
-extern u8 set_tx_beacon_cmd(struct adapter *padapter);
+u8 set_tx_beacon_cmd(struct adapter *padapter);
 unsigned int setup_beacon_frame(struct adapter *padapter,
 				unsigned char *beacon_frame);
 void update_mgnt_tx_rate(struct adapter *padapter, u8 rate);
diff --git a/drivers/staging/r8188eu/include/sta_info.h b/drivers/staging/r8188eu/include/sta_info.h
index f76e086b5701f..55ea5edc29141 100644
--- a/drivers/staging/r8188eu/include/sta_info.h
+++ b/drivers/staging/r8188eu/include/sta_info.h
@@ -295,19 +295,19 @@ static inline u32 wifi_mac_hash(u8 *mac)
 	return x;
 }
 
-extern int _rtw_init_sta_priv(struct sta_priv *pstapriv);
-extern void _rtw_free_sta_priv(struct sta_priv *pstapriv);
+int _rtw_init_sta_priv(struct sta_priv *pstapriv);
+void _rtw_free_sta_priv(struct sta_priv *pstapriv);
 
 #define stainfo_offset_valid(offset) (offset < NUM_STA && offset >= 0)
 int rtw_stainfo_offset(struct sta_priv *stapriv, struct sta_info *sta);
 struct sta_info *rtw_get_stainfo_by_offset(struct sta_priv *stapriv, int off);
 
-extern struct sta_info *rtw_alloc_stainfo(struct sta_priv *stapriv, u8 *hwaddr);
-extern u32	rtw_free_stainfo(struct adapter *adapt, struct sta_info *psta);
-extern void rtw_free_all_stainfo(struct adapter *adapt);
-extern struct sta_info *rtw_get_stainfo(struct sta_priv *stapriv, u8 *hwaddr);
-extern u32 rtw_init_bcmc_stainfo(struct adapter *adapt);
-extern struct sta_info *rtw_get_bcmc_stainfo(struct adapter *padapter);
-extern u8 rtw_access_ctrl(struct adapter *padapter, u8 *mac_addr);
+struct sta_info *rtw_alloc_stainfo(struct sta_priv *stapriv, u8 *hwaddr);
+u32	rtw_free_stainfo(struct adapter *adapt, struct sta_info *psta);
+void rtw_free_all_stainfo(struct adapter *adapt);
+struct sta_info *rtw_get_stainfo(struct sta_priv *stapriv, u8 *hwaddr);
+u32 rtw_init_bcmc_stainfo(struct adapter *adapt);
+struct sta_info *rtw_get_bcmc_stainfo(struct adapter *padapter);
+u8 rtw_access_ctrl(struct adapter *padapter, u8 *mac_addr);
 
 #endif /* _STA_INFO_H_ */
-- 
GitLab


From 29626f3c07d09b79d6510d1915c106e51ba85022 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Tue, 1 Nov 2022 20:14:58 +0100
Subject: [PATCH 0776/4122] staging: r8188eu: convert rtw_free_stainfo() to
 void

The function rtw_free_stainfo() returns always _SUCCESS and none of
its callers uses the return value. We can safely make the function
void. Yet another tiny step towards getting rid of _FAIL / _SUCCESS.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221101191458.8619-3-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_sta_mgt.c | 8 ++------
 drivers/staging/r8188eu/include/sta_info.h | 2 +-
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_sta_mgt.c b/drivers/staging/r8188eu/core/rtw_sta_mgt.c
index 51324e708697f..b4aee86230991 100644
--- a/drivers/staging/r8188eu/core/rtw_sta_mgt.c
+++ b/drivers/staging/r8188eu/core/rtw_sta_mgt.c
@@ -242,7 +242,7 @@ exit:
 }
 
 /*  using pstapriv->sta_hash_lock to protect */
-u32	rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
+void rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
 {
 	int i;
 	struct __queue *pfree_sta_queue;
@@ -252,7 +252,7 @@ u32	rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
 	struct	sta_priv *pstapriv = &padapter->stapriv;
 
 	if (!psta)
-		goto exit;
+		return;
 
 	pfree_sta_queue = &pstapriv->free_sta_queue;
 
@@ -356,10 +356,6 @@ u32	rtw_free_stainfo(struct adapter *padapter, struct sta_info *psta)
 	spin_lock_bh(&pfree_sta_queue->lock);
 	list_add_tail(&psta->list, get_list_head(pfree_sta_queue));
 	spin_unlock_bh(&pfree_sta_queue->lock);
-
-exit:
-
-	return _SUCCESS;
 }
 
 /*  free all stainfo which in sta_hash[all] */
diff --git a/drivers/staging/r8188eu/include/sta_info.h b/drivers/staging/r8188eu/include/sta_info.h
index 55ea5edc29141..e42f4b4c6e247 100644
--- a/drivers/staging/r8188eu/include/sta_info.h
+++ b/drivers/staging/r8188eu/include/sta_info.h
@@ -303,7 +303,7 @@ int rtw_stainfo_offset(struct sta_priv *stapriv, struct sta_info *sta);
 struct sta_info *rtw_get_stainfo_by_offset(struct sta_priv *stapriv, int off);
 
 struct sta_info *rtw_alloc_stainfo(struct sta_priv *stapriv, u8 *hwaddr);
-u32	rtw_free_stainfo(struct adapter *adapt, struct sta_info *psta);
+void rtw_free_stainfo(struct adapter *adapt, struct sta_info *psta);
 void rtw_free_all_stainfo(struct adapter *adapt);
 struct sta_info *rtw_get_stainfo(struct sta_priv *stapriv, u8 *hwaddr);
 u32 rtw_init_bcmc_stainfo(struct adapter *adapt);
-- 
GitLab


From 72cdc5aec2913bef5dd62eb75cadad8bff27fd8e Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 1 Nov 2022 19:18:10 +0530
Subject: [PATCH 0777/4122] staging: wlan-ng: Remove unused struct wlan_ie_ssid
 references

Pointer reference to struct wlan_ie_ssid is added as a member variable
to 5 different structures. However, these references are never used.
Remove such unused struct references. The cleanup also renders the
struct useless; so remove it as well.
Issue identified as part of coccicheck based code analysis.

Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/6e39ef59d01d65a1e179f6aecfbb0d68b81fa257.1667308828.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211mgmt.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h
index 1ef30d3f3159e..dcff56d184985 100644
--- a/drivers/staging/wlan-ng/p80211mgmt.h
+++ b/drivers/staging/wlan-ng/p80211mgmt.h
@@ -225,13 +225,6 @@ struct wlan_ie {
 	u8 len;
 } __packed;
 
-/*-- Service Set Identity (SSID)  -----------------*/
-struct wlan_ie_ssid {
-	u8 eid;
-	u8 len;
-	u8 ssid[1];		/* may be zero, ptrs may overlap */
-} __packed;
-
 /*-- Supported Rates  -----------------------------*/
 struct wlan_ie_supp_rates {
 	u8 eid;
@@ -319,7 +312,6 @@ struct wlan_fr_beacon {
 	u16 *bcn_int;
 	u16 *cap_info;
 	/*-- info elements ----------*/
-	struct wlan_ie_ssid *ssid;
 	struct wlan_ie_supp_rates *supp_rates;
 	struct wlan_ie_fh_parms *fh_parms;
 	struct wlan_ie_ds_parms *ds_parms;
@@ -372,7 +364,6 @@ struct wlan_fr_assocreq {
 	u16 *cap_info;
 	u16 *listen_int;
 	/*-- info elements ----------*/
-	struct wlan_ie_ssid *ssid;
 	struct wlan_ie_supp_rates *supp_rates;
 
 };
@@ -407,7 +398,6 @@ struct wlan_fr_reassocreq {
 	u16 *listen_int;
 	u8 *curr_ap;
 	/*-- info elements ----------*/
-	struct wlan_ie_ssid *ssid;
 	struct wlan_ie_supp_rates *supp_rates;
 
 };
@@ -439,7 +429,6 @@ struct wlan_fr_probereq {
 	void *priv;
 	/*-- fixed fields -----------*/
 	/*-- info elements ----------*/
-	struct wlan_ie_ssid *ssid;
 	struct wlan_ie_supp_rates *supp_rates;
 
 };
@@ -457,7 +446,6 @@ struct wlan_fr_proberesp {
 	u16 *bcn_int;
 	u16 *cap_info;
 	/*-- info elements ----------*/
-	struct wlan_ie_ssid *ssid;
 	struct wlan_ie_supp_rates *supp_rates;
 	struct wlan_ie_fh_parms *fh_parms;
 	struct wlan_ie_ds_parms *ds_parms;
-- 
GitLab


From 2a899064b60ad1ede8a3990bd512e859b15896d5 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 1 Nov 2022 19:18:35 +0530
Subject: [PATCH 0778/4122] staging: wlan-ng: Remove unused struct
 wlan_ie_supp_rates references

Pointer reference to struct wlan_ie_supp_rates is added as a member
variable to 7 different structures. However, these references are
never used. Remove such unused struct references. The cleanup also
renders the struct useless; so remove it as well.
Issue identified as part of coccicheck based code analysis.

Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/370feb1f300896af66fa1c443d3ad19dc8934be3.1667308828.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211mgmt.h | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h
index dcff56d184985..536794bdd1f00 100644
--- a/drivers/staging/wlan-ng/p80211mgmt.h
+++ b/drivers/staging/wlan-ng/p80211mgmt.h
@@ -225,13 +225,6 @@ struct wlan_ie {
 	u8 len;
 } __packed;
 
-/*-- Supported Rates  -----------------------------*/
-struct wlan_ie_supp_rates {
-	u8 eid;
-	u8 len;
-	u8 rates[1];		/* had better be at LEAST one! */
-} __packed;
-
 /*-- FH Parameter Set  ----------------------------*/
 struct wlan_ie_fh_parms {
 	u8 eid;
@@ -312,7 +305,6 @@ struct wlan_fr_beacon {
 	u16 *bcn_int;
 	u16 *cap_info;
 	/*-- info elements ----------*/
-	struct wlan_ie_supp_rates *supp_rates;
 	struct wlan_ie_fh_parms *fh_parms;
 	struct wlan_ie_ds_parms *ds_parms;
 	struct wlan_ie_cf_parms *cf_parms;
@@ -364,7 +356,6 @@ struct wlan_fr_assocreq {
 	u16 *cap_info;
 	u16 *listen_int;
 	/*-- info elements ----------*/
-	struct wlan_ie_supp_rates *supp_rates;
 
 };
 
@@ -381,7 +372,6 @@ struct wlan_fr_assocresp {
 	u16 *status;
 	u16 *aid;
 	/*-- info elements ----------*/
-	struct wlan_ie_supp_rates *supp_rates;
 
 };
 
@@ -398,7 +388,6 @@ struct wlan_fr_reassocreq {
 	u16 *listen_int;
 	u8 *curr_ap;
 	/*-- info elements ----------*/
-	struct wlan_ie_supp_rates *supp_rates;
 
 };
 
@@ -415,7 +404,6 @@ struct wlan_fr_reassocresp {
 	u16 *status;
 	u16 *aid;
 	/*-- info elements ----------*/
-	struct wlan_ie_supp_rates *supp_rates;
 
 };
 
@@ -429,7 +417,6 @@ struct wlan_fr_probereq {
 	void *priv;
 	/*-- fixed fields -----------*/
 	/*-- info elements ----------*/
-	struct wlan_ie_supp_rates *supp_rates;
 
 };
 
@@ -446,7 +433,6 @@ struct wlan_fr_proberesp {
 	u16 *bcn_int;
 	u16 *cap_info;
 	/*-- info elements ----------*/
-	struct wlan_ie_supp_rates *supp_rates;
 	struct wlan_ie_fh_parms *fh_parms;
 	struct wlan_ie_ds_parms *ds_parms;
 	struct wlan_ie_cf_parms *cf_parms;
-- 
GitLab


From a5f994fc062971aa801eda9e8af214e785e32aff Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 1 Nov 2022 19:19:05 +0530
Subject: [PATCH 0779/4122] staging: wlan-ng: Remove unused struct wlan_ie_tim
 references

Pointer reference to struct wlan_ie_tim is added as a member
variable of a structure; However, this references is never
used. Remove such unused struct reference. The cleanup also
renders the struct useless; so remove it as well.
Issue identified as part of coccicheck based code analysis.

Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/36834007587a2e0ef7a782f5919f3a4c756b7840.1667308828.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211mgmt.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h
index 536794bdd1f00..d95ffe0097e94 100644
--- a/drivers/staging/wlan-ng/p80211mgmt.h
+++ b/drivers/staging/wlan-ng/p80211mgmt.h
@@ -253,16 +253,6 @@ struct wlan_ie_cf_parms {
 	u16 cfp_durremaining;
 } __packed;
 
-/*-- TIM ------------------------------------------*/
-struct wlan_ie_tim {
-	u8 eid;
-	u8 len;
-	u8 dtim_cnt;
-	u8 dtim_period;
-	u8 bitmap_ctl;
-	u8 virt_bm[1];
-} __packed;
-
 /*-- IBSS Parameter Set ---------------------------*/
 struct wlan_ie_ibss_parms {
 	u8 eid;
@@ -309,8 +299,6 @@ struct wlan_fr_beacon {
 	struct wlan_ie_ds_parms *ds_parms;
 	struct wlan_ie_cf_parms *cf_parms;
 	struct wlan_ie_ibss_parms *ibss_parms;
-	struct wlan_ie_tim *tim;
-
 };
 
 /*-- IBSS ATIM ------------------------------------*/
-- 
GitLab


From a2c896ab66f2c65cf6eeba140b97200a47d0450e Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 1 Nov 2022 19:19:30 +0530
Subject: [PATCH 0780/4122] staging: wlan-ng: Remove unused struct
 wlan_ie_ibss_parms references

Pointer reference to struct wlan_ie_ibss_parms is added as a member
variable to 2 structures; However, these references are never used.
Remove such unused struct references. The cleanup also renders the
struct useless; so remove it as well.
Issue identified as part of coccicheck based code analysis.

Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/e53084cdd3ca3ffea6a32393f6f28c7b7650d1a4.1667308828.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211mgmt.h | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h
index d95ffe0097e94..816b25641f5a5 100644
--- a/drivers/staging/wlan-ng/p80211mgmt.h
+++ b/drivers/staging/wlan-ng/p80211mgmt.h
@@ -253,13 +253,6 @@ struct wlan_ie_cf_parms {
 	u16 cfp_durremaining;
 } __packed;
 
-/*-- IBSS Parameter Set ---------------------------*/
-struct wlan_ie_ibss_parms {
-	u8 eid;
-	u8 len;
-	u16 atim_win;
-} __packed;
-
 /*-- Challenge Text  ------------------------------*/
 struct wlan_ie_challenge {
 	u8 eid;
@@ -298,7 +291,6 @@ struct wlan_fr_beacon {
 	struct wlan_ie_fh_parms *fh_parms;
 	struct wlan_ie_ds_parms *ds_parms;
 	struct wlan_ie_cf_parms *cf_parms;
-	struct wlan_ie_ibss_parms *ibss_parms;
 };
 
 /*-- IBSS ATIM ------------------------------------*/
@@ -424,7 +416,6 @@ struct wlan_fr_proberesp {
 	struct wlan_ie_fh_parms *fh_parms;
 	struct wlan_ie_ds_parms *ds_parms;
 	struct wlan_ie_cf_parms *cf_parms;
-	struct wlan_ie_ibss_parms *ibss_parms;
 };
 
 /*-- Authentication -------------------------------*/
-- 
GitLab


From 5d2366684f9331dc91449b7b866f172d7641ecf3 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 1 Nov 2022 19:19:54 +0530
Subject: [PATCH 0781/4122] staging: wlan-ng: Remove unused struct
 p80211macarray definition

struct p80211macarray is defined but is never used. Remove the unused
struct declaration.
Issue identified as part of coccicheck based code analysis.

Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Suggested-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/b42eff9b8f73542bc9015948e5e4684416982e1b.1667308828.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211types.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211types.h b/drivers/staging/wlan-ng/p80211types.h
index 6486612a8f31b..b2ed969604133 100644
--- a/drivers/staging/wlan-ng/p80211types.h
+++ b/drivers/staging/wlan-ng/p80211types.h
@@ -231,12 +231,6 @@ struct p80211pstr32 {
 	u8 data[MAXLEN_PSTR32];
 } __packed;
 
-/* MAC address array */
-struct p80211macarray {
-	u32 cnt;
-	u8 data[1][MAXLEN_PSTR6];
-} __packed;
-
 /* prototype template */
 struct p80211item {
 	u32 did;
-- 
GitLab


From 49af5dd7209ea2242a3c55e0b180f7ff7913fa3b Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 1 Nov 2022 19:20:13 +0530
Subject: [PATCH 0782/4122] staging: wlan-ng: Remove unused function
 declarations

Several functions are declared but are not implemented or used in any
part of the code. Remove such unimplemented function declarations.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/09e2af8d702e33c15ed9f655b0a1190b4e8bec86.1667308828.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211mgmt.h | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h
index 816b25641f5a5..da824bc363655 100644
--- a/drivers/staging/wlan-ng/p80211mgmt.h
+++ b/drivers/staging/wlan-ng/p80211mgmt.h
@@ -449,26 +449,4 @@ struct wlan_fr_deauthen {
 	/*-- info elements ----------*/
 
 };
-
-void wlan_mgmt_encode_beacon(struct wlan_fr_beacon *f);
-void wlan_mgmt_decode_beacon(struct wlan_fr_beacon *f);
-void wlan_mgmt_encode_disassoc(struct wlan_fr_disassoc *f);
-void wlan_mgmt_decode_disassoc(struct wlan_fr_disassoc *f);
-void wlan_mgmt_encode_assocreq(struct wlan_fr_assocreq *f);
-void wlan_mgmt_decode_assocreq(struct wlan_fr_assocreq *f);
-void wlan_mgmt_encode_assocresp(struct wlan_fr_assocresp *f);
-void wlan_mgmt_decode_assocresp(struct wlan_fr_assocresp *f);
-void wlan_mgmt_encode_reassocreq(struct wlan_fr_reassocreq *f);
-void wlan_mgmt_decode_reassocreq(struct wlan_fr_reassocreq *f);
-void wlan_mgmt_encode_reassocresp(struct wlan_fr_reassocresp *f);
-void wlan_mgmt_decode_reassocresp(struct wlan_fr_reassocresp *f);
-void wlan_mgmt_encode_probereq(struct wlan_fr_probereq *f);
-void wlan_mgmt_decode_probereq(struct wlan_fr_probereq *f);
-void wlan_mgmt_encode_proberesp(struct wlan_fr_proberesp *f);
-void wlan_mgmt_decode_proberesp(struct wlan_fr_proberesp *f);
-void wlan_mgmt_encode_authen(struct wlan_fr_authen *f);
-void wlan_mgmt_decode_authen(struct wlan_fr_authen *f);
-void wlan_mgmt_encode_deauthen(struct wlan_fr_deauthen *f);
-void wlan_mgmt_decode_deauthen(struct wlan_fr_deauthen *f);
-
 #endif /* _P80211MGMT_H */
-- 
GitLab


From d911a624cfb40e356b98f135d52914b30214fd7f Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 1 Nov 2022 19:20:40 +0530
Subject: [PATCH 0783/4122] staging: wlan-ng: Remove unused structure
 definitions

Remove structure definitions that are never used in the code.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/74a79ea400ec26624e445692f3353424fb6fc29e.1667308828.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/wlan-ng/p80211mgmt.h | 232 ---------------------------
 1 file changed, 232 deletions(-)

diff --git a/drivers/staging/wlan-ng/p80211mgmt.h b/drivers/staging/wlan-ng/p80211mgmt.h
index da824bc363655..fc23fae5651b9 100644
--- a/drivers/staging/wlan-ng/p80211mgmt.h
+++ b/drivers/staging/wlan-ng/p80211mgmt.h
@@ -217,236 +217,4 @@
 #define WLAN_SET_MGMT_CAP_INFO_PBCC(n)		((n) << 6)
 #define WLAN_SET_MGMT_CAP_INFO_AGILITY(n)	((n) << 7)
 
-/*-- Information Element Types --------------------*/
-/* prototype structure, all IEs start with these members */
-
-struct wlan_ie {
-	u8 eid;
-	u8 len;
-} __packed;
-
-/*-- FH Parameter Set  ----------------------------*/
-struct wlan_ie_fh_parms {
-	u8 eid;
-	u8 len;
-	u16 dwell;
-	u8 hopset;
-	u8 hoppattern;
-	u8 hopindex;
-} __packed;
-
-/*-- DS Parameter Set  ----------------------------*/
-struct wlan_ie_ds_parms {
-	u8 eid;
-	u8 len;
-	u8 curr_ch;
-} __packed;
-
-/*-- CF Parameter Set  ----------------------------*/
-
-struct wlan_ie_cf_parms {
-	u8 eid;
-	u8 len;
-	u8 cfp_cnt;
-	u8 cfp_period;
-	u16 cfp_maxdur;
-	u16 cfp_durremaining;
-} __packed;
-
-/*-- Challenge Text  ------------------------------*/
-struct wlan_ie_challenge {
-	u8 eid;
-	u8 len;
-	u8 challenge[1];
-} __packed;
-
-/*-------------------------------------------------*/
-/*  Frame Types  */
-
-/* prototype structure, all mgmt frame types will start with these members */
-struct wlan_fr_mgmt {
-	u16 type;
-	u16 len;		/* DOES NOT include CRC !!!! */
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	/*-- info elements ----------*/
-};
-
-/*-- Beacon ---------------------------------------*/
-struct wlan_fr_beacon {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u64 *ts;
-	u16 *bcn_int;
-	u16 *cap_info;
-	/*-- info elements ----------*/
-	struct wlan_ie_fh_parms *fh_parms;
-	struct wlan_ie_ds_parms *ds_parms;
-	struct wlan_ie_cf_parms *cf_parms;
-};
-
-/*-- IBSS ATIM ------------------------------------*/
-struct wlan_fr_ibssatim {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-
-	/*-- fixed fields -----------*/
-	/*-- info elements ----------*/
-
-	/* this frame type has a null body */
-
-};
-
-/*-- Disassociation -------------------------------*/
-struct wlan_fr_disassoc {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u16 *reason;
-
-	/*-- info elements ----------*/
-
-};
-
-/*-- Association Request --------------------------*/
-struct wlan_fr_assocreq {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u16 *cap_info;
-	u16 *listen_int;
-	/*-- info elements ----------*/
-
-};
-
-/*-- Association Response -------------------------*/
-struct wlan_fr_assocresp {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u16 *cap_info;
-	u16 *status;
-	u16 *aid;
-	/*-- info elements ----------*/
-
-};
-
-/*-- Reassociation Request ------------------------*/
-struct wlan_fr_reassocreq {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u16 *cap_info;
-	u16 *listen_int;
-	u8 *curr_ap;
-	/*-- info elements ----------*/
-
-};
-
-/*-- Reassociation Response -----------------------*/
-struct wlan_fr_reassocresp {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u16 *cap_info;
-	u16 *status;
-	u16 *aid;
-	/*-- info elements ----------*/
-
-};
-
-/*-- Probe Request --------------------------------*/
-struct wlan_fr_probereq {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	/*-- info elements ----------*/
-
-};
-
-/*-- Probe Response -------------------------------*/
-struct wlan_fr_proberesp {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u64 *ts;
-	u16 *bcn_int;
-	u16 *cap_info;
-	/*-- info elements ----------*/
-	struct wlan_ie_fh_parms *fh_parms;
-	struct wlan_ie_ds_parms *ds_parms;
-	struct wlan_ie_cf_parms *cf_parms;
-};
-
-/*-- Authentication -------------------------------*/
-struct wlan_fr_authen {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u16 *auth_alg;
-	u16 *auth_seq;
-	u16 *status;
-	/*-- info elements ----------*/
-	struct wlan_ie_challenge *challenge;
-
-};
-
-/*-- Deauthenication -----------------------------*/
-struct wlan_fr_deauthen {
-	u16 type;
-	u16 len;
-	u8 *buf;
-	struct p80211_hdr *hdr;
-	/* used for target specific data, skb in Linux */
-	void *priv;
-	/*-- fixed fields -----------*/
-	u16 *reason;
-
-	/*-- info elements ----------*/
-
-};
 #endif /* _P80211MGMT_H */
-- 
GitLab


From a37068715956111af1d49105d9e41b4cc8f69ea0 Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Wed, 2 Nov 2022 00:36:13 +0000
Subject: [PATCH 0784/4122] staging: r8188eu: convert rtw_setdatarate_cmd to
 correct error semantics

Convert rtw_setdatarate_cmd function to use proper error return codes
rather than _SUCCESS and _FAIL, and a simpler 'return 0;' style. For now,
wrap rtw_enqueue_cmd call and return -EPERM if it fails, as converting
this function makes more sense later on due to its large number of callers.

Also change rtw_wx_set_rate function to pass through the proper error
code rather than just 0 or -1.

Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Link: https://lore.kernel.org/r/20221102003613.971-1-phil@philpotter.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_cmd.c       | 18 +++++++-----------
 drivers/staging/r8188eu/include/rtw_cmd.h    |  2 +-
 drivers/staging/r8188eu/os_dep/ioctl_linux.c |  7 ++-----
 3 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_cmd.c b/drivers/staging/r8188eu/core/rtw_cmd.c
index fdc0c71e28a45..19b2f73d481db 100644
--- a/drivers/staging/r8188eu/core/rtw_cmd.c
+++ b/drivers/staging/r8188eu/core/rtw_cmd.c
@@ -340,33 +340,29 @@ u8 rtw_sitesurvey_cmd(struct adapter  *padapter, struct ndis_802_11_ssid *ssid,
 	return res;
 }
 
-u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset)
+int rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset)
 {
 	struct cmd_obj *ph2c;
 	struct setdatarate_parm *pbsetdataratepara;
 	struct cmd_priv *pcmdpriv = &padapter->cmdpriv;
-	u8	res = _SUCCESS;
 
 	ph2c = kzalloc(sizeof(*ph2c), GFP_ATOMIC);
-	if (!ph2c) {
-		res = _FAIL;
-		goto exit;
-	}
+	if (!ph2c)
+		return -ENOMEM;
 
 	pbsetdataratepara = kzalloc(sizeof(*pbsetdataratepara), GFP_ATOMIC);
 	if (!pbsetdataratepara) {
 		kfree(ph2c);
-		res = _FAIL;
-		goto exit;
+		return -ENOMEM;
 	}
 
 	init_h2fwcmd_w_parm_no_rsp(ph2c, pbsetdataratepara, GEN_CMD_CODE(_SetDataRate));
 	pbsetdataratepara->mac_id = 5;
 	memcpy(pbsetdataratepara->datarates, rateset, NumRates);
-	res = rtw_enqueue_cmd(pcmdpriv, ph2c);
-exit:
+	if (rtw_enqueue_cmd(pcmdpriv, ph2c) == _FAIL)
+		return -EPERM;
 
-	return res;
+	return 0;
 }
 
 void rtw_getbbrfreg_cmdrsp_callback(struct adapter *padapter,  struct cmd_obj *pcmd)
diff --git a/drivers/staging/r8188eu/include/rtw_cmd.h b/drivers/staging/r8188eu/include/rtw_cmd.h
index a740a9a101d88..c330a4435b31a 100644
--- a/drivers/staging/r8188eu/include/rtw_cmd.h
+++ b/drivers/staging/r8188eu/include/rtw_cmd.h
@@ -727,7 +727,7 @@ u8 rtw_clearstakey_cmd(struct adapter *padapter, u8 *psta, u8 entry, u8 enqueue)
 u8 rtw_joinbss_cmd(struct adapter *padapter, struct wlan_network *pnetwork);
 u8 rtw_disassoc_cmd(struct adapter *padapter, u32 deauth_timeout_ms, bool enqueue);
 u8 rtw_setopmode_cmd(struct adapter *padapter, enum ndis_802_11_network_infra networktype);
-u8 rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset);
+int rtw_setdatarate_cmd(struct adapter *padapter, u8 *rateset);
 u8 rtw_setrfintfs_cmd(struct adapter *padapter, u8 mode);
 
 u8 rtw_gettssi_cmd(struct adapter *padapter, u8 offset, u8 *pval);
diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index f3b3d74685390..dda48a2a6d0c1 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -1340,7 +1340,7 @@ static int rtw_wx_set_rate(struct net_device *dev,
 			      struct iw_request_info *a,
 			      union iwreq_data *wrqu, char *extra)
 {
-	int i, ret = 0;
+	int i;
 	struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev);
 	u8 datarates[NumRates];
 	u32	target_rate = wrqu->bitrate.value;
@@ -1408,10 +1408,7 @@ set_rate:
 		}
 	}
 
-	if (rtw_setdatarate_cmd(padapter, datarates) != _SUCCESS)
-		ret = -1;
-
-	return ret;
+	return rtw_setdatarate_cmd(padapter, datarates);
 }
 
 static int rtw_wx_get_rate(struct net_device *dev,
-- 
GitLab


From 4b21d25bf519c9487935a664886956bb18f04f6d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Mon, 24 Oct 2022 23:11:25 +0300
Subject: [PATCH 0785/4122] overflow: Introduce overflows_type() and
 castable_to_type()

Implement a robust overflows_type() macro to test if a variable or
constant value would overflow another variable or type. This can be
used as a constant expression for static_assert() (which requires a
constant expression[1][2]) when used on constant values. This must be
constructed manually, since __builtin_add_overflow() does not produce
a constant expression[3].

Additionally adds castable_to_type(), similar to __same_type(), but for
checking if a constant value would overflow if cast to a given type.

Add unit tests for overflows_type(), __same_type(), and castable_to_type()
to the existing KUnit "overflow" test:

[16:03:33] ================== overflow (21 subtests) ==================
...
[16:03:33] [PASSED] overflows_type_test
[16:03:33] [PASSED] same_type_test
[16:03:33] [PASSED] castable_to_type_test
[16:03:33] ==================== [PASSED] overflow =====================
[16:03:33] ============================================================
[16:03:33] Testing complete. Ran 21 tests: passed: 21
[16:03:33] Elapsed time: 24.022s total, 0.002s configuring, 22.598s building, 0.767s running

[1] https://en.cppreference.com/w/c/language/_Static_assert
[2] C11 standard (ISO/IEC 9899:2011): 6.7.10 Static assertions
[3] https://gcc.gnu.org/onlinedocs/gcc/Integer-Overflow-Builtins.html
    6.56 Built-in Functions to Perform Arithmetic with Overflow Checking
    Built-in Function: bool __builtin_add_overflow (type1 a, type2 b,

Cc: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Tom Rix <trix@redhat.com>
Cc: Daniel Latypov <dlatypov@google.com>
Cc: Vitor Massaru Iha <vitor@massaru.org>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: linux-hardening@vger.kernel.org
Cc: llvm@lists.linux.dev
Co-developed-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Gwan-gyeong Mun <gwan-gyeong.mun@intel.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221024201125.1416422-1-gwan-gyeong.mun@intel.com
---
 drivers/gpu/drm/i915/i915_user_extensions.c |   2 +-
 drivers/gpu/drm/i915/i915_utils.h           |   4 -
 include/linux/compiler.h                    |   1 +
 include/linux/overflow.h                    |  47 +++
 lib/Makefile                                |   1 +
 lib/overflow_kunit.c                        | 381 ++++++++++++++++++++
 6 files changed, 431 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
index c822d0aafd2dc..e3f808372c47b 100644
--- a/drivers/gpu/drm/i915/i915_user_extensions.c
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -51,7 +51,7 @@ int i915_user_extensions(struct i915_user_extension __user *ext,
 			return err;
 
 		if (get_user(next, &ext->next_extension) ||
-		    overflows_type(next, ext))
+		    overflows_type(next, uintptr_t))
 			return -EFAULT;
 
 		ext = u64_to_user_ptr(next);
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 6c14d13364bf7..67a66d4d5c70c 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -111,10 +111,6 @@ bool i915_error_injected(void);
 #define range_overflows_end_t(type, start, size, max) \
 	range_overflows_end((type)(start), (type)(size), (type)(max))
 
-/* Note we don't consider signbits :| */
-#define overflows_type(x, T) \
-	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
-
 #define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
 	(typeof(ptr))(__v & -BIT(n));					\
diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index 973a1bfd7ef53..947a60b801dbd 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -236,6 +236,7 @@ static inline void *offset_to_ptr(const int *off)
  * bool and also pointer types.
  */
 #define is_signed_type(type) (((type)(-1)) < (__force type)1)
+#define is_unsigned_type(type) (!is_signed_type(type))
 
 /*
  * This is needed in functions which generate the stack canary, see
diff --git a/include/linux/overflow.h b/include/linux/overflow.h
index 1d3be1a2204c3..0e33b5cbdb9f6 100644
--- a/include/linux/overflow.h
+++ b/include/linux/overflow.h
@@ -128,6 +128,53 @@ static inline bool __must_check __must_check_overflow(bool overflow)
 	(*_d >> _to_shift) != _a);					\
 }))
 
+#define __overflows_type_constexpr(x, T) (			\
+	is_unsigned_type(typeof(x)) ?				\
+		(x) > type_max(typeof(T)) :			\
+	is_unsigned_type(typeof(T)) ?				\
+		(x) < 0 || (x) > type_max(typeof(T)) :		\
+	(x) < type_min(typeof(T)) || (x) > type_max(typeof(T)))
+
+#define __overflows_type(x, T)		({	\
+	typeof(T) v = 0;			\
+	check_add_overflow((x), v, &v);		\
+})
+
+/**
+ * overflows_type - helper for checking the overflows between value, variables,
+ *		    or data type
+ *
+ * @n: source constant value or variable to be checked
+ * @T: destination variable or data type proposed to store @x
+ *
+ * Compares the @x expression for whether or not it can safely fit in
+ * the storage of the type in @T. @x and @T can have different types.
+ * If @x is a constant expression, this will also resolve to a constant
+ * expression.
+ *
+ * Returns: true if overflow can occur, false otherwise.
+ */
+#define overflows_type(n, T)					\
+	__builtin_choose_expr(__is_constexpr(n),		\
+			      __overflows_type_constexpr(n, T),	\
+			      __overflows_type(n, T))
+
+/**
+ * castable_to_type - like __same_type(), but also allows for casted literals
+ *
+ * @n: variable or constant value
+ * @T: variable or data type
+ *
+ * Unlike the __same_type() macro, this allows a constant value as the
+ * first argument. If this value would not overflow into an assignment
+ * of the second argument's type, it returns true. Otherwise, this falls
+ * back to __same_type().
+ */
+#define castable_to_type(n, T)						\
+	__builtin_choose_expr(__is_constexpr(n),			\
+			      !__overflows_type_constexpr(n, T),	\
+			      __same_type(n, T))
+
 /**
  * size_mul() - Calculate size_t multiplication with saturation at SIZE_MAX
  * @factor1: first factor
diff --git a/lib/Makefile b/lib/Makefile
index 77c7951c8cf09..322178b9f7fbc 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -374,6 +374,7 @@ obj-$(CONFIG_CMDLINE_KUNIT_TEST) += cmdline_kunit.o
 obj-$(CONFIG_SLUB_KUNIT_TEST) += slub_kunit.o
 obj-$(CONFIG_MEMCPY_KUNIT_TEST) += memcpy_kunit.o
 obj-$(CONFIG_IS_SIGNED_TYPE_KUNIT_TEST) += is_signed_type_kunit.o
+CFLAGS_overflow_kunit.o = $(call cc-disable-warning, tautological-constant-out-of-range-compare)
 obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
 CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
 obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
diff --git a/lib/overflow_kunit.c b/lib/overflow_kunit.c
index b8556a2e7bb1d..dcd3ba102db69 100644
--- a/lib/overflow_kunit.c
+++ b/lib/overflow_kunit.c
@@ -736,6 +736,384 @@ static void overflow_size_helpers_test(struct kunit *test)
 #undef check_one_size_helper
 }
 
+static void overflows_type_test(struct kunit *test)
+{
+	int count = 0;
+	unsigned int var;
+
+#define __TEST_OVERFLOWS_TYPE(func, arg1, arg2, of)	do {		\
+	bool __of = func(arg1, arg2);					\
+	KUNIT_EXPECT_EQ_MSG(test, __of, of,				\
+		"expected " #func "(" #arg1 ", " #arg2 " to%s overflow\n",\
+		of ? "" : " not");					\
+	count++;							\
+} while (0)
+
+/* Args are: first type, second type, value, overflow expected */
+#define TEST_OVERFLOWS_TYPE(__t1, __t2, v, of) do {			\
+	__t1 t1 = (v);							\
+	__t2 t2;							\
+	__TEST_OVERFLOWS_TYPE(__overflows_type, t1, t2, of);		\
+	__TEST_OVERFLOWS_TYPE(__overflows_type, t1, __t2, of);		\
+	__TEST_OVERFLOWS_TYPE(__overflows_type_constexpr, t1, t2, of);	\
+	__TEST_OVERFLOWS_TYPE(__overflows_type_constexpr, t1, __t2, of);\
+} while (0)
+
+	TEST_OVERFLOWS_TYPE(u8, u8, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u8, u16, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u8, s8, U8_MAX, true);
+	TEST_OVERFLOWS_TYPE(u8, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u8, s8, (u8)S8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u8, s16, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s8, u8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s8, u8, -1, true);
+	TEST_OVERFLOWS_TYPE(s8, u8, S8_MIN, true);
+	TEST_OVERFLOWS_TYPE(s8, u16, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s8, u16, -1, true);
+	TEST_OVERFLOWS_TYPE(s8, u16, S8_MIN, true);
+	TEST_OVERFLOWS_TYPE(s8, u32, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s8, u32, -1, true);
+	TEST_OVERFLOWS_TYPE(s8, u32, S8_MIN, true);
+#if BITS_PER_LONG == 64
+	TEST_OVERFLOWS_TYPE(s8, u64, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s8, u64, -1, true);
+	TEST_OVERFLOWS_TYPE(s8, u64, S8_MIN, true);
+#endif
+	TEST_OVERFLOWS_TYPE(s8, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s8, s8, S8_MIN, false);
+	TEST_OVERFLOWS_TYPE(s8, s16, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s8, s16, S8_MIN, false);
+	TEST_OVERFLOWS_TYPE(u16, u8, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u16, u8, (u16)U8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u16, u8, U16_MAX, true);
+	TEST_OVERFLOWS_TYPE(u16, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u16, s8, (u16)S8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u16, s8, U16_MAX, true);
+	TEST_OVERFLOWS_TYPE(u16, s16, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(u16, s16, (u16)S16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u16, s16, U16_MAX, true);
+	TEST_OVERFLOWS_TYPE(u16, u32, U16_MAX, false);
+	TEST_OVERFLOWS_TYPE(u16, s32, U16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, u8, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, u8, (s16)U8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s16, u8, -1, true);
+	TEST_OVERFLOWS_TYPE(s16, u8, S16_MIN, true);
+	TEST_OVERFLOWS_TYPE(s16, u16, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, u16, -1, true);
+	TEST_OVERFLOWS_TYPE(s16, u16, S16_MIN, true);
+	TEST_OVERFLOWS_TYPE(s16, u32, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, u32, -1, true);
+	TEST_OVERFLOWS_TYPE(s16, u32, S16_MIN, true);
+#if BITS_PER_LONG == 64
+	TEST_OVERFLOWS_TYPE(s16, u64, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, u64, -1, true);
+	TEST_OVERFLOWS_TYPE(s16, u64, S16_MIN, true);
+#endif
+	TEST_OVERFLOWS_TYPE(s16, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, s8, S8_MIN, false);
+	TEST_OVERFLOWS_TYPE(s16, s8, (s16)S8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s16, s8, (s16)S8_MIN - 1, true);
+	TEST_OVERFLOWS_TYPE(s16, s8, S16_MAX, true);
+	TEST_OVERFLOWS_TYPE(s16, s8, S16_MIN, true);
+	TEST_OVERFLOWS_TYPE(s16, s16, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, s16, S16_MIN, false);
+	TEST_OVERFLOWS_TYPE(s16, s32, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s16, s32, S16_MIN, false);
+	TEST_OVERFLOWS_TYPE(u32, u8, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u32, u8, (u32)U8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u32, u8, U32_MAX, true);
+	TEST_OVERFLOWS_TYPE(u32, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u32, s8, (u32)S8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u32, s8, U32_MAX, true);
+	TEST_OVERFLOWS_TYPE(u32, u16, U16_MAX, false);
+	TEST_OVERFLOWS_TYPE(u32, u16, U16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u32, u16, U32_MAX, true);
+	TEST_OVERFLOWS_TYPE(u32, s16, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(u32, s16, (u32)S16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u32, s16, U32_MAX, true);
+	TEST_OVERFLOWS_TYPE(u32, u32, U32_MAX, false);
+	TEST_OVERFLOWS_TYPE(u32, s32, S32_MAX, false);
+	TEST_OVERFLOWS_TYPE(u32, s32, U32_MAX, true);
+	TEST_OVERFLOWS_TYPE(u32, s32, (u32)S32_MAX + 1, true);
+#if BITS_PER_LONG == 64
+	TEST_OVERFLOWS_TYPE(u32, u64, U32_MAX, false);
+	TEST_OVERFLOWS_TYPE(u32, s64, U32_MAX, false);
+#endif
+	TEST_OVERFLOWS_TYPE(s32, u8, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, u8, (s32)U8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s32, u16, S32_MAX, true);
+	TEST_OVERFLOWS_TYPE(s32, u8, -1, true);
+	TEST_OVERFLOWS_TYPE(s32, u8, S32_MIN, true);
+	TEST_OVERFLOWS_TYPE(s32, u16, U16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, u16, (s32)U16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s32, u16, S32_MAX, true);
+	TEST_OVERFLOWS_TYPE(s32, u16, -1, true);
+	TEST_OVERFLOWS_TYPE(s32, u16, S32_MIN, true);
+	TEST_OVERFLOWS_TYPE(s32, u32, S32_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, u32, -1, true);
+	TEST_OVERFLOWS_TYPE(s32, u32, S32_MIN, true);
+#if BITS_PER_LONG == 64
+	TEST_OVERFLOWS_TYPE(s32, u64, S32_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, u64, -1, true);
+	TEST_OVERFLOWS_TYPE(s32, u64, S32_MIN, true);
+#endif
+	TEST_OVERFLOWS_TYPE(s32, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, s8, S8_MIN, false);
+	TEST_OVERFLOWS_TYPE(s32, s8, (s32)S8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s32, s8, (s32)S8_MIN - 1, true);
+	TEST_OVERFLOWS_TYPE(s32, s8, S32_MAX, true);
+	TEST_OVERFLOWS_TYPE(s32, s8, S32_MIN, true);
+	TEST_OVERFLOWS_TYPE(s32, s16, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, s16, S16_MIN, false);
+	TEST_OVERFLOWS_TYPE(s32, s16, (s32)S16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s32, s16, (s32)S16_MIN - 1, true);
+	TEST_OVERFLOWS_TYPE(s32, s16, S32_MAX, true);
+	TEST_OVERFLOWS_TYPE(s32, s16, S32_MIN, true);
+	TEST_OVERFLOWS_TYPE(s32, s32, S32_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, s32, S32_MIN, false);
+#if BITS_PER_LONG == 64
+	TEST_OVERFLOWS_TYPE(s32, s64, S32_MAX, false);
+	TEST_OVERFLOWS_TYPE(s32, s64, S32_MIN, false);
+	TEST_OVERFLOWS_TYPE(u64, u8, U64_MAX, true);
+	TEST_OVERFLOWS_TYPE(u64, u8, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, u8, (u64)U8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u64, u16, U64_MAX, true);
+	TEST_OVERFLOWS_TYPE(u64, u16, U16_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, u16, (u64)U16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u64, u32, U64_MAX, true);
+	TEST_OVERFLOWS_TYPE(u64, u32, U32_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, u32, (u64)U32_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u64, u64, U64_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, s8, (u64)S8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u64, s8, U64_MAX, true);
+	TEST_OVERFLOWS_TYPE(u64, s16, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, s16, (u64)S16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u64, s16, U64_MAX, true);
+	TEST_OVERFLOWS_TYPE(u64, s32, S32_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, s32, (u64)S32_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(u64, s32, U64_MAX, true);
+	TEST_OVERFLOWS_TYPE(u64, s64, S64_MAX, false);
+	TEST_OVERFLOWS_TYPE(u64, s64, U64_MAX, true);
+	TEST_OVERFLOWS_TYPE(u64, s64, (u64)S64_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s64, u8, S64_MAX, true);
+	TEST_OVERFLOWS_TYPE(s64, u8, S64_MIN, true);
+	TEST_OVERFLOWS_TYPE(s64, u8, -1, true);
+	TEST_OVERFLOWS_TYPE(s64, u8, U8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, u8, (s64)U8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s64, u16, S64_MAX, true);
+	TEST_OVERFLOWS_TYPE(s64, u16, S64_MIN, true);
+	TEST_OVERFLOWS_TYPE(s64, u16, -1, true);
+	TEST_OVERFLOWS_TYPE(s64, u16, U16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, u16, (s64)U16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s64, u32, S64_MAX, true);
+	TEST_OVERFLOWS_TYPE(s64, u32, S64_MIN, true);
+	TEST_OVERFLOWS_TYPE(s64, u32, -1, true);
+	TEST_OVERFLOWS_TYPE(s64, u32, U32_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, u32, (s64)U32_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s64, u64, S64_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, u64, S64_MIN, true);
+	TEST_OVERFLOWS_TYPE(s64, u64, -1, true);
+	TEST_OVERFLOWS_TYPE(s64, s8, S8_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, s8, S8_MIN, false);
+	TEST_OVERFLOWS_TYPE(s64, s8, (s64)S8_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s64, s8, (s64)S8_MIN - 1, true);
+	TEST_OVERFLOWS_TYPE(s64, s8, S64_MAX, true);
+	TEST_OVERFLOWS_TYPE(s64, s16, S16_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, s16, S16_MIN, false);
+	TEST_OVERFLOWS_TYPE(s64, s16, (s64)S16_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s64, s16, (s64)S16_MIN - 1, true);
+	TEST_OVERFLOWS_TYPE(s64, s16, S64_MAX, true);
+	TEST_OVERFLOWS_TYPE(s64, s32, S32_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, s32, S32_MIN, false);
+	TEST_OVERFLOWS_TYPE(s64, s32, (s64)S32_MAX + 1, true);
+	TEST_OVERFLOWS_TYPE(s64, s32, (s64)S32_MIN - 1, true);
+	TEST_OVERFLOWS_TYPE(s64, s32, S64_MAX, true);
+	TEST_OVERFLOWS_TYPE(s64, s64, S64_MAX, false);
+	TEST_OVERFLOWS_TYPE(s64, s64, S64_MIN, false);
+#endif
+
+	/* Check for macro side-effects. */
+	var = INT_MAX - 1;
+	__TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, false);
+	__TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, false);
+	__TEST_OVERFLOWS_TYPE(__overflows_type, var++, int, true);
+	var = INT_MAX - 1;
+	__TEST_OVERFLOWS_TYPE(overflows_type, var++, int, false);
+	__TEST_OVERFLOWS_TYPE(overflows_type, var++, int, false);
+	__TEST_OVERFLOWS_TYPE(overflows_type, var++, int, true);
+
+	kunit_info(test, "%d overflows_type() tests finished\n", count);
+#undef TEST_OVERFLOWS_TYPE
+#undef __TEST_OVERFLOWS_TYPE
+}
+
+static void same_type_test(struct kunit *test)
+{
+	int count = 0;
+	int var;
+
+#define TEST_SAME_TYPE(t1, t2, same)			do {	\
+	typeof(t1) __t1h = type_max(t1);			\
+	typeof(t1) __t1l = type_min(t1);			\
+	typeof(t2) __t2h = type_max(t2);			\
+	typeof(t2) __t2l = type_min(t2);			\
+	KUNIT_EXPECT_EQ(test, true, __same_type(t1, __t1h));	\
+	KUNIT_EXPECT_EQ(test, true, __same_type(t1, __t1l));	\
+	KUNIT_EXPECT_EQ(test, true, __same_type(__t1h, t1));	\
+	KUNIT_EXPECT_EQ(test, true, __same_type(__t1l, t1));	\
+	KUNIT_EXPECT_EQ(test, true, __same_type(t2, __t2h));	\
+	KUNIT_EXPECT_EQ(test, true, __same_type(t2, __t2l));	\
+	KUNIT_EXPECT_EQ(test, true, __same_type(__t2h, t2));	\
+	KUNIT_EXPECT_EQ(test, true, __same_type(__t2l, t2));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(t1, t2));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(t2, __t1h));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(t2, __t1l));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(__t1h, t2));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(__t1l, t2));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(t1, __t2h));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(t1, __t2l));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(__t2h, t1));	\
+	KUNIT_EXPECT_EQ(test, same, __same_type(__t2l, t1));	\
+} while (0)
+
+#if BITS_PER_LONG == 64
+# define TEST_SAME_TYPE64(base, t, m)	TEST_SAME_TYPE(base, t, m)
+#else
+# define TEST_SAME_TYPE64(base, t, m)	do { } while (0)
+#endif
+
+#define TEST_TYPE_SETS(base, mu8, mu16, mu32, ms8, ms16, ms32, mu64, ms64) \
+do {									\
+	TEST_SAME_TYPE(base,  u8,  mu8);				\
+	TEST_SAME_TYPE(base, u16, mu16);				\
+	TEST_SAME_TYPE(base, u32, mu32);				\
+	TEST_SAME_TYPE(base,  s8,  ms8);				\
+	TEST_SAME_TYPE(base, s16, ms16);				\
+	TEST_SAME_TYPE(base, s32, ms32);				\
+	TEST_SAME_TYPE64(base, u64, mu64);				\
+	TEST_SAME_TYPE64(base, s64, ms64);				\
+} while (0)
+
+	TEST_TYPE_SETS(u8,   true, false, false, false, false, false, false, false);
+	TEST_TYPE_SETS(u16, false,  true, false, false, false, false, false, false);
+	TEST_TYPE_SETS(u32, false, false,  true, false, false, false, false, false);
+	TEST_TYPE_SETS(s8,  false, false, false,  true, false, false, false, false);
+	TEST_TYPE_SETS(s16, false, false, false, false,  true, false, false, false);
+	TEST_TYPE_SETS(s32, false, false, false, false, false,  true, false, false);
+#if BITS_PER_LONG == 64
+	TEST_TYPE_SETS(u64, false, false, false, false, false, false,  true, false);
+	TEST_TYPE_SETS(s64, false, false, false, false, false, false, false,  true);
+#endif
+
+	/* Check for macro side-effects. */
+	var = 4;
+	KUNIT_EXPECT_EQ(test, var, 4);
+	KUNIT_EXPECT_TRUE(test, __same_type(var++, int));
+	KUNIT_EXPECT_EQ(test, var, 4);
+	KUNIT_EXPECT_TRUE(test, __same_type(int, var++));
+	KUNIT_EXPECT_EQ(test, var, 4);
+	KUNIT_EXPECT_TRUE(test, __same_type(var++, var++));
+	KUNIT_EXPECT_EQ(test, var, 4);
+
+	kunit_info(test, "%d __same_type() tests finished\n", count);
+
+#undef TEST_TYPE_SETS
+#undef TEST_SAME_TYPE64
+#undef TEST_SAME_TYPE
+}
+
+static void castable_to_type_test(struct kunit *test)
+{
+	int count = 0;
+
+#define TEST_CASTABLE_TO_TYPE(arg1, arg2, pass)	do {	\
+	bool __pass = castable_to_type(arg1, arg2);		\
+	KUNIT_EXPECT_EQ_MSG(test, __pass, pass,			\
+		"expected castable_to_type(" #arg1 ", " #arg2 ") to%s pass\n",\
+		pass ? "" : " not");				\
+	count++;						\
+} while (0)
+
+	TEST_CASTABLE_TO_TYPE(16, u8, true);
+	TEST_CASTABLE_TO_TYPE(16, u16, true);
+	TEST_CASTABLE_TO_TYPE(16, u32, true);
+	TEST_CASTABLE_TO_TYPE(16, s8, true);
+	TEST_CASTABLE_TO_TYPE(16, s16, true);
+	TEST_CASTABLE_TO_TYPE(16, s32, true);
+	TEST_CASTABLE_TO_TYPE(-16, s8, true);
+	TEST_CASTABLE_TO_TYPE(-16, s16, true);
+	TEST_CASTABLE_TO_TYPE(-16, s32, true);
+#if BITS_PER_LONG == 64
+	TEST_CASTABLE_TO_TYPE(16, u64, true);
+	TEST_CASTABLE_TO_TYPE(-16, s64, true);
+#endif
+
+#define TEST_CASTABLE_TO_TYPE_VAR(width)	do {				\
+	u ## width u ## width ## var = 0;					\
+	s ## width s ## width ## var = 0;					\
+										\
+	/* Constant expressions that fit types. */				\
+	TEST_CASTABLE_TO_TYPE(type_max(u ## width), u ## width, true);		\
+	TEST_CASTABLE_TO_TYPE(type_min(u ## width), u ## width, true);		\
+	TEST_CASTABLE_TO_TYPE(type_max(u ## width), u ## width ## var, true);	\
+	TEST_CASTABLE_TO_TYPE(type_min(u ## width), u ## width ## var, true);	\
+	TEST_CASTABLE_TO_TYPE(type_max(s ## width), s ## width, true);		\
+	TEST_CASTABLE_TO_TYPE(type_min(s ## width), s ## width, true);		\
+	TEST_CASTABLE_TO_TYPE(type_max(s ## width), s ## width ## var, true);	\
+	TEST_CASTABLE_TO_TYPE(type_min(u ## width), s ## width ## var, true);	\
+	/* Constant expressions that do not fit types. */			\
+	TEST_CASTABLE_TO_TYPE(type_max(u ## width), s ## width, false);		\
+	TEST_CASTABLE_TO_TYPE(type_max(u ## width), s ## width ## var, false);	\
+	TEST_CASTABLE_TO_TYPE(type_min(s ## width), u ## width, false);		\
+	TEST_CASTABLE_TO_TYPE(type_min(s ## width), u ## width ## var, false);	\
+	/* Non-constant expression with mismatched type. */			\
+	TEST_CASTABLE_TO_TYPE(s ## width ## var, u ## width, false);		\
+	TEST_CASTABLE_TO_TYPE(u ## width ## var, s ## width, false);		\
+} while (0)
+
+#define TEST_CASTABLE_TO_TYPE_RANGE(width)	do {				\
+	unsigned long big = U ## width ## _MAX;					\
+	signed long small = S ## width ## _MIN;					\
+	u ## width u ## width ## var = 0;					\
+	s ## width s ## width ## var = 0;					\
+										\
+	/* Constant expression in range. */					\
+	TEST_CASTABLE_TO_TYPE(U ## width ## _MAX, u ## width, true);		\
+	TEST_CASTABLE_TO_TYPE(U ## width ## _MAX, u ## width ## var, true);	\
+	TEST_CASTABLE_TO_TYPE(S ## width ## _MIN, s ## width, true);		\
+	TEST_CASTABLE_TO_TYPE(S ## width ## _MIN, s ## width ## var, true);	\
+	/* Constant expression out of range. */					\
+	TEST_CASTABLE_TO_TYPE((unsigned long)U ## width ## _MAX + 1, u ## width, false); \
+	TEST_CASTABLE_TO_TYPE((unsigned long)U ## width ## _MAX + 1, u ## width ## var, false); \
+	TEST_CASTABLE_TO_TYPE((signed long)S ## width ## _MIN - 1, s ## width, false); \
+	TEST_CASTABLE_TO_TYPE((signed long)S ## width ## _MIN - 1, s ## width ## var, false); \
+	/* Non-constant expression with mismatched type. */			\
+	TEST_CASTABLE_TO_TYPE(big, u ## width, false);				\
+	TEST_CASTABLE_TO_TYPE(big, u ## width ## var, false);			\
+	TEST_CASTABLE_TO_TYPE(small, s ## width, false);			\
+	TEST_CASTABLE_TO_TYPE(small, s ## width ## var, false);			\
+} while (0)
+
+	TEST_CASTABLE_TO_TYPE_VAR(8);
+	TEST_CASTABLE_TO_TYPE_VAR(16);
+	TEST_CASTABLE_TO_TYPE_VAR(32);
+#if BITS_PER_LONG == 64
+	TEST_CASTABLE_TO_TYPE_VAR(64);
+#endif
+
+	TEST_CASTABLE_TO_TYPE_RANGE(8);
+	TEST_CASTABLE_TO_TYPE_RANGE(16);
+#if BITS_PER_LONG == 64
+	TEST_CASTABLE_TO_TYPE_RANGE(32);
+#endif
+	kunit_info(test, "%d castable_to_type() tests finished\n", count);
+
+#undef TEST_CASTABLE_TO_TYPE_RANGE
+#undef TEST_CASTABLE_TO_TYPE_VAR
+#undef TEST_CASTABLE_TO_TYPE
+}
+
 static struct kunit_case overflow_test_cases[] = {
 	KUNIT_CASE(u8_u8__u8_overflow_test),
 	KUNIT_CASE(s8_s8__s8_overflow_test),
@@ -755,6 +1133,9 @@ static struct kunit_case overflow_test_cases[] = {
 	KUNIT_CASE(shift_nonsense_test),
 	KUNIT_CASE(overflow_allocation_test),
 	KUNIT_CASE(overflow_size_helpers_test),
+	KUNIT_CASE(overflows_type_test),
+	KUNIT_CASE(same_type_test),
+	KUNIT_CASE(castable_to_type_test),
 	{}
 };
 
-- 
GitLab


From a072f249b1b3a457196c83df622e3aa376b1f8df Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Tue, 1 Nov 2022 14:16:52 +0000
Subject: [PATCH 0786/4122] dt-bindings: i2c: mv64xxx: Add F1C100s compatible
 string

The I2C controller IP used in the Allwinner F1C100s series of SoCs is
compatible with the ones used in the other Allwinner SoCs.

Add an F1C100s specific compatible string to the list of existing names.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
index 93c164aa00daf..984fc1ed3ec6a 100644
--- a/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/marvell,mv64xxx-i2c.yaml
@@ -19,6 +19,7 @@ properties:
       - const: allwinner,sun6i-a31-i2c
       - items:
           - enum:
+              - allwinner,suniv-f1c100s-i2c
               - allwinner,sun8i-a23-i2c
               - allwinner,sun8i-a83t-i2c
               - allwinner,sun8i-v536-i2c
-- 
GitLab


From 52951ea193ad3b77c433497425a1049520fd6f22 Mon Sep 17 00:00:00 2001
From: Weilong Chen <chenweilong@huawei.com>
Date: Tue, 1 Nov 2022 16:07:27 +0800
Subject: [PATCH 0787/4122] i2c: hisi: Add initial device tree support

The HiSilicon I2C controller can be used on embedded platform, which
boot from devicetree.

Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Acked-by: Yicong Yang <yangyicong@hisilicon.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/Kconfig    | 2 +-
 drivers/i2c/busses/i2c-hisi.c | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/Kconfig b/drivers/i2c/busses/Kconfig
index e50f9603d189e..a7bfddf08fa7b 100644
--- a/drivers/i2c/busses/Kconfig
+++ b/drivers/i2c/busses/Kconfig
@@ -673,7 +673,7 @@ config I2C_HIGHLANDER
 
 config I2C_HISI
 	tristate "HiSilicon I2C controller"
-	depends on (ARM64 && ACPI) || COMPILE_TEST
+	depends on ARM64 || COMPILE_TEST
 	help
 	  Say Y here if you want to have Hisilicon I2C controller support
 	  available on the Kunpeng Server.
diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c
index 76c3d8f6fc3c6..bcc97e4fcb654 100644
--- a/drivers/i2c/busses/i2c-hisi.c
+++ b/drivers/i2c/busses/i2c-hisi.c
@@ -489,11 +489,18 @@ static const struct acpi_device_id hisi_i2c_acpi_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, hisi_i2c_acpi_ids);
 
+static const struct of_device_id hisi_i2c_dts_ids[] = {
+	{ .compatible = "hisilicon,ascend910-i2c", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, hisi_i2c_dts_ids);
+
 static struct platform_driver hisi_i2c_driver = {
 	.probe		= hisi_i2c_probe,
 	.driver		= {
 		.name	= "hisi-i2c",
 		.acpi_match_table = hisi_i2c_acpi_ids,
+		.of_match_table = hisi_i2c_dts_ids,
 	},
 };
 module_platform_driver(hisi_i2c_driver);
-- 
GitLab


From e77f7ba726cc0c9b1c62b295d2aac42c3a18ebd1 Mon Sep 17 00:00:00 2001
From: Weilong Chen <chenweilong@huawei.com>
Date: Tue, 1 Nov 2022 16:07:28 +0800
Subject: [PATCH 0788/4122] dt-bindings: i2c: add entry for
 hisilicon,ascend910-i2c

Add the new compatible for HiSilicon i2c.

Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 .../bindings/i2c/hisilicon,ascend910-i2c.yaml | 73 +++++++++++++++++++
 MAINTAINERS                                   |  1 +
 2 files changed, 74 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml

diff --git a/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml b/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml
new file mode 100644
index 0000000000000..7d7a8de7bcd89
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/hisilicon,ascend910-i2c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: HiSilicon common I2C controller
+
+maintainers:
+  - Yicong Yang <yangyicong@hisilicon.com>
+
+description:
+  The HiSilicon common I2C controller can be used for many different
+  types of SoC such as Huawei Ascend AI series chips.
+
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+
+properties:
+  compatible:
+    const: hisilicon,ascend910-i2c
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-frequency:
+    default: 400000
+
+  i2c-sda-falling-time-ns:
+    default: 343
+
+  i2c-scl-falling-time-ns:
+    default: 203
+
+  i2c-sda-hold-time-ns:
+    default: 830
+
+  i2c-scl-rising-time-ns:
+    default: 365
+
+  i2c-digital-filter-width-ns:
+    default: 0
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    i2c@38b0000 {
+      compatible = "hisilicon,ascend910-i2c";
+      reg = <0x38b0000 0x10000>;
+      interrupts = <GIC_SPI 120 IRQ_TYPE_LEVEL_HIGH>;
+      i2c-sda-falling-time-ns = <56>;
+      i2c-scl-falling-time-ns = <56>;
+      i2c-sda-hold-time-ns = <56>;
+      i2c-scl-rising-time-ns = <56>;
+      i2c-digital-filter;
+      i2c-digital-filter-width-ns = <0x0>;
+      clocks = <&alg_clk>;
+      clock-frequency = <400000>;
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 379945f82a643..1d09b36b9b9c8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9214,6 +9214,7 @@ M:	Yicong Yang <yangyicong@hisilicon.com>
 L:	linux-i2c@vger.kernel.org
 S:	Maintained
 W:	https://www.hisilicon.com
+F:	Documentation/devicetree/bindings/i2c/hisilicon,ascend910-i2c.yaml
 F:	drivers/i2c/busses/i2c-hisi.c
 
 HISILICON LPC BUS DRIVER
-- 
GitLab


From 5fd8c2d3de3dd3cc6d36a0c7a08e44cd5bf173e6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 22 Oct 2022 11:29:48 -0700
Subject: [PATCH 0789/4122] tty: Move sysctl setup into "core" tty logic

In preparation for adding another sysctl to the tty subsystem, move the
tty setup code into the "core" tty code, which contains tty_init() itself.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221022182949.2684794-1-keescook@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty.h       |  2 +-
 drivers/tty/tty_io.c    | 34 ++++++++++++++++++++++++++++++++--
 drivers/tty/tty_ldisc.c | 38 +-------------------------------------
 3 files changed, 34 insertions(+), 40 deletions(-)

diff --git a/drivers/tty/tty.h b/drivers/tty/tty.h
index 1c08c9b67b16c..f45cd683c02ea 100644
--- a/drivers/tty/tty.h
+++ b/drivers/tty/tty.h
@@ -93,7 +93,7 @@ void tty_ldisc_release(struct tty_struct *tty);
 int __must_check tty_ldisc_init(struct tty_struct *tty);
 void tty_ldisc_deinit(struct tty_struct *tty);
 
-void tty_sysctl_init(void);
+extern int tty_ldisc_autoload;
 
 /* tty_audit.c */
 #ifdef CONFIG_AUDIT
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index de06c3c2ff70a..fe77a3d41326d 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3572,13 +3572,44 @@ void console_sysfs_notify(void)
 		sysfs_notify(&consdev->kobj, NULL, "active");
 }
 
+static struct ctl_table tty_table[] = {
+	{
+		.procname	= "ldisc_autoload",
+		.data		= &tty_ldisc_autoload,
+		.maxlen		= sizeof(tty_ldisc_autoload),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{ }
+};
+
+static struct ctl_table tty_dir_table[] = {
+	{
+		.procname	= "tty",
+		.mode		= 0555,
+		.child		= tty_table,
+	},
+	{ }
+};
+
+static struct ctl_table tty_root_table[] = {
+	{
+		.procname	= "dev",
+		.mode		= 0555,
+		.child		= tty_dir_table,
+	},
+	{ }
+};
+
 /*
  * Ok, now we can initialize the rest of the tty devices and can count
  * on memory allocations, interrupts etc..
  */
 int __init tty_init(void)
 {
-	tty_sysctl_init();
+	register_sysctl_table(tty_root_table);
 	cdev_init(&tty_cdev, &tty_fops);
 	if (cdev_add(&tty_cdev, MKDEV(TTYAUX_MAJOR, 0), 1) ||
 	    register_chrdev_region(MKDEV(TTYAUX_MAJOR, 0), 1, "/dev/tty") < 0)
@@ -3600,4 +3631,3 @@ int __init tty_init(void)
 #endif
 	return 0;
 }
-
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index 776d8a62f77cc..e758f44729e7e 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -117,7 +117,7 @@ static void put_ldops(struct tty_ldisc_ops *ldops)
 	raw_spin_unlock_irqrestore(&tty_ldiscs_lock, flags);
 }
 
-static int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD);
+int tty_ldisc_autoload = IS_BUILTIN(CONFIG_LDISC_AUTOLOAD);
 
 /**
  * tty_ldisc_get	-	take a reference to an ldisc
@@ -817,39 +817,3 @@ void tty_ldisc_deinit(struct tty_struct *tty)
 		tty_ldisc_put(tty->ldisc);
 	tty->ldisc = NULL;
 }
-
-static struct ctl_table tty_table[] = {
-	{
-		.procname	= "ldisc_autoload",
-		.data		= &tty_ldisc_autoload,
-		.maxlen		= sizeof(tty_ldisc_autoload),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{ }
-};
-
-static struct ctl_table tty_dir_table[] = {
-	{
-		.procname	= "tty",
-		.mode		= 0555,
-		.child		= tty_table,
-	},
-	{ }
-};
-
-static struct ctl_table tty_root_table[] = {
-	{
-		.procname	= "dev",
-		.mode		= 0555,
-		.child		= tty_dir_table,
-	},
-	{ }
-};
-
-void tty_sysctl_init(void)
-{
-	register_sysctl_table(tty_root_table);
-}
-- 
GitLab


From 83efeeeb3d04b22aaed1df99bc70a48fe9d22c4d Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 22 Oct 2022 11:29:49 -0700
Subject: [PATCH 0790/4122] tty: Allow TIOCSTI to be disabled

TIOCSTI continues its long history of being used in privilege escalation
attacks[1]. Prior attempts to provide a mechanism to disable this have
devolved into discussions around creating full-blown LSMs to provide
arbitrary ioctl filtering, which is hugely over-engineered -- only
TIOCSTI is being used this way. 3 years ago OpenBSD entirely removed
TIOCSTI[2], Android has had it filtered for longer[3], and the tools that
had historically used TIOCSTI either do not need it, are not commonly
built with it, or have had its use removed.

Provide a simple CONFIG and global sysctl to disable this for the system
builders who have wanted this functionality for literally decades now,
much like the ldisc_autoload CONFIG and sysctl.

[1] https://lore.kernel.org/linux-hardening/Y0m9l52AKmw6Yxi1@hostpad
[2] https://undeadly.org/cgi?action=article;sid=20170701132619
[3] https://lore.kernel.org/lkml/CAFJ0LnFGRuEEn1tCLhoki8ZyWrKfktbF+rwwN7WzyC_kBFoQVA@mail.gmail.com/

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Simon Brand <simon.brand@postadigitale.de>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221022182949.2684794-2-keescook@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/Kconfig  | 19 +++++++++++++++++++
 drivers/tty/tty_io.c | 11 +++++++++++
 2 files changed, 30 insertions(+)

diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index cc30ff93e2e47..d35fc068da74e 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -149,6 +149,25 @@ config LEGACY_PTY_COUNT
 	  When not in use, each legacy PTY occupies 12 bytes on 32-bit
 	  architectures and 24 bytes on 64-bit architectures.
 
+config LEGACY_TIOCSTI
+	bool "Allow legacy TIOCSTI usage"
+	default y
+	help
+	  Historically the kernel has allowed TIOCSTI, which will push
+	  characters into a controlling TTY. This continues to be used
+	  as a malicious privilege escalation mechanism, and provides no
+	  meaningful real-world utility any more. Its use is considered
+	  a dangerous legacy operation, and can be disabled on most
+	  systems.
+
+	  Say 'Y here only if you have confirmed that your system's
+	  userspace depends on this functionality to continue operating
+	  normally.
+
+	  This functionality can be changed at runtime with the
+	  dev.tty.legacy_tiocsti sysctl. This configuration option sets
+	  the default value of the sysctl.
+
 config LDISC_AUTOLOAD
 	bool "Automatically load TTY Line Disciplines"
 	default y
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index fe77a3d41326d..a6a16cf986b72 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2268,11 +2268,15 @@ static int tty_fasync(int fd, struct file *filp, int on)
  *  * Called functions take tty_ldiscs_lock
  *  * current->signal->tty check is safe without locks
  */
+static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI);
 static int tiocsti(struct tty_struct *tty, char __user *p)
 {
 	char ch, mbz = 0;
 	struct tty_ldisc *ld;
 
+	if (!tty_legacy_tiocsti)
+		return -EIO;
+
 	if ((current->signal->tty != tty) && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
 	if (get_user(ch, p))
@@ -3573,6 +3577,13 @@ void console_sysfs_notify(void)
 }
 
 static struct ctl_table tty_table[] = {
+	{
+		.procname	= "legacy_tiocsti",
+		.data		= &tty_legacy_tiocsti,
+		.maxlen		= sizeof(tty_legacy_tiocsti),
+		.mode		= 0644,
+		.proc_handler	= proc_dobool,
+	},
 	{
 		.procname	= "ldisc_autoload",
 		.data		= &tty_ldisc_autoload,
-- 
GitLab


From 8275b48b278096edc1e3ea5aa9cf946a10022f79 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 4 Oct 2022 12:49:25 +0200
Subject: [PATCH 0791/4122] tty: serial: introduce transmit helpers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Many serial drivers do the same thing:
* send x_char if set
* keep sending from the xmit circular buffer until either
  - the loop reaches the end of the xmit buffer
  - TX is stopped
  - HW fifo is full
* check for pending characters and:
  - wake up tty writers to fill for more data into xmit buffer
  - stop TX if there is nothing in the xmit buffer

The only differences are:
* how to write the character to the HW fifo
* the check of the end condition:
  - is the HW fifo full?
  - is limit of the written characters reached?

So unify the above into two helpers:
* uart_port_tx_limited() -- it performs the above taking the written
  characters limit into account, and
* uart_port_tx() -- the same as above, except it only checks the HW
  readiness, not the characters limit.

The HW specific operations (as stated as "differences" above) are passed
as arguments to the macros. They are:
* tx_ready -- returns true if HW can accept more data.
* put_char -- write a character to the device.
* tx_done -- when the write loop is done, perform arbitrary action
  before potential invocation of ops->stop_tx() happens.

Note that the above are macros. This means the code is generated in
place and the above 3 arguments are "inlined". I.e. no added penalty by
generating call instructions for every single character. Nor any
indirect calls. (As in some previous versions of this patchset.)

Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221004104927.14361-2-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/serial/driver.rst |  3 +
 include/linux/serial_core.h                | 80 ++++++++++++++++++++++
 2 files changed, 83 insertions(+)

diff --git a/Documentation/driver-api/serial/driver.rst b/Documentation/driver-api/serial/driver.rst
index 23c6b956cd90d..98d268555dcc9 100644
--- a/Documentation/driver-api/serial/driver.rst
+++ b/Documentation/driver-api/serial/driver.rst
@@ -78,6 +78,9 @@ Other functions
            uart_get_lsr_info uart_handle_dcd_change uart_handle_cts_change
            uart_try_toggle_sysrq uart_get_console
 
+.. kernel-doc:: include/linux/serial_core.h
+   :identifiers: uart_port_tx_limited uart_port_tx
+
 Other notes
 -----------
 
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index d657f2a42a7b8..dbbc4408bb19a 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -664,6 +664,86 @@ struct uart_driver {
 
 void uart_write_wakeup(struct uart_port *port);
 
+#define __uart_port_tx(uport, ch, tx_ready, put_char, tx_done, for_test,      \
+		for_post)						      \
+({									      \
+	struct uart_port *__port = (uport);				      \
+	struct circ_buf *xmit = &__port->state->xmit;			      \
+	unsigned int pending;						      \
+									      \
+	for (; (for_test) && (tx_ready); (for_post), __port->icount.tx++) {   \
+		if (__port->x_char) {					      \
+			(ch) = __port->x_char;				      \
+			(put_char);					      \
+			__port->x_char = 0;				      \
+			continue;					      \
+		}							      \
+									      \
+		if (uart_circ_empty(xmit) || uart_tx_stopped(__port))	      \
+			break;						      \
+									      \
+		(ch) = xmit->buf[xmit->tail];				      \
+		(put_char);						      \
+		xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE;		      \
+	}								      \
+									      \
+	(tx_done);							      \
+									      \
+	pending = uart_circ_chars_pending(xmit);			      \
+	if (pending < WAKEUP_CHARS) {					      \
+		uart_write_wakeup(__port);				      \
+									      \
+		if (pending == 0)					      \
+			__port->ops->stop_tx(__port);			      \
+	}								      \
+									      \
+	pending;							      \
+})
+
+/**
+ * uart_port_tx_limited -- transmit helper for uart_port with count limiting
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @count: a limit of characters to send
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ * @tx_done: function to call after the loop is done
+ *
+ * This helper transmits characters from the xmit buffer to the hardware using
+ * @put_char(). It does so until @count characters are sent and while @tx_ready
+ * evaluates to true.
+ *
+ * Returns: the number of characters in the xmit buffer when done.
+ *
+ * The expression in macro parameters shall be designed as follows:
+ *  * **tx_ready:** should evaluate to true if the HW can accept more data to
+ *    be sent. This parameter can be %true, which means the HW is always ready.
+ *  * **put_char:** shall write @ch to the device of @port.
+ *  * **tx_done:** when the write loop is done, this can perform arbitrary
+ *    action before potential invocation of ops->stop_tx() happens. If the
+ *    driver does not need to do anything, use e.g. ({}).
+ *
+ * For all of them, @port->lock is held, interrupts are locally disabled and
+ * the expressions must not sleep.
+ */
+#define uart_port_tx_limited(port, ch, count, tx_ready, put_char, tx_done) ({ \
+	unsigned int __count = (count);					      \
+	__uart_port_tx(port, ch, tx_ready, put_char, tx_done, __count,	      \
+			__count--);					      \
+})
+
+/**
+ * uart_port_tx -- transmit helper for uart_port
+ * @port: uart port
+ * @ch: variable to store a character to be written to the HW
+ * @tx_ready: can HW accept more data function
+ * @put_char: function to write a character
+ *
+ * See uart_port_tx_limited() for more details.
+ */
+#define uart_port_tx(port, ch, tx_ready, put_char)			\
+	__uart_port_tx(port, ch, tx_ready, put_char, ({}), true, ({}))
+
 /*
  * Baud rate helpers.
  */
-- 
GitLab


From 2d141e683e9ac7041c0350bb7b5e31f5f02ddbe3 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 4 Oct 2022 12:49:26 +0200
Subject: [PATCH 0792/4122] tty: serial: use uart_port_tx() helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

uart_port_tx() is a new helper to send characters to the device. Use it
in these drivers.

Cc: Tobias Klauser <tklauser@distanz.ch>
Cc: Richard Genoud <richard.genoud@gmail.com>
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Claudiu Beznea <claudiu.beznea@microchip.com>
Cc: Vladimir Zapolskiy <vz@mleia.com>
Cc: Liviu Dudau <liviu.dudau@arm.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
Cc: Shawn Guo <shawnguo@kernel.org>
Cc: Sascha Hauer <s.hauer@pengutronix.de>
Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
Cc: Fabio Estevam <festevam@gmail.com>
Cc: NXP Linux Team <linux-imx@nxp.com>
Cc: "Andreas Färber" <afaerber@suse.de>
Cc: Manivannan Sadhasivam <mani@kernel.org>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221004104927.14361-3-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/altera_uart.c   | 29 ++++------------------
 drivers/tty/serial/atmel_serial.c  | 28 +++++----------------
 drivers/tty/serial/fsl_lpuart.c    | 30 ++++-------------------
 drivers/tty/serial/lantiq.c        | 36 +++------------------------
 drivers/tty/serial/lpc32xx_hs.c    | 33 +++----------------------
 drivers/tty/serial/mcf.c           | 34 ++++++--------------------
 drivers/tty/serial/mpc52xx_uart.c  | 39 +++---------------------------
 drivers/tty/serial/mps2-uart.c     | 26 +++-----------------
 drivers/tty/serial/mxs-auart.c     | 32 ++++++------------------
 drivers/tty/serial/owl-uart.c      | 32 +++---------------------
 drivers/tty/serial/sa1100.c        | 34 +++-----------------------
 drivers/tty/serial/vt8500_serial.c | 30 +++--------------------
 12 files changed, 60 insertions(+), 323 deletions(-)

diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 82f2790de28d1..316074bb23e9f 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -247,31 +247,12 @@ static void altera_uart_rx_chars(struct uart_port *port)
 
 static void altera_uart_tx_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-
-	if (port->x_char) {
-		/* Send special char - probably flow control */
-		altera_uart_writel(port, port->x_char, ALTERA_UART_TXDATA_REG);
-		port->x_char = 0;
-		port->icount.tx++;
-		return;
-	}
-
-	while (altera_uart_readl(port, ALTERA_UART_STATUS_REG) &
-	       ALTERA_UART_STATUS_TRDY_MSK) {
-		if (xmit->head == xmit->tail)
-			break;
-		altera_uart_writel(port, xmit->buf[xmit->tail],
-		       ALTERA_UART_TXDATA_REG);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		altera_uart_stop_tx(port);
+	uart_port_tx(port, ch,
+		altera_uart_readl(port, ALTERA_UART_STATUS_REG) &
+		                ALTERA_UART_STATUS_TRDY_MSK,
+		altera_uart_writel(port, ch, ALTERA_UART_TXDATA_REG));
 }
 
 static irqreturn_t altera_uart_interrupt(int irq, void *data)
diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index bd07f79a2df91..a6b4d30c5888d 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -824,30 +824,14 @@ static void atmel_rx_chars(struct uart_port *port)
  */
 static void atmel_tx_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+	bool pending;
+	u8 ch;
 
-	if (port->x_char &&
-	    (atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY)) {
-		atmel_uart_write_char(port, port->x_char);
-		port->icount.tx++;
-		port->x_char = 0;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port))
-		return;
-
-	while (atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY) {
-		atmel_uart_write_char(port, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-
-	if (!uart_circ_empty(xmit)) {
+	pending = uart_port_tx(port, ch,
+		atmel_uart_readl(port, ATMEL_US_CSR) & ATMEL_US_TXRDY,
+		atmel_uart_write_char(port, ch));
+	if (pending) {
 		/* we still have characters to transmit, so we should continue
 		 * transmitting them when TX is ready, regardless of
 		 * mode or duplexity
diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 67fa113f77d47..d811eda1844e7 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -742,32 +742,12 @@ static int lpuart32_poll_get_char(struct uart_port *port)
 
 static inline void lpuart_transmit_buffer(struct lpuart_port *sport)
 {
-	struct circ_buf *xmit = &sport->port.state->xmit;
-
-	if (sport->port.x_char) {
-		writeb(sport->port.x_char, sport->port.membase + UARTDR);
-		sport->port.icount.tx++;
-		sport->port.x_char = 0;
-		return;
-	}
-
-	if (lpuart_stopped_or_empty(&sport->port)) {
-		lpuart_stop_tx(&sport->port);
-		return;
-	}
-
-	while (!uart_circ_empty(xmit) &&
-		(readb(sport->port.membase + UARTTCFIFO) < sport->txfifo_size)) {
-		writeb(xmit->buf[xmit->tail], sport->port.membase + UARTDR);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->port.icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&sport->port);
+	struct uart_port *port = &sport->port;
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		lpuart_stop_tx(&sport->port);
+	uart_port_tx(port, ch,
+		readb(port->membase + UARTTCFIFO) < sport->txfifo_size,
+		writeb(ch, port->membase + UARTDR));
 }
 
 static inline void lpuart32_transmit_buffer(struct lpuart_port *sport)
diff --git a/drivers/tty/serial/lantiq.c b/drivers/tty/serial/lantiq.c
index c892f3c7d1abc..a58e9277dfad8 100644
--- a/drivers/tty/serial/lantiq.c
+++ b/drivers/tty/serial/lantiq.c
@@ -95,7 +95,6 @@
 #define ASCFSTAT_TXFFLMASK	0x3F00
 #define ASCFSTAT_TXFREEMASK	0x3F000000
 
-static void lqasc_tx_chars(struct uart_port *port);
 static struct ltq_uart_port *lqasc_port[MAXPORTS];
 static struct uart_driver lqasc_reg;
 
@@ -151,9 +150,12 @@ lqasc_start_tx(struct uart_port *port)
 {
 	unsigned long flags;
 	struct ltq_uart_port *ltq_port = to_ltq_uart_port(port);
+	u8 ch;
 
 	spin_lock_irqsave(&ltq_port->lock, flags);
-	lqasc_tx_chars(port);
+	uart_port_tx(port, ch,
+		lqasc_tx_ready(port),
+		writeb(ch, port->membase + LTQ_ASC_TBUF));
 	spin_unlock_irqrestore(&ltq_port->lock, flags);
 	return;
 }
@@ -226,36 +228,6 @@ lqasc_rx_chars(struct uart_port *port)
 	return 0;
 }
 
-static void
-lqasc_tx_chars(struct uart_port *port)
-{
-	struct circ_buf *xmit = &port->state->xmit;
-	if (uart_tx_stopped(port)) {
-		lqasc_stop_tx(port);
-		return;
-	}
-
-	while (lqasc_tx_ready(port)) {
-		if (port->x_char) {
-			writeb(port->x_char, port->membase + LTQ_ASC_TBUF);
-			port->icount.tx++;
-			port->x_char = 0;
-			continue;
-		}
-
-		if (uart_circ_empty(xmit))
-			break;
-
-		writeb(port->state->xmit.buf[port->state->xmit.tail],
-			port->membase + LTQ_ASC_TBUF);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-}
-
 static irqreturn_t
 lqasc_tx_int(int irq, void *_port)
 {
diff --git a/drivers/tty/serial/lpc32xx_hs.c b/drivers/tty/serial/lpc32xx_hs.c
index ed47f47683383..b38fe4728c264 100644
--- a/drivers/tty/serial/lpc32xx_hs.c
+++ b/drivers/tty/serial/lpc32xx_hs.c
@@ -276,8 +276,6 @@ static void __serial_lpc32xx_rx(struct uart_port *port)
 	tty_flip_buffer_push(tport);
 }
 
-static void serial_lpc32xx_stop_tx(struct uart_port *port);
-
 static bool serial_lpc32xx_tx_ready(struct uart_port *port)
 {
 	u32 level = readl(LPC32XX_HSUART_LEVEL(port->membase));
@@ -287,34 +285,11 @@ static bool serial_lpc32xx_tx_ready(struct uart_port *port)
 
 static void __serial_lpc32xx_tx(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-
-	if (port->x_char) {
-		writel((u32)port->x_char, LPC32XX_HSUART_FIFO(port->membase));
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port))
-		goto exit_tx;
-
-	/* Transfer data */
-	while (serial_lpc32xx_tx_ready(port)) {
-		writel((u32) xmit->buf[xmit->tail],
-		       LPC32XX_HSUART_FIFO(port->membase));
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-exit_tx:
-	if (uart_circ_empty(xmit))
-		serial_lpc32xx_stop_tx(port);
+	uart_port_tx(port, ch,
+		serial_lpc32xx_tx_ready(port),
+		writel(ch, LPC32XX_HSUART_FIFO(port->membase)));
 }
 
 static irqreturn_t serial_lpc32xx_interrupt(int irq, void *dev_id)
diff --git a/drivers/tty/serial/mcf.c b/drivers/tty/serial/mcf.c
index b1cd9a76dd93b..3239babe12a4e 100644
--- a/drivers/tty/serial/mcf.c
+++ b/drivers/tty/serial/mcf.c
@@ -327,34 +327,16 @@ static void mcf_rx_chars(struct mcf_uart *pp)
 static void mcf_tx_chars(struct mcf_uart *pp)
 {
 	struct uart_port *port = &pp->port;
-	struct circ_buf *xmit = &port->state->xmit;
-
-	if (port->x_char) {
-		/* Send special char - probably flow control */
-		writeb(port->x_char, port->membase + MCFUART_UTB);
-		port->x_char = 0;
-		port->icount.tx++;
-		return;
-	}
-
-	while (readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY) {
-		if (uart_circ_empty(xmit))
-			break;
-		writeb(xmit->buf[xmit->tail], port->membase + MCFUART_UTB);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE -1);
-		port->icount.tx++;
-	}
+	bool pending;
+	u8 ch;
 
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	pending = uart_port_tx(port, ch,
+		readb(port->membase + MCFUART_USR) & MCFUART_USR_TXREADY,
+		writeb(ch, port->membase + MCFUART_UTB));
 
-	if (uart_circ_empty(xmit)) {
-		mcf_stop_tx(port);
-		/* Disable TX to negate RTS automatically */
-		if (port->rs485.flags & SER_RS485_ENABLED)
-			writeb(MCFUART_UCR_TXDISABLE,
-				port->membase + MCFUART_UCR);
-	}
+	/* Disable TX to negate RTS automatically */
+	if (!pending && (port->rs485.flags & SER_RS485_ENABLED))
+		writeb(MCFUART_UCR_TXDISABLE, port->membase + MCFUART_UCR);
 }
 
 /****************************************************************************/
diff --git a/drivers/tty/serial/mpc52xx_uart.c b/drivers/tty/serial/mpc52xx_uart.c
index 73362d4bc45d3..384ca195e3d5d 100644
--- a/drivers/tty/serial/mpc52xx_uart.c
+++ b/drivers/tty/serial/mpc52xx_uart.c
@@ -1428,42 +1428,11 @@ mpc52xx_uart_int_rx_chars(struct uart_port *port)
 static inline bool
 mpc52xx_uart_int_tx_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-
-	/* Process out of band chars */
-	if (port->x_char) {
-		psc_ops->write_char(port, port->x_char);
-		port->icount.tx++;
-		port->x_char = 0;
-		return true;
-	}
-
-	/* Nothing to do ? */
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		mpc52xx_uart_stop_tx(port);
-		return false;
-	}
-
-	/* Send chars */
-	while (psc_ops->raw_tx_rdy(port)) {
-		psc_ops->write_char(port, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	}
-
-	/* Wake up */
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-
-	/* Maybe we're done after all */
-	if (uart_circ_empty(xmit)) {
-		mpc52xx_uart_stop_tx(port);
-		return false;
-	}
+	u8 ch;
 
-	return true;
+	return uart_port_tx(port, ch,
+		psc_ops->raw_tx_rdy(port),
+		psc_ops->write_char(port, ch));
 }
 
 static irqreturn_t
diff --git a/drivers/tty/serial/mps2-uart.c b/drivers/tty/serial/mps2-uart.c
index 2e3e6cf168178..860d161fa5948 100644
--- a/drivers/tty/serial/mps2-uart.c
+++ b/drivers/tty/serial/mps2-uart.c
@@ -129,29 +129,11 @@ static void mps2_uart_stop_tx(struct uart_port *port)
 
 static void mps2_uart_tx_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-
-	while (!(mps2_uart_read8(port, UARTn_STATE) & UARTn_STATE_TX_FULL)) {
-		if (port->x_char) {
-			mps2_uart_write8(port, port->x_char, UARTn_DATA);
-			port->x_char = 0;
-			port->icount.tx++;
-			continue;
-		}
-
-		if (uart_circ_empty(xmit) || uart_tx_stopped(port))
-			break;
-
-		mps2_uart_write8(port, xmit->buf[xmit->tail], UARTn_DATA);
-		xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE;
-		port->icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		mps2_uart_stop_tx(port);
+	uart_port_tx(port, ch,
+		mps2_uart_tx_empty(port),
+		mps2_uart_write8(port, ch, UARTn_DATA));
 }
 
 static void mps2_uart_start_tx(struct uart_port *port)
diff --git a/drivers/tty/serial/mxs-auart.c b/drivers/tty/serial/mxs-auart.c
index d21a4f3ef2fe6..ef6e7bb6105ca 100644
--- a/drivers/tty/serial/mxs-auart.c
+++ b/drivers/tty/serial/mxs-auart.c
@@ -569,6 +569,8 @@ static int mxs_auart_dma_tx(struct mxs_auart_port *s, int size)
 static void mxs_auart_tx_chars(struct mxs_auart_port *s)
 {
 	struct circ_buf *xmit = &s->port.state->xmit;
+	bool pending;
+	u8 ch;
 
 	if (auart_dma_enabled(s)) {
 		u32 i = 0;
@@ -603,31 +605,13 @@ static void mxs_auart_tx_chars(struct mxs_auart_port *s)
 		return;
 	}
 
-
-	while (!(mxs_read(s, REG_STAT) & AUART_STAT_TXFF)) {
-		if (s->port.x_char) {
-			s->port.icount.tx++;
-			mxs_write(s->port.x_char, s, REG_DATA);
-			s->port.x_char = 0;
-			continue;
-		}
-		if (!uart_circ_empty(xmit) && !uart_tx_stopped(&s->port)) {
-			s->port.icount.tx++;
-			mxs_write(xmit->buf[xmit->tail], s, REG_DATA);
-			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		} else
-			break;
-	}
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&s->port);
-
-	if (uart_circ_empty(&(s->port.state->xmit)))
-		mxs_clr(AUART_INTR_TXIEN, s, REG_INTR);
-	else
+	pending = uart_port_tx(&s->port, ch,
+		!(mxs_read(s, REG_STAT) & AUART_STAT_TXFF),
+		mxs_write(ch, s, REG_DATA));
+	if (pending)
 		mxs_set(AUART_INTR_TXIEN, s, REG_INTR);
-
-	if (uart_tx_stopped(&s->port))
-		mxs_auart_stop_tx(&s->port);
+	else
+		mxs_clr(AUART_INTR_TXIEN, s, REG_INTR);
 }
 
 static void mxs_auart_rx_char(struct mxs_auart_port *s)
diff --git a/drivers/tty/serial/owl-uart.c b/drivers/tty/serial/owl-uart.c
index fde39cc1145db..e99970a9437ff 100644
--- a/drivers/tty/serial/owl-uart.c
+++ b/drivers/tty/serial/owl-uart.c
@@ -181,35 +181,11 @@ static void owl_uart_start_tx(struct uart_port *port)
 
 static void owl_uart_send_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-	unsigned int ch;
-
-	if (port->x_char) {
-		while (!(owl_uart_read(port, OWL_UART_STAT) & OWL_UART_STAT_TFFU))
-			cpu_relax();
-		owl_uart_write(port, port->x_char, OWL_UART_TXDAT);
-		port->icount.tx++;
-		port->x_char = 0;
-	}
-
-	if (uart_tx_stopped(port))
-		return;
-
-	while (!(owl_uart_read(port, OWL_UART_STAT) & OWL_UART_STAT_TFFU)) {
-		if (uart_circ_empty(xmit))
-			break;
+	u8 ch;
 
-		ch = xmit->buf[xmit->tail];
-		owl_uart_write(port, ch, OWL_UART_TXDAT);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-
-	if (uart_circ_empty(xmit))
-		owl_uart_stop_tx(port);
+	uart_port_tx(port, ch,
+		!(owl_uart_read(port, OWL_UART_STAT) & OWL_UART_STAT_TFFU),
+		owl_uart_write(port, ch, OWL_UART_TXDAT));
 }
 
 static void owl_uart_receive_chars(struct uart_port *port)
diff --git a/drivers/tty/serial/sa1100.c b/drivers/tty/serial/sa1100.c
index dd9e3253cab44..55107bbc00ceb 100644
--- a/drivers/tty/serial/sa1100.c
+++ b/drivers/tty/serial/sa1100.c
@@ -228,14 +228,7 @@ sa1100_rx_chars(struct sa1100_port *sport)
 
 static void sa1100_tx_chars(struct sa1100_port *sport)
 {
-	struct circ_buf *xmit = &sport->port.state->xmit;
-
-	if (sport->port.x_char) {
-		UART_PUT_CHAR(sport, sport->port.x_char);
-		sport->port.icount.tx++;
-		sport->port.x_char = 0;
-		return;
-	}
+	u8 ch;
 
 	/*
 	 * Check the modem control lines before
@@ -243,28 +236,9 @@ static void sa1100_tx_chars(struct sa1100_port *sport)
 	 */
 	sa1100_mctrl_check(sport);
 
-	if (uart_circ_empty(xmit) || uart_tx_stopped(&sport->port)) {
-		sa1100_stop_tx(&sport->port);
-		return;
-	}
-
-	/*
-	 * Tried using FIFO (not checking TNF) for fifo fill:
-	 * still had the '4 bytes repeated' problem.
-	 */
-	while (UART_GET_UTSR1(sport) & UTSR1_TNF) {
-		UART_PUT_CHAR(sport, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->port.icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&sport->port);
-
-	if (uart_circ_empty(xmit))
-		sa1100_stop_tx(&sport->port);
+	uart_port_tx(&sport->port, ch,
+			UART_GET_UTSR1(sport) & UTSR1_TNF,
+			UART_PUT_CHAR(sport, ch));
 }
 
 static irqreturn_t sa1100_int(int irq, void *dev_id)
diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c
index 10fbdb09965f3..deedb65131606 100644
--- a/drivers/tty/serial/vt8500_serial.c
+++ b/drivers/tty/serial/vt8500_serial.c
@@ -196,33 +196,11 @@ static unsigned int vt8500_tx_empty(struct uart_port *port)
 
 static void handle_tx(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
+	u8 ch;
 
-	if (port->x_char) {
-		writeb(port->x_char, port->membase + VT8500_TXFIFO);
-		port->icount.tx++;
-		port->x_char = 0;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		vt8500_stop_tx(port);
-		return;
-	}
-
-	while (vt8500_tx_empty(port)) {
-		if (uart_circ_empty(xmit))
-			break;
-
-		writeb(xmit->buf[xmit->tail], port->membase + VT8500_TXFIFO);
-
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-
-	if (uart_circ_empty(xmit))
-		vt8500_stop_tx(port);
+	uart_port_tx(port, ch,
+		vt8500_tx_empty(port),
+		writeb(ch, port->membase + VT8500_TXFIFO));
 }
 
 static void vt8500_start_tx(struct uart_port *port)
-- 
GitLab


From d11cc8c3c4b65e00e01f20a920c5fa412415204a Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 4 Oct 2022 12:49:27 +0200
Subject: [PATCH 0793/4122] tty: serial: use uart_port_tx_limited()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

uart_port_tx_limited() is a new helper to send characters to the device.
Use it in these drivers.

mux.c also needs to define tx_done(). But I'm not sure if the driver
really wants to wait for all the characters to dismiss from the HW fifo
at this code point. Hence I marked this as FIXME.

Cc: Russell King <linux@armlinux.org.uk>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: bcm-kernel-feedback-list@broadcom.com
Cc: "Pali Rohár" <pali@kernel.org>
Cc: Kevin Cernekee <cernekee@gmail.com>
Cc: Palmer Dabbelt <palmer@dabbelt.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Orson Zhai <orsonzhai@gmail.com>
Cc: Baolin Wang <baolin.wang7@gmail.com>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: Patrice Chotard <patrice.chotard@foss.st.com>
Cc: linux-riscv@lists.infradead.org
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221004104927.14361-4-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/21285.c           | 32 +++----------------
 drivers/tty/serial/altera_jtaguart.c | 36 ++++-----------------
 drivers/tty/serial/amba-pl010.c      | 32 +++----------------
 drivers/tty/serial/apbuart.c         | 34 +++-----------------
 drivers/tty/serial/bcm63xx_uart.c    | 47 ++++++---------------------
 drivers/tty/serial/mux.c             | 45 ++++++++------------------
 drivers/tty/serial/mvebu-uart.c      | 38 +++-------------------
 drivers/tty/serial/omap-serial.c     | 32 +++----------------
 drivers/tty/serial/pxa.c             | 33 +++----------------
 drivers/tty/serial/rp2.c             | 31 ++++--------------
 drivers/tty/serial/serial_txx9.c     | 32 +++----------------
 drivers/tty/serial/sifive.c          | 31 +++---------------
 drivers/tty/serial/sprd_serial.c     | 33 +++----------------
 drivers/tty/serial/st-asc.c          | 48 +++-------------------------
 14 files changed, 85 insertions(+), 419 deletions(-)

diff --git a/drivers/tty/serial/21285.c b/drivers/tty/serial/21285.c
index c7d34823f7155..185462fd959c6 100644
--- a/drivers/tty/serial/21285.c
+++ b/drivers/tty/serial/21285.c
@@ -154,35 +154,13 @@ static irqreturn_t serial21285_rx_chars(int irq, void *dev_id)
 static irqreturn_t serial21285_tx_chars(int irq, void *dev_id)
 {
 	struct uart_port *port = dev_id;
-	struct circ_buf *xmit = &port->state->xmit;
-	int count = 256;
-
-	if (port->x_char) {
-		*CSR_UARTDR = port->x_char;
-		port->icount.tx++;
-		port->x_char = 0;
-		goto out;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		serial21285_stop_tx(port);
-		goto out;
-	}
-
-	do {
-		*CSR_UARTDR = xmit->buf[xmit->tail];
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0 && !(*CSR_UARTFLG & 0x20));
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		serial21285_stop_tx(port);
+	uart_port_tx_limited(port, ch, 256,
+		!(*CSR_UARTFLG & 0x20),
+		*CSR_UARTDR = ch,
+		({}));
 
- out:
 	return IRQ_HANDLED;
 }
 
diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index c2d154d78e545..aa49553fac584 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -146,37 +146,15 @@ static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp)
 static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp)
 {
 	struct uart_port *port = &pp->port;
-	struct circ_buf *xmit = &port->state->xmit;
-	unsigned int pending, count;
-
-	if (port->x_char) {
-		/* Send special char - probably flow control */
-		writel(port->x_char, port->membase + ALTERA_JTAGUART_DATA_REG);
-		port->x_char = 0;
-		port->icount.tx++;
-		return;
-	}
+	unsigned int count;
+	u8 ch;
 
-	pending = uart_circ_chars_pending(xmit);
-	if (pending > 0) {
-		count = altera_jtaguart_tx_space(port, NULL);
-		if (count > pending)
-			count = pending;
-		if (count > 0) {
-			pending -= count;
-			while (count--) {
-				writel(xmit->buf[xmit->tail],
-				       port->membase + ALTERA_JTAGUART_DATA_REG);
-				xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-				port->icount.tx++;
-			}
-			if (pending < WAKEUP_CHARS)
-				uart_write_wakeup(port);
-		}
-	}
+	count = altera_jtaguart_tx_space(port, NULL);
 
-	if (pending == 0)
-		altera_jtaguart_stop_tx(port);
+	uart_port_tx_limited(port, ch, count,
+		true,
+		writel(ch, port->membase + ALTERA_JTAGUART_DATA_REG),
+		({}));
 }
 
 static irqreturn_t altera_jtaguart_interrupt(int irq, void *data)
diff --git a/drivers/tty/serial/amba-pl010.c b/drivers/tty/serial/amba-pl010.c
index af27fb8ec1451..a98fae2ca422d 100644
--- a/drivers/tty/serial/amba-pl010.c
+++ b/drivers/tty/serial/amba-pl010.c
@@ -164,34 +164,12 @@ static void pl010_rx_chars(struct uart_port *port)
 
 static void pl010_tx_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-	int count;
+	u8 ch;
 
-	if (port->x_char) {
-		writel(port->x_char, port->membase + UART01x_DR);
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		pl010_stop_tx(port);
-		return;
-	}
-
-	count = port->fifosize >> 1;
-	do {
-		writel(xmit->buf[xmit->tail], port->membase + UART01x_DR);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-
-	if (uart_circ_empty(xmit))
-		pl010_stop_tx(port);
+	uart_port_tx_limited(port, ch, port->fifosize >> 1,
+		true,
+		writel(ch, port->membase + UART01x_DR),
+		({}));
 }
 
 static void pl010_modem_status(struct uart_amba_port *uap)
diff --git a/drivers/tty/serial/apbuart.c b/drivers/tty/serial/apbuart.c
index 450f4edfda0f0..915ee4b0d5949 100644
--- a/drivers/tty/serial/apbuart.c
+++ b/drivers/tty/serial/apbuart.c
@@ -122,36 +122,12 @@ static void apbuart_rx_chars(struct uart_port *port)
 
 static void apbuart_tx_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-	int count;
-
-	if (port->x_char) {
-		UART_PUT_CHAR(port, port->x_char);
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		apbuart_stop_tx(port);
-		return;
-	}
-
-	/* amba: fill FIFO */
-	count = port->fifosize >> 1;
-	do {
-		UART_PUT_CHAR(port, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		apbuart_stop_tx(port);
+	uart_port_tx_limited(port, ch, port->fifosize >> 1,
+		true,
+		UART_PUT_CHAR(port, ch),
+		({}));
 }
 
 static irqreturn_t apbuart_int(int irq, void *dev_id)
diff --git a/drivers/tty/serial/bcm63xx_uart.c b/drivers/tty/serial/bcm63xx_uart.c
index 5d9737c2d1f2c..62bc7244dc67f 100644
--- a/drivers/tty/serial/bcm63xx_uart.c
+++ b/drivers/tty/serial/bcm63xx_uart.c
@@ -303,53 +303,24 @@ static void bcm_uart_do_rx(struct uart_port *port)
  */
 static void bcm_uart_do_tx(struct uart_port *port)
 {
-	struct circ_buf *xmit;
-	unsigned int val, max_count;
-
-	if (port->x_char) {
-		bcm_uart_writel(port, port->x_char, UART_FIFO_REG);
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-
-	if (uart_tx_stopped(port)) {
-		bcm_uart_stop_tx(port);
-		return;
-	}
-
-	xmit = &port->state->xmit;
-	if (uart_circ_empty(xmit))
-		goto txq_empty;
+	unsigned int val;
+	bool pending;
+	u8 ch;
 
 	val = bcm_uart_readl(port, UART_MCTL_REG);
 	val = (val & UART_MCTL_TXFIFOFILL_MASK) >> UART_MCTL_TXFIFOFILL_SHIFT;
-	max_count = port->fifosize - val;
-
-	while (max_count--) {
-		unsigned int c;
 
-		c = xmit->buf[xmit->tail];
-		bcm_uart_writel(port, c, UART_FIFO_REG);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
-
-	if (uart_circ_empty(xmit))
-		goto txq_empty;
-	return;
+	pending = uart_port_tx_limited(port, ch, port->fifosize - val,
+		true,
+		bcm_uart_writel(port, ch, UART_FIFO_REG),
+		({}));
+	if (pending)
+		return;
 
-txq_empty:
 	/* nothing to send, disable transmit interrupt */
 	val = bcm_uart_readl(port, UART_IR_REG);
 	val &= ~UART_TX_INT_MASK;
 	bcm_uart_writel(port, val, UART_IR_REG);
-	return;
 }
 
 /*
diff --git a/drivers/tty/serial/mux.c b/drivers/tty/serial/mux.c
index ed0e763f622a8..85ce1e9af44ae 100644
--- a/drivers/tty/serial/mux.c
+++ b/drivers/tty/serial/mux.c
@@ -171,6 +171,13 @@ static void mux_break_ctl(struct uart_port *port, int break_state)
 {
 }
 
+static void mux_tx_done(struct uart_port *port)
+{
+	/* FIXME js: really needs to wait? */
+	while (UART_GET_FIFO_CNT(port))
+		udelay(1);
+}
+
 /**
  * mux_write - Write chars to the mux fifo.
  * @port: Ptr to the uart_port.
@@ -180,39 +187,13 @@ static void mux_break_ctl(struct uart_port *port, int break_state)
  */
 static void mux_write(struct uart_port *port)
 {
-	int count;
-	struct circ_buf *xmit = &port->state->xmit;
-
-	if(port->x_char) {
-		UART_PUT_CHAR(port, port->x_char);
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-
-	if(uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		mux_stop_tx(port);
-		return;
-	}
-
-	count = (port->fifosize) - UART_GET_FIFO_CNT(port);
-	do {
-		UART_PUT_CHAR(port, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if(uart_circ_empty(xmit))
-			break;
-
-	} while(--count > 0);
-
-	while(UART_GET_FIFO_CNT(port)) 
-		udelay(1);
-
-	if(uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		mux_stop_tx(port);
+	uart_port_tx_limited(port, ch,
+		port->fifosize - UART_GET_FIFO_CNT(port),
+		true,
+		UART_PUT_CHAR(port, ch),
+		mux_tx_done(port));
 }
 
 /**
diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index ba16e1da6bd3e..7b566404cb331 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -335,40 +335,12 @@ ignore_char:
 
 static void mvebu_uart_tx_chars(struct uart_port *port, unsigned int status)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-	unsigned int count;
-	unsigned int st;
-
-	if (port->x_char) {
-		writel(port->x_char, port->membase + UART_TSH(port));
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		mvebu_uart_stop_tx(port);
-		return;
-	}
-
-	for (count = 0; count < port->fifosize; count++) {
-		writel(xmit->buf[xmit->tail], port->membase + UART_TSH(port));
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-
-		if (uart_circ_empty(xmit))
-			break;
-
-		st = readl(port->membase + UART_STAT);
-		if (st & STAT_TX_FIFO_FUL)
-			break;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		mvebu_uart_stop_tx(port);
+	uart_port_tx_limited(port, ch, port->fifosize,
+		!(readl(port->membase + UART_STAT) & STAT_TX_FIFO_FUL),
+		writel(ch, port->membase + UART_TSH(port)),
+		({}));
 }
 
 static irqreturn_t mvebu_uart_isr(int irq, void *dev_id)
diff --git a/drivers/tty/serial/omap-serial.c b/drivers/tty/serial/omap-serial.c
index 7d0d2718ef595..82d35dbbfa6cb 100644
--- a/drivers/tty/serial/omap-serial.c
+++ b/drivers/tty/serial/omap-serial.c
@@ -347,34 +347,12 @@ static void serial_omap_put_char(struct uart_omap_port *up, unsigned char ch)
 
 static void transmit_chars(struct uart_omap_port *up, unsigned int lsr)
 {
-	struct circ_buf *xmit = &up->port.state->xmit;
-	int count;
+	u8 ch;
 
-	if (up->port.x_char) {
-		serial_omap_put_char(up, up->port.x_char);
-		up->port.icount.tx++;
-		up->port.x_char = 0;
-		return;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) {
-		serial_omap_stop_tx(&up->port);
-		return;
-	}
-	count = up->port.fifosize / 4;
-	do {
-		serial_omap_put_char(up, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
-
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&up->port);
-
-	if (uart_circ_empty(xmit))
-		serial_omap_stop_tx(&up->port);
+	uart_port_tx_limited(&up->port, ch, up->port.fifosize / 4,
+		true,
+		serial_omap_put_char(up, ch),
+		({}));
 }
 
 static inline void serial_omap_enable_ier_thri(struct uart_omap_port *up)
diff --git a/drivers/tty/serial/pxa.c b/drivers/tty/serial/pxa.c
index 2d25231fad847..444fa4b654aca 100644
--- a/drivers/tty/serial/pxa.c
+++ b/drivers/tty/serial/pxa.c
@@ -174,35 +174,12 @@ static inline void receive_chars(struct uart_pxa_port *up, int *status)
 
 static void transmit_chars(struct uart_pxa_port *up)
 {
-	struct circ_buf *xmit = &up->port.state->xmit;
-	int count;
+	u8 ch;
 
-	if (up->port.x_char) {
-		serial_out(up, UART_TX, up->port.x_char);
-		up->port.icount.tx++;
-		up->port.x_char = 0;
-		return;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) {
-		serial_pxa_stop_tx(&up->port);
-		return;
-	}
-
-	count = up->port.fifosize / 2;
-	do {
-		serial_out(up, UART_TX, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&up->port);
-
-
-	if (uart_circ_empty(xmit))
-		serial_pxa_stop_tx(&up->port);
+	uart_port_tx_limited(&up->port, ch, up->port.fifosize / 2,
+		true,
+		serial_out(up, UART_TX, ch),
+		({}));
 }
 
 static void serial_pxa_start_tx(struct uart_port *port)
diff --git a/drivers/tty/serial/rp2.c b/drivers/tty/serial/rp2.c
index b81afb06f1f40..749b873a5d99b 100644
--- a/drivers/tty/serial/rp2.c
+++ b/drivers/tty/serial/rp2.c
@@ -427,32 +427,13 @@ static void rp2_rx_chars(struct rp2_uart_port *up)
 
 static void rp2_tx_chars(struct rp2_uart_port *up)
 {
-	u16 max_tx = FIFO_SIZE - readw(up->base + RP2_TX_FIFO_COUNT);
-	struct circ_buf *xmit = &up->port.state->xmit;
+	u8 ch;
 
-	if (uart_tx_stopped(&up->port)) {
-		rp2_uart_stop_tx(&up->port);
-		return;
-	}
-
-	for (; max_tx != 0; max_tx--) {
-		if (up->port.x_char) {
-			writeb(up->port.x_char, up->base + RP2_DATA_BYTE);
-			up->port.x_char = 0;
-			up->port.icount.tx++;
-			continue;
-		}
-		if (uart_circ_empty(xmit)) {
-			rp2_uart_stop_tx(&up->port);
-			break;
-		}
-		writeb(xmit->buf[xmit->tail], up->base + RP2_DATA_BYTE);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&up->port);
+	uart_port_tx_limited(&up->port, ch,
+		FIFO_SIZE - readw(up->base + RP2_TX_FIFO_COUNT),
+		true,
+		writeb(ch, up->base + RP2_DATA_BYTE),
+		({}));
 }
 
 static void rp2_ch_interrupt(struct rp2_uart_port *up)
diff --git a/drivers/tty/serial/serial_txx9.c b/drivers/tty/serial/serial_txx9.c
index e12f1dc18c38b..eab387b01e36f 100644
--- a/drivers/tty/serial/serial_txx9.c
+++ b/drivers/tty/serial/serial_txx9.c
@@ -321,34 +321,12 @@ receive_chars(struct uart_port *up, unsigned int *status)
 
 static inline void transmit_chars(struct uart_port *up)
 {
-	struct circ_buf *xmit = &up->state->xmit;
-	int count;
+	u8 ch;
 
-	if (up->x_char) {
-		sio_out(up, TXX9_SITFIFO, up->x_char);
-		up->icount.tx++;
-		up->x_char = 0;
-		return;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(up)) {
-		serial_txx9_stop_tx(up);
-		return;
-	}
-
-	count = TXX9_SIO_TX_FIFO;
-	do {
-		sio_out(up, TXX9_SITFIFO, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(up);
-
-	if (uart_circ_empty(xmit))
-		serial_txx9_stop_tx(up);
+	uart_port_tx_limited(up, ch, TXX9_SIO_TX_FIFO,
+		true,
+		sio_out(up, TXX9_SITFIFO, ch),
+		({}));
 }
 
 static irqreturn_t serial_txx9_interrupt(int irq, void *dev_id)
diff --git a/drivers/tty/serial/sifive.c b/drivers/tty/serial/sifive.c
index 7fb6760b5c37a..1f565a216e748 100644
--- a/drivers/tty/serial/sifive.c
+++ b/drivers/tty/serial/sifive.c
@@ -288,33 +288,12 @@ static void __ssp_transmit_char(struct sifive_serial_port *ssp, int ch)
  */
 static void __ssp_transmit_chars(struct sifive_serial_port *ssp)
 {
-	struct circ_buf *xmit = &ssp->port.state->xmit;
-	int count;
-
-	if (ssp->port.x_char) {
-		__ssp_transmit_char(ssp, ssp->port.x_char);
-		ssp->port.icount.tx++;
-		ssp->port.x_char = 0;
-		return;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(&ssp->port)) {
-		sifive_serial_stop_tx(&ssp->port);
-		return;
-	}
-	count = SIFIVE_TX_FIFO_DEPTH;
-	do {
-		__ssp_transmit_char(ssp, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		ssp->port.icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&ssp->port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		sifive_serial_stop_tx(&ssp->port);
+	uart_port_tx_limited(&ssp->port, ch, SIFIVE_TX_FIFO_DEPTH,
+		true,
+		__ssp_transmit_char(ssp, ch),
+		({}));
 }
 
 /**
diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c
index 342a879676315..3f34f7bb77002 100644
--- a/drivers/tty/serial/sprd_serial.c
+++ b/drivers/tty/serial/sprd_serial.c
@@ -626,35 +626,12 @@ static inline void sprd_rx(struct uart_port *port)
 
 static inline void sprd_tx(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-	int count;
-
-	if (port->x_char) {
-		serial_out(port, SPRD_TXD, port->x_char);
-		port->icount.tx++;
-		port->x_char = 0;
-		return;
-	}
-
-	if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
-		sprd_stop_tx(port);
-		return;
-	}
-
-	count = THLD_TX_EMPTY;
-	do {
-		serial_out(port, SPRD_TXD, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		sprd_stop_tx(port);
+	uart_port_tx_limited(port, ch, THLD_TX_EMPTY,
+		true,
+		serial_out(port, SPRD_TXD, ch),
+		({}));
 }
 
 /* this handles the interrupt from one port */
diff --git a/drivers/tty/serial/st-asc.c b/drivers/tty/serial/st-asc.c
index fcecea689a0dd..5215e6910f686 100644
--- a/drivers/tty/serial/st-asc.c
+++ b/drivers/tty/serial/st-asc.c
@@ -237,50 +237,12 @@ static inline unsigned asc_hw_txroom(struct uart_port *port)
  */
 static void asc_transmit_chars(struct uart_port *port)
 {
-	struct circ_buf *xmit = &port->state->xmit;
-	int txroom;
-	unsigned char c;
-
-	txroom = asc_hw_txroom(port);
-
-	if ((txroom != 0) && port->x_char) {
-		c = port->x_char;
-		port->x_char = 0;
-		asc_out(port, ASC_TXBUF, c);
-		port->icount.tx++;
-		txroom = asc_hw_txroom(port);
-	}
-
-	if (uart_tx_stopped(port)) {
-		/*
-		 * We should try and stop the hardware here, but I
-		 * don't think the ASC has any way to do that.
-		 */
-		asc_disable_tx_interrupts(port);
-		return;
-	}
-
-	if (uart_circ_empty(xmit)) {
-		asc_disable_tx_interrupts(port);
-		return;
-	}
-
-	if (txroom == 0)
-		return;
-
-	do {
-		c = xmit->buf[xmit->tail];
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		asc_out(port, ASC_TXBUF, c);
-		port->icount.tx++;
-		txroom--;
-	} while ((txroom > 0) && (!uart_circ_empty(xmit)));
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(port);
+	u8 ch;
 
-	if (uart_circ_empty(xmit))
-		asc_disable_tx_interrupts(port);
+	uart_port_tx_limited(port, ch, asc_hw_txroom(port),
+		true,
+		asc_out(port, ASC_TXBUF, ch),
+		({}));
 }
 
 static void asc_receive_chars(struct uart_port *port)
-- 
GitLab


From 35781d8356a2eecaa6074ceeb80ee22e252fcdae Mon Sep 17 00:00:00 2001
From: Aniket Randive <quic_arandive@quicinc.com>
Date: Fri, 7 Oct 2022 11:53:00 +0530
Subject: [PATCH 0794/4122] tty: serial: qcom-geni-serial: Add support for
 Hibernation feature

Added changes to support the hibernation feature for serial UART.
Added support for freeze, restore and thaw callbacks to put the
device into hibernation.

Signed-off-by: Aniket Randive <quic_arandive@quicinc.com>
Link: https://lore.kernel.org/r/1665123780-20557-1-git-send-email-quic_arandive@quicinc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/qcom_geni_serial.c | 35 +++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/drivers/tty/serial/qcom_geni_serial.c b/drivers/tty/serial/qcom_geni_serial.c
index 83b66b73303af..b487823f0e617 100644
--- a/drivers/tty/serial/qcom_geni_serial.c
+++ b/drivers/tty/serial/qcom_geni_serial.c
@@ -924,6 +924,7 @@ static int qcom_geni_serial_port_setup(struct uart_port *uport)
 			       false, true, true);
 	geni_se_init(&port->se, UART_RX_WM, port->rx_fifo_depth - 2);
 	geni_se_select_mode(&port->se, GENI_SE_FIFO);
+	qcom_geni_serial_start_rx(uport);
 	port->setup = true;
 
 	return 0;
@@ -1547,9 +1548,43 @@ static int __maybe_unused qcom_geni_serial_sys_resume(struct device *dev)
 	return ret;
 }
 
+static int qcom_geni_serial_sys_hib_resume(struct device *dev)
+{
+	int ret = 0;
+	struct uart_port *uport;
+	struct qcom_geni_private_data *private_data;
+	struct qcom_geni_serial_port *port = dev_get_drvdata(dev);
+
+	uport = &port->uport;
+	private_data = uport->private_data;
+
+	if (uart_console(uport)) {
+		geni_icc_set_tag(&port->se, 0x7);
+		geni_icc_set_bw(&port->se);
+		ret = uart_resume_port(private_data->drv, uport);
+		/*
+		 * For hibernation usecase clients for
+		 * console UART won't call port setup during restore,
+		 * hence call port setup for console uart.
+		 */
+		qcom_geni_serial_port_setup(uport);
+	} else {
+		/*
+		 * Peripheral register settings are lost during hibernation.
+		 * Update setup flag such that port setup happens again
+		 * during next session. Clients of HS-UART will close and
+		 * open the port during hibernation.
+		 */
+		port->setup = false;
+	}
+	return ret;
+}
+
 static const struct dev_pm_ops qcom_geni_serial_pm_ops = {
 	SET_SYSTEM_SLEEP_PM_OPS(qcom_geni_serial_sys_suspend,
 					qcom_geni_serial_sys_resume)
+	.restore = qcom_geni_serial_sys_hib_resume,
+	.thaw = qcom_geni_serial_sys_hib_resume,
 };
 
 static const struct of_device_id qcom_geni_serial_match_table[] = {
-- 
GitLab


From 801954d1210a89b767176e1e34cf5976f41ca6d3 Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Sun, 16 Oct 2022 10:02:00 +0200
Subject: [PATCH 0795/4122] serial: 8250: 8250_omap: Support native RS485
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recent TI Sitara SoCs such as AM64/AM65 have gained the ability to
automatically assert RTS when data is transmitted, obviating the need
to emulate this functionality in software.

The feature is controlled through new DIR_EN and DIR_POL bits in the
Mode Definition Register 3.  For details see page 8783 and 8890 of the
AM65 TRM:  https://www.ti.com/lit/ug/spruid7e/spruid7e.pdf

Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Cc: Su Bao Cheng <baocheng.su@siemens.com>
Cc: Vignesh Raghavendra <vigneshr@ti.com>
Cc: Jan Kiszka <jan.kiszka@siemens.com>
Cc: Bin Liu <b-liu@ti.com>
Tested-by: Zeng Chao <chao.zeng@siemens.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Link: https://lore.kernel.org/r/e9f25f5c9200a35d3162973c2b45d6b892cc9bf2.1665906869.git.lukas@wunner.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_omap.c | 85 ++++++++++++++++++++++++++++-
 1 file changed, 82 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 41b8c6b27136a..1c8a48fdc8f24 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -44,6 +44,7 @@
 #define	UART_HAS_EFR2			BIT(4)
 #define UART_HAS_RHR_IT_DIS		BIT(5)
 #define UART_RX_TIMEOUT_QUIRK		BIT(6)
+#define UART_HAS_NATIVE_RS485		BIT(7)
 
 #define OMAP_UART_FCR_RX_TRIG		6
 #define OMAP_UART_FCR_TX_TRIG		4
@@ -101,6 +102,11 @@
 #define UART_OMAP_IER2			0x1B
 #define UART_OMAP_IER2_RHR_IT_DIS	BIT(2)
 
+/* Mode Definition Register 3 */
+#define UART_OMAP_MDR3			0x20
+#define UART_OMAP_MDR3_DIR_POL		BIT(3)
+#define UART_OMAP_MDR3_DIR_EN		BIT(4)
+
 /* Enhanced features register 2 */
 #define UART_OMAP_EFR2			0x23
 #define UART_OMAP_EFR2_TIMEOUT_BEHAVE	BIT(6)
@@ -112,6 +118,7 @@ struct omap8250_priv {
 	int line;
 	u8 habit;
 	u8 mdr1;
+	u8 mdr3;
 	u8 efr;
 	u8 scr;
 	u8 wer;
@@ -343,7 +350,10 @@ static void omap8250_restore_regs(struct uart_8250_port *up)
 
 	up->port.ops->set_mctrl(&up->port, up->port.mctrl);
 
-	if (up->port.rs485.flags & SER_RS485_ENABLED)
+	serial_out(up, UART_OMAP_MDR3, priv->mdr3);
+
+	if (up->port.rs485.flags & SER_RS485_ENABLED &&
+	    up->port.rs485_config == serial8250_em485_config)
 		serial8250_em485_stop_tx(up);
 }
 
@@ -792,6 +802,74 @@ static void omap_8250_unthrottle(struct uart_port *port)
 	pm_runtime_put_autosuspend(port->dev);
 }
 
+static int omap8250_rs485_config(struct uart_port *port,
+				 struct ktermios *termios,
+				 struct serial_rs485 *rs485)
+{
+	struct omap8250_priv *priv = port->private_data;
+	struct uart_8250_port *up = up_to_u8250p(port);
+	u32 fixed_delay_rts_before_send = 0;
+	u32 fixed_delay_rts_after_send = 0;
+	unsigned int baud;
+
+	/*
+	 * There is a fixed delay of 3 bit clock cycles after the TX shift
+	 * register is going empty to allow time for the stop bit to transition
+	 * through the transceiver before direction is changed to receive.
+	 *
+	 * Additionally there appears to be a 1 bit clock delay between writing
+	 * to the THR register and transmission of the start bit, per page 8783
+	 * of the AM65 TRM:  https://www.ti.com/lit/ug/spruid7e/spruid7e.pdf
+	 */
+	if (priv->quot) {
+		if (priv->mdr1 & UART_OMAP_MDR1_16X_MODE)
+			baud = port->uartclk / (16 * priv->quot);
+		else
+			baud = port->uartclk / (13 * priv->quot);
+
+		fixed_delay_rts_after_send  = 3 * MSEC_PER_SEC / baud;
+		fixed_delay_rts_before_send = 1 * MSEC_PER_SEC / baud;
+	}
+
+	/*
+	 * Fall back to RS485 software emulation if the UART is missing
+	 * hardware support, if the device tree specifies an mctrl_gpio
+	 * (indicates that RTS is unavailable due to a pinmux conflict)
+	 * or if the requested delays exceed the fixed hardware delays.
+	 */
+	if (!(priv->habit & UART_HAS_NATIVE_RS485) ||
+	    mctrl_gpio_to_gpiod(up->gpios, UART_GPIO_RTS) ||
+	    rs485->delay_rts_after_send  > fixed_delay_rts_after_send ||
+	    rs485->delay_rts_before_send > fixed_delay_rts_before_send) {
+		priv->mdr3 &= ~UART_OMAP_MDR3_DIR_EN;
+		serial_out(up, UART_OMAP_MDR3, priv->mdr3);
+
+		port->rs485_config = serial8250_em485_config;
+		return serial8250_em485_config(port, termios, rs485);
+	}
+
+	rs485->delay_rts_after_send  = fixed_delay_rts_after_send;
+	rs485->delay_rts_before_send = fixed_delay_rts_before_send;
+
+	if (rs485->flags & SER_RS485_ENABLED)
+		priv->mdr3 |= UART_OMAP_MDR3_DIR_EN;
+	else
+		priv->mdr3 &= ~UART_OMAP_MDR3_DIR_EN;
+
+	/*
+	 * Retain same polarity semantics as RS485 software emulation,
+	 * i.e. SER_RS485_RTS_ON_SEND means driving RTS low on send.
+	 */
+	if (rs485->flags & SER_RS485_RTS_ON_SEND)
+		priv->mdr3 &= ~UART_OMAP_MDR3_DIR_POL;
+	else
+		priv->mdr3 |= UART_OMAP_MDR3_DIR_POL;
+
+	serial_out(up, UART_OMAP_MDR3, priv->mdr3);
+
+	return 0;
+}
+
 #ifdef CONFIG_SERIAL_8250_DMA
 static int omap_8250_rx_dma(struct uart_8250_port *p);
 
@@ -1241,7 +1319,7 @@ static struct omap8250_dma_params am33xx_dma = {
 static struct omap8250_platdata am654_platdata = {
 	.dma_params	= &am654_dma,
 	.habit		= UART_HAS_EFR2 | UART_HAS_RHR_IT_DIS |
-			  UART_RX_TIMEOUT_QUIRK,
+			  UART_RX_TIMEOUT_QUIRK | UART_HAS_NATIVE_RS485,
 };
 
 static struct omap8250_platdata am33xx_platdata = {
@@ -1334,7 +1412,8 @@ static int omap8250_probe(struct platform_device *pdev)
 	up.port.shutdown = omap_8250_shutdown;
 	up.port.throttle = omap_8250_throttle;
 	up.port.unthrottle = omap_8250_unthrottle;
-	up.port.rs485_config = serial8250_em485_config;
+	up.port.rs485_config = omap8250_rs485_config;
+	/* same rs485_supported for software emulation and native RS485 */
 	up.port.rs485_supported = serial8250_em485_supported;
 	up.rs485_start_tx = serial8250_em485_start_tx;
 	up.rs485_stop_tx = serial8250_em485_stop_tx;
-- 
GitLab


From f4000a06f40f6008c9cd8092c30ed3ffb62a1587 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:08 +0300
Subject: [PATCH 0796/4122] serial: dz: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-2-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/dz.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/dz.c b/drivers/tty/serial/dz.c
index 829b452daee9d..6b7ed7f2f3cac 100644
--- a/drivers/tty/serial/dz.c
+++ b/drivers/tty/serial/dz.c
@@ -279,9 +279,8 @@ static inline void dz_transmit_chars(struct dz_mux *mux)
 	 * so we go one char at a time) :-<
 	 */
 	tmp = xmit->buf[xmit->tail];
-	xmit->tail = (xmit->tail + 1) & (DZ_XMIT_SIZE - 1);
 	dz_out(dport, DZ_TDR, tmp);
-	dport->port.icount.tx++;
+	uart_xmit_advance(&dport->port, 1);
 
 	if (uart_circ_chars_pending(xmit) < DZ_WAKEUP_CHARS)
 		uart_write_wakeup(&dport->port);
-- 
GitLab


From 20b01af85291b8e25133ab22399f2de6bbad861e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:09 +0300
Subject: [PATCH 0797/4122] serial: men_z135_uart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-3-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/men_z135_uart.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/tty/serial/men_z135_uart.c b/drivers/tty/serial/men_z135_uart.c
index 3690f5cf0f434..d2502aaa3e8c7 100644
--- a/drivers/tty/serial/men_z135_uart.c
+++ b/drivers/tty/serial/men_z135_uart.c
@@ -352,11 +352,8 @@ static void men_z135_handle_tx(struct men_z135_port *uart)
 	n = min(n, s);
 
 	memcpy_toio(port->membase + MEN_Z135_TX_RAM, &xmit->buf[xmit->tail], n);
-	xmit->tail = (xmit->tail + n) & (UART_XMIT_SIZE - 1);
-
 	iowrite32(n & 0x3ff, port->membase + MEN_Z135_TX_CTRL);
-
-	port->icount.tx += n;
+	uart_xmit_advance(port, n);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(port);
-- 
GitLab


From 1fcff75f3932bdc5ef923bbe8e973d1f3ed59096 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:10 +0300
Subject: [PATCH 0798/4122] serial: msm: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-4-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/msm_serial.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index 7dd19a2815794..2b2e0f74b75a0 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -464,12 +464,9 @@ static void msm_complete_tx_dma(void *args)
 	}
 
 	count = dma->count - state.residue;
-	port->icount.tx += count;
+	uart_xmit_advance(port, count);
 	dma->count = 0;
 
-	xmit->tail += count;
-	xmit->tail &= UART_XMIT_SIZE - 1;
-
 	/* Restore "Tx FIFO below watermark" interrupt */
 	msm_port->imr |= MSM_UART_IMR_TXLEV;
 	msm_write(port, msm_port->imr, MSM_UART_IMR);
@@ -866,13 +863,11 @@ static void msm_handle_tx_pio(struct uart_port *port, unsigned int tx_count)
 		else
 			num_chars = 1;
 
-		for (i = 0; i < num_chars; i++) {
+		for (i = 0; i < num_chars; i++)
 			buf[i] = xmit->buf[xmit->tail + i];
-			port->icount.tx++;
-		}
 
 		iowrite32_rep(tf, buf, 1);
-		xmit->tail = (xmit->tail + num_chars) & (UART_XMIT_SIZE - 1);
+		uart_xmit_advance(port, num_chars);
 		tf_pointer += num_chars;
 	}
 
-- 
GitLab


From a5c9611ddc51b846dd699c3ce2a6bcbfa544ef20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:11 +0300
Subject: [PATCH 0799/4122] serial: pch_uart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-5-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/pch_uart.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index c59ce78865799..c76719c0f453a 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -738,15 +738,12 @@ static void pch_dma_tx_complete(void *arg)
 {
 	struct eg20t_port *priv = arg;
 	struct uart_port *port = &priv->port;
-	struct circ_buf *xmit = &port->state->xmit;
 	struct scatterlist *sg = priv->sg_tx_p;
 	int i;
 
-	for (i = 0; i < priv->nent; i++, sg++) {
-		xmit->tail += sg_dma_len(sg);
-		port->icount.tx += sg_dma_len(sg);
-	}
-	xmit->tail &= UART_XMIT_SIZE - 1;
+	for (i = 0; i < priv->nent; i++, sg++)
+		uart_xmit_advance(port, sg_dma_len(sg));
+
 	async_tx_ack(priv->desc_tx);
 	dma_unmap_sg(port->dev, sg, priv->orig_nent, DMA_TO_DEVICE);
 	priv->tx_dma_use = 0;
@@ -843,8 +840,7 @@ static unsigned int handle_tx(struct eg20t_port *priv)
 
 	while (!uart_tx_stopped(port) && !uart_circ_empty(xmit) && fifo_size) {
 		iowrite8(xmit->buf[xmit->tail], priv->membase + PCH_UART_THR);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 		fifo_size--;
 		tx_empty = 0;
 	}
-- 
GitLab


From a2a74303b3085109b27160a49e61d4d067acfae7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:12 +0300
Subject: [PATCH 0800/4122] serial: sc16is7xx: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-6-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sc16is7xx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/tty/serial/sc16is7xx.c b/drivers/tty/serial/sc16is7xx.c
index 524921360ca78..39f92eb1e6989 100644
--- a/drivers/tty/serial/sc16is7xx.c
+++ b/drivers/tty/serial/sc16is7xx.c
@@ -686,13 +686,10 @@ static void sc16is7xx_handle_tx(struct uart_port *port)
 		}
 		to_send = (to_send > txlen) ? txlen : to_send;
 
-		/* Add data to send */
-		port->icount.tx += to_send;
-
 		/* Convert to linear buffer */
 		for (i = 0; i < to_send; ++i) {
 			s->buf[i] = xmit->buf[xmit->tail];
-			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+			uart_xmit_advance(port, 1);
 		}
 
 		sc16is7xx_fifo_write(port, to_send);
-- 
GitLab


From fc59f80b087447a198274e8b3adf2b8ddcecb561 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:13 +0300
Subject: [PATCH 0801/4122] serial: 8250_bcm7271: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-7-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_bcm7271.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c
index fa8ccf204d860..062177b64d21f 100644
--- a/drivers/tty/serial/8250/8250_bcm7271.c
+++ b/drivers/tty/serial/8250/8250_bcm7271.c
@@ -425,9 +425,7 @@ static int brcmuart_tx_dma(struct uart_8250_port *p)
 
 	priv->dma.tx_err = 0;
 	memcpy(priv->tx_buf, &xmit->buf[xmit->tail], tx_size);
-	xmit->tail += tx_size;
-	xmit->tail &= UART_XMIT_SIZE - 1;
-	p->port.icount.tx += tx_size;
+	uart_xmit_advance(&p->port, tx_size);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&p->port);
-- 
GitLab


From 051ef7c8d81ff25b843afb661776e0a77a4f9374 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:14 +0300
Subject: [PATCH 0802/4122] serial: 8250: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-8-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_port.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c
index fe8662cd94024..b94e60e753265 100644
--- a/drivers/tty/serial/8250/8250_port.c
+++ b/drivers/tty/serial/8250/8250_port.c
@@ -1842,8 +1842,7 @@ void serial8250_tx_chars(struct uart_8250_port *up)
 			 */
 			serial_in(up, UART_SCR);
 		}
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 		if (uart_circ_empty(xmit))
 			break;
 		if ((up->capabilities & UART_CAP_HFIFO) &&
-- 
GitLab


From 71a67573d0ede98f9c5f6466da6f6a7d7663498b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:15 +0300
Subject: [PATCH 0803/4122] serial: pl011: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-9-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/amba-pl011.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 5cdced39eafdb..6d8552506091c 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -677,8 +677,7 @@ static int pl011_dma_tx_refill(struct uart_amba_port *uap)
 	 * Now we know that DMA will fire, so advance the ring buffer
 	 * with the stuff we just dispatched.
 	 */
-	xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
-	uap->port.icount.tx += count;
+	uart_xmit_advance(&uap->port, count);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&uap->port);
-- 
GitLab


From d29d947c14d1704d567db3025a293c4ee54cd90c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:16 +0300
Subject: [PATCH 0804/4122] serial: ar933x: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-10-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/ar933x_uart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/ar933x_uart.c b/drivers/tty/serial/ar933x_uart.c
index 925484a42c821..4c3d04c6826a9 100644
--- a/drivers/tty/serial/ar933x_uart.c
+++ b/drivers/tty/serial/ar933x_uart.c
@@ -425,8 +425,7 @@ static void ar933x_uart_tx_chars(struct ar933x_uart_port *up)
 
 		ar933x_uart_putc(up, xmit->buf[xmit->tail]);
 
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
+		uart_xmit_advance(&up->port, 1);
 	} while (--count > 0);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-- 
GitLab


From 8a8dee2cdbb3d147430684d408127e5c9e910fc0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:17 +0300
Subject: [PATCH 0805/4122] serial: arc: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-11-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/arc_uart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c
index 2a65ea2660e10..748e8b1cf4f7f 100644
--- a/drivers/tty/serial/arc_uart.c
+++ b/drivers/tty/serial/arc_uart.c
@@ -166,8 +166,7 @@ static void arc_serial_tx_chars(struct uart_port *port)
 		sent = 1;
 	} else if (!uart_circ_empty(xmit)) {
 		ch = xmit->buf[xmit->tail];
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 		while (!(UART_GET_STATUS(port) & TXEMPTY))
 			cpu_relax();
 		UART_SET_DATA(port, ch);
-- 
GitLab


From add147a4591e20732c3f51ed63414d9e89757e5f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:18 +0300
Subject: [PATCH 0806/4122] serial: atmel: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Acked-By: Richard GENOUD <richard.genoud@gmail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-12-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index a6b4d30c5888d..4ca04676c4066 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -859,10 +859,7 @@ static void atmel_complete_tx_dma(void *arg)
 
 	if (chan)
 		dmaengine_terminate_all(chan);
-	xmit->tail += atmel_port->tx_len;
-	xmit->tail &= UART_XMIT_SIZE - 1;
-
-	port->icount.tx += atmel_port->tx_len;
+	uart_xmit_advance(port, atmel_port->tx_len);
 
 	spin_lock_irq(&atmel_port->lock_tx);
 	async_tx_ack(atmel_port->desc_tx);
@@ -1455,11 +1452,7 @@ static void atmel_tx_pdc(struct uart_port *port)
 	/* nothing left to transmit? */
 	if (atmel_uart_readl(port, ATMEL_PDC_TCR))
 		return;
-
-	xmit->tail += pdc->ofs;
-	xmit->tail &= UART_XMIT_SIZE - 1;
-
-	port->icount.tx += pdc->ofs;
+	uart_xmit_advance(port, pdc->ofs);
 	pdc->ofs = 0;
 
 	/* more to transmit - setup next transfer */
-- 
GitLab


From 4146765cae90bac4643e2225a49670da3749311f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:19 +0300
Subject: [PATCH 0807/4122] serial: clps711x: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-13-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/clps711x.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/clps711x.c b/drivers/tty/serial/clps711x.c
index 404b43a5ae33f..e190dce58f462 100644
--- a/drivers/tty/serial/clps711x.c
+++ b/drivers/tty/serial/clps711x.c
@@ -166,8 +166,7 @@ static irqreturn_t uart_clps711x_int_tx(int irq, void *dev_id)
 		u32 sysflg = 0;
 
 		writew(xmit->buf[xmit->tail], port->membase + UARTDR_OFFSET);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 
 		regmap_read(s->syscon, SYSFLG_OFFSET, &sysflg);
 		if (sysflg & SYSFLG_UTXFF)
-- 
GitLab


From f8097f0caaf2188e8e4e552116cbd8d5f7746f8a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:20 +0300
Subject: [PATCH 0808/4122] serial: cpm_uart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-14-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/cpm_uart/cpm_uart_core.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/cpm_uart/cpm_uart_core.c b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
index b4369ed45ae2d..5565f302cb212 100644
--- a/drivers/tty/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/tty/serial/cpm_uart/cpm_uart_core.c
@@ -684,8 +684,7 @@ static int cpm_uart_tx_pump(struct uart_port *port)
 		p = cpm2cpu_addr(in_be32(&bdp->cbd_bufaddr), pinfo);
 		while (count < pinfo->tx_fifosize) {
 			*p++ = xmit->buf[xmit->tail];
-			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-			port->icount.tx++;
+			uart_xmit_advance(port, 1);
 			count++;
 			if (xmit->head == xmit->tail)
 				break;
-- 
GitLab


From cb867f542e2a895e9f2bdcd846f1241b51e3cc2d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:21 +0300
Subject: [PATCH 0809/4122] serial: digicolor: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Acked-by: Baruch Siach <baruch@tkos.co.il>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-15-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/digicolor-usart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/digicolor-usart.c b/drivers/tty/serial/digicolor-usart.c
index 0c0a62346f231..ed197705f7eea 100644
--- a/drivers/tty/serial/digicolor-usart.c
+++ b/drivers/tty/serial/digicolor-usart.c
@@ -202,8 +202,7 @@ static void digicolor_uart_tx(struct uart_port *port)
 
 	while (!uart_circ_empty(xmit)) {
 		writeb(xmit->buf[xmit->tail], port->membase + UA_EMI_REC);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 
 		if (digicolor_uart_tx_full(port))
 			break;
-- 
GitLab


From 7840a92a3e7d7f78bdc16b75c62101bc240bfa93 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:22 +0300
Subject: [PATCH 0810/4122] serial: linflexuart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-16-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_linflexuart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/fsl_linflexuart.c b/drivers/tty/serial/fsl_linflexuart.c
index 84e8153e54200..6fc21b6684e6f 100644
--- a/drivers/tty/serial/fsl_linflexuart.c
+++ b/drivers/tty/serial/fsl_linflexuart.c
@@ -178,8 +178,7 @@ static inline void linflex_transmit_buffer(struct uart_port *sport)
 
 	while (!uart_circ_empty(xmit)) {
 		linflex_put_char(sport, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->icount.tx++;
+		uart_xmit_advance(sport, 1);
 	}
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-- 
GitLab


From cacf7f689b9b783d1efaf4b545d8038ee6edc73e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:23 +0300
Subject: [PATCH 0811/4122] serial: fsl_lpuart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-17-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index d811eda1844e7..849ef313b824e 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -535,9 +535,7 @@ static void lpuart_dma_tx_complete(void *arg)
 	dma_unmap_sg(chan->device->dev, sgl, sport->dma_tx_nents,
 		     DMA_TO_DEVICE);
 
-	xmit->tail = (xmit->tail + sport->dma_tx_bytes) & (UART_XMIT_SIZE - 1);
-
-	sport->port.icount.tx += sport->dma_tx_bytes;
+	uart_xmit_advance(&sport->port, sport->dma_tx_bytes);
 	sport->dma_tx_in_progress = false;
 	spin_unlock_irqrestore(&sport->port.lock, flags);
 
@@ -772,8 +770,7 @@ static inline void lpuart32_transmit_buffer(struct lpuart_port *sport)
 	txcnt &= UARTWATER_COUNT_MASK;
 	while (!uart_circ_empty(xmit) && (txcnt < sport->txfifo_size)) {
 		lpuart32_write(&sport->port, xmit->buf[xmit->tail], UARTDATA);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->port.icount.tx++;
+		uart_xmit_advance(&sport->port, 1);
 		txcnt = lpuart32_read(&sport->port, UARTWATER);
 		txcnt = txcnt >> UARTWATER_TXCNT_OFF;
 		txcnt &= UARTWATER_COUNT_MASK;
-- 
GitLab


From 26e8f1d9a88144b35b35ce3b9df2d437d8cb2ed8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:24 +0300
Subject: [PATCH 0812/4122] serial: imx: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-18-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/imx.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index 05b432dc7a85c..a7548d0a1aeee 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -563,8 +563,7 @@ static inline void imx_uart_transmit_buffer(struct imx_port *sport)
 		/* send xmit->buf[xmit->tail]
 		 * out the port here */
 		imx_uart_writel(sport, xmit->buf[xmit->tail], URTX0);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->port.icount.tx++;
+		uart_xmit_advance(&sport->port, 1);
 	}
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
@@ -590,9 +589,7 @@ static void imx_uart_dma_tx_callback(void *data)
 	ucr1 &= ~UCR1_TXDMAEN;
 	imx_uart_writel(sport, ucr1, UCR1);
 
-	/* update the stat */
-	xmit->tail = (xmit->tail + sport->tx_bytes) & (UART_XMIT_SIZE - 1);
-	sport->port.icount.tx += sport->tx_bytes;
+	uart_xmit_advance(&sport->port, sport->tx_bytes);
 
 	dev_dbg(sport->port.dev, "we finish the TX DMA.\n");
 
-- 
GitLab


From daf63432f4626e4cf97dac0cf12c7969617ff1a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:25 +0300
Subject: [PATCH 0813/4122] serial: ip22zilog: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-19-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/ip22zilog.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/ip22zilog.c b/drivers/tty/serial/ip22zilog.c
index dd0a8915ce4ff..b1f27e1681358 100644
--- a/drivers/tty/serial/ip22zilog.c
+++ b/drivers/tty/serial/ip22zilog.c
@@ -409,8 +409,7 @@ static void ip22zilog_transmit_chars(struct uart_ip22zilog_port *up,
 	ZSDELAY();
 	ZS_WSYNC(channel);
 
-	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-	up->port.icount.tx++;
+	uart_xmit_advance(&up->port, 1);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&up->port);
@@ -609,8 +608,7 @@ static void ip22zilog_start_tx(struct uart_port *port)
 		ZSDELAY();
 		ZS_WSYNC(channel);
 
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 
 		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 			uart_write_wakeup(&up->port);
-- 
GitLab


From 53c3d62f46872fd7bb177a9181291e58edcc3cf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:26 +0300
Subject: [PATCH 0814/4122] serial: liteuart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Acked-by: Gabriel Somlo <gsomlo@gmail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-20-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/liteuart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/liteuart.c b/drivers/tty/serial/liteuart.c
index 4c0604325ee91..062812fe1b092 100644
--- a/drivers/tty/serial/liteuart.c
+++ b/drivers/tty/serial/liteuart.c
@@ -136,8 +136,7 @@ static void liteuart_start_tx(struct uart_port *port)
 	} else if (!uart_circ_empty(xmit)) {
 		while (xmit->head != xmit->tail) {
 			ch = xmit->buf[xmit->tail];
-			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-			port->icount.tx++;
+			uart_xmit_advance(port, 1);
 			liteuart_putchar(port, ch);
 		}
 	}
-- 
GitLab


From 502b13cc5aa2c9b2b2a802a7d473734439dc09b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:27 +0300
Subject: [PATCH 0815/4122] serial: max3100: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-21-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/max3100.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/tty/serial/max3100.c b/drivers/tty/serial/max3100.c
index c69602f356fdc..bb74f23251fe4 100644
--- a/drivers/tty/serial/max3100.c
+++ b/drivers/tty/serial/max3100.c
@@ -292,9 +292,7 @@ static void max3100_work(struct work_struct *w)
 			} else if (!uart_circ_empty(xmit) &&
 				   !uart_tx_stopped(&s->port)) {
 				tx = xmit->buf[xmit->tail];
-				xmit->tail = (xmit->tail + 1) &
-					(UART_XMIT_SIZE - 1);
-				s->port.icount.tx++;
+				uart_xmit_advance(&s->port, 1);
 			}
 			if (tx != 0xffff) {
 				max3100_calc_parity(s, &tx);
-- 
GitLab


From d41727dbdfcbb25bf6e30ddcbe330bb9e7b9fcbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:28 +0300
Subject: [PATCH 0816/4122] serial: max310x: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-22-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/max310x.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/tty/serial/max310x.c b/drivers/tty/serial/max310x.c
index fbf6e2b3161c5..4eb24e3407f80 100644
--- a/drivers/tty/serial/max310x.c
+++ b/drivers/tty/serial/max310x.c
@@ -787,10 +787,7 @@ static void max310x_handle_tx(struct uart_port *port)
 		} else {
 			max310x_batch_write(port, xmit->buf + xmit->tail, to_send);
 		}
-
-		/* Add data to send */
-		port->icount.tx += to_send;
-		xmit->tail = (xmit->tail + to_send) & (UART_XMIT_SIZE - 1);
+		uart_xmit_advance(port, to_send);
 	}
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-- 
GitLab


From 681ef4219bee73139c713226d90bd068d0bd9b89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:29 +0300
Subject: [PATCH 0817/4122] serial: meson: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Acked-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Acked-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-23-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/meson_uart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/meson_uart.c b/drivers/tty/serial/meson_uart.c
index 056243c12836c..74110017988a8 100644
--- a/drivers/tty/serial/meson_uart.c
+++ b/drivers/tty/serial/meson_uart.c
@@ -162,8 +162,7 @@ static void meson_uart_start_tx(struct uart_port *port)
 
 		ch = xmit->buf[xmit->tail];
 		writel(ch, port->membase + AML_UART_WFIFO);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 	}
 
 	if (!uart_circ_empty(xmit)) {
-- 
GitLab


From 5c664457a9373394eab65f1459ad55c36e147c94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:30 +0300
Subject: [PATCH 0818/4122] serial: milbeaut_usio: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-24-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/milbeaut_usio.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/milbeaut_usio.c b/drivers/tty/serial/milbeaut_usio.c
index c15e0d84dc7e3..44988a2941b8d 100644
--- a/drivers/tty/serial/milbeaut_usio.c
+++ b/drivers/tty/serial/milbeaut_usio.c
@@ -98,8 +98,7 @@ static void mlb_usio_tx_chars(struct uart_port *port)
 	do {
 		writew(xmit->buf[xmit->tail], port->membase + MLB_USIO_REG_DR);
 
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 		if (uart_circ_empty(xmit))
 			break;
 
-- 
GitLab


From 269599fa886f63dc425857f4672e1d0c8df2a5f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:31 +0300
Subject: [PATCH 0819/4122] serial: mvebu-uart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-25-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/mvebu-uart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
index 7b566404cb331..31f739c7a08bf 100644
--- a/drivers/tty/serial/mvebu-uart.c
+++ b/drivers/tty/serial/mvebu-uart.c
@@ -223,8 +223,7 @@ static void mvebu_uart_start_tx(struct uart_port *port)
 
 	if (IS_EXTENDED(port) && !uart_circ_empty(xmit)) {
 		writel(xmit->buf[xmit->tail], port->membase + UART_TSH(port));
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 	}
 
 	ctl = readl(port->membase + UART_INTR(port));
-- 
GitLab


From 98fdebeebbad98393d691aaa092e3c7889e5cbbd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:32 +0300
Subject: [PATCH 0820/4122] serial: pic32: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-26-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/pic32_uart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/pic32_uart.c b/drivers/tty/serial/pic32_uart.c
index 2beada66c8245..52d026865a327 100644
--- a/drivers/tty/serial/pic32_uart.c
+++ b/drivers/tty/serial/pic32_uart.c
@@ -376,8 +376,7 @@ static void pic32_uart_do_tx(struct uart_port *port)
 
 		pic32_uart_writel(sport, PIC32_UART_TX, c);
 
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 		if (uart_circ_empty(xmit))
 			break;
 		if (--max_count == 0)
-- 
GitLab


From b31b07a7d21a10f5068b92563c8037a8c2b15f1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:33 +0300
Subject: [PATCH 0821/4122] serial: pmac_zilog: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-27-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/pmac_zilog.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/pmac_zilog.c b/drivers/tty/serial/pmac_zilog.c
index fe2e4ec423f79..13668ffdb1e7d 100644
--- a/drivers/tty/serial/pmac_zilog.c
+++ b/drivers/tty/serial/pmac_zilog.c
@@ -410,8 +410,7 @@ static void pmz_transmit_chars(struct uart_pmac_port *uap)
 	write_zsdata(uap, xmit->buf[xmit->tail]);
 	zssync(uap);
 
-	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-	uap->port.icount.tx++;
+	uart_xmit_advance(&uap->port, 1);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&uap->port);
@@ -627,8 +626,7 @@ static void pmz_start_tx(struct uart_port *port)
 			return;
 		write_zsdata(uap, xmit->buf[xmit->tail]);
 		zssync(uap);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 
 		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 			uart_write_wakeup(&uap->port);
-- 
GitLab


From 3d4d838423a56a3c18174d194508e728ce9fe6af Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:34 +0300
Subject: [PATCH 0822/4122] serial: rda: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-28-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/rda-uart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/rda-uart.c b/drivers/tty/serial/rda-uart.c
index 0e387e2144fa2..be5c842b5ba95 100644
--- a/drivers/tty/serial/rda-uart.c
+++ b/drivers/tty/serial/rda-uart.c
@@ -353,8 +353,7 @@ static void rda_uart_send_chars(struct uart_port *port)
 
 		ch = xmit->buf[xmit->tail];
 		rda_uart_write(port, ch, RDA_UART_RXTX_BUFFER);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 	}
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-- 
GitLab


From ec04d75fe4386c7e5eafb72367e962f56694d490 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:35 +0300
Subject: [PATCH 0823/4122] serial: samsung_tty: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-29-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/samsung_tty.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/tty/serial/samsung_tty.c b/drivers/tty/serial/samsung_tty.c
index 77d1363029f55..7e34361a1085a 100644
--- a/drivers/tty/serial/samsung_tty.c
+++ b/drivers/tty/serial/samsung_tty.c
@@ -288,7 +288,6 @@ static void s3c24xx_serial_stop_tx(struct uart_port *port)
 {
 	struct s3c24xx_uart_port *ourport = to_ourport(port);
 	struct s3c24xx_uart_dma *dma = ourport->dma;
-	struct circ_buf *xmit = &port->state->xmit;
 	struct dma_tx_state state;
 	int count;
 
@@ -316,8 +315,7 @@ static void s3c24xx_serial_stop_tx(struct uart_port *port)
 					DMA_TO_DEVICE);
 		async_tx_ack(dma->tx_desc);
 		count = dma->tx_bytes_requested - state.residue;
-		xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
-		port->icount.tx += count;
+		uart_xmit_advance(port, count);
 	}
 
 	ourport->tx_enabled = 0;
@@ -351,8 +349,7 @@ static void s3c24xx_serial_tx_dma_complete(void *args)
 
 	spin_lock_irqsave(&port->lock, flags);
 
-	xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
-	port->icount.tx += count;
+	uart_xmit_advance(port, count);
 	ourport->tx_in_progress = 0;
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
@@ -916,8 +913,7 @@ static void s3c24xx_serial_tx_chars(struct s3c24xx_uart_port *ourport)
 			break;
 
 		wr_reg(port, S3C2410_UTXH, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 		count--;
 	}
 
-- 
GitLab


From 10b459d2c3da954286e1fcc3ce33ec42d0ea5126 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:36 +0300
Subject: [PATCH 0824/4122] serial: sb1250-duart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-30-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sb1250-duart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c
index c5d2b6cdcb4a1..de56f383964e6 100644
--- a/drivers/tty/serial/sb1250-duart.c
+++ b/drivers/tty/serial/sb1250-duart.c
@@ -399,8 +399,7 @@ static void sbd_transmit_chars(struct sbd_port *sport)
 	/* Send char.  */
 	if (!stop_tx) {
 		write_sbdchn(sport, R_DUART_TX_HOLD, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		sport->port.icount.tx++;
+		uart_xmit_advance(&sport->port, 1);
 
 		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 			uart_write_wakeup(&sport->port);
-- 
GitLab


From 3ea03c021dcceac88bbb38f7b67bc08d23c80c84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:37 +0300
Subject: [PATCH 0825/4122] serial: sccnxp: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-31-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sccnxp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sccnxp.c b/drivers/tty/serial/sccnxp.c
index dd98509f52e5a..7df6878226340 100644
--- a/drivers/tty/serial/sccnxp.c
+++ b/drivers/tty/serial/sccnxp.c
@@ -468,8 +468,7 @@ static void sccnxp_handle_tx(struct uart_port *port)
 			break;
 
 		sccnxp_port_write(port, SCCNXP_THR_REG, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 	}
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-- 
GitLab


From b7e2647671a2e7eb8d5dd1cb73464d7d760f6139 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:38 +0300
Subject: [PATCH 0826/4122] serial: tegra: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-32-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial-tegra.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c
index b7170cb9a544f..4304e3f9307dd 100644
--- a/drivers/tty/serial/serial-tegra.c
+++ b/drivers/tty/serial/serial-tegra.c
@@ -496,8 +496,7 @@ static void tegra_uart_fill_tx_fifo(struct tegra_uart_port *tup, int max_bytes)
 				break;
 		}
 		tegra_uart_write(tup, xmit->buf[xmit->tail], UART_TX);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		tup->uport.icount.tx++;
+		uart_xmit_advance(&tup->uport, 1);
 	}
 }
 
-- 
GitLab


From e234ef0ef1dec33384968c695f0c2c751b0621ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:39 +0300
Subject: [PATCH 0827/4122] serial: sh-sci: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-33-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sh-sci.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
index 62f773286d44b..a92a897803570 100644
--- a/drivers/tty/serial/sh-sci.c
+++ b/drivers/tty/serial/sh-sci.c
@@ -1181,10 +1181,7 @@ static void sci_dma_tx_complete(void *arg)
 
 	spin_lock_irqsave(&port->lock, flags);
 
-	xmit->tail += s->tx_dma_len;
-	xmit->tail &= UART_XMIT_SIZE - 1;
-
-	port->icount.tx += s->tx_dma_len;
+	uart_xmit_advance(port, s->tx_dma_len);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(port);
-- 
GitLab


From b92df54ccf7355256e4eebe2f36dc2b83808e9ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:40 +0300
Subject: [PATCH 0828/4122] serial: sprd: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-34-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sprd_serial.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/tty/serial/sprd_serial.c b/drivers/tty/serial/sprd_serial.c
index 3f34f7bb77002..492a3bdab5ba2 100644
--- a/drivers/tty/serial/sprd_serial.c
+++ b/drivers/tty/serial/sprd_serial.c
@@ -206,7 +206,6 @@ static void sprd_stop_tx_dma(struct uart_port *port)
 {
 	struct sprd_uart_port *sp =
 		container_of(port, struct sprd_uart_port, port);
-	struct circ_buf *xmit = &port->state->xmit;
 	struct dma_tx_state state;
 	u32 trans_len;
 
@@ -215,8 +214,7 @@ static void sprd_stop_tx_dma(struct uart_port *port)
 	dmaengine_tx_status(sp->tx_dma.chn, sp->tx_dma.cookie, &state);
 	if (state.residue) {
 		trans_len = state.residue - sp->tx_dma.phys_addr;
-		xmit->tail = (xmit->tail + trans_len) & (UART_XMIT_SIZE - 1);
-		port->icount.tx += trans_len;
+		uart_xmit_advance(port, trans_len);
 		dma_unmap_single(port->dev, sp->tx_dma.phys_addr,
 				 sp->tx_dma.trans_len, DMA_TO_DEVICE);
 	}
@@ -253,8 +251,7 @@ static void sprd_complete_tx_dma(void *data)
 	dma_unmap_single(port->dev, sp->tx_dma.phys_addr,
 			 sp->tx_dma.trans_len, DMA_TO_DEVICE);
 
-	xmit->tail = (xmit->tail + sp->tx_dma.trans_len) & (UART_XMIT_SIZE - 1);
-	port->icount.tx += sp->tx_dma.trans_len;
+	uart_xmit_advance(port, sp->tx_dma.trans_len);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(port);
-- 
GitLab


From 29d8c07b49578cc58f48fdea361c1f419515076c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:41 +0300
Subject: [PATCH 0829/4122] serial: stm32: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-35-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/stm32-usart.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index dfdbcf092facc..24def72b25653 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -596,8 +596,7 @@ static void stm32_usart_transmit_chars_pio(struct uart_port *port)
 		if (!(readl_relaxed(port->membase + ofs->isr) & USART_SR_TXE))
 			break;
 		writel_relaxed(xmit->buf[xmit->tail], port->membase + ofs->tdr);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 	}
 
 	/* rely on TXE irq (mask or unmask) for sending remaining data */
@@ -673,8 +672,8 @@ static void stm32_usart_transmit_chars_dma(struct uart_port *port)
 
 	stm32_usart_set_bits(port, ofs->cr3, USART_CR3_DMAT);
 
-	xmit->tail = (xmit->tail + count) & (UART_XMIT_SIZE - 1);
-	port->icount.tx += count;
+	uart_xmit_advance(port, count);
+
 	return;
 
 fallback_err:
-- 
GitLab


From c5fd4b7d7e58e20b05b5d24eddab70567ec7a724 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:42 +0300
Subject: [PATCH 0830/4122] serial: sunhv: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-36-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sunhv.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/sunhv.c b/drivers/tty/serial/sunhv.c
index 1938ba5e98c0e..16c746a632587 100644
--- a/drivers/tty/serial/sunhv.c
+++ b/drivers/tty/serial/sunhv.c
@@ -47,8 +47,7 @@ static void transmit_chars_putchar(struct uart_port *port, struct circ_buf *xmit
 		if (status != HV_EOK)
 			break;
 
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 	}
 }
 
@@ -63,8 +62,7 @@ static void transmit_chars_write(struct uart_port *port, struct circ_buf *xmit)
 		status = sun4v_con_write(ra, len, &sent);
 		if (status != HV_EOK)
 			break;
-		xmit->tail = (xmit->tail + sent) & (UART_XMIT_SIZE - 1);
-		port->icount.tx += sent;
+		uart_xmit_advance(port, sent);
 	}
 }
 
-- 
GitLab


From 54ffabbe2203817d1d70ceb97407a4382633d2e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:43 +0300
Subject: [PATCH 0831/4122] serial: sunplus-uart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-37-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sunplus-uart.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/tty/serial/sunplus-uart.c b/drivers/tty/serial/sunplus-uart.c
index 7afe61a0e72e4..727942c43c454 100644
--- a/drivers/tty/serial/sunplus-uart.c
+++ b/drivers/tty/serial/sunplus-uart.c
@@ -216,9 +216,7 @@ static void transmit_chars(struct uart_port *port)
 
 	do {
 		sp_uart_put_char(port, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) % UART_XMIT_SIZE;
-		port->icount.tx++;
-
+		uart_xmit_advance(port, 1);
 		if (uart_circ_empty(xmit))
 			break;
 	} while (sunplus_tx_buf_not_full(port));
-- 
GitLab


From 5aaae464d6de0913a962d4f9412986a528b0acf7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:44 +0300
Subject: [PATCH 0832/4122] serial: sunsab: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-38-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sunsab.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c
index 99608b2a2b74f..94db67f21abf2 100644
--- a/drivers/tty/serial/sunsab.c
+++ b/drivers/tty/serial/sunsab.c
@@ -266,8 +266,7 @@ static void transmit_chars(struct uart_sunsab_port *up,
 	for (i = 0; i < up->port.fifosize; i++) {
 		writeb(xmit->buf[xmit->tail],
 		       &up->regs->w.xfifo[i]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
+		uart_xmit_advance(&up->port, 1);
 		if (uart_circ_empty(xmit))
 			break;
 	}
@@ -453,8 +452,7 @@ static void sunsab_start_tx(struct uart_port *port)
 	for (i = 0; i < up->port.fifosize; i++) {
 		writeb(xmit->buf[xmit->tail],
 		       &up->regs->w.xfifo[i]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
+		uart_xmit_advance(&up->port, 1);
 		if (uart_circ_empty(xmit))
 			break;
 	}
-- 
GitLab


From 7f20ab70940e121c9af3f4ca013f38c762ef19e6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:45 +0300
Subject: [PATCH 0833/4122] serial: sunsu: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-39-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sunsu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/sunsu.c b/drivers/tty/serial/sunsu.c
index 9ea7e567540d6..fed052a0b9310 100644
--- a/drivers/tty/serial/sunsu.c
+++ b/drivers/tty/serial/sunsu.c
@@ -417,8 +417,7 @@ static void transmit_chars(struct uart_sunsu_port *up)
 	count = up->port.fifosize;
 	do {
 		serial_out(up, UART_TX, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		up->port.icount.tx++;
+		uart_xmit_advance(&up->port, 1);
 		if (uart_circ_empty(xmit))
 			break;
 	} while (--count > 0);
-- 
GitLab


From 81eb6227afea551fbfcb7958bdfcd70b6e5419f4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:46 +0300
Subject: [PATCH 0834/4122] serial: sunzilog: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-40-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sunzilog.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/sunzilog.c b/drivers/tty/serial/sunzilog.c
index 87425290687d5..ccb809216e941 100644
--- a/drivers/tty/serial/sunzilog.c
+++ b/drivers/tty/serial/sunzilog.c
@@ -508,8 +508,7 @@ static void sunzilog_transmit_chars(struct uart_sunzilog_port *up,
 	ZSDELAY();
 	ZS_WSYNC(channel);
 
-	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-	up->port.icount.tx++;
+	uart_xmit_advance(&up->port, 1);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&up->port);
@@ -709,8 +708,7 @@ static void sunzilog_start_tx(struct uart_port *port)
 		ZSDELAY();
 		ZS_WSYNC(channel);
 
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 
 		if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 			uart_write_wakeup(&up->port);
-- 
GitLab


From b421cbb2f33c7692aa209ce2781647919a1265b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:47 +0300
Subject: [PATCH 0835/4122] serial: timbuart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-41-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/timbuart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/timbuart.c b/drivers/tty/serial/timbuart.c
index bb19ed012def7..0859394a78cdf 100644
--- a/drivers/tty/serial/timbuart.c
+++ b/drivers/tty/serial/timbuart.c
@@ -101,8 +101,7 @@ static void timbuart_tx_chars(struct uart_port *port)
 		!uart_circ_empty(xmit)) {
 		iowrite8(xmit->buf[xmit->tail],
 			port->membase + TIMBUART_TXFIFO);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-		port->icount.tx++;
+		uart_xmit_advance(port, 1);
 	}
 
 	dev_dbg(port->dev,
-- 
GitLab


From 852322ff4f2be566b0165a1f9c2adfcc0a86f8b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:48 +0300
Subject: [PATCH 0836/4122] serial: uartlite: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-42-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/uartlite.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c
index eca41ac5477cb..94584e54ebbed 100644
--- a/drivers/tty/serial/uartlite.c
+++ b/drivers/tty/serial/uartlite.c
@@ -203,8 +203,7 @@ static int ulite_transmit(struct uart_port *port, int stat)
 		return 0;
 
 	uart_out32(xmit->buf[xmit->tail], ULITE_TX, port);
-	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE-1);
-	port->icount.tx++;
+	uart_xmit_advance(port, 1);
 
 	/* wake up */
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-- 
GitLab


From 41e804c4dec667e64c9204e13862cf634df794a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:49 +0300
Subject: [PATCH 0837/4122] serial: ucc_uart: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-43-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/ucc_uart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/ucc_uart.c b/drivers/tty/serial/ucc_uart.c
index 82cf14dd3d433..b09b6496ee3eb 100644
--- a/drivers/tty/serial/ucc_uart.c
+++ b/drivers/tty/serial/ucc_uart.c
@@ -372,8 +372,7 @@ static int qe_uart_tx_pump(struct uart_qe_port *qe_port)
 		p = qe2cpu_addr(be32_to_cpu(bdp->buf), qe_port);
 		while (count < qe_port->tx_fifosize) {
 			*p++ = xmit->buf[xmit->tail];
-			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-			port->icount.tx++;
+			uart_xmit_advance(port, 1);
 			count++;
 			if (xmit->head == xmit->tail)
 				break;
-- 
GitLab


From edc62b17ed9f6db3de3521793e4689e15aad4625 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:50 +0300
Subject: [PATCH 0838/4122] serial: xuartps: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-44-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/xilinx_uartps.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/tty/serial/xilinx_uartps.c b/drivers/tty/serial/xilinx_uartps.c
index 2eff7cff57c48..01d8027e64fda 100644
--- a/drivers/tty/serial/xilinx_uartps.c
+++ b/drivers/tty/serial/xilinx_uartps.c
@@ -326,9 +326,7 @@ static void cdns_uart_handle_tx(void *dev_id)
 	       !(readl(port->membase + CDNS_UART_SR) & CDNS_UART_SR_TXFULL)) {
 
 		writel(xmit->buf[xmit->tail], port->membase + CDNS_UART_FIFO);
-
-		port->icount.tx++;
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		uart_xmit_advance(port, 1);
 		numbytes--;
 	}
 
-- 
GitLab


From c2087b37d10404220d06f4503ef165ab7835b246 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:11:51 +0300
Subject: [PATCH 0839/4122] serial: zs: Use uart_xmit_advance()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Take advantage of the new uart_xmit_advance() helper.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019091151.6692-45-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/zs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/tty/serial/zs.c b/drivers/tty/serial/zs.c
index 688db7d8b7488..730c648e32ff3 100644
--- a/drivers/tty/serial/zs.c
+++ b/drivers/tty/serial/zs.c
@@ -623,8 +623,7 @@ static void zs_raw_transmit_chars(struct zs_port *zport)
 
 	/* Send char.  */
 	write_zsdata(zport, xmit->buf[xmit->tail]);
-	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-	zport->port.icount.tx++;
+	uart_xmit_advance(&zport->port, 1);
 
 	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
 		uart_write_wakeup(&zport->port);
-- 
GitLab


From 109a951a9f1fd8a34ebd1896cbbd5d5cede880a7 Mon Sep 17 00:00:00 2001
From: Kartik <kkartik@nvidia.com>
Date: Tue, 18 Oct 2022 20:28:06 +0530
Subject: [PATCH 0840/4122] serial: tegra: Read DMA status before terminating
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Read the DMA status before terminating the DMA, as doing so deletes
the DMA desc.

Also, to get the correct transfer status information, pause the DMA
using dmaengine_pause() before reading the DMA status.

Fixes: e9ea096dd225 ("serial: tegra: add serial driver")
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Akhil R <akhilrajeev@nvidia.com>
Signed-off-by: Kartik <kkartik@nvidia.com>
Link: https://lore.kernel.org/r/1666105086-17326-1-git-send-email-kkartik@nvidia.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/serial-tegra.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/serial-tegra.c b/drivers/tty/serial/serial-tegra.c
index 4304e3f9307dd..e5b9773db5e36 100644
--- a/drivers/tty/serial/serial-tegra.c
+++ b/drivers/tty/serial/serial-tegra.c
@@ -618,8 +618,9 @@ static void tegra_uart_stop_tx(struct uart_port *u)
 	if (tup->tx_in_progress != TEGRA_UART_TX_DMA)
 		return;
 
-	dmaengine_terminate_all(tup->tx_dma_chan);
+	dmaengine_pause(tup->tx_dma_chan);
 	dmaengine_tx_status(tup->tx_dma_chan, tup->tx_cookie, &state);
+	dmaengine_terminate_all(tup->tx_dma_chan);
 	count = tup->tx_bytes_requested - state.residue;
 	async_tx_ack(tup->tx_dma_desc);
 	uart_xmit_advance(&tup->uport, count);
@@ -762,8 +763,9 @@ static void tegra_uart_terminate_rx_dma(struct tegra_uart_port *tup)
 		return;
 	}
 
-	dmaengine_terminate_all(tup->rx_dma_chan);
+	dmaengine_pause(tup->rx_dma_chan);
 	dmaengine_tx_status(tup->rx_dma_chan, tup->rx_cookie, &state);
+	dmaengine_terminate_all(tup->rx_dma_chan);
 
 	tegra_uart_rx_buffer_push(tup, state.residue);
 	tup->rx_dma_active = false;
-- 
GitLab


From 6dd07781b4cdd38103c81ddcc88fa4e8a31ebf71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:33:39 +0300
Subject: [PATCH 0841/4122] serial: Convert serial_rs485 to kernel doc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert struct serial_rs485 comments to kernel doc format and include
it into documentation.

Suggested-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019093343.9546-2-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../driver-api/serial/serial-rs485.rst        | 13 +++--
 include/uapi/linux/serial.h                   | 55 ++++++++++++-------
 2 files changed, 43 insertions(+), 25 deletions(-)

diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst
index 6ebad75c74ed9..264e4b7537135 100644
--- a/Documentation/driver-api/serial/serial-rs485.rst
+++ b/Documentation/driver-api/serial/serial-rs485.rst
@@ -29,11 +29,11 @@ RS485 Serial Communications
 3. Data Structures Already Available in the Kernel
 ==================================================
 
-   The Linux kernel provides the serial_rs485 structure (see [1]) to handle
-   RS485 communications. This data structure is used to set and configure RS485
+   The Linux kernel provides the serial_rs485 structure to handle RS485
+   communications. This data structure is used to set and configure RS485
    parameters in the platform data and in ioctls.
 
-   The device tree can also provide RS485 boot time parameters (see [2]
+   The device tree can also provide RS485 boot time parameters (see [1]
    for bindings). The driver is in charge of filling this data structure from
    the values given by the device tree.
 
@@ -47,6 +47,9 @@ RS485 Serial Communications
    for the uart_port. TIOCGRS485 ioctl can be used to read back the
    serial_rs485 structure matching to the current configuration.
 
+.. kernel-doc:: include/uapi/linux/serial.h
+   :identifiers: serial_rs485
+
 4. Usage from user-level
 ========================
 
@@ -126,6 +129,4 @@ RS485 Serial Communications
 6. References
 =============
 
- [1]	include/uapi/linux/serial.h
-
- [2]	Documentation/devicetree/bindings/serial/rs485.txt
+ [1]	Documentation/devicetree/bindings/serial/rs485.txt
diff --git a/include/uapi/linux/serial.h b/include/uapi/linux/serial.h
index cea06924b295d..53bc1af67a411 100644
--- a/include/uapi/linux/serial.h
+++ b/include/uapi/linux/serial.h
@@ -107,33 +107,50 @@ struct serial_icounter_struct {
 	int reserved[9];
 };
 
-/*
+/**
+ * struct serial_rs485 - serial interface for controlling RS485 settings.
+ * @flags:			RS485 feature flags.
+ * @delay_rts_before_send:	Delay before send (milliseconds).
+ * @delay_rts_after_send:	Delay after send (milliseconds).
+ * @addr_recv:			Receive filter for RS485 addressing mode
+ *				(used only when %SER_RS485_ADDR_RECV is set).
+ * @addr_dest:			Destination address for RS485 addressing mode
+ *				(used only when %SER_RS485_ADDR_DEST is set).
+ * @padding0:			Padding (set to zero).
+ * @padding1:			Padding (set to zero).
+ * @padding:			Deprecated, use @padding0 and @padding1 instead.
+ *				Do not use with @addr_recv and @addr_dest (due to
+ *				overlap).
+ *
  * Serial interface for controlling RS485 settings on chips with suitable
  * support. Set with TIOCSRS485 and get with TIOCGRS485 if supported by your
  * platform. The set function returns the new state, with any unsupported bits
  * reverted appropriately.
+ *
+ * The flag bits are:
+ *
+ * * %SER_RS485_ENABLED		- RS485 enabled.
+ * * %SER_RS485_RTS_ON_SEND	- Logical level for RTS pin when sending.
+ * * %SER_RS485_RTS_AFTER_SEND	- Logical level for RTS pin after sent.
+ * * %SER_RS485_RX_DURING_TX	- Full-duplex RS485 line.
+ * * %SER_RS485_TERMINATE_BUS	- Enable bus termination (if supported).
+ * * %SER_RS485_ADDRB		- Enable RS485 addressing mode.
+ * * %SER_RS485_ADDR_RECV - Receive address filter (enables @addr_recv). Requires %SER_RS485_ADDRB.
+ * * %SER_RS485_ADDR_DEST - Destination address (enables @addr_dest). Requires %SER_RS485_ADDRB.
  */
-
 struct serial_rs485 {
-	__u32	flags;			/* RS485 feature flags */
-#define SER_RS485_ENABLED		(1 << 0)	/* If enabled */
-#define SER_RS485_RTS_ON_SEND		(1 << 1)	/* Logical level for
-							   RTS pin when
-							   sending */
-#define SER_RS485_RTS_AFTER_SEND	(1 << 2)	/* Logical level for
-							   RTS pin after sent*/
+	__u32	flags;
+#define SER_RS485_ENABLED		(1 << 0)
+#define SER_RS485_RTS_ON_SEND		(1 << 1)
+#define SER_RS485_RTS_AFTER_SEND	(1 << 2)
 #define SER_RS485_RX_DURING_TX		(1 << 4)
-#define SER_RS485_TERMINATE_BUS		(1 << 5)	/* Enable bus
-							   termination
-							   (if supported) */
-
-/* RS-485 addressing mode */
-#define SER_RS485_ADDRB			(1 << 6)	/* Enable addressing mode */
-#define SER_RS485_ADDR_RECV		(1 << 7)	/* Receive address filter */
-#define SER_RS485_ADDR_DEST		(1 << 8)	/* Destination address */
+#define SER_RS485_TERMINATE_BUS		(1 << 5)
+#define SER_RS485_ADDRB			(1 << 6)
+#define SER_RS485_ADDR_RECV		(1 << 7)
+#define SER_RS485_ADDR_DEST		(1 << 8)
 
-	__u32	delay_rts_before_send;	/* Delay before send (milliseconds) */
-	__u32	delay_rts_after_send;	/* Delay after send (milliseconds) */
+	__u32	delay_rts_before_send;
+	__u32	delay_rts_after_send;
 
 	/* The fields below are defined by flags */
 	union {
-- 
GitLab


From 851453abc8e9cfe92bbf8fdf07ee024ec372f09d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:33:40 +0300
Subject: [PATCH 0842/4122] Documentation: rs485: Link reference properly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Link DT bindings reference properly.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019093343.9546-3-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/serial/serial-rs485.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst
index 264e4b7537135..513758a702a67 100644
--- a/Documentation/driver-api/serial/serial-rs485.rst
+++ b/Documentation/driver-api/serial/serial-rs485.rst
@@ -33,9 +33,9 @@ RS485 Serial Communications
    communications. This data structure is used to set and configure RS485
    parameters in the platform data and in ioctls.
 
-   The device tree can also provide RS485 boot time parameters (see [1]
-   for bindings). The driver is in charge of filling this data structure from
-   the values given by the device tree.
+   The device tree can also provide RS485 boot time parameters
+   [#DT-bindings]_. The driver is in charge of filling this data structure
+   from the values given by the device tree.
 
    Any driver for devices capable of working both as RS232 and RS485 should
    implement the rs485_config callback and provide rs485_supported in the
@@ -129,4 +129,4 @@ RS485 Serial Communications
 6. References
 =============
 
- [1]	Documentation/devicetree/bindings/serial/rs485.txt
+.. [#DT-bindings]	Documentation/devicetree/bindings/serial/rs485.txt
-- 
GitLab


From 0f4648a1a6d36487e4134a4fd75d8cdae0cd6bf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:33:41 +0300
Subject: [PATCH 0843/4122] Documentation: rs485: Mention uart_get_rs485_mode()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add to rs485 documentation that serial core prepares the struct
serial_rs485 when uart_get_rs485_mode() is called. Remove the wrong
claim that the driver must fill it by itself.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019093343.9546-4-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/serial/serial-rs485.rst | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst
index 513758a702a67..ee34c8e1e56ee 100644
--- a/Documentation/driver-api/serial/serial-rs485.rst
+++ b/Documentation/driver-api/serial/serial-rs485.rst
@@ -34,8 +34,9 @@ RS485 Serial Communications
    parameters in the platform data and in ioctls.
 
    The device tree can also provide RS485 boot time parameters
-   [#DT-bindings]_. The driver is in charge of filling this data structure
-   from the values given by the device tree.
+   [#DT-bindings]_. The serial core fills the struct serial_rs485 from the
+   values given by the device tree when the driver calls
+   uart_get_rs485_mode().
 
    Any driver for devices capable of working both as RS232 and RS485 should
    implement the rs485_config callback and provide rs485_supported in the
@@ -48,7 +49,7 @@ RS485 Serial Communications
    serial_rs485 structure matching to the current configuration.
 
 .. kernel-doc:: include/uapi/linux/serial.h
-   :identifiers: serial_rs485
+   :identifiers: serial_rs485 uart_get_rs485_mode
 
 4. Usage from user-level
 ========================
-- 
GitLab


From 891e999394f56b52ed81b9ebcb7fe0bf050a3157 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:33:42 +0300
Subject: [PATCH 0844/4122] Documentation: rs485: Fix struct referencing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use "struct serial_rs485" to get the references properly recognized.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019093343.9546-5-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../driver-api/serial/serial-rs485.rst        | 21 ++++++++++---------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst
index ee34c8e1e56ee..e53aa291bcd7d 100644
--- a/Documentation/driver-api/serial/serial-rs485.rst
+++ b/Documentation/driver-api/serial/serial-rs485.rst
@@ -29,7 +29,7 @@ RS485 Serial Communications
 3. Data Structures Already Available in the Kernel
 ==================================================
 
-   The Linux kernel provides the serial_rs485 structure to handle RS485
+   The Linux kernel provides the struct serial_rs485 to handle RS485
    communications. This data structure is used to set and configure RS485
    parameters in the platform data and in ioctls.
 
@@ -40,13 +40,14 @@ RS485 Serial Communications
 
    Any driver for devices capable of working both as RS232 and RS485 should
    implement the rs485_config callback and provide rs485_supported in the
-   uart_port structure. The serial core calls rs485_config to do the device
-   specific part in response to TIOCSRS485 ioctl (see below). The rs485_config
-   callback receives a pointer to a sanitizated serial_rs485 structure. The
-   serial_rs485 userspace provides is sanitized before calling rs485_config
-   using rs485_supported that indicates what RS485 features the driver supports
-   for the uart_port. TIOCGRS485 ioctl can be used to read back the
-   serial_rs485 structure matching to the current configuration.
+   struct uart_port. The serial core calls rs485_config to do the device
+   specific part in response to TIOCSRS485 ioctl (see below). The
+   rs485_config callback receives a pointer to a sanitizated struct
+   serial_rs485. The struct serial_rs485 userspace provides is sanitized
+   before calling rs485_config using rs485_supported that indicates what
+   RS485 features the driver supports for the struct uart_port. TIOCGRS485
+   ioctl can be used to read back the struct serial_rs485 matching to the
+   current configuration.
 
 .. kernel-doc:: include/uapi/linux/serial.h
    :identifiers: serial_rs485 uart_get_rs485_mode
@@ -108,8 +109,8 @@ RS485 Serial Communications
 
    The Linux kernel provides addressing mode for multipoint RS-485 serial
    communications line. The addressing mode is enabled with SER_RS485_ADDRB
-   flag in serial_rs485. Struct serial_rs485 has two additional flags and
-   fields for enabling receive and destination addresses.
+   flag in struct serial_rs485. The struct serial_rs485 has two additional
+   flags and fields for enabling receive and destination addresses.
 
    Address mode flags:
 	- SER_RS485_ADDRB: Enabled addressing mode (sets also ADDRB in termios).
-- 
GitLab


From 7b1c56e08a45693ce6f708a4ee499e44c4fb9ca5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 12:33:43 +0300
Subject: [PATCH 0845/4122] Documentation: Make formatting consistent for rs485
 docs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Tweak styling of names that come directly from the code.

Suggested-by: Bagas Sanjaya <bagasdotme@gmail.com>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019093343.9546-6-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../driver-api/serial/serial-rs485.rst        | 37 ++++++++++---------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/Documentation/driver-api/serial/serial-rs485.rst b/Documentation/driver-api/serial/serial-rs485.rst
index e53aa291bcd7d..dce061ef76477 100644
--- a/Documentation/driver-api/serial/serial-rs485.rst
+++ b/Documentation/driver-api/serial/serial-rs485.rst
@@ -39,15 +39,15 @@ RS485 Serial Communications
    uart_get_rs485_mode().
 
    Any driver for devices capable of working both as RS232 and RS485 should
-   implement the rs485_config callback and provide rs485_supported in the
-   struct uart_port. The serial core calls rs485_config to do the device
-   specific part in response to TIOCSRS485 ioctl (see below). The
-   rs485_config callback receives a pointer to a sanitizated struct
+   implement the ``rs485_config`` callback and provide ``rs485_supported``
+   in the ``struct uart_port``. The serial core calls ``rs485_config`` to do
+   the device specific part in response to TIOCSRS485 ioctl (see below). The
+   ``rs485_config`` callback receives a pointer to a sanitizated struct
    serial_rs485. The struct serial_rs485 userspace provides is sanitized
-   before calling rs485_config using rs485_supported that indicates what
-   RS485 features the driver supports for the struct uart_port. TIOCGRS485
-   ioctl can be used to read back the struct serial_rs485 matching to the
-   current configuration.
+   before calling ``rs485_config`` using ``rs485_supported`` that indicates
+   what RS485 features the driver supports for the ``struct uart_port``.
+   TIOCGRS485 ioctl can be used to read back the struct serial_rs485
+   matching to the current configuration.
 
 .. kernel-doc:: include/uapi/linux/serial.h
    :identifiers: serial_rs485 uart_get_rs485_mode
@@ -108,23 +108,24 @@ RS485 Serial Communications
 ========================
 
    The Linux kernel provides addressing mode for multipoint RS-485 serial
-   communications line. The addressing mode is enabled with SER_RS485_ADDRB
-   flag in struct serial_rs485. The struct serial_rs485 has two additional
-   flags and fields for enabling receive and destination addresses.
+   communications line. The addressing mode is enabled with
+   ``SER_RS485_ADDRB`` flag in struct serial_rs485. The struct serial_rs485
+   has two additional flags and fields for enabling receive and destination
+   addresses.
 
    Address mode flags:
-	- SER_RS485_ADDRB: Enabled addressing mode (sets also ADDRB in termios).
-	- SER_RS485_ADDR_RECV: Receive (filter) address enabled.
-	- SER_RS485_ADDR_DEST: Set destination address.
+	- ``SER_RS485_ADDRB``: Enabled addressing mode (sets also ADDRB in termios).
+	- ``SER_RS485_ADDR_RECV``: Receive (filter) address enabled.
+	- ``SER_RS485_ADDR_DEST``: Set destination address.
 
-   Address fields (enabled with corresponding SER_RS485_ADDR_* flag):
-	- addr_recv: Receive address.
-	- addr_dest: Destination address.
+   Address fields (enabled with corresponding ``SER_RS485_ADDR_*`` flag):
+	- ``addr_recv``: Receive address.
+	- ``addr_dest``: Destination address.
 
    Once a receive address is set, the communication can occur only with the
    particular device and other peers are filtered out. It is left up to the
    receiver side to enforce the filtering. Receive address will be cleared
-   if SER_RS485_ADDR_RECV is not set.
+   if ``SER_RS485_ADDR_RECV`` is not set.
 
    Note: not all devices supporting RS485 support multipoint addressing.
 
-- 
GitLab


From 0c3c184c5218d9209fcb8ee151074bc629bd4024 Mon Sep 17 00:00:00 2001
From: Siarhei Volkau <lis8215@gmail.com>
Date: Mon, 31 Oct 2022 21:40:39 +0300
Subject: [PATCH 0846/4122] dt-bindings: serial: ingenic: Add support for the
 JZ4750/55 SoCs

These SoCs UART block are the same as JZ4725b' one, the difference is
outside of the block - it is in the clock generation unit (CGU).
The difference requires to make a quirk for early console init.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Link: https://lore.kernel.org/r/20221031184041.1338129-2-lis8215@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/serial/ingenic,uart.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
index 9ca7a18ecd8b1..315ceb722e199 100644
--- a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
+++ b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
@@ -20,6 +20,7 @@ properties:
     oneOf:
       - enum:
           - ingenic,jz4740-uart
+          - ingenic,jz4750-uart
           - ingenic,jz4760-uart
           - ingenic,jz4780-uart
           - ingenic,x1000-uart
@@ -31,6 +32,9 @@ properties:
       - items:
           - const: ingenic,jz4725b-uart
           - const: ingenic,jz4740-uart
+      - items:
+          - const: ingenic,jz4755-uart
+          - const: ingenic,jz4750-uart
 
   reg:
     maxItems: 1
-- 
GitLab


From e9c29d80278c0f5c6198ac741b10a534672042ca Mon Sep 17 00:00:00 2001
From: Siarhei Volkau <lis8215@gmail.com>
Date: Mon, 31 Oct 2022 21:40:40 +0300
Subject: [PATCH 0847/4122] serial: 8250/ingenic: Add support for the
 JZ4750/JZ4755

JZ4750/55/60 (but not JZ4760b) have an optional /2 divider between the
EXT oscillator and some peripherals including UART, which will
be enabled if using a 24 MHz oscillator, and disabled when
using a 12 MHz oscillator.

This behavior relies on hardware differences: most boards (if not all)
with those SoCs have 12 or 24 MHz oscillators but many peripherals want
12Mhz to operate properly (AIC and USB-PHY at least).

The 16MHz threshold looks arbitrary but used in vendor's bootloader code
for enable the divider.

The patch doesn't affect JZ4760's behavior as it is subject for another
patchset with re-classification of all supported ingenic UARTs.

Link: https://github.com/carlos-wong/uboot_jz4755/blob/master/cpu/mips/jz_serial.c#L158
Signed-off-by: Siarhei Volkau <lis8215@gmail.com>
Link: https://lore.kernel.org/r/20221031184041.1338129-3-lis8215@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_ingenic.c | 32 +++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_ingenic.c b/drivers/tty/serial/8250/8250_ingenic.c
index 2b2f5d8d24b91..617b8ce60d6b5 100644
--- a/drivers/tty/serial/8250/8250_ingenic.c
+++ b/drivers/tty/serial/8250/8250_ingenic.c
@@ -87,7 +87,7 @@ static void __init ingenic_early_console_setup_clock(struct earlycon_device *dev
 	dev->port.uartclk = be32_to_cpup(prop);
 }
 
-static int __init ingenic_early_console_setup(struct earlycon_device *dev,
+static int __init ingenic_earlycon_setup_tail(struct earlycon_device *dev,
 					      const char *opt)
 {
 	struct uart_port *port = &dev->port;
@@ -103,8 +103,6 @@ static int __init ingenic_early_console_setup(struct earlycon_device *dev,
 		uart_parse_options(opt, &baud, &parity, &bits, &flow);
 	}
 
-	ingenic_early_console_setup_clock(dev);
-
 	if (dev->baud)
 		baud = dev->baud;
 	divisor = DIV_ROUND_CLOSEST(port->uartclk, 16 * baud);
@@ -129,9 +127,36 @@ static int __init ingenic_early_console_setup(struct earlycon_device *dev,
 	return 0;
 }
 
+static int __init ingenic_early_console_setup(struct earlycon_device *dev,
+					      const char *opt)
+{
+	ingenic_early_console_setup_clock(dev);
+
+	return ingenic_earlycon_setup_tail(dev, opt);
+}
+
+static int __init jz4750_early_console_setup(struct earlycon_device *dev,
+					     const char *opt)
+{
+	/*
+	 * JZ4750/55/60 have an optional /2 divider between the EXT
+	 * oscillator and some peripherals including UART, which will
+	 * be enabled if using a 24 MHz oscillator, and disabled when
+	 * using a 12 MHz oscillator.
+	 */
+	ingenic_early_console_setup_clock(dev);
+	if (dev->port.uartclk >= 16000000)
+		dev->port.uartclk /= 2;
+
+	return ingenic_earlycon_setup_tail(dev, opt);
+}
+
 OF_EARLYCON_DECLARE(jz4740_uart, "ingenic,jz4740-uart",
 		    ingenic_early_console_setup);
 
+OF_EARLYCON_DECLARE(jz4750_uart, "ingenic,jz4750-uart",
+		    jz4750_early_console_setup);
+
 OF_EARLYCON_DECLARE(jz4770_uart, "ingenic,jz4770-uart",
 		    ingenic_early_console_setup);
 
@@ -328,6 +353,7 @@ static const struct ingenic_uart_config x1000_uart_config = {
 
 static const struct of_device_id of_match[] = {
 	{ .compatible = "ingenic,jz4740-uart", .data = &jz4740_uart_config },
+	{ .compatible = "ingenic,jz4750-uart", .data = &jz4760_uart_config },
 	{ .compatible = "ingenic,jz4760-uart", .data = &jz4760_uart_config },
 	{ .compatible = "ingenic,jz4770-uart", .data = &jz4760_uart_config },
 	{ .compatible = "ingenic,jz4775-uart", .data = &jz4760_uart_config },
-- 
GitLab


From 79d0224f6bf296d04cd843cfc49921b19c97bb09 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Thu, 29 Sep 2022 16:44:00 +0200
Subject: [PATCH 0848/4122] tty: serial: imx: Handle RS485 DE signal active
 high

The default polarity of RS485 DE signal is active high. This driver does
not handle such case properly. Currently, when a pin is multiplexed as a
UART CTS_B on boot, this pin is pulled HIGH by the i.MX UART CTS circuit,
which activates DE signal on the RS485 transceiver and thus behave as if
the RS485 was transmitting data, so the system blocks the RS485 bus when
it starts and until user application takes over. This behavior is not OK.
The problem consists of two separate parts.

First, the i.MX UART IP requires UCR1 UARTEN and UCR2 RXEN to be set for
UCR2 CTSC and CTS bits to have any effect. The UCR2 CTSC bit permits the
driver to set CTS (RTS_B or RS485 DE signal) to either level sychronous
to the internal UART IP clock. Compared to other options, like GPIO CTS
control, this has the benefit of being synchronous to the UART IP clock
and thus without glitches or bus delays. The reason for the CTS design
is likely because when the Receiver is disabled, the UART IP can never
indicate that it is ready to receive data by assering CTS signal, so
the CTS is always pulled HIGH by default.

When the port is closed by user space, imx_uart_stop_rx() clears UCR2
RXEN bit, and imx_uart_shutdown() clears UCR1 UARTEN bit. This disables
UART Receiver and UART itself, and forces CTS signal HIGH, which leads
to the RS485 bus being blocked because RS485 DE is incorrectly active.

The proposed solution for this problem is to keep the Receiver running
even after the port is closed, but in loopback mode. This disconnects
the RX FIFO input from the RXD external signal, and since UCR2 TXEN is
cleared, the UART Transmitter is disabled, so nothing can feed data in
the RX FIFO. Because the Receiver is still enabled, the UCR2 CTSC and
CTS bits still have effect and the CTS (RS485 DE) control is retained.

Note that in case of RS485 DE signal active low, there is no problem and
no special handling is necessary. The CTS signal defaults to HIGH, thus
the RS485 is by default set to Receive and the bus is not blocked.

Note that while there is the possibility to control CTS using GPIO with
either CTS polarity, this has the downside of not being synchronous to
the UART IP clock and thus glitchy and susceptible to slow DE switching.

Second, on boot, before the UART driver probe callback is called, the
driver core triggers pinctrl_init_done() and configures the IOMUXC to
default state. At this point, UCR1 UARTEN and UCR2 RXEN are both still
cleared, but UART CTS_B (RS485 DE) is configured as CTS function, thus
the RTS signal is pulled HIGH by the UART IP CTS circuit.

One part of the solution here is to enable UCR1 UARTEN and UCR2 RXEN and
UTS loopback in this driver probe callback, thus unblocking the CTSC and
CTS control early on. But this is still too late, since the pin control
is already configured and CTS has been pulled HIGH for a short period
of time.

When Linux kernel boots and this driver is bound, the pin control is set
to special "init" state if the state is available, and driver can switch
the "default" state afterward when ready. This state can be used to set
the CTS line as a GPIO in DT temporarily, and a GPIO hog can force such
GPIO to LOW, thus keeping the RS485 DE line LOW early on boot. Once the
driver takes over and UCR1 UARTEN and UCR2 RXEN and UTS loopback are all
enabled, the driver can switch to "default" pin control state and control
the CTS line as function instead. DT binding example is below:

"
&gpio6 {
  rts-init-hog {
    gpio-hog;
    gpios = <5 0>;
    output-low;
    line-name = "rs485-de";
  };
};

&uart5 { /* DHCOM UART2 */
  pinctrl-0 = <&pinctrl_uart5>;
  pinctrl-1 = <&pinctrl_uart5_init>;
  pinctrl-names = "default", "init";
  ...
};
pinctrl_uart5_init: uart5-init-grp {
  fsl,pins = <
...
    MX6QDL_PAD_CSI0_DAT19__GPIO6_IO05       0x30b1
  >;
};

pinctrl_uart5: uart5-grp {
  fsl,pins = <
...
    MX6QDL_PAD_CSI0_DAT19__UART5_CTS_B      0x30b1
  >;
};
"

Tested-by: Christoph Niedermaier <cniedermaier@dh-electronics.com>
Reviewed-by: Fabio Estevam <festevam@denx.de>
Signed-off-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20220929144400.13571-1-marex@denx.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/imx.c | 64 ++++++++++++++++++++++++++++++++++++----
 1 file changed, 58 insertions(+), 6 deletions(-)

diff --git a/drivers/tty/serial/imx.c b/drivers/tty/serial/imx.c
index a7548d0a1aeee..11ac805663aed 100644
--- a/drivers/tty/serial/imx.c
+++ b/drivers/tty/serial/imx.c
@@ -489,7 +489,7 @@ static void imx_uart_stop_tx(struct uart_port *port)
 static void imx_uart_stop_rx(struct uart_port *port)
 {
 	struct imx_port *sport = (struct imx_port *)port;
-	u32 ucr1, ucr2, ucr4;
+	u32 ucr1, ucr2, ucr4, uts;
 
 	ucr1 = imx_uart_readl(sport, UCR1);
 	ucr2 = imx_uart_readl(sport, UCR2);
@@ -505,7 +505,18 @@ static void imx_uart_stop_rx(struct uart_port *port)
 	imx_uart_writel(sport, ucr1, UCR1);
 	imx_uart_writel(sport, ucr4, UCR4);
 
-	ucr2 &= ~UCR2_RXEN;
+	/* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
+	if (port->rs485.flags & SER_RS485_ENABLED &&
+	    port->rs485.flags & SER_RS485_RTS_ON_SEND &&
+	    sport->have_rtscts && !sport->have_rtsgpio) {
+		uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
+		uts |= UTS_LOOP;
+		imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
+		ucr2 |= UCR2_RXEN;
+	} else {
+		ucr2 &= ~UCR2_RXEN;
+	}
+
 	imx_uart_writel(sport, ucr2, UCR2);
 }
 
@@ -1390,7 +1401,7 @@ static int imx_uart_startup(struct uart_port *port)
 	int retval, i;
 	unsigned long flags;
 	int dma_is_inited = 0;
-	u32 ucr1, ucr2, ucr3, ucr4;
+	u32 ucr1, ucr2, ucr3, ucr4, uts;
 
 	retval = clk_prepare_enable(sport->clk_per);
 	if (retval)
@@ -1495,6 +1506,11 @@ static int imx_uart_startup(struct uart_port *port)
 		imx_uart_writel(sport, ucr2, UCR2);
 	}
 
+	/* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
+	uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
+	uts &= ~UTS_LOOP;
+	imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
+
 	spin_unlock_irqrestore(&sport->port.lock, flags);
 
 	return 0;
@@ -1504,7 +1520,7 @@ static void imx_uart_shutdown(struct uart_port *port)
 {
 	struct imx_port *sport = (struct imx_port *)port;
 	unsigned long flags;
-	u32 ucr1, ucr2, ucr4;
+	u32 ucr1, ucr2, ucr4, uts;
 
 	if (sport->dma_is_enabled) {
 		dmaengine_terminate_sync(sport->dma_chan_tx);
@@ -1548,7 +1564,18 @@ static void imx_uart_shutdown(struct uart_port *port)
 	spin_lock_irqsave(&sport->port.lock, flags);
 
 	ucr1 = imx_uart_readl(sport, UCR1);
-	ucr1 &= ~(UCR1_TRDYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_UARTEN | UCR1_RXDMAEN | UCR1_ATDMAEN);
+	ucr1 &= ~(UCR1_TRDYEN | UCR1_RRDYEN | UCR1_RTSDEN | UCR1_RXDMAEN | UCR1_ATDMAEN);
+	/* See SER_RS485_ENABLED/UTS_LOOP comment in imx_uart_probe() */
+	if (port->rs485.flags & SER_RS485_ENABLED &&
+	    port->rs485.flags & SER_RS485_RTS_ON_SEND &&
+	    sport->have_rtscts && !sport->have_rtsgpio) {
+		uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
+		uts |= UTS_LOOP;
+		imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
+		ucr1 |= UCR1_UARTEN;
+	} else {
+		ucr1 &= ~UCR1_UARTEN;
+	}
 	imx_uart_writel(sport, ucr1, UCR1);
 
 	ucr4 = imx_uart_readl(sport, UCR4);
@@ -2210,7 +2237,7 @@ static int imx_uart_probe(struct platform_device *pdev)
 	void __iomem *base;
 	u32 dma_buf_conf[2];
 	int ret = 0;
-	u32 ucr1;
+	u32 ucr1, ucr2, uts;
 	struct resource *res;
 	int txirq, rxirq, rtsirq;
 
@@ -2347,6 +2374,31 @@ static int imx_uart_probe(struct platform_device *pdev)
 	ucr1 &= ~(UCR1_ADEN | UCR1_TRDYEN | UCR1_IDEN | UCR1_RRDYEN | UCR1_RTSDEN);
 	imx_uart_writel(sport, ucr1, UCR1);
 
+	/*
+	 * In case RS485 is enabled without GPIO RTS control, the UART IP
+	 * is used to control CTS signal. Keep both the UART and Receiver
+	 * enabled, otherwise the UART IP pulls CTS signal always HIGH no
+	 * matter how the UCR2 CTSC and CTS bits are set. To prevent any
+	 * data from being fed into the RX FIFO, enable loopback mode in
+	 * UTS register, which disconnects the RX path from external RXD
+	 * pin and connects it to the Transceiver, which is disabled, so
+	 * no data can be fed to the RX FIFO that way.
+	 */
+	if (sport->port.rs485.flags & SER_RS485_ENABLED &&
+	    sport->have_rtscts && !sport->have_rtsgpio) {
+		uts = imx_uart_readl(sport, imx_uart_uts_reg(sport));
+		uts |= UTS_LOOP;
+		imx_uart_writel(sport, uts, imx_uart_uts_reg(sport));
+
+		ucr1 = imx_uart_readl(sport, UCR1);
+		ucr1 |= UCR1_UARTEN;
+		imx_uart_writel(sport, ucr1, UCR1);
+
+		ucr2 = imx_uart_readl(sport, UCR2);
+		ucr2 |= UCR2_RXEN;
+		imx_uart_writel(sport, ucr2, UCR2);
+	}
+
 	if (!imx_uart_is_imx1(sport) && sport->dte_mode) {
 		/*
 		 * The DCEDTE bit changes the direction of DSR, DCD, DTR and RI
-- 
GitLab


From 2cfc64f3f0e1c1136b1a8247e53dc24c54f0bf93 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 17 Oct 2022 20:16:33 +0300
Subject: [PATCH 0849/4122] serial: 8250_core: Use str_enabled_disabled()
 helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use str_enabled_disabled() helper instead of open coding the same.

Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221017171633.65275-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c
index 94fbf0add2ce2..80a2fc2fbd4d7 100644
--- a/drivers/tty/serial/8250/8250_core.c
+++ b/drivers/tty/serial/8250/8250_core.c
@@ -32,6 +32,7 @@
 #include <linux/nmi.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
+#include <linux/string_helpers.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
 #ifdef CONFIG_SPARC
@@ -1175,8 +1176,8 @@ static int __init serial8250_init(void)
 
 	serial8250_isa_init_ports();
 
-	pr_info("Serial: 8250/16550 driver, %d ports, IRQ sharing %sabled\n",
-		nr_uarts, share_irqs ? "en" : "dis");
+	pr_info("Serial: 8250/16550 driver, %d ports, IRQ sharing %s\n",
+		nr_uarts, str_enabled_disabled(share_irqs));
 
 #ifdef CONFIG_SPARC
 	ret = sunserial_register_minors(&serial8250_reg, UART_NR);
-- 
GitLab


From cc72a1eea5e3f712ab4d59615bf1d4479cee16fc Mon Sep 17 00:00:00 2001
From: ruanjinjie <ruanjinjie@huawei.com>
Date: Wed, 19 Oct 2022 14:44:12 +0800
Subject: [PATCH 0850/4122] tty: hvc: make hvc_rtas_dev static

The symbol is not used outside of the file, so mark it static.

Fixes the following warning:

drivers/tty/hvc/hvc_rtas.c:29:19: warning: symbol 'hvc_rtas_dev' was
not declared. Should it be static?

Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: ruanjinjie <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/r/20221019064412.3759874-1-ruanjinjie@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/hvc/hvc_rtas.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/hvc/hvc_rtas.c b/drivers/tty/hvc/hvc_rtas.c
index e8b8c645482b0..184d325abeeda 100644
--- a/drivers/tty/hvc/hvc_rtas.c
+++ b/drivers/tty/hvc/hvc_rtas.c
@@ -26,7 +26,7 @@
 #include "hvc_console.h"
 
 #define hvc_rtas_cookie 0x67781e15
-struct hvc_struct *hvc_rtas_dev;
+static struct hvc_struct *hvc_rtas_dev;
 
 static int rtascons_put_char_token = RTAS_UNKNOWN_SERVICE;
 static int rtascons_get_char_token = RTAS_UNKNOWN_SERVICE;
-- 
GitLab


From fa31528a214701e3afb3997f1c2c2b7290e1d05c Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 21 Oct 2022 16:02:16 +0200
Subject: [PATCH 0851/4122] dt-bindings: serial: renesas,scif: Document
 r8a779g0 support

Document support for the Serial Communication Interface with FIFO (SCIF)
in the Renesas R-Car V4H (R8A779G0) SoC.

Reviewed-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/8a2d04651f04a97d652395b4d933af5c3c8d5b5b.1666360789.git.geert+renesas@glider.be
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/serial/renesas,scif.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/serial/renesas,scif.yaml b/Documentation/devicetree/bindings/serial/renesas,scif.yaml
index f930e7f1349fc..f81f2d67a1ed4 100644
--- a/Documentation/devicetree/bindings/serial/renesas,scif.yaml
+++ b/Documentation/devicetree/bindings/serial/renesas,scif.yaml
@@ -67,6 +67,7 @@ properties:
           - enum:
               - renesas,scif-r8a779a0     # R-Car V3U
               - renesas,scif-r8a779f0     # R-Car S4-8
+              - renesas,scif-r8a779g0     # R-Car V4H
           - const: renesas,rcar-gen4-scif # R-Car Gen4
           - const: renesas,scif           # generic SCIF compatible UART
 
-- 
GitLab


From 15730dc45dc7432713c7af9ee5abad76872f6405 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 13:55:04 +0300
Subject: [PATCH 0852/4122] tty: Cleanup tty buffer align mask
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't use decimal for mask. Don't use literal for aligning.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019105504.16800-2-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_buffer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 5e287dedce018..3f057805560fe 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -21,7 +21,7 @@
 #include "tty.h"
 
 #define MIN_TTYB_SIZE	256
-#define TTYB_ALIGN_MASK	255
+#define TTYB_ALIGN_MASK	0xff
 
 /*
  * Byte threshold to limit memory consumption for flip buffers.
@@ -37,7 +37,7 @@
  * logic this must match.
  */
 
-#define TTY_BUFFER_PAGE	(((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~0xFF)
+#define TTY_BUFFER_PAGE	(((PAGE_SIZE - sizeof(struct tty_buffer)) / 2) & ~TTYB_ALIGN_MASK)
 
 /**
  * tty_buffer_lock_exclusive	-	gain exclusive access to buffer
-- 
GitLab


From 98e4c68ddcaf3721df9bef809775a8e3562cb6f9 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 1 Nov 2022 22:48:30 +0900
Subject: [PATCH 0853/4122] perf probe: Fix to avoid crashing if
 DW_AT_decl_file is NULL

Since clang generates DWARF5 which sets DW_AT_decl_file as 0,
dwarf_decl_file() thinks that is invalid and returns NULL.  In that case
'perf probe' SIGSEGVs because it doesn't expect a NULL decl_file.

This adds a dwarf_decl_file() return value check to avoid such SEGV with
clang generated DWARF5 info.

Without this, 'perf probe' crashes:

  $ perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10
  Segmentation fault
  $

With this, it just warns about it:

  $ perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10
  Debuginfo analysis failed.
    Error: Failed to show lines.
  $

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/166731051077.2100653.15626653369345128302.stgit@devnote3
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c    |  7 ++++++-
 tools/perf/util/probe-finder.c | 29 +++++++++++++++++++++--------
 2 files changed, 27 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 609ca16715018..406b7bdc851ab 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -137,7 +137,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr,
 	}
 
 out:
-	return *lineno ?: -ENOENT;
+	return (*lineno && *fname) ? *lineno : -ENOENT;
 }
 
 static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data);
@@ -874,6 +874,11 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
 		cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
 		dwarf_decl_line(rt_die, &decl);
 		decf = dwarf_decl_file(rt_die);
+		if (!decf) {
+			pr_debug2("Failed to get the declared file name of %s\n",
+				  dwarf_diename(rt_die));
+			return -EINVAL;
+		}
 	} else
 		cu_die = rt_die;
 	if (!cu_die) {
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 50d861a80f572..1aa8fcc41c76c 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1063,6 +1063,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 	struct dwarf_callback_param *param = data;
 	struct probe_finder *pf = param->data;
 	struct perf_probe_point *pp = &pf->pev->point;
+	const char *fname;
 
 	/* Check tag and diename */
 	if (!die_is_func_def(sp_die) ||
@@ -1070,12 +1071,17 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 		return DWARF_CB_OK;
 
 	/* Check declared file */
-	if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
+	fname = dwarf_decl_file(sp_die);
+	if (!fname) {
+		pr_warning("A function DIE doesn't have decl_line. Maybe broken DWARF?\n");
+		return DWARF_CB_OK;
+	}
+	if (pp->file && fname && strtailcmp(pp->file, fname))
 		return DWARF_CB_OK;
 
 	pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die),
 		 (unsigned long)dwarf_dieoffset(sp_die));
-	pf->fname = dwarf_decl_file(sp_die);
+	pf->fname = fname;
 	if (pp->line) { /* Function relative line */
 		dwarf_decl_line(sp_die, &pf->lno);
 		pf->lno += pp->line;
@@ -1134,6 +1140,7 @@ struct pubname_callback_param {
 static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
 {
 	struct pubname_callback_param *param = data;
+	const char *fname;
 
 	if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) {
 		if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
@@ -1143,9 +1150,11 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
 			if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
 				return DWARF_CB_OK;
 
-			if (param->file &&
-			    strtailcmp(param->file, dwarf_decl_file(param->sp_die)))
-				return DWARF_CB_OK;
+			if (param->file) {
+				fname = dwarf_decl_file(param->sp_die);
+				if (!fname || strtailcmp(param->file, fname))
+					return DWARF_CB_OK;
+			}
 
 			param->found = 1;
 			return DWARF_CB_ABORT;
@@ -1779,7 +1788,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
 		}
 		/* Verify the lineno and baseline are in a same file */
 		tmp = dwarf_decl_file(&spdie);
-		if (!tmp || strcmp(tmp, fname) != 0)
+		if (!tmp || (fname && strcmp(tmp, fname) != 0))
 			lineno = 0;
 	}
 
@@ -1889,10 +1898,14 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
 	struct dwarf_callback_param *param = data;
 	struct line_finder *lf = param->data;
 	struct line_range *lr = lf->lr;
+	const char *fname;
 
 	/* Check declared file */
-	if (lr->file && strtailcmp(lr->file, dwarf_decl_file(sp_die)))
-		return DWARF_CB_OK;
+	if (lr->file) {
+		fname = dwarf_decl_file(sp_die);
+		if (!fname || strtailcmp(lr->file, fname))
+			return DWARF_CB_OK;
+	}
 
 	if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) {
 		lf->fname = dwarf_decl_file(sp_die);
-- 
GitLab


From f828929ab7f0dc3353e4a617f94f297fa8f3dec3 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 1 Nov 2022 22:48:39 +0900
Subject: [PATCH 0854/4122] perf probe: Use dwarf_attr_integrate as generic
 DWARF attr accessor

Use dwarf_attr_integrate() instead of dwarf_attr() for generic attribute
acccessor functions, so that it can find the specified attribute from
abstact origin DIE etc.

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/166731051988.2100653.13595339994343449770.stgit@devnote3
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 406b7bdc851ab..216fc3d959e80 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -308,7 +308,7 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
 {
 	Dwarf_Attribute attr;
 
-	if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+	if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
 	    dwarf_formudata(&attr, result) != 0)
 		return -ENOENT;
 
@@ -321,7 +321,7 @@ static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
 {
 	Dwarf_Attribute attr;
 
-	if (dwarf_attr(tp_die, attr_name, &attr) == NULL ||
+	if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
 	    dwarf_formsdata(&attr, result) != 0)
 		return -ENOENT;
 
-- 
GitLab


From dc9a5d2ccd5c823cc05cafe75fcf19b682d8152c Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 1 Nov 2022 22:48:49 +0900
Subject: [PATCH 0855/4122] perf probe: Fix to get declared file name from
 clang DWARF5

Fix to get the declared file name even if it uses file index 0
in DWARF5, using custom die_get_decl_file() function.

Actually, the DWARF5 standard says file index 0 of the DW_AT_decl_file
is invalid(1), but there is a discussion and maybe this will be updated
[2].

Anyway, clang generates such DWARF5 file for the linux kernel. Thus it
must be handled.

Without this, 'perf probe' returns an error:

   $ ./perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10
   Debuginfo analysis failed.
     Error: Failed to show lines.

With this, it can handle the case correctly:

  $ ./perf probe -k $BIN_PATH/vmlinux -s $SRC_PATH -L vfs_read:10
  <vfs_read@$SRC_PATH/fs/read_write.c:10>

       11         ret = rw_verify_area(READ, file, pos, count);
       12         if (ret)
                           return ret;

[1] DWARF5 specification 2.14 says "The value 0 indicates that no source file has been specified.")
[2] http://wiki.dwarfstd.org/index.php?title=DWARF5_Line_Table_File_Numbers)

Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/166731052936.2100653.13380621874859467731.stgit@devnote3
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c    | 47 +++++++++++++++++++++++-----------
 tools/perf/util/dwarf-aux.h    |  3 +++
 tools/perf/util/probe-finder.c | 14 +++++-----
 3 files changed, 42 insertions(+), 22 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 216fc3d959e80..30b36b525681a 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -123,7 +123,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr,
 	if (die_find_realfunc(cu_die, addr, &die_mem)
 	    && die_entrypc(&die_mem, &faddr) == 0 &&
 	    faddr == addr) {
-		*fname = dwarf_decl_file(&die_mem);
+		*fname = die_get_decl_file(&die_mem);
 		dwarf_decl_line(&die_mem, lineno);
 		goto out;
 	}
@@ -486,6 +486,19 @@ static int die_get_decl_fileno(Dwarf_Die *pdie)
 		return -ENOENT;
 }
 
+/* Return the file name by index */
+static const char *die_get_file_name(Dwarf_Die *dw_die, int idx)
+{
+	Dwarf_Die cu_die;
+	Dwarf_Files *files;
+
+	if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) ||
+	    dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
+		return NULL;
+
+	return dwarf_filesrc(files, idx, NULL, NULL);
+}
+
 /**
  * die_get_call_file - Get callsite file name of inlined function instance
  * @in_die: a DIE of an inlined function instance
@@ -495,18 +508,22 @@ static int die_get_decl_fileno(Dwarf_Die *pdie)
  */
 const char *die_get_call_file(Dwarf_Die *in_die)
 {
-	Dwarf_Die cu_die;
-	Dwarf_Files *files;
-	int idx;
-
-	idx = die_get_call_fileno(in_die);
-	if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) ||
-	    dwarf_getsrcfiles(&cu_die, &files, NULL) != 0)
-		return NULL;
-
-	return dwarf_filesrc(files, idx, NULL, NULL);
+	return die_get_file_name(in_die, die_get_call_fileno(in_die));
 }
 
+/**
+ * die_get_decl_file - Find the declared file name of this DIE
+ * @dw_die: a DIE for something declared.
+ *
+ * Get declared file name of @dw_die.
+ * NOTE: Since some version of clang DWARF5 implementation incorrectly uses
+ * file index 0 for DW_AT_decl_file, die_get_decl_file() will return NULL for
+ * such cases. Use this function instead.
+ */
+const char *die_get_decl_file(Dwarf_Die *dw_die)
+{
+	return die_get_file_name(dw_die, die_get_decl_fileno(dw_die));
+}
 
 /**
  * die_find_child - Generic DIE search function in DIE tree
@@ -790,7 +807,7 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data)
 	}
 
 	if (addr) {
-		fname = dwarf_decl_file(in_die);
+		fname = die_get_decl_file(in_die);
 		if (fname && dwarf_decl_line(in_die, &lineno) == 0) {
 			lw->retval = lw->callback(fname, lineno, addr, lw->data);
 			if (lw->retval != 0)
@@ -818,7 +835,7 @@ static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive,
 	int lineno;
 
 	/* Handle function declaration line */
-	fname = dwarf_decl_file(sp_die);
+	fname = die_get_decl_file(sp_die);
 	if (fname && dwarf_decl_line(sp_die, &lineno) == 0 &&
 	    die_entrypc(sp_die, &addr) == 0) {
 		lw.retval = callback(fname, lineno, addr, data);
@@ -873,7 +890,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
 	if (dwarf_tag(rt_die) != DW_TAG_compile_unit) {
 		cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL);
 		dwarf_decl_line(rt_die, &decl);
-		decf = dwarf_decl_file(rt_die);
+		decf = die_get_decl_file(rt_die);
 		if (!decf) {
 			pr_debug2("Failed to get the declared file name of %s\n",
 				  dwarf_diename(rt_die));
@@ -928,7 +945,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data)
 
 				dwarf_decl_line(&die_mem, &inl);
 				if (inl != decl ||
-				    decf != dwarf_decl_file(&die_mem))
+				    decf != die_get_decl_file(&die_mem))
 					continue;
 			}
 		}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 7ee0fa19b5c49..7ec8bc1083bb3 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -50,6 +50,9 @@ int die_get_call_lineno(Dwarf_Die *in_die);
 /* Get callsite file name of inlined function instance */
 const char *die_get_call_file(Dwarf_Die *in_die);
 
+/* Get declared file name of a DIE */
+const char *die_get_decl_file(Dwarf_Die *dw_die);
+
 /* Get type die */
 Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
 
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1aa8fcc41c76c..54b49ce85c9f2 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -763,7 +763,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
 
 	/* Skip if declared file name does not match */
 	if (fsp->file) {
-		file = dwarf_decl_file(fn_die);
+		file = die_get_decl_file(fn_die);
 		if (!file || strcmp(fsp->file, file) != 0)
 			return 0;
 	}
@@ -1071,7 +1071,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 		return DWARF_CB_OK;
 
 	/* Check declared file */
-	fname = dwarf_decl_file(sp_die);
+	fname = die_get_decl_file(sp_die);
 	if (!fname) {
 		pr_warning("A function DIE doesn't have decl_line. Maybe broken DWARF?\n");
 		return DWARF_CB_OK;
@@ -1151,7 +1151,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
 				return DWARF_CB_OK;
 
 			if (param->file) {
-				fname = dwarf_decl_file(param->sp_die);
+				fname = die_get_decl_file(param->sp_die);
 				if (!fname || strtailcmp(param->file, fname))
 					return DWARF_CB_OK;
 			}
@@ -1750,7 +1750,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
 			goto post;
 		}
 
-		fname = dwarf_decl_file(&spdie);
+		fname = die_get_decl_file(&spdie);
 		if (addr == baseaddr) {
 			/* Function entry - Relative line number is 0 */
 			lineno = baseline;
@@ -1787,7 +1787,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr,
 			}
 		}
 		/* Verify the lineno and baseline are in a same file */
-		tmp = dwarf_decl_file(&spdie);
+		tmp = die_get_decl_file(&spdie);
 		if (!tmp || (fname && strcmp(tmp, fname) != 0))
 			lineno = 0;
 	}
@@ -1902,13 +1902,13 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
 
 	/* Check declared file */
 	if (lr->file) {
-		fname = dwarf_decl_file(sp_die);
+		fname = die_get_decl_file(sp_die);
 		if (!fname || strtailcmp(lr->file, fname))
 			return DWARF_CB_OK;
 	}
 
 	if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) {
-		lf->fname = dwarf_decl_file(sp_die);
+		lf->fname = die_get_decl_file(sp_die);
 		dwarf_decl_line(sp_die, &lr->offset);
 		pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
 		lf->lno_s = lr->offset + lr->start;
-- 
GitLab


From f3c9bd4e16a503cb14891963428d388a4f70ffb8 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:06 -0700
Subject: [PATCH 0856/4122] perf build: Update to C standard to gnu11

C11 has become the standard for mainstream kernel development [1],
allowing it in the perf build enables libraries like stdatomic.h to be
assumed to be present. This came up in the context of [2].

[1] https://lore.kernel.org/lkml/CAHk-=whWbENRz-vLY6vpESDLj6kGUTKO3khGtVfipHqwewh2HQ@mail.gmail.com/
[2] https://lore.kernel.org/lkml/20221024011024.462518-1-irogers@google.com/

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 898226ea8cadc..d3d3c13a9f25b 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -307,7 +307,7 @@ CORE_CFLAGS += -ggdb3
 CORE_CFLAGS += -funwind-tables
 CORE_CFLAGS += -Wall
 CORE_CFLAGS += -Wextra
-CORE_CFLAGS += -std=gnu99
+CORE_CFLAGS += -std=gnu11
 
 CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti
 CXXFLAGS += -Wall
-- 
GitLab


From 8ed28c2b56b78442989ef1afee2b968e9d51a65c Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:07 -0700
Subject: [PATCH 0857/4122] perf record: Use sig_atomic_t for signal handlers

This removes undefined behavior as described in:

  https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers

Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 59f3d98a0196d..bd462a3f2bbdb 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -646,10 +646,10 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size)
 	return record__write(rec, map, bf, size);
 }
 
-static volatile int signr = -1;
-static volatile int child_finished;
+static volatile sig_atomic_t signr = -1;
+static volatile sig_atomic_t child_finished;
 #ifdef HAVE_EVENTFD_SUPPORT
-static volatile int done_fd = -1;
+static volatile sig_atomic_t done_fd = -1;
 #endif
 
 static void sig_handler(int sig)
@@ -1926,7 +1926,7 @@ static void record__read_lost_samples(struct record *rec)
 
 }
 
-static volatile int workload_exec_errno;
+static volatile sig_atomic_t workload_exec_errno;
 
 /*
  * evlist__prepare_workload will send a SIGUSR1
-- 
GitLab


From 7f3374299f9762ba7946138bf0d5cfbd50da111b Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:08 -0700
Subject: [PATCH 0858/4122] perf daemon: Use sig_atomic_t to avoid UB

Use sig_atomic_t for a variable written to in a signal handler and
read elsewhere. This is undefined behavior as per:

  https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-daemon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index 3ce0c960ccc9b..7036ec92d47d0 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -105,7 +105,7 @@ static const char * const daemon_usage[] = {
 	NULL
 };
 
-static bool done;
+static volatile sig_atomic_t done;
 
 static void sig_handler(int sig __maybe_unused)
 {
-- 
GitLab


From 853596fb71f7c2f7ff0de7b13f09d6598d4d56cd Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:09 -0700
Subject: [PATCH 0859/4122] perf ftrace: Use sig_atomic_t to avoid UB

Use sig_atomic_t for a variable written to in a signal handler and
read elsewhere. This is undefined behavior as per:

  https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-ftrace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index 7de07bb16d235..d7fe00f66b831 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -36,8 +36,8 @@
 
 #define DEFAULT_TRACER  "function_graph"
 
-static volatile int workload_exec_errno;
-static bool done;
+static volatile sig_atomic_t workload_exec_errno;
+static volatile sig_atomic_t done;
 
 static void sig_handler(int sig __maybe_unused)
 {
-- 
GitLab


From 057929f9d083e80c9b30c324add69d2054ca6d82 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:10 -0700
Subject: [PATCH 0860/4122] perf session: Change type to avoid undefined
 behaviour in a signal handler

The 'session_done' variable is written to inside the signal handler of
'perf report' and 'perf script'. Switch its type to avoid undefined
behavior.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1a4f10de29ffe..0e1a3d6bacb92 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <errno.h>
+#include <signal.h>
 #include <inttypes.h>
 #include <linux/err.h>
 #include <linux/kernel.h>
@@ -2022,7 +2023,7 @@ static int perf_session__flush_thread_stacks(struct perf_session *session)
 					 NULL);
 }
 
-volatile int session_done;
+volatile sig_atomic_t session_done;
 
 static int __perf_session__process_decomp_events(struct perf_session *session);
 
-- 
GitLab


From 01513fdc18f395dbcc924bc5e9962b12f86f947a Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:11 -0700
Subject: [PATCH 0861/4122] perf stat: Use sig_atomic_t to avoid undefined
 behaviour in a signal handler

Use sig_atomic_t for variables written/accessed in signal
handlers. This is undefined behavior as per:

  https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-7-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index e52601a54b26d..d5e1670bca204 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -173,7 +173,7 @@ static struct target target = {
 
 #define METRIC_ONLY_LEN 20
 
-static volatile pid_t		child_pid			= -1;
+static volatile sig_atomic_t	child_pid			= -1;
 static int			detailed_run			=  0;
 static bool			transaction_run;
 static bool			topdown_run			= false;
@@ -208,7 +208,7 @@ struct perf_stat {
 static struct perf_stat		perf_stat;
 #define STAT_RECORD		perf_stat.record
 
-static volatile int done = 0;
+static volatile sig_atomic_t done = 0;
 
 static struct perf_stat_config stat_config = {
 	.aggr_mode		= AGGR_GLOBAL,
@@ -580,7 +580,7 @@ static void disable_counters(void)
 	}
 }
 
-static volatile int workload_exec_errno;
+static volatile sig_atomic_t workload_exec_errno;
 
 /*
  * evlist__prepare_workload will send a SIGUSR1
@@ -1039,7 +1039,7 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
 	evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv);
 }
 
-static volatile int signr = -1;
+static volatile sig_atomic_t signr = -1;
 
 static void skip_signal(int signo)
 {
-- 
GitLab


From 691768968f2a13eba8d52e8475dca7feb288d4f2 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:12 -0700
Subject: [PATCH 0862/4122] perf top: Use sig_atomic_t to avoid undefined
 behaviour in a signal handler

Use sig_atomic_t for variables written/accessed in signal
handlers. This is undefined behavior as per:

  https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-8-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 4b3ff7687236e..bb5bd241246bc 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -87,8 +87,8 @@
 #include <linux/ctype.h>
 #include <perf/mmap.h>
 
-static volatile int done;
-static volatile int resize;
+static volatile sig_atomic_t done;
+static volatile sig_atomic_t resize;
 
 #define HEADER_LINE_NR  5
 
-- 
GitLab


From 8330b9ebf9ef10156b01d40176b9fff1ce2a374c Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 Oct 2022 07:24:03 +0200
Subject: [PATCH 0863/4122] iommu/fsl_pamu: Replace NO_IRQ by 0

NO_IRQ is used to check the return of irq_of_parse_and_map().

On some architecture NO_IRQ is 0, on other architectures it is -1.

irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ.

So use 0 instead of using NO_IRQ.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/2a2570a8d12c80a7d36837b6c586daa708ca09d7.1665033732.git.christophe.leroy@csgroup.eu
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/fsl_pamu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index 0d03f837a5d4e..1b53d2da2c191 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -779,7 +779,7 @@ static int fsl_pamu_probe(struct platform_device *pdev)
 	of_get_address(dev->of_node, 0, &size, NULL);
 
 	irq = irq_of_parse_and_map(dev->of_node, 0);
-	if (irq == NO_IRQ) {
+	if (!irq) {
 		dev_warn(dev, "no interrupts listed in PAMU node\n");
 		goto error;
 	}
@@ -903,7 +903,7 @@ static int fsl_pamu_probe(struct platform_device *pdev)
 	return 0;
 
 error:
-	if (irq != NO_IRQ)
+	if (irq)
 		free_irq(irq, data);
 
 	kfree_sensitive(data);
-- 
GitLab


From 2a48b15972a3b2a2622e6e537e1d53f457670395 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 24 Oct 2022 14:43:01 +0100
Subject: [PATCH 0864/4122] iommu/amd: Remove variable cnt ind
 iommu_poll_ga_log()

Variable cnt is just being incremented and it's never used
anywhere else. The variable and the increment are redundant so
remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Link: https://lore.kernel.org/r/20221024134301.2158939-1-colin.i.king@gmail.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index d3b39d0416fa3..3847f3bdc5687 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -767,7 +767,7 @@ EXPORT_SYMBOL(amd_iommu_register_ga_log_notifier);
 
 static void iommu_poll_ga_log(struct amd_iommu *iommu)
 {
-	u32 head, tail, cnt = 0;
+	u32 head, tail;
 
 	if (iommu->ga_log == NULL)
 		return;
@@ -780,7 +780,6 @@ static void iommu_poll_ga_log(struct amd_iommu *iommu)
 		u64 log_entry;
 
 		raw = (u64 *)(iommu->ga_log + head);
-		cnt++;
 
 		/* Avoid memcpy function-call overhead */
 		log_entry = *raw;
-- 
GitLab


From bf8d2dd2ed0825a58f31cc510245a1eb46f8a87e Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Tue, 25 Oct 2022 13:56:52 +0200
Subject: [PATCH 0865/4122] iommu/s390: Fix duplicate domain attachments

Since commit fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev
calls") we can end up with duplicates in the list of devices attached to
a domain. This is inefficient and confusing since only one domain can
actually be in control of the IOMMU translations for a device. Fix this
by detaching the device from the previous domain, if any, on attach.
Add a WARN_ON() in case we still have attached devices on freeing the
domain. While here remove the re-attach on failure dance as it was
determined to be unlikely to help and may confuse debug and recovery.

Fixes: fa7e9ecc5e1c ("iommu/s390: Tolerate repeat attach_dev calls")
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221025115657.1666860-2-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 106 ++++++++++++++++---------------------
 1 file changed, 45 insertions(+), 61 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 3c071782f6f16..c2e5e81d609e1 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -79,10 +79,36 @@ static void s390_domain_free(struct iommu_domain *domain)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 
+	WARN_ON(!list_empty(&s390_domain->devices));
 	dma_cleanup_tables(s390_domain->dma_table);
 	kfree(s390_domain);
 }
 
+static void __s390_iommu_detach_device(struct zpci_dev *zdev)
+{
+	struct s390_domain *s390_domain = zdev->s390_domain;
+	struct s390_domain_device *domain_device, *tmp;
+	unsigned long flags;
+
+	if (!s390_domain)
+		return;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
+				 list) {
+		if (domain_device->zdev == zdev) {
+			list_del(&domain_device->list);
+			kfree(domain_device);
+			break;
+		}
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+
+	zpci_unregister_ioat(zdev, 0);
+	zdev->s390_domain = NULL;
+	zdev->dma_table = NULL;
+}
+
 static int s390_iommu_attach_device(struct iommu_domain *domain,
 				    struct device *dev)
 {
@@ -90,7 +116,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct s390_domain_device *domain_device;
 	unsigned long flags;
-	int cc, rc;
+	int cc, rc = 0;
 
 	if (!zdev)
 		return -ENODEV;
@@ -99,24 +125,18 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	if (!domain_device)
 		return -ENOMEM;
 
-	if (zdev->dma_table && !zdev->s390_domain) {
-		cc = zpci_dma_exit_device(zdev);
-		if (cc) {
-			rc = -EIO;
-			goto out_free;
-		}
-	}
-
 	if (zdev->s390_domain)
-		zpci_unregister_ioat(zdev, 0);
+		__s390_iommu_detach_device(zdev);
+	else if (zdev->dma_table)
+		zpci_dma_exit_device(zdev);
 
-	zdev->dma_table = s390_domain->dma_table;
 	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-				virt_to_phys(zdev->dma_table));
+				virt_to_phys(s390_domain->dma_table));
 	if (cc) {
 		rc = -EIO;
-		goto out_restore;
+		goto out_free;
 	}
+	zdev->dma_table = s390_domain->dma_table;
 
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
 	/* First device defines the DMA range limits */
@@ -127,9 +147,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	/* Allow only devices with identical DMA range limits */
 	} else if (domain->geometry.aperture_start != zdev->start_dma ||
 		   domain->geometry.aperture_end != zdev->end_dma) {
-		rc = -EINVAL;
 		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
-		goto out_restore;
+		rc = -EINVAL;
+		goto out_unregister;
 	}
 	domain_device->zdev = zdev;
 	zdev->s390_domain = s390_domain;
@@ -138,14 +158,9 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 
 	return 0;
 
-out_restore:
-	if (!zdev->s390_domain) {
-		zpci_dma_init_device(zdev);
-	} else {
-		zdev->dma_table = zdev->s390_domain->dma_table;
-		zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-				   virt_to_phys(zdev->dma_table));
-	}
+out_unregister:
+	zpci_unregister_ioat(zdev, 0);
+	zdev->dma_table = NULL;
 out_free:
 	kfree(domain_device);
 
@@ -155,32 +170,12 @@ out_free:
 static void s390_iommu_detach_device(struct iommu_domain *domain,
 				     struct device *dev)
 {
-	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);
-	struct s390_domain_device *domain_device, *tmp;
-	unsigned long flags;
-	int found = 0;
 
-	if (!zdev)
-		return;
+	WARN_ON(zdev->s390_domain != to_s390_domain(domain));
 
-	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
-				 list) {
-		if (domain_device->zdev == zdev) {
-			list_del(&domain_device->list);
-			kfree(domain_device);
-			found = 1;
-			break;
-		}
-	}
-	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
-
-	if (found && (zdev->s390_domain == s390_domain)) {
-		zdev->s390_domain = NULL;
-		zpci_unregister_ioat(zdev, 0);
-		zpci_dma_init_device(zdev);
-	}
+	__s390_iommu_detach_device(zdev);
+	zpci_dma_init_device(zdev);
 }
 
 static struct iommu_device *s390_iommu_probe_device(struct device *dev)
@@ -198,24 +193,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 static void s390_iommu_release_device(struct device *dev)
 {
 	struct zpci_dev *zdev = to_zpci_dev(dev);
-	struct iommu_domain *domain;
 
 	/*
-	 * This is a workaround for a scenario where the IOMMU API common code
-	 * "forgets" to call the detach_dev callback: After binding a device
-	 * to vfio-pci and completing the VFIO_SET_IOMMU ioctl (which triggers
-	 * the attach_dev), removing the device via
-	 * "echo 1 > /sys/bus/pci/devices/.../remove" won't trigger detach_dev,
-	 * only release_device will be called via the BUS_NOTIFY_REMOVED_DEVICE
-	 * notifier.
-	 *
-	 * So let's call detach_dev from here if it hasn't been called before.
+	 * release_device is expected to detach any domain currently attached
+	 * to the device, but keep it attached to other devices in the group.
 	 */
-	if (zdev && zdev->s390_domain) {
-		domain = iommu_get_domain_for_dev(dev);
-		if (domain)
-			s390_iommu_detach_device(domain, dev);
-	}
+	if (zdev)
+		__s390_iommu_detach_device(zdev);
 }
 
 static int s390_iommu_update_trans(struct s390_domain *s390_domain,
-- 
GitLab


From 1a3a7d64bbce3179401f4e691522ff992aa1b8a1 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Tue, 25 Oct 2022 13:56:53 +0200
Subject: [PATCH 0866/4122] iommu/s390: Get rid of s390_domain_device

The struct s390_domain_device serves the sole purpose as list entry for
the devices list of a struct s390_domain. As it contains no additional
information besides a list_head and a pointer to the struct zpci_dev we
can simplify things and just thread the device list through struct
zpci_dev directly. This removes the need to allocate during domain
attach and gets rid of one level of indirection during mapping
operations.

Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221025115657.1666860-3-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/s390/include/asm/pci.h |  1 +
 drivers/iommu/s390-iommu.c  | 37 +++++++------------------------------
 2 files changed, 8 insertions(+), 30 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 108e732d7b140..15f8714ca9b7c 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -117,6 +117,7 @@ struct zpci_bus {
 struct zpci_dev {
 	struct zpci_bus *zbus;
 	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
+	struct list_head iommu_list;
 	struct kref kref;
 	struct hotplug_slot hotplug_slot;
 
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index c2e5e81d609e1..af83ccde16a49 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -29,11 +29,6 @@ struct s390_domain {
 	spinlock_t		list_lock;
 };
 
-struct s390_domain_device {
-	struct list_head	list;
-	struct zpci_dev		*zdev;
-};
-
 static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
 {
 	return container_of(dom, struct s390_domain, domain);
@@ -87,21 +82,13 @@ static void s390_domain_free(struct iommu_domain *domain)
 static void __s390_iommu_detach_device(struct zpci_dev *zdev)
 {
 	struct s390_domain *s390_domain = zdev->s390_domain;
-	struct s390_domain_device *domain_device, *tmp;
 	unsigned long flags;
 
 	if (!s390_domain)
 		return;
 
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	list_for_each_entry_safe(domain_device, tmp, &s390_domain->devices,
-				 list) {
-		if (domain_device->zdev == zdev) {
-			list_del(&domain_device->list);
-			kfree(domain_device);
-			break;
-		}
-	}
+	list_del_init(&zdev->iommu_list);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
 	zpci_unregister_ioat(zdev, 0);
@@ -114,17 +101,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);
-	struct s390_domain_device *domain_device;
 	unsigned long flags;
 	int cc, rc = 0;
 
 	if (!zdev)
 		return -ENODEV;
 
-	domain_device = kzalloc(sizeof(*domain_device), GFP_KERNEL);
-	if (!domain_device)
-		return -ENOMEM;
-
 	if (zdev->s390_domain)
 		__s390_iommu_detach_device(zdev);
 	else if (zdev->dma_table)
@@ -132,10 +114,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 
 	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				virt_to_phys(s390_domain->dma_table));
-	if (cc) {
-		rc = -EIO;
-		goto out_free;
-	}
+	if (cc)
+		return -EIO;
 	zdev->dma_table = s390_domain->dma_table;
 
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
@@ -151,9 +131,8 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 		rc = -EINVAL;
 		goto out_unregister;
 	}
-	domain_device->zdev = zdev;
 	zdev->s390_domain = s390_domain;
-	list_add(&domain_device->list, &s390_domain->devices);
+	list_add(&zdev->iommu_list, &s390_domain->devices);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
 	return 0;
@@ -161,8 +140,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 out_unregister:
 	zpci_unregister_ioat(zdev, 0);
 	zdev->dma_table = NULL;
-out_free:
-	kfree(domain_device);
 
 	return rc;
 }
@@ -206,10 +183,10 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 				   phys_addr_t pa, dma_addr_t dma_addr,
 				   size_t size, int flags)
 {
-	struct s390_domain_device *domain_device;
 	phys_addr_t page_addr = pa & PAGE_MASK;
 	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags, nr_pages, i;
+	struct zpci_dev *zdev;
 	unsigned long *entry;
 	int rc = 0;
 
@@ -234,8 +211,8 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 	}
 
 	spin_lock(&s390_domain->list_lock);
-	list_for_each_entry(domain_device, &s390_domain->devices, list) {
-		rc = zpci_refresh_trans((u64) domain_device->zdev->fh << 32,
+	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+		rc = zpci_refresh_trans((u64)zdev->fh << 32,
 					start_dma_addr, nr_pages * PAGE_SIZE);
 		if (rc)
 			break;
-- 
GitLab


From cbf7827bc5dcfa4301aaea6f57eba9a94dbee7b1 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Tue, 25 Oct 2022 13:56:54 +0200
Subject: [PATCH 0867/4122] iommu/s390: Fix potential s390_domain aperture
 shrinking

The s390 IOMMU driver currently sets the IOMMU domain's aperture to
match the device specific DMA address range of the device that is first
attached. This is not ideal. For one if the domain has no device
attached in the meantime the aperture could be shrunk allowing
translations outside the aperture to exist in the translation tables.
Also this is a bit of a misuse of the aperture which really should
describe what addresses can be translated and not some device specific
limitations.

Instead of misusing the aperture like this we can instead create
reserved ranges for the ranges inaccessible to the attached devices
allowing devices with overlapping ranges to still share an IOMMU domain.
This also significantly simplifies s390_iommu_attach_device() allowing
us to move the aperture check to the beginning of the function and
removing the need to hold the device list's lock to check the aperture.

As we then use the same aperture for all domains and it only depends on
the table properties we can already check zdev->start_dma/end_dma at
probe time and turn the check on attach into a WARN_ON().

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221025115657.1666860-4-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 63 ++++++++++++++++++++++++++------------
 1 file changed, 43 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index af83ccde16a49..9b3adc61005c3 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -62,6 +62,9 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 		kfree(s390_domain);
 		return NULL;
 	}
+	s390_domain->domain.geometry.force_aperture = true;
+	s390_domain->domain.geometry.aperture_start = 0;
+	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
 
 	spin_lock_init(&s390_domain->dma_table_lock);
 	spin_lock_init(&s390_domain->list_lock);
@@ -102,11 +105,15 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	unsigned long flags;
-	int cc, rc = 0;
+	int cc;
 
 	if (!zdev)
 		return -ENODEV;
 
+	if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
+		domain->geometry.aperture_end < zdev->start_dma))
+		return -EINVAL;
+
 	if (zdev->s390_domain)
 		__s390_iommu_detach_device(zdev);
 	else if (zdev->dma_table)
@@ -118,30 +125,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 		return -EIO;
 	zdev->dma_table = s390_domain->dma_table;
 
-	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	/* First device defines the DMA range limits */
-	if (list_empty(&s390_domain->devices)) {
-		domain->geometry.aperture_start = zdev->start_dma;
-		domain->geometry.aperture_end = zdev->end_dma;
-		domain->geometry.force_aperture = true;
-	/* Allow only devices with identical DMA range limits */
-	} else if (domain->geometry.aperture_start != zdev->start_dma ||
-		   domain->geometry.aperture_end != zdev->end_dma) {
-		spin_unlock_irqrestore(&s390_domain->list_lock, flags);
-		rc = -EINVAL;
-		goto out_unregister;
-	}
+	zdev->dma_table = s390_domain->dma_table;
 	zdev->s390_domain = s390_domain;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
 	list_add(&zdev->iommu_list, &s390_domain->devices);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
 	return 0;
-
-out_unregister:
-	zpci_unregister_ioat(zdev, 0);
-	zdev->dma_table = NULL;
-
-	return rc;
 }
 
 static void s390_iommu_detach_device(struct iommu_domain *domain,
@@ -155,6 +146,30 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
 	zpci_dma_init_device(zdev);
 }
 
+static void s390_iommu_get_resv_regions(struct device *dev,
+					struct list_head *list)
+{
+	struct zpci_dev *zdev = to_zpci_dev(dev);
+	struct iommu_resv_region *region;
+
+	if (zdev->start_dma) {
+		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
+						 IOMMU_RESV_RESERVED, GFP_KERNEL);
+		if (!region)
+			return;
+		list_add_tail(&region->list, list);
+	}
+
+	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
+		region = iommu_alloc_resv_region(zdev->end_dma + 1,
+						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
+						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
+		if (!region)
+			return;
+		list_add_tail(&region->list, list);
+	}
+}
+
 static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 {
 	struct zpci_dev *zdev;
@@ -164,6 +179,13 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 
 	zdev = to_zpci_dev(dev);
 
+	if (zdev->start_dma > zdev->end_dma ||
+	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
+		return ERR_PTR(-EINVAL);
+
+	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
+		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+
 	return &zdev->iommu_dev;
 }
 
@@ -342,6 +364,7 @@ static const struct iommu_ops s390_iommu_ops = {
 	.release_device = s390_iommu_release_device,
 	.device_group = generic_device_group,
 	.pgsize_bitmap = S390_IOMMU_PGSIZES,
+	.get_resv_regions = s390_iommu_get_resv_regions,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= s390_iommu_attach_device,
 		.detach_dev	= s390_iommu_detach_device,
-- 
GitLab


From a4d996c2c4b55a42b21d0f7026b2bd6f7396f666 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Tue, 25 Oct 2022 13:56:55 +0200
Subject: [PATCH 0868/4122] iommu/s390: Fix incorrect aperture check

The domain->geometry.aperture_end specifies the last valid address treat
it as such when checking if a DMA address is valid.

Reviewed-by: Pierre Morel <pmorel@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221025115657.1666860-5-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 9b3adc61005c3..3e601ca6ee0fc 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -213,7 +213,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 	int rc = 0;
 
 	if (dma_addr < s390_domain->domain.geometry.aperture_start ||
-	    dma_addr + size > s390_domain->domain.geometry.aperture_end)
+	    (dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end)
 		return -EINVAL;
 
 	nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
-- 
GitLab


From b4d8ae0e907b096583491101ddfc5143b7c08918 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Tue, 25 Oct 2022 13:56:56 +0200
Subject: [PATCH 0869/4122] iommu/s390: Fix incorrect pgsize_bitmap

The .pgsize_bitmap property of struct iommu_ops is not a page mask but
rather has a bit set for each size of pages the IOMMU supports. As the
comment correctly pointed out at this moment the code only support 4K
pages so simply use SZ_4K here.

Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221025115657.1666860-6-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 3e601ca6ee0fc..104dfbec10378 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -12,13 +12,6 @@
 #include <linux/sizes.h>
 #include <asm/pci_dma.h>
 
-/*
- * Physically contiguous memory regions can be mapped with 4 KiB alignment,
- * we allow all page sizes that are an order of 4KiB (no special large page
- * support so far).
- */
-#define S390_IOMMU_PGSIZES	(~0xFFFUL)
-
 static const struct iommu_ops s390_iommu_ops;
 
 struct s390_domain {
@@ -363,7 +356,7 @@ static const struct iommu_ops s390_iommu_ops = {
 	.probe_device = s390_iommu_probe_device,
 	.release_device = s390_iommu_release_device,
 	.device_group = generic_device_group,
-	.pgsize_bitmap = S390_IOMMU_PGSIZES,
+	.pgsize_bitmap = SZ_4K,
 	.get_resv_regions = s390_iommu_get_resv_regions,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= s390_iommu_attach_device,
-- 
GitLab


From f3cc4f874efa8d5b10ebd9dc8702cd25b9e536a3 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Tue, 25 Oct 2022 13:56:57 +0200
Subject: [PATCH 0870/4122] iommu/s390: Implement map_pages()/unmap_pages()
 instead of map()/unmap()

While s390-iommu currently implements the map_page()/unmap_page()
operations which only map/unmap a single page at a time the internal
s390_iommu_update_trans() API already supports mapping/unmapping a range
of pages at once. Take advantage of this by implementing the
map_pages()/unmap_pages() operations instead thus allowing users of the
IOMMU drivers to map multiple pages in a single call followed by
a single I/O TLB flush if needed.

Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221025115657.1666860-7-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 48 +++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 104dfbec10378..7fb512bece9a8 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -196,20 +196,15 @@ static void s390_iommu_release_device(struct device *dev)
 
 static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 				   phys_addr_t pa, dma_addr_t dma_addr,
-				   size_t size, int flags)
+				   unsigned long nr_pages, int flags)
 {
 	phys_addr_t page_addr = pa & PAGE_MASK;
 	dma_addr_t start_dma_addr = dma_addr;
-	unsigned long irq_flags, nr_pages, i;
+	unsigned long irq_flags, i;
 	struct zpci_dev *zdev;
 	unsigned long *entry;
 	int rc = 0;
 
-	if (dma_addr < s390_domain->domain.geometry.aperture_start ||
-	    (dma_addr + size - 1) > s390_domain->domain.geometry.aperture_end)
-		return -EINVAL;
-
-	nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	if (!nr_pages)
 		return 0;
 
@@ -252,11 +247,24 @@ undo_cpu_trans:
 	return rc;
 }
 
-static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
-			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+static int s390_iommu_map_pages(struct iommu_domain *domain,
+				unsigned long iova, phys_addr_t paddr,
+				size_t pgsize, size_t pgcount,
+				int prot, gfp_t gfp, size_t *mapped)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	int flags = ZPCI_PTE_VALID, rc = 0;
+	size_t size = pgcount << __ffs(pgsize);
+
+	if (pgsize != SZ_4K)
+		return -EINVAL;
+
+	if (iova < s390_domain->domain.geometry.aperture_start ||
+	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
+		return -EINVAL;
+
+	if (!IS_ALIGNED(iova | paddr, pgsize))
+		return -EINVAL;
 
 	if (!(prot & IOMMU_READ))
 		return -EINVAL;
@@ -265,7 +273,9 @@ static int s390_iommu_map(struct iommu_domain *domain, unsigned long iova,
 		flags |= ZPCI_TABLE_PROTECTED;
 
 	rc = s390_iommu_update_trans(s390_domain, paddr, iova,
-				     size, flags);
+				     pgcount, flags);
+	if (!rc)
+		*mapped = size;
 
 	return rc;
 }
@@ -301,21 +311,27 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 	return phys;
 }
 
-static size_t s390_iommu_unmap(struct iommu_domain *domain,
-			       unsigned long iova, size_t size,
-			       struct iommu_iotlb_gather *gather)
+static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
+				     unsigned long iova,
+				     size_t pgsize, size_t pgcount,
+				     struct iommu_iotlb_gather *gather)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
+	size_t size = pgcount << __ffs(pgsize);
 	int flags = ZPCI_PTE_INVALID;
 	phys_addr_t paddr;
 	int rc;
 
+	if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
+	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
+		return 0;
+
 	paddr = s390_iommu_iova_to_phys(domain, iova);
 	if (!paddr)
 		return 0;
 
 	rc = s390_iommu_update_trans(s390_domain, paddr, iova,
-				     size, flags);
+				     pgcount, flags);
 	if (rc)
 		return 0;
 
@@ -361,8 +377,8 @@ static const struct iommu_ops s390_iommu_ops = {
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= s390_iommu_attach_device,
 		.detach_dev	= s390_iommu_detach_device,
-		.map		= s390_iommu_map,
-		.unmap		= s390_iommu_unmap,
+		.map_pages	= s390_iommu_map_pages,
+		.unmap_pages	= s390_iommu_unmap_pages,
 		.iova_to_phys	= s390_iommu_iova_to_phys,
 		.free		= s390_domain_free,
 	}
-- 
GitLab


From 92ea0720ba9cf7f09589a711245c2da145125958 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 24 Oct 2022 11:19:13 -0700
Subject: [PATCH 0871/4122] perf trace: Use sig_atomic_t to avoid undefined
 behaviour in a signal handler

Use sig_atomic_t for variables written/accessed in signal
handlers. This is undefined behavior as per:

  https://wiki.sei.cmu.edu/confluence/display/c/SIG31-C.+Do+not+access+shared+objects+in+signal+handlers

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221024181913.630986-9-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d3c757769b965..72991528687ea 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1535,8 +1535,8 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp)
 }
 
 static pid_t workload_pid = -1;
-static bool done = false;
-static bool interrupted = false;
+static volatile sig_atomic_t done = false;
+static volatile sig_atomic_t interrupted = false;
 
 static void sighandler_interrupt(int sig __maybe_unused)
 {
-- 
GitLab


From 9ad0c1252e84dbc664f0462707182245ed603237 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@gmail.com>
Date: Tue, 25 Oct 2022 18:54:11 +0200
Subject: [PATCH 0872/4122] iommu/sun50i: Fix reset release

Reset signal is asserted by writing 0 to the corresponding locations of
masters we want to reset. So in order to deassert all reset signals, we
should write 1's to all locations.

Current code writes 1's to locations of masters which were just reset
which is good. However, at the same time it also writes 0's to other
locations and thus asserts reset signals of remaining masters. Fix code
by writing all 1's when we want to deassert all reset signals.

This bug was discovered when working with Cedrus (video decoder). When
it faulted, display went blank due to reset signal assertion.

Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://lore.kernel.org/r/20221025165415.307591-2-jernej.skrabec@gmail.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/sun50i-iommu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index cd9b74ee24def..270204a6ec4ae 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -27,6 +27,7 @@
 #include <linux/types.h>
 
 #define IOMMU_RESET_REG			0x010
+#define IOMMU_RESET_RELEASE_ALL			0xffffffff
 #define IOMMU_ENABLE_REG		0x020
 #define IOMMU_ENABLE_ENABLE			BIT(0)
 
@@ -893,7 +894,7 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
 	iommu_write(iommu, IOMMU_INT_CLR_REG, status);
 
 	iommu_write(iommu, IOMMU_RESET_REG, ~status);
-	iommu_write(iommu, IOMMU_RESET_REG, status);
+	iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL);
 
 	spin_unlock(&iommu->iommu_lock);
 
-- 
GitLab


From cef20703e2b2276aaa402ec5a65ec9a09963b83e Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@gmail.com>
Date: Tue, 25 Oct 2022 18:54:12 +0200
Subject: [PATCH 0873/4122] iommu/sun50i: Consider all fault sources for reset

We have to reset masters for all faults - permissions, L1 fault or L2
fault. Currently it's done only for permissions. If other type of fault
happens, master is in locked up state. Fix that by really considering
all fault sources.

Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://lore.kernel.org/r/20221025165415.307591-3-jernej.skrabec@gmail.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/sun50i-iommu.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 270204a6ec4ae..bbc269500800f 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -869,8 +869,8 @@ static phys_addr_t sun50i_iommu_handle_perm_irq(struct sun50i_iommu *iommu)
 
 static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
 {
+	u32 status, l1_status, l2_status, resets;
 	struct sun50i_iommu *iommu = dev_id;
-	u32 status;
 
 	spin_lock(&iommu->iommu_lock);
 
@@ -880,6 +880,9 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
+	l1_status = iommu_read(iommu, IOMMU_L1PG_INT_REG);
+	l2_status = iommu_read(iommu, IOMMU_L2PG_INT_REG);
+
 	if (status & IOMMU_INT_INVALID_L2PG)
 		sun50i_iommu_handle_pt_irq(iommu,
 					    IOMMU_INT_ERR_ADDR_L2_REG,
@@ -893,7 +896,8 @@ static irqreturn_t sun50i_iommu_irq(int irq, void *dev_id)
 
 	iommu_write(iommu, IOMMU_INT_CLR_REG, status);
 
-	iommu_write(iommu, IOMMU_RESET_REG, ~status);
+	resets = (status | l1_status | l2_status) & IOMMU_INT_MASTER_MASK;
+	iommu_write(iommu, IOMMU_RESET_REG, ~resets);
 	iommu_write(iommu, IOMMU_RESET_REG, IOMMU_RESET_RELEASE_ALL);
 
 	spin_unlock(&iommu->iommu_lock);
-- 
GitLab


From eac0104dc69be50bed86926d6f32e82b44f8c921 Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@gmail.com>
Date: Tue, 25 Oct 2022 18:54:13 +0200
Subject: [PATCH 0874/4122] iommu/sun50i: Fix R/W permission check

Because driver has enum type permissions and iommu subsystem has bitmap
type, we have to be careful how check for combined read and write
permissions is done. In such case, we have to mask both permissions and
check that both are set at the same time.

Current code just masks both flags but doesn't check that both are set.
In short, it always sets R/W permission, regardles if requested
permissions were RO, WO or RW. Fix that.

Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://lore.kernel.org/r/20221025165415.307591-4-jernej.skrabec@gmail.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/sun50i-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index bbc269500800f..df871af04bcb8 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -271,7 +271,7 @@ static u32 sun50i_mk_pte(phys_addr_t page, int prot)
 	enum sun50i_iommu_aci aci;
 	u32 flags = 0;
 
-	if (prot & (IOMMU_READ | IOMMU_WRITE))
+	if ((prot & (IOMMU_READ | IOMMU_WRITE)) == (IOMMU_READ | IOMMU_WRITE))
 		aci = SUN50I_IOMMU_ACI_RD_WR;
 	else if (prot & IOMMU_READ)
 		aci = SUN50I_IOMMU_ACI_RD;
-- 
GitLab


From 67a8a67f9eceb72e4c73d1d09ed9ab04f4b8e12d Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@gmail.com>
Date: Tue, 25 Oct 2022 18:54:14 +0200
Subject: [PATCH 0875/4122] iommu/sun50i: Fix flush size

Function sun50i_table_flush() takes number of entries as an argument,
not number of bytes. Fix that mistake in sun50i_dte_get_page_table().

Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://lore.kernel.org/r/20221025165415.307591-5-jernej.skrabec@gmail.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/sun50i-iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index df871af04bcb8..e62e245060ac7 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -512,7 +512,7 @@ static u32 *sun50i_dte_get_page_table(struct sun50i_iommu_domain *sun50i_domain,
 		sun50i_iommu_free_page_table(iommu, drop_pt);
 	}
 
-	sun50i_table_flush(sun50i_domain, page_table, PT_SIZE);
+	sun50i_table_flush(sun50i_domain, page_table, NUM_PT_ENTRIES);
 	sun50i_table_flush(sun50i_domain, dte_addr, 1);
 
 	return page_table;
-- 
GitLab


From e563cc0c787c85a4d9def0a77078dc5d3f445e3d Mon Sep 17 00:00:00 2001
From: Jernej Skrabec <jernej.skrabec@gmail.com>
Date: Tue, 25 Oct 2022 18:54:15 +0200
Subject: [PATCH 0876/4122] iommu/sun50i: Implement .iotlb_sync_map

Allocated iova ranges need to be invalidated immediately or otherwise
they might or might not work when used by master or CPU. This was
discovered when running video decoder conformity test with Cedrus. Some
videos were now and then decoded incorrectly and generated page faults.

According to vendor driver, it's enough to invalidate just start and end
TLB and PTW cache lines. Documentation says that neighbouring lines must
be invalidated too. Finally, when page fault occurs, that iova must be
invalidated the same way, according to documentation.

Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Jernej Skrabec <jernej.skrabec@gmail.com>
Link: https://lore.kernel.org/r/20221025165415.307591-6-jernej.skrabec@gmail.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/sun50i-iommu.c | 73 ++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index e62e245060ac7..5cb2d44dfb92d 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -93,6 +93,8 @@
 #define NUM_PT_ENTRIES			256
 #define PT_SIZE				(NUM_PT_ENTRIES * PT_ENTRY_SIZE)
 
+#define SPAGE_SIZE			4096
+
 struct sun50i_iommu {
 	struct iommu_device iommu;
 
@@ -295,6 +297,62 @@ static void sun50i_table_flush(struct sun50i_iommu_domain *sun50i_domain,
 	dma_sync_single_for_device(iommu->dev, dma, size, DMA_TO_DEVICE);
 }
 
+static void sun50i_iommu_zap_iova(struct sun50i_iommu *iommu,
+				  unsigned long iova)
+{
+	u32 reg;
+	int ret;
+
+	iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_REG, iova);
+	iommu_write(iommu, IOMMU_TLB_IVLD_ADDR_MASK_REG, GENMASK(31, 12));
+	iommu_write(iommu, IOMMU_TLB_IVLD_ENABLE_REG,
+		    IOMMU_TLB_IVLD_ENABLE_ENABLE);
+
+	ret = readl_poll_timeout_atomic(iommu->base + IOMMU_TLB_IVLD_ENABLE_REG,
+					reg, !reg, 1, 2000);
+	if (ret)
+		dev_warn(iommu->dev, "TLB invalidation timed out!\n");
+}
+
+static void sun50i_iommu_zap_ptw_cache(struct sun50i_iommu *iommu,
+				       unsigned long iova)
+{
+	u32 reg;
+	int ret;
+
+	iommu_write(iommu, IOMMU_PC_IVLD_ADDR_REG, iova);
+	iommu_write(iommu, IOMMU_PC_IVLD_ENABLE_REG,
+		    IOMMU_PC_IVLD_ENABLE_ENABLE);
+
+	ret = readl_poll_timeout_atomic(iommu->base + IOMMU_PC_IVLD_ENABLE_REG,
+					reg, !reg, 1, 2000);
+	if (ret)
+		dev_warn(iommu->dev, "PTW cache invalidation timed out!\n");
+}
+
+static void sun50i_iommu_zap_range(struct sun50i_iommu *iommu,
+				   unsigned long iova, size_t size)
+{
+	assert_spin_locked(&iommu->iommu_lock);
+
+	iommu_write(iommu, IOMMU_AUTO_GATING_REG, 0);
+
+	sun50i_iommu_zap_iova(iommu, iova);
+	sun50i_iommu_zap_iova(iommu, iova + SPAGE_SIZE);
+	if (size > SPAGE_SIZE) {
+		sun50i_iommu_zap_iova(iommu, iova + size);
+		sun50i_iommu_zap_iova(iommu, iova + size + SPAGE_SIZE);
+	}
+	sun50i_iommu_zap_ptw_cache(iommu, iova);
+	sun50i_iommu_zap_ptw_cache(iommu, iova + SZ_1M);
+	if (size > SZ_1M) {
+		sun50i_iommu_zap_ptw_cache(iommu, iova + size);
+		sun50i_iommu_zap_ptw_cache(iommu, iova + size + SZ_1M);
+	}
+
+	iommu_write(iommu, IOMMU_AUTO_GATING_REG, IOMMU_AUTO_GATING_ENABLE);
+}
+
 static int sun50i_iommu_flush_all_tlb(struct sun50i_iommu *iommu)
 {
 	u32 reg;
@@ -344,6 +402,18 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain)
 	spin_unlock_irqrestore(&iommu->iommu_lock, flags);
 }
 
+static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
+					unsigned long iova, size_t size)
+{
+	struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
+	struct sun50i_iommu *iommu = sun50i_domain->iommu;
+	unsigned long flags;
+
+	spin_lock_irqsave(&iommu->iommu_lock, flags);
+	sun50i_iommu_zap_range(iommu, iova, size);
+	spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+}
+
 static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
 				    struct iommu_iotlb_gather *gather)
 {
@@ -767,6 +837,7 @@ static const struct iommu_ops sun50i_iommu_ops = {
 		.attach_dev	= sun50i_iommu_attach_device,
 		.detach_dev	= sun50i_iommu_detach_device,
 		.flush_iotlb_all = sun50i_iommu_flush_iotlb_all,
+		.iotlb_sync_map = sun50i_iommu_iotlb_sync_map,
 		.iotlb_sync	= sun50i_iommu_iotlb_sync,
 		.iova_to_phys	= sun50i_iommu_iova_to_phys,
 		.map		= sun50i_iommu_map,
@@ -786,6 +857,8 @@ static void sun50i_iommu_report_fault(struct sun50i_iommu *iommu,
 		report_iommu_fault(iommu->domain, iommu->dev, iova, prot);
 	else
 		dev_err(iommu->dev, "Page fault while iommu not attached to any domain?\n");
+
+	sun50i_iommu_zap_range(iommu, iova, SPAGE_SIZE);
 }
 
 static phys_addr_t sun50i_iommu_handle_pt_irq(struct sun50i_iommu *iommu,
-- 
GitLab


From 7a09c1269702db8eccb6f718da2b00173e1e0034 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 2 Nov 2022 14:13:19 -0400
Subject: [PATCH 0877/4122] USB: core: Change configuration warnings to notices

It has been pointed out that the kernel log messages warning about
problems in USB configuration and related descriptors are vexing for
users.  The warning log level has a fairly high priority, but the user
can do nothing to fix the underlying errors in the device's firmware.

To reduce the amount of useless information produced by tools that
filter high-priority log messages, we can change these warnings to
notices, i.e., change dev_warn() to dev_notice().  The same holds for
a few messages that currently use dev_err(): Unless they indicate a
failure that might make a device unusable (such as inability to
transfer a config descriptor), change them to dev_notice() also.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216630
Suggested-by: Artem S. Tashkinov <aros@gmx.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/Y2KzPx0h6z1jXCuN@rowland.harvard.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/config.c | 82 +++++++++++++++++++--------------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 48bc8a4814ac4..725b8dbcfe5f0 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -61,7 +61,7 @@ static void usb_parse_ssp_isoc_endpoint_companion(struct device *ddev,
 	desc = (struct usb_ssp_isoc_ep_comp_descriptor *) buffer;
 	if (desc->bDescriptorType != USB_DT_SSP_ISOC_ENDPOINT_COMP ||
 	    size < USB_DT_SSP_ISOC_EP_COMP_SIZE) {
-		dev_warn(ddev, "Invalid SuperSpeedPlus isoc endpoint companion"
+		dev_notice(ddev, "Invalid SuperSpeedPlus isoc endpoint companion"
 			 "for config %d interface %d altsetting %d ep %d.\n",
 			 cfgno, inum, asnum, ep->desc.bEndpointAddress);
 		return;
@@ -83,7 +83,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 
 	if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP ||
 			size < USB_DT_SS_EP_COMP_SIZE) {
-		dev_warn(ddev, "No SuperSpeed endpoint companion for config %d "
+		dev_notice(ddev, "No SuperSpeed endpoint companion for config %d "
 				" interface %d altsetting %d ep %d: "
 				"using minimum values\n",
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
@@ -109,13 +109,13 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 
 	/* Check the various values */
 	if (usb_endpoint_xfer_control(&ep->desc) && desc->bMaxBurst != 0) {
-		dev_warn(ddev, "Control endpoint with bMaxBurst = %d in "
+		dev_notice(ddev, "Control endpoint with bMaxBurst = %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to zero\n", desc->bMaxBurst,
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
 		ep->ss_ep_comp.bMaxBurst = 0;
 	} else if (desc->bMaxBurst > 15) {
-		dev_warn(ddev, "Endpoint with bMaxBurst = %d in "
+		dev_notice(ddev, "Endpoint with bMaxBurst = %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to 15\n", desc->bMaxBurst,
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
@@ -125,7 +125,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 	if ((usb_endpoint_xfer_control(&ep->desc) ||
 			usb_endpoint_xfer_int(&ep->desc)) &&
 				desc->bmAttributes != 0) {
-		dev_warn(ddev, "%s endpoint with bmAttributes = %d in "
+		dev_notice(ddev, "%s endpoint with bmAttributes = %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to zero\n",
 				usb_endpoint_xfer_control(&ep->desc) ? "Control" : "Bulk",
@@ -134,7 +134,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 		ep->ss_ep_comp.bmAttributes = 0;
 	} else if (usb_endpoint_xfer_bulk(&ep->desc) &&
 			desc->bmAttributes > 16) {
-		dev_warn(ddev, "Bulk endpoint with more than 65536 streams in "
+		dev_notice(ddev, "Bulk endpoint with more than 65536 streams in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to max\n",
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
@@ -142,7 +142,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 	} else if (usb_endpoint_xfer_isoc(&ep->desc) &&
 		   !USB_SS_SSP_ISOC_COMP(desc->bmAttributes) &&
 		   USB_SS_MULT(desc->bmAttributes) > 3) {
-		dev_warn(ddev, "Isoc endpoint has Mult of %d in "
+		dev_notice(ddev, "Isoc endpoint has Mult of %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to 3\n",
 				USB_SS_MULT(desc->bmAttributes),
@@ -160,7 +160,7 @@ static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 	else
 		max_tx = 999999;
 	if (le16_to_cpu(desc->wBytesPerInterval) > max_tx) {
-		dev_warn(ddev, "%s endpoint with wBytesPerInterval of %d in "
+		dev_notice(ddev, "%s endpoint with wBytesPerInterval of %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to %d\n",
 				usb_endpoint_xfer_isoc(&ep->desc) ? "Isoc" : "Int",
@@ -273,7 +273,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 	else if (d->bLength >= USB_DT_ENDPOINT_SIZE)
 		n = USB_DT_ENDPOINT_SIZE;
 	else {
-		dev_warn(ddev, "config %d interface %d altsetting %d has an "
+		dev_notice(ddev, "config %d interface %d altsetting %d has an "
 		    "invalid endpoint descriptor of length %d, skipping\n",
 		    cfgno, inum, asnum, d->bLength);
 		goto skip_to_next_endpoint_or_interface_descriptor;
@@ -281,7 +281,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 
 	i = d->bEndpointAddress & ~USB_ENDPOINT_DIR_MASK;
 	if (i >= 16 || i == 0) {
-		dev_warn(ddev, "config %d interface %d altsetting %d has an "
+		dev_notice(ddev, "config %d interface %d altsetting %d has an "
 		    "invalid endpoint with address 0x%X, skipping\n",
 		    cfgno, inum, asnum, d->bEndpointAddress);
 		goto skip_to_next_endpoint_or_interface_descriptor;
@@ -293,7 +293,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 
 	/* Check for duplicate endpoint addresses */
 	if (config_endpoint_is_duplicate(config, inum, asnum, d)) {
-		dev_warn(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n",
+		dev_notice(ddev, "config %d interface %d altsetting %d has a duplicate endpoint with address 0x%X, skipping\n",
 				cfgno, inum, asnum, d->bEndpointAddress);
 		goto skip_to_next_endpoint_or_interface_descriptor;
 	}
@@ -301,7 +301,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 	/* Ignore some endpoints */
 	if (udev->quirks & USB_QUIRK_ENDPOINT_IGNORE) {
 		if (usb_endpoint_is_ignored(udev, ifp, d)) {
-			dev_warn(ddev, "config %d interface %d altsetting %d has an ignored endpoint with address 0x%X, skipping\n",
+			dev_notice(ddev, "config %d interface %d altsetting %d has an ignored endpoint with address 0x%X, skipping\n",
 					cfgno, inum, asnum,
 					d->bEndpointAddress);
 			goto skip_to_next_endpoint_or_interface_descriptor;
@@ -378,7 +378,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 		}
 	}
 	if (d->bInterval < i || d->bInterval > j) {
-		dev_warn(ddev, "config %d interface %d altsetting %d "
+		dev_notice(ddev, "config %d interface %d altsetting %d "
 		    "endpoint 0x%X has an invalid bInterval %d, "
 		    "changing to %d\n",
 		    cfgno, inum, asnum,
@@ -391,7 +391,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 	 * them usable, we will try treating them as Interrupt endpoints.
 	 */
 	if (udev->speed == USB_SPEED_LOW && usb_endpoint_xfer_bulk(d)) {
-		dev_warn(ddev, "config %d interface %d altsetting %d "
+		dev_notice(ddev, "config %d interface %d altsetting %d "
 		    "endpoint 0x%X is Bulk; changing to Interrupt\n",
 		    cfgno, inum, asnum, d->bEndpointAddress);
 		endpoint->desc.bmAttributes = USB_ENDPOINT_XFER_INT;
@@ -408,7 +408,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 	 */
 	maxp = le16_to_cpu(endpoint->desc.wMaxPacketSize);
 	if (maxp == 0 && !(usb_endpoint_xfer_isoc(d) && asnum == 0)) {
-		dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n",
+		dev_notice(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid wMaxPacketSize 0\n",
 		    cfgno, inum, asnum, d->bEndpointAddress);
 	}
 
@@ -439,7 +439,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 	j = maxpacket_maxes[usb_endpoint_type(&endpoint->desc)];
 
 	if (maxp > j) {
-		dev_warn(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n",
+		dev_notice(ddev, "config %d interface %d altsetting %d endpoint 0x%X has invalid maxpacket %d, setting to %d\n",
 		    cfgno, inum, asnum, d->bEndpointAddress, maxp, j);
 		maxp = j;
 		endpoint->desc.wMaxPacketSize = cpu_to_le16(i | maxp);
@@ -452,7 +452,7 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno,
 	 */
 	if (udev->speed == USB_SPEED_HIGH && usb_endpoint_xfer_bulk(d)) {
 		if (maxp != 512)
-			dev_warn(ddev, "config %d interface %d altsetting %d "
+			dev_notice(ddev, "config %d interface %d altsetting %d "
 				"bulk endpoint 0x%X has invalid maxpacket %d\n",
 				cfgno, inum, asnum, d->bEndpointAddress,
 				maxp);
@@ -533,7 +533,7 @@ static int usb_parse_interface(struct device *ddev, int cfgno,
 	      i < intfc->num_altsetting;
 	     (++i, ++alt)) {
 		if (alt->desc.bAlternateSetting == asnum) {
-			dev_warn(ddev, "Duplicate descriptor for config %d "
+			dev_notice(ddev, "Duplicate descriptor for config %d "
 			    "interface %d altsetting %d, skipping\n",
 			    cfgno, inum, asnum);
 			goto skip_to_next_interface_descriptor;
@@ -559,7 +559,7 @@ static int usb_parse_interface(struct device *ddev, int cfgno,
 	num_ep = num_ep_orig = alt->desc.bNumEndpoints;
 	alt->desc.bNumEndpoints = 0;		/* Use as a counter */
 	if (num_ep > USB_MAXENDPOINTS) {
-		dev_warn(ddev, "too many endpoints for config %d interface %d "
+		dev_notice(ddev, "too many endpoints for config %d interface %d "
 		    "altsetting %d: %d, using maximum allowed: %d\n",
 		    cfgno, inum, asnum, num_ep, USB_MAXENDPOINTS);
 		num_ep = USB_MAXENDPOINTS;
@@ -590,7 +590,7 @@ static int usb_parse_interface(struct device *ddev, int cfgno,
 	}
 
 	if (n != num_ep_orig)
-		dev_warn(ddev, "config %d interface %d altsetting %d has %d "
+		dev_notice(ddev, "config %d interface %d altsetting %d has %d "
 		    "endpoint descriptor%s, different from the interface "
 		    "descriptor's value: %d\n",
 		    cfgno, inum, asnum, n, plural(n), num_ep_orig);
@@ -625,7 +625,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 	if (config->desc.bDescriptorType != USB_DT_CONFIG ||
 	    config->desc.bLength < USB_DT_CONFIG_SIZE ||
 	    config->desc.bLength > size) {
-		dev_err(ddev, "invalid descriptor for config index %d: "
+		dev_notice(ddev, "invalid descriptor for config index %d: "
 		    "type = 0x%X, length = %d\n", cfgidx,
 		    config->desc.bDescriptorType, config->desc.bLength);
 		return -EINVAL;
@@ -636,7 +636,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 	size -= config->desc.bLength;
 
 	if (nintf > USB_MAXINTERFACES) {
-		dev_warn(ddev, "config %d has too many interfaces: %d, "
+		dev_notice(ddev, "config %d has too many interfaces: %d, "
 		    "using maximum allowed: %d\n",
 		    cfgno, nintf, USB_MAXINTERFACES);
 		nintf = USB_MAXINTERFACES;
@@ -650,7 +650,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 	     (buffer2 += header->bLength, size2 -= header->bLength)) {
 
 		if (size2 < sizeof(struct usb_descriptor_header)) {
-			dev_warn(ddev, "config %d descriptor has %d excess "
+			dev_notice(ddev, "config %d descriptor has %d excess "
 			    "byte%s, ignoring\n",
 			    cfgno, size2, plural(size2));
 			break;
@@ -658,7 +658,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 
 		header = (struct usb_descriptor_header *) buffer2;
 		if ((header->bLength > size2) || (header->bLength < 2)) {
-			dev_warn(ddev, "config %d has an invalid descriptor "
+			dev_notice(ddev, "config %d has an invalid descriptor "
 			    "of length %d, skipping remainder of the config\n",
 			    cfgno, header->bLength);
 			break;
@@ -670,7 +670,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 
 			d = (struct usb_interface_descriptor *) header;
 			if (d->bLength < USB_DT_INTERFACE_SIZE) {
-				dev_warn(ddev, "config %d has an invalid "
+				dev_notice(ddev, "config %d has an invalid "
 				    "interface descriptor of length %d, "
 				    "skipping\n", cfgno, d->bLength);
 				continue;
@@ -680,7 +680,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 
 			if ((dev->quirks & USB_QUIRK_HONOR_BNUMINTERFACES) &&
 			    n >= nintf_orig) {
-				dev_warn(ddev, "config %d has more interface "
+				dev_notice(ddev, "config %d has more interface "
 				    "descriptors, than it declares in "
 				    "bNumInterfaces, ignoring interface "
 				    "number: %d\n", cfgno, inum);
@@ -688,7 +688,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 			}
 
 			if (inum >= nintf_orig)
-				dev_warn(ddev, "config %d has an invalid "
+				dev_notice(ddev, "config %d has an invalid "
 				    "interface number: %d but max is %d\n",
 				    cfgno, inum, nintf_orig - 1);
 
@@ -713,14 +713,14 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 
 			d = (struct usb_interface_assoc_descriptor *)header;
 			if (d->bLength < USB_DT_INTERFACE_ASSOCIATION_SIZE) {
-				dev_warn(ddev,
+				dev_notice(ddev,
 					 "config %d has an invalid interface association descriptor of length %d, skipping\n",
 					 cfgno, d->bLength);
 				continue;
 			}
 
 			if (iad_num == USB_MAXIADS) {
-				dev_warn(ddev, "found more Interface "
+				dev_notice(ddev, "found more Interface "
 					       "Association Descriptors "
 					       "than allocated for in "
 					       "configuration %d\n", cfgno);
@@ -731,7 +731,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 
 		} else if (header->bDescriptorType == USB_DT_DEVICE ||
 			    header->bDescriptorType == USB_DT_CONFIG)
-			dev_warn(ddev, "config %d contains an unexpected "
+			dev_notice(ddev, "config %d contains an unexpected "
 			    "descriptor of type 0x%X, skipping\n",
 			    cfgno, header->bDescriptorType);
 
@@ -740,11 +740,11 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 	config->desc.wTotalLength = cpu_to_le16(buffer2 - buffer0);
 
 	if (n != nintf)
-		dev_warn(ddev, "config %d has %d interface%s, different from "
+		dev_notice(ddev, "config %d has %d interface%s, different from "
 		    "the descriptor's value: %d\n",
 		    cfgno, n, plural(n), nintf_orig);
 	else if (n == 0)
-		dev_warn(ddev, "config %d has no interfaces?\n", cfgno);
+		dev_notice(ddev, "config %d has no interfaces?\n", cfgno);
 	config->desc.bNumInterfaces = nintf = n;
 
 	/* Check for missing interface numbers */
@@ -754,7 +754,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 				break;
 		}
 		if (j >= nintf)
-			dev_warn(ddev, "config %d has no interface number "
+			dev_notice(ddev, "config %d has no interface number "
 			    "%d\n", cfgno, i);
 	}
 
@@ -762,7 +762,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 	for (i = 0; i < nintf; ++i) {
 		j = nalts[i];
 		if (j > USB_MAXALTSETTING) {
-			dev_warn(ddev, "too many alternate settings for "
+			dev_notice(ddev, "too many alternate settings for "
 			    "config %d interface %d: %d, "
 			    "using maximum allowed: %d\n",
 			    cfgno, inums[i], j, USB_MAXALTSETTING);
@@ -811,7 +811,7 @@ static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
 					break;
 			}
 			if (n >= intfc->num_altsetting)
-				dev_warn(ddev, "config %d interface %d has no "
+				dev_notice(ddev, "config %d interface %d has no "
 				    "altsetting %d\n", cfgno, inums[i], j);
 		}
 	}
@@ -868,7 +868,7 @@ int usb_get_configuration(struct usb_device *dev)
 	int result;
 
 	if (ncfg > USB_MAXCONFIG) {
-		dev_warn(ddev, "too many configurations: %d, "
+		dev_notice(ddev, "too many configurations: %d, "
 		    "using maximum allowed: %d\n", ncfg, USB_MAXCONFIG);
 		dev->descriptor.bNumConfigurations = ncfg = USB_MAXCONFIG;
 	}
@@ -902,7 +902,7 @@ int usb_get_configuration(struct usb_device *dev)
 			    "descriptor/%s: %d\n", cfgno, "start", result);
 			if (result != -EPIPE)
 				goto err;
-			dev_err(ddev, "chopping to %d config(s)\n", cfgno);
+			dev_notice(ddev, "chopping to %d config(s)\n", cfgno);
 			dev->descriptor.bNumConfigurations = cfgno;
 			break;
 		} else if (result < 4) {
@@ -934,7 +934,7 @@ int usb_get_configuration(struct usb_device *dev)
 			goto err;
 		}
 		if (result < length) {
-			dev_warn(ddev, "config index %d descriptor too short "
+			dev_notice(ddev, "config index %d descriptor too short "
 			    "(expected %i, got %i)\n", cfgno, length, result);
 			length = result;
 		}
@@ -993,7 +993,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 	/* Get BOS descriptor */
 	ret = usb_get_descriptor(dev, USB_DT_BOS, 0, bos, USB_DT_BOS_SIZE);
 	if (ret < USB_DT_BOS_SIZE || bos->bLength < USB_DT_BOS_SIZE) {
-		dev_err(ddev, "unable to get BOS descriptor or descriptor too short\n");
+		dev_notice(ddev, "unable to get BOS descriptor or descriptor too short\n");
 		if (ret >= 0)
 			ret = -ENOMSG;
 		kfree(bos);
@@ -1021,7 +1021,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 
 	ret = usb_get_descriptor(dev, USB_DT_BOS, 0, buffer, total_len);
 	if (ret < total_len) {
-		dev_err(ddev, "unable to get BOS descriptor set\n");
+		dev_notice(ddev, "unable to get BOS descriptor set\n");
 		if (ret >= 0)
 			ret = -ENOMSG;
 		goto err;
@@ -1046,7 +1046,7 @@ int usb_get_bos_descriptor(struct usb_device *dev)
 		}
 
 		if (cap->bDescriptorType != USB_DT_DEVICE_CAPABILITY) {
-			dev_warn(ddev, "descriptor type invalid, skip\n");
+			dev_notice(ddev, "descriptor type invalid, skip\n");
 			continue;
 		}
 
-- 
GitLab


From 372488c6936f4e7734e4ff5613c504affb49ff68 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 1 Nov 2022 22:13:55 +0100
Subject: [PATCH 0878/4122] usb: core: Use kstrtobool() instead of strtobool()

strtobool() is the same as kstrtobool().
However, the latter is more used within the kernel.

In order to remove strtobool() and slightly simplify kstrtox.h, switch to
the other function name.

While at it, include the corresponding header file (<linux/kstrtox.h>)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/f01ef2ddaf12a6412127611617786adc1234e0b4.1667336095.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/port.c  | 3 ++-
 drivers/usb/core/sysfs.c | 7 ++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 38c1a4f4fdeae..015204fc67a10 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -7,6 +7,7 @@
  * Author: Lan Tianyu <tianyu.lan@intel.com>
  */
 
+#include <linux/kstrtox.h>
 #include <linux/slab.h>
 #include <linux/pm_qos.h>
 #include <linux/component.h>
@@ -63,7 +64,7 @@ static ssize_t disable_store(struct device *dev, struct device_attribute *attr,
 	bool disabled;
 	int rc;
 
-	rc = strtobool(buf, &disabled);
+	rc = kstrtobool(buf, &disabled);
 	if (rc)
 		return rc;
 
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 631574718d8ac..8217032dfb85f 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -13,6 +13,7 @@
 
 
 #include <linux/kernel.h>
+#include <linux/kstrtox.h>
 #include <linux/string.h>
 #include <linux/usb.h>
 #include <linux/usb/hcd.h>
@@ -505,7 +506,7 @@ static ssize_t usb2_hardware_lpm_store(struct device *dev,
 	if (ret < 0)
 		return -EINTR;
 
-	ret = strtobool(buf, &value);
+	ret = kstrtobool(buf, &value);
 
 	if (!ret) {
 		udev->usb2_hw_lpm_allowed = value;
@@ -975,7 +976,7 @@ static ssize_t interface_authorized_default_store(struct device *dev,
 	int rc = count;
 	bool val;
 
-	if (strtobool(buf, &val) != 0)
+	if (kstrtobool(buf, &val) != 0)
 		return -EINVAL;
 
 	if (val)
@@ -1176,7 +1177,7 @@ static ssize_t interface_authorized_store(struct device *dev,
 	struct usb_interface *intf = to_usb_interface(dev);
 	bool val;
 
-	if (strtobool(buf, &val) != 0)
+	if (kstrtobool(buf, &val) != 0)
 		return -EINVAL;
 
 	if (val)
-- 
GitLab


From a8bc8cc193c69e41df5e757d1a592346526e136d Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 1 Nov 2022 22:13:56 +0100
Subject: [PATCH 0879/4122] usb: gadget: Use kstrtobool() instead of
 strtobool()

strtobool() is the same as kstrtobool().
However, the latter is more used within the kernel.

In order to remove strtobool() and slightly simplify kstrtox.h, switch to
the other function name.

While at it, include the corresponding header file (<linux/kstrtox.h>)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/09bc980d8432a4b5f7d88388ec0df5b085583139.1667336095.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/configfs.c                | 3 ++-
 drivers/usb/gadget/function/f_mass_storage.c | 3 ++-
 drivers/usb/gadget/function/storage_common.c | 9 +++++----
 drivers/usb/gadget/function/u_serial.c       | 3 ++-
 drivers/usb/gadget/legacy/serial.c           | 3 ++-
 5 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/gadget/configfs.c b/drivers/usb/gadget/configfs.c
index 3a6b4926193ef..96121d1c8df4c 100644
--- a/drivers/usb/gadget/configfs.c
+++ b/drivers/usb/gadget/configfs.c
@@ -3,6 +3,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/device.h>
+#include <linux/kstrtox.h>
 #include <linux/nls.h>
 #include <linux/usb/composite.h>
 #include <linux/usb/gadget_configfs.h>
@@ -800,7 +801,7 @@ static ssize_t os_desc_use_store(struct config_item *item, const char *page,
 	bool use;
 
 	mutex_lock(&gi->lock);
-	ret = strtobool(page, &use);
+	ret = kstrtobool(page, &use);
 	if (!ret) {
 		gi->use_os_desc = use;
 		ret = len;
diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c
index 3abf7f586e2af..3a30feb47073f 100644
--- a/drivers/usb/gadget/function/f_mass_storage.c
+++ b/drivers/usb/gadget/function/f_mass_storage.c
@@ -176,6 +176,7 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/kstrtox.h>
 #include <linux/kthread.h>
 #include <linux/sched/signal.h>
 #include <linux/limits.h>
@@ -3387,7 +3388,7 @@ static ssize_t fsg_opts_stall_store(struct config_item *item, const char *page,
 		return -EBUSY;
 	}
 
-	ret = strtobool(page, &stall);
+	ret = kstrtobool(page, &stall);
 	if (!ret) {
 		opts->common->can_stall = stall;
 		ret = len;
diff --git a/drivers/usb/gadget/function/storage_common.c b/drivers/usb/gadget/function/storage_common.c
index 208c6a92780ab..2a4163b0f6fe2 100644
--- a/drivers/usb/gadget/function/storage_common.c
+++ b/drivers/usb/gadget/function/storage_common.c
@@ -23,6 +23,7 @@
 #include <linux/blkdev.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/kstrtox.h>
 #include <linux/usb/composite.h>
 
 #include "storage_common.h"
@@ -396,7 +397,7 @@ ssize_t fsg_store_ro(struct fsg_lun *curlun, struct rw_semaphore *filesem,
 	ssize_t		rc;
 	bool		ro;
 
-	rc = strtobool(buf, &ro);
+	rc = kstrtobool(buf, &ro);
 	if (rc)
 		return rc;
 
@@ -419,7 +420,7 @@ ssize_t fsg_store_nofua(struct fsg_lun *curlun, const char *buf, size_t count)
 	bool		nofua;
 	int		ret;
 
-	ret = strtobool(buf, &nofua);
+	ret = kstrtobool(buf, &nofua);
 	if (ret)
 		return ret;
 
@@ -470,7 +471,7 @@ ssize_t fsg_store_cdrom(struct fsg_lun *curlun, struct rw_semaphore *filesem,
 	bool		cdrom;
 	int		ret;
 
-	ret = strtobool(buf, &cdrom);
+	ret = kstrtobool(buf, &cdrom);
 	if (ret)
 		return ret;
 
@@ -493,7 +494,7 @@ ssize_t fsg_store_removable(struct fsg_lun *curlun, const char *buf,
 	bool		removable;
 	int		ret;
 
-	ret = strtobool(buf, &removable);
+	ret = kstrtobool(buf, &removable);
 	if (ret)
 		return ret;
 
diff --git a/drivers/usb/gadget/function/u_serial.c b/drivers/usb/gadget/function/u_serial.c
index 7538279f98179..840626e064e13 100644
--- a/drivers/usb/gadget/function/u_serial.c
+++ b/drivers/usb/gadget/function/u_serial.c
@@ -24,6 +24,7 @@
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/console.h>
+#include <linux/kstrtox.h>
 #include <linux/kthread.h>
 #include <linux/workqueue.h>
 #include <linux/kfifo.h>
@@ -1070,7 +1071,7 @@ ssize_t gserial_set_console(unsigned char port_num, const char *page, size_t cou
 	bool enable;
 	int ret;
 
-	ret = strtobool(page, &enable);
+	ret = kstrtobool(page, &enable);
 	if (ret)
 		return ret;
 
diff --git a/drivers/usb/gadget/legacy/serial.c b/drivers/usb/gadget/legacy/serial.c
index dcd3a6603d900..4974bee6049a6 100644
--- a/drivers/usb/gadget/legacy/serial.c
+++ b/drivers/usb/gadget/legacy/serial.c
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/device.h>
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
@@ -109,7 +110,7 @@ static int enable_set(const char *s, const struct kernel_param *kp)
 	if (!s)	/* called for no-arg enable == default */
 		return 0;
 
-	ret = strtobool(s, &do_enable);
+	ret = kstrtobool(s, &do_enable);
 	if (ret || enable == do_enable)
 		return ret;
 
-- 
GitLab


From 1adf3cc20d693569ebee90fd91fa34b0570fcd6f Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:05 +0800
Subject: [PATCH 0880/4122] iommu: Add max_pasids field in struct iommu_device

Use this field to keep the number of supported PASIDs that an IOMMU
hardware is able to support. This is a generic attribute of an IOMMU
and lifting it into the per-IOMMU device structure makes it possible
to allocate a PASID for device without calls into the IOMMU drivers.
Any iommu driver that supports PASID related features should set this
field before enabling them on the devices.

In the Intel IOMMU driver, intel_iommu_sm is moved to CONFIG_INTEL_IOMMU
enclave so that the pasid_supported() helper could be used in dmar.c
without compilation errors.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 1 +
 drivers/iommu/intel/dmar.c                  | 7 +++++++
 drivers/iommu/intel/iommu.h                 | 4 ++--
 include/linux/iommu.h                       | 2 ++
 4 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 6d5df91c5c465..21cb13da122c8 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -3543,6 +3543,7 @@ static int arm_smmu_device_hw_probe(struct arm_smmu_device *smmu)
 	/* SID/SSID sizes */
 	smmu->ssid_bits = FIELD_GET(IDR1_SSIDSIZE, reg);
 	smmu->sid_bits = FIELD_GET(IDR1_SIDSIZE, reg);
+	smmu->iommu.max_pasids = 1UL << smmu->ssid_bits;
 
 	/*
 	 * If the SMMU supports fewer bits than would fill a single L2 stream
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 5a8f780e7ffd8..3528058d253e4 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1104,6 +1104,13 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
 
 	raw_spin_lock_init(&iommu->register_lock);
 
+	/*
+	 * A value of N in PSS field of eCap register indicates hardware
+	 * supports PASID field of N+1 bits.
+	 */
+	if (pasid_supported(iommu))
+		iommu->iommu.max_pasids = 2UL << ecap_pss(iommu->ecap);
+
 	/*
 	 * This is only for hotplug; at boot time intel_iommu_enabled won't
 	 * be set yet. When intel_iommu_init() runs, it registers the units
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 92023dff9513a..cce0598f41092 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -480,8 +480,6 @@ enum {
 #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED	(1 << 1)
 #define VTD_FLAG_SVM_CAPABLE		(1 << 2)
 
-extern int intel_iommu_sm;
-
 #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
 #define pasid_supported(iommu)	(sm_supported(iommu) &&			\
 				 ecap_pasid((iommu)->ecap))
@@ -795,6 +793,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
 extern const struct iommu_ops intel_iommu_ops;
 
 #ifdef CONFIG_INTEL_IOMMU
+extern int intel_iommu_sm;
 extern int iommu_calculate_agaw(struct intel_iommu *iommu);
 extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu);
 extern int dmar_disabled;
@@ -810,6 +809,7 @@ static inline int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
 }
 #define dmar_disabled	(1)
 #define intel_iommu_enabled (0)
+#define intel_iommu_sm (0)
 #endif
 
 static inline const char *decode_prq_descriptor(char *str, size_t size,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3c9da1f8979e3..e3af4f46e6e07 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -322,12 +322,14 @@ struct iommu_domain_ops {
  * @list: Used by the iommu-core to keep a list of registered iommus
  * @ops: iommu-ops for talking to this iommu
  * @dev: struct device for sysfs handling
+ * @max_pasids: number of supported PASIDs
  */
 struct iommu_device {
 	struct list_head list;
 	const struct iommu_ops *ops;
 	struct fwnode_handle *fwnode;
 	struct device *dev;
+	u32 max_pasids;
 };
 
 /**
-- 
GitLab


From 22d2c7afb3697a68c7fc05c935ef662dee06dc60 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:06 +0800
Subject: [PATCH 0881/4122] iommu: Add max_pasids field in struct dev_iommu

Use this field to save the number of PASIDs that a device is able to
consume. It is a generic attribute of a device and lifting it into the
per-device dev_iommu struct could help to avoid the boilerplate code
in various IOMMU drivers.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 20 ++++++++++++++++++++
 include/linux/iommu.h |  2 ++
 2 files changed, 22 insertions(+)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 65a3b3d886dc0..297ac79bc21cf 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -21,6 +21,7 @@
 #include <linux/idr.h>
 #include <linux/err.h>
 #include <linux/pci.h>
+#include <linux/pci-ats.h>
 #include <linux/bitops.h>
 #include <linux/platform_device.h>
 #include <linux/property.h>
@@ -278,6 +279,24 @@ static void dev_iommu_free(struct device *dev)
 	kfree(param);
 }
 
+static u32 dev_iommu_get_max_pasids(struct device *dev)
+{
+	u32 max_pasids = 0, bits = 0;
+	int ret;
+
+	if (dev_is_pci(dev)) {
+		ret = pci_max_pasids(to_pci_dev(dev));
+		if (ret > 0)
+			max_pasids = ret;
+	} else {
+		ret = device_property_read_u32(dev, "pasid-num-bits", &bits);
+		if (!ret)
+			max_pasids = 1UL << bits;
+	}
+
+	return min_t(u32, max_pasids, dev->iommu->iommu_dev->max_pasids);
+}
+
 static int __iommu_probe_device(struct device *dev, struct list_head *group_list)
 {
 	const struct iommu_ops *ops = dev->bus->iommu_ops;
@@ -303,6 +322,7 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 	}
 
 	dev->iommu->iommu_dev = iommu_dev;
+	dev->iommu->max_pasids = dev_iommu_get_max_pasids(dev);
 
 	group = iommu_group_get_for_dev(dev);
 	if (IS_ERR(group)) {
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index e3af4f46e6e07..ac3f6c6dcc6d6 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -368,6 +368,7 @@ struct iommu_fault_param {
  * @fwspec:	 IOMMU fwspec data
  * @iommu_dev:	 IOMMU device this device is linked to
  * @priv:	 IOMMU Driver private data
+ * @max_pasids:  number of PASIDs this device can consume
  *
  * TODO: migrate other per device data pointers under iommu_dev_data, e.g.
  *	struct iommu_group	*iommu_group;
@@ -379,6 +380,7 @@ struct dev_iommu {
 	struct iommu_fwspec		*fwspec;
 	struct iommu_device		*iommu_dev;
 	void				*priv;
+	u32				max_pasids;
 };
 
 int iommu_device_register(struct iommu_device *iommu,
-- 
GitLab


From 942fd5435dccb273f90176b046ae6bbba60cfbd8 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:07 +0800
Subject: [PATCH 0882/4122] iommu: Remove SVM_FLAG_SUPERVISOR_MODE support

The current kernel DMA with PASID support is based on the SVA with a flag
SVM_FLAG_SUPERVISOR_MODE. The IOMMU driver binds the kernel memory address
space to a PASID of the device. The device driver programs the device with
kernel virtual address (KVA) for DMA access. There have been security and
functional issues with this approach:

- The lack of IOTLB synchronization upon kernel page table updates.
  (vmalloc, module/BPF loading, CONFIG_DEBUG_PAGEALLOC etc.)
- Other than slight more protection, using kernel virtual address (KVA)
  has little advantage over physical address. There are also no use
  cases yet where DMA engines need kernel virtual addresses for in-kernel
  DMA.

This removes SVM_FLAG_SUPERVISOR_MODE support from the IOMMU interface.
The device drivers are suggested to handle kernel DMA with PASID through
the kernel DMA APIs.

The drvdata parameter in iommu_sva_bind_device() and all callbacks is not
needed anymore. Cleanup them as well.

Link: https://lore.kernel.org/linux-iommu/20210511194726.GP1002214@nvidia.com/
Signed-off-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Fenghua Yu <fenghua.yu@intel.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-4-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/dma/idxd/cdev.c                       |  3 +-
 drivers/dma/idxd/init.c                       | 25 +--------
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   |  3 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   |  5 +-
 drivers/iommu/intel/iommu.h                   |  3 +-
 drivers/iommu/intel/svm.c                     | 55 +++++--------------
 drivers/iommu/iommu.c                         |  5 +-
 drivers/misc/uacce/uacce.c                    |  2 +-
 include/linux/intel-svm.h                     | 13 -----
 include/linux/iommu.h                         |  8 +--
 10 files changed, 25 insertions(+), 97 deletions(-)

diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index c2808fd081d65..66720001ba1ce 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -6,7 +6,6 @@
 #include <linux/pci.h>
 #include <linux/device.h>
 #include <linux/sched/task.h>
-#include <linux/intel-svm.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/cdev.h>
 #include <linux/fs.h>
@@ -100,7 +99,7 @@ static int idxd_cdev_open(struct inode *inode, struct file *filp)
 	filp->private_data = ctx;
 
 	if (device_user_pasid_enabled(idxd)) {
-		sva = iommu_sva_bind_device(dev, current->mm, NULL);
+		sva = iommu_sva_bind_device(dev, current->mm);
 		if (IS_ERR(sva)) {
 			rc = PTR_ERR(sva);
 			dev_err(dev, "pasid allocation failed: %d\n", rc);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 2b18d512cbfc9..2c0fcfdc75c72 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -14,7 +14,6 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/device.h>
 #include <linux/idr.h>
-#include <linux/intel-svm.h>
 #include <linux/iommu.h>
 #include <uapi/linux/idxd.h>
 #include <linux/dmaengine.h>
@@ -502,29 +501,7 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
 
 static int idxd_enable_system_pasid(struct idxd_device *idxd)
 {
-	int flags;
-	unsigned int pasid;
-	struct iommu_sva *sva;
-
-	flags = SVM_FLAG_SUPERVISOR_MODE;
-
-	sva = iommu_sva_bind_device(&idxd->pdev->dev, NULL, &flags);
-	if (IS_ERR(sva)) {
-		dev_warn(&idxd->pdev->dev,
-			 "iommu sva bind failed: %ld\n", PTR_ERR(sva));
-		return PTR_ERR(sva);
-	}
-
-	pasid = iommu_sva_get_pasid(sva);
-	if (pasid == IOMMU_PASID_INVALID) {
-		iommu_sva_unbind_device(sva);
-		return -ENODEV;
-	}
-
-	idxd->sva = sva;
-	idxd->pasid = pasid;
-	dev_dbg(&idxd->pdev->dev, "system pasid: %u\n", pasid);
-	return 0;
+	return -EOPNOTSUPP;
 }
 
 static void idxd_disable_system_pasid(struct idxd_device *idxd)
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 5968a568aae2a..8fcf0df4bd0ea 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -367,8 +367,7 @@ err_free_bond:
 	return ERR_PTR(ret);
 }
 
-struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 {
 	struct iommu_sva *handle;
 	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index cd48590ada303..d2ba86470c423 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -754,8 +754,7 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
 bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
-struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm,
-				    void *drvdata);
+struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm);
 void arm_smmu_sva_unbind(struct iommu_sva *handle);
 u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
 void arm_smmu_sva_notifier_synchronize(void);
@@ -791,7 +790,7 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master
 }
 
 static inline struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 {
 	return ERR_PTR(-ENODEV);
 }
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index cce0598f41092..33e5bcaf2a6cf 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -748,8 +748,7 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
 extern void intel_svm_check(struct intel_iommu *iommu);
 extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
-struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm,
-				 void *drvdata);
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm);
 void intel_svm_unbind(struct iommu_sva *handle);
 u32 intel_svm_get_pasid(struct iommu_sva *handle);
 int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 7d08eb034f2d2..94bc47b68c935 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -296,8 +296,7 @@ out:
 	return 0;
 }
 
-static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
-				 unsigned int flags)
+static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm)
 {
 	ioasid_t max_pasid = dev_is_pci(dev) ?
 			pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
@@ -307,8 +306,7 @@ static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
 
 static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 					   struct device *dev,
-					   struct mm_struct *mm,
-					   unsigned int flags)
+					   struct mm_struct *mm)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 	struct intel_svm_dev *sdev;
@@ -324,22 +322,18 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 
 		svm->pasid = mm->pasid;
 		svm->mm = mm;
-		svm->flags = flags;
 		INIT_LIST_HEAD_RCU(&svm->devs);
 
-		if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
-			svm->notifier.ops = &intel_mmuops;
-			ret = mmu_notifier_register(&svm->notifier, mm);
-			if (ret) {
-				kfree(svm);
-				return ERR_PTR(ret);
-			}
+		svm->notifier.ops = &intel_mmuops;
+		ret = mmu_notifier_register(&svm->notifier, mm);
+		if (ret) {
+			kfree(svm);
+			return ERR_PTR(ret);
 		}
 
 		ret = pasid_private_add(svm->pasid, svm);
 		if (ret) {
-			if (svm->notifier.ops)
-				mmu_notifier_unregister(&svm->notifier, mm);
+			mmu_notifier_unregister(&svm->notifier, mm);
 			kfree(svm);
 			return ERR_PTR(ret);
 		}
@@ -374,9 +368,7 @@ static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 	}
 
 	/* Setup the pasid table: */
-	sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
-			PASID_FLAG_SUPERVISOR_MODE : 0;
-	sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
+	sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
 	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
 					    FLPT_DEFAULT_DID, sflags);
 	if (ret)
@@ -390,8 +382,7 @@ free_sdev:
 	kfree(sdev);
 free_svm:
 	if (list_empty(&svm->devs)) {
-		if (svm->notifier.ops)
-			mmu_notifier_unregister(&svm->notifier, mm);
+		mmu_notifier_unregister(&svm->notifier, mm);
 		pasid_private_remove(mm->pasid);
 		kfree(svm);
 	}
@@ -780,40 +771,20 @@ prq_advance:
 	return IRQ_RETVAL(handled);
 }
 
-struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
+struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm)
 {
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
-	unsigned int flags = 0;
 	struct iommu_sva *sva;
 	int ret;
 
-	if (drvdata)
-		flags = *(unsigned int *)drvdata;
-
-	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
-		if (!ecap_srs(iommu->ecap)) {
-			dev_err(dev, "%s: Supervisor PASID not supported\n",
-				iommu->name);
-			return ERR_PTR(-EOPNOTSUPP);
-		}
-
-		if (mm) {
-			dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
-				iommu->name);
-			return ERR_PTR(-EINVAL);
-		}
-
-		mm = &init_mm;
-	}
-
 	mutex_lock(&pasid_mutex);
-	ret = intel_svm_alloc_pasid(dev, mm, flags);
+	ret = intel_svm_alloc_pasid(dev, mm);
 	if (ret) {
 		mutex_unlock(&pasid_mutex);
 		return ERR_PTR(ret);
 	}
 
-	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
+	sva = intel_svm_bind_mm(iommu, dev, mm);
 	mutex_unlock(&pasid_mutex);
 
 	return sva;
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 297ac79bc21cf..a94ec648c88bb 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2750,7 +2750,6 @@ EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
  * iommu_sva_bind_device() - Bind a process address space to a device
  * @dev: the device
  * @mm: the mm to bind, caller must hold a reference to it
- * @drvdata: opaque data pointer to pass to bind callback
  *
  * Create a bond between device and address space, allowing the device to access
  * the mm using the returned PASID. If a bond already exists between @device and
@@ -2763,7 +2762,7 @@ EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
  * On error, returns an ERR_PTR value.
  */
 struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
 {
 	struct iommu_group *group;
 	struct iommu_sva *handle = ERR_PTR(-EINVAL);
@@ -2788,7 +2787,7 @@ iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
 	if (iommu_group_device_count(group) != 1)
 		goto out_unlock;
 
-	handle = ops->sva_bind(dev, mm, drvdata);
+	handle = ops->sva_bind(dev, mm);
 
 out_unlock:
 	mutex_unlock(&group->mutex);
diff --git a/drivers/misc/uacce/uacce.c b/drivers/misc/uacce/uacce.c
index b70a013139c74..905eff1f840ed 100644
--- a/drivers/misc/uacce/uacce.c
+++ b/drivers/misc/uacce/uacce.c
@@ -108,7 +108,7 @@ static int uacce_bind_queue(struct uacce_device *uacce, struct uacce_queue *q)
 	if (!(uacce->flags & UACCE_DEV_SVA))
 		return 0;
 
-	handle = iommu_sva_bind_device(uacce->parent, current->mm, NULL);
+	handle = iommu_sva_bind_device(uacce->parent, current->mm);
 	if (IS_ERR(handle))
 		return PTR_ERR(handle);
 
diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h
index 207ef06ba3e1d..f9a0d44f6fdb1 100644
--- a/include/linux/intel-svm.h
+++ b/include/linux/intel-svm.h
@@ -13,17 +13,4 @@
 #define PRQ_RING_MASK	((0x1000 << PRQ_ORDER) - 0x20)
 #define PRQ_DEPTH	((0x1000 << PRQ_ORDER) >> 5)
 
-/*
- * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only
- * for access to kernel addresses. No IOTLB flushes are automatically done
- * for kernel mappings; it is valid only for access to the kernel's static
- * 1:1 mapping of physical memory — not to vmalloc or even module mappings.
- * A future API addition may permit the use of such ranges, by means of an
- * explicit IOTLB flush call (akin to the DMA API's unmap method).
- *
- * It is unlikely that we will ever hook into flush_tlb_kernel_range() to
- * do such IOTLB flushes automatically.
- */
-#define SVM_FLAG_SUPERVISOR_MODE	BIT(0)
-
 #endif /* __INTEL_SVM_H__ */
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index ac3f6c6dcc6d6..72bb0531aa769 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -247,8 +247,7 @@ struct iommu_ops {
 	int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
 	int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
 
-	struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm,
-				      void *drvdata);
+	struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm);
 	void (*sva_unbind)(struct iommu_sva *handle);
 	u32 (*sva_get_pasid)(struct iommu_sva *handle);
 
@@ -668,8 +667,7 @@ int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
 
 struct iommu_sva *iommu_sva_bind_device(struct device *dev,
-					struct mm_struct *mm,
-					void *drvdata);
+					struct mm_struct *mm);
 void iommu_sva_unbind_device(struct iommu_sva *handle);
 u32 iommu_sva_get_pasid(struct iommu_sva *handle);
 
@@ -1000,7 +998,7 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
 }
 
 static inline struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm, void *drvdata)
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
 {
 	return NULL;
 }
-- 
GitLab


From 201007ef707a8bb5592cd07dd46fc9222c48e0b9 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:08 +0800
Subject: [PATCH 0883/4122] PCI: Enable PASID only when ACS RR & UF enabled on
 upstream path

The Requester ID/Process Address Space ID (PASID) combination
identifies an address space distinct from the PCI bus address space,
e.g., an address space defined by an IOMMU.

But the PCIe fabric routes Memory Requests based on the TLP address,
ignoring any PASID (PCIe r6.0, sec 2.2.10.4), so a TLP with PASID that
SHOULD go upstream to the IOMMU may instead be routed as a P2P
Request if its address falls in a bridge window.

To ensure that all Memory Requests with PASID are routed upstream,
only enable PASID if ACS P2P Request Redirect and Upstream Forwarding
are enabled for the path leading to the device.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Suggested-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Acked-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/pci/ats.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index c967ad6e26267..f9cc2e10b676a 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -382,6 +382,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
 	if (!pasid)
 		return -EINVAL;
 
+	if (!pci_acs_path_enabled(pdev, NULL, PCI_ACS_RR | PCI_ACS_UF))
+		return -EINVAL;
+
 	pci_read_config_word(pdev, pasid + PCI_PASID_CAP, &supported);
 	supported &= PCI_PASID_CAP_EXEC | PCI_PASID_CAP_PRIV;
 
-- 
GitLab


From 16603704559c7a68718059c4f75287886c01b20f Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:09 +0800
Subject: [PATCH 0884/4122] iommu: Add attach/detach_dev_pasid iommu interfaces

Attaching an IOMMU domain to a PASID of a device is a generic operation
for modern IOMMU drivers which support PASID-granular DMA address
translation. Currently visible usage scenarios include (but not limited):

 - SVA (Shared Virtual Address)
 - kernel DMA with PASID
 - hardware-assist mediated device

This adds the set_dev_pasid domain ops for setting the domain onto a
PASID of a device and remove_dev_pasid iommu ops for removing any setup
on a PASID of device. This also adds interfaces for device drivers to
attach/detach/retrieve a domain for a PASID of a device.

If multiple devices share a single group, it's fine as long the fabric
always routes every TLP marked with a PASID to the host bridge and only
the host bridge. For example, ACS achieves this universally and has been
checked when pci_enable_pasid() is called. As we can't reliably tell the
source apart in a group, all the devices in a group have to be considered
as the same source, and mapped to the same PASID table.

The DMA ownership is about the whole device (more precisely, iommu group),
including the RID and PASIDs. When the ownership is converted, the pasid
array must be empty. This also adds necessary checks in the DMA ownership
interfaces.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-6-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 141 ++++++++++++++++++++++++++++++++++++++++--
 include/linux/iommu.h |  32 ++++++++++
 2 files changed, 169 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index a94ec648c88bb..bf22992beb98a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -43,6 +43,7 @@ struct iommu_group {
 	struct kobject kobj;
 	struct kobject *devices_kobj;
 	struct list_head devices;
+	struct xarray pasid_array;
 	struct mutex mutex;
 	void *iommu_data;
 	void (*iommu_data_release)(void *iommu_data);
@@ -723,6 +724,7 @@ struct iommu_group *iommu_group_alloc(void)
 	mutex_init(&group->mutex);
 	INIT_LIST_HEAD(&group->devices);
 	INIT_LIST_HEAD(&group->entry);
+	xa_init(&group->pasid_array);
 
 	ret = ida_alloc(&iommu_group_ida, GFP_KERNEL);
 	if (ret < 0) {
@@ -3106,7 +3108,8 @@ int iommu_device_use_default_domain(struct device *dev)
 
 	mutex_lock(&group->mutex);
 	if (group->owner_cnt) {
-		if (group->owner || !iommu_is_default_domain(group)) {
+		if (group->owner || !iommu_is_default_domain(group) ||
+		    !xa_empty(&group->pasid_array)) {
 			ret = -EBUSY;
 			goto unlock_out;
 		}
@@ -3137,7 +3140,7 @@ void iommu_device_unuse_default_domain(struct device *dev)
 		return;
 
 	mutex_lock(&group->mutex);
-	if (!WARN_ON(!group->owner_cnt))
+	if (!WARN_ON(!group->owner_cnt || !xa_empty(&group->pasid_array)))
 		group->owner_cnt--;
 
 	mutex_unlock(&group->mutex);
@@ -3185,7 +3188,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
 		ret = -EPERM;
 		goto unlock_out;
 	} else {
-		if (group->domain && group->domain != group->default_domain) {
+		if ((group->domain && group->domain != group->default_domain) ||
+		    !xa_empty(&group->pasid_array)) {
 			ret = -EBUSY;
 			goto unlock_out;
 		}
@@ -3219,7 +3223,8 @@ void iommu_group_release_dma_owner(struct iommu_group *group)
 	int ret;
 
 	mutex_lock(&group->mutex);
-	if (WARN_ON(!group->owner_cnt || !group->owner))
+	if (WARN_ON(!group->owner_cnt || !group->owner ||
+		    !xa_empty(&group->pasid_array)))
 		goto unlock_out;
 
 	group->owner_cnt = 0;
@@ -3250,3 +3255,131 @@ bool iommu_group_dma_owner_claimed(struct iommu_group *group)
 	return user;
 }
 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
+
+static int __iommu_set_group_pasid(struct iommu_domain *domain,
+				   struct iommu_group *group, ioasid_t pasid)
+{
+	struct group_device *device;
+	int ret = 0;
+
+	list_for_each_entry(device, &group->devices, list) {
+		ret = domain->ops->set_dev_pasid(domain, device->dev, pasid);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
+
+static void __iommu_remove_group_pasid(struct iommu_group *group,
+				       ioasid_t pasid)
+{
+	struct group_device *device;
+	const struct iommu_ops *ops;
+
+	list_for_each_entry(device, &group->devices, list) {
+		ops = dev_iommu_ops(device->dev);
+		ops->remove_dev_pasid(device->dev, pasid);
+	}
+}
+
+/*
+ * iommu_attach_device_pasid() - Attach a domain to pasid of device
+ * @domain: the iommu domain.
+ * @dev: the attached device.
+ * @pasid: the pasid of the device.
+ *
+ * Return: 0 on success, or an error.
+ */
+int iommu_attach_device_pasid(struct iommu_domain *domain,
+			      struct device *dev, ioasid_t pasid)
+{
+	struct iommu_group *group;
+	void *curr;
+	int ret;
+
+	if (!domain->ops->set_dev_pasid)
+		return -EOPNOTSUPP;
+
+	group = iommu_group_get(dev);
+	if (!group)
+		return -ENODEV;
+
+	mutex_lock(&group->mutex);
+	curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
+	if (curr) {
+		ret = xa_err(curr) ? : -EBUSY;
+		goto out_unlock;
+	}
+
+	ret = __iommu_set_group_pasid(domain, group, pasid);
+	if (ret) {
+		__iommu_remove_group_pasid(group, pasid);
+		xa_erase(&group->pasid_array, pasid);
+	}
+out_unlock:
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
+
+/*
+ * iommu_detach_device_pasid() - Detach the domain from pasid of device
+ * @domain: the iommu domain.
+ * @dev: the attached device.
+ * @pasid: the pasid of the device.
+ *
+ * The @domain must have been attached to @pasid of the @dev with
+ * iommu_attach_device_pasid().
+ */
+void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
+			       ioasid_t pasid)
+{
+	struct iommu_group *group = iommu_group_get(dev);
+
+	mutex_lock(&group->mutex);
+	__iommu_remove_group_pasid(group, pasid);
+	WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
+	mutex_unlock(&group->mutex);
+
+	iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
+
+/*
+ * iommu_get_domain_for_dev_pasid() - Retrieve domain for @pasid of @dev
+ * @dev: the queried device
+ * @pasid: the pasid of the device
+ * @type: matched domain type, 0 for any match
+ *
+ * This is a variant of iommu_get_domain_for_dev(). It returns the existing
+ * domain attached to pasid of a device. Callers must hold a lock around this
+ * function, and both iommu_attach/detach_dev_pasid() whenever a domain of
+ * type is being manipulated. This API does not internally resolve races with
+ * attach/detach.
+ *
+ * Return: attached domain on success, NULL otherwise.
+ */
+struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
+						    ioasid_t pasid,
+						    unsigned int type)
+{
+	struct iommu_domain *domain;
+	struct iommu_group *group;
+
+	group = iommu_group_get(dev);
+	if (!group)
+		return NULL;
+
+	xa_lock(&group->pasid_array);
+	domain = xa_load(&group->pasid_array, pasid);
+	if (type && domain && domain->type != type)
+		domain = ERR_PTR(-EBUSY);
+	xa_unlock(&group->pasid_array);
+	iommu_group_put(group);
+
+	return domain;
+}
+EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 72bb0531aa769..5d2b78ac5416f 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -223,6 +223,9 @@ struct iommu_iotlb_gather {
  *		- IOMMU_DOMAIN_DMA: must use a dma domain
  *		- 0: use the default setting
  * @default_domain_ops: the default ops for domains
+ * @remove_dev_pasid: Remove any translation configurations of a specific
+ *                    pasid, so that any DMA transactions with this pasid
+ *                    will be blocked by the hardware.
  * @pgsize_bitmap: bitmap of all possible supported page sizes
  * @owner: Driver module providing these ops
  */
@@ -256,6 +259,7 @@ struct iommu_ops {
 			     struct iommu_page_response *msg);
 
 	int (*def_domain_type)(struct device *dev);
+	void (*remove_dev_pasid)(struct device *dev, ioasid_t pasid);
 
 	const struct iommu_domain_ops *default_domain_ops;
 	unsigned long pgsize_bitmap;
@@ -266,6 +270,7 @@ struct iommu_ops {
  * struct iommu_domain_ops - domain specific operations
  * @attach_dev: attach an iommu domain to a device
  * @detach_dev: detach an iommu domain from a device
+ * @set_dev_pasid: set an iommu domain to a pasid of device
  * @map: map a physically contiguous memory region to an iommu domain
  * @map_pages: map a physically contiguous set of pages of the same size to
  *             an iommu domain.
@@ -286,6 +291,8 @@ struct iommu_ops {
 struct iommu_domain_ops {
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+	int (*set_dev_pasid)(struct iommu_domain *domain, struct device *dev,
+			     ioasid_t pasid);
 
 	int (*map)(struct iommu_domain *domain, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
@@ -678,6 +685,13 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
 void iommu_group_release_dma_owner(struct iommu_group *group);
 bool iommu_group_dma_owner_claimed(struct iommu_group *group);
 
+int iommu_attach_device_pasid(struct iommu_domain *domain,
+			      struct device *dev, ioasid_t pasid);
+void iommu_detach_device_pasid(struct iommu_domain *domain,
+			       struct device *dev, ioasid_t pasid);
+struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
+			       unsigned int type);
 #else /* CONFIG_IOMMU_API */
 
 struct iommu_ops {};
@@ -1040,6 +1054,24 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
 {
 	return false;
 }
+
+static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
+					    struct device *dev, ioasid_t pasid)
+{
+	return -ENODEV;
+}
+
+static inline void iommu_detach_device_pasid(struct iommu_domain *domain,
+					     struct device *dev, ioasid_t pasid)
+{
+}
+
+static inline struct iommu_domain *
+iommu_get_domain_for_dev_pasid(struct device *dev, ioasid_t pasid,
+			       unsigned int type)
+{
+	return NULL;
+}
 #endif /* CONFIG_IOMMU_API */
 
 /**
-- 
GitLab


From 136467962e49931dbc6240aea8197fab7e407ba4 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:10 +0800
Subject: [PATCH 0885/4122] iommu: Add IOMMU SVA domain support

The SVA iommu_domain represents a hardware pagetable that the IOMMU
hardware could use for SVA translation. This adds some infrastructures
to support SVA domain in the iommu core. It includes:

- Extend the iommu_domain to support a new IOMMU_DOMAIN_SVA domain
  type. The IOMMU drivers that support allocation of the SVA domain
  should provide its own SVA domain specific iommu_domain_ops.
- Add a helper to allocate an SVA domain. The iommu_domain_free()
  is still used to free an SVA domain.

The report_iommu_fault() should be replaced by the new
iommu_report_device_fault(). Leave the existing fault handler with the
existing users and the newly added SVA members excludes it.

Suggested-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-7-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 20 ++++++++++++++++++++
 include/linux/iommu.h | 25 +++++++++++++++++++++++--
 2 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index bf22992beb98a..6a1cd2018e301 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -29,6 +29,7 @@
 #include <linux/module.h>
 #include <linux/cc_platform.h>
 #include <trace/events/iommu.h>
+#include <linux/sched/mm.h>
 
 #include "dma-iommu.h"
 
@@ -1934,6 +1935,8 @@ EXPORT_SYMBOL_GPL(iommu_domain_alloc);
 
 void iommu_domain_free(struct iommu_domain *domain)
 {
+	if (domain->type == IOMMU_DOMAIN_SVA)
+		mmdrop(domain->mm);
 	iommu_put_dma_cookie(domain);
 	domain->ops->free(domain);
 }
@@ -3383,3 +3386,20 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
 	return domain;
 }
 EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev_pasid);
+
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+					    struct mm_struct *mm)
+{
+	const struct iommu_ops *ops = dev_iommu_ops(dev);
+	struct iommu_domain *domain;
+
+	domain = ops->domain_alloc(IOMMU_DOMAIN_SVA);
+	if (!domain)
+		return NULL;
+
+	domain->type = IOMMU_DOMAIN_SVA;
+	mmgrab(mm);
+	domain->mm = mm;
+
+	return domain;
+}
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5d2b78ac5416f..776baa3759674 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -64,6 +64,8 @@ struct iommu_domain_geometry {
 #define __IOMMU_DOMAIN_PT	(1U << 2)  /* Domain is identity mapped   */
 #define __IOMMU_DOMAIN_DMA_FQ	(1U << 3)  /* DMA-API uses flush queue    */
 
+#define __IOMMU_DOMAIN_SVA	(1U << 4)  /* Shared process address space */
+
 /*
  * This are the possible domain-types
  *
@@ -77,6 +79,8 @@ struct iommu_domain_geometry {
  *				  certain optimizations for these domains
  *	IOMMU_DOMAIN_DMA_FQ	- As above, but definitely using batched TLB
  *				  invalidation.
+ *	IOMMU_DOMAIN_SVA	- DMA addresses are shared process addresses
+ *				  represented by mm_struct's.
  */
 #define IOMMU_DOMAIN_BLOCKED	(0U)
 #define IOMMU_DOMAIN_IDENTITY	(__IOMMU_DOMAIN_PT)
@@ -86,15 +90,24 @@ struct iommu_domain_geometry {
 #define IOMMU_DOMAIN_DMA_FQ	(__IOMMU_DOMAIN_PAGING |	\
 				 __IOMMU_DOMAIN_DMA_API |	\
 				 __IOMMU_DOMAIN_DMA_FQ)
+#define IOMMU_DOMAIN_SVA	(__IOMMU_DOMAIN_SVA)
 
 struct iommu_domain {
 	unsigned type;
 	const struct iommu_domain_ops *ops;
 	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
-	iommu_fault_handler_t handler;
-	void *handler_token;
 	struct iommu_domain_geometry geometry;
 	struct iommu_dma_cookie *iova_cookie;
+	union {
+		struct {
+			iommu_fault_handler_t handler;
+			void *handler_token;
+		};
+		struct {	/* IOMMU_DOMAIN_SVA */
+			struct mm_struct *mm;
+			int users;
+		};
+	};
 };
 
 static inline bool iommu_is_dma_domain(struct iommu_domain *domain)
@@ -685,6 +698,8 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
 void iommu_group_release_dma_owner(struct iommu_group *group);
 bool iommu_group_dma_owner_claimed(struct iommu_group *group);
 
+struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
+					    struct mm_struct *mm);
 int iommu_attach_device_pasid(struct iommu_domain *domain,
 			      struct device *dev, ioasid_t pasid);
 void iommu_detach_device_pasid(struct iommu_domain *domain,
@@ -1055,6 +1070,12 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
 	return false;
 }
 
+static inline struct iommu_domain *
+iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
+{
+	return NULL;
+}
+
 static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
 					    struct device *dev, ioasid_t pasid)
 {
-- 
GitLab


From eaca8889a1ef50783bcaad143668b735d136fe46 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:11 +0800
Subject: [PATCH 0886/4122] iommu/vt-d: Add SVA domain support

Add support for SVA domain allocation and provide an SVA-specific
iommu_domain_ops. This implementation is based on the existing SVA
code. Possible cleanup and refactoring are left for incremental
changes later.

The VT-d driver will also need to support setting a DMA domain to a
PASID of device. Current SVA implementation uses different data
structures to track the domain and device PASID relationship. That's
the reason why we need to check the domain type in remove_dev_pasid
callback. Eventually we'll consolidate the data structures and remove
the need of domain type check.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-8-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 25 ++++++++++++++++++++
 drivers/iommu/intel/iommu.h | 10 ++++++++
 drivers/iommu/intel/svm.c   | 47 +++++++++++++++++++++++++++++++++++++
 3 files changed, 82 insertions(+)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 48cdcd0a5cf34..7b67e431dd362 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4169,6 +4169,8 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 		return domain;
 	case IOMMU_DOMAIN_IDENTITY:
 		return &si_domain->domain;
+	case IOMMU_DOMAIN_SVA:
+		return intel_svm_domain_alloc();
 	default:
 		return NULL;
 	}
@@ -4712,6 +4714,28 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
 		__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
 }
 
+static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+{
+	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
+	struct iommu_domain *domain;
+
+	/* Domain type specific cleanup: */
+	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
+	if (domain) {
+		switch (domain->type) {
+		case IOMMU_DOMAIN_SVA:
+			intel_svm_remove_dev_pasid(dev, pasid);
+			break;
+		default:
+			/* should never reach here */
+			WARN_ON(1);
+			break;
+		}
+	}
+
+	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+}
+
 const struct iommu_ops intel_iommu_ops = {
 	.capable		= intel_iommu_capable,
 	.domain_alloc		= intel_iommu_domain_alloc,
@@ -4724,6 +4748,7 @@ const struct iommu_ops intel_iommu_ops = {
 	.dev_disable_feat	= intel_iommu_dev_disable_feat,
 	.is_attach_deferred	= intel_iommu_is_attach_deferred,
 	.def_domain_type	= device_def_domain_type,
+	.remove_dev_pasid	= intel_iommu_remove_dev_pasid,
 	.pgsize_bitmap		= SZ_4K,
 #ifdef CONFIG_INTEL_IOMMU_SVM
 	.sva_bind		= intel_svm_bind,
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 33e5bcaf2a6cf..252fa344f88a2 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -753,6 +753,8 @@ void intel_svm_unbind(struct iommu_sva *handle);
 u32 intel_svm_get_pasid(struct iommu_sva *handle);
 int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
 			    struct iommu_page_response *msg);
+struct iommu_domain *intel_svm_domain_alloc(void);
+void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
 
 struct intel_svm_dev {
 	struct list_head list;
@@ -777,6 +779,14 @@ struct intel_svm {
 };
 #else
 static inline void intel_svm_check(struct intel_iommu *iommu) {}
+static inline struct iommu_domain *intel_svm_domain_alloc(void)
+{
+	return NULL;
+}
+
+static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+{
+}
 #endif
 
 #ifdef CONFIG_INTEL_IOMMU_DEBUGFS
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 94bc47b68c935..86c8ea0d9635e 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -882,3 +882,50 @@ int intel_svm_page_response(struct device *dev,
 out:
 	return ret;
 }
+
+void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+{
+	mutex_lock(&pasid_mutex);
+	intel_svm_unbind_mm(dev, pasid);
+	mutex_unlock(&pasid_mutex);
+}
+
+static int intel_svm_set_dev_pasid(struct iommu_domain *domain,
+				   struct device *dev, ioasid_t pasid)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
+	struct mm_struct *mm = domain->mm;
+	struct iommu_sva *sva;
+	int ret = 0;
+
+	mutex_lock(&pasid_mutex);
+	sva = intel_svm_bind_mm(iommu, dev, mm);
+	if (IS_ERR(sva))
+		ret = PTR_ERR(sva);
+	mutex_unlock(&pasid_mutex);
+
+	return ret;
+}
+
+static void intel_svm_domain_free(struct iommu_domain *domain)
+{
+	kfree(to_dmar_domain(domain));
+}
+
+static const struct iommu_domain_ops intel_svm_domain_ops = {
+	.set_dev_pasid		= intel_svm_set_dev_pasid,
+	.free			= intel_svm_domain_free
+};
+
+struct iommu_domain *intel_svm_domain_alloc(void)
+{
+	struct dmar_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+	domain->domain.ops = &intel_svm_domain_ops;
+
+	return &domain->domain;
+}
-- 
GitLab


From 386fa64fd52baadb849ed60c78b024fd1618278e Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:12 +0800
Subject: [PATCH 0887/4122] arm-smmu-v3/sva: Add SVA domain support

Add support for SVA domain allocation and provide an SVA-specific
iommu_domain_ops. This implementation is based on the existing SVA
code. Possible cleanup and refactoring are left for incremental
changes later.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Link: https://lore.kernel.org/r/20221031005917.45690-9-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 61 +++++++++++++++++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   | 15 +++++
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   | 14 +++++
 3 files changed, 90 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 8fcf0df4bd0ea..2d188d12419ec 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -549,3 +549,64 @@ void arm_smmu_sva_notifier_synchronize(void)
 	 */
 	mmu_notifier_synchronize();
 }
+
+void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
+				   struct device *dev, ioasid_t id)
+{
+	struct mm_struct *mm = domain->mm;
+	struct arm_smmu_bond *bond = NULL, *t;
+	struct arm_smmu_master *master = dev_iommu_priv_get(dev);
+
+	mutex_lock(&sva_lock);
+	list_for_each_entry(t, &master->bonds, list) {
+		if (t->mm == mm) {
+			bond = t;
+			break;
+		}
+	}
+
+	if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
+		list_del(&bond->list);
+		arm_smmu_mmu_notifier_put(bond->smmu_mn);
+		kfree(bond);
+	}
+	mutex_unlock(&sva_lock);
+}
+
+static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
+				      struct device *dev, ioasid_t id)
+{
+	int ret = 0;
+	struct iommu_sva *handle;
+	struct mm_struct *mm = domain->mm;
+
+	mutex_lock(&sva_lock);
+	handle = __arm_smmu_sva_bind(dev, mm);
+	if (IS_ERR(handle))
+		ret = PTR_ERR(handle);
+	mutex_unlock(&sva_lock);
+
+	return ret;
+}
+
+static void arm_smmu_sva_domain_free(struct iommu_domain *domain)
+{
+	kfree(domain);
+}
+
+static const struct iommu_domain_ops arm_smmu_sva_domain_ops = {
+	.set_dev_pasid		= arm_smmu_sva_set_dev_pasid,
+	.free			= arm_smmu_sva_domain_free
+};
+
+struct iommu_domain *arm_smmu_sva_domain_alloc(void)
+{
+	struct iommu_domain *domain;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+	domain->ops = &arm_smmu_sva_domain_ops;
+
+	return domain;
+}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 21cb13da122c8..eed2eb8effa3f 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2009,6 +2009,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type)
 {
 	struct arm_smmu_domain *smmu_domain;
 
+	if (type == IOMMU_DOMAIN_SVA)
+		return arm_smmu_sva_domain_alloc();
+
 	if (type != IOMMU_DOMAIN_UNMANAGED &&
 	    type != IOMMU_DOMAIN_DMA &&
 	    type != IOMMU_DOMAIN_DMA_FQ &&
@@ -2838,6 +2841,17 @@ static int arm_smmu_def_domain_type(struct device *dev)
 	return 0;
 }
 
+static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
+{
+	struct iommu_domain *domain;
+
+	domain = iommu_get_domain_for_dev_pasid(dev, pasid, IOMMU_DOMAIN_SVA);
+	if (WARN_ON(IS_ERR(domain)) || !domain)
+		return;
+
+	arm_smmu_sva_remove_dev_pasid(domain, dev, pasid);
+}
+
 static struct iommu_ops arm_smmu_ops = {
 	.capable		= arm_smmu_capable,
 	.domain_alloc		= arm_smmu_domain_alloc,
@@ -2846,6 +2860,7 @@ static struct iommu_ops arm_smmu_ops = {
 	.device_group		= arm_smmu_device_group,
 	.of_xlate		= arm_smmu_of_xlate,
 	.get_resv_regions	= arm_smmu_get_resv_regions,
+	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
 	.sva_bind		= arm_smmu_sva_bind,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index d2ba86470c423..5aa853e98d388 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -758,6 +758,9 @@ struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm);
 void arm_smmu_sva_unbind(struct iommu_sva *handle);
 u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
 void arm_smmu_sva_notifier_synchronize(void);
+struct iommu_domain *arm_smmu_sva_domain_alloc(void);
+void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
+				   struct device *dev, ioasid_t id);
 #else /* CONFIG_ARM_SMMU_V3_SVA */
 static inline bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
 {
@@ -803,5 +806,16 @@ static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
 }
 
 static inline void arm_smmu_sva_notifier_synchronize(void) {}
+
+static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void)
+{
+	return NULL;
+}
+
+static inline void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
+						 struct device *dev,
+						 ioasid_t id)
+{
+}
 #endif /* CONFIG_ARM_SMMU_V3_SVA */
 #endif /* _ARM_SMMU_V3_H */
-- 
GitLab


From be51b1d6bbff48c7d1943a8ff1e5a55777807f6e Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:13 +0800
Subject: [PATCH 0888/4122] iommu/sva: Refactoring
 iommu_sva_bind/unbind_device()

The existing iommu SVA interfaces are implemented by calling the SVA
specific iommu ops provided by the IOMMU drivers. There's no need for
any SVA specific ops in iommu_ops vector anymore as we can achieve
this through the generic attach/detach_dev_pasid domain ops.

This refactors the IOMMU SVA interfaces implementation by using the
iommu_attach/detach_device_pasid interfaces and align them with the
concept of the SVA iommu domain. Put the new SVA code in the SVA
related file in order to make it self-contained.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-10-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu-sva-lib.c | 111 ++++++++++++++++++++++++++++++++++
 drivers/iommu/iommu.c         |  91 ----------------------------
 include/linux/iommu.h         |  43 +++++++------
 3 files changed, 134 insertions(+), 111 deletions(-)

diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index 1065061438960..e425573a17875 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -4,6 +4,7 @@
  */
 #include <linux/mutex.h>
 #include <linux/sched/mm.h>
+#include <linux/iommu.h>
 
 #include "iommu-sva-lib.h"
 
@@ -69,3 +70,113 @@ struct mm_struct *iommu_sva_find(ioasid_t pasid)
 	return ioasid_find(&iommu_sva_pasid, pasid, __mmget_not_zero);
 }
 EXPORT_SYMBOL_GPL(iommu_sva_find);
+
+/**
+ * iommu_sva_bind_device() - Bind a process address space to a device
+ * @dev: the device
+ * @mm: the mm to bind, caller must hold a reference to mm_users
+ *
+ * Create a bond between device and address space, allowing the device to
+ * access the mm using the PASID returned by iommu_sva_get_pasid(). If a
+ * bond already exists between @device and @mm, an additional internal
+ * reference is taken. Caller must call iommu_sva_unbind_device()
+ * to release each reference.
+ *
+ * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
+ * initialize the required SVA features.
+ *
+ * On error, returns an ERR_PTR value.
+ */
+struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
+{
+	struct iommu_domain *domain;
+	struct iommu_sva *handle;
+	ioasid_t max_pasids;
+	int ret;
+
+	max_pasids = dev->iommu->max_pasids;
+	if (!max_pasids)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/* Allocate mm->pasid if necessary. */
+	ret = iommu_sva_alloc_pasid(mm, 1, max_pasids - 1);
+	if (ret)
+		return ERR_PTR(ret);
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (!handle)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_lock(&iommu_sva_lock);
+	/* Search for an existing domain. */
+	domain = iommu_get_domain_for_dev_pasid(dev, mm->pasid,
+						IOMMU_DOMAIN_SVA);
+	if (IS_ERR(domain)) {
+		ret = PTR_ERR(domain);
+		goto out_unlock;
+	}
+
+	if (domain) {
+		domain->users++;
+		goto out;
+	}
+
+	/* Allocate a new domain and set it on device pasid. */
+	domain = iommu_sva_domain_alloc(dev, mm);
+	if (!domain) {
+		ret = -ENOMEM;
+		goto out_unlock;
+	}
+
+	ret = iommu_attach_device_pasid(domain, dev, mm->pasid);
+	if (ret)
+		goto out_free_domain;
+	domain->users = 1;
+out:
+	mutex_unlock(&iommu_sva_lock);
+	handle->dev = dev;
+	handle->domain = domain;
+
+	return handle;
+
+out_free_domain:
+	iommu_domain_free(domain);
+out_unlock:
+	mutex_unlock(&iommu_sva_lock);
+	kfree(handle);
+
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
+
+/**
+ * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
+ * @handle: the handle returned by iommu_sva_bind_device()
+ *
+ * Put reference to a bond between device and address space. The device should
+ * not be issuing any more transaction for this PASID. All outstanding page
+ * requests for this PASID must have been flushed to the IOMMU.
+ */
+void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+	struct iommu_domain *domain = handle->domain;
+	ioasid_t pasid = domain->mm->pasid;
+	struct device *dev = handle->dev;
+
+	mutex_lock(&iommu_sva_lock);
+	if (--domain->users == 0) {
+		iommu_detach_device_pasid(domain, dev, pasid);
+		iommu_domain_free(domain);
+	}
+	mutex_unlock(&iommu_sva_lock);
+	kfree(handle);
+}
+EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
+
+u32 iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+	struct iommu_domain *domain = handle->domain;
+
+	return domain->mm->pasid;
+}
+EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6a1cd2018e301..c9da0a1bb3b8f 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -2751,97 +2751,6 @@ int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
 }
 EXPORT_SYMBOL_GPL(iommu_dev_disable_feature);
 
-/**
- * iommu_sva_bind_device() - Bind a process address space to a device
- * @dev: the device
- * @mm: the mm to bind, caller must hold a reference to it
- *
- * Create a bond between device and address space, allowing the device to access
- * the mm using the returned PASID. If a bond already exists between @device and
- * @mm, it is returned and an additional reference is taken. Caller must call
- * iommu_sva_unbind_device() to release each reference.
- *
- * iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA) must be called first, to
- * initialize the required SVA features.
- *
- * On error, returns an ERR_PTR value.
- */
-struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
-{
-	struct iommu_group *group;
-	struct iommu_sva *handle = ERR_PTR(-EINVAL);
-	const struct iommu_ops *ops = dev_iommu_ops(dev);
-
-	if (!ops->sva_bind)
-		return ERR_PTR(-ENODEV);
-
-	group = iommu_group_get(dev);
-	if (!group)
-		return ERR_PTR(-ENODEV);
-
-	/* Ensure device count and domain don't change while we're binding */
-	mutex_lock(&group->mutex);
-
-	/*
-	 * To keep things simple, SVA currently doesn't support IOMMU groups
-	 * with more than one device. Existing SVA-capable systems are not
-	 * affected by the problems that required IOMMU groups (lack of ACS
-	 * isolation, device ID aliasing and other hardware issues).
-	 */
-	if (iommu_group_device_count(group) != 1)
-		goto out_unlock;
-
-	handle = ops->sva_bind(dev, mm);
-
-out_unlock:
-	mutex_unlock(&group->mutex);
-	iommu_group_put(group);
-
-	return handle;
-}
-EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
-
-/**
- * iommu_sva_unbind_device() - Remove a bond created with iommu_sva_bind_device
- * @handle: the handle returned by iommu_sva_bind_device()
- *
- * Put reference to a bond between device and address space. The device should
- * not be issuing any more transaction for this PASID. All outstanding page
- * requests for this PASID must have been flushed to the IOMMU.
- */
-void iommu_sva_unbind_device(struct iommu_sva *handle)
-{
-	struct iommu_group *group;
-	struct device *dev = handle->dev;
-	const struct iommu_ops *ops = dev_iommu_ops(dev);
-
-	if (!ops->sva_unbind)
-		return;
-
-	group = iommu_group_get(dev);
-	if (!group)
-		return;
-
-	mutex_lock(&group->mutex);
-	ops->sva_unbind(handle);
-	mutex_unlock(&group->mutex);
-
-	iommu_group_put(group);
-}
-EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
-
-u32 iommu_sva_get_pasid(struct iommu_sva *handle)
-{
-	const struct iommu_ops *ops = dev_iommu_ops(handle->dev);
-
-	if (!ops->sva_get_pasid)
-		return IOMMU_PASID_INVALID;
-
-	return ops->sva_get_pasid(handle);
-}
-EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
-
 /*
  * Changes the default domain of an iommu group that has *only* one device
  *
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 776baa3759674..bee5659d07ebe 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -645,6 +645,7 @@ struct iommu_fwspec {
  */
 struct iommu_sva {
 	struct device			*dev;
+	struct iommu_domain		*domain;
 };
 
 int iommu_fwspec_init(struct device *dev, struct fwnode_handle *iommu_fwnode,
@@ -686,11 +687,6 @@ void iommu_release_device(struct device *dev);
 int iommu_dev_enable_feature(struct device *dev, enum iommu_dev_features f);
 int iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features f);
 
-struct iommu_sva *iommu_sva_bind_device(struct device *dev,
-					struct mm_struct *mm);
-void iommu_sva_unbind_device(struct iommu_sva *handle);
-u32 iommu_sva_get_pasid(struct iommu_sva *handle);
-
 int iommu_device_use_default_domain(struct device *dev);
 void iommu_device_unuse_default_domain(struct device *dev);
 
@@ -1026,21 +1022,6 @@ iommu_dev_disable_feature(struct device *dev, enum iommu_dev_features feat)
 	return -ENODEV;
 }
 
-static inline struct iommu_sva *
-iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
-{
-	return NULL;
-}
-
-static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
-{
-}
-
-static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
-{
-	return IOMMU_PASID_INVALID;
-}
-
 static inline struct iommu_fwspec *dev_iommu_fwspec_get(struct device *dev)
 {
 	return NULL;
@@ -1154,4 +1135,26 @@ static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_m
 
 #endif	/* CONFIG_IOMMU_DMA */
 
+#ifdef CONFIG_IOMMU_SVA
+struct iommu_sva *iommu_sva_bind_device(struct device *dev,
+					struct mm_struct *mm);
+void iommu_sva_unbind_device(struct iommu_sva *handle);
+u32 iommu_sva_get_pasid(struct iommu_sva *handle);
+#else
+static inline struct iommu_sva *
+iommu_sva_bind_device(struct device *dev, struct mm_struct *mm)
+{
+	return NULL;
+}
+
+static inline void iommu_sva_unbind_device(struct iommu_sva *handle)
+{
+}
+
+static inline u32 iommu_sva_get_pasid(struct iommu_sva *handle)
+{
+	return IOMMU_PASID_INVALID;
+}
+#endif /* CONFIG_IOMMU_SVA */
+
 #endif /* __LINUX_IOMMU_H */
-- 
GitLab


From 1c263576f4735e063e234fa5f43fd3046d36b5b3 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:14 +0800
Subject: [PATCH 0889/4122] iommu: Remove SVA related callbacks from iommu ops

These ops'es have been deprecated. There's no need for them anymore.
Remove them to avoid dead code.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-11-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 .../iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c   | 40 ---------------
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c   |  3 --
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h   | 16 ------
 drivers/iommu/intel/iommu.c                   |  3 --
 drivers/iommu/intel/iommu.h                   |  3 --
 drivers/iommu/intel/svm.c                     | 49 -------------------
 include/linux/iommu.h                         |  7 ---
 7 files changed, 121 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 2d188d12419ec..9541afbba73c5 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -344,11 +344,6 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
 	if (!bond)
 		return ERR_PTR(-ENOMEM);
 
-	/* Allocate a PASID for this mm if necessary */
-	ret = iommu_sva_alloc_pasid(mm, 1, (1U << master->ssid_bits) - 1);
-	if (ret)
-		goto err_free_bond;
-
 	bond->mm = mm;
 	bond->sva.dev = dev;
 	refcount_set(&bond->refs, 1);
@@ -367,41 +362,6 @@ err_free_bond:
 	return ERR_PTR(ret);
 }
 
-struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
-{
-	struct iommu_sva *handle;
-	struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
-	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
-
-	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
-		return ERR_PTR(-EINVAL);
-
-	mutex_lock(&sva_lock);
-	handle = __arm_smmu_sva_bind(dev, mm);
-	mutex_unlock(&sva_lock);
-	return handle;
-}
-
-void arm_smmu_sva_unbind(struct iommu_sva *handle)
-{
-	struct arm_smmu_bond *bond = sva_to_bond(handle);
-
-	mutex_lock(&sva_lock);
-	if (refcount_dec_and_test(&bond->refs)) {
-		list_del(&bond->list);
-		arm_smmu_mmu_notifier_put(bond->smmu_mn);
-		kfree(bond);
-	}
-	mutex_unlock(&sva_lock);
-}
-
-u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
-{
-	struct arm_smmu_bond *bond = sva_to_bond(handle);
-
-	return bond->mm->pasid;
-}
-
 bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
 {
 	unsigned long reg, fld;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index eed2eb8effa3f..891e87ea54dbe 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2863,9 +2863,6 @@ static struct iommu_ops arm_smmu_ops = {
 	.remove_dev_pasid	= arm_smmu_remove_dev_pasid,
 	.dev_enable_feat	= arm_smmu_dev_enable_feature,
 	.dev_disable_feat	= arm_smmu_dev_disable_feature,
-	.sva_bind		= arm_smmu_sva_bind,
-	.sva_unbind		= arm_smmu_sva_unbind,
-	.sva_get_pasid		= arm_smmu_sva_get_pasid,
 	.page_response		= arm_smmu_page_response,
 	.def_domain_type	= arm_smmu_def_domain_type,
 	.pgsize_bitmap		= -1UL, /* Restricted during device attach */
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 5aa853e98d388..8d772ea8a5836 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -754,9 +754,6 @@ bool arm_smmu_master_sva_enabled(struct arm_smmu_master *master);
 int arm_smmu_master_enable_sva(struct arm_smmu_master *master);
 int arm_smmu_master_disable_sva(struct arm_smmu_master *master);
 bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master);
-struct iommu_sva *arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm);
-void arm_smmu_sva_unbind(struct iommu_sva *handle);
-u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle);
 void arm_smmu_sva_notifier_synchronize(void);
 struct iommu_domain *arm_smmu_sva_domain_alloc(void);
 void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
@@ -792,19 +789,6 @@ static inline bool arm_smmu_master_iopf_supported(struct arm_smmu_master *master
 	return false;
 }
 
-static inline struct iommu_sva *
-arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
-{
-	return ERR_PTR(-ENODEV);
-}
-
-static inline void arm_smmu_sva_unbind(struct iommu_sva *handle) {}
-
-static inline u32 arm_smmu_sva_get_pasid(struct iommu_sva *handle)
-{
-	return IOMMU_PASID_INVALID;
-}
-
 static inline void arm_smmu_sva_notifier_synchronize(void) {}
 
 static inline struct iommu_domain *arm_smmu_sva_domain_alloc(void)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 7b67e431dd362..5a41b10593b7a 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4751,9 +4751,6 @@ const struct iommu_ops intel_iommu_ops = {
 	.remove_dev_pasid	= intel_iommu_remove_dev_pasid,
 	.pgsize_bitmap		= SZ_4K,
 #ifdef CONFIG_INTEL_IOMMU_SVM
-	.sva_bind		= intel_svm_bind,
-	.sva_unbind		= intel_svm_unbind,
-	.sva_get_pasid		= intel_svm_get_pasid,
 	.page_response		= intel_svm_page_response,
 #endif
 	.default_domain_ops = &(const struct iommu_domain_ops) {
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 252fa344f88a2..251a609fdce32 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -748,9 +748,6 @@ struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
 extern void intel_svm_check(struct intel_iommu *iommu);
 extern int intel_svm_enable_prq(struct intel_iommu *iommu);
 extern int intel_svm_finish_prq(struct intel_iommu *iommu);
-struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm);
-void intel_svm_unbind(struct iommu_sva *handle);
-u32 intel_svm_get_pasid(struct iommu_sva *handle);
 int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
 			    struct iommu_page_response *msg);
 struct iommu_domain *intel_svm_domain_alloc(void);
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 86c8ea0d9635e..fceae93870189 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -296,14 +296,6 @@ out:
 	return 0;
 }
 
-static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm)
-{
-	ioasid_t max_pasid = dev_is_pci(dev) ?
-			pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;
-
-	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
-}
-
 static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
 					   struct device *dev,
 					   struct mm_struct *mm)
@@ -771,47 +763,6 @@ prq_advance:
 	return IRQ_RETVAL(handled);
 }
 
-struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm)
-{
-	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
-	struct iommu_sva *sva;
-	int ret;
-
-	mutex_lock(&pasid_mutex);
-	ret = intel_svm_alloc_pasid(dev, mm);
-	if (ret) {
-		mutex_unlock(&pasid_mutex);
-		return ERR_PTR(ret);
-	}
-
-	sva = intel_svm_bind_mm(iommu, dev, mm);
-	mutex_unlock(&pasid_mutex);
-
-	return sva;
-}
-
-void intel_svm_unbind(struct iommu_sva *sva)
-{
-	struct intel_svm_dev *sdev = to_intel_svm_dev(sva);
-
-	mutex_lock(&pasid_mutex);
-	intel_svm_unbind_mm(sdev->dev, sdev->pasid);
-	mutex_unlock(&pasid_mutex);
-}
-
-u32 intel_svm_get_pasid(struct iommu_sva *sva)
-{
-	struct intel_svm_dev *sdev;
-	u32 pasid;
-
-	mutex_lock(&pasid_mutex);
-	sdev = to_intel_svm_dev(sva);
-	pasid = sdev->pasid;
-	mutex_unlock(&pasid_mutex);
-
-	return pasid;
-}
-
 int intel_svm_page_response(struct device *dev,
 			    struct iommu_fault_event *evt,
 			    struct iommu_page_response *msg)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index bee5659d07ebe..c337ef1c97bc8 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -227,9 +227,6 @@ struct iommu_iotlb_gather {
  *                      driver init to device driver init (default no)
  * @dev_enable/disable_feat: per device entries to enable/disable
  *                               iommu specific features.
- * @sva_bind: Bind process address space to device
- * @sva_unbind: Unbind process address space from device
- * @sva_get_pasid: Get PASID associated to a SVA handle
  * @page_response: handle page request response
  * @def_domain_type: device default domain type, return value:
  *		- IOMMU_DOMAIN_IDENTITY: must use an identity domain
@@ -263,10 +260,6 @@ struct iommu_ops {
 	int (*dev_enable_feat)(struct device *dev, enum iommu_dev_features f);
 	int (*dev_disable_feat)(struct device *dev, enum iommu_dev_features f);
 
-	struct iommu_sva *(*sva_bind)(struct device *dev, struct mm_struct *mm);
-	void (*sva_unbind)(struct iommu_sva *handle);
-	u32 (*sva_get_pasid)(struct iommu_sva *handle);
-
 	int (*page_response)(struct device *dev,
 			     struct iommu_fault_event *evt,
 			     struct iommu_page_response *msg);
-- 
GitLab


From 8cc93159f91960b4812ea48887e9e7501babc95a Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:15 +0800
Subject: [PATCH 0890/4122] iommu: Prepare IOMMU domain for IOPF

This adds some mechanisms around the iommu_domain so that the I/O page
fault handling framework could route a page fault to the domain and
call the fault handler from it.

Add pointers to the page fault handler and its private data in struct
iommu_domain. The fault handler will be called with the private data
as a parameter once a page fault is routed to the domain. Any kernel
component which owns an iommu domain could install handler and its
private parameter so that the page fault could be further routed and
handled.

This also prepares the SVA implementation to be the first consumer of
the per-domain page fault handling model. The I/O page fault handler
for SVA is copied to the SVA file with mmget_not_zero() added before
mmap_read_lock().

Suggested-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-12-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgfault.c    |  7 +++++
 drivers/iommu/iommu-sva-lib.c | 58 +++++++++++++++++++++++++++++++++++
 drivers/iommu/iommu-sva-lib.h |  8 +++++
 drivers/iommu/iommu.c         |  4 +++
 include/linux/iommu.h         |  3 ++
 5 files changed, 80 insertions(+)

diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index 1df8c1dcae776..aee9e033012f0 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -181,6 +181,13 @@ static void iopf_handle_group(struct work_struct *work)
  * request completes, outstanding faults will have been dealt with by the time
  * the PASID is freed.
  *
+ * Any valid page fault will be eventually routed to an iommu domain and the
+ * page fault handler installed there will get called. The users of this
+ * handling framework should guarantee that the iommu domain could only be
+ * freed after the device has stopped generating page faults (or the iommu
+ * hardware has been set to block the page faults) and the pending page faults
+ * have been flushed.
+ *
  * Return: 0 on success and <0 on error.
  */
 int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva-lib.c
index e425573a17875..089fd61ff4538 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva-lib.c
@@ -180,3 +180,61 @@ u32 iommu_sva_get_pasid(struct iommu_sva *handle)
 	return domain->mm->pasid;
 }
 EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
+
+/*
+ * I/O page fault handler for SVA
+ */
+enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+	vm_fault_t ret;
+	struct vm_area_struct *vma;
+	struct mm_struct *mm = data;
+	unsigned int access_flags = 0;
+	unsigned int fault_flags = FAULT_FLAG_REMOTE;
+	struct iommu_fault_page_request *prm = &fault->prm;
+	enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
+
+	if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
+		return status;
+
+	if (!mmget_not_zero(mm))
+		return status;
+
+	mmap_read_lock(mm);
+
+	vma = find_extend_vma(mm, prm->addr);
+	if (!vma)
+		/* Unmapped area */
+		goto out_put_mm;
+
+	if (prm->perm & IOMMU_FAULT_PERM_READ)
+		access_flags |= VM_READ;
+
+	if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
+		access_flags |= VM_WRITE;
+		fault_flags |= FAULT_FLAG_WRITE;
+	}
+
+	if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
+		access_flags |= VM_EXEC;
+		fault_flags |= FAULT_FLAG_INSTRUCTION;
+	}
+
+	if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
+		fault_flags |= FAULT_FLAG_USER;
+
+	if (access_flags & ~vma->vm_flags)
+		/* Access fault */
+		goto out_put_mm;
+
+	ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
+	status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
+		IOMMU_PAGE_RESP_SUCCESS;
+
+out_put_mm:
+	mmap_read_unlock(mm);
+	mmput(mm);
+
+	return status;
+}
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva-lib.h
index 8909ea1094e3a..1b3ace4b5863a 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva-lib.h
@@ -26,6 +26,8 @@ int iopf_queue_flush_dev(struct device *dev);
 struct iopf_queue *iopf_queue_alloc(const char *name);
 void iopf_queue_free(struct iopf_queue *queue);
 int iopf_queue_discard_partial(struct iopf_queue *queue);
+enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data);
 
 #else /* CONFIG_IOMMU_SVA */
 static inline int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
@@ -63,5 +65,11 @@ static inline int iopf_queue_discard_partial(struct iopf_queue *queue)
 {
 	return -ENODEV;
 }
+
+static inline enum iommu_page_response_code
+iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
+{
+	return IOMMU_PAGE_RESP_INVALID;
+}
 #endif /* CONFIG_IOMMU_SVA */
 #endif /* _IOMMU_SVA_LIB_H */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index c9da0a1bb3b8f..9e0fb18e1b346 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -33,6 +33,8 @@
 
 #include "dma-iommu.h"
 
+#include "iommu-sva-lib.h"
+
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 
@@ -3309,6 +3311,8 @@ struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
 	domain->type = IOMMU_DOMAIN_SVA;
 	mmgrab(mm);
 	domain->mm = mm;
+	domain->iopf_handler = iommu_sva_handle_iopf;
+	domain->fault_data = mm;
 
 	return domain;
 }
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index c337ef1c97bc8..7d2648058e439 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -98,6 +98,9 @@ struct iommu_domain {
 	unsigned long pgsize_bitmap;	/* Bitmap of page sizes in use */
 	struct iommu_domain_geometry geometry;
 	struct iommu_dma_cookie *iova_cookie;
+	enum iommu_page_response_code (*iopf_handler)(struct iommu_fault *fault,
+						      void *data);
+	void *fault_data;
 	union {
 		struct {
 			iommu_fault_handler_t handler;
-- 
GitLab


From 4bb4211e48fbfb392bb07168b75b1a92832b62f5 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:16 +0800
Subject: [PATCH 0891/4122] iommu: Per-domain I/O page fault handling

Tweak the I/O page fault handling framework to route the page faults to
the domain and call the page fault handler retrieved from the domain.
This makes the I/O page fault handling framework possible to serve more
usage scenarios as long as they have an IOMMU domain and install a page
fault handler in it. Some unused functions are also removed to avoid
dead code.

The iommu_get_domain_for_dev_pasid() which retrieves attached domain
for a {device, PASID} pair is used. It will be used by the page fault
handling framework which knows {device, PASID} reported from the iommu
driver. We have a guarantee that the SVA domain doesn't go away during
IOPF handling, because unbind() won't free the domain until all the
pending page requests have been flushed from the pipeline. The drivers
either call iopf_queue_flush_dev() explicitly, or in stall case, the
device driver is required to flush all DMAs including stalled
transactions before calling unbind().

This also renames iopf_handle_group() to iopf_handler() to avoid
confusing.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-13-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgfault.c | 68 +++++---------------------------------
 1 file changed, 9 insertions(+), 59 deletions(-)

diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index aee9e033012f0..d046d89cec553 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -69,69 +69,18 @@ static int iopf_complete_group(struct device *dev, struct iopf_fault *iopf,
 	return iommu_page_response(dev, &resp);
 }
 
-static enum iommu_page_response_code
-iopf_handle_single(struct iopf_fault *iopf)
-{
-	vm_fault_t ret;
-	struct mm_struct *mm;
-	struct vm_area_struct *vma;
-	unsigned int access_flags = 0;
-	unsigned int fault_flags = FAULT_FLAG_REMOTE;
-	struct iommu_fault_page_request *prm = &iopf->fault.prm;
-	enum iommu_page_response_code status = IOMMU_PAGE_RESP_INVALID;
-
-	if (!(prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID))
-		return status;
-
-	mm = iommu_sva_find(prm->pasid);
-	if (IS_ERR_OR_NULL(mm))
-		return status;
-
-	mmap_read_lock(mm);
-
-	vma = find_extend_vma(mm, prm->addr);
-	if (!vma)
-		/* Unmapped area */
-		goto out_put_mm;
-
-	if (prm->perm & IOMMU_FAULT_PERM_READ)
-		access_flags |= VM_READ;
-
-	if (prm->perm & IOMMU_FAULT_PERM_WRITE) {
-		access_flags |= VM_WRITE;
-		fault_flags |= FAULT_FLAG_WRITE;
-	}
-
-	if (prm->perm & IOMMU_FAULT_PERM_EXEC) {
-		access_flags |= VM_EXEC;
-		fault_flags |= FAULT_FLAG_INSTRUCTION;
-	}
-
-	if (!(prm->perm & IOMMU_FAULT_PERM_PRIV))
-		fault_flags |= FAULT_FLAG_USER;
-
-	if (access_flags & ~vma->vm_flags)
-		/* Access fault */
-		goto out_put_mm;
-
-	ret = handle_mm_fault(vma, prm->addr, fault_flags, NULL);
-	status = ret & VM_FAULT_ERROR ? IOMMU_PAGE_RESP_INVALID :
-		IOMMU_PAGE_RESP_SUCCESS;
-
-out_put_mm:
-	mmap_read_unlock(mm);
-	mmput(mm);
-
-	return status;
-}
-
-static void iopf_handle_group(struct work_struct *work)
+static void iopf_handler(struct work_struct *work)
 {
 	struct iopf_group *group;
+	struct iommu_domain *domain;
 	struct iopf_fault *iopf, *next;
 	enum iommu_page_response_code status = IOMMU_PAGE_RESP_SUCCESS;
 
 	group = container_of(work, struct iopf_group, work);
+	domain = iommu_get_domain_for_dev_pasid(group->dev,
+				group->last_fault.fault.prm.pasid, 0);
+	if (!domain || !domain->iopf_handler)
+		status = IOMMU_PAGE_RESP_INVALID;
 
 	list_for_each_entry_safe(iopf, next, &group->faults, list) {
 		/*
@@ -139,7 +88,8 @@ static void iopf_handle_group(struct work_struct *work)
 		 * faults in the group if there is an error.
 		 */
 		if (status == IOMMU_PAGE_RESP_SUCCESS)
-			status = iopf_handle_single(iopf);
+			status = domain->iopf_handler(&iopf->fault,
+						      domain->fault_data);
 
 		if (!(iopf->fault.prm.flags &
 		      IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE))
@@ -242,7 +192,7 @@ int iommu_queue_iopf(struct iommu_fault *fault, void *cookie)
 	group->last_fault.fault = *fault;
 	INIT_LIST_HEAD(&group->faults);
 	list_add(&group->last_fault.list, &group->faults);
-	INIT_WORK(&group->work, iopf_handle_group);
+	INIT_WORK(&group->work, iopf_handler);
 
 	/* See if we have partial faults for this group */
 	list_for_each_entry_safe(iopf, next, &iopf_param->partial, list) {
-- 
GitLab


From 757636ed2607a3269cd2764e3e4a0480384c6c26 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Mon, 31 Oct 2022 08:59:17 +0800
Subject: [PATCH 0892/4122] iommu: Rename iommu-sva-lib.{c,h}

Rename iommu-sva-lib.c[h] to iommu-sva.c[h] as it contains all code
for SVA implementation in iommu core.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jean-Philippe Brucker <jean-philippe@linaro.org>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Zhangfei Gao <zhangfei.gao@linaro.org>
Tested-by: Tony Zhu <tony.zhu@intel.com>
Link: https://lore.kernel.org/r/20221031005917.45690-14-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/Makefile                          | 2 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c | 2 +-
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c     | 2 +-
 drivers/iommu/intel/iommu.c                     | 2 +-
 drivers/iommu/intel/svm.c                       | 2 +-
 drivers/iommu/io-pgfault.c                      | 2 +-
 drivers/iommu/{iommu-sva-lib.c => iommu-sva.c}  | 2 +-
 drivers/iommu/{iommu-sva-lib.h => iommu-sva.h}  | 6 +++---
 drivers/iommu/iommu.c                           | 2 +-
 9 files changed, 11 insertions(+), 11 deletions(-)
 rename drivers/iommu/{iommu-sva-lib.c => iommu-sva.c} (99%)
 rename drivers/iommu/{iommu-sva-lib.h => iommu-sva.h} (95%)

diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index cc9f381013c35..7fbf6a3376620 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -28,6 +28,6 @@ obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
 obj-$(CONFIG_S390_IOMMU) += s390-iommu.o
 obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o
 obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o
-obj-$(CONFIG_IOMMU_SVA) += iommu-sva-lib.o io-pgfault.o
+obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o io-pgfault.o
 obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
 obj-$(CONFIG_APPLE_DART) += apple-dart.o
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 9541afbba73c5..a5a63b1c947eb 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -10,7 +10,7 @@
 #include <linux/slab.h>
 
 #include "arm-smmu-v3.h"
-#include "../../iommu-sva-lib.h"
+#include "../../iommu-sva.h"
 #include "../../io-pgtable-arm.h"
 
 struct arm_smmu_mmu_notifier {
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index 891e87ea54dbe..94a2e53368af9 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -29,7 +29,7 @@
 
 #include "arm-smmu-v3.h"
 #include "../../dma-iommu.h"
-#include "../../iommu-sva-lib.h"
+#include "../../iommu-sva.h"
 
 static bool disable_bypass = true;
 module_param(disable_bypass, bool, 0444);
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 5a41b10593b7a..a934a46bb9e65 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -27,7 +27,7 @@
 #include "iommu.h"
 #include "../dma-iommu.h"
 #include "../irq_remapping.h"
-#include "../iommu-sva-lib.h"
+#include "../iommu-sva.h"
 #include "pasid.h"
 #include "cap_audit.h"
 
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index fceae93870189..f32de15da61a2 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -24,7 +24,7 @@
 #include "iommu.h"
 #include "pasid.h"
 #include "perf.h"
-#include "../iommu-sva-lib.h"
+#include "../iommu-sva.h"
 #include "trace.h"
 
 static irqreturn_t prq_event_thread(int irq, void *d);
diff --git a/drivers/iommu/io-pgfault.c b/drivers/iommu/io-pgfault.c
index d046d89cec553..e5b8b9110c132 100644
--- a/drivers/iommu/io-pgfault.c
+++ b/drivers/iommu/io-pgfault.c
@@ -11,7 +11,7 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 
-#include "iommu-sva-lib.h"
+#include "iommu-sva.h"
 
 /**
  * struct iopf_queue - IO Page Fault queue
diff --git a/drivers/iommu/iommu-sva-lib.c b/drivers/iommu/iommu-sva.c
similarity index 99%
rename from drivers/iommu/iommu-sva-lib.c
rename to drivers/iommu/iommu-sva.c
index 089fd61ff4538..24bf9b2b58aa6 100644
--- a/drivers/iommu/iommu-sva-lib.c
+++ b/drivers/iommu/iommu-sva.c
@@ -6,7 +6,7 @@
 #include <linux/sched/mm.h>
 #include <linux/iommu.h>
 
-#include "iommu-sva-lib.h"
+#include "iommu-sva.h"
 
 static DEFINE_MUTEX(iommu_sva_lock);
 static DECLARE_IOASID_SET(iommu_sva_pasid);
diff --git a/drivers/iommu/iommu-sva-lib.h b/drivers/iommu/iommu-sva.h
similarity index 95%
rename from drivers/iommu/iommu-sva-lib.h
rename to drivers/iommu/iommu-sva.h
index 1b3ace4b5863a..7215a761b9628 100644
--- a/drivers/iommu/iommu-sva-lib.h
+++ b/drivers/iommu/iommu-sva.h
@@ -2,8 +2,8 @@
 /*
  * SVA library for IOMMU drivers
  */
-#ifndef _IOMMU_SVA_LIB_H
-#define _IOMMU_SVA_LIB_H
+#ifndef _IOMMU_SVA_H
+#define _IOMMU_SVA_H
 
 #include <linux/ioasid.h>
 #include <linux/mm_types.h>
@@ -72,4 +72,4 @@ iommu_sva_handle_iopf(struct iommu_fault *fault, void *data)
 	return IOMMU_PAGE_RESP_INVALID;
 }
 #endif /* CONFIG_IOMMU_SVA */
-#endif /* _IOMMU_SVA_LIB_H */
+#endif /* _IOMMU_SVA_H */
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9e0fb18e1b346..c50f68b2b656e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -33,7 +33,7 @@
 
 #include "dma-iommu.h"
 
-#include "iommu-sva-lib.h"
+#include "iommu-sva.h"
 
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
-- 
GitLab


From 7eb99841f340b80be0d0973b0deb592d75fb8928 Mon Sep 17 00:00:00 2001
From: Michael Riesch <michael.riesch@wolfvision.net>
Date: Wed, 2 Nov 2022 07:35:53 +0100
Subject: [PATCH 0893/4122] iommu/rockchip: fix permission bits in page table
 entries v2

As pointed out in the corresponding downstream fix [0], the permission bits
of the page table entries are compatible between v1 and v2 of the IOMMU.
This is in contrast to the current mainline code that incorrectly assumes
that the read and write permission bits are switched. Fix the permission
bits by reusing the v1 bit defines.

[0] https://github.com/rockchip-linux/kernel/commit/e3bc123a2260145e34b57454da3db0edd117eb8e

Fixes: c55356c534aa ("iommu: rockchip: Add support for iommu v2")
Signed-off-by: Michael Riesch <michael.riesch@wolfvision.net>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20221102063553.2464161-1-michael.riesch@wolfvision.net
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/rockchip-iommu.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c
index a3fc59b814ab5..a68eadd64f38d 100644
--- a/drivers/iommu/rockchip-iommu.c
+++ b/drivers/iommu/rockchip-iommu.c
@@ -280,19 +280,17 @@ static u32 rk_mk_pte(phys_addr_t page, int prot)
  *  11:9 - Page address bit 34:32
  *   8:4 - Page address bit 39:35
  *     3 - Security
- *     2 - Readable
- *     1 - Writable
+ *     2 - Writable
+ *     1 - Readable
  *     0 - 1 if Page @ Page address is valid
  */
-#define RK_PTE_PAGE_READABLE_V2      BIT(2)
-#define RK_PTE_PAGE_WRITABLE_V2      BIT(1)
 
 static u32 rk_mk_pte_v2(phys_addr_t page, int prot)
 {
 	u32 flags = 0;
 
-	flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE_V2 : 0;
-	flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE_V2 : 0;
+	flags |= (prot & IOMMU_READ) ? RK_PTE_PAGE_READABLE : 0;
+	flags |= (prot & IOMMU_WRITE) ? RK_PTE_PAGE_WRITABLE : 0;
 
 	return rk_mk_dte_v2(page) | flags;
 }
-- 
GitLab


From fdaeb224e2bf747942653e4f70226fcfe60fbf73 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Wed, 26 Oct 2022 12:16:13 -0700
Subject: [PATCH 0894/4122] crypto: tcrypt - Use pr_cont to print test results

For some test cases, a line break gets inserted between the test banner
and the results. For example, with mode=211 this is the output:

[...]
      testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[...] test 0 (160 bit key, 16 byte blocks):
[...] 1 operation in 2373 cycles (16 bytes)

--snip--

[...]
      testing speed of gcm(aes) (generic-gcm-aesni) encryption
[...] test 0 (128 bit key, 16 byte blocks):
[...] 1 operation in 2338 cycles (16 bytes)

Similar behavior is seen in the following cases as well:

  modprobe tcrypt mode=212
  modprobe tcrypt mode=213
  modprobe tcrypt mode=221
  modprobe tcrypt mode=300 sec=1
  modprobe tcrypt mode=400 sec=1

This doesn't happen with mode=215:

[...] tcrypt:
              testing speed of multibuffer rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[...] tcrypt: test 0 (160 bit key, 16 byte blocks): 1 operation in 2215 cycles (16 bytes)

--snip--

[...] tcrypt:
              testing speed of multibuffer gcm(aes) (generic-gcm-aesni) encryption
[...] tcrypt: test 0 (128 bit key, 16 byte blocks): 1 operation in 2191 cycles (16 bytes)

This print inconsistency is because printk() is used instead of pr_cont()
in a few places. Change these to be pr_cont().

checkpatch warns that pr_cont() shouldn't be used. This can be ignored in
this context as tcrypt already uses pr_cont().

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 3f7dc94a63e0b..2822405a0d45d 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -506,8 +506,8 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 
 out:
 	if (ret == 0)
-		printk("1 operation in %lu cycles (%d bytes)\n",
-		       (cycles + 4) / 8, blen);
+		pr_cont("1 operation in %lu cycles (%d bytes)\n",
+			(cycles + 4) / 8, blen);
 
 	return ret;
 }
@@ -727,8 +727,8 @@ static int test_ahash_jiffies_digest(struct ahash_request *req, int blen,
 			return ret;
 	}
 
-	printk("%6u opers/sec, %9lu bytes/sec\n",
-	       bcount / secs, ((long)bcount * blen) / secs);
+	pr_cont("%6u opers/sec, %9lu bytes/sec\n",
+		bcount / secs, ((long)bcount * blen) / secs);
 
 	return 0;
 }
-- 
GitLab


From 837a99f59043c3505d69761575172da1d09220b5 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Wed, 26 Oct 2022 12:16:14 -0700
Subject: [PATCH 0895/4122] crypto: tcrypt - Use pr_info/pr_err

Currently, there's mixed use of printk() and pr_info()/pr_err(). The latter
prints the module name (because pr_fmt() is defined so) but the former does
not. As a result there's inconsistency in the printed output. For example:

modprobe mode=211:

[...] test 0 (160 bit key, 16 byte blocks): 1 operation in 2320 cycles (16 bytes)
[...] test 1 (160 bit key, 64 byte blocks): 1 operation in 2336 cycles (64 bytes)

modprobe mode=215:

[...] tcrypt: test 0 (160 bit key, 16 byte blocks): 1 operation in 2173 cycles (16 bytes)
[...] tcrypt: test 1 (160 bit key, 64 byte blocks): 1 operation in 2241 cycles (64 bytes)

Replace all instances of printk() with pr_info()/pr_err() so that the
module name is printed consistently.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 2822405a0d45d..78b236bc9cd81 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -575,8 +575,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 	}
 
 	crypto_init_wait(&wait);
-	printk(KERN_INFO "\ntesting speed of %s (%s) %s\n", algo,
-			get_driver_name(crypto_aead, tfm), e);
+	pr_info("\ntesting speed of %s (%s) %s\n", algo,
+		get_driver_name(crypto_aead, tfm), e);
 
 	req = aead_request_alloc(tfm, GFP_KERNEL);
 	if (!req) {
@@ -624,8 +624,8 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 				memset(iv, 0xff, iv_len);
 
 			crypto_aead_clear_flags(tfm, ~0);
-			printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ",
-					i, *keysize * 8, bs);
+			pr_info("test %u (%d bit key, %d byte blocks): ",
+				i, *keysize * 8, bs);
 
 			memset(tvmem[0], 0xff, PAGE_SIZE);
 
@@ -877,8 +877,8 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
 		return;
 	}
 
-	printk(KERN_INFO "\ntesting speed of async %s (%s)\n", algo,
-			get_driver_name(crypto_ahash, tfm));
+	pr_info("\ntesting speed of async %s (%s)\n", algo,
+		get_driver_name(crypto_ahash, tfm));
 
 	if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) {
 		pr_err("digestsize(%u) > %d\n", crypto_ahash_digestsize(tfm),
@@ -2885,7 +2885,7 @@ static int __init tcrypt_mod_init(void)
 	err = do_test(alg, type, mask, mode, num_mb);
 
 	if (err) {
-		printk(KERN_ERR "tcrypt: one or more tests failed!\n");
+		pr_err("one or more tests failed!\n");
 		goto err_free_tv;
 	} else {
 		pr_debug("all tests passed\n");
-- 
GitLab


From a2ef563000aff04352a4aded7b2d2bf19a1674bf Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Wed, 26 Oct 2022 12:16:15 -0700
Subject: [PATCH 0896/4122] crypto: tcrypt - Drop module name from print string

The pr_fmt() define includes KBUILD_MODNAME, and so there's no need
for pr_err() to also print it. Drop module name from the print string.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 78b236bc9cd81..40e72ec0f5378 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1329,8 +1329,7 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs,
 
 	req = skcipher_request_alloc(tfm, GFP_KERNEL);
 	if (!req) {
-		pr_err("tcrypt: skcipher: Failed to allocate request for %s\n",
-		       algo);
+		pr_err("skcipher: Failed to allocate request for %s\n", algo);
 		goto out;
 	}
 
-- 
GitLab


From 3513828cb8f6db714ee48b1559e6253a57ecf1f6 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Wed, 26 Oct 2022 12:16:16 -0700
Subject: [PATCH 0897/4122] crypto: tcrypt - Drop leading newlines from prints

The top level print banners have a leading newline. It's not entirely
clear why this exists, but it makes it harder to parse tcrypt test output
using a script. Drop said newlines.

tcrypt output before this patch:

[...]
      testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[...] test 0 (160 bit key, 16 byte blocks): 1 operation in 2320 cycles (16 bytes)

tcrypt output with this patch:

[...] testing speed of rfc4106(gcm(aes)) (rfc4106-gcm-aesni) encryption
[...] test 0 (160 bit key, 16 byte blocks): 1 operation in 2320 cycles (16 bytes)

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 40e72ec0f5378..b096ae901aa80 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -324,7 +324,7 @@ static void test_mb_aead_speed(const char *algo, int enc, int secs,
 					  crypto_req_done, &data[i].wait);
 	}
 
-	pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+	pr_info("testing speed of multibuffer %s (%s) %s\n", algo,
 		get_driver_name(crypto_aead, tfm), e);
 
 	i = 0;
@@ -575,7 +575,7 @@ static void test_aead_speed(const char *algo, int enc, unsigned int secs,
 	}
 
 	crypto_init_wait(&wait);
-	pr_info("\ntesting speed of %s (%s) %s\n", algo,
+	pr_info("testing speed of %s (%s) %s\n", algo,
 		get_driver_name(crypto_aead, tfm), e);
 
 	req = aead_request_alloc(tfm, GFP_KERNEL);
@@ -877,7 +877,7 @@ static void test_ahash_speed_common(const char *algo, unsigned int secs,
 		return;
 	}
 
-	pr_info("\ntesting speed of async %s (%s)\n", algo,
+	pr_info("testing speed of async %s (%s)\n", algo,
 		get_driver_name(crypto_ahash, tfm));
 
 	if (crypto_ahash_digestsize(tfm) > MAX_DIGEST_SIZE) {
@@ -1117,7 +1117,7 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
 		crypto_init_wait(&data[i].wait);
 	}
 
-	pr_info("\ntesting speed of multibuffer %s (%s) %s\n", algo,
+	pr_info("testing speed of multibuffer %s (%s) %s\n", algo,
 		get_driver_name(crypto_skcipher, tfm), e);
 
 	i = 0;
@@ -1324,7 +1324,7 @@ static void test_skcipher_speed(const char *algo, int enc, unsigned int secs,
 		return;
 	}
 
-	pr_info("\ntesting speed of %s %s (%s) %s\n", async ? "async" : "sync",
+	pr_info("testing speed of %s %s (%s) %s\n", async ? "async" : "sync",
 		algo, get_driver_name(crypto_skcipher, tfm), e);
 
 	req = skcipher_request_alloc(tfm, GFP_KERNEL);
-- 
GitLab


From e1fa51aa2b04830495c1fc9d3d5dee4a4419b70d Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:54:53 +0800
Subject: [PATCH 0898/4122] crypto: arm64/sm3 - raise the priority of the CE
 implementation

Raise the priority of the sm3-ce algorithm from 200 to 400, this is
to make room for the implementation of sm3-neon.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm3-ce-glue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c
index ee98954ae8ca6..54bf6ebcfffb1 100644
--- a/arch/arm64/crypto/sm3-ce-glue.c
+++ b/arch/arm64/crypto/sm3-ce-glue.c
@@ -84,7 +84,7 @@ static struct shash_alg sm3_alg = {
 	.base.cra_driver_name	= "sm3-ce",
 	.base.cra_blocksize	= SM3_BLOCK_SIZE,
 	.base.cra_module	= THIS_MODULE,
-	.base.cra_priority	= 200,
+	.base.cra_priority	= 400,
 };
 
 static int __init sm3_ce_mod_init(void)
-- 
GitLab


From a41b2129461f6c88e087ca9a6e2fde34cb6deb48 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:54:54 +0800
Subject: [PATCH 0899/4122] crypto: arm64/sm3 - add NEON assembly
 implementation

This patch adds the NEON acceleration implementation of the SM3 hash
algorithm. The main algorithm is based on SM3 NEON accelerated work of
the libgcrypt project.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 326 mode
of tcrypt, and compares the performance data of sm3-generic and sm3-ce.
The abscissas are blocks of different lengths. The data is tabulated and
the unit is Mb/s:

update-size    |      16      64     256    1024    2048    4096    8192
---------------+--------------------------------------------------------
sm3-generic    |  185.24  221.28  301.26  307.43  300.83  308.82  308.91
sm3-neon       |  171.81  220.20  322.94  339.28  334.09  343.61  343.87
sm3-ce         |  227.48  333.48  502.62  527.87  520.45  534.91  535.40

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig         |  11 +
 arch/arm64/crypto/Makefile        |   3 +
 arch/arm64/crypto/sm3-neon-core.S | 600 ++++++++++++++++++++++++++++++
 arch/arm64/crypto/sm3-neon-glue.c | 103 +++++
 4 files changed, 717 insertions(+)
 create mode 100644 arch/arm64/crypto/sm3-neon-core.S
 create mode 100644 arch/arm64/crypto/sm3-neon-glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 8bd80508a710d..4b121dc0cfba2 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -96,6 +96,17 @@ config CRYPTO_SHA3_ARM64
 	  Architecture: arm64 using:
 	  - ARMv8.2 Crypto Extensions
 
+config CRYPTO_SM3_NEON
+	tristate "Hash functions: SM3 (NEON)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_HASH
+	select CRYPTO_SM3
+	help
+	  SM3 (ShangMi 3) secure hash function (OSCCA GM/T 0004-2012)
+
+	  Architecture: arm64 using:
+	  - NEON (Advanced SIMD) extensions
+
 config CRYPTO_SM3_ARM64_CE
 	tristate "Hash functions: SM3 (ARMv8.2 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 24bb0c4610de2..087f1625e7751 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -17,6 +17,9 @@ sha512-ce-y := sha512-ce-glue.o sha512-ce-core.o
 obj-$(CONFIG_CRYPTO_SHA3_ARM64) += sha3-ce.o
 sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o
 
+obj-$(CONFIG_CRYPTO_SM3_NEON) += sm3-neon.o
+sm3-neon-y := sm3-neon-glue.o sm3-neon-core.o
+
 obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o
 sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o
 
diff --git a/arch/arm64/crypto/sm3-neon-core.S b/arch/arm64/crypto/sm3-neon-core.S
new file mode 100644
index 0000000000000..3e3b4e5c736fc
--- /dev/null
+++ b/arch/arm64/crypto/sm3-neon-core.S
@@ -0,0 +1,600 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * sm3-neon-core.S - SM3 secure hash using NEON instructions
+ *
+ * Linux/arm64 port of the libgcrypt SM3 implementation for AArch64
+ *
+ * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (c) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+/* Context structure */
+
+#define state_h0 0
+#define state_h1 4
+#define state_h2 8
+#define state_h3 12
+#define state_h4 16
+#define state_h5 20
+#define state_h6 24
+#define state_h7 28
+
+/* Stack structure */
+
+#define STACK_W_SIZE        (32 * 2 * 3)
+
+#define STACK_W             (0)
+#define STACK_SIZE          (STACK_W + STACK_W_SIZE)
+
+/* Register macros */
+
+#define RSTATE x0
+#define RDATA  x1
+#define RNBLKS x2
+#define RKPTR  x28
+#define RFRAME x29
+
+#define ra w3
+#define rb w4
+#define rc w5
+#define rd w6
+#define re w7
+#define rf w8
+#define rg w9
+#define rh w10
+
+#define t0 w11
+#define t1 w12
+#define t2 w13
+#define t3 w14
+#define t4 w15
+#define t5 w16
+#define t6 w17
+
+#define k_even w19
+#define k_odd w20
+
+#define addr0 x21
+#define addr1 x22
+
+#define s0 w23
+#define s1 w24
+#define s2 w25
+#define s3 w26
+
+#define W0 v0
+#define W1 v1
+#define W2 v2
+#define W3 v3
+#define W4 v4
+#define W5 v5
+
+#define XTMP0 v6
+#define XTMP1 v7
+#define XTMP2 v16
+#define XTMP3 v17
+#define XTMP4 v18
+#define XTMP5 v19
+#define XTMP6 v20
+
+/* Helper macros. */
+
+#define _(...) /*_*/
+
+#define clear_vec(x) \
+	movi	x.8h, #0;
+
+#define rolw(o, a, n) \
+	ror	o, a, #(32 - n);
+
+/* Round function macros. */
+
+#define GG1_1(x, y, z, o, t) \
+	eor	o, x, y;
+#define GG1_2(x, y, z, o, t) \
+	eor	o, o, z;
+#define GG1_3(x, y, z, o, t)
+
+#define FF1_1(x, y, z, o, t) GG1_1(x, y, z, o, t)
+#define FF1_2(x, y, z, o, t)
+#define FF1_3(x, y, z, o, t) GG1_2(x, y, z, o, t)
+
+#define GG2_1(x, y, z, o, t) \
+	bic	o, z, x;
+#define GG2_2(x, y, z, o, t) \
+	and	t, y, x;
+#define GG2_3(x, y, z, o, t) \
+	eor	o, o, t;
+
+#define FF2_1(x, y, z, o, t) \
+	eor	o, x, y;
+#define FF2_2(x, y, z, o, t) \
+	and	t, x, y; \
+	and	o, o, z;
+#define FF2_3(x, y, z, o, t) \
+	eor	o, o, t;
+
+#define R(i, a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
+	K_LOAD(round);                                                        \
+	ldr	t5, [sp, #(wtype##_W1_ADDR(round, widx))];                    \
+	rolw(t0, a, 12);                              /* rol(a, 12) => t0 */  \
+      IOP(1, iop_param);                                                      \
+	FF##i##_1(a, b, c, t1, t2);                                           \
+	ldr	t6, [sp, #(wtype##_W1W2_ADDR(round, widx))];                  \
+	add	k, k, e;                                                      \
+      IOP(2, iop_param);                                                      \
+	GG##i##_1(e, f, g, t3, t4);                                           \
+	FF##i##_2(a, b, c, t1, t2);                                           \
+      IOP(3, iop_param);                                                      \
+	add	k, k, t0;                                                     \
+	add	h, h, t5;                                                     \
+	add	d, d, t6;                     /* w1w2 + d => d */             \
+      IOP(4, iop_param);                                                      \
+	rolw(k, k, 7);                        /* rol (t0 + e + t), 7) => k */ \
+	GG##i##_2(e, f, g, t3, t4);                                           \
+	add	h, h, k;                      /* h + w1 + k => h */           \
+      IOP(5, iop_param);                                                      \
+	FF##i##_3(a, b, c, t1, t2);                                           \
+	eor	t0, t0, k;                    /* k ^ t0 => t0 */              \
+	GG##i##_3(e, f, g, t3, t4);                                           \
+	add	d, d, t1;                     /* FF(a,b,c) + d => d */        \
+      IOP(6, iop_param);                                                      \
+	add	t3, t3, h;                    /* GG(e,f,g) + h => t3 */       \
+	rolw(b, b, 9);                        /* rol(b, 9) => b */            \
+	eor	h, t3, t3, ror #(32-9);                                       \
+      IOP(7, iop_param);                                                      \
+	add	d, d, t0;                     /* t0 + d => d */               \
+	rolw(f, f, 19);                       /* rol(f, 19) => f */           \
+      IOP(8, iop_param);                                                      \
+	eor	h, h, t3, ror #(32-17);       /* P0(t3) => h */
+
+#define R1(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
+	R(1, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)
+
+#define R2(a, b, c, d, e, f, g, h, k, K_LOAD, round, widx, wtype, IOP, iop_param) \
+	R(2, ##a, ##b, ##c, ##d, ##e, ##f, ##g, ##h, ##k, K_LOAD, round, widx, wtype, IOP, iop_param)
+
+#define KL(round) \
+	ldp	k_even, k_odd, [RKPTR, #(4*(round))];
+
+/* Input expansion macros. */
+
+/* Byte-swapped input address. */
+#define IW_W_ADDR(round, widx, offs) \
+	(STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))
+
+/* Expanded input address. */
+#define XW_W_ADDR(round, widx, offs) \
+	(STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))
+
+/* Rounds 1-12, byte-swapped input block addresses. */
+#define IW_W1_ADDR(round, widx)   IW_W_ADDR(round, widx, 32)
+#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 48)
+
+/* Rounds 1-12, expanded input block addresses. */
+#define XW_W1_ADDR(round, widx)   XW_W_ADDR(round, widx, 0)
+#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 16)
+
+/* Input block loading.
+ * Interleaving within round function needed for in-order CPUs. */
+#define LOAD_W_VEC_1_1() \
+	add	addr0, sp, #IW_W1_ADDR(0, 0);
+#define LOAD_W_VEC_1_2() \
+	add	addr1, sp, #IW_W1_ADDR(4, 0);
+#define LOAD_W_VEC_1_3() \
+	ld1	{W0.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_4() \
+	ld1	{W1.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_5() \
+	ld1	{W2.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_6() \
+	ld1	{W3.16b}, [RDATA], #16;
+#define LOAD_W_VEC_1_7() \
+	rev32	XTMP0.16b, W0.16b;
+#define LOAD_W_VEC_1_8() \
+	rev32	XTMP1.16b, W1.16b;
+#define LOAD_W_VEC_2_1() \
+	rev32	XTMP2.16b, W2.16b;
+#define LOAD_W_VEC_2_2() \
+	rev32	XTMP3.16b, W3.16b;
+#define LOAD_W_VEC_2_3() \
+	eor	XTMP4.16b, XTMP1.16b, XTMP0.16b;
+#define LOAD_W_VEC_2_4() \
+	eor	XTMP5.16b, XTMP2.16b, XTMP1.16b;
+#define LOAD_W_VEC_2_5() \
+	st1	{XTMP0.16b}, [addr0], #16;
+#define LOAD_W_VEC_2_6() \
+	st1	{XTMP4.16b}, [addr0]; \
+	add	addr0, sp, #IW_W1_ADDR(8, 0);
+#define LOAD_W_VEC_2_7() \
+	eor	XTMP6.16b, XTMP3.16b, XTMP2.16b;
+#define LOAD_W_VEC_2_8() \
+	ext	W0.16b, XTMP0.16b, XTMP0.16b, #8;  /* W0: xx, w0, xx, xx */
+#define LOAD_W_VEC_3_1() \
+	mov	W2.16b, XTMP1.16b;                 /* W2: xx, w6, w5, w4 */
+#define LOAD_W_VEC_3_2() \
+	st1	{XTMP1.16b}, [addr1], #16;
+#define LOAD_W_VEC_3_3() \
+	st1	{XTMP5.16b}, [addr1]; \
+	ext	W1.16b, XTMP0.16b, XTMP0.16b, #4;  /* W1: xx, w3, w2, w1 */
+#define LOAD_W_VEC_3_4() \
+	ext	W3.16b, XTMP1.16b, XTMP2.16b, #12; /* W3: xx, w9, w8, w7 */
+#define LOAD_W_VEC_3_5() \
+	ext	W4.16b, XTMP2.16b, XTMP3.16b, #8;  /* W4: xx, w12, w11, w10 */
+#define LOAD_W_VEC_3_6() \
+	st1	{XTMP2.16b}, [addr0], #16;
+#define LOAD_W_VEC_3_7() \
+	st1	{XTMP6.16b}, [addr0];
+#define LOAD_W_VEC_3_8() \
+	ext	W5.16b, XTMP3.16b, XTMP3.16b, #4;  /* W5: xx, w15, w14, w13 */
+
+#define LOAD_W_VEC_1(iop_num, ...) \
+	LOAD_W_VEC_1_##iop_num()
+#define LOAD_W_VEC_2(iop_num, ...) \
+	LOAD_W_VEC_2_##iop_num()
+#define LOAD_W_VEC_3(iop_num, ...) \
+	LOAD_W_VEC_3_##iop_num()
+
+/* Message scheduling. Note: 3 words per vector register.
+ * Interleaving within round function needed for in-order CPUs. */
+#define SCHED_W_1_1(round, w0, w1, w2, w3, w4, w5) \
+	/* Load (w[i - 16]) => XTMP0 */            \
+	/* Load (w[i - 13]) => XTMP5 */            \
+	ext	XTMP0.16b, w0.16b, w0.16b, #12;    /* XTMP0: w0, xx, xx, xx */
+#define SCHED_W_1_2(round, w0, w1, w2, w3, w4, w5) \
+	ext	XTMP5.16b, w1.16b, w1.16b, #12;
+#define SCHED_W_1_3(round, w0, w1, w2, w3, w4, w5) \
+	ext	XTMP0.16b, XTMP0.16b, w1.16b, #12; /* XTMP0: xx, w2, w1, w0 */
+#define SCHED_W_1_4(round, w0, w1, w2, w3, w4, w5) \
+	ext	XTMP5.16b, XTMP5.16b, w2.16b, #12;
+#define SCHED_W_1_5(round, w0, w1, w2, w3, w4, w5) \
+	/* w[i - 9] == w3 */                       \
+	/* W3 ^ XTMP0 => XTMP0 */                  \
+	eor	XTMP0.16b, XTMP0.16b, w3.16b;
+#define SCHED_W_1_6(round, w0, w1, w2, w3, w4, w5) \
+	/* w[i - 3] == w5 */                       \
+	/* rol(XMM5, 15) ^ XTMP0 => XTMP0 */       \
+	/* rol(XTMP5, 7) => XTMP1 */               \
+	add	addr0, sp, #XW_W1_ADDR((round), 0); \
+	shl	XTMP2.4s, w5.4s, #15;
+#define SCHED_W_1_7(round, w0, w1, w2, w3, w4, w5) \
+	shl	XTMP1.4s, XTMP5.4s, #7;
+#define SCHED_W_1_8(round, w0, w1, w2, w3, w4, w5) \
+	sri	XTMP2.4s, w5.4s, #(32-15);
+#define SCHED_W_2_1(round, w0, w1, w2, w3, w4, w5) \
+	sri	XTMP1.4s, XTMP5.4s, #(32-7);
+#define SCHED_W_2_2(round, w0, w1, w2, w3, w4, w5) \
+	eor	XTMP0.16b, XTMP0.16b, XTMP2.16b;
+#define SCHED_W_2_3(round, w0, w1, w2, w3, w4, w5) \
+	/* w[i - 6] == W4 */                       \
+	/* W4 ^ XTMP1 => XTMP1 */                  \
+	eor	XTMP1.16b, XTMP1.16b, w4.16b;
+#define SCHED_W_2_4(round, w0, w1, w2, w3, w4, w5) \
+	/* P1(XTMP0) ^ XTMP1 => W0 */              \
+	shl	XTMP3.4s, XTMP0.4s, #15;
+#define SCHED_W_2_5(round, w0, w1, w2, w3, w4, w5) \
+	shl	XTMP4.4s, XTMP0.4s, #23;
+#define SCHED_W_2_6(round, w0, w1, w2, w3, w4, w5) \
+	eor	w0.16b, XTMP1.16b, XTMP0.16b;
+#define SCHED_W_2_7(round, w0, w1, w2, w3, w4, w5) \
+	sri	XTMP3.4s, XTMP0.4s, #(32-15);
+#define SCHED_W_2_8(round, w0, w1, w2, w3, w4, w5) \
+	sri	XTMP4.4s, XTMP0.4s, #(32-23);
+#define SCHED_W_3_1(round, w0, w1, w2, w3, w4, w5) \
+	eor	w0.16b, w0.16b, XTMP3.16b;
+#define SCHED_W_3_2(round, w0, w1, w2, w3, w4, w5) \
+	/* Load (w[i - 3]) => XTMP2 */             \
+	ext	XTMP2.16b, w4.16b, w4.16b, #12;
+#define SCHED_W_3_3(round, w0, w1, w2, w3, w4, w5) \
+	eor	w0.16b, w0.16b, XTMP4.16b;
+#define SCHED_W_3_4(round, w0, w1, w2, w3, w4, w5) \
+	ext	XTMP2.16b, XTMP2.16b, w5.16b, #12;
+#define SCHED_W_3_5(round, w0, w1, w2, w3, w4, w5) \
+	/* W1 ^ W2 => XTMP3 */                     \
+	eor	XTMP3.16b, XTMP2.16b, w0.16b;
+#define SCHED_W_3_6(round, w0, w1, w2, w3, w4, w5)
+#define SCHED_W_3_7(round, w0, w1, w2, w3, w4, w5) \
+	st1	{XTMP2.16b-XTMP3.16b}, [addr0];
+#define SCHED_W_3_8(round, w0, w1, w2, w3, w4, w5)
+
+#define SCHED_W_W0W1W2W3W4W5_1(iop_num, round) \
+	SCHED_W_1_##iop_num(round, W0, W1, W2, W3, W4, W5)
+#define SCHED_W_W0W1W2W3W4W5_2(iop_num, round) \
+	SCHED_W_2_##iop_num(round, W0, W1, W2, W3, W4, W5)
+#define SCHED_W_W0W1W2W3W4W5_3(iop_num, round) \
+	SCHED_W_3_##iop_num(round, W0, W1, W2, W3, W4, W5)
+
+#define SCHED_W_W1W2W3W4W5W0_1(iop_num, round) \
+	SCHED_W_1_##iop_num(round, W1, W2, W3, W4, W5, W0)
+#define SCHED_W_W1W2W3W4W5W0_2(iop_num, round) \
+	SCHED_W_2_##iop_num(round, W1, W2, W3, W4, W5, W0)
+#define SCHED_W_W1W2W3W4W5W0_3(iop_num, round) \
+	SCHED_W_3_##iop_num(round, W1, W2, W3, W4, W5, W0)
+
+#define SCHED_W_W2W3W4W5W0W1_1(iop_num, round) \
+	SCHED_W_1_##iop_num(round, W2, W3, W4, W5, W0, W1)
+#define SCHED_W_W2W3W4W5W0W1_2(iop_num, round) \
+	SCHED_W_2_##iop_num(round, W2, W3, W4, W5, W0, W1)
+#define SCHED_W_W2W3W4W5W0W1_3(iop_num, round) \
+	SCHED_W_3_##iop_num(round, W2, W3, W4, W5, W0, W1)
+
+#define SCHED_W_W3W4W5W0W1W2_1(iop_num, round) \
+	SCHED_W_1_##iop_num(round, W3, W4, W5, W0, W1, W2)
+#define SCHED_W_W3W4W5W0W1W2_2(iop_num, round) \
+	SCHED_W_2_##iop_num(round, W3, W4, W5, W0, W1, W2)
+#define SCHED_W_W3W4W5W0W1W2_3(iop_num, round) \
+	SCHED_W_3_##iop_num(round, W3, W4, W5, W0, W1, W2)
+
+#define SCHED_W_W4W5W0W1W2W3_1(iop_num, round) \
+	SCHED_W_1_##iop_num(round, W4, W5, W0, W1, W2, W3)
+#define SCHED_W_W4W5W0W1W2W3_2(iop_num, round) \
+	SCHED_W_2_##iop_num(round, W4, W5, W0, W1, W2, W3)
+#define SCHED_W_W4W5W0W1W2W3_3(iop_num, round) \
+	SCHED_W_3_##iop_num(round, W4, W5, W0, W1, W2, W3)
+
+#define SCHED_W_W5W0W1W2W3W4_1(iop_num, round) \
+	SCHED_W_1_##iop_num(round, W5, W0, W1, W2, W3, W4)
+#define SCHED_W_W5W0W1W2W3W4_2(iop_num, round) \
+	SCHED_W_2_##iop_num(round, W5, W0, W1, W2, W3, W4)
+#define SCHED_W_W5W0W1W2W3W4_3(iop_num, round) \
+	SCHED_W_3_##iop_num(round, W5, W0, W1, W2, W3, W4)
+
+
+	/*
+	 * Transform blocks*64 bytes (blocks*16 32-bit words) at 'src'.
+	 *
+	 * void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
+	 *                         int blocks)
+	 */
+	.text
+.align 3
+SYM_FUNC_START(sm3_neon_transform)
+	ldp		ra, rb, [RSTATE, #0]
+	ldp		rc, rd, [RSTATE, #8]
+	ldp		re, rf, [RSTATE, #16]
+	ldp		rg, rh, [RSTATE, #24]
+
+	stp		x28, x29, [sp, #-16]!
+	stp		x19, x20, [sp, #-16]!
+	stp		x21, x22, [sp, #-16]!
+	stp		x23, x24, [sp, #-16]!
+	stp		x25, x26, [sp, #-16]!
+	mov		RFRAME, sp
+
+	sub		addr0, sp, #STACK_SIZE
+	adr_l		RKPTR, .LKtable
+	and		sp, addr0, #(~63)
+
+	/* Preload first block. */
+	LOAD_W_VEC_1(1, 0)
+	LOAD_W_VEC_1(2, 0)
+	LOAD_W_VEC_1(3, 0)
+	LOAD_W_VEC_1(4, 0)
+	LOAD_W_VEC_1(5, 0)
+	LOAD_W_VEC_1(6, 0)
+	LOAD_W_VEC_1(7, 0)
+	LOAD_W_VEC_1(8, 0)
+	LOAD_W_VEC_2(1, 0)
+	LOAD_W_VEC_2(2, 0)
+	LOAD_W_VEC_2(3, 0)
+	LOAD_W_VEC_2(4, 0)
+	LOAD_W_VEC_2(5, 0)
+	LOAD_W_VEC_2(6, 0)
+	LOAD_W_VEC_2(7, 0)
+	LOAD_W_VEC_2(8, 0)
+	LOAD_W_VEC_3(1, 0)
+	LOAD_W_VEC_3(2, 0)
+	LOAD_W_VEC_3(3, 0)
+	LOAD_W_VEC_3(4, 0)
+	LOAD_W_VEC_3(5, 0)
+	LOAD_W_VEC_3(6, 0)
+	LOAD_W_VEC_3(7, 0)
+	LOAD_W_VEC_3(8, 0)
+
+.balign 16
+.Loop:
+	/* Transform 0-3 */
+	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 0, 0, IW, _, 0)
+	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  1, 1, IW, _, 0)
+	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 2, 2, IW, _, 0)
+	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  3, 3, IW, _, 0)
+
+	/* Transform 4-7 + Precalc 12-14 */
+	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 4, 0, IW, _, 0)
+	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  5, 1, IW, _, 0)
+	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 6, 2, IW, SCHED_W_W0W1W2W3W4W5_1, 12)
+	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  7, 3, IW, SCHED_W_W0W1W2W3W4W5_2, 12)
+
+	/* Transform 8-11 + Precalc 12-17 */
+	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 8, 0, IW, SCHED_W_W0W1W2W3W4W5_3, 12)
+	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  9, 1, IW, SCHED_W_W1W2W3W4W5W0_1, 15)
+	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 10, 2, IW, SCHED_W_W1W2W3W4W5W0_2, 15)
+	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  11, 3, IW, SCHED_W_W1W2W3W4W5W0_3, 15)
+
+	/* Transform 12-14 + Precalc 18-20 */
+	R1(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 12, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 18)
+	R1(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  13, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 18)
+	R1(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 14, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 18)
+
+	/* Transform 15-17 + Precalc 21-23 */
+	R1(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  15, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 21)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 16, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 21)
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  17, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 21)
+
+	/* Transform 18-20 + Precalc 24-26 */
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 18, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 24)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  19, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 24)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 20, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 24)
+
+	/* Transform 21-23 + Precalc 27-29 */
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  21, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 27)
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 22, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 27)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  23, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 27)
+
+	/* Transform 24-26 + Precalc 30-32 */
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 24, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 30)
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  25, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 30)
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 26, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 30)
+
+	/* Transform 27-29 + Precalc 33-35 */
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  27, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 33)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 28, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 33)
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  29, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 33)
+
+	/* Transform 30-32 + Precalc 36-38 */
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 30, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 36)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  31, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 36)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 32, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 36)
+
+	/* Transform 33-35 + Precalc 39-41 */
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  33, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 39)
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 34, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 39)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  35, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 39)
+
+	/* Transform 36-38 + Precalc 42-44 */
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 36, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 42)
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  37, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 42)
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 38, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 42)
+
+	/* Transform 39-41 + Precalc 45-47 */
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  39, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 45)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 40, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 45)
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  41, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 45)
+
+	/* Transform 42-44 + Precalc 48-50 */
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 42, 0, XW, SCHED_W_W0W1W2W3W4W5_1, 48)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  43, 1, XW, SCHED_W_W0W1W2W3W4W5_2, 48)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 44, 2, XW, SCHED_W_W0W1W2W3W4W5_3, 48)
+
+	/* Transform 45-47 + Precalc 51-53 */
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  45, 0, XW, SCHED_W_W1W2W3W4W5W0_1, 51)
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 46, 1, XW, SCHED_W_W1W2W3W4W5W0_2, 51)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  47, 2, XW, SCHED_W_W1W2W3W4W5W0_3, 51)
+
+	/* Transform 48-50 + Precalc 54-56 */
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 48, 0, XW, SCHED_W_W2W3W4W5W0W1_1, 54)
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  49, 1, XW, SCHED_W_W2W3W4W5W0W1_2, 54)
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 50, 2, XW, SCHED_W_W2W3W4W5W0W1_3, 54)
+
+	/* Transform 51-53 + Precalc 57-59 */
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  51, 0, XW, SCHED_W_W3W4W5W0W1W2_1, 57)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 52, 1, XW, SCHED_W_W3W4W5W0W1W2_2, 57)
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  53, 2, XW, SCHED_W_W3W4W5W0W1W2_3, 57)
+
+	/* Transform 54-56 + Precalc 60-62 */
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 54, 0, XW, SCHED_W_W4W5W0W1W2W3_1, 60)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  55, 1, XW, SCHED_W_W4W5W0W1W2W3_2, 60)
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 56, 2, XW, SCHED_W_W4W5W0W1W2W3_3, 60)
+
+	/* Transform 57-59 + Precalc 63 */
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  57, 0, XW, SCHED_W_W5W0W1W2W3W4_1, 63)
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 58, 1, XW, SCHED_W_W5W0W1W2W3W4_2, 63)
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  59, 2, XW, SCHED_W_W5W0W1W2W3W4_3, 63)
+
+	/* Transform 60 */
+	R2(ra, rb, rc, rd, re, rf, rg, rh, k_even, KL, 60, 0, XW, _, _)
+	subs		RNBLKS, RNBLKS, #1
+	b.eq		.Lend
+
+	/* Transform 61-63 + Preload next block */
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, LOAD_W_VEC_1, _)
+	ldp		s0, s1, [RSTATE, #0]
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, LOAD_W_VEC_2, _)
+	ldp		s2, s3, [RSTATE, #8]
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, LOAD_W_VEC_3, _)
+
+	/* Update the chaining variables. */
+	eor		ra, ra, s0
+	eor		rb, rb, s1
+	ldp		s0, s1, [RSTATE, #16]
+	eor		rc, rc, s2
+	ldp		k_even, k_odd, [RSTATE, #24]
+	eor		rd, rd, s3
+	eor		re, re, s0
+	stp		ra, rb, [RSTATE, #0]
+	eor		rf, rf, s1
+	stp		rc, rd, [RSTATE, #8]
+	eor		rg, rg, k_even
+	stp		re, rf, [RSTATE, #16]
+	eor		rh, rh, k_odd
+	stp		rg, rh, [RSTATE, #24]
+	b		.Loop
+
+.Lend:
+	/* Transform 61-63 */
+	R2(rd, ra, rb, rc, rh, re, rf, rg, k_odd,  _,  61, 1, XW, _, _)
+	ldp		s0, s1, [RSTATE, #0]
+	R2(rc, rd, ra, rb, rg, rh, re, rf, k_even, KL, 62, 2, XW, _, _)
+	ldp		s2, s3, [RSTATE, #8]
+	R2(rb, rc, rd, ra, rf, rg, rh, re, k_odd,  _,  63, 0, XW, _, _)
+
+	/* Update the chaining variables. */
+	eor		ra, ra, s0
+	clear_vec(W0)
+	eor		rb, rb, s1
+	clear_vec(W1)
+	ldp		s0, s1, [RSTATE, #16]
+	clear_vec(W2)
+	eor		rc, rc, s2
+	clear_vec(W3)
+	ldp		k_even, k_odd, [RSTATE, #24]
+	clear_vec(W4)
+	eor		rd, rd, s3
+	clear_vec(W5)
+	eor		re, re, s0
+	clear_vec(XTMP0)
+	stp		ra, rb, [RSTATE, #0]
+	clear_vec(XTMP1)
+	eor		rf, rf, s1
+	clear_vec(XTMP2)
+	stp		rc, rd, [RSTATE, #8]
+	clear_vec(XTMP3)
+	eor		rg, rg, k_even
+	clear_vec(XTMP4)
+	stp		re, rf, [RSTATE, #16]
+	clear_vec(XTMP5)
+	eor		rh, rh, k_odd
+	clear_vec(XTMP6)
+	stp		rg, rh, [RSTATE, #24]
+
+	/* Clear message expansion area */
+	add		addr0, sp, #STACK_W
+	st1		{W0.16b-W3.16b}, [addr0], #64
+	st1		{W0.16b-W3.16b}, [addr0], #64
+	st1		{W0.16b-W3.16b}, [addr0]
+
+	mov		sp, RFRAME
+
+	ldp		x25, x26, [sp], #16
+	ldp		x23, x24, [sp], #16
+	ldp		x21, x22, [sp], #16
+	ldp		x19, x20, [sp], #16
+	ldp		x28, x29, [sp], #16
+
+	ret
+SYM_FUNC_END(sm3_neon_transform)
+
+
+	.section	".rodata", "a"
+
+	.align 4
+.LKtable:
+	.long 0x79cc4519, 0xf3988a32, 0xe7311465, 0xce6228cb
+	.long 0x9cc45197, 0x3988a32f, 0x7311465e, 0xe6228cbc
+	.long 0xcc451979, 0x988a32f3, 0x311465e7, 0x6228cbce
+	.long 0xc451979c, 0x88a32f39, 0x11465e73, 0x228cbce6
+	.long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
+	.long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
+	.long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
+	.long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
+	.long 0x7a879d8a, 0xf50f3b14, 0xea1e7629, 0xd43cec53
+	.long 0xa879d8a7, 0x50f3b14f, 0xa1e7629e, 0x43cec53d
+	.long 0x879d8a7a, 0x0f3b14f5, 0x1e7629ea, 0x3cec53d4
+	.long 0x79d8a7a8, 0xf3b14f50, 0xe7629ea1, 0xcec53d43
+	.long 0x9d8a7a87, 0x3b14f50f, 0x7629ea1e, 0xec53d43c
+	.long 0xd8a7a879, 0xb14f50f3, 0x629ea1e7, 0xc53d43ce
+	.long 0x8a7a879d, 0x14f50f3b, 0x29ea1e76, 0x53d43cec
+	.long 0xa7a879d8, 0x4f50f3b1, 0x9ea1e762, 0x3d43cec5
diff --git a/arch/arm64/crypto/sm3-neon-glue.c b/arch/arm64/crypto/sm3-neon-glue.c
new file mode 100644
index 0000000000000..7182ee683f14a
--- /dev/null
+++ b/arch/arm64/crypto/sm3-neon-glue.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * sm3-neon-glue.c - SM3 secure hash using NEON instructions
+ *
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
+#include <crypto/internal/hash.h>
+#include <crypto/internal/simd.h>
+#include <crypto/sm3.h>
+#include <crypto/sm3_base.h>
+#include <linux/cpufeature.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+
+
+asmlinkage void sm3_neon_transform(struct sm3_state *sst, u8 const *src,
+				   int blocks);
+
+static int sm3_neon_update(struct shash_desc *desc, const u8 *data,
+			   unsigned int len)
+{
+	if (!crypto_simd_usable()) {
+		sm3_update(shash_desc_ctx(desc), data, len);
+		return 0;
+	}
+
+	kernel_neon_begin();
+	sm3_base_do_update(desc, data, len, sm3_neon_transform);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sm3_neon_final(struct shash_desc *desc, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		sm3_final(shash_desc_ctx(desc), out);
+		return 0;
+	}
+
+	kernel_neon_begin();
+	sm3_base_do_finalize(desc, sm3_neon_transform);
+	kernel_neon_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static int sm3_neon_finup(struct shash_desc *desc, const u8 *data,
+			  unsigned int len, u8 *out)
+{
+	if (!crypto_simd_usable()) {
+		struct sm3_state *sctx = shash_desc_ctx(desc);
+
+		if (len)
+			sm3_update(sctx, data, len);
+		sm3_final(sctx, out);
+		return 0;
+	}
+
+	kernel_neon_begin();
+	if (len)
+		sm3_base_do_update(desc, data, len, sm3_neon_transform);
+	sm3_base_do_finalize(desc, sm3_neon_transform);
+	kernel_neon_end();
+
+	return sm3_base_finish(desc, out);
+}
+
+static struct shash_alg sm3_alg = {
+	.digestsize		= SM3_DIGEST_SIZE,
+	.init			= sm3_base_init,
+	.update			= sm3_neon_update,
+	.final			= sm3_neon_final,
+	.finup			= sm3_neon_finup,
+	.descsize		= sizeof(struct sm3_state),
+	.base.cra_name		= "sm3",
+	.base.cra_driver_name	= "sm3-neon",
+	.base.cra_blocksize	= SM3_BLOCK_SIZE,
+	.base.cra_module	= THIS_MODULE,
+	.base.cra_priority	= 200,
+};
+
+static int __init sm3_neon_init(void)
+{
+	return crypto_register_shash(&sm3_alg);
+}
+
+static void __exit sm3_neon_fini(void)
+{
+	crypto_unregister_shash(&sm3_alg);
+}
+
+module_init(sm3_neon_init);
+module_exit(sm3_neon_fini);
+
+MODULE_DESCRIPTION("SM3 secure hash using NEON instructions");
+MODULE_AUTHOR("Jussi Kivilinna <jussi.kivilinna@iki.fi>");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 62508017a264133a62987fe40f70c68af9a36572 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:54:55 +0800
Subject: [PATCH 0900/4122] crypto: arm64/sm4 - refactor and simplify NEON
 implementation

This patch does not add new features. The main work is to refactor and
simplify the implementation of SM4 NEON, which is reflected in the
following aspects:

The accelerated implementation supports the arbitrary number of blocks,
not just multiples of 8, which simplifies the implementation and brings
some optimization acceleration for data that is not aligned by 8 blocks.

When loading the input data, use the ld4 instruction to replace the
original ld1 instruction as much as possible, which will save the cost
of matrix transposition of the input data.

Use 8-block parallelism whenever possible to speed up matrix transpose
and rotation operations, instead of up to 4-block parallelism.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm4-neon-core.S | 630 +++++++++++++++++++-----------
 arch/arm64/crypto/sm4-neon-glue.c | 172 +++-----
 2 files changed, 456 insertions(+), 346 deletions(-)

diff --git a/arch/arm64/crypto/sm4-neon-core.S b/arch/arm64/crypto/sm4-neon-core.S
index 3d5256b354d27..f295b4b7d70a3 100644
--- a/arch/arm64/crypto/sm4-neon-core.S
+++ b/arch/arm64/crypto/sm4-neon-core.S
@@ -18,6 +18,11 @@
 #define RTMP2	v10
 #define RTMP3	v11
 
+#define RTMP4	v12
+#define RTMP5	v13
+#define RTMP6	v14
+#define RTMP7	v15
+
 #define RX0	v12
 #define RX1	v13
 #define RKEY	v14
@@ -25,7 +30,7 @@
 
 /* Helper macros. */
 
-#define PREPARE                                                 \
+#define SM4_PREPARE()                                           \
 	adr_l		x5, crypto_sm4_sbox;                    \
 	ld1		{v16.16b-v19.16b}, [x5], #64;           \
 	ld1		{v20.16b-v23.16b}, [x5], #64;           \
@@ -42,7 +47,25 @@
 	zip1		s2.2d, RTMP2.2d, RTMP3.2d;              \
 	zip2		s3.2d, RTMP2.2d, RTMP3.2d;
 
-#define rotate_clockwise_90(s0, s1, s2, s3)                     \
+#define transpose_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7)        \
+	zip1		RTMP0.4s, s0.4s, s1.4s;                 \
+	zip1		RTMP1.4s, s2.4s, s3.4s;                 \
+	zip2		RTMP2.4s, s0.4s, s1.4s;                 \
+	zip2		RTMP3.4s, s2.4s, s3.4s;                 \
+	zip1		RTMP4.4s, s4.4s, s5.4s;                 \
+	zip1		RTMP5.4s, s6.4s, s7.4s;                 \
+	zip2		RTMP6.4s, s4.4s, s5.4s;                 \
+	zip2		RTMP7.4s, s6.4s, s7.4s;                 \
+	zip1		s0.2d, RTMP0.2d, RTMP1.2d;              \
+	zip2		s1.2d, RTMP0.2d, RTMP1.2d;              \
+	zip1		s2.2d, RTMP2.2d, RTMP3.2d;              \
+	zip2		s3.2d, RTMP2.2d, RTMP3.2d;              \
+	zip1		s4.2d, RTMP4.2d, RTMP5.2d;              \
+	zip2		s5.2d, RTMP4.2d, RTMP5.2d;              \
+	zip1		s6.2d, RTMP6.2d, RTMP7.2d;              \
+	zip2		s7.2d, RTMP6.2d, RTMP7.2d;
+
+#define rotate_clockwise_4x4(s0, s1, s2, s3)                    \
 	zip1		RTMP0.4s, s1.4s, s0.4s;                 \
 	zip2		RTMP1.4s, s1.4s, s0.4s;                 \
 	zip1		RTMP2.4s, s3.4s, s2.4s;                 \
@@ -52,6 +75,24 @@
 	zip1		s2.2d, RTMP3.2d, RTMP1.2d;              \
 	zip2		s3.2d, RTMP3.2d, RTMP1.2d;
 
+#define rotate_clockwise_4x4_2x(s0, s1, s2, s3, s4, s5, s6, s7) \
+	zip1		RTMP0.4s, s1.4s, s0.4s;                 \
+	zip1		RTMP2.4s, s3.4s, s2.4s;                 \
+	zip2		RTMP1.4s, s1.4s, s0.4s;                 \
+	zip2		RTMP3.4s, s3.4s, s2.4s;                 \
+	zip1		RTMP4.4s, s5.4s, s4.4s;                 \
+	zip1		RTMP6.4s, s7.4s, s6.4s;                 \
+	zip2		RTMP5.4s, s5.4s, s4.4s;                 \
+	zip2		RTMP7.4s, s7.4s, s6.4s;                 \
+	zip1		s0.2d, RTMP2.2d, RTMP0.2d;              \
+	zip2		s1.2d, RTMP2.2d, RTMP0.2d;              \
+	zip1		s2.2d, RTMP3.2d, RTMP1.2d;              \
+	zip2		s3.2d, RTMP3.2d, RTMP1.2d;              \
+	zip1		s4.2d, RTMP6.2d, RTMP4.2d;              \
+	zip2		s5.2d, RTMP6.2d, RTMP4.2d;              \
+	zip1		s6.2d, RTMP7.2d, RTMP5.2d;              \
+	zip2		s7.2d, RTMP7.2d, RTMP5.2d;
+
 #define ROUND4(round, s0, s1, s2, s3)                           \
 	dup		RX0.4s, RKEY.s[round];                  \
 	/* rk ^ s1 ^ s2 ^ s3 */                                 \
@@ -87,14 +128,7 @@
 	/* s0 ^= RTMP3 */                                       \
 	eor		s0.16b, s0.16b, RTMP3.16b;
 
-#define SM4_CRYPT_BLK4(b0, b1, b2, b3)                          \
-	rev32		b0.16b, b0.16b;                         \
-	rev32		b1.16b, b1.16b;                         \
-	rev32		b2.16b, b2.16b;                         \
-	rev32		b3.16b, b3.16b;                         \
-                                                                \
-	transpose_4x4(b0, b1, b2, b3);                          \
-                                                                \
+#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3)                       \
 	mov		x6, 8;                                  \
 4:                                                              \
 	ld1		{RKEY.4s}, [x0], #16;                   \
@@ -107,15 +141,23 @@
                                                                 \
 	bne		4b;                                     \
                                                                 \
-	rotate_clockwise_90(b0, b1, b2, b3);                    \
 	rev32		b0.16b, b0.16b;                         \
 	rev32		b1.16b, b1.16b;                         \
 	rev32		b2.16b, b2.16b;                         \
 	rev32		b3.16b, b3.16b;                         \
                                                                 \
+	rotate_clockwise_4x4(b0, b1, b2, b3);                   \
+                                                                \
 	/* repoint to rkey */                                   \
 	sub		x0, x0, #128;
 
+#define SM4_CRYPT_BLK4(b0, b1, b2, b3)                          \
+	rev32		b0.16b, b0.16b;                         \
+	rev32		b1.16b, b1.16b;                         \
+	rev32		b2.16b, b2.16b;                         \
+	rev32		b3.16b, b3.16b;                         \
+	SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
+
 #define ROUND8(round, s0, s1, s2, s3, t0, t1, t2, t3)           \
 	/* rk ^ s1 ^ s2 ^ s3 */                                 \
 	dup		RX0.4s, RKEY.s[round];                  \
@@ -175,7 +217,7 @@
 	eor		s0.16b, s0.16b, RTMP0.16b;              \
 	eor		t0.16b, t0.16b, RTMP1.16b;
 
-#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7)          \
+#define SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7) \
 	rev32		b0.16b, b0.16b;                         \
 	rev32		b1.16b, b1.16b;                         \
 	rev32		b2.16b, b2.16b;                         \
@@ -185,9 +227,6 @@
 	rev32		b6.16b, b6.16b;                         \
 	rev32		b7.16b, b7.16b;                         \
                                                                 \
-	transpose_4x4(b0, b1, b2, b3);                          \
-	transpose_4x4(b4, b5, b6, b7);                          \
-                                                                \
 	mov		x6, 8;                                  \
 8:                                                              \
 	ld1		{RKEY.4s}, [x0], #16;                   \
@@ -200,8 +239,6 @@
                                                                 \
 	bne		8b;                                     \
                                                                 \
-	rotate_clockwise_90(b0, b1, b2, b3);                    \
-	rotate_clockwise_90(b4, b5, b6, b7);                    \
 	rev32		b0.16b, b0.16b;                         \
 	rev32		b1.16b, b1.16b;                         \
 	rev32		b2.16b, b2.16b;                         \
@@ -214,274 +251,429 @@
 	/* repoint to rkey */                                   \
 	sub		x0, x0, #128;
 
+#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7)			\
+	SM4_CRYPT_BLK8_norotate(b0, b1, b2, b3, b4, b5, b6, b7);	\
+	rotate_clockwise_4x4_2x(b0, b1, b2, b3, b4, b5, b6, b7);	\
 
-.align 3
-SYM_FUNC_START_LOCAL(__sm4_neon_crypt_blk1_4)
-	/* input:
-	 *   x0: round key array, CTX
-	 *   x1: dst
-	 *   x2: src
-	 *   w3: num blocks (1..4)
-	 */
-	PREPARE;
-
-	ld1		{v0.16b}, [x2], #16;
-	mov		v1.16b, v0.16b;
-	mov		v2.16b, v0.16b;
-	mov		v3.16b, v0.16b;
-	cmp		w3, #2;
-	blt		.Lblk4_load_input_done;
-	ld1		{v1.16b}, [x2], #16;
-	beq		.Lblk4_load_input_done;
-	ld1		{v2.16b}, [x2], #16;
-	cmp		w3, #3;
-	beq		.Lblk4_load_input_done;
-	ld1		{v3.16b}, [x2];
-
-.Lblk4_load_input_done:
-	SM4_CRYPT_BLK4(v0, v1, v2, v3);
-
-	st1		{v0.16b}, [x1], #16;
-	cmp		w3, #2;
-	blt		.Lblk4_store_output_done;
-	st1		{v1.16b}, [x1], #16;
-	beq		.Lblk4_store_output_done;
-	st1		{v2.16b}, [x1], #16;
-	cmp		w3, #3;
-	beq		.Lblk4_store_output_done;
-	st1		{v3.16b}, [x1];
-
-.Lblk4_store_output_done:
-	ret;
-SYM_FUNC_END(__sm4_neon_crypt_blk1_4)
 
 .align 3
-SYM_FUNC_START(sm4_neon_crypt_blk1_8)
+SYM_FUNC_START(sm4_neon_crypt)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
 	 *   x2: src
-	 *   w3: num blocks (1..8)
+	 *   w3: nblocks
 	 */
-	cmp		w3, #5;
-	blt		__sm4_neon_crypt_blk1_4;
-
-	PREPARE;
-
-	ld1		{v0.16b-v3.16b}, [x2], #64;
-	ld1		{v4.16b}, [x2], #16;
-	mov		v5.16b, v4.16b;
-	mov		v6.16b, v4.16b;
-	mov		v7.16b, v4.16b;
-	beq		.Lblk8_load_input_done;
-	ld1		{v5.16b}, [x2], #16;
-	cmp		w3, #7;
-	blt		.Lblk8_load_input_done;
-	ld1		{v6.16b}, [x2], #16;
-	beq		.Lblk8_load_input_done;
-	ld1		{v7.16b}, [x2];
-
-.Lblk8_load_input_done:
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
-
-	cmp		w3, #6;
-	st1		{v0.16b-v3.16b}, [x1], #64;
-	st1		{v4.16b}, [x1], #16;
-	blt		.Lblk8_store_output_done;
-	st1		{v5.16b}, [x1], #16;
-	beq		.Lblk8_store_output_done;
-	st1		{v6.16b}, [x1], #16;
-	cmp		w3, #7;
-	beq		.Lblk8_store_output_done;
-	st1		{v7.16b}, [x1];
-
-.Lblk8_store_output_done:
-	ret;
-SYM_FUNC_END(sm4_neon_crypt_blk1_8)
+	SM4_PREPARE()
 
-.align 3
-SYM_FUNC_START(sm4_neon_crypt_blk8)
-	/* input:
-	 *   x0: round key array, CTX
-	 *   x1: dst
-	 *   x2: src
-	 *   w3: nblocks (multiples of 8)
-	 */
-	PREPARE;
+.Lcrypt_loop_8x:
+	sub		w3, w3, #8
+	tbnz		w3, #31, .Lcrypt_4x
+
+	ld4		{v0.4s-v3.4s}, [x2], #64
+	ld4		{v4.4s-v7.4s}, [x2], #64
 
-.Lcrypt_loop_blk:
-	subs		w3, w3, #8;
-	bmi		.Lcrypt_end;
+	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
 
-	ld1		{v0.16b-v3.16b}, [x2], #64;
-	ld1		{v4.16b-v7.16b}, [x2], #64;
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
 
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+	cbz		w3, .Lcrypt_end
+	b		.Lcrypt_loop_8x
 
-	st1		{v0.16b-v3.16b}, [x1], #64;
-	st1		{v4.16b-v7.16b}, [x1], #64;
+.Lcrypt_4x:
+	add		w3, w3, #8
+	cmp		w3, #4
+	blt		.Lcrypt_tail
 
-	b		.Lcrypt_loop_blk;
+	sub		w3, w3, #4
+
+	ld4		{v0.4s-v3.4s}, [x2], #64
+
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+	st1		{v0.16b-v3.16b}, [x1], #64
+
+	cbz		w3, .Lcrypt_end
+
+.Lcrypt_tail:
+	cmp		w3, #2
+	ld1		{v0.16b}, [x2], #16
+	blt		.Lcrypt_tail_load_done
+	ld1		{v1.16b}, [x2], #16
+	beq		.Lcrypt_tail_load_done
+	ld1		{v2.16b}, [x2], #16
+
+.Lcrypt_tail_load_done:
+	transpose_4x4(v0, v1, v2, v3)
+
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+	cmp		w3, #2
+	st1		{v0.16b}, [x1], #16
+	blt		.Lcrypt_end
+	st1		{v1.16b}, [x1], #16
+	beq		.Lcrypt_end
+	st1		{v2.16b}, [x1], #16
 
 .Lcrypt_end:
-	ret;
-SYM_FUNC_END(sm4_neon_crypt_blk8)
+	ret
+SYM_FUNC_END(sm4_neon_crypt)
 
 .align 3
-SYM_FUNC_START(sm4_neon_cbc_dec_blk8)
+SYM_FUNC_START(sm4_neon_cbc_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
 	 *   x2: src
 	 *   x3: iv (big endian, 128 bit)
-	 *   w4: nblocks (multiples of 8)
+	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE()
+
+	ld1		{RIV.16b}, [x3]
+
+.Lcbc_dec_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lcbc_dec_4x
+
+	ld4		{v0.4s-v3.4s}, [x2], #64
+	ld4		{v4.4s-v7.4s}, [x2]
+
+	SM4_CRYPT_BLK8_norotate(v0, v1, v2, v3, v4, v5, v6, v7)
+
+	/* Avoid overwriting the RIV register */
+	rotate_clockwise_4x4(v0, v1, v2, v3)
+	rotate_clockwise_4x4(v4, v5, v6, v7)
+
+	sub		x2, x2, #64
+
+	eor		v0.16b, v0.16b, RIV.16b
 
-	ld1		{RIV.16b}, [x3];
+	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64
+	ld1		{RTMP4.16b-RTMP7.16b}, [x2], #64
 
-.Lcbc_loop_blk:
-	subs		w4, w4, #8;
-	bmi		.Lcbc_end;
+	eor		v1.16b, v1.16b, RTMP0.16b
+	eor		v2.16b, v2.16b, RTMP1.16b
+	eor		v3.16b, v3.16b, RTMP2.16b
+	eor		v4.16b, v4.16b, RTMP3.16b
+	eor		v5.16b, v5.16b, RTMP4.16b
+	eor		v6.16b, v6.16b, RTMP5.16b
+	eor		v7.16b, v7.16b, RTMP6.16b
 
-	ld1		{v0.16b-v3.16b}, [x2], #64;
-	ld1		{v4.16b-v7.16b}, [x2];
+	mov		RIV.16b, RTMP7.16b
 
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
 
-	sub		x2, x2, #64;
-	eor		v0.16b, v0.16b, RIV.16b;
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v1.16b, v1.16b, RTMP0.16b;
-	eor		v2.16b, v2.16b, RTMP1.16b;
-	eor		v3.16b, v3.16b, RTMP2.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
+	cbz		w4, .Lcbc_dec_end
+	b		.Lcbc_dec_loop_8x
 
-	eor		v4.16b, v4.16b, RTMP3.16b;
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v5.16b, v5.16b, RTMP0.16b;
-	eor		v6.16b, v6.16b, RTMP1.16b;
-	eor		v7.16b, v7.16b, RTMP2.16b;
+.Lcbc_dec_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lcbc_dec_tail
 
-	mov		RIV.16b, RTMP3.16b;
-	st1		{v4.16b-v7.16b}, [x1], #64;
+	sub		w4, w4, #4
 
-	b		.Lcbc_loop_blk;
+	ld1		{v0.16b-v3.16b}, [x2], #64
 
-.Lcbc_end:
+	rev32		v4.16b, v0.16b
+	rev32		v5.16b, v1.16b
+	rev32		v6.16b, v2.16b
+	rev32		v7.16b, v3.16b
+
+	transpose_4x4(v4, v5, v6, v7)
+
+	SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)
+
+	eor		v4.16b, v4.16b, RIV.16b
+	eor		v5.16b, v5.16b, v0.16b
+	eor		v6.16b, v6.16b, v1.16b
+	eor		v7.16b, v7.16b, v2.16b
+
+	mov		RIV.16b, v3.16b
+
+	st1		{v4.16b-v7.16b}, [x1], #64
+
+	cbz		w4, .Lcbc_dec_end
+
+.Lcbc_dec_tail:
+	cmp		w4, #2
+	ld1		{v0.16b}, [x2], #16
+	blt		.Lcbc_dec_tail_load_done
+	ld1		{v1.16b}, [x2], #16
+	beq		.Lcbc_dec_tail_load_done
+	ld1		{v2.16b}, [x2], #16
+
+.Lcbc_dec_tail_load_done:
+	rev32		v4.16b, v0.16b
+	rev32		v5.16b, v1.16b
+	rev32		v6.16b, v2.16b
+
+	transpose_4x4(v4, v5, v6, v7)
+
+	SM4_CRYPT_BLK4_BE(v4, v5, v6, v7)
+
+	cmp		w4, #2
+	eor		v4.16b, v4.16b, RIV.16b
+	mov		RIV.16b, v0.16b
+	st1		{v4.16b}, [x1], #16
+	blt		.Lcbc_dec_end
+
+	eor		v5.16b, v5.16b, v0.16b
+	mov		RIV.16b, v1.16b
+	st1		{v5.16b}, [x1], #16
+	beq		.Lcbc_dec_end
+
+	eor		v6.16b, v6.16b, v1.16b
+	mov		RIV.16b, v2.16b
+	st1		{v6.16b}, [x1], #16
+
+.Lcbc_dec_end:
 	/* store new IV */
-	st1		{RIV.16b}, [x3];
+	st1		{RIV.16b}, [x3]
 
-	ret;
-SYM_FUNC_END(sm4_neon_cbc_dec_blk8)
+	ret
+SYM_FUNC_END(sm4_neon_cbc_dec)
 
 .align 3
-SYM_FUNC_START(sm4_neon_cfb_dec_blk8)
+SYM_FUNC_START(sm4_neon_cfb_dec)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
 	 *   x2: src
 	 *   x3: iv (big endian, 128 bit)
-	 *   w4: nblocks (multiples of 8)
+	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE()
+
+	ld1		{v0.16b}, [x3]
+
+.Lcfb_dec_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lcfb_dec_4x
+
+	ld1		{v1.16b-v3.16b}, [x2], #48
+	ld4		{v4.4s-v7.4s}, [x2]
+
+	transpose_4x4(v0, v1, v2, v3)
+
+	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+	sub		x2, x2, #48
+	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64
+	ld1		{RTMP4.16b-RTMP7.16b}, [x2], #64
+
+	eor		v0.16b, v0.16b, RTMP0.16b
+	eor		v1.16b, v1.16b, RTMP1.16b
+	eor		v2.16b, v2.16b, RTMP2.16b
+	eor		v3.16b, v3.16b, RTMP3.16b
+	eor		v4.16b, v4.16b, RTMP4.16b
+	eor		v5.16b, v5.16b, RTMP5.16b
+	eor		v6.16b, v6.16b, RTMP6.16b
+	eor		v7.16b, v7.16b, RTMP7.16b
+
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
+
+	mov		v0.16b, RTMP7.16b
+
+	cbz		w4, .Lcfb_dec_end
+	b		.Lcfb_dec_loop_8x
+
+.Lcfb_dec_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lcfb_dec_tail
+
+	sub		w4, w4, #4
+
+	ld1		{v4.16b-v7.16b}, [x2], #64
+
+	rev32		v0.16b, v0.16b		/* v0 is IV register */
+	rev32		v1.16b, v4.16b
+	rev32		v2.16b, v5.16b
+	rev32		v3.16b, v6.16b
+
+	transpose_4x4(v0, v1, v2, v3)
+
+	SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
 
-	ld1		{v0.16b}, [x3];
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	eor		v3.16b, v3.16b, v7.16b
 
-.Lcfb_loop_blk:
-	subs		w4, w4, #8;
-	bmi		.Lcfb_end;
+	st1		{v0.16b-v3.16b}, [x1], #64
 
-	ld1		{v1.16b, v2.16b, v3.16b}, [x2], #48;
-	ld1		{v4.16b-v7.16b}, [x2];
+	mov		v0.16b, v7.16b
 
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+	cbz		w4, .Lcfb_dec_end
 
-	sub		x2, x2, #48;
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	eor		v1.16b, v1.16b, RTMP1.16b;
-	eor		v2.16b, v2.16b, RTMP2.16b;
-	eor		v3.16b, v3.16b, RTMP3.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
+.Lcfb_dec_tail:
+	cmp		w4, #2
+	ld1		{v4.16b}, [x2], #16
+	blt		.Lcfb_dec_tail_load_done
+	ld1		{v5.16b}, [x2], #16
+	beq		.Lcfb_dec_tail_load_done
+	ld1		{v6.16b}, [x2], #16
 
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v4.16b, v4.16b, RTMP0.16b;
-	eor		v5.16b, v5.16b, RTMP1.16b;
-	eor		v6.16b, v6.16b, RTMP2.16b;
-	eor		v7.16b, v7.16b, RTMP3.16b;
-	st1		{v4.16b-v7.16b}, [x1], #64;
+.Lcfb_dec_tail_load_done:
+	rev32		v0.16b, v0.16b		/* v0 is IV register */
+	rev32		v1.16b, v4.16b
+	rev32		v2.16b, v5.16b
 
-	mov		v0.16b, RTMP3.16b;
+	transpose_4x4(v0, v1, v2, v3)
 
-	b		.Lcfb_loop_blk;
+	SM4_CRYPT_BLK4_BE(v0, v1, v2, v3)
 
-.Lcfb_end:
+	cmp		w4, #2
+	eor		v0.16b, v0.16b, v4.16b
+	st1		{v0.16b}, [x1], #16
+	mov		v0.16b, v4.16b
+	blt		.Lcfb_dec_end
+
+	eor		v1.16b, v1.16b, v5.16b
+	st1		{v1.16b}, [x1], #16
+	mov		v0.16b, v5.16b
+	beq		.Lcfb_dec_end
+
+	eor		v2.16b, v2.16b, v6.16b
+	st1		{v2.16b}, [x1], #16
+	mov		v0.16b, v6.16b
+
+.Lcfb_dec_end:
 	/* store new IV */
-	st1		{v0.16b}, [x3];
+	st1		{v0.16b}, [x3]
 
-	ret;
-SYM_FUNC_END(sm4_neon_cfb_dec_blk8)
+	ret
+SYM_FUNC_END(sm4_neon_cfb_dec)
 
 .align 3
-SYM_FUNC_START(sm4_neon_ctr_enc_blk8)
+SYM_FUNC_START(sm4_neon_ctr_crypt)
 	/* input:
 	 *   x0: round key array, CTX
 	 *   x1: dst
 	 *   x2: src
 	 *   x3: ctr (big endian, 128 bit)
-	 *   w4: nblocks (multiples of 8)
+	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE()
 
-	ldp		x7, x8, [x3];
-	rev		x7, x7;
-	rev		x8, x8;
+	ldp		x7, x8, [x3]
+	rev		x7, x7
+	rev		x8, x8
 
-.Lctr_loop_blk:
-	subs		w4, w4, #8;
-	bmi		.Lctr_end;
+.Lctr_crypt_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lctr_crypt_4x
 
-#define inc_le128(vctr)                     \
-	mov		vctr.d[1], x8;      \
-	mov		vctr.d[0], x7;      \
-	adds		x8, x8, #1;         \
-	adc		x7, x7, xzr;        \
-	rev64		vctr.16b, vctr.16b;
+#define inc_le128(vctr)                             \
+		mov		vctr.d[1], x8;      \
+		mov		vctr.d[0], x7;      \
+		adds		x8, x8, #1;         \
+		rev64		vctr.16b, vctr.16b; \
+		adc		x7, x7, xzr;
 
 	/* construct CTRs */
-	inc_le128(v0);			/* +0 */
-	inc_le128(v1);			/* +1 */
-	inc_le128(v2);			/* +2 */
-	inc_le128(v3);			/* +3 */
-	inc_le128(v4);			/* +4 */
-	inc_le128(v5);			/* +5 */
-	inc_le128(v6);			/* +6 */
-	inc_le128(v7);			/* +7 */
-
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
-
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	eor		v1.16b, v1.16b, RTMP1.16b;
-	eor		v2.16b, v2.16b, RTMP2.16b;
-	eor		v3.16b, v3.16b, RTMP3.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
-
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v4.16b, v4.16b, RTMP0.16b;
-	eor		v5.16b, v5.16b, RTMP1.16b;
-	eor		v6.16b, v6.16b, RTMP2.16b;
-	eor		v7.16b, v7.16b, RTMP3.16b;
-	st1		{v4.16b-v7.16b}, [x1], #64;
-
-	b		.Lctr_loop_blk;
-
-.Lctr_end:
+	inc_le128(v0)			/* +0 */
+	inc_le128(v1)			/* +1 */
+	inc_le128(v2)			/* +2 */
+	inc_le128(v3)			/* +3 */
+	inc_le128(v4)			/* +4 */
+	inc_le128(v5)			/* +5 */
+	inc_le128(v6)			/* +6 */
+	inc_le128(v7)			/* +7 */
+
+	transpose_4x4_2x(v0, v1, v2, v3, v4, v5, v6, v7)
+
+	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64
+	ld1		{RTMP4.16b-RTMP7.16b}, [x2], #64
+
+	eor		v0.16b, v0.16b, RTMP0.16b
+	eor		v1.16b, v1.16b, RTMP1.16b
+	eor		v2.16b, v2.16b, RTMP2.16b
+	eor		v3.16b, v3.16b, RTMP3.16b
+	eor		v4.16b, v4.16b, RTMP4.16b
+	eor		v5.16b, v5.16b, RTMP5.16b
+	eor		v6.16b, v6.16b, RTMP6.16b
+	eor		v7.16b, v7.16b, RTMP7.16b
+
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
+
+	cbz		w4, .Lctr_crypt_end
+	b		.Lctr_crypt_loop_8x
+
+.Lctr_crypt_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lctr_crypt_tail
+
+	sub		w4, w4, #4
+
+	/* construct CTRs */
+	inc_le128(v0)			/* +0 */
+	inc_le128(v1)			/* +1 */
+	inc_le128(v2)			/* +2 */
+	inc_le128(v3)			/* +3 */
+
+	ld1		{v4.16b-v7.16b}, [x2], #64
+
+	transpose_4x4(v0, v1, v2, v3)
+
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+	eor		v0.16b, v0.16b, v4.16b
+	eor		v1.16b, v1.16b, v5.16b
+	eor		v2.16b, v2.16b, v6.16b
+	eor		v3.16b, v3.16b, v7.16b
+
+	st1		{v0.16b-v3.16b}, [x1], #64
+
+	cbz		w4, .Lctr_crypt_end
+
+.Lctr_crypt_tail:
+	/* inc_le128 will change the sign bit */
+	ld1		{v4.16b}, [x2], #16
+	inc_le128(v0)
+	cmp		w4, #2
+	blt		.Lctr_crypt_tail_load_done
+
+	ld1		{v5.16b}, [x2], #16
+	inc_le128(v1)
+	cmp		w4, #2
+	beq		.Lctr_crypt_tail_load_done
+
+	ld1		{v6.16b}, [x2], #16
+	inc_le128(v2)
+
+.Lctr_crypt_tail_load_done:
+	transpose_4x4(v0, v1, v2, v3)
+
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+	cmp		w4, #2
+
+	eor		v0.16b, v0.16b, v4.16b
+	st1		{v0.16b}, [x1], #16
+	blt		.Lctr_crypt_end
+
+	eor		v1.16b, v1.16b, v5.16b
+	st1		{v1.16b}, [x1], #16
+	beq		.Lctr_crypt_end
+
+	eor		v2.16b, v2.16b, v6.16b
+	st1		{v2.16b}, [x1], #16
+
+.Lctr_crypt_end:
 	/* store new CTR */
-	rev		x7, x7;
-	rev		x8, x8;
-	stp		x7, x8, [x3];
+	rev		x7, x7
+	rev		x8, x8
+	stp		x7, x8, [x3]
 
-	ret;
-SYM_FUNC_END(sm4_neon_ctr_enc_blk8)
+	ret
+SYM_FUNC_END(sm4_neon_ctr_crypt)
diff --git a/arch/arm64/crypto/sm4-neon-glue.c b/arch/arm64/crypto/sm4-neon-glue.c
index 03a6a6866a311..7b19accf5c037 100644
--- a/arch/arm64/crypto/sm4-neon-glue.c
+++ b/arch/arm64/crypto/sm4-neon-glue.c
@@ -18,19 +18,14 @@
 #include <crypto/internal/skcipher.h>
 #include <crypto/sm4.h>
 
-#define BYTES2BLKS(nbytes)	((nbytes) >> 4)
-#define BYTES2BLK8(nbytes)	(((nbytes) >> 4) & ~(8 - 1))
-
-asmlinkage void sm4_neon_crypt_blk1_8(const u32 *rkey, u8 *dst, const u8 *src,
-				      unsigned int nblks);
-asmlinkage void sm4_neon_crypt_blk8(const u32 *rkey, u8 *dst, const u8 *src,
-				    unsigned int nblks);
-asmlinkage void sm4_neon_cbc_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
-				      u8 *iv, unsigned int nblks);
-asmlinkage void sm4_neon_cfb_dec_blk8(const u32 *rkey, u8 *dst, const u8 *src,
-				      u8 *iv, unsigned int nblks);
-asmlinkage void sm4_neon_ctr_enc_blk8(const u32 *rkey, u8 *dst, const u8 *src,
-				      u8 *iv, unsigned int nblks);
+asmlinkage void sm4_neon_crypt(const u32 *rkey, u8 *dst, const u8 *src,
+			       unsigned int nblocks);
+asmlinkage void sm4_neon_cbc_dec(const u32 *rkey_dec, u8 *dst, const u8 *src,
+				 u8 *iv, unsigned int nblocks);
+asmlinkage void sm4_neon_cfb_dec(const u32 *rkey_enc, u8 *dst, const u8 *src,
+				 u8 *iv, unsigned int nblocks);
+asmlinkage void sm4_neon_ctr_crypt(const u32 *rkey_enc, u8 *dst, const u8 *src,
+				   u8 *iv, unsigned int nblocks);
 
 static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		      unsigned int key_len)
@@ -51,27 +46,18 @@ static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
 	while ((nbytes = walk.nbytes) > 0) {
 		const u8 *src = walk.src.virt.addr;
 		u8 *dst = walk.dst.virt.addr;
-		unsigned int nblks;
+		unsigned int nblocks;
 
-		kernel_neon_begin();
+		nblocks = nbytes / SM4_BLOCK_SIZE;
+		if (nblocks) {
+			kernel_neon_begin();
 
-		nblks = BYTES2BLK8(nbytes);
-		if (nblks) {
-			sm4_neon_crypt_blk8(rkey, dst, src, nblks);
-			dst += nblks * SM4_BLOCK_SIZE;
-			src += nblks * SM4_BLOCK_SIZE;
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
+			sm4_neon_crypt(rkey, dst, src, nblocks);
 
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			sm4_neon_crypt_blk1_8(rkey, dst, src, nblks);
-			nbytes -= nblks * SM4_BLOCK_SIZE;
+			kernel_neon_end();
 		}
 
-		kernel_neon_end();
-
-		err = skcipher_walk_done(&walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
 	}
 
 	return err;
@@ -138,48 +124,19 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
 	while ((nbytes = walk.nbytes) > 0) {
 		const u8 *src = walk.src.virt.addr;
 		u8 *dst = walk.dst.virt.addr;
-		unsigned int nblks;
+		unsigned int nblocks;
 
-		kernel_neon_begin();
+		nblocks = nbytes / SM4_BLOCK_SIZE;
+		if (nblocks) {
+			kernel_neon_begin();
 
-		nblks = BYTES2BLK8(nbytes);
-		if (nblks) {
-			sm4_neon_cbc_dec_blk8(ctx->rkey_dec, dst, src,
-					walk.iv, nblks);
-			dst += nblks * SM4_BLOCK_SIZE;
-			src += nblks * SM4_BLOCK_SIZE;
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
+			sm4_neon_cbc_dec(ctx->rkey_dec, dst, src,
+					 walk.iv, nblocks);
 
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			u8 keystream[SM4_BLOCK_SIZE * 8];
-			u8 iv[SM4_BLOCK_SIZE];
-			int i;
-
-			sm4_neon_crypt_blk1_8(ctx->rkey_dec, keystream,
-					src, nblks);
-
-			src += ((int)nblks - 2) * SM4_BLOCK_SIZE;
-			dst += (nblks - 1) * SM4_BLOCK_SIZE;
-			memcpy(iv, src + SM4_BLOCK_SIZE, SM4_BLOCK_SIZE);
-
-			for (i = nblks - 1; i > 0; i--) {
-				crypto_xor_cpy(dst, src,
-					&keystream[i * SM4_BLOCK_SIZE],
-					SM4_BLOCK_SIZE);
-				src -= SM4_BLOCK_SIZE;
-				dst -= SM4_BLOCK_SIZE;
-			}
-			crypto_xor_cpy(dst, walk.iv,
-					keystream, SM4_BLOCK_SIZE);
-			memcpy(walk.iv, iv, SM4_BLOCK_SIZE);
-			nbytes -= nblks * SM4_BLOCK_SIZE;
+			kernel_neon_end();
 		}
 
-		kernel_neon_end();
-
-		err = skcipher_walk_done(&walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
 	}
 
 	return err;
@@ -238,41 +195,21 @@ static int sm4_cfb_decrypt(struct skcipher_request *req)
 	while ((nbytes = walk.nbytes) > 0) {
 		const u8 *src = walk.src.virt.addr;
 		u8 *dst = walk.dst.virt.addr;
-		unsigned int nblks;
+		unsigned int nblocks;
 
-		kernel_neon_begin();
+		nblocks = nbytes / SM4_BLOCK_SIZE;
+		if (nblocks) {
+			kernel_neon_begin();
 
-		nblks = BYTES2BLK8(nbytes);
-		if (nblks) {
-			sm4_neon_cfb_dec_blk8(ctx->rkey_enc, dst, src,
-					walk.iv, nblks);
-			dst += nblks * SM4_BLOCK_SIZE;
-			src += nblks * SM4_BLOCK_SIZE;
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
+			sm4_neon_cfb_dec(ctx->rkey_enc, dst, src,
+					 walk.iv, nblocks);
 
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			u8 keystream[SM4_BLOCK_SIZE * 8];
-
-			memcpy(keystream, walk.iv, SM4_BLOCK_SIZE);
-			if (nblks > 1)
-				memcpy(&keystream[SM4_BLOCK_SIZE], src,
-					(nblks - 1) * SM4_BLOCK_SIZE);
-			memcpy(walk.iv, src + (nblks - 1) * SM4_BLOCK_SIZE,
-				SM4_BLOCK_SIZE);
-
-			sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
-					keystream, nblks);
-
-			crypto_xor_cpy(dst, src, keystream,
-					nblks * SM4_BLOCK_SIZE);
-			dst += nblks * SM4_BLOCK_SIZE;
-			src += nblks * SM4_BLOCK_SIZE;
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
+			kernel_neon_end();
 
-		kernel_neon_end();
+			dst += nblocks * SM4_BLOCK_SIZE;
+			src += nblocks * SM4_BLOCK_SIZE;
+			nbytes -= nblocks * SM4_BLOCK_SIZE;
+		}
 
 		/* tail */
 		if (walk.nbytes == walk.total && nbytes > 0) {
@@ -302,40 +239,21 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
 	while ((nbytes = walk.nbytes) > 0) {
 		const u8 *src = walk.src.virt.addr;
 		u8 *dst = walk.dst.virt.addr;
-		unsigned int nblks;
+		unsigned int nblocks;
 
-		kernel_neon_begin();
+		nblocks = nbytes / SM4_BLOCK_SIZE;
+		if (nblocks) {
+			kernel_neon_begin();
 
-		nblks = BYTES2BLK8(nbytes);
-		if (nblks) {
-			sm4_neon_ctr_enc_blk8(ctx->rkey_enc, dst, src,
-					walk.iv, nblks);
-			dst += nblks * SM4_BLOCK_SIZE;
-			src += nblks * SM4_BLOCK_SIZE;
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
+			sm4_neon_ctr_crypt(ctx->rkey_enc, dst, src,
+					   walk.iv, nblocks);
 
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			u8 keystream[SM4_BLOCK_SIZE * 8];
-			int i;
-
-			for (i = 0; i < nblks; i++) {
-				memcpy(&keystream[i * SM4_BLOCK_SIZE],
-					walk.iv, SM4_BLOCK_SIZE);
-				crypto_inc(walk.iv, SM4_BLOCK_SIZE);
-			}
-			sm4_neon_crypt_blk1_8(ctx->rkey_enc, keystream,
-					keystream, nblks);
-
-			crypto_xor_cpy(dst, src, keystream,
-					nblks * SM4_BLOCK_SIZE);
-			dst += nblks * SM4_BLOCK_SIZE;
-			src += nblks * SM4_BLOCK_SIZE;
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
+			kernel_neon_end();
 
-		kernel_neon_end();
+			dst += nblocks * SM4_BLOCK_SIZE;
+			src += nblocks * SM4_BLOCK_SIZE;
+			nbytes -= nblocks * SM4_BLOCK_SIZE;
+		}
 
 		/* tail */
 		if (walk.nbytes == walk.total && nbytes > 0) {
-- 
GitLab


From c24ee936c79d7c381750f6c23bbef1257850279f Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:54:56 +0800
Subject: [PATCH 0901/4122] crypto: testmgr - add SM4 cts-cbc/xts/xcbc test
 vectors

This patch newly adds the test vectors of CTS-CBC/XTS/XCBC modes of
the SM4 algorithm, and also added some test vectors for SM4 GCM/CCM.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/testmgr.c |  19 +
 crypto/testmgr.h | 977 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 996 insertions(+)

diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index bcd059caa1c81..e2806ef044fd4 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4712,6 +4712,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.alg = "cts(cbc(paes))",
 		.test = alg_test_null,
 		.fips_allowed = 1,
+	}, {
+		.alg = "cts(cbc(sm4))",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(sm4_cts_tv_template)
+		}
 	}, {
 		.alg = "curve25519",
 		.test = alg_test_kpp,
@@ -5586,6 +5592,12 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.hash = __VECS(aes_xcbc128_tv_template)
 		}
+	}, {
+		.alg = "xcbc(sm4)",
+		.test = alg_test_hash,
+		.suite = {
+			.hash = __VECS(sm4_xcbc128_tv_template)
+		}
 	}, {
 		.alg = "xchacha12",
 		.test = alg_test_skcipher,
@@ -5640,6 +5652,13 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.cipher = __VECS(serpent_xts_tv_template)
 		}
+	}, {
+		.alg = "xts(sm4)",
+		.generic_driver = "xts(ecb(sm4-generic))",
+		.test = alg_test_skcipher,
+		.suite = {
+			.cipher = __VECS(sm4_xts_tv_template)
+		}
 	}, {
 		.alg = "xts(twofish)",
 		.generic_driver = "xts(ecb(twofish-generic))",
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index d6088e26f3261..f10bfb9d99735 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -14882,6 +14882,353 @@ static const struct cipher_testvec sm4_cfb_tv_template[] = {
 	}
 };
 
+static const struct cipher_testvec sm4_cts_tv_template[] = {
+	/* Generated from AES-CTS test vectors */
+	{
+		.klen	= 16,
+		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+			  "\x20",
+		.len	= 17,
+		.ctext	= "\x05\xfe\x23\xee\x17\xa2\x89\x98"
+			  "\xbc\x97\x0a\x0b\x54\x67\xca\xd7"
+			  "\xd6",
+	}, {
+		.klen	= 16,
+		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+			  "\x20\x47\x61\x75\x27\x73\x20",
+		.len	= 31,
+		.ctext	= "\x15\x46\xe4\x95\xa4\xec\xf0\xb8"
+			  "\x49\xd6\x6a\x9d\x89\xc7\xfd\x70"
+			  "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+			  "\x93\xf7\x70\xbb\xa8\x3f\xa3",
+	}, {
+		.klen	= 16,
+		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+			  "\x20\x47\x61\x75\x27\x73\x20\x43",
+		.len	= 32,
+		.ctext	= "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+			  "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3"
+			  "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+			  "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf",
+	}, {
+		.klen	= 16,
+		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+			  "\x20\x47\x61\x75\x27\x73\x20\x43"
+			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
+			  "\x70\x6c\x65\x61\x73\x65\x2c",
+		.len	= 47,
+		.ctext	= "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+			  "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf"
+			  "\xd3\xe1\xdc\xeb\xfa\x04\x11\x99"
+			  "\xde\xcf\x6f\x4d\x7b\x09\x92\x7f"
+			  "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+			  "\x01\x6a\xbf\xd4\x3f\x79\x02",
+	}, {
+		.klen	= 16,
+		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+			  "\x20\x47\x61\x75\x27\x73\x20\x43"
+			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
+			  "\x70\x6c\x65\x61\x73\x65\x2c\x20",
+		.len	= 48,
+		.ctext	= "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+			  "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf"
+			  "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f"
+			  "\xbd\x99\x21\x0c\x5e\x4d\xed\x20"
+			  "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+			  "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3",
+	}, {
+		.klen	= 16,
+		.key    = "\x63\x68\x69\x63\x6b\x65\x6e\x20"
+			  "\x74\x65\x72\x69\x79\x61\x6b\x69",
+		.ptext	= "\x49\x20\x77\x6f\x75\x6c\x64\x20"
+			  "\x6c\x69\x6b\x65\x20\x74\x68\x65"
+			  "\x20\x47\x65\x6e\x65\x72\x61\x6c"
+			  "\x20\x47\x61\x75\x27\x73\x20\x43"
+			  "\x68\x69\x63\x6b\x65\x6e\x2c\x20"
+			  "\x70\x6c\x65\x61\x73\x65\x2c\x20"
+			  "\x61\x6e\x64\x20\x77\x6f\x6e\x74"
+			  "\x6f\x6e\x20\x73\x6f\x75\x70\x2e",
+		.len	= 64,
+		.ctext	= "\xd6\x71\xc8\xc0\x4d\x52\x7c\x66"
+			  "\x93\xf7\x70\xbb\xa8\x3f\xa3\xcf"
+			  "\x89\xc7\x99\x3f\x87\x69\x5c\xd3"
+			  "\x01\x6a\xbf\xd4\x3f\x79\x02\xa3"
+			  "\x58\x19\xa4\x8f\xa9\x68\x5e\x6b"
+			  "\x2c\x0f\x81\x60\x15\x98\x27\x4f"
+			  "\x9a\xbd\x7b\xfe\x82\xab\xcc\x7f"
+			  "\xbd\x99\x21\x0c\x5e\x4d\xed\x20",
+	}
+};
+
+static const struct cipher_testvec sm4_xts_tv_template[] = {
+	/* Generated from AES-XTS test vectors */
+	{
+		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ctext	= "\xd9\xb4\x21\xf7\x31\xc8\x94\xfd"
+			  "\xc3\x5b\x77\x29\x1f\xe4\xe3\xb0"
+			  "\x2a\x1f\xb7\x66\x98\xd5\x9f\x0e"
+			  "\x51\x37\x6c\x4a\xda\x5b\xc7\x5d",
+		.len	= 32,
+	}, {
+		.key	= "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x11\x11\x11\x11\x11\x11\x11\x11"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ctext	= "\xa7\x4d\x72\x6c\x11\x19\x6a\x32"
+			  "\xbe\x04\xe0\x01\xff\x29\xd0\xc7"
+			  "\x93\x2f\x9f\x3e\xc2\x9b\xfc\xb6"
+			  "\x4d\xd1\x7f\x63\xcb\xd3\xea\x31",
+		.len	= 32,
+	}, {
+		.key	= "\xff\xfe\xfd\xfc\xfb\xfa\xf9\xf8"
+			  "\xf7\xf6\xf5\xf4\xf3\xf2\xf1\xf0"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22"
+			  "\x22\x22\x22\x22\x22\x22\x22\x22",
+		.klen	= 32,
+		.iv	= "\x33\x33\x33\x33\x33\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44"
+			  "\x44\x44\x44\x44\x44\x44\x44\x44",
+		.ctext	= "\x7f\x76\x08\x8e\xff\xad\xf7\x0c"
+			  "\x02\xea\x9f\x95\xda\x06\x28\xd3"
+			  "\x51\xbf\xcb\x9e\xac\x05\x63\xbc"
+			  "\xf1\x7b\x71\x0d\xab\x0a\x98\x26",
+		.len	= 32,
+	}, {
+		.key	= "\x27\x18\x28\x18\x28\x45\x90\x45"
+			  "\x23\x53\x60\x28\x74\x71\x35\x26"
+			  "\x31\x41\x59\x26\x53\x58\x97\x93"
+			  "\x23\x84\x62\x64\x33\x83\x27\x95",
+		.klen	= 32,
+		.iv	= "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"
+			  "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf"
+			  "\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7"
+			  "\xe8\xe9\xea\xeb\xec\xed\xee\xef"
+			  "\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7"
+			  "\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff",
+		.ctext	= "\x54\xdd\x65\xb6\x32\x6f\xae\xa8"
+			  "\xfa\xd1\xa8\x3c\x63\x61\x4a\xf3"
+			  "\x9f\x72\x1d\x8d\xfe\x17\x7a\x30"
+			  "\xb6\x6a\xbf\x6a\x44\x99\x80\xe1"
+			  "\xcd\xbe\x06\xaf\xb7\x33\x36\xf3"
+			  "\x7a\x4d\x39\xde\x96\x4a\x30\xd7"
+			  "\xd0\x4a\x37\x99\x16\x9c\x60\x25"
+			  "\x8f\x6b\x74\x8a\x61\x86\x1a\xa5"
+			  "\xec\x92\xa2\xc1\x5b\x2b\x7c\x61"
+			  "\x5a\x42\xab\xa4\x99\xbb\xd6\xb7"
+			  "\x1d\xb9\xc7\x89\xb2\x18\x20\x89"
+			  "\xa2\x5d\xd3\xdf\x80\x0e\xd1\x86"
+			  "\x4d\x19\xf7\xed\x45\xfd\x17\xa9"
+			  "\x48\x0b\x0f\xb8\x2d\x9b\x7f\xc3"
+			  "\xed\x57\xe9\xa1\x14\x0e\xaa\x77"
+			  "\x8d\xd2\xdd\x67\x9e\x3e\xdc\x3d"
+			  "\xc4\xd5\x5c\x95\x0e\xbc\x53\x1d"
+			  "\x95\x92\xf7\xc4\x63\x82\x56\xd5"
+			  "\x65\x18\x29\x2a\x20\xaf\x98\xfd"
+			  "\xd3\xa6\x36\x00\x35\x0a\x70\xab"
+			  "\x5a\x40\xf4\xc2\x85\x03\x7c\xa0"
+			  "\x1f\x25\x1f\x19\xec\xae\x03\x29"
+			  "\xff\x77\xad\x88\xcd\x5a\x4c\xde"
+			  "\xa2\xae\xab\xc2\x21\x48\xff\xbd"
+			  "\x23\x9b\xd1\x05\x15\xbd\xe1\x13"
+			  "\x1d\xec\x84\x04\xe4\x43\xdc\x76"
+			  "\x31\x40\xd5\xf2\x2b\xf3\x3e\x0c"
+			  "\x68\x72\xd6\xb8\x1d\x63\x0f\x6f"
+			  "\x00\xcd\xd0\x58\xfe\x80\xf9\xcb"
+			  "\xfb\x77\x70\x7f\x93\xce\xe2\xca"
+			  "\x92\xb9\x15\xb8\x30\x40\x27\xc1"
+			  "\x90\xa8\x4e\x2d\x65\xe0\x18\xcc"
+			  "\x6a\x38\x7d\x37\x66\xac\xdb\x28"
+			  "\x25\x32\x84\xe8\xdb\x9a\xcf\x8f"
+			  "\x52\x28\x0d\xdc\x6d\x00\x33\xd2"
+			  "\xcc\xaa\xa4\xf9\xae\xff\x12\x36"
+			  "\x69\xbc\x02\x4f\xd6\x76\x8e\xdf"
+			  "\x8b\xc1\xf8\xd6\x22\xc1\x9c\x60"
+			  "\x9e\xf9\x7f\x60\x91\x90\xcd\x11"
+			  "\x02\x41\xe7\xfb\x08\x4e\xd8\x94"
+			  "\x2d\xa1\xf9\xb9\xcf\x1b\x51\x4b"
+			  "\x61\xa3\x88\xb3\x0e\xa6\x1a\x4a"
+			  "\x74\x5b\x38\x1e\xe7\xad\x6c\x4d"
+			  "\xb1\x27\x54\x53\xb8\x41\x3f\x98"
+			  "\xdf\x6e\x4a\x40\x98\x6e\xe4\xb5"
+			  "\x9a\xf5\xdf\xae\xcd\x30\x12\x65"
+			  "\x17\x90\x67\xa0\x0d\x7c\xa3\x5a"
+			  "\xb9\x5a\xbd\x61\x7a\xde\xa2\x8e"
+			  "\xc1\xc2\x6a\x97\xde\x28\xb8\xbf"
+			  "\xe3\x01\x20\xd6\xae\xfb\xd2\x58"
+			  "\xc5\x9e\x42\xd1\x61\xe8\x06\x5a"
+			  "\x78\x10\x6b\xdc\xa5\xcd\x90\xfb"
+			  "\x3a\xac\x4e\x93\x86\x6c\x8a\x7f"
+			  "\x96\x76\x86\x0a\x79\x14\x5b\xd9"
+			  "\x2e\x02\xe8\x19\xa9\x0b\xe0\xb9"
+			  "\x7c\xc5\x22\xb3\x21\x06\x85\x6f"
+			  "\xdf\x0e\x54\xd8\x8e\x46\x24\x15"
+			  "\x5a\x2f\x1c\x14\xea\xea\xa1\x63"
+			  "\xf8\x58\xe9\x9a\x80\x6e\x79\x1a"
+			  "\xcd\x82\xf1\xb0\xe2\x9f\x00\x28"
+			  "\xa4\xc3\x8e\x97\x6f\x57\x1a\x93"
+			  "\xf4\xfd\x57\xd7\x87\xc2\x4d\xb0"
+			  "\xe0\x1c\xa3\x04\xe5\xa5\xc4\xdd"
+			  "\x50\xcf\x8b\xdb\xf4\x91\xe5\x7c",
+		.len	= 512,
+	}, {
+		.key	= "\x62\x49\x77\x57\x24\x70\x93\x69"
+			  "\x99\x59\x57\x49\x66\x96\x76\x27"
+			  "\x02\x88\x41\x97\x16\x93\x99\x37"
+			  "\x51\x05\x82\x09\x74\x94\x45\x92",
+		.klen	= 32,
+		.iv	= "\xff\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\x40\x41\x42\x43\x44\x45\x46\x47"
+			  "\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f"
+			  "\x50\x51\x52\x53\x54\x55\x56\x57"
+			  "\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f"
+			  "\x60\x61\x62\x63\x64\x65\x66\x67"
+			  "\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f"
+			  "\x70\x71\x72\x73\x74\x75\x76\x77"
+			  "\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
+			  "\x80\x81\x82\x83\x84\x85\x86\x87"
+			  "\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f"
+			  "\x90\x91\x92\x93\x94\x95\x96\x97"
+			  "\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xf8\xf9\xfa\xfb\xfc",
+		.ctext	= "\xa2\x9f\x9e\x4e\x71\xdb\x28\x3c"
+			  "\x80\x0e\xf6\xb7\x8e\x57\x1c\xba"
+			  "\x90\xda\x3b\x6c\x22\x00\x68\x30"
+			  "\x1d\x63\x0d\x9e\x6a\xad\x37\x55"
+			  "\xbc\x77\x1e\xc9\xad\x83\x30\xd5"
+			  "\x27\xb2\x66\x77\x18\x3c\xa6\x39"
+			  "\x9c\x0a\xaa\x1f\x02\xe1\xd5\x65"
+			  "\x9b\x8d\xc5\x97\x3d\xc5\x04\x53"
+			  "\x78\x00\xe3\xb0\x1a\x43\x4e\xb7"
+			  "\xc4\x9f\x38\xc5\x7b\xa4\x70\x64"
+			  "\x78\xe6\x32\xd9\x65\x44\xc5\x64"
+			  "\xb8\x42\x35\x99\xff\x66\x75\xb0"
+			  "\x22\xd3\x9b\x6e\x8d\xcf\x6a\x24"
+			  "\xfd\x92\xb7\x1b\x04\x28\x2a\x61"
+			  "\xdc\x96\x2a\x20\x7a\x2c\xf1\xf9"
+			  "\x12\x15\xf0\x4d\xcf\x2b\xde\x33"
+			  "\x41\xbc\xe7\x85\x87\x22\xb7\x16"
+			  "\x02\x1c\xd8\xa2\x0f\x1f\xa3\xe9"
+			  "\xd8\x45\x48\xe7\xbe\x08\x4e\x4e"
+			  "\x23\x79\x84\xdb\x40\x76\xf5\x13"
+			  "\x78\x92\x4a\x2f\xf9\x1b\xf2\x80"
+			  "\x25\x74\x51\x45\x9a\x77\x78\x97"
+			  "\xd3\xe0\xc7\xc4\x35\x67\x2a\xe6"
+			  "\xb3\x0d\x62\x9f\x8b",
+		.len	= 189,
+	},
+};
+
 static const struct aead_testvec sm4_gcm_tv_template[] = {
 	{ /* From https://datatracker.ietf.org/doc/html/rfc8998#appendix-A.1 */
 		.key	= "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
@@ -14913,6 +15260,298 @@ static const struct aead_testvec sm4_gcm_tv_template[] = {
 			  "\x83\xDE\x35\x41\xE4\xC2\xB5\x81"
 			  "\x77\xE0\x65\xA9\xBF\x7B\x62\xEC",
 		.clen	= 80,
+	}, { /* Generated from AES-GCM test vectors */
+		.key    = zeroed_string,
+		.klen	= 16,
+		.ctext	= "\x23\x2f\x0c\xfe\x30\x8b\x49\xea"
+			  "\x6f\xc8\x82\x29\xb5\xdc\x85\x8d",
+		.clen	= 16,
+	}, {
+		.key    = zeroed_string,
+		.klen	= 16,
+		.ptext	= zeroed_string,
+		.plen	= 16,
+		.ctext	= "\x7d\xe2\xaa\x7f\x11\x10\x18\x82"
+			  "\x18\x06\x3b\xe1\xbf\xeb\x6d\x89"
+			  "\xb8\x51\xb5\xf3\x94\x93\x75\x2b"
+			  "\xe5\x08\xf1\xbb\x44\x82\xc5\x57",
+		.clen	= 32,
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 16,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+			  "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+			  "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+			  "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+			  "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+			  "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+			  "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+			  "\xba\x63\x7b\x39\x1a\xaf\xd2\x55",
+		.plen	= 64,
+		.ctext	= "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6"
+			  "\x76\x21\x6a\x33\x83\x10\x41\xeb"
+			  "\x09\x58\x00\x11\x7b\xdc\x3f\x75"
+			  "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb"
+			  "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07"
+			  "\x1a\xe5\x48\x3f\xed\xde\x98\x5d"
+			  "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf"
+			  "\xe3\x63\x36\x83\x23\xf7\x5b\x80"
+			  "\x7d\xfe\x77\xef\x71\xb1\x5e\xc9"
+			  "\x52\x6b\x09\xab\x84\x28\x4b\x8a",
+		.clen	= 80,
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 16,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+			  "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+			  "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+			  "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+			  "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+			  "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+			  "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+			  "\xba\x63\x7b\x39",
+		.plen	= 60,
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= "\xe4\x11\x0f\xf1\xc1\x41\x97\xe6"
+			  "\x76\x21\x6a\x33\x83\x10\x41\xeb"
+			  "\x09\x58\x00\x11\x7b\xdc\x3f\x75"
+			  "\x1a\x49\x6e\xfc\xf2\xbb\xdf\xdb"
+			  "\x3a\x2e\x13\xfd\xc5\xc1\x9d\x07"
+			  "\x1a\xe5\x48\x3f\xed\xde\x98\x5d"
+			  "\x3f\x2d\x5b\x4e\xee\x0b\xb6\xdf"
+			  "\xe3\x63\x36\x83"
+			  "\x89\xf6\xba\x35\xb8\x18\xd3\xcc"
+			  "\x38\x6c\x05\xb3\x8a\xcb\xc9\xde",
+		.clen	= 76,
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+		.klen	= 16,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= "\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+			  "\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+			  "\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+			  "\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+			  "\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+			  "\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+			  "\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+			  "\xba\x63\x7b\x39",
+		.plen	= 60,
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= "\xc1\x11\x44\x51\xd9\x25\x87\x5b"
+			  "\x0f\xd9\x06\xf3\x33\x44\xbb\x87"
+			  "\x8b\xa3\x77\xd2\x0c\x60\xfa\xcc"
+			  "\x85\x50\x6f\x96\x0c\x54\x54\xc1"
+			  "\x58\x04\x88\x6e\xf4\x26\x35\x7e"
+			  "\x94\x80\x48\x6c\xf2\xf4\x88\x1f"
+			  "\x19\x63\xea\xae\xba\x81\x1a\x5d"
+			  "\x0e\x6f\x59\x08"
+			  "\x33\xac\x5b\xa8\x19\x60\xdb\x1d"
+			  "\xdd\x2e\x22\x2e\xe0\x87\x51\x5d",
+		.clen	= 76,
+	}, {
+		.key	= "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+			  "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+		.klen	= 16,
+		.iv	= "\x00\xff\xff\xff\xff\x00\x00\xff"
+			  "\xff\xff\x00\xff",
+		.ptext	= "\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+			  "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+			  "\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+			  "\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+			  "\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+			  "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+			  "\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+			  "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+			  "\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+			  "\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+			  "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+			  "\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+			  "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+			  "\x35\x23\xf4\x20\x41\xd4\xad\x82"
+			  "\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+			  "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+			  "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+			  "\xad\x49\x3a\xae\x98\xce\xa6\x66"
+			  "\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+			  "\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+			  "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+			  "\x57\xcc\x89\x09\x75\x9b\x78\x70"
+			  "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+			  "\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+			  "\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+			  "\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+			  "\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+			  "\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+			  "\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+			  "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+			  "\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+			  "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+			  "\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+			  "\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+			  "\x02\x66\x49\xca\x7c\x91\x05\xf2"
+			  "\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+			  "\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+			  "\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+			  "\x45\xea\x78\x73\xd9\xb7\x39\x11"
+			  "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+			  "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+			  "\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+			  "\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+			  "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+			  "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+			  "\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+			  "\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+			  "\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+			  "\x03\x25\x3c\x8d\x48\x58\x71\x34"
+			  "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+			  "\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+			  "\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+			  "\x87\x79\x60\x38\x46\xb4\x25\x57"
+			  "\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+			  "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+			  "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+			  "\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+			  "\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+			  "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+			  "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+			  "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+			  "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+			  "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+			  "\x0b\x63\xde\x87\x42\x79\x8a\x68"
+			  "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+			  "\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+			  "\xe9\x83\x84\xcb\x28\x69\x09\x69"
+			  "\xce\x99\x46\x00\x54\xcb\xd8\x38"
+			  "\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+			  "\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+			  "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+			  "\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+			  "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+			  "\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+			  "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+			  "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+			  "\x78\xc6\x91\x22\x40\x91\x80\xbe"
+			  "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+			  "\x67\x10\xa4\x83\x98\x79\x23\xe7"
+			  "\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+			  "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+			  "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+			  "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+			  "\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+			  "\x3f\x73\x09\xe2\x45\x56\x31\x51"
+			  "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+			  "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+			  "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+			  "\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+			  "\xa4\x78\xdb\x74\x3d\x8b\xb5",
+		.plen	= 719,
+		.ctext	= "\xdc\xb1\x0f\x2a\xe8\x2d\x1c\x57"
+			  "\xc4\x82\xfa\xd6\x87\xe6\x2f\x50"
+			  "\xbd\x9e\x0a\x42\x31\xf2\xc7\xbb"
+			  "\x21\x63\xa7\x05\x43\x33\xef\x33"
+			  "\x5c\xd3\x47\x55\xce\x5c\xe4\xd4"
+			  "\xe5\x07\x62\x22\xac\x01\xa8\x35"
+			  "\x9c\x59\x34\x30\x8e\xff\x9f\xb4"
+			  "\xd2\x4e\x74\x90\x64\xf2\x78\x5e"
+			  "\x63\xb7\xc5\x08\x1b\x37\xa5\x9e"
+			  "\xc0\xde\xff\xa9\x7f\x0b\xd3\x02"
+			  "\x83\x6e\x33\xfa\x43\x11\xd3\xda"
+			  "\x02\xcf\xcd\x4a\xc0\x78\x1f\x39"
+			  "\x62\xcb\xa3\x95\x7e\x13\x92\x28"
+			  "\xb2\xc4\x7a\xba\xd1\xc6\xf6\x1f"
+			  "\xda\x0b\xf1\xd1\x99\x54\xd8\x3b"
+			  "\x16\xf8\xe6\x97\x1e\xa7\xcf\x49"
+			  "\x69\x84\x01\x4c\xdc\x7a\x34\xff"
+			  "\x01\x08\xa3\x0b\x39\xac\x21\x37"
+			  "\xd8\xb4\x04\x19\x8b\x7a\x7d\x17"
+			  "\x44\xd1\x18\xaf\x1f\xa9\x29\xfe"
+			  "\xfa\x77\xe0\x40\x42\x0c\x79\xb7"
+			  "\xc3\x15\x1b\xd9\x0c\x82\xfc\x16"
+			  "\x70\xd6\x2a\xe9\x94\x72\xc5\xa5"
+			  "\x8a\x58\xbc\xfa\xe0\x88\x39\x4a"
+			  "\x80\xe8\xec\xaf\x60\xac\xe7\xf8"
+			  "\x9c\xf0\xfc\x61\x39\x07\x98\x6b"
+			  "\x88\xe3\x98\x22\x28\x18\x4a\x2d"
+			  "\x25\xef\x10\xe3\x83\x66\x3f\xfd"
+			  "\xc7\x0b\xa3\xfd\x97\xa9\xf4\xbd"
+			  "\xd8\x2a\xee\x4a\x50\xad\xcc\xb5"
+			  "\xc7\xab\xb8\x79\x9c\xd1\xf1\x27"
+			  "\x08\xf5\xf5\xe8\x1b\x66\xce\x41"
+			  "\x56\x60\x94\x86\xf0\x78\xc2\xfa"
+			  "\x5b\x63\x40\xb1\xd1\x1a\x38\x69"
+			  "\x0b\x8c\xb2\xf5\xa2\xbe\x90\x9d"
+			  "\x46\x23\x79\x8b\x3b\x4a\xf4\xbb"
+			  "\x55\xf7\x58\x9d\xaf\x59\xff\x74"
+			  "\xf3\xb9\xc4\x26\xb1\xf8\xe1\x28"
+			  "\x8b\x5e\x8f\x6d\x64\xe7\xe8\x63"
+			  "\xd2\x9e\xcb\xee\xae\x19\x04\x1d"
+			  "\x05\xf0\x9d\x99\x7b\x33\x33\xae"
+			  "\x6e\xe5\x09\xdd\x67\x51\xc4\xc8"
+			  "\x6a\xc7\x36\x35\xc9\x93\x76\xa1"
+			  "\xa8\x1c\xfa\x75\x92\x34\x0e\x7d"
+			  "\x3d\x1d\xef\x00\xfd\xa5\x25\x12"
+			  "\x7c\x91\x21\x41\xcc\x50\x47\xa9"
+			  "\x22\x50\x24\x96\x34\x79\x3d\xe8"
+			  "\x3f\xa0\x56\xaf\x98\x53\x55\xc3"
+			  "\x46\x1b\x17\x54\xb8\xb0\xb7\xe0"
+			  "\xe0\xab\x47\x6f\x06\xda\xcc\x75"
+			  "\xa7\x96\xb7\x92\xf3\xa0\x5f\xe6"
+			  "\xba\x97\xe3\x2f\x97\x05\xb2\x99"
+			  "\xa0\x09\x10\x98\x9c\xd3\x2e\xd1"
+			  "\x7e\x2a\x30\x54\x3c\xb9\x33\xe3"
+			  "\xf2\xaf\xd3\xa5\xee\xd0\x0b\x8a"
+			  "\x19\x54\x0f\x02\x51\x1f\x91\xdf"
+			  "\x71\x9c\xad\x77\x35\x28\x55\x6d"
+			  "\xcd\x7a\xd9\xa3\x41\x98\x6b\x37"
+			  "\x19\x0f\xbe\xae\x69\xb2\x25\x01"
+			  "\xee\x0e\x51\x4b\x53\xea\x0f\x5f"
+			  "\x85\x74\x79\x36\x32\x0a\x2a\x40"
+			  "\xad\x6b\x78\x41\x54\x99\xe9\xc1"
+			  "\x2b\x6c\x9b\x42\x21\xef\xe2\x50"
+			  "\x56\x8d\x78\xdf\x58\xbe\x0a\x0f"
+			  "\xfc\xfc\x0d\x2e\xd0\xcb\xa6\x0a"
+			  "\xa8\xd9\x1e\xa9\xd4\x7c\x99\x88"
+			  "\xcf\x11\xad\x1c\xd3\x04\x63\x55"
+			  "\xef\x85\x0b\x69\xa1\x40\xf1\x75"
+			  "\x24\xf4\xe5\x2c\xd4\x7a\x24\x50"
+			  "\x8f\xa2\x71\xc9\x92\x20\xcd\xcf"
+			  "\xda\x40\xbe\xf6\xfe\x1a\xca\xc7"
+			  "\x4a\x80\x45\x55\xcb\xdd\xb7\x01"
+			  "\xb0\x8d\xcb\xd2\xae\xbd\xa4\xd0"
+			  "\x5c\x10\x05\x66\x7b\xd4\xff\xd9"
+			  "\xc4\x23\x9d\x8d\x6b\x24\xf8\x3f"
+			  "\x73\x4d\x5c\x2b\x33\x4c\x5e\x63"
+			  "\x74\x6d\x03\xa1\x7a\x35\x65\x17"
+			  "\x38\x7f\x3b\xc1\x69\xcf\x61\x34"
+			  "\x30\x21\xaf\x97\x47\x12\x3f\xa1"
+			  "\xa7\x50\xc5\x87\xfb\x3f\x70\x32"
+			  "\x86\x17\x5f\x25\xe4\x74\xc6\xd0"
+			  "\x9b\x39\xe6\xe1\x5a\xec\x8f\x40"
+			  "\xce\xcc\x37\x3b\xd8\x72\x1c\x31"
+			  "\x75\xa4\xa6\x89\x8c\xdd\xd6\xd2"
+			  "\x32\x3d\xe8\xc3\x54\xab\x1f\x35"
+			  "\x52\xb4\x94\x81\xb0\x37\x3a\x03"
+			  "\xbb\xb1\x99\x30\xa5\xf8\x21\xcd"
+			  "\x93\x5d\xa7\x13\xed\xc7\x49\x09"
+			  "\x70\xda\x08\x39\xaa\x15\x9e\x45"
+			  "\x35\x2b\x0f\x5c\x8c\x8b\xc9"
+			  "\xa8\xb8\x9f\xfd\x37\x36\x31\x7e"
+			  "\x34\x4f\xc1\xc0\xca\x8a\x22\xfd",
+		.clen	= 735,
 	}
 };
 
@@ -14947,6 +15586,282 @@ static const struct aead_testvec sm4_ccm_tv_template[] = {
 			  "\x16\x84\x2D\x4F\xA1\x86\xF5\x6A"
 			  "\xB3\x32\x56\x97\x1F\xA1\x10\xF4",
 		.clen	= 80,
+	}, { /* Generated from AES-CCM test vectors */
+		.key	= "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf",
+		.klen	= 16,
+		.iv	= "\x01\x00\x00\x00\x03\x02\x01\x00"
+			  "\xa0\xa1\xa2\xa3\xa4\xa5\x00\x00",
+		.assoc	= "\x00\x01\x02\x03\x04\x05\x06\x07",
+		.alen	= 8,
+		.ptext	= "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e",
+		.plen	= 23,
+		.ctext	= "\x7b\xff\x4a\x15\xf5\x73\xce\x82"
+			  "\x6e\xc2\x31\x1d\xe2\x53\x02\xac"
+			  "\xa4\x48\xf9\xe4\xf5\x1f\x81\x70"
+			  "\x18\xbc\xb6\x84\x01\xb8\xae",
+		.clen	= 31,
+	}, {
+		.key	= "\xf4\x6b\xc2\x75\x62\xfe\xb4\xe1"
+			  "\x53\x14\x73\x66\x8d\x88\xf6\x80",
+		.klen	= 16,
+		.iv	= "\x03\xa0\x20\x35\x26\xf2\x21\x8d"
+			  "\x50\x20\xda\xe2\x00\x00\x00\x00",
+		.assoc	= "\x5b\x9e\x13\x67\x02\x5e\xef\xc1"
+			  "\x6c\xf9\xd7\x1e\x52\x8f\x7a\x47"
+			  "\xe9\xd4\xcf\x20\x14\x6e\xf0\x2d"
+			  "\xd8\x9e\x2b\x56\x10\x23\x56\xe7",
+		.alen	= 32,
+		.ctext	= "\x23\x58\xce\xdc\x40\xb1\xcd\x92"
+			  "\x47\x96\x59\xfc\x8a\x26\x4f\xcf",
+		.clen	= 16,
+	}, {
+		.key	= "\xab\x2f\x8a\x74\xb7\x1c\xd2\xb1"
+			  "\xff\x80\x2e\x48\x7d\x82\xf8\xb9",
+		.klen	= 16,
+		.iv	= "\x03\xaf\x94\x87\x78\x35\x82\x81"
+			  "\x7f\x88\x94\x68\x00\x00\x00\x00",
+		.alen	= 0,
+		.ptext	= "\x00",
+		.plen	= 0,
+		.ctext	= "\x72\x7e\xf5\xd6\x39\x7a\x2b\x43",
+		.clen	= 8,
+	}, {
+		.key	= "\x39\xbb\xa7\xbe\x59\x97\x9e\x73"
+			  "\xa4\x48\x93\x39\x26\x71\x4a\xc6",
+		.klen	= 16,
+		.iv	= "\x03\xee\x49\x83\xe9\xa9\xff\xe9"
+			  "\x57\xba\xfd\x9e\x00\x00\x00\x00",
+		.assoc	= "\x44\xa6\x2c\x05\xe9\xe1\x43\xb1"
+			  "\x58\x7c\xf2\x5c\x6d\x39\x0a\x64"
+			  "\xa4\xf0\x13\x05\xd1\x77\x99\x67"
+			  "\x11\xc4\xc6\xdb\x00\x56\x36\x61",
+		.alen	= 32,
+		.ptext	= "\x00",
+		.plen	= 0,
+		.ctext	= "\xb0\x9d\xc6\xfb\x7d\xb5\xa1\x0e",
+		.clen	= 8,
+	}, {
+		.key	= "\x58\x5d\xa0\x96\x65\x1a\x04\xd7"
+			  "\x0d\x1a\x53\x3b\xb5\xe3\xf8\x8b",
+		.klen	= 16,
+		.iv	= "\x03\xcf\x76\x3f\xd9\x95\x75\x8f"
+			  "\x44\x89\x40\x7b\x00\x00\x00\x00",
+		.assoc	= "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
+			  "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
+			  "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
+			  "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe",
+		.alen	= 32,
+		.ptext	= "\xc2\x54\xc8\xde\x78\x87\x77\x40"
+			  "\x49\x71\xe4\xb7\xe7\xcb\x76\x61"
+			  "\x0a\x41\xb9\xe9\xc0\x76\x54\xab"
+			  "\x04\x49\x3b\x19\x93\x57\x25\x5d",
+		.plen	= 32,
+		.ctext	= "\xc9\xae\xef\x1d\xf3\x2c\xd3\x38"
+			  "\xc9\x7f\x7e\x28\xe8\xaa\xb3\x60"
+			  "\x49\xdc\x66\xca\x7b\x3d\xe0\x3c"
+			  "\xcb\x45\x9c\x1b\xb2\xbe\x07\x90"
+			  "\x87\xa6\x6b\x89\x0d\x0f\x90\xaa"
+			  "\x7d\xf6\x5a\x9a\x68\x2b\x81\x92",
+		.clen	= 48,
+	}, {
+		.key	= "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+			  "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+		.klen	= 16,
+		.iv	= "\x02\xff\xff\xff\xff\x00\x00\xff"
+			  "\xff\xff\x00\xff\xff\x00\x00\x00",
+		.assoc	= "\x8f\x86\x6c\x4d\x1d\xc5\x39\x88"
+			  "\xc8\xf3\x5c\x52\x10\x63\x6f\x2b"
+			  "\x8a\x2a\xc5\x6f\x30\x23\x58\x7b"
+			  "\xfb\x36\x03\x11\xb4\xd9\xf2\xfe"
+			  "\xc8\xf3\x5c\x52\x10\x63",
+		.alen	= 38,
+		.ptext	= "\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+			  "\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+			  "\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+			  "\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+			  "\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+			  "\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+			  "\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+			  "\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+			  "\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+			  "\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+			  "\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+			  "\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+			  "\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+			  "\x35\x23\xf4\x20\x41\xd4\xad\x82"
+			  "\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+			  "\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+			  "\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+			  "\xad\x49\x3a\xae\x98\xce\xa6\x66"
+			  "\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+			  "\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+			  "\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+			  "\x57\xcc\x89\x09\x75\x9b\x78\x70"
+			  "\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+			  "\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+			  "\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+			  "\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+			  "\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+			  "\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+			  "\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+			  "\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+			  "\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+			  "\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+			  "\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+			  "\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+			  "\x02\x66\x49\xca\x7c\x91\x05\xf2"
+			  "\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+			  "\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+			  "\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+			  "\x45\xea\x78\x73\xd9\xb7\x39\x11"
+			  "\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+			  "\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+			  "\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+			  "\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+			  "\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+			  "\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+			  "\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+			  "\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+			  "\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+			  "\x03\x25\x3c\x8d\x48\x58\x71\x34"
+			  "\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+			  "\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+			  "\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+			  "\x87\x79\x60\x38\x46\xb4\x25\x57"
+			  "\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+			  "\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+			  "\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+			  "\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+			  "\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+			  "\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+			  "\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+			  "\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+			  "\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+			  "\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+			  "\x0b\x63\xde\x87\x42\x79\x8a\x68"
+			  "\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+			  "\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+			  "\xe9\x83\x84\xcb\x28\x69\x09\x69"
+			  "\xce\x99\x46\x00\x54\xcb\xd8\x38"
+			  "\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+			  "\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+			  "\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+			  "\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+			  "\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+			  "\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+			  "\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+			  "\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+			  "\x78\xc6\x91\x22\x40\x91\x80\xbe"
+			  "\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+			  "\x67\x10\xa4\x83\x98\x79\x23\xe7"
+			  "\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+			  "\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+			  "\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+			  "\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+			  "\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+			  "\x3f\x73\x09\xe2\x45\x56\x31\x51"
+			  "\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+			  "\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+			  "\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+			  "\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+			  "\xa4\x78\xdb\x74\x3d\x8b\xb5",
+		.plen	= 719,
+		.ctext	= "\xc5\x50\x85\x02\x72\xa8\xb3\x62"
+			  "\xf9\xcd\x77\x7b\x43\xa5\x04\x70"
+			  "\x68\x40\x57\x21\x1c\xfe\xef\x05"
+			  "\x4d\xb8\x44\xba\x59\xea\x62\x32"
+			  "\xcb\x6b\x6a\x39\x9b\xf3\xe5\xa4"
+			  "\x36\x38\xde\x7d\xcf\xb6\xcd\xe3"
+			  "\x89\xbf\x37\xc9\x96\x3c\x70\x10"
+			  "\x92\x47\xcc\xac\x6f\xf8\x55\x9a"
+			  "\x26\x43\x34\xb4\x92\x7d\x68\xfc"
+			  "\x60\x37\x74\x2a\x55\xba\xc7\xd7"
+			  "\x98\x69\xb7\xcf\x42\xfd\xb2\x10"
+			  "\xa0\x59\xe1\x2c\x73\x66\x12\x97"
+			  "\x85\x8b\x28\xcc\x29\x02\x15\x89"
+			  "\x23\xd3\x32\x92\x87\x57\x09\x13"
+			  "\x04\x7e\x8b\x6c\x3a\xc1\x4e\x6c"
+			  "\xe1\x9f\xc8\xcc\x47\x9c\xd8\x10"
+			  "\xf4\xb7\x5c\x30\x7a\x8b\x0f\x01"
+			  "\x52\x38\x02\x92\x99\xac\x03\x90"
+			  "\x18\x32\x2d\x21\x6a\x0a\x2a\xe7"
+			  "\xc2\xcc\x15\x84\x4e\x2b\x0b\x3a"
+			  "\x4c\xdc\xb0\x6b\x10\xd1\x27\x10"
+			  "\xf0\x4a\x5c\x43\xa0\x34\x34\x59"
+			  "\x47\x43\x48\xcb\x69\xa7\xff\x52"
+			  "\xb8\xca\x23\x09\x07\xd7\xc5\xe4"
+			  "\x2a\x4f\x99\xd5\x83\x36\x2a\x2d"
+			  "\x59\xd0\xca\xb0\xfa\x40\x8c\xab"
+			  "\xdf\x69\x08\xd9\x79\x1d\xde\xa8"
+			  "\x0b\x34\x74\x4d\xf5\xa0\x4c\x81"
+			  "\x7f\x93\x06\x40\x24\xfe\x7d\xcd"
+			  "\xe4\xfe\xf8\xf8\x30\xce\xd0\x5d"
+			  "\x70\xfd\x0d\x5a\x78\x85\x74\x2d"
+			  "\xe4\xb5\x40\x18\x99\x11\xe4\x6a"
+			  "\xdf\xfa\x4f\x25\x2c\xde\x15\xb7"
+			  "\x12\xd8\xc6\x90\x0d\x0f\xc9\xfb"
+			  "\x21\xf1\xed\xfe\x98\xe1\x03\xe2"
+			  "\x5c\xef\xb6\xc7\x87\x77\x0e\xcd"
+			  "\xff\x78\x94\xc9\xbe\xd3\x47\xf7"
+			  "\x8d\x37\x48\x01\x42\xe2\x17\x96"
+			  "\xfc\xc0\xcb\x7b\x7b\x57\xaf\x3b"
+			  "\xc9\xd0\x94\xce\x5e\x1b\xa9\x47"
+			  "\x02\x4d\x74\xcc\x45\x1d\xd3\x2d"
+			  "\x5f\x4f\x7f\xf2\x4b\xf9\x59\xee"
+			  "\x9e\x9e\xb9\x95\x29\x19\xd1\x5f"
+			  "\x72\xab\x8d\xf1\x28\xd1\x1c\xae"
+			  "\xc2\xba\xf7\x22\x84\x2c\x83\x51"
+			  "\x03\xad\xa3\xef\x81\xa7\xdc\xf1"
+			  "\x44\x51\x50\x96\x70\xd1\xe5\x47"
+			  "\x57\xf9\x30\x90\xe4\xbf\xfc\x75"
+			  "\x14\xaa\x4d\xb7\xb1\xe7\x79\x33"
+			  "\x43\xc2\x5c\xc1\xbc\x09\x92\x0f"
+			  "\xa7\xaf\x68\x51\x51\xec\x0b\xc3"
+			  "\x3d\x2b\x94\x30\x45\x29\x1b\x9e"
+			  "\x70\x56\xf8\xd6\x67\x2d\x39\x3b"
+			  "\x3c\xd2\xd0\xd3\xdc\x7d\x84\xe9"
+			  "\x06\x31\x98\xa6\x5c\xbf\x10\x58"
+			  "\xce\xbb\xa7\xe1\x65\x7e\x51\x87"
+			  "\x70\x46\xb4\x7f\xf9\xec\x92\x1c"
+			  "\x9b\x24\x49\xc1\x04\xbe\x1c\x5f"
+			  "\xcc\xb3\x33\x8c\xad\xe7\xdc\x32"
+			  "\x54\xa2\x0d\x83\x0f\x3c\x12\x5d"
+			  "\x71\xe3\x9c\xae\x71\xa3\x2a\x10"
+			  "\xc5\x91\xb4\x73\x96\x60\xdb\x5d"
+			  "\x1f\xd5\x9a\xd2\x69\xc3\xd7\x4b"
+			  "\xa2\x66\x81\x96\x4a\xaa\x02\xd6"
+			  "\xd5\x44\x9b\x42\x3a\x15\x5f\xe7"
+			  "\x4d\x7c\xf6\x71\x4a\xea\xe8\x43"
+			  "\xd7\x68\xe4\xbc\x05\x87\x49\x05"
+			  "\x3b\x47\xb2\x6d\x5f\xd1\x11\xa6"
+			  "\x58\xd4\xa2\x45\xec\xb5\x54\x55"
+			  "\xd3\xd6\xd2\x6a\x8b\x21\x9e\x2c"
+			  "\xf1\x27\x4b\x5b\xe3\xff\xe0\xfd"
+			  "\x4b\xf1\xe7\xe2\x84\xf2\x17\x37"
+			  "\x11\x68\xc4\x92\x4b\x6b\xef\x8e"
+			  "\x75\xf5\xc2\x7d\x5c\xe9\x7c\xfc"
+			  "\x2b\x00\x33\x0e\x7d\x69\xd8\xd4"
+			  "\x9b\xa8\x38\x54\x7e\x6d\x23\x51"
+			  "\x2c\xd6\xc4\x58\x23\x1c\x22\x2a"
+			  "\x59\xc5\x9b\xec\x9d\xbf\x03\x0f"
+			  "\xb3\xdd\xba\x02\x22\xa0\x34\x37"
+			  "\x19\x56\xc2\x5b\x32\x1d\x1e\x66"
+			  "\x68\xf4\x47\x05\x04\x18\xa7\x28"
+			  "\x80\xf2\xc7\x99\xed\x1e\x72\x48"
+			  "\x8f\x97\x5d\xb3\x74\x42\xfd\x0c"
+			  "\x0f\x5f\x29\x0c\xf1\x35\x22\x90"
+			  "\xd6\x7c\xb8\xa3\x2a\x89\x38\x71"
+			  "\xe9\x7a\x55\x3c\x3b\xf2\x6e\x1a"
+			  "\x22\x8f\x07\x81\xc1\xe1\xf1\x76"
+			  "\x2a\x75\xab\x86\xc4\xcc\x52\x59"
+			  "\x83\x19\x5e\xb3\x53\xe2\x81\xdf"
+			  "\xe6\x15\xb3\xba\x0c\x0e\xba"
+			  "\xa9\x2c\xed\x51\xd5\x06\xc8\xc6"
+			  "\x4b\x9f\x5d\x1b\x61\x31\xad\xf4",
+		.clen	= 735,
 	}
 };
 
@@ -15030,6 +15945,68 @@ static const struct hash_testvec sm4_cmac128_tv_template[] = {
 	}
 };
 
+static const struct hash_testvec sm4_xcbc128_tv_template[] = {
+	{ /* Generated from AES-XCBC128 test vectors */
+		.key		= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext 	= zeroed_string,
+		.digest 	= "\xa9\x9a\x5c\x44\xe2\x34\xee\x2c"
+				  "\x9b\xe4\x9d\xca\x64\xb0\xa5\xc4",
+		.psize		= 0,
+		.ksize		= 16,
+	}, {
+		.key		= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext 	= "\x00\x01\x02",
+		.digest		= "\x17\x27\x62\xf3\x8b\x88\x1d\xc0"
+				  "\x97\x35\x9c\x3e\x9f\x27\xb7\x83",
+		.psize		= 3,
+		.ksize		= 16,
+	} , {
+		.key		= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext 	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.digest 	= "\xda\x45\xd1\xac\xec\x4d\xab\x46"
+				  "\xdd\x59\xe0\x44\xff\x59\xd5\xfc",
+		.psize		= 16,
+		.ksize		= 16,
+	}, {
+		.key		= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext 	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+				  "\x10\x11\x12\x13",
+		.digest 	= "\xbe\x24\x5d\x81\x8c\x8a\x10\xa4"
+				  "\x8e\xc2\x16\xfa\xa4\x83\xc9\x2a",
+		.psize		= 20,
+		.ksize		= 16,
+	}, {
+		.key		= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext 	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+				  "\x10\x11\x12\x13\x14\x15\x16\x17"
+				  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.digest 	= "\x91\x82\x31\x56\xd5\x77\xa4\xc5"
+				  "\x88\x2d\xce\x3a\x87\x5e\xbd\xba",
+		.psize		= 32,
+		.ksize		= 16,
+	}, {
+		.key		= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.plaintext 	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+				  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+				  "\x10\x11\x12\x13\x14\x15\x16\x17"
+				  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+				  "\x20\x21",
+		.digest 	= "\x2a\xae\xa5\x24\x0c\x12\x9f\x5f"
+				  "\x55\xfb\xae\x35\x13\x0d\x22\x2d",
+		.psize		= 34,
+		.ksize		= 16,
+	}
+};
+
 /* Cast6 test vectors from RFC 2612 */
 static const struct cipher_testvec cast6_tv_template[] = {
 	{
-- 
GitLab


From 3c3836378dd578a0d510420c0f67818e8dc49f0e Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:54:57 +0800
Subject: [PATCH 0902/4122] crypto: tcrypt - add SM4 cts-cbc/xts/xcbc test

Added CTS-CBC/XTS/XCBC tests for SM4 algorithms, as well as
corresponding speed tests, this is to test performance-optimized
implementations of these modes.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index b096ae901aa80..0f101897e90fb 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1710,6 +1710,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret = min(ret, tcrypt_test("gcm(aria)"));
 		break;
 
+	case 59:
+		ret = min(ret, tcrypt_test("cts(cbc(sm4))"));
+		break;
+
 	case 100:
 		ret = min(ret, tcrypt_test("hmac(md5)"));
 		break;
@@ -1810,6 +1814,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret = min(ret, tcrypt_test("cmac(sm4)"));
 		break;
 
+	case 160:
+		ret = min(ret, tcrypt_test("xcbc(sm4)"));
+		break;
+
 	case 181:
 		ret = min(ret, tcrypt_test("authenc(hmac(sha1),cbc(des))"));
 		break;
@@ -1845,6 +1853,7 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret = min(ret, tcrypt_test("cbc(sm4)"));
 		ret = min(ret, tcrypt_test("cfb(sm4)"));
 		ret = min(ret, tcrypt_test("ctr(sm4)"));
+		ret = min(ret, tcrypt_test("xts(sm4)"));
 		break;
 	case 192:
 		ret = min(ret, tcrypt_test("ecb(aria)"));
@@ -2108,6 +2117,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 				speed_template_16);
 		test_cipher_speed("cbc(sm4)", DECRYPT, sec, NULL, 0,
 				speed_template_16);
+		test_cipher_speed("cts(cbc(sm4))", ENCRYPT, sec, NULL, 0,
+				speed_template_16);
+		test_cipher_speed("cts(cbc(sm4))", DECRYPT, sec, NULL, 0,
+				speed_template_16);
 		test_cipher_speed("cfb(sm4)", ENCRYPT, sec, NULL, 0,
 				speed_template_16);
 		test_cipher_speed("cfb(sm4)", DECRYPT, sec, NULL, 0,
@@ -2116,6 +2129,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 				speed_template_16);
 		test_cipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
 				speed_template_16);
+		test_cipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_32);
+		test_cipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_32);
 		break;
 
 	case 219:
@@ -2629,6 +2646,10 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 				speed_template_16);
 		test_acipher_speed("ctr(sm4)", DECRYPT, sec, NULL, 0,
 				speed_template_16);
+		test_acipher_speed("xts(sm4)", ENCRYPT, sec, NULL, 0,
+				speed_template_32);
+		test_acipher_speed("xts(sm4)", DECRYPT, sec, NULL, 0,
+				speed_template_32);
 		break;
 
 	case 519:
-- 
GitLab


From ce41fefd2443c25166458f24621b53a28fff989f Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:54:58 +0800
Subject: [PATCH 0903/4122] crypto: arm64/sm4 - refactor and simplify CE
 implementation

This patch does not add new features, but only refactors and simplifies the
implementation of the Crypto Extension acceleration of the SM4 algorithm:

Extract the macro optimized by SM4 Crypto Extension for reuse in the
subsequent optimization of CCM/GCM modes.

Encryption in CBC and CFB modes processes four blocks at a time instead of
one, allowing the ld1 instruction to load 64 bytes of data at a time, which
will reduces unnecessary memory accesses.

CBC/CFB/CTR makes full use of free registers to reduce redundant memory
accesses, and rearranges some instructions to improve out-of-order execution
capabilities.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm4-ce-asm.h  | 209 +++++++++++
 arch/arm64/crypto/sm4-ce-core.S | 646 ++++++++++++++------------------
 arch/arm64/crypto/sm4-ce-glue.c |  64 ++--
 3 files changed, 519 insertions(+), 400 deletions(-)
 create mode 100644 arch/arm64/crypto/sm4-ce-asm.h

diff --git a/arch/arm64/crypto/sm4-ce-asm.h b/arch/arm64/crypto/sm4-ce-asm.h
new file mode 100644
index 0000000000000..7ea98e42e779c
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-asm.h
@@ -0,0 +1,209 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 helper macros for Crypto Extensions
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#define SM4_PREPARE(ptr)					\
+	ld1		{v24.16b-v27.16b}, [ptr], #64;		\
+	ld1		{v28.16b-v31.16b}, [ptr];
+
+#define SM4_CRYPT_BLK_BE(b0)					\
+	sm4e		b0.4s, v24.4s;				\
+	sm4e		b0.4s, v25.4s;				\
+	sm4e		b0.4s, v26.4s;				\
+	sm4e		b0.4s, v27.4s;				\
+	sm4e		b0.4s, v28.4s;				\
+	sm4e		b0.4s, v29.4s;				\
+	sm4e		b0.4s, v30.4s;				\
+	sm4e		b0.4s, v31.4s;				\
+	rev64		b0.4s, b0.4s;				\
+	ext		b0.16b, b0.16b, b0.16b, #8;		\
+	rev32		b0.16b, b0.16b;
+
+#define SM4_CRYPT_BLK(b0)					\
+	rev32		b0.16b, b0.16b;				\
+	SM4_CRYPT_BLK_BE(b0);
+
+#define SM4_CRYPT_BLK2_BE(b0, b1)				\
+	sm4e		b0.4s, v24.4s;				\
+	sm4e		b1.4s, v24.4s;				\
+	sm4e		b0.4s, v25.4s;				\
+	sm4e		b1.4s, v25.4s;				\
+	sm4e		b0.4s, v26.4s;				\
+	sm4e		b1.4s, v26.4s;				\
+	sm4e		b0.4s, v27.4s;				\
+	sm4e		b1.4s, v27.4s;				\
+	sm4e		b0.4s, v28.4s;				\
+	sm4e		b1.4s, v28.4s;				\
+	sm4e		b0.4s, v29.4s;				\
+	sm4e		b1.4s, v29.4s;				\
+	sm4e		b0.4s, v30.4s;				\
+	sm4e		b1.4s, v30.4s;				\
+	sm4e		b0.4s, v31.4s;				\
+	sm4e		b1.4s, v31.4s;				\
+	rev64		b0.4s, b0.4s;				\
+	rev64		b1.4s, b1.4s;				\
+	ext		b0.16b, b0.16b, b0.16b, #8;		\
+	ext		b1.16b, b1.16b, b1.16b, #8;		\
+	rev32		b0.16b, b0.16b;				\
+	rev32		b1.16b, b1.16b;				\
+
+#define SM4_CRYPT_BLK2(b0, b1)					\
+	rev32		b0.16b, b0.16b;				\
+	rev32		b1.16b, b1.16b;				\
+	SM4_CRYPT_BLK2_BE(b0, b1);
+
+#define SM4_CRYPT_BLK4_BE(b0, b1, b2, b3)			\
+	sm4e		b0.4s, v24.4s;				\
+	sm4e		b1.4s, v24.4s;				\
+	sm4e		b2.4s, v24.4s;				\
+	sm4e		b3.4s, v24.4s;				\
+	sm4e		b0.4s, v25.4s;				\
+	sm4e		b1.4s, v25.4s;				\
+	sm4e		b2.4s, v25.4s;				\
+	sm4e		b3.4s, v25.4s;				\
+	sm4e		b0.4s, v26.4s;				\
+	sm4e		b1.4s, v26.4s;				\
+	sm4e		b2.4s, v26.4s;				\
+	sm4e		b3.4s, v26.4s;				\
+	sm4e		b0.4s, v27.4s;				\
+	sm4e		b1.4s, v27.4s;				\
+	sm4e		b2.4s, v27.4s;				\
+	sm4e		b3.4s, v27.4s;				\
+	sm4e		b0.4s, v28.4s;				\
+	sm4e		b1.4s, v28.4s;				\
+	sm4e		b2.4s, v28.4s;				\
+	sm4e		b3.4s, v28.4s;				\
+	sm4e		b0.4s, v29.4s;				\
+	sm4e		b1.4s, v29.4s;				\
+	sm4e		b2.4s, v29.4s;				\
+	sm4e		b3.4s, v29.4s;				\
+	sm4e		b0.4s, v30.4s;				\
+	sm4e		b1.4s, v30.4s;				\
+	sm4e		b2.4s, v30.4s;				\
+	sm4e		b3.4s, v30.4s;				\
+	sm4e		b0.4s, v31.4s;				\
+	sm4e		b1.4s, v31.4s;				\
+	sm4e		b2.4s, v31.4s;				\
+	sm4e		b3.4s, v31.4s;				\
+	rev64		b0.4s, b0.4s;				\
+	rev64		b1.4s, b1.4s;				\
+	rev64		b2.4s, b2.4s;				\
+	rev64		b3.4s, b3.4s;				\
+	ext		b0.16b, b0.16b, b0.16b, #8;		\
+	ext		b1.16b, b1.16b, b1.16b, #8;		\
+	ext		b2.16b, b2.16b, b2.16b, #8;		\
+	ext		b3.16b, b3.16b, b3.16b, #8;		\
+	rev32		b0.16b, b0.16b;				\
+	rev32		b1.16b, b1.16b;				\
+	rev32		b2.16b, b2.16b;				\
+	rev32		b3.16b, b3.16b;
+
+#define SM4_CRYPT_BLK4(b0, b1, b2, b3)				\
+	rev32		b0.16b, b0.16b;				\
+	rev32		b1.16b, b1.16b;				\
+	rev32		b2.16b, b2.16b;				\
+	rev32		b3.16b, b3.16b;				\
+	SM4_CRYPT_BLK4_BE(b0, b1, b2, b3);
+
+#define SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7)	\
+	sm4e		b0.4s, v24.4s;				\
+	sm4e		b1.4s, v24.4s;				\
+	sm4e		b2.4s, v24.4s;				\
+	sm4e		b3.4s, v24.4s;				\
+	sm4e		b4.4s, v24.4s;				\
+	sm4e		b5.4s, v24.4s;				\
+	sm4e		b6.4s, v24.4s;				\
+	sm4e		b7.4s, v24.4s;				\
+	sm4e		b0.4s, v25.4s;				\
+	sm4e		b1.4s, v25.4s;				\
+	sm4e		b2.4s, v25.4s;				\
+	sm4e		b3.4s, v25.4s;				\
+	sm4e		b4.4s, v25.4s;				\
+	sm4e		b5.4s, v25.4s;				\
+	sm4e		b6.4s, v25.4s;				\
+	sm4e		b7.4s, v25.4s;				\
+	sm4e		b0.4s, v26.4s;				\
+	sm4e		b1.4s, v26.4s;				\
+	sm4e		b2.4s, v26.4s;				\
+	sm4e		b3.4s, v26.4s;				\
+	sm4e		b4.4s, v26.4s;				\
+	sm4e		b5.4s, v26.4s;				\
+	sm4e		b6.4s, v26.4s;				\
+	sm4e		b7.4s, v26.4s;				\
+	sm4e		b0.4s, v27.4s;				\
+	sm4e		b1.4s, v27.4s;				\
+	sm4e		b2.4s, v27.4s;				\
+	sm4e		b3.4s, v27.4s;				\
+	sm4e		b4.4s, v27.4s;				\
+	sm4e		b5.4s, v27.4s;				\
+	sm4e		b6.4s, v27.4s;				\
+	sm4e		b7.4s, v27.4s;				\
+	sm4e		b0.4s, v28.4s;				\
+	sm4e		b1.4s, v28.4s;				\
+	sm4e		b2.4s, v28.4s;				\
+	sm4e		b3.4s, v28.4s;				\
+	sm4e		b4.4s, v28.4s;				\
+	sm4e		b5.4s, v28.4s;				\
+	sm4e		b6.4s, v28.4s;				\
+	sm4e		b7.4s, v28.4s;				\
+	sm4e		b0.4s, v29.4s;				\
+	sm4e		b1.4s, v29.4s;				\
+	sm4e		b2.4s, v29.4s;				\
+	sm4e		b3.4s, v29.4s;				\
+	sm4e		b4.4s, v29.4s;				\
+	sm4e		b5.4s, v29.4s;				\
+	sm4e		b6.4s, v29.4s;				\
+	sm4e		b7.4s, v29.4s;				\
+	sm4e		b0.4s, v30.4s;				\
+	sm4e		b1.4s, v30.4s;				\
+	sm4e		b2.4s, v30.4s;				\
+	sm4e		b3.4s, v30.4s;				\
+	sm4e		b4.4s, v30.4s;				\
+	sm4e		b5.4s, v30.4s;				\
+	sm4e		b6.4s, v30.4s;				\
+	sm4e		b7.4s, v30.4s;				\
+	sm4e		b0.4s, v31.4s;				\
+	sm4e		b1.4s, v31.4s;				\
+	sm4e		b2.4s, v31.4s;				\
+	sm4e		b3.4s, v31.4s;				\
+	sm4e		b4.4s, v31.4s;				\
+	sm4e		b5.4s, v31.4s;				\
+	sm4e		b6.4s, v31.4s;				\
+	sm4e		b7.4s, v31.4s;				\
+	rev64		b0.4s, b0.4s;				\
+	rev64		b1.4s, b1.4s;				\
+	rev64		b2.4s, b2.4s;				\
+	rev64		b3.4s, b3.4s;				\
+	rev64		b4.4s, b4.4s;				\
+	rev64		b5.4s, b5.4s;				\
+	rev64		b6.4s, b6.4s;				\
+	rev64		b7.4s, b7.4s;				\
+	ext		b0.16b, b0.16b, b0.16b, #8;		\
+	ext		b1.16b, b1.16b, b1.16b, #8;		\
+	ext		b2.16b, b2.16b, b2.16b, #8;		\
+	ext		b3.16b, b3.16b, b3.16b, #8;		\
+	ext		b4.16b, b4.16b, b4.16b, #8;		\
+	ext		b5.16b, b5.16b, b5.16b, #8;		\
+	ext		b6.16b, b6.16b, b6.16b, #8;		\
+	ext		b7.16b, b7.16b, b7.16b, #8;		\
+	rev32		b0.16b, b0.16b;				\
+	rev32		b1.16b, b1.16b;				\
+	rev32		b2.16b, b2.16b;				\
+	rev32		b3.16b, b3.16b;				\
+	rev32		b4.16b, b4.16b;				\
+	rev32		b5.16b, b5.16b;				\
+	rev32		b6.16b, b6.16b;				\
+	rev32		b7.16b, b7.16b;
+
+#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7)		\
+	rev32		b0.16b, b0.16b;				\
+	rev32		b1.16b, b1.16b;				\
+	rev32		b2.16b, b2.16b;				\
+	rev32		b3.16b, b3.16b;				\
+	rev32		b4.16b, b4.16b;				\
+	rev32		b5.16b, b5.16b;				\
+	rev32		b6.16b, b6.16b;				\
+	rev32		b7.16b, b7.16b;				\
+	SM4_CRYPT_BLK8_BE(b0, b1, b2, b3, b4, b5, b6, b7);
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 934e0f0932799..41fc745a85284 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -10,10 +10,12 @@
 
 #include <linux/linkage.h>
 #include <asm/assembler.h>
+#include "sm4-ce-asm.h"
 
 .arch	armv8-a+crypto
 
-.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 16, 20, 24, 25, 26, 27, 28, 29, 30, 31
+.irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, \
+		20, 24, 25, 26, 27, 28, 29, 30, 31
 	.set .Lv\b\().4s, \b
 .endr
 
@@ -34,174 +36,6 @@
 
 #define RIV	v20
 
-/* Helper macros. */
-
-#define PREPARE                                       \
-	ld1		{v24.16b-v27.16b}, [x0], #64; \
-	ld1		{v28.16b-v31.16b}, [x0];
-
-#define SM4_CRYPT_BLK(b0)                           \
-	rev32		b0.16b, b0.16b;             \
-	sm4e		b0.4s, v24.4s;              \
-	sm4e		b0.4s, v25.4s;              \
-	sm4e		b0.4s, v26.4s;              \
-	sm4e		b0.4s, v27.4s;              \
-	sm4e		b0.4s, v28.4s;              \
-	sm4e		b0.4s, v29.4s;              \
-	sm4e		b0.4s, v30.4s;              \
-	sm4e		b0.4s, v31.4s;              \
-	rev64		b0.4s, b0.4s;               \
-	ext		b0.16b, b0.16b, b0.16b, #8; \
-	rev32		b0.16b, b0.16b;
-
-#define SM4_CRYPT_BLK4(b0, b1, b2, b3)              \
-	rev32		b0.16b, b0.16b;             \
-	rev32		b1.16b, b1.16b;             \
-	rev32		b2.16b, b2.16b;             \
-	rev32		b3.16b, b3.16b;             \
-	sm4e		b0.4s, v24.4s;              \
-	sm4e		b1.4s, v24.4s;              \
-	sm4e		b2.4s, v24.4s;              \
-	sm4e		b3.4s, v24.4s;              \
-	sm4e		b0.4s, v25.4s;              \
-	sm4e		b1.4s, v25.4s;              \
-	sm4e		b2.4s, v25.4s;              \
-	sm4e		b3.4s, v25.4s;              \
-	sm4e		b0.4s, v26.4s;              \
-	sm4e		b1.4s, v26.4s;              \
-	sm4e		b2.4s, v26.4s;              \
-	sm4e		b3.4s, v26.4s;              \
-	sm4e		b0.4s, v27.4s;              \
-	sm4e		b1.4s, v27.4s;              \
-	sm4e		b2.4s, v27.4s;              \
-	sm4e		b3.4s, v27.4s;              \
-	sm4e		b0.4s, v28.4s;              \
-	sm4e		b1.4s, v28.4s;              \
-	sm4e		b2.4s, v28.4s;              \
-	sm4e		b3.4s, v28.4s;              \
-	sm4e		b0.4s, v29.4s;              \
-	sm4e		b1.4s, v29.4s;              \
-	sm4e		b2.4s, v29.4s;              \
-	sm4e		b3.4s, v29.4s;              \
-	sm4e		b0.4s, v30.4s;              \
-	sm4e		b1.4s, v30.4s;              \
-	sm4e		b2.4s, v30.4s;              \
-	sm4e		b3.4s, v30.4s;              \
-	sm4e		b0.4s, v31.4s;              \
-	sm4e		b1.4s, v31.4s;              \
-	sm4e		b2.4s, v31.4s;              \
-	sm4e		b3.4s, v31.4s;              \
-	rev64		b0.4s, b0.4s;               \
-	rev64		b1.4s, b1.4s;               \
-	rev64		b2.4s, b2.4s;               \
-	rev64		b3.4s, b3.4s;               \
-	ext		b0.16b, b0.16b, b0.16b, #8; \
-	ext		b1.16b, b1.16b, b1.16b, #8; \
-	ext		b2.16b, b2.16b, b2.16b, #8; \
-	ext		b3.16b, b3.16b, b3.16b, #8; \
-	rev32		b0.16b, b0.16b;             \
-	rev32		b1.16b, b1.16b;             \
-	rev32		b2.16b, b2.16b;             \
-	rev32		b3.16b, b3.16b;
-
-#define SM4_CRYPT_BLK8(b0, b1, b2, b3, b4, b5, b6, b7) \
-	rev32		b0.16b, b0.16b;             \
-	rev32		b1.16b, b1.16b;             \
-	rev32		b2.16b, b2.16b;             \
-	rev32		b3.16b, b3.16b;             \
-	rev32		b4.16b, b4.16b;             \
-	rev32		b5.16b, b5.16b;             \
-	rev32		b6.16b, b6.16b;             \
-	rev32		b7.16b, b7.16b;             \
-	sm4e		b0.4s, v24.4s;              \
-	sm4e		b1.4s, v24.4s;              \
-	sm4e		b2.4s, v24.4s;              \
-	sm4e		b3.4s, v24.4s;              \
-	sm4e		b4.4s, v24.4s;              \
-	sm4e		b5.4s, v24.4s;              \
-	sm4e		b6.4s, v24.4s;              \
-	sm4e		b7.4s, v24.4s;              \
-	sm4e		b0.4s, v25.4s;              \
-	sm4e		b1.4s, v25.4s;              \
-	sm4e		b2.4s, v25.4s;              \
-	sm4e		b3.4s, v25.4s;              \
-	sm4e		b4.4s, v25.4s;              \
-	sm4e		b5.4s, v25.4s;              \
-	sm4e		b6.4s, v25.4s;              \
-	sm4e		b7.4s, v25.4s;              \
-	sm4e		b0.4s, v26.4s;              \
-	sm4e		b1.4s, v26.4s;              \
-	sm4e		b2.4s, v26.4s;              \
-	sm4e		b3.4s, v26.4s;              \
-	sm4e		b4.4s, v26.4s;              \
-	sm4e		b5.4s, v26.4s;              \
-	sm4e		b6.4s, v26.4s;              \
-	sm4e		b7.4s, v26.4s;              \
-	sm4e		b0.4s, v27.4s;              \
-	sm4e		b1.4s, v27.4s;              \
-	sm4e		b2.4s, v27.4s;              \
-	sm4e		b3.4s, v27.4s;              \
-	sm4e		b4.4s, v27.4s;              \
-	sm4e		b5.4s, v27.4s;              \
-	sm4e		b6.4s, v27.4s;              \
-	sm4e		b7.4s, v27.4s;              \
-	sm4e		b0.4s, v28.4s;              \
-	sm4e		b1.4s, v28.4s;              \
-	sm4e		b2.4s, v28.4s;              \
-	sm4e		b3.4s, v28.4s;              \
-	sm4e		b4.4s, v28.4s;              \
-	sm4e		b5.4s, v28.4s;              \
-	sm4e		b6.4s, v28.4s;              \
-	sm4e		b7.4s, v28.4s;              \
-	sm4e		b0.4s, v29.4s;              \
-	sm4e		b1.4s, v29.4s;              \
-	sm4e		b2.4s, v29.4s;              \
-	sm4e		b3.4s, v29.4s;              \
-	sm4e		b4.4s, v29.4s;              \
-	sm4e		b5.4s, v29.4s;              \
-	sm4e		b6.4s, v29.4s;              \
-	sm4e		b7.4s, v29.4s;              \
-	sm4e		b0.4s, v30.4s;              \
-	sm4e		b1.4s, v30.4s;              \
-	sm4e		b2.4s, v30.4s;              \
-	sm4e		b3.4s, v30.4s;              \
-	sm4e		b4.4s, v30.4s;              \
-	sm4e		b5.4s, v30.4s;              \
-	sm4e		b6.4s, v30.4s;              \
-	sm4e		b7.4s, v30.4s;              \
-	sm4e		b0.4s, v31.4s;              \
-	sm4e		b1.4s, v31.4s;              \
-	sm4e		b2.4s, v31.4s;              \
-	sm4e		b3.4s, v31.4s;              \
-	sm4e		b4.4s, v31.4s;              \
-	sm4e		b5.4s, v31.4s;              \
-	sm4e		b6.4s, v31.4s;              \
-	sm4e		b7.4s, v31.4s;              \
-	rev64		b0.4s, b0.4s;               \
-	rev64		b1.4s, b1.4s;               \
-	rev64		b2.4s, b2.4s;               \
-	rev64		b3.4s, b3.4s;               \
-	rev64		b4.4s, b4.4s;               \
-	rev64		b5.4s, b5.4s;               \
-	rev64		b6.4s, b6.4s;               \
-	rev64		b7.4s, b7.4s;               \
-	ext		b0.16b, b0.16b, b0.16b, #8; \
-	ext		b1.16b, b1.16b, b1.16b, #8; \
-	ext		b2.16b, b2.16b, b2.16b, #8; \
-	ext		b3.16b, b3.16b, b3.16b, #8; \
-	ext		b4.16b, b4.16b, b4.16b, #8; \
-	ext		b5.16b, b5.16b, b5.16b, #8; \
-	ext		b6.16b, b6.16b, b6.16b, #8; \
-	ext		b7.16b, b7.16b, b7.16b, #8; \
-	rev32		b0.16b, b0.16b;             \
-	rev32		b1.16b, b1.16b;             \
-	rev32		b2.16b, b2.16b;             \
-	rev32		b3.16b, b3.16b;             \
-	rev32		b4.16b, b4.16b;             \
-	rev32		b5.16b, b5.16b;             \
-	rev32		b6.16b, b6.16b;             \
-	rev32		b7.16b, b7.16b;
-
 
 .align 3
 SYM_FUNC_START(sm4_ce_expand_key)
@@ -268,7 +102,7 @@ SYM_FUNC_START(sm4_ce_crypt_block)
 	 *   x1: dst
 	 *   x2: src
 	 */
-	PREPARE;
+	SM4_PREPARE(x0)
 
 	ld1		{v0.16b}, [x2];
 	SM4_CRYPT_BLK(v0);
@@ -285,7 +119,7 @@ SYM_FUNC_START(sm4_ce_crypt)
 	 *   x2: src
 	 *   w3: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE(x0)
 
 .Lcrypt_loop_blk:
 	sub		w3, w3, #8;
@@ -337,26 +171,50 @@ SYM_FUNC_START(sm4_ce_cbc_enc)
 	 *   x3: iv (big endian, 128 bit)
 	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE(x0)
+
+	ld1		{RIV.16b}, [x3]
+
+.Lcbc_enc_loop_4x:
+	cmp		w4, #4
+	blt		.Lcbc_enc_loop_1x
+
+	sub		w4, w4, #4
 
-	ld1		{RIV.16b}, [x3];
+	ld1		{v0.16b-v3.16b}, [x2], #64
 
-.Lcbc_enc_loop:
-	sub		w4, w4, #1;
+	eor		v0.16b, v0.16b, RIV.16b
+	SM4_CRYPT_BLK(v0)
+	eor		v1.16b, v1.16b, v0.16b
+	SM4_CRYPT_BLK(v1)
+	eor		v2.16b, v2.16b, v1.16b
+	SM4_CRYPT_BLK(v2)
+	eor		v3.16b, v3.16b, v2.16b
+	SM4_CRYPT_BLK(v3)
 
-	ld1		{RTMP0.16b}, [x2], #16;
-	eor		RIV.16b, RIV.16b, RTMP0.16b;
+	st1		{v0.16b-v3.16b}, [x1], #64
+	mov		RIV.16b, v3.16b
 
-	SM4_CRYPT_BLK(RIV);
+	cbz		w4, .Lcbc_enc_end
+	b		.Lcbc_enc_loop_4x
 
-	st1		{RIV.16b}, [x1], #16;
+.Lcbc_enc_loop_1x:
+	sub		w4, w4, #1
 
-	cbnz		w4, .Lcbc_enc_loop;
+	ld1		{v0.16b}, [x2], #16
 
+	eor		RIV.16b, RIV.16b, v0.16b
+	SM4_CRYPT_BLK(RIV)
+
+	st1		{RIV.16b}, [x1], #16
+
+	cbnz		w4, .Lcbc_enc_loop_1x
+
+.Lcbc_enc_end:
 	/* store new IV */
-	st1		{RIV.16b}, [x3];
+	st1		{RIV.16b}, [x3]
 
-	ret;
+	ret
 SYM_FUNC_END(sm4_ce_cbc_enc)
 
 .align 3
@@ -368,79 +226,93 @@ SYM_FUNC_START(sm4_ce_cbc_dec)
 	 *   x3: iv (big endian, 128 bit)
 	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE(x0)
 
-	ld1		{RIV.16b}, [x3];
+	ld1		{RIV.16b}, [x3]
 
-.Lcbc_loop_blk:
-	sub		w4, w4, #8;
-	tbnz		w4, #31, .Lcbc_tail8;
+.Lcbc_dec_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lcbc_dec_4x
 
-	ld1		{v0.16b-v3.16b}, [x2], #64;
-	ld1		{v4.16b-v7.16b}, [x2];
+	ld1		{v0.16b-v3.16b}, [x2], #64
+	ld1		{v4.16b-v7.16b}, [x2], #64
 
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+	rev32		v8.16b, v0.16b
+	rev32		v9.16b, v1.16b
+	rev32		v10.16b, v2.16b
+	rev32		v11.16b, v3.16b
+	rev32		v12.16b, v4.16b
+	rev32		v13.16b, v5.16b
+	rev32		v14.16b, v6.16b
+	rev32		v15.16b, v7.16b
 
-	sub		x2, x2, #64;
-	eor		v0.16b, v0.16b, RIV.16b;
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v1.16b, v1.16b, RTMP0.16b;
-	eor		v2.16b, v2.16b, RTMP1.16b;
-	eor		v3.16b, v3.16b, RTMP2.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
+	SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)
 
-	eor		v4.16b, v4.16b, RTMP3.16b;
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v5.16b, v5.16b, RTMP0.16b;
-	eor		v6.16b, v6.16b, RTMP1.16b;
-	eor		v7.16b, v7.16b, RTMP2.16b;
+	eor		v8.16b, v8.16b, RIV.16b
+	eor		v9.16b, v9.16b, v0.16b
+	eor		v10.16b, v10.16b, v1.16b
+	eor		v11.16b, v11.16b, v2.16b
+	eor		v12.16b, v12.16b, v3.16b
+	eor		v13.16b, v13.16b, v4.16b
+	eor		v14.16b, v14.16b, v5.16b
+	eor		v15.16b, v15.16b, v6.16b
 
-	mov		RIV.16b, RTMP3.16b;
-	st1		{v4.16b-v7.16b}, [x1], #64;
+	st1		{v8.16b-v11.16b}, [x1], #64
+	st1		{v12.16b-v15.16b}, [x1], #64
 
-	cbz		w4, .Lcbc_end;
-	b		.Lcbc_loop_blk;
+	mov		RIV.16b, v7.16b
 
-.Lcbc_tail8:
-	add		w4, w4, #8;
-	cmp		w4, #4;
-	blt		.Lcbc_tail4;
+	cbz		w4, .Lcbc_dec_end
+	b		.Lcbc_dec_loop_8x
 
-	sub		w4, w4, #4;
+.Lcbc_dec_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lcbc_dec_loop_1x
 
-	ld1		{v0.16b-v3.16b}, [x2];
+	sub		w4, w4, #4
 
-	SM4_CRYPT_BLK4(v0, v1, v2, v3);
+	ld1		{v0.16b-v3.16b}, [x2], #64
 
-	eor		v0.16b, v0.16b, RIV.16b;
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v1.16b, v1.16b, RTMP0.16b;
-	eor		v2.16b, v2.16b, RTMP1.16b;
-	eor		v3.16b, v3.16b, RTMP2.16b;
+	rev32		v8.16b, v0.16b
+	rev32		v9.16b, v1.16b
+	rev32		v10.16b, v2.16b
+	rev32		v11.16b, v3.16b
 
-	mov		RIV.16b, RTMP3.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
+	SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)
 
-	cbz		w4, .Lcbc_end;
+	eor		v8.16b, v8.16b, RIV.16b
+	eor		v9.16b, v9.16b, v0.16b
+	eor		v10.16b, v10.16b, v1.16b
+	eor		v11.16b, v11.16b, v2.16b
 
-.Lcbc_tail4:
-	sub		w4, w4, #1;
+	st1		{v8.16b-v11.16b}, [x1], #64
 
-	ld1		{v0.16b}, [x2];
+	mov		RIV.16b, v3.16b
 
-	SM4_CRYPT_BLK(v0);
+	cbz		w4, .Lcbc_dec_end
 
-	eor		v0.16b, v0.16b, RIV.16b;
-	ld1		{RIV.16b}, [x2], #16;
-	st1		{v0.16b}, [x1], #16;
+.Lcbc_dec_loop_1x:
+	sub		w4, w4, #1
+
+	ld1		{v0.16b}, [x2], #16
+
+	rev32		v8.16b, v0.16b
+
+	SM4_CRYPT_BLK_BE(v8)
 
-	cbnz		w4, .Lcbc_tail4;
+	eor		v8.16b, v8.16b, RIV.16b
+	st1		{v8.16b}, [x1], #16
 
-.Lcbc_end:
+	mov		RIV.16b, v0.16b
+
+	cbnz		w4, .Lcbc_dec_loop_1x
+
+.Lcbc_dec_end:
 	/* store new IV */
-	st1		{RIV.16b}, [x3];
+	st1		{RIV.16b}, [x3]
 
-	ret;
+	ret
 SYM_FUNC_END(sm4_ce_cbc_dec)
 
 .align 3
@@ -452,25 +324,57 @@ SYM_FUNC_START(sm4_ce_cfb_enc)
 	 *   x3: iv (big endian, 128 bit)
 	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE(x0)
+
+	ld1		{RIV.16b}, [x3]
+
+.Lcfb_enc_loop_4x:
+	cmp		w4, #4
+	blt		.Lcfb_enc_loop_1x
+
+	sub		w4, w4, #4
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+
+	rev32		v8.16b, RIV.16b
+	SM4_CRYPT_BLK_BE(v8)
+	eor		v0.16b, v0.16b, v8.16b
+
+	rev32		v8.16b, v0.16b
+	SM4_CRYPT_BLK_BE(v8)
+	eor		v1.16b, v1.16b, v8.16b
+
+	rev32		v8.16b, v1.16b
+	SM4_CRYPT_BLK_BE(v8)
+	eor		v2.16b, v2.16b, v8.16b
+
+	rev32		v8.16b, v2.16b
+	SM4_CRYPT_BLK_BE(v8)
+	eor		v3.16b, v3.16b, v8.16b
 
-	ld1		{RIV.16b}, [x3];
+	st1		{v0.16b-v3.16b}, [x1], #64
+	mov		RIV.16b, v3.16b
 
-.Lcfb_enc_loop:
-	sub		w4, w4, #1;
+	cbz		w4, .Lcfb_enc_end
+	b		.Lcfb_enc_loop_4x
 
-	SM4_CRYPT_BLK(RIV);
+.Lcfb_enc_loop_1x:
+	sub		w4, w4, #1
 
-	ld1		{RTMP0.16b}, [x2], #16;
-	eor		RIV.16b, RIV.16b, RTMP0.16b;
-	st1		{RIV.16b}, [x1], #16;
+	ld1		{v0.16b}, [x2], #16
 
-	cbnz		w4, .Lcfb_enc_loop;
+	SM4_CRYPT_BLK(RIV)
+	eor		RIV.16b, RIV.16b, v0.16b
 
+	st1		{RIV.16b}, [x1], #16
+
+	cbnz		w4, .Lcfb_enc_loop_1x
+
+.Lcfb_enc_end:
 	/* store new IV */
-	st1		{RIV.16b}, [x3];
+	st1		{RIV.16b}, [x3]
 
-	ret;
+	ret
 SYM_FUNC_END(sm4_ce_cfb_enc)
 
 .align 3
@@ -482,79 +386,91 @@ SYM_FUNC_START(sm4_ce_cfb_dec)
 	 *   x3: iv (big endian, 128 bit)
 	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE(x0)
 
-	ld1		{v0.16b}, [x3];
+	ld1		{RIV.16b}, [x3]
 
-.Lcfb_loop_blk:
-	sub		w4, w4, #8;
-	tbnz		w4, #31, .Lcfb_tail8;
+.Lcfb_dec_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lcfb_dec_4x
 
-	ld1		{v1.16b, v2.16b, v3.16b}, [x2], #48;
-	ld1		{v4.16b-v7.16b}, [x2];
+	ld1		{v0.16b-v3.16b}, [x2], #64
+	ld1		{v4.16b-v7.16b}, [x2], #64
 
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
+	rev32		v8.16b, RIV.16b
+	rev32		v9.16b, v0.16b
+	rev32		v10.16b, v1.16b
+	rev32		v11.16b, v2.16b
+	rev32		v12.16b, v3.16b
+	rev32		v13.16b, v4.16b
+	rev32		v14.16b, v5.16b
+	rev32		v15.16b, v6.16b
 
-	sub		x2, x2, #48;
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	eor		v1.16b, v1.16b, RTMP1.16b;
-	eor		v2.16b, v2.16b, RTMP2.16b;
-	eor		v3.16b, v3.16b, RTMP3.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
+	SM4_CRYPT_BLK8_BE(v8, v9, v10, v11, v12, v13, v14, v15)
 
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v4.16b, v4.16b, RTMP0.16b;
-	eor		v5.16b, v5.16b, RTMP1.16b;
-	eor		v6.16b, v6.16b, RTMP2.16b;
-	eor		v7.16b, v7.16b, RTMP3.16b;
-	st1		{v4.16b-v7.16b}, [x1], #64;
+	mov		RIV.16b, v7.16b
 
-	mov		v0.16b, RTMP3.16b;
+	eor		v0.16b, v0.16b, v8.16b
+	eor		v1.16b, v1.16b, v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v5.16b, v5.16b, v13.16b
+	eor		v6.16b, v6.16b, v14.16b
+	eor		v7.16b, v7.16b, v15.16b
 
-	cbz		w4, .Lcfb_end;
-	b		.Lcfb_loop_blk;
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
 
-.Lcfb_tail8:
-	add		w4, w4, #8;
-	cmp		w4, #4;
-	blt		.Lcfb_tail4;
+	cbz		w4, .Lcfb_dec_end
+	b		.Lcfb_dec_loop_8x
 
-	sub		w4, w4, #4;
+.Lcfb_dec_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lcfb_dec_loop_1x
 
-	ld1		{v1.16b, v2.16b, v3.16b}, [x2];
+	sub		w4, w4, #4
 
-	SM4_CRYPT_BLK4(v0, v1, v2, v3);
+	ld1		{v0.16b-v3.16b}, [x2], #64
 
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	eor		v1.16b, v1.16b, RTMP1.16b;
-	eor		v2.16b, v2.16b, RTMP2.16b;
-	eor		v3.16b, v3.16b, RTMP3.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
+	rev32		v8.16b, RIV.16b
+	rev32		v9.16b, v0.16b
+	rev32		v10.16b, v1.16b
+	rev32		v11.16b, v2.16b
 
-	mov		v0.16b, RTMP3.16b;
+	SM4_CRYPT_BLK4_BE(v8, v9, v10, v11)
 
-	cbz		w4, .Lcfb_end;
+	mov		RIV.16b, v3.16b
 
-.Lcfb_tail4:
-	sub		w4, w4, #1;
+	eor		v0.16b, v0.16b, v8.16b
+	eor		v1.16b, v1.16b, v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
 
-	SM4_CRYPT_BLK(v0);
+	st1		{v0.16b-v3.16b}, [x1], #64
 
-	ld1		{RTMP0.16b}, [x2], #16;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	st1		{v0.16b}, [x1], #16;
+	cbz		w4, .Lcfb_dec_end
+
+.Lcfb_dec_loop_1x:
+	sub		w4, w4, #1
+
+	ld1		{v0.16b}, [x2], #16
 
-	mov		v0.16b, RTMP0.16b;
+	SM4_CRYPT_BLK(RIV)
 
-	cbnz		w4, .Lcfb_tail4;
+	eor		RIV.16b, RIV.16b, v0.16b
+	st1		{RIV.16b}, [x1], #16
 
-.Lcfb_end:
+	mov		RIV.16b, v0.16b
+
+	cbnz		w4, .Lcfb_dec_loop_1x
+
+.Lcfb_dec_end:
 	/* store new IV */
-	st1		{v0.16b}, [x3];
+	st1		{RIV.16b}, [x3]
 
-	ret;
+	ret
 SYM_FUNC_END(sm4_ce_cfb_dec)
 
 .align 3
@@ -566,95 +482,99 @@ SYM_FUNC_START(sm4_ce_ctr_enc)
 	 *   x3: ctr (big endian, 128 bit)
 	 *   w4: nblocks
 	 */
-	PREPARE;
+	SM4_PREPARE(x0)
 
-	ldp		x7, x8, [x3];
-	rev		x7, x7;
-	rev		x8, x8;
+	ldp		x7, x8, [x3]
+	rev		x7, x7
+	rev		x8, x8
 
-.Lctr_loop_blk:
-	sub		w4, w4, #8;
-	tbnz		w4, #31, .Lctr_tail8;
+.Lctr_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lctr_4x
 
-#define inc_le128(vctr)                     \
-	mov		vctr.d[1], x8;      \
-	mov		vctr.d[0], x7;      \
-	adds		x8, x8, #1;         \
-	adc		x7, x7, xzr;        \
-	rev64		vctr.16b, vctr.16b;
+#define inc_le128(vctr)					\
+		mov		vctr.d[1], x8;		\
+		mov		vctr.d[0], x7;		\
+		adds		x8, x8, #1;		\
+		rev64		vctr.16b, vctr.16b;	\
+		adc		x7, x7, xzr;
 
 	/* construct CTRs */
-	inc_le128(v0);			/* +0 */
-	inc_le128(v1);			/* +1 */
-	inc_le128(v2);			/* +2 */
-	inc_le128(v3);			/* +3 */
-	inc_le128(v4);			/* +4 */
-	inc_le128(v5);			/* +5 */
-	inc_le128(v6);			/* +6 */
-	inc_le128(v7);			/* +7 */
+	inc_le128(v0)			/* +0 */
+	inc_le128(v1)			/* +1 */
+	inc_le128(v2)			/* +2 */
+	inc_le128(v3)			/* +3 */
+	inc_le128(v4)			/* +4 */
+	inc_le128(v5)			/* +5 */
+	inc_le128(v6)			/* +6 */
+	inc_le128(v7)			/* +7 */
+
+	ld1		{v8.16b-v11.16b}, [x2], #64
+	ld1		{v12.16b-v15.16b}, [x2], #64
+
+	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+	eor		v0.16b, v0.16b, v8.16b
+	eor		v1.16b, v1.16b, v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v5.16b, v5.16b, v13.16b
+	eor		v6.16b, v6.16b, v14.16b
+	eor		v7.16b, v7.16b, v15.16b
+
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
+
+	cbz		w4, .Lctr_end
+	b		.Lctr_loop_8x
+
+.Lctr_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lctr_loop_1x
+
+	sub		w4, w4, #4
 
-	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7);
-
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	eor		v1.16b, v1.16b, RTMP1.16b;
-	eor		v2.16b, v2.16b, RTMP2.16b;
-	eor		v3.16b, v3.16b, RTMP3.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
+	/* construct CTRs */
+	inc_le128(v0)			/* +0 */
+	inc_le128(v1)			/* +1 */
+	inc_le128(v2)			/* +2 */
+	inc_le128(v3)			/* +3 */
 
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v4.16b, v4.16b, RTMP0.16b;
-	eor		v5.16b, v5.16b, RTMP1.16b;
-	eor		v6.16b, v6.16b, RTMP2.16b;
-	eor		v7.16b, v7.16b, RTMP3.16b;
-	st1		{v4.16b-v7.16b}, [x1], #64;
+	ld1		{v8.16b-v11.16b}, [x2], #64
 
-	cbz		w4, .Lctr_end;
-	b		.Lctr_loop_blk;
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
 
-.Lctr_tail8:
-	add		w4, w4, #8;
-	cmp		w4, #4;
-	blt		.Lctr_tail4;
+	eor		v0.16b, v0.16b, v8.16b
+	eor		v1.16b, v1.16b, v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
 
-	sub		w4, w4, #4;
+	st1		{v0.16b-v3.16b}, [x1], #64
 
-	/* construct CTRs */
-	inc_le128(v0);			/* +0 */
-	inc_le128(v1);			/* +1 */
-	inc_le128(v2);			/* +2 */
-	inc_le128(v3);			/* +3 */
+	cbz		w4, .Lctr_end
 
-	SM4_CRYPT_BLK4(v0, v1, v2, v3);
-
-	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	eor		v1.16b, v1.16b, RTMP1.16b;
-	eor		v2.16b, v2.16b, RTMP2.16b;
-	eor		v3.16b, v3.16b, RTMP3.16b;
-	st1		{v0.16b-v3.16b}, [x1], #64;
-
-	cbz		w4, .Lctr_end;
-
-.Lctr_tail4:
-	sub		w4, w4, #1;
+.Lctr_loop_1x:
+	sub		w4, w4, #1
 
 	/* construct CTRs */
-	inc_le128(v0);
+	inc_le128(v0)
 
-	SM4_CRYPT_BLK(v0);
+	ld1		{v8.16b}, [x2], #16
 
-	ld1		{RTMP0.16b}, [x2], #16;
-	eor		v0.16b, v0.16b, RTMP0.16b;
-	st1		{v0.16b}, [x1], #16;
+	SM4_CRYPT_BLK(v0)
+
+	eor		v0.16b, v0.16b, v8.16b
+	st1		{v0.16b}, [x1], #16
 
-	cbnz		w4, .Lctr_tail4;
+	cbnz		w4, .Lctr_loop_1x
 
 .Lctr_end:
 	/* store new CTR */
-	rev		x7, x7;
-	rev		x8, x8;
-	stp		x7, x8, [x3];
+	rev		x7, x7
+	rev		x8, x8
+	stp		x7, x8, [x3]
 
-	ret;
+	ret
 SYM_FUNC_END(sm4_ce_ctr_enc)
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 496d55c0d01a4..e56e81b1f35f7 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -26,9 +26,9 @@ asmlinkage void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
 asmlinkage void sm4_ce_crypt(const u32 *rkey, u8 *dst, const u8 *src,
 			     unsigned int nblks);
 asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src,
-			       u8 *iv, unsigned int nblks);
+			       u8 *iv, unsigned int nblocks);
 asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src,
-			       u8 *iv, unsigned int nblks);
+			       u8 *iv, unsigned int nblocks);
 asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblks);
 asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
@@ -94,66 +94,56 @@ static int sm4_ecb_decrypt(struct skcipher_request *req)
 	return sm4_ecb_do_crypt(req, ctx->rkey_dec);
 }
 
-static int sm4_cbc_encrypt(struct skcipher_request *req)
+static int sm4_cbc_crypt(struct skcipher_request *req,
+			 struct sm4_ctx *ctx, bool encrypt)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
 	unsigned int nbytes;
 	int err;
 
 	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
 
 	while ((nbytes = walk.nbytes) > 0) {
 		const u8 *src = walk.src.virt.addr;
 		u8 *dst = walk.dst.virt.addr;
-		unsigned int nblks;
+		unsigned int nblocks;
 
-		kernel_neon_begin();
+		nblocks = nbytes / SM4_BLOCK_SIZE;
+		if (nblocks) {
+			kernel_neon_begin();
 
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			sm4_ce_cbc_enc(ctx->rkey_enc, dst, src, walk.iv, nblks);
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
+			if (encrypt)
+				sm4_ce_cbc_enc(ctx->rkey_enc, dst, src,
+					       walk.iv, nblocks);
+			else
+				sm4_ce_cbc_dec(ctx->rkey_dec, dst, src,
+					       walk.iv, nblocks);
 
-		kernel_neon_end();
+			kernel_neon_end();
+		}
 
-		err = skcipher_walk_done(&walk, nbytes);
+		err = skcipher_walk_done(&walk, nbytes % SM4_BLOCK_SIZE);
 	}
 
 	return err;
 }
 
-static int sm4_cbc_decrypt(struct skcipher_request *req)
+static int sm4_cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_walk walk;
-	unsigned int nbytes;
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, false);
 
-	while ((nbytes = walk.nbytes) > 0) {
-		const u8 *src = walk.src.virt.addr;
-		u8 *dst = walk.dst.virt.addr;
-		unsigned int nblks;
-
-		kernel_neon_begin();
-
-		nblks = BYTES2BLKS(nbytes);
-		if (nblks) {
-			sm4_ce_cbc_dec(ctx->rkey_dec, dst, src, walk.iv, nblks);
-			nbytes -= nblks * SM4_BLOCK_SIZE;
-		}
-
-		kernel_neon_end();
+	return sm4_cbc_crypt(req, ctx, true);
+}
 
-		err = skcipher_walk_done(&walk, nbytes);
-	}
+static int sm4_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return err;
+	return sm4_cbc_crypt(req, ctx, false);
 }
 
 static int sm4_cfb_encrypt(struct skcipher_request *req)
-- 
GitLab


From cb9ba02b07d18172c6a6dcc69410c56482903230 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:54:59 +0800
Subject: [PATCH 0904/4122] crypto: arm64/sm4 - simplify sm4_ce_expand_key() of
 CE implementation

Use a 128-bit swap mask and tbl instruction to simplify the implementation
for generating SM4 rkey_dec.

Also fixed the issue of not being wrapped by kernel_neon_begin/end() when
using the sm4_ce_expand_key() function.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm4-ce-core.S | 46 ++++++++++++++++-----------------
 arch/arm64/crypto/sm4-ce-glue.c |  2 ++
 2 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 41fc745a85284..9e4b4f01cdf3e 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -65,32 +65,23 @@ SYM_FUNC_START(sm4_ce_expand_key)
 	sm4ekey		v6.4s, v5.4s, v30.4s;
 	sm4ekey		v7.4s, v6.4s, v31.4s;
 
+	adr_l		x5, .Lbswap128_mask
+	ld1		{v24.16b}, [x5]
+
 	st1		{v0.16b-v3.16b}, [x1], #64;
 	st1		{v4.16b-v7.16b}, [x1];
-	rev64		v7.4s, v7.4s;
-	rev64		v6.4s, v6.4s;
-	rev64		v5.4s, v5.4s;
-	rev64		v4.4s, v4.4s;
-	rev64		v3.4s, v3.4s;
-	rev64		v2.4s, v2.4s;
-	rev64		v1.4s, v1.4s;
-	rev64		v0.4s, v0.4s;
-	ext		v7.16b, v7.16b, v7.16b, #8;
-	ext		v6.16b, v6.16b, v6.16b, #8;
-	ext		v5.16b, v5.16b, v5.16b, #8;
-	ext		v4.16b, v4.16b, v4.16b, #8;
-	ext		v3.16b, v3.16b, v3.16b, #8;
-	ext		v2.16b, v2.16b, v2.16b, #8;
-	ext		v1.16b, v1.16b, v1.16b, #8;
-	ext		v0.16b, v0.16b, v0.16b, #8;
-	st1		{v7.16b}, [x2], #16;
-	st1		{v6.16b}, [x2], #16;
-	st1		{v5.16b}, [x2], #16;
-	st1		{v4.16b}, [x2], #16;
-	st1		{v3.16b}, [x2], #16;
-	st1		{v2.16b}, [x2], #16;
-	st1		{v1.16b}, [x2], #16;
-	st1		{v0.16b}, [x2];
+
+	tbl		v16.16b, {v7.16b}, v24.16b
+	tbl		v17.16b, {v6.16b}, v24.16b
+	tbl		v18.16b, {v5.16b}, v24.16b
+	tbl		v19.16b, {v4.16b}, v24.16b
+	tbl		v20.16b, {v3.16b}, v24.16b
+	tbl		v21.16b, {v2.16b}, v24.16b
+	tbl		v22.16b, {v1.16b}, v24.16b
+	tbl		v23.16b, {v0.16b}, v24.16b
+
+	st1		{v16.16b-v19.16b}, [x2], #64
+	st1		{v20.16b-v23.16b}, [x2]
 
 	ret;
 SYM_FUNC_END(sm4_ce_expand_key)
@@ -578,3 +569,10 @@ SYM_FUNC_START(sm4_ce_ctr_enc)
 
 	ret
 SYM_FUNC_END(sm4_ce_ctr_enc)
+
+
+	.section	".rodata", "a"
+	.align 4
+.Lbswap128_mask:
+	.byte		0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b
+	.byte		0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index e56e81b1f35f7..ff2d8442d4730 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -44,8 +44,10 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	if (key_len != SM4_KEY_SIZE)
 		return -EINVAL;
 
+	kernel_neon_begin();
 	sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
 			  crypto_sm4_fk, crypto_sm4_ck);
+	kernel_neon_end();
 	return 0;
 }
 
-- 
GitLab


From 45089dbe5952e9afbe2a3b3054105f2a694930f1 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:55:00 +0800
Subject: [PATCH 0905/4122] crypto: arm64/sm4 - export reusable CE acceleration
 functions

In the accelerated implementation of the SM4 algorithm using the Crypto
Extension instructions, there are some functions that can be reused in
the upcoming accelerated implementation of the GCM/CCM mode, and the
CBC/CFB encryption is reused in the optimized implementation of SVESM4.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm4-ce-glue.c |  5 +++++
 arch/arm64/crypto/sm4-ce.h      | 16 ++++++++++++++++
 2 files changed, 21 insertions(+)
 create mode 100644 arch/arm64/crypto/sm4-ce.h

diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index ff2d8442d4730..63abcadc684ba 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -36,6 +36,11 @@ asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
 asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblks);
 
+EXPORT_SYMBOL(sm4_ce_expand_key);
+EXPORT_SYMBOL(sm4_ce_crypt_block);
+EXPORT_SYMBOL(sm4_ce_cbc_enc);
+EXPORT_SYMBOL(sm4_ce_cfb_enc);
+
 static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		      unsigned int key_len)
 {
diff --git a/arch/arm64/crypto/sm4-ce.h b/arch/arm64/crypto/sm4-ce.h
new file mode 100644
index 0000000000000..109c21b37590d
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4 common functions for Crypto Extensions
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+void sm4_ce_expand_key(const u8 *key, u32 *rkey_enc, u32 *rkey_dec,
+		       const u32 *fk, const u32 *ck);
+
+void sm4_ce_crypt_block(const u32 *rkey, u8 *dst, const u8 *src);
+
+void sm4_ce_cbc_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
+		    u8 *iv, unsigned int nblocks);
+
+void sm4_ce_cfb_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
+		    u8 *iv, unsigned int nblocks);
-- 
GitLab


From b1863fd0742f8da21f6f994e14e820db5831bd74 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:55:01 +0800
Subject: [PATCH 0906/4122] crypto: arm64/sm4 - add CE implementation for
 CTS-CBC mode

This patch is a CE-optimized assembly implementation for CTS-CBC mode.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of
tcrypt, and compared the performance before and after this patch (the driver
used before this patch is cts(cbc-sm4-ce)). The abscissas are blocks of
different lengths. The data is tabulated and the unit is Mb/s:

Before:

cts(cbc-sm4-ce) |      16       64      128      256     1024     1420     4096
----------------+--------------------------------------------------------------
    CTS-CBC enc |  286.09   297.17   457.97   627.75   868.58   900.80   957.69
    CTS-CBC dec |  286.67   285.63   538.35   947.08  2241.03  2577.32  3391.14

After:

cts-cbc-sm4-ce  |      16       64      128      256     1024     1420     4096
----------------+--------------------------------------------------------------
    CTS-CBC enc |  288.19   428.80   593.57   741.04   911.73   931.80   950.00
    CTS-CBC dec |  292.22   468.99   838.23  1380.76  2741.17  3036.42  3409.62

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm4-ce-core.S | 102 ++++++++++++++++++++++++++++++++
 arch/arm64/crypto/sm4-ce-glue.c |  94 +++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+)

diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 9e4b4f01cdf3e..414d29f8110b3 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -306,6 +306,100 @@ SYM_FUNC_START(sm4_ce_cbc_dec)
 	ret
 SYM_FUNC_END(sm4_ce_cbc_dec)
 
+.align 3
+SYM_FUNC_START(sm4_ce_cbc_cts_enc)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: iv (big endian, 128 bit)
+	 *   w4: nbytes
+	 */
+	SM4_PREPARE(x0)
+
+	sub		w5, w4, #16
+	uxtw		x5, w5
+
+	ld1		{RIV.16b}, [x3]
+
+	ld1		{v0.16b}, [x2]
+	eor		RIV.16b, RIV.16b, v0.16b
+	SM4_CRYPT_BLK(RIV)
+
+	/* load permute table */
+	adr_l		x6, .Lcts_permute_table
+	add		x7, x6, #32
+	add		x6, x6, x5
+	sub		x7, x7, x5
+	ld1		{v3.16b}, [x6]
+	ld1		{v4.16b}, [x7]
+
+	/* overlapping loads */
+	add		x2, x2, x5
+	ld1		{v1.16b}, [x2]
+
+	/* create Cn from En-1 */
+	tbl		v0.16b, {RIV.16b}, v3.16b
+	/* padding Pn with zeros */
+	tbl		v1.16b, {v1.16b}, v4.16b
+
+	eor		v1.16b, v1.16b, RIV.16b
+	SM4_CRYPT_BLK(v1)
+
+	/* overlapping stores */
+	add		x5, x1, x5
+	st1		{v0.16b}, [x5]
+	st1		{v1.16b}, [x1]
+
+	ret
+SYM_FUNC_END(sm4_ce_cbc_cts_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_cbc_cts_dec)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: iv (big endian, 128 bit)
+	 *   w4: nbytes
+	 */
+	SM4_PREPARE(x0)
+
+	sub		w5, w4, #16
+	uxtw		x5, w5
+
+	ld1		{RIV.16b}, [x3]
+
+	/* load permute table */
+	adr_l		x6, .Lcts_permute_table
+	add		x7, x6, #32
+	add		x6, x6, x5
+	sub		x7, x7, x5
+	ld1		{v3.16b}, [x6]
+	ld1		{v4.16b}, [x7]
+
+	/* overlapping loads */
+	ld1		{v0.16b}, [x2], x5
+	ld1		{v1.16b}, [x2]
+
+	SM4_CRYPT_BLK(v0)
+	/* select the first Ln bytes of Xn to create Pn */
+	tbl		v2.16b, {v0.16b}, v3.16b
+	eor		v2.16b, v2.16b, v1.16b
+
+	/* overwrite the first Ln bytes with Cn to create En-1 */
+	tbx		v0.16b, {v1.16b}, v4.16b
+	SM4_CRYPT_BLK(v0)
+	eor		v0.16b, v0.16b, RIV.16b
+
+	/* overlapping stores */
+	add		x5, x1, x5
+	st1		{v2.16b}, [x5]
+	st1		{v0.16b}, [x1]
+
+	ret
+SYM_FUNC_END(sm4_ce_cbc_cts_dec)
+
 .align 3
 SYM_FUNC_START(sm4_ce_cfb_enc)
 	/* input:
@@ -576,3 +670,11 @@ SYM_FUNC_END(sm4_ce_ctr_enc)
 .Lbswap128_mask:
 	.byte		0x0c, 0x0d, 0x0e, 0x0f, 0x08, 0x09, 0x0a, 0x0b
 	.byte		0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03
+
+.Lcts_permute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 63abcadc684ba..4d4072c7bfa2e 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -16,6 +16,7 @@
 #include <asm/simd.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/sm4.h>
 
 #define BYTES2BLKS(nbytes)	((nbytes) >> 4)
@@ -29,6 +30,10 @@ asmlinkage void sm4_ce_cbc_enc(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblocks);
 asmlinkage void sm4_ce_cbc_dec(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblocks);
+asmlinkage void sm4_ce_cbc_cts_enc(const u32 *rkey, u8 *dst, const u8 *src,
+				   u8 *iv, unsigned int nbytes);
+asmlinkage void sm4_ce_cbc_cts_dec(const u32 *rkey, u8 *dst, const u8 *src,
+				   u8 *iv, unsigned int nbytes);
 asmlinkage void sm4_ce_cfb_enc(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblks);
 asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
@@ -153,6 +158,78 @@ static int sm4_cbc_decrypt(struct skcipher_request *req)
 	return sm4_cbc_crypt(req, ctx, false);
 }
 
+static int sm4_cbc_cts_crypt(struct skcipher_request *req, bool encrypt)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct sm4_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct scatterlist *src = req->src;
+	struct scatterlist *dst = req->dst;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct skcipher_walk walk;
+	int cbc_blocks;
+	int err;
+
+	if (req->cryptlen < SM4_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (req->cryptlen == SM4_BLOCK_SIZE)
+		return sm4_cbc_crypt(req, ctx, encrypt);
+
+	skcipher_request_set_tfm(&subreq, tfm);
+	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+				      NULL, NULL);
+
+	/* handle the CBC cryption part */
+	cbc_blocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2;
+	if (cbc_blocks) {
+		skcipher_request_set_crypt(&subreq, src, dst,
+					   cbc_blocks * SM4_BLOCK_SIZE,
+					   req->iv);
+
+		err = sm4_cbc_crypt(&subreq, ctx, encrypt);
+		if (err)
+			return err;
+
+		dst = src = scatterwalk_ffwd(sg_src, src, subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(sg_dst, req->dst,
+					       subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&subreq, src, dst,
+				   req->cryptlen - cbc_blocks * SM4_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+
+	if (encrypt)
+		sm4_ce_cbc_cts_enc(ctx->rkey_enc, walk.dst.virt.addr,
+				   walk.src.virt.addr, walk.iv, walk.nbytes);
+	else
+		sm4_ce_cbc_cts_dec(ctx->rkey_dec, walk.dst.virt.addr,
+				   walk.src.virt.addr, walk.iv, walk.nbytes);
+
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int sm4_cbc_cts_encrypt(struct skcipher_request *req)
+{
+	return sm4_cbc_cts_crypt(req, true);
+}
+
+static int sm4_cbc_cts_decrypt(struct skcipher_request *req)
+{
+	return sm4_cbc_cts_crypt(req, false);
+}
+
 static int sm4_cfb_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -342,6 +419,22 @@ static struct skcipher_alg sm4_algs[] = {
 		.setkey		= sm4_setkey,
 		.encrypt	= sm4_ctr_crypt,
 		.decrypt	= sm4_ctr_crypt,
+	}, {
+		.base = {
+			.cra_name		= "cts(cbc(sm4))",
+			.cra_driver_name	= "cts-cbc-sm4-ce",
+			.cra_priority		= 400,
+			.cra_blocksize		= SM4_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct sm4_ctx),
+			.cra_module		= THIS_MODULE,
+		},
+		.min_keysize	= SM4_KEY_SIZE,
+		.max_keysize	= SM4_KEY_SIZE,
+		.ivsize		= SM4_BLOCK_SIZE,
+		.walksize	= SM4_BLOCK_SIZE * 2,
+		.setkey		= sm4_setkey,
+		.encrypt	= sm4_cbc_cts_encrypt,
+		.decrypt	= sm4_cbc_cts_decrypt,
 	}
 };
 
@@ -365,5 +458,6 @@ MODULE_ALIAS_CRYPTO("ecb(sm4)");
 MODULE_ALIAS_CRYPTO("cbc(sm4)");
 MODULE_ALIAS_CRYPTO("cfb(sm4)");
 MODULE_ALIAS_CRYPTO("ctr(sm4)");
+MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
 MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
 MODULE_LICENSE("GPL v2");
-- 
GitLab


From 01f633113b19534ab4f4e9cf72d8e72fb3568901 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:55:02 +0800
Subject: [PATCH 0907/4122] crypto: arm64/sm4 - add CE implementation for XTS
 mode

This patch is a CE-optimized assembly implementation for XTS mode.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 218 mode of
tcrypt, and compared the performance before and after this patch (the driver
used before this patch is xts(ecb-sm4-ce)). The abscissas are blocks of
different lengths. The data is tabulated and the unit is Mb/s:

Before:

xts(ecb-sm4-ce) |      16       64      128      256     1024     1420     4096
----------------+--------------------------------------------------------------
        XTS enc |  117.17   430.56   732.92  1134.98  2007.03  2136.23  2347.20
        XTS dec |  116.89   429.02   733.40  1132.96  2006.13  2130.50  2347.92

After:

xts-sm4-ce      |      16       64      128      256     1024     1420     4096
----------------+--------------------------------------------------------------
        XTS enc |  224.68   798.91  1248.08  1714.60  2413.73  2467.84  2612.62
        XTS dec |  229.85   791.34  1237.79  1720.00  2413.30  2473.84  2611.95

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig       |   4 +-
 arch/arm64/crypto/sm4-ce-core.S | 343 ++++++++++++++++++++++++++++++++
 arch/arm64/crypto/sm4-ce-glue.c | 159 ++++++++++++++-
 3 files changed, 504 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 4b121dc0cfba2..8939f5ae9214d 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -231,7 +231,7 @@ config CRYPTO_SM4_ARM64_CE
 	  - NEON (Advanced SIMD) extensions
 
 config CRYPTO_SM4_ARM64_CE_BLK
-	tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR (ARMv8 Crypto Extensions)"
+	tristate "Ciphers: SM4, modes: ECB/CBC/CFB/CTR/XTS (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_SKCIPHER
 	select CRYPTO_SM4
@@ -242,6 +242,8 @@ config CRYPTO_SM4_ARM64_CE_BLK
 	  - CBC (Cipher Block Chaining) mode (NIST SP800-38A)
 	  - CFB (Cipher Feedback) mode (NIST SP800-38A)
 	  - CTR (Counter) mode (NIST SP800-38A)
+	  - XTS (XOR Encrypt XOR with ciphertext stealing) mode (NIST SP800-38E
+	    and IEEE 1619)
 
 	  Architecture: arm64 using:
 	  - ARMv8 Crypto Extensions
diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index 414d29f8110b3..ddd15ec09d38c 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -35,6 +35,7 @@
 #define RTMP3	v19
 
 #define RIV	v20
+#define RMASK	v21
 
 
 .align 3
@@ -665,6 +666,348 @@ SYM_FUNC_START(sm4_ce_ctr_enc)
 SYM_FUNC_END(sm4_ce_ctr_enc)
 
 
+#define tweak_next(vt, vin, RTMP)					\
+		sshr		RTMP.2d, vin.2d, #63;			\
+		and		RTMP.16b, RTMP.16b, RMASK.16b;		\
+		add		vt.2d, vin.2d, vin.2d;			\
+		ext		RTMP.16b, RTMP.16b, RTMP.16b, #8;	\
+		eor		vt.16b, vt.16b, RTMP.16b;
+
+.align 3
+SYM_FUNC_START(sm4_ce_xts_enc)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: tweak (big endian, 128 bit)
+	 *   w4: nbytes
+	 *   x5: round key array for IV
+	 */
+	ld1		{v8.16b}, [x3]
+
+	cbz		x5, .Lxts_enc_nofirst
+
+	SM4_PREPARE(x5)
+
+	/* Generate first tweak */
+	SM4_CRYPT_BLK(v8)
+
+.Lxts_enc_nofirst:
+	SM4_PREPARE(x0)
+
+	ands		w5, w4, #15
+	lsr		w4, w4, #4
+	sub		w6, w4, #1
+	csel		w4, w4, w6, eq
+	uxtw		x5, w5
+
+	movi		RMASK.2s, #0x1
+	movi		RTMP0.2s, #0x87
+	uzp1		RMASK.4s, RMASK.4s, RTMP0.4s
+
+	cbz		w4, .Lxts_enc_cts
+
+.Lxts_enc_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lxts_enc_4x
+
+	tweak_next( v9,  v8, RTMP0)
+	tweak_next(v10,  v9, RTMP1)
+	tweak_next(v11, v10, RTMP2)
+	tweak_next(v12, v11, RTMP3)
+	tweak_next(v13, v12, RTMP0)
+	tweak_next(v14, v13, RTMP1)
+	tweak_next(v15, v14, RTMP2)
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+	ld1		{v4.16b-v7.16b}, [x2], #64
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v5.16b, v5.16b, v13.16b
+	eor		v6.16b, v6.16b, v14.16b
+	eor		v7.16b, v7.16b, v15.16b
+
+	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v5.16b, v5.16b, v13.16b
+	eor		v6.16b, v6.16b, v14.16b
+	eor		v7.16b, v7.16b, v15.16b
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
+
+	tweak_next(v8, v15, RTMP3)
+
+	cbz		w4, .Lxts_enc_cts
+	b		.Lxts_enc_loop_8x
+
+.Lxts_enc_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lxts_enc_loop_1x
+
+	sub		w4, w4, #4
+
+	tweak_next( v9,  v8, RTMP0)
+	tweak_next(v10,  v9, RTMP1)
+	tweak_next(v11, v10, RTMP2)
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	st1		{v0.16b-v3.16b}, [x1], #64
+
+	tweak_next(v8, v11, RTMP3)
+
+	cbz		w4, .Lxts_enc_cts
+
+.Lxts_enc_loop_1x:
+	sub		w4, w4, #1
+
+	ld1		{v0.16b}, [x2], #16
+	eor		v0.16b, v0.16b, v8.16b
+
+	SM4_CRYPT_BLK(v0)
+
+	eor		v0.16b, v0.16b, v8.16b
+	st1		{v0.16b}, [x1], #16
+
+	tweak_next(v8, v8, RTMP0)
+
+	cbnz		w4, .Lxts_enc_loop_1x
+
+.Lxts_enc_cts:
+	cbz		x5, .Lxts_enc_end
+
+	/* cipher text stealing */
+
+	tweak_next(v9, v8, RTMP0)
+	ld1		{v0.16b}, [x2]
+	eor		v0.16b, v0.16b, v8.16b
+	SM4_CRYPT_BLK(v0)
+	eor		v0.16b, v0.16b, v8.16b
+
+	/* load permute table */
+	adr_l		x6, .Lcts_permute_table
+	add		x7, x6, #32
+	add		x6, x6, x5
+	sub		x7, x7, x5
+	ld1		{v3.16b}, [x6]
+	ld1		{v4.16b}, [x7]
+
+	/* overlapping loads */
+	add		x2, x2, x5
+	ld1		{v1.16b}, [x2]
+
+	/* create Cn from En-1 */
+	tbl		v2.16b, {v0.16b}, v3.16b
+	/* padding Pn with En-1 at the end */
+	tbx		v0.16b, {v1.16b}, v4.16b
+
+	eor		v0.16b, v0.16b, v9.16b
+	SM4_CRYPT_BLK(v0)
+	eor		v0.16b, v0.16b, v9.16b
+
+
+	/* overlapping stores */
+	add		x5, x1, x5
+	st1		{v2.16b}, [x5]
+	st1		{v0.16b}, [x1]
+
+	b		.Lxts_enc_ret
+
+.Lxts_enc_end:
+	/* store new tweak */
+	st1		{v8.16b}, [x3]
+
+.Lxts_enc_ret:
+	ret
+SYM_FUNC_END(sm4_ce_xts_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_xts_dec)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: tweak (big endian, 128 bit)
+	 *   w4: nbytes
+	 *   x5: round key array for IV
+	 */
+	ld1		{v8.16b}, [x3]
+
+	cbz		x5, .Lxts_dec_nofirst
+
+	SM4_PREPARE(x5)
+
+	/* Generate first tweak */
+	SM4_CRYPT_BLK(v8)
+
+.Lxts_dec_nofirst:
+	SM4_PREPARE(x0)
+
+	ands		w5, w4, #15
+	lsr		w4, w4, #4
+	sub		w6, w4, #1
+	csel		w4, w4, w6, eq
+	uxtw		x5, w5
+
+	movi		RMASK.2s, #0x1
+	movi		RTMP0.2s, #0x87
+	uzp1		RMASK.4s, RMASK.4s, RTMP0.4s
+
+	cbz		w4, .Lxts_dec_cts
+
+.Lxts_dec_loop_8x:
+	sub		w4, w4, #8
+	tbnz		w4, #31, .Lxts_dec_4x
+
+	tweak_next( v9,  v8, RTMP0)
+	tweak_next(v10,  v9, RTMP1)
+	tweak_next(v11, v10, RTMP2)
+	tweak_next(v12, v11, RTMP3)
+	tweak_next(v13, v12, RTMP0)
+	tweak_next(v14, v13, RTMP1)
+	tweak_next(v15, v14, RTMP2)
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+	ld1		{v4.16b-v7.16b}, [x2], #64
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v5.16b, v5.16b, v13.16b
+	eor		v6.16b, v6.16b, v14.16b
+	eor		v7.16b, v7.16b, v15.16b
+
+	SM4_CRYPT_BLK8(v0, v1, v2, v3, v4, v5, v6, v7)
+
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	eor		v4.16b, v4.16b, v12.16b
+	eor		v5.16b, v5.16b, v13.16b
+	eor		v6.16b, v6.16b, v14.16b
+	eor		v7.16b, v7.16b, v15.16b
+	st1		{v0.16b-v3.16b}, [x1], #64
+	st1		{v4.16b-v7.16b}, [x1], #64
+
+	tweak_next(v8, v15, RTMP3)
+
+	cbz		w4, .Lxts_dec_cts
+	b		.Lxts_dec_loop_8x
+
+.Lxts_dec_4x:
+	add		w4, w4, #8
+	cmp		w4, #4
+	blt		.Lxts_dec_loop_1x
+
+	sub		w4, w4, #4
+
+	tweak_next( v9,  v8, RTMP0)
+	tweak_next(v10,  v9, RTMP1)
+	tweak_next(v11, v10, RTMP2)
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+	eor		v0.16b, v0.16b,  v8.16b
+	eor		v1.16b, v1.16b,  v9.16b
+	eor		v2.16b, v2.16b, v10.16b
+	eor		v3.16b, v3.16b, v11.16b
+	st1		{v0.16b-v3.16b}, [x1], #64
+
+	tweak_next(v8, v11, RTMP3)
+
+	cbz		w4, .Lxts_dec_cts
+
+.Lxts_dec_loop_1x:
+	sub		w4, w4, #1
+
+	ld1		{v0.16b}, [x2], #16
+	eor		v0.16b, v0.16b, v8.16b
+
+	SM4_CRYPT_BLK(v0)
+
+	eor		v0.16b, v0.16b, v8.16b
+	st1		{v0.16b}, [x1], #16
+
+	tweak_next(v8, v8, RTMP0)
+
+	cbnz		w4, .Lxts_dec_loop_1x
+
+.Lxts_dec_cts:
+	cbz		x5, .Lxts_dec_end
+
+	/* cipher text stealing */
+
+	tweak_next(v9, v8, RTMP0)
+	ld1		{v0.16b}, [x2]
+	eor		v0.16b, v0.16b, v9.16b
+	SM4_CRYPT_BLK(v0)
+	eor		v0.16b, v0.16b, v9.16b
+
+	/* load permute table */
+	adr_l		x6, .Lcts_permute_table
+	add		x7, x6, #32
+	add		x6, x6, x5
+	sub		x7, x7, x5
+	ld1		{v3.16b}, [x6]
+	ld1		{v4.16b}, [x7]
+
+	/* overlapping loads */
+	add		x2, x2, x5
+	ld1		{v1.16b}, [x2]
+
+	/* create Cn from En-1 */
+	tbl		v2.16b, {v0.16b}, v3.16b
+	/* padding Pn with En-1 at the end */
+	tbx		v0.16b, {v1.16b}, v4.16b
+
+	eor		v0.16b, v0.16b, v8.16b
+	SM4_CRYPT_BLK(v0)
+	eor		v0.16b, v0.16b, v8.16b
+
+
+	/* overlapping stores */
+	add		x5, x1, x5
+	st1		{v2.16b}, [x5]
+	st1		{v0.16b}, [x1]
+
+	b		.Lxts_dec_ret
+
+.Lxts_dec_end:
+	/* store new tweak */
+	st1		{v8.16b}, [x3]
+
+.Lxts_dec_ret:
+	ret
+SYM_FUNC_END(sm4_ce_xts_dec)
+
+
 	.section	".rodata", "a"
 	.align 4
 .Lbswap128_mask:
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 4d4072c7bfa2e..8222766f712a4 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -17,6 +17,7 @@
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
+#include <crypto/xts.h>
 #include <crypto/sm4.h>
 
 #define BYTES2BLKS(nbytes)	((nbytes) >> 4)
@@ -40,12 +41,23 @@ asmlinkage void sm4_ce_cfb_dec(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblks);
 asmlinkage void sm4_ce_ctr_enc(const u32 *rkey, u8 *dst, const u8 *src,
 			       u8 *iv, unsigned int nblks);
+asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src,
+			       u8 *tweak, unsigned int nbytes,
+			       const u32 *rkey2_enc);
+asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src,
+			       u8 *tweak, unsigned int nbytes,
+			       const u32 *rkey2_enc);
 
 EXPORT_SYMBOL(sm4_ce_expand_key);
 EXPORT_SYMBOL(sm4_ce_crypt_block);
 EXPORT_SYMBOL(sm4_ce_cbc_enc);
 EXPORT_SYMBOL(sm4_ce_cfb_enc);
 
+struct sm4_xts_ctx {
+	struct sm4_ctx key1;
+	struct sm4_ctx key2;
+};
+
 static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		      unsigned int key_len)
 {
@@ -61,6 +73,29 @@ static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 	return 0;
 }
 
+static int sm4_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			  unsigned int key_len)
+{
+	struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int ret;
+
+	if (key_len != SM4_KEY_SIZE * 2)
+		return -EINVAL;
+
+	ret = xts_verify_key(tfm, key, key_len);
+	if (ret)
+		return ret;
+
+	kernel_neon_begin();
+	sm4_ce_expand_key(key, ctx->key1.rkey_enc,
+			  ctx->key1.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+	sm4_ce_expand_key(&key[SM4_KEY_SIZE], ctx->key2.rkey_enc,
+			  ctx->key2.rkey_dec, crypto_sm4_fk, crypto_sm4_ck);
+	kernel_neon_end();
+
+	return 0;
+}
+
 static int sm4_ecb_do_crypt(struct skcipher_request *req, const u32 *rkey)
 {
 	struct skcipher_walk walk;
@@ -357,6 +392,111 @@ static int sm4_ctr_crypt(struct skcipher_request *req)
 	return err;
 }
 
+static int sm4_xts_crypt(struct skcipher_request *req, bool encrypt)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct sm4_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % SM4_BLOCK_SIZE;
+	const u32 *rkey2_enc = ctx->key2.rkey_enc;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
+	struct skcipher_walk walk;
+	unsigned int nbytes;
+	int err;
+
+	if (req->cryptlen < SM4_BLOCK_SIZE)
+		return -EINVAL;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		int nblocks = DIV_ROUND_UP(req->cryptlen, SM4_BLOCK_SIZE) - 2;
+
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   nblocks * SM4_BLOCK_SIZE, req->iv);
+
+		err = skcipher_walk_virt(&walk, &subreq, false);
+		if (err)
+			return err;
+	} else {
+		tail = 0;
+	}
+
+	while ((nbytes = walk.nbytes) >= SM4_BLOCK_SIZE) {
+		if (nbytes < walk.total)
+			nbytes &= ~(SM4_BLOCK_SIZE - 1);
+
+		kernel_neon_begin();
+
+		if (encrypt)
+			sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+				       walk.src.virt.addr, walk.iv, nbytes,
+				       rkey2_enc);
+		else
+			sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+				       walk.src.virt.addr, walk.iv, nbytes,
+				       rkey2_enc);
+
+		kernel_neon_end();
+
+		rkey2_enc = NULL;
+
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+		if (err)
+			return err;
+	}
+
+	if (likely(tail == 0))
+		return 0;
+
+	/* handle ciphertext stealing */
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, subreq.cryptlen);
+
+	skcipher_request_set_crypt(&subreq, src, dst, SM4_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+
+	if (encrypt)
+		sm4_ce_xts_enc(ctx->key1.rkey_enc, walk.dst.virt.addr,
+			       walk.src.virt.addr, walk.iv, walk.nbytes,
+			       rkey2_enc);
+	else
+		sm4_ce_xts_dec(ctx->key1.rkey_dec, walk.dst.virt.addr,
+			       walk.src.virt.addr, walk.iv, walk.nbytes,
+			       rkey2_enc);
+
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int sm4_xts_encrypt(struct skcipher_request *req)
+{
+	return sm4_xts_crypt(req, true);
+}
+
+static int sm4_xts_decrypt(struct skcipher_request *req)
+{
+	return sm4_xts_crypt(req, false);
+}
+
 static struct skcipher_alg sm4_algs[] = {
 	{
 		.base = {
@@ -435,6 +575,22 @@ static struct skcipher_alg sm4_algs[] = {
 		.setkey		= sm4_setkey,
 		.encrypt	= sm4_cbc_cts_encrypt,
 		.decrypt	= sm4_cbc_cts_decrypt,
+	}, {
+		.base = {
+			.cra_name		= "xts(sm4)",
+			.cra_driver_name	= "xts-sm4-ce",
+			.cra_priority		= 400,
+			.cra_blocksize		= SM4_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct sm4_xts_ctx),
+			.cra_module		= THIS_MODULE,
+		},
+		.min_keysize	= SM4_KEY_SIZE * 2,
+		.max_keysize	= SM4_KEY_SIZE * 2,
+		.ivsize		= SM4_BLOCK_SIZE,
+		.walksize	= SM4_BLOCK_SIZE * 2,
+		.setkey		= sm4_xts_setkey,
+		.encrypt	= sm4_xts_encrypt,
+		.decrypt	= sm4_xts_decrypt,
 	}
 };
 
@@ -451,7 +607,7 @@ static void __exit sm4_exit(void)
 module_cpu_feature_match(SM4, sm4_init);
 module_exit(sm4_exit);
 
-MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("SM4 ECB/CBC/CFB/CTR/XTS using ARMv8 Crypto Extensions");
 MODULE_ALIAS_CRYPTO("sm4-ce");
 MODULE_ALIAS_CRYPTO("sm4");
 MODULE_ALIAS_CRYPTO("ecb(sm4)");
@@ -459,5 +615,6 @@ MODULE_ALIAS_CRYPTO("cbc(sm4)");
 MODULE_ALIAS_CRYPTO("cfb(sm4)");
 MODULE_ALIAS_CRYPTO("ctr(sm4)");
 MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
+MODULE_ALIAS_CRYPTO("xts(sm4)");
 MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
 MODULE_LICENSE("GPL v2");
-- 
GitLab


From 6b5360a5e0ad357b73776d092437715ba4a77865 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:55:03 +0800
Subject: [PATCH 0908/4122] crypto: arm64/sm4 - add CE implementation for
 cmac/xcbc/cbcmac

This patch is a CE-optimized assembly implementation for cmac/xcbc/cbcmac.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 300 mode of
tcrypt, and compared the performance before and after this patch (the driver
used before this patch is XXXmac(sm4-ce)). The abscissas are blocks of
different lengths. The data is tabulated and the unit is Mb/s:

Before:

update-size    |      16      64     256    1024    2048    4096    8192
---------------+--------------------------------------------------------
cmac(sm4-ce)   |  293.33  403.69  503.76  527.78  531.10  535.46  535.81
xcbc(sm4-ce)   |  292.83  402.50  504.02  529.08  529.87  536.55  538.24
cbcmac(sm4-ce) |  318.42  415.79  497.12  515.05  523.15  521.19  523.01

After:

update-size    |      16      64     256    1024    2048    4096    8192
---------------+--------------------------------------------------------
cmac-sm4-ce    |  371.99  675.28  903.56  971.65  980.57  990.40  991.04
xcbc-sm4-ce    |  372.11  674.55  903.47  971.61  980.96  990.42  991.10
cbcmac-sm4-ce  |  371.63  675.33  903.23  972.07  981.42  990.93  991.45

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm4-ce-core.S |  70 +++++++++
 arch/arm64/crypto/sm4-ce-glue.c | 267 +++++++++++++++++++++++++++++++-
 2 files changed, 336 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S
index ddd15ec09d38c..877b80c54a0d3 100644
--- a/arch/arm64/crypto/sm4-ce-core.S
+++ b/arch/arm64/crypto/sm4-ce-core.S
@@ -35,6 +35,7 @@
 #define RTMP3	v19
 
 #define RIV	v20
+#define RMAC	v20
 #define RMASK	v21
 
 
@@ -1007,6 +1008,75 @@ SYM_FUNC_START(sm4_ce_xts_dec)
 	ret
 SYM_FUNC_END(sm4_ce_xts_dec)
 
+.align 3
+SYM_FUNC_START(sm4_ce_mac_update)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: digest
+	 *   x2: src
+	 *   w3: nblocks
+	 *   w4: enc_before
+	 *   w5: enc_after
+	 */
+	SM4_PREPARE(x0)
+
+	ld1		{RMAC.16b}, [x1]
+
+	cbz		w4, .Lmac_update
+
+	SM4_CRYPT_BLK(RMAC)
+
+.Lmac_update:
+	cbz		w3, .Lmac_ret
+
+	sub		w6, w3, #1
+	cmp		w5, wzr
+	csel		w3, w3, w6, ne
+
+	cbz		w3, .Lmac_end
+
+.Lmac_loop_4x:
+	cmp		w3, #4
+	blt		.Lmac_loop_1x
+
+	sub		w3, w3, #4
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+
+	eor		RMAC.16b, RMAC.16b, v0.16b
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v1.16b
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v2.16b
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v3.16b
+	SM4_CRYPT_BLK(RMAC)
+
+	cbz		w3, .Lmac_end
+	b		.Lmac_loop_4x
+
+.Lmac_loop_1x:
+	sub		w3, w3, #1
+
+	ld1		{v0.16b}, [x2], #16
+
+	eor		RMAC.16b, RMAC.16b, v0.16b
+	SM4_CRYPT_BLK(RMAC)
+
+	cbnz		w3, .Lmac_loop_1x
+
+
+.Lmac_end:
+	cbnz		w5, .Lmac_ret
+
+	ld1		{v0.16b}, [x2], #16
+	eor		RMAC.16b, RMAC.16b, v0.16b
+
+.Lmac_ret:
+	st1		{RMAC.16b}, [x1]
+	ret
+SYM_FUNC_END(sm4_ce_mac_update)
+
 
 	.section	".rodata", "a"
 	.align 4
diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c
index 8222766f712a4..0a2d32ed3bdec 100644
--- a/arch/arm64/crypto/sm4-ce-glue.c
+++ b/arch/arm64/crypto/sm4-ce-glue.c
@@ -14,8 +14,10 @@
 #include <linux/cpufeature.h>
 #include <asm/neon.h>
 #include <asm/simd.h>
+#include <crypto/b128ops.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <crypto/sm4.h>
@@ -47,6 +49,9 @@ asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src,
 asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src,
 			       u8 *tweak, unsigned int nbytes,
 			       const u32 *rkey2_enc);
+asmlinkage void sm4_ce_mac_update(const u32 *rkey_enc, u8 *digest,
+				  const u8 *src, unsigned int nblocks,
+				  bool enc_before, bool enc_after);
 
 EXPORT_SYMBOL(sm4_ce_expand_key);
 EXPORT_SYMBOL(sm4_ce_crypt_block);
@@ -58,6 +63,16 @@ struct sm4_xts_ctx {
 	struct sm4_ctx key2;
 };
 
+struct sm4_mac_tfm_ctx {
+	struct sm4_ctx key;
+	u8 __aligned(8) consts[];
+};
+
+struct sm4_mac_desc_ctx {
+	unsigned int len;
+	u8 digest[SM4_BLOCK_SIZE];
+};
+
 static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		      unsigned int key_len)
 {
@@ -594,13 +609,260 @@ static struct skcipher_alg sm4_algs[] = {
 	}
 };
 
+static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
+			     unsigned int key_len)
+{
+	struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	kernel_neon_begin();
+	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+			  crypto_sm4_fk, crypto_sm4_ck);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
+			   unsigned int key_len)
+{
+	struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+	be128 *consts = (be128 *)ctx->consts;
+	u64 a, b;
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	memset(consts, 0, SM4_BLOCK_SIZE);
+
+	kernel_neon_begin();
+
+	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+			  crypto_sm4_fk, crypto_sm4_ck);
+
+	/* encrypt the zero block */
+	sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
+
+	kernel_neon_end();
+
+	/* gf(2^128) multiply zero-ciphertext with u and u^2 */
+	a = be64_to_cpu(consts[0].a);
+	b = be64_to_cpu(consts[0].b);
+	consts[0].a = cpu_to_be64((a << 1) | (b >> 63));
+	consts[0].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+
+	a = be64_to_cpu(consts[0].a);
+	b = be64_to_cpu(consts[0].b);
+	consts[1].a = cpu_to_be64((a << 1) | (b >> 63));
+	consts[1].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
+
+	return 0;
+}
+
+static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
+			   unsigned int key_len)
+{
+	struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
+	u8 __aligned(8) key2[SM4_BLOCK_SIZE];
+	static u8 const ks[3][SM4_BLOCK_SIZE] = {
+		{ [0 ... SM4_BLOCK_SIZE - 1] = 0x1},
+		{ [0 ... SM4_BLOCK_SIZE - 1] = 0x2},
+		{ [0 ... SM4_BLOCK_SIZE - 1] = 0x3},
+	};
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	kernel_neon_begin();
+
+	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+			  crypto_sm4_fk, crypto_sm4_ck);
+
+	sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
+	sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
+
+	sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
+			  crypto_sm4_fk, crypto_sm4_ck);
+
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int sm4_mac_init(struct shash_desc *desc)
+{
+	struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	memset(ctx->digest, 0, SM4_BLOCK_SIZE);
+	ctx->len = 0;
+
+	return 0;
+}
+
+static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
+			  unsigned int len)
+{
+	struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+	unsigned int l, nblocks;
+
+	if (len == 0)
+		return 0;
+
+	if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) {
+		l = min(len, SM4_BLOCK_SIZE - ctx->len);
+
+		crypto_xor(ctx->digest + ctx->len, p, l);
+		ctx->len += l;
+		len -= l;
+		p += l;
+	}
+
+	if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) {
+		kernel_neon_begin();
+
+		if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) {
+			sm4_ce_crypt_block(tctx->key.rkey_enc,
+					   ctx->digest, ctx->digest);
+			ctx->len = 0;
+		} else {
+			nblocks = len / SM4_BLOCK_SIZE;
+			len %= SM4_BLOCK_SIZE;
+
+			sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
+					  nblocks, (ctx->len == SM4_BLOCK_SIZE),
+					  (len != 0));
+
+			p += nblocks * SM4_BLOCK_SIZE;
+
+			if (len == 0)
+				ctx->len = SM4_BLOCK_SIZE;
+		}
+
+		kernel_neon_end();
+
+		if (len) {
+			crypto_xor(ctx->digest, p, len);
+			ctx->len = len;
+		}
+	}
+
+	return 0;
+}
+
+static int sm4_cmac_final(struct shash_desc *desc, u8 *out)
+{
+	struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+	const u8 *consts = tctx->consts;
+
+	if (ctx->len != SM4_BLOCK_SIZE) {
+		ctx->digest[ctx->len] ^= 0x80;
+		consts += SM4_BLOCK_SIZE;
+	}
+
+	kernel_neon_begin();
+	sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, consts, 1,
+			  false, true);
+	kernel_neon_end();
+
+	memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
+
+	return 0;
+}
+
+static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out)
+{
+	struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+	struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
+
+	if (ctx->len) {
+		kernel_neon_begin();
+		sm4_ce_crypt_block(tctx->key.rkey_enc, ctx->digest,
+				   ctx->digest);
+		kernel_neon_end();
+	}
+
+	memcpy(out, ctx->digest, SM4_BLOCK_SIZE);
+
+	return 0;
+}
+
+static struct shash_alg sm4_mac_algs[] = {
+	{
+		.base = {
+			.cra_name		= "cmac(sm4)",
+			.cra_driver_name	= "cmac-sm4-ce",
+			.cra_priority		= 400,
+			.cra_blocksize		= SM4_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct sm4_mac_tfm_ctx)
+							+ SM4_BLOCK_SIZE * 2,
+			.cra_module		= THIS_MODULE,
+		},
+		.digestsize	= SM4_BLOCK_SIZE,
+		.init		= sm4_mac_init,
+		.update		= sm4_mac_update,
+		.final		= sm4_cmac_final,
+		.setkey		= sm4_cmac_setkey,
+		.descsize	= sizeof(struct sm4_mac_desc_ctx),
+	}, {
+		.base = {
+			.cra_name		= "xcbc(sm4)",
+			.cra_driver_name	= "xcbc-sm4-ce",
+			.cra_priority		= 400,
+			.cra_blocksize		= SM4_BLOCK_SIZE,
+			.cra_ctxsize		= sizeof(struct sm4_mac_tfm_ctx)
+							+ SM4_BLOCK_SIZE * 2,
+			.cra_module		= THIS_MODULE,
+		},
+		.digestsize	= SM4_BLOCK_SIZE,
+		.init		= sm4_mac_init,
+		.update		= sm4_mac_update,
+		.final		= sm4_cmac_final,
+		.setkey		= sm4_xcbc_setkey,
+		.descsize	= sizeof(struct sm4_mac_desc_ctx),
+	}, {
+		.base = {
+			.cra_name		= "cbcmac(sm4)",
+			.cra_driver_name	= "cbcmac-sm4-ce",
+			.cra_priority		= 400,
+			.cra_blocksize		= 1,
+			.cra_ctxsize		= sizeof(struct sm4_mac_tfm_ctx),
+			.cra_module		= THIS_MODULE,
+		},
+		.digestsize	= SM4_BLOCK_SIZE,
+		.init		= sm4_mac_init,
+		.update		= sm4_mac_update,
+		.final		= sm4_cbcmac_final,
+		.setkey		= sm4_cbcmac_setkey,
+		.descsize	= sizeof(struct sm4_mac_desc_ctx),
+	}
+};
+
 static int __init sm4_init(void)
 {
-	return crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+	int err;
+
+	err = crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+	if (err)
+		return err;
+
+	err = crypto_register_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
+	if (err)
+		goto out_err;
+
+	return 0;
+
+out_err:
+	crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
+	return err;
 }
 
 static void __exit sm4_exit(void)
 {
+	crypto_unregister_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
 	crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
 }
 
@@ -616,5 +878,8 @@ MODULE_ALIAS_CRYPTO("cfb(sm4)");
 MODULE_ALIAS_CRYPTO("ctr(sm4)");
 MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
 MODULE_ALIAS_CRYPTO("xts(sm4)");
+MODULE_ALIAS_CRYPTO("cmac(sm4)");
+MODULE_ALIAS_CRYPTO("xcbc(sm4)");
+MODULE_ALIAS_CRYPTO("cbcmac(sm4)");
 MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
 MODULE_LICENSE("GPL v2");
-- 
GitLab


From 67fa3a7fdf80c80ee737840dfdd225260e5c1044 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:55:04 +0800
Subject: [PATCH 0909/4122] crypto: arm64/sm4 - add CE implementation for CCM
 mode

This patch is a CE-optimized assembly implementation for CCM mode.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 223 and 225
modes of tcrypt, and compared the performance before and after this patch (the
driver used before this patch is ccm_base(ctr-sm4-ce,cbcmac-sm4-ce)).
The abscissas are blocks of different lengths. The data is tabulated and the
unit is Mb/s:

Before (rfc4309(ccm_base(ctr-sm4-ce,cbcmac-sm4-ce))):

ccm(sm4)     |     16      64     256     512    1024    1420    4096    8192
-------------+---------------------------------------------------------------
  CCM enc    |  35.07  125.40  336.47  468.17  581.97  619.18  712.56  736.01
  CCM dec    |  34.87  124.40  335.08  466.75  581.04  618.81  712.25  735.89
  CCM mb enc |  34.71  123.96  333.92  465.39  579.91  617.49  711.45  734.92
  CCM mb dec |  34.42  122.80  331.02  462.81  578.28  616.42  709.88  734.19

After (rfc4309(ccm-sm4-ce)):

ccm-sm4-ce   |     16      64     256     512    1024    1420    4096    8192
-------------+---------------------------------------------------------------
  CCM enc    |  77.12  249.82  569.94  725.17  839.27  867.71  952.87  969.89
  CCM dec    |  75.90  247.26  566.29  722.12  836.90  865.95  951.74  968.57
  CCM mb enc |  75.98  245.25  562.91  718.99  834.76  864.70  950.17  967.90
  CCM mb dec |  75.06  243.78  560.58  717.13  833.68  862.70  949.35  967.11

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig           |  16 ++
 arch/arm64/crypto/Makefile          |   3 +
 arch/arm64/crypto/sm4-ce-ccm-core.S | 328 ++++++++++++++++++++++++++++
 arch/arm64/crypto/sm4-ce-ccm-glue.c | 303 +++++++++++++++++++++++++
 4 files changed, 650 insertions(+)
 create mode 100644 arch/arm64/crypto/sm4-ce-ccm-core.S
 create mode 100644 arch/arm64/crypto/sm4-ce-ccm-glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 8939f5ae9214d..2611036a3e3fa 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -281,6 +281,22 @@ config CRYPTO_AES_ARM64_CE_CCM
 	  - ARMv8 Crypto Extensions
 	  - NEON (Advanced SIMD) extensions
 
+config CRYPTO_SM4_ARM64_CE_CCM
+	tristate "AEAD cipher: SM4 in CCM mode (ARMv8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
+	select CRYPTO_SM4
+	select CRYPTO_SM4_ARM64_CE_BLK
+	help
+	  AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
+	  CCM (Counter with Cipher Block Chaining-Message Authentication Code)
+	  authenticated encryption mode (NIST SP800-38C)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
+	  - NEON (Advanced SIMD) extensions
+
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF (PMULL)"
 	depends on KERNEL_MODE_NEON && CRC_T10DIF
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 087f1625e7751..843ea52669653 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -29,6 +29,9 @@ sm4-ce-cipher-y := sm4-ce-cipher-glue.o sm4-ce-cipher-core.o
 obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_BLK) += sm4-ce.o
 sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
 
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o
+sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o
+
 obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
 sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
 
diff --git a/arch/arm64/crypto/sm4-ce-ccm-core.S b/arch/arm64/crypto/sm4-ce-ccm-core.S
new file mode 100644
index 0000000000000..028207c4afd0f
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-ccm-core.S
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "sm4-ce-asm.h"
+
+.arch	armv8-a+crypto
+
+.irp b, 0, 1, 8, 9, 10, 11, 12, 13, 14, 15, 16, 24, 25, 26, 27, 28, 29, 30, 31
+	.set .Lv\b\().4s, \b
+.endr
+
+.macro sm4e, vd, vn
+	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
+.endm
+
+/* Register macros */
+
+#define RMAC	v16
+
+/* Helper macros. */
+
+#define inc_le128(vctr)					\
+		mov		vctr.d[1], x8;		\
+		mov		vctr.d[0], x7;		\
+		adds		x8, x8, #1;		\
+		rev64		vctr.16b, vctr.16b;	\
+		adc		x7, x7, xzr;
+
+
+.align 3
+SYM_FUNC_START(sm4_ce_cbcmac_update)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: mac
+	 *   x2: src
+	 *   w3: nblocks
+	 */
+	SM4_PREPARE(x0)
+
+	ld1		{RMAC.16b}, [x1]
+
+.Lcbcmac_loop_4x:
+	cmp		w3, #4
+	blt		.Lcbcmac_loop_1x
+
+	sub		w3, w3, #4
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v0.16b
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v1.16b
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v2.16b
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v3.16b
+
+	cbz		w3, .Lcbcmac_end
+	b		.Lcbcmac_loop_4x
+
+.Lcbcmac_loop_1x:
+	sub		w3, w3, #1
+
+	ld1		{v0.16b}, [x2], #16
+
+	SM4_CRYPT_BLK(RMAC)
+	eor		RMAC.16b, RMAC.16b, v0.16b
+
+	cbnz		w3, .Lcbcmac_loop_1x
+
+.Lcbcmac_end:
+	st1		{RMAC.16b}, [x1]
+	ret
+SYM_FUNC_END(sm4_ce_cbcmac_update)
+
+.align 3
+SYM_FUNC_START(sm4_ce_ccm_final)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: ctr0 (big endian, 128 bit)
+	 *   x2: mac
+	 */
+	SM4_PREPARE(x0)
+
+	ld1		{RMAC.16b}, [x2]
+	ld1		{v0.16b}, [x1]
+
+	SM4_CRYPT_BLK2(RMAC, v0)
+
+	/* en-/decrypt the mac with ctr0 */
+	eor		RMAC.16b, RMAC.16b, v0.16b
+	st1		{RMAC.16b}, [x2]
+
+	ret
+SYM_FUNC_END(sm4_ce_ccm_final)
+
+.align 3
+SYM_FUNC_START(sm4_ce_ccm_enc)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: ctr (big endian, 128 bit)
+	 *   w4: nbytes
+	 *   x5: mac
+	 */
+	SM4_PREPARE(x0)
+
+	ldp		x7, x8, [x3]
+	rev		x7, x7
+	rev		x8, x8
+
+	ld1		{RMAC.16b}, [x5]
+
+.Lccm_enc_loop_4x:
+	cmp		w4, #(4 * 16)
+	blt		.Lccm_enc_loop_1x
+
+	sub		w4, w4, #(4 * 16)
+
+	/* construct CTRs */
+	inc_le128(v8)			/* +0 */
+	inc_le128(v9)			/* +1 */
+	inc_le128(v10)			/* +2 */
+	inc_le128(v11)			/* +3 */
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+
+	SM4_CRYPT_BLK2(v8, RMAC)
+	eor		v8.16b, v8.16b, v0.16b
+	eor		RMAC.16b, RMAC.16b, v0.16b
+	SM4_CRYPT_BLK2(v9, RMAC)
+	eor		v9.16b, v9.16b, v1.16b
+	eor		RMAC.16b, RMAC.16b, v1.16b
+	SM4_CRYPT_BLK2(v10, RMAC)
+	eor		v10.16b, v10.16b, v2.16b
+	eor		RMAC.16b, RMAC.16b, v2.16b
+	SM4_CRYPT_BLK2(v11, RMAC)
+	eor		v11.16b, v11.16b, v3.16b
+	eor		RMAC.16b, RMAC.16b, v3.16b
+
+	st1		{v8.16b-v11.16b}, [x1], #64
+
+	cbz		w4, .Lccm_enc_end
+	b		.Lccm_enc_loop_4x
+
+.Lccm_enc_loop_1x:
+	cmp		w4, #16
+	blt		.Lccm_enc_tail
+
+	sub		w4, w4, #16
+
+	/* construct CTRs */
+	inc_le128(v8)
+
+	ld1		{v0.16b}, [x2], #16
+
+	SM4_CRYPT_BLK2(v8, RMAC)
+	eor		v8.16b, v8.16b, v0.16b
+	eor		RMAC.16b, RMAC.16b, v0.16b
+
+	st1		{v8.16b}, [x1], #16
+
+	cbz		w4, .Lccm_enc_end
+	b		.Lccm_enc_loop_1x
+
+.Lccm_enc_tail:
+	/* construct CTRs */
+	inc_le128(v8)
+
+	SM4_CRYPT_BLK2(RMAC, v8)
+
+	/* store new MAC */
+	st1		{RMAC.16b}, [x5]
+
+.Lccm_enc_tail_loop:
+	ldrb		w0, [x2], #1		/* get 1 byte from input */
+	umov		w9, v8.b[0]		/* get top crypted CTR byte */
+	umov		w6, RMAC.b[0]		/* get top MAC byte */
+
+	eor		w9, w9, w0		/* w9 = CTR ^ input */
+	eor		w6, w6, w0		/* w6 = MAC ^ input */
+
+	strb		w9, [x1], #1		/* store out byte */
+	strb		w6, [x5], #1		/* store MAC byte */
+
+	subs		w4, w4, #1
+	beq		.Lccm_enc_ret
+
+	/* shift out one byte */
+	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
+	ext		v8.16b, v8.16b, v8.16b, #1
+
+	b		.Lccm_enc_tail_loop
+
+.Lccm_enc_end:
+	/* store new MAC */
+	st1		{RMAC.16b}, [x5]
+
+	/* store new CTR */
+	rev		x7, x7
+	rev		x8, x8
+	stp		x7, x8, [x3]
+
+.Lccm_enc_ret:
+	ret
+SYM_FUNC_END(sm4_ce_ccm_enc)
+
+.align 3
+SYM_FUNC_START(sm4_ce_ccm_dec)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: ctr (big endian, 128 bit)
+	 *   w4: nbytes
+	 *   x5: mac
+	 */
+	SM4_PREPARE(x0)
+
+	ldp		x7, x8, [x3]
+	rev		x7, x7
+	rev		x8, x8
+
+	ld1		{RMAC.16b}, [x5]
+
+.Lccm_dec_loop_4x:
+	cmp		w4, #(4 * 16)
+	blt		.Lccm_dec_loop_1x
+
+	sub		w4, w4, #(4 * 16)
+
+	/* construct CTRs */
+	inc_le128(v8)			/* +0 */
+	inc_le128(v9)			/* +1 */
+	inc_le128(v10)			/* +2 */
+	inc_le128(v11)			/* +3 */
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+
+	SM4_CRYPT_BLK2(v8, RMAC)
+	eor		v8.16b, v8.16b, v0.16b
+	eor		RMAC.16b, RMAC.16b, v8.16b
+	SM4_CRYPT_BLK2(v9, RMAC)
+	eor		v9.16b, v9.16b, v1.16b
+	eor		RMAC.16b, RMAC.16b, v9.16b
+	SM4_CRYPT_BLK2(v10, RMAC)
+	eor		v10.16b, v10.16b, v2.16b
+	eor		RMAC.16b, RMAC.16b, v10.16b
+	SM4_CRYPT_BLK2(v11, RMAC)
+	eor		v11.16b, v11.16b, v3.16b
+	eor		RMAC.16b, RMAC.16b, v11.16b
+
+	st1		{v8.16b-v11.16b}, [x1], #64
+
+	cbz		w4, .Lccm_dec_end
+	b		.Lccm_dec_loop_4x
+
+.Lccm_dec_loop_1x:
+	cmp		w4, #16
+	blt		.Lccm_dec_tail
+
+	sub		w4, w4, #16
+
+	/* construct CTRs */
+	inc_le128(v8)
+
+	ld1		{v0.16b}, [x2], #16
+
+	SM4_CRYPT_BLK2(v8, RMAC)
+	eor		v8.16b, v8.16b, v0.16b
+	eor		RMAC.16b, RMAC.16b, v8.16b
+
+	st1		{v8.16b}, [x1], #16
+
+	cbz		w4, .Lccm_dec_end
+	b		.Lccm_dec_loop_1x
+
+.Lccm_dec_tail:
+	/* construct CTRs */
+	inc_le128(v8)
+
+	SM4_CRYPT_BLK2(RMAC, v8)
+
+	/* store new MAC */
+	st1		{RMAC.16b}, [x5]
+
+.Lccm_dec_tail_loop:
+	ldrb		w0, [x2], #1		/* get 1 byte from input */
+	umov		w9, v8.b[0]		/* get top crypted CTR byte */
+	umov		w6, RMAC.b[0]		/* get top MAC byte */
+
+	eor		w9, w9, w0		/* w9 = CTR ^ input */
+	eor		w6, w6, w9		/* w6 = MAC ^ output */
+
+	strb		w9, [x1], #1		/* store out byte */
+	strb		w6, [x5], #1		/* store MAC byte */
+
+	subs		w4, w4, #1
+	beq		.Lccm_dec_ret
+
+	/* shift out one byte */
+	ext		RMAC.16b, RMAC.16b, RMAC.16b, #1
+	ext		v8.16b, v8.16b, v8.16b, #1
+
+	b		.Lccm_dec_tail_loop
+
+.Lccm_dec_end:
+	/* store new MAC */
+	st1		{RMAC.16b}, [x5]
+
+	/* store new CTR */
+	rev		x7, x7
+	rev		x8, x8
+	stp		x7, x8, [x3]
+
+.Lccm_dec_ret:
+	ret
+SYM_FUNC_END(sm4_ce_ccm_dec)
diff --git a/arch/arm64/crypto/sm4-ce-ccm-glue.c b/arch/arm64/crypto/sm4-ce-ccm-glue.c
new file mode 100644
index 0000000000000..f2cec7b52efcd
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-ccm-glue.c
@@ -0,0 +1,303 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4-CCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/neon.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-ce.h"
+
+asmlinkage void sm4_ce_cbcmac_update(const u32 *rkey_enc, u8 *mac,
+				     const u8 *src, unsigned int nblocks);
+asmlinkage void sm4_ce_ccm_enc(const u32 *rkey_enc, u8 *dst, const u8 *src,
+			       u8 *iv, unsigned int nbytes, u8 *mac);
+asmlinkage void sm4_ce_ccm_dec(const u32 *rkey_enc, u8 *dst, const u8 *src,
+			       u8 *iv, unsigned int nbytes, u8 *mac);
+asmlinkage void sm4_ce_ccm_final(const u32 *rkey_enc, u8 *iv, u8 *mac);
+
+
+static int ccm_setkey(struct crypto_aead *tfm, const u8 *key,
+		      unsigned int key_len)
+{
+	struct sm4_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	kernel_neon_begin();
+	sm4_ce_expand_key(key, ctx->rkey_enc, ctx->rkey_dec,
+			  crypto_sm4_fk, crypto_sm4_ck);
+	kernel_neon_end();
+
+	return 0;
+}
+
+static int ccm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	if ((authsize & 1) || authsize < 4)
+		return -EINVAL;
+	return 0;
+}
+
+static int ccm_format_input(u8 info[], struct aead_request *req,
+			    unsigned int msglen)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	unsigned int l = req->iv[0] + 1;
+	unsigned int m;
+	__be32 len;
+
+	/* verify that CCM dimension 'L': 2 <= L <= 8 */
+	if (l < 2 || l > 8)
+		return -EINVAL;
+	if (l < 4 && msglen >> (8 * l))
+		return -EOVERFLOW;
+
+	memset(&req->iv[SM4_BLOCK_SIZE - l], 0, l);
+
+	memcpy(info, req->iv, SM4_BLOCK_SIZE);
+
+	m = crypto_aead_authsize(aead);
+
+	/* format flags field per RFC 3610/NIST 800-38C */
+	*info |= ((m - 2) / 2) << 3;
+	if (req->assoclen)
+		*info |= (1 << 6);
+
+	/*
+	 * format message length field,
+	 * Linux uses a u32 type to represent msglen
+	 */
+	if (l >= 4)
+		l = 4;
+
+	len = cpu_to_be32(msglen);
+	memcpy(&info[SM4_BLOCK_SIZE - l], (u8 *)&len + 4 - l, l);
+
+	return 0;
+}
+
+static void ccm_calculate_auth_mac(struct aead_request *req, u8 mac[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
+	struct __packed { __be16 l; __be32 h; } aadlen;
+	u32 assoclen = req->assoclen;
+	struct scatter_walk walk;
+	unsigned int len;
+
+	if (assoclen < 0xff00) {
+		aadlen.l = cpu_to_be16(assoclen);
+		len = 2;
+	} else {
+		aadlen.l = cpu_to_be16(0xfffe);
+		put_unaligned_be32(assoclen, &aadlen.h);
+		len = 6;
+	}
+
+	sm4_ce_crypt_block(ctx->rkey_enc, mac, mac);
+	crypto_xor(mac, (const u8 *)&aadlen, len);
+
+	scatterwalk_start(&walk, req->src);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, assoclen);
+		u8 *p, *ptr;
+
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, assoclen);
+		}
+
+		p = ptr = scatterwalk_map(&walk);
+		assoclen -= n;
+		scatterwalk_advance(&walk, n);
+
+		while (n > 0) {
+			unsigned int l, nblocks;
+
+			if (len == SM4_BLOCK_SIZE) {
+				if (n < SM4_BLOCK_SIZE) {
+					sm4_ce_crypt_block(ctx->rkey_enc,
+							   mac, mac);
+
+					len = 0;
+				} else {
+					nblocks = n / SM4_BLOCK_SIZE;
+					sm4_ce_cbcmac_update(ctx->rkey_enc,
+							     mac, ptr, nblocks);
+
+					ptr += nblocks * SM4_BLOCK_SIZE;
+					n %= SM4_BLOCK_SIZE;
+
+					continue;
+				}
+			}
+
+			l = min(n, SM4_BLOCK_SIZE - len);
+			if (l) {
+				crypto_xor(mac + len, ptr, l);
+				len += l;
+				ptr += l;
+				n -= l;
+			}
+		}
+
+		scatterwalk_unmap(p);
+		scatterwalk_done(&walk, 0, assoclen);
+	} while (assoclen);
+}
+
+static int ccm_crypt(struct aead_request *req, struct skcipher_walk *walk,
+		     u32 *rkey_enc, u8 mac[],
+		     void (*sm4_ce_ccm_crypt)(const u32 *rkey_enc, u8 *dst,
+					const u8 *src, u8 *iv,
+					unsigned int nbytes, u8 *mac))
+{
+	u8 __aligned(8) ctr0[SM4_BLOCK_SIZE];
+	int err;
+
+	/* preserve the initial ctr0 for the TAG */
+	memcpy(ctr0, walk->iv, SM4_BLOCK_SIZE);
+	crypto_inc(walk->iv, SM4_BLOCK_SIZE);
+
+	kernel_neon_begin();
+
+	if (req->assoclen)
+		ccm_calculate_auth_mac(req, mac);
+
+	do {
+		unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+		const u8 *src = walk->src.virt.addr;
+		u8 *dst = walk->dst.virt.addr;
+
+		if (walk->nbytes == walk->total)
+			tail = 0;
+
+		if (walk->nbytes - tail)
+			sm4_ce_ccm_crypt(rkey_enc, dst, src, walk->iv,
+					 walk->nbytes - tail, mac);
+
+		if (walk->nbytes == walk->total)
+			sm4_ce_ccm_final(rkey_enc, ctr0, mac);
+
+		kernel_neon_end();
+
+		if (walk->nbytes) {
+			err = skcipher_walk_done(walk, tail);
+			if (err)
+				return err;
+			if (walk->nbytes)
+				kernel_neon_begin();
+		}
+	} while (walk->nbytes > 0);
+
+	return 0;
+}
+
+static int ccm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
+	u8 __aligned(8) mac[SM4_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	int err;
+
+	err = ccm_format_input(mac, req, req->cryptlen);
+	if (err)
+		return err;
+
+	err = skcipher_walk_aead_encrypt(&walk, req, false);
+	if (err)
+		return err;
+
+	err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_enc);
+	if (err)
+		return err;
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(mac, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int ccm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct sm4_ctx *ctx = crypto_aead_ctx(aead);
+	u8 __aligned(8) mac[SM4_BLOCK_SIZE];
+	u8 authtag[SM4_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	int err;
+
+	err = ccm_format_input(mac, req, req->cryptlen - authsize);
+	if (err)
+		return err;
+
+	err = skcipher_walk_aead_decrypt(&walk, req, false);
+	if (err)
+		return err;
+
+	err = ccm_crypt(req, &walk, ctx->rkey_enc, mac, sm4_ce_ccm_dec);
+	if (err)
+		return err;
+
+	/* compare calculated auth tag with the stored one */
+	scatterwalk_map_and_copy(authtag, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (crypto_memneq(authtag, mac, authsize))
+		return -EBADMSG;
+
+	return 0;
+}
+
+static struct aead_alg sm4_ccm_alg = {
+	.base = {
+		.cra_name		= "ccm(sm4)",
+		.cra_driver_name	= "ccm-sm4-ce",
+		.cra_priority		= 400,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct sm4_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.ivsize		= SM4_BLOCK_SIZE,
+	.chunksize	= SM4_BLOCK_SIZE,
+	.maxauthsize	= SM4_BLOCK_SIZE,
+	.setkey		= ccm_setkey,
+	.setauthsize	= ccm_setauthsize,
+	.encrypt	= ccm_encrypt,
+	.decrypt	= ccm_decrypt,
+};
+
+static int __init sm4_ce_ccm_init(void)
+{
+	return crypto_register_aead(&sm4_ccm_alg);
+}
+
+static void __exit sm4_ce_ccm_exit(void)
+{
+	crypto_unregister_aead(&sm4_ccm_alg);
+}
+
+module_cpu_feature_match(SM4, sm4_ce_ccm_init);
+module_exit(sm4_ce_ccm_exit);
+
+MODULE_DESCRIPTION("Synchronous SM4 in CCM mode using ARMv8 Crypto Extensions");
+MODULE_ALIAS_CRYPTO("ccm(sm4)");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From ae1b83c7d572101b3b5cfbf40415c4cc5d469bde Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 27 Oct 2022 14:55:05 +0800
Subject: [PATCH 0910/4122] crypto: arm64/sm4 - add CE implementation for GCM
 mode

This patch is a CE-optimized assembly implementation for GCM mode.

Benchmark on T-Head Yitian-710 2.75 GHz, the data comes from the 224 and 224
modes of tcrypt, and compared the performance before and after this patch (the
driver used before this patch is gcm_base(ctr-sm4-ce,ghash-generic)).
The abscissas are blocks of different lengths. The data is tabulated and the
unit is Mb/s:

Before (gcm_base(ctr-sm4-ce,ghash-generic)):

gcm(sm4)     |     16      64      256      512     1024     1420     4096     8192
-------------+---------------------------------------------------------------------
  GCM enc    |  25.24   64.65   104.66   116.69   123.81   125.12   129.67   130.62
  GCM dec    |  25.40   64.80   104.74   116.70   123.81   125.21   129.68   130.59
  GCM mb enc |  24.95   64.06   104.20   116.38   123.55   124.97   129.63   130.61
  GCM mb dec |  24.92   64.00   104.13   116.34   123.55   124.98   129.56   130.48

After:

gcm-sm4-ce   |     16      64      256      512     1024     1420     4096     8192
-------------+---------------------------------------------------------------------
  GCM enc    | 108.62  397.18   971.60  1283.92  1522.77  1513.39  1777.00  1806.96
  GCM dec    | 116.36  398.14  1004.27  1319.11  1624.21  1635.43  1932.54  1974.20
  GCM mb enc | 107.13  391.79   962.05  1274.94  1514.76  1508.57  1769.07  1801.58
  GCM mb dec | 113.40  389.36   988.51  1307.68  1619.10  1631.55  1931.70  1970.86

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/Kconfig           |  16 +
 arch/arm64/crypto/Makefile          |   3 +
 arch/arm64/crypto/sm4-ce-gcm-core.S | 741 ++++++++++++++++++++++++++++
 arch/arm64/crypto/sm4-ce-gcm-glue.c | 286 +++++++++++
 4 files changed, 1046 insertions(+)
 create mode 100644 arch/arm64/crypto/sm4-ce-gcm-core.S
 create mode 100644 arch/arm64/crypto/sm4-ce-gcm-glue.c

diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 2611036a3e3fa..6793d5bc3ee53 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -297,6 +297,22 @@ config CRYPTO_SM4_ARM64_CE_CCM
 	  - ARMv8 Crypto Extensions
 	  - NEON (Advanced SIMD) extensions
 
+config CRYPTO_SM4_ARM64_CE_GCM
+	tristate "AEAD cipher: SM4 in GCM mode (ARMv8 Crypto Extensions)"
+	depends on KERNEL_MODE_NEON
+	select CRYPTO_ALGAPI
+	select CRYPTO_AEAD
+	select CRYPTO_SM4
+	select CRYPTO_SM4_ARM64_CE_BLK
+	help
+	  AEAD cipher: SM4 cipher algorithms (OSCCA GB/T 32907-2016) with
+	  GCM (Galois/Counter Mode) authenticated encryption mode (NIST SP800-38D)
+
+	  Architecture: arm64 using:
+	  - ARMv8 Crypto Extensions
+	  - PMULL (Polynomial Multiply Long) instructions
+	  - NEON (Advanced SIMD) extensions
+
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF (PMULL)"
 	depends on KERNEL_MODE_NEON && CRC_T10DIF
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index 843ea52669653..4818e204c2aca 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -32,6 +32,9 @@ sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o
 obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_CCM) += sm4-ce-ccm.o
 sm4-ce-ccm-y := sm4-ce-ccm-glue.o sm4-ce-ccm-core.o
 
+obj-$(CONFIG_CRYPTO_SM4_ARM64_CE_GCM) += sm4-ce-gcm.o
+sm4-ce-gcm-y := sm4-ce-gcm-glue.o sm4-ce-gcm-core.o
+
 obj-$(CONFIG_CRYPTO_SM4_ARM64_NEON_BLK) += sm4-neon.o
 sm4-neon-y := sm4-neon-glue.o sm4-neon-core.o
 
diff --git a/arch/arm64/crypto/sm4-ce-gcm-core.S b/arch/arm64/crypto/sm4-ce-gcm-core.S
new file mode 100644
index 0000000000000..7aa3ec18a2891
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-gcm-core.S
@@ -0,0 +1,741 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include "sm4-ce-asm.h"
+
+.arch	armv8-a+crypto
+
+.irp b, 0, 1, 2, 3, 24, 25, 26, 27, 28, 29, 30, 31
+	.set .Lv\b\().4s, \b
+.endr
+
+.macro sm4e, vd, vn
+	.inst 0xcec08400 | (.L\vn << 5) | .L\vd
+.endm
+
+/* Register macros */
+
+/* Used for both encryption and decryption */
+#define	RHASH	v21
+#define	RRCONST	v22
+#define RZERO	v23
+
+/* Helper macros. */
+
+/*
+ * input: m0, m1
+ * output: r0:r1 (low 128-bits in r0, high in r1)
+ */
+#define PMUL_128x128(r0, r1, m0, m1, T0, T1)			\
+		ext		T0.16b, m1.16b, m1.16b, #8;	\
+		pmull		r0.1q, m0.1d, m1.1d;		\
+		pmull		T1.1q, m0.1d, T0.1d;		\
+		pmull2		T0.1q, m0.2d, T0.2d;		\
+		pmull2		r1.1q, m0.2d, m1.2d;		\
+		eor		T0.16b, T0.16b, T1.16b;		\
+		ext		T1.16b, RZERO.16b, T0.16b, #8;	\
+		ext		T0.16b, T0.16b, RZERO.16b, #8;	\
+		eor		r0.16b, r0.16b, T1.16b;		\
+		eor		r1.16b, r1.16b, T0.16b;
+
+#define PMUL_128x128_4x(r0, r1, m0, m1, T0, T1,			\
+			r2, r3, m2, m3, T2, T3,			\
+			r4, r5, m4, m5, T4, T5,			\
+			r6, r7, m6, m7, T6, T7)			\
+		ext		T0.16b, m1.16b, m1.16b, #8;	\
+		ext		T2.16b, m3.16b, m3.16b, #8;	\
+		ext		T4.16b, m5.16b, m5.16b, #8;	\
+		ext		T6.16b, m7.16b, m7.16b, #8;	\
+		pmull		r0.1q, m0.1d, m1.1d;		\
+		pmull		r2.1q, m2.1d, m3.1d;		\
+		pmull		r4.1q, m4.1d, m5.1d;		\
+		pmull		r6.1q, m6.1d, m7.1d;		\
+		pmull		T1.1q, m0.1d, T0.1d;		\
+		pmull		T3.1q, m2.1d, T2.1d;		\
+		pmull		T5.1q, m4.1d, T4.1d;		\
+		pmull		T7.1q, m6.1d, T6.1d;		\
+		pmull2		T0.1q, m0.2d, T0.2d;		\
+		pmull2		T2.1q, m2.2d, T2.2d;		\
+		pmull2		T4.1q, m4.2d, T4.2d;		\
+		pmull2		T6.1q, m6.2d, T6.2d;		\
+		pmull2		r1.1q, m0.2d, m1.2d;		\
+		pmull2		r3.1q, m2.2d, m3.2d;		\
+		pmull2		r5.1q, m4.2d, m5.2d;		\
+		pmull2		r7.1q, m6.2d, m7.2d;		\
+		eor		T0.16b, T0.16b, T1.16b;		\
+		eor		T2.16b, T2.16b, T3.16b;		\
+		eor		T4.16b, T4.16b, T5.16b;		\
+		eor		T6.16b, T6.16b, T7.16b;		\
+		ext		T1.16b, RZERO.16b, T0.16b, #8;	\
+		ext		T3.16b, RZERO.16b, T2.16b, #8;	\
+		ext		T5.16b, RZERO.16b, T4.16b, #8;	\
+		ext		T7.16b, RZERO.16b, T6.16b, #8;	\
+		ext		T0.16b, T0.16b, RZERO.16b, #8;	\
+		ext		T2.16b, T2.16b, RZERO.16b, #8;	\
+		ext		T4.16b, T4.16b, RZERO.16b, #8;	\
+		ext		T6.16b, T6.16b, RZERO.16b, #8;	\
+		eor		r0.16b, r0.16b, T1.16b;		\
+		eor		r2.16b, r2.16b, T3.16b; 	\
+		eor		r4.16b, r4.16b, T5.16b; 	\
+		eor		r6.16b, r6.16b, T7.16b; 	\
+		eor		r1.16b, r1.16b, T0.16b; 	\
+		eor		r3.16b, r3.16b, T2.16b; 	\
+		eor		r5.16b, r5.16b, T4.16b; 	\
+		eor		r7.16b, r7.16b, T6.16b;
+
+/*
+ * input: r0:r1 (low 128-bits in r0, high in r1)
+ * output: a
+ */
+#define REDUCTION(a, r0, r1, rconst, T0, T1)			\
+		pmull2		T0.1q, r1.2d, rconst.2d;	\
+		ext		T1.16b, T0.16b, RZERO.16b, #8;	\
+		ext		T0.16b, RZERO.16b, T0.16b, #8;	\
+		eor		r1.16b, r1.16b, T1.16b;		\
+		eor		r0.16b, r0.16b, T0.16b;		\
+		pmull		T0.1q, r1.1d, rconst.1d;	\
+		eor		a.16b, r0.16b, T0.16b;
+
+#define SM4_CRYPT_PMUL_128x128_BLK(b0, r0, r1, m0, m1, T0, T1)	\
+	rev32			b0.16b, b0.16b;			\
+		ext		T0.16b, m1.16b, m1.16b, #8;	\
+	sm4e			b0.4s, v24.4s;			\
+		pmull		r0.1q, m0.1d, m1.1d;		\
+	sm4e			b0.4s, v25.4s;			\
+		pmull		T1.1q, m0.1d, T0.1d;		\
+	sm4e			b0.4s, v26.4s;			\
+		pmull2		T0.1q, m0.2d, T0.2d;		\
+	sm4e			b0.4s, v27.4s;			\
+		pmull2		r1.1q, m0.2d, m1.2d;		\
+	sm4e			b0.4s, v28.4s;			\
+		eor		T0.16b, T0.16b, T1.16b;		\
+	sm4e			b0.4s, v29.4s;			\
+		ext		T1.16b, RZERO.16b, T0.16b, #8;	\
+	sm4e			b0.4s, v30.4s;			\
+		ext		T0.16b, T0.16b, RZERO.16b, #8;	\
+	sm4e			b0.4s, v31.4s;			\
+		eor		r0.16b, r0.16b, T1.16b;		\
+	rev64			b0.4s, b0.4s;			\
+		eor		r1.16b, r1.16b, T0.16b;		\
+	ext			b0.16b, b0.16b, b0.16b, #8;	\
+	rev32			b0.16b, b0.16b;
+
+#define SM4_CRYPT_PMUL_128x128_BLK3(b0, b1, b2,			\
+				    r0, r1, m0, m1, T0, T1,	\
+				    r2, r3, m2, m3, T2, T3,	\
+				    r4, r5, m4, m5, T4, T5)	\
+	rev32			b0.16b, b0.16b;			\
+	rev32			b1.16b, b1.16b;			\
+	rev32			b2.16b, b2.16b;			\
+		ext		T0.16b, m1.16b, m1.16b, #8;	\
+		ext		T2.16b, m3.16b, m3.16b, #8;	\
+		ext		T4.16b, m5.16b, m5.16b, #8;	\
+	sm4e			b0.4s, v24.4s;			\
+	sm4e			b1.4s, v24.4s;			\
+	sm4e			b2.4s, v24.4s;			\
+		pmull		r0.1q, m0.1d, m1.1d;		\
+		pmull		r2.1q, m2.1d, m3.1d;		\
+		pmull		r4.1q, m4.1d, m5.1d;		\
+	sm4e			b0.4s, v25.4s;			\
+	sm4e			b1.4s, v25.4s;			\
+	sm4e			b2.4s, v25.4s;			\
+		pmull		T1.1q, m0.1d, T0.1d;		\
+		pmull		T3.1q, m2.1d, T2.1d;		\
+		pmull		T5.1q, m4.1d, T4.1d;		\
+	sm4e			b0.4s, v26.4s;			\
+	sm4e			b1.4s, v26.4s;			\
+	sm4e			b2.4s, v26.4s;			\
+		pmull2		T0.1q, m0.2d, T0.2d;		\
+		pmull2		T2.1q, m2.2d, T2.2d;		\
+		pmull2		T4.1q, m4.2d, T4.2d;		\
+	sm4e			b0.4s, v27.4s;			\
+	sm4e			b1.4s, v27.4s;			\
+	sm4e			b2.4s, v27.4s;			\
+		pmull2		r1.1q, m0.2d, m1.2d;		\
+		pmull2		r3.1q, m2.2d, m3.2d;		\
+		pmull2		r5.1q, m4.2d, m5.2d;		\
+	sm4e			b0.4s, v28.4s;			\
+	sm4e			b1.4s, v28.4s;			\
+	sm4e			b2.4s, v28.4s;			\
+		eor		T0.16b, T0.16b, T1.16b;		\
+		eor		T2.16b, T2.16b, T3.16b;		\
+		eor		T4.16b, T4.16b, T5.16b;		\
+	sm4e			b0.4s, v29.4s;			\
+	sm4e			b1.4s, v29.4s;			\
+	sm4e			b2.4s, v29.4s;			\
+		ext		T1.16b, RZERO.16b, T0.16b, #8;	\
+		ext		T3.16b, RZERO.16b, T2.16b, #8;	\
+		ext		T5.16b, RZERO.16b, T4.16b, #8;	\
+	sm4e			b0.4s, v30.4s;			\
+	sm4e			b1.4s, v30.4s;			\
+	sm4e			b2.4s, v30.4s;			\
+		ext		T0.16b, T0.16b, RZERO.16b, #8;	\
+		ext		T2.16b, T2.16b, RZERO.16b, #8;	\
+		ext		T4.16b, T4.16b, RZERO.16b, #8;	\
+	sm4e			b0.4s, v31.4s;			\
+	sm4e			b1.4s, v31.4s;			\
+	sm4e			b2.4s, v31.4s;			\
+		eor		r0.16b, r0.16b, T1.16b;		\
+		eor		r2.16b, r2.16b, T3.16b;		\
+		eor		r4.16b, r4.16b, T5.16b;		\
+	rev64			b0.4s, b0.4s;			\
+	rev64			b1.4s, b1.4s;			\
+	rev64			b2.4s, b2.4s;			\
+		eor		r1.16b, r1.16b, T0.16b;		\
+		eor		r3.16b, r3.16b, T2.16b;		\
+		eor		r5.16b, r5.16b, T4.16b;		\
+	ext			b0.16b, b0.16b, b0.16b, #8;	\
+	ext			b1.16b, b1.16b, b1.16b, #8;	\
+	ext			b2.16b, b2.16b, b2.16b, #8;	\
+		eor		r0.16b, r0.16b, r2.16b;		\
+		eor		r1.16b, r1.16b, r3.16b;		\
+	rev32			b0.16b, b0.16b;			\
+	rev32			b1.16b, b1.16b;			\
+	rev32			b2.16b, b2.16b;			\
+		eor		r0.16b, r0.16b, r4.16b;		\
+		eor		r1.16b, r1.16b, r5.16b;
+
+#define inc32_le128(vctr)					\
+		mov		vctr.d[1], x9;			\
+		add		w6, w9, #1;			\
+		mov		vctr.d[0], x8;			\
+		bfi		x9, x6, #0, #32;		\
+		rev64		vctr.16b, vctr.16b;
+
+#define GTAG_HASH_LENGTHS(vctr0, vlen)					\
+		ld1		{vlen.16b}, [x7];			\
+		/* construct CTR0 */					\
+		/* the lower 32-bits of initial IV is always be32(1) */	\
+		mov		x6, #0x1;				\
+		bfi		x9, x6, #0, #32;			\
+		mov		vctr0.d[0], x8;				\
+		mov		vctr0.d[1], x9;				\
+		rbit		vlen.16b, vlen.16b;			\
+		rev64		vctr0.16b, vctr0.16b;			\
+		/* authtag = GCTR(CTR0, GHASH) */			\
+		eor		RHASH.16b, RHASH.16b, vlen.16b;		\
+		SM4_CRYPT_PMUL_128x128_BLK(vctr0, RR0, RR1, RHASH, RH1,	\
+					   RTMP0, RTMP1);		\
+		REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3);	\
+		rbit		RHASH.16b, RHASH.16b;			\
+		eor		RHASH.16b, RHASH.16b, vctr0.16b;
+
+
+/* Register macros for encrypt and ghash */
+
+/* can be the same as input v0-v3 */
+#define	RR1	v0
+#define	RR3	v1
+#define	RR5	v2
+#define	RR7	v3
+
+#define	RR0	v4
+#define	RR2	v5
+#define	RR4	v6
+#define	RR6	v7
+
+#define RTMP0	v8
+#define RTMP1	v9
+#define RTMP2	v10
+#define RTMP3	v11
+#define RTMP4	v12
+#define RTMP5	v13
+#define RTMP6	v14
+#define RTMP7	v15
+
+#define	RH1	v16
+#define	RH2	v17
+#define	RH3	v18
+#define	RH4	v19
+
+.align 3
+SYM_FUNC_START(sm4_ce_pmull_ghash_setup)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: ghash table
+	 */
+	SM4_PREPARE(x0)
+
+	adr_l		x2, .Lghash_rconst
+	ld1r		{RRCONST.2d}, [x2]
+
+	eor		RZERO.16b, RZERO.16b, RZERO.16b
+
+	/* H = E(K, 0^128) */
+	rev32		v0.16b, RZERO.16b
+	SM4_CRYPT_BLK_BE(v0)
+
+	/* H ^ 1 */
+	rbit		RH1.16b, v0.16b
+
+	/* H ^ 2 */
+	PMUL_128x128(RR0, RR1, RH1, RH1, RTMP0, RTMP1)
+	REDUCTION(RH2, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+	/* H ^ 3 */
+	PMUL_128x128(RR0, RR1, RH2, RH1, RTMP0, RTMP1)
+	REDUCTION(RH3, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+	/* H ^ 4 */
+	PMUL_128x128(RR0, RR1, RH2, RH2, RTMP0, RTMP1)
+	REDUCTION(RH4, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+	st1		{RH1.16b-RH4.16b}, [x1]
+
+	ret
+SYM_FUNC_END(sm4_ce_pmull_ghash_setup)
+
+.align 3
+SYM_FUNC_START(pmull_ghash_update)
+	/* input:
+	 *   x0: ghash table
+	 *   x1: ghash result
+	 *   x2: src
+	 *   w3: nblocks
+	 */
+	ld1		{RH1.16b-RH4.16b}, [x0]
+
+	ld1		{RHASH.16b}, [x1]
+	rbit		RHASH.16b, RHASH.16b
+
+	adr_l		x4, .Lghash_rconst
+	ld1r		{RRCONST.2d}, [x4]
+
+	eor		RZERO.16b, RZERO.16b, RZERO.16b
+
+.Lghash_loop_4x:
+	cmp		w3, #4
+	blt		.Lghash_loop_1x
+
+	sub		w3, w3, #4
+
+	ld1		{v0.16b-v3.16b}, [x2], #64
+
+	rbit		v0.16b, v0.16b
+	rbit		v1.16b, v1.16b
+	rbit		v2.16b, v2.16b
+	rbit		v3.16b, v3.16b
+
+	/*
+	 * (in0 ^ HASH) * H^4 => rr0:rr1
+	 * (in1)        * H^3 => rr2:rr3
+	 * (in2)        * H^2 => rr4:rr5
+	 * (in3)        * H^1 => rr6:rr7
+	 */
+	eor		RHASH.16b, RHASH.16b, v0.16b
+
+	PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
+			RR2, RR3, v1, RH3, RTMP2, RTMP3,
+			RR4, RR5, v2, RH2, RTMP4, RTMP5,
+			RR6, RR7, v3, RH1, RTMP6, RTMP7)
+
+	eor		RR0.16b, RR0.16b, RR2.16b
+	eor		RR1.16b, RR1.16b, RR3.16b
+	eor		RR0.16b, RR0.16b, RR4.16b
+	eor		RR1.16b, RR1.16b, RR5.16b
+	eor		RR0.16b, RR0.16b, RR6.16b
+	eor		RR1.16b, RR1.16b, RR7.16b
+
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
+
+	cbz		w3, .Lghash_end
+	b		.Lghash_loop_4x
+
+.Lghash_loop_1x:
+	sub		w3, w3, #1
+
+	ld1		{v0.16b}, [x2], #16
+	rbit		v0.16b, v0.16b
+	eor		RHASH.16b, RHASH.16b, v0.16b
+
+	PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+	cbnz		w3, .Lghash_loop_1x
+
+.Lghash_end:
+	rbit		RHASH.16b, RHASH.16b
+	st1		{RHASH.2d}, [x1]
+
+	ret
+SYM_FUNC_END(pmull_ghash_update)
+
+.align 3
+SYM_FUNC_START(sm4_ce_pmull_gcm_enc)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: ctr (big endian, 128 bit)
+	 *   w4: nbytes
+	 *   x5: ghash result
+	 *   x6: ghash table
+	 *   x7: lengths (only for last block)
+	 */
+	SM4_PREPARE(x0)
+
+	ldp		x8, x9, [x3]
+	rev		x8, x8
+	rev		x9, x9
+
+	ld1		{RH1.16b-RH4.16b}, [x6]
+
+	ld1		{RHASH.16b}, [x5]
+	rbit		RHASH.16b, RHASH.16b
+
+	adr_l		x6, .Lghash_rconst
+	ld1r		{RRCONST.2d}, [x6]
+
+	eor		RZERO.16b, RZERO.16b, RZERO.16b
+
+	cbz		w4, .Lgcm_enc_hash_len
+
+.Lgcm_enc_loop_4x:
+	cmp		w4, #(4 * 16)
+	blt		.Lgcm_enc_loop_1x
+
+	sub		w4, w4, #(4 * 16)
+
+	/* construct CTRs */
+	inc32_le128(v0)			/* +0 */
+	inc32_le128(v1)			/* +1 */
+	inc32_le128(v2)			/* +2 */
+	inc32_le128(v3)			/* +3 */
+
+	ld1		{RTMP0.16b-RTMP3.16b}, [x2], #64
+
+	SM4_CRYPT_BLK4(v0, v1, v2, v3)
+
+	eor		v0.16b, v0.16b, RTMP0.16b
+	eor		v1.16b, v1.16b, RTMP1.16b
+	eor		v2.16b, v2.16b, RTMP2.16b
+	eor		v3.16b, v3.16b, RTMP3.16b
+	st1		{v0.16b-v3.16b}, [x1], #64
+
+	/* ghash update */
+
+	rbit		v0.16b, v0.16b
+	rbit		v1.16b, v1.16b
+	rbit		v2.16b, v2.16b
+	rbit		v3.16b, v3.16b
+
+	/*
+	 * (in0 ^ HASH) * H^4 => rr0:rr1
+	 * (in1)        * H^3 => rr2:rr3
+	 * (in2)        * H^2 => rr4:rr5
+	 * (in3)        * H^1 => rr6:rr7
+	 */
+	eor		RHASH.16b, RHASH.16b, v0.16b
+
+	PMUL_128x128_4x(RR0, RR1, RHASH, RH4, RTMP0, RTMP1,
+			RR2, RR3, v1, RH3, RTMP2, RTMP3,
+			RR4, RR5, v2, RH2, RTMP4, RTMP5,
+			RR6, RR7, v3, RH1, RTMP6, RTMP7)
+
+	eor		RR0.16b, RR0.16b, RR2.16b
+	eor		RR1.16b, RR1.16b, RR3.16b
+	eor		RR0.16b, RR0.16b, RR4.16b
+	eor		RR1.16b, RR1.16b, RR5.16b
+	eor		RR0.16b, RR0.16b, RR6.16b
+	eor		RR1.16b, RR1.16b, RR7.16b
+
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
+
+	cbz		w4, .Lgcm_enc_hash_len
+	b		.Lgcm_enc_loop_4x
+
+.Lgcm_enc_loop_1x:
+	cmp		w4, #16
+	blt		.Lgcm_enc_tail
+
+	sub		w4, w4, #16
+
+	/* construct CTRs */
+	inc32_le128(v0)
+
+	ld1		{RTMP0.16b}, [x2], #16
+
+	SM4_CRYPT_BLK(v0)
+
+	eor		v0.16b, v0.16b, RTMP0.16b
+	st1		{v0.16b}, [x1], #16
+
+	/* ghash update */
+	rbit		v0.16b, v0.16b
+	eor		RHASH.16b, RHASH.16b, v0.16b
+	PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+	cbz		w4, .Lgcm_enc_hash_len
+	b		.Lgcm_enc_loop_1x
+
+.Lgcm_enc_tail:
+	/* construct CTRs */
+	inc32_le128(v0)
+	SM4_CRYPT_BLK(v0)
+
+	/* load permute table */
+	adr_l		x0, .Lcts_permute_table
+	add		x0, x0, #32
+	sub		x0, x0, w4, uxtw
+	ld1		{v3.16b}, [x0]
+
+.Lgcm_enc_tail_loop:
+	/* do encrypt */
+	ldrb		w0, [x2], #1	/* get 1 byte from input */
+	umov		w6, v0.b[0]	/* get top crypted byte */
+	eor		w6, w6, w0	/* w6 = CTR ^ input */
+	strb		w6, [x1], #1	/* store out byte */
+
+	/* shift right out one byte */
+	ext		v0.16b, v0.16b, v0.16b, #1
+	/* the last ciphertext is placed in high bytes */
+	ins		v0.b[15], w6
+
+	subs		w4, w4, #1
+	bne		.Lgcm_enc_tail_loop
+
+	/* padding last block with zeros */
+	tbl		v0.16b, {v0.16b}, v3.16b
+
+	/* ghash update */
+	rbit		v0.16b, v0.16b
+	eor		RHASH.16b, RHASH.16b, v0.16b
+	PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+.Lgcm_enc_hash_len:
+	cbz		x7, .Lgcm_enc_end
+
+	GTAG_HASH_LENGTHS(v1, v3)
+
+	b		.Lgcm_enc_ret
+
+.Lgcm_enc_end:
+	/* store new CTR */
+	rev		x8, x8
+	rev		x9, x9
+	stp		x8, x9, [x3]
+
+	rbit		RHASH.16b, RHASH.16b
+
+.Lgcm_enc_ret:
+	/* store new MAC */
+	st1		{RHASH.2d}, [x5]
+
+	ret
+SYM_FUNC_END(sm4_ce_pmull_gcm_enc)
+
+#undef	RR1
+#undef	RR3
+#undef	RR5
+#undef	RR7
+#undef	RR0
+#undef	RR2
+#undef	RR4
+#undef	RR6
+#undef RTMP0
+#undef RTMP1
+#undef RTMP2
+#undef RTMP3
+#undef RTMP4
+#undef RTMP5
+#undef RTMP6
+#undef RTMP7
+#undef	RH1
+#undef	RH2
+#undef	RH3
+#undef	RH4
+
+
+/* Register macros for decrypt */
+
+/* v0-v2 for building CTRs, v3-v5 for saving inputs */
+
+#define	RR1	v6
+#define	RR3	v7
+#define	RR5	v8
+
+#define	RR0	v9
+#define	RR2	v10
+#define	RR4	v11
+
+#define RTMP0	v12
+#define RTMP1	v13
+#define RTMP2	v14
+#define RTMP3	v15
+#define RTMP4	v16
+#define RTMP5	v17
+
+#define	RH1	v18
+#define	RH2	v19
+#define	RH3	v20
+
+.align 3
+SYM_FUNC_START(sm4_ce_pmull_gcm_dec)
+	/* input:
+	 *   x0: round key array, CTX
+	 *   x1: dst
+	 *   x2: src
+	 *   x3: ctr (big endian, 128 bit)
+	 *   w4: nbytes
+	 *   x5: ghash result
+	 *   x6: ghash table
+	 *   x7: lengths (only for last block)
+	 */
+	SM4_PREPARE(x0)
+
+	ldp		x8, x9, [x3]
+	rev		x8, x8
+	rev		x9, x9
+
+	ld1		{RH1.16b-RH3.16b}, [x6]
+
+	ld1		{RHASH.16b}, [x5]
+	rbit		RHASH.16b, RHASH.16b
+
+	adr_l		x6, .Lghash_rconst
+	ld1r		{RRCONST.2d}, [x6]
+
+	eor		RZERO.16b, RZERO.16b, RZERO.16b
+
+	cbz		w4, .Lgcm_dec_hash_len
+
+.Lgcm_dec_loop_3x:
+	cmp		w4, #(3 * 16)
+	blt		.Lgcm_dec_loop_1x
+
+	sub		w4, w4, #(3 * 16)
+
+	ld1		{v3.16b-v5.16b}, [x2], #(3 * 16)
+
+	/* construct CTRs */
+	inc32_le128(v0)			/* +0 */
+	rbit		v6.16b, v3.16b
+	inc32_le128(v1)			/* +1 */
+	rbit		v7.16b, v4.16b
+	inc32_le128(v2)			/* +2 */
+	rbit		v8.16b, v5.16b
+
+	eor		RHASH.16b, RHASH.16b, v6.16b
+
+	/* decrypt & ghash update */
+	SM4_CRYPT_PMUL_128x128_BLK3(v0, v1, v2,
+				    RR0, RR1, RHASH, RH3, RTMP0, RTMP1,
+				    RR2, RR3, v7, RH2, RTMP2, RTMP3,
+				    RR4, RR5, v8, RH1, RTMP4, RTMP5)
+
+	eor		v0.16b, v0.16b, v3.16b
+	eor		v1.16b, v1.16b, v4.16b
+	eor		v2.16b, v2.16b, v5.16b
+
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP0, RTMP1)
+
+	st1		{v0.16b-v2.16b}, [x1], #(3 * 16)
+
+	cbz		w4, .Lgcm_dec_hash_len
+	b		.Lgcm_dec_loop_3x
+
+.Lgcm_dec_loop_1x:
+	cmp		w4, #16
+	blt		.Lgcm_dec_tail
+
+	sub		w4, w4, #16
+
+	ld1		{v3.16b}, [x2], #16
+
+	/* construct CTRs */
+	inc32_le128(v0)
+	rbit		v6.16b, v3.16b
+
+	eor		RHASH.16b, RHASH.16b, v6.16b
+
+	SM4_CRYPT_PMUL_128x128_BLK(v0, RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+
+	eor		v0.16b, v0.16b, v3.16b
+
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+	st1		{v0.16b}, [x1], #16
+
+	cbz		w4, .Lgcm_dec_hash_len
+	b		.Lgcm_dec_loop_1x
+
+.Lgcm_dec_tail:
+	/* construct CTRs */
+	inc32_le128(v0)
+	SM4_CRYPT_BLK(v0)
+
+	/* load permute table */
+	adr_l		x0, .Lcts_permute_table
+	add		x0, x0, #32
+	sub		x0, x0, w4, uxtw
+	ld1		{v3.16b}, [x0]
+
+.Lgcm_dec_tail_loop:
+	/* do decrypt */
+	ldrb		w0, [x2], #1	/* get 1 byte from input */
+	umov		w6, v0.b[0]	/* get top crypted byte */
+	eor		w6, w6, w0	/* w6 = CTR ^ input */
+	strb		w6, [x1], #1	/* store out byte */
+
+	/* shift right out one byte */
+	ext		v0.16b, v0.16b, v0.16b, #1
+	/* the last ciphertext is placed in high bytes */
+	ins		v0.b[15], w0
+
+	subs		w4, w4, #1
+	bne		.Lgcm_dec_tail_loop
+
+	/* padding last block with zeros */
+	tbl		v0.16b, {v0.16b}, v3.16b
+
+	/* ghash update */
+	rbit		v0.16b, v0.16b
+	eor		RHASH.16b, RHASH.16b, v0.16b
+	PMUL_128x128(RR0, RR1, RHASH, RH1, RTMP0, RTMP1)
+	REDUCTION(RHASH, RR0, RR1, RRCONST, RTMP2, RTMP3)
+
+.Lgcm_dec_hash_len:
+	cbz		x7, .Lgcm_dec_end
+
+	GTAG_HASH_LENGTHS(v1, v3)
+
+	b		.Lgcm_dec_ret
+
+.Lgcm_dec_end:
+	/* store new CTR */
+	rev		x8, x8
+	rev		x9, x9
+	stp		x8, x9, [x3]
+
+	rbit		RHASH.16b, RHASH.16b
+
+.Lgcm_dec_ret:
+	/* store new MAC */
+	st1		{RHASH.2d}, [x5]
+
+	ret
+SYM_FUNC_END(sm4_ce_pmull_gcm_dec)
+
+	.section	".rodata", "a"
+	.align 4
+.Lcts_permute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+
+.Lghash_rconst:
+	.quad		0x87
diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c
new file mode 100644
index 0000000000000..e90ea0f17beb2
--- /dev/null
+++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c
@@ -0,0 +1,286 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * SM4-GCM AEAD Algorithm using ARMv8 Crypto Extensions
+ * as specified in rfc8998
+ * https://datatracker.ietf.org/doc/html/rfc8998
+ *
+ * Copyright (C) 2022 Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+ */
+
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/kernel.h>
+#include <linux/cpufeature.h>
+#include <asm/neon.h>
+#include <crypto/b128ops.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/skcipher.h>
+#include <crypto/sm4.h>
+#include "sm4-ce.h"
+
+asmlinkage void sm4_ce_pmull_ghash_setup(const u32 *rkey_enc, u8 *ghash_table);
+asmlinkage void pmull_ghash_update(const u8 *ghash_table, u8 *ghash,
+				   const u8 *src, unsigned int nblocks);
+asmlinkage void sm4_ce_pmull_gcm_enc(const u32 *rkey_enc, u8 *dst,
+				     const u8 *src, u8 *iv,
+				     unsigned int nbytes, u8 *ghash,
+				     const u8 *ghash_table, const u8 *lengths);
+asmlinkage void sm4_ce_pmull_gcm_dec(const u32 *rkey_enc, u8 *dst,
+				     const u8 *src, u8 *iv,
+				     unsigned int nbytes, u8 *ghash,
+				     const u8 *ghash_table, const u8 *lengths);
+
+#define GHASH_BLOCK_SIZE	16
+#define GCM_IV_SIZE		12
+
+struct sm4_gcm_ctx {
+	struct sm4_ctx key;
+	u8 ghash_table[16 * 4];
+};
+
+
+static int gcm_setkey(struct crypto_aead *tfm, const u8 *key,
+		      unsigned int key_len)
+{
+	struct sm4_gcm_ctx *ctx = crypto_aead_ctx(tfm);
+
+	if (key_len != SM4_KEY_SIZE)
+		return -EINVAL;
+
+	kernel_neon_begin();
+
+	sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
+			  crypto_sm4_fk, crypto_sm4_ck);
+	sm4_ce_pmull_ghash_setup(ctx->key.rkey_enc, ctx->ghash_table);
+
+	kernel_neon_end();
+	return 0;
+}
+
+static int gcm_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12 ... 16:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static void gcm_calculate_auth_mac(struct aead_request *req, u8 ghash[])
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
+	u8 __aligned(8) buffer[GHASH_BLOCK_SIZE];
+	u32 assoclen = req->assoclen;
+	struct scatter_walk walk;
+	unsigned int buflen = 0;
+
+	scatterwalk_start(&walk, req->src);
+
+	do {
+		u32 n = scatterwalk_clamp(&walk, assoclen);
+		u8 *p, *ptr;
+
+		if (!n) {
+			scatterwalk_start(&walk, sg_next(walk.sg));
+			n = scatterwalk_clamp(&walk, assoclen);
+		}
+
+		p = ptr = scatterwalk_map(&walk);
+		assoclen -= n;
+		scatterwalk_advance(&walk, n);
+
+		if (n + buflen < GHASH_BLOCK_SIZE) {
+			memcpy(&buffer[buflen], ptr, n);
+			buflen += n;
+		} else {
+			unsigned int nblocks;
+
+			if (buflen) {
+				unsigned int l = GHASH_BLOCK_SIZE - buflen;
+
+				memcpy(&buffer[buflen], ptr, l);
+				ptr += l;
+				n -= l;
+
+				pmull_ghash_update(ctx->ghash_table, ghash,
+						   buffer, 1);
+			}
+
+			nblocks = n / GHASH_BLOCK_SIZE;
+			if (nblocks) {
+				pmull_ghash_update(ctx->ghash_table, ghash,
+						   ptr, nblocks);
+				ptr += nblocks * GHASH_BLOCK_SIZE;
+			}
+
+			buflen = n % GHASH_BLOCK_SIZE;
+			if (buflen)
+				memcpy(&buffer[0], ptr, buflen);
+		}
+
+		scatterwalk_unmap(p);
+		scatterwalk_done(&walk, 0, assoclen);
+	} while (assoclen);
+
+	/* padding with '0' */
+	if (buflen) {
+		memset(&buffer[buflen], 0, GHASH_BLOCK_SIZE - buflen);
+		pmull_ghash_update(ctx->ghash_table, ghash, buffer, 1);
+	}
+}
+
+static int gcm_crypt(struct aead_request *req, struct skcipher_walk *walk,
+		     struct sm4_gcm_ctx *ctx, u8 ghash[],
+		     void (*sm4_ce_pmull_gcm_crypt)(const u32 *rkey_enc,
+				u8 *dst, const u8 *src, u8 *iv,
+				unsigned int nbytes, u8 *ghash,
+				const u8 *ghash_table, const u8 *lengths))
+{
+	u8 __aligned(8) iv[SM4_BLOCK_SIZE];
+	be128 __aligned(8) lengths;
+	int err;
+
+	memset(ghash, 0, SM4_BLOCK_SIZE);
+
+	lengths.a = cpu_to_be64(req->assoclen * 8);
+	lengths.b = cpu_to_be64(walk->total * 8);
+
+	memcpy(iv, walk->iv, GCM_IV_SIZE);
+	put_unaligned_be32(2, iv + GCM_IV_SIZE);
+
+	kernel_neon_begin();
+
+	if (req->assoclen)
+		gcm_calculate_auth_mac(req, ghash);
+
+	do {
+		unsigned int tail = walk->nbytes % SM4_BLOCK_SIZE;
+		const u8 *src = walk->src.virt.addr;
+		u8 *dst = walk->dst.virt.addr;
+
+		if (walk->nbytes == walk->total) {
+			tail = 0;
+
+			sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
+					       walk->nbytes, ghash,
+					       ctx->ghash_table,
+					       (const u8 *)&lengths);
+		} else if (walk->nbytes - tail) {
+			sm4_ce_pmull_gcm_crypt(ctx->key.rkey_enc, dst, src, iv,
+					       walk->nbytes - tail, ghash,
+					       ctx->ghash_table, NULL);
+		}
+
+		kernel_neon_end();
+
+		err = skcipher_walk_done(walk, tail);
+		if (err)
+			return err;
+		if (walk->nbytes)
+			kernel_neon_begin();
+	} while (walk->nbytes > 0);
+
+	return 0;
+}
+
+static int gcm_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
+	u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_aead_encrypt(&walk, req, false);
+	if (err)
+		return err;
+
+	err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_enc);
+	if (err)
+		return err;
+
+	/* copy authtag to end of dst */
+	scatterwalk_map_and_copy(ghash, req->dst, req->assoclen + req->cryptlen,
+				 crypto_aead_authsize(aead), 1);
+
+	return 0;
+}
+
+static int gcm_decrypt(struct aead_request *req)
+{
+	struct crypto_aead *aead = crypto_aead_reqtfm(req);
+	unsigned int authsize = crypto_aead_authsize(aead);
+	struct sm4_gcm_ctx *ctx = crypto_aead_ctx(aead);
+	u8 __aligned(8) ghash[SM4_BLOCK_SIZE];
+	u8 authtag[SM4_BLOCK_SIZE];
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_aead_decrypt(&walk, req, false);
+	if (err)
+		return err;
+
+	err = gcm_crypt(req, &walk, ctx, ghash, sm4_ce_pmull_gcm_dec);
+	if (err)
+		return err;
+
+	/* compare calculated auth tag with the stored one */
+	scatterwalk_map_and_copy(authtag, req->src,
+				 req->assoclen + req->cryptlen - authsize,
+				 authsize, 0);
+
+	if (crypto_memneq(authtag, ghash, authsize))
+		return -EBADMSG;
+
+	return 0;
+}
+
+static struct aead_alg sm4_gcm_alg = {
+	.base = {
+		.cra_name		= "gcm(sm4)",
+		.cra_driver_name	= "gcm-sm4-ce",
+		.cra_priority		= 400,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct sm4_gcm_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.ivsize		= GCM_IV_SIZE,
+	.chunksize	= SM4_BLOCK_SIZE,
+	.maxauthsize	= SM4_BLOCK_SIZE,
+	.setkey		= gcm_setkey,
+	.setauthsize	= gcm_setauthsize,
+	.encrypt	= gcm_encrypt,
+	.decrypt	= gcm_decrypt,
+};
+
+static int __init sm4_ce_gcm_init(void)
+{
+	if (!cpu_have_named_feature(PMULL))
+		return -ENODEV;
+
+	return crypto_register_aead(&sm4_gcm_alg);
+}
+
+static void __exit sm4_ce_gcm_exit(void)
+{
+	crypto_unregister_aead(&sm4_gcm_alg);
+}
+
+static const struct cpu_feature sm4_ce_gcm_cpu_feature[] = {
+	{ cpu_feature(PMULL) },
+	{}
+};
+MODULE_DEVICE_TABLE(cpu, sm4_ce_gcm_cpu_feature);
+
+module_cpu_feature_match(SM4, sm4_ce_gcm_init);
+module_exit(sm4_ce_gcm_exit);
+
+MODULE_DESCRIPTION("Synchronous SM4 in GCM mode using ARMv8 Crypto Extensions");
+MODULE_ALIAS_CRYPTO("gcm(sm4)");
+MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From 329cfa42e5280decfc9247598b9996e13b28c380 Mon Sep 17 00:00:00 2001
From: Ralph Siemsen <ralph.siemsen@linaro.org>
Date: Thu, 27 Oct 2022 15:35:44 -0400
Subject: [PATCH 0911/4122] crypto: doc - use correct function name

The hashing API does not have a function called .finish()

Signed-off-by: Ralph Siemsen <ralph.siemsen@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 Documentation/crypto/devel-algos.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/crypto/devel-algos.rst b/Documentation/crypto/devel-algos.rst
index f225a953ab4b9..3506899ef83e3 100644
--- a/Documentation/crypto/devel-algos.rst
+++ b/Documentation/crypto/devel-algos.rst
@@ -172,7 +172,7 @@ Here are schematics of how these functions are called when operated from
 other part of the kernel. Note that the .setkey() call might happen
 before or after any of these schematics happen, but must not happen
 during any of these are in-flight. Please note that calling .init()
-followed immediately by .finish() is also a perfectly valid
+followed immediately by .final() is also a perfectly valid
 transformation.
 
 ::
-- 
GitLab


From 80629af0f3e438b94532cf88f3fb53c711f95138 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Thu, 20 Oct 2022 10:21:03 +0200
Subject: [PATCH 0912/4122] MAINTAINERS: remove section INTEL IOP-ADMA DMA
 DRIVER

Commit cd0ab43ec91a ("dmaengine: remove iop-adma driver") removes the
driver's source code, but misses to remove the MAINTAINERS section.

Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a
broken file pattern.

Remove the INTEL IOP-ADMA DMA DRIVER section pointing to the removed
driver.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Link: https://lore.kernel.org/r/20221020082103.29218-1-lukas.bulwahn@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 MAINTAINERS | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..cd1264d24db85 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10332,11 +10332,6 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
 F:	drivers/iommu/intel/
 F:	include/linux/intel-svm.h
 
-INTEL IOP-ADMA DMA DRIVER
-R:	Dan Williams <dan.j.williams@intel.com>
-S:	Odd fixes
-F:	drivers/dma/iop-adma.c
-
 INTEL IPU3 CSI-2 CIO2 DRIVER
 M:	Yong Zhi <yong.zhi@intel.com>
 M:	Sakari Ailus <sakari.ailus@linux.intel.com>
-- 
GitLab


From cccc46ae362398e43c6b8be38fdb7e39def9e21b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 21 Oct 2022 22:27:47 +0200
Subject: [PATCH 0913/4122] dmaengine: remove s3c24xx driver

The s3c24xx platform was removed and this driver is no longer
needed.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221021203329.4143397-14-arnd@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig                       |   12 -
 drivers/dma/Makefile                      |    1 -
 drivers/dma/s3c24xx-dma.c                 | 1428 ---------------------
 include/linux/platform_data/dma-s3c24xx.h |   48 -
 4 files changed, 1489 deletions(-)
 delete mode 100644 drivers/dma/s3c24xx-dma.c
 delete mode 100644 include/linux/platform_data/dma-s3c24xx.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index b73fc89ba8776..ea81d825575fb 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -609,18 +609,6 @@ config SPRD_DMA
 	help
 	  Enable support for the on-chip DMA controller on Spreadtrum platform.
 
-config S3C24XX_DMAC
-	bool "Samsung S3C24XX DMA support"
-	depends on ARCH_S3C24XX || COMPILE_TEST
-	select DMA_ENGINE
-	select DMA_VIRTUAL_CHANNELS
-	help
-	  Support for the Samsung S3C24XX DMA controller driver. The
-	  DMA controller is having multiple DMA channels which can be
-	  configured for different peripherals like audio, UART, SPI.
-	  The DMA controller can transfer data from memory to peripheral,
-	  periphal to memory, periphal to periphal and memory to memory.
-
 config TXX9_DMAC
 	tristate "Toshiba TXx9 SoC DMA support"
 	depends on MACH_TX49XX
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 5b55ada052a7f..a4fd1ce295102 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -70,7 +70,6 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o
 obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o
 obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o
 obj-$(CONFIG_SPRD_DMA) += sprd-dma.o
-obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
deleted file mode 100644
index a09eeb545f7d5..0000000000000
--- a/drivers/dma/s3c24xx-dma.c
+++ /dev/null
@@ -1,1428 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * S3C24XX DMA handling
- *
- * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
- *
- * based on amba-pl08x.c
- *
- * Copyright (c) 2006 ARM Ltd.
- * Copyright (c) 2010 ST-Ericsson SA
- *
- * Author: Peter Pearse <peter.pearse@arm.com>
- * Author: Linus Walleij <linus.walleij@stericsson.com>
- *
- * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals
- * that can be routed to any of the 4 to 8 hardware-channels.
- *
- * Therefore on these DMA controllers the number of channels
- * and the number of incoming DMA signals are two totally different things.
- * It is usually not possible to theoretically handle all physical signals,
- * so a multiplexing scheme with possible denial of use is necessary.
- *
- * Open items:
- * - bursts
- */
-
-#include <linux/platform_device.h>
-#include <linux/types.h>
-#include <linux/dmaengine.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/clk.h>
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/slab.h>
-#include <linux/platform_data/dma-s3c24xx.h>
-
-#include "dmaengine.h"
-#include "virt-dma.h"
-
-#define MAX_DMA_CHANNELS	8
-
-#define S3C24XX_DISRC			0x00
-#define S3C24XX_DISRCC			0x04
-#define S3C24XX_DISRCC_INC_INCREMENT	0
-#define S3C24XX_DISRCC_INC_FIXED	BIT(0)
-#define S3C24XX_DISRCC_LOC_AHB		0
-#define S3C24XX_DISRCC_LOC_APB		BIT(1)
-
-#define S3C24XX_DIDST			0x08
-#define S3C24XX_DIDSTC			0x0c
-#define S3C24XX_DIDSTC_INC_INCREMENT	0
-#define S3C24XX_DIDSTC_INC_FIXED	BIT(0)
-#define S3C24XX_DIDSTC_LOC_AHB		0
-#define S3C24XX_DIDSTC_LOC_APB		BIT(1)
-#define S3C24XX_DIDSTC_INT_TC0		0
-#define S3C24XX_DIDSTC_INT_RELOAD	BIT(2)
-
-#define S3C24XX_DCON			0x10
-
-#define S3C24XX_DCON_TC_MASK		0xfffff
-#define S3C24XX_DCON_DSZ_BYTE		(0 << 20)
-#define S3C24XX_DCON_DSZ_HALFWORD	(1 << 20)
-#define S3C24XX_DCON_DSZ_WORD		(2 << 20)
-#define S3C24XX_DCON_DSZ_MASK		(3 << 20)
-#define S3C24XX_DCON_DSZ_SHIFT		20
-#define S3C24XX_DCON_AUTORELOAD		0
-#define S3C24XX_DCON_NORELOAD		BIT(22)
-#define S3C24XX_DCON_HWTRIG		BIT(23)
-#define S3C24XX_DCON_HWSRC_SHIFT	24
-#define S3C24XX_DCON_SERV_SINGLE	0
-#define S3C24XX_DCON_SERV_WHOLE		BIT(27)
-#define S3C24XX_DCON_TSZ_UNIT		0
-#define S3C24XX_DCON_TSZ_BURST4		BIT(28)
-#define S3C24XX_DCON_INT		BIT(29)
-#define S3C24XX_DCON_SYNC_PCLK		0
-#define S3C24XX_DCON_SYNC_HCLK		BIT(30)
-#define S3C24XX_DCON_DEMAND		0
-#define S3C24XX_DCON_HANDSHAKE		BIT(31)
-
-#define S3C24XX_DSTAT			0x14
-#define S3C24XX_DSTAT_STAT_BUSY		BIT(20)
-#define S3C24XX_DSTAT_CURRTC_MASK	0xfffff
-
-#define S3C24XX_DMASKTRIG		0x20
-#define S3C24XX_DMASKTRIG_SWTRIG	BIT(0)
-#define S3C24XX_DMASKTRIG_ON		BIT(1)
-#define S3C24XX_DMASKTRIG_STOP		BIT(2)
-
-#define S3C24XX_DMAREQSEL		0x24
-#define S3C24XX_DMAREQSEL_HW		BIT(0)
-
-/*
- * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel
- * for a DMA source. Instead only specific channels are valid.
- * All of these SoCs have 4 physical channels and the number of request
- * source bits is 3. Additionally we also need 1 bit to mark the channel
- * as valid.
- * Therefore we separate the chansel element of the channel data into 4
- * parts of 4 bits each, to hold the information if the channel is valid
- * and the hw request source to use.
- *
- * Example:
- * SDI is valid on channels 0, 2 and 3 - with varying hw request sources.
- * For it the chansel field would look like
- *
- * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1
- * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2
- * ((BIT(3) | 2) << 0 * 4)   // channel 0, with request source 2
- */
-#define S3C24XX_CHANSEL_WIDTH		4
-#define S3C24XX_CHANSEL_VALID		BIT(3)
-#define S3C24XX_CHANSEL_REQ_MASK	7
-
-/*
- * struct soc_data - vendor-specific config parameters for individual SoCs
- * @stride: spacing between the registers of each channel
- * @has_reqsel: does the controller use the newer requestselection mechanism
- * @has_clocks: are controllable dma-clocks present
- */
-struct soc_data {
-	int stride;
-	bool has_reqsel;
-	bool has_clocks;
-};
-
-/*
- * enum s3c24xx_dma_chan_state - holds the virtual channel states
- * @S3C24XX_DMA_CHAN_IDLE: the channel is idle
- * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport
- * channel and is running a transfer on it
- * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport
- * channel to become available (only pertains to memcpy channels)
- */
-enum s3c24xx_dma_chan_state {
-	S3C24XX_DMA_CHAN_IDLE,
-	S3C24XX_DMA_CHAN_RUNNING,
-	S3C24XX_DMA_CHAN_WAITING,
-};
-
-/*
- * struct s3c24xx_sg - structure containing data per sg
- * @src_addr: src address of sg
- * @dst_addr: dst address of sg
- * @len: transfer len in bytes
- * @node: node for txd's dsg_list
- */
-struct s3c24xx_sg {
-	dma_addr_t src_addr;
-	dma_addr_t dst_addr;
-	size_t len;
-	struct list_head node;
-};
-
-/*
- * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor
- * @vd: virtual DMA descriptor
- * @dsg_list: list of children sg's
- * @at: sg currently being transfered
- * @width: transfer width
- * @disrcc: value for source control register
- * @didstc: value for destination control register
- * @dcon: base value for dcon register
- * @cyclic: indicate cyclic transfer
- */
-struct s3c24xx_txd {
-	struct virt_dma_desc vd;
-	struct list_head dsg_list;
-	struct list_head *at;
-	u8 width;
-	u32 disrcc;
-	u32 didstc;
-	u32 dcon;
-	bool cyclic;
-};
-
-struct s3c24xx_dma_chan;
-
-/*
- * struct s3c24xx_dma_phy - holder for the physical channels
- * @id: physical index to this channel
- * @valid: does the channel have all required elements
- * @base: virtual memory base (remapped) for the this channel
- * @irq: interrupt for this channel
- * @clk: clock for this channel
- * @lock: a lock to use when altering an instance of this struct
- * @serving: virtual channel currently being served by this physicalchannel
- * @host: a pointer to the host (internal use)
- */
-struct s3c24xx_dma_phy {
-	unsigned int			id;
-	bool				valid;
-	void __iomem			*base;
-	int				irq;
-	struct clk			*clk;
-	spinlock_t			lock;
-	struct s3c24xx_dma_chan		*serving;
-	struct s3c24xx_dma_engine	*host;
-};
-
-/*
- * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel
- * @id: the id of the channel
- * @name: name of the channel
- * @vc: wrapped virtual channel
- * @phy: the physical channel utilized by this channel, if there is one
- * @runtime_addr: address for RX/TX according to the runtime config
- * @at: active transaction on this channel
- * @lock: a lock for this channel data
- * @host: a pointer to the host (internal use)
- * @state: whether the channel is idle, running etc
- * @slave: whether this channel is a device (slave) or for memcpy
- */
-struct s3c24xx_dma_chan {
-	int id;
-	const char *name;
-	struct virt_dma_chan vc;
-	struct s3c24xx_dma_phy *phy;
-	struct dma_slave_config cfg;
-	struct s3c24xx_txd *at;
-	struct s3c24xx_dma_engine *host;
-	enum s3c24xx_dma_chan_state state;
-	bool slave;
-};
-
-/*
- * struct s3c24xx_dma_engine - the local state holder for the S3C24XX
- * @pdev: the corresponding platform device
- * @pdata: platform data passed in from the platform/machine
- * @base: virtual memory base (remapped)
- * @slave: slave engine for this instance
- * @memcpy: memcpy engine for this instance
- * @phy_chans: array of data for the physical channels
- */
-struct s3c24xx_dma_engine {
-	struct platform_device			*pdev;
-	const struct s3c24xx_dma_platdata	*pdata;
-	struct soc_data				*sdata;
-	void __iomem				*base;
-	struct dma_device			slave;
-	struct dma_device			memcpy;
-	struct s3c24xx_dma_phy			*phy_chans;
-};
-
-/*
- * Physical channel handling
- */
-
-/*
- * Check whether a certain channel is busy or not.
- */
-static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy)
-{
-	unsigned int val = readl(phy->base + S3C24XX_DSTAT);
-	return val & S3C24XX_DSTAT_STAT_BUSY;
-}
-
-static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan,
-				  struct s3c24xx_dma_phy *phy)
-{
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
-	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
-	int phyvalid;
-
-	/* every phy is valid for memcopy channels */
-	if (!s3cchan->slave)
-		return true;
-
-	/* On newer variants all phys can be used for all virtual channels */
-	if (s3cdma->sdata->has_reqsel)
-		return true;
-
-	phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH));
-	return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false;
-}
-
-/*
- * Allocate a physical channel for a virtual channel
- *
- * Try to locate a physical channel to be used for this transfer. If all
- * are taken return NULL and the requester will have to cope by using
- * some fallback PIO mode or retrying later.
- */
-static
-struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan)
-{
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	struct s3c24xx_dma_phy *phy = NULL;
-	unsigned long flags;
-	int i;
-	int ret;
-
-	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
-		phy = &s3cdma->phy_chans[i];
-
-		if (!phy->valid)
-			continue;
-
-		if (!s3c24xx_dma_phy_valid(s3cchan, phy))
-			continue;
-
-		spin_lock_irqsave(&phy->lock, flags);
-
-		if (!phy->serving) {
-			phy->serving = s3cchan;
-			spin_unlock_irqrestore(&phy->lock, flags);
-			break;
-		}
-
-		spin_unlock_irqrestore(&phy->lock, flags);
-	}
-
-	/* No physical channel available, cope with it */
-	if (i == s3cdma->pdata->num_phy_channels) {
-		dev_warn(&s3cdma->pdev->dev, "no phy channel available\n");
-		return NULL;
-	}
-
-	/* start the phy clock */
-	if (s3cdma->sdata->has_clocks) {
-		ret = clk_enable(phy->clk);
-		if (ret) {
-			dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n",
-				phy->id, ret);
-			phy->serving = NULL;
-			return NULL;
-		}
-	}
-
-	return phy;
-}
-
-/*
- * Mark the physical channel as free.
- *
- * This drops the link between the physical and virtual channel.
- */
-static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy)
-{
-	struct s3c24xx_dma_engine *s3cdma = phy->host;
-
-	if (s3cdma->sdata->has_clocks)
-		clk_disable(phy->clk);
-
-	phy->serving = NULL;
-}
-
-/*
- * Stops the channel by writing the stop bit.
- * This should not be used for an on-going transfer, but as a method of
- * shutting down a channel (eg, when it's no longer used) or terminating a
- * transfer.
- */
-static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy)
-{
-	writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG);
-}
-
-/*
- * Virtual channel handling
- */
-
-static inline
-struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan)
-{
-	return container_of(chan, struct s3c24xx_dma_chan, vc.chan);
-}
-
-static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan)
-{
-	struct s3c24xx_dma_phy *phy = s3cchan->phy;
-	struct s3c24xx_txd *txd = s3cchan->at;
-	u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK;
-
-	return tc * txd->width;
-}
-
-static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan,
-				  struct dma_slave_config *config)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-	unsigned long flags;
-	int ret = 0;
-
-	/* Reject definitely invalid configurations */
-	if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
-	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
-		return -EINVAL;
-
-	spin_lock_irqsave(&s3cchan->vc.lock, flags);
-
-	if (!s3cchan->slave) {
-		ret = -EINVAL;
-		goto out;
-	}
-
-	s3cchan->cfg = *config;
-
-out:
-	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
-	return ret;
-}
-
-/*
- * Transfer handling
- */
-
-static inline
-struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx)
-{
-	return container_of(tx, struct s3c24xx_txd, vd.tx);
-}
-
-static struct s3c24xx_txd *s3c24xx_dma_get_txd(void)
-{
-	struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
-
-	if (txd) {
-		INIT_LIST_HEAD(&txd->dsg_list);
-		txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD;
-	}
-
-	return txd;
-}
-
-static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd)
-{
-	struct s3c24xx_sg *dsg, *_dsg;
-
-	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
-		list_del(&dsg->node);
-		kfree(dsg);
-	}
-
-	kfree(txd);
-}
-
-static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan,
-				       struct s3c24xx_txd *txd)
-{
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	struct s3c24xx_dma_phy *phy = s3cchan->phy;
-	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
-	struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node);
-	u32 dcon = txd->dcon;
-	u32 val;
-
-	/* transfer-size and -count from len and width */
-	switch (txd->width) {
-	case 1:
-		dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len;
-		break;
-	case 2:
-		dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2);
-		break;
-	case 4:
-		dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4);
-		break;
-	}
-
-	if (s3cchan->slave) {
-		struct s3c24xx_dma_channel *cdata =
-					&pdata->channels[s3cchan->id];
-
-		if (s3cdma->sdata->has_reqsel) {
-			writel_relaxed((cdata->chansel << 1) |
-							S3C24XX_DMAREQSEL_HW,
-					phy->base + S3C24XX_DMAREQSEL);
-		} else {
-			int csel = cdata->chansel >> (phy->id *
-							S3C24XX_CHANSEL_WIDTH);
-
-			csel &= S3C24XX_CHANSEL_REQ_MASK;
-			dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT;
-			dcon |= S3C24XX_DCON_HWTRIG;
-		}
-	} else {
-		if (s3cdma->sdata->has_reqsel)
-			writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL);
-	}
-
-	writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC);
-	writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC);
-	writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST);
-	writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC);
-	writel_relaxed(dcon, phy->base + S3C24XX_DCON);
-
-	val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG);
-	val &= ~S3C24XX_DMASKTRIG_STOP;
-	val |= S3C24XX_DMASKTRIG_ON;
-
-	/* trigger the dma operation for memcpy transfers */
-	if (!s3cchan->slave)
-		val |= S3C24XX_DMASKTRIG_SWTRIG;
-
-	writel(val, phy->base + S3C24XX_DMASKTRIG);
-}
-
-/*
- * Set the initial DMA register values and start first sg.
- */
-static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
-{
-	struct s3c24xx_dma_phy *phy = s3cchan->phy;
-	struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc);
-	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
-
-	list_del(&txd->vd.node);
-
-	s3cchan->at = txd;
-
-	/* Wait for channel inactive */
-	while (s3c24xx_dma_phy_busy(phy))
-		cpu_relax();
-
-	/* point to the first element of the sg list */
-	txd->at = txd->dsg_list.next;
-	s3c24xx_dma_start_next_sg(s3cchan, txd);
-}
-
-/*
- * Try to allocate a physical channel.  When successful, assign it to
- * this virtual channel, and initiate the next descriptor.  The
- * virtual channel lock must be held at this point.
- */
-static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan)
-{
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	struct s3c24xx_dma_phy *phy;
-
-	phy = s3c24xx_dma_get_phy(s3cchan);
-	if (!phy) {
-		dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n",
-			s3cchan->name);
-		s3cchan->state = S3C24XX_DMA_CHAN_WAITING;
-		return;
-	}
-
-	dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n",
-		phy->id, s3cchan->name);
-
-	s3cchan->phy = phy;
-	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
-
-	s3c24xx_dma_start_next_txd(s3cchan);
-}
-
-static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy,
-	struct s3c24xx_dma_chan *s3cchan)
-{
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-
-	dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n",
-		phy->id, s3cchan->name);
-
-	/*
-	 * We do this without taking the lock; we're really only concerned
-	 * about whether this pointer is NULL or not, and we're guaranteed
-	 * that this will only be called when it _already_ is non-NULL.
-	 */
-	phy->serving = s3cchan;
-	s3cchan->phy = phy;
-	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
-	s3c24xx_dma_start_next_txd(s3cchan);
-}
-
-/*
- * Free a physical DMA channel, potentially reallocating it to another
- * virtual channel if we have any pending.
- */
-static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan)
-{
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	struct s3c24xx_dma_chan *p, *next;
-
-retry:
-	next = NULL;
-
-	/* Find a waiting virtual channel for the next transfer. */
-	list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node)
-		if (p->state == S3C24XX_DMA_CHAN_WAITING) {
-			next = p;
-			break;
-		}
-
-	if (!next) {
-		list_for_each_entry(p, &s3cdma->slave.channels,
-				    vc.chan.device_node)
-			if (p->state == S3C24XX_DMA_CHAN_WAITING &&
-				      s3c24xx_dma_phy_valid(p, s3cchan->phy)) {
-				next = p;
-				break;
-			}
-	}
-
-	/* Ensure that the physical channel is stopped */
-	s3c24xx_dma_terminate_phy(s3cchan->phy);
-
-	if (next) {
-		bool success;
-
-		/*
-		 * Eww.  We know this isn't going to deadlock
-		 * but lockdep probably doesn't.
-		 */
-		spin_lock(&next->vc.lock);
-		/* Re-check the state now that we have the lock */
-		success = next->state == S3C24XX_DMA_CHAN_WAITING;
-		if (success)
-			s3c24xx_dma_phy_reassign_start(s3cchan->phy, next);
-		spin_unlock(&next->vc.lock);
-
-		/* If the state changed, try to find another channel */
-		if (!success)
-			goto retry;
-	} else {
-		/* No more jobs, so free up the physical channel */
-		s3c24xx_dma_put_phy(s3cchan->phy);
-	}
-
-	s3cchan->phy = NULL;
-	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
-}
-
-static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd)
-{
-	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan);
-
-	if (!s3cchan->slave)
-		dma_descriptor_unmap(&vd->tx);
-
-	s3c24xx_dma_free_txd(txd);
-}
-
-static irqreturn_t s3c24xx_dma_irq(int irq, void *data)
-{
-	struct s3c24xx_dma_phy *phy = data;
-	struct s3c24xx_dma_chan *s3cchan = phy->serving;
-	struct s3c24xx_txd *txd;
-
-	dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id);
-
-	/*
-	 * Interrupts happen to notify the completion of a transfer and the
-	 * channel should have moved into its stop state already on its own.
-	 * Therefore interrupts on channels not bound to a virtual channel
-	 * should never happen. Nevertheless send a terminate command to the
-	 * channel if the unlikely case happens.
-	 */
-	if (unlikely(!s3cchan)) {
-		dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n",
-			phy->id);
-
-		s3c24xx_dma_terminate_phy(phy);
-
-		return IRQ_HANDLED;
-	}
-
-	spin_lock(&s3cchan->vc.lock);
-	txd = s3cchan->at;
-	if (txd) {
-		/* when more sg's are in this txd, start the next one */
-		if (!list_is_last(txd->at, &txd->dsg_list)) {
-			txd->at = txd->at->next;
-			if (txd->cyclic)
-				vchan_cyclic_callback(&txd->vd);
-			s3c24xx_dma_start_next_sg(s3cchan, txd);
-		} else if (!txd->cyclic) {
-			s3cchan->at = NULL;
-			vchan_cookie_complete(&txd->vd);
-
-			/*
-			 * And start the next descriptor (if any),
-			 * otherwise free this channel.
-			 */
-			if (vchan_next_desc(&s3cchan->vc))
-				s3c24xx_dma_start_next_txd(s3cchan);
-			else
-				s3c24xx_dma_phy_free(s3cchan);
-		} else {
-			vchan_cyclic_callback(&txd->vd);
-
-			/* Cyclic: reset at beginning */
-			txd->at = txd->dsg_list.next;
-			s3c24xx_dma_start_next_sg(s3cchan, txd);
-		}
-	}
-	spin_unlock(&s3cchan->vc.lock);
-
-	return IRQ_HANDLED;
-}
-
-/*
- * The DMA ENGINE API
- */
-
-static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	LIST_HEAD(head);
-	unsigned long flags;
-	int ret;
-
-	spin_lock_irqsave(&s3cchan->vc.lock, flags);
-
-	if (!s3cchan->phy && !s3cchan->at) {
-		dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n",
-			s3cchan->id);
-		ret = -EINVAL;
-		goto unlock;
-	}
-
-	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
-
-	/* Mark physical channel as free */
-	if (s3cchan->phy)
-		s3c24xx_dma_phy_free(s3cchan);
-
-	/* Dequeue current job */
-	if (s3cchan->at) {
-		vchan_terminate_vdesc(&s3cchan->at->vd);
-		s3cchan->at = NULL;
-	}
-
-	/* Dequeue jobs not yet fired as well */
-
-	vchan_get_all_descriptors(&s3cchan->vc, &head);
-
-	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
-
-	vchan_dma_desc_free_list(&s3cchan->vc, &head);
-
-	return 0;
-
-unlock:
-	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
-
-	return ret;
-}
-
-static void s3c24xx_dma_synchronize(struct dma_chan *chan)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-
-	vchan_synchronize(&s3cchan->vc);
-}
-
-static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
-{
-	/* Ensure all queued descriptors are freed */
-	vchan_free_chan_resources(to_virt_chan(chan));
-}
-
-static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan,
-		dma_cookie_t cookie, struct dma_tx_state *txstate)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-	struct s3c24xx_txd *txd;
-	struct s3c24xx_sg *dsg;
-	struct virt_dma_desc *vd;
-	unsigned long flags;
-	enum dma_status ret;
-	size_t bytes = 0;
-
-	spin_lock_irqsave(&s3cchan->vc.lock, flags);
-	ret = dma_cookie_status(chan, cookie, txstate);
-
-	/*
-	 * There's no point calculating the residue if there's
-	 * no txstate to store the value.
-	 */
-	if (ret == DMA_COMPLETE || !txstate) {
-		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
-		return ret;
-	}
-
-	vd = vchan_find_desc(&s3cchan->vc, cookie);
-	if (vd) {
-		/* On the issued list, so hasn't been processed yet */
-		txd = to_s3c24xx_txd(&vd->tx);
-
-		list_for_each_entry(dsg, &txd->dsg_list, node)
-			bytes += dsg->len;
-	} else {
-		/*
-		 * Currently running, so sum over the pending sg's and
-		 * the currently active one.
-		 */
-		txd = s3cchan->at;
-
-		dsg = list_entry(txd->at, struct s3c24xx_sg, node);
-		list_for_each_entry_from(dsg, &txd->dsg_list, node)
-			bytes += dsg->len;
-
-		bytes += s3c24xx_dma_getbytes_chan(s3cchan);
-	}
-	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
-
-	/*
-	 * This cookie not complete yet
-	 * Get number of bytes left in the active transactions and queue
-	 */
-	dma_set_residue(txstate, bytes);
-
-	/* Whether waiting or running, we're in progress */
-	return ret;
-}
-
-/*
- * Initialize a descriptor to be used by memcpy submit
- */
-static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
-		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
-		size_t len, unsigned long flags)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	struct s3c24xx_txd *txd;
-	struct s3c24xx_sg *dsg;
-	int src_mod, dest_mod;
-
-	dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n",
-			len, s3cchan->name);
-
-	if ((len & S3C24XX_DCON_TC_MASK) != len) {
-		dev_err(&s3cdma->pdev->dev, "memcpy size %zu to large\n", len);
-		return NULL;
-	}
-
-	txd = s3c24xx_dma_get_txd();
-	if (!txd)
-		return NULL;
-
-	dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
-	if (!dsg) {
-		s3c24xx_dma_free_txd(txd);
-		return NULL;
-	}
-	list_add_tail(&dsg->node, &txd->dsg_list);
-
-	dsg->src_addr = src;
-	dsg->dst_addr = dest;
-	dsg->len = len;
-
-	/*
-	 * Determine a suitable transfer width.
-	 * The DMA controller cannot fetch/store information which is not
-	 * naturally aligned on the bus, i.e., a 4 byte fetch must start at
-	 * an address divisible by 4 - more generally addr % width must be 0.
-	 */
-	src_mod = src % 4;
-	dest_mod = dest % 4;
-	switch (len % 4) {
-	case 0:
-		txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1;
-		break;
-	case 2:
-		txd->width = ((src_mod == 2 || src_mod == 0) &&
-			      (dest_mod == 2 || dest_mod == 0)) ? 2 : 1;
-		break;
-	default:
-		txd->width = 1;
-		break;
-	}
-
-	txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT;
-	txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT;
-	txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK |
-		     S3C24XX_DCON_SERV_WHOLE;
-
-	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
-}
-
-static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
-	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
-	enum dma_transfer_direction direction, unsigned long flags)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
-	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
-	struct s3c24xx_txd *txd;
-	struct s3c24xx_sg *dsg;
-	unsigned sg_len;
-	dma_addr_t slave_addr;
-	u32 hwcfg = 0;
-	int i;
-
-	dev_dbg(&s3cdma->pdev->dev,
-		"prepare cyclic transaction of %zu bytes with period %zu from %s\n",
-		size, period, s3cchan->name);
-
-	if (!is_slave_direction(direction)) {
-		dev_err(&s3cdma->pdev->dev,
-			"direction %d unsupported\n", direction);
-		return NULL;
-	}
-
-	txd = s3c24xx_dma_get_txd();
-	if (!txd)
-		return NULL;
-
-	txd->cyclic = 1;
-
-	if (cdata->handshake)
-		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
-
-	switch (cdata->bus) {
-	case S3C24XX_DMA_APB:
-		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
-		hwcfg |= S3C24XX_DISRCC_LOC_APB;
-		break;
-	case S3C24XX_DMA_AHB:
-		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
-		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
-		break;
-	}
-
-	/*
-	 * Always assume our peripheral desintation is a fixed
-	 * address in memory.
-	 */
-	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
-
-	/*
-	 * Individual dma operations are requested by the slave,
-	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
-	 */
-	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
-
-	if (direction == DMA_MEM_TO_DEV) {
-		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
-			      S3C24XX_DISRCC_INC_INCREMENT;
-		txd->didstc = hwcfg;
-		slave_addr = s3cchan->cfg.dst_addr;
-		txd->width = s3cchan->cfg.dst_addr_width;
-	} else {
-		txd->disrcc = hwcfg;
-		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
-			      S3C24XX_DIDSTC_INC_INCREMENT;
-		slave_addr = s3cchan->cfg.src_addr;
-		txd->width = s3cchan->cfg.src_addr_width;
-	}
-
-	sg_len = size / period;
-
-	for (i = 0; i < sg_len; i++) {
-		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
-		if (!dsg) {
-			s3c24xx_dma_free_txd(txd);
-			return NULL;
-		}
-		list_add_tail(&dsg->node, &txd->dsg_list);
-
-		dsg->len = period;
-		/* Check last period length */
-		if (i == sg_len - 1)
-			dsg->len = size - period * i;
-		if (direction == DMA_MEM_TO_DEV) {
-			dsg->src_addr = addr + period * i;
-			dsg->dst_addr = slave_addr;
-		} else { /* DMA_DEV_TO_MEM */
-			dsg->src_addr = slave_addr;
-			dsg->dst_addr = addr + period * i;
-		}
-	}
-
-	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
-}
-
-static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
-		struct dma_chan *chan, struct scatterlist *sgl,
-		unsigned int sg_len, enum dma_transfer_direction direction,
-		unsigned long flags, void *context)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
-	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
-	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
-	struct s3c24xx_txd *txd;
-	struct s3c24xx_sg *dsg;
-	struct scatterlist *sg;
-	dma_addr_t slave_addr;
-	u32 hwcfg = 0;
-	int tmp;
-
-	dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n",
-			sg_dma_len(sgl), s3cchan->name);
-
-	txd = s3c24xx_dma_get_txd();
-	if (!txd)
-		return NULL;
-
-	if (cdata->handshake)
-		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
-
-	switch (cdata->bus) {
-	case S3C24XX_DMA_APB:
-		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
-		hwcfg |= S3C24XX_DISRCC_LOC_APB;
-		break;
-	case S3C24XX_DMA_AHB:
-		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
-		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
-		break;
-	}
-
-	/*
-	 * Always assume our peripheral desintation is a fixed
-	 * address in memory.
-	 */
-	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
-
-	/*
-	 * Individual dma operations are requested by the slave,
-	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
-	 */
-	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
-
-	if (direction == DMA_MEM_TO_DEV) {
-		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
-			      S3C24XX_DISRCC_INC_INCREMENT;
-		txd->didstc = hwcfg;
-		slave_addr = s3cchan->cfg.dst_addr;
-		txd->width = s3cchan->cfg.dst_addr_width;
-	} else if (direction == DMA_DEV_TO_MEM) {
-		txd->disrcc = hwcfg;
-		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
-			      S3C24XX_DIDSTC_INC_INCREMENT;
-		slave_addr = s3cchan->cfg.src_addr;
-		txd->width = s3cchan->cfg.src_addr_width;
-	} else {
-		s3c24xx_dma_free_txd(txd);
-		dev_err(&s3cdma->pdev->dev,
-			"direction %d unsupported\n", direction);
-		return NULL;
-	}
-
-	for_each_sg(sgl, sg, sg_len, tmp) {
-		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
-		if (!dsg) {
-			s3c24xx_dma_free_txd(txd);
-			return NULL;
-		}
-		list_add_tail(&dsg->node, &txd->dsg_list);
-
-		dsg->len = sg_dma_len(sg);
-		if (direction == DMA_MEM_TO_DEV) {
-			dsg->src_addr = sg_dma_address(sg);
-			dsg->dst_addr = slave_addr;
-		} else { /* DMA_DEV_TO_MEM */
-			dsg->src_addr = slave_addr;
-			dsg->dst_addr = sg_dma_address(sg);
-		}
-	}
-
-	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
-}
-
-/*
- * Slave transactions callback to the slave device to allow
- * synchronization of slave DMA signals with the DMAC enable
- */
-static void s3c24xx_dma_issue_pending(struct dma_chan *chan)
-{
-	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
-	unsigned long flags;
-
-	spin_lock_irqsave(&s3cchan->vc.lock, flags);
-	if (vchan_issue_pending(&s3cchan->vc)) {
-		if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING)
-			s3c24xx_dma_phy_alloc_and_start(s3cchan);
-	}
-	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
-}
-
-/*
- * Bringup and teardown
- */
-
-/*
- * Initialise the DMAC memcpy/slave channels.
- * Make a local wrapper to hold required data
- */
-static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
-		struct dma_device *dmadev, unsigned int channels, bool slave)
-{
-	struct s3c24xx_dma_chan *chan;
-	int i;
-
-	INIT_LIST_HEAD(&dmadev->channels);
-
-	/*
-	 * Register as many memcpy as we have physical channels,
-	 * we won't always be able to use all but the code will have
-	 * to cope with that situation.
-	 */
-	for (i = 0; i < channels; i++) {
-		chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL);
-		if (!chan)
-			return -ENOMEM;
-
-		chan->id = i;
-		chan->host = s3cdma;
-		chan->state = S3C24XX_DMA_CHAN_IDLE;
-
-		if (slave) {
-			chan->slave = true;
-			chan->name = kasprintf(GFP_KERNEL, "slave%d", i);
-			if (!chan->name)
-				return -ENOMEM;
-		} else {
-			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
-			if (!chan->name)
-				return -ENOMEM;
-		}
-		dev_dbg(dmadev->dev,
-			 "initialize virtual channel \"%s\"\n",
-			 chan->name);
-
-		chan->vc.desc_free = s3c24xx_dma_desc_free;
-		vchan_init(&chan->vc, dmadev);
-	}
-	dev_info(dmadev->dev, "initialized %d virtual %s channels\n",
-		 i, slave ? "slave" : "memcpy");
-	return i;
-}
-
-static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev)
-{
-	struct s3c24xx_dma_chan *chan = NULL;
-	struct s3c24xx_dma_chan *next;
-
-	list_for_each_entry_safe(chan,
-				 next, &dmadev->channels, vc.chan.device_node) {
-		list_del(&chan->vc.chan.device_node);
-		tasklet_kill(&chan->vc.task);
-	}
-}
-
-/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */
-static struct soc_data soc_s3c2410 = {
-	.stride = 0x40,
-	.has_reqsel = false,
-	.has_clocks = false,
-};
-
-/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */
-static struct soc_data soc_s3c2412 = {
-	.stride = 0x40,
-	.has_reqsel = true,
-	.has_clocks = true,
-};
-
-/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */
-static struct soc_data soc_s3c2443 = {
-	.stride = 0x100,
-	.has_reqsel = true,
-	.has_clocks = true,
-};
-
-static const struct platform_device_id s3c24xx_dma_driver_ids[] = {
-	{
-		.name		= "s3c2410-dma",
-		.driver_data	= (kernel_ulong_t)&soc_s3c2410,
-	}, {
-		.name		= "s3c2412-dma",
-		.driver_data	= (kernel_ulong_t)&soc_s3c2412,
-	}, {
-		.name		= "s3c2443-dma",
-		.driver_data	= (kernel_ulong_t)&soc_s3c2443,
-	},
-	{ },
-};
-
-static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev)
-{
-	return (struct soc_data *)
-			 platform_get_device_id(pdev)->driver_data;
-}
-
-static int s3c24xx_dma_probe(struct platform_device *pdev)
-{
-	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
-	struct s3c24xx_dma_engine *s3cdma;
-	struct soc_data *sdata;
-	struct resource *res;
-	int ret;
-	int i;
-
-	if (!pdata) {
-		dev_err(&pdev->dev, "platform data missing\n");
-		return -ENODEV;
-	}
-
-	/* Basic sanity check */
-	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
-		dev_err(&pdev->dev, "too many dma channels %d, max %d\n",
-			pdata->num_phy_channels, MAX_DMA_CHANNELS);
-		return -EINVAL;
-	}
-
-	sdata = s3c24xx_dma_get_soc_data(pdev);
-	if (!sdata)
-		return -EINVAL;
-
-	s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL);
-	if (!s3cdma)
-		return -ENOMEM;
-
-	s3cdma->pdev = pdev;
-	s3cdma->pdata = pdata;
-	s3cdma->sdata = sdata;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	s3cdma->base = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(s3cdma->base))
-		return PTR_ERR(s3cdma->base);
-
-	s3cdma->phy_chans = devm_kcalloc(&pdev->dev,
-					      pdata->num_phy_channels,
-					      sizeof(struct s3c24xx_dma_phy),
-					      GFP_KERNEL);
-	if (!s3cdma->phy_chans)
-		return -ENOMEM;
-
-	/* acquire irqs and clocks for all physical channels */
-	for (i = 0; i < pdata->num_phy_channels; i++) {
-		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
-		char clk_name[6];
-
-		phy->id = i;
-		phy->base = s3cdma->base + (i * sdata->stride);
-		phy->host = s3cdma;
-
-		phy->irq = platform_get_irq(pdev, i);
-		if (phy->irq < 0)
-			continue;
-
-		ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq,
-				       0, pdev->name, phy);
-		if (ret) {
-			dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n",
-				i, ret);
-			continue;
-		}
-
-		if (sdata->has_clocks) {
-			sprintf(clk_name, "dma.%d", i);
-			phy->clk = devm_clk_get(&pdev->dev, clk_name);
-			if (IS_ERR(phy->clk) && sdata->has_clocks) {
-				dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n",
-					i, PTR_ERR(phy->clk));
-				continue;
-			}
-
-			ret = clk_prepare(phy->clk);
-			if (ret) {
-				dev_err(&pdev->dev, "clock for phy %d failed, error %d\n",
-					i, ret);
-				continue;
-			}
-		}
-
-		spin_lock_init(&phy->lock);
-		phy->valid = true;
-
-		dev_dbg(&pdev->dev, "physical channel %d is %s\n",
-			i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE");
-	}
-
-	/* Initialize memcpy engine */
-	dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
-	dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
-	s3cdma->memcpy.dev = &pdev->dev;
-	s3cdma->memcpy.device_free_chan_resources =
-					s3c24xx_dma_free_chan_resources;
-	s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
-	s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status;
-	s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending;
-	s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config;
-	s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all;
-	s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize;
-
-	/* Initialize slave engine for SoC internal dedicated peripherals */
-	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
-	dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
-	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
-	s3cdma->slave.dev = &pdev->dev;
-	s3cdma->slave.device_free_chan_resources =
-					s3c24xx_dma_free_chan_resources;
-	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
-	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
-	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
-	s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic;
-	s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config;
-	s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all;
-	s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize;
-	s3cdma->slave.filter.map = pdata->slave_map;
-	s3cdma->slave.filter.mapcnt = pdata->slavecnt;
-	s3cdma->slave.filter.fn = s3c24xx_dma_filter;
-
-	/* Register as many memcpy channels as there are physical channels */
-	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy,
-						pdata->num_phy_channels, false);
-	if (ret <= 0) {
-		dev_warn(&pdev->dev,
-			 "%s failed to enumerate memcpy channels - %d\n",
-			 __func__, ret);
-		goto err_memcpy;
-	}
-
-	/* Register slave channels */
-	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave,
-				pdata->num_channels, true);
-	if (ret <= 0) {
-		dev_warn(&pdev->dev,
-			"%s failed to enumerate slave channels - %d\n",
-				__func__, ret);
-		goto err_slave;
-	}
-
-	ret = dma_async_device_register(&s3cdma->memcpy);
-	if (ret) {
-		dev_warn(&pdev->dev,
-			"%s failed to register memcpy as an async device - %d\n",
-			__func__, ret);
-		goto err_memcpy_reg;
-	}
-
-	ret = dma_async_device_register(&s3cdma->slave);
-	if (ret) {
-		dev_warn(&pdev->dev,
-			"%s failed to register slave as an async device - %d\n",
-			__func__, ret);
-		goto err_slave_reg;
-	}
-
-	platform_set_drvdata(pdev, s3cdma);
-	dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n",
-		 pdata->num_phy_channels);
-
-	return 0;
-
-err_slave_reg:
-	dma_async_device_unregister(&s3cdma->memcpy);
-err_memcpy_reg:
-	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
-err_slave:
-	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
-err_memcpy:
-	if (sdata->has_clocks)
-		for (i = 0; i < pdata->num_phy_channels; i++) {
-			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
-			if (phy->valid)
-				clk_unprepare(phy->clk);
-		}
-
-	return ret;
-}
-
-static void s3c24xx_dma_free_irq(struct platform_device *pdev,
-				struct s3c24xx_dma_engine *s3cdma)
-{
-	int i;
-
-	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
-		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
-
-		devm_free_irq(&pdev->dev, phy->irq, phy);
-	}
-}
-
-static int s3c24xx_dma_remove(struct platform_device *pdev)
-{
-	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
-	struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev);
-	struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev);
-	int i;
-
-	dma_async_device_unregister(&s3cdma->slave);
-	dma_async_device_unregister(&s3cdma->memcpy);
-
-	s3c24xx_dma_free_irq(pdev, s3cdma);
-
-	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
-	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
-
-	if (sdata->has_clocks)
-		for (i = 0; i < pdata->num_phy_channels; i++) {
-			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
-			if (phy->valid)
-				clk_unprepare(phy->clk);
-		}
-
-	return 0;
-}
-
-static struct platform_driver s3c24xx_dma_driver = {
-	.driver		= {
-		.name	= "s3c24xx-dma",
-	},
-	.id_table	= s3c24xx_dma_driver_ids,
-	.probe		= s3c24xx_dma_probe,
-	.remove		= s3c24xx_dma_remove,
-};
-
-module_platform_driver(s3c24xx_dma_driver);
-
-bool s3c24xx_dma_filter(struct dma_chan *chan, void *param)
-{
-	struct s3c24xx_dma_chan *s3cchan;
-
-	if (chan->device->dev->driver != &s3c24xx_dma_driver.driver)
-		return false;
-
-	s3cchan = to_s3c24xx_dma_chan(chan);
-
-	return s3cchan->id == (uintptr_t)param;
-}
-EXPORT_SYMBOL(s3c24xx_dma_filter);
-
-MODULE_DESCRIPTION("S3C24XX DMA Driver");
-MODULE_AUTHOR("Heiko Stuebner");
-MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h
deleted file mode 100644
index 96d02dbeea67a..0000000000000
--- a/include/linux/platform_data/dma-s3c24xx.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * S3C24XX DMA handling
- *
- * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
- */
-
-/* Helper to encode the source selection constraints for early s3c socs. */
-#define S3C24XX_DMA_CHANREQ(src, chan)	((BIT(3) | src) << chan * 4)
-
-enum s3c24xx_dma_bus {
-	S3C24XX_DMA_APB,
-	S3C24XX_DMA_AHB,
-};
-
-/**
- * @bus: on which bus does the peripheral reside - AHB or APB.
- * @handshake: is a handshake with the peripheral necessary
- * @chansel: channel selection information, depending on variant; reqsel for
- *	     s3c2443 and later and channel-selection map for earlier SoCs
- *	     see CHANSEL doc in s3c2443-dma.c
- */
-struct s3c24xx_dma_channel {
-	enum s3c24xx_dma_bus bus;
-	bool handshake;
-	u16 chansel;
-};
-
-struct dma_slave_map;
-
-/**
- * struct s3c24xx_dma_platdata - platform specific settings
- * @num_phy_channels: number of physical channels
- * @channels: array of virtual channel descriptions
- * @num_channels: number of virtual channels
- * @slave_map: dma slave map matching table
- * @slavecnt: number of elements in slave_map
- */
-struct s3c24xx_dma_platdata {
-	int num_phy_channels;
-	struct s3c24xx_dma_channel *channels;
-	int num_channels;
-	const struct dma_slave_map *slave_map;
-	int slavecnt;
-};
-
-struct dma_chan;
-bool s3c24xx_dma_filter(struct dma_chan *chan, void *param);
-- 
GitLab


From a92b744f28e86e05514e6bbd889131f6693b6eed Mon Sep 17 00:00:00 2001
From: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
Date: Tue, 25 Oct 2022 13:12:16 +0530
Subject: [PATCH 0914/4122] dmaengine: xilinx_dma: fix xilinx_dma_child_probe()
 return documentation

Modify xilinx_dma_child_probe() return documentation to be inline
with implementation i.e. can also return failure value on error.

Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
Link: https://lore.kernel.org/r/1666683737-7668-2-git-send-email-radhey.shyam.pandey@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 8cd4e69dc7b4c..3b8cfeccf2da9 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -2924,7 +2924,7 @@ static int xilinx_dma_chan_probe(struct xilinx_dma_device *xdev,
  * @xdev: Driver specific device structure
  * @node: Device node
  *
- * Return: 0 always.
+ * Return: '0' on success and failure value on error.
  */
 static int xilinx_dma_child_probe(struct xilinx_dma_device *xdev,
 				    struct device_node *node)
-- 
GitLab


From 73f11324738a4c23159cf22b08225b6642232982 Mon Sep 17 00:00:00 2001
From: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
Date: Tue, 25 Oct 2022 13:12:17 +0530
Subject: [PATCH 0915/4122] dmaengine: xilinx_dma : add
 xilinx_dma_device_config() return documentation

document xilinx_dma_device_config() return value. Fixes below
kernel-doc warning.

xilinx_dma.c:1665: warning: No description found for return value
of 'xilinx_dma_device_config'

Signed-off-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
Link: https://lore.kernel.org/r/1666683737-7668-3-git-send-email-radhey.shyam.pandey@amd.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/xilinx/xilinx_dma.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/dma/xilinx/xilinx_dma.c b/drivers/dma/xilinx/xilinx_dma.c
index 3b8cfeccf2da9..a8d23cdf883e5 100644
--- a/drivers/dma/xilinx/xilinx_dma.c
+++ b/drivers/dma/xilinx/xilinx_dma.c
@@ -1659,6 +1659,8 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan)
  * xilinx_dma_device_config - Configure the DMA channel
  * @dchan: DMA channel
  * @config: channel configuration
+ *
+ * Return: 0 always.
  */
 static int xilinx_dma_device_config(struct dma_chan *dchan,
 				    struct dma_slave_config *config)
-- 
GitLab


From 14e4b9f4289aed2c8d4858cd750748041b6c434f Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 2 Nov 2022 21:54:31 -0700
Subject: [PATCH 0916/4122] perf trace: Raw augmented syscalls fix libbpf 1.0+
 compatibility

Don't use deprecated and now broken map style. Avoid use of
tools/perf/include/bpf/bpf.h and use the more regular BPF headers.

Committer notes:

Add /usr/include to the include path so that bpf/bpf_helpers.h can be
found, remove sys/socket.h, adding the sockaddr_storage definition, also
remove stdbool.h, both were preventing building the
augmented_raw_syscalls.c file with clang, revisit later.

Testing it:

Asking for syscalls that have string arguments:

  # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,string --max-events 10
     0.000 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/intel-rapl:0:2/energy_uj", flags: RDONLY) = 13
     0.158 thermald/1144 openat(dfd: CWD, filename: "/sys/class/powercap/intel-rapl/intel-rapl:0/energy_uj", flags: RDONLY) = 13
     0.215 thermald/1144 openat(dfd: CWD, filename: "/sys/class/thermal/thermal_zone3/temp", flags: RDONLY) = 13
    16.448 cgroupify/36478 openat(dfd: 4, filename: ".", flags: RDONLY|CLOEXEC|DIRECTORY|NONBLOCK) = 5
    16.468 cgroupify/36478 newfstatat(dfd: 5, filename: "", statbuf: 0x7fffca5b4130, flag: 4096) = 0
    16.473 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
    16.499 systemd-oomd/972 newfstatat(dfd: 12, filename: "", statbuf: 0x7ffd2bc73cc0, flag: 4096) = 0
    16.516 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
    16.538 abrt-dump-jour/1370 newfstatat(dfd: 21, filename: "", statbuf: 0x7ffc651b8980, flag: 4096) = 0
    16.540 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
  #

Networking syscalls:

  # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,sendto*,connect* --max-events 10
     0.000 isc-net-0005/1206 connect(fd: 512, uservaddr: { .family: INET, port: 53, addr: 23.211.132.65 }, addrlen: 16) = 0
     0.070 isc-net-0002/1203 connect(fd: 515, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
     0.031 isc-net-0006/1207 connect(fd: 513, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
     0.079 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a40611b0, len: 106, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106
     0.180 isc-net-0006/1207 connect(fd: 519, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
     0.211 isc-net-0006/1207 sendto(fd: 3, buff: 0x7f73a4061230, len: 106, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 106
     0.298 isc-net-0006/1207 connect(fd: 515, uservaddr: { .family: INET, port: 53, addr: 96.7.49.67 }, addrlen: 16) = 0
     0.109 isc-net-0004/1205 connect(fd: 518, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:2::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
     0.164 isc-net-0002/1203 sendto(fd: 3, buff: 0x7f73ac064300, len: 107, flags: NOSIGNAL, addr: { .family: UNSPEC }, addr_len: NULL) = 107
     0.247 isc-net-0002/1203 connect(fd: 522, uservaddr: { .family: INET6, port: 53, addr: 2600:1401:1::43 }, addrlen: 28) = -1 ENETUNREACH (Network is unreachable)
  #

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221103045437.163510-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config                    |   3 +-
 .../examples/bpf/augmented_raw_syscalls.c     | 103 +++++++++++++++---
 tools/perf/util/llvm-utils.c                  |   2 +-
 3 files changed, 89 insertions(+), 19 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index d3d3c13a9f25b..59420676dee8e 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -1239,7 +1239,8 @@ includedir = $(abspath $(prefix)/$(includedir_relative))
 mandir = share/man
 infodir = share/info
 perfexecdir = libexec/perf-core
-perf_include_dir = lib/perf/include
+# FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance
+perf_include_dir = /usr/include
 perf_examples_dir = lib/perf/examples
 sharedir = $(prefix)/share
 template_dir = share/perf-core/templates
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index a262dcd020f49..926238efd7d8d 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -14,13 +14,21 @@
  * code that will combine entry/exit in a strace like way.
  */
 
-#include <unistd.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
 #include <linux/limits.h>
-#include <linux/socket.h>
-#include <pid_filter.h>
+
+// FIXME: These should come from system headers
+typedef char bool;
+typedef int pid_t;
 
 /* bpf-output associated map */
-bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
+struct __augmented_syscalls__ {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__type(key, int);
+	__type(value, __u32);
+	__uint(max_entries, __NR_CPUS__);
+} __augmented_syscalls__ SEC(".maps");
 
 /*
  * string_args_len: one per syscall arg, 0 means not a string or don't copy it,
@@ -29,24 +37,39 @@ bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
  */
 struct syscall {
 	bool	enabled;
-	u16	string_args_len[6];
+	__u16	string_args_len[6];
 };
 
-bpf_map(syscalls, ARRAY, int, struct syscall, 512);
+struct syscalls {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct syscall);
+	__uint(max_entries, 512);
+} syscalls SEC(".maps");
 
 /*
  * What to augment at entry?
  *
  * Pointer arg payloads (filenames, etc) passed from userspace to the kernel
  */
-bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512);
+struct syscalls_sys_enter {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 512);
+} syscalls_sys_enter SEC(".maps");
 
 /*
  * What to augment at exit?
  *
  * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace.
  */
-bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512);
+struct syscalls_sys_exit {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 512);
+} syscalls_sys_exit SEC(".maps");
 
 struct syscall_enter_args {
 	unsigned long long common_tp_fields;
@@ -66,7 +89,38 @@ struct augmented_arg {
 	char		value[PATH_MAX];
 };
 
-pid_filter(pids_filtered);
+struct pids_filtered {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, pid_t);
+	__type(value, bool);
+	__uint(max_entries, 64);
+} pids_filtered SEC(".maps");
+
+/*
+ * Desired design of maximum size and alignment (see RFC2553)
+ */
+#define SS_MAXSIZE   128     /* Implementation specific max size */
+
+typedef unsigned short sa_family_t;
+
+/*
+ * FIXME: Should come from system headers
+ *
+ * The definition uses anonymous union and struct in order to control the
+ * default alignment.
+ */
+struct sockaddr_storage {
+	union {
+		struct {
+			sa_family_t    ss_family; /* address family */
+			/* Following field(s) are implementation specific */
+			char __data[SS_MAXSIZE - sizeof(unsigned short)];
+				/* space to achieve desired size, */
+				/* _SS_MAXSIZE value minus size of ss_family */
+		};
+		void *__align; /* implementation specific desired alignment */
+	};
+};
 
 struct augmented_args_payload {
        struct syscall_enter_args args;
@@ -79,7 +133,12 @@ struct augmented_args_payload {
 };
 
 // We need more tmp space than the BPF stack can give us
-bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1);
+struct augmented_args_tmp {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, int);
+	__type(value, struct augmented_args_payload);
+	__uint(max_entries, 1);
+} augmented_args_tmp SEC(".maps");
 
 static inline struct augmented_args_payload *augmented_args_payload(void)
 {
@@ -90,14 +149,14 @@ static inline struct augmented_args_payload *augmented_args_payload(void)
 static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len)
 {
 	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
-	return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
+	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
 }
 
 static inline
 unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len)
 {
 	unsigned int augmented_len = sizeof(*augmented_arg);
-	int string_len = probe_read_str(&augmented_arg->value, arg_len, arg);
+	int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg);
 
 	augmented_arg->size = augmented_arg->err = 0;
 	/*
@@ -146,7 +205,7 @@ int sys_enter_connect(struct syscall_enter_args *args)
 	if (socklen > sizeof(augmented_args->saddr))
 		socklen = sizeof(augmented_args->saddr);
 
-	probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
 
 	return augmented__output(args, augmented_args, len + socklen);
 }
@@ -165,7 +224,7 @@ int sys_enter_sendto(struct syscall_enter_args *args)
 	if (socklen > sizeof(augmented_args->saddr))
 		socklen = sizeof(augmented_args->saddr);
 
-	probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
+	bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg);
 
 	return augmented__output(args, augmented_args, len + socklen);
 }
@@ -234,6 +293,16 @@ int sys_enter_renameat(struct syscall_enter_args *args)
 	return augmented__output(args, augmented_args, len);
 }
 
+static pid_t getpid(void)
+{
+	return bpf_get_current_pid_tgid();
+}
+
+static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
+{
+	return bpf_map_lookup_elem(pids, &pid) != NULL;
+}
+
 SEC("raw_syscalls:sys_enter")
 int sys_enter(struct syscall_enter_args *args)
 {
@@ -257,7 +326,7 @@ int sys_enter(struct syscall_enter_args *args)
 	if (augmented_args == NULL)
 		return 1;
 
-	probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
+	bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
 
 	/*
 	 * Jump to syscall specific augmenter, even if the default one,
@@ -278,7 +347,7 @@ int sys_exit(struct syscall_exit_args *args)
 	if (pid_filter__has(&pids_filtered, getpid()))
 		return 0;
 
-	probe_read(&exit_args, sizeof(exit_args), args);
+	bpf_probe_read(&exit_args, sizeof(exit_args), args);
 	/*
 	 * Jump to syscall specific return augmenter, even if the default one,
 	 * "!raw_syscalls:unaugmented" that will just return 1 to return the
@@ -291,4 +360,4 @@ int sys_exit(struct syscall_exit_args *args)
 	return 0;
 }
 
-license(GPL);
+char _license[] SEC("license") = "GPL";
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 2dc7970074196..a5cac85783d87 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 
 	snprintf(linux_version_code_str, sizeof(linux_version_code_str),
 		 "0x%x", kernel_version);
-	if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0)
+	if (asprintf(&perf_bpf_include_opts, "-I%s/", perf_include_dir) < 0)
 		goto errout;
 	force_set_env("NR_CPUS", nr_cpus_avail_str);
 	force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
-- 
GitLab


From 514607e3c0f0e381aa4f6fe866b70b1fa9bfae74 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 2 Nov 2022 21:54:34 -0700
Subject: [PATCH 0917/4122] perf trace: hello fix libbpf 1.0+ compatibility

Don't use deprecated and now broken map style. Avoid use of
tools/perf/include/bpf/bpf.h and use the more regular BPF headers.

Switch to raw_syscalls:sys_enter to avoid the evlist being empty and
fixing generating output.

Committer testing:

  # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/hello.c --call-graph=dwarf --max-events 5
     0.000 perf/206852 __bpf_stdout__(Hello, world)
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                       __GI___sched_setaffinity_new (/usr/lib64/libc.so.6)
     8.561 pipewire/2290 __bpf_stdout__(Hello, world)
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                       __libc_read (/usr/lib64/libc.so.6)
     8.571 pipewire/2290 __bpf_stdout__(Hello, world)
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                       __GI___ioctl (/usr/lib64/libc.so.6)
     8.586 pipewire/2290 __bpf_stdout__(Hello, world)
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                       __GI___write (/usr/lib64/libc.so.6)
     8.592 pipewire/2290 __bpf_stdout__(Hello, world)
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       syscall_trace_enter.constprop.0 ([kernel.kallsyms])
                                       do_syscall_64 ([kernel.kallsyms])
                                       entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                       __timerfd_settime (/usr/lib64/libc.so.6)
  #

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221103045437.163510-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/examples/bpf/hello.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c
index cf3c2fdc7f79c..e9080b0df1587 100644
--- a/tools/perf/examples/bpf/hello.c
+++ b/tools/perf/examples/bpf/hello.c
@@ -1,9 +1,27 @@
-#include <stdio.h>
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
 
-int syscall_enter(openat)(void *args)
+struct __bpf_stdout__ {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__type(key, int);
+	__type(value, __u32);
+	__uint(max_entries, __NR_CPUS__);
+} __bpf_stdout__ SEC(".maps");
+
+#define puts(from) \
+	({ const int __len = sizeof(from); \
+	   char __from[sizeof(from)] = from;			\
+	   bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \
+			  &__from, __len & (sizeof(from) - 1)); })
+
+struct syscall_enter_args;
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
 {
 	puts("Hello, world\n");
 	return 0;
 }
 
-license(GPL);
+char _license[] SEC("license") = "GPL";
-- 
GitLab


From baddab891a21e5870723d182020fec445b0874b3 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 2 Nov 2022 21:54:35 -0700
Subject: [PATCH 0918/4122] perf trace: empty fix libbpf 1.0+ compatibility

Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF
headers.  Add raw_syscalls:sys_enter to avoid the evlist being empty.

Committer testing:

  # time perf trace -e ~acme/git/perf/tools/perf/examples/bpf/empty.c sleep 5

  real	0m5.697s
  user	0m0.217s
  sys	0m0.453s
  #

I.e. it sets up everything successfully (use -v to see the details) and
filters out all syscalls, then exits when the workload (sleep 5)
finishes.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221103045437.163510-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/examples/bpf/empty.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c
index 7d7fb0c9fe76b..3e296c0c53d7d 100644
--- a/tools/perf/examples/bpf/empty.c
+++ b/tools/perf/examples/bpf/empty.c
@@ -1,3 +1,12 @@
-#include <bpf/bpf.h>
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
 
-license(GPL);
+struct syscall_enter_args;
+
+SEC("raw_syscalls:sys_enter")
+int sys_enter(struct syscall_enter_args *args)
+{
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
-- 
GitLab


From 71811e8c77e974a0ab978c86a2b32b3f2c82f455 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 2 Nov 2022 21:54:36 -0700
Subject: [PATCH 0919/4122] perf trace: 5sec fix libbpf 1.0+ compatibility

Avoid use of tools/perf/include/bpf/bpf.h and use the more regular BPF
headers.

Committer testing:

  # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c sleep 5
       0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000)
  # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/5sec.c/max-stack=7/ sleep 5
       0.000 perf_bpf_probe:hrtimer_nanosleep(__probe_ip: -1474734416, rqtp: 5000000000)
                                         hrtimer_nanosleep ([kernel.kallsyms])
                                         common_nsleep ([kernel.kallsyms])
                                         __x64_sys_clock_nanosleep ([kernel.kallsyms])
                                         do_syscall_64 ([kernel.kallsyms])
                                         entry_SYSCALL_64_after_hwframe ([kernel.kallsyms])
                                         __GI___clock_nanosleep (/usr/lib64/libc.so.6)
                                         [0] ([unknown])
  #

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221103045437.163510-7-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/examples/bpf/5sec.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c
index e6b6181c6dc63..3bd7fc17631f0 100644
--- a/tools/perf/examples/bpf/5sec.c
+++ b/tools/perf/examples/bpf/5sec.c
@@ -39,13 +39,15 @@
    Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <acme@redhat.com>
 */
 
-#include <bpf.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
 
 #define NSEC_PER_SEC	1000000000L
 
-int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec)
+SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp")
+int hrtimer_nanosleep(void *ctx, int err, long long sec)
 {
 	return sec / NSEC_PER_SEC == 5ULL;
 }
 
-license(GPL);
+char _license[] SEC("license") = "GPL";
-- 
GitLab


From cfddf0d4a5571bcc94760c27729a60daefda38bb Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 2 Nov 2022 21:54:37 -0700
Subject: [PATCH 0920/4122] perf bpf: Remove now unused BPF headers

Example code has migrated to use standard BPF header files, remove
unnecessary perf equivalents. Update install step to not try to copy
these.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: https://lore.kernel.org/r/20221103045437.163510-8-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf              |  5 --
 tools/perf/include/bpf/bpf.h          | 70 ---------------------------
 tools/perf/include/bpf/linux/socket.h | 24 ---------
 tools/perf/include/bpf/pid_filter.h   | 21 --------
 tools/perf/include/bpf/stdio.h        | 16 ------
 tools/perf/include/bpf/unistd.h       | 10 ----
 6 files changed, 146 deletions(-)
 delete mode 100644 tools/perf/include/bpf/bpf.h
 delete mode 100644 tools/perf/include/bpf/linux/socket.h
 delete mode 100644 tools/perf/include/bpf/pid_filter.h
 delete mode 100644 tools/perf/include/bpf/stdio.h
 delete mode 100644 tools/perf/include/bpf/unistd.h

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index a432e59afc42a..67819f9056116 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -960,11 +960,6 @@ endif
 	$(call QUIET_INSTALL, libexec) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
 ifndef NO_LIBBPF
-	$(call QUIET_INSTALL, bpf-headers) \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \
-		$(INSTALL) include/bpf/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \
-		$(INSTALL) include/bpf/linux/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'
 	$(call QUIET_INSTALL, bpf-examples) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \
 		$(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'
diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h
deleted file mode 100644
index b422aeef53396..0000000000000
--- a/tools/perf/include/bpf/bpf.h
+++ /dev/null
@@ -1,70 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#ifndef _PERF_BPF_H
-#define _PERF_BPF_H
-
-#include <uapi/linux/bpf.h>
-
-/*
- * A helper structure used by eBPF C program to describe map attributes to
- * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h:
- */
-struct bpf_map {
-        unsigned int type;
-        unsigned int key_size;
-        unsigned int value_size;
-        unsigned int max_entries;
-        unsigned int map_flags;
-        unsigned int inner_map_idx;
-        unsigned int numa_node;
-};
-
-#define bpf_map(name, _type, type_key, type_val, _max_entries)	\
-struct bpf_map SEC("maps") name = {				\
-	.type	     = BPF_MAP_TYPE_##_type,			\
-	.key_size    = sizeof(type_key),			\
-	.value_size  = sizeof(type_val),			\
-	.max_entries = _max_entries,				\
-};								\
-struct ____btf_map_##name {					\
-	type_key key;						\
-	type_val value;                                 	\
-};								\
-struct ____btf_map_##name __attribute__((section(".maps." #name), used)) \
-	____btf_map_##name = { }
-
-/*
- * FIXME: this should receive .max_entries as a parameter, as careful
- *	  tuning of these limits is needed to avoid hitting limits that
- *	  prevents other BPF constructs, such as tracepoint handlers,
- *	  to get installed, with cryptic messages from libbpf, etc.
- *	  For the current need, 'perf trace --filter-pids', 64 should
- *	  be good enough, but this surely needs to be revisited.
- */
-#define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64)
-
-static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem;
-static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem;
-
-static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *)BPF_FUNC_tail_call;
-
-#define SEC(NAME) __attribute__((section(NAME),  used))
-
-#define probe(function, vars) \
-	SEC(#function "=" #function " " #vars) function
-
-#define syscall_enter(name) \
-	SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name
-
-#define syscall_exit(name) \
-	SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name
-
-#define license(name) \
-char _license[] SEC("license") = #name; \
-int _version SEC("version") = LINUX_VERSION_CODE;
-
-static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read;
-static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str;
-
-static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output;
-
-#endif /* _PERF_BPF_H */
diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h
deleted file mode 100644
index 7f844568dab87..0000000000000
--- a/tools/perf/include/bpf/linux/socket.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI_LINUX_SOCKET_H
-#define _UAPI_LINUX_SOCKET_H
-
-/*
- * Desired design of maximum size and alignment (see RFC2553)
- */
-#define _K_SS_MAXSIZE	128	/* Implementation specific max size */
-#define _K_SS_ALIGNSIZE	(__alignof__ (struct sockaddr *))
-				/* Implementation specific desired alignment */
-
-typedef unsigned short __kernel_sa_family_t;
-
-struct __kernel_sockaddr_storage {
-	__kernel_sa_family_t	ss_family;		/* address family */
-	/* Following field(s) are implementation specific */
-	char		__data[_K_SS_MAXSIZE - sizeof(unsigned short)];
-				/* space to achieve desired size, */
-				/* _SS_MAXSIZE value minus size of ss_family */
-} __attribute__ ((aligned(_K_SS_ALIGNSIZE)));	/* force desired alignment */
-
-#define sockaddr_storage __kernel_sockaddr_storage
-
-#endif /* _UAPI_LINUX_SOCKET_H */
diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h
deleted file mode 100644
index 6e61c4bdf5482..0000000000000
--- a/tools/perf/include/bpf/pid_filter.h
+++ /dev/null
@@ -1,21 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-
-#ifndef _PERF_BPF_PID_FILTER_
-#define _PERF_BPF_PID_FILTER_
-
-#include <bpf.h>
-
-#define pid_filter(name) pid_map(name, bool)
-
-static int pid_filter__add(struct bpf_map *pids, pid_t pid)
-{
-	bool value = true;
-	return bpf_map_update_elem(pids, &pid, &value, BPF_NOEXIST);
-}
-
-static bool pid_filter__has(struct bpf_map *pids, pid_t pid)
-{
-	return bpf_map_lookup_elem(pids, &pid) != NULL;
-}
-
-#endif // _PERF_BPF_PID_FILTER_
diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h
deleted file mode 100644
index 316af5b2ff351..0000000000000
--- a/tools/perf/include/bpf/stdio.h
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <bpf.h>
-
-struct bpf_map SEC("maps") __bpf_stdout__ = {
-       .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-       .key_size = sizeof(int),
-       .value_size = sizeof(u32),
-       .max_entries = __NR_CPUS__,
-};
-
-#define puts(from) \
-	({ const int __len = sizeof(from); \
-	   char __from[__len] = from; \
-	   perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \
-			  &__from, __len & (sizeof(from) - 1)); })
diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h
deleted file mode 100644
index ca7877f9a976f..0000000000000
--- a/tools/perf/include/bpf/unistd.h
+++ /dev/null
@@ -1,10 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-
-#include <bpf.h>
-
-static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid;
-
-static pid_t getpid(void)
-{
-	return bpf_get_current_pid_tgid();
-}
-- 
GitLab


From 3cd65616f607cd3769647e2e0490e4048c0f289e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 4 Nov 2022 11:15:51 -0300
Subject: [PATCH 0921/4122] perf examples bpf: Remove augmented_syscalls.c, the
 raw_syscalls one should be used instead

The attempt at using BPF to copy syscall pointer arguments to show them
like strace does started with sys_{enter,exit}_SYSCALL_NAME tracepoints,
in tools/perf/examples/bpf/augmented_syscalls.c, but then achieving this
result using raw_syscalls:{enter,exit} and BPF tail calls was deemed
more flexible.

The 'perf trace' codebase was adapted to using it while trying to
continue supporting the old style per-syscall tracepoints, which at some
point became too unwieldly and now isn't working properly.

So lets scale back and concentrate on the augmented_raw_syscalls.c
model on the way to using BPF skeletons.

For the same reason remove the etcsnoop.c example, that used the
old style per-tp syscalls just for the 'open' and 'openat' syscalls,
looking at the pathnames starting with "/etc/", we should be able
to do this later using filters, after we move to BPF skels.

The augmented_raw_syscalls.c one continues to work, now with libbpf 1.0,
after Ian work on using the libbpf map style:

  # perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,open* --max-events 4
     0.000 ping/194815 openat(dfd: CWD, filename: "/etc/hosts", flags: RDONLY|CLOEXEC) = 5
    20.225 systemd-oomd/972 openat(dfd: CWD, filename: "/proc/meminfo", flags: RDONLY|CLOEXEC) = 12
    20.285 abrt-dump-jour/1371 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
    20.301 abrt-dump-jour/1370 openat(dfd: CWD, filename: "/var/log/journal/d6a97235307247e09f13f326fb607e3c/system.journal", flags: RDONLY|CLOEXEC|NONBLOCK) = 21
  #

This is using this:

  # cat ~/.perfconfig
  [trace]
	show_zeros = yes
	show_duration = no
	no_inherit = yes
	args_alignment = 40

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/examples/bpf/augmented_syscalls.c | 169 -------------------
 tools/perf/examples/bpf/etcsnoop.c           |  76 ---------
 2 files changed, 245 deletions(-)
 delete mode 100644 tools/perf/examples/bpf/augmented_syscalls.c
 delete mode 100644 tools/perf/examples/bpf/etcsnoop.c

diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c
deleted file mode 100644
index 524fdb8534b3f..0000000000000
--- a/tools/perf/examples/bpf/augmented_syscalls.c
+++ /dev/null
@@ -1,169 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Augment syscalls with the contents of the pointer arguments.
- *
- * Test it with:
- *
- * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
- *
- * It'll catch some openat syscalls related to the dynamic linked and
- * the last one should be the one for '/etc/passwd'.
- *
- * This matches what is marshalled into the raw_syscall:sys_enter payload
- * expected by the 'perf trace' beautifiers, and can be used by them, that will
- * check if perf_sample->raw_data is more than what is expected for each
- * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the
- * contents of pointer arguments.
- */
-
-#include <stdio.h>
-#include <linux/socket.h>
-
-/* bpf-output associated map */
-bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
-
-struct syscall_exit_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	long		   ret;
-};
-
-struct augmented_filename {
-	unsigned int	size;
-	int		reserved;
-	char		value[256];
-};
-
-#define augmented_filename_syscall(syscall)							\
-struct augmented_enter_##syscall##_args {			 				\
-	struct syscall_enter_##syscall##_args	args;				 		\
-	struct augmented_filename		filename;				 	\
-};												\
-int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
-{												\
-	struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; 	\
-	unsigned int len = sizeof(augmented_args);						\
-	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
-	augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, 		\
-						      sizeof(augmented_args.filename.value), 	\
-						      args->filename_ptr); 			\
-	if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {		\
-		len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;	\
-		len &= sizeof(augmented_args.filename.value) - 1;				\
-	}											\
-	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	\
-	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		\
-				 &augmented_args, len);						\
-}												\
-int syscall_exit(syscall)(struct syscall_exit_args *args)					\
-{												\
-       return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */	\
-}
-
-struct syscall_enter_openat_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	long		   dfd;
-	char		   *filename_ptr;
-	long		   flags;
-	long		   mode;
-};
-
-augmented_filename_syscall(openat);
-
-struct syscall_enter_open_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	char		   *filename_ptr;
-	long		   flags;
-	long		   mode;
-};
-
-augmented_filename_syscall(open);
-
-struct syscall_enter_inotify_add_watch_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	long		   fd;
-	char		   *filename_ptr;
-	long		   mask;
-};
-
-augmented_filename_syscall(inotify_add_watch);
-
-struct statbuf;
-
-struct syscall_enter_newstat_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	char		   *filename_ptr;
-	struct stat	   *statbuf;
-};
-
-augmented_filename_syscall(newstat);
-
-#ifndef _K_SS_MAXSIZE
-#define _K_SS_MAXSIZE 128
-#endif
-
-#define augmented_sockaddr_syscall(syscall)						\
-struct augmented_enter_##syscall##_args {			 				\
-	struct syscall_enter_##syscall##_args	args;				 		\
-	struct sockaddr_storage			addr;						\
-};												\
-int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
-{												\
-	struct augmented_enter_##syscall##_args augmented_args;				 	\
-	unsigned long addrlen = sizeof(augmented_args.addr);					\
-	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
-/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */		\
-/*	if (addrlen > augmented_args.args.addrlen)				     */		\
-/*		addrlen = augmented_args.args.addrlen;				     */		\
-/*										     */		\
-	probe_read(&augmented_args.addr, addrlen, args->addr_ptr); 				\
-	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	\
-	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		\
-				 &augmented_args, 						\
-				sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\
-}												\
-int syscall_exit(syscall)(struct syscall_exit_args *args)					\
-{												\
-       return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 'read' */	\
-}
-
-struct sockaddr;
-
-struct syscall_enter_bind_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	long		   fd;
-	struct sockaddr	   *addr_ptr;
-	unsigned long	   addrlen;
-};
-
-augmented_sockaddr_syscall(bind);
-
-struct syscall_enter_connect_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	long		   fd;
-	struct sockaddr	   *addr_ptr;
-	unsigned long	   addrlen;
-};
-
-augmented_sockaddr_syscall(connect);
-
-struct syscall_enter_sendto_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	long		   fd;
-	void		   *buff;
-	long		   len;
-	unsigned long	   flags;
-	struct sockaddr	   *addr_ptr;
-	long		   addr_len;
-};
-
-augmented_sockaddr_syscall(sendto);
-
-license(GPL);
diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c
deleted file mode 100644
index e81b535346c0b..0000000000000
--- a/tools/perf/examples/bpf/etcsnoop.c
+++ /dev/null
@@ -1,76 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Augment the filename syscalls with the contents of the filename pointer argument
- * filtering only those that do not start with /etc/.
- *
- * Test it with:
- *
- * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null
- *
- * It'll catch some openat syscalls related to the dynamic linked and
- * the last one should be the one for '/etc/passwd'.
- *
- * This matches what is marshalled into the raw_syscall:sys_enter payload
- * expected by the 'perf trace' beautifiers, and can be used by them unmodified,
- * which will be done as that feature is implemented in the next csets, for now
- * it will appear in a dump done by the default tracepoint handler in 'perf trace',
- * that uses bpf_output__fprintf() to just dump those contents, as done with
- * the bpf-output event associated with the __bpf_output__ map declared in
- * tools/perf/include/bpf/stdio.h.
- */
-
-#include <stdio.h>
-
-/* bpf-output associated map */
-bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__);
-
-struct augmented_filename {
-	int	size;
-	int	reserved;
-	char	value[64];
-};
-
-#define augmented_filename_syscall_enter(syscall) 						\
-struct augmented_enter_##syscall##_args {			 				\
-	struct syscall_enter_##syscall##_args	args;				 		\
-	struct augmented_filename		filename;				 	\
-};												\
-int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args)				\
-{												\
-	char etc[6] = "/etc/";									\
-	struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; 	\
-	probe_read(&augmented_args.args, sizeof(augmented_args.args), args);			\
-	augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, 		\
-						      sizeof(augmented_args.filename.value), 	\
-						      args->filename_ptr); 			\
-	if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0)			\
-		return 0;									\
-	/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */	\
-	return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, 		\
-				 &augmented_args,						\
-				 (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \
-				 augmented_args.filename.size));				\
-}
-
-struct syscall_enter_openat_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	long		   dfd;
-	char		   *filename_ptr;
-	long		   flags;
-	long		   mode;
-};
-
-augmented_filename_syscall_enter(openat);
-
-struct syscall_enter_open_args {
-	unsigned long long common_tp_fields;
-	long		   syscall_nr;
-	char		   *filename_ptr;
-	long		   flags;
-	long		   mode;
-};
-
-augmented_filename_syscall_enter(open);
-
-license(GPL);
-- 
GitLab


From 3e98b9bd8469d0b78975be9b36e423b30b0badbe Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Fri, 28 Oct 2022 13:53:36 +0200
Subject: [PATCH 0922/4122] dmaengine: sh: Remove unused shdma-arm.h

shdma-arm.h was introduced with commit 1e69653d40f1 ("DMA: shdma: add
r8a73a4 DMAC data to the device ID table"), and its sole user was
removed with commit a19788612f51 ("dmaengine: sh: Remove R-Mobile APE6
support"). The latter mentions r8a73a4.dtsi but shdma support was
removed from that with commit cfda82037780 ("ARM: dts: r8a73a4: Remove
non-functional DMA support"), so it seems this is safe to remove.

Signed-off-by: Stephen Kitt <steve@sk2.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20221028115336.1052782-1-steve@sk2.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/sh/shdma-arm.h | 48 --------------------------------------
 1 file changed, 48 deletions(-)
 delete mode 100644 drivers/dma/sh/shdma-arm.h

diff --git a/drivers/dma/sh/shdma-arm.h b/drivers/dma/sh/shdma-arm.h
deleted file mode 100644
index 7459f9a13b5be..0000000000000
--- a/drivers/dma/sh/shdma-arm.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Renesas SuperH DMA Engine support
- *
- * Copyright (C) 2013 Renesas Electronics, Inc.
- */
-
-#ifndef SHDMA_ARM_H
-#define SHDMA_ARM_H
-
-#include "shdma.h"
-
-/* Transmit sizes and respective CHCR register values */
-enum {
-	XMIT_SZ_8BIT		= 0,
-	XMIT_SZ_16BIT		= 1,
-	XMIT_SZ_32BIT		= 2,
-	XMIT_SZ_64BIT		= 7,
-	XMIT_SZ_128BIT		= 3,
-	XMIT_SZ_256BIT		= 4,
-	XMIT_SZ_512BIT		= 5,
-};
-
-/* log2(size / 8) - used to calculate number of transfers */
-#define SH_DMAE_TS_SHIFT {		\
-	[XMIT_SZ_8BIT]		= 0,	\
-	[XMIT_SZ_16BIT]		= 1,	\
-	[XMIT_SZ_32BIT]		= 2,	\
-	[XMIT_SZ_64BIT]		= 3,	\
-	[XMIT_SZ_128BIT]	= 4,	\
-	[XMIT_SZ_256BIT]	= 5,	\
-	[XMIT_SZ_512BIT]	= 6,	\
-}
-
-#define TS_LOW_BIT	0x3 /* --xx */
-#define TS_HI_BIT	0xc /* xx-- */
-
-#define TS_LOW_SHIFT	(3)
-#define TS_HI_SHIFT	(20 - 2)	/* 2 bits for shifted low TS */
-
-#define TS_INDEX2VAL(i) \
-	((((i) & TS_LOW_BIT) << TS_LOW_SHIFT) |\
-	 (((i) & TS_HI_BIT)  << TS_HI_SHIFT))
-
-#define CHCR_TX(xmit_sz) (DM_FIX | SM_INC | RS_ERS | TS_INDEX2VAL((xmit_sz)))
-#define CHCR_RX(xmit_sz) (DM_INC | SM_FIX | RS_ERS | TS_INDEX2VAL((xmit_sz)))
-
-#endif
-- 
GitLab


From 9a8ddb35a9d5d3ad76784a012459b256a9d7de7e Mon Sep 17 00:00:00 2001
From: Xiaochen Shen <xiaochen.shen@intel.com>
Date: Sat, 22 Oct 2022 15:49:49 +0800
Subject: [PATCH 0923/4122] dmaengine: idxd: Make read buffer sysfs attributes
 invisible for Intel IAA

In current code, the following sysfs attributes are exposed to user to
show or update the values:
  max_read_buffers (max_tokens)
  read_buffer_limit (token_limit)
  group/read_buffers_allowed (group/tokens_allowed)
  group/read_buffers_reserved (group/tokens_reserved)
  group/use_read_buffer_limit (group/use_token_limit)

>From Intel IAA spec [1], Intel IAA does not support Read Buffer
allocation control. So these sysfs attributes should not be supported on
IAA device.

Fix this issue by making these sysfs attributes invisible through
is_visible() filter when the device is IAA.

Add description in the ABI documentation to mention that these
attributes are not visible when the device does not support Read Buffer
allocation control.

[1]: https://cdrdv2.intel.com/v1/dl/getContent/721858

Fixes: fde212e44f45 ("dmaengine: idxd: deprecate token sysfs attributes for read buffers")
Fixes: c52ca478233c ("dmaengine: idxd: add configuration component of driver")
Signed-off-by: Xiaochen Shen <xiaochen.shen@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/20221022074949.11719-1-xiaochen.shen@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../ABI/stable/sysfs-driver-dma-idxd          | 10 ++++++
 drivers/dma/idxd/sysfs.c                      | 36 +++++++++++++++++++
 2 files changed, 46 insertions(+)

diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index 69e2d9155e0d9..3becc9a82bdf6 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -50,6 +50,8 @@ Description:    The total number of read buffers supported by this device.
 		The read buffers represent resources within the DSA
 		implementation, and these resources are allocated by engines to
 		support operations. See DSA spec v1.2 9.2.4 Total Read Buffers.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:           /sys/bus/dsa/devices/dsa<m>/max_transfer_size
 Date:           Oct 25, 2019
@@ -123,6 +125,8 @@ Contact:        dmaengine@vger.kernel.org
 Description:    The maximum number of read buffers that may be in use at
 		one time by operations that access low bandwidth memory in the
 		device. See DSA spec v1.2 9.2.8 GENCFG on Global Read Buffer Limit.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/dsa<m>/cmd_status
 Date:		Aug 28, 2020
@@ -252,6 +256,8 @@ KernelVersion:	5.17.0
 Contact:	dmaengine@vger.kernel.org
 Description:	Enable the use of global read buffer limit for the group. See DSA
 		spec v1.2 9.2.18 GRPCFG Use Global Read Buffer Limit.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/group<m>.<n>/read_buffers_allowed
 Date:		Dec 10, 2021
@@ -260,6 +266,8 @@ Contact:	dmaengine@vger.kernel.org
 Description:	Indicates max number of read buffers that may be in use at one time
 		by all engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read
 		Buffers Allowed.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/group<m>.<n>/read_buffers_reserved
 Date:		Dec 10, 2021
@@ -268,6 +276,8 @@ Contact:	dmaengine@vger.kernel.org
 Description:	Indicates the number of Read Buffers reserved for the use of
 		engines in the group. See DSA spec v1.2 9.2.18 GRPCFG Read Buffers
 		Reserved.
+		It's not visible when the device does not support Read Buffer
+		allocation control.
 
 What:		/sys/bus/dsa/devices/group<m>.<n>/desc_progress_limit
 Date:		Sept 14, 2022
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index f30aad90537bc..7c7ec8323cb7c 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -528,6 +528,22 @@ static bool idxd_group_attr_progress_limit_invisible(struct attribute *attr,
 		!idxd->hw.group_cap.progress_limit;
 }
 
+static bool idxd_group_attr_read_buffers_invisible(struct attribute *attr,
+						   struct idxd_device *idxd)
+{
+	/*
+	 * Intel IAA does not support Read Buffer allocation control,
+	 * make these attributes invisible.
+	 */
+	return (attr == &dev_attr_group_use_token_limit.attr ||
+		attr == &dev_attr_group_use_read_buffer_limit.attr ||
+		attr == &dev_attr_group_tokens_allowed.attr ||
+		attr == &dev_attr_group_read_buffers_allowed.attr ||
+		attr == &dev_attr_group_tokens_reserved.attr ||
+		attr == &dev_attr_group_read_buffers_reserved.attr) &&
+		idxd->data->type == IDXD_TYPE_IAX;
+}
+
 static umode_t idxd_group_attr_visible(struct kobject *kobj,
 				       struct attribute *attr, int n)
 {
@@ -538,6 +554,9 @@ static umode_t idxd_group_attr_visible(struct kobject *kobj,
 	if (idxd_group_attr_progress_limit_invisible(attr, idxd))
 		return 0;
 
+	if (idxd_group_attr_read_buffers_invisible(attr, idxd))
+		return 0;
+
 	return attr->mode;
 }
 
@@ -1552,6 +1571,20 @@ static bool idxd_device_attr_max_batch_size_invisible(struct attribute *attr,
 	       idxd->data->type == IDXD_TYPE_IAX;
 }
 
+static bool idxd_device_attr_read_buffers_invisible(struct attribute *attr,
+						    struct idxd_device *idxd)
+{
+	/*
+	 * Intel IAA does not support Read Buffer allocation control,
+	 * make these attributes invisible.
+	 */
+	return (attr == &dev_attr_max_tokens.attr ||
+		attr == &dev_attr_max_read_buffers.attr ||
+		attr == &dev_attr_token_limit.attr ||
+		attr == &dev_attr_read_buffer_limit.attr) &&
+		idxd->data->type == IDXD_TYPE_IAX;
+}
+
 static umode_t idxd_device_attr_visible(struct kobject *kobj,
 					struct attribute *attr, int n)
 {
@@ -1561,6 +1594,9 @@ static umode_t idxd_device_attr_visible(struct kobject *kobj,
 	if (idxd_device_attr_max_batch_size_invisible(attr, idxd))
 		return 0;
 
+	if (idxd_device_attr_read_buffers_invisible(attr, idxd))
+		return 0;
+
 	return attr->mode;
 }
 
-- 
GitLab


From b018899e620b8ee4529f43bd02e9e8e43043e33e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 4 Nov 2022 11:45:36 -0300
Subject: [PATCH 0924/4122] perf bpf: Rename perf_include_dir to
 libbpf_include_dir

As this is where we expect to find bpf/bpf_helpers.h, etc.

This needs more work to make it follow LIBBPF_DYNAMIC=1 usage, i.e. when
not using the system libbpf it should use the headers in the in-kernel
sources libbpf in tools/lib/bpf.

We need to do that anyway to avoid this mixup system libbpf and
in-kernel files, so we'll get this sorted out that way.

And this also may become moot as we move to using BPF skels for this
feature.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config   | 12 ++++++------
 tools/perf/util/Build        |  2 +-
 tools/perf/util/llvm-utils.c |  8 ++++----
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 59420676dee8e..a7f6c0669fae4 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -1216,7 +1216,7 @@ endif
 
 # Among the variables below, these:
 #   perfexecdir
-#   perf_include_dir
+#   libbpf_include_dir
 #   perf_examples_dir
 #   template_dir
 #   mandir
@@ -1240,7 +1240,7 @@ mandir = share/man
 infodir = share/info
 perfexecdir = libexec/perf-core
 # FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance
-perf_include_dir = /usr/include
+libbpf_include_dir = /usr/include
 perf_examples_dir = lib/perf/examples
 sharedir = $(prefix)/share
 template_dir = share/perf-core/templates
@@ -1273,7 +1273,7 @@ includedir_SQ = $(subst ','\'',$(includedir))
 mandir_SQ = $(subst ','\'',$(mandir))
 infodir_SQ = $(subst ','\'',$(infodir))
 perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
-perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir))
+libbpf_include_dir_SQ = $(subst ','\'',$(libbpf_include_dir))
 perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir))
 template_dir_SQ = $(subst ','\'',$(template_dir))
 htmldir_SQ = $(subst ','\'',$(htmldir))
@@ -1285,13 +1285,13 @@ srcdir_SQ = $(subst ','\'',$(srcdir))
 
 ifneq ($(filter /%,$(firstword $(perfexecdir))),)
 perfexec_instdir = $(perfexecdir)
-perf_include_instdir = $(perf_include_dir)
+perf_include_instdir = $(libbpf_include_dir)
 perf_examples_instdir = $(perf_examples_dir)
 STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR)
 tip_instdir = $(tipdir)
 else
 perfexec_instdir = $(prefix)/$(perfexecdir)
-perf_include_instdir = $(prefix)/$(perf_include_dir)
+perf_include_instdir = $(prefix)/$(libbpf_include_dir)
 perf_examples_instdir = $(prefix)/$(perf_examples_dir)
 STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR)
 tip_instdir = $(prefix)/$(tipdir)
@@ -1353,7 +1353,7 @@ $(call detected_var,ETC_PERFCONFIG_SQ)
 $(call detected_var,STRACE_GROUPS_DIR_SQ)
 $(call detected_var,prefix_SQ)
 $(call detected_var,perfexecdir_SQ)
-$(call detected_var,perf_include_dir_SQ)
+$(call detected_var,libbpf_include_dir_SQ)
 $(call detected_var,perf_examples_dir_SQ)
 $(call detected_var,tipdir_SQ)
 $(call detected_var,srcdir_SQ)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e315ecaec3233..47a7db3ad0a1f 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -220,7 +220,7 @@ perf-$(CONFIG_CXX) += c++/
 perf-$(CONFIG_LIBPFM4) += pfm.o
 
 CFLAGS_config.o   += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))"
-CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))"
+CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))"
 
 # avoid compiler warnings in 32-bit mode
 CFLAGS_genelf_debug.o  += -Wno-packed
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index a5cac85783d87..650ffe336f3aa 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -463,7 +463,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	char *pipe_template = NULL;
 	const char *opts = llvm_param.opts;
 	char *command_echo = NULL, *command_out;
-	char *perf_include_dir = system_path(PERF_INCLUDE_DIR);
+	char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR);
 
 	if (path[0] != '-' && realpath(path, abspath) == NULL) {
 		err = errno;
@@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 
 	snprintf(linux_version_code_str, sizeof(linux_version_code_str),
 		 "0x%x", kernel_version);
-	if (asprintf(&perf_bpf_include_opts, "-I%s/", perf_include_dir) < 0)
+	if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0)
 		goto errout;
 	force_set_env("NR_CPUS", nr_cpus_avail_str);
 	force_set_env("LINUX_VERSION_CODE", linux_version_code_str);
@@ -556,7 +556,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
 	free(kbuild_dir);
 	free(kbuild_include_opts);
 	free(perf_bpf_include_opts);
-	free(perf_include_dir);
+	free(libbpf_include_dir);
 
 	if (!p_obj_buf)
 		free(obj_buf);
@@ -572,7 +572,7 @@ errout:
 	free(kbuild_include_opts);
 	free(obj_buf);
 	free(perf_bpf_include_opts);
-	free(perf_include_dir);
+	free(libbpf_include_dir);
 	free(pipe_template);
 	if (p_obj_buf)
 		*p_obj_buf = NULL;
-- 
GitLab


From 17a36713babe882928b869b427729c85deeb1267 Mon Sep 17 00:00:00 2001
From: Adam Skladowski <a39.skl@gmail.com>
Date: Sun, 30 Oct 2022 11:42:53 +0200
Subject: [PATCH 0925/4122] dt-bindings: dmaengine: qcom: gpi: add compatible
 for SM6115

Document the compatible for GPI DMA controller on SM6115 SoC.

Signed-off-by: Adam Skladowski <a39.skl@gmail.com>
Signed-off-by: Iskren Chernev <iskren.chernev@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221030094258.486428-4-iskren.chernev@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index 0c28944988456..232895fa1d8d2 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -25,6 +25,7 @@ properties:
       - items:
           - enum:
               - qcom,sc7280-gpi-dma
+              - qcom,sm6115-gpi-dma
               - qcom,sm8350-gpi-dma
               - qcom,sm8450-gpi-dma
           - const: qcom,sm6350-gpi-dma
-- 
GitLab


From 739153a6ae6891ff42ed9dbbd8e72dd99e6c8ba5 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 2 Nov 2022 10:27:01 +0800
Subject: [PATCH 0926/4122] Documentation: devres: add missing
 devm_acpi_dma_controller_free() helper

Add missing devm_acpi_dma_controller_free() to devres.rst, it's introduced
by commit 1b2e98bc1e35 ("dma: acpi-dma: introduce ACPI DMA helpers").

Fixes: 1b2e98bc1e35 ("dma: acpi-dma: introduce ACPI DMA helpers")
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: Vinod Koul <vinod.koul@intel.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221102022701.1407289-1-yangyingliang@huawei.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/driver-api/driver-model/devres.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 687adb58048ec..99a989ebee87e 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -437,6 +437,7 @@ SERDEV
 
 SLAVE DMA ENGINE
   devm_acpi_dma_controller_register()
+  devm_acpi_dma_controller_free()
 
 SPI
   devm_spi_alloc_master()
-- 
GitLab


From d59fdbc7164a6b2a0ed45c13387deac8efeed5a2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 3 Nov 2022 22:30:05 +0200
Subject: [PATCH 0927/4122] gpiolib: of: Make use of device_match_of_node()

Make use of device_match_of_node() instead of open coding its
functionality.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 52616848a37c4..4b91e19366a8e 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -85,7 +85,7 @@ static int of_gpiochip_match_node_and_xlate(struct gpio_chip *chip, void *data)
 {
 	struct of_phandle_args *gpiospec = data;
 
-	return chip->gpiodev->dev.of_node == gpiospec->np &&
+	return device_match_of_node(&chip->gpiodev->dev, gpiospec->np) &&
 				chip->of_xlate &&
 				chip->of_xlate(chip, gpiospec, NULL) >= 0;
 }
-- 
GitLab


From 34cb9352b62366038fd2d5b9d9f393f35d0be1e0 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 29 Oct 2022 21:40:46 -0700
Subject: [PATCH 0928/4122] gpiolib: of: factor out quirk setting polarity via
 separate property

Several legacy bindings use a separate property to specify polarity of
GPIOs instead of specifying it directly in the GPIO property. Factor
out this code to make it easier to add more such cases.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 98 +++++++++++++++++++++------------------
 1 file changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 4b91e19366a8e..e72b56a87b86c 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -186,33 +186,68 @@ static void of_gpio_try_fixup_polarity(const struct device_node *np,
 	}
 }
 
-static void of_gpio_flags_quirks(const struct device_node *np,
-				 const char *propname,
-				 enum of_gpio_flags *flags,
-				 int index)
+static void of_gpio_set_polarity_by_property(const struct device_node *np,
+					     const char *propname,
+					     enum of_gpio_flags *flags)
 {
-	of_gpio_try_fixup_polarity(np, propname, flags);
+	static const struct {
+		const char *compatible;
+		const char *gpio_propname;
+		const char *polarity_propname;
+	} gpios[] = {
+#if IS_ENABLED(CONFIG_FEC)
+		/* Freescale Fast Ethernet Controller */
+		{ "fsl,imx25-fec",   "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,imx27-fec",   "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,imx28-fec",   "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,imx6q-fec",   "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,mvf600-fec",  "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,imx6sx-fec",  "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,imx6ul-fec",  "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,imx8mq-fec",  "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,imx8qm-fec",  "phy-reset-gpios", "phy-reset-active-high" },
+		{ "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" },
+#endif
 
-	/*
-	 * Some GPIO fixed regulator quirks.
-	 * Note that active low is the default.
-	 */
-	if (IS_ENABLED(CONFIG_REGULATOR) &&
-	    (of_device_is_compatible(np, "regulator-fixed") ||
-	     of_device_is_compatible(np, "reg-fixed-voltage") ||
-	     (!(strcmp(propname, "enable-gpio") &&
-		strcmp(propname, "enable-gpios")) &&
-	      of_device_is_compatible(np, "regulator-gpio")))) {
-		bool active_high = of_property_read_bool(np,
-							 "enable-active-high");
 		/*
 		 * The regulator GPIO handles are specified such that the
 		 * presence or absence of "enable-active-high" solely controls
 		 * the polarity of the GPIO line. Any phandle flags must
 		 * be actively ignored.
 		 */
-		of_gpio_quirk_polarity(np, active_high, flags);
+#if IS_ENABLED(CONFIG_REGULATOR_FIXED_VOLTAGE)
+		{ "regulator-fixed",   "gpios",        "enable-active-high" },
+		{ "regulator-fixed",   "gpio",         "enable-active-high" },
+		{ "reg-fixed-voltage", "gpios",        "enable-active-high" },
+		{ "reg-fixed-voltage", "gpio",         "enable-active-high" },
+#endif
+#if IS_ENABLED(CONFIG_REGULATOR_GPIO)
+		{ "regulator-gpio",    "enable-gpio",  "enable-active-high" },
+		{ "regulator-gpio",    "enable-gpios", "enable-active-high" },
+#endif
+	};
+	unsigned int i;
+	bool active_high;
+
+	for (i = 0; i < ARRAY_SIZE(gpios); i++) {
+		if (of_device_is_compatible(np, gpios[i].compatible) &&
+		    !strcmp(propname, gpios[i].gpio_propname)) {
+			active_high = of_property_read_bool(np,
+						gpios[i].polarity_propname);
+			of_gpio_quirk_polarity(np, active_high, flags);
+			break;
+		}
 	}
+}
+
+static void of_gpio_flags_quirks(const struct device_node *np,
+				 const char *propname,
+				 enum of_gpio_flags *flags,
+				 int index)
+{
+	of_gpio_try_fixup_polarity(np, propname, flags);
+	of_gpio_set_polarity_by_property(np, propname, flags);
+
 	/*
 	 * Legacy open drain handling for fixed voltage regulators.
 	 */
@@ -267,33 +302,6 @@ static void of_gpio_flags_quirks(const struct device_node *np,
 	    !strcmp(propname, "snps,reset-gpio") &&
 	    of_property_read_bool(np, "snps,reset-active-low"))
 		*flags |= OF_GPIO_ACTIVE_LOW;
-
-	/*
-	 * Freescale Fast Ethernet Controller uses a separate property to
-	 * describe polarity of the phy reset line.
-	 */
-	if (IS_ENABLED(CONFIG_FEC)) {
-		static const char * const fec_devices[] = {
-			"fsl,imx25-fec",
-			"fsl,imx27-fec",
-			"fsl,imx28-fec",
-			"fsl,imx6q-fec",
-			"fsl,mvf600-fec",
-			"fsl,imx6sx-fec",
-			"fsl,imx6ul-fec",
-			"fsl,imx8mq-fec",
-			"fsl,imx8qm-fec",
-			"fsl,s32v234-fec",
-			NULL
-		};
-
-		if (!strcmp(propname, "phy-reset-gpios") &&
-		    of_device_compatible_match(np, fec_devices)) {
-			bool active_high = of_property_read_bool(np,
-						"phy-reset-active-high");
-			of_gpio_quirk_polarity(np, active_high, flags);
-		}
-	}
 }
 
 /**
-- 
GitLab


From b8b80348c57b360019071e17380298619c5d8066 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sat, 29 Oct 2022 21:40:47 -0700
Subject: [PATCH 0929/4122] gpiolib: of: add polarity quirk for Freescale PCIe
 controller

Bindings for Freescale PCIe controller use a separate property called
"reset-gpio-active-high" to control polarity of its reset line, add it
to the list of quirks in gpiolib so that gpiod API can be used in the
driver.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index e72b56a87b86c..be9c34cca3225 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -208,6 +208,15 @@ static void of_gpio_set_polarity_by_property(const struct device_node *np,
 		{ "fsl,imx8qm-fec",  "phy-reset-gpios", "phy-reset-active-high" },
 		{ "fsl,s32v234-fec", "phy-reset-gpios", "phy-reset-active-high" },
 #endif
+#if IS_ENABLED(CONFIG_PCI_IMX6)
+		{ "fsl,imx6q-pcie",  "reset-gpio", "reset-gpio-active-high" },
+		{ "fsl,imx6sx-pcie", "reset-gpio", "reset-gpio-active-high" },
+		{ "fsl,imx6qp-pcie", "reset-gpio", "reset-gpio-active-high" },
+		{ "fsl,imx7d-pcie",  "reset-gpio", "reset-gpio-active-high" },
+		{ "fsl,imx8mq-pcie", "reset-gpio", "reset-gpio-active-high" },
+		{ "fsl,imx8mm-pcie", "reset-gpio", "reset-gpio-active-high" },
+		{ "fsl,imx8mp-pcie", "reset-gpio", "reset-gpio-active-high" },
+#endif
 
 		/*
 		 * The regulator GPIO handles are specified such that the
-- 
GitLab


From 503fa23614dc95f96af883a8e2e873d5c6cd53d8 Mon Sep 17 00:00:00 2001
From: "Maciej W. Rozycki" <macro@orcam.me.uk>
Date: Sat, 17 Sep 2022 13:03:09 +0100
Subject: [PATCH 0930/4122] PCI: Access Link 2 registers only for devices with
 Links

PCIe r2.0, sec 7.8 added Link Capabilities/Status/Control 2 registers to
the PCIe Capability with Capability Version 2.

Previously we assumed these registers were implemented for all PCIe
Capabilities of version 2 or greater, but in fact they are only
implemented for devices with Links.

Update pcie_capability_reg_implemented() to check whether the device has
a Link.

[bhelgaas: commit log, squash export]
Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209100057070.2275@angie.orcam.me.uk
Link: https://lore.kernel.org/r/alpine.DEB.2.21.2209100057300.2275@angie.orcam.me.uk
Signed-off-by: Maciej W. Rozycki <macro@orcam.me.uk>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/access.c | 8 +++++++-
 drivers/pci/pci.h    | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/access.c b/drivers/pci/access.c
index 708c7529647fd..3c230ca3de584 100644
--- a/drivers/pci/access.c
+++ b/drivers/pci/access.c
@@ -350,6 +350,11 @@ bool pcie_cap_has_lnkctl(const struct pci_dev *dev)
 	       type == PCI_EXP_TYPE_PCIE_BRIDGE;
 }
 
+bool pcie_cap_has_lnkctl2(const struct pci_dev *dev)
+{
+	return pcie_cap_has_lnkctl(dev) && pcie_cap_version(dev) > 1;
+}
+
 static inline bool pcie_cap_has_sltctl(const struct pci_dev *dev)
 {
 	return pcie_downstream_port(dev) &&
@@ -390,10 +395,11 @@ static bool pcie_capability_reg_implemented(struct pci_dev *dev, int pos)
 		return pcie_cap_has_rtctl(dev);
 	case PCI_EXP_DEVCAP2:
 	case PCI_EXP_DEVCTL2:
+		return pcie_cap_version(dev) > 1;
 	case PCI_EXP_LNKCAP2:
 	case PCI_EXP_LNKCTL2:
 	case PCI_EXP_LNKSTA2:
-		return pcie_cap_version(dev) > 1;
+		return pcie_cap_has_lnkctl2(dev);
 	default:
 		return false;
 	}
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index b1ebb7ab88051..9ed3b55500432 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -15,6 +15,7 @@ extern const unsigned char pcie_link_speed[];
 extern bool pci_early_dump;
 
 bool pcie_cap_has_lnkctl(const struct pci_dev *dev);
+bool pcie_cap_has_lnkctl2(const struct pci_dev *dev);
 bool pcie_cap_has_rtctl(const struct pci_dev *dev);
 
 /* Functions internal to the PCI core code */
-- 
GitLab


From 4c6b3af3906d0c59497d3bfb07760f3a082b4150 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:23 +0200
Subject: [PATCH 0931/4122] phy: qcom-qmp-usb: fix sc8280xp PCS_USB offset

The PCS_USB register block lives at an offset of 0x1000 from the PCS
region on SC8280XP so add the missing offset to avoid corrupting
unrelated registers on runtime suspend.

Note that the current binding is broken as it does not describe the
PCS_USB region and the PCS register size does not cover PCS_USB and the
regions in between. As Linux currently maps full pages, simply adding
the offset to driver works until the binding has been fixed.

Fixes: c0c7769cdae2 ("phy: qcom-qmp: Add SC8280XP USB3 UNI phy")
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index cd167508f5281..2e1f5dd7e76db 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1682,6 +1682,7 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v4_usb3phy_regs_layout,
+	.pcs_usb_offset		= 0x1000,
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
-- 
GitLab


From 9fe6b4e87c225cfd8aad1be0e73be86611c35b01 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:24 +0200
Subject: [PATCH 0932/4122] phy: qcom-qmp-usb: sort device-id table

Sort the device-id table by compatible string to make it easier to find
and add new entries.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 26 ++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 2e1f5dd7e76db..52b4bee867db3 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2503,14 +2503,20 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id,
 
 static const struct of_device_id qmp_usb_of_match_table[] = {
 	{
+		.compatible = "qcom,ipq6018-qmp-usb3-phy",
+		.data = &ipq8074_usb3phy_cfg,
+	}, {
 		.compatible = "qcom,ipq8074-qmp-usb3-phy",
 		.data = &ipq8074_usb3phy_cfg,
 	}, {
 		.compatible = "qcom,msm8996-qmp-usb3-phy",
 		.data = &msm8996_usb3phy_cfg,
 	}, {
-		.compatible = "qcom,ipq6018-qmp-usb3-phy",
-		.data = &ipq8074_usb3phy_cfg,
+		.compatible = "qcom,msm8998-qmp-usb3-phy",
+		.data = &msm8998_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,qcm2290-qmp-usb3-phy",
+		.data = &qcm2290_usb3phy_cfg,
 	}, {
 		.compatible = "qcom,sc7180-qmp-usb3-phy",
 		.data = &sc7180_usb3phy_cfg,
@@ -2527,8 +2533,11 @@ static const struct of_device_id qmp_usb_of_match_table[] = {
 		.compatible = "qcom,sdm845-qmp-usb3-uni-phy",
 		.data = &qmp_v3_usb3_uniphy_cfg,
 	}, {
-		.compatible = "qcom,msm8998-qmp-usb3-phy",
-		.data = &msm8998_usb3phy_cfg,
+		.compatible = "qcom,sdx55-qmp-usb3-uni-phy",
+		.data = &sdx55_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sdx65-qmp-usb3-uni-phy",
+		.data = &sdx65_usb3_uniphy_cfg,
 	}, {
 		.compatible = "qcom,sm8150-qmp-usb3-phy",
 		.data = &sm8150_usb3phy_cfg,
@@ -2541,12 +2550,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sm8250-qmp-usb3-uni-phy",
 		.data = &sm8250_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sdx55-qmp-usb3-uni-phy",
-		.data = &sdx55_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sdx65-qmp-usb3-uni-phy",
-		.data = &sdx65_usb3_uniphy_cfg,
 	}, {
 		.compatible = "qcom,sm8350-qmp-usb3-phy",
 		.data = &sm8350_usb3phy_cfg,
@@ -2556,9 +2559,6 @@ static const struct of_device_id qmp_usb_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sm8450-qmp-usb3-phy",
 		.data = &sm8350_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,qcm2290-qmp-usb3-phy",
-		.data = &qcm2290_usb3phy_cfg,
 	},
 	{ },
 };
-- 
GitLab


From 95dd63b8988cd914c8b5e805e5599cda2cad161e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:25 +0200
Subject: [PATCH 0933/4122] phy: qcom-qmp-usb: move device-id table

Move the device-id table below probe() and next to the driver structure
to keep the driver callback functions grouped together.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 126 ++++++++++++------------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 52b4bee867db3..ead136c7bd01e 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2501,69 +2501,6 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id,
 	return 0;
 }
 
-static const struct of_device_id qmp_usb_of_match_table[] = {
-	{
-		.compatible = "qcom,ipq6018-qmp-usb3-phy",
-		.data = &ipq8074_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,ipq8074-qmp-usb3-phy",
-		.data = &ipq8074_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,msm8996-qmp-usb3-phy",
-		.data = &msm8996_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,msm8998-qmp-usb3-phy",
-		.data = &msm8998_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,qcm2290-qmp-usb3-phy",
-		.data = &qcm2290_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,sc7180-qmp-usb3-phy",
-		.data = &sc7180_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,sc8180x-qmp-usb3-phy",
-		.data = &sm8150_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,sc8280xp-qmp-usb3-uni-phy",
-		.data = &sc8280xp_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sdm845-qmp-usb3-phy",
-		.data = &qmp_v3_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,sdm845-qmp-usb3-uni-phy",
-		.data = &qmp_v3_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sdx55-qmp-usb3-uni-phy",
-		.data = &sdx55_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sdx65-qmp-usb3-uni-phy",
-		.data = &sdx65_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sm8150-qmp-usb3-phy",
-		.data = &sm8150_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,sm8150-qmp-usb3-uni-phy",
-		.data = &sm8150_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sm8250-qmp-usb3-phy",
-		.data = &sm8250_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,sm8250-qmp-usb3-uni-phy",
-		.data = &sm8250_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sm8350-qmp-usb3-phy",
-		.data = &sm8350_usb3phy_cfg,
-	}, {
-		.compatible = "qcom,sm8350-qmp-usb3-uni-phy",
-		.data = &sm8350_usb3_uniphy_cfg,
-	}, {
-		.compatible = "qcom,sm8450-qmp-usb3-phy",
-		.data = &sm8350_usb3phy_cfg,
-	},
-	{ },
-};
-MODULE_DEVICE_TABLE(of, qmp_usb_of_match_table);
-
 static const struct dev_pm_ops qmp_usb_pm_ops = {
 	SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend,
 			   qmp_usb_runtime_resume, NULL)
@@ -2665,6 +2602,69 @@ err_node_put:
 	return ret;
 }
 
+static const struct of_device_id qmp_usb_of_match_table[] = {
+	{
+		.compatible = "qcom,ipq6018-qmp-usb3-phy",
+		.data = &ipq8074_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,ipq8074-qmp-usb3-phy",
+		.data = &ipq8074_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,msm8996-qmp-usb3-phy",
+		.data = &msm8996_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,msm8998-qmp-usb3-phy",
+		.data = &msm8998_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,qcm2290-qmp-usb3-phy",
+		.data = &qcm2290_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sc7180-qmp-usb3-phy",
+		.data = &sc7180_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sc8180x-qmp-usb3-phy",
+		.data = &sm8150_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sc8280xp-qmp-usb3-uni-phy",
+		.data = &sc8280xp_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sdm845-qmp-usb3-phy",
+		.data = &qmp_v3_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sdm845-qmp-usb3-uni-phy",
+		.data = &qmp_v3_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sdx55-qmp-usb3-uni-phy",
+		.data = &sdx55_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sdx65-qmp-usb3-uni-phy",
+		.data = &sdx65_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sm8150-qmp-usb3-phy",
+		.data = &sm8150_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sm8150-qmp-usb3-uni-phy",
+		.data = &sm8150_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sm8250-qmp-usb3-phy",
+		.data = &sm8250_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sm8250-qmp-usb3-uni-phy",
+		.data = &sm8250_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sm8350-qmp-usb3-phy",
+		.data = &sm8350_usb3phy_cfg,
+	}, {
+		.compatible = "qcom,sm8350-qmp-usb3-uni-phy",
+		.data = &sm8350_usb3_uniphy_cfg,
+	}, {
+		.compatible = "qcom,sm8450-qmp-usb3-phy",
+		.data = &sm8350_usb3phy_cfg,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, qmp_usb_of_match_table);
+
 static struct platform_driver qmp_usb_driver = {
 	.probe		= qmp_usb_probe,
 	.driver = {
-- 
GitLab


From 9c9beef111a763513545a0ac0c60220fea64b063 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:26 +0200
Subject: [PATCH 0934/4122] phy: qcom-qmp-usb: move pm ops

Move the PM ops structure next to the implementation to keep the driver
callbacks grouped.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index ead136c7bd01e..2a4535494d389 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2280,6 +2280,11 @@ static int __maybe_unused qmp_usb_runtime_resume(struct device *dev)
 	return 0;
 }
 
+static const struct dev_pm_ops qmp_usb_pm_ops = {
+	SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend,
+			   qmp_usb_runtime_resume, NULL)
+};
+
 static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
 	struct qcom_qmp *qmp = dev_get_drvdata(dev);
@@ -2501,11 +2506,6 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id,
 	return 0;
 }
 
-static const struct dev_pm_ops qmp_usb_pm_ops = {
-	SET_RUNTIME_PM_OPS(qmp_usb_runtime_suspend,
-			   qmp_usb_runtime_resume, NULL)
-};
-
 static int qmp_usb_probe(struct platform_device *pdev)
 {
 	struct qcom_qmp *qmp;
-- 
GitLab


From 2a55ec4f0a048e0aa12022b52009d8d8667ee3d3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:27 +0200
Subject: [PATCH 0935/4122] phy: qcom-qmp-usb: merge driver data

The USB QMP PHY driver only manages a single PHY so merge the old
qcom_qmp and qmp_phy structures and drop the PHY array.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 239 ++++++++++--------------
 1 file changed, 96 insertions(+), 143 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 2a4535494d389..4a1c7ac3f7842 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1453,60 +1453,30 @@ struct qmp_phy_cfg {
 	unsigned int pcs_usb_offset;
 };
 
-/**
- * struct qmp_phy - per-lane phy descriptor
- *
- * @phy: generic phy
- * @cfg: phy specific configuration
- * @serdes: iomapped memory space for phy's serdes (i.e. PLL)
- * @tx: iomapped memory space for lane's tx
- * @rx: iomapped memory space for lane's rx
- * @pcs: iomapped memory space for lane's pcs
- * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs)
- * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs)
- * @pcs_misc: iomapped memory space for lane's pcs_misc
- * @pcs_usb: iomapped memory space for lane's pcs_usb
- * @pipe_clk: pipe clock
- * @qmp: QMP phy to which this lane belongs
- * @mode: current PHY mode
- */
-struct qmp_phy {
-	struct phy *phy;
+struct qmp_usb {
+	struct device *dev;
+
 	const struct qmp_phy_cfg *cfg;
+
 	void __iomem *serdes;
+	void __iomem *pcs;
+	void __iomem *pcs_misc;
+	void __iomem *pcs_usb;
 	void __iomem *tx;
 	void __iomem *rx;
-	void __iomem *pcs;
 	void __iomem *tx2;
 	void __iomem *rx2;
-	void __iomem *pcs_misc;
-	void __iomem *pcs_usb;
-	struct clk *pipe_clk;
-	struct qcom_qmp *qmp;
-	enum phy_mode mode;
-};
 
-/**
- * struct qcom_qmp - structure holding QMP phy block attributes
- *
- * @dev: device
- * @dp_com: iomapped memory space for phy's dp_com control block
- *
- * @clks: array of clocks required by phy
- * @resets: array of resets required by phy
- * @vregs: regulator supplies bulk data
- *
- * @phys: array of per-lane phy descriptors
- */
-struct qcom_qmp {
-	struct device *dev;
 	void __iomem *dp_com;
 
+	struct clk *pipe_clk;
 	struct clk_bulk_data *clks;
 	struct reset_control_bulk_data *resets;
 	struct regulator_bulk_data *vregs;
 
-	struct qmp_phy **phys;
+	enum phy_mode mode;
+
+	struct phy *phy;
 };
 
 static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
@@ -1976,10 +1946,10 @@ static void qmp_usb_configure(void __iomem *base,
 	qmp_usb_configure_lane(base, tbl, num, 0xff);
 }
 
-static int qmp_usb_serdes_init(struct qmp_phy *qphy)
+static int qmp_usb_serdes_init(struct qmp_usb *qmp)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *serdes = qphy->serdes;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *serdes = qmp->serdes;
 	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
 	int serdes_tbl_num = cfg->serdes_tbl_num;
 
@@ -1990,10 +1960,9 @@ static int qmp_usb_serdes_init(struct qmp_phy *qphy)
 
 static int qmp_usb_init(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs = qphy->pcs;
+	struct qmp_usb *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *pcs = qmp->pcs;
 	void __iomem *dp_com = qmp->dp_com;
 	int ret;
 
@@ -2056,9 +2025,8 @@ err_disable_regulators:
 
 static int qmp_usb_exit(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_usb *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
 	reset_control_bulk_assert(cfg->num_resets, qmp->resets);
 
@@ -2071,19 +2039,18 @@ static int qmp_usb_exit(struct phy *phy)
 
 static int qmp_usb_power_on(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *tx = qphy->tx;
-	void __iomem *rx = qphy->rx;
-	void __iomem *pcs = qphy->pcs;
+	struct qmp_usb *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *tx = qmp->tx;
+	void __iomem *rx = qmp->rx;
+	void __iomem *pcs = qmp->pcs;
 	void __iomem *status;
 	unsigned int val;
 	int ret;
 
-	qmp_usb_serdes_init(qphy);
+	qmp_usb_serdes_init(qmp);
 
-	ret = clk_prepare_enable(qphy->pipe_clk);
+	ret = clk_prepare_enable(qmp->pipe_clk);
 	if (ret) {
 		dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret);
 		return ret;
@@ -2093,12 +2060,12 @@ static int qmp_usb_power_on(struct phy *phy)
 	qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_usb_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+		qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
 	qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_usb_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
+		qmp_usb_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
 	qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
@@ -2122,27 +2089,27 @@ static int qmp_usb_power_on(struct phy *phy)
 	return 0;
 
 err_disable_pipe_clk:
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 
 	return ret;
 }
 
 static int qmp_usb_power_off(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_usb *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 
 	/* PHY reset */
-	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+	qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes and Phy-Coding-Sublayer */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL],
 			SERDES_START | PCS_START);
 
 	/* Put PHY into POWER DOWN state: active low */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			SW_PWRDN);
 
 	return 0;
@@ -2175,22 +2142,22 @@ static int qmp_usb_disable(struct phy *phy)
 
 static int qmp_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_usb *qmp = phy_get_drvdata(phy);
 
-	qphy->mode = mode;
+	qmp->mode = mode;
 
 	return 0;
 }
 
-static void qmp_usb_enable_autonomous_mode(struct qmp_phy *qphy)
+static void qmp_usb_enable_autonomous_mode(struct qmp_usb *qmp)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs;
-	void __iomem *pcs_misc = qphy->pcs_misc;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs;
+	void __iomem *pcs_misc = qmp->pcs_misc;
 	u32 intr_mask;
 
-	if (qphy->mode == PHY_MODE_USB_HOST_SS ||
-	    qphy->mode == PHY_MODE_USB_DEVICE_SS)
+	if (qmp->mode == PHY_MODE_USB_HOST_SS ||
+	    qmp->mode == PHY_MODE_USB_DEVICE_SS)
 		intr_mask = ARCVR_DTCT_EN | ALFPS_DTCT_EN;
 	else
 		intr_mask = ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL;
@@ -2211,11 +2178,11 @@ static void qmp_usb_enable_autonomous_mode(struct qmp_phy *qphy)
 		qphy_clrbits(pcs_misc, QPHY_V3_PCS_MISC_CLAMP_ENABLE, CLAMP_EN);
 }
 
-static void qmp_usb_disable_autonomous_mode(struct qmp_phy *qphy)
+static void qmp_usb_disable_autonomous_mode(struct qmp_usb *qmp)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs;
-	void __iomem *pcs_misc = qphy->pcs_misc;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs;
+	void __iomem *pcs_misc = qmp->pcs_misc;
 
 	/* Disable i/o clamp_n on resume for normal mode */
 	if (pcs_misc)
@@ -2231,20 +2198,19 @@ static void qmp_usb_disable_autonomous_mode(struct qmp_phy *qphy)
 
 static int __maybe_unused qmp_usb_runtime_suspend(struct device *dev)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
-	struct qmp_phy *qphy = qmp->phys[0];
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_usb *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
-	dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode);
+	dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qmp->mode);
 
-	if (!qphy->phy->init_count) {
+	if (!qmp->phy->init_count) {
 		dev_vdbg(dev, "PHY not initialized, bailing out\n");
 		return 0;
 	}
 
-	qmp_usb_enable_autonomous_mode(qphy);
+	qmp_usb_enable_autonomous_mode(qmp);
 
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 	clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks);
 
 	return 0;
@@ -2252,14 +2218,13 @@ static int __maybe_unused qmp_usb_runtime_suspend(struct device *dev)
 
 static int __maybe_unused qmp_usb_runtime_resume(struct device *dev)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
-	struct qmp_phy *qphy = qmp->phys[0];
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_usb *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	int ret = 0;
 
-	dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode);
+	dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qmp->mode);
 
-	if (!qphy->phy->init_count) {
+	if (!qmp->phy->init_count) {
 		dev_vdbg(dev, "PHY not initialized, bailing out\n");
 		return 0;
 	}
@@ -2268,14 +2233,14 @@ static int __maybe_unused qmp_usb_runtime_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	ret = clk_prepare_enable(qphy->pipe_clk);
+	ret = clk_prepare_enable(qmp->pipe_clk);
 	if (ret) {
 		dev_err(dev, "pipe_clk enable failed, err=%d\n", ret);
 		clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks);
 		return ret;
 	}
 
-	qmp_usb_disable_autonomous_mode(qphy);
+	qmp_usb_disable_autonomous_mode(qmp);
 
 	return 0;
 }
@@ -2287,7 +2252,7 @@ static const struct dev_pm_ops qmp_usb_pm_ops = {
 
 static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_usb *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_vregs;
 	int i;
 
@@ -2303,7 +2268,7 @@ static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 
 static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_usb *qmp = dev_get_drvdata(dev);
 	int i;
 	int ret;
 
@@ -2324,7 +2289,7 @@ static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 
 static int qmp_usb_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_usb *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_clks;
 	int i;
 
@@ -2361,7 +2326,7 @@ static void phy_clk_release_provider(void *res)
  *    clk  |   +-------+   |                   +-----+
  *         +---------------+
  */
-static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np)
+static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np)
 {
 	struct clk_fixed_rate *fixed;
 	struct clk_init_data init = { };
@@ -2420,13 +2385,11 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np,
 	return devm_of_iomap(dev, np, index, NULL);
 }
 
-static
-int qmp_usb_create(struct device *dev, struct device_node *np, int id,
+static int qmp_usb_create(struct device *dev, struct device_node *np,
 			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_usb *qmp = dev_get_drvdata(dev);
 	struct phy *generic_phy;
-	struct qmp_phy *qphy;
 	bool exclusive = true;
 	int ret;
 
@@ -2439,81 +2402,75 @@ int qmp_usb_create(struct device *dev, struct device_node *np, int id,
 	if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy"))
 		exclusive = false;
 
-	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
-	if (!qphy)
-		return -ENOMEM;
-
-	qphy->cfg = cfg;
-	qphy->serdes = serdes;
+	qmp->cfg = cfg;
+	qmp->serdes = serdes;
 	/*
 	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
 	 */
-	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
-	if (IS_ERR(qphy->tx))
-		return PTR_ERR(qphy->tx);
+	qmp->tx = devm_of_iomap(dev, np, 0, NULL);
+	if (IS_ERR(qmp->tx))
+		return PTR_ERR(qmp->tx);
 
-	qphy->rx = devm_of_iomap(dev, np, 1, NULL);
-	if (IS_ERR(qphy->rx))
-		return PTR_ERR(qphy->rx);
+	qmp->rx = devm_of_iomap(dev, np, 1, NULL);
+	if (IS_ERR(qmp->rx))
+		return PTR_ERR(qmp->rx);
 
-	qphy->pcs = qmp_usb_iomap(dev, np, 2, exclusive);
-	if (IS_ERR(qphy->pcs))
-		return PTR_ERR(qphy->pcs);
+	qmp->pcs = qmp_usb_iomap(dev, np, 2, exclusive);
+	if (IS_ERR(qmp->pcs))
+		return PTR_ERR(qmp->pcs);
 
 	if (cfg->pcs_usb_offset)
-		qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset;
+		qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset;
 
 	if (cfg->lanes >= 2) {
-		qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
-		if (IS_ERR(qphy->tx2))
-			return PTR_ERR(qphy->tx2);
+		qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
+		if (IS_ERR(qmp->tx2))
+			return PTR_ERR(qmp->tx2);
 
-		qphy->rx2 = devm_of_iomap(dev, np, 4, NULL);
-		if (IS_ERR(qphy->rx2))
-			return PTR_ERR(qphy->rx2);
+		qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
+		if (IS_ERR(qmp->rx2))
+			return PTR_ERR(qmp->rx2);
 
-		qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
+		qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
 	} else {
-		qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
+		qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
 	}
 
-	if (IS_ERR(qphy->pcs_misc)) {
+	if (IS_ERR(qmp->pcs_misc)) {
 		dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
-		qphy->pcs_misc = NULL;
+		qmp->pcs_misc = NULL;
 	}
 
-	qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
-	if (IS_ERR(qphy->pipe_clk)) {
-		return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk),
-				     "failed to get lane%d pipe clock\n", id);
+	qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
+	if (IS_ERR(qmp->pipe_clk)) {
+		return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+				     "failed to get pipe clock\n");
 	}
 
 	generic_phy = devm_phy_create(dev, np, &qmp_usb_ops);
 	if (IS_ERR(generic_phy)) {
 		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create qphy %d\n", ret);
+		dev_err(dev, "failed to create PHY: %d\n", ret);
 		return ret;
 	}
 
-	qphy->phy = generic_phy;
-	qphy->qmp = qmp;
-	qmp->phys[id] = qphy;
-	phy_set_drvdata(generic_phy, qphy);
+	qmp->phy = generic_phy;
+	phy_set_drvdata(generic_phy, qmp);
 
 	return 0;
 }
 
 static int qmp_usb_probe(struct platform_device *pdev)
 {
-	struct qcom_qmp *qmp;
 	struct device *dev = &pdev->dev;
 	struct device_node *child;
 	struct phy_provider *phy_provider;
 	void __iomem *serdes;
 	const struct qmp_phy_cfg *cfg = NULL;
+	struct qmp_usb *qmp;
 	int num, id;
 	int ret;
 
@@ -2555,10 +2512,6 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	if (num > 1)
 		return -EINVAL;
 
-	qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL);
-	if (!qmp->phys)
-		return -ENOMEM;
-
 	pm_runtime_set_active(dev);
 	ret = devm_pm_runtime_enable(dev);
 	if (ret)
@@ -2572,7 +2525,7 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	id = 0;
 	for_each_available_child_of_node(dev->of_node, child) {
 		/* Create per-lane phy */
-		ret = qmp_usb_create(dev, child, id, serdes, cfg);
+		ret = qmp_usb_create(dev, child, serdes, cfg);
 		if (ret) {
 			dev_err(dev, "failed to create lane%d phy, %d\n",
 				id, ret);
-- 
GitLab


From 8ec02ba8493639c721fc63ed05c06891061ef9b5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:28 +0200
Subject: [PATCH 0936/4122] phy: qcom-qmp-usb: clean up device-tree parsing

Since the QMP driver split there will be at most a single child node so
drop the obsolete iteration construct.

While at it, drop the verbose error logging that would have been
printed also on probe deferrals.

Note that there's no need to check if there are additional child nodes
(the kernel is not a devicetree validator), but let's return an error if
there are no child nodes at all for now.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 40 ++++++++-----------------
 1 file changed, 12 insertions(+), 28 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 4a1c7ac3f7842..05ceab23258a1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2471,7 +2471,6 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	void __iomem *serdes;
 	const struct qmp_phy_cfg *cfg = NULL;
 	struct qmp_usb *qmp;
-	int num, id;
 	int ret;
 
 	qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL);
@@ -2507,44 +2506,29 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	num = of_get_available_child_count(dev->of_node);
-	/* do we have a rogue child node ? */
-	if (num > 1)
+	child = of_get_next_available_child(dev->of_node, NULL);
+	if (!child)
 		return -EINVAL;
 
 	pm_runtime_set_active(dev);
 	ret = devm_pm_runtime_enable(dev);
 	if (ret)
-		return ret;
+		goto err_node_put;
 	/*
 	 * Prevent runtime pm from being ON by default. Users can enable
 	 * it using power/control in sysfs.
 	 */
 	pm_runtime_forbid(dev);
 
-	id = 0;
-	for_each_available_child_of_node(dev->of_node, child) {
-		/* Create per-lane phy */
-		ret = qmp_usb_create(dev, child, serdes, cfg);
-		if (ret) {
-			dev_err(dev, "failed to create lane%d phy, %d\n",
-				id, ret);
-			goto err_node_put;
-		}
-
-		/*
-		 * Register the pipe clock provided by phy.
-		 * See function description to see details of this pipe clock.
-		 */
-		ret = phy_pipe_clk_register(qmp, child);
-		if (ret) {
-			dev_err(qmp->dev,
-				"failed to register pipe clock source\n");
-			goto err_node_put;
-		}
-
-		id++;
-	}
+	ret = qmp_usb_create(dev, child, serdes, cfg);
+	if (ret)
+		goto err_node_put;
+
+	ret = phy_pipe_clk_register(qmp, child);
+	if (ret)
+		goto err_node_put;
+
+	of_node_put(child);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
-- 
GitLab


From 413db06c05e729639e9b64cf7ab5d918b8182006 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:29 +0200
Subject: [PATCH 0937/4122] phy: qcom-qmp-usb: clean up probe initialisation

Stop abusing the driver data pointer and instead pass the driver state
structure directly to the initialisation helpers during probe.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 46 ++++++++++++-------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 05ceab23258a1..d3c0b994b9395 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2250,9 +2250,10 @@ static const struct dev_pm_ops qmp_usb_pm_ops = {
 			   qmp_usb_runtime_resume, NULL)
 };
 
-static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_usb_vreg_init(struct qmp_usb *qmp)
 {
-	struct qmp_usb *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_vregs;
 	int i;
 
@@ -2266,9 +2267,10 @@ static int qmp_usb_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 	return devm_regulator_bulk_get(dev, num, qmp->vregs);
 }
 
-static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_usb_reset_init(struct qmp_usb *qmp)
 {
-	struct qmp_usb *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int i;
 	int ret;
 
@@ -2287,9 +2289,10 @@ static int qmp_usb_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 	return 0;
 }
 
-static int qmp_usb_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_usb_clk_init(struct qmp_usb *qmp)
 {
-	struct qmp_usb *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_clks;
 	int i;
 
@@ -2385,10 +2388,10 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np,
 	return devm_of_iomap(dev, np, index, NULL);
 }
 
-static int qmp_usb_create(struct device *dev, struct device_node *np,
-			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
+static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np)
 {
-	struct qmp_usb *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	struct phy *generic_phy;
 	bool exclusive = true;
 	int ret;
@@ -2402,8 +2405,6 @@ static int qmp_usb_create(struct device *dev, struct device_node *np,
 	if (of_device_is_compatible(dev->of_node, "qcom,sm8350-qmp-usb3-uni-phy"))
 		exclusive = false;
 
-	qmp->cfg = cfg;
-	qmp->serdes = serdes;
 	/*
 	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
@@ -2468,8 +2469,6 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *child;
 	struct phy_provider *phy_provider;
-	void __iomem *serdes;
-	const struct qmp_phy_cfg *cfg = NULL;
 	struct qmp_usb *qmp;
 	int ret;
 
@@ -2478,31 +2477,30 @@ static int qmp_usb_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	qmp->dev = dev;
-	dev_set_drvdata(dev, qmp);
 
-	cfg = of_device_get_match_data(dev);
-	if (!cfg)
+	qmp->cfg = of_device_get_match_data(dev);
+	if (!qmp->cfg)
 		return -EINVAL;
 
-	serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(serdes))
-		return PTR_ERR(serdes);
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
 
-	if (cfg->has_phy_dp_com_ctrl) {
+	if (qmp->cfg->has_phy_dp_com_ctrl) {
 		qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
 		if (IS_ERR(qmp->dp_com))
 			return PTR_ERR(qmp->dp_com);
 	}
 
-	ret = qmp_usb_clk_init(dev, cfg);
+	ret = qmp_usb_clk_init(qmp);
 	if (ret)
 		return ret;
 
-	ret = qmp_usb_reset_init(dev, cfg);
+	ret = qmp_usb_reset_init(qmp);
 	if (ret)
 		return ret;
 
-	ret = qmp_usb_vreg_init(dev, cfg);
+	ret = qmp_usb_vreg_init(qmp);
 	if (ret)
 		return ret;
 
@@ -2520,7 +2518,7 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	 */
 	pm_runtime_forbid(dev);
 
-	ret = qmp_usb_create(dev, child, serdes, cfg);
+	ret = qmp_usb_create(qmp, child);
 	if (ret)
 		goto err_node_put;
 
-- 
GitLab


From 8fe2b2b745a123640ead94743cf28c380fec9b58 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:30 +0200
Subject: [PATCH 0938/4122] phy: qcom-qmp-usb: rename PHY ops structure

Rename the PHY operation structure so that it has a "phy_ops" suffix and
move it next to the implementation.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index d3c0b994b9395..0158399920b86 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2149,6 +2149,13 @@ static int qmp_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 	return 0;
 }
 
+static const struct phy_ops qmp_usb_phy_ops = {
+	.init		= qmp_usb_enable,
+	.exit		= qmp_usb_disable,
+	.set_mode	= qmp_usb_set_mode,
+	.owner		= THIS_MODULE,
+};
+
 static void qmp_usb_enable_autonomous_mode(struct qmp_usb *qmp)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -2366,13 +2373,6 @@ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np)
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static const struct phy_ops qmp_usb_ops = {
-	.init		= qmp_usb_enable,
-	.exit		= qmp_usb_disable,
-	.set_mode	= qmp_usb_set_mode,
-	.owner		= THIS_MODULE,
-};
-
 static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np,
 					int index, bool exclusive)
 {
@@ -2451,7 +2451,7 @@ static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np)
 				     "failed to get pipe clock\n");
 	}
 
-	generic_phy = devm_phy_create(dev, np, &qmp_usb_ops);
+	generic_phy = devm_phy_create(dev, np, &qmp_usb_phy_ops);
 	if (IS_ERR(generic_phy)) {
 		ret = PTR_ERR(generic_phy);
 		dev_err(dev, "failed to create PHY: %d\n", ret);
-- 
GitLab


From 876420fb7b98934a4f78d8976c7ff095a13c90b5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:31 +0200
Subject: [PATCH 0939/4122] phy: qcom-qmp-usb: clean up PHY init

Clean up the PHY initialisation somewhat by programming both tx and rx
for the second lane after the first lane.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 0158399920b86..cf1e04e9daf7a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2058,14 +2058,12 @@ static int qmp_usb_power_on(struct phy *phy)
 
 	/* Tx, Rx, and PCS configurations */
 	qmp_usb_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
-
-	if (cfg->lanes >= 2)
-		qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
-
 	qmp_usb_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
-	if (cfg->lanes >= 2)
+	if (cfg->lanes >= 2) {
+		qmp_usb_configure_lane(qmp->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 		qmp_usb_configure_lane(qmp->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
+	}
 
 	qmp_usb_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
-- 
GitLab


From 500e9d37fb9ead52498af1b035933c62a487efe8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:32 +0200
Subject: [PATCH 0940/4122] dt-bindings: phy: qcom,qmp-usb: rename current
 bindings

The current QMP USB PHY bindings are based on the original MSM8996
binding which provided multiple PHYs per IP block and these in turn were
described by child nodes.

Later QMP USB PHY blocks only provide a single PHY and the remnant child
node does not really reflect the hardware.

The original MSM8996 binding also ended up describing the individual
register blocks as belonging to either the wrapper node or the PHY child
nodes.

This is an unnecessary level of detail which has lead to problems when
later IP blocks using different register layouts have been forced to fit
the original mould rather than updating the binding. The bindings are
arguable also incomplete as they only the describe register blocks used
by the current Linux drivers (e.g. does not include the per lane PCS
registers).

In preparation for adding new bindings for SC8280XP which further
bindings can be based on, rename the current bindings after MSM8996 and
add a reference to the SC8280XP bindings.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 ...com,qmp-usb-phy.yaml => qcom,msm8996-qmp-usb3-phy.yaml} | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
 rename Documentation/devicetree/bindings/phy/{qcom,qmp-usb-phy.yaml => qcom,msm8996-qmp-usb3-phy.yaml} (97%)

diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml
similarity index 97%
rename from Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml
rename to Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml
index 7acb4b7de7f94..58ac84de8eee5 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/phy/qcom,qmp-usb-phy.yaml#
+$id: http://devicetree.org/schemas/phy/qcom,msm8996-qmp-usb3-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm QMP PHY controller (USB)
+title: Qualcomm QMP PHY controller (USB, MSM8996)
 
 maintainers:
   - Vinod Koul <vkoul@kernel.org>
@@ -13,6 +13,9 @@ description:
   QMP PHY controller supports physical layer functionality for a number of
   controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB.
 
+  Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see
+  qcom,sc8280xp-qmp-usb3-uni-phy.yaml.
+
 properties:
   compatible:
     enum:
-- 
GitLab


From e8e58e29a0c9310a917448d0c4a1857f0dbfd917 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:33 +0200
Subject: [PATCH 0941/4122] dt-bindings: phy: qcom,qmp-usb: fix sc8280xp
 binding

The current QMP USB PHY bindings are based on the original MSM8996 PCIe
PHY binding which provided multiple PHYs per IP block and these in turn
were described by child nodes.

The QMP USB PHY block only provide a single PHY and the remnant child
node does not really reflect the hardware.

The original MSM8996 binding also ended up describing the individual
register blocks as belonging to either the wrapper node or the PHY child
nodes.

This is an unnecessary level of detail which has lead to problems when
later IP blocks using different register layouts have been forced to fit
the original mould rather than updating the binding. The bindings are
arguable also incomplete as they only the describe register blocks used
by the current Linux drivers (e.g. does not include the per lane PCS
registers).

Note that PCS_USB region is also not described by the current bindings
despite being used by the driver and this has led to people increasing
the size of the PCS region in the devicetree so that it includes PCS_USB
registers even though other regions like TX and RX may lie in between.

Add a new binding for the QMP USB PHYs found on SC8280XP which further
bindings can be based on.

Note that this also fixes the SC8280XP "phy_phy" reset name.

Also note that the current binding is simply removed instead of being
deprecated as it was only recently merged and support for SC8280XP is
still under development. And, specifically, there is no support in
mainline for the multiport controller that uses these PHYs.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-12-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/qcom,msm8996-qmp-usb3-phy.yaml        |  13 ---
 .../phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml   | 105 ++++++++++++++++++
 2 files changed, 105 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml

diff --git a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml
index 58ac84de8eee5..0c6b3ba7346b9 100644
--- a/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,msm8996-qmp-usb3-phy.yaml
@@ -26,7 +26,6 @@ properties:
       - qcom,qcm2290-qmp-usb3-phy
       - qcom,sc7180-qmp-usb3-phy
       - qcom,sc8180x-qmp-usb3-phy
-      - qcom,sc8280xp-qmp-usb3-uni-phy
       - qcom,sdm845-qmp-usb3-phy
       - qcom,sdm845-qmp-usb3-uni-phy
       - qcom,sdx55-qmp-usb3-uni-phy
@@ -204,7 +203,6 @@ allOf:
         compatible:
           contains:
             enum:
-              - qcom,sc8280xp-qmp-usb3-uni-phy
               - qcom,sm8150-qmp-usb3-phy
               - qcom,sm8150-qmp-usb3-uni-phy
               - qcom,sm8250-qmp-usb3-uni-phy
@@ -271,16 +269,6 @@ allOf:
             - const: phy_phy
             - const: phy
 
-  - if:
-      properties:
-        compatible:
-          contains:
-            enum:
-              - qcom,sc8280xp-qmp-usb3-uni-phy
-    then:
-      required:
-        - power-domains
-
   - if:
       properties:
         compatible:
@@ -352,7 +340,6 @@ allOf:
           contains:
             enum:
               - qcom,msm8996-qmp-usb3-phy
-              - qcom,sc8280xp-qmp-usb3-uni-phy
               - qcom,sm8250-qmp-usb3-uni-phy
               - qcom,sm8350-qmp-usb3-uni-phy
     then:
diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml
new file mode 100644
index 0000000000000..ef080509747ae
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml
@@ -0,0 +1,105 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QMP PHY controller (USB, SC8280XP)
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+
+description:
+  The QMP PHY controller supports physical layer functionality for a number of
+  controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sc8280xp-qmp-usb3-uni-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 5
+
+  clock-names:
+    items:
+      - const: aux
+      - const: ref_clk_src
+      - const: ref
+      - const: com_aux
+      - const: pipe
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 2
+
+  reset-names:
+    items:
+      - const: phy
+      - const: phy_phy
+
+  vdda-phy-supply: true
+
+  vdda-pll-supply: true
+
+  "#clock-cells":
+    const: 0
+
+  clock-output-names:
+    maxItems: 1
+
+  "#phy-cells":
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - power-domains
+  - resets
+  - reset-names
+  - vdda-phy-supply
+  - vdda-pll-supply
+  - "#clock-cells"
+  - clock-output-names
+  - "#phy-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-sc8280xp.h>
+    #include <dt-bindings/clock/qcom,rpmh.h>
+
+    phy@88ef000 {
+      compatible = "qcom,sc8280xp-qmp-usb3-uni-phy";
+      reg = <0x088ef000 0x2000>;
+
+      clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>,
+               <&rpmhcc RPMH_CXO_CLK>,
+               <&gcc GCC_USB3_MP0_CLKREF_CLK>,
+               <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>,
+               <&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>;
+      clock-names = "aux", "ref_clk_src", "ref", "com_aux",
+                    "pipe";
+
+      power-domains = <&gcc USB30_MP_GDSC>;
+
+      resets = <&gcc GCC_USB3_UNIPHY_MP0_BCR>,
+               <&gcc GCC_USB3UNIPHY_PHY_MP0_BCR>;
+      reset-names = "phy", "phy_phy";
+
+      vdda-phy-supply = <&vreg_l3a>;
+      vdda-pll-supply = <&vreg_l5a>;
+
+      #clock-cells = <0>;
+      clock-output-names = "usb2_phy0_pipe_clk";
+
+      #phy-cells = <0>;
+    };
-- 
GitLab


From 183462e8c92cce5cbfabecc1719fb61c61b70833 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:34 +0200
Subject: [PATCH 0942/4122] phy: qcom-qmp-usb: restructure PHY creation

In preparation for supporting devicetree bindings which do not use a
child node, move the PHY creation to probe() proper and parse the serdes
and dp_com resources in what is now the legacy devicetree helper.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-13-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 46 ++++++++++++-------------
 1 file changed, 22 insertions(+), 24 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index cf1e04e9daf7a..fa87e90a821eb 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -2386,13 +2386,22 @@ static void __iomem *qmp_usb_iomap(struct device *dev, struct device_node *np,
 	return devm_of_iomap(dev, np, index, NULL);
 }
 
-static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np)
+static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np)
 {
+	struct platform_device *pdev = to_platform_device(qmp->dev);
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	struct device *dev = qmp->dev;
-	struct phy *generic_phy;
 	bool exclusive = true;
-	int ret;
+
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
+
+	if (cfg->has_phy_dp_com_ctrl) {
+		qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(qmp->dp_com))
+			return PTR_ERR(qmp->dp_com);
+	}
 
 	/*
 	 * FIXME: These bindings should be fixed to not rely on overlapping
@@ -2449,16 +2458,6 @@ static int qmp_usb_create(struct qmp_usb *qmp, struct device_node *np)
 				     "failed to get pipe clock\n");
 	}
 
-	generic_phy = devm_phy_create(dev, np, &qmp_usb_phy_ops);
-	if (IS_ERR(generic_phy)) {
-		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create PHY: %d\n", ret);
-		return ret;
-	}
-
-	qmp->phy = generic_phy;
-	phy_set_drvdata(generic_phy, qmp);
-
 	return 0;
 }
 
@@ -2480,16 +2479,6 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	if (!qmp->cfg)
 		return -EINVAL;
 
-	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(qmp->serdes))
-		return PTR_ERR(qmp->serdes);
-
-	if (qmp->cfg->has_phy_dp_com_ctrl) {
-		qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
-		if (IS_ERR(qmp->dp_com))
-			return PTR_ERR(qmp->dp_com);
-	}
-
 	ret = qmp_usb_clk_init(qmp);
 	if (ret)
 		return ret;
@@ -2516,7 +2505,7 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	 */
 	pm_runtime_forbid(dev);
 
-	ret = qmp_usb_create(qmp, child);
+	ret = qmp_usb_parse_dt_legacy(qmp, child);
 	if (ret)
 		goto err_node_put;
 
@@ -2524,6 +2513,15 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_node_put;
 
+	qmp->phy = devm_phy_create(dev, child, &qmp_usb_phy_ops);
+	if (IS_ERR(qmp->phy)) {
+		ret = PTR_ERR(qmp->phy);
+		dev_err(dev, "failed to create PHY: %d\n", ret);
+		goto err_node_put;
+	}
+
+	phy_set_drvdata(qmp->phy, qmp);
+
 	of_node_put(child);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
-- 
GitLab


From c0a6c25283672facaa57cb3daad71c6586736312 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 28 Oct 2022 18:04:35 +0200
Subject: [PATCH 0943/4122] phy: qcom-qmp-usb: add support for updated sc8280xp
 binding

Add support for the new SC8280XP binding.

Note that the binding does not try to describe every register subregion
and instead the driver holds the corresponding offsets. This includes
the PCS_USB region which was initially overlooked.

Note that the driver will no longer accept the old binding due to the
fixed "phy_phy" reset name.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221028160435.26948-14-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 82 ++++++++++++++++++++-----
 1 file changed, 67 insertions(+), 15 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index fa87e90a821eb..55029ea63f731 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1414,10 +1414,20 @@ static const struct qmp_phy_init_tbl sc8280xp_usb3_uniphy_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x21),
 };
 
+struct qmp_usb_offsets {
+	u16 serdes;
+	u16 pcs;
+	u16 pcs_usb;
+	u16 tx;
+	u16 rx;
+};
+
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
 	int lanes;
 
+	const struct qmp_usb_offsets *offsets;
+
 	/* Init sequence for PHY blocks - serdes, tx, rx, pcs */
 	const struct qmp_phy_init_tbl *serdes_tbl;
 	int serdes_tbl_num;
@@ -1548,6 +1558,14 @@ static const char * const qmp_phy_vreg_l[] = {
 	"vdda-phy", "vdda-pll",
 };
 
+static const struct qmp_usb_offsets qmp_usb_offsets_v5 = {
+	.serdes		= 0,
+	.pcs		= 0x0200,
+	.pcs_usb	= 0x1200,
+	.tx		= 0x0e00,
+	.rx		= 0x1000,
+};
+
 static const struct qmp_phy_cfg ipq8074_usb3phy_cfg = {
 	.lanes			= 1,
 
@@ -1637,6 +1655,8 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
 	.lanes			= 1,
 
+	.offsets		= &qmp_usb_offsets_v5,
+
 	.serdes_tbl		= sc8280xp_usb3_uniphy_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(sc8280xp_usb3_uniphy_serdes_tbl),
 	.tx_tbl			= sc8280xp_usb3_uniphy_tx_tbl,
@@ -1647,12 +1667,11 @@ static const struct qmp_phy_cfg sc8280xp_usb3_uniphy_cfg = {
 	.pcs_tbl_num		= ARRAY_SIZE(sc8280xp_usb3_uniphy_pcs_tbl),
 	.clk_list		= qmp_v4_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.reset_list		= qcm2290_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(qcm2290_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v4_usb3phy_regs_layout,
-	.pcs_usb_offset		= 0x1000,
 };
 
 static const struct qmp_phy_cfg qmp_v3_usb3_uniphy_cfg = {
@@ -2461,11 +2480,41 @@ static int qmp_usb_parse_dt_legacy(struct qmp_usb *qmp, struct device_node *np)
 	return 0;
 }
 
+static int qmp_usb_parse_dt(struct qmp_usb *qmp)
+{
+	struct platform_device *pdev = to_platform_device(qmp->dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	const struct qmp_usb_offsets *offs = cfg->offsets;
+	struct device *dev = qmp->dev;
+	void __iomem *base;
+
+	if (!offs)
+		return -EINVAL;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	qmp->serdes = base + offs->serdes;
+	qmp->pcs = base + offs->pcs;
+	qmp->pcs_usb = base + offs->pcs_usb;
+	qmp->tx = base + offs->tx;
+	qmp->rx = base + offs->rx;
+
+	qmp->pipe_clk = devm_clk_get(dev, "pipe");
+	if (IS_ERR(qmp->pipe_clk)) {
+		return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+				     "failed to get pipe clock\n");
+	}
+
+	return 0;
+}
+
 static int qmp_usb_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *child;
 	struct phy_provider *phy_provider;
+	struct device_node *np;
 	struct qmp_usb *qmp;
 	int ret;
 
@@ -2491,9 +2540,16 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	child = of_get_next_available_child(dev->of_node, NULL);
-	if (!child)
-		return -EINVAL;
+	/* Check for legacy binding with child node. */
+	np = of_get_next_available_child(dev->of_node, NULL);
+	if (np) {
+		ret = qmp_usb_parse_dt_legacy(qmp, np);
+	} else {
+		np = of_node_get(dev->of_node);
+		ret = qmp_usb_parse_dt(qmp);
+	}
+	if (ret)
+		goto err_node_put;
 
 	pm_runtime_set_active(dev);
 	ret = devm_pm_runtime_enable(dev);
@@ -2505,15 +2561,11 @@ static int qmp_usb_probe(struct platform_device *pdev)
 	 */
 	pm_runtime_forbid(dev);
 
-	ret = qmp_usb_parse_dt_legacy(qmp, child);
-	if (ret)
-		goto err_node_put;
-
-	ret = phy_pipe_clk_register(qmp, child);
+	ret = phy_pipe_clk_register(qmp, np);
 	if (ret)
 		goto err_node_put;
 
-	qmp->phy = devm_phy_create(dev, child, &qmp_usb_phy_ops);
+	qmp->phy = devm_phy_create(dev, np, &qmp_usb_phy_ops);
 	if (IS_ERR(qmp->phy)) {
 		ret = PTR_ERR(qmp->phy);
 		dev_err(dev, "failed to create PHY: %d\n", ret);
@@ -2522,14 +2574,14 @@ static int qmp_usb_probe(struct platform_device *pdev)
 
 	phy_set_drvdata(qmp->phy, qmp);
 
-	of_node_put(child);
+	of_node_put(np);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
 	return PTR_ERR_OR_ZERO(phy_provider);
 
 err_node_put:
-	of_node_put(child);
+	of_node_put(np);
 	return ret;
 }
 
-- 
GitLab


From 32fb07f35675c4c3311ae370471ee1ae6cc3e694 Mon Sep 17 00:00:00 2001
From: Justin Chen <justinpopo6@gmail.com>
Date: Wed, 5 Oct 2022 14:30:13 -0700
Subject: [PATCH 0944/4122] phy: usb: Improve port mode selection

Split port modes into two different variables. Supported port modes
is what the hardware supports. While port mode is how the hardware
is currently configured and can be dynamically changed through the
sysfs. We initialize all supported port modes on init even though
the port mode may not be selected because we cannot guarantee the
downstream interface from the phy will be active or not.

This also fixes an issue where port modes selected via sysfs were
not being saved through suspend/resume.

Signed-off-by: Justin Chen <justinpopo6@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/1665005418-15807-2-git-send-email-justinpopo6@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 12 ++++++------
 drivers/phy/broadcom/phy-brcm-usb-init.c          | 10 +++++-----
 drivers/phy/broadcom/phy-brcm-usb-init.h          | 10 +++++-----
 drivers/phy/broadcom/phy-brcm-usb.c               | 14 +++++++++-----
 4 files changed, 25 insertions(+), 21 deletions(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index d2524b70ea161..430a8ae0cd24b 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -195,10 +195,10 @@ static void usb_init_common(struct brcm_usb_init_params *params)
 	if (USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE)) {
 		reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 		reg &= ~USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE);
-		reg |= params->mode;
+		reg |= params->port_mode;
 		brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 	}
-	switch (params->mode) {
+	switch (params->supported_port_modes) {
 	case USB_CTLR_MODE_HOST:
 		USB_CTRL_UNSET(ctrl, USB_PM, BDC_SOFT_RESETB);
 		break;
@@ -276,7 +276,7 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params)
 	/* Set the PHY_MODE */
 	reg = brcm_usb_readl(usb_phy + USB_PHY_UTMI_CTL_1);
 	reg &= ~USB_PHY_UTMI_CTL_1_PHY_MODE_MASK;
-	reg |= params->mode << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT;
+	reg |= params->supported_port_modes << USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT;
 	brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1);
 
 	usb_init_common(params);
@@ -286,7 +286,7 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params)
 	 * the default "Read Transaction Size" of 6 (1024 bytes).
 	 * Set it to 4 (256 bytes).
 	 */
-	if ((params->mode != USB_CTLR_MODE_HOST) && bdc_ec) {
+	if ((params->supported_port_modes != USB_CTLR_MODE_HOST) && bdc_ec) {
 		reg = brcm_usb_readl(bdc_ec + BDC_EC_AXIRDA);
 		reg &= ~BDC_EC_AXIRDA_RTS_MASK;
 		reg |= (0x4 << BDC_EC_AXIRDA_RTS_SHIFT);
@@ -385,7 +385,7 @@ static int usb_get_dual_select(struct brcm_usb_init_params *params)
 	return reg;
 }
 
-static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode)
+static void usb_set_dual_select(struct brcm_usb_init_params *params)
 {
 	void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
 	u32 reg;
@@ -394,7 +394,7 @@ static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode)
 
 	reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 	reg &= ~USB_CTRL_MASK(USB_DEVICE_CTL1, PORT_MODE);
-	reg |= mode;
+	reg |= params->port_mode;
 	brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 }
 
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c
index dddcbd3cd5f3d..a7f8b3d3264d5 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init.c
@@ -876,11 +876,11 @@ static void usb_init_common(struct brcm_usb_init_params *params)
 		reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 		reg &= ~USB_CTRL_MASK_FAMILY(params, USB_DEVICE_CTL1,
 					PORT_MODE);
-		reg |= params->mode;
+		reg |= params->port_mode;
 		brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 	}
 	if (USB_CTRL_MASK_FAMILY(params, USB_PM, BDC_SOFT_RESETB)) {
-		switch (params->mode) {
+		switch (params->supported_port_modes) {
 		case USB_CTLR_MODE_HOST:
 			USB_CTRL_UNSET_FAMILY(params, USB_PM, BDC_SOFT_RESETB);
 			break;
@@ -891,7 +891,7 @@ static void usb_init_common(struct brcm_usb_init_params *params)
 		}
 	}
 	if (USB_CTRL_MASK_FAMILY(params, SETUP, CC_DRD_MODE_ENABLE)) {
-		if (params->mode == USB_CTLR_MODE_TYPEC_PD)
+		if (params->supported_port_modes == USB_CTLR_MODE_TYPEC_PD)
 			USB_CTRL_SET_FAMILY(params, SETUP, CC_DRD_MODE_ENABLE);
 		else
 			USB_CTRL_UNSET_FAMILY(params, SETUP,
@@ -1000,7 +1000,7 @@ static int usb_get_dual_select(struct brcm_usb_init_params *params)
 	return reg;
 }
 
-static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode)
+static void usb_set_dual_select(struct brcm_usb_init_params *params)
 {
 	void __iomem *ctrl = params->regs[BRCM_REGS_CTRL];
 	u32 reg;
@@ -1011,7 +1011,7 @@ static void usb_set_dual_select(struct brcm_usb_init_params *params, int mode)
 		reg = brcm_usb_readl(USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 		reg &= ~USB_CTRL_MASK_FAMILY(params, USB_DEVICE_CTL1,
 					PORT_MODE);
-		reg |= mode;
+		reg |= params->port_mode;
 		brcm_usb_writel(reg, USB_CTRL_REG(ctrl, USB_DEVICE_CTL1));
 	}
 }
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h
index 1ccb5ddab865c..bedf2b8e2f196 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init.h
+++ b/drivers/phy/broadcom/phy-brcm-usb-init.h
@@ -45,14 +45,15 @@ struct brcm_usb_init_ops {
 	void (*uninit_eohci)(struct brcm_usb_init_params *params);
 	void (*uninit_xhci)(struct brcm_usb_init_params *params);
 	int  (*get_dual_select)(struct brcm_usb_init_params *params);
-	void (*set_dual_select)(struct brcm_usb_init_params *params, int mode);
+	void (*set_dual_select)(struct brcm_usb_init_params *params);
 };
 
 struct  brcm_usb_init_params {
 	void __iomem *regs[BRCM_REGS_MAX];
 	int ioc;
 	int ipp;
-	int mode;
+	int supported_port_modes;
+	int port_mode;
 	u32 family_id;
 	u32 product_id;
 	int selected_family;
@@ -153,11 +154,10 @@ static inline int brcm_usb_get_dual_select(struct brcm_usb_init_params *ini)
 	return 0;
 }
 
-static inline void brcm_usb_set_dual_select(struct brcm_usb_init_params *ini,
-	int mode)
+static inline void brcm_usb_set_dual_select(struct brcm_usb_init_params *ini)
 {
 	if (ini->ops->set_dual_select)
-		ini->ops->set_dual_select(ini, mode);
+		ini->ops->set_dual_select(ini);
 }
 
 #endif /* _USB_BRCM_COMMON_INIT_H */
diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index 2cb3779fcdf82..99d4deabfd976 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -233,7 +233,7 @@ static ssize_t dr_mode_show(struct device *dev,
 	return sprintf(buf, "%s\n",
 		value_to_name(&brcm_dr_mode_to_name[0],
 			      ARRAY_SIZE(brcm_dr_mode_to_name),
-			      priv->ini.mode));
+			      priv->ini.supported_port_modes));
 }
 static DEVICE_ATTR_RO(dr_mode);
 
@@ -249,7 +249,8 @@ static ssize_t dual_select_store(struct device *dev,
 	res = name_to_value(&brcm_dual_mode_to_name[0],
 			    ARRAY_SIZE(brcm_dual_mode_to_name), buf, &value);
 	if (!res) {
-		brcm_usb_set_dual_select(&priv->ini, value);
+		priv->ini.port_mode = value;
+		brcm_usb_set_dual_select(&priv->ini);
 		res = len;
 	}
 	mutex_unlock(&sysfs_lock);
@@ -495,13 +496,16 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
 	of_property_read_u32(dn, "brcm,ipp", &priv->ini.ipp);
 	of_property_read_u32(dn, "brcm,ioc", &priv->ini.ioc);
 
-	priv->ini.mode = USB_CTLR_MODE_HOST;
+	priv->ini.supported_port_modes = USB_CTLR_MODE_HOST;
 	err = of_property_read_string(dn, "dr_mode", &mode);
 	if (err == 0) {
 		name_to_value(&brcm_dr_mode_to_name[0],
 			      ARRAY_SIZE(brcm_dr_mode_to_name),
-			mode, &priv->ini.mode);
+			mode, &priv->ini.supported_port_modes);
 	}
+	/* Default port_mode to supported port_modes */
+	priv->ini.port_mode = priv->ini.supported_port_modes;
+
 	if (of_property_read_bool(dn, "brcm,has-xhci"))
 		priv->has_xhci = true;
 	if (of_property_read_bool(dn, "brcm,has-eohci"))
@@ -539,7 +543,7 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
 	 * Create sysfs entries for mode.
 	 * Remove "dual_select" attribute if not in dual mode
 	 */
-	if (priv->ini.mode != USB_CTLR_MODE_DRD)
+	if (priv->ini.supported_port_modes != USB_CTLR_MODE_DRD)
 		brcm_usb_phy_attrs[1] = NULL;
 	err = sysfs_create_group(&dev->kobj, &brcm_usb_phy_group);
 	if (err)
-- 
GitLab


From 19526717f768bf2f89ca01bd2a595728ebe57540 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 2 Nov 2022 22:31:19 +0100
Subject: [PATCH 0945/4122] objtool: Optimize elf_dirty_reloc_sym()

When moving a symbol in the symtab its index changes and any reloc
referring that symtol-table-index will need to be rewritten too.

In order to facilitate this, objtool simply marks the whole reloc
section 'changed' which will cause the whole section to be
re-generated.

However, finding the relocs that use any given symbol is implemented
rather crudely -- a fully iteration of all sections and their relocs.
Given that some builds have over 20k sections (kallsyms etc..)
iterating all that for *each* symbol moved takes a bit of time.

Instead have each symbol keep a list of relocs that reference it.

This *vastly* improves build times for certain configs.

Reported-by: Borislav Petkov <bp@alien8.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/Y2LlRA7x+8UsE1xf@hirez.programming.kicks-ass.net
---
 tools/objtool/elf.c                 | 27 ++++++++++-----------------
 tools/objtool/include/objtool/elf.h |  2 ++
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 3d636d12d679f..8cd7f018002c5 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -356,6 +356,7 @@ static void elf_add_symbol(struct elf *elf, struct symbol *sym)
 	struct rb_node *pnode;
 	struct symbol *iter;
 
+	INIT_LIST_HEAD(&sym->reloc_list);
 	INIT_LIST_HEAD(&sym->pv_target);
 	sym->alias = sym;
 
@@ -557,6 +558,7 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
 	reloc->sym = sym;
 	reloc->addend = addend;
 
+	list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list);
 	list_add_tail(&reloc->list, &sec->reloc->reloc_list);
 	elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
@@ -573,21 +575,10 @@ int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
  */
 static void elf_dirty_reloc_sym(struct elf *elf, struct symbol *sym)
 {
-	struct section *sec;
-
-	list_for_each_entry(sec, &elf->sections, list) {
-		struct reloc *reloc;
-
-		if (sec->changed)
-			continue;
+	struct reloc *reloc;
 
-		list_for_each_entry(reloc, &sec->reloc_list, list) {
-			if (reloc->sym == sym) {
-				sec->changed = true;
-				break;
-			}
-		}
-	}
+	list_for_each_entry(reloc, &sym->reloc_list, sym_reloc_entry)
+		reloc->sec->changed = true;
 }
 
 /*
@@ -902,11 +893,12 @@ static int read_rela_reloc(struct section *sec, int i, struct reloc *reloc, unsi
 
 static int read_relocs(struct elf *elf)
 {
+	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
 	struct section *sec;
 	struct reloc *reloc;
-	int i;
 	unsigned int symndx;
-	unsigned long nr_reloc, max_reloc = 0, tot_reloc = 0;
+	struct symbol *sym;
+	int i;
 
 	if (!elf_alloc_hash(reloc, elf->text_size / 16))
 		return -1;
@@ -947,13 +939,14 @@ static int read_relocs(struct elf *elf)
 
 			reloc->sec = sec;
 			reloc->idx = i;
-			reloc->sym = find_symbol_by_index(elf, symndx);
+			reloc->sym = sym = find_symbol_by_index(elf, symndx);
 			if (!reloc->sym) {
 				WARN("can't find reloc entry symbol %d for %s",
 				     symndx, sec->name);
 				return -1;
 			}
 
+			list_add_tail(&reloc->sym_reloc_entry, &sym->reloc_list);
 			list_add_tail(&reloc->list, &sec->reloc_list);
 			elf_hash_add(reloc, &reloc->hash, reloc_hash(reloc));
 
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index b6974e3173aa5..bca719b2104b8 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -62,6 +62,7 @@ struct symbol {
 	u8 fentry            : 1;
 	u8 profiling_func    : 1;
 	struct list_head pv_target;
+	struct list_head reloc_list;
 };
 
 struct reloc {
@@ -73,6 +74,7 @@ struct reloc {
 	};
 	struct section *sec;
 	struct symbol *sym;
+	struct list_head sym_reloc_entry;
 	unsigned long offset;
 	unsigned int type;
 	s64 addend;
-- 
GitLab


From 023f2340f053537cce170c31c430b0886c6f07ca Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 3 Nov 2022 20:57:51 +0100
Subject: [PATCH 0946/4122] objtool: Fix weak hole vs prefix symbol

Boris (and the robot) reported that objtool grew a new complaint about
unreachable instructions. Upon inspection it was immediately clear
the __weak zombie instructions struck again.

For the unweary, the linker will simply remove the symbol for
overriden __weak symbols but leave the instructions in place, creating
unreachable instructions -- and objtool likes to report these.

Commit 4adb23686795 ("objtool: Ignore extra-symbol code") was supposed
to have dealt with that, but the new commit 9f2899fe36a6 ("objtool:
Add option to generate prefix symbols") subtly broke that logic by
created unvisited symbols.

Fixes: 9f2899fe36a6 ("objtool: Add option to generate prefix symbols")
Reported-by: Borislav Petkov <bp@alien8.de>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 tools/objtool/check.c | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 55066c4935702..4f1a7384426b4 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -4053,8 +4053,28 @@ static int add_prefix_symbol(struct objtool_file *file, struct symbol *func,
 
 		offset = func->offset - prev->offset;
 		if (offset >= opts.prefix) {
-			if (offset == opts.prefix)
+			if (offset == opts.prefix) {
+				/*
+				 * Since the sec->symbol_list is ordered by
+				 * offset (see elf_add_symbol()) the added
+				 * symbol will not be seen by the iteration in
+				 * validate_section().
+				 *
+				 * Hence the lack of list_for_each_entry_safe()
+				 * there.
+				 *
+				 * The direct concequence is that prefix symbols
+				 * don't get visited (because pointless), except
+				 * for the logic in ignore_unreachable_insn()
+				 * that needs the terminating insn to be visited
+				 * otherwise it will report the hole.
+				 *
+				 * Hence mark the first instruction of the
+				 * prefix symbol as visisted.
+				 */
+				prev->visited |= VISITED_BRANCH;
 				elf_create_prefix_symbol(file->elf, func, opts.prefix);
+			}
 			break;
 		}
 		insn = prev;
-- 
GitLab


From b32fd8a60f5d855758208c2b5b49cba8087f03c4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 3 Nov 2022 21:17:03 +0100
Subject: [PATCH 0947/4122] x86,pm: Force out-of-line memcpy()

GCC fancies inlining memcpy(), and because it cannot prove the
destination is page-aligned (it is) it ends up generating atrocious
code like:

 19e:   48 8b 15 00 00 00 00    mov    0x0(%rip),%rdx        # 1a5 <relocate_restore_code+0x25> 1a1: R_X86_64_PC32      core_restore_code-0x4
 1a5:   48 8d 78 08             lea    0x8(%rax),%rdi
 1a9:   48 89 c1                mov    %rax,%rcx
 1ac:   48 c7 c6 00 00 00 00    mov    $0x0,%rsi        1af: R_X86_64_32S       core_restore_code
 1b3:   48 83 e7 f8             and    $0xfffffffffffffff8,%rdi
 1b7:   48 29 f9                sub    %rdi,%rcx
 1ba:   48 89 10                mov    %rdx,(%rax)
 1bd:   48 8b 15 00 00 00 00    mov    0x0(%rip),%rdx        # 1c4 <relocate_restore_code+0x44> 1c0: R_X86_64_PC32      core_restore_code+0xff4
 1c4:   48 29 ce                sub    %rcx,%rsi
 1c7:   81 c1 00 10 00 00       add    $0x1000,%ecx
 1cd:   48 89 90 f8 0f 00 00    mov    %rdx,0xff8(%rax)
 1d4:   c1 e9 03                shr    $0x3,%ecx
 1d7:   f3 48 a5                rep movsq %ds:(%rsi),%es:(%rdi)

Notably the alignment code generates a text reference to
code_restore_code+0xff8, for which objtool raises the objection:

  vmlinux.o: warning: objtool: relocate_restore_code+0x3d: relocation to !ENDBR: next_arg+0x18

Applying some __assume_aligned(PAGE_SIZE) improve code-gen to:

 19e:   48 89 c7                mov    %rax,%rdi
 1a1:   48 c7 c6 00 00 00 00    mov    $0x0,%rsi        1a4: R_X86_64_32S       core_restore_code
 1a8:   b9 00 02 00 00          mov    $0x200,%ecx
 1ad:   f3 48 a5                rep movsq %ds:(%rsi),%es:(%rdi)

And resolve the problem, however, none of this is important code and
a much simpler solution still is to force a memcpy() call:

 1a1:   ba 00 10 00 00          mov    $0x1000,%edx
 1a6:   48 c7 c6 00 00 00 00    mov    $0x0,%rsi        1a9: R_X86_64_32S       core_restore_code
 1ad:   e8 00 00 00 00          call   1b2 <relocate_restore_code+0x32> 1ae: R_X86_64_PLT32     __memcpy-0x4

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/power/hibernate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index e94e0050a583a..6f955eb1e1631 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -159,7 +159,7 @@ int relocate_restore_code(void)
 	if (!relocated_restore_code)
 		return -ENOMEM;
 
-	memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
+	__memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
 
 	/* Make the page containing the relocated code executable */
 	pgd = (pgd_t *)__va(read_cr3_pa()) +
-- 
GitLab


From 4fd5f70ce14da230c6a29648c3d51a48ee0b4bfd Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 1 Nov 2022 10:25:07 -0700
Subject: [PATCH 0948/4122] x86/Kconfig: Enable kernel IBT by default

The kernel IBT defense strongly mitigates the common "first step" of ROP
attacks, by eliminating arbitrary stack pivots (that appear either at
the end of a function or in immediate values), which cannot be reached
if indirect calls must be to marked function entry addresses. IBT is
also required to be enabled to gain the FineIBT feature when built with
Kernel Control Flow Integrity.

Additionally, given that this feature is runtime enabled via CPU ID,
it clearly should be built in by default; it will only be enabled if the
CPU supports it. The build takes 2 seconds longer, which seems a small
price to pay for gaining this coverage by default.

Suggested-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221101172503.gonna.094-kees@kernel.org
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 479ee63898f57..aaf1f0f461617 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1856,7 +1856,7 @@ config CC_HAS_IBT
 
 config X86_KERNEL_IBT
 	prompt "Indirect Branch Tracking"
-	bool
+	def_bool y
 	depends on X86_64 && CC_HAS_IBT && HAVE_OBJTOOL
 	# https://github.com/llvm/llvm-project/commit/9d7001eba9c4cb311e03cd8cdc231f9e579f2d0f
 	depends on !LD_IS_LLD || LLD_VERSION >= 140000
-- 
GitLab


From c7d47d51663c2cbb9f07ac827f9dffa98d6cab21 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 2 Nov 2022 14:44:59 -0400
Subject: [PATCH 0949/4122] dt-bindings: power: supply: define
 monitored-battery in common place

Define the type of monitored-battery in power-supply.yaml common schema.
Reference the schema where applicable to enforce the above in bindings
which have monitored-battery property.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 Documentation/devicetree/bindings/mfd/ene-kb930.yaml        | 6 +++---
 Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml   | 2 ++
 Documentation/devicetree/bindings/power/supply/bq27xxx.yaml | 2 --
 .../devicetree/bindings/power/supply/ingenic,battery.yaml   | 4 ++--
 .../devicetree/bindings/power/supply/power-supply.yaml      | 6 ++++++
 .../devicetree/bindings/power/supply/rohm,bd99954.yaml      | 1 +
 .../devicetree/bindings/power/supply/sc2731-charger.yaml    | 1 -
 7 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml
index 06ed9ec8f4bbc..7c0a42390f189 100644
--- a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml
+++ b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml
@@ -13,6 +13,8 @@ description: |
 maintainers:
   - Dmitry Osipenko <digetx@gmail.com>
 
+$ref: /schemas/power/supply/power-supply.yaml
+
 properties:
   compatible:
     items:
@@ -22,15 +24,13 @@ properties:
   reg:
     maxItems: 1
 
-  monitored-battery: true
-  power-supplies: true
   system-power-controller: true
 
 required:
   - compatible
   - reg
 
-additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml b/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml
index 935e17099213f..269fb85b20278 100644
--- a/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml
+++ b/Documentation/devicetree/bindings/mfd/rockchip,rk817.yaml
@@ -124,6 +124,8 @@ properties:
       The child node for the charger to hold additional properties. If a
       battery is not in use, this node can be omitted.
     type: object
+    $ref: /schemas/power/supply/power-supply.yaml
+
     properties:
       monitored-battery:
         description: |
diff --git a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
index 65fc6049efc14..347d4433adc5f 100644
--- a/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq27xxx.yaml
@@ -60,13 +60,11 @@ properties:
 
   monitored-battery:
     description: |
-       phandle of battery characteristics node.
        The fuel gauge uses the following battery properties:
        - energy-full-design-microwatt-hours
        - charge-full-design-microamp-hours
        - voltage-min-design-microvolt
        Both or neither of the *-full-design-*-hours properties must be set.
-       See Documentation/devicetree/bindings/power/supply/battery.yaml
 
   power-supplies: true
 
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
index 46527038bf227..42fcfc0269726 100644
--- a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
@@ -10,6 +10,8 @@ title: Ingenic JZ47xx battery bindings
 maintainers:
   - Artur Rojek <contact@artur-rojek.eu>
 
+$ref: power-supply.yaml#
+
 properties:
   compatible:
     oneOf:
@@ -28,8 +30,6 @@ properties:
 
   monitored-battery:
     description: >
-      phandle to a "simple-battery" compatible node.
-
       This property must be a phandle to a node using the format described
       in battery.yaml, with the following properties being required:
       - voltage-min-design-microvolt: drained battery voltage,
diff --git a/Documentation/devicetree/bindings/power/supply/power-supply.yaml b/Documentation/devicetree/bindings/power/supply/power-supply.yaml
index 2f672e6e8d72c..4e54c937973ee 100644
--- a/Documentation/devicetree/bindings/power/supply/power-supply.yaml
+++ b/Documentation/devicetree/bindings/power/supply/power-supply.yaml
@@ -18,4 +18,10 @@ properties:
       This property is added to a supply in order to list the devices which
       supply it power, referenced by their phandles.
 
+  monitored-battery:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      The battery (with "simple-battery" compatible) being monitored by this
+      power supply.
+
 additionalProperties: true
diff --git a/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml b/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml
index 24b06957b4ca5..14d9b42eda27d 100644
--- a/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml
+++ b/Documentation/devicetree/bindings/power/supply/rohm,bd99954.yaml
@@ -18,6 +18,7 @@ description: |
   provides a Dual-source Battery Charger, two port BC1.2 detection and a
   Battery Monitor.
 
+$ref: power-supply.yaml#
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
index eeb043f9bb4f2..735f7d372ae17 100644
--- a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
@@ -28,7 +28,6 @@ properties:
       The charger uses the following battery properties
       - charge-term-current-microamp: current for charge termination phase.
       - constant-charge-voltage-max-microvolt: maximum constant input voltage.
-      See Documentation/devicetree/bindings/power/supply/battery.yaml
 
 additionalProperties: false
 
-- 
GitLab


From 8c9e038a2a36af49526cd509dbd8a1b34284cac6 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 2 Nov 2022 14:45:00 -0400
Subject: [PATCH 0950/4122] dt-bindings: power: supply: bq25890: allow
 power-supply fields

The BQ25890 schema references common power-supply.yaml, so allow all its
properties to fix warnings like:

  arch/arm64/boot/dts/freescale/imx8mq-librem5-r2.dtb: charger@6a: 'monitored-battery', 'power-supplies', ... do not match any of the regexes: 'pinctrl-[0-9]+'

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 Documentation/devicetree/bindings/power/supply/bq25890.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.yaml b/Documentation/devicetree/bindings/power/supply/bq25890.yaml
index 204c0147188f2..fd01ff10b4e00 100644
--- a/Documentation/devicetree/bindings/power/supply/bq25890.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq25890.yaml
@@ -93,7 +93,7 @@ required:
   - ti,boost-voltage
   - ti,boost-max-current
 
-additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
-- 
GitLab


From 70547f34deedcb039c8f92c603b70bc8d8992432 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 2 Nov 2022 14:45:01 -0400
Subject: [PATCH 0951/4122] dt-bindings: power: supply: maxim,max17042: allow
 power-supplies

MAX17042 Fuel Gauge uses power-supplies supplies:

  arch/arm64/boot/dts/freescale/imx8mq-librem5-r2.dtb: fuel-gauge@36: 'power-supplies' does not match any of the regexes: 'pinctrl-[0-9]+'

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../devicetree/bindings/power/supply/maxim,max17042.yaml        | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
index aff5d0792e0fe..64a0edb7bc473 100644
--- a/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
+++ b/Documentation/devicetree/bindings/power/supply/maxim,max17042.yaml
@@ -59,6 +59,8 @@ properties:
       Voltage threshold to report battery as over voltage (in mV).
       Default is not to report over-voltage events.
 
+  power-supplies: true
+
 required:
   - compatible
   - reg
-- 
GitLab


From 09b327c991d21f7b432ba7b88b2368a6970c3916 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 2 Nov 2022 14:20:11 -0400
Subject: [PATCH 0952/4122] dt-bindings: power: supply: bq25890: use one
 fallback compatible

BQ2589[0256] seem compatible between each other in major aspects and
remaining features are auto-detectable (by reading device revision ID
register).  Existing DTS already uses the compatibles with a fallback,
so adjust the bindings to reflect this and fix dtbs_check warning:

  arch/arm64/boot/dts/freescale/imx8mq-librem5-r2.dtb: charger@6a: compatible: ['ti,bq25895', 'ti,bq25890'] is too long

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 .../devicetree/bindings/power/supply/bq25890.yaml  | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.yaml b/Documentation/devicetree/bindings/power/supply/bq25890.yaml
index fd01ff10b4e00..ee51b6335e728 100644
--- a/Documentation/devicetree/bindings/power/supply/bq25890.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq25890.yaml
@@ -15,11 +15,15 @@ allOf:
 
 properties:
   compatible:
-    enum:
-      - ti,bq25890
-      - ti,bq25892
-      - ti,bq25895
-      - ti,bq25896
+    oneOf:
+      - enum:
+          - ti,bq25890
+      - items:
+          - enum:
+              - ti,bq25892
+              - ti,bq25895
+              - ti,bq25896
+          - const: ti,bq25890
 
   reg:
     maxItems: 1
-- 
GitLab


From b1599915f09157e98f59556e1b2eafe473603347 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@kernel.org>
Date: Sun, 6 Nov 2022 09:55:56 +0100
Subject: [PATCH 0953/4122] x86/cpufeatures: Move X86_FEATURE_CALL_DEPTH from
 bit 18 to bit 19 of word 11, to leave space for WIP X86_FEATURE_SGX_EDECCSSA
 bit

Reallocate a soft-cpufeatures bit allocated for call-depth tracking
code, which clashes with this recent KVM/SGX patch being worked on:

        KVM/VMX: Allow exposing EDECCSSA user leaf function to KVM guest

Instead of reallocating cpufeatures bits in evil merges, make the
allocation explicit.

Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: x86@kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/cpufeatures.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index aefd0816a333f..864c9b0dda687 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -304,7 +304,8 @@
 #define X86_FEATURE_UNRET		(11*32+15) /* "" AMD BTB untrain return */
 #define X86_FEATURE_USE_IBPB_FW		(11*32+16) /* "" Use IBPB during runtime firmware calls */
 #define X86_FEATURE_RSB_VMEXIT_LITE	(11*32+17) /* "" Fill RSB on VM exit when EIBRS is enabled */
-#define X86_FEATURE_CALL_DEPTH		(11*32+18) /* "" Call depth tracking for RSB stuffing */
+						   /* Hole left for X86_FEATURE_SGX_EDECCSSA */
+#define X86_FEATURE_CALL_DEPTH		(11*32+19) /* "" Call depth tracking for RSB stuffing */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
-- 
GitLab


From abef378c434e6f5abd46fd536e9972374fb74e98 Mon Sep 17 00:00:00 2001
From: Arumugam Kolappan <aru.kolappan@oracle.com>
Date: Tue, 1 Nov 2022 00:27:44 -0700
Subject: [PATCH 0954/4122] RDMA/mlx5: Change debug log level for remote access
 error syndromes

The mlx5 driver dumps the entire CQE buffer by default for few syndromes.
Some syndromes are expected due to the application behavior [ex:
MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR, MLX5_CQE_SYNDROME_REMOTE_OP_ERR and
MLX5_CQE_SYNDROME_LOCAL_PROT_ERR]. Hence, for these syndromes, the patch
converts the log level from KERN_WARNING to KERN_DEBUG. This enables the
application to get the CQE buffer dump by changing to KERN_DEBUG level
as and when needed.

Suggested-by: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Arumugam Kolappan <aru.kolappan@oracle.com>
Link: https://lore.kernel.org/r/1667287664-19377-1-git-send-email-aru.kolappan@oracle.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/cq.c      | 27 +++++++++++++++------------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  4 ++++
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index be189e0525de6..efc9e4a6df04a 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -267,17 +267,20 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 	wc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
 }
 
-static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe)
+static void dump_cqe(struct mlx5_ib_dev *dev, struct mlx5_err_cqe *cqe,
+		     struct ib_wc *wc, const char *level)
 {
-	mlx5_ib_warn(dev, "dump error cqe\n");
-	mlx5_dump_err_cqe(dev->mdev, cqe);
+	mlx5_ib_log(level, dev, "WC error: %d, Message: %s\n", wc->status,
+		    ib_wc_status_msg(wc->status));
+	print_hex_dump(level, "cqe_dump: ", DUMP_PREFIX_OFFSET, 16, 1,
+		       cqe, sizeof(*cqe), false);
 }
 
 static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
 				  struct mlx5_err_cqe *cqe,
 				  struct ib_wc *wc)
 {
-	int dump = 1;
+	const char *dump = KERN_WARNING;
 
 	switch (cqe->syndrome) {
 	case MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR:
@@ -287,10 +290,11 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
 		wc->status = IB_WC_LOC_QP_OP_ERR;
 		break;
 	case MLX5_CQE_SYNDROME_LOCAL_PROT_ERR:
+		dump = KERN_DEBUG;
 		wc->status = IB_WC_LOC_PROT_ERR;
 		break;
 	case MLX5_CQE_SYNDROME_WR_FLUSH_ERR:
-		dump = 0;
+		dump = NULL;
 		wc->status = IB_WC_WR_FLUSH_ERR;
 		break;
 	case MLX5_CQE_SYNDROME_MW_BIND_ERR:
@@ -306,18 +310,20 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
 		wc->status = IB_WC_REM_INV_REQ_ERR;
 		break;
 	case MLX5_CQE_SYNDROME_REMOTE_ACCESS_ERR:
+		dump = KERN_DEBUG;
 		wc->status = IB_WC_REM_ACCESS_ERR;
 		break;
 	case MLX5_CQE_SYNDROME_REMOTE_OP_ERR:
+		dump = KERN_DEBUG;
 		wc->status = IB_WC_REM_OP_ERR;
 		break;
 	case MLX5_CQE_SYNDROME_TRANSPORT_RETRY_EXC_ERR:
+		dump = NULL;
 		wc->status = IB_WC_RETRY_EXC_ERR;
-		dump = 0;
 		break;
 	case MLX5_CQE_SYNDROME_RNR_RETRY_EXC_ERR:
+		dump = NULL;
 		wc->status = IB_WC_RNR_RETRY_EXC_ERR;
-		dump = 0;
 		break;
 	case MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR:
 		wc->status = IB_WC_REM_ABORT_ERR;
@@ -328,11 +334,8 @@ static void mlx5_handle_error_cqe(struct mlx5_ib_dev *dev,
 	}
 
 	wc->vendor_err = cqe->vendor_err_synd;
-	if (dump) {
-		mlx5_ib_warn(dev, "WC error: %d, Message: %s\n", wc->status,
-			     ib_wc_status_msg(wc->status));
-		dump_cqe(dev, cqe);
-	}
+	if (dump)
+		dump_cqe(dev, cqe, wc, dump);
 }
 
 static void handle_atomics(struct mlx5_ib_qp *qp, struct mlx5_cqe64 *cqe64,
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index 4a7f7064bd0eb..8b91babdd4c08 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -38,6 +38,10 @@
 	dev_warn(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__,     \
 		 __LINE__, current->pid, ##arg)
 
+#define mlx5_ib_log(lvl, _dev, format, arg...)                                 \
+	dev_printk(lvl, &(_dev)->ib_dev.dev,  "%s:%d:(pid %d): " format,       \
+		   __func__, __LINE__, current->pid, ##arg)
+
 #define MLX5_IB_DEFAULT_UIDX 0xffffff
 #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index)
 
-- 
GitLab


From 118e021b4b66f758f8e8f21dc0e5e0a4c721e69e Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Mon, 7 Nov 2022 10:09:11 +1100
Subject: [PATCH 0955/4122] xfs: write page faults in iomap are not buffered
 writes

When we reserve a delalloc region in xfs_buffered_write_iomap_begin,
we mark the iomap as IOMAP_F_NEW so that the the write context
understands that it allocated the delalloc region.

If we then fail that buffered write, xfs_buffered_write_iomap_end()
checks for the IOMAP_F_NEW flag and if it is set, it punches out
the unused delalloc region that was allocated for the write.

The assumption this code makes is that all buffered write operations
that can allocate space are run under an exclusive lock (i_rwsem).
This is an invalid assumption: page faults in mmap()d regions call
through this same function pair to map the file range being faulted
and this runs only holding the inode->i_mapping->invalidate_lock in
shared mode.

IOWs, we can have races between page faults and write() calls that
fail the nested page cache write operation that result in data loss.
That is, the failing iomap_end call will punch out the data that
the other racing iomap iteration brought into the page cache. This
can be reproduced with generic/34[46] if we arbitrarily fail page
cache copy-in operations from write() syscalls.

Code analysis tells us that the iomap_page_mkwrite() function holds
the already instantiated and uptodate folio locked across the iomap
mapping iterations. Hence the folio cannot be removed from memory
whilst we are mapping the range it covers, and as such we do not
care if the mapping changes state underneath the iomap iteration
loop:

1. if the folio is not already dirty, there is no writeback races
   possible.
2. if we allocated the mapping (delalloc or unwritten), the folio
   cannot already be dirty. See #1.
3. If the folio is already dirty, it must be up to date. As we hold
   it locked, it cannot be reclaimed from memory. Hence we always
   have valid data in the page cache while iterating the mapping.
4. Valid data in the page cache can exist when the underlying
   mapping is DELALLOC, UNWRITTEN or WRITTEN. Having the mapping
   change from DELALLOC->UNWRITTEN or UNWRITTEN->WRITTEN does not
   change the data in the page - it only affects actions if we are
   initialising a new page. Hence #3 applies  and we don't care
   about these extent map transitions racing with
   iomap_page_mkwrite().
5. iomap_page_mkwrite() checks for page invalidation races
   (truncate, hole punch, etc) after it locks the folio. We also
   hold the mapping->invalidation_lock here, and hence the mapping
   cannot change due to extent removal operations while we are
   iterating the folio.

As such, filesystems that don't use bufferheads will never fail
the iomap_folio_mkwrite_iter() operation on the current mapping,
regardless of whether the iomap should be considered stale.

Further, the range we are asked to iterate is limited to the range
inside EOF that the folio spans. Hence, for XFS, we will only map
the exact range we are asked for, and we will only do speculative
preallocation with delalloc if we are mapping a hole at the EOF
page. The iterator will consume the entire range of the folio that
is within EOF, and anything beyond the EOF block cannot be accessed.
We never need to truncate this post-EOF speculative prealloc away in
the context of the iomap_page_mkwrite() iterator because if it
remains unused we'll remove it when the last reference to the inode
goes away.

Hence we don't actually need an .iomap_end() cleanup/error handling
path at all for iomap_page_mkwrite() for XFS. This means we can
separate the page fault processing from the complexity of the
.iomap_end() processing in the buffered write path. This also means
that the buffered write path will also be able to take the
mapping->invalidate_lock as necessary.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_file.c  | 2 +-
 fs/xfs/xfs_iomap.c | 9 +++++++++
 fs/xfs/xfs_iomap.h | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index e462d39c840e6..595a5bcf46b94 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1325,7 +1325,7 @@ __xfs_filemap_fault(
 		if (write_fault) {
 			xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 			ret = iomap_page_mkwrite(vmf,
-					&xfs_buffered_write_iomap_ops);
+					&xfs_page_mkwrite_iomap_ops);
 			xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
 		} else {
 			ret = filemap_fault(vmf);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 07da03976ec12..5cea069a38b4e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1187,6 +1187,15 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = {
 	.iomap_end		= xfs_buffered_write_iomap_end,
 };
 
+/*
+ * iomap_page_mkwrite() will never fail in a way that requires delalloc extents
+ * that it allocated to be revoked. Hence we do not need an .iomap_end method
+ * for this operation.
+ */
+const struct iomap_ops xfs_page_mkwrite_iomap_ops = {
+	.iomap_begin		= xfs_buffered_write_iomap_begin,
+};
+
 static int
 xfs_read_iomap_begin(
 	struct inode		*inode,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index c782e8c0479c0..0f62ab633040c 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -47,6 +47,7 @@ xfs_aligned_fsb_count(
 }
 
 extern const struct iomap_ops xfs_buffered_write_iomap_ops;
+extern const struct iomap_ops xfs_page_mkwrite_iomap_ops;
 extern const struct iomap_ops xfs_direct_write_iomap_ops;
 extern const struct iomap_ops xfs_read_iomap_ops;
 extern const struct iomap_ops xfs_seek_iomap_ops;
-- 
GitLab


From f7fc5b7090372fc4dd7798c874635ca41b8ba733 Mon Sep 17 00:00:00 2001
From: Al Cooper <alcooperx@gmail.com>
Date: Wed, 5 Oct 2022 14:30:14 -0700
Subject: [PATCH 0956/4122] phy: usb: s2 WoL wakeup_count not incremented for
 USB->Eth devices

The PHY's "wakeup_count" is not incrementing when waking from
WoL. The wakeup count can be found in sysfs at:
/sys/bus/platform/devices/rdb/*.usb-phy/power/wakeup_count.
The problem is that the system wakup event handler was being passed
the wrong "device" by the PHY driver.

Fixes: f1c0db40a3ad ("phy: usb: Add "wake on" functionality")
Signed-off-by: Al Cooper <alcooperx@gmail.com>
Signed-off-by: Justin Chen <justinpopo6@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/1665005418-15807-3-git-send-email-justinpopo6@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/broadcom/phy-brcm-usb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index 99d4deabfd976..d97fa58cd63a6 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -102,9 +102,9 @@ static int brcm_pm_notifier(struct notifier_block *notifier,
 
 static irqreturn_t brcm_usb_phy_wake_isr(int irq, void *dev_id)
 {
-	struct phy *gphy = dev_id;
+	struct device *dev = dev_id;
 
-	pm_wakeup_event(&gphy->dev, 0);
+	pm_wakeup_event(dev, 0);
 
 	return IRQ_HANDLED;
 }
@@ -452,7 +452,7 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev,
 	if (priv->wake_irq >= 0) {
 		err = devm_request_irq(dev, priv->wake_irq,
 				       brcm_usb_phy_wake_isr, 0,
-				       dev_name(dev), gphy);
+				       dev_name(dev), dev);
 		if (err < 0)
 			return err;
 		device_set_wakeup_capable(dev, 1);
-- 
GitLab


From 7e81153d0f16dd7e6f571bd168bc3d8b46f9f5b7 Mon Sep 17 00:00:00 2001
From: Justin Chen <justinpopo6@gmail.com>
Date: Wed, 5 Oct 2022 14:30:15 -0700
Subject: [PATCH 0957/4122] phy: usb: Migrate to BIT and BITMASK macros

Using BIT and BITMASK macros makes it much easier to read and make
modifications. Also reordered some constants to be in numerical order.

Signed-off-by: Justin Chen <justinpopo6@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/1665005418-15807-4-git-send-email-justinpopo6@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/broadcom/phy-brcm-usb-init-synopsys.c | 68 ++++++++--------
 drivers/phy/broadcom/phy-brcm-usb-init.c      | 80 +++++++++----------
 2 files changed, 74 insertions(+), 74 deletions(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index 430a8ae0cd24b..26e9585eca60c 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -18,14 +18,14 @@
 #define PIARBCTL_CAM			0x00
 #define PIARBCTL_SPLITTER		0x04
 #define PIARBCTL_MISC			0x08
-#define   PIARBCTL_MISC_SECURE_MASK			0x80000000
-#define   PIARBCTL_MISC_USB_SELECT_MASK			0x40000000
-#define   PIARBCTL_MISC_USB_4G_SDRAM_MASK		0x20000000
-#define   PIARBCTL_MISC_USB_PRIORITY_MASK		0x000f0000
-#define   PIARBCTL_MISC_USB_MEM_PAGE_MASK		0x0000f000
-#define   PIARBCTL_MISC_CAM1_MEM_PAGE_MASK		0x00000f00
-#define   PIARBCTL_MISC_CAM0_MEM_PAGE_MASK		0x000000f0
-#define   PIARBCTL_MISC_SATA_PRIORITY_MASK		0x0000000f
+#define   PIARBCTL_MISC_SATA_PRIORITY_MASK		GENMASK(3, 0)
+#define   PIARBCTL_MISC_CAM0_MEM_PAGE_MASK		GENMASK(7, 4)
+#define   PIARBCTL_MISC_CAM1_MEM_PAGE_MASK		GENMASK(11, 8)
+#define   PIARBCTL_MISC_USB_MEM_PAGE_MASK		GENMASK(15, 12)
+#define   PIARBCTL_MISC_USB_PRIORITY_MASK		GENMASK(19, 16)
+#define   PIARBCTL_MISC_USB_4G_SDRAM_MASK		BIT(29)
+#define   PIARBCTL_MISC_USB_SELECT_MASK			BIT(30)
+#define   PIARBCTL_MISC_SECURE_MASK			BIT(31)
 
 #define PIARBCTL_MISC_USB_ONLY_MASK		\
 	(PIARBCTL_MISC_USB_SELECT_MASK |	\
@@ -35,46 +35,46 @@
 
 /* Register definitions for the USB CTRL block */
 #define USB_CTRL_SETUP			0x00
-#define   USB_CTRL_SETUP_STRAP_IPP_SEL_MASK		0x02000000
-#define   USB_CTRL_SETUP_SCB2_EN_MASK			0x00008000
-#define   USB_CTRL_SETUP_tca_drv_sel_MASK		0x01000000
-#define   USB_CTRL_SETUP_SCB1_EN_MASK			0x00004000
-#define   USB_CTRL_SETUP_SOFT_SHUTDOWN_MASK		0x00000200
-#define   USB_CTRL_SETUP_IPP_MASK			0x00000020
-#define   USB_CTRL_SETUP_IOC_MASK			0x00000010
+#define   USB_CTRL_SETUP_IOC_MASK			BIT(4)
+#define   USB_CTRL_SETUP_IPP_MASK			BIT(5)
+#define   USB_CTRL_SETUP_SOFT_SHUTDOWN_MASK		BIT(9)
+#define   USB_CTRL_SETUP_SCB1_EN_MASK			BIT(14)
+#define   USB_CTRL_SETUP_SCB2_EN_MASK			BIT(15)
+#define   USB_CTRL_SETUP_tca_drv_sel_MASK		BIT(24)
+#define   USB_CTRL_SETUP_STRAP_IPP_SEL_MASK		BIT(25)
 #define USB_CTRL_USB_PM			0x04
-#define   USB_CTRL_USB_PM_USB_PWRDN_MASK		0x80000000
-#define   USB_CTRL_USB_PM_SOFT_RESET_MASK		0x40000000
-#define   USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK		0x00800000
-#define   USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK		0x00400000
-#define   USB_CTRL_USB_PM_XHC_PME_EN_MASK		0x00000010
-#define   USB_CTRL_USB_PM_XHC_S2_CLK_SWITCH_EN_MASK	0x00000008
+#define   USB_CTRL_USB_PM_XHC_S2_CLK_SWITCH_EN_MASK	BIT(3)
+#define   USB_CTRL_USB_PM_XHC_PME_EN_MASK		BIT(4)
+#define   USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK		BIT(22)
+#define   USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK		BIT(23)
+#define   USB_CTRL_USB_PM_SOFT_RESET_MASK		BIT(30)
+#define   USB_CTRL_USB_PM_USB_PWRDN_MASK		BIT(31)
 #define USB_CTRL_USB_PM_STATUS		0x08
 #define USB_CTRL_USB_DEVICE_CTL1	0x10
-#define   USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK	0x00000003
+#define   USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK	GENMASK(1, 0)
 #define USB_CTRL_TEST_PORT_CTL		0x30
-#define   USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_MASK	0x000000ff
+#define   USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_MASK		GENMASK(7, 0)
 #define   USB_CTRL_TEST_PORT_CTL_TPOUT_SEL_PME_GEN_MASK	0x0000002e
 #define USB_CTRL_TP_DIAG1		0x34
-#define   USB_CTLR_TP_DIAG1_wake_MASK	0x00000002
+#define   USB_CTLR_TP_DIAG1_wake_MASK			BIT(1)
 #define USB_CTRL_CTLR_CSHCR		0x50
-#define   USB_CTRL_CTLR_CSHCR_ctl_pme_en_MASK	0x00040000
+#define   USB_CTRL_CTLR_CSHCR_ctl_pme_en_MASK		BIT(18)
 
 /* Register definitions for the USB_PHY block in 7211b0 */
 #define USB_PHY_PLL_CTL			0x00
-#define   USB_PHY_PLL_CTL_PLL_RESETB_MASK		0x40000000
+#define   USB_PHY_PLL_CTL_PLL_RESETB_MASK		BIT(30)
 #define USB_PHY_PLL_LDO_CTL		0x08
-#define   USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK		0x00000004
-#define   USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK	0x00000002
-#define   USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK	0x00000001
+#define   USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK	BIT(0)
+#define   USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK	BIT(1)
+#define   USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK		BIT(2)
 #define USB_PHY_UTMI_CTL_1		0x04
-#define   USB_PHY_UTMI_CTL_1_POWER_UP_FSM_EN_MASK	0x00000800
-#define   USB_PHY_UTMI_CTL_1_PHY_MODE_MASK		0x0000000c
+#define   USB_PHY_UTMI_CTL_1_PHY_MODE_MASK		GENMASK(3, 2)
 #define   USB_PHY_UTMI_CTL_1_PHY_MODE_SHIFT		2
+#define   USB_PHY_UTMI_CTL_1_POWER_UP_FSM_EN_MASK	BIT(11)
 #define USB_PHY_IDDQ			0x1c
-#define   USB_PHY_IDDQ_phy_iddq_MASK			0x00000001
+#define   USB_PHY_IDDQ_phy_iddq_MASK			BIT(0)
 #define USB_PHY_STATUS			0x20
-#define   USB_PHY_STATUS_pll_lock_MASK			0x00000001
+#define   USB_PHY_STATUS_pll_lock_MASK			BIT(0)
 
 /* Register definitions for the MDIO registers in the DWC2 block of
  * the 7211b0.
@@ -86,7 +86,7 @@
 
 /* Register definitions for the BDC EC block in 7211b0 */
 #define BDC_EC_AXIRDA			0x0c
-#define   BDC_EC_AXIRDA_RTS_MASK			0xf0000000
+#define   BDC_EC_AXIRDA_RTS_MASK			GENMASK(31, 28)
 #define   BDC_EC_AXIRDA_RTS_SHIFT			28
 
 
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.c b/drivers/phy/broadcom/phy-brcm-usb-init.c
index a7f8b3d3264d5..a1ca83308f98d 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init.c
@@ -21,57 +21,57 @@
 
 /* Register definitions for the USB CTRL block */
 #define USB_CTRL_SETUP			0x00
-#define   USB_CTRL_SETUP_IOC_MASK			0x00000010
-#define   USB_CTRL_SETUP_IPP_MASK			0x00000020
-#define   USB_CTRL_SETUP_BABO_MASK			0x00000001
-#define   USB_CTRL_SETUP_FNHW_MASK			0x00000002
-#define   USB_CTRL_SETUP_FNBO_MASK			0x00000004
-#define   USB_CTRL_SETUP_WABO_MASK			0x00000008
-#define   USB_CTRL_SETUP_SCB_CLIENT_SWAP_MASK		0x00002000 /* option */
-#define   USB_CTRL_SETUP_SCB1_EN_MASK			0x00004000 /* option */
-#define   USB_CTRL_SETUP_SCB2_EN_MASK			0x00008000 /* option */
-#define   USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK		0X00020000 /* option */
-#define   USB_CTRL_SETUP_SS_EHCI64BIT_EN_VAR_MASK	0x00010000 /* option */
-#define   USB_CTRL_SETUP_STRAP_IPP_SEL_MASK		0x02000000 /* option */
-#define   USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK	0x04000000 /* option */
-#define   USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK 0x08000000 /* opt */
-#define   USB_CTRL_SETUP_OC3_DISABLE_MASK		0xc0000000 /* option */
+#define   USB_CTRL_SETUP_BABO_MASK			BIT(0)
+#define   USB_CTRL_SETUP_FNHW_MASK			BIT(1)
+#define   USB_CTRL_SETUP_FNBO_MASK			BIT(2)
+#define   USB_CTRL_SETUP_WABO_MASK			BIT(3)
+#define   USB_CTRL_SETUP_IOC_MASK			BIT(4)
+#define   USB_CTRL_SETUP_IPP_MASK			BIT(5)
+#define   USB_CTRL_SETUP_SCB_CLIENT_SWAP_MASK		BIT(13) /* option */
+#define   USB_CTRL_SETUP_SCB1_EN_MASK			BIT(14) /* option */
+#define   USB_CTRL_SETUP_SCB2_EN_MASK			BIT(15) /* option */
+#define   USB_CTRL_SETUP_SS_EHCI64BIT_EN_MASK		BIT(17) /* option */
+#define   USB_CTRL_SETUP_SS_EHCI64BIT_EN_VAR_MASK	BIT(16) /* option */
+#define   USB_CTRL_SETUP_STRAP_IPP_SEL_MASK		BIT(25) /* option */
+#define   USB_CTRL_SETUP_CC_DRD_MODE_ENABLE_MASK	BIT(26) /* option */
+#define   USB_CTRL_SETUP_STRAP_CC_DRD_MODE_ENABLE_SEL_MASK BIT(27) /* opt */
+#define   USB_CTRL_SETUP_OC3_DISABLE_MASK		GENMASK(31, 30) /* option */
 #define USB_CTRL_PLL_CTL		0x04
-#define   USB_CTRL_PLL_CTL_PLL_SUSPEND_EN_MASK		0x08000000
-#define   USB_CTRL_PLL_CTL_PLL_RESETB_MASK		0x40000000
-#define   USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK		0x80000000 /* option */
+#define   USB_CTRL_PLL_CTL_PLL_SUSPEND_EN_MASK		BIT(27)
+#define   USB_CTRL_PLL_CTL_PLL_RESETB_MASK		BIT(30)
+#define   USB_CTRL_PLL_CTL_PLL_IDDQ_PWRDN_MASK		BIT(31) /* option */
 #define USB_CTRL_EBRIDGE		0x0c
-#define   USB_CTRL_EBRIDGE_ESTOP_SCB_REQ_MASK		0x00020000 /* option */
-#define   USB_CTRL_EBRIDGE_EBR_SCB_SIZE_MASK		0x00000f80 /* option */
+#define   USB_CTRL_EBRIDGE_EBR_SCB_SIZE_MASK		GENMASK(11, 7) /* option */
+#define   USB_CTRL_EBRIDGE_ESTOP_SCB_REQ_MASK		BIT(17) /* option */
 #define USB_CTRL_OBRIDGE		0x10
-#define   USB_CTRL_OBRIDGE_LS_KEEP_ALIVE_MASK		0x08000000
+#define   USB_CTRL_OBRIDGE_LS_KEEP_ALIVE_MASK		BIT(27)
 #define USB_CTRL_MDIO			0x14
 #define USB_CTRL_MDIO2			0x18
 #define USB_CTRL_UTMI_CTL_1		0x2c
-#define   USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_MASK	0x00000800
-#define   USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_P1_MASK	0x08000000
+#define   USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_MASK	BIT(11)
+#define   USB_CTRL_UTMI_CTL_1_POWER_UP_FSM_EN_P1_MASK	BIT(27)
 #define USB_CTRL_USB_PM			0x34
-#define   USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK		0x00800000 /* option */
-#define   USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK		0x00400000 /* option */
-#define   USB_CTRL_USB_PM_XHC_SOFT_RESETB_VAR_MASK	0x40000000 /* option */
-#define   USB_CTRL_USB_PM_USB_PWRDN_MASK		0x80000000 /* option */
-#define   USB_CTRL_USB_PM_SOFT_RESET_MASK		0x40000000 /* option */
-#define   USB_CTRL_USB_PM_USB20_HC_RESETB_MASK		0x30000000 /* option */
-#define   USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK	0x00300000 /* option */
-#define   USB_CTRL_USB_PM_RMTWKUP_EN_MASK		0x00000001
+#define   USB_CTRL_USB_PM_RMTWKUP_EN_MASK		BIT(0)
+#define   USB_CTRL_USB_PM_USB20_HC_RESETB_VAR_MASK	GENMASK(21, 20) /* option */
+#define   USB_CTRL_USB_PM_XHC_SOFT_RESETB_MASK		BIT(22) /* option */
+#define   USB_CTRL_USB_PM_BDC_SOFT_RESETB_MASK		BIT(23) /* option */
+#define   USB_CTRL_USB_PM_USB20_HC_RESETB_MASK		GENMASK(29, 28) /* option */
+#define   USB_CTRL_USB_PM_XHC_SOFT_RESETB_VAR_MASK	BIT(30) /* option */
+#define   USB_CTRL_USB_PM_SOFT_RESET_MASK		BIT(30) /* option */
+#define   USB_CTRL_USB_PM_USB_PWRDN_MASK		BIT(31) /* option */
 #define USB_CTRL_USB_PM_STATUS		0x38
 #define USB_CTRL_USB30_CTL1		0x60
-#define   USB_CTRL_USB30_CTL1_PHY3_PLL_SEQ_START_MASK	0x00000010
-#define   USB_CTRL_USB30_CTL1_PHY3_RESETB_MASK		0x00010000
-#define   USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK	0x00020000 /* option */
-#define   USB_CTRL_USB30_CTL1_USB3_IOC_MASK		0x10000000 /* option */
-#define   USB_CTRL_USB30_CTL1_USB3_IPP_MASK		0x20000000 /* option */
+#define   USB_CTRL_USB30_CTL1_PHY3_PLL_SEQ_START_MASK	BIT(4)
+#define   USB_CTRL_USB30_CTL1_PHY3_RESETB_MASK		BIT(16)
+#define   USB_CTRL_USB30_CTL1_XHC_SOFT_RESETB_MASK	BIT(17) /* option */
+#define   USB_CTRL_USB30_CTL1_USB3_IOC_MASK		BIT(28) /* option */
+#define   USB_CTRL_USB30_CTL1_USB3_IPP_MASK		BIT(29) /* option */
 #define USB_CTRL_USB30_PCTL		0x70
-#define   USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_MASK	0x00000002
-#define   USB_CTRL_USB30_PCTL_PHY3_IDDQ_OVERRIDE_MASK	0x00008000
-#define   USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_P1_MASK	0x00020000
+#define   USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_MASK	BIT(1)
+#define   USB_CTRL_USB30_PCTL_PHY3_IDDQ_OVERRIDE_MASK	BIT(15)
+#define   USB_CTRL_USB30_PCTL_PHY3_SOFT_RESETB_P1_MASK	BIT(17)
 #define USB_CTRL_USB_DEVICE_CTL1	0x90
-#define   USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK	0x00000003 /* option */
+#define   USB_CTRL_USB_DEVICE_CTL1_PORT_MODE_MASK	GENMASK(1, 0) /* option */
 
 /* Register definitions for the XHCI EC block */
 #define USB_XHCI_EC_IRAADR 0x658
-- 
GitLab


From 833c173ebab420997b98a0d888fc5b55ee4a8a7e Mon Sep 17 00:00:00 2001
From: Justin Chen <justinpopo6@gmail.com>
Date: Wed, 5 Oct 2022 14:30:16 -0700
Subject: [PATCH 0958/4122] phy: usb: Disable phy auto-suspend

The BDC block requires the PLL lock in order to grab the PLL clock.
The phy auto-suspend feature turns off the phy when nothing is attached
leading to the PLL to not lock. This leads the BDC block to grab the AUX
clock instead of the PLL clock. This is not ideal, so lets turn this
feature off.

Signed-off-by: Justin Chen <justinpopo6@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/1665005418-15807-5-git-send-email-justinpopo6@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index 26e9585eca60c..6a4d47886e0e7 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -62,6 +62,7 @@
 
 /* Register definitions for the USB_PHY block in 7211b0 */
 #define USB_PHY_PLL_CTL			0x00
+#define   USB_PHY_PLL_CTL_PLL_SUSPEND_MASK		BIT(27)
 #define   USB_PHY_PLL_CTL_PLL_RESETB_MASK		BIT(30)
 #define USB_PHY_PLL_LDO_CTL		0x08
 #define   USB_PHY_PLL_LDO_CTL_AFE_BG_PWRDWNB_MASK	BIT(0)
@@ -259,6 +260,11 @@ static void usb_init_common_7211b0(struct brcm_usb_init_params *params)
 		brcm_usb_writel(reg, usb_phy + USB_PHY_UTMI_CTL_1);
 	}
 
+	/* Disable PLL auto suspend */
+	reg = brcm_usb_readl(usb_phy + USB_PHY_PLL_CTL);
+	reg |= USB_PHY_PLL_CTL_PLL_SUSPEND_MASK;
+	brcm_usb_writel(reg, usb_phy + USB_PHY_PLL_CTL);
+
 	/* Init the PHY */
 	reg = USB_PHY_PLL_LDO_CTL_AFE_CORERDY_MASK |
 		USB_PHY_PLL_LDO_CTL_AFE_LDO_PWRDWNB_MASK |
-- 
GitLab


From 700c44b508020a3ea29d297c677f8d4ab14b7e6a Mon Sep 17 00:00:00 2001
From: Justin Chen <justinpopo6@gmail.com>
Date: Wed, 5 Oct 2022 14:30:17 -0700
Subject: [PATCH 0959/4122] phy: usb: Use slow clock for wake enabled suspend

The logic was incorrect when switching to slow clock. We want the slow
clock if wake_enabled is set.

Fixes: ae532b2b7aa5 ("phy: usb: Add "wake on" functionality for newer Synopsis XHCI controllers")
Signed-off-by: Justin Chen <justinpopo6@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/1665005418-15807-6-git-send-email-justinpopo6@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index 6a4d47886e0e7..f78cff24eec8d 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -337,13 +337,12 @@ static void usb_uninit_common_7216(struct brcm_usb_init_params *params)
 
 	pr_debug("%s\n", __func__);
 
-	if (!params->wake_enabled) {
-		USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN);
-
+	if (params->wake_enabled) {
 		/* Switch to using slower clock during suspend to save power */
 		USB_CTRL_SET(ctrl, USB_PM, XHC_S2_CLK_SWITCH_EN);
-	} else {
 		usb_wake_enable_7216(params, true);
+	} else {
+		USB_CTRL_SET(ctrl, USB_PM, USB_PWRDN);
 	}
 }
 
-- 
GitLab


From 8484199c09347bdd5d81ee8a2bc530850f900797 Mon Sep 17 00:00:00 2001
From: Justin Chen <justinpopo6@gmail.com>
Date: Wed, 5 Oct 2022 14:30:18 -0700
Subject: [PATCH 0960/4122] phy: usb: Fix clock imbalance for suspend/resume

We should be disabling clocks when wake from USB is not needed. Since
this wasn't done, we had a clock imbalance since clocks were always
being enabled on resume.

Fixes: ae532b2b7aa5 ("phy: usb: Add "wake on" functionality for newer Synopsis XHCI controllers")
Fixes: b0c0b66c0b43 ("phy: usb: Add support for wake and USB low power mode for 7211 S2/S5")
Signed-off-by: Justin Chen <justinpopo6@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/1665005418-15807-7-git-send-email-justinpopo6@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c | 2 --
 drivers/phy/broadcom/phy-brcm-usb-init.h          | 1 -
 drivers/phy/broadcom/phy-brcm-usb.c               | 8 +++++---
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
index f78cff24eec8d..76cf4280d7ed9 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
+++ b/drivers/phy/broadcom/phy-brcm-usb-init-synopsys.c
@@ -430,7 +430,6 @@ void brcm_usb_dvr_init_7216(struct brcm_usb_init_params *params)
 
 	params->family_name = "7216";
 	params->ops = &bcm7216_ops;
-	params->suspend_with_clocks = true;
 }
 
 void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params)
@@ -440,5 +439,4 @@ void brcm_usb_dvr_init_7211b0(struct brcm_usb_init_params *params)
 
 	params->family_name = "7211";
 	params->ops = &bcm7211b0_ops;
-	params->suspend_with_clocks = true;
 }
diff --git a/drivers/phy/broadcom/phy-brcm-usb-init.h b/drivers/phy/broadcom/phy-brcm-usb-init.h
index bedf2b8e2f196..f9fbf8fb80e54 100644
--- a/drivers/phy/broadcom/phy-brcm-usb-init.h
+++ b/drivers/phy/broadcom/phy-brcm-usb-init.h
@@ -62,7 +62,6 @@ struct  brcm_usb_init_params {
 	const struct brcm_usb_init_ops *ops;
 	struct regmap *syscon_piarbctl;
 	bool wake_enabled;
-	bool suspend_with_clocks;
 };
 
 void brcm_usb_dvr_init_4908(struct brcm_usb_init_params *params);
diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index d97fa58cd63a6..aafba4a047018 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -602,7 +602,7 @@ static int brcm_usb_phy_suspend(struct device *dev)
 		 * and newer XHCI->2.0-clks/3.0-clks.
 		 */
 
-		if (!priv->ini.suspend_with_clocks) {
+		if (!priv->ini.wake_enabled) {
 			if (priv->phys[BRCM_USB_PHY_3_0].inited)
 				clk_disable_unprepare(priv->usb_30_clk);
 			if (priv->phys[BRCM_USB_PHY_2_0].inited ||
@@ -619,8 +619,10 @@ static int brcm_usb_phy_resume(struct device *dev)
 {
 	struct brcm_usb_phy_data *priv = dev_get_drvdata(dev);
 
-	clk_prepare_enable(priv->usb_20_clk);
-	clk_prepare_enable(priv->usb_30_clk);
+	if (!priv->ini.wake_enabled) {
+		clk_prepare_enable(priv->usb_20_clk);
+		clk_prepare_enable(priv->usb_30_clk);
+	}
 	brcm_usb_init_ipp(&priv->ini);
 
 	/*
-- 
GitLab


From 6964affe65066651eca21e97247d3b7cac5153dc Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 31 Oct 2022 11:13:53 +0000
Subject: [PATCH 0961/4122] dt-bindings: phy: Add special clock for Allwinner
 H616 PHY

The USB PHY IP in the Allwinner H616 SoC requires a quirk that involves
some resources from port 2's PHY and HCI IP. In particular the PMU clock
for port 2 must be surely ungated before accessing the REG_HCI_PHY_CTL
register of port 2. To allow each USB port to be controlled
independently of port 2, we need a handle to that particular PMU clock
in the *PHY* node, as the HCI and PHY part might be handled by separate
drivers.

Add that clock to the requirements of the H616 PHY binding, so that a
PHY driver can apply the quirk in isolation, without requiring help from
port 2's HCI driver.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221031111358.3387297-3-andre.przywara@arm.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/allwinner,sun8i-h3-usb-phy.yaml       | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml
index 77539b4601c24..2df012d13655e 100644
--- a/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun8i-h3-usb-phy.yaml
@@ -36,18 +36,22 @@ properties:
       - const: pmu3
 
   clocks:
+    minItems: 4
     items:
       - description: USB OTG PHY bus clock
       - description: USB Host 0 PHY bus clock
       - description: USB Host 1 PHY bus clock
       - description: USB Host 2 PHY bus clock
+      - description: PMU clock for host port 2
 
   clock-names:
+    minItems: 4
     items:
       - const: usb0_phy
       - const: usb1_phy
       - const: usb2_phy
       - const: usb3_phy
+      - const: pmu2_clk
 
   resets:
     items:
@@ -96,6 +100,28 @@ required:
   - resets
   - reset-names
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - allwinner,sun50i-h616-usb-phy
+    then:
+      properties:
+        clocks:
+          minItems: 5
+
+        clock-names:
+          minItems: 5
+    else:
+      properties:
+        clocks:
+          maxItems: 4
+
+        clock-names:
+          maxItems: 4
+
 additionalProperties: false
 
 examples:
-- 
GitLab


From b45c6d80325bec2b78c716629a518b6442d8bdc6 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 31 Oct 2022 11:13:54 +0000
Subject: [PATCH 0962/4122] phy: sun4i-usb: Introduce port2 SIDDQ quirk

At least the Allwinner H616 SoC requires a weird quirk to make most
USB PHYs work: Only port2 works out of the box, but all other ports
need some help from this port2 to work correctly: The CLK_BUS_PHY2 and
RST_USB_PHY2 clock and reset need to be enabled, and the SIDDQ bit in
the PMU PHY control register needs to be cleared. For this register to
be accessible, CLK_BUS_ECHI2 needs to be ungated. Don't ask ....

Instead of disguising this as some generic feature, treat it more like
a quirk (what it really is):
If the quirk bit is set, and we initialise a PHY other than PHY2, ungate
this one special clock, and clear the SIDDQ bit. We also pick the clock
and reset from PHY2 and enable them as well.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20221031111358.3387297-4-andre.przywara@arm.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/allwinner/phy-sun4i-usb.c | 59 +++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c
index 3a3831f6059a3..e39f5ad62cc19 100644
--- a/drivers/phy/allwinner/phy-sun4i-usb.c
+++ b/drivers/phy/allwinner/phy-sun4i-usb.c
@@ -120,6 +120,7 @@ struct sun4i_usb_phy_cfg {
 	u8 phyctl_offset;
 	bool dedicated_clocks;
 	bool phy0_dual_route;
+	bool needs_phy2_siddq;
 	int missing_phys;
 };
 
@@ -289,6 +290,50 @@ static int sun4i_usb_phy_init(struct phy *_phy)
 		return ret;
 	}
 
+	/* Some PHYs on some SoCs need the help of PHY2 to work. */
+	if (data->cfg->needs_phy2_siddq && phy->index != 2) {
+		struct sun4i_usb_phy *phy2 = &data->phys[2];
+
+		ret = clk_prepare_enable(phy2->clk);
+		if (ret) {
+			reset_control_assert(phy->reset);
+			clk_disable_unprepare(phy->clk2);
+			clk_disable_unprepare(phy->clk);
+			return ret;
+		}
+
+		ret = reset_control_deassert(phy2->reset);
+		if (ret) {
+			clk_disable_unprepare(phy2->clk);
+			reset_control_assert(phy->reset);
+			clk_disable_unprepare(phy->clk2);
+			clk_disable_unprepare(phy->clk);
+			return ret;
+		}
+
+		/*
+		 * This extra clock is just needed to access the
+		 * REG_HCI_PHY_CTL PMU register for PHY2.
+		 */
+		ret = clk_prepare_enable(phy2->clk2);
+		if (ret) {
+			reset_control_assert(phy2->reset);
+			clk_disable_unprepare(phy2->clk);
+			reset_control_assert(phy->reset);
+			clk_disable_unprepare(phy->clk2);
+			clk_disable_unprepare(phy->clk);
+			return ret;
+		}
+
+		if (phy2->pmu && data->cfg->hci_phy_ctl_clear) {
+			val = readl(phy2->pmu + REG_HCI_PHY_CTL);
+			val &= ~data->cfg->hci_phy_ctl_clear;
+			writel(val, phy2->pmu + REG_HCI_PHY_CTL);
+		}
+
+		clk_disable_unprepare(phy->clk2);
+	}
+
 	if (phy->pmu && data->cfg->hci_phy_ctl_clear) {
 		val = readl(phy->pmu + REG_HCI_PHY_CTL);
 		val &= ~data->cfg->hci_phy_ctl_clear;
@@ -354,6 +399,13 @@ static int sun4i_usb_phy_exit(struct phy *_phy)
 		data->phy0_init = false;
 	}
 
+	if (data->cfg->needs_phy2_siddq && phy->index != 2) {
+		struct sun4i_usb_phy *phy2 = &data->phys[2];
+
+		clk_disable_unprepare(phy2->clk);
+		reset_control_assert(phy2->reset);
+	}
+
 	sun4i_usb_phy_passby(phy, 0);
 	reset_control_assert(phy->reset);
 	clk_disable_unprepare(phy->clk2);
@@ -785,6 +837,13 @@ static int sun4i_usb_phy_probe(struct platform_device *pdev)
 				dev_err(dev, "failed to get clock %s\n", name);
 				return PTR_ERR(phy->clk2);
 			}
+		} else {
+			snprintf(name, sizeof(name), "pmu%d_clk", i);
+			phy->clk2 = devm_clk_get_optional(dev, name);
+			if (IS_ERR(phy->clk2)) {
+				dev_err(dev, "failed to get clock %s\n", name);
+				return PTR_ERR(phy->clk2);
+			}
 		}
 
 		snprintf(name, sizeof(name), "usb%d_reset", i);
-- 
GitLab


From 0f607406525d25019dd9c498bcc0b42734fc59d5 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Mon, 31 Oct 2022 11:13:55 +0000
Subject: [PATCH 0963/4122] phy: sun4i-usb: Add support for the H616 USB PHY

The USB PHY used in the Allwinner H616 SoC inherits some traits from its
various predecessors: it has four full PHYs like the H3, needs some
extra bits to be set like the H6, and puts SIDDQ on a different bit like
the A100. Plus it needs this weird PHY2 quirk.

Name all those properties in a new config struct and assign a new
compatible name to it.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Reviewed-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221031111358.3387297-5-andre.przywara@arm.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/allwinner/phy-sun4i-usb.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/phy/allwinner/phy-sun4i-usb.c b/drivers/phy/allwinner/phy-sun4i-usb.c
index e39f5ad62cc19..5472db9e87ef8 100644
--- a/drivers/phy/allwinner/phy-sun4i-usb.c
+++ b/drivers/phy/allwinner/phy-sun4i-usb.c
@@ -1032,6 +1032,17 @@ static const struct sun4i_usb_phy_cfg sun50i_h6_cfg = {
 	.missing_phys = BIT(1) | BIT(2),
 };
 
+static const struct sun4i_usb_phy_cfg sun50i_h616_cfg = {
+	.num_phys = 4,
+	.type = sun50i_h6_phy,
+	.disc_thresh = 3,
+	.phyctl_offset = REG_PHYCTL_A33,
+	.dedicated_clocks = true,
+	.phy0_dual_route = true,
+	.hci_phy_ctl_clear = PHY_CTL_SIDDQ,
+	.needs_phy2_siddq = true,
+};
+
 static const struct of_device_id sun4i_usb_phy_of_match[] = {
 	{ .compatible = "allwinner,sun4i-a10-usb-phy", .data = &sun4i_a10_cfg },
 	{ .compatible = "allwinner,sun5i-a13-usb-phy", .data = &sun5i_a13_cfg },
@@ -1047,6 +1058,7 @@ static const struct of_device_id sun4i_usb_phy_of_match[] = {
 	{ .compatible = "allwinner,sun50i-a64-usb-phy",
 	  .data = &sun50i_a64_cfg},
 	{ .compatible = "allwinner,sun50i-h6-usb-phy", .data = &sun50i_h6_cfg },
+	{ .compatible = "allwinner,sun50i-h616-usb-phy", .data = &sun50i_h616_cfg },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, sun4i_usb_phy_of_match);
-- 
GitLab


From 8ca2a81bff096b359736bba9b7d06cf5bc04fa88 Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli@ti.com>
Date: Wed, 26 Oct 2022 13:15:30 +0530
Subject: [PATCH 0964/4122] dt-bindings: phy: ti: phy-gmii-sel: Add bindings
 for J721e

TI's J721e SoC supports additional PHY modes like QSGMII and SGMII
that are not supported on earlier SoCs. Add a compatible for it.

Extend ti,qsgmii-main-ports property to support selection of upto
two main ports at once across the two QSGMII interfaces.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221026074532.109220-2-s-vadapalli@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../bindings/phy/ti,phy-gmii-sel.yaml         | 48 ++++++++++++++++---
 1 file changed, 42 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml
index da7cac537e15c..3a6d686383cf9 100644
--- a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml
@@ -54,6 +54,7 @@ properties:
       - ti,dm814-phy-gmii-sel
       - ti,am654-phy-gmii-sel
       - ti,j7200-cpsw5g-phy-gmii-sel
+      - ti,j721e-cpsw9g-phy-gmii-sel
 
   reg:
     maxItems: 1
@@ -63,14 +64,17 @@ properties:
   ti,qsgmii-main-ports:
     $ref: /schemas/types.yaml#/definitions/uint32-array
     description: |
-      Required only for QSGMII mode. Array to select the port for
-      QSGMII main mode. Rest of the ports are selected as QSGMII_SUB
-      ports automatically. Any one of the 4 CPSW5G ports can act as the
-      main port with the rest of them being the QSGMII_SUB ports.
-    maxItems: 1
+      Required only for QSGMII mode. Array to select the port/s for QSGMII
+      main mode. The size of the array corresponds to the number of QSGMII
+      interfaces and thus, the number of distinct QSGMII main ports,
+      supported by the device. If the device supports two QSGMII interfaces
+      but only one QSGMII interface is desired, repeat the QSGMII main port
+      value corresponding to the QSGMII interface in the array.
+    minItems: 1
+    maxItems: 2
     items:
       minimum: 1
-      maximum: 4
+      maximum: 8
 
 allOf:
   - if:
@@ -81,12 +85,43 @@ allOf:
               - ti,dra7xx-phy-gmii-sel
               - ti,dm814-phy-gmii-sel
               - ti,am654-phy-gmii-sel
+              - ti,j7200-cpsw5g-phy-gmii-sel
+              - ti,j721e-cpsw9g-phy-gmii-sel
     then:
       properties:
         '#phy-cells':
           const: 1
           description: CPSW port number (starting from 1)
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,j7200-cpsw5g-phy-gmii-sel
+    then:
+      properties:
+        ti,qsgmii-main-ports:
+          maxItems: 1
+          items:
+            minimum: 1
+            maximum: 4
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ti,j721e-cpsw9g-phy-gmii-sel
+    then:
+      properties:
+        ti,qsgmii-main-ports:
+          minItems: 2
+          maxItems: 2
+          items:
+            minimum: 1
+            maximum: 8
+
   - if:
       not:
         properties:
@@ -94,6 +129,7 @@ allOf:
             contains:
               enum:
                 - ti,j7200-cpsw5g-phy-gmii-sel
+                - ti,j721e-cpsw9g-phy-gmii-sel
     then:
       properties:
         ti,qsgmii-main-ports: false
-- 
GitLab


From 3b66ab69c566e79d58cc38bd7c90a6b2b0b84a7d Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli@ti.com>
Date: Wed, 26 Oct 2022 13:15:31 +0530
Subject: [PATCH 0965/4122] phy: ti: gmii-sel: Update methods for fetching and
 using qsgmii main port

The number of QSGMII main ports are specific to the device. TI's J7200 for
which the QSGMII main port property is fetched from the device-tree has
only one QSGMII main port. However, devices like TI's J721e support up to
two QSGMII main ports. Thus, the existing methods for fetching and using
the QSGMII main port are not scalable.

Update the existing methods for handling the QSGMII main ports and its
associated requirements to make it scalable for future devices.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Link: https://lore.kernel.org/r/20221026074532.109220-3-s-vadapalli@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/ti/phy-gmii-sel.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c
index 0bcfd6d96b4d0..c8f30d2e1f460 100644
--- a/drivers/phy/ti/phy-gmii-sel.c
+++ b/drivers/phy/ti/phy-gmii-sel.c
@@ -50,6 +50,7 @@ struct phy_gmii_sel_soc_data {
 	const struct reg_field (*regfields)[PHY_GMII_SEL_LAST];
 	bool use_of_data;
 	u64 extra_modes;
+	u32 num_qsgmii_main_ports;
 };
 
 struct phy_gmii_sel_priv {
@@ -213,6 +214,8 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = {
 	.use_of_data = true,
 	.regfields = phy_gmii_sel_fields_am654,
 	.extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII),
+	.num_ports = 4,
+	.num_qsgmii_main_ports = 1,
 };
 
 static const struct of_device_id phy_gmii_sel_id_table[] = {
@@ -378,11 +381,13 @@ static int phy_gmii_sel_init_ports(struct phy_gmii_sel_priv *priv)
 static int phy_gmii_sel_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	const struct phy_gmii_sel_soc_data *soc_data;
 	struct device_node *node = dev->of_node;
 	const struct of_device_id *of_id;
 	struct phy_gmii_sel_priv *priv;
 	u32 main_ports = 1;
 	int ret;
+	u32 i;
 
 	of_id = of_match_node(phy_gmii_sel_id_table, pdev->dev.of_node);
 	if (!of_id)
@@ -394,16 +399,26 @@ static int phy_gmii_sel_probe(struct platform_device *pdev)
 
 	priv->dev = &pdev->dev;
 	priv->soc_data = of_id->data;
+	soc_data = priv->soc_data;
 	priv->num_ports = priv->soc_data->num_ports;
-	of_property_read_u32(node, "ti,qsgmii-main-ports", &main_ports);
+	priv->qsgmii_main_ports = 0;
+
 	/*
-	 * Ensure that main_ports is within bounds. If the property
-	 * ti,qsgmii-main-ports is not mentioned, or the value mentioned
-	 * is out of bounds, default to 1.
+	 * Based on the compatible, try to read the appropriate number of
+	 * QSGMII main ports from the "ti,qsgmii-main-ports" property from
+	 * the device-tree node.
 	 */
-	if (main_ports < 1 || main_ports > 4)
-		main_ports = 1;
-	priv->qsgmii_main_ports = PHY_GMII_PORT(main_ports);
+	for (i = 0; i < soc_data->num_qsgmii_main_ports; i++) {
+		of_property_read_u32_index(node, "ti,qsgmii-main-ports", i, &main_ports);
+		/*
+		 * Ensure that main_ports is within bounds.
+		 */
+		if (main_ports < 1 || main_ports > soc_data->num_ports) {
+			dev_err(dev, "Invalid qsgmii main port provided\n");
+			return -EINVAL;
+		}
+		priv->qsgmii_main_ports |= PHY_GMII_PORT(main_ports);
+	}
 
 	priv->regmap = syscon_node_to_regmap(node->parent);
 	if (IS_ERR(priv->regmap)) {
-- 
GitLab


From 5bd78c00d753d4e80e151555565334c475a559d3 Mon Sep 17 00:00:00 2001
From: Siddharth Vadapalli <s-vadapalli@ti.com>
Date: Wed, 26 Oct 2022 13:15:32 +0530
Subject: [PATCH 0966/4122] phy: ti: gmii-sel: Add support for CPSW9G GMII SEL
 in J721e

Each of the CPSW9G ports in J721e support additional modes like QSGMII.
Add a new compatible for J721e to support the additional modes.

In TI's J721e, each of the CPSW9G ethernet interfaces can act as a
QSGMII main or QSGMII-SUB port. The QSGMII main interface is responsible
for performing auto-negotiation between the MAC and the PHY while the rest
of the interfaces are designated as QSGMII-SUB interfaces, indicating that
they will not be taking part in the auto-negotiation process.

Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Link: https://lore.kernel.org/r/20221026074532.109220-4-s-vadapalli@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/ti/phy-gmii-sel.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/phy/ti/phy-gmii-sel.c b/drivers/phy/ti/phy-gmii-sel.c
index c8f30d2e1f460..8c667819c39a1 100644
--- a/drivers/phy/ti/phy-gmii-sel.c
+++ b/drivers/phy/ti/phy-gmii-sel.c
@@ -218,6 +218,15 @@ struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw5g_soc_j7200 = {
 	.num_qsgmii_main_ports = 1,
 };
 
+static const
+struct phy_gmii_sel_soc_data phy_gmii_sel_cpsw9g_soc_j721e = {
+	.use_of_data = true,
+	.regfields = phy_gmii_sel_fields_am654,
+	.extra_modes = BIT(PHY_INTERFACE_MODE_QSGMII),
+	.num_ports = 8,
+	.num_qsgmii_main_ports = 2,
+};
+
 static const struct of_device_id phy_gmii_sel_id_table[] = {
 	{
 		.compatible	= "ti,am3352-phy-gmii-sel",
@@ -243,6 +252,10 @@ static const struct of_device_id phy_gmii_sel_id_table[] = {
 		.compatible	= "ti,j7200-cpsw5g-phy-gmii-sel",
 		.data		= &phy_gmii_sel_cpsw5g_soc_j7200,
 	},
+	{
+		.compatible	= "ti,j721e-cpsw9g-phy-gmii-sel",
+		.data		= &phy_gmii_sel_cpsw9g_soc_j721e,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, phy_gmii_sel_id_table);
-- 
GitLab


From 53bffe0055741440a6c91abb80bad1c62ea443e3 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 26 Oct 2022 15:44:49 -0700
Subject: [PATCH 0967/4122] phy: phy-brcm-usb: Utilize
 platform_get_irq_byname_optional()

The wake-up interrupt lines are entirely optional, avoid printing
messages that interrupts were not found by switching to the _optional
variant.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Acked-by: Justin Chen <justinpopo6@gmail.com>
Link: https://lore.kernel.org/r/20221026224450.2958762-1-f.fainelli@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/broadcom/phy-brcm-usb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/broadcom/phy-brcm-usb.c b/drivers/phy/broadcom/phy-brcm-usb.c
index aafba4a047018..4de39999f43d3 100644
--- a/drivers/phy/broadcom/phy-brcm-usb.c
+++ b/drivers/phy/broadcom/phy-brcm-usb.c
@@ -446,9 +446,9 @@ static int brcm_usb_phy_dvr_init(struct platform_device *pdev,
 		priv->suspend_clk = NULL;
 	}
 
-	priv->wake_irq = platform_get_irq_byname(pdev, "wake");
+	priv->wake_irq = platform_get_irq_byname_optional(pdev, "wake");
 	if (priv->wake_irq < 0)
-		priv->wake_irq = platform_get_irq_byname(pdev, "wakeup");
+		priv->wake_irq = platform_get_irq_byname_optional(pdev, "wakeup");
 	if (priv->wake_irq >= 0) {
 		err = devm_request_irq(dev, priv->wake_irq,
 				       brcm_usb_phy_wake_isr, 0,
-- 
GitLab


From 2428787f16155aa03aa63d5c130e83809a7df5cf Mon Sep 17 00:00:00 2001
From: Wayne Chang <waynec@nvidia.com>
Date: Wed, 5 Oct 2022 16:40:31 +0800
Subject: [PATCH 0968/4122] phy: tegra: xusb: Remove usb3 supply

Remove redundant codes for getting the vbus supply of usb3 ports because
we get and control the vbus supply by the companion usb2 ports

Signed-off-by: Wayne Chang <waynec@nvidia.com>
Signed-off-by: Haotien Hsu <haotienh@nvidia.com>
Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Link: https://lore.kernel.org/r/20221005084031.2154251-1-haotienh@nvidia.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/tegra/xusb-tegra124.c |  1 -
 drivers/phy/tegra/xusb-tegra186.c |  1 -
 drivers/phy/tegra/xusb-tegra210.c |  1 -
 drivers/phy/tegra/xusb.c          | 10 +---------
 drivers/phy/tegra/xusb.h          |  2 --
 5 files changed, 1 insertion(+), 14 deletions(-)

diff --git a/drivers/phy/tegra/xusb-tegra124.c b/drivers/phy/tegra/xusb-tegra124.c
index db56c7fbe60be..f4f75ea033b81 100644
--- a/drivers/phy/tegra/xusb-tegra124.c
+++ b/drivers/phy/tegra/xusb-tegra124.c
@@ -1652,7 +1652,6 @@ tegra124_usb3_port_map(struct tegra_xusb_port *port)
 
 static const struct tegra_xusb_port_ops tegra124_usb3_port_ops = {
 	.release = tegra_xusb_usb3_port_release,
-	.remove = tegra_xusb_usb3_port_remove,
 	.enable = tegra124_usb3_port_enable,
 	.disable = tegra124_usb3_port_disable,
 	.map = tegra124_usb3_port_map,
diff --git a/drivers/phy/tegra/xusb-tegra186.c b/drivers/phy/tegra/xusb-tegra186.c
index 0996ede63387a..6a8bd87cfdbda 100644
--- a/drivers/phy/tegra/xusb-tegra186.c
+++ b/drivers/phy/tegra/xusb-tegra186.c
@@ -1185,7 +1185,6 @@ tegra186_usb3_port_map(struct tegra_xusb_port *port)
 
 static const struct tegra_xusb_port_ops tegra186_usb3_port_ops = {
 	.release = tegra_xusb_usb3_port_release,
-	.remove = tegra_xusb_usb3_port_remove,
 	.enable = tegra186_usb3_port_enable,
 	.disable = tegra186_usb3_port_disable,
 	.map = tegra186_usb3_port_map,
diff --git a/drivers/phy/tegra/xusb-tegra210.c b/drivers/phy/tegra/xusb-tegra210.c
index eedfc7c2cc052..ebc8a7e21a318 100644
--- a/drivers/phy/tegra/xusb-tegra210.c
+++ b/drivers/phy/tegra/xusb-tegra210.c
@@ -3078,7 +3078,6 @@ tegra210_usb3_port_map(struct tegra_xusb_port *port)
 
 static const struct tegra_xusb_port_ops tegra210_usb3_port_ops = {
 	.release = tegra_xusb_usb3_port_release,
-	.remove = tegra_xusb_usb3_port_remove,
 	.enable = tegra210_usb3_port_enable,
 	.disable = tegra210_usb3_port_disable,
 	.map = tegra210_usb3_port_map,
diff --git a/drivers/phy/tegra/xusb.c b/drivers/phy/tegra/xusb.c
index dce45fbbd699c..ff4b930879f3c 100644
--- a/drivers/phy/tegra/xusb.c
+++ b/drivers/phy/tegra/xusb.c
@@ -954,8 +954,7 @@ static int tegra_xusb_usb3_port_parse_dt(struct tegra_xusb_usb3_port *usb3)
 			return -EINVAL;
 	}
 
-	usb3->supply = regulator_get(&port->dev, "vbus");
-	return PTR_ERR_OR_ZERO(usb3->supply);
+	return 0;
 }
 
 static int tegra_xusb_add_usb3_port(struct tegra_xusb_padctl *padctl,
@@ -1012,13 +1011,6 @@ void tegra_xusb_usb3_port_release(struct tegra_xusb_port *port)
 	kfree(usb3);
 }
 
-void tegra_xusb_usb3_port_remove(struct tegra_xusb_port *port)
-{
-	struct tegra_xusb_usb3_port *usb3 = to_usb3_port(port);
-
-	regulator_put(usb3->supply);
-}
-
 static void __tegra_xusb_remove_ports(struct tegra_xusb_padctl *padctl)
 {
 	struct tegra_xusb_port *port, *tmp;
diff --git a/drivers/phy/tegra/xusb.h b/drivers/phy/tegra/xusb.h
index 8cfbbdbd6e0c0..c384734a61c2a 100644
--- a/drivers/phy/tegra/xusb.h
+++ b/drivers/phy/tegra/xusb.h
@@ -359,7 +359,6 @@ void tegra_xusb_hsic_port_release(struct tegra_xusb_port *port);
 
 struct tegra_xusb_usb3_port {
 	struct tegra_xusb_port base;
-	struct regulator *supply;
 	bool context_saved;
 	unsigned int port;
 	bool internal;
@@ -381,7 +380,6 @@ struct tegra_xusb_usb3_port *
 tegra_xusb_find_usb3_port(struct tegra_xusb_padctl *padctl,
 			  unsigned int index);
 void tegra_xusb_usb3_port_release(struct tegra_xusb_port *port);
-void tegra_xusb_usb3_port_remove(struct tegra_xusb_port *port);
 
 struct tegra_xusb_port_ops {
 	void (*release)(struct tegra_xusb_port *port);
-- 
GitLab


From 846d479224537185768276dd4a84c1bda2bbcd4e Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@seco.com>
Date: Tue, 18 Oct 2022 13:58:41 -0400
Subject: [PATCH 0969/4122] doc: phy: Document typical order of API calls

Document the typical order of API calls to used by new drivers and
controllers. Many existing controllers follow this order, but some do
not. This is especially true for controllers designed to work with one
particular PHY driver, which may not need a call to (for example)
phy_init.

Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Link: https://lore.kernel.org/r/20221018175841.1906611-1-sean.anderson@seco.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/driver-api/phy/phy.rst | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/Documentation/driver-api/phy/phy.rst b/Documentation/driver-api/phy/phy.rst
index 8fc1ce0bb905d..8e8b3e8f95238 100644
--- a/Documentation/driver-api/phy/phy.rst
+++ b/Documentation/driver-api/phy/phy.rst
@@ -94,7 +94,8 @@ Inorder to dereference the private data (in phy_ops), the phy provider driver
 can use phy_set_drvdata() after creating the PHY and use phy_get_drvdata() in
 phy_ops to get back the private data.
 
-4. Getting a reference to the PHY
+Getting a reference to the PHY
+==============================
 
 Before the controller can make use of the PHY, it has to get a reference to
 it. This framework provides the following APIs to get a reference to the PHY.
@@ -130,6 +131,28 @@ the phy_init() and phy_exit() calls, and phy_power_on() and
 phy_power_off() calls are all NOP when applied to a NULL phy. The NULL
 phy is useful in devices for handling optional phy devices.
 
+Order of API calls
+==================
+
+The general order of calls should be::
+
+    [devm_][of_]phy_get()
+    phy_init()
+    phy_power_on()
+    [phy_set_mode[_ext]()]
+    ...
+    phy_power_off()
+    phy_exit()
+    [[of_]phy_put()]
+
+Some PHY drivers may not implement :c:func:`phy_init` or :c:func:`phy_power_on`,
+but controllers should always call these functions to be compatible with other
+PHYs. Some PHYs may require :c:func:`phy_set_mode <phy_set_mode_ext>`, while
+others may use a default mode (typically configured via devicetree or other
+firmware). For compatibility, you should always call this function if you know
+what mode you will be using. Generally, this function should be called after
+:c:func:`phy_power_on`, although some PHY drivers may allow it at any time.
+
 Releasing a reference to the PHY
 ================================
 
-- 
GitLab


From 4eace75e0853273755b878ffa9cce6de84df975a Mon Sep 17 00:00:00 2001
From: Shiraz Saleem <shiraz.saleem@intel.com>
Date: Fri, 4 Nov 2022 18:49:57 -0500
Subject: [PATCH 0970/4122] RDMA/irdma: Report the correct link speed

The active link speed is currently hard-coded in irdma_query_port due
to which the port rate in ibstatus does reflect the active link speed.

Call ib_get_eth_speed in irdma_query_port to get the active link speed.

Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs")
Reported-by: Kamal Heib <kamalheib1@gmail.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Link: https://lore.kernel.org/r/20221104234957.1135-1-shiraz.saleem@intel.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/irdma/verbs.c | 35 +++--------------------------
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index a22afbb25bc58..434241789f123 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -63,36 +63,6 @@ static int irdma_query_device(struct ib_device *ibdev,
 	return 0;
 }
 
-/**
- * irdma_get_eth_speed_and_width - Get IB port speed and width from netdev speed
- * @link_speed: netdev phy link speed
- * @active_speed: IB port speed
- * @active_width: IB port width
- */
-static void irdma_get_eth_speed_and_width(u32 link_speed, u16 *active_speed,
-					  u8 *active_width)
-{
-	if (link_speed <= SPEED_1000) {
-		*active_width = IB_WIDTH_1X;
-		*active_speed = IB_SPEED_SDR;
-	} else if (link_speed <= SPEED_10000) {
-		*active_width = IB_WIDTH_1X;
-		*active_speed = IB_SPEED_FDR10;
-	} else if (link_speed <= SPEED_20000) {
-		*active_width = IB_WIDTH_4X;
-		*active_speed = IB_SPEED_DDR;
-	} else if (link_speed <= SPEED_25000) {
-		*active_width = IB_WIDTH_1X;
-		*active_speed = IB_SPEED_EDR;
-	} else if (link_speed <= SPEED_40000) {
-		*active_width = IB_WIDTH_4X;
-		*active_speed = IB_SPEED_FDR10;
-	} else {
-		*active_width = IB_WIDTH_4X;
-		*active_speed = IB_SPEED_EDR;
-	}
-}
-
 /**
  * irdma_query_port - get port attributes
  * @ibdev: device pointer from stack
@@ -120,8 +90,9 @@ static int irdma_query_port(struct ib_device *ibdev, u32 port,
 		props->state = IB_PORT_DOWN;
 		props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
 	}
-	irdma_get_eth_speed_and_width(SPEED_100000, &props->active_speed,
-				      &props->active_width);
+
+	ib_get_eth_speed(ibdev, port, &props->active_speed,
+			 &props->active_width);
 
 	if (rdma_protocol_roce(ibdev, 1)) {
 		props->gid_tbl_len = 32;
-- 
GitLab


From ece43fad220ba03c529cc0f6f302d796044e8476 Mon Sep 17 00:00:00 2001
From: Cheng Xu <chengyou@linux.alibaba.com>
Date: Mon, 7 Nov 2022 10:18:43 +0800
Subject: [PATCH 0971/4122] RDMA/erdma: Extend access right field of FRMR and
 REG MR to support atomic

To support atomic operations, IB_ACCESS_REMOTE_ATOMIC right should be
passed to hardware for permission check. Since "access mode" field in FRMR
SQE and RegMr command is never used by hw, we remove the "access mode"
field, so that we can then have enough space to extend access fields.

Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Link: https://lore.kernel.org/r/20221107021845.44598-2-chengyou@linux.alibaba.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/erdma/erdma_hw.h    |  6 ++----
 drivers/infiniband/hw/erdma/erdma_qp.c    |  3 +--
 drivers/infiniband/hw/erdma/erdma_verbs.c |  3 +--
 drivers/infiniband/hw/erdma/erdma_verbs.h | 12 +++++++-----
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index e788887732e1f..2a9a4c73d52cd 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -224,8 +224,7 @@ struct erdma_cmdq_create_cq_req {
 /* regmr cfg1 */
 #define ERDMA_CMD_REGMR_PD_MASK GENMASK(31, 12)
 #define ERDMA_CMD_REGMR_TYPE_MASK GENMASK(7, 6)
-#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 2)
-#define ERDMA_CMD_REGMR_ACC_MODE_MASK GENMASK(1, 0)
+#define ERDMA_CMD_REGMR_RIGHT_MASK GENMASK(5, 1)
 
 /* regmr cfg2 */
 #define ERDMA_CMD_REGMR_PAGESIZE_MASK GENMASK(31, 27)
@@ -370,8 +369,7 @@ struct erdma_rqe {
 #define ERDMA_SQE_HDR_WQEBB_INDEX_MASK GENMASK_ULL(15, 0)
 
 /* REG MR attrs */
-#define ERDMA_SQE_MR_MODE_MASK GENMASK(1, 0)
-#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 2)
+#define ERDMA_SQE_MR_ACCESS_MASK GENMASK(5, 1)
 #define ERDMA_SQE_MR_MTT_TYPE_MASK GENMASK(7, 6)
 #define ERDMA_SQE_MR_MTT_CNT_MASK GENMASK(31, 12)
 
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index 5fe1a339a4354..c7f343173cb90 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -397,8 +397,7 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
 		regmr_sge->addr = cpu_to_le64(mr->ibmr.iova);
 		regmr_sge->length = cpu_to_le32(mr->ibmr.length);
 		regmr_sge->stag = cpu_to_le32(reg_wr(send_wr)->key);
-		attrs = FIELD_PREP(ERDMA_SQE_MR_MODE_MASK, 0) |
-			FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
+		attrs = FIELD_PREP(ERDMA_SQE_MR_ACCESS_MASK, mr->access) |
 			FIELD_PREP(ERDMA_SQE_MR_MTT_CNT_MASK,
 				   mr->mem.mtt_nents);
 
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index 62be98e2b9414..f3bf87f17527f 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -118,8 +118,7 @@ static int regmr_cmd(struct erdma_dev *dev, struct erdma_mr *mr)
 		   FIELD_PREP(ERDMA_CMD_MR_MPT_IDX_MASK, mr->ibmr.lkey >> 8);
 	req.cfg1 = FIELD_PREP(ERDMA_CMD_REGMR_PD_MASK, pd->pdn) |
 		   FIELD_PREP(ERDMA_CMD_REGMR_TYPE_MASK, mr->type) |
-		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access) |
-		   FIELD_PREP(ERDMA_CMD_REGMR_ACC_MODE_MASK, 0);
+		   FIELD_PREP(ERDMA_CMD_REGMR_RIGHT_MASK, mr->access);
 	req.cfg2 = FIELD_PREP(ERDMA_CMD_REGMR_PAGESIZE_MASK,
 			      ilog2(mr->mem.page_size)) |
 		   FIELD_PREP(ERDMA_CMD_REGMR_MTT_TYPE_MASK, mr->mem.mtt_type) |
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index ab6380635e9e6..a5574f0252bb4 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -71,16 +71,18 @@ struct erdma_pd {
 #define ERDMA_MR_INLINE_MTT 0
 #define ERDMA_MR_INDIRECT_MTT 1
 
-#define ERDMA_MR_ACC_LR BIT(0)
-#define ERDMA_MR_ACC_LW BIT(1)
-#define ERDMA_MR_ACC_RR BIT(2)
-#define ERDMA_MR_ACC_RW BIT(3)
+#define ERDMA_MR_ACC_RA BIT(0)
+#define ERDMA_MR_ACC_LR BIT(1)
+#define ERDMA_MR_ACC_LW BIT(2)
+#define ERDMA_MR_ACC_RR BIT(3)
+#define ERDMA_MR_ACC_RW BIT(4)
 
 static inline u8 to_erdma_access_flags(int access)
 {
 	return (access & IB_ACCESS_REMOTE_READ ? ERDMA_MR_ACC_RR : 0) |
 	       (access & IB_ACCESS_LOCAL_WRITE ? ERDMA_MR_ACC_LW : 0) |
-	       (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0);
+	       (access & IB_ACCESS_REMOTE_WRITE ? ERDMA_MR_ACC_RW : 0) |
+	       (access & IB_ACCESS_REMOTE_ATOMIC ? ERDMA_MR_ACC_RA : 0);
 }
 
 struct erdma_mem {
-- 
GitLab


From 71c6925f280ae8cb52eafee2404ae75c176c28ba Mon Sep 17 00:00:00 2001
From: Cheng Xu <chengyou@linux.alibaba.com>
Date: Mon, 7 Nov 2022 10:18:44 +0800
Subject: [PATCH 0972/4122] RDMA/erdma: Report atomic capacity when hardware
 supports atomic feature

Introduce "capacity flags" field at where hardware put all zeros originally
in "query device" response. Using this field, hardware can report atomic
feature if supports.

Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Link: https://lore.kernel.org/r/20221107021845.44598-3-chengyou@linux.alibaba.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/erdma/erdma.h       | 1 +
 drivers/infiniband/hw/erdma/erdma_hw.h    | 5 +++++
 drivers/infiniband/hw/erdma/erdma_main.c  | 1 +
 drivers/infiniband/hw/erdma/erdma_verbs.c | 4 ++++
 4 files changed, 11 insertions(+)

diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index 730783fbc8949..bb23d897c7108 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -124,6 +124,7 @@ struct erdma_devattr {
 	u32 fw_version;
 
 	unsigned char peer_addr[ETH_ALEN];
+	unsigned long cap_flags;
 
 	int numa_node;
 	enum erdma_cc_alg cc;
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 2a9a4c73d52cd..808e7ee56d931 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -303,6 +303,7 @@ struct erdma_cmdq_destroy_qp_req {
 
 /* cap qword 0 definition */
 #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
+#define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
 #define ERDMA_CMD_DEV_CAP_MAX_RECV_WR_MASK GENMASK_ULL(23, 16)
 #define ERDMA_CMD_DEV_CAP_MAX_MR_SIZE_MASK GENMASK_ULL(7, 0)
 
@@ -314,6 +315,10 @@ struct erdma_cmdq_destroy_qp_req {
 
 #define ERDMA_NQP_PER_QBLOCK 1024
 
+enum {
+	ERDMA_DEV_CAP_FLAGS_ATOMIC = 1 << 7,
+};
+
 #define ERDMA_CMD_INFO0_FW_VER_MASK GENMASK_ULL(31, 0)
 
 /* CQE hdr */
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index 49778bb294ae4..e44b06fea5957 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -374,6 +374,7 @@ static int erdma_dev_attrs_init(struct erdma_dev *dev)
 	dev->attrs.max_qp = ERDMA_NQP_PER_QBLOCK * ERDMA_GET_CAP(QBLOCK, cap1);
 	dev->attrs.max_mr = dev->attrs.max_qp << 1;
 	dev->attrs.max_cq = dev->attrs.max_qp << 1;
+	dev->attrs.cap_flags = ERDMA_GET_CAP(FLAGS, cap0);
 
 	dev->attrs.max_send_wr = ERDMA_MAX_SEND_WR;
 	dev->attrs.max_ord = ERDMA_MAX_ORD;
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index f3bf87f17527f..d843ce1f35f3d 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -288,6 +288,10 @@ int erdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
 	attr->max_mw = dev->attrs.max_mw;
 	attr->max_fast_reg_page_list_len = ERDMA_MAX_FRMR_PA;
 	attr->page_size_cap = ERDMA_PAGE_SIZE_SUPPORT;
+
+	if (dev->attrs.cap_flags & ERDMA_DEV_CAP_FLAGS_ATOMIC)
+		attr->atomic_cap = IB_ATOMIC_GLOB;
+
 	attr->fw_ver = dev->attrs.fw_version;
 
 	if (dev->netdev)
-- 
GitLab


From 0ca9c2e2844aa285c3656a29d4803839cfa8bca9 Mon Sep 17 00:00:00 2001
From: Cheng Xu <chengyou@linux.alibaba.com>
Date: Mon, 7 Nov 2022 10:18:45 +0800
Subject: [PATCH 0973/4122] RDMA/erdma: Implement atomic operations support

Add atomic operations support in post_send and poll_cq implementation.
Also, rename 'laddr' and 'lkey' in struct erdma_sge to 'addr' and 'key',
because this structure is used for both local and remote SGEs.

Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Link: https://lore.kernel.org/r/20221107021845.44598-4-chengyou@linux.alibaba.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/erdma/erdma_cq.c |  2 ++
 drivers/infiniband/hw/erdma/erdma_hw.h | 18 ++++++++++--
 drivers/infiniband/hw/erdma/erdma_qp.c | 40 ++++++++++++++++++++++----
 3 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_cq.c b/drivers/infiniband/hw/erdma/erdma_cq.c
index 58e0dc5c75d1d..cabd8678b3558 100644
--- a/drivers/infiniband/hw/erdma/erdma_cq.c
+++ b/drivers/infiniband/hw/erdma/erdma_cq.c
@@ -64,6 +64,8 @@ static const enum ib_wc_opcode wc_mapping_table[ERDMA_NUM_OPCODES] = {
 	[ERDMA_OP_REG_MR] = IB_WC_REG_MR,
 	[ERDMA_OP_LOCAL_INV] = IB_WC_LOCAL_INV,
 	[ERDMA_OP_READ_WITH_INV] = IB_WC_RDMA_READ,
+	[ERDMA_OP_ATOMIC_CAS] = IB_WC_COMP_SWAP,
+	[ERDMA_OP_ATOMIC_FAD] = IB_WC_FETCH_ADD,
 };
 
 static const struct {
diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 808e7ee56d931..1b2e2b70678f5 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -344,9 +344,9 @@ struct erdma_cqe {
 };
 
 struct erdma_sge {
-	__aligned_le64 laddr;
+	__aligned_le64 addr;
 	__le32 length;
-	__le32 lkey;
+	__le32 key;
 };
 
 /* Receive Queue Element */
@@ -413,6 +413,16 @@ struct erdma_readreq_sqe {
 	__le32 rsvd;
 };
 
+struct erdma_atomic_sqe {
+	__le64 hdr;
+	__le64 rsvd;
+	__le64 fetchadd_swap_data;
+	__le64 cmp_data;
+
+	struct erdma_sge remote;
+	struct erdma_sge sgl;
+};
+
 struct erdma_reg_mr_sqe {
 	__le64 hdr;
 	__le64 addr;
@@ -472,7 +482,9 @@ enum erdma_opcode {
 	ERDMA_OP_REG_MR = 14,
 	ERDMA_OP_LOCAL_INV = 15,
 	ERDMA_OP_READ_WITH_INV = 16,
-	ERDMA_NUM_OPCODES = 17,
+	ERDMA_OP_ATOMIC_CAS = 17,
+	ERDMA_OP_ATOMIC_FAD = 18,
+	ERDMA_NUM_OPCODES = 19,
 	ERDMA_OP_INVALID = ERDMA_NUM_OPCODES + 1
 };
 
diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index c7f343173cb90..521e97258de77 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -285,15 +285,16 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
 	u32 wqe_size, wqebb_cnt, hw_op, flags, sgl_offset;
 	u32 idx = *pi & (qp->attrs.sq_size - 1);
 	enum ib_wr_opcode op = send_wr->opcode;
+	struct erdma_atomic_sqe *atomic_sqe;
 	struct erdma_readreq_sqe *read_sqe;
 	struct erdma_reg_mr_sqe *regmr_sge;
 	struct erdma_write_sqe *write_sqe;
 	struct erdma_send_sqe *send_sqe;
 	struct ib_rdma_wr *rdma_wr;
-	struct erdma_mr *mr;
+	struct erdma_sge *sge;
 	__le32 *length_field;
+	struct erdma_mr *mr;
 	u64 wqe_hdr, *entry;
-	struct ib_sge *sge;
 	u32 attrs;
 	int ret;
 
@@ -360,9 +361,9 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
 
 		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
 				      qp->attrs.sq_size, SQEBB_SHIFT);
-		sge->addr = rdma_wr->remote_addr;
-		sge->lkey = rdma_wr->rkey;
-		sge->length = send_wr->sg_list[0].length;
+		sge->addr = cpu_to_le64(rdma_wr->remote_addr);
+		sge->key = cpu_to_le32(rdma_wr->rkey);
+		sge->length = cpu_to_le32(send_wr->sg_list[0].length);
 		wqe_size = sizeof(struct erdma_readreq_sqe) +
 			   send_wr->num_sge * sizeof(struct ib_sge);
 
@@ -423,6 +424,35 @@ static int erdma_push_one_sqe(struct erdma_qp *qp, u16 *pi,
 		regmr_sge->stag = cpu_to_le32(send_wr->ex.invalidate_rkey);
 		wqe_size = sizeof(struct erdma_reg_mr_sqe);
 		goto out;
+	case IB_WR_ATOMIC_CMP_AND_SWP:
+	case IB_WR_ATOMIC_FETCH_AND_ADD:
+		atomic_sqe = (struct erdma_atomic_sqe *)entry;
+		if (op == IB_WR_ATOMIC_CMP_AND_SWP) {
+			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+					      ERDMA_OP_ATOMIC_CAS);
+			atomic_sqe->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(send_wr)->swap);
+			atomic_sqe->cmp_data =
+				cpu_to_le64(atomic_wr(send_wr)->compare_add);
+		} else {
+			wqe_hdr |= FIELD_PREP(ERDMA_SQE_HDR_OPCODE_MASK,
+					      ERDMA_OP_ATOMIC_FAD);
+			atomic_sqe->fetchadd_swap_data =
+				cpu_to_le64(atomic_wr(send_wr)->compare_add);
+		}
+
+		sge = get_queue_entry(qp->kern_qp.sq_buf, idx + 1,
+				      qp->attrs.sq_size, SQEBB_SHIFT);
+		sge->addr = cpu_to_le64(atomic_wr(send_wr)->remote_addr);
+		sge->key = cpu_to_le32(atomic_wr(send_wr)->rkey);
+		sge++;
+
+		sge->addr = cpu_to_le64(send_wr->sg_list[0].addr);
+		sge->key = cpu_to_le32(send_wr->sg_list[0].lkey);
+		sge->length = cpu_to_le32(send_wr->sg_list[0].length);
+
+		wqe_size = sizeof(*atomic_sqe);
+		goto out;
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
GitLab


From c8a51f03503633ec4a3f390aaadc3e8959fa44de Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 3 Nov 2022 22:28:17 +0200
Subject: [PATCH 0974/4122] gpio: Add Generic regmap GPIO conversion to the
 TODO list

It's actually preferable to use Generic regmap GPIO over other
simple approaches. Add a TODO item for that.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/TODO | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/gpio/TODO b/drivers/gpio/TODO
index f87ff3fa8a531..76560744587a5 100644
--- a/drivers/gpio/TODO
+++ b/drivers/gpio/TODO
@@ -124,6 +124,13 @@ Work items:
   this with dry-coding and sending to maintainers to test
 
 
+Generic regmap GPIO
+
+In the very similar way to Generic MMIO GPIO convert the users which can
+take advantage of using regmap over direct IO accessors. Note, even in
+MMIO case the regmap MMIO with gpio-regmap.c is preferable over gpio-mmio.c.
+
+
 GPIOLIB irqchip
 
 The GPIOLIB irqchip is a helper irqchip for "simple cases" that should
-- 
GitLab


From 46af287cd5d7de316f1afd7966b27fc4369c6c35 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 4 Nov 2022 10:39:13 +0100
Subject: [PATCH 0975/4122] bus: mhi: host: pci_generic: add support for
 sc8280xp-crd SDX55 variant

The SC8280XP Compute Reference Design (CRD) has an on-PCB SDX55 modem
which uses MBIM.

The exact channel configuration is not known but the Foxconn SDX55
configuration allows the modem to be used so reuse that one for now.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://lore.kernel.org/r/20221104093913.23347-1-johan+linaro@kernel.org
[mani: modified the subject to format "bus: mhi: host"]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/bus/mhi/host/pci_generic.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
index c4259cb2d2899..fb3b050aed70e 100644
--- a/drivers/bus/mhi/host/pci_generic.c
+++ b/drivers/bus/mhi/host/pci_generic.c
@@ -544,6 +544,8 @@ static const struct mhi_pci_dev_info mhi_telit_fn990_info = {
 static const struct pci_device_id mhi_pci_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0304),
 		.driver_data = (kernel_ulong_t) &mhi_qcom_sdx24_info },
+	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, PCI_VENDOR_ID_QCOM, 0x010c),
+		.driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
 	/* EM919x (sdx55), use the same vid:pid as qcom-sdx55m */
 	{ PCI_DEVICE_SUB(PCI_VENDOR_ID_QCOM, 0x0306, 0x18d7, 0x0200),
 		.driver_data = (kernel_ulong_t) &mhi_sierra_em919x_info },
-- 
GitLab


From a5cfc9d65879c0d377f732531a2e80ee3a9eebbc Mon Sep 17 00:00:00 2001
From: Rajat Khandelwal <rajat.khandelwal@intel.com>
Date: Tue, 1 Nov 2022 17:20:42 +0530
Subject: [PATCH 0976/4122] thunderbolt: Add wake on connect/disconnect on USB4
 ports

Wake on connect/disconnect is only supported while runtime suspend for
now, which is obviously necessary. It is also not inherently desired for
the system to wakeup on Thunderbolt/USB4 hot plug events. However, we
can still make user in control of waking up the system in the events of
hot plug/unplug.

This patch adds 'wakeup' attribute under 'usb4_portX/power' sysfs
attribute and only enables wakes on connect/disconnect to the respective
port when 'wakeup' is set to 'enabled'. The attribute is set to
'disabled' by default.

Signed-off-by: Rajat Khandelwal <rajat.khandelwal@intel.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/thunderbolt/tb_regs.h   |  2 ++
 drivers/thunderbolt/usb4.c      | 33 +++++++++++++++++++++++++--------
 drivers/thunderbolt/usb4_port.c |  3 +++
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/thunderbolt/tb_regs.h b/drivers/thunderbolt/tb_regs.h
index 86319dca0f8ca..3c38b0cb8f74e 100644
--- a/drivers/thunderbolt/tb_regs.h
+++ b/drivers/thunderbolt/tb_regs.h
@@ -361,6 +361,8 @@ struct tb_regs_port_header {
 #define PORT_CS_18_BE				BIT(8)
 #define PORT_CS_18_TCM				BIT(9)
 #define PORT_CS_18_CPS				BIT(10)
+#define PORT_CS_18_WOCS				BIT(16)
+#define PORT_CS_18_WODS				BIT(17)
 #define PORT_CS_18_WOU4S			BIT(18)
 #define PORT_CS_19				0x13
 #define PORT_CS_19_PC				BIT(3)
diff --git a/drivers/thunderbolt/usb4.c b/drivers/thunderbolt/usb4.c
index f986854aa207a..2ed50fcbcca7a 100644
--- a/drivers/thunderbolt/usb4.c
+++ b/drivers/thunderbolt/usb4.c
@@ -155,6 +155,8 @@ static inline int usb4_switch_op_data(struct tb_switch *sw, u16 opcode,
 
 static void usb4_switch_check_wakes(struct tb_switch *sw)
 {
+	bool wakeup_usb4 = false;
+	struct usb4_port *usb4;
 	struct tb_port *port;
 	bool wakeup = false;
 	u32 val;
@@ -173,20 +175,31 @@ static void usb4_switch_check_wakes(struct tb_switch *sw)
 		wakeup = val & (ROUTER_CS_6_WOPS | ROUTER_CS_6_WOUS);
 	}
 
-	/* Check for any connected downstream ports for USB4 wake */
+	/*
+	 * Check for any downstream ports for USB4 wake,
+	 * connection wake and disconnection wake.
+	 */
 	tb_switch_for_each_port(sw, port) {
-		if (!tb_port_has_remote(port))
+		if (!port->cap_usb4)
 			continue;
 
 		if (tb_port_read(port, &val, TB_CFG_PORT,
 				 port->cap_usb4 + PORT_CS_18, 1))
 			break;
 
-		tb_port_dbg(port, "USB4 wake: %s\n",
-			    (val & PORT_CS_18_WOU4S) ? "yes" : "no");
+		tb_port_dbg(port, "USB4 wake: %s, connection wake: %s, disconnection wake: %s\n",
+			    (val & PORT_CS_18_WOU4S) ? "yes" : "no",
+			    (val & PORT_CS_18_WOCS) ? "yes" : "no",
+			    (val & PORT_CS_18_WODS) ? "yes" : "no");
+
+		wakeup_usb4 = val & (PORT_CS_18_WOU4S | PORT_CS_18_WOCS |
+				     PORT_CS_18_WODS);
+
+		usb4 = port->usb4;
+		if (device_may_wakeup(&usb4->dev) && wakeup_usb4)
+			pm_wakeup_event(&usb4->dev, 0);
 
-		if (val & PORT_CS_18_WOU4S)
-			wakeup = true;
+		wakeup |= wakeup_usb4;
 	}
 
 	if (wakeup)
@@ -366,6 +379,7 @@ bool usb4_switch_lane_bonding_possible(struct tb_switch *sw)
  */
 int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags)
 {
+	struct usb4_port *usb4;
 	struct tb_port *port;
 	u64 route = tb_route(sw);
 	u32 val;
@@ -395,10 +409,13 @@ int usb4_switch_set_wake(struct tb_switch *sw, unsigned int flags)
 			val |= PORT_CS_19_WOU4;
 		} else {
 			bool configured = val & PORT_CS_19_PC;
+			usb4 = port->usb4;
 
-			if ((flags & TB_WAKE_ON_CONNECT) && !configured)
+			if (((flags & TB_WAKE_ON_CONNECT) |
+			      device_may_wakeup(&usb4->dev)) && !configured)
 				val |= PORT_CS_19_WOC;
-			if ((flags & TB_WAKE_ON_DISCONNECT) && configured)
+			if (((flags & TB_WAKE_ON_DISCONNECT) |
+			      device_may_wakeup(&usb4->dev)) && configured)
 				val |= PORT_CS_19_WOD;
 			if ((flags & TB_WAKE_ON_USB4) && configured)
 				val |= PORT_CS_19_WOU4;
diff --git a/drivers/thunderbolt/usb4_port.c b/drivers/thunderbolt/usb4_port.c
index 1a30c0a232865..e355bfd6343ff 100644
--- a/drivers/thunderbolt/usb4_port.c
+++ b/drivers/thunderbolt/usb4_port.c
@@ -284,6 +284,9 @@ struct usb4_port *usb4_port_device_add(struct tb_port *port)
 		}
 	}
 
+	if (!tb_is_upstream_port(port))
+		device_set_wakeup_capable(&usb4->dev, true);
+
 	pm_runtime_no_callbacks(&usb4->dev);
 	pm_runtime_set_active(&usb4->dev);
 	pm_runtime_enable(&usb4->dev);
-- 
GitLab


From 58635d6615f1e5a870548ae8999870fdfcdecec0 Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Mon, 7 Nov 2022 13:12:21 +0100
Subject: [PATCH 0977/4122] s390/mm: fix virtual-physical address confusion for
 swiotlb

swiotlb passes virtual addresses to set_memory_encrypted() and
set_memory_decrypted(), but uv_remove_shared() and uv_set_shared()
expect physical addresses. This currently works, because virtual
and physical addresses are the same.

Add virt_to_phys() to resolve the virtual-physical confusion.

Reported-by: Marc Hartmayer <mhartmay@linux.ibm.com>
Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Link: https://lore.kernel.org/r/20221107121221.156274-2-nrb@linux.ibm.com
Message-Id: <20221107121221.156274-2-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/mem_encrypt.h |  4 ++--
 arch/s390/mm/init.c                 | 12 ++++++------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/s390/include/asm/mem_encrypt.h b/arch/s390/include/asm/mem_encrypt.h
index 08a8b96606d7e..b85e13505a0f2 100644
--- a/arch/s390/include/asm/mem_encrypt.h
+++ b/arch/s390/include/asm/mem_encrypt.h
@@ -4,8 +4,8 @@
 
 #ifndef __ASSEMBLY__
 
-int set_memory_encrypted(unsigned long addr, int numpages);
-int set_memory_decrypted(unsigned long addr, int numpages);
+int set_memory_encrypted(unsigned long vaddr, int numpages);
+int set_memory_decrypted(unsigned long vaddr, int numpages);
 
 #endif	/* __ASSEMBLY__ */
 
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 97d66a3e60fb2..d509656c67d77 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -140,25 +140,25 @@ void mark_rodata_ro(void)
 	debug_checkwx();
 }
 
-int set_memory_encrypted(unsigned long addr, int numpages)
+int set_memory_encrypted(unsigned long vaddr, int numpages)
 {
 	int i;
 
 	/* make specified pages unshared, (swiotlb, dma_free) */
 	for (i = 0; i < numpages; ++i) {
-		uv_remove_shared(addr);
-		addr += PAGE_SIZE;
+		uv_remove_shared(virt_to_phys((void *)vaddr));
+		vaddr += PAGE_SIZE;
 	}
 	return 0;
 }
 
-int set_memory_decrypted(unsigned long addr, int numpages)
+int set_memory_decrypted(unsigned long vaddr, int numpages)
 {
 	int i;
 	/* make specified pages shared (swiotlb, dma_alloca) */
 	for (i = 0; i < numpages; ++i) {
-		uv_set_shared(addr);
-		addr += PAGE_SIZE;
+		uv_set_shared(virt_to_phys((void *)vaddr));
+		vaddr += PAGE_SIZE;
 	}
 	return 0;
 }
-- 
GitLab


From d8425a8c3a8419dd505016951dd6393f21fb394e Mon Sep 17 00:00:00 2001
From: Song Fuchang <song.fc@gmail.com>
Date: Mon, 7 Nov 2022 19:18:35 +0530
Subject: [PATCH 0978/4122] bus: mhi: host: pci_generic: Add HP variant of
 T99W175

The Foxconn T99W175 modem has an HP variant, which has
the following output from lspci:

01:00.0 Wireless controller [0d40]: Device 03f0:0a6c

It also has some HP-specific serial numbers on the
metal case. It works well with this driver, so add
support for this to the pci_generic driver.

Signed-off-by: Song Fuchang <song.fc@gmail.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
[mani: manually applied the patch]
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/bus/mhi/host/pci_generic.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
index fb3b050aed70e..b58a303678964 100644
--- a/drivers/bus/mhi/host/pci_generic.c
+++ b/drivers/bus/mhi/host/pci_generic.c
@@ -598,6 +598,9 @@ static const struct pci_device_id mhi_pci_id_table[] = {
 	/* MV32-WB (Cinterion) */
 	{ PCI_DEVICE(0x1269, 0x00bb),
 		.driver_data = (kernel_ulong_t) &mhi_mv32_info },
+	/* T99W175 (sdx55), HP variant */
+	{ PCI_DEVICE(0x03f0, 0x0a6c),
+		.driver_data = (kernel_ulong_t) &mhi_foxconn_sdx55_info },
 	{  }
 };
 MODULE_DEVICE_TABLE(pci, mhi_pci_id_table);
-- 
GitLab


From a9cd6c6766857212894dd736d9f2bc29f1416f6a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 4 Nov 2022 16:44:52 -0300
Subject: [PATCH 0979/4122] perf trace: Add BPF augmenter to
 perf_event_open()'s 'struct perf_event_attr' arg

Using BPF for that, doing a cleverish reuse of perf_event_attr__fprintf(),
that really needs to be turned into __snprintf(), etc.

But since the plan is to go the BTF way probably use libbpf's
btf_dump__dump_type_data().

Example:

[root@quaco ~]# perf trace -e ~acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.c,perf_event_open --max-events 10 perf stat --quiet sleep 0.001
fg
     0.000 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
     0.067 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x3, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 4
     0.120 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 5
     0.172 perf_event_open(attr_uptr: { type: 1, size: 128, config: 0x2, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 7
     0.190 perf_event_open(attr_uptr: { size: 128, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 8
     0.199 perf_event_open(attr_uptr: { size: 128, config: 0x1, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 9
     0.204 perf_event_open(attr_uptr: { size: 128, config: 0x4, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 10
     0.210 perf_event_open(attr_uptr: { size: 128, config: 0x5, sample_type: IDENTIFIER, read_format: TOTAL_TIME_ENABLED|TOTAL_TIME_RUNNING, disabled: 1, inherit: 1, enable_on_exec: 1, exclude_guest: 1 }, pid: 258859 (perf), cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = 11
[root@quaco ~]#

Suggested-by: Ian Rogers <irogers@google.com>
Tested-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/Y2V2Tpu+2vzJyon2@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c                    |  3 +-
 .../examples/bpf/augmented_raw_syscalls.c     | 44 +++++++++++++++++++
 tools/perf/trace/beauty/perf_event_open.c     | 44 +++++++++++++++++++
 3 files changed, 90 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 72991528687ea..5690c33c523b3 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1053,7 +1053,8 @@ static struct syscall_fmt syscall_fmts[] = {
 	  .arg = { [0] = { .scnprintf = SCA_FDAT,	/* dfd */ },
 		   [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, },
 	{ .name	    = "perf_event_open",
-	  .arg = { [2] = { .scnprintf = SCA_INT,	/* cpu */ },
+	  .arg = { [0] = { .scnprintf = SCA_PERF_ATTR,  /* attr */ },
+		   [2] = { .scnprintf = SCA_INT,	/* cpu */ },
 		   [3] = { .scnprintf = SCA_FD,		/* group_fd */ },
 		   [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, },
 	{ .name	    = "pipe2",
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 926238efd7d8d..0599823e8ae1b 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -129,6 +129,7 @@ struct augmented_args_payload {
 			struct augmented_arg arg, arg2;
 		};
 		struct sockaddr_storage saddr;
+		char   __data[sizeof(struct augmented_arg)];
 	};
 };
 
@@ -293,6 +294,49 @@ int sys_enter_renameat(struct syscall_enter_args *args)
 	return augmented__output(args, augmented_args, len);
 }
 
+#define PERF_ATTR_SIZE_VER0     64      /* sizeof first published struct */
+
+// we need just the start, get the size to then copy it
+struct perf_event_attr_size {
+        __u32                   type;
+        /*
+         * Size of the attr structure, for fwd/bwd compat.
+         */
+        __u32                   size;
+};
+
+SEC("!syscalls:sys_enter_perf_event_open")
+int sys_enter_perf_event_open(struct syscall_enter_args *args)
+{
+	struct augmented_args_payload *augmented_args = augmented_args_payload();
+	const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read;
+	unsigned int len = sizeof(augmented_args->args);
+
+        if (augmented_args == NULL)
+		goto failure;
+
+	if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0)
+		goto failure;
+
+	attr_read = (const struct perf_event_attr_size *)augmented_args->__data;
+
+	__u32 size = attr_read->size;
+
+	if (!size)
+		size = PERF_ATTR_SIZE_VER0;
+
+	if (size > sizeof(augmented_args->__data))
+                goto failure;
+
+	// Now that we read attr->size and tested it against the size limits, read it completely
+	if (bpf_probe_read(&augmented_args->__data, size, attr) < 0)
+		goto failure;
+
+	return augmented__output(args, augmented_args, len + size);
+failure:
+	return 1; /* Failure: don't filter */
+}
+
 static pid_t getpid(void)
 {
 	return bpf_get_current_pid_tgid();
diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c
index 11d47dbe63bd1..01ee15fe9d0c7 100644
--- a/tools/perf/trace/beauty/perf_event_open.c
+++ b/tools/perf/trace/beauty/perf_event_open.c
@@ -44,3 +44,47 @@ static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
 }
 
 #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
+
+struct attr_fprintf_args {
+	size_t size, printed;
+	char *bf;
+	bool first;
+};
+
+static int attr__fprintf(FILE *fp __maybe_unused, const char *name, const char *val, void *priv)
+{
+	struct attr_fprintf_args *args = priv;
+	size_t printed = scnprintf(args->bf + args->printed , args->size - args->printed, "%s%s: %s", args->first ? "" : ", ", name, val);
+
+	args->first = false;
+	args->printed += printed;
+	return printed;
+}
+
+static size_t perf_event_attr___scnprintf(struct perf_event_attr *attr, char *bf, size_t size, bool show_zeros __maybe_unused)
+{
+	struct attr_fprintf_args args = {
+		.printed = scnprintf(bf, size, "{ "),
+		.size    = size,
+		.first   = true,
+		.bf	 = bf,
+	};
+
+	perf_event_attr__fprintf(stdout, attr, attr__fprintf, &args);
+	return args.printed + scnprintf(bf + args.printed, size - args.printed, " }");
+}
+
+static size_t syscall_arg__scnprintf_augmented_perf_event_attr(struct syscall_arg *arg, char *bf, size_t size)
+{
+	return perf_event_attr___scnprintf((void *)arg->augmented.args, bf, size, arg->trace->show_zeros);
+}
+
+static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, struct syscall_arg *arg)
+{
+	if (arg->augmented.args)
+		return syscall_arg__scnprintf_augmented_perf_event_attr(arg, bf, size);
+
+	return scnprintf(bf, size, "%#lx", arg->val);
+}
+
+#define SCA_PERF_ATTR syscall_arg__scnprintf_perf_event_attr
-- 
GitLab


From 95e7fc84c78adeef654acb919f04d98a87e6372d Mon Sep 17 00:00:00 2001
From: Weilong Chen <chenweilong@huawei.com>
Date: Tue, 1 Nov 2022 16:24:42 +0800
Subject: [PATCH 0980/4122] dt-bindings: gpio: add entry for
 hisilicon,ascend910-gpio

Add the new compatible for HiSilicon gpio controller driver.

Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 .../gpio/hisilicon,ascend910-gpio.yaml        | 56 +++++++++++++++++++
 MAINTAINERS                                   |  1 +
 2 files changed, 57 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml

diff --git a/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml b/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml
new file mode 100644
index 0000000000000..735d97d645a09
--- /dev/null
+++ b/Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/hisilicon,ascend910-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: HiSilicon common GPIO controller
+
+maintainers:
+  - Jay Fang <f.fangjian@huawei.com>
+
+description:
+  The HiSilicon common GPIO controller can be used for many different
+  types of SoC such as Huawei Ascend AI series chips.
+
+properties:
+  compatible:
+    const: hisilicon,ascend910-gpio
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+
+  ngpios:
+    minimum: 1
+    maximum: 32
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - gpio-controller
+  - "#gpio-cells"
+  - ngpios
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    gpio@840d0000 {
+      compatible = "hisilicon,ascend910-gpio";
+      reg = <0x840d0000 0x1000>;
+      ngpios = <32>;
+      gpio-controller;
+      #gpio-cells = <2>;
+      interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 74efa0492c430..02f333c1093eb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9198,6 +9198,7 @@ HISILICON GPIO DRIVER
 M:	Jay Fang <f.fangjian@huawei.com>
 L:	linux-gpio@vger.kernel.org
 S:	Maintained
+F:	Documentation/devicetree/bindings/gpio/hisilicon,ascend910-gpio.yaml
 F:	drivers/gpio/gpio-hisi.c
 
 HISILICON HIGH PERFORMANCE RSA ENGINE DRIVER (HPRE)
-- 
GitLab


From 80280df758c1498485988b30cf6887fde7796056 Mon Sep 17 00:00:00 2001
From: Weilong Chen <chenweilong@huawei.com>
Date: Tue, 1 Nov 2022 16:24:41 +0800
Subject: [PATCH 0981/4122] gpio: hisi: Add initial device tree support

Add support for HiSilicon GPIO controller in embedded platform, which
boot from devicetree.

Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Acked-by: Jay Fang <f.fangjian@huawei.com>
Reviewed-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig     | 2 +-
 drivers/gpio/gpio-hisi.c | 7 +++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 8c756cb292140..4bfedb0109a76 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -319,7 +319,7 @@ config GPIO_GRGPIO
 
 config GPIO_HISI
 	tristate "HiSilicon GPIO controller driver"
-	depends on (ARM64 && ACPI) || COMPILE_TEST
+	depends on ARM64 || COMPILE_TEST
 	select GPIO_GENERIC
 	select GPIOLIB_IRQCHIP
 	help
diff --git a/drivers/gpio/gpio-hisi.c b/drivers/gpio/gpio-hisi.c
index 3caabef5c7a2e..55bd69043bf42 100644
--- a/drivers/gpio/gpio-hisi.c
+++ b/drivers/gpio/gpio-hisi.c
@@ -221,6 +221,12 @@ static const struct acpi_device_id hisi_gpio_acpi_match[] = {
 };
 MODULE_DEVICE_TABLE(acpi, hisi_gpio_acpi_match);
 
+static const struct of_device_id hisi_gpio_dts_match[] = {
+	{ .compatible = "hisilicon,ascend910-gpio", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, hisi_gpio_dts_match);
+
 static void hisi_gpio_get_pdata(struct device *dev,
 				struct hisi_gpio *hisi_gpio)
 {
@@ -311,6 +317,7 @@ static struct platform_driver hisi_gpio_driver = {
 	.driver		= {
 		.name	= HISI_GPIO_DRIVER_NAME,
 		.acpi_match_table = hisi_gpio_acpi_match,
+		.of_match_table = hisi_gpio_dts_match,
 	},
 	.probe		= hisi_gpio_probe,
 };
-- 
GitLab


From 9f0b4cc174c3c5ceb9322d01372369610f327c42 Mon Sep 17 00:00:00 2001
From: Yipeng Zou <zouyipeng@huawei.com>
Date: Fri, 4 Nov 2022 11:24:30 +0800
Subject: [PATCH 0982/4122] PCI/ACPI: Use METHOD_NAME__UID instead of plain
 string

Replace the string "_UID" with the METHOD_NAME__UID macro so instances are
easier to find.

Link: https://lore.kernel.org/r/20221104032430.186424-1-zouyipeng@huawei.com
Signed-off-by: Yipeng Zou <zouyipeng@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci-acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c
index a46fec776ad77..068d6745bf98c 100644
--- a/drivers/pci/pci-acpi.c
+++ b/drivers/pci/pci-acpi.c
@@ -67,7 +67,7 @@ static acpi_status acpi_match_rc(acpi_handle handle, u32 lvl, void *context,
 	unsigned long long uid;
 	acpi_status status;
 
-	status = acpi_evaluate_integer(handle, "_UID", NULL, &uid);
+	status = acpi_evaluate_integer(handle, METHOD_NAME__UID, NULL, &uid);
 	if (ACPI_FAILURE(status) || uid != *segment)
 		return AE_CTRL_DEPTH;
 
-- 
GitLab


From 44e985938e85503d0a69ec538e15fd33c1a4df05 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 7 Nov 2022 15:31:08 -0600
Subject: [PATCH 0983/4122] Revert "PCI: Clear PCI_STATUS when setting up
 device"

This reverts commit 6cd514e58f12b211d638dbf6f791fa18d854f09c.

Christophe Fergeau reported that 6cd514e58f12 ("PCI: Clear PCI_STATUS when
setting up device") causes boot failures when trying to start linux guests
with Apple's virtualization framework (for example using
https://developer.apple.com/documentation/virtualization/running_linux_in_a_virtual_machine?language=objc)

6cd514e58f12 only solved a cosmetic problem, so revert it to fix the boot
failures.

Link: https://bugzilla.redhat.com/show_bug.cgi?id=2137803
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/probe.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index b66fa42c4b1fa..1d6f7b502020d 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -1891,9 +1891,6 @@ int pci_setup_device(struct pci_dev *dev)
 
 	dev->broken_intx_masking = pci_intx_mask_broken(dev);
 
-	/* Clear errors left from system firmware */
-	pci_write_config_word(dev, PCI_STATUS, 0xffff);
-
 	switch (dev->hdr_type) {		    /* header type */
 	case PCI_HEADER_TYPE_NORMAL:		    /* standard header */
 		if (class == PCI_CLASS_BRIDGE_PCI)
-- 
GitLab


From bc77fb9ce40c276cedf889dca2bc6d1b1edc2763 Mon Sep 17 00:00:00 2001
From: Keoseong Park <keosung.park@samsung.com>
Date: Fri, 28 Oct 2022 16:35:53 +0900
Subject: [PATCH 0984/4122] scsi: ufs: core: Refactor ufshcd_hba_enable()

Use "if error return" style in ufshcd_hba_enable().  No functional change.

Cc: Bart Van Assche <bvanassche@acm.org>
Cc: Alim Akhtar <alim.akhtar@samsung.com>
Signed-off-by: Keoseong Park <keosung.park@samsung.com>
Link: https://lore.kernel.org/r/20221028073553epcms2p6dc4f8bdbebdc8f96f43fc4197b3edd0c@epcms2p6
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index ee73d70361331..0591d05c078a3 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -4668,14 +4668,18 @@ int ufshcd_hba_enable(struct ufs_hba *hba)
 		/* enable UIC related interrupts */
 		ufshcd_enable_intr(hba, UFSHCD_UIC_MASK);
 		ret = ufshcd_dme_reset(hba);
-		if (!ret) {
-			ret = ufshcd_dme_enable(hba);
-			if (!ret)
-				ufshcd_vops_hce_enable_notify(hba, POST_CHANGE);
-			if (ret)
-				dev_err(hba->dev,
-					"Host controller enable failed with non-hce\n");
+		if (ret) {
+			dev_err(hba->dev, "DME_RESET failed\n");
+			return ret;
 		}
+
+		ret = ufshcd_dme_enable(hba);
+		if (ret) {
+			dev_err(hba->dev, "Enabling DME failed\n");
+			return ret;
+		}
+
+		ufshcd_vops_hce_enable_notify(hba, POST_CHANGE);
 	} else {
 		ret = ufshcd_hba_execute_hce(hba);
 	}
-- 
GitLab


From e47c49219c1e20760cd66cef4411b35a3a86c0a2 Mon Sep 17 00:00:00 2001
From: Jilin Yuan <yuanjilin@cdjrlc.com>
Date: Fri, 28 Oct 2022 21:37:08 +0800
Subject: [PATCH 0985/4122] scsi: NCR5380: Fix repeated words in comment

Delete the redundant word 'the'.

Signed-off-by: Jilin Yuan <yuanjilin@cdjrlc.com>
Link: https://lore.kernel.org/r/20221028133708.60030-1-yuanjilin@cdjrlc.com
Acked-by: Finn Thain <fthain@linux-m68k.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/NCR5380.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/NCR5380.c b/drivers/scsi/NCR5380.c
index dece7d9eb4d36..ca85bddb582b4 100644
--- a/drivers/scsi/NCR5380.c
+++ b/drivers/scsi/NCR5380.c
@@ -858,7 +858,7 @@ static void NCR5380_dma_complete(struct Scsi_Host *instance)
  * latency, but a bus reset will reset chip logic. Checking for parity error
  * is unnecessary because that interrupt is never enabled. A Loss of BSY
  * condition will clear DMA Mode. We can tell when this occurs because the
- * the Busy Monitor interrupt is enabled together with DMA Mode.
+ * Busy Monitor interrupt is enabled together with DMA Mode.
  */
 
 static irqreturn_t __maybe_unused NCR5380_intr(int irq, void *dev_id)
-- 
GitLab


From c7cbaab2d464484008b4f8ed66b6bb0ee99745c2 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 31 Oct 2022 15:45:38 +0000
Subject: [PATCH 0986/4122] scsi: message: fusion: Remove variable 'where'

Variable 'where' is just being incremented and it's never used
anywhere else. Remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221031154538.870223-1-colin.i.king@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/message/fusion/mptctl.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/message/fusion/mptctl.c b/drivers/message/fusion/mptctl.c
index 52c7020c9d191..1decd09a08d81 100644
--- a/drivers/message/fusion/mptctl.c
+++ b/drivers/message/fusion/mptctl.c
@@ -2879,7 +2879,6 @@ static struct mpt_pci_driver mptctl_driver = {
 static int __init mptctl_init(void)
 {
 	int err;
-	int where = 1;
 
 	show_mptmod_ver(my_NAME, my_VERSION);
 
@@ -2898,7 +2897,6 @@ static int __init mptctl_init(void)
 	/*
 	 *  Install our handler
 	 */
-	++where;
 	mptctl_id = mpt_register(mptctl_reply, MPTCTL_DRIVER,
 	    "mptctl_reply");
 	if (!mptctl_id || mptctl_id >= MPT_MAX_PROTOCOL_DRIVERS) {
-- 
GitLab


From 81cb3eb68af5d0bee61ea45a72a0e6e3862b246f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 31 Oct 2022 16:05:12 +0000
Subject: [PATCH 0987/4122] scsi: BusLogic: Remove variable 'adapter_count'

Variable 'adapter_count' is just being incremented and it's never used
anywhere else. Remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221031160512.872153-1-colin.i.king@gmail.com
Acked-by: Khalid Aziz <khalid@gonehiking.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/BusLogic.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/BusLogic.c b/drivers/scsi/BusLogic.c
index f2abffce26599..f7b7ffda1161f 100644
--- a/drivers/scsi/BusLogic.c
+++ b/drivers/scsi/BusLogic.c
@@ -2198,7 +2198,7 @@ static int blogic_slaveconfig(struct scsi_device *dev)
 
 static int __init blogic_init(void)
 {
-	int adapter_count = 0, drvr_optindex = 0, probeindex;
+	int drvr_optindex = 0, probeindex;
 	struct blogic_adapter *adapter;
 	int ret = 0;
 
@@ -2368,10 +2368,8 @@ static int __init blogic_init(void)
 					list_del(&myadapter->host_list);
 					scsi_host_put(host);
 					ret = -ENODEV;
-				} else {
+				} else
 					scsi_scan_host(host);
-					adapter_count++;
-				}
 			}
 		} else {
 			/*
-- 
GitLab


From b817e6ffbad7a1a0a5ca5bb7d4020823c3f4d9d0 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 31 Oct 2022 11:34:21 -0700
Subject: [PATCH 0988/4122] scsi: ufs: core: Introduce ufshcd_abort_all()

Move the code for aborting all SCSI commands and TMFs into a new function.
This patch makes the ufshcd_err_handler() easier to read. Except for adding
more logging, this patch does not change any functionality.

Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221031183433.2443554-1-bvanassche@acm.org
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 62 +++++++++++++++++++++------------------
 1 file changed, 34 insertions(+), 28 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 0591d05c078a3..768cb49d269ca 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -6161,6 +6161,38 @@ static bool ufshcd_is_pwr_mode_restore_needed(struct ufs_hba *hba)
 	return false;
 }
 
+static bool ufshcd_abort_all(struct ufs_hba *hba)
+{
+	bool needs_reset = false;
+	int tag, ret;
+
+	/* Clear pending transfer requests */
+	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
+		ret = ufshcd_try_to_abort_task(hba, tag);
+		dev_err(hba->dev, "Aborting tag %d / CDB %#02x %s\n", tag,
+			hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1,
+			ret ? "failed" : "succeeded");
+		if (ret) {
+			needs_reset = true;
+			goto out;
+		}
+	}
+
+	/* Clear pending task management requests */
+	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
+		if (ufshcd_clear_tm_cmd(hba, tag)) {
+			needs_reset = true;
+			goto out;
+		}
+	}
+
+out:
+	/* Complete the requests that are cleared by s/w */
+	ufshcd_complete_requests(hba);
+
+	return needs_reset;
+}
+
 /**
  * ufshcd_err_handler - handle UFS errors that require s/w attention
  * @work: pointer to work structure
@@ -6172,10 +6204,7 @@ static void ufshcd_err_handler(struct work_struct *work)
 	unsigned long flags;
 	bool needs_restore;
 	bool needs_reset;
-	bool err_xfer;
-	bool err_tm;
 	int pmc_err;
-	int tag;
 
 	hba = container_of(work, struct ufs_hba, eh_work);
 
@@ -6204,8 +6233,6 @@ static void ufshcd_err_handler(struct work_struct *work)
 again:
 	needs_restore = false;
 	needs_reset = false;
-	err_xfer = false;
-	err_tm = false;
 
 	if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
 		hba->ufshcd_state = UFSHCD_STATE_RESET;
@@ -6274,34 +6301,13 @@ again:
 	hba->silence_err_logs = true;
 	/* release lock as clear command might sleep */
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	/* Clear pending transfer requests */
-	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs) {
-		if (ufshcd_try_to_abort_task(hba, tag)) {
-			err_xfer = true;
-			goto lock_skip_pending_xfer_clear;
-		}
-		dev_err(hba->dev, "Aborted tag %d / CDB %#02x\n", tag,
-			hba->lrb[tag].cmd ? hba->lrb[tag].cmd->cmnd[0] : -1);
-	}
 
-	/* Clear pending task management requests */
-	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs) {
-		if (ufshcd_clear_tm_cmd(hba, tag)) {
-			err_tm = true;
-			goto lock_skip_pending_xfer_clear;
-		}
-	}
-
-lock_skip_pending_xfer_clear:
-	/* Complete the requests that are cleared by s/w */
-	ufshcd_complete_requests(hba);
+	needs_reset = ufshcd_abort_all(hba);
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	hba->silence_err_logs = false;
-	if (err_xfer || err_tm) {
-		needs_reset = true;
+	if (needs_reset)
 		goto do_reset;
-	}
 
 	/*
 	 * After all reqs and tasks are cleared from doorbell,
-- 
GitLab


From 3d75e766b58a7410d4e835c534e1b4664a8f62d0 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 2 Nov 2022 09:19:06 -0700
Subject: [PATCH 0989/4122] scsi: elx: libefc: Fix second parameter type in
 state callbacks

With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG),
indirect call targets are validated against the expected function pointer
prototype to make sure the call target is valid to help mitigate ROP
attacks. If they are not identical, there is a failure at run time, which
manifests as either a kernel panic or thread getting killed. A proposed
warning in clang aims to catch these at compile time, which reveals:

  drivers/scsi/elx/libefc/efc_node.c:811:22: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict]
                  ctx->current_state = state;
                                    ^ ~~~~~
  drivers/scsi/elx/libefc/efc_node.c:878:21: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict]
          node->nodedb_state = state;
                            ^ ~~~~~
  drivers/scsi/elx/libefc/efc_node.c:905:6: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' from 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') [-Werror,-Wincompatible-function-pointer-types-strict]
                  pf = node->nodedb_state;
                    ^ ~~~~~~~~~~~~~~~~~~

  drivers/scsi/elx/libefc/efc_device.c:455:22: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict]
                  node->nodedb_state = __efc_d_init;
                                    ^ ~~~~~~~~~~~~

  drivers/scsi/elx/libefc/efc_sm.c:41:22: error: incompatible function pointer types assigning to 'void (*)(struct efc_sm_ctx *, u32, void *)' (aka 'void (*)(struct efc_sm_ctx *, unsigned int, void *)') from 'void (*)(struct efc_sm_ctx *, enum efc_sm_event, void *)' [-Werror,-Wincompatible-function-pointer-types-strict]
                  ctx->current_state = state;
                                    ^ ~~~~~

The type of the second parameter in the prototypes of ->current_state() and
->nodedb_state() ('u32') does not match the implementations, which have a
second parameter type of 'enum efc_sm_event'. Update the prototypes to have
the correct second parameter type, clearing up all the warnings and CFI
failures.

Link: https://github.com/ClangBuiltLinux/linux/issues/1750
Reported-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Link: https://lore.kernel.org/r/20221102161906.2781508-1-nathan@kernel.org
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/elx/libefc/efclib.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/elx/libefc/efclib.h b/drivers/scsi/elx/libefc/efclib.h
index dde20891c2dd7..57e3386128127 100644
--- a/drivers/scsi/elx/libefc/efclib.h
+++ b/drivers/scsi/elx/libefc/efclib.h
@@ -58,10 +58,12 @@ enum efc_node_send_ls_acc {
 #define EFC_LINK_STATUS_UP		0
 #define EFC_LINK_STATUS_DOWN		1
 
+enum efc_sm_event;
+
 /* State machine context header  */
 struct efc_sm_ctx {
 	void (*current_state)(struct efc_sm_ctx *ctx,
-			      u32 evt, void *arg);
+			      enum efc_sm_event evt, void *arg);
 
 	const char	*description;
 	void		*app;
@@ -365,7 +367,7 @@ struct efc_node {
 	int			prev_evt;
 
 	void (*nodedb_state)(struct efc_sm_ctx *ctx,
-			     u32 evt, void *arg);
+			     enum efc_sm_event evt, void *arg);
 	struct timer_list	gidpt_delay_timer;
 	u64			time_last_gidpt_msec;
 
-- 
GitLab


From 3d6d7930928ace6b982258ebb81d585fe20e9f44 Mon Sep 17 00:00:00 2001
From: Keoseong Park <keosung.park@samsung.com>
Date: Thu, 3 Nov 2022 14:53:49 +0900
Subject: [PATCH 0990/4122] scsi: ufs: core: Remove check_upiu_size() from
 ufshcd.h

Commit 68078d5cc1a5 ("[SCSI] ufs: Set fDeviceInit flag to initiate device
initialization") added check_upiu_size(), but no caller.

Cc: Dolev Raviv <draviv@codeaurora.org>
Link: https://lore.kernel.org/r/20221103055349epcms2p338f2550c2dd78d00231a83b24719a3d4@epcms2p3
Signed-off-by: Keoseong Park <keosung.park@samsung.com>
Reviewed-by: Avri Altman <avri.altman@wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/ufs/ufshcd.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h
index 96538eb3a6c01..5cf81dff60aa0 100644
--- a/include/ufs/ufshcd.h
+++ b/include/ufs/ufshcd.h
@@ -1072,12 +1072,6 @@ void ufshcd_update_evt_hist(struct ufs_hba *hba, u32 id, u32 val);
 void ufshcd_hba_stop(struct ufs_hba *hba);
 void ufshcd_schedule_eh_work(struct ufs_hba *hba);
 
-static inline void check_upiu_size(void)
-{
-	BUILD_BUG_ON(ALIGNED_UPIU_SIZE <
-		GENERAL_UPIU_REQUEST_SIZE + QUERY_DESC_MAX_SIZE);
-}
-
 /**
  * ufshcd_set_variant - set variant specific data to the hba
  * @hba: per adapter instance
-- 
GitLab


From 0b25e17e9018a0ea68a9f0b4787672e8c68fa8d5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 31 Oct 2022 15:47:25 -0700
Subject: [PATCH 0991/4122] scsi: alua: Move a scsi_device_put() call out of
 alua_check_vpd()

Fix the following smatch warning:

drivers/scsi/device_handler/scsi_dh_alua.c:1013 alua_rtpg_queue() warn: sleeping in atomic context

alua_check_vpd() <- disables preempt
-> alua_rtpg_queue()
   -> scsi_device_put()

Cc: Hannes Reinecke <hare@suse.de>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221031224728.2607760-2-bvanassche@acm.org
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 23 ++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 610a51538f034..f7bc81cc59ab5 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -324,6 +324,7 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 	struct alua_port_group *pg, *old_pg = NULL;
 	bool pg_updated = false;
 	unsigned long flags;
+	bool put_sdev;
 
 	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
 	if (group_id < 0) {
@@ -373,11 +374,14 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 		list_add_rcu(&h->node, &pg->dh_list);
 	spin_unlock_irqrestore(&pg->lock, flags);
 
-	alua_rtpg_queue(rcu_dereference_protected(h->pg,
+	put_sdev = alua_rtpg_queue(rcu_dereference_protected(h->pg,
 						  lockdep_is_held(&h->pg_lock)),
 			sdev, NULL, true);
 	spin_unlock(&h->pg_lock);
 
+	if (put_sdev)
+		scsi_device_put(sdev);
+
 	if (old_pg)
 		kref_put(&old_pg->kref, release_port_group);
 
@@ -968,9 +972,10 @@ static void alua_rtpg_work(struct work_struct *work)
  *         RTPG already has been scheduled.
  *
  * Returns true if and only if alua_rtpg_work() will be called asynchronously.
- * That function is responsible for calling @qdata->fn().
+ * That function is responsible for calling @qdata->fn(). If this function
+ * returns true, the caller is responsible for invoking scsi_device_put(@sdev).
  */
-static bool alua_rtpg_queue(struct alua_port_group *pg,
+static bool __must_check alua_rtpg_queue(struct alua_port_group *pg,
 			    struct scsi_device *sdev,
 			    struct alua_queue_data *qdata, bool force)
 {
@@ -1009,8 +1014,6 @@ static bool alua_rtpg_queue(struct alua_port_group *pg,
 		else
 			kref_put(&pg->kref, release_port_group);
 	}
-	if (sdev)
-		scsi_device_put(sdev);
 
 	return true;
 }
@@ -1117,10 +1120,12 @@ static int alua_activate(struct scsi_device *sdev,
 	rcu_read_unlock();
 	mutex_unlock(&h->init_mutex);
 
-	if (alua_rtpg_queue(pg, sdev, qdata, true))
+	if (alua_rtpg_queue(pg, sdev, qdata, true)) {
+		scsi_device_put(sdev);
 		fn = NULL;
-	else
+	} else {
 		err = SCSI_DH_DEV_OFFLINED;
+	}
 	kref_put(&pg->kref, release_port_group);
 out:
 	if (fn)
@@ -1146,7 +1151,9 @@ static void alua_check(struct scsi_device *sdev, bool force)
 		return;
 	}
 	rcu_read_unlock();
-	alua_rtpg_queue(pg, sdev, NULL, force);
+
+	if (alua_rtpg_queue(pg, sdev, NULL, force))
+		scsi_device_put(sdev);
 	kref_put(&pg->kref, release_port_group);
 }
 
-- 
GitLab


From 379e2554e3d10e87c0c0a728ef538f3c26d82a98 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 31 Oct 2022 15:47:26 -0700
Subject: [PATCH 0992/4122] scsi: alua: Move a scsi_device_put() call out of
 alua_rtpg_select_sdev()

Move a scsi_device_put() call from alua_rtpg_select_sdev() to its
callers. Fixes the following smatch complaint:

drivers/scsi/device_handler/scsi_dh_alua.c:853 alua_rtpg_select_sdev() warn: sleeping in atomic context

alua_rtpg_work() <- disables preempt
-> alua_rtpg_select_sdev()
   -> scsi_device_put()

Cc: Hannes Reinecke <hare@suse.de>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221031224728.2607760-3-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 38 ++++++++++++++--------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index f7bc81cc59ab5..693cd827e1384 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -815,14 +815,19 @@ static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg)
 	return SCSI_DH_RETRY;
 }
 
-static bool alua_rtpg_select_sdev(struct alua_port_group *pg)
+/*
+ * The caller must call scsi_device_put() on the returned pointer if it is not
+ * NULL.
+ */
+static struct scsi_device * __must_check
+alua_rtpg_select_sdev(struct alua_port_group *pg)
 {
 	struct alua_dh_data *h;
-	struct scsi_device *sdev = NULL;
+	struct scsi_device *sdev = NULL, *prev_sdev;
 
 	lockdep_assert_held(&pg->lock);
 	if (WARN_ON(!pg->rtpg_sdev))
-		return false;
+		return NULL;
 
 	/*
 	 * RCU protection isn't necessary for dh_list here
@@ -849,22 +854,22 @@ static bool alua_rtpg_select_sdev(struct alua_port_group *pg)
 		pr_warn("%s: no device found for rtpg\n",
 			(pg->device_id_len ?
 			 (char *)pg->device_id_str : "(nameless PG)"));
-		return false;
+		return NULL;
 	}
 
 	sdev_printk(KERN_INFO, sdev, "rtpg retry on different device\n");
 
-	scsi_device_put(pg->rtpg_sdev);
+	prev_sdev = pg->rtpg_sdev;
 	pg->rtpg_sdev = sdev;
 
-	return true;
+	return prev_sdev;
 }
 
 static void alua_rtpg_work(struct work_struct *work)
 {
 	struct alua_port_group *pg =
 		container_of(work, struct alua_port_group, rtpg_work.work);
-	struct scsi_device *sdev;
+	struct scsi_device *sdev, *prev_sdev = NULL;
 	LIST_HEAD(qdata_list);
 	int err = SCSI_DH_OK;
 	struct alua_queue_data *qdata, *tmp;
@@ -905,7 +910,7 @@ static void alua_rtpg_work(struct work_struct *work)
 
 		/* If RTPG failed on the current device, try using another */
 		if (err == SCSI_DH_RES_TEMP_UNAVAIL &&
-		    alua_rtpg_select_sdev(pg))
+		    (prev_sdev = alua_rtpg_select_sdev(pg)))
 			err = SCSI_DH_IMM_RETRY;
 
 		if (err == SCSI_DH_RETRY || err == SCSI_DH_IMM_RETRY ||
@@ -917,9 +922,7 @@ static void alua_rtpg_work(struct work_struct *work)
 				pg->interval = ALUA_RTPG_RETRY_DELAY;
 			pg->flags |= ALUA_PG_RUN_RTPG;
 			spin_unlock_irqrestore(&pg->lock, flags);
-			queue_delayed_work(kaluad_wq, &pg->rtpg_work,
-					   pg->interval * HZ);
-			return;
+			goto queue_rtpg;
 		}
 		if (err != SCSI_DH_OK)
 			pg->flags &= ~ALUA_PG_RUN_STPG;
@@ -934,9 +937,7 @@ static void alua_rtpg_work(struct work_struct *work)
 			pg->interval = 0;
 			pg->flags &= ~ALUA_PG_RUNNING;
 			spin_unlock_irqrestore(&pg->lock, flags);
-			queue_delayed_work(kaluad_wq, &pg->rtpg_work,
-					   pg->interval * HZ);
-			return;
+			goto queue_rtpg;
 		}
 	}
 
@@ -950,6 +951,9 @@ static void alua_rtpg_work(struct work_struct *work)
 	pg->rtpg_sdev = NULL;
 	spin_unlock_irqrestore(&pg->lock, flags);
 
+	if (prev_sdev)
+		scsi_device_put(prev_sdev);
+
 	list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) {
 		list_del(&qdata->entry);
 		if (qdata->callback_fn)
@@ -961,6 +965,12 @@ static void alua_rtpg_work(struct work_struct *work)
 	spin_unlock_irqrestore(&pg->lock, flags);
 	scsi_device_put(sdev);
 	kref_put(&pg->kref, release_port_group);
+	return;
+
+queue_rtpg:
+	if (prev_sdev)
+		scsi_device_put(prev_sdev);
+	queue_delayed_work(kaluad_wq, &pg->rtpg_work, pg->interval * HZ);
 }
 
 /**
-- 
GitLab


From 2e5a6c3baccd31476ed00c3fbc413b48ddd87993 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 31 Oct 2022 15:47:27 -0700
Subject: [PATCH 0993/4122] scsi: bfa: Convert bfad_reset_sdev_bflags() from a
 macro into a function

Before modifying bfad_reset_sdev_bflags(), convert it from a macro into a
function.

Cc: Anil Gurumurthy <anil.gurumurthy@qlogic.com>
Cc: Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221031224728.2607760-4-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bfa/bfad_bsg.c | 27 +++++++++++++++++++++++++++
 drivers/scsi/bfa/bfad_im.h  | 26 --------------------------
 2 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index be8dfbe13e904..73754032e25c0 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -2540,6 +2540,33 @@ out:
 	return 0;
 }
 
+/*
+ * Set the SCSI device sdev_bflags - sdev_bflags are used by the
+ * SCSI mid-layer to choose LUN Scanning mode REPORT_LUNS vs. Sequential Scan
+ *
+ * Internally iterates over all the ITNIM's part of the im_port & sets the
+ * sdev_bflags for the scsi_device associated with LUN #0.
+ */
+static void bfad_reset_sdev_bflags(struct bfad_im_port_s *im_port,
+				   int lunmask_cfg)
+{
+	const u32 scan_flags = BLIST_NOREPORTLUN | BLIST_SPARSELUN;
+	struct bfad_itnim_s *itnim;
+	struct scsi_device *sdev;
+
+	list_for_each_entry(itnim, &im_port->itnim_mapped_list, list_entry) {
+		sdev = scsi_device_lookup(im_port->shost, itnim->channel,
+					  itnim->scsi_tgt_id, 0);
+		if (sdev) {
+			if (lunmask_cfg == BFA_TRUE)
+				sdev->sdev_bflags |= scan_flags;
+			else
+				sdev->sdev_bflags &= ~scan_flags;
+			scsi_device_put(sdev);
+		}
+	}
+}
+
 /* Function to reset the LUN SCAN mode */
 static void
 bfad_iocmd_lunmask_reset_lunscan_mode(struct bfad_s *bfad, int lunmask_cfg)
diff --git a/drivers/scsi/bfa/bfad_im.h b/drivers/scsi/bfa/bfad_im.h
index c03b225ea1ba0..4353feedf76ad 100644
--- a/drivers/scsi/bfa/bfad_im.h
+++ b/drivers/scsi/bfa/bfad_im.h
@@ -198,30 +198,4 @@ irqreturn_t bfad_intx(int irq, void *dev_id);
 int bfad_im_bsg_request(struct bsg_job *job);
 int bfad_im_bsg_timeout(struct bsg_job *job);
 
-/*
- * Macro to set the SCSI device sdev_bflags - sdev_bflags are used by the
- * SCSI mid-layer to choose LUN Scanning mode REPORT_LUNS vs. Sequential Scan
- *
- * Internally iterate's over all the ITNIM's part of the im_port & set's the
- * sdev_bflags for the scsi_device associated with LUN #0.
- */
-#define bfad_reset_sdev_bflags(__im_port, __lunmask_cfg) do {		\
-	struct scsi_device *__sdev = NULL;				\
-	struct bfad_itnim_s *__itnim = NULL;				\
-	u32 scan_flags = BLIST_NOREPORTLUN | BLIST_SPARSELUN;		\
-	list_for_each_entry(__itnim, &((__im_port)->itnim_mapped_list),	\
-			    list_entry) {				\
-		__sdev = scsi_device_lookup((__im_port)->shost,		\
-					    __itnim->channel,		\
-					    __itnim->scsi_tgt_id, 0);	\
-		if (__sdev) {						\
-			if ((__lunmask_cfg) == BFA_TRUE)		\
-				__sdev->sdev_bflags |= scan_flags;	\
-			else						\
-				__sdev->sdev_bflags &= ~scan_flags;	\
-			scsi_device_put(__sdev);			\
-		}							\
-	}								\
-} while (0)
-
 #endif
-- 
GitLab


From 2e79cf37b15b1936f8630d9c5805d2c76bde213b Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 31 Oct 2022 15:47:28 -0700
Subject: [PATCH 0994/4122] scsi: bfa: Rework bfad_reset_sdev_bflags()

Since commit f93ed747e2c7 ("scsi: core: Release SCSI devices
synchronously") it is no longer allowed to call scsi_device_put() from
atomic context.

Rework bfad_reset_sdev_bflags() such that scsi_device_put() is no longer
called. This fixes the following smatch warning:

drivers/scsi/bfa/bfad_bsg.c:2551 bfad_iocmd_lunmask_reset_lunscan_mode() warn: sleeping in atomic context

bfad_iocmd_lunmask() <- disables preempt
-> bfad_iocmd_lunmask_reset_lunscan_mode()
   -> scsi_device_put()

Cc: Anil Gurumurthy <anil.gurumurthy@qlogic.com>
Cc: Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221031224728.2607760-5-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bfa/bfad_bsg.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/bfa/bfad_bsg.c b/drivers/scsi/bfa/bfad_bsg.c
index 73754032e25c0..79d4f7ee5bcb0 100644
--- a/drivers/scsi/bfa/bfad_bsg.c
+++ b/drivers/scsi/bfa/bfad_bsg.c
@@ -2553,18 +2553,20 @@ static void bfad_reset_sdev_bflags(struct bfad_im_port_s *im_port,
 	const u32 scan_flags = BLIST_NOREPORTLUN | BLIST_SPARSELUN;
 	struct bfad_itnim_s *itnim;
 	struct scsi_device *sdev;
+	unsigned long flags;
 
+	spin_lock_irqsave(im_port->shost->host_lock, flags);
 	list_for_each_entry(itnim, &im_port->itnim_mapped_list, list_entry) {
-		sdev = scsi_device_lookup(im_port->shost, itnim->channel,
-					  itnim->scsi_tgt_id, 0);
+		sdev = __scsi_device_lookup(im_port->shost, itnim->channel,
+					    itnim->scsi_tgt_id, 0);
 		if (sdev) {
 			if (lunmask_cfg == BFA_TRUE)
 				sdev->sdev_bflags |= scan_flags;
 			else
 				sdev->sdev_bflags &= ~scan_flags;
-			scsi_device_put(sdev);
 		}
 	}
+	spin_unlock_irqrestore(im_port->shost->host_lock, flags);
 }
 
 /* Function to reset the LUN SCAN mode */
-- 
GitLab


From 4fb2169d66b837a2986f569f5d5b81f79e6e4a4c Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Mon, 31 Oct 2022 15:48:18 -0700
Subject: [PATCH 0995/4122] scsi: qla2xxx: Fix set-but-not-used variable
 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following two compiler warnings:

drivers/scsi/qla2xxx/qla_init.c: In function ‘qla24xx_async_abort_cmd’:
drivers/scsi/qla2xxx/qla_init.c:171:17: warning: variable ‘bail’ set but not used [-Wunused-but-set-variable]
  171 |         uint8_t bail;
      |                 ^~~~
drivers/scsi/qla2xxx/qla_init.c: In function ‘qla2x00_async_tm_cmd’:
drivers/scsi/qla2xxx/qla_init.c:2023:17: warning: variable ‘bail’ set but not used [-Wunused-but-set-variable]
 2023 |         uint8_t bail;
      |                 ^~~~

Cc: Arun Easi <arun.easi@qlogic.com>
Cc: Giridhar Malavali <giridhar.malavali@qlogic.com>
Fixes: feafb7b1714c ("[SCSI] qla2xxx: Fix vport delete issues")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221031224818.2607882-1-bvanassche@acm.org
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_def.h    | 22 +++++++++++-----------
 drivers/scsi/qla2xxx/qla_init.c   |  6 ++----
 drivers/scsi/qla2xxx/qla_inline.h |  4 +---
 drivers/scsi/qla2xxx/qla_os.c     |  4 +---
 4 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
index 802eec6407d9a..a26a373be9da3 100644
--- a/drivers/scsi/qla2xxx/qla_def.h
+++ b/drivers/scsi/qla2xxx/qla_def.h
@@ -5136,17 +5136,17 @@ struct secure_flash_update_block_pk {
 		(test_bit(ISP_ABORT_NEEDED, &ha->dpc_flags) || \
 			 test_bit(LOOP_RESYNC_NEEDED, &ha->dpc_flags))
 
-#define QLA_VHA_MARK_BUSY(__vha, __bail) do {		\
-	atomic_inc(&__vha->vref_count);			\
-	mb();						\
-	if (__vha->flags.delete_progress) {		\
-		atomic_dec(&__vha->vref_count);		\
-		wake_up(&__vha->vref_waitq);		\
-		__bail = 1;				\
-	} else {					\
-		__bail = 0;				\
-	}						\
-} while (0)
+static inline bool qla_vha_mark_busy(scsi_qla_host_t *vha)
+{
+	atomic_inc(&vha->vref_count);
+	mb();
+	if (vha->flags.delete_progress) {
+		atomic_dec(&vha->vref_count);
+		wake_up(&vha->vref_waitq);
+		return true;
+	}
+	return false;
+}
 
 #define QLA_VHA_MARK_NOT_BUSY(__vha) do {		\
 	atomic_dec(&__vha->vref_count);			\
diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index e12db95de6883..631993504a767 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -168,7 +168,6 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
 	struct srb_iocb *abt_iocb;
 	srb_t *sp;
 	int rval = QLA_FUNCTION_FAILED;
-	uint8_t bail;
 
 	/* ref: INIT for ABTS command */
 	sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
@@ -176,7 +175,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
 	if (!sp)
 		return QLA_MEMORY_ALLOC_FAILED;
 
-	QLA_VHA_MARK_BUSY(vha, bail);
+	qla_vha_mark_busy(vha);
 	abt_iocb = &sp->u.iocb_cmd;
 	sp->type = SRB_ABT_CMD;
 	sp->name = "abort";
@@ -2020,14 +2019,13 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
 	struct srb_iocb *tm_iocb;
 	srb_t *sp;
 	int rval = QLA_FUNCTION_FAILED;
-	uint8_t bail;
 
 	/* ref: INIT */
 	sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
 	if (!sp)
 		goto done;
 
-	QLA_VHA_MARK_BUSY(vha, bail);
+	qla_vha_mark_busy(vha);
 	sp->type = SRB_TM_CMD;
 	sp->name = "tmf";
 	qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha),
diff --git a/drivers/scsi/qla2xxx/qla_inline.h b/drivers/scsi/qla2xxx/qla_inline.h
index db17f7f410cdd..5185dc5daf80d 100644
--- a/drivers/scsi/qla2xxx/qla_inline.h
+++ b/drivers/scsi/qla2xxx/qla_inline.h
@@ -225,11 +225,9 @@ static inline srb_t *
 qla2x00_get_sp(scsi_qla_host_t *vha, fc_port_t *fcport, gfp_t flag)
 {
 	srb_t *sp = NULL;
-	uint8_t bail;
 	struct qla_qpair *qpair;
 
-	QLA_VHA_MARK_BUSY(vha, bail);
-	if (unlikely(bail))
+	if (unlikely(qla_vha_mark_busy(vha)))
 		return NULL;
 
 	qpair = vha->hw->base_qpair;
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 2c85f3cce7264..96ba1398f20c1 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -5069,13 +5069,11 @@ struct qla_work_evt *
 qla2x00_alloc_work(struct scsi_qla_host *vha, enum qla_work_type type)
 {
 	struct qla_work_evt *e;
-	uint8_t bail;
 
 	if (test_bit(UNLOADING, &vha->dpc_flags))
 		return NULL;
 
-	QLA_VHA_MARK_BUSY(vha, bail);
-	if (bail)
+	if (qla_vha_mark_busy(vha))
 		return NULL;
 
 	e = kzalloc(sizeof(struct qla_work_evt), GFP_ATOMIC);
-- 
GitLab


From e137b81d30e7ef8ec27a77c3b2cbbad52845872a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 1 Nov 2022 10:47:33 +0000
Subject: [PATCH 0996/4122] scsi: qla2xxx: Remove unused variable 'found_devs'

Variable 'found_devs' is just being incremented and it's never used
anywhere else. Remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221101104733.30363-1-colin.i.king@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_init.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
index 631993504a767..ce4c5d728407d 100644
--- a/drivers/scsi/qla2xxx/qla_init.c
+++ b/drivers/scsi/qla2xxx/qla_init.c
@@ -5544,7 +5544,6 @@ static int
 qla2x00_configure_local_loop(scsi_qla_host_t *vha)
 {
 	int		rval, rval2;
-	int		found_devs;
 	int		found;
 	fc_port_t	*fcport, *new_fcport;
 	uint16_t	index;
@@ -5559,7 +5558,6 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
 	if (N2N_TOPO(ha))
 		return qla2x00_configure_n2n_loop(vha);
 
-	found_devs = 0;
 	new_fcport = NULL;
 	entries = MAX_FIBRE_DEVICES_LOOP;
 
@@ -5718,8 +5716,6 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
 
 		/* Base iIDMA settings on HBA port speed. */
 		fcport->fp_speed = ha->link_data_rate;
-
-		found_devs++;
 	}
 
 	list_for_each_entry(fcport, &vha->vp_fcports, list) {
-- 
GitLab


From 0aa46eba29553035d6af8384f19dfee2258d2a46 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 1 Nov 2022 12:32:35 +0000
Subject: [PATCH 0997/4122] scsi: csiostor: Remove unused variable 'n'

Variable 'n' is just being incremented and it's never used anywhere
else. Remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221101123235.52152-1-colin.i.king@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/csiostor/csio_wr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/csiostor/csio_wr.c b/drivers/scsi/csiostor/csio_wr.c
index fe0355c964bca..a516df019c224 100644
--- a/drivers/scsi/csiostor/csio_wr.c
+++ b/drivers/scsi/csiostor/csio_wr.c
@@ -1051,7 +1051,6 @@ csio_wr_process_fl(struct csio_hw *hw, struct csio_q *q,
 	struct csio_fl_dma_buf flb;
 	struct csio_dma_buf *buf, *fbuf;
 	uint32_t bufsz, len, lastlen = 0;
-	int n;
 	struct csio_q *flq = hw->wrm.q_arr[q->un.iq.flq_idx];
 
 	CSIO_DB_ASSERT(flq != NULL);
@@ -1071,7 +1070,7 @@ csio_wr_process_fl(struct csio_hw *hw, struct csio_q *q,
 	flb.totlen = len;
 
 	/* Consume all freelist buffers used for len bytes */
-	for (n = 0, fbuf = flb.flbufs; ; n++, fbuf++) {
+	for (fbuf = flb.flbufs; ; fbuf++) {
 		buf = &flq->un.fl.bufs[flq->cidx];
 		bufsz = csio_wr_fl_bufsz(sge, buf);
 
-- 
GitLab


From bc81131813aaf6fe764d1cc6b942a35a8c0c5c36 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 1 Nov 2022 10:53:26 +0000
Subject: [PATCH 0998/4122] scsi: target: core: Remove unused variable
 'unit_serial_len'

Variable 'unit_serial_len' is just being assigned and it's never used
anywhere else. Remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221101105326.31037-1-colin.i.king@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_spc.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index ffe02e1957334..621a460ba234e 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -227,7 +227,7 @@ spc_emulate_evpd_83(struct se_cmd *cmd, unsigned char *buf)
 	struct t10_alua_tg_pt_gp *tg_pt_gp;
 	unsigned char *prod = &dev->t10_wwn.model[0];
 	u32 prod_len;
-	u32 unit_serial_len, off = 0;
+	u32 off = 0;
 	u16 len = 0, id_len;
 
 	off = 4;
@@ -272,13 +272,9 @@ check_t10_vend_desc:
 	prod_len += strlen(prod);
 	prod_len++; /* For : */
 
-	if (dev->dev_flags & DF_EMULATED_VPD_UNIT_SERIAL) {
-		unit_serial_len = strlen(&dev->t10_wwn.unit_serial[0]);
-		unit_serial_len++; /* For NULL Terminator */
-
+	if (dev->dev_flags & DF_EMULATED_VPD_UNIT_SERIAL)
 		id_len += sprintf(&buf[off+12], "%s:%s", prod,
 				&dev->t10_wwn.unit_serial[0]);
-	}
 	buf[off] = 0x2; /* ASCII */
 	buf[off+1] = 0x1; /* T10 Vendor ID */
 	buf[off+2] = 0x0;
-- 
GitLab


From e56ca6bcd2136207868516f5a304fbb82cc0cb82 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 1 Nov 2022 22:13:52 +0100
Subject: [PATCH 0999/4122] scsi: target: Use kstrtobool() instead of
 strtobool()

strtobool() is the same as kstrtobool().  However, the latter is more used
within the kernel.

In order to remove strtobool() and slightly simplify kstrtox.h, switch to
the other function name.

While at it, include the corresponding header file (<linux/kstrtox.h>)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/fcddc0a53b4fc6e3c2e93592d3f61c5c63121855.1667336095.git.christophe.jaillet@wanadoo.fr
Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_configfs.c        | 29 ++++++++++----------
 drivers/target/target_core_fabric_configfs.c |  3 +-
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index 533524299ed6b..b8a5c8d6cfde0 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -12,6 +12,7 @@
  *
  ****************************************************************************/
 
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <generated/utsrelease.h>
@@ -578,7 +579,7 @@ static ssize_t _name##_store(struct config_item *item, const char *page,	\
 	bool flag;							\
 	int ret;							\
 									\
-	ret = strtobool(page, &flag);					\
+	ret = kstrtobool(page, &flag);					\
 	if (ret < 0)							\
 		return ret;						\
 	da->_name = flag;						\
@@ -638,7 +639,7 @@ static ssize_t emulate_model_alias_store(struct config_item *item,
 		return -EINVAL;
 	}
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -660,7 +661,7 @@ static ssize_t emulate_write_cache_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -712,7 +713,7 @@ static ssize_t emulate_tas_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -737,7 +738,7 @@ static ssize_t emulate_tpu_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -767,7 +768,7 @@ static ssize_t emulate_tpws_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -866,7 +867,7 @@ static ssize_t pi_prot_format_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -903,7 +904,7 @@ static ssize_t pi_prot_verify_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -932,7 +933,7 @@ static ssize_t force_pr_aptpl_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 	if (da->da_dev->export_count) {
@@ -954,7 +955,7 @@ static ssize_t emulate_rest_reord_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -977,7 +978,7 @@ static ssize_t unmap_zeroes_data_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -1126,7 +1127,7 @@ static ssize_t alua_support_store(struct config_item *item,
 	bool flag, oldflag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -1165,7 +1166,7 @@ static ssize_t pgr_support_store(struct config_item *item,
 	bool flag, oldflag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
@@ -1194,7 +1195,7 @@ static ssize_t emulate_rsoc_store(struct config_item *item,
 	bool flag;
 	int ret;
 
-	ret = strtobool(page, &flag);
+	ret = kstrtobool(page, &flag);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/target/target_core_fabric_configfs.c b/drivers/target/target_core_fabric_configfs.c
index 95a88f6224cd9..67b18a67317a0 100644
--- a/drivers/target/target_core_fabric_configfs.c
+++ b/drivers/target/target_core_fabric_configfs.c
@@ -11,6 +11,7 @@
 *
  ****************************************************************************/
 
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/utsname.h>
@@ -829,7 +830,7 @@ static ssize_t target_fabric_tpg_base_enable_store(struct config_item *item,
 	int ret;
 	bool op;
 
-	ret = strtobool(page, &op);
+	ret = kstrtobool(page, &op);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 61da03328a603d2d4a5b2e80cbe29bbf0122e6f8 Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Mon, 7 Nov 2022 00:28:05 -0600
Subject: [PATCH 1000/4122] memblock tests: introduce range tests for
 memblock_alloc_exact_nid_raw

Add TEST_F_EXACT flag, which specifies that tests should run
memblock_alloc_exact_nid_raw(). Introduce range tests for
memblock_alloc_exact_nid_raw() by using the TEST_F_EXACT flag to run the
range tests in alloc_nid_api.c, since memblock_alloc_exact_nid_raw() and
memblock_alloc_try_nid_raw() behave the same way when nid = NUMA_NO_NODE.

Rename tests and other functions in alloc_nid_api.c by removing "_try".
Since the test names will be displayed in verbose output, they need to
be general enough to refer to any of the memblock functions that the
tests may run.

Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/5a4b6d1b6130ab7375314e1c45a6d5813dfdabbd.1667802195.git.remckee0@gmail.com
---
 tools/testing/memblock/Makefile               |   2 +-
 tools/testing/memblock/main.c                 |   2 +
 .../memblock/tests/alloc_exact_nid_api.c      |  22 +
 .../memblock/tests/alloc_exact_nid_api.h      |   9 +
 tools/testing/memblock/tests/alloc_nid_api.c  | 546 +++++++++---------
 tools/testing/memblock/tests/alloc_nid_api.h  |   1 +
 tools/testing/memblock/tests/common.h         |   2 +
 7 files changed, 320 insertions(+), 264 deletions(-)
 create mode 100644 tools/testing/memblock/tests/alloc_exact_nid_api.c
 create mode 100644 tools/testing/memblock/tests/alloc_exact_nid_api.h

diff --git a/tools/testing/memblock/Makefile b/tools/testing/memblock/Makefile
index 246f7ac8489b4..2310ac4d080ec 100644
--- a/tools/testing/memblock/Makefile
+++ b/tools/testing/memblock/Makefile
@@ -7,7 +7,7 @@ CFLAGS += -I. -I../../include -Wall -O2 -fsanitize=address \
 LDFLAGS += -fsanitize=address -fsanitize=undefined
 TARGETS = main
 TEST_OFILES = tests/alloc_nid_api.o tests/alloc_helpers_api.o tests/alloc_api.o \
-		  tests/basic_api.o tests/common.o
+		  tests/basic_api.o tests/common.o tests/alloc_exact_nid_api.o
 DEP_OFILES = memblock.o lib/slab.o mmzone.o slab.o
 OFILES = main.o $(DEP_OFILES) $(TEST_OFILES)
 EXTR_SRC = ../../../mm/memblock.c
diff --git a/tools/testing/memblock/main.c b/tools/testing/memblock/main.c
index 4ca1024342b13..278f9dec50087 100644
--- a/tools/testing/memblock/main.c
+++ b/tools/testing/memblock/main.c
@@ -3,6 +3,7 @@
 #include "tests/alloc_api.h"
 #include "tests/alloc_helpers_api.h"
 #include "tests/alloc_nid_api.h"
+#include "tests/alloc_exact_nid_api.h"
 #include "tests/common.h"
 
 int main(int argc, char **argv)
@@ -12,6 +13,7 @@ int main(int argc, char **argv)
 	memblock_alloc_checks();
 	memblock_alloc_helpers_checks();
 	memblock_alloc_nid_checks();
+	memblock_alloc_exact_nid_checks();
 
 	return 0;
 }
diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c
new file mode 100644
index 0000000000000..6406496623a01
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include "alloc_exact_nid_api.h"
+#include "alloc_nid_api.h"
+
+#define FUNC_NAME			"memblock_alloc_exact_nid_raw"
+
+int memblock_alloc_exact_nid_checks(void)
+{
+	prefix_reset();
+	prefix_push(FUNC_NAME);
+
+	reset_memblock_attributes();
+	dummy_physical_memory_init();
+
+	memblock_alloc_exact_nid_range_checks();
+
+	dummy_physical_memory_cleanup();
+
+	prefix_pop();
+
+	return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.h b/tools/testing/memblock/tests/alloc_exact_nid_api.h
new file mode 100644
index 0000000000000..4408719de3b9a
--- /dev/null
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _MEMBLOCK_ALLOC_EXACT_NID_H
+#define _MEMBLOCK_ALLOC_EXACT_NID_H
+
+#include "common.h"
+
+int memblock_alloc_exact_nid_checks(void);
+
+#endif
diff --git a/tools/testing/memblock/tests/alloc_nid_api.c b/tools/testing/memblock/tests/alloc_nid_api.c
index 2c2d60f4e3e3c..49ef68cccd6f8 100644
--- a/tools/testing/memblock/tests/alloc_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_nid_api.c
@@ -18,18 +18,29 @@ static const unsigned int node_fractions[] = {
 	 625, /* 1/16 */
 };
 
-static inline const char * const get_memblock_alloc_try_nid_name(int flags)
+static inline const char * const get_memblock_alloc_nid_name(int flags)
 {
+	if (flags & TEST_F_EXACT)
+		return "memblock_alloc_exact_nid_raw";
 	if (flags & TEST_F_RAW)
 		return "memblock_alloc_try_nid_raw";
 	return "memblock_alloc_try_nid";
 }
 
-static inline void *run_memblock_alloc_try_nid(phys_addr_t size,
-					       phys_addr_t align,
-					       phys_addr_t min_addr,
-					       phys_addr_t max_addr, int nid)
-{
+static inline void *run_memblock_alloc_nid(phys_addr_t size,
+					   phys_addr_t align,
+					   phys_addr_t min_addr,
+					   phys_addr_t max_addr, int nid)
+{
+	assert(!(alloc_nid_test_flags & TEST_F_EXACT) ||
+	       (alloc_nid_test_flags & TEST_F_RAW));
+	/*
+	 * TEST_F_EXACT should be checked before TEST_F_RAW since
+	 * memblock_alloc_exact_nid_raw() performs raw allocations.
+	 */
+	if (alloc_nid_test_flags & TEST_F_EXACT)
+		return memblock_alloc_exact_nid_raw(size, align, min_addr,
+						    max_addr, nid);
 	if (alloc_nid_test_flags & TEST_F_RAW)
 		return memblock_alloc_try_nid_raw(size, align, min_addr,
 						  max_addr, nid);
@@ -50,7 +61,7 @@ static inline void *run_memblock_alloc_try_nid(phys_addr_t size,
  *
  * Expect to allocate a region that ends at max_addr.
  */
-static int alloc_try_nid_top_down_simple_check(void)
+static int alloc_nid_top_down_simple_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -65,9 +76,9 @@ static int alloc_try_nid_top_down_simple_check(void)
 	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
 	max_addr = min_addr + SZ_512;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 	rgn_end = rgn->base + rgn->size;
 
 	ASSERT_NE(allocated_ptr, NULL);
@@ -102,7 +113,7 @@ static int alloc_try_nid_top_down_simple_check(void)
  *
  * Expect to allocate an aligned region that ends before max_addr.
  */
-static int alloc_try_nid_top_down_end_misaligned_check(void)
+static int alloc_nid_top_down_end_misaligned_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -118,9 +129,9 @@ static int alloc_try_nid_top_down_end_misaligned_check(void)
 	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
 	max_addr = min_addr + SZ_512 + misalign;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 	rgn_end = rgn->base + rgn->size;
 
 	ASSERT_NE(allocated_ptr, NULL);
@@ -153,7 +164,7 @@ static int alloc_try_nid_top_down_end_misaligned_check(void)
  * Expect to allocate a region that starts at min_addr and ends at
  * max_addr, given that min_addr is aligned.
  */
-static int alloc_try_nid_exact_address_generic_check(void)
+static int alloc_nid_exact_address_generic_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -168,9 +179,9 @@ static int alloc_try_nid_exact_address_generic_check(void)
 	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES;
 	max_addr = min_addr + size;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 	rgn_end = rgn->base + rgn->size;
 
 	ASSERT_NE(allocated_ptr, NULL);
@@ -205,7 +216,7 @@ static int alloc_try_nid_exact_address_generic_check(void)
  * Expect to drop the lower limit and allocate a memory region which
  * ends at max_addr (if the address is aligned).
  */
-static int alloc_try_nid_top_down_narrow_range_check(void)
+static int alloc_nid_top_down_narrow_range_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -219,9 +230,9 @@ static int alloc_try_nid_top_down_narrow_range_check(void)
 	min_addr = memblock_start_of_DRAM() + SZ_512;
 	max_addr = min_addr + SMP_CACHE_BYTES;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -257,7 +268,7 @@ static int alloc_try_nid_top_down_narrow_range_check(void)
  *
  * Expect no allocation to happen.
  */
-static int alloc_try_nid_low_max_generic_check(void)
+static int alloc_nid_low_max_generic_check(void)
 {
 	void *allocated_ptr = NULL;
 	phys_addr_t size = SZ_1K;
@@ -270,9 +281,9 @@ static int alloc_try_nid_low_max_generic_check(void)
 	min_addr = memblock_start_of_DRAM();
 	max_addr = min_addr + SMP_CACHE_BYTES;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_EQ(allocated_ptr, NULL);
 
@@ -295,7 +306,7 @@ static int alloc_try_nid_low_max_generic_check(void)
  *
  * Expect a merge of both regions. Only the region size gets updated.
  */
-static int alloc_try_nid_min_reserved_generic_check(void)
+static int alloc_nid_min_reserved_generic_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -315,9 +326,9 @@ static int alloc_try_nid_min_reserved_generic_check(void)
 
 	memblock_reserve(reserved_base, r1_size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags);
@@ -347,7 +358,7 @@ static int alloc_try_nid_min_reserved_generic_check(void)
  *
  * Expect a merge of regions. Only the region size gets updated.
  */
-static int alloc_try_nid_max_reserved_generic_check(void)
+static int alloc_nid_max_reserved_generic_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -365,9 +376,9 @@ static int alloc_try_nid_max_reserved_generic_check(void)
 
 	memblock_reserve(max_addr, r1_size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r2_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r2_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, r2_size, alloc_nid_test_flags);
@@ -400,7 +411,7 @@ static int alloc_try_nid_max_reserved_generic_check(void)
  * updated. The total size field gets updated.
  */
 
-static int alloc_try_nid_top_down_reserved_with_space_check(void)
+static int alloc_nid_top_down_reserved_with_space_check(void)
 {
 	struct memblock_region *rgn1 = &memblock.reserved.regions[1];
 	struct memblock_region *rgn2 = &memblock.reserved.regions[0];
@@ -428,9 +439,9 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
@@ -465,7 +476,7 @@ static int alloc_try_nid_top_down_reserved_with_space_check(void)
  * Expect to merge all of the regions into one. The region counter and total
  * size fields get updated.
  */
-static int alloc_try_nid_reserved_full_merge_generic_check(void)
+static int alloc_nid_reserved_full_merge_generic_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -491,9 +502,9 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
@@ -527,7 +538,7 @@ static int alloc_try_nid_reserved_full_merge_generic_check(void)
  * Expect to merge the new region with r2. The second region does not get
  * updated. The total size counter gets updated.
  */
-static int alloc_try_nid_top_down_reserved_no_space_check(void)
+static int alloc_nid_top_down_reserved_no_space_check(void)
 {
 	struct memblock_region *rgn1 = &memblock.reserved.regions[1];
 	struct memblock_region *rgn2 = &memblock.reserved.regions[0];
@@ -555,9 +566,9 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
@@ -596,7 +607,7 @@ static int alloc_try_nid_top_down_reserved_no_space_check(void)
  * Expect no allocation to happen.
  */
 
-static int alloc_try_nid_reserved_all_generic_check(void)
+static int alloc_nid_reserved_all_generic_check(void)
 {
 	void *allocated_ptr = NULL;
 	struct region r1, r2;
@@ -620,9 +631,9 @@ static int alloc_try_nid_reserved_all_generic_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_EQ(allocated_ptr, NULL);
 
@@ -636,7 +647,7 @@ static int alloc_try_nid_reserved_all_generic_check(void)
  * bigger than the end address of the available memory. Expect to allocate
  * a region that ends before the end of the memory.
  */
-static int alloc_try_nid_top_down_cap_max_check(void)
+static int alloc_nid_top_down_cap_max_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -650,9 +661,9 @@ static int alloc_try_nid_top_down_cap_max_check(void)
 	min_addr = memblock_end_of_DRAM() - SZ_1K;
 	max_addr = memblock_end_of_DRAM() + SZ_256;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -673,7 +684,7 @@ static int alloc_try_nid_top_down_cap_max_check(void)
  * smaller than the start address of the available memory. Expect to allocate
  * a region that ends before the end of the memory.
  */
-static int alloc_try_nid_top_down_cap_min_check(void)
+static int alloc_nid_top_down_cap_min_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -687,9 +698,9 @@ static int alloc_try_nid_top_down_cap_min_check(void)
 	min_addr = memblock_start_of_DRAM() - SZ_256;
 	max_addr = memblock_end_of_DRAM();
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -719,7 +730,7 @@ static int alloc_try_nid_top_down_cap_min_check(void)
  *
  * Expect to allocate a region that ends before max_addr.
  */
-static int alloc_try_nid_bottom_up_simple_check(void)
+static int alloc_nid_bottom_up_simple_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -734,9 +745,9 @@ static int alloc_try_nid_bottom_up_simple_check(void)
 	min_addr = memblock_start_of_DRAM() + SMP_CACHE_BYTES * 2;
 	max_addr = min_addr + SZ_512;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 	rgn_end = rgn->base + rgn->size;
 
 	ASSERT_NE(allocated_ptr, NULL);
@@ -771,7 +782,7 @@ static int alloc_try_nid_bottom_up_simple_check(void)
  *
  * Expect to allocate an aligned region that ends before max_addr.
  */
-static int alloc_try_nid_bottom_up_start_misaligned_check(void)
+static int alloc_nid_bottom_up_start_misaligned_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -787,9 +798,9 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void)
 	min_addr = memblock_start_of_DRAM() + misalign;
 	max_addr = min_addr + SZ_512;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 	rgn_end = rgn->base + rgn->size;
 
 	ASSERT_NE(allocated_ptr, NULL);
@@ -824,7 +835,7 @@ static int alloc_try_nid_bottom_up_start_misaligned_check(void)
  * Expect to drop the lower limit and allocate a memory region which
  * starts at the beginning of the available memory.
  */
-static int alloc_try_nid_bottom_up_narrow_range_check(void)
+static int alloc_nid_bottom_up_narrow_range_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -838,9 +849,9 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void)
 	min_addr = memblock_start_of_DRAM() + SZ_512;
 	max_addr = min_addr + SMP_CACHE_BYTES;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -873,7 +884,7 @@ static int alloc_try_nid_bottom_up_narrow_range_check(void)
  * updated. The total size field gets updated.
  */
 
-static int alloc_try_nid_bottom_up_reserved_with_space_check(void)
+static int alloc_nid_bottom_up_reserved_with_space_check(void)
 {
 	struct memblock_region *rgn1 = &memblock.reserved.regions[1];
 	struct memblock_region *rgn2 = &memblock.reserved.regions[0];
@@ -901,9 +912,9 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
@@ -942,7 +953,7 @@ static int alloc_try_nid_bottom_up_reserved_with_space_check(void)
  * Other regions are not modified.
  */
 
-static int alloc_try_nid_bottom_up_reserved_no_space_check(void)
+static int alloc_nid_bottom_up_reserved_no_space_check(void)
 {
 	struct memblock_region *rgn1 = &memblock.reserved.regions[2];
 	struct memblock_region *rgn2 = &memblock.reserved.regions[1];
@@ -971,9 +982,9 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(r3_size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(r3_size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, r3_size, alloc_nid_test_flags);
@@ -1000,7 +1011,7 @@ static int alloc_try_nid_bottom_up_reserved_no_space_check(void)
  * bigger than the end address of the available memory. Expect to allocate
  * a region that starts at the min_addr.
  */
-static int alloc_try_nid_bottom_up_cap_max_check(void)
+static int alloc_nid_bottom_up_cap_max_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -1014,9 +1025,9 @@ static int alloc_try_nid_bottom_up_cap_max_check(void)
 	min_addr = memblock_start_of_DRAM() + SZ_1K;
 	max_addr = memblock_end_of_DRAM() + SZ_256;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1037,7 +1048,7 @@ static int alloc_try_nid_bottom_up_cap_max_check(void)
  * smaller than the start address of the available memory. Expect to allocate
  * a region at the beginning of the available memory.
  */
-static int alloc_try_nid_bottom_up_cap_min_check(void)
+static int alloc_nid_bottom_up_cap_min_check(void)
 {
 	struct memblock_region *rgn = &memblock.reserved.regions[0];
 	void *allocated_ptr = NULL;
@@ -1051,9 +1062,9 @@ static int alloc_try_nid_bottom_up_cap_min_check(void)
 	min_addr = memblock_start_of_DRAM();
 	max_addr = memblock_end_of_DRAM() - SZ_256;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1070,133 +1081,133 @@ static int alloc_try_nid_bottom_up_cap_min_check(void)
 }
 
 /* Test case wrappers for range tests */
-static int alloc_try_nid_simple_check(void)
+static int alloc_nid_simple_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_simple_check();
+	alloc_nid_top_down_simple_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_simple_check();
+	alloc_nid_bottom_up_simple_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_misaligned_check(void)
+static int alloc_nid_misaligned_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_end_misaligned_check();
+	alloc_nid_top_down_end_misaligned_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_start_misaligned_check();
+	alloc_nid_bottom_up_start_misaligned_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_narrow_range_check(void)
+static int alloc_nid_narrow_range_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_narrow_range_check();
+	alloc_nid_top_down_narrow_range_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_narrow_range_check();
+	alloc_nid_bottom_up_narrow_range_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_reserved_with_space_check(void)
+static int alloc_nid_reserved_with_space_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_reserved_with_space_check();
+	alloc_nid_top_down_reserved_with_space_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_reserved_with_space_check();
+	alloc_nid_bottom_up_reserved_with_space_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_reserved_no_space_check(void)
+static int alloc_nid_reserved_no_space_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_reserved_no_space_check();
+	alloc_nid_top_down_reserved_no_space_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_reserved_no_space_check();
+	alloc_nid_bottom_up_reserved_no_space_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_cap_max_check(void)
+static int alloc_nid_cap_max_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_cap_max_check();
+	alloc_nid_top_down_cap_max_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_cap_max_check();
+	alloc_nid_bottom_up_cap_max_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_cap_min_check(void)
+static int alloc_nid_cap_min_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_cap_min_check();
+	alloc_nid_top_down_cap_min_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_cap_min_check();
+	alloc_nid_bottom_up_cap_min_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_min_reserved_check(void)
+static int alloc_nid_min_reserved_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_min_reserved_generic_check);
-	run_bottom_up(alloc_try_nid_min_reserved_generic_check);
+	run_top_down(alloc_nid_min_reserved_generic_check);
+	run_bottom_up(alloc_nid_min_reserved_generic_check);
 
 	return 0;
 }
 
-static int alloc_try_nid_max_reserved_check(void)
+static int alloc_nid_max_reserved_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_max_reserved_generic_check);
-	run_bottom_up(alloc_try_nid_max_reserved_generic_check);
+	run_top_down(alloc_nid_max_reserved_generic_check);
+	run_bottom_up(alloc_nid_max_reserved_generic_check);
 
 	return 0;
 }
 
-static int alloc_try_nid_exact_address_check(void)
+static int alloc_nid_exact_address_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_exact_address_generic_check);
-	run_bottom_up(alloc_try_nid_exact_address_generic_check);
+	run_top_down(alloc_nid_exact_address_generic_check);
+	run_bottom_up(alloc_nid_exact_address_generic_check);
 
 	return 0;
 }
 
-static int alloc_try_nid_reserved_full_merge_check(void)
+static int alloc_nid_reserved_full_merge_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_reserved_full_merge_generic_check);
-	run_bottom_up(alloc_try_nid_reserved_full_merge_generic_check);
+	run_top_down(alloc_nid_reserved_full_merge_generic_check);
+	run_bottom_up(alloc_nid_reserved_full_merge_generic_check);
 
 	return 0;
 }
 
-static int alloc_try_nid_reserved_all_check(void)
+static int alloc_nid_reserved_all_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_reserved_all_generic_check);
-	run_bottom_up(alloc_try_nid_reserved_all_generic_check);
+	run_top_down(alloc_nid_reserved_all_generic_check);
+	run_bottom_up(alloc_nid_reserved_all_generic_check);
 
 	return 0;
 }
 
-static int alloc_try_nid_low_max_check(void)
+static int alloc_nid_low_max_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_low_max_generic_check);
-	run_bottom_up(alloc_try_nid_low_max_generic_check);
+	run_top_down(alloc_nid_low_max_generic_check);
+	run_bottom_up(alloc_nid_low_max_generic_check);
 
 	return 0;
 }
@@ -1204,22 +1215,22 @@ static int alloc_try_nid_low_max_check(void)
 static int memblock_alloc_nid_range_checks(void)
 {
 	test_print("Running %s range tests...\n",
-		   get_memblock_alloc_try_nid_name(alloc_nid_test_flags));
+		   get_memblock_alloc_nid_name(alloc_nid_test_flags));
 
-	alloc_try_nid_simple_check();
-	alloc_try_nid_misaligned_check();
-	alloc_try_nid_narrow_range_check();
-	alloc_try_nid_reserved_with_space_check();
-	alloc_try_nid_reserved_no_space_check();
-	alloc_try_nid_cap_max_check();
-	alloc_try_nid_cap_min_check();
+	alloc_nid_simple_check();
+	alloc_nid_misaligned_check();
+	alloc_nid_narrow_range_check();
+	alloc_nid_reserved_with_space_check();
+	alloc_nid_reserved_no_space_check();
+	alloc_nid_cap_max_check();
+	alloc_nid_cap_min_check();
 
-	alloc_try_nid_min_reserved_check();
-	alloc_try_nid_max_reserved_check();
-	alloc_try_nid_exact_address_check();
-	alloc_try_nid_reserved_full_merge_check();
-	alloc_try_nid_reserved_all_check();
-	alloc_try_nid_low_max_check();
+	alloc_nid_min_reserved_check();
+	alloc_nid_max_reserved_check();
+	alloc_nid_exact_address_check();
+	alloc_nid_reserved_full_merge_check();
+	alloc_nid_reserved_all_check();
+	alloc_nid_low_max_check();
 
 	return 0;
 }
@@ -1229,7 +1240,7 @@ static int memblock_alloc_nid_range_checks(void)
  * has enough memory to allocate a region of the requested size.
  * Expect to allocate an aligned region at the end of the requested node.
  */
-static int alloc_try_nid_top_down_numa_simple_check(void)
+static int alloc_nid_top_down_numa_simple_check(void)
 {
 	int nid_req = 3;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -1247,8 +1258,8 @@ static int alloc_try_nid_top_down_numa_simple_check(void)
 	min_addr = memblock_start_of_DRAM();
 	max_addr = memblock_end_of_DRAM();
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1280,7 +1291,7 @@ static int alloc_try_nid_top_down_numa_simple_check(void)
  * Expect to allocate an aligned region at the end of the last node that has
  * enough memory (in this case, nid = 6) after falling back to NUMA_NO_NODE.
  */
-static int alloc_try_nid_top_down_numa_small_node_check(void)
+static int alloc_nid_top_down_numa_small_node_check(void)
 {
 	int nid_req = 1;
 	int nid_exp = 6;
@@ -1299,8 +1310,8 @@ static int alloc_try_nid_top_down_numa_small_node_check(void)
 	min_addr = memblock_start_of_DRAM();
 	max_addr = memblock_end_of_DRAM();
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1333,7 +1344,7 @@ static int alloc_try_nid_top_down_numa_small_node_check(void)
  * large enough and has enough unreserved memory (in this case, nid = 6) after
  * falling back to NUMA_NO_NODE. The region count and total size get updated.
  */
-static int alloc_try_nid_top_down_numa_node_reserved_check(void)
+static int alloc_nid_top_down_numa_node_reserved_check(void)
 {
 	int nid_req = 2;
 	int nid_exp = 6;
@@ -1353,8 +1364,8 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void)
 	max_addr = memblock_end_of_DRAM();
 
 	memblock_reserve(req_node->base, req_node->size);
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1386,7 +1397,7 @@ static int alloc_try_nid_top_down_numa_node_reserved_check(void)
  * Expect to allocate an aligned region at the end of the requested node. The
  * region count and total size get updated.
  */
-static int alloc_try_nid_top_down_numa_part_reserved_check(void)
+static int alloc_nid_top_down_numa_part_reserved_check(void)
 {
 	int nid_req = 4;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[1];
@@ -1408,8 +1419,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void)
 	max_addr = memblock_end_of_DRAM();
 
 	memblock_reserve(r1.base, r1.size);
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1444,7 +1455,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_check(void)
  * nid = NUMA_NODES - 1) after falling back to NUMA_NO_NODE. The region count
  * and total size get updated.
  */
-static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void)
+static int alloc_nid_top_down_numa_part_reserved_fallback_check(void)
 {
 	int nid_req = 4;
 	int nid_exp = NUMA_NODES - 1;
@@ -1469,8 +1480,8 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void)
 	max_addr = memblock_end_of_DRAM();
 
 	memblock_reserve(r1.base, r1.size);
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1507,7 +1518,7 @@ static int alloc_try_nid_top_down_numa_part_reserved_fallback_check(void)
  * Expect to drop the lower limit and allocate a memory region that ends at
  * the end of the requested node.
  */
-static int alloc_try_nid_top_down_numa_split_range_low_check(void)
+static int alloc_nid_top_down_numa_split_range_low_check(void)
 {
 	int nid_req = 2;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -1525,8 +1536,8 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void)
 	min_addr = req_node_end - SZ_256;
 	max_addr = min_addr + size;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1563,7 +1574,7 @@ static int alloc_try_nid_top_down_numa_split_range_low_check(void)
  * Expect to drop the lower limit and allocate a memory region that
  * ends at the end of the first node that overlaps with the range.
  */
-static int alloc_try_nid_top_down_numa_split_range_high_check(void)
+static int alloc_nid_top_down_numa_split_range_high_check(void)
 {
 	int nid_req = 3;
 	int nid_exp = nid_req - 1;
@@ -1582,8 +1593,8 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void)
 	min_addr = exp_node_end - SZ_256;
 	max_addr = min_addr + size;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1620,7 +1631,7 @@ static int alloc_try_nid_top_down_numa_split_range_high_check(void)
  * Expect to drop the lower limit and allocate a memory region that ends at
  * the end of the requested node.
  */
-static int alloc_try_nid_top_down_numa_no_overlap_split_check(void)
+static int alloc_nid_top_down_numa_no_overlap_split_check(void)
 {
 	int nid_req = 2;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -1638,8 +1649,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void)
 	min_addr = node2->base - SZ_256;
 	max_addr = min_addr + size;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1677,7 +1688,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_split_check(void)
  * Expect to allocate a memory region at the end of the final node in
  * the range after falling back to NUMA_NO_NODE.
  */
-static int alloc_try_nid_top_down_numa_no_overlap_low_check(void)
+static int alloc_nid_top_down_numa_no_overlap_low_check(void)
 {
 	int nid_req = 0;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -1694,8 +1705,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void)
 	min_addr = min_node->base;
 	max_addr = region_end(max_node);
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1733,7 +1744,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_low_check(void)
  * Expect to allocate a memory region at the end of the final node in
  * the range after falling back to NUMA_NO_NODE.
  */
-static int alloc_try_nid_top_down_numa_no_overlap_high_check(void)
+static int alloc_nid_top_down_numa_no_overlap_high_check(void)
 {
 	int nid_req = 7;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -1750,8 +1761,8 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void)
 	min_addr = min_node->base;
 	max_addr = region_end(max_node);
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1773,7 +1784,7 @@ static int alloc_try_nid_top_down_numa_no_overlap_high_check(void)
  * has enough memory to allocate a region of the requested size.
  * Expect to allocate an aligned region at the beginning of the requested node.
  */
-static int alloc_try_nid_bottom_up_numa_simple_check(void)
+static int alloc_nid_bottom_up_numa_simple_check(void)
 {
 	int nid_req = 3;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -1791,8 +1802,8 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void)
 	min_addr = memblock_start_of_DRAM();
 	max_addr = memblock_end_of_DRAM();
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1824,7 +1835,7 @@ static int alloc_try_nid_bottom_up_numa_simple_check(void)
  * Expect to allocate an aligned region at the beginning of the first node that
  * has enough memory (in this case, nid = 0) after falling back to NUMA_NO_NODE.
  */
-static int alloc_try_nid_bottom_up_numa_small_node_check(void)
+static int alloc_nid_bottom_up_numa_small_node_check(void)
 {
 	int nid_req = 1;
 	int nid_exp = 0;
@@ -1843,8 +1854,8 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void)
 	min_addr = memblock_start_of_DRAM();
 	max_addr = memblock_end_of_DRAM();
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1878,7 +1889,7 @@ static int alloc_try_nid_bottom_up_numa_small_node_check(void)
  * after falling back to NUMA_NO_NODE. The region count and total size get
  * updated.
  */
-static int alloc_try_nid_bottom_up_numa_node_reserved_check(void)
+static int alloc_nid_bottom_up_numa_node_reserved_check(void)
 {
 	int nid_req = 2;
 	int nid_exp = 0;
@@ -1898,8 +1909,8 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void)
 	max_addr = memblock_end_of_DRAM();
 
 	memblock_reserve(req_node->base, req_node->size);
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1931,7 +1942,7 @@ static int alloc_try_nid_bottom_up_numa_node_reserved_check(void)
  * Expect to allocate an aligned region in the requested node that merges with
  * the existing reserved region. The total size gets updated.
  */
-static int alloc_try_nid_bottom_up_numa_part_reserved_check(void)
+static int alloc_nid_bottom_up_numa_part_reserved_check(void)
 {
 	int nid_req = 4;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -1955,8 +1966,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void)
 	total_size = size + r1.size;
 
 	memblock_reserve(r1.base, r1.size);
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -1991,7 +2002,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_check(void)
  * nid = 0) after falling back to NUMA_NO_NODE. The region count and total size
  * get updated.
  */
-static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void)
+static int alloc_nid_bottom_up_numa_part_reserved_fallback_check(void)
 {
 	int nid_req = 4;
 	int nid_exp = 0;
@@ -2016,8 +2027,8 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void)
 	max_addr = memblock_end_of_DRAM();
 
 	memblock_reserve(r1.base, r1.size);
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -2054,7 +2065,7 @@ static int alloc_try_nid_bottom_up_numa_part_reserved_fallback_check(void)
  * Expect to drop the lower limit and allocate a memory region at the beginning
  * of the requested node.
  */
-static int alloc_try_nid_bottom_up_numa_split_range_low_check(void)
+static int alloc_nid_bottom_up_numa_split_range_low_check(void)
 {
 	int nid_req = 2;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -2072,8 +2083,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void)
 	min_addr = req_node_end - SZ_256;
 	max_addr = min_addr + size;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -2110,7 +2121,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_low_check(void)
  * Expect to drop the lower limit and allocate a memory region at the beginning
  * of the first node that has enough memory.
  */
-static int alloc_try_nid_bottom_up_numa_split_range_high_check(void)
+static int alloc_nid_bottom_up_numa_split_range_high_check(void)
 {
 	int nid_req = 3;
 	int nid_exp = 0;
@@ -2130,8 +2141,8 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void)
 	min_addr = req_node->base - SZ_256;
 	max_addr = min_addr + size;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -2168,7 +2179,7 @@ static int alloc_try_nid_bottom_up_numa_split_range_high_check(void)
  * Expect to drop the lower limit and allocate a memory region that starts at
  * the beginning of the requested node.
  */
-static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void)
+static int alloc_nid_bottom_up_numa_no_overlap_split_check(void)
 {
 	int nid_req = 2;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -2186,8 +2197,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void)
 	min_addr = node2->base - SZ_256;
 	max_addr = min_addr + size;
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -2225,7 +2236,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_split_check(void)
  * Expect to allocate a memory region at the beginning of the first node
  * in the range after falling back to NUMA_NO_NODE.
  */
-static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void)
+static int alloc_nid_bottom_up_numa_no_overlap_low_check(void)
 {
 	int nid_req = 0;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -2242,8 +2253,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void)
 	min_addr = min_node->base;
 	max_addr = region_end(max_node);
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -2281,7 +2292,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_low_check(void)
  * Expect to allocate a memory region at the beginning of the first node
  * in the range after falling back to NUMA_NO_NODE.
  */
-static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void)
+static int alloc_nid_bottom_up_numa_no_overlap_high_check(void)
 {
 	int nid_req = 7;
 	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
@@ -2298,8 +2309,8 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void)
 	min_addr = min_node->base;
 	max_addr = region_end(max_node);
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -2330,7 +2341,7 @@ static int alloc_try_nid_bottom_up_numa_no_overlap_high_check(void)
  *
  * Expect no allocation to happen.
  */
-static int alloc_try_nid_numa_large_region_generic_check(void)
+static int alloc_nid_numa_large_region_generic_check(void)
 {
 	int nid_req = 3;
 	void *allocated_ptr = NULL;
@@ -2344,8 +2355,8 @@ static int alloc_try_nid_numa_large_region_generic_check(void)
 	min_addr = memblock_start_of_DRAM();
 	max_addr = memblock_end_of_DRAM();
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 	ASSERT_EQ(allocated_ptr, NULL);
 
 	test_pass_pop();
@@ -2374,7 +2385,7 @@ static int alloc_try_nid_numa_large_region_generic_check(void)
  * Expect to merge all of the regions into one. The region counter and total
  * size fields get updated.
  */
-static int alloc_try_nid_numa_reserved_full_merge_generic_check(void)
+static int alloc_nid_numa_reserved_full_merge_generic_check(void)
 {
 	int nid_req = 6;
 	int nid_next = nid_req + 1;
@@ -2404,8 +2415,8 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr, nid_req);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr, nid_req);
 
 	ASSERT_NE(allocated_ptr, NULL);
 	assert_mem_content(allocated_ptr, size, alloc_nid_test_flags);
@@ -2448,7 +2459,7 @@ static int alloc_try_nid_numa_reserved_full_merge_generic_check(void)
  *
  * Expect no allocation to happen.
  */
-static int alloc_try_nid_numa_split_all_reserved_generic_check(void)
+static int alloc_nid_numa_split_all_reserved_generic_check(void)
 {
 	void *allocated_ptr = NULL;
 	struct memblock_region *next_node = &memblock.memory.regions[7];
@@ -2472,9 +2483,9 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void)
 	memblock_reserve(r1.base, r1.size);
 	memblock_reserve(r2.base, r2.size);
 
-	allocated_ptr = run_memblock_alloc_try_nid(size, SMP_CACHE_BYTES,
-						   min_addr, max_addr,
-						   NUMA_NO_NODE);
+	allocated_ptr = run_memblock_alloc_nid(size, SMP_CACHE_BYTES,
+					       min_addr, max_addr,
+					       NUMA_NO_NODE);
 
 	ASSERT_EQ(allocated_ptr, NULL);
 
@@ -2484,139 +2495,139 @@ static int alloc_try_nid_numa_split_all_reserved_generic_check(void)
 }
 
 /* Test case wrappers for NUMA tests */
-static int alloc_try_nid_numa_simple_check(void)
+static int alloc_nid_numa_simple_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_simple_check();
+	alloc_nid_top_down_numa_simple_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_simple_check();
+	alloc_nid_bottom_up_numa_simple_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_small_node_check(void)
+static int alloc_nid_numa_small_node_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_small_node_check();
+	alloc_nid_top_down_numa_small_node_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_small_node_check();
+	alloc_nid_bottom_up_numa_small_node_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_node_reserved_check(void)
+static int alloc_nid_numa_node_reserved_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_node_reserved_check();
+	alloc_nid_top_down_numa_node_reserved_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_node_reserved_check();
+	alloc_nid_bottom_up_numa_node_reserved_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_part_reserved_check(void)
+static int alloc_nid_numa_part_reserved_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_part_reserved_check();
+	alloc_nid_top_down_numa_part_reserved_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_part_reserved_check();
+	alloc_nid_bottom_up_numa_part_reserved_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_part_reserved_fallback_check(void)
+static int alloc_nid_numa_part_reserved_fallback_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_part_reserved_fallback_check();
+	alloc_nid_top_down_numa_part_reserved_fallback_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_part_reserved_fallback_check();
+	alloc_nid_bottom_up_numa_part_reserved_fallback_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_split_range_low_check(void)
+static int alloc_nid_numa_split_range_low_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_split_range_low_check();
+	alloc_nid_top_down_numa_split_range_low_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_split_range_low_check();
+	alloc_nid_bottom_up_numa_split_range_low_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_split_range_high_check(void)
+static int alloc_nid_numa_split_range_high_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_split_range_high_check();
+	alloc_nid_top_down_numa_split_range_high_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_split_range_high_check();
+	alloc_nid_bottom_up_numa_split_range_high_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_no_overlap_split_check(void)
+static int alloc_nid_numa_no_overlap_split_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_no_overlap_split_check();
+	alloc_nid_top_down_numa_no_overlap_split_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_no_overlap_split_check();
+	alloc_nid_bottom_up_numa_no_overlap_split_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_no_overlap_low_check(void)
+static int alloc_nid_numa_no_overlap_low_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_no_overlap_low_check();
+	alloc_nid_top_down_numa_no_overlap_low_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_no_overlap_low_check();
+	alloc_nid_bottom_up_numa_no_overlap_low_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_no_overlap_high_check(void)
+static int alloc_nid_numa_no_overlap_high_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
-	alloc_try_nid_top_down_numa_no_overlap_high_check();
+	alloc_nid_top_down_numa_no_overlap_high_check();
 	memblock_set_bottom_up(true);
-	alloc_try_nid_bottom_up_numa_no_overlap_high_check();
+	alloc_nid_bottom_up_numa_no_overlap_high_check();
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_large_region_check(void)
+static int alloc_nid_numa_large_region_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_numa_large_region_generic_check);
-	run_bottom_up(alloc_try_nid_numa_large_region_generic_check);
+	run_top_down(alloc_nid_numa_large_region_generic_check);
+	run_bottom_up(alloc_nid_numa_large_region_generic_check);
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_reserved_full_merge_check(void)
+static int alloc_nid_numa_reserved_full_merge_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_numa_reserved_full_merge_generic_check);
-	run_bottom_up(alloc_try_nid_numa_reserved_full_merge_generic_check);
+	run_top_down(alloc_nid_numa_reserved_full_merge_generic_check);
+	run_bottom_up(alloc_nid_numa_reserved_full_merge_generic_check);
 
 	return 0;
 }
 
-static int alloc_try_nid_numa_split_all_reserved_check(void)
+static int alloc_nid_numa_split_all_reserved_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
-	run_top_down(alloc_try_nid_numa_split_all_reserved_generic_check);
-	run_bottom_up(alloc_try_nid_numa_split_all_reserved_generic_check);
+	run_top_down(alloc_nid_numa_split_all_reserved_generic_check);
+	run_bottom_up(alloc_nid_numa_split_all_reserved_generic_check);
 
 	return 0;
 }
@@ -2624,22 +2635,22 @@ static int alloc_try_nid_numa_split_all_reserved_check(void)
 int __memblock_alloc_nid_numa_checks(void)
 {
 	test_print("Running %s NUMA tests...\n",
-		   get_memblock_alloc_try_nid_name(alloc_nid_test_flags));
+		   get_memblock_alloc_nid_name(alloc_nid_test_flags));
 
-	alloc_try_nid_numa_simple_check();
-	alloc_try_nid_numa_small_node_check();
-	alloc_try_nid_numa_node_reserved_check();
-	alloc_try_nid_numa_part_reserved_check();
-	alloc_try_nid_numa_part_reserved_fallback_check();
-	alloc_try_nid_numa_split_range_low_check();
-	alloc_try_nid_numa_split_range_high_check();
+	alloc_nid_numa_simple_check();
+	alloc_nid_numa_small_node_check();
+	alloc_nid_numa_node_reserved_check();
+	alloc_nid_numa_part_reserved_check();
+	alloc_nid_numa_part_reserved_fallback_check();
+	alloc_nid_numa_split_range_low_check();
+	alloc_nid_numa_split_range_high_check();
 
-	alloc_try_nid_numa_no_overlap_split_check();
-	alloc_try_nid_numa_no_overlap_low_check();
-	alloc_try_nid_numa_no_overlap_high_check();
-	alloc_try_nid_numa_large_region_check();
-	alloc_try_nid_numa_reserved_full_merge_check();
-	alloc_try_nid_numa_split_all_reserved_check();
+	alloc_nid_numa_no_overlap_split_check();
+	alloc_nid_numa_no_overlap_low_check();
+	alloc_nid_numa_no_overlap_high_check();
+	alloc_nid_numa_large_region_check();
+	alloc_nid_numa_reserved_full_merge_check();
+	alloc_nid_numa_split_all_reserved_check();
 
 	return 0;
 }
@@ -2649,7 +2660,7 @@ static int memblock_alloc_nid_checks_internal(int flags)
 	alloc_nid_test_flags = flags;
 
 	prefix_reset();
-	prefix_push(get_memblock_alloc_try_nid_name(flags));
+	prefix_push(get_memblock_alloc_nid_name(flags));
 
 	reset_memblock_attributes();
 	dummy_physical_memory_init();
@@ -2671,3 +2682,12 @@ int memblock_alloc_nid_checks(void)
 
 	return 0;
 }
+
+int memblock_alloc_exact_nid_range_checks(void)
+{
+	alloc_nid_test_flags = (TEST_F_RAW | TEST_F_EXACT);
+
+	memblock_alloc_nid_range_checks();
+
+	return 0;
+}
diff --git a/tools/testing/memblock/tests/alloc_nid_api.h b/tools/testing/memblock/tests/alloc_nid_api.h
index 92d07d230e18f..2b8cabacacb82 100644
--- a/tools/testing/memblock/tests/alloc_nid_api.h
+++ b/tools/testing/memblock/tests/alloc_nid_api.h
@@ -5,6 +5,7 @@
 #include "common.h"
 
 int memblock_alloc_nid_checks(void);
+int memblock_alloc_exact_nid_range_checks(void);
 int __memblock_alloc_nid_numa_checks(void);
 
 #ifdef CONFIG_NUMA
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
index cc82b85151b61..4f23302ee6779 100644
--- a/tools/testing/memblock/tests/common.h
+++ b/tools/testing/memblock/tests/common.h
@@ -21,6 +21,8 @@ enum test_flags {
 	TEST_F_NONE = 0x0,
 	/* Perform raw allocations (no zeroing of memory). */
 	TEST_F_RAW = 0x1,
+	/* Perform allocations on the exact node specified. */
+	TEST_F_EXACT = 0x2
 };
 
 /**
-- 
GitLab


From bfc05a4ce3650a1e5a47ccdaf8c87f814829b4a7 Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Mon, 7 Nov 2022 00:28:06 -0600
Subject: [PATCH 1001/4122] memblock tests: add top-down NUMA tests for
 memblock_alloc_exact_nid_raw

Add tests for memblock_alloc_exact_nid_raw() where the simulated physical
memory is set up with multiple NUMA nodes. Additionally, all of these
tests set nid != NUMA_NO_NODE. These tests are run with a top-down
allocation direction.

The tested scenarios are:

Range unrestricted:
- region can be allocated in the specific node requested:
      + there are no previously reserved regions
      + the requested node is partially reserved but has enough space

Range restricted:
- region can be allocated in the specific node requested after dropping
  min_addr:
      + range partially overlaps with two different nodes, where the
        first node is the requested node
      + range partially overlaps with two different nodes, where the
        requested node ends before min_addr
      + range overlaps with multiple nodes along node boundaries, and
        the requested node ends before min_addr

Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/2cc0883243d68ddc3faf833d2d9e86f48534c1d7.1667802195.git.remckee0@gmail.com
---
 .../memblock/tests/alloc_exact_nid_api.c      | 344 ++++++++++++++++++
 .../memblock/tests/alloc_exact_nid_api.h      |  16 +
 2 files changed, 360 insertions(+)

diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c
index 6406496623a01..79150784b3739 100644
--- a/tools/testing/memblock/tests/alloc_exact_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c
@@ -4,6 +4,349 @@
 
 #define FUNC_NAME			"memblock_alloc_exact_nid_raw"
 
+/*
+ * contains the fraction of MEM_SIZE contained in each node in basis point
+ * units (one hundredth of 1% or 1/10000)
+ */
+static const unsigned int node_fractions[] = {
+	2500, /* 1/4  */
+	 625, /* 1/16 */
+	1250, /* 1/8  */
+	1250, /* 1/8  */
+	 625, /* 1/16 */
+	 625, /* 1/16 */
+	2500, /* 1/4  */
+	 625, /* 1/16 */
+};
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * has enough memory to allocate a region of the requested size.
+ * Expect to allocate an aligned region at the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_simple_check(void)
+{
+	int nid_req = 3;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	ASSERT_LE(SZ_4, req_node->size);
+	size = req_node->size / SZ_4;
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+	ASSERT_LE(req_node->base, new_rgn->base);
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved but has enough memory for the allocated region:
+ *
+ *  |           +---------------------------------------+          |
+ *  |           |               requested               |          |
+ *  +-----------+---------------------------------------+----------+
+ *
+ *  |           +------------------+              +-----+          |
+ *  |           |     reserved     |              | new |          |
+ *  +-----------+------------------+--------------+-----+----------+
+ *
+ * Expect to allocate an aligned region at the end of the requested node. The
+ * region count and total size get updated.
+ */
+static int alloc_exact_nid_top_down_numa_part_reserved_check(void)
+{
+	int nid_req = 4;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[1];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	struct region r1;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	ASSERT_LE(SZ_8, req_node->size);
+	r1.base = req_node->base;
+	r1.size = req_node->size / SZ_2;
+	size = r1.size / SZ_4;
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+
+	memblock_reserve(r1.base, r1.size);
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+	ASSERT_LE(req_node->base, new_rgn->base);
+
+	ASSERT_EQ(memblock.reserved.cnt, 2);
+	ASSERT_EQ(memblock.reserved.total_size, size + r1.size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the first
+ * node is the requested node:
+ *
+ *                                min_addr
+ *                                |           max_addr
+ *                                |           |
+ *                                v           v
+ *  |           +-----------------------+-----------+              |
+ *  |           |       requested       |   node3   |              |
+ *  +-----------+-----------------------+-----------+--------------+
+ *                                +           +
+ *  |                       +-----------+                          |
+ *  |                       |    rgn    |                          |
+ *  +-----------------------+-----------+--------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_split_range_low_check(void)
+{
+	int nid_req = 2;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	phys_addr_t size = SZ_512;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t req_node_end;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	req_node_end = region_end(req_node);
+	min_addr = req_node_end - SZ_256;
+	max_addr = min_addr + size;
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, req_node_end - size);
+	ASSERT_LE(req_node->base, new_rgn->base);
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the requested
+ * node ends before min_addr:
+ *
+ *                                         min_addr
+ *                                         |         max_addr
+ *                                         |         |
+ *                                         v         v
+ *  |    +---------------+        +-------------+---------+          |
+ *  |    |   requested   |        |    node1    |  node2  |          |
+ *  +----+---------------+--------+-------------+---------+----------+
+ *                                         +         +
+ *  |          +---------+                                           |
+ *  |          |   rgn   |                                           |
+ *  +----------+---------+-------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_no_overlap_split_check(void)
+{
+	int nid_req = 2;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	struct memblock_region *node2 = &memblock.memory.regions[6];
+	void *allocated_ptr = NULL;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	size = SZ_512;
+	min_addr = node2->base - SZ_256;
+	max_addr = min_addr + size;
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+	ASSERT_LE(req_node->base, new_rgn->base);
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node ends
+ * before min_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ *                          min_addr
+ *                          |                                 max_addr
+ *                          |                                 |
+ *                          v                                 v
+ *  |-----------+           +----------+----...----+----------+      |
+ *  | requested |           | min node |    ...    | max node |      |
+ *  +-----------+-----------+----------+----...----+----------+------+
+ *                          +                                 +
+ *  |     +-----+                                                    |
+ *  |     | rgn |                                                    |
+ *  +-----+-----+----------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that ends at
+ * the end of the requested node.
+ */
+static int alloc_exact_nid_top_down_numa_no_overlap_low_check(void)
+{
+	int nid_req = 0;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	struct memblock_region *min_node = &memblock.memory.regions[2];
+	struct memblock_region *max_node = &memblock.memory.regions[5];
+	void *allocated_ptr = NULL;
+	phys_addr_t size = SZ_64;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	min_addr = min_node->base;
+	max_addr = region_end(max_node);
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, region_end(req_node) - size);
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/* Test case wrappers for NUMA tests */
+static int alloc_exact_nid_numa_simple_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	memblock_set_bottom_up(false);
+	alloc_exact_nid_top_down_numa_simple_check();
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_part_reserved_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	memblock_set_bottom_up(false);
+	alloc_exact_nid_top_down_numa_part_reserved_check();
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_split_range_low_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	memblock_set_bottom_up(false);
+	alloc_exact_nid_top_down_numa_split_range_low_check();
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_no_overlap_split_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	memblock_set_bottom_up(false);
+	alloc_exact_nid_top_down_numa_no_overlap_split_check();
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_no_overlap_low_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	memblock_set_bottom_up(false);
+	alloc_exact_nid_top_down_numa_no_overlap_low_check();
+
+	return 0;
+}
+
+int __memblock_alloc_exact_nid_numa_checks(void)
+{
+	test_print("Running %s NUMA tests...\n", FUNC_NAME);
+
+	alloc_exact_nid_numa_simple_check();
+	alloc_exact_nid_numa_part_reserved_check();
+	alloc_exact_nid_numa_split_range_low_check();
+	alloc_exact_nid_numa_no_overlap_split_check();
+	alloc_exact_nid_numa_no_overlap_low_check();
+
+	return 0;
+}
+
 int memblock_alloc_exact_nid_checks(void)
 {
 	prefix_reset();
@@ -13,6 +356,7 @@ int memblock_alloc_exact_nid_checks(void)
 	dummy_physical_memory_init();
 
 	memblock_alloc_exact_nid_range_checks();
+	memblock_alloc_exact_nid_numa_checks();
 
 	dummy_physical_memory_cleanup();
 
diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.h b/tools/testing/memblock/tests/alloc_exact_nid_api.h
index 4408719de3b9a..cef419d55d2ae 100644
--- a/tools/testing/memblock/tests/alloc_exact_nid_api.h
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.h
@@ -5,5 +5,21 @@
 #include "common.h"
 
 int memblock_alloc_exact_nid_checks(void);
+int __memblock_alloc_exact_nid_numa_checks(void);
+
+#ifdef CONFIG_NUMA
+static inline int memblock_alloc_exact_nid_numa_checks(void)
+{
+	__memblock_alloc_exact_nid_numa_checks();
+	return 0;
+}
+
+#else
+static inline int memblock_alloc_exact_nid_numa_checks(void)
+{
+	return 0;
+}
+
+#endif /* CONFIG_NUMA */
 
 #endif
-- 
GitLab


From b6df23edb1ba65b0b46788a872ddc85dfe86ccf5 Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Mon, 7 Nov 2022 00:28:07 -0600
Subject: [PATCH 1002/4122] memblock tests: add bottom-up NUMA tests for
 memblock_alloc_exact_nid_raw

Add tests for memblock_alloc_exact_nid_raw() where the simulated physical
memory is set up with multiple NUMA nodes. Additionally, all of these
tests set nid != NUMA_NO_NODE. These tests are run with a bottom-up
allocation direction.

The tested scenarios are:

Range unrestricted:
- region can be allocated in the specific node requested:
      + there are no previously reserved regions
      + the requested node is partially reserved but has enough space

Range restricted:
- region can be allocated in the specific node requested after dropping
  min_addr:
      + range partially overlaps with two different nodes, where the
        first node is the requested node
      + range partially overlaps with two different nodes, where the
        requested node ends before min_addr
      + range overlaps with multiple nodes along node boundaries, and
        the requested node ends before min_addr

Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/935f0eed5e06fd44dc67d9f49b277923d7896bd3.1667802195.git.remckee0@gmail.com
---
 .../memblock/tests/alloc_exact_nid_api.c      | 282 ++++++++++++++++++
 1 file changed, 282 insertions(+)

diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c
index 79150784b3739..b97b5c04de053 100644
--- a/tools/testing/memblock/tests/alloc_exact_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c
@@ -288,12 +288,286 @@ static int alloc_exact_nid_top_down_numa_no_overlap_low_check(void)
 	return 0;
 }
 
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * has enough memory to allocate a region of the requested size.
+ * Expect to allocate an aligned region at the beginning of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_simple_check(void)
+{
+	int nid_req = 3;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	ASSERT_LE(SZ_4, req_node->size);
+	size = req_node->size / SZ_4;
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, req_node->base);
+	ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved but has enough memory for the allocated region:
+ *
+ *  |           +---------------------------------------+         |
+ *  |           |               requested               |         |
+ *  +-----------+---------------------------------------+---------+
+ *
+ *  |           +------------------+-----+                        |
+ *  |           |     reserved     | new |                        |
+ *  +-----------+------------------+-----+------------------------+
+ *
+ * Expect to allocate an aligned region in the requested node that merges with
+ * the existing reserved region. The total size gets updated.
+ */
+static int alloc_exact_nid_bottom_up_numa_part_reserved_check(void)
+{
+	int nid_req = 4;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	struct region r1;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t total_size;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	ASSERT_LE(SZ_8, req_node->size);
+	r1.base = req_node->base;
+	r1.size = req_node->size / SZ_2;
+	size = r1.size / SZ_4;
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+	total_size = size + r1.size;
+
+	memblock_reserve(r1.base, r1.size);
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, total_size);
+	ASSERT_EQ(new_rgn->base, req_node->base);
+	ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the first
+ * node is the requested node:
+ *
+ *                                min_addr
+ *                                |           max_addr
+ *                                |           |
+ *                                v           v
+ *  |           +-----------------------+-----------+              |
+ *  |           |       requested       |   node3   |              |
+ *  +-----------+-----------------------+-----------+--------------+
+ *                                +           +
+ *  |           +-----------+                                      |
+ *  |           |    rgn    |                                      |
+ *  +-----------+-----------+--------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region at the beginning
+ * of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_split_range_low_check(void)
+{
+	int nid_req = 2;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	phys_addr_t size = SZ_512;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+	phys_addr_t req_node_end;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	req_node_end = region_end(req_node);
+	min_addr = req_node_end - SZ_256;
+	max_addr = min_addr + size;
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, req_node->base);
+	ASSERT_LE(region_end(new_rgn), req_node_end);
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the requested
+ * node ends before min_addr:
+ *
+ *                                          min_addr
+ *                                         |         max_addr
+ *                                         |         |
+ *                                         v         v
+ *  |    +---------------+        +-------------+---------+         |
+ *  |    |   requested   |        |    node1    |  node2  |         |
+ *  +----+---------------+--------+-------------+---------+---------+
+ *                                         +         +
+ *  |    +---------+                                                |
+ *  |    |   rgn   |                                                |
+ *  +----+---------+------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that starts at
+ * the beginning of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_no_overlap_split_check(void)
+{
+	int nid_req = 2;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	struct memblock_region *node2 = &memblock.memory.regions[6];
+	void *allocated_ptr = NULL;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	size = SZ_512;
+	min_addr = node2->base - SZ_256;
+	max_addr = min_addr + size;
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, req_node->base);
+	ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node ends
+ * before min_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ *                          min_addr
+ *                          |                                 max_addr
+ *                          |                                 |
+ *                          v                                 v
+ *  |-----------+           +----------+----...----+----------+      |
+ *  | requested |           | min node |    ...    | max node |      |
+ *  +-----------+-----------+----------+----...----+----------+------+
+ *                          +                                 +
+ *  |-----+                                                          |
+ *  | rgn |                                                          |
+ *  +-----+----------------------------------------------------------+
+ *
+ * Expect to drop the lower limit and allocate a memory region that starts at
+ * the beginning of the requested node.
+ */
+static int alloc_exact_nid_bottom_up_numa_no_overlap_low_check(void)
+{
+	int nid_req = 0;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	struct memblock_region *min_node = &memblock.memory.regions[2];
+	struct memblock_region *max_node = &memblock.memory.regions[5];
+	void *allocated_ptr = NULL;
+	phys_addr_t size = SZ_64;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	min_addr = min_node->base;
+	max_addr = region_end(max_node);
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, size);
+	ASSERT_EQ(new_rgn->base, req_node->base);
+	ASSERT_LE(region_end(new_rgn), region_end(req_node));
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
 /* Test case wrappers for NUMA tests */
 static int alloc_exact_nid_numa_simple_check(void)
 {
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
 	alloc_exact_nid_top_down_numa_simple_check();
+	memblock_set_bottom_up(true);
+	alloc_exact_nid_bottom_up_numa_simple_check();
 
 	return 0;
 }
@@ -303,6 +577,8 @@ static int alloc_exact_nid_numa_part_reserved_check(void)
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
 	alloc_exact_nid_top_down_numa_part_reserved_check();
+	memblock_set_bottom_up(true);
+	alloc_exact_nid_bottom_up_numa_part_reserved_check();
 
 	return 0;
 }
@@ -312,6 +588,8 @@ static int alloc_exact_nid_numa_split_range_low_check(void)
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
 	alloc_exact_nid_top_down_numa_split_range_low_check();
+	memblock_set_bottom_up(true);
+	alloc_exact_nid_bottom_up_numa_split_range_low_check();
 
 	return 0;
 }
@@ -321,6 +599,8 @@ static int alloc_exact_nid_numa_no_overlap_split_check(void)
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
 	alloc_exact_nid_top_down_numa_no_overlap_split_check();
+	memblock_set_bottom_up(true);
+	alloc_exact_nid_bottom_up_numa_no_overlap_split_check();
 
 	return 0;
 }
@@ -330,6 +610,8 @@ static int alloc_exact_nid_numa_no_overlap_low_check(void)
 	test_print("\tRunning %s...\n", __func__);
 	memblock_set_bottom_up(false);
 	alloc_exact_nid_top_down_numa_no_overlap_low_check();
+	memblock_set_bottom_up(true);
+	alloc_exact_nid_bottom_up_numa_no_overlap_low_check();
 
 	return 0;
 }
-- 
GitLab


From 62bdc99008b372a6c5f81e6d968f3b077a1e3667 Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Mon, 7 Nov 2022 00:28:08 -0600
Subject: [PATCH 1003/4122] memblock tests: add generic NUMA tests for
 memblock_alloc_exact_nid_raw

Add tests for memblock_alloc_exact_nid_raw() where the simulated physical
memory is set up with multiple NUMA nodes. Additionally, all but one of
these tests set nid != NUMA_NO_NODE. All tests are run for both top-down
and bottom-up allocation directions.

The tested scenarios are:

Range unrestricted:
- region cannot be allocated:
      + there are no previously reserved regions, but requested node is
        too small
      + the requested node is fully reserved
      + the requested node is partially reserved and does not have
        enough space
      + none of the nodes have enough memory to allocate the region

Range restricted:
- region can be allocated in the specific node requested without
  dropping min_addr:
      + the range fully overlaps with the node, and there are adjacent
        reserved regions
- region cannot be allocated:
      + range partially overlaps with two different nodes, where the
        second node is the requested node
      + range overlaps with multiple nodes along node boundaries, and
        the requested node starts after max_addr
      + nid is set to NUMA_NO_NODE and the total range can fit the
        region, but the range is split between two nodes and everything
        else is reserved

Acked-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/51b14da46e6591428df3aefc5acc7dca9341a541.1667802195.git.remckee0@gmail.com
---
 .../memblock/tests/alloc_exact_nid_api.c      | 465 ++++++++++++++++++
 1 file changed, 465 insertions(+)

diff --git a/tools/testing/memblock/tests/alloc_exact_nid_api.c b/tools/testing/memblock/tests/alloc_exact_nid_api.c
index b97b5c04de053..6e14447da6e1c 100644
--- a/tools/testing/memblock/tests/alloc_exact_nid_api.c
+++ b/tools/testing/memblock/tests/alloc_exact_nid_api.c
@@ -560,6 +560,390 @@ static int alloc_exact_nid_bottom_up_numa_no_overlap_low_check(void)
 	return 0;
 }
 
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * does not have enough memory to allocate a region of the requested size:
+ *
+ *  |   +-----+                            |
+ *  |   | req |                            |
+ *  +---+-----+----------------------------+
+ *
+ *  +---------+
+ *  |   rgn   |
+ *  +---------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_small_node_generic_check(void)
+{
+	int nid_req = 1;
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	size = SZ_2 * req_node->size;
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_EQ(allocated_ptr, NULL);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is fully reserved:
+ *
+ *  |              +---------+             |
+ *  |              |requested|             |
+ *  +--------------+---------+-------------+
+ *
+ *  |              +---------+             |
+ *  |              | reserved|             |
+ *  +--------------+---------+-------------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_node_reserved_generic_check(void)
+{
+	int nid_req = 2;
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	size = req_node->size;
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+
+	memblock_reserve(req_node->base, req_node->size);
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_EQ(allocated_ptr, NULL);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * is partially reserved and does not have enough contiguous memory for the
+ * allocated region:
+ *
+ *  |           +-----------------------+    |
+ *  |           |       requested       |    |
+ *  +-----------+-----------------------+----+
+ *
+ *  |                 +----------+           |
+ *  |                 | reserved |           |
+ *  +-----------------+----------+-----------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_part_reserved_fail_generic_check(void)
+{
+	int nid_req = 4;
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	struct region r1;
+	phys_addr_t size;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	ASSERT_LE(SZ_4, req_node->size);
+	size = req_node->size / SZ_2;
+	r1.base = req_node->base + (size / SZ_2);
+	r1.size = size;
+
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+
+	memblock_reserve(r1.base, r1.size);
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_EQ(allocated_ptr, NULL);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region that spans over the min_addr
+ * and max_addr range and overlaps with two different nodes, where the second
+ * node is the requested node:
+ *
+ *                               min_addr
+ *                               |         max_addr
+ *                               |         |
+ *                               v         v
+ *  |      +--------------------------+---------+                |
+ *  |      |        first node        |requested|                |
+ *  +------+--------------------------+---------+----------------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_split_range_high_generic_check(void)
+{
+	int nid_req = 3;
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	void *allocated_ptr = NULL;
+	phys_addr_t size = SZ_512;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	min_addr = req_node->base - SZ_256;
+	max_addr = min_addr + size;
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_EQ(allocated_ptr, NULL);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range when
+ * the requested node and the range do not overlap, and requested node starts
+ * after max_addr. The range overlaps with multiple nodes along node
+ * boundaries:
+ *
+ *        min_addr
+ *        |                                 max_addr
+ *        |                                 |
+ *        v                                 v
+ *  |     +----------+----...----+----------+        +-----------+   |
+ *  |     | min node |    ...    | max node |        | requested |   |
+ *  +-----+----------+----...----+----------+--------+-----------+---+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_no_overlap_high_generic_check(void)
+{
+	int nid_req = 7;
+	struct memblock_region *min_node = &memblock.memory.regions[2];
+	struct memblock_region *max_node = &memblock.memory.regions[5];
+	void *allocated_ptr = NULL;
+	phys_addr_t size = SZ_64;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	min_addr = min_node->base;
+	max_addr = region_end(max_node);
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_EQ(allocated_ptr, NULL);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate a memory region in a specific NUMA node that
+ * does not have enough memory to allocate a region of the requested size.
+ * Additionally, none of the nodes have enough memory to allocate the region:
+ *
+ * +-----------------------------------+
+ * |                new                |
+ * +-----------------------------------+
+ *     |-------+-------+-------+-------+-------+-------+-------+-------|
+ *     | node0 | node1 | node2 | node3 | node4 | node5 | node6 | node7 |
+ *     +-------+-------+-------+-------+-------+-------+-------+-------+
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_large_region_generic_check(void)
+{
+	int nid_req = 3;
+	void *allocated_ptr = NULL;
+	phys_addr_t size = MEM_SIZE / SZ_2;
+	phys_addr_t min_addr;
+	phys_addr_t max_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	min_addr = memblock_start_of_DRAM();
+	max_addr = memblock_end_of_DRAM();
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+	ASSERT_EQ(allocated_ptr, NULL);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_addr range when
+ * there are two reserved regions at the borders. The requested node starts at
+ * min_addr and ends at max_addr and is the same size as the region to be
+ * allocated:
+ *
+ *                     min_addr
+ *                     |                       max_addr
+ *                     |                       |
+ *                     v                       v
+ *  |      +-----------+-----------------------+-----------------------|
+ *  |      |   node5   |       requested       |         node7         |
+ *  +------+-----------+-----------------------+-----------------------+
+ *                     +                       +
+ *  |             +----+-----------------------+----+                  |
+ *  |             | r2 |          new          | r1 |                  |
+ *  +-------------+----+-----------------------+----+------------------+
+ *
+ * Expect to merge all of the regions into one. The region counter and total
+ * size fields get updated.
+ */
+static int alloc_exact_nid_numa_reserved_full_merge_generic_check(void)
+{
+	int nid_req = 6;
+	int nid_next = nid_req + 1;
+	struct memblock_region *new_rgn = &memblock.reserved.regions[0];
+	struct memblock_region *req_node = &memblock.memory.regions[nid_req];
+	struct memblock_region *next_node = &memblock.memory.regions[nid_next];
+	void *allocated_ptr = NULL;
+	struct region r1, r2;
+	phys_addr_t size = req_node->size;
+	phys_addr_t total_size;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	r1.base = next_node->base;
+	r1.size = SZ_128;
+
+	r2.size = SZ_128;
+	r2.base = r1.base - (size + r2.size);
+
+	total_size = r1.size + r2.size + size;
+	min_addr = r2.base + r2.size;
+	max_addr = r1.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     nid_req);
+
+	ASSERT_NE(allocated_ptr, NULL);
+	ASSERT_MEM_NE(allocated_ptr, 0, size);
+
+	ASSERT_EQ(new_rgn->size, total_size);
+	ASSERT_EQ(new_rgn->base, r2.base);
+
+	ASSERT_LE(new_rgn->base, req_node->base);
+	ASSERT_LE(region_end(req_node), region_end(new_rgn));
+
+	ASSERT_EQ(memblock.reserved.cnt, 1);
+	ASSERT_EQ(memblock.reserved.total_size, total_size);
+
+	test_pass_pop();
+
+	return 0;
+}
+
+/*
+ * A test that tries to allocate memory within min_addr and max_add range,
+ * where the total range can fit the region, but it is split between two nodes
+ * and everything else is reserved. Additionally, nid is set to NUMA_NO_NODE
+ * instead of requesting a specific node:
+ *
+ *                         +-----------+
+ *                         |    new    |
+ *                         +-----------+
+ *  |      +---------------------+-----------|
+ *  |      |      prev node      | next node |
+ *  +------+---------------------+-----------+
+ *                         +           +
+ *  |----------------------+           +-----|
+ *  |          r1          |           |  r2 |
+ *  +----------------------+-----------+-----+
+ *                         ^           ^
+ *                         |           |
+ *                         |           max_addr
+ *                         |
+ *                         min_addr
+ *
+ * Expect no allocation to happen.
+ */
+static int alloc_exact_nid_numa_split_all_reserved_generic_check(void)
+{
+	void *allocated_ptr = NULL;
+	struct memblock_region *next_node = &memblock.memory.regions[7];
+	struct region r1, r2;
+	phys_addr_t size = SZ_256;
+	phys_addr_t max_addr;
+	phys_addr_t min_addr;
+
+	PREFIX_PUSH();
+	setup_numa_memblock(node_fractions);
+
+	r2.base = next_node->base + SZ_128;
+	r2.size = memblock_end_of_DRAM() - r2.base;
+
+	r1.size = MEM_SIZE - (r2.size + size);
+	r1.base = memblock_start_of_DRAM();
+
+	min_addr = r1.base + r1.size;
+	max_addr = r2.base;
+
+	memblock_reserve(r1.base, r1.size);
+	memblock_reserve(r2.base, r2.size);
+
+	allocated_ptr = memblock_alloc_exact_nid_raw(size, SMP_CACHE_BYTES,
+						     min_addr, max_addr,
+						     NUMA_NO_NODE);
+
+	ASSERT_EQ(allocated_ptr, NULL);
+
+	test_pass_pop();
+
+	return 0;
+}
+
 /* Test case wrappers for NUMA tests */
 static int alloc_exact_nid_numa_simple_check(void)
 {
@@ -616,6 +1000,78 @@ static int alloc_exact_nid_numa_no_overlap_low_check(void)
 	return 0;
 }
 
+static int alloc_exact_nid_numa_small_node_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_small_node_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_small_node_generic_check);
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_node_reserved_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_node_reserved_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_node_reserved_generic_check);
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_part_reserved_fail_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_part_reserved_fail_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_part_reserved_fail_generic_check);
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_split_range_high_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_split_range_high_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_split_range_high_generic_check);
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_no_overlap_high_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_no_overlap_high_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_no_overlap_high_generic_check);
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_large_region_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_large_region_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_large_region_generic_check);
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_reserved_full_merge_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_reserved_full_merge_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_reserved_full_merge_generic_check);
+
+	return 0;
+}
+
+static int alloc_exact_nid_numa_split_all_reserved_check(void)
+{
+	test_print("\tRunning %s...\n", __func__);
+	run_top_down(alloc_exact_nid_numa_split_all_reserved_generic_check);
+	run_bottom_up(alloc_exact_nid_numa_split_all_reserved_generic_check);
+
+	return 0;
+}
+
 int __memblock_alloc_exact_nid_numa_checks(void)
 {
 	test_print("Running %s NUMA tests...\n", FUNC_NAME);
@@ -626,6 +1082,15 @@ int __memblock_alloc_exact_nid_numa_checks(void)
 	alloc_exact_nid_numa_no_overlap_split_check();
 	alloc_exact_nid_numa_no_overlap_low_check();
 
+	alloc_exact_nid_numa_small_node_check();
+	alloc_exact_nid_numa_node_reserved_check();
+	alloc_exact_nid_numa_part_reserved_fail_check();
+	alloc_exact_nid_numa_split_range_high_check();
+	alloc_exact_nid_numa_no_overlap_high_check();
+	alloc_exact_nid_numa_large_region_check();
+	alloc_exact_nid_numa_reserved_full_merge_check();
+	alloc_exact_nid_numa_split_all_reserved_check();
+
 	return 0;
 }
 
-- 
GitLab


From 80c2fe022ef5d29f3bafee90c37dbcff18cab57a Mon Sep 17 00:00:00 2001
From: Rebecca Mckeever <remckee0@gmail.com>
Date: Mon, 7 Nov 2022 00:28:09 -0600
Subject: [PATCH 1004/4122] memblock tests: remove completed TODO item

Remove completed item from TODO list.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Rebecca Mckeever <remckee0@gmail.com>
Signed-off-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/f2263abe45613b28f1583fbf04a4bffcf735bcf6.1667802195.git.remckee0@gmail.com
---
 tools/testing/memblock/TODO | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tools/testing/memblock/TODO b/tools/testing/memblock/TODO
index 503cc96fcdc3a..e306c90c535fb 100644
--- a/tools/testing/memblock/TODO
+++ b/tools/testing/memblock/TODO
@@ -1,10 +1,5 @@
 TODO
 =====
 
-1. Add test cases using this functions (implement them for both directions):
-   + memblock_alloc_raw()
-   + memblock_alloc_exact_nid_raw()
-   + memblock_alloc_try_nid_raw()
-
-2. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
+1. Add tests for memblock_alloc_node() to check if the correct NUMA node is set
    for the new region
-- 
GitLab


From ad9f64cd2d4a17f0d5ecf04d38170fdc34f21c61 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Mon, 3 Oct 2022 09:58:48 -0700
Subject: [PATCH 1005/4122] LICENSES: Add the copyleft-next-0.3.1 license

Add the full text of the copyleft-next-0.3.1 license to the kernel
tree as well as the required tags for reference and tooling.
The license text was copied directly from the copyleft-next project's
git tree [0].

Discussion of using copyleft-next-0.3.1 on Linux started since June,
2016 [1]. In the end Linus' preference was to have drivers use
MODULE_LICENSE("GPL") to make it clear that the GPL applies when it
comes to Linux [2]. Additionally, even though copyleft-next-0.3.1 has
been found to be to be GPLv2 compatible by three attorneys at SUSE and
Redhat [3], to err on the side of caution we simply recommend to
always use the "OR" language for this license [4].

Even though it has been a goal of the project to be GPL-v2 compatible
to be certain in 2016 I asked for a clarification about what makes
copyleft-next GPLv2 compatible and also asked for a summary of
benefits. This prompted some small minor changes to make compatibility
even further clear and as of copyleft 0.3.1 compatibility should
be crystal clear [5].

The summary of why copyleft-next 0.3.1 is compatible with GPLv2
is explained as follows:

  Like GPLv2, copyleft-next requires distribution of derivative works
  ("Derived Works" in copyleft-next 0.3.x) to be under the same license.
  Ordinarily this would make the two licenses incompatible. However,
  copyleft-next 0.3.1 says: "If the Derived Work includes material
  licensed under the GPL, You may instead license the Derived Work under
  the GPL." "GPL" is defined to include GPLv2.

In practice this means copyleft-next code in Linux may be licensed
under the GPL2, however there are additional obvious gains for
bringing contributions from Linux outbound where copyleft-next is
preferred. A summary of benefits why projects outside of Linux might
prefer to use copyleft-next >= 0.3.1 over GPLv2:

o It is much shorter and simpler
o It has an explicit patent license grant, unlike GPLv2
o Its notice preservation conditions are clearer
o More free software/open source licenses are compatible
  with it (via section 4)
o The source code requirement triggered by binary distribution
  is much simpler in a procedural sense
o Recipients potentially have a contract claim against distributors
  who are noncompliant with the source code requirement
o There is a built-in inbound=outbound policy for upstream
  contributions (cf. Apache License 2.0 section 5)
o There are disincentives to engage in the controversial practice
  of copyleft/ proprietary dual-licensing
o In 15 years copyleft expires, which can be advantageous
  for legacy code
o There are explicit disincentives to bringing patent infringement
  claims accusing the licensed work of infringement (see 10b)
o There is a cure period for licensees who are not compliant
  with the license (there is no cure opportunity in GPLv2)
o copyleft-next has a 'built-in or-later' provision

The first driver submission to Linux under this dual strategy was
lib/test_sysctl.c through commit 9308f2f9e7f05 ("test_sysctl: add
dedicated proc sysctl test driver") merged in July 2017. Shortly after
that I also added test_kmod through commit d9c6a72d6fa29 ("kmod: add
test driver to stress test the module loader") in the same month. These
two drivers went in just a few months before the SPDX license practice
kicked in. In 2018 Kuno Woudt went through the process to get SPDX
identifiers for copyleft-next [6] [7]. Although there are SPDX tags
for copyleft-next-0.3.0, we only document use in Linux starting from
copyleft-next-0.3.1 which makes GPLv2 compatibility crystal clear.

This patch will let us update the two Linux selftest drivers in
subsequent patches with their respective SPDX license identifiers and
let us remove repetitive license boiler plate.

[0] https://github.com/copyleft-next/copyleft-next/blob/master/Releases/copyleft-next-0.3.1
[1] https://lore.kernel.org/lkml/1465929311-13509-1-git-send-email-mcgrof@kernel.org/
[2] https://lore.kernel.org/lkml/CA+55aFyhxcvD+q7tp+-yrSFDKfR0mOHgyEAe=f_94aKLsOu0Og@mail.gmail.com/
[3] https://lore.kernel.org/lkml/20170516232702.GL17314@wotan.suse.de/
[4] https://lkml.kernel.org/r/1495234558.7848.122.camel@linux.intel.com
[5] https://lists.fedorahosted.org/archives/list/copyleft-next@lists.fedorahosted.org/thread/JTGV56DDADWGKU7ZKTZA4DLXTGTLNJ57/#SQMDIKBRAVDOCT4UVNOOCRGBN2UJIKHZ
[6] https://spdx.org/licenses/copyleft-next-0.3.0.html
[7] https://spdx.org/licenses/copyleft-next-0.3.1.html

Cc: Goldwyn Rodrigues <rgoldwyn@suse.com>
Cc: Kuno Woudt <kuno@frob.nl>
Cc: Richard Fontana <fontana@sharpeleven.org>
Cc: copyleft-next@lists.fedorahosted.org
Cc: Ciaran Farrell <Ciaran.Farrell@suse.com>
Cc: Christopher De Nicolo <Christopher.DeNicolo@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Thorsten Leemhuis <linux@leemhuis.info>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Tim Bird <tim.bird@sony.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 LICENSES/dual/copyleft-next-0.3.1 | 236 ++++++++++++++++++++++++++++++
 1 file changed, 236 insertions(+)
 create mode 100644 LICENSES/dual/copyleft-next-0.3.1

diff --git a/LICENSES/dual/copyleft-next-0.3.1 b/LICENSES/dual/copyleft-next-0.3.1
new file mode 100644
index 0000000000000..c81acf7106572
--- /dev/null
+++ b/LICENSES/dual/copyleft-next-0.3.1
@@ -0,0 +1,236 @@
+Valid-License-Identifier: copyleft-next-0.3.1
+SPDX-URL: https://spdx.org/licenses/copyleft-next-0.3.1
+Usage-Guide:
+  copyleft-next-0.3.1 is explicitly compatible with GPLv2 (or later) and
+  can therefore be used for kernel code. Though the best and recommended
+  practice is to express this in the SPDX license identifier by
+  licensing the code under both licenses expressed by the OR operator.
+  To use the copyleft-next-0.3.1 license put the following SPDX tag/value
+  pair into a comment according to the placement guidelines in the
+  licensing rules documentation:
+    SPDX-License-Identifier: GPL-2.0-only OR copyleft-next 0.3.1
+    SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
+License-Text:
+
+=======================================================================
+
+                      copyleft-next 0.3.1 ("this License")
+                            Release date: 2016-04-29
+
+1. License Grants; No Trademark License
+
+   Subject to the terms of this License, I grant You:
+
+   a) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable
+      copyright license, to reproduce, Distribute, prepare derivative works
+      of, publicly perform and publicly display My Work.
+
+   b) A non-exclusive, worldwide, perpetual, royalty-free, irrevocable
+      patent license under Licensed Patents to make, have made, use, sell,
+      offer for sale, and import Covered Works.
+
+   This License does not grant any rights in My name, trademarks, service
+   marks, or logos.
+
+2. Distribution: General Conditions
+
+   You may Distribute Covered Works, provided that You (i) inform
+   recipients how they can obtain a copy of this License; (ii) satisfy the
+   applicable conditions of sections 3 through 6; and (iii) preserve all
+   Legal Notices contained in My Work (to the extent they remain
+   pertinent). "Legal Notices" means copyright notices, license notices,
+   license texts, and author attributions, but does not include logos,
+   other graphical images, trademarks or trademark legends.
+
+3. Conditions for Distributing Derived Works; Outbound GPL Compatibility
+
+   If You Distribute a Derived Work, You must license the entire Derived
+   Work as a whole under this License, with prominent notice of such
+   licensing. This condition may not be avoided through such means as
+   separate Distribution of portions of the Derived Work.
+
+   If the Derived Work includes material licensed under the GPL, You may
+   instead license the Derived Work under the GPL.
+   
+4. Condition Against Further Restrictions; Inbound License Compatibility
+
+   When Distributing a Covered Work, You may not impose further
+   restrictions on the exercise of rights in the Covered Work granted under
+   this License. This condition is not excused merely because such
+   restrictions result from Your compliance with conditions or obligations
+   extrinsic to this License (such as a court order or an agreement with a
+   third party).
+
+   However, You may Distribute a Covered Work incorporating material
+   governed by a license that is both OSI-Approved and FSF-Free as of the
+   release date of this License, provided that compliance with such
+   other license would not conflict with any conditions stated in other
+   sections of this License.
+
+5. Conditions for Distributing Object Code
+
+   You may Distribute an Object Code form of a Covered Work, provided that
+   you accompany the Object Code with a URL through which the Corresponding
+   Source is made available, at no charge, by some standard or customary
+   means of providing network access to source code.
+
+   If you Distribute the Object Code in a physical product or tangible
+   storage medium ("Product"), the Corresponding Source must be available
+   through such URL for two years from the date of Your most recent
+   Distribution of the Object Code in the Product. However, if the Product
+   itself contains or is accompanied by the Corresponding Source (made
+   available in a customarily accessible manner), You need not also comply
+   with the first paragraph of this section.
+
+   Each direct and indirect recipient of the Covered Work from You is an
+   intended third-party beneficiary of this License solely as to this
+   section 5, with the right to enforce its terms.
+
+6. Symmetrical Licensing Condition for Upstream Contributions
+
+   If You Distribute a work to Me specifically for inclusion in or
+   modification of a Covered Work (a "Patch"), and no explicit licensing
+   terms apply to the Patch, You license the Patch under this License, to
+   the extent of Your copyright in the Patch. This condition does not
+   negate the other conditions of this License, if applicable to the Patch.
+
+7. Nullification of Copyleft/Proprietary Dual Licensing
+
+   If I offer to license, for a fee, a Covered Work under terms other than
+   a license that is OSI-Approved or FSF-Free as of the release date of this
+   License or a numbered version of copyleft-next released by the
+   Copyleft-Next Project, then the license I grant You under section 1 is no
+   longer subject to the conditions in sections 3 through 5.
+
+8. Copyleft Sunset
+
+   The conditions in sections 3 through 5 no longer apply once fifteen
+   years have elapsed from the date of My first Distribution of My Work
+   under this License.
+
+9. Pass-Through
+
+   When You Distribute a Covered Work, the recipient automatically receives
+   a license to My Work from Me, subject to the terms of this License.
+
+10. Termination
+
+    Your license grants under section 1 are automatically terminated if You
+
+    a) fail to comply with the conditions of this License, unless You cure
+       such noncompliance within thirty days after becoming aware of it, or
+
+    b) initiate a patent infringement litigation claim (excluding
+       declaratory judgment actions, counterclaims, and cross-claims)
+       alleging that any part of My Work directly or indirectly infringes
+       any patent.
+
+    Termination of Your license grants extends to all copies of Covered
+    Works You subsequently obtain. Termination does not terminate the
+    rights of those who have received copies or rights from You subject to
+    this License.
+
+    To the extent permission to make copies of a Covered Work is necessary
+    merely for running it, such permission is not terminable.
+
+11. Later License Versions
+
+    The Copyleft-Next Project may release new versions of copyleft-next,
+    designated by a distinguishing version number ("Later Versions").
+    Unless I explicitly remove the option of Distributing Covered Works
+    under Later Versions, You may Distribute Covered Works under any Later
+    Version.
+
+** 12. No Warranty                                                       **
+**                                                                       **
+**     My Work is provided "as-is", without warranty. You bear the risk  **
+**     of using it. To the extent permitted by applicable law, each      **
+**     Distributor of My Work excludes the implied warranties of title,  **
+**     merchantability, fitness for a particular purpose and             **
+**     non-infringement.                                                 **
+
+** 13. Limitation of Liability                                           **
+**                                                                       **
+**     To the extent permitted by applicable law, in no event will any   **
+**     Distributor of My Work be liable to You for any damages           **
+**     whatsoever, whether direct, indirect, special, incidental, or     **
+**     consequential damages, whether arising under contract, tort       **
+**     (including negligence), or otherwise, even where the Distributor  **
+**     knew or should have known about the possibility of such damages.  **
+
+14. Severability
+
+    The invalidity or unenforceability of any provision of this License
+    does not affect the validity or enforceability of the remainder of
+    this License. Such provision is to be reformed to the minimum extent
+    necessary to make it valid and enforceable.
+
+15. Definitions
+
+    "Copyleft-Next Project" means the project that maintains the source
+    code repository at <https://github.com/copyleft-next/copyleft-next.git/>
+    as of the release date of this License.
+
+    "Corresponding Source" of a Covered Work in Object Code form means (i)
+    the Source Code form of the Covered Work; (ii) all scripts,
+    instructions and similar information that are reasonably necessary for
+    a skilled developer to generate such Object Code from the Source Code
+    provided under (i); and (iii) a list clearly identifying all Separate
+    Works (other than those provided in compliance with (ii)) that were
+    specifically used in building and (if applicable) installing the
+    Covered Work (for example, a specified proprietary compiler including
+    its version number). Corresponding Source must be machine-readable.
+
+    "Covered Work" means My Work or a Derived Work.
+
+    "Derived Work" means a work of authorship that copies from, modifies,
+    adapts, is based on, is a derivative work of, transforms, translates or
+    contains all or part of My Work, such that copyright permission is
+    required. The following are not Derived Works: (i) Mere Aggregation;
+    (ii) a mere reproduction of My Work; and (iii) if My Work fails to
+    explicitly state an expectation otherwise, a work that merely makes
+    reference to My Work.
+
+    "Distribute" means to distribute, transfer or make a copy available to
+    someone else, such that copyright permission is required.
+
+    "Distributor" means Me and anyone else who Distributes a Covered Work.
+
+    "FSF-Free" means classified as 'free' by the Free Software Foundation.
+
+    "GPL" means a version of the GNU General Public License or the GNU
+    Affero General Public License.
+
+    "I"/"Me"/"My" refers to the individual or legal entity that places My
+    Work under this License. "You"/"Your" refers to the individual or legal
+    entity exercising rights in My Work under this License. A legal entity
+    includes each entity that controls, is controlled by, or is under
+    common control with such legal entity. "Control" means (a) the power to
+    direct the actions of such legal entity, whether by contract or
+    otherwise, or (b) ownership of more than fifty percent of the
+    outstanding shares or beneficial ownership of such legal entity.
+
+    "Licensed Patents" means all patent claims licensable royalty-free by
+    Me, now or in the future, that are necessarily infringed by making,
+    using, or selling My Work, and excludes claims that would be infringed
+    only as a consequence of further modification of My Work.
+
+    "Mere Aggregation" means an aggregation of a Covered Work with a
+    Separate Work.
+
+    "My Work" means the particular work of authorship I license to You
+    under this License.
+
+    "Object Code" means any form of a work that is not Source Code.
+
+    "OSI-Approved" means approved as 'Open Source' by the Open Source
+    Initiative.
+
+    "Separate Work" means a work that is separate from and independent of a
+    particular Covered Work and is not by its nature an extension or
+    enhancement of the Covered Work, and/or a runtime library, standard
+    library or similar component that is used to generate an Object Code
+    form of a Covered Work.
+
+    "Source Code" means the preferred form of a work for making
+    modifications to it.
-- 
GitLab


From 6cad1ecd4e3213d892b70afa999a81849d1f0206 Mon Sep 17 00:00:00 2001
From: Luis Chamberlain <mcgrof@kernel.org>
Date: Mon, 3 Oct 2022 09:58:49 -0700
Subject: [PATCH 1006/4122] testing: use the copyleft-next-0.3.1 SPDX tag

Two selftests drivers exist under the copyleft-next license.
These drivers were added prior to SPDX practice taking full swing
in the kernel. Now that we have an SPDX tag for copyleft-next-0.3.1
documented, embrace it and remove the boiler plate.

Cc: Goldwyn Rodrigues <rgoldwyn@suse.com>
Cc: Kuno Woudt <kuno@frob.nl>
Cc: Richard Fontana <fontana@sharpeleven.org>
Cc: copyleft-next@lists.fedorahosted.org
Cc: Ciaran Farrell <Ciaran.Farrell@suse.com>
Cc: Christopher De Nicolo <Christopher.DeNicolo@suse.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Thorsten Leemhuis <linux@leemhuis.info>
Cc: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Tim Bird <tim.bird@sony.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test_kmod.c                          | 12 +-----------
 lib/test_sysctl.c                        | 12 +-----------
 tools/testing/selftests/kmod/kmod.sh     | 13 +------------
 tools/testing/selftests/sysctl/sysctl.sh | 12 +-----------
 4 files changed, 4 insertions(+), 45 deletions(-)

diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index cb800b1d0d99c..6423df9fa8dd4 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1,18 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
 /*
  * kmod stress test driver
  *
  * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or at your option any
- * later version; or, when distributed separately from the Linux kernel or
- * when incorporated into other software packages, subject to the following
- * license:
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of copyleft-next (version 0.3.1 or later) as published
- * at http://copyleft-next.org/.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 9a564971f539a..e2a816d85ea24 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -1,18 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
 /*
  * proc sysctl test driver
  *
  * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or at your option any
- * later version; or, when distributed separately from the Linux kernel or
- * when incorporated into other software packages, subject to the following
- * license:
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of copyleft-next (version 0.3.1 or later) as published
- * at http://copyleft-next.org/.
  */
 
 /*
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index afd42387e8b21..7189715d7960f 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -1,18 +1,7 @@
 #!/bin/bash
-#
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
 # Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
 #
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or at your option any
-# later version; or, when distributed separately from the Linux kernel or
-# when incorporated into other software packages, subject to the following
-# license:
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of copyleft-next (version 0.3.1 or later) as published
-# at http://copyleft-next.org/.
-
 # This is a stress test script for kmod, the kernel module loader. It uses
 # test_kmod which exposes a series of knobs for the API for us so we can
 # tweak each test in userspace rather than in kernelspace.
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index f50778a3d7445..bfc54b422f25a 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -1,16 +1,6 @@
 #!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
 # Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or at your option any
-# later version; or, when distributed separately from the Linux kernel or
-# when incorporated into other software packages, subject to the following
-# license:
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of copyleft-next (version 0.3.1 or later) as published
-# at http://copyleft-next.org/.
 
 # This performs a series tests against the proc sysctl interface.
 
-- 
GitLab


From 5c30f3e4a6e67c88c979ad30554bf4ef9b24fbd0 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sun, 6 Nov 2022 19:46:35 -0800
Subject: [PATCH 1007/4122] tty: Move TIOCSTI toggle variable before kerndoc

The variable "tty_legacy_tiocsti" should be defined before the kerndoc
for the tiocsti() function. The new variable was breaking the "htmldocs"
build target:

drivers/tty/tty_io.c:2271: warning: cannot understand function prototype: 'bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI); '

Fixes: 83efeeeb3d04 ("tty: Allow TIOCSTI to be disabled")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/lkml/20221107143434.66f7be35@canb.auug.org.au
Cc: Jiri Slaby <jirislaby@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Link: https://lore.kernel.org/r/20221107034631.never.637-kees@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index a6a16cf986b72..24ebcb495145d 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -2255,6 +2255,7 @@ static int tty_fasync(int fd, struct file *filp, int on)
 	return retval;
 }
 
+static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI);
 /**
  * tiocsti		-	fake input character
  * @tty: tty to fake input into
@@ -2268,7 +2269,6 @@ static int tty_fasync(int fd, struct file *filp, int on)
  *  * Called functions take tty_ldiscs_lock
  *  * current->signal->tty check is safe without locks
  */
-static bool tty_legacy_tiocsti __read_mostly = IS_ENABLED(CONFIG_LEGACY_TIOCSTI);
 static int tiocsti(struct tty_struct *tty, char __user *p)
 {
 	char ch, mbz = 0;
-- 
GitLab


From 96a53586ec095dcc426df5bc6a568108dcfa8a0a Mon Sep 17 00:00:00 2001
From: Tanjuate Brunostar <tanjubrunostar0@gmail.com>
Date: Thu, 3 Nov 2022 07:34:22 +0000
Subject: [PATCH 1008/4122] staging: vt6655: change 2 variable names wFB_Opt0
 and wFB_Opt1

These variables are named using Hungarian notation, which is not used
in the Linux kernel.

Signed-off-by: Tanjuate Brunostar <tanjubrunostar0@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/Y2Nu/i957Ill7r+d@elroy-temp-vm.gaiao0uenmiufjlowqgp5yxwdh.gvxx.internal.cloudapp.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vt6655/rxtx.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/staging/vt6655/rxtx.c b/drivers/staging/vt6655/rxtx.c
index baa3f6f1508f7..522d34ca9b0ff 100644
--- a/drivers/staging/vt6655/rxtx.c
+++ b/drivers/staging/vt6655/rxtx.c
@@ -59,12 +59,12 @@ static const unsigned short time_stamp_off[2][MAX_RATE] = {
 	{384, 192, 130, 113, 54, 43, 37, 31, 28, 25, 24, 23}, /* Short Preamble */
 };
 
-static const unsigned short wFB_Opt0[2][5] = {
+static const unsigned short fb_opt0[2][5] = {
 	{RATE_12M, RATE_18M, RATE_24M, RATE_36M, RATE_48M}, /* fallback_rate0 */
 	{RATE_12M, RATE_12M, RATE_18M, RATE_24M, RATE_36M}, /* fallback_rate1 */
 };
 
-static const unsigned short wFB_Opt1[2][5] = {
+static const unsigned short fb_opt1[2][5] = {
 	{RATE_12M, RATE_18M, RATE_24M, RATE_24M, RATE_36M}, /* fallback_rate0 */
 	{RATE_6M,  RATE_6M,  RATE_12M, RATE_12M, RATE_18M}, /* fallback_rate1 */
 };
@@ -310,9 +310,9 @@ s_uGetDataDuration(
 			wRate -= RATE_18M;
 
 			if (byFBOption == AUTO_FB_0)
-				wRate = wFB_Opt0[FB_RATE0][wRate];
+				wRate = fb_opt0[FB_RATE0][wRate];
 			else
-				wRate = wFB_Opt1[FB_RATE0][wRate];
+				wRate = fb_opt1[FB_RATE0][wRate];
 
 			uNextPktTime = s_uGetTxRsvTime(pDevice, byPktType,
 						       len, wRate, bNeedAck);
@@ -365,52 +365,52 @@ s_uGetRTSCTSDuration(
 	case RTSDUR_BA_F0: /* RTSDuration_ba_f0 */
 		uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopCCKBasicRate);
 		if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE0][wRate - RATE_18M], bNeedAck);
 		else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE0][wRate - RATE_18M], bNeedAck);
 
 		break;
 
 	case RTSDUR_AA_F0: /* RTSDuration_aa_f0 */
 		uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopOFDMBasicRate);
 		if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE0][wRate - RATE_18M], bNeedAck);
 		else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE0][wRate - RATE_18M], bNeedAck);
 
 		break;
 
 	case RTSDUR_BA_F1: /* RTSDuration_ba_f1 */
 		uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopCCKBasicRate);
 		if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE1][wRate - RATE_18M], bNeedAck);
 		else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE1][wRate - RATE_18M], bNeedAck);
 
 		break;
 
 	case RTSDUR_AA_F1: /* RTSDuration_aa_f1 */
 		uCTSTime = bb_get_frame_time(pDevice->preamble_type, byPktType, 14, pDevice->byTopOFDMBasicRate);
 		if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE1][wRate - RATE_18M], bNeedAck);
 		else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate - RATE_18M], bNeedAck);
+			uDurTime = uCTSTime + 2 * pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE1][wRate - RATE_18M], bNeedAck);
 
 		break;
 
 	case CTSDUR_BA_F0: /* CTSDuration_ba_f0 */
 		if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE0][wRate - RATE_18M], bNeedAck);
+			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE0][wRate - RATE_18M], bNeedAck);
 		else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE0][wRate - RATE_18M], bNeedAck);
+			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE0][wRate - RATE_18M], bNeedAck);
 
 		break;
 
 	case CTSDUR_BA_F1: /* CTSDuration_ba_f1 */
 		if ((byFBOption == AUTO_FB_0) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt0[FB_RATE1][wRate - RATE_18M], bNeedAck);
+			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt0[FB_RATE1][wRate - RATE_18M], bNeedAck);
 		else if ((byFBOption == AUTO_FB_1) && (wRate >= RATE_18M) && (wRate <= RATE_54M))
-			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, wFB_Opt1[FB_RATE1][wRate - RATE_18M], bNeedAck);
+			uDurTime = pDevice->uSIFS + s_uGetTxRsvTime(pDevice, byPktType, cbFrameLength, fb_opt1[FB_RATE1][wRate - RATE_18M], bNeedAck);
 
 		break;
 
-- 
GitLab


From f541335e830eb1abf3e4e260fcef98c429165aac Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Thu, 3 Nov 2022 04:06:59 +0530
Subject: [PATCH 1009/4122] staging: rtl8192e: Use min_t/max_t macros for
 variable comparison

Simplify code by using min_t and max_t helper macros in place of lengthy
if/else block oriented logical evaluation and value assignment. This
issue is identified by coccicheck using the minmax.cocci file.

Use the *_t variants of min/max macros to avoid compiler warnings about
data typecast.
Also, use u32 as type for min_t macro to avoid any truncation of data
associated with enum constant HT_AGG_SIZE_32K.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/Y2LxC2kziM1TznhO@qemulion
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_HTProc.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index 62aa8e893c34c..ccb86660ab487 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -587,17 +587,12 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 			else
 				pHTInfo->CurrentAMPDUFactor = HT_AGG_SIZE_64K;
 		} else {
-			if (pPeerHTCap->MaxRxAMPDUFactor < HT_AGG_SIZE_32K)
-				pHTInfo->CurrentAMPDUFactor =
-						 pPeerHTCap->MaxRxAMPDUFactor;
-			else
-				pHTInfo->CurrentAMPDUFactor = HT_AGG_SIZE_32K;
+			pHTInfo->CurrentAMPDUFactor = min_t(u32, pPeerHTCap->MaxRxAMPDUFactor,
+							    HT_AGG_SIZE_32K);
 		}
 	}
-	if (pHTInfo->MPDU_Density > pPeerHTCap->MPDUDensity)
-		pHTInfo->current_mpdu_density = pHTInfo->MPDU_Density;
-	else
-		pHTInfo->current_mpdu_density = pPeerHTCap->MPDUDensity;
+	pHTInfo->current_mpdu_density = max_t(u8, pHTInfo->MPDU_Density,
+					      pPeerHTCap->MPDUDensity);
 	if (pHTInfo->iot_action & HT_IOT_ACT_TX_USE_AMSDU_8K) {
 		pHTInfo->bCurrentAMPDUEnable = false;
 		pHTInfo->ForcedAMSDUMode = HT_AGG_FORCE_ENABLE;
-- 
GitLab


From 6c6ff293337b62c0c646d1ede2e9962f7501f9e4 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Thu, 3 Nov 2022 15:03:29 +0530
Subject: [PATCH 1010/4122] staging: rtl8723bs: Use min/max macros for variable
 comparison

Simplify code by using min and max helper macros in place of lengthy
if/else block oriented logical evaluation and value assignment. This
issue is identified by coccicheck using the minmax.cocci file.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y2OK6fcIkH3S2/1f@qemulion
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8723bs/core/rtw_wlan_util.c | 12 ++++--------
 drivers/staging/rtl8723bs/hal/odm_DIG.c        |  5 +----
 2 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c
index 18ba846c0b7b7..ba39c8b1a9ae1 100644
--- a/drivers/staging/rtl8723bs/core/rtw_wlan_util.c
+++ b/drivers/staging/rtl8723bs/core/rtw_wlan_util.c
@@ -986,15 +986,11 @@ void HT_caps_handler(struct adapter *padapter, struct ndis_80211_var_ie *pIE)
 			pmlmeinfo->HT_caps.u.HT_cap[i] &= (pIE->data[i]);
 		} else {
 			/* modify from  fw by Thomas 2010/11/17 */
-			if ((pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x3) > (pIE->data[i] & 0x3))
-				max_AMPDU_len = (pIE->data[i] & 0x3);
-			else
-				max_AMPDU_len = (pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x3);
+			max_AMPDU_len = min(pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x3,
+					    pIE->data[i] & 0x3);
 
-			if ((pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x1c) > (pIE->data[i] & 0x1c))
-				min_MPDU_spacing = (pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x1c);
-			else
-				min_MPDU_spacing = (pIE->data[i] & 0x1c);
+			min_MPDU_spacing = max(pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para & 0x1c,
+					       pIE->data[i] & 0x1c);
 
 			pmlmeinfo->HT_caps.u.HT_cap_element.AMPDU_para = max_AMPDU_len | min_MPDU_spacing;
 		}
diff --git a/drivers/staging/rtl8723bs/hal/odm_DIG.c b/drivers/staging/rtl8723bs/hal/odm_DIG.c
index 07edf74ccfe52..97a51546463a6 100644
--- a/drivers/staging/rtl8723bs/hal/odm_DIG.c
+++ b/drivers/staging/rtl8723bs/hal/odm_DIG.c
@@ -598,10 +598,7 @@ void odm_DIGbyRSSI_LPS(void *pDM_VOID)
 	/* Lower bound checking */
 
 	/* RSSI Lower bound check */
-	if ((pDM_Odm->RSSI_Min-10) > DM_DIG_MIN_NIC)
-		RSSI_Lower = pDM_Odm->RSSI_Min-10;
-	else
-		RSSI_Lower = DM_DIG_MIN_NIC;
+	RSSI_Lower = max(pDM_Odm->RSSI_Min - 10, DM_DIG_MIN_NIC);
 
 	/* Upper and Lower Bound checking */
 	if (CurrentIGI > DM_DIG_MAX_NIC)
-- 
GitLab


From 84415762be0736efe7ed1b26d72a65ef9e0c6e58 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Thu, 3 Nov 2022 22:58:06 +0530
Subject: [PATCH 1011/4122] staging: r8188eu: remove unused ijk_matrix_regs_set
 implementation

Instance IQKMatrixRegSetting of struct ijk_matrix_regs_set is
initialised and its member variables are assigned values, but it is
not used anywhere. Remove the structure and its unused implementation.

Suggested-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y2P6Jj+IcPss0wFd@qemulion
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/HalPhyRf_8188e.c | 8 --------
 drivers/staging/r8188eu/include/odm.h        | 7 -------
 2 files changed, 15 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
index 622f95d3f2edf..26e710ef5134d 100644
--- a/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
+++ b/drivers/staging/r8188eu/hal/HalPhyRf_8188e.c
@@ -882,14 +882,6 @@ void PHY_IQCalibrate_8188E(struct adapter *adapt, bool recovery)
 	if (RegE94 != 0)
 		patha_fill_iqk(adapt, pathaok, result, final_candidate, (RegEA4 == 0));
 
-/* To Fix BSOD when final_candidate is 0xff */
-/* by sherry 20120321 */
-	if (final_candidate < 4) {
-		for (i = 0; i < IQK_Matrix_REG_NUM; i++)
-			dm_odm->RFCalibrateInfo.IQKMatrixRegSetting.Value[0][i] = result[final_candidate][i];
-		dm_odm->RFCalibrateInfo.IQKMatrixRegSetting.bIQKDone = true;
-	}
-
 	_PHY_SaveADDARegisters(adapt, IQK_BB_REG_92C, dm_odm->RFCalibrateInfo.IQK_BB_backup_recover, 9);
 }
 
diff --git a/drivers/staging/r8188eu/include/odm.h b/drivers/staging/r8188eu/include/odm.h
index 89b01dd614ba9..8cea166b7b735 100644
--- a/drivers/staging/r8188eu/include/odm.h
+++ b/drivers/staging/r8188eu/include/odm.h
@@ -80,7 +80,6 @@ struct odm_rate_adapt {
 #define HP_THERMAL_NUM		8
 
 #define AVG_THERMAL_NUM		8
-#define IQK_Matrix_REG_NUM	8
 
 struct odm_phy_dbg_info {
 	/* ODM Write,debug info */
@@ -164,11 +163,6 @@ struct odm_ra_info {
 	u8 PTSmoothFactor;
 };
 
-struct ijk_matrix_regs_set {
-	bool	bIQKDone;
-	s32	Value[1][IQK_Matrix_REG_NUM];
-};
-
 struct odm_rf_cal {
 	/* for tx power tracking */
 	u32	RegA24; /*  for TempCCK */
@@ -206,7 +200,6 @@ struct odm_rf_cal {
 
 	u8	ThermalValue_HP[HP_THERMAL_NUM];
 	u8	ThermalValue_HP_index;
-	struct ijk_matrix_regs_set IQKMatrixRegSetting;
 
 	u8	Delta_IQK;
 	u8	Delta_LCK;
-- 
GitLab


From 1d6290639bfce477f71392392436d2aa830bca2e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 3 Nov 2022 13:06:19 +0000
Subject: [PATCH 1012/4122] staging: rtl8192e: rtl819x_HTProc: make arrays
 const and one static

Make two dead-only arrays const. Make array EWC11NHTCap static const
so it is not populated on the stack, makes the code smaller too.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/20221103130619.78413-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl819x_HTProc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index ccb86660ab487..2c0a8d5c8f279 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -282,7 +282,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap,
 	memset(posHTCap, 0, *len);
 
 	if ((bAssoc) && (pHT->ePeerHTSpecVer == HT_SPEC_VER_EWC)) {
-		u8	EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33};
+		static const u8	EWC11NHTCap[] = { 0x00, 0x90, 0x4c, 0x33 };
 
 		memcpy(posHTCap, EWC11NHTCap, sizeof(EWC11NHTCap));
 		pCapELE = (struct ht_capab_ele *)&posHTCap[4];
@@ -515,8 +515,8 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 	u16 nMaxAMSDUSize = 0;
 	u8 *pMcsFilter = NULL;
 
-	static u8 EWC11NHTCap[] = {0x00, 0x90, 0x4c, 0x33};
-	static u8 EWC11NHTInfo[] = {0x00, 0x90, 0x4c, 0x34};
+	static const u8 EWC11NHTCap[] = { 0x00, 0x90, 0x4c, 0x33 };
+	static const u8 EWC11NHTInfo[] = { 0x00, 0x90, 0x4c, 0x34 };
 
 	if (!pHTInfo->bCurrentHTSupport) {
 		netdev_warn(ieee->dev, "%s(): HT_DISABLE\n", __func__);
-- 
GitLab


From dcb18f5140ab7b4ea23417598c601b9468141d99 Mon Sep 17 00:00:00 2001
From: Phillip Potter <phil@philpotter.co.uk>
Date: Thu, 3 Nov 2022 23:06:32 +0000
Subject: [PATCH 1013/4122] staging: r8188eu: change return type of
 rtw_set_802_11_disassociate to void

Change return type of rtw_set_802_11_disassociate to void. This function
always returns 'true' no matter what, so there is no need to return a
value, and no need to check for it in the two call sites within
rtw_wx_set_mlme. Also, as we are no longer using ret in rtw_wx_set_mlme
except as the return value, just remove it and return 0 directly.

Suggested-by: Michael Straube <straube.linux@gmail.com>
Signed-off-by: Phillip Potter <phil@philpotter.co.uk>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221103230632.6946-1-phil@philpotter.co.uk
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_ioctl_set.c    | 4 +---
 drivers/staging/r8188eu/include/rtw_ioctl_set.h | 2 +-
 drivers/staging/r8188eu/os_dep/ioctl_linux.c    | 9 +++------
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_ioctl_set.c b/drivers/staging/r8188eu/core/rtw_ioctl_set.c
index 7864318266598..785c0dba508fa 100644
--- a/drivers/staging/r8188eu/core/rtw_ioctl_set.c
+++ b/drivers/staging/r8188eu/core/rtw_ioctl_set.c
@@ -314,7 +314,7 @@ u8 rtw_set_802_11_infrastructure_mode(struct adapter *padapter,
 	return true;
 }
 
-u8 rtw_set_802_11_disassociate(struct adapter *padapter)
+void rtw_set_802_11_disassociate(struct adapter *padapter)
 {
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 
@@ -328,8 +328,6 @@ u8 rtw_set_802_11_disassociate(struct adapter *padapter)
 	}
 
 	spin_unlock_bh(&pmlmepriv->lock);
-
-	return true;
 }
 
 u8 rtw_set_802_11_bssid_list_scan(struct adapter *padapter, struct ndis_802_11_ssid *pssid, int ssid_max_num)
diff --git a/drivers/staging/r8188eu/include/rtw_ioctl_set.h b/drivers/staging/r8188eu/include/rtw_ioctl_set.h
index abe460d6504d0..c3eb2479f27b4 100644
--- a/drivers/staging/r8188eu/include/rtw_ioctl_set.h
+++ b/drivers/staging/r8188eu/include/rtw_ioctl_set.h
@@ -12,7 +12,7 @@ u8 rtw_set_802_11_authentication_mode(struct adapter *adapt,
 				      enum ndis_802_11_auth_mode authmode);
 u8 rtw_set_802_11_bssid(struct adapter *adapter, u8 *bssid);
 u8 rtw_set_802_11_add_wep(struct adapter *adapter, struct ndis_802_11_wep *wep);
-u8 rtw_set_802_11_disassociate(struct adapter *adapter);
+void rtw_set_802_11_disassociate(struct adapter *adapter);
 u8 rtw_set_802_11_bssid_list_scan(struct adapter *adapter,
 				  struct ndis_802_11_ssid *pssid,
 				  int ssid_max_num);
diff --git a/drivers/staging/r8188eu/os_dep/ioctl_linux.c b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
index dda48a2a6d0c1..8e9b7b0664bc3 100644
--- a/drivers/staging/r8188eu/os_dep/ioctl_linux.c
+++ b/drivers/staging/r8188eu/os_dep/ioctl_linux.c
@@ -1011,7 +1011,6 @@ static int rtw_wx_set_mlme(struct net_device *dev,
 			     struct iw_request_info *info,
 			     union iwreq_data *wrqu, char *extra)
 {
-	int ret = 0;
 	struct adapter *padapter = (struct adapter *)rtw_netdev_priv(dev);
 	struct iw_mlme *mlme = (struct iw_mlme *)extra;
 
@@ -1020,17 +1019,15 @@ static int rtw_wx_set_mlme(struct net_device *dev,
 
 	switch (mlme->cmd) {
 	case IW_MLME_DEAUTH:
-		if (!rtw_set_802_11_disassociate(padapter))
-			ret = -1;
+		rtw_set_802_11_disassociate(padapter);
 		break;
 	case IW_MLME_DISASSOC:
-		if (!rtw_set_802_11_disassociate(padapter))
-			ret = -1;
+		rtw_set_802_11_disassociate(padapter);
 		break;
 	default:
 		return -EOPNOTSUPP;
 	}
-	return ret;
+	return 0;
 }
 
 static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a,
-- 
GitLab


From bb5b5e2104ca000888df89e35ff42c677df22671 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Thu, 3 Nov 2022 20:27:50 -0500
Subject: [PATCH 1014/4122] staging: rtl8192e: Fix divide fault when
 calculating beacon age

When the configuration parameter CONFIG_HZ is less that 100, the compiler
generates an error as follows:

../drivers/staging/rtl8192e/rtllib_wx.c: In function 'rtl819x_translate_scan':
../drivers/staging/rtl8192e/rtllib_wx.c:220:57: warning: division by zero [-Wdiv-by-zero]
  220 |      (jiffies - network->last_scanned) / (HZ / 100));
      |                                                         ^
In file included from ../include/linux/skbuff.h:45,
                 from ../include/linux/if_ether.h:19,
                 from ../include/linux/etherdevice.h:20,
                 from ../drivers/staging/rtl8192e/rtllib_wx.c:18:
../drivers/staging/rtl8192e/rtllib_wx.c: In function 'rtllib_wx_get_scan':
../drivers/staging/rtl8192e/rtllib_wx.c:261:70: warning: division by zero [-Wdiv-by-zero]
  261 |      (jiffies - network->last_scanned) /
      |

In fact, is HZ is not a multiple of 100, the calculation will be wrong,
but it will compile correctly.

The fix is to get rid of the (HZ / 100) portion. To decrease any round-off
errors, the compiler is forced to perform the 100 * jiffies-difference
before dividing by HZ. This patch is only compile tested.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org> # build-tested
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/20221104012750.2076-1-Larry.Finger@lwfinger.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_wx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtllib_wx.c b/drivers/staging/rtl8192e/rtllib_wx.c
index da2c41c9b92f1..217426ee2e92a 100644
--- a/drivers/staging/rtl8192e/rtllib_wx.c
+++ b/drivers/staging/rtl8192e/rtllib_wx.c
@@ -217,7 +217,7 @@ static inline char *rtl819x_translate_scan(struct rtllib_device *ieee,
 	p = custom;
 	p += scnprintf(p, MAX_CUSTOM_LEN - (p - custom),
 		      " Last beacon: %lums ago",
-		      (jiffies - network->last_scanned) / (HZ / 100));
+		      (100 * (jiffies - network->last_scanned)) / HZ);
 	iwe.u.data.length = p - custom;
 	if (iwe.u.data.length)
 		start = iwe_stream_add_point_rsl(info, start, stop,
@@ -258,8 +258,8 @@ int rtllib_wx_get_scan(struct rtllib_device *ieee,
 				   escape_essid(network->ssid,
 						network->ssid_len),
 				   network->bssid,
-				   (jiffies - network->last_scanned) /
-				   (HZ / 100));
+				   (100 * (jiffies - network->last_scanned)) /
+				   HZ);
 	}
 
 	spin_unlock_irqrestore(&ieee->lock, flags);
-- 
GitLab


From 11454ddeb2f68a1c03b90f0c2bba18570f1a3019 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Sat, 5 Nov 2022 16:45:52 +0530
Subject: [PATCH 1015/4122] staging: rtl8723bs: replace underutilized struct by
 array variable

For structure iqk_matrix_regs_setting, only the "Value" member variable
is utilized whereas the other struct members are only declared but not
utilised. Replace the struct declaration and implementation by an
equivalent variable similar to the only used struct member variable.
While in there, update the macro mixed case names to uppercase style.
The resultant code is simpler and is easy to maintain.

Suggested-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y2ZF6O1KU3zZ6r3C@qemulion
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8723bs/hal/HalPhyRf_8723B.c    | 21 +++++++++----------
 drivers/staging/rtl8723bs/hal/odm.h           | 12 +++--------
 2 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c b/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c
index a52748f7b56eb..22e33b97800d1 100644
--- a/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c
+++ b/drivers/staging/rtl8723bs/hal/HalPhyRf_8723B.c
@@ -244,8 +244,8 @@ void ODM_TxPwrTrackSetPwr_8723B(
 			Final_CCK_Swing_Index = 0;
 
 		setIqkMatrix_8723B(pDM_Odm, Final_OFDM_Swing_Index, RFPath,
-			pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0],
-			pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]);
+			pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0],
+			pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]);
 
 		setCCKFilterCoefficient(pDM_Odm, Final_CCK_Swing_Index);
 
@@ -257,8 +257,8 @@ void ODM_TxPwrTrackSetPwr_8723B(
 			pDM_Odm->Remnant_OFDMSwingIdx[RFPath] = Final_OFDM_Swing_Index - PwrTrackingLimit_OFDM;
 
 			setIqkMatrix_8723B(pDM_Odm, PwrTrackingLimit_OFDM, RFPath,
-				pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0],
-				pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]);
+				pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0],
+				pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]);
 
 			pDM_Odm->Modify_TxAGC_Flag_PathA = true;
 			PHY_SetTxPowerIndexByRateSection(Adapter, RFPath, pHalData->CurrentChannel, OFDM);
@@ -267,16 +267,16 @@ void ODM_TxPwrTrackSetPwr_8723B(
 			pDM_Odm->Remnant_OFDMSwingIdx[RFPath] = Final_OFDM_Swing_Index;
 
 			setIqkMatrix_8723B(pDM_Odm, 0, RFPath,
-				pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0],
-				pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]);
+				pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0],
+				pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]);
 
 			pDM_Odm->Modify_TxAGC_Flag_PathA = true;
 			PHY_SetTxPowerIndexByRateSection(Adapter, RFPath, pHalData->CurrentChannel, OFDM);
 			PHY_SetTxPowerIndexByRateSection(Adapter, RFPath, pHalData->CurrentChannel, HT_MCS0_MCS7);
 		} else {
 			setIqkMatrix_8723B(pDM_Odm, Final_OFDM_Swing_Index, RFPath,
-				pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][0],
-				pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[ChannelMappedIndex].Value[0][1]);
+				pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][0],
+				pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[ChannelMappedIndex][1]);
 
 			if (pDM_Odm->Modify_TxAGC_Flag_PathA) { /* If TxAGC has changed, reset TxAGC again */
 				pDM_Odm->Remnant_OFDMSwingIdx[RFPath] = 0;
@@ -1759,9 +1759,8 @@ void PHY_IQCalibrate_8723B(
 /* To Fix BSOD when final_candidate is 0xff */
 /* by sherry 20120321 */
 	if (final_candidate < 4) {
-		for (i = 0; i < IQK_Matrix_REG_NUM; i++)
-			pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[0].Value[0][i] = result[final_candidate][i];
-		pDM_Odm->RFCalibrateInfo.IQKMatrixRegSetting[0].bIQKDone = true;
+		for (i = 0; i < IQK_MATRIX_REG_NUM; i++)
+			pDM_Odm->RFCalibrateInfo.iqk_matrix_regs_setting_value[0][i] = result[final_candidate][i];
 	}
 
 	_PHY_SaveADDARegisters8723B(padapter, IQK_BB_REG_92C, pDM_Odm->RFCalibrateInfo.IQK_BB_backup_recover, 9);
diff --git a/drivers/staging/rtl8723bs/hal/odm.h b/drivers/staging/rtl8723bs/hal/odm.h
index fe9782d2d4fd2..f5c804a1b9d55 100644
--- a/drivers/staging/rtl8723bs/hal/odm.h
+++ b/drivers/staging/rtl8723bs/hal/odm.h
@@ -193,8 +193,8 @@ struct odm_rate_adaptive {
 #define HP_THERMAL_NUM		8
 
 #define AVG_THERMAL_NUM		8
-#define IQK_Matrix_REG_NUM	8
-#define IQK_Matrix_Settings_NUM	14 /* Channels_2_4G_NUM */
+#define IQK_MATRIX_REG_NUM	8
+#define IQK_MATRIX_SETTINGS_NUM	14 /* Channels_2_4G_NUM */
 
 #define		DM_Type_ByFW			0
 #define		DM_Type_ByDriver		1
@@ -479,12 +479,6 @@ enum odm_type_alna_e { /* tag_ODM_TYPE_ALNA_Definition */
 	TYPE_ALNA3 = BIT(3)|BIT(2)|BIT(1)|BIT(0)
 };
 
-struct iqk_matrix_regs_setting { /* _IQK_MATRIX_REGS_SETTING */
-	bool bIQKDone;
-	s32 Value[3][IQK_Matrix_REG_NUM];
-	bool bBWIqkResultSaved[3];
-};
-
 /* Remove PATHDIV_PARA struct to odm_PathDiv.h */
 
 struct odm_rf_cal_t { /* ODM_RF_Calibration_Structure */
@@ -530,7 +524,7 @@ struct odm_rf_cal_t { /* ODM_RF_Calibration_Structure */
 
 	u8 ThermalValue_HP[HP_THERMAL_NUM];
 	u8 ThermalValue_HP_index;
-	struct iqk_matrix_regs_setting IQKMatrixRegSetting[IQK_Matrix_Settings_NUM];
+	s32 iqk_matrix_regs_setting_value[IQK_MATRIX_SETTINGS_NUM][IQK_MATRIX_REG_NUM];
 	bool bNeedIQK;
 	bool bIQKInProgress;
 	u8 Delta_IQK;
-- 
GitLab


From f6f213ba49107be5ea2e4aad9ffea9e844c58a2a Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Sun, 6 Nov 2022 15:51:58 +0530
Subject: [PATCH 1016/4122] staging: emxx_udc: use min helper macro for
 variable comparison

Simplify code by using min helper macros in place of lengthy if/else
block oriented logical evaluation and value assignment. This issue is
identified by coccicheck using the minmax.cocci file.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y2eKxoUWtbPY/88b@qemulion
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/emxx_udc/emxx_udc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/staging/emxx_udc/emxx_udc.c b/drivers/staging/emxx_udc/emxx_udc.c
index b6abd3770e81c..b4e19174bef2e 100644
--- a/drivers/staging/emxx_udc/emxx_udc.c
+++ b/drivers/staging/emxx_udc/emxx_udc.c
@@ -1004,10 +1004,7 @@ static int _nbu2ss_in_dma(struct nbu2ss_udc *udc, struct nbu2ss_ep *ep,
 	/* MAX Packet Size */
 	mpkt = _nbu2ss_readl(&preg->EP_REGS[num].EP_PCKT_ADRS) & EPN_MPKT;
 
-	if ((DMA_MAX_COUNT * mpkt) < length)
-		i_write_length = DMA_MAX_COUNT * mpkt;
-	else
-		i_write_length = length;
+	i_write_length = min(DMA_MAX_COUNT * mpkt, length);
 
 	/*------------------------------------------------------------*/
 	/* Number of transmission packets */
-- 
GitLab


From 38aa1741c3b373fb021b2d2ab1f5f07bec03fbd3 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Thu, 3 Nov 2022 02:23:18 +0530
Subject: [PATCH 1017/4122] staging: r8188eu: remove unused struct declarations

Some structures are only declared but have not been used anywhere
in the code. Remove such unused structs. Issue identified as part
of coccicheck report driven code investigation.

Suggested-by: Pavel Skripkin <paskripkin@gmail.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/Y2LYvhr74ng+xFbz@qemulion
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/wlan_bssdef.h | 42 -------------------
 1 file changed, 42 deletions(-)

diff --git a/drivers/staging/r8188eu/include/wlan_bssdef.h b/drivers/staging/r8188eu/include/wlan_bssdef.h
index 831c465df5001..ffeafa19ef264 100644
--- a/drivers/staging/r8188eu/include/wlan_bssdef.h
+++ b/drivers/staging/r8188eu/include/wlan_bssdef.h
@@ -177,20 +177,6 @@ struct ndis_802_11_status_ind {
 /*  MIC check time, 60 seconds. */
 #define MIC_CHECK_TIME	60000000
 
-struct ndis_802_11_auth_evt {
-	struct ndis_802_11_status_ind       Status;
-	struct ndis_802_11_auth_req  Request[1];
-};
-
-struct ndis_802_11_test {
-	u32 Length;
-	u32 Type;
-	union {
-		struct ndis_802_11_auth_evt AuthenticationEvent;
-		NDIS_802_11_RSSI RssiTrigger;
-	} tt;
-};
-
 #ifndef Ndis802_11APMode
 #define Ndis802_11APMode (Ndis802_11InfrastructureMax+1)
 #endif
@@ -279,34 +265,6 @@ enum UAPSD_MAX_SP {
 #define NUM_PRE_AUTH_KEY 16
 #define NUM_PMKID_CACHE NUM_PRE_AUTH_KEY
 
-/*
-*	WPA2
-*/
-
-struct pmkid_candidate {
-	unsigned char BSSID[ETH_ALEN];
-	u32 Flags;
-};
-
-struct ndis_802_11_pmkid_list {
-	u32 Version;       /*  Version of the structure */
-	u32 NumCandidates; /*  No. of pmkid candidates */
-	struct pmkid_candidate CandidateList[1];
-};
-
-struct ndis_802_11_auth_encrypt {
-	enum ndis_802_11_auth_mode AuthModeSupported;
-	enum ndis_802_11_wep_status EncryptStatusSupported;
-};
-
-struct ndis_802_11_cap {
-	u32  Length;
-	u32  Version;
-	u32  NoOfPMKIDs;
-	u32  NoOfAuthEncryptPairsSupported;
-	struct ndis_802_11_auth_encrypt AuthenticationEncryptionSupported[1];
-};
-
 u8 key_2char2num(u8 hch, u8 lch);
 u8 key_char2num(u8 ch);
 u8 str_2char2num(u8 hch, u8 lch);
-- 
GitLab


From 30534c72bac17a6c160017229ba38fcd0cc21ac6 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sat, 5 Nov 2022 10:39:16 +0100
Subject: [PATCH 1018/4122] staging: r8188eu: convert three functions to bool

The functions

is_client_associated_to_ap()
is_client_associated_to_ibss()
is_IBSS_empty()

return boolean values. Convert their return type to bool and replace
_FAIL, which is defined as 0, with false. Another step to get rid of
_SUCCESS / _FAIL.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221105093916.8255-1-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_wlan_util.c   | 18 +++++++++---------
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  6 +++---
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c b/drivers/staging/r8188eu/core/rtw_wlan_util.c
index e50631848cab6..c95438a12b598 100644
--- a/drivers/staging/r8188eu/core/rtw_wlan_util.c
+++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c
@@ -331,35 +331,35 @@ u16 get_beacon_interval(struct wlan_bssid_ex *bss)
 	return le16_to_cpu(val);
 }
 
-int is_client_associated_to_ap(struct adapter *padapter)
+bool is_client_associated_to_ap(struct adapter *padapter)
 {
 	struct mlme_ext_priv	*pmlmeext;
 	struct mlme_ext_info	*pmlmeinfo;
 
 	if (!padapter)
-		return _FAIL;
+		return false;
 
 	pmlmeext = &padapter->mlmeextpriv;
 	pmlmeinfo = &pmlmeext->mlmext_info;
 
 	if ((pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) && ((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE))
 		return true;
-	else
-		return _FAIL;
+
+	return false;
 }
 
-int is_client_associated_to_ibss(struct adapter *padapter)
+bool is_client_associated_to_ibss(struct adapter *padapter)
 {
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
 
 	if ((pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) && ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE))
 		return true;
-	else
-		return _FAIL;
+
+	return false;
 }
 
-int is_IBSS_empty(struct adapter *padapter)
+bool is_IBSS_empty(struct adapter *padapter)
 {
 	unsigned int i;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
@@ -367,7 +367,7 @@ int is_IBSS_empty(struct adapter *padapter)
 
 	for (i = IBSS_START_MAC_ID; i < NUM_STA; i++) {
 		if (pmlmeinfo->FW_sta_info[i].status == 1)
-			return _FAIL;
+			return false;
 	}
 	return true;
 }
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index e234a3b9af6fa..7652e72a03f4c 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -432,9 +432,9 @@ void update_network(struct wlan_bssid_ex *dst, struct wlan_bssid_ex *src,
 u8 *get_my_bssid(struct wlan_bssid_ex *pnetwork);
 u16 get_beacon_interval(struct wlan_bssid_ex *bss);
 
-int is_client_associated_to_ap(struct adapter *padapter);
-int is_client_associated_to_ibss(struct adapter *padapter);
-int is_IBSS_empty(struct adapter *padapter);
+bool is_client_associated_to_ap(struct adapter *padapter);
+bool is_client_associated_to_ibss(struct adapter *padapter);
+bool is_IBSS_empty(struct adapter *padapter);
 
 unsigned char check_assoc_AP(u8 *pframe, uint len);
 
-- 
GitLab


From cc027b3e4087e27d23e5c1d778e802c35f4c987d Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:48:53 +0100
Subject: [PATCH 1019/4122] staging: r8188eu: don't store addba request

There's no need to store an incoming addba request in struct
mlme_ext_info. We only need the addba request to copy some of its fields
into our addba response.

It's simpler to pass the incoming request's management frame to
issue_action_BA as an additional parameter. issue_action_BA can then
extract the required fields. If issue_action_BA prepares a request rather
than a response, the caller sets the parameter for the incoming request to
NULL.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c   | 25 ++++++++-----------
 .../staging/r8188eu/include/rtw_mlme_ext.h    |  4 +--
 drivers/staging/r8188eu/include/wifi.h        |  8 ------
 3 files changed, 13 insertions(+), 24 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 6679d4037d6bd..324757699716a 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1486,11 +1486,9 @@ static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_fra
 	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	struct sta_info *psta = NULL;
 	struct recv_reorder_ctrl *preorder_ctrl;
-	unsigned char		*frame_body;
 	unsigned short	tid;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
-	u8 *pframe = precv_frame->rx_data;
 	struct sta_priv *pstapriv = &padapter->stapriv;
 
 	if ((pmlmeinfo->state & 0x03) != WIFI_FW_AP_STATE)
@@ -1501,23 +1499,19 @@ static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_fra
 	if (!psta)
 		return;
 
-	frame_body = (unsigned char *)(pframe + sizeof(struct ieee80211_hdr_3addr));
-
 	if (!pmlmeinfo->HT_enable)
 		return;
 	/* All union members start with an action code, it's ok to use addba_req. */
 	switch (mgmt->u.action.u.addba_req.action_code) {
 	case WLAN_ACTION_ADDBA_REQ:
-		memcpy(&pmlmeinfo->ADDBA_req, &frame_body[2], sizeof(struct ADDBA_request));
 		tid = u16_get_bits(le16_to_cpu(mgmt->u.action.u.addba_req.capab),
 				   IEEE80211_ADDBA_PARAM_TID_MASK);
 		preorder_ctrl = &psta->recvreorder_ctrl[tid];
 		preorder_ctrl->indicate_seq = 0xffff;
 		preorder_ctrl->enable = pmlmeinfo->bAcceptAddbaReq;
-
 		issue_action_BA(padapter, mgmt->sa, WLAN_ACTION_ADDBA_RESP,
 				pmlmeinfo->bAcceptAddbaReq ?
-					WLAN_STATUS_SUCCESS : WLAN_STATUS_REQUEST_DECLINED);
+					WLAN_STATUS_SUCCESS : WLAN_STATUS_REQUEST_DECLINED, mgmt);
 		break;
 	case WLAN_ACTION_ADDBA_RESP:
 		tid = u16_get_bits(le16_to_cpu(mgmt->u.action.u.addba_resp.capab),
@@ -5377,7 +5371,8 @@ exit:
 	return ret;
 }
 
-void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action, u16 status)
+void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action,
+		     u16 status, struct ieee80211_mgmt *mgmt_req)
 {
 	u16 start_seq;
 	u16 BA_starting_seqctrl = 0;
@@ -5446,13 +5441,13 @@ void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action,
 		break;
 	case WLAN_ACTION_ADDBA_RESP:
 		mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP;
-		mgmt->u.action.u.addba_resp.dialog_token = pmlmeinfo->ADDBA_req.dialog_token;
+		mgmt->u.action.u.addba_resp.dialog_token = mgmt_req->u.action.u.addba_req.dialog_token;
 		mgmt->u.action.u.addba_resp.status = cpu_to_le16(status);
-		capab = le16_to_cpu(pmlmeinfo->ADDBA_req.BA_para_set) & 0x3f;
+		capab = le16_to_cpu(mgmt_req->u.action.u.addba_req.capab) & 0x3f;
 		capab |= u16_encode_bits(64, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK);
 		capab |= u16_encode_bits(pregpriv->ampdu_amsdu, IEEE80211_ADDBA_PARAM_AMSDU_MASK);
 		mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab);
-		mgmt->u.action.u.addba_resp.timeout = pmlmeinfo->ADDBA_req.BA_timeout_value;
+		mgmt->u.action.u.addba_resp.timeout = mgmt_req->u.action.u.addba_req.timeout;
 		pattrib->pktlen = offsetofend(struct ieee80211_mgmt, u.action.u.addba_resp.timeout);
 		break;
 	case WLAN_ACTION_DELBA:
@@ -5620,7 +5615,8 @@ unsigned int send_delba(struct adapter *padapter, u8 initiator, u8 *addr)
 	if (initiator == 0) { /*  recipient */
 		for (tid = 0; tid < MAXTID; tid++) {
 			if (psta->recvreorder_ctrl[tid].enable) {
-				issue_action_BA(padapter, addr, WLAN_ACTION_DELBA, (((tid << 1) | initiator) & 0x1F));
+				issue_action_BA(padapter, addr, WLAN_ACTION_DELBA,
+						(((tid << 1) | initiator) & 0x1F), NULL);
 				psta->recvreorder_ctrl[tid].enable = false;
 				psta->recvreorder_ctrl[tid].indicate_seq = 0xffff;
 			}
@@ -5628,7 +5624,8 @@ unsigned int send_delba(struct adapter *padapter, u8 initiator, u8 *addr)
 	} else if (initiator == 1) { /*  originator */
 		for (tid = 0; tid < MAXTID; tid++) {
 			if (psta->htpriv.agg_enable_bitmap & BIT(tid)) {
-				issue_action_BA(padapter, addr, WLAN_ACTION_DELBA, (((tid << 1) | initiator) & 0x1F));
+				issue_action_BA(padapter, addr, WLAN_ACTION_DELBA,
+						(((tid << 1) | initiator) & 0x1F), NULL);
 				psta->htpriv.agg_enable_bitmap &= ~BIT(tid);
 				psta->htpriv.candidate_tid_bitmap &= ~BIT(tid);
 			}
@@ -7683,7 +7680,7 @@ u8 add_ba_hdl(struct adapter *padapter, unsigned char *pbuf)
 
 	if (((pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS) && (pmlmeinfo->HT_enable)) ||
 	    ((pmlmeinfo->state & 0x03) == WIFI_FW_AP_STATE)) {
-		issue_action_BA(padapter, pparm->addr, WLAN_ACTION_ADDBA_REQ, (u16)pparm->tid);
+		issue_action_BA(padapter, pparm->addr, WLAN_ACTION_ADDBA_REQ, (u16)pparm->tid, NULL);
 		_set_timer(&psta->addba_retry_timer, ADDBA_TO);
 	} else {
 		psta->htpriv.candidate_tid_bitmap &= ~BIT(pparm->tid);
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 7652e72a03f4c..9c0af4704607b 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -285,7 +285,6 @@ struct mlme_ext_info {
 	u8	bwmode_updated;
 	u8	hidden_ssid_mode;
 
-	struct ADDBA_request	ADDBA_req;
 	struct WMM_para_element	WMM_param;
 	struct HT_caps_element	HT_caps;
 	struct HT_info_element	HT_info;
@@ -523,7 +522,8 @@ int issue_deauth(struct adapter *padapter, unsigned char *da,
 		 unsigned short reason);
 int issue_deauth_ex(struct adapter *padapter, u8 *da, unsigned short reason,
 		    int try_cnt, int wait_ms);
-void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action, u16 status);
+void issue_action_BA(struct adapter *padapter, unsigned char *raddr, u8 action,
+		     u16 status, struct ieee80211_mgmt *mgmt_req);
 unsigned int send_delba(struct adapter *padapter, u8 initiator, u8 *addr);
 unsigned int send_beacon(struct adapter *padapter);
 bool get_beacon_valid_bit(struct adapter *adapter);
diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h
index 92a584a8b6c07..2381c519ceafe 100644
--- a/drivers/staging/r8188eu/include/wifi.h
+++ b/drivers/staging/r8188eu/include/wifi.h
@@ -430,14 +430,6 @@ struct WMM_para_element {
 	struct AC_param	ac_param[4];
 } __packed;
 
-struct ADDBA_request {
-	unsigned char	action_code;
-	unsigned char	dialog_token;
-	__le16	BA_para_set;
-	__le16	BA_timeout_value;
-	__le16	BA_starting_seqctrl;
-} __packed;
-
 #define MAX_AMPDU_FACTOR_64K	3
 
 /* Spatial Multiplexing Power Save Modes */
-- 
GitLab


From 4142c442f2597c806d6fdcaf253aa38bfea4233c Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:48:54 +0100
Subject: [PATCH 1020/4122] staging: r8188eu: remove some obsolete comments

Remove obsolete comments in validate_recv_data_frame.
There is no prxcmd variable (any more?).

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_recv.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c
index 94f85cd7038dc..cb0f35d7ab98d 100644
--- a/drivers/staging/r8188eu/core/rtw_recv.c
+++ b/drivers/staging/r8188eu/core/rtw_recv.c
@@ -1065,8 +1065,6 @@ static int validate_recv_data_frame(struct adapter *adapter,
 	if (!psta)
 		return _FAIL;
 
-	/* psta->rssi = prxcmd->rssi; */
-	/* psta->signal_quality = prxcmd->sq; */
 	precv_frame->psta = psta;
 
 	pattrib->amsdu = 0;
-- 
GitLab


From 8554c4768a36da65ca35a397e4d9c6285b8daf5e Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:48:55 +0100
Subject: [PATCH 1021/4122] staging: r8188eu: reorder assignments, clarify the
 header format

Reorder some of the assignments in update_recvframe_attrib_88e.

This should make it a bit easier to understand the format of the header
that is added by the chip's firmware.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
index 9a61eef8550bd..d1ac2960f1c44 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_rxdesc.c
@@ -66,28 +66,25 @@ void update_recvframe_attrib_88e(struct recv_frame *precvframe, struct recv_stat
 
 	if (pattrib->pkt_rpt_type == NORMAL_RX) {
 		pattrib->pkt_len = le32_to_cpu(prxstat->rxdw0) & 0x00003fff;
+		pattrib->icv_err = (le32_to_cpu(prxstat->rxdw0) >> 15) & 0x1;
 		pattrib->drvinfo_sz = ((le32_to_cpu(prxstat->rxdw0) >> 16) & 0xf) * 8;
-
+		pattrib->encrypt = (u8)((le32_to_cpu(prxstat->rxdw0) >> 20) & 0x7);
+		pattrib->qos = (le32_to_cpu(prxstat->rxdw0) >> 23) & 0x1;
+		pattrib->shift_sz = (le32_to_cpu(prxstat->rxdw0) >> 24) & 0x3;
 		pattrib->physt = (le32_to_cpu(prxstat->rxdw0) >> 26) & 0x1;
-
 		pattrib->bdecrypted = (le32_to_cpu(prxstat->rxdw0) & BIT(27)) ? 0 : 1;
-		pattrib->encrypt = (le32_to_cpu(prxstat->rxdw0) >> 20) & 0x7;
 
-		pattrib->qos = (le32_to_cpu(prxstat->rxdw0) >> 23) & 0x1;
 		pattrib->priority = (le32_to_cpu(prxstat->rxdw1) >> 8) & 0xf;
-
 		pattrib->amsdu = (le32_to_cpu(prxstat->rxdw1) >> 13) & 0x1;
+		pattrib->mdata = (le32_to_cpu(prxstat->rxdw1) >> 26) & 0x1;
+		pattrib->mfrag = (le32_to_cpu(prxstat->rxdw1) >> 27) & 0x1;
 
 		pattrib->seq_num = le32_to_cpu(prxstat->rxdw2) & 0x00000fff;
 		pattrib->frag_num = (le32_to_cpu(prxstat->rxdw2) >> 12) & 0xf;
-		pattrib->mfrag = (le32_to_cpu(prxstat->rxdw1) >> 27) & 0x1;
-		pattrib->mdata = (le32_to_cpu(prxstat->rxdw1) >> 26) & 0x1;
 
 		pattrib->mcs_rate = le32_to_cpu(prxstat->rxdw3) & 0x3f;
 		pattrib->rxht = (le32_to_cpu(prxstat->rxdw3) >> 6) & 0x1;
 
-		pattrib->icv_err = (le32_to_cpu(prxstat->rxdw0) >> 15) & 0x1;
-		pattrib->shift_sz = (le32_to_cpu(prxstat->rxdw0) >> 24) & 0x3;
 	} else if (pattrib->pkt_rpt_type == TX_REPORT1) { /* CCX */
 		pattrib->pkt_len = TX_RPT1_PKT_LEN;
 	} else if (pattrib->pkt_rpt_type == TX_REPORT2) {
-- 
GitLab


From 2dbc324eaab66098ae485114b0862653355a0a2b Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:48:56 +0100
Subject: [PATCH 1022/4122] staging: r8188eu: reformat a function header

The definition of Hal_EfuseParseIDCode88E can be a single line.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/rtl8188e_hal_init.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
index cc29963f4b491..73855bca76fe0 100644
--- a/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
+++ b/drivers/staging/r8188eu/hal/rtl8188e_hal_init.c
@@ -676,11 +676,7 @@ s32 InitLLTTable(struct adapter *padapter, u8 txpktbuf_bndy)
 	return status;
 }
 
-void
-Hal_EfuseParseIDCode88E(
-		struct adapter *padapter,
-		u8 *hwinfo
-	)
+void Hal_EfuseParseIDCode88E(struct adapter *padapter, u8 *hwinfo)
 {
 	struct eeprom_priv *pEEPROM = &padapter->eeprompriv;
 	struct net_device *netdev = padapter->pnetdev;
-- 
GitLab


From d44fa4cb2e30fc22b2763fd425f2c48be958354f Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:48:57 +0100
Subject: [PATCH 1023/4122] staging: r8188eu: remove state checks in
 rtw_led_control

There's no need to check for bSurpriseRemoved or bDriverStopped in the
rtw_led_control function. This function schedules a delayed worker which
calls SwLedOn or SwLedOff (or the function calls SwLedOff directly).

SwLedOn and SwLedOff check bDriverStopped themselves or they initiate a
USB control transfer via usb_write, where bSurpriseRemoved is checked.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_led.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_led.c b/drivers/staging/r8188eu/core/rtw_led.c
index ce8de2eb7845f..48725ce9d369e 100644
--- a/drivers/staging/r8188eu/core/rtw_led.c
+++ b/drivers/staging/r8188eu/core/rtw_led.c
@@ -140,8 +140,7 @@ void rtw_led_control(struct adapter *padapter, enum LED_CTL_MODE LedAction)
 	struct registry_priv *registry_par;
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 
-	if ((padapter->bSurpriseRemoved) || (padapter->bDriverStopped) ||
-	    (!padapter->hw_init_completed))
+	if (!padapter->hw_init_completed)
 		return;
 
 	if (!pLed->bRegUseLed)
-- 
GitLab


From b69373787637e9b38f73ff9f8b7b03e9d9767839 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:48:58 +0100
Subject: [PATCH 1024/4122] staging: r8188eu: clean up rtw_hal_init

Clean up the rtw_hal_init function.

Remove the status variable. Exit immediately for errors.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-7-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/hal/hal_intf.c | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/hal/hal_intf.c b/drivers/staging/r8188eu/hal/hal_intf.c
index 37935aef71eab..13790e32f11c2 100644
--- a/drivers/staging/r8188eu/hal/hal_intf.c
+++ b/drivers/staging/r8188eu/hal/hal_intf.c
@@ -6,24 +6,19 @@
 #include "../include/drv_types.h"
 #include "../include/hal_intf.h"
 
-uint	 rtw_hal_init(struct adapter *adapt)
+uint rtw_hal_init(struct adapter *adapt)
 {
-	uint	status = _SUCCESS;
-
 	adapt->hw_init_completed = false;
 
-	status = rtl8188eu_hal_init(adapt);
+	if (rtl8188eu_hal_init(adapt) != _SUCCESS)
+		return _FAIL;
 
-	if (status == _SUCCESS) {
-		adapt->hw_init_completed = true;
+	adapt->hw_init_completed = true;
 
-		if (adapt->registrypriv.notch_filter == 1)
-			hal_notch_filter_8188e(adapt, 1);
-	} else {
-		adapt->hw_init_completed = false;
-	}
+	if (adapt->registrypriv.notch_filter == 1)
+		hal_notch_filter_8188e(adapt, 1);
 
-	return status;
+	return _SUCCESS;
 }
 
 uint rtw_hal_deinit(struct adapter *adapt)
-- 
GitLab


From b78ec61d9478d438bc9e723214cf609ac669d132 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:48:59 +0100
Subject: [PATCH 1025/4122] staging: r8188eu: remove get_fwstate

The get_fwstate function is not used. Remove it.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-8-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/rtw_mlme.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h
index ca539c652f26d..268f898b151bb 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme.h
@@ -443,11 +443,6 @@ static inline bool check_fwstate(struct mlme_priv *pmlmepriv, int state)
 	return false;
 }
 
-static inline int get_fwstate(struct mlme_priv *pmlmepriv)
-{
-	return pmlmepriv->fw_state;
-}
-
 /*
  * No Limit on the calling context,
  * therefore set it to be the critical section...
-- 
GitLab


From 32265aaf61c6bf399a7ed2c06d75a7d00dd5500e Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:49:00 +0100
Subject: [PATCH 1026/4122] staging: r8188eu: merge two rtw_free_network_nolock
 functions

Remove the _rtw_free_network_nolock function and merge it into
rtw_free_network_nolock, which is its only caller.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-9-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme.c    | 23 ++++++++--------------
 drivers/staging/r8188eu/include/rtw_mlme.h |  2 --
 2 files changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme.c b/drivers/staging/r8188eu/core/rtw_mlme.c
index a47ae33454b3d..b272123626ac6 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme.c
@@ -76,19 +76,6 @@ void _rtw_free_network(struct mlme_priv *pmlmepriv, struct wlan_network *pnetwor
 	spin_unlock_bh(&free_queue->lock);
 }
 
-void _rtw_free_network_nolock(struct	mlme_priv *pmlmepriv, struct wlan_network *pnetwork)
-{
-	struct __queue *free_queue = &pmlmepriv->free_bss_pool;
-
-	if (!pnetwork)
-		return;
-	if (pnetwork->fixed)
-		return;
-	list_del_init(&pnetwork->list);
-	list_add_tail(&pnetwork->list, get_list_head(free_queue));
-	pmlmepriv->num_of_scanned--;
-}
-
 /*
 	return the wlan_network with the matching addr
 
@@ -307,9 +294,15 @@ exit:
 static void rtw_free_network_nolock(struct mlme_priv *pmlmepriv,
 				    struct wlan_network *pnetwork)
 {
+	struct __queue *free_queue = &pmlmepriv->free_bss_pool;
 
-	_rtw_free_network_nolock(pmlmepriv, pnetwork);
-
+	if (!pnetwork)
+		return;
+	if (pnetwork->fixed)
+		return;
+	list_del_init(&pnetwork->list);
+	list_add_tail(&pnetwork->list, get_list_head(free_queue));
+	pmlmepriv->num_of_scanned--;
 }
 
 void rtw_free_network_queue(struct adapter *dev, u8 isfreeall)
diff --git a/drivers/staging/r8188eu/include/rtw_mlme.h b/drivers/staging/r8188eu/include/rtw_mlme.h
index 268f898b151bb..3ff653ff1d816 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme.h
@@ -539,8 +539,6 @@ struct wlan_network *rtw_alloc_network(struct mlme_priv *pmlmepriv);
 
 void _rtw_free_network(struct mlme_priv *pmlmepriv,
 		       struct wlan_network *pnetwork, u8 isfreeall);
-void _rtw_free_network_nolock(struct mlme_priv *pmlmepriv,
-			      struct wlan_network *pnetwork);
 
 struct wlan_network *_rtw_find_network(struct __queue *scanned_queue, u8 *addr);
 
-- 
GitLab


From c12b5b5614ac39c66d6dcf8f367620e614343b25 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sun, 6 Nov 2022 13:49:01 +0100
Subject: [PATCH 1027/4122] staging: r8188eu: remove checks in dump_mgntframe

There's no need to check for bSurpriseRemoved and bDriverStopped in
dump_mgmtframe.

The sequence of function calls is

dump_mgntframe
   rtl8188eu_mgnt_xmit
      rtw_dump_xframe
         loop over all fragments

For each fragment, rtw_write_port is called. This function checks
bSurpriseRemoved and bDriverStopped.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106124901.720785-10-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 324757699716a..17803aca83c8a 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3959,9 +3959,6 @@ void update_mgntframe_attrib(struct adapter *padapter, struct pkt_attrib *pattri
 
 void dump_mgntframe(struct adapter *padapter, struct xmit_frame *pmgntframe)
 {
-	if (padapter->bSurpriseRemoved || padapter->bDriverStopped)
-		return;
-
 	rtl8188eu_mgnt_xmit(padapter, pmgntframe);
 }
 
-- 
GitLab


From 2b62e1b447c643de8bb063d64348e4d000f3c557 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Sun, 6 Nov 2022 14:34:43 +0100
Subject: [PATCH 1028/4122] staging: r8188eu: clean up error handling in
 rtw_start_drv_threads()

Convert the error handling in the function rtw_start_drv_threads() to
the common logic used in the kernel. Another step to get rid of _FAIL
and _SUCCESS which uses inverted logic.

Signed-off-by: Michael Straube <straube.linux@gmail.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/20221106133443.8872-1-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/include/osdep_intf.h |  2 +-
 drivers/staging/r8188eu/os_dep/os_intfs.c    | 17 +++++++----------
 2 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/r8188eu/include/osdep_intf.h b/drivers/staging/r8188eu/include/osdep_intf.h
index 0f7d74a3ff6df..6d66cb57225e7 100644
--- a/drivers/staging/r8188eu/include/osdep_intf.h
+++ b/drivers/staging/r8188eu/include/osdep_intf.h
@@ -46,7 +46,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter);
 void rtw_free_drv_sw(struct adapter *padapter);
 void rtw_reset_drv_sw(struct adapter *padapter);
 
-u32 rtw_start_drv_threads(struct adapter *padapter);
+int rtw_start_drv_threads(struct adapter *padapter);
 void rtw_stop_drv_threads (struct adapter *padapter);
 void rtw_cancel_all_timer(struct adapter *padapter);
 
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index 970f380bac961..66556e07ed93d 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -363,18 +363,16 @@ struct net_device *rtw_init_netdev(struct adapter *old_padapter)
 	return pnetdev;
 }
 
-u32 rtw_start_drv_threads(struct adapter *padapter)
+int rtw_start_drv_threads(struct adapter *padapter)
 {
-	u32 _status = _SUCCESS;
-
 	padapter->cmdThread = kthread_run(rtw_cmd_thread, padapter, "RTW_CMD_THREAD");
 	if (IS_ERR(padapter->cmdThread))
-		_status = _FAIL;
-	else
-		/* wait for rtw_cmd_thread() to start running */
-		wait_for_completion(&padapter->cmdpriv.start_cmd_thread);
+		return PTR_ERR(padapter->cmdThread);
 
-	return _status;
+	/* wait for rtw_cmd_thread() to start running */
+	wait_for_completion(&padapter->cmdpriv.start_cmd_thread);
+
+	return 0;
 }
 
 void rtw_stop_drv_threads(struct adapter *padapter)
@@ -627,8 +625,7 @@ static int _netdev_open(struct net_device *pnetdev)
 
 		netdev_dbg(pnetdev, "MAC Address = %pM\n", pnetdev->dev_addr);
 
-		status = rtw_start_drv_threads(padapter);
-		if (status == _FAIL) {
+		if (rtw_start_drv_threads(padapter)) {
 			pr_info("Initialize driver software resource Failed!\n");
 			goto netdev_open_error;
 		}
-- 
GitLab


From c1a84fe1f431cb3de6763aabddcf6073d5053f9d Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Sun, 6 Nov 2022 23:25:37 +0530
Subject: [PATCH 1029/4122] staging: r8188eu: simplify complex pointer casting

Pointers to structures udphdr and dhcpMessage are derived by casting
adjacent pointers with size_t. Such typecast of pointer using size_t
is not preferred. The code looks complex and delicate. Simplify such
casting by utilizing generic "void *" casting.

Suggested-by: Joe Perches <joe@perches.com>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/1428580d09a9916899209c9278dca40ee2d297d3.1667755987.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_br_ext.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c
index a23f7df373ede..4deaa7e352a34 100644
--- a/drivers/staging/r8188eu/core/rtw_br_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_br_ext.c
@@ -610,12 +610,11 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb)
 			struct iphdr *iph = (struct iphdr *)(skb->data + ETH_HLEN);
 
 			if (iph->protocol == IPPROTO_UDP) { /*  UDP */
-				struct udphdr *udph = (struct udphdr *)((size_t)iph + (iph->ihl << 2));
+				struct udphdr *udph = (void *)iph + (iph->ihl << 2);
 
 				if ((udph->source == htons(CLIENT_PORT)) &&
 				    (udph->dest == htons(SERVER_PORT))) { /*  DHCP request */
-					struct dhcpMessage *dhcph =
-						(struct dhcpMessage *)((size_t)udph + sizeof(struct udphdr));
+					struct dhcpMessage *dhcph = (void *)udph + sizeof(struct udphdr);
 					u32 cookie = be32_to_cpu((__be32)dhcph->cookie);
 
 					if (cookie == DHCP_MAGIC) { /*  match magic word */
-- 
GitLab


From a57ef65a09a0214776675dcf931ace146724b63c Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Sun, 6 Nov 2022 23:27:12 +0530
Subject: [PATCH 1030/4122] staging: r8188eu: remove unnecessary casting

The dhcpMessage struct member variable "cookie" is already declared
to be of type __be32. There is no need to cast it again as __be32.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Link: https://lore.kernel.org/r/c333e22349c5c347c740b425330b35830b969fa9.1667755987.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_br_ext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_br_ext.c b/drivers/staging/r8188eu/core/rtw_br_ext.c
index 4deaa7e352a34..a7c67014dde0f 100644
--- a/drivers/staging/r8188eu/core/rtw_br_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_br_ext.c
@@ -615,7 +615,7 @@ void dhcp_flag_bcast(struct adapter *priv, struct sk_buff *skb)
 				if ((udph->source == htons(CLIENT_PORT)) &&
 				    (udph->dest == htons(SERVER_PORT))) { /*  DHCP request */
 					struct dhcpMessage *dhcph = (void *)udph + sizeof(struct udphdr);
-					u32 cookie = be32_to_cpu((__be32)dhcph->cookie);
+					u32 cookie = be32_to_cpu(dhcph->cookie);
 
 					if (cookie == DHCP_MAGIC) { /*  match magic word */
 						if (!(dhcph->flags & htons(BROADCAST_FLAG))) {
-- 
GitLab


From d54b6ac14d3f1ac033fe88a7f9935ba9f1f47077 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 7 Nov 2022 21:28:22 +0100
Subject: [PATCH 1031/4122] staging: r8188eu: use a qos_hdr in
 validate_recv_data_frame

Define a struct ieee80211_qos_hdr in the validate_recv_data_frame
function. Use this struct to replace some numeric offsets and make the
code easier to understand.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221107202824.61431-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_recv.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c
index cb0f35d7ab98d..5b0a66aebff19 100644
--- a/drivers/staging/r8188eu/core/rtw_recv.c
+++ b/drivers/staging/r8188eu/core/rtw_recv.c
@@ -1032,7 +1032,6 @@ static int validate_recv_data_frame(struct adapter *adapter,
 				    struct recv_frame *precv_frame)
 {
 	struct sta_info *psta = NULL;
-	u8 *ptr = precv_frame->rx_data;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)precv_frame->rx_data;
 	struct rx_pkt_attrib	*pattrib = &precv_frame->attrib;
 	struct security_priv	*psecuritypriv = &adapter->securitypriv;
@@ -1071,10 +1070,12 @@ static int validate_recv_data_frame(struct adapter *adapter,
 	pattrib->ack_policy = 0;
 	/* parsing QC field */
 	if (pattrib->qos) {
+		struct ieee80211_qos_hdr *qos_hdr = (struct ieee80211_qos_hdr *)hdr;
+
 		pattrib->priority = ieee80211_get_tid(hdr);
-		pattrib->ack_policy = GetAckpolicy((ptr + 24));
-		pattrib->amsdu = GetAMsdu((ptr + 24));
-		pattrib->hdrlen = 26;
+		pattrib->ack_policy = GetAckpolicy(&qos_hdr->qos_ctrl);
+		pattrib->amsdu = GetAMsdu(&qos_hdr->qos_ctrl);
+		pattrib->hdrlen = sizeof(*qos_hdr);
 
 		if (pattrib->priority != 0 && pattrib->priority != 3)
 			adapter->recvpriv.bIsAnyNonBEPkts = true;
-- 
GitLab


From fa2a8d4284225fbc0ee3db4760d6d7339586d61d Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 7 Nov 2022 21:28:23 +0100
Subject: [PATCH 1032/4122] staging: r8188eu: drop another removal/stop check

There's no need to check bDriverStopped and bSurpriseRemoved in
issue_probereq_ex.

The code path looks like

issue_probereq_ex
   _issue_probereq
      dump_mgntframe
      or
      dump_mgntframe_and_wait_ack

All paths from dump_mgntframe check the two variables.
dump_mgntframe_and_wait_ack contains a check as well.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221107202824.61431-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 17803aca83c8a..bfd6afd7266e3 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -4496,9 +4496,6 @@ int issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid,
 
 		i++;
 
-		if (padapter->bDriverStopped || padapter->bSurpriseRemoved)
-			break;
-
 		if (i < try_cnt && wait_ms > 0 && ret == _FAIL)
 			msleep(wait_ms);
 
-- 
GitLab


From a614e753e8e7f3322b560dcf6eaf44468ec22b3a Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Mon, 7 Nov 2022 21:28:24 +0100
Subject: [PATCH 1033/4122] staging: r8188eu: drop removal/stop check in
 dump_mgntframe_and_wait_ack

We can remove the checks for bDriverStopped and bSurpriseRemoved in
dump_mgntframe_and_wait_ack.

The code path from this function looks like

dump_mgntframe_and_wait_ack
   rtl8188eu_mgnt_xmit
      rtw_dump_xframe
         loop over all fragments

rtw_write_port is called for each fragment. bSurpriseRemoved and
bDriverStopped are checked in rtw_write_port.

Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221107202824.61431-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index bfd6afd7266e3..be33489d3dfdd 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3988,9 +3988,6 @@ s32 dump_mgntframe_and_wait_ack(struct adapter *padapter, struct xmit_frame *pmg
 	u32 timeout_ms = 500;/*   500ms */
 	struct xmit_priv	*pxmitpriv = &padapter->xmitpriv;
 
-	if (padapter->bSurpriseRemoved || padapter->bDriverStopped)
-		return -1;
-
 	mutex_lock(&pxmitpriv->ack_tx_mutex);
 	pxmitpriv->ack_tx = true;
 
-- 
GitLab


From 0ce0f9d0785a7ba5637a22b63332cf747772da2a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 7 Nov 2022 00:05:06 +0100
Subject: [PATCH 1034/4122] usb: phy: phy-gpio-vbus-usb: Add device tree
 probing

Make it possible to probe the GPIO VBUS detection driver
from the device tree compatible for GPIO USB B connectors.

Since this driver is using the "gpio-usb-b-connector"
compatible, it is important to discern it from the role
switch connector driver (which does not provide a phy),
so we add some Kconfig text and depend on !USB_CONN_GPIO.

Cc: Rob Herring <robh@kernel.org>
Cc: Prashant Malani <pmalani@chromium.org>
Cc: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221106230506.1646101-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/Kconfig             |  6 +++++-
 drivers/usb/phy/phy-gpio-vbus-usb.c | 12 ++++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index 2acbe41fbf7e0..efdcafdbe46d7 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -93,12 +93,16 @@ config USB_GPIO_VBUS
 	tristate "GPIO based peripheral-only VBUS sensing 'transceiver'"
 	depends on GPIOLIB || COMPILE_TEST
 	depends on USB_GADGET || !USB_GADGET # if USB_GADGET=m, this can't be 'y'
+	depends on !USB_CONN_GPIO
 	select USB_PHY
 	help
 	  Provides simple GPIO VBUS sensing for controllers with an
 	  internal transceiver via the usb_phy interface, and
 	  optionally control of a D+ pullup GPIO as well as a VBUS
-	  current limit regulator.
+	  current limit regulator. This driver is for devices that do
+	  NOT support role switch. OTG devices that can do role switch
+	  (master/peripheral) shall use the USB based connection
+	  detection driver USB_CONN_GPIO.
 
 config OMAP_OTG
 	tristate "OMAP USB OTG controller driver"
diff --git a/drivers/usb/phy/phy-gpio-vbus-usb.c b/drivers/usb/phy/phy-gpio-vbus-usb.c
index f13f5530746cc..12dfeff7de3d0 100644
--- a/drivers/usb/phy/phy-gpio-vbus-usb.c
+++ b/drivers/usb/phy/phy-gpio-vbus-usb.c
@@ -366,12 +366,24 @@ static const struct dev_pm_ops gpio_vbus_dev_pm_ops = {
 
 MODULE_ALIAS("platform:gpio-vbus");
 
+/*
+ * NOTE: this driver matches against "gpio-usb-b-connector" for
+ * devices that do NOT support role switch.
+ */
+static const struct of_device_id gpio_vbus_of_match[] = {
+	{
+		.compatible = "gpio-usb-b-connector",
+	},
+	{},
+};
+
 static struct platform_driver gpio_vbus_driver = {
 	.driver = {
 		.name  = "gpio-vbus",
 #ifdef CONFIG_PM
 		.pm = &gpio_vbus_dev_pm_ops,
 #endif
+		.of_match_table = gpio_vbus_of_match,
 	},
 	.probe		= gpio_vbus_probe,
 	.remove		= gpio_vbus_remove,
-- 
GitLab


From 04914233561377fc0369b984c9d19ec1b6ce2845 Mon Sep 17 00:00:00 2001
From: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Date: Mon, 7 Nov 2022 16:37:55 -0800
Subject: [PATCH 1035/4122] usb: dwc3: gadget: Reduce TRB IOC settings

When the TRB ring is full, the dwc3 driver must make sure that there's
at least 1 TRB with Interrupt On Completion (IOC) set to notify of
available TRBs.

The current logic just sets the TRB's IOC whenever we run out of TRBs,
but it doesn't consider that there may be other TRBs with IOC/LST set
already. This creates more events and unnecessary delay from interrupt
handling. Only forcefully set IOC when we run out of TRBs and none of
the TRBs in the TRB ring has had IOC set.

Signed-off-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://lore.kernel.org/r/72a1fa448eb1201b152e65be7902a5d1c75b9f3a.1667867687.git.Thinh.Nguyen@synopsys.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/gadget.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
index 5fe2d136dff51..ecddb144871be 100644
--- a/drivers/usb/dwc3/gadget.c
+++ b/drivers/usb/dwc3/gadget.c
@@ -1463,8 +1463,18 @@ static int dwc3_prepare_trbs_sg(struct dwc3_ep *dep,
 			 */
 			if (num_trbs_left == 1 || (needs_extra_trb &&
 					num_trbs_left <= 2 &&
-					sg_dma_len(sg_next(s)) >= length))
-				must_interrupt = true;
+					sg_dma_len(sg_next(s)) >= length)) {
+				struct dwc3_request *r;
+
+				/* Check if previous requests already set IOC */
+				list_for_each_entry(r, &dep->started_list, list) {
+					if (r != req && !r->request.no_interrupt)
+						break;
+
+					if (r == req)
+						must_interrupt = true;
+				}
+			}
 
 			dwc3_prepare_one_trb(dep, req, trb_length, 1, i, false,
 					must_interrupt);
-- 
GitLab


From 430d57f53eb1cdbf9ba9bbd397317912b3cd2de5 Mon Sep 17 00:00:00 2001
From: Ray Chi <raychi@google.com>
Date: Mon, 7 Nov 2022 15:27:54 +0800
Subject: [PATCH 1036/4122] usb: core: stop USB enumeration if too many retries

When a broken USB accessory connects to a USB host, usbcore might
keep doing enumeration retries. If the host has a watchdog mechanism,
the kernel panic will happen on the host.

This patch provides an attribute early_stop to limit the numbers of retries
for each port of a hub. If a port was marked with early_stop attribute,
unsuccessful connection attempts will fail quickly. In addition, if an
early_stop port has failed to initialize, it will ignore all future
connection events until early_stop attribute is clear.

Signed-off-by: Ray Chi <raychi@google.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/20221107072754.3336357-1-raychi@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-bus-usb | 11 +++++
 drivers/usb/core/hub.c                  | 60 +++++++++++++++++++++++++
 drivers/usb/core/hub.h                  |  4 ++
 drivers/usb/core/port.c                 | 27 +++++++++++
 4 files changed, 102 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-usb b/Documentation/ABI/testing/sysfs-bus-usb
index 568103d3376ee..545c2dd97ed02 100644
--- a/Documentation/ABI/testing/sysfs-bus-usb
+++ b/Documentation/ABI/testing/sysfs-bus-usb
@@ -264,6 +264,17 @@ Description:
 		attached to the port will not be detected, initialized,
 		or enumerated.
 
+What:		/sys/bus/usb/devices/.../<hub_interface>/port<X>/early_stop
+Date:		Sep 2022
+Contact:	Ray Chi <raychi@google.com>
+Description:
+		Some USB hosts have some watchdog mechanisms so that the device
+		may enter ramdump if it takes a long time during port initialization.
+		This attribute allows each port just has two attempts so that the
+		port initialization will be failed quickly. In addition, if a port
+		which is marked with early_stop has failed to initialize, it will ignore
+		all future connections until this attribute is clear.
+
 What:		/sys/bus/usb/devices/.../power/usb2_lpm_l1_timeout
 Date:		May 2013
 Contact:	Mathias Nyman <mathias.nyman@linux.intel.com>
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index bbab424b0d559..77e73fc8d6736 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -3081,6 +3081,48 @@ done:
 	return status;
 }
 
+/*
+ * hub_port_stop_enumerate - stop USB enumeration or ignore port events
+ * @hub: target hub
+ * @port1: port num of the port
+ * @retries: port retries number of hub_port_init()
+ *
+ * Return:
+ *    true: ignore port actions/events or give up connection attempts.
+ *    false: keep original behavior.
+ *
+ * This function will be based on retries to check whether the port which is
+ * marked with early_stop attribute would stop enumeration or ignore events.
+ *
+ * Note:
+ * This function didn't change anything if early_stop is not set, and it will
+ * prevent all connection attempts when early_stop is set and the attempts of
+ * the port are more than 1.
+ */
+static bool hub_port_stop_enumerate(struct usb_hub *hub, int port1, int retries)
+{
+	struct usb_port *port_dev = hub->ports[port1 - 1];
+
+	if (port_dev->early_stop) {
+		if (port_dev->ignore_event)
+			return true;
+
+		/*
+		 * We want unsuccessful attempts to fail quickly.
+		 * Since some devices may need one failure during
+		 * port initialization, we allow two tries but no
+		 * more.
+		 */
+		if (retries < 2)
+			return false;
+
+		port_dev->ignore_event = 1;
+	} else
+		port_dev->ignore_event = 0;
+
+	return port_dev->ignore_event;
+}
+
 /* Check if a port is power on */
 int usb_port_is_power_on(struct usb_hub *hub, unsigned int portstatus)
 {
@@ -4796,6 +4838,11 @@ hub_port_init(struct usb_hub *hub, struct usb_device *udev, int port1,
 	do_new_scheme = use_new_scheme(udev, retry_counter, port_dev);
 
 	for (retries = 0; retries < GET_DESCRIPTOR_TRIES; (++retries, msleep(100))) {
+		if (hub_port_stop_enumerate(hub, port1, retries)) {
+			retval = -ENODEV;
+			break;
+		}
+
 		if (do_new_scheme) {
 			struct usb_device_descriptor *buf;
 			int r = 0;
@@ -5246,6 +5293,11 @@ static void hub_port_connect(struct usb_hub *hub, int port1, u16 portstatus,
 	status = 0;
 
 	for (i = 0; i < PORT_INIT_TRIES; i++) {
+		if (hub_port_stop_enumerate(hub, port1, i)) {
+			status = -ENODEV;
+			break;
+		}
+
 		usb_lock_port(port_dev);
 		mutex_lock(hcd->address0_mutex);
 		retry_locked = true;
@@ -5614,6 +5666,10 @@ static void port_event(struct usb_hub *hub, int port1)
 	if (!pm_runtime_active(&port_dev->dev))
 		return;
 
+	/* skip port actions if ignore_event and early_stop are true */
+	if (port_dev->ignore_event && port_dev->early_stop)
+		return;
+
 	if (hub_handle_remote_wakeup(hub, port1, portstatus, portchange))
 		connect_change = 1;
 
@@ -5927,6 +5983,10 @@ static int usb_reset_and_verify_device(struct usb_device *udev)
 	mutex_lock(hcd->address0_mutex);
 
 	for (i = 0; i < PORT_INIT_TRIES; ++i) {
+		if (hub_port_stop_enumerate(parent_hub, port1, i)) {
+			ret = -ENODEV;
+			break;
+		}
 
 		/* ep0 maxpacket size may change; let the HCD know about it.
 		 * Other endpoints will be handled by re-enumeration. */
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index b2925856b4cb4..e23833562e4f2 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -90,6 +90,8 @@ struct usb_hub {
  * @is_superspeed cache super-speed status
  * @usb3_lpm_u1_permit: whether USB3 U1 LPM is permitted.
  * @usb3_lpm_u2_permit: whether USB3 U2 LPM is permitted.
+ * @early_stop: whether port initialization will be stopped earlier.
+ * @ignore_event: whether events of the port are ignored.
  */
 struct usb_port {
 	struct usb_device *child;
@@ -103,6 +105,8 @@ struct usb_port {
 	u32 over_current_count;
 	u8 portnum;
 	u32 quirks;
+	unsigned int early_stop:1;
+	unsigned int ignore_event:1;
 	unsigned int is_superspeed:1;
 	unsigned int usb3_lpm_u1_permit:1;
 	unsigned int usb3_lpm_u2_permit:1;
diff --git a/drivers/usb/core/port.c b/drivers/usb/core/port.c
index 015204fc67a10..06a8f1f84f6f8 100644
--- a/drivers/usb/core/port.c
+++ b/drivers/usb/core/port.c
@@ -18,6 +18,32 @@ static int usb_port_block_power_off;
 
 static const struct attribute_group *port_dev_group[];
 
+static ssize_t early_stop_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct usb_port *port_dev = to_usb_port(dev);
+
+	return sysfs_emit(buf, "%s\n", port_dev->early_stop ? "yes" : "no");
+}
+
+static ssize_t early_stop_store(struct device *dev, struct device_attribute *attr,
+				const char *buf, size_t count)
+{
+	struct usb_port *port_dev = to_usb_port(dev);
+	bool value;
+
+	if (kstrtobool(buf, &value))
+		return -EINVAL;
+
+	if (value)
+		port_dev->early_stop = 1;
+	else
+		port_dev->early_stop = 0;
+
+	return count;
+}
+static DEVICE_ATTR_RW(early_stop);
+
 static ssize_t disable_show(struct device *dev,
 			      struct device_attribute *attr, char *buf)
 {
@@ -237,6 +263,7 @@ static struct attribute *port_dev_attrs[] = {
 	&dev_attr_quirks.attr,
 	&dev_attr_over_current_count.attr,
 	&dev_attr_disable.attr,
+	&dev_attr_early_stop.attr,
 	NULL,
 };
 
-- 
GitLab


From c14f7ccc9f5dcf9d06ddeec706f85405b2c80600 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Thu, 14 Jul 2022 20:41:30 +0200
Subject: [PATCH 1037/4122] PCI: Assign PCI domain IDs by ida_alloc()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace assignment of PCI domain IDs from atomic_inc_return() to
ida_alloc().

Use two IDAs, one for static domain allocations (those which are defined in
device tree) and second for dynamic allocations (all other).

During removal of root bus / host bridge, also release the domain ID.  The
released ID can be reused again, for example when dynamically loading and
unloading native PCI host bridge drivers.

This change also allows to mix static device tree assignment and dynamic by
kernel as all static allocations are reserved in dynamic pool.

[bhelgaas: set "err" if "bus->domain_nr < 0"]
Link: https://lore.kernel.org/r/20220714184130.5436-1-pali@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/pci.c    | 103 +++++++++++++++++++++++++------------------
 drivers/pci/probe.c  |   7 +++
 drivers/pci/remove.c |   6 +++
 include/linux/pci.h  |   1 +
 4 files changed, 74 insertions(+), 43 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 2127aba3550b5..9f3cc829dfeea 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6743,60 +6743,70 @@ static void pci_no_domains(void)
 }
 
 #ifdef CONFIG_PCI_DOMAINS_GENERIC
-static atomic_t __domain_nr = ATOMIC_INIT(-1);
+static DEFINE_IDA(pci_domain_nr_static_ida);
+static DEFINE_IDA(pci_domain_nr_dynamic_ida);
 
-static int pci_get_new_domain_nr(void)
+static void of_pci_reserve_static_domain_nr(void)
 {
-	return atomic_inc_return(&__domain_nr);
+	struct device_node *np;
+	int domain_nr;
+
+	for_each_node_by_type(np, "pci") {
+		domain_nr = of_get_pci_domain_nr(np);
+		if (domain_nr < 0)
+			continue;
+		/*
+		 * Permanently allocate domain_nr in dynamic_ida
+		 * to prevent it from dynamic allocation.
+		 */
+		ida_alloc_range(&pci_domain_nr_dynamic_ida,
+				domain_nr, domain_nr, GFP_KERNEL);
+	}
 }
 
 static int of_pci_bus_find_domain_nr(struct device *parent)
 {
-	static int use_dt_domains = -1;
-	int domain = -1;
+	static bool static_domains_reserved = false;
+	int domain_nr;
 
-	if (parent)
-		domain = of_get_pci_domain_nr(parent->of_node);
+	/* On the first call scan device tree for static allocations. */
+	if (!static_domains_reserved) {
+		of_pci_reserve_static_domain_nr();
+		static_domains_reserved = true;
+	}
+
+	if (parent) {
+		/*
+		 * If domain is in DT, allocate it in static IDA.  This
+		 * prevents duplicate static allocations in case of errors
+		 * in DT.
+		 */
+		domain_nr = of_get_pci_domain_nr(parent->of_node);
+		if (domain_nr >= 0)
+			return ida_alloc_range(&pci_domain_nr_static_ida,
+					       domain_nr, domain_nr,
+					       GFP_KERNEL);
+	}
 
 	/*
-	 * Check DT domain and use_dt_domains values.
-	 *
-	 * If DT domain property is valid (domain >= 0) and
-	 * use_dt_domains != 0, the DT assignment is valid since this means
-	 * we have not previously allocated a domain number by using
-	 * pci_get_new_domain_nr(); we should also update use_dt_domains to
-	 * 1, to indicate that we have just assigned a domain number from
-	 * DT.
-	 *
-	 * If DT domain property value is not valid (ie domain < 0), and we
-	 * have not previously assigned a domain number from DT
-	 * (use_dt_domains != 1) we should assign a domain number by
-	 * using the:
-	 *
-	 * pci_get_new_domain_nr()
-	 *
-	 * API and update the use_dt_domains value to keep track of method we
-	 * are using to assign domain numbers (use_dt_domains = 0).
-	 *
-	 * All other combinations imply we have a platform that is trying
-	 * to mix domain numbers obtained from DT and pci_get_new_domain_nr(),
-	 * which is a recipe for domain mishandling and it is prevented by
-	 * invalidating the domain value (domain = -1) and printing a
-	 * corresponding error.
+	 * If domain was not specified in DT, choose a free ID from dynamic
+	 * allocations. All domain numbers from DT are permanently in
+	 * dynamic allocations to prevent assigning them to other DT nodes
+	 * without static domain.
 	 */
-	if (domain >= 0 && use_dt_domains) {
-		use_dt_domains = 1;
-	} else if (domain < 0 && use_dt_domains != 1) {
-		use_dt_domains = 0;
-		domain = pci_get_new_domain_nr();
-	} else {
-		if (parent)
-			pr_err("Node %pOF has ", parent->of_node);
-		pr_err("Inconsistent \"linux,pci-domain\" property in DT\n");
-		domain = -1;
-	}
+	return ida_alloc(&pci_domain_nr_dynamic_ida, GFP_KERNEL);
+}
 
-	return domain;
+static void of_pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+	if (bus->domain_nr < 0)
+		return;
+
+	/* Release domain from IDA where it was allocated. */
+	if (of_get_pci_domain_nr(parent->of_node) == bus->domain_nr)
+		ida_free(&pci_domain_nr_static_ida, bus->domain_nr);
+	else
+		ida_free(&pci_domain_nr_dynamic_ida, bus->domain_nr);
 }
 
 int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
@@ -6804,6 +6814,13 @@ int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent)
 	return acpi_disabled ? of_pci_bus_find_domain_nr(parent) :
 			       acpi_pci_bus_find_domain_nr(bus);
 }
+
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent)
+{
+	if (!acpi_disabled)
+		return;
+	of_pci_bus_release_domain_nr(bus, parent);
+}
 #endif
 
 /**
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 1d6f7b502020d..1e234189aff15 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -906,6 +906,10 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge)
 		bus->domain_nr = pci_bus_find_domain_nr(bus, parent);
 	else
 		bus->domain_nr = bridge->domain_nr;
+	if (bus->domain_nr < 0) {
+		err = bus->domain_nr;
+		goto free;
+	}
 #endif
 
 	b = pci_find_bus(pci_domain_nr(bus), bridge->busnr);
@@ -1030,6 +1034,9 @@ unregister:
 	device_del(&bridge->dev);
 
 free:
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	pci_bus_release_domain_nr(bus, parent);
+#endif
 	kfree(bus);
 	return err;
 }
diff --git a/drivers/pci/remove.c b/drivers/pci/remove.c
index 4c54c75050dc1..0145aef1b9301 100644
--- a/drivers/pci/remove.c
+++ b/drivers/pci/remove.c
@@ -160,6 +160,12 @@ void pci_remove_root_bus(struct pci_bus *bus)
 	pci_remove_bus(bus);
 	host_bridge->bus = NULL;
 
+#ifdef CONFIG_PCI_DOMAINS_GENERIC
+	/* Release domain_nr if it was dynamically allocated */
+	if (host_bridge->domain_nr == PCI_DOMAIN_NR_NOT_SET)
+		pci_bus_release_domain_nr(bus, host_bridge->dev.parent);
+#endif
+
 	/* remove the host bridge */
 	device_del(&host_bridge->dev);
 }
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 2bda4a4e47e81..28af4414f7891 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1726,6 +1726,7 @@ static inline int acpi_pci_bus_find_domain_nr(struct pci_bus *bus)
 { return 0; }
 #endif
 int pci_bus_find_domain_nr(struct pci_bus *bus, struct device *parent);
+void pci_bus_release_domain_nr(struct pci_bus *bus, struct device *parent);
 #endif
 
 /* Some architectures require additional setup to direct VGA traffic */
-- 
GitLab


From a9dfc46c67b52ad43b8e335e28f4cf8002c67793 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Sat, 5 Nov 2022 12:01:14 +0900
Subject: [PATCH 1038/4122] perf probe: Fix to get the DW_AT_decl_file and
 DW_AT_call_file as unsinged data

DWARF version 5 standard Sec 2.14 says that

  Any debugging information entry representing the declaration of an object,
  module, subprogram or type may have DW_AT_decl_file, DW_AT_decl_line and
  DW_AT_decl_column attributes, each of whose value is an unsigned integer
  constant.

So it should be an unsigned integer data. Also, even though the standard
doesn't clearly say the DW_AT_call_file is signed or unsigned, the
elfutils (eu-readelf) interprets it as unsigned integer data and it is
natural to handle it as unsigned integer data as same as DW_AT_decl_file.
This changes the DW_AT_call_file as unsigned integer data too.

Fixes: 3f4460a28fb2f73d ("perf probe: Filter out redundant inline-instances")
Signed-off-by: Masami Hiramatsu <mhiramat@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: stable@vger.kernel.org
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/166761727445.480106.3738447577082071942.stgit@devnote3
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 30b36b525681a..b074144097710 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -315,19 +315,6 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name,
 	return 0;
 }
 
-/* Get attribute and translate it as a sdata */
-static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name,
-			      Dwarf_Sword *result)
-{
-	Dwarf_Attribute attr;
-
-	if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL ||
-	    dwarf_formsdata(&attr, result) != 0)
-		return -ENOENT;
-
-	return 0;
-}
-
 /**
  * die_is_signed_type - Check whether a type DIE is signed or not
  * @tp_die: a DIE of a type
@@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs)
 /* Get the call file index number in CU DIE */
 static int die_get_call_fileno(Dwarf_Die *in_die)
 {
-	Dwarf_Sword idx;
+	Dwarf_Word idx;
 
-	if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0)
+	if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0)
 		return (int)idx;
 	else
 		return -ENOENT;
@@ -478,9 +465,9 @@ static int die_get_call_fileno(Dwarf_Die *in_die)
 /* Get the declared file index number in CU DIE */
 static int die_get_decl_fileno(Dwarf_Die *pdie)
 {
-	Dwarf_Sword idx;
+	Dwarf_Word idx;
 
-	if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0)
+	if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0)
 		return (int)idx;
 	else
 		return -ENOENT;
-- 
GitLab


From 57a196a58421a4b0c45949ae7309f21829aaa77f Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Sun, 18 Sep 2022 19:13:48 -0700
Subject: [PATCH 1039/4122] hugetlb: simplify hugetlb handling in
 follow_page_mask

During discussions of this series [1], it was suggested that hugetlb
handling code in follow_page_mask could be simplified.  At the beginning
of follow_page_mask, there currently is a call to follow_huge_addr which
'may' handle hugetlb pages.  ia64 is the only architecture which provides
a follow_huge_addr routine that does not return error.  Instead, at each
level of the page table a check is made for a hugetlb entry.  If a hugetlb
entry is found, a call to a routine associated with that entry is made.

Currently, there are two checks for hugetlb entries at each page table
level.  The first check is of the form:

        if (p?d_huge())
                page = follow_huge_p?d();

the second check is of the form:

        if (is_hugepd())
                page = follow_huge_pd().

We can replace these checks, as well as the special handling routines such
as follow_huge_p?d() and follow_huge_pd() with a single routine to handle
hugetlb vmas.

A new routine hugetlb_follow_page_mask is called for hugetlb vmas at the
beginning of follow_page_mask.  hugetlb_follow_page_mask will use the
existing routine huge_pte_offset to walk page tables looking for hugetlb
entries.  huge_pte_offset can be overwritten by architectures, and already
handles special cases such as hugepd entries.

[1] https://lore.kernel.org/linux-mm/cover.1661240170.git.baolin.wang@linux.alibaba.com/

[mike.kravetz@oracle.com: remove vma (pmd sharing) per Peter]
  Link: https://lkml.kernel.org/r/20221028181108.119432-1-mike.kravetz@oracle.com
[mike.kravetz@oracle.com: remove left over hugetlb_vma_unlock_read()]
  Link: https://lkml.kernel.org/r/20221030225825.40872-1-mike.kravetz@oracle.com
Link: https://lkml.kernel.org/r/20220919021348.22151-1-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: David Hildenbrand <david@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/ia64/mm/hugetlbpage.c    |  15 ---
 arch/powerpc/mm/hugetlbpage.c |  37 --------
 include/linux/hugetlb.h       |  50 ++--------
 mm/gup.c                      |  80 +++-------------
 mm/hugetlb.c                  | 172 +++++++++++-----------------------
 5 files changed, 76 insertions(+), 278 deletions(-)

diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c
index f993cb36c0626..380d2f3966c98 100644
--- a/arch/ia64/mm/hugetlbpage.c
+++ b/arch/ia64/mm/hugetlbpage.c
@@ -91,21 +91,6 @@ int prepare_hugepage_range(struct file *file,
 	return 0;
 }
 
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long addr, int write)
-{
-	struct page *page;
-	pte_t *ptep;
-
-	if (REGION_NUMBER(addr) != RGN_HPAGE)
-		return ERR_PTR(-EINVAL);
-
-	ptep = huge_pte_offset(mm, addr, HPAGE_SIZE);
-	if (!ptep || pte_none(*ptep))
-		return NULL;
-	page = pte_page(*ptep);
-	page += ((addr & ~HPAGE_MASK) >> PAGE_SHIFT);
-	return page;
-}
 int pmd_huge(pmd_t pmd)
 {
 	return 0;
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 5852a86d990d6..f1ba8d1e8c1a1 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -506,43 +506,6 @@ void hugetlb_free_pgd_range(struct mmu_gather *tlb,
 	} while (addr = next, addr != end);
 }
 
-struct page *follow_huge_pd(struct vm_area_struct *vma,
-			    unsigned long address, hugepd_t hpd,
-			    int flags, int pdshift)
-{
-	pte_t *ptep;
-	spinlock_t *ptl;
-	struct page *page = NULL;
-	unsigned long mask;
-	int shift = hugepd_shift(hpd);
-	struct mm_struct *mm = vma->vm_mm;
-
-retry:
-	/*
-	 * hugepage directory entries are protected by mm->page_table_lock
-	 * Use this instead of huge_pte_lockptr
-	 */
-	ptl = &mm->page_table_lock;
-	spin_lock(ptl);
-
-	ptep = hugepte_offset(hpd, address, pdshift);
-	if (pte_present(*ptep)) {
-		mask = (1UL << shift) - 1;
-		page = pte_page(*ptep);
-		page += ((address & mask) >> PAGE_SHIFT);
-		if (flags & FOLL_GET)
-			get_page(page);
-	} else {
-		if (is_hugetlb_entry_migration(*ptep)) {
-			spin_unlock(ptl);
-			__migration_entry_wait(mm, ptep, ptl);
-			goto retry;
-		}
-	}
-	spin_unlock(ptl);
-	return page;
-}
-
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
 	int shift = __ffs(size);
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8b4f93e848680..4a76c0fc6bbf5 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -149,6 +149,8 @@ int move_hugetlb_page_tables(struct vm_area_struct *vma,
 			     unsigned long len);
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *,
 			    struct vm_area_struct *, struct vm_area_struct *);
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+				unsigned long address, unsigned int flags);
 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
 			 struct page **, struct vm_area_struct **,
 			 unsigned long *, unsigned long *, long, unsigned int,
@@ -209,17 +211,6 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma,
 				unsigned long addr, pte_t *ptep);
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
 				unsigned long *start, unsigned long *end);
-struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write);
-struct page *follow_huge_pd(struct vm_area_struct *vma,
-			    unsigned long address, hugepd_t hpd,
-			    int flags, int pdshift);
-struct page *follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address,
-				 int flags);
-struct page *follow_huge_pud(struct mm_struct *mm, unsigned long address,
-				pud_t *pud, int flags);
-struct page *follow_huge_pgd(struct mm_struct *mm, unsigned long address,
-			     pgd_t *pgd, int flags);
 
 void hugetlb_vma_lock_read(struct vm_area_struct *vma);
 void hugetlb_vma_unlock_read(struct vm_area_struct *vma);
@@ -272,6 +263,12 @@ static inline void adjust_range_if_pmd_sharing_possible(
 {
 }
 
+static inline struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+				unsigned long address, unsigned int flags)
+{
+	BUILD_BUG(); /* should never be compiled in if !CONFIG_HUGETLB_PAGE*/
+}
+
 static inline long follow_hugetlb_page(struct mm_struct *mm,
 			struct vm_area_struct *vma, struct page **pages,
 			struct vm_area_struct **vmas, unsigned long *position,
@@ -282,12 +279,6 @@ static inline long follow_hugetlb_page(struct mm_struct *mm,
 	return 0;
 }
 
-static inline struct page *follow_huge_addr(struct mm_struct *mm,
-					unsigned long address, int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
 static inline int copy_hugetlb_page_range(struct mm_struct *dst,
 					  struct mm_struct *src,
 					  struct vm_area_struct *dst_vma,
@@ -320,31 +311,6 @@ static inline void hugetlb_show_meminfo_node(int nid)
 {
 }
 
-static inline struct page *follow_huge_pd(struct vm_area_struct *vma,
-				unsigned long address, hugepd_t hpd, int flags,
-				int pdshift)
-{
-	return NULL;
-}
-
-static inline struct page *follow_huge_pmd_pte(struct vm_area_struct *vma,
-				unsigned long address, int flags)
-{
-	return NULL;
-}
-
-static inline struct page *follow_huge_pud(struct mm_struct *mm,
-				unsigned long address, pud_t *pud, int flags)
-{
-	return NULL;
-}
-
-static inline struct page *follow_huge_pgd(struct mm_struct *mm,
-				unsigned long address, pgd_t *pgd, int flags)
-{
-	return NULL;
-}
-
 static inline int prepare_hugepage_range(struct file *file,
 				unsigned long addr, unsigned long len)
 {
diff --git a/mm/gup.c b/mm/gup.c
index fe195d47de74a..6b16aecf5d2c5 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -537,18 +537,6 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
 			 (FOLL_PIN | FOLL_GET)))
 		return ERR_PTR(-EINVAL);
-
-	/*
-	 * Considering PTE level hugetlb, like continuous-PTE hugetlb on
-	 * ARM64 architecture.
-	 */
-	if (is_vm_hugetlb_page(vma)) {
-		page = follow_huge_pmd_pte(vma, address, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-
 retry:
 	if (unlikely(pmd_bad(*pmd)))
 		return no_page_table(vma, flags);
@@ -680,20 +668,6 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 	pmdval = READ_ONCE(*pmd);
 	if (pmd_none(pmdval))
 		return no_page_table(vma, flags);
-	if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
-		page = follow_huge_pmd_pte(vma, address, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-	if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(pmd_val(pmdval)), flags,
-				      PMD_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
 retry:
 	if (!pmd_present(pmdval)) {
 		/*
@@ -783,20 +757,6 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma,
 	pud = pud_offset(p4dp, address);
 	if (pud_none(*pud))
 		return no_page_table(vma, flags);
-	if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
-		page = follow_huge_pud(mm, address, pud, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-	if (is_hugepd(__hugepd(pud_val(*pud)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(pud_val(*pud)), flags,
-				      PUD_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
 	if (pud_devmap(*pud)) {
 		ptl = pud_lock(mm, pud);
 		page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
@@ -816,7 +776,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 				    struct follow_page_context *ctx)
 {
 	p4d_t *p4d;
-	struct page *page;
 
 	p4d = p4d_offset(pgdp, address);
 	if (p4d_none(*p4d))
@@ -825,14 +784,6 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma,
 	if (unlikely(p4d_bad(*p4d)))
 		return no_page_table(vma, flags);
 
-	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(p4d_val(*p4d)), flags,
-				      P4D_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
 	return follow_pud_mask(vma, address, p4d, flags, ctx);
 }
 
@@ -870,10 +821,18 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
 
 	ctx->page_mask = 0;
 
-	/* make this handle hugepd */
-	page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
-	if (!IS_ERR(page)) {
-		WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
+	/*
+	 * Call hugetlb_follow_page_mask for hugetlb vmas as it will use
+	 * special hugetlb page table walking code.  This eliminates the
+	 * need to check for hugetlb entries in the general walking code.
+	 *
+	 * hugetlb_follow_page_mask is only for follow_page() handling here.
+	 * Ordinary GUP uses follow_hugetlb_page for hugetlb processing.
+	 */
+	if (is_vm_hugetlb_page(vma)) {
+		page = hugetlb_follow_page_mask(vma, address, flags);
+		if (!page)
+			page = no_page_table(vma, flags);
 		return page;
 	}
 
@@ -882,21 +841,6 @@ static struct page *follow_page_mask(struct vm_area_struct *vma,
 	if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
 		return no_page_table(vma, flags);
 
-	if (pgd_huge(*pgd)) {
-		page = follow_huge_pgd(mm, address, pgd, flags);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-	if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
-		page = follow_huge_pd(vma, address,
-				      __hugepd(pgd_val(*pgd)), flags,
-				      PGDIR_SHIFT);
-		if (page)
-			return page;
-		return no_page_table(vma, flags);
-	}
-
 	return follow_p4d_mask(vma, address, pgd, flags, ctx);
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 546df97c31e4c..0af18c1e4b311 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6209,6 +6209,62 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
 	return false;
 }
 
+struct page *hugetlb_follow_page_mask(struct vm_area_struct *vma,
+				unsigned long address, unsigned int flags)
+{
+	struct hstate *h = hstate_vma(vma);
+	struct mm_struct *mm = vma->vm_mm;
+	unsigned long haddr = address & huge_page_mask(h);
+	struct page *page = NULL;
+	spinlock_t *ptl;
+	pte_t *pte, entry;
+
+	/*
+	 * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
+	 * follow_hugetlb_page().
+	 */
+	if (WARN_ON_ONCE(flags & FOLL_PIN))
+		return NULL;
+
+retry:
+	pte = huge_pte_offset(mm, haddr, huge_page_size(h));
+	if (!pte)
+		return NULL;
+
+	ptl = huge_pte_lock(h, mm, pte);
+	entry = huge_ptep_get(pte);
+	if (pte_present(entry)) {
+		page = pte_page(entry) +
+				((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
+		/*
+		 * Note that page may be a sub-page, and with vmemmap
+		 * optimizations the page struct may be read only.
+		 * try_grab_page() will increase the ref count on the
+		 * head page, so this will be OK.
+		 *
+		 * try_grab_page() should always succeed here, because we hold
+		 * the ptl lock and have verified pte_present().
+		 */
+		if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
+			page = NULL;
+			goto out;
+		}
+	} else {
+		if (is_hugetlb_entry_migration(entry)) {
+			spin_unlock(ptl);
+			__migration_entry_wait_huge(pte, ptl);
+			goto retry;
+		}
+		/*
+		 * hwpoisoned entry is treated as no_page_table in
+		 * follow_page_mask().
+		 */
+	}
+out:
+	spin_unlock(ptl);
+	return page;
+}
+
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 			 struct page **pages, struct vm_area_struct **vmas,
 			 unsigned long *position, unsigned long *nr_pages,
@@ -7201,122 +7257,6 @@ __weak unsigned long hugetlb_mask_last_page(struct hstate *h)
  * These functions are overwritable if your architecture needs its own
  * behavior.
  */
-struct page * __weak
-follow_huge_addr(struct mm_struct *mm, unsigned long address,
-			      int write)
-{
-	return ERR_PTR(-EINVAL);
-}
-
-struct page * __weak
-follow_huge_pd(struct vm_area_struct *vma,
-	       unsigned long address, hugepd_t hpd, int flags, int pdshift)
-{
-	WARN(1, "hugepd follow called with no support for hugepage directory format\n");
-	return NULL;
-}
-
-struct page * __weak
-follow_huge_pmd_pte(struct vm_area_struct *vma, unsigned long address, int flags)
-{
-	struct hstate *h = hstate_vma(vma);
-	struct mm_struct *mm = vma->vm_mm;
-	struct page *page = NULL;
-	spinlock_t *ptl;
-	pte_t *ptep, pte;
-
-	/*
-	 * FOLL_PIN is not supported for follow_page(). Ordinary GUP goes via
-	 * follow_hugetlb_page().
-	 */
-	if (WARN_ON_ONCE(flags & FOLL_PIN))
-		return NULL;
-
-retry:
-	ptep = huge_pte_offset(mm, address, huge_page_size(h));
-	if (!ptep)
-		return NULL;
-
-	ptl = huge_pte_lock(h, mm, ptep);
-	pte = huge_ptep_get(ptep);
-	if (pte_present(pte)) {
-		page = pte_page(pte) +
-			((address & ~huge_page_mask(h)) >> PAGE_SHIFT);
-		/*
-		 * try_grab_page() should always succeed here, because: a) we
-		 * hold the pmd (ptl) lock, and b) we've just checked that the
-		 * huge pmd (head) page is present in the page tables. The ptl
-		 * prevents the head page and tail pages from being rearranged
-		 * in any way. So this page must be available at this point,
-		 * unless the page refcount overflowed:
-		 */
-		if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
-			page = NULL;
-			goto out;
-		}
-	} else {
-		if (is_hugetlb_entry_migration(pte)) {
-			spin_unlock(ptl);
-			__migration_entry_wait_huge(ptep, ptl);
-			goto retry;
-		}
-		/*
-		 * hwpoisoned entry is treated as no_page_table in
-		 * follow_page_mask().
-		 */
-	}
-out:
-	spin_unlock(ptl);
-	return page;
-}
-
-struct page * __weak
-follow_huge_pud(struct mm_struct *mm, unsigned long address,
-		pud_t *pud, int flags)
-{
-	struct page *page = NULL;
-	spinlock_t *ptl;
-	pte_t pte;
-
-	if (WARN_ON_ONCE(flags & FOLL_PIN))
-		return NULL;
-
-retry:
-	ptl = huge_pte_lock(hstate_sizelog(PUD_SHIFT), mm, (pte_t *)pud);
-	if (!pud_huge(*pud))
-		goto out;
-	pte = huge_ptep_get((pte_t *)pud);
-	if (pte_present(pte)) {
-		page = pud_page(*pud) + ((address & ~PUD_MASK) >> PAGE_SHIFT);
-		if (WARN_ON_ONCE(!try_grab_page(page, flags))) {
-			page = NULL;
-			goto out;
-		}
-	} else {
-		if (is_hugetlb_entry_migration(pte)) {
-			spin_unlock(ptl);
-			__migration_entry_wait(mm, (pte_t *)pud, ptl);
-			goto retry;
-		}
-		/*
-		 * hwpoisoned entry is treated as no_page_table in
-		 * follow_page_mask().
-		 */
-	}
-out:
-	spin_unlock(ptl);
-	return page;
-}
-
-struct page * __weak
-follow_huge_pgd(struct mm_struct *mm, unsigned long address, pgd_t *pgd, int flags)
-{
-	if (flags & (FOLL_GET | FOLL_PIN))
-		return NULL;
-
-	return pte_page(*(pte_t *)pgd) + ((address & ~PGDIR_MASK) >> PAGE_SHIFT);
-}
-
 int isolate_hugetlb(struct page *page, struct list_head *list)
 {
 	int ret = 0;
-- 
GitLab


From 0538a82c39e94d49fa6985c6a0101ca819be11ee Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Thu, 13 Oct 2022 15:31:13 -0400
Subject: [PATCH 1040/4122] mm: vmscan: make rotations a secondary factor in
 balancing anon vs file

We noticed a 2% webserver throughput regression after upgrading from 5.6.
This could be tracked down to a shift in the anon/file reclaim balance
(confirmed with swappiness) that resulted in worse reclaim efficiency and
thus more kswapd activity for the same outcome.

The change that exposed the problem is aae466b0052e ("mm/swap: implement
workingset detection for anonymous LRU").  By qualifying swapins based on
their refault distance, it lowered the cost of anon reclaim in this
workload, in turn causing (much) more anon scanning than before.  Scanning
the anon list is more expensive due to the higher ratio of mmapped pages
that may rotate during reclaim, and so the result was an increase in %sys
time.

Right now, rotations aren't considered a cost when balancing scan pressure
between LRUs.  We can end up with very few file refaults putting all the
scan pressure on hot anon pages that are rotated en masse, don't get
reclaimed, and never push back on the file LRU again.  We still only
reclaim file cache in that case, but we burn a lot CPU rotating anon
pages.  It's "fair" from an LRU age POV, but doesn't reflect the real cost
it imposes on the system.

Consider rotations as a secondary factor in balancing the LRUs.  This
doesn't attempt to make a precise comparison between IO cost and CPU cost,
it just says: if reloads are about comparable between the lists, or
rotations are overwhelmingly different, adjust for CPU work.

This fixed the regression on our webservers.  It has since been deployed
to the entire Meta fleet and hasn't caused any problems.

Link: https://lkml.kernel.org/r/20221013193113.726425-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h |  5 +++--
 mm/swap.c            | 22 +++++++++++++++++-----
 mm/vmscan.c          |  4 +++-
 mm/workingset.c      |  2 +-
 4 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index a18cf4b7c724c..369d7799205dd 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -384,8 +384,9 @@ extern unsigned long totalreserve_pages;
 
 
 /* linux/mm/swap.c */
-void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages);
-void lru_note_cost_folio(struct folio *);
+void lru_note_cost(struct lruvec *lruvec, bool file,
+		   unsigned int nr_io, unsigned int nr_rotated);
+void lru_note_cost_refault(struct folio *);
 void folio_add_lru(struct folio *);
 void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
 void lru_cache_add(struct page *);
diff --git a/mm/swap.c b/mm/swap.c
index 955930f41d20c..2f12a2ee1d3af 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -295,8 +295,20 @@ void folio_rotate_reclaimable(struct folio *folio)
 	}
 }
 
-void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
+void lru_note_cost(struct lruvec *lruvec, bool file,
+		   unsigned int nr_io, unsigned int nr_rotated)
 {
+	unsigned long cost;
+
+	/*
+	 * Reflect the relative cost of incurring IO and spending CPU
+	 * time on rotations. This doesn't attempt to make a precise
+	 * comparison, it just says: if reloads are about comparable
+	 * between the LRU lists, or rotations are overwhelmingly
+	 * different between them, adjust scan balance for CPU work.
+	 */
+	cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;
+
 	do {
 		unsigned long lrusize;
 
@@ -310,9 +322,9 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
 		spin_lock_irq(&lruvec->lru_lock);
 		/* Record cost event */
 		if (file)
-			lruvec->file_cost += nr_pages;
+			lruvec->file_cost += cost;
 		else
-			lruvec->anon_cost += nr_pages;
+			lruvec->anon_cost += cost;
 
 		/*
 		 * Decay previous events
@@ -335,10 +347,10 @@ void lru_note_cost(struct lruvec *lruvec, bool file, unsigned int nr_pages)
 	} while ((lruvec = parent_lruvec(lruvec)));
 }
 
-void lru_note_cost_folio(struct folio *folio)
+void lru_note_cost_refault(struct folio *folio)
 {
 	lru_note_cost(folio_lruvec(folio), folio_is_file_lru(folio),
-			folio_nr_pages(folio));
+		      folio_nr_pages(folio), 0);
 }
 
 static void folio_activate_fn(struct lruvec *lruvec, struct folio *folio)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 04d8b88e52164..ffe402e095d35 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2499,7 +2499,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 	__count_vm_events(PGSTEAL_ANON + file, nr_reclaimed);
 	spin_unlock_irq(&lruvec->lru_lock);
 
-	lru_note_cost(lruvec, file, stat.nr_pageout);
+	lru_note_cost(lruvec, file, stat.nr_pageout, nr_scanned - nr_reclaimed);
 	mem_cgroup_uncharge_list(&folio_list);
 	free_unref_page_list(&folio_list);
 
@@ -2639,6 +2639,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
 	spin_unlock_irq(&lruvec->lru_lock);
 
+	if (nr_rotated)
+		lru_note_cost(lruvec, file, 0, nr_rotated);
 	mem_cgroup_uncharge_list(&l_active);
 	free_unref_page_list(&l_active);
 	trace_mm_vmscan_lru_shrink_active(pgdat->node_id, nr_taken, nr_activate,
diff --git a/mm/workingset.c b/mm/workingset.c
index ae7e984b23c6b..d2d02978588cf 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -493,7 +493,7 @@ void workingset_refault(struct folio *folio, void *shadow)
 	if (workingset) {
 		folio_set_workingset(folio);
 		/* XXX: Move to lru_cache_add() when it supports new vs putback */
-		lru_note_cost_folio(folio);
+		lru_note_cost_refault(folio);
 		mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr);
 	}
 out:
-- 
GitLab


From de2baa880de3d5eecf2e46ebdee0aaeb565f5917 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 Oct 2022 10:39:18 -0400
Subject: [PATCH 1041/4122] selftests/vm: use memfd for uffd hugetlb tests

Patch series "selftests/vm: Drop hugetlb mntpoint in run_vmtests.sh", v2.

Clean the code up so we can use the same memfd for both hugetlb and shmem
which is cleaner.


This patch (of 4):

We already used memfd for shmem test, move it forward with hugetlb too so
that we don't need user to specify the hugetlb file path explicitly when
running hugetlb shared tests.

Link: https://lkml.kernel.org/r/20221014143921.93887-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20221014143921.93887-2-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/userfaultfd.c | 62 ++++++++----------------
 1 file changed, 21 insertions(+), 41 deletions(-)

diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 297f250c1d956..7f22844ed7040 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -93,10 +93,8 @@ static volatile bool test_uffdio_zeropage_eexist = true;
 static bool test_uffdio_wp = true;
 /* Whether to test uffd minor faults */
 static bool test_uffdio_minor = false;
-
 static bool map_shared;
-static int shm_fd;
-static int huge_fd;
+static int mem_fd;
 static unsigned long long *count_verify;
 static int uffd = -1;
 static int uffd_flags, finished, *pipefd;
@@ -143,7 +141,7 @@ const char *examples =
     "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
     "./userfaultfd hugetlb 256 50\n\n"
     "# Run the same hugetlb test but using shared file:\n"
-    "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
+    "./userfaultfd hugetlb_shared 256 50\n\n"
     "# 10MiB-~6GiB 999 bounces anonymous test, "
     "continue forever unless an error triggers\n"
     "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
@@ -260,35 +258,21 @@ static void hugetlb_release_pages(char *rel_area)
 
 static void hugetlb_allocate_area(void **alloc_area, bool is_src)
 {
+	off_t size = nr_pages * page_size;
+	off_t offset = is_src ? 0 : size;
 	void *area_alias = NULL;
 	char **alloc_area_alias;
 
-	if (!map_shared)
-		*alloc_area = mmap(NULL,
-			nr_pages * page_size,
-			PROT_READ | PROT_WRITE,
-			MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB |
-				(is_src ? 0 : MAP_NORESERVE),
-			-1,
-			0);
-	else
-		*alloc_area = mmap(NULL,
-			nr_pages * page_size,
-			PROT_READ | PROT_WRITE,
-			MAP_SHARED |
-				(is_src ? 0 : MAP_NORESERVE),
-			huge_fd,
-			is_src ? 0 : nr_pages * page_size);
+	*alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			   (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+			   (is_src ? 0 : MAP_NORESERVE),
+			   mem_fd, offset);
 	if (*alloc_area == MAP_FAILED)
 		err("mmap of hugetlbfs file failed");
 
 	if (map_shared) {
-		area_alias = mmap(NULL,
-			nr_pages * page_size,
-			PROT_READ | PROT_WRITE,
-			MAP_SHARED,
-			huge_fd,
-			is_src ? 0 : nr_pages * page_size);
+		area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE,
+				  MAP_SHARED, mem_fd, offset);
 		if (area_alias == MAP_FAILED)
 			err("mmap of hugetlb file alias failed");
 	}
@@ -334,14 +318,14 @@ static void shmem_allocate_area(void **alloc_area, bool is_src)
 	}
 
 	*alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
-			   shm_fd, offset);
+			   mem_fd, offset);
 	if (*alloc_area == MAP_FAILED)
 		err("mmap of memfd failed");
 	if (test_collapse && *alloc_area != p)
 		err("mmap of memfd failed at %p", p);
 
 	area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
-			  shm_fd, offset);
+			  mem_fd, offset);
 	if (area_alias == MAP_FAILED)
 		err("mmap of memfd alias failed");
 	if (test_collapse && area_alias != p_alias)
@@ -1841,21 +1825,17 @@ int main(int argc, char **argv)
 	}
 	nr_pages = nr_pages_per_cpu * nr_cpus;
 
-	if (test_type == TEST_HUGETLB && map_shared) {
-		if (argc < 5)
-			usage();
-		huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
-		if (huge_fd < 0)
-			err("Open of %s failed", argv[4]);
-		if (ftruncate(huge_fd, 0))
-			err("ftruncate %s to size 0 failed", argv[4]);
-	} else if (test_type == TEST_SHMEM) {
-		shm_fd = memfd_create(argv[0], 0);
-		if (shm_fd < 0)
+	if (test_type == TEST_SHMEM || test_type == TEST_HUGETLB) {
+		unsigned int memfd_flags = 0;
+
+		if (test_type == TEST_HUGETLB)
+			memfd_flags = MFD_HUGETLB;
+		mem_fd = memfd_create(argv[0], memfd_flags);
+		if (mem_fd < 0)
 			err("memfd_create");
-		if (ftruncate(shm_fd, nr_pages * page_size * 2))
+		if (ftruncate(mem_fd, nr_pages * page_size * 2))
 			err("ftruncate");
-		if (fallocate(shm_fd,
+		if (fallocate(mem_fd,
 			      FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
 			      nr_pages * page_size * 2))
 			err("fallocate");
-- 
GitLab


From 62f33fa228003a419a9484eed5133447a280387c Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 Oct 2022 10:39:19 -0400
Subject: [PATCH 1042/4122] selftests/vm: use memfd for hugetlb-madvise test

For dropping the hugetlb mountpoint in run_vmtests.sh.  Since no parameter
is needed, drop USAGE too.

Link: https://lkml.kernel.org/r/20221014143921.93887-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/hugetlb-madvise.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c
index 3c9943131881e..f96435b709864 100644
--- a/tools/testing/selftests/vm/hugetlb-madvise.c
+++ b/tools/testing/selftests/vm/hugetlb-madvise.c
@@ -12,6 +12,7 @@
  * directory.
  */
 
+#define _GNU_SOURCE
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
@@ -19,7 +20,6 @@
 #define __USE_GNU
 #include <fcntl.h>
 
-#define USAGE	"USAGE: %s <hugepagefile_name>\n"
 #define MIN_FREE_PAGES	20
 #define NR_HUGE_PAGES	10	/* common number of pages to map/allocate */
 
@@ -103,11 +103,6 @@ int main(int argc, char **argv)
 	int fd;
 	int ret;
 
-	if (argc != 2) {
-		printf(USAGE, argv[0]);
-		exit(1);
-	}
-
 	huge_page_size = default_huge_page_size();
 	if (!huge_page_size) {
 		printf("Unable to determine huge page size, exiting!\n");
@@ -125,9 +120,9 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	fd = open(argv[1], O_CREAT | O_RDWR, 0755);
+	fd = memfd_create(argv[0], MFD_HUGETLB);
 	if (fd < 0) {
-		perror("Open failed");
+		perror("memfd_create() failed");
 		exit(1);
 	}
 
@@ -406,6 +401,5 @@ int main(int argc, char **argv)
 	(void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
 
 	close(fd);
-	unlink(argv[1]);
 	return 0;
 }
-- 
GitLab


From 4705700d4fef3178e93230f53c2c528569744bb6 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 Oct 2022 10:40:13 -0400
Subject: [PATCH 1043/4122] selftests/vm: use memfd for hugepage-mremap test

For dropping the hugetlb mountpoint in run_vmtests.sh.  Cleaned it up a
little bit around the changed codes.

Link: https://lkml.kernel.org/r/20221014144013.94027-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/hugepage-mremap.c | 21 +++++++++-----------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/vm/hugepage-mremap.c b/tools/testing/selftests/vm/hugepage-mremap.c
index e63a0214f6399..e53b5eaa8fced 100644
--- a/tools/testing/selftests/vm/hugepage-mremap.c
+++ b/tools/testing/selftests/vm/hugepage-mremap.c
@@ -22,6 +22,7 @@
 #include <sys/syscall.h> /* Definition of SYS_* constants */
 #include <linux/userfaultfd.h>
 #include <sys/ioctl.h>
+#include <string.h>
 
 #define DEFAULT_LENGTH_MB 10UL
 #define MB_TO_BYTES(x) (x * 1024 * 1024)
@@ -108,26 +109,23 @@ static void register_region_with_uffd(char *addr, size_t len)
 int main(int argc, char *argv[])
 {
 	size_t length = 0;
+	int ret = 0, fd;
 
-	if (argc != 2 && argc != 3) {
-		printf("Usage: %s [length_in_MB] <hugetlb_file>\n", argv[0]);
+	if (argc >= 2 && !strcmp(argv[1], "-h")) {
+		printf("Usage: %s [length_in_MB]\n", argv[0]);
 		exit(1);
 	}
 
 	/* Read memory length as the first arg if valid, otherwise fallback to
 	 * the default length.
 	 */
-	if (argc == 3)
-		length = argc > 2 ? (size_t)atoi(argv[1]) : 0UL;
+	if (argc >= 2)
+		length = (size_t)atoi(argv[1]);
+	else
+		length = DEFAULT_LENGTH_MB;
 
-	length = length > 0 ? length : DEFAULT_LENGTH_MB;
 	length = MB_TO_BYTES(length);
-
-	int ret = 0;
-
-	/* last arg is the hugetlb file name */
-	int fd = open(argv[argc-1], O_CREAT | O_RDWR, 0755);
-
+	fd = memfd_create(argv[0], MFD_HUGETLB);
 	if (fd < 0) {
 		perror("Open failed");
 		exit(1);
@@ -185,7 +183,6 @@ int main(int argc, char *argv[])
 	}
 
 	close(fd);
-	unlink(argv[argc-1]);
 
 	return ret;
 }
-- 
GitLab


From 0796c7b8be84415994fa37e9a022e5595d915dd7 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 14 Oct 2022 10:40:15 -0400
Subject: [PATCH 1044/4122] selftests/vm: drop mnt point for hugetlb in
 run_vmtests.sh

After converting all the three relevant testcases (uffd, madvise, mremap)
to use memfd, no test will need the hugetlb mount point anymore.  Drop the
code.

Link: https://lkml.kernel.org/r/20221014144015.94039-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Axel Rasmussen <axelrasmussen@google.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests.sh | 18 +++---------------
 1 file changed, 3 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index e780e76c26b87..0dc9f545a32d6 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -5,7 +5,6 @@
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
-mnt=./huge
 exitcode=0
 
 #get huge pagesize and freepages from /proc/meminfo
@@ -84,9 +83,6 @@ run_test() {
 	fi
 }
 
-mkdir "$mnt"
-mount -t hugetlbfs none "$mnt"
-
 run_test ./hugepage-mmap
 
 shmmax=$(cat /proc/sys/kernel/shmmax)
@@ -98,14 +94,9 @@ echo "$shmmax" > /proc/sys/kernel/shmmax
 echo "$shmall" > /proc/sys/kernel/shmall
 
 run_test ./map_hugetlb
-
-run_test ./hugepage-mremap "$mnt"/huge_mremap
-rm -f "$mnt"/huge_mremap
-
+run_test ./hugepage-mremap
 run_test ./hugepage-vmemmap
-
-run_test ./hugetlb-madvise "$mnt"/madvise-test
-rm -f "$mnt"/madvise-test
+run_test ./hugetlb-madvise
 
 echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
 echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
@@ -126,14 +117,11 @@ for mod in "${uffd_mods[@]}"; do
 	# Hugetlb tests require source and destination huge pages. Pass in half
 	# the size ($half_ufd_size_MB), which is used for *each*.
 	run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32
-	run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32 "$mnt"/uffd-test
-	rm -f "$mnt"/uffd-test
+	run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32
 	run_test ./userfaultfd shmem${mod} 20 16
 done
 
 #cleanup
-umount "$mnt"
-rm -rf "$mnt"
 echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
 
 run_test ./compaction_test
-- 
GitLab


From d03c376d9066532551dc56837c7c5490e4fcbbfe Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 22 Sep 2022 10:42:03 -0500
Subject: [PATCH 1045/4122] mm/hugetlb: add folio support to hugetlb specific
 flag macros

Patch series "begin converting hugetlb code to folios", v4.

This patch series starts the conversion of the hugetlb code to operate on
struct folios rather than struct pages.  This removes the ambiguitiy of
whether functions are operating on head pages, tail pages of compound
pages, or base pages.

This series passes the linux test project hugetlb test cases.

Patch 1 adds hugeltb specific page macros that can operate on folios.

Patch 2 adds the private field of the first tail page to struct page.  For
32-bit, _private_1 alinging with page[1].private was confirmed by using
pahole.

Patch 3 introduces hugetlb subpool helper functions which operate on
struct folios. These patches were tested using the hugepage-mmap.c
selftest along with the migratepages command.

Patch 4 converts hugetlb_delete_from_page_cache() to use folios.

Patch 5 adds a folio_hstate() function to get hstate information from a
folio and adds a user of folio_hstate().

Bpftrace was used to track time spent in the free_huge_pages function
during the ltp test cases as it is a caller of the hugetlb subpool
functions. From the histogram, the performance is similar before and
after the patch series.

Time spent in 'free_huge_page'

6.0.0-rc2.master.20220823
@nsecs:
[256, 512)         14770 |@@@@@@@@@@@@@@@@@@@@@@@@@@@
			 |@@@@@@@@@@@@@@@@@@@@@@@@@			      |
[512, 1K)            155 |                                                    |
[1K, 2K)             169 |                                                    |
[2K, 4K)              50 |                                                    |
[4K, 8K)              14 |                                                    |
[8K, 16K)              3 |                                                    |
[16K, 32K)             3 |                                                    |


6.0.0-rc2.master.20220823 + patch series
@nsecs:
[256, 512)         13678 |@@@@@@@@@@@@@@@@@@@@@@@@@@@			      |
			 |@@@@@@@@@@@@@@@@@@@@@@@@@			      |
[512, 1K)            142 |                                                    |
[1K, 2K)             199 |                                                    |
[2K, 4K)              44 |                                                    |
[4K, 8K)              13 |                                                    |
[8K, 16K)              4 |                                                    |
[16K, 32K)             1 |                                                    |


This patch (of 5):

Allow the macros which test, set, and clear hugetlb specific page flags to
take a hugetlb folio as an input.  The macrros are generated as
folio_{test, set, clear}_hugetlb_{restore_reserve, migratable, temporary,
freed, vmemmap_optimized, raw_hwp_unreliable}.

Link: https://lkml.kernel.org/r/20220922154207.1575343-1-sidhartha.kumar@oracle.com
Link: https://lkml.kernel.org/r/20220922154207.1575343-2-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Colin Cross <ccross@google.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 4a76c0fc6bbf5..3ff5d2dd3ca3d 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -589,26 +589,50 @@ enum hugetlb_page_flags {
  */
 #ifdef CONFIG_HUGETLB_PAGE
 #define TESTHPAGEFLAG(uname, flname)				\
+static __always_inline						\
+bool folio_test_hugetlb_##flname(struct folio *folio)		\
+	{	void *private = &folio->private;		\
+		return test_bit(HPG_##flname, private);		\
+	}							\
 static inline int HPage##uname(struct page *page)		\
 	{ return test_bit(HPG_##flname, &(page->private)); }
 
 #define SETHPAGEFLAG(uname, flname)				\
+static __always_inline						\
+void folio_set_hugetlb_##flname(struct folio *folio)		\
+	{	void *private = &folio->private;		\
+		set_bit(HPG_##flname, private);			\
+	}							\
 static inline void SetHPage##uname(struct page *page)		\
 	{ set_bit(HPG_##flname, &(page->private)); }
 
 #define CLEARHPAGEFLAG(uname, flname)				\
+static __always_inline						\
+void folio_clear_hugetlb_##flname(struct folio *folio)		\
+	{	void *private = &folio->private;		\
+		clear_bit(HPG_##flname, private);		\
+	}							\
 static inline void ClearHPage##uname(struct page *page)		\
 	{ clear_bit(HPG_##flname, &(page->private)); }
 #else
 #define TESTHPAGEFLAG(uname, flname)				\
+static inline bool						\
+folio_test_hugetlb_##flname(struct folio *folio)		\
+	{ return 0; }						\
 static inline int HPage##uname(struct page *page)		\
 	{ return 0; }
 
 #define SETHPAGEFLAG(uname, flname)				\
+static inline void						\
+folio_set_hugetlb_##flname(struct folio *folio) 		\
+	{ }							\
 static inline void SetHPage##uname(struct page *page)		\
 	{ }
 
 #define CLEARHPAGEFLAG(uname, flname)				\
+static inline void						\
+folio_clear_hugetlb_##flname(struct folio *folio)		\
+	{ }							\
 static inline void ClearHPage##uname(struct page *page)		\
 	{ }
 #endif
-- 
GitLab


From d340625f4849ab5dbfebbc7d84709fbfcd39e52f Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 22 Sep 2022 10:42:04 -0500
Subject: [PATCH 1046/4122] mm: add private field of first tail to struct page
 and struct folio

Allow struct folio to store hugetlb metadata that is contained in the
private field of the first tail page.  On 32-bit, _private_1 aligns with
page[1].private.

Link: https://lkml.kernel.org/r/20220922154207.1575343-3-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Acked-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Colin Cross <ccross@google.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 500e536796ca4..2d5b1575ffe08 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -144,6 +144,7 @@ struct page {
 			atomic_t compound_pincount;
 #ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
+			unsigned long _private_1;
 #endif
 		};
 		struct {	/* Second tail page of compound page */
@@ -264,6 +265,7 @@ struct page {
  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
+ * @_private_1: Do not use directly, call folio_get_private_1().
  *
  * A folio is a physically, virtually and logically contiguous set
  * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -311,6 +313,7 @@ struct folio {
 #ifdef CONFIG_64BIT
 	unsigned int _folio_nr_pages;
 #endif
+	unsigned long _private_1;
 };
 
 #define FOLIO_MATCH(pg, fl)						\
@@ -338,6 +341,7 @@ FOLIO_MATCH(compound_mapcount, _total_mapcount);
 FOLIO_MATCH(compound_pincount, _pincount);
 #ifdef CONFIG_64BIT
 FOLIO_MATCH(compound_nr, _folio_nr_pages);
+FOLIO_MATCH(_private_1, _private_1);
 #endif
 #undef FOLIO_MATCH
 
@@ -383,6 +387,16 @@ static inline void *folio_get_private(struct folio *folio)
 	return folio->private;
 }
 
+static inline void folio_set_private_1(struct folio *folio, unsigned long private)
+{
+	folio->_private_1 = private;
+}
+
+static inline unsigned long folio_get_private_1(struct folio *folio)
+{
+	return folio->_private_1;
+}
+
 struct page_frag_cache {
 	void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-- 
GitLab


From 149562f7509404c382c32c3fa8a6ba356135e5cf Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 22 Sep 2022 10:42:05 -0500
Subject: [PATCH 1047/4122] mm/hugetlb: add hugetlb_folio_subpool() helpers

Allow hugetlbfs_migrate_folio to check and read subpool information by
passing in a folio.

Link: https://lkml.kernel.org/r/20220922154207.1575343-4-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Colin Cross <ccross@google.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    |  8 ++++----
 include/linux/hugetlb.h | 15 +++++++++++++--
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dd54f67e47fdf..c5137607e523c 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1091,10 +1091,10 @@ static int hugetlbfs_migrate_folio(struct address_space *mapping,
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
 
-	if (hugetlb_page_subpool(&src->page)) {
-		hugetlb_set_page_subpool(&dst->page,
-					hugetlb_page_subpool(&src->page));
-		hugetlb_set_page_subpool(&src->page, NULL);
+	if (hugetlb_folio_subpool(src)) {
+		hugetlb_set_folio_subpool(dst,
+					hugetlb_folio_subpool(src));
+		hugetlb_set_folio_subpool(src, NULL);
 	}
 
 	if (mode != MIGRATE_SYNC_NO_COPY)
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3ff5d2dd3ca3d..496d02bdb997e 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -718,18 +718,29 @@ extern unsigned int default_hstate_idx;
 
 #define default_hstate (hstates[default_hstate_idx])
 
+static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
+{
+	return (void *)folio_get_private_1(folio);
+}
+
 /*
  * hugetlb page subpool pointer located in hpage[1].private
  */
 static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 {
-	return (void *)page_private(hpage + SUBPAGE_INDEX_SUBPOOL);
+	return hugetlb_folio_subpool(page_folio(hpage));
+}
+
+static inline void hugetlb_set_folio_subpool(struct folio *folio,
+					struct hugepage_subpool *subpool)
+{
+	folio_set_private_1(folio, (unsigned long)subpool);
 }
 
 static inline void hugetlb_set_page_subpool(struct page *hpage,
 					struct hugepage_subpool *subpool)
 {
-	set_page_private(hpage + SUBPAGE_INDEX_SUBPOOL, (unsigned long)subpool);
+	hugetlb_set_folio_subpool(page_folio(hpage), subpool);
 }
 
 static inline struct hstate *hstate_file(struct file *f)
-- 
GitLab


From ece62684dcfb714b7d8452056b4a33d426b16457 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 22 Sep 2022 10:42:06 -0500
Subject: [PATCH 1048/4122] hugetlbfs: convert hugetlb_delete_from_page_cache()
 to use folios

Remove the last caller of delete_from_page_cache() by converting the code
to its folio equivalent.

Link: https://lkml.kernel.org/r/20220922154207.1575343-5-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Colin Cross <ccross@google.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/hugetlbfs/inode.c    | 12 ++++++------
 include/linux/pagemap.h |  1 -
 mm/folio-compat.c       |  5 -----
 3 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index c5137607e523c..00495fc128c5b 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -364,11 +364,11 @@ static int hugetlbfs_write_end(struct file *file, struct address_space *mapping,
 	return -EINVAL;
 }
 
-static void hugetlb_delete_from_page_cache(struct page *page)
+static void hugetlb_delete_from_page_cache(struct folio *folio)
 {
-	ClearPageDirty(page);
-	ClearPageUptodate(page);
-	delete_from_page_cache(page);
+	folio_clear_dirty(folio);
+	folio_clear_uptodate(folio);
+	filemap_remove_folio(folio);
 }
 
 /*
@@ -574,8 +574,8 @@ static bool remove_inode_single_folio(struct hstate *h, struct inode *inode,
 	 * map could fail.  Correspondingly, the subpool and global
 	 * reserve usage count can need to be adjusted.
 	 */
-	VM_BUG_ON(HPageRestoreReserve(&folio->page));
-	hugetlb_delete_from_page_cache(&folio->page);
+	VM_BUG_ON_FOLIO(folio_test_hugetlb_restore_reserve(folio), folio);
+	hugetlb_delete_from_page_cache(folio);
 	ret = true;
 	if (!truncate_op) {
 		if (unlikely(hugetlb_unreserve_pages(inode, index,
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index bbccb40442224..060ee98474ef5 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1102,7 +1102,6 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
 int filemap_add_folio(struct address_space *mapping, struct folio *folio,
 		pgoff_t index, gfp_t gfp);
 void filemap_remove_folio(struct folio *folio);
-void delete_from_page_cache(struct page *page);
 void __filemap_remove_folio(struct folio *folio, void *shadow);
 void replace_page_cache_page(struct page *old, struct page *new);
 void delete_from_page_cache_batch(struct address_space *mapping,
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index e1e23b4947d73..8ae39c06da62a 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -124,11 +124,6 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
-void delete_from_page_cache(struct page *page)
-{
-	return filemap_remove_folio(page_folio(page));
-}
-
 int try_to_release_page(struct page *page, gfp_t gfp)
 {
 	return filemap_release_folio(page_folio(page), gfp);
-- 
GitLab


From e51da3a9b6c2f67879880259a25c51dbda01c462 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Thu, 22 Sep 2022 10:42:07 -0500
Subject: [PATCH 1049/4122] mm/hugetlb: add folio_hstate()

Helper function to retrieve hstate information from a hugetlb folio.

Link: https://lkml.kernel.org/r/20220922154207.1575343-6-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Colin Cross <ccross@google.com>
Cc: David Howells <dhowells@redhat.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 14 ++++++++++++--
 mm/migrate.c            |  2 +-
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 496d02bdb997e..20a0d5a083956 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -824,10 +824,15 @@ static inline pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
 }
 #endif
 
+static inline struct hstate *folio_hstate(struct folio *folio)
+{
+	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+	return size_to_hstate(folio_size(folio));
+}
+
 static inline struct hstate *page_hstate(struct page *page)
 {
-	VM_BUG_ON_PAGE(!PageHuge(page), page);
-	return size_to_hstate(page_size(page));
+	return folio_hstate(page_folio(page));
 }
 
 static inline unsigned hstate_index_to_shift(unsigned index)
@@ -1036,6 +1041,11 @@ static inline struct hstate *hstate_vma(struct vm_area_struct *vma)
 	return NULL;
 }
 
+static inline struct hstate *folio_hstate(struct folio *folio)
+{
+	return NULL;
+}
+
 static inline struct hstate *page_hstate(struct page *page)
 {
 	return NULL;
diff --git a/mm/migrate.c b/mm/migrate.c
index dff333593a8ae..556cb1c86e534 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1620,7 +1620,7 @@ struct page *alloc_migration_target(struct page *page, unsigned long private)
 		nid = folio_nid(folio);
 
 	if (folio_test_hugetlb(folio)) {
-		struct hstate *h = page_hstate(&folio->page);
+		struct hstate *h = folio_hstate(folio);
 
 		gfp_mask = htlb_modify_alloc_mask(h, gfp_mask);
 		return alloc_huge_page_nodemask(h, nid, mtc->nmask, gfp_mask);
-- 
GitLab


From d8e454eb44473b2270e2675fb44a9d79dee36097 Mon Sep 17 00:00:00 2001
From: Ma Wupeng <mawupeng1@huawei.com>
Date: Fri, 14 Oct 2022 09:39:31 +0800
Subject: [PATCH 1050/4122] mm/rmap: fix comment in anon_vma_clone()

Commit 2555283eb40d ("mm/rmap: Fix anon_vma->degree ambiguity leading to
double-reuse") use num_children and num_active_vmas to replace the origin
degree to fix anon_vma UAF problem.  Update the comment in anon_vma_clone
to fit this change.

Link: https://lkml.kernel.org/r/20221014013931.1565969-1-mawupeng1@huawei.com
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/rmap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 2ec925e5fa6a9..92ed6fe3d038f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -315,8 +315,8 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 
  enomem_failure:
 	/*
-	 * dst->anon_vma is dropped here otherwise its degree can be incorrectly
-	 * decremented in unlink_anon_vmas().
+	 * dst->anon_vma is dropped here otherwise its num_active_vmas can
+	 * be incorrectly decremented in unlink_anon_vmas().
 	 * We can safely do this because callers of anon_vma_clone() don't care
 	 * about dst->anon_vma if anon_vma_clone() failed.
 	 */
-- 
GitLab


From 3392ca121872dd8c33015c7703d4981c78819be3 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Mon, 17 Oct 2022 09:17:59 -0700
Subject: [PATCH 1051/4122] filemap: find_lock_entries() now updates start
 offset

Patch series "Rework find_get_entries() and find_lock_entries()", v3.

Originally the callers of find_get_entries() and find_lock_entries() were
keeping track of the start index themselves as they traverse the search
range.

This resulted in hacky code such as in shmem_undo_range():

			index = folio->index + folio_nr_pages(folio) - 1;

where the - 1 is only present to stay in the right spot after incrementing
index later.  This sort of calculation was also being done on every folio
despite not even using index later within that function.

These patches change find_get_entries() and find_lock_entries() to
calculate the new index instead of leaving it to the callers so we can
avoid all these complications.


This patch (of 2):

Initially, find_lock_entries() was being passed in the start offset as a
value.  That left the calculation of the offset to the callers.  This led
to complexity in the callers trying to keep track of the index.

Now find_lock_entries() takes in a pointer to the start offset and updates
the value to be directly after the last entry found.  If no entry is
found, the offset is not changed.  This gets rid of multiple hacky
calculations that kept track of the start offset.

Link: https://lkml.kernel.org/r/20221017161800.2003-1-vishal.moola@gmail.com
Link: https://lkml.kernel.org/r/20221017161800.2003-2-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/filemap.c  | 15 ++++++++++++---
 mm/internal.h |  2 +-
 mm/shmem.c    |  8 ++------
 mm/truncate.c | 11 +++--------
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 08341616ae7a1..3a73b7b8c2a4d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2085,16 +2085,16 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
  *
  * Return: The number of entries which were found.
  */
-unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
 {
-	XA_STATE(xas, &mapping->i_pages, start);
+	XA_STATE(xas, &mapping->i_pages, *start);
 	struct folio *folio;
 
 	rcu_read_lock();
 	while ((folio = find_get_entry(&xas, end, XA_PRESENT))) {
 		if (!xa_is_value(folio)) {
-			if (folio->index < start)
+			if (folio->index < *start)
 				goto put;
 			if (folio->index + folio_nr_pages(folio) - 1 > end)
 				goto put;
@@ -2117,6 +2117,15 @@ put:
 	}
 	rcu_read_unlock();
 
+	if (folio_batch_count(fbatch)) {
+		unsigned long nr = 1;
+		int idx = folio_batch_count(fbatch) - 1;
+
+		folio = fbatch->folios[idx];
+		if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+			nr = folio_nr_pages(folio);
+		*start = indices[idx] + nr;
+	}
 	return folio_batch_count(fbatch);
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 6b7ef495b56d3..c504ac7267e08 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -106,7 +106,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
 	force_page_cache_ra(&ractl, nr_to_read);
 }
 
-unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
 		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
diff --git a/mm/shmem.c b/mm/shmem.c
index c1d8b8a1aa3b4..6b560c3915af0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -922,21 +922,18 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 
 	folio_batch_init(&fbatch);
 	index = start;
-	while (index < end && find_lock_entries(mapping, index, end - 1,
+	while (index < end && find_lock_entries(mapping, &index, end - 1,
 			&fbatch, indices)) {
 		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 			folio = fbatch.folios[i];
 
-			index = indices[i];
-
 			if (xa_is_value(folio)) {
 				if (unfalloc)
 					continue;
 				nr_swaps_freed += !shmem_free_swap(mapping,
-								index, folio);
+							indices[i], folio);
 				continue;
 			}
-			index += folio_nr_pages(folio) - 1;
 
 			if (!unfalloc || !folio_test_uptodate(folio))
 				truncate_inode_folio(mapping, folio);
@@ -945,7 +942,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		folio_batch_remove_exceptionals(&fbatch);
 		folio_batch_release(&fbatch);
 		cond_resched();
-		index++;
 	}
 
 	same_folio = (lstart >> PAGE_SHIFT) == (lend >> PAGE_SHIFT);
diff --git a/mm/truncate.c b/mm/truncate.c
index c0be77e5c0083..b6065a494c715 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -361,9 +361,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
 
 	folio_batch_init(&fbatch);
 	index = start;
-	while (index < end && find_lock_entries(mapping, index, end - 1,
+	while (index < end && find_lock_entries(mapping, &index, end - 1,
 			&fbatch, indices)) {
-		index = indices[folio_batch_count(&fbatch) - 1] + 1;
 		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
 		for (i = 0; i < folio_batch_count(&fbatch); i++)
 			truncate_cleanup_folio(fbatch.folios[i]);
@@ -510,20 +509,17 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
 	int i;
 
 	folio_batch_init(&fbatch);
-	while (find_lock_entries(mapping, index, end, &fbatch, indices)) {
+	while (find_lock_entries(mapping, &index, end, &fbatch, indices)) {
 		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 			struct folio *folio = fbatch.folios[i];
 
 			/* We rely upon deletion not changing folio->index */
-			index = indices[i];
 
 			if (xa_is_value(folio)) {
 				count += invalidate_exceptional_entry(mapping,
-								      index,
-								      folio);
+							     indices[i], folio);
 				continue;
 			}
-			index += folio_nr_pages(folio) - 1;
 
 			ret = mapping_evict_folio(mapping, folio);
 			folio_unlock(folio);
@@ -542,7 +538,6 @@ unsigned long invalidate_mapping_pagevec(struct address_space *mapping,
 		folio_batch_remove_exceptionals(&fbatch);
 		folio_batch_release(&fbatch);
 		cond_resched();
-		index++;
 	}
 	return count;
 }
-- 
GitLab


From 9fb6beea79c6e7c959adf4fb7b94cf9a6028b941 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Mon, 17 Oct 2022 09:18:00 -0700
Subject: [PATCH 1052/4122] filemap: find_get_entries() now updates start
 offset

Initially, find_get_entries() was being passed in the start offset as a
value.  That left the calculation of the offset to the callers.  This led
to complexity in the callers trying to keep track of the index.

Now find_get_entries() takes in a pointer to the start offset and updates
the value to be directly after the last entry found.  If no entry is
found, the offset is not changed.  This gets rid of multiple hacky
calculations that kept track of the start offset.

Link: https://lkml.kernel.org/r/20221017161800.2003-3-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/filemap.c  | 13 +++++++++++--
 mm/internal.h |  2 +-
 mm/shmem.c    | 11 ++++-------
 mm/truncate.c | 19 +++++++------------
 4 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 3a73b7b8c2a4d..65eee6ec10667 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2048,10 +2048,10 @@ reset:
  *
  * Return: The number of entries which were found.
  */
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices)
 {
-	XA_STATE(xas, &mapping->i_pages, start);
+	XA_STATE(xas, &mapping->i_pages, *start);
 	struct folio *folio;
 
 	rcu_read_lock();
@@ -2062,6 +2062,15 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
 	}
 	rcu_read_unlock();
 
+	if (folio_batch_count(fbatch)) {
+		unsigned long nr = 1;
+		int idx = folio_batch_count(fbatch) - 1;
+
+		folio = fbatch->folios[idx];
+		if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+			nr = folio_nr_pages(folio);
+		*start = indices[idx] + nr;
+	}
 	return folio_batch_count(fbatch);
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index c504ac7267e08..68afdbe7106ed 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -108,7 +108,7 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
 
 unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
-unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
 		pgoff_t end, struct folio_batch *fbatch, pgoff_t *indices);
 void filemap_free_folio(struct address_space *mapping, struct folio *folio);
 int truncate_inode_folio(struct address_space *mapping, struct folio *folio);
diff --git a/mm/shmem.c b/mm/shmem.c
index 6b560c3915af0..9c897cf3fb995 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -973,7 +973,7 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 	while (index < end) {
 		cond_resched();
 
-		if (!find_get_entries(mapping, index, end - 1, &fbatch,
+		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
 				indices)) {
 			/* If all gone or hole-punch or unfalloc, we're done */
 			if (index == start || end != -1)
@@ -985,13 +985,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 			folio = fbatch.folios[i];
 
-			index = indices[i];
 			if (xa_is_value(folio)) {
 				if (unfalloc)
 					continue;
-				if (shmem_free_swap(mapping, index, folio)) {
+				if (shmem_free_swap(mapping, indices[i], folio)) {
 					/* Swap was replaced by page: retry */
-					index--;
+					index = indices[i];
 					break;
 				}
 				nr_swaps_freed++;
@@ -1004,19 +1003,17 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 				if (folio_mapping(folio) != mapping) {
 					/* Page was replaced by swap: retry */
 					folio_unlock(folio);
-					index--;
+					index = indices[i];
 					break;
 				}
 				VM_BUG_ON_FOLIO(folio_test_writeback(folio),
 						folio);
 				truncate_inode_folio(mapping, folio);
 			}
-			index = folio->index + folio_nr_pages(folio) - 1;
 			folio_unlock(folio);
 		}
 		folio_batch_remove_exceptionals(&fbatch);
 		folio_batch_release(&fbatch);
-		index++;
 	}
 
 	spin_lock_irq(&info->lock);
diff --git a/mm/truncate.c b/mm/truncate.c
index b6065a494c715..c7bfd247a651c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -400,7 +400,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	index = start;
 	while (index < end) {
 		cond_resched();
-		if (!find_get_entries(mapping, index, end - 1, &fbatch,
+		if (!find_get_entries(mapping, &index, end - 1, &fbatch,
 				indices)) {
 			/* If all gone from start onwards, we're done */
 			if (index == start)
@@ -414,21 +414,18 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			struct folio *folio = fbatch.folios[i];
 
 			/* We rely upon deletion not changing page->index */
-			index = indices[i];
 
 			if (xa_is_value(folio))
 				continue;
 
 			folio_lock(folio);
-			VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
 			folio_wait_writeback(folio);
 			truncate_inode_folio(mapping, folio);
 			folio_unlock(folio);
-			index = folio_index(folio) + folio_nr_pages(folio) - 1;
 		}
 		truncate_folio_batch_exceptionals(mapping, &fbatch, indices);
 		folio_batch_release(&fbatch);
-		index++;
 	}
 }
 EXPORT_SYMBOL(truncate_inode_pages_range);
@@ -636,16 +633,15 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 
 	folio_batch_init(&fbatch);
 	index = start;
-	while (find_get_entries(mapping, index, end, &fbatch, indices)) {
+	while (find_get_entries(mapping, &index, end, &fbatch, indices)) {
 		for (i = 0; i < folio_batch_count(&fbatch); i++) {
 			struct folio *folio = fbatch.folios[i];
 
 			/* We rely upon deletion not changing folio->index */
-			index = indices[i];
 
 			if (xa_is_value(folio)) {
 				if (!invalidate_exceptional_entry2(mapping,
-						index, folio))
+						indices[i], folio))
 					ret = -EBUSY;
 				continue;
 			}
@@ -655,13 +651,13 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 				 * If folio is mapped, before taking its lock,
 				 * zap the rest of the file in one hit.
 				 */
-				unmap_mapping_pages(mapping, index,
-						(1 + end - index), false);
+				unmap_mapping_pages(mapping, indices[i],
+						(1 + end - indices[i]), false);
 				did_range_unmap = 1;
 			}
 
 			folio_lock(folio);
-			VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
+			VM_BUG_ON_FOLIO(!folio_contains(folio, indices[i]), folio);
 			if (folio->mapping != mapping) {
 				folio_unlock(folio);
 				continue;
@@ -684,7 +680,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 		folio_batch_remove_exceptionals(&fbatch);
 		folio_batch_release(&fbatch);
 		cond_resched();
-		index++;
 	}
 	/*
 	 * For DAX we invalidate page tables after invalidating page cache.  We
-- 
GitLab


From 70ec04f3486103819807b061b50a99f6e1d2bf36 Mon Sep 17 00:00:00 2001
From: Uros Bizjak <ubizjak@gmail.com>
Date: Tue, 18 Oct 2022 16:51:54 +0200
Subject: [PATCH 1053/4122] zram: use try_cmpxchg in update_used_max

Use try_cmpxchg instead of cmpxchg (*ptr, old, new) == old in
update_used_max.  x86 CMPXCHG instruction returns success in ZF flag, so
this change saves a compare after cmpxchg (and related move instruction in
front of cmpxchg).

Also, reorder code a bit to remove additional compare and conditional jump
from the assembly code.  Together, hese two changes save 15 bytes from the
function when compiled for x86_64.

No functional change intended.

Link: https://lkml.kernel.org/r/20221018145154.3699-1-ubizjak@gmail.com
Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 966aab902d19a..87711ddf4b54b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -188,16 +188,13 @@ static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
 static inline void update_used_max(struct zram *zram,
 					const unsigned long pages)
 {
-	unsigned long old_max, cur_max;
-
-	old_max = atomic_long_read(&zram->stats.max_used_pages);
+	unsigned long cur_max = atomic_long_read(&zram->stats.max_used_pages);
 
 	do {
-		cur_max = old_max;
-		if (pages > cur_max)
-			old_max = atomic_long_cmpxchg(
-				&zram->stats.max_used_pages, cur_max, pages);
-	} while (old_max != cur_max);
+		if (cur_max >= pages)
+			return;
+	} while (!atomic_long_try_cmpxchg(&zram->stats.max_used_pages,
+					  &cur_max, pages));
 }
 
 static inline void zram_fill_page(void *ptr, unsigned long len,
-- 
GitLab


From 3e0ee843427a573e3e1187a5331e4b7fb00a76f3 Mon Sep 17 00:00:00 2001
From: Rolf Eike Beer <eb@emlix.com>
Date: Fri, 7 Oct 2022 13:37:41 +0200
Subject: [PATCH 1054/4122] mm: fix typo in struct vm_operations_struct
 comments

There is no eprotect(), so I assume this is about mprotect().

Link: https://lkml.kernel.org/r/2385684.8vm7BOzihM@mobilepool36.emlix.com
Signed-off-by: Rolf Eike Beer <eb@emlix.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bbcccbc55654..f6d2d2d9e284a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -549,7 +549,7 @@ struct vm_operations_struct {
 	/*
 	 * Called by mprotect() to make driver-specific permission
 	 * checks before mprotect() is finalised.   The VMA must not
-	 * be modified.  Returns 0 if eprotect() can proceed.
+	 * be modified.  Returns 0 if mprotect() can proceed.
 	 */
 	int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
 			unsigned long end, unsigned long newflags);
-- 
GitLab


From 6fe7d712d798e9312e3dff69ec3f5f62f4d03a04 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Fri, 7 Oct 2022 10:50:27 +0200
Subject: [PATCH 1055/4122] mm/shmem: remove unneeded assignments in
 shmem_get_folio_gfp()

After the rework of shmem_get_folio_gfp() to use a folio, the local
variable hindex is only needed to be set once before passing it to
shmem_add_to_page_cache().

Remove the unneeded initialization and assignments of the variable hindex
before the actual effective assignment and first use.

No functional change. No change in object code.

Link: https://lkml.kernel.org/r/20221007085027.6309-1-lukas.bulwahn@gmail.com
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/shmem.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/shmem.c b/mm/shmem.c
index 9c897cf3fb995..57d878b6391d8 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1826,7 +1826,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
 	struct shmem_sb_info *sbinfo;
 	struct mm_struct *charge_mm;
 	struct folio *folio;
-	pgoff_t hindex = index;
+	pgoff_t hindex;
 	gfp_t huge_gfp;
 	int error;
 	int once = 0;
@@ -1864,7 +1864,6 @@ repeat:
 	}
 
 	if (folio) {
-		hindex = folio->index;
 		if (sgp == SGP_WRITE)
 			folio_mark_accessed(folio);
 		if (folio_test_uptodate(folio))
-- 
GitLab


From 97955f6941f0e7dea64dea22711382daf1db2f76 Mon Sep 17 00:00:00 2001
From: Deming Wang <wangdeming@inspur.com>
Date: Thu, 6 Oct 2022 23:03:45 -0400
Subject: [PATCH 1056/4122] mm/mincore.c: use vma_lookup() instead of
 find_vma()

Using vma_lookup() verifies the start address is contained in the found
vma.  This results in easier to read the code.

Link: https://lkml.kernel.org/r/20221007030345.5029-1-wangdeming@inspur.com
Signed-off-by: Deming Wang <wangdeming@inspur.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mincore.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/mincore.c b/mm/mincore.c
index fa200c14185fc..e7e046fe17d78 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -190,8 +190,8 @@ static long do_mincore(unsigned long addr, unsigned long pages, unsigned char *v
 	unsigned long end;
 	int err;
 
-	vma = find_vma(current->mm, addr);
-	if (!vma || addr < vma->vm_start)
+	vma = vma_lookup(current->mm, addr);
+	if (!vma)
 		return -ENOMEM;
 	end = min(vma->vm_end, addr + (pages << PAGE_SHIFT));
 	if (!can_do_mincore(vma)) {
-- 
GitLab


From 7848ed6284ec4791eba22026e28edb2062790a3d Mon Sep 17 00:00:00 2001
From: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Date: Fri, 30 Sep 2022 19:14:33 +0530
Subject: [PATCH 1057/4122] mm: memcontrol: use mem_cgroup_is_root() helper

Replace the checks for memcg is root memcg, with mem_cgroup_is_root()
helper.

Link: https://lkml.kernel.org/r/20220930134433.338103-1-kamalesh.babulal@oracle.com
Signed-off-by: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kamalesh Babulal <kamalesh.babulal@oracle.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tom Hromatka <tom.hromatka@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memcontrol.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2d8549ae1b300..f264a856ba862 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1219,7 +1219,7 @@ static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
 	 * cgroup root (root_mem_cgroup). So we have to handle
 	 * dead_memcg from cgroup root separately.
 	 */
-	if (last != root_mem_cgroup)
+	if (!mem_cgroup_is_root(last))
 		__invalidate_reclaim_iterators(root_mem_cgroup,
 						dead_memcg);
 }
@@ -1243,7 +1243,7 @@ int mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 	struct mem_cgroup *iter;
 	int ret = 0;
 
-	BUG_ON(memcg == root_mem_cgroup);
+	BUG_ON(mem_cgroup_is_root(memcg));
 
 	for_each_mem_cgroup_tree(iter, memcg) {
 		struct css_task_iter it;
@@ -1272,7 +1272,7 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
 	memcg = folio_memcg(folio);
 
 	if (!memcg)
-		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != root_mem_cgroup, folio);
+		VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), folio);
 	else
 		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
 }
@@ -2036,7 +2036,7 @@ struct mem_cgroup *mem_cgroup_get_oom_group(struct task_struct *victim,
 	rcu_read_lock();
 
 	memcg = mem_cgroup_from_task(victim);
-	if (memcg == root_mem_cgroup)
+	if (mem_cgroup_is_root(memcg))
 		goto out;
 
 	/*
@@ -2995,7 +2995,7 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
 {
 	struct obj_cgroup *objcg = NULL;
 
-	for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
 		objcg = rcu_dereference(memcg->objcg);
 		if (objcg && obj_cgroup_tryget(objcg))
 			break;
@@ -7163,7 +7163,7 @@ void mem_cgroup_sk_alloc(struct sock *sk)
 
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
-	if (memcg == root_mem_cgroup)
+	if (mem_cgroup_is_root(memcg))
 		goto out;
 	if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && !memcg->tcpmem_active)
 		goto out;
@@ -7298,7 +7298,7 @@ static struct mem_cgroup *mem_cgroup_id_get_online(struct mem_cgroup *memcg)
 		 * The root cgroup cannot be destroyed, so it's refcount must
 		 * always be >= 1.
 		 */
-		if (WARN_ON_ONCE(memcg == root_mem_cgroup)) {
+		if (WARN_ON_ONCE(mem_cgroup_is_root(memcg))) {
 			VM_BUG_ON(1);
 			break;
 		}
@@ -7462,7 +7462,7 @@ long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
 
 	if (mem_cgroup_disabled() || do_memsw_account())
 		return nr_swap_pages;
-	for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
+	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg))
 		nr_swap_pages = min_t(long, nr_swap_pages,
 				      READ_ONCE(memcg->swap.max) -
 				      page_counter_read(&memcg->swap));
@@ -7484,7 +7484,7 @@ bool mem_cgroup_swap_full(struct folio *folio)
 	if (!memcg)
 		return false;
 
-	for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) {
+	for (; !mem_cgroup_is_root(memcg); memcg = parent_mem_cgroup(memcg)) {
 		unsigned long usage = page_counter_read(&memcg->swap);
 
 		if (usage * 2 >= READ_ONCE(memcg->swap.high) ||
@@ -7648,7 +7648,7 @@ bool obj_cgroup_may_zswap(struct obj_cgroup *objcg)
 		return true;
 
 	original_memcg = get_mem_cgroup_from_objcg(objcg);
-	for (memcg = original_memcg; memcg != root_mem_cgroup;
+	for (memcg = original_memcg; !mem_cgroup_is_root(memcg);
 	     memcg = parent_mem_cgroup(memcg)) {
 		unsigned long max = READ_ONCE(memcg->zswap_max);
 		unsigned long pages;
-- 
GitLab


From a5454f95246aa1d3527ef5e128cd3a10bc8371de Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <thomas.weissschuh@amadeus.com>
Date: Wed, 28 Sep 2022 12:45:35 +0200
Subject: [PATCH 1058/4122] tmpfs: ensure O_LARGEFILE with generic_file_open()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Without this check open() will open large files on tmpfs although
O_LARGEFILE was not specified.  This is inconsistent with other
filesystems.  Also it will later result in EOVERFLOW on stat() or EFBIG on
write().

Link: https://lore.kernel.org/lkml/76bedae6-22ea-4abc-8c06-b424ceb39217@t-8ch.de/
Link: https://lkml.kernel.org/r/20220928104535.61186-1-linux@weissschuh.net
Signed-off-by: Thomas Weißschuh <thomas.weissschuh@amadeus.com>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/shmem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/shmem.c b/mm/shmem.c
index 57d878b6391d8..0a7c4a748811d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3902,6 +3902,7 @@ EXPORT_SYMBOL(shmem_aops);
 
 static const struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
+	.open		= generic_file_open,
 	.get_unmapped_area = shmem_get_unmapped_area,
 #ifdef CONFIG_TMPFS
 	.llseek		= shmem_file_llseek,
-- 
GitLab


From 7ce0ea19d50e4e97a8da69f616ffa8afbb532a93 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Tue, 27 Sep 2022 19:09:09 +0200
Subject: [PATCH 1059/4122] kasan: switch kunit tests to console tracepoints

Switch KUnit-compatible KASAN tests from using per-task KUnit resources to
console tracepoints.

This allows for two things:

1. Migrating tests that trigger a KASAN report in the context of a task
   other than current to KUnit framework.
   This is implemented in the patches that follow.

2. Parsing and matching the contents of KASAN reports.
   This is not yet implemented.

Link: https://lkml.kernel.org/r/9345acdd11e953b207b0ed4724ff780e63afeb36.1664298455.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/Kconfig.kasan     |  2 +-
 mm/kasan/kasan.h      |  8 ----
 mm/kasan/kasan_test.c | 85 +++++++++++++++++++++++++++++++------------
 mm/kasan/report.c     | 31 ----------------
 4 files changed, 63 insertions(+), 63 deletions(-)

diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index ca09b1cf8ee9d..ba5b27962c348 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -181,7 +181,7 @@ config KASAN_VMALLOC
 
 config KASAN_KUNIT_TEST
 	tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS
-	depends on KASAN && KUNIT
+	depends on KASAN && KUNIT && TRACEPOINTS
 	default KUNIT_ALL_TESTS
 	help
 	  A KUnit-based KASAN test suite. Triggers different kinds of
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index abbcc1b0eec50..a84491bc4867e 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -261,14 +261,6 @@ struct kasan_stack_ring {
 
 #endif /* CONFIG_KASAN_SW_TAGS || CONFIG_KASAN_HW_TAGS */
 
-#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
-/* Used in KUnit-compatible KASAN tests. */
-struct kunit_kasan_status {
-	bool report_found;
-	bool sync_fault;
-};
-#endif
-
 #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
 
 static inline const void *kasan_shadow_to_mem(const void *shadow_addr)
diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index 0d59098f08761..0ff20bfa3376b 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -5,8 +5,12 @@
  * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
  */
 
+#define pr_fmt(fmt) "kasan_test: " fmt
+
+#include <kunit/test.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/io.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
 #include <linux/mm.h>
@@ -14,21 +18,28 @@
 #include <linux/module.h>
 #include <linux/printk.h>
 #include <linux/random.h>
+#include <linux/set_memory.h>
 #include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/tracepoint.h>
 #include <linux/uaccess.h>
-#include <linux/io.h>
 #include <linux/vmalloc.h>
-#include <linux/set_memory.h>
+#include <trace/events/printk.h>
 
 #include <asm/page.h>
 
-#include <kunit/test.h>
-
 #include "kasan.h"
 
 #define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE)
 
+static bool multishot;
+
+/* Fields set based on lines observed in the console. */
+static struct {
+	bool report_found;
+	bool async_fault;
+} test_status;
+
 /*
  * Some tests use these global variables to store return values from function
  * calls that could otherwise be eliminated by the compiler as dead code.
@@ -36,35 +47,61 @@
 void *kasan_ptr_result;
 int kasan_int_result;
 
-static struct kunit_resource resource;
-static struct kunit_kasan_status test_status;
-static bool multishot;
+/* Probe for console output: obtains test_status lines of interest. */
+static void probe_console(void *ignore, const char *buf, size_t len)
+{
+	if (strnstr(buf, "BUG: KASAN: ", len))
+		WRITE_ONCE(test_status.report_found, true);
+	else if (strnstr(buf, "Asynchronous fault: ", len))
+		WRITE_ONCE(test_status.async_fault, true);
+}
 
-/*
- * Temporarily enable multi-shot mode. Otherwise, KASAN would only report the
- * first detected bug and panic the kernel if panic_on_warn is enabled. For
- * hardware tag-based KASAN also allow tag checking to be reenabled for each
- * test, see the comment for KUNIT_EXPECT_KASAN_FAIL().
- */
-static int kasan_test_init(struct kunit *test)
+static void register_tracepoints(struct tracepoint *tp, void *ignore)
+{
+	check_trace_callback_type_console(probe_console);
+	if (!strcmp(tp->name, "console"))
+		WARN_ON(tracepoint_probe_register(tp, probe_console, NULL));
+}
+
+static void unregister_tracepoints(struct tracepoint *tp, void *ignore)
+{
+	if (!strcmp(tp->name, "console"))
+		tracepoint_probe_unregister(tp, probe_console, NULL);
+}
+
+static int kasan_suite_init(struct kunit_suite *suite)
 {
 	if (!kasan_enabled()) {
-		kunit_err(test, "can't run KASAN tests with KASAN disabled");
+		pr_err("Can't run KASAN tests with KASAN disabled");
 		return -1;
 	}
 
+	/*
+	 * Temporarily enable multi-shot mode. Otherwise, KASAN would only
+	 * report the first detected bug and panic the kernel if panic_on_warn
+	 * is enabled.
+	 */
 	multishot = kasan_save_enable_multi_shot();
-	test_status.report_found = false;
-	test_status.sync_fault = false;
-	kunit_add_named_resource(test, NULL, NULL, &resource,
-					"kasan_status", &test_status);
+
+	/*
+	 * Because we want to be able to build the test as a module, we need to
+	 * iterate through all known tracepoints, since the static registration
+	 * won't work here.
+	 */
+	for_each_kernel_tracepoint(register_tracepoints, NULL);
 	return 0;
 }
 
-static void kasan_test_exit(struct kunit *test)
+static void kasan_suite_exit(struct kunit_suite *suite)
 {
 	kasan_restore_multi_shot(multishot);
-	KUNIT_EXPECT_FALSE(test, test_status.report_found);
+	for_each_kernel_tracepoint(unregister_tracepoints, NULL);
+	tracepoint_synchronize_unregister();
+}
+
+static void kasan_test_exit(struct kunit *test)
+{
+	KUNIT_EXPECT_FALSE(test, READ_ONCE(test_status.report_found));
 }
 
 /**
@@ -106,11 +143,12 @@ static void kasan_test_exit(struct kunit *test)
 	if (IS_ENABLED(CONFIG_KASAN_HW_TAGS) &&				\
 	    kasan_sync_fault_possible()) {				\
 		if (READ_ONCE(test_status.report_found) &&		\
-		    READ_ONCE(test_status.sync_fault))			\
+		    !READ_ONCE(test_status.async_fault))		\
 			kasan_enable_tagging();				\
 		migrate_enable();					\
 	}								\
 	WRITE_ONCE(test_status.report_found, false);			\
+	WRITE_ONCE(test_status.async_fault, false);			\
 } while (0)
 
 #define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do {			\
@@ -1447,9 +1485,10 @@ static struct kunit_case kasan_kunit_test_cases[] = {
 
 static struct kunit_suite kasan_kunit_test_suite = {
 	.name = "kasan",
-	.init = kasan_test_init,
 	.test_cases = kasan_kunit_test_cases,
 	.exit = kasan_test_exit,
+	.suite_init = kasan_suite_init,
+	.suite_exit = kasan_suite_exit,
 };
 
 kunit_test_suite(kasan_kunit_test_suite);
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index df3602062bfd6..31355851a5ec3 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -30,8 +30,6 @@
 
 #include <asm/sections.h>
 
-#include <kunit/test.h>
-
 #include "kasan.h"
 #include "../slab.h"
 
@@ -114,41 +112,12 @@ EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
 
 #endif
 
-#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
-static void update_kunit_status(bool sync)
-{
-	struct kunit *test;
-	struct kunit_resource *resource;
-	struct kunit_kasan_status *status;
-
-	test = current->kunit_test;
-	if (!test)
-		return;
-
-	resource = kunit_find_named_resource(test, "kasan_status");
-	if (!resource) {
-		kunit_set_failure(test);
-		return;
-	}
-
-	status = (struct kunit_kasan_status *)resource->data;
-	WRITE_ONCE(status->report_found, true);
-	WRITE_ONCE(status->sync_fault, sync);
-
-	kunit_put_resource(resource);
-}
-#else
-static void update_kunit_status(bool sync) { }
-#endif
-
 static DEFINE_SPINLOCK(report_lock);
 
 static void start_report(unsigned long *flags, bool sync)
 {
 	/* Respect the /proc/sys/kernel/traceoff_on_warning interface. */
 	disable_trace_on_warning();
-	/* Update status of the currently running KASAN test. */
-	update_kunit_status(sync);
 	/* Do not allow LOCKDEP mangling KASAN reports. */
 	lockdep_off();
 	/* Make sure we don't end up in loop. */
-- 
GitLab


From 8516e837cab0b2c740b90603b66039aa7dcecda4 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Tue, 27 Sep 2022 19:09:10 +0200
Subject: [PATCH 1060/4122] kasan: migrate kasan_rcu_uaf test to kunit

Migrate the kasan_rcu_uaf test to the KUnit framework.

Changes to the implementation of the test:

- Call rcu_barrier() after call_rcu() to make that the RCU callbacks get
  triggered before the test is over.

- Cast pointer passed to rcu_dereference_protected as __rcu to get rid of
  the Sparse warning.

- Check that KASAN prints a report via KUNIT_EXPECT_KASAN_FAIL.

Initially, this test was intended to check that Generic KASAN prints
auxiliary stack traces for RCU objects. Nevertheless, the test is enabled
for all modes to make that KASAN reports bad accesses in RCU callbacks.

The presence of auxiliary stack traces for the Generic mode needs to be
inspected manually.

Link: https://lkml.kernel.org/r/897ee08d6cd0ba7e8a4fbfd9d8502823a2f922e6.1664298455.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kasan/kasan_test.c        | 37 ++++++++++++++++++++++++++++++++++++
 mm/kasan/kasan_test_module.c | 30 -----------------------------
 2 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index 0ff20bfa3376b..38bf6ed61cb87 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -1141,6 +1141,42 @@ static void kmalloc_double_kzfree(struct kunit *test)
 	KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr));
 }
 
+static struct kasan_rcu_info {
+	int i;
+	struct rcu_head rcu;
+} *global_rcu_ptr;
+
+static void rcu_uaf_reclaim(struct rcu_head *rp)
+{
+	struct kasan_rcu_info *fp =
+		container_of(rp, struct kasan_rcu_info, rcu);
+
+	kfree(fp);
+	((volatile struct kasan_rcu_info *)fp)->i;
+}
+
+/*
+ * Check that Generic KASAN prints auxiliary stack traces for RCU callbacks.
+ * The report needs to be inspected manually.
+ *
+ * This test is still enabled for other KASAN modes to make sure that all modes
+ * report bad accesses in tested scenarios.
+ */
+static void rcu_uaf(struct kunit *test)
+{
+	struct kasan_rcu_info *ptr;
+
+	ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
+	global_rcu_ptr = rcu_dereference_protected(
+				(struct kasan_rcu_info __rcu *)ptr, NULL);
+
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		call_rcu(&global_rcu_ptr->rcu, rcu_uaf_reclaim);
+		rcu_barrier());
+}
+
 static void vmalloc_helpers_tags(struct kunit *test)
 {
 	void *ptr;
@@ -1472,6 +1508,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
 	KUNIT_CASE(kasan_bitops_generic),
 	KUNIT_CASE(kasan_bitops_tags),
 	KUNIT_CASE(kmalloc_double_kzfree),
+	KUNIT_CASE(rcu_uaf),
 	KUNIT_CASE(vmalloc_helpers_tags),
 	KUNIT_CASE(vmalloc_oob),
 	KUNIT_CASE(vmap_tags),
diff --git a/mm/kasan/kasan_test_module.c b/mm/kasan/kasan_test_module.c
index e4ca82dc2c16d..4688cbcd722d4 100644
--- a/mm/kasan/kasan_test_module.c
+++ b/mm/kasan/kasan_test_module.c
@@ -62,35 +62,6 @@ static noinline void __init copy_user_test(void)
 	kfree(kmem);
 }
 
-static struct kasan_rcu_info {
-	int i;
-	struct rcu_head rcu;
-} *global_rcu_ptr;
-
-static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp)
-{
-	struct kasan_rcu_info *fp = container_of(rp,
-						struct kasan_rcu_info, rcu);
-
-	kfree(fp);
-	((volatile struct kasan_rcu_info *)fp)->i;
-}
-
-static noinline void __init kasan_rcu_uaf(void)
-{
-	struct kasan_rcu_info *ptr;
-
-	pr_info("use-after-free in kasan_rcu_reclaim\n");
-	ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	global_rcu_ptr = rcu_dereference_protected(ptr, NULL);
-	call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim);
-}
-
 static noinline void __init kasan_workqueue_work(struct work_struct *work)
 {
 	kfree(work);
@@ -130,7 +101,6 @@ static int __init test_kasan_module_init(void)
 	bool multishot = kasan_save_enable_multi_shot();
 
 	copy_user_test();
-	kasan_rcu_uaf();
 	kasan_workqueue_uaf();
 
 	kasan_restore_multi_shot(multishot);
-- 
GitLab


From b2c5bd4c69ce28500ed2176d11002a4e9b30da36 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Tue, 27 Sep 2022 19:09:11 +0200
Subject: [PATCH 1061/4122] kasan: migrate workqueue_uaf test to kunit

Migrate the workqueue_uaf test to the KUnit framework.

Initially, this test was intended to check that Generic KASAN prints
auxiliary stack traces for workqueues.  Nevertheless, the test is enabled
for all modes to make that KASAN reports bad accesses in the tested
scenario.

The presence of auxiliary stack traces for the Generic mode needs to be
inspected manually.

Link: https://lkml.kernel.org/r/1d81b6cc2a58985126283d1e0de8e663716dd930.1664298455.git.andreyknvl@google.com
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kasan/kasan_test.c        | 40 +++++++++++++++++++++++++++++-------
 mm/kasan/kasan_test_module.c | 30 ---------------------------
 2 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index 38bf6ed61cb87..e27591ef2777f 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -1141,6 +1141,14 @@ static void kmalloc_double_kzfree(struct kunit *test)
 	KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr));
 }
 
+/*
+ * The two tests below check that Generic KASAN prints auxiliary stack traces
+ * for RCU callbacks and workqueues. The reports need to be inspected manually.
+ *
+ * These tests are still enabled for other KASAN modes to make sure that all
+ * modes report bad accesses in tested scenarios.
+ */
+
 static struct kasan_rcu_info {
 	int i;
 	struct rcu_head rcu;
@@ -1155,13 +1163,6 @@ static void rcu_uaf_reclaim(struct rcu_head *rp)
 	((volatile struct kasan_rcu_info *)fp)->i;
 }
 
-/*
- * Check that Generic KASAN prints auxiliary stack traces for RCU callbacks.
- * The report needs to be inspected manually.
- *
- * This test is still enabled for other KASAN modes to make sure that all modes
- * report bad accesses in tested scenarios.
- */
 static void rcu_uaf(struct kunit *test)
 {
 	struct kasan_rcu_info *ptr;
@@ -1177,6 +1178,30 @@ static void rcu_uaf(struct kunit *test)
 		rcu_barrier());
 }
 
+static void workqueue_uaf_work(struct work_struct *work)
+{
+	kfree(work);
+}
+
+static void workqueue_uaf(struct kunit *test)
+{
+	struct workqueue_struct *workqueue;
+	struct work_struct *work;
+
+	workqueue = create_workqueue("kasan_workqueue_test");
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, workqueue);
+
+	work = kmalloc(sizeof(struct work_struct), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, work);
+
+	INIT_WORK(work, workqueue_uaf_work);
+	queue_work(workqueue, work);
+	destroy_workqueue(workqueue);
+
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		((volatile struct work_struct *)work)->data);
+}
+
 static void vmalloc_helpers_tags(struct kunit *test)
 {
 	void *ptr;
@@ -1509,6 +1534,7 @@ static struct kunit_case kasan_kunit_test_cases[] = {
 	KUNIT_CASE(kasan_bitops_tags),
 	KUNIT_CASE(kmalloc_double_kzfree),
 	KUNIT_CASE(rcu_uaf),
+	KUNIT_CASE(workqueue_uaf),
 	KUNIT_CASE(vmalloc_helpers_tags),
 	KUNIT_CASE(vmalloc_oob),
 	KUNIT_CASE(vmap_tags),
diff --git a/mm/kasan/kasan_test_module.c b/mm/kasan/kasan_test_module.c
index 4688cbcd722d4..7be7bed456ef5 100644
--- a/mm/kasan/kasan_test_module.c
+++ b/mm/kasan/kasan_test_module.c
@@ -62,35 +62,6 @@ static noinline void __init copy_user_test(void)
 	kfree(kmem);
 }
 
-static noinline void __init kasan_workqueue_work(struct work_struct *work)
-{
-	kfree(work);
-}
-
-static noinline void __init kasan_workqueue_uaf(void)
-{
-	struct workqueue_struct *workqueue;
-	struct work_struct *work;
-
-	workqueue = create_workqueue("kasan_wq_test");
-	if (!workqueue) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-	work = kmalloc(sizeof(struct work_struct), GFP_KERNEL);
-	if (!work) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	INIT_WORK(work, kasan_workqueue_work);
-	queue_work(workqueue, work);
-	destroy_workqueue(workqueue);
-
-	pr_info("use-after-free on workqueue\n");
-	((volatile struct work_struct *)work)->data;
-}
-
 static int __init test_kasan_module_init(void)
 {
 	/*
@@ -101,7 +72,6 @@ static int __init test_kasan_module_init(void)
 	bool multishot = kasan_save_enable_multi_shot();
 
 	copy_user_test();
-	kasan_workqueue_uaf();
 
 	kasan_restore_multi_shot(multishot);
 	return -EAGAIN;
-- 
GitLab


From 69c66add566395eaf4c08cb5975b45dec70dbe85 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 27 Sep 2022 13:01:14 +0200
Subject: [PATCH 1062/4122] selftests/vm: anon_cow: test COW handling of
 anonymous memory

Patch series "selftests/vm: test COW handling of anonymous memory".

This is my current set of tests for testing COW handling of anonymous
memory, especially when interacting with GUP.  I developed these tests
while working on PageAnonExclusive and managed to clean them up just now.

On current upstream Linux, all tests pass except the hugetlb tests that
rely on vmsplice -- these tests should pass as soon as vmsplice properly
uses FOLL_PIN instead of FOLL_GET.

I'm working on additional tests for COW handling in private mappings,
focusing on long-term R/O pinning e.g., of the shared zeropage, pagecache
pages and KSM pages.  These tests, however, will go into a different file.
So this is everything I have regarding tests for anonymous memory.


This patch (of 7):

Let's start adding tests for our COW handling of anonymous memory.  We'll
focus on basic tests that we can achieve without additional libraries or
gup_test extensions.

We'll add THP and hugetlb tests separately.

[david@redhat.com: s/size_t/ssize_t/ on `cur', `total', `transferred';]
  Link: https://lkml.kernel.org/r/51302b9e-dc69-d709-3214-f23868028555@redhat.com
Link: https://lkml.kernel.org/r/20220927110120.106906-1-david@redhat.com
Link: https://lkml.kernel.org/r/20220927110120.106906-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph von Recklinghausen <crecklin@redhat.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore     |   1 +
 tools/testing/selftests/vm/Makefile       |   4 +-
 tools/testing/selftests/vm/anon_cow.c     | 401 ++++++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests.sh |   3 +
 tools/testing/selftests/vm/vm_util.c      |   7 +
 tools/testing/selftests/vm/vm_util.h      |   1 +
 6 files changed, 416 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/vm/anon_cow.c

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 7b9dc2426f189..8a536c731e3c2 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+anon_cow
 hugepage-mmap
 hugepage-mremap
 hugepage-shm
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 163c2fde3cb3a..ad07d7a84c3ed 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -25,7 +25,8 @@ MAKEFLAGS += --no-builtin-rules
 
 CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
 LDLIBS = -lrt -lpthread
-TEST_GEN_FILES = compaction_test
+TEST_GEN_FILES = anon_cow
+TEST_GEN_FILES += compaction_test
 TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
 TEST_GEN_FILES += hugetlb-madvise
@@ -95,6 +96,7 @@ TEST_FILES += va_128TBswitch.sh
 
 include ../lib.mk
 
+$(OUTPUT)/anon_cow: vm_util.c
 $(OUTPUT)/khugepaged: vm_util.c
 $(OUTPUT)/madv_populate: vm_util.c
 $(OUTPUT)/soft-dirty: vm_util.c
diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c
new file mode 100644
index 0000000000000..4613294af7582
--- /dev/null
+++ b/tools/testing/selftests/vm/anon_cow.c
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * COW (Copy On Write) tests for anonymous memory.
+ *
+ * Copyright 2022, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+#include "vm_util.h"
+
+static size_t pagesize;
+static int pagemap_fd;
+
+struct comm_pipes {
+	int child_ready[2];
+	int parent_ready[2];
+};
+
+static int setup_comm_pipes(struct comm_pipes *comm_pipes)
+{
+	if (pipe(comm_pipes->child_ready) < 0)
+		return -errno;
+	if (pipe(comm_pipes->parent_ready) < 0) {
+		close(comm_pipes->child_ready[0]);
+		close(comm_pipes->child_ready[1]);
+		return -errno;
+	}
+
+	return 0;
+}
+
+static void close_comm_pipes(struct comm_pipes *comm_pipes)
+{
+	close(comm_pipes->child_ready[0]);
+	close(comm_pipes->child_ready[1]);
+	close(comm_pipes->parent_ready[0]);
+	close(comm_pipes->parent_ready[1]);
+}
+
+static int child_memcmp_fn(char *mem, size_t size,
+			   struct comm_pipes *comm_pipes)
+{
+	char *old = malloc(size);
+	char buf;
+
+	/* Backup the original content. */
+	memcpy(old, mem, size);
+
+	/* Wait until the parent modified the page. */
+	write(comm_pipes->child_ready[1], "0", 1);
+	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
+		;
+
+	/* See if we still read the old values. */
+	return memcmp(old, mem, size);
+}
+
+static int child_vmsplice_memcmp_fn(char *mem, size_t size,
+				    struct comm_pipes *comm_pipes)
+{
+	struct iovec iov = {
+		.iov_base = mem,
+		.iov_len = size,
+	};
+	ssize_t cur, total, transferred;
+	char *old, *new;
+	int fds[2];
+	char buf;
+
+	old = malloc(size);
+	new = malloc(size);
+
+	/* Backup the original content. */
+	memcpy(old, mem, size);
+
+	if (pipe(fds) < 0)
+		return -errno;
+
+	/* Trigger a read-only pin. */
+	transferred = vmsplice(fds[1], &iov, 1, 0);
+	if (transferred < 0)
+		return -errno;
+	if (transferred == 0)
+		return -EINVAL;
+
+	/* Unmap it from our page tables. */
+	if (munmap(mem, size) < 0)
+		return -errno;
+
+	/* Wait until the parent modified it. */
+	write(comm_pipes->child_ready[1], "0", 1);
+	while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
+		;
+
+	/* See if we still read the old values via the pipe. */
+	for (total = 0; total < transferred; total += cur) {
+		cur = read(fds[0], new + total, transferred - total);
+		if (cur < 0)
+			return -errno;
+	}
+
+	return memcmp(old, new, transferred);
+}
+
+typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
+
+static void do_test_cow_in_parent(char *mem, size_t size, child_fn fn)
+{
+	struct comm_pipes comm_pipes;
+	char buf;
+	int ret;
+
+	ret = setup_comm_pipes(&comm_pipes);
+	if (ret) {
+		ksft_test_result_fail("pipe() failed\n");
+		return;
+	}
+
+	ret = fork();
+	if (ret < 0) {
+		ksft_test_result_fail("fork() failed\n");
+		goto close_comm_pipes;
+	} else if (!ret) {
+		exit(fn(mem, size, &comm_pipes));
+	}
+
+	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+		;
+	/* Modify the page. */
+	memset(mem, 0xff, size);
+	write(comm_pipes.parent_ready[1], "0", 1);
+
+	wait(&ret);
+	if (WIFEXITED(ret))
+		ret = WEXITSTATUS(ret);
+	else
+		ret = -EINVAL;
+
+	ksft_test_result(!ret, "No leak from parent into child\n");
+close_comm_pipes:
+	close_comm_pipes(&comm_pipes);
+}
+
+static void test_cow_in_parent(char *mem, size_t size)
+{
+	do_test_cow_in_parent(mem, size, child_memcmp_fn);
+}
+
+static void test_vmsplice_in_child(char *mem, size_t size)
+{
+	do_test_cow_in_parent(mem, size, child_vmsplice_memcmp_fn);
+}
+
+static void do_test_vmsplice_in_parent(char *mem, size_t size,
+				       bool before_fork)
+{
+	struct iovec iov = {
+		.iov_base = mem,
+		.iov_len = size,
+	};
+	ssize_t cur, total, transferred;
+	struct comm_pipes comm_pipes;
+	char *old, *new;
+	int ret, fds[2];
+	char buf;
+
+	old = malloc(size);
+	new = malloc(size);
+
+	memcpy(old, mem, size);
+
+	ret = setup_comm_pipes(&comm_pipes);
+	if (ret) {
+		ksft_test_result_fail("pipe() failed\n");
+		goto free;
+	}
+
+	if (pipe(fds) < 0) {
+		ksft_test_result_fail("pipe() failed\n");
+		goto close_comm_pipes;
+	}
+
+	if (before_fork) {
+		transferred = vmsplice(fds[1], &iov, 1, 0);
+		if (transferred <= 0) {
+			ksft_test_result_fail("vmsplice() failed\n");
+			goto close_pipe;
+		}
+	}
+
+	ret = fork();
+	if (ret < 0) {
+		ksft_test_result_fail("fork() failed\n");
+		goto close_pipe;
+	} else if (!ret) {
+		write(comm_pipes.child_ready[1], "0", 1);
+		while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+			;
+		/* Modify page content in the child. */
+		memset(mem, 0xff, size);
+		exit(0);
+	}
+
+	if (!before_fork) {
+		transferred = vmsplice(fds[1], &iov, 1, 0);
+		if (transferred <= 0) {
+			ksft_test_result_fail("vmsplice() failed\n");
+			wait(&ret);
+			goto close_pipe;
+		}
+	}
+
+	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+		;
+	if (munmap(mem, size) < 0) {
+		ksft_test_result_fail("munmap() failed\n");
+		goto close_pipe;
+	}
+	write(comm_pipes.parent_ready[1], "0", 1);
+
+	/* Wait until the child is done writing. */
+	wait(&ret);
+	if (!WIFEXITED(ret)) {
+		ksft_test_result_fail("wait() failed\n");
+		goto close_pipe;
+	}
+
+	/* See if we still read the old values. */
+	for (total = 0; total < transferred; total += cur) {
+		cur = read(fds[0], new + total, transferred - total);
+		if (cur < 0) {
+			ksft_test_result_fail("read() failed\n");
+			goto close_pipe;
+		}
+	}
+
+	ksft_test_result(!memcmp(old, new, transferred),
+			 "No leak from child into parent\n");
+close_pipe:
+	close(fds[0]);
+	close(fds[1]);
+close_comm_pipes:
+	close_comm_pipes(&comm_pipes);
+free:
+	free(old);
+	free(new);
+}
+
+static void test_vmsplice_before_fork(char *mem, size_t size)
+{
+	do_test_vmsplice_in_parent(mem, size, true);
+}
+
+static void test_vmsplice_after_fork(char *mem, size_t size)
+{
+	do_test_vmsplice_in_parent(mem, size, false);
+}
+
+typedef void (*test_fn)(char *mem, size_t size);
+
+static void do_run_with_base_page(test_fn fn, bool swapout)
+{
+	char *mem;
+	int ret;
+
+	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		return;
+	}
+
+	ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
+	/* Ignore if not around on a kernel. */
+	if (ret && errno != EINVAL) {
+		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+		goto munmap;
+	}
+
+	/* Populate a base page. */
+	memset(mem, 0, pagesize);
+
+	if (swapout) {
+		madvise(mem, pagesize, MADV_PAGEOUT);
+		if (!pagemap_is_swapped(pagemap_fd, mem)) {
+			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+			goto munmap;
+		}
+	}
+
+	fn(mem, pagesize);
+munmap:
+	munmap(mem, pagesize);
+}
+
+static void run_with_base_page(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with base page\n", desc);
+	do_run_with_base_page(fn, false);
+}
+
+static void run_with_base_page_swap(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
+	do_run_with_base_page(fn, true);
+}
+
+struct test_case {
+	const char *desc;
+	test_fn fn;
+};
+
+static const struct test_case test_cases[] = {
+	/*
+	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
+	 * either the child can observe modifications by the parent or the
+	 * other way around.
+	 */
+	{
+		"Basic COW after fork()",
+		test_cow_in_parent,
+	},
+	/*
+	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
+	 * we miss to break COW, the child observes modifications by the parent.
+	 * This is CVE-2020-29374 reported by Jann Horn.
+	 */
+	{
+		"vmsplice() + unmap in child",
+		test_vmsplice_in_child
+	},
+	/*
+	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
+	 * fork(); modify in the child. If we miss to break COW, the parent
+	 * observes modifications by the child.
+	 */
+	{
+		"vmsplice() before fork(), unmap in parent after fork()",
+		test_vmsplice_before_fork,
+	},
+	/*
+	 * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
+	 * child. If we miss to break COW, the parent observes modifications by
+	 * the child.
+	 */
+	{
+		"vmsplice() + unmap in parent after fork()",
+		test_vmsplice_after_fork,
+	},
+};
+
+static void run_test_case(struct test_case const *test_case)
+{
+	run_with_base_page(test_case->fn, test_case->desc);
+	run_with_base_page_swap(test_case->fn, test_case->desc);
+}
+
+static void run_test_cases(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+		run_test_case(&test_cases[i]);
+}
+
+int main(int argc, char **argv)
+{
+	int nr_test_cases = ARRAY_SIZE(test_cases);
+	int err;
+
+	pagesize = getpagesize();
+
+	ksft_print_header();
+	ksft_set_plan(nr_test_cases * 2);
+
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	if (pagemap_fd < 0)
+		ksft_exit_fail_msg("opening pagemap failed\n");
+
+	run_test_cases();
+
+	err = ksft_get_fail_cnt();
+	if (err)
+		ksft_exit_fail_msg("%d out of %d tests failed\n",
+				   err, ksft_test_num());
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 0dc9f545a32d6..1fa783732296e 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -185,4 +185,7 @@ fi
 
 run_test ./soft-dirty
 
+# COW tests for anonymous memory
+run_test ./anon_cow
+
 exit $exitcode
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
index f11f8adda5218..37dd230673eea 100644
--- a/tools/testing/selftests/vm/vm_util.c
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -28,6 +28,13 @@ bool pagemap_is_softdirty(int fd, char *start)
 	return entry & 0x0080000000000000ull;
 }
 
+bool pagemap_is_swapped(int fd, char *start)
+{
+	uint64_t entry = pagemap_get_entry(fd, start);
+
+	return entry & 0x4000000000000000ull;
+}
+
 void clear_softdirty(void)
 {
 	int ret;
diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h
index 5c35de454e08f..833df81b26945 100644
--- a/tools/testing/selftests/vm/vm_util.h
+++ b/tools/testing/selftests/vm/vm_util.h
@@ -4,6 +4,7 @@
 
 uint64_t pagemap_get_entry(int fd, char *start);
 bool pagemap_is_softdirty(int fd, char *start);
+bool pagemap_is_swapped(int fd, char *start);
 void clear_softdirty(void);
 bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
 uint64_t read_pmd_pagesize(void);
-- 
GitLab


From a905e82ae44b22a25ea73415a3ec1228775eb9a9 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 27 Sep 2022 13:01:15 +0200
Subject: [PATCH 1063/4122] selftests/vm: factor out pagemap_is_populated()
 into vm_util

We'll reuse it in the anon_cow test next.

Link: https://lkml.kernel.org/r/20220927110120.106906-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph von Recklinghausen <crecklin@redhat.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/madv_populate.c | 8 --------
 tools/testing/selftests/vm/vm_util.c       | 8 ++++++++
 tools/testing/selftests/vm/vm_util.h       | 1 +
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c
index 715a42e8e2cdb..60547245e479b 100644
--- a/tools/testing/selftests/vm/madv_populate.c
+++ b/tools/testing/selftests/vm/madv_populate.c
@@ -27,14 +27,6 @@
 
 static size_t pagesize;
 
-static bool pagemap_is_populated(int fd, char *start)
-{
-	uint64_t entry = pagemap_get_entry(fd, start);
-
-	/* Present or swapped. */
-	return entry & 0xc000000000000000ull;
-}
-
 static void sense_support(void)
 {
 	char *addr;
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
index 37dd230673eea..5bbf7641a0f07 100644
--- a/tools/testing/selftests/vm/vm_util.c
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -35,6 +35,14 @@ bool pagemap_is_swapped(int fd, char *start)
 	return entry & 0x4000000000000000ull;
 }
 
+bool pagemap_is_populated(int fd, char *start)
+{
+	uint64_t entry = pagemap_get_entry(fd, start);
+
+	/* Present or swapped. */
+	return entry & 0xc000000000000000ull;
+}
+
 void clear_softdirty(void)
 {
 	int ret;
diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h
index 833df81b26945..80d5a6ad413b1 100644
--- a/tools/testing/selftests/vm/vm_util.h
+++ b/tools/testing/selftests/vm/vm_util.h
@@ -5,6 +5,7 @@
 uint64_t pagemap_get_entry(int fd, char *start);
 bool pagemap_is_softdirty(int fd, char *start);
 bool pagemap_is_swapped(int fd, char *start);
+bool pagemap_is_populated(int fd, char *start);
 void clear_softdirty(void);
 bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
 uint64_t read_pmd_pagesize(void);
-- 
GitLab


From f4b5fd6946e244cdedc3bbb9a1f24c8133b2077a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 27 Sep 2022 13:01:16 +0200
Subject: [PATCH 1064/4122] selftests/vm: anon_cow: THP tests

Let's add various THP variants that we'll run with our existing test
cases.

Link: https://lkml.kernel.org/r/20220927110120.106906-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph von Recklinghausen <crecklin@redhat.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/anon_cow.c | 259 +++++++++++++++++++++++++-
 1 file changed, 258 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c
index 4613294af7582..c1681c9d255f3 100644
--- a/tools/testing/selftests/vm/anon_cow.c
+++ b/tools/testing/selftests/vm/anon_cow.c
@@ -24,6 +24,43 @@
 
 static size_t pagesize;
 static int pagemap_fd;
+static size_t thpsize;
+
+static void detect_thpsize(void)
+{
+	int fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
+		      O_RDONLY);
+	size_t size = 0;
+	char buf[15];
+	int ret;
+
+	if (fd < 0)
+		return;
+
+	ret = pread(fd, buf, sizeof(buf), 0);
+	if (ret > 0 && ret < sizeof(buf)) {
+		buf[ret] = 0;
+
+		size = strtoul(buf, NULL, 10);
+		if (size < pagesize)
+			size = 0;
+		if (size > 0) {
+			thpsize = size;
+			ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
+				       thpsize / 1024);
+		}
+	}
+
+	close(fd);
+}
+
+static bool range_is_swapped(void *addr, size_t size)
+{
+	for (; size; addr += pagesize, size -= pagesize)
+		if (!pagemap_is_swapped(pagemap_fd, addr))
+			return false;
+	return true;
+}
 
 struct comm_pipes {
 	int child_ready[2];
@@ -319,6 +356,206 @@ static void run_with_base_page_swap(test_fn fn, const char *desc)
 	do_run_with_base_page(fn, true);
 }
 
+enum thp_run {
+	THP_RUN_PMD,
+	THP_RUN_PMD_SWAPOUT,
+	THP_RUN_PTE,
+	THP_RUN_PTE_SWAPOUT,
+	THP_RUN_SINGLE_PTE,
+	THP_RUN_SINGLE_PTE_SWAPOUT,
+	THP_RUN_PARTIAL_MREMAP,
+	THP_RUN_PARTIAL_SHARED,
+};
+
+static void do_run_with_thp(test_fn fn, enum thp_run thp_run)
+{
+	char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
+	size_t size, mmap_size, mremap_size;
+	int ret;
+
+	/* For alignment purposes, we need twice the thp size. */
+	mmap_size = 2 * thpsize;
+	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (mmap_mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		return;
+	}
+
+	/* We need a THP-aligned memory area. */
+	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
+
+	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
+	if (ret) {
+		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+		goto munmap;
+	}
+
+	/*
+	 * Try to populate a THP. Touch the first sub-page and test if we get
+	 * another sub-page populated automatically.
+	 */
+	mem[0] = 0;
+	if (!pagemap_is_populated(pagemap_fd, mem + pagesize)) {
+		ksft_test_result_skip("Did not get a THP populated\n");
+		goto munmap;
+	}
+	memset(mem, 0, thpsize);
+
+	size = thpsize;
+	switch (thp_run) {
+	case THP_RUN_PMD:
+	case THP_RUN_PMD_SWAPOUT:
+		break;
+	case THP_RUN_PTE:
+	case THP_RUN_PTE_SWAPOUT:
+		/*
+		 * Trigger PTE-mapping the THP by temporarily mapping a single
+		 * subpage R/O.
+		 */
+		ret = mprotect(mem + pagesize, pagesize, PROT_READ);
+		if (ret) {
+			ksft_test_result_fail("mprotect() failed\n");
+			goto munmap;
+		}
+		ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
+		if (ret) {
+			ksft_test_result_fail("mprotect() failed\n");
+			goto munmap;
+		}
+		break;
+	case THP_RUN_SINGLE_PTE:
+	case THP_RUN_SINGLE_PTE_SWAPOUT:
+		/*
+		 * Discard all but a single subpage of that PTE-mapped THP. What
+		 * remains is a single PTE mapping a single subpage.
+		 */
+		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
+		if (ret) {
+			ksft_test_result_fail("MADV_DONTNEED failed\n");
+			goto munmap;
+		}
+		size = pagesize;
+		break;
+	case THP_RUN_PARTIAL_MREMAP:
+		/*
+		 * Remap half of the THP. We need some new memory location
+		 * for that.
+		 */
+		mremap_size = thpsize / 2;
+		mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
+				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (mem == MAP_FAILED) {
+			ksft_test_result_fail("mmap() failed\n");
+			goto munmap;
+		}
+		tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
+			     MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
+		if (tmp != mremap_mem) {
+			ksft_test_result_fail("mremap() failed\n");
+			goto munmap;
+		}
+		size = mremap_size;
+		break;
+	case THP_RUN_PARTIAL_SHARED:
+		/*
+		 * Share the first page of the THP with a child and quit the
+		 * child. This will result in some parts of the THP never
+		 * have been shared.
+		 */
+		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
+		if (ret) {
+			ksft_test_result_fail("MADV_DONTFORK failed\n");
+			goto munmap;
+		}
+		ret = fork();
+		if (ret < 0) {
+			ksft_test_result_fail("fork() failed\n");
+			goto munmap;
+		} else if (!ret) {
+			exit(0);
+		}
+		wait(&ret);
+		/* Allow for sharing all pages again. */
+		ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
+		if (ret) {
+			ksft_test_result_fail("MADV_DOFORK failed\n");
+			goto munmap;
+		}
+		break;
+	default:
+		assert(false);
+	}
+
+	switch (thp_run) {
+	case THP_RUN_PMD_SWAPOUT:
+	case THP_RUN_PTE_SWAPOUT:
+	case THP_RUN_SINGLE_PTE_SWAPOUT:
+		madvise(mem, size, MADV_PAGEOUT);
+		if (!range_is_swapped(mem, size)) {
+			ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+			goto munmap;
+		}
+		break;
+	default:
+		break;
+	}
+
+	fn(mem, size);
+munmap:
+	munmap(mmap_mem, mmap_size);
+	if (mremap_mem != MAP_FAILED)
+		munmap(mremap_mem, mremap_size);
+}
+
+static void run_with_thp(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_PMD);
+}
+
+static void run_with_thp_swap(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with swapped-out THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT);
+}
+
+static void run_with_pte_mapped_thp(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with PTE-mapped THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_PTE);
+}
+
+static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT);
+}
+
+static void run_with_single_pte_of_thp(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with single PTE of THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_SINGLE_PTE);
+}
+
+static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT);
+}
+
+static void run_with_partial_mremap_thp(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP);
+}
+
+static void run_with_partial_shared_thp(test_fn fn, const char *desc)
+{
+	ksft_print_msg("[RUN] %s ... with partially shared THP\n", desc);
+	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
+}
+
 struct test_case {
 	const char *desc;
 	test_fn fn;
@@ -367,6 +604,16 @@ static void run_test_case(struct test_case const *test_case)
 {
 	run_with_base_page(test_case->fn, test_case->desc);
 	run_with_base_page_swap(test_case->fn, test_case->desc);
+	if (thpsize) {
+		run_with_thp(test_case->fn, test_case->desc);
+		run_with_thp_swap(test_case->fn, test_case->desc);
+		run_with_pte_mapped_thp(test_case->fn, test_case->desc);
+		run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc);
+		run_with_single_pte_of_thp(test_case->fn, test_case->desc);
+		run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc);
+		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
+		run_with_partial_shared_thp(test_case->fn, test_case->desc);
+	}
 }
 
 static void run_test_cases(void)
@@ -377,15 +624,25 @@ static void run_test_cases(void)
 		run_test_case(&test_cases[i]);
 }
 
+static int tests_per_test_case(void)
+{
+	int tests = 2;
+
+	if (thpsize)
+		tests += 8;
+	return tests;
+}
+
 int main(int argc, char **argv)
 {
 	int nr_test_cases = ARRAY_SIZE(test_cases);
 	int err;
 
 	pagesize = getpagesize();
+	detect_thpsize();
 
 	ksft_print_header();
-	ksft_set_plan(nr_test_cases * 2);
+	ksft_set_plan(nr_test_cases * tests_per_test_case());
 
 	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
 	if (pagemap_fd < 0)
-- 
GitLab


From 7dad331be7816103eba8c12caeb88fbd3599c0b9 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 27 Sep 2022 13:01:17 +0200
Subject: [PATCH 1065/4122] selftests/vm: anon_cow: hugetlb tests

Let's run all existing test cases with all hugetlb sizes we're able to
detect.

Note that some tests cases still fail. This will, for example, be fixed
once vmsplice properly uses FOLL_PIN instead of FOLL_GET for pinning.
With 2 MiB and 1 GiB hugetlb on x86_64, the expected failures are:

 # [RUN] vmsplice() + unmap in child ... with hugetlb (2048 kB)
 not ok 23 No leak from parent into child
 # [RUN] vmsplice() + unmap in child ... with hugetlb (1048576 kB)
 not ok 24 No leak from parent into child
 # [RUN] vmsplice() before fork(), unmap in parent after fork() ... with hugetlb (2048 kB)
 not ok 35 No leak from child into parent
 # [RUN] vmsplice() before fork(), unmap in parent after fork() ... with hugetlb (1048576 kB)
 not ok 36 No leak from child into parent
 # [RUN] vmsplice() + unmap in parent after fork() ... with hugetlb (2048 kB)
 not ok 47 No leak from child into parent
 # [RUN] vmsplice() + unmap in parent after fork() ... with hugetlb (1048576 kB)
 not ok 48 No leak from child into parent

Link: https://lkml.kernel.org/r/20220927110120.106906-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph von Recklinghausen <crecklin@redhat.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/anon_cow.c | 70 ++++++++++++++++++++++++++-
 1 file changed, 69 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c
index c1681c9d255f3..78dfdd983380e 100644
--- a/tools/testing/selftests/vm/anon_cow.c
+++ b/tools/testing/selftests/vm/anon_cow.c
@@ -25,6 +25,8 @@
 static size_t pagesize;
 static int pagemap_fd;
 static size_t thpsize;
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
 
 static void detect_thpsize(void)
 {
@@ -54,6 +56,31 @@ static void detect_thpsize(void)
 	close(fd);
 }
 
+static void detect_hugetlbsizes(void)
+{
+	DIR *dir = opendir("/sys/kernel/mm/hugepages/");
+
+	if (!dir)
+		return;
+
+	while (nr_hugetlbsizes < ARRAY_SIZE(hugetlbsizes)) {
+		struct dirent *entry = readdir(dir);
+		size_t kb;
+
+		if (!entry)
+			break;
+		if (entry->d_type != DT_DIR)
+			continue;
+		if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
+			continue;
+		hugetlbsizes[nr_hugetlbsizes] = kb * 1024;
+		nr_hugetlbsizes++;
+		ksft_print_msg("[INFO] detected hugetlb size: %zu KiB\n",
+			       kb);
+	}
+	closedir(dir);
+}
+
 static bool range_is_swapped(void *addr, size_t size)
 {
 	for (; size; addr += pagesize, size -= pagesize)
@@ -556,6 +583,41 @@ static void run_with_partial_shared_thp(test_fn fn, const char *desc)
 	do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED);
 }
 
+static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
+{
+	int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+	char *mem, *dummy;
+
+	ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
+		       hugetlbsize / 1024);
+
+	flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
+
+	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_skip("need more free huge pages\n");
+		return;
+	}
+
+	/* Populate an huge page. */
+	memset(mem, 0, hugetlbsize);
+
+	/*
+	 * We need a total of two hugetlb pages to handle COW/unsharing
+	 * properly, otherwise we might get zapped by a SIGBUS.
+	 */
+	dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
+	if (dummy == MAP_FAILED) {
+		ksft_test_result_skip("need more free huge pages\n");
+		goto munmap;
+	}
+	munmap(dummy, hugetlbsize);
+
+	fn(mem, hugetlbsize);
+munmap:
+	munmap(mem, hugetlbsize);
+}
+
 struct test_case {
 	const char *desc;
 	test_fn fn;
@@ -602,6 +664,8 @@ static const struct test_case test_cases[] = {
 
 static void run_test_case(struct test_case const *test_case)
 {
+	int i;
+
 	run_with_base_page(test_case->fn, test_case->desc);
 	run_with_base_page_swap(test_case->fn, test_case->desc);
 	if (thpsize) {
@@ -614,6 +678,9 @@ static void run_test_case(struct test_case const *test_case)
 		run_with_partial_mremap_thp(test_case->fn, test_case->desc);
 		run_with_partial_shared_thp(test_case->fn, test_case->desc);
 	}
+	for (i = 0; i < nr_hugetlbsizes; i++)
+		run_with_hugetlb(test_case->fn, test_case->desc,
+				 hugetlbsizes[i]);
 }
 
 static void run_test_cases(void)
@@ -626,7 +693,7 @@ static void run_test_cases(void)
 
 static int tests_per_test_case(void)
 {
-	int tests = 2;
+	int tests = 2 + nr_hugetlbsizes;
 
 	if (thpsize)
 		tests += 8;
@@ -640,6 +707,7 @@ int main(int argc, char **argv)
 
 	pagesize = getpagesize();
 	detect_thpsize();
+	detect_hugetlbsizes();
 
 	ksft_print_header();
 	ksft_set_plan(nr_test_cases * tests_per_test_case());
-- 
GitLab


From e487ebbd12986facc7f77129d3ca80de84841170 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 27 Sep 2022 13:01:18 +0200
Subject: [PATCH 1066/4122] selftests/vm: anon_cow: add liburing test cases

io_uring provides a simple mechanism to test long-term, R/W GUP pins
-- via fixed buffers -- and can be used to verify that GUP pins stay
in sync with the pages in the page table even if a page would
temporarily get mapped R/O or concurrent fork() could accidentially
end up sharing pinned pages with the child.

Note that this essentially re-introduces local_config support that was
removed recently in commit 6f83d6c74ea5 ("Kselftests: remove support of
libhugetlbfs from kselftests").

[david@redhat.com: s/size_t/ssize_t/ on `cur', `total'.]
  Link: https://lkml.kernel.org/r/445fe1ae-9e22-0d1d-4d09-272231d2f84a@redhat.com
Link: https://lkml.kernel.org/r/20220927110120.106906-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph von Recklinghausen <crecklin@redhat.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile        |  21 ++-
 tools/testing/selftests/vm/anon_cow.c      | 190 +++++++++++++++++++++
 tools/testing/selftests/vm/check_config.sh |  31 ++++
 3 files changed, 241 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/vm/check_config.sh

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index ad07d7a84c3ed..00920cb8b499b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,7 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for vm selftests
 
-LOCAL_HDRS += $(top_srcdir)/mm/gup_test.h
+LOCAL_HDRS += $(selfdir)/vm/local_config.h $(top_srcdir)/mm/gup_test.h
+
+include local_config.mk
 
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
@@ -152,8 +154,25 @@ warn_32bit_failure:
 endif
 endif
 
+# ANON_COW_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
+$(OUTPUT)/anon_cow: LDLIBS += $(ANON_COW_EXTRA_LIBS)
+
 $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
 
 $(OUTPUT)/ksm_tests: LDLIBS += -lnuma
 
 $(OUTPUT)/migration: LDLIBS += -lnuma
+
+local_config.mk local_config.h: check_config.sh
+	/bin/sh ./check_config.sh $(CC)
+
+EXTRA_CLEAN += local_config.mk local_config.h
+
+ifeq ($(ANON_COW_EXTRA_LIBS),)
+all: warn_missing_liburing
+
+warn_missing_liburing:
+	@echo ; \
+	echo "Warning: missing liburing support. Some COW tests will be skipped." ; \
+	echo
+endif
diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c
index 78dfdd983380e..01417a604edae 100644
--- a/tools/testing/selftests/vm/anon_cow.c
+++ b/tools/testing/selftests/vm/anon_cow.c
@@ -19,6 +19,11 @@
 #include <sys/mman.h>
 #include <sys/wait.h>
 
+#include "local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+#include <liburing.h>
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
 #include "../kselftest.h"
 #include "vm_util.h"
 
@@ -334,6 +339,170 @@ static void test_vmsplice_after_fork(char *mem, size_t size)
 	do_test_vmsplice_in_parent(mem, size, false);
 }
 
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+static void do_test_iouring(char *mem, size_t size, bool use_fork)
+{
+	struct comm_pipes comm_pipes;
+	struct io_uring_cqe *cqe;
+	struct io_uring_sqe *sqe;
+	struct io_uring ring;
+	ssize_t cur, total;
+	struct iovec iov;
+	char *buf, *tmp;
+	int ret, fd;
+	FILE *file;
+
+	ret = setup_comm_pipes(&comm_pipes);
+	if (ret) {
+		ksft_test_result_fail("pipe() failed\n");
+		return;
+	}
+
+	file = tmpfile();
+	if (!file) {
+		ksft_test_result_fail("tmpfile() failed\n");
+		goto close_comm_pipes;
+	}
+	fd = fileno(file);
+	assert(fd);
+
+	tmp = malloc(size);
+	if (!tmp) {
+		ksft_test_result_fail("malloc() failed\n");
+		goto close_file;
+	}
+
+	/* Skip on errors, as we might just lack kernel support. */
+	ret = io_uring_queue_init(1, &ring, 0);
+	if (ret < 0) {
+		ksft_test_result_skip("io_uring_queue_init() failed\n");
+		goto free_tmp;
+	}
+
+	/*
+	 * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
+	 * | FOLL_LONGTERM the range.
+	 *
+	 * Skip on errors, as we might just lack kernel support or might not
+	 * have sufficient MEMLOCK permissions.
+	 */
+	iov.iov_base = mem;
+	iov.iov_len = size;
+	ret = io_uring_register_buffers(&ring, &iov, 1);
+	if (ret) {
+		ksft_test_result_skip("io_uring_register_buffers() failed\n");
+		goto queue_exit;
+	}
+
+	if (use_fork) {
+		/*
+		 * fork() and keep the child alive until we're done. Note that
+		 * we expect the pinned page to not get shared with the child.
+		 */
+		ret = fork();
+		if (ret < 0) {
+			ksft_test_result_fail("fork() failed\n");
+			goto unregister_buffers;
+		} else if (!ret) {
+			write(comm_pipes.child_ready[1], "0", 1);
+			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+				;
+			exit(0);
+		}
+
+		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+			;
+	} else {
+		/*
+		 * Map the page R/O into the page table. Enable softdirty
+		 * tracking to stop the page from getting mapped R/W immediately
+		 * again by mprotect() optimizations. Note that we don't have an
+		 * easy way to test if that worked (the pagemap does not export
+		 * if the page is mapped R/O vs. R/W).
+		 */
+		ret = mprotect(mem, size, PROT_READ);
+		clear_softdirty();
+		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+		if (ret) {
+			ksft_test_result_fail("mprotect() failed\n");
+			goto unregister_buffers;
+		}
+	}
+
+	/*
+	 * Modify the page and write page content as observed by the fixed
+	 * buffer pin to the file so we can verify it.
+	 */
+	memset(mem, 0xff, size);
+	sqe = io_uring_get_sqe(&ring);
+	if (!sqe) {
+		ksft_test_result_fail("io_uring_get_sqe() failed\n");
+		goto quit_child;
+	}
+	io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
+
+	ret = io_uring_submit(&ring);
+	if (ret < 0) {
+		ksft_test_result_fail("io_uring_submit() failed\n");
+		goto quit_child;
+	}
+
+	ret = io_uring_wait_cqe(&ring, &cqe);
+	if (ret < 0) {
+		ksft_test_result_fail("io_uring_wait_cqe() failed\n");
+		goto quit_child;
+	}
+
+	if (cqe->res != size) {
+		ksft_test_result_fail("write_fixed failed\n");
+		goto quit_child;
+	}
+	io_uring_cqe_seen(&ring, cqe);
+
+	/* Read back the file content to the temporary buffer. */
+	total = 0;
+	while (total < size) {
+		cur = pread(fd, tmp + total, size - total, total);
+		if (cur < 0) {
+			ksft_test_result_fail("pread() failed\n");
+			goto quit_child;
+		}
+		total += cur;
+	}
+
+	/* Finally, check if we read what we expected. */
+	ksft_test_result(!memcmp(mem, tmp, size),
+			 "Longterm R/W pin is reliable\n");
+
+quit_child:
+	if (use_fork) {
+		write(comm_pipes.parent_ready[1], "0", 1);
+		wait(&ret);
+	}
+unregister_buffers:
+	io_uring_unregister_buffers(&ring);
+queue_exit:
+	io_uring_queue_exit(&ring);
+free_tmp:
+	free(tmp);
+close_file:
+	fclose(file);
+close_comm_pipes:
+	close_comm_pipes(&comm_pipes);
+}
+
+static void test_iouring_ro(char *mem, size_t size)
+{
+	do_test_iouring(mem, size, false);
+}
+
+static void test_iouring_fork(char *mem, size_t size)
+{
+	do_test_iouring(mem, size, true);
+}
+
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
 typedef void (*test_fn)(char *mem, size_t size);
 
 static void do_run_with_base_page(test_fn fn, bool swapout)
@@ -660,6 +829,27 @@ static const struct test_case test_cases[] = {
 		"vmsplice() + unmap in parent after fork()",
 		test_vmsplice_after_fork,
 	},
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+	/*
+	 * Take a R/W longterm pin and then map the page R/O into the page
+	 * table to trigger a write fault on next access. When modifying the
+	 * page, the page content must be visible via the pin.
+	 */
+	{
+		"R/O-mapping a page registered as iouring fixed buffer",
+		test_iouring_ro,
+	},
+	/*
+	 * Take a R/W longterm pin and then fork() a child. When modifying the
+	 * page, the page content must be visible via the pin. We expect the
+	 * pinned page to not get shared with the child.
+	 */
+	{
+		"fork() with an iouring fixed buffer",
+		test_iouring_fork,
+	},
+
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
 };
 
 static void run_test_case(struct test_case const *test_case)
diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh
new file mode 100644
index 0000000000000..9a44c65209255
--- /dev/null
+++ b/tools/testing/selftests/vm/check_config.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Probe for libraries and create header files to record the results. Both C
+# header files and Makefile include fragments are created.
+
+OUTPUT_H_FILE=local_config.h
+OUTPUT_MKFILE=local_config.mk
+
+tmpname=$(mktemp)
+tmpfile_c=${tmpname}.c
+tmpfile_o=${tmpname}.o
+
+# liburing
+echo "#include <sys/types.h>"        > $tmpfile_c
+echo "#include <liburing.h>"        >> $tmpfile_c
+echo "int func(void) { return 0; }" >> $tmpfile_c
+
+CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
+$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
+
+if [ -f $tmpfile_o ]; then
+    echo "#define LOCAL_CONFIG_HAVE_LIBURING 1"  > $OUTPUT_H_FILE
+    echo "ANON_COW_EXTRA_LIBS = -luring"         > $OUTPUT_MKFILE
+else
+    echo "// No liburing support found"          > $OUTPUT_H_FILE
+    echo "# No liburing support found, so:"      > $OUTPUT_MKFILE
+    echo "ANON_COW_EXTRA_LIBS = "               >> $OUTPUT_MKFILE
+fi
+
+rm ${tmpname}.*
-- 
GitLab


From c77369b437f983a862bb6741814670d4ad38478c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 27 Sep 2022 13:01:19 +0200
Subject: [PATCH 1067/4122] mm/gup_test: start/stop/read functionality for PIN
 LONGTERM test

We want an easy way to take a R/O or R/W longterm pin on a range and be
able to observe the content of the pinned pages, so we can properly test
how longterm puns interact with our COW logic.

[david@redhat.com: silence a warning on 32-bit]
  Link: https://lkml.kernel.org/r/74adbb51-6e33-f636-8a9c-2ad87bd9007e@redhat.com
[yang.lee@linux.alibaba.com: ./mm/gup_test.c:281:2-3: Unneeded semicolon]
  Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2455
  Link: https://lkml.kernel.org/r/20221020024035.113619-1-yang.lee@linux.alibaba.com
Link: https://lkml.kernel.org/r/20220927110120.106906-7-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph von Recklinghausen <crecklin@redhat.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/gup_test.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/gup_test.h |  12 +++++
 2 files changed, 153 insertions(+)

diff --git a/mm/gup_test.c b/mm/gup_test.c
index 12b0a91767d3c..0d76d9b4bb5a1 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -203,6 +203,135 @@ free_pages:
 	return ret;
 }
 
+static DEFINE_MUTEX(pin_longterm_test_mutex);
+static struct page **pin_longterm_test_pages;
+static unsigned long pin_longterm_test_nr_pages;
+
+static inline void pin_longterm_test_stop(void)
+{
+	if (pin_longterm_test_pages) {
+		if (pin_longterm_test_nr_pages)
+			unpin_user_pages(pin_longterm_test_pages,
+					 pin_longterm_test_nr_pages);
+		kfree(pin_longterm_test_pages);
+		pin_longterm_test_pages = NULL;
+		pin_longterm_test_nr_pages = 0;
+	}
+}
+
+static inline int pin_longterm_test_start(unsigned long arg)
+{
+	long nr_pages, cur_pages, addr, remaining_pages;
+	int gup_flags = FOLL_LONGTERM;
+	struct pin_longterm_test args;
+	struct page **pages;
+	int ret = 0;
+	bool fast;
+
+	if (pin_longterm_test_pages)
+		return -EINVAL;
+
+	if (copy_from_user(&args, (void __user *)arg, sizeof(args)))
+		return -EFAULT;
+
+	if (args.flags &
+	    ~(PIN_LONGTERM_TEST_FLAG_USE_WRITE|PIN_LONGTERM_TEST_FLAG_USE_FAST))
+		return -EINVAL;
+	if (!IS_ALIGNED(args.addr | args.size, PAGE_SIZE))
+		return -EINVAL;
+	if (args.size > LONG_MAX)
+		return -EINVAL;
+	nr_pages = args.size / PAGE_SIZE;
+	if (!nr_pages)
+		return -EINVAL;
+
+	pages = kvcalloc(nr_pages, sizeof(void *), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	if (args.flags & PIN_LONGTERM_TEST_FLAG_USE_WRITE)
+		gup_flags |= FOLL_WRITE;
+	fast = !!(args.flags & PIN_LONGTERM_TEST_FLAG_USE_FAST);
+
+	if (!fast && mmap_read_lock_killable(current->mm)) {
+		kfree(pages);
+		return -EINTR;
+	}
+
+	pin_longterm_test_pages = pages;
+	pin_longterm_test_nr_pages = 0;
+
+	while (nr_pages - pin_longterm_test_nr_pages) {
+		remaining_pages = nr_pages - pin_longterm_test_nr_pages;
+		addr = args.addr + pin_longterm_test_nr_pages * PAGE_SIZE;
+
+		if (fast)
+			cur_pages = pin_user_pages_fast(addr, remaining_pages,
+							gup_flags, pages);
+		else
+			cur_pages = pin_user_pages(addr, remaining_pages,
+						   gup_flags, pages, NULL);
+		if (cur_pages < 0) {
+			pin_longterm_test_stop();
+			ret = cur_pages;
+			break;
+		}
+		pin_longterm_test_nr_pages += cur_pages;
+		pages += cur_pages;
+	}
+
+	if (!fast)
+		mmap_read_unlock(current->mm);
+	return ret;
+}
+
+static inline int pin_longterm_test_read(unsigned long arg)
+{
+	__u64 user_addr;
+	unsigned long i;
+
+	if (!pin_longterm_test_pages)
+		return -EINVAL;
+
+	if (copy_from_user(&user_addr, (void __user *)arg, sizeof(user_addr)))
+		return -EFAULT;
+
+	for (i = 0; i < pin_longterm_test_nr_pages; i++) {
+		void *addr = page_to_virt(pin_longterm_test_pages[i]);
+
+		if (copy_to_user((void __user *)(unsigned long)user_addr, addr,
+				 PAGE_SIZE))
+			return -EFAULT;
+		user_addr += PAGE_SIZE;
+	}
+	return 0;
+}
+
+static long pin_longterm_test_ioctl(struct file *filep, unsigned int cmd,
+				    unsigned long arg)
+{
+	int ret = -EINVAL;
+
+	if (mutex_lock_killable(&pin_longterm_test_mutex))
+		return -EINTR;
+
+	switch (cmd) {
+	case PIN_LONGTERM_TEST_START:
+		ret = pin_longterm_test_start(arg);
+		break;
+	case PIN_LONGTERM_TEST_STOP:
+		pin_longterm_test_stop();
+		ret = 0;
+		break;
+	case PIN_LONGTERM_TEST_READ:
+		ret = pin_longterm_test_read(arg);
+		break;
+	}
+
+	mutex_unlock(&pin_longterm_test_mutex);
+	return ret;
+}
+
 static long gup_test_ioctl(struct file *filep, unsigned int cmd,
 		unsigned long arg)
 {
@@ -217,6 +346,10 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd,
 	case PIN_BASIC_TEST:
 	case DUMP_USER_PAGES_TEST:
 		break;
+	case PIN_LONGTERM_TEST_START:
+	case PIN_LONGTERM_TEST_STOP:
+	case PIN_LONGTERM_TEST_READ:
+		return pin_longterm_test_ioctl(filep, cmd, arg);
 	default:
 		return -EINVAL;
 	}
@@ -234,9 +367,17 @@ static long gup_test_ioctl(struct file *filep, unsigned int cmd,
 	return 0;
 }
 
+static int gup_test_release(struct inode *inode, struct file *file)
+{
+	pin_longterm_test_stop();
+
+	return 0;
+}
+
 static const struct file_operations gup_test_fops = {
 	.open = nonseekable_open,
 	.unlocked_ioctl = gup_test_ioctl,
+	.release = gup_test_release,
 };
 
 static int __init gup_test_init(void)
diff --git a/mm/gup_test.h b/mm/gup_test.h
index 887ac1d5f5bc3..5b37b54e8bea6 100644
--- a/mm/gup_test.h
+++ b/mm/gup_test.h
@@ -10,6 +10,9 @@
 #define GUP_BASIC_TEST		_IOWR('g', 4, struct gup_test)
 #define PIN_BASIC_TEST		_IOWR('g', 5, struct gup_test)
 #define DUMP_USER_PAGES_TEST	_IOWR('g', 6, struct gup_test)
+#define PIN_LONGTERM_TEST_START	_IOW('g', 7, struct pin_longterm_test)
+#define PIN_LONGTERM_TEST_STOP	_IO('g', 8)
+#define PIN_LONGTERM_TEST_READ	_IOW('g', 9, __u64)
 
 #define GUP_TEST_MAX_PAGES_TO_DUMP		8
 
@@ -30,4 +33,13 @@ struct gup_test {
 	__u32 which_pages[GUP_TEST_MAX_PAGES_TO_DUMP];
 };
 
+#define PIN_LONGTERM_TEST_FLAG_USE_WRITE	1
+#define PIN_LONGTERM_TEST_FLAG_USE_FAST		2
+
+struct pin_longterm_test {
+	__u64 addr;
+	__u64 size;
+	__u32 flags;
+};
+
 #endif	/* __GUP_TEST_H */
-- 
GitLab


From 6f1405efc61b6e686bdf6e2e09b41d2d3a5c14e2 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 27 Sep 2022 13:01:20 +0200
Subject: [PATCH 1068/4122] selftests/vm: anon_cow: add R/O longterm tests via
 gup_test

Let's trigger a R/O longterm pin on three cases of R/O mapped anonymous
pages:
* exclusive (never shared)
* shared (child still alive)
* previously shared (child no longer alive)

... and make sure that the pin is reliable: whatever we write via the page
tables has to be observable via the pin.

Link: https://lkml.kernel.org/r/20220927110120.106906-8-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christoph von Recklinghausen <crecklin@redhat.com>
Cc: Don Dutile <ddutile@redhat.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/anon_cow.c | 210 ++++++++++++++++++++++++++
 1 file changed, 210 insertions(+)

diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c
index 01417a604edae..705bd0b3db11c 100644
--- a/tools/testing/selftests/vm/anon_cow.c
+++ b/tools/testing/selftests/vm/anon_cow.c
@@ -17,6 +17,7 @@
 #include <dirent.h>
 #include <assert.h>
 #include <sys/mman.h>
+#include <sys/ioctl.h>
 #include <sys/wait.h>
 
 #include "local_config.h"
@@ -24,6 +25,7 @@
 #include <liburing.h>
 #endif /* LOCAL_CONFIG_HAVE_LIBURING */
 
+#include "../../../../mm/gup_test.h"
 #include "../kselftest.h"
 #include "vm_util.h"
 
@@ -32,6 +34,7 @@ static int pagemap_fd;
 static size_t thpsize;
 static int nr_hugetlbsizes;
 static size_t hugetlbsizes[10];
+static int gup_fd;
 
 static void detect_thpsize(void)
 {
@@ -503,6 +506,170 @@ static void test_iouring_fork(char *mem, size_t size)
 
 #endif /* LOCAL_CONFIG_HAVE_LIBURING */
 
+enum ro_pin_test {
+	RO_PIN_TEST_SHARED,
+	RO_PIN_TEST_PREVIOUSLY_SHARED,
+	RO_PIN_TEST_RO_EXCLUSIVE,
+};
+
+static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
+			   bool fast)
+{
+	struct pin_longterm_test args;
+	struct comm_pipes comm_pipes;
+	char *tmp, buf;
+	__u64 tmp_val;
+	int ret;
+
+	if (gup_fd < 0) {
+		ksft_test_result_skip("gup_test not available\n");
+		return;
+	}
+
+	tmp = malloc(size);
+	if (!tmp) {
+		ksft_test_result_fail("malloc() failed\n");
+		return;
+	}
+
+	ret = setup_comm_pipes(&comm_pipes);
+	if (ret) {
+		ksft_test_result_fail("pipe() failed\n");
+		goto free_tmp;
+	}
+
+	switch (test) {
+	case RO_PIN_TEST_SHARED:
+	case RO_PIN_TEST_PREVIOUSLY_SHARED:
+		/*
+		 * Share the pages with our child. As the pages are not pinned,
+		 * this should just work.
+		 */
+		ret = fork();
+		if (ret < 0) {
+			ksft_test_result_fail("fork() failed\n");
+			goto close_comm_pipes;
+		} else if (!ret) {
+			write(comm_pipes.child_ready[1], "0", 1);
+			while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+				;
+			exit(0);
+		}
+
+		/* Wait until our child is ready. */
+		while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+			;
+
+		if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
+			/*
+			 * Tell the child to quit now and wait until it quit.
+			 * The pages should now be mapped R/O into our page
+			 * tables, but they are no longer shared.
+			 */
+			write(comm_pipes.parent_ready[1], "0", 1);
+			wait(&ret);
+			if (!WIFEXITED(ret))
+				ksft_print_msg("[INFO] wait() failed\n");
+		}
+		break;
+	case RO_PIN_TEST_RO_EXCLUSIVE:
+		/*
+		 * Map the page R/O into the page table. Enable softdirty
+		 * tracking to stop the page from getting mapped R/W immediately
+		 * again by mprotect() optimizations. Note that we don't have an
+		 * easy way to test if that worked (the pagemap does not export
+		 * if the page is mapped R/O vs. R/W).
+		 */
+		ret = mprotect(mem, size, PROT_READ);
+		clear_softdirty();
+		ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+		if (ret) {
+			ksft_test_result_fail("mprotect() failed\n");
+			goto close_comm_pipes;
+		}
+		break;
+	default:
+		assert(false);
+	}
+
+	/* Take a R/O pin. This should trigger unsharing. */
+	args.addr = (__u64)mem;
+	args.size = size;
+	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
+	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
+	if (ret) {
+		if (errno == EINVAL)
+			ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+		else
+			ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+		goto wait;
+	}
+
+	/* Modify the page. */
+	memset(mem, 0xff, size);
+
+	/*
+	 * Read back the content via the pin to the temporary buffer and
+	 * test if we observed the modification.
+	 */
+	tmp_val = (__u64)tmp;
+	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
+	if (ret)
+		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
+	else
+		ksft_test_result(!memcmp(mem, tmp, size),
+				 "Longterm R/O pin is reliable\n");
+
+	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
+	if (ret)
+		ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+wait:
+	switch (test) {
+	case RO_PIN_TEST_SHARED:
+		write(comm_pipes.parent_ready[1], "0", 1);
+		wait(&ret);
+		if (!WIFEXITED(ret))
+			ksft_print_msg("[INFO] wait() failed\n");
+		break;
+	default:
+		break;
+	}
+close_comm_pipes:
+	close_comm_pipes(&comm_pipes);
+free_tmp:
+	free(tmp);
+}
+
+static void test_ro_pin_on_shared(char *mem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
+}
+
+static void test_ro_fast_pin_on_shared(char *mem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
+}
+
+static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
+}
+
+static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
+}
+
+static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
+}
+
+static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
+}
+
 typedef void (*test_fn)(char *mem, size_t size);
 
 static void do_run_with_base_page(test_fn fn, bool swapout)
@@ -850,6 +1017,48 @@ static const struct test_case test_cases[] = {
 	},
 
 #endif /* LOCAL_CONFIG_HAVE_LIBURING */
+	/*
+	 * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
+	 * When modifying the page via the page table, the page content change
+	 * must be visible via the pin.
+	 */
+	{
+		"R/O GUP pin on R/O-mapped shared page",
+		test_ro_pin_on_shared,
+	},
+	/* Same as above, but using GUP-fast. */
+	{
+		"R/O GUP-fast pin on R/O-mapped shared page",
+		test_ro_fast_pin_on_shared,
+	},
+	/*
+	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
+	 * was previously shared. When modifying the page via the page table,
+	 * the page content change must be visible via the pin.
+	 */
+	{
+		"R/O GUP pin on R/O-mapped previously-shared page",
+		test_ro_pin_on_ro_previously_shared,
+	},
+	/* Same as above, but using GUP-fast. */
+	{
+		"R/O GUP-fast pin on R/O-mapped previously-shared page",
+		test_ro_fast_pin_on_ro_previously_shared,
+	},
+	/*
+	 * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
+	 * When modifying the page via the page table, the page content change
+	 * must be visible via the pin.
+	 */
+	{
+		"R/O GUP pin on R/O-mapped exclusive page",
+		test_ro_pin_on_ro_exclusive,
+	},
+	/* Same as above, but using GUP-fast. */
+	{
+		"R/O GUP-fast pin on R/O-mapped exclusive page",
+		test_ro_fast_pin_on_ro_exclusive,
+	},
 };
 
 static void run_test_case(struct test_case const *test_case)
@@ -902,6 +1111,7 @@ int main(int argc, char **argv)
 	ksft_print_header();
 	ksft_set_plan(nr_test_cases * tests_per_test_case());
 
+	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
 	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
 	if (pagemap_fd < 0)
 		ksft_exit_fail_msg("opening pagemap failed\n");
-- 
GitLab


From f3ad032c2d06be970d78384885c63917974a4af4 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Tue, 27 Sep 2022 14:38:26 +0800
Subject: [PATCH 1069/4122] mm: rmap: rename page_not_mapped() to
 folio_not_mapped()

Since commit 2f031c6f042c ("mm/rmap: Convert rmap_walk() to take a
folio"), page_not_mapped() takes folio as parameter, rename it to be
consistent.

Link: https://lkml.kernel.org/r/20220927063826.159590-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/rmap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 92ed6fe3d038f..9bba65b30e4d9 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1801,7 +1801,7 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
 	return vma_is_temporary_stack(vma);
 }
 
-static int page_not_mapped(struct folio *folio)
+static int folio_not_mapped(struct folio *folio)
 {
 	return !folio_mapped(folio);
 }
@@ -1822,7 +1822,7 @@ void try_to_unmap(struct folio *folio, enum ttu_flags flags)
 	struct rmap_walk_control rwc = {
 		.rmap_one = try_to_unmap_one,
 		.arg = (void *)flags,
-		.done = page_not_mapped,
+		.done = folio_not_mapped,
 		.anon_lock = folio_lock_anon_vma_read,
 	};
 
@@ -2150,7 +2150,7 @@ void try_to_migrate(struct folio *folio, enum ttu_flags flags)
 	struct rmap_walk_control rwc = {
 		.rmap_one = try_to_migrate_one,
 		.arg = (void *)flags,
-		.done = page_not_mapped,
+		.done = folio_not_mapped,
 		.anon_lock = folio_lock_anon_vma_read,
 	};
 
@@ -2297,7 +2297,7 @@ static bool folio_make_device_exclusive(struct folio *folio,
 	};
 	struct rmap_walk_control rwc = {
 		.rmap_one = page_make_device_exclusive_one,
-		.done = page_not_mapped,
+		.done = folio_not_mapped,
 		.anon_lock = folio_lock_anon_vma_read,
 		.arg = &args,
 	};
-- 
GitLab


From f9e60beceee5c85dc9d5e71c1090cfed97ab0897 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:40 +0800
Subject: [PATCH 1070/4122] cgroup/cpuset: use hotplug_memory_notifier()
 directly

Patch series "mm: Use hotplug_memory_notifier() instead of
register_hotmemory_notifier()", v4.

Commit f02c69680088 ("include/linux/memory.h: implement
register_hotmemory_notifier()") introduced register_hotmemory_notifier()
to avoid a compile problem with gcc-4.4.4:

    When CONFIG_MEMORY_HOTPLUG=n, we don't want the memory-hotplug notifier
    handlers to be included in the .o files, for space reasons.

    The existing hotplug_memory_notifier() tries to handle this but testing
    with gcc-4.4.4 shows that it doesn't work - the hotplug functions are
    still present in the .o files.

Since commit 76ae847497bc52 ("Documentation: raise minimum supported
version of GCC to 5.1") has already updated the minimum gcc version to
5.1.  The previous problem mentioned in f02c69680088 does not exist.  So
we can now revert to use hotplug_memory_notifier() directly rather than
register_hotmemory_notifier().

In the last patch, we move all hotplug memory notifier priority to same
file for easy sorting.


This patch (of 8):

Commit 76ae847497bc52 ("Documentation: raise minimum supported version of
GCC to 5.1") updated the minimum gcc version to 5.1.  So the problem
mentioned in f02c69680088 ("include/linux/memory.h: implement
register_hotmemory_notifier()") no longer exist.  So we can now switch to
use hotplug_memory_notifier() directly rather than
register_hotmemory_notifier().

Link: https://lkml.kernel.org/r/20220923033347.3935160-1-liushixin2@huawei.com
Link: https://lkml.kernel.org/r/20220923033347.3935160-2-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/cgroup/cpuset.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index b474289c15b82..0c6db6a4f4274 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3630,11 +3630,6 @@ static int cpuset_track_online_nodes(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block cpuset_track_online_nodes_nb = {
-	.notifier_call = cpuset_track_online_nodes,
-	.priority = 10,		/* ??! */
-};
-
 /**
  * cpuset_init_smp - initialize cpus_allowed
  *
@@ -3652,7 +3647,7 @@ void __init cpuset_init_smp(void)
 	cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
 	top_cpuset.effective_mems = node_states[N_MEMORY];
 
-	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
+	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
 
 	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
 	BUG_ON(!cpuset_migrate_mm_wq);
-- 
GitLab


From 5d89c224328bce791d051bf60aa92d90bae93c01 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:41 +0800
Subject: [PATCH 1071/4122] fs/proc/kcore.c: use hotplug_memory_notifier()
 directly

Commit 76ae847497bc52 ("Documentation: raise minimum supported version of
GCC to 5.1") updated the minimum gcc version to 5.1.  So the problem
mentioned in f02c69680088 ("include/linux/memory.h: implement
register_hotmemory_notifier()") no longer exist.  So we can now switch to
use hotplug_memory_notifier() directly rather than
register_hotmemory_notifier().

Link: https://lkml.kernel.org/r/20220923033347.3935160-3-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/kcore.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index dff921f7ca332..7692a360972d3 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -18,7 +18,6 @@
 #include <linux/capability.h>
 #include <linux/elf.h>
 #include <linux/elfcore.h>
-#include <linux/notifier.h>
 #include <linux/vmalloc.h>
 #include <linux/highmem.h>
 #include <linux/printk.h>
@@ -638,10 +637,6 @@ static int __meminit kcore_callback(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block kcore_callback_nb __meminitdata = {
-	.notifier_call = kcore_callback,
-	.priority = 0,
-};
 
 static struct kcore_list kcore_vmalloc;
 
@@ -694,7 +689,7 @@ static int __init proc_kcore_init(void)
 	add_modules_range();
 	/* Store direct-map area from physical memory map */
 	kcore_update_ram();
-	register_hotmemory_notifier(&kcore_callback_nb);
+	hotplug_memory_notifier(kcore_callback, 0);
 
 	return 0;
 }
-- 
GitLab


From 946d5f9c9dcdbaedcd664fad08cea7910139d10f Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:42 +0800
Subject: [PATCH 1072/4122] mm/slub.c: use hotplug_memory_notifier() directly

Commit 76ae847497bc52 ("Documentation: raise minimum supported version of
GCC to 5.1") updated the minimum gcc version to 5.1.  So the problem
mentioned in f02c69680088 ("include/linux/memory.h: implement
register_hotmemory_notifier()") no longer exist.  So we can now switch to
use hotplug_memory_notifier() directly rather than
register_hotmemory_notifier().

Link: https://lkml.kernel.org/r/20220923033347.3935160-4-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/slub.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index 157527d7101be..f37e6a51e2333 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4771,11 +4771,6 @@ static int slab_memory_callback(struct notifier_block *self,
 	return ret;
 }
 
-static struct notifier_block slab_memory_callback_nb = {
-	.notifier_call = slab_memory_callback,
-	.priority = SLAB_CALLBACK_PRI,
-};
-
 /********************************************************************
  *			Basic setup of slabs
  *******************************************************************/
@@ -4841,7 +4836,7 @@ void __init kmem_cache_init(void)
 	create_boot_cache(kmem_cache_node, "kmem_cache_node",
 		sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
 
-	register_hotmemory_notifier(&slab_memory_callback_nb);
+	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
 
 	/* Able to allocate the per node structures */
 	slab_state = PARTIAL;
-- 
GitLab


From cddb8d09ff1e477de8236a061a5017b21bab3c14 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:43 +0800
Subject: [PATCH 1073/4122] mm/mmap: use hotplug_memory_notifier() directly

Commit 76ae847497bc52 ("Documentation: raise minimum supported version of
GCC to 5.1") updated the minimum gcc version to 5.1.  So the problem
mentioned in f02c69680088 ("include/linux/memory.h: implement
register_hotmemory_notifier()") no longer exist.  So we can now switch to
use hotplug_memory_notifier() directly rather than
register_hotmemory_notifier().

Link: https://lkml.kernel.org/r/20220923033347.3935160-5-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mmap.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 2def55555e05f..3f47fd57d1654 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3749,13 +3749,9 @@ static int reserve_mem_notifier(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block reserve_mem_nb = {
-	.notifier_call = reserve_mem_notifier,
-};
-
 static int __meminit init_reserve_notifier(void)
 {
-	if (register_hotmemory_notifier(&reserve_mem_nb))
+	if (hotplug_memory_notifier(reserve_mem_notifier, 0))
 		pr_err("Failed registering memory add/remove notifier for admin reserve\n");
 
 	return 0;
-- 
GitLab


From d46722ef1c090541d56f706f3a90f3f2e84cdf0c Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:44 +0800
Subject: [PATCH 1074/4122] mm/mm_init.c: use hotplug_memory_notifier()
 directly

Commit 76ae847497bc52 ("Documentation: raise minimum supported version of
GCC to 5.1") updated the minimum gcc version to 5.1.  So the problem
mentioned in f02c69680088 ("include/linux/memory.h: implement
register_hotmemory_notifier()") no longer exist.  So we can now switch to
use hotplug_memory_notifier() directly rather than
register_hotmemory_notifier().

Link: https://lkml.kernel.org/r/20220923033347.3935160-6-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mm_init.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/mm/mm_init.c b/mm/mm_init.c
index 0d7b2bd2454a1..44aadc162d1f0 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -178,16 +178,10 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block compute_batch_nb __meminitdata = {
-	.notifier_call = mm_compute_batch_notifier,
-	.priority = IPC_CALLBACK_PRI, /* use lowest priority */
-};
-
 static int __init mm_compute_batch_init(void)
 {
 	mm_compute_batch(sysctl_overcommit_memory);
-	register_hotmemory_notifier(&compute_batch_nb);
-
+	hotplug_memory_notifier(mm_compute_batch_notifier, IPC_CALLBACK_PRI);
 	return 0;
 }
 
-- 
GitLab


From 82f8661a7982efea3e4437777e9f914781fac640 Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:45 +0800
Subject: [PATCH 1075/4122] ACPI: HMAT: use hotplug_memory_notifier() directly

Commit 76ae847497bc52 ("Documentation: raise minimum supported version of
GCC to 5.1") updated the minimum gcc version to 5.1.  So the problem
mentioned in f02c69680088 ("include/linux/memory.h: implement
register_hotmemory_notifier()") no longer exist.  So we can now switch to
use hotplug_memory_notifier() directly rather than
register_hotmemory_notifier().

Link: https://lkml.kernel.org/r/20220923033347.3935160-7-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/acpi/numa/hmat.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 23f49a2f4d148..0ecefb6047348 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -767,11 +767,6 @@ static int hmat_callback(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block hmat_callback_nb = {
-	.notifier_call = hmat_callback,
-	.priority = 2,
-};
-
 static __init void hmat_free_structures(void)
 {
 	struct memory_target *target, *tnext;
@@ -854,7 +849,7 @@ static __init int hmat_init(void)
 	hmat_register_targets();
 
 	/* Keep the table and structures if the notifier may use them */
-	if (!register_hotmemory_notifier(&hmat_callback_nb))
+	if (!hotplug_memory_notifier(hmat_callback, 2))
 		return 0;
 out_put:
 	hmat_free_structures();
-- 
GitLab


From eafd296e0cc0cc03b4ae01c2b3b07273514d757c Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:46 +0800
Subject: [PATCH 1076/4122] memory: remove unused register_hotmemory_notifier()

Remove unused register_hotmemory_notifier().

Link: https://lkml.kernel.org/r/20220923033347.3935160-8-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memory.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/linux/memory.h b/include/linux/memory.h
index aa619464a1df0..98d2a2ebcc10b 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -19,7 +19,6 @@
 #include <linux/node.h>
 #include <linux/compiler.h>
 #include <linux/mutex.h>
-#include <linux/notifier.h>
 
 #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
 
@@ -136,9 +135,6 @@ static inline int hotplug_memory_notifier(notifier_fn_t fn, int pri)
 {
 	return 0;
 }
-/* These aren't inline functions due to a GCC bug. */
-#define register_hotmemory_notifier(nb)    ({ (void)(nb); 0; })
-#define unregister_hotmemory_notifier(nb)  ({ (void)(nb); })
 #else /* CONFIG_MEMORY_HOTPLUG */
 extern int register_memory_notifier(struct notifier_block *nb);
 extern void unregister_memory_notifier(struct notifier_block *nb);
@@ -166,8 +162,6 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 		{ .notifier_call = fn, .priority = pri };\
 	register_memory_notifier(&fn##_mem_nb);			\
 })
-#define register_hotmemory_notifier(nb)		register_memory_notifier(nb)
-#define unregister_hotmemory_notifier(nb) 	unregister_memory_notifier(nb)
 
 #ifdef CONFIG_NUMA
 void memory_block_add_nid(struct memory_block *mem, int nid,
-- 
GitLab


From 1eeaa4fd39b0b1b3e986f8eab6978e69b01e3c5e Mon Sep 17 00:00:00 2001
From: Liu Shixin <liushixin2@huawei.com>
Date: Fri, 23 Sep 2022 11:33:47 +0800
Subject: [PATCH 1077/4122] memory: move hotplug memory notifier priority to
 same file for easy sorting

The priority of hotplug memory callback is defined in a different file.
And there are some callers using numbers directly.  Collect them together
into include/linux/memory.h for easy reading.  This allows us to sort
their priorities more intuitively without additional comments.

Link: https://lkml.kernel.org/r/20220923033347.3935160-9-liushixin2@huawei.com
Signed-off-by: Liu Shixin <liushixin2@huawei.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Waiman Long <longman@redhat.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/acpi/numa/hmat.c     | 2 +-
 fs/proc/kcore.c              | 2 +-
 include/linux/memory-tiers.h | 1 -
 include/linux/memory.h       | 9 +++++++--
 kernel/cgroup/cpuset.c       | 2 +-
 mm/kasan/shadow.c            | 2 +-
 mm/ksm.c                     | 2 +-
 mm/memory-tiers.c            | 2 +-
 mm/mm_init.c                 | 2 +-
 mm/mmap.c                    | 2 +-
 mm/page_ext.c                | 2 +-
 11 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c
index 0ecefb6047348..139e3b41653e5 100644
--- a/drivers/acpi/numa/hmat.c
+++ b/drivers/acpi/numa/hmat.c
@@ -849,7 +849,7 @@ static __init int hmat_init(void)
 	hmat_register_targets();
 
 	/* Keep the table and structures if the notifier may use them */
-	if (!hotplug_memory_notifier(hmat_callback, 2))
+	if (!hotplug_memory_notifier(hmat_callback, HMAT_CALLBACK_PRI))
 		return 0;
 out_put:
 	hmat_free_structures();
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 7692a360972d3..98f3289556e4a 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -689,7 +689,7 @@ static int __init proc_kcore_init(void)
 	add_modules_range();
 	/* Store direct-map area from physical memory map */
 	kcore_update_ram();
-	hotplug_memory_notifier(kcore_callback, 0);
+	hotplug_memory_notifier(kcore_callback, DEFAULT_CALLBACK_PRI);
 
 	return 0;
 }
diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h
index 965009aa01d72..fc9647b1b4f96 100644
--- a/include/linux/memory-tiers.h
+++ b/include/linux/memory-tiers.h
@@ -18,7 +18,6 @@
  * the same memory tier.
  */
 #define MEMTIER_ADISTANCE_DRAM	((4 * MEMTIER_CHUNK_SIZE) + (MEMTIER_CHUNK_SIZE >> 1))
-#define MEMTIER_HOTPLUG_PRIO	100
 
 struct memory_tier;
 struct memory_dev_type {
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 98d2a2ebcc10b..463662ef76142 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -112,8 +112,13 @@ struct mem_section;
  * Priorities for the hotplug memory callback routines (stored in decreasing
  * order in the callback chain)
  */
-#define SLAB_CALLBACK_PRI       1
-#define IPC_CALLBACK_PRI        10
+#define DEFAULT_CALLBACK_PRI	0
+#define SLAB_CALLBACK_PRI	1
+#define HMAT_CALLBACK_PRI	2
+#define MM_COMPUTE_BATCH_PRI	10
+#define CPUSET_CALLBACK_PRI	10
+#define MEMTIER_HOTPLUG_PRI	100
+#define KSM_CALLBACK_PRI	100
 
 #ifndef CONFIG_MEMORY_HOTPLUG
 static inline void memory_dev_init(void)
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 0c6db6a4f4274..3ea2e836e93eb 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -3647,7 +3647,7 @@ void __init cpuset_init_smp(void)
 	cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
 	top_cpuset.effective_mems = node_states[N_MEMORY];
 
-	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+	hotplug_memory_notifier(cpuset_track_online_nodes, CPUSET_CALLBACK_PRI);
 
 	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
 	BUG_ON(!cpuset_migrate_mm_wq);
diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c
index 0e3648b603a6f..2fba1f51f042e 100644
--- a/mm/kasan/shadow.c
+++ b/mm/kasan/shadow.c
@@ -244,7 +244,7 @@ static int __meminit kasan_mem_notifier(struct notifier_block *nb,
 
 static int __init kasan_memhotplug_init(void)
 {
-	hotplug_memory_notifier(kasan_mem_notifier, 0);
+	hotplug_memory_notifier(kasan_mem_notifier, DEFAULT_CALLBACK_PRI);
 
 	return 0;
 }
diff --git a/mm/ksm.c b/mm/ksm.c
index c19fcca9bc03d..7ba97f86d8316 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -3211,7 +3211,7 @@ static int __init ksm_init(void)
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
 	/* There is no significance to this priority 100 */
-	hotplug_memory_notifier(ksm_memory_callback, 100);
+	hotplug_memory_notifier(ksm_memory_callback, KSM_CALLBACK_PRI);
 #endif
 	return 0;
 
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index fa8c9d07f9ce9..939e200c283bb 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -664,7 +664,7 @@ static int __init memory_tier_init(void)
 	establish_demotion_targets();
 	mutex_unlock(&memory_tier_lock);
 
-	hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRIO);
+	hotplug_memory_notifier(memtier_hotplug_callback, MEMTIER_HOTPLUG_PRI);
 	return 0;
 }
 subsys_initcall(memory_tier_init);
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 44aadc162d1f0..c1883362e71d2 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -181,7 +181,7 @@ static int __meminit mm_compute_batch_notifier(struct notifier_block *self,
 static int __init mm_compute_batch_init(void)
 {
 	mm_compute_batch(sysctl_overcommit_memory);
-	hotplug_memory_notifier(mm_compute_batch_notifier, IPC_CALLBACK_PRI);
+	hotplug_memory_notifier(mm_compute_batch_notifier, MM_COMPUTE_BATCH_PRI);
 	return 0;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 3f47fd57d1654..c697771d406b7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3751,7 +3751,7 @@ static int reserve_mem_notifier(struct notifier_block *nb,
 
 static int __meminit init_reserve_notifier(void)
 {
-	if (hotplug_memory_notifier(reserve_mem_notifier, 0))
+	if (hotplug_memory_notifier(reserve_mem_notifier, DEFAULT_CALLBACK_PRI))
 		pr_err("Failed registering memory add/remove notifier for admin reserve\n");
 
 	return 0;
diff --git a/mm/page_ext.c b/mm/page_ext.c
index affe80243b6d6..b2ff5c9129f47 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -513,7 +513,7 @@ void __init page_ext_init(void)
 			cond_resched();
 		}
 	}
-	hotplug_memory_notifier(page_ext_callback, 0);
+	hotplug_memory_notifier(page_ext_callback, DEFAULT_CALLBACK_PRI);
 	pr_info("allocated %ld bytes of page_ext\n", total_usage);
 	invoke_init_callbacks();
 	return;
-- 
GitLab


From 3c0c9bc9c9596d5cd69529da822526f88673365b Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Tue, 18 Oct 2022 20:10:47 +0200
Subject: [PATCH 1078/4122] mm: vmalloc: add alloc_vmap_area trace event

Patch series "Add basic trace events for vmap/vmalloc (v2)", v2.

This small series add some basic trace events for the vmap/vmalloc code.
Since currently we lack any, sometimes it is hard to start debuging vmap
code if an issue is reported or occured.

For example https://lore.kernel.org/linux-mm/Y0p8BZIiDXLQbde%2F@pc636/T/

The final patch adds two reviewers for vmalloc code.


This patch (of 7):

It is for debug purposes and for validation of passed parameters.

Link: https://lkml.kernel.org/r/20221018181053.434508-1-urezki@gmail.com
Link: https://lkml.kernel.org/r/20221018181053.434508-2-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/vmalloc.h | 56 ++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 include/trace/events/vmalloc.h

diff --git a/include/trace/events/vmalloc.h b/include/trace/events/vmalloc.h
new file mode 100644
index 0000000000000..39fbd77c91e78
--- /dev/null
+++ b/include/trace/events/vmalloc.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vmalloc
+
+#if !defined(_TRACE_VMALLOC_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VMALLOC_H
+
+#include <linux/tracepoint.h>
+
+/**
+ * alloc_vmap_area - called when a new vmap allocation occurs
+ * @addr:	an allocated address
+ * @size:	a requested size
+ * @align:	a requested alignment
+ * @vstart:	a requested start range
+ * @vend:	a requested end range
+ * @failed:	an allocation failed or not
+ *
+ * This event is used for a debug purpose, it can give an extra
+ * information for a developer about how often it occurs and which
+ * parameters are passed for further validation.
+ */
+TRACE_EVENT(alloc_vmap_area,
+
+	TP_PROTO(unsigned long addr, unsigned long size, unsigned long align,
+		unsigned long vstart, unsigned long vend, int failed),
+
+	TP_ARGS(addr, size, align, vstart, vend, failed),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, addr)
+		__field(unsigned long, size)
+		__field(unsigned long, align)
+		__field(unsigned long, vstart)
+		__field(unsigned long, vend)
+		__field(int, failed)
+	),
+
+	TP_fast_assign(
+		__entry->addr = addr;
+		__entry->size = size;
+		__entry->align = align;
+		__entry->vstart = vstart;
+		__entry->vend = vend;
+		__entry->failed = failed;
+	),
+
+	TP_printk("va_start: %lu size=%lu align=%lu vstart=0x%lx vend=0x%lx failed=%d",
+		__entry->addr, __entry->size, __entry->align,
+		__entry->vstart, __entry->vend, __entry->failed)
+);
+
+#endif /*  _TRACE_VMALLOC_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
GitLab


From b3a5a7b099162e1b11db459f8128d4374f7d1c05 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Tue, 18 Oct 2022 20:10:48 +0200
Subject: [PATCH 1079/4122] mm: vmalloc: add purge_vmap_area_lazy trace event

It is for debug purposes to track number of freed vmap areas including a
range it occurs on.

Link: https://lkml.kernel.org/r/20221018181053.434508-3-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/vmalloc.h | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/include/trace/events/vmalloc.h b/include/trace/events/vmalloc.h
index 39fbd77c91e78..afeb8003a0f25 100644
--- a/include/trace/events/vmalloc.h
+++ b/include/trace/events/vmalloc.h
@@ -50,6 +50,39 @@ TRACE_EVENT(alloc_vmap_area,
 		__entry->vstart, __entry->vend, __entry->failed)
 );
 
+/**
+ * purge_vmap_area_lazy - called when vmap areas were lazily freed
+ * @start:		purging start address
+ * @end:		purging end address
+ * @npurged:	numbed of purged vmap areas
+ *
+ * This event is used for a debug purpose. It gives some
+ * indication about start:end range and how many objects
+ * are released.
+ */
+TRACE_EVENT(purge_vmap_area_lazy,
+
+	TP_PROTO(unsigned long start, unsigned long end,
+		unsigned int npurged),
+
+	TP_ARGS(start, end, npurged),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, start)
+		__field(unsigned long, end)
+		__field(unsigned int, npurged)
+	),
+
+	TP_fast_assign(
+		__entry->start = start;
+		__entry->end = end;
+		__entry->npurged = npurged;
+	),
+
+	TP_printk("start=0x%lx end=0x%lx num_purged=%u",
+		__entry->start, __entry->end, __entry->npurged)
+);
+
 #endif /*  _TRACE_VMALLOC_H */
 
 /* This part must be outside protection */
-- 
GitLab


From fabc27f7649e070c4f6c742e436a51ff68c4a280 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Tue, 18 Oct 2022 20:10:49 +0200
Subject: [PATCH 1080/4122] mm: vmalloc: add free_vmap_area_noflush trace event

This event is used in order to validate/debug a start address of freed VA,
number of currently outstanding and maximum allowed areas.

Link: https://lkml.kernel.org/r/20221018181053.434508-4-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/vmalloc.h | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/include/trace/events/vmalloc.h b/include/trace/events/vmalloc.h
index afeb8003a0f25..ad4e02191f356 100644
--- a/include/trace/events/vmalloc.h
+++ b/include/trace/events/vmalloc.h
@@ -83,6 +83,40 @@ TRACE_EVENT(purge_vmap_area_lazy,
 		__entry->start, __entry->end, __entry->npurged)
 );
 
+/**
+ * free_vmap_area_noflush - called when a vmap area is freed
+ * @va_start:		a start address of VA
+ * @nr_lazy:		number of current lazy pages
+ * @nr_lazy_max:	number of maximum lazy pages
+ *
+ * This event is used for a debug purpose. It gives some
+ * indication about a VA that is released, number of current
+ * outstanding areas and a maximum allowed threshold before
+ * dropping all of them.
+ */
+TRACE_EVENT(free_vmap_area_noflush,
+
+	TP_PROTO(unsigned long va_start, unsigned long nr_lazy,
+		unsigned long nr_lazy_max),
+
+	TP_ARGS(va_start, nr_lazy, nr_lazy_max),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, va_start)
+		__field(unsigned long, nr_lazy)
+		__field(unsigned long, nr_lazy_max)
+	),
+
+	TP_fast_assign(
+		__entry->va_start = va_start;
+		__entry->nr_lazy = nr_lazy;
+		__entry->nr_lazy_max = nr_lazy_max;
+	),
+
+	TP_printk("va_start=0x%lx nr_lazy=%lu nr_lazy_max=%lu",
+		__entry->va_start, __entry->nr_lazy, __entry->nr_lazy_max)
+);
+
 #endif /*  _TRACE_VMALLOC_H */
 
 /* This part must be outside protection */
-- 
GitLab


From cf243da6ab3987b65b95357194926a31415095b8 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Tue, 18 Oct 2022 20:10:50 +0200
Subject: [PATCH 1081/4122] mm: vmalloc: use trace_alloc_vmap_area event

This is for debug purpose and is called when an allocation attempt occurs.
This event gives some information about:

- start address of allocated area;
- size that is requested;
- alignment that is required;
- vstart/vend restriction;
- if an allocation fails.

Link: https://lkml.kernel.org/r/20221018181053.434508-5-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmalloc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index ccaa461998f3c..849563d334fba 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -43,6 +43,9 @@
 #include <asm/tlbflush.h>
 #include <asm/shmparam.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/vmalloc.h>
+
 #include "internal.h"
 #include "pgalloc-track.h"
 
@@ -1620,6 +1623,8 @@ retry:
 		size, align, vstart, vend);
 	spin_unlock(&free_vmap_area_lock);
 
+	trace_alloc_vmap_area(addr, size, align, vstart, vend, addr == vend);
+
 	/*
 	 * If an allocation fails, the "vend" address is
 	 * returned. Therefore trigger the overflow path.
-- 
GitLab


From 6030fd5fd1f7baaac3661a5301cc7838d4e3b7f6 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Tue, 18 Oct 2022 20:10:51 +0200
Subject: [PATCH 1082/4122] mm: vmalloc: use trace_purge_vmap_area_lazy event

This is for debug purposes and is called when all outstanding areas are
removed back to the vmap space.  It gives some extra information about:

- a start:end range where set of vmap ares were freed;
- a number of purged areas which were backed off.

[urezki@gmail.com: simplify return boolean expression]
  Link: https://lkml.kernel.org/r/20221020125247.5053-1-urezki@gmail.com
Link: https://lkml.kernel.org/r/20221018181053.434508-6-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmalloc.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 849563d334fba..1b1205ade1cff 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1730,6 +1730,7 @@ static void purge_fragmented_blocks_allcpus(void);
 static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 {
 	unsigned long resched_threshold;
+	unsigned int num_purged_areas = 0;
 	struct list_head local_purge_list;
 	struct vmap_area *va, *n_va;
 
@@ -1741,7 +1742,7 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 	spin_unlock(&purge_vmap_area_lock);
 
 	if (unlikely(list_empty(&local_purge_list)))
-		return false;
+		goto out;
 
 	start = min(start,
 		list_first_entry(&local_purge_list,
@@ -1776,12 +1777,16 @@ static bool __purge_vmap_area_lazy(unsigned long start, unsigned long end)
 					      va->va_start, va->va_end);
 
 		atomic_long_sub(nr, &vmap_lazy_nr);
+		num_purged_areas++;
 
 		if (atomic_long_read(&vmap_lazy_nr) < resched_threshold)
 			cond_resched_lock(&free_vmap_area_lock);
 	}
 	spin_unlock(&free_vmap_area_lock);
-	return true;
+
+out:
+	trace_purge_vmap_area_lazy(start, end, num_purged_areas);
+	return num_purged_areas > 0;
 }
 
 /*
-- 
GitLab


From 8c4196fe810a6717a8f9e528083911703f6a5a51 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Tue, 18 Oct 2022 20:10:52 +0200
Subject: [PATCH 1083/4122] mm: vmalloc: use trace_free_vmap_area_noflush event

It is for debug purposes and is called when a vmap area gets freed.  This
event gives some indication about:

- a start address of released area;
- a current number of outstanding pages;
- a maximum number of allowed outstanding pages.

Link: https://lkml.kernel.org/r/20221018181053.434508-7-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmalloc.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 1b1205ade1cff..ca71de7c9d775 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1821,6 +1821,8 @@ static void drain_vmap_area_work(struct work_struct *work)
  */
 static void free_vmap_area_noflush(struct vmap_area *va)
 {
+	unsigned long nr_lazy_max = lazy_max_pages();
+	unsigned long va_start = va->va_start;
 	unsigned long nr_lazy;
 
 	spin_lock(&vmap_area_lock);
@@ -1838,8 +1840,10 @@ static void free_vmap_area_noflush(struct vmap_area *va)
 		&purge_vmap_area_root, &purge_vmap_area_list);
 	spin_unlock(&purge_vmap_area_lock);
 
+	trace_free_vmap_area_noflush(va_start, nr_lazy, nr_lazy_max);
+
 	/* After this point, we may free va at any time */
-	if (unlikely(nr_lazy > lazy_max_pages()))
+	if (unlikely(nr_lazy > nr_lazy_max))
 		schedule_work(&drain_vmap_work);
 }
 
-- 
GitLab


From 65f199b2b40d82e48c79af2f4b5e9fafb290b231 Mon Sep 17 00:00:00 2001
From: "Uladzislau Rezki (Sony)" <urezki@gmail.com>
Date: Tue, 18 Oct 2022 20:10:53 +0200
Subject: [PATCH 1084/4122] vmalloc: add reviewers for vmalloc code

Add myself and Christoph Hellwig as reviewers for vmalloc.

[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/20221018181053.434508-8-urezki@gmail.com
Signed-off-by: Uladzislau Rezki (Sony) <urezki@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oleksiy Avramchenko <oleksiy.avramchenko@sony.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 MAINTAINERS | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 046ff06ff97fa..631bd2e8c5c4a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13294,10 +13294,20 @@ F:	include/linux/memory_hotplug.h
 F:	include/linux/mm.h
 F:	include/linux/mmzone.h
 F:	include/linux/pagewalk.h
-F:	include/linux/vmalloc.h
 F:	mm/
 F:	tools/testing/selftests/vm/
 
+VMALLOC
+M:	Andrew Morton <akpm@linux-foundation.org>
+R:	Uladzislau Rezki <urezki@gmail.com>
+R:	Christoph Hellwig <hch@infradead.org>
+L:	linux-mm@kvack.org
+S:	Maintained
+W:	http://www.linux-mm.org
+T:	git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+F:	include/linux/vmalloc.h
+F:	mm/vmalloc.c
+
 MEMORY HOT(UN)PLUG
 M:	David Hildenbrand <david@redhat.com>
 M:	Oscar Salvador <osalvador@suse.de>
-- 
GitLab


From e025ab842ec35225b1a8e163d1f311beb9e38ce9 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Tue, 18 Oct 2022 15:40:14 +0800
Subject: [PATCH 1085/4122] mm: remove kern_addr_valid() completely

Most architectures (except arm64/x86/sparc) simply return 1 for
kern_addr_valid(), which is only used in read_kcore(), and it calls
copy_from_kernel_nofault() which could check whether the address is a
valid kernel address.  So as there is no need for kern_addr_valid(), let's
remove it.

Link: https://lkml.kernel.org/r/20221018074014.185687-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>	[m68k]
Acked-by: Heiko Carstens <hca@linux.ibm.com>		[s390]
Acked-by: Christoph Hellwig <hch@lst.de>
Acked-by: Helge Deller <deller@gmx.de>			[parisc]
Acked-by: Michael Ellerman <mpe@ellerman.id.au>		[powerpc]
Acked-by: Guo Ren <guoren@kernel.org>			[csky]
Acked-by: Catalin Marinas <catalin.marinas@arm.com>	[arm64]
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: <aou@eecs.berkeley.edu>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Chris Zankel <chris@zankel.net>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James Bottomley <James.Bottomley@HansenPartnership.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Jonas Bonn <jonas@southpole.se>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Max Filippov <jcmvbkbc@gmail.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Palmer Dabbelt <palmer@rivosinc.com>
Cc: Paul Walmsley <paul.walmsley@sifive.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Rich Felker <dalias@libc.org>
Cc: Russell King <linux@armlinux.org.uk>
Cc: Stafford Horne <shorne@gmail.com>
Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vineet Gupta <vgupta@kernel.org>
Cc: Will Deacon <will@kernel.org>
Cc: Xuerui Wang <kernel@xen0n.name>
Cc: Yoshinori Sato <ysato@users.osdn.me>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/alpha/include/asm/pgtable.h          |  2 -
 arch/arc/include/asm/pgtable-bits-arcv2.h |  2 -
 arch/arm/include/asm/pgtable-nommu.h      |  2 -
 arch/arm/include/asm/pgtable.h            |  4 --
 arch/arm64/include/asm/pgtable.h          |  2 -
 arch/arm64/mm/mmu.c                       | 47 -----------------------
 arch/arm64/mm/pageattr.c                  |  3 +-
 arch/csky/include/asm/pgtable.h           |  3 --
 arch/hexagon/include/asm/page.h           |  7 ----
 arch/ia64/include/asm/pgtable.h           | 16 --------
 arch/loongarch/include/asm/pgtable.h      |  2 -
 arch/m68k/include/asm/pgtable_mm.h        |  2 -
 arch/m68k/include/asm/pgtable_no.h        |  1 -
 arch/microblaze/include/asm/pgtable.h     |  3 --
 arch/mips/include/asm/pgtable.h           |  2 -
 arch/nios2/include/asm/pgtable.h          |  2 -
 arch/openrisc/include/asm/pgtable.h       |  2 -
 arch/parisc/include/asm/pgtable.h         | 15 --------
 arch/powerpc/include/asm/pgtable.h        |  7 ----
 arch/riscv/include/asm/pgtable.h          |  2 -
 arch/s390/include/asm/pgtable.h           |  2 -
 arch/sh/include/asm/pgtable.h             |  2 -
 arch/sparc/include/asm/pgtable_32.h       |  6 ---
 arch/sparc/mm/init_32.c                   |  3 +-
 arch/sparc/mm/init_64.c                   |  1 -
 arch/um/include/asm/pgtable.h             |  2 -
 arch/x86/include/asm/pgtable_32.h         |  9 -----
 arch/x86/include/asm/pgtable_64.h         |  1 -
 arch/x86/mm/init_64.c                     | 41 --------------------
 arch/xtensa/include/asm/pgtable.h         |  2 -
 fs/proc/kcore.c                           | 26 +++++--------
 31 files changed, 11 insertions(+), 210 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 3ea9661c09ffc..9e45f6735d5d2 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -313,8 +313,6 @@ extern inline pte_t mk_swap_pte(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#define kern_addr_valid(addr)	(1)
-
 #define pte_ERROR(e) \
 	printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
 #define pmd_ERROR(e) \
diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
index b23be557403e3..515e82db519fe 100644
--- a/arch/arc/include/asm/pgtable-bits-arcv2.h
+++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
@@ -120,8 +120,6 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-#define kern_addr_valid(addr)	(1)
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #include <asm/hugepage.h>
 #endif
diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h
index d16aba48fa0a4..25d8c7bb07e0c 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -21,8 +21,6 @@
 #define pgd_none(pgd)		(0)
 #define pgd_bad(pgd)		(0)
 #define pgd_clear(pgdp)
-#define kern_addr_valid(addr)	(1)
-/* FIXME */
 /*
  * PMD_SHIFT determines the size of the area a second-level page table can map
  * PGDIR_SHIFT determines what a third-level page table entry can map
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 78a532068fec2..00954ab1a0393 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -298,10 +298,6 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
  */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
 
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-/* FIXME: this is not correct */
-#define kern_addr_valid(addr)	(1)
-
 /*
  * We provide our own arch_get_unmapped_area to cope with VIPT caches.
  */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 71a1af42f0e89..4873c1d6e7d0d 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1021,8 +1021,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
  */
 #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > __SWP_TYPE_BITS)
 
-extern int kern_addr_valid(unsigned long addr);
-
 #ifdef CONFIG_ARM64_MTE
 
 #define __HAVE_ARCH_PREPARE_TO_SWAP
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 9a7c389651540..556154d821bf4 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -814,53 +814,6 @@ void __init paging_init(void)
 	create_idmap();
 }
 
-/*
- * Check whether a kernel address is valid (derived from arch/x86/).
- */
-int kern_addr_valid(unsigned long addr)
-{
-	pgd_t *pgdp;
-	p4d_t *p4dp;
-	pud_t *pudp, pud;
-	pmd_t *pmdp, pmd;
-	pte_t *ptep, pte;
-
-	addr = arch_kasan_reset_tag(addr);
-	if ((((long)addr) >> VA_BITS) != -1UL)
-		return 0;
-
-	pgdp = pgd_offset_k(addr);
-	if (pgd_none(READ_ONCE(*pgdp)))
-		return 0;
-
-	p4dp = p4d_offset(pgdp, addr);
-	if (p4d_none(READ_ONCE(*p4dp)))
-		return 0;
-
-	pudp = pud_offset(p4dp, addr);
-	pud = READ_ONCE(*pudp);
-	if (pud_none(pud))
-		return 0;
-
-	if (pud_sect(pud))
-		return pfn_valid(pud_pfn(pud));
-
-	pmdp = pmd_offset(pudp, addr);
-	pmd = READ_ONCE(*pmdp);
-	if (pmd_none(pmd))
-		return 0;
-
-	if (pmd_sect(pmd))
-		return pfn_valid(pmd_pfn(pmd));
-
-	ptep = pte_offset_kernel(pmdp, addr);
-	pte = READ_ONCE(*ptep);
-	if (pte_none(pte))
-		return 0;
-
-	return pfn_valid(pte_pfn(pte));
-}
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 static void free_hotplug_page_range(struct page *page, size_t size,
 				    struct vmem_altmap *altmap)
diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c
index d107c3d434e22..0a741a910a6ab 100644
--- a/arch/arm64/mm/pageattr.c
+++ b/arch/arm64/mm/pageattr.c
@@ -201,8 +201,7 @@ void __kernel_map_pages(struct page *page, int numpages, int enable)
 
 /*
  * This function is used to determine if a linear map page has been marked as
- * not-valid. Walk the page table and check the PTE_VALID bit. This is based
- * on kern_addr_valid(), which almost does what we need.
+ * not-valid. Walk the page table and check the PTE_VALID bit.
  *
  * Because this is only called on the kernel linear map,  p?d_sect() implies
  * p?d_present(). When debug_pagealloc is enabled, sections mappings are
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index c3d9b92cbe61c..77bc6caff2d23 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -249,9 +249,6 @@ extern void paging_init(void);
 void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
 		      pte_t *pte);
 
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-#define kern_addr_valid(addr)	(1)
-
 #define io_remap_pfn_range(vma, vaddr, pfn, size, prot) \
 	remap_pfn_range(vma, vaddr, pfn, size, prot)
 
diff --git a/arch/hexagon/include/asm/page.h b/arch/hexagon/include/asm/page.h
index 7cbf719c578ec..d7d4f9fca3279 100644
--- a/arch/hexagon/include/asm/page.h
+++ b/arch/hexagon/include/asm/page.h
@@ -131,13 +131,6 @@ static inline void clear_page(void *page)
 
 #define page_to_virt(page)	__va(page_to_phys(page))
 
-/*
- * For port to Hexagon Virtual Machine, MAYBE we check for attempts
- * to reference reserved HVM space, but in any case, the VM will be
- * protected.
- */
-#define kern_addr_valid(addr)   (1)
-
 #include <asm/mem-layout.h>
 #include <asm-generic/memory_model.h>
 /* XXX Todo: implement assembly-optimized version of getorder. */
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 6925e28ae61d1..01517a5e67789 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -181,22 +181,6 @@ ia64_phys_addr_valid (unsigned long addr)
 	return (addr & (local_cpu_data->unimpl_pa_mask)) == 0;
 }
 
-/*
- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
- * memory.  For the return value to be meaningful, ADDR must be >=
- * PAGE_OFFSET.  This operation can be relatively expensive (e.g.,
- * require a hash-, or multi-level tree-lookup or something of that
- * sort) but it guarantees to return TRUE only if accessing the page
- * at that address does not cause an error.  Note that there may be
- * addresses for which kern_addr_valid() returns FALSE even though an
- * access would not cause an error (e.g., this is typically true for
- * memory mapped I/O regions.
- *
- * XXX Need to implement this for IA-64.
- */
-#define kern_addr_valid(addr)	(1)
-
-
 /*
  * Now come the defines and routines to manage and access the three-level
  * page table.
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 946704bee599e..fc70b7041b760 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -421,8 +421,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 	__update_tlb(vma, address, (pte_t *)pmdp);
 }
 
-#define kern_addr_valid(addr)	(1)
-
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
 	return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
diff --git a/arch/m68k/include/asm/pgtable_mm.h b/arch/m68k/include/asm/pgtable_mm.h
index 9b4e2fe2ac821..b93c41fe20678 100644
--- a/arch/m68k/include/asm/pgtable_mm.h
+++ b/arch/m68k/include/asm/pgtable_mm.h
@@ -145,8 +145,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 
 #endif /* !__ASSEMBLY__ */
 
-#define kern_addr_valid(addr)	(1)
-
 /* MMU-specific headers */
 
 #ifdef CONFIG_SUN3
diff --git a/arch/m68k/include/asm/pgtable_no.h b/arch/m68k/include/asm/pgtable_no.h
index bce5ca56c3883..fed58da3a6b65 100644
--- a/arch/m68k/include/asm/pgtable_no.h
+++ b/arch/m68k/include/asm/pgtable_no.h
@@ -20,7 +20,6 @@
 #define pgd_none(pgd)		(0)
 #define pgd_bad(pgd)		(0)
 #define pgd_clear(pgdp)
-#define kern_addr_valid(addr)	(1)
 #define	pmd_offset(a, b)	((void *)0)
 
 #define PAGE_NONE	__pgprot(0)
diff --git a/arch/microblaze/include/asm/pgtable.h b/arch/microblaze/include/asm/pgtable.h
index ba348e997dbb4..42f5988e998b8 100644
--- a/arch/microblaze/include/asm/pgtable.h
+++ b/arch/microblaze/include/asm/pgtable.h
@@ -416,9 +416,6 @@ extern unsigned long iopa(unsigned long addr);
 #define	IOMAP_NOCACHE_NONSER	2
 #define	IOMAP_NO_COPYBACK	3
 
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-#define kern_addr_valid(addr)	(1)
-
 void do_page_fault(struct pt_regs *regs, unsigned long address,
 		   unsigned long error_code);
 
diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h
index 6caec386ad2f6..364a06033105c 100644
--- a/arch/mips/include/asm/pgtable.h
+++ b/arch/mips/include/asm/pgtable.h
@@ -550,8 +550,6 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 	__update_tlb(vma, address, pte);
 }
 
-#define kern_addr_valid(addr)	(1)
-
 /*
  * Allow physical addresses to be fixed up to help 36-bit peripherals.
  */
diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h
index b3d45e815295f..ab793bc517f5c 100644
--- a/arch/nios2/include/asm/pgtable.h
+++ b/arch/nios2/include/asm/pgtable.h
@@ -249,8 +249,6 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
 #define __swp_entry_to_pte(swp)	((pte_t) { (swp).val })
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 
-#define kern_addr_valid(addr)		(1)
-
 extern void __init paging_init(void);
 extern void __init mmu_init(void);
 
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index dcae8aea132fd..6477c17b3062d 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -395,8 +395,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 #define __pte_to_swp_entry(pte)		((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-#define kern_addr_valid(addr)           (1)
-
 typedef pte_t *pte_addr_t;
 
 #endif /* __ASSEMBLY__ */
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index ecd0288544698..bd09a44cfb2d4 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -23,21 +23,6 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-/*
- * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
- * memory.  For the return value to be meaningful, ADDR must be >=
- * PAGE_OFFSET.  This operation can be relatively expensive (e.g.,
- * require a hash-, or multi-level tree-lookup or something of that
- * sort) but it guarantees to return TRUE only if accessing the page
- * at that address does not cause an error.  Note that there may be
- * addresses for which kern_addr_valid() returns FALSE even though an
- * access would not cause an error (e.g., this is typically true for
- * memory mapped I/O regions.
- *
- * XXX Need to implement this for parisc.
- */
-#define kern_addr_valid(addr)	(1)
-
 /* This is for the serialization of PxTLB broadcasts. At least on the N class
  * systems, only one PxTLB inter processor broadcast can be active at any one
  * time on the Merced bus. */
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 283f40d05a4d7..9972626ddaf68 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -81,13 +81,6 @@ void poking_init(void);
 extern unsigned long ioremap_bot;
 extern const pgprot_t protection_map[16];
 
-/*
- * kern_addr_valid is intended to indicate whether an address is a valid
- * kernel address.  Most 32-bit archs define it as always true (like this)
- * but most 64-bit archs actually perform a test.  What should we do here?
- */
-#define kern_addr_valid(addr)	(1)
-
 #ifndef CONFIG_TRANSPARENT_HUGEPAGE
 #define pmd_large(pmd)		0
 #endif
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 7ec936910a96e..c7993bdf749f1 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -801,8 +801,6 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 
 #endif /* !CONFIG_MMU */
 
-#define kern_addr_valid(addr)   (1) /* FIXME */
-
 extern char _start[];
 extern void *_dtb_early_va;
 extern uintptr_t _dtb_early_pa;
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index f1cb9391190db..e1db072118186 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -1773,8 +1773,6 @@ static inline swp_entry_t __swp_entry(unsigned long type, unsigned long offset)
 #define __pte_to_swp_entry(pte)	((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)	((pte_t) { (x).val })
 
-#define kern_addr_valid(addr)   (1)
-
 extern int vmem_add_mapping(unsigned long start, unsigned long size);
 extern void vmem_remove_mapping(unsigned long start, unsigned long size);
 extern int __vmem_map_4k_page(unsigned long addr, unsigned long phys, pgprot_t prot, bool alloc);
diff --git a/arch/sh/include/asm/pgtable.h b/arch/sh/include/asm/pgtable.h
index 6fb9ec54cf9b4..3ce30becf6dfa 100644
--- a/arch/sh/include/asm/pgtable.h
+++ b/arch/sh/include/asm/pgtable.h
@@ -92,8 +92,6 @@ static inline unsigned long phys_addr_mask(void)
 
 typedef pte_t *pte_addr_t;
 
-#define kern_addr_valid(addr)	(1)
-
 #define pte_pfn(x)		((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
 
 struct vm_area_struct;
diff --git a/arch/sparc/include/asm/pgtable_32.h b/arch/sparc/include/asm/pgtable_32.h
index 8ff549004fac4..5acc05b572e65 100644
--- a/arch/sparc/include/asm/pgtable_32.h
+++ b/arch/sparc/include/asm/pgtable_32.h
@@ -368,12 +368,6 @@ __get_iospace (unsigned long addr)
 	}
 }
 
-extern unsigned long *sparc_valid_addr_bitmap;
-
-/* Needs to be defined here and not in linux/mm.h, as it is arch dependent */
-#define kern_addr_valid(addr) \
-	(test_bit(__pa((unsigned long)(addr))>>20, sparc_valid_addr_bitmap))
-
 /*
  * For sparc32&64, the pfn in io_remap_pfn_range() carries <iospace> in
  * its high 4 bits.  These macros/functions put it there or get it from there.
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index d88e774c8eb49..9c0ea457bdf05 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -37,8 +37,7 @@
 
 #include "mm_32.h"
 
-unsigned long *sparc_valid_addr_bitmap;
-EXPORT_SYMBOL(sparc_valid_addr_bitmap);
+static unsigned long *sparc_valid_addr_bitmap;
 
 unsigned long phys_base;
 EXPORT_SYMBOL(phys_base);
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index d6faee23c77dd..04f9db0c31117 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -1667,7 +1667,6 @@ bool kern_addr_valid(unsigned long addr)
 
 	return pfn_valid(pte_pfn(*pte));
 }
-EXPORT_SYMBOL(kern_addr_valid);
 
 static unsigned long __ref kernel_map_hugepud(unsigned long vstart,
 					      unsigned long vend,
diff --git a/arch/um/include/asm/pgtable.h b/arch/um/include/asm/pgtable.h
index 66bc3f99d9bef..4e3052f2671a0 100644
--- a/arch/um/include/asm/pgtable.h
+++ b/arch/um/include/asm/pgtable.h
@@ -298,8 +298,6 @@ extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
 	((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
 #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
 
-#define kern_addr_valid(addr) (1)
-
 /* Clear a kernel PTE and flush it from the TLB */
 #define kpte_clear_flush(ptep, vaddr)		\
 do {						\
diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h
index 7c9c968a42efe..7d4ad8907297c 100644
--- a/arch/x86/include/asm/pgtable_32.h
+++ b/arch/x86/include/asm/pgtable_32.h
@@ -47,15 +47,6 @@ do {						\
 
 #endif /* !__ASSEMBLY__ */
 
-/*
- * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM
- */
-#ifdef CONFIG_FLATMEM
-#define kern_addr_valid(addr)	(1)
-#else
-#define kern_addr_valid(kaddr)	(0)
-#endif
-
 /*
  * This is used to calculate the .brk reservation for initial pagetables.
  * Enough space is reserved to allocate pagetables sufficient to cover all
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index e479491da8d51..7929327abe009 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -240,7 +240,6 @@ static inline void native_pgd_clear(pgd_t *pgd)
 #define __swp_entry_to_pte(x)		((pte_t) { .pte = (x).val })
 #define __swp_entry_to_pmd(x)		((pmd_t) { .pmd = (x).val })
 
-extern int kern_addr_valid(unsigned long addr);
 extern void cleanup_highmap(void);
 
 #define HAVE_ARCH_UNMAPPED_AREA
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 3f040c6e5d13a..e8db4edd7cc9b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1416,47 +1416,6 @@ void mark_rodata_ro(void)
 	debug_checkwx();
 }
 
-int kern_addr_valid(unsigned long addr)
-{
-	unsigned long above = ((long)addr) >> __VIRTUAL_MASK_SHIFT;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	if (above != 0 && above != -1UL)
-		return 0;
-
-	pgd = pgd_offset_k(addr);
-	if (pgd_none(*pgd))
-		return 0;
-
-	p4d = p4d_offset(pgd, addr);
-	if (!p4d_present(*p4d))
-		return 0;
-
-	pud = pud_offset(p4d, addr);
-	if (!pud_present(*pud))
-		return 0;
-
-	if (pud_large(*pud))
-		return pfn_valid(pud_pfn(*pud));
-
-	pmd = pmd_offset(pud, addr);
-	if (!pmd_present(*pmd))
-		return 0;
-
-	if (pmd_large(*pmd))
-		return pfn_valid(pmd_pfn(*pmd));
-
-	pte = pte_offset_kernel(pmd, addr);
-	if (pte_none(*pte))
-		return 0;
-
-	return pfn_valid(pte_pfn(*pte));
-}
-
 /*
  * Block size is the minimum amount of memory which can be hotplugged or
  * hotremoved. It must be power of two and must be equal or larger than
diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h
index 54f577c13afa1..5b5484d707b2e 100644
--- a/arch/xtensa/include/asm/pgtable.h
+++ b/arch/xtensa/include/asm/pgtable.h
@@ -386,8 +386,6 @@ ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 
 #else
 
-#define kern_addr_valid(addr)	(1)
-
 extern  void update_mmu_cache(struct vm_area_struct * vma,
 			      unsigned long address, pte_t *ptep);
 
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 98f3289556e4a..71157ee35c1a2 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -540,25 +540,17 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos)
 			fallthrough;
 		case KCORE_VMEMMAP:
 		case KCORE_TEXT:
-			if (kern_addr_valid(start)) {
-				/*
-				 * Using bounce buffer to bypass the
-				 * hardened user copy kernel text checks.
-				 */
-				if (copy_from_kernel_nofault(buf, (void *)start,
-						tsz)) {
-					if (clear_user(buffer, tsz)) {
-						ret = -EFAULT;
-						goto out;
-					}
-				} else {
-					if (copy_to_user(buffer, buf, tsz)) {
-						ret = -EFAULT;
-						goto out;
-					}
+			/*
+			 * Using bounce buffer to bypass the
+			 * hardened user copy kernel text checks.
+			 */
+			if (copy_from_kernel_nofault(buf, (void *)start, tsz)) {
+				if (clear_user(buffer, tsz)) {
+					ret = -EFAULT;
+					goto out;
 				}
 			} else {
-				if (clear_user(buffer, tsz)) {
+				if (copy_to_user(buffer, buf, tsz)) {
 					ret = -EFAULT;
 					goto out;
 				}
-- 
GitLab


From 9ee2c086271639d82ad8f6e96b91fa7991800c0a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 19 Oct 2022 19:33:29 +0100
Subject: [PATCH 1086/4122] mm/huge_memory: convert split_huge_pages_in_file()
 to use a folio

Patch series "Remove FGP_HEAD flag".

We have just two users left of the FGP_HEAD flag and both of them are
better off; sometimes startlingly so as a result of conversion to use
folios.


This patch (of 4):

Removes a number of calls to compound_head() and a call to
pagecache_get_page().

Link: https://lkml.kernel.org/r/20221019183332.2802139-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20221019183332.2802139-2-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 561a42567477d..bc5dda3d2ad74 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3066,28 +3066,28 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 	mapping = candidate->f_mapping;
 
 	for (index = off_start; index < off_end; index += nr_pages) {
-		struct page *fpage = pagecache_get_page(mapping, index,
-						FGP_ENTRY | FGP_HEAD, 0);
+		struct folio *folio = __filemap_get_folio(mapping, index,
+						FGP_ENTRY, 0);
 
 		nr_pages = 1;
-		if (xa_is_value(fpage) || !fpage)
+		if (xa_is_value(folio) || !folio)
 			continue;
 
-		if (!is_transparent_hugepage(fpage))
+		if (!folio_test_large(folio))
 			goto next;
 
 		total++;
-		nr_pages = thp_nr_pages(fpage);
+		nr_pages = folio_nr_pages(folio);
 
-		if (!trylock_page(fpage))
+		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_huge_page(fpage))
+		if (!split_folio(folio))
 			split++;
 
-		unlock_page(fpage);
+		folio_unlock(folio);
 next:
-		put_page(fpage);
+		folio_put(folio);
 		cond_resched();
 	}
 
-- 
GitLab


From dd8095b15a6034165bc48da1eb6d0acc73c1558a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 19 Oct 2022 19:33:30 +0100
Subject: [PATCH 1087/4122] mm/swap: convert find_get_incore_page to use folios

Eliminates a use of FGP_HEAD and saves 35 bytes of text.

Link: https://lkml.kernel.org/r/20221019183332.2802139-3-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/swap_state.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/mm/swap_state.c b/mm/swap_state.c
index 438d0676c5be2..44e3530520e86 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -386,17 +386,14 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
 {
 	swp_entry_t swp;
 	struct swap_info_struct *si;
-	struct page *page = pagecache_get_page(mapping, index,
-						FGP_ENTRY | FGP_HEAD, 0);
+	struct folio *folio = __filemap_get_folio(mapping, index, FGP_ENTRY, 0);
 
-	if (!page)
-		return page;
-	if (!xa_is_value(page))
-		return find_subpage(page, index);
+	if (!xa_is_value(folio))
+		goto out;
 	if (!shmem_mapping(mapping))
 		return NULL;
 
-	swp = radix_to_swp_entry(page);
+	swp = radix_to_swp_entry(folio);
 	/* There might be swapin error entries in shmem mapping. */
 	if (non_swap_entry(swp))
 		return NULL;
@@ -404,9 +401,13 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
 	si = get_swap_device(swp);
 	if (!si)
 		return NULL;
-	page = find_get_page(swap_address_space(swp), swp_offset(swp));
+	index = swp_offset(swp);
+	folio = filemap_get_folio(swap_address_space(swp), index);
 	put_swap_device(si);
-	return page;
+out:
+	if (!folio)
+		return NULL;
+	return folio_file_page(folio, index);
 }
 
 struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
-- 
GitLab


From 524984ff66ee4b63264dffe568c3547a20b4136c Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 19 Oct 2022 19:33:31 +0100
Subject: [PATCH 1088/4122] mm: convert find_get_incore_page() to
 filemap_get_incore_folio()

Return the containing folio instead of the precise page.  One of the
callers wants the folio and the other can do the folio->page conversion
itself.  Nets 442 bytes of text size reduction, 478 bytes removed and 36
bytes added.

Link: https://lkml.kernel.org/r/20221019183332.2802139-4-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memcontrol.c | 12 +++++++++---
 mm/mincore.c    | 10 +++++-----
 mm/swap.h       |  8 +++++---
 mm/swap_state.c | 15 +++++++--------
 4 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f264a856ba862..fd707dcd6d042 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5648,15 +5648,21 @@ static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
 static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
 			unsigned long addr, pte_t ptent)
 {
+	unsigned long index;
+	struct folio *folio;
+
 	if (!vma->vm_file) /* anonymous vma */
 		return NULL;
 	if (!(mc.flags & MOVE_FILE))
 		return NULL;
 
-	/* page is moved even if it's not RSS of this task(page-faulted). */
+	/* folio is moved even if it's not RSS of this task(page-faulted). */
 	/* shmem/tmpfs may report page out on swap: account for that too. */
-	return find_get_incore_page(vma->vm_file->f_mapping,
-			linear_page_index(vma, addr));
+	index = linear_page_index(vma, addr);
+	folio = filemap_get_incore_folio(vma->vm_file->f_mapping, index);
+	if (!folio)
+		return NULL;
+	return folio_file_page(folio, index);
 }
 
 /**
diff --git a/mm/mincore.c b/mm/mincore.c
index e7e046fe17d78..a085a2aeabd8e 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -52,7 +52,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
 static unsigned char mincore_page(struct address_space *mapping, pgoff_t index)
 {
 	unsigned char present = 0;
-	struct page *page;
+	struct folio *folio;
 
 	/*
 	 * When tmpfs swaps out a page from a file, any process mapping that
@@ -60,10 +60,10 @@ static unsigned char mincore_page(struct address_space *mapping, pgoff_t index)
 	 * any other file mapping (ie. marked !present and faulted in with
 	 * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
 	 */
-	page = find_get_incore_page(mapping, index);
-	if (page) {
-		present = PageUptodate(page);
-		put_page(page);
+	folio = filemap_get_incore_folio(mapping, index);
+	if (folio) {
+		present = folio_test_uptodate(folio);
+		folio_put(folio);
 	}
 
 	return present;
diff --git a/mm/swap.h b/mm/swap.h
index cc08c459c6190..f78065c8ef524 100644
--- a/mm/swap.h
+++ b/mm/swap.h
@@ -41,7 +41,8 @@ void clear_shadow_from_swap_cache(int type, unsigned long begin,
 				  unsigned long end);
 struct folio *swap_cache_get_folio(swp_entry_t entry,
 		struct vm_area_struct *vma, unsigned long addr);
-struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index);
+struct folio *filemap_get_incore_folio(struct address_space *mapping,
+		pgoff_t index);
 
 struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 				   struct vm_area_struct *vma,
@@ -105,9 +106,10 @@ static inline struct folio *swap_cache_get_folio(swp_entry_t entry,
 }
 
 static inline
-struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
+struct folio *filemap_get_incore_folio(struct address_space *mapping,
+		pgoff_t index)
 {
-	return find_get_page(mapping, index);
+	return filemap_get_folio(mapping, index);
 }
 
 static inline bool add_to_swap(struct folio *folio)
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 44e3530520e86..40fe6f23e1057 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -373,16 +373,17 @@ struct folio *swap_cache_get_folio(swp_entry_t entry,
 }
 
 /**
- * find_get_incore_page - Find and get a page from the page or swap caches.
+ * filemap_get_incore_folio - Find and get a folio from the page or swap caches.
  * @mapping: The address_space to search.
  * @index: The page cache index.
  *
- * This differs from find_get_page() in that it will also look for the
- * page in the swap cache.
+ * This differs from filemap_get_folio() in that it will also look for the
+ * folio in the swap cache.
  *
- * Return: The found page or %NULL.
+ * Return: The found folio or %NULL.
  */
-struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
+struct folio *filemap_get_incore_folio(struct address_space *mapping,
+		pgoff_t index)
 {
 	swp_entry_t swp;
 	struct swap_info_struct *si;
@@ -405,9 +406,7 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
 	folio = filemap_get_folio(swap_address_space(swp), index);
 	put_swap_device(si);
 out:
-	if (!folio)
-		return NULL;
-	return folio_file_page(folio, index);
+	return folio;
 }
 
 struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
-- 
GitLab


From c5255b421fd04ba6a405809b7216a3b6ebd5493a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Wed, 19 Oct 2022 19:33:32 +0100
Subject: [PATCH 1089/4122] mm: remove FGP_HEAD

This is no longer used; all callers have been converted to use folios
instead.  Somehow this manages to save 11 bytes of text.

Link: https://lkml.kernel.org/r/20221019183332.2802139-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 5 ++---
 mm/folio-compat.c       | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 060ee98474ef5..b33ab86d5dca4 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -504,9 +504,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_NOFS		0x00000010
 #define FGP_NOWAIT		0x00000020
 #define FGP_FOR_MMAP		0x00000040
-#define FGP_HEAD		0x00000080
-#define FGP_ENTRY		0x00000100
-#define FGP_STABLE		0x00000200
+#define FGP_ENTRY		0x00000080
+#define FGP_STABLE		0x00000100
 
 struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index,
 		int fgp_flags, gfp_t gfp);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 8ae39c06da62a..bac2a366aada0 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -108,7 +108,7 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
 	struct folio *folio;
 
 	folio = __filemap_get_folio(mapping, index, fgp_flags, gfp);
-	if ((fgp_flags & FGP_HEAD) || !folio || xa_is_value(folio))
+	if (!folio || xa_is_value(folio))
 		return &folio->page;
 	return folio_file_page(folio, index);
 }
-- 
GitLab


From 93d38b72e4c1a518f46fd6dfcb5ad7c5003fa372 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Oct 2022 11:49:41 +0800
Subject: [PATCH 1090/4122] nios2: remove unused INIT_MMAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm: cleanup with VM_ACCESS_FLAGS".


This patch (of 5):

It seems that INIT_MMAP is gone in 2.4.10, not sure, anyways, it is
useless now, kill it.

Link: https://lkml.kernel.org/r/20221019034945.93081-1-wangkefeng.wang@huawei.com
Link: https://lkml.kernel.org/r/20221019034945.93081-2-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@gmail.com>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/nios2/include/asm/processor.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/nios2/include/asm/processor.h b/arch/nios2/include/asm/processor.h
index 8916d93d5c2d0..eb44130364a9a 100644
--- a/arch/nios2/include/asm/processor.h
+++ b/arch/nios2/include/asm/processor.h
@@ -50,9 +50,6 @@ struct thread_struct {
 	unsigned long kpsr;
 };
 
-#define INIT_MMAP \
-	{ &init_mm, (0), (0), __pgprot(0x0), VM_READ | VM_WRITE | VM_EXEC }
-
 # define INIT_THREAD {			\
 	.kregs	= NULL,			\
 	.ksp	= 0,			\
-- 
GitLab


From 4f20566f5c0f42c451f6a43be9bfa6f0b3d142df Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Oct 2022 11:49:42 +0800
Subject: [PATCH 1091/4122] x86/sgx: use VM_ACCESS_FLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Simplify VM_READ|VM_WRITE|VM_EXEC with VM_ACCESS_FLAGS.

Link: https://lkml.kernel.org/r/20221019034945.93081-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: David Airlie <airlied@gmail.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/kernel/cpu/sgx/encl.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c
index 1ec20807de1e8..6225c525372d1 100644
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -268,7 +268,7 @@ static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
 						       unsigned long addr,
 						       unsigned long vm_flags)
 {
-	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+	unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
 	struct sgx_encl_page *entry;
 
 	entry = xa_load(&encl->page_array, PFN_DOWN(addr));
@@ -502,7 +502,7 @@ static void sgx_vma_open(struct vm_area_struct *vma)
 int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
 		     unsigned long end, unsigned long vm_flags)
 {
-	unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+	unsigned long vm_prot_bits = vm_flags & VM_ACCESS_FLAGS;
 	struct sgx_encl_page *page;
 	unsigned long count = 0;
 	int ret = 0;
-- 
GitLab


From e39ee675f42e993bbf1c04b1ad7526db820ccdce Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Oct 2022 11:49:43 +0800
Subject: [PATCH 1092/4122] mm: mprotect: use VM_ACCESS_FLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Simplify VM_READ|VM_WRITE|VM_EXEC with VM_ACCESS_FLAGS.

Link: https://lkml.kernel.org/r/20221019034945.93081-4-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Airlie <airlied@gmail.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mprotect.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 668bfaa6ed2ae..99762403cc8ff 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -756,8 +756,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
 		 * If a permission is not passed to mprotect(), it must be
 		 * cleared from the VMA.
 		 */
-		mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC |
-					VM_FLAGS_CLEAR;
+		mask_off_old_flags = VM_ACCESS_FLAGS | VM_FLAGS_CLEAR;
 
 		new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey);
 		newflags = calc_vm_prot_bits(prot, new_vma_pkey);
-- 
GitLab


From d7e679b6f9d9f7337f0fdc5011f2ecc9b16f821b Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Oct 2022 11:49:44 +0800
Subject: [PATCH 1093/4122] mm: debug_vm_pgtable: use VM_ACCESS_FLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Directly use VM_ACCESS_FLAGS instead VMFLAGS.

Link: https://lkml.kernel.org/r/20221019034945.93081-5-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Airlie <airlied@gmail.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/debug_vm_pgtable.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index dc7df1254f0a0..2b61fde8c38cc 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -38,11 +38,7 @@
  * Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics
  * expectations that are being validated here. All future changes in here
  * or the documentation need to be in sync.
- */
-
-#define VMFLAGS	(VM_READ|VM_WRITE|VM_EXEC)
-
-/*
+ *
  * On s390 platform, the lower 4 bits are used to identify given page table
  * entry type. But these bits might affect the ability to clear entries with
  * pxx_clear() because of how dynamic page table folding works on s390. So
@@ -1125,7 +1121,7 @@ static int __init init_args(struct pgtable_debug_args *args)
 	 */
 	memset(args, 0, sizeof(*args));
 	args->vaddr              = get_random_vaddr();
-	args->page_prot          = vm_get_page_prot(VMFLAGS);
+	args->page_prot          = vm_get_page_prot(VM_ACCESS_FLAGS);
 	args->page_prot_none     = vm_get_page_prot(VM_NONE);
 	args->is_contiguous_page = false;
 	args->pud_pfn            = ULONG_MAX;
-- 
GitLab


From cc03817c0e8417419ede18a8e0749c5b9699b135 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Wed, 19 Oct 2022 11:49:45 +0800
Subject: [PATCH 1094/4122] amdgpu: use VM_ACCESS_FLAGS
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Simplify VM_READ|VM_WRITE|VM_EXEC with VM_ACCESS_FLAGS.

Link: https://lkml.kernel.org/r/20221019034945.93081-6-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Alex Deucher <alexander.deucher@amd.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: "Pan, Xinhui" <Xinhui.Pan@amd.com>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8ef31d687ef3b..4728be1618284 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -255,7 +255,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
 	 * becoming writable and makes is_cow_mapping(vm_flags) false.
 	 */
 	if (is_cow_mapping(vma->vm_flags) &&
-	    !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+	    !(vma->vm_flags & VM_ACCESS_FLAGS))
 		vma->vm_flags &= ~VM_MAYWRITE;
 
 	return drm_gem_ttm_mmap(obj, vma);
-- 
GitLab


From 4781593d5dbae50500d1c7975be03b590ae2b92a Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Thu, 20 Oct 2022 15:38:32 -0400
Subject: [PATCH 1095/4122] mm/hugetlb: unify clearing of RestoreReserve for
 private pages

A trivial cleanup to move clearing of RestoreReserve into adding anon rmap
of private hugetlb mappings.  It matches with the shared mappings where we
only clear the bit when adding into page cache, rather than spreading it
around the code paths.

Link: https://lkml.kernel.org/r/20221020193832.776173-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 14 ++++----------
 mm/rmap.c    |  2 +-
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 0af18c1e4b311..d11e92117d4ac 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4775,7 +4775,6 @@ hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr
 	hugepage_add_new_anon_rmap(new_page, vma, addr);
 	set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
 	hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
-	ClearHPageRestoreReserve(new_page);
 	SetHPageMigratable(new_page);
 }
 
@@ -5438,8 +5437,6 @@ retry_avoidcopy:
 	spin_lock(ptl);
 	ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
 	if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
-		ClearHPageRestoreReserve(new_page);
-
 		/* Break COW or unshare */
 		huge_ptep_clear_flush(vma, haddr, ptep);
 		mmu_notifier_invalidate_range(mm, range.start, range.end);
@@ -5734,10 +5731,9 @@ static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
 	if (!pte_same(huge_ptep_get(ptep), old_pte))
 		goto backout;
 
-	if (anon_rmap) {
-		ClearHPageRestoreReserve(page);
+	if (anon_rmap)
 		hugepage_add_new_anon_rmap(page, vma, haddr);
-	} else
+	else
 		page_dup_file_rmap(page, true);
 	new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
 				&& (vma->vm_flags & VM_SHARED)));
@@ -6120,12 +6116,10 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
 	if (!huge_pte_none_mostly(huge_ptep_get(dst_pte)))
 		goto out_release_unlock;
 
-	if (page_in_pagecache) {
+	if (page_in_pagecache)
 		page_dup_file_rmap(page, true);
-	} else {
-		ClearHPageRestoreReserve(page);
+	else
 		hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
-	}
 
 	/*
 	 * For either: (1) CONTINUE on a non-shared VMA, or (2) UFFDIO_COPY
diff --git a/mm/rmap.c b/mm/rmap.c
index 9bba65b30e4d9..3b2d18bbdc445 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -2571,7 +2571,7 @@ void hugepage_add_new_anon_rmap(struct page *page,
 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	atomic_set(compound_mapcount_ptr(page), 0);
 	atomic_set(compound_pincount_ptr(page), 0);
-
+	ClearHPageRestoreReserve(page);
 	__page_set_anon_rmap(page, vma, address, 1);
 }
 #endif /* CONFIG_HUGETLB_PAGE */
-- 
GitLab


From 6e2be1f2ebcea42ed6044432f72f32434e60b34d Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 21 Oct 2022 13:59:52 +0200
Subject: [PATCH 1096/4122] compiler-gcc: be consistent with underscores use
 for `no_sanitize`

Patch series "compiler-gcc: be consistent with underscores use for
`no_sanitize`".


This patch (of 5):

Other macros that define shorthands for attributes in e.g.
`compiler_attributes.h` and elsewhere use underscores.

Link: https://lkml.kernel.org/r/20221021115956.9947-1-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Cc: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Li <ashimida@linux.alibaba.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index f55a37efdb974..b9530d3515ac1 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -83,25 +83,25 @@
 #endif
 
 #if __has_attribute(__no_sanitize_address__)
-#define __no_sanitize_address __attribute__((no_sanitize_address))
+#define __no_sanitize_address __attribute__((__no_sanitize_address__))
 #else
 #define __no_sanitize_address
 #endif
 
 #if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__)
-#define __no_sanitize_thread __attribute__((no_sanitize_thread))
+#define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
 #else
 #define __no_sanitize_thread
 #endif
 
 #if __has_attribute(__no_sanitize_undefined__)
-#define __no_sanitize_undefined __attribute__((no_sanitize_undefined))
+#define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__))
 #else
 #define __no_sanitize_undefined
 #endif
 
 #if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__)
-#define __no_sanitize_coverage __attribute__((no_sanitize_coverage))
+#define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__))
 #else
 #define __no_sanitize_coverage
 #endif
-- 
GitLab


From ae37a9a2c2d0960d643d782b426ea1aa9c05727a Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 21 Oct 2022 13:59:53 +0200
Subject: [PATCH 1097/4122] compiler-gcc: remove attribute support check for
 `__no_sanitize_address__`

The attribute was added in GCC 4.8, while the minimum GCC version
supported by the kernel is GCC 5.1.

Therefore, remove the check.

Link: https://godbolt.org/z/84v56vcn8
Link: https://lkml.kernel.org/r/20221021115956.9947-2-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Li <ashimida@linux.alibaba.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Marco Elver <elver@google.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index b9530d3515ac1..bfce7f4d09783 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -82,11 +82,7 @@
 #define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
 #endif
 
-#if __has_attribute(__no_sanitize_address__)
 #define __no_sanitize_address __attribute__((__no_sanitize_address__))
-#else
-#define __no_sanitize_address
-#endif
 
 #if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__)
 #define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
-- 
GitLab


From 095ac0763ac507dd4e1a71ad9784f49f51498483 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 21 Oct 2022 13:59:54 +0200
Subject: [PATCH 1098/4122] compiler-gcc: remove attribute support check for
 `__no_sanitize_thread__`

The attribute was added in GCC 5.1, which matches the minimum GCC version
supported by the kernel.

Therefore, remove the check.

Link: https://godbolt.org/z/vbxKejxbx
Link: https://lkml.kernel.org/r/20221021115956.9947-3-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Acked-by: Marco Elver <elver@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Li <ashimida@linux.alibaba.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index bfce7f4d09783..ba207deb77ca8 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -84,7 +84,7 @@
 
 #define __no_sanitize_address __attribute__((__no_sanitize_address__))
 
-#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__)
+#if defined(__SANITIZE_THREAD__)
 #define __no_sanitize_thread __attribute__((__no_sanitize_thread__))
 #else
 #define __no_sanitize_thread
-- 
GitLab


From 689540cbda7f69594ae5e13fef4c8239519d8b66 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 21 Oct 2022 13:59:55 +0200
Subject: [PATCH 1099/4122] compiler-gcc: remove attribute support check for
 `__no_sanitize_undefined__`

The attribute was added in GCC 4.9, while the minimum GCC version
supported by the kernel is GCC 5.1.

Therefore, remove the check.

Link: https://godbolt.org/z/GrMeo6fYr
Link: https://lkml.kernel.org/r/20221021115956.9947-4-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Li <ashimida@linux.alibaba.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Marco Elver <elver@google.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index ba207deb77ca8..7f2c2bb73815c 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -90,11 +90,7 @@
 #define __no_sanitize_thread
 #endif
 
-#if __has_attribute(__no_sanitize_undefined__)
 #define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__))
-#else
-#define __no_sanitize_undefined
-#endif
 
 #if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__)
 #define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__))
-- 
GitLab


From f39556bc2530c83a22bc11b73c7a46df9a340685 Mon Sep 17 00:00:00 2001
From: Miguel Ojeda <ojeda@kernel.org>
Date: Fri, 21 Oct 2022 13:59:56 +0200
Subject: [PATCH 1100/4122] compiler-gcc: document minimum version for
 `__no_sanitize_coverage__`

The attribute was added in GCC 12.1.

This will simplify future cleanups, and is closer to what we do
in `compiler_attributes.h`.

Link: https://godbolt.org/z/MGbT76j6G
Link: https://lkml.kernel.org/r/20221021115956.9947-5-ojeda@kernel.org
Signed-off-by: Miguel Ojeda <ojeda@kernel.org>
Acked-by: Marco Elver <elver@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Dan Li <ashimida@linux.alibaba.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/compiler-gcc.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index 7f2c2bb73815c..7af9e34ec261b 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -92,6 +92,9 @@
 
 #define __no_sanitize_undefined __attribute__((__no_sanitize_undefined__))
 
+/*
+ * Only supported since gcc >= 12
+ */
 #if defined(CONFIG_KCOV) && __has_attribute(__no_sanitize_coverage__)
 #define __no_sanitize_coverage __attribute__((__no_sanitize_coverage__))
 #else
-- 
GitLab


From b5f1fc98c62b6b75e9f7499e7519dc67684affd3 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 21 Oct 2022 16:46:09 +0800
Subject: [PATCH 1101/4122] mm: memory-failure: make put_ref_page() more useful

Pass pfn/flags to put_ref_page(), then check MF_COUNT_INCREASED and drop
refcount to make the code look cleaner.

Link: https://lkml.kernel.org/r/20221021084611.53765-1-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory-failure.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 145bb561ddb3a..8c6a19b9790f9 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1910,17 +1910,25 @@ static inline unsigned long free_raw_hwp_pages(struct page *hpage, bool flag)
 }
 #endif	/* CONFIG_HUGETLB_PAGE */
 
+/* Drop the extra refcount in case we come from madvise() */
+static void put_ref_page(unsigned long pfn, int flags)
+{
+	struct page *page;
+
+	if (!(flags & MF_COUNT_INCREASED))
+		return;
+
+	page = pfn_to_page(pfn);
+	if (page)
+		put_page(page);
+}
+
 static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
 		struct dev_pagemap *pgmap)
 {
-	struct page *page = pfn_to_page(pfn);
 	int rc = -ENXIO;
 
-	if (flags & MF_COUNT_INCREASED)
-		/*
-		 * Drop the extra refcount in case we come from madvise().
-		 */
-		put_page(page);
+	put_ref_page(pfn, flags);
 
 	/* device metadata space is not recoverable */
 	if (!pgmap_pfn_valid(pgmap, pfn))
@@ -2513,12 +2521,6 @@ static int soft_offline_in_use_page(struct page *page)
 	return ret;
 }
 
-static void put_ref_page(struct page *page)
-{
-	if (page)
-		put_page(page);
-}
-
 /**
  * soft_offline_page - Soft offline a page.
  * @pfn: pfn to soft-offline
@@ -2547,19 +2549,17 @@ int soft_offline_page(unsigned long pfn, int flags)
 {
 	int ret;
 	bool try_again = true;
-	struct page *page, *ref_page = NULL;
+	struct page *page;
 
 	WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED));
 
 	if (!pfn_valid(pfn))
 		return -ENXIO;
-	if (flags & MF_COUNT_INCREASED)
-		ref_page = pfn_to_page(pfn);
 
 	/* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
 	page = pfn_to_online_page(pfn);
 	if (!page) {
-		put_ref_page(ref_page);
+		put_ref_page(pfn, flags);
 		return -EIO;
 	}
 
@@ -2567,7 +2567,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 
 	if (PageHWPoison(page)) {
 		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
-		put_ref_page(ref_page);
+		put_ref_page(pfn, flags);
 		mutex_unlock(&mf_mutex);
 		return 0;
 	}
-- 
GitLab


From 183a7c5d15d3c56f49955662d3edd0092141df78 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 21 Oct 2022 16:46:10 +0800
Subject: [PATCH 1102/4122] mm: memory-failure: avoid pfn_valid() twice in
 soft_offline_page()

Simplify WARN_ON_ONCE(flags & MF_COUNT_INCREASED) under !pfn_valid().

Link: https://lkml.kernel.org/r/20221021084611.53765-2-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory-failure.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8c6a19b9790f9..b5e0dba021922 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2551,10 +2551,10 @@ int soft_offline_page(unsigned long pfn, int flags)
 	bool try_again = true;
 	struct page *page;
 
-	WARN_ON_ONCE(!pfn_valid(pfn) && (flags & MF_COUNT_INCREASED));
-
-	if (!pfn_valid(pfn))
+	if (!pfn_valid(pfn)) {
+		WARN_ON_ONCE(flags & MF_COUNT_INCREASED);
 		return -ENXIO;
+	}
 
 	/* Only online pages can be soft-offlined (esp., not ZONE_DEVICE). */
 	page = pfn_to_online_page(pfn);
-- 
GitLab


From b66d00dfebe79ebd0d5a0ec4ee4e26583432c381 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Fri, 21 Oct 2022 16:46:11 +0800
Subject: [PATCH 1103/4122] mm: memory-failure: make action_result() return int

Check mf_result in action_result(), only return 0 when MF_RECOVERED,
or return -EBUSY, which will simplify code a bit.

[wangkefeng.wang@huawei.com: v2]
  Link: https://lkml.kernel.org/r/20221024035138.99119-1-wangkefeng.wang@huawei.com
Link: https://lkml.kernel.org/r/20221021084611.53765-3-wangkefeng.wang@huawei.com
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory-failure.c | 45 ++++++++++++++++-----------------------------
 1 file changed, 16 insertions(+), 29 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b5e0dba021922..13594556146c6 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1179,14 +1179,16 @@ static struct page_state error_states[] = {
  * "Dirty/Clean" indication is not 100% accurate due to the possibility of
  * setting PG_dirty outside page lock. See also comment above set_page_dirty().
  */
-static void action_result(unsigned long pfn, enum mf_action_page_type type,
-			  enum mf_result result)
+static int action_result(unsigned long pfn, enum mf_action_page_type type,
+			 enum mf_result result)
 {
 	trace_memory_failure_event(pfn, type, result);
 
 	num_poisoned_pages_inc();
 	pr_err("%#lx: recovery action for %s: %s\n",
 		pfn, action_page_types[type], action_name[result]);
+
+	return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
 }
 
 static int page_action(struct page_state *ps, struct page *p,
@@ -1197,14 +1199,12 @@ static int page_action(struct page_state *ps, struct page *p,
 	/* page p should be unlocked after returning from ps->action().  */
 	result = ps->action(ps, p);
 
-	action_result(pfn, ps->type, result);
-
 	/* Could do more checks here if page looks ok */
 	/*
 	 * Could adjust zone counters here to correct for the missing page.
 	 */
 
-	return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
+	return action_result(pfn, ps->type, result);
 }
 
 static inline bool PageHWPoisonTakenOff(struct page *page)
@@ -1853,8 +1853,7 @@ retry:
 			flags |= MF_NO_RETRY;
 			goto retry;
 		}
-		action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
-		return res;
+		return action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
 	}
 
 	head = compound_head(p);
@@ -1880,22 +1879,17 @@ retry:
 		} else {
 			res = MF_FAILED;
 		}
-		action_result(pfn, MF_MSG_FREE_HUGE, res);
-		return res == MF_RECOVERED ? 0 : -EBUSY;
+		return action_result(pfn, MF_MSG_FREE_HUGE, res);
 	}
 
 	page_flags = head->flags;
 
 	if (!hwpoison_user_mappings(p, pfn, flags, head)) {
-		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
-		res = -EBUSY;
-		goto out;
+		unlock_page(head);
+		return action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 	}
 
 	return identify_page_state(pfn, p, page_flags);
-out:
-	unlock_page(head);
-	return res;
 }
 
 #else
@@ -2060,16 +2054,13 @@ try_again:
 					}
 					res = MF_FAILED;
 				}
-				action_result(pfn, MF_MSG_BUDDY, res);
-				res = res == MF_RECOVERED ? 0 : -EBUSY;
+				res = action_result(pfn, MF_MSG_BUDDY, res);
 			} else {
-				action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
-				res = -EBUSY;
+				res = action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED);
 			}
 			goto unlock_mutex;
 		} else if (res < 0) {
-			action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
-			res = -EBUSY;
+			res = action_result(pfn, MF_MSG_UNKNOWN, MF_IGNORED);
 			goto unlock_mutex;
 		}
 	}
@@ -2090,8 +2081,7 @@ try_again:
 		 */
 		SetPageHasHWPoisoned(hpage);
 		if (try_to_split_thp_page(p) < 0) {
-			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
-			res = -EBUSY;
+			res = action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
 			goto unlock_mutex;
 		}
 		VM_BUG_ON_PAGE(!page_count(p), p);
@@ -2124,8 +2114,7 @@ try_again:
 			retry = false;
 			goto try_again;
 		}
-		action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
-		res = -EBUSY;
+		res = action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
 		goto unlock_page;
 	}
 
@@ -2165,8 +2154,7 @@ try_again:
 	 * Abort on fail: __filemap_remove_folio() assumes unmapped page.
 	 */
 	if (!hwpoison_user_mappings(p, pfn, flags, p)) {
-		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
-		res = -EBUSY;
+		res = action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		goto unlock_page;
 	}
 
@@ -2174,8 +2162,7 @@ try_again:
 	 * Torn down by someone else?
 	 */
 	if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) {
-		action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
-		res = -EBUSY;
+		res = action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED);
 		goto unlock_page;
 	}
 
-- 
GitLab


From 26215b7ee923b9251f7bb12c4e5f09dc465d35f2 Mon Sep 17 00:00:00 2001
From: Hawkins Jiawei <yin31149@gmail.com>
Date: Fri, 21 Oct 2022 07:16:08 +0800
Subject: [PATCH 1104/4122] hugetlbfs: fix null-ptr-deref in
 hugetlbfs_parse_param()

Syzkaller reports a null-ptr-deref bug as follows:
======================================================
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
RIP: 0010:hugetlbfs_parse_param+0x1dd/0x8e0 fs/hugetlbfs/inode.c:1380
[...]
Call Trace:
 <TASK>
 vfs_parse_fs_param fs/fs_context.c:148 [inline]
 vfs_parse_fs_param+0x1f9/0x3c0 fs/fs_context.c:129
 vfs_parse_fs_string+0xdb/0x170 fs/fs_context.c:191
 generic_parse_monolithic+0x16f/0x1f0 fs/fs_context.c:231
 do_new_mount fs/namespace.c:3036 [inline]
 path_mount+0x12de/0x1e20 fs/namespace.c:3370
 do_mount fs/namespace.c:3383 [inline]
 __do_sys_mount fs/namespace.c:3591 [inline]
 __se_sys_mount fs/namespace.c:3568 [inline]
 __x64_sys_mount+0x27f/0x300 fs/namespace.c:3568
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd
 [...]
 </TASK>
======================================================

According to commit "vfs: parse: deal with zero length string value",
kernel will set the param->string to null pointer in vfs_parse_fs_string()
if fs string has zero length.

Yet the problem is that, hugetlbfs_parse_param() will dereference the
param->string, without checking whether it is a null pointer.  To be more
specific, if hugetlbfs_parse_param() parses an illegal mount parameter,
such as "size=,", kernel will constructs struct fs_parameter with null
pointer in vfs_parse_fs_string(), then passes this struct fs_parameter to
hugetlbfs_parse_param(), which triggers the above null-ptr-deref bug.

This patch solves it by adding sanity check on param->string
in hugetlbfs_parse_param().

Link: https://lkml.kernel.org/r/20221020231609.4810-1-yin31149@gmail.com
Reported-by: syzbot+a3e6acd85ded5c16a709@syzkaller.appspotmail.com
Tested-by: syzbot+a3e6acd85ded5c16a709@syzkaller.appspotmail.com
  Link: https://lore.kernel.org/all/0000000000005ad00405eb7148c6@google.com/
Signed-off-by: Hawkins Jiawei <yin31149@gmail.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hawkins Jiawei <yin31149@gmail.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/hugetlbfs/inode.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 00495fc128c5b..09e644f80a4a4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1378,7 +1378,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 
 	case Opt_size:
 		/* memparse() will accept a K/M/G without a digit */
-		if (!isdigit(param->string[0]))
+		if (!param->string || !isdigit(param->string[0]))
 			goto bad_val;
 		ctx->max_size_opt = memparse(param->string, &rest);
 		ctx->max_val_type = SIZE_STD;
@@ -1388,7 +1388,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 
 	case Opt_nr_inodes:
 		/* memparse() will accept a K/M/G without a digit */
-		if (!isdigit(param->string[0]))
+		if (!param->string || !isdigit(param->string[0]))
 			goto bad_val;
 		ctx->nr_inodes = memparse(param->string, &rest);
 		return 0;
@@ -1404,7 +1404,7 @@ static int hugetlbfs_parse_param(struct fs_context *fc, struct fs_parameter *par
 
 	case Opt_min_size:
 		/* memparse() will accept a K/M/G without a digit */
-		if (!isdigit(param->string[0]))
+		if (!param->string || !isdigit(param->string[0]))
 			goto bad_val;
 		ctx->min_size_opt = memparse(param->string, &rest);
 		ctx->min_val_type = SIZE_STD;
-- 
GitLab


From b12fdbf15f92b6cf5fecdd8a1855afe8809e5c58 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Mon, 24 Oct 2022 15:33:36 -0400
Subject: [PATCH 1105/4122] Revert "mm/uffd: fix warning without
 PTE_MARKER_UFFD_WP compiled in"

With " mm/uffd: Fix vma check on userfault for wp" to fix the
registration, we'll be safe to remove the macro hacks now.

Link: https://lkml.kernel.org/r/20221024193336.1233616-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c  | 4 ----
 mm/memory.c   | 2 --
 mm/mprotect.c | 2 --
 3 files changed, 8 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d11e92117d4ac..fc8908d715d6d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5114,7 +5114,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 		 * unmapped and its refcount is dropped, so just clear pte here.
 		 */
 		if (unlikely(!pte_present(pte))) {
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
 			/*
 			 * If the pte was wr-protected by uffd-wp in any of the
 			 * swap forms, meanwhile the caller does not want to
@@ -5126,7 +5125,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 				set_huge_pte_at(mm, address, ptep,
 						make_pte_marker(PTE_MARKER_UFFD_WP));
 			else
-#endif
 				huge_pte_clear(mm, address, ptep, sz);
 			spin_unlock(ptl);
 			continue;
@@ -5155,13 +5153,11 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 		tlb_remove_huge_tlb_entry(h, tlb, ptep, address);
 		if (huge_pte_dirty(pte))
 			set_page_dirty(page);
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
 		/* Leave a uffd-wp pte marker if needed */
 		if (huge_pte_uffd_wp(pte) &&
 		    !(zap_flags & ZAP_FLAG_DROP_MARKER))
 			set_huge_pte_at(mm, address, ptep,
 					make_pte_marker(PTE_MARKER_UFFD_WP));
-#endif
 		hugetlb_count_sub(pages_per_huge_page(h), mm);
 		page_remove_rmap(page, vma, true);
 
diff --git a/mm/memory.c b/mm/memory.c
index f88c351aecd41..81cc75e718883 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1393,12 +1393,10 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma,
 			      unsigned long addr, pte_t *pte,
 			      struct zap_details *details, pte_t pteval)
 {
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
 	if (zap_drop_file_uffd_wp(details))
 		return;
 
 	pte_install_uffd_wp_if_needed(vma, addr, pte, pteval);
-#endif
 }
 
 static unsigned long zap_pte_range(struct mmu_gather *tlb,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 99762403cc8ff..8d770855b5915 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -267,7 +267,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
 		} else {
 			/* It must be an none page, or what else?.. */
 			WARN_ON_ONCE(!pte_none(oldpte));
-#ifdef CONFIG_PTE_MARKER_UFFD_WP
 			if (unlikely(uffd_wp && !vma_is_anonymous(vma))) {
 				/*
 				 * For file-backed mem, we need to be able to
@@ -279,7 +278,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
 					   make_pte_marker(PTE_MARKER_UFFD_WP));
 				pages++;
 			}
-#endif
 		}
 	} while (pte++, addr += PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
-- 
GitLab


From fd4a7ac32918d3d7a2d17dc06c5520f45e36eb52 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Mon, 24 Oct 2022 16:34:22 +0800
Subject: [PATCH 1106/4122] mm: migrate: try again if THP split is failed due
 to page refcnt

When creating a virtual machine, we will use memfd_create() to get a file
descriptor which can be used to create share memory mappings using the
mmap function, meanwhile the mmap() will set the MAP_POPULATE flag to
allocate physical pages for the virtual machine.

When allocating physical pages for the guest, the host can fallback to
allocate some CMA pages for the guest when over half of the zone's free
memory is in the CMA area.

In guest os, when the application wants to do some data transaction with
DMA, our QEMU will call VFIO_IOMMU_MAP_DMA ioctl to do longterm-pin and
create IOMMU mappings for the DMA pages.  However, when calling
VFIO_IOMMU_MAP_DMA ioctl to pin the physical pages, we found it will be
failed to longterm-pin sometimes.

After some invetigation, we found the pages used to do DMA mapping can
contain some CMA pages, and these CMA pages will cause a possible failure
of the longterm-pin, due to failed to migrate the CMA pages.  The reason
of migration failure may be temporary reference count or memory allocation
failure.  So that will cause the VFIO_IOMMU_MAP_DMA ioctl returns error,
which makes the application failed to start.

I observed one migration failure case (which is not easy to reproduce) is
that, the 'thp_migration_fail' count is 1 and the 'thp_split_page_failed'
count is also 1.

That means when migrating a THP which is in CMA area, but can not allocate
a new THP due to memory fragmentation, so it will split the THP.  However
THP split is also failed, probably the reason is temporary reference count
of this THP.  And the temporary reference count can be caused by dropping
page caches (I observed the drop caches operation in the system), but we
can not drop the shmem page caches due to they are already dirty at that
time.

Especially for THP split failure, which is caused by temporary reference
count, we can try again to mitigate the failure of migration in this case
according to previous discussion [1].

[1] https://lore.kernel.org/all/470dc638-a300-f261-94b4-e27250e42f96@redhat.com/
Link: https://lkml.kernel.org/r/6784730480a1df82e8f4cba1ed088e4ac767994b.1666599848.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c |  4 ++--
 mm/migrate.c     | 19 ++++++++++++++++---
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index bc5dda3d2ad74..a00e9c335e694 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2712,7 +2712,7 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
 	 * split PMDs
 	 */
 	if (!can_split_folio(folio, &extra_pins)) {
-		ret = -EBUSY;
+		ret = -EAGAIN;
 		goto out_unlock;
 	}
 
@@ -2762,7 +2762,7 @@ fail:
 			xas_unlock(&xas);
 		local_irq_enable();
 		remap_page(folio, folio_nr_pages(folio));
-		ret = -EBUSY;
+		ret = -EAGAIN;
 	}
 
 out_unlock:
diff --git a/mm/migrate.c b/mm/migrate.c
index 556cb1c86e534..f8c85b42e2bcc 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1506,9 +1506,22 @@ thp_subpage_migration:
 				if (is_thp) {
 					nr_thp_failed++;
 					/* THP NUMA faulting doesn't split THP to retry. */
-					if (!nosplit && !try_split_thp(page, &thp_split_pages)) {
-						nr_thp_split++;
-						break;
+					if (!nosplit) {
+						int ret = try_split_thp(page, &thp_split_pages);
+
+						if (!ret) {
+							nr_thp_split++;
+							break;
+						} else if (reason == MR_LONGTERM_PIN &&
+							   ret == -EAGAIN) {
+							/*
+							 * Try again to split THP to mitigate
+							 * the failure of longterm pinning.
+							 */
+							thp_retry++;
+							nr_retry_pages += nr_subpages;
+							break;
+						}
 					}
 				} else if (!no_subpage_counting) {
 					nr_failed++;
-- 
GitLab


From e591ef7d96d6ea249916f351dc26a636e565c635 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Mon, 24 Oct 2022 15:20:09 +0900
Subject: [PATCH 1107/4122] mm,hwpoison,hugetlb,memory_hotplug: hotremove
 memory section with hwpoisoned hugepage

Patch series "mm, hwpoison: improve handling workload related to hugetlb
and memory_hotplug", v7.

This patchset tries to solve the issue among memory_hotplug, hugetlb and hwpoison.
In this patchset, memory hotplug handles hwpoison pages like below:

  - hwpoison pages should not prevent memory hotremove,
  - memory block with hwpoison pages should not be onlined.


This patch (of 4):

HWPoisoned page is not supposed to be accessed once marked, but currently
such accesses can happen during memory hotremove because
do_migrate_range() can be called before dissolve_free_huge_pages() is
called.

Clear HPageMigratable for hwpoisoned hugepages to prevent them from being
migrated.  This should be done in hugetlb_lock to avoid race against
isolate_hugetlb().

get_hwpoison_huge_page() needs to have a flag to show it's called from
unpoison to take refcount of hwpoisoned hugepages, so add it.

[naoya.horiguchi@linux.dev: remove TestClearHPageMigratable and reduce to test and clear separately]
  Link: https://lkml.kernel.org/r/20221025053559.GA2104800@ik1-406-35019.vs.sakura.ne.jp
Link: https://lkml.kernel.org/r/20221024062012.1520887-1-naoya.horiguchi@linux.dev
Link: https://lkml.kernel.org/r/20221024062012.1520887-2-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reported-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 10 ++++++----
 include/linux/mm.h      |  6 ++++--
 mm/hugetlb.c            |  9 +++++----
 mm/memory-failure.c     | 21 +++++++++++++++++----
 4 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 20a0d5a083956..65ea34022aa2b 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -183,8 +183,9 @@ bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
 						long freed);
 int isolate_hugetlb(struct page *page, struct list_head *list);
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb);
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+				bool *migratable_cleared);
 void putback_active_hugepage(struct page *page);
 void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
 void free_huge_page(struct page *page);
@@ -391,12 +392,13 @@ static inline int isolate_hugetlb(struct page *page, struct list_head *list)
 	return -EBUSY;
 }
 
-static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+static inline int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
 	return 0;
 }
 
-static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+static inline int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+					bool *migratable_cleared)
 {
 	return 0;
 }
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6d2d2d9e284a..e2ac6fff03a8d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3277,9 +3277,11 @@ extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
-extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags);
+extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+					bool *migratable_cleared);
 #else
-static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+					bool *migratable_cleared)
 {
 	return 0;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index fc8908d715d6d..fdb36afea2b2f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7265,7 +7265,7 @@ unlock:
 	return ret;
 }
 
-int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
+int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison)
 {
 	int ret = 0;
 
@@ -7275,7 +7275,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
 		*hugetlb = true;
 		if (HPageFreed(page))
 			ret = 0;
-		else if (HPageMigratable(page))
+		else if (HPageMigratable(page) || unpoison)
 			ret = get_page_unless_zero(page);
 		else
 			ret = -EBUSY;
@@ -7284,12 +7284,13 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb)
 	return ret;
 }
 
-int get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+				bool *migratable_cleared)
 {
 	int ret;
 
 	spin_lock_irq(&hugetlb_lock);
-	ret = __get_huge_page_for_hwpoison(pfn, flags);
+	ret = __get_huge_page_for_hwpoison(pfn, flags, migratable_cleared);
 	spin_unlock_irq(&hugetlb_lock);
 	return ret;
 }
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 13594556146c6..4fff0b36c61dc 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1244,7 +1244,7 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
 	int ret = 0;
 	bool hugetlb = false;
 
-	ret = get_hwpoison_huge_page(head, &hugetlb);
+	ret = get_hwpoison_huge_page(head, &hugetlb, false);
 	if (hugetlb)
 		return ret;
 
@@ -1334,7 +1334,7 @@ static int __get_unpoison_page(struct page *page)
 	int ret = 0;
 	bool hugetlb = false;
 
-	ret = get_hwpoison_huge_page(head, &hugetlb);
+	ret = get_hwpoison_huge_page(head, &hugetlb, true);
 	if (hugetlb)
 		return ret;
 
@@ -1785,7 +1785,8 @@ void hugetlb_clear_page_hwpoison(struct page *hpage)
  *   -EBUSY        - the hugepage is busy (try to retry)
  *   -EHWPOISON    - the hugepage is already hwpoisoned
  */
-int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
+int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
+				 bool *migratable_cleared)
 {
 	struct page *page = pfn_to_page(pfn);
 	struct page *head = compound_head(page);
@@ -1815,6 +1816,15 @@ int __get_huge_page_for_hwpoison(unsigned long pfn, int flags)
 		goto out;
 	}
 
+	/*
+	 * Clearing HPageMigratable for hwpoisoned hugepages to prevent them
+	 * from being migrated by memory hotremove.
+	 */
+	if (count_increased && HPageMigratable(head)) {
+		ClearHPageMigratable(head);
+		*migratable_cleared = true;
+	}
+
 	return ret;
 out:
 	if (count_increased)
@@ -1834,10 +1844,11 @@ static int try_memory_failure_hugetlb(unsigned long pfn, int flags, int *hugetlb
 	struct page *p = pfn_to_page(pfn);
 	struct page *head;
 	unsigned long page_flags;
+	bool migratable_cleared = false;
 
 	*hugetlb = 1;
 retry:
-	res = get_huge_page_for_hwpoison(pfn, flags);
+	res = get_huge_page_for_hwpoison(pfn, flags, &migratable_cleared);
 	if (res == 2) { /* fallback to normal page handling */
 		*hugetlb = 0;
 		return 0;
@@ -1861,6 +1872,8 @@ retry:
 
 	if (hwpoison_filter(p)) {
 		hugetlb_clear_page_hwpoison(head);
+		if (migratable_cleared)
+			SetHPageMigratable(head);
 		unlock_page(head);
 		if (res == 1)
 			put_page(head);
-- 
GitLab


From d027122d8363e58cd8bc2fa6a16917f7f69b85bb Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Mon, 24 Oct 2022 15:20:10 +0900
Subject: [PATCH 1108/4122] mm/hwpoison: move definitions of
 num_poisoned_pages_* to memory-failure.c

These interfaces will be used by drivers/base/memory.c by later patch, so
as a preparatory work move them to more common header file visible to the
file.

Link: https://lkml.kernel.org/r/20221024062012.1520887-3-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/parisc/kernel/pdt.c |  3 +--
 include/linux/mm.h       |  5 +++++
 include/linux/swapops.h  | 24 ++----------------------
 mm/memory-failure.c      | 10 ++++++++++
 4 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index e391b175f5ece..fdc880e2575a5 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -18,8 +18,7 @@
 #include <linux/kthread.h>
 #include <linux/initrd.h>
 #include <linux/pgtable.h>
-#include <linux/swap.h>
-#include <linux/swapops.h>
+#include <linux/mm.h>
 
 #include <asm/pdc.h>
 #include <asm/pdcpat.h>
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e2ac6fff03a8d..c667a6c4b657e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3279,12 +3279,17 @@ extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
+extern void num_poisoned_pages_inc(void);
 #else
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared)
 {
 	return 0;
 }
+
+static inline void num_poisoned_pages_inc(void)
+{
+}
 #endif
 
 #ifndef arch_memory_failure
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 86b95ccb81bb7..3ba9bf56899da 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -581,8 +581,6 @@ static inline int is_pmd_migration_entry(pmd_t pmd)
 
 #ifdef CONFIG_MEMORY_FAILURE
 
-extern atomic_long_t num_poisoned_pages __read_mostly;
-
 /*
  * Support for hardware poisoned pages
  */
@@ -597,17 +595,7 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
 	return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline void num_poisoned_pages_inc(void)
-{
-	atomic_long_inc(&num_poisoned_pages);
-}
-
-static inline void num_poisoned_pages_sub(long i)
-{
-	atomic_long_sub(i, &num_poisoned_pages);
-}
-
-#else  /* CONFIG_MEMORY_FAILURE */
+#else
 
 static inline swp_entry_t make_hwpoison_entry(struct page *page)
 {
@@ -618,15 +606,7 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
 {
 	return 0;
 }
-
-static inline void num_poisoned_pages_inc(void)
-{
-}
-
-static inline void num_poisoned_pages_sub(long i)
-{
-}
-#endif  /* CONFIG_MEMORY_FAILURE */
+#endif
 
 static inline int non_swap_entry(swp_entry_t entry)
 {
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 4fff0b36c61dc..44d7bf6ff214e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -74,6 +74,16 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool hw_memory_failure __read_mostly = false;
 
+inline void num_poisoned_pages_inc(void)
+{
+	atomic_long_inc(&num_poisoned_pages);
+}
+
+static inline void num_poisoned_pages_sub(long i)
+{
+	atomic_long_sub(i, &num_poisoned_pages);
+}
+
 /*
  * Return values:
  *   1:   the page is dissolved (if needed) and taken off from buddy,
-- 
GitLab


From a46c9304b4bbf1b164154976cbb7e648980c7b5b Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Mon, 24 Oct 2022 15:20:11 +0900
Subject: [PATCH 1109/4122] mm/hwpoison: pass pfn to num_poisoned_pages_*()

No functional change.

Link: https://lkml.kernel.org/r/20221024062012.1520887-4-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/parisc/kernel/pdt.c |  2 +-
 include/linux/mm.h       |  4 ++--
 mm/memory-failure.c      | 14 +++++++-------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c
index fdc880e2575a5..80943a00e2459 100644
--- a/arch/parisc/kernel/pdt.c
+++ b/arch/parisc/kernel/pdt.c
@@ -231,7 +231,7 @@ void __init pdc_pdt_init(void)
 
 		/* mark memory page bad */
 		memblock_reserve(pdt_entry[i] & PAGE_MASK, PAGE_SIZE);
-		num_poisoned_pages_inc();
+		num_poisoned_pages_inc(addr >> PAGE_SHIFT);
 	}
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index c667a6c4b657e..78ae2ee09a24c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3279,7 +3279,7 @@ extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
-extern void num_poisoned_pages_inc(void);
+extern void num_poisoned_pages_inc(unsigned long pfn);
 #else
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared)
@@ -3287,7 +3287,7 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 	return 0;
 }
 
-static inline void num_poisoned_pages_inc(void)
+static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
 #endif
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 44d7bf6ff214e..757a46e172ded 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -74,12 +74,12 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
 static bool hw_memory_failure __read_mostly = false;
 
-inline void num_poisoned_pages_inc(void)
+inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 	atomic_long_inc(&num_poisoned_pages);
 }
 
-static inline void num_poisoned_pages_sub(long i)
+static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 {
 	atomic_long_sub(i, &num_poisoned_pages);
 }
@@ -125,7 +125,7 @@ static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, boo
 	if (release)
 		put_page(page);
 	page_ref_inc(page);
-	num_poisoned_pages_inc();
+	num_poisoned_pages_inc(page_to_pfn(page));
 
 	return true;
 }
@@ -1194,7 +1194,7 @@ static int action_result(unsigned long pfn, enum mf_action_page_type type,
 {
 	trace_memory_failure_event(pfn, type, result);
 
-	num_poisoned_pages_inc();
+	num_poisoned_pages_inc(pfn);
 	pr_err("%#lx: recovery action for %s: %s\n",
 		pfn, action_page_types[type], action_name[result]);
 
@@ -1741,7 +1741,7 @@ static int hugetlb_set_page_hwpoison(struct page *hpage, struct page *page)
 		llist_add(&raw_hwp->node, head);
 		/* the first error event will be counted in action_result(). */
 		if (ret)
-			num_poisoned_pages_inc();
+			num_poisoned_pages_inc(page_to_pfn(page));
 	} else {
 		/*
 		 * Failed to save raw error info.  We no longer trace all
@@ -2414,7 +2414,7 @@ int unpoison_memory(unsigned long pfn)
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	if (!ret || freeit) {
-		num_poisoned_pages_sub(count);
+		num_poisoned_pages_sub(pfn, count);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 page_to_pfn(p), &unpoison_rs);
 	}
@@ -2630,5 +2630,5 @@ void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
 		}
 	}
 	if (total)
-		num_poisoned_pages_sub(total);
+		num_poisoned_pages_sub(0, total);
 }
-- 
GitLab


From 5033091de814ab4b5623faed2755f3064e19e2d2 Mon Sep 17 00:00:00 2001
From: Naoya Horiguchi <naoya.horiguchi@nec.com>
Date: Mon, 24 Oct 2022 15:20:12 +0900
Subject: [PATCH 1110/4122] mm/hwpoison: introduce per-memory_block hwpoison
 counter

Currently PageHWPoison flag does not behave well when experiencing memory
hotremove/hotplug.  Any data field in struct page is unreliable when the
associated memory is offlined, and the current mechanism can't tell
whether a memory block is onlined because a new memory devices is
installed or because previous failed offline operations are undone.
Especially if there's a hwpoisoned memory, it's unclear what the best
option is.

So introduce a new mechanism to make struct memory_block remember that a
memory block has hwpoisoned memory inside it.  And make any online event
fail if the onlining memory block contains hwpoison.  struct memory_block
is freed and reallocated over ACPI-based hotremove/hotplug, but not over
sysfs-based hotremove/hotplug.  So the new counter can distinguish these
cases.

Link: https://lkml.kernel.org/r/20221024062012.1520887-5-naoya.horiguchi@linux.dev
Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Jane Chu <jane.chu@oracle.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Yang Shi <shy828301@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/base/memory.c  | 38 ++++++++++++++++++++++++++++++++++++++
 include/linux/memory.h |  3 +++
 include/linux/mm.h     | 20 +++++++++++++++++++-
 mm/internal.h          |  8 --------
 mm/memory-failure.c    | 36 +++++++++++-------------------------
 mm/sparse.c            |  2 --
 6 files changed, 71 insertions(+), 36 deletions(-)

diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 9aa0da991cfb9..fe98fb8d94e51 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -175,6 +175,15 @@ int memory_notify(unsigned long val, void *v)
 	return blocking_notifier_call_chain(&memory_chain, val, v);
 }
 
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+static unsigned long memblk_nr_poison(struct memory_block *mem);
+#else
+static inline unsigned long memblk_nr_poison(struct memory_block *mem)
+{
+	return 0;
+}
+#endif
+
 static int memory_block_online(struct memory_block *mem)
 {
 	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
@@ -183,6 +192,9 @@ static int memory_block_online(struct memory_block *mem)
 	struct zone *zone;
 	int ret;
 
+	if (memblk_nr_poison(mem))
+		return -EHWPOISON;
+
 	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
 				  start_pfn, nr_pages);
 
@@ -864,6 +876,7 @@ void remove_memory_block_devices(unsigned long start, unsigned long size)
 		mem = find_memory_block_by_id(block_id);
 		if (WARN_ON_ONCE(!mem))
 			continue;
+		num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));
 		unregister_memory_block_under_nodes(mem);
 		remove_memory_block(mem);
 	}
@@ -1164,3 +1177,28 @@ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
 	}
 	return ret;
 }
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+void memblk_nr_poison_inc(unsigned long pfn)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_inc(&mem->nr_hwpoison);
+}
+
+void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+	const unsigned long block_id = pfn_to_block_id(pfn);
+	struct memory_block *mem = find_memory_block_by_id(block_id);
+
+	if (mem)
+		atomic_long_sub(i, &mem->nr_hwpoison);
+}
+
+static unsigned long memblk_nr_poison(struct memory_block *mem)
+{
+	return atomic_long_read(&mem->nr_hwpoison);
+}
+#endif
diff --git a/include/linux/memory.h b/include/linux/memory.h
index 463662ef76142..31343566c2212 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -84,6 +84,9 @@ struct memory_block {
 	unsigned long nr_vmemmap_pages;
 	struct memory_group *group;	/* group (if any) for this block */
 	struct list_head group_next;	/* next block inside memory group */
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+	atomic_long_t nr_hwpoison;
+#endif
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 78ae2ee09a24c..429ff89bfe06e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3279,7 +3279,8 @@ extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
-extern void num_poisoned_pages_inc(unsigned long pfn);
+void num_poisoned_pages_inc(unsigned long pfn);
+void num_poisoned_pages_sub(unsigned long pfn, long i);
 #else
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared)
@@ -3290,6 +3291,23 @@ static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 static inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 }
+
+static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
+{
+}
+#endif
+
+#if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_MEMORY_HOTPLUG)
+extern void memblk_nr_poison_inc(unsigned long pfn);
+extern void memblk_nr_poison_sub(unsigned long pfn, long i);
+#else
+static inline void memblk_nr_poison_inc(unsigned long pfn)
+{
+}
+
+static inline void memblk_nr_poison_sub(unsigned long pfn, long i)
+{
+}
 #endif
 
 #ifndef arch_memory_failure
diff --git a/mm/internal.h b/mm/internal.h
index 68afdbe7106ed..bcf75a8b032de 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -708,14 +708,6 @@ extern u64 hwpoison_filter_flags_value;
 extern u64 hwpoison_filter_memcg;
 extern u32 hwpoison_filter_enable;
 
-#ifdef CONFIG_MEMORY_FAILURE
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages);
-#else
-static inline void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
-}
-#endif
-
 extern unsigned long  __must_check vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 757a46e172ded..9b82402ec2421 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -77,11 +77,14 @@ static bool hw_memory_failure __read_mostly = false;
 inline void num_poisoned_pages_inc(unsigned long pfn)
 {
 	atomic_long_inc(&num_poisoned_pages);
+	memblk_nr_poison_inc(pfn);
 }
 
-static inline void num_poisoned_pages_sub(unsigned long pfn, long i)
+inline void num_poisoned_pages_sub(unsigned long pfn, long i)
 {
 	atomic_long_sub(i, &num_poisoned_pages);
+	if (pfn != -1UL)
+		memblk_nr_poison_sub(pfn, i);
 }
 
 /*
@@ -1706,6 +1709,8 @@ static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
 
 		if (move_flag)
 			SetPageHWPoison(p->page);
+		else
+			num_poisoned_pages_sub(page_to_pfn(p->page), 1);
 		kfree(p);
 		count++;
 	}
@@ -2332,6 +2337,7 @@ int unpoison_memory(unsigned long pfn)
 	int ret = -EBUSY;
 	int freeit = 0;
 	unsigned long count = 1;
+	bool huge = false;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
 					DEFAULT_RATELIMIT_BURST);
 
@@ -2380,6 +2386,7 @@ int unpoison_memory(unsigned long pfn)
 	ret = get_hwpoison_page(p, MF_UNPOISON);
 	if (!ret) {
 		if (PageHuge(p)) {
+			huge = true;
 			count = free_raw_hwp_pages(page, false);
 			if (count == 0) {
 				ret = -EBUSY;
@@ -2395,6 +2402,7 @@ int unpoison_memory(unsigned long pfn)
 					 pfn, &unpoison_rs);
 	} else {
 		if (PageHuge(p)) {
+			huge = true;
 			count = free_raw_hwp_pages(page, false);
 			if (count == 0) {
 				ret = -EBUSY;
@@ -2414,7 +2422,8 @@ int unpoison_memory(unsigned long pfn)
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
 	if (!ret || freeit) {
-		num_poisoned_pages_sub(pfn, count);
+		if (!huge)
+			num_poisoned_pages_sub(pfn, 1);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
 				 page_to_pfn(p), &unpoison_rs);
 	}
@@ -2609,26 +2618,3 @@ retry:
 
 	return ret;
 }
-
-void clear_hwpoisoned_pages(struct page *memmap, int nr_pages)
-{
-	int i, total = 0;
-
-	/*
-	 * A further optimization is to have per section refcounted
-	 * num_poisoned_pages.  But that would need more space per memmap, so
-	 * for now just do a quick global check to speed up this routine in the
-	 * absence of bad pages.
-	 */
-	if (atomic_long_read(&num_poisoned_pages) == 0)
-		return;
-
-	for (i = 0; i < nr_pages; i++) {
-		if (PageHWPoison(&memmap[i])) {
-			total++;
-			ClearPageHWPoison(&memmap[i]);
-		}
-	}
-	if (total)
-		num_poisoned_pages_sub(0, total);
-}
diff --git a/mm/sparse.c b/mm/sparse.c
index e5a8a3a0edd74..2779b419ef2a2 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -926,8 +926,6 @@ void sparse_remove_section(struct mem_section *ms, unsigned long pfn,
 		unsigned long nr_pages, unsigned long map_offset,
 		struct vmem_altmap *altmap)
 {
-	clear_hwpoisoned_pages(pfn_to_page(pfn) + map_offset,
-			nr_pages - map_offset);
 	section_deactivate(pfn, nr_pages, altmap);
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
-- 
GitLab


From ea0ffd0c08d0fef1f6e93eb07badbeeabf6b43d6 Mon Sep 17 00:00:00 2001
From: Kairui Song <kasong@tencent.com>
Date: Mon, 24 Oct 2022 00:25:33 +0800
Subject: [PATCH 1111/4122] swap: add a limit for readahead page-cluster value

Currenty there is no upper limit for /proc/sys/vm/page-cluster, and it's a
bit shift value, so it could result in overflow of the 32-bit integer.
Add a reasonable upper limit for it, read-in at most 2**31 pages, which is
a large enough value for readahead.

Link: https://lkml.kernel.org/r/20221023162533.81561-1-ryncsn@gmail.com
Signed-off-by: Kairui Song <kasong@tencent.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 1 +
 kernel/sysctl.c    | 1 +
 mm/swap.c          | 3 ++-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 429ff89bfe06e..255931ebf2dc4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -74,6 +74,7 @@ static inline void totalram_pages_add(long count)
 
 extern void * high_memory;
 extern int page_cluster;
+extern const int page_cluster_max;
 
 #ifdef CONFIG_SYSCTL
 extern int sysctl_legacy_va_layout;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 188c305aeb8b7..71a4350ac601b 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2125,6 +2125,7 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
+		.extra2		= (void *)&page_cluster_max,
 	},
 	{
 		.procname	= "dirtytime_expire_seconds",
diff --git a/mm/swap.c b/mm/swap.c
index 2f12a2ee1d3af..b9a6817e07ffe 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -43,8 +43,9 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/pagemap.h>
 
-/* How many pages do we try to swap or page in/out together? */
+/* How many pages do we try to swap or page in/out together? As a power of 2 */
 int page_cluster;
+const int page_cluster_max = 31;
 
 /* Protecting only lru_rotate.fbatch which requires disabling interrupts */
 struct lru_rotate {
-- 
GitLab


From e0767e391079687081c5564b1390983c36b49cd1 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 1 Nov 2022 10:35:20 +0800
Subject: [PATCH 1112/4122] soundwire: cadence: rename sdw_cdns_dai_dma_data as
 sdw_cdns_dai_runtime
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing 'struct sdw_cdns_dma_data' has really nothing to do with
DMAs. The information is stored in the dai->dma_data, but this is
really private data that should be stored in a different context.

Beyond the academic elegance discussion, using dma_data is a problem
for new Intel hardware where the dma_data structure is already used
for true DMA handling performed by other parts of the code.

This patch prepares a transition away from the use of dma_data, for
now with a rename-only change.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221101023521.2384586-2-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/cadence_master.c | 30 +++++-----
 drivers/soundwire/cadence_master.h |  4 +-
 drivers/soundwire/intel.c          | 96 +++++++++++++++---------------
 3 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index 93929f19d0831..235617b0542fc 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -1707,40 +1707,40 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai,
 			void *stream, int direction)
 {
 	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
-	struct sdw_cdns_dma_data *dma;
+	struct sdw_cdns_dai_runtime *dai_runtime;
 
 	if (stream) {
 		/* first paranoia check */
 		if (direction == SNDRV_PCM_STREAM_PLAYBACK)
-			dma = dai->playback_dma_data;
+			dai_runtime = dai->playback_dma_data;
 		else
-			dma = dai->capture_dma_data;
+			dai_runtime = dai->capture_dma_data;
 
-		if (dma) {
+		if (dai_runtime) {
 			dev_err(dai->dev,
-				"dma_data already allocated for dai %s\n",
+				"dai_runtime already allocated for dai %s\n",
 				dai->name);
 			return -EINVAL;
 		}
 
-		/* allocate and set dma info */
-		dma = kzalloc(sizeof(*dma), GFP_KERNEL);
-		if (!dma)
+		/* allocate and set dai_runtime info */
+		dai_runtime = kzalloc(sizeof(*dai_runtime), GFP_KERNEL);
+		if (!dai_runtime)
 			return -ENOMEM;
 
-		dma->stream_type = SDW_STREAM_PCM;
+		dai_runtime->stream_type = SDW_STREAM_PCM;
 
-		dma->bus = &cdns->bus;
-		dma->link_id = cdns->instance;
+		dai_runtime->bus = &cdns->bus;
+		dai_runtime->link_id = cdns->instance;
 
-		dma->stream = stream;
+		dai_runtime->stream = stream;
 
 		if (direction == SNDRV_PCM_STREAM_PLAYBACK)
-			dai->playback_dma_data = dma;
+			dai->playback_dma_data = dai_runtime;
 		else
-			dai->capture_dma_data = dma;
+			dai->capture_dma_data = dai_runtime;
 	} else {
-		/* for NULL stream we release allocated dma_data */
+		/* for NULL stream we release allocated dai_runtime */
 		if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
 			kfree(dai->playback_dma_data);
 			dai->playback_dma_data = NULL;
diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h
index ca9e805bab88f..93f23bd46e2c2 100644
--- a/drivers/soundwire/cadence_master.h
+++ b/drivers/soundwire/cadence_master.h
@@ -70,7 +70,7 @@ struct sdw_cdns_stream_config {
 };
 
 /**
- * struct sdw_cdns_dma_data: Cadence DMA data
+ * struct sdw_cdns_dai_runtime: Cadence DAI runtime data
  *
  * @name: SoundWire stream name
  * @stream: stream runtime
@@ -82,7 +82,7 @@ struct sdw_cdns_stream_config {
  * @suspended: status set when suspended, to be used in .prepare
  * @paused: status set in .trigger, to be used in suspend
  */
-struct sdw_cdns_dma_data {
+struct sdw_cdns_dai_runtime {
 	char *name;
 	struct sdw_stream_runtime *stream;
 	struct sdw_cdns_pdi *pdi;
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 244209358784f..1e9c6df4b62c2 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -824,15 +824,15 @@ static int intel_hw_params(struct snd_pcm_substream *substream,
 {
 	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_cdns_dma_data *dma;
+	struct sdw_cdns_dai_runtime *dai_runtime;
 	struct sdw_cdns_pdi *pdi;
 	struct sdw_stream_config sconfig;
 	struct sdw_port_config *pconfig;
 	int ch, dir;
 	int ret;
 
-	dma = snd_soc_dai_get_dma_data(dai, substream);
-	if (!dma)
+	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	if (!dai_runtime)
 		return -EIO;
 
 	ch = params_channels(params);
@@ -854,10 +854,10 @@ static int intel_hw_params(struct snd_pcm_substream *substream,
 	sdw_cdns_config_stream(cdns, ch, dir, pdi);
 
 	/* store pdi and hw_params, may be needed in prepare step */
-	dma->paused = false;
-	dma->suspended = false;
-	dma->pdi = pdi;
-	dma->hw_params = params;
+	dai_runtime->paused = false;
+	dai_runtime->suspended = false;
+	dai_runtime->pdi = pdi;
+	dai_runtime->hw_params = params;
 
 	/* Inform DSP about PDI stream number */
 	ret = intel_params_stream(sdw, substream->stream, dai, params,
@@ -869,7 +869,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream,
 	sconfig.direction = dir;
 	sconfig.ch_count = ch;
 	sconfig.frame_rate = params_rate(params);
-	sconfig.type = dma->stream_type;
+	sconfig.type = dai_runtime->stream_type;
 
 	sconfig.bps = snd_pcm_format_width(params_format(params));
 
@@ -884,7 +884,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream,
 	pconfig->ch_mask = (1 << ch) - 1;
 
 	ret = sdw_stream_add_master(&cdns->bus, &sconfig,
-				    pconfig, 1, dma->stream);
+				    pconfig, 1, dai_runtime->stream);
 	if (ret)
 		dev_err(cdns->dev, "add master to stream failed:%d\n", ret);
 
@@ -898,19 +898,19 @@ static int intel_prepare(struct snd_pcm_substream *substream,
 {
 	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_cdns_dma_data *dma;
+	struct sdw_cdns_dai_runtime *dai_runtime;
 	int ch, dir;
 	int ret = 0;
 
-	dma = snd_soc_dai_get_dma_data(dai, substream);
-	if (!dma) {
-		dev_err(dai->dev, "failed to get dma data in %s\n",
+	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	if (!dai_runtime) {
+		dev_err(dai->dev, "failed to get dai runtime in %s\n",
 			__func__);
 		return -EIO;
 	}
 
-	if (dma->suspended) {
-		dma->suspended = false;
+	if (dai_runtime->suspended) {
+		dai_runtime->suspended = false;
 
 		/*
 		 * .prepare() is called after system resume, where we
@@ -921,21 +921,21 @@ static int intel_prepare(struct snd_pcm_substream *substream,
 		 */
 
 		/* configure stream */
-		ch = params_channels(dma->hw_params);
+		ch = params_channels(dai_runtime->hw_params);
 		if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
 			dir = SDW_DATA_DIR_RX;
 		else
 			dir = SDW_DATA_DIR_TX;
 
-		intel_pdi_shim_configure(sdw, dma->pdi);
-		intel_pdi_alh_configure(sdw, dma->pdi);
-		sdw_cdns_config_stream(cdns, ch, dir, dma->pdi);
+		intel_pdi_shim_configure(sdw, dai_runtime->pdi);
+		intel_pdi_alh_configure(sdw, dai_runtime->pdi);
+		sdw_cdns_config_stream(cdns, ch, dir, dai_runtime->pdi);
 
 		/* Inform DSP about PDI stream number */
 		ret = intel_params_stream(sdw, substream->stream, dai,
-					  dma->hw_params,
+					  dai_runtime->hw_params,
 					  sdw->instance,
-					  dma->pdi->intel_alh_id);
+					  dai_runtime->pdi->intel_alh_id);
 	}
 
 	return ret;
@@ -946,11 +946,11 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
 {
 	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_cdns_dma_data *dma;
+	struct sdw_cdns_dai_runtime *dai_runtime;
 	int ret;
 
-	dma = snd_soc_dai_get_dma_data(dai, substream);
-	if (!dma)
+	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	if (!dai_runtime)
 		return -EIO;
 
 	/*
@@ -959,10 +959,10 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
 	 * DEPREPARED for the first cpu-dai and to RELEASED for the last
 	 * cpu-dai.
 	 */
-	ret = sdw_stream_remove_master(&cdns->bus, dma->stream);
+	ret = sdw_stream_remove_master(&cdns->bus, dai_runtime->stream);
 	if (ret < 0) {
 		dev_err(dai->dev, "remove master from stream %s failed: %d\n",
-			dma->stream->name, ret);
+			dai_runtime->stream->name, ret);
 		return ret;
 	}
 
@@ -972,8 +972,8 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
 		return ret;
 	}
 
-	dma->hw_params = NULL;
-	dma->pdi = NULL;
+	dai_runtime->hw_params = NULL;
+	dai_runtime->pdi = NULL;
 
 	return 0;
 }
@@ -996,17 +996,17 @@ static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai,
 static void *intel_get_sdw_stream(struct snd_soc_dai *dai,
 				  int direction)
 {
-	struct sdw_cdns_dma_data *dma;
+	struct sdw_cdns_dai_runtime *dai_runtime;
 
 	if (direction == SNDRV_PCM_STREAM_PLAYBACK)
-		dma = dai->playback_dma_data;
+		dai_runtime = dai->playback_dma_data;
 	else
-		dma = dai->capture_dma_data;
+		dai_runtime = dai->capture_dma_data;
 
-	if (!dma)
+	if (!dai_runtime)
 		return ERR_PTR(-EINVAL);
 
-	return dma->stream;
+	return dai_runtime->stream;
 }
 
 static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct snd_soc_dai *dai)
@@ -1014,7 +1014,7 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn
 	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 	struct sdw_intel *sdw = cdns_to_intel(cdns);
 	struct sdw_intel_link_res *res = sdw->link_res;
-	struct sdw_cdns_dma_data *dma;
+	struct sdw_cdns_dai_runtime *dai_runtime;
 	int ret = 0;
 
 	/*
@@ -1025,9 +1025,9 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn
 	if (res->ops && res->ops->trigger)
 		res->ops->trigger(dai, cmd, substream->stream);
 
-	dma = snd_soc_dai_get_dma_data(dai, substream);
-	if (!dma) {
-		dev_err(dai->dev, "failed to get dma data in %s\n",
+	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	if (!dai_runtime) {
+		dev_err(dai->dev, "failed to get dai runtime in %s\n",
 			__func__);
 		return -EIO;
 	}
@@ -1042,17 +1042,17 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn
 		 * the .trigger callback is used to track the suspend case only.
 		 */
 
-		dma->suspended = true;
+		dai_runtime->suspended = true;
 
 		ret = intel_free_stream(sdw, substream->stream, dai, sdw->instance);
 		break;
 
 	case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
-		dma->paused = true;
+		dai_runtime->paused = true;
 		break;
 	case SNDRV_PCM_TRIGGER_STOP:
 	case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
-		dma->paused = false;
+		dai_runtime->paused = false;
 		break;
 	default:
 		break;
@@ -1091,25 +1091,25 @@ static int intel_component_dais_suspend(struct snd_soc_component *component)
 	for_each_component_dais(component, dai) {
 		struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 		struct sdw_intel *sdw = cdns_to_intel(cdns);
-		struct sdw_cdns_dma_data *dma;
+		struct sdw_cdns_dai_runtime *dai_runtime;
 		int stream;
 		int ret;
 
-		dma = dai->playback_dma_data;
+		dai_runtime = dai->playback_dma_data;
 		stream = SNDRV_PCM_STREAM_PLAYBACK;
-		if (!dma) {
-			dma = dai->capture_dma_data;
+		if (!dai_runtime) {
+			dai_runtime = dai->capture_dma_data;
 			stream = SNDRV_PCM_STREAM_CAPTURE;
 		}
 
-		if (!dma)
+		if (!dai_runtime)
 			continue;
 
-		if (dma->suspended)
+		if (dai_runtime->suspended)
 			continue;
 
-		if (dma->paused) {
-			dma->suspended = true;
+		if (dai_runtime->paused) {
+			dai_runtime->suspended = true;
 
 			ret = intel_free_stream(sdw, stream, dai, sdw->instance);
 			if (ret < 0)
-- 
GitLab


From 7dddead766c0826a998e7053e7d1c92b3422f8d6 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Tue, 1 Nov 2022 10:35:21 +0800
Subject: [PATCH 1113/4122] soundwire: cadence: use dai_runtime_array instead
 of dma_data
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Simplify the code with a Cadence-specific dai_runtime_array, indexed
with dai->id, instead of abusing dma_data.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Péter Ujfalusi <peter.ujfalusi@linux.intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221101023521.2384586-3-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/cadence_master.c | 30 +++++++++++++--------------
 drivers/soundwire/cadence_master.h |  5 +++++
 drivers/soundwire/intel.c          | 33 +++++++++++++++---------------
 3 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/drivers/soundwire/cadence_master.c b/drivers/soundwire/cadence_master.c
index 235617b0542fc..a1de363eba3ff 100644
--- a/drivers/soundwire/cadence_master.c
+++ b/drivers/soundwire/cadence_master.c
@@ -1709,13 +1709,10 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai,
 	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 	struct sdw_cdns_dai_runtime *dai_runtime;
 
+	dai_runtime = cdns->dai_runtime_array[dai->id];
+
 	if (stream) {
 		/* first paranoia check */
-		if (direction == SNDRV_PCM_STREAM_PLAYBACK)
-			dai_runtime = dai->playback_dma_data;
-		else
-			dai_runtime = dai->capture_dma_data;
-
 		if (dai_runtime) {
 			dev_err(dai->dev,
 				"dai_runtime already allocated for dai %s\n",
@@ -1734,20 +1731,21 @@ int cdns_set_sdw_stream(struct snd_soc_dai *dai,
 		dai_runtime->link_id = cdns->instance;
 
 		dai_runtime->stream = stream;
+		dai_runtime->direction = direction;
 
-		if (direction == SNDRV_PCM_STREAM_PLAYBACK)
-			dai->playback_dma_data = dai_runtime;
-		else
-			dai->capture_dma_data = dai_runtime;
+		cdns->dai_runtime_array[dai->id] = dai_runtime;
 	} else {
-		/* for NULL stream we release allocated dai_runtime */
-		if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
-			kfree(dai->playback_dma_data);
-			dai->playback_dma_data = NULL;
-		} else {
-			kfree(dai->capture_dma_data);
-			dai->capture_dma_data = NULL;
+		/* second paranoia check */
+		if (!dai_runtime) {
+			dev_err(dai->dev,
+				"dai_runtime not allocated for dai %s\n",
+				dai->name);
+			return -EINVAL;
 		}
+
+		/* for NULL stream we release allocated dai_runtime */
+		kfree(dai_runtime);
+		cdns->dai_runtime_array[dai->id] = NULL;
 	}
 	return 0;
 }
diff --git a/drivers/soundwire/cadence_master.h b/drivers/soundwire/cadence_master.h
index 93f23bd46e2c2..0434d70d4b1f5 100644
--- a/drivers/soundwire/cadence_master.h
+++ b/drivers/soundwire/cadence_master.h
@@ -81,6 +81,7 @@ struct sdw_cdns_stream_config {
  * @hw_params: hw_params to be applied in .prepare step
  * @suspended: status set when suspended, to be used in .prepare
  * @paused: status set in .trigger, to be used in suspend
+ * @direction: stream direction
  */
 struct sdw_cdns_dai_runtime {
 	char *name;
@@ -92,6 +93,7 @@ struct sdw_cdns_dai_runtime {
 	struct snd_pcm_hw_params *hw_params;
 	bool suspended;
 	bool paused;
+	int direction;
 };
 
 /**
@@ -108,6 +110,7 @@ struct sdw_cdns_dai_runtime {
  * @registers: Cadence registers
  * @link_up: Link status
  * @msg_count: Messages sent on bus
+ * @dai_runtime_array: runtime context for each allocated DAI.
  */
 struct sdw_cdns {
 	struct device *dev;
@@ -135,6 +138,8 @@ struct sdw_cdns {
 	struct work_struct work;
 
 	struct list_head list;
+
+	struct sdw_cdns_dai_runtime **dai_runtime_array;
 };
 
 #define bus_to_cdns(_bus) container_of(_bus, struct sdw_cdns, bus)
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 1e9c6df4b62c2..e8855a2115f64 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -831,7 +831,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream,
 	int ch, dir;
 	int ret;
 
-	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	dai_runtime = cdns->dai_runtime_array[dai->id];
 	if (!dai_runtime)
 		return -EIO;
 
@@ -902,7 +902,7 @@ static int intel_prepare(struct snd_pcm_substream *substream,
 	int ch, dir;
 	int ret = 0;
 
-	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	dai_runtime = cdns->dai_runtime_array[dai->id];
 	if (!dai_runtime) {
 		dev_err(dai->dev, "failed to get dai runtime in %s\n",
 			__func__);
@@ -949,7 +949,7 @@ intel_hw_free(struct snd_pcm_substream *substream, struct snd_soc_dai *dai)
 	struct sdw_cdns_dai_runtime *dai_runtime;
 	int ret;
 
-	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	dai_runtime = cdns->dai_runtime_array[dai->id];
 	if (!dai_runtime)
 		return -EIO;
 
@@ -996,13 +996,10 @@ static int intel_pcm_set_sdw_stream(struct snd_soc_dai *dai,
 static void *intel_get_sdw_stream(struct snd_soc_dai *dai,
 				  int direction)
 {
+	struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 	struct sdw_cdns_dai_runtime *dai_runtime;
 
-	if (direction == SNDRV_PCM_STREAM_PLAYBACK)
-		dai_runtime = dai->playback_dma_data;
-	else
-		dai_runtime = dai->capture_dma_data;
-
+	dai_runtime = cdns->dai_runtime_array[dai->id];
 	if (!dai_runtime)
 		return ERR_PTR(-EINVAL);
 
@@ -1025,7 +1022,7 @@ static int intel_trigger(struct snd_pcm_substream *substream, int cmd, struct sn
 	if (res->ops && res->ops->trigger)
 		res->ops->trigger(dai, cmd, substream->stream);
 
-	dai_runtime = snd_soc_dai_get_dma_data(dai, substream);
+	dai_runtime = cdns->dai_runtime_array[dai->id];
 	if (!dai_runtime) {
 		dev_err(dai->dev, "failed to get dai runtime in %s\n",
 			__func__);
@@ -1092,15 +1089,9 @@ static int intel_component_dais_suspend(struct snd_soc_component *component)
 		struct sdw_cdns *cdns = snd_soc_dai_get_drvdata(dai);
 		struct sdw_intel *sdw = cdns_to_intel(cdns);
 		struct sdw_cdns_dai_runtime *dai_runtime;
-		int stream;
 		int ret;
 
-		dai_runtime = dai->playback_dma_data;
-		stream = SNDRV_PCM_STREAM_PLAYBACK;
-		if (!dai_runtime) {
-			dai_runtime = dai->capture_dma_data;
-			stream = SNDRV_PCM_STREAM_CAPTURE;
-		}
+		dai_runtime = cdns->dai_runtime_array[dai->id];
 
 		if (!dai_runtime)
 			continue;
@@ -1111,7 +1102,7 @@ static int intel_component_dais_suspend(struct snd_soc_component *component)
 		if (dai_runtime->paused) {
 			dai_runtime->suspended = true;
 
-			ret = intel_free_stream(sdw, stream, dai, sdw->instance);
+			ret = intel_free_stream(sdw, dai_runtime->direction, dai, sdw->instance);
 			if (ret < 0)
 				return ret;
 		}
@@ -1178,6 +1169,7 @@ static int intel_create_dai(struct sdw_cdns *cdns,
 
 static int intel_register_dai(struct sdw_intel *sdw)
 {
+	struct sdw_cdns_dai_runtime **dai_runtime_array;
 	struct sdw_cdns_stream_config config;
 	struct sdw_cdns *cdns = &sdw->cdns;
 	struct sdw_cdns_streams *stream;
@@ -1195,6 +1187,13 @@ static int intel_register_dai(struct sdw_intel *sdw)
 	/* DAIs are created based on total number of PDIs supported */
 	num_dai = cdns->pcm.num_pdi;
 
+	dai_runtime_array = devm_kcalloc(cdns->dev, num_dai,
+					 sizeof(struct sdw_cdns_dai_runtime *),
+					 GFP_KERNEL);
+	if (!dai_runtime_array)
+		return -ENOMEM;
+	cdns->dai_runtime_array = dai_runtime_array;
+
 	dais = devm_kcalloc(cdns->dev, num_dai, sizeof(*dais), GFP_KERNEL);
 	if (!dais)
 		return -ENOMEM;
-- 
GitLab


From febc50b82bc95089ef1d6f68a101c8a2b701e9ce Mon Sep 17 00:00:00 2001
From: Srinivasa Rao Mandadapu <quic_srivasam@quicinc.com>
Date: Tue, 8 Nov 2022 20:16:03 +0530
Subject: [PATCH 1114/4122] dt-bindings: soundwire: Convert text bindings to DT
 Schema

Convert soundwire text bindings to DT Schema format.

Update interrupt property items as per device tree, as it is not
appropriately described in text file.
Update some of the properties description with minimum and maximum range.
Update secondary node info which is used to describe slave devices.

Signed-off-by: Srinivasa Rao Mandadapu <quic_srivasam@quicinc.com>
Co-developed-by: Ratna Deepthi Kudaravalli <quic_rkudarav@quicinc.com>
Signed-off-by: Ratna Deepthi Kudaravalli <quic_rkudarav@quicinc.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/1667918763-32445-5-git-send-email-quic_srivasam@quicinc.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../bindings/soundwire/qcom,sdw.txt           | 215 --------------
 .../bindings/soundwire/qcom,soundwire.yaml    | 270 ++++++++++++++++++
 2 files changed, 270 insertions(+), 215 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
 create mode 100644 Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml

diff --git a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt b/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
deleted file mode 100644
index e0faed8dceac7..0000000000000
--- a/Documentation/devicetree/bindings/soundwire/qcom,sdw.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-Qualcomm SoundWire Controller Bindings
-
-
-This binding describes the Qualcomm SoundWire Controller along with its
-board specific bus parameters.
-
-- compatible:
-	Usage: required
-	Value type: <stringlist>
-	Definition: must be "qcom,soundwire-v<MAJOR>.<MINOR>.<STEP>",
-		    Example:
-			"qcom,soundwire-v1.3.0"
-			"qcom,soundwire-v1.5.0"
-			"qcom,soundwire-v1.5.1"
-			"qcom,soundwire-v1.6.0"
-			"qcom,soundwire-v1.7.0"
-- reg:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: the base address and size of SoundWire controller
-		    address space.
-
-- interrupts:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: should specify the SoundWire Controller core and optional
-		    wake IRQ
-
-- interrupt-names:
-	Usage: Optional
-	Value type: boolean
-	Value type: <stringlist>
-	Definition: should be "core" for core and "wakeup" for wake interrupt.
-
-- wakeup-source:
-	Usage: Optional
-	Value type: boolean
-	Definition: should specify if SoundWire Controller is wake up capable.
-
-- clock-names:
-	Usage: required
-	Value type: <stringlist>
-	Definition: should be "iface" for SoundWire Controller interface clock
-
-- clocks:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: should specify the SoundWire Controller interface clock
-
-- #sound-dai-cells:
-	Usage: required
-	Value type: <u32>
-	Definition: must be 1 for digital audio interfaces on the controller.
-
-- qcom,dout-ports:
-	Usage: required
-	Value type: <u32>
-	Definition: must be count of data out ports
-
-- qcom,din-ports:
-	Usage: required
-	Value type: <u32>
-	Definition: must be count of data in ports
-
-- qcom,ports-offset1:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: should specify payload transport window offset1 of each
-		    data port. Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-offset2:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: should specify payload transport window offset2 of each
-		    data port. Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-sinterval-low:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: should be sample interval low of each data port.
-		    Out ports followed by In ports. Used for Sample Interval
-		    calculation.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-word-length:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: should be size of payload channel sample.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-block-pack-mode:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: should be 0 or 1 to indicate the block packing mode.
-		    0 to indicate Blocks are per Channel
-		    1 to indicate Blocks are per Port.
-		    Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-block-group-count:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: should be in range 1 to 4 to indicate how many sample
-		    intervals are combined into a payload.
-		    Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-lane-control:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: should be in range 0 to 7 to identify which	data lane
-		    the data port uses.
-		    Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-hstart:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: should be number identifying lowerst numbered coloum in
-		    SoundWire Frame, i.e. left edge of the Transport sub-frame
-		    for each port. Values between 0 and 15 are valid.
-		    Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,ports-hstop:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: should be number identifying highest numbered coloum in
-		    SoundWire Frame, i.e. the right edge of the Transport
-		    sub-frame for each port. Values between 0 and 15 are valid.
-		    Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- qcom,dports-type:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: should be one of the following types
-		    0 for reduced port
-		    1 for simple ports
-		    2 for full port
-		    Out ports followed by In ports.
-		    Value of 0xFF indicates that this option is not implemented
-		    or applicable for the respective data port.
-		    More info in MIPI Alliance SoundWire 1.0 Specifications.
-
-- reset:
-	Usage: optional
-	Value type: <prop-encoded-array>
-	Definition: Should specify the SoundWire audio CSR reset controller interface,
-		    which is required for SoundWire version 1.6.0 and above.
-
-- reset-names:
-	Usage: optional
-	Value type: <stringlist>
-	Definition: should be "swr_audio_cgcr" for SoundWire audio CSR reset
-		    controller interface.
-
-Note:
-	More Information on detail of encoding of these fields can be
-found in MIPI Alliance SoundWire 1.0 Specifications.
-
-= SoundWire devices
-Each subnode of the bus represents SoundWire device attached to it.
-The properties of these nodes are defined by the individual bindings.
-
-= EXAMPLE
-The following example represents a SoundWire controller on DB845c board
-which has controller integrated inside WCD934x codec on SDM845 SoC.
-
-soundwire: soundwire@c85 {
-	compatible = "qcom,soundwire-v1.3.0";
-	reg = <0xc85 0x20>;
-	interrupts = <20 IRQ_TYPE_EDGE_RISING>;
-	clocks = <&wcc>;
-	clock-names = "iface";
-	resets = <&lpass_audiocc LPASS_AUDIO_SWR_TX_CGCR>;
-	reset-names = "swr_audio_cgcr";
-	#sound-dai-cells = <1>;
-	qcom,dports-type = <0>;
-	qcom,dout-ports	= <6>;
-	qcom,din-ports	= <2>;
-	qcom,ports-sinterval-low = /bits/ 8  <0x07 0x1F 0x3F 0x7 0x1F 0x3F 0x0F 0x0F>;
-	qcom,ports-offset1 = /bits/ 8 <0x01 0x02 0x0C 0x6 0x12 0x0D 0x07 0x0A >;
-	qcom,ports-offset2 = /bits/ 8 <0x00 0x00 0x1F 0x00 0x00 0x1F 0x00 0x00>;
-
-	/* Left Speaker */
-	left{
-		....
-	};
-
-	/* Right Speaker */
-	right{
-		....
-	};
-};
diff --git a/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml
new file mode 100644
index 0000000000000..bcbfa71536cda
--- /dev/null
+++ b/Documentation/devicetree/bindings/soundwire/qcom,soundwire.yaml
@@ -0,0 +1,270 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/soundwire/qcom,soundwire.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SoundWire Controller
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+  - Srinivasa Rao Mandadapu <quic_srivasam@quicinc.com>
+
+description:
+  The Qualcomm SoundWire controller along with its board specific bus parameters.
+
+properties:
+  compatible:
+    enum:
+      - qcom,soundwire-v1.3.0
+      - qcom,soundwire-v1.5.0
+      - qcom,soundwire-v1.5.1
+      - qcom,soundwire-v1.6.0
+      - qcom,soundwire-v1.7.0
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    items:
+      - description: specify the SoundWire controller core.
+      - description: specify the Soundwire controller wake IRQ.
+
+  interrupt-names:
+    minItems: 1
+    items:
+      - const: core
+      - const: wakeup
+
+  clocks:
+    items:
+      - description: iface clock
+
+  clock-names:
+    items:
+      - const: iface
+
+  resets:
+    items:
+      - description: SWR_AUDIO_CGCR RESET
+
+  reset-names:
+    items:
+      - const: swr_audio_cgcr
+
+  '#sound-dai-cells':
+    const: 1
+
+  '#address-cells':
+    const: 2
+
+  '#size-cells':
+    const: 0
+
+  wakeup-source: true
+
+  qcom,din-ports:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: count of data in ports
+
+  qcom,dout-ports:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: count of data out ports
+
+  qcom,ports-word-length:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Size of payload channel sample.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 5
+
+  qcom,ports-sinterval-low:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Sample interval low of each data port.
+      Out ports followed by In ports. Used for Sample Interval calculation.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 8
+
+  qcom,ports-offset1:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Payload transport window offset1 of each data port.
+      Out ports followed by In ports.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 8
+
+  qcom,ports-offset2:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Payload transport window offset2 of each data port.
+      Out ports followed by In ports.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 8
+
+  qcom,ports-lane-control:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Identify which data lane the data port uses.
+      Out ports followed by In ports.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 5
+
+  qcom,ports-block-pack-mode:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Indicate the block packing mode.
+      0 to indicate Blocks are per Channel
+      1 to indicate Blocks are per Port.
+      Out ports followed by In ports.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 8
+    items:
+      oneOf:
+        - minimum: 0
+          maximum: 1
+        - const: 0xff
+
+  qcom,ports-hstart:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Identifying lowerst numbered coloum in SoundWire Frame,
+      i.e. left edge of the Transport sub-frame for each port.
+      Out ports followed by In ports.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 5
+    items:
+      oneOf:
+        - minimum: 0
+          maximum: 15
+        - const: 0xff
+
+  qcom,ports-hstop:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      Identifying highest numbered coloum in SoundWire Frame,
+      i.e. the right edge of the Transport
+      sub-frame for each port. Out ports followed by In ports.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 5
+    items:
+      oneOf:
+        - minimum: 0
+          maximum: 15
+        - const: 0xff
+
+  qcom,ports-block-group-count:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description:
+      In range 1 to 4 to indicate how many sample intervals are combined
+      into a payload. Out ports followed by In ports.
+      Value of 0xff indicates that this option is not implemented
+      or applicable for the respective data port.
+      More info in MIPI Alliance SoundWire 1.0 Specifications.
+    minItems: 3
+    maxItems: 5
+    items:
+      oneOf:
+        - minimum: 0
+          maximum: 4
+        - const: 0xff
+
+  label:
+    maxItems: 1
+
+patternProperties:
+  "^.*@[0-9a-f],[0-9a-f]$":
+    type: object
+    description:
+      Child nodes for a standalone audio codec or speaker amplifier IC.
+      It has RX and TX Soundwire secondary devices.
+    properties:
+      compatible:
+        pattern: "^sdw[0-9a-f]{1}[0-9a-f]{4}[0-9a-f]{4}[0-9a-f]{2}$"
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - '#sound-dai-cells'
+  - '#address-cells'
+  - '#size-cells'
+  - qcom,dout-ports
+  - qcom,din-ports
+  - qcom,ports-sinterval-low
+  - qcom,ports-offset1
+  - qcom,ports-offset2
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/clock/qcom,lpassaudiocc-sc7280.h>
+
+    soundwire@3210000 {
+        compatible = "qcom,soundwire-v1.6.0";
+        reg = <0x03210000 0x2000>;
+
+        interrupts = <GIC_SPI 155 IRQ_TYPE_LEVEL_HIGH>,
+                     <&pdc 130 IRQ_TYPE_LEVEL_HIGH>;
+
+        interrupt-names = "core", "wakeup";
+
+        clocks = <&lpass_rx_macro>;
+        clock-names = "iface";
+
+        qcom,din-ports = <0>;
+        qcom,dout-ports = <5>;
+
+        resets = <&lpass_audiocc LPASS_AUDIO_SWR_RX_CGCR>;
+        reset-names = "swr_audio_cgcr";
+
+        qcom,ports-word-length =        /bits/ 8 <0x01 0x07 0x04 0xff 0xff>;
+        qcom,ports-sinterval-low =      /bits/ 8 <0x03 0x3f 0x1f 0x03 0x03>;
+        qcom,ports-offset1 =            /bits/ 8 <0x00 0x00 0x0b 0x01 0x01>;
+        qcom,ports-offset2 =            /bits/ 8 <0x00 0x00 0x0b 0x00 0x00>;
+        qcom,ports-lane-control =       /bits/ 8 <0x01 0x00 0x00 0x00 0x00>;
+        qcom,ports-block-pack-mode =    /bits/ 8 <0xff 0x00 0x01 0xff 0xff>;
+        qcom,ports-hstart =             /bits/ 8 <0xff 0x03 0xff 0xff 0xff>;
+        qcom,ports-hstop =              /bits/ 8 <0xff 0x06 0xff 0xff 0xff>;
+        qcom,ports-block-group-count =  /bits/ 8 <0xff 0xff 0xff 0xff 0x00>;
+
+        #sound-dai-cells = <1>;
+        #address-cells = <2>;
+        #size-cells = <0>;
+
+        codec@0,4 {
+            compatible = "sdw20217010d00";
+            reg = <0 4>;
+            qcom,rx-port-mapping = <1 2 3 4 5>;
+        };
+    };
-- 
GitLab


From 0349fdab2ff0673cc3c3f300316522d4f2bb1af9 Mon Sep 17 00:00:00 2001
From: Michael Grzeschik <m.grzeschik@pengutronix.de>
Date: Fri, 4 Nov 2022 22:55:13 +0100
Subject: [PATCH 1115/4122] usb: gadget: at91-udc: simplify
 at91rm9200_udc_pullup callback

Just simplify the use of is_on and get rid of superfluous condition.

Cc: gregkh@linuxfoundation.org
Cc: nicolas.ferre@microchip.com
Cc: alexandre.belloni@bootlin.com
Cc: linux-usb@vger.kernel.org
Cc: kernel@pengutronix.de
Reviewed-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>

Link: https://lore.kernel.org/r/20221104215516.2874922-2-m.grzeschik@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/at91_udc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/usb/gadget/udc/at91_udc.c b/drivers/usb/gadget/udc/at91_udc.c
index a9a7b3fc60ec9..922b4187004b0 100644
--- a/drivers/usb/gadget/udc/at91_udc.c
+++ b/drivers/usb/gadget/udc/at91_udc.c
@@ -1628,10 +1628,7 @@ static int at91rm9200_udc_init(struct at91_udc *udc)
 
 static void at91rm9200_udc_pullup(struct at91_udc *udc, int is_on)
 {
-	if (is_on)
-		gpiod_set_value(udc->board.pullup_pin, 1);
-	else
-		gpiod_set_value(udc->board.pullup_pin, 0);
+	gpiod_set_value(udc->board.pullup_pin, is_on);
 }
 
 static const struct at91_udc_caps at91rm9200_udc_caps = {
-- 
GitLab


From afb21a5155a136a2549947c151b13507a34976ae Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 3 Nov 2022 12:59:23 +0100
Subject: [PATCH 1116/4122] dt-bindings: usb: usb-drd: Describe default
 dual-role mode

The dual-role mode default, in the absence of the dr_mode property, is
already documented to be OTG. Use the "default" property to mark it as
such more explicitly.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221103115923.1467525-1-thierry.reding@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/usb-drd.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/usb/usb-drd.yaml b/Documentation/devicetree/bindings/usb/usb-drd.yaml
index 1567549b05cec..114fb5dc0498a 100644
--- a/Documentation/devicetree/bindings/usb/usb-drd.yaml
+++ b/Documentation/devicetree/bindings/usb/usb-drd.yaml
@@ -27,6 +27,7 @@ properties:
       should default to OTG.
     $ref: /schemas/types.yaml#/definitions/string
     enum: [host, peripheral, otg]
+    default: otg
 
   hnp-disable:
     description:
-- 
GitLab


From fff61d4ccf3d1124bf7aa82fa996536833b8204a Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Mon, 7 Nov 2022 14:42:48 +0100
Subject: [PATCH 1117/4122] dt-bindings: usb: usb251xb: Convert to YAML schema

Convert the usb251xb hub DT bindings from text to yaml schema so it is
possible to validate DTs against the schema.

Adjust the example to describe two different hubs at different I2C bus
addresses, to avoid I2C address collission in the example.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20221107134248.21899-1-marex@denx.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/usb/usb251xb.txt      |  89 ------
 .../devicetree/bindings/usb/usb251xb.yaml     | 271 ++++++++++++++++++
 2 files changed, 271 insertions(+), 89 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/usb/usb251xb.txt
 create mode 100644 Documentation/devicetree/bindings/usb/usb251xb.yaml

diff --git a/Documentation/devicetree/bindings/usb/usb251xb.txt b/Documentation/devicetree/bindings/usb/usb251xb.txt
deleted file mode 100644
index 1a934eab175ee..0000000000000
--- a/Documentation/devicetree/bindings/usb/usb251xb.txt
+++ /dev/null
@@ -1,89 +0,0 @@
-Microchip USB 2.0 Hi-Speed Hub Controller
-
-The device node for the configuration of a Microchip USB251x/xBi USB 2.0
-Hi-Speed Controller.
-
-Required properties :
- - compatible : Should be "microchip,usb251xb" or one of the specific types:
-	"microchip,usb2512b", "microchip,usb2512bi", "microchip,usb2513b",
-	"microchip,usb2513bi", "microchip,usb2514b", "microchip,usb2514bi",
-	"microchip,usb2517", "microchip,usb2517i", "microchip,usb2422"
- - reg : I2C address on the selected bus (default is <0x2C>)
-
-Optional properties :
- - reset-gpios : Should specify the gpio for hub reset
- - vdd-supply : Should specify the phandle to the regulator supplying vdd
- - skip-config : Skip Hub configuration, but only send the USB-Attach command
- - vendor-id : Set USB Vendor ID of the hub (16 bit, default is 0x0424)
- - product-id : Set USB Product ID of the hub (16 bit, default depends on type)
- - device-id : Set USB Device ID of the hub (16 bit, default is 0x0bb3)
- - language-id : Set USB Language ID (16 bit, default is 0x0000)
- - manufacturer : Set USB Manufacturer string (max 31 characters long)
- - product : Set USB Product string (max 31 characters long)
- - serial : Set USB Serial string (max 31 characters long)
- - {bus,self}-powered : selects between self- and bus-powered operation
-	(boolean, default is self-powered)
- - disable-hi-speed : disable USB Hi-Speed support (boolean)
- - {multi,single}-tt : selects between multi- and single-transaction-translator
-	(boolean, default is multi-tt)
- - disable-eop : disable End of Packet generation in full-speed mode (boolean)
- - {ganged,individual}-sensing : select over-current sense type in self-powered
-	mode (boolean, default is individual)
- - {ganged,individual}-port-switching : select port power switching mode
-	(boolean, default is individual)
- - dynamic-power-switching : enable auto-switching from self- to bus-powered
-	operation if the local power source is removed or unavailable (boolean)
- - oc-delay-us : Delay time (in microseconds) for filtering the over-current
-	sense inputs. Valid values are 100, 4000, 8000 (default) and 16000. If
-	an invalid value is given, the default is used instead.
- - compound-device : indicate the hub is part of a compound device (boolean)
- - port-mapping-mode : enable port mapping mode (boolean)
- - led-{usb,speed}-mode : led usb/speed indication mode selection
-	(boolean, default is speed mode)
- - string-support : enable string descriptor support (required for manufacturer,
-	product and serial string configuration)
- - non-removable-ports : Should specify the ports which have a non-removable
-	device connected.
- - sp-disabled-ports : Specifies the ports which will be self-power disabled
- - bp-disabled-ports : Specifies the ports which will be bus-power disabled
- - sp-max-total-current-microamp: Specifies max current consumed by the hub
-	from VBUS when operating in self-powered hub. It includes the hub
-	silicon along with all associated circuitry including a permanently
-	attached peripheral (range: 0 - 100000 uA, default 1000 uA)
- - bp-max-total-current-microamp: Specifies max current consumed by the hub
-	from VBUS when operating in self-powered hub. It includes the hub
-	silicon along with all associated circuitry including a permanently
-	attached peripheral (range: 0 - 510000 uA, default 100000 uA)
- - sp-max-removable-current-microamp: Specifies max current consumed by the hub
-	from VBUS when operating in self-powered hub. It includes the hub
-	silicon along with all associated circuitry excluding a permanently
-	attached peripheral (range: 0 - 100000 uA, default 1000 uA)
- - bp-max-removable-current-microamp: Specifies max current consumed by the hub
-	from VBUS when operating in self-powered hub. It includes the hub
-	silicon along with all associated circuitry excluding a permanently
-	attached peripheral (range: 0 - 510000 uA, default 100000 uA)
- - power-on-time-ms : Specifies the time it takes from the time the host
-	initiates the power-on sequence to a port until the port has adequate
-	power. The value is given in ms in a 0 - 510 range (default is 100ms).
- - swap-dx-lanes : Specifies the ports which will swap the differential-pair
-	(D+/D-), default is not-swapped.
-
-Examples:
-	usb2512b@2c {
-		compatible = "microchip,usb2512b";
-		reg = <0x2c>;
-		reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
-	};
-
-	usb2514b@2c {
-		compatible = "microchip,usb2514b";
-		reg = <0x2c>;
-		vendor-id = /bits/ 16 <0x0000>;
-		product-id = /bits/ 16 <0x0000>;
-		string-support;
-		manufacturer = "Foo";
-		product = "Foo-Bar";
-		serial = "1234567890A";
-		/* correct misplaced usb connectors on port 1,2 */
-		swap-dx-lanes = <1 2>;
-	};
diff --git a/Documentation/devicetree/bindings/usb/usb251xb.yaml b/Documentation/devicetree/bindings/usb/usb251xb.yaml
new file mode 100644
index 0000000000000..4d15308168178
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/usb251xb.yaml
@@ -0,0 +1,271 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/usb/usb251xb.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip USB 2.0 Hi-Speed Hub Controller
+
+maintainers:
+  - Richard Leitner <richard.leitner@skidata.com>
+
+properties:
+  compatible:
+    enum:
+      - microchip,usb2422
+      - microchip,usb2512b
+      - microchip,usb2512bi
+      - microchip,usb2513b
+      - microchip,usb2513bi
+      - microchip,usb2514b
+      - microchip,usb2514bi
+      - microchip,usb2517
+      - microchip,usb2517i
+      - microchip,usb251xb
+
+  reg:
+    maxItems: 1
+
+  reset-gpios:
+    description: |
+      Should specify the gpio for hub reset
+
+  vdd-supply:
+    description: |
+      Should specify the phandle to the regulator supplying vdd
+
+  skip-config:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      Skip Hub configuration, but only send the USB-Attach command
+
+  vendor-id:
+    $ref: /schemas/types.yaml#/definitions/uint16
+    default: 0x0424
+    description: |
+      Set USB Vendor ID of the hub
+
+  product-id:
+    $ref: /schemas/types.yaml#/definitions/uint16
+    description: |
+      Set USB Product ID of the hub
+
+  device-id:
+    $ref: /schemas/types.yaml#/definitions/uint16
+    default: 0x0bb3
+    description: |
+      Set USB Device ID of the hub
+
+  language-id:
+    $ref: /schemas/types.yaml#/definitions/uint16
+    default: 0x0000
+    description: |
+      Set USB Language ID
+
+  manufacturer:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: |
+      Set USB Manufacturer string (max 31 characters long)
+
+  product:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: |
+      Set USB Product string (max 31 characters long)
+
+  serial:
+    $ref: /schemas/types.yaml#/definitions/string
+    description: |
+      Set USB Serial string (max 31 characters long)
+
+  bus-powered:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      selects between self- and bus-powered operation
+      (boolean, default is self-powered)
+
+  self-powered:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      selects between self- and bus-powered operation
+      (boolean, default is self-powered)
+
+  disable-hi-speed:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      disable USB Hi-Speed support (boolean)
+
+  multi-tt:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      selects between multi- and single-transaction-translator
+      (boolean, default is multi-tt)
+
+  single-tt:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      selects between multi- and single-transaction-translator
+      (boolean, default is multi-tt)
+
+  disable-eop:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      disable End of Packet generation in full-speed mode (boolean)
+
+  ganged-sensing:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      select over-current sense type in self-powered mode
+      (boolean, default is individual)
+
+  individual-sensing:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      select over-current sense type in self-powered mode
+      (boolean, default is individual)
+
+  ganged-port-switching:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      select port power switching mode (boolean, default is individual)
+
+  individual-port-switching:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      select port power switching mode (boolean, default is individual)
+
+  dynamic-power-switching:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      enable auto-switching from self- to bus-powered operation if the
+      local power source is removed or unavailable (boolean)
+
+  oc-delay-us:
+    enum: [100, 4000, 8000, 16000]
+    default: 8000
+    description: |
+      Delay time (in microseconds) for filtering the over-current sense
+      inputs. If an invalid value is given, the default is used instead.
+
+  compound-device:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      indicate the hub is part of a compound device (boolean)
+
+  port-mapping-mode:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      enable port mapping mode (boolean)
+
+  led-usb-mode:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      led usb/speed indication mode selection (boolean, default is speed mode)
+
+  led-speed-mode:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      led usb/speed indication mode selection (boolean, default is speed mode)
+
+  string-support:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description: |
+      enable string descriptor support (required for manufacturer, product
+      and serial string configuration)
+
+  non-removable-ports:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description: |
+      Should specify the ports which have a non-removable device connected.
+
+  sp-disabled-ports:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description: |
+      Specifies the ports which will be self-power disabled
+
+  bp-disabled-ports:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description: |
+      Specifies the ports which will be bus-power disabled
+
+  sp-max-total-current-microamp:
+    maximum: 100000
+    default: 1000
+    description: |
+      Specifies max current consumed by the hub from VBUS when
+      operating in self-powered hub. It includes the hub silicon
+      along with all associated circuitry including a permanently
+      attached peripheral.
+
+  bp-max-total-current-microamp:
+    maximum: 510000
+    default: 100000
+    description: |
+      Specifies max current consumed by the hub from VBUS when
+      operating in self-powered hub. It includes the hub silicon
+      along with all associated circuitry including a permanently
+      attached peripheral.
+
+  sp-max-removable-current-microamp:
+    maximum: 100000
+    default: 1000
+    description: |
+      Specifies max current consumed by the hub from VBUS when
+      operating in self-powered hub. It includes the hub silicon
+      along with all associated circuitry excluding a permanently
+      attached peripheral.
+
+  bp-max-removable-current-microamp:
+    maximum: 510000
+    default: 100000
+    description: |
+      Specifies max current consumed by the hub from VBUS when
+      operating in self-powered hub. It includes the hub silicon
+      along with all associated circuitry excluding a permanently
+      attached peripheral.
+
+  power-on-time-ms:
+    maximum: 510
+    default: 100
+    description: |
+      Specifies the time it takes from the time the host initiates the
+      power-on sequence to a port until the port has adequate power.
+
+  swap-dx-lanes:
+    $ref: /schemas/types.yaml#/definitions/uint8-array
+    description: |
+      Specifies the ports which will swap the differential-pair (D+/D-),
+      default is not-swapped.
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      usb-hub@2c {
+        compatible = "microchip,usb2512b";
+        reg = <0x2c>;
+        reset-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
+      };
+
+      usb-hub@2d {
+        compatible = "microchip,usb2514b";
+        reg = <0x2d>;
+        vendor-id = /bits/ 16 <0x0000>;
+        product-id = /bits/ 16 <0x0000>;
+        string-support;
+        manufacturer = "Foo";
+        product = "Foo-Bar";
+        serial = "1234567890A";
+        /* correct misplaced usb connectors on port 1,2 */
+        swap-dx-lanes = <1 2>;
+      };
+    };
-- 
GitLab


From 434d806f077cad81d87a757adc631894bfa01ac2 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 26 Oct 2022 14:12:21 +0800
Subject: [PATCH 1118/4122] dt-bindings: usb: usb-nop-xceiv: add wakeup-source
 property

USB phy may be a system wakeup source, so add wakeup source property
to keep its resource (e.g. power domain) active to make USB remote
wakeup work.

Signed-off-by: Li Jun <jun.li@nxp.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/1666764742-4201-1-git-send-email-jun.li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml
index 2824c17285eea..326131dcf14d7 100644
--- a/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml
+++ b/Documentation/devicetree/bindings/usb/usb-nop-xceiv.yaml
@@ -39,6 +39,11 @@ properties:
       the VBus line.
     $ref: /schemas/types.yaml#/definitions/phandle
 
+  wakeup-source:
+    description:
+      Specify if the USB phy can detect the remote wakeup signal
+      while the system sleep.
+
 required:
   - compatible
   - '#phy-cells'
-- 
GitLab


From 4567d1a97f5290cb895a564feff0a5c770d6c332 Mon Sep 17 00:00:00 2001
From: Li Jun <jun.li@nxp.com>
Date: Wed, 26 Oct 2022 14:12:22 +0800
Subject: [PATCH 1119/4122] usb: phy: generic: Add wakeup capability

In case USB phy is the wakeup source, enable its wakeup
capability.

Signed-off-by: Li Jun <jun.li@nxp.com>
Link: https://lore.kernel.org/r/1666764742-4201-2-git-send-email-jun.li@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-generic.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/phy/phy-generic.c b/drivers/usb/phy/phy-generic.c
index 8ed9327cc4a52..c1309ea24a52e 100644
--- a/drivers/usb/phy/phy-generic.c
+++ b/drivers/usb/phy/phy-generic.c
@@ -286,6 +286,7 @@ EXPORT_SYMBOL_GPL(usb_phy_gen_create_phy);
 static int usb_phy_generic_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
+	struct device_node *dn = dev->of_node;
 	struct usb_phy_generic	*nop;
 	int err;
 
@@ -323,6 +324,9 @@ static int usb_phy_generic_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, nop);
 
+	device_set_wakeup_capable(&pdev->dev,
+				  of_property_read_bool(dn, "wakeup-source"));
+
 	return 0;
 }
 
-- 
GitLab


From ee9834636f9b07fe1dcf3fffbb325318cdb267d5 Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 28 Oct 2022 18:45:34 -0700
Subject: [PATCH 1120/4122] usb: ehci-pci: Set PROBE_PREFER_ASYNCHRONOUS

This driver often takes on the order of 8ms to start, but every little
bit counts. It shouldn't have many cross-device dependencies to
race with, nor racy access to shared state with other drivers, so this
should be a relatively low risk change.

This driver was pinpointed as part of a survey of top slowest initcalls
(i.e., are built in, and probing synchronously) on a lab of ChromeOS
systems.

Signed-off-by: Brian Norris <briannorris@chromium.org>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/20221028184507.v2.1.I9a5353f81d1509f85f3a04f0cdc9099f6fe60811@changeid
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-pci.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index 17f8b6ea0c356..4b148fe5e43b2 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -411,11 +411,12 @@ static struct pci_driver ehci_pci_driver = {
 	.remove =	ehci_pci_remove,
 	.shutdown = 	usb_hcd_pci_shutdown,
 
-#ifdef CONFIG_PM
 	.driver =	{
-		.pm =	&usb_hcd_pci_pm_ops
-	},
+#ifdef CONFIG_PM
+		.pm =	&usb_hcd_pci_pm_ops,
 #endif
+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+	},
 };
 
 static int __init ehci_pci_init(void)
-- 
GitLab


From 4c2604a9a6899bab195edbee35fc8d64ce1444aa Mon Sep 17 00:00:00 2001
From: Brian Norris <briannorris@chromium.org>
Date: Fri, 28 Oct 2022 18:45:35 -0700
Subject: [PATCH 1121/4122] usb: xhci-pci: Set PROBE_PREFER_ASYNCHRONOUS

This driver often takes on the order of 10ms to start, but in some cases
takes more than 100ms. It shouldn't have many cross-device dependencies
to race with, nor racy access to shared state with other drivers, so
this should be a relatively low risk change.

This driver was pinpointed as part of a survey of top slowest initcalls
(i.e., are built in, and probing synchronously) on a lab of ChromeOS
systems.

Signed-off-by: Brian Norris <briannorris@chromium.org>
Link: https://lore.kernel.org/r/20221028184507.v2.2.I5a309231785d3a4e37118a25e84f5caa0136a343@changeid
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 7bccbe50bab15..a29b681b562e4 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -673,11 +673,12 @@ static struct pci_driver xhci_pci_driver = {
 	/* suspend and resume implemented later */
 
 	.shutdown = 	usb_hcd_pci_shutdown,
-#ifdef CONFIG_PM
 	.driver = {
-		.pm = &usb_hcd_pci_pm_ops
-	},
+#ifdef CONFIG_PM
+		.pm = &usb_hcd_pci_pm_ops,
 #endif
+		.probe_type = PROBE_PREFER_ASYNCHRONOUS,
+	},
 };
 
 static int __init xhci_pci_init(void)
-- 
GitLab


From 9c3959bb4cbf2b45c5b53bf8a19426e5ddb5c56c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Fri, 4 Nov 2022 10:58:38 +0100
Subject: [PATCH 1122/4122] usb: chipidea: ci_hdrc_imx: Fix a typo
 ("regualator")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Change "regualator" to "regulator" in this comment.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>
Link: https://lore.kernel.org/r/20221104095838.2132945-1-j.neuschaefer@gmx.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/ci_hdrc_imx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/chipidea/ci_hdrc_imx.c b/drivers/usb/chipidea/ci_hdrc_imx.c
index 923f5c00a1d96..0dc482542d852 100644
--- a/drivers/usb/chipidea/ci_hdrc_imx.c
+++ b/drivers/usb/chipidea/ci_hdrc_imx.c
@@ -355,7 +355,7 @@ static int ci_hdrc_imx_probe(struct platform_device *pdev)
 		data->hsic_pad_regulator =
 				devm_regulator_get_optional(dev, "hsic");
 		if (PTR_ERR(data->hsic_pad_regulator) == -ENODEV) {
-			/* no pad regualator is needed */
+			/* no pad regulator is needed */
 			data->hsic_pad_regulator = NULL;
 		} else if (IS_ERR(data->hsic_pad_regulator))
 			return dev_err_probe(dev, PTR_ERR(data->hsic_pad_regulator),
-- 
GitLab


From 83045e19feae937c425248824d1dc0fc95583842 Mon Sep 17 00:00:00 2001
From: Henry Tian <tianxiaofeng@bytedance.com>
Date: Mon, 24 Oct 2022 09:48:53 +0000
Subject: [PATCH 1123/4122] usb: gadget: aspeed: fix buffer overflow

In ast_vhub_epn_handle_ack() when the received data length exceeds the
buffer, it does not check the case and just copies to req.buf and cause
a buffer overflow, kernel oops on this case.

This issue could be reproduced on a BMC with an OS that enables the
lan over USB:
1. In OS, enable the usb eth dev, verify it pings the BMC OK;
2. In OS, set the usb dev mtu to 2000. (Default is 1500);
3. In OS, ping the BMC with `-s 2000` argument.

The BMC kernel will get oops with below logs:

    skbuff: skb_over_panic: text:8058e098 len:2048 put:2048 head:84c678a0 data:84c678c2 tail:0x84c680c2 end:0x84c67f00 dev:usb0
    ------------[ cut here ]------------
    kernel BUG at net/core/skbuff.c:113!
    Internal error: Oops - BUG: 0 [#1] ARM
    CPU: 0 PID: 0 Comm: swapper Not tainted 5.15.69-c9fb275-dirty-d1e579a #1
    Hardware name: Generic DT based system
    PC is at skb_panic+0x60/0x6c
    LR is at irq_work_queue+0x6c/0x94

Fix the issue by checking the length and set `-EOVERFLOW`.

Tested: Verify the BMC kernel does not get oops in the above case, and
the usb ethernet gets RX packets errors instead.

Signed-off-by: Lei YU <yulei.sh@bytedance.com>
Signed-off-by: Henry Tian <tianxiaofeng@bytedance.com>
Reviewed-by: Neal Liu <neal_liu@aspeedtech.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Link: https://lore.kernel.org/r/20221024094853.2877441-1-yulei.sh@bytedance.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/aspeed-vhub/core.c |  2 +-
 drivers/usb/gadget/udc/aspeed-vhub/epn.c  | 16 ++++++++++++----
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/gadget/udc/aspeed-vhub/core.c b/drivers/usb/gadget/udc/aspeed-vhub/core.c
index 7a635c4997774..ac3ca24f8b045 100644
--- a/drivers/usb/gadget/udc/aspeed-vhub/core.c
+++ b/drivers/usb/gadget/udc/aspeed-vhub/core.c
@@ -37,7 +37,7 @@ void ast_vhub_done(struct ast_vhub_ep *ep, struct ast_vhub_req *req,
 
 	list_del_init(&req->queue);
 
-	if (req->req.status == -EINPROGRESS)
+	if ((req->req.status == -EINPROGRESS) ||  (status == -EOVERFLOW))
 		req->req.status = status;
 
 	if (req->req.dma) {
diff --git a/drivers/usb/gadget/udc/aspeed-vhub/epn.c b/drivers/usb/gadget/udc/aspeed-vhub/epn.c
index b5252880b3894..56e55472daa13 100644
--- a/drivers/usb/gadget/udc/aspeed-vhub/epn.c
+++ b/drivers/usb/gadget/udc/aspeed-vhub/epn.c
@@ -84,6 +84,7 @@ static void ast_vhub_epn_handle_ack(struct ast_vhub_ep *ep)
 {
 	struct ast_vhub_req *req;
 	unsigned int len;
+	int status = 0;
 	u32 stat;
 
 	/* Read EP status */
@@ -119,9 +120,15 @@ static void ast_vhub_epn_handle_ack(struct ast_vhub_ep *ep)
 	len = VHUB_EP_DMA_TX_SIZE(stat);
 
 	/* If not using DMA, copy data out if needed */
-	if (!req->req.dma && !ep->epn.is_in && len)
-		memcpy(req->req.buf + req->req.actual, ep->buf, len);
-
+	if (!req->req.dma && !ep->epn.is_in && len) {
+		if (req->req.actual + len > req->req.length) {
+			req->last_desc = 1;
+			status = -EOVERFLOW;
+			goto done;
+		} else {
+			memcpy(req->req.buf + req->req.actual, ep->buf, len);
+		}
+	}
 	/* Adjust size */
 	req->req.actual += len;
 
@@ -129,9 +136,10 @@ static void ast_vhub_epn_handle_ack(struct ast_vhub_ep *ep)
 	if (len < ep->ep.maxpacket)
 		req->last_desc = 1;
 
+done:
 	/* That's it ? complete the request and pick a new one */
 	if (req->last_desc >= 0) {
-		ast_vhub_done(ep, req, 0);
+		ast_vhub_done(ep, req, status);
 		req = list_first_entry_or_null(&ep->queue, struct ast_vhub_req,
 					       queue);
 
-- 
GitLab


From d119cd95c62ddf6a1d76a006be273f255fd6c5a8 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Mon, 31 Oct 2022 09:54:26 +0100
Subject: [PATCH 1124/4122] usb: musb: remove left-over after USB_TI_CPPI_DMA
 removal

Commit 32fee1df5110 ("usb: musb: remove unused davinci support") removes
the config USB_TI_CPPI_DMA, but misses some left-over references in
drivers/usb/musb/musb_dma.h.

Remove the left-over dependent on this removed config.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20221031085426.17175-1-lukas.bulwahn@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_dma.h | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/musb/musb_dma.h b/drivers/usb/musb/musb_dma.h
index 7d67b69df0a07..e2445ca3356d0 100644
--- a/drivers/usb/musb/musb_dma.h
+++ b/drivers/usb/musb/musb_dma.h
@@ -61,12 +61,6 @@ struct musb_hw_ep;
 #define musb_dma_cppi41(musb)		0
 #endif
 
-#ifdef CONFIG_USB_TI_CPPI_DMA
-#define musb_dma_cppi(musb)		(musb->ops->quirks & MUSB_DMA_CPPI)
-#else
-#define musb_dma_cppi(musb)		0
-#endif
-
 #ifdef CONFIG_USB_TUSB_OMAP_DMA
 #define tusb_dma_omap(musb)		(musb->ops->quirks & MUSB_DMA_TUSB_OMAP)
 #else
@@ -79,11 +73,10 @@ struct musb_hw_ep;
 #define musb_dma_inventra(musb)		0
 #endif
 
-#if defined(CONFIG_USB_TI_CPPI_DMA) || defined(CONFIG_USB_TI_CPPI41_DMA)
-#define	is_cppi_enabled(musb)		\
-	(musb_dma_cppi(musb) || musb_dma_cppi41(musb))
+#if defined(CONFIG_USB_TI_CPPI41_DMA)
+#define	is_cppi_enabled(musb)		musb_dma_cppi41(musb)
 #else
-#define	is_cppi_enabled(musb)	0
+#define	is_cppi_enabled(musb)		0
 #endif
 
 /*
-- 
GitLab


From 77ece8123fed2bef451ef31a34b8327849375d26 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 3 Nov 2022 10:06:25 +0800
Subject: [PATCH 1125/4122] Documentation: devres: add missing PHY helpers

Add devm_usb_get_phy_by_phandle() to devres.rst. It's introduced
by commit 5d3c28b5a42d ("usb: otg: add device tree support to otg
library").

Add devm_usb_get_phy_by_node() to devres.rst. It's introduced by
commit e842b84c8e72 ("usb: phy: Add interface to get phy give of
device_node.")

Fixes: 5d3c28b5a42d ("usb: otg: add device tree support to otg library")
Fixes: e842b84c8e72 ("usb: phy: Add interface to get phy give of device_node.")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221103020625.1003759-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/driver-model/devres.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 687adb58048ec..6007ef0704e33 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -387,6 +387,8 @@ PCI
 
 PHY
   devm_usb_get_phy()
+  devm_usb_get_phy_by_node()
+  devm_usb_get_phy_by_phandle()
   devm_usb_put_phy()
 
 PINCTRL
-- 
GitLab


From dced88922c1179dfa2664690318d4cba57ebffb5 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Wed, 26 Oct 2022 20:11:57 +0800
Subject: [PATCH 1126/4122] usb: chipidea: core: wrap ci_handle_power_lost()
 with CONFIG_PM_SLEEP

If CONFIG_PM_SLEEP is not set, the following error will be shown up
when build kernel:
    error: 'ci_handle_power_lost' defined but not used.

This will move ci_handle_power_lost() to an area wrapped by
CONFIG_PM_SLEEP.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Fixes: 74494b33211d ("usb: chipidea: core: add controller resume support when controller is powered off")
Reported-by: Conor Dooley <conor.dooley@microchip.com>
Tested-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221026121157.1491302-1-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/chipidea/core.c | 38 ++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/usb/chipidea/core.c b/drivers/usb/chipidea/core.c
index 2b170b434d012..484b1cd234317 100644
--- a/drivers/usb/chipidea/core.c
+++ b/drivers/usb/chipidea/core.c
@@ -661,25 +661,6 @@ static enum ci_role ci_get_role(struct ci_hdrc *ci)
 	return role;
 }
 
-static void ci_handle_power_lost(struct ci_hdrc *ci)
-{
-	enum ci_role role;
-
-	disable_irq_nosync(ci->irq);
-	if (!ci_otg_is_fsm_mode(ci)) {
-		role = ci_get_role(ci);
-
-		if (ci->role != role) {
-			ci_handle_id_switch(ci);
-		} else if (role == CI_ROLE_GADGET) {
-			if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
-				usb_gadget_vbus_connect(&ci->gadget);
-		}
-	}
-
-	enable_irq(ci->irq);
-}
-
 static struct usb_role_switch_desc ci_role_switch = {
 	.set = ci_usb_role_switch_set,
 	.get = ci_usb_role_switch_get,
@@ -1400,6 +1381,25 @@ static int ci_suspend(struct device *dev)
 	return 0;
 }
 
+static void ci_handle_power_lost(struct ci_hdrc *ci)
+{
+	enum ci_role role;
+
+	disable_irq_nosync(ci->irq);
+	if (!ci_otg_is_fsm_mode(ci)) {
+		role = ci_get_role(ci);
+
+		if (ci->role != role) {
+			ci_handle_id_switch(ci);
+		} else if (role == CI_ROLE_GADGET) {
+			if (ci->is_otg && hw_read_otgsc(ci, OTGSC_BSV))
+				usb_gadget_vbus_connect(&ci->gadget);
+		}
+	}
+
+	enable_irq(ci->irq);
+}
+
 static int ci_resume(struct device *dev)
 {
 	struct ci_hdrc *ci = dev_get_drvdata(dev);
-- 
GitLab


From c5edb757baa99f6d30180b1a4b4f81f7e7f92217 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Date: Sat, 29 Oct 2022 21:43:12 +0530
Subject: [PATCH 1127/4122] tools: usb: ffs-aio-example: Fix build error with
 aarch64-*-gnu-gcc toolchain(s)

The tools/usb/aio_simple.c file when cross-compiled with
aarch64-*-gnu-gcc toolchain(s) leads to the following errors:

  aio_simple.c:30:10: fatal error: endian.h: No such file or directory
  30 | #include <endian.h>
      |          ^~~~~~~~~~

  aio_simple.c:88:14: note: (near initialization for
                      'descriptors.fs_count')
  aio_simple.c:110:14: error: initializer element is not constant
  110 |  .hs_count = htole32(3),
      |              ^~~~~~~
  aio_simple.c:110:14: note: (near initialization for
                      'descriptors.hs_count')
  aio_simple.c:124:22: error: initializer element is not constant
  124 |    .wMaxPacketSize = htole16(512),
      |                      ^~~~~~~
  aio_simple.c:124:22: note: (near initialization for
                       'descriptors.hs_descs.bulk_sink.wMaxPacketSize')

Fix these compilation issues by:
 - Switching to _DEFAULT_SOURCE:
    _BSD_SOURCE is deprecated and gives a build warning. Let's use
    _DEFAULT_SOURCE instead.
 - Currently this file uses library htole16/32 function calls.
   Replace these with equivalent 'cpu_to_le16/32' calls.

Cc: Felipe Balbi <balbi@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Bhupesh Sharma <bhupesh.sharma@linaro.org>
Link: https://lore.kernel.org/r/20221029161312.171165-1-bhupesh.sharma@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../simple/device_app/aio_simple.c            | 44 +++++++++++++------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c
index 1f44a29818bf0..96616eb4600b4 100644
--- a/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c
+++ b/tools/usb/ffs-aio-example/simple/device_app/aio_simple.c
@@ -25,7 +25,9 @@
  * For more information, please refer to <http://unlicense.org/>
  */
 
-#define _BSD_SOURCE /* for endian.h */
+/* $(CROSS_COMPILE)cc -g -o aio_simple aio_simple.c -laio */
+
+#define _DEFAULT_SOURCE /* for endian.h */
 
 #include <endian.h>
 #include <errno.h>
@@ -49,6 +51,22 @@
 
 #define BUF_LEN		8192
 
+/*
+ * cpu_to_le16/32 are used when initializing structures, a context where a
+ * function call is not allowed. To solve this, we code cpu_to_le16/32 in a way
+ * that allows them to be used when initializing structures.
+ */
+
+#if BYTE_ORDER == __LITTLE_ENDIAN
+#define cpu_to_le16(x)  (x)
+#define cpu_to_le32(x)  (x)
+#else
+#define cpu_to_le16(x)  ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))
+#define cpu_to_le32(x)  \
+	((((x) & 0xff000000u) >> 24) | (((x) & 0x00ff0000u) >>  8) | \
+	(((x) & 0x0000ff00u) <<  8) | (((x) & 0x000000ffu) << 24))
+#endif
+
 /******************** Descriptors and Strings *******************************/
 
 static const struct {
@@ -62,12 +80,12 @@ static const struct {
 	} __attribute__ ((__packed__)) fs_descs, hs_descs;
 } __attribute__ ((__packed__)) descriptors = {
 	.header = {
-		.magic = htole32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
-		.flags = htole32(FUNCTIONFS_HAS_FS_DESC |
+		.magic = cpu_to_le32(FUNCTIONFS_DESCRIPTORS_MAGIC_V2),
+		.flags = cpu_to_le32(FUNCTIONFS_HAS_FS_DESC |
 				     FUNCTIONFS_HAS_HS_DESC),
-		.length = htole32(sizeof(descriptors)),
+		.length = cpu_to_le32(sizeof(descriptors)),
 	},
-	.fs_count = htole32(3),
+	.fs_count = cpu_to_le32(3),
 	.fs_descs = {
 		.intf = {
 			.bLength = sizeof(descriptors.fs_descs.intf),
@@ -89,7 +107,7 @@ static const struct {
 			.bmAttributes = USB_ENDPOINT_XFER_BULK,
 		},
 	},
-	.hs_count = htole32(3),
+	.hs_count = cpu_to_le32(3),
 	.hs_descs = {
 		.intf = {
 			.bLength = sizeof(descriptors.hs_descs.intf),
@@ -103,14 +121,14 @@ static const struct {
 			.bDescriptorType = USB_DT_ENDPOINT,
 			.bEndpointAddress = 1 | USB_DIR_IN,
 			.bmAttributes = USB_ENDPOINT_XFER_BULK,
-			.wMaxPacketSize = htole16(512),
+			.wMaxPacketSize = cpu_to_le16(512),
 		},
 		.bulk_source = {
 			.bLength = sizeof(descriptors.hs_descs.bulk_source),
 			.bDescriptorType = USB_DT_ENDPOINT,
 			.bEndpointAddress = 2 | USB_DIR_OUT,
 			.bmAttributes = USB_ENDPOINT_XFER_BULK,
-			.wMaxPacketSize = htole16(512),
+			.wMaxPacketSize = cpu_to_le16(512),
 		},
 	},
 };
@@ -125,13 +143,13 @@ static const struct {
 	} __attribute__ ((__packed__)) lang0;
 } __attribute__ ((__packed__)) strings = {
 	.header = {
-		.magic = htole32(FUNCTIONFS_STRINGS_MAGIC),
-		.length = htole32(sizeof(strings)),
-		.str_count = htole32(1),
-		.lang_count = htole32(1),
+		.magic = cpu_to_le32(FUNCTIONFS_STRINGS_MAGIC),
+		.length = cpu_to_le32(sizeof(strings)),
+		.str_count = cpu_to_le32(1),
+		.lang_count = cpu_to_le32(1),
 	},
 	.lang0 = {
-		htole16(0x0409), /* en-us */
+		cpu_to_le16(0x0409), /* en-us */
 		STR_INTERFACE,
 	},
 };
-- 
GitLab


From 1dd33a9f1b95ab59cd60f14a7a83fed14697867b Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 23 Oct 2022 16:47:06 +0200
Subject: [PATCH 1128/4122] usb: fotg210: Collect pieces of dual mode
 controller

The Faraday FOTG210 is a dual-mode OTG USB controller that can
act as host, peripheral or both. To be able to probe from one
hardware description and to follow the pattern of other dual-
mode controllers such as MUSB or MTU3 we need to collect the
two, currently completely separate drivers in the same
directory.

After this, users need to select the main symbol USB_FOTG210
and then each respective subdriver. We pave the road to
compile both drivers into the same kernel and select the
one we want to use at probe() time, and possibly add OTG
support in the end.

This patch doesn't do much more than create the new symbol
and collect the drivers in one place. We also add a comment
for the section of dual-mode controllers in the Kconfig
file so people can see what these selections are about.

Also add myself as maintainer as there has been little
response on my patches to these drivers.

Cc: Fabian Vogt <fabian@ritter-vogt.de>
Cc: Yuan-Hsin Chen <yhchen@faraday-tech.com>
Cc: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221023144708.3596563-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS                                   |  6 ++++
 drivers/usb/Kconfig                           |  4 +++
 drivers/usb/Makefile                          |  2 ++
 drivers/usb/fotg210/Kconfig                   | 36 +++++++++++++++++++
 drivers/usb/fotg210/Makefile                  |  3 ++
 drivers/usb/{host => fotg210}/fotg210-hcd.c   |  2 +-
 .../{host/fotg210.h => fotg210/fotg210-hcd.h} |  0
 .../usb/{gadget/udc => fotg210}/fotg210-udc.c |  2 +-
 .../udc/fotg210.h => fotg210/fotg210-udc.h}   |  0
 drivers/usb/gadget/udc/Kconfig                | 11 ------
 drivers/usb/gadget/udc/Makefile               |  1 -
 drivers/usb/host/Kconfig                      | 11 ------
 drivers/usb/host/Makefile                     |  1 -
 13 files changed, 53 insertions(+), 26 deletions(-)
 create mode 100644 drivers/usb/fotg210/Kconfig
 create mode 100644 drivers/usb/fotg210/Makefile
 rename drivers/usb/{host => fotg210}/fotg210-hcd.c (99%)
 rename drivers/usb/{host/fotg210.h => fotg210/fotg210-hcd.h} (100%)
 rename drivers/usb/{gadget/udc => fotg210}/fotg210-udc.c (99%)
 rename drivers/usb/{gadget/udc/fotg210.h => fotg210/fotg210-udc.h} (100%)

diff --git a/MAINTAINERS b/MAINTAINERS
index 379945f82a643..52ddfc938ac91 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7873,6 +7873,12 @@ F:	fs/notify/fanotify/
 F:	include/linux/fanotify.h
 F:	include/uapi/linux/fanotify.h
 
+FARADAY FOTG210 USB2 DUAL-ROLE CONTROLLER
+M:	Linus Walleij <linus.walleij@linaro.org>
+L:	linux-usb@vger.kernel.org
+S:	Maintained
+F:	drivers/usb/fotg210/
+
 FARSYNC SYNCHRONOUS DRIVER
 M:	Kevin Curtis <kevin.curtis@farsite.co.uk>
 S:	Supported
diff --git a/drivers/usb/Kconfig b/drivers/usb/Kconfig
index 578a439e71b57..a871a988829db 100644
--- a/drivers/usb/Kconfig
+++ b/drivers/usb/Kconfig
@@ -111,8 +111,12 @@ source "drivers/usb/usbip/Kconfig"
 
 endif
 
+comment "USB dual-mode controller drivers"
+
 source "drivers/usb/cdns3/Kconfig"
 
+source "drivers/usb/fotg210/Kconfig"
+
 source "drivers/usb/mtu3/Kconfig"
 
 source "drivers/usb/musb/Kconfig"
diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index 643edf5fe18c6..a81e6ef293af2 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -17,6 +17,8 @@ obj-$(CONFIG_USB_CDNS_SUPPORT)	+= cdns3/
 obj-$(CONFIG_USB_CDNS3)		+= cdns3/
 obj-$(CONFIG_USB_CDNSP_PCI)	+= cdns3/
 
+obj-$(CONFIG_USB_FOTG210)	+= fotg210/
+
 obj-$(CONFIG_USB_MON)		+= mon/
 obj-$(CONFIG_USB_MTU3)		+= mtu3/
 
diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig
new file mode 100644
index 0000000000000..e7a106785f5d4
--- /dev/null
+++ b/drivers/usb/fotg210/Kconfig
@@ -0,0 +1,36 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config USB_FOTG210
+	tristate "Faraday FOTG210 USB2 Dual Role controller"
+	depends on USB || USB_GADGET
+	depends on HAS_DMA && HAS_IOMEM
+	default ARCH_GEMINI
+	help
+	  Faraday FOTG210 is a dual-mode USB controller that can act
+	  in both host controller and peripheral controller mode.
+
+if USB_FOTG210
+
+config USB_FOTG210_HCD
+	tristate "Faraday FOTG210 USB Host Controller support"
+	depends on USB
+	help
+	  Faraday FOTG210 is an OTG controller which can be configured as
+	  an USB2.0 host. It is designed to meet USB2.0 EHCI specification
+	  with minor modification.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called fotg210-hcd.
+
+config USB_FOTG210_UDC
+	depends on USB_GADGET
+	tristate "Faraday FOTG210 USB Peripheral Controller support"
+	help
+	   Faraday USB2.0 OTG controller which can be configured as
+	   high speed or full speed USB device. This driver suppports
+	   Bulk Transfer so far.
+
+	   Say "y" to link the driver statically, or "m" to build a
+	   dynamically linked module called "fotg210-udc".
+
+endif
diff --git a/drivers/usb/fotg210/Makefile b/drivers/usb/fotg210/Makefile
new file mode 100644
index 0000000000000..f4a26ca0e5639
--- /dev/null
+++ b/drivers/usb/fotg210/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_USB_FOTG210_HCD)	+= fotg210-hcd.o
+obj-$(CONFIG_USB_FOTG210_UDC)	+= fotg210-udc.o
diff --git a/drivers/usb/host/fotg210-hcd.c b/drivers/usb/fotg210/fotg210-hcd.c
similarity index 99%
rename from drivers/usb/host/fotg210-hcd.c
rename to drivers/usb/fotg210/fotg210-hcd.c
index 3d1dbcf4c0732..8fbf63e76d7d1 100644
--- a/drivers/usb/host/fotg210-hcd.c
+++ b/drivers/usb/fotg210/fotg210-hcd.c
@@ -77,7 +77,7 @@ MODULE_PARM_DESC(hird, "host initiated resume duration, +1 for each 75us");
 
 #define INTR_MASK (STS_IAA | STS_FATAL | STS_PCD | STS_ERR | STS_INT)
 
-#include "fotg210.h"
+#include "fotg210-hcd.h"
 
 #define fotg210_dbg(fotg210, fmt, args...) \
 	dev_dbg(fotg210_to_hcd(fotg210)->self.controller, fmt, ## args)
diff --git a/drivers/usb/host/fotg210.h b/drivers/usb/fotg210/fotg210-hcd.h
similarity index 100%
rename from drivers/usb/host/fotg210.h
rename to drivers/usb/fotg210/fotg210-hcd.h
diff --git a/drivers/usb/gadget/udc/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
similarity index 99%
rename from drivers/usb/gadget/udc/fotg210-udc.c
rename to drivers/usb/fotg210/fotg210-udc.c
index fdca28e72a3b4..01a4509775b2b 100644
--- a/drivers/usb/gadget/udc/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -16,7 +16,7 @@
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
 
-#include "fotg210.h"
+#include "fotg210-udc.h"
 
 #define	DRIVER_DESC	"FOTG210 USB Device Controller Driver"
 #define	DRIVER_VERSION	"30-April-2013"
diff --git a/drivers/usb/gadget/udc/fotg210.h b/drivers/usb/fotg210/fotg210-udc.h
similarity index 100%
rename from drivers/usb/gadget/udc/fotg210.h
rename to drivers/usb/fotg210/fotg210-udc.h
diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 5756acb07b8da..16243964b1cd1 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -108,17 +108,6 @@ config USB_FUSB300
 	help
 	   Faraday usb device controller FUSB300 driver
 
-config USB_FOTG210_UDC
-	depends on HAS_DMA
-	tristate "Faraday FOTG210 USB Peripheral Controller"
-	help
-	   Faraday USB2.0 OTG controller which can be configured as
-	   high speed or full speed USB device. This driver supppors
-	   Bulk Transfer so far.
-
-	   Say "y" to link the driver statically, or "m" to build a
-	   dynamically linked module called "fotg210_udc".
-
 config USB_GR_UDC
 	tristate "Aeroflex Gaisler GRUSBDC USB Peripheral Controller Driver"
 	depends on HAS_DMA
diff --git a/drivers/usb/gadget/udc/Makefile b/drivers/usb/gadget/udc/Makefile
index 12f9e4c9eb0c5..39daf36a2baa2 100644
--- a/drivers/usb/gadget/udc/Makefile
+++ b/drivers/usb/gadget/udc/Makefile
@@ -34,7 +34,6 @@ obj-$(CONFIG_USB_EG20T)		+= pch_udc.o
 obj-$(CONFIG_USB_MV_UDC)	+= mv_udc.o
 mv_udc-y			:= mv_udc_core.o
 obj-$(CONFIG_USB_FUSB300)	+= fusb300_udc.o
-obj-$(CONFIG_USB_FOTG210_UDC)	+= fotg210-udc.o
 obj-$(CONFIG_USB_MV_U3D)	+= mv_u3d_core.o
 obj-$(CONFIG_USB_GR_UDC)	+= gr_udc.o
 obj-$(CONFIG_USB_GADGET_XILINX)	+= udc-xilinx.o
diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
index 8e8db71021a54..8d799d23c476e 100644
--- a/drivers/usb/host/Kconfig
+++ b/drivers/usb/host/Kconfig
@@ -389,17 +389,6 @@ config USB_ISP1362_HCD
 	  To compile this driver as a module, choose M here: the
 	  module will be called isp1362-hcd.
 
-config USB_FOTG210_HCD
-	tristate "FOTG210 HCD support"
-	depends on USB && HAS_DMA && HAS_IOMEM
-	help
-	  Faraday FOTG210 is an OTG controller which can be configured as
-	  an USB2.0 host. It is designed to meet USB2.0 EHCI specification
-	  with minor modification.
-
-	  To compile this driver as a module, choose M here: the
-	  module will be called fotg210-hcd.
-
 config USB_MAX3421_HCD
 	tristate "MAX3421 HCD (USB-over-SPI) support"
 	depends on USB && SPI
diff --git a/drivers/usb/host/Makefile b/drivers/usb/host/Makefile
index 2c8a61be7e466..6d8ee264c9b2b 100644
--- a/drivers/usb/host/Makefile
+++ b/drivers/usb/host/Makefile
@@ -84,6 +84,5 @@ obj-$(CONFIG_USB_EHCI_FSL)	+= ehci-fsl.o
 obj-$(CONFIG_USB_EHCI_MV)	+= ehci-mv.o
 obj-$(CONFIG_USB_HCD_BCMA)	+= bcma-hcd.o
 obj-$(CONFIG_USB_HCD_SSB)	+= ssb-hcd.o
-obj-$(CONFIG_USB_FOTG210_HCD)	+= fotg210-hcd.o
 obj-$(CONFIG_USB_MAX3421_HCD)	+= max3421-hcd.o
 obj-$(CONFIG_USB_XEN_HCD)	+= xen-hcd.o
-- 
GitLab


From aeffd2c3b09f4f50438ec8960095129798bcb33a Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 23 Oct 2022 16:47:07 +0200
Subject: [PATCH 1129/4122] usb: fotg210: Compile into one module

It is since ages perfectly possible to compile both of these
modules into the same kernel, which makes no sense since it
is one piece of hardware.

Compile one module named "fotg210.ko" for both HCD and UDC
drivers by collecting the init calls into a fotg210-core.c
file and start to centralize things handling one and the same
piece of hardware.

Stub out the initcalls if one or the other part of the driver
was not selected.

Tested by compiling one or the other or both of the drivers
into the kernel and as modules.

Cc: Fabian Vogt <fabian@ritter-vogt.de>
Cc: Yuan-Hsin Chen <yhchen@faraday-tech.com>
Cc: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221023144708.3596563-2-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/Kconfig        |  4 +-
 drivers/usb/fotg210/Makefile       | 11 ++++-
 drivers/usb/fotg210/fotg210-core.c | 79 ++++++++++++++++++++++++++++++
 drivers/usb/fotg210/fotg210-hcd.c  | 49 +++---------------
 drivers/usb/fotg210/fotg210-udc.c  | 19 ++-----
 drivers/usb/fotg210/fotg210.h      | 42 ++++++++++++++++
 6 files changed, 142 insertions(+), 62 deletions(-)
 create mode 100644 drivers/usb/fotg210/fotg210-core.c
 create mode 100644 drivers/usb/fotg210/fotg210.h

diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig
index e7a106785f5d4..933c513b5728e 100644
--- a/drivers/usb/fotg210/Kconfig
+++ b/drivers/usb/fotg210/Kconfig
@@ -12,7 +12,7 @@ config USB_FOTG210
 if USB_FOTG210
 
 config USB_FOTG210_HCD
-	tristate "Faraday FOTG210 USB Host Controller support"
+	bool "Faraday FOTG210 USB Host Controller support"
 	depends on USB
 	help
 	  Faraday FOTG210 is an OTG controller which can be configured as
@@ -24,7 +24,7 @@ config USB_FOTG210_HCD
 
 config USB_FOTG210_UDC
 	depends on USB_GADGET
-	tristate "Faraday FOTG210 USB Peripheral Controller support"
+	bool "Faraday FOTG210 USB Peripheral Controller support"
 	help
 	   Faraday USB2.0 OTG controller which can be configured as
 	   high speed or full speed USB device. This driver suppports
diff --git a/drivers/usb/fotg210/Makefile b/drivers/usb/fotg210/Makefile
index f4a26ca0e5639..5aecff21f24b8 100644
--- a/drivers/usb/fotg210/Makefile
+++ b/drivers/usb/fotg210/Makefile
@@ -1,3 +1,10 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_USB_FOTG210_HCD)	+= fotg210-hcd.o
-obj-$(CONFIG_USB_FOTG210_UDC)	+= fotg210-udc.o
+
+# This setup links the different object files into one single
+# module so we don't have to EXPORT() a lot of internal symbols
+# or create unnecessary submodules.
+fotg210-objs-y				+= fotg210-core.o
+fotg210-objs-$(CONFIG_USB_FOTG210_HCD)	+= fotg210-hcd.o
+fotg210-objs-$(CONFIG_USB_FOTG210_UDC)	+= fotg210-udc.o
+fotg210-objs				:= $(fotg210-objs-y)
+obj-$(CONFIG_USB_FOTG210)		+= fotg210.o
diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c
new file mode 100644
index 0000000000000..ab7b8974bc18b
--- /dev/null
+++ b/drivers/usb/fotg210/fotg210-core.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Central probing code for the FOTG210 dual role driver
+ * We register one driver for the hardware and then we decide
+ * whether to proceed with probing the host or the peripheral
+ * driver.
+ */
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/usb.h>
+
+#include "fotg210.h"
+
+static int fotg210_probe(struct platform_device *pdev)
+{
+	int ret;
+
+	if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) {
+		ret = fotg210_hcd_probe(pdev);
+		if (ret)
+			return ret;
+	}
+	if (IS_ENABLED(CONFIG_USB_FOTG210_UDC))
+		ret = fotg210_udc_probe(pdev);
+
+	return ret;
+}
+
+static int fotg210_remove(struct platform_device *pdev)
+{
+	if (IS_ENABLED(CONFIG_USB_FOTG210_HCD))
+		fotg210_hcd_remove(pdev);
+	if (IS_ENABLED(CONFIG_USB_FOTG210_UDC))
+		fotg210_udc_remove(pdev);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id fotg210_of_match[] = {
+	{ .compatible = "faraday,fotg210" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, fotg210_of_match);
+#endif
+
+static struct platform_driver fotg210_driver = {
+	.driver = {
+		.name   = "fotg210",
+		.of_match_table = of_match_ptr(fotg210_of_match),
+	},
+	.probe  = fotg210_probe,
+	.remove = fotg210_remove,
+};
+
+static int __init fotg210_init(void)
+{
+	if (usb_disabled())
+		return -ENODEV;
+
+	if (IS_ENABLED(CONFIG_USB_FOTG210_HCD))
+		fotg210_hcd_init();
+	return platform_driver_register(&fotg210_driver);
+}
+module_init(fotg210_init);
+
+static void __exit fotg210_cleanup(void)
+{
+	platform_driver_unregister(&fotg210_driver);
+	if (IS_ENABLED(CONFIG_USB_FOTG210_HCD))
+		fotg210_hcd_cleanup();
+}
+module_exit(fotg210_cleanup);
+
+MODULE_AUTHOR("Yuan-Hsin Chen, Feng-Hsin Chiang");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("FOTG210 Dual Role Controller Driver");
diff --git a/drivers/usb/fotg210/fotg210-hcd.c b/drivers/usb/fotg210/fotg210-hcd.c
index 8fbf63e76d7d1..51ac93a2eb98e 100644
--- a/drivers/usb/fotg210/fotg210-hcd.c
+++ b/drivers/usb/fotg210/fotg210-hcd.c
@@ -39,8 +39,8 @@
 #include <asm/irq.h>
 #include <asm/unaligned.h>
 
-#define DRIVER_AUTHOR "Yuan-Hsin Chen"
-#define DRIVER_DESC "FOTG210 Host Controller (EHCI) Driver"
+#include "fotg210.h"
+
 static const char hcd_name[] = "fotg210_hcd";
 
 #undef FOTG210_URB_TRACE
@@ -5490,9 +5490,6 @@ static int fotg210_get_frame(struct usb_hcd *hcd)
  * functions  and in order to facilitate role switching we cannot
  * give the fotg210 driver exclusive access to those.
  */
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_LICENSE("GPL");
 
 static const struct hc_driver fotg210_fotg210_hc_driver = {
 	.description		= hcd_name,
@@ -5560,7 +5557,7 @@ static void fotg210_init(struct fotg210_hcd *fotg210)
  * then invokes the start() method for the HCD associated with it
  * through the hotplug entry's driver_data.
  */
-static int fotg210_hcd_probe(struct platform_device *pdev)
+int fotg210_hcd_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct usb_hcd *hcd;
@@ -5652,7 +5649,7 @@ fail_create_hcd:
  * @dev: USB Host Controller being removed
  *
  */
-static int fotg210_hcd_remove(struct platform_device *pdev)
+int fotg210_hcd_remove(struct platform_device *pdev)
 {
 	struct usb_hcd *hcd = platform_get_drvdata(pdev);
 	struct fotg210_hcd *fotg210 = hcd_to_fotg210(hcd);
@@ -5668,27 +5665,8 @@ static int fotg210_hcd_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_OF
-static const struct of_device_id fotg210_of_match[] = {
-	{ .compatible = "faraday,fotg210" },
-	{},
-};
-MODULE_DEVICE_TABLE(of, fotg210_of_match);
-#endif
-
-static struct platform_driver fotg210_hcd_driver = {
-	.driver = {
-		.name   = "fotg210-hcd",
-		.of_match_table = of_match_ptr(fotg210_of_match),
-	},
-	.probe  = fotg210_hcd_probe,
-	.remove = fotg210_hcd_remove,
-};
-
-static int __init fotg210_hcd_init(void)
+int __init fotg210_hcd_init(void)
 {
-	int retval = 0;
-
 	if (usb_disabled())
 		return -ENODEV;
 
@@ -5704,24 +5682,11 @@ static int __init fotg210_hcd_init(void)
 
 	fotg210_debug_root = debugfs_create_dir("fotg210", usb_debug_root);
 
-	retval = platform_driver_register(&fotg210_hcd_driver);
-	if (retval < 0)
-		goto clean;
-	return retval;
-
-clean:
-	debugfs_remove(fotg210_debug_root);
-	fotg210_debug_root = NULL;
-
-	clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
-	return retval;
+	return 0;
 }
-module_init(fotg210_hcd_init);
 
-static void __exit fotg210_hcd_cleanup(void)
+void __exit fotg210_hcd_cleanup(void)
 {
-	platform_driver_unregister(&fotg210_hcd_driver);
 	debugfs_remove(fotg210_debug_root);
 	clear_bit(USB_EHCI_LOADED, &usb_hcds_loaded);
 }
-module_exit(fotg210_hcd_cleanup);
diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index 01a4509775b2b..7757aaa11d6f6 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -16,6 +16,7 @@
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
 
+#include "fotg210.h"
 #include "fotg210-udc.h"
 
 #define	DRIVER_DESC	"FOTG210 USB Device Controller Driver"
@@ -1068,7 +1069,7 @@ static const struct usb_gadget_ops fotg210_gadget_ops = {
 	.udc_stop		= fotg210_udc_stop,
 };
 
-static int fotg210_udc_remove(struct platform_device *pdev)
+int fotg210_udc_remove(struct platform_device *pdev)
 {
 	struct fotg210_udc *fotg210 = platform_get_drvdata(pdev);
 	int i;
@@ -1085,7 +1086,7 @@ static int fotg210_udc_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int fotg210_udc_probe(struct platform_device *pdev)
+int fotg210_udc_probe(struct platform_device *pdev)
 {
 	struct resource *res, *ires;
 	struct fotg210_udc *fotg210 = NULL;
@@ -1208,17 +1209,3 @@ err_alloc:
 err:
 	return ret;
 }
-
-static struct platform_driver fotg210_driver = {
-	.driver		= {
-		.name =	udc_name,
-	},
-	.probe		= fotg210_udc_probe,
-	.remove		= fotg210_udc_remove,
-};
-
-module_platform_driver(fotg210_driver);
-
-MODULE_AUTHOR("Yuan-Hsin Chen, Feng-Hsin Chiang <john453@faraday-tech.com>");
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/usb/fotg210/fotg210.h b/drivers/usb/fotg210/fotg210.h
new file mode 100644
index 0000000000000..ef79d8323d897
--- /dev/null
+++ b/drivers/usb/fotg210/fotg210.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __FOTG210_H
+#define __FOTG210_H
+
+#ifdef CONFIG_USB_FOTG210_HCD
+int fotg210_hcd_probe(struct platform_device *pdev);
+int fotg210_hcd_remove(struct platform_device *pdev);
+int fotg210_hcd_init(void);
+void fotg210_hcd_cleanup(void);
+#else
+static inline int fotg210_hcd_probe(struct platform_device *pdev)
+{
+	return 0;
+}
+static inline int fotg210_hcd_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+static inline int fotg210_hcd_init(void)
+{
+	return 0;
+}
+static inline void fotg210_hcd_cleanup(void)
+{
+}
+#endif
+
+#ifdef CONFIG_USB_FOTG210_UDC
+int fotg210_udc_probe(struct platform_device *pdev);
+int fotg210_udc_remove(struct platform_device *pdev);
+#else
+static inline int fotg210_udc_probe(struct platform_device *pdev)
+{
+	return 0;
+}
+static inline int fotg210_udc_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
+#endif /* __FOTG210_H */
-- 
GitLab


From 1fac1c4da8a225ffbfae294ae36e18a3a65cb87e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 23 Oct 2022 16:47:08 +0200
Subject: [PATCH 1130/4122] usb: fotg210: Select subdriver by mode

Check which mode the hardware is in, and selecte the peripheral
driver if the hardware is in explicit peripheral mode, otherwise
select host mode.

This should solve the immediate problem that both subdrivers
can get probed.

Cc: Fabian Vogt <fabian@ritter-vogt.de>
Cc: Yuan-Hsin Chen <yhchen@faraday-tech.com>
Cc: Felipe Balbi <balbi@kernel.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221023144708.3596563-3-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-core.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c
index ab7b8974bc18b..3d07ee46f6d1d 100644
--- a/drivers/usb/fotg210/fotg210-core.c
+++ b/drivers/usb/fotg210/fotg210-core.c
@@ -10,30 +10,37 @@
 #include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/usb.h>
+#include <linux/usb/otg.h>
 
 #include "fotg210.h"
 
 static int fotg210_probe(struct platform_device *pdev)
 {
+	struct device *dev = &pdev->dev;
+	enum usb_dr_mode mode;
 	int ret;
 
-	if (IS_ENABLED(CONFIG_USB_FOTG210_HCD)) {
-		ret = fotg210_hcd_probe(pdev);
-		if (ret)
-			return ret;
-	}
-	if (IS_ENABLED(CONFIG_USB_FOTG210_UDC))
+	mode = usb_get_dr_mode(dev);
+
+	if (mode == USB_DR_MODE_PERIPHERAL)
 		ret = fotg210_udc_probe(pdev);
+	else
+		ret = fotg210_hcd_probe(pdev);
 
 	return ret;
 }
 
 static int fotg210_remove(struct platform_device *pdev)
 {
-	if (IS_ENABLED(CONFIG_USB_FOTG210_HCD))
-		fotg210_hcd_remove(pdev);
-	if (IS_ENABLED(CONFIG_USB_FOTG210_UDC))
+	struct device *dev = &pdev->dev;
+	enum usb_dr_mode mode;
+
+	mode = usb_get_dr_mode(dev);
+
+	if (mode == USB_DR_MODE_PERIPHERAL)
 		fotg210_udc_remove(pdev);
+	else
+		fotg210_hcd_remove(pdev);
 
 	return 0;
 }
-- 
GitLab


From 21acc656a06e912341d9db66c67b58cc7ed071e7 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 26 Oct 2022 19:26:51 +0100
Subject: [PATCH 1131/4122] usb: musb: Add and use inline functions
 musb_{get,set}_state

Instead of manipulating musb->xceiv->otg->state directly, use the newly
introduced musb_get_state() and musb_set_state() inline functions.

Later, these inline functions will be modified to get rid of the
musb->xceiv dependency, which prevents the musb code from using the
generic PHY subsystem.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20221026182657.146630-2-paul@crapouillou.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c    | 62 ++++++++++++++++-----------------
 drivers/usb/musb/musb_core.h    | 11 ++++++
 drivers/usb/musb/musb_debugfs.c |  6 ++--
 drivers/usb/musb/musb_gadget.c  | 28 +++++++--------
 drivers/usb/musb/musb_host.c    |  6 ++--
 drivers/usb/musb/musb_virthub.c | 18 +++++-----
 6 files changed, 71 insertions(+), 60 deletions(-)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 03027c6fa3aba..a0fe2516870bf 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -502,7 +502,7 @@ int musb_set_host(struct musb *musb)
 
 init_data:
 	musb->is_active = 1;
-	musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+	musb_set_state(musb, OTG_STATE_A_IDLE);
 	MUSB_HST_MODE(musb);
 
 	return error;
@@ -549,7 +549,7 @@ int musb_set_peripheral(struct musb *musb)
 
 init_data:
 	musb->is_active = 0;
-	musb->xceiv->otg->state = OTG_STATE_B_IDLE;
+	musb_set_state(musb, OTG_STATE_B_IDLE);
 	MUSB_DEV_MODE(musb);
 
 	return error;
@@ -599,12 +599,12 @@ static void musb_otg_timer_func(struct timer_list *t)
 	unsigned long	flags;
 
 	spin_lock_irqsave(&musb->lock, flags);
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_B_WAIT_ACON:
 		musb_dbg(musb,
 			"HNP: b_wait_acon timeout; back to b_peripheral");
 		musb_g_disconnect(musb);
-		musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+		musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 		musb->is_active = 0;
 		break;
 	case OTG_STATE_A_SUSPEND:
@@ -612,7 +612,7 @@ static void musb_otg_timer_func(struct timer_list *t)
 		musb_dbg(musb, "HNP: %s timeout",
 			usb_otg_state_string(musb->xceiv->otg->state));
 		musb_platform_set_vbus(musb, 0);
-		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VFALL;
+		musb_set_state(musb, OTG_STATE_A_WAIT_VFALL);
 		break;
 	default:
 		musb_dbg(musb, "HNP: Unhandled mode %s",
@@ -633,7 +633,7 @@ void musb_hnp_stop(struct musb *musb)
 	musb_dbg(musb, "HNP: stop from %s",
 			usb_otg_state_string(musb->xceiv->otg->state));
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_PERIPHERAL:
 		musb_g_disconnect(musb);
 		musb_dbg(musb, "HNP: back to %s",
@@ -643,7 +643,7 @@ void musb_hnp_stop(struct musb *musb)
 		musb_dbg(musb, "HNP: Disabling HR");
 		if (hcd)
 			hcd->self.is_b_host = 0;
-		musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+		musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 		MUSB_DEV_MODE(musb);
 		reg = musb_readb(mbase, MUSB_POWER);
 		reg |= MUSB_POWER_SUSPENDM;
@@ -671,7 +671,7 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl)
 			usb_otg_state_string(musb->xceiv->otg->state));
 
 	if (devctl & MUSB_DEVCTL_HM) {
-		switch (musb->xceiv->otg->state) {
+		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_SUSPEND:
 			/* remote wakeup? */
 			musb->port1_status |=
@@ -679,14 +679,14 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl)
 					| MUSB_PORT_STAT_RESUME;
 			musb->rh_timer = jiffies
 				+ msecs_to_jiffies(USB_RESUME_TIMEOUT);
-			musb->xceiv->otg->state = OTG_STATE_A_HOST;
+			musb_set_state(musb, OTG_STATE_A_HOST);
 			musb->is_active = 1;
 			musb_host_resume_root_hub(musb);
 			schedule_delayed_work(&musb->finish_resume_work,
 				msecs_to_jiffies(USB_RESUME_TIMEOUT));
 			break;
 		case OTG_STATE_B_WAIT_ACON:
-			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+			musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 			musb->is_active = 1;
 			MUSB_DEV_MODE(musb);
 			break;
@@ -696,10 +696,10 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl)
 				usb_otg_state_string(musb->xceiv->otg->state));
 		}
 	} else {
-		switch (musb->xceiv->otg->state) {
+		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_SUSPEND:
 			/* possibly DISCONNECT is upcoming */
-			musb->xceiv->otg->state = OTG_STATE_A_HOST;
+			musb_set_state(musb, OTG_STATE_A_HOST);
 			musb_host_resume_root_hub(musb);
 			break;
 		case OTG_STATE_B_WAIT_ACON:
@@ -750,7 +750,7 @@ static irqreturn_t musb_handle_intr_sessreq(struct musb *musb, u8 devctl)
 	 */
 	musb_writeb(mbase, MUSB_DEVCTL, MUSB_DEVCTL_SESSION);
 	musb->ep0_stage = MUSB_EP0_START;
-	musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+	musb_set_state(musb, OTG_STATE_A_IDLE);
 	MUSB_HST_MODE(musb);
 	musb_platform_set_vbus(musb, 1);
 
@@ -777,7 +777,7 @@ static void musb_handle_intr_vbuserr(struct musb *musb, u8 devctl)
 	 * REVISIT:  do delays from lots of DEBUG_KERNEL checks
 	 * make trouble here, keeping VBUS < 4.4V ?
 	 */
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_HOST:
 		/* recovery is dicey once we've gotten past the
 		 * initial stages of enumeration, but if VBUS
@@ -833,7 +833,7 @@ static void musb_handle_intr_suspend(struct musb *musb, u8 devctl)
 	musb_dbg(musb, "SUSPEND (%s) devctl %02x",
 		usb_otg_state_string(musb->xceiv->otg->state), devctl);
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_PERIPHERAL:
 		/* We also come here if the cable is removed, since
 		 * this silicon doesn't report ID-no-longer-grounded.
@@ -858,7 +858,7 @@ static void musb_handle_intr_suspend(struct musb *musb, u8 devctl)
 		musb_g_suspend(musb);
 		musb->is_active = musb->g.b_hnp_enable;
 		if (musb->is_active) {
-			musb->xceiv->otg->state = OTG_STATE_B_WAIT_ACON;
+			musb_set_state(musb, OTG_STATE_B_WAIT_ACON);
 			musb_dbg(musb, "HNP: Setting timer for b_ase0_brst");
 			mod_timer(&musb->otg_timer, jiffies
 				+ msecs_to_jiffies(
@@ -871,7 +871,7 @@ static void musb_handle_intr_suspend(struct musb *musb, u8 devctl)
 				+ msecs_to_jiffies(musb->a_wait_bcon));
 		break;
 	case OTG_STATE_A_HOST:
-		musb->xceiv->otg->state = OTG_STATE_A_SUSPEND;
+		musb_set_state(musb, OTG_STATE_A_SUSPEND);
 		musb->is_active = musb->hcd->self.b_hnp_enable;
 		break;
 	case OTG_STATE_B_HOST:
@@ -909,7 +909,7 @@ static void musb_handle_intr_connect(struct musb *musb, u8 devctl, u8 int_usb)
 		musb->port1_status |= USB_PORT_STAT_LOW_SPEED;
 
 	/* indicate new connection to OTG machine */
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_B_PERIPHERAL:
 		if (int_usb & MUSB_INTR_SUSPEND) {
 			musb_dbg(musb, "HNP: SUSPEND+CONNECT, now b_host");
@@ -921,7 +921,7 @@ static void musb_handle_intr_connect(struct musb *musb, u8 devctl, u8 int_usb)
 	case OTG_STATE_B_WAIT_ACON:
 		musb_dbg(musb, "HNP: CONNECT, now b_host");
 b_host:
-		musb->xceiv->otg->state = OTG_STATE_B_HOST;
+		musb_set_state(musb, OTG_STATE_B_HOST);
 		if (musb->hcd)
 			musb->hcd->self.is_b_host = 1;
 		del_timer(&musb->otg_timer);
@@ -929,7 +929,7 @@ b_host:
 	default:
 		if ((devctl & MUSB_DEVCTL_VBUS)
 				== (3 << MUSB_DEVCTL_VBUS_SHIFT)) {
-			musb->xceiv->otg->state = OTG_STATE_A_HOST;
+			musb_set_state(musb, OTG_STATE_A_HOST);
 			if (hcd)
 				hcd->self.is_b_host = 0;
 		}
@@ -948,7 +948,7 @@ static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl)
 			usb_otg_state_string(musb->xceiv->otg->state),
 			MUSB_MODE(musb), devctl);
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_HOST:
 	case OTG_STATE_A_SUSPEND:
 		musb_host_resume_root_hub(musb);
@@ -966,7 +966,7 @@ static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl)
 		musb_root_disconnect(musb);
 		if (musb->hcd)
 			musb->hcd->self.is_b_host = 0;
-		musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+		musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 		MUSB_DEV_MODE(musb);
 		musb_g_disconnect(musb);
 		break;
@@ -1006,7 +1006,7 @@ static void musb_handle_intr_reset(struct musb *musb)
 	} else {
 		musb_dbg(musb, "BUS RESET as %s",
 			usb_otg_state_string(musb->xceiv->otg->state));
-		switch (musb->xceiv->otg->state) {
+		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_SUSPEND:
 			musb_g_reset(musb);
 			fallthrough;
@@ -1025,11 +1025,11 @@ static void musb_handle_intr_reset(struct musb *musb)
 		case OTG_STATE_B_WAIT_ACON:
 			musb_dbg(musb, "HNP: RESET (%s), to b_peripheral",
 				usb_otg_state_string(musb->xceiv->otg->state));
-			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+			musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 			musb_g_reset(musb);
 			break;
 		case OTG_STATE_B_IDLE:
-			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+			musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 			fallthrough;
 		case OTG_STATE_B_PERIPHERAL:
 			musb_g_reset(musb);
@@ -1216,8 +1216,8 @@ void musb_start(struct musb *musb)
 	 * (c) peripheral initiates, using SRP
 	 */
 	if (musb->port_mode != MUSB_HOST &&
-			musb->xceiv->otg->state != OTG_STATE_A_WAIT_BCON &&
-			(devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS) {
+	    musb_get_state(musb) != OTG_STATE_A_WAIT_BCON &&
+	    (devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS) {
 		musb->is_active = 1;
 	} else {
 		devctl |= MUSB_DEVCTL_SESSION;
@@ -1908,7 +1908,7 @@ vbus_store(struct device *dev, struct device_attribute *attr,
 	spin_lock_irqsave(&musb->lock, flags);
 	/* force T(a_wait_bcon) to be zero/unlimited *OR* valid */
 	musb->a_wait_bcon = val ? max_t(int, val, OTG_TIME_A_WAIT_BCON) : 0 ;
-	if (musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON)
+	if (musb_get_state(musb) == OTG_STATE_A_WAIT_BCON)
 		musb->is_active = 0;
 	musb_platform_try_idle(musb, jiffies + msecs_to_jiffies(val));
 	spin_unlock_irqrestore(&musb->lock, flags);
@@ -2089,8 +2089,8 @@ static void musb_irq_work(struct work_struct *data)
 
 	musb_pm_runtime_check_session(musb);
 
-	if (musb->xceiv->otg->state != musb->xceiv_old_state) {
-		musb->xceiv_old_state = musb->xceiv->otg->state;
+	if (musb_get_state(musb) != musb->xceiv_old_state) {
+		musb->xceiv_old_state = musb_get_state(musb);
 		sysfs_notify(&musb->controller->kobj, NULL, "mode");
 	}
 
@@ -2532,7 +2532,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	}
 
 	MUSB_DEV_MODE(musb);
-	musb->xceiv->otg->state = OTG_STATE_B_IDLE;
+	musb_set_state(musb, OTG_STATE_B_IDLE);
 
 	switch (musb->port_mode) {
 	case MUSB_HOST:
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index a8a65effe68b5..4a4d485d37bd3 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -592,6 +592,17 @@ static inline void musb_platform_clear_ep_rxintr(struct musb *musb, int epnum)
 		musb->ops->clear_ep_rxintr(musb, epnum);
 }
 
+static inline void musb_set_state(struct musb *musb,
+				  enum usb_otg_state otg_state)
+{
+	musb->xceiv->otg->state = otg_state;
+}
+
+static inline enum usb_otg_state musb_get_state(struct musb *musb)
+{
+	return musb->xceiv->otg->state;
+}
+
 /*
  * gets the "dr_mode" property from DT and converts it into musb_mode
  * if the property is not found or not recognized returns MUSB_OTG
diff --git a/drivers/usb/musb/musb_debugfs.c b/drivers/usb/musb/musb_debugfs.c
index 30a89aa8a3e7a..78c726a71b177 100644
--- a/drivers/usb/musb/musb_debugfs.c
+++ b/drivers/usb/musb/musb_debugfs.c
@@ -235,7 +235,7 @@ static int musb_softconnect_show(struct seq_file *s, void *unused)
 	u8		reg;
 	int		connect;
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_HOST:
 	case OTG_STATE_A_WAIT_BCON:
 		pm_runtime_get_sync(musb->controller);
@@ -275,7 +275,7 @@ static ssize_t musb_softconnect_write(struct file *file,
 
 	pm_runtime_get_sync(musb->controller);
 	if (!strncmp(buf, "0", 1)) {
-		switch (musb->xceiv->otg->state) {
+		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_HOST:
 			musb_root_disconnect(musb);
 			reg = musb_readb(musb->mregs, MUSB_DEVCTL);
@@ -286,7 +286,7 @@ static ssize_t musb_softconnect_write(struct file *file,
 			break;
 		}
 	} else if (!strncmp(buf, "1", 1)) {
-		switch (musb->xceiv->otg->state) {
+		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_WAIT_BCON:
 			/*
 			 * musb_save_context() called in musb_runtime_suspend()
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 6704a62a16659..b5c7deb288d25 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1523,7 +1523,7 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget)
 
 	spin_lock_irqsave(&musb->lock, flags);
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_B_PERIPHERAL:
 		/* NOTE:  OTG state machine doesn't include B_SUSPENDED;
 		 * that's part of the standard usb 1.1 state machine, and
@@ -1787,7 +1787,7 @@ int musb_gadget_setup(struct musb *musb)
 	musb->g.speed = USB_SPEED_UNKNOWN;
 
 	MUSB_DEV_MODE(musb);
-	musb->xceiv->otg->state = OTG_STATE_B_IDLE;
+	musb_set_state(musb, OTG_STATE_B_IDLE);
 
 	/* this "gadget" abstracts/virtualizes the controller */
 	musb->g.name = musb_driver_name;
@@ -1852,7 +1852,7 @@ static int musb_gadget_start(struct usb_gadget *g,
 	musb->is_active = 1;
 
 	otg_set_peripheral(otg, &musb->g);
-	musb->xceiv->otg->state = OTG_STATE_B_IDLE;
+	musb_set_state(musb, OTG_STATE_B_IDLE);
 	spin_unlock_irqrestore(&musb->lock, flags);
 
 	musb_start(musb);
@@ -1897,7 +1897,7 @@ static int musb_gadget_stop(struct usb_gadget *g)
 
 	(void) musb_gadget_vbus_draw(&musb->g, 0);
 
-	musb->xceiv->otg->state = OTG_STATE_UNDEFINED;
+	musb_set_state(musb, OTG_STATE_UNDEFINED);
 	musb_stop(musb);
 	otg_set_peripheral(musb->xceiv->otg, NULL);
 
@@ -1926,7 +1926,7 @@ static int musb_gadget_stop(struct usb_gadget *g)
 void musb_g_resume(struct musb *musb)
 {
 	musb->is_suspended = 0;
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_B_IDLE:
 		break;
 	case OTG_STATE_B_WAIT_ACON:
@@ -1952,10 +1952,10 @@ void musb_g_suspend(struct musb *musb)
 	devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
 	musb_dbg(musb, "musb_g_suspend: devctl %02x", devctl);
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_B_IDLE:
 		if ((devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS)
-			musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+			musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 		break;
 	case OTG_STATE_B_PERIPHERAL:
 		musb->is_suspended = 1;
@@ -2001,22 +2001,22 @@ void musb_g_disconnect(struct musb *musb)
 		spin_lock(&musb->lock);
 	}
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	default:
 		musb_dbg(musb, "Unhandled disconnect %s, setting a_idle",
 			usb_otg_state_string(musb->xceiv->otg->state));
-		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+		musb_set_state(musb, OTG_STATE_A_IDLE);
 		MUSB_HST_MODE(musb);
 		break;
 	case OTG_STATE_A_PERIPHERAL:
-		musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON;
+		musb_set_state(musb, OTG_STATE_A_WAIT_BCON);
 		MUSB_HST_MODE(musb);
 		break;
 	case OTG_STATE_B_WAIT_ACON:
 	case OTG_STATE_B_HOST:
 	case OTG_STATE_B_PERIPHERAL:
 	case OTG_STATE_B_IDLE:
-		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
+		musb_set_state(musb, OTG_STATE_B_IDLE);
 		break;
 	case OTG_STATE_B_SRP_INIT:
 		break;
@@ -2080,13 +2080,13 @@ __acquires(musb->lock)
 		 * In that case, do not rely on devctl for setting
 		 * peripheral mode.
 		 */
-		musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+		musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 		musb->g.is_a_peripheral = 0;
 	} else if (devctl & MUSB_DEVCTL_BDEVICE) {
-		musb->xceiv->otg->state = OTG_STATE_B_PERIPHERAL;
+		musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 		musb->g.is_a_peripheral = 0;
 	} else {
-		musb->xceiv->otg->state = OTG_STATE_A_PERIPHERAL;
+		musb_set_state(musb, OTG_STATE_A_PERIPHERAL);
 		musb->g.is_a_peripheral = 1;
 	}
 
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 9ff7d891b4b76..ed631447a253c 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2501,7 +2501,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd)
 	if (!is_host_active(musb))
 		return 0;
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_SUSPEND:
 		return 0;
 	case OTG_STATE_A_WAIT_VRISE:
@@ -2511,7 +2511,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd)
 		 */
 		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
 		if ((devctl & MUSB_DEVCTL_VBUS) == MUSB_DEVCTL_VBUS)
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON;
+			musb_set_state(musb, OTG_STATE_A_WAIT_BCON);
 		break;
 	default:
 		break;
@@ -2720,7 +2720,7 @@ int musb_host_setup(struct musb *musb, int power_budget)
 
 	if (musb->port_mode == MUSB_HOST) {
 		MUSB_HST_MODE(musb);
-		musb->xceiv->otg->state = OTG_STATE_A_IDLE;
+		musb_set_state(musb, OTG_STATE_A_IDLE);
 	}
 	otg_set_host(musb->xceiv->otg, &hcd->self);
 	/* don't support otg protocols */
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c
index cafc69536e1d7..d1cfd45d69e36 100644
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -43,7 +43,7 @@ void musb_host_finish_resume(struct work_struct *work)
 	musb->port1_status |= USB_PORT_STAT_C_SUSPEND << 16;
 	usb_hcd_poll_rh_status(musb->hcd);
 	/* NOTE: it might really be A_WAIT_BCON ... */
-	musb->xceiv->otg->state = OTG_STATE_A_HOST;
+	musb_set_state(musb, OTG_STATE_A_HOST);
 
 	spin_unlock_irqrestore(&musb->lock, flags);
 }
@@ -85,9 +85,9 @@ int musb_port_suspend(struct musb *musb, bool do_suspend)
 		musb_dbg(musb, "Root port suspended, power %02x", power);
 
 		musb->port1_status |= USB_PORT_STAT_SUSPEND;
-		switch (musb->xceiv->otg->state) {
+		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_HOST:
-			musb->xceiv->otg->state = OTG_STATE_A_SUSPEND;
+			musb_set_state(musb, OTG_STATE_A_SUSPEND);
 			musb->is_active = otg->host->b_hnp_enable;
 			if (musb->is_active)
 				mod_timer(&musb->otg_timer, jiffies
@@ -96,7 +96,7 @@ int musb_port_suspend(struct musb *musb, bool do_suspend)
 			musb_platform_try_idle(musb, 0);
 			break;
 		case OTG_STATE_B_HOST:
-			musb->xceiv->otg->state = OTG_STATE_B_WAIT_ACON;
+			musb_set_state(musb, OTG_STATE_B_WAIT_ACON);
 			musb->is_active = otg->host->b_hnp_enable;
 			musb_platform_try_idle(musb, 0);
 			break;
@@ -123,7 +123,7 @@ void musb_port_reset(struct musb *musb, bool do_reset)
 	u8		power;
 	void __iomem	*mbase = musb->mregs;
 
-	if (musb->xceiv->otg->state == OTG_STATE_B_IDLE) {
+	if (musb_get_state(musb) == OTG_STATE_B_IDLE) {
 		musb_dbg(musb, "HNP: Returning from HNP; no hub reset from b_idle");
 		musb->port1_status &= ~USB_PORT_STAT_RESET;
 		return;
@@ -204,20 +204,20 @@ void musb_root_disconnect(struct musb *musb)
 	usb_hcd_poll_rh_status(musb->hcd);
 	musb->is_active = 0;
 
-	switch (musb->xceiv->otg->state) {
+	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_SUSPEND:
 		if (otg->host->b_hnp_enable) {
-			musb->xceiv->otg->state = OTG_STATE_A_PERIPHERAL;
+			musb_set_state(musb, OTG_STATE_A_PERIPHERAL);
 			musb->g.is_a_peripheral = 1;
 			break;
 		}
 		fallthrough;
 	case OTG_STATE_A_HOST:
-		musb->xceiv->otg->state = OTG_STATE_A_WAIT_BCON;
+		musb_set_state(musb, OTG_STATE_A_WAIT_BCON);
 		musb->is_active = 0;
 		break;
 	case OTG_STATE_A_WAIT_VFALL:
-		musb->xceiv->otg->state = OTG_STATE_B_IDLE;
+		musb_set_state(musb, OTG_STATE_B_IDLE);
 		break;
 	default:
 		musb_dbg(musb, "host disconnect (%s)",
-- 
GitLab


From 285f28bfed89a56ed619054f21125b0bd2f0d4d6 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 26 Oct 2022 19:26:52 +0100
Subject: [PATCH 1132/4122] usb: musb: Add and use inline function
 musb_otg_state_string

The musb_otg_state_string() simply calls usb_otg_state_string().
This will make it easier to get rid of the musb->xceiv dependency later.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20221026182657.146630-3-paul@crapouillou.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c    | 43 +++++++++++++++------------------
 drivers/usb/musb/musb_core.h    |  5 ++++
 drivers/usb/musb/musb_gadget.c  |  8 +++---
 drivers/usb/musb/musb_host.c    |  2 +-
 drivers/usb/musb/musb_virthub.c |  4 +--
 5 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index a0fe2516870bf..9bf0ebaa3b7cf 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -610,13 +610,13 @@ static void musb_otg_timer_func(struct timer_list *t)
 	case OTG_STATE_A_SUSPEND:
 	case OTG_STATE_A_WAIT_BCON:
 		musb_dbg(musb, "HNP: %s timeout",
-			usb_otg_state_string(musb->xceiv->otg->state));
+			 musb_otg_state_string(musb));
 		musb_platform_set_vbus(musb, 0);
 		musb_set_state(musb, OTG_STATE_A_WAIT_VFALL);
 		break;
 	default:
 		musb_dbg(musb, "HNP: Unhandled mode %s",
-			usb_otg_state_string(musb->xceiv->otg->state));
+			 musb_otg_state_string(musb));
 	}
 	spin_unlock_irqrestore(&musb->lock, flags);
 }
@@ -630,14 +630,12 @@ void musb_hnp_stop(struct musb *musb)
 	void __iomem	*mbase = musb->mregs;
 	u8	reg;
 
-	musb_dbg(musb, "HNP: stop from %s",
-			usb_otg_state_string(musb->xceiv->otg->state));
+	musb_dbg(musb, "HNP: stop from %s", musb_otg_state_string(musb));
 
 	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_PERIPHERAL:
 		musb_g_disconnect(musb);
-		musb_dbg(musb, "HNP: back to %s",
-			usb_otg_state_string(musb->xceiv->otg->state));
+		musb_dbg(musb, "HNP: back to %s", musb_otg_state_string(musb));
 		break;
 	case OTG_STATE_B_HOST:
 		musb_dbg(musb, "HNP: Disabling HR");
@@ -652,7 +650,7 @@ void musb_hnp_stop(struct musb *musb)
 		break;
 	default:
 		musb_dbg(musb, "HNP: Stopping in unknown state %s",
-			usb_otg_state_string(musb->xceiv->otg->state));
+			 musb_otg_state_string(musb));
 	}
 
 	/*
@@ -667,8 +665,7 @@ static void musb_recover_from_babble(struct musb *musb);
 
 static void musb_handle_intr_resume(struct musb *musb, u8 devctl)
 {
-	musb_dbg(musb, "RESUME (%s)",
-			usb_otg_state_string(musb->xceiv->otg->state));
+	musb_dbg(musb, "RESUME (%s)", musb_otg_state_string(musb));
 
 	if (devctl & MUSB_DEVCTL_HM) {
 		switch (musb_get_state(musb)) {
@@ -693,7 +690,7 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl)
 		default:
 			WARNING("bogus %s RESUME (%s)\n",
 				"host",
-				usb_otg_state_string(musb->xceiv->otg->state));
+				musb_otg_state_string(musb));
 		}
 	} else {
 		switch (musb_get_state(musb)) {
@@ -722,7 +719,7 @@ static void musb_handle_intr_resume(struct musb *musb, u8 devctl)
 		default:
 			WARNING("bogus %s RESUME (%s)\n",
 				"peripheral",
-				usb_otg_state_string(musb->xceiv->otg->state));
+				musb_otg_state_string(musb));
 		}
 	}
 }
@@ -738,8 +735,7 @@ static irqreturn_t musb_handle_intr_sessreq(struct musb *musb, u8 devctl)
 		return IRQ_HANDLED;
 	}
 
-	musb_dbg(musb, "SESSION_REQUEST (%s)",
-		usb_otg_state_string(musb->xceiv->otg->state));
+	musb_dbg(musb, "SESSION_REQUEST (%s)", musb_otg_state_string(musb));
 
 	/* IRQ arrives from ID pin sense or (later, if VBUS power
 	 * is removed) SRP.  responses are time critical:
@@ -806,7 +802,7 @@ static void musb_handle_intr_vbuserr(struct musb *musb, u8 devctl)
 
 	dev_printk(ignore ? KERN_DEBUG : KERN_ERR, musb->controller,
 			"VBUS_ERROR in %s (%02x, %s), retry #%d, port1 %08x\n",
-			usb_otg_state_string(musb->xceiv->otg->state),
+			musb_otg_state_string(musb),
 			devctl,
 			({ char *s;
 			switch (devctl & MUSB_DEVCTL_VBUS) {
@@ -831,7 +827,7 @@ static void musb_handle_intr_vbuserr(struct musb *musb, u8 devctl)
 static void musb_handle_intr_suspend(struct musb *musb, u8 devctl)
 {
 	musb_dbg(musb, "SUSPEND (%s) devctl %02x",
-		usb_otg_state_string(musb->xceiv->otg->state), devctl);
+		 musb_otg_state_string(musb), devctl);
 
 	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_PERIPHERAL:
@@ -939,13 +935,13 @@ b_host:
 	musb_host_poke_root_hub(musb);
 
 	musb_dbg(musb, "CONNECT (%s) devctl %02x",
-			usb_otg_state_string(musb->xceiv->otg->state), devctl);
+			musb_otg_state_string(musb), devctl);
 }
 
 static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl)
 {
 	musb_dbg(musb, "DISCONNECT (%s) as %s, devctl %02x",
-			usb_otg_state_string(musb->xceiv->otg->state),
+			musb_otg_state_string(musb),
 			MUSB_MODE(musb), devctl);
 
 	switch (musb_get_state(musb)) {
@@ -981,7 +977,7 @@ static void musb_handle_intr_disconnect(struct musb *musb, u8 devctl)
 		break;
 	default:
 		WARNING("unhandled DISCONNECT transition (%s)\n",
-			usb_otg_state_string(musb->xceiv->otg->state));
+			musb_otg_state_string(musb));
 		break;
 	}
 }
@@ -1004,8 +1000,7 @@ static void musb_handle_intr_reset(struct musb *musb)
 		dev_err(musb->controller, "Babble\n");
 		musb_recover_from_babble(musb);
 	} else {
-		musb_dbg(musb, "BUS RESET as %s",
-			usb_otg_state_string(musb->xceiv->otg->state));
+		musb_dbg(musb, "BUS RESET as %s", musb_otg_state_string(musb));
 		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_SUSPEND:
 			musb_g_reset(musb);
@@ -1013,7 +1008,7 @@ static void musb_handle_intr_reset(struct musb *musb)
 		case OTG_STATE_A_WAIT_BCON:	/* OPT TD.4.7-900ms */
 			/* never use invalid T(a_wait_bcon) */
 			musb_dbg(musb, "HNP: in %s, %d msec timeout",
-				usb_otg_state_string(musb->xceiv->otg->state),
+				 musb_otg_state_string(musb),
 				TA_WAIT_BCON(musb));
 			mod_timer(&musb->otg_timer, jiffies
 				+ msecs_to_jiffies(TA_WAIT_BCON(musb)));
@@ -1024,7 +1019,7 @@ static void musb_handle_intr_reset(struct musb *musb)
 			break;
 		case OTG_STATE_B_WAIT_ACON:
 			musb_dbg(musb, "HNP: RESET (%s), to b_peripheral",
-				usb_otg_state_string(musb->xceiv->otg->state));
+				 musb_otg_state_string(musb));
 			musb_set_state(musb, OTG_STATE_B_PERIPHERAL);
 			musb_g_reset(musb);
 			break;
@@ -1036,7 +1031,7 @@ static void musb_handle_intr_reset(struct musb *musb)
 			break;
 		default:
 			musb_dbg(musb, "Unhandled BUS RESET as %s",
-				usb_otg_state_string(musb->xceiv->otg->state));
+				 musb_otg_state_string(musb));
 		}
 	}
 }
@@ -1863,7 +1858,7 @@ mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 	int ret;
 
 	spin_lock_irqsave(&musb->lock, flags);
-	ret = sprintf(buf, "%s\n", usb_otg_state_string(musb->xceiv->otg->state));
+	ret = sprintf(buf, "%s\n", musb_otg_state_string(musb));
 	spin_unlock_irqrestore(&musb->lock, flags);
 
 	return ret;
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index 4a4d485d37bd3..a497c44ab0dae 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -603,6 +603,11 @@ static inline enum usb_otg_state musb_get_state(struct musb *musb)
 	return musb->xceiv->otg->state;
 }
 
+static inline const char *musb_otg_state_string(struct musb *musb)
+{
+	return usb_otg_state_string(musb_get_state(musb));
+}
+
 /*
  * gets the "dr_mode" property from DT and converts it into musb_mode
  * if the property is not found or not recognized returns MUSB_OTG
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index b5c7deb288d25..9f5c531de3872 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1564,7 +1564,7 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget)
 		goto done;
 	default:
 		musb_dbg(musb, "Unhandled wake: %s",
-			usb_otg_state_string(musb->xceiv->otg->state));
+			 musb_otg_state_string(musb));
 		goto done;
 	}
 
@@ -1940,7 +1940,7 @@ void musb_g_resume(struct musb *musb)
 		break;
 	default:
 		WARNING("unhandled RESUME transition (%s)\n",
-				usb_otg_state_string(musb->xceiv->otg->state));
+			musb_otg_state_string(musb));
 	}
 }
 
@@ -1970,7 +1970,7 @@ void musb_g_suspend(struct musb *musb)
 		 * A_PERIPHERAL may need care too
 		 */
 		WARNING("unhandled SUSPEND transition (%s)",
-				usb_otg_state_string(musb->xceiv->otg->state));
+			musb_otg_state_string(musb));
 	}
 }
 
@@ -2004,7 +2004,7 @@ void musb_g_disconnect(struct musb *musb)
 	switch (musb_get_state(musb)) {
 	default:
 		musb_dbg(musb, "Unhandled disconnect %s, setting a_idle",
-			usb_otg_state_string(musb->xceiv->otg->state));
+			 musb_otg_state_string(musb));
 		musb_set_state(musb, OTG_STATE_A_IDLE);
 		MUSB_HST_MODE(musb);
 		break;
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index ed631447a253c..b7553da7f4bc8 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2519,7 +2519,7 @@ static int musb_bus_suspend(struct usb_hcd *hcd)
 
 	if (musb->is_active) {
 		WARNING("trying to suspend as %s while active\n",
-				usb_otg_state_string(musb->xceiv->otg->state));
+			musb_otg_state_string(musb));
 		return -EBUSY;
 	} else
 		return 0;
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c
index d1cfd45d69e36..7eb929d75280c 100644
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -102,7 +102,7 @@ int musb_port_suspend(struct musb *musb, bool do_suspend)
 			break;
 		default:
 			musb_dbg(musb, "bogus rh suspend? %s",
-				usb_otg_state_string(musb->xceiv->otg->state));
+				 musb_otg_state_string(musb));
 		}
 	} else if (power & MUSB_POWER_SUSPENDM) {
 		power &= ~MUSB_POWER_SUSPENDM;
@@ -221,7 +221,7 @@ void musb_root_disconnect(struct musb *musb)
 		break;
 	default:
 		musb_dbg(musb, "host disconnect (%s)",
-			usb_otg_state_string(musb->xceiv->otg->state));
+			 musb_otg_state_string(musb));
 	}
 }
 EXPORT_SYMBOL_GPL(musb_root_disconnect);
-- 
GitLab


From a6d45ea063f0a9272f62925c8150439af5640e74 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 26 Oct 2022 19:26:53 +0100
Subject: [PATCH 1133/4122] usb: musb: Allow running without CONFIG_USB_PHY

Modify the core so that musb->xceiv is never deferenced without being
checked first.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20221026182657.146630-4-paul@crapouillou.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_core.c    |  2 +-
 drivers/usb/musb/musb_core.h    | 12 ++++++++++--
 drivers/usb/musb/musb_gadget.c  | 21 +++++++++++++--------
 drivers/usb/musb/musb_host.c    |  8 ++++++--
 drivers/usb/musb/musb_virthub.c | 11 +++++------
 5 files changed, 35 insertions(+), 19 deletions(-)

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index 9bf0ebaa3b7cf..648bb6021c5ef 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -2448,7 +2448,7 @@ musb_init_controller(struct device *dev, int nIrq, void __iomem *ctrl)
 	else
 		musb->io.set_toggle = musb_default_set_toggle;
 
-	if (!musb->xceiv->io_ops) {
+	if (IS_ENABLED(CONFIG_USB_PHY) && musb->xceiv && !musb->xceiv->io_ops) {
 		musb->xceiv->io_dev = musb->controller;
 		musb->xceiv->io_priv = musb->mregs;
 		musb->xceiv->io_ops = &musb_ulpi_access;
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index a497c44ab0dae..b7588d11cfc59 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -339,6 +339,8 @@ struct musb {
 	struct usb_phy		*xceiv;
 	struct phy		*phy;
 
+	enum usb_otg_state	otg_state;
+
 	int nIrq;
 	unsigned		irq_wake:1;
 
@@ -595,12 +597,18 @@ static inline void musb_platform_clear_ep_rxintr(struct musb *musb, int epnum)
 static inline void musb_set_state(struct musb *musb,
 				  enum usb_otg_state otg_state)
 {
-	musb->xceiv->otg->state = otg_state;
+	if (musb->xceiv)
+		musb->xceiv->otg->state = otg_state;
+	else
+		musb->otg_state = otg_state;
 }
 
 static inline enum usb_otg_state musb_get_state(struct musb *musb)
 {
-	return musb->xceiv->otg->state;
+	if (musb->xceiv)
+		return musb->xceiv->otg->state;
+
+	return musb->otg_state;
 }
 
 static inline const char *musb_otg_state_string(struct musb *musb)
diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 9f5c531de3872..66c8b32b16bb4 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1552,9 +1552,11 @@ static int musb_gadget_wakeup(struct usb_gadget *gadget)
 				break;
 		}
 
-		spin_unlock_irqrestore(&musb->lock, flags);
-		otg_start_srp(musb->xceiv->otg);
-		spin_lock_irqsave(&musb->lock, flags);
+		if (musb->xceiv) {
+			spin_unlock_irqrestore(&musb->lock, flags);
+			otg_start_srp(musb->xceiv->otg);
+			spin_lock_irqsave(&musb->lock, flags);
+		}
 
 		/* Block idling for at least 1s */
 		musb_platform_try_idle(musb,
@@ -1628,7 +1630,7 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA)
 {
 	struct musb	*musb = gadget_to_musb(gadget);
 
-	if (!musb->xceiv->set_power)
+	if (!musb->xceiv || !musb->xceiv->set_power)
 		return -EOPNOTSUPP;
 	return usb_phy_set_power(musb->xceiv, mA);
 }
@@ -1834,7 +1836,6 @@ static int musb_gadget_start(struct usb_gadget *g,
 		struct usb_gadget_driver *driver)
 {
 	struct musb		*musb = gadget_to_musb(g);
-	struct usb_otg		*otg = musb->xceiv->otg;
 	unsigned long		flags;
 	int			retval = 0;
 
@@ -1851,7 +1852,9 @@ static int musb_gadget_start(struct usb_gadget *g,
 	spin_lock_irqsave(&musb->lock, flags);
 	musb->is_active = 1;
 
-	otg_set_peripheral(otg, &musb->g);
+	if (musb->xceiv)
+		otg_set_peripheral(musb->xceiv->otg, &musb->g);
+
 	musb_set_state(musb, OTG_STATE_B_IDLE);
 	spin_unlock_irqrestore(&musb->lock, flags);
 
@@ -1861,7 +1864,7 @@ static int musb_gadget_start(struct usb_gadget *g,
 	 * handles power budgeting ... this way also
 	 * ensures HdrcStart is indirectly called.
 	 */
-	if (musb->xceiv->last_event == USB_EVENT_ID)
+	if (musb->xceiv && musb->xceiv->last_event == USB_EVENT_ID)
 		musb_platform_set_vbus(musb, 1);
 
 	pm_runtime_mark_last_busy(musb->controller);
@@ -1899,7 +1902,9 @@ static int musb_gadget_stop(struct usb_gadget *g)
 
 	musb_set_state(musb, OTG_STATE_UNDEFINED);
 	musb_stop(musb);
-	otg_set_peripheral(musb->xceiv->otg, NULL);
+
+	if (musb->xceiv)
+		otg_set_peripheral(musb->xceiv->otg, NULL);
 
 	musb->is_active = 0;
 	musb->gadget_driver = NULL;
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index b7553da7f4bc8..8ad39ecd3b6fc 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2722,10 +2722,14 @@ int musb_host_setup(struct musb *musb, int power_budget)
 		MUSB_HST_MODE(musb);
 		musb_set_state(musb, OTG_STATE_A_IDLE);
 	}
-	otg_set_host(musb->xceiv->otg, &hcd->self);
+
+	if (musb->xceiv) {
+		otg_set_host(musb->xceiv->otg, &hcd->self);
+		musb->xceiv->otg->host = &hcd->self;
+	}
+
 	/* don't support otg protocols */
 	hcd->self.otg_port = 0;
-	musb->xceiv->otg->host = &hcd->self;
 	hcd->power_budget = 2 * (power_budget ? : 250);
 	hcd->skip_phy_initialization = 1;
 
diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c
index 7eb929d75280c..2b2164e028b31 100644
--- a/drivers/usb/musb/musb_virthub.c
+++ b/drivers/usb/musb/musb_virthub.c
@@ -50,7 +50,6 @@ void musb_host_finish_resume(struct work_struct *work)
 
 int musb_port_suspend(struct musb *musb, bool do_suspend)
 {
-	struct usb_otg	*otg = musb->xceiv->otg;
 	u8		power;
 	void __iomem	*mbase = musb->mregs;
 
@@ -88,7 +87,8 @@ int musb_port_suspend(struct musb *musb, bool do_suspend)
 		switch (musb_get_state(musb)) {
 		case OTG_STATE_A_HOST:
 			musb_set_state(musb, OTG_STATE_A_SUSPEND);
-			musb->is_active = otg->host->b_hnp_enable;
+			musb->is_active = musb->xceiv &&
+				musb->xceiv->otg->host->b_hnp_enable;
 			if (musb->is_active)
 				mod_timer(&musb->otg_timer, jiffies
 					+ msecs_to_jiffies(
@@ -97,7 +97,8 @@ int musb_port_suspend(struct musb *musb, bool do_suspend)
 			break;
 		case OTG_STATE_B_HOST:
 			musb_set_state(musb, OTG_STATE_B_WAIT_ACON);
-			musb->is_active = otg->host->b_hnp_enable;
+			musb->is_active = musb->xceiv &&
+				musb->xceiv->otg->host->b_hnp_enable;
 			musb_platform_try_idle(musb, 0);
 			break;
 		default:
@@ -196,8 +197,6 @@ void musb_port_reset(struct musb *musb, bool do_reset)
 
 void musb_root_disconnect(struct musb *musb)
 {
-	struct usb_otg	*otg = musb->xceiv->otg;
-
 	musb->port1_status = USB_PORT_STAT_POWER
 			| (USB_PORT_STAT_C_CONNECTION << 16);
 
@@ -206,7 +205,7 @@ void musb_root_disconnect(struct musb *musb)
 
 	switch (musb_get_state(musb)) {
 	case OTG_STATE_A_SUSPEND:
-		if (otg->host->b_hnp_enable) {
+		if (musb->xceiv && musb->xceiv->otg->host->b_hnp_enable) {
 			musb_set_state(musb, OTG_STATE_A_PERIPHERAL);
 			musb->g.is_a_peripheral = 1;
 			break;
-- 
GitLab


From 0afddf1e49d1172a87c7a73002e62aa66b6af677 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 26 Oct 2022 19:26:54 +0100
Subject: [PATCH 1134/4122] usb: musb: Support setting OTG mode using generic
 PHY

When musb->xceiv is not provided but musb->phy is, support setting the
OTG mode (host, peripheral) using the generic PHY framework.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20221026182657.146630-5-paul@crapouillou.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_gadget.c | 4 ++++
 drivers/usb/musb/musb_host.c   | 2 ++
 2 files changed, 6 insertions(+)

diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 66c8b32b16bb4..6cb9514ef3405 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1854,6 +1854,8 @@ static int musb_gadget_start(struct usb_gadget *g,
 
 	if (musb->xceiv)
 		otg_set_peripheral(musb->xceiv->otg, &musb->g);
+	else
+		phy_set_mode(musb->phy, PHY_MODE_USB_DEVICE);
 
 	musb_set_state(musb, OTG_STATE_B_IDLE);
 	spin_unlock_irqrestore(&musb->lock, flags);
@@ -1905,6 +1907,8 @@ static int musb_gadget_stop(struct usb_gadget *g)
 
 	if (musb->xceiv)
 		otg_set_peripheral(musb->xceiv->otg, NULL);
+	else
+		phy_set_mode(musb->phy, PHY_MODE_INVALID);
 
 	musb->is_active = 0;
 	musb->gadget_driver = NULL;
diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c
index 8ad39ecd3b6fc..a02c29216955a 100644
--- a/drivers/usb/musb/musb_host.c
+++ b/drivers/usb/musb/musb_host.c
@@ -2726,6 +2726,8 @@ int musb_host_setup(struct musb *musb, int power_budget)
 	if (musb->xceiv) {
 		otg_set_host(musb->xceiv->otg, &hcd->self);
 		musb->xceiv->otg->host = &hcd->self;
+	} else {
+		phy_set_mode(musb->phy, PHY_MODE_USB_HOST);
 	}
 
 	/* don't support otg protocols */
-- 
GitLab


From d9b324307777404f978660b5752fb264ee344a22 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 26 Oct 2022 19:26:55 +0100
Subject: [PATCH 1135/4122] usb: musb: jz4740: Don't disable external hubs

The jz4740-musb driver does not really support OTG, so it has no reason
to disable external hubs, especially since it's a system-wide setting
and we don't want external hubs to be disabled for other USB host
controllers.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20221026182657.146630-6-paul@crapouillou.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index f9eec666103cb..290df4d5d5ce8 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -107,7 +107,6 @@ config USB_MUSB_JZ4740
 	depends on OF
 	depends on MIPS || COMPILE_TEST
 	depends on USB_MUSB_GADGET
-	depends on USB=n || USB_OTG_DISABLE_EXTERNAL_HUB
 	select USB_ROLE_SWITCH
 
 config USB_MUSB_MEDIATEK
-- 
GitLab


From 9cd074798ef6bf9c361c78084abc2c30e551ecdc Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 26 Oct 2022 19:26:56 +0100
Subject: [PATCH 1136/4122] usb: musb: jz4740: Support the generic PHY
 framework

Support PHYs implemented using the generic PHY framework instead of the
deprecated USB-PHY framework.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20221026182657.146630-7-paul@crapouillou.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/jz4740.c | 62 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 54 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/musb/jz4740.c b/drivers/usb/musb/jz4740.c
index d1e4e0deb7535..c7b1d2a394d9a 100644
--- a/drivers/usb/musb/jz4740.c
+++ b/drivers/usb/musb/jz4740.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
+#include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/usb/role.h>
 #include <linux/usb/usb_phy_generic.h>
@@ -81,6 +82,9 @@ static int jz4740_musb_role_switch_set(struct usb_role_switch *sw,
 	struct jz4740_glue *glue = usb_role_switch_get_drvdata(sw);
 	struct usb_phy *phy = glue->musb->xceiv;
 
+	if (!phy)
+		return 0;
+
 	switch (role) {
 	case USB_ROLE_NONE:
 		atomic_notifier_call_chain(&phy->notifier, USB_EVENT_NONE, phy);
@@ -105,21 +109,51 @@ static int jz4740_musb_init(struct musb *musb)
 		.driver_data = glue,
 		.fwnode = dev_fwnode(dev),
 	};
+	int err;
 
 	glue->musb = musb;
 
-	if (dev->of_node)
-		musb->xceiv = devm_usb_get_phy_by_phandle(dev, "phys", 0);
-	else
-		musb->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
-	if (IS_ERR(musb->xceiv))
-		return dev_err_probe(dev, PTR_ERR(musb->xceiv),
-				     "No transceiver configured\n");
+	if (IS_ENABLED(CONFIG_GENERIC_PHY)) {
+		musb->phy = devm_of_phy_get_by_index(dev, dev->of_node, 0);
+		if (IS_ERR(musb->phy)) {
+			err = PTR_ERR(musb->phy);
+			if (err != -ENODEV) {
+				dev_err(dev, "Unable to get PHY\n");
+				return err;
+			}
+
+			musb->phy = NULL;
+		}
+	}
+
+	if (musb->phy) {
+		err = phy_init(musb->phy);
+		if (err) {
+			dev_err(dev, "Failed to init PHY\n");
+			return err;
+		}
+
+		err = phy_power_on(musb->phy);
+		if (err) {
+			dev_err(dev, "Unable to power on PHY\n");
+			goto err_phy_shutdown;
+		}
+	} else {
+		if (dev->of_node)
+			musb->xceiv = devm_usb_get_phy_by_phandle(dev, "phys", 0);
+		else
+			musb->xceiv = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
+		if (IS_ERR(musb->xceiv)) {
+			dev_err(dev, "No transceiver configured\n");
+			return PTR_ERR(musb->xceiv);
+		}
+	}
 
 	glue->role_sw = usb_role_switch_register(dev, &role_sw_desc);
 	if (IS_ERR(glue->role_sw)) {
 		dev_err(dev, "Failed to register USB role switch\n");
-		return PTR_ERR(glue->role_sw);
+		err = PTR_ERR(glue->role_sw);
+		goto err_phy_power_down;
 	}
 
 	/*
@@ -131,6 +165,14 @@ static int jz4740_musb_init(struct musb *musb)
 	musb->isr = jz4740_musb_interrupt;
 
 	return 0;
+
+err_phy_power_down:
+	if (musb->phy)
+		phy_power_off(musb->phy);
+err_phy_shutdown:
+	if (musb->phy)
+		phy_exit(musb->phy);
+	return err;
 }
 
 static int jz4740_musb_exit(struct musb *musb)
@@ -138,6 +180,10 @@ static int jz4740_musb_exit(struct musb *musb)
 	struct jz4740_glue *glue = dev_get_drvdata(musb->controller->parent);
 
 	usb_role_switch_unregister(glue->role_sw);
+	if (musb->phy) {
+		phy_power_off(musb->phy);
+		phy_exit(musb->phy);
+	}
 
 	return 0;
 }
-- 
GitLab


From 3f2d1f2e40666d6536b663c5050a3a23ca5d9ce8 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Wed, 26 Oct 2022 19:26:57 +0100
Subject: [PATCH 1137/4122] usb: phy: jz4770: Remove driver

This driver has been replaced by the Ingenic PHY driver that uses the
generic PHY framework.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Link: https://lore.kernel.org/r/20221026182657.146630-8-paul@crapouillou.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/Kconfig      |   8 -
 drivers/usb/phy/Makefile     |   1 -
 drivers/usb/phy/phy-jz4770.c | 353 -----------------------------------
 3 files changed, 362 deletions(-)
 delete mode 100644 drivers/usb/phy/phy-jz4770.c

diff --git a/drivers/usb/phy/Kconfig b/drivers/usb/phy/Kconfig
index efdcafdbe46d7..915df5726a5cc 100644
--- a/drivers/usb/phy/Kconfig
+++ b/drivers/usb/phy/Kconfig
@@ -189,12 +189,4 @@ config USB_ULPI_VIEWPORT
 	  Provides read/write operations to the ULPI phy register set for
 	  controllers with a viewport register (e.g. Chipidea/ARC controllers).
 
-config JZ4770_PHY
-	tristate "Ingenic SoCs Transceiver Driver"
-	depends on MIPS || COMPILE_TEST
-	select USB_PHY
-	help
-	  This driver provides PHY support for the USB controller found
-	  on the JZ-series and X-series SoCs from Ingenic.
-
 endmenu
diff --git a/drivers/usb/phy/Makefile b/drivers/usb/phy/Makefile
index b352bdbe87121..df1d99010079f 100644
--- a/drivers/usb/phy/Makefile
+++ b/drivers/usb/phy/Makefile
@@ -24,4 +24,3 @@ obj-$(CONFIG_USB_MXS_PHY)		+= phy-mxs-usb.o
 obj-$(CONFIG_USB_ULPI)			+= phy-ulpi.o
 obj-$(CONFIG_USB_ULPI_VIEWPORT)		+= phy-ulpi-viewport.o
 obj-$(CONFIG_KEYSTONE_USB_PHY)		+= phy-keystone.o
-obj-$(CONFIG_JZ4770_PHY)		+= phy-jz4770.o
diff --git a/drivers/usb/phy/phy-jz4770.c b/drivers/usb/phy/phy-jz4770.c
deleted file mode 100644
index f16adcacdce3f..0000000000000
--- a/drivers/usb/phy/phy-jz4770.c
+++ /dev/null
@@ -1,353 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Ingenic SoCs USB PHY driver
- * Copyright (c) Paul Cercueil <paul@crapouillou.net>
- * Copyright (c) 漆鹏振 (Qi Pengzhen) <aric.pzqi@ingenic.com>
- * Copyright (c) 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
- */
-
-#include <linux/clk.h>
-#include <linux/io.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/usb/otg.h>
-#include <linux/usb/phy.h>
-
-/* OTGPHY register offsets */
-#define REG_USBPCR_OFFSET			0x00
-#define REG_USBRDT_OFFSET			0x04
-#define REG_USBVBFIL_OFFSET			0x08
-#define REG_USBPCR1_OFFSET			0x0c
-
-/* bits within the USBPCR register */
-#define USBPCR_USB_MODE				BIT(31)
-#define USBPCR_AVLD_REG				BIT(30)
-#define USBPCR_COMMONONN			BIT(25)
-#define USBPCR_VBUSVLDEXT			BIT(24)
-#define USBPCR_VBUSVLDEXTSEL		BIT(23)
-#define USBPCR_POR					BIT(22)
-#define USBPCR_SIDDQ				BIT(21)
-#define USBPCR_OTG_DISABLE			BIT(20)
-#define USBPCR_TXPREEMPHTUNE		BIT(6)
-
-#define USBPCR_IDPULLUP_LSB	28
-#define USBPCR_IDPULLUP_MASK		GENMASK(29, USBPCR_IDPULLUP_LSB)
-#define USBPCR_IDPULLUP_ALWAYS		(0x2 << USBPCR_IDPULLUP_LSB)
-#define USBPCR_IDPULLUP_SUSPEND		(0x1 << USBPCR_IDPULLUP_LSB)
-#define USBPCR_IDPULLUP_OTG			(0x0 << USBPCR_IDPULLUP_LSB)
-
-#define USBPCR_COMPDISTUNE_LSB		17
-#define USBPCR_COMPDISTUNE_MASK		GENMASK(19, USBPCR_COMPDISTUNE_LSB)
-#define USBPCR_COMPDISTUNE_DFT		(0x4 << USBPCR_COMPDISTUNE_LSB)
-
-#define USBPCR_OTGTUNE_LSB			14
-#define USBPCR_OTGTUNE_MASK			GENMASK(16, USBPCR_OTGTUNE_LSB)
-#define USBPCR_OTGTUNE_DFT			(0x4 << USBPCR_OTGTUNE_LSB)
-
-#define USBPCR_SQRXTUNE_LSB	11
-#define USBPCR_SQRXTUNE_MASK		GENMASK(13, USBPCR_SQRXTUNE_LSB)
-#define USBPCR_SQRXTUNE_DCR_20PCT	(0x7 << USBPCR_SQRXTUNE_LSB)
-#define USBPCR_SQRXTUNE_DFT			(0x3 << USBPCR_SQRXTUNE_LSB)
-
-#define USBPCR_TXFSLSTUNE_LSB		7
-#define USBPCR_TXFSLSTUNE_MASK		GENMASK(10, USBPCR_TXFSLSTUNE_LSB)
-#define USBPCR_TXFSLSTUNE_DCR_50PPT	(0xf << USBPCR_TXFSLSTUNE_LSB)
-#define USBPCR_TXFSLSTUNE_DCR_25PPT	(0x7 << USBPCR_TXFSLSTUNE_LSB)
-#define USBPCR_TXFSLSTUNE_DFT		(0x3 << USBPCR_TXFSLSTUNE_LSB)
-#define USBPCR_TXFSLSTUNE_INC_25PPT	(0x1 << USBPCR_TXFSLSTUNE_LSB)
-#define USBPCR_TXFSLSTUNE_INC_50PPT	(0x0 << USBPCR_TXFSLSTUNE_LSB)
-
-#define USBPCR_TXHSXVTUNE_LSB		4
-#define USBPCR_TXHSXVTUNE_MASK		GENMASK(5, USBPCR_TXHSXVTUNE_LSB)
-#define USBPCR_TXHSXVTUNE_DFT		(0x3 << USBPCR_TXHSXVTUNE_LSB)
-#define USBPCR_TXHSXVTUNE_DCR_15MV	(0x1 << USBPCR_TXHSXVTUNE_LSB)
-
-#define USBPCR_TXRISETUNE_LSB		4
-#define USBPCR_TXRISETUNE_MASK		GENMASK(5, USBPCR_TXRISETUNE_LSB)
-#define USBPCR_TXRISETUNE_DFT		(0x3 << USBPCR_TXRISETUNE_LSB)
-
-#define USBPCR_TXVREFTUNE_LSB		0
-#define USBPCR_TXVREFTUNE_MASK		GENMASK(3, USBPCR_TXVREFTUNE_LSB)
-#define USBPCR_TXVREFTUNE_INC_25PPT	(0x7 << USBPCR_TXVREFTUNE_LSB)
-#define USBPCR_TXVREFTUNE_DFT		(0x5 << USBPCR_TXVREFTUNE_LSB)
-
-/* bits within the USBRDTR register */
-#define USBRDT_UTMI_RST				BIT(27)
-#define USBRDT_HB_MASK				BIT(26)
-#define USBRDT_VBFIL_LD_EN			BIT(25)
-#define USBRDT_IDDIG_EN				BIT(24)
-#define USBRDT_IDDIG_REG			BIT(23)
-#define USBRDT_VBFIL_EN				BIT(2)
-
-/* bits within the USBPCR1 register */
-#define USBPCR1_BVLD_REG			BIT(31)
-#define USBPCR1_DPPD				BIT(29)
-#define USBPCR1_DMPD				BIT(28)
-#define USBPCR1_USB_SEL				BIT(28)
-#define USBPCR1_WORD_IF_16BIT		BIT(19)
-
-enum ingenic_usb_phy_version {
-	ID_JZ4770,
-	ID_JZ4780,
-	ID_X1000,
-	ID_X1830,
-};
-
-struct ingenic_soc_info {
-	enum ingenic_usb_phy_version version;
-
-	void (*usb_phy_init)(struct usb_phy *phy);
-};
-
-struct jz4770_phy {
-	const struct ingenic_soc_info *soc_info;
-
-	struct usb_phy phy;
-	struct usb_otg otg;
-	struct device *dev;
-	void __iomem *base;
-	struct clk *clk;
-	struct regulator *vcc_supply;
-};
-
-static inline struct jz4770_phy *otg_to_jz4770_phy(struct usb_otg *otg)
-{
-	return container_of(otg, struct jz4770_phy, otg);
-}
-
-static inline struct jz4770_phy *phy_to_jz4770_phy(struct usb_phy *phy)
-{
-	return container_of(phy, struct jz4770_phy, phy);
-}
-
-static int ingenic_usb_phy_set_peripheral(struct usb_otg *otg,
-				     struct usb_gadget *gadget)
-{
-	struct jz4770_phy *priv = otg_to_jz4770_phy(otg);
-	u32 reg;
-
-	if (priv->soc_info->version >= ID_X1000) {
-		reg = readl(priv->base + REG_USBPCR1_OFFSET);
-		reg |= USBPCR1_BVLD_REG;
-		writel(reg, priv->base + REG_USBPCR1_OFFSET);
-	}
-
-	reg = readl(priv->base + REG_USBPCR_OFFSET);
-	reg &= ~USBPCR_USB_MODE;
-	reg |= USBPCR_VBUSVLDEXT | USBPCR_VBUSVLDEXTSEL | USBPCR_OTG_DISABLE;
-	writel(reg, priv->base + REG_USBPCR_OFFSET);
-
-	return 0;
-}
-
-static int ingenic_usb_phy_set_host(struct usb_otg *otg, struct usb_bus *host)
-{
-	struct jz4770_phy *priv = otg_to_jz4770_phy(otg);
-	u32 reg;
-
-	reg = readl(priv->base + REG_USBPCR_OFFSET);
-	reg &= ~(USBPCR_VBUSVLDEXT | USBPCR_VBUSVLDEXTSEL | USBPCR_OTG_DISABLE);
-	reg |= USBPCR_USB_MODE;
-	writel(reg, priv->base + REG_USBPCR_OFFSET);
-
-	return 0;
-}
-
-static int ingenic_usb_phy_init(struct usb_phy *phy)
-{
-	struct jz4770_phy *priv = phy_to_jz4770_phy(phy);
-	int err;
-	u32 reg;
-
-	err = regulator_enable(priv->vcc_supply);
-	if (err) {
-		dev_err(priv->dev, "Unable to enable VCC: %d\n", err);
-		return err;
-	}
-
-	err = clk_prepare_enable(priv->clk);
-	if (err) {
-		dev_err(priv->dev, "Unable to start clock: %d\n", err);
-		return err;
-	}
-
-	priv->soc_info->usb_phy_init(phy);
-
-	/* Wait for PHY to reset */
-	usleep_range(30, 300);
-	reg = readl(priv->base + REG_USBPCR_OFFSET);
-	writel(reg & ~USBPCR_POR, priv->base + REG_USBPCR_OFFSET);
-	usleep_range(300, 1000);
-
-	return 0;
-}
-
-static void ingenic_usb_phy_shutdown(struct usb_phy *phy)
-{
-	struct jz4770_phy *priv = phy_to_jz4770_phy(phy);
-
-	clk_disable_unprepare(priv->clk);
-	regulator_disable(priv->vcc_supply);
-}
-
-static void ingenic_usb_phy_remove(void *phy)
-{
-	usb_remove_phy(phy);
-}
-
-static void jz4770_usb_phy_init(struct usb_phy *phy)
-{
-	struct jz4770_phy *priv = phy_to_jz4770_phy(phy);
-	u32 reg;
-
-	reg = USBPCR_AVLD_REG | USBPCR_COMMONONN | USBPCR_IDPULLUP_ALWAYS |
-		USBPCR_COMPDISTUNE_DFT | USBPCR_OTGTUNE_DFT | USBPCR_SQRXTUNE_DFT |
-		USBPCR_TXFSLSTUNE_DFT | USBPCR_TXRISETUNE_DFT | USBPCR_TXVREFTUNE_DFT |
-		USBPCR_POR;
-	writel(reg, priv->base + REG_USBPCR_OFFSET);
-}
-
-static void jz4780_usb_phy_init(struct usb_phy *phy)
-{
-	struct jz4770_phy *priv = phy_to_jz4770_phy(phy);
-	u32 reg;
-
-	reg = readl(priv->base + REG_USBPCR1_OFFSET) | USBPCR1_USB_SEL |
-		USBPCR1_WORD_IF_16BIT;
-	writel(reg, priv->base + REG_USBPCR1_OFFSET);
-
-	reg = USBPCR_TXPREEMPHTUNE | USBPCR_COMMONONN | USBPCR_POR;
-	writel(reg, priv->base + REG_USBPCR_OFFSET);
-}
-
-static void x1000_usb_phy_init(struct usb_phy *phy)
-{
-	struct jz4770_phy *priv = phy_to_jz4770_phy(phy);
-	u32 reg;
-
-	reg = readl(priv->base + REG_USBPCR1_OFFSET) | USBPCR1_WORD_IF_16BIT;
-	writel(reg, priv->base + REG_USBPCR1_OFFSET);
-
-	reg = USBPCR_SQRXTUNE_DCR_20PCT | USBPCR_TXPREEMPHTUNE |
-		USBPCR_TXHSXVTUNE_DCR_15MV | USBPCR_TXVREFTUNE_INC_25PPT |
-		USBPCR_COMMONONN | USBPCR_POR;
-	writel(reg, priv->base + REG_USBPCR_OFFSET);
-}
-
-static void x1830_usb_phy_init(struct usb_phy *phy)
-{
-	struct jz4770_phy *priv = phy_to_jz4770_phy(phy);
-	u32 reg;
-
-	/* rdt */
-	writel(USBRDT_VBFIL_EN | USBRDT_UTMI_RST, priv->base + REG_USBRDT_OFFSET);
-
-	reg = readl(priv->base + REG_USBPCR1_OFFSET) | USBPCR1_WORD_IF_16BIT |
-		USBPCR1_DMPD | USBPCR1_DPPD;
-	writel(reg, priv->base + REG_USBPCR1_OFFSET);
-
-	reg = USBPCR_IDPULLUP_OTG | USBPCR_VBUSVLDEXT |	USBPCR_TXPREEMPHTUNE |
-		USBPCR_COMMONONN | USBPCR_POR;
-	writel(reg, priv->base + REG_USBPCR_OFFSET);
-}
-
-static const struct ingenic_soc_info jz4770_soc_info = {
-	.version = ID_JZ4770,
-
-	.usb_phy_init = jz4770_usb_phy_init,
-};
-
-static const struct ingenic_soc_info jz4780_soc_info = {
-	.version = ID_JZ4780,
-
-	.usb_phy_init = jz4780_usb_phy_init,
-};
-
-static const struct ingenic_soc_info x1000_soc_info = {
-	.version = ID_X1000,
-
-	.usb_phy_init = x1000_usb_phy_init,
-};
-
-static const struct ingenic_soc_info x1830_soc_info = {
-	.version = ID_X1830,
-
-	.usb_phy_init = x1830_usb_phy_init,
-};
-
-static const struct of_device_id ingenic_usb_phy_of_matches[] = {
-	{ .compatible = "ingenic,jz4770-phy", .data = &jz4770_soc_info },
-	{ .compatible = "ingenic,jz4780-phy", .data = &jz4780_soc_info },
-	{ .compatible = "ingenic,x1000-phy", .data = &x1000_soc_info },
-	{ .compatible = "ingenic,x1830-phy", .data = &x1830_soc_info },
-	{ /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, ingenic_usb_phy_of_matches);
-
-static int jz4770_phy_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct jz4770_phy *priv;
-	int err;
-
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
-	priv->soc_info = device_get_match_data(&pdev->dev);
-	if (!priv->soc_info) {
-		dev_err(&pdev->dev, "Error: No device match found\n");
-		return -ENODEV;
-	}
-
-	platform_set_drvdata(pdev, priv);
-	priv->dev = dev;
-	priv->phy.dev = dev;
-	priv->phy.otg = &priv->otg;
-	priv->phy.label = "ingenic-usb-phy";
-	priv->phy.init = ingenic_usb_phy_init;
-	priv->phy.shutdown = ingenic_usb_phy_shutdown;
-
-	priv->otg.state = OTG_STATE_UNDEFINED;
-	priv->otg.usb_phy = &priv->phy;
-	priv->otg.set_host = ingenic_usb_phy_set_host;
-	priv->otg.set_peripheral = ingenic_usb_phy_set_peripheral;
-
-	priv->base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(priv->base)) {
-		dev_err(dev, "Failed to map registers\n");
-		return PTR_ERR(priv->base);
-	}
-
-	priv->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(priv->clk))
-		return dev_err_probe(dev, PTR_ERR(priv->clk),
-				     "Failed to get clock\n");
-
-	priv->vcc_supply = devm_regulator_get(dev, "vcc");
-	if (IS_ERR(priv->vcc_supply))
-		return dev_err_probe(dev, PTR_ERR(priv->vcc_supply),
-				     "Failed to get regulator\n");
-
-	err = usb_add_phy(&priv->phy, USB_PHY_TYPE_USB2);
-	if (err)
-		return dev_err_probe(dev, err, "Unable to register PHY\n");
-
-	return devm_add_action_or_reset(dev, ingenic_usb_phy_remove, &priv->phy);
-}
-
-static struct platform_driver ingenic_phy_driver = {
-	.probe		= jz4770_phy_probe,
-	.driver		= {
-		.name	= "jz4770-phy",
-		.of_match_table = ingenic_usb_phy_of_matches,
-	},
-};
-module_platform_driver(ingenic_phy_driver);
-
-MODULE_AUTHOR("周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>");
-MODULE_AUTHOR("漆鹏振 (Qi Pengzhen) <aric.pzqi@ingenic.com>");
-MODULE_AUTHOR("Paul Cercueil <paul@crapouillou.net>");
-MODULE_DESCRIPTION("Ingenic SoCs USB PHY driver");
-MODULE_LICENSE("GPL");
-- 
GitLab


From 321b59870f850a10dbb211ecd2bd87b41497ea6f Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 4 Nov 2022 14:10:30 +0100
Subject: [PATCH 1138/4122] usb: gadget: u_ether: Do not make UDC parent of the
 net device

The UDC is not a suitable parent of the net device as the UDC can
change or vanish during the lifecycle of the ethernet gadget. This
can be illustrated with the following:

mkdir -p /sys/kernel/config/usb_gadget/mygadget
cd /sys/kernel/config/usb_gadget/mygadget
mkdir -p configs/c.1/strings/0x409
echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
mkdir -p functions/ecm.usb0
ln -s functions/ecm.usb0 configs/c.1/
echo "dummy_udc.0" > UDC
rmmod dummy_hcd

The 'rmmod' removes the UDC from the just created gadget, leaving
the still existing net device with a no longer existing parent.

Accessing the ethernet device with commands like:

ip --details link show usb0

will result in a KASAN splat:

==================================================================
BUG: KASAN: use-after-free in if_nlmsg_size+0x3e8/0x528
Read of size 4 at addr c5c84754 by task ip/357

CPU: 3 PID: 357 Comm: ip Not tainted 6.1.0-rc3-00013-gd14953726b24-dirty #324
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x58/0x70
 dump_stack_lvl from print_report+0x134/0x4d4
 print_report from kasan_report+0x78/0x10c
 kasan_report from if_nlmsg_size+0x3e8/0x528
 if_nlmsg_size from rtnl_getlink+0x2b4/0x4d0
 rtnl_getlink from rtnetlink_rcv_msg+0x1f4/0x674
 rtnetlink_rcv_msg from netlink_rcv_skb+0xb4/0x1f8
 netlink_rcv_skb from netlink_unicast+0x294/0x478
 netlink_unicast from netlink_sendmsg+0x328/0x640
 netlink_sendmsg from ____sys_sendmsg+0x2a4/0x3b4
 ____sys_sendmsg from ___sys_sendmsg+0xc8/0x12c
 ___sys_sendmsg from sys_sendmsg+0xa0/0x120
 sys_sendmsg from ret_fast_syscall+0x0/0x1c

Solve this by not setting the parent of the ethernet device.

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20221104131031.850850-2-s.hauer@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/u_ether.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/usb/gadget/function/u_ether.c b/drivers/usb/gadget/function/u_ether.c
index e06022873df16..8f12f3f8f6eeb 100644
--- a/drivers/usb/gadget/function/u_ether.c
+++ b/drivers/usb/gadget/function/u_ether.c
@@ -798,7 +798,6 @@ struct eth_dev *gether_setup_name(struct usb_gadget *g,
 	net->max_mtu = GETHER_MAX_MTU_SIZE;
 
 	dev->gadget = g;
-	SET_NETDEV_DEV(net, &g->dev);
 	SET_NETDEV_DEVTYPE(net, &gadget_type);
 
 	status = register_netdev(net);
@@ -873,8 +872,6 @@ int gether_register_netdev(struct net_device *net)
 	struct usb_gadget *g;
 	int status;
 
-	if (!net->dev.parent)
-		return -EINVAL;
 	dev = netdev_priv(net);
 	g = dev->gadget;
 
@@ -905,7 +902,6 @@ void gether_set_gadget(struct net_device *net, struct usb_gadget *g)
 
 	dev = netdev_priv(net);
 	dev->gadget = g;
-	SET_NETDEV_DEV(net, &g->dev);
 }
 EXPORT_SYMBOL_GPL(gether_set_gadget);
 
-- 
GitLab


From d65e6b6e884a38360fc1cadf8ff31858151da57f Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Fri, 4 Nov 2022 14:10:31 +0100
Subject: [PATCH 1139/4122] usb: gadget: f_ecm: Always set current gadget in
 ecm_bind()

The gadget may change over bind/unbind cycles, so set it each time during
bind, not only the first time. Without it we get a use-after-free with
the following example:

cd /sys/kernel/config/usb_gadget/; mkdir -p mygadget; cd mygadget
mkdir -p configs/c.1/strings/0x409
echo "C1:Composite Device" > configs/c.1/strings/0x409/configuration
mkdir -p functions/ecm.usb0
ln -s functions/ecm.usb0 configs/c.1/
rmmod dummy_hcd
modprobe dummy_hcd

KASAN will complain shortly after the 'modprobe':

usb 2-1: New USB device found, idVendor=0000, idProduct=0000, bcdDevice= 6.01
usb 2-1: New USB device strings: Mfr=0, Product=0, SerialNumber=0
==================================================================
BUG: KASAN: use-after-free in gether_connect+0xb8/0x30c
Read of size 4 at addr cbef170c by task swapper/3/0

CPU: 3 PID: 0 Comm: swapper/3 Not tainted 6.1.0-rc3-00014-g41ff012f50cb-dirty #322
Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree)
 unwind_backtrace from show_stack+0x10/0x14
 show_stack from dump_stack_lvl+0x58/0x70
 dump_stack_lvl from print_report+0x134/0x4d4
 print_report from kasan_report+0x78/0x10c
 kasan_report from gether_connect+0xb8/0x30c
 gether_connect from ecm_set_alt+0x124/0x254
 ecm_set_alt from composite_setup+0xb98/0x2b18
 composite_setup from configfs_composite_setup+0x80/0x98
 configfs_composite_setup from dummy_timer+0x8f0/0x14a0 [dummy_hcd]
 ...

Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Link: https://lore.kernel.org/r/20221104131031.850850-3-s.hauer@pengutronix.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_ecm.c | 22 +++++++++-------------
 1 file changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/usb/gadget/function/f_ecm.c b/drivers/usb/gadget/function/f_ecm.c
index ffe2486fce71c..a7ab30e603e20 100644
--- a/drivers/usb/gadget/function/f_ecm.c
+++ b/drivers/usb/gadget/function/f_ecm.c
@@ -685,7 +685,7 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_composite_dev *cdev = c->cdev;
 	struct f_ecm		*ecm = func_to_ecm(f);
 	struct usb_string	*us;
-	int			status;
+	int			status = 0;
 	struct usb_ep		*ep;
 
 	struct f_ecm_opts	*ecm_opts;
@@ -695,23 +695,19 @@ ecm_bind(struct usb_configuration *c, struct usb_function *f)
 
 	ecm_opts = container_of(f->fi, struct f_ecm_opts, func_inst);
 
-	/*
-	 * in drivers/usb/gadget/configfs.c:configfs_composite_bind()
-	 * configurations are bound in sequence with list_for_each_entry,
-	 * in each configuration its functions are bound in sequence
-	 * with list_for_each_entry, so we assume no race condition
-	 * with regard to ecm_opts->bound access
-	 */
+	mutex_lock(&ecm_opts->lock);
+
+	gether_set_gadget(ecm_opts->net, cdev->gadget);
+
 	if (!ecm_opts->bound) {
-		mutex_lock(&ecm_opts->lock);
-		gether_set_gadget(ecm_opts->net, cdev->gadget);
 		status = gether_register_netdev(ecm_opts->net);
-		mutex_unlock(&ecm_opts->lock);
-		if (status)
-			return status;
 		ecm_opts->bound = true;
 	}
 
+	mutex_unlock(&ecm_opts->lock);
+	if (status)
+		return status;
+
 	ecm_string_defs[1].s = ecm->ethaddr;
 
 	us = usb_gstrings_attach(cdev, ecm_strings,
-- 
GitLab


From 2e2b4b896159f9d47d063ccf4ed0a7af9a40f1c5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Wed, 19 Oct 2022 13:55:03 +0300
Subject: [PATCH 1140/4122] tty: Convert tty_buffer flags to bool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The struct tty_buffer has flags which is only used for storing TTYB_NORMAL.
There is also a few quite confusing operations for checking the presense
of TTYB_NORMAL. Simplify things by converting flags to bool.

Despite the name remaining the same, the meaning of "flags" is altered
slightly by this change. Previously it referred to flags of the buffer
(only TTYB_NORMAL being used as a flag). After this change, flags tell
whether the buffer contains/should be allocated with flags array along
with character data array. It is much more suitable name that
TTYB_NORMAL was for this purpose, thus the name remains.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221019105504.16800-1-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/tty_buffer.c   | 28 ++++++++++++++--------------
 include/linux/tty_buffer.h |  5 +----
 include/linux/tty_flip.h   |  4 ++--
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c
index 3f057805560fe..2df86ed905742 100644
--- a/drivers/tty/tty_buffer.c
+++ b/drivers/tty/tty_buffer.c
@@ -107,7 +107,7 @@ static void tty_buffer_reset(struct tty_buffer *p, size_t size)
 	p->commit = 0;
 	p->lookahead = 0;
 	p->read = 0;
-	p->flags = 0;
+	p->flags = true;
 }
 
 /**
@@ -249,7 +249,7 @@ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld)
  * __tty_buffer_request_room	-	grow tty buffer if needed
  * @port: tty port
  * @size: size desired
- * @flags: buffer flags if new buffer allocated (default = 0)
+ * @flags: buffer has to store flags along character data
  *
  * Make at least @size bytes of linear space available for the tty buffer.
  *
@@ -260,19 +260,19 @@ void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld)
  * Returns: the size we managed to find.
  */
 static int __tty_buffer_request_room(struct tty_port *port, size_t size,
-				     int flags)
+				     bool flags)
 {
 	struct tty_bufhead *buf = &port->buf;
 	struct tty_buffer *b, *n;
 	int left, change;
 
 	b = buf->tail;
-	if (b->flags & TTYB_NORMAL)
+	if (!b->flags)
 		left = 2 * b->size - b->used;
 	else
 		left = b->size - b->used;
 
-	change = (b->flags & TTYB_NORMAL) && (~flags & TTYB_NORMAL);
+	change = !b->flags && flags;
 	if (change || left < size) {
 		/* This is the slow path - looking for new buffers to use */
 		n = tty_buffer_alloc(port, size);
@@ -300,7 +300,7 @@ static int __tty_buffer_request_room(struct tty_port *port, size_t size,
 
 int tty_buffer_request_room(struct tty_port *port, size_t size)
 {
-	return __tty_buffer_request_room(port, size, 0);
+	return __tty_buffer_request_room(port, size, true);
 }
 EXPORT_SYMBOL_GPL(tty_buffer_request_room);
 
@@ -320,17 +320,17 @@ int tty_insert_flip_string_fixed_flag(struct tty_port *port,
 		const unsigned char *chars, char flag, size_t size)
 {
 	int copied = 0;
+	bool flags = flag != TTY_NORMAL;
 
 	do {
 		int goal = min_t(size_t, size - copied, TTY_BUFFER_PAGE);
-		int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0;
 		int space = __tty_buffer_request_room(port, goal, flags);
 		struct tty_buffer *tb = port->buf.tail;
 
 		if (unlikely(space == 0))
 			break;
 		memcpy(char_buf_ptr(tb, tb->used), chars, space);
-		if (~tb->flags & TTYB_NORMAL)
+		if (tb->flags)
 			memset(flag_buf_ptr(tb, tb->used), flag, space);
 		tb->used += space;
 		copied += space;
@@ -393,13 +393,13 @@ EXPORT_SYMBOL(tty_insert_flip_string_flags);
 int __tty_insert_flip_char(struct tty_port *port, unsigned char ch, char flag)
 {
 	struct tty_buffer *tb;
-	int flags = (flag == TTY_NORMAL) ? TTYB_NORMAL : 0;
+	bool flags = flag != TTY_NORMAL;
 
 	if (!__tty_buffer_request_room(port, 1, flags))
 		return 0;
 
 	tb = port->buf.tail;
-	if (~tb->flags & TTYB_NORMAL)
+	if (tb->flags)
 		*flag_buf_ptr(tb, tb->used) = flag;
 	*char_buf_ptr(tb, tb->used++) = ch;
 
@@ -424,13 +424,13 @@ EXPORT_SYMBOL(__tty_insert_flip_char);
 int tty_prepare_flip_string(struct tty_port *port, unsigned char **chars,
 		size_t size)
 {
-	int space = __tty_buffer_request_room(port, size, TTYB_NORMAL);
+	int space = __tty_buffer_request_room(port, size, false);
 
 	if (likely(space)) {
 		struct tty_buffer *tb = port->buf.tail;
 
 		*chars = char_buf_ptr(tb, tb->used);
-		if (~tb->flags & TTYB_NORMAL)
+		if (tb->flags)
 			memset(flag_buf_ptr(tb, tb->used), TTY_NORMAL, space);
 		tb->used += space;
 	}
@@ -492,7 +492,7 @@ static void lookahead_bufs(struct tty_port *port, struct tty_buffer *head)
 			unsigned char *p, *f = NULL;
 
 			p = char_buf_ptr(head, head->lookahead);
-			if (~head->flags & TTYB_NORMAL)
+			if (head->flags)
 				f = flag_buf_ptr(head, head->lookahead);
 
 			port->client_ops->lookahead_buf(port, p, f, count);
@@ -509,7 +509,7 @@ receive_buf(struct tty_port *port, struct tty_buffer *head, int count)
 	const char *f = NULL;
 	int n;
 
-	if (~head->flags & TTYB_NORMAL)
+	if (head->flags)
 		f = flag_buf_ptr(head, head->read);
 
 	n = port->client_ops->receive_buf(port, p, f, count);
diff --git a/include/linux/tty_buffer.h b/include/linux/tty_buffer.h
index 1796648c29073..6ceb2789e6c81 100644
--- a/include/linux/tty_buffer.h
+++ b/include/linux/tty_buffer.h
@@ -17,14 +17,11 @@ struct tty_buffer {
 	int commit;
 	int lookahead;		/* Lazy update on recv, can become less than "read" */
 	int read;
-	int flags;
+	bool flags;
 	/* Data points here */
 	unsigned long data[];
 };
 
-/* Values for .flags field of tty_buffer */
-#define TTYB_NORMAL	1	/* buffer has no flags buffer */
-
 static inline unsigned char *char_buf_ptr(struct tty_buffer *b, int ofs)
 {
 	return ((unsigned char *)b->data) + ofs;
diff --git a/include/linux/tty_flip.h b/include/linux/tty_flip.h
index 483d41cbcbb70..bfaaeee61a050 100644
--- a/include/linux/tty_flip.h
+++ b/include/linux/tty_flip.h
@@ -25,9 +25,9 @@ static inline int tty_insert_flip_char(struct tty_port *port,
 	struct tty_buffer *tb = port->buf.tail;
 	int change;
 
-	change = (tb->flags & TTYB_NORMAL) && (flag != TTY_NORMAL);
+	change = !tb->flags && (flag != TTY_NORMAL);
 	if (!change && tb->used < tb->size) {
-		if (~tb->flags & TTYB_NORMAL)
+		if (tb->flags)
 			*flag_buf_ptr(tb, tb->used) = flag;
 		*char_buf_ptr(tb, tb->used++) = ch;
 		return 1;
-- 
GitLab


From c6d30576bd6ce33095d39fe66a51ea821e953ac6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jonathan=20Neusch=C3=A4fer?= <j.neuschaefer@gmx.net>
Date: Fri, 4 Nov 2022 11:37:19 +0100
Subject: [PATCH 1141/4122] serial: Fix a typo ("ignorning")
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the two instances of this typo present in the MSM and VT8500 serial
drivers.

Signed-off-by: Jonathan Neuschäfer <j.neuschaefer@gmx.net>
Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>
Acked-by: Konrad Dybcio <konrad.dybcio@somainline.org>
Link: https://lore.kernel.org/r/20221104103719.2234098-1-j.neuschaefer@gmx.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/msm_serial.c    | 2 +-
 drivers/tty/serial/vt8500_serial.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/msm_serial.c b/drivers/tty/serial/msm_serial.c
index 2b2e0f74b75a0..843798e630844 100644
--- a/drivers/tty/serial/msm_serial.c
+++ b/drivers/tty/serial/msm_serial.c
@@ -816,7 +816,7 @@ static void msm_handle_rx(struct uart_port *port)
 			port->icount.rx++;
 		}
 
-		/* Mask conditions we're ignorning. */
+		/* Mask conditions we're ignoring. */
 		sr &= port->read_status_mask;
 
 		if (sr & MSM_UART_SR_RX_BREAK)
diff --git a/drivers/tty/serial/vt8500_serial.c b/drivers/tty/serial/vt8500_serial.c
index deedb65131606..cc9157df732f4 100644
--- a/drivers/tty/serial/vt8500_serial.c
+++ b/drivers/tty/serial/vt8500_serial.c
@@ -168,7 +168,7 @@ static void handle_rx(struct uart_port *port)
 
 		c = readw(port->membase + VT8500_RXFIFO) & 0x3ff;
 
-		/* Mask conditions we're ignorning. */
+		/* Mask conditions we're ignoring. */
 		c &= ~port->read_status_mask;
 
 		if (c & FER) {
-- 
GitLab


From 56dc5074cbec02a6922c4bbce11de9827640bb4b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Mon, 7 Nov 2022 12:21:26 +0200
Subject: [PATCH 1142/4122] serial: 8250_dma: Rearm DMA Rx if more data is
 pending
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When DMA Rx completes, the current behavior is to just exit the DMA
completion handler without future actions. If the transfer is still
on-going, UART will trigger an interrupt and that eventually rearms the
DMA Rx. The extra interrupt round-trip has an inherent latency cost
that increases the risk of FIFO overrun. In such situations, the
latency margin tends to already be less due to FIFO not being empty.

Add check into DMA Rx completion handler to detect if LSR has DR (Data
Ready) still set. DR indicates there will be more characters pending
and DMA Rx can be rearmed right away to handle them.

Cc: Gilles BULOZ <gilles.buloz@kontron.com>
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221107102126.56481-1-ilpo.jarvinen@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_dma.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/8250/8250_dma.c b/drivers/tty/serial/8250/8250_dma.c
index b85c82616e8cb..37d6af2ec4272 100644
--- a/drivers/tty/serial/8250/8250_dma.c
+++ b/drivers/tty/serial/8250/8250_dma.c
@@ -38,9 +38,8 @@ static void __dma_tx_complete(void *param)
 	spin_unlock_irqrestore(&p->port.lock, flags);
 }
 
-static void __dma_rx_complete(void *param)
+static void __dma_rx_complete(struct uart_8250_port *p)
 {
-	struct uart_8250_port	*p = param;
 	struct uart_8250_dma	*dma = p->dma;
 	struct tty_port		*tty_port = &p->port.state->port;
 	struct dma_tx_state	state;
@@ -57,6 +56,20 @@ static void __dma_rx_complete(void *param)
 	tty_flip_buffer_push(tty_port);
 }
 
+static void dma_rx_complete(void *param)
+{
+	struct uart_8250_port *p = param;
+	struct uart_8250_dma *dma = p->dma;
+	unsigned long flags;
+
+	__dma_rx_complete(p);
+
+	spin_lock_irqsave(&p->port.lock, flags);
+	if (!dma->rx_running && (serial_lsr_in(p) & UART_LSR_DR))
+		p->dma->rx_dma(p);
+	spin_unlock_irqrestore(&p->port.lock, flags);
+}
+
 int serial8250_tx_dma(struct uart_8250_port *p)
 {
 	struct uart_8250_dma		*dma = p->dma;
@@ -130,7 +143,7 @@ int serial8250_rx_dma(struct uart_8250_port *p)
 		return -EBUSY;
 
 	dma->rx_running = 1;
-	desc->callback = __dma_rx_complete;
+	desc->callback = dma_rx_complete;
 	desc->callback_param = p;
 
 	dma->rx_cookie = dmaengine_submit(desc);
-- 
GitLab


From 6a3ff858915fa8ca36c7eb02c87c9181ae2fc333 Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Wed, 9 Nov 2022 07:21:10 +0000
Subject: [PATCH 1143/4122] serial: 8250_bcm7271: Fix error handling in
 brcmuart_init()

A problem about 8250_bcm7271 create debugfs failed is triggered with the
following log given:

 [  324.516635] debugfs: Directory 'bcm7271-uart' with parent '/' already present!

The reason is that brcmuart_init() returns platform_driver_register()
directly without checking its return value, if platform_driver_register()
failed, it returns without destroy the newly created debugfs, resulting
the debugfs of 8250_bcm7271 can never be created later.

 brcmuart_init()
   debugfs_create_dir() # create debugfs directory
   platform_driver_register()
     driver_register()
       bus_add_driver()
         priv = kzalloc(...) # OOM happened
   # return without destroy debugfs directory

Fix by removing debugfs when platform_driver_register() returns error.

Fixes: 41a469482de2 ("serial: 8250: Add new 8250-core based Broadcom STB driver")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Link: https://lore.kernel.org/r/20221109072110.117291-2-yuancan@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_bcm7271.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_bcm7271.c b/drivers/tty/serial/8250/8250_bcm7271.c
index 062177b64d21f..ed5a947476920 100644
--- a/drivers/tty/serial/8250/8250_bcm7271.c
+++ b/drivers/tty/serial/8250/8250_bcm7271.c
@@ -1210,9 +1210,17 @@ static struct platform_driver brcmuart_platform_driver = {
 
 static int __init brcmuart_init(void)
 {
+	int ret;
+
 	brcmuart_debugfs_root = debugfs_create_dir(
 		brcmuart_platform_driver.driver.name, NULL);
-	return platform_driver_register(&brcmuart_platform_driver);
+	ret = platform_driver_register(&brcmuart_platform_driver);
+	if (ret) {
+		debugfs_remove_recursive(brcmuart_debugfs_root);
+		return ret;
+	}
+
+	return 0;
 }
 module_init(brcmuart_init);
 
-- 
GitLab


From d85bf5105853d57ed27c6c21ac35424cc44a0fbb Mon Sep 17 00:00:00 2001
From: Lukas Wunner <lukas@wunner.de>
Date: Wed, 9 Nov 2022 08:04:34 +0100
Subject: [PATCH 1144/4122] serial: 8250: 8250_omap: Fix calculation of RS485
 delays
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 801954d1210a ("serial: 8250: 8250_omap: Support native RS485")
calculates RS485 delays from the baudrate.  The baudrate is generated
with either a 16x or 13x divisor.  The divisor is set in the Mode
Definition Register 1 (MDR1).

The commit erroneously assumes that the register stores the divisor as
a bitmask and uses a logical AND to differentiate between 16x and 13x
divisors.  However the divisor is really stored as a 3-bit mode
(see lines 363ff in include/uapi/linux/serial_reg.h).

The logical AND operation is performed with UART_OMAP_MDR1_16X_MODE,
which is defined as 0x0 and hence yields false.  So the commit always
assumes a 13x divisor.  Fix by using an equal comparison.  This works
because we never set any of the other 5 bits in the register.  (They
pertain to IrDA mode, which is not supported by the driver).

Fixes: 801954d1210a ("serial: 8250: 8250_omap: Support native RS485")
Link: https://lore.kernel.org/linux-serial/202211070440.8hWunFUN-lkp@intel.com/
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Lukas Wunner <lukas@wunner.de>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/7d5b04da13d89b8708b9543a0b125f2b6062a77b.1667977259.git.lukas@wunner.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/8250/8250_omap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/serial/8250/8250_omap.c b/drivers/tty/serial/8250/8250_omap.c
index 1c8a48fdc8f24..7bb9da7558a1d 100644
--- a/drivers/tty/serial/8250/8250_omap.c
+++ b/drivers/tty/serial/8250/8250_omap.c
@@ -822,7 +822,7 @@ static int omap8250_rs485_config(struct uart_port *port,
 	 * of the AM65 TRM:  https://www.ti.com/lit/ug/spruid7e/spruid7e.pdf
 	 */
 	if (priv->quot) {
-		if (priv->mdr1 & UART_OMAP_MDR1_16X_MODE)
+		if (priv->mdr1 == UART_OMAP_MDR1_16X_MODE)
 			baud = port->uartclk / (16 * priv->quot);
 		else
 			baud = port->uartclk / (13 * priv->quot);
-- 
GitLab


From 7a1212475f5e313c11300272f34ffa32462bbeef Mon Sep 17 00:00:00 2001
From: Daniel Starke <daniel.starke@siemens.com>
Date: Thu, 3 Nov 2022 10:17:41 +0100
Subject: [PATCH 1145/4122] tty: n_gsm: introduce macro for minimal unit size

n_gsm has a minimal protocol overhead of 7 bytes. The current code already
checks whether the configured MRU/MTU size is at least one byte more than
this.

Introduce the macro MIN_MTU to make this value more obvious.

Signed-off-by: Daniel Starke <daniel.starke@siemens.com>
Link: https://lore.kernel.org/r/20221103091743.2119-1-daniel.starke@siemens.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_gsm.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 5e516f5cac5a3..570c40a3d78f2 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -89,6 +89,7 @@ module_param(debug, int, 0600);
  */
 #define MAX_MRU 1500
 #define MAX_MTU 1500
+#define MIN_MTU (PROT_OVERHEAD + 1)
 /* SOF, ADDR, CTRL, LEN1, LEN2, ..., FCS, EOF */
 #define PROT_OVERHEAD 7
 #define	GSM_NET_TX_TIMEOUT (HZ*10)
@@ -2712,7 +2713,9 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
 	if ((c->adaption != 1 && c->adaption != 2) || c->k)
 		return -EOPNOTSUPP;
 	/* Check the MRU/MTU range looks sane */
-	if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8)
+	if (c->mru < MIN_MTU || c->mtu < MIN_MTU)
+		return -EINVAL;
+	if (c->mru > MAX_MRU || c->mtu > MAX_MTU)
 		return -EINVAL;
 	if (c->n2 > 255)
 		return -EINVAL;
@@ -3296,7 +3299,7 @@ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc)
 		return -ENOMEM;
 	}
 	net->mtu = dlci->gsm->mtu;
-	net->min_mtu = 8;
+	net->min_mtu = MIN_MTU;
 	net->max_mtu = dlci->gsm->mtu;
 	mux_net = netdev_priv(net);
 	mux_net->dlci = dlci;
-- 
GitLab


From 2ec7a802a04c545ceea96bf67ee818d8bb0349e2 Mon Sep 17 00:00:00 2001
From: Daniel Starke <daniel.starke@siemens.com>
Date: Thu, 3 Nov 2022 10:17:42 +0100
Subject: [PATCH 1146/4122] tty: n_gsm: add parameters used with parameter
 negotiation

n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010.
See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516
The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to
the newer 27.010 here. Chapter 5.4.6.3.1 describes the encoding of the
parameter negotiation messages.

Add the parameters used there to 'gsm_mux' and 'gsm_dlci' and initialize both
according to the value ranges and recommended defaults defined in chapter 5.7.

Replace the use of the DLC default values from the 'gsm_mux' fields with the DLC
specific values from the 'gsm_dlci' fields where applicable.

Signed-off-by: Daniel Starke <daniel.starke@siemens.com>
Link: https://lore.kernel.org/r/20221103091743.2119-2-daniel.starke@siemens.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_gsm.c | 58 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index 570c40a3d78f2..c217013b3e164 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -40,6 +40,7 @@
 #include <linux/tty.h>
 #include <linux/ctype.h>
 #include <linux/mm.h>
+#include <linux/math.h>
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/poll.h>
@@ -75,7 +76,12 @@ module_param(debug, int, 0600);
 
 #define T1	10		/* 100mS */
 #define T2	34		/* 333mS */
+#define T3	10		/* 10s */
 #define N2	3		/* Retry 3 times */
+#define K	2		/* outstanding I frames */
+
+#define MAX_T3 255		/* In seconds. */
+#define MAX_WINDOW_SIZE 7	/* Limit of K in error recovery mode. */
 
 /* Use long timers for testing at low speed with debug on */
 #ifdef DEBUG_TIMING
@@ -160,7 +166,12 @@ struct gsm_dlci {
 	int prev_adaption;
 	u32 modem_rx;		/* Our incoming virtual modem lines */
 	u32 modem_tx;		/* Our outgoing modem lines */
+	unsigned int mtu;
 	bool dead;		/* Refuse re-open */
+	/* Configuration */
+	u8 prio;		/* Priority */
+	u8 ftype;		/* Frame type */
+	u8 k;			/* Window size */
 	/* Flow control */
 	bool throttled;		/* Private copy of throttle state */
 	bool constipated;	/* Throttle status for outgoing */
@@ -283,7 +294,9 @@ struct gsm_mux {
 	int adaption;		/* 1 or 2 supported */
 	u8 ftype;		/* UI or UIH */
 	int t1, t2;		/* Timers in 1/100th of a sec */
+	unsigned int t3;	/* Power wake-up timer in seconds. */
 	int n2;			/* Retry count */
+	u8 k;			/* Window size */
 
 	/* Statistics (not currently exposed) */
 	unsigned long bad_fcs;
@@ -1075,12 +1088,12 @@ static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci)
 		return 0;
 
 	/* MTU/MRU count only the data bits but watch adaption mode */
-	if ((len + h) > gsm->mtu)
-		len = gsm->mtu - h;
+	if ((len + h) > dlci->mtu)
+		len = dlci->mtu - h;
 
 	size = len + h;
 
-	msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+	msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype);
 	if (!msg)
 		return -ENOMEM;
 	dp = msg->data;
@@ -1144,19 +1157,19 @@ static int gsm_dlci_data_output_framed(struct gsm_mux *gsm,
 	len = dlci->skb->len + overhead;
 
 	/* MTU/MRU count only the data bits */
-	if (len > gsm->mtu) {
+	if (len > dlci->mtu) {
 		if (dlci->adaption == 3) {
 			/* Over long frame, bin it */
 			dev_kfree_skb_any(dlci->skb);
 			dlci->skb = NULL;
 			return 0;
 		}
-		len = gsm->mtu;
+		len = dlci->mtu;
 	} else
 		last = 1;
 
 	size = len + overhead;
-	msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+	msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype);
 	if (msg == NULL) {
 		skb_queue_tail(&dlci->skb_list, dlci->skb);
 		dlci->skb = NULL;
@@ -1213,7 +1226,7 @@ static int gsm_dlci_modem_output(struct gsm_mux *gsm, struct gsm_dlci *dlci,
 		return -EINVAL;
 	}
 
-	msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+	msg = gsm_data_alloc(gsm, dlci->addr, size, dlci->ftype);
 	if (!msg) {
 		pr_err("%s: gsm_data_alloc error", __func__);
 		return -ENOMEM;
@@ -1338,8 +1351,9 @@ static void gsm_dlci_data_kick(struct gsm_dlci *dlci)
 static int gsm_control_command(struct gsm_mux *gsm, int cmd, const u8 *data,
 			       int dlen)
 {
-	struct gsm_msg *msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->ftype);
+	struct gsm_msg *msg;
 
+	msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->dlci[0]->ftype);
 	if (msg == NULL)
 		return -ENOMEM;
 
@@ -1365,7 +1379,8 @@ static void gsm_control_reply(struct gsm_mux *gsm, int cmd, const u8 *data,
 					int dlen)
 {
 	struct gsm_msg *msg;
-	msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->ftype);
+
+	msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->dlci[0]->ftype);
 	if (msg == NULL)
 		return;
 	msg->data[0] = (cmd & 0xFE) << 1 | EA;	/* Clear C/R */
@@ -2075,6 +2090,13 @@ static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
 	dlci->gsm = gsm;
 	dlci->addr = addr;
 	dlci->adaption = gsm->adaption;
+	dlci->mtu = gsm->mtu;
+	if (addr == 0)
+		dlci->prio = 0;
+	else
+		dlci->prio = roundup(addr + 1, 8) - 1;
+	dlci->ftype = gsm->ftype;
+	dlci->k = gsm->k;
 	dlci->state = DLCI_CLOSED;
 	if (addr) {
 		dlci->data = gsm_dlci_data;
@@ -2650,7 +2672,9 @@ static struct gsm_mux *gsm_alloc_mux(void)
 
 	gsm->t1 = T1;
 	gsm->t2 = T2;
+	gsm->t3 = T3;
 	gsm->n2 = N2;
+	gsm->k = K;
 	gsm->ftype = UIH;
 	gsm->adaption = 1;
 	gsm->encoding = GSM_ADV_OPT;
@@ -2691,7 +2715,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm,
 	c->initiator = gsm->initiator;
 	c->t1 = gsm->t1;
 	c->t2 = gsm->t2;
-	c->t3 = 0;	/* Not supported */
+	c->t3 = gsm->t3;
 	c->n2 = gsm->n2;
 	if (gsm->ftype == UIH)
 		c->i = 1;
@@ -2700,7 +2724,7 @@ static void gsm_copy_config_values(struct gsm_mux *gsm,
 	pr_debug("Ftype %d i %d\n", gsm->ftype, c->i);
 	c->mru = gsm->mru;
 	c->mtu = gsm->mtu;
-	c->k = 0;
+	c->k = gsm->k;
 }
 
 static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
@@ -2717,12 +2741,16 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
 		return -EINVAL;
 	if (c->mru > MAX_MRU || c->mtu > MAX_MTU)
 		return -EINVAL;
+	if (c->t3 > MAX_T3)
+		return -EINVAL;
 	if (c->n2 > 255)
 		return -EINVAL;
 	if (c->encapsulation > 1)	/* Basic, advanced, no I */
 		return -EINVAL;
 	if (c->initiator > 1)
 		return -EINVAL;
+	if (c->k > MAX_WINDOW_SIZE)
+		return -EINVAL;
 	if (c->i == 0 || c->i > 2)	/* UIH and UI only */
 		return -EINVAL;
 	/*
@@ -2770,6 +2798,10 @@ static int gsm_config(struct gsm_mux *gsm, struct gsm_config *c)
 		gsm->t1 = c->t1;
 	if (c->t2)
 		gsm->t2 = c->t2;
+	if (c->t3)
+		gsm->t3 = c->t3;
+	if (c->k)
+		gsm->k = c->k;
 
 	/*
 	 * FIXME: We need to separate activation/deactivation from adding
@@ -3298,9 +3330,9 @@ static int gsm_create_network(struct gsm_dlci *dlci, struct gsm_netconfig *nc)
 		pr_err("alloc_netdev failed\n");
 		return -ENOMEM;
 	}
-	net->mtu = dlci->gsm->mtu;
+	net->mtu = dlci->mtu;
 	net->min_mtu = MIN_MTU;
-	net->max_mtu = dlci->gsm->mtu;
+	net->max_mtu = dlci->mtu;
 	mux_net = netdev_priv(net);
 	mux_net->dlci = dlci;
 	kref_init(&mux_net->ref);
-- 
GitLab


From 92f1f0c3290d994d753dde588698daf1ef91504b Mon Sep 17 00:00:00 2001
From: Daniel Starke <daniel.starke@siemens.com>
Date: Thu, 3 Nov 2022 10:17:43 +0100
Subject: [PATCH 1147/4122] tty: n_gsm: add parameter negotiation support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010.
See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516
The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to
the newer 27.010 here. Chapter 5.1.8.1.1 describes the parameter negotiation
messages and parameters. Chapter 5.4.1 states that the default parameters
are to be used if no negotiation is performed. Chapter 5.4.6.3.1 describes
the encoding of the parameter negotiation message. The meaning of the
parameters and allowed value ranges can be found in chapter 5.7.

Add parameter negotiation support accordingly. DLCI specific parameter
configuration by the user requires additional ioctls. This is subject to
another patch.

Signed-off-by: Daniel Starke <daniel.starke@siemens.com>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221103091743.2119-3-daniel.starke@siemens.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_gsm.c | 335 ++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 327 insertions(+), 8 deletions(-)

diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index c217013b3e164..cde1e846c81e3 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -38,6 +38,7 @@
 #include <linux/sched/signal.h>
 #include <linux/interrupt.h>
 #include <linux/tty.h>
+#include <linux/bitfield.h>
 #include <linux/ctype.h>
 #include <linux/mm.h>
 #include <linux/math.h>
@@ -127,6 +128,7 @@ struct gsm_msg {
 
 enum gsm_dlci_state {
 	DLCI_CLOSED,
+	DLCI_CONFIGURE,		/* Sending PN (for adaption > 1) */
 	DLCI_OPENING,		/* Sending SABM not seen UA */
 	DLCI_OPEN,		/* SABM/UA complete */
 	DLCI_CLOSING,		/* Sending DISC not seen UA/DM */
@@ -184,6 +186,32 @@ struct gsm_dlci {
 	struct net_device *net; /* network interface, if created */
 };
 
+/*
+ * Parameter bits used for parameter negotiation according to 3GPP 27.010
+ * chapter 5.4.6.3.1.
+ */
+
+struct gsm_dlci_param_bits {
+	u8 d_bits;
+	u8 i_cl_bits;
+	u8 p_bits;
+	u8 t_bits;
+	__le16 n_bits;
+	u8 na_bits;
+	u8 k_bits;
+};
+
+static_assert(sizeof(struct gsm_dlci_param_bits) == 8);
+
+#define PN_D_FIELD_DLCI		GENMASK(5, 0)
+#define PN_I_CL_FIELD_FTYPE	GENMASK(3, 0)
+#define PN_I_CL_FIELD_ADAPTION	GENMASK(7, 4)
+#define PN_P_FIELD_PRIO		GENMASK(5, 0)
+#define PN_T_FIELD_T1		GENMASK(7, 0)
+#define PN_N_FIELD_N1		GENMASK(15, 0)
+#define PN_NA_FIELD_N2		GENMASK(7, 0)
+#define PN_K_FIELD_K		GENMASK(2, 0)
+
 /* Total number of supported devices */
 #define GSM_TTY_MINORS		256
 
@@ -411,6 +439,7 @@ static const u8 gsm_fcs8[256] = {
 #define INIT_FCS	0xFF
 #define GOOD_FCS	0xCF
 
+static void gsm_dlci_close(struct gsm_dlci *dlci);
 static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len);
 static int gsm_modem_update(struct gsm_dlci *dlci, u8 brk);
 static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len,
@@ -533,6 +562,57 @@ static void gsm_hex_dump_bytes(const char *fname, const u8 *data,
 	kfree(prefix);
 }
 
+/**
+ * gsm_encode_params	-	encode DLCI parameters
+ * @dlci: DLCI to encode from
+ * @params: buffer to fill with the encoded parameters
+ *
+ * Encodes the parameters according to GSM 07.10 section 5.4.6.3.1
+ * table 3.
+ */
+static int gsm_encode_params(const struct gsm_dlci *dlci,
+			     struct gsm_dlci_param_bits *params)
+{
+	const struct gsm_mux *gsm = dlci->gsm;
+	unsigned int i, cl;
+
+	switch (dlci->ftype) {
+	case UIH:
+		i = 0; /* UIH */
+		break;
+	case UI:
+		i = 1; /* UI */
+		break;
+	default:
+		pr_debug("unsupported frame type %d\n", dlci->ftype);
+		return -EINVAL;
+	}
+
+	switch (dlci->adaption) {
+	case 1: /* Unstructured */
+		cl = 0; /* convergence layer type 1 */
+		break;
+	case 2: /* Unstructured with modem bits. */
+		cl = 1; /* convergence layer type 2 */
+		break;
+	default:
+		pr_debug("unsupported adaption %d\n", dlci->adaption);
+		return -EINVAL;
+	}
+
+	params->d_bits = FIELD_PREP(PN_D_FIELD_DLCI, dlci->addr);
+	/* UIH, convergence layer type 1 */
+	params->i_cl_bits = FIELD_PREP(PN_I_CL_FIELD_FTYPE, i) |
+			    FIELD_PREP(PN_I_CL_FIELD_ADAPTION, cl);
+	params->p_bits = FIELD_PREP(PN_P_FIELD_PRIO, dlci->prio);
+	params->t_bits = FIELD_PREP(PN_T_FIELD_T1, gsm->t1);
+	params->n_bits = cpu_to_le16(FIELD_PREP(PN_N_FIELD_N1, dlci->mtu));
+	params->na_bits = FIELD_PREP(PN_NA_FIELD_N2, gsm->n2);
+	params->k_bits = FIELD_PREP(PN_K_FIELD_K, dlci->k);
+
+	return 0;
+}
+
 /**
  *	gsm_register_devices	-	register all tty devices for a given mux index
  *
@@ -1450,6 +1530,116 @@ static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci,
 	dlci->modem_rx = mlines;
 }
 
+/**
+ * gsm_process_negotiation	-	process received parameters
+ * @gsm: GSM channel
+ * @addr: DLCI address
+ * @cr: command/response
+ * @params: encoded parameters from the parameter negotiation message
+ *
+ * Used when the response for our parameter negotiation command was
+ * received.
+ */
+static int gsm_process_negotiation(struct gsm_mux *gsm, unsigned int addr,
+				   unsigned int cr,
+				   const struct gsm_dlci_param_bits *params)
+{
+	struct gsm_dlci *dlci = gsm->dlci[addr];
+	unsigned int ftype, i, adaption, prio, n1, k;
+
+	i = FIELD_GET(PN_I_CL_FIELD_FTYPE, params->i_cl_bits);
+	adaption = FIELD_GET(PN_I_CL_FIELD_ADAPTION, params->i_cl_bits) + 1;
+	prio = FIELD_GET(PN_P_FIELD_PRIO, params->p_bits);
+	n1 = FIELD_GET(PN_N_FIELD_N1, get_unaligned_le16(&params->n_bits));
+	k = FIELD_GET(PN_K_FIELD_K, params->k_bits);
+
+	if (n1 < MIN_MTU) {
+		if (debug & DBG_ERRORS)
+			pr_info("%s N1 out of range in PN\n", __func__);
+		return -EINVAL;
+	}
+
+	switch (i) {
+	case 0x00:
+		ftype = UIH;
+		break;
+	case 0x01:
+		ftype = UI;
+		break;
+	case 0x02: /* I frames are not supported */
+		if (debug & DBG_ERRORS)
+			pr_info("%s unsupported I frame request in PN\n",
+				__func__);
+		return -EINVAL;
+	default:
+		if (debug & DBG_ERRORS)
+			pr_info("%s i out of range in PN\n", __func__);
+		return -EINVAL;
+	}
+
+	if (!cr && gsm->initiator) {
+		if (adaption != dlci->adaption) {
+			if (debug & DBG_ERRORS)
+				pr_info("%s invalid adaption %d in PN\n",
+					__func__, adaption);
+			return -EINVAL;
+		}
+		if (prio != dlci->prio) {
+			if (debug & DBG_ERRORS)
+				pr_info("%s invalid priority %d in PN",
+					__func__, prio);
+			return -EINVAL;
+		}
+		if (n1 > gsm->mru || n1 > dlci->mtu) {
+			/* We requested a frame size but the other party wants
+			 * to send larger frames. The standard allows only a
+			 * smaller response value than requested (5.4.6.3.1).
+			 */
+			if (debug & DBG_ERRORS)
+				pr_info("%s invalid N1 %d in PN\n", __func__,
+					n1);
+			return -EINVAL;
+		}
+		dlci->mtu = n1;
+		if (ftype != dlci->ftype) {
+			if (debug & DBG_ERRORS)
+				pr_info("%s invalid i %d in PN\n", __func__, i);
+			return -EINVAL;
+		}
+		if (ftype != UI && ftype != UIH && k > dlci->k) {
+			if (debug & DBG_ERRORS)
+				pr_info("%s invalid k %d in PN\n", __func__, k);
+			return -EINVAL;
+		}
+		dlci->k = k;
+	} else if (cr && !gsm->initiator) {
+		/* Only convergence layer type 1 and 2 are supported. */
+		if (adaption != 1 && adaption != 2) {
+			if (debug & DBG_ERRORS)
+				pr_info("%s invalid adaption %d in PN\n",
+					__func__, adaption);
+			return -EINVAL;
+		}
+		dlci->adaption = adaption;
+		if (n1 > gsm->mru) {
+			/* Propose a smaller value */
+			dlci->mtu = gsm->mru;
+		} else if (n1 > MAX_MTU) {
+			/* Propose a smaller value */
+			dlci->mtu = MAX_MTU;
+		} else {
+			dlci->mtu = n1;
+		}
+		dlci->prio = prio;
+		dlci->ftype = ftype;
+		dlci->k = k;
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  *	gsm_control_modem	-	modem status received
  *	@gsm: GSM channel
@@ -1503,6 +1693,65 @@ static void gsm_control_modem(struct gsm_mux *gsm, const u8 *data, int clen)
 	gsm_control_reply(gsm, CMD_MSC, data, clen);
 }
 
+/**
+ * gsm_control_negotiation	-	parameter negotiation received
+ * @gsm: GSM channel
+ * @cr: command/response flag
+ * @data: data following command
+ * @dlen: data length
+ *
+ * We have received a parameter negotiation message. This is used by
+ * the GSM mux protocol to configure protocol parameters for a new DLCI.
+ */
+static void gsm_control_negotiation(struct gsm_mux *gsm, unsigned int cr,
+				    const u8 *data, unsigned int dlen)
+{
+	unsigned int addr;
+	struct gsm_dlci_param_bits pn_reply;
+	struct gsm_dlci *dlci;
+	struct gsm_dlci_param_bits *params;
+
+	if (dlen < sizeof(struct gsm_dlci_param_bits))
+		return;
+
+	/* Invalid DLCI? */
+	params = (struct gsm_dlci_param_bits *)data;
+	addr = FIELD_GET(PN_D_FIELD_DLCI, params->d_bits);
+	if (addr == 0 || addr >= NUM_DLCI || !gsm->dlci[addr])
+		return;
+	dlci = gsm->dlci[addr];
+
+	/* Too late for parameter negotiation? */
+	if ((!cr && dlci->state == DLCI_OPENING) || dlci->state == DLCI_OPEN)
+		return;
+
+	/* Process the received parameters */
+	if (gsm_process_negotiation(gsm, addr, cr, params) != 0) {
+		/* Negotiation failed. Close the link. */
+		if (debug & DBG_ERRORS)
+			pr_info("%s PN failed\n", __func__);
+		gsm_dlci_close(dlci);
+		return;
+	}
+
+	if (cr) {
+		/* Reply command with accepted parameters. */
+		if (gsm_encode_params(dlci, &pn_reply) == 0)
+			gsm_control_reply(gsm, CMD_PN, (const u8 *)&pn_reply,
+					  sizeof(pn_reply));
+		else if (debug & DBG_ERRORS)
+			pr_info("%s PN invalid\n", __func__);
+	} else if (dlci->state == DLCI_CONFIGURE) {
+		/* Proceed with link setup by sending SABM before UA */
+		dlci->state = DLCI_OPENING;
+		gsm_command(gsm, dlci->addr, SABM|PF);
+		mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+	} else {
+		if (debug & DBG_ERRORS)
+			pr_info("%s PN in invalid state\n", __func__);
+	}
+}
+
 /**
  *	gsm_control_rls		-	remote line status
  *	@gsm: GSM channel
@@ -1612,8 +1861,12 @@ static void gsm_control_message(struct gsm_mux *gsm, unsigned int command,
 		/* Modem wishes to enter power saving state */
 		gsm_control_reply(gsm, CMD_PSC, NULL, 0);
 		break;
+		/* Optional commands */
+	case CMD_PN:
+		/* Modem sends a parameter negotiation command */
+		gsm_control_negotiation(gsm, 1, data, clen);
+		break;
 		/* Optional unsupported commands */
-	case CMD_PN:	/* Parameter negotiation */
 	case CMD_RPN:	/* Remote port negotiation */
 	case CMD_SNC:	/* Service negotiation command */
 	default:
@@ -1646,8 +1899,8 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command,
 	spin_lock_irqsave(&gsm->control_lock, flags);
 
 	ctrl = gsm->pending_cmd;
-	/* Does the reply match our command */
 	command |= 1;
+	/* Does the reply match our command */
 	if (ctrl != NULL && (command == ctrl->cmd || command == CMD_NSC)) {
 		/* Our command was replied to, kill the retry timer */
 		del_timer(&gsm->t2_timer);
@@ -1657,6 +1910,9 @@ static void gsm_control_response(struct gsm_mux *gsm, unsigned int command,
 			ctrl->error = -EOPNOTSUPP;
 		ctrl->done = 1;
 		wake_up(&gsm->event);
+	/* Or did we receive the PN response to our PN command */
+	} else if (command == CMD_PN) {
+		gsm_control_negotiation(gsm, 0, data, clen);
 	}
 	spin_unlock_irqrestore(&gsm->control_lock, flags);
 }
@@ -1834,6 +2090,32 @@ static void gsm_dlci_open(struct gsm_dlci *dlci)
 	wake_up(&dlci->gsm->event);
 }
 
+/**
+ * gsm_dlci_negotiate	-	start parameter negotiation
+ * @dlci: DLCI to open
+ *
+ * Starts the parameter negotiation for the new DLCI. This needs to be done
+ * before the DLCI initialized the channel via SABM.
+ */
+static int gsm_dlci_negotiate(struct gsm_dlci *dlci)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+	struct gsm_dlci_param_bits params;
+	int ret;
+
+	ret = gsm_encode_params(dlci, &params);
+	if (ret != 0)
+		return ret;
+
+	/* We cannot asynchronous wait for the command response with
+	 * gsm_command() and gsm_control_wait() at this point.
+	 */
+	ret = gsm_control_command(gsm, CMD_PN, (const u8 *)&params,
+				  sizeof(params));
+
+	return ret;
+}
+
 /**
  *	gsm_dlci_t1		-	T1 timer expiry
  *	@t: timer contained in the DLCI that opened
@@ -1855,6 +2137,14 @@ static void gsm_dlci_t1(struct timer_list *t)
 	struct gsm_mux *gsm = dlci->gsm;
 
 	switch (dlci->state) {
+	case DLCI_CONFIGURE:
+		if (dlci->retries && gsm_dlci_negotiate(dlci) == 0) {
+			dlci->retries--;
+			mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+		} else {
+			gsm_dlci_begin_close(dlci); /* prevent half open link */
+		}
+		break;
 	case DLCI_OPENING:
 		if (dlci->retries) {
 			dlci->retries--;
@@ -1893,17 +2183,46 @@ static void gsm_dlci_t1(struct timer_list *t)
  *	to the modem which should then reply with a UA or ADM, at which point
  *	we will move into open state. Opening is done asynchronously with retry
  *	running off timers and the responses.
+ *	Parameter negotiation is performed before SABM if required.
  */
 
 static void gsm_dlci_begin_open(struct gsm_dlci *dlci)
 {
-	struct gsm_mux *gsm = dlci->gsm;
-	if (dlci->state == DLCI_OPEN || dlci->state == DLCI_OPENING)
+	struct gsm_mux *gsm = dlci ? dlci->gsm : NULL;
+	bool need_pn = false;
+
+	if (!gsm)
 		return;
-	dlci->retries = gsm->n2;
-	dlci->state = DLCI_OPENING;
-	gsm_command(dlci->gsm, dlci->addr, SABM|PF);
-	mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+
+	if (dlci->addr != 0) {
+		if (gsm->adaption != 1 || gsm->adaption != dlci->adaption)
+			need_pn = true;
+		if (dlci->prio != (roundup(dlci->addr + 1, 8) - 1))
+			need_pn = true;
+		if (gsm->ftype != dlci->ftype)
+			need_pn = true;
+	}
+
+	switch (dlci->state) {
+	case DLCI_CLOSED:
+	case DLCI_CLOSING:
+		dlci->retries = gsm->n2;
+		if (!need_pn) {
+			dlci->state = DLCI_OPENING;
+			gsm_command(gsm, dlci->addr, SABM|PF);
+		} else {
+			/* Configure DLCI before setup */
+			dlci->state = DLCI_CONFIGURE;
+			if (gsm_dlci_negotiate(dlci) != 0) {
+				gsm_dlci_close(dlci);
+				return;
+			}
+		}
+		mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+		break;
+	default:
+		break;
+	}
 }
 
 /**
-- 
GitLab


From a3be423f0657e603b45998ef2bb9e5d27dc226c3 Mon Sep 17 00:00:00 2001
From: Shaomin Deng <dengshaomin@cdjrlc.com>
Date: Sat, 5 Nov 2022 11:26:56 -0400
Subject: [PATCH 1148/4122] tty: n_gsm: Delete unneeded semicolon

Delete the unneeded semicolon after curly braces.

Signed-off-by: Shaomin Deng <dengshaomin@cdjrlc.com>
Link: https://lore.kernel.org/r/20221105152656.4638-1-dengshaomin@cdjrlc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_gsm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c
index cde1e846c81e3..4ef88e6b18705 100644
--- a/drivers/tty/n_gsm.c
+++ b/drivers/tty/n_gsm.c
@@ -1379,7 +1379,7 @@ static int gsm_dlci_data_sweep(struct gsm_mux *gsm)
 		}
 		if (!sent)
 			break;
-	};
+	}
 
 	return ret;
 }
-- 
GitLab


From a055204b063ade914e6dd6e270d3c2c0453a3cf5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 2 Sep 2022 17:55:25 -0700
Subject: [PATCH 1149/4122] leds: gpio: switch to using devm_fwnode_gpiod_get()

devm_fwnode_get_gpiod_from_child() is going away as the name is too
unwieldy, let's switch to using the new devm_fwnode_gpiod_get().

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/leds/leds-gpio.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/leds/leds-gpio.c b/drivers/leds/leds-gpio.c
index 092eb59a7d325..ce4e79939731d 100644
--- a/drivers/leds/leds-gpio.c
+++ b/drivers/leds/leds-gpio.c
@@ -151,9 +151,8 @@ static struct gpio_leds_priv *gpio_leds_create(struct platform_device *pdev)
 		 * will be updated after LED class device is registered,
 		 * Only then the final LED name is known.
 		 */
-		led.gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL, child,
-							     GPIOD_ASIS,
-							     NULL);
+		led.gpiod = devm_fwnode_gpiod_get(dev, child, NULL, GPIOD_ASIS,
+						  NULL);
 		if (IS_ERR(led.gpiod)) {
 			fwnode_handle_put(child);
 			return ERR_CAST(led.gpiod);
-- 
GitLab


From 17521f263fc0ee2c202d5a762679c0ff0cf24b80 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 2 Sep 2022 17:55:26 -0700
Subject: [PATCH 1150/4122] leds: lgm-sso: switch to using
 devm_fwnode_gpiod_get()

devm_fwnode_get_gpiod_from_child() is going away as the name is too
unwieldy, let's switch to using the new devm_fwnode_gpiod_get().

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/leds/blink/leds-lgm-sso.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c
index 6f270c0272fb1..35c61311e7fd8 100644
--- a/drivers/leds/blink/leds-lgm-sso.c
+++ b/drivers/leds/blink/leds-lgm-sso.c
@@ -635,9 +635,8 @@ __sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
 		led->priv = priv;
 		desc = &led->desc;
 
-		led->gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL,
-							      fwnode_child,
-							      GPIOD_ASIS, NULL);
+		led->gpiod = devm_fwnode_gpiod_get(dev, fwnode_child, NULL,
+						   GPIOD_ASIS, NULL);
 		if (IS_ERR(led->gpiod)) {
 			ret = dev_err_probe(dev, PTR_ERR(led->gpiod), "led: get gpio fail!\n");
 			goto __dt_err;
-- 
GitLab


From 2fe8e1dcf937272c5425e69947819894fcf077a6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 2 Sep 2022 17:55:27 -0700
Subject: [PATCH 1151/4122] gpiolib: remove
 devm_fwnode_get_[index_]gpiod_from_child()

Now that there are no more users of these APIs in the kernel we can
remove them.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/gpio/consumer.h | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h
index 36460ced060b0..45da8f137fe5d 100644
--- a/include/linux/gpio/consumer.h
+++ b/include/linux/gpio/consumer.h
@@ -581,27 +581,6 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev,
 					   flags, label);
 }
 
-static inline
-struct gpio_desc *devm_fwnode_get_index_gpiod_from_child(struct device *dev,
-						const char *con_id, int index,
-						struct fwnode_handle *child,
-						enum gpiod_flags flags,
-						const char *label)
-{
-	return devm_fwnode_gpiod_get_index(dev, child, con_id, index,
-					   flags, label);
-}
-
-static inline
-struct gpio_desc *devm_fwnode_get_gpiod_from_child(struct device *dev,
-						   const char *con_id,
-						   struct fwnode_handle *child,
-						   enum gpiod_flags flags,
-						   const char *label)
-{
-	return devm_fwnode_gpiod_get_index(dev, child, con_id, 0, flags, label);
-}
-
 #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_OF_GPIO)
 struct device_node;
 
-- 
GitLab


From defbab270d45e32b068e7e73c3567232d745c60f Mon Sep 17 00:00:00 2001
From: Matt Redfearn <matt.redfearn@mips.com>
Date: Tue, 27 Sep 2022 14:52:56 -0700
Subject: [PATCH 1152/4122] include/uapi/linux/swab: Fix potentially missing
 __always_inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit bc27fb68aaad ("include/uapi/linux/byteorder, swab: force inlining
of some byteswap operations") added __always_inline to swab functions
and commit 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to
userspace headers") added a definition of __always_inline for use in
exported headers when the kernel's compiler.h is not available.

However, since swab.h does not include stddef.h, if the header soup does
not indirectly include it, the definition of __always_inline is missing,
resulting in a compilation failure, which was observed compiling the
perf tool using exported headers containing this commit:

In file included from /usr/include/linux/byteorder/little_endian.h:12:0,
                 from /usr/include/asm/byteorder.h:14,
                 from tools/include/uapi/linux/perf_event.h:20,
                 from perf.h:8,
                 from builtin-bench.c:18:
/usr/include/linux/swab.h:160:8: error: unknown type name `__always_inline'
 static __always_inline __u16 __swab16p(const __u16 *p)

Fix this by replacing the inclusion of linux/compiler.h with
linux/stddef.h to ensure that we pick up that definition if required,
without relying on it's indirect inclusion. compiler.h is then included
indirectly, via stddef.h.

Fixes: 283d75737837 ("uapi/linux/stddef.h: Provide __always_inline to userspace headers")
Signed-off-by: Matt Redfearn <matt.redfearn@mips.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Petr Vaněk <arkamar@atlas.cz>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/linux/swab.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h
index 0723a9cce747c..01717181339eb 100644
--- a/include/uapi/linux/swab.h
+++ b/include/uapi/linux/swab.h
@@ -3,7 +3,7 @@
 #define _UAPI_LINUX_SWAB_H
 
 #include <linux/types.h>
-#include <linux/compiler.h>
+#include <linux/stddef.h>
 #include <asm/bitsperlong.h>
 #include <asm/swab.h>
 
-- 
GitLab


From 8afe82550240640617abfb3d6ba2c7579261e7fa Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 8 Nov 2022 15:38:52 +0200
Subject: [PATCH 1153/4122] gpiolib: of: Prepare of_gpiochip_add() /
 of_gpiochip_remove() for fwnode

GPIO library is getting rid of of_node, fwnode should be utilized instead.
Prepare of_gpiochip_add() / of_gpiochip_remove() for fwnode.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index be9c34cca3225..000020eb78d83 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -1104,9 +1104,11 @@ static int of_gpiochip_add_pin_range(struct gpio_chip *chip) { return 0; }
 
 int of_gpiochip_add(struct gpio_chip *chip)
 {
+	struct device_node *np;
 	int ret;
 
-	if (!chip->of_node)
+	np = to_of_node(chip->fwnode);
+	if (!np)
 		return 0;
 
 	if (!chip->of_xlate) {
@@ -1123,18 +1125,18 @@ int of_gpiochip_add(struct gpio_chip *chip)
 	if (ret)
 		return ret;
 
-	of_node_get(chip->of_node);
+	fwnode_handle_get(chip->fwnode);
 
 	ret = of_gpiochip_scan_gpios(chip);
 	if (ret)
-		of_node_put(chip->of_node);
+		fwnode_handle_put(chip->fwnode);
 
 	return ret;
 }
 
 void of_gpiochip_remove(struct gpio_chip *chip)
 {
-	of_node_put(chip->of_node);
+	fwnode_handle_put(chip->fwnode);
 }
 
 void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev)
-- 
GitLab


From 27043a7d500c4a3debb899c28bbf492492f64e58 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 8 Nov 2022 15:38:53 +0200
Subject: [PATCH 1154/4122] gpiolib: of: Integrate
 of_gpiochip_init_valid_mask() into gpiochip_init_valid_mask()

In preparation to complete fwnode switch, integrate
of_gpiochip_init_valid_mask() into gpiochip_init_valid_mask().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 42 ------------------------------
 drivers/gpio/gpiolib-of.h |  5 ----
 drivers/gpio/gpiolib.c    | 54 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 000020eb78d83..4be3c21aa7188 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -112,24 +112,6 @@ static struct gpio_desc *of_xlate_and_get_gpiod_flags(struct gpio_chip *chip,
 	return gpiochip_get_desc(chip, ret);
 }
 
-/**
- * of_gpio_need_valid_mask() - figure out if the OF GPIO driver needs
- * to set the .valid_mask
- * @gc: the target gpio_chip
- *
- * Return: true if the valid mask needs to be set
- */
-bool of_gpio_need_valid_mask(const struct gpio_chip *gc)
-{
-	int size;
-	const struct device_node *np = gc->of_node;
-
-	size = of_property_count_u32_elems(np,  "gpio-reserved-ranges");
-	if (size > 0 && size % 2 == 0)
-		return true;
-	return false;
-}
-
 /*
  * Overrides stated polarity of a gpio line and warns when there is a
  * discrepancy.
@@ -989,28 +971,6 @@ void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc)
 }
 EXPORT_SYMBOL_GPL(of_mm_gpiochip_remove);
 
-static void of_gpiochip_init_valid_mask(struct gpio_chip *chip)
-{
-	int len, i;
-	u32 start, count;
-	struct device_node *np = chip->of_node;
-
-	len = of_property_count_u32_elems(np,  "gpio-reserved-ranges");
-	if (len < 0 || len % 2 != 0)
-		return;
-
-	for (i = 0; i < len; i += 2) {
-		of_property_read_u32_index(np, "gpio-reserved-ranges",
-					   i, &start);
-		of_property_read_u32_index(np, "gpio-reserved-ranges",
-					   i + 1, &count);
-		if (start >= chip->ngpio || start + count > chip->ngpio)
-			continue;
-
-		bitmap_clear(chip->valid_mask, start, count);
-	}
-};
-
 #ifdef CONFIG_PINCTRL
 static int of_gpiochip_add_pin_range(struct gpio_chip *chip)
 {
@@ -1119,8 +1079,6 @@ int of_gpiochip_add(struct gpio_chip *chip)
 	if (chip->of_gpio_n_cells > MAX_PHANDLE_ARGS)
 		return -EINVAL;
 
-	of_gpiochip_init_valid_mask(chip);
-
 	ret = of_gpiochip_add_pin_range(chip);
 	if (ret)
 		return ret;
diff --git a/drivers/gpio/gpiolib-of.h b/drivers/gpio/gpiolib-of.h
index 8af2bc899aab2..2c32a332ede59 100644
--- a/drivers/gpio/gpiolib-of.h
+++ b/drivers/gpio/gpiolib-of.h
@@ -14,7 +14,6 @@ struct gpio_desc *of_find_gpio(struct device *dev,
 int of_gpiochip_add(struct gpio_chip *gc);
 void of_gpiochip_remove(struct gpio_chip *gc);
 int of_gpio_get_count(struct device *dev, const char *con_id);
-bool of_gpio_need_valid_mask(const struct gpio_chip *gc);
 void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev);
 #else
 static inline struct gpio_desc *of_find_gpio(struct device *dev,
@@ -30,10 +29,6 @@ static inline int of_gpio_get_count(struct device *dev, const char *con_id)
 {
 	return 0;
 }
-static inline bool of_gpio_need_valid_mask(const struct gpio_chip *gc)
-{
-	return false;
-}
 static inline void of_gpio_dev_init(struct gpio_chip *gc,
 				    struct gpio_device *gdev)
 {
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e8faedca6b14f..11fb7ec883e9f 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -445,9 +445,21 @@ static unsigned long *gpiochip_allocate_mask(struct gpio_chip *gc)
 	return p;
 }
 
+static unsigned int gpiochip_count_reserved_ranges(struct gpio_chip *gc)
+{
+	int size;
+
+	/* Format is "start, count, ..." */
+	size = fwnode_property_count_u32(gc->fwnode, "gpio-reserved-ranges");
+	if (size > 0 && size % 2 == 0)
+		return size;
+
+	return 0;
+}
+
 static int gpiochip_alloc_valid_mask(struct gpio_chip *gc)
 {
-	if (!(of_gpio_need_valid_mask(gc) || gc->init_valid_mask))
+	if (!(gpiochip_count_reserved_ranges(gc) || gc->init_valid_mask))
 		return 0;
 
 	gc->valid_mask = gpiochip_allocate_mask(gc);
@@ -457,8 +469,48 @@ static int gpiochip_alloc_valid_mask(struct gpio_chip *gc)
 	return 0;
 }
 
+static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc)
+{
+	unsigned int size;
+	u32 *ranges;
+	int ret;
+
+	size = gpiochip_count_reserved_ranges(gc);
+	if (size == 0)
+		return 0;
+
+	ranges = kmalloc_array(size, sizeof(*ranges), GFP_KERNEL);
+	if (!ranges)
+		return -ENOMEM;
+
+	ret = fwnode_property_read_u32_array(gc->fwnode, "gpio-reserved-ranges", ranges, size);
+	if (ret) {
+		kfree(ranges);
+		return ret;
+	}
+
+	while (size) {
+		u32 count = ranges[--size];
+		u32 start = ranges[--size];
+
+		if (start >= gc->ngpio || start + count > gc->ngpio)
+			continue;
+
+		bitmap_clear(gc->valid_mask, start, count);
+	}
+
+	kfree(ranges);
+	return 0;
+}
+
 static int gpiochip_init_valid_mask(struct gpio_chip *gc)
 {
+	int ret;
+
+	ret = gpiochip_apply_reserved_ranges(gc);
+	if (ret)
+		return ret;
+
 	if (gc->init_valid_mask)
 		return gc->init_valid_mask(gc,
 					   gc->valid_mask,
-- 
GitLab


From bdf1da5df9da680589a7f74448dd0a94dd3e1446 Mon Sep 17 00:00:00 2001
From: Bernard Metzler <bmt@zurich.ibm.com>
Date: Mon, 7 Nov 2022 15:50:57 +0100
Subject: [PATCH 1155/4122] RDMA/siw: Fix immediate work request flush to
 completion queue

Correctly set send queue element opcode during immediate work request
flushing in post sendqueue operation, if the QP is in ERROR state.
An undefined ocode value results in out-of-bounds access to an array
for mapping the opcode between siw internal and RDMA core representation
in work completion generation. It resulted in a KASAN BUG report
of type 'global-out-of-bounds' during NFSoRDMA testing.

This patch further fixes a potential case of a malicious user which may
write undefined values for completion queue elements status or opcode,
if the CQ is memory mapped to user land. It avoids the same out-of-bounds
access to arrays for status and opcode mapping as described above.

Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
Fixes: b0fff7317bb4 ("rdma/siw: completion queue methods")
Reported-by: Olga Kornievskaia <kolga@netapp.com>
Reviewed-by: Tom Talpey <tom@talpey.com>
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20221107145057.895747-1-bmt@zurich.ibm.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/sw/siw/siw_cq.c    | 24 ++++++++++++++--
 drivers/infiniband/sw/siw/siw_verbs.c | 40 ++++++++++++++++++++++++---
 2 files changed, 58 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
index d68e37859e73b..acc7bcd538b53 100644
--- a/drivers/infiniband/sw/siw/siw_cq.c
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -56,8 +56,6 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
 	if (READ_ONCE(cqe->flags) & SIW_WQE_VALID) {
 		memset(wc, 0, sizeof(*wc));
 		wc->wr_id = cqe->id;
-		wc->status = map_cqe_status[cqe->status].ib;
-		wc->opcode = map_wc_opcode[cqe->opcode];
 		wc->byte_len = cqe->bytes;
 
 		/*
@@ -71,10 +69,32 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
 				wc->wc_flags = IB_WC_WITH_INVALIDATE;
 			}
 			wc->qp = cqe->base_qp;
+			wc->opcode = map_wc_opcode[cqe->opcode];
+			wc->status = map_cqe_status[cqe->status].ib;
 			siw_dbg_cq(cq,
 				   "idx %u, type %d, flags %2x, id 0x%pK\n",
 				   cq->cq_get % cq->num_cqe, cqe->opcode,
 				   cqe->flags, (void *)(uintptr_t)cqe->id);
+		} else {
+			/*
+			 * A malicious user may set invalid opcode or
+			 * status in the user mmapped CQE array.
+			 * Sanity check and correct values in that case
+			 * to avoid out-of-bounds access to global arrays
+			 * for opcode and status mapping.
+			 */
+			u8 opcode = cqe->opcode;
+			u16 status = cqe->status;
+
+			if (opcode >= SIW_NUM_OPCODES) {
+				opcode = 0;
+				status = IB_WC_GENERAL_ERR;
+			} else if (status >= SIW_NUM_WC_STATUS) {
+				status = IB_WC_GENERAL_ERR;
+			}
+			wc->opcode = map_wc_opcode[opcode];
+			wc->status = map_cqe_status[status].ib;
+
 		}
 		WRITE_ONCE(cqe->flags, 0);
 		cq->cq_get++;
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 3e814cfb298cf..906fde1a2a0de 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -676,13 +676,45 @@ static int siw_copy_inline_sgl(const struct ib_send_wr *core_wr,
 static int siw_sq_flush_wr(struct siw_qp *qp, const struct ib_send_wr *wr,
 			   const struct ib_send_wr **bad_wr)
 {
-	struct siw_sqe sqe = {};
 	int rv = 0;
 
 	while (wr) {
-		sqe.id = wr->wr_id;
-		sqe.opcode = wr->opcode;
-		rv = siw_sqe_complete(qp, &sqe, 0, SIW_WC_WR_FLUSH_ERR);
+		struct siw_sqe sqe = {};
+
+		switch (wr->opcode) {
+		case IB_WR_RDMA_WRITE:
+			sqe.opcode = SIW_OP_WRITE;
+			break;
+		case IB_WR_RDMA_READ:
+			sqe.opcode = SIW_OP_READ;
+			break;
+		case IB_WR_RDMA_READ_WITH_INV:
+			sqe.opcode = SIW_OP_READ_LOCAL_INV;
+			break;
+		case IB_WR_SEND:
+			sqe.opcode = SIW_OP_SEND;
+			break;
+		case IB_WR_SEND_WITH_IMM:
+			sqe.opcode = SIW_OP_SEND_WITH_IMM;
+			break;
+		case IB_WR_SEND_WITH_INV:
+			sqe.opcode = SIW_OP_SEND_REMOTE_INV;
+			break;
+		case IB_WR_LOCAL_INV:
+			sqe.opcode = SIW_OP_INVAL_STAG;
+			break;
+		case IB_WR_REG_MR:
+			sqe.opcode = SIW_OP_REG_MR;
+			break;
+		default:
+			rv = -EINVAL;
+			break;
+		}
+		if (!rv) {
+			sqe.id = wr->wr_id;
+			rv = siw_sqe_complete(qp, &sqe, 0,
+					      SIW_WC_WR_FLUSH_ERR);
+		}
 		if (rv) {
 			if (bad_wr)
 				*bad_wr = wr;
-- 
GitLab


From ce8cc75c7419ad54cb99437543a54c97c7446db5 Mon Sep 17 00:00:00 2001
From: Duoming Zhou <duoming@zju.edu.cn>
Date: Tue, 18 Oct 2022 16:34:24 +0800
Subject: [PATCH 1156/4122] drivers: staging: r8188eu: Fix
 sleep-in-atomic-context bug in rtw_join_timeout_handler

The rtw_join_timeout_handler() is a timer handler that
runs in atomic context, but it could call msleep().
As a result, the sleep-in-atomic-context bug will happen.
The process is shown below:

     (atomic context)
rtw_join_timeout_handler
 _rtw_join_timeout_handler
  rtw_do_join
   rtw_select_and_join_from_scanned_queue
    rtw_indicate_disconnect
     rtw_lps_ctrl_wk_cmd
      lps_ctrl_wk_hdl
       LPS_Leave
        LPS_RF_ON_check
         msleep //sleep in atomic context

Fix by removing msleep() and replacing with mdelay().

Fixes: 15865124feed ("staging: r8188eu: introduce new core dir for RTL8188eu driver")
Signed-off-by: Duoming Zhou <duoming@zju.edu.cn>
Link: https://lore.kernel.org/r/20221018083424.79741-1-duoming@zju.edu.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_pwrctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_pwrctrl.c b/drivers/staging/r8188eu/core/rtw_pwrctrl.c
index 870d81735b8dc..5290ac36f08c1 100644
--- a/drivers/staging/r8188eu/core/rtw_pwrctrl.c
+++ b/drivers/staging/r8188eu/core/rtw_pwrctrl.c
@@ -273,7 +273,7 @@ static s32 LPS_RF_ON_check(struct adapter *padapter, u32 delay_ms)
 			err = -1;
 			break;
 		}
-		msleep(1);
+		mdelay(1);
 	}
 
 	return err;
-- 
GitLab


From 8ce256540f41c84cb910a681c2cb84e8c80ab8db Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 8 Nov 2022 14:34:46 -0600
Subject: [PATCH 1157/4122] staging: ks7010: Avoid clashing function prototypes

When built with Control Flow Integrity, function prototypes between
caller and function declaration must match. These mismatches are visible
at compile time with the new -Wcast-function-type-strict in Clang[1].

Fix a total of 27 warnings like these:

drivers/staging/ks7010/ks_wlan_net.c:2415:2: warning: cast from 'int (*)(struct net_device *, struct iw_request_info *, struct iw_point *, char *)' to 'iw_handler' (aka 'int (*)(struct net_device *, struct iw_request_info *, union iwreq_data *, char *)') converts to incompatible function type [-Wcast-function-type-strict]
        (iw_handler)ks_wlan_get_firmware_version,/* 3 KS_WLAN_GET_FIRM_VERSION */
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The ks_wlan_net Wireless Extension handler callbacks (iw_handler) use a
union for the data argument. Actually use the union and perform explicit
member selection in the function body instead of having a function
prototype mismatch. There are no resulting binary differences
before/after changes.

These changes were made partly manually and partly with the help of
Coccinelle.

Link: https://reviews.llvm.org/D134831 [1]
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/8d2ceee1248b5a76e9b6c379f578e65482c91168.1667934775.git.gustavoars@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/ks7010/ks_wlan_net.c | 248 ++++++++++++++-------------
 1 file changed, 126 insertions(+), 122 deletions(-)

diff --git a/drivers/staging/ks7010/ks_wlan_net.c b/drivers/staging/ks7010/ks_wlan_net.c
index 7e8d37c169f0f..044c807ca0223 100644
--- a/drivers/staging/ks7010/ks_wlan_net.c
+++ b/drivers/staging/ks7010/ks_wlan_net.c
@@ -1763,8 +1763,8 @@ static struct iw_statistics *ks_get_wireless_stats(struct net_device *dev)
 }
 
 static int ks_wlan_set_stop_request(struct net_device *dev,
-				    struct iw_request_info *info, __u32 *uwrq,
-				    char *extra)
+				    struct iw_request_info *info,
+				    union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
@@ -1772,7 +1772,7 @@ static int ks_wlan_set_stop_request(struct net_device *dev,
 		return -EPERM;
 
 	/* for SLEEP MODE */
-	if (!(*uwrq))
+	if (!(uwrq->mode))
 		return -EINVAL;
 
 	hostif_sme_enqueue(priv, SME_STOP_REQUEST);
@@ -1786,7 +1786,9 @@ static int ks_wlan_set_mlme(struct net_device *dev,
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 	struct iw_mlme *mlme = (struct iw_mlme *)extra;
-	__u32 mode = 1;
+	union iwreq_data uwrq;
+
+	uwrq.mode = 1;
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
@@ -1799,13 +1801,14 @@ static int ks_wlan_set_mlme(struct net_device *dev,
 	    mlme->reason_code == WLAN_REASON_MIC_FAILURE)
 		return 0;
 
-	return ks_wlan_set_stop_request(dev, NULL, &mode, NULL);
+	return ks_wlan_set_stop_request(dev, NULL, &uwrq, NULL);
 }
 
 static int ks_wlan_get_firmware_version(struct net_device *dev,
 					struct iw_request_info *info,
-					struct iw_point *dwrq, char *extra)
+					union iwreq_data *uwrq, char *extra)
 {
+	struct iw_point *dwrq = &uwrq->data;
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	dwrq->length = priv->version_size + 1;
@@ -1814,8 +1817,8 @@ static int ks_wlan_get_firmware_version(struct net_device *dev,
 }
 
 static int ks_wlan_set_preamble(struct net_device *dev,
-				struct iw_request_info *info, __u32 *uwrq,
-				char *extra)
+				struct iw_request_info *info,
+				union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
@@ -1823,17 +1826,17 @@ static int ks_wlan_set_preamble(struct net_device *dev,
 		return -EPERM;
 
 	/* for SLEEP MODE */
-	if (*uwrq != LONG_PREAMBLE && *uwrq != SHORT_PREAMBLE)
+	if (uwrq->mode != LONG_PREAMBLE && uwrq->mode != SHORT_PREAMBLE)
 		return -EINVAL;
 
-	priv->reg.preamble = *uwrq;
+	priv->reg.preamble = uwrq->mode;
 	priv->need_commit |= SME_MODE_SET;
 	return -EINPROGRESS;	/* Call commit handler */
 }
 
 static int ks_wlan_get_preamble(struct net_device *dev,
-				struct iw_request_info *info, __u32 *uwrq,
-				char *extra)
+				struct iw_request_info *info,
+				union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
@@ -1841,37 +1844,37 @@ static int ks_wlan_get_preamble(struct net_device *dev,
 		return -EPERM;
 
 	/* for SLEEP MODE */
-	*uwrq = priv->reg.preamble;
+	uwrq->mode = priv->reg.preamble;
 	return 0;
 }
 
 static int ks_wlan_set_power_mgmt(struct net_device *dev,
-				  struct iw_request_info *info, __u32 *uwrq,
-				  char *extra)
+				  struct iw_request_info *info,
+				  union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 
-	if (*uwrq != POWER_MGMT_ACTIVE &&
-	    *uwrq != POWER_MGMT_SAVE1 &&
-	    *uwrq != POWER_MGMT_SAVE2)
+	if (uwrq->mode != POWER_MGMT_ACTIVE &&
+	    uwrq->mode != POWER_MGMT_SAVE1 &&
+	    uwrq->mode != POWER_MGMT_SAVE2)
 		return -EINVAL;
 
-	if ((*uwrq == POWER_MGMT_SAVE1 || *uwrq == POWER_MGMT_SAVE2) &&
+	if ((uwrq->mode == POWER_MGMT_SAVE1 || uwrq->mode == POWER_MGMT_SAVE2) &&
 	    (priv->reg.operation_mode != MODE_INFRASTRUCTURE))
 		return -EINVAL;
 
-	priv->reg.power_mgmt = *uwrq;
+	priv->reg.power_mgmt = uwrq->mode;
 	hostif_sme_enqueue(priv, SME_POW_MNGMT_REQUEST);
 
 	return 0;
 }
 
 static int ks_wlan_get_power_mgmt(struct net_device *dev,
-				  struct iw_request_info *info, __u32 *uwrq,
-				  char *extra)
+				  struct iw_request_info *info,
+				  union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
@@ -1879,13 +1882,13 @@ static int ks_wlan_get_power_mgmt(struct net_device *dev,
 		return -EPERM;
 
 	/* for SLEEP MODE */
-	*uwrq = priv->reg.power_mgmt;
+	uwrq->mode = priv->reg.power_mgmt;
 	return 0;
 }
 
 static int ks_wlan_set_scan_type(struct net_device *dev,
-				 struct iw_request_info *info, __u32 *uwrq,
-				 char *extra)
+				 struct iw_request_info *info,
+				 union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
@@ -1893,39 +1896,39 @@ static int ks_wlan_set_scan_type(struct net_device *dev,
 		return -EPERM;
 	/* for SLEEP MODE */
 
-	if (*uwrq != ACTIVE_SCAN && *uwrq != PASSIVE_SCAN)
+	if (uwrq->mode != ACTIVE_SCAN && uwrq->mode != PASSIVE_SCAN)
 		return -EINVAL;
 
-	priv->reg.scan_type = *uwrq;
+	priv->reg.scan_type = uwrq->mode;
 	return 0;
 }
 
 static int ks_wlan_get_scan_type(struct net_device *dev,
-				 struct iw_request_info *info, __u32 *uwrq,
-				 char *extra)
+				 struct iw_request_info *info,
+				 union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	*uwrq = priv->reg.scan_type;
+	uwrq->mode = priv->reg.scan_type;
 	return 0;
 }
 
 static int ks_wlan_set_beacon_lost(struct net_device *dev,
-				   struct iw_request_info *info, __u32 *uwrq,
-				   char *extra)
+				   struct iw_request_info *info,
+				   union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	if (*uwrq > BEACON_LOST_COUNT_MAX)
+	if (uwrq->mode > BEACON_LOST_COUNT_MAX)
 		return -EINVAL;
 
-	priv->reg.beacon_lost_count = *uwrq;
+	priv->reg.beacon_lost_count = uwrq->mode;
 
 	if (priv->reg.operation_mode == MODE_INFRASTRUCTURE) {
 		priv->need_commit |= SME_MODE_SET;
@@ -1936,101 +1939,101 @@ static int ks_wlan_set_beacon_lost(struct net_device *dev,
 }
 
 static int ks_wlan_get_beacon_lost(struct net_device *dev,
-				   struct iw_request_info *info, __u32 *uwrq,
-				   char *extra)
+				   struct iw_request_info *info,
+				   union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	*uwrq = priv->reg.beacon_lost_count;
+	uwrq->mode = priv->reg.beacon_lost_count;
 	return 0;
 }
 
 static int ks_wlan_set_phy_type(struct net_device *dev,
-				struct iw_request_info *info, __u32 *uwrq,
-				char *extra)
+				struct iw_request_info *info,
+				union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 
-	if (*uwrq != D_11B_ONLY_MODE &&
-	    *uwrq != D_11G_ONLY_MODE &&
-	    *uwrq != D_11BG_COMPATIBLE_MODE)
+	if (uwrq->mode != D_11B_ONLY_MODE &&
+	    uwrq->mode != D_11G_ONLY_MODE &&
+	    uwrq->mode != D_11BG_COMPATIBLE_MODE)
 		return -EINVAL;
 
 	/* for SLEEP MODE */
-	priv->reg.phy_type = *uwrq;
+	priv->reg.phy_type = uwrq->mode;
 	priv->need_commit |= SME_MODE_SET;
 	return -EINPROGRESS;	/* Call commit handler */
 }
 
 static int ks_wlan_get_phy_type(struct net_device *dev,
-				struct iw_request_info *info, __u32 *uwrq,
-				char *extra)
+				struct iw_request_info *info,
+				union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	*uwrq = priv->reg.phy_type;
+	uwrq->mode = priv->reg.phy_type;
 	return 0;
 }
 
 static int ks_wlan_set_cts_mode(struct net_device *dev,
-				struct iw_request_info *info, __u32 *uwrq,
-				char *extra)
+				struct iw_request_info *info,
+				union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	if (*uwrq != CTS_MODE_FALSE && *uwrq != CTS_MODE_TRUE)
+	if (uwrq->mode != CTS_MODE_FALSE && uwrq->mode != CTS_MODE_TRUE)
 		return -EINVAL;
 
-	priv->reg.cts_mode = (*uwrq == CTS_MODE_FALSE) ? *uwrq :
+	priv->reg.cts_mode = (uwrq->mode == CTS_MODE_FALSE) ? uwrq->mode :
 			      (priv->reg.phy_type == D_11G_ONLY_MODE ||
 			       priv->reg.phy_type == D_11BG_COMPATIBLE_MODE) ?
-			       *uwrq : !*uwrq;
+			       uwrq->mode : !uwrq->mode;
 
 	priv->need_commit |= SME_MODE_SET;
 	return -EINPROGRESS;	/* Call commit handler */
 }
 
 static int ks_wlan_get_cts_mode(struct net_device *dev,
-				struct iw_request_info *info, __u32 *uwrq,
-				char *extra)
+				struct iw_request_info *info,
+				union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	*uwrq = priv->reg.cts_mode;
+	uwrq->mode = priv->reg.cts_mode;
 	return 0;
 }
 
 static int ks_wlan_set_sleep_mode(struct net_device *dev,
 				  struct iw_request_info *info,
-				  __u32 *uwrq, char *extra)
+				  union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
-	if (*uwrq != SLP_SLEEP &&
-	    *uwrq != SLP_ACTIVE) {
-		netdev_err(dev, "SET_SLEEP_MODE %d error\n", *uwrq);
+	if (uwrq->mode != SLP_SLEEP &&
+	    uwrq->mode != SLP_ACTIVE) {
+		netdev_err(dev, "SET_SLEEP_MODE %d error\n", uwrq->mode);
 		return -EINVAL;
 	}
 
-	priv->sleep_mode = *uwrq;
+	priv->sleep_mode = uwrq->mode;
 	netdev_info(dev, "SET_SLEEP_MODE %d\n", priv->sleep_mode);
 
-	if (*uwrq == SLP_SLEEP)
+	if (uwrq->mode == SLP_SLEEP)
 		hostif_sme_enqueue(priv, SME_STOP_REQUEST);
 
 	hostif_sme_enqueue(priv, SME_SLEEP_REQUEST);
@@ -2040,52 +2043,53 @@ static int ks_wlan_set_sleep_mode(struct net_device *dev,
 
 static int ks_wlan_get_sleep_mode(struct net_device *dev,
 				  struct iw_request_info *info,
-				  __u32 *uwrq, char *extra)
+				  union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
-	*uwrq = priv->sleep_mode;
+	uwrq->mode = priv->sleep_mode;
 
 	return 0;
 }
 
 static int ks_wlan_set_wps_enable(struct net_device *dev,
-				  struct iw_request_info *info, __u32 *uwrq,
-				  char *extra)
+				  struct iw_request_info *info,
+				  union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	if (*uwrq != 0 && *uwrq != 1)
+	if (uwrq->mode != 0 && uwrq->mode != 1)
 		return -EINVAL;
 
-	priv->wps.wps_enabled = *uwrq;
+	priv->wps.wps_enabled = uwrq->mode;
 	hostif_sme_enqueue(priv, SME_WPS_ENABLE_REQUEST);
 
 	return 0;
 }
 
 static int ks_wlan_get_wps_enable(struct net_device *dev,
-				  struct iw_request_info *info, __u32 *uwrq,
-				  char *extra)
+				  struct iw_request_info *info,
+				  union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	*uwrq = priv->wps.wps_enabled;
-	netdev_info(dev, "return=%d\n", *uwrq);
+	uwrq->mode = priv->wps.wps_enabled;
+	netdev_info(dev, "return=%d\n", uwrq->mode);
 
 	return 0;
 }
 
 static int ks_wlan_set_wps_probe_req(struct net_device *dev,
 				     struct iw_request_info *info,
-				     struct iw_point *dwrq, char *extra)
+				     union iwreq_data *uwrq, char *extra)
 {
+	struct iw_point *dwrq = &uwrq->data;
 	u8 *p = extra;
 	unsigned char len;
 	struct ks_wlan_private *priv = netdev_priv(dev);
@@ -2114,76 +2118,76 @@ static int ks_wlan_set_wps_probe_req(struct net_device *dev,
 }
 
 static int ks_wlan_set_tx_gain(struct net_device *dev,
-			       struct iw_request_info *info, __u32 *uwrq,
-			       char *extra)
+			       struct iw_request_info *info,
+			       union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	if (*uwrq > 0xFF)
+	if (uwrq->mode > 0xFF)
 		return -EINVAL;
 
-	priv->gain.tx_gain = (u8)*uwrq;
+	priv->gain.tx_gain = (u8)uwrq->mode;
 	priv->gain.tx_mode = (priv->gain.tx_gain < 0xFF) ? 1 : 0;
 	hostif_sme_enqueue(priv, SME_SET_GAIN);
 	return 0;
 }
 
 static int ks_wlan_get_tx_gain(struct net_device *dev,
-			       struct iw_request_info *info, __u32 *uwrq,
-			       char *extra)
+			       struct iw_request_info *info,
+			       union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	*uwrq = priv->gain.tx_gain;
+	uwrq->mode = priv->gain.tx_gain;
 	hostif_sme_enqueue(priv, SME_GET_GAIN);
 	return 0;
 }
 
 static int ks_wlan_set_rx_gain(struct net_device *dev,
-			       struct iw_request_info *info, __u32 *uwrq,
-			       char *extra)
+			       struct iw_request_info *info,
+			       union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	if (*uwrq > 0xFF)
+	if (uwrq->mode > 0xFF)
 		return -EINVAL;
 
-	priv->gain.rx_gain = (u8)*uwrq;
+	priv->gain.rx_gain = (u8)uwrq->mode;
 	priv->gain.rx_mode = (priv->gain.rx_gain < 0xFF) ? 1 : 0;
 	hostif_sme_enqueue(priv, SME_SET_GAIN);
 	return 0;
 }
 
 static int ks_wlan_get_rx_gain(struct net_device *dev,
-			       struct iw_request_info *info, __u32 *uwrq,
-			       char *extra)
+			       struct iw_request_info *info,
+			       union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
 	if (priv->sleep_mode == SLP_SLEEP)
 		return -EPERM;
 	/* for SLEEP MODE */
-	*uwrq = priv->gain.rx_gain;
+	uwrq->mode = priv->gain.rx_gain;
 	hostif_sme_enqueue(priv, SME_GET_GAIN);
 	return 0;
 }
 
 static int ks_wlan_get_eeprom_cksum(struct net_device *dev,
-				    struct iw_request_info *info, __u32 *uwrq,
-				    char *extra)
+				    struct iw_request_info *info,
+				    union iwreq_data *uwrq, char *extra)
 {
 	struct ks_wlan_private *priv = netdev_priv(dev);
 
-	*uwrq = priv->eeprom_checksum;
+	uwrq->mode = priv->eeprom_checksum;
 	return 0;
 }
 
@@ -2302,7 +2306,7 @@ static void print_hif_event(struct net_device *dev, int event)
 
 /* get host command history */
 static int ks_wlan_hostt(struct net_device *dev, struct iw_request_info *info,
-			 __u32 *uwrq, char *extra)
+			 union iwreq_data *uwrq, char *extra)
 {
 	int i, event;
 	struct ks_wlan_private *priv = netdev_priv(dev);
@@ -2409,38 +2413,38 @@ static const iw_handler ks_wlan_handler[] = {
 
 /* private_handler */
 static const iw_handler ks_wlan_private_handler[] = {
-	(iw_handler)NULL,			/* 0 */
-	(iw_handler)NULL,			/* 1, KS_WLAN_GET_DRIVER_VERSION */
-	(iw_handler)NULL,			/* 2 */
-	(iw_handler)ks_wlan_get_firmware_version,/* 3 KS_WLAN_GET_FIRM_VERSION */
-	(iw_handler)ks_wlan_set_wps_enable,	/* 4 KS_WLAN_SET_WPS_ENABLE */
-	(iw_handler)ks_wlan_get_wps_enable,	/* 5 KS_WLAN_GET_WPS_ENABLE */
-	(iw_handler)ks_wlan_set_wps_probe_req,	/* 6 KS_WLAN_SET_WPS_PROBE_REQ */
-	(iw_handler)ks_wlan_get_eeprom_cksum,	/* 7 KS_WLAN_GET_CONNECT */
-	(iw_handler)ks_wlan_set_preamble,	/* 8 KS_WLAN_SET_PREAMBLE */
-	(iw_handler)ks_wlan_get_preamble,	/* 9 KS_WLAN_GET_PREAMBLE */
-	(iw_handler)ks_wlan_set_power_mgmt,	/* 10 KS_WLAN_SET_POWER_SAVE */
-	(iw_handler)ks_wlan_get_power_mgmt,	/* 11 KS_WLAN_GET_POWER_SAVE */
-	(iw_handler)ks_wlan_set_scan_type,	/* 12 KS_WLAN_SET_SCAN_TYPE */
-	(iw_handler)ks_wlan_get_scan_type,	/* 13 KS_WLAN_GET_SCAN_TYPE */
-	(iw_handler)ks_wlan_set_rx_gain,	/* 14 KS_WLAN_SET_RX_GAIN */
-	(iw_handler)ks_wlan_get_rx_gain,	/* 15 KS_WLAN_GET_RX_GAIN */
-	(iw_handler)ks_wlan_hostt,		/* 16 KS_WLAN_HOSTT */
-	(iw_handler)NULL,			/* 17 */
-	(iw_handler)ks_wlan_set_beacon_lost,	/* 18 KS_WLAN_SET_BECAN_LOST */
-	(iw_handler)ks_wlan_get_beacon_lost,	/* 19 KS_WLAN_GET_BECAN_LOST */
-	(iw_handler)ks_wlan_set_tx_gain,	/* 20 KS_WLAN_SET_TX_GAIN */
-	(iw_handler)ks_wlan_get_tx_gain,	/* 21 KS_WLAN_GET_TX_GAIN */
-	(iw_handler)ks_wlan_set_phy_type,	/* 22 KS_WLAN_SET_PHY_TYPE */
-	(iw_handler)ks_wlan_get_phy_type,	/* 23 KS_WLAN_GET_PHY_TYPE */
-	(iw_handler)ks_wlan_set_cts_mode,	/* 24 KS_WLAN_SET_CTS_MODE */
-	(iw_handler)ks_wlan_get_cts_mode,	/* 25 KS_WLAN_GET_CTS_MODE */
-	(iw_handler)NULL,			/* 26 */
-	(iw_handler)NULL,			/* 27 */
-	(iw_handler)ks_wlan_set_sleep_mode,	/* 28 KS_WLAN_SET_SLEEP_MODE */
-	(iw_handler)ks_wlan_get_sleep_mode,	/* 29 KS_WLAN_GET_SLEEP_MODE */
-	(iw_handler)NULL,			/* 30 */
-	(iw_handler)NULL,			/* 31 */
+	NULL,				/* 0 */
+	NULL,				/* 1, KS_WLAN_GET_DRIVER_VERSION */
+	NULL,				/* 2 */
+	ks_wlan_get_firmware_version,	/* 3 KS_WLAN_GET_FIRM_VERSION */
+	ks_wlan_set_wps_enable,		/* 4 KS_WLAN_SET_WPS_ENABLE */
+	ks_wlan_get_wps_enable,		/* 5 KS_WLAN_GET_WPS_ENABLE */
+	ks_wlan_set_wps_probe_req,	/* 6 KS_WLAN_SET_WPS_PROBE_REQ */
+	ks_wlan_get_eeprom_cksum,	/* 7 KS_WLAN_GET_CONNECT */
+	ks_wlan_set_preamble,		/* 8 KS_WLAN_SET_PREAMBLE */
+	ks_wlan_get_preamble,		/* 9 KS_WLAN_GET_PREAMBLE */
+	ks_wlan_set_power_mgmt,		/* 10 KS_WLAN_SET_POWER_SAVE */
+	ks_wlan_get_power_mgmt,		/* 11 KS_WLAN_GET_POWER_SAVE */
+	ks_wlan_set_scan_type,		/* 12 KS_WLAN_SET_SCAN_TYPE */
+	ks_wlan_get_scan_type,		/* 13 KS_WLAN_GET_SCAN_TYPE */
+	ks_wlan_set_rx_gain,		/* 14 KS_WLAN_SET_RX_GAIN */
+	ks_wlan_get_rx_gain,		/* 15 KS_WLAN_GET_RX_GAIN */
+	ks_wlan_hostt,			/* 16 KS_WLAN_HOSTT */
+	NULL,				/* 17 */
+	ks_wlan_set_beacon_lost,	/* 18 KS_WLAN_SET_BECAN_LOST */
+	ks_wlan_get_beacon_lost,	/* 19 KS_WLAN_GET_BECAN_LOST */
+	ks_wlan_set_tx_gain,		/* 20 KS_WLAN_SET_TX_GAIN */
+	ks_wlan_get_tx_gain,		/* 21 KS_WLAN_GET_TX_GAIN */
+	ks_wlan_set_phy_type,		/* 22 KS_WLAN_SET_PHY_TYPE */
+	ks_wlan_get_phy_type,		/* 23 KS_WLAN_GET_PHY_TYPE */
+	ks_wlan_set_cts_mode,		/* 24 KS_WLAN_SET_CTS_MODE */
+	ks_wlan_get_cts_mode,		/* 25 KS_WLAN_GET_CTS_MODE */
+	NULL,				/* 26 */
+	NULL,				/* 27 */
+	ks_wlan_set_sleep_mode,		/* 28 KS_WLAN_SET_SLEEP_MODE */
+	ks_wlan_get_sleep_mode,		/* 29 KS_WLAN_GET_SLEEP_MODE */
+	NULL,				/* 30 */
+	NULL,				/* 31 */
 };
 
 static const struct iw_handler_def ks_wlan_handler_def = {
@@ -2461,7 +2465,7 @@ static int ks_wlan_netdev_ioctl(struct net_device *dev, struct ifreq *rq,
 
 	switch (cmd) {
 	case SIOCIWFIRSTPRIV + 20:	/* KS_WLAN_SET_STOP_REQ */
-		ret = ks_wlan_set_stop_request(dev, NULL, &wrq->u.mode, NULL);
+		ret = ks_wlan_set_stop_request(dev, NULL, &wrq->u, NULL);
 		break;
 		// All other calls are currently unsupported
 	default:
-- 
GitLab


From fb7d5ebea988899500e9a067d950034dbe8a0fbd Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Wed, 9 Nov 2022 16:12:43 +0530
Subject: [PATCH 1158/4122] staging: fieldbus: use sysfs_emit() in show
 functions

The show() methods should only use sysfs_emit() when formatting values
to be returned to the user space.
Ref: Documentation/filesystems/sysfs.rst
Issue identified by coccicheck.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y2uEIzebbM/Fs5Jz@qemulion
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/fieldbus/dev_core.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/fieldbus/dev_core.c b/drivers/staging/fieldbus/dev_core.c
index 5aab734606eae..5f54f2674bd19 100644
--- a/drivers/staging/fieldbus/dev_core.c
+++ b/drivers/staging/fieldbus/dev_core.c
@@ -28,7 +28,7 @@ static ssize_t online_show(struct device *dev, struct device_attribute *attr,
 {
 	struct fieldbus_dev *fb = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%d\n", !!fb->online);
+	return sysfs_emit(buf, "%d\n", !!fb->online);
 }
 static DEVICE_ATTR_RO(online);
 
@@ -39,7 +39,7 @@ static ssize_t enabled_show(struct device *dev, struct device_attribute *attr,
 
 	if (!fb->enable_get)
 		return -EINVAL;
-	return sprintf(buf, "%d\n", !!fb->enable_get(fb));
+	return sysfs_emit(buf, "%d\n", !!fb->enable_get(fb));
 }
 
 static ssize_t enabled_store(struct device *dev, struct device_attribute *attr,
@@ -66,11 +66,8 @@ static ssize_t card_name_show(struct device *dev, struct device_attribute *attr,
 {
 	struct fieldbus_dev *fb = dev_get_drvdata(dev);
 
-	/*
-	 * card_name was provided by child driver, could potentially be long.
-	 * protect against buffer overrun.
-	 */
-	return snprintf(buf, PAGE_SIZE, "%s\n", fb->card_name);
+	/* card_name was provided by child driver. */
+	return sysfs_emit(buf, "%s\n", fb->card_name);
 }
 static DEVICE_ATTR_RO(card_name);
 
@@ -79,7 +76,7 @@ static ssize_t read_area_size_show(struct device *dev,
 {
 	struct fieldbus_dev *fb = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%zu\n", fb->read_area_sz);
+	return sysfs_emit(buf, "%zu\n", fb->read_area_sz);
 }
 static DEVICE_ATTR_RO(read_area_size);
 
@@ -88,7 +85,7 @@ static ssize_t write_area_size_show(struct device *dev,
 {
 	struct fieldbus_dev *fb = dev_get_drvdata(dev);
 
-	return sprintf(buf, "%zu\n", fb->write_area_sz);
+	return sysfs_emit(buf, "%zu\n", fb->write_area_sz);
 }
 static DEVICE_ATTR_RO(write_area_size);
 
@@ -116,7 +113,7 @@ static ssize_t fieldbus_type_show(struct device *dev,
 		break;
 	}
 
-	return sprintf(buf, "%s\n", t);
+	return sysfs_emit(buf, "%s\n", t);
 }
 static DEVICE_ATTR_RO(fieldbus_type);
 
-- 
GitLab


From 335140116703920ddcbd9a09ae9edfb02902f3d1 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Tue, 8 Nov 2022 23:39:56 +0530
Subject: [PATCH 1159/4122] staging: rtl8192u: remove redundant macro
 definitions

Several ieee80211_* symbol names are extended with _rsl tag using
macros. This is done to avoid a conflict when a similar symbol is
already in use in another part of kernel and may lead to conflicts.
However, most of these base symbol names are not found to being used
anywhere in the code and hence are not useful today. These symbols
are not used outside of the module and hence can be safely removed.
The code continues to use the original symbol names.

Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/17f69a042e215c484931a0327fdf7775eea5f918.1667930292.git.drv@mailo.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8192u/ieee80211/ieee80211.h    | 61 -------------------
 1 file changed, 61 deletions(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211.h b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
index 00c07455cbb3b..c5c43d2fb93eb 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211.h
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211.h
@@ -223,71 +223,10 @@ struct cb_desc {
 #define MAX_IE_LEN  0xff
 
 // added for kernel conflict
-#define ieee80211_crypt_deinit_entries	ieee80211_crypt_deinit_entries_rsl
-#define ieee80211_crypt_deinit_handler	ieee80211_crypt_deinit_handler_rsl
-#define ieee80211_crypt_delayed_deinit	ieee80211_crypt_delayed_deinit_rsl
-#define ieee80211_register_crypto_ops	ieee80211_register_crypto_ops_rsl
-#define ieee80211_unregister_crypto_ops ieee80211_unregister_crypto_ops_rsl
-#define ieee80211_get_crypto_ops	ieee80211_get_crypto_ops_rsl
-
-#define ieee80211_ccmp_null		ieee80211_ccmp_null_rsl
-
-#define free_ieee80211			free_ieee80211_rsl
-#define alloc_ieee80211			alloc_ieee80211_rsl
-
-#define ieee80211_rx			ieee80211_rx_rsl
-#define ieee80211_rx_mgt		ieee80211_rx_mgt_rsl
-
-#define ieee80211_get_beacon		ieee80211_get_beacon_rsl
 #define ieee80211_wake_queue		ieee80211_wake_queue_rsl
 #define ieee80211_stop_queue		ieee80211_stop_queue_rsl
-#define ieee80211_reset_queue		ieee80211_reset_queue_rsl
-#define ieee80211_softmac_stop_protocol	ieee80211_softmac_stop_protocol_rsl
-#define ieee80211_softmac_start_protocol ieee80211_softmac_start_protocol_rsl
-#define ieee80211_is_shortslot		ieee80211_is_shortslot_rsl
-#define ieee80211_is_54g		ieee80211_is_54g_rsl
-#define ieee80211_wpa_supplicant_ioctl	ieee80211_wpa_supplicant_ioctl_rsl
-#define ieee80211_ps_tx_ack		ieee80211_ps_tx_ack_rsl
-#define ieee80211_softmac_xmit		ieee80211_softmac_xmit_rsl
-#define ieee80211_stop_send_beacons	ieee80211_stop_send_beacons_rsl
 #define notify_wx_assoc_event		notify_wx_assoc_event_rsl
 #define SendDisassociation		SendDisassociation_rsl
-#define ieee80211_disassociate		ieee80211_disassociate_rsl
-#define ieee80211_start_send_beacons	ieee80211_start_send_beacons_rsl
-#define ieee80211_stop_scan		ieee80211_stop_scan_rsl
-#define ieee80211_send_probe_requests	ieee80211_send_probe_requests_rsl
-#define ieee80211_softmac_scan_syncro	ieee80211_softmac_scan_syncro_rsl
-#define ieee80211_start_scan_syncro	ieee80211_start_scan_syncro_rsl
-
-#define ieee80211_wx_get_essid		ieee80211_wx_get_essid_rsl
-#define ieee80211_wx_set_essid		ieee80211_wx_set_essid_rsl
-#define ieee80211_wx_set_rate		ieee80211_wx_set_rate_rsl
-#define ieee80211_wx_get_rate		ieee80211_wx_get_rate_rsl
-#define ieee80211_wx_set_wap		ieee80211_wx_set_wap_rsl
-#define ieee80211_wx_get_wap		ieee80211_wx_get_wap_rsl
-#define ieee80211_wx_set_mode		ieee80211_wx_set_mode_rsl
-#define ieee80211_wx_get_mode		ieee80211_wx_get_mode_rsl
-#define ieee80211_wx_set_scan		ieee80211_wx_set_scan_rsl
-#define ieee80211_wx_get_freq		ieee80211_wx_get_freq_rsl
-#define ieee80211_wx_set_freq		ieee80211_wx_set_freq_rsl
-#define ieee80211_wx_set_rawtx		ieee80211_wx_set_rawtx_rsl
-#define ieee80211_wx_get_name		ieee80211_wx_get_name_rsl
-#define ieee80211_wx_set_power		ieee80211_wx_set_power_rsl
-#define ieee80211_wx_get_power		ieee80211_wx_get_power_rsl
-#define ieee80211_wlan_frequencies	ieee80211_wlan_frequencies_rsl
-#define ieee80211_wx_set_rts		ieee80211_wx_set_rts_rsl
-#define ieee80211_wx_get_rts		ieee80211_wx_get_rts_rsl
-
-#define ieee80211_txb_free		ieee80211_txb_free_rsl
-
-#define ieee80211_wx_set_gen_ie		ieee80211_wx_set_gen_ie_rsl
-#define ieee80211_wx_get_scan		ieee80211_wx_get_scan_rsl
-#define ieee80211_wx_set_encode		ieee80211_wx_set_encode_rsl
-#define ieee80211_wx_get_encode		ieee80211_wx_get_encode_rsl
-#define ieee80211_wx_set_mlme		ieee80211_wx_set_mlme_rsl
-#define ieee80211_wx_set_auth		ieee80211_wx_set_auth_rsl
-#define ieee80211_wx_set_encode_ext	ieee80211_wx_set_encode_ext_rsl
-#define ieee80211_wx_get_encode_ext	ieee80211_wx_get_encode_ext_rsl
 
 
 struct ieee_param {
-- 
GitLab


From 8c3e8a6bdb5253b97ad532570f8b5db5f7a06407 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 26 Oct 2022 16:28:03 +0800
Subject: [PATCH 1160/4122] class: fix possible memory leak in
 __class_register()

If class_add_groups() returns error, the 'cp->subsys' need be
unregister, and the 'cp' need be freed.

We can not call kset_unregister() here, because the 'cls' will
be freed in callback function class_release() and it's also
freed in caller's error path, it will cause double free.

So fix this by calling kobject_del() and kfree_const(name) to
cleanup kobject. Besides, call kfree() to free the 'cp'.

Fault injection test can trigger this:

unreferenced object 0xffff888102fa8190 (size 8):
  comm "modprobe", pid 502, jiffies 4294906074 (age 49.296s)
  hex dump (first 8 bytes):
    70 6b 74 63 64 76 64 00                          pktcdvd.
  backtrace:
    [<00000000e7c7703d>] __kmalloc_track_caller+0x1ae/0x320
    [<000000005e4d70bc>] kstrdup+0x3a/0x70
    [<00000000c2e5e85a>] kstrdup_const+0x68/0x80
    [<000000000049a8c7>] kvasprintf_const+0x10b/0x190
    [<0000000029123163>] kobject_set_name_vargs+0x56/0x150
    [<00000000747219c9>] kobject_set_name+0xab/0xe0
    [<0000000005f1ea4e>] __class_register+0x15c/0x49a

unreferenced object 0xffff888037274000 (size 1024):
  comm "modprobe", pid 502, jiffies 4294906074 (age 49.296s)
  hex dump (first 32 bytes):
    00 40 27 37 80 88 ff ff 00 40 27 37 80 88 ff ff  .@'7.....@'7....
    00 00 00 00 ad 4e ad de ff ff ff ff 00 00 00 00  .....N..........
  backtrace:
    [<00000000151f9600>] kmem_cache_alloc_trace+0x17c/0x2f0
    [<00000000ecf3dd95>] __class_register+0x86/0x49a

Fixes: ced6473e7486 ("driver core: class: add class_groups support")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221026082803.3458760-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/class.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 64f7b9a0970f7..8ceafb7d0203b 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -192,6 +192,11 @@ int __class_register(struct class *cls, struct lock_class_key *key)
 	}
 	error = class_add_groups(class_get(cls), cls->class_groups);
 	class_put(cls);
+	if (error) {
+		kobject_del(&cp->subsys.kobj);
+		kfree_const(cp->subsys.kobj.name);
+		kfree(cp);
+	}
 	return error;
 }
 EXPORT_SYMBOL_GPL(__class_register);
-- 
GitLab


From 9e6002ad9e28209dcefa342977ac683779556dbd Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 2 Nov 2022 10:55:34 +0800
Subject: [PATCH 1161/4122] Documentation: devres: add missing MEM helper

Add missing devm_kstrdup_const() to devres.rst. It's introduce by
commit 09d1ea1c7309 ("devres: provide devm_kstrdup_const()").

Fixes: 09d1ea1c7309 ("devres: provide devm_kstrdup_const()")
Cc: Bartosz Golaszewski <brgl@bgdev.pl>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221102025534.1450337-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/driver-api/driver-model/devres.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 687adb58048ec..ac9b9ff37f50e 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -357,6 +357,7 @@ MEM
   devm_kmemdup()
   devm_krealloc()
   devm_kstrdup()
+  devm_kstrdup_const()
   devm_kvasprintf()
   devm_kzalloc()
 
-- 
GitLab


From 6fcd7e702d3d91cc2c3194acffd7d67b2c10b81f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 18 Oct 2022 02:04:10 -0700
Subject: [PATCH 1162/4122] devres: Use kmalloc_size_roundup() to match ksize()
 usage

Round up allocations with kmalloc_size_roundup() so that devres's use
of ksize() is always accurate and no special handling of the memory is
needed by KASAN, UBSAN_BOUNDS, nor FORTIFY_SOURCE.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221018090406.never.856-kees@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/devres.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/base/devres.c b/drivers/base/devres.c
index 4ab2b50ee38f4..c0e100074aa39 100644
--- a/drivers/base/devres.c
+++ b/drivers/base/devres.c
@@ -101,6 +101,9 @@ static bool check_dr_size(size_t size, size_t *tot_size)
 					size, tot_size)))
 		return false;
 
+	/* Actually allocate the full kmalloc bucket size. */
+	*tot_size = kmalloc_size_roundup(*tot_size);
+
 	return true;
 }
 
-- 
GitLab


From e63e99397b2613d50a5f4f02ed07307e67a190f1 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Nov 2022 14:40:36 +0800
Subject: [PATCH 1163/4122] drivers: dio: fix possible memory leak in
 dio_init()

If device_register() returns error, the 'dev' and name needs be
freed. Add a release function, and then call put_device() in the
error path, so the name is freed in kobject_cleanup() and to the
'dev' is freed in release function.

Fixes: 2e4c77bea3d8 ("m68k: dio - Kill warn_unused_result warnings")
Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221109064036.1835346-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/dio/dio.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/dio/dio.c b/drivers/dio/dio.c
index 0e5a5662d5a40..0a051d6568800 100644
--- a/drivers/dio/dio.c
+++ b/drivers/dio/dio.c
@@ -109,6 +109,12 @@ static char dio_no_name[] = { 0 };
 
 #endif /* CONFIG_DIO_CONSTANTS */
 
+static void dio_dev_release(struct device *dev)
+{
+	struct dio_dev *ddev = container_of(dev, typeof(struct dio_dev), dev);
+	kfree(ddev);
+}
+
 int __init dio_find(int deviceid)
 {
 	/* Called to find a DIO device before the full bus scan has run.
@@ -225,6 +231,7 @@ static int __init dio_init(void)
 		dev->bus = &dio_bus;
 		dev->dev.parent = &dio_bus.dev;
 		dev->dev.bus = &dio_bus_type;
+		dev->dev.release = dio_dev_release;
 		dev->scode = scode;
 		dev->resource.start = pa;
 		dev->resource.end = pa + DIO_SIZE(scode, va);
@@ -252,6 +259,7 @@ static int __init dio_init(void)
 		if (error) {
 			pr_err("DIO: Error registering device %s\n",
 			       dev->name);
+			put_device(&dev->dev);
 			continue;
 		}
 		error = dio_create_sysfs_dev_files(dev);
-- 
GitLab


From 92cf87051894921ab6b14e9ad60dbae2d669bf15 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 3 Nov 2022 17:38:35 -0700
Subject: [PATCH 1164/4122] debugfs: small Documentation cleaning

Fix punctuation in a parenthetical phrase.
Add 2 article adjectives and change one from "an" to "a".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221104003835.29472-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/filesystems/debugfs.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/filesystems/debugfs.rst b/Documentation/filesystems/debugfs.rst
index 71b1fee56d2a8..dc35da8b8792c 100644
--- a/Documentation/filesystems/debugfs.rst
+++ b/Documentation/filesystems/debugfs.rst
@@ -155,8 +155,8 @@ any code which does so in the mainline.  Note that all files created with
 debugfs_create_blob() are read-only.
 
 If you want to dump a block of registers (something that happens quite
-often during development, even if little such code reaches mainline.
-Debugfs offers two functions: one to make a registers-only file, and
+often during development, even if little such code reaches mainline),
+debugfs offers two functions: one to make a registers-only file, and
 another to insert a register block in the middle of another sequential
 file::
 
@@ -183,7 +183,7 @@ The "base" argument may be 0, but you may want to build the reg32 array
 using __stringify, and a number of register names (macros) are actually
 byte offsets over a base for the register block.
 
-If you want to dump an u32 array in debugfs, you can create file with::
+If you want to dump a u32 array in debugfs, you can create a file with::
 
     struct debugfs_u32_array {
 	u32 *array;
@@ -197,7 +197,7 @@ If you want to dump an u32 array in debugfs, you can create file with::
 The "array" argument wraps a pointer to the array's data and the number
 of its elements. Note: Once array is created its size can not be changed.
 
-There is a helper function to create device related seq_file::
+There is a helper function to create a device-related seq_file::
 
    void debugfs_create_devm_seqfile(struct device *dev,
 				const char *name,
-- 
GitLab


From 9a6800d1b9da21302758fb37e58b3ab1cadfc643 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 3 Nov 2022 17:39:21 -0700
Subject: [PATCH 1165/4122] sysfs: update Documentation

Make editing corrections and updates to sysfs.rst:

- spell "sysfs" consistently (vs. "Sysfs")
- align field names in a struct
- fix some punctuation and grammar
- list more /sys top-level subdirectories
- change 'fuse.txt' to 'fuse.rst' (although I don't see where the
  example is)

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-doc@vger.kernel.org
Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221104003921.31616-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/filesystems/sysfs.rst | 41 +++++++++++++++++------------
 1 file changed, 24 insertions(+), 17 deletions(-)

diff --git a/Documentation/filesystems/sysfs.rst b/Documentation/filesystems/sysfs.rst
index 8bba676b1365d..f8187d466b97b 100644
--- a/Documentation/filesystems/sysfs.rst
+++ b/Documentation/filesystems/sysfs.rst
@@ -12,10 +12,10 @@ Mike Murphy <mamurph@cs.clemson.edu>
 :Original:   10 January 2003
 
 
-What it is:
-~~~~~~~~~~~
+What it is
+~~~~~~~~~~
 
-sysfs is a ram-based filesystem initially based on ramfs. It provides
+sysfs is a RAM-based filesystem initially based on ramfs. It provides
 a means to export kernel data structures, their attributes, and the
 linkages between them to userspace.
 
@@ -43,7 +43,7 @@ userspace. Top-level directories in sysfs represent the common
 ancestors of object hierarchies; i.e. the subsystems the objects
 belong to.
 
-Sysfs internally stores a pointer to the kobject that implements a
+sysfs internally stores a pointer to the kobject that implements a
 directory in the kernfs_node object associated with the directory. In
 the past this kobject pointer has been used by sysfs to do reference
 counting directly on the kobject whenever the file is opened or closed.
@@ -55,7 +55,7 @@ Attributes
 ~~~~~~~~~~
 
 Attributes can be exported for kobjects in the form of regular files in
-the filesystem. Sysfs forwards file I/O operations to methods defined
+the filesystem. sysfs forwards file I/O operations to methods defined
 for the attributes, providing a means to read and write kernel
 attributes.
 
@@ -72,8 +72,8 @@ you publicly humiliated and your code rewritten without notice.
 An attribute definition is simply::
 
     struct attribute {
-	    char                    * name;
-	    struct module		*owner;
+	    char                    *name;
+	    struct module           *owner;
 	    umode_t                 mode;
     };
 
@@ -138,7 +138,7 @@ __ATTR_WO(name):
 		 assumes a name_store only and is restricted to mode
                  0200 that is root write access only.
 __ATTR_RO_MODE(name, mode):
-	         fore more restrictive RO access currently
+	         for more restrictive RO access; currently
                  only use case is the EFI System Resource Table
                  (see drivers/firmware/efi/esrt.c)
 __ATTR_RW(name):
@@ -207,7 +207,7 @@ IOW, they should take only an object, an attribute, and a buffer as parameters.
 
 
 sysfs allocates a buffer of size (PAGE_SIZE) and passes it to the
-method. Sysfs will call the method exactly once for each read or
+method. sysfs will call the method exactly once for each read or
 write. This forces the following behavior on the method
 implementations:
 
@@ -221,7 +221,7 @@ implementations:
   be called again, rearmed, to fill the buffer.
 
 - On write(2), sysfs expects the entire buffer to be passed during the
-  first write. Sysfs then passes the entire buffer to the store() method.
+  first write. sysfs then passes the entire buffer to the store() method.
   A terminating null is added after the data on stores. This makes
   functions like sysfs_streq() safe to use.
 
@@ -237,7 +237,7 @@ Other notes:
 - Writing causes the show() method to be rearmed regardless of current
   file position.
 
-- The buffer will always be PAGE_SIZE bytes in length. On i386, this
+- The buffer will always be PAGE_SIZE bytes in length. On x86, this
   is 4096.
 
 - show() methods should return the number of bytes printed into the
@@ -253,7 +253,7 @@ Other notes:
   through, be sure to return an error.
 
 - The object passed to the methods will be pinned in memory via sysfs
-  referencing counting its embedded object. However, the physical
+  reference counting its embedded object. However, the physical
   entity (e.g. device) the object represents may not be present. Be
   sure to have a way to check this, if necessary.
 
@@ -295,8 +295,12 @@ The top level sysfs directory looks like::
     dev/
     devices/
     firmware/
-    net/
     fs/
+    hypervisor/
+    kernel/
+    module/
+    net/
+    power/
 
 devices/ contains a filesystem representation of the device tree. It maps
 directly to the internal kernel device tree, which is a hierarchy of
@@ -317,15 +321,18 @@ span multiple bus types).
 
 fs/ contains a directory for some filesystems.  Currently each
 filesystem wanting to export attributes must create its own hierarchy
-below fs/ (see ./fuse.txt for an example).
+below fs/ (see ./fuse.rst for an example).
+
+module/ contains parameter values and state information for all
+loaded system modules, for both builtin and loadable modules.
 
-dev/ contains two directories char/ and block/. Inside these two
+dev/ contains two directories: char/ and block/. Inside these two
 directories there are symlinks named <major>:<minor>.  These symlinks
 point to the sysfs directory for the given device.  /sys/dev provides a
 quick way to lookup the sysfs interface for a device from the result of
 a stat(2) operation.
 
-More information can driver-model specific features can be found in
+More information on driver-model specific features can be found in
 Documentation/driver-api/driver-model/.
 
 
@@ -335,7 +342,7 @@ TODO: Finish this section.
 Current Interfaces
 ~~~~~~~~~~~~~~~~~~
 
-The following interface layers currently exist in sysfs:
+The following interface layers currently exist in sysfs.
 
 
 devices (include/linux/device.h)
-- 
GitLab


From fa627348cfc7fb174468d88756b83c2d97890b07 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Sat, 1 Oct 2022 18:54:26 +0200
Subject: [PATCH 1166/4122] driver core: class: make namespace and
 get_ownership take const *

The callbacks in struct class namespace() and get_ownership() do not
modify the struct device passed to them, so mark the pointer as constant
and fix up all callbacks in the kernel to have the correct function
signature.

This helps make it more obvious what calls and callbacks do, and do not,
modify structures passed to them.

Cc: "Rafael J. Wysocki" <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221001165426.2690912-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c              | 4 ++--
 drivers/infiniband/core/device.c | 4 ++--
 drivers/net/ipvlan/ipvtap.c      | 4 ++--
 drivers/net/macvtap.c            | 4 ++--
 include/linux/device/class.h     | 4 ++--
 net/core/net-sysfs.c             | 8 ++++----
 net/wireless/sysfs.c             | 2 +-
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index d02501933467d..f07b1c349f791 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2336,7 +2336,7 @@ static void device_release(struct kobject *kobj)
 
 static const void *device_namespace(struct kobject *kobj)
 {
-	struct device *dev = kobj_to_dev(kobj);
+	const struct device *dev = kobj_to_dev(kobj);
 	const void *ns = NULL;
 
 	if (dev->class && dev->class->ns_type)
@@ -2347,7 +2347,7 @@ static const void *device_namespace(struct kobject *kobj)
 
 static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
 {
-	struct device *dev = kobj_to_dev(kobj);
+	const struct device *dev = kobj_to_dev(kobj);
 
 	if (dev->class && dev->class->get_ownership)
 		dev->class->get_ownership(dev, uid, gid);
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index ae60c73babcc5..3893b65174215 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -524,9 +524,9 @@ static int ib_device_uevent(struct device *device,
 	return 0;
 }
 
-static const void *net_namespace(struct device *d)
+static const void *net_namespace(const struct device *d)
 {
-	struct ib_core_device *coredev =
+	const struct ib_core_device *coredev =
 			container_of(d, struct ib_core_device, dev);
 
 	return read_pnet(&coredev->rdma_net);
diff --git a/drivers/net/ipvlan/ipvtap.c b/drivers/net/ipvlan/ipvtap.c
index cbabca167a078..dde272586e80b 100644
--- a/drivers/net/ipvlan/ipvtap.c
+++ b/drivers/net/ipvlan/ipvtap.c
@@ -30,9 +30,9 @@
 static dev_t ipvtap_major;
 static struct cdev ipvtap_cdev;
 
-static const void *ipvtap_net_namespace(struct device *d)
+static const void *ipvtap_net_namespace(const struct device *d)
 {
-	struct net_device *dev = to_net_dev(d->parent);
+	const struct net_device *dev = to_net_dev(d->parent);
 	return dev_net(dev);
 }
 
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index d1f435788e902..031344239f271 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -35,9 +35,9 @@ struct macvtap_dev {
  */
 static dev_t macvtap_major;
 
-static const void *macvtap_net_namespace(struct device *d)
+static const void *macvtap_net_namespace(const struct device *d)
 {
-	struct net_device *dev = to_net_dev(d->parent);
+	const struct net_device *dev = to_net_dev(d->parent);
 	return dev_net(dev);
 }
 
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index e61ec55020197..20103e0b03c34 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -68,9 +68,9 @@ struct class {
 	int (*shutdown_pre)(struct device *dev);
 
 	const struct kobj_ns_type_operations *ns_type;
-	const void *(*namespace)(struct device *dev);
+	const void *(*namespace)(const struct device *dev);
 
-	void (*get_ownership)(struct device *dev, kuid_t *uid, kgid_t *gid);
+	void (*get_ownership)(const struct device *dev, kuid_t *uid, kgid_t *gid);
 
 	const struct dev_pm_ops *pm;
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 8409d41405dfe..a8c5a7cd9701d 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1910,16 +1910,16 @@ static void netdev_release(struct device *d)
 	netdev_freemem(dev);
 }
 
-static const void *net_namespace(struct device *d)
+static const void *net_namespace(const struct device *d)
 {
-	struct net_device *dev = to_net_dev(d);
+	const struct net_device *dev = to_net_dev(d);
 
 	return dev_net(dev);
 }
 
-static void net_get_ownership(struct device *d, kuid_t *uid, kgid_t *gid)
+static void net_get_ownership(const struct device *d, kuid_t *uid, kgid_t *gid)
 {
-	struct net_device *dev = to_net_dev(d);
+	const struct net_device *dev = to_net_dev(d);
 	const struct net *net = dev_net(dev);
 
 	net_ns_get_ownership(net, uid, gid);
diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c
index 0c3f05c9be27a..cdb638647e0b6 100644
--- a/net/wireless/sysfs.c
+++ b/net/wireless/sysfs.c
@@ -148,7 +148,7 @@ static SIMPLE_DEV_PM_OPS(wiphy_pm_ops, wiphy_suspend, wiphy_resume);
 #define WIPHY_PM_OPS NULL
 #endif
 
-static const void *wiphy_namespace(struct device *d)
+static const void *wiphy_namespace(const struct device *d)
 {
 	struct wiphy *wiphy = container_of(d, struct wiphy, dev);
 
-- 
GitLab


From 0b9ca98b722969660ad98b39f766a561ccb39f5f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:03:07 +0000
Subject: [PATCH 1167/4122] perf/x86/core: Zero @lbr instead of returning -1 in
 x86_perf_get_lbr() stub

Drop the return value from x86_perf_get_lbr() and have the stub zero out
the @lbr structure instead of returning -1 to indicate "no LBR support".
KVM doesn't actually check the return value, and instead subtly relies on
zeroing the number of LBRs in intel_pmu_init().

Formalize "nr=0 means unsupported" so that KVM doesn't need to add a
pointless check on the return value to fix KVM's benign bug.

Note, the stub is necessary even though KVM x86 selects PERF_EVENTS and
the caller exists only when CONFIG_KVM_INTEL=y.  Despite the name,
KVM_INTEL doesn't strictly require CPU_SUP_INTEL, it can be built with
any of INTEL || CENTAUR || ZHAOXIN CPUs.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221006000314.73240-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/events/intel/lbr.c       | 6 +-----
 arch/x86/include/asm/perf_event.h | 6 +++---
 arch/x86/kvm/vmx/capabilities.h   | 3 ++-
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c
index 8259d725054d0..4dbde69c423ba 100644
--- a/arch/x86/events/intel/lbr.c
+++ b/arch/x86/events/intel/lbr.c
@@ -1603,10 +1603,8 @@ clear_arch_lbr:
  * x86_perf_get_lbr - get the LBR records information
  *
  * @lbr: the caller's memory to store the LBR records information
- *
- * Returns: 0 indicates the LBR info has been successfully obtained
  */
-int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
+void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
 {
 	int lbr_fmt = x86_pmu.intel_cap.lbr_format;
 
@@ -1614,8 +1612,6 @@ int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
 	lbr->from = x86_pmu.lbr_from;
 	lbr->to = x86_pmu.lbr_to;
 	lbr->info = (lbr_fmt == LBR_FORMAT_INFO) ? x86_pmu.lbr_info : 0;
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(x86_perf_get_lbr);
 
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 9ac46dbe57d48..5d0f6891ae611 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -543,12 +543,12 @@ static inline void perf_check_microcode(void) { }
 
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL)
 extern struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
-extern int x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
+extern void x86_perf_get_lbr(struct x86_pmu_lbr *lbr);
 #else
 struct perf_guest_switch_msr *perf_guest_get_msrs(int *nr, void *data);
-static inline int x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
+static inline void x86_perf_get_lbr(struct x86_pmu_lbr *lbr)
 {
-	return -1;
+	memset(lbr, 0, sizeof(*lbr));
 }
 #endif
 
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 07254314f3dd5..479124e49bbda 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -407,7 +407,8 @@ static inline u64 vmx_get_perf_capabilities(void)
 	if (boot_cpu_has(X86_FEATURE_PDCM))
 		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
 
-	if (x86_perf_get_lbr(&lbr) >= 0 && lbr.nr)
+	x86_perf_get_lbr(&lbr);
+	if (lbr.nr)
 		perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
 
 	if (vmx_pebs_supported()) {
-- 
GitLab


From bec46859fb9d797a21c983100b1f425bebe89747 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:03:11 +0000
Subject: [PATCH 1168/4122] KVM: x86: Track supported PERF_CAPABILITIES in
 kvm_caps

Track KVM's supported PERF_CAPABILITIES in kvm_caps instead of computing
the supported capabilities on the fly every time.  Using kvm_caps will
also allow for future cleanups as the kvm_caps values can be used
directly in common x86 code.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Acked-by: Like Xu <likexu@tencent.com>
Message-Id: <20221006000314.73240-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c          |  2 ++
 arch/x86/kvm/vmx/capabilities.h | 25 ------------------------
 arch/x86/kvm/vmx/pmu_intel.c    |  2 +-
 arch/x86/kvm/vmx/vmx.c          | 34 +++++++++++++++++++++++++++++----
 arch/x86/kvm/x86.h              |  1 +
 5 files changed, 34 insertions(+), 30 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9f88c8e6766e4..d15f1934c9042 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2714,6 +2714,7 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 			msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
 		break;
 	case MSR_IA32_PERF_CAPABILITIES:
+		msr->data = kvm_caps.supported_perf_cap;
 		return 0;
 	default:
 		return KVM_MSR_RET_INVALID;
@@ -4865,6 +4866,7 @@ static __init void svm_set_cpu_caps(void)
 {
 	kvm_set_cpu_caps();
 
+	kvm_caps.supported_perf_cap = 0;
 	kvm_caps.supported_xss = 0;
 
 	/* CPUID 0x80000001 and 0x8000000A (SVM features) */
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h
index 479124e49bbda..cd2ac9536c998 100644
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -395,31 +395,6 @@ static inline bool vmx_pebs_supported(void)
 	return boot_cpu_has(X86_FEATURE_PEBS) && kvm_pmu_cap.pebs_ept;
 }
 
-static inline u64 vmx_get_perf_capabilities(void)
-{
-	u64 perf_cap = PMU_CAP_FW_WRITES;
-	struct x86_pmu_lbr lbr;
-	u64 host_perf_cap = 0;
-
-	if (!enable_pmu)
-		return 0;
-
-	if (boot_cpu_has(X86_FEATURE_PDCM))
-		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
-
-	x86_perf_get_lbr(&lbr);
-	if (lbr.nr)
-		perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
-
-	if (vmx_pebs_supported()) {
-		perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
-		if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
-			perf_cap &= ~PERF_CAP_PEBS_BASELINE;
-	}
-
-	return perf_cap;
-}
-
 static inline bool cpu_has_notify_vmexit(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 10b33da9bd058..9fabfe71fd879 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -631,7 +631,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 		pmu->fixed_counters[i].current_config = 0;
 	}
 
-	vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
+	vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
 	lbr_desc->records.nr = 0;
 	lbr_desc->event = NULL;
 	lbr_desc->msr_passthrough = false;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 63247c57c72cc..3fec439530516 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1850,7 +1850,7 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 			return 1;
 		return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
 	case MSR_IA32_PERF_CAPABILITIES:
-		msr->data = vmx_get_perf_capabilities();
+		msr->data = kvm_caps.supported_perf_cap;
 		return 0;
 	default:
 		return KVM_MSR_RET_INVALID;
@@ -2029,7 +2029,7 @@ static u64 vmx_get_supported_debugctl(struct kvm_vcpu *vcpu, bool host_initiated
 	    (host_initiated || guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT)))
 		debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
 
-	if ((vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT) &&
+	if ((kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT) &&
 	    (host_initiated || intel_pmu_lbr_is_enabled(vcpu)))
 		debugctl |= DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI;
 
@@ -2342,14 +2342,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		if (data & PMU_CAP_LBR_FMT) {
 			if ((data & PMU_CAP_LBR_FMT) !=
-			    (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT))
+			    (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
 				return 1;
 			if (!cpuid_model_is_consistent(vcpu))
 				return 1;
 		}
 		if (data & PERF_CAP_PEBS_FORMAT) {
 			if ((data & PERF_CAP_PEBS_MASK) !=
-			    (vmx_get_perf_capabilities() & PERF_CAP_PEBS_MASK))
+			    (kvm_caps.supported_perf_cap & PERF_CAP_PEBS_MASK))
 				return 1;
 			if (!guest_cpuid_has(vcpu, X86_FEATURE_DS))
 				return 1;
@@ -7669,6 +7669,31 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
 	vmx_update_exception_bitmap(vcpu);
 }
 
+static u64 vmx_get_perf_capabilities(void)
+{
+	u64 perf_cap = PMU_CAP_FW_WRITES;
+	struct x86_pmu_lbr lbr;
+	u64 host_perf_cap = 0;
+
+	if (!enable_pmu)
+		return 0;
+
+	if (boot_cpu_has(X86_FEATURE_PDCM))
+		rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap);
+
+	x86_perf_get_lbr(&lbr);
+	if (lbr.nr)
+		perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT;
+
+	if (vmx_pebs_supported()) {
+		perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK;
+		if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4)
+			perf_cap &= ~PERF_CAP_PEBS_BASELINE;
+	}
+
+	return perf_cap;
+}
+
 static __init void vmx_set_cpu_caps(void)
 {
 	kvm_set_cpu_caps();
@@ -7691,6 +7716,7 @@ static __init void vmx_set_cpu_caps(void)
 
 	if (!enable_pmu)
 		kvm_cpu_cap_clear(X86_FEATURE_PDCM);
+	kvm_caps.supported_perf_cap = vmx_get_perf_capabilities();
 
 	if (!enable_sgx) {
 		kvm_cpu_cap_clear(X86_FEATURE_SGX);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 829d3134c1eb0..9de72586f4065 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -27,6 +27,7 @@ struct kvm_caps {
 	u64 supported_mce_cap;
 	u64 supported_xcr0;
 	u64 supported_xss;
+	u64 supported_perf_cap;
 };
 
 void kvm_spurious_fault(void);
-- 
GitLab


From 6c6f82bea96fddb96898edbbee248ad362b768f4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:03:12 +0000
Subject: [PATCH 1169/4122] KVM: x86: Init vcpu->arch.perf_capabilities in
 common x86 code

Initialize vcpu->arch.perf_capabilities in x86's kvm_arch_vcpu_create()
instead of deferring initialization to vendor code.  For better or worse,
common x86 handles reads and writes to the MSR, and so common x86 should
also handle initializing the MSR.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221006000314.73240-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/pmu_intel.c | 1 -
 arch/x86/kvm/x86.c           | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 9fabfe71fd879..b7c3a9874a93c 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -631,7 +631,6 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 		pmu->fixed_counters[i].current_config = 0;
 	}
 
-	vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
 	lbr_desc->records.nr = 0;
 	lbr_desc->event = NULL;
 	lbr_desc->msr_passthrough = false;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ecea83f0da498..6ce7c80180d24 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11893,6 +11893,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
 
 	kvm_async_pf_hash_reset(vcpu);
+
+	vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
 	kvm_pmu_init(vcpu);
 
 	vcpu->arch.pending_external_vector = -1;
-- 
GitLab


From 5fe9805dc2f58bebb3bf5dd298559c4bad5f0448 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:03:13 +0000
Subject: [PATCH 1170/4122] KVM: x86: Handle PERF_CAPABILITIES in common x86's
 kvm_get_msr_feature()

Handle PERF_CAPABILITIES directly in kvm_get_msr_feature() now that the
supported value is available in kvm_caps.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221006000314.73240-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 3 ---
 arch/x86/kvm/vmx/vmx.c | 3 ---
 arch/x86/kvm/x86.c     | 3 +++
 3 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d15f1934c9042..3fc8e4999891a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -2713,9 +2713,6 @@ static int svm_get_msr_feature(struct kvm_msr_entry *msr)
 		if (boot_cpu_has(X86_FEATURE_LFENCE_RDTSC))
 			msr->data |= MSR_F10H_DECFG_LFENCE_SERIALIZE;
 		break;
-	case MSR_IA32_PERF_CAPABILITIES:
-		msr->data = kvm_caps.supported_perf_cap;
-		return 0;
 	default:
 		return KVM_MSR_RET_INVALID;
 	}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3fec439530516..7a7e14d4d78c2 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1849,9 +1849,6 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
 		if (!nested)
 			return 1;
 		return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
-	case MSR_IA32_PERF_CAPABILITIES:
-		msr->data = kvm_caps.supported_perf_cap;
-		return 0;
 	default:
 		return KVM_MSR_RET_INVALID;
 	}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6ce7c80180d24..6f8a370d2b1d5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1648,6 +1648,9 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
 	case MSR_IA32_ARCH_CAPABILITIES:
 		msr->data = kvm_get_arch_capabilities();
 		break;
+	case MSR_IA32_PERF_CAPABILITIES:
+		msr->data = kvm_caps.supported_perf_cap;
+		break;
 	case MSR_IA32_UCODE_REV:
 		rdmsrl_safe(msr->index, &msr->data);
 		break;
-- 
GitLab


From 686e0f0324f07ea1979462f659b24b1195996fe0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:03:14 +0000
Subject: [PATCH 1171/4122] KVM: x86: Directly query supported
 PERF_CAPABILITIES for WRMSR checks

Use kvm_caps.supported_perf_cap directly instead of bouncing through
kvm_get_msr_feature() when checking the incoming value for writes to
PERF_CAPABILITIES.

Note, kvm_get_msr_feature() is guaranteed to succeed when getting
PERF_CAPABILITIES, i.e. dropping that check is a nop.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221006000314.73240-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 6f8a370d2b1d5..30a5365f4b320 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3563,20 +3563,15 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		vcpu->arch.arch_capabilities = data;
 		break;
-	case MSR_IA32_PERF_CAPABILITIES: {
-		struct kvm_msr_entry msr_ent = {.index = msr, .data = 0};
-
+	case MSR_IA32_PERF_CAPABILITIES:
 		if (!msr_info->host_initiated)
 			return 1;
-		if (kvm_get_msr_feature(&msr_ent))
-			return 1;
-		if (data & ~msr_ent.data)
+		if (data & ~kvm_caps.supported_perf_cap)
 			return 1;
 
 		vcpu->arch.perf_capabilities = data;
 		kvm_pmu_refresh(vcpu);
 		return 0;
-	}
 	case MSR_EFER:
 		return set_efer(vcpu, msr_info);
 	case MSR_K7_HWCR:
-- 
GitLab


From 0f9edb8cab29667ef5ac50736bb1f9a0557e7f94 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Tue, 13 Sep 2022 17:05:37 +0800
Subject: [PATCH 1172/4122] KVM: x86: remove obsolete
 kvm_mmu_gva_to_gpa_fetch()

There's no caller. Remove it.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220913090537.25195-1-linmiaohe@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 --
 arch/x86/kvm/x86.c              | 10 ----------
 2 files changed, 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f05ebaa26f0ff..1723a357190d6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1909,8 +1909,6 @@ void kvm_mmu_free_roots(struct kvm *kvm, struct kvm_mmu *mmu,
 void kvm_mmu_free_guest_mode_roots(struct kvm *kvm, struct kvm_mmu *mmu);
 gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
 			      struct x86_exception *exception);
-gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
-			       struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
 			       struct x86_exception *exception);
 gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 30a5365f4b320..3c63ba5512e9a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7154,16 +7154,6 @@ gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva,
 }
 EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
 
- gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
-				struct x86_exception *exception)
-{
-	struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
-
-	u64 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
-	access |= PFERR_FETCH_MASK;
-	return mmu->gva_to_gpa(vcpu, mmu, gva, access, exception);
-}
-
 gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva,
 			       struct x86_exception *exception)
 {
-- 
GitLab


From fa3e42037ef53c2aea85faf51d6f947e16dd8fc5 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Tue, 13 Sep 2022 17:17:25 +0800
Subject: [PATCH 1173/4122] KVM: x86/mmu: fix some comment typos

Fix some typos in comments.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220913091725.35953-1-linmiaohe@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c  | 2 +-
 arch/x86/kvm/mmu/spte.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 6f81539061d64..639ac64a4e8c9 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1894,7 +1894,7 @@ static bool is_obsolete_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
 	if (sp->role.invalid)
 		return true;
 
-	/* TDP MMU pages due not use the MMU generation. */
+	/* TDP MMU pages do not use the MMU generation. */
 	return !sp->tdp_mmu_page &&
 	       unlikely(sp->mmu_valid_gen != kvm->arch.mmu_valid_gen);
 }
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 7670c13ce251b..79560d77aa4c7 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -188,7 +188,7 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
  * should not modify the SPTE.
  *
  * Use a semi-arbitrary value that doesn't set RWX bits, i.e. is not-present on
- * bot AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
+ * both AMD and Intel CPUs, and doesn't set PFN bits, i.e. doesn't create a L1TF
  * vulnerability.  Use only low bits to avoid 64-bit immediates.
  *
  * Only used by the TDP MMU.
-- 
GitLab


From 3adbdf81038801ee84367845e78bb1a3b36bde39 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Tue, 13 Sep 2022 16:54:52 +0800
Subject: [PATCH 1174/4122] KVM: x86/mmu: use helper macro SPTE_ENT_PER_PAGE

Use helper macro SPTE_ENT_PER_PAGE to get the number of spte entries
per page. Minor readability improvement.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220913085452.25561-1-linmiaohe@huawei.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 639ac64a4e8c9..2640871bdcf17 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1645,7 +1645,7 @@ static int is_empty_shadow_page(u64 *spt)
 	u64 *pos;
 	u64 *end;
 
-	for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
+	for (pos = spt, end = pos + SPTE_ENT_PER_PAGE; pos != end; pos++)
 		if (is_shadow_present_pte(*pos)) {
 			printk(KERN_ERR "%s: %p %llx\n", __func__,
 			       pos, *pos);
-- 
GitLab


From bb5c8abea09453d8b137e6613980b8e257485868 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Tue, 30 Aug 2022 15:52:09 -0700
Subject: [PATCH 1175/4122] KVM: x86: Insert "AMD" in KVM_X86_FEATURE_PSFD

Intel and AMD have separate CPUID bits for each SPEC_CTRL bit. In the
case of every bit other than PFSD, the Intel CPUID bit has no vendor
name qualifier, but the AMD CPUID bit does. For consistency, rename
KVM_X86_FEATURE_PSFD to KVM_X86_FEATURE_AMD_PSFD.

No functional change intended.

Signed-off-by: Jim Mattson <jmattson@google.com>
Cc: Babu Moger <Babu.Moger@amd.com>
Message-Id: <20220830225210.2381310-1-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 62bc7a01cecca..6b5912578eddc 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -62,7 +62,7 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
  * This one is tied to SSB in the user API, and not
  * visible in /proc/cpuinfo.
  */
-#define KVM_X86_FEATURE_PSFD		(13*32+28) /* Predictive Store Forwarding Disable */
+#define KVM_X86_FEATURE_AMD_PSFD	(13*32+28) /* Predictive Store Forwarding Disable */
 
 #define F feature_bit
 #define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0)
@@ -694,7 +694,7 @@ void kvm_set_cpu_caps(void)
 		F(CLZERO) | F(XSAVEERPTR) |
 		F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
 		F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON) |
-		__feature_bit(KVM_X86_FEATURE_PSFD)
+		__feature_bit(KVM_X86_FEATURE_AMD_PSFD)
 	);
 
 	/*
-- 
GitLab


From 00009406f0dbc53b95b9062c0cc297d6893ff394 Mon Sep 17 00:00:00 2001
From: Rafael Mendonca <rafaelmendsr@gmail.com>
Date: Thu, 20 Oct 2022 23:01:13 -0300
Subject: [PATCH 1176/4122] x86/kvm: Remove unused virt to phys translation in
 kvm_guest_cpu_init()

Presumably, this was introduced due to a conflict resolution with
commit ef68017eb570 ("x86/kvm: Handle async page faults directly through
do_page_fault()"), given that the last posted version [1] of the blamed
commit was not based on the aforementioned commit.

[1] https://lore.kernel.org/kvm/20200525144125.143875-9-vkuznets@redhat.com/

Fixes: b1d405751cd5 ("KVM: x86: Switch KVM guest to using interrupts for page ready APF delivery")
Signed-off-by: Rafael Mendonca <rafaelmendsr@gmail.com>
Message-Id: <20221021020113.922027-1-rafaelmendsr@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kernel/kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index d4e48b4a438b2..cf886f86038a0 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -349,7 +349,7 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
 static void kvm_guest_cpu_init(void)
 {
 	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
-		u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
+		u64 pa;
 
 		WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));
 
-- 
GitLab


From 07a368b3f55a79d33cad113d506b279e04fd2a00 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:27 +0300
Subject: [PATCH 1177/4122] bug: introduce ASSERT_STRUCT_OFFSET

ASSERT_STRUCT_OFFSET allows to assert during the build of
the kernel that a field in a struct have an expected offset.

KVM used to have such macro, but there is almost nothing KVM specific
in it so move it to build_bug.h, so that it can be used in other
places in KVM.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-10-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx/vmcs12.h | 5 ++---
 include/linux/build_bug.h | 9 +++++++++
 2 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 746129ddd5ae0..01936013428b5 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -208,9 +208,8 @@ struct __packed vmcs12 {
 /*
  * For save/restore compatibility, the vmcs12 field offsets must not change.
  */
-#define CHECK_OFFSET(field, loc)				\
-	BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc),	\
-		"Offset of " #field " in struct vmcs12 has changed.")
+#define CHECK_OFFSET(field, loc) \
+	ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc)
 
 static inline void vmx_check_vmcs12_offsets(void)
 {
diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h
index e3a0be2c90ad9..3aa3640f8c181 100644
--- a/include/linux/build_bug.h
+++ b/include/linux/build_bug.h
@@ -77,4 +77,13 @@
 #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
 #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
 
+
+/*
+ * Compile time check that field has an expected offset
+ */
+#define ASSERT_STRUCT_OFFSET(type, field, expected_offset)	\
+	BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset),	\
+		"Offset of " #field " in " #type " has changed.")
+
+
 #endif	/* _LINUX_BUILD_BUG_H */
-- 
GitLab


From d08b48585309247d4d28051dd7a315eef5d1db26 Mon Sep 17 00:00:00 2001
From: Carlos Bilbao <carlos.bilbao@amd.com>
Date: Mon, 24 Oct 2022 11:44:48 -0500
Subject: [PATCH 1178/4122] KVM: SVM: Name and check reserved fields with
 structs offset

Rename reserved fields on all structs in arch/x86/include/asm/svm.h
following their offset within the structs. Include compile time checks for
this in the same place where other BUILD_BUG_ON for the structs are.

This also solves that fields of struct sev_es_save_area are named by their
order of appearance, but right now they jump from reserved_5 to reserved_7.

Link: https://lkml.org/lkml/2022/10/22/376
Signed-off-by: Carlos Bilbao <carlos.bilbao@amd.com>
Message-Id: <20221024164448.203351-1-carlos.bilbao@amd.com>
[Use ASSERT_STRUCT_OFFSET + fix a couple wrong offsets. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/svm.h | 93 ++++++++++++++++++++++++++------------
 arch/x86/kvm/svm/sev.c     |  2 +-
 2 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 0361626841bc0..4352b46dd20c9 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -293,12 +293,13 @@ struct vmcb_save_area {
 	struct vmcb_seg ldtr;
 	struct vmcb_seg idtr;
 	struct vmcb_seg tr;
-	u8 reserved_1[42];
+	/* Reserved fields are named following their struct offset */
+	u8 reserved_0xa0[42];
 	u8 vmpl;
 	u8 cpl;
-	u8 reserved_2[4];
+	u8 reserved_0xcc[4];
 	u64 efer;
-	u8 reserved_3[112];
+	u8 reserved_0xd8[112];
 	u64 cr4;
 	u64 cr3;
 	u64 cr0;
@@ -306,7 +307,7 @@ struct vmcb_save_area {
 	u64 dr6;
 	u64 rflags;
 	u64 rip;
-	u8 reserved_4[88];
+	u8 reserved_0x180[88];
 	u64 rsp;
 	u64 s_cet;
 	u64 ssp;
@@ -321,14 +322,14 @@ struct vmcb_save_area {
 	u64 sysenter_esp;
 	u64 sysenter_eip;
 	u64 cr2;
-	u8 reserved_5[32];
+	u8 reserved_0x248[32];
 	u64 g_pat;
 	u64 dbgctl;
 	u64 br_from;
 	u64 br_to;
 	u64 last_excp_from;
 	u64 last_excp_to;
-	u8 reserved_6[72];
+	u8 reserved_0x298[72];
 	u32 spec_ctrl;		/* Guest version of SPEC_CTRL at 0x2E0 */
 } __packed;
 
@@ -349,12 +350,12 @@ struct sev_es_save_area {
 	u64 vmpl2_ssp;
 	u64 vmpl3_ssp;
 	u64 u_cet;
-	u8 reserved_1[2];
+	u8 reserved_0xc8[2];
 	u8 vmpl;
 	u8 cpl;
-	u8 reserved_2[4];
+	u8 reserved_0xcc[4];
 	u64 efer;
-	u8 reserved_3[104];
+	u8 reserved_0xd8[104];
 	u64 xss;
 	u64 cr4;
 	u64 cr3;
@@ -371,7 +372,7 @@ struct sev_es_save_area {
 	u64 dr1_addr_mask;
 	u64 dr2_addr_mask;
 	u64 dr3_addr_mask;
-	u8 reserved_4[24];
+	u8 reserved_0x1c0[24];
 	u64 rsp;
 	u64 s_cet;
 	u64 ssp;
@@ -386,21 +387,21 @@ struct sev_es_save_area {
 	u64 sysenter_esp;
 	u64 sysenter_eip;
 	u64 cr2;
-	u8 reserved_5[32];
+	u8 reserved_0x248[32];
 	u64 g_pat;
 	u64 dbgctl;
 	u64 br_from;
 	u64 br_to;
 	u64 last_excp_from;
 	u64 last_excp_to;
-	u8 reserved_7[80];
+	u8 reserved_0x298[80];
 	u32 pkru;
-	u8 reserved_8[20];
-	u64 reserved_9;		/* rax already available at 0x01f8 */
+	u32 tsc_aux;
+	u8 reserved_0x2f0[24];
 	u64 rcx;
 	u64 rdx;
 	u64 rbx;
-	u64 reserved_10;	/* rsp already available at 0x01d8 */
+	u64 reserved_0x320;	/* rsp already available at 0x01d8 */
 	u64 rbp;
 	u64 rsi;
 	u64 rdi;
@@ -412,7 +413,7 @@ struct sev_es_save_area {
 	u64 r13;
 	u64 r14;
 	u64 r15;
-	u8 reserved_11[16];
+	u8 reserved_0x380[16];
 	u64 guest_exit_info_1;
 	u64 guest_exit_info_2;
 	u64 guest_exit_int_info;
@@ -425,7 +426,7 @@ struct sev_es_save_area {
 	u64 pcpu_id;
 	u64 event_inj;
 	u64 xcr0;
-	u8 reserved_12[16];
+	u8 reserved_0x3f0[16];
 
 	/* Floating point area */
 	u64 x87_dp;
@@ -443,23 +444,23 @@ struct sev_es_save_area {
 } __packed;
 
 struct ghcb_save_area {
-	u8 reserved_1[203];
+	u8 reserved_0x0[203];
 	u8 cpl;
-	u8 reserved_2[116];
+	u8 reserved_0xcc[116];
 	u64 xss;
-	u8 reserved_3[24];
+	u8 reserved_0x148[24];
 	u64 dr7;
-	u8 reserved_4[16];
+	u8 reserved_0x168[16];
 	u64 rip;
-	u8 reserved_5[88];
+	u8 reserved_0x180[88];
 	u64 rsp;
-	u8 reserved_6[24];
+	u8 reserved_0x1e0[24];
 	u64 rax;
-	u8 reserved_7[264];
+	u8 reserved_0x200[264];
 	u64 rcx;
 	u64 rdx;
 	u64 rbx;
-	u8 reserved_8[8];
+	u8 reserved_0x320[8];
 	u64 rbp;
 	u64 rsi;
 	u64 rdi;
@@ -471,12 +472,12 @@ struct ghcb_save_area {
 	u64 r13;
 	u64 r14;
 	u64 r15;
-	u8 reserved_9[16];
+	u8 reserved_0x380[16];
 	u64 sw_exit_code;
 	u64 sw_exit_info_1;
 	u64 sw_exit_info_2;
 	u64 sw_scratch;
-	u8 reserved_10[56];
+	u8 reserved_0x3b0[56];
 	u64 xcr0;
 	u8 valid_bitmap[16];
 	u64 x87_state_gpa;
@@ -490,7 +491,7 @@ struct ghcb {
 
 	u8 shared_buffer[GHCB_SHARED_BUF_SIZE];
 
-	u8 reserved_1[10];
+	u8 reserved_0xff0[10];
 	u16 protocol_version;	/* negotiated SEV-ES/GHCB protocol version */
 	u32 ghcb_usage;
 } __packed;
@@ -502,6 +503,9 @@ struct ghcb {
 #define EXPECTED_VMCB_CONTROL_AREA_SIZE		1024
 #define EXPECTED_GHCB_SIZE			PAGE_SIZE
 
+#define BUILD_BUG_RESERVED_OFFSET(x, y) \
+	ASSERT_STRUCT_OFFSET(struct x, reserved ## _ ## y, y)
+
 static inline void __unused_size_checks(void)
 {
 	BUILD_BUG_ON(sizeof(struct vmcb_save_area)	!= EXPECTED_VMCB_SAVE_AREA_SIZE);
@@ -509,6 +513,39 @@ static inline void __unused_size_checks(void)
 	BUILD_BUG_ON(sizeof(struct sev_es_save_area)	!= EXPECTED_SEV_ES_SAVE_AREA_SIZE);
 	BUILD_BUG_ON(sizeof(struct vmcb_control_area)	!= EXPECTED_VMCB_CONTROL_AREA_SIZE);
 	BUILD_BUG_ON(sizeof(struct ghcb)		!= EXPECTED_GHCB_SIZE);
+
+	/* Check offsets of reserved fields */
+
+	BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xa0);
+	BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xcc);
+	BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0xd8);
+	BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x180);
+	BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x248);
+	BUILD_BUG_RESERVED_OFFSET(vmcb_save_area, 0x298);
+
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xc8);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xcc);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0xd8);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x1c0);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x248);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x298);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x2f0);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x320);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x380);
+	BUILD_BUG_RESERVED_OFFSET(sev_es_save_area, 0x3f0);
+
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x0);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0xcc);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x148);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x168);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x180);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x1e0);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x200);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x320);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x380);
+	BUILD_BUG_RESERVED_OFFSET(ghcb_save_area, 0x3b0);
+
+	BUILD_BUG_RESERVED_OFFSET(ghcb, 0xff0);
 }
 
 struct vmcb {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index efaaef2b7ae11..69dbf17f0d6ad 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -2648,7 +2648,7 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
 		ghcb_scratch_beg = control->ghcb_gpa +
 				   offsetof(struct ghcb, shared_buffer);
 		ghcb_scratch_end = control->ghcb_gpa +
-				   offsetof(struct ghcb, reserved_1);
+				   offsetof(struct ghcb, reserved_0xff0);
 
 		/*
 		 * If the scratch area begins within the GHCB, it must be
-- 
GitLab


From b0b42197b5c6f0d9447e5b710d64c671be8deec1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 29 Sep 2022 13:20:09 -0400
Subject: [PATCH 1179/4122] KVM: x86: start moving SMM-related functions to new
 files

Create a new header and source with code related to system management
mode emulation.  Entry and exit will move there too; for now,
opportunistically rename put_smstate to PUT_SMSTATE while moving
it to smm.h, and adjust the SMM state saving code.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-2-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   6 --
 arch/x86/kvm/Makefile           |   1 +
 arch/x86/kvm/emulate.c          |   1 +
 arch/x86/kvm/kvm_cache_regs.h   |   5 --
 arch/x86/kvm/lapic.c            |   8 +-
 arch/x86/kvm/lapic.h            |   2 +-
 arch/x86/kvm/mmu/mmu.c          |   1 +
 arch/x86/kvm/smm.c              |  37 ++++++++
 arch/x86/kvm/smm.h              |  25 ++++++
 arch/x86/kvm/svm/nested.c       |   1 +
 arch/x86/kvm/svm/svm.c          |   5 +-
 arch/x86/kvm/vmx/nested.c       |   1 +
 arch/x86/kvm/vmx/vmx.c          |   1 +
 arch/x86/kvm/x86.c              | 148 ++++++++++++--------------------
 14 files changed, 132 insertions(+), 110 deletions(-)
 create mode 100644 arch/x86/kvm/smm.c
 create mode 100644 arch/x86/kvm/smm.h

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 1723a357190d6..c70e84e69cca6 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2087,12 +2087,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #endif
 }
 
-#define put_smstate(type, buf, offset, val)                      \
-	*(type *)((buf) + (offset) - 0x7e00) = val
-
-#define GET_SMSTATE(type, buf, offset)		\
-	(*(type *)((buf) + (offset) - 0x7e00))
-
 int kvm_cpu_dirty_log_size(void);
 
 int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index f453a0f96e243..b584cb0e06bd8 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -20,6 +20,7 @@ endif
 
 kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
 kvm-$(CONFIG_KVM_XEN)	+= xen.o
+kvm-y			+= smm.o
 
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4a43261d25a2a..eea29aac83bd8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,6 +30,7 @@
 #include "tss.h"
 #include "mmu.h"
 #include "pmu.h"
+#include "smm.h"
 
 /*
  * Operand types
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 3febc342360cc..c09174f73a344 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -200,9 +200,4 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
 	return vcpu->arch.hflags & HF_GUEST_MASK;
 }
 
-static inline bool is_smm(struct kvm_vcpu *vcpu)
-{
-	return vcpu->arch.hflags & HF_SMM_MASK;
-}
-
 #endif
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index d7639d126e6c7..1bb63746e9910 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -42,6 +42,7 @@
 #include "x86.h"
 #include "cpuid.h"
 #include "hyperv.h"
+#include "smm.h"
 
 #ifndef CONFIG_X86_64
 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -1170,9 +1171,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 		break;
 
 	case APIC_DM_SMI:
-		result = 1;
-		kvm_make_request(KVM_REQ_SMI, vcpu);
-		kvm_vcpu_kick(vcpu);
+		if (!kvm_inject_smi(vcpu)) {
+			kvm_vcpu_kick(vcpu);
+			result = 1;
+		}
 		break;
 
 	case APIC_DM_NMI:
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index a5ac4a5a51792..28e3769066e21 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -7,7 +7,7 @@
 #include <linux/kvm_host.h>
 
 #include "hyperv.h"
-#include "kvm_cache_regs.h"
+#include "smm.h"
 
 #define KVM_APIC_INIT		0
 #define KVM_APIC_SIPI		1
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 2640871bdcf17..f8c92a4a35faa 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -22,6 +22,7 @@
 #include "tdp_mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "smm.h"
 #include "kvm_emulate.h"
 #include "cpuid.h"
 #include "spte.h"
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
new file mode 100644
index 0000000000000..b91c48d91f6ed
--- /dev/null
+++ b/arch/x86/kvm/smm.c
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <linux/kvm_host.h>
+#include "x86.h"
+#include "kvm_cache_regs.h"
+#include "kvm_emulate.h"
+#include "smm.h"
+#include "trace.h"
+
+void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
+{
+	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
+
+	if (entering_smm) {
+		vcpu->arch.hflags |= HF_SMM_MASK;
+	} else {
+		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
+
+		/* Process a latched INIT or SMI, if any.  */
+		kvm_make_request(KVM_REQ_EVENT, vcpu);
+
+		/*
+		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
+		 * on SMM exit we still need to reload them from
+		 * guest memory
+		 */
+		vcpu->arch.pdptrs_from_userspace = false;
+	}
+
+	kvm_mmu_reset_context(vcpu);
+}
+
+void process_smi(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.smi_pending = true;
+	kvm_make_request(KVM_REQ_EVENT, vcpu);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
new file mode 100644
index 0000000000000..d85d4ccd32dd1
--- /dev/null
+++ b/arch/x86/kvm/smm.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef ASM_KVM_SMM_H
+#define ASM_KVM_SMM_H
+
+#define GET_SMSTATE(type, buf, offset)		\
+	(*(type *)((buf) + (offset) - 0x7e00))
+
+#define PUT_SMSTATE(type, buf, offset, val)                      \
+	*(type *)((buf) + (offset) - 0x7e00) = val
+
+static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
+{
+	kvm_make_request(KVM_REQ_SMI, vcpu);
+	return 0;
+}
+
+static inline bool is_smm(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hflags & HF_SMM_MASK;
+}
+
+void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
+void process_smi(struct kvm_vcpu *vcpu);
+
+#endif
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 4c620999d230a..cc0fd75f7cbab 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -25,6 +25,7 @@
 #include "trace.h"
 #include "mmu.h"
 #include "x86.h"
+#include "smm.h"
 #include "cpuid.h"
 #include "lapic.h"
 #include "svm.h"
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3fc8e4999891a..3bb07ec789859 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -6,6 +6,7 @@
 #include "mmu.h"
 #include "kvm_cache_regs.h"
 #include "x86.h"
+#include "smm.h"
 #include "cpuid.h"
 #include "pmu.h"
 
@@ -4407,9 +4408,9 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 		return 0;
 
 	/* FED8h - SVM Guest */
-	put_smstate(u64, smstate, 0x7ed8, 1);
+	PUT_SMSTATE(u64, smstate, 0x7ed8, 1);
 	/* FEE0h - SVM Guest VMCB Physical Address */
-	put_smstate(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+	PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
 
 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 0c62352dda6ab..61a2e551640a0 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -16,6 +16,7 @@
 #include "trace.h"
 #include "vmx.h"
 #include "x86.h"
+#include "smm.h"
 
 static bool __read_mostly enable_shadow_vmcs = 1;
 module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7a7e14d4d78c2..49065614a3dbb 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -66,6 +66,7 @@
 #include "vmcs12.h"
 #include "vmx.h"
 #include "x86.h"
+#include "smm.h"
 
 MODULE_AUTHOR("Qumranet");
 MODULE_LICENSE("GPL");
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3c63ba5512e9a..d936b0f15d7da 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -30,6 +30,7 @@
 #include "hyperv.h"
 #include "lapic.h"
 #include "xen.h"
+#include "smm.h"
 
 #include <linux/clocksource.h>
 #include <linux/interrupt.h>
@@ -119,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
-static void process_smi(struct kvm_vcpu *vcpu);
 static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 static void store_regs(struct kvm_vcpu *vcpu);
@@ -4889,13 +4889,6 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
-static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
-{
-	kvm_make_request(KVM_REQ_SMI, vcpu);
-
-	return 0;
-}
-
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 					   struct kvm_tpr_access_ctl *tac)
 {
@@ -5118,8 +5111,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 	memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
-static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm);
-
 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 					      struct kvm_vcpu_events *events)
 {
@@ -5572,7 +5563,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		break;
 	}
 	case KVM_SMI: {
-		r = kvm_vcpu_ioctl_smi(vcpu);
+		r = kvm_inject_smi(vcpu);
 		break;
 	}
 	case KVM_SET_CPUID: {
@@ -8569,29 +8560,6 @@ static bool retry_instruction(struct x86_emulate_ctxt *ctxt,
 static int complete_emulated_mmio(struct kvm_vcpu *vcpu);
 static int complete_emulated_pio(struct kvm_vcpu *vcpu);
 
-static void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
-{
-	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
-
-	if (entering_smm) {
-		vcpu->arch.hflags |= HF_SMM_MASK;
-	} else {
-		vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);
-
-		/* Process a latched INIT or SMI, if any.  */
-		kvm_make_request(KVM_REQ_EVENT, vcpu);
-
-		/*
-		 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
-		 * on SMM exit we still need to reload them from
-		 * guest memory
-		 */
-		vcpu->arch.pdptrs_from_userspace = false;
-	}
-
-	kvm_mmu_reset_context(vcpu);
-}
-
 static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
 				unsigned long *db)
 {
@@ -10088,16 +10056,16 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
 	int offset;
 
 	kvm_get_segment(vcpu, &seg, n);
-	put_smstate(u32, buf, 0x7fa8 + n * 4, seg.selector);
+	PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
 
 	if (n < 3)
 		offset = 0x7f84 + n * 12;
 	else
 		offset = 0x7f2c + (n - 3) * 12;
 
-	put_smstate(u32, buf, offset + 8, seg.base);
-	put_smstate(u32, buf, offset + 4, seg.limit);
-	put_smstate(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+	PUT_SMSTATE(u32, buf, offset + 8, seg.base);
+	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+	PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
 }
 
 #ifdef CONFIG_X86_64
@@ -10111,10 +10079,10 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 	offset = 0x7e00 + n * 16;
 
 	flags = enter_smm_get_segment_flags(&seg) >> 8;
-	put_smstate(u16, buf, offset, seg.selector);
-	put_smstate(u16, buf, offset + 2, flags);
-	put_smstate(u32, buf, offset + 4, seg.limit);
-	put_smstate(u64, buf, offset + 8, seg.base);
+	PUT_SMSTATE(u16, buf, offset, seg.selector);
+	PUT_SMSTATE(u16, buf, offset + 2, flags);
+	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+	PUT_SMSTATE(u64, buf, offset + 8, seg.base);
 }
 #endif
 
@@ -10125,47 +10093,47 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
 	unsigned long val;
 	int i;
 
-	put_smstate(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
-	put_smstate(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
-	put_smstate(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
-	put_smstate(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
 
 	for (i = 0; i < 8; i++)
-		put_smstate(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+		PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
 
 	kvm_get_dr(vcpu, 6, &val);
-	put_smstate(u32, buf, 0x7fcc, (u32)val);
+	PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
 	kvm_get_dr(vcpu, 7, &val);
-	put_smstate(u32, buf, 0x7fc8, (u32)val);
+	PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
 
 	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
-	put_smstate(u32, buf, 0x7fc4, seg.selector);
-	put_smstate(u32, buf, 0x7f64, seg.base);
-	put_smstate(u32, buf, 0x7f60, seg.limit);
-	put_smstate(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
+	PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
+	PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
+	PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
+	PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
 
 	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
-	put_smstate(u32, buf, 0x7fc0, seg.selector);
-	put_smstate(u32, buf, 0x7f80, seg.base);
-	put_smstate(u32, buf, 0x7f7c, seg.limit);
-	put_smstate(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+	PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
+	PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
+	PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
+	PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
 
 	static_call(kvm_x86_get_gdt)(vcpu, &dt);
-	put_smstate(u32, buf, 0x7f74, dt.address);
-	put_smstate(u32, buf, 0x7f70, dt.size);
+	PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
+	PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
 
 	static_call(kvm_x86_get_idt)(vcpu, &dt);
-	put_smstate(u32, buf, 0x7f58, dt.address);
-	put_smstate(u32, buf, 0x7f54, dt.size);
+	PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
+	PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
 
 	for (i = 0; i < 6; i++)
 		enter_smm_save_seg_32(vcpu, buf, i);
 
-	put_smstate(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
 
 	/* revision id */
-	put_smstate(u32, buf, 0x7efc, 0x00020000);
-	put_smstate(u32, buf, 0x7ef8, vcpu->arch.smbase);
+	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
+	PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
 }
 
 #ifdef CONFIG_X86_64
@@ -10177,46 +10145,46 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
 	int i;
 
 	for (i = 0; i < 16; i++)
-		put_smstate(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+		PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
 
-	put_smstate(u64, buf, 0x7f78, kvm_rip_read(vcpu));
-	put_smstate(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+	PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
 
 	kvm_get_dr(vcpu, 6, &val);
-	put_smstate(u64, buf, 0x7f68, val);
+	PUT_SMSTATE(u64, buf, 0x7f68, val);
 	kvm_get_dr(vcpu, 7, &val);
-	put_smstate(u64, buf, 0x7f60, val);
+	PUT_SMSTATE(u64, buf, 0x7f60, val);
 
-	put_smstate(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
-	put_smstate(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
-	put_smstate(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+	PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+	PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+	PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
 
-	put_smstate(u32, buf, 0x7f00, vcpu->arch.smbase);
+	PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
 
 	/* revision id */
-	put_smstate(u32, buf, 0x7efc, 0x00020064);
+	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
 
-	put_smstate(u64, buf, 0x7ed0, vcpu->arch.efer);
+	PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
 
 	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
-	put_smstate(u16, buf, 0x7e90, seg.selector);
-	put_smstate(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
-	put_smstate(u32, buf, 0x7e94, seg.limit);
-	put_smstate(u64, buf, 0x7e98, seg.base);
+	PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
+	PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
+	PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
+	PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
 
 	static_call(kvm_x86_get_idt)(vcpu, &dt);
-	put_smstate(u32, buf, 0x7e84, dt.size);
-	put_smstate(u64, buf, 0x7e88, dt.address);
+	PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
+	PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
 
 	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
-	put_smstate(u16, buf, 0x7e70, seg.selector);
-	put_smstate(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
-	put_smstate(u32, buf, 0x7e74, seg.limit);
-	put_smstate(u64, buf, 0x7e78, seg.base);
+	PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
+	PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
+	PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
+	PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
 
 	static_call(kvm_x86_get_gdt)(vcpu, &dt);
-	put_smstate(u32, buf, 0x7e64, dt.size);
-	put_smstate(u64, buf, 0x7e68, dt.address);
+	PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
+	PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
 
 	for (i = 0; i < 6; i++)
 		enter_smm_save_seg_64(vcpu, buf, i);
@@ -10302,12 +10270,6 @@ static void enter_smm(struct kvm_vcpu *vcpu)
 	kvm_mmu_reset_context(vcpu);
 }
 
-static void process_smi(struct kvm_vcpu *vcpu)
-{
-	vcpu->arch.smi_pending = true;
-	kvm_make_request(KVM_REQ_EVENT, vcpu);
-}
-
 void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
 				       unsigned long *vcpu_bitmap)
 {
-- 
GitLab


From c53da4f3af6e613e82f88abc6eb988e44c5dadd7 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 29 Sep 2022 13:20:10 -0400
Subject: [PATCH 1180/4122] KVM: x86: move SMM entry to a new file

Some users of KVM implement the UEFI variable store through a paravirtual
device that does not require the "SMM lockbox" component of edk2, and
would like to compile out system management mode.  In preparation for
that, move the SMM entry code out of x86.c and into a new file.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-3-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |   1 +
 arch/x86/kvm/smm.c              | 235 +++++++++++++++++++++++++++++++
 arch/x86/kvm/smm.h              |   1 +
 arch/x86/kvm/x86.c              | 239 +-------------------------------
 4 files changed, 239 insertions(+), 237 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index c70e84e69cca6..612ef60631c1f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1844,6 +1844,7 @@ int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu);
 int kvm_emulate_wbinvd(struct kvm_vcpu *vcpu);
 
 void kvm_get_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
+void kvm_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg);
 int kvm_load_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector, int seg);
 void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index b91c48d91f6ed..26a6859e421fe 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -5,6 +5,7 @@
 #include "kvm_cache_regs.h"
 #include "kvm_emulate.h"
 #include "smm.h"
+#include "cpuid.h"
 #include "trace.h"
 
 void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
@@ -35,3 +36,237 @@ void process_smi(struct kvm_vcpu *vcpu)
 	vcpu->arch.smi_pending = true;
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
+
+static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
+{
+	u32 flags = 0;
+	flags |= seg->g       << 23;
+	flags |= seg->db      << 22;
+	flags |= seg->l       << 21;
+	flags |= seg->avl     << 20;
+	flags |= seg->present << 15;
+	flags |= seg->dpl     << 13;
+	flags |= seg->s       << 12;
+	flags |= seg->type    << 8;
+	return flags;
+}
+
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+	struct kvm_segment seg;
+	int offset;
+
+	kvm_get_segment(vcpu, &seg, n);
+	PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
+
+	if (n < 3)
+		offset = 0x7f84 + n * 12;
+	else
+		offset = 0x7f2c + (n - 3) * 12;
+
+	PUT_SMSTATE(u32, buf, offset + 8, seg.base);
+	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+	PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+}
+
+#ifdef CONFIG_X86_64
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+{
+	struct kvm_segment seg;
+	int offset;
+	u16 flags;
+
+	kvm_get_segment(vcpu, &seg, n);
+	offset = 0x7e00 + n * 16;
+
+	flags = enter_smm_get_segment_flags(&seg) >> 8;
+	PUT_SMSTATE(u16, buf, offset, seg.selector);
+	PUT_SMSTATE(u16, buf, offset + 2, flags);
+	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
+	PUT_SMSTATE(u64, buf, offset + 8, seg.base);
+}
+#endif
+
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+{
+	struct desc_ptr dt;
+	struct kvm_segment seg;
+	unsigned long val;
+	int i;
+
+	PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+
+	for (i = 0; i < 8; i++)
+		PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+
+	kvm_get_dr(vcpu, 6, &val);
+	PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
+	kvm_get_dr(vcpu, 7, &val);
+	PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
+
+	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+	PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
+	PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
+	PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
+	PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
+
+	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+	PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
+	PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
+	PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
+	PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+
+	static_call(kvm_x86_get_gdt)(vcpu, &dt);
+	PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
+	PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
+
+	static_call(kvm_x86_get_idt)(vcpu, &dt);
+	PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
+	PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
+
+	for (i = 0; i < 6; i++)
+		enter_smm_save_seg_32(vcpu, buf, i);
+
+	PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+
+	/* revision id */
+	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
+	PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
+}
+
+#ifdef CONFIG_X86_64
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+{
+	struct desc_ptr dt;
+	struct kvm_segment seg;
+	unsigned long val;
+	int i;
+
+	for (i = 0; i < 16; i++)
+		PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+
+	PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
+	PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
+
+	kvm_get_dr(vcpu, 6, &val);
+	PUT_SMSTATE(u64, buf, 0x7f68, val);
+	kvm_get_dr(vcpu, 7, &val);
+	PUT_SMSTATE(u64, buf, 0x7f60, val);
+
+	PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
+	PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
+	PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+
+	PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
+
+	/* revision id */
+	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
+
+	PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
+
+	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
+	PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
+	PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
+	PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
+	PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
+
+	static_call(kvm_x86_get_idt)(vcpu, &dt);
+	PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
+	PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
+
+	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
+	PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
+	PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
+	PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
+	PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
+
+	static_call(kvm_x86_get_gdt)(vcpu, &dt);
+	PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
+	PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
+
+	for (i = 0; i < 6; i++)
+		enter_smm_save_seg_64(vcpu, buf, i);
+}
+#endif
+
+void enter_smm(struct kvm_vcpu *vcpu)
+{
+	struct kvm_segment cs, ds;
+	struct desc_ptr dt;
+	unsigned long cr0;
+	char buf[512];
+
+	memset(buf, 0, 512);
+#ifdef CONFIG_X86_64
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+		enter_smm_save_state_64(vcpu, buf);
+	else
+#endif
+		enter_smm_save_state_32(vcpu, buf);
+
+	/*
+	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
+	 * state (e.g. leave guest mode) after we've saved the state into the
+	 * SMM state-save area.
+	 */
+	static_call(kvm_x86_enter_smm)(vcpu, buf);
+
+	kvm_smm_changed(vcpu, true);
+	kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+
+	if (static_call(kvm_x86_get_nmi_mask)(vcpu))
+		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+	else
+		static_call(kvm_x86_set_nmi_mask)(vcpu, true);
+
+	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
+	kvm_rip_write(vcpu, 0x8000);
+
+	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
+	static_call(kvm_x86_set_cr0)(vcpu, cr0);
+	vcpu->arch.cr0 = cr0;
+
+	static_call(kvm_x86_set_cr4)(vcpu, 0);
+
+	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
+	dt.address = dt.size = 0;
+	static_call(kvm_x86_set_idt)(vcpu, &dt);
+
+	kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+
+	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
+	cs.base = vcpu->arch.smbase;
+
+	ds.selector = 0;
+	ds.base = 0;
+
+	cs.limit    = ds.limit = 0xffffffff;
+	cs.type     = ds.type = 0x3;
+	cs.dpl      = ds.dpl = 0;
+	cs.db       = ds.db = 0;
+	cs.s        = ds.s = 1;
+	cs.l        = ds.l = 0;
+	cs.g        = ds.g = 1;
+	cs.avl      = ds.avl = 0;
+	cs.present  = ds.present = 1;
+	cs.unusable = ds.unusable = 0;
+	cs.padding  = ds.padding = 0;
+
+	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
+	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
+	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
+	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
+	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
+	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
+
+#ifdef CONFIG_X86_64
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
+		static_call(kvm_x86_set_efer)(vcpu, 0);
+#endif
+
+	kvm_update_cpuid_runtime(vcpu);
+	kvm_mmu_reset_context(vcpu);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index d85d4ccd32dd1..aacc6dac2c99a 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -20,6 +20,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)
 }
 
 void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
+void enter_smm(struct kvm_vcpu *vcpu);
 void process_smi(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d936b0f15d7da..0730b16564f99 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -120,7 +120,6 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
 
 static void update_cr8_intercept(struct kvm_vcpu *vcpu);
 static void process_nmi(struct kvm_vcpu *vcpu);
-static void enter_smm(struct kvm_vcpu *vcpu);
 static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
 static void store_regs(struct kvm_vcpu *vcpu);
 static int sync_regs(struct kvm_vcpu *vcpu);
@@ -7108,8 +7107,8 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
 	return handled;
 }
 
-static void kvm_set_segment(struct kvm_vcpu *vcpu,
-			struct kvm_segment *var, int seg)
+void kvm_set_segment(struct kvm_vcpu *vcpu,
+		     struct kvm_segment *var, int seg)
 {
 	static_call(kvm_x86_set_segment)(vcpu, var, seg);
 }
@@ -10036,240 +10035,6 @@ static void process_nmi(struct kvm_vcpu *vcpu)
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 }
 
-static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
-{
-	u32 flags = 0;
-	flags |= seg->g       << 23;
-	flags |= seg->db      << 22;
-	flags |= seg->l       << 21;
-	flags |= seg->avl     << 20;
-	flags |= seg->present << 15;
-	flags |= seg->dpl     << 13;
-	flags |= seg->s       << 12;
-	flags |= seg->type    << 8;
-	return flags;
-}
-
-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
-{
-	struct kvm_segment seg;
-	int offset;
-
-	kvm_get_segment(vcpu, &seg, n);
-	PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
-
-	if (n < 3)
-		offset = 0x7f84 + n * 12;
-	else
-		offset = 0x7f2c + (n - 3) * 12;
-
-	PUT_SMSTATE(u32, buf, offset + 8, seg.base);
-	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
-	PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
-}
-
-#ifdef CONFIG_X86_64
-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
-{
-	struct kvm_segment seg;
-	int offset;
-	u16 flags;
-
-	kvm_get_segment(vcpu, &seg, n);
-	offset = 0x7e00 + n * 16;
-
-	flags = enter_smm_get_segment_flags(&seg) >> 8;
-	PUT_SMSTATE(u16, buf, offset, seg.selector);
-	PUT_SMSTATE(u16, buf, offset + 2, flags);
-	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
-	PUT_SMSTATE(u64, buf, offset + 8, seg.base);
-}
-#endif
-
-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
-{
-	struct desc_ptr dt;
-	struct kvm_segment seg;
-	unsigned long val;
-	int i;
-
-	PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
-
-	for (i = 0; i < 8; i++)
-		PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
-
-	kvm_get_dr(vcpu, 6, &val);
-	PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
-	kvm_get_dr(vcpu, 7, &val);
-	PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
-
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
-	PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
-	PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
-	PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
-	PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
-
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
-	PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
-	PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
-	PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
-	PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
-
-	static_call(kvm_x86_get_gdt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
-	PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
-
-	static_call(kvm_x86_get_idt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
-	PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
-
-	for (i = 0; i < 6; i++)
-		enter_smm_save_seg_32(vcpu, buf, i);
-
-	PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
-
-	/* revision id */
-	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
-	PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
-}
-
-#ifdef CONFIG_X86_64
-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
-{
-	struct desc_ptr dt;
-	struct kvm_segment seg;
-	unsigned long val;
-	int i;
-
-	for (i = 0; i < 16; i++)
-		PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
-
-	PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
-
-	kvm_get_dr(vcpu, 6, &val);
-	PUT_SMSTATE(u64, buf, 0x7f68, val);
-	kvm_get_dr(vcpu, 7, &val);
-	PUT_SMSTATE(u64, buf, 0x7f60, val);
-
-	PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
-	PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
-	PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
-
-	PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
-
-	/* revision id */
-	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
-
-	PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
-
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
-	PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
-	PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
-	PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
-	PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
-
-	static_call(kvm_x86_get_idt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
-	PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
-
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
-	PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
-	PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
-	PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
-	PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
-
-	static_call(kvm_x86_get_gdt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
-	PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
-
-	for (i = 0; i < 6; i++)
-		enter_smm_save_seg_64(vcpu, buf, i);
-}
-#endif
-
-static void enter_smm(struct kvm_vcpu *vcpu)
-{
-	struct kvm_segment cs, ds;
-	struct desc_ptr dt;
-	unsigned long cr0;
-	char buf[512];
-
-	memset(buf, 0, 512);
-#ifdef CONFIG_X86_64
-	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		enter_smm_save_state_64(vcpu, buf);
-	else
-#endif
-		enter_smm_save_state_32(vcpu, buf);
-
-	/*
-	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
-	 * state (e.g. leave guest mode) after we've saved the state into the
-	 * SMM state-save area.
-	 */
-	static_call(kvm_x86_enter_smm)(vcpu, buf);
-
-	kvm_smm_changed(vcpu, true);
-	kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
-
-	if (static_call(kvm_x86_get_nmi_mask)(vcpu))
-		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
-	else
-		static_call(kvm_x86_set_nmi_mask)(vcpu, true);
-
-	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
-	kvm_rip_write(vcpu, 0x8000);
-
-	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
-	static_call(kvm_x86_set_cr0)(vcpu, cr0);
-	vcpu->arch.cr0 = cr0;
-
-	static_call(kvm_x86_set_cr4)(vcpu, 0);
-
-	/* Undocumented: IDT limit is set to zero on entry to SMM.  */
-	dt.address = dt.size = 0;
-	static_call(kvm_x86_set_idt)(vcpu, &dt);
-
-	kvm_set_dr(vcpu, 7, DR7_FIXED_1);
-
-	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
-	cs.base = vcpu->arch.smbase;
-
-	ds.selector = 0;
-	ds.base = 0;
-
-	cs.limit    = ds.limit = 0xffffffff;
-	cs.type     = ds.type = 0x3;
-	cs.dpl      = ds.dpl = 0;
-	cs.db       = ds.db = 0;
-	cs.s        = ds.s = 1;
-	cs.l        = ds.l = 0;
-	cs.g        = ds.g = 1;
-	cs.avl      = ds.avl = 0;
-	cs.present  = ds.present = 1;
-	cs.unusable = ds.unusable = 0;
-	cs.padding  = ds.padding = 0;
-
-	kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
-	kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
-	kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
-	kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
-	kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
-	kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);
-
-#ifdef CONFIG_X86_64
-	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		static_call(kvm_x86_set_efer)(vcpu, 0);
-#endif
-
-	kvm_update_cpuid_runtime(vcpu);
-	kvm_mmu_reset_context(vcpu);
-}
-
 void kvm_make_scan_ioapic_request_mask(struct kvm *kvm,
 				       unsigned long *vcpu_bitmap)
 {
-- 
GitLab


From f1554150d3c694e30e92c681c20ce9714cac3d42 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 28 Oct 2022 06:01:26 -0400
Subject: [PATCH 1181/4122] KVM: x86: move SMM exit to a new file

Some users of KVM implement the UEFI variable store through a paravirtual
device that does not require the "SMM lockbox" component of edk2, and
would like to compile out system management mode.  In preparation for
that, move the SMM exit code out of emulate.c and into a new file.

The code is still written as a series of invocations of the emulator
callbacks, but the two exiting_smm and leave_smm callbacks are merged
into one, and all the code from em_rsm is now part of the callback.
This removes all knowledge of the format of the SMM save state area
from the emulator.  Further patches will clean up the code and
invoke KVM's own functions to access control registers, descriptor
caches, etc.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-4-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/emulate.c     | 356 +------------------------------------
 arch/x86/kvm/kvm_emulate.h |  34 +++-
 arch/x86/kvm/smm.c         | 316 ++++++++++++++++++++++++++++++++
 arch/x86/kvm/smm.h         |   1 +
 arch/x86/kvm/x86.c         |  14 --
 5 files changed, 351 insertions(+), 370 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index eea29aac83bd8..5cc3efa0e21c1 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -30,7 +30,6 @@
 #include "tss.h"
 #include "mmu.h"
 #include "pmu.h"
-#include "smm.h"
 
 /*
  * Operand types
@@ -243,37 +242,6 @@ enum x86_transfer_type {
 	X86_TRANSFER_TASK_SWITCH,
 };
 
-static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
-	if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
-		nr &= NR_EMULATOR_GPRS - 1;
-
-	if (!(ctxt->regs_valid & (1 << nr))) {
-		ctxt->regs_valid |= 1 << nr;
-		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
-	}
-	return ctxt->_regs[nr];
-}
-
-static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
-	if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
-		nr &= NR_EMULATOR_GPRS - 1;
-
-	BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
-	BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
-
-	ctxt->regs_valid |= 1 << nr;
-	ctxt->regs_dirty |= 1 << nr;
-	return &ctxt->_regs[nr];
-}
-
-static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
-{
-	reg_read(ctxt, nr);
-	return reg_write(ctxt, nr);
-}
-
 static void writeback_registers(struct x86_emulate_ctxt *ctxt)
 {
 	unsigned long dirty = ctxt->regs_dirty;
@@ -2339,335 +2307,15 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
 	return rc;
 }
 
-static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
-{
-#ifdef CONFIG_X86_64
-	return ctxt->ops->guest_has_long_mode(ctxt);
-#else
-	return false;
-#endif
-}
-
-static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
-{
-	desc->g    = (flags >> 23) & 1;
-	desc->d    = (flags >> 22) & 1;
-	desc->l    = (flags >> 21) & 1;
-	desc->avl  = (flags >> 20) & 1;
-	desc->p    = (flags >> 15) & 1;
-	desc->dpl  = (flags >> 13) & 3;
-	desc->s    = (flags >> 12) & 1;
-	desc->type = (flags >>  8) & 15;
-}
-
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
-			   int n)
-{
-	struct desc_struct desc;
-	int offset;
-	u16 selector;
-
-	selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
-
-	if (n < 3)
-		offset = 0x7f84 + n * 12;
-	else
-		offset = 0x7f2c + (n - 3) * 12;
-
-	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
-	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
-	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
-	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
-	return X86EMUL_CONTINUE;
-}
-
-#ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
-			   int n)
-{
-	struct desc_struct desc;
-	int offset;
-	u16 selector;
-	u32 base3;
-
-	offset = 0x7e00 + n * 16;
-
-	selector =                GET_SMSTATE(u16, smstate, offset);
-	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
-	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
-	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
-	base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
-
-	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
-	return X86EMUL_CONTINUE;
-}
-#endif
-
-static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
-				    u64 cr0, u64 cr3, u64 cr4)
-{
-	int bad;
-	u64 pcid;
-
-	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
-	pcid = 0;
-	if (cr4 & X86_CR4_PCIDE) {
-		pcid = cr3 & 0xfff;
-		cr3 &= ~0xfff;
-	}
-
-	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
-	if (bad)
-		return X86EMUL_UNHANDLEABLE;
-
-	/*
-	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
-	 * Then enable protected mode.	However, PCID cannot be enabled
-	 * if EFER.LMA=0, so set it separately.
-	 */
-	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
-	if (bad)
-		return X86EMUL_UNHANDLEABLE;
-
-	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
-	if (bad)
-		return X86EMUL_UNHANDLEABLE;
-
-	if (cr4 & X86_CR4_PCIDE) {
-		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
-		if (bad)
-			return X86EMUL_UNHANDLEABLE;
-		if (pcid) {
-			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
-			if (bad)
-				return X86EMUL_UNHANDLEABLE;
-		}
-
-	}
-
-	return X86EMUL_CONTINUE;
-}
-
-static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
-			     const char *smstate)
-{
-	struct desc_struct desc;
-	struct desc_ptr dt;
-	u16 selector;
-	u32 val, cr0, cr3, cr4;
-	int i;
-
-	cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
-	cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
-	ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
-	ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
-
-	for (i = 0; i < 8; i++)
-		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
-
-	val = GET_SMSTATE(u32, smstate, 0x7fcc);
-
-	if (ctxt->ops->set_dr(ctxt, 6, val))
-		return X86EMUL_UNHANDLEABLE;
-
-	val = GET_SMSTATE(u32, smstate, 0x7fc8);
-
-	if (ctxt->ops->set_dr(ctxt, 7, val))
-		return X86EMUL_UNHANDLEABLE;
-
-	selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
-	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
-	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
-	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
-	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
-
-	selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
-	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
-	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
-	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
-	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
-
-	dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
-	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
-	ctxt->ops->set_gdt(ctxt, &dt);
-
-	dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
-	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
-	ctxt->ops->set_idt(ctxt, &dt);
-
-	for (i = 0; i < 6; i++) {
-		int r = rsm_load_seg_32(ctxt, smstate, i);
-		if (r != X86EMUL_CONTINUE)
-			return r;
-	}
-
-	cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
-
-	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
-
-	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
-}
-
-#ifdef CONFIG_X86_64
-static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
-			     const char *smstate)
-{
-	struct desc_struct desc;
-	struct desc_ptr dt;
-	u64 val, cr0, cr3, cr4;
-	u32 base3;
-	u16 selector;
-	int i, r;
-
-	for (i = 0; i < 16; i++)
-		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
-
-	ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
-	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
-
-	val = GET_SMSTATE(u64, smstate, 0x7f68);
-
-	if (ctxt->ops->set_dr(ctxt, 6, val))
-		return X86EMUL_UNHANDLEABLE;
-
-	val = GET_SMSTATE(u64, smstate, 0x7f60);
-
-	if (ctxt->ops->set_dr(ctxt, 7, val))
-		return X86EMUL_UNHANDLEABLE;
-
-	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
-	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
-	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
-	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
-	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
-
-	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
-		return X86EMUL_UNHANDLEABLE;
-
-	selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
-	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
-	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
-	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
-	base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
-	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
-
-	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
-	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
-	ctxt->ops->set_idt(ctxt, &dt);
-
-	selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
-	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
-	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
-	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
-	base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
-	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
-
-	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
-	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
-	ctxt->ops->set_gdt(ctxt, &dt);
-
-	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
-	if (r != X86EMUL_CONTINUE)
-		return r;
-
-	for (i = 0; i < 6; i++) {
-		r = rsm_load_seg_64(ctxt, smstate, i);
-		if (r != X86EMUL_CONTINUE)
-			return r;
-	}
-
-	return X86EMUL_CONTINUE;
-}
-#endif
-
 static int em_rsm(struct x86_emulate_ctxt *ctxt)
 {
-	unsigned long cr0, cr4, efer;
-	char buf[512];
-	u64 smbase;
-	int ret;
-
 	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
 		return emulate_ud(ctxt);
 
-	smbase = ctxt->ops->get_smbase(ctxt);
-
-	ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
-	if (ret != X86EMUL_CONTINUE)
-		return X86EMUL_UNHANDLEABLE;
-
-	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
-		ctxt->ops->set_nmi_mask(ctxt, false);
-
-	ctxt->ops->exiting_smm(ctxt);
-
-	/*
-	 * Get back to real mode, to prepare a safe state in which to load
-	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
-	 * supports long mode.
-	 */
-	if (emulator_has_longmode(ctxt)) {
-		struct desc_struct cs_desc;
-
-		/* Zero CR4.PCIDE before CR0.PG.  */
-		cr4 = ctxt->ops->get_cr(ctxt, 4);
-		if (cr4 & X86_CR4_PCIDE)
-			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
-
-		/* A 32-bit code segment is required to clear EFER.LMA.  */
-		memset(&cs_desc, 0, sizeof(cs_desc));
-		cs_desc.type = 0xb;
-		cs_desc.s = cs_desc.g = cs_desc.p = 1;
-		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
-	}
-
-	/* For the 64-bit case, this will clear EFER.LMA.  */
-	cr0 = ctxt->ops->get_cr(ctxt, 0);
-	if (cr0 & X86_CR0_PE)
-		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
-
-	if (emulator_has_longmode(ctxt)) {
-		/* Clear CR4.PAE before clearing EFER.LME. */
-		cr4 = ctxt->ops->get_cr(ctxt, 4);
-		if (cr4 & X86_CR4_PAE)
-			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
-
-		/* And finally go back to 32-bit mode.  */
-		efer = 0;
-		ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
-	}
-
-	/*
-	 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
-	 * state (e.g. enter guest mode) before loading state from the SMM
-	 * state-save area.
-	 */
-	if (ctxt->ops->leave_smm(ctxt, buf))
-		goto emulate_shutdown;
-
-#ifdef CONFIG_X86_64
-	if (emulator_has_longmode(ctxt))
-		ret = rsm_load_state_64(ctxt, buf);
-	else
-#endif
-		ret = rsm_load_state_32(ctxt, buf);
-
-	if (ret != X86EMUL_CONTINUE)
-		goto emulate_shutdown;
+	if (ctxt->ops->leave_smm(ctxt))
+		ctxt->ops->triple_fault(ctxt);
 
-	/*
-	 * Note, the ctxt->ops callbacks are responsible for handling side
-	 * effects when writing MSRs and CRs, e.g. MMU context resets, CPUID
-	 * runtime updates, etc...  If that changes, e.g. this flow is moved
-	 * out of the emulator to make it look more like enter_smm(), then
-	 * those side effects need to be explicitly handled for both success
-	 * and shutdown.
-	 */
 	return emulator_recalc_and_set_mode(ctxt);
-
-emulate_shutdown:
-	ctxt->ops->triple_fault(ctxt);
-	return X86EMUL_CONTINUE;
 }
 
 static void
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 89246446d6aa9..d7afbc448dd24 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -234,8 +234,7 @@ struct x86_emulate_ops {
 	void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
 
 	unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
-	void (*exiting_smm)(struct x86_emulate_ctxt *ctxt);
-	int (*leave_smm)(struct x86_emulate_ctxt *ctxt, const char *smstate);
+	int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
 	void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
 	int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
 };
@@ -526,4 +525,35 @@ void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt);
 void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt);
 bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt);
 
+static inline ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
+		nr &= NR_EMULATOR_GPRS - 1;
+
+	if (!(ctxt->regs_valid & (1 << nr))) {
+		ctxt->regs_valid |= 1 << nr;
+		ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr);
+	}
+	return ctxt->_regs[nr];
+}
+
+static inline ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	if (KVM_EMULATOR_BUG_ON(nr >= NR_EMULATOR_GPRS, ctxt))
+		nr &= NR_EMULATOR_GPRS - 1;
+
+	BUILD_BUG_ON(sizeof(ctxt->regs_dirty) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
+	BUILD_BUG_ON(sizeof(ctxt->regs_valid) * BITS_PER_BYTE < NR_EMULATOR_GPRS);
+
+	ctxt->regs_valid |= 1 << nr;
+	ctxt->regs_dirty |= 1 << nr;
+	return &ctxt->_regs[nr];
+}
+
+static inline ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr)
+{
+	reg_read(ctxt, nr);
+	return reg_write(ctxt, nr);
+}
+
 #endif /* _ASM_X86_KVM_X86_EMULATE_H */
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 26a6859e421fe..073dad4f04b59 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -270,3 +270,319 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	kvm_update_cpuid_runtime(vcpu);
 	kvm_mmu_reset_context(vcpu);
 }
+
+static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
+{
+#ifdef CONFIG_X86_64
+	return ctxt->ops->guest_has_long_mode(ctxt);
+#else
+	return false;
+#endif
+}
+
+static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+{
+	desc->g    = (flags >> 23) & 1;
+	desc->d    = (flags >> 22) & 1;
+	desc->l    = (flags >> 21) & 1;
+	desc->avl  = (flags >> 20) & 1;
+	desc->p    = (flags >> 15) & 1;
+	desc->dpl  = (flags >> 13) & 3;
+	desc->s    = (flags >> 12) & 1;
+	desc->type = (flags >>  8) & 15;
+}
+
+static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+			   int n)
+{
+	struct desc_struct desc;
+	int offset;
+	u16 selector;
+
+	selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
+
+	if (n < 3)
+		offset = 0x7f84 + n * 12;
+	else
+		offset = 0x7f2c + (n - 3) * 12;
+
+	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
+	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
+	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
+	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+	return X86EMUL_CONTINUE;
+}
+
+#ifdef CONFIG_X86_64
+static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+			   int n)
+{
+	struct desc_struct desc;
+	int offset;
+	u16 selector;
+	u32 base3;
+
+	offset = 0x7e00 + n * 16;
+
+	selector =                GET_SMSTATE(u16, smstate, offset);
+	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
+	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
+	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
+	base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
+
+	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+	return X86EMUL_CONTINUE;
+}
+#endif
+
+static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+				    u64 cr0, u64 cr3, u64 cr4)
+{
+	int bad;
+	u64 pcid;
+
+	/* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
+	pcid = 0;
+	if (cr4 & X86_CR4_PCIDE) {
+		pcid = cr3 & 0xfff;
+		cr3 &= ~0xfff;
+	}
+
+	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+	if (bad)
+		return X86EMUL_UNHANDLEABLE;
+
+	/*
+	 * First enable PAE, long mode needs it before CR0.PG = 1 is set.
+	 * Then enable protected mode.	However, PCID cannot be enabled
+	 * if EFER.LMA=0, so set it separately.
+	 */
+	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+	if (bad)
+		return X86EMUL_UNHANDLEABLE;
+
+	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+	if (bad)
+		return X86EMUL_UNHANDLEABLE;
+
+	if (cr4 & X86_CR4_PCIDE) {
+		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+		if (bad)
+			return X86EMUL_UNHANDLEABLE;
+		if (pcid) {
+			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+			if (bad)
+				return X86EMUL_UNHANDLEABLE;
+		}
+
+	}
+
+	return X86EMUL_CONTINUE;
+}
+
+static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
+			     const char *smstate)
+{
+	struct desc_struct desc;
+	struct desc_ptr dt;
+	u16 selector;
+	u32 val, cr0, cr3, cr4;
+	int i;
+
+	cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
+	cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
+	ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
+	ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
+
+	for (i = 0; i < 8; i++)
+		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
+
+	val = GET_SMSTATE(u32, smstate, 0x7fcc);
+
+	if (ctxt->ops->set_dr(ctxt, 6, val))
+		return X86EMUL_UNHANDLEABLE;
+
+	val = GET_SMSTATE(u32, smstate, 0x7fc8);
+
+	if (ctxt->ops->set_dr(ctxt, 7, val))
+		return X86EMUL_UNHANDLEABLE;
+
+	selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
+	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
+	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
+	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
+	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+
+	selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
+	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
+	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
+	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
+	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+
+	dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
+	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
+	ctxt->ops->set_gdt(ctxt, &dt);
+
+	dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
+	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
+	ctxt->ops->set_idt(ctxt, &dt);
+
+	for (i = 0; i < 6; i++) {
+		int r = rsm_load_seg_32(ctxt, smstate, i);
+		if (r != X86EMUL_CONTINUE)
+			return r;
+	}
+
+	cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
+
+	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+
+	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+}
+
+#ifdef CONFIG_X86_64
+static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
+			     const char *smstate)
+{
+	struct desc_struct desc;
+	struct desc_ptr dt;
+	u64 val, cr0, cr3, cr4;
+	u32 base3;
+	u16 selector;
+	int i, r;
+
+	for (i = 0; i < 16; i++)
+		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
+
+	ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
+	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
+
+	val = GET_SMSTATE(u64, smstate, 0x7f68);
+
+	if (ctxt->ops->set_dr(ctxt, 6, val))
+		return X86EMUL_UNHANDLEABLE;
+
+	val = GET_SMSTATE(u64, smstate, 0x7f60);
+
+	if (ctxt->ops->set_dr(ctxt, 7, val))
+		return X86EMUL_UNHANDLEABLE;
+
+	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
+	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
+	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
+	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
+
+	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+		return X86EMUL_UNHANDLEABLE;
+
+	selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
+	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
+	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
+	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
+	base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
+	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+
+	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
+	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
+	ctxt->ops->set_idt(ctxt, &dt);
+
+	selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
+	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
+	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
+	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
+	base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
+	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+
+	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
+	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
+	ctxt->ops->set_gdt(ctxt, &dt);
+
+	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+	if (r != X86EMUL_CONTINUE)
+		return r;
+
+	for (i = 0; i < 6; i++) {
+		r = rsm_load_seg_64(ctxt, smstate, i);
+		if (r != X86EMUL_CONTINUE)
+			return r;
+	}
+
+	return X86EMUL_CONTINUE;
+}
+#endif
+
+int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	unsigned long cr0, cr4, efer;
+	char buf[512];
+	u64 smbase;
+	int ret;
+
+	smbase = ctxt->ops->get_smbase(ctxt);
+
+	ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
+	if (ret != X86EMUL_CONTINUE)
+		return X86EMUL_UNHANDLEABLE;
+
+	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+		ctxt->ops->set_nmi_mask(ctxt, false);
+
+	kvm_smm_changed(vcpu, false);
+
+	/*
+	 * Get back to real mode, to prepare a safe state in which to load
+	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
+	 * supports long mode.
+	 *
+	 * The ctxt->ops callbacks will handle all side effects when writing
+	 * writing MSRs and CRs, e.g. MMU context resets, CPUID
+	 * runtime updates, etc.
+	 */
+	if (emulator_has_longmode(ctxt)) {
+		struct desc_struct cs_desc;
+
+		/* Zero CR4.PCIDE before CR0.PG.  */
+		cr4 = ctxt->ops->get_cr(ctxt, 4);
+		if (cr4 & X86_CR4_PCIDE)
+			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+
+		/* A 32-bit code segment is required to clear EFER.LMA.  */
+		memset(&cs_desc, 0, sizeof(cs_desc));
+		cs_desc.type = 0xb;
+		cs_desc.s = cs_desc.g = cs_desc.p = 1;
+		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+	}
+
+	/* For the 64-bit case, this will clear EFER.LMA.  */
+	cr0 = ctxt->ops->get_cr(ctxt, 0);
+	if (cr0 & X86_CR0_PE)
+		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+
+	if (emulator_has_longmode(ctxt)) {
+		/* Clear CR4.PAE before clearing EFER.LME. */
+		cr4 = ctxt->ops->get_cr(ctxt, 4);
+		if (cr4 & X86_CR4_PAE)
+			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+
+		/* And finally go back to 32-bit mode.  */
+		efer = 0;
+		ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+	}
+
+	/*
+	 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
+	 * state (e.g. enter guest mode) before loading state from the SMM
+	 * state-save area.
+	 */
+	if (static_call(kvm_x86_leave_smm)(vcpu, buf))
+		return X86EMUL_UNHANDLEABLE;
+
+#ifdef CONFIG_X86_64
+	if (emulator_has_longmode(ctxt))
+		return rsm_load_state_64(ctxt, buf);
+	else
+#endif
+		return rsm_load_state_32(ctxt, buf);
+}
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index aacc6dac2c99a..b0602a92e511e 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -21,6 +21,7 @@ static inline bool is_smm(struct kvm_vcpu *vcpu)
 
 void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
 void enter_smm(struct kvm_vcpu *vcpu);
+int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
 void process_smi(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0730b16564f99..b953f0184208f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8150,19 +8150,6 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
 	return emul_to_vcpu(ctxt)->arch.hflags;
 }
 
-static void emulator_exiting_smm(struct x86_emulate_ctxt *ctxt)
-{
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
-	kvm_smm_changed(vcpu, false);
-}
-
-static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt,
-				  const char *smstate)
-{
-	return static_call(kvm_x86_leave_smm)(emul_to_vcpu(ctxt), smstate);
-}
-
 static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
 {
 	kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
@@ -8226,7 +8213,6 @@ static const struct x86_emulate_ops emulate_ops = {
 	.guest_has_rdpid     = emulator_guest_has_rdpid,
 	.set_nmi_mask        = emulator_set_nmi_mask,
 	.get_hflags          = emulator_get_hflags,
-	.exiting_smm         = emulator_exiting_smm,
 	.leave_smm           = emulator_leave_smm,
 	.triple_fault        = emulator_triple_fault,
 	.set_xcr             = emulator_set_xcr,
-- 
GitLab


From 1d0da94cdafe38b2c501a8d55f981204e588e259 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 29 Sep 2022 13:20:12 -0400
Subject: [PATCH 1182/4122] KVM: x86: do not go through ctxt->ops when
 emulating rsm

Now that RSM is implemented in a single emulator callback, there is no
point in going through other callbacks for the sake of modifying
processor state.  Just invoke KVM's own internal functions directly,
and remove the callbacks that were only used by em_rsm; the only
substantial difference is in the handling of the segment registers
and descriptor cache, which have to be parsed into a struct kvm_segment
instead of a struct desc_struct.

This also fixes a bug where emulator_set_segment was shifting the
limit left by 12 if the G bit is set, but the limit had not been
shifted right upon entry to SMM.

The emulator context is still used to restore EIP and the general
purpose registers.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-5-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/kvm_emulate.h |  13 ---
 arch/x86/kvm/smm.c         | 182 +++++++++++++++++--------------------
 arch/x86/kvm/x86.c         |  33 -------
 3 files changed, 85 insertions(+), 143 deletions(-)

diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index d7afbc448dd24..84b1f26614630 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -116,16 +116,6 @@ struct x86_emulate_ops {
 			unsigned int bytes,
 			struct x86_exception *fault, bool system);
 
-	/*
-	 * read_phys: Read bytes of standard (non-emulated/special) memory.
-	 *            Used for descriptor reading.
-	 *  @addr:  [IN ] Physical address from which to read.
-	 *  @val:   [OUT] Value read from memory.
-	 *  @bytes: [IN ] Number of bytes to read from memory.
-	 */
-	int (*read_phys)(struct x86_emulate_ctxt *ctxt, unsigned long addr,
-			void *val, unsigned int bytes);
-
 	/*
 	 * write_std: Write bytes of standard (non-emulated/special) memory.
 	 *            Used for descriptor writing.
@@ -209,11 +199,8 @@ struct x86_emulate_ops {
 	int (*cpl)(struct x86_emulate_ctxt *ctxt);
 	void (*get_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong *dest);
 	int (*set_dr)(struct x86_emulate_ctxt *ctxt, int dr, ulong value);
-	u64 (*get_smbase)(struct x86_emulate_ctxt *ctxt);
-	void (*set_smbase)(struct x86_emulate_ctxt *ctxt, u64 smbase);
 	int (*set_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
 	int (*get_msr_with_filter)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
-	int (*set_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 data);
 	int (*get_msr)(struct x86_emulate_ctxt *ctxt, u32 msr_index, u64 *pdata);
 	int (*check_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc);
 	int (*read_pmc)(struct x86_emulate_ctxt *ctxt, u32 pmc, u64 *pdata);
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 073dad4f04b59..102ecb8525640 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -271,71 +271,59 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	kvm_mmu_reset_context(vcpu);
 }
 
-static int emulator_has_longmode(struct x86_emulate_ctxt *ctxt)
-{
-#ifdef CONFIG_X86_64
-	return ctxt->ops->guest_has_long_mode(ctxt);
-#else
-	return false;
-#endif
-}
-
-static void rsm_set_desc_flags(struct desc_struct *desc, u32 flags)
+static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
 {
 	desc->g    = (flags >> 23) & 1;
-	desc->d    = (flags >> 22) & 1;
+	desc->db   = (flags >> 22) & 1;
 	desc->l    = (flags >> 21) & 1;
 	desc->avl  = (flags >> 20) & 1;
-	desc->p    = (flags >> 15) & 1;
+	desc->present = (flags >> 15) & 1;
 	desc->dpl  = (flags >> 13) & 3;
 	desc->s    = (flags >> 12) & 1;
 	desc->type = (flags >>  8) & 15;
+
+	desc->unusable = !desc->present;
+	desc->padding = 0;
 }
 
-static int rsm_load_seg_32(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate,
 			   int n)
 {
-	struct desc_struct desc;
+	struct kvm_segment desc;
 	int offset;
-	u16 selector;
-
-	selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
 
 	if (n < 3)
 		offset = 0x7f84 + n * 12;
 	else
 		offset = 0x7f2c + (n - 3) * 12;
 
-	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
-	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
+	desc.selector =           GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
+	desc.base =               GET_SMSTATE(u32, smstate, offset + 8);
+	desc.limit =              GET_SMSTATE(u32, smstate, offset + 4);
 	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
-	ctxt->ops->set_segment(ctxt, selector, &desc, 0, n);
+	kvm_set_segment(vcpu, &desc, n);
 	return X86EMUL_CONTINUE;
 }
 
 #ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct x86_emulate_ctxt *ctxt, const char *smstate,
+static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate,
 			   int n)
 {
-	struct desc_struct desc;
+	struct kvm_segment desc;
 	int offset;
-	u16 selector;
-	u32 base3;
 
 	offset = 0x7e00 + n * 16;
 
-	selector =                GET_SMSTATE(u16, smstate, offset);
+	desc.selector =           GET_SMSTATE(u16, smstate, offset);
 	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
-	set_desc_limit(&desc,     GET_SMSTATE(u32, smstate, offset + 4));
-	set_desc_base(&desc,      GET_SMSTATE(u32, smstate, offset + 8));
-	base3 =                   GET_SMSTATE(u32, smstate, offset + 12);
-
-	ctxt->ops->set_segment(ctxt, selector, &desc, base3, n);
+	desc.limit =              GET_SMSTATE(u32, smstate, offset + 4);
+	desc.base =               GET_SMSTATE(u64, smstate, offset + 8);
+	kvm_set_segment(vcpu, &desc, n);
 	return X86EMUL_CONTINUE;
 }
 #endif
 
-static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
+static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
 				    u64 cr0, u64 cr3, u64 cr4)
 {
 	int bad;
@@ -348,7 +336,7 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
 		cr3 &= ~0xfff;
 	}
 
-	bad = ctxt->ops->set_cr(ctxt, 3, cr3);
+	bad = kvm_set_cr3(vcpu, cr3);
 	if (bad)
 		return X86EMUL_UNHANDLEABLE;
 
@@ -357,20 +345,20 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
 	 * Then enable protected mode.	However, PCID cannot be enabled
 	 * if EFER.LMA=0, so set it separately.
 	 */
-	bad = ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+	bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
 	if (bad)
 		return X86EMUL_UNHANDLEABLE;
 
-	bad = ctxt->ops->set_cr(ctxt, 0, cr0);
+	bad = kvm_set_cr0(vcpu, cr0);
 	if (bad)
 		return X86EMUL_UNHANDLEABLE;
 
 	if (cr4 & X86_CR4_PCIDE) {
-		bad = ctxt->ops->set_cr(ctxt, 4, cr4);
+		bad = kvm_set_cr4(vcpu, cr4);
 		if (bad)
 			return X86EMUL_UNHANDLEABLE;
 		if (pcid) {
-			bad = ctxt->ops->set_cr(ctxt, 3, cr3 | pcid);
+			bad = kvm_set_cr3(vcpu, cr3 | pcid);
 			if (bad)
 				return X86EMUL_UNHANDLEABLE;
 		}
@@ -383,9 +371,9 @@ static int rsm_enter_protected_mode(struct x86_emulate_ctxt *ctxt,
 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 			     const char *smstate)
 {
-	struct desc_struct desc;
+	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_segment desc;
 	struct desc_ptr dt;
-	u16 selector;
 	u32 val, cr0, cr3, cr4;
 	int i;
 
@@ -399,56 +387,55 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 
 	val = GET_SMSTATE(u32, smstate, 0x7fcc);
 
-	if (ctxt->ops->set_dr(ctxt, 6, val))
+	if (kvm_set_dr(vcpu, 6, val))
 		return X86EMUL_UNHANDLEABLE;
 
 	val = GET_SMSTATE(u32, smstate, 0x7fc8);
 
-	if (ctxt->ops->set_dr(ctxt, 7, val))
+	if (kvm_set_dr(vcpu, 7, val))
 		return X86EMUL_UNHANDLEABLE;
 
-	selector =                 GET_SMSTATE(u32, smstate, 0x7fc4);
-	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f64));
-	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f60));
+	desc.selector =            GET_SMSTATE(u32, smstate, 0x7fc4);
+	desc.base =                GET_SMSTATE(u32, smstate, 0x7f64);
+	desc.limit =               GET_SMSTATE(u32, smstate, 0x7f60);
 	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
-	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_TR);
+	kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
 
-	selector =                 GET_SMSTATE(u32, smstate, 0x7fc0);
-	set_desc_base(&desc,       GET_SMSTATE(u32, smstate, 0x7f80));
-	set_desc_limit(&desc,      GET_SMSTATE(u32, smstate, 0x7f7c));
+	desc.selector =            GET_SMSTATE(u32, smstate, 0x7fc0);
+	desc.base =                GET_SMSTATE(u32, smstate, 0x7f80);
+	desc.limit =               GET_SMSTATE(u32, smstate, 0x7f7c);
 	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
-	ctxt->ops->set_segment(ctxt, selector, &desc, 0, VCPU_SREG_LDTR);
+	kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
 
 	dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
 	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
-	ctxt->ops->set_gdt(ctxt, &dt);
+	static_call(kvm_x86_set_gdt)(vcpu, &dt);
 
 	dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
 	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
-	ctxt->ops->set_idt(ctxt, &dt);
+	static_call(kvm_x86_set_idt)(vcpu, &dt);
 
 	for (i = 0; i < 6; i++) {
-		int r = rsm_load_seg_32(ctxt, smstate, i);
+		int r = rsm_load_seg_32(vcpu, smstate, i);
 		if (r != X86EMUL_CONTINUE)
 			return r;
 	}
 
 	cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
 
-	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7ef8));
+	vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8);
 
-	return rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+	return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
 }
 
 #ifdef CONFIG_X86_64
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
 			     const char *smstate)
 {
-	struct desc_struct desc;
+	struct kvm_vcpu *vcpu = ctxt->vcpu;
+	struct kvm_segment desc;
 	struct desc_ptr dt;
 	u64 val, cr0, cr3, cr4;
-	u32 base3;
-	u16 selector;
 	int i, r;
 
 	for (i = 0; i < 16; i++)
@@ -459,51 +446,49 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
 
 	val = GET_SMSTATE(u64, smstate, 0x7f68);
 
-	if (ctxt->ops->set_dr(ctxt, 6, val))
+	if (kvm_set_dr(vcpu, 6, val))
 		return X86EMUL_UNHANDLEABLE;
 
 	val = GET_SMSTATE(u64, smstate, 0x7f60);
 
-	if (ctxt->ops->set_dr(ctxt, 7, val))
+	if (kvm_set_dr(vcpu, 7, val))
 		return X86EMUL_UNHANDLEABLE;
 
 	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
 	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
 	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
-	ctxt->ops->set_smbase(ctxt, GET_SMSTATE(u32, smstate, 0x7f00));
+	vcpu->arch.smbase =         GET_SMSTATE(u32, smstate, 0x7f00);
 	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
 
-	if (ctxt->ops->set_msr(ctxt, MSR_EFER, val & ~EFER_LMA))
+	if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA))
 		return X86EMUL_UNHANDLEABLE;
 
-	selector =                  GET_SMSTATE(u32, smstate, 0x7e90);
+	desc.selector =             GET_SMSTATE(u32, smstate, 0x7e90);
 	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
-	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e94));
-	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e98));
-	base3 =                     GET_SMSTATE(u32, smstate, 0x7e9c);
-	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_TR);
+	desc.limit =                GET_SMSTATE(u32, smstate, 0x7e94);
+	desc.base =                 GET_SMSTATE(u64, smstate, 0x7e98);
+	kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
 
 	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
 	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
-	ctxt->ops->set_idt(ctxt, &dt);
+	static_call(kvm_x86_set_idt)(vcpu, &dt);
 
-	selector =                  GET_SMSTATE(u32, smstate, 0x7e70);
+	desc.selector =             GET_SMSTATE(u32, smstate, 0x7e70);
 	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
-	set_desc_limit(&desc,       GET_SMSTATE(u32, smstate, 0x7e74));
-	set_desc_base(&desc,        GET_SMSTATE(u32, smstate, 0x7e78));
-	base3 =                     GET_SMSTATE(u32, smstate, 0x7e7c);
-	ctxt->ops->set_segment(ctxt, selector, &desc, base3, VCPU_SREG_LDTR);
+	desc.limit =                GET_SMSTATE(u32, smstate, 0x7e74);
+	desc.base =                 GET_SMSTATE(u64, smstate, 0x7e78);
+	kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
 
 	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
 	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
-	ctxt->ops->set_gdt(ctxt, &dt);
+	static_call(kvm_x86_set_gdt)(vcpu, &dt);
 
-	r = rsm_enter_protected_mode(ctxt, cr0, cr3, cr4);
+	r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
 	if (r != X86EMUL_CONTINUE)
 		return r;
 
 	for (i = 0; i < 6; i++) {
-		r = rsm_load_seg_64(ctxt, smstate, i);
+		r = rsm_load_seg_64(vcpu, smstate, i);
 		if (r != X86EMUL_CONTINUE)
 			return r;
 	}
@@ -515,19 +500,19 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
 int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
-	unsigned long cr0, cr4, efer;
+	unsigned long cr0;
 	char buf[512];
 	u64 smbase;
 	int ret;
 
-	smbase = ctxt->ops->get_smbase(ctxt);
+	smbase = vcpu->arch.smbase;
 
-	ret = ctxt->ops->read_phys(ctxt, smbase + 0xfe00, buf, sizeof(buf));
-	if (ret != X86EMUL_CONTINUE)
+	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
+	if (ret < 0)
 		return X86EMUL_UNHANDLEABLE;
 
-	if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
-		ctxt->ops->set_nmi_mask(ctxt, false);
+	if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
+		static_call(kvm_x86_set_nmi_mask)(vcpu, false);
 
 	kvm_smm_changed(vcpu, false);
 
@@ -535,41 +520,44 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 	 * Get back to real mode, to prepare a safe state in which to load
 	 * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
 	 * supports long mode.
-	 *
-	 * The ctxt->ops callbacks will handle all side effects when writing
-	 * writing MSRs and CRs, e.g. MMU context resets, CPUID
-	 * runtime updates, etc.
 	 */
-	if (emulator_has_longmode(ctxt)) {
-		struct desc_struct cs_desc;
+#ifdef CONFIG_X86_64
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+		struct kvm_segment cs_desc;
+		unsigned long cr4;
 
 		/* Zero CR4.PCIDE before CR0.PG.  */
-		cr4 = ctxt->ops->get_cr(ctxt, 4);
+		cr4 = kvm_read_cr4(vcpu);
 		if (cr4 & X86_CR4_PCIDE)
-			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PCIDE);
+			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
 
 		/* A 32-bit code segment is required to clear EFER.LMA.  */
 		memset(&cs_desc, 0, sizeof(cs_desc));
 		cs_desc.type = 0xb;
-		cs_desc.s = cs_desc.g = cs_desc.p = 1;
-		ctxt->ops->set_segment(ctxt, 0, &cs_desc, 0, VCPU_SREG_CS);
+		cs_desc.s = cs_desc.g = cs_desc.present = 1;
+		kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
 	}
+#endif
 
 	/* For the 64-bit case, this will clear EFER.LMA.  */
-	cr0 = ctxt->ops->get_cr(ctxt, 0);
+	cr0 = kvm_read_cr0(vcpu);
 	if (cr0 & X86_CR0_PE)
-		ctxt->ops->set_cr(ctxt, 0, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+		kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));
+
+#ifdef CONFIG_X86_64
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
+		unsigned long cr4, efer;
 
-	if (emulator_has_longmode(ctxt)) {
 		/* Clear CR4.PAE before clearing EFER.LME. */
-		cr4 = ctxt->ops->get_cr(ctxt, 4);
+		cr4 = kvm_read_cr4(vcpu);
 		if (cr4 & X86_CR4_PAE)
-			ctxt->ops->set_cr(ctxt, 4, cr4 & ~X86_CR4_PAE);
+			kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);
 
 		/* And finally go back to 32-bit mode.  */
 		efer = 0;
-		ctxt->ops->set_msr(ctxt, MSR_EFER, efer);
+		kvm_set_msr(vcpu, MSR_EFER, efer);
 	}
+#endif
 
 	/*
 	 * Give leave_smm() a chance to make ISA-specific changes to the vCPU
@@ -580,7 +568,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 		return X86EMUL_UNHANDLEABLE;
 
 #ifdef CONFIG_X86_64
-	if (emulator_has_longmode(ctxt))
+	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		return rsm_load_state_64(ctxt, buf);
 	else
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b953f0184208f..019ba8725412c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -7256,15 +7256,6 @@ static int emulator_read_std(struct x86_emulate_ctxt *ctxt,
 	return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access, exception);
 }
 
-static int kvm_read_guest_phys_system(struct x86_emulate_ctxt *ctxt,
-		unsigned long addr, void *val, unsigned int bytes)
-{
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-	int r = kvm_vcpu_read_guest(vcpu, addr, val, bytes);
-
-	return r < 0 ? X86EMUL_IO_NEEDED : X86EMUL_CONTINUE;
-}
-
 static int kvm_write_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
 				      struct kvm_vcpu *vcpu, u64 access,
 				      struct x86_exception *exception)
@@ -8056,26 +8047,6 @@ static int emulator_get_msr(struct x86_emulate_ctxt *ctxt,
 	return kvm_get_msr(emul_to_vcpu(ctxt), msr_index, pdata);
 }
 
-static int emulator_set_msr(struct x86_emulate_ctxt *ctxt,
-			    u32 msr_index, u64 data)
-{
-	return kvm_set_msr(emul_to_vcpu(ctxt), msr_index, data);
-}
-
-static u64 emulator_get_smbase(struct x86_emulate_ctxt *ctxt)
-{
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
-	return vcpu->arch.smbase;
-}
-
-static void emulator_set_smbase(struct x86_emulate_ctxt *ctxt, u64 smbase)
-{
-	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
-
-	vcpu->arch.smbase = smbase;
-}
-
 static int emulator_check_pmc(struct x86_emulate_ctxt *ctxt,
 			      u32 pmc)
 {
@@ -8174,7 +8145,6 @@ static const struct x86_emulate_ops emulate_ops = {
 	.write_gpr           = emulator_write_gpr,
 	.read_std            = emulator_read_std,
 	.write_std           = emulator_write_std,
-	.read_phys           = kvm_read_guest_phys_system,
 	.fetch               = kvm_fetch_guest_virt,
 	.read_emulated       = emulator_read_emulated,
 	.write_emulated      = emulator_write_emulated,
@@ -8194,11 +8164,8 @@ static const struct x86_emulate_ops emulate_ops = {
 	.cpl                 = emulator_get_cpl,
 	.get_dr              = emulator_get_dr,
 	.set_dr              = emulator_set_dr,
-	.get_smbase          = emulator_get_smbase,
-	.set_smbase          = emulator_set_smbase,
 	.set_msr_with_filter = emulator_set_msr_with_filter,
 	.get_msr_with_filter = emulator_get_msr_with_filter,
-	.set_msr             = emulator_set_msr,
 	.get_msr             = emulator_get_msr,
 	.check_pmc	     = emulator_check_pmc,
 	.read_pmc            = emulator_read_pmc,
-- 
GitLab


From 4b8e1b32013da2495244dbdee70f2456e6bc7aca Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 29 Sep 2022 13:20:13 -0400
Subject: [PATCH 1183/4122] KVM: allow compiling out SMM support

Some users of KVM implement the UEFI variable store through a paravirtual device
that does not require the "SMM lockbox" component of edk2; allow them to
compile out system management mode, which is not a full implementation
especially in how it interacts with nested virtualization.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-6-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Kconfig                          | 11 ++++++++++
 arch/x86/kvm/Makefile                         |  2 +-
 arch/x86/kvm/smm.h                            | 12 ++++++++++
 arch/x86/kvm/svm/svm.c                        |  2 ++
 arch/x86/kvm/vmx/vmx.c                        |  2 ++
 arch/x86/kvm/x86.c                            | 22 +++++++++++++++++--
 tools/testing/selftests/kvm/x86_64/smm_test.c |  2 ++
 7 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index 67be7f217e37b..fbeaa9ddef598 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -118,6 +118,17 @@ config KVM_AMD_SEV
 	  Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
 	  with Encrypted State (SEV-ES) on AMD processors.
 
+config KVM_SMM
+	bool "System Management Mode emulation"
+	default y
+	depends on KVM
+	help
+	  Provides support for KVM to emulate System Management Mode (SMM)
+	  in virtual machines.  This can be used by the virtual machine
+	  firmware to implement UEFI secure boot.
+
+	  If unsure, say Y.
+
 config KVM_XEN
 	bool "Support for Xen hypercall interface"
 	depends on KVM
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b584cb0e06bd8..b8a494b6a5ec9 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -20,7 +20,7 @@ endif
 
 kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
 kvm-$(CONFIG_KVM_XEN)	+= xen.o
-kvm-y			+= smm.o
+kvm-$(CONFIG_KVM_SMM)	+= smm.o
 
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index b0602a92e511e..0e1bd8bd6dc47 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -8,6 +8,7 @@
 #define PUT_SMSTATE(type, buf, offset, val)                      \
 	*(type *)((buf) + (offset) - 0x7e00) = val
 
+#ifdef CONFIG_KVM_SMM
 static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
 {
 	kvm_make_request(KVM_REQ_SMI, vcpu);
@@ -23,5 +24,16 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm);
 void enter_smm(struct kvm_vcpu *vcpu);
 int emulator_leave_smm(struct x86_emulate_ctxt *ctxt);
 void process_smi(struct kvm_vcpu *vcpu);
+#else
+static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
+static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
+static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
+static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
+
+/*
+ * emulator_leave_smm is used as a function pointer, so the
+ * stub is defined in x86.c.
+ */
+#endif
 
 #endif
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 3bb07ec789859..4cc014b464061 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4115,6 +4115,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index)
 	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
 		return false;
 	case MSR_IA32_SMBASE:
+		if (!IS_ENABLED(CONFIG_KVM_SMM))
+			return false;
 		/* SEV-ES guests do not support SMM, so report false */
 		if (kvm && sev_es_guest(kvm))
 			return false;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 49065614a3dbb..6a0b65815206b 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6842,6 +6842,8 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index)
 {
 	switch (index) {
 	case MSR_IA32_SMBASE:
+		if (!IS_ENABLED(CONFIG_KVM_SMM))
+			return false;
 		/*
 		 * We cannot do SMM unless we can run the guest in big
 		 * real mode.
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 019ba8725412c..0a80cd1d91c8c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3642,7 +3642,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		break;
 	}
 	case MSR_IA32_SMBASE:
-		if (!msr_info->host_initiated)
+		if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
 			return 1;
 		vcpu->arch.smbase = data;
 		break;
@@ -4058,7 +4058,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		msr_info->data = vcpu->arch.ia32_misc_enable_msr;
 		break;
 	case MSR_IA32_SMBASE:
-		if (!msr_info->host_initiated)
+		if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated)
 			return 1;
 		msr_info->data = vcpu->arch.smbase;
 		break;
@@ -4432,6 +4432,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 			r |= KVM_X86_DISABLE_EXITS_MWAIT;
 		break;
 	case KVM_CAP_X86_SMM:
+		if (!IS_ENABLED(CONFIG_KVM_SMM))
+			break;
+
 		/* SMBASE is usually relocated above 1M on modern chipsets,
 		 * and SMM handlers might indeed rely on 4G segment limits,
 		 * so do not report SMM to be available if real mode is
@@ -5182,6 +5185,7 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 		vcpu->arch.apic->sipi_vector = events->sipi_vector;
 
 	if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+#ifdef CONFIG_KVM_SMM
 		if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) {
 			kvm_x86_ops.nested_ops->leave_nested(vcpu);
 			kvm_smm_changed(vcpu, events->smi.smm);
@@ -5196,6 +5200,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 				vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
 		}
 
+#else
+		if (events->smi.smm || events->smi.pending ||
+		    events->smi.smm_inside_nmi)
+			return -EINVAL;
+#endif
+
 		if (lapic_in_kernel(vcpu)) {
 			if (events->smi.latched_init)
 				set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
@@ -8121,6 +8131,14 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
 	return emul_to_vcpu(ctxt)->arch.hflags;
 }
 
+#ifndef CONFIG_KVM_SMM
+static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
+{
+	WARN_ON_ONCE(1);
+	return X86EMUL_UNHANDLEABLE;
+}
+#endif
+
 static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt)
 {
 	kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt));
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index 1f136a81858e5..cb38a478e1f62 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -137,6 +137,8 @@ int main(int argc, char *argv[])
 	struct kvm_x86_state *state;
 	int stage, stage_reported;
 
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
+
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
-- 
GitLab


From 31e83e21cf00fe5b669eb352ff3ed70e74b40fad Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 29 Sep 2022 13:20:14 -0400
Subject: [PATCH 1184/4122] KVM: x86: compile out vendor-specific code if SMM
 is disabled

Vendor-specific code that deals with SMI injection and saving/restoring
SMM state is not needed if CONFIG_KVM_SMM is disabled, so remove the
four callbacks smi_allowed, enter_smm, leave_smm and enable_smi_window.
The users in svm/nested.c and x86.c also have to be compiled out; the
amount of #ifdef'ed code is small and it's not worth moving it to
smm.c.

enter_smm is now used only within #ifdef CONFIG_KVM_SMM, and the stub
can therefore be removed.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-7-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h | 2 ++
 arch/x86/include/asm/kvm_host.h    | 2 ++
 arch/x86/kvm/smm.h                 | 1 -
 arch/x86/kvm/svm/nested.c          | 2 ++
 arch/x86/kvm/svm/svm.c             | 4 ++++
 arch/x86/kvm/vmx/vmx.c             | 4 ++++
 arch/x86/kvm/x86.c                 | 4 ++++
 7 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 82ba4a564e587..ea58e67e9a670 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -110,10 +110,12 @@ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt)
 KVM_X86_OP_OPTIONAL(set_hv_timer)
 KVM_X86_OP_OPTIONAL(cancel_hv_timer)
 KVM_X86_OP(setup_mce)
+#ifdef CONFIG_KVM_SMM
 KVM_X86_OP(smi_allowed)
 KVM_X86_OP(enter_smm)
 KVM_X86_OP(leave_smm)
 KVM_X86_OP(enable_smi_window)
+#endif
 KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
 KVM_X86_OP_OPTIONAL(mem_enc_register_region)
 KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 612ef60631c1f..3e5e54d7baa68 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1612,10 +1612,12 @@ struct kvm_x86_ops {
 
 	void (*setup_mce)(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_KVM_SMM
 	int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
 	int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
 	int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
 	void (*enable_smi_window)(struct kvm_vcpu *vcpu);
+#endif
 
 	int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
 	int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 0e1bd8bd6dc47..8debe81494c62 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -27,7 +27,6 @@ void process_smi(struct kvm_vcpu *vcpu);
 #else
 static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
 static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
-static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
 static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
 
 /*
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cc0fd75f7cbab..b258d6988f5dd 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1378,6 +1378,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
 		return 0;
 	}
 
+#ifdef CONFIG_KVM_SMM
 	if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
 		if (block_nested_events)
 			return -EBUSY;
@@ -1386,6 +1387,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
 		nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
 		return 0;
 	}
+#endif
 
 	if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
 		if (block_nested_events)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4cc014b464061..d28de3e59f7f7 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4373,6 +4373,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu)
 	vcpu->arch.mcg_cap &= 0x1ff;
 }
 
+#ifdef CONFIG_KVM_SMM
 bool svm_smi_blocked(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4522,6 +4523,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu)
 		/* We must be in SMM; RSM will cause a vmexit anyway.  */
 	}
 }
+#endif
 
 static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type,
 					void *insn, int insn_len)
@@ -4797,10 +4799,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.pi_update_irte = avic_pi_update_irte,
 	.setup_mce = svm_setup_mce,
 
+#ifdef CONFIG_KVM_SMM
 	.smi_allowed = svm_smi_allowed,
 	.enter_smm = svm_enter_smm,
 	.leave_smm = svm_leave_smm,
 	.enable_smi_window = svm_enable_smi_window,
+#endif
 
 	.mem_enc_ioctl = sev_mem_enc_ioctl,
 	.mem_enc_register_region = sev_mem_enc_register_region,
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6a0b65815206b..6be991b29bb76 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7932,6 +7932,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
 			~FEAT_CTL_LMCE_ENABLED;
 }
 
+#ifdef CONFIG_KVM_SMM
 static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 {
 	/* we need a nested vmexit to enter SMM, postpone if run is pending */
@@ -7986,6 +7987,7 @@ static void vmx_enable_smi_window(struct kvm_vcpu *vcpu)
 {
 	/* RSM will cause a vmexit anyway.  */
 }
+#endif
 
 static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
 {
@@ -8153,10 +8155,12 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
 	.setup_mce = vmx_setup_mce,
 
+#ifdef CONFIG_KVM_SMM
 	.smi_allowed = vmx_smi_allowed,
 	.enter_smm = vmx_enter_smm,
 	.leave_smm = vmx_leave_smm,
 	.enable_smi_window = vmx_enable_smi_window,
+#endif
 
 	.can_emulate_instruction = vmx_can_emulate_instruction,
 	.apic_init_signal_blocked = vmx_apic_init_signal_blocked,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a80cd1d91c8c..9ac51c848fc82 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9919,6 +9919,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 	 * in order to make progress and get back here for another iteration.
 	 * The kvm_x86_ops hooks communicate this by returning -EBUSY.
 	 */
+#ifdef CONFIG_KVM_SMM
 	if (vcpu->arch.smi_pending) {
 		r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY;
 		if (r < 0)
@@ -9931,6 +9932,7 @@ static int kvm_check_and_inject_events(struct kvm_vcpu *vcpu,
 		} else
 			static_call(kvm_x86_enable_smi_window)(vcpu);
 	}
+#endif
 
 	if (vcpu->arch.nmi_pending) {
 		r = can_inject ? static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY;
@@ -12580,10 +12582,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
 	     static_call(kvm_x86_nmi_allowed)(vcpu, false)))
 		return true;
 
+#ifdef CONFIG_KVM_SMM
 	if (kvm_test_request(KVM_REQ_SMI, vcpu) ||
 	    (vcpu->arch.smi_pending &&
 	     static_call(kvm_x86_smi_allowed)(vcpu, false)))
 		return true;
+#endif
 
 	if (kvm_arch_interrupt_allowed(vcpu) &&
 	    (kvm_cpu_has_interrupt(vcpu) ||
-- 
GitLab


From ba97bb07e0b28c962015aaf219005928774b886c Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 29 Sep 2022 13:20:15 -0400
Subject: [PATCH 1185/4122] KVM: x86: remove SMRAM address space if SMM is not
 supported

If CONFIG_KVM_SMM is not defined HF_SMM_MASK will always be zero, and
we can spare userspace the hassle of setting up the SMRAM address space
simply by reporting that only one address space is supported.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-8-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3e5e54d7baa68..b9f6f854dcef8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1998,11 +1998,14 @@ enum {
 #define HF_SMM_MASK		(1 << 6)
 #define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
 
-#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
-#define KVM_ADDRESS_SPACE_NUM 2
-
-#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
-#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#ifdef CONFIG_KVM_SMM
+# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+# define KVM_ADDRESS_SPACE_NUM 2
+# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
+#else
+# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0)
+#endif
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
-- 
GitLab


From cf7316d0361c5d3289611402b5ac0c64c918b20b Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 29 Sep 2022 13:20:16 -0400
Subject: [PATCH 1186/4122] KVM: x86: do not define KVM_REQ_SMI if SMM disabled

This ensures that all the relevant code is compiled out, in fact
the process_smi stub can be removed too.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20220929172016.319443-9-pbonzini@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 2 ++
 arch/x86/kvm/smm.h              | 1 -
 arch/x86/kvm/x86.c              | 6 ++++++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index b9f6f854dcef8..24a2152a77cd0 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -81,7 +81,9 @@
 #define KVM_REQ_NMI			KVM_ARCH_REQ(9)
 #define KVM_REQ_PMU			KVM_ARCH_REQ(10)
 #define KVM_REQ_PMI			KVM_ARCH_REQ(11)
+#ifdef CONFIG_KVM_SMM
 #define KVM_REQ_SMI			KVM_ARCH_REQ(12)
+#endif
 #define KVM_REQ_MASTERCLOCK_UPDATE	KVM_ARCH_REQ(13)
 #define KVM_REQ_MCLOCK_INPROGRESS \
 	KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 8debe81494c62..53c81394ebdbc 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -27,7 +27,6 @@ void process_smi(struct kvm_vcpu *vcpu);
 #else
 static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; }
 static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; }
-static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); }
 
 /*
  * emulator_leave_smm is used as a function pointer, so the
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ac51c848fc82..73c32030d5142 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5026,8 +5026,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 
 	process_nmi(vcpu);
 
+#ifdef CONFIG_KVM_SMM
 	if (kvm_check_request(KVM_REQ_SMI, vcpu))
 		process_smi(vcpu);
+#endif
 
 	/*
 	 * KVM's ABI only allows for one exception to be migrated.  Luckily,
@@ -10266,8 +10268,10 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		}
 		if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu))
 			record_steal_time(vcpu);
+#ifdef CONFIG_KVM_SMM
 		if (kvm_check_request(KVM_REQ_SMI, vcpu))
 			process_smi(vcpu);
+#endif
 		if (kvm_check_request(KVM_REQ_NMI, vcpu))
 			process_nmi(vcpu);
 		if (kvm_check_request(KVM_REQ_PMU, vcpu))
@@ -12628,7 +12632,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu)
 		return true;
 
 	if (kvm_test_request(KVM_REQ_NMI, vcpu) ||
+#ifdef CONFIG_KVM_SMM
 		kvm_test_request(KVM_REQ_SMI, vcpu) ||
+#endif
 		 kvm_test_request(KVM_REQ_EVENT, vcpu))
 		return true;
 
-- 
GitLab


From 85672346a707d8e4d5657279dac6b356e1edf24a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 27 Oct 2022 12:44:28 -0400
Subject: [PATCH 1187/4122] KVM: zero output of KVM_GET_VCPU_EVENTS before
 filling in the struct

This allows making some fields optional, as will be the case soon
for SMM-related data.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 73c32030d5142..84aa51613e4f5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5057,16 +5057,15 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 	    ex->pending && ex->has_payload)
 		kvm_deliver_exception_payload(vcpu, ex);
 
+	memset(events, 0, sizeof(*events));
+
 	/*
 	 * The API doesn't provide the instruction length for software
 	 * exceptions, so don't report them. As long as the guest RIP
 	 * isn't advanced, we should expect to encounter the exception
 	 * again.
 	 */
-	if (kvm_exception_is_soft(ex->vector)) {
-		events->exception.injected = 0;
-		events->exception.pending = 0;
-	} else {
+	if (!kvm_exception_is_soft(ex->vector)) {
 		events->exception.injected = ex->injected;
 		events->exception.pending = ex->pending;
 		/*
@@ -5086,15 +5085,13 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 	events->interrupt.injected =
 		vcpu->arch.interrupt.injected && !vcpu->arch.interrupt.soft;
 	events->interrupt.nr = vcpu->arch.interrupt.nr;
-	events->interrupt.soft = 0;
 	events->interrupt.shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
 
 	events->nmi.injected = vcpu->arch.nmi_injected;
 	events->nmi.pending = vcpu->arch.nmi_pending != 0;
 	events->nmi.masked = static_call(kvm_x86_get_nmi_mask)(vcpu);
-	events->nmi.pad = 0;
 
-	events->sipi_vector = 0; /* never valid when reporting to user space */
+	/* events->sipi_vector is never valid when reporting to user space */
 
 	events->smi.smm = is_smm(vcpu);
 	events->smi.pending = vcpu->arch.smi_pending;
@@ -5111,8 +5108,6 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 		events->triple_fault.pending = kvm_test_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 		events->flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
 	}
-
-	memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
 static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
-- 
GitLab


From a7662aa5e56ffa9adf65699eda541f94e157cc83 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 7 Nov 2022 10:11:42 -0500
Subject: [PATCH 1188/4122] KVM: x86: do not define SMM-related constants if
 SMM disabled

The hidden processor flags HF_SMM_MASK and HF_SMM_INSIDE_NMI_MASK
are not needed if CONFIG_KVM_SMM is turned off.  Remove the
definitions altogether and the code that uses them.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 3 ++-
 arch/x86/kvm/kvm_emulate.h      | 1 -
 arch/x86/kvm/smm.c              | 2 ++
 arch/x86/kvm/x86.c              | 4 ++--
 4 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 24a2152a77cd0..e9862eaac43b8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1997,10 +1997,11 @@ enum {
 #define HF_NMI_MASK		(1 << 3)
 #define HF_IRET_MASK		(1 << 4)
 #define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
+
+#ifdef CONFIG_KVM_SMM
 #define HF_SMM_MASK		(1 << 6)
 #define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
 
-#ifdef CONFIG_KVM_SMM
 # define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
 # define KVM_ADDRESS_SPACE_NUM 2
 # define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 84b1f26614630..2d9662be83337 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -278,7 +278,6 @@ enum x86emul_mode {
 /* These match some of the HF_* flags defined in kvm_host.h  */
 #define X86EMUL_GUEST_MASK           (1 << 5) /* VCPU is in guest-mode */
 #define X86EMUL_SMM_MASK             (1 << 6)
-#define X86EMUL_SMM_INSIDE_NMI_MASK  (1 << 7)
 
 /*
  * fastop functions are declared as taking a never-defined fastop parameter,
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 102ecb8525640..d9cf104ad94f5 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -10,6 +10,8 @@
 
 void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
 {
+	BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
+
 	trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);
 
 	if (entering_smm) {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 84aa51613e4f5..cbec2e675c18f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5093,10 +5093,12 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 
 	/* events->sipi_vector is never valid when reporting to user space */
 
+#ifdef CONFIG_KVM_SMM
 	events->smi.smm = is_smm(vcpu);
 	events->smi.pending = vcpu->arch.smi_pending;
 	events->smi.smm_inside_nmi =
 		!!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
+#endif
 	events->smi.latched_init = kvm_lapic_latched_init(vcpu);
 
 	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
@@ -8267,8 +8269,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
 		     cs_db				? X86EMUL_MODE_PROT32 :
 							  X86EMUL_MODE_PROT16;
 	BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
-	BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
-	BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
 
 	ctxt->interruptibility = 0;
 	ctxt->have_exception = false;
-- 
GitLab


From 89dccf82e99e95ee465e2b00428494fe64679256 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:33 +0300
Subject: [PATCH 1189/4122] KVM: x86: smm: check for failures on smm entry

In the rare case of the failure on SMM entry, the KVM should at
least terminate the VM instead of going south.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-16-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/smm.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index d9cf104ad94f5..2d5bb2af70e40 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -213,11 +213,17 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
 	 * state (e.g. leave guest mode) after we've saved the state into the
 	 * SMM state-save area.
+	 *
+	 * Kill the VM in the unlikely case of failure, because the VM
+	 * can be in undefined state in this case.
 	 */
-	static_call(kvm_x86_enter_smm)(vcpu, buf);
+	if (static_call(kvm_x86_enter_smm)(vcpu, buf))
+		goto error;
 
 	kvm_smm_changed(vcpu, true);
-	kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf));
+
+	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)))
+		goto error;
 
 	if (static_call(kvm_x86_get_nmi_mask)(vcpu))
 		vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
@@ -237,7 +243,8 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	dt.address = dt.size = 0;
 	static_call(kvm_x86_set_idt)(vcpu, &dt);
 
-	kvm_set_dr(vcpu, 7, DR7_FIXED_1);
+	if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
+		goto error;
 
 	cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
 	cs.base = vcpu->arch.smbase;
@@ -266,11 +273,15 @@ void enter_smm(struct kvm_vcpu *vcpu)
 
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		static_call(kvm_x86_set_efer)(vcpu, 0);
+		if (static_call(kvm_x86_set_efer)(vcpu, 0))
+			goto error;
 #endif
 
 	kvm_update_cpuid_runtime(vcpu);
 	kvm_mmu_reset_context(vcpu);
+	return;
+error:
+	kvm_vm_dead(vcpu->kvm);
 }
 
 static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
-- 
GitLab


From 09779c16e3eda95312ca14cd263dbb05da147b75 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:34 +0300
Subject: [PATCH 1190/4122] KVM: x86: smm: add structs for KVM's smram layout

Add structs that will be used to define and read/write the KVM's
SMRAM layout, instead of reading/writing to raw offsets.

Also document the differences between KVM's SMRAM layout and SMRAM
layout that is used by real Intel/AMD cpus.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-17-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/smm.c |  98 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/smm.h | 133 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+)

diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 2d5bb2af70e40..2e6ec79a581ef 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -8,6 +8,102 @@
 #include "cpuid.h"
 #include "trace.h"
 
+#define CHECK_SMRAM32_OFFSET(field, offset) \
+	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)
+
+#define CHECK_SMRAM64_OFFSET(field, offset) \
+	ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)
+
+static void check_smram_offsets(void)
+{
+	/* 32 bit SMRAM image */
+	CHECK_SMRAM32_OFFSET(reserved1,			0xFE00);
+	CHECK_SMRAM32_OFFSET(smbase,			0xFEF8);
+	CHECK_SMRAM32_OFFSET(smm_revision,		0xFEFC);
+	CHECK_SMRAM32_OFFSET(io_inst_restart,		0xFF00);
+	CHECK_SMRAM32_OFFSET(auto_hlt_restart,		0xFF02);
+	CHECK_SMRAM32_OFFSET(io_restart_rdi,		0xFF04);
+	CHECK_SMRAM32_OFFSET(io_restart_rcx,		0xFF08);
+	CHECK_SMRAM32_OFFSET(io_restart_rsi,		0xFF0C);
+	CHECK_SMRAM32_OFFSET(io_restart_rip,		0xFF10);
+	CHECK_SMRAM32_OFFSET(cr4,			0xFF14);
+	CHECK_SMRAM32_OFFSET(reserved3,			0xFF18);
+	CHECK_SMRAM32_OFFSET(ds,			0xFF2C);
+	CHECK_SMRAM32_OFFSET(fs,			0xFF38);
+	CHECK_SMRAM32_OFFSET(gs,			0xFF44);
+	CHECK_SMRAM32_OFFSET(idtr,			0xFF50);
+	CHECK_SMRAM32_OFFSET(tr,			0xFF5C);
+	CHECK_SMRAM32_OFFSET(gdtr,			0xFF6C);
+	CHECK_SMRAM32_OFFSET(ldtr,			0xFF78);
+	CHECK_SMRAM32_OFFSET(es,			0xFF84);
+	CHECK_SMRAM32_OFFSET(cs,			0xFF90);
+	CHECK_SMRAM32_OFFSET(ss,			0xFF9C);
+	CHECK_SMRAM32_OFFSET(es_sel,			0xFFA8);
+	CHECK_SMRAM32_OFFSET(cs_sel,			0xFFAC);
+	CHECK_SMRAM32_OFFSET(ss_sel,			0xFFB0);
+	CHECK_SMRAM32_OFFSET(ds_sel,			0xFFB4);
+	CHECK_SMRAM32_OFFSET(fs_sel,			0xFFB8);
+	CHECK_SMRAM32_OFFSET(gs_sel,			0xFFBC);
+	CHECK_SMRAM32_OFFSET(ldtr_sel,			0xFFC0);
+	CHECK_SMRAM32_OFFSET(tr_sel,			0xFFC4);
+	CHECK_SMRAM32_OFFSET(dr7,			0xFFC8);
+	CHECK_SMRAM32_OFFSET(dr6,			0xFFCC);
+	CHECK_SMRAM32_OFFSET(gprs,			0xFFD0);
+	CHECK_SMRAM32_OFFSET(eip,			0xFFF0);
+	CHECK_SMRAM32_OFFSET(eflags,			0xFFF4);
+	CHECK_SMRAM32_OFFSET(cr3,			0xFFF8);
+	CHECK_SMRAM32_OFFSET(cr0,			0xFFFC);
+
+	/* 64 bit SMRAM image */
+	CHECK_SMRAM64_OFFSET(es,			0xFE00);
+	CHECK_SMRAM64_OFFSET(cs,			0xFE10);
+	CHECK_SMRAM64_OFFSET(ss,			0xFE20);
+	CHECK_SMRAM64_OFFSET(ds,			0xFE30);
+	CHECK_SMRAM64_OFFSET(fs,			0xFE40);
+	CHECK_SMRAM64_OFFSET(gs,			0xFE50);
+	CHECK_SMRAM64_OFFSET(gdtr,			0xFE60);
+	CHECK_SMRAM64_OFFSET(ldtr,			0xFE70);
+	CHECK_SMRAM64_OFFSET(idtr,			0xFE80);
+	CHECK_SMRAM64_OFFSET(tr,			0xFE90);
+	CHECK_SMRAM64_OFFSET(io_restart_rip,		0xFEA0);
+	CHECK_SMRAM64_OFFSET(io_restart_rcx,		0xFEA8);
+	CHECK_SMRAM64_OFFSET(io_restart_rsi,		0xFEB0);
+	CHECK_SMRAM64_OFFSET(io_restart_rdi,		0xFEB8);
+	CHECK_SMRAM64_OFFSET(io_restart_dword,		0xFEC0);
+	CHECK_SMRAM64_OFFSET(reserved1,			0xFEC4);
+	CHECK_SMRAM64_OFFSET(io_inst_restart,		0xFEC8);
+	CHECK_SMRAM64_OFFSET(auto_hlt_restart,		0xFEC9);
+	CHECK_SMRAM64_OFFSET(reserved2,			0xFECA);
+	CHECK_SMRAM64_OFFSET(efer,			0xFED0);
+	CHECK_SMRAM64_OFFSET(svm_guest_flag,		0xFED8);
+	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,	0xFEE0);
+	CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,	0xFEE8);
+	CHECK_SMRAM64_OFFSET(reserved3,			0xFEF0);
+	CHECK_SMRAM64_OFFSET(smm_revison,		0xFEFC);
+	CHECK_SMRAM64_OFFSET(smbase,			0xFF00);
+	CHECK_SMRAM64_OFFSET(reserved4,			0xFF04);
+	CHECK_SMRAM64_OFFSET(ssp,			0xFF18);
+	CHECK_SMRAM64_OFFSET(svm_guest_pat,		0xFF20);
+	CHECK_SMRAM64_OFFSET(svm_host_efer,		0xFF28);
+	CHECK_SMRAM64_OFFSET(svm_host_cr4,		0xFF30);
+	CHECK_SMRAM64_OFFSET(svm_host_cr3,		0xFF38);
+	CHECK_SMRAM64_OFFSET(svm_host_cr0,		0xFF40);
+	CHECK_SMRAM64_OFFSET(cr4,			0xFF48);
+	CHECK_SMRAM64_OFFSET(cr3,			0xFF50);
+	CHECK_SMRAM64_OFFSET(cr0,			0xFF58);
+	CHECK_SMRAM64_OFFSET(dr7,			0xFF60);
+	CHECK_SMRAM64_OFFSET(dr6,			0xFF68);
+	CHECK_SMRAM64_OFFSET(rflags,			0xFF70);
+	CHECK_SMRAM64_OFFSET(rip,			0xFF78);
+	CHECK_SMRAM64_OFFSET(gprs,			0xFF80);
+
+	BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
+}
+
+#undef CHECK_SMRAM64_OFFSET
+#undef CHECK_SMRAM32_OFFSET
+
+
 void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
 {
 	BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
@@ -201,6 +297,8 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	unsigned long cr0;
 	char buf[512];
 
+	check_smram_offsets();
+
 	memset(buf, 0, 512);
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 53c81394ebdbc..b66da263ec822 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -2,6 +2,8 @@
 #ifndef ASM_KVM_SMM_H
 #define ASM_KVM_SMM_H
 
+#include <linux/build_bug.h>
+
 #define GET_SMSTATE(type, buf, offset)		\
 	(*(type *)((buf) + (offset) - 0x7e00))
 
@@ -9,6 +11,137 @@
 	*(type *)((buf) + (offset) - 0x7e00) = val
 
 #ifdef CONFIG_KVM_SMM
+
+
+/*
+ * 32 bit KVM's emulated SMM layout. Based on Intel P6 layout
+ * (https://www.sandpile.org/x86/smm.htm).
+ */
+
+struct kvm_smm_seg_state_32 {
+	u32 flags;
+	u32 limit;
+	u32 base;
+} __packed;
+
+struct kvm_smram_state_32 {
+	u32 reserved1[62];
+	u32 smbase;
+	u32 smm_revision;
+	u16 io_inst_restart;
+	u16 auto_hlt_restart;
+	u32 io_restart_rdi;
+	u32 io_restart_rcx;
+	u32 io_restart_rsi;
+	u32 io_restart_rip;
+	u32 cr4;
+
+	/* A20M#, CPL, shutdown and other reserved/undocumented fields */
+	u32 reserved3[5];
+
+	struct kvm_smm_seg_state_32 ds;
+	struct kvm_smm_seg_state_32 fs;
+	struct kvm_smm_seg_state_32 gs;
+	struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */
+	struct kvm_smm_seg_state_32 tr;
+	u32 reserved;
+	struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */
+	struct kvm_smm_seg_state_32 ldtr;
+	struct kvm_smm_seg_state_32 es;
+	struct kvm_smm_seg_state_32 cs;
+	struct kvm_smm_seg_state_32 ss;
+
+	u32 es_sel;
+	u32 cs_sel;
+	u32 ss_sel;
+	u32 ds_sel;
+	u32 fs_sel;
+	u32 gs_sel;
+	u32 ldtr_sel;
+	u32 tr_sel;
+
+	u32 dr7;
+	u32 dr6;
+	u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */
+	u32 eip;
+	u32 eflags;
+	u32 cr3;
+	u32 cr0;
+} __packed;
+
+
+/* 64 bit KVM's emulated SMM layout. Based on AMD64 layout */
+
+struct kvm_smm_seg_state_64 {
+	u16 selector;
+	u16 attributes;
+	u32 limit;
+	u64 base;
+};
+
+struct kvm_smram_state_64 {
+
+	struct kvm_smm_seg_state_64 es;
+	struct kvm_smm_seg_state_64 cs;
+	struct kvm_smm_seg_state_64 ss;
+	struct kvm_smm_seg_state_64 ds;
+	struct kvm_smm_seg_state_64 fs;
+	struct kvm_smm_seg_state_64 gs;
+	struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/
+	struct kvm_smm_seg_state_64 ldtr;
+	struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/
+	struct kvm_smm_seg_state_64 tr;
+
+	/* I/O restart and auto halt restart are not implemented by KVM */
+	u64 io_restart_rip;
+	u64 io_restart_rcx;
+	u64 io_restart_rsi;
+	u64 io_restart_rdi;
+	u32 io_restart_dword;
+	u32 reserved1;
+	u8 io_inst_restart;
+	u8 auto_hlt_restart;
+	u8 reserved2[6];
+
+	u64 efer;
+
+	/*
+	 * Two fields below are implemented on AMD only, to store
+	 * SVM guest vmcb address if the #SMI was received while in the guest mode.
+	 */
+	u64 svm_guest_flag;
+	u64 svm_guest_vmcb_gpa;
+	u64 svm_guest_virtual_int; /* unknown purpose, not implemented */
+
+	u32 reserved3[3];
+	u32 smm_revison;
+	u32 smbase;
+	u32 reserved4[5];
+
+	/* ssp and svm_* fields below are not implemented by KVM */
+	u64 ssp;
+	u64 svm_guest_pat;
+	u64 svm_host_efer;
+	u64 svm_host_cr4;
+	u64 svm_host_cr3;
+	u64 svm_host_cr0;
+
+	u64 cr4;
+	u64 cr3;
+	u64 cr0;
+	u64 dr7;
+	u64 dr6;
+	u64 rflags;
+	u64 rip;
+	u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) */
+};
+
+union kvm_smram {
+	struct kvm_smram_state_64 smram64;
+	struct kvm_smram_state_32 smram32;
+	u8 bytes[512];
+};
+
 static inline int kvm_inject_smi(struct kvm_vcpu *vcpu)
 {
 	kvm_make_request(KVM_REQ_SMI, vcpu);
-- 
GitLab


From 58c1d206d545464f9051ad080674b719d553215b Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:35 +0300
Subject: [PATCH 1191/4122] KVM: x86: smm: use smram structs in the common code

Use kvm_smram union instad of raw arrays in the common smm code.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-18-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++--
 arch/x86/kvm/smm.c              | 27 ++++++++++++++-------------
 arch/x86/kvm/svm/svm.c          |  8 ++++++--
 arch/x86/kvm/vmx/vmx.c          |  4 ++--
 4 files changed, 25 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index e9862eaac43b8..4443869056323 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -206,6 +206,7 @@ typedef enum exit_fastpath_completion fastpath_t;
 
 struct x86_emulate_ctxt;
 struct x86_exception;
+union kvm_smram;
 enum x86_intercept;
 enum x86_intercept_stage;
 
@@ -1616,8 +1617,8 @@ struct kvm_x86_ops {
 
 #ifdef CONFIG_KVM_SMM
 	int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection);
-	int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate);
-	int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate);
+	int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram);
+	int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram *smram);
 	void (*enable_smi_window)(struct kvm_vcpu *vcpu);
 #endif
 
diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 2e6ec79a581ef..ba2733e535fd7 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -295,17 +295,18 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	struct kvm_segment cs, ds;
 	struct desc_ptr dt;
 	unsigned long cr0;
-	char buf[512];
+	union kvm_smram smram;
 
 	check_smram_offsets();
 
-	memset(buf, 0, 512);
+	memset(smram.bytes, 0, sizeof(smram.bytes));
+
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		enter_smm_save_state_64(vcpu, buf);
+		enter_smm_save_state_64(vcpu, smram.bytes);
 	else
 #endif
-		enter_smm_save_state_32(vcpu, buf);
+		enter_smm_save_state_32(vcpu, smram.bytes);
 
 	/*
 	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
@@ -315,12 +316,12 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	 * Kill the VM in the unlikely case of failure, because the VM
 	 * can be in undefined state in this case.
 	 */
-	if (static_call(kvm_x86_enter_smm)(vcpu, buf))
+	if (static_call(kvm_x86_enter_smm)(vcpu, &smram))
 		goto error;
 
 	kvm_smm_changed(vcpu, true);
 
-	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)))
+	if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
 		goto error;
 
 	if (static_call(kvm_x86_get_nmi_mask)(vcpu))
@@ -480,7 +481,7 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
 }
 
 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
-			     const char *smstate)
+			     u8 *smstate)
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
 	struct kvm_segment desc;
@@ -541,7 +542,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 
 #ifdef CONFIG_X86_64
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
-			     const char *smstate)
+			     u8 *smstate)
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
 	struct kvm_segment desc;
@@ -612,13 +613,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
 	unsigned long cr0;
-	char buf[512];
+	union kvm_smram smram;
 	u64 smbase;
 	int ret;
 
 	smbase = vcpu->arch.smbase;
 
-	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf));
+	ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
 	if (ret < 0)
 		return X86EMUL_UNHANDLEABLE;
 
@@ -675,13 +676,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 	 * state (e.g. enter guest mode) before loading state from the SMM
 	 * state-save area.
 	 */
-	if (static_call(kvm_x86_leave_smm)(vcpu, buf))
+	if (static_call(kvm_x86_leave_smm)(vcpu, &smram))
 		return X86EMUL_UNHANDLEABLE;
 
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		return rsm_load_state_64(ctxt, buf);
+		return rsm_load_state_64(ctxt, smram.bytes);
 	else
 #endif
-		return rsm_load_state_32(ctxt, buf);
+		return rsm_load_state_32(ctxt, smram.bytes);
 }
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d28de3e59f7f7..9d08214031d29 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4401,12 +4401,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	return 1;
 }
 
-static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_host_map map_save;
 	int ret;
 
+	char *smstate = (char *)smram;
+
 	if (!is_guest_mode(vcpu))
 		return 0;
 
@@ -4448,7 +4450,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 	return 0;
 }
 
-static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_host_map map, map_save;
@@ -4456,6 +4458,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
 	struct vmcb *vmcb12;
 	int ret;
 
+	const char *smstate = (const char *)smram;
+
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		return 0;
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6be991b29bb76..aca88524fd1ec 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7941,7 +7941,7 @@ static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection)
 	return !is_smm(vcpu);
 }
 
-static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
+static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
@@ -7962,7 +7962,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate)
 	return 0;
 }
 
-static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate)
+static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int ret;
-- 
GitLab


From f34bdf4c1707cdc687db87965d08bb5a51300c58 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:36 +0300
Subject: [PATCH 1192/4122] KVM: x86: smm: use smram struct for 32 bit smram
 load/restore

Use kvm_smram_state_32 struct to save/restore 32 bit SMM state
(used when X86_FEATURE_LM is not present in the guest CPUID).

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-19-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/smm.c | 155 ++++++++++++++++++---------------------------
 1 file changed, 61 insertions(+), 94 deletions(-)

diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index ba2733e535fd7..2c808d0a8e921 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -149,22 +149,17 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
 	return flags;
 }
 
-static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
+				  struct kvm_smm_seg_state_32 *state,
+				  u32 *selector, int n)
 {
 	struct kvm_segment seg;
-	int offset;
 
 	kvm_get_segment(vcpu, &seg, n);
-	PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector);
-
-	if (n < 3)
-		offset = 0x7f84 + n * 12;
-	else
-		offset = 0x7f2c + (n - 3) * 12;
-
-	PUT_SMSTATE(u32, buf, offset + 8, seg.base);
-	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
-	PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg));
+	*selector = seg.selector;
+	state->base = seg.base;
+	state->limit = seg.limit;
+	state->flags = enter_smm_get_segment_flags(&seg);
 }
 
 #ifdef CONFIG_X86_64
@@ -185,54 +180,48 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
 }
 #endif
 
-static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
+				    struct kvm_smram_state_32 *smram)
 {
 	struct desc_ptr dt;
-	struct kvm_segment seg;
 	unsigned long val;
 	int i;
 
-	PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu));
+	smram->cr0     = kvm_read_cr0(vcpu);
+	smram->cr3     = kvm_read_cr3(vcpu);
+	smram->eflags  = kvm_get_rflags(vcpu);
+	smram->eip     = kvm_rip_read(vcpu);
 
 	for (i = 0; i < 8; i++)
-		PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i));
+		smram->gprs[i] = kvm_register_read_raw(vcpu, i);
 
 	kvm_get_dr(vcpu, 6, &val);
-	PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val);
+	smram->dr6     = (u32)val;
 	kvm_get_dr(vcpu, 7, &val);
-	PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val);
+	smram->dr7     = (u32)val;
 
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
-	PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector);
-	PUT_SMSTATE(u32, buf, 0x7f64, seg.base);
-	PUT_SMSTATE(u32, buf, 0x7f60, seg.limit);
-	PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg));
-
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
-	PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector);
-	PUT_SMSTATE(u32, buf, 0x7f80, seg.base);
-	PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit);
-	PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg));
+	enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
+	enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);
 
 	static_call(kvm_x86_get_gdt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7f74, dt.address);
-	PUT_SMSTATE(u32, buf, 0x7f70, dt.size);
+	smram->gdtr.base = dt.address;
+	smram->gdtr.limit = dt.size;
 
 	static_call(kvm_x86_get_idt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7f58, dt.address);
-	PUT_SMSTATE(u32, buf, 0x7f54, dt.size);
+	smram->idtr.base = dt.address;
+	smram->idtr.limit = dt.size;
 
-	for (i = 0; i < 6; i++)
-		enter_smm_save_seg_32(vcpu, buf, i);
+	enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
+	enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
+	enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);
 
-	PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu));
+	enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
+	enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
+	enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);
 
-	/* revision id */
-	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000);
-	PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase);
+	smram->cr4 = kvm_read_cr4(vcpu);
+	smram->smm_revision = 0x00020000;
+	smram->smbase = vcpu->arch.smbase;
 }
 
 #ifdef CONFIG_X86_64
@@ -306,7 +295,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
 		enter_smm_save_state_64(vcpu, smram.bytes);
 	else
 #endif
-		enter_smm_save_state_32(vcpu, smram.bytes);
+		enter_smm_save_state_32(vcpu, &smram.smram32);
 
 	/*
 	 * Give enter_smm() a chance to make ISA-specific changes to the vCPU
@@ -398,21 +387,16 @@ static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
 	desc->padding = 0;
 }
 
-static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate,
-			   int n)
+static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
+			   const struct kvm_smm_seg_state_32 *state,
+			   u16 selector, int n)
 {
 	struct kvm_segment desc;
-	int offset;
-
-	if (n < 3)
-		offset = 0x7f84 + n * 12;
-	else
-		offset = 0x7f2c + (n - 3) * 12;
 
-	desc.selector =           GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4);
-	desc.base =               GET_SMSTATE(u32, smstate, offset + 8);
-	desc.limit =              GET_SMSTATE(u32, smstate, offset + 4);
-	rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset));
+	desc.selector =           selector;
+	desc.base =               state->base;
+	desc.limit =              state->limit;
+	rsm_set_desc_flags(&desc, state->flags);
 	kvm_set_segment(vcpu, &desc, n);
 	return X86EMUL_CONTINUE;
 }
@@ -481,63 +465,46 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
 }
 
 static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
-			     u8 *smstate)
+			     const struct kvm_smram_state_32 *smstate)
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
-	struct kvm_segment desc;
 	struct desc_ptr dt;
-	u32 val, cr0, cr3, cr4;
 	int i;
 
-	cr0 =                      GET_SMSTATE(u32, smstate, 0x7ffc);
-	cr3 =                      GET_SMSTATE(u32, smstate, 0x7ff8);
-	ctxt->eflags =             GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED;
-	ctxt->_eip =               GET_SMSTATE(u32, smstate, 0x7ff0);
+	ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
+	ctxt->_eip =  smstate->eip;
 
 	for (i = 0; i < 8; i++)
-		*reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4);
-
-	val = GET_SMSTATE(u32, smstate, 0x7fcc);
+		*reg_write(ctxt, i) = smstate->gprs[i];
 
-	if (kvm_set_dr(vcpu, 6, val))
+	if (kvm_set_dr(vcpu, 6, smstate->dr6))
 		return X86EMUL_UNHANDLEABLE;
-
-	val = GET_SMSTATE(u32, smstate, 0x7fc8);
-
-	if (kvm_set_dr(vcpu, 7, val))
+	if (kvm_set_dr(vcpu, 7, smstate->dr7))
 		return X86EMUL_UNHANDLEABLE;
 
-	desc.selector =            GET_SMSTATE(u32, smstate, 0x7fc4);
-	desc.base =                GET_SMSTATE(u32, smstate, 0x7f64);
-	desc.limit =               GET_SMSTATE(u32, smstate, 0x7f60);
-	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f5c));
-	kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
-
-	desc.selector =            GET_SMSTATE(u32, smstate, 0x7fc0);
-	desc.base =                GET_SMSTATE(u32, smstate, 0x7f80);
-	desc.limit =               GET_SMSTATE(u32, smstate, 0x7f7c);
-	rsm_set_desc_flags(&desc,  GET_SMSTATE(u32, smstate, 0x7f78));
-	kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
+	rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
+	rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);
 
-	dt.address =               GET_SMSTATE(u32, smstate, 0x7f74);
-	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f70);
+	dt.address =               smstate->gdtr.base;
+	dt.size =                  smstate->gdtr.limit;
 	static_call(kvm_x86_set_gdt)(vcpu, &dt);
 
-	dt.address =               GET_SMSTATE(u32, smstate, 0x7f58);
-	dt.size =                  GET_SMSTATE(u32, smstate, 0x7f54);
+	dt.address =               smstate->idtr.base;
+	dt.size =                  smstate->idtr.limit;
 	static_call(kvm_x86_set_idt)(vcpu, &dt);
 
-	for (i = 0; i < 6; i++) {
-		int r = rsm_load_seg_32(vcpu, smstate, i);
-		if (r != X86EMUL_CONTINUE)
-			return r;
-	}
+	rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
+	rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
+	rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);
 
-	cr4 = GET_SMSTATE(u32, smstate, 0x7f14);
+	rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
+	rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
+	rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);
 
-	vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8);
+	vcpu->arch.smbase = smstate->smbase;
 
-	return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
+	return rsm_enter_protected_mode(vcpu, smstate->cr0,
+					smstate->cr3, smstate->cr4);
 }
 
 #ifdef CONFIG_X86_64
@@ -684,5 +651,5 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 		return rsm_load_state_64(ctxt, smram.bytes);
 	else
 #endif
-		return rsm_load_state_32(ctxt, smram.bytes);
+		return rsm_load_state_32(ctxt, &smram.smram32);
 }
-- 
GitLab


From 8bcda1dee95ae88cade0ad671e0f4d371c005c4d Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:37 +0300
Subject: [PATCH 1193/4122] KVM: x86: smm: use smram struct for 64 bit smram
 load/restore

Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state
(used when X86_FEATURE_LM is present in the guest CPUID,
regardless of 32-bitness of the guest).

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-20-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/smm.c | 153 +++++++++++++++++++--------------------------
 1 file changed, 63 insertions(+), 90 deletions(-)

diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index 2c808d0a8e921..e3ecb1f841681 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -163,20 +163,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
 }
 
 #ifdef CONFIG_X86_64
-static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n)
+static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
+				  struct kvm_smm_seg_state_64 *state,
+				  int n)
 {
 	struct kvm_segment seg;
-	int offset;
-	u16 flags;
 
 	kvm_get_segment(vcpu, &seg, n);
-	offset = 0x7e00 + n * 16;
-
-	flags = enter_smm_get_segment_flags(&seg) >> 8;
-	PUT_SMSTATE(u16, buf, offset, seg.selector);
-	PUT_SMSTATE(u16, buf, offset + 2, flags);
-	PUT_SMSTATE(u32, buf, offset + 4, seg.limit);
-	PUT_SMSTATE(u64, buf, offset + 8, seg.base);
+	state->selector = seg.selector;
+	state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
+	state->limit = seg.limit;
+	state->base = seg.base;
 }
 #endif
 
@@ -225,57 +222,52 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
 }
 
 #ifdef CONFIG_X86_64
-static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf)
+static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
+				    struct kvm_smram_state_64 *smram)
 {
 	struct desc_ptr dt;
-	struct kvm_segment seg;
 	unsigned long val;
 	int i;
 
 	for (i = 0; i < 16; i++)
-		PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i));
+		smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);
+
+	smram->rip    = kvm_rip_read(vcpu);
+	smram->rflags = kvm_get_rflags(vcpu);
 
-	PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu));
-	PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu));
 
 	kvm_get_dr(vcpu, 6, &val);
-	PUT_SMSTATE(u64, buf, 0x7f68, val);
+	smram->dr6 = val;
 	kvm_get_dr(vcpu, 7, &val);
-	PUT_SMSTATE(u64, buf, 0x7f60, val);
-
-	PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu));
-	PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu));
-	PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu));
+	smram->dr7 = val;
 
-	PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase);
+	smram->cr0 = kvm_read_cr0(vcpu);
+	smram->cr3 = kvm_read_cr3(vcpu);
+	smram->cr4 = kvm_read_cr4(vcpu);
 
-	/* revision id */
-	PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064);
+	smram->smbase = vcpu->arch.smbase;
+	smram->smm_revison = 0x00020064;
 
-	PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer);
+	smram->efer = vcpu->arch.efer;
 
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_TR);
-	PUT_SMSTATE(u16, buf, 0x7e90, seg.selector);
-	PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8);
-	PUT_SMSTATE(u32, buf, 0x7e94, seg.limit);
-	PUT_SMSTATE(u64, buf, 0x7e98, seg.base);
+	enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);
 
 	static_call(kvm_x86_get_idt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7e84, dt.size);
-	PUT_SMSTATE(u64, buf, 0x7e88, dt.address);
+	smram->idtr.limit = dt.size;
+	smram->idtr.base = dt.address;
 
-	kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR);
-	PUT_SMSTATE(u16, buf, 0x7e70, seg.selector);
-	PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8);
-	PUT_SMSTATE(u32, buf, 0x7e74, seg.limit);
-	PUT_SMSTATE(u64, buf, 0x7e78, seg.base);
+	enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);
 
 	static_call(kvm_x86_get_gdt)(vcpu, &dt);
-	PUT_SMSTATE(u32, buf, 0x7e64, dt.size);
-	PUT_SMSTATE(u64, buf, 0x7e68, dt.address);
+	smram->gdtr.limit = dt.size;
+	smram->gdtr.base = dt.address;
 
-	for (i = 0; i < 6; i++)
-		enter_smm_save_seg_64(vcpu, buf, i);
+	enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
+	enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
+	enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
+	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
+	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
+	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
 }
 #endif
 
@@ -292,7 +284,7 @@ void enter_smm(struct kvm_vcpu *vcpu)
 
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		enter_smm_save_state_64(vcpu, smram.bytes);
+		enter_smm_save_state_64(vcpu, &smram.smram64);
 	else
 #endif
 		enter_smm_save_state_32(vcpu, &smram.smram32);
@@ -402,18 +394,17 @@ static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
 }
 
 #ifdef CONFIG_X86_64
-static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate,
+
+static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
+			   const struct kvm_smm_seg_state_64 *state,
 			   int n)
 {
 	struct kvm_segment desc;
-	int offset;
-
-	offset = 0x7e00 + n * 16;
 
-	desc.selector =           GET_SMSTATE(u16, smstate, offset);
-	rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8);
-	desc.limit =              GET_SMSTATE(u32, smstate, offset + 4);
-	desc.base =               GET_SMSTATE(u64, smstate, offset + 8);
+	desc.selector =           state->selector;
+	rsm_set_desc_flags(&desc, state->attributes << 8);
+	desc.limit =              state->limit;
+	desc.base =               state->base;
 	kvm_set_segment(vcpu, &desc, n);
 	return X86EMUL_CONTINUE;
 }
@@ -509,68 +500,50 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 
 #ifdef CONFIG_X86_64
 static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
-			     u8 *smstate)
+			     const struct kvm_smram_state_64 *smstate)
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
-	struct kvm_segment desc;
 	struct desc_ptr dt;
-	u64 val, cr0, cr3, cr4;
 	int i, r;
 
 	for (i = 0; i < 16; i++)
-		*reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8);
-
-	ctxt->_eip   = GET_SMSTATE(u64, smstate, 0x7f78);
-	ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED;
+		*reg_write(ctxt, i) = smstate->gprs[15 - i];
 
-	val = GET_SMSTATE(u64, smstate, 0x7f68);
+	ctxt->_eip   = smstate->rip;
+	ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;
 
-	if (kvm_set_dr(vcpu, 6, val))
+	if (kvm_set_dr(vcpu, 6, smstate->dr6))
 		return X86EMUL_UNHANDLEABLE;
-
-	val = GET_SMSTATE(u64, smstate, 0x7f60);
-
-	if (kvm_set_dr(vcpu, 7, val))
+	if (kvm_set_dr(vcpu, 7, smstate->dr7))
 		return X86EMUL_UNHANDLEABLE;
 
-	cr0 =                       GET_SMSTATE(u64, smstate, 0x7f58);
-	cr3 =                       GET_SMSTATE(u64, smstate, 0x7f50);
-	cr4 =                       GET_SMSTATE(u64, smstate, 0x7f48);
-	vcpu->arch.smbase =         GET_SMSTATE(u32, smstate, 0x7f00);
-	val =                       GET_SMSTATE(u64, smstate, 0x7ed0);
+	vcpu->arch.smbase =         smstate->smbase;
 
-	if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA))
+	if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
 		return X86EMUL_UNHANDLEABLE;
 
-	desc.selector =             GET_SMSTATE(u32, smstate, 0x7e90);
-	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e92) << 8);
-	desc.limit =                GET_SMSTATE(u32, smstate, 0x7e94);
-	desc.base =                 GET_SMSTATE(u64, smstate, 0x7e98);
-	kvm_set_segment(vcpu, &desc, VCPU_SREG_TR);
+	rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);
 
-	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e84);
-	dt.address =                GET_SMSTATE(u64, smstate, 0x7e88);
+	dt.size =                   smstate->idtr.limit;
+	dt.address =                smstate->idtr.base;
 	static_call(kvm_x86_set_idt)(vcpu, &dt);
 
-	desc.selector =             GET_SMSTATE(u32, smstate, 0x7e70);
-	rsm_set_desc_flags(&desc,   GET_SMSTATE(u32, smstate, 0x7e72) << 8);
-	desc.limit =                GET_SMSTATE(u32, smstate, 0x7e74);
-	desc.base =                 GET_SMSTATE(u64, smstate, 0x7e78);
-	kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR);
+	rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);
 
-	dt.size =                   GET_SMSTATE(u32, smstate, 0x7e64);
-	dt.address =                GET_SMSTATE(u64, smstate, 0x7e68);
+	dt.size =                   smstate->gdtr.limit;
+	dt.address =                smstate->gdtr.base;
 	static_call(kvm_x86_set_gdt)(vcpu, &dt);
 
-	r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4);
+	r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
 	if (r != X86EMUL_CONTINUE)
 		return r;
 
-	for (i = 0; i < 6; i++) {
-		r = rsm_load_seg_64(vcpu, smstate, i);
-		if (r != X86EMUL_CONTINUE)
-			return r;
-	}
+	rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
+	rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
+	rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
+	rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
+	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
+	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);
 
 	return X86EMUL_CONTINUE;
 }
@@ -648,7 +621,7 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
 
 #ifdef CONFIG_X86_64
 	if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
-		return rsm_load_state_64(ctxt, smram.bytes);
+		return rsm_load_state_64(ctxt, &smram.smram64);
 	else
 #endif
 		return rsm_load_state_32(ctxt, &smram.smram32);
-- 
GitLab


From e6a82199b610d843d810bc90d1f5df667906c402 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:38 +0300
Subject: [PATCH 1194/4122] KVM: svm: drop explicit return value of
 kvm_vcpu_map

if kvm_vcpu_map returns non zero value, error path should be triggered
regardless of the exact returned error value.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-21-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 9d08214031d29..dfcdca3f538b5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4437,8 +4437,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
 	 * that, see svm_prepare_switch_to_guest()) which must be
 	 * preserved.
 	 */
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr),
-			 &map_save) == -EINVAL)
+	if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
 		return 1;
 
 	BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400);
@@ -4475,11 +4474,11 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
 		return 1;
 
 	vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL)
+	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map))
 		return 1;
 
 	ret = 1;
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL)
+	if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save))
 		goto unmap_map;
 
 	if (svm_allocate_nested(svm))
-- 
GitLab


From dd5045fed588b3e7ac0a4546138b2fe16d5d0fff Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:39 +0300
Subject: [PATCH 1195/4122] KVM: x86: SVM: use smram structs

Use SMM structs in the SVM code as well, which removes the last user of
put_smstate/GET_SMSTATE so remove these macros as well.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-22-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/smm.h     |  6 ------
 arch/x86/kvm/svm/svm.c | 21 +++++++--------------
 2 files changed, 7 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index b66da263ec822..520217467ac20 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -4,12 +4,6 @@
 
 #include <linux/build_bug.h>
 
-#define GET_SMSTATE(type, buf, offset)		\
-	(*(type *)((buf) + (offset) - 0x7e00))
-
-#define PUT_SMSTATE(type, buf, offset, val)                      \
-	*(type *)((buf) + (offset) - 0x7e00) = val
-
 #ifdef CONFIG_KVM_SMM
 
 
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index dfcdca3f538b5..2be8050bf981d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4407,15 +4407,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
 	struct kvm_host_map map_save;
 	int ret;
 
-	char *smstate = (char *)smram;
-
 	if (!is_guest_mode(vcpu))
 		return 0;
 
-	/* FED8h - SVM Guest */
-	PUT_SMSTATE(u64, smstate, 0x7ed8, 1);
-	/* FEE0h - SVM Guest VMCB Physical Address */
-	PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa);
+	smram->smram64.svm_guest_flag = 1;
+	smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
 
 	svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
 	svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
@@ -4453,28 +4449,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 	struct kvm_host_map map, map_save;
-	u64 saved_efer, vmcb12_gpa;
 	struct vmcb *vmcb12;
 	int ret;
 
-	const char *smstate = (const char *)smram;
+	const struct kvm_smram_state_64 *smram64 = &smram->smram64;
 
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
 		return 0;
 
 	/* Non-zero if SMI arrived while vCPU was in guest mode. */
-	if (!GET_SMSTATE(u64, smstate, 0x7ed8))
+	if (!smram64->svm_guest_flag)
 		return 0;
 
 	if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM))
 		return 1;
 
-	saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0);
-	if (!(saved_efer & EFER_SVME))
+	if (!(smram64->efer & EFER_SVME))
 		return 1;
 
-	vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0);
-	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map))
+	if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map))
 		return 1;
 
 	ret = 1;
@@ -4500,7 +4493,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram)
 	vmcb12 = map.hva;
 	nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
 	nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
-	ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false);
+	ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false);
 
 	if (ret)
 		goto unmap_save;
-- 
GitLab


From 95504c7c981b3260b3b238ace03f3519bd9a0b6d Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:40 +0300
Subject: [PATCH 1196/4122] KVM: x86: SVM: don't save SVM state to SMRAM when
 VM is not long mode capable

When the guest CPUID doesn't have support for long mode, 32 bit SMRAM
layout is used and it has no support for preserving EFER and/or SVM
state.

Note that this isn't relevant to running 32 bit guests on VM which is
long mode capable - such VM can still run 32 bit guests in compatibility
mode.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-23-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/svm.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 2be8050bf981d..527f18d8cc448 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4410,6 +4410,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram)
 	if (!is_guest_mode(vcpu))
 		return 0;
 
+	/*
+	 * 32-bit SMRAM format doesn't preserve EFER and SVM state.  Userspace is
+	 * responsible for ensuring nested SVM and SMIs are mutually exclusive.
+	 */
+
+	if (!guest_cpuid_has(vcpu, X86_FEATURE_LM))
+		return 1;
+
 	smram->smram64.svm_guest_flag = 1;
 	smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa;
 
-- 
GitLab


From fb28875fd7da184079150295da7ee8d80a70917e Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <mlevitsk@redhat.com>
Date: Tue, 25 Oct 2022 15:47:41 +0300
Subject: [PATCH 1197/4122] KVM: x86: smm: preserve interrupt shadow in SMRAM

When #SMI is asserted, the CPU can be in interrupt shadow due to sti or
mov ss.

It is not mandatory in  Intel/AMD prm to have the #SMI blocked during the
shadow, and on top of that, since neither SVM nor VMX has true support
for SMI window, waiting for one instruction would mean single stepping
the guest.

Instead, allow #SMI in this case, but both reset the interrupt window and
stash its value in SMRAM to restore it on exit from SMM.

This fixes rare failures seen mostly on windows guests on VMX, when #SMI
falls on the sti instruction which mainfest in VM entry failure due
to EFLAGS.IF not being set, but STI interrupt window still being set
in the VMCS.

Signed-off-by: Maxim Levitsky <mlevitsk@redhat.com>
Message-Id: <20221025124741.228045-24-mlevitsk@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/smm.c | 29 +++++++++++++++++++++++++----
 arch/x86/kvm/smm.h |  8 ++++++--
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c
index e3ecb1f841681..a9c1c2af8d94c 100644
--- a/arch/x86/kvm/smm.c
+++ b/arch/x86/kvm/smm.c
@@ -27,7 +27,9 @@ static void check_smram_offsets(void)
 	CHECK_SMRAM32_OFFSET(io_restart_rsi,		0xFF0C);
 	CHECK_SMRAM32_OFFSET(io_restart_rip,		0xFF10);
 	CHECK_SMRAM32_OFFSET(cr4,			0xFF14);
-	CHECK_SMRAM32_OFFSET(reserved3,			0xFF18);
+	CHECK_SMRAM32_OFFSET(reserved2,			0xFF18);
+	CHECK_SMRAM32_OFFSET(int_shadow,		0xFF1A);
+	CHECK_SMRAM32_OFFSET(reserved3,			0xFF1B);
 	CHECK_SMRAM32_OFFSET(ds,			0xFF2C);
 	CHECK_SMRAM32_OFFSET(fs,			0xFF38);
 	CHECK_SMRAM32_OFFSET(gs,			0xFF44);
@@ -73,7 +75,9 @@ static void check_smram_offsets(void)
 	CHECK_SMRAM64_OFFSET(reserved1,			0xFEC4);
 	CHECK_SMRAM64_OFFSET(io_inst_restart,		0xFEC8);
 	CHECK_SMRAM64_OFFSET(auto_hlt_restart,		0xFEC9);
-	CHECK_SMRAM64_OFFSET(reserved2,			0xFECA);
+	CHECK_SMRAM64_OFFSET(amd_nmi_mask,		0xFECA);
+	CHECK_SMRAM64_OFFSET(int_shadow,		0xFECB);
+	CHECK_SMRAM64_OFFSET(reserved2,			0xFECC);
 	CHECK_SMRAM64_OFFSET(efer,			0xFED0);
 	CHECK_SMRAM64_OFFSET(svm_guest_flag,		0xFED8);
 	CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,	0xFEE0);
@@ -219,6 +223,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
 	smram->cr4 = kvm_read_cr4(vcpu);
 	smram->smm_revision = 0x00020000;
 	smram->smbase = vcpu->arch.smbase;
+
+	smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
 }
 
 #ifdef CONFIG_X86_64
@@ -268,6 +274,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
 	enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
 	enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
 	enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);
+
+	smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
 }
 #endif
 
@@ -313,6 +321,8 @@ void enter_smm(struct kvm_vcpu *vcpu)
 	kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
 	kvm_rip_write(vcpu, 0x8000);
 
+	static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+
 	cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
 	static_call(kvm_x86_set_cr0)(vcpu, cr0);
 	vcpu->arch.cr0 = cr0;
@@ -460,7 +470,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 {
 	struct kvm_vcpu *vcpu = ctxt->vcpu;
 	struct desc_ptr dt;
-	int i;
+	int i, r;
 
 	ctxt->eflags =  smstate->eflags | X86_EFLAGS_FIXED;
 	ctxt->_eip =  smstate->eip;
@@ -494,8 +504,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
 
 	vcpu->arch.smbase = smstate->smbase;
 
-	return rsm_enter_protected_mode(vcpu, smstate->cr0,
+	r = rsm_enter_protected_mode(vcpu, smstate->cr0,
 					smstate->cr3, smstate->cr4);
+
+	if (r != X86EMUL_CONTINUE)
+		return r;
+
+	static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+	ctxt->interruptibility = (u8)smstate->int_shadow;
+
+	return r;
 }
 
 #ifdef CONFIG_X86_64
@@ -545,6 +563,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
 	rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
 	rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);
 
+	static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
+	ctxt->interruptibility = (u8)smstate->int_shadow;
+
 	return X86EMUL_CONTINUE;
 }
 #endif
diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h
index 520217467ac20..a1cf2ac5bd78d 100644
--- a/arch/x86/kvm/smm.h
+++ b/arch/x86/kvm/smm.h
@@ -31,7 +31,9 @@ struct kvm_smram_state_32 {
 	u32 cr4;
 
 	/* A20M#, CPL, shutdown and other reserved/undocumented fields */
-	u32 reserved3[5];
+	u16 reserved2;
+	u8 int_shadow; /* KVM extension */
+	u8 reserved3[17];
 
 	struct kvm_smm_seg_state_32 ds;
 	struct kvm_smm_seg_state_32 fs;
@@ -95,7 +97,9 @@ struct kvm_smram_state_64 {
 	u32 reserved1;
 	u8 io_inst_restart;
 	u8 auto_hlt_restart;
-	u8 reserved2[6];
+	u8 amd_nmi_mask; /* Documented in AMD BKDG as NMI mask, not used by KVM */
+	u8 int_shadow;
+	u32 reserved2;
 
 	u64 efer;
 
-- 
GitLab


From 93c5c61d9e58a9ea423439d358c198be5b674a58 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 11 Oct 2022 15:58:06 -0400
Subject: [PATCH 1198/4122] mm/gup: Add FOLL_INTERRUPTIBLE

We have had FAULT_FLAG_INTERRUPTIBLE but it was never applied to GUPs.  One
issue with it is that not all GUP paths are able to handle signal delivers
besides SIGKILL.

That's not ideal for the GUP users who are actually able to handle these
cases, like KVM.

KVM uses GUP extensively on faulting guest pages, during which we've got
existing infrastructures to retry a page fault at a later time.  Allowing
the GUP to be interrupted by generic signals can make KVM related threads
to be more responsive.  For examples:

  (1) SIGUSR1: which QEMU/KVM uses to deliver an inter-process IPI,
      e.g. when the admin issues a vm_stop QMP command, SIGUSR1 can be
      generated to kick the vcpus out of kernel context immediately,

  (2) SIGINT: which can be used with interactive hypervisor users to stop a
      virtual machine with Ctrl-C without any delays/hangs,

  (3) SIGTRAP: which grants GDB capability even during page faults that are
      stuck for a long time.

Normally hypervisor will be able to receive these signals properly, but not
if we're stuck in a GUP for a long time for whatever reason.  It happens
easily with a stucked postcopy migration when e.g. a network temp failure
happens, then some vcpu threads can hang death waiting for the pages.  With
the new FOLL_INTERRUPTIBLE, we can allow GUP users like KVM to selectively
enable the ability to trap these signals.

Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Peter Xu <peterx@redhat.com>
Message-Id: <20221011195809.557016-2-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/mm.h |  1 +
 mm/gup.c           | 33 +++++++++++++++++++++++++++++----
 mm/hugetlb.c       |  5 ++++-
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8bbcccbc55654..3c84f4e48cd77 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2958,6 +2958,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_SPLIT_PMD	0x20000	/* split huge pmd before returning */
 #define FOLL_PIN	0x40000	/* pages must be released via unpin_user_page */
 #define FOLL_FAST_ONLY	0x80000	/* gup_fast: prevent fall-back to slow gup */
+#define FOLL_INTERRUPTIBLE  0x100000 /* allow interrupts from generic signals */
 
 /*
  * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
diff --git a/mm/gup.c b/mm/gup.c
index fe195d47de74a..90e372352e824 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -989,8 +989,17 @@ static int faultin_page(struct vm_area_struct *vma,
 		fault_flags |= FAULT_FLAG_WRITE;
 	if (*flags & FOLL_REMOTE)
 		fault_flags |= FAULT_FLAG_REMOTE;
-	if (locked)
+	if (locked) {
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+		/*
+		 * FAULT_FLAG_INTERRUPTIBLE is opt-in. GUP callers must set
+		 * FOLL_INTERRUPTIBLE to enable FAULT_FLAG_INTERRUPTIBLE.
+		 * That's because some callers may not be prepared to
+		 * handle early exits caused by non-fatal signals.
+		 */
+		if (*flags & FOLL_INTERRUPTIBLE)
+			fault_flags |= FAULT_FLAG_INTERRUPTIBLE;
+	}
 	if (*flags & FOLL_NOWAIT)
 		fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
 	if (*flags & FOLL_TRIED) {
@@ -1391,6 +1400,22 @@ retry:
 }
 EXPORT_SYMBOL_GPL(fixup_user_fault);
 
+/*
+ * GUP always responds to fatal signals.  When FOLL_INTERRUPTIBLE is
+ * specified, it'll also respond to generic signals.  The caller of GUP
+ * that has FOLL_INTERRUPTIBLE should take care of the GUP interruption.
+ */
+static bool gup_signal_pending(unsigned int flags)
+{
+	if (fatal_signal_pending(current))
+		return true;
+
+	if (!(flags & FOLL_INTERRUPTIBLE))
+		return false;
+
+	return signal_pending(current);
+}
+
 /*
  * Please note that this function, unlike __get_user_pages will not
  * return 0 for nr_pages > 0 without FOLL_NOWAIT
@@ -1472,11 +1497,11 @@ retry:
 		 * Repeat on the address that fired VM_FAULT_RETRY
 		 * with both FAULT_FLAG_ALLOW_RETRY and
 		 * FAULT_FLAG_TRIED.  Note that GUP can be interrupted
-		 * by fatal signals, so we need to check it before we
+		 * by fatal signals of even common signals, depending on
+		 * the caller's request. So we need to check it before we
 		 * start trying again otherwise it can loop forever.
 		 */
-
-		if (fatal_signal_pending(current)) {
+		if (gup_signal_pending(flags)) {
 			if (!pages_done)
 				pages_done = -EINTR;
 			break;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 546df97c31e4c..b5ed54f760bb2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6285,9 +6285,12 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 				fault_flags |= FAULT_FLAG_WRITE;
 			else if (unshare)
 				fault_flags |= FAULT_FLAG_UNSHARE;
-			if (locked)
+			if (locked) {
 				fault_flags |= FAULT_FLAG_ALLOW_RETRY |
 					FAULT_FLAG_KILLABLE;
+				if (flags & FOLL_INTERRUPTIBLE)
+					fault_flags |= FAULT_FLAG_INTERRUPTIBLE;
+			}
 			if (flags & FOLL_NOWAIT)
 				fault_flags |= FAULT_FLAG_ALLOW_RETRY |
 					FAULT_FLAG_RETRY_NOWAIT;
-- 
GitLab


From fe5ed56c79733b7808f968567c581118ab79552e Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 11 Oct 2022 15:58:07 -0400
Subject: [PATCH 1199/4122] kvm: Add KVM_PFN_ERR_SIGPENDING

Add a new pfn error to show that we've got a pending signal to handle
during hva_to_pfn_slow() procedure (of -EINTR retval).

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221011195809.557016-3-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 10 ++++++++++
 virt/kvm/kvm_main.c      |  2 ++
 2 files changed, 12 insertions(+)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 18592bdf4c1bf..911b064878dfa 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -96,6 +96,7 @@
 #define KVM_PFN_ERR_FAULT	(KVM_PFN_ERR_MASK)
 #define KVM_PFN_ERR_HWPOISON	(KVM_PFN_ERR_MASK + 1)
 #define KVM_PFN_ERR_RO_FAULT	(KVM_PFN_ERR_MASK + 2)
+#define KVM_PFN_ERR_SIGPENDING	(KVM_PFN_ERR_MASK + 3)
 
 /*
  * error pfns indicate that the gfn is in slot but faild to
@@ -106,6 +107,15 @@ static inline bool is_error_pfn(kvm_pfn_t pfn)
 	return !!(pfn & KVM_PFN_ERR_MASK);
 }
 
+/*
+ * KVM_PFN_ERR_SIGPENDING indicates that fetching the PFN was interrupted
+ * by a pending signal.  Note, the signal may or may not be fatal.
+ */
+static inline bool is_sigpending_pfn(kvm_pfn_t pfn)
+{
+	return pfn == KVM_PFN_ERR_SIGPENDING;
+}
+
 /*
  * error_noslot pfns indicate that the gfn can not be
  * translated to pfn - it is not in slot or failed to
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 25d7872b29c17..558f52dbebbde 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2667,6 +2667,8 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
 	if (npages == 1)
 		return pfn;
+	if (npages == -EINTR)
+		return KVM_PFN_ERR_SIGPENDING;
 
 	mmap_read_lock(current->mm);
 	if (npages == -EHWPOISON ||
-- 
GitLab


From c8b88b332bedf47a9aa008dfb69998c90623375c Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 11 Oct 2022 15:58:08 -0400
Subject: [PATCH 1200/4122] kvm: Add interruptible flag to
 __gfn_to_pfn_memslot()

Add a new "interruptible" flag showing that the caller is willing to be
interrupted by signals during the __gfn_to_pfn_memslot() request.  Wire it
up with a FOLL_INTERRUPTIBLE flag that we've just introduced.

This prepares KVM to be able to respond to SIGUSR1 (for QEMU that's the
SIGIPI) even during e.g. handling an userfaultfd page fault.

No functional change intended.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221011195809.557016-4-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/kvm/mmu.c                   |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_hv.c    |  2 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c |  2 +-
 arch/x86/kvm/mmu/mmu.c                 |  4 ++--
 include/linux/kvm_host.h               |  4 ++--
 virt/kvm/kvm_main.c                    | 28 ++++++++++++++++----------
 virt/kvm/kvm_mm.h                      |  4 ++--
 virt/kvm/pfncache.c                    |  2 +-
 8 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 60ee3d9f01f8c..f154d4a7fae0b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1239,7 +1239,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 */
 	smp_rmb();
 
-	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+	pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
 				   write_fault, &writable, NULL);
 	if (pfn == KVM_PFN_ERR_HWPOISON) {
 		kvm_send_hwpoison_signal(hva, vma_shift);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index e9744b41a226c..4939f57b6f6a2 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -598,7 +598,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
 		write_ok = true;
 	} else {
 		/* Call KVM generic code to do the slow-path check */
-		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
 					   writing, &write_ok, NULL);
 		if (is_error_noslot_pfn(pfn))
 			return -EFAULT;
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 5d5e12f3bf864..9d3743ca16d53 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -846,7 +846,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 		unsigned long pfn;
 
 		/* Call KVM generic code to do the slow-path check */
-		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
+		pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
 					   writing, upgrade_p, NULL);
 		if (is_error_noslot_pfn(pfn))
 			return -EFAULT;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f8c92a4a35faa..0bbfb33fa7355 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -4170,7 +4170,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	}
 
 	async = false;
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, &async,
+	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, &async,
 					  fault->write, &fault->map_writable,
 					  &fault->hva);
 	if (!async)
@@ -4187,7 +4187,7 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		}
 	}
 
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, NULL,
+	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, NULL,
 					  fault->write, &fault->map_writable,
 					  &fault->hva);
 	return RET_PF_CONTINUE;
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 911b064878dfa..8fe4665bd020b 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1150,8 +1150,8 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn);
 kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
-			       bool atomic, bool *async, bool write_fault,
-			       bool *writable, hva_t *hva);
+			       bool atomic, bool interruptible, bool *async,
+			       bool write_fault, bool *writable, hva_t *hva);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
 void kvm_release_pfn_dirty(kvm_pfn_t pfn);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 558f52dbebbde..43bbe4fde078f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2514,7 +2514,7 @@ static bool hva_to_pfn_fast(unsigned long addr, bool write_fault,
  * 1 indicates success, -errno is returned if error is detected.
  */
 static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
-			   bool *writable, kvm_pfn_t *pfn)
+			   bool interruptible, bool *writable, kvm_pfn_t *pfn)
 {
 	unsigned int flags = FOLL_HWPOISON;
 	struct page *page;
@@ -2529,6 +2529,8 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
 		flags |= FOLL_WRITE;
 	if (async)
 		flags |= FOLL_NOWAIT;
+	if (interruptible)
+		flags |= FOLL_INTERRUPTIBLE;
 
 	npages = get_user_pages_unlocked(addr, 1, &page, flags);
 	if (npages != 1)
@@ -2638,6 +2640,7 @@ out:
  * Pin guest page in memory and return its pfn.
  * @addr: host virtual address which maps memory to the guest
  * @atomic: whether this function can sleep
+ * @interruptible: whether the process can be interrupted by non-fatal signals
  * @async: whether this function need to wait IO complete if the
  *         host page is not in the memory
  * @write_fault: whether we should get a writable host page
@@ -2648,8 +2651,8 @@ out:
  * 2): @write_fault = false && @writable, @writable will tell the caller
  *     whether the mapping is writable.
  */
-kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
-		     bool write_fault, bool *writable)
+kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
+		     bool *async, bool write_fault, bool *writable)
 {
 	struct vm_area_struct *vma;
 	kvm_pfn_t pfn;
@@ -2664,7 +2667,8 @@ kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
 	if (atomic)
 		return KVM_PFN_ERR_FAULT;
 
-	npages = hva_to_pfn_slow(addr, async, write_fault, writable, &pfn);
+	npages = hva_to_pfn_slow(addr, async, write_fault, interruptible,
+				 writable, &pfn);
 	if (npages == 1)
 		return pfn;
 	if (npages == -EINTR)
@@ -2699,8 +2703,8 @@ exit:
 }
 
 kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
-			       bool atomic, bool *async, bool write_fault,
-			       bool *writable, hva_t *hva)
+			       bool atomic, bool interruptible, bool *async,
+			       bool write_fault, bool *writable, hva_t *hva)
 {
 	unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
 
@@ -2725,7 +2729,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn,
 		writable = NULL;
 	}
 
-	return hva_to_pfn(addr, atomic, async, write_fault,
+	return hva_to_pfn(addr, atomic, interruptible, async, write_fault,
 			  writable);
 }
 EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
@@ -2733,20 +2737,22 @@ EXPORT_SYMBOL_GPL(__gfn_to_pfn_memslot);
 kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
 		      bool *writable)
 {
-	return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
-				    write_fault, writable, NULL);
+	return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, false,
+				    NULL, write_fault, writable, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
 kvm_pfn_t gfn_to_pfn_memslot(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
+	return __gfn_to_pfn_memslot(slot, gfn, false, false, NULL, true,
+				    NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
 
 kvm_pfn_t gfn_to_pfn_memslot_atomic(const struct kvm_memory_slot *slot, gfn_t gfn)
 {
-	return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
+	return __gfn_to_pfn_memslot(slot, gfn, true, false, NULL, true,
+				    NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);
 
diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
index 41da467d99c95..a1ab15006af34 100644
--- a/virt/kvm/kvm_mm.h
+++ b/virt/kvm/kvm_mm.h
@@ -24,8 +24,8 @@
 #define KVM_MMU_READ_UNLOCK(kvm)	spin_unlock(&(kvm)->mmu_lock)
 #endif /* KVM_HAVE_MMU_RWLOCK */
 
-kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
-		     bool write_fault, bool *writable);
+kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool interruptible,
+		     bool *async, bool write_fault, bool *writable);
 
 #ifdef CONFIG_HAVE_KVM_PFNCACHE
 void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 346e47f155724..bd4a46aee384b 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -185,7 +185,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 		}
 
 		/* We always request a writeable mapping */
-		new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL);
+		new_pfn = hva_to_pfn(gpc->uhva, false, false, NULL, true, NULL);
 		if (is_error_noslot_pfn(new_pfn))
 			goto out_error;
 
-- 
GitLab


From 766576874b9732ad6a65595296de351c064b4c0b Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Tue, 11 Oct 2022 15:59:47 -0400
Subject: [PATCH 1201/4122] kvm: x86: Allow to respond to generic signals
 during slow PF

Enable x86 slow page faults to be able to respond to non-fatal signals,
returning -EINTR properly when it happens.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221011195947.557281-1-peterx@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0bbfb33fa7355..5d662b43a63ef 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3149,8 +3149,13 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *
 	send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, PAGE_SHIFT, tsk);
 }
 
-static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
+static int kvm_handle_error_pfn(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
 {
+	if (is_sigpending_pfn(pfn)) {
+		kvm_handle_signal_exit(vcpu);
+		return -EINTR;
+	}
+
 	/*
 	 * Do not cache the mmio info caused by writing the readonly gfn
 	 * into the spte otherwise read access on readonly gfn also can
@@ -3172,7 +3177,7 @@ static int handle_abnormal_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fau
 {
 	/* The pfn is invalid, report the error! */
 	if (unlikely(is_error_pfn(fault->pfn)))
-		return kvm_handle_bad_page(vcpu, fault->gfn, fault->pfn);
+		return kvm_handle_error_pfn(vcpu, fault->gfn, fault->pfn);
 
 	if (unlikely(!fault->slot)) {
 		gva_t gva = fault->is_tdp ? 0 : fault->addr;
@@ -4187,7 +4192,12 @@ static int kvm_faultin_pfn(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		}
 	}
 
-	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, false, NULL,
+	/*
+	 * Allow gup to bail on pending non-fatal signals when it's also allowed
+	 * to wait for IO.  Note, gup always bails if it is unable to quickly
+	 * get a page and a fatal signal, i.e. SIGKILL, is pending.
+	 */
+	fault->pfn = __gfn_to_pfn_memslot(slot, fault->gfn, false, true, NULL,
 					  fault->write, &fault->map_writable,
 					  &fault->hva);
 	return RET_PF_CONTINUE;
-- 
GitLab


From be83794210e7020fef98596f4513aafbed659cd1 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Wed, 21 Sep 2022 15:15:21 +0000
Subject: [PATCH 1202/4122] KVM: x86: Disallow the use of
 KVM_MSR_FILTER_DEFAULT_ALLOW in the kernel

Protect the kernel from using the flag KVM_MSR_FILTER_DEFAULT_ALLOW.
Its value is 0, and using it incorrectly could have unintended
consequences. E.g. prevent someone in the kernel from writing something
like this.

if (filter.flags & KVM_MSR_FILTER_DEFAULT_ALLOW)
        <allow the MSR>

and getting confused when it doesn't work.

It would be more ideal to remove this flag altogether, but userspace
may already be using it, so protecting the kernel is all that can
reasonably be done at this point.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220921151525.904162-2-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/kvm.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 46de10a809ecb..73ad693aa653d 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -222,7 +222,9 @@ struct kvm_msr_filter_range {
 
 #define KVM_MSR_FILTER_MAX_RANGES 16
 struct kvm_msr_filter {
+#ifndef __KERNEL__
 #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+#endif
 #define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
 	__u32 flags;
 	struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
-- 
GitLab


From db205f7e1edc8d8f0880f0218d3a03b13fe94af3 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Wed, 21 Sep 2022 15:15:22 +0000
Subject: [PATCH 1203/4122] KVM: x86: Add a VALID_MASK for the MSR exit reason
 flags

Add the mask KVM_MSR_EXIT_REASON_VALID_MASK for the MSR exit reason
flags.  This simplifies checks that validate these flags, and makes it
easier to introduce new flags in the future.

No functional change intended.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20220921151525.904162-3-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c       | 4 +---
 include/uapi/linux/kvm.h | 3 +++
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index cbec2e675c18f..9ba8c1b73db49 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6230,9 +6230,7 @@ split_irqchip_unlock:
 		break;
 	case KVM_CAP_X86_USER_SPACE_MSR:
 		r = -EINVAL;
-		if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
-				     KVM_MSR_EXIT_REASON_UNKNOWN |
-				     KVM_MSR_EXIT_REASON_FILTER))
+		if (cap->args[0] & ~KVM_MSR_EXIT_REASON_VALID_MASK)
 			break;
 		kvm->arch.user_space_msr_mask = cap->args[0];
 		r = 0;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0d5d4419139ae..7fea12369245a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -485,6 +485,9 @@ struct kvm_run {
 #define KVM_MSR_EXIT_REASON_INVAL	(1 << 0)
 #define KVM_MSR_EXIT_REASON_UNKNOWN	(1 << 1)
 #define KVM_MSR_EXIT_REASON_FILTER	(1 << 2)
+#define KVM_MSR_EXIT_REASON_VALID_MASK	(KVM_MSR_EXIT_REASON_INVAL   |	\
+					 KVM_MSR_EXIT_REASON_UNKNOWN |	\
+					 KVM_MSR_EXIT_REASON_FILTER)
 			__u32 reason; /* kernel -> user */
 			__u32 index; /* kernel -> user */
 			__u64 data; /* kernel <-> user */
-- 
GitLab


From c1340fe3590ebbe729294c728234434ef31a7c7a Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Wed, 21 Sep 2022 15:15:23 +0000
Subject: [PATCH 1204/4122] KVM: x86: Add a VALID_MASK for the flag in
 kvm_msr_filter

Add the mask KVM_MSR_FILTER_VALID_MASK for the flag in the struct
kvm_msr_filter.  This makes it easier to introduce new flags in the
future.

No functional change intended.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20220921151525.904162-4-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/kvm.h | 1 +
 arch/x86/kvm/x86.c              | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 73ad693aa653d..ae4324674c490 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -226,6 +226,7 @@ struct kvm_msr_filter {
 #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
 #endif
 #define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
+#define KVM_MSR_FILTER_VALID_MASK (KVM_MSR_FILTER_DEFAULT_DENY)
 	__u32 flags;
 	struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
 };
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9ba8c1b73db49..5208b9501c880 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6441,7 +6441,7 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm,
 	int r = 0;
 	u32 i;
 
-	if (filter->flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+	if (filter->flags & ~KVM_MSR_FILTER_VALID_MASK)
 		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(filter->ranges); i++)
-- 
GitLab


From 8aff460f216753d86ab90ddbcab0125ab2400335 Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Wed, 21 Sep 2022 15:15:24 +0000
Subject: [PATCH 1205/4122] KVM: x86: Add a VALID_MASK for the flags in
 kvm_msr_filter_range

Add the mask KVM_MSR_FILTER_RANGE_VALID_MASK for the flags in the
struct kvm_msr_filter_range.  This simplifies checks that validate
these flags, and makes it easier to introduce new flags in the future.

No functional change intended.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20220921151525.904162-5-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/kvm.h | 2 ++
 arch/x86/kvm/x86.c              | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index ae4324674c490..c6df6b16a0885 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -214,6 +214,8 @@ struct kvm_msr_list {
 struct kvm_msr_filter_range {
 #define KVM_MSR_FILTER_READ  (1 << 0)
 #define KVM_MSR_FILTER_WRITE (1 << 1)
+#define KVM_MSR_FILTER_RANGE_VALID_MASK (KVM_MSR_FILTER_READ | \
+					 KVM_MSR_FILTER_WRITE)
 	__u32 flags;
 	__u32 nmsrs; /* number of msrs in bitmap */
 	__u32 base;  /* MSR index the bitmap starts at */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 5208b9501c880..e46e458c5b08a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6407,7 +6407,7 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
 	if (!user_range->nmsrs)
 		return 0;
 
-	if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
+	if (user_range->flags & ~KVM_MSR_FILTER_RANGE_VALID_MASK)
 		return -EINVAL;
 
 	if (!user_range->flags)
-- 
GitLab


From f7d64772712350fd35f1d76d16dec030a81029eb Mon Sep 17 00:00:00 2001
From: Aaron Lewis <aaronlewis@google.com>
Date: Wed, 21 Sep 2022 15:15:25 +0000
Subject: [PATCH 1206/4122] selftests: kvm/x86: Test the flags in MSR filtering
 and MSR exiting

When using the flags in KVM_X86_SET_MSR_FILTER and
KVM_CAP_X86_USER_SPACE_MSR it is expected that an attempt to write to
any of the unused bits will fail.  Add testing to walk over every bit
in each of the flag fields in MSR filtering and MSR exiting to verify
that unused bits return and error and used bits, i.e. valid bits,
succeed.

Signed-off-by: Aaron Lewis <aaronlewis@google.com>
Message-Id: <20220921151525.904162-6-aaronlewis@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../kvm/x86_64/userspace_msr_exit_test.c      | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)

diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index a4f06370a2454..fae95089e6551 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -733,6 +733,89 @@ static void test_msr_permission_bitmap(void)
 	kvm_vm_free(vm);
 }
 
+#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask)	\
+({									\
+	int r = __vm_ioctl(vm, cmd, arg);				\
+									\
+	if (flag & valid_mask)						\
+		TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r));		\
+	else								\
+		TEST_ASSERT(r == -1 && errno == EINVAL,			\
+			    "Wanted EINVAL for %s with flag = 0x%llx, got  rc: %i errno: %i (%s)", \
+			    #cmd, flag, r, errno,  strerror(errno));	\
+})
+
+static void run_user_space_msr_flag_test(struct kvm_vm *vm)
+{
+	struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
+	int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
+	int rc;
+	int i;
+
+	rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+	TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+
+	for (i = 0; i < nflags; i++) {
+		cap.args[0] = BIT_ULL(i);
+		test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
+			   BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
+	}
+}
+
+static void run_msr_filter_flag_test(struct kvm_vm *vm)
+{
+	u64 deny_bits = 0;
+	struct kvm_msr_filter filter = {
+		.flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+		.ranges = {
+			{
+				.flags = KVM_MSR_FILTER_READ,
+				.nmsrs = 1,
+				.base = 0,
+				.bitmap = (uint8_t *)&deny_bits,
+			},
+		},
+	};
+	int nflags;
+	int rc;
+	int i;
+
+	rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+	TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+	nflags = sizeof(filter.flags) * BITS_PER_BYTE;
+	for (i = 0; i < nflags; i++) {
+		filter.flags = BIT_ULL(i);
+		test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+			   BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
+	}
+
+	filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+	nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
+	for (i = 0; i < nflags; i++) {
+		filter.ranges[0].flags = BIT_ULL(i);
+		test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+			   BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
+	}
+}
+
+/* Test that attempts to write to the unused bits in a flag fails. */
+static void test_user_exit_msr_flags(void)
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+	/* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
+	run_user_space_msr_flag_test(vm);
+
+	/* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
+	run_msr_filter_flag_test(vm);
+
+	kvm_vm_free(vm);
+}
+
 int main(int argc, char *argv[])
 {
 	/* Tell stdout not to buffer its content */
@@ -744,5 +827,7 @@ int main(int argc, char *argv[])
 
 	test_msr_permission_bitmap();
 
+	test_user_exit_msr_flags();
+
 	return 0;
 }
-- 
GitLab


From 428e921611bcad9ab95078baf9abe14688de43f0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 19 Oct 2022 16:56:11 +0000
Subject: [PATCH 1207/4122] KVM: x86/mmu: Tag disallowed NX huge pages even if
 they're not tracked

Tag shadow pages that cannot be replaced with an NX huge page regardless
of whether or not zapping the page would allow KVM to immediately create
a huge page, e.g. because something else prevents creating a huge page.

I.e. track pages that are disallowed from being NX huge pages regardless
of whether or not the page could have been huge at the time of fault.
KVM currently tracks pages that were disallowed from being huge due to
the NX workaround if and only if the page could otherwise be huge.  But
that fails to handled the scenario where whatever restriction prevented
KVM from installing a huge page goes away, e.g. if dirty logging is
disabled, the host mapping level changes, etc...

Failure to tag shadow pages appropriately could theoretically lead to
false negatives, e.g. if a fetch fault requests a small page and thus
isn't tracked, and a read/write fault later requests a huge page, KVM
will not reject the huge page as it should.

To avoid yet another flag, initialize the list_head and use list_empty()
to determine whether or not a page is on the list of NX huge pages that
should be recovered.

Note, the TDP MMU accounting is still flawed as fixing the TDP MMU is
more involved due to mmu_lock being held for read.  This will be
addressed in a future commit.

Fixes: 5bcaf3e1715f ("KVM: x86/mmu: Account NX huge page disallowed iff huge page was requested")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221019165618.927057-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c          | 32 ++++++++++++++++++++++++--------
 arch/x86/kvm/mmu/mmu_internal.h | 10 +++++++++-
 arch/x86/kvm/mmu/paging_tmpl.h  |  6 +++---
 arch/x86/kvm/mmu/tdp_mmu.c      |  4 +++-
 4 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5d662b43a63ef..989586e7dd86e 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -803,15 +803,25 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
 }
 
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+			  bool nx_huge_page_possible)
 {
-	if (sp->lpage_disallowed)
+	sp->lpage_disallowed = true;
+
+	/*
+	 * If it's possible to replace the shadow page with an NX huge page,
+	 * i.e. if the shadow page is the only thing currently preventing KVM
+	 * from using a huge page, add the shadow page to the list of "to be
+	 * zapped for NX recovery" pages.  Note, the shadow page can already be
+	 * on the list if KVM is reusing an existing shadow page, i.e. if KVM
+	 * links a shadow page at multiple points.
+	 */
+	if (!nx_huge_page_possible || !list_empty(&sp->lpage_disallowed_link))
 		return;
 
 	++kvm->stat.nx_lpage_splits;
 	list_add_tail(&sp->lpage_disallowed_link,
 		      &kvm->arch.lpage_disallowed_mmu_pages);
-	sp->lpage_disallowed = true;
 }
 
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -833,9 +843,13 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 
 void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	--kvm->stat.nx_lpage_splits;
 	sp->lpage_disallowed = false;
-	list_del(&sp->lpage_disallowed_link);
+
+	if (list_empty(&sp->lpage_disallowed_link))
+		return;
+
+	--kvm->stat.nx_lpage_splits;
+	list_del_init(&sp->lpage_disallowed_link);
 }
 
 static struct kvm_memory_slot *
@@ -2130,6 +2144,8 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
 
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
+	INIT_LIST_HEAD(&sp->lpage_disallowed_link);
+
 	/*
 	 * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
 	 * depends on valid pages being added to the head of the list.  See
@@ -3127,9 +3143,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			continue;
 
 		link_shadow_page(vcpu, it.sptep, sp);
-		if (fault->is_tdp && fault->huge_page_disallowed &&
-		    fault->req_level >= it.level)
-			account_huge_nx_page(vcpu->kvm, sp);
+		if (fault->is_tdp && fault->huge_page_disallowed)
+			account_huge_nx_page(vcpu->kvm, sp,
+					     fault->req_level >= it.level);
 	}
 
 	if (WARN_ON_ONCE(it.level != fault->goal_level))
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 582def531d4d9..cca1ad75d0961 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -100,6 +100,13 @@ struct kvm_mmu_page {
 		};
 	};
 
+	/*
+	 * Tracks shadow pages that, if zapped, would allow KVM to create an NX
+	 * huge page.  A shadow page will have lpage_disallowed set but not be
+	 * on the list if a huge page is disallowed for other reasons, e.g.
+	 * because KVM is shadowing a PTE at the same gfn, the memslot isn't
+	 * properly aligned, etc...
+	 */
 	struct list_head lpage_disallowed_link;
 #ifdef CONFIG_X86_32
 	/*
@@ -315,7 +322,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 
 void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+			  bool nx_huge_page_possible);
 void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 5ab5f94dcb6fd..8fd0c4e1e5750 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -713,9 +713,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 			continue;
 
 		link_shadow_page(vcpu, it.sptep, sp);
-		if (fault->huge_page_disallowed &&
-		    fault->req_level >= it.level)
-			account_huge_nx_page(vcpu->kvm, sp);
+		if (fault->huge_page_disallowed)
+			account_huge_nx_page(vcpu->kvm, sp,
+					     fault->req_level >= it.level);
 	}
 
 	if (WARN_ON_ONCE(it.level != fault->goal_level))
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 672f0432d7777..80a4a1a091310 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -284,6 +284,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
 static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
 			    gfn_t gfn, union kvm_mmu_page_role role)
 {
+	INIT_LIST_HEAD(&sp->lpage_disallowed_link);
+
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
 	sp->role = role;
@@ -1141,7 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
 	if (account_nx)
-		account_huge_nx_page(kvm, sp);
+		account_huge_nx_page(kvm, sp, true);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 	tdp_account_mmu_page(kvm, sp);
 
-- 
GitLab


From 55c510e26ab6181c132327a8b90c864e6193ce27 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 19 Oct 2022 16:56:12 +0000
Subject: [PATCH 1208/4122] KVM: x86/mmu: Rename NX huge pages fields/functions
 for consistency

Rename most of the variables/functions involved in the NX huge page
mitigation to provide consistency, e.g. lpage vs huge page, and NX huge
vs huge NX, and also to provide clarity, e.g. to make it obvious the flag
applies only to the NX huge page mitigation, not to any condition that
prevents creating a huge page.

Add a comment explaining what the newly named "possible_nx_huge_pages"
tracks.

Leave the nx_lpage_splits stat alone as the name is ABI and thus set in
stone.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20221019165618.927057-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 19 +++++++--
 arch/x86/kvm/mmu/mmu.c          | 71 +++++++++++++++++----------------
 arch/x86/kvm/mmu/mmu_internal.h | 22 ++++++----
 arch/x86/kvm/mmu/paging_tmpl.h  |  2 +-
 arch/x86/kvm/mmu/tdp_mmu.c      |  8 ++--
 5 files changed, 71 insertions(+), 51 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4443869056323..9030e6263f954 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1159,7 +1159,18 @@ struct kvm_arch {
 	struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
 	struct list_head active_mmu_pages;
 	struct list_head zapped_obsolete_pages;
-	struct list_head lpage_disallowed_mmu_pages;
+	/*
+	 * A list of kvm_mmu_page structs that, if zapped, could possibly be
+	 * replaced by an NX huge page.  A shadow page is on this list if its
+	 * existence disallows an NX huge page (nx_huge_page_disallowed is set)
+	 * and there are no other conditions that prevent a huge page, e.g.
+	 * the backing host page is huge, dirtly logging is not enabled for its
+	 * memslot, etc...  Note, zapping shadow pages on this list doesn't
+	 * guarantee an NX huge page will be created in its stead, e.g. if the
+	 * guest attempts to execute from the region then KVM obviously can't
+	 * create an NX huge page (without hanging the guest).
+	 */
+	struct list_head possible_nx_huge_pages;
 	struct kvm_page_track_notifier_node mmu_sp_tracker;
 	struct kvm_page_track_notifier_head track_notifier_head;
 	/*
@@ -1275,7 +1286,7 @@ struct kvm_arch {
 	bool sgx_provisioning_allowed;
 
 	struct kvm_pmu_event_filter __rcu *pmu_event_filter;
-	struct task_struct *nx_lpage_recovery_thread;
+	struct task_struct *nx_huge_page_recovery_thread;
 
 #ifdef CONFIG_X86_64
 	/*
@@ -1320,8 +1331,8 @@ struct kvm_arch {
 	 *  - tdp_mmu_roots (above)
 	 *  - tdp_mmu_pages (above)
 	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
-	 *  - lpage_disallowed_mmu_pages
-	 *  - the lpage_disallowed_link field of kvm_mmu_page structs used
+	 *  - possible_nx_huge_pages;
+	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
 	 *    by the TDP MMU
 	 * It is acceptable, but not necessary, to acquire this lock when
 	 * the thread holds the MMU lock in write mode.
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 989586e7dd86e..09482ef4d8320 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -803,10 +803,10 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
 }
 
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 			  bool nx_huge_page_possible)
 {
-	sp->lpage_disallowed = true;
+	sp->nx_huge_page_disallowed = true;
 
 	/*
 	 * If it's possible to replace the shadow page with an NX huge page,
@@ -816,12 +816,13 @@ void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	 * on the list if KVM is reusing an existing shadow page, i.e. if KVM
 	 * links a shadow page at multiple points.
 	 */
-	if (!nx_huge_page_possible || !list_empty(&sp->lpage_disallowed_link))
+	if (!nx_huge_page_possible ||
+	    !list_empty(&sp->possible_nx_huge_page_link))
 		return;
 
 	++kvm->stat.nx_lpage_splits;
-	list_add_tail(&sp->lpage_disallowed_link,
-		      &kvm->arch.lpage_disallowed_mmu_pages);
+	list_add_tail(&sp->possible_nx_huge_page_link,
+		      &kvm->arch.possible_nx_huge_pages);
 }
 
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
@@ -841,15 +842,15 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
 
-void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	sp->lpage_disallowed = false;
+	sp->nx_huge_page_disallowed = false;
 
-	if (list_empty(&sp->lpage_disallowed_link))
+	if (list_empty(&sp->possible_nx_huge_page_link))
 		return;
 
 	--kvm->stat.nx_lpage_splits;
-	list_del_init(&sp->lpage_disallowed_link);
+	list_del_init(&sp->possible_nx_huge_page_link);
 }
 
 static struct kvm_memory_slot *
@@ -2144,7 +2145,7 @@ static struct kvm_mmu_page *kvm_mmu_alloc_shadow_page(struct kvm *kvm,
 
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
-	INIT_LIST_HEAD(&sp->lpage_disallowed_link);
+	INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
 
 	/*
 	 * active_mmu_pages must be a FIFO list, as kvm_zap_obsolete_pages()
@@ -2503,8 +2504,8 @@ static bool __kvm_mmu_prepare_zap_page(struct kvm *kvm,
 		zapped_root = !is_obsolete_sp(kvm, sp);
 	}
 
-	if (sp->lpage_disallowed)
-		unaccount_huge_nx_page(kvm, sp);
+	if (sp->nx_huge_page_disallowed)
+		unaccount_nx_huge_page(kvm, sp);
 
 	sp->role.invalid = 1;
 
@@ -3144,7 +3145,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 
 		link_shadow_page(vcpu, it.sptep, sp);
 		if (fault->is_tdp && fault->huge_page_disallowed)
-			account_huge_nx_page(vcpu->kvm, sp,
+			account_nx_huge_page(vcpu->kvm, sp,
 					     fault->req_level >= it.level);
 	}
 
@@ -5998,7 +5999,7 @@ int kvm_mmu_init_vm(struct kvm *kvm)
 
 	INIT_LIST_HEAD(&kvm->arch.active_mmu_pages);
 	INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages);
-	INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages);
+	INIT_LIST_HEAD(&kvm->arch.possible_nx_huge_pages);
 	spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
 
 	r = kvm_mmu_init_tdp_mmu(kvm);
@@ -6683,7 +6684,7 @@ static int set_nx_huge_pages(const char *val, const struct kernel_param *kp)
 			kvm_mmu_zap_all_fast(kvm);
 			mutex_unlock(&kvm->slots_lock);
 
-			wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+			wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
 		}
 		mutex_unlock(&kvm_lock);
 	}
@@ -6815,7 +6816,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
 		mutex_lock(&kvm_lock);
 
 		list_for_each_entry(kvm, &vm_list, vm_list)
-			wake_up_process(kvm->arch.nx_lpage_recovery_thread);
+			wake_up_process(kvm->arch.nx_huge_page_recovery_thread);
 
 		mutex_unlock(&kvm_lock);
 	}
@@ -6823,7 +6824,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
 	return err;
 }
 
-static void kvm_recover_nx_lpages(struct kvm *kvm)
+static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 {
 	unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
 	int rcu_idx;
@@ -6846,23 +6847,25 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
 	to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
 	for ( ; to_zap; --to_zap) {
-		if (list_empty(&kvm->arch.lpage_disallowed_mmu_pages))
+		if (list_empty(&kvm->arch.possible_nx_huge_pages))
 			break;
 
 		/*
 		 * We use a separate list instead of just using active_mmu_pages
-		 * because the number of lpage_disallowed pages is expected to
-		 * be relatively small compared to the total.
+		 * because the number of shadow pages that be replaced with an
+		 * NX huge page is expected to be relatively small compared to
+		 * the total number of shadow pages.  And because the TDP MMU
+		 * doesn't use active_mmu_pages.
 		 */
-		sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages,
+		sp = list_first_entry(&kvm->arch.possible_nx_huge_pages,
 				      struct kvm_mmu_page,
-				      lpage_disallowed_link);
-		WARN_ON_ONCE(!sp->lpage_disallowed);
+				      possible_nx_huge_page_link);
+		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
 		if (is_tdp_mmu_page(sp)) {
 			flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
 		} else {
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
-			WARN_ON_ONCE(sp->lpage_disallowed);
+			WARN_ON_ONCE(sp->nx_huge_page_disallowed);
 		}
 
 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
@@ -6883,7 +6886,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
 	srcu_read_unlock(&kvm->srcu, rcu_idx);
 }
 
-static long get_nx_lpage_recovery_timeout(u64 start_time)
+static long get_nx_huge_page_recovery_timeout(u64 start_time)
 {
 	bool enabled;
 	uint period;
@@ -6894,19 +6897,19 @@ static long get_nx_lpage_recovery_timeout(u64 start_time)
 		       : MAX_SCHEDULE_TIMEOUT;
 }
 
-static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
+static int kvm_nx_huge_page_recovery_worker(struct kvm *kvm, uintptr_t data)
 {
 	u64 start_time;
 	long remaining_time;
 
 	while (true) {
 		start_time = get_jiffies_64();
-		remaining_time = get_nx_lpage_recovery_timeout(start_time);
+		remaining_time = get_nx_huge_page_recovery_timeout(start_time);
 
 		set_current_state(TASK_INTERRUPTIBLE);
 		while (!kthread_should_stop() && remaining_time > 0) {
 			schedule_timeout(remaining_time);
-			remaining_time = get_nx_lpage_recovery_timeout(start_time);
+			remaining_time = get_nx_huge_page_recovery_timeout(start_time);
 			set_current_state(TASK_INTERRUPTIBLE);
 		}
 
@@ -6915,7 +6918,7 @@ static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data)
 		if (kthread_should_stop())
 			return 0;
 
-		kvm_recover_nx_lpages(kvm);
+		kvm_recover_nx_huge_pages(kvm);
 	}
 }
 
@@ -6923,17 +6926,17 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
 {
 	int err;
 
-	err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0,
+	err = kvm_vm_create_worker_thread(kvm, kvm_nx_huge_page_recovery_worker, 0,
 					  "kvm-nx-lpage-recovery",
-					  &kvm->arch.nx_lpage_recovery_thread);
+					  &kvm->arch.nx_huge_page_recovery_thread);
 	if (!err)
-		kthread_unpark(kvm->arch.nx_lpage_recovery_thread);
+		kthread_unpark(kvm->arch.nx_huge_page_recovery_thread);
 
 	return err;
 }
 
 void kvm_mmu_pre_destroy_vm(struct kvm *kvm)
 {
-	if (kvm->arch.nx_lpage_recovery_thread)
-		kthread_stop(kvm->arch.nx_lpage_recovery_thread);
+	if (kvm->arch.nx_huge_page_recovery_thread)
+		kthread_stop(kvm->arch.nx_huge_page_recovery_thread);
 }
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index cca1ad75d0961..67879459a25c8 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -57,7 +57,13 @@ struct kvm_mmu_page {
 	bool tdp_mmu_page;
 	bool unsync;
 	u8 mmu_valid_gen;
-	bool lpage_disallowed; /* Can't be replaced by an equiv large page */
+
+	 /*
+	  * The shadow page can't be replaced by an equivalent huge page
+	  * because it is being used to map an executable page in the guest
+	  * and the NX huge page mitigation is enabled.
+	  */
+	bool nx_huge_page_disallowed;
 
 	/*
 	 * The following two entries are used to key the shadow page in the
@@ -102,12 +108,12 @@ struct kvm_mmu_page {
 
 	/*
 	 * Tracks shadow pages that, if zapped, would allow KVM to create an NX
-	 * huge page.  A shadow page will have lpage_disallowed set but not be
-	 * on the list if a huge page is disallowed for other reasons, e.g.
-	 * because KVM is shadowing a PTE at the same gfn, the memslot isn't
-	 * properly aligned, etc...
+	 * huge page.  A shadow page will have nx_huge_page_disallowed set but
+	 * not be on the list if a huge page is disallowed for other reasons,
+	 * e.g. because KVM is shadowing a PTE at the same gfn, the memslot
+	 * isn't properly aligned, etc...
 	 */
-	struct list_head lpage_disallowed_link;
+	struct list_head possible_nx_huge_page_link;
 #ifdef CONFIG_X86_32
 	/*
 	 * Used out of the mmu-lock to avoid reading spte values while an
@@ -322,8 +328,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 
 void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 
-void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 			  bool nx_huge_page_possible);
-void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h
index 8fd0c4e1e5750..0f64550720557 100644
--- a/arch/x86/kvm/mmu/paging_tmpl.h
+++ b/arch/x86/kvm/mmu/paging_tmpl.h
@@ -714,7 +714,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault,
 
 		link_shadow_page(vcpu, it.sptep, sp);
 		if (fault->huge_page_disallowed)
-			account_huge_nx_page(vcpu->kvm, sp,
+			account_nx_huge_page(vcpu->kvm, sp,
 					     fault->req_level >= it.level);
 	}
 
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 80a4a1a091310..73eb28ed1f03f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -284,7 +284,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
 static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
 			    gfn_t gfn, union kvm_mmu_page_role role)
 {
-	INIT_LIST_HEAD(&sp->lpage_disallowed_link);
+	INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
 
 	set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
 
@@ -403,8 +403,8 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
 		lockdep_assert_held_write(&kvm->mmu_lock);
 
 	list_del(&sp->link);
-	if (sp->lpage_disallowed)
-		unaccount_huge_nx_page(kvm, sp);
+	if (sp->nx_huge_page_disallowed)
+		unaccount_nx_huge_page(kvm, sp);
 
 	if (shared)
 		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1143,7 +1143,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
 	if (account_nx)
-		account_huge_nx_page(kvm, sp, true);
+		account_nx_huge_page(kvm, sp, true);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 	tdp_account_mmu_page(kvm, sp);
 
-- 
GitLab


From b5b0977f4aa28ef2166894b05f37d8f8028a76ce Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 19 Oct 2022 16:56:13 +0000
Subject: [PATCH 1209/4122] KVM: x86/mmu: Properly account NX huge page
 workaround for nonpaging MMUs

Account and track NX huge pages for nonpaging MMUs so that a future
enhancement to precisely check if a shadow page can't be replaced by a NX
huge page doesn't get false positives.  Without correct tracking, KVM can
get stuck in a loop if an instruction is fetching and writing data on the
same huge page, e.g. KVM installs a small executable page on the fetch
fault, replaces it with an NX huge page on the write fault, and faults
again on the fetch.

Alternatively, and perhaps ideally, KVM would simply not enforce the
workaround for nonpaging MMUs.  The guest has no page tables to abuse
and KVM is guaranteed to switch to a different MMU on CR0.PG being
toggled so there's no security or performance concerns.  However, getting
make_spte() to play nice now and in the future is unnecessarily complex.

In the current code base, make_spte() can enforce the mitigation if TDP
is enabled or the MMU is indirect, but make_spte() may not always have a
vCPU/MMU to work with, e.g. if KVM were to support in-line huge page
promotion when disabling dirty logging.

Without a vCPU/MMU, KVM could either pass in the correct information
and/or derive it from the shadow page, but the former is ugly and the
latter subtly non-trivial due to the possibility of direct shadow pages
in indirect MMUs.  Given that using shadow paging with an unpaged guest
is far from top priority _and_ has been subjected to the workaround since
its inception, keep it simple and just fix the accounting glitch.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20221019165618.927057-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c  |  2 +-
 arch/x86/kvm/mmu/spte.c | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 09482ef4d8320..f11e4bbfc0bcc 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3144,7 +3144,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			continue;
 
 		link_shadow_page(vcpu, it.sptep, sp);
-		if (fault->is_tdp && fault->huge_page_disallowed)
+		if (fault->huge_page_disallowed)
 			account_nx_huge_page(vcpu->kvm, sp,
 					     fault->req_level >= it.level);
 	}
diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
index 2e08b2a453612..c0fd7e049b4e4 100644
--- a/arch/x86/kvm/mmu/spte.c
+++ b/arch/x86/kvm/mmu/spte.c
@@ -161,6 +161,18 @@ bool make_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 	if (!prefetch)
 		spte |= spte_shadow_accessed_mask(spte);
 
+	/*
+	 * For simplicity, enforce the NX huge page mitigation even if not
+	 * strictly necessary.  KVM could ignore the mitigation if paging is
+	 * disabled in the guest, as the guest doesn't have an page tables to
+	 * abuse.  But to safely ignore the mitigation, KVM would have to
+	 * ensure a new MMU is loaded (or all shadow pages zapped) when CR0.PG
+	 * is toggled on, and that's a net negative for performance when TDP is
+	 * enabled.  When TDP is disabled, KVM will always switch to a new MMU
+	 * when CR0.PG is toggled, but leveraging that to ignore the mitigation
+	 * would tie make_spte() further to vCPU/MMU state, and add complexity
+	 * just to optimize a mode that is anything but performance critical.
+	 */
 	if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) &&
 	    is_nx_huge_page_enabled(vcpu->kvm)) {
 		pte_access &= ~ACC_EXEC_MASK;
-- 
GitLab


From 61f94478547bb4fdcd4c4f37a0aa723d610a7422 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 19 Oct 2022 16:56:14 +0000
Subject: [PATCH 1210/4122] KVM: x86/mmu: Set disallowed_nx_huge_page in TDP
 MMU before setting SPTE

Set nx_huge_page_disallowed in TDP MMU shadow pages before making the SP
visible to other readers, i.e. before setting its SPTE.  This will allow
KVM to query the flag when determining if a shadow page can be replaced
by a NX huge page without violating the rules of the mitigation.

Note, the shadow/legacy MMU holds mmu_lock for write, so it's impossible
for another CPU to see a shadow page without an up-to-date
nx_huge_page_disallowed, i.e. only the TDP MMU needs the complicated
dance.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
Message-Id: <20221019165618.927057-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c          | 28 +++++++++++++++++++---------
 arch/x86/kvm/mmu/mmu_internal.h |  5 ++---
 arch/x86/kvm/mmu/tdp_mmu.c      | 31 ++++++++++++++++++-------------
 3 files changed, 39 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index f11e4bbfc0bcc..e384b78e099c1 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -803,11 +803,8 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 		kvm_flush_remote_tlbs_with_address(kvm, gfn, 1);
 }
 
-void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-			  bool nx_huge_page_possible)
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	sp->nx_huge_page_disallowed = true;
-
 	/*
 	 * If it's possible to replace the shadow page with an NX huge page,
 	 * i.e. if the shadow page is the only thing currently preventing KVM
@@ -816,8 +813,7 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 	 * on the list if KVM is reusing an existing shadow page, i.e. if KVM
 	 * links a shadow page at multiple points.
 	 */
-	if (!nx_huge_page_possible ||
-	    !list_empty(&sp->possible_nx_huge_page_link))
+	if (!list_empty(&sp->possible_nx_huge_page_link))
 		return;
 
 	++kvm->stat.nx_lpage_splits;
@@ -825,6 +821,15 @@ void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
 		      &kvm->arch.possible_nx_huge_pages);
 }
 
+static void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
+				 bool nx_huge_page_possible)
+{
+	sp->nx_huge_page_disallowed = true;
+
+	if (nx_huge_page_possible)
+		track_possible_nx_huge_page(kvm, sp);
+}
+
 static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	struct kvm_memslots *slots;
@@ -842,10 +847,8 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp)
 	kvm_mmu_gfn_allow_lpage(slot, gfn);
 }
 
-void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
-	sp->nx_huge_page_disallowed = false;
-
 	if (list_empty(&sp->possible_nx_huge_page_link))
 		return;
 
@@ -853,6 +856,13 @@ void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 	list_del_init(&sp->possible_nx_huge_page_link);
 }
 
+static void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+	sp->nx_huge_page_disallowed = false;
+
+	untrack_possible_nx_huge_page(kvm, sp);
+}
+
 static struct kvm_memory_slot *
 gfn_to_memslot_dirty_bitmap(struct kvm_vcpu *vcpu, gfn_t gfn,
 			    bool no_dirty_log)
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 67879459a25c8..22152241bd290 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -328,8 +328,7 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 
 void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc);
 
-void account_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp,
-			  bool nx_huge_page_possible);
-void unaccount_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void track_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
+void untrack_possible_nx_huge_page(struct kvm *kvm, struct kvm_mmu_page *sp);
 
 #endif /* __KVM_X86_MMU_INTERNAL_H */
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 73eb28ed1f03f..059231c823452 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -403,8 +403,11 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
 		lockdep_assert_held_write(&kvm->mmu_lock);
 
 	list_del(&sp->link);
-	if (sp->nx_huge_page_disallowed)
-		unaccount_nx_huge_page(kvm, sp);
+
+	if (sp->nx_huge_page_disallowed) {
+		sp->nx_huge_page_disallowed = false;
+		untrack_possible_nx_huge_page(kvm, sp);
+	}
 
 	if (shared)
 		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1118,16 +1121,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
  * @sp: The new TDP page table to install.
- * @account_nx: True if this page table is being installed to split a
- *              non-executable huge page.
  * @shared: This operation is running under the MMU lock in read mode.
  *
  * Returns: 0 if the new page table was installed. Non-0 if the page table
  *          could not be installed (e.g. the atomic compare-exchange failed).
  */
 static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
-			   struct kvm_mmu_page *sp, bool account_nx,
-			   bool shared)
+			   struct kvm_mmu_page *sp, bool shared)
 {
 	u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled());
 	int ret = 0;
@@ -1142,8 +1142,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 
 	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
-	if (account_nx)
-		account_nx_huge_page(kvm, sp, true);
 	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 	tdp_account_mmu_page(kvm, sp);
 
@@ -1157,6 +1155,7 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 {
 	struct kvm_mmu *mmu = vcpu->arch.mmu;
+	struct kvm *kvm = vcpu->kvm;
 	struct tdp_iter iter;
 	struct kvm_mmu_page *sp;
 	int ret;
@@ -1193,9 +1192,6 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		}
 
 		if (!is_shadow_present_pte(iter.old_spte)) {
-			bool account_nx = fault->huge_page_disallowed &&
-					  fault->req_level >= iter.level;
-
 			/*
 			 * If SPTE has been frozen by another thread, just
 			 * give up and retry, avoiding unnecessary page table
@@ -1207,10 +1203,19 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 			sp = tdp_mmu_alloc_sp(vcpu);
 			tdp_mmu_init_child_sp(sp, &iter);
 
-			if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) {
+			sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
+
+			if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
 				tdp_mmu_free_sp(sp);
 				break;
 			}
+
+			if (fault->huge_page_disallowed &&
+			    fault->req_level >= iter.level) {
+				spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+				track_possible_nx_huge_page(kvm, sp);
+				spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+			}
 		}
 	}
 
@@ -1498,7 +1503,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
 	 * correctness standpoint since the translation will be the same either
 	 * way.
 	 */
-	ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared);
+	ret = tdp_mmu_link_sp(kvm, iter, sp, shared);
 	if (ret)
 		goto out;
 
-- 
GitLab


From d25ceb9264364dca0683748b301340097fdab6c7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 19 Oct 2022 16:56:15 +0000
Subject: [PATCH 1211/4122] KVM: x86/mmu: Track the number of TDP MMU pages,
 but not the actual pages

Track the number of TDP MMU "shadow" pages instead of tracking the pages
themselves. With the NX huge page list manipulation moved out of the common
linking flow, elminating the list-based tracking means the happy path of
adding a shadow page doesn't need to acquire a spinlock and can instead
inc/dec an atomic.

Keep the tracking as the WARN during TDP MMU teardown on leaked shadow
pages is very, very useful for detecting KVM bugs.

Tracking the number of pages will also make it trivial to expose the
counter to userspace as a stat in the future, which may or may not be
desirable.

Note, the TDP MMU needs to use a separate counter (and stat if that ever
comes to be) from the existing n_used_mmu_pages. The TDP MMU doesn't bother
supporting the shrinker nor does it honor KVM_SET_NR_MMU_PAGES (because the
TDP MMU consumes so few pages relative to shadow paging), and including TDP
MMU pages in that counter would break both the shrinker and shadow MMUs,
e.g. if a VM is using nested TDP.

Cc: Yan Zhao <yan.y.zhao@intel.com>
Reviewed-by: Mingwei Zhang <mizhang@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
Message-Id: <20221019165618.927057-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 11 +++--------
 arch/x86/kvm/mmu/tdp_mmu.c      | 20 +++++++++-----------
 2 files changed, 12 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9030e6263f954..9362b9736d877 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1298,6 +1298,9 @@ struct kvm_arch {
 	 */
 	bool tdp_mmu_enabled;
 
+	/* The number of TDP MMU pages across all roots. */
+	atomic64_t tdp_mmu_pages;
+
 	/*
 	 * List of kvm_mmu_page structs being used as roots.
 	 * All kvm_mmu_page structs in the list should have
@@ -1318,18 +1321,10 @@ struct kvm_arch {
 	 */
 	struct list_head tdp_mmu_roots;
 
-	/*
-	 * List of kvm_mmu_page structs not being used as roots.
-	 * All kvm_mmu_page structs in the list should have
-	 * tdp_mmu_page set and a tdp_mmu_root_count of 0.
-	 */
-	struct list_head tdp_mmu_pages;
-
 	/*
 	 * Protects accesses to the following fields when the MMU lock
 	 * is held in read mode:
 	 *  - tdp_mmu_roots (above)
-	 *  - tdp_mmu_pages (above)
 	 *  - the link field of kvm_mmu_page structs used by the TDP MMU
 	 *  - possible_nx_huge_pages;
 	 *  - the possible_nx_huge_page_link field of kvm_mmu_page structs used
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 059231c823452..4e5b3ae824c16 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
 	kvm->arch.tdp_mmu_enabled = true;
 	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
 	spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
-	INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
 	kvm->arch.tdp_mmu_zap_wq = wq;
 	return 1;
 }
@@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
 	/* Also waits for any queued work items.  */
 	destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
 
-	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
+	WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
 	WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
 
 	/*
@@ -377,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
 static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	kvm_account_pgtable_pages((void *)sp->spt, +1);
+	atomic64_inc(&kvm->arch.tdp_mmu_pages);
 }
 
 static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
 {
 	kvm_account_pgtable_pages((void *)sp->spt, -1);
+	atomic64_dec(&kvm->arch.tdp_mmu_pages);
 }
 
 /**
@@ -397,17 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
 			      bool shared)
 {
 	tdp_unaccount_mmu_page(kvm, sp);
+
+	if (!sp->nx_huge_page_disallowed)
+		return;
+
 	if (shared)
 		spin_lock(&kvm->arch.tdp_mmu_pages_lock);
 	else
 		lockdep_assert_held_write(&kvm->mmu_lock);
 
-	list_del(&sp->link);
-
-	if (sp->nx_huge_page_disallowed) {
-		sp->nx_huge_page_disallowed = false;
-		untrack_possible_nx_huge_page(kvm, sp);
-	}
+	sp->nx_huge_page_disallowed = false;
+	untrack_possible_nx_huge_page(kvm, sp);
 
 	if (shared)
 		spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1140,9 +1141,6 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 		tdp_mmu_set_spte(kvm, iter, spte);
 	}
 
-	spin_lock(&kvm->arch.tdp_mmu_pages_lock);
-	list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
-	spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 	tdp_account_mmu_page(kvm, sp);
 
 	return 0;
-- 
GitLab


From 5e3edd7e8b7e8004c9bb8310fd669a9ca81de207 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 19 Oct 2022 16:56:16 +0000
Subject: [PATCH 1212/4122] KVM: x86/mmu: Add helper to convert SPTE value to
 its shadow page

Add a helper to convert a SPTE to its shadow page to deduplicate a
variety of flows and hopefully avoid future bugs, e.g. if KVM attempts to
get the shadow page for a SPTE without dropping high bits.

Opportunistically add a comment in mmu_free_root_page() documenting why
it treats the root HPA as a SPTE.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221019165618.927057-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c          | 17 ++++++++++-------
 arch/x86/kvm/mmu/mmu_internal.h | 12 ------------
 arch/x86/kvm/mmu/spte.h         | 17 +++++++++++++++++
 arch/x86/kvm/mmu/tdp_mmu.h      |  2 ++
 4 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e384b78e099c1..5f9b57c6e5068 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -1819,7 +1819,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 			continue;
 		}
 
-		child = to_shadow_page(ent & SPTE_BASE_ADDR_MASK);
+		child = spte_to_child_sp(ent);
 
 		if (child->unsync_children) {
 			if (mmu_pages_add(pvec, child, i))
@@ -2378,7 +2378,7 @@ static void validate_direct_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 		 * so we should update the spte at this point to get
 		 * a new sp with the correct access.
 		 */
-		child = to_shadow_page(*sptep & SPTE_BASE_ADDR_MASK);
+		child = spte_to_child_sp(*sptep);
 		if (child->role.access == direct_access)
 			return;
 
@@ -2399,7 +2399,7 @@ static int mmu_page_zap_pte(struct kvm *kvm, struct kvm_mmu_page *sp,
 		if (is_last_spte(pte, sp->role.level)) {
 			drop_spte(kvm, spte);
 		} else {
-			child = to_shadow_page(pte & SPTE_BASE_ADDR_MASK);
+			child = spte_to_child_sp(pte);
 			drop_parent_pte(child, spte);
 
 			/*
@@ -2838,7 +2838,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, struct kvm_memory_slot *slot,
 			struct kvm_mmu_page *child;
 			u64 pte = *sptep;
 
-			child = to_shadow_page(pte & SPTE_BASE_ADDR_MASK);
+			child = spte_to_child_sp(pte);
 			drop_parent_pte(child, sptep);
 			flush = true;
 		} else if (pfn != spte_to_pfn(*sptep)) {
@@ -3455,7 +3455,11 @@ static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
 	if (!VALID_PAGE(*root_hpa))
 		return;
 
-	sp = to_shadow_page(*root_hpa & SPTE_BASE_ADDR_MASK);
+	/*
+	 * The "root" may be a special root, e.g. a PAE entry, treat it as a
+	 * SPTE to ensure any non-PA bits are dropped.
+	 */
+	sp = spte_to_child_sp(*root_hpa);
 	if (WARN_ON(!sp))
 		return;
 
@@ -3940,8 +3944,7 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
 		hpa_t root = vcpu->arch.mmu->pae_root[i];
 
 		if (IS_VALID_PAE_ROOT(root)) {
-			root &= SPTE_BASE_ADDR_MASK;
-			sp = to_shadow_page(root);
+			sp = spte_to_child_sp(root);
 			mmu_sync_children(vcpu, sp, true);
 		}
 	}
diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h
index 22152241bd290..dbaf6755c5a71 100644
--- a/arch/x86/kvm/mmu/mmu_internal.h
+++ b/arch/x86/kvm/mmu/mmu_internal.h
@@ -133,18 +133,6 @@ struct kvm_mmu_page {
 
 extern struct kmem_cache *mmu_page_header_cache;
 
-static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
-{
-	struct page *page = pfn_to_page(shadow_page >> PAGE_SHIFT);
-
-	return (struct kvm_mmu_page *)page_private(page);
-}
-
-static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
-{
-	return to_shadow_page(__pa(sptep));
-}
-
 static inline int kvm_mmu_role_as_id(union kvm_mmu_page_role role)
 {
 	return role.smm ? 1 : 0;
diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
index 79560d77aa4c7..1f03701b943a1 100644
--- a/arch/x86/kvm/mmu/spte.h
+++ b/arch/x86/kvm/mmu/spte.h
@@ -219,6 +219,23 @@ static inline int spte_index(u64 *sptep)
  */
 extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
 
+static inline struct kvm_mmu_page *to_shadow_page(hpa_t shadow_page)
+{
+	struct page *page = pfn_to_page((shadow_page) >> PAGE_SHIFT);
+
+	return (struct kvm_mmu_page *)page_private(page);
+}
+
+static inline struct kvm_mmu_page *spte_to_child_sp(u64 spte)
+{
+	return to_shadow_page(spte & SPTE_BASE_ADDR_MASK);
+}
+
+static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
+{
+	return to_shadow_page(__pa(sptep));
+}
+
 static inline bool is_mmio_spte(u64 spte)
 {
 	return (spte & shadow_mmio_mask) == shadow_mmio_value &&
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index c163f7cc23ca5..d3714200b932a 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -5,6 +5,8 @@
 
 #include <linux/kvm_host.h>
 
+#include "spte.h"
+
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 
 __must_check static inline bool kvm_tdp_mmu_get_root(struct kvm_mmu_page *root)
-- 
GitLab


From 76901e56fb517db61939efdf54e9581b117d615d Mon Sep 17 00:00:00 2001
From: Mingwei Zhang <mizhang@google.com>
Date: Wed, 19 Oct 2022 16:56:17 +0000
Subject: [PATCH 1213/4122] KVM: x86/mmu: explicitly check nx_hugepage in
 disallowed_hugepage_adjust()

Explicitly check if a NX huge page is disallowed when determining if a
page fault needs to be forced to use a smaller sized page.  KVM currently
assumes that the NX huge page mitigation is the only scenario where KVM
will force a shadow page instead of a huge page, and so unnecessarily
keeps an existing shadow page instead of replacing it with a huge page.

Any scenario that causes KVM to zap leaf SPTEs may result in having a SP
that can be made huge without violating the NX huge page mitigation.
E.g. prior to commit 5ba7c4c6d1c7 ("KVM: x86/MMU: Zap non-leaf SPTEs when
disabling dirty logging"), KVM would keep shadow pages after disabling
dirty logging due to a live migration being canceled, resulting in
degraded performance due to running with 4kb pages instead of huge pages.

Although the dirty logging case is "fixed", that fix is coincidental,
i.e. is an implementation detail, and there are other scenarios where KVM
will zap leaf SPTEs.  E.g. zapping leaf SPTEs in response to a host page
migration (mmu_notifier invalidation) to create a huge page would yield a
similar result; KVM would see the shadow-present non-leaf SPTE and assume
a huge page is disallowed.

Fixes: b8e8c8303ff2 ("kvm: mmu: ITLB_MULTIHIT mitigation")
Reviewed-by: Ben Gardon <bgardon@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Signed-off-by: Mingwei Zhang <mizhang@google.com>
[sean: use spte_to_child_sp(), massage changelog, fold into if-statement]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Yan Zhao <yan.y.zhao@intel.com>
Message-Id: <20221019165618.927057-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 5f9b57c6e5068..efce5e4e24c30 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3112,7 +3112,8 @@ void disallowed_hugepage_adjust(struct kvm_page_fault *fault, u64 spte, int cur_
 	if (cur_level > PG_LEVEL_4K &&
 	    cur_level == fault->goal_level &&
 	    is_shadow_present_pte(spte) &&
-	    !is_large_pte(spte)) {
+	    !is_large_pte(spte) &&
+	    spte_to_child_sp(spte)->nx_huge_page_disallowed) {
 		/*
 		 * A small SPTE exists for this pfn, but FNAME(fetch)
 		 * and __direct_map would like to create a large PTE
-- 
GitLab


From 3a05675722250a522c148f6de0cc190f407c4bb5 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 19 Oct 2022 16:56:18 +0000
Subject: [PATCH 1214/4122] KVM: x86/mmu: WARN if TDP MMU SP disallows hugepage
 after being zapped

Extend the accounting sanity check in kvm_recover_nx_huge_pages() to the
TDP MMU, i.e. verify that zapping a shadow page unaccounts the disallowed
NX huge page regardless of the MMU type.  Recovery runs while holding
mmu_lock for write and so it should be impossible to get false positives
on the WARN.

Suggested-by: Yan Zhao <yan.y.zhao@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221019165618.927057-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index efce5e4e24c30..93c389eaf4711 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6875,12 +6875,11 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 				      struct kvm_mmu_page,
 				      possible_nx_huge_page_link);
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
-		if (is_tdp_mmu_page(sp)) {
+		if (is_tdp_mmu_page(sp))
 			flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
-		} else {
+		else
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
-			WARN_ON_ONCE(sp->nx_huge_page_disallowed);
-		}
+		WARN_ON_ONCE(sp->nx_huge_page_disallowed);
 
 		if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
 			kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
-- 
GitLab


From f1c5651fda43e0c62a32456cdc6f254f40457409 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 23 Sep 2022 00:13:52 +0000
Subject: [PATCH 1215/4122] KVM: x86/pmu: Force reprogramming of all counters
 on PMU filter change

Force vCPUs to reprogram all counters on a PMU filter change to provide
a sane ABI for userspace.  Use the existing KVM_REQ_PMU to do the
programming, and take advantage of the fact that the reprogram_pmi bitmap
fits in a u64 to set all bits in a single atomic update.  Note, setting
the bitmap and making the request needs to be done _after_ the SRCU
synchronization to ensure that vCPUs will reprogram using the new filter.

KVM's current "lazy" approach is confusing and non-deterministic.  It's
confusing because, from a developer perspective, the code is buggy as it
makes zero sense to let userspace modify the filter but then not actually
enforce the new filter.  The lazy approach is non-deterministic because
KVM enforces the filter whenever a counter is reprogrammed, not just on
guest WRMSRs, i.e. a guest might gain/lose access to an event at random
times depending on what is going on in the host.

Note, the resulting behavior is still non-determinstic while the filter
is in flux.  If userspace wants to guarantee deterministic behavior, all
vCPUs should be paused during the filter update.

Jim Mattson <jmattson@google.com>

Fixes: 66bb8a065f5a ("KVM: x86: PMU Event Filter")
Cc: Aaron Lewis <aaronlewis@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220923001355.3741194-2-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 11 ++++++++++-
 arch/x86/kvm/pmu.c              | 13 ++++++++++++-
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 9362b9736d877..58a562bb197c8 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -527,7 +527,16 @@ struct kvm_pmu {
 	struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC];
 	struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED];
 	struct irq_work irq_work;
-	DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+
+	/*
+	 * Overlay the bitmap with a 64-bit atomic so that all bits can be
+	 * set in a single access, e.g. to reprogram all counters when the PMU
+	 * filter changes.
+	 */
+	union {
+		DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX);
+		atomic64_t __reprogram_pmi;
+	};
 	DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX);
 	DECLARE_BITMAP(pmc_in_use, X86_PMC_IDX_MAX);
 
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index de1fd73697365..bf2c32ea3255e 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -577,6 +577,8 @@ EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
 int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 {
 	struct kvm_pmu_event_filter tmp, *filter;
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
 	size_t size;
 	int r;
 
@@ -613,9 +615,18 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	mutex_lock(&kvm->lock);
 	filter = rcu_replace_pointer(kvm->arch.pmu_event_filter, filter,
 				     mutex_is_locked(&kvm->lock));
+	synchronize_srcu_expedited(&kvm->srcu);
+
+	BUILD_BUG_ON(sizeof(((struct kvm_pmu *)0)->reprogram_pmi) >
+		     sizeof(((struct kvm_pmu *)0)->__reprogram_pmi));
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		atomic64_set(&vcpu_to_pmu(vcpu)->__reprogram_pmi, -1ull);
+
+	kvm_make_all_cpus_request(kvm, KVM_REQ_PMU);
+
 	mutex_unlock(&kvm->lock);
 
-	synchronize_srcu_expedited(&kvm->srcu);
 	r = 0;
 cleanup:
 	kfree(filter);
-- 
GitLab


From dcbb816a2842e41d3ec22605c6760837d800b20a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 23 Sep 2022 00:13:53 +0000
Subject: [PATCH 1216/4122] KVM: x86/pmu: Clear "reprogram" bit if counter is
 disabled or disallowed

When reprogramming a counter, clear the counter's "reprogram pending" bit
if the counter is disabled (by the guest) or is disallowed (by the
userspace filter).  In both cases, there's no need to re-attempt
programming on the next coincident KVM_REQ_PMU as enabling the counter by
either method will trigger reprogramming.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220923001355.3741194-3-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/pmu.c | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index bf2c32ea3255e..be8ce5aeb454f 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -150,9 +150,9 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 	__kvm_perf_overflow(pmc, true);
 }
 
-static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
-				  u64 config, bool exclude_user,
-				  bool exclude_kernel, bool intr)
+static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
+				 bool exclude_user, bool exclude_kernel,
+				 bool intr)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	struct perf_event *event;
@@ -204,14 +204,14 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 	if (IS_ERR(event)) {
 		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
 			    PTR_ERR(event), pmc->idx);
-		return;
+		return PTR_ERR(event);
 	}
 
 	pmc->perf_event = event;
 	pmc_to_pmu(pmc)->event_count++;
-	clear_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
 	pmc->is_paused = false;
 	pmc->intr = intr || pebs;
+	return 0;
 }
 
 static void pmc_pause_counter(struct kvm_pmc *pmc)
@@ -245,7 +245,6 @@ static bool pmc_resume_counter(struct kvm_pmc *pmc)
 	perf_event_enable(pmc->perf_event);
 	pmc->is_paused = false;
 
-	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 	return true;
 }
 
@@ -303,10 +302,10 @@ void reprogram_counter(struct kvm_pmc *pmc)
 	pmc_pause_counter(pmc);
 
 	if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc))
-		return;
+		goto reprogram_complete;
 
 	if (!check_pmu_event_filter(pmc))
-		return;
+		goto reprogram_complete;
 
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
 		printk_once("kvm pmu: pin control bit is ignored\n");
@@ -324,16 +323,27 @@ void reprogram_counter(struct kvm_pmc *pmc)
 	}
 
 	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
-		return;
+		goto reprogram_complete;
 
 	pmc_release_perf_event(pmc);
 
 	pmc->current_config = new_config;
-	pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
-			      (eventsel & pmu->raw_event_mask),
-			      !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-			      !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-			      eventsel & ARCH_PERFMON_EVENTSEL_INT);
+
+	/*
+	 * If reprogramming fails, e.g. due to contention, leave the counter's
+	 * regprogram bit set, i.e. opportunistically try again on the next PMU
+	 * refresh.  Don't make a new request as doing so can stall the guest
+	 * if reprogramming repeatedly fails.
+	 */
+	if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+				  (eventsel & pmu->raw_event_mask),
+				  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+				  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+				  eventsel & ARCH_PERFMON_EVENTSEL_INT))
+		return;
+
+reprogram_complete:
+	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 }
 EXPORT_SYMBOL_GPL(reprogram_counter);
 
-- 
GitLab


From 68fb4757e8678894530ee0b15c29a3567207b970 Mon Sep 17 00:00:00 2001
From: Like Xu <likexu@tencent.com>
Date: Fri, 23 Sep 2022 00:13:54 +0000
Subject: [PATCH 1217/4122] KVM: x86/pmu: Defer reprogram_counter() to
 kvm_pmu_handle_event()

Batch reprogramming PMU counters by setting KVM_REQ_PMU and thus
deferring reprogramming kvm_pmu_handle_event() to avoid reprogramming
a counter multiple times during a single VM-Exit.

Deferring programming will also allow KVM to fix a bug where immediately
reprogramming a counter can result in sleeping (taking a mutex) while
interrupts are disabled in the VM-Exit fastpath.

Introduce kvm_pmu_request_counter_reprogam() to make it obvious that
KVM is _requesting_ a reprogram and not actually doing the reprogram.

Opportunistically refine related comments to avoid misunderstandings.

Signed-off-by: Like Xu <likexu@tencent.com>
Link: https://lore.kernel.org/r/20220831085328.45489-5-likexu@tencent.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220923001355.3741194-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/pmu.c              | 17 ++++++++++++-----
 arch/x86/kvm/pmu.h              |  6 +++++-
 arch/x86/kvm/svm/pmu.c          |  2 +-
 arch/x86/kvm/vmx/pmu_intel.c    |  6 +++---
 5 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 58a562bb197c8..afadbc4b72c45 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -496,6 +496,7 @@ struct kvm_pmc {
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
 	/*
+	 * only for creating or reusing perf_event,
 	 * eventsel value for general purpose counters,
 	 * ctrl value for fixed counters.
 	 */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index be8ce5aeb454f..3054b35b41436 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -101,7 +101,11 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	bool skip_pmi = false;
 
-	/* Ignore counters that have been reprogrammed already. */
+	/*
+	 * Ignore overflow events for counters that are scheduled to be
+	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
+	 * handling of a related guest WRMSR.
+	 */
 	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
 		return;
 
@@ -292,7 +296,7 @@ out:
 	return allow_event;
 }
 
-void reprogram_counter(struct kvm_pmc *pmc)
+static void reprogram_counter(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	u64 eventsel = pmc->eventsel;
@@ -345,7 +349,6 @@ void reprogram_counter(struct kvm_pmc *pmc)
 reprogram_complete:
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
 }
-EXPORT_SYMBOL_GPL(reprogram_counter);
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
@@ -355,10 +358,11 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
 		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
 
-		if (unlikely(!pmc || !pmc->perf_event)) {
+		if (unlikely(!pmc)) {
 			clear_bit(bit, pmu->reprogram_pmi);
 			continue;
 		}
+
 		reprogram_counter(pmc);
 	}
 
@@ -552,12 +556,15 @@ static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
 static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 {
 	bool select_os, select_user;
-	u64 config = pmc->current_config;
+	u64 config;
 
 	if (pmc_is_gp(pmc)) {
+		config = pmc->eventsel;
 		select_os = config & ARCH_PERFMON_EVENTSEL_OS;
 		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
 	} else {
+		config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
+					  pmc->idx - INTEL_PMC_IDX_FIXED);
 		select_os = config & 0x1;
 		select_user = config & 0x2;
 	}
diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h
index 5cc5721f260bf..85ff3c0588bac 100644
--- a/arch/x86/kvm/pmu.h
+++ b/arch/x86/kvm/pmu.h
@@ -183,7 +183,11 @@ static inline void kvm_init_pmu_capability(void)
 					     KVM_PMC_MAX_FIXED);
 }
 
-void reprogram_counter(struct kvm_pmc *pmc);
+static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc)
+{
+	set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi);
+	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
+}
 
 void kvm_pmu_deliver_pmi(struct kvm_vcpu *vcpu);
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index 9d65cd095691b..c4b322ffdac48 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -159,7 +159,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		data &= ~pmu->reserved_bits;
 		if (data != pmc->eventsel) {
 			pmc->eventsel = data;
-			reprogram_counter(pmc);
+			kvm_pmu_request_counter_reprogam(pmc);
 		}
 		return 0;
 	}
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index b7c3a9874a93c..6c80dff37b772 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -52,7 +52,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 		pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i);
 
 		__set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use);
-		reprogram_counter(pmc);
+		kvm_pmu_request_counter_reprogam(pmc);
 	}
 }
 
@@ -76,7 +76,7 @@ static void reprogram_counters(struct kvm_pmu *pmu, u64 diff)
 	for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) {
 		pmc = intel_pmc_idx_to_pmc(pmu, bit);
 		if (pmc)
-			reprogram_counter(pmc);
+			kvm_pmu_request_counter_reprogam(pmc);
 	}
 }
 
@@ -477,7 +477,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 				reserved_bits ^= HSW_IN_TX_CHECKPOINTED;
 			if (!(data & reserved_bits)) {
 				pmc->eventsel = data;
-				reprogram_counter(pmc);
+				kvm_pmu_request_counter_reprogam(pmc);
 				return 0;
 			}
 		} else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false))
-- 
GitLab


From de0f619564f4713bd548b82d535a954ffa1ee7d8 Mon Sep 17 00:00:00 2001
From: Like Xu <likexu@tencent.com>
Date: Fri, 23 Sep 2022 00:13:55 +0000
Subject: [PATCH 1218/4122] KVM: x86/pmu: Defer counter emulated overflow via
 pmc->prev_counter

Defer reprogramming counters and handling overflow via KVM_REQ_PMU
when incrementing counters.  KVM skips emulated WRMSR in the VM-Exit
fastpath, the fastpath runs with IRQs disabled, skipping instructions
can increment and reprogram counters, reprogramming counters can
sleep, and sleeping is disallowed while IRQs are disabled.

 [*] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:580
 [*] in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 2981888, name: CPU 15/KVM
 [*] preempt_count: 1, expected: 0
 [*] RCU nest depth: 0, expected: 0
 [*] INFO: lockdep is turned off.
 [*] irq event stamp: 0
 [*] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
 [*] hardirqs last disabled at (0): [<ffffffff8121222a>] copy_process+0x146a/0x62d0
 [*] softirqs last  enabled at (0): [<ffffffff81212269>] copy_process+0x14a9/0x62d0
 [*] softirqs last disabled at (0): [<0000000000000000>] 0x0
 [*] Preemption disabled at:
 [*] [<ffffffffc2063fc1>] vcpu_enter_guest+0x1001/0x3dc0 [kvm]
 [*] CPU: 17 PID: 2981888 Comm: CPU 15/KVM Kdump: 5.19.0-rc1-g239111db364c-dirty #2
 [*] Call Trace:
 [*]  <TASK>
 [*]  dump_stack_lvl+0x6c/0x9b
 [*]  __might_resched.cold+0x22e/0x297
 [*]  __mutex_lock+0xc0/0x23b0
 [*]  perf_event_ctx_lock_nested+0x18f/0x340
 [*]  perf_event_pause+0x1a/0x110
 [*]  reprogram_counter+0x2af/0x1490 [kvm]
 [*]  kvm_pmu_trigger_event+0x429/0x950 [kvm]
 [*]  kvm_skip_emulated_instruction+0x48/0x90 [kvm]
 [*]  handle_fastpath_set_msr_irqoff+0x349/0x3b0 [kvm]
 [*]  vmx_vcpu_run+0x268e/0x3b80 [kvm_intel]
 [*]  vcpu_enter_guest+0x1d22/0x3dc0 [kvm]

Add a field to kvm_pmc to track the previous counter value in order
to defer overflow detection to kvm_pmu_handle_event() (the counter must
be paused before handling overflow, and that may increment the counter).

Opportunistically shrink sizeof(struct kvm_pmc) a bit.

Suggested-by: Wanpeng Li <wanpengli@tencent.com>
Fixes: 9cd803d496e7 ("KVM: x86: Update vPMCs when retiring instructions")
Signed-off-by: Like Xu <likexu@tencent.com>
Link: https://lore.kernel.org/r/20220831085328.45489-6-likexu@tencent.com
[sean: avoid re-triggering KVM_REQ_PMU on overflow, tweak changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20220923001355.3741194-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  5 +++--
 arch/x86/kvm/pmu.c              | 32 ++++++++++++++++----------------
 arch/x86/kvm/svm/pmu.c          |  2 +-
 arch/x86/kvm/vmx/pmu_intel.c    |  4 ++--
 4 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index afadbc4b72c45..81114a376c4ee 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -491,7 +491,10 @@ enum pmc_type {
 struct kvm_pmc {
 	enum pmc_type type;
 	u8 idx;
+	bool is_paused;
+	bool intr;
 	u64 counter;
+	u64 prev_counter;
 	u64 eventsel;
 	struct perf_event *perf_event;
 	struct kvm_vcpu *vcpu;
@@ -501,8 +504,6 @@ struct kvm_pmc {
 	 * ctrl value for fixed counters.
 	 */
 	u64 current_config;
-	bool is_paused;
-	bool intr;
 };
 
 /* More counters may conflict with other existing Architectural MSRs */
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 3054b35b41436..684393c221052 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -101,14 +101,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	bool skip_pmi = false;
 
-	/*
-	 * Ignore overflow events for counters that are scheduled to be
-	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
-	 * handling of a related guest WRMSR.
-	 */
-	if (test_and_set_bit(pmc->idx, pmu->reprogram_pmi))
-		return;
-
 	if (pmc->perf_event && pmc->perf_event->attr.precise_ip) {
 		if (!in_pmi) {
 			/*
@@ -126,7 +118,6 @@ static inline void __kvm_perf_overflow(struct kvm_pmc *pmc, bool in_pmi)
 	} else {
 		__set_bit(pmc->idx, (unsigned long *)&pmu->global_status);
 	}
-	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 
 	if (!pmc->intr || skip_pmi)
 		return;
@@ -151,7 +142,17 @@ static void kvm_perf_overflow(struct perf_event *perf_event,
 {
 	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
 
+	/*
+	 * Ignore overflow events for counters that are scheduled to be
+	 * reprogrammed, e.g. if a PMI for the previous event races with KVM's
+	 * handling of a related guest WRMSR.
+	 */
+	if (test_and_set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi))
+		return;
+
 	__kvm_perf_overflow(pmc, true);
+
+	kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
 }
 
 static int pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, u64 config,
@@ -311,6 +312,9 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 	if (!check_pmu_event_filter(pmc))
 		goto reprogram_complete;
 
+	if (pmc->counter < pmc->prev_counter)
+		__kvm_perf_overflow(pmc, false);
+
 	if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
 		printk_once("kvm pmu: pin control bit is ignored\n");
 
@@ -348,6 +352,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 reprogram_complete:
 	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+	pmc->prev_counter = 0;
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
@@ -536,14 +541,9 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
 
 static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 {
-	u64 prev_count;
-
-	prev_count = pmc->counter;
+	pmc->prev_counter = pmc->counter;
 	pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc);
-
-	reprogram_counter(pmc);
-	if (pmc->counter < prev_count)
-		__kvm_perf_overflow(pmc, false);
+	kvm_pmu_request_counter_reprogam(pmc);
 }
 
 static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
index c4b322ffdac48..0e313fbae0556 100644
--- a/arch/x86/kvm/svm/pmu.c
+++ b/arch/x86/kvm/svm/pmu.c
@@ -212,7 +212,7 @@ static void amd_pmu_reset(struct kvm_vcpu *vcpu)
 		struct kvm_pmc *pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index 6c80dff37b772..e5cec07ca8d95 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -646,14 +646,14 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 		pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = pmc->eventsel = 0;
+		pmc->counter = pmc->prev_counter = pmc->eventsel = 0;
 	}
 
 	for (i = 0; i < KVM_PMC_MAX_FIXED; i++) {
 		pmc = &pmu->fixed_counters[i];
 
 		pmc_stop_counter(pmc);
-		pmc->counter = 0;
+		pmc->counter = pmc->prev_counter = 0;
 	}
 
 	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status = 0;
-- 
GitLab


From d663b8a285986072428a6a145e5994bc275df994 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 3 Nov 2022 10:44:10 -0400
Subject: [PATCH 1219/4122] KVM: replace direct irq.h inclusion

virt/kvm/irqchip.c is including "irq.h" from the arch-specific KVM source
directory (i.e. not from arch/*/include) for the sole purpose of retrieving
irqchip_in_kernel.

Making the function inline in a header that is already included,
such as asm/kvm_host.h, is not possible because it needs to look at
struct kvm which is defined after asm/kvm_host.h is included.  So add a
kvm_arch_irqchip_in_kernel non-inline function; irqchip_in_kernel() is
only performance critical on arm64 and x86, and the non-inline function
is enough on all other architectures.

irq.h can then be deleted from all architectures except x86.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/arm64/kvm/arm.c       |  5 +++++
 arch/arm64/kvm/irq.h       | 16 ----------------
 arch/powerpc/kvm/irq.h     | 22 ----------------------
 arch/powerpc/kvm/powerpc.c | 18 ++++++++++++++++--
 arch/s390/kvm/irq.h        | 19 -------------------
 arch/s390/kvm/kvm-s390.c   |  5 +++++
 arch/x86/kvm/irq.c         |  5 +++++
 include/linux/kvm_host.h   |  2 ++
 virt/kvm/irqchip.c         |  3 +--
 9 files changed, 34 insertions(+), 61 deletions(-)
 delete mode 100644 arch/arm64/kvm/irq.h
 delete mode 100644 arch/powerpc/kvm/irq.h
 delete mode 100644 arch/s390/kvm/irq.h

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 94d33e296e10c..7b107fa540fac 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -2130,6 +2130,11 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
 	return NULL;
 }
 
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm);
+}
+
 bool kvm_arch_has_irq_bypass(void)
 {
 	return true;
diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h
deleted file mode 100644
index 0d257de42c109..0000000000000
--- a/arch/arm64/kvm/irq.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * irq.h: in kernel interrupt controller related definitions
- * Copyright (c) 2016 Red Hat, Inc.
- *
- * This header is included by irqchip.c. However, on ARM, interrupt
- * controller declarations are located in include/kvm/arm_vgic.h since
- * they are mostly shared between arm and arm64.
- */
-
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include <kvm/arm_vgic.h>
-
-#endif
diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h
deleted file mode 100644
index e6463f866abc2..0000000000000
--- a/arch/powerpc/kvm/irq.h
+++ /dev/null
@@ -1,22 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __IRQ_H
-#define __IRQ_H
-
-#include <linux/kvm_host.h>
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	int ret = 0;
-
-#ifdef CONFIG_KVM_MPIC
-	ret = ret || (kvm->arch.mpic != NULL);
-#endif
-#ifdef CONFIG_KVM_XICS
-	ret = ret || (kvm->arch.xics != NULL);
-	ret = ret || (kvm->arch.xive != NULL);
-#endif
-	smp_rmb();
-	return ret;
-}
-
-#endif
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index b850b0efa201a..04494a4fb37a0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -36,7 +36,6 @@
 #include <asm/setup.h>
 
 #include "timing.h"
-#include "irq.h"
 #include "../mm/mmu_decl.h"
 
 #define CREATE_TRACE_POINTS
@@ -2165,10 +2164,25 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo)
 	return 0;
 }
 
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	int ret = 0;
+
+#ifdef CONFIG_KVM_MPIC
+	ret = ret || (kvm->arch.mpic != NULL);
+#endif
+#ifdef CONFIG_KVM_XICS
+	ret = ret || (kvm->arch.xics != NULL);
+	ret = ret || (kvm->arch.xive != NULL);
+#endif
+	smp_rmb();
+	return ret;
+}
+
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 			  bool line_status)
 {
-	if (!irqchip_in_kernel(kvm))
+	if (!kvm_arch_irqchip_in_kernel(kvm))
 		return -ENXIO;
 
 	irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID,
diff --git a/arch/s390/kvm/irq.h b/arch/s390/kvm/irq.h
deleted file mode 100644
index 484608c71dd01..0000000000000
--- a/arch/s390/kvm/irq.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * s390 irqchip routines
- *
- * Copyright IBM Corp. 2014
- *
- *    Author(s): Cornelia Huck <cornelia.huck@de.ibm.com>
- */
-#ifndef __KVM_IRQ_H
-#define __KVM_IRQ_H
-
-#include <linux/kvm_host.h>
-
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-	return 1;
-}
-
-#endif
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bc491a73815c3..5c7532dbc96b1 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -5567,6 +5567,11 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
 	return VM_FAULT_SIGBUS;
 }
 
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	return true;
+}
+
 /* Section: memory related */
 int kvm_arch_prepare_memory_region(struct kvm *kvm,
 				   const struct kvm_memory_slot *old,
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index f371f1292ca3e..d8d50558f1657 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -165,3 +165,8 @@ bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args)
 
 	return resample ? irqchip_kernel(kvm) : irqchip_in_kernel(kvm);
 }
+
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm)
+{
+	return irqchip_in_kernel(kvm);
+}
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8fe4665bd020b..e6e66c5e56f24 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -663,6 +663,8 @@ struct kvm_irq_routing_table {
 	 */
 	struct hlist_head map[];
 };
+
+bool kvm_arch_irqchip_in_kernel(struct kvm *kvm);
 #endif
 
 #ifndef KVM_INTERNAL_MEM_SLOTS
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 58e4f88b2b9fb..1e567d1f6d3df 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -17,7 +17,6 @@
 #include <linux/srcu.h>
 #include <linux/export.h>
 #include <trace/events/kvm.h>
-#include "irq.h"
 
 int kvm_irq_map_gsi(struct kvm *kvm,
 		    struct kvm_kernel_irq_routing_entry *entries, int gsi)
@@ -50,7 +49,7 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 	struct kvm_kernel_irq_routing_entry route;
 
-	if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
+	if (!kvm_arch_irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
 		return -EINVAL;
 
 	route.msi.address_lo = msi->address_lo;
-- 
GitLab


From 837a55847ead27362aac80aa1cf402459a9757f7 Mon Sep 17 00:00:00 2001
From: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Date: Mon, 7 Nov 2022 14:53:38 +0900
Subject: [PATCH 1220/4122] RDMA/rxe: Implement packet length validation on
 responder

The function check_length() is supposed to check the length of inbound
packets on responder, but it actually has been a stub since the driver was
born. Let it check the payload length and the DMA length.

Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Link: https://lore.kernel.org/r/20221107055338.357184-1-matsuda-daisuke@fujitsu.com
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Acked-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 34 ++++++++++++++++++++++------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index c32bc12cc82f7..382d2053db43e 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -393,16 +393,36 @@ static enum resp_states check_resource(struct rxe_qp *qp,
 static enum resp_states check_length(struct rxe_qp *qp,
 				     struct rxe_pkt_info *pkt)
 {
-	switch (qp_type(qp)) {
-	case IB_QPT_RC:
-		return RESPST_CHK_RKEY;
+	int mtu = qp->mtu;
+	u32 payload = payload_size(pkt);
+	u32 dmalen = reth_len(pkt);
 
-	case IB_QPT_UC:
-		return RESPST_CHK_RKEY;
+	/* RoCEv2 packets do not have LRH.
+	 * Let's skip checking it.
+	 */
 
-	default:
-		return RESPST_CHK_RKEY;
+	if ((pkt->opcode & RXE_START_MASK) &&
+	    (pkt->opcode & RXE_END_MASK)) {
+		/* "only" packets */
+		if (payload > mtu)
+			return RESPST_ERR_LENGTH;
+	} else if ((pkt->opcode & RXE_START_MASK) ||
+		   (pkt->opcode & RXE_MIDDLE_MASK)) {
+		/* "first" or "middle" packets */
+		if (payload != mtu)
+			return RESPST_ERR_LENGTH;
+	} else if (pkt->opcode & RXE_END_MASK) {
+		/* "last" packets */
+		if ((payload == 0) || (payload > mtu))
+			return RESPST_ERR_LENGTH;
+	}
+
+	if (pkt->opcode & (RXE_WRITE_MASK | RXE_READ_MASK)) {
+		if (dmalen > (1 << 31))
+			return RESPST_ERR_LENGTH;
 	}
+
+	return RESPST_CHK_RKEY;
 }
 
 static enum resp_states check_rkey(struct rxe_qp *qp,
-- 
GitLab


From 5d0557c75b2f2c7a868742d21a3ad94813ca97f4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 4 Nov 2022 14:18:03 +0200
Subject: [PATCH 1221/4122] perf intel-pt: Start turning
 intel-pt-pkt-decoder-test.c into a suite of intel-pt subtests

In preparation for adding more Intel PT testing, rename
intel-pt-pkt-decoder-test.c to intel-pt-test.c.

Subtests will later be added to intel-pt-test.c.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20221104121805.5264-2-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/tests/Build                                 | 2 +-
 .../x86/tests/{intel-pt-pkt-decoder-test.c => intel-pt-test.c}  | 0
 2 files changed, 1 insertion(+), 1 deletion(-)
 rename tools/perf/arch/x86/tests/{intel-pt-pkt-decoder-test.c => intel-pt-test.c} (100%)

diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build
index 70b5bcbc15df7..6f4e8636c3bf7 100644
--- a/tools/perf/arch/x86/tests/Build
+++ b/tools/perf/arch/x86/tests/Build
@@ -3,5 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 
 perf-y += arch-tests.o
 perf-y += sample-parsing.o
-perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
+perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o
 perf-$(CONFIG_X86_64) += bp-modify.o
diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c
similarity index 100%
rename from tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c
rename to tools/perf/arch/x86/tests/intel-pt-test.c
-- 
GitLab


From 828143f8da2856014df3102f63f7e4e4dc2d1c22 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 4 Nov 2022 14:18:04 +0200
Subject: [PATCH 1222/4122] perf intel-pt: Redefine test_suite to allow for
 adding more subtests

In preparation for adding more Intel PT testing, redefine the test_suite
to allow for adding more subtests.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20221104121805.5264-3-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/tests/arch-tests.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 04018b8aa85b9..8d5e4a0831d54 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -5,7 +5,17 @@
 
 #ifdef HAVE_AUXTRACE_SUPPORT
 DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
-DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder);
+
+static struct test_case intel_pt_tests[] = {
+	TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder),
+	{ .name = NULL, }
+};
+
+struct test_suite suite__intel_pt = {
+	.desc = "Intel PT packet decoder",
+	.test_cases = intel_pt_tests,
+};
+
 #endif
 #if defined(__x86_64__)
 DEFINE_SUITE("x86 bp modify", bp_modify);
@@ -18,7 +28,7 @@ struct test_suite *arch_tests[] = {
 #endif
 #ifdef HAVE_AUXTRACE_SUPPORT
 	&suite__insn_x86,
-	&suite__intel_pt_pkt_decoder,
+	&suite__intel_pt,
 #endif
 #if defined(__x86_64__)
 	&suite__bp_modify,
-- 
GitLab


From 44a037f54b97e4215a282d39d0f7f28c588f185c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 4 Nov 2022 14:18:05 +0200
Subject: [PATCH 1223/4122] perf intel-pt: Add hybrid CPU compatibility test

The kernel driver assumes hybrid CPUs will have Intel PT capabilities
that are compatible with the boot CPU. Add a test to check that is the
case.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20221104121805.5264-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/include/arch-tests.h  |   1 +
 tools/perf/arch/x86/tests/arch-tests.c    |   3 +-
 tools/perf/arch/x86/tests/intel-pt-test.c | 154 ++++++++++++++++++++++
 3 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h
index 6a1a1b3c08275..902e9ea9b99ed 100644
--- a/tools/perf/arch/x86/include/arch-tests.h
+++ b/tools/perf/arch/x86/include/arch-tests.h
@@ -8,6 +8,7 @@ struct test_suite;
 int test__rdpmc(struct test_suite *test, int subtest);
 int test__insn_x86(struct test_suite *test, int subtest);
 int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest);
+int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest);
 int test__bp_modify(struct test_suite *test, int subtest);
 int test__x86_sample_parsing(struct test_suite *test, int subtest);
 
diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c
index 8d5e4a0831d54..aae6ea0fe52b7 100644
--- a/tools/perf/arch/x86/tests/arch-tests.c
+++ b/tools/perf/arch/x86/tests/arch-tests.c
@@ -8,11 +8,12 @@ DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86);
 
 static struct test_case intel_pt_tests[] = {
 	TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder),
+	TEST_CASE("Intel PT hybrid CPU compatibility", intel_pt_hybrid_compat),
 	{ .name = NULL, }
 };
 
 struct test_suite suite__intel_pt = {
-	.desc = "Intel PT packet decoder",
+	.desc = "Intel PT",
 	.test_cases = intel_pt_tests,
 };
 
diff --git a/tools/perf/arch/x86/tests/intel-pt-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c
index 42237656f4539..70b7f79396b18 100644
--- a/tools/perf/arch/x86/tests/intel-pt-test.c
+++ b/tools/perf/arch/x86/tests/intel-pt-test.c
@@ -1,12 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <linux/compiler.h>
+#include <linux/bits.h>
 #include <string.h>
+#include <cpuid.h>
+#include <sched.h>
 
 #include "intel-pt-decoder/intel-pt-pkt-decoder.h"
 
 #include "debug.h"
 #include "tests/tests.h"
 #include "arch-tests.h"
+#include "cpumap.h"
 
 /**
  * struct test_data - Test data.
@@ -313,3 +318,152 @@ int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subte
 
 	return TEST_OK;
 }
+
+static int setaffinity(int cpu)
+{
+	cpu_set_t cpu_set;
+
+	CPU_ZERO(&cpu_set);
+	CPU_SET(cpu, &cpu_set);
+	if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set)) {
+		pr_debug("sched_setaffinity() failed for CPU %d\n", cpu);
+		return -1;
+	}
+	return 0;
+}
+
+#define INTEL_PT_ADDR_FILT_CNT_MASK	GENMASK(2, 0)
+#define INTEL_PT_SUBLEAF_CNT		2
+#define CPUID_REG_CNT			4
+
+struct cpuid_result {
+	union {
+		struct {
+			unsigned int eax;
+			unsigned int ebx;
+			unsigned int ecx;
+			unsigned int edx;
+		};
+		unsigned int reg[CPUID_REG_CNT];
+	};
+};
+
+struct pt_caps {
+	struct cpuid_result subleaf[INTEL_PT_SUBLEAF_CNT];
+};
+
+static int get_pt_caps(int cpu, struct pt_caps *caps)
+{
+	struct cpuid_result r;
+	int i;
+
+	if (setaffinity(cpu))
+		return -1;
+
+	memset(caps, 0, sizeof(*caps));
+
+	for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) {
+		__get_cpuid_count(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx);
+		pr_debug("CPU %d CPUID leaf 20 subleaf %d\n", cpu, i);
+		pr_debug("eax = 0x%08x\n", r.eax);
+		pr_debug("ebx = 0x%08x\n", r.ebx);
+		pr_debug("ecx = 0x%08x\n", r.ecx);
+		pr_debug("edx = 0x%08x\n", r.edx);
+		caps->subleaf[i] = r;
+	}
+
+	return 0;
+}
+
+static bool is_hydrid(void)
+{
+	unsigned int eax, ebx, ecx, edx = 0;
+	bool result;
+
+	__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
+	result = edx & BIT(15);
+	pr_debug("Is %shybrid : CPUID leaf 7 subleaf 0 edx %#x (bit-15 indicates hybrid)\n",
+		 result ? "" : "not ", edx);
+	return result;
+}
+
+static int compare_caps(int cpu, struct pt_caps *caps, struct pt_caps *caps0)
+{
+	struct pt_caps mask = { /* Mask of bits to check*/
+		.subleaf = {
+			[0] = {
+				.ebx = GENMASK(8, 0),
+				.ecx = GENMASK(3, 0),
+			},
+			[1] = {
+				.eax = GENMASK(31, 16),
+				.ebx = GENMASK(31, 0),
+			}
+		}
+	};
+	unsigned int m, reg, reg0;
+	int ret = 0;
+	int i, j;
+
+	for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) {
+		for (j = 0; j < CPUID_REG_CNT; j++) {
+			m = mask.subleaf[i].reg[j];
+			reg = m & caps->subleaf[i].reg[j];
+			reg0 = m & caps0->subleaf[i].reg[j];
+			if ((reg & reg0) != reg0) {
+				pr_debug("CPU %d subleaf %d reg %d FAIL %#x vs %#x\n",
+					 cpu, i, j, reg, reg0);
+				ret = -1;
+			}
+		}
+	}
+
+	m = INTEL_PT_ADDR_FILT_CNT_MASK;
+	reg = m & caps->subleaf[1].eax;
+	reg0 = m & caps0->subleaf[1].eax;
+	if (reg < reg0) {
+		pr_debug("CPU %d subleaf 1 reg 0 FAIL address filter count %#x vs %#x\n",
+			 cpu, reg, reg0);
+		ret = -1;
+	}
+
+	if (!ret)
+		pr_debug("CPU %d OK\n", cpu);
+
+	return ret;
+}
+
+int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest)
+{
+	int max_cpu = cpu__max_cpu().cpu;
+	struct pt_caps last_caps;
+	struct pt_caps caps0;
+	int ret = TEST_OK;
+	int cpu;
+
+	if (!is_hydrid()) {
+		test->test_cases[subtest].skip_reason = "not hybrid";
+		return TEST_SKIP;
+	}
+
+	if (get_pt_caps(0, &caps0))
+		return TEST_FAIL;
+
+	for (cpu = 1, last_caps = caps0; cpu < max_cpu; cpu++) {
+		struct pt_caps caps;
+
+		if (get_pt_caps(cpu, &caps)) {
+			pr_debug("CPU %d not found\n", cpu);
+			continue;
+		}
+		if (!memcmp(&caps, &last_caps, sizeof(caps))) {
+			pr_debug("CPU %d same caps as previous CPU\n", cpu);
+			continue;
+		}
+		if (compare_caps(cpu, &caps, &caps0))
+			ret = TEST_FAIL;
+		last_caps = caps;
+	}
+
+	return ret;
+}
-- 
GitLab


From aa382ffa705bea9931ec92b6f3c70e1fdb372195 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 8 Nov 2022 17:05:59 -0600
Subject: [PATCH 1224/4122] PCI/sysfs: Fix double free in error path

When pci_create_attr() fails, pci_remove_resource_files() is called which
will iterate over the res_attr[_wc] arrays and frees every non NULL entry.
To avoid a double free here set the array entry only after it's clear we
successfully initialized it.

Fixes: b562ec8f74e4 ("PCI: Don't leak memory if sysfs_create_bin_file() fails")
Link: https://lore.kernel.org/r/20221007070735.GX986@pengutronix.de/
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/pci-sysfs.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index 0a2eeb82cebde..ba38fc47d35e9 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -1175,11 +1175,9 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
 
 	sysfs_bin_attr_init(res_attr);
 	if (write_combine) {
-		pdev->res_attr_wc[num] = res_attr;
 		sprintf(res_attr_name, "resource%d_wc", num);
 		res_attr->mmap = pci_mmap_resource_wc;
 	} else {
-		pdev->res_attr[num] = res_attr;
 		sprintf(res_attr_name, "resource%d", num);
 		if (pci_resource_flags(pdev, num) & IORESOURCE_IO) {
 			res_attr->read = pci_read_resource_io;
@@ -1197,10 +1195,17 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine)
 	res_attr->size = pci_resource_len(pdev, num);
 	res_attr->private = (void *)(unsigned long)num;
 	retval = sysfs_create_bin_file(&pdev->dev.kobj, res_attr);
-	if (retval)
+	if (retval) {
 		kfree(res_attr);
+		return retval;
+	}
+
+	if (write_combine)
+		pdev->res_attr_wc[num] = res_attr;
+	else
+		pdev->res_attr[num] = res_attr;
 
-	return retval;
+	return 0;
 }
 
 /**
-- 
GitLab


From a39a1466dae5e3ed0dd7c03334a60894e4d1f334 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Thu, 13 Oct 2022 14:46:36 -0700
Subject: [PATCH 1225/4122] MAINTAINERS: git://github -> https://github.com for
 awilliam
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Github deprecated the git:// links about a year ago, so let's move to
the https:// URLs instead.

Reported-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://github.blog/2021-09-01-improving-git-protocol-security-github/
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Link: https://lore.kernel.org/r/20221013214636.30721-1-palmer@rivosinc.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 046ff06ff97fa..daa6a7a755ece 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -21539,7 +21539,7 @@ M:	Alex Williamson <alex.williamson@redhat.com>
 R:	Cornelia Huck <cohuck@redhat.com>
 L:	kvm@vger.kernel.org
 S:	Maintained
-T:	git git://github.com/awilliam/linux-vfio.git
+T:	git https://github.com/awilliam/linux-vfio.git
 F:	Documentation/ABI/testing/sysfs-devices-vfio-dev
 F:	Documentation/driver-api/vfio.rst
 F:	drivers/vfio/
-- 
GitLab


From cd48ebc5c4f2e94830b238f035ebf04f1c3a4433 Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Thu, 22 Sep 2022 20:35:07 +0800
Subject: [PATCH 1226/4122] vfio/mlx5: Switch to use module_pci_driver() macro

Since pci provides the helper macro module_pci_driver(), we may replace
the module_init/exit with it.

Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20220922123507.11222-1-shangxiaojing@huawei.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/main.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index fd6ccb8454a24..457138b92f134 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -676,18 +676,7 @@ static struct pci_driver mlx5vf_pci_driver = {
 	.driver_managed_dma = true,
 };
 
-static void __exit mlx5vf_pci_cleanup(void)
-{
-	pci_unregister_driver(&mlx5vf_pci_driver);
-}
-
-static int __init mlx5vf_pci_init(void)
-{
-	return pci_register_driver(&mlx5vf_pci_driver);
-}
-
-module_init(mlx5vf_pci_init);
-module_exit(mlx5vf_pci_cleanup);
+module_pci_driver(mlx5vf_pci_driver);
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Max Gurtovoy <mgurtovoy@nvidia.com>");
-- 
GitLab


From e67e070632a665c932d534b8b800477bb3111449 Mon Sep 17 00:00:00 2001
From: Rafael Mendonca <rafaelmendsr@gmail.com>
Date: Tue, 18 Oct 2022 12:28:25 -0300
Subject: [PATCH 1227/4122] vfio: platform: Do not pass return buffer to ACPI
 _RST method

The ACPI _RST method has no return value, there's no need to pass a return
buffer to acpi_evaluate_object().

Fixes: d30daa33ec1d ("vfio: platform: call _RST method when using ACPI")
Signed-off-by: Rafael Mendonca <rafaelmendsr@gmail.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Link: https://lore.kernel.org/r/20221018152825.891032-1-rafaelmendsr@gmail.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/platform/vfio_platform_common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c
index 55dc4f43c31e3..1a0a238ffa354 100644
--- a/drivers/vfio/platform/vfio_platform_common.c
+++ b/drivers/vfio/platform/vfio_platform_common.c
@@ -72,12 +72,11 @@ static int vfio_platform_acpi_call_reset(struct vfio_platform_device *vdev,
 				  const char **extra_dbg)
 {
 #ifdef CONFIG_ACPI
-	struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
 	struct device *dev = vdev->device;
 	acpi_handle handle = ACPI_HANDLE(dev);
 	acpi_status acpi_ret;
 
-	acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, &buffer);
+	acpi_ret = acpi_evaluate_object(handle, "_RST", NULL, NULL);
 	if (ACPI_FAILURE(acpi_ret)) {
 		if (extra_dbg)
 			*extra_dbg = acpi_format_exception(acpi_ret);
-- 
GitLab


From ea00d4ededcd8639f9e814513426cfeccdd3aaf0 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Tue, 25 Oct 2022 20:31:13 +0100
Subject: [PATCH 1228/4122] vfio/iova_bitmap: Explicitly include linux/slab.h

kzalloc/kzfree are used so include `slab.h`. While it happens to work
without it, due to commit 8b9f3ac5b01d ("fs: introduce alloc_inode_sb() to
allocate filesystems specific inode") which indirectly includes via:

. ./include/linux/mm.h
.. ./include/linux/huge_mm.h
... ./include/linux/fs.h
.... ./include/linux/slab.h

Make it explicit should any of its indirect dependencies be dropped/changed
for entirely different reasons as it was the cause prior to commit above
recently (i.e. <= v5.18).

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Link: https://lore.kernel.org/r/20221025193114.58695-2-joao.m.martins@oracle.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/iova_bitmap.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c
index 6631e8befe1b2..56816ba1be9b1 100644
--- a/drivers/vfio/iova_bitmap.c
+++ b/drivers/vfio/iova_bitmap.c
@@ -5,6 +5,7 @@
  */
 #include <linux/iova_bitmap.h>
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/highmem.h>
 
 #define BITS_PER_PAGE (PAGE_SIZE * BITS_PER_BYTE)
-- 
GitLab


From f38044e5ef58ad0346fdabd7027ea5c1e1a3b624 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Tue, 25 Oct 2022 20:31:14 +0100
Subject: [PATCH 1229/4122] vfio/iova_bitmap: Fix PAGE_SIZE unaligned bitmaps

iova_bitmap_set() doesn't consider the end of the page boundary when the
first bitmap page offset isn't zero, and wrongly changes the consecutive
page right after. Consequently this leads to missing dirty pages from
reported by the device as seen from the VMM.

The current logic iterates over a given number of base pages and clamps it
to the remaining indexes to iterate in the last page.  Instead of having to
consider extra pages to pin (e.g. first and extra pages), just handle the
first page as its own range and let the rest of the bitmap be handled as if
it was base page aligned.

This is done by changing iova_bitmap_mapped_remaining() to return PAGE_SIZE
- pgoff (on the first bitmap page), and leads to pgoff being set to 0 on
following iterations.

Fixes: 58ccf0190d19 ("vfio: Add an IOVA bitmap support")
Reported-by: Avihai Horon <avihaih@nvidia.com>
Tested-by: Avihai Horon <avihaih@nvidia.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Link: https://lore.kernel.org/r/20221025193114.58695-3-joao.m.martins@oracle.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/iova_bitmap.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c
index 56816ba1be9b1..de6d6ea5c4967 100644
--- a/drivers/vfio/iova_bitmap.c
+++ b/drivers/vfio/iova_bitmap.c
@@ -296,11 +296,15 @@ void iova_bitmap_free(struct iova_bitmap *bitmap)
  */
 static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
 {
-	unsigned long remaining;
+	unsigned long remaining, bytes;
+
+	/* Cap to one page in the first iteration, if PAGE_SIZE unaligned. */
+	bytes = !bitmap->mapped.pgoff ? bitmap->mapped.npages << PAGE_SHIFT :
+					PAGE_SIZE - bitmap->mapped.pgoff;
 
 	remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
 	remaining = min_t(unsigned long, remaining,
-	      (bitmap->mapped.npages << PAGE_SHIFT) / sizeof(*bitmap->bitmap));
+			  bytes / sizeof(*bitmap->bitmap));
 
 	return remaining;
 }
-- 
GitLab


From 253b642eec936974cc709ebb6bed83786c391279 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:24 +0100
Subject: [PATCH 1230/4122] phy: qcom-qmp-pcie: sort device-id table

Sort the device-id table by compatible string to make it easier to find
and add new entries.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 7c81667dd9680..4e5111d19692c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2282,17 +2282,17 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id,
 
 static const struct of_device_id qmp_pcie_of_match_table[] = {
 	{
-		.compatible = "qcom,msm8998-qmp-pcie-phy",
-		.data = &msm8998_pciephy_cfg,
-	}, {
-		.compatible = "qcom,ipq8074-qmp-pcie-phy",
-		.data = &ipq8074_pciephy_cfg,
+		.compatible = "qcom,ipq6018-qmp-pcie-phy",
+		.data = &ipq6018_pciephy_cfg,
 	}, {
 		.compatible = "qcom,ipq8074-qmp-gen3-pcie-phy",
 		.data = &ipq8074_pciephy_gen3_cfg,
 	}, {
-		.compatible = "qcom,ipq6018-qmp-pcie-phy",
-		.data = &ipq6018_pciephy_cfg,
+		.compatible = "qcom,ipq8074-qmp-pcie-phy",
+		.data = &ipq8074_pciephy_cfg,
+	}, {
+		.compatible = "qcom,msm8998-qmp-pcie-phy",
+		.data = &msm8998_pciephy_cfg,
 	}, {
 		.compatible = "qcom,sc8180x-qmp-pcie-phy",
 		.data = &sc8180x_pciephy_cfg,
@@ -2302,6 +2302,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sdm845-qmp-pcie-phy",
 		.data = &sdm845_qmp_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sdx55-qmp-pcie-phy",
+		.data = &sdx55_qmp_pciephy_cfg,
 	}, {
 		.compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy",
 		.data = &sm8250_qmp_gen3x1_pciephy_cfg,
@@ -2311,9 +2314,6 @@ static const struct of_device_id qmp_pcie_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sm8250-qmp-modem-pcie-phy",
 		.data = &sm8250_qmp_gen3x2_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sdx55-qmp-pcie-phy",
-		.data = &sdx55_qmp_pciephy_cfg,
 	}, {
 		.compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy",
 		.data = &sm8450_qmp_gen3x1_pciephy_cfg,
-- 
GitLab


From cebc6ca76e400a90cb7cbc9f96f26966167f5b6f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:25 +0100
Subject: [PATCH 1231/4122] phy: qcom-qmp-pcie: move device-id table

Move the device-id table below probe() and next to the driver structure
to keep the driver callback functions grouped together.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 90 ++++++++++++------------
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 4e5111d19692c..e66f6adc404ba 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2280,51 +2280,6 @@ static int qmp_pcie_create(struct device *dev, struct device_node *np, int id,
 	return 0;
 }
 
-static const struct of_device_id qmp_pcie_of_match_table[] = {
-	{
-		.compatible = "qcom,ipq6018-qmp-pcie-phy",
-		.data = &ipq6018_pciephy_cfg,
-	}, {
-		.compatible = "qcom,ipq8074-qmp-gen3-pcie-phy",
-		.data = &ipq8074_pciephy_gen3_cfg,
-	}, {
-		.compatible = "qcom,ipq8074-qmp-pcie-phy",
-		.data = &ipq8074_pciephy_cfg,
-	}, {
-		.compatible = "qcom,msm8998-qmp-pcie-phy",
-		.data = &msm8998_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sc8180x-qmp-pcie-phy",
-		.data = &sc8180x_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sdm845-qhp-pcie-phy",
-		.data = &sdm845_qhp_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sdm845-qmp-pcie-phy",
-		.data = &sdm845_qmp_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sdx55-qmp-pcie-phy",
-		.data = &sdx55_qmp_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy",
-		.data = &sm8250_qmp_gen3x1_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sm8250-qmp-gen3x2-pcie-phy",
-		.data = &sm8250_qmp_gen3x2_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sm8250-qmp-modem-pcie-phy",
-		.data = &sm8250_qmp_gen3x2_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy",
-		.data = &sm8450_qmp_gen3x1_pciephy_cfg,
-	}, {
-		.compatible = "qcom,sm8450-qmp-gen4x2-pcie-phy",
-		.data = &sm8450_qmp_gen4x2_pciephy_cfg,
-	},
-	{ },
-};
-MODULE_DEVICE_TABLE(of, qmp_pcie_of_match_table);
-
 static int qmp_pcie_probe(struct platform_device *pdev)
 {
 	struct qcom_qmp *qmp;
@@ -2408,6 +2363,51 @@ err_node_put:
 	return ret;
 }
 
+static const struct of_device_id qmp_pcie_of_match_table[] = {
+	{
+		.compatible = "qcom,ipq6018-qmp-pcie-phy",
+		.data = &ipq6018_pciephy_cfg,
+	}, {
+		.compatible = "qcom,ipq8074-qmp-gen3-pcie-phy",
+		.data = &ipq8074_pciephy_gen3_cfg,
+	}, {
+		.compatible = "qcom,ipq8074-qmp-pcie-phy",
+		.data = &ipq8074_pciephy_cfg,
+	}, {
+		.compatible = "qcom,msm8998-qmp-pcie-phy",
+		.data = &msm8998_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sc8180x-qmp-pcie-phy",
+		.data = &sc8180x_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sdm845-qhp-pcie-phy",
+		.data = &sdm845_qhp_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sdm845-qmp-pcie-phy",
+		.data = &sdm845_qmp_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sdx55-qmp-pcie-phy",
+		.data = &sdx55_qmp_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sm8250-qmp-gen3x1-pcie-phy",
+		.data = &sm8250_qmp_gen3x1_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sm8250-qmp-gen3x2-pcie-phy",
+		.data = &sm8250_qmp_gen3x2_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sm8250-qmp-modem-pcie-phy",
+		.data = &sm8250_qmp_gen3x2_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sm8450-qmp-gen3x1-pcie-phy",
+		.data = &sm8450_qmp_gen3x1_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sm8450-qmp-gen4x2-pcie-phy",
+		.data = &sm8450_qmp_gen4x2_pciephy_cfg,
+	},
+	{ },
+};
+MODULE_DEVICE_TABLE(of, qmp_pcie_of_match_table);
+
 static struct platform_driver qmp_pcie_driver = {
 	.probe		= qmp_pcie_probe,
 	.driver = {
-- 
GitLab


From 2fdedef3ea8e8a1a68a1da6eee1537074b308f63 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:26 +0100
Subject: [PATCH 1232/4122] phy: qcom-qmp-pcie: merge driver data

The PCIe QMP PHY driver only manages a single PHY so merge the old
qcom_qmp and qmp_phy structures and drop the PHY array.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 218 +++++++++--------------
 1 file changed, 88 insertions(+), 130 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index e66f6adc404ba..667a87e7c9179 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1365,56 +1365,26 @@ struct qmp_phy_cfg {
 	unsigned long pipe_clock_rate;
 };
 
-/**
- * struct qmp_phy - per-lane phy descriptor
- *
- * @phy: generic phy
- * @cfg: phy specific configuration
- * @serdes: iomapped memory space for phy's serdes (i.e. PLL)
- * @tx: iomapped memory space for lane's tx
- * @rx: iomapped memory space for lane's rx
- * @pcs: iomapped memory space for lane's pcs
- * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs)
- * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs)
- * @pcs_misc: iomapped memory space for lane's pcs_misc
- * @pipe_clk: pipe clock
- * @qmp: QMP phy to which this lane belongs
- * @mode: currently selected PHY mode
- */
-struct qmp_phy {
-	struct phy *phy;
+struct qmp_pcie {
+	struct device *dev;
+
 	const struct qmp_phy_cfg *cfg;
+
 	void __iomem *serdes;
+	void __iomem *pcs;
+	void __iomem *pcs_misc;
 	void __iomem *tx;
 	void __iomem *rx;
-	void __iomem *pcs;
 	void __iomem *tx2;
 	void __iomem *rx2;
-	void __iomem *pcs_misc;
-	struct clk *pipe_clk;
-	struct qcom_qmp *qmp;
-	int mode;
-};
-
-/**
- * struct qcom_qmp - structure holding QMP phy block attributes
- *
- * @dev: device
- *
- * @clks: array of clocks required by phy
- * @resets: array of resets required by phy
- * @vregs: regulator supplies bulk data
- *
- * @phys: array of per-lane phy descriptors
- */
-struct qcom_qmp {
-	struct device *dev;
 
+	struct clk *pipe_clk;
 	struct clk_bulk_data *clks;
 	struct reset_control_bulk_data *resets;
 	struct regulator_bulk_data *vregs;
 
-	struct qmp_phy **phys;
+	struct phy *phy;
+	int mode;
 };
 
 static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
@@ -1850,9 +1820,9 @@ static void qmp_pcie_configure(void __iomem *base,
 	qmp_pcie_configure_lane(base, tbl, num, 0xff);
 }
 
-static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
+static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables)
 {
-	void __iomem *serdes = qphy->serdes;
+	void __iomem *serdes = qmp->serdes;
 
 	if (!tables)
 		return;
@@ -1860,11 +1830,11 @@ static void qmp_pcie_serdes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_
 	qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num);
 }
 
-static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
+static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *tx = qphy->tx;
-	void __iomem *rx = qphy->rx;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *tx = qmp->tx;
+	void __iomem *rx = qmp->rx;
 
 	if (!tables)
 		return;
@@ -1872,17 +1842,17 @@ static void qmp_pcie_lanes_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_t
 	qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_pcie_configure_lane(qphy->tx2, tables->tx, tables->tx_num, 2);
+		qmp_pcie_configure_lane(qmp->tx2, tables->tx, tables->tx_num, 2);
 
 	qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1);
 	if (cfg->lanes >= 2)
-		qmp_pcie_configure_lane(qphy->rx2, tables->rx, tables->rx_num, 2);
+		qmp_pcie_configure_lane(qmp->rx2, tables->rx, tables->rx_num, 2);
 }
 
-static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tables *tables)
+static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables)
 {
-	void __iomem *pcs = qphy->pcs;
-	void __iomem *pcs_misc = qphy->pcs_misc;
+	void __iomem *pcs = qmp->pcs;
+	void __iomem *pcs_misc = qmp->pcs_misc;
 
 	if (!tables)
 		return;
@@ -1893,9 +1863,8 @@ static void qmp_pcie_pcs_init(struct qmp_phy *qphy, const struct qmp_phy_cfg_tab
 
 static int qmp_pcie_init(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_pcie *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	int ret;
 
 	ret = regulator_bulk_enable(cfg->num_vregs, qmp->vregs);
@@ -1932,9 +1901,8 @@ err_disable_regulators:
 
 static int qmp_pcie_exit(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_pcie *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
 	reset_control_bulk_assert(cfg->num_resets, qmp->resets);
 
@@ -1947,11 +1915,10 @@ static int qmp_pcie_exit(struct phy *phy)
 
 static int qmp_pcie_power_on(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_pcie *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	const struct qmp_phy_cfg_tables *mode_tables;
-	void __iomem *pcs = qphy->pcs;
+	void __iomem *pcs = qmp->pcs;
 	void __iomem *status;
 	unsigned int mask, val;
 	int ret;
@@ -1959,26 +1926,26 @@ static int qmp_pcie_power_on(struct phy *phy)
 	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			cfg->pwrdn_ctrl);
 
-	if (qphy->mode == PHY_MODE_PCIE_RC)
+	if (qmp->mode == PHY_MODE_PCIE_RC)
 		mode_tables = cfg->tables_rc;
 	else
 		mode_tables = cfg->tables_ep;
 
-	qmp_pcie_serdes_init(qphy, &cfg->tables);
-	qmp_pcie_serdes_init(qphy, mode_tables);
+	qmp_pcie_serdes_init(qmp, &cfg->tables);
+	qmp_pcie_serdes_init(qmp, mode_tables);
 
-	ret = clk_prepare_enable(qphy->pipe_clk);
+	ret = clk_prepare_enable(qmp->pipe_clk);
 	if (ret) {
 		dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret);
 		return ret;
 	}
 
 	/* Tx, Rx, and PCS configurations */
-	qmp_pcie_lanes_init(qphy, &cfg->tables);
-	qmp_pcie_lanes_init(qphy, mode_tables);
+	qmp_pcie_lanes_init(qmp, &cfg->tables);
+	qmp_pcie_lanes_init(qmp, mode_tables);
 
-	qmp_pcie_pcs_init(qphy, &cfg->tables);
-	qmp_pcie_pcs_init(qphy, mode_tables);
+	qmp_pcie_pcs_init(qmp, &cfg->tables);
+	qmp_pcie_pcs_init(qmp, mode_tables);
 
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
@@ -2001,27 +1968,27 @@ static int qmp_pcie_power_on(struct phy *phy)
 	return 0;
 
 err_disable_pipe_clk:
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 
 	return ret;
 }
 
 static int qmp_pcie_power_off(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_pcie *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 
 	/* PHY reset */
-	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+	qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes and Phy-Coding-Sublayer */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL],
 			SERDES_START | PCS_START);
 
 	/* Put PHY into POWER DOWN state: active low */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			cfg->pwrdn_ctrl);
 
 	return 0;
@@ -2055,12 +2022,12 @@ static int qmp_pcie_disable(struct phy *phy)
 
 static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_pcie *qmp = phy_get_drvdata(phy);
 
 	switch (submode) {
 	case PHY_MODE_PCIE_RC:
 	case PHY_MODE_PCIE_EP:
-		qphy->mode = submode;
+		qmp->mode = submode;
 		break;
 	default:
 		dev_err(&phy->dev, "Unsupported submode %d\n", submode);
@@ -2072,7 +2039,7 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 
 static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_pcie *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_vregs;
 	int i;
 
@@ -2088,7 +2055,7 @@ static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 
 static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_pcie *qmp = dev_get_drvdata(dev);
 	int i;
 	int ret;
 
@@ -2109,7 +2076,7 @@ static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg
 
 static int qmp_pcie_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_pcie *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_clks;
 	int i;
 
@@ -2146,7 +2113,7 @@ static void phy_clk_release_provider(void *res)
  *    clk  |   +-------+   |                   +-----+
  *         +---------------+
  */
-static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np)
+static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np)
 {
 	struct clk_fixed_rate *fixed;
 	struct clk_init_data init = { };
@@ -2168,8 +2135,8 @@ static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np)
 	 * Controllers using QMP PHY-s use 125MHz pipe clock interface
 	 * unless other frequency is specified in the PHY config.
 	 */
-	if (qmp->phys[0]->cfg->pipe_clock_rate)
-		fixed->fixed_rate = qmp->phys[0]->cfg->pipe_clock_rate;
+	if (qmp->cfg->pipe_clock_rate)
+		fixed->fixed_rate = qmp->cfg->pipe_clock_rate;
 	else
 		fixed->fixed_rate = 125000000;
 
@@ -2197,97 +2164,92 @@ static const struct phy_ops qmp_pcie_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int qmp_pcie_create(struct device *dev, struct device_node *np, int id,
+static int qmp_pcie_create(struct device *dev, struct device_node *np,
 			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_pcie *qmp = dev_get_drvdata(dev);
 	struct phy *generic_phy;
-	struct qmp_phy *qphy;
 	int ret;
 
-	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
-	if (!qphy)
-		return -ENOMEM;
+	qmp->mode = PHY_MODE_PCIE_RC;
 
-	qphy->mode = PHY_MODE_PCIE_RC;
+	qmp->cfg = cfg;
+	qmp->serdes = serdes;
 
-	qphy->cfg = cfg;
-	qphy->serdes = serdes;
 	/*
 	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
 	 */
-	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
-	if (IS_ERR(qphy->tx))
-		return PTR_ERR(qphy->tx);
+	qmp->tx = devm_of_iomap(dev, np, 0, NULL);
+	if (IS_ERR(qmp->tx))
+		return PTR_ERR(qmp->tx);
 
 	if (of_device_is_compatible(dev->of_node, "qcom,sdm845-qhp-pcie-phy"))
-		qphy->rx = qphy->tx;
+		qmp->rx = qmp->tx;
 	else
-		qphy->rx = devm_of_iomap(dev, np, 1, NULL);
-	if (IS_ERR(qphy->rx))
-		return PTR_ERR(qphy->rx);
+		qmp->rx = devm_of_iomap(dev, np, 1, NULL);
+	if (IS_ERR(qmp->rx))
+		return PTR_ERR(qmp->rx);
 
-	qphy->pcs = devm_of_iomap(dev, np, 2, NULL);
-	if (IS_ERR(qphy->pcs))
-		return PTR_ERR(qphy->pcs);
+	qmp->pcs = devm_of_iomap(dev, np, 2, NULL);
+	if (IS_ERR(qmp->pcs))
+		return PTR_ERR(qmp->pcs);
 
 	if (cfg->lanes >= 2) {
-		qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
-		if (IS_ERR(qphy->tx2))
-			return PTR_ERR(qphy->tx2);
+		qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
+		if (IS_ERR(qmp->tx2))
+			return PTR_ERR(qmp->tx2);
 
-		qphy->rx2 = devm_of_iomap(dev, np, 4, NULL);
-		if (IS_ERR(qphy->rx2))
-			return PTR_ERR(qphy->rx2);
+		qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
+		if (IS_ERR(qmp->rx2))
+			return PTR_ERR(qmp->rx2);
 
-		qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
+		qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
 	} else {
-		qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
+		qmp->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
 	}
 
-	if (IS_ERR(qphy->pcs_misc) &&
+	if (IS_ERR(qmp->pcs_misc) &&
 	    of_device_is_compatible(dev->of_node, "qcom,ipq6018-qmp-pcie-phy"))
-		qphy->pcs_misc = qphy->pcs + 0x400;
+		qmp->pcs_misc = qmp->pcs + 0x400;
 
-	if (IS_ERR(qphy->pcs_misc)) {
+	if (IS_ERR(qmp->pcs_misc)) {
 		if (cfg->tables.pcs_misc ||
 		    (cfg->tables_rc && cfg->tables_rc->pcs_misc) ||
-		    (cfg->tables_ep && cfg->tables_ep->pcs_misc))
-			return PTR_ERR(qphy->pcs_misc);
+		    (cfg->tables_ep && cfg->tables_ep->pcs_misc)) {
+			return PTR_ERR(qmp->pcs_misc);
+		}
 	}
 
-	qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
-	if (IS_ERR(qphy->pipe_clk)) {
-		return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk),
-				     "failed to get lane%d pipe clock\n", id);
+	qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
+	if (IS_ERR(qmp->pipe_clk)) {
+		return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+				     "failed to get pipe clock\n");
 	}
 
 	generic_phy = devm_phy_create(dev, np, &qmp_pcie_ops);
 	if (IS_ERR(generic_phy)) {
 		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create qphy %d\n", ret);
+		dev_err(dev, "failed to create PHY: %d\n", ret);
 		return ret;
 	}
 
-	qphy->phy = generic_phy;
-	qphy->qmp = qmp;
-	qmp->phys[id] = qphy;
-	phy_set_drvdata(generic_phy, qphy);
+	qmp->phy = generic_phy;
+	phy_set_drvdata(generic_phy, qmp);
 
 	return 0;
 }
 
 static int qmp_pcie_probe(struct platform_device *pdev)
 {
-	struct qcom_qmp *qmp;
 	struct device *dev = &pdev->dev;
 	struct device_node *child;
 	struct phy_provider *phy_provider;
 	void __iomem *serdes;
 	const struct qmp_phy_cfg *cfg = NULL;
+	struct qmp_pcie *qmp;
 	int num, id;
 	int ret;
 
@@ -2326,14 +2288,10 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	if (num > 1)
 		return -EINVAL;
 
-	qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL);
-	if (!qmp->phys)
-		return -ENOMEM;
-
 	id = 0;
 	for_each_available_child_of_node(dev->of_node, child) {
 		/* Create per-lane phy */
-		ret = qmp_pcie_create(dev, child, id, serdes, cfg);
+		ret = qmp_pcie_create(dev, child, serdes, cfg);
 		if (ret) {
 			dev_err(dev, "failed to create lane%d phy, %d\n",
 				id, ret);
-- 
GitLab


From 393ed5d515494250712dd1703418bb541765afe3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:27 +0100
Subject: [PATCH 1233/4122] phy: qcom-qmp-pcie: clean up device-tree parsing

Since the QMP driver split there will be at most a single child node so
drop the obsolete iteration construct.

While at it, drop the verbose error logging that would have been
printed also on probe deferrals.

Note that there's no need to check if there are additional child nodes
(the kernel is not a devicetree validator), but let's return an error if
there are no child nodes at all for now.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 34 +++++++-----------------
 1 file changed, 9 insertions(+), 25 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 667a87e7c9179..bc96518ad6b0e 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2250,7 +2250,6 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	void __iomem *serdes;
 	const struct qmp_phy_cfg *cfg = NULL;
 	struct qmp_pcie *qmp;
-	int num, id;
 	int ret;
 
 	qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL);
@@ -2283,34 +2282,19 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	num = of_get_available_child_count(dev->of_node);
-	/* do we have a rogue child node ? */
-	if (num > 1)
+	child = of_get_next_available_child(dev->of_node, NULL);
+	if (!child)
 		return -EINVAL;
 
-	id = 0;
-	for_each_available_child_of_node(dev->of_node, child) {
-		/* Create per-lane phy */
-		ret = qmp_pcie_create(dev, child, serdes, cfg);
-		if (ret) {
-			dev_err(dev, "failed to create lane%d phy, %d\n",
-				id, ret);
-			goto err_node_put;
-		}
+	ret = qmp_pcie_create(dev, child, serdes, cfg);
+	if (ret)
+		goto err_node_put;
 
-		/*
-		 * Register the pipe clock provided by phy.
-		 * See function description to see details of this pipe clock.
-		 */
-		ret = phy_pipe_clk_register(qmp, child);
-		if (ret) {
-			dev_err(qmp->dev,
-				"failed to register pipe clock source\n");
-			goto err_node_put;
-		}
+	ret = phy_pipe_clk_register(qmp, child);
+	if (ret)
+		goto err_node_put;
 
-		id++;
-	}
+	of_node_put(child);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
-- 
GitLab


From 52b997732eb6bf64df242403c7510520d89be266 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:28 +0100
Subject: [PATCH 1234/4122] phy: qcom-qmp-pcie: clean up probe initialisation

Stop abusing the driver data pointer and instead pass the driver state
structure directly to the initialisation helpers during probe.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 49 +++++++++++-------------
 1 file changed, 23 insertions(+), 26 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index bc96518ad6b0e..e30cbc94cbf6b 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2037,9 +2037,10 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 	return 0;
 }
 
-static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_pcie_vreg_init(struct qmp_pcie *qmp)
 {
-	struct qmp_pcie *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_vregs;
 	int i;
 
@@ -2053,9 +2054,10 @@ static int qmp_pcie_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 	return devm_regulator_bulk_get(dev, num, qmp->vregs);
 }
 
-static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_pcie_reset_init(struct qmp_pcie *qmp)
 {
-	struct qmp_pcie *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int i;
 	int ret;
 
@@ -2074,9 +2076,10 @@ static int qmp_pcie_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg
 	return 0;
 }
 
-static int qmp_pcie_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_pcie_clk_init(struct qmp_pcie *qmp)
 {
-	struct qmp_pcie *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_clks;
 	int i;
 
@@ -2164,18 +2167,15 @@ static const struct phy_ops qmp_pcie_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int qmp_pcie_create(struct device *dev, struct device_node *np,
-			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
+static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np)
 {
-	struct qmp_pcie *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	struct phy *generic_phy;
 	int ret;
 
 	qmp->mode = PHY_MODE_PCIE_RC;
 
-	qmp->cfg = cfg;
-	qmp->serdes = serdes;
-
 	/*
 	 * Get memory resources for the PHY:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
@@ -2247,8 +2247,6 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *child;
 	struct phy_provider *phy_provider;
-	void __iomem *serdes;
-	const struct qmp_phy_cfg *cfg = NULL;
 	struct qmp_pcie *qmp;
 	int ret;
 
@@ -2257,28 +2255,27 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	qmp->dev = dev;
-	dev_set_drvdata(dev, qmp);
 
-	cfg = of_device_get_match_data(dev);
-	if (!cfg)
+	qmp->cfg = of_device_get_match_data(dev);
+	if (!qmp->cfg)
 		return -EINVAL;
 
-	WARN_ON_ONCE(!cfg->pwrdn_ctrl);
-	WARN_ON_ONCE(!cfg->phy_status);
+	WARN_ON_ONCE(!qmp->cfg->pwrdn_ctrl);
+	WARN_ON_ONCE(!qmp->cfg->phy_status);
 
-	serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(serdes))
-		return PTR_ERR(serdes);
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
 
-	ret = qmp_pcie_clk_init(dev, cfg);
+	ret = qmp_pcie_clk_init(qmp);
 	if (ret)
 		return ret;
 
-	ret = qmp_pcie_reset_init(dev, cfg);
+	ret = qmp_pcie_reset_init(qmp);
 	if (ret)
 		return ret;
 
-	ret = qmp_pcie_vreg_init(dev, cfg);
+	ret = qmp_pcie_vreg_init(qmp);
 	if (ret)
 		return ret;
 
@@ -2286,7 +2283,7 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	if (!child)
 		return -EINVAL;
 
-	ret = qmp_pcie_create(dev, child, serdes, cfg);
+	ret = qmp_pcie_create(qmp, child);
 	if (ret)
 		goto err_node_put;
 
-- 
GitLab


From 63bf101ae1912570260912087bfd7b1cf420c3dc Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:29 +0100
Subject: [PATCH 1235/4122] phy: qcom-qmp-pcie: rename PHY ops structure

Rename the PHY operation structure so that it has a "phy_ops" suffix and
move it next to the implementation.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index e30cbc94cbf6b..bd946438e3c37 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2037,6 +2037,13 @@ static int qmp_pcie_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 	return 0;
 }
 
+static const struct phy_ops qmp_pcie_phy_ops = {
+	.power_on	= qmp_pcie_enable,
+	.power_off	= qmp_pcie_disable,
+	.set_mode	= qmp_pcie_set_mode,
+	.owner		= THIS_MODULE,
+};
+
 static int qmp_pcie_vreg_init(struct qmp_pcie *qmp)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -2160,13 +2167,6 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np)
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static const struct phy_ops qmp_pcie_ops = {
-	.power_on	= qmp_pcie_enable,
-	.power_off	= qmp_pcie_disable,
-	.set_mode	= qmp_pcie_set_mode,
-	.owner		= THIS_MODULE,
-};
-
 static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -2229,7 +2229,7 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np)
 				     "failed to get pipe clock\n");
 	}
 
-	generic_phy = devm_phy_create(dev, np, &qmp_pcie_ops);
+	generic_phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops);
 	if (IS_ERR(generic_phy)) {
 		ret = PTR_ERR(generic_phy);
 		dev_err(dev, "failed to create PHY: %d\n", ret);
-- 
GitLab


From f8b641146484b6be33c291fa8279ffe423e169d9 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:30 +0100
Subject: [PATCH 1236/4122] phy: qcom-qmp-pcie: clean up PHY lane init

Clean up the PHY lane initialisation somewhat by adding further
temporary variables and programming both tx and rx for the second lane
after the first lane.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index bd946438e3c37..dd7e72424fc04 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1835,18 +1835,19 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	void __iomem *tx = qmp->tx;
 	void __iomem *rx = qmp->rx;
+	void __iomem *tx2 = qmp->tx2;
+	void __iomem *rx2 = qmp->rx2;
 
 	if (!tables)
 		return;
 
 	qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1);
-
-	if (cfg->lanes >= 2)
-		qmp_pcie_configure_lane(qmp->tx2, tables->tx, tables->tx_num, 2);
-
 	qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1);
-	if (cfg->lanes >= 2)
-		qmp_pcie_configure_lane(qmp->rx2, tables->rx, tables->rx_num, 2);
+
+	if (cfg->lanes >= 2) {
+		qmp_pcie_configure_lane(tx2, tables->tx, tables->tx_num, 2);
+		qmp_pcie_configure_lane(rx2, tables->rx, tables->rx_num, 2);
+	}
 }
 
 static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables)
-- 
GitLab


From d8c9a1e9c223951836692eb8c3810e18ae06ffc2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:31 +0100
Subject: [PATCH 1237/4122] phy: qcom-qmp-pcie: use shorter tables identifiers

The QMP drivers all use 'tbl' to refer to their register initialisation
tables.

For consistency use 'tbls' rather than 'tables' to refer to the new
aggregate table structures.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221105145939.20318-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 90 ++++++++++++------------
 1 file changed, 45 insertions(+), 45 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index dd7e72424fc04..a977f2bbd8317 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1313,7 +1313,7 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] =
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08),
 };
 
-struct qmp_phy_cfg_tables {
+struct qmp_phy_cfg_tbls {
 	const struct qmp_phy_init_tbl *serdes;
 	int serdes_num;
 	const struct qmp_phy_init_tbl *tx;
@@ -1331,7 +1331,7 @@ struct qmp_phy_cfg {
 	int lanes;
 
 	/* Main init sequence for PHY blocks - serdes, tx, rx, pcs */
-	const struct qmp_phy_cfg_tables tables;
+	const struct qmp_phy_cfg_tbls tbls;
 	/*
 	 * Additional init sequences for PHY blocks, providing additional
 	 * register programming. They are used for providing separate sequences
@@ -1339,8 +1339,8 @@ struct qmp_phy_cfg {
 	 *
 	 * If EP mode is not supported, both tables can be left unset.
 	 */
-	const struct qmp_phy_cfg_tables *tables_rc;
-	const struct qmp_phy_cfg_tables *tables_ep;
+	const struct qmp_phy_cfg_tbls *tbls_rc;
+	const struct qmp_phy_cfg_tbls *tbls_ep;
 
 	/* clock ids to be requested */
 	const char * const *clk_list;
@@ -1442,7 +1442,7 @@ static const char * const sdm845_pciephy_reset_l[] = {
 static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= ipq8074_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(ipq8074_pcie_serdes_tbl),
 		.tx		= ipq8074_pcie_tx_tbl,
@@ -1467,7 +1467,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= ipq8074_pcie_gen3_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(ipq8074_pcie_gen3_serdes_tbl),
 		.tx		= ipq8074_pcie_gen3_tx_tbl,
@@ -1494,7 +1494,7 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= ipq6018_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(ipq6018_pcie_serdes_tbl),
 		.tx		= ipq6018_pcie_tx_tbl,
@@ -1521,7 +1521,7 @@ static const struct qmp_phy_cfg ipq6018_pciephy_cfg = {
 static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sdm845_qmp_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sdm845_qmp_pcie_serdes_tbl),
 		.tx		= sdm845_qmp_pcie_tx_tbl,
@@ -1548,7 +1548,7 @@ static const struct qmp_phy_cfg sdm845_qmp_pciephy_cfg = {
 static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sdm845_qhp_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sdm845_qhp_pcie_serdes_tbl),
 		.tx		= sdm845_qhp_pcie_tx_tbl,
@@ -1573,7 +1573,7 @@ static const struct qmp_phy_cfg sdm845_qhp_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sm8250_qmp_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl),
 		.tx		= sm8250_qmp_pcie_tx_tbl,
@@ -1585,7 +1585,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 		.pcs_misc	= sm8250_qmp_pcie_pcs_misc_tbl,
 		.pcs_misc_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl),
 	},
-	.tables_rc = &(const struct qmp_phy_cfg_tables) {
+	.tbls_rc = &(const struct qmp_phy_cfg_tbls) {
 		.serdes		= sm8250_qmp_gen3x1_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sm8250_qmp_gen3x1_pcie_serdes_tbl),
 		.rx		= sm8250_qmp_gen3x1_pcie_rx_tbl,
@@ -1610,7 +1610,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x1_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 	.lanes			= 2,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sm8250_qmp_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sm8250_qmp_pcie_serdes_tbl),
 		.tx		= sm8250_qmp_pcie_tx_tbl,
@@ -1622,7 +1622,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 		.pcs_misc	= sm8250_qmp_pcie_pcs_misc_tbl,
 		.pcs_misc_num	= ARRAY_SIZE(sm8250_qmp_pcie_pcs_misc_tbl),
 	},
-	.tables_rc = &(const struct qmp_phy_cfg_tables) {
+	.tbls_rc = &(const struct qmp_phy_cfg_tbls) {
 		.tx		= sm8250_qmp_gen3x2_pcie_tx_tbl,
 		.tx_num		= ARRAY_SIZE(sm8250_qmp_gen3x2_pcie_tx_tbl),
 		.rx		= sm8250_qmp_gen3x2_pcie_rx_tbl,
@@ -1647,7 +1647,7 @@ static const struct qmp_phy_cfg sm8250_qmp_gen3x2_pciephy_cfg = {
 static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= msm8998_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(msm8998_pcie_serdes_tbl),
 		.tx		= msm8998_pcie_tx_tbl,
@@ -1674,7 +1674,7 @@ static const struct qmp_phy_cfg msm8998_pciephy_cfg = {
 static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sc8180x_qmp_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sc8180x_qmp_pcie_serdes_tbl),
 		.tx		= sc8180x_qmp_pcie_tx_tbl,
@@ -1701,7 +1701,7 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 	.lanes			= 2,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sdx55_qmp_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sdx55_qmp_pcie_serdes_tbl),
 		.tx		= sdx55_qmp_pcie_tx_tbl,
@@ -1728,7 +1728,7 @@ static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
 	.lanes			= 1,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sm8450_qmp_gen3x1_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen3x1_pcie_serdes_tbl),
 		.tx		= sm8450_qmp_gen3x1_pcie_tx_tbl,
@@ -1755,7 +1755,7 @@ static const struct qmp_phy_cfg sm8450_qmp_gen3x1_pciephy_cfg = {
 static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 	.lanes			= 2,
 
-	.tables = {
+	.tbls = {
 		.serdes		= sm8450_qmp_gen4x2_pcie_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_serdes_tbl),
 		.tx		= sm8450_qmp_gen4x2_pcie_tx_tbl,
@@ -1768,14 +1768,14 @@ static const struct qmp_phy_cfg sm8450_qmp_gen4x2_pciephy_cfg = {
 		.pcs_misc_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_pcs_misc_tbl),
 	},
 
-	.tables_rc = &(const struct qmp_phy_cfg_tables) {
+	.tbls_rc = &(const struct qmp_phy_cfg_tbls) {
 		.serdes		= sm8450_qmp_gen4x2_pcie_rc_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_serdes_tbl),
 		.pcs_misc	= sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl,
 		.pcs_misc_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl),
 	},
 
-	.tables_ep = &(const struct qmp_phy_cfg_tables) {
+	.tbls_ep = &(const struct qmp_phy_cfg_tbls) {
 		.serdes		= sm8450_qmp_gen4x2_pcie_ep_serdes_tbl,
 		.serdes_num	= ARRAY_SIZE(sm8450_qmp_gen4x2_pcie_ep_serdes_tbl),
 		.pcs_misc	= sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl,
@@ -1820,17 +1820,17 @@ static void qmp_pcie_configure(void __iomem *base,
 	qmp_pcie_configure_lane(base, tbl, num, 0xff);
 }
 
-static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables)
+static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
 {
 	void __iomem *serdes = qmp->serdes;
 
-	if (!tables)
+	if (!tbls)
 		return;
 
-	qmp_pcie_configure(serdes, tables->serdes, tables->serdes_num);
+	qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num);
 }
 
-static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables)
+static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	void __iomem *tx = qmp->tx;
@@ -1838,28 +1838,28 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t
 	void __iomem *tx2 = qmp->tx2;
 	void __iomem *rx2 = qmp->rx2;
 
-	if (!tables)
+	if (!tbls)
 		return;
 
-	qmp_pcie_configure_lane(tx, tables->tx, tables->tx_num, 1);
-	qmp_pcie_configure_lane(rx, tables->rx, tables->rx_num, 1);
+	qmp_pcie_configure_lane(tx, tbls->tx, tbls->tx_num, 1);
+	qmp_pcie_configure_lane(rx, tbls->rx, tbls->rx_num, 1);
 
 	if (cfg->lanes >= 2) {
-		qmp_pcie_configure_lane(tx2, tables->tx, tables->tx_num, 2);
-		qmp_pcie_configure_lane(rx2, tables->rx, tables->rx_num, 2);
+		qmp_pcie_configure_lane(tx2, tbls->tx, tbls->tx_num, 2);
+		qmp_pcie_configure_lane(rx2, tbls->rx, tbls->rx_num, 2);
 	}
 }
 
-static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tables *tables)
+static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
 {
 	void __iomem *pcs = qmp->pcs;
 	void __iomem *pcs_misc = qmp->pcs_misc;
 
-	if (!tables)
+	if (!tbls)
 		return;
 
-	qmp_pcie_configure(pcs, tables->pcs, tables->pcs_num);
-	qmp_pcie_configure(pcs_misc, tables->pcs_misc, tables->pcs_misc_num);
+	qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num);
+	qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num);
 }
 
 static int qmp_pcie_init(struct phy *phy)
@@ -1918,7 +1918,7 @@ static int qmp_pcie_power_on(struct phy *phy)
 {
 	struct qmp_pcie *qmp = phy_get_drvdata(phy);
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
-	const struct qmp_phy_cfg_tables *mode_tables;
+	const struct qmp_phy_cfg_tbls *mode_tbls;
 	void __iomem *pcs = qmp->pcs;
 	void __iomem *status;
 	unsigned int mask, val;
@@ -1928,12 +1928,12 @@ static int qmp_pcie_power_on(struct phy *phy)
 			cfg->pwrdn_ctrl);
 
 	if (qmp->mode == PHY_MODE_PCIE_RC)
-		mode_tables = cfg->tables_rc;
+		mode_tbls = cfg->tbls_rc;
 	else
-		mode_tables = cfg->tables_ep;
+		mode_tbls = cfg->tbls_ep;
 
-	qmp_pcie_serdes_init(qmp, &cfg->tables);
-	qmp_pcie_serdes_init(qmp, mode_tables);
+	qmp_pcie_serdes_init(qmp, &cfg->tbls);
+	qmp_pcie_serdes_init(qmp, mode_tbls);
 
 	ret = clk_prepare_enable(qmp->pipe_clk);
 	if (ret) {
@@ -1942,11 +1942,11 @@ static int qmp_pcie_power_on(struct phy *phy)
 	}
 
 	/* Tx, Rx, and PCS configurations */
-	qmp_pcie_lanes_init(qmp, &cfg->tables);
-	qmp_pcie_lanes_init(qmp, mode_tables);
+	qmp_pcie_lanes_init(qmp, &cfg->tbls);
+	qmp_pcie_lanes_init(qmp, mode_tbls);
 
-	qmp_pcie_pcs_init(qmp, &cfg->tables);
-	qmp_pcie_pcs_init(qmp, mode_tables);
+	qmp_pcie_pcs_init(qmp, &cfg->tbls);
+	qmp_pcie_pcs_init(qmp, mode_tbls);
 
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
@@ -2217,9 +2217,9 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np)
 		qmp->pcs_misc = qmp->pcs + 0x400;
 
 	if (IS_ERR(qmp->pcs_misc)) {
-		if (cfg->tables.pcs_misc ||
-		    (cfg->tables_rc && cfg->tables_rc->pcs_misc) ||
-		    (cfg->tables_ep && cfg->tables_ep->pcs_misc)) {
+		if (cfg->tbls.pcs_misc ||
+		    (cfg->tbls_rc && cfg->tbls_rc->pcs_misc) ||
+		    (cfg->tbls_ep && cfg->tbls_ep->pcs_misc)) {
 			return PTR_ERR(qmp->pcs_misc);
 		}
 	}
-- 
GitLab


From ec7bc1b40b363c46af0b17d4cb0ea4dc21b7cf9b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:32 +0100
Subject: [PATCH 1238/4122] phy: qcom-qmp-pcie: add register init helper

Generalise the serdes initialisation helper so that it can be used to
initialise all the PHY registers (e.g. serdes, tx, rx, pcs).

Note that this defers the ungating of the PIPE clock somewhat, which is
fine as it isn't needed until starting the PHY.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 37 +++++-------------------
 1 file changed, 8 insertions(+), 29 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index a977f2bbd8317..09999d5b52689 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1820,27 +1820,22 @@ static void qmp_pcie_configure(void __iomem *base,
 	qmp_pcie_configure_lane(base, tbl, num, 0xff);
 }
 
-static void qmp_pcie_serdes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
-{
-	void __iomem *serdes = qmp->serdes;
-
-	if (!tbls)
-		return;
-
-	qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num);
-}
-
-static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
+static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *serdes = qmp->serdes;
 	void __iomem *tx = qmp->tx;
 	void __iomem *rx = qmp->rx;
 	void __iomem *tx2 = qmp->tx2;
 	void __iomem *rx2 = qmp->rx2;
+	void __iomem *pcs = qmp->pcs;
+	void __iomem *pcs_misc = qmp->pcs_misc;
 
 	if (!tbls)
 		return;
 
+	qmp_pcie_configure(serdes, tbls->serdes, tbls->serdes_num);
+
 	qmp_pcie_configure_lane(tx, tbls->tx, tbls->tx_num, 1);
 	qmp_pcie_configure_lane(rx, tbls->rx, tbls->rx_num, 1);
 
@@ -1848,15 +1843,6 @@ static void qmp_pcie_lanes_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_t
 		qmp_pcie_configure_lane(tx2, tbls->tx, tbls->tx_num, 2);
 		qmp_pcie_configure_lane(rx2, tbls->rx, tbls->rx_num, 2);
 	}
-}
-
-static void qmp_pcie_pcs_init(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
-{
-	void __iomem *pcs = qmp->pcs;
-	void __iomem *pcs_misc = qmp->pcs_misc;
-
-	if (!tbls)
-		return;
 
 	qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num);
 	qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num);
@@ -1932,8 +1918,8 @@ static int qmp_pcie_power_on(struct phy *phy)
 	else
 		mode_tbls = cfg->tbls_ep;
 
-	qmp_pcie_serdes_init(qmp, &cfg->tbls);
-	qmp_pcie_serdes_init(qmp, mode_tbls);
+	qmp_pcie_init_registers(qmp, &cfg->tbls);
+	qmp_pcie_init_registers(qmp, mode_tbls);
 
 	ret = clk_prepare_enable(qmp->pipe_clk);
 	if (ret) {
@@ -1941,13 +1927,6 @@ static int qmp_pcie_power_on(struct phy *phy)
 		return ret;
 	}
 
-	/* Tx, Rx, and PCS configurations */
-	qmp_pcie_lanes_init(qmp, &cfg->tbls);
-	qmp_pcie_lanes_init(qmp, mode_tbls);
-
-	qmp_pcie_pcs_init(qmp, &cfg->tbls);
-	qmp_pcie_pcs_init(qmp, mode_tbls);
-
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
-- 
GitLab


From dcb93f47dd14cb0c206424b3258399b4cd205b20 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:33 +0100
Subject: [PATCH 1239/4122] dt-bindings: phy: qcom,qmp-pcie: rename current
 bindings

The current QMP PCIe PHY bindings are based on the original MSM8996
binding which provided multiple PHYs per IP block and these in turn were
described by child nodes.

Later QMP PCIe PHY blocks only provide a single PHY and the remnant
child node does not really reflect the hardware.

The original MSM8996 binding also ended up describing the individual
register blocks as belonging to either the wrapper node or the PHY child
nodes.

This is an unnecessary level of detail which has lead to problems when
later IP blocks using different register layouts have been forced to fit
the original mould rather than updating the binding. The bindings are
arguable also incomplete as they only the describe register blocks used
by the current Linux drivers (e.g. does not include the per lane PCS
registers).

In preparation for adding new bindings for SC8280XP which further
bindings can be based on, rename the current schema file after IPQ8074,
which was the first SoC added to the bindings after MSM8996 (which has
already been split out), and add a reference to the SC8280XP bindings.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 ...om,qmp-pcie-phy.yaml => qcom,ipq8074-qmp-pcie-phy.yaml} | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)
 rename Documentation/devicetree/bindings/phy/{qcom,qmp-pcie-phy.yaml => qcom,ipq8074-qmp-pcie-phy.yaml} (96%)

diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml
similarity index 96%
rename from Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml
rename to Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml
index 324ad7d03a383..62045dcfb20c5 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qmp-pcie-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,ipq8074-qmp-pcie-phy.yaml
@@ -1,10 +1,10 @@
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/phy/qcom,qmp-pcie-phy.yaml#
+$id: http://devicetree.org/schemas/phy/qcom,ipq8074-qmp-pcie-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm QMP PHY controller (PCIe)
+title: Qualcomm QMP PHY controller (PCIe, IPQ8074)
 
 maintainers:
   - Vinod Koul <vkoul@kernel.org>
@@ -13,6 +13,9 @@ description:
   QMP PHY controller supports physical layer functionality for a number of
   controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB.
 
+  Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see
+  qcom,sc8280xp-qmp-pcie-phy.yaml.
+
 properties:
   compatible:
     enum:
-- 
GitLab


From 306382305c5cefce892784da8686c242956f5fd2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:34 +0100
Subject: [PATCH 1240/4122] dt-bindings: phy: qcom,qmp-pcie: add sc8280xp
 bindings

Add bindings for the PCIe QMP PHYs found on SC8280XP.

The PCIe2 and PCIe3 controllers and PHYs on SC8280XP can be used in
4-lane mode or as separate controllers and PHYs in 2-lane mode (e.g. as
PCIe2A and PCIe2B).

The configuration for a specific system can be read from a TCSR register.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-12-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/qcom,sc8280xp-qmp-pcie-phy.yaml       | 165 ++++++++++++++++++
 1 file changed, 165 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml

diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml
new file mode 100644
index 0000000000000..80aa8d2507fb6
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-pcie-phy.yaml
@@ -0,0 +1,165 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-pcie-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QMP PHY controller (PCIe, SC8280XP)
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+
+description:
+  The QMP PHY controller supports physical layer functionality for a number of
+  controllers on Qualcomm chipsets, such as, PCIe, UFS, and USB.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sc8280xp-qmp-gen3x1-pcie-phy
+      - qcom,sc8280xp-qmp-gen3x2-pcie-phy
+      - qcom,sc8280xp-qmp-gen3x4-pcie-phy
+
+  reg:
+    minItems: 1
+    maxItems: 2
+
+  clocks:
+    maxItems: 6
+
+  clock-names:
+    items:
+      - const: aux
+      - const: cfg_ahb
+      - const: ref
+      - const: rchng
+      - const: pipe
+      - const: pipediv2
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    items:
+      - const: phy
+
+  vdda-phy-supply: true
+
+  vdda-pll-supply: true
+
+  qcom,4ln-config-sel:
+    description: PCIe 4-lane configuration
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    items:
+      - items:
+          - description: phandle of TCSR syscon
+          - description: offset of PCIe 4-lane configuration register
+          - description: offset of configuration bit for this PHY
+
+  "#clock-cells":
+    const: 0
+
+  clock-output-names:
+    maxItems: 1
+
+  "#phy-cells":
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - power-domains
+  - resets
+  - reset-names
+  - vdda-phy-supply
+  - vdda-pll-supply
+  - "#clock-cells"
+  - clock-output-names
+  - "#phy-cells"
+
+additionalProperties: false
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,sc8280xp-qmp-gen3x4-pcie-phy
+    then:
+      properties:
+        reg:
+          items:
+            - description: port a
+            - description: port b
+      required:
+        - qcom,4ln-config-sel
+    else:
+      properties:
+        reg:
+          maxItems: 1
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-sc8280xp.h>
+
+    pcie2b_phy: phy@1c18000 {
+      compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy";
+      reg = <0x01c18000 0x2000>;
+
+      clocks = <&gcc GCC_PCIE_2B_AUX_CLK>,
+               <&gcc GCC_PCIE_2B_CFG_AHB_CLK>,
+               <&gcc GCC_PCIE_2A2B_CLKREF_CLK>,
+               <&gcc GCC_PCIE2B_PHY_RCHNG_CLK>,
+               <&gcc GCC_PCIE_2B_PIPE_CLK>,
+               <&gcc GCC_PCIE_2B_PIPEDIV2_CLK>;
+      clock-names = "aux", "cfg_ahb", "ref", "rchng",
+                    "pipe", "pipediv2";
+
+      power-domains = <&gcc PCIE_2B_GDSC>;
+
+      resets = <&gcc GCC_PCIE_2B_PHY_BCR>;
+      reset-names = "phy";
+
+      vdda-phy-supply = <&vreg_l6d>;
+      vdda-pll-supply = <&vreg_l4d>;
+
+      #clock-cells = <0>;
+      clock-output-names = "pcie_2b_pipe_clk";
+
+      #phy-cells = <0>;
+    };
+
+    pcie2a_phy: phy@1c24000 {
+      compatible = "qcom,sc8280xp-qmp-gen3x4-pcie-phy";
+      reg = <0x01c24000 0x2000>, <0x01c26000 0x2000>;
+
+      clocks = <&gcc GCC_PCIE_2A_AUX_CLK>,
+               <&gcc GCC_PCIE_2A_CFG_AHB_CLK>,
+               <&gcc GCC_PCIE_2A2B_CLKREF_CLK>,
+               <&gcc GCC_PCIE2A_PHY_RCHNG_CLK>,
+               <&gcc GCC_PCIE_2A_PIPE_CLK>,
+               <&gcc GCC_PCIE_2A_PIPEDIV2_CLK>;
+      clock-names = "aux", "cfg_ahb", "ref", "rchng",
+                    "pipe", "pipediv2";
+
+      power-domains = <&gcc PCIE_2A_GDSC>;
+
+      resets = <&gcc GCC_PCIE_2A_PHY_BCR>;
+      reset-names = "phy";
+
+      vdda-phy-supply = <&vreg_l6d>;
+      vdda-pll-supply = <&vreg_l4d>;
+
+      qcom,4ln-config-sel = <&tcsr 0xa044 0>;
+
+      #clock-cells = <0>;
+      clock-output-names = "pcie_2a_pipe_clk";
+
+      #phy-cells = <0>;
+    };
-- 
GitLab


From 7bc609e34899dd3065fc9cbc73bb8a4902e257df Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:35 +0100
Subject: [PATCH 1241/4122] phy: qcom-qmp-pcie: restructure PHY creation

In preparation for supporting devicetree bindings which do not use a
child node, move the PHY creation to probe() proper and parse the serdes
resource in what is now the legacy devicetree helper.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-13-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 36 +++++++++++-------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 09999d5b52689..8af84ff755ab1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -2147,14 +2147,15 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np)
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np)
+static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np)
 {
+	struct platform_device *pdev = to_platform_device(qmp->dev);
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	struct device *dev = qmp->dev;
-	struct phy *generic_phy;
-	int ret;
 
-	qmp->mode = PHY_MODE_PCIE_RC;
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
 
 	/*
 	 * Get memory resources for the PHY:
@@ -2209,16 +2210,6 @@ static int qmp_pcie_create(struct qmp_pcie *qmp, struct device_node *np)
 				     "failed to get pipe clock\n");
 	}
 
-	generic_phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops);
-	if (IS_ERR(generic_phy)) {
-		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create PHY: %d\n", ret);
-		return ret;
-	}
-
-	qmp->phy = generic_phy;
-	phy_set_drvdata(generic_phy, qmp);
-
 	return 0;
 }
 
@@ -2243,10 +2234,6 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	WARN_ON_ONCE(!qmp->cfg->pwrdn_ctrl);
 	WARN_ON_ONCE(!qmp->cfg->phy_status);
 
-	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(qmp->serdes))
-		return PTR_ERR(qmp->serdes);
-
 	ret = qmp_pcie_clk_init(qmp);
 	if (ret)
 		return ret;
@@ -2263,7 +2250,7 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	if (!child)
 		return -EINVAL;
 
-	ret = qmp_pcie_create(qmp, child);
+	ret = qmp_pcie_parse_dt_legacy(qmp, child);
 	if (ret)
 		goto err_node_put;
 
@@ -2271,6 +2258,17 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_node_put;
 
+	qmp->mode = PHY_MODE_PCIE_RC;
+
+	qmp->phy = devm_phy_create(dev, child, &qmp_pcie_phy_ops);
+	if (IS_ERR(qmp->phy)) {
+		ret = PTR_ERR(qmp->phy);
+		dev_err(dev, "failed to create PHY: %d\n", ret);
+		goto err_node_put;
+	}
+
+	phy_set_drvdata(qmp->phy, qmp);
+
 	of_node_put(child);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
-- 
GitLab


From fffdeaf853d8088c5149fc776974344b0f815dc8 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:36 +0100
Subject: [PATCH 1242/4122] phy: qcom-qmp-pcie: fix initialisation reset

Add the missing delay after asserting reset. This is specifically needed
for the reset to have any effect on SC8280XP.

The vendor driver uses a 1 ms delay, but that seems a bit excessive.
Instead use a 200 us delay which appears to be more than enough and also
matches the UFS reset delay added by commit 870b1279c7a0 ("scsi:
ufs-qcom: Add reset control support for host controller").

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-14-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 8af84ff755ab1..06844552922ee 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1866,6 +1866,8 @@ static int qmp_pcie_init(struct phy *phy)
 		goto err_disable_regulators;
 	}
 
+	usleep_range(200, 300);
+
 	ret = reset_control_bulk_deassert(cfg->num_resets, qmp->resets);
 	if (ret) {
 		dev_err(qmp->dev, "reset deassert failed\n");
-- 
GitLab


From 9e420f1e7eddbbb6b73a78aca2c280ddd8a63096 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:37 +0100
Subject: [PATCH 1243/4122] phy: qcom-qmp-pcie: add support for pipediv2 clock

Some QMP PHYs have a second fixed-divider pipe clock that needs to be
enabled along with the pipe clock.

Add support for an optional "pipediv2" clock.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-15-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 25 ++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 06844552922ee..d671b05c73dde 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1378,8 +1378,10 @@ struct qmp_pcie {
 	void __iomem *tx2;
 	void __iomem *rx2;
 
-	struct clk *pipe_clk;
 	struct clk_bulk_data *clks;
+	struct clk_bulk_data pipe_clks[2];
+	int num_pipe_clks;
+
 	struct reset_control_bulk_data *resets;
 	struct regulator_bulk_data *vregs;
 
@@ -1923,11 +1925,9 @@ static int qmp_pcie_power_on(struct phy *phy)
 	qmp_pcie_init_registers(qmp, &cfg->tbls);
 	qmp_pcie_init_registers(qmp, mode_tbls);
 
-	ret = clk_prepare_enable(qmp->pipe_clk);
-	if (ret) {
-		dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret);
+	ret = clk_bulk_prepare_enable(qmp->num_pipe_clks, qmp->pipe_clks);
+	if (ret)
 		return ret;
-	}
 
 	/* Pull PHY out of reset state */
 	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
@@ -1950,7 +1950,7 @@ static int qmp_pcie_power_on(struct phy *phy)
 	return 0;
 
 err_disable_pipe_clk:
-	clk_disable_unprepare(qmp->pipe_clk);
+	clk_bulk_disable_unprepare(qmp->num_pipe_clks, qmp->pipe_clks);
 
 	return ret;
 }
@@ -1960,7 +1960,7 @@ static int qmp_pcie_power_off(struct phy *phy)
 	struct qmp_pcie *qmp = phy_get_drvdata(phy);
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
-	clk_disable_unprepare(qmp->pipe_clk);
+	clk_bulk_disable_unprepare(qmp->num_pipe_clks, qmp->pipe_clks);
 
 	/* PHY reset */
 	qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
@@ -2154,6 +2154,7 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np
 	struct platform_device *pdev = to_platform_device(qmp->dev);
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	struct device *dev = qmp->dev;
+	struct clk *clk;
 
 	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(qmp->serdes))
@@ -2206,12 +2207,16 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np
 		}
 	}
 
-	qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
-	if (IS_ERR(qmp->pipe_clk)) {
-		return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+	clk = devm_get_clk_from_child(dev, np, NULL);
+	if (IS_ERR(clk)) {
+		return dev_err_probe(dev, PTR_ERR(clk),
 				     "failed to get pipe clock\n");
 	}
 
+	qmp->num_pipe_clks = 1;
+	qmp->pipe_clks[0].id = "pipe";
+	qmp->pipe_clks[0].clk = clk;
+
 	return 0;
 }
 
-- 
GitLab


From d0a846ba28ddb589fc5a5741ae566e19c1034034 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:38 +0100
Subject: [PATCH 1244/4122] phy: qcom-qmp-pcie: add support for sc8280xp

Add support for the single and dual-lane PHYs found on SC8280XP.

Note that the SC8280XP binding does not try to describe every register
subregion and instead the driver holds the corresponding offsets.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-16-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c      | 299 +++++++++++++++++-
 .../phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h   |   2 +
 2 files changed, 291 insertions(+), 10 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index d671b05c73dde..f507a67a8361b 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -834,6 +834,143 @@ static const struct qmp_phy_init_tbl sc8180x_qmp_pcie_pcs_misc_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
 };
 
+static const struct qmp_phy_init_tbl sc8280xp_qmp_pcie_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_EN_CENTER, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER1, 0x31),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_PER2, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE0, 0xde),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE0, 0x07),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE1_MODE1, 0x4c),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SSC_STEP_SIZE2_MODE1, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_ENABLE1, 0x90),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x06),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x16),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x16),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x36),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x36),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x42),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0x0a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x1a),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x14),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x82),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x68),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE0, 0x55),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE0, 0x55),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE0, 0x03),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START1_MODE1, 0xab),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START2_MODE1, 0xaa),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_DIV_FRAC_START3_MODE1, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE0, 0x24),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE1_MODE1, 0xb4),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE2_MODE1, 0x03),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CLK_SELECT, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x01),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_CORECLK_DIV_MODE1, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xb9),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0x94),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x18),
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_BUF_ENABLE, 0x07),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_tx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x20),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x75),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x1d),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0c),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xbf),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xd8),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xdc),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xdc),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0x5c),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa6),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_TX_ADAPT_POST_THRESH, 0xf0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x07),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_pcs_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x05),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x77),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_RATE_SLEW_CNTRL1, 0x0b),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1, 0x00),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_EQ_CONFIG2, 0x0f),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_tx_tbl[] = {
+	QMP_PHY_INIT_CFG_LANE(QSERDES_V5_TX_PI_QEC_CTRL, 0x02, 1),
+	QMP_PHY_INIT_CFG_LANE(QSERDES_V5_TX_PI_QEC_CTRL, 0x04, 2),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xd5),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x11),
+	QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0c),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rx_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0xff),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0x7f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xd8),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xdc),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xdc),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0x5c),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa6),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x34),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf0),
+	QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_pcs_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_REFGEN_REQ_CONFIG1, 0x05),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x88),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_RATE_SLEW_CNTRL1, 0x0b),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG3, 0x0f),
+};
+
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl[] = {
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG2, 0x1d),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG4, 0x07),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
+	QMP_PHY_INIT_CFG(QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
+};
+
 static const struct qmp_phy_init_tbl sm8250_qmp_pcie_serdes_tbl[] = {
 	QMP_PHY_INIT_CFG(QSERDES_V4_COM_SYSCLK_EN_SEL, 0x08),
 	QMP_PHY_INIT_CFG(QSERDES_V4_COM_CLK_SELECT, 0x34),
@@ -1313,6 +1450,16 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_pcs_misc_tbl[] =
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x08),
 };
 
+struct qmp_pcie_offsets {
+	u16 serdes;
+	u16 pcs;
+	u16 pcs_misc;
+	u16 tx;
+	u16 rx;
+	u16 tx2;
+	u16 rx2;
+};
+
 struct qmp_phy_cfg_tbls {
 	const struct qmp_phy_init_tbl *serdes;
 	int serdes_num;
@@ -1330,6 +1477,8 @@ struct qmp_phy_cfg_tbls {
 struct qmp_phy_cfg {
 	int lanes;
 
+	const struct qmp_pcie_offsets *offsets;
+
 	/* Main init sequence for PHY blocks - serdes, tx, rx, pcs */
 	const struct qmp_phy_cfg_tbls tbls;
 	/*
@@ -1422,6 +1571,9 @@ static const char * const msm8996_phy_clk_l[] = {
 	"aux", "cfg_ahb", "ref",
 };
 
+static const char * const sc8280xp_pciephy_clk_l[] = {
+	"aux", "cfg_ahb", "ref", "rchng",
+};
 
 static const char * const sdm845_pciephy_clk_l[] = {
 	"aux", "cfg_ahb", "ref", "refgen",
@@ -1441,6 +1593,16 @@ static const char * const sdm845_pciephy_reset_l[] = {
 	"phy",
 };
 
+static const struct qmp_pcie_offsets qmp_pcie_offsets_v5 = {
+	.serdes		= 0,
+	.pcs		= 0x0200,
+	.pcs_misc	= 0x0600,
+	.tx		= 0x0e00,
+	.rx		= 0x1000,
+	.tx2		= 0x1600,
+	.rx2		= 0x1800,
+};
+
 static const struct qmp_phy_cfg ipq8074_pciephy_cfg = {
 	.lanes			= 1,
 
@@ -1700,6 +1862,76 @@ static const struct qmp_phy_cfg sc8180x_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 };
 
+static const struct qmp_phy_cfg sc8280xp_qmp_gen3x1_pciephy_cfg = {
+	.lanes			= 1,
+
+	.offsets		= &qmp_pcie_offsets_v5,
+
+	.tbls = {
+		.serdes		= sc8280xp_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl),
+		.tx		= sc8280xp_qmp_gen3x1_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_tx_tbl),
+		.rx		= sc8280xp_qmp_gen3x1_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_rx_tbl),
+		.pcs		= sc8280xp_qmp_gen3x1_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_pcs_tbl),
+		.pcs_misc	= sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_pcs_misc_tbl),
+	},
+
+	.tbls_rc = &(const struct qmp_phy_cfg_tbls) {
+		.serdes		= sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x1_pcie_rc_serdes_tbl),
+	},
+
+	.clk_list		= sc8280xp_pciephy_clk_l,
+	.num_clks		= ARRAY_SIZE(sc8280xp_pciephy_clk_l),
+	.reset_list		= sdm845_pciephy_reset_l,
+	.num_resets		= ARRAY_SIZE(sdm845_pciephy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= sm8250_pcie_regs_layout,
+
+	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
+	.phy_status		= PHYSTATUS,
+};
+
+static const struct qmp_phy_cfg sc8280xp_qmp_gen3x2_pciephy_cfg = {
+	.lanes			= 2,
+
+	.offsets		= &qmp_pcie_offsets_v5,
+
+	.tbls = {
+		.serdes		= sc8280xp_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl),
+		.tx		= sc8280xp_qmp_gen3x2_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_tx_tbl),
+		.rx		= sc8280xp_qmp_gen3x2_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rx_tbl),
+		.pcs		= sc8280xp_qmp_gen3x2_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_tbl),
+		.pcs_misc	= sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl),
+	},
+
+	.tbls_rc = &(const struct qmp_phy_cfg_tbls) {
+		.serdes		= sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl),
+	},
+
+	.clk_list		= sc8280xp_pciephy_clk_l,
+	.num_clks		= ARRAY_SIZE(sc8280xp_pciephy_clk_l),
+	.reset_list		= sdm845_pciephy_reset_l,
+	.num_resets		= ARRAY_SIZE(sdm845_pciephy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= sm8250_pcie_regs_layout,
+
+	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
+	.phy_status		= PHYSTATUS,
+};
+
 static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 	.lanes			= 2,
 
@@ -2220,11 +2452,49 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np
 	return 0;
 }
 
+static int qmp_pcie_parse_dt(struct qmp_pcie *qmp)
+{
+	struct platform_device *pdev = to_platform_device(qmp->dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	const struct qmp_pcie_offsets *offs = cfg->offsets;
+	struct device *dev = qmp->dev;
+	void __iomem *base;
+	int ret;
+
+	if (!offs)
+		return -EINVAL;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	qmp->serdes = base + offs->serdes;
+	qmp->pcs = base + offs->pcs;
+	qmp->pcs_misc = base + offs->pcs_misc;
+	qmp->tx = base + offs->tx;
+	qmp->rx = base + offs->rx;
+
+	if (cfg->lanes >= 2) {
+		qmp->tx2 = base + offs->tx2;
+		qmp->rx2 = base + offs->rx2;
+	}
+
+	qmp->num_pipe_clks = 2;
+	qmp->pipe_clks[0].id = "pipe";
+	qmp->pipe_clks[1].id = "pipediv2";
+
+	ret = devm_clk_bulk_get(dev, qmp->num_pipe_clks, qmp->pipe_clks);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int qmp_pcie_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *child;
 	struct phy_provider *phy_provider;
+	struct device_node *np;
 	struct qmp_pcie *qmp;
 	int ret;
 
@@ -2253,21 +2523,24 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	child = of_get_next_available_child(dev->of_node, NULL);
-	if (!child)
-		return -EINVAL;
-
-	ret = qmp_pcie_parse_dt_legacy(qmp, child);
+	/* Check for legacy binding with child node. */
+	np = of_get_next_available_child(dev->of_node, NULL);
+	if (np) {
+		ret = qmp_pcie_parse_dt_legacy(qmp, np);
+	} else {
+		np = of_node_get(dev->of_node);
+		ret = qmp_pcie_parse_dt(qmp);
+	}
 	if (ret)
 		goto err_node_put;
 
-	ret = phy_pipe_clk_register(qmp, child);
+	ret = phy_pipe_clk_register(qmp, np);
 	if (ret)
 		goto err_node_put;
 
 	qmp->mode = PHY_MODE_PCIE_RC;
 
-	qmp->phy = devm_phy_create(dev, child, &qmp_pcie_phy_ops);
+	qmp->phy = devm_phy_create(dev, np, &qmp_pcie_phy_ops);
 	if (IS_ERR(qmp->phy)) {
 		ret = PTR_ERR(qmp->phy);
 		dev_err(dev, "failed to create PHY: %d\n", ret);
@@ -2276,14 +2549,14 @@ static int qmp_pcie_probe(struct platform_device *pdev)
 
 	phy_set_drvdata(qmp->phy, qmp);
 
-	of_node_put(child);
+	of_node_put(np);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
 	return PTR_ERR_OR_ZERO(phy_provider);
 
 err_node_put:
-	of_node_put(child);
+	of_node_put(np);
 	return ret;
 }
 
@@ -2303,6 +2576,12 @@ static const struct of_device_id qmp_pcie_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sc8180x-qmp-pcie-phy",
 		.data = &sc8180x_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sc8280xp-qmp-gen3x1-pcie-phy",
+		.data = &sc8280xp_qmp_gen3x1_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy",
+		.data = &sc8280xp_qmp_gen3x2_pciephy_cfg,
 	}, {
 		.compatible = "qcom,sdm845-qhp-pcie-phy",
 		.data = &sdm845_qhp_pciephy_cfg,
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h
index 2e19fb3f051e0..a469ae2a10a13 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5.h
@@ -8,6 +8,8 @@
 #define QCOM_PHY_QMP_PCS_PCIE_V5_H_
 
 /* Only for QMP V5 PHY - PCS_PCIE registers */
+#define QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG2		0x0c
+#define QPHY_V5_PCS_PCIE_POWER_STATE_CONFIG4		0x14
 #define QPHY_V5_PCS_PCIE_ENDPOINT_REFCLK_DRIVE		0x20
 #define QPHY_V5_PCS_PCIE_INT_AUX_CLK_CONFIG1		0x54
 #define QPHY_V5_PCS_PCIE_OSC_DTCT_ACTIONS		0x94
-- 
GitLab


From 6c37a02b25180350ed7bd199c074a79fe6d16e51 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Sat, 5 Nov 2022 15:59:39 +0100
Subject: [PATCH 1245/4122] phy: qcom-qmp-pcie: add support for sc8280xp 4-lane
 PHYs

The PCIe2 and PCIe3 controllers and PHYs on SC8280XP can be used in
4-lane mode or as separate controllers and PHYs in 2-lane mode (e.g. as
PCIe2A and PCIe2B).

Add support for fetching the 4-lane configuration from the TCSR and
programming the lane registers of the second port when in 4-lane mode.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221105145939.20318-17-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/Kconfig             |   1 +
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 117 +++++++++++++++++++++++
 2 files changed, 118 insertions(+)

diff --git a/drivers/phy/qualcomm/Kconfig b/drivers/phy/qualcomm/Kconfig
index 5c98850f5a360..eb9ddc685b385 100644
--- a/drivers/phy/qualcomm/Kconfig
+++ b/drivers/phy/qualcomm/Kconfig
@@ -54,6 +54,7 @@ config PHY_QCOM_QMP
 	tristate "Qualcomm QMP PHY Driver"
 	depends on OF && COMMON_CLK && (ARCH_QCOM || COMPILE_TEST)
 	select GENERIC_PHY
+	select MFD_SYSCON
 	help
 	  Enable this to support the QMP PHY transceiver that is used
 	  with controllers such as PCIe, UFS, and USB on Qualcomm chips.
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index f507a67a8361b..111716e25b173 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -10,6 +10,7 @@
 #include <linux/io.h>
 #include <linux/iopoll.h>
 #include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -17,6 +18,7 @@
 #include <linux/phy/pcie.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 #include <linux/slab.h>
@@ -886,6 +888,10 @@ static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl[] =
 	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x14),
 };
 
+static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl[] = {
+	QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIAS_EN_CLKBUFLR_EN, 0x1c),
+};
+
 static const struct qmp_phy_init_tbl sc8280xp_qmp_gen3x1_pcie_tx_tbl[] = {
 	QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x20),
 	QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x75),
@@ -1491,6 +1497,9 @@ struct qmp_phy_cfg {
 	const struct qmp_phy_cfg_tbls *tbls_rc;
 	const struct qmp_phy_cfg_tbls *tbls_ep;
 
+	const struct qmp_phy_init_tbl *serdes_4ln_tbl;
+	int serdes_4ln_num;
+
 	/* clock ids to be requested */
 	const char * const *clk_list;
 	int num_clks;
@@ -1518,6 +1527,7 @@ struct qmp_pcie {
 	struct device *dev;
 
 	const struct qmp_phy_cfg *cfg;
+	bool tcsr_4ln_config;
 
 	void __iomem *serdes;
 	void __iomem *pcs;
@@ -1527,6 +1537,8 @@ struct qmp_pcie {
 	void __iomem *tx2;
 	void __iomem *rx2;
 
+	void __iomem *port_b;
+
 	struct clk_bulk_data *clks;
 	struct clk_bulk_data pipe_clks[2];
 	int num_pipe_clks;
@@ -1932,6 +1944,44 @@ static const struct qmp_phy_cfg sc8280xp_qmp_gen3x2_pciephy_cfg = {
 	.phy_status		= PHYSTATUS,
 };
 
+static const struct qmp_phy_cfg sc8280xp_qmp_gen3x4_pciephy_cfg = {
+	.lanes			= 4,
+
+	.offsets		= &qmp_pcie_offsets_v5,
+
+	.tbls = {
+		.serdes		= sc8280xp_qmp_pcie_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sc8280xp_qmp_pcie_serdes_tbl),
+		.tx		= sc8280xp_qmp_gen3x2_pcie_tx_tbl,
+		.tx_num		= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_tx_tbl),
+		.rx		= sc8280xp_qmp_gen3x2_pcie_rx_tbl,
+		.rx_num		= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rx_tbl),
+		.pcs		= sc8280xp_qmp_gen3x2_pcie_pcs_tbl,
+		.pcs_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_tbl),
+		.pcs_misc	= sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_pcs_misc_tbl),
+	},
+
+	.tbls_rc = &(const struct qmp_phy_cfg_tbls) {
+		.serdes		= sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl,
+		.serdes_num	= ARRAY_SIZE(sc8280xp_qmp_gen3x2_pcie_rc_serdes_tbl),
+	},
+
+	.serdes_4ln_tbl		= sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl,
+	.serdes_4ln_num		= ARRAY_SIZE(sc8280xp_qmp_gen3x4_pcie_serdes_4ln_tbl),
+
+	.clk_list		= sc8280xp_pciephy_clk_l,
+	.num_clks		= ARRAY_SIZE(sc8280xp_pciephy_clk_l),
+	.reset_list		= sdm845_pciephy_reset_l,
+	.num_resets		= ARRAY_SIZE(sdm845_pciephy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= sm8250_pcie_regs_layout,
+
+	.pwrdn_ctrl		= SW_PWRDN | REFCLK_DRV_DSBL,
+	.phy_status		= PHYSTATUS,
+};
+
 static const struct qmp_phy_cfg sdx55_qmp_pciephy_cfg = {
 	.lanes			= 2,
 
@@ -2054,6 +2104,24 @@ static void qmp_pcie_configure(void __iomem *base,
 	qmp_pcie_configure_lane(base, tbl, num, 0xff);
 }
 
+static void qmp_pcie_init_port_b(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
+{
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	const struct qmp_pcie_offsets *offs = cfg->offsets;
+	void __iomem *tx3, *rx3, *tx4, *rx4;
+
+	tx3 = qmp->port_b + offs->tx;
+	rx3 = qmp->port_b + offs->rx;
+	tx4 = qmp->port_b + offs->tx2;
+	rx4 = qmp->port_b + offs->rx2;
+
+	qmp_pcie_configure_lane(tx3, tbls->tx, tbls->tx_num, 1);
+	qmp_pcie_configure_lane(rx3, tbls->rx, tbls->rx_num, 1);
+
+	qmp_pcie_configure_lane(tx4, tbls->tx, tbls->tx_num, 2);
+	qmp_pcie_configure_lane(rx4, tbls->rx, tbls->rx_num, 2);
+}
+
 static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_cfg_tbls *tbls)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
@@ -2080,6 +2148,11 @@ static void qmp_pcie_init_registers(struct qmp_pcie *qmp, const struct qmp_phy_c
 
 	qmp_pcie_configure(pcs, tbls->pcs, tbls->pcs_num);
 	qmp_pcie_configure(pcs_misc, tbls->pcs_misc, tbls->pcs_misc_num);
+
+	if (cfg->lanes >= 4 && qmp->tcsr_4ln_config) {
+		qmp_pcie_configure(serdes, cfg->serdes_4ln_tbl, cfg->serdes_4ln_num);
+		qmp_pcie_init_port_b(qmp, tbls);
+	}
 }
 
 static int qmp_pcie_init(struct phy *phy)
@@ -2452,6 +2525,37 @@ static int qmp_pcie_parse_dt_legacy(struct qmp_pcie *qmp, struct device_node *np
 	return 0;
 }
 
+static int qmp_pcie_get_4ln_config(struct qmp_pcie *qmp)
+{
+	struct regmap *tcsr;
+	unsigned int args[2];
+	int ret;
+
+	tcsr = syscon_regmap_lookup_by_phandle_args(qmp->dev->of_node,
+						    "qcom,4ln-config-sel",
+						    ARRAY_SIZE(args), args);
+	if (IS_ERR(tcsr)) {
+		ret = PTR_ERR(tcsr);
+		if (ret == -ENOENT)
+			return 0;
+
+		dev_err(qmp->dev, "failed to lookup syscon: %d\n", ret);
+		return ret;
+	}
+
+	ret = regmap_test_bits(tcsr, args[0], BIT(args[1]));
+	if (ret < 0) {
+		dev_err(qmp->dev, "failed to read tcsr: %d\n", ret);
+		return ret;
+	}
+
+	qmp->tcsr_4ln_config = ret;
+
+	dev_dbg(qmp->dev, "4ln_config_sel = %d\n", qmp->tcsr_4ln_config);
+
+	return 0;
+}
+
 static int qmp_pcie_parse_dt(struct qmp_pcie *qmp)
 {
 	struct platform_device *pdev = to_platform_device(qmp->dev);
@@ -2464,6 +2568,10 @@ static int qmp_pcie_parse_dt(struct qmp_pcie *qmp)
 	if (!offs)
 		return -EINVAL;
 
+	ret = qmp_pcie_get_4ln_config(qmp);
+	if (ret)
+		return ret;
+
 	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
@@ -2479,6 +2587,12 @@ static int qmp_pcie_parse_dt(struct qmp_pcie *qmp)
 		qmp->rx2 = base + offs->rx2;
 	}
 
+	if (qmp->cfg->lanes >= 4 && qmp->tcsr_4ln_config) {
+		qmp->port_b = devm_platform_ioremap_resource(pdev, 1);
+		if (IS_ERR(qmp->port_b))
+			return PTR_ERR(qmp->port_b);
+	}
+
 	qmp->num_pipe_clks = 2;
 	qmp->pipe_clks[0].id = "pipe";
 	qmp->pipe_clks[1].id = "pipediv2";
@@ -2582,6 +2696,9 @@ static const struct of_device_id qmp_pcie_of_match_table[] = {
 	}, {
 		.compatible = "qcom,sc8280xp-qmp-gen3x2-pcie-phy",
 		.data = &sc8280xp_qmp_gen3x2_pciephy_cfg,
+	}, {
+		.compatible = "qcom,sc8280xp-qmp-gen3x4-pcie-phy",
+		.data = &sc8280xp_qmp_gen3x4_pciephy_cfg,
 	}, {
 		.compatible = "qcom,sdm845-qhp-pcie-phy",
 		.data = &sdm845_qhp_pciephy_cfg,
-- 
GitLab


From 2584068a9ef4a7bff3b9302dd058a4c95ce68631 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Thu, 3 Nov 2022 22:21:24 +0100
Subject: [PATCH 1246/4122] phy: qcom-qmp-pcie: split pcs_misc init cfg for
 ipq8074 pcs table

Commit af6643242d3a ("phy: qcom-qmp-pcie: split pcs_misc region for ipq6018
pcie gen3") reworked the pcs regs values and removed the 0x400 offset
for each pcs_misc regs.

This change caused the malfunction of ipq8074 downstream since it still
has the legacy pcs table where pcs_misc are not placed on a different
table and instead put together assuming the offset of 0x400 for the
related pcs_misc regs.

Split pcs_misc init cfg from the ipq8074 pcs init table to be handled
correctly to prepare for actual support for gen3 pcie for ipq8074.

Fixes: af6643242d3a ("phy: qcom-qmp-pcie: split pcs_misc region for ipq6018 pcie gen3")
Reported-by: Robert Marko <robimarko@gmail.com>
Tested-by: Robert Marko <robimarko@gmail.com>
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Link: https://lore.kernel.org/r/20221103212125.17156-1-ansuelsmth@gmail.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 111716e25b173..43bd4576bee2c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -487,6 +487,13 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_FLL_CNTRL1, 0x01),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_P2U3_WAKEUP_DLY_TIME_AUXCLK_H, 0x0),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_P2U3_WAKEUP_DLY_TIME_AUXCLK_L, 0x1),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_G12S1_TXDEEMPH_M3P5DB, 0x10),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_DCC_CAL_CONFIG, 0x01),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
+	QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x0d),
+};
+
+static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_misc_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_ACTIONS, 0x0),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P1_WAKEUP_DLY_TIME_AUXCLK_H, 0x00),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_L1P1_WAKEUP_DLY_TIME_AUXCLK_L, 0x01),
@@ -499,11 +506,7 @@ static const struct qmp_phy_init_tbl ipq8074_pcie_gen3_pcs_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG2, 0x50),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG4, 0x1a),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5, 0x6),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_G12S1_TXDEEMPH_M3P5DB, 0x10),
 	QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_DCC_CAL_CONFIG, 0x01),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
-	QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x0d),
 };
 
 static const struct qmp_phy_init_tbl sdm845_qmp_pcie_serdes_tbl[] = {
@@ -1652,6 +1655,8 @@ static const struct qmp_phy_cfg ipq8074_pciephy_gen3_cfg = {
 		.rx_num		= ARRAY_SIZE(ipq8074_pcie_gen3_rx_tbl),
 		.pcs		= ipq8074_pcie_gen3_pcs_tbl,
 		.pcs_num	= ARRAY_SIZE(ipq8074_pcie_gen3_pcs_tbl),
+		.pcs_misc	= ipq8074_pcie_gen3_pcs_misc_tbl,
+		.pcs_misc_num	= ARRAY_SIZE(ipq8074_pcie_gen3_pcs_misc_tbl),
 	},
 	.clk_list		= ipq8074_pciephy_clk_l,
 	.num_clks		= ARRAY_SIZE(ipq8074_pciephy_clk_l),
-- 
GitLab


From 9ddcd920f8edfe65c3670fbd0b49db00e1e562fe Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Wed, 2 Nov 2022 13:48:34 +0530
Subject: [PATCH 1247/4122] phy: qcom-qmp-pcie: Fix high latency with 4x2 PHY
 when ASPM is enabled

The PCIe QMP 4x2 RC PHY generates high latency when ASPM is enabled. This
seem to be fixed by clearing the QPHY_V5_20_PCS_PCIE_PRESET_P10_POST
register of the pcs_misc register space.

Fixes: 2c91bf6bf290 ("phy: qcom-qmp: Add SM8450 PCIe1 PHY support")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20221102081835.41892-1-manivannan.sadhasivam@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c           | 1 +
 drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 43bd4576bee2c..cb45f53a965bf 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1430,6 +1430,7 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rc_pcs_misc_tbl[] = {
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_ENDPOINT_REFCLK_DRIVE, 0xc1),
 	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS, 0x00),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_PCIE_PRESET_P10_POST, 0x00),
 };
 
 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_ep_serdes_tbl[] = {
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h
index c9fa90b454756..3d9713d348fe6 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-pcie-v5_20.h
@@ -11,6 +11,7 @@
 #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_MODE2_CONFIG5	0x084
 #define QPHY_V5_20_PCS_PCIE_OSC_DTCT_ACTIONS		0x090
 #define QPHY_V5_20_PCS_PCIE_EQ_CONFIG1			0x0a0
+#define QPHY_V5_20_PCS_PCIE_PRESET_P10_POST		0x0e0
 #define QPHY_V5_20_PCS_PCIE_G4_EQ_CONFIG5		0x108
 #define QPHY_V5_20_PCS_PCIE_G4_PRE_GAIN			0x15c
 #define QPHY_V5_20_PCS_PCIE_RX_MARGINING_CONFIG3	0x184
-- 
GitLab


From 883aebf6e1ea88145d64dcf940dbcb5181313338 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Wed, 2 Nov 2022 13:48:35 +0530
Subject: [PATCH 1248/4122] phy: qcom-qmp-pcie: Fix
 sm8450_qmp_gen4x2_pcie_pcs_tbl[] register names

sm8450_qmp_gen4x2_pcie_pcs_tbl[] contains the init sequence for PCS
registers of QMP PHY v5.20. So use the v5.20 specific register names.
Only major change is the rename of PCS_EQ_CONFIG{2/3} registers to
PCS_EQ_CONFIG{4/5}.

Fixes: 2c91bf6bf290 ("phy: qcom-qmp: Add SM8450 PCIe1 PHY support")
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20221102081835.41892-2-manivannan.sadhasivam@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c      |  8 ++++----
 drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h | 14 ++++++++++++++
 drivers/phy/qualcomm/phy-qcom-qmp.h           |  1 +
 3 files changed, 19 insertions(+), 4 deletions(-)
 create mode 100644 drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index cb45f53a965bf..47cccc4b35b22 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1414,10 +1414,10 @@ static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_rx_tbl[] = {
 };
 
 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_tbl[] = {
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG2, 0x16),
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_EQ_CONFIG3, 0x22),
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_G3S2_PRE_GAIN, 0x2e),
-	QMP_PHY_INIT_CFG(QPHY_V5_PCS_RX_SIGDET_LVL, 0x99),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG4, 0x16),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_EQ_CONFIG5, 0x22),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_G3S2_PRE_GAIN, 0x2e),
+	QMP_PHY_INIT_CFG(QPHY_V5_20_PCS_RX_SIGDET_LVL, 0x99),
 };
 
 static const struct qmp_phy_init_tbl sm8450_qmp_gen4x2_pcie_pcs_misc_tbl[] = {
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h
new file mode 100644
index 0000000000000..9a5a20daf62cd
--- /dev/null
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcs-v5_20.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2022, Linaro Ltd.
+ */
+
+#ifndef QCOM_PHY_QMP_PCS_V5_20_H_
+#define QCOM_PHY_QMP_PCS_V5_20_H_
+
+#define QPHY_V5_20_PCS_G3S2_PRE_GAIN			0x170
+#define QPHY_V5_20_PCS_RX_SIGDET_LVL			0x188
+#define QPHY_V5_20_PCS_EQ_CONFIG4			0x1e0
+#define QPHY_V5_20_PCS_EQ_CONFIG5			0x1e4
+
+#endif
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp.h b/drivers/phy/qualcomm/phy-qcom-qmp.h
index 26274e3c0cf95..29a48f0436d2a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp.h
+++ b/drivers/phy/qualcomm/phy-qcom-qmp.h
@@ -38,6 +38,7 @@
 #include "phy-qcom-qmp-pcs-pcie-v4_20.h"
 
 #include "phy-qcom-qmp-pcs-v5.h"
+#include "phy-qcom-qmp-pcs-v5_20.h"
 #include "phy-qcom-qmp-pcs-pcie-v5.h"
 #include "phy-qcom-qmp-pcs-usb-v5.h"
 #include "phy-qcom-qmp-pcs-ufs-v5.h"
-- 
GitLab


From d7abac084536b6d7efcc0c1edc7d9035c34314d9 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 8 Nov 2022 09:54:59 +0900
Subject: [PATCH 1249/4122] dt-bindings: phy: renesas: Document Renesas
 Ethernet SERDES

Document Renesas Etherent SERDES for R-Car S4-8 (r8a779f0).

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221108005500.3011449-2-yoshihiro.shimoda.uh@renesas.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/renesas,r8a779f0-ether-serdes.yaml    | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml

diff --git a/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml b/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml
new file mode 100644
index 0000000000000..93ab72874228c
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/renesas,r8a779f0-ether-serdes.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/renesas,r8a779f0-ether-serdes.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas Ethernet SERDES
+
+maintainers:
+  - Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
+
+properties:
+  compatible:
+    const: renesas,r8a779f0-ether-serdes
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  power-domains:
+    maxItems: 1
+
+  '#phy-cells':
+    description: Port number of SERDES.
+    const: 1
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - resets
+  - power-domains
+  - '#phy-cells'
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/r8a779f0-cpg-mssr.h>
+    #include <dt-bindings/power/r8a779f0-sysc.h>
+
+    phy@e6444000 {
+        compatible = "renesas,r8a779f0-ether-serdes";
+        reg = <0xe6444000 0xc00>;
+        clocks = <&cpg CPG_MOD 1506>;
+        power-domains = <&sysc R8A779F0_PD_ALWAYS_ON>;
+        resets = <&cpg 1506>;
+        #phy-cells = <1>;
+    };
-- 
GitLab


From 742859441d44be4b408274206244011a09618a91 Mon Sep 17 00:00:00 2001
From: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Date: Tue, 8 Nov 2022 09:55:00 +0900
Subject: [PATCH 1250/4122] phy: renesas: Add Renesas Ethernet SERDES driver
 for R-Car S4-8

Add Renesas Ethernet SERDES driver for R-Car S4-8 (r8a779f0).
The datasheet describes initialization procedure without any information
about registers' name/bits. So, this is all black magic to initialize
the hardware. Especially, all channels should be initialized at once.

Signed-off-by: Yoshihiro Shimoda <yoshihiro.shimoda.uh@renesas.com>
Link: https://lore.kernel.org/r/20221108005500.3011449-3-yoshihiro.shimoda.uh@renesas.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/renesas/Kconfig                 |   8 +
 drivers/phy/renesas/Makefile                |   1 +
 drivers/phy/renesas/r8a779f0-ether-serdes.c | 417 ++++++++++++++++++++
 3 files changed, 426 insertions(+)
 create mode 100644 drivers/phy/renesas/r8a779f0-ether-serdes.c

diff --git a/drivers/phy/renesas/Kconfig b/drivers/phy/renesas/Kconfig
index 111bdcae775c8..36505fc5f386e 100644
--- a/drivers/phy/renesas/Kconfig
+++ b/drivers/phy/renesas/Kconfig
@@ -2,6 +2,14 @@
 #
 # Phy drivers for Renesas platforms
 #
+# NOTE: Please sorted config names alphabetically.
+config PHY_R8A779F0_ETHERNET_SERDES
+	tristate "Renesas R-Car S4-8 Ethernet SERDES driver"
+	depends on ARCH_RENESAS || COMPILE_TEST
+	select GENERIC_PHY
+	help
+	  Support for Ethernet SERDES found on Renesas R-Car S4-8 SoCs.
+
 config PHY_RCAR_GEN2
 	tristate "Renesas R-Car generation 2 USB PHY driver"
 	depends on ARCH_RENESAS
diff --git a/drivers/phy/renesas/Makefile b/drivers/phy/renesas/Makefile
index b599ff8a4349c..8896d1919faa6 100644
--- a/drivers/phy/renesas/Makefile
+++ b/drivers/phy/renesas/Makefile
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_PHY_R8A779F0_ETHERNET_SERDES)	+= r8a779f0-ether-serdes.o
 obj-$(CONFIG_PHY_RCAR_GEN2)		+= phy-rcar-gen2.o
 obj-$(CONFIG_PHY_RCAR_GEN3_PCIE)	+= phy-rcar-gen3-pcie.o
 obj-$(CONFIG_PHY_RCAR_GEN3_USB2)	+= phy-rcar-gen3-usb2.o
diff --git a/drivers/phy/renesas/r8a779f0-ether-serdes.c b/drivers/phy/renesas/r8a779f0-ether-serdes.c
new file mode 100644
index 0000000000000..ec6594e6dc275
--- /dev/null
+++ b/drivers/phy/renesas/r8a779f0-ether-serdes.c
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Renesas Ethernet SERDES device driver
+ *
+ * Copyright (C) 2022 Renesas Electronics Corporation
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/iopoll.h>
+#include <linux/kernel.h>
+#include <linux/phy.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#define R8A779F0_ETH_SERDES_NUM			3
+#define R8A779F0_ETH_SERDES_OFFSET		0x0400
+#define R8A779F0_ETH_SERDES_BANK_SELECT		0x03fc
+#define R8A779F0_ETH_SERDES_TIMEOUT_US		100000
+#define R8A779F0_ETH_SERDES_NUM_RETRY_LINKUP	3
+#define R8A779F0_ETH_SERDES_NUM_RETRY_INIT	3
+
+struct r8a779f0_eth_serdes_drv_data;
+struct r8a779f0_eth_serdes_channel {
+	struct r8a779f0_eth_serdes_drv_data *dd;
+	struct phy *phy;
+	void __iomem *addr;
+	phy_interface_t phy_interface;
+	int speed;
+	int index;
+};
+
+struct r8a779f0_eth_serdes_drv_data {
+	void __iomem *addr;
+	struct platform_device *pdev;
+	struct reset_control *reset;
+	struct r8a779f0_eth_serdes_channel channel[R8A779F0_ETH_SERDES_NUM];
+	bool initialized;
+};
+
+/*
+ * The datasheet describes initialization procedure without any information
+ * about registers' name/bits. So, this is all black magic to initialize
+ * the hardware.
+ */
+static void r8a779f0_eth_serdes_write32(void __iomem *addr, u32 offs, u32 bank, u32 data)
+{
+	iowrite32(bank, addr + R8A779F0_ETH_SERDES_BANK_SELECT);
+	iowrite32(data, addr + offs);
+}
+
+static int
+r8a779f0_eth_serdes_reg_wait(struct r8a779f0_eth_serdes_channel *channel,
+			     u32 offs, u32 bank, u32 mask, u32 expected)
+{
+	int ret;
+	u32 val;
+
+	iowrite32(bank, channel->addr + R8A779F0_ETH_SERDES_BANK_SELECT);
+
+	ret = readl_poll_timeout_atomic(channel->addr + offs, val,
+					(val & mask) == expected,
+					1, R8A779F0_ETH_SERDES_TIMEOUT_US);
+	if (ret)
+		dev_dbg(&channel->phy->dev,
+			"%s: index %d, offs %x, bank %x, mask %x, expected %x\n",
+			 __func__, channel->index, offs, bank, mask, expected);
+
+	return ret;
+}
+
+static int
+r8a779f0_eth_serdes_common_init_ram(struct r8a779f0_eth_serdes_drv_data *dd)
+{
+	struct r8a779f0_eth_serdes_channel *channel;
+	int i, ret;
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) {
+		channel = &dd->channel[i];
+		ret = r8a779f0_eth_serdes_reg_wait(channel, 0x026c, 0x180, BIT(0), 0x01);
+		if (ret)
+			return ret;
+	}
+
+	r8a779f0_eth_serdes_write32(dd->addr, 0x026c, 0x180, 0x03);
+
+	return ret;
+}
+
+static int
+r8a779f0_eth_serdes_common_setting(struct r8a779f0_eth_serdes_channel *channel)
+{
+	struct r8a779f0_eth_serdes_drv_data *dd = channel->dd;
+
+	switch (channel->phy_interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+		r8a779f0_eth_serdes_write32(dd->addr, 0x0244, 0x180, 0x0097);
+		r8a779f0_eth_serdes_write32(dd->addr, 0x01d0, 0x180, 0x0060);
+		r8a779f0_eth_serdes_write32(dd->addr, 0x01d8, 0x180, 0x2200);
+		r8a779f0_eth_serdes_write32(dd->addr, 0x01d4, 0x180, 0x0000);
+		r8a779f0_eth_serdes_write32(dd->addr, 0x01e0, 0x180, 0x003d);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int
+r8a779f0_eth_serdes_chan_setting(struct r8a779f0_eth_serdes_channel *channel)
+{
+	int ret;
+
+	switch (channel->phy_interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x380, 0x2000);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x01c0, 0x180, 0x0011);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0248, 0x180, 0x0540);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0258, 0x180, 0x0015);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0100);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x01a0, 0x180, 0x0000);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x00d0, 0x180, 0x0002);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0150, 0x180, 0x0003);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x180, 0x0100);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0148, 0x180, 0x0100);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0174, 0x180, 0x0000);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0160, 0x180, 0x0007);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x01ac, 0x180, 0x0000);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x0310);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x00c8, 0x380, 0x0101);
+		ret = r8a779f0_eth_serdes_reg_wait(channel, 0x00c8, 0x0180, BIT(0), 0);
+		if (ret)
+			return ret;
+
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0148, 0x180, 0x0101);
+		ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0148, 0x0180, BIT(0), 0);
+		if (ret)
+			return ret;
+
+		r8a779f0_eth_serdes_write32(channel->addr, 0x00c4, 0x180, 0x1310);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x00d8, 0x180, 0x1800);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x00dc, 0x180, 0x0000);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x001c, 0x300, 0x0001);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x380, 0x2100);
+		ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0000, 0x0380, BIT(8), 0);
+		if (ret)
+			return ret;
+
+		if (channel->speed == 1000)
+			r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x0140);
+		else if (channel->speed == 100)
+			r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x2100);
+
+		/* For AN_ON */
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0004, 0x1f80, 0x0005);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0028, 0x1f80, 0x07a1);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f80, 0x0208);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int
+r8a779f0_eth_serdes_chan_speed(struct r8a779f0_eth_serdes_channel *channel)
+{
+	int ret;
+
+	switch (channel->phy_interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+		/* For AN_ON */
+		if (channel->speed == 1000)
+			r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x1140);
+		else if (channel->speed == 100)
+			r8a779f0_eth_serdes_write32(channel->addr, 0x0000, 0x1f00, 0x3100);
+		ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0008, 0x1f80, BIT(0), 1);
+		if (ret)
+			return ret;
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0008, 0x1f80, 0x0000);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+
+static int r8a779f0_eth_serdes_monitor_linkup(struct r8a779f0_eth_serdes_channel *channel)
+{
+	int i, ret;
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM_RETRY_LINKUP; i++) {
+		ret = r8a779f0_eth_serdes_reg_wait(channel, 0x0004, 0x300,
+						   BIT(2), BIT(2));
+		if (!ret)
+			break;
+
+		/* restart */
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0100);
+		udelay(1);
+		r8a779f0_eth_serdes_write32(channel->addr, 0x0144, 0x180, 0x0000);
+	}
+
+	return ret;
+}
+
+static int r8a779f0_eth_serdes_hw_init(struct r8a779f0_eth_serdes_channel *channel)
+{
+	struct r8a779f0_eth_serdes_drv_data *dd = channel->dd;
+	int i, ret;
+
+	if (dd->initialized)
+		return 0;
+
+	ret = r8a779f0_eth_serdes_common_init_ram(dd);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) {
+		ret = r8a779f0_eth_serdes_reg_wait(&dd->channel[i], 0x0000,
+						   0x300, BIT(15), 0);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++)
+		r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d4, 0x380, 0x0443);
+
+	ret = r8a779f0_eth_serdes_common_setting(channel);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++)
+		r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d0, 0x380, 0x0001);
+
+
+	r8a779f0_eth_serdes_write32(dd->addr, 0x0000, 0x380, 0x8000);
+
+	ret = r8a779f0_eth_serdes_common_init_ram(dd);
+	if (ret)
+		return ret;
+
+	ret = r8a779f0_eth_serdes_reg_wait(&dd->channel[0], 0x0000, 0x380, BIT(15), 0);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) {
+		ret = r8a779f0_eth_serdes_chan_setting(&dd->channel[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) {
+		ret = r8a779f0_eth_serdes_chan_speed(&dd->channel[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++)
+		r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03c0, 0x380, 0x0000);
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++)
+		r8a779f0_eth_serdes_write32(dd->channel[i].addr, 0x03d0, 0x380, 0x0000);
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) {
+		ret = r8a779f0_eth_serdes_monitor_linkup(&dd->channel[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int r8a779f0_eth_serdes_init(struct phy *p)
+{
+	struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p);
+	int i, ret;
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM_RETRY_INIT; i++) {
+		ret = r8a779f0_eth_serdes_hw_init(channel);
+		if (!ret) {
+			channel->dd->initialized = true;
+			break;
+		}
+		usleep_range(1000, 2000);
+	}
+
+	return ret;
+}
+
+static int r8a779f0_eth_serdes_set_mode(struct phy *p, enum phy_mode mode,
+					int submode)
+{
+	struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p);
+
+	if (mode != PHY_MODE_ETHERNET)
+		return -EOPNOTSUPP;
+
+	switch (submode) {
+	case PHY_INTERFACE_MODE_GMII:
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_USXGMII:
+		channel->phy_interface = submode;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int r8a779f0_eth_serdes_set_speed(struct phy *p, int speed)
+{
+	struct r8a779f0_eth_serdes_channel *channel = phy_get_drvdata(p);
+
+	channel->speed = speed;
+
+	return 0;
+}
+
+static const struct phy_ops r8a779f0_eth_serdes_ops = {
+	.init		= r8a779f0_eth_serdes_init,
+	.set_mode	= r8a779f0_eth_serdes_set_mode,
+	.set_speed	= r8a779f0_eth_serdes_set_speed,
+};
+
+static struct phy *r8a779f0_eth_serdes_xlate(struct device *dev,
+					     struct of_phandle_args *args)
+{
+	struct r8a779f0_eth_serdes_drv_data *dd = dev_get_drvdata(dev);
+
+	if (args->args[0] >= R8A779F0_ETH_SERDES_NUM)
+		return ERR_PTR(-ENODEV);
+
+	return dd->channel[args->args[0]].phy;
+}
+
+static const struct of_device_id r8a779f0_eth_serdes_of_table[] = {
+	{ .compatible = "renesas,r8a779f0-ether-serdes", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, r8a779f0_eth_serdes_of_table);
+
+static int r8a779f0_eth_serdes_probe(struct platform_device *pdev)
+{
+	struct r8a779f0_eth_serdes_drv_data *dd;
+	struct phy_provider *provider;
+	struct resource *res;
+	int i;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "invalid resource\n");
+		return -EINVAL;
+	}
+
+	dd = devm_kzalloc(&pdev->dev, sizeof(*dd), GFP_KERNEL);
+	if (!dd)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, dd);
+	dd->pdev = pdev;
+	dd->addr = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(dd->addr))
+		return PTR_ERR(dd->addr);
+
+	dd->reset = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(dd->reset))
+		return PTR_ERR(dd->reset);
+
+	reset_control_reset(dd->reset);
+
+	for (i = 0; i < R8A779F0_ETH_SERDES_NUM; i++) {
+		struct r8a779f0_eth_serdes_channel *channel = &dd->channel[i];
+
+		channel->phy = devm_phy_create(&pdev->dev, NULL,
+					       &r8a779f0_eth_serdes_ops);
+		if (IS_ERR(channel->phy))
+			return PTR_ERR(channel->phy);
+		channel->addr = dd->addr + R8A779F0_ETH_SERDES_OFFSET * i;
+		channel->dd = dd;
+		channel->index = i;
+		phy_set_drvdata(channel->phy, channel);
+	}
+
+	provider = devm_of_phy_provider_register(&pdev->dev,
+						 r8a779f0_eth_serdes_xlate);
+	if (IS_ERR(provider))
+		return PTR_ERR(provider);
+
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_get_sync(&pdev->dev);
+
+	return 0;
+}
+
+static int r8a779f0_eth_serdes_remove(struct platform_device *pdev)
+{
+	pm_runtime_put(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+
+	platform_set_drvdata(pdev, NULL);
+
+	return 0;
+}
+
+static struct platform_driver r8a779f0_eth_serdes_driver_platform = {
+	.probe = r8a779f0_eth_serdes_probe,
+	.remove = r8a779f0_eth_serdes_remove,
+	.driver = {
+		.name = "r8a779f0_eth_serdes",
+		.of_match_table = r8a779f0_eth_serdes_of_table,
+	}
+};
+module_platform_driver(r8a779f0_eth_serdes_driver_platform);
+MODULE_AUTHOR("Yoshihiro Shimoda");
+MODULE_DESCRIPTION("Renesas Ethernet SERDES device driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 5c20311d76cbaeb7ed2ecf9c8b8322f8fc4a7ae3 Mon Sep 17 00:00:00 2001
From: Leonid Ravich <lravich@gmail.com>
Date: Wed, 9 Nov 2022 11:57:17 +0200
Subject: [PATCH 1251/4122] IB/mad: Don't call to function that might sleep
 while in atomic context

Tracepoints are not allowed to sleep, as such the following splat is
generated due to call to ib_query_pkey() in atomic context.

WARNING: CPU: 0 PID: 1888000 at kernel/trace/ring_buffer.c:2492 rb_commit+0xc1/0x220
CPU: 0 PID: 1888000 Comm: kworker/u9:0 Kdump: loaded Tainted: G           OE    --------- -  - 4.18.0-305.3.1.el8.x86_64 #1
 Hardware name: Red Hat KVM, BIOS 1.13.0-2.module_el8.3.0+555+a55c8938 04/01/2014
 Workqueue: ib-comp-unb-wq ib_cq_poll_work [ib_core]
 RIP: 0010:rb_commit+0xc1/0x220
 RSP: 0000:ffffa8ac80f9bca0 EFLAGS: 00010202
 RAX: ffff8951c7c01300 RBX: ffff8951c7c14a00 RCX: 0000000000000246
 RDX: ffff8951c707c000 RSI: ffff8951c707c57c RDI: ffff8951c7c14a00
 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
 R10: ffff8951c7c01300 R11: 0000000000000001 R12: 0000000000000246
 R13: 0000000000000000 R14: ffffffff964c70c0 R15: 0000000000000000
 FS:  0000000000000000(0000) GS:ffff8951fbc00000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 00007f20e8f39010 CR3: 000000002ca10005 CR4: 0000000000170ef0
 Call Trace:
  ring_buffer_unlock_commit+0x1d/0xa0
  trace_buffer_unlock_commit_regs+0x3b/0x1b0
  trace_event_buffer_commit+0x67/0x1d0
  trace_event_raw_event_ib_mad_recv_done_handler+0x11c/0x160 [ib_core]
  ib_mad_recv_done+0x48b/0xc10 [ib_core]
  ? trace_event_raw_event_cq_poll+0x6f/0xb0 [ib_core]
  __ib_process_cq+0x91/0x1c0 [ib_core]
  ib_cq_poll_work+0x26/0x80 [ib_core]
  process_one_work+0x1a7/0x360
  ? create_worker+0x1a0/0x1a0
  worker_thread+0x30/0x390
  ? create_worker+0x1a0/0x1a0
  kthread+0x116/0x130
  ? kthread_flush_work_fn+0x10/0x10
  ret_from_fork+0x35/0x40
 ---[ end trace 78ba8509d3830a16 ]---

Fixes: 821bf1de45a1 ("IB/MAD: Add recv path trace point")
Signed-off-by: Leonid Ravich <lravich@gmail.com>
Link: https://lore.kernel.org/r/Y2t5feomyznrVj7V@leonid-Inspiron-3421
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/mad.c |  5 -----
 include/trace/events/ib_mad.h | 13 ++++---------
 2 files changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c
index 1893aa613ad73..674344eb8e2f4 100644
--- a/drivers/infiniband/core/mad.c
+++ b/drivers/infiniband/core/mad.c
@@ -59,9 +59,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
 			  struct ib_mad_qp_info *qp_info,
 			  struct trace_event_raw_ib_mad_send_template *entry)
 {
-	u16 pkey;
-	struct ib_device *dev = qp_info->port_priv->device;
-	u32 pnum = qp_info->port_priv->port_num;
 	struct ib_ud_wr *wr = &mad_send_wr->send_wr;
 	struct rdma_ah_attr attr = {};
 
@@ -69,8 +66,6 @@ static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
 
 	/* These are common */
 	entry->sl = attr.sl;
-	ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
-	entry->pkey = pkey;
 	entry->rqpn = wr->remote_qpn;
 	entry->rqkey = wr->remote_qkey;
 	entry->dlid = rdma_ah_get_dlid(&attr);
diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h
index 59363a083ecb9..d92691c78cff6 100644
--- a/include/trace/events/ib_mad.h
+++ b/include/trace/events/ib_mad.h
@@ -49,7 +49,6 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
 		__field(int,            retries_left)
 		__field(int,            max_retries)
 		__field(int,            retry)
-		__field(u16,            pkey)
 	),
 
 	TP_fast_assign(
@@ -89,7 +88,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
 		  "hdr : base_ver 0x%x class 0x%x class_ver 0x%x " \
 		  "method 0x%x status 0x%x class_specific 0x%x tid 0x%llx " \
 		  "attr_id 0x%x attr_mod 0x%x  => dlid 0x%08x sl %d "\
-		  "pkey 0x%x rpqn 0x%x rqpkey 0x%x",
+		  "rpqn 0x%x rqpkey 0x%x",
 		__entry->dev_index, __entry->port_num, __entry->qp_num,
 		__entry->agent_priv, be64_to_cpu(__entry->wrtid),
 		__entry->retries_left, __entry->max_retries,
@@ -100,7 +99,7 @@ DECLARE_EVENT_CLASS(ib_mad_send_template,
 		be16_to_cpu(__entry->class_specific),
 		be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
 		be32_to_cpu(__entry->attr_mod),
-		be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey,
+		be32_to_cpu(__entry->dlid), __entry->sl,
 		__entry->rqpn, __entry->rqkey
 	)
 );
@@ -204,7 +203,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
 		__field(u16,            wc_status)
 		__field(u32,            slid)
 		__field(u32,            dev_index)
-		__field(u16,            pkey)
 	),
 
 	TP_fast_assign(
@@ -224,9 +222,6 @@ TRACE_EVENT(ib_mad_recv_done_handler,
 		__entry->slid = wc->slid;
 		__entry->src_qp = wc->src_qp;
 		__entry->sl = wc->sl;
-		ib_query_pkey(qp_info->port_priv->device,
-			      qp_info->port_priv->port_num,
-			      wc->pkey_index, &__entry->pkey);
 		__entry->wc_status = wc->status;
 	),
 
@@ -234,7 +229,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
 		  "base_ver 0x%02x class 0x%02x class_ver 0x%02x " \
 		  "method 0x%02x status 0x%04x class_specific 0x%04x " \
 		  "tid 0x%016llx attr_id 0x%04x attr_mod 0x%08x " \
-		  "slid 0x%08x src QP%d, sl %d pkey 0x%04x",
+		  "slid 0x%08x src QP%d, sl %d",
 		__entry->dev_index, __entry->port_num, __entry->qp_num,
 		__entry->wc_status,
 		__entry->length,
@@ -244,7 +239,7 @@ TRACE_EVENT(ib_mad_recv_done_handler,
 		be16_to_cpu(__entry->class_specific),
 		be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id),
 		be32_to_cpu(__entry->attr_mod),
-		__entry->slid, __entry->src_qp, __entry->sl, __entry->pkey
+		__entry->slid, __entry->src_qp, __entry->sl
 	)
 );
 
-- 
GitLab


From cf87ac739e488055a6046a410caa8f4da108948f Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Nov 2022 18:49:08 +0800
Subject: [PATCH 1252/4122] KVM: x86: Introduce KVM_REQ_DIRTY_RING_SOFT_FULL

The VCPU isn't expected to be runnable when the dirty ring becomes soft
full, until the dirty pages are harvested and the dirty ring is reset
from userspace. So there is a check in each guest's entrace to see if
the dirty ring is soft full or not. The VCPU is stopped from running if
its dirty ring has been soft full. The similar check will be needed when
the feature is going to be supported on ARM64. As Marc Zyngier suggested,
a new event will avoid pointless overhead to check the size of the dirty
ring ('vcpu->kvm->dirty_ring_size') in each guest's entrance.

Add KVM_REQ_DIRTY_RING_SOFT_FULL. The event is raised when the dirty ring
becomes soft full in kvm_dirty_ring_push(). The event is only cleared in
the check, done in the newly added helper kvm_dirty_ring_check_request().
Since the VCPU is not runnable when the dirty ring becomes soft full, the
KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent the VCPU from
running until the dirty pages are harvested and the dirty ring is reset by
userspace.

kvm_dirty_ring_soft_full() becomes a private function with the newly added
helper kvm_dirty_ring_check_request(). The alignment for the various event
definitions in kvm_host.h is changed to tab character by the way. In order
to avoid using 'container_of()', the argument @ring is replaced by @vcpu
in kvm_dirty_ring_push().

Link: https://lore.kernel.org/kvmarm/87lerkwtm5.wl-maz@kernel.org
Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110104914.31280-2-gshan@redhat.com
---
 arch/x86/kvm/x86.c             | 15 ++++++---------
 include/linux/kvm_dirty_ring.h | 12 ++++--------
 include/linux/kvm_host.h       |  9 +++++----
 virt/kvm/dirty_ring.c          | 32 ++++++++++++++++++++++++++++++--
 virt/kvm/kvm_main.c            |  3 +--
 5 files changed, 46 insertions(+), 25 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9cf1ba865562e..d0d32e67ebf34 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10499,20 +10499,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 	bool req_immediate_exit = false;
 
-	/* Forbid vmenter if vcpu dirty ring is soft-full */
-	if (unlikely(vcpu->kvm->dirty_ring_size &&
-		     kvm_dirty_ring_soft_full(&vcpu->dirty_ring))) {
-		vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
-		trace_kvm_dirty_ring_exit(vcpu);
-		r = 0;
-		goto out;
-	}
-
 	if (kvm_request_pending(vcpu)) {
 		if (kvm_check_request(KVM_REQ_VM_DEAD, vcpu)) {
 			r = -EIO;
 			goto out;
 		}
+
+		if (kvm_dirty_ring_check_request(vcpu)) {
+			r = 0;
+			goto out;
+		}
+
 		if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
 			if (unlikely(!kvm_x86_ops.nested_ops->get_nested_state_pages(vcpu))) {
 				r = 0;
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 906f899813dc9..9c13c4c3d30c1 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -49,7 +49,7 @@ static inline int kvm_dirty_ring_reset(struct kvm *kvm,
 	return 0;
 }
 
-static inline void kvm_dirty_ring_push(struct kvm_dirty_ring *ring,
+static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu,
 				       u32 slot, u64 offset)
 {
 }
@@ -64,11 +64,6 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
 {
 }
 
-static inline bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
-{
-	return true;
-}
-
 #else /* CONFIG_HAVE_KVM_DIRTY_RING */
 
 u32 kvm_dirty_ring_get_rsvd_entries(void);
@@ -84,13 +79,14 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring);
  * returns =0: successfully pushed
  *         <0: unable to push, need to wait
  */
-void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset);
+void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset);
+
+bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu);
 
 /* for use in vm_operations_struct */
 struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset);
 
 void kvm_dirty_ring_free(struct kvm_dirty_ring *ring);
-bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring);
 
 #endif /* CONFIG_HAVE_KVM_DIRTY_RING */
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 00c3448ba7f8b..648d663f32c46 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -153,10 +153,11 @@ static inline bool is_error_page(struct page *page)
  * Architecture-independent vcpu->requests bit members
  * Bits 3-7 are reserved for more arch-independent bits.
  */
-#define KVM_REQ_TLB_FLUSH         (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_VM_DEAD           (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
-#define KVM_REQ_UNBLOCK           2
-#define KVM_REQUEST_ARCH_BASE     8
+#define KVM_REQ_TLB_FLUSH		(0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_VM_DEAD			(1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_UNBLOCK			2
+#define KVM_REQ_DIRTY_RING_SOFT_FULL	3
+#define KVM_REQUEST_ARCH_BASE		8
 
 /*
  * KVM_REQ_OUTSIDE_GUEST_MODE exists is purely as way to force the vCPU to
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index d6fabf238032a..fecbb7d75ad22 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -26,7 +26,7 @@ static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
 	return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
 }
 
-bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
+static bool kvm_dirty_ring_soft_full(struct kvm_dirty_ring *ring)
 {
 	return kvm_dirty_ring_used(ring) >= ring->soft_limit;
 }
@@ -142,13 +142,19 @@ int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring)
 
 	kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask);
 
+	/*
+	 * The request KVM_REQ_DIRTY_RING_SOFT_FULL will be cleared
+	 * by the VCPU thread next time when it enters the guest.
+	 */
+
 	trace_kvm_dirty_ring_reset(ring);
 
 	return count;
 }
 
-void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
+void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)
 {
+	struct kvm_dirty_ring *ring = &vcpu->dirty_ring;
 	struct kvm_dirty_gfn *entry;
 
 	/* It should never get full */
@@ -166,6 +172,28 @@ void kvm_dirty_ring_push(struct kvm_dirty_ring *ring, u32 slot, u64 offset)
 	kvm_dirty_gfn_set_dirtied(entry);
 	ring->dirty_index++;
 	trace_kvm_dirty_ring_push(ring, slot, offset);
+
+	if (kvm_dirty_ring_soft_full(ring))
+		kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
+}
+
+bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu)
+{
+	/*
+	 * The VCPU isn't runnable when the dirty ring becomes soft full.
+	 * The KVM_REQ_DIRTY_RING_SOFT_FULL event is always set to prevent
+	 * the VCPU from running until the dirty pages are harvested and
+	 * the dirty ring is reset by userspace.
+	 */
+	if (kvm_check_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu) &&
+	    kvm_dirty_ring_soft_full(&vcpu->dirty_ring)) {
+		kvm_make_request(KVM_REQ_DIRTY_RING_SOFT_FULL, vcpu);
+		vcpu->run->exit_reason = KVM_EXIT_DIRTY_RING_FULL;
+		trace_kvm_dirty_ring_exit(vcpu);
+		return true;
+	}
+
+	return false;
 }
 
 struct page *kvm_dirty_ring_get_page(struct kvm_dirty_ring *ring, u32 offset)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 46e8ed1ae6470..04b22d2f99d8c 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3314,8 +3314,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
 		u32 slot = (memslot->as_id << 16) | memslot->id;
 
 		if (kvm->dirty_ring_size)
-			kvm_dirty_ring_push(&vcpu->dirty_ring,
-					    slot, rel_gfn);
+			kvm_dirty_ring_push(vcpu, slot, rel_gfn);
 		else
 			set_bit_le(rel_gfn, memslot->dirty_bitmap);
 	}
-- 
GitLab


From e8a18565e59303ac12c626a161d72bd890bd2062 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Nov 2022 18:49:09 +0800
Subject: [PATCH 1253/4122] KVM: Move declaration of kvm_cpu_dirty_log_size()
 to kvm_dirty_ring.h

Not all architectures like ARM64 need to override the function. Move
its declaration to kvm_dirty_ring.h to avoid the following compiling
warning on ARM64 when the feature is enabled.

  arch/arm64/kvm/../../../virt/kvm/dirty_ring.c:14:12:        \
  warning: no previous prototype for 'kvm_cpu_dirty_log_size' \
  [-Wmissing-prototypes]                                      \
  int __weak kvm_cpu_dirty_log_size(void)

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110104914.31280-3-gshan@redhat.com
---
 arch/x86/include/asm/kvm_host.h | 2 --
 include/linux/kvm_dirty_ring.h  | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 7551b6f9c31c5..b4dbde7d9eb1d 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -2090,8 +2090,6 @@ static inline int kvm_cpu_get_apicid(int mps_cpu)
 #define GET_SMSTATE(type, buf, offset)		\
 	(*(type *)((buf) + (offset) - 0x7e00))
 
-int kvm_cpu_dirty_log_size(void);
-
 int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
 
 #define KVM_CLOCK_VALID_FLAGS						\
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 9c13c4c3d30c1..199ead37b1049 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -66,6 +66,7 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
 
 #else /* CONFIG_HAVE_KVM_DIRTY_RING */
 
+int kvm_cpu_dirty_log_size(void);
 u32 kvm_dirty_ring_get_rsvd_entries(void);
 int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
 
-- 
GitLab


From 86bdf3ebcfe1ded055282536fecce13001874740 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Nov 2022 18:49:10 +0800
Subject: [PATCH 1254/4122] KVM: Support dirty ring in conjunction with bitmap

ARM64 needs to dirty memory outside of a VCPU context when VGIC/ITS is
enabled. It's conflicting with that ring-based dirty page tracking always
requires a running VCPU context.

Introduce a new flavor of dirty ring that requires the use of both VCPU
dirty rings and a dirty bitmap. The expectation is that for non-VCPU
sources of dirty memory (such as the VGIC/ITS on arm64), KVM writes to
the dirty bitmap. Userspace should scan the dirty bitmap before migrating
the VM to the target.

Use an additional capability to advertise this behavior. The newly added
capability (KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP) can't be enabled before
KVM_CAP_DIRTY_LOG_RING_ACQ_REL on ARM64. In this way, the newly added
capability is treated as an extension of KVM_CAP_DIRTY_LOG_RING_ACQ_REL.

Suggested-by: Marc Zyngier <maz@kernel.org>
Suggested-by: Peter Xu <peterx@redhat.com>
Co-developed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Gavin Shan <gshan@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110104914.31280-4-gshan@redhat.com
---
 Documentation/virt/kvm/api.rst                | 34 ++++++++---
 .../virt/kvm/devices/arm-vgic-its.rst         |  5 +-
 include/linux/kvm_dirty_ring.h                |  7 +++
 include/linux/kvm_host.h                      |  1 +
 include/uapi/linux/kvm.h                      |  1 +
 virt/kvm/Kconfig                              |  6 ++
 virt/kvm/dirty_ring.c                         | 14 +++++
 virt/kvm/kvm_main.c                           | 61 ++++++++++++++++---
 8 files changed, 112 insertions(+), 17 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eee9f857a986f..1f1b09aa6db47 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8003,13 +8003,6 @@ flushing is done by the KVM_GET_DIRTY_LOG ioctl).  To achieve that, one
 needs to kick the vcpu out of KVM_RUN using a signal.  The resulting
 vmexit ensures that all dirty GFNs are flushed to the dirty rings.
 
-NOTE: the capability KVM_CAP_DIRTY_LOG_RING and the corresponding
-ioctl KVM_RESET_DIRTY_RINGS are mutual exclusive to the existing ioctls
-KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG.  After enabling
-KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual
-machine will switch to ring-buffer dirty page tracking and further
-KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail.
-
 NOTE: KVM_CAP_DIRTY_LOG_RING_ACQ_REL is the only capability that
 should be exposed by weakly ordered architecture, in order to indicate
 the additional memory ordering requirements imposed on userspace when
@@ -8018,6 +8011,33 @@ Architecture with TSO-like ordering (such as x86) are allowed to
 expose both KVM_CAP_DIRTY_LOG_RING and KVM_CAP_DIRTY_LOG_RING_ACQ_REL
 to userspace.
 
+After enabling the dirty rings, the userspace needs to detect the
+capability of KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP to see whether the
+ring structures can be backed by per-slot bitmaps. With this capability
+advertised, it means the architecture can dirty guest pages without
+vcpu/ring context, so that some of the dirty information will still be
+maintained in the bitmap structure. KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP
+can't be enabled if the capability of KVM_CAP_DIRTY_LOG_RING_ACQ_REL
+hasn't been enabled, or any memslot has been existing.
+
+Note that the bitmap here is only a backup of the ring structure. The
+use of the ring and bitmap combination is only beneficial if there is
+only a very small amount of memory that is dirtied out of vcpu/ring
+context. Otherwise, the stand-alone per-slot bitmap mechanism needs to
+be considered.
+
+To collect dirty bits in the backup bitmap, userspace can use the same
+KVM_GET_DIRTY_LOG ioctl. KVM_CLEAR_DIRTY_LOG isn't needed as long as all
+the generation of the dirty bits is done in a single pass. Collecting
+the dirty bitmap should be the very last thing that the VMM does before
+considering the state as complete. VMM needs to ensure that the dirty
+state is final and avoid missing dirty pages from another ioctl ordered
+after the bitmap collection.
+
+NOTE: One example of using the backup bitmap is saving arm64 vgic/its
+tables through KVM_DEV_ARM_{VGIC_GRP_CTRL, ITS_SAVE_TABLES} command on
+KVM device "kvm-arm-vgic-its" when dirty ring is enabled.
+
 8.30 KVM_CAP_XEN_HVM
 --------------------
 
diff --git a/Documentation/virt/kvm/devices/arm-vgic-its.rst b/Documentation/virt/kvm/devices/arm-vgic-its.rst
index d257eddbae296..e053124f77c48 100644
--- a/Documentation/virt/kvm/devices/arm-vgic-its.rst
+++ b/Documentation/virt/kvm/devices/arm-vgic-its.rst
@@ -52,7 +52,10 @@ KVM_DEV_ARM_VGIC_GRP_CTRL
 
     KVM_DEV_ARM_ITS_SAVE_TABLES
       save the ITS table data into guest RAM, at the location provisioned
-      by the guest in corresponding registers/table entries.
+      by the guest in corresponding registers/table entries. Should userspace
+      require a form of dirty tracking to identify which pages are modified
+      by the saving process, it should use a bitmap even if using another
+      mechanism to track the memory dirtied by the vCPUs.
 
       The layout of the tables in guest memory defines an ABI. The entries
       are laid out in little endian format as described in the last paragraph.
diff --git a/include/linux/kvm_dirty_ring.h b/include/linux/kvm_dirty_ring.h
index 199ead37b1049..4862c98d80d36 100644
--- a/include/linux/kvm_dirty_ring.h
+++ b/include/linux/kvm_dirty_ring.h
@@ -37,6 +37,11 @@ static inline u32 kvm_dirty_ring_get_rsvd_entries(void)
 	return 0;
 }
 
+static inline bool kvm_use_dirty_bitmap(struct kvm *kvm)
+{
+	return true;
+}
+
 static inline int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring,
 				       int index, u32 size)
 {
@@ -67,6 +72,8 @@ static inline void kvm_dirty_ring_free(struct kvm_dirty_ring *ring)
 #else /* CONFIG_HAVE_KVM_DIRTY_RING */
 
 int kvm_cpu_dirty_log_size(void);
+bool kvm_use_dirty_bitmap(struct kvm *kvm);
+bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm);
 u32 kvm_dirty_ring_get_rsvd_entries(void);
 int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 648d663f32c46..db83f63f4e61e 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -779,6 +779,7 @@ struct kvm {
 	pid_t userspace_pid;
 	unsigned int max_halt_poll_ns;
 	u32 dirty_ring_size;
+	bool dirty_ring_with_bitmap;
 	bool vm_bugged;
 	bool vm_dead;
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0d5d4419139ae..c87b5882d7aef 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_ZPCI_OP 221
 #define KVM_CAP_S390_CPU_TOPOLOGY 222
 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
+#define KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP 224
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index 800f9470e36b1..9fb1ff6f19e5f 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -33,6 +33,12 @@ config HAVE_KVM_DIRTY_RING_ACQ_REL
        bool
        select HAVE_KVM_DIRTY_RING
 
+# Allow enabling both the dirty bitmap and dirty ring. Only architectures
+# that need to dirty memory outside of a vCPU context should select this.
+config NEED_KVM_DIRTY_RING_WITH_BITMAP
+	bool
+	depends on HAVE_KVM_DIRTY_RING
+
 config HAVE_KVM_EVENTFD
        bool
        select EVENTFD
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index fecbb7d75ad22..c1cd7dfe4a908 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -21,6 +21,20 @@ u32 kvm_dirty_ring_get_rsvd_entries(void)
 	return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size();
 }
 
+bool kvm_use_dirty_bitmap(struct kvm *kvm)
+{
+	lockdep_assert_held(&kvm->slots_lock);
+
+	return !kvm->dirty_ring_size || kvm->dirty_ring_with_bitmap;
+}
+
+#ifndef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
+bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
+{
+	return false;
+}
+#endif
+
 static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring)
 {
 	return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 04b22d2f99d8c..be40d1ce6e913 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1617,7 +1617,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm,
 			new->dirty_bitmap = NULL;
 		else if (old && old->dirty_bitmap)
 			new->dirty_bitmap = old->dirty_bitmap;
-		else if (!kvm->dirty_ring_size) {
+		else if (kvm_use_dirty_bitmap(kvm)) {
 			r = kvm_alloc_dirty_bitmap(new);
 			if (r)
 				return r;
@@ -2060,8 +2060,8 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log,
 	unsigned long n;
 	unsigned long any = 0;
 
-	/* Dirty ring tracking is exclusive to dirty log tracking */
-	if (kvm->dirty_ring_size)
+	/* Dirty ring tracking may be exclusive to dirty log tracking */
+	if (!kvm_use_dirty_bitmap(kvm))
 		return -ENXIO;
 
 	*memslot = NULL;
@@ -2125,8 +2125,8 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
 	unsigned long *dirty_bitmap_buffer;
 	bool flush;
 
-	/* Dirty ring tracking is exclusive to dirty log tracking */
-	if (kvm->dirty_ring_size)
+	/* Dirty ring tracking may be exclusive to dirty log tracking */
+	if (!kvm_use_dirty_bitmap(kvm))
 		return -ENXIO;
 
 	as_id = log->slot >> 16;
@@ -2237,8 +2237,8 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
 	unsigned long *dirty_bitmap_buffer;
 	bool flush;
 
-	/* Dirty ring tracking is exclusive to dirty log tracking */
-	if (kvm->dirty_ring_size)
+	/* Dirty ring tracking may be exclusive to dirty log tracking */
+	if (!kvm_use_dirty_bitmap(kvm))
 		return -ENXIO;
 
 	as_id = log->slot >> 16;
@@ -3305,7 +3305,10 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
 	struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
 
 #ifdef CONFIG_HAVE_KVM_DIRTY_RING
-	if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm))
+	if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
+		return;
+
+	if (WARN_ON_ONCE(!kvm_arch_allow_write_without_running_vcpu(kvm) && !vcpu))
 		return;
 #endif
 
@@ -3313,7 +3316,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
 		unsigned long rel_gfn = gfn - memslot->base_gfn;
 		u32 slot = (memslot->as_id << 16) | memslot->id;
 
-		if (kvm->dirty_ring_size)
+		if (kvm->dirty_ring_size && vcpu)
 			kvm_dirty_ring_push(vcpu, slot, rel_gfn);
 		else
 			set_bit_le(rel_gfn, memslot->dirty_bitmap);
@@ -4482,6 +4485,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 		return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn);
 #else
 		return 0;
+#endif
+#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP
+	case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP:
 #endif
 	case KVM_CAP_BINARY_STATS_FD:
 	case KVM_CAP_SYSTEM_EVENT_DATA:
@@ -4558,6 +4564,20 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 	return -EINVAL;
 }
 
+static bool kvm_are_all_memslots_empty(struct kvm *kvm)
+{
+	int i;
+
+	lockdep_assert_held(&kvm->slots_lock);
+
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		if (!kvm_memslots_empty(__kvm_memslots(kvm, i)))
+			return false;
+	}
+
+	return true;
+}
+
 static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 					   struct kvm_enable_cap *cap)
 {
@@ -4588,6 +4608,29 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm,
 			return -EINVAL;
 
 		return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]);
+	case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: {
+		int r = -EINVAL;
+
+		if (!IS_ENABLED(CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP) ||
+		    !kvm->dirty_ring_size || cap->flags)
+			return r;
+
+		mutex_lock(&kvm->slots_lock);
+
+		/*
+		 * For simplicity, allow enabling ring+bitmap if and only if
+		 * there are no memslots, e.g. to ensure all memslots allocate
+		 * a bitmap after the capability is enabled.
+		 */
+		if (kvm_are_all_memslots_empty(kvm)) {
+			kvm->dirty_ring_with_bitmap = true;
+			r = 0;
+		}
+
+		mutex_unlock(&kvm->slots_lock);
+
+		return r;
+	}
 	default:
 		return kvm_vm_ioctl_enable_cap(kvm, cap);
 	}
-- 
GitLab


From 9cb1096f8590bc590326087bea65db932b53c3b5 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Nov 2022 18:49:11 +0800
Subject: [PATCH 1255/4122] KVM: arm64: Enable ring-based dirty memory tracking

Enable ring-based dirty memory tracking on ARM64:

  - Enable CONFIG_HAVE_KVM_DIRTY_RING_ACQ_REL.

  - Enable CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP.

  - Set KVM_DIRTY_LOG_PAGE_OFFSET for the ring buffer's physical page
    offset.

  - Add ARM64 specific kvm_arch_allow_write_without_running_vcpu() to
    keep the site of saving vgic/its tables out of the no-running-vcpu
    radar.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110104914.31280-5-gshan@redhat.com
---
 Documentation/virt/kvm/api.rst    |  2 +-
 arch/arm64/include/uapi/asm/kvm.h |  1 +
 arch/arm64/kvm/Kconfig            |  2 ++
 arch/arm64/kvm/arm.c              |  3 +++
 arch/arm64/kvm/vgic/vgic-its.c    | 20 ++++++++++++++++++++
 include/kvm/arm_vgic.h            |  1 +
 6 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 1f1b09aa6db47..773e4b202f479 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7921,7 +7921,7 @@ regardless of what has actually been exposed through the CPUID leaf.
 8.29 KVM_CAP_DIRTY_LOG_RING/KVM_CAP_DIRTY_LOG_RING_ACQ_REL
 ----------------------------------------------------------
 
-:Architectures: x86
+:Architectures: x86, arm64
 :Parameters: args[0] - size of the dirty log ring
 
 KVM is capable of tracking dirty memory using ring buffers that are
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 316917b987070..a7a857f1784d8 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -43,6 +43,7 @@
 #define __KVM_HAVE_VCPU_EVENTS
 
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+#define KVM_DIRTY_LOG_PAGE_OFFSET 64
 
 #define KVM_REG_SIZE(id)						\
 	(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 815cc118c675f..05da3c8f7e88f 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -32,6 +32,8 @@ menuconfig KVM
 	select KVM_VFIO
 	select HAVE_KVM_EVENTFD
 	select HAVE_KVM_IRQFD
+	select HAVE_KVM_DIRTY_RING_ACQ_REL
+	select NEED_KVM_DIRTY_RING_WITH_BITMAP
 	select HAVE_KVM_MSI
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQ_ROUTING
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 94d33e296e10c..6b097605e38ce 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -746,6 +746,9 @@ static int check_vcpu_requests(struct kvm_vcpu *vcpu)
 
 		if (kvm_check_request(KVM_REQ_SUSPEND, vcpu))
 			return kvm_vcpu_suspend(vcpu);
+
+		if (kvm_dirty_ring_check_request(vcpu))
+			return 0;
 	}
 
 	return 1;
diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c
index 733b53055f976..94a666dd1443d 100644
--- a/arch/arm64/kvm/vgic/vgic-its.c
+++ b/arch/arm64/kvm/vgic/vgic-its.c
@@ -2743,6 +2743,7 @@ static int vgic_its_has_attr(struct kvm_device *dev,
 static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
 {
 	const struct vgic_its_abi *abi = vgic_its_get_abi(its);
+	struct vgic_dist *dist = &kvm->arch.vgic;
 	int ret = 0;
 
 	if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */
@@ -2762,7 +2763,9 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
 		vgic_its_reset(kvm, its);
 		break;
 	case KVM_DEV_ARM_ITS_SAVE_TABLES:
+		dist->save_its_tables_in_progress = true;
 		ret = abi->save_tables(its);
+		dist->save_its_tables_in_progress = false;
 		break;
 	case KVM_DEV_ARM_ITS_RESTORE_TABLES:
 		ret = abi->restore_tables(its);
@@ -2775,6 +2778,23 @@ static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr)
 	return ret;
 }
 
+/*
+ * kvm_arch_allow_write_without_running_vcpu - allow writing guest memory
+ * without the running VCPU when dirty ring is enabled.
+ *
+ * The running VCPU is required to track dirty guest pages when dirty ring
+ * is enabled. Otherwise, the backup bitmap should be used to track the
+ * dirty guest pages. When vgic/its tables are being saved, the backup
+ * bitmap is used to track the dirty guest pages due to the missed running
+ * VCPU in the period.
+ */
+bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm)
+{
+	struct vgic_dist *dist = &kvm->arch.vgic;
+
+	return dist->save_its_tables_in_progress;
+}
+
 static int vgic_its_set_attr(struct kvm_device *dev,
 			     struct kvm_device_attr *attr)
 {
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4df9e73a8bb5f..9270cd87da3fc 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -263,6 +263,7 @@ struct vgic_dist {
 	struct vgic_io_device	dist_iodev;
 
 	bool			has_its;
+	bool			save_its_tables_in_progress;
 
 	/*
 	 * Contains the attributes and gpa of the LPI configuration table.
-- 
GitLab


From a737f5ffb1e883e580730122be11c9eb832a7749 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Nov 2022 18:49:12 +0800
Subject: [PATCH 1256/4122] KVM: selftests: Use host page size to map ring
 buffer in dirty_log_test

In vcpu_map_dirty_ring(), the guest's page size is used to figure out
the offset in the virtual area. It works fine when we have same page
sizes on host and guest. However, it fails when the page sizes on host
and guest are different on arm64, like below error messages indicates.

  # ./dirty_log_test -M dirty-ring -m 7
  Setting log mode to: 'dirty-ring'
  Test iterations: 32, interval: 10 (ms)
  Testing guest mode: PA-bits:40,  VA-bits:48, 64K pages
  guest physical test memory offset: 0xffbffc0000
  vcpu stops because vcpu is kicked out...
  Notifying vcpu to continue
  vcpu continues now.
  ==== Test Assertion Failure ====
  lib/kvm_util.c:1477: addr == MAP_FAILED
  pid=9000 tid=9000 errno=0 - Success
  1  0x0000000000405f5b: vcpu_map_dirty_ring at kvm_util.c:1477
  2  0x0000000000402ebb: dirty_ring_collect_dirty_pages at dirty_log_test.c:349
  3  0x00000000004029b3: log_mode_collect_dirty_pages at dirty_log_test.c:478
  4  (inlined by) run_test at dirty_log_test.c:778
  5  (inlined by) run_test at dirty_log_test.c:691
  6  0x0000000000403a57: for_each_guest_mode at guest_modes.c:105
  7  0x0000000000401ccf: main at dirty_log_test.c:921
  8  0x0000ffffb06ec79b: ?? ??:0
  9  0x0000ffffb06ec86b: ?? ??:0
  10 0x0000000000401def: _start at ??:?
  Dirty ring mapped private

Fix the issue by using host's page size to map the ring buffer.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110104914.31280-6-gshan@redhat.com
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f1cb1627161fd..89a1a420ebd5e 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1506,7 +1506,7 @@ struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
 
 void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
 {
-	uint32_t page_size = vcpu->vm->page_size;
+	uint32_t page_size = getpagesize();
 	uint32_t size = vcpu->vm->dirty_ring_size;
 
 	TEST_ASSERT(size > 0, "Should enable dirty ring first");
-- 
GitLab


From 7167190ddb863bd061c0c6b61f4cec94184b40da Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Nov 2022 18:49:13 +0800
Subject: [PATCH 1257/4122] KVM: selftests: Clear dirty ring states between two
 modes in dirty_log_test

There are two states, which need to be cleared before next mode
is executed. Otherwise, we will hit failure as the following messages
indicate.

- The variable 'dirty_ring_vcpu_ring_full' shared by main and vcpu
  thread. It's indicating if the vcpu exit due to full ring buffer.
  The value can be carried from previous mode (VM_MODE_P40V48_4K) to
  current one (VM_MODE_P40V48_64K) when VM_MODE_P40V48_16K isn't
  supported.

- The current ring buffer index needs to be reset before next mode
  (VM_MODE_P40V48_64K) is executed. Otherwise, the stale value is
  carried from previous mode (VM_MODE_P40V48_4K).

  # ./dirty_log_test -M dirty-ring
  Setting log mode to: 'dirty-ring'
  Test iterations: 32, interval: 10 (ms)
  Testing guest mode: PA-bits:40,  VA-bits:48,  4K pages
  guest physical test memory offset: 0xffbfffc000
    :
  Dirtied 995328 pages
  Total bits checked: dirty (1012434), clear (7114123), track_next (966700)
  Testing guest mode: PA-bits:40,  VA-bits:48, 64K pages
  guest physical test memory offset: 0xffbffc0000
  vcpu stops because vcpu is kicked out...
  vcpu continues now.
  Notifying vcpu to continue
  Iteration 1 collected 0 pages
  vcpu stops because dirty ring is full...
  vcpu continues now.
  vcpu stops because dirty ring is full...
  vcpu continues now.
  vcpu stops because dirty ring is full...
  ==== Test Assertion Failure ====
  dirty_log_test.c:369: cleared == count
  pid=10541 tid=10541 errno=22 - Invalid argument
     1	0x0000000000403087: dirty_ring_collect_dirty_pages at dirty_log_test.c:369
     2	0x0000000000402a0b: log_mode_collect_dirty_pages at dirty_log_test.c:492
     3	 (inlined by) run_test at dirty_log_test.c:795
     4	 (inlined by) run_test at dirty_log_test.c:705
     5	0x0000000000403a37: for_each_guest_mode at guest_modes.c:100
     6	0x0000000000401ccf: main at dirty_log_test.c:938
     7	0x0000ffff9ecd279b: ?? ??:0
     8	0x0000ffff9ecd286b: ?? ??:0
     9	0x0000000000401def: _start at ??:?
  Reset dirty pages (0) mismatch with collected (35566)

Fix the issues by clearing 'dirty_ring_vcpu_ring_full' and the ring
buffer index before next new mode is to be executed.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110104914.31280-7-gshan@redhat.com
---
 tools/testing/selftests/kvm/dirty_log_test.c | 27 ++++++++++++--------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index b5234d6efbe15..8758c10ec8503 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -226,13 +226,15 @@ static void clear_log_create_vm_done(struct kvm_vm *vm)
 }
 
 static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
-					  void *bitmap, uint32_t num_pages)
+					  void *bitmap, uint32_t num_pages,
+					  uint32_t *unused)
 {
 	kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
 }
 
 static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
-					  void *bitmap, uint32_t num_pages)
+					  void *bitmap, uint32_t num_pages,
+					  uint32_t *unused)
 {
 	kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
 	kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
@@ -329,10 +331,9 @@ static void dirty_ring_continue_vcpu(void)
 }
 
 static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
-					   void *bitmap, uint32_t num_pages)
+					   void *bitmap, uint32_t num_pages,
+					   uint32_t *ring_buf_idx)
 {
-	/* We only have one vcpu */
-	static uint32_t fetch_index = 0;
 	uint32_t count = 0, cleared;
 	bool continued_vcpu = false;
 
@@ -349,7 +350,8 @@ static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
 
 	/* Only have one vcpu */
 	count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
-				       slot, bitmap, num_pages, &fetch_index);
+				       slot, bitmap, num_pages,
+				       ring_buf_idx);
 
 	cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
 
@@ -406,7 +408,8 @@ struct log_mode {
 	void (*create_vm_done)(struct kvm_vm *vm);
 	/* Hook to collect the dirty pages into the bitmap provided */
 	void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
-				     void *bitmap, uint32_t num_pages);
+				     void *bitmap, uint32_t num_pages,
+				     uint32_t *ring_buf_idx);
 	/* Hook to call when after each vcpu run */
 	void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
 	void (*before_vcpu_join) (void);
@@ -471,13 +474,14 @@ static void log_mode_create_vm_done(struct kvm_vm *vm)
 }
 
 static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
-					 void *bitmap, uint32_t num_pages)
+					 void *bitmap, uint32_t num_pages,
+					 uint32_t *ring_buf_idx)
 {
 	struct log_mode *mode = &log_modes[host_log_mode];
 
 	TEST_ASSERT(mode->collect_dirty_pages != NULL,
 		    "collect_dirty_pages() is required for any log mode!");
-	mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages);
+	mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
 }
 
 static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
@@ -696,6 +700,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	unsigned long *bmap;
+	uint32_t ring_buf_idx = 0;
 
 	if (!log_mode_supported()) {
 		print_skip("Log mode '%s' not supported",
@@ -771,6 +776,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	host_dirty_count = 0;
 	host_clear_count = 0;
 	host_track_next_count = 0;
+	WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
 
 	pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
 
@@ -778,7 +784,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		/* Give the vcpu thread some time to dirty some pages */
 		usleep(p->interval * 1000);
 		log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
-					     bmap, host_num_pages);
+					     bmap, host_num_pages,
+					     &ring_buf_idx);
 
 		/*
 		 * See vcpu_sync_stop_requested definition for details on why
-- 
GitLab


From dc6df7d4d0633e65850d5372ae9f1234bcc6e26e Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 10 Nov 2022 18:49:14 +0800
Subject: [PATCH 1258/4122] KVM: selftests: Automate choosing dirty ring size
 in dirty_log_test

In the dirty ring case, we rely on vcpu exit due to full dirty ring
state. On ARM64 system, there are 4096 host pages when the host
page size is 64KB. In this case, the vcpu never exits due to the
full dirty ring state. The similar case is 4KB page size on host
and 64KB page size on guest. The vcpu corrupts same set of host
pages, but the dirty page information isn't collected in the main
thread. This leads to infinite loop as the following log shows.

  # ./dirty_log_test -M dirty-ring -c 65536 -m 5
  Setting log mode to: 'dirty-ring'
  Test iterations: 32, interval: 10 (ms)
  Testing guest mode: PA-bits:40,  VA-bits:48,  4K pages
  guest physical test memory offset: 0xffbffe0000
  vcpu stops because vcpu is kicked out...
  Notifying vcpu to continue
  vcpu continues now.
  Iteration 1 collected 576 pages
  <No more output afterwards>

Fix the issue by automatically choosing the best dirty ring size,
to ensure vcpu exit due to full dirty ring state. The option '-c'
becomes a hint to the dirty ring count, instead of the value of it.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110104914.31280-8-gshan@redhat.com
---
 tools/testing/selftests/kvm/dirty_log_test.c | 26 +++++++++++++++++---
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 8758c10ec8503..a87e5f78ebf10 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -24,6 +24,9 @@
 #include "guest_modes.h"
 #include "processor.h"
 
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K  12
+
 /* The memory slot index to track dirty pages */
 #define TEST_MEM_SLOT_INDEX		1
 
@@ -273,6 +276,24 @@ static bool dirty_ring_supported(void)
 
 static void dirty_ring_create_vm_done(struct kvm_vm *vm)
 {
+	uint64_t pages;
+	uint32_t limit;
+
+	/*
+	 * We rely on vcpu exit due to full dirty ring state. Adjust
+	 * the ring buffer size to ensure we're able to reach the
+	 * full dirty ring state.
+	 */
+	pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
+	pages = vm_adjust_num_guest_pages(vm->mode, pages);
+	if (vm->page_size < getpagesize())
+		pages = vm_num_host_pages(vm->mode, pages);
+
+	limit = 1 << (31 - __builtin_clz(pages));
+	test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count));
+	test_dirty_ring_count = min(limit, test_dirty_ring_count);
+	pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count);
+
 	/*
 	 * Switch to dirty ring mode after VM creation but before any
 	 * of the vcpu creation.
@@ -685,9 +706,6 @@ static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
 	return vm;
 }
 
-#define DIRTY_MEM_BITS 30 /* 1G */
-#define PAGE_SHIFT_4K  12
-
 struct test_params {
 	unsigned long iterations;
 	unsigned long interval;
@@ -830,7 +848,7 @@ static void help(char *name)
 	printf("usage: %s [-h] [-i iterations] [-I interval] "
 	       "[-p offset] [-m mode]\n", name);
 	puts("");
-	printf(" -c: specify dirty ring size, in number of entries\n");
+	printf(" -c: hint to dirty ring size, in number of entries\n");
 	printf("     (only useful for dirty-ring test; default: %"PRIu32")\n",
 	       TEST_DIRTY_RING_COUNT);
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
-- 
GitLab


From 8dab99c9eab3162bfb4326c35579a3388dbf68f2 Mon Sep 17 00:00:00 2001
From: Guillaume La Roque <glaroque@baylibre.com>
Date: Mon, 7 Nov 2022 18:29:21 +0100
Subject: [PATCH 1259/4122] gpio: davinci: add support of module build

Added module build support for the davinci gpio driver

Signed-off-by: Guillaume La Roque <glaroque@baylibre.com>
Signed-off-by: Nicolas Frayer <nfrayer@baylibre.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Kconfig        |  2 +-
 drivers/gpio/gpio-davinci.c | 11 +++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig
index 4bfedb0109a76..ec7cfd4f52b1e 100644
--- a/drivers/gpio/Kconfig
+++ b/drivers/gpio/Kconfig
@@ -228,7 +228,7 @@ config GPIO_CLPS711X
 	  Say yes here to support GPIO on CLPS711X SoCs.
 
 config GPIO_DAVINCI
-	bool "TI Davinci/Keystone GPIO support"
+	tristate "TI Davinci/Keystone GPIO support"
 	default y if ARCH_DAVINCI
 	depends on (ARM || ARM64) && (ARCH_DAVINCI || ARCH_KEYSTONE || ARCH_K3)
 	help
diff --git a/drivers/gpio/gpio-davinci.c b/drivers/gpio/gpio-davinci.c
index 1018860c83c27..fa51a91afa54f 100644
--- a/drivers/gpio/gpio-davinci.c
+++ b/drivers/gpio/gpio-davinci.c
@@ -727,3 +727,14 @@ static int __init davinci_gpio_drv_reg(void)
 	return platform_driver_register(&davinci_gpio_driver);
 }
 postcore_initcall(davinci_gpio_drv_reg);
+
+static void __exit davinci_gpio_exit(void)
+{
+	platform_driver_unregister(&davinci_gpio_driver);
+}
+module_exit(davinci_gpio_exit);
+
+MODULE_AUTHOR("Jan Kotas <jank@cadence.com>");
+MODULE_DESCRIPTION("DAVINCI GPIO driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:gpio-davinci");
-- 
GitLab


From dfc7a7769ab7f2a2f629c673717ef1fa7b63aa42 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:31 +0000
Subject: [PATCH 1260/4122] KVM: arm64: Combine visitor arguments into a
 context structure

Passing new arguments by value to the visitor callbacks is extremely
inflexible for stuffing new parameters used by only some of the
visitors. Use a context structure instead and pass the pointer through
to the visitor callback.

While at it, redefine the 'flags' parameter to the visitor to contain
the bit indicating the phase of the walk. Pass the entire set of flags
through the context structure such that the walker can communicate
additional state to the visitor callback.

No functional change intended.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-2-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h  |  15 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c |  10 +-
 arch/arm64/kvm/hyp/nvhe/setup.c       |  16 +-
 arch/arm64/kvm/hyp/pgtable.c          | 269 +++++++++++++-------------
 4 files changed, 154 insertions(+), 156 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 3252eb50ecfe5..607f9bb8aab4e 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -199,10 +199,17 @@ enum kvm_pgtable_walk_flags {
 	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
 };
 
-typedef int (*kvm_pgtable_visitor_fn_t)(u64 addr, u64 end, u32 level,
-					kvm_pte_t *ptep,
-					enum kvm_pgtable_walk_flags flag,
-					void * const arg);
+struct kvm_pgtable_visit_ctx {
+	kvm_pte_t				*ptep;
+	void					*arg;
+	u64					addr;
+	u64					end;
+	u32					level;
+	enum kvm_pgtable_walk_flags		flags;
+};
+
+typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
+					enum kvm_pgtable_walk_flags visit);
 
 /**
  * struct kvm_pgtable_walker - Hook into a page-table walk.
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 1e78acf9662eb..8f5b6a36a039e 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -417,13 +417,11 @@ struct check_walk_data {
 	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
 };
 
-static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
-				      kvm_pte_t *ptep,
-				      enum kvm_pgtable_walk_flags flag,
-				      void * const arg)
+static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
+				      enum kvm_pgtable_walk_flags visit)
 {
-	struct check_walk_data *d = arg;
-	kvm_pte_t pte = *ptep;
+	struct check_walk_data *d = ctx->arg;
+	kvm_pte_t pte = *ctx->ptep;
 
 	if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
 		return -EINVAL;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index e8d4ea2fcfa09..a293cf5eba1b1 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -186,15 +186,13 @@ static void hpool_put_page(void *addr)
 	hyp_put_page(&hpool, addr);
 }
 
-static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
-					 kvm_pte_t *ptep,
-					 enum kvm_pgtable_walk_flags flag,
-					 void * const arg)
+static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx,
+					 enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = arg;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
 	enum kvm_pgtable_prot prot;
 	enum pkvm_page_state state;
-	kvm_pte_t pte = *ptep;
+	kvm_pte_t pte = *ctx->ptep;
 	phys_addr_t phys;
 
 	if (!kvm_pte_valid(pte))
@@ -205,11 +203,11 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
 	 * was unable to access the hyp_vmemmap and so the buddy allocator has
 	 * initialised the refcount to '1'.
 	 */
-	mm_ops->get_page(ptep);
-	if (flag != KVM_PGTABLE_WALK_LEAF)
+	mm_ops->get_page(ctx->ptep);
+	if (visit != KVM_PGTABLE_WALK_LEAF)
 		return 0;
 
-	if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
+	if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
 		return -EINVAL;
 
 	phys = kvm_pte_to_phys(pte);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index cdf8e76b0be14..900c8b9c0cfcd 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -64,20 +64,20 @@ static bool kvm_phys_is_valid(u64 phys)
 	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
 }
 
-static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level)
+static bool kvm_block_mapping_supported(const struct kvm_pgtable_visit_ctx *ctx, u64 phys)
 {
-	u64 granule = kvm_granule_size(level);
+	u64 granule = kvm_granule_size(ctx->level);
 
-	if (!kvm_level_supports_block_mapping(level))
+	if (!kvm_level_supports_block_mapping(ctx->level))
 		return false;
 
-	if (granule > (end - addr))
+	if (granule > (ctx->end - ctx->addr))
 		return false;
 
 	if (kvm_phys_is_valid(phys) && !IS_ALIGNED(phys, granule))
 		return false;
 
-	return IS_ALIGNED(addr, granule);
+	return IS_ALIGNED(ctx->addr, granule);
 }
 
 static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
@@ -172,12 +172,12 @@ static kvm_pte_t kvm_init_invalid_leaf_owner(u8 owner_id)
 	return FIELD_PREP(KVM_INVALID_PTE_OWNER_MASK, owner_id);
 }
 
-static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data, u64 addr,
-				  u32 level, kvm_pte_t *ptep,
-				  enum kvm_pgtable_walk_flags flag)
+static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
+				  const struct kvm_pgtable_visit_ctx *ctx,
+				  enum kvm_pgtable_walk_flags visit)
 {
 	struct kvm_pgtable_walker *walker = data->walker;
-	return walker->cb(addr, data->end, level, ptep, flag, walker->arg);
+	return walker->cb(ctx, visit);
 }
 
 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
@@ -186,20 +186,24 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
 static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 				      kvm_pte_t *ptep, u32 level)
 {
+	enum kvm_pgtable_walk_flags flags = data->walker->flags;
+	struct kvm_pgtable_visit_ctx ctx = {
+		.ptep	= ptep,
+		.arg	= data->walker->arg,
+		.addr	= data->addr,
+		.end	= data->end,
+		.level	= level,
+		.flags	= flags,
+	};
 	int ret = 0;
-	u64 addr = data->addr;
 	kvm_pte_t *childp, pte = *ptep;
 	bool table = kvm_pte_table(pte, level);
-	enum kvm_pgtable_walk_flags flags = data->walker->flags;
 
-	if (table && (flags & KVM_PGTABLE_WALK_TABLE_PRE)) {
-		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
-					     KVM_PGTABLE_WALK_TABLE_PRE);
-	}
+	if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
+		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
 
-	if (!table && (flags & KVM_PGTABLE_WALK_LEAF)) {
-		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
-					     KVM_PGTABLE_WALK_LEAF);
+	if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
+		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
 		pte = *ptep;
 		table = kvm_pte_table(pte, level);
 	}
@@ -218,10 +222,8 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 	if (ret)
 		goto out;
 
-	if (flags & KVM_PGTABLE_WALK_TABLE_POST) {
-		ret = kvm_pgtable_visitor_cb(data, addr, level, ptep,
-					     KVM_PGTABLE_WALK_TABLE_POST);
-	}
+	if (ctx.flags & KVM_PGTABLE_WALK_TABLE_POST)
+		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_POST);
 
 out:
 	return ret;
@@ -292,13 +294,13 @@ struct leaf_walk_data {
 	u32		level;
 };
 
-static int leaf_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-		       enum kvm_pgtable_walk_flags flag, void * const arg)
+static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
+		       enum kvm_pgtable_walk_flags visit)
 {
-	struct leaf_walk_data *data = arg;
+	struct leaf_walk_data *data = ctx->arg;
 
-	data->pte   = *ptep;
-	data->level = level;
+	data->pte   = *ctx->ptep;
+	data->level = ctx->level;
 
 	return 0;
 }
@@ -383,47 +385,47 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
 	return prot;
 }
 
-static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level,
-				    kvm_pte_t *ptep, struct hyp_map_data *data)
+static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
+				    struct hyp_map_data *data)
 {
-	kvm_pte_t new, old = *ptep;
-	u64 granule = kvm_granule_size(level), phys = data->phys;
+	kvm_pte_t new, old = *ctx->ptep;
+	u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
 
-	if (!kvm_block_mapping_supported(addr, end, phys, level))
+	if (!kvm_block_mapping_supported(ctx, phys))
 		return false;
 
 	data->phys += granule;
-	new = kvm_init_valid_leaf_pte(phys, data->attr, level);
+	new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
 	if (old == new)
 		return true;
 	if (!kvm_pte_valid(old))
-		data->mm_ops->get_page(ptep);
+		data->mm_ops->get_page(ctx->ptep);
 	else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
 		return false;
 
-	smp_store_release(ptep, new);
+	smp_store_release(ctx->ptep, new);
 	return true;
 }
 
-static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			  enum kvm_pgtable_walk_flags flag, void * const arg)
+static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			  enum kvm_pgtable_walk_flags visit)
 {
 	kvm_pte_t *childp;
-	struct hyp_map_data *data = arg;
+	struct hyp_map_data *data = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
-	if (hyp_map_walker_try_leaf(addr, end, level, ptep, arg))
+	if (hyp_map_walker_try_leaf(ctx, data))
 		return 0;
 
-	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
+	if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
 		return -EINVAL;
 
 	childp = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
 	if (!childp)
 		return -ENOMEM;
 
-	kvm_set_table_pte(ptep, childp, mm_ops);
-	mm_ops->get_page(ptep);
+	kvm_set_table_pte(ctx->ptep, childp, mm_ops);
+	mm_ops->get_page(ctx->ptep);
 	return 0;
 }
 
@@ -456,39 +458,39 @@ struct hyp_unmap_data {
 	struct kvm_pgtable_mm_ops	*mm_ops;
 };
 
-static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			    enum kvm_pgtable_walk_flags flag, void * const arg)
+static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			    enum kvm_pgtable_walk_flags visit)
 {
-	kvm_pte_t pte = *ptep, *childp = NULL;
-	u64 granule = kvm_granule_size(level);
-	struct hyp_unmap_data *data = arg;
+	kvm_pte_t pte = *ctx->ptep, *childp = NULL;
+	u64 granule = kvm_granule_size(ctx->level);
+	struct hyp_unmap_data *data = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
 	if (!kvm_pte_valid(pte))
 		return -EINVAL;
 
-	if (kvm_pte_table(pte, level)) {
+	if (kvm_pte_table(pte, ctx->level)) {
 		childp = kvm_pte_follow(pte, mm_ops);
 
 		if (mm_ops->page_count(childp) != 1)
 			return 0;
 
-		kvm_clear_pte(ptep);
+		kvm_clear_pte(ctx->ptep);
 		dsb(ishst);
-		__tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level);
+		__tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
 	} else {
-		if (end - addr < granule)
+		if (ctx->end - ctx->addr < granule)
 			return -EINVAL;
 
-		kvm_clear_pte(ptep);
+		kvm_clear_pte(ctx->ptep);
 		dsb(ishst);
-		__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level);
+		__tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
 		data->unmapped += granule;
 	}
 
 	dsb(ish);
 	isb();
-	mm_ops->put_page(ptep);
+	mm_ops->put_page(ctx->ptep);
 
 	if (childp)
 		mm_ops->put_page(childp);
@@ -532,18 +534,18 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
 	return 0;
 }
 
-static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			   enum kvm_pgtable_walk_flags flag, void * const arg)
+static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			   enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = arg;
-	kvm_pte_t pte = *ptep;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
+	kvm_pte_t pte = *ctx->ptep;
 
 	if (!kvm_pte_valid(pte))
 		return 0;
 
-	mm_ops->put_page(ptep);
+	mm_ops->put_page(ctx->ptep);
 
-	if (kvm_pte_table(pte, level))
+	if (kvm_pte_table(pte, ctx->level))
 		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
 
 	return 0;
@@ -682,19 +684,19 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
 	return !!pte;
 }
 
-static void stage2_put_pte(kvm_pte_t *ptep, struct kvm_s2_mmu *mmu, u64 addr,
-			   u32 level, struct kvm_pgtable_mm_ops *mm_ops)
+static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
+			   struct kvm_pgtable_mm_ops *mm_ops)
 {
 	/*
 	 * Clear the existing PTE, and perform break-before-make with
 	 * TLB maintenance if it was valid.
 	 */
-	if (kvm_pte_valid(*ptep)) {
-		kvm_clear_pte(ptep);
-		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, addr, level);
+	if (kvm_pte_valid(*ctx->ptep)) {
+		kvm_clear_pte(ctx->ptep);
+		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
 	}
 
-	mm_ops->put_page(ptep);
+	mm_ops->put_page(ctx->ptep);
 }
 
 static bool stage2_pte_cacheable(struct kvm_pgtable *pgt, kvm_pte_t pte)
@@ -708,29 +710,28 @@ static bool stage2_pte_executable(kvm_pte_t pte)
 	return !(pte & KVM_PTE_LEAF_ATTR_HI_S2_XN);
 }
 
-static bool stage2_leaf_mapping_allowed(u64 addr, u64 end, u32 level,
+static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
 					struct stage2_map_data *data)
 {
-	if (data->force_pte && (level < (KVM_PGTABLE_MAX_LEVELS - 1)))
+	if (data->force_pte && (ctx->level < (KVM_PGTABLE_MAX_LEVELS - 1)))
 		return false;
 
-	return kvm_block_mapping_supported(addr, end, data->phys, level);
+	return kvm_block_mapping_supported(ctx, data->phys);
 }
 
-static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
-				      kvm_pte_t *ptep,
+static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 				      struct stage2_map_data *data)
 {
-	kvm_pte_t new, old = *ptep;
-	u64 granule = kvm_granule_size(level), phys = data->phys;
+	kvm_pte_t new, old = *ctx->ptep;
+	u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
 	struct kvm_pgtable *pgt = data->mmu->pgt;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
-	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
+	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return -E2BIG;
 
 	if (kvm_phys_is_valid(phys))
-		new = kvm_init_valid_leaf_pte(phys, data->attr, level);
+		new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
 	else
 		new = kvm_init_invalid_leaf_owner(data->owner_id);
 
@@ -744,7 +745,7 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 		if (!stage2_pte_needs_update(old, new))
 			return -EAGAIN;
 
-		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
+		stage2_put_pte(ctx, data->mmu, mm_ops);
 	}
 
 	/* Perform CMOs before installation of the guest stage-2 PTE */
@@ -755,26 +756,25 @@ static int stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
 	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
 		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
 
-	smp_store_release(ptep, new);
+	smp_store_release(ctx->ptep, new);
 	if (stage2_pte_is_counted(new))
-		mm_ops->get_page(ptep);
+		mm_ops->get_page(ctx->ptep);
 	if (kvm_phys_is_valid(phys))
 		data->phys += granule;
 	return 0;
 }
 
-static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
-				     kvm_pte_t *ptep,
+static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 				     struct stage2_map_data *data)
 {
 	if (data->anchor)
 		return 0;
 
-	if (!stage2_leaf_mapping_allowed(addr, end, level, data))
+	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return 0;
 
-	data->childp = kvm_pte_follow(*ptep, data->mm_ops);
-	kvm_clear_pte(ptep);
+	data->childp = kvm_pte_follow(*ctx->ptep, data->mm_ops);
+	kvm_clear_pte(ctx->ptep);
 
 	/*
 	 * Invalidate the whole stage-2, as we may have numerous leaf
@@ -782,29 +782,29 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
 	 * individually.
 	 */
 	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
-	data->anchor = ptep;
+	data->anchor = ctx->ptep;
 	return 0;
 }
 
-static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 				struct stage2_map_data *data)
 {
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
-	kvm_pte_t *childp, pte = *ptep;
+	kvm_pte_t *childp, pte = *ctx->ptep;
 	int ret;
 
 	if (data->anchor) {
 		if (stage2_pte_is_counted(pte))
-			mm_ops->put_page(ptep);
+			mm_ops->put_page(ctx->ptep);
 
 		return 0;
 	}
 
-	ret = stage2_map_walker_try_leaf(addr, end, level, ptep, data);
+	ret = stage2_map_walker_try_leaf(ctx, data);
 	if (ret != -E2BIG)
 		return ret;
 
-	if (WARN_ON(level == KVM_PGTABLE_MAX_LEVELS - 1))
+	if (WARN_ON(ctx->level == KVM_PGTABLE_MAX_LEVELS - 1))
 		return -EINVAL;
 
 	if (!data->memcache)
@@ -820,16 +820,15 @@ static int stage2_map_walk_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	 * will be mapped lazily.
 	 */
 	if (stage2_pte_is_counted(pte))
-		stage2_put_pte(ptep, data->mmu, addr, level, mm_ops);
+		stage2_put_pte(ctx, data->mmu, mm_ops);
 
-	kvm_set_table_pte(ptep, childp, mm_ops);
-	mm_ops->get_page(ptep);
+	kvm_set_table_pte(ctx->ptep, childp, mm_ops);
+	mm_ops->get_page(ctx->ptep);
 
 	return 0;
 }
 
-static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
-				      kvm_pte_t *ptep,
+static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx,
 				      struct stage2_map_data *data)
 {
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
@@ -839,17 +838,17 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
 	if (!data->anchor)
 		return 0;
 
-	if (data->anchor == ptep) {
+	if (data->anchor == ctx->ptep) {
 		childp = data->childp;
 		data->anchor = NULL;
 		data->childp = NULL;
-		ret = stage2_map_walk_leaf(addr, end, level, ptep, data);
+		ret = stage2_map_walk_leaf(ctx, data);
 	} else {
-		childp = kvm_pte_follow(*ptep, mm_ops);
+		childp = kvm_pte_follow(*ctx->ptep, mm_ops);
 	}
 
 	mm_ops->put_page(childp);
-	mm_ops->put_page(ptep);
+	mm_ops->put_page(ctx->ptep);
 
 	return ret;
 }
@@ -873,18 +872,18 @@ static int stage2_map_walk_table_post(u64 addr, u64 end, u32 level,
  * the page-table, installing the block entry when it revisits the anchor
  * pointer and clearing the anchor to NULL.
  */
-static int stage2_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			     enum kvm_pgtable_walk_flags flag, void * const arg)
+static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			     enum kvm_pgtable_walk_flags visit)
 {
-	struct stage2_map_data *data = arg;
+	struct stage2_map_data *data = ctx->arg;
 
-	switch (flag) {
+	switch (visit) {
 	case KVM_PGTABLE_WALK_TABLE_PRE:
-		return stage2_map_walk_table_pre(addr, end, level, ptep, data);
+		return stage2_map_walk_table_pre(ctx, data);
 	case KVM_PGTABLE_WALK_LEAF:
-		return stage2_map_walk_leaf(addr, end, level, ptep, data);
+		return stage2_map_walk_leaf(ctx, data);
 	case KVM_PGTABLE_WALK_TABLE_POST:
-		return stage2_map_walk_table_post(addr, end, level, ptep, data);
+		return stage2_map_walk_table_post(ctx, data);
 	}
 
 	return -EINVAL;
@@ -949,25 +948,24 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	return ret;
 }
 
-static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			       enum kvm_pgtable_walk_flags flag,
-			       void * const arg)
+static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			       enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable *pgt = arg;
+	struct kvm_pgtable *pgt = ctx->arg;
 	struct kvm_s2_mmu *mmu = pgt->mmu;
 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
-	kvm_pte_t pte = *ptep, *childp = NULL;
+	kvm_pte_t pte = *ctx->ptep, *childp = NULL;
 	bool need_flush = false;
 
 	if (!kvm_pte_valid(pte)) {
 		if (stage2_pte_is_counted(pte)) {
-			kvm_clear_pte(ptep);
-			mm_ops->put_page(ptep);
+			kvm_clear_pte(ctx->ptep);
+			mm_ops->put_page(ctx->ptep);
 		}
 		return 0;
 	}
 
-	if (kvm_pte_table(pte, level)) {
+	if (kvm_pte_table(pte, ctx->level)) {
 		childp = kvm_pte_follow(pte, mm_ops);
 
 		if (mm_ops->page_count(childp) != 1)
@@ -981,11 +979,11 @@ static int stage2_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 	 * block entry and rely on the remaining portions being faulted
 	 * back lazily.
 	 */
-	stage2_put_pte(ptep, mmu, addr, level, mm_ops);
+	stage2_put_pte(ctx, mmu, mm_ops);
 
 	if (need_flush && mm_ops->dcache_clean_inval_poc)
 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
-					       kvm_granule_size(level));
+					       kvm_granule_size(ctx->level));
 
 	if (childp)
 		mm_ops->put_page(childp);
@@ -1012,18 +1010,17 @@ struct stage2_attr_data {
 	struct kvm_pgtable_mm_ops	*mm_ops;
 };
 
-static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			      enum kvm_pgtable_walk_flags flag,
-			      void * const arg)
+static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			      enum kvm_pgtable_walk_flags visit)
 {
-	kvm_pte_t pte = *ptep;
-	struct stage2_attr_data *data = arg;
+	kvm_pte_t pte = *ctx->ptep;
+	struct stage2_attr_data *data = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
 	if (!kvm_pte_valid(pte))
 		return 0;
 
-	data->level = level;
+	data->level = ctx->level;
 	data->pte = pte;
 	pte &= ~data->attr_clr;
 	pte |= data->attr_set;
@@ -1039,10 +1036,10 @@ static int stage2_attr_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 		 * stage-2 PTE if we are going to add executable permission.
 		 */
 		if (mm_ops->icache_inval_pou &&
-		    stage2_pte_executable(pte) && !stage2_pte_executable(*ptep))
+		    stage2_pte_executable(pte) && !stage2_pte_executable(*ctx->ptep))
 			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
-						  kvm_granule_size(level));
-		WRITE_ONCE(*ptep, pte);
+						  kvm_granule_size(ctx->level));
+		WRITE_ONCE(*ctx->ptep, pte);
 	}
 
 	return 0;
@@ -1140,20 +1137,19 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
 	return ret;
 }
 
-static int stage2_flush_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			       enum kvm_pgtable_walk_flags flag,
-			       void * const arg)
+static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			       enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable *pgt = arg;
+	struct kvm_pgtable *pgt = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
-	kvm_pte_t pte = *ptep;
+	kvm_pte_t pte = *ctx->ptep;
 
 	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
 		return 0;
 
 	if (mm_ops->dcache_clean_inval_poc)
 		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
-					       kvm_granule_size(level));
+					       kvm_granule_size(ctx->level));
 	return 0;
 }
 
@@ -1200,19 +1196,18 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 	return 0;
 }
 
-static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
-			      enum kvm_pgtable_walk_flags flag,
-			      void * const arg)
+static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
+			      enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = arg;
-	kvm_pte_t pte = *ptep;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
+	kvm_pte_t pte = *ctx->ptep;
 
 	if (!stage2_pte_is_counted(pte))
 		return 0;
 
-	mm_ops->put_page(ptep);
+	mm_ops->put_page(ctx->ptep);
 
-	if (kvm_pte_table(pte, level))
+	if (kvm_pte_table(pte, ctx->level))
 		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
 
 	return 0;
-- 
GitLab


From 83844a2317ecad935f6735abd854e4bf3f757040 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:32 +0000
Subject: [PATCH 1261/4122] KVM: arm64: Stash observed pte value in visitor
 context

Rather than reading the ptep all over the shop, read the ptep once from
__kvm_pgtable_visit() and stick it in the visitor context. Reread the
ptep after visiting a leaf in case the callback installed a new table
underneath.

No functional change intended.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-3-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h  |  1 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c |  5 +-
 arch/arm64/kvm/hyp/nvhe/setup.c       |  7 +--
 arch/arm64/kvm/hyp/pgtable.c          | 86 +++++++++++++--------------
 4 files changed, 48 insertions(+), 51 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 607f9bb8aab4e..14d4b68a1e92c 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -201,6 +201,7 @@ enum kvm_pgtable_walk_flags {
 
 struct kvm_pgtable_visit_ctx {
 	kvm_pte_t				*ptep;
+	kvm_pte_t				old;
 	void					*arg;
 	u64					addr;
 	u64					end;
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 8f5b6a36a039e..d21d1b08a055b 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -421,12 +421,11 @@ static int __check_page_state_visitor(const struct kvm_pgtable_visit_ctx *ctx,
 				      enum kvm_pgtable_walk_flags visit)
 {
 	struct check_walk_data *d = ctx->arg;
-	kvm_pte_t pte = *ctx->ptep;
 
-	if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
+	if (kvm_pte_valid(ctx->old) && !addr_is_memory(kvm_pte_to_phys(ctx->old)))
 		return -EINVAL;
 
-	return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
+	return d->get_page_state(ctx->old) == d->desired ? 0 : -EPERM;
 }
 
 static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index a293cf5eba1b1..6af443c9d78e5 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -192,10 +192,9 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx
 	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
 	enum kvm_pgtable_prot prot;
 	enum pkvm_page_state state;
-	kvm_pte_t pte = *ctx->ptep;
 	phys_addr_t phys;
 
-	if (!kvm_pte_valid(pte))
+	if (!kvm_pte_valid(ctx->old))
 		return 0;
 
 	/*
@@ -210,7 +209,7 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx
 	if (ctx->level != (KVM_PGTABLE_MAX_LEVELS - 1))
 		return -EINVAL;
 
-	phys = kvm_pte_to_phys(pte);
+	phys = kvm_pte_to_phys(ctx->old);
 	if (!addr_is_memory(phys))
 		return -EINVAL;
 
@@ -218,7 +217,7 @@ static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx
 	 * Adjust the host stage-2 mappings to match the ownership attributes
 	 * configured in the hypervisor stage-1.
 	 */
-	state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
+	state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(ctx->old));
 	switch (state) {
 	case PKVM_PAGE_OWNED:
 		return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 900c8b9c0cfcd..fb3696b3a9976 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -189,6 +189,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 	enum kvm_pgtable_walk_flags flags = data->walker->flags;
 	struct kvm_pgtable_visit_ctx ctx = {
 		.ptep	= ptep,
+		.old	= READ_ONCE(*ptep),
 		.arg	= data->walker->arg,
 		.addr	= data->addr,
 		.end	= data->end,
@@ -196,16 +197,16 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		.flags	= flags,
 	};
 	int ret = 0;
-	kvm_pte_t *childp, pte = *ptep;
-	bool table = kvm_pte_table(pte, level);
+	kvm_pte_t *childp;
+	bool table = kvm_pte_table(ctx.old, level);
 
 	if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
 		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_TABLE_PRE);
 
 	if (!table && (ctx.flags & KVM_PGTABLE_WALK_LEAF)) {
 		ret = kvm_pgtable_visitor_cb(data, &ctx, KVM_PGTABLE_WALK_LEAF);
-		pte = *ptep;
-		table = kvm_pte_table(pte, level);
+		ctx.old = READ_ONCE(*ptep);
+		table = kvm_pte_table(ctx.old, level);
 	}
 
 	if (ret)
@@ -217,7 +218,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		goto out;
 	}
 
-	childp = kvm_pte_follow(pte, data->pgt->mm_ops);
+	childp = kvm_pte_follow(ctx.old, data->pgt->mm_ops);
 	ret = __kvm_pgtable_walk(data, childp, level + 1);
 	if (ret)
 		goto out;
@@ -299,7 +300,7 @@ static int leaf_walker(const struct kvm_pgtable_visit_ctx *ctx,
 {
 	struct leaf_walk_data *data = ctx->arg;
 
-	data->pte   = *ctx->ptep;
+	data->pte   = ctx->old;
 	data->level = ctx->level;
 
 	return 0;
@@ -388,7 +389,7 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte)
 static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 				    struct hyp_map_data *data)
 {
-	kvm_pte_t new, old = *ctx->ptep;
+	kvm_pte_t new;
 	u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
 
 	if (!kvm_block_mapping_supported(ctx, phys))
@@ -396,11 +397,11 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 
 	data->phys += granule;
 	new = kvm_init_valid_leaf_pte(phys, data->attr, ctx->level);
-	if (old == new)
+	if (ctx->old == new)
 		return true;
-	if (!kvm_pte_valid(old))
+	if (!kvm_pte_valid(ctx->old))
 		data->mm_ops->get_page(ctx->ptep);
-	else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
+	else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
 		return false;
 
 	smp_store_release(ctx->ptep, new);
@@ -461,16 +462,16 @@ struct hyp_unmap_data {
 static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			    enum kvm_pgtable_walk_flags visit)
 {
-	kvm_pte_t pte = *ctx->ptep, *childp = NULL;
+	kvm_pte_t *childp = NULL;
 	u64 granule = kvm_granule_size(ctx->level);
 	struct hyp_unmap_data *data = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
-	if (!kvm_pte_valid(pte))
+	if (!kvm_pte_valid(ctx->old))
 		return -EINVAL;
 
-	if (kvm_pte_table(pte, ctx->level)) {
-		childp = kvm_pte_follow(pte, mm_ops);
+	if (kvm_pte_table(ctx->old, ctx->level)) {
+		childp = kvm_pte_follow(ctx->old, mm_ops);
 
 		if (mm_ops->page_count(childp) != 1)
 			return 0;
@@ -538,15 +539,14 @@ static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			   enum kvm_pgtable_walk_flags visit)
 {
 	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
-	kvm_pte_t pte = *ctx->ptep;
 
-	if (!kvm_pte_valid(pte))
+	if (!kvm_pte_valid(ctx->old))
 		return 0;
 
 	mm_ops->put_page(ctx->ptep);
 
-	if (kvm_pte_table(pte, ctx->level))
-		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
+	if (kvm_pte_table(ctx->old, ctx->level))
+		mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
 
 	return 0;
 }
@@ -691,7 +691,7 @@ static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s
 	 * Clear the existing PTE, and perform break-before-make with
 	 * TLB maintenance if it was valid.
 	 */
-	if (kvm_pte_valid(*ctx->ptep)) {
+	if (kvm_pte_valid(ctx->old)) {
 		kvm_clear_pte(ctx->ptep);
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
 	}
@@ -722,7 +722,7 @@ static bool stage2_leaf_mapping_allowed(const struct kvm_pgtable_visit_ctx *ctx,
 static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 				      struct stage2_map_data *data)
 {
-	kvm_pte_t new, old = *ctx->ptep;
+	kvm_pte_t new;
 	u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
 	struct kvm_pgtable *pgt = data->mmu->pgt;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
@@ -735,14 +735,14 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	else
 		new = kvm_init_invalid_leaf_owner(data->owner_id);
 
-	if (stage2_pte_is_counted(old)) {
+	if (stage2_pte_is_counted(ctx->old)) {
 		/*
 		 * Skip updating the PTE if we are trying to recreate the exact
 		 * same mapping or only change the access permissions. Instead,
 		 * the vCPU will exit one more time from guest if still needed
 		 * and then go through the path of relaxing permissions.
 		 */
-		if (!stage2_pte_needs_update(old, new))
+		if (!stage2_pte_needs_update(ctx->old, new))
 			return -EAGAIN;
 
 		stage2_put_pte(ctx, data->mmu, mm_ops);
@@ -773,7 +773,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return 0;
 
-	data->childp = kvm_pte_follow(*ctx->ptep, data->mm_ops);
+	data->childp = kvm_pte_follow(ctx->old, data->mm_ops);
 	kvm_clear_pte(ctx->ptep);
 
 	/*
@@ -790,11 +790,11 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 				struct stage2_map_data *data)
 {
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
-	kvm_pte_t *childp, pte = *ctx->ptep;
+	kvm_pte_t *childp;
 	int ret;
 
 	if (data->anchor) {
-		if (stage2_pte_is_counted(pte))
+		if (stage2_pte_is_counted(ctx->old))
 			mm_ops->put_page(ctx->ptep);
 
 		return 0;
@@ -819,7 +819,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	 * a table. Accesses beyond 'end' that fall within the new table
 	 * will be mapped lazily.
 	 */
-	if (stage2_pte_is_counted(pte))
+	if (stage2_pte_is_counted(ctx->old))
 		stage2_put_pte(ctx, data->mmu, mm_ops);
 
 	kvm_set_table_pte(ctx->ptep, childp, mm_ops);
@@ -844,7 +844,7 @@ static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx,
 		data->childp = NULL;
 		ret = stage2_map_walk_leaf(ctx, data);
 	} else {
-		childp = kvm_pte_follow(*ctx->ptep, mm_ops);
+		childp = kvm_pte_follow(ctx->old, mm_ops);
 	}
 
 	mm_ops->put_page(childp);
@@ -954,23 +954,23 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 	struct kvm_pgtable *pgt = ctx->arg;
 	struct kvm_s2_mmu *mmu = pgt->mmu;
 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
-	kvm_pte_t pte = *ctx->ptep, *childp = NULL;
+	kvm_pte_t *childp = NULL;
 	bool need_flush = false;
 
-	if (!kvm_pte_valid(pte)) {
-		if (stage2_pte_is_counted(pte)) {
+	if (!kvm_pte_valid(ctx->old)) {
+		if (stage2_pte_is_counted(ctx->old)) {
 			kvm_clear_pte(ctx->ptep);
 			mm_ops->put_page(ctx->ptep);
 		}
 		return 0;
 	}
 
-	if (kvm_pte_table(pte, ctx->level)) {
-		childp = kvm_pte_follow(pte, mm_ops);
+	if (kvm_pte_table(ctx->old, ctx->level)) {
+		childp = kvm_pte_follow(ctx->old, mm_ops);
 
 		if (mm_ops->page_count(childp) != 1)
 			return 0;
-	} else if (stage2_pte_cacheable(pgt, pte)) {
+	} else if (stage2_pte_cacheable(pgt, ctx->old)) {
 		need_flush = !stage2_has_fwb(pgt);
 	}
 
@@ -982,7 +982,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 	stage2_put_pte(ctx, mmu, mm_ops);
 
 	if (need_flush && mm_ops->dcache_clean_inval_poc)
-		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
 					       kvm_granule_size(ctx->level));
 
 	if (childp)
@@ -1013,11 +1013,11 @@ struct stage2_attr_data {
 static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			      enum kvm_pgtable_walk_flags visit)
 {
-	kvm_pte_t pte = *ctx->ptep;
+	kvm_pte_t pte = ctx->old;
 	struct stage2_attr_data *data = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
 
-	if (!kvm_pte_valid(pte))
+	if (!kvm_pte_valid(ctx->old))
 		return 0;
 
 	data->level = ctx->level;
@@ -1036,7 +1036,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
 		 * stage-2 PTE if we are going to add executable permission.
 		 */
 		if (mm_ops->icache_inval_pou &&
-		    stage2_pte_executable(pte) && !stage2_pte_executable(*ctx->ptep))
+		    stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old))
 			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
 						  kvm_granule_size(ctx->level));
 		WRITE_ONCE(*ctx->ptep, pte);
@@ -1142,13 +1142,12 @@ static int stage2_flush_walker(const struct kvm_pgtable_visit_ctx *ctx,
 {
 	struct kvm_pgtable *pgt = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
-	kvm_pte_t pte = *ctx->ptep;
 
-	if (!kvm_pte_valid(pte) || !stage2_pte_cacheable(pgt, pte))
+	if (!kvm_pte_valid(ctx->old) || !stage2_pte_cacheable(pgt, ctx->old))
 		return 0;
 
 	if (mm_ops->dcache_clean_inval_poc)
-		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(pte, mm_ops),
+		mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
 					       kvm_granule_size(ctx->level));
 	return 0;
 }
@@ -1200,15 +1199,14 @@ static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			      enum kvm_pgtable_walk_flags visit)
 {
 	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
-	kvm_pte_t pte = *ctx->ptep;
 
-	if (!stage2_pte_is_counted(pte))
+	if (!stage2_pte_is_counted(ctx->old))
 		return 0;
 
 	mm_ops->put_page(ctx->ptep);
 
-	if (kvm_pte_table(pte, ctx->level))
-		mm_ops->put_page(kvm_pte_follow(pte, mm_ops));
+	if (kvm_pte_table(ctx->old, ctx->level))
+		mm_ops->put_page(kvm_pte_follow(ctx->old, mm_ops));
 
 	return 0;
 }
-- 
GitLab


From 2a611c7f87f26cca405da63a57f06d0e4dc14240 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:33 +0000
Subject: [PATCH 1262/4122] KVM: arm64: Pass mm_ops through the visitor context

As a prerequisite for getting visitors off of struct kvm_pgtable, pass
mm_ops through the visitor context.

No functional change intended.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-4-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h |  1 +
 arch/arm64/kvm/hyp/nvhe/setup.c      |  3 +-
 arch/arm64/kvm/hyp/pgtable.c         | 63 +++++++++++-----------------
 3 files changed, 26 insertions(+), 41 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 14d4b68a1e92c..a752793482cb0 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -203,6 +203,7 @@ struct kvm_pgtable_visit_ctx {
 	kvm_pte_t				*ptep;
 	kvm_pte_t				old;
 	void					*arg;
+	struct kvm_pgtable_mm_ops		*mm_ops;
 	u64					addr;
 	u64					end;
 	u32					level;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 6af443c9d78e5..1068338d77f3c 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -189,7 +189,7 @@ static void hpool_put_page(void *addr)
 static int finalize_host_mappings_walker(const struct kvm_pgtable_visit_ctx *ctx,
 					 enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 	enum kvm_pgtable_prot prot;
 	enum pkvm_page_state state;
 	phys_addr_t phys;
@@ -239,7 +239,6 @@ static int finalize_host_mappings(void)
 	struct kvm_pgtable_walker walker = {
 		.cb	= finalize_host_mappings_walker,
 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
-		.arg	= pkvm_pgtable.mm_ops,
 	};
 	int i, ret;
 
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index fb3696b3a9976..db25e81a9890e 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -181,9 +181,10 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
 }
 
 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
-			      kvm_pte_t *pgtable, u32 level);
+			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level);
 
 static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
+				      struct kvm_pgtable_mm_ops *mm_ops,
 				      kvm_pte_t *ptep, u32 level)
 {
 	enum kvm_pgtable_walk_flags flags = data->walker->flags;
@@ -191,6 +192,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		.ptep	= ptep,
 		.old	= READ_ONCE(*ptep),
 		.arg	= data->walker->arg,
+		.mm_ops	= mm_ops,
 		.addr	= data->addr,
 		.end	= data->end,
 		.level	= level,
@@ -218,8 +220,8 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		goto out;
 	}
 
-	childp = kvm_pte_follow(ctx.old, data->pgt->mm_ops);
-	ret = __kvm_pgtable_walk(data, childp, level + 1);
+	childp = kvm_pte_follow(ctx.old, mm_ops);
+	ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1);
 	if (ret)
 		goto out;
 
@@ -231,7 +233,7 @@ out:
 }
 
 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
-			      kvm_pte_t *pgtable, u32 level)
+			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level)
 {
 	u32 idx;
 	int ret = 0;
@@ -245,7 +247,7 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
 		if (data->addr >= data->end)
 			break;
 
-		ret = __kvm_pgtable_visit(data, ptep, level);
+		ret = __kvm_pgtable_visit(data, mm_ops, ptep, level);
 		if (ret)
 			break;
 	}
@@ -269,7 +271,7 @@ static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
 	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
 		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];
 
-		ret = __kvm_pgtable_walk(data, ptep, pgt->start_level);
+		ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level);
 		if (ret)
 			break;
 	}
@@ -332,7 +334,6 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
 struct hyp_map_data {
 	u64				phys;
 	kvm_pte_t			attr;
-	struct kvm_pgtable_mm_ops	*mm_ops;
 };
 
 static int hyp_set_prot_attr(enum kvm_pgtable_prot prot, kvm_pte_t *ptep)
@@ -400,7 +401,7 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	if (ctx->old == new)
 		return true;
 	if (!kvm_pte_valid(ctx->old))
-		data->mm_ops->get_page(ctx->ptep);
+		ctx->mm_ops->get_page(ctx->ptep);
 	else if (WARN_ON((ctx->old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW))
 		return false;
 
@@ -413,7 +414,7 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 {
 	kvm_pte_t *childp;
 	struct hyp_map_data *data = ctx->arg;
-	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
 	if (hyp_map_walker_try_leaf(ctx, data))
 		return 0;
@@ -436,7 +437,6 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
 	int ret;
 	struct hyp_map_data map_data = {
 		.phys	= ALIGN_DOWN(phys, PAGE_SIZE),
-		.mm_ops	= pgt->mm_ops,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb	= hyp_map_walker,
@@ -454,18 +454,13 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
 	return ret;
 }
 
-struct hyp_unmap_data {
-	u64				unmapped;
-	struct kvm_pgtable_mm_ops	*mm_ops;
-};
-
 static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			    enum kvm_pgtable_walk_flags visit)
 {
 	kvm_pte_t *childp = NULL;
 	u64 granule = kvm_granule_size(ctx->level);
-	struct hyp_unmap_data *data = ctx->arg;
-	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+	u64 *unmapped = ctx->arg;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
 	if (!kvm_pte_valid(ctx->old))
 		return -EINVAL;
@@ -486,7 +481,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 		kvm_clear_pte(ctx->ptep);
 		dsb(ishst);
 		__tlbi_level(vale2is, __TLBI_VADDR(ctx->addr, 0), ctx->level);
-		data->unmapped += granule;
+		*unmapped += granule;
 	}
 
 	dsb(ish);
@@ -501,12 +496,10 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
-	struct hyp_unmap_data unmap_data = {
-		.mm_ops	= pgt->mm_ops,
-	};
+	u64 unmapped = 0;
 	struct kvm_pgtable_walker walker = {
 		.cb	= hyp_unmap_walker,
-		.arg	= &unmap_data,
+		.arg	= &unmapped,
 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
 	};
 
@@ -514,7 +507,7 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 		return 0;
 
 	kvm_pgtable_walk(pgt, addr, size, &walker);
-	return unmap_data.unmapped;
+	return unmapped;
 }
 
 int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
@@ -538,7 +531,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
 static int hyp_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			   enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
 	if (!kvm_pte_valid(ctx->old))
 		return 0;
@@ -556,7 +549,6 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 	struct kvm_pgtable_walker walker = {
 		.cb	= hyp_free_walker,
 		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
-		.arg	= pgt->mm_ops,
 	};
 
 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
@@ -575,8 +567,6 @@ struct stage2_map_data {
 	struct kvm_s2_mmu		*mmu;
 	void				*memcache;
 
-	struct kvm_pgtable_mm_ops	*mm_ops;
-
 	/* Force mappings to page granularity */
 	bool				force_pte;
 };
@@ -725,7 +715,7 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	kvm_pte_t new;
 	u64 granule = kvm_granule_size(ctx->level), phys = data->phys;
 	struct kvm_pgtable *pgt = data->mmu->pgt;
-	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
 	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return -E2BIG;
@@ -773,7 +763,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return 0;
 
-	data->childp = kvm_pte_follow(ctx->old, data->mm_ops);
+	data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops);
 	kvm_clear_pte(ctx->ptep);
 
 	/*
@@ -789,7 +779,7 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 				struct stage2_map_data *data)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 	kvm_pte_t *childp;
 	int ret;
 
@@ -831,7 +821,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx,
 				      struct stage2_map_data *data)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 	kvm_pte_t *childp;
 	int ret = 0;
 
@@ -898,7 +888,6 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		.phys		= ALIGN_DOWN(phys, PAGE_SIZE),
 		.mmu		= pgt->mmu,
 		.memcache	= mc,
-		.mm_ops		= pgt->mm_ops,
 		.force_pte	= pgt->force_pte_cb && pgt->force_pte_cb(addr, addr + size, prot),
 	};
 	struct kvm_pgtable_walker walker = {
@@ -929,7 +918,6 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		.phys		= KVM_PHYS_INVALID,
 		.mmu		= pgt->mmu,
 		.memcache	= mc,
-		.mm_ops		= pgt->mm_ops,
 		.owner_id	= owner_id,
 		.force_pte	= true,
 	};
@@ -953,7 +941,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
 {
 	struct kvm_pgtable *pgt = ctx->arg;
 	struct kvm_s2_mmu *mmu = pgt->mmu;
-	struct kvm_pgtable_mm_ops *mm_ops = pgt->mm_ops;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 	kvm_pte_t *childp = NULL;
 	bool need_flush = false;
 
@@ -1007,7 +995,6 @@ struct stage2_attr_data {
 	kvm_pte_t			attr_clr;
 	kvm_pte_t			pte;
 	u32				level;
-	struct kvm_pgtable_mm_ops	*mm_ops;
 };
 
 static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
@@ -1015,7 +1002,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
 {
 	kvm_pte_t pte = ctx->old;
 	struct stage2_attr_data *data = ctx->arg;
-	struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
 	if (!kvm_pte_valid(ctx->old))
 		return 0;
@@ -1055,7 +1042,6 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
 	struct stage2_attr_data data = {
 		.attr_set	= attr_set & attr_mask,
 		.attr_clr	= attr_clr & attr_mask,
-		.mm_ops		= pgt->mm_ops,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_attr_walker,
@@ -1198,7 +1184,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 static int stage2_free_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			      enum kvm_pgtable_walk_flags visit)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = ctx->arg;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
 	if (!stage2_pte_is_counted(ctx->old))
 		return 0;
@@ -1218,7 +1204,6 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 		.cb	= stage2_free_walker,
 		.flags	= KVM_PGTABLE_WALK_LEAF |
 			  KVM_PGTABLE_WALK_TABLE_POST,
-		.arg	= pgt->mm_ops,
 	};
 
 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
-- 
GitLab


From fa002e8e79b3f980455ba585c1f47b26680de5b9 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:34 +0000
Subject: [PATCH 1263/4122] KVM: arm64: Don't pass kvm_pgtable through
 kvm_pgtable_walk_data

In order to tear down page tables from outside the context of
kvm_pgtable (such as an RCU callback), stop passing a pointer through
kvm_pgtable_walk_data.

No functional change intended.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-5-oliver.upton@linux.dev
---
 arch/arm64/kvm/hyp/pgtable.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index db25e81a9890e..93989b750a263 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -50,7 +50,6 @@
 #define KVM_MAX_OWNER_ID		1
 
 struct kvm_pgtable_walk_data {
-	struct kvm_pgtable		*pgt;
 	struct kvm_pgtable_walker	*walker;
 
 	u64				addr;
@@ -88,7 +87,7 @@ static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level)
 	return (data->addr >> shift) & mask;
 }
 
-static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
+static u32 kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
 {
 	u64 shift = kvm_granule_shift(pgt->start_level - 1); /* May underflow */
 	u64 mask = BIT(pgt->ia_bits) - 1;
@@ -96,11 +95,6 @@ static u32 __kvm_pgd_page_idx(struct kvm_pgtable *pgt, u64 addr)
 	return (addr & mask) >> shift;
 }
 
-static u32 kvm_pgd_page_idx(struct kvm_pgtable_walk_data *data)
-{
-	return __kvm_pgd_page_idx(data->pgt, data->addr);
-}
-
 static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
 {
 	struct kvm_pgtable pgt = {
@@ -108,7 +102,7 @@ static u32 kvm_pgd_pages(u32 ia_bits, u32 start_level)
 		.start_level	= start_level,
 	};
 
-	return __kvm_pgd_page_idx(&pgt, -1ULL) + 1;
+	return kvm_pgd_page_idx(&pgt, -1ULL) + 1;
 }
 
 static bool kvm_pte_table(kvm_pte_t pte, u32 level)
@@ -255,11 +249,10 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
 	return ret;
 }
 
-static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
+static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_data *data)
 {
 	u32 idx;
 	int ret = 0;
-	struct kvm_pgtable *pgt = data->pgt;
 	u64 limit = BIT(pgt->ia_bits);
 
 	if (data->addr > limit || data->end > limit)
@@ -268,7 +261,7 @@ static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data)
 	if (!pgt->pgd)
 		return -EINVAL;
 
-	for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) {
+	for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) {
 		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];
 
 		ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level);
@@ -283,13 +276,12 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		     struct kvm_pgtable_walker *walker)
 {
 	struct kvm_pgtable_walk_data walk_data = {
-		.pgt	= pgt,
 		.addr	= ALIGN_DOWN(addr, PAGE_SIZE),
 		.end	= PAGE_ALIGN(walk_data.addr + size),
 		.walker	= walker,
 	};
 
-	return _kvm_pgtable_walk(&walk_data);
+	return _kvm_pgtable_walk(pgt, &walk_data);
 }
 
 struct leaf_walk_data {
-- 
GitLab


From 8e94e1252cc054bb31fd3e9a15235cd831970ec1 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:35 +0000
Subject: [PATCH 1264/4122] KVM: arm64: Add a helper to tear down unlinked
 stage-2 subtrees

A subsequent change to KVM will move the tear down of an unlinked
stage-2 subtree out of the critical path of the break-before-make
sequence.

Introduce a new helper for tearing down unlinked stage-2 subtrees.
Leverage the existing stage-2 free walkers to do so, with a deep call
into __kvm_pgtable_walk() as the subtree is no longer reachable from the
root.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-6-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h | 11 +++++++++++
 arch/arm64/kvm/hyp/pgtable.c         | 23 +++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index a752793482cb0..93b1feeaebabb 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -333,6 +333,17 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
  */
 void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
 
+/**
+ * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * @mm_ops:	Memory management callbacks.
+ * @pgtable:	Unlinked stage-2 paging structure to be freed.
+ * @level:	Level of the stage-2 paging structure to be freed.
+ *
+ * The page-table is assumed to be unreachable by any hardware walkers prior to
+ * freeing and therefore no TLB invalidation is performed.
+ */
+void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
 /**
  * kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
  * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 93989b750a263..363a5cce7e1a1 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1203,3 +1203,26 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
 	pgt->pgd = NULL;
 }
+
+void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
+{
+	kvm_pte_t *ptep = (kvm_pte_t *)pgtable;
+	struct kvm_pgtable_walker walker = {
+		.cb	= stage2_free_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF |
+			  KVM_PGTABLE_WALK_TABLE_POST,
+	};
+	struct kvm_pgtable_walk_data data = {
+		.walker	= &walker,
+
+		/*
+		 * At this point the IPA really doesn't matter, as the page
+		 * table being traversed has already been removed from the stage
+		 * 2. Set an appropriate range to cover the entire page table.
+		 */
+		.addr	= 0,
+		.end	= kvm_granule_size(level),
+	};
+
+	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level));
+}
-- 
GitLab


From 6b91b8f95cadd3441c056182daf9024475ac4a91 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:36 +0000
Subject: [PATCH 1265/4122] KVM: arm64: Use an opaque type for pteps

Use an opaque type for pteps and require visitors explicitly dereference
the pointer before using. Protecting page table memory with RCU requires
that KVM dereferences RCU-annotated pointers before using. However, RCU
is not available for use in the nVHE hypervisor and the opaque type can
be conditionally annotated with RCU for the stage-2 MMU.

Call the type a 'pteref' to avoid a naming collision with raw pteps. No
functional change intended.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-7-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h |  9 ++++++++-
 arch/arm64/kvm/hyp/pgtable.c         | 27 ++++++++++++++-------------
 arch/arm64/kvm/mmu.c                 |  2 +-
 3 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 93b1feeaebabb..cbd2851eefc1c 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
+typedef kvm_pte_t *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
+{
+	return pteref;
+}
+
 #define KVM_PTE_VALID			BIT(0)
 
 #define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
@@ -175,7 +182,7 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
 struct kvm_pgtable {
 	u32					ia_bits;
 	u32					start_level;
-	kvm_pte_t				*pgd;
+	kvm_pteref_t				pgd;
 	struct kvm_pgtable_mm_ops		*mm_ops;
 
 	/* Stage-2 only */
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 363a5cce7e1a1..7511494537e5b 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -175,13 +175,14 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
 }
 
 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
-			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level);
+			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level);
 
 static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 				      struct kvm_pgtable_mm_ops *mm_ops,
-				      kvm_pte_t *ptep, u32 level)
+				      kvm_pteref_t pteref, u32 level)
 {
 	enum kvm_pgtable_walk_flags flags = data->walker->flags;
+	kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false);
 	struct kvm_pgtable_visit_ctx ctx = {
 		.ptep	= ptep,
 		.old	= READ_ONCE(*ptep),
@@ -193,7 +194,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		.flags	= flags,
 	};
 	int ret = 0;
-	kvm_pte_t *childp;
+	kvm_pteref_t childp;
 	bool table = kvm_pte_table(ctx.old, level);
 
 	if (table && (ctx.flags & KVM_PGTABLE_WALK_TABLE_PRE))
@@ -214,7 +215,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 		goto out;
 	}
 
-	childp = kvm_pte_follow(ctx.old, mm_ops);
+	childp = (kvm_pteref_t)kvm_pte_follow(ctx.old, mm_ops);
 	ret = __kvm_pgtable_walk(data, mm_ops, childp, level + 1);
 	if (ret)
 		goto out;
@@ -227,7 +228,7 @@ out:
 }
 
 static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
-			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pte_t *pgtable, u32 level)
+			      struct kvm_pgtable_mm_ops *mm_ops, kvm_pteref_t pgtable, u32 level)
 {
 	u32 idx;
 	int ret = 0;
@@ -236,12 +237,12 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data,
 		return -EINVAL;
 
 	for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) {
-		kvm_pte_t *ptep = &pgtable[idx];
+		kvm_pteref_t pteref = &pgtable[idx];
 
 		if (data->addr >= data->end)
 			break;
 
-		ret = __kvm_pgtable_visit(data, mm_ops, ptep, level);
+		ret = __kvm_pgtable_visit(data, mm_ops, pteref, level);
 		if (ret)
 			break;
 	}
@@ -262,9 +263,9 @@ static int _kvm_pgtable_walk(struct kvm_pgtable *pgt, struct kvm_pgtable_walk_da
 		return -EINVAL;
 
 	for (idx = kvm_pgd_page_idx(pgt, data->addr); data->addr < data->end; ++idx) {
-		kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE];
+		kvm_pteref_t pteref = &pgt->pgd[idx * PTRS_PER_PTE];
 
-		ret = __kvm_pgtable_walk(data, pgt->mm_ops, ptep, pgt->start_level);
+		ret = __kvm_pgtable_walk(data, pgt->mm_ops, pteref, pgt->start_level);
 		if (ret)
 			break;
 	}
@@ -507,7 +508,7 @@ int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits,
 {
 	u64 levels = ARM64_HW_PGTABLE_LEVELS(va_bits);
 
-	pgt->pgd = (kvm_pte_t *)mm_ops->zalloc_page(NULL);
+	pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_page(NULL);
 	if (!pgt->pgd)
 		return -ENOMEM;
 
@@ -544,7 +545,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 	};
 
 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
-	pgt->mm_ops->put_page(pgt->pgd);
+	pgt->mm_ops->put_page(kvm_dereference_pteref(pgt->pgd, false));
 	pgt->pgd = NULL;
 }
 
@@ -1157,7 +1158,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
 
 	pgd_sz = kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
-	pgt->pgd = mm_ops->zalloc_pages_exact(pgd_sz);
+	pgt->pgd = (kvm_pteref_t)mm_ops->zalloc_pages_exact(pgd_sz);
 	if (!pgt->pgd)
 		return -ENOMEM;
 
@@ -1200,7 +1201,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 
 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
 	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
-	pgt->mm_ops->free_pages_exact(pgt->pgd, pgd_sz);
+	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(pgt->pgd, false), pgd_sz);
 	pgt->pgd = NULL;
 }
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 60ee3d9f01f8c..5e197ae190ef9 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -640,7 +640,7 @@ static struct kvm_pgtable_mm_ops kvm_user_mm_ops = {
 static int get_user_mapping_size(struct kvm *kvm, u64 addr)
 {
 	struct kvm_pgtable pgt = {
-		.pgd		= (kvm_pte_t *)kvm->mm->pgd,
+		.pgd		= (kvm_pteref_t)kvm->mm->pgd,
 		.ia_bits	= VA_BITS,
 		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
 				   CONFIG_PGTABLE_LEVELS),
-- 
GitLab


From 5c359cca1faf6d7671537fe1c240e8668467864d Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:37 +0000
Subject: [PATCH 1266/4122] KVM: arm64: Tear down unlinked stage-2 subtree
 after break-before-make

The break-before-make sequence is a bit annoying as it opens a window
wherein memory is unmapped from the guest. KVM should replace the PTE
as quickly as possible and avoid unnecessary work in between.

Presently, the stage-2 map walker tears down a removed table before
installing a block mapping when coalescing a table into a block. As the
removed table is no longer visible to hardware walkers after the
DSB+TLBI, it is possible to move the remaining cleanup to happen after
installing the new PTE.

Reshuffle the stage-2 map walker to install the new block entry in
the pre-order callback. Unwire all of the teardown logic and replace
it with a call to kvm_pgtable_stage2_free_removed() after fixing
the PTE. The post-order visitor is now completely unnecessary, so drop
it. Finally, touch up the comments to better represent the now
simplified map walker.

Note that the call to tear down the unlinked stage-2 is indirected
as a subsequent change will use an RCU callback to trigger tear down.
RCU is not available to pKVM, so there is a need to use different
implementations on pKVM and non-pKVM VMs.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-8-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h  |  3 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c |  6 ++
 arch/arm64/kvm/hyp/pgtable.c          | 85 +++++++--------------------
 arch/arm64/kvm/mmu.c                  |  8 +++
 4 files changed, 39 insertions(+), 63 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index cbd2851eefc1c..e70cf57b719ec 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -92,6 +92,8 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
  *				allocation is physically contiguous.
  * @free_pages_exact:		Free an exact number of memory pages previously
  *				allocated by zalloc_pages_exact.
+ * @free_removed_table:		Free a removed paging structure by unlinking and
+ *				dropping references.
  * @get_page:			Increment the refcount on a page.
  * @put_page:			Decrement the refcount on a page. When the
  *				refcount reaches 0 the page is automatically
@@ -110,6 +112,7 @@ struct kvm_pgtable_mm_ops {
 	void*		(*zalloc_page)(void *arg);
 	void*		(*zalloc_pages_exact)(size_t size);
 	void		(*free_pages_exact)(void *addr, size_t size);
+	void		(*free_removed_table)(void *addr, u32 level);
 	void		(*get_page)(void *addr);
 	void		(*put_page)(void *addr);
 	int		(*page_count)(void *addr);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index d21d1b08a055b..735769886b55c 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -79,6 +79,11 @@ static void host_s2_put_page(void *addr)
 	hyp_put_page(&host_s2_pool, addr);
 }
 
+static void host_s2_free_removed_table(void *addr, u32 level)
+{
+	kvm_pgtable_stage2_free_removed(&host_kvm.mm_ops, addr, level);
+}
+
 static int prepare_s2_pool(void *pgt_pool_base)
 {
 	unsigned long nr_pages, pfn;
@@ -93,6 +98,7 @@ static int prepare_s2_pool(void *pgt_pool_base)
 	host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
 		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
 		.zalloc_page = host_s2_zalloc_page,
+		.free_removed_table = host_s2_free_removed_table,
 		.phys_to_virt = hyp_phys_to_virt,
 		.virt_to_phys = hyp_virt_to_phys,
 		.page_count = hyp_page_count,
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 7511494537e5b..7c9782347570b 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -750,13 +750,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 				     struct stage2_map_data *data)
 {
-	if (data->anchor)
-		return 0;
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+	kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
+	int ret;
 
 	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return 0;
 
-	data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops);
 	kvm_clear_pte(ctx->ptep);
 
 	/*
@@ -765,8 +765,13 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 	 * individually.
 	 */
 	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
-	data->anchor = ctx->ptep;
-	return 0;
+
+	ret = stage2_map_walker_try_leaf(ctx, data);
+
+	mm_ops->put_page(ctx->ptep);
+	mm_ops->free_removed_table(childp, ctx->level);
+
+	return ret;
 }
 
 static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
@@ -776,13 +781,6 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	kvm_pte_t *childp;
 	int ret;
 
-	if (data->anchor) {
-		if (stage2_pte_is_counted(ctx->old))
-			mm_ops->put_page(ctx->ptep);
-
-		return 0;
-	}
-
 	ret = stage2_map_walker_try_leaf(ctx, data);
 	if (ret != -E2BIG)
 		return ret;
@@ -811,49 +809,14 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	return 0;
 }
 
-static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx,
-				      struct stage2_map_data *data)
-{
-	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
-	kvm_pte_t *childp;
-	int ret = 0;
-
-	if (!data->anchor)
-		return 0;
-
-	if (data->anchor == ctx->ptep) {
-		childp = data->childp;
-		data->anchor = NULL;
-		data->childp = NULL;
-		ret = stage2_map_walk_leaf(ctx, data);
-	} else {
-		childp = kvm_pte_follow(ctx->old, mm_ops);
-	}
-
-	mm_ops->put_page(childp);
-	mm_ops->put_page(ctx->ptep);
-
-	return ret;
-}
-
 /*
- * This is a little fiddly, as we use all three of the walk flags. The idea
- * is that the TABLE_PRE callback runs for table entries on the way down,
- * looking for table entries which we could conceivably replace with a
- * block entry for this mapping. If it finds one, then it sets the 'anchor'
- * field in 'struct stage2_map_data' to point at the table entry, before
- * clearing the entry to zero and descending into the now detached table.
- *
- * The behaviour of the LEAF callback then depends on whether or not the
- * anchor has been set. If not, then we're not using a block mapping higher
- * up the table and we perform the mapping at the existing leaves instead.
- * If, on the other hand, the anchor _is_ set, then we drop references to
- * all valid leaves so that the pages beneath the anchor can be freed.
+ * The TABLE_PRE callback runs for table entries on the way down, looking
+ * for table entries which we could conceivably replace with a block entry
+ * for this mapping. If it finds one it replaces the entry and calls
+ * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table.
  *
- * Finally, the TABLE_POST callback does nothing if the anchor has not
- * been set, but otherwise frees the page-table pages while walking back up
- * the page-table, installing the block entry when it revisits the anchor
- * pointer and clearing the anchor to NULL.
+ * Otherwise, the LEAF callback performs the mapping at the existing leaves
+ * instead.
  */
 static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			     enum kvm_pgtable_walk_flags visit)
@@ -865,11 +828,9 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 		return stage2_map_walk_table_pre(ctx, data);
 	case KVM_PGTABLE_WALK_LEAF:
 		return stage2_map_walk_leaf(ctx, data);
-	case KVM_PGTABLE_WALK_TABLE_POST:
-		return stage2_map_walk_table_post(ctx, data);
+	default:
+		return -EINVAL;
 	}
-
-	return -EINVAL;
 }
 
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -886,8 +847,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
-				  KVM_PGTABLE_WALK_LEAF |
-				  KVM_PGTABLE_WALK_TABLE_POST,
+				  KVM_PGTABLE_WALK_LEAF,
 		.arg		= &map_data,
 	};
 
@@ -917,8 +877,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
 		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
-				  KVM_PGTABLE_WALK_LEAF |
-				  KVM_PGTABLE_WALK_TABLE_POST,
+				  KVM_PGTABLE_WALK_LEAF,
 		.arg		= &map_data,
 	};
 
@@ -1207,7 +1166,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 
 void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
 {
-	kvm_pte_t *ptep = (kvm_pte_t *)pgtable;
+	kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
 	struct kvm_pgtable_walker walker = {
 		.cb	= stage2_free_walker,
 		.flags	= KVM_PGTABLE_WALK_LEAF |
@@ -1225,5 +1184,5 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
 		.end	= kvm_granule_size(level),
 	};
 
-	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level));
+	WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
 }
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 5e197ae190ef9..73ae908eb5d93 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -128,6 +128,13 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
 	free_pages_exact(virt, size);
 }
 
+static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
+
+static void stage2_free_removed_table(void *addr, u32 level)
+{
+	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
+}
+
 static void kvm_host_get_page(void *addr)
 {
 	get_page(virt_to_page(addr));
@@ -662,6 +669,7 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
 	.zalloc_page		= stage2_memcache_zalloc_page,
 	.zalloc_pages_exact	= kvm_s2_zalloc_pages_exact,
 	.free_pages_exact	= kvm_s2_free_pages_exact,
+	.free_removed_table	= stage2_free_removed_table,
 	.get_page		= kvm_host_get_page,
 	.put_page		= kvm_s2_put_page,
 	.page_count		= kvm_host_page_count,
-- 
GitLab


From c3119ae45dfb6038ca458ab5ba7a9fba2810845b Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:38 +0000
Subject: [PATCH 1267/4122] KVM: arm64: Protect stage-2 traversal with RCU

Use RCU to safely walk the stage-2 page tables in parallel. Acquire and
release the RCU read lock when traversing the page tables. Defer the
freeing of table memory to an RCU callback. Indirect the calls into RCU
and provide stubs for hypervisor code, as RCU is not available in such a
context.

The RCU protection doesn't amount to much at the moment, as readers are
already protected by the read-write lock (all walkers that free table
memory take the write lock). Nonetheless, a subsequent change will
futher relax the locking requirements around the stage-2 MMU, thereby
depending on RCU.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-9-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h | 49 ++++++++++++++++++++++++++++
 arch/arm64/kvm/hyp/pgtable.c         | 10 +++++-
 arch/arm64/kvm/mmu.c                 | 14 +++++++-
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index e70cf57b719ec..7634b6964779a 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -37,6 +37,13 @@ static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
 typedef kvm_pte_t *kvm_pteref_t;
 
 static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
@@ -44,6 +51,40 @@ static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared
 	return pteref;
 }
 
+static inline void kvm_pgtable_walk_begin(void) {}
+static inline void kvm_pgtable_walk_end(void) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+	return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
+{
+	return rcu_dereference_check(pteref, !shared);
+}
+
+static inline void kvm_pgtable_walk_begin(void)
+{
+	rcu_read_lock();
+}
+
+static inline void kvm_pgtable_walk_end(void)
+{
+	rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+	return rcu_read_lock_held();
+}
+
+#endif
+
 #define KVM_PTE_VALID			BIT(0)
 
 #define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
@@ -202,11 +243,14 @@ struct kvm_pgtable {
  *					children.
  * @KVM_PGTABLE_WALK_TABLE_POST:	Visit table entries after their
  *					children.
+ * @KVM_PGTABLE_WALK_SHARED:		Indicates the page-tables may be shared
+ *					with other software walkers.
  */
 enum kvm_pgtable_walk_flags {
 	KVM_PGTABLE_WALK_LEAF			= BIT(0),
 	KVM_PGTABLE_WALK_TABLE_PRE		= BIT(1),
 	KVM_PGTABLE_WALK_TABLE_POST		= BIT(2),
+	KVM_PGTABLE_WALK_SHARED			= BIT(3),
 };
 
 struct kvm_pgtable_visit_ctx {
@@ -223,6 +267,11 @@ struct kvm_pgtable_visit_ctx {
 typedef int (*kvm_pgtable_visitor_fn_t)(const struct kvm_pgtable_visit_ctx *ctx,
 					enum kvm_pgtable_walk_flags visit);
 
+static inline bool kvm_pgtable_walk_shared(const struct kvm_pgtable_visit_ctx *ctx)
+{
+	return ctx->flags & KVM_PGTABLE_WALK_SHARED;
+}
+
 /**
  * struct kvm_pgtable_walker - Hook into a page-table walk.
  * @cb:		Callback function to invoke during the walk.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 7c9782347570b..d8d963521d4eb 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -171,6 +171,9 @@ static int kvm_pgtable_visitor_cb(struct kvm_pgtable_walk_data *data,
 				  enum kvm_pgtable_walk_flags visit)
 {
 	struct kvm_pgtable_walker *walker = data->walker;
+
+	/* Ensure the appropriate lock is held (e.g. RCU lock for stage-2 MMU) */
+	WARN_ON_ONCE(kvm_pgtable_walk_shared(ctx) && !kvm_pgtable_walk_lock_held());
 	return walker->cb(ctx, visit);
 }
 
@@ -281,8 +284,13 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 		.end	= PAGE_ALIGN(walk_data.addr + size),
 		.walker	= walker,
 	};
+	int r;
+
+	kvm_pgtable_walk_begin();
+	r = _kvm_pgtable_walk(pgt, &walk_data);
+	kvm_pgtable_walk_end();
 
-	return _kvm_pgtable_walk(pgt, &walk_data);
+	return r;
 }
 
 struct leaf_walk_data {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 73ae908eb5d93..52e042399ba5d 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -130,9 +130,21 @@ static void kvm_s2_free_pages_exact(void *virt, size_t size)
 
 static struct kvm_pgtable_mm_ops kvm_s2_mm_ops;
 
+static void stage2_free_removed_table_rcu_cb(struct rcu_head *head)
+{
+	struct page *page = container_of(head, struct page, rcu_head);
+	void *pgtable = page_to_virt(page);
+	u32 level = page_private(page);
+
+	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, pgtable, level);
+}
+
 static void stage2_free_removed_table(void *addr, u32 level)
 {
-	kvm_pgtable_stage2_free_removed(&kvm_s2_mm_ops, addr, level);
+	struct page *page = virt_to_page(addr);
+
+	set_page_private(page, (unsigned long)level);
+	call_rcu(&page->rcu_head, stage2_free_removed_table_rcu_cb);
 }
 
 static void kvm_host_get_page(void *addr)
-- 
GitLab


From ca5de2448c3b4c018fe3d6223df8b59068be1cd7 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:39 +0000
Subject: [PATCH 1268/4122] KVM: arm64: Atomically update stage 2 leaf
 attributes in parallel walks

The stage2 attr walker is already used for parallel walks. Since commit
f783ef1c0e82 ("KVM: arm64: Add fast path to handle permission relaxation
during dirty logging"), KVM acquires the read lock when
write-unprotecting a PTE. However, the walker only uses a simple store
to update the PTE. This is safe as the only possible race is with
hardware updates to the access flag, which is benign.

However, a subsequent change to KVM will allow more changes to the stage
2 page tables to be done in parallel. Prepare the stage 2 attribute
walker by performing atomic updates to the PTE when walking in parallel.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-10-oliver.upton@linux.dev
---
 arch/arm64/kvm/hyp/pgtable.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index d8d963521d4eb..a34e2050f9314 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -185,7 +185,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 				      kvm_pteref_t pteref, u32 level)
 {
 	enum kvm_pgtable_walk_flags flags = data->walker->flags;
-	kvm_pte_t *ptep = kvm_dereference_pteref(pteref, false);
+	kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED);
 	struct kvm_pgtable_visit_ctx ctx = {
 		.ptep	= ptep,
 		.old	= READ_ONCE(*ptep),
@@ -675,6 +675,16 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
 	return !!pte;
 }
 
+static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
+{
+	if (!kvm_pgtable_walk_shared(ctx)) {
+		WRITE_ONCE(*ctx->ptep, new);
+		return true;
+	}
+
+	return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old;
+}
+
 static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
 			   struct kvm_pgtable_mm_ops *mm_ops)
 {
@@ -986,7 +996,9 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
 		    stage2_pte_executable(pte) && !stage2_pte_executable(ctx->old))
 			mm_ops->icache_inval_pou(kvm_pte_follow(pte, mm_ops),
 						  kvm_granule_size(ctx->level));
-		WRITE_ONCE(*ctx->ptep, pte);
+
+		if (!stage2_try_set_pte(ctx, pte))
+			return -EAGAIN;
 	}
 
 	return 0;
@@ -995,7 +1007,7 @@ static int stage2_attr_walker(const struct kvm_pgtable_visit_ctx *ctx,
 static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
 				    u64 size, kvm_pte_t attr_set,
 				    kvm_pte_t attr_clr, kvm_pte_t *orig_pte,
-				    u32 *level)
+				    u32 *level, enum kvm_pgtable_walk_flags flags)
 {
 	int ret;
 	kvm_pte_t attr_mask = KVM_PTE_LEAF_ATTR_LO | KVM_PTE_LEAF_ATTR_HI;
@@ -1006,7 +1018,7 @@ static int stage2_update_leaf_attrs(struct kvm_pgtable *pgt, u64 addr,
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_attr_walker,
 		.arg		= &data,
-		.flags		= KVM_PGTABLE_WALK_LEAF,
+		.flags		= flags | KVM_PGTABLE_WALK_LEAF,
 	};
 
 	ret = kvm_pgtable_walk(pgt, addr, size, &walker);
@@ -1025,14 +1037,14 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
 	return stage2_update_leaf_attrs(pgt, addr, size, 0,
 					KVM_PTE_LEAF_ATTR_LO_S2_S2AP_W,
-					NULL, NULL);
+					NULL, NULL, 0);
 }
 
 kvm_pte_t kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
 	stage2_update_leaf_attrs(pgt, addr, 1, KVM_PTE_LEAF_ATTR_LO_S2_AF, 0,
-				 &pte, NULL);
+				 &pte, NULL, 0);
 	dsb(ishst);
 	return pte;
 }
@@ -1041,7 +1053,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
 	stage2_update_leaf_attrs(pgt, addr, 1, 0, KVM_PTE_LEAF_ATTR_LO_S2_AF,
-				 &pte, NULL);
+				 &pte, NULL, 0);
 	/*
 	 * "But where's the TLBI?!", you scream.
 	 * "Over in the core code", I sigh.
@@ -1054,7 +1066,7 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr)
 bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr)
 {
 	kvm_pte_t pte = 0;
-	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL);
+	stage2_update_leaf_attrs(pgt, addr, 1, 0, 0, &pte, NULL, 0);
 	return pte & KVM_PTE_LEAF_ATTR_LO_S2_AF;
 }
 
@@ -1077,7 +1089,8 @@ int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
 	if (prot & KVM_PGTABLE_PROT_X)
 		clr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
 
-	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level);
+	ret = stage2_update_leaf_attrs(pgt, addr, 1, set, clr, NULL, &level,
+				       KVM_PGTABLE_WALK_SHARED);
 	if (!ret)
 		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, pgt->mmu, addr, level);
 	return ret;
-- 
GitLab


From 331aa3a0547d1c794587e0df374d13b16645e832 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:56:40 +0000
Subject: [PATCH 1269/4122] KVM: arm64: Split init and set for table PTE

Create a helper to initialize a table and directly call
smp_store_release() to install it (for now). Prepare for a subsequent
change that generalizes PTE writes with a helper.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215644.1895162-11-oliver.upton@linux.dev
---
 arch/arm64/kvm/hyp/pgtable.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index a34e2050f9314..f4dd77c6c97d2 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -136,16 +136,13 @@ static void kvm_clear_pte(kvm_pte_t *ptep)
 	WRITE_ONCE(*ptep, 0);
 }
 
-static void kvm_set_table_pte(kvm_pte_t *ptep, kvm_pte_t *childp,
-			      struct kvm_pgtable_mm_ops *mm_ops)
+static kvm_pte_t kvm_init_table_pte(kvm_pte_t *childp, struct kvm_pgtable_mm_ops *mm_ops)
 {
-	kvm_pte_t old = *ptep, pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
+	kvm_pte_t pte = kvm_phys_to_pte(mm_ops->virt_to_phys(childp));
 
 	pte |= FIELD_PREP(KVM_PTE_TYPE, KVM_PTE_TYPE_TABLE);
 	pte |= KVM_PTE_VALID;
-
-	WARN_ON(kvm_pte_valid(old));
-	smp_store_release(ptep, pte);
+	return pte;
 }
 
 static kvm_pte_t kvm_init_valid_leaf_pte(u64 pa, kvm_pte_t attr, u32 level)
@@ -413,7 +410,7 @@ static bool hyp_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 			  enum kvm_pgtable_walk_flags visit)
 {
-	kvm_pte_t *childp;
+	kvm_pte_t *childp, new;
 	struct hyp_map_data *data = ctx->arg;
 	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
 
@@ -427,8 +424,10 @@ static int hyp_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 	if (!childp)
 		return -ENOMEM;
 
-	kvm_set_table_pte(ctx->ptep, childp, mm_ops);
+	new = kvm_init_table_pte(childp, mm_ops);
 	mm_ops->get_page(ctx->ptep);
+	smp_store_release(ctx->ptep, new);
+
 	return 0;
 }
 
@@ -796,7 +795,7 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 				struct stage2_map_data *data)
 {
 	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
-	kvm_pte_t *childp;
+	kvm_pte_t *childp, new;
 	int ret;
 
 	ret = stage2_map_walker_try_leaf(ctx, data);
@@ -821,8 +820,9 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	if (stage2_pte_is_counted(ctx->old))
 		stage2_put_pte(ctx, data->mmu, mm_ops);
 
-	kvm_set_table_pte(ctx->ptep, childp, mm_ops);
+	new = kvm_init_table_pte(childp, mm_ops);
 	mm_ops->get_page(ctx->ptep);
+	smp_store_release(ctx->ptep, new);
 
 	return 0;
 }
-- 
GitLab


From 0ab12f3574db6cb432917a667f9392a88e8f0dfc Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:58:55 +0000
Subject: [PATCH 1270/4122] KVM: arm64: Make block->table PTE changes
 parallel-aware

In order to service stage-2 faults in parallel, stage-2 table walkers
must take exclusive ownership of the PTE being worked on. An additional
requirement of the architecture is that software must perform a
'break-before-make' operation when changing the block size used for
mapping memory.

Roll these two concepts together into helpers for performing a
'break-before-make' sequence. Use a special PTE value to indicate a PTE
has been locked by a software walker. Additionally, use an atomic
compare-exchange to 'break' the PTE when the stage-2 page tables are
possibly shared with another software walker. Elide the DSB + TLBI if
the evicted PTE was invalid (and thus not subject to break-before-make).

All of the atomics do nothing for now, as the stage-2 walker isn't fully
ready to perform parallel walks.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215855.1895367-1-oliver.upton@linux.dev
---
 arch/arm64/kvm/hyp/pgtable.c | 80 +++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f4dd77c6c97d2..b9f0d792b8d94 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -49,6 +49,12 @@
 #define KVM_INVALID_PTE_OWNER_MASK	GENMASK(9, 2)
 #define KVM_MAX_OWNER_ID		1
 
+/*
+ * Used to indicate a pte for which a 'break-before-make' sequence is in
+ * progress.
+ */
+#define KVM_INVALID_PTE_LOCKED		BIT(10)
+
 struct kvm_pgtable_walk_data {
 	struct kvm_pgtable_walker	*walker;
 
@@ -674,6 +680,11 @@ static bool stage2_pte_is_counted(kvm_pte_t pte)
 	return !!pte;
 }
 
+static bool stage2_pte_is_locked(kvm_pte_t pte)
+{
+	return !kvm_pte_valid(pte) && (pte & KVM_INVALID_PTE_LOCKED);
+}
+
 static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
 {
 	if (!kvm_pgtable_walk_shared(ctx)) {
@@ -684,6 +695,64 @@ static bool stage2_try_set_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_
 	return cmpxchg(ctx->ptep, ctx->old, new) == ctx->old;
 }
 
+/**
+ * stage2_try_break_pte() - Invalidates a pte according to the
+ *			    'break-before-make' requirements of the
+ *			    architecture.
+ *
+ * @ctx: context of the visited pte.
+ * @mmu: stage-2 mmu
+ *
+ * Returns: true if the pte was successfully broken.
+ *
+ * If the removed pte was valid, performs the necessary serialization and TLB
+ * invalidation for the old value. For counted ptes, drops the reference count
+ * on the containing table page.
+ */
+static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
+				 struct kvm_s2_mmu *mmu)
+{
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+
+	if (stage2_pte_is_locked(ctx->old)) {
+		/*
+		 * Should never occur if this walker has exclusive access to the
+		 * page tables.
+		 */
+		WARN_ON(!kvm_pgtable_walk_shared(ctx));
+		return false;
+	}
+
+	if (!stage2_try_set_pte(ctx, KVM_INVALID_PTE_LOCKED))
+		return false;
+
+	/*
+	 * Perform the appropriate TLB invalidation based on the evicted pte
+	 * value (if any).
+	 */
+	if (kvm_pte_table(ctx->old, ctx->level))
+		kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+	else if (kvm_pte_valid(ctx->old))
+		kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+	if (stage2_pte_is_counted(ctx->old))
+		mm_ops->put_page(ctx->ptep);
+
+	return true;
+}
+
+static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t new)
+{
+	struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+
+	WARN_ON(!stage2_pte_is_locked(*ctx->ptep));
+
+	if (stage2_pte_is_counted(new))
+		mm_ops->get_page(ctx->ptep);
+
+	smp_store_release(ctx->ptep, new);
+}
+
 static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
 			   struct kvm_pgtable_mm_ops *mm_ops)
 {
@@ -812,17 +881,18 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	if (!childp)
 		return -ENOMEM;
 
+	if (!stage2_try_break_pte(ctx, data->mmu)) {
+		mm_ops->put_page(childp);
+		return -EAGAIN;
+	}
+
 	/*
 	 * If we've run into an existing block mapping then replace it with
 	 * a table. Accesses beyond 'end' that fall within the new table
 	 * will be mapped lazily.
 	 */
-	if (stage2_pte_is_counted(ctx->old))
-		stage2_put_pte(ctx, data->mmu, mm_ops);
-
 	new = kvm_init_table_pte(childp, mm_ops);
-	mm_ops->get_page(ctx->ptep);
-	smp_store_release(ctx->ptep, new);
+	stage2_make_pte(ctx, new);
 
 	return 0;
 }
-- 
GitLab


From 946fbfdf336b811479e024136c7cabc00157b6b9 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 21:59:34 +0000
Subject: [PATCH 1271/4122] KVM: arm64: Make leaf->leaf PTE changes
 parallel-aware

Convert stage2_map_walker_try_leaf() to use the new break-before-make
helpers, thereby making the handler parallel-aware. As before, avoid the
break-before-make if recreating the existing mapping. Additionally,
retry execution if another vCPU thread is modifying the same PTE.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107215934.1895478-1-oliver.upton@linux.dev
---
 arch/arm64/kvm/hyp/pgtable.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index b9f0d792b8d94..238f29389617b 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -804,18 +804,17 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	else
 		new = kvm_init_invalid_leaf_owner(data->owner_id);
 
-	if (stage2_pte_is_counted(ctx->old)) {
-		/*
-		 * Skip updating the PTE if we are trying to recreate the exact
-		 * same mapping or only change the access permissions. Instead,
-		 * the vCPU will exit one more time from guest if still needed
-		 * and then go through the path of relaxing permissions.
-		 */
-		if (!stage2_pte_needs_update(ctx->old, new))
-			return -EAGAIN;
+	/*
+	 * Skip updating the PTE if we are trying to recreate the exact
+	 * same mapping or only change the access permissions. Instead,
+	 * the vCPU will exit one more time from guest if still needed
+	 * and then go through the path of relaxing permissions.
+	 */
+	if (!stage2_pte_needs_update(ctx->old, new))
+		return -EAGAIN;
 
-		stage2_put_pte(ctx, data->mmu, mm_ops);
-	}
+	if (!stage2_try_break_pte(ctx, data->mmu))
+		return -EAGAIN;
 
 	/* Perform CMOs before installation of the guest stage-2 PTE */
 	if (mm_ops->dcache_clean_inval_poc && stage2_pte_cacheable(pgt, new))
@@ -825,9 +824,8 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
 	if (mm_ops->icache_inval_pou && stage2_pte_executable(new))
 		mm_ops->icache_inval_pou(kvm_pte_follow(new, mm_ops), granule);
 
-	smp_store_release(ctx->ptep, new);
-	if (stage2_pte_is_counted(new))
-		mm_ops->get_page(ctx->ptep);
+	stage2_make_pte(ctx, new);
+
 	if (kvm_phys_is_valid(phys))
 		data->phys += granule;
 	return 0;
-- 
GitLab


From af87fc03cfdf6893011df419588d27acdfb9c197 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 22:00:06 +0000
Subject: [PATCH 1272/4122] KVM: arm64: Make table->block changes
 parallel-aware

stage2_map_walker_try_leaf() and friends now handle stage-2 PTEs
generically, and perform the correct flush when a table PTE is removed.
Additionally, they've been made parallel-aware, using an atomic break
to take ownership of the PTE.

Stop clearing the PTE in the pre-order callback and instead let
stage2_map_walker_try_leaf() deal with it.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107220006.1895572-1-oliver.upton@linux.dev
---
 arch/arm64/kvm/hyp/pgtable.c | 15 +++------------
 1 file changed, 3 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 238f29389617b..f814422ef7952 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -841,21 +841,12 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
 	if (!stage2_leaf_mapping_allowed(ctx, data))
 		return 0;
 
-	kvm_clear_pte(ctx->ptep);
-
-	/*
-	 * Invalidate the whole stage-2, as we may have numerous leaf
-	 * entries below us which would otherwise need invalidating
-	 * individually.
-	 */
-	kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
-
 	ret = stage2_map_walker_try_leaf(ctx, data);
+	if (ret)
+		return ret;
 
-	mm_ops->put_page(ctx->ptep);
 	mm_ops->free_removed_table(childp, ctx->level);
-
-	return ret;
+	return 0;
 }
 
 static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
-- 
GitLab


From 1577cb5823cefdff4416f272a88143ee933d97f5 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Mon, 7 Nov 2022 22:00:33 +0000
Subject: [PATCH 1273/4122] KVM: arm64: Handle stage-2 faults in parallel

The stage-2 map walker has been made parallel-aware, and as such can be
called while only holding the read side of the MMU lock. Rip out the
conditional locking in user_mem_abort() and instead grab the read lock.
Continue to take the write lock from other callsites to
kvm_pgtable_stage2_map().

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221107220033.1895655-1-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h  |  3 ++-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c |  2 +-
 arch/arm64/kvm/hyp/pgtable.c          |  5 +++--
 arch/arm64/kvm/mmu.c                  | 31 ++++++---------------------
 4 files changed, 13 insertions(+), 28 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 7634b6964779a..a874ce0ce7b51 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -412,6 +412,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
  * @prot:	Permissions and attributes for the mapping.
  * @mc:		Cache of pre-allocated and zeroed memory from which to allocate
  *		page-table pages.
+ * @flags:	Flags to control the page-table walk (ex. a shared walk)
  *
  * The offset of @addr within a page is ignored, @size is rounded-up to
  * the next page boundary and @phys is rounded-down to the previous page
@@ -433,7 +434,7 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
  */
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 			   u64 phys, enum kvm_pgtable_prot prot,
-			   void *mc);
+			   void *mc, enum kvm_pgtable_walk_flags flags);
 
 /**
  * kvm_pgtable_stage2_set_owner() - Unmap and annotate pages in the IPA space to
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 735769886b55c..f6d82bf33ce1d 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -257,7 +257,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
 				      enum kvm_pgtable_prot prot)
 {
 	return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
-				      prot, &host_s2_pool);
+				      prot, &host_s2_pool, 0);
 }
 
 /*
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f814422ef7952..5bca9610d040d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -912,7 +912,7 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
 
 int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 			   u64 phys, enum kvm_pgtable_prot prot,
-			   void *mc)
+			   void *mc, enum kvm_pgtable_walk_flags flags)
 {
 	int ret;
 	struct stage2_map_data map_data = {
@@ -923,7 +923,8 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	};
 	struct kvm_pgtable_walker walker = {
 		.cb		= stage2_map_walker,
-		.flags		= KVM_PGTABLE_WALK_TABLE_PRE |
+		.flags		= flags |
+				  KVM_PGTABLE_WALK_TABLE_PRE |
 				  KVM_PGTABLE_WALK_LEAF,
 		.arg		= &map_data,
 	};
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 52e042399ba5d..410c2a37fe32f 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -861,7 +861,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 
 		write_lock(&kvm->mmu_lock);
 		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
-					     &cache);
+					     &cache, 0);
 		write_unlock(&kvm->mmu_lock);
 		if (ret)
 			break;
@@ -1156,7 +1156,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	gfn_t gfn;
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
-	bool use_read_lock = false;
 	unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
 	unsigned long vma_pagesize, fault_granule;
 	enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
@@ -1191,8 +1190,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (logging_active) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
-		use_read_lock = (fault_status == FSC_PERM && write_fault &&
-				 fault_granule == PAGE_SIZE);
 	} else {
 		vma_shift = get_vma_page_shift(vma, hva);
 	}
@@ -1291,15 +1288,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (exec_fault && device)
 		return -ENOEXEC;
 
-	/*
-	 * To reduce MMU contentions and enhance concurrency during dirty
-	 * logging dirty logging, only acquire read lock for permission
-	 * relaxation.
-	 */
-	if (use_read_lock)
-		read_lock(&kvm->mmu_lock);
-	else
-		write_lock(&kvm->mmu_lock);
+	read_lock(&kvm->mmu_lock);
 	pgt = vcpu->arch.hw_mmu->pgt;
 	if (mmu_invalidate_retry(kvm, mmu_seq))
 		goto out_unlock;
@@ -1343,15 +1332,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * permissions only if vma_pagesize equals fault_granule. Otherwise,
 	 * kvm_pgtable_stage2_map() should be called to change block size.
 	 */
-	if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
+	if (fault_status == FSC_PERM && vma_pagesize == fault_granule)
 		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
-	} else {
-		WARN_ONCE(use_read_lock, "Attempted stage-2 map outside of write lock\n");
-
+	else
 		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
 					     __pfn_to_phys(pfn), prot,
-					     memcache);
-	}
+					     memcache, KVM_PGTABLE_WALK_SHARED);
 
 	/* Mark the page dirty only if the fault is handled successfully */
 	if (writable && !ret) {
@@ -1360,10 +1346,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 out_unlock:
-	if (use_read_lock)
-		read_unlock(&kvm->mmu_lock);
-	else
-		write_unlock(&kvm->mmu_lock);
+	read_unlock(&kvm->mmu_lock);
 	kvm_set_pfn_accessed(pfn);
 	kvm_release_pfn_clean(pfn);
 	return ret != -EAGAIN ? ret : 0;
@@ -1569,7 +1552,7 @@ bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	 */
 	kvm_pgtable_stage2_map(kvm->arch.mmu.pgt, range->start << PAGE_SHIFT,
 			       PAGE_SIZE, __pfn_to_phys(pfn),
-			       KVM_PGTABLE_PROT_R, NULL);
+			       KVM_PGTABLE_PROT_R, NULL, 0);
 
 	return false;
 }
-- 
GitLab


From fba31beab3578b793060f549188fe682df7d3ed9 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Wed, 9 Nov 2022 15:10:39 +0530
Subject: [PATCH 1274/4122] PCI: qcom: Fix error message for
 reset_control_assert()

Fix the error message to mention "assert" instead of "deassert".

Link: https://lore.kernel.org/r/20221109094039.25753-1-manivannan.sadhasivam@linaro.org
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index f711acacaeaf8..cf27345f65759 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -1236,7 +1236,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
 
 	ret = reset_control_assert(res->pci_reset);
 	if (ret < 0) {
-		dev_err(dev, "cannot deassert pci reset\n");
+		dev_err(dev, "cannot assert pci reset\n");
 		goto err_disable_clocks;
 	}
 
-- 
GitLab


From c9bfd858402c86b6559aa05227eb5dbae3ce862e Mon Sep 17 00:00:00 2001
From: Jianjun Wang <jianjun.wang@mediatek.com>
Date: Thu, 3 Nov 2022 10:56:54 +0800
Subject: [PATCH 1275/4122] dt-bindings: PCI: mediatek-gen3: Support mt8195

In order to support mt8195 pcie node, update the yaml to support new
properties of iommu and power-domain, and update the reset-names
property to allow only one 'mac' name.

Link: https://lore.kernel.org/r/20221103025656.8714-2-tinghan.shen@mediatek.com
Signed-off-by: Jianjun Wang <jianjun.wang@mediatek.com>
Signed-off-by: TingHan Shen <tinghan.shen@mediatek.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/pci/mediatek-pcie-gen3.yaml | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
index c00be39af64e5..bc90f0ec7bd99 100644
--- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
+++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
@@ -70,15 +70,21 @@ properties:
     minItems: 1
     maxItems: 8
 
+  iommu-map:
+    maxItems: 1
+
+  iommu-map-mask:
+    const: 0
+
   resets:
     minItems: 1
     maxItems: 2
 
   reset-names:
     minItems: 1
+    maxItems: 2
     items:
-      - const: phy
-      - const: mac
+      enum: [ phy, mac ]
 
   clocks:
     maxItems: 6
@@ -107,6 +113,9 @@ properties:
     items:
       - const: pcie-phy
 
+  power-domains:
+    maxItems: 1
+
   '#interrupt-cells':
     const: 1
 
-- 
GitLab


From 8405d8f0956d227c3355d9bdbabc23f79f721ce4 Mon Sep 17 00:00:00 2001
From: Vidya Sagar <vidyas@nvidia.com>
Date: Tue, 13 Sep 2022 15:42:37 +0530
Subject: [PATCH 1276/4122] PCI: dwc: Use dev_info for PCIe link down event
 logging

Some of the platforms (like Tegra194 and Tegra234) have open slots and
not having an endpoint connected to the slot is not an error.
So, changing the macro from dev_err to dev_info to log the event.

Link: https://lore.kernel.org/r/20220913101237.4337-1-vidyas@nvidia.com
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Vidya Sagar <vidyas@nvidia.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Acked-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/pci/controller/dwc/pcie-designware.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 9e4d96e5a3f5a..432aead68d1fd 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -448,7 +448,7 @@ int dw_pcie_wait_for_link(struct dw_pcie *pci)
 	}
 
 	if (retries >= LINK_WAIT_MAX_RETRIES) {
-		dev_err(pci->dev, "Phy link never came up\n");
+		dev_info(pci->dev, "Phy link never came up\n");
 		return -ETIMEDOUT;
 	}
 
-- 
GitLab


From 72f542ac4f39fb42b8a6380ac8d9b3c39019d2d6 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@ti.com>
Date: Fri, 28 Oct 2022 02:17:16 -0700
Subject: [PATCH 1277/4122] dt-bindings: PCI: Add host mode device-id for
 j721s2 platform

Add unique device-id of 0xb013 for j721s2 platform to oneOf field.

Link: https://lore.kernel.org/r/20221028091716.21414-1-mranostay@ti.com
Signed-off-by: Matt Ranostay <mranostay@ti.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
index d9df7cd922f1f..b0513b197d087 100644
--- a/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
+++ b/Documentation/devicetree/bindings/pci/ti,j721e-pci-host.yaml
@@ -73,6 +73,8 @@ properties:
           - const: 0xb00f
       - items:
           - const: 0xb010
+      - items:
+          - const: 0xb013
 
   msi-map: true
 
-- 
GitLab


From be7e8b917ead54754cc14b6c03769c8738a3f3f3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 9 Nov 2022 15:48:43 +0100
Subject: [PATCH 1278/4122] blkdev: make struct
 block_device_operations.devnode() take a const *

The devnode() callback in struct block_device_operations should not be
modifying the device that is passed into it, so mark it as a const * and
propagate the function signature changes out into the one subsystem that
actually uses this callback.

Acked-by: Jens Axboe <axboe@kernel.dk>
Link: https://lore.kernel.org/r/20221109144843.679668-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/block/pktcdvd.c | 2 +-
 include/linux/blkdev.h  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 4cea3b08087ed..8dc15be959625 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2632,7 +2632,7 @@ static unsigned int pkt_check_events(struct gendisk *disk,
 	return attached_disk->fops->check_events(attached_disk, clearing);
 }
 
-static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
+static char *pkt_devnode(const struct gendisk *disk, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name);
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50e358a19d986..2a455793462b2 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1413,7 +1413,7 @@ struct block_device_operations {
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 	int (*report_zones)(struct gendisk *, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
-	char *(*devnode)(struct gendisk *disk, umode_t *mode);
+	char *(*devnode)(const struct gendisk *disk, umode_t *mode);
 	/* returns the length of the identifier or a negative errno: */
 	int (*get_unique_id)(struct gendisk *disk, u8 id[16],
 			enum blk_unique_id id_type);
-- 
GitLab


From 927bdd1e65bd14ae035d9c625df2f4ccd51e8a83 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 9 Nov 2022 15:07:10 +0100
Subject: [PATCH 1279/4122] driver core: remove devm_device_remove_groups()

There is no in-kernel user of this function, so it is not needed anymore
and can be removed.

Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221109140711.105222-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 17 -----------------
 include/linux/device.h |  2 --
 2 files changed, 19 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index f07b1c349f791..5fd99f2df692c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2693,23 +2693,6 @@ int devm_device_add_groups(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(devm_device_add_groups);
 
-/**
- * devm_device_remove_groups - remove a list of managed groups
- *
- * @dev:	The device for the groups to be removed from
- * @groups:	NULL terminated list of groups to be removed
- *
- * If groups is not NULL, remove the specified groups from the device.
- */
-void devm_device_remove_groups(struct device *dev,
-			       const struct attribute_group **groups)
-{
-	WARN_ON(devres_release(dev, devm_attr_groups_remove,
-			       devm_attr_group_match,
-			       /* cast away const */ (void *)groups));
-}
-EXPORT_SYMBOL_GPL(devm_device_remove_groups);
-
 static int device_add_attrs(struct device *dev)
 {
 	struct class *class = dev->class;
diff --git a/include/linux/device.h b/include/linux/device.h
index 023ea50b19162..4efc607c008cd 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1062,8 +1062,6 @@ static inline void device_remove_group(struct device *dev,
 
 int __must_check devm_device_add_groups(struct device *dev,
 					const struct attribute_group **groups);
-void devm_device_remove_groups(struct device *dev,
-			       const struct attribute_group **groups);
 int __must_check devm_device_add_group(struct device *dev,
 				       const struct attribute_group *grp);
 void devm_device_remove_group(struct device *dev,
-- 
GitLab


From 0f0605d550ed986279030d452c7ed10df34da449 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 9 Nov 2022 15:07:11 +0100
Subject: [PATCH 1280/4122] driver core: remove devm_device_remove_group()

There is no in-kernel user of this function, so it is not needed anymore
and can be removed.

Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221109140711.105222-2-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c    | 22 ----------------------
 include/linux/device.h |  2 --
 2 files changed, 24 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 5fd99f2df692c..af721e6c0253e 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2585,11 +2585,6 @@ union device_attr_group_devres {
 	const struct attribute_group **groups;
 };
 
-static int devm_attr_group_match(struct device *dev, void *res, void *data)
-{
-	return ((union device_attr_group_devres *)res)->group == data;
-}
-
 static void devm_attr_group_remove(struct device *dev, void *res)
 {
 	union device_attr_group_devres *devres = res;
@@ -2640,23 +2635,6 @@ int devm_device_add_group(struct device *dev, const struct attribute_group *grp)
 }
 EXPORT_SYMBOL_GPL(devm_device_add_group);
 
-/**
- * devm_device_remove_group: remove a managed group from a device
- * @dev:	device to remove the group from
- * @grp:	group to remove
- *
- * This function removes a group of attributes from a device. The attributes
- * previously have to have been created for this group, otherwise it will fail.
- */
-void devm_device_remove_group(struct device *dev,
-			      const struct attribute_group *grp)
-{
-	WARN_ON(devres_release(dev, devm_attr_group_remove,
-			       devm_attr_group_match,
-			       /* cast away const */ (void *)grp));
-}
-EXPORT_SYMBOL_GPL(devm_device_remove_group);
-
 /**
  * devm_device_add_groups - create a bunch of managed attribute groups
  * @dev:	The device to create the group for
diff --git a/include/linux/device.h b/include/linux/device.h
index 4efc607c008cd..84ae52de6746a 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -1064,8 +1064,6 @@ int __must_check devm_device_add_groups(struct device *dev,
 					const struct attribute_group **groups);
 int __must_check devm_device_add_group(struct device *dev,
 				       const struct attribute_group *grp);
-void devm_device_remove_group(struct device *dev,
-			      const struct attribute_group *grp);
 
 /*
  * Platform "fixup" functions - allow the platform to have their say
-- 
GitLab


From 189a87f8ef8ceed16b2a230dc0ce65117068ac30 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 30 Oct 2022 10:22:55 +0100
Subject: [PATCH 1281/4122] driver core: mark driver_allows_async_probing
 static

driver_allows_async_probing is only used in drivers/base/dd.c, so mark
it static and remove the declaration in drivers/base/base.h.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20221030092255.872280-1-hch@lst.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/base.h | 1 -
 drivers/base/dd.c   | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/base/base.h b/drivers/base/base.h
index b902d1ecc247f..7d4803c03d3e6 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -146,7 +146,6 @@ static inline int driver_match_device(struct device_driver *drv,
 {
 	return drv->bus->match ? drv->bus->match(dev, drv) : 1;
 }
-extern bool driver_allows_async_probing(struct device_driver *drv);
 
 extern int driver_add_groups(struct device_driver *drv,
 			     const struct attribute_group **groups);
diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 3dda62503102f..4001e22617ab0 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -843,7 +843,7 @@ static int __init save_async_options(char *buf)
 }
 __setup("driver_async_probe=", save_async_options);
 
-bool driver_allows_async_probing(struct device_driver *drv)
+static bool driver_allows_async_probing(struct device_driver *drv)
 {
 	switch (drv->probe_type) {
 	case PROBE_PREFER_ASYNCHRONOUS:
-- 
GitLab


From 3da72e18371c41a6f6f96b594854b178168c7757 Mon Sep 17 00:00:00 2001
From: Pierre Gondois <pierre.gondois@arm.com>
Date: Wed, 26 Oct 2022 20:59:54 +0200
Subject: [PATCH 1282/4122] cacheinfo: Decrement refcount in
 cache_setup_of_node()

Refcounts to DT nodes are only incremented in the function
and never decremented. Decrease the refcounts when necessary.

Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Link: https://lore.kernel.org/r/20221026185954.991547-1-pierre.gondois@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/cacheinfo.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 4b5cd08c5a657..04317cde800cb 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -196,7 +196,7 @@ static void cache_of_set_props(struct cacheinfo *this_leaf,
 
 static int cache_setup_of_node(unsigned int cpu)
 {
-	struct device_node *np;
+	struct device_node *np, *prev;
 	struct cacheinfo *this_leaf;
 	unsigned int index = 0;
 
@@ -206,19 +206,24 @@ static int cache_setup_of_node(unsigned int cpu)
 		return -ENOENT;
 	}
 
+	prev = np;
+
 	while (index < cache_leaves(cpu)) {
 		this_leaf = per_cpu_cacheinfo_idx(cpu, index);
-		if (this_leaf->level != 1)
+		if (this_leaf->level != 1) {
 			np = of_find_next_cache_node(np);
-		else
-			np = of_node_get(np);/* cpu node itself */
-		if (!np)
-			break;
+			of_node_put(prev);
+			prev = np;
+			if (!np)
+				break;
+		}
 		cache_of_set_props(this_leaf, np);
 		this_leaf->fw_token = np;
 		index++;
 	}
 
+	of_node_put(np);
+
 	if (index != cache_leaves(cpu)) /* not all OF nodes populated */
 		return -ENOENT;
 
-- 
GitLab


From 730600223b64918324ab322ab174361bd41073c0 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 1 Nov 2022 22:14:17 +0100
Subject: [PATCH 1283/4122] driver core: Use kstrtobool() instead of
 strtobool()

strtobool() is the same as kstrtobool().
However, the latter is more used within the kernel.

In order to remove strtobool() and slightly simplify kstrtox.h, switch to
the other function name.

While at it, include the corresponding header file (<linux/kstrtox.h>)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/02ba683a5c0716638ad8ca11e8b0fdca97c4f294.1667336095.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index af721e6c0253e..ab01828fe6c1f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -14,6 +14,7 @@
 #include <linux/err.h>
 #include <linux/fwnode.h>
 #include <linux/init.h>
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/string.h>
@@ -1628,7 +1629,7 @@ early_param("fw_devlink", fw_devlink_setup);
 static bool fw_devlink_strict;
 static int __init fw_devlink_strict_setup(char *arg)
 {
-	return strtobool(arg, &fw_devlink_strict);
+	return kstrtobool(arg, &fw_devlink_strict);
 }
 early_param("fw_devlink.strict", fw_devlink_strict_setup);
 
@@ -2280,7 +2281,7 @@ ssize_t device_store_bool(struct device *dev, struct device_attribute *attr,
 {
 	struct dev_ext_attribute *ea = to_ext_attr(attr);
 
-	if (strtobool(buf, ea->var) < 0)
+	if (kstrtobool(buf, ea->var) < 0)
 		return -EINVAL;
 
 	return size;
@@ -2534,7 +2535,7 @@ static ssize_t online_store(struct device *dev, struct device_attribute *attr,
 	bool val;
 	int ret;
 
-	ret = strtobool(buf, &val);
+	ret = kstrtobool(buf, &val);
 	if (ret < 0)
 		return ret;
 
-- 
GitLab


From 27c0d217340e47ec995557f61423ef415afba987 Mon Sep 17 00:00:00 2001
From: "Isaac J. Manjarres" <isaacmanjarres@google.com>
Date: Tue, 20 Sep 2022 17:14:13 -0700
Subject: [PATCH 1284/4122] driver core: Fix bus_type.match() error handling in
 __driver_attach()

When a driver registers with a bus, it will attempt to match with every
device on the bus through the __driver_attach() function. Currently, if
the bus_type.match() function encounters an error that is not
-EPROBE_DEFER, __driver_attach() will return a negative error code, which
causes the driver registration logic to stop trying to match with the
remaining devices on the bus.

This behavior is not correct; a failure while matching a driver to a
device does not mean that the driver won't be able to match and bind
with other devices on the bus. Update the logic in __driver_attach()
to reflect this.

Fixes: 656b8035b0ee ("ARM: 8524/1: driver cohandle -EPROBE_DEFER from bus_type.match()")
Cc: stable@vger.kernel.org
Cc: Saravana Kannan <saravanak@google.com>
Signed-off-by: Isaac J. Manjarres <isaacmanjarres@google.com>
Link: https://lore.kernel.org/r/20220921001414.4046492-1-isaacmanjarres@google.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/dd.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index 4001e22617ab0..e9b2f9c25efe4 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -1162,7 +1162,11 @@ static int __driver_attach(struct device *dev, void *data)
 		return 0;
 	} else if (ret < 0) {
 		dev_dbg(dev, "Bus failed to match device: %d\n", ret);
-		return ret;
+		/*
+		 * Driver could not match with device, but may match with
+		 * another device on the bus.
+		 */
+		return 0;
 	} /* ret > 0 means positive match */
 
 	if (driver_allows_async_probing(drv)) {
-- 
GitLab


From 02c39bbb36bab698b29f066a28c3e342d1136e53 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 Oct 2022 07:27:17 +0200
Subject: [PATCH 1285/4122] virt: fsl_hypervisor: Replace NO_IRQ by 0

NO_IRQ is used to check the return of irq_of_parse_and_map().

On some architecture NO_IRQ is 0, on other architectures it is -1.

irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ.

So use 0 instead of using NO_IRQ.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Link: https://lore.kernel.org/r/20dd37b96bac0a72caef28e7462b32c93487a516.1665033909.git.christophe.leroy@csgroup.eu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/virt/fsl_hypervisor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/virt/fsl_hypervisor.c b/drivers/virt/fsl_hypervisor.c
index 07035249a5e1e..f8b4389d60d9f 100644
--- a/drivers/virt/fsl_hypervisor.c
+++ b/drivers/virt/fsl_hypervisor.c
@@ -839,7 +839,7 @@ static int __init fsl_hypervisor_init(void)
 
 		handle = of_get_property(np, "interrupts", NULL);
 		irq = irq_of_parse_and_map(np, 0);
-		if (!handle || (irq == NO_IRQ)) {
+		if (!handle || !irq) {
 			pr_err("fsl-hv: no 'interrupts' property in %pOF node\n",
 				np);
 			continue;
-- 
GitLab


From 2d2879e1834d02852347d9dc910fffd5d22ac337 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Thu, 10 Nov 2022 15:37:54 +0530
Subject: [PATCH 1286/4122] dt-bindings: spmi: Add qcom,bus-id

For PMIC arbiter version 7 and beyond we need to define if we are using
primary or secondary bus, so add a new property of qcom,bus-id

Signed-off-by: Vinod Koul <vkoul@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Link: https://lore.kernel.org/r/20221110100755.4032505-2-vkoul@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml   | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml b/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml
index fee4f0eb46652..f983b4af6db93 100644
--- a/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml
+++ b/Documentation/devicetree/bindings/spmi/qcom,spmi-pmic-arb.yaml
@@ -85,6 +85,14 @@ properties:
     description: >
       which of the PMIC Arb provided channels to use for accesses
 
+  qcom,bus-id:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 1
+    description: >
+      SPMI bus instance. only applicable to PMIC arbiter version 7 and beyond.
+      Supported values, 0 = primary bus, 1 = secondary bus
+
 required:
   - compatible
   - reg-names
@@ -113,5 +121,7 @@ examples:
 
         interrupt-controller;
         #interrupt-cells = <4>;
+
+        qcom,bus-id = <0>;
     };
 
-- 
GitLab


From 231601cd22bd60e332dfa9ead22f871e93ad9821 Mon Sep 17 00:00:00 2001
From: David Collins <collinsd@codeaurora.org>
Date: Thu, 10 Nov 2022 15:37:55 +0530
Subject: [PATCH 1287/4122] spmi: pmic-arb: Add support for PMIC v7

PMIC v7 has different offset values and seqeunces, so add support for
this new version of PMIC

Signed-off-by: David Collins <collinsd@codeaurora.org>
Signed-off-by: Vinod Koul <vkoul@kernel.org>
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org> # SM8450 HDK
Acked-by: Bjorn Andersson <andersson@kernel.org>
Link: https://lore.kernel.org/r/20221110100755.4032505-3-vkoul@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/spmi/spmi-pmic-arb.c | 242 ++++++++++++++++++++++++++++++++---
 1 file changed, 221 insertions(+), 21 deletions(-)

diff --git a/drivers/spmi/spmi-pmic-arb.c b/drivers/spmi/spmi-pmic-arb.c
index 2cf3203b2397e..8b6a42ab816fb 100644
--- a/drivers/spmi/spmi-pmic-arb.c
+++ b/drivers/spmi/spmi-pmic-arb.c
@@ -22,8 +22,14 @@
 #define PMIC_ARB_VERSION_V2_MIN		0x20010000
 #define PMIC_ARB_VERSION_V3_MIN		0x30000000
 #define PMIC_ARB_VERSION_V5_MIN		0x50000000
+#define PMIC_ARB_VERSION_V7_MIN		0x70000000
 #define PMIC_ARB_INT_EN			0x0004
 
+#define PMIC_ARB_FEATURES		0x0004
+#define PMIC_ARB_FEATURES_PERIPH_MASK	GENMASK(10, 0)
+
+#define PMIC_ARB_FEATURES1		0x0008
+
 /* PMIC Arbiter channel registers offsets */
 #define PMIC_ARB_CMD			0x00
 #define PMIC_ARB_CONFIG			0x04
@@ -48,7 +54,6 @@
 #define INVALID_EE				0xFF
 
 /* Ownership Table */
-#define SPMI_OWNERSHIP_TABLE_REG(N)	(0x0700 + (4 * (N)))
 #define SPMI_OWNERSHIP_PERIPH2OWNER(X)	((X) & 0x7)
 
 /* Channel Status fields */
@@ -91,6 +96,7 @@ enum pmic_arb_channel {
 
 /* Maximum number of support PMIC peripherals */
 #define PMIC_ARB_MAX_PERIPHS		512
+#define PMIC_ARB_MAX_PERIPHS_V7		1024
 #define PMIC_ARB_TIMEOUT_US		1000
 #define PMIC_ARB_MAX_TRANS_BYTES	(8)
 
@@ -104,12 +110,12 @@ enum pmic_arb_channel {
 	((((slave_id) & 0xF)   << 28) | \
 	(((periph_id) & 0xFF)  << 20) | \
 	(((irq_id)    & 0x7)   << 16) | \
-	(((apid)      & 0x1FF) << 0))
+	(((apid)      & 0x3FF) << 0))
 
 #define hwirq_to_sid(hwirq)  (((hwirq) >> 28) & 0xF)
 #define hwirq_to_per(hwirq)  (((hwirq) >> 20) & 0xFF)
 #define hwirq_to_irq(hwirq)  (((hwirq) >> 16) & 0x7)
-#define hwirq_to_apid(hwirq) (((hwirq) >> 0)  & 0x1FF)
+#define hwirq_to_apid(hwirq) (((hwirq) >> 0)  & 0x3FF)
 
 struct pmic_arb_ver_ops;
 
@@ -130,13 +136,21 @@ struct apid_data {
  * @channel:		execution environment channel to use for accesses.
  * @irq:		PMIC ARB interrupt.
  * @ee:			the current Execution Environment
+ * @bus_instance:	on v7: 0 = primary SPMI bus, 1 = secondary SPMI bus
  * @min_apid:		minimum APID (used for bounding IRQ search)
  * @max_apid:		maximum APID
+ * @base_apid:		on v7: minimum APID associated with the particular SPMI
+ *			bus instance
+ * @apid_count:		on v5 and v7: number of APIDs associated with the
+ *			particular SPMI bus instance
  * @mapping_table:	in-memory copy of PPID -> APID mapping table.
  * @domain:		irq domain object for PMIC IRQ domain
  * @spmic:		SPMI controller object
  * @ver_ops:		version dependent operations.
- * @ppid_to_apid	in-memory copy of PPID -> APID mapping table.
+ * @ppid_to_apid:	in-memory copy of PPID -> APID mapping table.
+ * @last_apid:		Highest value APID in use
+ * @apid_data:		Table of data for all APIDs
+ * @max_periphs:	Number of elements in apid_data[]
  */
 struct spmi_pmic_arb {
 	void __iomem		*rd_base;
@@ -149,8 +163,11 @@ struct spmi_pmic_arb {
 	u8			channel;
 	int			irq;
 	u8			ee;
+	u32			bus_instance;
 	u16			min_apid;
 	u16			max_apid;
+	u16			base_apid;
+	int			apid_count;
 	u32			*mapping_table;
 	DECLARE_BITMAP(mapping_table_valid, PMIC_ARB_MAX_PERIPHS);
 	struct irq_domain	*domain;
@@ -158,7 +175,8 @@ struct spmi_pmic_arb {
 	const struct pmic_arb_ver_ops *ver_ops;
 	u16			*ppid_to_apid;
 	u16			last_apid;
-	struct apid_data	apid_data[PMIC_ARB_MAX_PERIPHS];
+	struct apid_data	*apid_data;
+	int			max_periphs;
 };
 
 /**
@@ -180,6 +198,7 @@ struct spmi_pmic_arb {
  * @irq_clear:		on v1 address of PMIC_ARB_SPMI_PIC_IRQ_CLEARn
  *			on v2 address of SPMI_PIC_IRQ_CLEARn.
  * @apid_map_offset:	offset of PMIC_ARB_REG_CHNLn
+ * @apid_owner:		on v2 and later address of SPMI_PERIPHn_2OWNER_TABLE_REG
  */
 struct pmic_arb_ver_ops {
 	const char *ver_str;
@@ -196,6 +215,7 @@ struct pmic_arb_ver_ops {
 	void __iomem *(*irq_status)(struct spmi_pmic_arb *pmic_arb, u16 n);
 	void __iomem *(*irq_clear)(struct spmi_pmic_arb *pmic_arb, u16 n);
 	u32 (*apid_map_offset)(u16 n);
+	void __iomem *(*apid_owner)(struct spmi_pmic_arb *pmic_arb, u16 n);
 };
 
 static inline void pmic_arb_base_write(struct spmi_pmic_arb *pmic_arb,
@@ -627,6 +647,11 @@ static void pmic_arb_chained_irq(struct irq_desc *desc)
 	struct irq_chip *chip = irq_desc_get_chip(desc);
 	int first = pmic_arb->min_apid;
 	int last = pmic_arb->max_apid;
+	/*
+	 * acc_offset will be non-zero for the secondary SPMI bus instance on
+	 * v7 controllers.
+	 */
+	int acc_offset = pmic_arb->base_apid >> 5;
 	u8 ee = pmic_arb->ee;
 	u32 status, enable, handled = 0;
 	int i, id, apid;
@@ -637,8 +662,7 @@ static void pmic_arb_chained_irq(struct irq_desc *desc)
 	chained_irq_enter(chip, desc);
 
 	for (i = first >> 5; i <= last >> 5; ++i) {
-		status = readl_relaxed(
-				ver_ops->owner_acc_status(pmic_arb, ee, i));
+		status = readl_relaxed(ver_ops->owner_acc_status(pmic_arb, ee, i - acc_offset));
 		if (status)
 			acc_valid = true;
 
@@ -983,8 +1007,8 @@ static u16 pmic_arb_find_apid(struct spmi_pmic_arb *pmic_arb, u16 ppid)
 		if (offset >= pmic_arb->core_size)
 			break;
 
-		regval = readl_relaxed(pmic_arb->cnfg +
-				      SPMI_OWNERSHIP_TABLE_REG(apid));
+		regval = readl_relaxed(pmic_arb->ver_ops->apid_owner(pmic_arb,
+								     apid));
 		apidd->irq_ee = SPMI_OWNERSHIP_PERIPH2OWNER(regval);
 		apidd->write_ee = apidd->irq_ee;
 
@@ -1020,21 +1044,30 @@ static int pmic_arb_ppid_to_apid_v2(struct spmi_pmic_arb *pmic_arb, u16 ppid)
 
 static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb)
 {
-	struct apid_data *apidd = pmic_arb->apid_data;
+	struct apid_data *apidd;
 	struct apid_data *prev_apidd;
-	u16 i, apid, ppid;
+	u16 i, apid, ppid, apid_max;
 	bool valid, is_irq_ee;
 	u32 regval, offset;
 
 	/*
 	 * In order to allow multiple EEs to write to a single PPID in arbiter
-	 * version 5, there is more than one APID mapped to each PPID.
+	 * version 5 and 7, there is more than one APID mapped to each PPID.
 	 * The owner field for each of these mappings specifies the EE which is
 	 * allowed to write to the APID.  The owner of the last (highest) APID
 	 * which has the IRQ owner bit set for a given PPID will receive
 	 * interrupts from the PPID.
+	 *
+	 * In arbiter version 7, the APID numbering space is divided between
+	 * the primary bus (0) and secondary bus (1) such that:
+	 * APID = 0 to N-1 are assigned to the primary bus
+	 * APID = N to N+M-1 are assigned to the secondary bus
+	 * where N = number of APIDs supported by the primary bus and
+	 *       M = number of APIDs supported by the secondary bus
 	 */
-	for (i = 0; ; i++, apidd++) {
+	apidd = &pmic_arb->apid_data[pmic_arb->base_apid];
+	apid_max = pmic_arb->base_apid + pmic_arb->apid_count;
+	for (i = pmic_arb->base_apid; i < apid_max; i++, apidd++) {
 		offset = pmic_arb->ver_ops->apid_map_offset(i);
 		if (offset >= pmic_arb->core_size)
 			break;
@@ -1045,8 +1078,8 @@ static int pmic_arb_read_apid_map_v5(struct spmi_pmic_arb *pmic_arb)
 		ppid = (regval >> 8) & PMIC_ARB_PPID_MASK;
 		is_irq_ee = PMIC_ARB_CHAN_IS_IRQ_OWNER(regval);
 
-		regval = readl_relaxed(pmic_arb->cnfg +
-				      SPMI_OWNERSHIP_TABLE_REG(i));
+		regval = readl_relaxed(pmic_arb->ver_ops->apid_owner(pmic_arb,
+								     i));
 		apidd->write_ee = SPMI_OWNERSHIP_PERIPH2OWNER(regval);
 
 		apidd->irq_ee = is_irq_ee ? apidd->write_ee : INVALID_EE;
@@ -1145,6 +1178,40 @@ static int pmic_arb_offset_v5(struct spmi_pmic_arb *pmic_arb, u8 sid, u16 addr,
 	return offset;
 }
 
+/*
+ * v7 offset per ee and per apid for observer channels and per apid for
+ * read/write channels.
+ */
+static int pmic_arb_offset_v7(struct spmi_pmic_arb *pmic_arb, u8 sid, u16 addr,
+			   enum pmic_arb_channel ch_type)
+{
+	u16 apid;
+	int rc;
+	u32 offset = 0;
+	u16 ppid = (sid << 8) | (addr >> 8);
+
+	rc = pmic_arb->ver_ops->ppid_to_apid(pmic_arb, ppid);
+	if (rc < 0)
+		return rc;
+
+	apid = rc;
+	switch (ch_type) {
+	case PMIC_ARB_CHANNEL_OBS:
+		offset = 0x8000 * pmic_arb->ee + 0x20 * apid;
+		break;
+	case PMIC_ARB_CHANNEL_RW:
+		if (pmic_arb->apid_data[apid].write_ee != pmic_arb->ee) {
+			dev_err(&pmic_arb->spmic->dev, "disallowed SPMI write to sid=%u, addr=0x%04X\n",
+				sid, addr);
+			return -EPERM;
+		}
+		offset = 0x1000 * apid;
+		break;
+	}
+
+	return offset;
+}
+
 static u32 pmic_arb_fmt_cmd_v1(u8 opc, u8 sid, u16 addr, u8 bc)
 {
 	return (opc << 27) | ((sid & 0xf) << 20) | (addr << 4) | (bc & 0x7);
@@ -1179,6 +1246,12 @@ pmic_arb_owner_acc_status_v5(struct spmi_pmic_arb *pmic_arb, u8 m, u16 n)
 	return pmic_arb->intr + 0x10000 * m + 0x4 * n;
 }
 
+static void __iomem *
+pmic_arb_owner_acc_status_v7(struct spmi_pmic_arb *pmic_arb, u8 m, u16 n)
+{
+	return pmic_arb->intr + 0x1000 * m + 0x4 * n;
+}
+
 static void __iomem *
 pmic_arb_acc_enable_v1(struct spmi_pmic_arb *pmic_arb, u16 n)
 {
@@ -1197,6 +1270,12 @@ pmic_arb_acc_enable_v5(struct spmi_pmic_arb *pmic_arb, u16 n)
 	return pmic_arb->wr_base + 0x100 + 0x10000 * n;
 }
 
+static void __iomem *
+pmic_arb_acc_enable_v7(struct spmi_pmic_arb *pmic_arb, u16 n)
+{
+	return pmic_arb->wr_base + 0x100 + 0x1000 * n;
+}
+
 static void __iomem *
 pmic_arb_irq_status_v1(struct spmi_pmic_arb *pmic_arb, u16 n)
 {
@@ -1215,6 +1294,12 @@ pmic_arb_irq_status_v5(struct spmi_pmic_arb *pmic_arb, u16 n)
 	return pmic_arb->wr_base + 0x104 + 0x10000 * n;
 }
 
+static void __iomem *
+pmic_arb_irq_status_v7(struct spmi_pmic_arb *pmic_arb, u16 n)
+{
+	return pmic_arb->wr_base + 0x104 + 0x1000 * n;
+}
+
 static void __iomem *
 pmic_arb_irq_clear_v1(struct spmi_pmic_arb *pmic_arb, u16 n)
 {
@@ -1233,6 +1318,12 @@ pmic_arb_irq_clear_v5(struct spmi_pmic_arb *pmic_arb, u16 n)
 	return pmic_arb->wr_base + 0x108 + 0x10000 * n;
 }
 
+static void __iomem *
+pmic_arb_irq_clear_v7(struct spmi_pmic_arb *pmic_arb, u16 n)
+{
+	return pmic_arb->wr_base + 0x108 + 0x1000 * n;
+}
+
 static u32 pmic_arb_apid_map_offset_v2(u16 n)
 {
 	return 0x800 + 0x4 * n;
@@ -1243,6 +1334,28 @@ static u32 pmic_arb_apid_map_offset_v5(u16 n)
 	return 0x900 + 0x4 * n;
 }
 
+static u32 pmic_arb_apid_map_offset_v7(u16 n)
+{
+	return 0x2000 + 0x4 * n;
+}
+
+static void __iomem *
+pmic_arb_apid_owner_v2(struct spmi_pmic_arb *pmic_arb, u16 n)
+{
+	return pmic_arb->cnfg + 0x700 + 0x4 * n;
+}
+
+/*
+ * For arbiter version 7, APID ownership table registers have independent
+ * numbering space for each SPMI bus instance, so each is indexed starting from
+ * 0.
+ */
+static void __iomem *
+pmic_arb_apid_owner_v7(struct spmi_pmic_arb *pmic_arb, u16 n)
+{
+	return pmic_arb->cnfg + 0x4 * (n - pmic_arb->base_apid);
+}
+
 static const struct pmic_arb_ver_ops pmic_arb_v1 = {
 	.ver_str		= "v1",
 	.ppid_to_apid		= pmic_arb_ppid_to_apid_v1,
@@ -1254,6 +1367,7 @@ static const struct pmic_arb_ver_ops pmic_arb_v1 = {
 	.irq_status		= pmic_arb_irq_status_v1,
 	.irq_clear		= pmic_arb_irq_clear_v1,
 	.apid_map_offset	= pmic_arb_apid_map_offset_v2,
+	.apid_owner		= pmic_arb_apid_owner_v2,
 };
 
 static const struct pmic_arb_ver_ops pmic_arb_v2 = {
@@ -1267,6 +1381,7 @@ static const struct pmic_arb_ver_ops pmic_arb_v2 = {
 	.irq_status		= pmic_arb_irq_status_v2,
 	.irq_clear		= pmic_arb_irq_clear_v2,
 	.apid_map_offset	= pmic_arb_apid_map_offset_v2,
+	.apid_owner		= pmic_arb_apid_owner_v2,
 };
 
 static const struct pmic_arb_ver_ops pmic_arb_v3 = {
@@ -1280,6 +1395,7 @@ static const struct pmic_arb_ver_ops pmic_arb_v3 = {
 	.irq_status		= pmic_arb_irq_status_v2,
 	.irq_clear		= pmic_arb_irq_clear_v2,
 	.apid_map_offset	= pmic_arb_apid_map_offset_v2,
+	.apid_owner		= pmic_arb_apid_owner_v2,
 };
 
 static const struct pmic_arb_ver_ops pmic_arb_v5 = {
@@ -1293,6 +1409,21 @@ static const struct pmic_arb_ver_ops pmic_arb_v5 = {
 	.irq_status		= pmic_arb_irq_status_v5,
 	.irq_clear		= pmic_arb_irq_clear_v5,
 	.apid_map_offset	= pmic_arb_apid_map_offset_v5,
+	.apid_owner		= pmic_arb_apid_owner_v2,
+};
+
+static const struct pmic_arb_ver_ops pmic_arb_v7 = {
+	.ver_str		= "v7",
+	.ppid_to_apid		= pmic_arb_ppid_to_apid_v5,
+	.non_data_cmd		= pmic_arb_non_data_cmd_v2,
+	.offset			= pmic_arb_offset_v7,
+	.fmt_cmd		= pmic_arb_fmt_cmd_v2,
+	.owner_acc_status	= pmic_arb_owner_acc_status_v7,
+	.acc_enable		= pmic_arb_acc_enable_v7,
+	.irq_status		= pmic_arb_irq_status_v7,
+	.irq_clear		= pmic_arb_irq_clear_v7,
+	.apid_map_offset	= pmic_arb_apid_map_offset_v7,
+	.apid_owner		= pmic_arb_apid_owner_v7,
 };
 
 static const struct irq_domain_ops pmic_arb_irq_domain_ops = {
@@ -1319,8 +1450,18 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev)
 	pmic_arb = spmi_controller_get_drvdata(ctrl);
 	pmic_arb->spmic = ctrl;
 
+	/*
+	 * Please don't replace this with devm_platform_ioremap_resource() or
+	 * devm_ioremap_resource().  These both result in a call to
+	 * devm_request_mem_region() which prevents multiple mappings of this
+	 * register address range.  SoCs with PMIC arbiter v7 may define two
+	 * arbiter devices, for the two physical SPMI interfaces, which  share
+	 * some register address ranges (i.e. "core", "obsrvr", and "chnls").
+	 * Ensure that both devices probe successfully by calling devm_ioremap()
+	 * which does not result in a devm_request_mem_region() call.
+	 */
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "core");
-	core = devm_ioremap_resource(&ctrl->dev, res);
+	core = devm_ioremap(&ctrl->dev, res->start, resource_size(res));
 	if (IS_ERR(core)) {
 		err = PTR_ERR(core);
 		goto err_put_ctrl;
@@ -1349,12 +1490,15 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev)
 			pmic_arb->ver_ops = &pmic_arb_v2;
 		else if (hw_ver < PMIC_ARB_VERSION_V5_MIN)
 			pmic_arb->ver_ops = &pmic_arb_v3;
-		else
+		else if (hw_ver < PMIC_ARB_VERSION_V7_MIN)
 			pmic_arb->ver_ops = &pmic_arb_v5;
+		else
+			pmic_arb->ver_ops = &pmic_arb_v7;
 
 		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 						   "obsrvr");
-		pmic_arb->rd_base = devm_ioremap_resource(&ctrl->dev, res);
+		pmic_arb->rd_base = devm_ioremap(&ctrl->dev, res->start,
+						 resource_size(res));
 		if (IS_ERR(pmic_arb->rd_base)) {
 			err = PTR_ERR(pmic_arb->rd_base);
 			goto err_put_ctrl;
@@ -1362,13 +1506,69 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev)
 
 		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 						   "chnls");
-		pmic_arb->wr_base = devm_ioremap_resource(&ctrl->dev, res);
+		pmic_arb->wr_base = devm_ioremap(&ctrl->dev, res->start,
+						 resource_size(res));
 		if (IS_ERR(pmic_arb->wr_base)) {
 			err = PTR_ERR(pmic_arb->wr_base);
 			goto err_put_ctrl;
 		}
 	}
 
+	pmic_arb->max_periphs = PMIC_ARB_MAX_PERIPHS;
+
+	if (hw_ver >= PMIC_ARB_VERSION_V7_MIN) {
+		pmic_arb->max_periphs = PMIC_ARB_MAX_PERIPHS_V7;
+		/* Optional property for v7: */
+		of_property_read_u32(pdev->dev.of_node, "qcom,bus-id",
+					&pmic_arb->bus_instance);
+		if (pmic_arb->bus_instance > 1) {
+			err = -EINVAL;
+			dev_err(&pdev->dev, "invalid bus instance (%u) specified\n",
+				pmic_arb->bus_instance);
+			goto err_put_ctrl;
+		}
+
+		if (pmic_arb->bus_instance == 0) {
+			pmic_arb->base_apid = 0;
+			pmic_arb->apid_count =
+				readl_relaxed(core + PMIC_ARB_FEATURES) &
+				PMIC_ARB_FEATURES_PERIPH_MASK;
+		} else {
+			pmic_arb->base_apid =
+				readl_relaxed(core + PMIC_ARB_FEATURES) &
+				PMIC_ARB_FEATURES_PERIPH_MASK;
+			pmic_arb->apid_count =
+				readl_relaxed(core + PMIC_ARB_FEATURES1) &
+				PMIC_ARB_FEATURES_PERIPH_MASK;
+		}
+
+		if (pmic_arb->base_apid + pmic_arb->apid_count > pmic_arb->max_periphs) {
+			err = -EINVAL;
+			dev_err(&pdev->dev, "Unsupported APID count %d detected\n",
+				pmic_arb->base_apid + pmic_arb->apid_count);
+			goto err_put_ctrl;
+		}
+	} else if (hw_ver >= PMIC_ARB_VERSION_V5_MIN) {
+		pmic_arb->base_apid = 0;
+		pmic_arb->apid_count = readl_relaxed(core + PMIC_ARB_FEATURES) &
+					PMIC_ARB_FEATURES_PERIPH_MASK;
+
+		if (pmic_arb->apid_count > pmic_arb->max_periphs) {
+			err = -EINVAL;
+			dev_err(&pdev->dev, "Unsupported APID count %d detected\n",
+				pmic_arb->apid_count);
+			goto err_put_ctrl;
+		}
+	}
+
+	pmic_arb->apid_data = devm_kcalloc(&ctrl->dev, pmic_arb->max_periphs,
+					   sizeof(*pmic_arb->apid_data),
+					   GFP_KERNEL);
+	if (!pmic_arb->apid_data) {
+		err = -ENOMEM;
+		goto err_put_ctrl;
+	}
+
 	dev_info(&ctrl->dev, "PMIC arbiter version %s (0x%x)\n",
 		 pmic_arb->ver_ops->ver_str, hw_ver);
 
@@ -1420,7 +1620,7 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev)
 	}
 
 	pmic_arb->ee = ee;
-	mapping_table = devm_kcalloc(&ctrl->dev, PMIC_ARB_MAX_PERIPHS,
+	mapping_table = devm_kcalloc(&ctrl->dev, pmic_arb->max_periphs,
 					sizeof(*mapping_table), GFP_KERNEL);
 	if (!mapping_table) {
 		err = -ENOMEM;
@@ -1431,7 +1631,7 @@ static int spmi_pmic_arb_probe(struct platform_device *pdev)
 	/* Initialize max_apid/min_apid to the opposite bounds, during
 	 * the irq domain translation, we are sure to update these */
 	pmic_arb->max_apid = 0;
-	pmic_arb->min_apid = PMIC_ARB_MAX_PERIPHS - 1;
+	pmic_arb->min_apid = pmic_arb->max_periphs - 1;
 
 	platform_set_drvdata(pdev, ctrl);
 	raw_spin_lock_init(&pmic_arb->lock);
-- 
GitLab


From 4634c973096a64662a24d9914c47cebc2a8b72f4 Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Wed, 2 Nov 2022 15:26:59 +0800
Subject: [PATCH 1288/4122] chardev: Fix potential memory leak when cdev_add()
 failed

Some init function of cdev(like comedi) will call kobject_set_name()
before cdev_add(), but won't free the cdev.kobj.name or put the ref cnt
of cdev.kobj when cdev_add() failed. As the result, cdev.kobj.name will
be leaked.

Free the name of kobject in cdev_add() fail path to prevent memleak. With
this fix, the callers don't need to care about freeing the name of
kobject if cdev_add() fails.

unreferenced object 0xffff8881000fa8c0 (size 8):
  comm "modprobe", pid 239, jiffies 4294905173 (age 51.308s)
  hex dump (first 8 bytes):
    63 6f 6d 65 64 69 00 ff                          comedi..
  backtrace:
    [<000000005f9878f7>] __kmalloc_node_track_caller+0x4c/0x1c0
    [<000000000fd70302>] kstrdup+0x3f/0x70
    [<000000009428bc33>] kstrdup_const+0x46/0x60
    [<00000000ed50d9de>] kvasprintf_const+0xdb/0xf0
    [<00000000b2766964>] kobject_set_name_vargs+0x3c/0xe0
    [<00000000f2424ef7>] kobject_set_name+0x62/0x90
    [<000000005d5a125b>] 0xffffffffa0013098
    [<00000000f331e663>] do_one_initcall+0x7a/0x380
    [<00000000aa7bac96>] do_init_module+0x5c/0x230
    [<000000005fd72335>] load_module+0x227d/0x2420
    [<00000000ad550cf1>] __do_sys_finit_module+0xd5/0x140
    [<00000000069a60c5>] do_syscall_64+0x3f/0x90
    [<00000000c5e0d521>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

Suggested-by: Greg KH <gregkh@linuxfoundation.org>
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Link: https://lore.kernel.org/r/20221102072659.23671-1-shangxiaojing@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/char_dev.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/fs/char_dev.c b/fs/char_dev.c
index ba0ded7842a77..340e4543b24a9 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -483,17 +483,24 @@ int cdev_add(struct cdev *p, dev_t dev, unsigned count)
 	p->dev = dev;
 	p->count = count;
 
-	if (WARN_ON(dev == WHITEOUT_DEV))
-		return -EBUSY;
+	if (WARN_ON(dev == WHITEOUT_DEV)) {
+		error = -EBUSY;
+		goto err;
+	}
 
 	error = kobj_map(cdev_map, dev, count, NULL,
 			 exact_match, exact_lock, p);
 	if (error)
-		return error;
+		goto err;
 
 	kobject_get(p->kobj.parent);
 
 	return 0;
+
+err:
+	kfree_const(p->kobj.name);
+	p->kobj.name = NULL;
+	return error;
 }
 
 /**
-- 
GitLab


From 982a84455e94bf195f2c35f221a6b4fe239d74d2 Mon Sep 17 00:00:00 2001
From: Bo Liu <liubo03@inspur.com>
Date: Mon, 31 Oct 2022 02:35:57 -0400
Subject: [PATCH 1289/4122] misc: genwqe: card_base: Fix some kernel-doc
 warnings

Fixes the following W=1 kernel build warning(s):
  drivers/misc/genwqe/card_base.c:3: warning: This comment starts with '/**', but isn't a kernel-doc comment. Refer Documentation/doc-guide/kernel-doc.rst

Signed-off-by: Bo Liu <liubo03@inspur.com>
Link: https://lore.kernel.org/r/20221031063557.2710-1-liubo03@inspur.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/genwqe/card_base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index 693981891870c..bae8114f28059 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
  * IBM Accelerator Family 'GenWQE'
  *
  * (C) Copyright IBM Corp. 2013
-- 
GitLab


From e48031603ae644054a9251c6fc7f0e0fd4e2683a Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 6 Oct 2022 07:36:50 +0200
Subject: [PATCH 1290/4122] uio: uio_fsl_elbc_gpcm: Replace NO_IRQ by 0

NO_IRQ is used to check the return of irq_of_parse_and_map().

On some architecture NO_IRQ is 0, on other architectures it is -1.

irq_of_parse_and_map() returns 0 on error, independent of NO_IRQ.

So use 0 instead of using NO_IRQ.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Link: https://lore.kernel.org/r/68ccdf51811ab26bdb452babf17ae860fa4900c2.1665034535.git.christophe.leroy@csgroup.eu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_fsl_elbc_gpcm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/uio/uio_fsl_elbc_gpcm.c b/drivers/uio/uio_fsl_elbc_gpcm.c
index 7d8eb9dc20681..82dda799f327d 100644
--- a/drivers/uio/uio_fsl_elbc_gpcm.c
+++ b/drivers/uio/uio_fsl_elbc_gpcm.c
@@ -390,13 +390,13 @@ static int uio_fsl_elbc_gpcm_probe(struct platform_device *pdev)
 	info->priv = priv;
 	info->name = uio_name;
 	info->version = "0.0.1";
-	if (irq != NO_IRQ) {
+	if (irq) {
 		if (priv->irq_handler) {
 			info->irq = irq;
 			info->irq_flags = IRQF_SHARED;
 			info->handler = priv->irq_handler;
 		} else {
-			irq = NO_IRQ;
+			irq = 0;
 			dev_warn(priv->dev, "ignoring irq, no handler\n");
 		}
 	}
@@ -417,7 +417,7 @@ static int uio_fsl_elbc_gpcm_probe(struct platform_device *pdev)
 	dev_info(priv->dev,
 		 "eLBC/GPCM device (%s) at 0x%llx, bank %d, irq=%d\n",
 		 priv->name, (unsigned long long)res.start, priv->bank,
-		 irq != NO_IRQ ? irq : -1);
+		 irq ? : -1);
 
 	return 0;
 out_err2:
-- 
GitLab


From d88bd098f45e0dcf317f8924a38b48e8a14a3854 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 28 Sep 2022 22:16:37 +0100
Subject: [PATCH 1291/4122] test_firmware: Fix spelling mistake "EMTPY" ->
 "EMPTY"

There are spelling mistakes in config show text. Fix these.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Russ Weight <russell.h.weight@intel.com>
Link: https://lore.kernel.org/r/20220928211637.62529-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test_firmware.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index c82b65947ce68..0c714cdd51eff 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -284,7 +284,7 @@ static ssize_t config_show(struct device *dev,
 				test_fw_config->name);
 	else
 		len += scnprintf(buf + len, PAGE_SIZE - len,
-				"name:\tEMTPY\n");
+				"name:\tEMPTY\n");
 
 	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"num_requests:\t%u\n", test_fw_config->num_requests);
@@ -315,7 +315,7 @@ static ssize_t config_show(struct device *dev,
 				test_fw_config->upload_name);
 	else
 		len += scnprintf(buf + len, PAGE_SIZE - len,
-				"upload_name:\tEMTPY\n");
+				"upload_name:\tEMPTY\n");
 
 	mutex_unlock(&test_fw_mutex);
 
-- 
GitLab


From 19d54020883c210a0cc78e5c735900ee9e9f64b3 Mon Sep 17 00:00:00 2001
From: Jack Rosenthal <jrosenth@chromium.org>
Date: Fri, 4 Nov 2022 10:15:28 -0600
Subject: [PATCH 1292/4122] firmware: google: Implement cbmem in sysfs driver

The CBMEM area is a downward-growing memory region used by coreboot to
dynamically allocate tagged data structures ("CBMEM entries") that
remain resident during boot.

This implements a driver which exports access to the CBMEM entries
via sysfs under /sys/bus/coreboot/devices/cbmem-<id>.

This implementation is quite versatile.  Examples of how it could be
used are given below:

* Tools like util/cbmem from the coreboot tree could use this driver
  instead of finding CBMEM in /dev/mem directly.  Alternatively,
  firmware developers debugging an issue may find the sysfs interface
  more ergonomic than the cbmem tool and choose to use it directly.

* The crossystem tool, which exposes verified boot variables, can use
  this driver to read the vboot work buffer.

* Tools which read the BIOS SPI flash (e.g., flashrom) can find the
  flash layout in CBMEM directly, which is significantly faster than
  searching the flash directly.

Write access is provided to all CBMEM regions via
/sys/bus/coreboot/devices/cbmem-<id>/mem, as the existing cbmem
tooling updates this memory region, and envisioned use cases with
crossystem can benefit from updating memory regions.

Link: https://issuetracker.google.com/239604743
Cc: Stephen Boyd <swboyd@chromium.org>
Cc: Tzung-Bi Shih <tzungbi@kernel.org>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Reviewed-by: Julius Werner <jwerner@chromium.org>
Tested-by: Jack Rosenthal <jrosenth@chromium.org>
Signed-off-by: Jack Rosenthal <jrosenth@chromium.org>
Link: https://lore.kernel.org/r/20221104161528.531248-1-jrosenth@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/sysfs-bus-coreboot |  45 +++++++
 drivers/firmware/google/Kconfig              |  15 +++
 drivers/firmware/google/Makefile             |   3 +
 drivers/firmware/google/cbmem.c              | 129 +++++++++++++++++++
 drivers/firmware/google/coreboot_table.c     |  11 +-
 drivers/firmware/google/coreboot_table.h     |  18 +++
 6 files changed, 220 insertions(+), 1 deletion(-)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-coreboot
 create mode 100644 drivers/firmware/google/cbmem.c

diff --git a/Documentation/ABI/testing/sysfs-bus-coreboot b/Documentation/ABI/testing/sysfs-bus-coreboot
new file mode 100644
index 0000000000000..9c5accecc4709
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-coreboot
@@ -0,0 +1,45 @@
+What:		/sys/bus/coreboot
+Date:		August 2022
+Contact:	Jack Rosenthal <jrosenth@chromium.org>
+Description:
+		The coreboot bus provides a variety of virtual devices used to
+		access data structures created by the Coreboot BIOS.
+
+What:		/sys/bus/coreboot/devices/cbmem-<id>
+Date:		August 2022
+Contact:	Jack Rosenthal <jrosenth@chromium.org>
+Description:
+		CBMEM is a downwards-growing memory region created by Coreboot,
+		and contains tagged data structures to be shared with payloads
+		in the boot process and the OS.  Each CBMEM entry is given a
+		directory in /sys/bus/coreboot/devices based on its id.
+		A list of ids known to Coreboot can be found in the coreboot
+		source tree at
+		``src/commonlib/bsd/include/commonlib/bsd/cbmem_id.h``.
+
+What:		/sys/bus/coreboot/devices/cbmem-<id>/address
+Date:		August 2022
+Contact:	Jack Rosenthal <jrosenth@chromium.org>
+Description:
+		This is the pyhsical memory address that the CBMEM entry's data
+		begins at, in hexadecimal (e.g., ``0x76ffe000``).
+
+What:		/sys/bus/coreboot/devices/cbmem-<id>/size
+Date:		August 2022
+Contact:	Jack Rosenthal <jrosenth@chromium.org>
+Description:
+		This is the size of the CBMEM entry's data, in hexadecimal
+		(e.g., ``0x1234``).
+
+What:		/sys/bus/coreboot/devices/cbmem-<id>/mem
+Date:		August 2022
+Contact:	Jack Rosenthal <jrosenth@chromium.org>
+Description:
+		A file exposing read/write access to the entry's data.  Note
+		that this file does not support mmap(), as coreboot
+		does not guarantee that the data will be page-aligned.
+
+		The mode of this file is 0600.  While there shouldn't be
+		anything security-sensitive contained in CBMEM, read access
+		requires root privileges given this is exposing a small subset
+		of physical memory.
diff --git a/drivers/firmware/google/Kconfig b/drivers/firmware/google/Kconfig
index 983e07dc022ed..9f190eab43ed1 100644
--- a/drivers/firmware/google/Kconfig
+++ b/drivers/firmware/google/Kconfig
@@ -19,6 +19,21 @@ config GOOGLE_SMI
 	  driver provides an interface for reading and writing NVRAM
 	  variables.
 
+config GOOGLE_CBMEM
+	tristate "CBMEM entries in sysfs"
+	depends on GOOGLE_COREBOOT_TABLE
+	help
+	  CBMEM is a downwards-growing memory region created by the
+	  Coreboot BIOS containing tagged data structures from the
+	  BIOS.  These data structures expose things like the verified
+	  boot firmware variables, flash layout, firmware event log,
+	  and more.
+
+	  This option enables the cbmem module, which causes the
+	  kernel to search for Coreboot CBMEM entries, and expose the
+	  memory for each entry in sysfs under
+	  /sys/bus/coreboot/devices/cbmem-<id>.
+
 config GOOGLE_COREBOOT_TABLE
 	tristate "Coreboot Table Access"
 	depends on HAS_IOMEM && (ACPI || OF)
diff --git a/drivers/firmware/google/Makefile b/drivers/firmware/google/Makefile
index d17caded5d883..8151e323cc434 100644
--- a/drivers/firmware/google/Makefile
+++ b/drivers/firmware/google/Makefile
@@ -7,5 +7,8 @@ obj-$(CONFIG_GOOGLE_MEMCONSOLE)            += memconsole.o
 obj-$(CONFIG_GOOGLE_MEMCONSOLE_COREBOOT)   += memconsole-coreboot.o
 obj-$(CONFIG_GOOGLE_MEMCONSOLE_X86_LEGACY) += memconsole-x86-legacy.o
 
+# Must come after coreboot_table.o, as this driver depends on that bus type.
+obj-$(CONFIG_GOOGLE_CBMEM)		+= cbmem.o
+
 vpd-sysfs-y := vpd.o vpd_decode.o
 obj-$(CONFIG_GOOGLE_VPD)		+= vpd-sysfs.o
diff --git a/drivers/firmware/google/cbmem.c b/drivers/firmware/google/cbmem.c
new file mode 100644
index 0000000000000..685f3070ce9d8
--- /dev/null
+++ b/drivers/firmware/google/cbmem.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cbmem.c
+ *
+ * Driver for exporting cbmem entries in sysfs.
+ *
+ * Copyright 2022 Google LLC
+ */
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/kobject.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/sysfs.h>
+
+#include "coreboot_table.h"
+
+struct cbmem_entry {
+	char *mem_file_buf;
+	u32 size;
+};
+
+static struct cbmem_entry *to_cbmem_entry(struct kobject *kobj)
+{
+	return dev_get_drvdata(kobj_to_dev(kobj));
+}
+
+static ssize_t mem_read(struct file *filp, struct kobject *kobj,
+			struct bin_attribute *bin_attr, char *buf, loff_t pos,
+			size_t count)
+{
+	struct cbmem_entry *entry = to_cbmem_entry(kobj);
+
+	return memory_read_from_buffer(buf, count, &pos, entry->mem_file_buf,
+				       entry->size);
+}
+
+static ssize_t mem_write(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *bin_attr, char *buf, loff_t pos,
+			 size_t count)
+{
+	struct cbmem_entry *entry = to_cbmem_entry(kobj);
+
+	if (pos < 0 || pos >= entry->size)
+		return -EINVAL;
+	if (count > entry->size - pos)
+		count = entry->size - pos;
+
+	memcpy(entry->mem_file_buf + pos, buf, count);
+	return count;
+}
+static BIN_ATTR_ADMIN_RW(mem, 0);
+
+static ssize_t address_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct coreboot_device *cbdev = dev_to_coreboot_device(dev);
+
+	return sysfs_emit(buf, "0x%llx\n", cbdev->cbmem_entry.address);
+}
+static DEVICE_ATTR_RO(address);
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct coreboot_device *cbdev = dev_to_coreboot_device(dev);
+
+	return sysfs_emit(buf, "0x%x\n", cbdev->cbmem_entry.entry_size);
+}
+static DEVICE_ATTR_RO(size);
+
+static struct attribute *attrs[] = {
+	&dev_attr_address.attr,
+	&dev_attr_size.attr,
+	NULL,
+};
+
+static struct bin_attribute *bin_attrs[] = {
+	&bin_attr_mem,
+	NULL,
+};
+
+static const struct attribute_group cbmem_entry_group = {
+	.attrs = attrs,
+	.bin_attrs = bin_attrs,
+};
+
+static const struct attribute_group *dev_groups[] = {
+	&cbmem_entry_group,
+	NULL,
+};
+
+static int cbmem_entry_probe(struct coreboot_device *dev)
+{
+	struct cbmem_entry *entry;
+
+	entry = devm_kzalloc(&dev->dev, sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	dev_set_drvdata(&dev->dev, entry);
+	entry->mem_file_buf = devm_memremap(&dev->dev, dev->cbmem_entry.address,
+					    dev->cbmem_entry.entry_size,
+					    MEMREMAP_WB);
+	if (!entry->mem_file_buf)
+		return -ENOMEM;
+
+	entry->size = dev->cbmem_entry.entry_size;
+
+	return 0;
+}
+
+static struct coreboot_driver cbmem_entry_driver = {
+	.probe = cbmem_entry_probe,
+	.drv = {
+		.name = "cbmem",
+		.owner = THIS_MODULE,
+		.dev_groups = dev_groups,
+	},
+	.tag = LB_TAG_CBMEM_ENTRY,
+};
+module_coreboot_driver(cbmem_entry_driver);
+
+MODULE_AUTHOR("Jack Rosenthal <jrosenth@chromium.org>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/firmware/google/coreboot_table.c b/drivers/firmware/google/coreboot_table.c
index c52bcaa9def60..7748067eb9e6b 100644
--- a/drivers/firmware/google/coreboot_table.c
+++ b/drivers/firmware/google/coreboot_table.c
@@ -97,12 +97,21 @@ static int coreboot_table_populate(struct device *dev, void *ptr)
 		if (!device)
 			return -ENOMEM;
 
-		dev_set_name(&device->dev, "coreboot%d", i);
 		device->dev.parent = dev;
 		device->dev.bus = &coreboot_bus_type;
 		device->dev.release = coreboot_device_release;
 		memcpy(&device->entry, ptr_entry, entry->size);
 
+		switch (device->entry.tag) {
+		case LB_TAG_CBMEM_ENTRY:
+			dev_set_name(&device->dev, "cbmem-%08x",
+				     device->cbmem_entry.id);
+			break;
+		default:
+			dev_set_name(&device->dev, "coreboot%d", i);
+			break;
+		}
+
 		ret = device_register(&device->dev);
 		if (ret) {
 			put_device(&device->dev);
diff --git a/drivers/firmware/google/coreboot_table.h b/drivers/firmware/google/coreboot_table.h
index beb778674acdc..37f4d335a606d 100644
--- a/drivers/firmware/google/coreboot_table.h
+++ b/drivers/firmware/google/coreboot_table.h
@@ -39,6 +39,18 @@ struct lb_cbmem_ref {
 	u64 cbmem_addr;
 };
 
+#define LB_TAG_CBMEM_ENTRY 0x31
+
+/* Corresponds to LB_TAG_CBMEM_ENTRY */
+struct lb_cbmem_entry {
+	u32 tag;
+	u32 size;
+
+	u64 address;
+	u32 entry_size;
+	u32 id;
+};
+
 /* Describes framebuffer setup by coreboot */
 struct lb_framebuffer {
 	u32 tag;
@@ -65,10 +77,16 @@ struct coreboot_device {
 	union {
 		struct coreboot_table_entry entry;
 		struct lb_cbmem_ref cbmem_ref;
+		struct lb_cbmem_entry cbmem_entry;
 		struct lb_framebuffer framebuffer;
 	};
 };
 
+static inline struct coreboot_device *dev_to_coreboot_device(struct device *dev)
+{
+	return container_of(dev, struct coreboot_device, dev);
+}
+
 /* A driver for handling devices described in coreboot tables. */
 struct coreboot_driver {
 	int (*probe)(struct coreboot_device *);
-- 
GitLab


From 9de255c461d1b3f0242b3ad1450c3323a3e00b34 Mon Sep 17 00:00:00 2001
From: Rafael Mendonca <rafaelmendsr@gmail.com>
Date: Fri, 30 Sep 2022 19:40:57 -0300
Subject: [PATCH 1293/4122] uio: uio_dmem_genirq: Fix missing unlock in irq
 configuration

Commit b74351287d4b ("uio: fix a sleep-in-atomic-context bug in
uio_dmem_genirq_irqcontrol()") started calling disable_irq() without
holding the spinlock because it can sleep. However, that fix introduced
another bug: if interrupt is already disabled and a new disable request
comes in, then the spinlock is not unlocked:

root@localhost:~# printf '\x00\x00\x00\x00' > /dev/uio0
root@localhost:~# printf '\x00\x00\x00\x00' > /dev/uio0
root@localhost:~# [   14.851538] BUG: scheduling while atomic: bash/223/0x00000002
[   14.851991] Modules linked in: uio_dmem_genirq uio myfpga(OE) bochs drm_vram_helper drm_ttm_helper ttm drm_kms_helper drm snd_pcm ppdev joydev psmouse snd_timer snd e1000fb_sys_fops syscopyarea parport sysfillrect soundcore sysimgblt input_leds pcspkr i2c_piix4 serio_raw floppy evbug qemu_fw_cfg mac_hid pata_acpi ip_tables x_tables autofs4 [last unloaded: parport_pc]
[   14.854206] CPU: 0 PID: 223 Comm: bash Tainted: G           OE      6.0.0-rc7 #21
[   14.854786] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
[   14.855664] Call Trace:
[   14.855861]  <TASK>
[   14.856025]  dump_stack_lvl+0x4d/0x67
[   14.856325]  dump_stack+0x14/0x1a
[   14.856583]  __schedule_bug.cold+0x4b/0x5c
[   14.856915]  __schedule+0xe81/0x13d0
[   14.857199]  ? idr_find+0x13/0x20
[   14.857456]  ? get_work_pool+0x2d/0x50
[   14.857756]  ? __flush_work+0x233/0x280
[   14.858068]  ? __schedule+0xa95/0x13d0
[   14.858307]  ? idr_find+0x13/0x20
[   14.858519]  ? get_work_pool+0x2d/0x50
[   14.858798]  schedule+0x6c/0x100
[   14.859009]  schedule_hrtimeout_range_clock+0xff/0x110
[   14.859335]  ? tty_write_room+0x1f/0x30
[   14.859598]  ? n_tty_poll+0x1ec/0x220
[   14.859830]  ? tty_ldisc_deref+0x1a/0x20
[   14.860090]  schedule_hrtimeout_range+0x17/0x20
[   14.860373]  do_select+0x596/0x840
[   14.860627]  ? __kernel_text_address+0x16/0x50
[   14.860954]  ? poll_freewait+0xb0/0xb0
[   14.861235]  ? poll_freewait+0xb0/0xb0
[   14.861517]  ? rpm_resume+0x49d/0x780
[   14.861798]  ? common_interrupt+0x59/0xa0
[   14.862127]  ? asm_common_interrupt+0x2b/0x40
[   14.862511]  ? __uart_start.isra.0+0x61/0x70
[   14.862902]  ? __check_object_size+0x61/0x280
[   14.863255]  core_sys_select+0x1c6/0x400
[   14.863575]  ? vfs_write+0x1c9/0x3d0
[   14.863853]  ? vfs_write+0x1c9/0x3d0
[   14.864121]  ? _copy_from_user+0x45/0x70
[   14.864526]  do_pselect.constprop.0+0xb3/0xf0
[   14.864893]  ? do_syscall_64+0x6d/0x90
[   14.865228]  ? do_syscall_64+0x6d/0x90
[   14.865556]  __x64_sys_pselect6+0x76/0xa0
[   14.865906]  do_syscall_64+0x60/0x90
[   14.866214]  ? syscall_exit_to_user_mode+0x2a/0x50
[   14.866640]  ? do_syscall_64+0x6d/0x90
[   14.866972]  ? do_syscall_64+0x6d/0x90
[   14.867286]  ? do_syscall_64+0x6d/0x90
[   14.867626]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[...] stripped
[   14.872959]  </TASK>

('myfpga' is a simple 'uio_dmem_genirq' driver I wrote to test this)

The implementation of "uio_dmem_genirq" was based on "uio_pdrv_genirq" and
it is used in a similar manner to the "uio_pdrv_genirq" driver with respect
to interrupt configuration and handling. At the time "uio_dmem_genirq" was
introduced, both had the same implementation of the 'uio_info' handlers
irqcontrol() and handler(). Then commit 34cb27528398 ("UIO: Fix concurrency
issue"), which was only applied to "uio_pdrv_genirq", ended up making them
a little different. That commit, among other things, changed disable_irq()
to disable_irq_nosync() in the implementation of irqcontrol(). The
motivation there was to avoid a deadlock between irqcontrol() and
handler(), since it added a spinlock in the irq handler, and disable_irq()
waits for the completion of the irq handler.

By changing disable_irq() to disable_irq_nosync() in irqcontrol(), we also
avoid the sleeping-while-atomic bug that commit b74351287d4b ("uio: fix a
sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()") was trying to
fix. Thus, this fixes the missing unlock in irqcontrol() by importing the
implementation of irqcontrol() handler from the "uio_pdrv_genirq" driver.
In the end, it reverts commit b74351287d4b ("uio: fix a
sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()") and change
disable_irq() to disable_irq_nosync().

It is worth noting that this still does not address the concurrency issue
fixed by commit 34cb27528398 ("UIO: Fix concurrency issue"). It will be
addressed separately in the next commits.

Split out from commit 34cb27528398 ("UIO: Fix concurrency issue").

Fixes: b74351287d4b ("uio: fix a sleep-in-atomic-context bug in uio_dmem_genirq_irqcontrol()")
Signed-off-by: Rafael Mendonca <rafaelmendsr@gmail.com>
Link: https://lore.kernel.org/r/20220930224100.816175-2-rafaelmendsr@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_dmem_genirq.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index 1106f33764047..cb283ee36eaa9 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -132,13 +132,11 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
 	if (irq_on) {
 		if (test_and_clear_bit(0, &priv->flags))
 			enable_irq(dev_info->irq);
-		spin_unlock_irqrestore(&priv->lock, flags);
 	} else {
-		if (!test_and_set_bit(0, &priv->flags)) {
-			spin_unlock_irqrestore(&priv->lock, flags);
-			disable_irq(dev_info->irq);
-		}
+		if (!test_and_set_bit(0, &priv->flags))
+			disable_irq_nosync(dev_info->irq);
 	}
+	spin_unlock_irqrestore(&priv->lock, flags);
 
 	return 0;
 }
-- 
GitLab


From 118b918018175d9fcd8db667f905012e986cc2c9 Mon Sep 17 00:00:00 2001
From: Rafael Mendonca <rafaelmendsr@gmail.com>
Date: Fri, 30 Sep 2022 19:40:58 -0300
Subject: [PATCH 1294/4122] uio: uio_dmem_genirq: Fix deadlock between irq
 config and handling

This fixes a concurrency issue addressed in commit 34cb27528398 ("UIO: Fix
concurrency issue"):

  "In a SMP case there was a race condition issue between
  Uio_pdrv_genirq_irqcontrol() running on one CPU and irq handler on
  another CPU. Fix it by spin_locking shared resources access inside irq
  handler."

The implementation of "uio_dmem_genirq" was based on "uio_pdrv_genirq" and
it is used in a similar manner to the "uio_pdrv_genirq" driver with respect
to interrupt configuration and handling. At the time "uio_dmem_genirq" was
merged, both had the same implementation of the 'uio_info' handlers
irqcontrol() and handler(), thus, both had the same concurrency issue
mentioned by the above commit. However, the above patch was only applied to
the "uio_pdrv_genirq" driver.

Split out from commit 34cb27528398 ("UIO: Fix concurrency issue").

Fixes: 0a0c3b5a24bd ("Add new uio device for dynamic memory allocation")
Signed-off-by: Rafael Mendonca <rafaelmendsr@gmail.com>
Link: https://lore.kernel.org/r/20220930224100.816175-3-rafaelmendsr@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_dmem_genirq.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index cb283ee36eaa9..792c3e9c9ce53 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -110,8 +110,10 @@ static irqreturn_t uio_dmem_genirq_handler(int irq, struct uio_info *dev_info)
 	 * remember the state so we can allow user space to enable it later.
 	 */
 
+	spin_lock(&priv->lock);
 	if (!test_and_set_bit(0, &priv->flags))
 		disable_irq_nosync(irq);
+	spin_unlock(&priv->lock);
 
 	return IRQ_HANDLED;
 }
@@ -125,7 +127,8 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
 	 * in the interrupt controller, but keep track of the
 	 * state to prevent per-irq depth damage.
 	 *
-	 * Serialize this operation to support multiple tasks.
+	 * Serialize this operation to support multiple tasks and concurrency
+	 * with irq handler on SMP systems.
 	 */
 
 	spin_lock_irqsave(&priv->lock, flags);
-- 
GitLab


From a3fc57bc49a24960fd6a907457f9360a3e65b968 Mon Sep 17 00:00:00 2001
From: Rafael Mendonca <rafaelmendsr@gmail.com>
Date: Fri, 30 Sep 2022 19:40:59 -0300
Subject: [PATCH 1295/4122] uio: uio_dmem_genirq: Use non-atomic bit operations
 in irq config and handling

This finishes the port of the irq configuration and handling from
"uio_pdrv_genirq" to "uio_dmem_genirq". It changes the atomic
bit-manipulation routines to their non-atomic counterparts as we are
already guarding the code by spinlock.

Split out from commit 34cb27528398 ("UIO: Fix concurrency issue").

Signed-off-by: Rafael Mendonca <rafaelmendsr@gmail.com>
Link: https://lore.kernel.org/r/20220930224100.816175-4-rafaelmendsr@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/uio/uio_dmem_genirq.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/uio/uio_dmem_genirq.c b/drivers/uio/uio_dmem_genirq.c
index 792c3e9c9ce53..5313307c2754a 100644
--- a/drivers/uio/uio_dmem_genirq.c
+++ b/drivers/uio/uio_dmem_genirq.c
@@ -41,6 +41,11 @@ struct uio_dmem_genirq_platdata {
 	unsigned int refcnt;
 };
 
+/* Bits in uio_dmem_genirq_platdata.flags */
+enum {
+	UIO_IRQ_DISABLED = 0,
+};
+
 static int uio_dmem_genirq_open(struct uio_info *info, struct inode *inode)
 {
 	struct uio_dmem_genirq_platdata *priv = info->priv;
@@ -111,7 +116,7 @@ static irqreturn_t uio_dmem_genirq_handler(int irq, struct uio_info *dev_info)
 	 */
 
 	spin_lock(&priv->lock);
-	if (!test_and_set_bit(0, &priv->flags))
+	if (!__test_and_set_bit(UIO_IRQ_DISABLED, &priv->flags))
 		disable_irq_nosync(irq);
 	spin_unlock(&priv->lock);
 
@@ -133,10 +138,10 @@ static int uio_dmem_genirq_irqcontrol(struct uio_info *dev_info, s32 irq_on)
 
 	spin_lock_irqsave(&priv->lock, flags);
 	if (irq_on) {
-		if (test_and_clear_bit(0, &priv->flags))
+		if (__test_and_clear_bit(UIO_IRQ_DISABLED, &priv->flags))
 			enable_irq(dev_info->irq);
 	} else {
-		if (!test_and_set_bit(0, &priv->flags))
+		if (!__test_and_set_bit(UIO_IRQ_DISABLED, &priv->flags))
 			disable_irq_nosync(dev_info->irq);
 	}
 	spin_unlock_irqrestore(&priv->lock, flags);
-- 
GitLab


From d4ad017d634561907ecdd1e467a28612b369ee00 Mon Sep 17 00:00:00 2001
From: Soha Jin <soha@lohu.info>
Date: Sat, 1 Oct 2022 00:26:04 +0800
Subject: [PATCH 1296/4122] platform: use fwnode_irq_get_byname instead of
 of_irq_get_byname to get irq

Not only platform devices described by OF have named interrupts, but
devices described by ACPI also have named interrupts. The fwnode is an
abstraction to different standards, and using fwnode_irq_get_byname can
support more devices.

Signed-off-by: Soha Jin <soha@lohu.info>
Tested-by: Wende Tan <twd2.me@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/platform.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 51bb2289865c7..968f3d71eeab2 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -441,8 +441,8 @@ static int __platform_get_irq_byname(struct platform_device *dev,
 	struct resource *r;
 	int ret;
 
-	if (IS_ENABLED(CONFIG_OF_IRQ) && dev->dev.of_node) {
-		ret = of_irq_get_byname(dev->dev.of_node, name);
+	if (!dev->dev.of_node || IS_ENABLED(CONFIG_OF_IRQ)) {
+		ret = fwnode_irq_get_byname(dev_fwnode(&dev->dev), name);
 		if (ret > 0 || ret == -EPROBE_DEFER)
 			return ret;
 	}
-- 
GitLab


From 2a4e628570d42fcc13a94f1acf25e3cfeaec08f6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 9 Nov 2022 17:56:17 +0200
Subject: [PATCH 1297/4122] resource: Replace printk(KERN_WARNING) by
 pr_warn(), printk() by pr_info()

Replace printk(KERN_WARNING) by pr_warn() and printk() by pr_info().

While at it, use %pa for the resource_size_t variables. With that,
for the sake of consistency, introduce a temporary variable for
the end address in iomem_map_sanity_check() like it's done in another
function in the same module.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221109155618.42276-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 kernel/resource.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/kernel/resource.c b/kernel/resource.c
index 4c5e80b92f2f4..ab32b015bd509 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -888,7 +888,7 @@ void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
 		if (conflict->end > new->end)
 			new->end = conflict->end;
 
-		printk("Expanded resource %s due to conflict with %s\n", new->name, conflict->name);
+		pr_info("Expanded resource %s due to conflict with %s\n", new->name, conflict->name);
 	}
 	write_unlock(&resource_lock);
 }
@@ -1283,9 +1283,7 @@ void __release_region(struct resource *parent, resource_size_t start,
 
 	write_unlock(&resource_lock);
 
-	printk(KERN_WARNING "Trying to free nonexistent resource "
-		"<%016llx-%016llx>\n", (unsigned long long)start,
-		(unsigned long long)end);
+	pr_warn("Trying to free nonexistent resource <%pa-%pa>\n", &start, &end);
 }
 EXPORT_SYMBOL(__release_region);
 
@@ -1658,6 +1656,7 @@ __setup("reserve=", reserve_setup);
 int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
 {
 	struct resource *p = &iomem_resource;
+	resource_size_t end = addr + size - 1;
 	int err = 0;
 	loff_t l;
 
@@ -1667,12 +1666,12 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
 		 * We can probably skip the resources without
 		 * IORESOURCE_IO attribute?
 		 */
-		if (p->start >= addr + size)
+		if (p->start > end)
 			continue;
 		if (p->end < addr)
 			continue;
 		if (PFN_DOWN(p->start) <= PFN_DOWN(addr) &&
-		    PFN_DOWN(p->end) >= PFN_DOWN(addr + size - 1))
+		    PFN_DOWN(p->end) >= PFN_DOWN(end))
 			continue;
 		/*
 		 * if a resource is "BUSY", it's not a hardware resource
@@ -1683,10 +1682,8 @@ int iomem_map_sanity_check(resource_size_t addr, unsigned long size)
 		if (p->flags & IORESOURCE_BUSY)
 			continue;
 
-		printk(KERN_WARNING "resource sanity check: requesting [mem %#010llx-%#010llx], which spans more than %s %pR\n",
-		       (unsigned long long)addr,
-		       (unsigned long long)(addr + size - 1),
-		       p->name, p);
+		pr_warn("resource sanity check: requesting [mem %pa-%pa], which spans more than %s %pR\n",
+			&addr, &end, p->name, p);
 		err = -1;
 		break;
 	}
-- 
GitLab


From 52c4d11f1dce60453ab2a75fd7103118cedb2b58 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 9 Nov 2022 17:56:18 +0200
Subject: [PATCH 1298/4122] resource: Convert DEFINE_RES_NAMED() to be compound
 literal

Currently DEFINE_RES_NAMED() can only be used to fill the static data.
In some cases it would be convenient to use it as right value in the
assignment operation. But it can't be done as is, because compiler has
no clue about the data layout. Converting it to be a compound literal
allows the above mentioned usage.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221109155618.42276-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/ioport.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 27642ca15d932..67d3fb2133b6c 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -155,7 +155,7 @@ enum {
 
 /* helpers to define resources */
 #define DEFINE_RES_NAMED(_start, _size, _name, _flags)			\
-	{								\
+(struct resource) {							\
 		.start = (_start),					\
 		.end = (_start) + (_size) - 1,				\
 		.name = (_name),					\
-- 
GitLab


From 882cf4c913d730a74175db039d941005b883de38 Mon Sep 17 00:00:00 2001
From: Liu Ying <victor.liu@nxp.com>
Date: Mon, 17 Oct 2022 15:40:38 +0800
Subject: [PATCH 1299/4122] drivers: bus: simple-pm-bus: Use clocks

Simple Power-Managed bus controller may need functional clock(s)
to be enabled before child devices connected to the bus can be
accessed.  Get the clock(s) as a bulk and enable/disable the
clock(s) when the bus is being power managed.

One example is that Freescale i.MX8qxp pixel link MSI bus controller
needs MSI clock and AHB clock to be enabled before accessing child
devices.

Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Liu Ying <victor.liu@nxp.com>
Link: https://lore.kernel.org/r/20221017074039.4181843-2-victor.liu@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/simple-pm-bus.c | 48 +++++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c
index 6b8d6257ed8a4..d7b043fefde9e 100644
--- a/drivers/bus/simple-pm-bus.c
+++ b/drivers/bus/simple-pm-bus.c
@@ -8,17 +8,24 @@
  * for more details.
  */
 
+#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
+struct simple_pm_bus {
+	struct clk_bulk_data *clks;
+	int num_clks;
+};
+
 static int simple_pm_bus_probe(struct platform_device *pdev)
 {
 	const struct device *dev = &pdev->dev;
 	const struct of_dev_auxdata *lookup = dev_get_platdata(dev);
 	struct device_node *np = dev->of_node;
 	const struct of_device_id *match;
+	struct simple_pm_bus *bus;
 
 	/*
 	 * Allow user to use driver_override to bind this driver to a
@@ -44,6 +51,16 @@ static int simple_pm_bus_probe(struct platform_device *pdev)
 			return -ENODEV;
 	}
 
+	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+
+	bus->num_clks = devm_clk_bulk_get_all(&pdev->dev, &bus->clks);
+	if (bus->num_clks < 0)
+		return dev_err_probe(&pdev->dev, bus->num_clks, "failed to get clocks\n");
+
+	dev_set_drvdata(&pdev->dev, bus);
+
 	dev_dbg(&pdev->dev, "%s\n", __func__);
 
 	pm_runtime_enable(&pdev->dev);
@@ -67,6 +84,36 @@ static int simple_pm_bus_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int simple_pm_bus_runtime_suspend(struct device *dev)
+{
+	struct simple_pm_bus *bus = dev_get_drvdata(dev);
+
+	clk_bulk_disable_unprepare(bus->num_clks, bus->clks);
+
+	return 0;
+}
+
+static int simple_pm_bus_runtime_resume(struct device *dev)
+{
+	struct simple_pm_bus *bus = dev_get_drvdata(dev);
+	int ret;
+
+	ret = clk_bulk_prepare_enable(bus->num_clks, bus->clks);
+	if (ret) {
+		dev_err(dev, "failed to enable clocks: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct dev_pm_ops simple_pm_bus_pm_ops = {
+	SET_RUNTIME_PM_OPS(simple_pm_bus_runtime_suspend,
+			   simple_pm_bus_runtime_resume, NULL)
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				      pm_runtime_force_resume)
+};
+
 #define ONLY_BUS	((void *) 1) /* Match if the device is only a bus. */
 
 static const struct of_device_id simple_pm_bus_of_match[] = {
@@ -85,6 +132,7 @@ static struct platform_driver simple_pm_bus_driver = {
 	.driver = {
 		.name = "simple-pm-bus",
 		.of_match_table = simple_pm_bus_of_match,
+		.pm = &simple_pm_bus_pm_ops,
 	},
 };
 
-- 
GitLab


From c08645ea215c446ceb21029fe5416e6a62cbbed7 Mon Sep 17 00:00:00 2001
From: Liu Ying <victor.liu@nxp.com>
Date: Mon, 17 Oct 2022 15:40:39 +0800
Subject: [PATCH 1300/4122] dt-bindings: bus: Add Freescale i.MX8qxp pixel link
 MSI bus binding

Freescale i.MX8qxp pixel link MSI bus is a simple memory-mapped bus.
It is used to access peripherals in i.MX8qm/qxp imaging, LVDS, MIPI
DSI and HDMI TX subsystems, like I2C controller, PWM controller,
MIPI DSI controller and Control and Status Registers (CSR) module.

Reference simple-pm-bus bindings and add Freescale i.MX8qxp pixel
link MSI bus specific bindings.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Liu Ying <victor.liu@nxp.com>
Link: https://lore.kernel.org/r/20221017074039.4181843-3-victor.liu@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bus/fsl,imx8qxp-pixel-link-msi-bus.yaml   | 232 ++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml

diff --git a/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml
new file mode 100644
index 0000000000000..b568d0ce438d2
--- /dev/null
+++ b/Documentation/devicetree/bindings/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml
@@ -0,0 +1,232 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/bus/fsl,imx8qxp-pixel-link-msi-bus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Freescale i.MX8qxp Pixel Link Medium Speed Interconnect (MSI) Bus
+
+maintainers:
+  - Liu Ying <victor.liu@nxp.com>
+
+description: |
+  i.MX8qxp pixel link MSI bus is used to control settings of PHYs, I/Os
+  sitting together with the PHYs.  It is not the same as the MSI bus coming
+  from i.MX8 System Controller Unit (SCU) which is used to control power,
+  clock and reset through the i.MX8 Distributed Slave System Controller (DSC).
+
+  i.MX8qxp pixel link MSI bus is a simple memory-mapped bus. Two input clocks,
+  that is, MSI clock and AHB clock, need to be enabled so that peripherals
+  connected to the bus can be accessed. Also, the bus is part of a power
+  domain. The power domain needs to be enabled before the peripherals can
+  be accessed.
+
+  Peripherals in i.MX8qm/qxp imaging, LVDS, MIPI DSI and HDMI TX subsystems,
+  like I2C controller, PWM controller, MIPI DSI controller and Control and
+  Status Registers (CSR) module, are accessed through the bus.
+
+  The i.MX System Controller Firmware (SCFW) owns and uses the i.MX8qm/qxp
+  pixel link MSI bus controller and does not allow SCFW user to control it.
+  So, the controller's registers cannot be accessed by SCFW user. Hence,
+  the interrupts generated by the controller don't make any sense from SCFW
+  user's point of view.
+
+allOf:
+  - $ref: simple-pm-bus.yaml#
+
+# We need a select here so we don't match all nodes with 'simple-pm-bus'.
+select:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - fsl,imx8qxp-display-pixel-link-msi-bus
+          - fsl,imx8qm-display-pixel-link-msi-bus
+  required:
+    - compatible
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - fsl,imx8qxp-display-pixel-link-msi-bus
+          - fsl,imx8qm-display-pixel-link-msi-bus
+      - const: simple-pm-bus
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: master gated clock from system
+      - description: AHB clock
+
+  clock-names:
+    items:
+      - const: msi
+      - const: ahb
+
+patternProperties:
+  "^.*@[0-9a-f]+$":
+    description: Devices attached to the bus
+    type: object
+    properties:
+      reg:
+        maxItems: 1
+
+    required:
+      - reg
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - power-domains
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/imx8-lpcg.h>
+    #include <dt-bindings/firmware/imx/rsrc.h>
+    bus@56200000 {
+        compatible = "fsl,imx8qxp-display-pixel-link-msi-bus", "simple-pm-bus";
+        reg = <0x56200000 0x20000>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        interrupt-parent = <&dc0_irqsteer>;
+        interrupts = <320>;
+        ranges;
+        clocks = <&dc0_disp_ctrl_link_mst0_lpcg IMX_LPCG_CLK_4>,
+                 <&dc0_disp_ctrl_link_mst0_lpcg IMX_LPCG_CLK_4>;
+        clock-names = "msi", "ahb";
+        power-domains = <&pd IMX_SC_R_DC_0>;
+
+        syscon@56221000 {
+            compatible = "fsl,imx8qxp-mipi-lvds-csr", "syscon", "simple-mfd";
+            reg = <0x56221000 0x1000>;
+            clocks = <&mipi_lvds_0_di_mipi_lvds_regs_lpcg IMX_LPCG_CLK_4>;
+            clock-names = "ipg";
+
+            pxl2dpi {
+                compatible = "fsl,imx8qxp-pxl2dpi";
+                fsl,sc-resource = <IMX_SC_R_MIPI_0>;
+                power-domains = <&pd IMX_SC_R_MIPI_0>;
+
+                ports {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    port@0 {
+                        #address-cells = <1>;
+                        #size-cells = <0>;
+                        reg = <0>;
+
+                        mipi_lvds_0_pxl2dpi_dc0_pixel_link0: endpoint@0 {
+                            reg = <0>;
+                            remote-endpoint = <&dc0_pixel_link0_mipi_lvds_0_pxl2dpi>;
+                        };
+
+                        mipi_lvds_0_pxl2dpi_dc0_pixel_link1: endpoint@1 {
+                            reg = <1>;
+                            remote-endpoint = <&dc0_pixel_link1_mipi_lvds_0_pxl2dpi>;
+                        };
+                    };
+
+                    port@1 {
+                        #address-cells = <1>;
+                        #size-cells = <0>;
+                        reg = <1>;
+
+                        mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0: endpoint@0 {
+                            reg = <0>;
+                            remote-endpoint = <&mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi>;
+                        };
+
+                        mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1: endpoint@1 {
+                            reg = <1>;
+                            remote-endpoint = <&mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi>;
+                        };
+                    };
+                };
+            };
+
+            ldb {
+                #address-cells = <1>;
+                #size-cells = <0>;
+                compatible = "fsl,imx8qxp-ldb";
+                clocks = <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_MISC2>,
+                         <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_BYPASS>;
+                clock-names = "pixel", "bypass";
+                power-domains = <&pd IMX_SC_R_LVDS_0>;
+
+                channel@0 {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+                    reg = <0>;
+                    phys = <&mipi_lvds_0_phy>;
+                    phy-names = "lvds_phy";
+
+                    port@0 {
+                        reg = <0>;
+
+                        mipi_lvds_0_ldb_ch0_mipi_lvds_0_pxl2dpi: endpoint {
+                            remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch0>;
+                        };
+                    };
+
+                    port@1 {
+                        reg = <1>;
+
+                        /* ... */
+                    };
+                };
+
+                channel@1 {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+                    reg = <1>;
+                    phys = <&mipi_lvds_0_phy>;
+                    phy-names = "lvds_phy";
+
+                    port@0 {
+                        reg = <0>;
+
+                        mipi_lvds_0_ldb_ch1_mipi_lvds_0_pxl2dpi: endpoint {
+                            remote-endpoint = <&mipi_lvds_0_pxl2dpi_mipi_lvds_0_ldb_ch1>;
+                        };
+                    };
+
+                    port@1 {
+                        reg = <1>;
+
+                        /* ... */
+                    };
+                };
+            };
+        };
+
+        clock-controller@56223004 {
+            compatible = "fsl,imx8qxp-lpcg";
+            reg = <0x56223004 0x4>;
+            #clock-cells = <1>;
+            clocks = <&mipi_lvds_0_ipg_clk>;
+            clock-indices = <IMX_LPCG_CLK_4>;
+            clock-output-names = "mipi_lvds_0_di_mipi_lvds_regs_lpcg_ipg_clk";
+            power-domains = <&pd IMX_SC_R_MIPI_0>;
+        };
+
+        phy@56228300 {
+            compatible = "fsl,imx8qxp-mipi-dphy";
+            reg = <0x56228300 0x100>;
+            clocks = <&clk IMX_SC_R_LVDS_0 IMX_SC_PM_CLK_PHY>;
+            clock-names = "phy_ref";
+            #phy-cells = <0>;
+            fsl,syscon = <&mipi_lvds_0_csr>;
+            power-domains = <&pd IMX_SC_R_MIPI_0>;
+        };
+    };
-- 
GitLab


From 4a4a4e9ebaa3ce903a3cdf8bb173eeaf87828cea Mon Sep 17 00:00:00 2001
From: Quan Nguyen <quan@os.amperecomputing.com>
Date: Mon, 31 Oct 2022 09:44:41 +0700
Subject: [PATCH 1301/4122] misc: smpro-errmon: Add Ampere's SMpro error
 monitor driver

Add Ampere's SMpro error monitor driver for monitoring and reporting
RAS-related errors as reported by SMpro co-processor found on Ampere's
Altra processor family.

Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
Link: https://lore.kernel.org/r/20221031024442.2490881-3-quan@os.amperecomputing.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../sysfs-bus-platform-devices-ampere-smpro   | 264 +++++++++
 drivers/misc/Kconfig                          |  12 +
 drivers/misc/Makefile                         |   1 +
 drivers/misc/smpro-errmon.c                   | 529 ++++++++++++++++++
 4 files changed, 806 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
 create mode 100644 drivers/misc/smpro-errmon.c

diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
new file mode 100644
index 0000000000000..2b84dc8c3149d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
@@ -0,0 +1,264 @@
+What:		/sys/bus/platform/devices/smpro-errmon.*/error_[core|mem|pcie|other]_[ce|ue]
+KernelVersion:	6.1
+Contact:	Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+		(RO) Contains the 48-byte Ampere (Vendor-Specific) Error Record printed
+		in hex format according to the table below:
+
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| Offset |     Field     | Size (byte) |                     Description                            |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 00     | Error Type    | 1           | See :ref:`the table below <smpro-error-types>` for details |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 01     | Subtype       | 1           | See :ref:`the table below <smpro-error-types>` for details |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 02     | Instance      | 2           | See :ref:`the table below <smpro-error-types>` for details |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 04     | Error status  | 4           | See ARM RAS specification for details                      |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 08     | Error Address | 8           | See ARM RAS specification for details                      |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 16     | Error Misc 0  | 8           | See ARM RAS specification for details                      |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 24     | Error Misc 1  | 8           | See ARM RAS specification for details                      |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 32     | Error Misc 2  | 8           | See ARM RAS specification for details                      |
+		+--------+---------------+-------------+------------------------------------------------------------+
+		| 40     | Error Misc 3  | 8           | See ARM RAS specification for details                      |
+		+--------+---------------+-------------+------------------------------------------------------------+
+
+		The table below defines the value of error types, their subtype, subcomponent and instance:
+
+		.. _smpro-error-types:
+
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		|   Error Group   | Error Type | Sub type | Sub component  |               Instance                 |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| CPM (core)      | 0          | 0        | Snoop-Logic    | CPM #                                  |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| CPM (core)      | 0          | 2        | Armv8 Core 1   | CPM #                                  |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| MCU (mem)       | 1          | 1        | ERR1           | MCU # \| SLOT << 11                    |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| MCU (mem)       | 1          | 2        | ERR2           | MCU # \| SLOT << 11                    |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| MCU (mem)       | 1          | 3        | ERR3           | MCU #                                  |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| MCU (mem)       | 1          | 4        | ERR4           | MCU #                                  |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| MCU (mem)       | 1          | 5        | ERR5           | MCU #                                  |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| MCU (mem)       | 1          | 6        | ERR6           | MCU #                                  |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| MCU (mem)       | 1          | 7        | Link Error     | MCU #                                  |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| Mesh (other)    | 2          | 0        | Cross Point    | X \| (Y << 5) \| NS <<11               |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| Mesh (other)    | 2          | 1        | Home Node(IO)  | X \| (Y << 5) \| NS <<11               |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| Mesh (other)    | 2          | 2        | Home Node(Mem) | X \| (Y << 5) \| NS <<11 \| device<<12 |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| Mesh (other)    | 2          | 4        | CCIX Node      | X \| (Y << 5) \| NS <<11               |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| 2P Link (other) | 3          | 0        | N/A            | Altra 2P Link #                        |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 0        | ERR0           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 1        | ERR1           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 2        | ERR2           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 3        | ERR3           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 4        | ERR4           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 5        | ERR5           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 6        | ERR6           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 7        | ERR7           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 8        | ERR8           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 9        | ERR9           | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 10       | ERR10          | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 11       | ERR11          | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 12       | ERR12          | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| GIC (other)     | 5          | 13-21    | ERR13          | RC # + 1                               |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TCU      | 100            | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU0     | 0              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU1     | 1              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU2     | 2              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU3     | 3              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU4     | 4              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU5     | 5              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU6     | 6              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU7     | 7              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU8     | 8              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMMU (other)    | 6          | TBU9     | 9              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PCIe AER (pcie) | 7          | Root     | 0              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PCIe AER (pcie) | 7          | Device   | 1              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PCIe RC (pcie)  | 8          | RCA HB   | 0              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PCIe RC (pcie)  | 8          | RCB HB   | 1              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PCIe RC (pcie)  | 8          | RASDP    | 8              | RC #                                   |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| OCM (other)     | 9          | ERR0     | 0              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| OCM (other)     | 9          | ERR1     | 1              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| OCM (other)     | 9          | ERR2     | 2              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMpro (other)   | 10         | ERR0     | 0              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMpro (other)   | 10         | ERR1     | 1              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| SMpro (other)   | 10         | MPA_ERR  | 2              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PMpro (other)   | 11         | ERR0     | 0              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PMpro (other)   | 11         | ERR1     | 1              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+		| PMpro (other)   | 11         | MPA_ERR  | 2              | 0                                      |
+		+-----------------+------------+----------+----------------+----------------------------------------+
+
+		Example::
+
+		 # cat error_other_ue
+		 880807001e004010401040101500000001004010401040100c0000000000000000000000000000000000000000000000
+
+		The detail of each sysfs entries is as below:
+
+		+-------------+---------------------------------------------------------+----------------------------------+
+		|   Error     |                   Sysfs entry                           |   Description (when triggered)   |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| Core's CE   | /sys/bus/platform/devices/smpro-errmon.*/error_core_ce  | Core has CE error                |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| Core's UE   | /sys/bus/platform/devices/smpro-errmon.*/error_core_ue  | Core has UE error                |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| Memory's CE | /sys/bus/platform/devices/smpro-errmon.*/error_mem_ce   | Memory has CE error              |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| Memory's UE | /sys/bus/platform/devices/smpro-errmon.*/error_mem_ue   | Memory has UE error              |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| PCIe's CE   | /sys/bus/platform/devices/smpro-errmon.*/error_pcie_ce  | any PCIe controller has CE error |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| PCIe's UE   | /sys/bus/platform/devices/smpro-errmon.*/error_pcie_ue  | any PCIe controller has UE error |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| Other's CE  | /sys/bus/platform/devices/smpro-errmon.*/error_other_ce | any other CE error               |
+		+-------------+---------------------------------------------------------+----------------------------------+
+		| Other's UE  | /sys/bus/platform/devices/smpro-errmon.*/error_other_ue | any other UE error               |
+		+-------------+---------------------------------------------------------+----------------------------------+
+
+		UE: Uncorrect-able Error
+		CE: Correct-able Error
+
+		For details, see section `3.3 Ampere (Vendor-Specific) Error Record Formats,
+		Altra Family RAS Supplement`.
+
+
+What:		/sys/bus/platform/devices/smpro-errmon.*/overflow_[core|mem|pcie|other]_[ce|ue]
+KernelVersion:	6.1
+Contact:	Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+		(RO) Return the overflow status of each type HW error reported:
+
+		  - 0      : No overflow
+		  - 1      : There is an overflow and the oldest HW errors are dropped
+
+		The detail of each sysfs entries is as below:
+
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		|   Overflow  |                   Sysfs entry                             |             Description               |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| Core's CE   | /sys/bus/platform/devices/smpro-errmon.*/overflow_core_ce | Core CE error overflow                |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| Core's UE   | /sys/bus/platform/devices/smpro-errmon.*/overflow_core_ue | Core UE error overflow                |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| Memory's CE | /sys/bus/platform/devices/smpro-errmon.*/overflow_mem_ce  | Memory CE error overflow              |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| Memory's UE | /sys/bus/platform/devices/smpro-errmon.*/overflow_mem_ue  | Memory UE error overflow              |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| PCIe's CE   | /sys/bus/platform/devices/smpro-errmon.*/overflow_pcie_ce | any PCIe controller CE error overflow |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| PCIe's UE   | /sys/bus/platform/devices/smpro-errmon.*/overflow_pcie_ue | any PCIe controller UE error overflow |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| Other's CE  | /sys/bus/platform/devices/smpro-errmon.*/overflow_other_ce| any other CE error overflow           |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+		| Other's UE  | /sys/bus/platform/devices/smpro-errmon.*/overflow_other_ue| other UE error overflow               |
+		+-------------+-----------------------------------------------------------+---------------------------------------+
+
+		where:
+
+		  - UE: Uncorrect-able Error
+		  - CE: Correct-able Error
+
+What:		/sys/bus/platform/devices/smpro-errmon.*/[error|warn]_[smpro|pmpro]
+KernelVersion:	6.1
+Contact:	Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+		(RO) Contains the internal firmware error/warning printed as hex format.
+
+		The detail of each sysfs entries is as below:
+
+		+---------------+------------------------------------------------------+--------------------------+
+		|   Error       |                   Sysfs entry                        |        Description       |
+		+---------------+------------------------------------------------------+--------------------------+
+		| SMpro error   | /sys/bus/platform/devices/smpro-errmon.*/error_smpro | system has SMpro error   |
+		+---------------+------------------------------------------------------+--------------------------+
+		| SMpro warning | /sys/bus/platform/devices/smpro-errmon.*/warn_smpro  | system has SMpro warning |
+		+---------------+------------------------------------------------------+--------------------------+
+		| PMpro error   | /sys/bus/platform/devices/smpro-errmon.*/error_pmpro | system has PMpro error   |
+		+---------------+------------------------------------------------------+--------------------------+
+		| PMpro warning | /sys/bus/platform/devices/smpro-errmon.*/warn_pmpro  | system has PMpro warning |
+		+---------------+------------------------------------------------------+--------------------------+
+
+		For details, see section `5.10 RAS Internal Error Register Definitions,
+		Altra Family Soc BMC Interface Specification`.
+
+What:		/sys/bus/platform/devices/smpro-errmon.*/event_[vrd_warn_fault|vrd_hot|dimm_hot]
+KernelVersion:	6.1
+Contact:	Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+		(RO) Contains the detail information in case of VRD/DIMM warning/hot events
+		in hex format as below::
+
+		    AAAA
+
+		where:
+
+		  - ``AAAA``: The event detail information data
+
+		The detail of each sysfs entries is as below:
+
+		+---------------+---------------------------------------------------------------+---------------------+
+		|   Event       |                        Sysfs entry                            |     Description     |
+		+---------------+---------------------------------------------------------------+---------------------+
+		| VRD HOT       | /sys/bus/platform/devices/smpro-errmon.*/event_vrd_hot        | VRD Hot             |
+		+---------------+---------------------------------------------------------------+---------------------+
+		| VR Warn/Fault | /sys/bus/platform/devices/smpro-errmon.*/event_vrd_warn_fault | VR Warning or Fault |
+		+---------------+---------------------------------------------------------------+---------------------+
+		| DIMM HOT      | /sys/bus/platform/devices/smpro-errmon.*/event_dimm_hot       | DIMM Hot            |
+		+---------------+---------------------------------------------------------------+---------------------+
+
+		For more details, see section `5.7 GPI Status Registers,
+		Altra Family Soc BMC Interface Specification`.
+
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 358ad56f65245..b9ceee949dab8 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -176,6 +176,18 @@ config SGI_XP
 	  this feature will allow for direct communication between SSIs
 	  based on a network adapter and DMA messaging.
 
+config SMPRO_ERRMON
+	tristate "Ampere Computing SMPro error monitor driver"
+	depends on MFD_SMPRO || COMPILE_TEST
+	help
+	  Say Y here to get support for the SMpro error monitor function
+	  provided by Ampere Computing's Altra and Altra Max SoCs. Upon
+	  loading, the driver creates sysfs files which can be use to gather
+	  multiple HW error data reported via read and write system calls.
+
+	  To compile this driver as a module, say M here. The driver will be
+	  called smpro-errmon.
+
 config CS5535_MFGPT
 	tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
 	depends on MFD_CS5535
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index ac9b3e757ba1d..bbe24d4511a33 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_ENCLOSURE_SERVICES) += enclosure.o
 obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
 obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
+obj-$(CONFIG_SMPRO_ERRMON)	+= smpro-errmon.o
 obj-$(CONFIG_CS5535_MFGPT)	+= cs5535-mfgpt.o
 obj-$(CONFIG_GEHC_ACHC)		+= gehc-achc.o
 obj-$(CONFIG_HP_ILO)		+= hpilo.o
diff --git a/drivers/misc/smpro-errmon.c b/drivers/misc/smpro-errmon.c
new file mode 100644
index 0000000000000..d1431d419aa42
--- /dev/null
+++ b/drivers/misc/smpro-errmon.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ampere Computing SoC's SMpro Error Monitoring Driver
+ *
+ * Copyright (c) 2022, Ampere Computing LLC
+ *
+ */
+
+#include <linux/i2c.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+/* GPI RAS Error Registers */
+#define GPI_RAS_ERR		0x7E
+
+/* Core and L2C Error Registers */
+#define CORE_CE_ERR_CNT		0x80
+#define CORE_CE_ERR_LEN		0x81
+#define CORE_CE_ERR_DATA	0x82
+#define CORE_UE_ERR_CNT		0x83
+#define CORE_UE_ERR_LEN		0x84
+#define CORE_UE_ERR_DATA	0x85
+
+/* Memory Error Registers */
+#define MEM_CE_ERR_CNT		0x90
+#define MEM_CE_ERR_LEN		0x91
+#define MEM_CE_ERR_DATA		0x92
+#define MEM_UE_ERR_CNT		0x93
+#define MEM_UE_ERR_LEN		0x94
+#define MEM_UE_ERR_DATA		0x95
+
+/* RAS Error/Warning Registers */
+#define ERR_SMPRO_TYPE		0xA0
+#define ERR_PMPRO_TYPE		0xA1
+#define ERR_SMPRO_INFO_LO	0xA2
+#define ERR_SMPRO_INFO_HI	0xA3
+#define ERR_SMPRO_DATA_LO	0xA4
+#define ERR_SMPRO_DATA_HI	0xA5
+#define WARN_SMPRO_INFO_LO	0xAA
+#define WARN_SMPRO_INFO_HI	0xAB
+#define ERR_PMPRO_INFO_LO	0xA6
+#define ERR_PMPRO_INFO_HI	0xA7
+#define ERR_PMPRO_DATA_LO	0xA8
+#define ERR_PMPRO_DATA_HI	0xA9
+#define WARN_PMPRO_INFO_LO	0xAC
+#define WARN_PMPRO_INFO_HI	0xAD
+
+/* PCIE Error Registers */
+#define PCIE_CE_ERR_CNT		0xC0
+#define PCIE_CE_ERR_LEN		0xC1
+#define PCIE_CE_ERR_DATA	0xC2
+#define PCIE_UE_ERR_CNT		0xC3
+#define PCIE_UE_ERR_LEN		0xC4
+#define PCIE_UE_ERR_DATA	0xC5
+
+/* Other Error Registers */
+#define OTHER_CE_ERR_CNT	0xD0
+#define OTHER_CE_ERR_LEN	0xD1
+#define OTHER_CE_ERR_DATA	0xD2
+#define OTHER_UE_ERR_CNT	0xD8
+#define OTHER_UE_ERR_LEN	0xD9
+#define OTHER_UE_ERR_DATA	0xDA
+
+/* Event Data Registers */
+#define VRD_WARN_FAULT_EVENT_DATA	0x78
+#define VRD_HOT_EVENT_DATA		0x79
+#define DIMM_HOT_EVENT_DATA		0x7A
+
+#define MAX_READ_BLOCK_LENGTH	48
+
+#define RAS_SMPRO_ERR		0
+#define RAS_PMPRO_ERR		1
+
+enum RAS_48BYTES_ERR_TYPES {
+	CORE_CE_ERR,
+	CORE_UE_ERR,
+	MEM_CE_ERR,
+	MEM_UE_ERR,
+	PCIE_CE_ERR,
+	PCIE_UE_ERR,
+	OTHER_CE_ERR,
+	OTHER_UE_ERR,
+	NUM_48BYTES_ERR_TYPE,
+};
+
+struct smpro_error_hdr {
+	u8 count;	/* Number of the RAS errors */
+	u8 len;		/* Number of data bytes */
+	u8 data;	/* Start of 48-byte data */
+	u8 max_cnt;	/* Max num of errors */
+};
+
+/*
+ * Included Address of registers to get Count, Length of data and Data
+ * of the 48 bytes error data
+ */
+static struct smpro_error_hdr smpro_error_table[] = {
+	[CORE_CE_ERR] = {
+		.count = CORE_CE_ERR_CNT,
+		.len = CORE_CE_ERR_LEN,
+		.data = CORE_CE_ERR_DATA,
+		.max_cnt = 32
+	},
+	[CORE_UE_ERR] = {
+		.count = CORE_UE_ERR_CNT,
+		.len = CORE_UE_ERR_LEN,
+		.data = CORE_UE_ERR_DATA,
+		.max_cnt = 32
+	},
+	[MEM_CE_ERR] = {
+		.count = MEM_CE_ERR_CNT,
+		.len = MEM_CE_ERR_LEN,
+		.data = MEM_CE_ERR_DATA,
+		.max_cnt = 16
+	},
+	[MEM_UE_ERR] = {
+		.count = MEM_UE_ERR_CNT,
+		.len = MEM_UE_ERR_LEN,
+		.data = MEM_UE_ERR_DATA,
+		.max_cnt = 16
+	},
+	[PCIE_CE_ERR] = {
+		.count = PCIE_CE_ERR_CNT,
+		.len = PCIE_CE_ERR_LEN,
+		.data = PCIE_CE_ERR_DATA,
+		.max_cnt = 96
+	},
+	[PCIE_UE_ERR] = {
+		.count = PCIE_UE_ERR_CNT,
+		.len = PCIE_UE_ERR_LEN,
+		.data = PCIE_UE_ERR_DATA,
+		.max_cnt = 96
+	},
+	[OTHER_CE_ERR] = {
+		.count = OTHER_CE_ERR_CNT,
+		.len = OTHER_CE_ERR_LEN,
+		.data = OTHER_CE_ERR_DATA,
+		.max_cnt = 8
+	},
+	[OTHER_UE_ERR] = {
+		.count = OTHER_UE_ERR_CNT,
+		.len = OTHER_UE_ERR_LEN,
+		.data = OTHER_UE_ERR_DATA,
+		.max_cnt = 8
+	},
+};
+
+/*
+ * List of SCP registers which are used to get
+ * one type of RAS Internal errors.
+ */
+struct smpro_int_error_hdr {
+	u8 type;
+	u8 info_l;
+	u8 info_h;
+	u8 data_l;
+	u8 data_h;
+	u8 warn_l;
+	u8 warn_h;
+};
+
+static struct smpro_int_error_hdr list_smpro_int_error_hdr[] = {
+	[RAS_SMPRO_ERR] = {
+		.type = ERR_SMPRO_TYPE,
+		.info_l = ERR_SMPRO_INFO_LO,
+		.info_h = ERR_SMPRO_INFO_HI,
+		.data_l = ERR_SMPRO_DATA_LO,
+		.data_h = ERR_SMPRO_DATA_HI,
+		.warn_l = WARN_SMPRO_INFO_LO,
+		.warn_h = WARN_SMPRO_INFO_HI,
+	},
+	[RAS_PMPRO_ERR] = {
+		.type = ERR_PMPRO_TYPE,
+		.info_l = ERR_PMPRO_INFO_LO,
+		.info_h = ERR_PMPRO_INFO_HI,
+		.data_l = ERR_PMPRO_DATA_LO,
+		.data_h = ERR_PMPRO_DATA_HI,
+		.warn_l = WARN_PMPRO_INFO_LO,
+		.warn_h = WARN_PMPRO_INFO_HI,
+	},
+};
+
+struct smpro_errmon {
+	struct regmap *regmap;
+};
+
+enum EVENT_TYPES {
+	VRD_WARN_FAULT_EVENT,
+	VRD_HOT_EVENT,
+	DIMM_HOT_EVENT,
+	NUM_EVENTS_TYPE,
+};
+
+/* Included Address of event source and data registers */
+static u8 smpro_event_table[NUM_EVENTS_TYPE] = {
+	VRD_WARN_FAULT_EVENT_DATA,
+	VRD_HOT_EVENT_DATA,
+	DIMM_HOT_EVENT_DATA,
+};
+
+static ssize_t smpro_event_data_read(struct device *dev,
+				     struct device_attribute *da, char *buf,
+				     int channel)
+{
+	struct smpro_errmon *errmon = dev_get_drvdata(dev);
+	s32 event_data;
+	int ret;
+
+	ret = regmap_read(errmon->regmap, smpro_event_table[channel], &event_data);
+	if (ret)
+		return ret;
+	/* Clear event after read */
+	if (event_data != 0)
+		regmap_write(errmon->regmap, smpro_event_table[channel], event_data);
+
+	return sysfs_emit(buf, "%04x\n", event_data);
+}
+
+static ssize_t smpro_overflow_data_read(struct device *dev, struct device_attribute *da,
+					char *buf, int channel)
+{
+	struct smpro_errmon *errmon = dev_get_drvdata(dev);
+	struct smpro_error_hdr *err_info;
+	s32 err_count;
+	int ret;
+
+	err_info = &smpro_error_table[channel];
+
+	ret = regmap_read(errmon->regmap, err_info->count, &err_count);
+	if (ret)
+		return ret;
+
+	/* Bit 8 indicates the overflow status */
+	return sysfs_emit(buf, "%d\n", (err_count & BIT(8)) ? 1 : 0);
+}
+
+static ssize_t smpro_error_data_read(struct device *dev, struct device_attribute *da,
+				     char *buf, int channel)
+{
+	struct smpro_errmon *errmon = dev_get_drvdata(dev);
+	unsigned char err_data[MAX_READ_BLOCK_LENGTH];
+	struct smpro_error_hdr *err_info;
+	s32 err_count, err_length;
+	int ret;
+
+	err_info = &smpro_error_table[channel];
+
+	ret = regmap_read(errmon->regmap, err_info->count, &err_count);
+	/* Error count is the low byte */
+	err_count &= 0xff;
+	if (ret || !err_count || err_count > err_info->max_cnt)
+		return ret;
+
+	ret = regmap_read(errmon->regmap, err_info->len, &err_length);
+	if (ret || err_length <= 0)
+		return ret;
+
+	if (err_length > MAX_READ_BLOCK_LENGTH)
+		err_length = MAX_READ_BLOCK_LENGTH;
+
+	memset(err_data, 0x00, MAX_READ_BLOCK_LENGTH);
+	ret = regmap_noinc_read(errmon->regmap, err_info->data, err_data, err_length);
+	if (ret < 0)
+		return ret;
+
+	/* clear the error */
+	ret = regmap_write(errmon->regmap, err_info->count, 0x100);
+	if (ret)
+		return ret;
+	/*
+	 * The output of Core/Memory/PCIe/Others UE/CE errors follows the format
+	 * specified in section 5.8.1 CE/UE Error Data record in
+	 * Altra SOC BMC Interface specification.
+	 */
+	return sysfs_emit(buf, "%*phN\n", MAX_READ_BLOCK_LENGTH, err_data);
+}
+
+/*
+ * Output format:
+ * <4-byte hex value of error info><4-byte hex value of error extensive data>
+ * Where:
+ *   + error info : The error information
+ *   + error data : Extensive data (32 bits)
+ * Reference to section 5.10 RAS Internal Error Register Definition in
+ * Altra SOC BMC Interface specification
+ */
+static ssize_t smpro_internal_err_read(struct device *dev, struct device_attribute *da,
+				       char *buf, int channel)
+{
+	struct smpro_errmon *errmon = dev_get_drvdata(dev);
+	struct smpro_int_error_hdr *err_info;
+	unsigned int err[4] = { 0 };
+	unsigned int err_type;
+	unsigned int val;
+	int ret;
+
+	/* read error status */
+	ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
+	if (ret)
+		return ret;
+
+	if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
+	    (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
+		return 0;
+
+	err_info = &list_smpro_int_error_hdr[channel];
+	ret = regmap_read(errmon->regmap, err_info->type, &val);
+	if (ret)
+		return ret;
+
+	err_type = (val & BIT(1)) ? BIT(1) :
+		   (val & BIT(2)) ? BIT(2) : 0;
+
+	if (!err_type)
+		return 0;
+
+	ret = regmap_read(errmon->regmap, err_info->info_l, err + 1);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(errmon->regmap, err_info->info_h, err);
+	if (ret)
+		return ret;
+
+	if (err_type & BIT(2)) {
+		/* Error with data type */
+		ret = regmap_read(errmon->regmap, err_info->data_l, err + 3);
+		if (ret)
+			return ret;
+
+		ret = regmap_read(errmon->regmap, err_info->data_h, err + 2);
+		if (ret)
+			return ret;
+	}
+
+	/* clear the read errors */
+	ret = regmap_write(errmon->regmap, err_info->type, err_type);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%*phN\n", (int)sizeof(err), err);
+}
+
+/*
+ * Output format:
+ * <4-byte hex value of warining info>
+ * Reference to section 5.10 RAS Internal Error Register Definition in
+ * Altra SOC BMC Interface specification
+ */
+static ssize_t smpro_internal_warn_read(struct device *dev, struct device_attribute *da,
+					char *buf, int channel)
+{
+	struct smpro_errmon *errmon = dev_get_drvdata(dev);
+	struct smpro_int_error_hdr *err_info;
+	unsigned int warn[2] = { 0 };
+	unsigned int val;
+	int ret;
+
+	/* read error status */
+	ret = regmap_read(errmon->regmap, GPI_RAS_ERR, &val);
+	if (ret)
+		return ret;
+
+	if ((channel == RAS_SMPRO_ERR && !(val & BIT(0))) ||
+	    (channel == RAS_PMPRO_ERR && !(val & BIT(1))))
+		return 0;
+
+	err_info = &list_smpro_int_error_hdr[channel];
+	ret = regmap_read(errmon->regmap, err_info->type, &val);
+	if (ret)
+		return ret;
+
+	if (!(val & BIT(0)))
+		return 0;
+
+	ret = regmap_read(errmon->regmap, err_info->warn_l, warn + 1);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(errmon->regmap, err_info->warn_h, warn);
+	if (ret)
+		return ret;
+
+	/* clear the warning */
+	ret = regmap_write(errmon->regmap, err_info->type, BIT(0));
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%*phN\n", (int)sizeof(warn), warn);
+}
+
+#define ERROR_OVERFLOW_RO(_error, _index) \
+	static ssize_t overflow_##_error##_show(struct device *dev,            \
+						struct device_attribute *da,   \
+						char *buf)                     \
+	{                                                                      \
+		return smpro_overflow_data_read(dev, da, buf, _index);         \
+	}                                                                      \
+	static DEVICE_ATTR_RO(overflow_##_error)
+
+ERROR_OVERFLOW_RO(core_ce, CORE_CE_ERR);
+ERROR_OVERFLOW_RO(core_ue, CORE_UE_ERR);
+ERROR_OVERFLOW_RO(mem_ce, MEM_CE_ERR);
+ERROR_OVERFLOW_RO(mem_ue, MEM_UE_ERR);
+ERROR_OVERFLOW_RO(pcie_ce, PCIE_CE_ERR);
+ERROR_OVERFLOW_RO(pcie_ue, PCIE_UE_ERR);
+ERROR_OVERFLOW_RO(other_ce, OTHER_CE_ERR);
+ERROR_OVERFLOW_RO(other_ue, OTHER_UE_ERR);
+
+#define ERROR_RO(_error, _index) \
+	static ssize_t error_##_error##_show(struct device *dev,            \
+					     struct device_attribute *da,   \
+					     char *buf)                     \
+	{                                                                   \
+		return smpro_error_data_read(dev, da, buf, _index);         \
+	}                                                                   \
+	static DEVICE_ATTR_RO(error_##_error)
+
+ERROR_RO(core_ce, CORE_CE_ERR);
+ERROR_RO(core_ue, CORE_UE_ERR);
+ERROR_RO(mem_ce, MEM_CE_ERR);
+ERROR_RO(mem_ue, MEM_UE_ERR);
+ERROR_RO(pcie_ce, PCIE_CE_ERR);
+ERROR_RO(pcie_ue, PCIE_UE_ERR);
+ERROR_RO(other_ce, OTHER_CE_ERR);
+ERROR_RO(other_ue, OTHER_UE_ERR);
+
+static ssize_t error_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
+{
+	return smpro_internal_err_read(dev, da, buf, RAS_SMPRO_ERR);
+}
+static DEVICE_ATTR_RO(error_smpro);
+
+static ssize_t error_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
+{
+	return smpro_internal_err_read(dev, da, buf, RAS_PMPRO_ERR);
+}
+static DEVICE_ATTR_RO(error_pmpro);
+
+static ssize_t warn_smpro_show(struct device *dev, struct device_attribute *da, char *buf)
+{
+	return smpro_internal_warn_read(dev, da, buf, RAS_SMPRO_ERR);
+}
+static DEVICE_ATTR_RO(warn_smpro);
+
+static ssize_t warn_pmpro_show(struct device *dev, struct device_attribute *da, char *buf)
+{
+	return smpro_internal_warn_read(dev, da, buf, RAS_PMPRO_ERR);
+}
+static DEVICE_ATTR_RO(warn_pmpro);
+
+#define EVENT_RO(_event, _index) \
+	static ssize_t event_##_event##_show(struct device *dev,            \
+					     struct device_attribute *da,   \
+					     char *buf)                     \
+	{                                                                   \
+		return smpro_event_data_read(dev, da, buf, _index);         \
+	}                                                                   \
+	static DEVICE_ATTR_RO(event_##_event)
+
+EVENT_RO(vrd_warn_fault, VRD_WARN_FAULT_EVENT);
+EVENT_RO(vrd_hot, VRD_HOT_EVENT);
+EVENT_RO(dimm_hot, DIMM_HOT_EVENT);
+
+static struct attribute *smpro_errmon_attrs[] = {
+	&dev_attr_overflow_core_ce.attr,
+	&dev_attr_overflow_core_ue.attr,
+	&dev_attr_overflow_mem_ce.attr,
+	&dev_attr_overflow_mem_ue.attr,
+	&dev_attr_overflow_pcie_ce.attr,
+	&dev_attr_overflow_pcie_ue.attr,
+	&dev_attr_overflow_other_ce.attr,
+	&dev_attr_overflow_other_ue.attr,
+	&dev_attr_error_core_ce.attr,
+	&dev_attr_error_core_ue.attr,
+	&dev_attr_error_mem_ce.attr,
+	&dev_attr_error_mem_ue.attr,
+	&dev_attr_error_pcie_ce.attr,
+	&dev_attr_error_pcie_ue.attr,
+	&dev_attr_error_other_ce.attr,
+	&dev_attr_error_other_ue.attr,
+	&dev_attr_error_smpro.attr,
+	&dev_attr_error_pmpro.attr,
+	&dev_attr_warn_smpro.attr,
+	&dev_attr_warn_pmpro.attr,
+	&dev_attr_event_vrd_warn_fault.attr,
+	&dev_attr_event_vrd_hot.attr,
+	&dev_attr_event_dimm_hot.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(smpro_errmon);
+
+static int smpro_errmon_probe(struct platform_device *pdev)
+{
+	struct smpro_errmon *errmon;
+
+	errmon = devm_kzalloc(&pdev->dev, sizeof(struct smpro_errmon), GFP_KERNEL);
+	if (!errmon)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, errmon);
+
+	errmon->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!errmon->regmap)
+		return -ENODEV;
+
+	return 0;
+}
+
+static struct platform_driver smpro_errmon_driver = {
+	.probe          = smpro_errmon_probe,
+	.driver = {
+		.name   = "smpro-errmon",
+		.dev_groups = smpro_errmon_groups,
+	},
+};
+
+module_platform_driver(smpro_errmon_driver);
+
+MODULE_AUTHOR("Tung Nguyen <tung.nguyen@amperecomputing.com>");
+MODULE_AUTHOR("Thinh Pham <thinh.pham@amperecomputing.com>");
+MODULE_AUTHOR("Hoang Nguyen <hnguyen@amperecomputing.com>");
+MODULE_AUTHOR("Thu Nguyen <thu@os.amperecomputing.com>");
+MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>");
+MODULE_DESCRIPTION("Ampere Altra SMpro driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 763dc90e9a4332f82ad43c866c6878742b15d4ab Mon Sep 17 00:00:00 2001
From: Quan Nguyen <quan@os.amperecomputing.com>
Date: Mon, 31 Oct 2022 09:44:42 +0700
Subject: [PATCH 1302/4122] misc: smpro-misc: Add Ampere's Altra SMpro misc
 driver

Add driver support for accessing various information reported by
Ampere's SMpro co-processor such as Boot Progress and other
miscellaneous data.

Signed-off-by: Quan Nguyen <quan@os.amperecomputing.com>
Link: https://lore.kernel.org/r/20221031024442.2490881-4-quan@os.amperecomputing.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../sysfs-bus-platform-devices-ampere-smpro   |  48 ++++++
 drivers/misc/Kconfig                          |  10 ++
 drivers/misc/Makefile                         |   1 +
 drivers/misc/smpro-misc.c                     | 145 ++++++++++++++++++
 4 files changed, 204 insertions(+)
 create mode 100644 drivers/misc/smpro-misc.c

diff --git a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
index 2b84dc8c3149d..ca93c215ef995 100644
--- a/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
+++ b/Documentation/ABI/testing/sysfs-bus-platform-devices-ampere-smpro
@@ -262,3 +262,51 @@ Description:
 		For more details, see section `5.7 GPI Status Registers,
 		Altra Family Soc BMC Interface Specification`.
 
+What:		/sys/bus/platform/devices/smpro-misc.*/boot_progress
+KernelVersion:	6.1
+Contact:	Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+		(RO) Contains the boot stages information in hex as format below::
+
+		    AABBCCCCCCCC
+
+		where:
+
+		  - ``AA``      : The boot stages
+
+		    - 00: SMpro firmware booting
+		    - 01: PMpro firmware booting
+		    - 02: ATF BL1 firmware booting
+		    - 03: DDR initialization
+		    - 04: DDR training report status
+		    - 05: ATF BL2 firmware booting
+		    - 06: ATF BL31 firmware booting
+		    - 07: ATF BL32 firmware booting
+		    - 08: UEFI firmware booting
+		    - 09: OS booting
+
+		  - ``BB``      : Boot status
+
+		    - 00: Not started
+		    - 01: Started
+		    - 02: Completed without error
+		    - 03: Failed.
+
+		  - ``CCCCCCCC``: Boot status information defined for each boot stages
+
+		For details, see section `5.11 Boot Stage Register Definitions`
+		and section `6. Processor Boot Progress Codes, Altra Family Soc BMC
+		Interface Specification`.
+
+
+What:		/sys/bus/platform/devices/smpro-misc*/soc_power_limit
+KernelVersion:	6.1
+Contact:	Quan Nguyen <quan@os.amperecomputing.com>
+Description:
+		(RW) Contains the desired SoC power limit in Watt.
+		Writes to this sysfs set the desired SoC power limit (W).
+		Reads from this register return the current SoC power limit (W).
+		The value ranges:
+
+		  - Minimum: 120 W
+		  - Maximum: Socket TDP power
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index b9ceee949dab8..9947b7892bd5a 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -188,6 +188,16 @@ config SMPRO_ERRMON
 	  To compile this driver as a module, say M here. The driver will be
 	  called smpro-errmon.
 
+config SMPRO_MISC
+	tristate "Ampere Computing SMPro miscellaneous driver"
+	depends on MFD_SMPRO || COMPILE_TEST
+	help
+	  Say Y here to get support for the SMpro error miscellalenous function
+	  provided by Ampere Computing's Altra and Altra Max SoCs.
+
+	  To compile this driver as a module, say M here. The driver will be
+	  called smpro-misc.
+
 config CS5535_MFGPT
 	tristate "CS5535/CS5536 Geode Multi-Function General Purpose Timer (MFGPT) support"
 	depends on MFD_CS5535
diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index bbe24d4511a33..87b54a4a44224 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -24,6 +24,7 @@ obj-$(CONFIG_KGDB_TESTS)	+= kgdbts.o
 obj-$(CONFIG_SGI_XP)		+= sgi-xp/
 obj-$(CONFIG_SGI_GRU)		+= sgi-gru/
 obj-$(CONFIG_SMPRO_ERRMON)	+= smpro-errmon.o
+obj-$(CONFIG_SMPRO_MISC)	+= smpro-misc.o
 obj-$(CONFIG_CS5535_MFGPT)	+= cs5535-mfgpt.o
 obj-$(CONFIG_GEHC_ACHC)		+= gehc-achc.o
 obj-$(CONFIG_HP_ILO)		+= hpilo.o
diff --git a/drivers/misc/smpro-misc.c b/drivers/misc/smpro-misc.c
new file mode 100644
index 0000000000000..6c427141e51b4
--- /dev/null
+++ b/drivers/misc/smpro-misc.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Ampere Computing SoC's SMpro Misc Driver
+ *
+ * Copyright (c) 2022, Ampere Computing LLC
+ */
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+
+/* Boot Stage/Progress Registers */
+#define BOOTSTAGE	0xB0
+#define BOOTSTAGE_LO	0xB1
+#define CUR_BOOTSTAGE	0xB2
+#define BOOTSTAGE_HI	0xB3
+
+/* SOC State Registers */
+#define SOC_POWER_LIMIT		0xE5
+
+struct smpro_misc {
+	struct regmap *regmap;
+};
+
+static ssize_t boot_progress_show(struct device *dev, struct device_attribute *da, char *buf)
+{
+	struct smpro_misc *misc = dev_get_drvdata(dev);
+	u16 boot_progress[3] = { 0 };
+	u32 bootstage;
+	u8 boot_stage;
+	u8 cur_stage;
+	u32 reg_lo;
+	u32 reg;
+	int ret;
+
+	/* Read current boot stage */
+	ret = regmap_read(misc->regmap, CUR_BOOTSTAGE, &reg);
+	if (ret)
+		return ret;
+
+	cur_stage = reg & 0xff;
+
+	ret = regmap_read(misc->regmap, BOOTSTAGE, &bootstage);
+	if (ret)
+		return ret;
+
+	boot_stage = (bootstage >> 8) & 0xff;
+
+	if (boot_stage > cur_stage)
+		return -EINVAL;
+
+	ret = regmap_read(misc->regmap,	BOOTSTAGE_LO, &reg_lo);
+	if (!ret)
+		ret = regmap_read(misc->regmap, BOOTSTAGE_HI, &reg);
+	if (ret)
+		return ret;
+
+	/* Firmware to report new boot stage next time */
+	if (boot_stage < cur_stage) {
+		ret = regmap_write(misc->regmap, BOOTSTAGE, ((bootstage & 0xff00) | 0x1));
+		if (ret)
+			return ret;
+	}
+
+	boot_progress[0] = bootstage;
+	boot_progress[1] = swab16(reg);
+	boot_progress[2] = swab16(reg_lo);
+
+	return sysfs_emit(buf, "%*phN\n", (int)sizeof(boot_progress), boot_progress);
+}
+
+static DEVICE_ATTR_RO(boot_progress);
+
+static ssize_t soc_power_limit_show(struct device *dev, struct device_attribute *da, char *buf)
+{
+	struct smpro_misc *misc = dev_get_drvdata(dev);
+	unsigned int value;
+	int ret;
+
+	ret = regmap_read(misc->regmap, SOC_POWER_LIMIT, &value);
+	if (ret)
+		return ret;
+
+	return sysfs_emit(buf, "%d\n", value);
+}
+
+static ssize_t soc_power_limit_store(struct device *dev, struct device_attribute *da,
+				     const char *buf, size_t count)
+{
+	struct smpro_misc *misc = dev_get_drvdata(dev);
+	unsigned long val;
+	s32 ret;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	ret = regmap_write(misc->regmap, SOC_POWER_LIMIT, (unsigned int)val);
+	if (ret)
+		return -EPROTO;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(soc_power_limit);
+
+static struct attribute *smpro_misc_attrs[] = {
+	&dev_attr_boot_progress.attr,
+	&dev_attr_soc_power_limit.attr,
+	NULL
+};
+
+ATTRIBUTE_GROUPS(smpro_misc);
+
+static int smpro_misc_probe(struct platform_device *pdev)
+{
+	struct smpro_misc *misc;
+
+	misc = devm_kzalloc(&pdev->dev, sizeof(struct smpro_misc), GFP_KERNEL);
+	if (!misc)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, misc);
+
+	misc->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!misc->regmap)
+		return -ENODEV;
+
+	return 0;
+}
+
+static struct platform_driver smpro_misc_driver = {
+	.probe		= smpro_misc_probe,
+	.driver = {
+		.name	= "smpro-misc",
+		.dev_groups = smpro_misc_groups,
+	},
+};
+
+module_platform_driver(smpro_misc_driver);
+
+MODULE_AUTHOR("Tung Nguyen <tungnguyen@os.amperecomputing.com>");
+MODULE_AUTHOR("Quan Nguyen <quan@os.amperecomputing.com>");
+MODULE_DESCRIPTION("Ampere Altra SMpro Misc driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 0d4a030b3db1b53498f1c1ae243677fa0dd2e7ce Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 1 Nov 2022 22:13:59 +0100
Subject: [PATCH 1303/4122] greybus: svc: Use kstrtobool() instead of
 strtobool()

strtobool() is the same as kstrtobool().
However, the latter is more used within the kernel.

In order to remove strtobool() and slightly simplify kstrtox.h, switch to
the other function name.

While at it, include the corresponding header file (<linux/kstrtox.h>)

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Johan Hovold <johan@kernel.org>
Reviewed-by: Alex Elder <elder@linaro.org>
Link: https://lore.kernel.org/r/ebf1e6988a53a455990230a37cf759ee542ea7ec.1667336095.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/greybus/svc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/greybus/svc.c b/drivers/greybus/svc.c
index 56d2b44d6fefc..16cced80867a6 100644
--- a/drivers/greybus/svc.c
+++ b/drivers/greybus/svc.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/debugfs.h>
+#include <linux/kstrtox.h>
 #include <linux/workqueue.h>
 #include <linux/greybus.h>
 
@@ -83,7 +84,7 @@ static ssize_t watchdog_store(struct device *dev,
 	int retval;
 	bool user_request;
 
-	retval = strtobool(buf, &user_request);
+	retval = kstrtobool(buf, &user_request);
 	if (retval)
 		return retval;
 
-- 
GitLab


From d3d76fbde1c456a6d19991baa99ea8c2d6e6696f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Thu, 3 Nov 2022 16:24:07 +0100
Subject: [PATCH 1304/4122] kernel/ksysfs.c: export kernel cpu byteorder
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Certain files in procfs are formatted in byteorder-dependent formats.
For example the IP addresses in /proc/net/udp.

When using emulation like qemu-user, applications are not guaranteed to
be using the same byteorder as the kernel.
Therefore the kernel needs to provide a way for applications to discover
the byteorder used in API-filesystems.
Using systemcalls is not enough because these are intercepted and
translated by the emulation.

Also this makes it easier for non-compiled applications like
shellscripts to discover the byteorder.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Link: https://lore.kernel.org/r/20221103152407.3348-1-linux@weissschuh.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../ABI/testing/sysfs-kernel-cpu_byteorder     | 12 ++++++++++++
 kernel/ksysfs.c                                | 18 ++++++++++++++++++
 2 files changed, 30 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-cpu_byteorder

diff --git a/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder b/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder
new file mode 100644
index 0000000000000..f0e6ac1b53566
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-cpu_byteorder
@@ -0,0 +1,12 @@
+What:		/sys/kernel/cpu_byteorder
+Date:		February 2023
+KernelVersion:	6.2
+Contact:	Thomas Weißschuh <linux@weissschuh.net>
+Description:
+		The endianness of the running kernel.
+
+		Access: Read
+
+		Valid values:
+			"little", "big"
+Users:		util-linux
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 65dba9076f312..2df00b789b907 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -6,6 +6,7 @@
  * Copyright (C) 2004 Kay Sievers <kay.sievers@vrfy.org>
  */
 
+#include <asm/byteorder.h>
 #include <linux/kobject.h>
 #include <linux/string.h>
 #include <linux/sysfs.h>
@@ -20,6 +21,14 @@
 
 #include <linux/rcupdate.h>	/* rcu_expedited and rcu_normal */
 
+#if defined(__LITTLE_ENDIAN)
+#define CPU_BYTEORDER_STRING	"little"
+#elif defined(__BIG_ENDIAN)
+#define CPU_BYTEORDER_STRING	"big"
+#else
+#error Unknown byteorder
+#endif
+
 #define KERNEL_ATTR_RO(_name) \
 static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
 
@@ -34,6 +43,14 @@ static ssize_t uevent_seqnum_show(struct kobject *kobj,
 }
 KERNEL_ATTR_RO(uevent_seqnum);
 
+/* cpu byteorder */
+static ssize_t cpu_byteorder_show(struct kobject *kobj,
+				  struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%s\n", CPU_BYTEORDER_STRING);
+}
+KERNEL_ATTR_RO(cpu_byteorder);
+
 #ifdef CONFIG_UEVENT_HELPER
 /* uevent helper program, used during early boot */
 static ssize_t uevent_helper_show(struct kobject *kobj,
@@ -215,6 +232,7 @@ EXPORT_SYMBOL_GPL(kernel_kobj);
 static struct attribute * kernel_attrs[] = {
 	&fscaps_attr.attr,
 	&uevent_seqnum_attr.attr,
+	&cpu_byteorder_attr.attr,
 #ifdef CONFIG_UEVENT_HELPER
 	&uevent_helper_attr.attr,
 #endif
-- 
GitLab


From 9b351be25360c5cedfb98b88d6dfd89327849e52 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 22 Oct 2022 16:56:36 -0600
Subject: [PATCH 1305/4122] vmlinux.lds.h: add BOUNDED_SECTION* macros

vmlinux.lds.h has ~45 occurrences of this general pattern:

  __start_foo = .;
  KEEP(*(foo))
  __stop_foo = .;

Reduce this pattern to a (group of 4) macros, and use them to reduce
linecount.  This was inspired by the codetag patchset.

no functional change.

CC: Suren Baghdasaryan <surenb@google.com>
CC: Kent Overstreet <kent.overstreet@linux.dev>
Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20221022225637.1406715-2-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/vmlinux.lds.h | 219 +++++++++++-------------------
 1 file changed, 79 insertions(+), 140 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c15de165ec8ff..9f6352171f886 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -193,100 +193,99 @@
 # endif
 #endif
 
+#define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _s_, _e_)		\
+	_s_##_label_ = .;						\
+	KEEP(*(_sec_))							\
+	_e_##_label_ = .;
+
+#define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _s_, _e_)		\
+	_label_##_s_ = .;						\
+	KEEP(*(_sec_))							\
+	_label_##_e_ = .;
+
+#define BOUNDED_SECTION_BY(_sec_, _label_)				\
+	BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, __start, __stop)
+
+#define BOUNDED_SECTION(_sec)	 BOUNDED_SECTION_BY(_sec, _sec)
+
 #ifdef CONFIG_TRACE_BRANCH_PROFILING
-#define LIKELY_PROFILE()	__start_annotated_branch_profile = .;	\
-				KEEP(*(_ftrace_annotated_branch))	\
-				__stop_annotated_branch_profile = .;
+#define LIKELY_PROFILE()						\
+	BOUNDED_SECTION_BY(_ftrace_annotated_branch, _annotated_branch_profile)
 #else
 #define LIKELY_PROFILE()
 #endif
 
 #ifdef CONFIG_PROFILE_ALL_BRANCHES
-#define BRANCH_PROFILE()	__start_branch_profile = .;		\
-				KEEP(*(_ftrace_branch))			\
-				__stop_branch_profile = .;
+#define BRANCH_PROFILE()					\
+	BOUNDED_SECTION_BY(_ftrace_branch, _branch_profile)
 #else
 #define BRANCH_PROFILE()
 #endif
 
 #ifdef CONFIG_KPROBES
-#define KPROBE_BLACKLIST()	. = ALIGN(8);				      \
-				__start_kprobe_blacklist = .;		      \
-				KEEP(*(_kprobe_blacklist))		      \
-				__stop_kprobe_blacklist = .;
+#define KPROBE_BLACKLIST()				\
+	. = ALIGN(8);					\
+	BOUNDED_SECTION(_kprobe_blacklist)
 #else
 #define KPROBE_BLACKLIST()
 #endif
 
 #ifdef CONFIG_FUNCTION_ERROR_INJECTION
-#define ERROR_INJECT_WHITELIST()	STRUCT_ALIGN();			      \
-			__start_error_injection_whitelist = .;		      \
-			KEEP(*(_error_injection_whitelist))		      \
-			__stop_error_injection_whitelist = .;
+#define ERROR_INJECT_WHITELIST()			\
+	STRUCT_ALIGN();					\
+	BOUNDED_SECTION(_error_injection_whitelist)
 #else
 #define ERROR_INJECT_WHITELIST()
 #endif
 
 #ifdef CONFIG_EVENT_TRACING
-#define FTRACE_EVENTS()	. = ALIGN(8);					\
-			__start_ftrace_events = .;			\
-			KEEP(*(_ftrace_events))				\
-			__stop_ftrace_events = .;			\
-			__start_ftrace_eval_maps = .;			\
-			KEEP(*(_ftrace_eval_map))			\
-			__stop_ftrace_eval_maps = .;
+#define FTRACE_EVENTS()							\
+	. = ALIGN(8);							\
+	BOUNDED_SECTION(_ftrace_events)					\
+	BOUNDED_SECTION_BY(_ftrace_eval_map, _ftrace_eval_maps)
 #else
 #define FTRACE_EVENTS()
 #endif
 
 #ifdef CONFIG_TRACING
-#define TRACE_PRINTKS()	 __start___trace_bprintk_fmt = .;      \
-			 KEEP(*(__trace_printk_fmt)) /* Trace_printk fmt' pointer */ \
-			 __stop___trace_bprintk_fmt = .;
-#define TRACEPOINT_STR() __start___tracepoint_str = .;	\
-			 KEEP(*(__tracepoint_str)) /* Trace_printk fmt' pointer */ \
-			 __stop___tracepoint_str = .;
+#define TRACE_PRINTKS()		BOUNDED_SECTION_BY(__trace_printk_fmt, ___trace_bprintk_fmt)
+#define TRACEPOINT_STR()	BOUNDED_SECTION_BY(__tracepoint_str, ___tracepoint_str)
 #else
 #define TRACE_PRINTKS()
 #define TRACEPOINT_STR()
 #endif
 
 #ifdef CONFIG_FTRACE_SYSCALLS
-#define TRACE_SYSCALLS() . = ALIGN(8);					\
-			 __start_syscalls_metadata = .;			\
-			 KEEP(*(__syscalls_metadata))			\
-			 __stop_syscalls_metadata = .;
+#define TRACE_SYSCALLS()			\
+	. = ALIGN(8);				\
+	BOUNDED_SECTION_BY(__syscalls_metadata, _syscalls_metadata)
 #else
 #define TRACE_SYSCALLS()
 #endif
 
 #ifdef CONFIG_BPF_EVENTS
-#define BPF_RAW_TP() STRUCT_ALIGN();					\
-			 __start__bpf_raw_tp = .;			\
-			 KEEP(*(__bpf_raw_tp_map))			\
-			 __stop__bpf_raw_tp = .;
+#define BPF_RAW_TP() STRUCT_ALIGN();				\
+	BOUNDED_SECTION_BY(__bpf_raw_tp_map, __bpf_raw_tp)
 #else
 #define BPF_RAW_TP()
 #endif
 
 #ifdef CONFIG_SERIAL_EARLYCON
-#define EARLYCON_TABLE() . = ALIGN(8);				\
-			 __earlycon_table = .;			\
-			 KEEP(*(__earlycon_table))		\
-			 __earlycon_table_end = .;
+#define EARLYCON_TABLE()						\
+	. = ALIGN(8);							\
+	BOUNDED_SECTION_POST_LABEL(__earlycon_table, __earlycon_table, , _end)
 #else
 #define EARLYCON_TABLE()
 #endif
 
 #ifdef CONFIG_SECURITY
-#define LSM_TABLE()	. = ALIGN(8);					\
-			__start_lsm_info = .;				\
-			KEEP(*(.lsm_info.init))				\
-			__end_lsm_info = .;
-#define EARLY_LSM_TABLE()	. = ALIGN(8);				\
-			__start_early_lsm_info = .;			\
-			KEEP(*(.early_lsm_info.init))			\
-			__end_early_lsm_info = .;
+#define LSM_TABLE()					\
+	. = ALIGN(8);					\
+	BOUNDED_SECTION_PRE_LABEL(.lsm_info.init, _lsm_info, __start, __end)
+
+#define EARLY_LSM_TABLE()						\
+	. = ALIGN(8);							\
+	BOUNDED_SECTION_PRE_LABEL(.early_lsm_info.init, _early_lsm_info, __start, __end)
 #else
 #define LSM_TABLE()
 #define EARLY_LSM_TABLE()
@@ -312,9 +311,8 @@
 #ifdef CONFIG_ACPI
 #define ACPI_PROBE_TABLE(name)						\
 	. = ALIGN(8);							\
-	__##name##_acpi_probe_table = .;				\
-	KEEP(*(__##name##_acpi_probe_table))				\
-	__##name##_acpi_probe_table_end = .;
+	BOUNDED_SECTION_POST_LABEL(__##name##_acpi_probe_table,		\
+				   __##name##_acpi_probe_table,, _end)
 #else
 #define ACPI_PROBE_TABLE(name)
 #endif
@@ -322,9 +320,8 @@
 #ifdef CONFIG_THERMAL
 #define THERMAL_TABLE(name)						\
 	. = ALIGN(8);							\
-	__##name##_thermal_table = .;					\
-	KEEP(*(__##name##_thermal_table))				\
-	__##name##_thermal_table_end = .;
+	BOUNDED_SECTION_POST_LABEL(__##name##_thermal_table,		\
+				   __##name##_thermal_table,, _end)
 #else
 #define THERMAL_TABLE(name)
 #endif
@@ -353,12 +350,8 @@
 	*(__tracepoints)						\
 	/* implement dynamic printk debug */				\
 	. = ALIGN(8);							\
-	__start___dyndbg_classes = .;					\
-	KEEP(*(__dyndbg_classes))					\
-	__stop___dyndbg_classes = .;					\
-	__start___dyndbg = .;						\
-	KEEP(*(__dyndbg))						\
-	__stop___dyndbg = .;						\
+	BOUNDED_SECTION_BY(__dyndbg_classes, ___dyndbg_classes)		\
+	BOUNDED_SECTION_BY(__dyndbg, ___dyndbg)				\
 	LIKELY_PROFILE()		       				\
 	BRANCH_PROFILE()						\
 	TRACE_PRINTKS()							\
@@ -401,19 +394,13 @@
 
 #define JUMP_TABLE_DATA							\
 	. = ALIGN(8);							\
-	__start___jump_table = .;					\
-	KEEP(*(__jump_table))						\
-	__stop___jump_table = .;
+	BOUNDED_SECTION_BY(__jump_table, ___jump_table)
 
 #ifdef CONFIG_HAVE_STATIC_CALL_INLINE
 #define STATIC_CALL_DATA						\
 	. = ALIGN(8);							\
-	__start_static_call_sites = .;					\
-	KEEP(*(.static_call_sites))					\
-	__stop_static_call_sites = .;					\
-	__start_static_call_tramp_key = .;				\
-	KEEP(*(.static_call_tramp_key))					\
-	__stop_static_call_tramp_key = .;
+	BOUNDED_SECTION_BY(.static_call_sites, _static_call_sites)	\
+	BOUNDED_SECTION_BY(.static_call_tramp_key, _static_call_tramp_key)
 #else
 #define STATIC_CALL_DATA
 #endif
@@ -439,9 +426,7 @@
 #ifdef CONFIG_ARCH_USES_CFI_TRAPS
 #define KCFI_TRAPS							\
 	__kcfi_traps : AT(ADDR(__kcfi_traps) - LOAD_OFFSET) {		\
-		__start___kcfi_traps = .;				\
-		KEEP(*(.kcfi_traps))					\
-		__stop___kcfi_traps = .;				\
+		BOUNDED_SECTION_BY(.kcfi_traps, ___kcfi_traps)		\
 	}
 #else
 #define KCFI_TRAPS
@@ -459,9 +444,7 @@
 		SCHED_DATA						\
 		RO_AFTER_INIT_DATA	/* Read only after init */	\
 		. = ALIGN(8);						\
-		__start___tracepoints_ptrs = .;				\
-		KEEP(*(__tracepoints_ptrs)) /* Tracepoints: pointer array */ \
-		__stop___tracepoints_ptrs = .;				\
+		BOUNDED_SECTION_BY(__tracepoints_ptrs, ___tracepoints_ptrs) \
 		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
 									\
@@ -471,30 +454,14 @@
 									\
 	/* PCI quirks */						\
 	.pci_fixup        : AT(ADDR(.pci_fixup) - LOAD_OFFSET) {	\
-		__start_pci_fixups_early = .;				\
-		KEEP(*(.pci_fixup_early))				\
-		__end_pci_fixups_early = .;				\
-		__start_pci_fixups_header = .;				\
-		KEEP(*(.pci_fixup_header))				\
-		__end_pci_fixups_header = .;				\
-		__start_pci_fixups_final = .;				\
-		KEEP(*(.pci_fixup_final))				\
-		__end_pci_fixups_final = .;				\
-		__start_pci_fixups_enable = .;				\
-		KEEP(*(.pci_fixup_enable))				\
-		__end_pci_fixups_enable = .;				\
-		__start_pci_fixups_resume = .;				\
-		KEEP(*(.pci_fixup_resume))				\
-		__end_pci_fixups_resume = .;				\
-		__start_pci_fixups_resume_early = .;			\
-		KEEP(*(.pci_fixup_resume_early))			\
-		__end_pci_fixups_resume_early = .;			\
-		__start_pci_fixups_suspend = .;				\
-		KEEP(*(.pci_fixup_suspend))				\
-		__end_pci_fixups_suspend = .;				\
-		__start_pci_fixups_suspend_late = .;			\
-		KEEP(*(.pci_fixup_suspend_late))			\
-		__end_pci_fixups_suspend_late = .;			\
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_early,  _pci_fixups_early,  __start, __end) \
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_header, _pci_fixups_header, __start, __end) \
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_final,  _pci_fixups_final,  __start, __end) \
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_enable, _pci_fixups_enable, __start, __end) \
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_resume, _pci_fixups_resume, __start, __end) \
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_suspend, _pci_fixups_suspend, __start, __end) \
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_resume_early, _pci_fixups_resume_early, __start, __end) \
+		BOUNDED_SECTION_PRE_LABEL(.pci_fixup_suspend_late, _pci_fixups_suspend_late, __start, __end) \
 	}								\
 									\
 	FW_LOADER_BUILT_IN_DATA						\
@@ -544,16 +511,12 @@
 									\
 	/* Built-in module parameters. */				\
 	__param : AT(ADDR(__param) - LOAD_OFFSET) {			\
-		__start___param = .;					\
-		KEEP(*(__param))					\
-		__stop___param = .;					\
+		BOUNDED_SECTION_BY(__param, ___param)			\
 	}								\
 									\
 	/* Built-in module versions. */					\
 	__modver : AT(ADDR(__modver) - LOAD_OFFSET) {			\
-		__start___modver = .;					\
-		KEEP(*(__modver))					\
-		__stop___modver = .;					\
+		BOUNDED_SECTION_BY(__modver, ___modver)			\
 	}								\
 									\
 	KCFI_TRAPS							\
@@ -663,9 +626,7 @@
 #define EXCEPTION_TABLE(align)						\
 	. = ALIGN(align);						\
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {		\
-		__start___ex_table = .;					\
-		KEEP(*(__ex_table))					\
-		__stop___ex_table = .;					\
+		BOUNDED_SECTION_BY(__ex_table, ___ex_table)		\
 	}
 
 /*
@@ -674,9 +635,7 @@
 #ifdef CONFIG_DEBUG_INFO_BTF
 #define BTF								\
 	.BTF : AT(ADDR(.BTF) - LOAD_OFFSET) {				\
-		__start_BTF = .;					\
-		KEEP(*(.BTF))						\
-		__stop_BTF = .;						\
+		BOUNDED_SECTION_BY(.BTF, _BTF)				\
 	}								\
 	. = ALIGN(4);							\
 	.BTF_ids : AT(ADDR(.BTF_ids) - LOAD_OFFSET) {			\
@@ -853,9 +812,7 @@
 #define BUG_TABLE							\
 	. = ALIGN(8);							\
 	__bug_table : AT(ADDR(__bug_table) - LOAD_OFFSET) {		\
-		__start___bug_table = .;				\
-		KEEP(*(__bug_table))					\
-		__stop___bug_table = .;					\
+		BOUNDED_SECTION_BY(__bug_table, ___bug_table)		\
 	}
 #else
 #define BUG_TABLE
@@ -865,15 +822,11 @@
 #define ORC_UNWIND_TABLE						\
 	. = ALIGN(4);							\
 	.orc_unwind_ip : AT(ADDR(.orc_unwind_ip) - LOAD_OFFSET) {	\
-		__start_orc_unwind_ip = .;				\
-		KEEP(*(.orc_unwind_ip))					\
-		__stop_orc_unwind_ip = .;				\
+		BOUNDED_SECTION_BY(.orc_unwind_ip, _orc_unwind_ip)	\
 	}								\
 	. = ALIGN(2);							\
 	.orc_unwind : AT(ADDR(.orc_unwind) - LOAD_OFFSET) {		\
-		__start_orc_unwind = .;					\
-		KEEP(*(.orc_unwind))					\
-		__stop_orc_unwind = .;					\
+		BOUNDED_SECTION_BY(.orc_unwind, _orc_unwind)		\
 	}								\
 	text_size = _etext - _stext;					\
 	. = ALIGN(4);							\
@@ -891,9 +844,7 @@
 #ifdef CONFIG_FW_LOADER
 #define FW_LOADER_BUILT_IN_DATA						\
 	.builtin_fw : AT(ADDR(.builtin_fw) - LOAD_OFFSET) ALIGN(8) {	\
-		__start_builtin_fw = .;					\
-		KEEP(*(.builtin_fw))					\
-		__end_builtin_fw = .;					\
+		BOUNDED_SECTION_PRE_LABEL(.builtin_fw, _builtin_fw, __start, __end) \
 	}
 #else
 #define FW_LOADER_BUILT_IN_DATA
@@ -903,9 +854,7 @@
 #define TRACEDATA							\
 	. = ALIGN(4);							\
 	.tracedata : AT(ADDR(.tracedata) - LOAD_OFFSET) {		\
-		__tracedata_start = .;					\
-		KEEP(*(.tracedata))					\
-		__tracedata_end = .;					\
+		BOUNDED_SECTION_POST_LABEL(.tracedata, __tracedata, _start, _end) \
 	}
 #else
 #define TRACEDATA
@@ -914,9 +863,7 @@
 #ifdef CONFIG_PRINTK_INDEX
 #define PRINTK_INDEX							\
 	.printk_index : AT(ADDR(.printk_index) - LOAD_OFFSET) {		\
-		__start_printk_index = .;				\
-		*(.printk_index)					\
-		__stop_printk_index = .;				\
+		BOUNDED_SECTION_BY(.printk_index, _printk_index)	\
 	}
 #else
 #define PRINTK_INDEX
@@ -924,17 +871,13 @@
 
 #define NOTES								\
 	.notes : AT(ADDR(.notes) - LOAD_OFFSET) {			\
-		__start_notes = .;					\
-		KEEP(*(.note.*))					\
-		__stop_notes = .;					\
+		BOUNDED_SECTION_BY(.note.*, _notes)			\
 	} NOTES_HEADERS							\
 	NOTES_HEADERS_RESTORE
 
 #define INIT_SETUP(initsetup_align)					\
 		. = ALIGN(initsetup_align);				\
-		__setup_start = .;					\
-		KEEP(*(.init.setup))					\
-		__setup_end = .;
+		BOUNDED_SECTION_POST_LABEL(.init.setup, __setup, _start, _end)
 
 #define INIT_CALLS_LEVEL(level)						\
 		__initcall##level##_start = .;				\
@@ -956,16 +899,12 @@
 		__initcall_end = .;
 
 #define CON_INITCALL							\
-		__con_initcall_start = .;				\
-		KEEP(*(.con_initcall.init))				\
-		__con_initcall_end = .;
+	BOUNDED_SECTION_POST_LABEL(.con_initcall.init, __con_initcall, _start, _end)
 
 /* Alignment must be consistent with (kunit_suite *) in include/kunit/test.h */
 #define KUNIT_TABLE()							\
 		. = ALIGN(8);						\
-		__kunit_suites_start = .;				\
-		KEEP(*(.kunit_test_suites))				\
-		__kunit_suites_end = .;
+		BOUNDED_SECTION_POST_LABEL(.kunit_test_suites, __kunit_suites, _start, _end)
 
 #ifdef CONFIG_BLK_DEV_INITRD
 #define INIT_RAM_FS							\
-- 
GitLab


From 2f465b921bb8ff97025017e05f6c7a7a1f6a5749 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Sat, 22 Oct 2022 16:56:37 -0600
Subject: [PATCH 1306/4122] vmlinux.lds.h: place optional header space in
 BOUNDED_SECTION

Extend recently added BOUNDED_SECTION(_name) macro by adding a
KEEP(*(.gnu.linkonce.##_name)) before the KEEP(*(_name)).

This does nothing by itself, vmlinux is the same before and after this
patch.  But if a developer adds a .gnu.linkonce.foo record, that
record is placed in the front of the section, where it can be used as
a header for the table.

The intent is to create an up-link to another organizing struct, from
where related tables can be referenced.  And since every item in a
table has a known offset from its header, that same offset can be used
to fetch records from the related tables.

By itself, this doesnt gain much, unless maybe the pattern of access
is to scan 1 or 2 fields in each fat record, but with 2 16 bit .map*
fields added, we could de-duplicate 2 related tables.

The use case here is struct _ddebug, which has 3 pointers (function,
file, module) with substantial repetition; respectively 53%, 90%, and
the module column is fully recoverable after dynamic_debug_init()
splits the table into a linked list of "module" chunks.

On a DYNAMIC_DEBUG=y kernel with 5k pr_debugs, the memory savings
should be ~100 KiB.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20221022225637.1406715-3-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/vmlinux.lds.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 9f6352171f886..b3ca56ac163f7 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -195,11 +195,13 @@
 
 #define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _s_, _e_)		\
 	_s_##_label_ = .;						\
+	KEEP(*(.gnu.linkonce.##_sec_))					\
 	KEEP(*(_sec_))							\
 	_e_##_label_ = .;
 
 #define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _s_, _e_)		\
 	_label_##_s_ = .;						\
+	KEEP(*(.gnu.linkonce.##_sec_))					\
 	KEEP(*(_sec_))							\
 	_label_##_e_ = .;
 
-- 
GitLab


From 9e6f07cd1eaa72c41719050c5ca9d22a8c0b7c02 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 4 Nov 2022 15:20:01 +0100
Subject: [PATCH 1307/4122] vfio/ccw: create a parent struct

Move the stuff associated with the mdev parent (and thus the
subchannel struct) into its own struct, and leave the rest in
the existing private structure.

The subchannel will point to the parent, and the parent will point
to the private, for the areas where one or both are needed. Further
separation of these structs will follow.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221104142007.1314999-2-farman@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c     | 98 +++++++++++++++++++++++------
 drivers/s390/cio/vfio_ccw_ops.c     |  8 ++-
 drivers/s390/cio/vfio_ccw_private.h | 20 ++++--
 3 files changed, 101 insertions(+), 25 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 7f5402fe857a2..444b32047397b 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -36,10 +36,19 @@ debug_info_t *vfio_ccw_debug_trace_id;
  */
 int vfio_ccw_sch_quiesce(struct subchannel *sch)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 	DECLARE_COMPLETION_ONSTACK(completion);
 	int iretry, ret = 0;
 
+	/*
+	 * Probably an impossible situation, after being called through
+	 * FSM callbacks. But in the event it did, register a warning
+	 * and return as if things were fine.
+	 */
+	if (WARN_ON(!private))
+		return 0;
+
 	iretry = 255;
 	do {
 
@@ -121,7 +130,23 @@ static void vfio_ccw_crw_todo(struct work_struct *work)
  */
 static void vfio_ccw_sch_irq(struct subchannel *sch)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
+
+	/*
+	 * The subchannel should still be disabled at this point,
+	 * so an interrupt would be quite surprising. As with an
+	 * interrupt while the FSM is closed, let's attempt to
+	 * disable the subchannel again.
+	 */
+	if (!private) {
+		VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: unexpected interrupt\n",
+				   sch->schid.cssid, sch->schid.ssid,
+				   sch->schid.sch_no);
+
+		cio_disable_subchannel(sch);
+		return;
+	}
 
 	inc_irq_stat(IRQIO_CIO);
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
@@ -201,10 +226,19 @@ static void vfio_ccw_free_private(struct vfio_ccw_private *private)
 	mutex_destroy(&private->io_mutex);
 	kfree(private);
 }
+
+static void vfio_ccw_free_parent(struct device *dev)
+{
+	struct vfio_ccw_parent *parent = container_of(dev, struct vfio_ccw_parent, dev);
+
+	kfree(parent);
+}
+
 static int vfio_ccw_sch_probe(struct subchannel *sch)
 {
 	struct pmcw *pmcw = &sch->schib.pmcw;
 	struct vfio_ccw_private *private;
+	struct vfio_ccw_parent *parent;
 	int ret = -ENOMEM;
 
 	if (pmcw->qf) {
@@ -213,38 +247,58 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 		return -ENODEV;
 	}
 
+	parent = kzalloc(sizeof(*parent), GFP_KERNEL);
+	if (!parent)
+		return -ENOMEM;
+
+	dev_set_name(&parent->dev, "parent");
+	parent->dev.parent = &sch->dev;
+	parent->dev.release = &vfio_ccw_free_parent;
+	ret = device_register(&parent->dev);
+	if (ret)
+		goto out_free;
+
 	private = vfio_ccw_alloc_private(sch);
-	if (IS_ERR(private))
+	if (IS_ERR(private)) {
+		device_unregister(&parent->dev);
 		return PTR_ERR(private);
+	}
 
-	dev_set_drvdata(&sch->dev, private);
+	dev_set_drvdata(&sch->dev, parent);
+	dev_set_drvdata(&parent->dev, private);
 
-	private->mdev_type.sysfs_name = "io";
-	private->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)";
-	private->mdev_types[0] = &private->mdev_type;
-	ret = mdev_register_parent(&private->parent, &sch->dev,
+	parent->mdev_type.sysfs_name = "io";
+	parent->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)";
+	parent->mdev_types[0] = &parent->mdev_type;
+	ret = mdev_register_parent(&parent->parent, &sch->dev,
 				   &vfio_ccw_mdev_driver,
-				   private->mdev_types, 1);
+				   parent->mdev_types, 1);
 	if (ret)
-		goto out_free;
+		goto out_unreg;
 
 	VFIO_CCW_MSG_EVENT(4, "bound to subchannel %x.%x.%04x\n",
 			   sch->schid.cssid, sch->schid.ssid,
 			   sch->schid.sch_no);
 	return 0;
 
+out_unreg:
+	device_unregister(&parent->dev);
 out_free:
+	dev_set_drvdata(&parent->dev, NULL);
 	dev_set_drvdata(&sch->dev, NULL);
-	vfio_ccw_free_private(private);
+	if (private)
+		vfio_ccw_free_private(private);
 	return ret;
 }
 
 static void vfio_ccw_sch_remove(struct subchannel *sch)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 
-	mdev_unregister_parent(&private->parent);
+	mdev_unregister_parent(&parent->parent);
 
+	device_unregister(&parent->dev);
 	dev_set_drvdata(&sch->dev, NULL);
 
 	vfio_ccw_free_private(private);
@@ -256,7 +310,11 @@ static void vfio_ccw_sch_remove(struct subchannel *sch)
 
 static void vfio_ccw_sch_shutdown(struct subchannel *sch)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
+
+	if (WARN_ON(!private))
+		return;
 
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_CLOSE);
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
@@ -274,7 +332,8 @@ static void vfio_ccw_sch_shutdown(struct subchannel *sch)
  */
 static int vfio_ccw_sch_event(struct subchannel *sch, int process)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 	unsigned long flags;
 	int rc = -EAGAIN;
 
@@ -287,8 +346,10 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process)
 
 	rc = 0;
 
-	if (cio_update_schib(sch))
-		vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
+	if (cio_update_schib(sch)) {
+		if (private)
+			vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
+	}
 
 out_unlock:
 	spin_unlock_irqrestore(sch->lock, flags);
@@ -326,7 +387,8 @@ static void vfio_ccw_queue_crw(struct vfio_ccw_private *private,
 static int vfio_ccw_chp_event(struct subchannel *sch,
 			      struct chp_link *link, int event)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 	int mask = chp_ssd_get_mask(&sch->ssd_info, link);
 	int retry = 255;
 
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 6ae4d012d8008..dc084883d872d 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -55,7 +55,9 @@ static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev)
 
 static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent);
+	struct subchannel *sch = to_subchannel(mdev->dev.parent);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 	int ret;
 
 	if (private->state == VFIO_CCW_STATE_NOT_OPER)
@@ -100,7 +102,9 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev)
 
 static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
 {
-	struct vfio_ccw_private *private = dev_get_drvdata(mdev->dev.parent);
+	struct subchannel *sch = to_subchannel(mdev->dev.parent);
+	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
+	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 
 	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n",
 			   private->sch->schid.cssid,
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index bd5fb81456af8..1f598d58d9694 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -67,6 +67,21 @@ struct vfio_ccw_crw {
 	struct crw		crw;
 };
 
+/**
+ * struct vfio_ccw_parent
+ *
+ * @dev: embedded device struct
+ * @parent: parent data structures for mdevs created
+ * @mdev_type(s): identifying information for mdevs created
+ */
+struct vfio_ccw_parent {
+	struct device		dev;
+
+	struct mdev_parent	parent;
+	struct mdev_type	mdev_type;
+	struct mdev_type	*mdev_types[1];
+};
+
 /**
  * struct vfio_ccw_private
  * @vdev: Embedded VFIO device
@@ -89,7 +104,6 @@ struct vfio_ccw_crw {
  * @io_work: work for deferral process of I/O handling
  * @crw_work: work for deferral process of CRW handling
  * @release_comp: synchronization helper for vfio device release
- * @parent: parent data structures for mdevs created
  */
 struct vfio_ccw_private {
 	struct vfio_device vdev;
@@ -116,10 +130,6 @@ struct vfio_ccw_private {
 	struct work_struct	crw_work;
 
 	struct completion	release_comp;
-
-	struct mdev_parent	parent;
-	struct mdev_type	mdev_type;
-	struct mdev_type	*mdev_types[1];
 } __aligned(8);
 
 int vfio_ccw_sch_quiesce(struct subchannel *sch);
-- 
GitLab


From 008a011d68036ebfa4dede07cb9b93dedaa958b1 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 4 Nov 2022 15:20:02 +0100
Subject: [PATCH 1308/4122] vfio/ccw: remove private->sch

These places all rely on the ability to jump from a private
struct back to the subchannel struct. Rather than keeping a
copy in our back pocket, let's use the relationship provided
by the vfio_device embedded within the private.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221104142007.1314999-3-farman@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_chp.c     |  5 +++--
 drivers/s390/cio/vfio_ccw_drv.c     |  3 +--
 drivers/s390/cio/vfio_ccw_fsm.c     | 27 ++++++++++++---------------
 drivers/s390/cio/vfio_ccw_ops.c     | 12 ++++++------
 drivers/s390/cio/vfio_ccw_private.h |  7 ++++---
 5 files changed, 26 insertions(+), 28 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_chp.c b/drivers/s390/cio/vfio_ccw_chp.c
index 13b26a1c79886..d3f3a611f95b4 100644
--- a/drivers/s390/cio/vfio_ccw_chp.c
+++ b/drivers/s390/cio/vfio_ccw_chp.c
@@ -16,6 +16,7 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private,
 					  char __user *buf, size_t count,
 					  loff_t *ppos)
 {
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 	unsigned int i = VFIO_CCW_OFFSET_TO_INDEX(*ppos) - VFIO_CCW_NUM_REGIONS;
 	loff_t pos = *ppos & VFIO_CCW_OFFSET_MASK;
 	struct ccw_schib_region *region;
@@ -27,12 +28,12 @@ static ssize_t vfio_ccw_schib_region_read(struct vfio_ccw_private *private,
 	mutex_lock(&private->io_mutex);
 	region = private->region[i].data;
 
-	if (cio_update_schib(private->sch)) {
+	if (cio_update_schib(sch)) {
 		ret = -ENODEV;
 		goto out;
 	}
 
-	memcpy(region, &private->sch->schib, sizeof(*region));
+	memcpy(region, &sch->schib, sizeof(*region));
 
 	if (copy_to_user(buf, (void *)region + pos, count)) {
 		ret = -EFAULT;
diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 444b32047397b..2c680a5563832 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -160,7 +160,6 @@ static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch)
 	if (!private)
 		return ERR_PTR(-ENOMEM);
 
-	private->sch = sch;
 	mutex_init(&private->io_mutex);
 	private->state = VFIO_CCW_STATE_STANDBY;
 	INIT_LIST_HEAD(&private->crw);
@@ -395,7 +394,7 @@ static int vfio_ccw_chp_event(struct subchannel *sch,
 	if (!private || !mask)
 		return 0;
 
-	trace_vfio_ccw_chp_event(private->sch->schid, mask, event);
+	trace_vfio_ccw_chp_event(sch->schid, mask, event);
 	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: mask=0x%x event=%d\n",
 			   sch->schid.cssid,
 			   sch->schid.ssid, sch->schid.sch_no,
diff --git a/drivers/s390/cio/vfio_ccw_fsm.c b/drivers/s390/cio/vfio_ccw_fsm.c
index a59c758869f8f..e67fad897af3a 100644
--- a/drivers/s390/cio/vfio_ccw_fsm.c
+++ b/drivers/s390/cio/vfio_ccw_fsm.c
@@ -18,15 +18,13 @@
 
 static int fsm_io_helper(struct vfio_ccw_private *private)
 {
-	struct subchannel *sch;
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 	union orb *orb;
 	int ccode;
 	__u8 lpm;
 	unsigned long flags;
 	int ret;
 
-	sch = private->sch;
-
 	spin_lock_irqsave(sch->lock, flags);
 
 	orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm);
@@ -80,13 +78,11 @@ out:
 
 static int fsm_do_halt(struct vfio_ccw_private *private)
 {
-	struct subchannel *sch;
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 	unsigned long flags;
 	int ccode;
 	int ret;
 
-	sch = private->sch;
-
 	spin_lock_irqsave(sch->lock, flags);
 
 	VFIO_CCW_TRACE_EVENT(2, "haltIO");
@@ -121,13 +117,11 @@ static int fsm_do_halt(struct vfio_ccw_private *private)
 
 static int fsm_do_clear(struct vfio_ccw_private *private)
 {
-	struct subchannel *sch;
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 	unsigned long flags;
 	int ccode;
 	int ret;
 
-	sch = private->sch;
-
 	spin_lock_irqsave(sch->lock, flags);
 
 	VFIO_CCW_TRACE_EVENT(2, "clearIO");
@@ -160,7 +154,7 @@ static int fsm_do_clear(struct vfio_ccw_private *private)
 static void fsm_notoper(struct vfio_ccw_private *private,
 			enum vfio_ccw_event event)
 {
-	struct subchannel *sch = private->sch;
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 
 	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: notoper event %x state %x\n",
 			   sch->schid.cssid,
@@ -228,7 +222,7 @@ static void fsm_async_retry(struct vfio_ccw_private *private,
 static void fsm_disabled_irq(struct vfio_ccw_private *private,
 			     enum vfio_ccw_event event)
 {
-	struct subchannel *sch = private->sch;
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 
 	/*
 	 * An interrupt in a disabled state means a previous disable was not
@@ -238,7 +232,9 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private,
 }
 inline struct subchannel_id get_schid(struct vfio_ccw_private *p)
 {
-	return p->sch->schid;
+	struct subchannel *sch = to_subchannel(p->vdev.dev->parent);
+
+	return sch->schid;
 }
 
 /*
@@ -360,10 +356,11 @@ static void fsm_async_request(struct vfio_ccw_private *private,
 static void fsm_irq(struct vfio_ccw_private *private,
 		    enum vfio_ccw_event event)
 {
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 	struct irb *irb = this_cpu_ptr(&cio_irb);
 
 	VFIO_CCW_TRACE_EVENT(6, "IRQ");
-	VFIO_CCW_TRACE_EVENT(6, dev_name(&private->sch->dev));
+	VFIO_CCW_TRACE_EVENT(6, dev_name(&sch->dev));
 
 	memcpy(&private->irb, irb, sizeof(*irb));
 
@@ -376,7 +373,7 @@ static void fsm_irq(struct vfio_ccw_private *private,
 static void fsm_open(struct vfio_ccw_private *private,
 		     enum vfio_ccw_event event)
 {
-	struct subchannel *sch = private->sch;
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 	int ret;
 
 	spin_lock_irq(sch->lock);
@@ -397,7 +394,7 @@ err_unlock:
 static void fsm_close(struct vfio_ccw_private *private,
 		      enum vfio_ccw_event event)
 {
-	struct subchannel *sch = private->sch;
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
 	int ret;
 
 	spin_lock_irq(sch->lock);
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index dc084883d872d..79c50cb7dcb81 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -68,9 +68,9 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
 		return ret;
 
 	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n",
-			   private->sch->schid.cssid,
-			   private->sch->schid.ssid,
-			   private->sch->schid.sch_no);
+			   sch->schid.cssid,
+			   sch->schid.ssid,
+			   sch->schid.sch_no);
 
 	ret = vfio_register_emulated_iommu_dev(&private->vdev);
 	if (ret)
@@ -107,9 +107,9 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
 	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 
 	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: remove\n",
-			   private->sch->schid.cssid,
-			   private->sch->schid.ssid,
-			   private->sch->schid.sch_no);
+			   sch->schid.cssid,
+			   sch->schid.ssid,
+			   sch->schid.sch_no);
 
 	vfio_unregister_group_dev(&private->vdev);
 
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 1f598d58d9694..b28af2f639630 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -85,7 +85,6 @@ struct vfio_ccw_parent {
 /**
  * struct vfio_ccw_private
  * @vdev: Embedded VFIO device
- * @sch: pointer to the subchannel
  * @state: internal state of the device
  * @completion: synchronization helper of the I/O completion
  * @io_region: MMIO region to input/output I/O arguments/results
@@ -107,7 +106,6 @@ struct vfio_ccw_parent {
  */
 struct vfio_ccw_private {
 	struct vfio_device vdev;
-	struct subchannel	*sch;
 	int			state;
 	struct completion	*completion;
 	struct ccw_io_region	*io_region;
@@ -172,7 +170,10 @@ extern fsm_func_t *vfio_ccw_jumptable[NR_VFIO_CCW_STATES][NR_VFIO_CCW_EVENTS];
 static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
 				      enum vfio_ccw_event event)
 {
-	trace_vfio_ccw_fsm_event(private->sch->schid, private->state, event);
+	struct subchannel *sch = to_subchannel(private->vdev.dev->parent);
+
+	if (sch)
+		trace_vfio_ccw_fsm_event(sch->schid, private->state, event);
 	vfio_ccw_jumptable[private->state][event](private, event);
 }
 
-- 
GitLab


From 06caaa27df8f1a8a6be78212c5ef5cac04500176 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 4 Nov 2022 15:20:03 +0100
Subject: [PATCH 1309/4122] vfio/ccw: move private initialization to callback

There's already a device initialization callback that is used to
initialize the release completion workaround that was introduced
by commit ebb72b765fb49 ("vfio/ccw: Use the new device life cycle
helpers").

Move the other elements of the vfio_ccw_private struct that
require distinct initialization over to that routine.

With that done, the vfio_ccw_alloc_private routine only does a
kzalloc, so fold it inline.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221104142007.1314999-4-farman@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c     | 74 ++++-------------------------
 drivers/s390/cio/vfio_ccw_ops.c     | 43 +++++++++++++++++
 drivers/s390/cio/vfio_ccw_private.h |  7 ++-
 3 files changed, 58 insertions(+), 66 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 2c680a5563832..fbc26338ceabe 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -23,10 +23,10 @@
 #include "vfio_ccw_private.h"
 
 struct workqueue_struct *vfio_ccw_work_q;
-static struct kmem_cache *vfio_ccw_io_region;
-static struct kmem_cache *vfio_ccw_cmd_region;
-static struct kmem_cache *vfio_ccw_schib_region;
-static struct kmem_cache *vfio_ccw_crw_region;
+struct kmem_cache *vfio_ccw_io_region;
+struct kmem_cache *vfio_ccw_cmd_region;
+struct kmem_cache *vfio_ccw_schib_region;
+struct kmem_cache *vfio_ccw_crw_region;
 
 debug_info_t *vfio_ccw_debug_msg_id;
 debug_info_t *vfio_ccw_debug_trace_id;
@@ -79,7 +79,7 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch)
 	return ret;
 }
 
-static void vfio_ccw_sch_io_todo(struct work_struct *work)
+void vfio_ccw_sch_io_todo(struct work_struct *work)
 {
 	struct vfio_ccw_private *private;
 	struct irb *irb;
@@ -115,7 +115,7 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
 		eventfd_signal(private->io_trigger, 1);
 }
 
-static void vfio_ccw_crw_todo(struct work_struct *work)
+void vfio_ccw_crw_todo(struct work_struct *work)
 {
 	struct vfio_ccw_private *private;
 
@@ -152,62 +152,6 @@ static void vfio_ccw_sch_irq(struct subchannel *sch)
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
 }
 
-static struct vfio_ccw_private *vfio_ccw_alloc_private(struct subchannel *sch)
-{
-	struct vfio_ccw_private *private;
-
-	private = kzalloc(sizeof(*private), GFP_KERNEL);
-	if (!private)
-		return ERR_PTR(-ENOMEM);
-
-	mutex_init(&private->io_mutex);
-	private->state = VFIO_CCW_STATE_STANDBY;
-	INIT_LIST_HEAD(&private->crw);
-	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
-	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
-
-	private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
-				       GFP_KERNEL);
-	if (!private->cp.guest_cp)
-		goto out_free_private;
-
-	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
-					       GFP_KERNEL | GFP_DMA);
-	if (!private->io_region)
-		goto out_free_cp;
-
-	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
-						GFP_KERNEL | GFP_DMA);
-	if (!private->cmd_region)
-		goto out_free_io;
-
-	private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region,
-						  GFP_KERNEL | GFP_DMA);
-
-	if (!private->schib_region)
-		goto out_free_cmd;
-
-	private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region,
-						GFP_KERNEL | GFP_DMA);
-
-	if (!private->crw_region)
-		goto out_free_schib;
-	return private;
-
-out_free_schib:
-	kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
-out_free_cmd:
-	kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
-out_free_io:
-	kmem_cache_free(vfio_ccw_io_region, private->io_region);
-out_free_cp:
-	kfree(private->cp.guest_cp);
-out_free_private:
-	mutex_destroy(&private->io_mutex);
-	kfree(private);
-	return ERR_PTR(-ENOMEM);
-}
-
 static void vfio_ccw_free_private(struct vfio_ccw_private *private)
 {
 	struct vfio_ccw_crw *crw, *temp;
@@ -257,10 +201,10 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 	if (ret)
 		goto out_free;
 
-	private = vfio_ccw_alloc_private(sch);
-	if (IS_ERR(private)) {
+	private = kzalloc(sizeof(*private), GFP_KERNEL);
+	if (!private) {
 		device_unregister(&parent->dev);
-		return PTR_ERR(private);
+		return -ENOMEM;
 	}
 
 	dev_set_drvdata(&sch->dev, parent);
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 79c50cb7dcb81..eb0b8cc210bb8 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -49,8 +49,51 @@ static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev)
 	struct vfio_ccw_private *private =
 		container_of(vdev, struct vfio_ccw_private, vdev);
 
+	mutex_init(&private->io_mutex);
+	private->state = VFIO_CCW_STATE_STANDBY;
+	INIT_LIST_HEAD(&private->crw);
+	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
+	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
 	init_completion(&private->release_comp);
+
+	private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
+				       GFP_KERNEL);
+	if (!private->cp.guest_cp)
+		goto out_free_private;
+
+	private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
+					       GFP_KERNEL | GFP_DMA);
+	if (!private->io_region)
+		goto out_free_cp;
+
+	private->cmd_region = kmem_cache_zalloc(vfio_ccw_cmd_region,
+						GFP_KERNEL | GFP_DMA);
+	if (!private->cmd_region)
+		goto out_free_io;
+
+	private->schib_region = kmem_cache_zalloc(vfio_ccw_schib_region,
+						  GFP_KERNEL | GFP_DMA);
+	if (!private->schib_region)
+		goto out_free_cmd;
+
+	private->crw_region = kmem_cache_zalloc(vfio_ccw_crw_region,
+						GFP_KERNEL | GFP_DMA);
+	if (!private->crw_region)
+		goto out_free_schib;
+
 	return 0;
+
+out_free_schib:
+	kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
+out_free_cmd:
+	kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
+out_free_io:
+	kmem_cache_free(vfio_ccw_io_region, private->io_region);
+out_free_cp:
+	kfree(private->cp.guest_cp);
+out_free_private:
+	mutex_destroy(&private->io_mutex);
+	return -ENOMEM;
 }
 
 static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index b28af2f639630..55d636225cff8 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -131,6 +131,8 @@ struct vfio_ccw_private {
 } __aligned(8);
 
 int vfio_ccw_sch_quiesce(struct subchannel *sch);
+void vfio_ccw_sch_io_todo(struct work_struct *work);
+void vfio_ccw_crw_todo(struct work_struct *work);
 
 extern struct mdev_driver vfio_ccw_mdev_driver;
 
@@ -178,7 +180,10 @@ static inline void vfio_ccw_fsm_event(struct vfio_ccw_private *private,
 }
 
 extern struct workqueue_struct *vfio_ccw_work_q;
-
+extern struct kmem_cache *vfio_ccw_io_region;
+extern struct kmem_cache *vfio_ccw_cmd_region;
+extern struct kmem_cache *vfio_ccw_schib_region;
+extern struct kmem_cache *vfio_ccw_crw_region;
 
 /* s390 debug feature, similar to base cio */
 extern debug_info_t *vfio_ccw_debug_msg_id;
-- 
GitLab


From 3d62fe18b6a3a93c5360c2d590b9b40b6842133e Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 4 Nov 2022 15:20:04 +0100
Subject: [PATCH 1310/4122] vfio/ccw: move private to mdev lifecycle

Now that the mdev parent data is split out into its own struct,
it is safe to move the remaining private data to follow the
mdev probe/remove lifecycle. The mdev parent data will remain
where it is, and follow the subchannel and the css driver
interfaces.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221104142007.1314999-5-farman@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c     | 16 +---------------
 drivers/s390/cio/vfio_ccw_ops.c     | 26 +++++++++++++-------------
 drivers/s390/cio/vfio_ccw_private.h |  2 ++
 3 files changed, 16 insertions(+), 28 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index fbc26338ceabe..9fbd1b27a1ac1 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -152,7 +152,7 @@ static void vfio_ccw_sch_irq(struct subchannel *sch)
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
 }
 
-static void vfio_ccw_free_private(struct vfio_ccw_private *private)
+void vfio_ccw_free_private(struct vfio_ccw_private *private)
 {
 	struct vfio_ccw_crw *crw, *temp;
 
@@ -180,7 +180,6 @@ static void vfio_ccw_free_parent(struct device *dev)
 static int vfio_ccw_sch_probe(struct subchannel *sch)
 {
 	struct pmcw *pmcw = &sch->schib.pmcw;
-	struct vfio_ccw_private *private;
 	struct vfio_ccw_parent *parent;
 	int ret = -ENOMEM;
 
@@ -201,14 +200,7 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 	if (ret)
 		goto out_free;
 
-	private = kzalloc(sizeof(*private), GFP_KERNEL);
-	if (!private) {
-		device_unregister(&parent->dev);
-		return -ENOMEM;
-	}
-
 	dev_set_drvdata(&sch->dev, parent);
-	dev_set_drvdata(&parent->dev, private);
 
 	parent->mdev_type.sysfs_name = "io";
 	parent->mdev_type.pretty_name = "I/O subchannel (Non-QDIO)";
@@ -227,25 +219,19 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 out_unreg:
 	device_unregister(&parent->dev);
 out_free:
-	dev_set_drvdata(&parent->dev, NULL);
 	dev_set_drvdata(&sch->dev, NULL);
-	if (private)
-		vfio_ccw_free_private(private);
 	return ret;
 }
 
 static void vfio_ccw_sch_remove(struct subchannel *sch)
 {
 	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
-	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
 
 	mdev_unregister_parent(&parent->parent);
 
 	device_unregister(&parent->dev);
 	dev_set_drvdata(&sch->dev, NULL);
 
-	vfio_ccw_free_private(private);
-
 	VFIO_CCW_MSG_EVENT(4, "unbound from subchannel %x.%x.%04x\n",
 			   sch->schid.cssid, sch->schid.ssid,
 			   sch->schid.sch_no);
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index eb0b8cc210bb8..e45d4acb109b0 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -100,15 +100,20 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
 {
 	struct subchannel *sch = to_subchannel(mdev->dev.parent);
 	struct vfio_ccw_parent *parent = dev_get_drvdata(&sch->dev);
-	struct vfio_ccw_private *private = dev_get_drvdata(&parent->dev);
+	struct vfio_ccw_private *private;
 	int ret;
 
-	if (private->state == VFIO_CCW_STATE_NOT_OPER)
-		return -ENODEV;
+	private = kzalloc(sizeof(*private), GFP_KERNEL);
+	if (!private)
+		return -ENOMEM;
 
 	ret = vfio_init_device(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops);
-	if (ret)
+	if (ret) {
+		kfree(private);
 		return ret;
+	}
+
+	dev_set_drvdata(&parent->dev, private);
 
 	VFIO_CCW_MSG_EVENT(2, "sch %x.%x.%04x: create\n",
 			   sch->schid.cssid,
@@ -122,6 +127,7 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
 	return 0;
 
 err_put_vdev:
+	dev_set_drvdata(&parent->dev, NULL);
 	vfio_put_device(&private->vdev);
 	return ret;
 }
@@ -131,15 +137,6 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev)
 	struct vfio_ccw_private *private =
 		container_of(vdev, struct vfio_ccw_private, vdev);
 
-	/*
-	 * We cannot free vfio_ccw_private here because it includes
-	 * parent info which must be free'ed by css driver.
-	 *
-	 * Use a workaround by memset'ing the core device part and
-	 * then notifying the remove path that all active references
-	 * to this device have been released.
-	 */
-	memset(vdev, 0, sizeof(*vdev));
 	complete(&private->release_comp);
 }
 
@@ -156,6 +153,7 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
 
 	vfio_unregister_group_dev(&private->vdev);
 
+	dev_set_drvdata(&parent->dev, NULL);
 	vfio_put_device(&private->vdev);
 	/*
 	 * Wait for all active references on mdev are released so it
@@ -166,6 +164,8 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
 	 * cycle.
 	 */
 	wait_for_completion(&private->release_comp);
+
+	vfio_ccw_free_private(private);
 }
 
 static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 55d636225cff8..747aba5f52723 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -134,6 +134,8 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch);
 void vfio_ccw_sch_io_todo(struct work_struct *work);
 void vfio_ccw_crw_todo(struct work_struct *work);
 
+void vfio_ccw_free_private(struct vfio_ccw_private *private);
+
 extern struct mdev_driver vfio_ccw_mdev_driver;
 
 /*
-- 
GitLab


From f4da83f7e3f096ece936512d86ef3726e470fbfd Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 4 Nov 2022 15:20:05 +0100
Subject: [PATCH 1311/4122] vfio/ccw: remove release completion

There's enough separation between the parent and private structs now,
that it is fine to remove the release completion hack.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221104142007.1314999-6-farman@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_ops.c     | 14 +-------------
 drivers/s390/cio/vfio_ccw_private.h |  3 ---
 2 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index e45d4acb109b0..8a929a9cf3c66 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -54,7 +54,6 @@ static int vfio_ccw_mdev_init_dev(struct vfio_device *vdev)
 	INIT_LIST_HEAD(&private->crw);
 	INIT_WORK(&private->io_work, vfio_ccw_sch_io_todo);
 	INIT_WORK(&private->crw_work, vfio_ccw_crw_todo);
-	init_completion(&private->release_comp);
 
 	private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
 				       GFP_KERNEL);
@@ -137,7 +136,7 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev)
 	struct vfio_ccw_private *private =
 		container_of(vdev, struct vfio_ccw_private, vdev);
 
-	complete(&private->release_comp);
+	vfio_ccw_free_private(private);
 }
 
 static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
@@ -155,17 +154,6 @@ static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
 
 	dev_set_drvdata(&parent->dev, NULL);
 	vfio_put_device(&private->vdev);
-	/*
-	 * Wait for all active references on mdev are released so it
-	 * is safe to defer kfree() to a later point.
-	 *
-	 * TODO: the clean fix is to split parent/mdev info from ccw
-	 * private structure so each can be managed in its own life
-	 * cycle.
-	 */
-	wait_for_completion(&private->release_comp);
-
-	vfio_ccw_free_private(private);
 }
 
 static int vfio_ccw_mdev_open_device(struct vfio_device *vdev)
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 747aba5f52723..2278fd38d34ea 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -102,7 +102,6 @@ struct vfio_ccw_parent {
  * @req_trigger: eventfd ctx for signaling userspace to return device
  * @io_work: work for deferral process of I/O handling
  * @crw_work: work for deferral process of CRW handling
- * @release_comp: synchronization helper for vfio device release
  */
 struct vfio_ccw_private {
 	struct vfio_device vdev;
@@ -126,8 +125,6 @@ struct vfio_ccw_private {
 	struct eventfd_ctx	*req_trigger;
 	struct work_struct	io_work;
 	struct work_struct	crw_work;
-
-	struct completion	release_comp;
 } __aligned(8);
 
 int vfio_ccw_sch_quiesce(struct subchannel *sch);
-- 
GitLab


From 6ac73820993c13f30d226f9521f8ffae62acdf42 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 10 Nov 2022 15:30:10 -0300
Subject: [PATCH 1312/4122] perf trace: Add augmenter for clock_gettime's rqtp
 timespec arg

One more before going the BTF way:

  # perf trace -e /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o,*nanosleep
         ? pool-gsd-smart/2893  ... [continued]: clock_nanosleep())    = 0
         ? gpm/1042  ... [continued]: clock_nanosleep())    = 0
     1.232 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ...
     1.232 pool-gsd-smart/2893  ... [continued]: clock_nanosleep())    = 0
   327.329 gpm/1042 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffddfd1cf20) ...
  1002.482 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) = 0
   327.329 gpm/1042  ... [continued]: clock_nanosleep())    = 0
  2003.947 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ...
  2003.947 pool-gsd-smart/2893  ... [continued]: clock_nanosleep())    = 0
  2327.858 gpm/1042 clock_nanosleep(rqtp: { .tv_sec: 2, .tv_nsec: 0 }, rmtp: 0x7ffddfd1cf20) ...
         ? crond/1384  ... [continued]: clock_nanosleep())    = 0
  3005.382 pool-gsd-smart/2893 clock_nanosleep(rqtp: { .tv_sec: 1, .tv_nsec: 0 }, rmtp: 0x7f64d7ffec50) ...
  3005.382 pool-gsd-smart/2893  ... [continued]: clock_nanosleep())    = 0
  3675.633 crond/1384 clock_nanosleep(rqtp: { .tv_sec: 60, .tv_nsec: 0 }, rmtp: 0x7ffcc02b66b0) ...
^C#

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c                    |  2 ++
 .../examples/bpf/augmented_raw_syscalls.c     | 28 +++++++++++++++++++
 tools/perf/trace/beauty/Build                 |  1 +
 tools/perf/trace/beauty/beauty.h              |  3 ++
 tools/perf/trace/beauty/timespec.c            | 21 ++++++++++++++
 5 files changed, 55 insertions(+)
 create mode 100644 tools/perf/trace/beauty/timespec.c

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 5690c33c523b3..c93b359abc312 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -924,6 +924,8 @@ static struct syscall_fmt syscall_fmts[] = {
 	  .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, },
 	{ .name     = "clock_gettime",
 	  .arg = { [0] = STRARRAY(clk_id, clockid), }, },
+	{ .name	    = "clock_nanosleep",
+	  .arg = { [2] = { .scnprintf = SCA_TIMESPEC,  /* rqtp */ }, }, },
 	{ .name	    = "clone",	    .errpid = true, .nr_args = 5,
 	  .arg = { [0] = { .name = "flags",	    .scnprintf = SCA_CLONE_FLAGS, },
 		   [1] = { .name = "child_stack",   .scnprintf = SCA_HEX, },
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 0599823e8ae1b..7dc24c9173a75 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -21,6 +21,13 @@
 // FIXME: These should come from system headers
 typedef char bool;
 typedef int pid_t;
+typedef long long int __s64;
+typedef __s64 time64_t;
+
+struct timespec64 {
+	time64_t	tv_sec;
+	long int	tv_nsec;
+};
 
 /* bpf-output associated map */
 struct __augmented_syscalls__ {
@@ -337,6 +344,27 @@ failure:
 	return 1; /* Failure: don't filter */
 }
 
+SEC("!syscalls:sys_enter_clock_nanosleep")
+int sys_enter_clock_nanosleep(struct syscall_enter_args *args)
+{
+	struct augmented_args_payload *augmented_args = augmented_args_payload();
+	const void *rqtp_arg = (const void *)args->args[2];
+	unsigned int len = sizeof(augmented_args->args);
+	__u32 size = sizeof(struct timespec64);
+
+        if (augmented_args == NULL)
+		goto failure;
+
+	if (size > sizeof(augmented_args->__data))
+                goto failure;
+
+	bpf_probe_read(&augmented_args->__data, size, rqtp_arg);
+
+	return augmented__output(args, augmented_args, len + size);
+failure:
+	return 1; /* Failure: don't filter */
+}
+
 static pid_t getpid(void)
 {
 	return bpf_get_current_pid_tgid();
diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build
index 433dc39053a7f..d11ce256f5114 100644
--- a/tools/perf/trace/beauty/Build
+++ b/tools/perf/trace/beauty/Build
@@ -17,4 +17,5 @@ perf-y += sockaddr.o
 perf-y += socket.o
 perf-y += statx.o
 perf-y += sync_file_range.o
+perf-y += timespec.o
 perf-y += tracepoints/
diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h
index f527a46ab4e72..4c59edddd6a87 100644
--- a/tools/perf/trace/beauty/beauty.h
+++ b/tools/perf/trace/beauty/beauty.h
@@ -244,6 +244,9 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a
 size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg);
 #define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags
 
+size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg);
+#define SCA_TIMESPEC syscall_arg__scnprintf_timespec
+
 size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix);
 
 void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg,
diff --git a/tools/perf/trace/beauty/timespec.c b/tools/perf/trace/beauty/timespec.c
new file mode 100644
index 0000000000000..e1a61f092aad8
--- /dev/null
+++ b/tools/perf/trace/beauty/timespec.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: LGPL-2.1
+// Copyright (C) 2022, Red Hat Inc, Arnaldo Carvalho de Melo <acme@redhat.com>
+
+#include "trace/beauty/beauty.h"
+#include <inttypes.h>
+#include <time.h>
+
+static size_t syscall_arg__scnprintf_augmented_timespec(struct syscall_arg *arg, char *bf, size_t size)
+{
+	struct timespec *ts = (struct timespec *)arg->augmented.args;
+
+	return scnprintf(bf, size, "{ .tv_sec: %" PRIu64 ", .tv_nsec: %" PRIu64 " }", ts->tv_sec, ts->tv_nsec);
+}
+
+size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg)
+{
+	if (arg->augmented.args)
+		return syscall_arg__scnprintf_augmented_timespec(arg, bf, size);
+
+	return scnprintf(bf, size, "%#lx", arg->val);
+}
-- 
GitLab


From d1104f9327df9b26901b97cd026949f80ccab0d3 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 4 Nov 2022 15:20:06 +0100
Subject: [PATCH 1313/4122] vfio/ccw: replace vfio_init_device with _alloc_

Now that we have a reasonable separation of structs that follow
the subchannel and mdev lifecycles, there's no reason we can't
call the official vfio_alloc_device routine for our private data,
and behave like everyone else.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221104142007.1314999-7-farman@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c     | 18 ------------------
 drivers/s390/cio/vfio_ccw_ops.c     | 28 ++++++++++++++++++----------
 drivers/s390/cio/vfio_ccw_private.h |  2 --
 drivers/vfio/vfio_main.c            | 10 +++++-----
 include/linux/vfio.h                |  2 --
 5 files changed, 23 insertions(+), 37 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index 9fbd1b27a1ac1..c2a65808605af 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -152,24 +152,6 @@ static void vfio_ccw_sch_irq(struct subchannel *sch)
 	vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_INTERRUPT);
 }
 
-void vfio_ccw_free_private(struct vfio_ccw_private *private)
-{
-	struct vfio_ccw_crw *crw, *temp;
-
-	list_for_each_entry_safe(crw, temp, &private->crw, next) {
-		list_del(&crw->next);
-		kfree(crw);
-	}
-
-	kmem_cache_free(vfio_ccw_crw_region, private->crw_region);
-	kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
-	kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
-	kmem_cache_free(vfio_ccw_io_region, private->io_region);
-	kfree(private->cp.guest_cp);
-	mutex_destroy(&private->io_mutex);
-	kfree(private);
-}
-
 static void vfio_ccw_free_parent(struct device *dev)
 {
 	struct vfio_ccw_parent *parent = container_of(dev, struct vfio_ccw_parent, dev);
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 8a929a9cf3c66..1155f8bcedd99 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -102,15 +102,10 @@ static int vfio_ccw_mdev_probe(struct mdev_device *mdev)
 	struct vfio_ccw_private *private;
 	int ret;
 
-	private = kzalloc(sizeof(*private), GFP_KERNEL);
-	if (!private)
-		return -ENOMEM;
-
-	ret = vfio_init_device(&private->vdev, &mdev->dev, &vfio_ccw_dev_ops);
-	if (ret) {
-		kfree(private);
-		return ret;
-	}
+	private = vfio_alloc_device(vfio_ccw_private, vdev, &mdev->dev,
+				    &vfio_ccw_dev_ops);
+	if (IS_ERR(private))
+		return PTR_ERR(private);
 
 	dev_set_drvdata(&parent->dev, private);
 
@@ -135,8 +130,21 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev)
 {
 	struct vfio_ccw_private *private =
 		container_of(vdev, struct vfio_ccw_private, vdev);
+	struct vfio_ccw_crw *crw, *temp;
+
+	list_for_each_entry_safe(crw, temp, &private->crw, next) {
+		list_del(&crw->next);
+		kfree(crw);
+	}
+
+	kmem_cache_free(vfio_ccw_crw_region, private->crw_region);
+	kmem_cache_free(vfio_ccw_schib_region, private->schib_region);
+	kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
+	kmem_cache_free(vfio_ccw_io_region, private->io_region);
+	kfree(private->cp.guest_cp);
+	mutex_destroy(&private->io_mutex);
 
-	vfio_ccw_free_private(private);
+	vfio_free_device(vdev);
 }
 
 static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_private.h b/drivers/s390/cio/vfio_ccw_private.h
index 2278fd38d34ea..b441ae6700fd2 100644
--- a/drivers/s390/cio/vfio_ccw_private.h
+++ b/drivers/s390/cio/vfio_ccw_private.h
@@ -131,8 +131,6 @@ int vfio_ccw_sch_quiesce(struct subchannel *sch);
 void vfio_ccw_sch_io_todo(struct work_struct *work);
 void vfio_ccw_crw_todo(struct work_struct *work);
 
-void vfio_ccw_free_private(struct vfio_ccw_private *private);
-
 extern struct mdev_driver vfio_ccw_mdev_driver;
 
 /*
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 2d168793d4e1c..2901b8ad5be92 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -348,6 +348,9 @@ static void vfio_device_release(struct device *dev)
 	device->ops->release(device);
 }
 
+static int vfio_init_device(struct vfio_device *device, struct device *dev,
+			    const struct vfio_device_ops *ops);
+
 /*
  * Allocate and initialize vfio_device so it can be registered to vfio
  * core.
@@ -386,11 +389,9 @@ EXPORT_SYMBOL_GPL(_vfio_alloc_device);
 
 /*
  * Initialize a vfio_device so it can be registered to vfio core.
- *
- * Only vfio-ccw driver should call this interface.
  */
-int vfio_init_device(struct vfio_device *device, struct device *dev,
-		     const struct vfio_device_ops *ops)
+static int vfio_init_device(struct vfio_device *device, struct device *dev,
+			    const struct vfio_device_ops *ops)
 {
 	int ret;
 
@@ -422,7 +423,6 @@ out_uninit:
 	ida_free(&vfio.device_ida, device->index);
 	return ret;
 }
-EXPORT_SYMBOL_GPL(vfio_init_device);
 
 /*
  * The helper called by driver @release callback to free the device
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e7cebeb875dd1..ba809268a48e0 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -176,8 +176,6 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
 					dev, ops),				\
 		     struct dev_struct, member)
 
-int vfio_init_device(struct vfio_device *device, struct device *dev,
-		     const struct vfio_device_ops *ops);
 void vfio_free_device(struct vfio_device *device);
 static inline void vfio_put_device(struct vfio_device *device)
 {
-- 
GitLab


From 913447d06f032a9e9c84870bec0b1adb8c588f29 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.ibm.com>
Date: Fri, 4 Nov 2022 15:20:07 +0100
Subject: [PATCH 1314/4122] vfio: Remove vfio_free_device

With the "mess" sorted out, we should be able to inline the
vfio_free_device call introduced by commit cb9ff3f3b84c
("vfio: Add helpers for unifying vfio_device life cycle")
and remove them from driver release callbacks.

Signed-off-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>	# vfio-ap part
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221104142007.1314999-8-farman@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c      |  1 -
 drivers/s390/cio/vfio_ccw_ops.c       |  2 --
 drivers/s390/crypto/vfio_ap_ops.c     |  6 ------
 drivers/vfio/fsl-mc/vfio_fsl_mc.c     |  1 -
 drivers/vfio/pci/vfio_pci_core.c      |  1 -
 drivers/vfio/platform/vfio_amba.c     |  1 -
 drivers/vfio/platform/vfio_platform.c |  1 -
 drivers/vfio/vfio_main.c              | 22 ++++------------------
 include/linux/vfio.h                  |  1 -
 samples/vfio-mdev/mbochs.c            |  1 -
 samples/vfio-mdev/mdpy.c              |  1 -
 samples/vfio-mdev/mtty.c              |  1 -
 12 files changed, 4 insertions(+), 35 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 7a45e5360caf2..eee6805e67ded 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1461,7 +1461,6 @@ static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
 	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 
 	intel_gvt_destroy_vgpu(vgpu);
-	vfio_free_device(vfio_dev);
 }
 
 static const struct vfio_device_ops intel_vgpu_dev_ops = {
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 1155f8bcedd99..598a3814d4282 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -143,8 +143,6 @@ static void vfio_ccw_mdev_release_dev(struct vfio_device *vdev)
 	kmem_cache_free(vfio_ccw_io_region, private->io_region);
 	kfree(private->cp.guest_cp);
 	mutex_destroy(&private->io_mutex);
-
-	vfio_free_device(vdev);
 }
 
 static void vfio_ccw_mdev_remove(struct mdev_device *mdev)
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 0b4cc8c597ae6..f108c0f147125 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -765,11 +765,6 @@ static void vfio_ap_mdev_unlink_fr_queues(struct ap_matrix_mdev *matrix_mdev)
 	}
 }
 
-static void vfio_ap_mdev_release_dev(struct vfio_device *vdev)
-{
-	vfio_free_device(vdev);
-}
-
 static void vfio_ap_mdev_remove(struct mdev_device *mdev)
 {
 	struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(&mdev->dev);
@@ -1784,7 +1779,6 @@ static const struct attribute_group vfio_queue_attr_group = {
 
 static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
 	.init = vfio_ap_mdev_init_dev,
-	.release = vfio_ap_mdev_release_dev,
 	.open_device = vfio_ap_mdev_open_device,
 	.close_device = vfio_ap_mdev_close_device,
 	.ioctl = vfio_ap_mdev_ioctl,
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index b16874e913e4f..7b8889f550076 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -568,7 +568,6 @@ static void vfio_fsl_mc_release_dev(struct vfio_device *core_vdev)
 
 	vfio_fsl_uninit_device(vdev);
 	mutex_destroy(&vdev->igate);
-	vfio_free_device(core_vdev);
 }
 
 static int vfio_fsl_mc_remove(struct fsl_mc_device *mc_dev)
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index badc9d828cac2..9be2d5be5d959 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -2109,7 +2109,6 @@ void vfio_pci_core_release_dev(struct vfio_device *core_vdev)
 	mutex_destroy(&vdev->vma_lock);
 	kfree(vdev->region);
 	kfree(vdev->pm_save);
-	vfio_free_device(core_vdev);
 }
 EXPORT_SYMBOL_GPL(vfio_pci_core_release_dev);
 
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index eaea63e5294c5..18faf2678b998 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -95,7 +95,6 @@ static void vfio_amba_release_dev(struct vfio_device *core_vdev)
 
 	vfio_platform_release_common(vdev);
 	kfree(vdev->name);
-	vfio_free_device(core_vdev);
 }
 
 static void vfio_amba_remove(struct amba_device *adev)
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 82cedcebfd902..9910451dc3415 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -83,7 +83,6 @@ static void vfio_platform_release_dev(struct vfio_device *core_vdev)
 		container_of(core_vdev, struct vfio_platform_device, vdev);
 
 	vfio_platform_release_common(vdev);
-	vfio_free_device(core_vdev);
 }
 
 static int vfio_platform_remove(struct platform_device *pdev)
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 2901b8ad5be92..9835757e2bee4 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -339,13 +339,10 @@ static void vfio_device_release(struct device *dev)
 	vfio_release_device_set(device);
 	ida_free(&vfio.device_ida, device->index);
 
-	/*
-	 * kvfree() cannot be done here due to a life cycle mess in
-	 * vfio-ccw. Before the ccw part is fixed all drivers are
-	 * required to support @release and call vfio_free_device()
-	 * from there.
-	 */
-	device->ops->release(device);
+	if (device->ops->release)
+		device->ops->release(device);
+
+	kvfree(device);
 }
 
 static int vfio_init_device(struct vfio_device *device, struct device *dev,
@@ -424,17 +421,6 @@ out_uninit:
 	return ret;
 }
 
-/*
- * The helper called by driver @release callback to free the device
- * structure. Drivers which don't have private data to clean can
- * simply use this helper as its @release.
- */
-void vfio_free_device(struct vfio_device *device)
-{
-	kvfree(device);
-}
-EXPORT_SYMBOL_GPL(vfio_free_device);
-
 static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
 		enum vfio_group_type type)
 {
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index ba809268a48e0..e7480154825ec 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -176,7 +176,6 @@ struct vfio_device *_vfio_alloc_device(size_t size, struct device *dev,
 					dev, ops),				\
 		     struct dev_struct, member)
 
-void vfio_free_device(struct vfio_device *device);
 static inline void vfio_put_device(struct vfio_device *device)
 {
 	put_device(&device->device);
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 117a8d799f711..8b5a3a778a257 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -594,7 +594,6 @@ static void mbochs_release_dev(struct vfio_device *vdev)
 	atomic_add(mdev_state->type->mbytes, &mbochs_avail_mbytes);
 	kfree(mdev_state->pages);
 	kfree(mdev_state->vconfig);
-	vfio_free_device(vdev);
 }
 
 static void mbochs_remove(struct mdev_device *mdev)
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index 946e8cfde6fdd..721fb06c6413f 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -283,7 +283,6 @@ static void mdpy_release_dev(struct vfio_device *vdev)
 
 	vfree(mdev_state->memblk);
 	kfree(mdev_state->vconfig);
-	vfio_free_device(vdev);
 }
 
 static void mdpy_remove(struct mdev_device *mdev)
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index e72085fc13763..3c2a421b9b696 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -784,7 +784,6 @@ static void mtty_release_dev(struct vfio_device *vdev)
 
 	atomic_add(mdev_state->nr_ports, &mdev_avail_ports);
 	kfree(mdev_state->vconfig);
-	vfio_free_device(vdev);
 }
 
 static void mtty_remove(struct mdev_device *mdev)
-- 
GitLab


From 30b331d2e3bc5c7c95568477d4bf2661b6e6cb3e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 3 Nov 2022 22:14:40 -0700
Subject: [PATCH 1315/4122] perf lock: Allow concurrent record and report

To support live monitoring of kernel lock contention without BPF,
it should support something like below:

  # perf lock record -a -o- sleep 1 | perf lock contention -i-
   contended   total wait     max wait     avg wait         type   caller

           2     10.27 us      6.17 us      5.13 us     spinlock   load_balance+0xc03
           1      5.29 us      5.29 us      5.29 us     rwlock:W   ep_scan_ready_list+0x54
           1      4.12 us      4.12 us      4.12 us     spinlock   smpboot_thread_fn+0x116
           1      3.28 us      3.28 us      3.28 us        mutex   pipe_read+0x50

To do that, it needs to handle HEAD_ATTR, HEADER_EVENT_UPDATE and
HEADER_TRACING_DATA which are generated only for the pipe mode.
And setting event handler also should be delayed until it gets the
event information.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221104051440.220989-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-lock.c                 | 68 +++++++++++++++--------
 tools/perf/tests/shell/lock_contention.sh | 15 ++++-
 2 files changed, 60 insertions(+), 23 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6f79175365a80..0d280093b19a5 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -1390,6 +1390,34 @@ static int dump_info(void)
 	return rc;
 }
 
+static const struct evsel_str_handler lock_tracepoints[] = {
+	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
+	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
+	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
+};
+
+static const struct evsel_str_handler contention_tracepoints[] = {
+	{ "lock:contention_begin", evsel__process_contention_begin, },
+	{ "lock:contention_end",   evsel__process_contention_end,   },
+};
+
+static int process_event_update(struct perf_tool *tool,
+				union perf_event *event,
+				struct evlist **pevlist)
+{
+	int ret;
+
+	ret = perf_event__process_event_update(tool, event, pevlist);
+	if (ret < 0)
+		return ret;
+
+	/* this can return -EEXIST since we call it for each evsel */
+	perf_session__set_tracepoints_handlers(session, lock_tracepoints);
+	perf_session__set_tracepoints_handlers(session, contention_tracepoints);
+	return 0;
+}
+
 typedef int (*tracepoint_handler)(struct evsel *evsel,
 				  struct perf_sample *sample);
 
@@ -1545,28 +1573,19 @@ next:
 	print_bad_events(bad, total);
 }
 
-static const struct evsel_str_handler lock_tracepoints[] = {
-	{ "lock:lock_acquire",	 evsel__process_lock_acquire,   }, /* CONFIG_LOCKDEP */
-	{ "lock:lock_acquired",	 evsel__process_lock_acquired,  }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
-	{ "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
-	{ "lock:lock_release",	 evsel__process_lock_release,   }, /* CONFIG_LOCKDEP */
-};
-
-static const struct evsel_str_handler contention_tracepoints[] = {
-	{ "lock:contention_begin", evsel__process_contention_begin, },
-	{ "lock:contention_end",   evsel__process_contention_end,   },
-};
-
 static bool force;
 
 static int __cmd_report(bool display_info)
 {
 	int err = -EINVAL;
 	struct perf_tool eops = {
+		.attr		 = perf_event__process_attr,
+		.event_update	 = process_event_update,
 		.sample		 = process_sample_event,
 		.comm		 = perf_event__process_comm,
 		.mmap		 = perf_event__process_mmap,
 		.namespaces	 = perf_event__process_namespaces,
+		.tracing_data	 = perf_event__process_tracing_data,
 		.ordered_events	 = true,
 	};
 	struct perf_data data = {
@@ -1585,17 +1604,19 @@ static int __cmd_report(bool display_info)
 	symbol_conf.sort_by_name = true;
 	symbol__init(&session->header.env);
 
-	if (!perf_session__has_traces(session, "lock record"))
-		goto out_delete;
+	if (!data.is_pipe) {
+		if (!perf_session__has_traces(session, "lock record"))
+			goto out_delete;
 
-	if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
-		pr_err("Initializing perf session tracepoint handlers failed\n");
-		goto out_delete;
-	}
+		if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
+			pr_err("Initializing perf session tracepoint handlers failed\n");
+			goto out_delete;
+		}
 
-	if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
-		pr_err("Initializing perf session tracepoint handlers failed\n");
-		goto out_delete;
+		if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) {
+			pr_err("Initializing perf session tracepoint handlers failed\n");
+			goto out_delete;
+		}
 	}
 
 	if (setup_output_field(false, output_fields))
@@ -1633,9 +1654,12 @@ static int __cmd_contention(int argc, const char **argv)
 {
 	int err = -EINVAL;
 	struct perf_tool eops = {
+		.attr		 = perf_event__process_attr,
+		.event_update	 = process_event_update,
 		.sample		 = process_sample_event,
 		.comm		 = perf_event__process_comm,
 		.mmap		 = perf_event__process_mmap,
+		.tracing_data	 = perf_event__process_tracing_data,
 		.ordered_events	 = true,
 	};
 	struct perf_data data = {
@@ -1698,7 +1722,7 @@ static int __cmd_contention(int argc, const char **argv)
 			pr_err("lock contention BPF setup failed\n");
 			goto out_delete;
 		}
-	} else {
+	} else if (!data.is_pipe) {
 		if (!perf_session__has_traces(session, "lock record"))
 			goto out_delete;
 
diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh
index 04bf604e3c6f8..f7bd0d8eb5c38 100755
--- a/tools/perf/tests/shell/lock_contention.sh
+++ b/tools/perf/tests/shell/lock_contention.sh
@@ -53,7 +53,7 @@ test_bpf()
 
 	if ! perf lock con -b true > /dev/null 2>&1 ; then
 		echo "[Skip] No BPF support"
-		exit
+		return
 	fi
 
 	# the perf lock contention output goes to the stderr
@@ -65,9 +65,22 @@ test_bpf()
 	fi
 }
 
+test_record_concurrent()
+{
+	echo "Testing perf lock record and perf lock contention at the same time"
+	perf lock record -o- -- perf bench sched messaging 2> /dev/null | \
+	perf lock contention -i- -E 1 -q 2> ${result}
+	if [ $(cat "${result}" | wc -l) != "1" ]; then
+		echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+		err=1
+		exit
+	fi
+}
+
 check
 
 test_record
 test_bpf
+test_record_concurrent
 
 exit ${err}
-- 
GitLab


From 9d895e46842908aa49a042e699097df64ab20b7f Mon Sep 17 00:00:00 2001
From: Dmitrii Dolgov <9erthalion6@gmail.com>
Date: Wed, 9 Nov 2022 11:39:32 +0100
Subject: [PATCH 1316/4122] perf data: Add tracepoint fields when converting to
 JSON

When converting recorded data into JSON format, perf data omits probe
variables. Add them to the output in the format "field name": "field value"
using tep_print_field:

    $ perf data convert --to-json output.json

    // output.json
    {
        "linux-perf-json-version": 1,
        "headers": { ... },
        "samples": [
            {
            "timestamp": 29182079082999,
            "pid": 309194,
                    [...]
            "__probe_ip": "0x93ee35",
            "query_string_string": "select 2;",
            "nxids": "0"
            }
        ]
    }

Signed-off-by: Dmitrii Dolgov <9erthalion6@gmail.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20221109103932.65675-1-9erthalion6@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-json.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 613d6ae826633..57db59068cb68 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -217,6 +217,26 @@ static int process_sample_event(struct perf_tool *tool,
 	}
 	output_json_format(out, false, 3, "]");
 
+	if (sample->raw_data) {
+		int i;
+		struct tep_format_field **fields;
+
+		fields = tep_event_fields(evsel->tp_format);
+		if (fields) {
+			i = 0;
+			while (fields[i]) {
+				struct trace_seq s;
+
+				trace_seq_init(&s);
+				tep_print_field(&s, sample->raw_data, fields[i]);
+				output_json_key_string(out, true, 3, fields[i]->name, s.buffer);
+
+				i++;
+			}
+			free(fields);
+		}
+	}
+
 	output_json_format(out, false, 2, "}");
 	return 0;
 }
-- 
GitLab


From cf9f67b36303de65596ae7504a2a7573c08876bb Mon Sep 17 00:00:00 2001
From: Kang Minchul <tegongkang@gmail.com>
Date: Sat, 5 Nov 2022 22:59:32 +0900
Subject: [PATCH 1317/4122] perf print-events: Remove redundant comparison with
 zero

Since variable npmus is unsigned int, comparing with 0 is unnecessary.

Signed-off-by: Kang Minchul <tegongkang@gmail.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221105135932.81612-1-tegongkang@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/print-events.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index c4d5d87fae2f6..6df947df1c0fc 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -311,10 +311,8 @@ restart:
 				if ((hybrid_supported == 0) ||
 				    (hybrid_supported == npmus)) {
 					evt_list[evt_i] = strdup(name);
-					if (npmus > 0) {
-						for (j = 0; j < npmus; j++)
-							zfree(&evt_pmus[j]);
-					}
+					for (j = 0; j < npmus; j++)
+						zfree(&evt_pmus[j]);
 				} else {
 					for (j = 0; j < hybrid_supported; j++) {
 						evt_list[evt_i++] = evt_pmus[j];
-- 
GitLab


From 612a5337ae7a87893f90de7878b20701b2b17d7d Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Thu, 20 Oct 2022 14:45:11 +0100
Subject: [PATCH 1318/4122] perf vendor events: Add Arm Neoverse V2 PMU events

Rename the neoverse-n2 folder to make it clear that it includes V2, and
add V2 to mapfile.csv. V2 has the same events as N2, visible by running
the following command in the ARM-software/data github repo [1]:

  diff pmu/neoverse-v2.json pmu/neoverse-n2.json | grep code

Testing:

  $ perf test pmu

  10: PMU events                                           :
  10.1: PMU event table sanity                             : Ok
  10.2: PMU event map aliases                              : Ok
  10.3: Parsing of PMU event table metrics                 : Ok
  10.4: Parsing of PMU event table metrics with fake PMUs  : Ok

[1]: https://github.com/ARM-software/data

Reviewed-by: Nick Forrington <nick.forrington@arm.com>
Signed-off-by: James Clark <james.clark@arm.com>
Cc: Al Grant <al.grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20221020134512.1345013-1-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/branch.json | 0
 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/bus.json    | 0
 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/cache.json  | 0
 .../arm64/arm/{neoverse-n2 => neoverse-n2-v2}/exception.json   | 0
 .../arm64/arm/{neoverse-n2 => neoverse-n2-v2}/instruction.json | 0
 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/memory.json | 0
 .../arm64/arm/{neoverse-n2 => neoverse-n2-v2}/pipeline.json    | 0
 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/spe.json    | 0
 .../arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/trace.json  | 0
 tools/perf/pmu-events/arch/arm64/mapfile.csv                   | 3 ++-
 10 files changed, 2 insertions(+), 1 deletion(-)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/branch.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/bus.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/cache.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/exception.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/instruction.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/memory.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/pipeline.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/spe.json (100%)
 rename tools/perf/pmu-events/arch/arm64/arm/{neoverse-n2 => neoverse-n2-v2}/trace.json (100%)

diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json
diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
similarity index 100%
rename from tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json
rename to tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json
diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv
index ad502d00f4607..f134e833c069d 100644
--- a/tools/perf/pmu-events/arch/arm64/mapfile.csv
+++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv
@@ -34,7 +34,8 @@
 0x00000000410fd460,v1,arm/cortex-a510,core
 0x00000000410fd470,v1,arm/cortex-a710,core
 0x00000000410fd480,v1,arm/cortex-x2,core
-0x00000000410fd490,v1,arm/neoverse-n2,core
+0x00000000410fd490,v1,arm/neoverse-n2-v2,core
+0x00000000410fd4f0,v1,arm/neoverse-n2-v2,core
 0x00000000420f5160,v1,cavium/thunderx2,core
 0x00000000430f0af0,v1,cavium/thunderx2,core
 0x00000000460f0010,v1,fujitsu/a64fx,core
-- 
GitLab


From 3bfadb2325891d122771ce534336af531e93d7b2 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 20 Oct 2022 15:12:04 +0800
Subject: [PATCH 1319/4122] KVM: selftests: memslot_perf_test: Use data->nslots
 in prepare_vm()

In prepare_vm(), 'data->nslots' is assigned with 'max_mem_slots - 1'
at the beginning, meaning they are interchangeable.

Use 'data->nslots' isntead of 'max_mem_slots - 1'. With this, it
becomes easier to move the logic of probing number of slots into
upper layer in subsequent patches.

No functional change intended.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020071209.559062-2-gshan@redhat.com
---
 tools/testing/selftests/kvm/memslot_perf_test.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 44995446d942a..231cc8449c2ef 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -280,14 +280,14 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	ucall_init(data->vm, NULL);
 
 	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
-		max_mem_slots - 1, data->pages_per_slot, rempages);
+		data->nslots, data->pages_per_slot, rempages);
 
 	clock_gettime(CLOCK_MONOTONIC, &tstart);
-	for (slot = 1, guest_addr = MEM_GPA; slot < max_mem_slots; slot++) {
+	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
 		uint64_t npages;
 
 		npages = data->pages_per_slot;
-		if (slot == max_mem_slots - 1)
+		if (slot == data->nslots)
 			npages += rempages;
 
 		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
@@ -297,12 +297,12 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	}
 	*slot_runtime = timespec_elapsed(tstart);
 
-	for (slot = 0, guest_addr = MEM_GPA; slot < max_mem_slots - 1; slot++) {
+	for (slot = 0, guest_addr = MEM_GPA; slot < data->nslots; slot++) {
 		uint64_t npages;
 		uint64_t gpa;
 
 		npages = data->pages_per_slot;
-		if (slot == max_mem_slots - 2)
+		if (slot == data->nslots - 1)
 			npages += rempages;
 
 		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
-- 
GitLab


From 2aae5e6795e1407334bb849f96f11c9051b959e2 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 20 Oct 2022 15:12:05 +0800
Subject: [PATCH 1320/4122] KVM: selftests: memslot_perf_test: Consolidate loop
 conditions in prepare_vm()

There are two loops in prepare_vm(), which have different conditions.
'slot' is treated as meory slot index in the first loop, but index of
the host virtual address array in the second loop. It makes it a bit
hard to understand the code.

Change the usage of 'slot' in the second loop, to treat it as the
memory slot index either.

No functional change intended.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020071209.559062-3-gshan@redhat.com
---
 tools/testing/selftests/kvm/memslot_perf_test.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 231cc8449c2ef..dcb492b3f27b2 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -297,21 +297,20 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	}
 	*slot_runtime = timespec_elapsed(tstart);
 
-	for (slot = 0, guest_addr = MEM_GPA; slot < data->nslots; slot++) {
+	for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
 		uint64_t npages;
 		uint64_t gpa;
 
 		npages = data->pages_per_slot;
-		if (slot == data->nslots - 1)
+		if (slot == data->nslots)
 			npages += rempages;
 
-		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr,
-					 slot + 1);
+		gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
 		TEST_ASSERT(gpa == guest_addr,
 			    "vm_phy_pages_alloc() failed\n");
 
-		data->hva_slots[slot] = addr_gpa2hva(data->vm, guest_addr);
-		memset(data->hva_slots[slot], 0, npages * 4096);
+		data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
+		memset(data->hva_slots[slot - 1], 0, npages * 4096);
 
 		guest_addr += npages * 4096;
 	}
-- 
GitLab


From 34396437b11f904fc61b272e3974f4c92868451b Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 20 Oct 2022 15:12:06 +0800
Subject: [PATCH 1321/4122] KVM: selftests: memslot_perf_test: Probe memory
 slots for once

prepare_vm() is called in every iteration and run. The allowed memory
slots (KVM_CAP_NR_MEMSLOTS) are probed for multiple times. It's not
free and unnecessary.

Move the probing logic for the allowed memory slots to parse_args()
for once, which is upper layer of prepare_vm().

No functional change intended.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020071209.559062-4-gshan@redhat.com
---
 .../testing/selftests/kvm/memslot_perf_test.c | 32 +++++++++++--------
 1 file changed, 19 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index dcb492b3f27b2..f0ea3f75b6e1d 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -245,27 +245,17 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 		       void *guest_code, uint64_t mempages,
 		       struct timespec *slot_runtime)
 {
-	uint32_t max_mem_slots;
 	uint64_t rempages;
 	uint64_t guest_addr;
 	uint32_t slot;
 	struct timespec tstart;
 	struct sync_area *sync;
 
-	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
-	TEST_ASSERT(max_mem_slots > 1,
-		    "KVM_CAP_NR_MEMSLOTS should be greater than 1");
-	TEST_ASSERT(nslots > 1 || nslots == -1,
-		    "Slot count cap should be greater than 1");
-	if (nslots != -1)
-		max_mem_slots = min(max_mem_slots, (uint32_t)nslots);
-	pr_info_v("Allowed number of memory slots: %"PRIu32"\n", max_mem_slots);
-
 	TEST_ASSERT(mempages > 1,
 		    "Can't test without any memory");
 
 	data->npages = mempages;
-	data->nslots = max_mem_slots - 1;
+	data->nslots = nslots;
 	data->pages_per_slot = mempages / data->nslots;
 	if (!data->pages_per_slot) {
 		*maxslots = mempages + 1;
@@ -869,6 +859,7 @@ static void help(char *name, struct test_args *targs)
 static bool parse_args(int argc, char *argv[],
 		       struct test_args *targs)
 {
+	uint32_t max_mem_slots;
 	int opt;
 
 	while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
@@ -885,8 +876,8 @@ static bool parse_args(int argc, char *argv[],
 			break;
 		case 's':
 			targs->nslots = atoi(optarg);
-			if (targs->nslots <= 0 && targs->nslots != -1) {
-				pr_info("Slot count cap has to be positive or -1 for no cap\n");
+			if (targs->nslots <= 1 && targs->nslots != -1) {
+				pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
 				return false;
 			}
 			break;
@@ -932,6 +923,21 @@ static bool parse_args(int argc, char *argv[],
 		return false;
 	}
 
+	max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+	if (max_mem_slots <= 1) {
+		pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
+		return false;
+	}
+
+	/* Memory slot 0 is reserved */
+	if (targs->nslots == -1)
+		targs->nslots = max_mem_slots - 1;
+	else
+		targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;
+
+	pr_info_v("Allowed Number of memory slots: %"PRIu32"\n",
+		  targs->nslots + 1);
+
 	return true;
 }
 
-- 
GitLab


From 8675c6f226986ddb67752be22279a0e2385b197e Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 20 Oct 2022 15:12:07 +0800
Subject: [PATCH 1322/4122] KVM: selftests: memslot_perf_test: Support variable
 guest page size

The test case is obviously broken on aarch64 because non-4KB guest
page size is supported. The guest page size on aarch64 could be 4KB,
16KB or 64KB.

This supports variable guest page size, mostly for aarch64.

  - The host determines the guest page size when virtual machine is
    created. The value is also passed to guest through the synchronization
    area.

  - The number of guest pages are unknown until the virtual machine
    is to be created. So all the related macros are dropped. Instead,
    their values are dynamically calculated based on the guest page
    size.

  - The static checks on memory sizes and pages becomes dependent
    on guest page size, which is unknown until the virtual machine
    is about to be created. So all the static checks are converted
    to dynamic checks, done in check_memory_sizes().

  - As the address passed to madvise() should be aligned to host page,
    the size of page chunk is automatically selected, other than one
    page.

  - MEM_TEST_MOVE_SIZE has fixed and non-working 64KB. It will be
    consolidated in next patch. However, the comments about how
    it's calculated has been correct.

  - All other changes included in this patch are almost mechanical
    replacing '4096' with 'guest_page_size'.

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020071209.559062-5-gshan@redhat.com
---
 .../testing/selftests/kvm/memslot_perf_test.c | 210 +++++++++++-------
 1 file changed, 129 insertions(+), 81 deletions(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index f0ea3f75b6e1d..9af61ca8ad0a0 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -26,14 +26,11 @@
 #include <processor.h>
 
 #define MEM_SIZE		((512U << 20) + 4096)
-#define MEM_SIZE_PAGES		(MEM_SIZE / 4096)
 #define MEM_GPA		0x10000000UL
 #define MEM_AUX_GPA		MEM_GPA
 #define MEM_SYNC_GPA		MEM_AUX_GPA
 #define MEM_TEST_GPA		(MEM_AUX_GPA + 4096)
 #define MEM_TEST_SIZE		(MEM_SIZE - 4096)
-static_assert(MEM_SIZE % 4096 == 0, "invalid mem size");
-static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
 
 /*
  * 32 MiB is max size that gets well over 100 iterations on 509 slots.
@@ -42,43 +39,37 @@ static_assert(MEM_TEST_SIZE % 4096 == 0, "invalid mem test size");
  * limited resolution).
  */
 #define MEM_SIZE_MAP		((32U << 20) + 4096)
-#define MEM_SIZE_MAP_PAGES	(MEM_SIZE_MAP / 4096)
 #define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - 4096)
-#define MEM_TEST_MAP_SIZE_PAGES (MEM_TEST_MAP_SIZE / 4096)
-static_assert(MEM_SIZE_MAP % 4096 == 0, "invalid map test region size");
-static_assert(MEM_TEST_MAP_SIZE % 4096 == 0, "invalid map test region size");
-static_assert(MEM_TEST_MAP_SIZE_PAGES % 2 == 0, "invalid map test region size");
-static_assert(MEM_TEST_MAP_SIZE_PAGES > 2, "invalid map test region size");
 
 /*
  * 128 MiB is min size that fills 32k slots with at least one page in each
  * while at the same time gets 100+ iterations in such test
+ *
+ * 2 MiB chunk size like a typical huge page
  */
 #define MEM_TEST_UNMAP_SIZE		(128U << 20)
-#define MEM_TEST_UNMAP_SIZE_PAGES	(MEM_TEST_UNMAP_SIZE / 4096)
-/* 2 MiB chunk size like a typical huge page */
-#define MEM_TEST_UNMAP_CHUNK_PAGES	(2U << (20 - 12))
-static_assert(MEM_TEST_UNMAP_SIZE <= MEM_TEST_SIZE,
-	      "invalid unmap test region size");
-static_assert(MEM_TEST_UNMAP_SIZE % 4096 == 0,
-	      "invalid unmap test region size");
-static_assert(MEM_TEST_UNMAP_SIZE_PAGES %
-	      (2 * MEM_TEST_UNMAP_CHUNK_PAGES) == 0,
-	      "invalid unmap test region size");
+#define MEM_TEST_UNMAP_CHUNK_SIZE	(2U << 20)
 
 /*
  * For the move active test the middle of the test area is placed on
  * a memslot boundary: half lies in the memslot being moved, half in
  * other memslot(s).
  *
- * When running this test with 32k memslots (32764, really) each memslot
- * contains 4 pages.
- * The last one additionally contains the remaining 21 pages of memory,
- * for the total size of 25 pages.
- * Hence, the maximum size here is 50 pages.
+ * We have different number of memory slots, excluding the reserved
+ * memory slot 0, on various architectures and configurations. The
+ * memory size in this test is calculated by picking the maximal
+ * last memory slot's memory size, with alignment to the largest
+ * supported page size (64KB). In this way, the selected memory
+ * size for this test is compatible with test_memslot_move_prepare().
+ *
+ * architecture   slots    memory-per-slot    memory-on-last-slot
+ * --------------------------------------------------------------
+ * x86-4KB        32763    16KB               100KB
+ * arm64-4KB      32766    16KB               52KB
+ * arm64-16KB     32766    16KB               48KB
+ * arm64-64KB     8192     64KB               64KB
  */
-#define MEM_TEST_MOVE_SIZE_PAGES	(50)
-#define MEM_TEST_MOVE_SIZE		(MEM_TEST_MOVE_SIZE_PAGES * 4096)
+#define MEM_TEST_MOVE_SIZE		0x10000
 #define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
 static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
 	      "invalid move test region size");
@@ -100,6 +91,7 @@ struct vm_data {
 };
 
 struct sync_area {
+	uint32_t    guest_page_size;
 	atomic_bool start_flag;
 	atomic_bool exit_flag;
 	atomic_bool sync_flag;
@@ -192,14 +184,15 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
 	uint64_t gpage, pgoffs;
 	uint32_t slot, slotoffs;
 	void *base;
+	uint32_t guest_page_size = data->vm->page_size;
 
 	TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
-	TEST_ASSERT(gpa < MEM_GPA + data->npages * 4096,
+	TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
 		    "Too high gpa to translate");
 	gpa -= MEM_GPA;
 
-	gpage = gpa / 4096;
-	pgoffs = gpa % 4096;
+	gpage = gpa / guest_page_size;
+	pgoffs = gpa % guest_page_size;
 	slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
 	slotoffs = gpage - (slot * data->pages_per_slot);
 
@@ -217,14 +210,16 @@ static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
 	}
 
 	base = data->hva_slots[slot];
-	return (uint8_t *)base + slotoffs * 4096 + pgoffs;
+	return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
 }
 
 static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
 {
+	uint32_t guest_page_size = data->vm->page_size;
+
 	TEST_ASSERT(slot < data->nslots, "Too high slot number");
 
-	return MEM_GPA + slot * data->pages_per_slot * 4096;
+	return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
 }
 
 static struct vm_data *alloc_vm(void)
@@ -242,33 +237,35 @@ static struct vm_data *alloc_vm(void)
 }
 
 static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
-		       void *guest_code, uint64_t mempages,
+		       void *guest_code, uint64_t mem_size,
 		       struct timespec *slot_runtime)
 {
-	uint64_t rempages;
+	uint64_t mempages, rempages;
 	uint64_t guest_addr;
-	uint32_t slot;
+	uint32_t slot, guest_page_size;
 	struct timespec tstart;
 	struct sync_area *sync;
 
-	TEST_ASSERT(mempages > 1,
-		    "Can't test without any memory");
+	guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+	mempages = mem_size / guest_page_size;
+
+	data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
+	ucall_init(data->vm, NULL);
+	TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");
 
 	data->npages = mempages;
+	TEST_ASSERT(data->npages > 1, "Can't test without any memory");
 	data->nslots = nslots;
-	data->pages_per_slot = mempages / data->nslots;
+	data->pages_per_slot = data->npages / data->nslots;
 	if (!data->pages_per_slot) {
-		*maxslots = mempages + 1;
+		*maxslots = data->npages + 1;
 		return false;
 	}
 
-	rempages = mempages % data->nslots;
+	rempages = data->npages % data->nslots;
 	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
 	TEST_ASSERT(data->hva_slots, "malloc() fail");
 
-	data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
-	ucall_init(data->vm, NULL);
-
 	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
 		data->nslots, data->pages_per_slot, rempages);
 
@@ -283,7 +280,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 		vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
 					    guest_addr, slot, npages,
 					    0);
-		guest_addr += npages * 4096;
+		guest_addr += npages * guest_page_size;
 	}
 	*slot_runtime = timespec_elapsed(tstart);
 
@@ -300,12 +297,12 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 			    "vm_phy_pages_alloc() failed\n");
 
 		data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
-		memset(data->hva_slots[slot - 1], 0, npages * 4096);
+		memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
 
-		guest_addr += npages * 4096;
+		guest_addr += npages * guest_page_size;
 	}
 
-	virt_map(data->vm, MEM_GPA, MEM_GPA, mempages);
+	virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);
 
 	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
 	atomic_init(&sync->start_flag, false);
@@ -404,6 +401,7 @@ static bool guest_perform_sync(void)
 static void guest_code_test_memslot_move(void)
 {
 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
 	uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
 
 	GUEST_SYNC(0);
@@ -414,7 +412,7 @@ static void guest_code_test_memslot_move(void)
 		uintptr_t ptr;
 
 		for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
-		     ptr += 4096)
+		     ptr += page_size)
 			*(uint64_t *)ptr = MEM_TEST_VAL_1;
 
 		/*
@@ -432,6 +430,7 @@ static void guest_code_test_memslot_move(void)
 static void guest_code_test_memslot_map(void)
 {
 	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
 
 	GUEST_SYNC(0);
 
@@ -441,14 +440,16 @@ static void guest_code_test_memslot_map(void)
 		uintptr_t ptr;
 
 		for (ptr = MEM_TEST_GPA;
-		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2; ptr += 4096)
+		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+		     ptr += page_size)
 			*(uint64_t *)ptr = MEM_TEST_VAL_1;
 
 		if (!guest_perform_sync())
 			break;
 
 		for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
-		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE; ptr += 4096)
+		     ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
+		     ptr += page_size)
 			*(uint64_t *)ptr = MEM_TEST_VAL_2;
 
 		if (!guest_perform_sync())
@@ -495,6 +496,9 @@ static void guest_code_test_memslot_unmap(void)
 
 static void guest_code_test_memslot_rw(void)
 {
+	struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+	uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+
 	GUEST_SYNC(0);
 
 	guest_spin_until_start();
@@ -503,14 +507,14 @@ static void guest_code_test_memslot_rw(void)
 		uintptr_t ptr;
 
 		for (ptr = MEM_TEST_GPA;
-		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096)
+		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
 			*(uint64_t *)ptr = MEM_TEST_VAL_1;
 
 		if (!guest_perform_sync())
 			break;
 
-		for (ptr = MEM_TEST_GPA + 4096 / 2;
-		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += 4096) {
+		for (ptr = MEM_TEST_GPA + page_size / 2;
+		     ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
 			uint64_t val = *(uint64_t *)ptr;
 
 			GUEST_ASSERT_1(val == MEM_TEST_VAL_2, val);
@@ -528,6 +532,8 @@ static bool test_memslot_move_prepare(struct vm_data *data,
 				      struct sync_area *sync,
 				      uint64_t *maxslots, bool isactive)
 {
+	uint32_t guest_page_size = data->vm->page_size;
+	uint64_t move_pages = MEM_TEST_MOVE_SIZE / guest_page_size;
 	uint64_t movesrcgpa, movetestgpa;
 
 	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
@@ -536,7 +542,7 @@ static bool test_memslot_move_prepare(struct vm_data *data,
 		uint64_t lastpages;
 
 		vm_gpa2hva(data, movesrcgpa, &lastpages);
-		if (lastpages < MEM_TEST_MOVE_SIZE_PAGES / 2) {
+		if (lastpages < move_pages / 2) {
 			*maxslots = 0;
 			return false;
 		}
@@ -582,8 +588,9 @@ static void test_memslot_do_unmap(struct vm_data *data,
 				  uint64_t offsp, uint64_t count)
 {
 	uint64_t gpa, ctr;
+	uint32_t guest_page_size = data->vm->page_size;
 
-	for (gpa = MEM_TEST_GPA + offsp * 4096, ctr = 0; ctr < count; ) {
+	for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
 		uint64_t npages;
 		void *hva;
 		int ret;
@@ -591,12 +598,12 @@ static void test_memslot_do_unmap(struct vm_data *data,
 		hva = vm_gpa2hva(data, gpa, &npages);
 		TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
 		npages = min(npages, count - ctr);
-		ret = madvise(hva, npages * 4096, MADV_DONTNEED);
+		ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
 		TEST_ASSERT(!ret,
 			    "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
 			    hva, gpa);
 		ctr += npages;
-		gpa += npages * 4096;
+		gpa += npages * guest_page_size;
 	}
 	TEST_ASSERT(ctr == count,
 		    "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
@@ -607,11 +614,12 @@ static void test_memslot_map_unmap_check(struct vm_data *data,
 {
 	uint64_t gpa;
 	uint64_t *val;
+	uint32_t guest_page_size = data->vm->page_size;
 
 	if (!map_unmap_verify)
 		return;
 
-	gpa = MEM_TEST_GPA + offsp * 4096;
+	gpa = MEM_TEST_GPA + offsp * guest_page_size;
 	val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
 	TEST_ASSERT(*val == valexp,
 		    "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
@@ -621,12 +629,14 @@ static void test_memslot_map_unmap_check(struct vm_data *data,
 
 static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
 {
+	uint32_t guest_page_size = data->vm->page_size;
+	uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
+
 	/*
 	 * Unmap the second half of the test area while guest writes to (maps)
 	 * the first half.
 	 */
-	test_memslot_do_unmap(data, MEM_TEST_MAP_SIZE_PAGES / 2,
-			      MEM_TEST_MAP_SIZE_PAGES / 2);
+	test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);
 
 	/*
 	 * Wait for the guest to finish writing the first half of the test
@@ -637,10 +647,8 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
 	 */
 	host_perform_sync(sync);
 	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
-	test_memslot_map_unmap_check(data,
-				     MEM_TEST_MAP_SIZE_PAGES / 2 - 1,
-				     MEM_TEST_VAL_1);
-	test_memslot_do_unmap(data, 0, MEM_TEST_MAP_SIZE_PAGES / 2);
+	test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
+	test_memslot_do_unmap(data, 0, guest_pages / 2);
 
 
 	/*
@@ -653,16 +661,16 @@ static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
 	 * the test area.
 	 */
 	host_perform_sync(sync);
-	test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES / 2,
-				     MEM_TEST_VAL_2);
-	test_memslot_map_unmap_check(data, MEM_TEST_MAP_SIZE_PAGES - 1,
-				     MEM_TEST_VAL_2);
+	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
+	test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
 }
 
 static void test_memslot_unmap_loop_common(struct vm_data *data,
 					   struct sync_area *sync,
 					   uint64_t chunk)
 {
+	uint32_t guest_page_size = data->vm->page_size;
+	uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
 	uint64_t ctr;
 
 	/*
@@ -674,42 +682,49 @@ static void test_memslot_unmap_loop_common(struct vm_data *data,
 	 */
 	host_perform_sync(sync);
 	test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
-	for (ctr = 0; ctr < MEM_TEST_UNMAP_SIZE_PAGES / 2; ctr += chunk)
+	for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
 		test_memslot_do_unmap(data, ctr, chunk);
 
 	/* Likewise, but for the opposite host / guest areas */
 	host_perform_sync(sync);
-	test_memslot_map_unmap_check(data, MEM_TEST_UNMAP_SIZE_PAGES / 2,
-				     MEM_TEST_VAL_2);
-	for (ctr = MEM_TEST_UNMAP_SIZE_PAGES / 2;
-	     ctr < MEM_TEST_UNMAP_SIZE_PAGES; ctr += chunk)
+	test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
+	for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
 		test_memslot_do_unmap(data, ctr, chunk);
 }
 
 static void test_memslot_unmap_loop(struct vm_data *data,
 				    struct sync_area *sync)
 {
-	test_memslot_unmap_loop_common(data, sync, 1);
+	uint32_t host_page_size = getpagesize();
+	uint32_t guest_page_size = data->vm->page_size;
+	uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
+					1 : host_page_size / guest_page_size;
+
+	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
 }
 
 static void test_memslot_unmap_loop_chunked(struct vm_data *data,
 					    struct sync_area *sync)
 {
-	test_memslot_unmap_loop_common(data, sync, MEM_TEST_UNMAP_CHUNK_PAGES);
+	uint32_t guest_page_size = data->vm->page_size;
+	uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
+
+	test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
 }
 
 static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
 {
 	uint64_t gptr;
+	uint32_t guest_page_size = data->vm->page_size;
 
-	for (gptr = MEM_TEST_GPA + 4096 / 2;
-	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096)
+	for (gptr = MEM_TEST_GPA + guest_page_size / 2;
+	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
 		*(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
 
 	host_perform_sync(sync);
 
 	for (gptr = MEM_TEST_GPA;
-	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += 4096) {
+	     gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
 		uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
 		uint64_t val = *vptr;
 
@@ -738,7 +753,7 @@ static bool test_execute(int nslots, uint64_t *maxslots,
 			 struct timespec *slot_runtime,
 			 struct timespec *guest_runtime)
 {
-	uint64_t mem_size = tdata->mem_size ? : MEM_SIZE_PAGES;
+	uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
 	struct vm_data *data;
 	struct sync_area *sync;
 	struct timespec tstart;
@@ -753,6 +768,7 @@ static bool test_execute(int nslots, uint64_t *maxslots,
 
 	sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
 
+	sync->guest_page_size = data->vm->page_size;
 	if (tdata->prepare &&
 	    !tdata->prepare(data, sync, maxslots)) {
 		ret = false;
@@ -786,19 +802,19 @@ exit_free:
 static const struct test_data tests[] = {
 	{
 		.name = "map",
-		.mem_size = MEM_SIZE_MAP_PAGES,
+		.mem_size = MEM_SIZE_MAP,
 		.guest_code = guest_code_test_memslot_map,
 		.loop = test_memslot_map_loop,
 	},
 	{
 		.name = "unmap",
-		.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+		.mem_size = MEM_TEST_UNMAP_SIZE + 4096,
 		.guest_code = guest_code_test_memslot_unmap,
 		.loop = test_memslot_unmap_loop,
 	},
 	{
 		.name = "unmap chunked",
-		.mem_size = MEM_TEST_UNMAP_SIZE_PAGES + 1,
+		.mem_size = MEM_TEST_UNMAP_SIZE + 4096,
 		.guest_code = guest_code_test_memslot_unmap,
 		.loop = test_memslot_unmap_loop_chunked,
 	},
@@ -856,6 +872,35 @@ static void help(char *name, struct test_args *targs)
 		pr_info("%d: %s\n", ctr, tests[ctr].name);
 }
 
+static bool check_memory_sizes(void)
+{
+	uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+
+	if (MEM_SIZE % guest_page_size ||
+	    MEM_TEST_SIZE % guest_page_size) {
+		pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
+		return false;
+	}
+
+	if (MEM_SIZE_MAP % guest_page_size		||
+	    MEM_TEST_MAP_SIZE % guest_page_size		||
+	    (MEM_TEST_MAP_SIZE / guest_page_size) <= 2	||
+	    (MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
+		pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
+		return false;
+	}
+
+	if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE		||
+	    MEM_TEST_UNMAP_SIZE % guest_page_size	||
+	    (MEM_TEST_UNMAP_SIZE / guest_page_size) %
+	    (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
+		pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
+		return false;
+	}
+
+	return true;
+}
+
 static bool parse_args(int argc, char *argv[],
 		       struct test_args *targs)
 {
@@ -1015,6 +1060,9 @@ int main(int argc, char *argv[])
 	/* Tell stdout not to buffer its content */
 	setbuf(stdout, NULL);
 
+	if (!check_memory_sizes())
+		return -1;
+
 	if (!parse_args(argc, argv, &targs))
 		return -1;
 
-- 
GitLab


From 88a64e65484ef6b5cb09fe545d0dd00c950a1131 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 20 Oct 2022 15:12:08 +0800
Subject: [PATCH 1323/4122] KVM: selftests: memslot_perf_test: Consolidate
 memory

The addresses and sizes passed to vm_userspace_mem_region_add() and
madvise() should be aligned to host page size, which can be 64KB on
aarch64. So it's wrong by passing additional fixed 4KB memory area
to various tests.

Fix it by passing additional fixed 64KB memory area to various tests.
We also add checks to ensure that none of host/guest page size exceeds
64KB. MEM_TEST_MOVE_SIZE is fixed up to 192KB either.

With this, the following command works fine on 64KB-page-size-host and
4KB-page-size-guest.

  # ./memslot_perf_test -v -s 512

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020071209.559062-6-gshan@redhat.com
---
 .../testing/selftests/kvm/memslot_perf_test.c | 43 +++++++++++--------
 1 file changed, 26 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 9af61ca8ad0a0..daebc264de5ad 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -20,17 +20,20 @@
 #include <unistd.h>
 
 #include <linux/compiler.h>
+#include <linux/sizes.h>
 
 #include <test_util.h>
 #include <kvm_util.h>
 #include <processor.h>
 
-#define MEM_SIZE		((512U << 20) + 4096)
-#define MEM_GPA		0x10000000UL
+#define MEM_EXTRA_SIZE		SZ_64K
+
+#define MEM_SIZE		(SZ_512M + MEM_EXTRA_SIZE)
+#define MEM_GPA			SZ_256M
 #define MEM_AUX_GPA		MEM_GPA
 #define MEM_SYNC_GPA		MEM_AUX_GPA
-#define MEM_TEST_GPA		(MEM_AUX_GPA + 4096)
-#define MEM_TEST_SIZE		(MEM_SIZE - 4096)
+#define MEM_TEST_GPA		(MEM_AUX_GPA + MEM_EXTRA_SIZE)
+#define MEM_TEST_SIZE		(MEM_SIZE - MEM_EXTRA_SIZE)
 
 /*
  * 32 MiB is max size that gets well over 100 iterations on 509 slots.
@@ -38,8 +41,8 @@
  * 8194 slots in use can then be tested (although with slightly
  * limited resolution).
  */
-#define MEM_SIZE_MAP		((32U << 20) + 4096)
-#define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - 4096)
+#define MEM_SIZE_MAP		(SZ_32M + MEM_EXTRA_SIZE)
+#define MEM_TEST_MAP_SIZE	(MEM_SIZE_MAP - MEM_EXTRA_SIZE)
 
 /*
  * 128 MiB is min size that fills 32k slots with at least one page in each
@@ -47,8 +50,8 @@
  *
  * 2 MiB chunk size like a typical huge page
  */
-#define MEM_TEST_UNMAP_SIZE		(128U << 20)
-#define MEM_TEST_UNMAP_CHUNK_SIZE	(2U << 20)
+#define MEM_TEST_UNMAP_SIZE		SZ_128M
+#define MEM_TEST_UNMAP_CHUNK_SIZE	SZ_2M
 
 /*
  * For the move active test the middle of the test area is placed on
@@ -64,12 +67,12 @@
  *
  * architecture   slots    memory-per-slot    memory-on-last-slot
  * --------------------------------------------------------------
- * x86-4KB        32763    16KB               100KB
- * arm64-4KB      32766    16KB               52KB
- * arm64-16KB     32766    16KB               48KB
- * arm64-64KB     8192     64KB               64KB
+ * x86-4KB        32763    16KB               160KB
+ * arm64-4KB      32766    16KB               112KB
+ * arm64-16KB     32766    16KB               112KB
+ * arm64-64KB     8192     64KB               128KB
  */
-#define MEM_TEST_MOVE_SIZE		0x10000
+#define MEM_TEST_MOVE_SIZE		(3 * SZ_64K)
 #define MEM_TEST_MOVE_GPA_DEST		(MEM_GPA + MEM_SIZE)
 static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
 	      "invalid move test region size");
@@ -533,7 +536,6 @@ static bool test_memslot_move_prepare(struct vm_data *data,
 				      uint64_t *maxslots, bool isactive)
 {
 	uint32_t guest_page_size = data->vm->page_size;
-	uint64_t move_pages = MEM_TEST_MOVE_SIZE / guest_page_size;
 	uint64_t movesrcgpa, movetestgpa;
 
 	movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
@@ -542,7 +544,7 @@ static bool test_memslot_move_prepare(struct vm_data *data,
 		uint64_t lastpages;
 
 		vm_gpa2hva(data, movesrcgpa, &lastpages);
-		if (lastpages < move_pages / 2) {
+		if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
 			*maxslots = 0;
 			return false;
 		}
@@ -808,13 +810,13 @@ static const struct test_data tests[] = {
 	},
 	{
 		.name = "unmap",
-		.mem_size = MEM_TEST_UNMAP_SIZE + 4096,
+		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
 		.guest_code = guest_code_test_memslot_unmap,
 		.loop = test_memslot_unmap_loop,
 	},
 	{
 		.name = "unmap chunked",
-		.mem_size = MEM_TEST_UNMAP_SIZE + 4096,
+		.mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
 		.guest_code = guest_code_test_memslot_unmap,
 		.loop = test_memslot_unmap_loop_chunked,
 	},
@@ -874,8 +876,15 @@ static void help(char *name, struct test_args *targs)
 
 static bool check_memory_sizes(void)
 {
+	uint32_t host_page_size = getpagesize();
 	uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
 
+	if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
+		pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
+			host_page_size, guest_page_size);
+		return false;
+	}
+
 	if (MEM_SIZE % guest_page_size ||
 	    MEM_TEST_SIZE % guest_page_size) {
 		pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
-- 
GitLab


From a69170c65acdf430e24fc1b6174dcc3aa501fe2f Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Thu, 20 Oct 2022 15:12:09 +0800
Subject: [PATCH 1324/4122] KVM: selftests: memslot_perf_test: Report optimal
 memory slots

The memory area in each slot should be aligned to host page size.
Otherwise, the test will fail. For example, the following command
fails with the following messages with 64KB-page-size-host and
4KB-pae-size-guest. It's not user friendly to abort the test.
Lets do something to report the optimal memory slots, instead of
failing the test.

  # ./memslot_perf_test -v -s 1000
  Number of memory slots: 999
  Testing map performance with 1 runs, 5 seconds each
  Adding slots 1..999, each slot with 8 pages + 216 extra pages last
  ==== Test Assertion Failure ====
    lib/kvm_util.c:824: vm_adjust_num_guest_pages(vm->mode, npages) == npages
    pid=19872 tid=19872 errno=0 - Success
       1  0x00000000004065b3: vm_userspace_mem_region_add at kvm_util.c:822
       2  0x0000000000401d6b: prepare_vm at memslot_perf_test.c:273
       3  (inlined by) test_execute at memslot_perf_test.c:756
       4  (inlined by) test_loop at memslot_perf_test.c:994
       5  (inlined by) main at memslot_perf_test.c:1073
       6  0x0000ffff7ebb4383: ?? ??:0
       7  0x00000000004021ff: _start at :?
    Number of guest pages is not compatible with the host. Try npages=16

Report the optimal memory slots instead of failing the test when
the memory area in each slot isn't aligned to host page size. With
this applied, the optimal memory slots is reported.

  # ./memslot_perf_test -v -s 1000
  Number of memory slots: 999
  Testing map performance with 1 runs, 5 seconds each
  Memslot count too high for this test, decrease the cap (max is 514)

Signed-off-by: Gavin Shan <gshan@redhat.com>
Reviewed-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020071209.559062-7-gshan@redhat.com
---
 .../testing/selftests/kvm/memslot_perf_test.c | 45 +++++++++++++++++--
 1 file changed, 41 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index daebc264de5ad..2ad40f7c9c08e 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -239,16 +239,52 @@ static struct vm_data *alloc_vm(void)
 	return data;
 }
 
+static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
+			     uint64_t pages_per_slot, uint64_t rempages)
+{
+	if (!pages_per_slot)
+		return false;
+
+	if ((pages_per_slot * guest_page_size) % host_page_size)
+		return false;
+
+	if ((rempages * guest_page_size) % host_page_size)
+		return false;
+
+	return true;
+}
+
+
+static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
+{
+	uint32_t guest_page_size = data->vm->page_size;
+	uint64_t mempages, pages_per_slot, rempages;
+	uint64_t slots;
+
+	mempages = data->npages;
+	slots = data->nslots;
+	while (--slots > 1) {
+		pages_per_slot = mempages / slots;
+		rempages = mempages % pages_per_slot;
+		if (check_slot_pages(host_page_size, guest_page_size,
+				     pages_per_slot, rempages))
+			return slots + 1;	/* slot 0 is reserved */
+	}
+
+	return 0;
+}
+
 static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 		       void *guest_code, uint64_t mem_size,
 		       struct timespec *slot_runtime)
 {
 	uint64_t mempages, rempages;
 	uint64_t guest_addr;
-	uint32_t slot, guest_page_size;
+	uint32_t slot, host_page_size, guest_page_size;
 	struct timespec tstart;
 	struct sync_area *sync;
 
+	host_page_size = getpagesize();
 	guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
 	mempages = mem_size / guest_page_size;
 
@@ -260,12 +296,13 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	TEST_ASSERT(data->npages > 1, "Can't test without any memory");
 	data->nslots = nslots;
 	data->pages_per_slot = data->npages / data->nslots;
-	if (!data->pages_per_slot) {
-		*maxslots = data->npages + 1;
+	rempages = data->npages % data->nslots;
+	if (!check_slot_pages(host_page_size, guest_page_size,
+			      data->pages_per_slot, rempages)) {
+		*maxslots = get_max_slots(data, host_page_size);
 		return false;
 	}
 
-	rempages = data->npages % data->nslots;
 	data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
 	TEST_ASSERT(data->hva_slots, "malloc() fail");
 
-- 
GitLab


From 1a6182033f2d5c481aec1f8c1c26ebc649693d57 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:41:54 -0700
Subject: [PATCH 1325/4122] KVM: arm64: selftests: Use FIELD_GET() to extract
 ID register fields

Use FIELD_GET() macro to extract ID register fields for existing
aarch64 selftests code. No functional change intended.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-2-reijiw@google.com
---
 tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c  | 3 ++-
 tools/testing/selftests/kvm/aarch64/debug-exceptions.c | 3 ++-
 tools/testing/selftests/kvm/lib/aarch64/processor.c    | 7 ++++---
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
index 6f9c1f19c7f64..b6a5e8861b354 100644
--- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
@@ -13,6 +13,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 #include "test_util.h"
+#include <linux/bitfield.h>
 
 #define BAD_ID_REG_VAL	0x1badc0deul
 
@@ -145,7 +146,7 @@ static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
 
 	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
 
-	el0 = (val & ARM64_FEATURE_MASK(ID_AA64PFR0_EL0)) >> ID_AA64PFR0_EL0_SHIFT;
+	el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), val);
 	return el0 == ID_AA64PFR0_ELx_64BIT_ONLY;
 }
 
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 947bd201435ce..3808d3d750558 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -2,6 +2,7 @@
 #include <test_util.h>
 #include <kvm_util.h>
 #include <processor.h>
+#include <linux/bitfield.h>
 
 #define MDSCR_KDE	(1 << 13)
 #define MDSCR_MDE	(1 << 15)
@@ -284,7 +285,7 @@ static int debug_version(struct kvm_vcpu *vcpu)
 	uint64_t id_aa64dfr0;
 
 	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0);
-	return id_aa64dfr0 & 0xf;
+	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0);
 }
 
 static void test_guest_debug_exceptions(void)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 6f5551368944e..7c96b931edd5c 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -11,6 +11,7 @@
 #include "guest_modes.h"
 #include "kvm_util.h"
 #include "processor.h"
+#include <linux/bitfield.h>
 
 #define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN	0xac0000
 
@@ -486,9 +487,9 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
 	err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
 	TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
 
-	*ps4k = ((val >> 28) & 0xf) != 0xf;
-	*ps64k = ((val >> 24) & 0xf) == 0;
-	*ps16k = ((val >> 20) & 0xf) != 0;
+	*ps4k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN4), val) != 0xf;
+	*ps64k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN64), val) == 0;
+	*ps16k = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_TGRAN16), val) != 0;
 
 	close(vcpu_fd);
 	close(vm_fd);
-- 
GitLab


From f6d02aa28ae21161d64300bac62b2dde85584004 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:41:55 -0700
Subject: [PATCH 1326/4122] KVM: arm64: selftests: Add write_dbg{b,w}{c,v}r
 helpers in debug-exceptions

Introduce helpers in the debug-exceptions test to write to
dbg{b,w}{c,v}r registers. Those helpers will be useful for
test cases that will be added to the test in subsequent patches.

No functional change intended.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-3-reijiw@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 72 +++++++++++++++++--
 1 file changed, 68 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 3808d3d750558..d9884907fe87b 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -30,6 +30,69 @@ static volatile uint64_t svc_addr;
 static volatile uint64_t ss_addr[4], ss_idx;
 #define  PC(v)  ((uint64_t)&(v))
 
+#define GEN_DEBUG_WRITE_REG(reg_name)			\
+static void write_##reg_name(int num, uint64_t val)	\
+{							\
+	switch (num) {					\
+	case 0:						\
+		write_sysreg(val, reg_name##0_el1);	\
+		break;					\
+	case 1:						\
+		write_sysreg(val, reg_name##1_el1);	\
+		break;					\
+	case 2:						\
+		write_sysreg(val, reg_name##2_el1);	\
+		break;					\
+	case 3:						\
+		write_sysreg(val, reg_name##3_el1);	\
+		break;					\
+	case 4:						\
+		write_sysreg(val, reg_name##4_el1);	\
+		break;					\
+	case 5:						\
+		write_sysreg(val, reg_name##5_el1);	\
+		break;					\
+	case 6:						\
+		write_sysreg(val, reg_name##6_el1);	\
+		break;					\
+	case 7:						\
+		write_sysreg(val, reg_name##7_el1);	\
+		break;					\
+	case 8:						\
+		write_sysreg(val, reg_name##8_el1);	\
+		break;					\
+	case 9:						\
+		write_sysreg(val, reg_name##9_el1);	\
+		break;					\
+	case 10:					\
+		write_sysreg(val, reg_name##10_el1);	\
+		break;					\
+	case 11:					\
+		write_sysreg(val, reg_name##11_el1);	\
+		break;					\
+	case 12:					\
+		write_sysreg(val, reg_name##12_el1);	\
+		break;					\
+	case 13:					\
+		write_sysreg(val, reg_name##13_el1);	\
+		break;					\
+	case 14:					\
+		write_sysreg(val, reg_name##14_el1);	\
+		break;					\
+	case 15:					\
+		write_sysreg(val, reg_name##15_el1);	\
+		break;					\
+	default:					\
+		GUEST_ASSERT(0);			\
+	}						\
+}
+
+/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
+GEN_DEBUG_WRITE_REG(dbgbcr)
+GEN_DEBUG_WRITE_REG(dbgbvr)
+GEN_DEBUG_WRITE_REG(dbgwcr)
+GEN_DEBUG_WRITE_REG(dbgwvr)
+
 static void reset_debug_state(void)
 {
 	asm volatile("msr daifset, #8");
@@ -61,8 +124,9 @@ static void install_wp(uint64_t addr)
 	uint32_t mdscr;
 
 	wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
-	write_sysreg(wcr, dbgwcr0_el1);
-	write_sysreg(addr, dbgwvr0_el1);
+	write_dbgwcr(0, wcr);
+	write_dbgwvr(0, addr);
+
 	isb();
 
 	asm volatile("msr daifclr, #8");
@@ -78,8 +142,8 @@ static void install_hw_bp(uint64_t addr)
 	uint32_t mdscr;
 
 	bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
-	write_sysreg(bcr, dbgbcr0_el1);
-	write_sysreg(addr, dbgbvr0_el1);
+	write_dbgbcr(0, bcr);
+	write_dbgbvr(0, addr);
 	isb();
 
 	asm volatile("msr daifclr, #8");
-- 
GitLab


From 700b8860e02cbaa7dd1181a914ff38e0fae18bf0 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:41:56 -0700
Subject: [PATCH 1327/4122] KVM: arm64: selftests: Remove the hard-coded
 {b,w}pn#0 from debug-exceptions

Remove the hard-coded {break,watch}point #0 from the guest_code() in
debug-exceptions to allow {break,watch}point number to be specified.
Change reset_debug_state() to zeroing all dbg{b,w}{c,v}r_el0 registers
so that guest_code() can use the function to reset those registers
even when non-zero {break,watch}points are specified for guest_code().
Subsequent patches will add test cases for non-zero {break,watch}points.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-4-reijiw@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 50 ++++++++++++-------
 1 file changed, 32 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index d9884907fe87b..608a6c8db9a21 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -95,6 +95,9 @@ GEN_DEBUG_WRITE_REG(dbgwvr)
 
 static void reset_debug_state(void)
 {
+	uint8_t brps, wrps, i;
+	uint64_t dfr0;
+
 	asm volatile("msr daifset, #8");
 
 	write_sysreg(0, osdlr_el1);
@@ -102,11 +105,20 @@ static void reset_debug_state(void)
 	isb();
 
 	write_sysreg(0, mdscr_el1);
-	/* This test only uses the first bp and wp slot. */
-	write_sysreg(0, dbgbvr0_el1);
-	write_sysreg(0, dbgbcr0_el1);
-	write_sysreg(0, dbgwcr0_el1);
-	write_sysreg(0, dbgwvr0_el1);
+
+	/* Reset all bcr/bvr/wcr/wvr registers */
+	dfr0 = read_sysreg(id_aa64dfr0_el1);
+	brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), dfr0);
+	for (i = 0; i <= brps; i++) {
+		write_dbgbcr(i, 0);
+		write_dbgbvr(i, 0);
+	}
+	wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), dfr0);
+	for (i = 0; i <= wrps; i++) {
+		write_dbgwcr(i, 0);
+		write_dbgwvr(i, 0);
+	}
+
 	isb();
 }
 
@@ -118,14 +130,14 @@ static void enable_os_lock(void)
 	GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
 }
 
-static void install_wp(uint64_t addr)
+static void install_wp(uint8_t wpn, uint64_t addr)
 {
 	uint32_t wcr;
 	uint32_t mdscr;
 
 	wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
-	write_dbgwcr(0, wcr);
-	write_dbgwvr(0, addr);
+	write_dbgwcr(wpn, wcr);
+	write_dbgwvr(wpn, addr);
 
 	isb();
 
@@ -136,14 +148,14 @@ static void install_wp(uint64_t addr)
 	isb();
 }
 
-static void install_hw_bp(uint64_t addr)
+static void install_hw_bp(uint8_t bpn, uint64_t addr)
 {
 	uint32_t bcr;
 	uint32_t mdscr;
 
 	bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
-	write_dbgbcr(0, bcr);
-	write_dbgbvr(0, addr);
+	write_dbgbcr(bpn, bcr);
+	write_dbgbvr(bpn, addr);
 	isb();
 
 	asm volatile("msr daifclr, #8");
@@ -166,7 +178,7 @@ static void install_ss(void)
 
 static volatile char write_data;
 
-static void guest_code(void)
+static void guest_code(uint8_t bpn, uint8_t wpn)
 {
 	GUEST_SYNC(0);
 
@@ -179,7 +191,7 @@ static void guest_code(void)
 
 	/* Hardware-breakpoint */
 	reset_debug_state();
-	install_hw_bp(PC(hw_bp));
+	install_hw_bp(bpn, PC(hw_bp));
 	asm volatile("hw_bp: nop");
 	GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
 
@@ -187,7 +199,7 @@ static void guest_code(void)
 
 	/* Hardware-breakpoint + svc */
 	reset_debug_state();
-	install_hw_bp(PC(bp_svc));
+	install_hw_bp(bpn, PC(bp_svc));
 	asm volatile("bp_svc: svc #0");
 	GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
 	GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
@@ -196,7 +208,7 @@ static void guest_code(void)
 
 	/* Hardware-breakpoint + software-breakpoint */
 	reset_debug_state();
-	install_hw_bp(PC(bp_brk));
+	install_hw_bp(bpn, PC(bp_brk));
 	asm volatile("bp_brk: brk #0");
 	GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
 	GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
@@ -205,7 +217,7 @@ static void guest_code(void)
 
 	/* Watchpoint */
 	reset_debug_state();
-	install_wp(PC(write_data));
+	install_wp(wpn, PC(write_data));
 	write_data = 'x';
 	GUEST_ASSERT_EQ(write_data, 'x');
 	GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
@@ -239,7 +251,7 @@ static void guest_code(void)
 	/* OS Lock blocking hardware-breakpoint */
 	reset_debug_state();
 	enable_os_lock();
-	install_hw_bp(PC(hw_bp2));
+	install_hw_bp(bpn, PC(hw_bp2));
 	hw_bp_addr = 0;
 	asm volatile("hw_bp2: nop");
 	GUEST_ASSERT_EQ(hw_bp_addr, 0);
@@ -251,7 +263,7 @@ static void guest_code(void)
 	enable_os_lock();
 	write_data = '\0';
 	wp_data_addr = 0;
-	install_wp(PC(write_data));
+	install_wp(wpn, PC(write_data));
 	write_data = 'x';
 	GUEST_ASSERT_EQ(write_data, 'x');
 	GUEST_ASSERT_EQ(wp_data_addr, 0);
@@ -376,6 +388,8 @@ static void test_guest_debug_exceptions(void)
 	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
 				ESR_EC_SVC64, guest_svc_handler);
 
+	/* Run tests with breakpoint#0 and watchpoint#0. */
+	vcpu_args_set(vcpu, 2, 0, 0);
 	for (stage = 0; stage < 11; stage++) {
 		vcpu_run(vcpu);
 
-- 
GitLab


From 152880d8edf5ad6df5b4b4915a4d9f9085ab8fef Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:41:57 -0700
Subject: [PATCH 1328/4122] KVM: arm64: selftests: Add helpers to enable debug
 exceptions

Add helpers to enable breakpoint and watchpoint exceptions.
No functional change intended.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-5-reijiw@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 25 ++++++++++---------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 608a6c8db9a21..0c237022f4d39 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -130,10 +130,20 @@ static void enable_os_lock(void)
 	GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
 }
 
+static void enable_monitor_debug_exceptions(void)
+{
+	uint32_t mdscr;
+
+	asm volatile("msr daifclr, #8");
+
+	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+	write_sysreg(mdscr, mdscr_el1);
+	isb();
+}
+
 static void install_wp(uint8_t wpn, uint64_t addr)
 {
 	uint32_t wcr;
-	uint32_t mdscr;
 
 	wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
 	write_dbgwcr(wpn, wcr);
@@ -141,28 +151,19 @@ static void install_wp(uint8_t wpn, uint64_t addr)
 
 	isb();
 
-	asm volatile("msr daifclr, #8");
-
-	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
-	write_sysreg(mdscr, mdscr_el1);
-	isb();
+	enable_monitor_debug_exceptions();
 }
 
 static void install_hw_bp(uint8_t bpn, uint64_t addr)
 {
 	uint32_t bcr;
-	uint32_t mdscr;
 
 	bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
 	write_dbgbcr(bpn, bcr);
 	write_dbgbvr(bpn, addr);
 	isb();
 
-	asm volatile("msr daifclr, #8");
-
-	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
-	write_sysreg(mdscr, mdscr_el1);
-	isb();
+	enable_monitor_debug_exceptions();
 }
 
 static void install_ss(void)
-- 
GitLab


From 948f439c9d0080972ec937f4aefbe51229546510 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:41:58 -0700
Subject: [PATCH 1329/4122] KVM: arm64: selftests: Stop unnecessary test stage
 tracking of debug-exceptions

Currently, debug-exceptions test unnecessarily tracks some test stages
using GUEST_SYNC().  The code for it needs to be updated as test cases
are added or removed.  Stop doing the unnecessary stage tracking,
as they are not so useful and are a bit pain to maintain.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-6-reijiw@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 46 ++++---------------
 1 file changed, 9 insertions(+), 37 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 0c237022f4d39..040e4d7f8755c 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -181,23 +181,17 @@ static volatile char write_data;
 
 static void guest_code(uint8_t bpn, uint8_t wpn)
 {
-	GUEST_SYNC(0);
-
 	/* Software-breakpoint */
 	reset_debug_state();
 	asm volatile("sw_bp: brk #0");
 	GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
 
-	GUEST_SYNC(1);
-
 	/* Hardware-breakpoint */
 	reset_debug_state();
 	install_hw_bp(bpn, PC(hw_bp));
 	asm volatile("hw_bp: nop");
 	GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
 
-	GUEST_SYNC(2);
-
 	/* Hardware-breakpoint + svc */
 	reset_debug_state();
 	install_hw_bp(bpn, PC(bp_svc));
@@ -205,8 +199,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 	GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
 	GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
 
-	GUEST_SYNC(3);
-
 	/* Hardware-breakpoint + software-breakpoint */
 	reset_debug_state();
 	install_hw_bp(bpn, PC(bp_brk));
@@ -214,8 +206,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 	GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
 	GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
 
-	GUEST_SYNC(4);
-
 	/* Watchpoint */
 	reset_debug_state();
 	install_wp(wpn, PC(write_data));
@@ -223,8 +213,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 	GUEST_ASSERT_EQ(write_data, 'x');
 	GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
 
-	GUEST_SYNC(5);
-
 	/* Single-step */
 	reset_debug_state();
 	install_ss();
@@ -238,8 +226,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 	GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
 	GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
 
-	GUEST_SYNC(6);
-
 	/* OS Lock does not block software-breakpoint */
 	reset_debug_state();
 	enable_os_lock();
@@ -247,8 +233,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 	asm volatile("sw_bp2: brk #0");
 	GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
 
-	GUEST_SYNC(7);
-
 	/* OS Lock blocking hardware-breakpoint */
 	reset_debug_state();
 	enable_os_lock();
@@ -257,8 +241,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 	asm volatile("hw_bp2: nop");
 	GUEST_ASSERT_EQ(hw_bp_addr, 0);
 
-	GUEST_SYNC(8);
-
 	/* OS Lock blocking watchpoint */
 	reset_debug_state();
 	enable_os_lock();
@@ -269,8 +251,6 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 	GUEST_ASSERT_EQ(write_data, 'x');
 	GUEST_ASSERT_EQ(wp_data_addr, 0);
 
-	GUEST_SYNC(9);
-
 	/* OS Lock blocking single-step */
 	reset_debug_state();
 	enable_os_lock();
@@ -370,7 +350,6 @@ static void test_guest_debug_exceptions(void)
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
-	int stage;
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	ucall_init(vm, NULL);
@@ -391,23 +370,16 @@ static void test_guest_debug_exceptions(void)
 
 	/* Run tests with breakpoint#0 and watchpoint#0. */
 	vcpu_args_set(vcpu, 2, 0, 0);
-	for (stage = 0; stage < 11; stage++) {
-		vcpu_run(vcpu);
 
-		switch (get_ucall(vcpu, &uc)) {
-		case UCALL_SYNC:
-			TEST_ASSERT(uc.args[1] == stage,
-				"Stage %d: Unexpected sync ucall, got %lx",
-				stage, (ulong)uc.args[1]);
-			break;
-		case UCALL_ABORT:
-			REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
-			break;
-		case UCALL_DONE:
-			goto done;
-		default:
-			TEST_FAIL("Unknown ucall %lu", uc.cmd);
-		}
+	vcpu_run(vcpu);
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+		break;
+	case UCALL_DONE:
+		goto done;
+	default:
+		TEST_FAIL("Unknown ucall %lu", uc.cmd);
 	}
 
 done:
-- 
GitLab


From 5dd544e882d96d43b363c5ef64683281f2a386d9 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:41:59 -0700
Subject: [PATCH 1330/4122] KVM: arm64: selftests: Change debug_version() to
 take ID_AA64DFR0_EL1

Change debug_version() to take the ID_AA64DFR0_EL1 value instead of
vcpu as an argument, and change its callsite to read ID_AA64DFR0_EL1
(and pass it to debug_version()).
Subsequent patches will reuse the register value in the callsite.

No functional change intended.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-7-reijiw@google.com
---
 tools/testing/selftests/kvm/aarch64/debug-exceptions.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 040e4d7f8755c..72ec9bb166827 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -337,11 +337,8 @@ static void guest_code_ss(int test_cnt)
 	GUEST_DONE();
 }
 
-static int debug_version(struct kvm_vcpu *vcpu)
+static int debug_version(uint64_t id_aa64dfr0)
 {
-	uint64_t id_aa64dfr0;
-
-	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0);
 	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0);
 }
 
@@ -466,9 +463,11 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	int opt;
 	int ss_iteration = 10000;
+	uint64_t aa64dfr0;
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	__TEST_REQUIRE(debug_version(vcpu) >= 6,
+	vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0);
+	__TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
 		       "Armv8 debug architecture not supported.");
 	kvm_vm_free(vm);
 
-- 
GitLab


From 142365932f5f296df593dd653d79194ff5457722 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:42:00 -0700
Subject: [PATCH 1331/4122] KVM: arm64: selftests: Add a test case for a linked
 breakpoint

Currently, the debug-exceptions test doesn't have a test case for
a linked breakpoint. Add a test case for the linked breakpoint to
the test. The new test case uses a pair of breakpoints. One is the
higiest numbered context-aware breakpoint (for Context ID match),
and the other one is the breakpoint#0 (for Address Match), which
is linked to the context-aware breakpoint.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-8-reijiw@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 63 +++++++++++++++++--
 1 file changed, 57 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 72ec9bb166827..362e7668a9782 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -12,6 +12,10 @@
 #define DBGBCR_EXEC	(0x0 << 3)
 #define DBGBCR_EL1	(0x1 << 1)
 #define DBGBCR_E	(0x1 << 0)
+#define DBGBCR_LBN_SHIFT	16
+#define DBGBCR_BT_SHIFT		20
+#define DBGBCR_BT_ADDR_LINK_CTX	(0x1 << DBGBCR_BT_SHIFT)
+#define DBGBCR_BT_CTX_LINK	(0x3 << DBGBCR_BT_SHIFT)
 
 #define DBGWCR_LEN8	(0xff << 5)
 #define DBGWCR_RD	(0x1 << 3)
@@ -22,7 +26,7 @@
 #define SPSR_D		(1 << 9)
 #define SPSR_SS		(1 << 21)
 
-extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start;
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
 extern unsigned char iter_ss_begin, iter_ss_end;
 static volatile uint64_t sw_bp_addr, hw_bp_addr;
 static volatile uint64_t wp_addr, wp_data_addr;
@@ -105,6 +109,7 @@ static void reset_debug_state(void)
 	isb();
 
 	write_sysreg(0, mdscr_el1);
+	write_sysreg(0, contextidr_el1);
 
 	/* Reset all bcr/bvr/wcr/wvr registers */
 	dfr0 = read_sysreg(id_aa64dfr0_el1);
@@ -166,6 +171,31 @@ static void install_hw_bp(uint8_t bpn, uint64_t addr)
 	enable_monitor_debug_exceptions();
 }
 
+void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
+		       uint64_t ctx)
+{
+	uint32_t addr_bcr, ctx_bcr;
+
+	/* Setup a context-aware breakpoint for Linked Context ID Match */
+	ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+		  DBGBCR_BT_CTX_LINK;
+	write_dbgbcr(ctx_bp, ctx_bcr);
+	write_dbgbvr(ctx_bp, ctx);
+
+	/*
+	 * Setup a normal breakpoint for Linked Address Match, and link it
+	 * to the context-aware breakpoint.
+	 */
+	addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+		   DBGBCR_BT_ADDR_LINK_CTX |
+		   ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
+	write_dbgbcr(addr_bp, addr_bcr);
+	write_dbgbvr(addr_bp, addr);
+	isb();
+
+	enable_monitor_debug_exceptions();
+}
+
 static void install_ss(void)
 {
 	uint32_t mdscr;
@@ -179,8 +209,10 @@ static void install_ss(void)
 
 static volatile char write_data;
 
-static void guest_code(uint8_t bpn, uint8_t wpn)
+static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
 {
+	uint64_t ctx = 0xabcdef;	/* a random context number */
+
 	/* Software-breakpoint */
 	reset_debug_state();
 	asm volatile("sw_bp: brk #0");
@@ -263,6 +295,17 @@ static void guest_code(uint8_t bpn, uint8_t wpn)
 		     : : : "x0");
 	GUEST_ASSERT_EQ(ss_addr[0], 0);
 
+	/* Linked hardware-breakpoint */
+	hw_bp_addr = 0;
+	reset_debug_state();
+	install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
+	/* Set context id */
+	write_sysreg(ctx, contextidr_el1);
+	isb();
+	asm volatile("hw_bp_ctx: nop");
+	write_sysreg(0, contextidr_el1);
+	GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
+
 	GUEST_DONE();
 }
 
@@ -342,11 +385,12 @@ static int debug_version(uint64_t id_aa64dfr0)
 	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0);
 }
 
-static void test_guest_debug_exceptions(void)
+static void test_guest_debug_exceptions(uint64_t aa64dfr0)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
+	uint8_t brp_num;
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	ucall_init(vm, NULL);
@@ -365,8 +409,15 @@ static void test_guest_debug_exceptions(void)
 	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
 				ESR_EC_SVC64, guest_svc_handler);
 
-	/* Run tests with breakpoint#0 and watchpoint#0. */
-	vcpu_args_set(vcpu, 2, 0, 0);
+	/* Number of breakpoints */
+	brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1;
+	__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
+
+	/*
+	 * Run tests with breakpoint#0, watchpoint#0, and the higiest
+	 * numbered (context-aware) breakpoint.
+	 */
+	vcpu_args_set(vcpu, 3, 0, 0, brp_num - 1);
 
 	vcpu_run(vcpu);
 	switch (get_ucall(vcpu, &uc)) {
@@ -483,7 +534,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	test_guest_debug_exceptions();
+	test_guest_debug_exceptions(aa64dfr0);
 	test_single_step_from_userspace(ss_iteration);
 
 	return 0;
-- 
GitLab


From 5ced4e533b676b1a582d89aba5328e4b316957e0 Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:42:01 -0700
Subject: [PATCH 1332/4122] KVM: arm64: selftests: Add a test case for a linked
 watchpoint

Currently, the debug-exceptions test doesn't have a test case for
a linked watchpoint. Add a test case for the linked watchpoint to
the test. The new test case uses the highest numbered context-aware
breakpoint (for Context ID match), and the watchpoint#0, which is
linked to the context-aware breakpoint.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-9-reijiw@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 362e7668a9782..73a95e6b345e5 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -22,6 +22,9 @@
 #define DBGWCR_WR	(0x2 << 3)
 #define DBGWCR_EL1	(0x1 << 1)
 #define DBGWCR_E	(0x1 << 0)
+#define DBGWCR_LBN_SHIFT	16
+#define DBGWCR_WT_SHIFT		20
+#define DBGWCR_WT_LINK		(0x1 << DBGWCR_WT_SHIFT)
 
 #define SPSR_D		(1 << 9)
 #define SPSR_SS		(1 << 21)
@@ -171,6 +174,28 @@ static void install_hw_bp(uint8_t bpn, uint64_t addr)
 	enable_monitor_debug_exceptions();
 }
 
+static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
+			   uint64_t ctx)
+{
+	uint32_t wcr;
+	uint64_t ctx_bcr;
+
+	/* Setup a context-aware breakpoint for Linked Context ID Match */
+	ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+		  DBGBCR_BT_CTX_LINK;
+	write_dbgbcr(ctx_bp, ctx_bcr);
+	write_dbgbvr(ctx_bp, ctx);
+
+	/* Setup a linked watchpoint (linked to the context-aware breakpoint) */
+	wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
+	      DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
+	write_dbgwcr(addr_wp, wcr);
+	write_dbgwvr(addr_wp, addr);
+	isb();
+
+	enable_monitor_debug_exceptions();
+}
+
 void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
 		       uint64_t ctx)
 {
@@ -306,6 +331,16 @@ static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
 	write_sysreg(0, contextidr_el1);
 	GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
 
+	/* Linked watchpoint */
+	reset_debug_state();
+	install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
+	/* Set context id */
+	write_sysreg(ctx, contextidr_el1);
+	isb();
+	write_data = 'x';
+	GUEST_ASSERT_EQ(write_data, 'x');
+	GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
 	GUEST_DONE();
 }
 
-- 
GitLab


From ebb8cc10316de3040efc4cfb40030f374cbbaa3b Mon Sep 17 00:00:00 2001
From: Reiji Watanabe <reijiw@google.com>
Date: Wed, 19 Oct 2022 22:42:02 -0700
Subject: [PATCH 1333/4122] KVM: arm64: selftests: Test with every
 breakpoint/watchpoint

Currently, the debug-exceptions test always uses only
{break,watch}point#0 and the highest numbered context-aware
breakpoint. Modify the test to use all {break,watch}points and
context-aware breakpoints supported on the system.

Signed-off-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221020054202.2119018-10-reijiw@google.com
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 54 ++++++++++++++-----
 1 file changed, 42 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 73a95e6b345e5..b30add3e77269 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -420,12 +420,11 @@ static int debug_version(uint64_t id_aa64dfr0)
 	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), id_aa64dfr0);
 }
 
-static void test_guest_debug_exceptions(uint64_t aa64dfr0)
+static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct ucall uc;
-	uint8_t brp_num;
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	ucall_init(vm, NULL);
@@ -444,15 +443,9 @@ static void test_guest_debug_exceptions(uint64_t aa64dfr0)
 	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
 				ESR_EC_SVC64, guest_svc_handler);
 
-	/* Number of breakpoints */
-	brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1;
-	__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
-
-	/*
-	 * Run tests with breakpoint#0, watchpoint#0, and the higiest
-	 * numbered (context-aware) breakpoint.
-	 */
-	vcpu_args_set(vcpu, 3, 0, 0, brp_num - 1);
+	/* Specify bpn/wpn/ctx_bpn to be tested */
+	vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
+	pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
 
 	vcpu_run(vcpu);
 	switch (get_ucall(vcpu, &uc)) {
@@ -535,6 +528,43 @@ void test_single_step_from_userspace(int test_cnt)
 	kvm_vm_free(vm);
 }
 
+/*
+ * Run debug testing using the various breakpoint#, watchpoint# and
+ * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
+ */
+void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
+{
+	uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
+	int b, w, c;
+
+	/* Number of breakpoints */
+	brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_BRPS), aa64dfr0) + 1;
+	__TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
+
+	/* Number of watchpoints */
+	wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_WRPS), aa64dfr0) + 1;
+
+	/* Number of context aware breakpoints */
+	ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_CTX_CMPS), aa64dfr0) + 1;
+
+	pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
+		 brp_num, wrp_num, ctx_brp_num);
+
+	/* Number of normal (non-context aware) breakpoints */
+	normal_brp_num = brp_num - ctx_brp_num;
+
+	/* Lowest context aware breakpoint number */
+	ctx_brp_base = normal_brp_num;
+
+	/* Run tests with all supported breakpoints/watchpoints */
+	for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
+		for (b = 0; b < normal_brp_num; b++) {
+			for (w = 0; w < wrp_num; w++)
+				test_guest_debug_exceptions(b, w, c);
+		}
+	}
+}
+
 static void help(char *name)
 {
 	puts("");
@@ -569,7 +599,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	test_guest_debug_exceptions(aa64dfr0);
+	test_guest_debug_exceptions_all(aa64dfr0);
 	test_single_step_from_userspace(ss_iteration);
 
 	return 0;
-- 
GitLab


From a93871d0ea9fd59fb5eb783619334183d7f07f51 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:21 +0000
Subject: [PATCH 1334/4122] KVM: selftests: Add a userfaultfd library

Move the generic userfaultfd code out of demand_paging_test.c into a
common library, userfaultfd_util. This library consists of a setup and a
stop function. The setup function starts a thread for handling page
faults using the handler callback function. This setup returns a
uffd_desc object which is then used in the stop function (to wait and
destroy the threads).

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-2-ricarkol@google.com
---
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/demand_paging_test.c        | 228 +++---------------
 .../selftests/kvm/include/userfaultfd_util.h  |  45 ++++
 .../selftests/kvm/lib/userfaultfd_util.c      | 186 ++++++++++++++
 4 files changed, 262 insertions(+), 198 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/include/userfaultfd_util.h
 create mode 100644 tools/testing/selftests/kvm/lib/userfaultfd_util.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 0172eb6cb6eee..08a2606aff331 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -47,6 +47,7 @@ LIBKVM += lib/perf_test_util.c
 LIBKVM += lib/rbtree.c
 LIBKVM += lib/sparsebit.c
 LIBKVM += lib/test_util.c
+LIBKVM += lib/userfaultfd_util.c
 
 LIBKVM_STRING += lib/string_override.c
 
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 779ae54f89c40..8e1fe4ffcccdf 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -22,23 +22,13 @@
 #include "test_util.h"
 #include "perf_test_util.h"
 #include "guest_modes.h"
+#include "userfaultfd_util.h"
 
 #ifdef __NR_userfaultfd
 
-#ifdef PRINT_PER_PAGE_UPDATES
-#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
-#ifdef PRINT_PER_VCPU_UPDATES
-#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
 static int nr_vcpus = 1;
 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
 static size_t demand_paging_size;
 static char *guest_data_prototype;
 
@@ -67,9 +57,11 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 		       ts_diff.tv_sec, ts_diff.tv_nsec);
 }
 
-static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
+static int handle_uffd_page_request(int uffd_mode, int uffd,
+		struct uffd_msg *msg)
 {
 	pid_t tid = syscall(__NR_gettid);
+	uint64_t addr = msg->arg.pagefault.address;
 	struct timespec start;
 	struct timespec ts_diff;
 	int r;
@@ -116,174 +108,32 @@ static int handle_uffd_page_request(int uffd_mode, int uffd, uint64_t addr)
 	return 0;
 }
 
-bool quit_uffd_thread;
-
-struct uffd_handler_args {
+struct test_params {
 	int uffd_mode;
-	int uffd;
-	int pipefd;
-	useconds_t delay;
+	useconds_t uffd_delay;
+	enum vm_mem_backing_src_type src_type;
+	bool partition_vcpu_memory_access;
 };
 
-static void *uffd_handler_thread_fn(void *arg)
-{
-	struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
-	int uffd = uffd_args->uffd;
-	int pipefd = uffd_args->pipefd;
-	useconds_t delay = uffd_args->delay;
-	int64_t pages = 0;
-	struct timespec start;
-	struct timespec ts_diff;
-
-	clock_gettime(CLOCK_MONOTONIC, &start);
-	while (!quit_uffd_thread) {
-		struct uffd_msg msg;
-		struct pollfd pollfd[2];
-		char tmp_chr;
-		int r;
-		uint64_t addr;
-
-		pollfd[0].fd = uffd;
-		pollfd[0].events = POLLIN;
-		pollfd[1].fd = pipefd;
-		pollfd[1].events = POLLIN;
-
-		r = poll(pollfd, 2, -1);
-		switch (r) {
-		case -1:
-			pr_info("poll err");
-			continue;
-		case 0:
-			continue;
-		case 1:
-			break;
-		default:
-			pr_info("Polling uffd returned %d", r);
-			return NULL;
-		}
-
-		if (pollfd[0].revents & POLLERR) {
-			pr_info("uffd revents has POLLERR");
-			return NULL;
-		}
-
-		if (pollfd[1].revents & POLLIN) {
-			r = read(pollfd[1].fd, &tmp_chr, 1);
-			TEST_ASSERT(r == 1,
-				    "Error reading pipefd in UFFD thread\n");
-			return NULL;
-		}
-
-		if (!(pollfd[0].revents & POLLIN))
-			continue;
-
-		r = read(uffd, &msg, sizeof(msg));
-		if (r == -1) {
-			if (errno == EAGAIN)
-				continue;
-			pr_info("Read of uffd got errno %d\n", errno);
-			return NULL;
-		}
-
-		if (r != sizeof(msg)) {
-			pr_info("Read on uffd returned unexpected size: %d bytes", r);
-			return NULL;
-		}
-
-		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
-			continue;
-
-		if (delay)
-			usleep(delay);
-		addr =  msg.arg.pagefault.address;
-		r = handle_uffd_page_request(uffd_args->uffd_mode, uffd, addr);
-		if (r < 0)
-			return NULL;
-		pages++;
-	}
-
-	ts_diff = timespec_elapsed(start);
-	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
-		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
-		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
-
-	return NULL;
-}
-
-static void setup_demand_paging(struct kvm_vm *vm,
-				pthread_t *uffd_handler_thread, int pipefd,
-				int uffd_mode, useconds_t uffd_delay,
-				struct uffd_handler_args *uffd_args,
-				void *hva, void *alias, uint64_t len)
+static void prefault_mem(void *alias, uint64_t len)
 {
-	bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
-	int uffd;
-	struct uffdio_api uffdio_api;
-	struct uffdio_register uffdio_register;
-	uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
-	int ret;
+	size_t p;
 
-	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
-		       is_minor ? "MINOR" : "MISSING",
-		       is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
-
-	/* In order to get minor faults, prefault via the alias. */
-	if (is_minor) {
-		size_t p;
-
-		expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
-
-		TEST_ASSERT(alias != NULL, "Alias required for minor faults");
-		for (p = 0; p < (len / demand_paging_size); ++p) {
-			memcpy(alias + (p * demand_paging_size),
-			       guest_data_prototype, demand_paging_size);
-		}
+	TEST_ASSERT(alias != NULL, "Alias required for minor faults");
+	for (p = 0; p < (len / demand_paging_size); ++p) {
+		memcpy(alias + (p * demand_paging_size),
+		       guest_data_prototype, demand_paging_size);
 	}
-
-	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
-	TEST_ASSERT(uffd >= 0, __KVM_SYSCALL_ERROR("userfaultfd()", uffd));
-
-	uffdio_api.api = UFFD_API;
-	uffdio_api.features = 0;
-	ret = ioctl(uffd, UFFDIO_API, &uffdio_api);
-	TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_API", ret));
-
-	uffdio_register.range.start = (uint64_t)hva;
-	uffdio_register.range.len = len;
-	uffdio_register.mode = uffd_mode;
-	ret = ioctl(uffd, UFFDIO_REGISTER, &uffdio_register);
-	TEST_ASSERT(ret != -1, __KVM_SYSCALL_ERROR("UFFDIO_REGISTER", ret));
-	TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
-		    expected_ioctls, "missing userfaultfd ioctls");
-
-	uffd_args->uffd_mode = uffd_mode;
-	uffd_args->uffd = uffd;
-	uffd_args->pipefd = pipefd;
-	uffd_args->delay = uffd_delay;
-	pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
-		       uffd_args);
-
-	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
-		       hva, hva + len);
 }
 
-struct test_params {
-	int uffd_mode;
-	useconds_t uffd_delay;
-	enum vm_mem_backing_src_type src_type;
-	bool partition_vcpu_memory_access;
-};
-
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *p = arg;
-	pthread_t *uffd_handler_threads = NULL;
-	struct uffd_handler_args *uffd_args = NULL;
+	struct uffd_desc **uffd_descs = NULL;
 	struct timespec start;
 	struct timespec ts_diff;
-	int *pipefds = NULL;
 	struct kvm_vm *vm;
-	int r, i;
+	int i;
 
 	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
 				 p->src_type, p->partition_vcpu_memory_access);
@@ -296,15 +146,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	memset(guest_data_prototype, 0xAB, demand_paging_size);
 
 	if (p->uffd_mode) {
-		uffd_handler_threads =
-			malloc(nr_vcpus * sizeof(*uffd_handler_threads));
-		TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
-
-		uffd_args = malloc(nr_vcpus * sizeof(*uffd_args));
-		TEST_ASSERT(uffd_args, "Memory allocation failed");
-
-		pipefds = malloc(sizeof(int) * nr_vcpus * 2);
-		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
+		uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
+		TEST_ASSERT(uffd_descs, "Memory allocation failed");
 
 		for (i = 0; i < nr_vcpus; i++) {
 			struct perf_test_vcpu_args *vcpu_args;
@@ -317,19 +160,17 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
 			vcpu_alias = addr_gpa2alias(vm, vcpu_args->gpa);
 
+			prefault_mem(vcpu_alias,
+				vcpu_args->pages * perf_test_args.guest_page_size);
+
 			/*
 			 * Set up user fault fd to handle demand paging
 			 * requests.
 			 */
-			r = pipe2(&pipefds[i * 2],
-				  O_CLOEXEC | O_NONBLOCK);
-			TEST_ASSERT(!r, "Failed to set up pipefd");
-
-			setup_demand_paging(vm, &uffd_handler_threads[i],
-					    pipefds[i * 2], p->uffd_mode,
-					    p->uffd_delay, &uffd_args[i],
-					    vcpu_hva, vcpu_alias,
-					    vcpu_args->pages * perf_test_args.guest_page_size);
+			uffd_descs[i] = uffd_setup_demand_paging(
+				p->uffd_mode, p->uffd_delay, vcpu_hva,
+				vcpu_args->pages * perf_test_args.guest_page_size,
+				&handle_uffd_page_request);
 		}
 	}
 
@@ -344,15 +185,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	pr_info("All vCPU threads joined\n");
 
 	if (p->uffd_mode) {
-		char c;
-
 		/* Tell the user fault fd handler threads to quit */
-		for (i = 0; i < nr_vcpus; i++) {
-			r = write(pipefds[i * 2 + 1], &c, 1);
-			TEST_ASSERT(r == 1, "Unable to write to pipefd");
-
-			pthread_join(uffd_handler_threads[i], NULL);
-		}
+		for (i = 0; i < nr_vcpus; i++)
+			uffd_stop_demand_paging(uffd_descs[i]);
 	}
 
 	pr_info("Total guest execution time: %ld.%.9lds\n",
@@ -364,11 +199,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	perf_test_destroy_vm(vm);
 
 	free(guest_data_prototype);
-	if (p->uffd_mode) {
-		free(uffd_handler_threads);
-		free(uffd_args);
-		free(pipefds);
-	}
+	if (p->uffd_mode)
+		free(uffd_descs);
 }
 
 static void help(char *name)
diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h
new file mode 100644
index 0000000000000..877449c345928
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM userfaultfd util
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019-2022 Google LLC
+ */
+
+#define _GNU_SOURCE /* for pipe2 */
+
+#include <inttypes.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+
+#include "test_util.h"
+
+typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
+
+struct uffd_desc {
+	int uffd_mode;
+	int uffd;
+	int pipefds[2];
+	useconds_t delay;
+	uffd_handler_t handler;
+	pthread_t thread;
+};
+
+struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
+					   void *hva, uint64_t len,
+					   uffd_handler_t handler);
+
+void uffd_stop_demand_paging(struct uffd_desc *uffd);
+
+#ifdef PRINT_PER_PAGE_UPDATES
+#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#ifdef PRINT_PER_VCPU_UPDATES
+#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
new file mode 100644
index 0000000000000..3b44846fc277e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM userfaultfd util
+ * Adapted from demand_paging_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019-2022 Google LLC
+ */
+
+#define _GNU_SOURCE /* for pipe2 */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <sys/syscall.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "perf_test_util.h"
+#include "userfaultfd_util.h"
+
+#ifdef __NR_userfaultfd
+
+static void *uffd_handler_thread_fn(void *arg)
+{
+	struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
+	int uffd = uffd_desc->uffd;
+	int pipefd = uffd_desc->pipefds[0];
+	useconds_t delay = uffd_desc->delay;
+	int64_t pages = 0;
+	struct timespec start;
+	struct timespec ts_diff;
+
+	clock_gettime(CLOCK_MONOTONIC, &start);
+	while (1) {
+		struct uffd_msg msg;
+		struct pollfd pollfd[2];
+		char tmp_chr;
+		int r;
+
+		pollfd[0].fd = uffd;
+		pollfd[0].events = POLLIN;
+		pollfd[1].fd = pipefd;
+		pollfd[1].events = POLLIN;
+
+		r = poll(pollfd, 2, -1);
+		switch (r) {
+		case -1:
+			pr_info("poll err");
+			continue;
+		case 0:
+			continue;
+		case 1:
+			break;
+		default:
+			pr_info("Polling uffd returned %d", r);
+			return NULL;
+		}
+
+		if (pollfd[0].revents & POLLERR) {
+			pr_info("uffd revents has POLLERR");
+			return NULL;
+		}
+
+		if (pollfd[1].revents & POLLIN) {
+			r = read(pollfd[1].fd, &tmp_chr, 1);
+			TEST_ASSERT(r == 1,
+				    "Error reading pipefd in UFFD thread\n");
+			return NULL;
+		}
+
+		if (!(pollfd[0].revents & POLLIN))
+			continue;
+
+		r = read(uffd, &msg, sizeof(msg));
+		if (r == -1) {
+			if (errno == EAGAIN)
+				continue;
+			pr_info("Read of uffd got errno %d\n", errno);
+			return NULL;
+		}
+
+		if (r != sizeof(msg)) {
+			pr_info("Read on uffd returned unexpected size: %d bytes", r);
+			return NULL;
+		}
+
+		if (!(msg.event & UFFD_EVENT_PAGEFAULT))
+			continue;
+
+		if (delay)
+			usleep(delay);
+		r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
+		if (r < 0)
+			return NULL;
+		pages++;
+	}
+
+	ts_diff = timespec_elapsed(start);
+	PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
+		       pages, ts_diff.tv_sec, ts_diff.tv_nsec,
+		       pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+
+	return NULL;
+}
+
+struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
+					   void *hva, uint64_t len,
+					   uffd_handler_t handler)
+{
+	struct uffd_desc *uffd_desc;
+	bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
+	int uffd;
+	struct uffdio_api uffdio_api;
+	struct uffdio_register uffdio_register;
+	uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
+	int ret;
+
+	PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
+		       is_minor ? "MINOR" : "MISSING",
+		       is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+
+	uffd_desc = malloc(sizeof(struct uffd_desc));
+	TEST_ASSERT(uffd_desc, "malloc failed");
+
+	/* In order to get minor faults, prefault via the alias. */
+	if (is_minor)
+		expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
+
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = 0;
+	TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
+		    "ioctl UFFDIO_API failed: %" PRIu64,
+		    (uint64_t)uffdio_api.api);
+
+	uffdio_register.range.start = (uint64_t)hva;
+	uffdio_register.range.len = len;
+	uffdio_register.mode = uffd_mode;
+	TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
+		    "ioctl UFFDIO_REGISTER failed");
+	TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
+		    expected_ioctls, "missing userfaultfd ioctls");
+
+	ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
+	TEST_ASSERT(!ret, "Failed to set up pipefd");
+
+	uffd_desc->uffd_mode = uffd_mode;
+	uffd_desc->uffd = uffd;
+	uffd_desc->delay = delay;
+	uffd_desc->handler = handler;
+	pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
+		       uffd_desc);
+
+	PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
+		       hva, hva + len);
+
+	return uffd_desc;
+}
+
+void uffd_stop_demand_paging(struct uffd_desc *uffd)
+{
+	char c = 0;
+	int ret;
+
+	ret = write(uffd->pipefds[1], &c, 1);
+	TEST_ASSERT(ret == 1, "Unable to write to pipefd");
+
+	ret = pthread_join(uffd->thread, NULL);
+	TEST_ASSERT(ret == 0, "Pthread_join failed.");
+
+	close(uffd->uffd);
+
+	close(uffd->pipefds[1]);
+	close(uffd->pipefds[0]);
+
+	free(uffd);
+}
+
+#endif /* __NR_userfaultfd */
-- 
GitLab


From 228f324dc718f702e8777164c4e2e7426824fb13 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:22 +0000
Subject: [PATCH 1335/4122] KVM: selftests: aarch64: Add virt_get_pte_hva()
 library function

Add a library function to get the PTE (a host virtual address) of a
given GVA.  This will be used in a future commit by a test to clear and
check the access flag of a particular page.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-3-ricarkol@google.com
---
 .../selftests/kvm/include/aarch64/processor.h       |  2 ++
 tools/testing/selftests/kvm/lib/aarch64/processor.c | 13 ++++++++++---
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index a8124f9dd68a4..df4bfac695517 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -109,6 +109,8 @@ void vm_install_exception_handler(struct kvm_vm *vm,
 void vm_install_sync_handler(struct kvm_vm *vm,
 		int vector, int ec, handler_fn handler);
 
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+
 static inline void cpu_relax(void)
 {
 	asm volatile("yield" ::: "memory");
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 6f5551368944e..63ef3c78e55ee 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -138,7 +138,7 @@ void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 	_virt_pg_map(vm, vaddr, paddr, attr_idx);
 }
 
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
 {
 	uint64_t *ptep;
 
@@ -169,11 +169,18 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 		TEST_FAIL("Page table levels must be 2, 3, or 4");
 	}
 
-	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+	return ptep;
 
 unmapped_gva:
 	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-	exit(1);
+	exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+	uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+	return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
 }
 
 static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
-- 
GitLab


From b6b03b86c0250a80b671313dbc0d7bcdbab78f41 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:23 +0000
Subject: [PATCH 1336/4122] KVM: selftests: Add missing close and munmap in
 __vm_mem_region_delete()

Deleting a memslot (when freeing a VM) is not closing the backing fd,
nor it's unmapping the alias mapping. Fix by adding the missing close
and munmap.

Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Reviewed-by: Oliver Upton <oupton@google.com>
Reviewed-by: Ben Gardon <bgardon@google.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-4-ricarkol@google.com
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f1cb1627161fd..19e37fb7de7cc 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -586,6 +586,12 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
 	sparsebit_free(&region->unused_phy_pages);
 	ret = munmap(region->mmap_start, region->mmap_size);
 	TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+	if (region->fd >= 0) {
+		/* There's an extra map when using shared memory. */
+		ret = munmap(region->mmap_alias, region->mmap_size);
+		TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+		close(region->fd);
+	}
 
 	free(region);
 }
-- 
GitLab


From 41f5189ea9c08f7fc28340a7aefc93d0d2dcb769 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:24 +0000
Subject: [PATCH 1337/4122] KVM: selftests: aarch64: Construct DEFAULT_MAIR_EL1
 using sysreg.h macros

Define macros for memory type indexes and construct DEFAULT_MAIR_EL1
with macros from asm/sysreg.h.  The index macros can then be used when
constructing PTEs (instead of using raw numbers).

Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Reviewed-by: Oliver Upton <oupton@google.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-5-ricarkol@google.com
---
 .../selftests/kvm/include/aarch64/processor.h | 25 ++++++++++++++-----
 .../selftests/kvm/lib/aarch64/processor.c     |  2 +-
 2 files changed, 20 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index df4bfac695517..c1ddca8db225a 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -38,12 +38,25 @@
  * NORMAL             4     1111:1111
  * NORMAL_WT          5     1011:1011
  */
-#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
-			  (0x04ul << (1 * 8)) | \
-			  (0x0cul << (2 * 8)) | \
-			  (0x44ul << (3 * 8)) | \
-			  (0xfful << (4 * 8)) | \
-			  (0xbbul << (5 * 8)))
+
+/* Linux doesn't use these memory types, so let's define them. */
+#define MAIR_ATTR_DEVICE_GRE	UL(0x0c)
+#define MAIR_ATTR_NORMAL_WT	UL(0xbb)
+
+#define MT_DEVICE_nGnRnE	0
+#define MT_DEVICE_nGnRE		1
+#define MT_DEVICE_GRE		2
+#define MT_NORMAL_NC		3
+#define MT_NORMAL		4
+#define MT_NORMAL_WT		5
+
+#define DEFAULT_MAIR_EL1							\
+	(MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) |		\
+	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) |		\
+	 MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) |			\
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) |			\
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) |				\
+	 MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
 
 #define MPIDR_HWID_BITMASK (0xff00fffffful)
 
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 63ef3c78e55ee..26f0eccff6fee 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -133,7 +133,7 @@ static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 
 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
 {
-	uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+	uint64_t attr_idx = MT_NORMAL;
 
 	_virt_pg_map(vm, vaddr, paddr, attr_idx);
 }
-- 
GitLab


From 590b949597b1e811d35df2f32021dd17d8e47f8c Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:25 +0000
Subject: [PATCH 1338/4122] tools: Copy bitfield.h from the kernel sources

Copy bitfield.h from include/linux/bitfield.h.  A subsequent change will
make use of some FIELD_{GET,PREP} macros defined in this header.

The header was copied as-is, no changes needed.

Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Reviewed-by: Oliver Upton <oupton@google.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-6-ricarkol@google.com
---
 tools/include/linux/bitfield.h | 176 +++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)
 create mode 100644 tools/include/linux/bitfield.h

diff --git a/tools/include/linux/bitfield.h b/tools/include/linux/bitfield.h
new file mode 100644
index 0000000000000..6093fa6db2600
--- /dev/null
+++ b/tools/include/linux/bitfield.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2014 Felix Fietkau <nbd@nbd.name>
+ * Copyright (C) 2004 - 2009 Ivo van Doorn <IvDoorn@gmail.com>
+ */
+
+#ifndef _LINUX_BITFIELD_H
+#define _LINUX_BITFIELD_H
+
+#include <linux/build_bug.h>
+#include <asm/byteorder.h>
+
+/*
+ * Bitfield access macros
+ *
+ * FIELD_{GET,PREP} macros take as first parameter shifted mask
+ * from which they extract the base mask and shift amount.
+ * Mask must be a compilation time constant.
+ *
+ * Example:
+ *
+ *  #define REG_FIELD_A  GENMASK(6, 0)
+ *  #define REG_FIELD_B  BIT(7)
+ *  #define REG_FIELD_C  GENMASK(15, 8)
+ *  #define REG_FIELD_D  GENMASK(31, 16)
+ *
+ * Get:
+ *  a = FIELD_GET(REG_FIELD_A, reg);
+ *  b = FIELD_GET(REG_FIELD_B, reg);
+ *
+ * Set:
+ *  reg = FIELD_PREP(REG_FIELD_A, 1) |
+ *	  FIELD_PREP(REG_FIELD_B, 0) |
+ *	  FIELD_PREP(REG_FIELD_C, c) |
+ *	  FIELD_PREP(REG_FIELD_D, 0x40);
+ *
+ * Modify:
+ *  reg &= ~REG_FIELD_C;
+ *  reg |= FIELD_PREP(REG_FIELD_C, c);
+ */
+
+#define __bf_shf(x) (__builtin_ffsll(x) - 1)
+
+#define __scalar_type_to_unsigned_cases(type)				\
+		unsigned type:	(unsigned type)0,			\
+		signed type:	(unsigned type)0
+
+#define __unsigned_scalar_typeof(x) typeof(				\
+		_Generic((x),						\
+			char:	(unsigned char)0,			\
+			__scalar_type_to_unsigned_cases(char),		\
+			__scalar_type_to_unsigned_cases(short),		\
+			__scalar_type_to_unsigned_cases(int),		\
+			__scalar_type_to_unsigned_cases(long),		\
+			__scalar_type_to_unsigned_cases(long long),	\
+			default: (x)))
+
+#define __bf_cast_unsigned(type, x)	((__unsigned_scalar_typeof(type))(x))
+
+#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx)			\
+	({								\
+		BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask),		\
+				 _pfx "mask is not constant");		\
+		BUILD_BUG_ON_MSG((_mask) == 0, _pfx "mask is zero");	\
+		BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ?		\
+				 ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \
+				 _pfx "value too large for the field"); \
+		BUILD_BUG_ON_MSG(__bf_cast_unsigned(_mask, _mask) >	\
+				 __bf_cast_unsigned(_reg, ~0ull),	\
+				 _pfx "type of reg too small for mask"); \
+		__BUILD_BUG_ON_NOT_POWER_OF_2((_mask) +			\
+					      (1ULL << __bf_shf(_mask))); \
+	})
+
+/**
+ * FIELD_MAX() - produce the maximum value representable by a field
+ * @_mask: shifted mask defining the field's length and position
+ *
+ * FIELD_MAX() returns the maximum value that can be held in the field
+ * specified by @_mask.
+ */
+#define FIELD_MAX(_mask)						\
+	({								\
+		__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_MAX: ");	\
+		(typeof(_mask))((_mask) >> __bf_shf(_mask));		\
+	})
+
+/**
+ * FIELD_FIT() - check if value fits in the field
+ * @_mask: shifted mask defining the field's length and position
+ * @_val:  value to test against the field
+ *
+ * Return: true if @_val can fit inside @_mask, false if @_val is too big.
+ */
+#define FIELD_FIT(_mask, _val)						\
+	({								\
+		__BF_FIELD_CHECK(_mask, 0ULL, 0ULL, "FIELD_FIT: ");	\
+		!((((typeof(_mask))_val) << __bf_shf(_mask)) & ~(_mask)); \
+	})
+
+/**
+ * FIELD_PREP() - prepare a bitfield element
+ * @_mask: shifted mask defining the field's length and position
+ * @_val:  value to put in the field
+ *
+ * FIELD_PREP() masks and shifts up the value.  The result should
+ * be combined with other fields of the bitfield using logical OR.
+ */
+#define FIELD_PREP(_mask, _val)						\
+	({								\
+		__BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: ");	\
+		((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask);	\
+	})
+
+/**
+ * FIELD_GET() - extract a bitfield element
+ * @_mask: shifted mask defining the field's length and position
+ * @_reg:  value of entire bitfield
+ *
+ * FIELD_GET() extracts the field specified by @_mask from the
+ * bitfield passed in as @_reg by masking and shifting it down.
+ */
+#define FIELD_GET(_mask, _reg)						\
+	({								\
+		__BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: ");	\
+		(typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask));	\
+	})
+
+extern void __compiletime_error("value doesn't fit into mask")
+__field_overflow(void);
+extern void __compiletime_error("bad bitfield mask")
+__bad_mask(void);
+static __always_inline u64 field_multiplier(u64 field)
+{
+	if ((field | (field - 1)) & ((field | (field - 1)) + 1))
+		__bad_mask();
+	return field & -field;
+}
+static __always_inline u64 field_mask(u64 field)
+{
+	return field / field_multiplier(field);
+}
+#define field_max(field)	((typeof(field))field_mask(field))
+#define ____MAKE_OP(type,base,to,from)					\
+static __always_inline __##type type##_encode_bits(base v, base field)	\
+{									\
+	if (__builtin_constant_p(v) && (v & ~field_mask(field)))	\
+		__field_overflow();					\
+	return to((v & field_mask(field)) * field_multiplier(field));	\
+}									\
+static __always_inline __##type type##_replace_bits(__##type old,	\
+					base val, base field)		\
+{									\
+	return (old & ~to(field)) | type##_encode_bits(val, field);	\
+}									\
+static __always_inline void type##p_replace_bits(__##type *p,		\
+					base val, base field)		\
+{									\
+	*p = (*p & ~to(field)) | type##_encode_bits(val, field);	\
+}									\
+static __always_inline base type##_get_bits(__##type v, base field)	\
+{									\
+	return (from(v) & field)/field_multiplier(field);		\
+}
+#define __MAKE_OP(size)							\
+	____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu)	\
+	____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu)	\
+	____MAKE_OP(u##size,u##size,,)
+____MAKE_OP(u8,u8,,)
+__MAKE_OP(16)
+__MAKE_OP(32)
+__MAKE_OP(64)
+#undef __MAKE_OP
+#undef ____MAKE_OP
+
+#endif
-- 
GitLab


From bd3ed7e1a47eb7b3838ca09439f1eb289ec3be1f Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:26 +0000
Subject: [PATCH 1339/4122] KVM: selftests: Stash backing_src_type in struct
 userspace_mem_region

Add the backing_src_type into struct userspace_mem_region. This struct
already stores a lot of info about memory regions, except the backing
source type.  This info will be used by a future commit in order to
determine the method for punching a hole.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-7-ricarkol@google.com
---
 tools/testing/selftests/kvm/include/kvm_util_base.h | 1 +
 tools/testing/selftests/kvm/lib/kvm_util.c          | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index e42a09cd24a04..a9264ed22cca5 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -34,6 +34,7 @@ struct userspace_mem_region {
 	struct sparsebit *unused_phy_pages;
 	int fd;
 	off_t offset;
+	enum vm_mem_backing_src_type backing_src_type;
 	void *host_mem;
 	void *host_alias;
 	void *mmap_start;
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 19e37fb7de7cc..6affce47e899a 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -929,6 +929,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 			    vm_mem_backing_src_alias(src_type)->name);
 	}
 
+	region->backing_src_type = src_type;
 	region->unused_phy_pages = sparsebit_alloc();
 	sparsebit_set_num(region->unused_phy_pages,
 		guest_paddr >> vm->page_shift, npages);
-- 
GitLab


From 290c5b54012b7f05e9c51af32d557574bf69a654 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:27 +0000
Subject: [PATCH 1340/4122] KVM: selftests: Add vm->memslots[] and enum
 kvm_mem_region_type

The vm_create() helpers are hardcoded to place most page types (code,
page-tables, stacks, etc) in the same memslot #0, and always backed with
anonymous 4K.  There are a couple of issues with that.  First, tests
willing to differ a bit, like placing page-tables in a different backing
source type must replicate much of what's already done by the vm_create()
functions.  Second, the hardcoded assumption of memslot #0 holding most
things is spread everywhere; this makes it very hard to change.

Fix the above issues by having selftests specify how they want memory to be
laid out. Start by changing ____vm_create() to not create memslot #0; a
test (to come) will specify all memslots used by the VM.  Then, add the
vm->memslots[] array to specify the right memslot for different memory
allocators, e.g.,: lib/elf should use the vm->[MEM_REGION_CODE] memslot.
This will be used as a way to specify the page-tables memslots (to be
backed by huge pages for example).

There is no functional change intended. The current commit lays out memory
exactly as before. A future commit will change the allocators to get the
region they should be using, e.g.,: like the page table allocators using
the pt memslot.

Cc: Sean Christopherson <seanjc@google.com>
Cc: Andrew Jones <andrew.jones@linux.dev>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-8-ricarkol@google.com
---
 .../selftests/kvm/include/kvm_util_base.h     | 26 +++++++++++++++++--
 tools/testing/selftests/kvm/lib/kvm_util.c    | 18 +++++++------
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index a9264ed22cca5..6442aa9e9061f 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -65,6 +65,14 @@ struct userspace_mem_regions {
 	DECLARE_HASHTABLE(slot_hash, 9);
 };
 
+enum kvm_mem_region_type {
+	MEM_REGION_CODE,
+	MEM_REGION_DATA,
+	MEM_REGION_PT,
+	MEM_REGION_TEST_DATA,
+	NR_MEM_REGIONS,
+};
+
 struct kvm_vm {
 	int mode;
 	unsigned long type;
@@ -93,6 +101,13 @@ struct kvm_vm {
 	int stats_fd;
 	struct kvm_stats_header stats_header;
 	struct kvm_stats_desc *stats_desc;
+
+	/*
+	 * KVM region slots. These are the default memslots used by page
+	 * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
+	 * memslot.
+	 */
+	uint32_t memslots[NR_MEM_REGIONS];
 };
 
 
@@ -105,6 +120,13 @@ struct kvm_vm {
 struct userspace_mem_region *
 memslot2region(struct kvm_vm *vm, uint32_t memslot);
 
+static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
+							     enum kvm_mem_region_type type)
+{
+	assert(type < NR_MEM_REGIONS);
+	return memslot2region(vm, vm->memslots[type]);
+}
+
 /* Minimum allocated guest virtual and physical addresses */
 #define KVM_UTIL_MIN_VADDR		0x2000
 #define KVM_GUEST_PAGE_TABLE_MIN_PADDR	0x180000
@@ -647,13 +669,13 @@ vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
  * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
  * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
  */
-struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages);
+struct kvm_vm *____vm_create(enum vm_guest_mode mode);
 struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
 			   uint64_t nr_extra_pages);
 
 static inline struct kvm_vm *vm_create_barebones(void)
 {
-	return ____vm_create(VM_MODE_DEFAULT, 0);
+	return ____vm_create(VM_MODE_DEFAULT);
 }
 
 static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 6affce47e899a..f3dfa4e9ee0f5 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -185,13 +185,10 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
 	       "Missing new mode params?");
 
-struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
+struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 {
 	struct kvm_vm *vm;
 
-	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
-		 vm_guest_mode_string(mode), nr_pages);
-
 	vm = calloc(1, sizeof(*vm));
 	TEST_ASSERT(vm != NULL, "Insufficient Memory");
 
@@ -287,9 +284,6 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode, uint64_t nr_pages)
 
 	/* Allocate and setup memory for guest. */
 	vm->vpages_mapped = sparsebit_alloc();
-	if (nr_pages != 0)
-		vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-					    0, 0, nr_pages, 0);
 
 	return vm;
 }
@@ -335,8 +329,16 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
 	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
 						 nr_extra_pages);
 	struct kvm_vm *vm;
+	int i;
+
+	pr_debug("%s: mode='%s' pages='%ld'\n", __func__,
+		 vm_guest_mode_string(mode), nr_pages);
+
+	vm = ____vm_create(mode);
 
-	vm = ____vm_create(mode, nr_pages);
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+	for (i = 0; i < NR_MEM_REGIONS; i++)
+		vm->memslots[i] = 0;
 
 	kvm_vm_elf_load(vm, program_invocation_name);
 
-- 
GitLab


From 5485e822e31a75dfac3713d94b6b22025d4895da Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:28 +0000
Subject: [PATCH 1341/4122] KVM: selftests: Fix alignment in
 virt_arch_pgd_alloc() and vm_vaddr_alloc()

Refactor virt_arch_pgd_alloc() and vm_vaddr_alloc() in both RISC-V and
aarch64 to fix the alignment of parameters in a couple of calls. This will
make it easier to fix the alignment in a future commit that adds an extra
parameter (that happens to be very long).

No functional change intended.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-9-ricarkol@google.com
---
 .../selftests/kvm/lib/aarch64/processor.c     | 27 ++++++++++---------
 .../selftests/kvm/lib/riscv/processor.c       | 27 ++++++++++---------
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 26f0eccff6fee..6ff2b9d6cea6a 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -76,13 +76,14 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
 
 void virt_arch_pgd_alloc(struct kvm_vm *vm)
 {
-	if (!vm->pgd_created) {
-		vm_paddr_t paddr = vm_phy_pages_alloc(vm,
-			page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-		vm->pgd = paddr;
-		vm->pgd_created = true;
-	}
+	size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+	if (vm->pgd_created)
+		return;
+
+	vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+				     KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+	vm->pgd_created = true;
 }
 
 static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
@@ -325,13 +326,15 @@ void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
 struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 				  struct kvm_vcpu_init *init, void *guest_code)
 {
-	size_t stack_size = vm->page_size == 4096 ?
-					DEFAULT_STACK_PGS * vm->page_size :
-					vm->page_size;
-	uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-					      DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
+	size_t stack_size;
+	uint64_t stack_vaddr;
 	struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
 
+	stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+					     vm->page_size;
+	stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+				     DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
+
 	aarch64_vcpu_setup(vcpu, init);
 
 	vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index 6044781512120..ac7fc9d317db3 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -55,13 +55,14 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
 
 void virt_arch_pgd_alloc(struct kvm_vm *vm)
 {
-	if (!vm->pgd_created) {
-		vm_paddr_t paddr = vm_phy_pages_alloc(vm,
-			page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size,
-			KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
-		vm->pgd = paddr;
-		vm->pgd_created = true;
-	}
+	size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+
+	if (vm->pgd_created)
+		return;
+
+	vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+				     KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+	vm->pgd_created = true;
 }
 
 void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
@@ -279,15 +280,17 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 				  void *guest_code)
 {
 	int r;
-	size_t stack_size = vm->page_size == 4096 ?
-					DEFAULT_STACK_PGS * vm->page_size :
-					vm->page_size;
-	unsigned long stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-					DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
+	size_t stack_size;
+	unsigned long stack_vaddr;
 	unsigned long current_gp = 0;
 	struct kvm_mp_state mps;
 	struct kvm_vcpu *vcpu;
 
+	stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+					     vm->page_size;
+	stack_vaddr = vm_vaddr_alloc(vm, stack_size,
+				     DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
+
 	vcpu = __vm_vcpu_add(vm, vcpu_id);
 	riscv_vcpu_mmu_setup(vcpu);
 
-- 
GitLab


From 1446e331432d7f24ed56b870ad605a4345fee43f Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:29 +0000
Subject: [PATCH 1342/4122] KVM: selftests: Use the right memslot for code,
 page-tables, and data allocations

Now that kvm_vm allows specifying different memslots for code, page tables,
and data, use the appropriate memslot when making allocations in
common/libraty code. Change them accordingly:

- code (allocated by lib/elf) use the CODE memslot
- stacks, exception tables, and other core data pages (like the TSS in x86)
  use the DATA memslot
- page tables and the PGD use the PT memslot
- test data (anything allocated with vm_vaddr_alloc()) uses the TEST_DATA
  memslot

No functional change intended. All allocators keep using memslot #0.

Cc: Sean Christopherson <seanjc@google.com>
Cc: Andrew Jones <andrew.jones@linux.dev>
Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-10-ricarkol@google.com
---
 .../selftests/kvm/include/kvm_util_base.h     |  4 ++
 .../selftests/kvm/lib/aarch64/processor.c     | 12 ++--
 tools/testing/selftests/kvm/lib/elf.c         |  3 +-
 tools/testing/selftests/kvm/lib/kvm_util.c    | 57 ++++++++++++-------
 .../selftests/kvm/lib/riscv/processor.c       |  8 ++-
 .../selftests/kvm/lib/s390x/processor.c       |  8 ++-
 .../selftests/kvm/lib/x86_64/processor.c      | 13 +++--
 7 files changed, 65 insertions(+), 40 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 6442aa9e9061f..b0da75af1ff33 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -407,7 +407,11 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+			    enum kvm_mem_region_type type);
 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
+				 enum kvm_mem_region_type type);
 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
 
 void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 6ff2b9d6cea6a..2883dfd1ad49e 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -82,7 +82,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 		return;
 
 	vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
-				     KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+				     KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+				     vm->memslots[MEM_REGION_PT]);
 	vm->pgd_created = true;
 }
 
@@ -332,8 +333,9 @@ struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 
 	stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
 					     vm->page_size;
-	stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-				     DEFAULT_ARM64_GUEST_STACK_VADDR_MIN);
+	stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+				       DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+				       MEM_REGION_DATA);
 
 	aarch64_vcpu_setup(vcpu, init);
 
@@ -438,8 +440,8 @@ unexpected_exception:
 
 void vm_init_descriptor_tables(struct kvm_vm *vm)
 {
-	vm->handlers = vm_vaddr_alloc(vm, sizeof(struct handlers),
-			vm->page_size);
+	vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+					vm->page_size, MEM_REGION_DATA);
 
 	*(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
 }
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index 9f54c098d9d08..51f280c412bad 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -161,7 +161,8 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
 		seg_vend |= vm->page_size - 1;
 		size_t seg_size = seg_vend - seg_vstart + 1;
 
-		vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart);
+		vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
+						    MEM_REGION_CODE);
 		TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
 			"virtual memory for segment at requested min addr,\n"
 			"  segment idx: %u\n"
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f3dfa4e9ee0f5..5ad4acaec8e01 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1226,32 +1226,15 @@ va_found:
 	return pgidx_start * vm->page_size;
 }
 
-/*
- * VM Virtual Address Allocate
- *
- * Input Args:
- *   vm - Virtual Machine
- *   sz - Size in bytes
- *   vaddr_min - Minimum starting virtual address
- *
- * Output Args: None
- *
- * Return:
- *   Starting guest virtual address
- *
- * Allocates at least sz bytes within the virtual address space of the vm
- * given by vm.  The allocated bytes are mapped to a virtual address >=
- * the address given by vaddr_min.  Note that each allocation uses a
- * a unique set of pages, with the minimum real allocation being at least
- * a page.
- */
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+			    enum kvm_mem_region_type type)
 {
 	uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
 
 	virt_pgd_alloc(vm);
 	vm_paddr_t paddr = vm_phy_pages_alloc(vm, pages,
-					      KVM_UTIL_MIN_PFN * vm->page_size, 0);
+					      KVM_UTIL_MIN_PFN * vm->page_size,
+					      vm->memslots[type]);
 
 	/*
 	 * Find an unused range of virtual page addresses of at least
@@ -1272,6 +1255,30 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
 	return vaddr_start;
 }
 
+/*
+ * VM Virtual Address Allocate
+ *
+ * Input Args:
+ *   vm - Virtual Machine
+ *   sz - Size in bytes
+ *   vaddr_min - Minimum starting virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ *   Starting guest virtual address
+ *
+ * Allocates at least sz bytes within the virtual address space of the vm
+ * given by vm.  The allocated bytes are mapped to a virtual address >=
+ * the address given by vaddr_min.  Note that each allocation uses a
+ * a unique set of pages, with the minimum real allocation being at least
+ * a page. The allocated physical space comes from the TEST_DATA memory region.
+ */
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
+{
+	return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
+}
+
 /*
  * VM Virtual Address Allocate Pages
  *
@@ -1291,6 +1298,11 @@ vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
 	return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
 }
 
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
+{
+	return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
+}
+
 /*
  * VM Virtual Address Allocate Page
  *
@@ -1856,7 +1868,8 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
 
 vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
 {
-	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+	return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+				 vm->memslots[MEM_REGION_PT]);
 }
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index ac7fc9d317db3..d146ca71e0c09 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -61,7 +61,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 		return;
 
 	vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
-				     KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+				     KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+				     vm->memslots[MEM_REGION_PT]);
 	vm->pgd_created = true;
 }
 
@@ -288,8 +289,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 
 	stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
 					     vm->page_size;
-	stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-				     DEFAULT_RISCV_GUEST_STACK_VADDR_MIN);
+	stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+				       DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
+				       MEM_REGION_DATA);
 
 	vcpu = __vm_vcpu_add(vm, vcpu_id);
 	riscv_vcpu_mmu_setup(vcpu);
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index 89d7340d9cbd8..15945121daf17 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -21,7 +21,8 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 		return;
 
 	paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
-				   KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
+				   KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+				   vm->memslots[MEM_REGION_PT]);
 	memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
 
 	vm->pgd = paddr;
@@ -167,8 +168,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 	TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
 		    vm->page_size);
 
-	stack_vaddr = vm_vaddr_alloc(vm, stack_size,
-				     DEFAULT_GUEST_STACK_VADDR_MIN);
+	stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+				       DEFAULT_GUEST_STACK_VADDR_MIN,
+				       MEM_REGION_DATA);
 
 	vcpu = __vm_vcpu_add(vm, vcpu_id);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 39c4409ef56a6..b199dde90e9ff 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -552,7 +552,7 @@ unmapped_gva:
 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
 {
 	if (!vm->gdt)
-		vm->gdt = vm_vaddr_alloc_page(vm);
+		vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
 
 	dt->base = vm->gdt;
 	dt->limit = getpagesize();
@@ -562,7 +562,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
 				int selector)
 {
 	if (!vm->tss)
-		vm->tss = vm_vaddr_alloc_page(vm);
+		vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
 
 	memset(segp, 0, sizeof(*segp));
 	segp->base = vm->tss;
@@ -647,8 +647,9 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 	vm_vaddr_t stack_vaddr;
 	struct kvm_vcpu *vcpu;
 
-	stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
-				     DEFAULT_GUEST_STACK_VADDR_MIN);
+	stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+				       DEFAULT_GUEST_STACK_VADDR_MIN,
+				       MEM_REGION_DATA);
 
 	vcpu = __vm_vcpu_add(vm, vcpu_id);
 	vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
@@ -1145,8 +1146,8 @@ void vm_init_descriptor_tables(struct kvm_vm *vm)
 	extern void *idt_handlers;
 	int i;
 
-	vm->idt = vm_vaddr_alloc_page(vm);
-	vm->handlers = vm_vaddr_alloc_page(vm);
+	vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+	vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
 	/* Handlers have the same address in both address spaces.*/
 	for (i = 0; i < NUM_INTERRUPTS; i++)
 		set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
-- 
GitLab


From 35c5810157124cb71aaa939cd2d5508192714877 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:30 +0000
Subject: [PATCH 1343/4122] KVM: selftests: aarch64: Add
 aarch64/page_fault_test

Add a new test for stage 2 faults when using different combinations of
guest accesses (e.g., write, S1PTW), backing source type (e.g., anon)
and types of faults (e.g., read on hugetlbfs with a hole). The next
commits will add different handling methods and more faults (e.g., uffd
and dirty logging). This first commit starts by adding two sanity checks
for all types of accesses: AF setting by the hw, and accessing memslots
with holes.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-11-ricarkol@google.com
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/aarch64/page_fault_test.c   | 594 ++++++++++++++++++
 .../selftests/kvm/include/aarch64/processor.h |   8 +
 4 files changed, 604 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/aarch64/page_fault_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 2f0d705db9dba..4a30d684e208c 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -4,6 +4,7 @@
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
 /aarch64/hypercalls
+/aarch64/page_fault_test
 /aarch64/psci_test
 /aarch64/vcpu_width_config
 /aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 08a2606aff331..50c30335460f8 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -153,6 +153,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
+TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
 TEST_GEN_PROGS_aarch64 += aarch64/psci_test
 TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
new file mode 100644
index 0000000000000..28859a96053f6
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -0,0 +1,594 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * page_fault_test.c - Test stage 2 faults.
+ *
+ * This test tries different combinations of guest accesses (e.g., write,
+ * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
+ * hugetlbfs with a hole). It checks that the expected handling method is
+ * called (e.g., uffd faults with the right address and write/read flag).
+ */
+
+#define _GNU_SOURCE
+#include <linux/bitmap.h>
+#include <fcntl.h>
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <asm/sysreg.h>
+#include <linux/bitfield.h>
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
+
+/* Guest virtual addresses that point to the test page and its PTE. */
+#define TEST_GVA				0xc0000000
+#define TEST_EXEC_GVA				(TEST_GVA + 0x8)
+#define TEST_PTE_GVA				0xb0000000
+#define TEST_DATA				0x0123456789ABCDEF
+
+static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
+
+#define CMD_NONE				(0)
+#define CMD_SKIP_TEST				(1ULL << 1)
+#define CMD_HOLE_PT				(1ULL << 2)
+#define CMD_HOLE_DATA				(1ULL << 3)
+
+#define PREPARE_FN_NR				10
+#define CHECK_FN_NR				10
+
+struct test_desc {
+	const char *name;
+	uint64_t mem_mark_cmd;
+	/* Skip the test if any prepare function returns false */
+	bool (*guest_prepare[PREPARE_FN_NR])(void);
+	void (*guest_test)(void);
+	void (*guest_test_check[CHECK_FN_NR])(void);
+	void (*dabt_handler)(struct ex_regs *regs);
+	void (*iabt_handler)(struct ex_regs *regs);
+	uint32_t pt_memslot_flags;
+	uint32_t data_memslot_flags;
+	bool skip;
+};
+
+struct test_params {
+	enum vm_mem_backing_src_type src_type;
+	struct test_desc *test_desc;
+};
+
+static inline void flush_tlb_page(uint64_t vaddr)
+{
+	uint64_t page = vaddr >> 12;
+
+	dsb(ishst);
+	asm volatile("tlbi vaae1is, %0" :: "r" (page));
+	dsb(ish);
+	isb();
+}
+
+static void guest_write64(void)
+{
+	uint64_t val;
+
+	WRITE_ONCE(*guest_test_memory, TEST_DATA);
+	val = READ_ONCE(*guest_test_memory);
+	GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+/* Check the system for atomic instructions. */
+static bool guest_check_lse(void)
+{
+	uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
+	uint64_t atomic;
+
+	atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS), isar0);
+	return atomic >= 2;
+}
+
+static bool guest_check_dc_zva(void)
+{
+	uint64_t dczid = read_sysreg(dczid_el0);
+	uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_DZP), dczid);
+
+	return dzp == 0;
+}
+
+/* Compare and swap instruction. */
+static void guest_cas(void)
+{
+	uint64_t val;
+
+	GUEST_ASSERT(guest_check_lse());
+	asm volatile(".arch_extension lse\n"
+		     "casal %0, %1, [%2]\n"
+		     :: "r" (0), "r" (TEST_DATA), "r" (guest_test_memory));
+	val = READ_ONCE(*guest_test_memory);
+	GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+static void guest_read64(void)
+{
+	uint64_t val;
+
+	val = READ_ONCE(*guest_test_memory);
+	GUEST_ASSERT_EQ(val, 0);
+}
+
+/* Address translation instruction */
+static void guest_at(void)
+{
+	uint64_t par;
+
+	asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
+	par = read_sysreg(par_el1);
+	isb();
+
+	/* Bit 1 indicates whether the AT was successful */
+	GUEST_ASSERT_EQ(par & 1, 0);
+}
+
+/*
+ * The size of the block written by "dc zva" is guaranteed to be between (2 <<
+ * 0) and (2 << 9), which is safe in our case as we need the write to happen
+ * for at least a word, and not more than a page.
+ */
+static void guest_dc_zva(void)
+{
+	uint16_t val;
+
+	asm volatile("dc zva, %0" :: "r" (guest_test_memory));
+	dsb(ish);
+	val = READ_ONCE(*guest_test_memory);
+	GUEST_ASSERT_EQ(val, 0);
+}
+
+/*
+ * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
+ * And that's special because KVM must take special care with those: they
+ * should still count as accesses for dirty logging or user-faulting, but
+ * should be handled differently on mmio.
+ */
+static void guest_ld_preidx(void)
+{
+	uint64_t val;
+	uint64_t addr = TEST_GVA - 8;
+
+	/*
+	 * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
+	 * in a gap between memslots not backing by anything.
+	 */
+	asm volatile("ldr %0, [%1, #8]!"
+		     : "=r" (val), "+r" (addr));
+	GUEST_ASSERT_EQ(val, 0);
+	GUEST_ASSERT_EQ(addr, TEST_GVA);
+}
+
+static void guest_st_preidx(void)
+{
+	uint64_t val = TEST_DATA;
+	uint64_t addr = TEST_GVA - 8;
+
+	asm volatile("str %0, [%1, #8]!"
+		     : "+r" (val), "+r" (addr));
+
+	GUEST_ASSERT_EQ(addr, TEST_GVA);
+	val = READ_ONCE(*guest_test_memory);
+}
+
+static bool guest_set_ha(void)
+{
+	uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+	uint64_t hadbs, tcr;
+
+	/* Skip if HA is not supported. */
+	hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS), mmfr1);
+	if (hadbs == 0)
+		return false;
+
+	tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+	write_sysreg(tcr, tcr_el1);
+	isb();
+
+	return true;
+}
+
+static bool guest_clear_pte_af(void)
+{
+	*((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
+	flush_tlb_page(TEST_GVA);
+
+	return true;
+}
+
+static void guest_check_pte_af(void)
+{
+	dsb(ish);
+	GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
+}
+
+static void guest_exec(void)
+{
+	int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
+	int ret;
+
+	ret = code();
+	GUEST_ASSERT_EQ(ret, 0x77);
+}
+
+static bool guest_prepare(struct test_desc *test)
+{
+	bool (*prepare_fn)(void);
+	int i;
+
+	for (i = 0; i < PREPARE_FN_NR; i++) {
+		prepare_fn = test->guest_prepare[i];
+		if (prepare_fn && !prepare_fn())
+			return false;
+	}
+
+	return true;
+}
+
+static void guest_test_check(struct test_desc *test)
+{
+	void (*check_fn)(void);
+	int i;
+
+	for (i = 0; i < CHECK_FN_NR; i++) {
+		check_fn = test->guest_test_check[i];
+		if (check_fn)
+			check_fn();
+	}
+}
+
+static void guest_code(struct test_desc *test)
+{
+	if (!guest_prepare(test))
+		GUEST_SYNC(CMD_SKIP_TEST);
+
+	GUEST_SYNC(test->mem_mark_cmd);
+
+	if (test->guest_test)
+		test->guest_test();
+
+	guest_test_check(test);
+	GUEST_DONE();
+}
+
+static void no_dabt_handler(struct ex_regs *regs)
+{
+	GUEST_ASSERT_1(false, read_sysreg(far_el1));
+}
+
+static void no_iabt_handler(struct ex_regs *regs)
+{
+	GUEST_ASSERT_1(false, regs->pc);
+}
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static bool punch_hole_in_backing_store(struct kvm_vm *vm,
+					struct userspace_mem_region *region)
+{
+	void *hva = (void *)region->region.userspace_addr;
+	uint64_t paging_size = region->region.memory_size;
+	int ret, fd = region->fd;
+
+	if (fd != -1) {
+		ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+				0, paging_size);
+		TEST_ASSERT(ret == 0, "fallocate failed\n");
+	} else {
+		ret = madvise(hva, paging_size, MADV_DONTNEED);
+		TEST_ASSERT(ret == 0, "madvise failed\n");
+	}
+
+	return true;
+}
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static bool handle_cmd(struct kvm_vm *vm, int cmd)
+{
+	struct userspace_mem_region *data_region, *pt_region;
+	bool continue_test = true;
+
+	data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+	pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
+
+	if (cmd == CMD_SKIP_TEST)
+		continue_test = false;
+
+	if (cmd & CMD_HOLE_PT)
+		continue_test = punch_hole_in_backing_store(vm, pt_region);
+	if (cmd & CMD_HOLE_DATA)
+		continue_test = punch_hole_in_backing_store(vm, data_region);
+
+	return continue_test;
+}
+
+typedef uint32_t aarch64_insn_t;
+extern aarch64_insn_t __exec_test[2];
+
+noinline void __return_0x77(void)
+{
+	asm volatile("__exec_test: mov x0, #0x77\n"
+		     "ret\n");
+}
+
+/*
+ * Note that this function runs on the host before the test VM starts: there's
+ * no need to sync the D$ and I$ caches.
+ */
+static void load_exec_code_for_test(struct kvm_vm *vm)
+{
+	uint64_t *code;
+	struct userspace_mem_region *region;
+	void *hva;
+
+	region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+	hva = (void *)region->region.userspace_addr;
+
+	assert(TEST_EXEC_GVA > TEST_GVA);
+	code = hva + TEST_EXEC_GVA - TEST_GVA;
+	memcpy(code, __exec_test, sizeof(__exec_test));
+}
+
+static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+				 struct test_desc *test)
+{
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
+
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_DABT, no_dabt_handler);
+	vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+				ESR_EC_IABT, no_iabt_handler);
+}
+
+static void setup_gva_maps(struct kvm_vm *vm)
+{
+	struct userspace_mem_region *region;
+	uint64_t pte_gpa;
+
+	region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+	/* Map TEST_GVA first. This will install a new PTE. */
+	virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
+	/* Then map TEST_PTE_GVA to the above PTE. */
+	pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+	virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
+}
+
+enum pf_test_memslots {
+	CODE_AND_DATA_MEMSLOT,
+	PAGE_TABLE_MEMSLOT,
+	TEST_DATA_MEMSLOT,
+};
+
+/*
+ * Create a memslot for code and data at pfn=0, and test-data and PT ones
+ * at max_gfn.
+ */
+static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
+{
+	uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
+	uint64_t guest_page_size = vm->page_size;
+	uint64_t max_gfn = vm_compute_max_gfn(vm);
+	/* Enough for 2M of code when using 4K guest pages. */
+	uint64_t code_npages = 512;
+	uint64_t pt_size, data_size, data_gpa;
+
+	/*
+	 * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
+	 * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs.  That's 13
+	 * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
+	 * twice that just in case.
+	 */
+	pt_size = 26 * guest_page_size;
+
+	/* memslot sizes and gpa's must be aligned to the backing page size */
+	pt_size = align_up(pt_size, backing_src_pagesz);
+	data_size = align_up(guest_page_size, backing_src_pagesz);
+	data_gpa = (max_gfn * guest_page_size) - data_size;
+	data_gpa = align_down(data_gpa, backing_src_pagesz);
+
+	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
+				    CODE_AND_DATA_MEMSLOT, code_npages, 0);
+	vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
+	vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
+
+	vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
+				    PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
+				    p->test_desc->pt_memslot_flags);
+	vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
+
+	vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
+				    data_size / guest_page_size,
+				    p->test_desc->data_memslot_flags);
+	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
+{
+	struct test_desc *test = p->test_desc;
+
+	pr_debug("Test: %s\n", test->name);
+	pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+	pr_debug("Testing memory backing src type: %s\n",
+		 vm_mem_backing_src_alias(p->src_type)->name);
+}
+
+/*
+ * This function either succeeds, skips the test (after setting test->skip), or
+ * fails with a TEST_FAIL that aborts all tests.
+ */
+static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+			  struct test_desc *test)
+{
+	struct ucall uc;
+
+	for (;;) {
+		vcpu_run(vcpu);
+
+		switch (get_ucall(vcpu, &uc)) {
+		case UCALL_SYNC:
+			if (!handle_cmd(vm, uc.args[1])) {
+				test->skip = true;
+				goto done;
+			}
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT_2(uc, "values: %#lx, %#lx");
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+	}
+
+done:
+	pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+	struct test_params *p = (struct test_params *)arg;
+	struct test_desc *test = p->test_desc;
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu;
+
+	print_test_banner(mode, p);
+
+	vm = ____vm_create(mode);
+	setup_memslots(vm, p);
+	kvm_vm_elf_load(vm, program_invocation_name);
+	vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+	setup_gva_maps(vm);
+
+	ucall_init(vm, NULL);
+
+	load_exec_code_for_test(vm);
+	setup_abort_handlers(vm, vcpu, test);
+	vcpu_args_set(vcpu, 1, test);
+
+	vcpu_run_loop(vm, vcpu, test);
+
+	ucall_uninit(vm);
+	kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+	puts("");
+	printf("usage: %s [-h] [-s mem-type]\n", name);
+	puts("");
+	guest_modes_help();
+	backing_src_help("-s");
+	puts("");
+}
+
+#define SNAME(s)			#s
+#define SCAT2(a, b)			SNAME(a ## _ ## b)
+#define SCAT3(a, b, c)			SCAT2(a, SCAT2(b, c))
+
+#define _CHECK(_test)			_CHECK_##_test
+#define _PREPARE(_test)			_PREPARE_##_test
+#define _PREPARE_guest_read64		NULL
+#define _PREPARE_guest_ld_preidx	NULL
+#define _PREPARE_guest_write64		NULL
+#define _PREPARE_guest_st_preidx	NULL
+#define _PREPARE_guest_exec		NULL
+#define _PREPARE_guest_at		NULL
+#define _PREPARE_guest_dc_zva		guest_check_dc_zva
+#define _PREPARE_guest_cas		guest_check_lse
+
+/* With or without access flag checks */
+#define _PREPARE_with_af		guest_set_ha, guest_clear_pte_af
+#define _PREPARE_no_af			NULL
+#define _CHECK_with_af			guest_check_pte_af
+#define _CHECK_no_af			NULL
+
+/* Performs an access and checks that no faults were triggered. */
+#define TEST_ACCESS(_access, _with_af, _mark_cmd)				\
+{										\
+	.name			= SCAT3(_access, _with_af, #_mark_cmd),		\
+	.guest_prepare		= { _PREPARE(_with_af),				\
+				    _PREPARE(_access) },			\
+	.mem_mark_cmd		= _mark_cmd,					\
+	.guest_test		= _access,					\
+	.guest_test_check	= { _CHECK(_with_af) },				\
+}
+
+static struct test_desc tests[] = {
+
+	/* Check that HW is setting the Access Flag (AF) (sanity checks). */
+	TEST_ACCESS(guest_read64, with_af, CMD_NONE),
+	TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
+	TEST_ACCESS(guest_cas, with_af, CMD_NONE),
+	TEST_ACCESS(guest_write64, with_af, CMD_NONE),
+	TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
+	TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
+	TEST_ACCESS(guest_exec, with_af, CMD_NONE),
+
+	/*
+	 * Punch a hole in the data backing store, and then try multiple
+	 * accesses: reads should rturn zeroes, and writes should
+	 * re-populate the page. Moreover, the test also check that no
+	 * exception was generated in the guest.  Note that this
+	 * reading/writing behavior is the same as reading/writing a
+	 * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
+	 * userspace.
+	 */
+	TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
+	TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
+	TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
+	TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
+	TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
+	TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
+	TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
+
+	{ 0 }
+};
+
+static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
+{
+	struct test_desc *t;
+
+	for (t = &tests[0]; t->name; t++) {
+		if (t->skip)
+			continue;
+
+		struct test_params p = {
+			.src_type = src_type,
+			.test_desc = t,
+		};
+
+		for_each_guest_mode(run_test, &p);
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	enum vm_mem_backing_src_type src_type;
+	int opt;
+
+	setbuf(stdout, NULL);
+
+	src_type = DEFAULT_VM_MEM_SRC;
+
+	while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
+		switch (opt) {
+		case 'm':
+			guest_modes_cmdline(optarg);
+			break;
+		case 's':
+			src_type = parse_backing_src_type(optarg);
+			break;
+		case 'h':
+		default:
+			help(argv[0]);
+			exit(0);
+		}
+	}
+
+	for_each_test_and_guest_mode(src_type);
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index c1ddca8db225a..5f977528e09c0 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -105,11 +105,19 @@ enum {
 #define ESR_EC_MASK		(ESR_EC_NUM - 1)
 
 #define ESR_EC_SVC64		0x15
+#define ESR_EC_IABT		0x21
+#define ESR_EC_DABT		0x25
 #define ESR_EC_HW_BP_CURRENT	0x31
 #define ESR_EC_SSTEP_CURRENT	0x33
 #define ESR_EC_WP_CURRENT	0x35
 #define ESR_EC_BRK_INS		0x3c
 
+/* Access flag */
+#define PTE_AF			(1ULL << 10)
+
+/* Access flag update enable/disable */
+#define TCR_EL1_HA		(1ULL << 39)
+
 void aarch64_get_supported_page_sizes(uint32_t ipa,
 				      bool *ps4k, bool *ps16k, bool *ps64k);
 
-- 
GitLab


From 3b1d915659c64dce079f4926a648f2271faea008 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:31 +0000
Subject: [PATCH 1344/4122] KVM: selftests: aarch64: Add userfaultfd tests into
 page_fault_test

Add some userfaultfd tests into page_fault_test. Punch holes into the
data and/or page-table memslots, perform some accesses, and check that
the faults are taken (or not taken) when expected.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-12-ricarkol@google.com
---
 .../selftests/kvm/aarch64/page_fault_test.c   | 187 ++++++++++++++++++
 1 file changed, 187 insertions(+)

diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 28859a96053f6..8ecc2ac8c476c 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -35,6 +35,12 @@ static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
 #define PREPARE_FN_NR				10
 #define CHECK_FN_NR				10
 
+static struct event_cnt {
+	int uffd_faults;
+	/* uffd_faults is incremented from multiple threads. */
+	pthread_mutex_t uffd_faults_mutex;
+} events;
+
 struct test_desc {
 	const char *name;
 	uint64_t mem_mark_cmd;
@@ -42,11 +48,14 @@ struct test_desc {
 	bool (*guest_prepare[PREPARE_FN_NR])(void);
 	void (*guest_test)(void);
 	void (*guest_test_check[CHECK_FN_NR])(void);
+	uffd_handler_t uffd_pt_handler;
+	uffd_handler_t uffd_data_handler;
 	void (*dabt_handler)(struct ex_regs *regs);
 	void (*iabt_handler)(struct ex_regs *regs);
 	uint32_t pt_memslot_flags;
 	uint32_t data_memslot_flags;
 	bool skip;
+	struct event_cnt expected_events;
 };
 
 struct test_params {
@@ -263,7 +272,110 @@ static void no_iabt_handler(struct ex_regs *regs)
 	GUEST_ASSERT_1(false, regs->pc);
 }
 
+static struct uffd_args {
+	char *copy;
+	void *hva;
+	uint64_t paging_size;
+} pt_args, data_args;
+
 /* Returns true to continue the test, and false if it should be skipped. */
+static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
+				struct uffd_args *args, bool expect_write)
+{
+	uint64_t addr = msg->arg.pagefault.address;
+	uint64_t flags = msg->arg.pagefault.flags;
+	struct uffdio_copy copy;
+	int ret;
+
+	TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
+		    "The only expected UFFD mode is MISSING");
+	ASSERT_EQ(!!(flags & UFFD_PAGEFAULT_FLAG_WRITE), expect_write);
+	ASSERT_EQ(addr, (uint64_t)args->hva);
+
+	pr_debug("uffd fault: addr=%p write=%d\n",
+		 (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
+
+	copy.src = (uint64_t)args->copy;
+	copy.dst = addr;
+	copy.len = args->paging_size;
+	copy.mode = 0;
+
+	ret = ioctl(uffd, UFFDIO_COPY, &copy);
+	if (ret == -1) {
+		pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
+			addr, errno);
+		return ret;
+	}
+
+	pthread_mutex_lock(&events.uffd_faults_mutex);
+	events.uffd_faults += 1;
+	pthread_mutex_unlock(&events.uffd_faults_mutex);
+	return 0;
+}
+
+static int uffd_pt_write_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+	return uffd_generic_handler(mode, uffd, msg, &pt_args, true);
+}
+
+static int uffd_data_write_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+	return uffd_generic_handler(mode, uffd, msg, &data_args, true);
+}
+
+static int uffd_data_read_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+	return uffd_generic_handler(mode, uffd, msg, &data_args, false);
+}
+
+static void setup_uffd_args(struct userspace_mem_region *region,
+			    struct uffd_args *args)
+{
+	args->hva = (void *)region->region.userspace_addr;
+	args->paging_size = region->region.memory_size;
+
+	args->copy = malloc(args->paging_size);
+	TEST_ASSERT(args->copy, "Failed to allocate data copy.");
+	memcpy(args->copy, args->hva, args->paging_size);
+}
+
+static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
+		       struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
+{
+	struct test_desc *test = p->test_desc;
+	int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+
+	setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
+	setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
+
+	*pt_uffd = NULL;
+	if (test->uffd_pt_handler)
+		*pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+						    pt_args.hva,
+						    pt_args.paging_size,
+						    test->uffd_pt_handler);
+
+	*data_uffd = NULL;
+	if (test->uffd_data_handler)
+		*data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+						      data_args.hva,
+						      data_args.paging_size,
+						      test->uffd_data_handler);
+}
+
+static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
+		      struct uffd_desc *data_uffd)
+{
+	if (test->uffd_pt_handler)
+		uffd_stop_demand_paging(pt_uffd);
+	if (test->uffd_data_handler)
+		uffd_stop_demand_paging(data_uffd);
+
+	free(pt_args.copy);
+	free(data_args.copy);
+}
+
+/* Returns false if the test should be skipped. */
 static bool punch_hole_in_backing_store(struct kvm_vm *vm,
 					struct userspace_mem_region *region)
 {
@@ -404,6 +516,11 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
 	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
 }
 
+static void check_event_counts(struct test_desc *test)
+{
+	ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+}
+
 static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
 {
 	struct test_desc *test = p->test_desc;
@@ -414,6 +531,11 @@ static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
 		 vm_mem_backing_src_alias(p->src_type)->name);
 }
 
+static void reset_event_counts(void)
+{
+	memset(&events, 0, sizeof(events));
+}
+
 /*
  * This function either succeeds, skips the test (after setting test->skip), or
  * fails with a TEST_FAIL that aborts all tests.
@@ -453,6 +575,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct test_desc *test = p->test_desc;
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu;
+	struct uffd_desc *pt_uffd, *data_uffd;
 
 	print_test_banner(mode, p);
 
@@ -465,7 +588,16 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	ucall_init(vm, NULL);
 
+	reset_event_counts();
+
+	/*
+	 * Set some code in the data memslot for the guest to execute (only
+	 * applicable to the EXEC tests). This has to be done before
+	 * setup_uffd() as that function copies the memslot data for the uffd
+	 * handler.
+	 */
 	load_exec_code_for_test(vm);
+	setup_uffd(vm, p, &pt_uffd, &data_uffd);
 	setup_abort_handlers(vm, vcpu, test);
 	vcpu_args_set(vcpu, 1, test);
 
@@ -473,6 +605,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	ucall_uninit(vm);
 	kvm_vm_free(vm);
+	free_uffd(test, pt_uffd, data_uffd);
+
+	/*
+	 * Make sure we check the events after the uffd threads have exited,
+	 * which means they updated their respective event counters.
+	 */
+	if (!test->skip)
+		check_event_counts(test);
 }
 
 static void help(char *name)
@@ -488,6 +628,7 @@ static void help(char *name)
 #define SNAME(s)			#s
 #define SCAT2(a, b)			SNAME(a ## _ ## b)
 #define SCAT3(a, b, c)			SCAT2(a, SCAT2(b, c))
+#define SCAT4(a, b, c, d)		SCAT2(a, SCAT3(b, c, d))
 
 #define _CHECK(_test)			_CHECK_##_test
 #define _PREPARE(_test)			_PREPARE_##_test
@@ -515,6 +656,21 @@ static void help(char *name)
 	.mem_mark_cmd		= _mark_cmd,					\
 	.guest_test		= _access,					\
 	.guest_test_check	= { _CHECK(_with_af) },				\
+	.expected_events	= { 0 },					\
+}
+
+#define TEST_UFFD(_access, _with_af, _mark_cmd,					\
+		  _uffd_data_handler, _uffd_pt_handler, _uffd_faults)		\
+{										\
+	.name			= SCAT4(uffd, _access, _with_af, #_mark_cmd),	\
+	.guest_prepare		= { _PREPARE(_with_af),				\
+				    _PREPARE(_access) },			\
+	.guest_test		= _access,					\
+	.mem_mark_cmd		= _mark_cmd,					\
+	.guest_test_check	= { _CHECK(_with_af) },				\
+	.uffd_data_handler	= _uffd_data_handler,				\
+	.uffd_pt_handler	= _uffd_pt_handler,				\
+	.expected_events	= { .uffd_faults = _uffd_faults, },		\
 }
 
 static struct test_desc tests[] = {
@@ -545,6 +701,37 @@ static struct test_desc tests[] = {
 	TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
 	TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
 
+	/*
+	 * Punch holes in the data and PT backing stores and mark them for
+	 * userfaultfd handling. This should result in 2 faults: the access
+	 * on the data backing store, and its respective S1 page table walk
+	 * (S1PTW).
+	 */
+	TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_read_handler, uffd_pt_write_handler, 2),
+	/* no_af should also lead to a PT write. */
+	TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_read_handler, uffd_pt_write_handler, 2),
+	/* Note how that cas invokes the read handler. */
+	TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_read_handler, uffd_pt_write_handler, 2),
+	/*
+	 * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
+	 * The S1PTW fault should still be marked as a write.
+	 */
+	TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_read_handler, uffd_pt_write_handler, 1),
+	TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_read_handler, uffd_pt_write_handler, 2),
+	TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_write_handler, uffd_pt_write_handler, 2),
+	TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_write_handler, uffd_pt_write_handler, 2),
+	TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_write_handler, uffd_pt_write_handler, 2),
+	TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+		  uffd_data_read_handler, uffd_pt_write_handler, 2),
+
 	{ 0 }
 };
 
-- 
GitLab


From a4edf25b3e25656c69cbc768d1c704868e4a616f Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:32 +0000
Subject: [PATCH 1345/4122] KVM: selftests: aarch64: Add dirty logging tests
 into page_fault_test

Add some dirty logging tests into page_fault_test. Mark the data and/or
page-table memory regions for dirty logging, perform some accesses, and
check that the dirty log bits are set or clean when expected.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-13-ricarkol@google.com
---
 .../selftests/kvm/aarch64/page_fault_test.c   | 76 +++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 8ecc2ac8c476c..a36001143aff5 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -31,6 +31,11 @@ static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
 #define CMD_SKIP_TEST				(1ULL << 1)
 #define CMD_HOLE_PT				(1ULL << 2)
 #define CMD_HOLE_DATA				(1ULL << 3)
+#define CMD_CHECK_WRITE_IN_DIRTY_LOG		(1ULL << 4)
+#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG		(1ULL << 5)
+#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG		(1ULL << 6)
+#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG	(1ULL << 7)
+#define CMD_SET_PTE_AF				(1ULL << 8)
 
 #define PREPARE_FN_NR				10
 #define CHECK_FN_NR				10
@@ -213,6 +218,21 @@ static void guest_check_pte_af(void)
 	GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
 }
 
+static void guest_check_write_in_dirty_log(void)
+{
+	GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_write_in_dirty_log(void)
+{
+	GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_s1ptw_wr_in_dirty_log(void)
+{
+	GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
+}
+
 static void guest_exec(void)
 {
 	int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
@@ -395,6 +415,22 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm,
 	return true;
 }
 
+static bool check_write_in_dirty_log(struct kvm_vm *vm,
+				     struct userspace_mem_region *region,
+				     uint64_t host_pg_nr)
+{
+	unsigned long *bmap;
+	bool first_page_dirty;
+	uint64_t size = region->region.memory_size;
+
+	/* getpage_size() is not always equal to vm->page_size */
+	bmap = bitmap_zalloc(size / getpagesize());
+	kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
+	first_page_dirty = test_bit(host_pg_nr, bmap);
+	free(bmap);
+	return first_page_dirty;
+}
+
 /* Returns true to continue the test, and false if it should be skipped. */
 static bool handle_cmd(struct kvm_vm *vm, int cmd)
 {
@@ -411,6 +447,18 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd)
 		continue_test = punch_hole_in_backing_store(vm, pt_region);
 	if (cmd & CMD_HOLE_DATA)
 		continue_test = punch_hole_in_backing_store(vm, data_region);
+	if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
+		TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
+			    "Missing write in dirty log");
+	if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
+		TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, 0),
+			    "Missing s1ptw write in dirty log");
+	if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
+		TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
+			    "Unexpected write in dirty log");
+	if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
+		TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, 0),
+			    "Unexpected s1ptw write in dirty log");
 
 	return continue_test;
 }
@@ -673,6 +721,19 @@ static void help(char *name)
 	.expected_events	= { .uffd_faults = _uffd_faults, },		\
 }
 
+#define TEST_DIRTY_LOG(_access, _with_af, _test_check)				\
+{										\
+	.name			= SCAT3(dirty_log, _access, _with_af),		\
+	.data_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
+	.pt_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
+	.guest_prepare		= { _PREPARE(_with_af),				\
+				    _PREPARE(_access) },			\
+	.guest_test		= _access,					\
+	.guest_test_check	= { _CHECK(_with_af), _test_check,		\
+				    guest_check_s1ptw_wr_in_dirty_log},		\
+	.expected_events	= { 0 },					\
+}
+
 static struct test_desc tests[] = {
 
 	/* Check that HW is setting the Access Flag (AF) (sanity checks). */
@@ -732,6 +793,21 @@ static struct test_desc tests[] = {
 	TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
 		  uffd_data_read_handler, uffd_pt_write_handler, 2),
 
+	/*
+	 * Try accesses when the data and PT memory regions are both
+	 * tracked for dirty logging.
+	 */
+	TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log),
+	/* no_af should also lead to a PT write. */
+	TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log),
+	TEST_DIRTY_LOG(guest_ld_preidx, with_af, guest_check_no_write_in_dirty_log),
+	TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log),
+	TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log),
+	TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log),
+	TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log),
+	TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log),
+	TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log),
+
 	{ 0 }
 };
 
-- 
GitLab


From 45acde40f538a30e759f3b3f4aa5089edf097b2f Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:33 +0000
Subject: [PATCH 1346/4122] KVM: selftests: aarch64: Add readonly memslot tests
 into page_fault_test

Add some readonly memslot tests into page_fault_test. Mark the data and/or
page-table memory regions as readonly, perform some accesses, and check
that the right fault is triggered when expected (e.g., a store with no
write-back should lead to an mmio exit).

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-14-ricarkol@google.com
---
 .../selftests/kvm/aarch64/page_fault_test.c   | 102 +++++++++++++++++-
 1 file changed, 101 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index a36001143aff5..727f4f2b6cc44 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -41,6 +41,8 @@ static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
 #define CHECK_FN_NR				10
 
 static struct event_cnt {
+	int mmio_exits;
+	int fail_vcpu_runs;
 	int uffd_faults;
 	/* uffd_faults is incremented from multiple threads. */
 	pthread_mutex_t uffd_faults_mutex;
@@ -57,6 +59,8 @@ struct test_desc {
 	uffd_handler_t uffd_data_handler;
 	void (*dabt_handler)(struct ex_regs *regs);
 	void (*iabt_handler)(struct ex_regs *regs);
+	void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
+	void (*fail_vcpu_run_handler)(int ret);
 	uint32_t pt_memslot_flags;
 	uint32_t data_memslot_flags;
 	bool skip;
@@ -415,6 +419,31 @@ static bool punch_hole_in_backing_store(struct kvm_vm *vm,
 	return true;
 }
 
+static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+	struct userspace_mem_region *region;
+	void *hva;
+
+	region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+	hva = (void *)region->region.userspace_addr;
+
+	ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+
+	memcpy(hva, run->mmio.data, run->mmio.len);
+	events.mmio_exits += 1;
+}
+
+static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+	uint64_t data;
+
+	memcpy(&data, run->mmio.data, sizeof(data));
+	pr_debug("addr=%lld len=%d w=%d data=%lx\n",
+		 run->mmio.phys_addr, run->mmio.len,
+		 run->mmio.is_write, data);
+	TEST_FAIL("There was no MMIO exit expected.");
+}
+
 static bool check_write_in_dirty_log(struct kvm_vm *vm,
 				     struct userspace_mem_region *region,
 				     uint64_t host_pg_nr)
@@ -463,6 +492,18 @@ static bool handle_cmd(struct kvm_vm *vm, int cmd)
 	return continue_test;
 }
 
+void fail_vcpu_run_no_handler(int ret)
+{
+	TEST_FAIL("Unexpected vcpu run failure\n");
+}
+
+void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
+{
+	TEST_ASSERT(errno == ENOSYS,
+		    "The mmio handler should have returned not implemented.");
+	events.fail_vcpu_runs += 1;
+}
+
 typedef uint32_t aarch64_insn_t;
 extern aarch64_insn_t __exec_test[2];
 
@@ -564,9 +605,20 @@ static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
 	vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
 }
 
+static void setup_default_handlers(struct test_desc *test)
+{
+	if (!test->mmio_handler)
+		test->mmio_handler = mmio_no_handler;
+
+	if (!test->fail_vcpu_run_handler)
+		test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
+}
+
 static void check_event_counts(struct test_desc *test)
 {
 	ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+	ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+	ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
 }
 
 static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
@@ -591,10 +643,18 @@ static void reset_event_counts(void)
 static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
 			  struct test_desc *test)
 {
+	struct kvm_run *run;
 	struct ucall uc;
+	int ret;
+
+	run = vcpu->run;
 
 	for (;;) {
-		vcpu_run(vcpu);
+		ret = _vcpu_run(vcpu);
+		if (ret) {
+			test->fail_vcpu_run_handler(ret);
+			goto done;
+		}
 
 		switch (get_ucall(vcpu, &uc)) {
 		case UCALL_SYNC:
@@ -608,6 +668,10 @@ static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
 			break;
 		case UCALL_DONE:
 			goto done;
+		case UCALL_NONE:
+			if (run->exit_reason == KVM_EXIT_MMIO)
+				test->mmio_handler(vm, run);
+			break;
 		default:
 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
 		}
@@ -647,6 +711,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	load_exec_code_for_test(vm);
 	setup_uffd(vm, p, &pt_uffd, &data_uffd);
 	setup_abort_handlers(vm, vcpu, test);
+	setup_default_handlers(test);
 	vcpu_args_set(vcpu, 1, test);
 
 	vcpu_run_loop(vm, vcpu, test);
@@ -734,6 +799,25 @@ static void help(char *name)
 	.expected_events	= { 0 },					\
 }
 
+#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits)			\
+{										\
+	.name			= SCAT3(ro_memslot, _access, _with_af),		\
+	.data_memslot_flags	= KVM_MEM_READONLY,				\
+	.guest_prepare		= { _PREPARE(_access) },			\
+	.guest_test		= _access,					\
+	.mmio_handler		= _mmio_handler,				\
+	.expected_events	= { .mmio_exits = _mmio_exits },		\
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME(_access)					\
+{										\
+	.name			= SCAT2(ro_memslot_no_syndrome, _access),	\
+	.data_memslot_flags	= KVM_MEM_READONLY,				\
+	.guest_test		= _access,					\
+	.fail_vcpu_run_handler	= fail_vcpu_run_mmio_no_syndrome_handler,	\
+	.expected_events	= { .fail_vcpu_runs = 1 },			\
+}
+
 static struct test_desc tests[] = {
 
 	/* Check that HW is setting the Access Flag (AF) (sanity checks). */
@@ -808,6 +892,22 @@ static struct test_desc tests[] = {
 	TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log),
 	TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log),
 
+	/*
+	 * Try accesses when the data memory region is marked read-only
+	 * (with KVM_MEM_READONLY). Writes with a syndrome result in an
+	 * MMIO exit, writes with no syndrome (e.g., CAS) result in a
+	 * failed vcpu run, and reads/execs with and without syndroms do
+	 * not fault.
+	 */
+	TEST_RO_MEMSLOT(guest_read64, 0, 0),
+	TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
+	TEST_RO_MEMSLOT(guest_at, 0, 0),
+	TEST_RO_MEMSLOT(guest_exec, 0, 0),
+	TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
+	TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
+	TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
+	TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
+
 	{ 0 }
 };
 
-- 
GitLab


From ff2b5509e1d252cd18bb1430b5461d5044701559 Mon Sep 17 00:00:00 2001
From: Ricardo Koller <ricarkol@google.com>
Date: Mon, 17 Oct 2022 19:58:34 +0000
Subject: [PATCH 1347/4122] KVM: selftests: aarch64: Add mix of tests into
 page_fault_test

Add some mix of tests into page_fault_test: memory regions with all the
pairwise combinations of read-only, userfaultfd, and dirty-logging.  For
example, writing into a read-only region which has a hole handled with
userfaultfd.

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221017195834.2295901-15-ricarkol@google.com
---
 .../selftests/kvm/aarch64/page_fault_test.c   | 155 ++++++++++++++++++
 1 file changed, 155 insertions(+)

diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 727f4f2b6cc44..05bb6a6369c25 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -399,6 +399,12 @@ static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
 	free(data_args.copy);
 }
 
+static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+	TEST_FAIL("There was no UFFD fault expected.");
+	return -1;
+}
+
 /* Returns false if the test should be skipped. */
 static bool punch_hole_in_backing_store(struct kvm_vm *vm,
 					struct userspace_mem_region *region)
@@ -799,6 +805,22 @@ static void help(char *name)
 	.expected_events	= { 0 },					\
 }
 
+#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler,		\
+				_uffd_faults, _test_check)			\
+{										\
+	.name			= SCAT3(uffd_and_dirty_log, _access, _with_af),	\
+	.data_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
+	.pt_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
+	.guest_prepare		= { _PREPARE(_with_af),				\
+				    _PREPARE(_access) },			\
+	.guest_test		= _access,					\
+	.mem_mark_cmd		= CMD_HOLE_DATA | CMD_HOLE_PT,			\
+	.guest_test_check	= { _CHECK(_with_af), _test_check },		\
+	.uffd_data_handler	= _uffd_data_handler,				\
+	.uffd_pt_handler	= uffd_pt_write_handler,			\
+	.expected_events	= { .uffd_faults = _uffd_faults, },		\
+}
+
 #define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits)			\
 {										\
 	.name			= SCAT3(ro_memslot, _access, _with_af),		\
@@ -818,6 +840,59 @@ static void help(char *name)
 	.expected_events	= { .fail_vcpu_runs = 1 },			\
 }
 
+#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits,	\
+				      _test_check)				\
+{										\
+	.name			= SCAT3(ro_memslot, _access, _with_af),		\
+	.data_memslot_flags	= KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,	\
+	.pt_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
+	.guest_prepare		= { _PREPARE(_access) },			\
+	.guest_test		= _access,					\
+	.guest_test_check	= { _test_check },				\
+	.mmio_handler		= _mmio_handler,				\
+	.expected_events	= { .mmio_exits = _mmio_exits},			\
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check)		\
+{										\
+	.name			= SCAT2(ro_memslot_no_syn_and_dlog, _access),	\
+	.data_memslot_flags	= KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES,	\
+	.pt_memslot_flags	= KVM_MEM_LOG_DIRTY_PAGES,			\
+	.guest_test		= _access,					\
+	.guest_test_check	= { _test_check },				\
+	.fail_vcpu_run_handler	= fail_vcpu_run_mmio_no_syndrome_handler,	\
+	.expected_events	= { .fail_vcpu_runs = 1 },			\
+}
+
+#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits,		\
+				 _uffd_data_handler, _uffd_faults)		\
+{										\
+	.name			= SCAT2(ro_memslot_uffd, _access),		\
+	.data_memslot_flags	= KVM_MEM_READONLY,				\
+	.mem_mark_cmd		= CMD_HOLE_DATA | CMD_HOLE_PT,			\
+	.guest_prepare		= { _PREPARE(_access) },			\
+	.guest_test		= _access,					\
+	.uffd_data_handler	= _uffd_data_handler,				\
+	.uffd_pt_handler	= uffd_pt_write_handler,			\
+	.mmio_handler		= _mmio_handler,				\
+	.expected_events	= { .mmio_exits = _mmio_exits,			\
+				    .uffd_faults = _uffd_faults },		\
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler,	\
+					     _uffd_faults)			\
+{										\
+	.name			= SCAT2(ro_memslot_no_syndrome, _access),	\
+	.data_memslot_flags	= KVM_MEM_READONLY,				\
+	.mem_mark_cmd		= CMD_HOLE_DATA | CMD_HOLE_PT,			\
+	.guest_test		= _access,					\
+	.uffd_data_handler	= _uffd_data_handler,				\
+	.uffd_pt_handler	= uffd_pt_write_handler,			\
+	.fail_vcpu_run_handler	= fail_vcpu_run_mmio_no_syndrome_handler,	\
+	.expected_events	= { .fail_vcpu_runs = 1,			\
+				    .uffd_faults = _uffd_faults },		\
+}
+
 static struct test_desc tests[] = {
 
 	/* Check that HW is setting the Access Flag (AF) (sanity checks). */
@@ -892,6 +967,35 @@ static struct test_desc tests[] = {
 	TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log),
 	TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log),
 
+	/*
+	 * Access when the data and PT memory regions are both marked for
+	 * dirty logging and UFFD at the same time. The expected result is
+	 * that writes should mark the dirty log and trigger a userfaultfd
+	 * write fault.  Reads/execs should result in a read userfaultfd
+	 * fault, and nothing in the dirty log.  Any S1PTW should result in
+	 * a write in the dirty log and a userfaultfd write.
+	 */
+	TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af, uffd_data_read_handler, 2,
+				guest_check_no_write_in_dirty_log),
+	/* no_af should also lead to a PT write. */
+	TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af, uffd_data_read_handler, 2,
+				guest_check_no_write_in_dirty_log),
+	TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af, uffd_data_read_handler,
+				2, guest_check_no_write_in_dirty_log),
+	TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, 0, 1,
+				guest_check_no_write_in_dirty_log),
+	TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af, uffd_data_read_handler, 2,
+				guest_check_no_write_in_dirty_log),
+	TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af, uffd_data_write_handler,
+				2, guest_check_write_in_dirty_log),
+	TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af, uffd_data_read_handler, 2,
+				guest_check_write_in_dirty_log),
+	TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af, uffd_data_write_handler,
+				2, guest_check_write_in_dirty_log),
+	TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
+				uffd_data_write_handler, 2,
+				guest_check_write_in_dirty_log),
+
 	/*
 	 * Try accesses when the data memory region is marked read-only
 	 * (with KVM_MEM_READONLY). Writes with a syndrome result in an
@@ -908,6 +1012,57 @@ static struct test_desc tests[] = {
 	TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
 	TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
 
+	/*
+	 * Access when both the data region is both read-only and marked
+	 * for dirty logging at the same time. The expected result is that
+	 * for writes there should be no write in the dirty log. The
+	 * readonly handling is the same as if the memslot was not marked
+	 * for dirty logging: writes with a syndrome result in an MMIO
+	 * exit, and writes with no syndrome result in a failed vcpu run.
+	 */
+	TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
+				      guest_check_no_write_in_dirty_log),
+	TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
+				      guest_check_no_write_in_dirty_log),
+	TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
+				      guest_check_no_write_in_dirty_log),
+	TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
+				      guest_check_no_write_in_dirty_log),
+	TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
+				      1, guest_check_no_write_in_dirty_log),
+	TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
+						  guest_check_no_write_in_dirty_log),
+	TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
+						  guest_check_no_write_in_dirty_log),
+	TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
+						  guest_check_no_write_in_dirty_log),
+
+	/*
+	 * Access when the data region is both read-only and punched with
+	 * holes tracked with userfaultfd.  The expected result is the
+	 * union of both userfaultfd and read-only behaviors. For example,
+	 * write accesses result in a userfaultfd write fault and an MMIO
+	 * exit.  Writes with no syndrome result in a failed vcpu run and
+	 * no userfaultfd write fault. Reads result in userfaultfd getting
+	 * triggered.
+	 */
+	TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0,
+				 uffd_data_read_handler, 2),
+	TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0,
+				 uffd_data_read_handler, 2),
+	TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0,
+				 uffd_no_handler, 1),
+	TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0,
+				 uffd_data_read_handler, 2),
+	TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
+				 uffd_data_write_handler, 2),
+	TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas,
+					     uffd_data_read_handler, 2),
+	TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva,
+					     uffd_no_handler, 1),
+	TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx,
+					     uffd_no_handler, 1),
+
 	{ 0 }
 };
 
-- 
GitLab


From 579d7ebe90a332cc5b6c02db9250fd0816a64f63 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Thu, 3 Nov 2022 15:05:06 +0000
Subject: [PATCH 1348/4122] KVM: arm64: Fix kvm init failure when mode!=vhe and
 VA_BITS=52.

For nvhe and protected modes, the hyp stage 1 page-tables were previously
configured to have the same number of VA bits as the kernel's idmap.
However, for kernel configs with VA_BITS=52 and where the kernel is
loaded in physical memory below 48 bits, the idmap VA bits is actually
smaller than the kernel's normal stage 1 VA bits. This can lead to
kernel addresses that can't be mapped into the hypervisor, leading to
kvm initialization failure during boot:

  kvm [1]: IPA Size Limit: 48 bits
  kvm [1]: Cannot map world-switch code
  kvm [1]: error initializing Hyp mode: -34

Fix this by ensuring that the hyp stage 1 VA size is the maximum of
what's used for the idmap and the regular kernel stage 1. At the same
time, refactor the code so that the hyp VA bits is only calculated in
one place.

Prior to 7ba8f2b2d652, the idmap was always 52 bits for a 52 VA bits
kernel and therefore the hyp stage1 was also always 52 bits.

Fixes: 7ba8f2b2d652 ("arm64: mm: use a 48-bit ID map when possible on 52-bit VA builds")
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
[maz: commit message fixes]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221103150507.32948-2-ryan.roberts@arm.com
---
 arch/arm64/kvm/arm.c | 20 +++-----------------
 arch/arm64/kvm/mmu.c | 28 +++++++++++++++++++++++++++-
 2 files changed, 30 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 94d33e296e10c..803055da3ee3e 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1518,7 +1518,7 @@ static int kvm_init_vector_slots(void)
 	return 0;
 }
 
-static void cpu_prepare_hyp_mode(int cpu)
+static void cpu_prepare_hyp_mode(int cpu, u32 hyp_va_bits)
 {
 	struct kvm_nvhe_init_params *params = per_cpu_ptr_nvhe_sym(kvm_init_params, cpu);
 	unsigned long tcr;
@@ -1534,23 +1534,9 @@ static void cpu_prepare_hyp_mode(int cpu)
 
 	params->mair_el2 = read_sysreg(mair_el1);
 
-	/*
-	 * The ID map may be configured to use an extended virtual address
-	 * range. This is only the case if system RAM is out of range for the
-	 * currently configured page size and VA_BITS, in which case we will
-	 * also need the extended virtual range for the HYP ID map, or we won't
-	 * be able to enable the EL2 MMU.
-	 *
-	 * However, at EL2, there is only one TTBR register, and we can't switch
-	 * between translation tables *and* update TCR_EL2.T0SZ at the same
-	 * time. Bottom line: we need to use the extended range with *both* our
-	 * translation tables.
-	 *
-	 * So use the same T0SZ value we use for the ID map.
-	 */
 	tcr = (read_sysreg(tcr_el1) & TCR_EL2_MASK) | TCR_EL2_RES1;
 	tcr &= ~TCR_T0SZ_MASK;
-	tcr |= (idmap_t0sz & GENMASK(TCR_TxSZ_WIDTH - 1, 0)) << TCR_T0SZ_OFFSET;
+	tcr |= TCR_T0SZ(hyp_va_bits);
 	params->tcr_el2 = tcr;
 
 	params->pgd_pa = kvm_mmu_get_httbr();
@@ -2054,7 +2040,7 @@ static int init_hyp_mode(void)
 		}
 
 		/* Prepare the CPU initialization parameters */
-		cpu_prepare_hyp_mode(cpu);
+		cpu_prepare_hyp_mode(cpu, hyp_va_bits);
 	}
 
 	if (is_protected_kvm_enabled()) {
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 60ee3d9f01f8c..4efb983cff436 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1618,6 +1618,8 @@ static struct kvm_pgtable_mm_ops kvm_hyp_mm_ops = {
 int kvm_mmu_init(u32 *hyp_va_bits)
 {
 	int err;
+	u32 idmap_bits;
+	u32 kernel_bits;
 
 	hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start);
 	hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE);
@@ -1631,7 +1633,31 @@ int kvm_mmu_init(u32 *hyp_va_bits)
 	 */
 	BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK);
 
-	*hyp_va_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
+	/*
+	 * The ID map may be configured to use an extended virtual address
+	 * range. This is only the case if system RAM is out of range for the
+	 * currently configured page size and VA_BITS_MIN, in which case we will
+	 * also need the extended virtual range for the HYP ID map, or we won't
+	 * be able to enable the EL2 MMU.
+	 *
+	 * However, in some cases the ID map may be configured for fewer than
+	 * the number of VA bits used by the regular kernel stage 1. This
+	 * happens when VA_BITS=52 and the kernel image is placed in PA space
+	 * below 48 bits.
+	 *
+	 * At EL2, there is only one TTBR register, and we can't switch between
+	 * translation tables *and* update TCR_EL2.T0SZ at the same time. Bottom
+	 * line: we need to use the extended range with *both* our translation
+	 * tables.
+	 *
+	 * So use the maximum of the idmap VA bits and the regular kernel stage
+	 * 1 VA bits to assure that the hypervisor can both ID map its code page
+	 * and map any kernel memory.
+	 */
+	idmap_bits = 64 - ((idmap_t0sz & TCR_T0SZ_MASK) >> TCR_T0SZ_OFFSET);
+	kernel_bits = vabits_actual;
+	*hyp_va_bits = max(idmap_bits, kernel_bits);
+
 	kvm_debug("Using %u-bit virtual addresses at EL2\n", *hyp_va_bits);
 	kvm_debug("IDMAP page: %lx\n", hyp_idmap_start);
 	kvm_debug("HYP VA range: %lx:%lx\n",
-- 
GitLab


From a0d37784bfd7f699986ba3a64cfeb68a03cb7fd0 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Thu, 3 Nov 2022 15:05:07 +0000
Subject: [PATCH 1349/4122] KVM: arm64: Fix PAR_TO_HPFAR() to work
 independently of PA_BITS.

Kernel configs with PAGE_SIZE=64KB and PA_BITS=48 still advertise 52 bit
IPA space on HW that implements LPA. This is by design (admitedly this
is a very unlikely configuration in the real world).

However on such a config, attempting to create a vm with the guest
kernel placed above 48 bits in IPA space results in misbehaviour due to
the hypervisor incorrectly interpretting a faulting IPA.

Fix up PAR_TO_HPFAR() to always take 52 bits out of the PAR rather than
masking to CONFIG_ARM64_PA_BITS. If the system has a smaller implemented
PARange this should be safe because the bits are res0.

A more robust approach would be to discover the IPA size in use by the
page-table and mask based on that, to avoid relying on res0 reading back
as zero. But this information is difficult to access safely from the
code's location, so take the easy way out.

Fixes: bc1d7de8c550 ("kvm: arm64: Add 52bit support for PAR to HPFAR conversoin")
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
[maz: commit message fixes]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221103150507.32948-3-ryan.roberts@arm.com
---
 arch/arm64/include/asm/kvm_arm.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 8aa8492dafc0f..a82f2493a72bb 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -340,9 +340,13 @@
  * We have
  *	PAR	[PA_Shift - 1	: 12] = PA	[PA_Shift - 1 : 12]
  *	HPFAR	[PA_Shift - 9	: 4]  = FIPA	[PA_Shift - 1 : 12]
+ *
+ * Always assume 52 bit PA since at this point, we don't know how many PA bits
+ * the page table has been set up for. This should be safe since unused address
+ * bits in PAR are res0.
  */
 #define PAR_TO_HPFAR(par)		\
-	(((par) & GENMASK_ULL(PHYS_MASK_SHIFT - 1, 12)) >> 8)
+	(((par) & GENMASK_ULL(52 - 1, 12)) >> 8)
 
 #define ECN(x) { ESR_ELx_EC_##x, #x }
 
-- 
GitLab


From 4554bac48a8c464ff00136a64efe8847e4da4ea8 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:09:59 -0500
Subject: [PATCH 1350/4122] RDMA/rxe: Add ibdev_dbg macros for rxe

Add macros borrowed from siw to call dynamic debug macro ibdev_dbg.

Link: https://lore.kernel.org/r/20221103171013.20659-2-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe.h b/drivers/infiniband/sw/rxe/rxe.h
index 30fbdf3bc76a3..ab334900fcc3d 100644
--- a/drivers/infiniband/sw/rxe/rxe.h
+++ b/drivers/infiniband/sw/rxe/rxe.h
@@ -38,6 +38,25 @@
 
 #define RXE_ROCE_V2_SPORT		(0xc000)
 
+#define rxe_dbg(rxe, fmt, ...) ibdev_dbg(&(rxe)->ib_dev,		\
+		"%s: " fmt, __func__, ##__VA_ARGS__)
+#define rxe_dbg_uc(uc, fmt, ...) ibdev_dbg((uc)->ibuc.device,		\
+		"uc#%d %s: " fmt, (uc)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_pd(pd, fmt, ...) ibdev_dbg((pd)->ibpd.device,		\
+		"pd#%d %s: " fmt, (pd)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_ah(ah, fmt, ...) ibdev_dbg((ah)->ibah.device,		\
+		"ah#%d %s: " fmt, (ah)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_srq(srq, fmt, ...) ibdev_dbg((srq)->ibsrq.device,	\
+		"srq#%d %s: " fmt, (srq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_qp(qp, fmt, ...) ibdev_dbg((qp)->ibqp.device,		\
+		"qp#%d %s: " fmt, (qp)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_cq(cq, fmt, ...) ibdev_dbg((cq)->ibcq.device,		\
+		"cq#%d %s: " fmt, (cq)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_mr(mr, fmt, ...) ibdev_dbg((mr)->ibmr.device,		\
+		"mr#%d %s:  " fmt, (mr)->elem.index, __func__, ##__VA_ARGS__)
+#define rxe_dbg_mw(mw, fmt, ...) ibdev_dbg((mw)->ibmw.device,		\
+		"mw#%d %s:  " fmt, (mw)->elem.index, __func__, ##__VA_ARGS__)
+
 void rxe_set_mtu(struct rxe_dev *rxe, unsigned int dev_mtu);
 
 int rxe_add(struct rxe_dev *rxe, unsigned int mtu, const char *ibdev_name);
-- 
GitLab


From 27c4c520bd3908c095401e93c71e7d3696ae8bdd Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:00 -0500
Subject: [PATCH 1351/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_comp.c

Replace calls to pr_xxx() in rxe_comp.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-3-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 66f392810c860..4dca4f8bbb5aa 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -114,7 +114,7 @@ void retransmit_timer(struct timer_list *t)
 {
 	struct rxe_qp *qp = from_timer(qp, t, retrans_timer);
 
-	pr_debug("%s: fired for qp#%d\n", __func__, qp->elem.index);
+	rxe_dbg_qp(qp, "retransmit timer fired\n");
 
 	if (qp->valid) {
 		qp->comp.timeout = 1;
@@ -334,7 +334,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 				return COMPST_ERROR;
 
 			default:
-				pr_warn("unexpected nak %x\n", syn);
+				rxe_dbg_qp(qp, "unexpected nak %x\n", syn);
 				wqe->status = IB_WC_REM_OP_ERR;
 				return COMPST_ERROR;
 			}
@@ -345,7 +345,7 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		break;
 
 	default:
-		pr_warn("unexpected opcode\n");
+		rxe_dbg_qp(qp, "unexpected opcode\n");
 	}
 
 	return COMPST_ERROR;
@@ -598,8 +598,7 @@ int rxe_completer(void *arg)
 	state = COMPST_GET_ACK;
 
 	while (1) {
-		pr_debug("qp#%d state = %s\n", qp_num(qp),
-			 comp_state_name[state]);
+		rxe_dbg_qp(qp, "state = %s\n", comp_state_name[state]);
 		switch (state) {
 		case COMPST_GET_ACK:
 			skb = skb_dequeue(&qp->resp_pkts);
@@ -746,8 +745,7 @@ int rxe_completer(void *arg)
 				 * rnr timer has fired
 				 */
 				qp->req.wait_for_rnr_timer = 1;
-				pr_debug("qp#%d set rnr nak timer\n",
-					 qp_num(qp));
+				rxe_dbg_qp(qp, "set rnr nak timer\n");
 				mod_timer(&qp->rnr_nak_timer,
 					  jiffies + rnrnak_jiffies(aeth_syn(pkt)
 						& ~AETH_TYPE_MASK));
-- 
GitLab


From 52920f537ab08237bd8a3d30ccbb06a9d57717cf Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:01 -0500
Subject: [PATCH 1352/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_cq.c

Replace calls to pr_xxx() in rxe_cq.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-4-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_cq.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_cq.c b/drivers/infiniband/sw/rxe/rxe_cq.c
index b1a0ab3cd4bd1..1df186534639a 100644
--- a/drivers/infiniband/sw/rxe/rxe_cq.c
+++ b/drivers/infiniband/sw/rxe/rxe_cq.c
@@ -14,12 +14,12 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
 	int count;
 
 	if (cqe <= 0) {
-		pr_warn("cqe(%d) <= 0\n", cqe);
+		rxe_dbg(rxe, "cqe(%d) <= 0\n", cqe);
 		goto err1;
 	}
 
 	if (cqe > rxe->attr.max_cqe) {
-		pr_debug("cqe(%d) > max_cqe(%d)\n",
+		rxe_dbg(rxe, "cqe(%d) > max_cqe(%d)\n",
 				cqe, rxe->attr.max_cqe);
 		goto err1;
 	}
@@ -27,7 +27,7 @@ int rxe_cq_chk_attr(struct rxe_dev *rxe, struct rxe_cq *cq,
 	if (cq) {
 		count = queue_count(cq->queue, QUEUE_TYPE_TO_CLIENT);
 		if (cqe < count) {
-			pr_debug("cqe(%d) < current # elements in queue (%d)",
+			rxe_dbg_cq(cq, "cqe(%d) < current # elements in queue (%d)",
 					cqe, count);
 			goto err1;
 		}
@@ -65,7 +65,7 @@ int rxe_cq_from_init(struct rxe_dev *rxe, struct rxe_cq *cq, int cqe,
 	cq->queue = rxe_queue_init(rxe, &cqe,
 			sizeof(struct rxe_cqe), type);
 	if (!cq->queue) {
-		pr_warn("unable to create cq\n");
+		rxe_dbg(rxe, "unable to create cq\n");
 		return -ENOMEM;
 	}
 
-- 
GitLab


From 2778b72b1df0c8ef61e0b3b3ef1c8c62eb03fa75 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:02 -0500
Subject: [PATCH 1353/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mr.c

Replace calls to pr_xxx() in rxe_mr.c by rxe_dbg_mr().

Link: https://lore.kernel.org/r/20221103171013.20659-5-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c    | 40 ++++++++++++---------------
 drivers/infiniband/sw/rxe/rxe_verbs.c |  3 ++
 2 files changed, 20 insertions(+), 23 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index bc081002bddcb..cd846cf82a84c 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -38,8 +38,7 @@ int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
 		return 0;
 
 	default:
-		pr_warn("%s: mr type (%d) not supported\n",
-			__func__, mr->ibmr.type);
+		rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type);
 		return -EFAULT;
 	}
 }
@@ -125,8 +124,8 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
 	if (IS_ERR(umem)) {
-		pr_warn("%s: Unable to pin memory region err = %d\n",
-			__func__, (int)PTR_ERR(umem));
+		rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
+			(int)PTR_ERR(umem));
 		err = PTR_ERR(umem);
 		goto err_out;
 	}
@@ -137,8 +136,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	err = rxe_mr_alloc(mr, num_buf);
 	if (err) {
-		pr_warn("%s: Unable to allocate memory for map\n",
-				__func__);
+		rxe_dbg_mr(mr, "Unable to allocate memory for map\n");
 		goto err_release_umem;
 	}
 
@@ -159,8 +157,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 			vaddr = page_address(sg_page_iter_page(&sg_iter));
 			if (!vaddr) {
-				pr_warn("%s: Unable to get virtual address\n",
-						__func__);
+				rxe_dbg_mr(mr, "Unable to get virtual address\n");
 				err = -ENOMEM;
 				goto err_cleanup_map;
 			}
@@ -255,7 +252,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 	void *addr;
 
 	if (mr->state != RXE_MR_STATE_VALID) {
-		pr_warn("mr not in valid state\n");
+		rxe_dbg_mr(mr, "Not in valid state\n");
 		addr = NULL;
 		goto out;
 	}
@@ -266,7 +263,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 	}
 
 	if (mr_check_range(mr, iova, length)) {
-		pr_warn("range violation\n");
+		rxe_dbg_mr(mr, "Range violation\n");
 		addr = NULL;
 		goto out;
 	}
@@ -274,7 +271,7 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 	lookup_iova(mr, iova, &m, &n, &offset);
 
 	if (offset + length > mr->map[m]->buf[n].size) {
-		pr_warn("crosses page boundary\n");
+		rxe_dbg_mr(mr, "Crosses page boundary\n");
 		addr = NULL;
 		goto out;
 	}
@@ -527,27 +524,26 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
 
 	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
 	if (!mr) {
-		pr_err("%s: No MR for key %#x\n", __func__, key);
+		rxe_dbg_mr(mr, "No MR for key %#x\n", key);
 		ret = -EINVAL;
 		goto err;
 	}
 
 	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
-		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
-			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
+		rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
+			key, (mr->rkey ? mr->rkey : mr->lkey));
 		ret = -EINVAL;
 		goto err_drop_ref;
 	}
 
 	if (atomic_read(&mr->num_mw) > 0) {
-		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
-			__func__);
+		rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n");
 		ret = -EINVAL;
 		goto err_drop_ref;
 	}
 
 	if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
-		pr_warn("%s: mr type (%d) is wrong\n", __func__, mr->ibmr.type);
+		rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type);
 		ret = -EINVAL;
 		goto err_drop_ref;
 	}
@@ -576,22 +572,20 @@ int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 
 	/* user can only register MR in free state */
 	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
-		pr_warn("%s: mr->lkey = 0x%x not free\n",
-			__func__, mr->lkey);
+		rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey);
 		return -EINVAL;
 	}
 
 	/* user can only register mr with qp in same protection domain */
 	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
-		pr_warn("%s: qp->pd and mr->pd don't match\n",
-			__func__);
+		rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n");
 		return -EINVAL;
 	}
 
 	/* user is only allowed to change key portion of l/rkey */
 	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
-		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
-			__func__, key, mr->lkey);
+		rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n",
+			key, mr->lkey);
 		return -EINVAL;
 	}
 
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index bcdfdadaebbc8..510ae471ac7a8 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -875,6 +875,7 @@ static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access)
 
 	rxe_get(pd);
 	mr->ibmr.pd = ibpd;
+	mr->ibmr.device = ibpd->device;
 
 	rxe_mr_init_dma(access, mr);
 	rxe_finalize(mr);
@@ -899,6 +900,7 @@ static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd,
 
 	rxe_get(pd);
 	mr->ibmr.pd = ibpd;
+	mr->ibmr.device = ibpd->device;
 
 	err = rxe_mr_init_user(rxe, start, length, iova, access, mr);
 	if (err)
@@ -930,6 +932,7 @@ static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
 
 	rxe_get(pd);
 	mr->ibmr.pd = ibpd;
+	mr->ibmr.device = ibpd->device;
 
 	err = rxe_mr_init_fast(max_num_sg, mr);
 	if (err)
-- 
GitLab


From e8a87efdf87455454d0a14fd486c679769bfeee2 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:03 -0500
Subject: [PATCH 1354/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mw.c

Replace calls to pr_xxx() int rxe_mw.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-6-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mw.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mw.c b/drivers/infiniband/sw/rxe/rxe_mw.c
index 8df1c9066ed89..afa5ce1a71166 100644
--- a/drivers/infiniband/sw/rxe/rxe_mw.c
+++ b/drivers/infiniband/sw/rxe/rxe_mw.c
@@ -52,14 +52,14 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 {
 	if (mw->ibmw.type == IB_MW_TYPE_1) {
 		if (unlikely(mw->state != RXE_MW_STATE_VALID)) {
-			pr_err_once(
+			rxe_dbg_mw(mw,
 				"attempt to bind a type 1 MW not in the valid state\n");
 			return -EINVAL;
 		}
 
 		/* o10-36.2.2 */
 		if (unlikely((mw->access & IB_ZERO_BASED))) {
-			pr_err_once("attempt to bind a zero based type 1 MW\n");
+			rxe_dbg_mw(mw, "attempt to bind a zero based type 1 MW\n");
 			return -EINVAL;
 		}
 	}
@@ -67,21 +67,21 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	if (mw->ibmw.type == IB_MW_TYPE_2) {
 		/* o10-37.2.30 */
 		if (unlikely(mw->state != RXE_MW_STATE_FREE)) {
-			pr_err_once(
+			rxe_dbg_mw(mw,
 				"attempt to bind a type 2 MW not in the free state\n");
 			return -EINVAL;
 		}
 
 		/* C10-72 */
 		if (unlikely(qp->pd != to_rpd(mw->ibmw.pd))) {
-			pr_err_once(
+			rxe_dbg_mw(mw,
 				"attempt to bind type 2 MW with qp with different PD\n");
 			return -EINVAL;
 		}
 
 		/* o10-37.2.40 */
 		if (unlikely(!mr || wqe->wr.wr.mw.length == 0)) {
-			pr_err_once(
+			rxe_dbg_mw(mw,
 				"attempt to invalidate type 2 MW by binding with NULL or zero length MR\n");
 			return -EINVAL;
 		}
@@ -92,13 +92,13 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		return 0;
 
 	if (unlikely(mr->access & IB_ZERO_BASED)) {
-		pr_err_once("attempt to bind MW to zero based MR\n");
+		rxe_dbg_mw(mw, "attempt to bind MW to zero based MR\n");
 		return -EINVAL;
 	}
 
 	/* C10-73 */
 	if (unlikely(!(mr->access & IB_ACCESS_MW_BIND))) {
-		pr_err_once(
+		rxe_dbg_mw(mw,
 			"attempt to bind an MW to an MR without bind access\n");
 		return -EINVAL;
 	}
@@ -107,7 +107,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	if (unlikely((mw->access &
 		      (IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)) &&
 		     !(mr->access & IB_ACCESS_LOCAL_WRITE))) {
-		pr_err_once(
+		rxe_dbg_mw(mw,
 			"attempt to bind an Writable MW to an MR without local write access\n");
 		return -EINVAL;
 	}
@@ -115,7 +115,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 	/* C10-75 */
 	if (mw->access & IB_ZERO_BASED) {
 		if (unlikely(wqe->wr.wr.mw.length > mr->ibmr.length)) {
-			pr_err_once(
+			rxe_dbg_mw(mw,
 				"attempt to bind a ZB MW outside of the MR\n");
 			return -EINVAL;
 		}
@@ -123,7 +123,7 @@ static int rxe_check_bind_mw(struct rxe_qp *qp, struct rxe_send_wqe *wqe,
 		if (unlikely((wqe->wr.wr.mw.addr < mr->ibmr.iova) ||
 			     ((wqe->wr.wr.mw.addr + wqe->wr.wr.mw.length) >
 			      (mr->ibmr.iova + mr->ibmr.length)))) {
-			pr_err_once(
+			rxe_dbg_mw(mw,
 				"attempt to bind a VA MW outside of the MR\n");
 			return -EINVAL;
 		}
-- 
GitLab


From 34549e88e0a3088416177023abf1232fe40e721c Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:04 -0500
Subject: [PATCH 1355/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_net.c

Replace (some) calls to pr_xxx() in rxe_net.c with rxe_dbg_xxx().
Calls with a rxe device not yet in scope are left as is.

Link: https://lore.kernel.org/r/20221103171013.20659-7-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_net.c | 38 ++++++++++++++++-------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_net.c b/drivers/infiniband/sw/rxe/rxe_net.c
index c36cad9c7a664..e02e1624bcf4d 100644
--- a/drivers/infiniband/sw/rxe/rxe_net.c
+++ b/drivers/infiniband/sw/rxe/rxe_net.c
@@ -20,9 +20,10 @@
 
 static struct rxe_recv_sockets recv_sockets;
 
-static struct dst_entry *rxe_find_route4(struct net_device *ndev,
-				  struct in_addr *saddr,
-				  struct in_addr *daddr)
+static struct dst_entry *rxe_find_route4(struct rxe_qp *qp,
+					 struct net_device *ndev,
+					 struct in_addr *saddr,
+					 struct in_addr *daddr)
 {
 	struct rtable *rt;
 	struct flowi4 fl = { { 0 } };
@@ -35,7 +36,7 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
 
 	rt = ip_route_output_key(&init_net, &fl);
 	if (IS_ERR(rt)) {
-		pr_err_ratelimited("no route to %pI4\n", &daddr->s_addr);
+		rxe_dbg_qp(qp, "no route to %pI4\n", &daddr->s_addr);
 		return NULL;
 	}
 
@@ -43,7 +44,8 @@ static struct dst_entry *rxe_find_route4(struct net_device *ndev,
 }
 
 #if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+					 struct net_device *ndev,
 					 struct in6_addr *saddr,
 					 struct in6_addr *daddr)
 {
@@ -60,12 +62,12 @@ static struct dst_entry *rxe_find_route6(struct net_device *ndev,
 					       recv_sockets.sk6->sk, &fl6,
 					       NULL);
 	if (IS_ERR(ndst)) {
-		pr_err_ratelimited("no route to %pI6\n", daddr);
+		rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
 		return NULL;
 	}
 
 	if (unlikely(ndst->error)) {
-		pr_err("no route to %pI6\n", daddr);
+		rxe_dbg_qp(qp, "no route to %pI6\n", daddr);
 		goto put;
 	}
 
@@ -77,7 +79,8 @@ put:
 
 #else
 
-static struct dst_entry *rxe_find_route6(struct net_device *ndev,
+static struct dst_entry *rxe_find_route6(struct rxe_qp *qp,
+					 struct net_device *ndev,
 					 struct in6_addr *saddr,
 					 struct in6_addr *daddr)
 {
@@ -105,14 +108,14 @@ static struct dst_entry *rxe_find_route(struct net_device *ndev,
 
 			saddr = &av->sgid_addr._sockaddr_in.sin_addr;
 			daddr = &av->dgid_addr._sockaddr_in.sin_addr;
-			dst = rxe_find_route4(ndev, saddr, daddr);
+			dst = rxe_find_route4(qp, ndev, saddr, daddr);
 		} else if (av->network_type == RXE_NETWORK_TYPE_IPV6) {
 			struct in6_addr *saddr6;
 			struct in6_addr *daddr6;
 
 			saddr6 = &av->sgid_addr._sockaddr_in6.sin6_addr;
 			daddr6 = &av->dgid_addr._sockaddr_in6.sin6_addr;
-			dst = rxe_find_route6(ndev, saddr6, daddr6);
+			dst = rxe_find_route6(qp, ndev, saddr6, daddr6);
 #if IS_ENABLED(CONFIG_IPV6)
 			if (dst)
 				qp->dst_cookie =
@@ -282,7 +285,7 @@ static int prepare4(struct rxe_av *av, struct rxe_pkt_info *pkt,
 
 	dst = rxe_find_route(skb->dev, qp, av);
 	if (!dst) {
-		pr_err("Host not reachable\n");
+		rxe_dbg_qp(qp, "Host not reachable\n");
 		return -EHOSTUNREACH;
 	}
 
@@ -306,7 +309,7 @@ static int prepare6(struct rxe_av *av, struct rxe_pkt_info *pkt,
 
 	dst = rxe_find_route(skb->dev, qp, av);
 	if (!dst) {
-		pr_err("Host not reachable\n");
+		rxe_dbg_qp(qp, "Host not reachable\n");
 		return -EHOSTUNREACH;
 	}
 
@@ -365,7 +368,8 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 	} else if (skb->protocol == htons(ETH_P_IPV6)) {
 		err = ip6_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb);
 	} else {
-		pr_err("Unknown layer 3 protocol: %d\n", skb->protocol);
+		rxe_dbg_qp(pkt->qp, "Unknown layer 3 protocol: %d\n",
+				skb->protocol);
 		atomic_dec(&pkt->qp->skb_out);
 		rxe_put(pkt->qp);
 		kfree_skb(skb);
@@ -373,7 +377,7 @@ static int rxe_send(struct sk_buff *skb, struct rxe_pkt_info *pkt)
 	}
 
 	if (unlikely(net_xmit_eval(err))) {
-		pr_debug("error sending packet: %d\n", err);
+		rxe_dbg_qp(pkt->qp, "error sending packet: %d\n", err);
 		return -EAGAIN;
 	}
 
@@ -411,7 +415,7 @@ int rxe_xmit_packet(struct rxe_qp *qp, struct rxe_pkt_info *pkt,
 
 	if ((is_request && (qp->req.state != QP_STATE_READY)) ||
 	    (!is_request && (qp->resp.state != QP_STATE_READY))) {
-		pr_info("Packet dropped. QP is not in ready state\n");
+		rxe_dbg_qp(qp, "Packet dropped. QP is not in ready state\n");
 		goto drop;
 	}
 
@@ -592,7 +596,7 @@ static int rxe_notify(struct notifier_block *not_blk,
 		rxe_port_down(rxe);
 		break;
 	case NETDEV_CHANGEMTU:
-		pr_info("%s changed mtu to %d\n", ndev->name, ndev->mtu);
+		rxe_dbg(rxe, "%s changed mtu to %d\n", ndev->name, ndev->mtu);
 		rxe_set_mtu(rxe, ndev->mtu);
 		break;
 	case NETDEV_CHANGE:
@@ -604,7 +608,7 @@ static int rxe_notify(struct notifier_block *not_blk,
 	case NETDEV_CHANGENAME:
 	case NETDEV_FEAT_CHANGE:
 	default:
-		pr_info("ignoring netdev event = %ld for %s\n",
+		rxe_dbg(rxe, "ignoring netdev event = %ld for %s\n",
 			event, ndev->name);
 		break;
 	}
-- 
GitLab


From 6af70060d2e561d6479b33fe94cc2f38582e830b Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:05 -0500
Subject: [PATCH 1356/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_qp.c

Replace calls to pr_xxx() in rxe_qp.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-8-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_qp.c | 65 ++++++++++++++----------------
 1 file changed, 30 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 3f6d62a80bea9..bcbfe6068b8b0 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -19,33 +19,33 @@ static int rxe_qp_chk_cap(struct rxe_dev *rxe, struct ib_qp_cap *cap,
 			  int has_srq)
 {
 	if (cap->max_send_wr > rxe->attr.max_qp_wr) {
-		pr_debug("invalid send wr = %u > %d\n",
+		rxe_dbg(rxe, "invalid send wr = %u > %d\n",
 			 cap->max_send_wr, rxe->attr.max_qp_wr);
 		goto err1;
 	}
 
 	if (cap->max_send_sge > rxe->attr.max_send_sge) {
-		pr_debug("invalid send sge = %u > %d\n",
+		rxe_dbg(rxe, "invalid send sge = %u > %d\n",
 			 cap->max_send_sge, rxe->attr.max_send_sge);
 		goto err1;
 	}
 
 	if (!has_srq) {
 		if (cap->max_recv_wr > rxe->attr.max_qp_wr) {
-			pr_debug("invalid recv wr = %u > %d\n",
+			rxe_dbg(rxe, "invalid recv wr = %u > %d\n",
 				 cap->max_recv_wr, rxe->attr.max_qp_wr);
 			goto err1;
 		}
 
 		if (cap->max_recv_sge > rxe->attr.max_recv_sge) {
-			pr_debug("invalid recv sge = %u > %d\n",
+			rxe_dbg(rxe, "invalid recv sge = %u > %d\n",
 				 cap->max_recv_sge, rxe->attr.max_recv_sge);
 			goto err1;
 		}
 	}
 
 	if (cap->max_inline_data > rxe->max_inline_data) {
-		pr_debug("invalid max inline data = %u > %d\n",
+		rxe_dbg(rxe, "invalid max inline data = %u > %d\n",
 			 cap->max_inline_data, rxe->max_inline_data);
 		goto err1;
 	}
@@ -73,7 +73,7 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
 	}
 
 	if (!init->recv_cq || !init->send_cq) {
-		pr_debug("missing cq\n");
+		rxe_dbg(rxe, "missing cq\n");
 		goto err1;
 	}
 
@@ -82,14 +82,14 @@ int rxe_qp_chk_init(struct rxe_dev *rxe, struct ib_qp_init_attr *init)
 
 	if (init->qp_type == IB_QPT_GSI) {
 		if (!rdma_is_port_valid(&rxe->ib_dev, port_num)) {
-			pr_debug("invalid port = %d\n", port_num);
+			rxe_dbg(rxe, "invalid port = %d\n", port_num);
 			goto err1;
 		}
 
 		port = &rxe->port;
 
 		if (init->qp_type == IB_QPT_GSI && port->qp_gsi_index) {
-			pr_debug("GSI QP exists for port %d\n", port_num);
+			rxe_dbg(rxe, "GSI QP exists for port %d\n", port_num);
 			goto err1;
 		}
 	}
@@ -264,9 +264,6 @@ static int rxe_qp_init_resp(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 		wqe_size = rcv_wqe_size(qp->rq.max_sge);
 
-		pr_debug("qp#%d max_wr = %d, max_sge = %d, wqe_size = %d\n",
-			 qp_num(qp), qp->rq.max_wr, qp->rq.max_sge, wqe_size);
-
 		type = QUEUE_TYPE_FROM_CLIENT;
 		qp->rq.queue = rxe_queue_init(rxe, &qp->rq.max_wr,
 					wqe_size, type);
@@ -395,7 +392,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 					attr->qp_state : cur_state;
 
 	if (!ib_modify_qp_is_ok(cur_state, new_state, qp_type(qp), mask)) {
-		pr_debug("invalid mask or state for qp\n");
+		rxe_dbg_qp(qp, "invalid mask or state\n");
 		goto err1;
 	}
 
@@ -409,7 +406,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	if (mask & IB_QP_PORT) {
 		if (!rdma_is_port_valid(&rxe->ib_dev, attr->port_num)) {
-			pr_debug("invalid port %d\n", attr->port_num);
+			rxe_dbg_qp(qp, "invalid port %d\n", attr->port_num);
 			goto err1;
 		}
 	}
@@ -424,11 +421,11 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 		if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
 			goto err1;
 		if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num))  {
-			pr_debug("invalid alt port %d\n", attr->alt_port_num);
+			rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num);
 			goto err1;
 		}
 		if (attr->alt_timeout > 31) {
-			pr_debug("invalid QP alt timeout %d > 31\n",
+			rxe_dbg_qp(qp, "invalid alt timeout %d > 31\n",
 				 attr->alt_timeout);
 			goto err1;
 		}
@@ -441,7 +438,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 		enum ib_mtu mtu = attr->path_mtu;
 
 		if (mtu > max_mtu) {
-			pr_debug("invalid mtu (%d) > (%d)\n",
+			rxe_dbg_qp(qp, "invalid mtu (%d) > (%d)\n",
 				 ib_mtu_enum_to_int(mtu),
 				 ib_mtu_enum_to_int(max_mtu));
 			goto err1;
@@ -450,7 +447,7 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	if (mask & IB_QP_MAX_QP_RD_ATOMIC) {
 		if (attr->max_rd_atomic > rxe->attr.max_qp_rd_atom) {
-			pr_debug("invalid max_rd_atomic %d > %d\n",
+			rxe_dbg_qp(qp, "invalid max_rd_atomic %d > %d\n",
 				 attr->max_rd_atomic,
 				 rxe->attr.max_qp_rd_atom);
 			goto err1;
@@ -459,7 +456,8 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 
 	if (mask & IB_QP_TIMEOUT) {
 		if (attr->timeout > 31) {
-			pr_debug("invalid QP timeout %d > 31\n", attr->timeout);
+			rxe_dbg_qp(qp, "invalid timeout %d > 31\n",
+					attr->timeout);
 			goto err1;
 		}
 	}
@@ -637,27 +635,24 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 	if (mask & IB_QP_RETRY_CNT) {
 		qp->attr.retry_cnt = attr->retry_cnt;
 		qp->comp.retry_cnt = attr->retry_cnt;
-		pr_debug("qp#%d set retry count = %d\n", qp_num(qp),
-			 attr->retry_cnt);
+		rxe_dbg_qp(qp, "set retry count = %d\n", attr->retry_cnt);
 	}
 
 	if (mask & IB_QP_RNR_RETRY) {
 		qp->attr.rnr_retry = attr->rnr_retry;
 		qp->comp.rnr_retry = attr->rnr_retry;
-		pr_debug("qp#%d set rnr retry count = %d\n", qp_num(qp),
-			 attr->rnr_retry);
+		rxe_dbg_qp(qp, "set rnr retry count = %d\n", attr->rnr_retry);
 	}
 
 	if (mask & IB_QP_RQ_PSN) {
 		qp->attr.rq_psn = (attr->rq_psn & BTH_PSN_MASK);
 		qp->resp.psn = qp->attr.rq_psn;
-		pr_debug("qp#%d set resp psn = 0x%x\n", qp_num(qp),
-			 qp->resp.psn);
+		rxe_dbg_qp(qp, "set resp psn = 0x%x\n", qp->resp.psn);
 	}
 
 	if (mask & IB_QP_MIN_RNR_TIMER) {
 		qp->attr.min_rnr_timer = attr->min_rnr_timer;
-		pr_debug("qp#%d set min rnr timer = 0x%x\n", qp_num(qp),
+		rxe_dbg_qp(qp, "set min rnr timer = 0x%x\n",
 			 attr->min_rnr_timer);
 	}
 
@@ -665,7 +660,7 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 		qp->attr.sq_psn = (attr->sq_psn & BTH_PSN_MASK);
 		qp->req.psn = qp->attr.sq_psn;
 		qp->comp.psn = qp->attr.sq_psn;
-		pr_debug("qp#%d set req psn = 0x%x\n", qp_num(qp), qp->req.psn);
+		rxe_dbg_qp(qp, "set req psn = 0x%x\n", qp->req.psn);
 	}
 
 	if (mask & IB_QP_PATH_MIG_STATE)
@@ -679,40 +674,40 @@ int rxe_qp_from_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask,
 
 		switch (attr->qp_state) {
 		case IB_QPS_RESET:
-			pr_debug("qp#%d state -> RESET\n", qp_num(qp));
+			rxe_dbg_qp(qp, "state -> RESET\n");
 			rxe_qp_reset(qp);
 			break;
 
 		case IB_QPS_INIT:
-			pr_debug("qp#%d state -> INIT\n", qp_num(qp));
+			rxe_dbg_qp(qp, "state -> INIT\n");
 			qp->req.state = QP_STATE_INIT;
 			qp->resp.state = QP_STATE_INIT;
 			qp->comp.state = QP_STATE_INIT;
 			break;
 
 		case IB_QPS_RTR:
-			pr_debug("qp#%d state -> RTR\n", qp_num(qp));
+			rxe_dbg_qp(qp, "state -> RTR\n");
 			qp->resp.state = QP_STATE_READY;
 			break;
 
 		case IB_QPS_RTS:
-			pr_debug("qp#%d state -> RTS\n", qp_num(qp));
+			rxe_dbg_qp(qp, "state -> RTS\n");
 			qp->req.state = QP_STATE_READY;
 			qp->comp.state = QP_STATE_READY;
 			break;
 
 		case IB_QPS_SQD:
-			pr_debug("qp#%d state -> SQD\n", qp_num(qp));
+			rxe_dbg_qp(qp, "state -> SQD\n");
 			rxe_qp_drain(qp);
 			break;
 
 		case IB_QPS_SQE:
-			pr_warn("qp#%d state -> SQE !!?\n", qp_num(qp));
+			rxe_dbg_qp(qp, "state -> SQE !!?\n");
 			/* Not possible from modify_qp. */
 			break;
 
 		case IB_QPS_ERR:
-			pr_debug("qp#%d state -> ERR\n", qp_num(qp));
+			rxe_dbg_qp(qp, "state -> ERR\n");
 			rxe_qp_error(qp);
 			break;
 		}
@@ -752,7 +747,7 @@ int rxe_qp_to_attr(struct rxe_qp *qp, struct ib_qp_attr *attr, int mask)
 		attr->sq_draining = 0;
 	}
 
-	pr_debug("attr->sq_draining = %d\n", attr->sq_draining);
+	rxe_dbg_qp(qp, "attr->sq_draining = %d\n", attr->sq_draining);
 
 	return 0;
 }
@@ -764,7 +759,7 @@ int rxe_qp_chk_destroy(struct rxe_qp *qp)
 	 * will fail immediately.
 	 */
 	if (atomic_read(&qp->mcg_num)) {
-		pr_debug("Attempt to destroy QP while attached to multicast group\n");
+		rxe_dbg_qp(qp, "Attempt to destroy while attached to multicast group\n");
 		return -EBUSY;
 	}
 
-- 
GitLab


From 0edfb15e30a53e8bd039c35a17f61e49cba9f4dd Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:06 -0500
Subject: [PATCH 1357/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_req.c

Replace calls to pr_xxx() in rxe_req.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-9-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 41f1d84f0acbf..4d45f508392fe 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -100,7 +100,7 @@ void rnr_nak_timer(struct timer_list *t)
 {
 	struct rxe_qp *qp = from_timer(qp, t, rnr_nak_timer);
 
-	pr_debug("%s: fired for qp#%d\n", __func__, qp_num(qp));
+	rxe_dbg_qp(qp, "nak timer fired\n");
 
 	/* request a send queue retry */
 	qp->req.need_retry = 1;
@@ -595,7 +595,7 @@ static int rxe_do_local_ops(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
 		}
 		break;
 	default:
-		pr_err("Unexpected send wqe opcode %d\n", opcode);
+		rxe_dbg_qp(qp, "Unexpected send wqe opcode %d\n", opcode);
 		wqe->status = IB_WC_LOC_QP_OP_ERR;
 		return -EINVAL;
 	}
@@ -748,14 +748,14 @@ int rxe_requester(void *arg)
 
 	av = rxe_get_av(&pkt, &ah);
 	if (unlikely(!av)) {
-		pr_err("qp#%d Failed no address vector\n", qp_num(qp));
+		rxe_dbg_qp(qp, "Failed no address vector\n");
 		wqe->status = IB_WC_LOC_QP_OP_ERR;
 		goto err;
 	}
 
 	skb = init_req_packet(qp, av, wqe, opcode, payload, &pkt);
 	if (unlikely(!skb)) {
-		pr_err("qp#%d Failed allocating skb\n", qp_num(qp));
+		rxe_dbg_qp(qp, "Failed allocating skb\n");
 		wqe->status = IB_WC_LOC_QP_OP_ERR;
 		if (ah)
 			rxe_put(ah);
@@ -764,7 +764,7 @@ int rxe_requester(void *arg)
 
 	err = finish_packet(qp, av, wqe, &pkt, skb, payload);
 	if (unlikely(err)) {
-		pr_debug("qp#%d Error during finish packet\n", qp_num(qp));
+		rxe_dbg_qp(qp, "Error during finish packet\n");
 		if (err == -EFAULT)
 			wqe->status = IB_WC_LOC_PROT_ERR;
 		else
-- 
GitLab


From 74ddf7233c571d51bcb802bb192a9f7d77cd8830 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:07 -0500
Subject: [PATCH 1358/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_resp.c

Replace calls to pr_xxx() in rxe_resp.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-10-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 382d2053db43e..6761bcd1d4d8f 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -317,7 +317,7 @@ static enum resp_states get_srq_wqe(struct rxe_qp *qp)
 	/* don't trust user space data */
 	if (unlikely(wqe->dma.num_sge > srq->rq.max_sge)) {
 		spin_unlock_irqrestore(&srq->rq.consumer_lock, flags);
-		pr_warn("%s: invalid num_sge in SRQ entry\n", __func__);
+		rxe_dbg_qp(qp, "invalid num_sge in SRQ entry\n");
 		return RESPST_ERR_MALFORMED_WQE;
 	}
 	size = sizeof(*wqe) + wqe->dma.num_sge*sizeof(struct rxe_sge);
@@ -473,15 +473,14 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	if (rkey_is_mw(rkey)) {
 		mw = rxe_lookup_mw(qp, access, rkey);
 		if (!mw) {
-			pr_debug("%s: no MW matches rkey %#x\n",
-					__func__, rkey);
+			rxe_dbg_qp(qp, "no MW matches rkey %#x\n", rkey);
 			state = RESPST_ERR_RKEY_VIOLATION;
 			goto err;
 		}
 
 		mr = mw->mr;
 		if (!mr) {
-			pr_err("%s: MW doesn't have an MR\n", __func__);
+			rxe_dbg_qp(qp, "MW doesn't have an MR\n");
 			state = RESPST_ERR_RKEY_VIOLATION;
 			goto err;
 		}
@@ -494,8 +493,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	} else {
 		mr = lookup_mr(qp->pd, access, rkey, RXE_LOOKUP_REMOTE);
 		if (!mr) {
-			pr_debug("%s: no MR matches rkey %#x\n",
-					__func__, rkey);
+			rxe_dbg_qp(qp, "no MR matches rkey %#x\n", rkey);
 			state = RESPST_ERR_RKEY_VIOLATION;
 			goto err;
 		}
@@ -1064,7 +1062,7 @@ static int send_common_ack(struct rxe_qp *qp, u8 syndrome, u32 psn,
 
 	err = rxe_xmit_packet(qp, &ack_pkt, skb);
 	if (err)
-		pr_err_ratelimited("Failed sending %s\n", msg);
+		rxe_dbg_qp(qp, "Failed sending %s\n", msg);
 
 	return err;
 }
@@ -1310,8 +1308,7 @@ int rxe_responder(void *arg)
 	}
 
 	while (1) {
-		pr_debug("qp#%d state = %s\n", qp_num(qp),
-			 resp_state_name[state]);
+		rxe_dbg_qp(qp, "state = %s\n", resp_state_name[state]);
 		switch (state) {
 		case RESPST_GET_REQ:
 			state = get_req(qp, &pkt);
@@ -1468,7 +1465,7 @@ int rxe_responder(void *arg)
 
 		case RESPST_ERROR:
 			qp->resp.goto_error = 0;
-			pr_debug("qp#%d moved to error state\n", qp_num(qp));
+			rxe_dbg_qp(qp, "moved to error state\n");
 			rxe_qp_error(qp);
 			goto exit;
 
-- 
GitLab


From 0e6090024b3ebf8d162f82c542eba9632a9c85fc Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:08 -0500
Subject: [PATCH 1359/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_srq.c

Replace calls to pr_xxx() in rxe_srq.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-11-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_srq.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_srq.c b/drivers/infiniband/sw/rxe/rxe_srq.c
index 02b39498c370d..82e37a41ced40 100644
--- a/drivers/infiniband/sw/rxe/rxe_srq.c
+++ b/drivers/infiniband/sw/rxe/rxe_srq.c
@@ -13,13 +13,13 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
 	struct ib_srq_attr *attr = &init->attr;
 
 	if (attr->max_wr > rxe->attr.max_srq_wr) {
-		pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+		rxe_dbg(rxe, "max_wr(%d) > max_srq_wr(%d)\n",
 			attr->max_wr, rxe->attr.max_srq_wr);
 		goto err1;
 	}
 
 	if (attr->max_wr <= 0) {
-		pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+		rxe_dbg(rxe, "max_wr(%d) <= 0\n", attr->max_wr);
 		goto err1;
 	}
 
@@ -27,7 +27,7 @@ int rxe_srq_chk_init(struct rxe_dev *rxe, struct ib_srq_init_attr *init)
 		attr->max_wr = RXE_MIN_SRQ_WR;
 
 	if (attr->max_sge > rxe->attr.max_srq_sge) {
-		pr_warn("max_sge(%d) > max_srq_sge(%d)\n",
+		rxe_dbg(rxe, "max_sge(%d) > max_srq_sge(%d)\n",
 			attr->max_sge, rxe->attr.max_srq_sge);
 		goto err1;
 	}
@@ -65,7 +65,7 @@ int rxe_srq_from_init(struct rxe_dev *rxe, struct rxe_srq *srq,
 	type = QUEUE_TYPE_FROM_CLIENT;
 	q = rxe_queue_init(rxe, &srq->rq.max_wr, srq_wqe_size, type);
 	if (!q) {
-		pr_warn("unable to allocate queue for srq\n");
+		rxe_dbg_srq(srq, "Unable to allocate queue\n");
 		return -ENOMEM;
 	}
 
@@ -94,24 +94,24 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 		     struct ib_srq_attr *attr, enum ib_srq_attr_mask mask)
 {
 	if (srq->error) {
-		pr_warn("srq in error state\n");
+		rxe_dbg_srq(srq, "in error state\n");
 		goto err1;
 	}
 
 	if (mask & IB_SRQ_MAX_WR) {
 		if (attr->max_wr > rxe->attr.max_srq_wr) {
-			pr_warn("max_wr(%d) > max_srq_wr(%d)\n",
+			rxe_dbg_srq(srq, "max_wr(%d) > max_srq_wr(%d)\n",
 				attr->max_wr, rxe->attr.max_srq_wr);
 			goto err1;
 		}
 
 		if (attr->max_wr <= 0) {
-			pr_warn("max_wr(%d) <= 0\n", attr->max_wr);
+			rxe_dbg_srq(srq, "max_wr(%d) <= 0\n", attr->max_wr);
 			goto err1;
 		}
 
 		if (srq->limit && (attr->max_wr < srq->limit)) {
-			pr_warn("max_wr (%d) < srq->limit (%d)\n",
+			rxe_dbg_srq(srq, "max_wr (%d) < srq->limit (%d)\n",
 				attr->max_wr, srq->limit);
 			goto err1;
 		}
@@ -122,13 +122,13 @@ int rxe_srq_chk_attr(struct rxe_dev *rxe, struct rxe_srq *srq,
 
 	if (mask & IB_SRQ_LIMIT) {
 		if (attr->srq_limit > rxe->attr.max_srq_wr) {
-			pr_warn("srq_limit(%d) > max_srq_wr(%d)\n",
+			rxe_dbg_srq(srq, "srq_limit(%d) > max_srq_wr(%d)\n",
 				attr->srq_limit, rxe->attr.max_srq_wr);
 			goto err1;
 		}
 
 		if (attr->srq_limit > srq->rq.queue->buf->index_mask) {
-			pr_warn("srq_limit (%d) > cur limit(%d)\n",
+			rxe_dbg_srq(srq, "srq_limit (%d) > cur limit(%d)\n",
 				attr->srq_limit,
 				srq->rq.queue->buf->index_mask);
 			goto err1;
-- 
GitLab


From 14e501fdb0de3b45b8ee8a2a031c3c64aaa01817 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:09 -0500
Subject: [PATCH 1360/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_verbs.c

Replace calls to pr_xxx() in rxe_verbs.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-12-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_verbs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index 510ae471ac7a8..e6eca21c54e6e 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -1103,7 +1103,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
 
 	err = ib_register_device(dev, ibdev_name, NULL);
 	if (err)
-		pr_warn("%s failed with error %d\n", __func__, err);
+		rxe_dbg(rxe, "failed with error %d\n", err);
 
 	/*
 	 * Note that rxe may be invalid at this point if another thread
-- 
GitLab


From 25fd735a4c9e38c65904eea2769ed92f6f8586d0 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:10 -0500
Subject: [PATCH 1361/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_av.c

Replace calls to pr_xxx() in rxe_av.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-13-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_av.c    | 43 ++++++++++++++++++++++-----
 drivers/infiniband/sw/rxe/rxe_loc.h   |  8 ++---
 drivers/infiniband/sw/rxe/rxe_qp.c    |  4 +--
 drivers/infiniband/sw/rxe/rxe_verbs.c | 13 ++++----
 4 files changed, 47 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_av.c b/drivers/infiniband/sw/rxe/rxe_av.c
index 3b05314ca739e..889d7adbd4550 100644
--- a/drivers/infiniband/sw/rxe/rxe_av.c
+++ b/drivers/infiniband/sw/rxe/rxe_av.c
@@ -14,26 +14,45 @@ void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av)
 	memcpy(av->dmac, attr->roce.dmac, ETH_ALEN);
 }
 
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
+static int chk_attr(void *obj, struct rdma_ah_attr *attr, bool obj_is_ah)
 {
 	const struct ib_global_route *grh = rdma_ah_read_grh(attr);
 	struct rxe_port *port;
+	struct rxe_dev *rxe;
+	struct rxe_qp *qp;
+	struct rxe_ah *ah;
 	int type;
 
+	if (obj_is_ah) {
+		ah = obj;
+		rxe = to_rdev(ah->ibah.device);
+	} else {
+		qp = obj;
+		rxe = to_rdev(qp->ibqp.device);
+	}
+
 	port = &rxe->port;
 
 	if (rdma_ah_get_ah_flags(attr) & IB_AH_GRH) {
 		if (grh->sgid_index > port->attr.gid_tbl_len) {
-			pr_warn("invalid sgid index = %d\n",
-					grh->sgid_index);
+			if (obj_is_ah)
+				rxe_dbg_ah(ah, "invalid sgid index = %d\n",
+						grh->sgid_index);
+			else
+				rxe_dbg_qp(qp, "invalid sgid index = %d\n",
+						grh->sgid_index);
 			return -EINVAL;
 		}
 
 		type = rdma_gid_attr_network_type(grh->sgid_attr);
 		if (type < RDMA_NETWORK_IPV4 ||
 		    type > RDMA_NETWORK_IPV6) {
-			pr_warn("invalid network type for rdma_rxe = %d\n",
-					type);
+			if (obj_is_ah)
+				rxe_dbg_ah(ah, "invalid network type for rdma_rxe = %d\n",
+						type);
+			else
+				rxe_dbg_qp(qp, "invalid network type for rdma_rxe = %d\n",
+						type);
 			return -EINVAL;
 		}
 	}
@@ -41,6 +60,16 @@ int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr)
 	return 0;
 }
 
+int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr)
+{
+	return chk_attr(qp, attr, false);
+}
+
+int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr)
+{
+	return chk_attr(ah, attr, true);
+}
+
 void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
 		     struct rdma_ah_attr *attr)
 {
@@ -121,12 +150,12 @@ struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp)
 		/* only new user provider or kernel client */
 		ah = rxe_pool_get_index(&pkt->rxe->ah_pool, ah_num);
 		if (!ah) {
-			pr_warn("Unable to find AH matching ah_num\n");
+			rxe_dbg_qp(pkt->qp, "Unable to find AH matching ah_num\n");
 			return NULL;
 		}
 
 		if (rxe_ah_pd(ah) != pkt->qp->pd) {
-			pr_warn("PDs don't match for AH and QP\n");
+			rxe_dbg_qp(pkt->qp, "PDs don't match for AH and QP\n");
 			rxe_put(ah);
 			return NULL;
 		}
diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index c2a5c8814a48b..a22476d27b384 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -9,16 +9,12 @@
 
 /* rxe_av.c */
 void rxe_init_av(struct rdma_ah_attr *attr, struct rxe_av *av);
-
-int rxe_av_chk_attr(struct rxe_dev *rxe, struct rdma_ah_attr *attr);
-
+int rxe_av_chk_attr(struct rxe_qp *qp, struct rdma_ah_attr *attr);
+int rxe_ah_chk_attr(struct rxe_ah *ah, struct rdma_ah_attr *attr);
 void rxe_av_from_attr(u8 port_num, struct rxe_av *av,
 		     struct rdma_ah_attr *attr);
-
 void rxe_av_to_attr(struct rxe_av *av, struct rdma_ah_attr *attr);
-
 void rxe_av_fill_ip_info(struct rxe_av *av, struct rdma_ah_attr *attr);
-
 struct rxe_av *rxe_get_av(struct rxe_pkt_info *pkt, struct rxe_ah **ahp);
 
 /* rxe_cq.c */
diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index bcbfe6068b8b0..46f6c74ce00e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -414,11 +414,11 @@ int rxe_qp_chk_attr(struct rxe_dev *rxe, struct rxe_qp *qp,
 	if (mask & IB_QP_CAP && rxe_qp_chk_cap(rxe, &attr->cap, !!qp->srq))
 		goto err1;
 
-	if (mask & IB_QP_AV && rxe_av_chk_attr(rxe, &attr->ah_attr))
+	if (mask & IB_QP_AV && rxe_av_chk_attr(qp, &attr->ah_attr))
 		goto err1;
 
 	if (mask & IB_QP_ALT_PATH) {
-		if (rxe_av_chk_attr(rxe, &attr->alt_ah_attr))
+		if (rxe_av_chk_attr(qp, &attr->alt_ah_attr))
 			goto err1;
 		if (!rdma_is_port_valid(&rxe->ib_dev, attr->alt_port_num))  {
 			rxe_dbg_qp(qp, "invalid alt port %d\n", attr->alt_port_num);
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c
index e6eca21c54e6e..025b35bf014e2 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.c
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.c
@@ -172,10 +172,6 @@ static int rxe_create_ah(struct ib_ah *ibah,
 		ah->is_user = false;
 	}
 
-	err = rxe_av_chk_attr(rxe, init_attr->ah_attr);
-	if (err)
-		return err;
-
 	err = rxe_add_to_pool_ah(&rxe->ah_pool, ah,
 			init_attr->flags & RDMA_CREATE_AH_SLEEPABLE);
 	if (err)
@@ -184,6 +180,12 @@ static int rxe_create_ah(struct ib_ah *ibah,
 	/* create index > 0 */
 	ah->ah_num = ah->elem.index;
 
+	err = rxe_ah_chk_attr(ah, init_attr->ah_attr);
+	if (err) {
+		rxe_cleanup(ah);
+		return err;
+	}
+
 	if (uresp) {
 		/* only if new user provider */
 		err = copy_to_user(&uresp->ah_num, &ah->ah_num,
@@ -206,10 +208,9 @@ static int rxe_create_ah(struct ib_ah *ibah,
 static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr)
 {
 	int err;
-	struct rxe_dev *rxe = to_rdev(ibah->device);
 	struct rxe_ah *ah = to_rah(ibah);
 
-	err = rxe_av_chk_attr(rxe, attr);
+	err = rxe_ah_chk_attr(ah, attr);
 	if (err)
 		return err;
 
-- 
GitLab


From fc50597934411170ed149d3368b0b733bc5119f1 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:11 -0500
Subject: [PATCH 1362/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_task.c

Replace calls to pr_xxx() in rxe_task.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-14-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_task.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_task.c b/drivers/infiniband/sw/rxe/rxe_task.c
index 0208d833a41b9..60b90e33a8849 100644
--- a/drivers/infiniband/sw/rxe/rxe_task.c
+++ b/drivers/infiniband/sw/rxe/rxe_task.c
@@ -29,6 +29,7 @@ static void do_task(struct tasklet_struct *t)
 	int cont;
 	int ret;
 	struct rxe_task *task = from_tasklet(task, t, tasklet);
+	struct rxe_qp *qp = (struct rxe_qp *)task->arg;
 	unsigned int iterations = RXE_MAX_ITERATIONS;
 
 	spin_lock_bh(&task->lock);
@@ -47,7 +48,7 @@ static void do_task(struct tasklet_struct *t)
 
 	default:
 		spin_unlock_bh(&task->lock);
-		pr_warn("%s failed with bad state %d\n", __func__, task->state);
+		rxe_dbg_qp(qp, "failed with bad state %d\n", task->state);
 		return;
 	}
 
@@ -81,8 +82,8 @@ static void do_task(struct tasklet_struct *t)
 			break;
 
 		default:
-			pr_warn("%s failed with bad state %d\n", __func__,
-				task->state);
+			rxe_dbg_qp(qp, "failed with bad state %d\n",
+					task->state);
 		}
 		spin_unlock_bh(&task->lock);
 	} while (cont);
-- 
GitLab


From c6aba5ea00550017629c56972b635f33bb6a6903 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:12 -0500
Subject: [PATCH 1363/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe.c

Replace calls to pr_xxx() in rxe.c with rxe_dbg_xxx().
Calls with a rxe device not yet in scope are left as is.

Link: https://lore.kernel.org/r/20221103171013.20659-15-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe.c b/drivers/infiniband/sw/rxe/rxe.c
index 51daac5c4feb7..136c2efe34660 100644
--- a/drivers/infiniband/sw/rxe/rxe.c
+++ b/drivers/infiniband/sw/rxe/rxe.c
@@ -187,14 +187,14 @@ static int rxe_newlink(const char *ibdev_name, struct net_device *ndev)
 	exists = rxe_get_dev_from_net(ndev);
 	if (exists) {
 		ib_device_put(&exists->ib_dev);
-		pr_err("already configured on %s\n", ndev->name);
+		rxe_dbg(exists, "already configured on %s\n", ndev->name);
 		err = -EEXIST;
 		goto err;
 	}
 
 	err = rxe_net_add(ibdev_name, ndev);
 	if (err) {
-		pr_err("failed to add %s\n", ndev->name);
+		rxe_dbg(exists, "failed to add %s\n", ndev->name);
 		goto err;
 	}
 err:
-- 
GitLab


From 813728043b79a11f7029ba196decfc7f576ae487 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:13 -0500
Subject: [PATCH 1364/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_icrc.c

Replace calls to pr_xxx() in rxe_icrc.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-16-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_icrc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_icrc.c b/drivers/infiniband/sw/rxe/rxe_icrc.c
index 46bb07c5c4df2..71bc2c1895888 100644
--- a/drivers/infiniband/sw/rxe/rxe_icrc.c
+++ b/drivers/infiniband/sw/rxe/rxe_icrc.c
@@ -21,7 +21,7 @@ int rxe_icrc_init(struct rxe_dev *rxe)
 
 	tfm = crypto_alloc_shash("crc32", 0, 0);
 	if (IS_ERR(tfm)) {
-		pr_warn("failed to init crc32 algorithm err:%ld\n",
+		rxe_dbg(rxe, "failed to init crc32 algorithm err: %ld\n",
 			       PTR_ERR(tfm));
 		return PTR_ERR(tfm);
 	}
@@ -51,7 +51,7 @@ static __be32 rxe_crc32(struct rxe_dev *rxe, __be32 crc, void *next, size_t len)
 	*(__be32 *)shash_desc_ctx(shash) = crc;
 	err = crypto_shash_update(shash, next, len);
 	if (unlikely(err)) {
-		pr_warn_ratelimited("failed crc calculation, err: %d\n", err);
+		rxe_dbg(rxe, "failed crc calculation, err: %d\n", err);
 		return (__force __be32)crc32_le((__force u32)crc, next, len);
 	}
 
-- 
GitLab


From 5de087250f1d8f7b81abaf94110884994793f073 Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 3 Nov 2022 12:10:14 -0500
Subject: [PATCH 1365/4122] RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in
 rxe_mmap.c

Replace calls to pr_xxx() in rxe_mmap.c with rxe_dbg_xxx().

Link: https://lore.kernel.org/r/20221103171013.20659-17-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mmap.c b/drivers/infiniband/sw/rxe/rxe_mmap.c
index 9149b60954296..a47d72dbc5376 100644
--- a/drivers/infiniband/sw/rxe/rxe_mmap.c
+++ b/drivers/infiniband/sw/rxe/rxe_mmap.c
@@ -79,7 +79,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 
 		/* Don't allow a mmap larger than the object. */
 		if (size > ip->info.size) {
-			pr_err("mmap region is larger than the object!\n");
+			rxe_dbg(rxe, "mmap region is larger than the object!\n");
 			spin_unlock_bh(&rxe->pending_lock);
 			ret = -EINVAL;
 			goto done;
@@ -87,7 +87,7 @@ int rxe_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
 
 		goto found_it;
 	}
-	pr_warn("unable to find pending mmap info\n");
+	rxe_dbg(rxe, "unable to find pending mmap info\n");
 	spin_unlock_bh(&rxe->pending_lock);
 	ret = -EINVAL;
 	goto done;
@@ -98,7 +98,7 @@ found_it:
 
 	ret = remap_vmalloc_range(vma, ip->obj, 0);
 	if (ret) {
-		pr_err("err %d from remap_vmalloc_range\n", ret);
+		rxe_dbg(rxe, "err %d from remap_vmalloc_range\n", ret);
 		goto done;
 	}
 
-- 
GitLab


From 8c50cd059c5cd974da4285af17adf0e38905b25b Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 31 Oct 2022 10:39:50 -0500
Subject: [PATCH 1366/4122] PCI: altera-msi: Include <linux/irqdomain.h>
 explicitly

pcie-altera-msi.c uses irq_domain_add_linear() and related interfaces, so
it needs <linux/irqdomain.h> but doesn't include it directly; it relies on
the fact that <linux/of_irq.h> includes it.

But pcie-altera-msi.c *doesn't* need <linux/of_irq.h> itself.  Include
<linux/irqdomain.h> directly to remove this implicit dependency so a future
patch can drop <linux/of_irq.h>.

Link: https://lore.kernel.org/r/20221031153954.1163623-2-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/controller/pcie-altera-msi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c
index 7b1d3ebc34ecd..4366e042e98ba 100644
--- a/drivers/pci/controller/pcie-altera-msi.c
+++ b/drivers/pci/controller/pcie-altera-msi.c
@@ -9,6 +9,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/msi.h>
-- 
GitLab


From 606a0430b37af4a1dd3e1e3baaf073b42fbb53e3 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 31 Oct 2022 10:39:51 -0500
Subject: [PATCH 1367/4122] PCI: microchip: Include <linux/irqdomain.h>
 explicitly

pcie-microchip-host.c uses irq_domain_add_linear() and related interfaces,
so it needs <linux/irqdomain.h> but doesn't include it directly; it relies
on the fact that <linux/of_irq.h> includes it.

But pcie-microchip-host.c *doesn't* need <linux/of_irq.h> itself.  Include
<linux/irqdomain.h> directly to remove this implicit dependency so a future
patch can drop <linux/of_irq.h>.

Link: https://lore.kernel.org/r/20221031153954.1163623-3-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
---
 drivers/pci/controller/pcie-microchip-host.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c
index 7263d175b5adb..57b2a62f52c8a 100644
--- a/drivers/pci/controller/pcie-microchip-host.c
+++ b/drivers/pci/controller/pcie-microchip-host.c
@@ -9,6 +9,7 @@
 
 #include <linux/clk.h>
 #include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/msi.h>
 #include <linux/of_address.h>
-- 
GitLab


From 763d25e7affe13c7be923fec6192ca6325f33c73 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 31 Oct 2022 10:39:52 -0500
Subject: [PATCH 1368/4122] PCI: mvebu: Include <linux/irqdomain.h> explicitly

pci-mvebu.c uses irq_domain_add_linear() and related interfaces but relies
on <linux/irqdomain.h> but doesn't include it directly; it relies on the
fact that <linux/of_irq.h> includes it.

Include <linux/irqdomain.h> directly to remove this implicit dependency.

Link: https://lore.kernel.org/r/20221031153954.1163623-4-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
---
 drivers/pci/controller/pci-mvebu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index 1ced73726a267..73db99035c2b7 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/gpio.h>
 #include <linux/init.h>
+#include <linux/irqdomain.h>
 #include <linux/mbus.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
-- 
GitLab


From 753596dcdb753afb63874c644b2fb20ef7fb3948 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 31 Oct 2022 10:39:53 -0500
Subject: [PATCH 1369/4122] PCI: xgene-msi: Include <linux/irqdomain.h>
 explicitly

pci-xgene-msi.c uses irq_domain_add_linear() and related interfaces, so it
needs <linux/irqdomain.h> but doesn't include it directly; it relies on the
fact that <linux/of_irq.h> includes it.

But pci-xgene-msi.c *doesn't* need <linux/of_irq.h> itself.  Include
<linux/irqdomain.h> directly to remove this implicit dependency so a future
patch can drop <linux/of_irq.h>.

Link: https://lore.kernel.org/r/20221031153954.1163623-5-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/controller/pci-xgene-msi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c
index bfa259781b692..bacb14e558eeb 100644
--- a/drivers/pci/controller/pci-xgene-msi.c
+++ b/drivers/pci/controller/pci-xgene-msi.c
@@ -8,6 +8,7 @@
  */
 #include <linux/cpu.h>
 #include <linux/interrupt.h>
+#include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/msi.h>
 #include <linux/of_irq.h>
-- 
GitLab


From 277004d7a4a348de185fb4149ff29a651e994ff4 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 31 Oct 2022 10:39:54 -0500
Subject: [PATCH 1370/4122] PCI: Remove unnecessary <linux/of_irq.h> includes

Many host controller drivers #include <linux/of_irq.h> even though they
don't need it.  Remove the unnecessary #includes.

Link: https://lore.kernel.org/r/20221031153954.1163623-6-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Roy Zang <roy.zang@nxp.com>
---
 drivers/pci/controller/cadence/pci-j721e.c   | 1 -
 drivers/pci/controller/dwc/pci-layerscape.c  | 1 -
 drivers/pci/controller/dwc/pcie-armada8k.c   | 1 -
 drivers/pci/controller/dwc/pcie-tegra194.c   | 1 -
 drivers/pci/controller/pci-v3-semi.c         | 1 -
 drivers/pci/controller/pci-xgene-msi.c       | 1 -
 drivers/pci/controller/pci-xgene.c           | 1 -
 drivers/pci/controller/pcie-altera-msi.c     | 1 -
 drivers/pci/controller/pcie-iproc-platform.c | 1 -
 drivers/pci/controller/pcie-iproc.c          | 1 -
 drivers/pci/controller/pcie-microchip-host.c | 1 -
 drivers/pci/controller/pcie-rockchip-host.c  | 1 -
 drivers/pci/controller/pcie-xilinx-cpm.c     | 1 -
 drivers/pci/controller/pcie-xilinx-nwl.c     | 1 -
 14 files changed, 14 deletions(-)

diff --git a/drivers/pci/controller/cadence/pci-j721e.c b/drivers/pci/controller/cadence/pci-j721e.c
index a82f845cc4b52..cc83a8925ce03 100644
--- a/drivers/pci/controller/cadence/pci-j721e.c
+++ b/drivers/pci/controller/cadence/pci-j721e.c
@@ -15,7 +15,6 @@
 #include <linux/mfd/syscon.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
diff --git a/drivers/pci/controller/dwc/pci-layerscape.c b/drivers/pci/controller/dwc/pci-layerscape.c
index 879b8692f96a5..ed5fb492fe084 100644
--- a/drivers/pci/controller/dwc/pci-layerscape.c
+++ b/drivers/pci/controller/dwc/pci-layerscape.c
@@ -13,7 +13,6 @@
 #include <linux/init.h>
 #include <linux/of_pci.h>
 #include <linux/of_platform.h>
-#include <linux/of_irq.h>
 #include <linux/of_address.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
diff --git a/drivers/pci/controller/dwc/pcie-armada8k.c b/drivers/pci/controller/dwc/pcie-armada8k.c
index dc469ef8e99b3..5c999e15c357f 100644
--- a/drivers/pci/controller/dwc/pcie-armada8k.c
+++ b/drivers/pci/controller/dwc/pcie-armada8k.c
@@ -21,7 +21,6 @@
 #include <linux/platform_device.h>
 #include <linux/resource.h>
 #include <linux/of_pci.h>
-#include <linux/of_irq.h>
 
 #include "pcie-designware.h"
 
diff --git a/drivers/pci/controller/dwc/pcie-tegra194.c b/drivers/pci/controller/dwc/pcie-tegra194.c
index 1b6b437823d22..02d78a12b6e72 100644
--- a/drivers/pci/controller/dwc/pcie-tegra194.c
+++ b/drivers/pci/controller/dwc/pcie-tegra194.c
@@ -21,7 +21,6 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
-#include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
 #include <linux/phy/phy.h>
diff --git a/drivers/pci/controller/pci-v3-semi.c b/drivers/pci/controller/pci-v3-semi.c
index 154a5398633cc..784fcf35599c6 100644
--- a/drivers/pci/controller/pci-v3-semi.c
+++ b/drivers/pci/controller/pci-v3-semi.c
@@ -22,7 +22,6 @@
 #include <linux/kernel.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
diff --git a/drivers/pci/controller/pci-xgene-msi.c b/drivers/pci/controller/pci-xgene-msi.c
index bacb14e558eeb..d7987b281f799 100644
--- a/drivers/pci/controller/pci-xgene-msi.c
+++ b/drivers/pci/controller/pci-xgene-msi.c
@@ -11,7 +11,6 @@
 #include <linux/irqdomain.h>
 #include <linux/module.h>
 #include <linux/msi.h>
-#include <linux/of_irq.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
diff --git a/drivers/pci/controller/pci-xgene.c b/drivers/pci/controller/pci-xgene.c
index 549d3bd6d1c27..887b4941ff32f 100644
--- a/drivers/pci/controller/pci-xgene.c
+++ b/drivers/pci/controller/pci-xgene.c
@@ -14,7 +14,6 @@
 #include <linux/init.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
 #include <linux/pci-acpi.h>
diff --git a/drivers/pci/controller/pcie-altera-msi.c b/drivers/pci/controller/pcie-altera-msi.c
index 4366e042e98ba..65e8a20cc4426 100644
--- a/drivers/pci/controller/pcie-altera-msi.c
+++ b/drivers/pci/controller/pcie-altera-msi.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/msi.h>
 #include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
diff --git a/drivers/pci/controller/pcie-iproc-platform.c b/drivers/pci/controller/pcie-iproc-platform.c
index 538115246c799..4142a73e611d1 100644
--- a/drivers/pci/controller/pcie-iproc-platform.c
+++ b/drivers/pci/controller/pcie-iproc-platform.c
@@ -12,7 +12,6 @@
 #include <linux/platform_device.h>
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
-#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/phy/phy.h>
 
diff --git a/drivers/pci/controller/pcie-iproc.c b/drivers/pci/controller/pcie-iproc.c
index 2519201b0e51c..83029bdfd8844 100644
--- a/drivers/pci/controller/pcie-iproc.c
+++ b/drivers/pci/controller/pcie-iproc.c
@@ -18,7 +18,6 @@
 #include <linux/platform_device.h>
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
-#include <linux/of_irq.h>
 #include <linux/of_platform.h>
 #include <linux/phy/phy.h>
 
diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c
index 57b2a62f52c8a..0ebf7015e9afd 100644
--- a/drivers/pci/controller/pcie-microchip-host.c
+++ b/drivers/pci/controller/pcie-microchip-host.c
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/msi.h>
 #include <linux/of_address.h>
-#include <linux/of_irq.h>
 #include <linux/of_pci.h>
 #include <linux/pci-ecam.h>
 #include <linux/platform_device.h>
diff --git a/drivers/pci/controller/pcie-rockchip-host.c b/drivers/pci/controller/pcie-rockchip-host.c
index 7352b5ff8d359..c96c0f454570c 100644
--- a/drivers/pci/controller/pcie-rockchip-host.c
+++ b/drivers/pci/controller/pcie-rockchip-host.c
@@ -28,7 +28,6 @@
 #include <linux/of_device.h>
 #include <linux/of_pci.h>
 #include <linux/of_platform.h>
-#include <linux/of_irq.h>
 #include <linux/pci.h>
 #include <linux/pci_ids.h>
 #include <linux/phy/phy.h>
diff --git a/drivers/pci/controller/pcie-xilinx-cpm.c b/drivers/pci/controller/pcie-xilinx-cpm.c
index e4ab48041eb6d..4a787a941674b 100644
--- a/drivers/pci/controller/pcie-xilinx-cpm.c
+++ b/drivers/pci/controller/pcie-xilinx-cpm.c
@@ -16,7 +16,6 @@
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
 #include <linux/of_platform.h>
-#include <linux/of_irq.h>
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/pci-ecam.h>
diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 40d070e54ad2e..f0271b6c6f8d2 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -17,7 +17,6 @@
 #include <linux/of_address.h>
 #include <linux/of_pci.h>
 #include <linux/of_platform.h>
-#include <linux/of_irq.h>
 #include <linux/pci.h>
 #include <linux/pci-ecam.h>
 #include <linux/platform_device.h>
-- 
GitLab


From 4c2e9ba05c7abac58bdb58e47eb69b156027fb7b Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:50 +0300
Subject: [PATCH 1371/4122] dmaengine: at_hdmac: Do not print messages on
 console while holding the lock

The descriptor was already removed from the transfer list, there's no
reason to keep the channel lock while printing desc info, thus do the
prints without holding the lock.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-17-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 858bd64f13135..f365ac4d87ffe 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -549,6 +549,8 @@ static void atc_handle_error(struct at_dma_chan *atchan)
 		atc_dostart(atchan, desc);
 	}
 
+	spin_unlock_irqrestore(&atchan->lock, flags);
+
 	/*
 	 * KERN_CRITICAL may seem harsh, but since this only happens
 	 * when someone submits a bad physical address in a
@@ -564,8 +566,6 @@ static void atc_handle_error(struct at_dma_chan *atchan)
 	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
 		atc_dump_lli(atchan, &child->lli);
 
-	spin_unlock_irqrestore(&atchan->lock, flags);
-
 	/* Pretend the descriptor completed successfully */
 	atc_chain_complete(atchan, bad_desc);
 }
-- 
GitLab


From 83c196152fc9a93c3d5a7cbf3229f42f87f232d8 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:51 +0300
Subject: [PATCH 1372/4122] dmaengine: at_hdmac: Return dma_cookie_status()'s
 ret code when txstate is NULL

txstate is an optional parameter used to get a struct with auxilary
transfer status information. When not provided the call to
device_tx_status() should return the status of the dma cookie. Return the
status of dma cookie when the txstate optional parameter is not provided.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-18-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index f365ac4d87ffe..10b6b0435d52e 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1461,14 +1461,8 @@ atc_tx_status(struct dma_chan *chan,
 	int bytes = 0;
 
 	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_COMPLETE)
+	if (ret == DMA_COMPLETE || !txstate)
 		return ret;
-	/*
-	 * There's no point calculating the residue if there's
-	 * no txstate to store the value.
-	 */
-	if (!txstate)
-		return DMA_ERROR;
 
 	spin_lock_irqsave(&atchan->lock, flags);
 
-- 
GitLab


From 0e75c28c52962b528947843e947b4bd0c74d40d2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:52 +0300
Subject: [PATCH 1373/4122] dmaengine: at_hdmac: Remove superfluous cast

Conversions of void * are applied automatically when other pointer types
are assigned to and from void *. Remove the superfluous cast.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-19-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 10b6b0435d52e..fbfb207104e98 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -603,7 +603,7 @@ static void atc_tasklet(struct tasklet_struct *t)
 
 static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
 {
-	struct at_dma		*atdma = (struct at_dma *)dev_id;
+	struct at_dma		*atdma = dev_id;
 	struct at_dma_chan	*atchan;
 	int			i;
 	u32			status, pending, imr;
-- 
GitLab


From f5d79afa3a858eb6e5cf2bcd894ed53b5bd8b5ff Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:53 +0300
Subject: [PATCH 1374/4122] dmaengine: at_hdmac: Pass residue by address to
 avoid unnecessary implicit casts

struct dma_tx_state defines residue as u32. atc_get_bytes_left() returned
an int which could be either an error or the value of the residue. This
could cause problems if the controller supported a u32 buffer transfer size
and the u32 value was past the max int can hold. Our controller does not
support u32 buffer transfer size, but even so, improve the code and pass
the residue by address to avoid unnecessary implicit casts and make
atc_get_bytes_left() return 0 on success or -errno on errors.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-20-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 54 +++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 24 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index fbfb207104e98..e2c46f32b2840 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -293,7 +293,7 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan,
  * @current_len: the number of bytes left before reading CTRLA
  * @ctrla: the value of CTRLA
  */
-static inline int atc_calc_bytes_left(int current_len, u32 ctrla)
+static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
 {
 	u32 btsize = (ctrla & ATC_BTSIZE_MAX);
 	u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla);
@@ -308,17 +308,20 @@ static inline int atc_calc_bytes_left(int current_len, u32 ctrla)
 }
 
 /**
- * atc_get_bytes_left - get the number of bytes residue for a cookie
+ * atc_get_bytes_left - get the number of bytes residue for a cookie.
+ * The residue is passed by address and updated on success.
  * @chan: DMA channel
  * @cookie: transaction identifier to check status of
+ * @residue: residue to be updated.
+ * Return 0 on success, -errono otherwise.
  */
-static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
+static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie,
+			      u32 *residue)
 {
 	struct at_dma_chan      *atchan = to_at_dma_chan(chan);
 	struct at_desc *desc_first = atc_first_active(atchan);
 	struct at_desc *desc;
-	int ret;
-	u32 ctrla, dscr;
+	u32 len, ctrla, dscr;
 	unsigned int i;
 
 	/*
@@ -333,7 +336,7 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
 		return desc->total_len;
 
 	/* cookie matches to the currently running transfer */
-	ret = desc_first->total_len;
+	len = desc_first->total_len;
 
 	if (desc_first->lli.dscr) {
 		/* hardware linked list transfer */
@@ -419,29 +422,31 @@ static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie)
 			return -ETIMEDOUT;
 
 		/* for the first descriptor we can be more accurate */
-		if (desc_first->lli.dscr == dscr)
-			return atc_calc_bytes_left(ret, ctrla);
+		if (desc_first->lli.dscr == dscr) {
+			*residue = atc_calc_bytes_left(len, ctrla);
+			return 0;
+		}
 
-		ret -= desc_first->len;
+		len -= desc_first->len;
 		list_for_each_entry(desc, &desc_first->tx_list, desc_node) {
 			if (desc->lli.dscr == dscr)
 				break;
 
-			ret -= desc->len;
+			len -= desc->len;
 		}
 
 		/*
 		 * For the current descriptor in the chain we can calculate
 		 * the remaining bytes using the channel's register.
 		 */
-		ret = atc_calc_bytes_left(ret, ctrla);
+		*residue = atc_calc_bytes_left(len, ctrla);
 	} else {
 		/* single transfer */
 		ctrla = channel_readl(atchan, CTRLA);
-		ret = atc_calc_bytes_left(ret, ctrla);
+		*residue = atc_calc_bytes_left(len, ctrla);
 	}
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -1457,31 +1462,32 @@ atc_tx_status(struct dma_chan *chan,
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	unsigned long		flags;
-	enum dma_status		ret;
-	int bytes = 0;
+	enum dma_status		dma_status;
+	u32 residue;
+	int ret;
 
-	ret = dma_cookie_status(chan, cookie, txstate);
-	if (ret == DMA_COMPLETE || !txstate)
-		return ret;
+	dma_status = dma_cookie_status(chan, cookie, txstate);
+	if (dma_status == DMA_COMPLETE || !txstate)
+		return dma_status;
 
 	spin_lock_irqsave(&atchan->lock, flags);
 
 	/*  Get number of bytes left in the active transactions */
-	bytes = atc_get_bytes_left(chan, cookie);
+	ret = atc_get_bytes_left(chan, cookie, &residue);
 
 	spin_unlock_irqrestore(&atchan->lock, flags);
 
-	if (unlikely(bytes < 0)) {
+	if (unlikely(ret < 0)) {
 		dev_vdbg(chan2dev(chan), "get residual bytes error\n");
 		return DMA_ERROR;
 	} else {
-		dma_set_residue(txstate, bytes);
+		dma_set_residue(txstate, residue);
 	}
 
-	dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %d\n",
-		 ret, cookie, bytes);
+	dev_vdbg(chan2dev(chan), "tx_status %d: cookie = %d residue = %u\n",
+		 dma_status, cookie, residue);
 
-	return ret;
+	return dma_status;
 }
 
 /**
-- 
GitLab


From 91617bf6bb41b32119f5ac007871ad1d115d4ed2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:54 +0300
Subject: [PATCH 1375/4122] dmaengine: at_hdmac:
 s/atc_get_bytes_left/atc_get_residue

Use dmaengine terminology and rename the method to better indicate what it
does: it gets the residue value which will be later on set with
dma_set_residue().

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-21-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index e2c46f32b2840..6c328cd169832 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -308,15 +308,15 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
 }
 
 /**
- * atc_get_bytes_left - get the number of bytes residue for a cookie.
+ * atc_get_residue - get the number of bytes residue for a cookie.
  * The residue is passed by address and updated on success.
  * @chan: DMA channel
  * @cookie: transaction identifier to check status of
  * @residue: residue to be updated.
  * Return 0 on success, -errono otherwise.
  */
-static int atc_get_bytes_left(struct dma_chan *chan, dma_cookie_t cookie,
-			      u32 *residue)
+static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie,
+			   u32 *residue)
 {
 	struct at_dma_chan      *atchan = to_at_dma_chan(chan);
 	struct at_desc *desc_first = atc_first_active(atchan);
@@ -1471,10 +1471,7 @@ atc_tx_status(struct dma_chan *chan,
 		return dma_status;
 
 	spin_lock_irqsave(&atchan->lock, flags);
-
-	/*  Get number of bytes left in the active transactions */
-	ret = atc_get_bytes_left(chan, cookie, &residue);
-
+	ret = atc_get_residue(chan, cookie, &residue);
 	spin_unlock_irqrestore(&atchan->lock, flags);
 
 	if (unlikely(ret < 0)) {
-- 
GitLab


From b50cf4bdfb9164c55d002624217d5a5ef4ab9573 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:55 +0300
Subject: [PATCH 1376/4122] dmaengine: at_hdmac: Introduce
 atc_get_llis_residue()

Introduce a method to get the residue for a hardware linked list transfer.
It makes the code easier to read.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-22-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 221 ++++++++++++++++++++---------------------
 1 file changed, 110 insertions(+), 111 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 6c328cd169832..6bd9e35db8f9b 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -307,6 +307,109 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
 	return current_len - (btsize << src_width);
 }
 
+/**
+ * atc_get_llis_residue - Get residue for a hardware linked list transfer
+ *
+ * Calculate the residue by removing the length of the child descriptors already
+ * transferred from the total length. To get the current child descriptor we can
+ * use the value of the channel's DSCR register and compare it against the value
+ * of the hardware linked list structure of each child descriptor.
+ *
+ * The CTRLA register provides us with the amount of data already read from the
+ * source for the current child descriptor. So we can compute a more accurate
+ * residue by also removing the number of bytes corresponding to this amount of
+ * data.
+ *
+ * However, the DSCR and CTRLA registers cannot be read both atomically. Hence a
+ * race condition may occur: the first read register may refer to one child
+ * descriptor whereas the second read may refer to a later child descriptor in
+ * the list because of the DMA transfer progression inbetween the two reads.
+ *
+ * One solution could have been to pause the DMA transfer, read the DSCR and
+ * CTRLA then resume the DMA transfer. Nonetheless, this approach presents some
+ * drawbacks:
+ * - If the DMA transfer is paused, RX overruns or TX underruns are more likey
+ *   to occur depending on the system latency. Taking the USART driver as an
+ *   example, it uses a cyclic DMA transfer to read data from the Receive
+ *   Holding Register (RHR) to avoid RX overruns since the RHR is not protected
+ *   by any FIFO on most Atmel SoCs. So pausing the DMA transfer to compute the
+ *   residue would break the USART driver design.
+ * - The atc_pause() function masks interrupts but we'd rather avoid to do so
+ * for system latency purpose.
+ *
+ * Then we'd rather use another solution: the DSCR is read a first time, the
+ * CTRLA is read in turn, next the DSCR is read a second time. If the two
+ * consecutive read values of the DSCR are the same then we assume both refers
+ * to the very same child descriptor as well as the CTRLA value read inbetween
+ * does. For cyclic tranfers, the assumption is that a full loop is "not so
+ * fast". If the two DSCR values are different, we read again the CTRLA then the
+ * DSCR till two consecutive read values from DSCR are equal or till the
+ * maximum trials is reach. This algorithm is very unlikely not to find a stable
+ * value for DSCR.
+ * @atchan: pointer to an atmel hdmac channel.
+ * @desc: pointer to the descriptor for which the residue is calculated.
+ * @residue: residue to be set to dma_tx_state.
+ * Returns 0 on success, -errno otherwise.
+ */
+static int atc_get_llis_residue(struct at_dma_chan *atchan,
+				struct at_desc *desc, u32 *residue)
+{
+	struct at_desc *child;
+	u32 len, ctrla, dscr;
+	unsigned int i;
+
+	len = desc->total_len;
+	dscr = channel_readl(atchan, DSCR);
+	rmb(); /* ensure DSCR is read before CTRLA */
+	ctrla = channel_readl(atchan, CTRLA);
+	for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) {
+		u32 new_dscr;
+
+		rmb(); /* ensure DSCR is read after CTRLA */
+		new_dscr = channel_readl(atchan, DSCR);
+
+		/*
+		 * If the DSCR register value has not changed inside the DMA
+		 * controller since the previous read, we assume that both the
+		 * dscr and ctrla values refers to the very same descriptor.
+		 */
+		if (likely(new_dscr == dscr))
+			break;
+
+		/*
+		 * DSCR has changed inside the DMA controller, so the previouly
+		 * read value of CTRLA may refer to an already processed
+		 * descriptor hence could be outdated. We need to update ctrla
+		 * to match the current descriptor.
+		 */
+		dscr = new_dscr;
+		rmb(); /* ensure DSCR is read before CTRLA */
+		ctrla = channel_readl(atchan, CTRLA);
+	}
+	if (unlikely(i == ATC_MAX_DSCR_TRIALS))
+		return -ETIMEDOUT;
+
+	/* For the first descriptor we can be more accurate. */
+	if (desc->lli.dscr == dscr) {
+		*residue = atc_calc_bytes_left(len, ctrla);
+		return 0;
+	}
+
+	len -= desc->len;
+	list_for_each_entry(child, &desc->tx_list, desc_node) {
+		if (child->lli.dscr == dscr)
+			break;
+		len -= child->len;
+	}
+
+	/*
+	 * For the current descriptor in the chain we can calculate the
+	 * remaining bytes using the channel's register.
+	 */
+	*residue = atc_calc_bytes_left(len, ctrla);
+	return 0;
+}
+
 /**
  * atc_get_residue - get the number of bytes residue for a cookie.
  * The residue is passed by address and updated on success.
@@ -321,8 +424,7 @@ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie,
 	struct at_dma_chan      *atchan = to_at_dma_chan(chan);
 	struct at_desc *desc_first = atc_first_active(atchan);
 	struct at_desc *desc;
-	u32 len, ctrla, dscr;
-	unsigned int i;
+	u32 len, ctrla;
 
 	/*
 	 * If the cookie doesn't match to the currently running transfer then
@@ -335,117 +437,14 @@ static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie,
 	else if (desc != desc_first)
 		return desc->total_len;
 
-	/* cookie matches to the currently running transfer */
-	len = desc_first->total_len;
-
-	if (desc_first->lli.dscr) {
+	if (desc_first->lli.dscr)
 		/* hardware linked list transfer */
+		return atc_get_llis_residue(atchan, desc_first, residue);
 
-		/*
-		 * Calculate the residue by removing the length of the child
-		 * descriptors already transferred from the total length.
-		 * To get the current child descriptor we can use the value of
-		 * the channel's DSCR register and compare it against the value
-		 * of the hardware linked list structure of each child
-		 * descriptor.
-		 *
-		 * The CTRLA register provides us with the amount of data
-		 * already read from the source for the current child
-		 * descriptor. So we can compute a more accurate residue by also
-		 * removing the number of bytes corresponding to this amount of
-		 * data.
-		 *
-		 * However, the DSCR and CTRLA registers cannot be read both
-		 * atomically. Hence a race condition may occur: the first read
-		 * register may refer to one child descriptor whereas the second
-		 * read may refer to a later child descriptor in the list
-		 * because of the DMA transfer progression inbetween the two
-		 * reads.
-		 *
-		 * One solution could have been to pause the DMA transfer, read
-		 * the DSCR and CTRLA then resume the DMA transfer. Nonetheless,
-		 * this approach presents some drawbacks:
-		 * - If the DMA transfer is paused, RX overruns or TX underruns
-		 *   are more likey to occur depending on the system latency.
-		 *   Taking the USART driver as an example, it uses a cyclic DMA
-		 *   transfer to read data from the Receive Holding Register
-		 *   (RHR) to avoid RX overruns since the RHR is not protected
-		 *   by any FIFO on most Atmel SoCs. So pausing the DMA transfer
-		 *   to compute the residue would break the USART driver design.
-		 * - The atc_pause() function masks interrupts but we'd rather
-		 *   avoid to do so for system latency purpose.
-		 *
-		 * Then we'd rather use another solution: the DSCR is read a
-		 * first time, the CTRLA is read in turn, next the DSCR is read
-		 * a second time. If the two consecutive read values of the DSCR
-		 * are the same then we assume both refers to the very same
-		 * child descriptor as well as the CTRLA value read inbetween
-		 * does. For cyclic tranfers, the assumption is that a full loop
-		 * is "not so fast".
-		 * If the two DSCR values are different, we read again the CTRLA
-		 * then the DSCR till two consecutive read values from DSCR are
-		 * equal or till the maxium trials is reach.
-		 * This algorithm is very unlikely not to find a stable value for
-		 * DSCR.
-		 */
-
-		dscr = channel_readl(atchan, DSCR);
-		rmb(); /* ensure DSCR is read before CTRLA */
-		ctrla = channel_readl(atchan, CTRLA);
-		for (i = 0; i < ATC_MAX_DSCR_TRIALS; ++i) {
-			u32 new_dscr;
-
-			rmb(); /* ensure DSCR is read after CTRLA */
-			new_dscr = channel_readl(atchan, DSCR);
-
-			/*
-			 * If the DSCR register value has not changed inside the
-			 * DMA controller since the previous read, we assume
-			 * that both the dscr and ctrla values refers to the
-			 * very same descriptor.
-			 */
-			if (likely(new_dscr == dscr))
-				break;
-
-			/*
-			 * DSCR has changed inside the DMA controller, so the
-			 * previouly read value of CTRLA may refer to an already
-			 * processed descriptor hence could be outdated.
-			 * We need to update ctrla to match the current
-			 * descriptor.
-			 */
-			dscr = new_dscr;
-			rmb(); /* ensure DSCR is read before CTRLA */
-			ctrla = channel_readl(atchan, CTRLA);
-		}
-		if (unlikely(i == ATC_MAX_DSCR_TRIALS))
-			return -ETIMEDOUT;
-
-		/* for the first descriptor we can be more accurate */
-		if (desc_first->lli.dscr == dscr) {
-			*residue = atc_calc_bytes_left(len, ctrla);
-			return 0;
-		}
-
-		len -= desc_first->len;
-		list_for_each_entry(desc, &desc_first->tx_list, desc_node) {
-			if (desc->lli.dscr == dscr)
-				break;
-
-			len -= desc->len;
-		}
-
-		/*
-		 * For the current descriptor in the chain we can calculate
-		 * the remaining bytes using the channel's register.
-		 */
-		*residue = atc_calc_bytes_left(len, ctrla);
-	} else {
-		/* single transfer */
-		ctrla = channel_readl(atchan, CTRLA);
-		*residue = atc_calc_bytes_left(len, ctrla);
-	}
-
+	/* single transfer */
+	len = desc_first->total_len;
+	ctrla = channel_readl(atchan, CTRLA);
+	*residue = atc_calc_bytes_left(len, ctrla);
 	return 0;
 }
 
-- 
GitLab


From 5f1d429b43b34b310a93651681d0cd8a39a86e3d Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:56 +0300
Subject: [PATCH 1377/4122] dmaengine: at_hdmac: Use devm_kzalloc() and
 struct_size()

Use the resource-managed kzalloc to simplify error logic. Memory allocated
with this function is automatically freed on driver detach. Use
struct_size() helper to calculate the size of the atdma structure with its
trailing flexible array. While here, move the mem allocation higher in the
probe method, as failing to allocate memory indicates a serious system
issue, and everything else does not matter anyway. All these help the code
look a bit cleaner.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-23-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 23 +++++++++--------------
 1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 6bd9e35db8f9b..f3fbb0aa8b241 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -19,6 +19,7 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/overflow.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
 
@@ -1786,6 +1787,12 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	if (!plat_dat)
 		return -ENODEV;
 
+	atdma = devm_kzalloc(&pdev->dev,
+			     struct_size(atdma, chan, plat_dat->nr_channels),
+			     GFP_KERNEL);
+	if (!atdma)
+		return -ENOMEM;
+
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!io)
 		return -EINVAL;
@@ -1794,21 +1801,13 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	size = sizeof(struct at_dma);
-	size += plat_dat->nr_channels * sizeof(struct at_dma_chan);
-	atdma = kzalloc(size, GFP_KERNEL);
-	if (!atdma)
-		return -ENOMEM;
-
 	/* discover transaction capabilities */
 	atdma->dma_common.cap_mask = plat_dat->cap_mask;
 	atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
 
 	size = resource_size(io);
-	if (!request_mem_region(io->start, size, pdev->dev.driver->name)) {
-		err = -EBUSY;
-		goto err_kfree;
-	}
+	if (!request_mem_region(io->start, size, pdev->dev.driver->name))
+		return -EBUSY;
 
 	atdma->regs = ioremap(io->start, size);
 	if (!atdma->regs) {
@@ -1963,8 +1962,6 @@ err_clk:
 	atdma->regs = NULL;
 err_release_r:
 	release_mem_region(io->start, size);
-err_kfree:
-	kfree(atdma);
 	return err;
 }
 
@@ -2003,8 +2000,6 @@ static int at_dma_remove(struct platform_device *pdev)
 	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	release_mem_region(io->start, resource_size(io));
 
-	kfree(atdma);
-
 	return 0;
 }
 
-- 
GitLab


From 8bfe4a61d40df2ddb707bf7d0b278907de2dd4f6 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:57 +0300
Subject: [PATCH 1378/4122] dmaengine: at_hdmac: Use
 devm_platform_ioremap_resource

Use devm_platform_ioremap_resource() helper for cleanner code and easier
resource management.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-24-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 37 ++++++-------------------------------
 1 file changed, 6 insertions(+), 31 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index f3fbb0aa8b241..10c250618a338 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1765,9 +1765,7 @@ static void at_dma_off(struct at_dma *atdma)
 
 static int __init at_dma_probe(struct platform_device *pdev)
 {
-	struct resource		*io;
 	struct at_dma		*atdma;
-	size_t			size;
 	int			irq;
 	int			err;
 	int			i;
@@ -1793,9 +1791,9 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	if (!atdma)
 		return -ENOMEM;
 
-	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!io)
-		return -EINVAL;
+	atdma->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(atdma->regs))
+		return PTR_ERR(atdma->regs);
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0)
@@ -1805,21 +1803,10 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	atdma->dma_common.cap_mask = plat_dat->cap_mask;
 	atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
 
-	size = resource_size(io);
-	if (!request_mem_region(io->start, size, pdev->dev.driver->name))
-		return -EBUSY;
-
-	atdma->regs = ioremap(io->start, size);
-	if (!atdma->regs) {
-		err = -ENOMEM;
-		goto err_release_r;
-	}
-
 	atdma->clk = clk_get(&pdev->dev, "dma_clk");
-	if (IS_ERR(atdma->clk)) {
-		err = PTR_ERR(atdma->clk);
-		goto err_clk;
-	}
+	if (IS_ERR(atdma->clk))
+		return PTR_ERR(atdma->clk);
+
 	err = clk_prepare_enable(atdma->clk);
 	if (err)
 		goto err_clk_prepare;
@@ -1957,11 +1944,6 @@ err_irq:
 	clk_disable_unprepare(atdma->clk);
 err_clk_prepare:
 	clk_put(atdma->clk);
-err_clk:
-	iounmap(atdma->regs);
-	atdma->regs = NULL;
-err_release_r:
-	release_mem_region(io->start, size);
 	return err;
 }
 
@@ -1969,7 +1951,6 @@ static int at_dma_remove(struct platform_device *pdev)
 {
 	struct at_dma		*atdma = platform_get_drvdata(pdev);
 	struct dma_chan		*chan, *_chan;
-	struct resource		*io;
 
 	at_dma_off(atdma);
 	if (pdev->dev.of_node)
@@ -1994,12 +1975,6 @@ static int at_dma_remove(struct platform_device *pdev)
 	clk_disable_unprepare(atdma->clk);
 	clk_put(atdma->clk);
 
-	iounmap(atdma->regs);
-	atdma->regs = NULL;
-
-	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	release_mem_region(io->start, resource_size(io));
-
 	return 0;
 }
 
-- 
GitLab


From 4c15a4c7f693f1f45ef534ddc428f2a9aa67bd13 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:58 +0300
Subject: [PATCH 1379/4122] dmaengine: at_hdmac: Use devm_clk_get()

Clocks that are get with this method will be automatically put on driver
detach. Use devm_clk_get() and simplify the error handling.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-25-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 10c250618a338..444aa7d75ff53 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -1803,13 +1803,13 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	atdma->dma_common.cap_mask = plat_dat->cap_mask;
 	atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
 
-	atdma->clk = clk_get(&pdev->dev, "dma_clk");
+	atdma->clk = devm_clk_get(&pdev->dev, "dma_clk");
 	if (IS_ERR(atdma->clk))
 		return PTR_ERR(atdma->clk);
 
 	err = clk_prepare_enable(atdma->clk);
 	if (err)
-		goto err_clk_prepare;
+		return err;
 
 	/* force dma off, just in case */
 	at_dma_off(atdma);
@@ -1942,8 +1942,6 @@ err_desc_pool_create:
 	free_irq(platform_get_irq(pdev, 0), atdma);
 err_irq:
 	clk_disable_unprepare(atdma->clk);
-err_clk_prepare:
-	clk_put(atdma->clk);
 	return err;
 }
 
@@ -1973,7 +1971,6 @@ static int at_dma_remove(struct platform_device *pdev)
 	}
 
 	clk_disable_unprepare(atdma->clk);
-	clk_put(atdma->clk);
 
 	return 0;
 }
-- 
GitLab


From c23cd8c971f0b4697f344d981f13aae4123f866d Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:02:59 +0300
Subject: [PATCH 1380/4122] dmaengine: at_hdmac: Use pm_ptr()

Use pm_ptr() macro to fill at_dma_driver.driver.pm. In case CONFIG_PM is
not enabled, the macro will return NULL. When NULL, at_dma_dev_pm_ops will
end up being unused, so prepend it with the __maybe_unused attribute.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-26-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 444aa7d75ff53..4e3c519e60799 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -2084,7 +2084,7 @@ static int at_dma_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static const struct dev_pm_ops at_dma_dev_pm_ops = {
+static const struct dev_pm_ops __maybe_unused at_dma_dev_pm_ops = {
 	.prepare = at_dma_prepare,
 	.suspend_noirq = at_dma_suspend_noirq,
 	.resume_noirq = at_dma_resume_noirq,
@@ -2096,7 +2096,7 @@ static struct platform_driver at_dma_driver = {
 	.id_table	= atdma_devtypes,
 	.driver = {
 		.name	= "at_hdmac",
-		.pm	= &at_dma_dev_pm_ops,
+		.pm	= pm_ptr(&at_dma_dev_pm_ops),
 		.of_match_table	= of_match_ptr(atmel_dma_dt_ids),
 	},
 };
-- 
GitLab


From e3e672b8f95be38db26c971bc4c6b43d18a9836a Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:03:00 +0300
Subject: [PATCH 1381/4122] dmaengine: at_hdmac: Set include entries in
 alphabetic order

It's a good practice to set the include entries in alphabetic order. It
helps humans to read the code easier. Alphabetic order should also prove
that each header is self-contained, i.e. can be included without
prerequisites.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-27-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 4e3c519e60799..a3fa8bffdb749 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -12,16 +12,16 @@
 #include <dt-bindings/dma/at91.h>
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
-#include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
 #include <linux/of.h>
 #include <linux/overflow.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 #include "at_hdmac_regs.h"
 #include "dmaengine.h"
-- 
GitLab


From 5cecadc3e2a4fb72ab37d9420df0a9e1179b8a3e Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:03:01 +0300
Subject: [PATCH 1382/4122] dmaengine: at_hdmac: Keep register definitions and
 structures private to at_hdmac.c

Do not expose register definitions, structures and helpers via a .h file
because there are used only by at_hdmac.c. Since there are no other users,
remove the ambiguity and move all the .h contents to the .c file.

One may notice some checkpatch warnings and errors with this move. The move
was done "as it was", checkpatch complaints can be fixed in a further
patch.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-28-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 MAINTAINERS                 |   1 -
 drivers/dma/at_hdmac.c      | 469 ++++++++++++++++++++++++++++++++++-
 drivers/dma/at_hdmac_regs.h | 478 ------------------------------------
 3 files changed, 468 insertions(+), 480 deletions(-)
 delete mode 100644 drivers/dma/at_hdmac_regs.h

diff --git a/MAINTAINERS b/MAINTAINERS
index cd1264d24db85..c1b11b55ea89f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13415,7 +13415,6 @@ L:	dmaengine@vger.kernel.org
 S:	Supported
 F:	Documentation/devicetree/bindings/dma/atmel-dma.txt
 F:	drivers/dma/at_hdmac.c
-F:	drivers/dma/at_hdmac_regs.h
 F:	drivers/dma/at_xdmac.c
 F:	include/dt-bindings/dma/at91.h
 
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index a3fa8bffdb749..a07e3355f09a0 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -23,7 +23,6 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 
-#include "at_hdmac_regs.h"
 #include "dmaengine.h"
 
 /*
@@ -35,6 +34,474 @@
  * atc_	/ atchan	: ATmel DMA Channel entity related
  */
 
+#define	AT_DMA_MAX_NR_CHANNELS	8
+
+
+#define	AT_DMA_GCFG	0x00	/* Global Configuration Register */
+#define		AT_DMA_IF_BIGEND(i)	(0x1 << (i))	/* AHB-Lite Interface i in Big-endian mode */
+#define		AT_DMA_ARB_CFG	(0x1 << 4)	/* Arbiter mode. */
+#define			AT_DMA_ARB_CFG_FIXED		(0x0 << 4)
+#define			AT_DMA_ARB_CFG_ROUND_ROBIN	(0x1 << 4)
+
+#define	AT_DMA_EN	0x04	/* Controller Enable Register */
+#define		AT_DMA_ENABLE	(0x1 << 0)
+
+#define	AT_DMA_SREQ	0x08	/* Software Single Request Register */
+#define		AT_DMA_SSREQ(x)	(0x1 << ((x) << 1))		/* Request a source single transfer on channel x */
+#define		AT_DMA_DSREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination single transfer on channel x */
+
+#define	AT_DMA_CREQ	0x0C	/* Software Chunk Transfer Request Register */
+#define		AT_DMA_SCREQ(x)	(0x1 << ((x) << 1))		/* Request a source chunk transfer on channel x */
+#define		AT_DMA_DCREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination chunk transfer on channel x */
+
+#define	AT_DMA_LAST	0x10	/* Software Last Transfer Flag Register */
+#define		AT_DMA_SLAST(x)	(0x1 << ((x) << 1))		/* This src rq is last tx of buffer on channel x */
+#define		AT_DMA_DLAST(x)	(0x1 << (1 + ((x) << 1)))	/* This dst rq is last tx of buffer on channel x */
+
+#define	AT_DMA_SYNC	0x14	/* Request Synchronization Register */
+#define		AT_DMA_SYR(h)	(0x1 << (h))			/* Synchronize handshake line h */
+
+/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
+#define	AT_DMA_EBCIER	0x18	/* Enable register */
+#define	AT_DMA_EBCIDR	0x1C	/* Disable register */
+#define	AT_DMA_EBCIMR	0x20	/* Mask Register */
+#define	AT_DMA_EBCISR	0x24	/* Status Register */
+#define		AT_DMA_CBTC_OFFSET	8
+#define		AT_DMA_ERR_OFFSET	16
+#define		AT_DMA_BTC(x)	(0x1 << (x))
+#define		AT_DMA_CBTC(x)	(0x1 << (AT_DMA_CBTC_OFFSET + (x)))
+#define		AT_DMA_ERR(x)	(0x1 << (AT_DMA_ERR_OFFSET + (x)))
+
+#define	AT_DMA_CHER	0x28	/* Channel Handler Enable Register */
+#define		AT_DMA_ENA(x)	(0x1 << (x))
+#define		AT_DMA_SUSP(x)	(0x1 << ( 8 + (x)))
+#define		AT_DMA_KEEP(x)	(0x1 << (24 + (x)))
+
+#define	AT_DMA_CHDR	0x2C	/* Channel Handler Disable Register */
+#define		AT_DMA_DIS(x)	(0x1 << (x))
+#define		AT_DMA_RES(x)	(0x1 << ( 8 + (x)))
+
+#define	AT_DMA_CHSR	0x30	/* Channel Handler Status Register */
+#define		AT_DMA_EMPT(x)	(0x1 << (16 + (x)))
+#define		AT_DMA_STAL(x)	(0x1 << (24 + (x)))
+
+
+#define	AT_DMA_CH_REGS_BASE	0x3C	/* Channel registers base address */
+#define	ch_regs(x)	(AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
+
+/* Hardware register offset for each channel */
+#define	ATC_SADDR_OFFSET	0x00	/* Source Address Register */
+#define	ATC_DADDR_OFFSET	0x04	/* Destination Address Register */
+#define	ATC_DSCR_OFFSET		0x08	/* Descriptor Address Register */
+#define	ATC_CTRLA_OFFSET	0x0C	/* Control A Register */
+#define	ATC_CTRLB_OFFSET	0x10	/* Control B Register */
+#define	ATC_CFG_OFFSET		0x14	/* Configuration Register */
+#define	ATC_SPIP_OFFSET		0x18	/* Src PIP Configuration Register */
+#define	ATC_DPIP_OFFSET		0x1C	/* Dst PIP Configuration Register */
+
+
+/* Bitfield definitions */
+
+/* Bitfields in DSCR */
+#define	ATC_DSCR_IF(i)		(0x3 & (i))	/* Dsc feched via AHB-Lite Interface i */
+
+/* Bitfields in CTRLA */
+#define	ATC_BTSIZE_MAX		0xFFFFUL	/* Maximum Buffer Transfer Size */
+#define	ATC_BTSIZE(x)		(ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
+#define	ATC_SCSIZE_MASK		(0x7 << 16)	/* Source Chunk Transfer Size */
+#define		ATC_SCSIZE(x)		(ATC_SCSIZE_MASK & ((x) << 16))
+#define		ATC_SCSIZE_1		(0x0 << 16)
+#define		ATC_SCSIZE_4		(0x1 << 16)
+#define		ATC_SCSIZE_8		(0x2 << 16)
+#define		ATC_SCSIZE_16		(0x3 << 16)
+#define		ATC_SCSIZE_32		(0x4 << 16)
+#define		ATC_SCSIZE_64		(0x5 << 16)
+#define		ATC_SCSIZE_128		(0x6 << 16)
+#define		ATC_SCSIZE_256		(0x7 << 16)
+#define	ATC_DCSIZE_MASK		(0x7 << 20)	/* Destination Chunk Transfer Size */
+#define		ATC_DCSIZE(x)		(ATC_DCSIZE_MASK & ((x) << 20))
+#define		ATC_DCSIZE_1		(0x0 << 20)
+#define		ATC_DCSIZE_4		(0x1 << 20)
+#define		ATC_DCSIZE_8		(0x2 << 20)
+#define		ATC_DCSIZE_16		(0x3 << 20)
+#define		ATC_DCSIZE_32		(0x4 << 20)
+#define		ATC_DCSIZE_64		(0x5 << 20)
+#define		ATC_DCSIZE_128		(0x6 << 20)
+#define		ATC_DCSIZE_256		(0x7 << 20)
+#define	ATC_SRC_WIDTH_MASK	(0x3 << 24)	/* Source Single Transfer Size */
+#define		ATC_SRC_WIDTH(x)	((x) << 24)
+#define		ATC_SRC_WIDTH_BYTE	(0x0 << 24)
+#define		ATC_SRC_WIDTH_HALFWORD	(0x1 << 24)
+#define		ATC_SRC_WIDTH_WORD	(0x2 << 24)
+#define		ATC_REG_TO_SRC_WIDTH(r)	(((r) >> 24) & 0x3)
+#define	ATC_DST_WIDTH_MASK	(0x3 << 28)	/* Destination Single Transfer Size */
+#define		ATC_DST_WIDTH(x)	((x) << 28)
+#define		ATC_DST_WIDTH_BYTE	(0x0 << 28)
+#define		ATC_DST_WIDTH_HALFWORD	(0x1 << 28)
+#define		ATC_DST_WIDTH_WORD	(0x2 << 28)
+#define	ATC_DONE		(0x1 << 31)	/* Tx Done (only written back in descriptor) */
+
+/* Bitfields in CTRLB */
+#define	ATC_SIF(i)		(0x3 & (i))	/* Src tx done via AHB-Lite Interface i */
+#define	ATC_DIF(i)		((0x3 & (i)) <<  4)	/* Dst tx done via AHB-Lite Interface i */
+				  /* Specify AHB interfaces */
+#define AT_DMA_MEM_IF		0 /* interface 0 as memory interface */
+#define AT_DMA_PER_IF		1 /* interface 1 as peripheral interface */
+
+#define	ATC_SRC_PIP		(0x1 <<  8)	/* Source Picture-in-Picture enabled */
+#define	ATC_DST_PIP		(0x1 << 12)	/* Destination Picture-in-Picture enabled */
+#define	ATC_SRC_DSCR_DIS	(0x1 << 16)	/* Src Descriptor fetch disable */
+#define	ATC_DST_DSCR_DIS	(0x1 << 20)	/* Dst Descriptor fetch disable */
+#define	ATC_FC_MASK		(0x7 << 21)	/* Choose Flow Controller */
+#define		ATC_FC_MEM2MEM		(0x0 << 21)	/* Mem-to-Mem (DMA) */
+#define		ATC_FC_MEM2PER		(0x1 << 21)	/* Mem-to-Periph (DMA) */
+#define		ATC_FC_PER2MEM		(0x2 << 21)	/* Periph-to-Mem (DMA) */
+#define		ATC_FC_PER2PER		(0x3 << 21)	/* Periph-to-Periph (DMA) */
+#define		ATC_FC_PER2MEM_PER	(0x4 << 21)	/* Periph-to-Mem (Peripheral) */
+#define		ATC_FC_MEM2PER_PER	(0x5 << 21)	/* Mem-to-Periph (Peripheral) */
+#define		ATC_FC_PER2PER_SRCPER	(0x6 << 21)	/* Periph-to-Periph (Src Peripheral) */
+#define		ATC_FC_PER2PER_DSTPER	(0x7 << 21)	/* Periph-to-Periph (Dst Peripheral) */
+#define	ATC_SRC_ADDR_MODE_MASK	(0x3 << 24)
+#define		ATC_SRC_ADDR_MODE_INCR	(0x0 << 24)	/* Incrementing Mode */
+#define		ATC_SRC_ADDR_MODE_DECR	(0x1 << 24)	/* Decrementing Mode */
+#define		ATC_SRC_ADDR_MODE_FIXED	(0x2 << 24)	/* Fixed Mode */
+#define	ATC_DST_ADDR_MODE_MASK	(0x3 << 28)
+#define		ATC_DST_ADDR_MODE_INCR	(0x0 << 28)	/* Incrementing Mode */
+#define		ATC_DST_ADDR_MODE_DECR	(0x1 << 28)	/* Decrementing Mode */
+#define		ATC_DST_ADDR_MODE_FIXED	(0x2 << 28)	/* Fixed Mode */
+#define	ATC_IEN			(0x1 << 30)	/* BTC interrupt enable (active low) */
+#define	ATC_AUTO		(0x1 << 31)	/* Auto multiple buffer tx enable */
+
+/* Bitfields in CFG */
+#define ATC_PER_MSB(h)	((0x30U & (h)) >> 4)	/* Extract most significant bits of a handshaking identifier */
+
+#define	ATC_SRC_PER(h)		(0xFU & (h))	/* Channel src rq associated with periph handshaking ifc h */
+#define	ATC_DST_PER(h)		((0xFU & (h)) <<  4)	/* Channel dst rq associated with periph handshaking ifc h */
+#define	ATC_SRC_REP		(0x1 <<  8)	/* Source Replay Mod */
+#define	ATC_SRC_H2SEL		(0x1 <<  9)	/* Source Handshaking Mod */
+#define		ATC_SRC_H2SEL_SW	(0x0 <<  9)
+#define		ATC_SRC_H2SEL_HW	(0x1 <<  9)
+#define	ATC_SRC_PER_MSB(h)	(ATC_PER_MSB(h) << 10)	/* Channel src rq (most significant bits) */
+#define	ATC_DST_REP		(0x1 << 12)	/* Destination Replay Mod */
+#define	ATC_DST_H2SEL		(0x1 << 13)	/* Destination Handshaking Mod */
+#define		ATC_DST_H2SEL_SW	(0x0 << 13)
+#define		ATC_DST_H2SEL_HW	(0x1 << 13)
+#define	ATC_DST_PER_MSB(h)	(ATC_PER_MSB(h) << 14)	/* Channel dst rq (most significant bits) */
+#define	ATC_SOD			(0x1 << 16)	/* Stop On Done */
+#define	ATC_LOCK_IF		(0x1 << 20)	/* Interface Lock */
+#define	ATC_LOCK_B		(0x1 << 21)	/* AHB Bus Lock */
+#define	ATC_LOCK_IF_L		(0x1 << 22)	/* Master Interface Arbiter Lock */
+#define		ATC_LOCK_IF_L_CHUNK	(0x0 << 22)
+#define		ATC_LOCK_IF_L_BUFFER	(0x1 << 22)
+#define	ATC_AHB_PROT_MASK	(0x7 << 24)	/* AHB Protection */
+#define	ATC_FIFOCFG_MASK	(0x3 << 28)	/* FIFO Request Configuration */
+#define		ATC_FIFOCFG_LARGESTBURST	(0x0 << 28)
+#define		ATC_FIFOCFG_HALFFIFO		(0x1 << 28)
+#define		ATC_FIFOCFG_ENOUGHSPACE		(0x2 << 28)
+
+/* Bitfields in SPIP */
+#define	ATC_SPIP_HOLE(x)	(0xFFFFU & (x))
+#define	ATC_SPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+
+/* Bitfields in DPIP */
+#define	ATC_DPIP_HOLE(x)	(0xFFFFU & (x))
+#define	ATC_DPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+
+
+/*--  descriptors  -----------------------------------------------------*/
+
+/* LLI == Linked List Item; aka DMA buffer descriptor */
+struct at_lli {
+	/* values that are not changed by hardware */
+	u32 saddr;
+	u32 daddr;
+	/* value that may get written back: */
+	u32 ctrla;
+	/* more values that are not changed by hardware */
+	u32 ctrlb;
+	u32 dscr;	/* chain to next lli */
+};
+
+/**
+ * struct at_desc - software descriptor
+ * @at_lli: hardware lli structure
+ * @txd: support for the async_tx api
+ * @desc_node: node on the channed descriptors list
+ * @len: descriptor byte count
+ * @total_len: total transaction byte count
+ */
+struct at_desc {
+	/* FIRST values the hardware uses */
+	struct at_lli			lli;
+
+	/* THEN values for driver housekeeping */
+	struct list_head		tx_list;
+	struct dma_async_tx_descriptor	txd;
+	struct list_head		desc_node;
+	size_t				len;
+	size_t				total_len;
+
+	/* Interleaved data */
+	size_t				boundary;
+	size_t				dst_hole;
+	size_t				src_hole;
+
+	/* Memset temporary buffer */
+	bool				memset_buffer;
+	dma_addr_t			memset_paddr;
+	int				*memset_vaddr;
+};
+
+static inline struct at_desc *
+txd_to_at_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct at_desc, txd);
+}
+
+
+/*--  Channels  --------------------------------------------------------*/
+
+/**
+ * atc_status - information bits stored in channel status flag
+ *
+ * Manipulated with atomic operations.
+ */
+enum atc_status {
+	ATC_IS_ERROR = 0,
+	ATC_IS_PAUSED = 1,
+	ATC_IS_CYCLIC = 24,
+};
+
+/**
+ * struct at_dma_chan - internal representation of an Atmel HDMAC channel
+ * @chan_common: common dmaengine channel object members
+ * @device: parent device
+ * @ch_regs: memory mapped register base
+ * @mask: channel index in a mask
+ * @per_if: peripheral interface
+ * @mem_if: memory interface
+ * @status: transmit status information from irq/prep* functions
+ *                to tasklet (use atomic operations)
+ * @tasklet: bottom half to finish transaction work
+ * @save_cfg: configuration register that is saved on suspend/resume cycle
+ * @save_dscr: for cyclic operations, preserve next descriptor address in
+ *             the cyclic list on suspend/resume cycle
+ * @dma_sconfig: configuration for slave transfers, passed via
+ * .device_config
+ * @lock: serializes enqueue/dequeue operations to descriptors lists
+ * @active_list: list of descriptors dmaengine is being running on
+ * @queue: list of descriptors ready to be submitted to engine
+ * @free_list: list of descriptors usable by the channel
+ */
+struct at_dma_chan {
+	struct dma_chan		chan_common;
+	struct at_dma		*device;
+	void __iomem		*ch_regs;
+	u8			mask;
+	u8			per_if;
+	u8			mem_if;
+	unsigned long		status;
+	struct tasklet_struct	tasklet;
+	u32			save_cfg;
+	u32			save_dscr;
+	struct dma_slave_config dma_sconfig;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+};
+
+#define	channel_readl(atchan, name) \
+	__raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET)
+
+#define	channel_writel(atchan, name, val) \
+	__raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET)
+
+static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
+{
+	return container_of(dchan, struct at_dma_chan, chan_common);
+}
+
+/*
+ * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
+ * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
+ *
+ * This can be done by finding most significant bit set.
+ */
+static inline void convert_burst(u32 *maxburst)
+{
+	if (*maxburst > 1)
+		*maxburst = fls(*maxburst) - 2;
+	else
+		*maxburst = 0;
+}
+
+/*
+ * Fix sconfig's bus width according to at_hdmac.
+ * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2.
+ */
+static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
+{
+	switch (addr_width) {
+	case DMA_SLAVE_BUSWIDTH_2_BYTES:
+		return 1;
+	case DMA_SLAVE_BUSWIDTH_4_BYTES:
+		return 2;
+	default:
+		/* For 1 byte width or fallback */
+		return 0;
+	}
+}
+
+/*--  Controller  ------------------------------------------------------*/
+
+/**
+ * struct at_dma - internal representation of an Atmel HDMA Controller
+ * @chan_common: common dmaengine dma_device object members
+ * @atdma_devtype: identifier of DMA controller compatibility
+ * @ch_regs: memory mapped register base
+ * @clk: dma controller clock
+ * @save_imr: interrupt mask register that is saved on suspend/resume cycle
+ * @all_chan_mask: all channels availlable in a mask
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @chan: channels table to store at_dma_chan structures
+ */
+struct at_dma {
+	struct dma_device	dma_common;
+	void __iomem		*regs;
+	struct clk		*clk;
+	u32			save_imr;
+
+	u8			all_chan_mask;
+
+	struct dma_pool		*dma_desc_pool;
+	struct dma_pool		*memset_pool;
+	/* AT THE END channels table */
+	struct at_dma_chan	chan[];
+};
+
+#define	dma_readl(atdma, name) \
+	__raw_readl((atdma)->regs + AT_DMA_##name)
+#define	dma_writel(atdma, name, val) \
+	__raw_writel((val), (atdma)->regs + AT_DMA_##name)
+
+static inline struct at_dma *to_at_dma(struct dma_device *ddev)
+{
+	return container_of(ddev, struct at_dma, dma_common);
+}
+
+
+/*--  Helper functions  ------------------------------------------------*/
+
+static struct device *chan2dev(struct dma_chan *chan)
+{
+	return &chan->dev->device;
+}
+
+#if defined(VERBOSE_DEBUG)
+static void vdbg_dump_regs(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	dev_err(chan2dev(&atchan->chan_common),
+		"  channel %d : imr = 0x%x, chsr = 0x%x\n",
+		atchan->chan_common.chan_id,
+		dma_readl(atdma, EBCIMR),
+		dma_readl(atdma, CHSR));
+
+	dev_err(chan2dev(&atchan->chan_common),
+		"  channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
+		channel_readl(atchan, SADDR),
+		channel_readl(atchan, DADDR),
+		channel_readl(atchan, CTRLA),
+		channel_readl(atchan, CTRLB),
+		channel_readl(atchan, CFG),
+		channel_readl(atchan, DSCR));
+}
+#else
+static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
+#endif
+
+static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
+{
+	dev_crit(chan2dev(&atchan->chan_common),
+		 "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n",
+		 &lli->saddr, &lli->daddr,
+		 lli->ctrla, lli->ctrlb, &lli->dscr);
+}
+
+
+static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on)
+{
+	u32 ebci;
+
+	/* enable interrupts on buffer transfer completion & error */
+	ebci =    AT_DMA_BTC(chan_id)
+		| AT_DMA_ERR(chan_id);
+	if (on)
+		dma_writel(atdma, EBCIER, ebci);
+	else
+		dma_writel(atdma, EBCIDR, ebci);
+}
+
+static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+	atc_setup_irq(atdma, chan_id, 1);
+}
+
+static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id)
+{
+	atc_setup_irq(atdma, chan_id, 0);
+}
+
+
+/**
+ * atc_chan_is_enabled - test if given channel is enabled
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
+{
+	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+
+	return !!(dma_readl(atdma, CHSR) & atchan->mask);
+}
+
+/**
+ * atc_chan_is_paused - test channel pause/resume status
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_paused(struct at_dma_chan *atchan)
+{
+	return test_bit(ATC_IS_PAUSED, &atchan->status);
+}
+
+/**
+ * atc_chan_is_cyclic - test if given channel has cyclic property set
+ * @atchan: channel we want to test status
+ */
+static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan)
+{
+	return test_bit(ATC_IS_CYCLIC, &atchan->status);
+}
+
+/**
+ * set_desc_eol - set end-of-link to descriptor so it will end transfer
+ * @desc: descriptor, signle or at the end of a chain, to end chain on
+ */
+static void set_desc_eol(struct at_desc *desc)
+{
+	u32 ctrlb = desc->lli.ctrlb;
+
+	ctrlb &= ~ATC_IEN;
+	ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
+
+	desc->lli.ctrlb = ctrlb;
+	desc->lli.dscr = 0;
+}
+
 #define	ATC_DEFAULT_CFG		(ATC_FIFOCFG_HALFFIFO)
 #define	ATC_DEFAULT_CTRLB	(ATC_SIF(AT_DMA_MEM_IF) \
 				|ATC_DIF(AT_DMA_MEM_IF))
diff --git a/drivers/dma/at_hdmac_regs.h b/drivers/dma/at_hdmac_regs.h
deleted file mode 100644
index d4d382d746078..0000000000000
--- a/drivers/dma/at_hdmac_regs.h
+++ /dev/null
@@ -1,478 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Header file for the Atmel AHB DMA Controller driver
- *
- * Copyright (C) 2008 Atmel Corporation
- */
-#ifndef AT_HDMAC_REGS_H
-#define	AT_HDMAC_REGS_H
-
-#define	AT_DMA_MAX_NR_CHANNELS	8
-
-
-#define	AT_DMA_GCFG	0x00	/* Global Configuration Register */
-#define		AT_DMA_IF_BIGEND(i)	(0x1 << (i))	/* AHB-Lite Interface i in Big-endian mode */
-#define		AT_DMA_ARB_CFG	(0x1 << 4)	/* Arbiter mode. */
-#define			AT_DMA_ARB_CFG_FIXED		(0x0 << 4)
-#define			AT_DMA_ARB_CFG_ROUND_ROBIN	(0x1 << 4)
-
-#define	AT_DMA_EN	0x04	/* Controller Enable Register */
-#define		AT_DMA_ENABLE	(0x1 << 0)
-
-#define	AT_DMA_SREQ	0x08	/* Software Single Request Register */
-#define		AT_DMA_SSREQ(x)	(0x1 << ((x) << 1))		/* Request a source single transfer on channel x */
-#define		AT_DMA_DSREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination single transfer on channel x */
-
-#define	AT_DMA_CREQ	0x0C	/* Software Chunk Transfer Request Register */
-#define		AT_DMA_SCREQ(x)	(0x1 << ((x) << 1))		/* Request a source chunk transfer on channel x */
-#define		AT_DMA_DCREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination chunk transfer on channel x */
-
-#define	AT_DMA_LAST	0x10	/* Software Last Transfer Flag Register */
-#define		AT_DMA_SLAST(x)	(0x1 << ((x) << 1))		/* This src rq is last tx of buffer on channel x */
-#define		AT_DMA_DLAST(x)	(0x1 << (1 + ((x) << 1)))	/* This dst rq is last tx of buffer on channel x */
-
-#define	AT_DMA_SYNC	0x14	/* Request Synchronization Register */
-#define		AT_DMA_SYR(h)	(0x1 << (h))			/* Synchronize handshake line h */
-
-/* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
-#define	AT_DMA_EBCIER	0x18	/* Enable register */
-#define	AT_DMA_EBCIDR	0x1C	/* Disable register */
-#define	AT_DMA_EBCIMR	0x20	/* Mask Register */
-#define	AT_DMA_EBCISR	0x24	/* Status Register */
-#define		AT_DMA_CBTC_OFFSET	8
-#define		AT_DMA_ERR_OFFSET	16
-#define		AT_DMA_BTC(x)	(0x1 << (x))
-#define		AT_DMA_CBTC(x)	(0x1 << (AT_DMA_CBTC_OFFSET + (x)))
-#define		AT_DMA_ERR(x)	(0x1 << (AT_DMA_ERR_OFFSET + (x)))
-
-#define	AT_DMA_CHER	0x28	/* Channel Handler Enable Register */
-#define		AT_DMA_ENA(x)	(0x1 << (x))
-#define		AT_DMA_SUSP(x)	(0x1 << ( 8 + (x)))
-#define		AT_DMA_KEEP(x)	(0x1 << (24 + (x)))
-
-#define	AT_DMA_CHDR	0x2C	/* Channel Handler Disable Register */
-#define		AT_DMA_DIS(x)	(0x1 << (x))
-#define		AT_DMA_RES(x)	(0x1 << ( 8 + (x)))
-
-#define	AT_DMA_CHSR	0x30	/* Channel Handler Status Register */
-#define		AT_DMA_EMPT(x)	(0x1 << (16 + (x)))
-#define		AT_DMA_STAL(x)	(0x1 << (24 + (x)))
-
-
-#define	AT_DMA_CH_REGS_BASE	0x3C	/* Channel registers base address */
-#define	ch_regs(x)	(AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
-
-/* Hardware register offset for each channel */
-#define	ATC_SADDR_OFFSET	0x00	/* Source Address Register */
-#define	ATC_DADDR_OFFSET	0x04	/* Destination Address Register */
-#define	ATC_DSCR_OFFSET		0x08	/* Descriptor Address Register */
-#define	ATC_CTRLA_OFFSET	0x0C	/* Control A Register */
-#define	ATC_CTRLB_OFFSET	0x10	/* Control B Register */
-#define	ATC_CFG_OFFSET		0x14	/* Configuration Register */
-#define	ATC_SPIP_OFFSET		0x18	/* Src PIP Configuration Register */
-#define	ATC_DPIP_OFFSET		0x1C	/* Dst PIP Configuration Register */
-
-
-/* Bitfield definitions */
-
-/* Bitfields in DSCR */
-#define	ATC_DSCR_IF(i)		(0x3 & (i))	/* Dsc feched via AHB-Lite Interface i */
-
-/* Bitfields in CTRLA */
-#define	ATC_BTSIZE_MAX		0xFFFFUL	/* Maximum Buffer Transfer Size */
-#define	ATC_BTSIZE(x)		(ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
-#define	ATC_SCSIZE_MASK		(0x7 << 16)	/* Source Chunk Transfer Size */
-#define		ATC_SCSIZE(x)		(ATC_SCSIZE_MASK & ((x) << 16))
-#define		ATC_SCSIZE_1		(0x0 << 16)
-#define		ATC_SCSIZE_4		(0x1 << 16)
-#define		ATC_SCSIZE_8		(0x2 << 16)
-#define		ATC_SCSIZE_16		(0x3 << 16)
-#define		ATC_SCSIZE_32		(0x4 << 16)
-#define		ATC_SCSIZE_64		(0x5 << 16)
-#define		ATC_SCSIZE_128		(0x6 << 16)
-#define		ATC_SCSIZE_256		(0x7 << 16)
-#define	ATC_DCSIZE_MASK		(0x7 << 20)	/* Destination Chunk Transfer Size */
-#define		ATC_DCSIZE(x)		(ATC_DCSIZE_MASK & ((x) << 20))
-#define		ATC_DCSIZE_1		(0x0 << 20)
-#define		ATC_DCSIZE_4		(0x1 << 20)
-#define		ATC_DCSIZE_8		(0x2 << 20)
-#define		ATC_DCSIZE_16		(0x3 << 20)
-#define		ATC_DCSIZE_32		(0x4 << 20)
-#define		ATC_DCSIZE_64		(0x5 << 20)
-#define		ATC_DCSIZE_128		(0x6 << 20)
-#define		ATC_DCSIZE_256		(0x7 << 20)
-#define	ATC_SRC_WIDTH_MASK	(0x3 << 24)	/* Source Single Transfer Size */
-#define		ATC_SRC_WIDTH(x)	((x) << 24)
-#define		ATC_SRC_WIDTH_BYTE	(0x0 << 24)
-#define		ATC_SRC_WIDTH_HALFWORD	(0x1 << 24)
-#define		ATC_SRC_WIDTH_WORD	(0x2 << 24)
-#define		ATC_REG_TO_SRC_WIDTH(r)	(((r) >> 24) & 0x3)
-#define	ATC_DST_WIDTH_MASK	(0x3 << 28)	/* Destination Single Transfer Size */
-#define		ATC_DST_WIDTH(x)	((x) << 28)
-#define		ATC_DST_WIDTH_BYTE	(0x0 << 28)
-#define		ATC_DST_WIDTH_HALFWORD	(0x1 << 28)
-#define		ATC_DST_WIDTH_WORD	(0x2 << 28)
-#define	ATC_DONE		(0x1 << 31)	/* Tx Done (only written back in descriptor) */
-
-/* Bitfields in CTRLB */
-#define	ATC_SIF(i)		(0x3 & (i))	/* Src tx done via AHB-Lite Interface i */
-#define	ATC_DIF(i)		((0x3 & (i)) <<  4)	/* Dst tx done via AHB-Lite Interface i */
-				  /* Specify AHB interfaces */
-#define AT_DMA_MEM_IF		0 /* interface 0 as memory interface */
-#define AT_DMA_PER_IF		1 /* interface 1 as peripheral interface */
-
-#define	ATC_SRC_PIP		(0x1 <<  8)	/* Source Picture-in-Picture enabled */
-#define	ATC_DST_PIP		(0x1 << 12)	/* Destination Picture-in-Picture enabled */
-#define	ATC_SRC_DSCR_DIS	(0x1 << 16)	/* Src Descriptor fetch disable */
-#define	ATC_DST_DSCR_DIS	(0x1 << 20)	/* Dst Descriptor fetch disable */
-#define	ATC_FC_MASK		(0x7 << 21)	/* Choose Flow Controller */
-#define		ATC_FC_MEM2MEM		(0x0 << 21)	/* Mem-to-Mem (DMA) */
-#define		ATC_FC_MEM2PER		(0x1 << 21)	/* Mem-to-Periph (DMA) */
-#define		ATC_FC_PER2MEM		(0x2 << 21)	/* Periph-to-Mem (DMA) */
-#define		ATC_FC_PER2PER		(0x3 << 21)	/* Periph-to-Periph (DMA) */
-#define		ATC_FC_PER2MEM_PER	(0x4 << 21)	/* Periph-to-Mem (Peripheral) */
-#define		ATC_FC_MEM2PER_PER	(0x5 << 21)	/* Mem-to-Periph (Peripheral) */
-#define		ATC_FC_PER2PER_SRCPER	(0x6 << 21)	/* Periph-to-Periph (Src Peripheral) */
-#define		ATC_FC_PER2PER_DSTPER	(0x7 << 21)	/* Periph-to-Periph (Dst Peripheral) */
-#define	ATC_SRC_ADDR_MODE_MASK	(0x3 << 24)
-#define		ATC_SRC_ADDR_MODE_INCR	(0x0 << 24)	/* Incrementing Mode */
-#define		ATC_SRC_ADDR_MODE_DECR	(0x1 << 24)	/* Decrementing Mode */
-#define		ATC_SRC_ADDR_MODE_FIXED	(0x2 << 24)	/* Fixed Mode */
-#define	ATC_DST_ADDR_MODE_MASK	(0x3 << 28)
-#define		ATC_DST_ADDR_MODE_INCR	(0x0 << 28)	/* Incrementing Mode */
-#define		ATC_DST_ADDR_MODE_DECR	(0x1 << 28)	/* Decrementing Mode */
-#define		ATC_DST_ADDR_MODE_FIXED	(0x2 << 28)	/* Fixed Mode */
-#define	ATC_IEN			(0x1 << 30)	/* BTC interrupt enable (active low) */
-#define	ATC_AUTO		(0x1 << 31)	/* Auto multiple buffer tx enable */
-
-/* Bitfields in CFG */
-#define ATC_PER_MSB(h)	((0x30U & (h)) >> 4)	/* Extract most significant bits of a handshaking identifier */
-
-#define	ATC_SRC_PER(h)		(0xFU & (h))	/* Channel src rq associated with periph handshaking ifc h */
-#define	ATC_DST_PER(h)		((0xFU & (h)) <<  4)	/* Channel dst rq associated with periph handshaking ifc h */
-#define	ATC_SRC_REP		(0x1 <<  8)	/* Source Replay Mod */
-#define	ATC_SRC_H2SEL		(0x1 <<  9)	/* Source Handshaking Mod */
-#define		ATC_SRC_H2SEL_SW	(0x0 <<  9)
-#define		ATC_SRC_H2SEL_HW	(0x1 <<  9)
-#define	ATC_SRC_PER_MSB(h)	(ATC_PER_MSB(h) << 10)	/* Channel src rq (most significant bits) */
-#define	ATC_DST_REP		(0x1 << 12)	/* Destination Replay Mod */
-#define	ATC_DST_H2SEL		(0x1 << 13)	/* Destination Handshaking Mod */
-#define		ATC_DST_H2SEL_SW	(0x0 << 13)
-#define		ATC_DST_H2SEL_HW	(0x1 << 13)
-#define	ATC_DST_PER_MSB(h)	(ATC_PER_MSB(h) << 14)	/* Channel dst rq (most significant bits) */
-#define	ATC_SOD			(0x1 << 16)	/* Stop On Done */
-#define	ATC_LOCK_IF		(0x1 << 20)	/* Interface Lock */
-#define	ATC_LOCK_B		(0x1 << 21)	/* AHB Bus Lock */
-#define	ATC_LOCK_IF_L		(0x1 << 22)	/* Master Interface Arbiter Lock */
-#define		ATC_LOCK_IF_L_CHUNK	(0x0 << 22)
-#define		ATC_LOCK_IF_L_BUFFER	(0x1 << 22)
-#define	ATC_AHB_PROT_MASK	(0x7 << 24)	/* AHB Protection */
-#define	ATC_FIFOCFG_MASK	(0x3 << 28)	/* FIFO Request Configuration */
-#define		ATC_FIFOCFG_LARGESTBURST	(0x0 << 28)
-#define		ATC_FIFOCFG_HALFFIFO		(0x1 << 28)
-#define		ATC_FIFOCFG_ENOUGHSPACE		(0x2 << 28)
-
-/* Bitfields in SPIP */
-#define	ATC_SPIP_HOLE(x)	(0xFFFFU & (x))
-#define	ATC_SPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
-
-/* Bitfields in DPIP */
-#define	ATC_DPIP_HOLE(x)	(0xFFFFU & (x))
-#define	ATC_DPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
-
-
-/*--  descriptors  -----------------------------------------------------*/
-
-/* LLI == Linked List Item; aka DMA buffer descriptor */
-struct at_lli {
-	/* values that are not changed by hardware */
-	u32 saddr;
-	u32 daddr;
-	/* value that may get written back: */
-	u32 ctrla;
-	/* more values that are not changed by hardware */
-	u32 ctrlb;
-	u32 dscr;	/* chain to next lli */
-};
-
-/**
- * struct at_desc - software descriptor
- * @at_lli: hardware lli structure
- * @txd: support for the async_tx api
- * @desc_node: node on the channed descriptors list
- * @len: descriptor byte count
- * @total_len: total transaction byte count
- */
-struct at_desc {
-	/* FIRST values the hardware uses */
-	struct at_lli			lli;
-
-	/* THEN values for driver housekeeping */
-	struct list_head		tx_list;
-	struct dma_async_tx_descriptor	txd;
-	struct list_head		desc_node;
-	size_t				len;
-	size_t				total_len;
-
-	/* Interleaved data */
-	size_t				boundary;
-	size_t				dst_hole;
-	size_t				src_hole;
-
-	/* Memset temporary buffer */
-	bool				memset_buffer;
-	dma_addr_t			memset_paddr;
-	int				*memset_vaddr;
-};
-
-static inline struct at_desc *
-txd_to_at_desc(struct dma_async_tx_descriptor *txd)
-{
-	return container_of(txd, struct at_desc, txd);
-}
-
-
-/*--  Channels  --------------------------------------------------------*/
-
-/**
- * atc_status - information bits stored in channel status flag
- *
- * Manipulated with atomic operations.
- */
-enum atc_status {
-	ATC_IS_ERROR = 0,
-	ATC_IS_PAUSED = 1,
-	ATC_IS_CYCLIC = 24,
-};
-
-/**
- * struct at_dma_chan - internal representation of an Atmel HDMAC channel
- * @chan_common: common dmaengine channel object members
- * @device: parent device
- * @ch_regs: memory mapped register base
- * @mask: channel index in a mask
- * @per_if: peripheral interface
- * @mem_if: memory interface
- * @status: transmit status information from irq/prep* functions
- *                to tasklet (use atomic operations)
- * @tasklet: bottom half to finish transaction work
- * @save_cfg: configuration register that is saved on suspend/resume cycle
- * @save_dscr: for cyclic operations, preserve next descriptor address in
- *             the cyclic list on suspend/resume cycle
- * @dma_sconfig: configuration for slave transfers, passed via
- * .device_config
- * @lock: serializes enqueue/dequeue operations to descriptors lists
- * @active_list: list of descriptors dmaengine is being running on
- * @queue: list of descriptors ready to be submitted to engine
- * @free_list: list of descriptors usable by the channel
- */
-struct at_dma_chan {
-	struct dma_chan		chan_common;
-	struct at_dma		*device;
-	void __iomem		*ch_regs;
-	u8			mask;
-	u8			per_if;
-	u8			mem_if;
-	unsigned long		status;
-	struct tasklet_struct	tasklet;
-	u32			save_cfg;
-	u32			save_dscr;
-	struct dma_slave_config dma_sconfig;
-
-	spinlock_t		lock;
-
-	/* these other elements are all protected by lock */
-	struct list_head	active_list;
-	struct list_head	queue;
-	struct list_head	free_list;
-};
-
-#define	channel_readl(atchan, name) \
-	__raw_readl((atchan)->ch_regs + ATC_##name##_OFFSET)
-
-#define	channel_writel(atchan, name, val) \
-	__raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET)
-
-static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
-{
-	return container_of(dchan, struct at_dma_chan, chan_common);
-}
-
-/*
- * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
- * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
- *
- * This can be done by finding most significant bit set.
- */
-static inline void convert_burst(u32 *maxburst)
-{
-	if (*maxburst > 1)
-		*maxburst = fls(*maxburst) - 2;
-	else
-		*maxburst = 0;
-}
-
-/*
- * Fix sconfig's bus width according to at_hdmac.
- * 1 byte -> 0, 2 bytes -> 1, 4 bytes -> 2.
- */
-static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
-{
-	switch (addr_width) {
-	case DMA_SLAVE_BUSWIDTH_2_BYTES:
-		return 1;
-	case DMA_SLAVE_BUSWIDTH_4_BYTES:
-		return 2;
-	default:
-		/* For 1 byte width or fallback */
-		return 0;
-	}
-}
-
-/*--  Controller  ------------------------------------------------------*/
-
-/**
- * struct at_dma - internal representation of an Atmel HDMA Controller
- * @chan_common: common dmaengine dma_device object members
- * @atdma_devtype: identifier of DMA controller compatibility
- * @ch_regs: memory mapped register base
- * @clk: dma controller clock
- * @save_imr: interrupt mask register that is saved on suspend/resume cycle
- * @all_chan_mask: all channels availlable in a mask
- * @dma_desc_pool: base of DMA descriptor region (DMA address)
- * @chan: channels table to store at_dma_chan structures
- */
-struct at_dma {
-	struct dma_device	dma_common;
-	void __iomem		*regs;
-	struct clk		*clk;
-	u32			save_imr;
-
-	u8			all_chan_mask;
-
-	struct dma_pool		*dma_desc_pool;
-	struct dma_pool		*memset_pool;
-	/* AT THE END channels table */
-	struct at_dma_chan	chan[];
-};
-
-#define	dma_readl(atdma, name) \
-	__raw_readl((atdma)->regs + AT_DMA_##name)
-#define	dma_writel(atdma, name, val) \
-	__raw_writel((val), (atdma)->regs + AT_DMA_##name)
-
-static inline struct at_dma *to_at_dma(struct dma_device *ddev)
-{
-	return container_of(ddev, struct at_dma, dma_common);
-}
-
-
-/*--  Helper functions  ------------------------------------------------*/
-
-static struct device *chan2dev(struct dma_chan *chan)
-{
-	return &chan->dev->device;
-}
-
-#if defined(VERBOSE_DEBUG)
-static void vdbg_dump_regs(struct at_dma_chan *atchan)
-{
-	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
-
-	dev_err(chan2dev(&atchan->chan_common),
-		"  channel %d : imr = 0x%x, chsr = 0x%x\n",
-		atchan->chan_common.chan_id,
-		dma_readl(atdma, EBCIMR),
-		dma_readl(atdma, CHSR));
-
-	dev_err(chan2dev(&atchan->chan_common),
-		"  channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
-		channel_readl(atchan, SADDR),
-		channel_readl(atchan, DADDR),
-		channel_readl(atchan, CTRLA),
-		channel_readl(atchan, CTRLB),
-		channel_readl(atchan, CFG),
-		channel_readl(atchan, DSCR));
-}
-#else
-static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
-#endif
-
-static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
-{
-	dev_crit(chan2dev(&atchan->chan_common),
-		 "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n",
-		 &lli->saddr, &lli->daddr,
-		 lli->ctrla, lli->ctrlb, &lli->dscr);
-}
-
-
-static void atc_setup_irq(struct at_dma *atdma, int chan_id, int on)
-{
-	u32 ebci;
-
-	/* enable interrupts on buffer transfer completion & error */
-	ebci =    AT_DMA_BTC(chan_id)
-		| AT_DMA_ERR(chan_id);
-	if (on)
-		dma_writel(atdma, EBCIER, ebci);
-	else
-		dma_writel(atdma, EBCIDR, ebci);
-}
-
-static void atc_enable_chan_irq(struct at_dma *atdma, int chan_id)
-{
-	atc_setup_irq(atdma, chan_id, 1);
-}
-
-static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id)
-{
-	atc_setup_irq(atdma, chan_id, 0);
-}
-
-
-/**
- * atc_chan_is_enabled - test if given channel is enabled
- * @atchan: channel we want to test status
- */
-static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
-{
-	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
-
-	return !!(dma_readl(atdma, CHSR) & atchan->mask);
-}
-
-/**
- * atc_chan_is_paused - test channel pause/resume status
- * @atchan: channel we want to test status
- */
-static inline int atc_chan_is_paused(struct at_dma_chan *atchan)
-{
-	return test_bit(ATC_IS_PAUSED, &atchan->status);
-}
-
-/**
- * atc_chan_is_cyclic - test if given channel has cyclic property set
- * @atchan: channel we want to test status
- */
-static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan)
-{
-	return test_bit(ATC_IS_CYCLIC, &atchan->status);
-}
-
-/**
- * set_desc_eol - set end-of-link to descriptor so it will end transfer
- * @desc: descriptor, signle or at the end of a chain, to end chain on
- */
-static void set_desc_eol(struct at_desc *desc)
-{
-	u32 ctrlb = desc->lli.ctrlb;
-
-	ctrlb &= ~ATC_IEN;
-	ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
-
-	desc->lli.ctrlb = ctrlb;
-	desc->lli.dscr = 0;
-}
-
-#endif /* AT_HDMAC_REGS_H */
-- 
GitLab


From d8840a7edcf0aa840e175af17d61476a7dbc65f7 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:03:02 +0300
Subject: [PATCH 1383/4122] dmaengine: at_hdmac: Use bitfield access macros

Use the bitfield access macros in order to clean and to make the driver
easier to read. One will see some "line length exceeds 100 columns"
checkpatch warnings. I chose to not introduce new lines for regs
descriptions in order to preserve the style of the comments throughout
the definitions. Style can be fixed in a further patch.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-29-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 408 ++++++++++++++++++++---------------------
 1 file changed, 198 insertions(+), 210 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index a07e3355f09a0..edec6a8c730c0 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -10,6 +10,7 @@
  */
 
 #include <dt-bindings/dma/at91.h>
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/dmaengine.h>
 #include <linux/dmapool.h>
@@ -36,176 +37,153 @@
 
 #define	AT_DMA_MAX_NR_CHANNELS	8
 
+/* Global Configuration Register */
+#define AT_DMA_GCFG		0x00
+#define AT_DMA_IF_BIGEND(i)	BIT((i))	/* AHB-Lite Interface i in Big-endian mode */
+#define AT_DMA_ARB_CFG		BIT(4)		/* Arbiter mode. */
 
-#define	AT_DMA_GCFG	0x00	/* Global Configuration Register */
-#define		AT_DMA_IF_BIGEND(i)	(0x1 << (i))	/* AHB-Lite Interface i in Big-endian mode */
-#define		AT_DMA_ARB_CFG	(0x1 << 4)	/* Arbiter mode. */
-#define			AT_DMA_ARB_CFG_FIXED		(0x0 << 4)
-#define			AT_DMA_ARB_CFG_ROUND_ROBIN	(0x1 << 4)
+/* Controller Enable Register */
+#define AT_DMA_EN		0x04
+#define AT_DMA_ENABLE		BIT(0)
 
-#define	AT_DMA_EN	0x04	/* Controller Enable Register */
-#define		AT_DMA_ENABLE	(0x1 << 0)
+/* Software Single Request Register */
+#define AT_DMA_SREQ		0x08
+#define AT_DMA_SSREQ(x)		BIT((x) << 1)		/* Request a source single transfer on channel x */
+#define AT_DMA_DSREQ(x)		BIT(1 + ((x) << 1))	/* Request a destination single transfer on channel x */
 
-#define	AT_DMA_SREQ	0x08	/* Software Single Request Register */
-#define		AT_DMA_SSREQ(x)	(0x1 << ((x) << 1))		/* Request a source single transfer on channel x */
-#define		AT_DMA_DSREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination single transfer on channel x */
+/* Software Chunk Transfer Request Register */
+#define AT_DMA_CREQ		0x0c
+#define AT_DMA_SCREQ(x)		BIT((x) << 1)		/* Request a source chunk transfer on channel x */
+#define AT_DMA_DCREQ(x)		BIT(1 + ((x) << 1))	/* Request a destination chunk transfer on channel x */
 
-#define	AT_DMA_CREQ	0x0C	/* Software Chunk Transfer Request Register */
-#define		AT_DMA_SCREQ(x)	(0x1 << ((x) << 1))		/* Request a source chunk transfer on channel x */
-#define		AT_DMA_DCREQ(x)	(0x1 << (1 + ((x) << 1)))	/* Request a destination chunk transfer on channel x */
+/* Software Last Transfer Flag Register */
+#define AT_DMA_LAST		0x10
+#define AT_DMA_SLAST(x)		BIT((x) << 1)		/* This src rq is last tx of buffer on channel x */
+#define AT_DMA_DLAST(x)		BIT(1 + ((x) << 1))	/* This dst rq is last tx of buffer on channel x */
 
-#define	AT_DMA_LAST	0x10	/* Software Last Transfer Flag Register */
-#define		AT_DMA_SLAST(x)	(0x1 << ((x) << 1))		/* This src rq is last tx of buffer on channel x */
-#define		AT_DMA_DLAST(x)	(0x1 << (1 + ((x) << 1)))	/* This dst rq is last tx of buffer on channel x */
-
-#define	AT_DMA_SYNC	0x14	/* Request Synchronization Register */
-#define		AT_DMA_SYR(h)	(0x1 << (h))			/* Synchronize handshake line h */
+/* Request Synchronization Register */
+#define AT_DMA_SYNC		0x14
+#define AT_DMA_SYR(h)		BIT((h))		/* Synchronize handshake line h */
 
 /* Error, Chained Buffer transfer completed and Buffer transfer completed Interrupt registers */
-#define	AT_DMA_EBCIER	0x18	/* Enable register */
-#define	AT_DMA_EBCIDR	0x1C	/* Disable register */
-#define	AT_DMA_EBCIMR	0x20	/* Mask Register */
-#define	AT_DMA_EBCISR	0x24	/* Status Register */
-#define		AT_DMA_CBTC_OFFSET	8
-#define		AT_DMA_ERR_OFFSET	16
-#define		AT_DMA_BTC(x)	(0x1 << (x))
-#define		AT_DMA_CBTC(x)	(0x1 << (AT_DMA_CBTC_OFFSET + (x)))
-#define		AT_DMA_ERR(x)	(0x1 << (AT_DMA_ERR_OFFSET + (x)))
-
-#define	AT_DMA_CHER	0x28	/* Channel Handler Enable Register */
-#define		AT_DMA_ENA(x)	(0x1 << (x))
-#define		AT_DMA_SUSP(x)	(0x1 << ( 8 + (x)))
-#define		AT_DMA_KEEP(x)	(0x1 << (24 + (x)))
-
-#define	AT_DMA_CHDR	0x2C	/* Channel Handler Disable Register */
-#define		AT_DMA_DIS(x)	(0x1 << (x))
-#define		AT_DMA_RES(x)	(0x1 << ( 8 + (x)))
-
-#define	AT_DMA_CHSR	0x30	/* Channel Handler Status Register */
-#define		AT_DMA_EMPT(x)	(0x1 << (16 + (x)))
-#define		AT_DMA_STAL(x)	(0x1 << (24 + (x)))
-
-
-#define	AT_DMA_CH_REGS_BASE	0x3C	/* Channel registers base address */
-#define	ch_regs(x)	(AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
+#define AT_DMA_EBCIER		0x18			/* Enable register */
+#define AT_DMA_EBCIDR		0x1c			/* Disable register */
+#define AT_DMA_EBCIMR		0x20			/* Mask Register */
+#define AT_DMA_EBCISR		0x24			/* Status Register */
+#define AT_DMA_CBTC_OFFSET	8
+#define AT_DMA_ERR_OFFSET	16
+#define AT_DMA_BTC(x)		BIT((x))
+#define AT_DMA_CBTC(x)		BIT(AT_DMA_CBTC_OFFSET + (x))
+#define AT_DMA_ERR(x)		BIT(AT_DMA_ERR_OFFSET + (x))
+
+/* Channel Handler Enable Register */
+#define AT_DMA_CHER		0x28
+#define AT_DMA_ENA(x)		BIT((x))
+#define AT_DMA_SUSP(x)		BIT(8 + (x))
+#define AT_DMA_KEEP(x)		BIT(24 + (x))
+
+/* Channel Handler Disable Register */
+#define AT_DMA_CHDR		0x2c
+#define AT_DMA_DIS(x)		BIT(x)
+#define AT_DMA_RES(x)		BIT(8 + (x))
+
+/* Channel Handler Status Register */
+#define AT_DMA_CHSR		0x30
+#define AT_DMA_EMPT(x)		BIT(16 + (x))
+#define AT_DMA_STAL(x)		BIT(24 + (x))
+
+/* Channel registers base address */
+#define AT_DMA_CH_REGS_BASE	0x3c
+#define ch_regs(x)		(AT_DMA_CH_REGS_BASE + (x) * 0x28) /* Channel x base addr */
 
 /* Hardware register offset for each channel */
-#define	ATC_SADDR_OFFSET	0x00	/* Source Address Register */
-#define	ATC_DADDR_OFFSET	0x04	/* Destination Address Register */
-#define	ATC_DSCR_OFFSET		0x08	/* Descriptor Address Register */
-#define	ATC_CTRLA_OFFSET	0x0C	/* Control A Register */
-#define	ATC_CTRLB_OFFSET	0x10	/* Control B Register */
-#define	ATC_CFG_OFFSET		0x14	/* Configuration Register */
-#define	ATC_SPIP_OFFSET		0x18	/* Src PIP Configuration Register */
-#define	ATC_DPIP_OFFSET		0x1C	/* Dst PIP Configuration Register */
+#define ATC_SADDR_OFFSET	0x00	/* Source Address Register */
+#define ATC_DADDR_OFFSET	0x04	/* Destination Address Register */
+#define ATC_DSCR_OFFSET		0x08	/* Descriptor Address Register */
+#define ATC_CTRLA_OFFSET	0x0c	/* Control A Register */
+#define ATC_CTRLB_OFFSET	0x10	/* Control B Register */
+#define ATC_CFG_OFFSET		0x14	/* Configuration Register */
+#define ATC_SPIP_OFFSET		0x18	/* Src PIP Configuration Register */
+#define ATC_DPIP_OFFSET		0x1c	/* Dst PIP Configuration Register */
 
 
 /* Bitfield definitions */
 
 /* Bitfields in DSCR */
-#define	ATC_DSCR_IF(i)		(0x3 & (i))	/* Dsc feched via AHB-Lite Interface i */
+#define ATC_DSCR_IF		GENMASK(1, 0)	/* Dsc feched via AHB-Lite Interface */
 
 /* Bitfields in CTRLA */
-#define	ATC_BTSIZE_MAX		0xFFFFUL	/* Maximum Buffer Transfer Size */
-#define	ATC_BTSIZE(x)		(ATC_BTSIZE_MAX & (x)) /* Buffer Transfer Size */
-#define	ATC_SCSIZE_MASK		(0x7 << 16)	/* Source Chunk Transfer Size */
-#define		ATC_SCSIZE(x)		(ATC_SCSIZE_MASK & ((x) << 16))
-#define		ATC_SCSIZE_1		(0x0 << 16)
-#define		ATC_SCSIZE_4		(0x1 << 16)
-#define		ATC_SCSIZE_8		(0x2 << 16)
-#define		ATC_SCSIZE_16		(0x3 << 16)
-#define		ATC_SCSIZE_32		(0x4 << 16)
-#define		ATC_SCSIZE_64		(0x5 << 16)
-#define		ATC_SCSIZE_128		(0x6 << 16)
-#define		ATC_SCSIZE_256		(0x7 << 16)
-#define	ATC_DCSIZE_MASK		(0x7 << 20)	/* Destination Chunk Transfer Size */
-#define		ATC_DCSIZE(x)		(ATC_DCSIZE_MASK & ((x) << 20))
-#define		ATC_DCSIZE_1		(0x0 << 20)
-#define		ATC_DCSIZE_4		(0x1 << 20)
-#define		ATC_DCSIZE_8		(0x2 << 20)
-#define		ATC_DCSIZE_16		(0x3 << 20)
-#define		ATC_DCSIZE_32		(0x4 << 20)
-#define		ATC_DCSIZE_64		(0x5 << 20)
-#define		ATC_DCSIZE_128		(0x6 << 20)
-#define		ATC_DCSIZE_256		(0x7 << 20)
-#define	ATC_SRC_WIDTH_MASK	(0x3 << 24)	/* Source Single Transfer Size */
-#define		ATC_SRC_WIDTH(x)	((x) << 24)
-#define		ATC_SRC_WIDTH_BYTE	(0x0 << 24)
-#define		ATC_SRC_WIDTH_HALFWORD	(0x1 << 24)
-#define		ATC_SRC_WIDTH_WORD	(0x2 << 24)
-#define		ATC_REG_TO_SRC_WIDTH(r)	(((r) >> 24) & 0x3)
-#define	ATC_DST_WIDTH_MASK	(0x3 << 28)	/* Destination Single Transfer Size */
-#define		ATC_DST_WIDTH(x)	((x) << 28)
-#define		ATC_DST_WIDTH_BYTE	(0x0 << 28)
-#define		ATC_DST_WIDTH_HALFWORD	(0x1 << 28)
-#define		ATC_DST_WIDTH_WORD	(0x2 << 28)
-#define	ATC_DONE		(0x1 << 31)	/* Tx Done (only written back in descriptor) */
+#define ATC_BTSIZE_MAX		GENMASK(15, 0)	/* Maximum Buffer Transfer Size */
+#define ATC_BTSIZE		GENMASK(15, 0)	/* Buffer Transfer Size */
+#define ATC_SCSIZE		GENMASK(18, 16)	/* Source Chunk Transfer Size */
+#define ATC_DCSIZE		GENMASK(22, 20)	/* Destination Chunk Transfer Size */
+#define ATC_SRC_WIDTH		GENMASK(25, 24)	/* Source Single Transfer Size */
+#define ATC_DST_WIDTH		GENMASK(29, 28)	/* Destination Single Transfer Size */
+#define ATC_DONE		BIT(31)	/* Tx Done (only written back in descriptor) */
 
 /* Bitfields in CTRLB */
-#define	ATC_SIF(i)		(0x3 & (i))	/* Src tx done via AHB-Lite Interface i */
-#define	ATC_DIF(i)		((0x3 & (i)) <<  4)	/* Dst tx done via AHB-Lite Interface i */
-				  /* Specify AHB interfaces */
-#define AT_DMA_MEM_IF		0 /* interface 0 as memory interface */
-#define AT_DMA_PER_IF		1 /* interface 1 as peripheral interface */
-
-#define	ATC_SRC_PIP		(0x1 <<  8)	/* Source Picture-in-Picture enabled */
-#define	ATC_DST_PIP		(0x1 << 12)	/* Destination Picture-in-Picture enabled */
-#define	ATC_SRC_DSCR_DIS	(0x1 << 16)	/* Src Descriptor fetch disable */
-#define	ATC_DST_DSCR_DIS	(0x1 << 20)	/* Dst Descriptor fetch disable */
-#define	ATC_FC_MASK		(0x7 << 21)	/* Choose Flow Controller */
-#define		ATC_FC_MEM2MEM		(0x0 << 21)	/* Mem-to-Mem (DMA) */
-#define		ATC_FC_MEM2PER		(0x1 << 21)	/* Mem-to-Periph (DMA) */
-#define		ATC_FC_PER2MEM		(0x2 << 21)	/* Periph-to-Mem (DMA) */
-#define		ATC_FC_PER2PER		(0x3 << 21)	/* Periph-to-Periph (DMA) */
-#define		ATC_FC_PER2MEM_PER	(0x4 << 21)	/* Periph-to-Mem (Peripheral) */
-#define		ATC_FC_MEM2PER_PER	(0x5 << 21)	/* Mem-to-Periph (Peripheral) */
-#define		ATC_FC_PER2PER_SRCPER	(0x6 << 21)	/* Periph-to-Periph (Src Peripheral) */
-#define		ATC_FC_PER2PER_DSTPER	(0x7 << 21)	/* Periph-to-Periph (Dst Peripheral) */
-#define	ATC_SRC_ADDR_MODE_MASK	(0x3 << 24)
-#define		ATC_SRC_ADDR_MODE_INCR	(0x0 << 24)	/* Incrementing Mode */
-#define		ATC_SRC_ADDR_MODE_DECR	(0x1 << 24)	/* Decrementing Mode */
-#define		ATC_SRC_ADDR_MODE_FIXED	(0x2 << 24)	/* Fixed Mode */
-#define	ATC_DST_ADDR_MODE_MASK	(0x3 << 28)
-#define		ATC_DST_ADDR_MODE_INCR	(0x0 << 28)	/* Incrementing Mode */
-#define		ATC_DST_ADDR_MODE_DECR	(0x1 << 28)	/* Decrementing Mode */
-#define		ATC_DST_ADDR_MODE_FIXED	(0x2 << 28)	/* Fixed Mode */
-#define	ATC_IEN			(0x1 << 30)	/* BTC interrupt enable (active low) */
-#define	ATC_AUTO		(0x1 << 31)	/* Auto multiple buffer tx enable */
+#define ATC_SIF			GENMASK(1, 0)	/* Src tx done via AHB-Lite Interface i */
+#define ATC_DIF			GENMASK(5, 4)	/* Dst tx done via AHB-Lite Interface i */
+#define AT_DMA_MEM_IF		0x0		/* interface 0 as memory interface */
+#define AT_DMA_PER_IF		0x1		/* interface 1 as peripheral interface */
+#define ATC_SRC_PIP		BIT(8)		/* Source Picture-in-Picture enabled */
+#define ATC_DST_PIP		BIT(12)		/* Destination Picture-in-Picture enabled */
+#define ATC_SRC_DSCR_DIS	BIT(16)		/* Src Descriptor fetch disable */
+#define ATC_DST_DSCR_DIS	BIT(20)		/* Dst Descriptor fetch disable */
+#define ATC_FC			GENMASK(22, 21)	/* Choose Flow Controller */
+#define ATC_FC_MEM2MEM		0x0		/* Mem-to-Mem (DMA) */
+#define ATC_FC_MEM2PER		0x1		/* Mem-to-Periph (DMA) */
+#define ATC_FC_PER2MEM		0x2		/* Periph-to-Mem (DMA) */
+#define ATC_FC_PER2PER		0x3		/* Periph-to-Periph (DMA) */
+#define ATC_FC_PER2MEM_PER	0x4		/* Periph-to-Mem (Peripheral) */
+#define ATC_FC_MEM2PER_PER	0x5		/* Mem-to-Periph (Peripheral) */
+#define ATC_FC_PER2PER_SRCPER	0x6		/* Periph-to-Periph (Src Peripheral) */
+#define ATC_FC_PER2PER_DSTPER	0x7		/* Periph-to-Periph (Dst Peripheral) */
+#define ATC_SRC_ADDR_MODE	GENMASK(25, 24)
+#define ATC_SRC_ADDR_MODE_INCR	0x0		/* Incrementing Mode */
+#define ATC_SRC_ADDR_MODE_DECR	0x1		/* Decrementing Mode */
+#define ATC_SRC_ADDR_MODE_FIXED	0x2		/* Fixed Mode */
+#define ATC_DST_ADDR_MODE	GENMASK(29, 28)
+#define ATC_DST_ADDR_MODE_INCR	0x0		/* Incrementing Mode */
+#define ATC_DST_ADDR_MODE_DECR	0x1		/* Decrementing Mode */
+#define ATC_DST_ADDR_MODE_FIXED	0x2		/* Fixed Mode */
+#define ATC_IEN			BIT(30)		/* BTC interrupt enable (active low) */
+#define ATC_AUTO		BIT(31)		/* Auto multiple buffer tx enable */
 
 /* Bitfields in CFG */
 #define ATC_PER_MSB(h)	((0x30U & (h)) >> 4)	/* Extract most significant bits of a handshaking identifier */
 
-#define	ATC_SRC_PER(h)		(0xFU & (h))	/* Channel src rq associated with periph handshaking ifc h */
-#define	ATC_DST_PER(h)		((0xFU & (h)) <<  4)	/* Channel dst rq associated with periph handshaking ifc h */
-#define	ATC_SRC_REP		(0x1 <<  8)	/* Source Replay Mod */
-#define	ATC_SRC_H2SEL		(0x1 <<  9)	/* Source Handshaking Mod */
-#define		ATC_SRC_H2SEL_SW	(0x0 <<  9)
-#define		ATC_SRC_H2SEL_HW	(0x1 <<  9)
-#define	ATC_SRC_PER_MSB(h)	(ATC_PER_MSB(h) << 10)	/* Channel src rq (most significant bits) */
-#define	ATC_DST_REP		(0x1 << 12)	/* Destination Replay Mod */
-#define	ATC_DST_H2SEL		(0x1 << 13)	/* Destination Handshaking Mod */
-#define		ATC_DST_H2SEL_SW	(0x0 << 13)
-#define		ATC_DST_H2SEL_HW	(0x1 << 13)
-#define	ATC_DST_PER_MSB(h)	(ATC_PER_MSB(h) << 14)	/* Channel dst rq (most significant bits) */
-#define	ATC_SOD			(0x1 << 16)	/* Stop On Done */
-#define	ATC_LOCK_IF		(0x1 << 20)	/* Interface Lock */
-#define	ATC_LOCK_B		(0x1 << 21)	/* AHB Bus Lock */
-#define	ATC_LOCK_IF_L		(0x1 << 22)	/* Master Interface Arbiter Lock */
-#define		ATC_LOCK_IF_L_CHUNK	(0x0 << 22)
-#define		ATC_LOCK_IF_L_BUFFER	(0x1 << 22)
-#define	ATC_AHB_PROT_MASK	(0x7 << 24)	/* AHB Protection */
-#define	ATC_FIFOCFG_MASK	(0x3 << 28)	/* FIFO Request Configuration */
-#define		ATC_FIFOCFG_LARGESTBURST	(0x0 << 28)
-#define		ATC_FIFOCFG_HALFFIFO		(0x1 << 28)
-#define		ATC_FIFOCFG_ENOUGHSPACE		(0x2 << 28)
+#define ATC_SRC_PER		GENMASK(3, 0)	/* Channel src rq associated with periph handshaking ifc h */
+#define ATC_DST_PER		GENMASK(7, 4)	/* Channel dst rq associated with periph handshaking ifc h */
+#define ATC_SRC_REP		BIT(8)		/* Source Replay Mod */
+#define ATC_SRC_H2SEL		BIT(9)		/* Source Handshaking Mod */
+#define ATC_SRC_PER_MSB		GENMASK(11, 10)	/* Channel src rq (most significant bits) */
+#define ATC_DST_REP		BIT(12)		/* Destination Replay Mod */
+#define ATC_DST_H2SEL		BIT(13)		/* Destination Handshaking Mod */
+#define ATC_DST_PER_MSB		GENMASK(15, 14)	/* Channel dst rq (most significant bits) */
+#define ATC_SOD			BIT(16)		/* Stop On Done */
+#define ATC_LOCK_IF		BIT(20)		/* Interface Lock */
+#define ATC_LOCK_B		BIT(21)		/* AHB Bus Lock */
+#define ATC_LOCK_IF_L		BIT(22)		/* Master Interface Arbiter Lock */
+#define ATC_AHB_PROT		GENMASK(26, 24)	/* AHB Protection */
+#define ATC_FIFOCFG		GENMASK(29, 28)	/* FIFO Request Configuration */
+#define ATC_FIFOCFG_LARGESTBURST	0x0
+#define ATC_FIFOCFG_HALFFIFO		0x1
+#define ATC_FIFOCFG_ENOUGHSPACE		0x2
 
 /* Bitfields in SPIP */
-#define	ATC_SPIP_HOLE(x)	(0xFFFFU & (x))
-#define	ATC_SPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+#define ATC_SPIP_HOLE		GENMASK(15, 0)
+#define ATC_SPIP_BOUNDARY	GENMASK(25, 16)
 
 /* Bitfields in DPIP */
-#define	ATC_DPIP_HOLE(x)	(0xFFFFU & (x))
-#define	ATC_DPIP_BOUNDARY(x)	((0x3FF & (x)) << 16)
+#define ATC_DPIP_HOLE		GENMASK(15, 0)
+#define ATC_DPIP_BOUNDARY	GENMASK(25, 16)
+
+#define ATC_SRC_PER_ID(id)	(FIELD_PREP(ATC_SRC_PER_MSB, (id)) |	\
+				 FIELD_PREP(ATC_SRC_PER, (id)))
+#define ATC_DST_PER_ID(id)	(FIELD_PREP(ATC_DST_PER_MSB, (id)) |	\
+				 FIELD_PREP(ATC_DST_PER, (id)))
+
 
 
 /*--  descriptors  -----------------------------------------------------*/
@@ -502,9 +480,9 @@ static void set_desc_eol(struct at_desc *desc)
 	desc->lli.dscr = 0;
 }
 
-#define	ATC_DEFAULT_CFG		(ATC_FIFOCFG_HALFFIFO)
-#define	ATC_DEFAULT_CTRLB	(ATC_SIF(AT_DMA_MEM_IF) \
-				|ATC_DIF(AT_DMA_MEM_IF))
+#define	ATC_DEFAULT_CFG		FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO)
+#define	ATC_DEFAULT_CTRLB	(FIELD_PREP(ATC_SIF, AT_DMA_MEM_IF) | \
+				 FIELD_PREP(ATC_DIF, AT_DMA_MEM_IF))
 #define ATC_DMA_BUSWIDTHS\
 	(BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) |\
 	BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) |\
@@ -720,10 +698,12 @@ static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
 	channel_writel(atchan, CTRLA, 0);
 	channel_writel(atchan, CTRLB, 0);
 	channel_writel(atchan, DSCR, first->txd.phys);
-	channel_writel(atchan, SPIP, ATC_SPIP_HOLE(first->src_hole) |
-		       ATC_SPIP_BOUNDARY(first->boundary));
-	channel_writel(atchan, DPIP, ATC_DPIP_HOLE(first->dst_hole) |
-		       ATC_DPIP_BOUNDARY(first->boundary));
+	channel_writel(atchan, SPIP, FIELD_PREP(ATC_SPIP_HOLE,
+						first->src_hole) |
+		       FIELD_PREP(ATC_SPIP_BOUNDARY, first->boundary));
+	channel_writel(atchan, DPIP, FIELD_PREP(ATC_DPIP_HOLE,
+						first->dst_hole) |
+		       FIELD_PREP(ATC_DPIP_BOUNDARY, first->boundary));
 	/* Don't allow CPU to reorder channel enable. */
 	wmb();
 	dma_writel(atdma, CHER, atchan->mask);
@@ -763,8 +743,8 @@ static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan,
  */
 static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
 {
-	u32 btsize = (ctrla & ATC_BTSIZE_MAX);
-	u32 src_width = ATC_REG_TO_SRC_WIDTH(ctrla);
+	u32 btsize = FIELD_GET(ATC_BTSIZE, ctrla);
+	u32 src_width = FIELD_GET(ATC_SRC_WIDTH, ctrla);
 
 	/*
 	 * According to the datasheet, when reading the Control A Register
@@ -1203,15 +1183,14 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 		return NULL;
 	}
 
-	ctrla = ATC_SRC_WIDTH(dwidth) |
-		ATC_DST_WIDTH(dwidth);
+	ctrla = FIELD_PREP(ATC_SRC_WIDTH, dwidth) |
+		FIELD_PREP(ATC_DST_WIDTH, dwidth);
 
-	ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
-		| ATC_SRC_ADDR_MODE_INCR
-		| ATC_DST_ADDR_MODE_INCR
-		| ATC_SRC_PIP
-		| ATC_DST_PIP
-		| ATC_FC_MEM2MEM;
+	ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN |
+		FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) |
+		FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) |
+		ATC_SRC_PIP | ATC_DST_PIP |
+		FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM);
 
 	/* create the transfer */
 	desc = atc_desc_get(atchan);
@@ -1272,10 +1251,10 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		return NULL;
 	}
 
-	ctrlb =   ATC_DEFAULT_CTRLB | ATC_IEN
-		| ATC_SRC_ADDR_MODE_INCR
-		| ATC_DST_ADDR_MODE_INCR
-		| ATC_FC_MEM2MEM;
+	ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN |
+		FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) |
+		FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) |
+		FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM);
 
 	/*
 	 * We can be a lot more clever here, but this should take care
@@ -1283,8 +1262,8 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	 */
 	src_width = dst_width = atc_get_xfer_width(src, dest, len);
 
-	ctrla = ATC_SRC_WIDTH(src_width) |
-		ATC_DST_WIDTH(dst_width);
+	ctrla = FIELD_PREP(ATC_SRC_WIDTH, src_width) |
+		FIELD_PREP(ATC_DST_WIDTH, dst_width);
 
 	for (offset = 0; offset < len; offset += xfer_count << src_width) {
 		xfer_count = min_t(size_t, (len - offset) >> src_width,
@@ -1330,11 +1309,11 @@ static struct at_desc *atc_create_memset_desc(struct dma_chan *chan,
 	struct at_desc *desc;
 	size_t xfer_count;
 
-	u32 ctrla = ATC_SRC_WIDTH(2) | ATC_DST_WIDTH(2);
+	u32 ctrla = FIELD_PREP(ATC_SRC_WIDTH, 2) | FIELD_PREP(ATC_DST_WIDTH, 2);
 	u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN |
-		ATC_SRC_ADDR_MODE_FIXED |
-		ATC_DST_ADDR_MODE_INCR |
-		ATC_FC_MEM2MEM;
+		    FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_FIXED) |
+		    FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) |
+		    FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM);
 
 	xfer_count = len >> 2;
 	if (xfer_count > ATC_BTSIZE_MAX) {
@@ -1549,18 +1528,20 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 	}
 
-	ctrla =   ATC_SCSIZE(sconfig->src_maxburst)
-		| ATC_DCSIZE(sconfig->dst_maxburst);
+	ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) |
+		FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst);
 	ctrlb = ATC_IEN;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
 		reg_width = convert_buswidth(sconfig->dst_addr_width);
-		ctrla |=  ATC_DST_WIDTH(reg_width);
-		ctrlb |=  ATC_DST_ADDR_MODE_FIXED
-			| ATC_SRC_ADDR_MODE_INCR
-			| ATC_FC_MEM2PER
-			| ATC_SIF(atchan->mem_if) | ATC_DIF(atchan->per_if);
+		ctrla |= FIELD_PREP(ATC_DST_WIDTH, reg_width);
+		ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE,
+				    ATC_DST_ADDR_MODE_FIXED) |
+			 FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) |
+			 FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) |
+			 FIELD_PREP(ATC_SIF, atchan->mem_if) |
+			 FIELD_PREP(ATC_DIF, atchan->per_if);
 		reg = sconfig->dst_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
 			struct at_desc	*desc;
@@ -1584,9 +1565,9 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 			desc->lli.saddr = mem;
 			desc->lli.daddr = reg;
-			desc->lli.ctrla = ctrla
-					| ATC_SRC_WIDTH(mem_width)
-					| len >> mem_width;
+			desc->lli.ctrla = ctrla |
+					  FIELD_PREP(ATC_SRC_WIDTH, mem_width) |
+					  len >> mem_width;
 			desc->lli.ctrlb = ctrlb;
 			desc->len = len;
 
@@ -1596,11 +1577,13 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		break;
 	case DMA_DEV_TO_MEM:
 		reg_width = convert_buswidth(sconfig->src_addr_width);
-		ctrla |=  ATC_SRC_WIDTH(reg_width);
-		ctrlb |=  ATC_DST_ADDR_MODE_INCR
-			| ATC_SRC_ADDR_MODE_FIXED
-			| ATC_FC_PER2MEM
-			| ATC_SIF(atchan->per_if) | ATC_DIF(atchan->mem_if);
+		ctrla |= FIELD_PREP(ATC_SRC_WIDTH, reg_width);
+		ctrlb |= FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) |
+			 FIELD_PREP(ATC_SRC_ADDR_MODE,
+				    ATC_SRC_ADDR_MODE_FIXED) |
+			 FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) |
+			 FIELD_PREP(ATC_SIF, atchan->per_if) |
+			 FIELD_PREP(ATC_DIF, atchan->mem_if);
 
 		reg = sconfig->src_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
@@ -1625,9 +1608,9 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 			desc->lli.saddr = reg;
 			desc->lli.daddr = mem;
-			desc->lli.ctrla = ctrla
-					| ATC_DST_WIDTH(mem_width)
-					| len >> reg_width;
+			desc->lli.ctrla = ctrla |
+					  FIELD_PREP(ATC_DST_WIDTH, mem_width) |
+					  len >> reg_width;
 			desc->lli.ctrlb = ctrlb;
 			desc->len = len;
 
@@ -1693,22 +1676,24 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
 	u32			ctrla;
 
 	/* prepare common CRTLA value */
-	ctrla =   ATC_SCSIZE(sconfig->src_maxburst)
-		| ATC_DCSIZE(sconfig->dst_maxburst)
-		| ATC_DST_WIDTH(reg_width)
-		| ATC_SRC_WIDTH(reg_width)
-		| period_len >> reg_width;
+	ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) |
+		FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) |
+		FIELD_PREP(ATC_DST_WIDTH, reg_width) |
+		FIELD_PREP(ATC_SRC_WIDTH, reg_width) |
+		period_len >> reg_width;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
 		desc->lli.saddr = buf_addr + (period_len * period_index);
 		desc->lli.daddr = sconfig->dst_addr;
 		desc->lli.ctrla = ctrla;
-		desc->lli.ctrlb = ATC_DST_ADDR_MODE_FIXED
-				| ATC_SRC_ADDR_MODE_INCR
-				| ATC_FC_MEM2PER
-				| ATC_SIF(atchan->mem_if)
-				| ATC_DIF(atchan->per_if);
+		desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE,
+					     ATC_DST_ADDR_MODE_FIXED) |
+				  FIELD_PREP(ATC_SRC_ADDR_MODE,
+					     ATC_SRC_ADDR_MODE_INCR) |
+				  FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) |
+				  FIELD_PREP(ATC_SIF, atchan->mem_if) |
+				  FIELD_PREP(ATC_DIF, atchan->per_if);
 		desc->len = period_len;
 		break;
 
@@ -1716,11 +1701,13 @@ atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
 		desc->lli.saddr = sconfig->src_addr;
 		desc->lli.daddr = buf_addr + (period_len * period_index);
 		desc->lli.ctrla = ctrla;
-		desc->lli.ctrlb = ATC_DST_ADDR_MODE_INCR
-				| ATC_SRC_ADDR_MODE_FIXED
-				| ATC_FC_PER2MEM
-				| ATC_SIF(atchan->per_if)
-				| ATC_DIF(atchan->mem_if);
+		desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE,
+					     ATC_DST_ADDR_MODE_INCR) |
+				  FIELD_PREP(ATC_SRC_ADDR_MODE,
+					     ATC_SRC_ADDR_MODE_FIXED) |
+				  FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) |
+				  FIELD_PREP(ATC_SIF, atchan->per_if) |
+				  FIELD_PREP(ATC_DIF, atchan->mem_if);
 		desc->len = period_len;
 		break;
 
@@ -2115,14 +2102,13 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
 		return NULL;
 	}
 
-	atslave->cfg = ATC_DST_H2SEL_HW | ATC_SRC_H2SEL_HW;
+	atslave->cfg = ATC_DST_H2SEL | ATC_SRC_H2SEL;
 	/*
 	 * We can fill both SRC_PER and DST_PER, one of these fields will be
 	 * ignored depending on DMA transfer direction.
 	 */
 	per_id = dma_spec->args[1] & AT91_DMA_CFG_PER_ID_MASK;
-	atslave->cfg |= ATC_DST_PER_MSB(per_id) | ATC_DST_PER(per_id)
-		     | ATC_SRC_PER_MSB(per_id) | ATC_SRC_PER(per_id);
+	atslave->cfg |= ATC_DST_PER_ID(per_id) |  ATC_SRC_PER_ID(per_id);
 	/*
 	 * We have to translate the value we get from the device tree since
 	 * the half FIFO configuration value had to be 0 to keep backward
@@ -2130,14 +2116,16 @@ static struct dma_chan *at_dma_xlate(struct of_phandle_args *dma_spec,
 	 */
 	switch (dma_spec->args[1] & AT91_DMA_CFG_FIFOCFG_MASK) {
 	case AT91_DMA_CFG_FIFOCFG_ALAP:
-		atslave->cfg |= ATC_FIFOCFG_LARGESTBURST;
+		atslave->cfg |= FIELD_PREP(ATC_FIFOCFG,
+					   ATC_FIFOCFG_LARGESTBURST);
 		break;
 	case AT91_DMA_CFG_FIFOCFG_ASAP:
-		atslave->cfg |= ATC_FIFOCFG_ENOUGHSPACE;
+		atslave->cfg |= FIELD_PREP(ATC_FIFOCFG,
+					   ATC_FIFOCFG_ENOUGHSPACE);
 		break;
 	case AT91_DMA_CFG_FIFOCFG_HALF:
 	default:
-		atslave->cfg |= ATC_FIFOCFG_HALFFIFO;
+		atslave->cfg |= FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO);
 	}
 	atslave->dma_dev = &dmac_pdev->dev;
 
-- 
GitLab


From 1c1114d850b6993184c117edad7c91f7f09cb9d5 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:03:03 +0300
Subject: [PATCH 1384/4122] dmaengine: at_hdmac: Rename "dma_common" to
 "dma_device"

"dma_common" name was misleading and did not suggest that's actually
a struct dma_device underneath. Rename it so that readers can follow the
code easier. One may see some checks and a warning when running
checkpatch. Those have nothing to do with the rename and will be addressed
in a further patch.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-30-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 92 +++++++++++++++++++++---------------------
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index edec6a8c730c0..b2619600f68a6 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -338,7 +338,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
 
 /**
  * struct at_dma - internal representation of an Atmel HDMA Controller
- * @chan_common: common dmaengine dma_device object members
+ * @dma_device: dmaengine dma_device object members
  * @atdma_devtype: identifier of DMA controller compatibility
  * @ch_regs: memory mapped register base
  * @clk: dma controller clock
@@ -348,7 +348,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
  * @chan: channels table to store at_dma_chan structures
  */
 struct at_dma {
-	struct dma_device	dma_common;
+	struct dma_device	dma_device;
 	void __iomem		*regs;
 	struct clk		*clk;
 	u32			save_imr;
@@ -368,7 +368,7 @@ struct at_dma {
 
 static inline struct at_dma *to_at_dma(struct dma_device *ddev)
 {
-	return container_of(ddev, struct at_dma, dma_common);
+	return container_of(ddev, struct at_dma, dma_device);
 }
 
 
@@ -1069,11 +1069,11 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
 		if (!pending)
 			break;
 
-		dev_vdbg(atdma->dma_common.dev,
+		dev_vdbg(atdma->dma_device.dev,
 			"interrupt: status = 0x%08x, 0x%08x, 0x%08x\n",
 			 status, imr, pending);
 
-		for (i = 0; i < atdma->dma_common.chancnt; i++) {
+		for (i = 0; i < atdma->dma_device.chancnt; i++) {
 			atchan = &atdma->chan[i];
 			if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) {
 				if (pending & AT_DMA_ERR(i)) {
@@ -2000,7 +2000,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 		 * We need controller-specific data to set up slave
 		 * transfers.
 		 */
-		BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_common.dev);
+		BUG_ON(!atslave->dma_dev || atslave->dma_dev != atdma->dma_device.dev);
 
 		/* if cfg configuration specified take it instead of default */
 		if (atslave->cfg)
@@ -2011,7 +2011,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 	for (i = 0; i < init_nr_desc_per_channel; i++) {
 		desc = atc_alloc_descriptor(chan, GFP_KERNEL);
 		if (!desc) {
-			dev_err(atdma->dma_common.dev,
+			dev_err(atdma->dma_device.dev,
 				"Only %d initial descriptors\n", i);
 			break;
 		}
@@ -2255,7 +2255,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
 		return irq;
 
 	/* discover transaction capabilities */
-	atdma->dma_common.cap_mask = plat_dat->cap_mask;
+	atdma->dma_device.cap_mask = plat_dat->cap_mask;
 	atdma->all_chan_mask = (1 << plat_dat->nr_channels) - 1;
 
 	atdma->clk = devm_clk_get(&pdev->dev, "dma_clk");
@@ -2299,16 +2299,16 @@ static int __init at_dma_probe(struct platform_device *pdev)
 		cpu_relax();
 
 	/* initialize channels related values */
-	INIT_LIST_HEAD(&atdma->dma_common.channels);
+	INIT_LIST_HEAD(&atdma->dma_device.channels);
 	for (i = 0; i < plat_dat->nr_channels; i++) {
 		struct at_dma_chan	*atchan = &atdma->chan[i];
 
 		atchan->mem_if = AT_DMA_MEM_IF;
 		atchan->per_if = AT_DMA_PER_IF;
-		atchan->chan_common.device = &atdma->dma_common;
+		atchan->chan_common.device = &atdma->dma_device;
 		dma_cookie_init(&atchan->chan_common);
 		list_add_tail(&atchan->chan_common.device_node,
-				&atdma->dma_common.channels);
+				&atdma->dma_device.channels);
 
 		atchan->ch_regs = atdma->regs + ch_regs(i);
 		spin_lock_init(&atchan->lock);
@@ -2323,49 +2323,49 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	}
 
 	/* set base routines */
-	atdma->dma_common.device_alloc_chan_resources = atc_alloc_chan_resources;
-	atdma->dma_common.device_free_chan_resources = atc_free_chan_resources;
-	atdma->dma_common.device_tx_status = atc_tx_status;
-	atdma->dma_common.device_issue_pending = atc_issue_pending;
-	atdma->dma_common.dev = &pdev->dev;
+	atdma->dma_device.device_alloc_chan_resources = atc_alloc_chan_resources;
+	atdma->dma_device.device_free_chan_resources = atc_free_chan_resources;
+	atdma->dma_device.device_tx_status = atc_tx_status;
+	atdma->dma_device.device_issue_pending = atc_issue_pending;
+	atdma->dma_device.dev = &pdev->dev;
 
 	/* set prep routines based on capability */
-	if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_common.cap_mask))
-		atdma->dma_common.device_prep_interleaved_dma = atc_prep_dma_interleaved;
+	if (dma_has_cap(DMA_INTERLEAVE, atdma->dma_device.cap_mask))
+		atdma->dma_device.device_prep_interleaved_dma = atc_prep_dma_interleaved;
 
-	if (dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask))
-		atdma->dma_common.device_prep_dma_memcpy = atc_prep_dma_memcpy;
+	if (dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask))
+		atdma->dma_device.device_prep_dma_memcpy = atc_prep_dma_memcpy;
 
-	if (dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask)) {
-		atdma->dma_common.device_prep_dma_memset = atc_prep_dma_memset;
-		atdma->dma_common.device_prep_dma_memset_sg = atc_prep_dma_memset_sg;
-		atdma->dma_common.fill_align = DMAENGINE_ALIGN_4_BYTES;
+	if (dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask)) {
+		atdma->dma_device.device_prep_dma_memset = atc_prep_dma_memset;
+		atdma->dma_device.device_prep_dma_memset_sg = atc_prep_dma_memset_sg;
+		atdma->dma_device.fill_align = DMAENGINE_ALIGN_4_BYTES;
 	}
 
-	if (dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)) {
-		atdma->dma_common.device_prep_slave_sg = atc_prep_slave_sg;
+	if (dma_has_cap(DMA_SLAVE, atdma->dma_device.cap_mask)) {
+		atdma->dma_device.device_prep_slave_sg = atc_prep_slave_sg;
 		/* controller can do slave DMA: can trigger cyclic transfers */
-		dma_cap_set(DMA_CYCLIC, atdma->dma_common.cap_mask);
-		atdma->dma_common.device_prep_dma_cyclic = atc_prep_dma_cyclic;
-		atdma->dma_common.device_config = atc_config;
-		atdma->dma_common.device_pause = atc_pause;
-		atdma->dma_common.device_resume = atc_resume;
-		atdma->dma_common.device_terminate_all = atc_terminate_all;
-		atdma->dma_common.src_addr_widths = ATC_DMA_BUSWIDTHS;
-		atdma->dma_common.dst_addr_widths = ATC_DMA_BUSWIDTHS;
-		atdma->dma_common.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-		atdma->dma_common.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
+		dma_cap_set(DMA_CYCLIC, atdma->dma_device.cap_mask);
+		atdma->dma_device.device_prep_dma_cyclic = atc_prep_dma_cyclic;
+		atdma->dma_device.device_config = atc_config;
+		atdma->dma_device.device_pause = atc_pause;
+		atdma->dma_device.device_resume = atc_resume;
+		atdma->dma_device.device_terminate_all = atc_terminate_all;
+		atdma->dma_device.src_addr_widths = ATC_DMA_BUSWIDTHS;
+		atdma->dma_device.dst_addr_widths = ATC_DMA_BUSWIDTHS;
+		atdma->dma_device.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
+		atdma->dma_device.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 	}
 
 	dma_writel(atdma, EN, AT_DMA_ENABLE);
 
 	dev_info(&pdev->dev, "Atmel AHB DMA Controller ( %s%s%s), %d channels\n",
-	  dma_has_cap(DMA_MEMCPY, atdma->dma_common.cap_mask) ? "cpy " : "",
-	  dma_has_cap(DMA_MEMSET, atdma->dma_common.cap_mask) ? "set " : "",
-	  dma_has_cap(DMA_SLAVE, atdma->dma_common.cap_mask)  ? "slave " : "",
+	  dma_has_cap(DMA_MEMCPY, atdma->dma_device.cap_mask) ? "cpy " : "",
+	  dma_has_cap(DMA_MEMSET, atdma->dma_device.cap_mask) ? "set " : "",
+	  dma_has_cap(DMA_SLAVE, atdma->dma_device.cap_mask)  ? "slave " : "",
 	  plat_dat->nr_channels);
 
-	err = dma_async_device_register(&atdma->dma_common);
+	err = dma_async_device_register(&atdma->dma_device);
 	if (err) {
 		dev_err(&pdev->dev, "Unable to register: %d.\n", err);
 		goto err_dma_async_device_register;
@@ -2388,7 +2388,7 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	return 0;
 
 err_of_dma_controller_register:
-	dma_async_device_unregister(&atdma->dma_common);
+	dma_async_device_unregister(&atdma->dma_device);
 err_dma_async_device_register:
 	dma_pool_destroy(atdma->memset_pool);
 err_memset_pool_create:
@@ -2408,13 +2408,13 @@ static int at_dma_remove(struct platform_device *pdev)
 	at_dma_off(atdma);
 	if (pdev->dev.of_node)
 		of_dma_controller_free(pdev->dev.of_node);
-	dma_async_device_unregister(&atdma->dma_common);
+	dma_async_device_unregister(&atdma->dma_device);
 
 	dma_pool_destroy(atdma->memset_pool);
 	dma_pool_destroy(atdma->dma_desc_pool);
 	free_irq(platform_get_irq(pdev, 0), atdma);
 
-	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels,
 			device_node) {
 		struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 
@@ -2443,7 +2443,7 @@ static int at_dma_prepare(struct device *dev)
 	struct at_dma *atdma = dev_get_drvdata(dev);
 	struct dma_chan *chan, *_chan;
 
-	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels,
 			device_node) {
 		struct at_dma_chan *atchan = to_at_dma_chan(chan);
 		/* wait for transaction completion (except in cyclic case) */
@@ -2478,7 +2478,7 @@ static int at_dma_suspend_noirq(struct device *dev)
 	struct dma_chan *chan, *_chan;
 
 	/* preserve data */
-	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels,
 			device_node) {
 		struct at_dma_chan *atchan = to_at_dma_chan(chan);
 
@@ -2528,7 +2528,7 @@ static int at_dma_resume_noirq(struct device *dev)
 
 	/* restore saved data */
 	dma_writel(atdma, EBCIER, atdma->save_imr);
-	list_for_each_entry_safe(chan, _chan, &atdma->dma_common.channels,
+	list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels,
 			device_node) {
 		struct at_dma_chan *atchan = to_at_dma_chan(chan);
 
-- 
GitLab


From 304184f79c7eb50b32915b29f7cacd58455048d2 Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:03:04 +0300
Subject: [PATCH 1385/4122] dmaengine: at_hdmac: Rename "chan_common" to
 "dma_chan"

"chan_common" was misleading and did not suggest that's actually
a struct dma_chan underneath. Rename it so that readers can follow the
code easier. One may see some checks when running checkpatch. Those have
nothing to do with the rename and will be addressed in a further patch.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-31-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 62 +++++++++++++++++++++---------------------
 1 file changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index b2619600f68a6..e39b9b47234a3 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -252,7 +252,7 @@ enum atc_status {
 
 /**
  * struct at_dma_chan - internal representation of an Atmel HDMAC channel
- * @chan_common: common dmaengine channel object members
+ * @dma_chan: common dmaengine channel object members
  * @device: parent device
  * @ch_regs: memory mapped register base
  * @mask: channel index in a mask
@@ -272,7 +272,7 @@ enum atc_status {
  * @free_list: list of descriptors usable by the channel
  */
 struct at_dma_chan {
-	struct dma_chan		chan_common;
+	struct dma_chan		dma_chan;
 	struct at_dma		*device;
 	void __iomem		*ch_regs;
 	u8			mask;
@@ -300,7 +300,7 @@ struct at_dma_chan {
 
 static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
 {
-	return container_of(dchan, struct at_dma_chan, chan_common);
+	return container_of(dchan, struct at_dma_chan, dma_chan);
 }
 
 /*
@@ -382,15 +382,15 @@ static struct device *chan2dev(struct dma_chan *chan)
 #if defined(VERBOSE_DEBUG)
 static void vdbg_dump_regs(struct at_dma_chan *atchan)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
 
-	dev_err(chan2dev(&atchan->chan_common),
+	dev_err(chan2dev(&atchan->dma_chan),
 		"  channel %d : imr = 0x%x, chsr = 0x%x\n",
-		atchan->chan_common.chan_id,
+		atchan->dma_chan.chan_id,
 		dma_readl(atdma, EBCIMR),
 		dma_readl(atdma, CHSR));
 
-	dev_err(chan2dev(&atchan->chan_common),
+	dev_err(chan2dev(&atchan->dma_chan),
 		"  channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
 		channel_readl(atchan, SADDR),
 		channel_readl(atchan, DADDR),
@@ -405,7 +405,7 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
 
 static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
 {
-	dev_crit(chan2dev(&atchan->chan_common),
+	dev_crit(chan2dev(&atchan->dma_chan),
 		 "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n",
 		 &lli->saddr, &lli->daddr,
 		 lli->ctrla, lli->ctrlb, &lli->dscr);
@@ -442,7 +442,7 @@ static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id)
  */
 static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
 
 	return !!(dma_readl(atdma, CHSR) & atchan->mask);
 }
@@ -603,16 +603,16 @@ static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
 			ret = desc;
 			break;
 		}
-		dev_dbg(chan2dev(&atchan->chan_common),
+		dev_dbg(chan2dev(&atchan->dma_chan),
 				"desc %p not ACKed\n", desc);
 	}
 	spin_unlock_irqrestore(&atchan->lock, flags);
-	dev_vdbg(chan2dev(&atchan->chan_common),
+	dev_vdbg(chan2dev(&atchan->dma_chan),
 		"scanned %u descriptors on freelist\n", i);
 
 	/* no more descriptor available in initial pool: create one more */
 	if (!ret)
-		ret = atc_alloc_descriptor(&atchan->chan_common, GFP_NOWAIT);
+		ret = atc_alloc_descriptor(&atchan->dma_chan, GFP_NOWAIT);
 
 	return ret;
 }
@@ -630,11 +630,11 @@ static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
 
 		spin_lock_irqsave(&atchan->lock, flags);
 		list_for_each_entry(child, &desc->tx_list, desc_node)
-			dev_vdbg(chan2dev(&atchan->chan_common),
+			dev_vdbg(chan2dev(&atchan->dma_chan),
 					"moving child desc %p to freelist\n",
 					child);
 		list_splice_init(&desc->tx_list, &atchan->free_list);
-		dev_vdbg(chan2dev(&atchan->chan_common),
+		dev_vdbg(chan2dev(&atchan->dma_chan),
 			 "moving desc %p to freelist\n", desc);
 		list_add(&desc->desc_node, &atchan->free_list);
 		spin_unlock_irqrestore(&atchan->lock, flags);
@@ -673,13 +673,13 @@ static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
  */
 static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
 
 	/* ASSERT:  channel is idle */
 	if (atc_chan_is_enabled(atchan)) {
-		dev_err(chan2dev(&atchan->chan_common),
+		dev_err(chan2dev(&atchan->dma_chan),
 			"BUG: Attempted to start non-idle channel\n");
-		dev_err(chan2dev(&atchan->chan_common),
+		dev_err(chan2dev(&atchan->dma_chan),
 			"  channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
 			channel_readl(atchan, SADDR),
 			channel_readl(atchan, DADDR),
@@ -905,10 +905,10 @@ static void
 atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
 {
 	struct dma_async_tx_descriptor	*txd = &desc->txd;
-	struct at_dma			*atdma = to_at_dma(atchan->chan_common.device);
+	struct at_dma			*atdma = to_at_dma(atchan->dma_chan.device);
 	unsigned long flags;
 
-	dev_vdbg(chan2dev(&atchan->chan_common),
+	dev_vdbg(chan2dev(&atchan->dma_chan),
 		"descriptor %u complete\n", txd->cookie);
 
 	spin_lock_irqsave(&atchan->lock, flags);
@@ -951,7 +951,7 @@ static void atc_advance_work(struct at_dma_chan *atchan)
 	struct at_desc *desc;
 	unsigned long flags;
 
-	dev_vdbg(chan2dev(&atchan->chan_common), "advance_work\n");
+	dev_vdbg(chan2dev(&atchan->dma_chan), "advance_work\n");
 
 	spin_lock_irqsave(&atchan->lock, flags);
 	if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list))
@@ -1010,9 +1010,9 @@ static void atc_handle_error(struct at_dma_chan *atchan)
 	 * controller flagged an error instead of scribbling over
 	 * random memory locations.
 	 */
-	dev_crit(chan2dev(&atchan->chan_common),
+	dev_crit(chan2dev(&atchan->dma_chan),
 			"Bad descriptor submitted for DMA!\n");
-	dev_crit(chan2dev(&atchan->chan_common),
+	dev_crit(chan2dev(&atchan->dma_chan),
 			"  cookie: %d\n", bad_desc->txd.cookie);
 	atc_dump_lli(atchan, &bad_desc->lli);
 	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
@@ -1031,7 +1031,7 @@ static void atc_handle_cyclic(struct at_dma_chan *atchan)
 	struct at_desc			*first = atc_first_active(atchan);
 	struct dma_async_tx_descriptor	*txd = &first->txd;
 
-	dev_vdbg(chan2dev(&atchan->chan_common),
+	dev_vdbg(chan2dev(&atchan->dma_chan),
 			"new cyclic period llp 0x%08x\n",
 			channel_readl(atchan, DSCR));
 
@@ -1825,7 +1825,7 @@ static int atc_pause(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	int			chan_id = atchan->chan_common.chan_id;
+	int			chan_id = atchan->dma_chan.chan_id;
 	unsigned long		flags;
 
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
@@ -1844,7 +1844,7 @@ static int atc_resume(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	int			chan_id = atchan->chan_common.chan_id;
+	int			chan_id = atchan->dma_chan.chan_id;
 	unsigned long		flags;
 
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
@@ -1866,7 +1866,7 @@ static int atc_terminate_all(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	int			chan_id = atchan->chan_common.chan_id;
+	int			chan_id = atchan->dma_chan.chan_id;
 	unsigned long		flags;
 
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
@@ -2305,9 +2305,9 @@ static int __init at_dma_probe(struct platform_device *pdev)
 
 		atchan->mem_if = AT_DMA_MEM_IF;
 		atchan->per_if = AT_DMA_PER_IF;
-		atchan->chan_common.device = &atdma->dma_device;
-		dma_cookie_init(&atchan->chan_common);
-		list_add_tail(&atchan->chan_common.device_node,
+		atchan->dma_chan.device = &atdma->dma_device;
+		dma_cookie_init(&atchan->dma_chan);
+		list_add_tail(&atchan->dma_chan.device_node,
 				&atdma->dma_device.channels);
 
 		atchan->ch_regs = atdma->regs + ch_regs(i);
@@ -2455,7 +2455,7 @@ static int at_dma_prepare(struct device *dev)
 
 static void atc_suspend_cyclic(struct at_dma_chan *atchan)
 {
-	struct dma_chan	*chan = &atchan->chan_common;
+	struct dma_chan	*chan = &atchan->dma_chan;
 
 	/* Channel should be paused by user
 	 * do it anyway even if it is not done already */
@@ -2496,7 +2496,7 @@ static int at_dma_suspend_noirq(struct device *dev)
 
 static void atc_resume_cyclic(struct at_dma_chan *atchan)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->chan_common.device);
+	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
 
 	/* restore channel status for cyclic descriptors list:
 	 * next descriptor in the cyclic list at the time of suspend */
-- 
GitLab


From 993b397752f3babb698932f20c1c363c4eca4efc Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:03:05 +0300
Subject: [PATCH 1386/4122] dmaengine: at_hdmac: Remove unused member of
 at_dma_chan

The pointer to at_dma engine was never used, remove it.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-32-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/at_hdmac.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index e39b9b47234a3..143d75c18d1e6 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -253,7 +253,6 @@ enum atc_status {
 /**
  * struct at_dma_chan - internal representation of an Atmel HDMAC channel
  * @dma_chan: common dmaengine channel object members
- * @device: parent device
  * @ch_regs: memory mapped register base
  * @mask: channel index in a mask
  * @per_if: peripheral interface
@@ -273,7 +272,6 @@ enum atc_status {
  */
 struct at_dma_chan {
 	struct dma_chan		dma_chan;
-	struct at_dma		*device;
 	void __iomem		*ch_regs;
 	u8			mask;
 	u8			per_if;
-- 
GitLab


From ac803b56860f6506c55a3c9330007837e3f4edda Mon Sep 17 00:00:00 2001
From: Tudor Ambarus <tudor.ambarus@microchip.com>
Date: Tue, 25 Oct 2022 12:03:06 +0300
Subject: [PATCH 1387/4122] dmaengine: at_hdmac: Convert driver to use virt-dma

Convert the driver to use the core virt-dma. The driver will be easier to
maintain as it uses the list handling and the tasklet from virt-dma.

With the conversion replace the election of a new transfer in the tasklet
with the election of the new transfer in the interrupt handler. With this
we have a shorter idle window as we remove the scheduling latency of the
tasklet. I chose to do this while doing the conversion to virt-dma,
because if I made a prerequisite patch with the new transfer election in
the irq handler, I would have to duplicate some virt-dma code in the
at_hdmac driver that would end up being removed at the virt-dma conversion
anyway. So do this in a single step.

Signed-off-by: Tudor Ambarus <tudor.ambarus@microchip.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Link: https://lore.kernel.org/r/20221025090306.297886-1-tudor.ambarus@microchip.com
Link: https://lore.kernel.org/r/20221025090306.297886-33-tudor.ambarus@microchip.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig    |    1 +
 drivers/dma/at_hdmac.c | 1108 +++++++++++++++-------------------------
 2 files changed, 403 insertions(+), 706 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index ea81d825575fb..b9d54f20812f7 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -97,6 +97,7 @@ config AT_HDMAC
 	tristate "Atmel AHB DMA support"
 	depends on ARCH_AT91
 	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
 	help
 	  Support the Atmel AHB DMA controller.
 
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 143d75c18d1e6..8858470246e15 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -3,6 +3,7 @@
  * Driver for the Atmel AHB DMA Controller (aka HDMA or DMAC on AT91 systems)
  *
  * Copyright (C) 2008 Atmel Corporation
+ * Copyright (C) 2022 Microchip Technology, Inc. and its subsidiaries
  *
  * This supports the Atmel AHB DMA Controller found in several Atmel SoCs.
  * The only Atmel DMA Controller that is not covered by this driver is the one
@@ -25,6 +26,7 @@
 #include <linux/slab.h>
 
 #include "dmaengine.h"
+#include "virt-dma.h"
 
 /*
  * Glossary
@@ -200,25 +202,31 @@ struct at_lli {
 	u32 dscr;	/* chain to next lli */
 };
 
+/**
+ * struct atdma_sg - atdma scatter gather entry
+ * @len: length of the current Linked List Item.
+ * @lli: linked list item that is passed to the DMA controller
+ * @lli_phys: physical address of the LLI.
+ */
+struct atdma_sg {
+	unsigned int len;
+	struct at_lli *lli;
+	dma_addr_t lli_phys;
+};
+
 /**
  * struct at_desc - software descriptor
- * @at_lli: hardware lli structure
- * @txd: support for the async_tx api
- * @desc_node: node on the channed descriptors list
- * @len: descriptor byte count
+ * @vd: pointer to the virtual dma descriptor.
+ * @atchan: pointer to the atmel dma channel.
  * @total_len: total transaction byte count
+ * @sg_len: number of sg entries.
+ * @sg: array of sgs.
  */
 struct at_desc {
-	/* FIRST values the hardware uses */
-	struct at_lli			lli;
-
-	/* THEN values for driver housekeeping */
-	struct list_head		tx_list;
-	struct dma_async_tx_descriptor	txd;
-	struct list_head		desc_node;
-	size_t				len;
+	struct				virt_dma_desc vd;
+	struct				at_dma_chan *atchan;
 	size_t				total_len;
-
+	unsigned int			sglen;
 	/* Interleaved data */
 	size_t				boundary;
 	size_t				dst_hole;
@@ -228,15 +236,9 @@ struct at_desc {
 	bool				memset_buffer;
 	dma_addr_t			memset_paddr;
 	int				*memset_vaddr;
+	struct atdma_sg			sg[];
 };
 
-static inline struct at_desc *
-txd_to_at_desc(struct dma_async_tx_descriptor *txd)
-{
-	return container_of(txd, struct at_desc, txd);
-}
-
-
 /*--  Channels  --------------------------------------------------------*/
 
 /**
@@ -245,49 +247,40 @@ txd_to_at_desc(struct dma_async_tx_descriptor *txd)
  * Manipulated with atomic operations.
  */
 enum atc_status {
-	ATC_IS_ERROR = 0,
 	ATC_IS_PAUSED = 1,
 	ATC_IS_CYCLIC = 24,
 };
 
 /**
  * struct at_dma_chan - internal representation of an Atmel HDMAC channel
- * @dma_chan: common dmaengine channel object members
+ * @vc: virtual dma channel entry.
+ * @atdma: pointer to the driver data.
  * @ch_regs: memory mapped register base
  * @mask: channel index in a mask
  * @per_if: peripheral interface
  * @mem_if: memory interface
  * @status: transmit status information from irq/prep* functions
  *                to tasklet (use atomic operations)
- * @tasklet: bottom half to finish transaction work
  * @save_cfg: configuration register that is saved on suspend/resume cycle
  * @save_dscr: for cyclic operations, preserve next descriptor address in
  *             the cyclic list on suspend/resume cycle
  * @dma_sconfig: configuration for slave transfers, passed via
  * .device_config
- * @lock: serializes enqueue/dequeue operations to descriptors lists
- * @active_list: list of descriptors dmaengine is being running on
- * @queue: list of descriptors ready to be submitted to engine
- * @free_list: list of descriptors usable by the channel
+ * @desc: pointer to the atmel dma descriptor.
  */
 struct at_dma_chan {
-	struct dma_chan		dma_chan;
+	struct virt_dma_chan	vc;
+	struct at_dma		*atdma;
 	void __iomem		*ch_regs;
 	u8			mask;
 	u8			per_if;
 	u8			mem_if;
 	unsigned long		status;
-	struct tasklet_struct	tasklet;
 	u32			save_cfg;
 	u32			save_dscr;
-	struct dma_slave_config dma_sconfig;
-
-	spinlock_t		lock;
-
-	/* these other elements are all protected by lock */
-	struct list_head	active_list;
-	struct list_head	queue;
-	struct list_head	free_list;
+	struct dma_slave_config	dma_sconfig;
+	bool			cyclic;
+	struct at_desc		*desc;
 };
 
 #define	channel_readl(atchan, name) \
@@ -296,11 +289,6 @@ struct at_dma_chan {
 #define	channel_writel(atchan, name, val) \
 	__raw_writel((val), (atchan)->ch_regs + ATC_##name##_OFFSET)
 
-static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *dchan)
-{
-	return container_of(dchan, struct at_dma_chan, dma_chan);
-}
-
 /*
  * Fix sconfig's burst size according to at_hdmac. We need to convert them as:
  * 1 -> 0, 4 -> 1, 8 -> 2, 16 -> 3, 32 -> 4, 64 -> 5, 128 -> 6, 256 -> 7.
@@ -342,7 +330,7 @@ static inline u8 convert_buswidth(enum dma_slave_buswidth addr_width)
  * @clk: dma controller clock
  * @save_imr: interrupt mask register that is saved on suspend/resume cycle
  * @all_chan_mask: all channels availlable in a mask
- * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @lli_pool: hw lli table
  * @chan: channels table to store at_dma_chan structures
  */
 struct at_dma {
@@ -353,7 +341,7 @@ struct at_dma {
 
 	u8			all_chan_mask;
 
-	struct dma_pool		*dma_desc_pool;
+	struct dma_pool		*lli_pool;
 	struct dma_pool		*memset_pool;
 	/* AT THE END channels table */
 	struct at_dma_chan	chan[];
@@ -364,6 +352,16 @@ struct at_dma {
 #define	dma_writel(atdma, name, val) \
 	__raw_writel((val), (atdma)->regs + AT_DMA_##name)
 
+static inline struct at_desc *to_atdma_desc(struct dma_async_tx_descriptor *t)
+{
+	return container_of(t, struct at_desc, vd.tx);
+}
+
+static inline struct at_dma_chan *to_at_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct at_dma_chan, vc.chan);
+}
+
 static inline struct at_dma *to_at_dma(struct dma_device *ddev)
 {
 	return container_of(ddev, struct at_dma, dma_device);
@@ -380,15 +378,15 @@ static struct device *chan2dev(struct dma_chan *chan)
 #if defined(VERBOSE_DEBUG)
 static void vdbg_dump_regs(struct at_dma_chan *atchan)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
+	struct at_dma	*atdma = to_at_dma(atchan->vc.chan.device);
 
-	dev_err(chan2dev(&atchan->dma_chan),
+	dev_err(chan2dev(&atchan->vc.chan),
 		"  channel %d : imr = 0x%x, chsr = 0x%x\n",
-		atchan->dma_chan.chan_id,
+		atchan->vc.chan.chan_id,
 		dma_readl(atdma, EBCIMR),
 		dma_readl(atdma, CHSR));
 
-	dev_err(chan2dev(&atchan->dma_chan),
+	dev_err(chan2dev(&atchan->vc.chan),
 		"  channel: s0x%x d0x%x ctrl0x%x:0x%x cfg0x%x l0x%x\n",
 		channel_readl(atchan, SADDR),
 		channel_readl(atchan, DADDR),
@@ -403,7 +401,7 @@ static void vdbg_dump_regs(struct at_dma_chan *atchan) {}
 
 static void atc_dump_lli(struct at_dma_chan *atchan, struct at_lli *lli)
 {
-	dev_crit(chan2dev(&atchan->dma_chan),
+	dev_crit(chan2dev(&atchan->vc.chan),
 		 "desc: s%pad d%pad ctrl0x%x:0x%x l%pad\n",
 		 &lli->saddr, &lli->daddr,
 		 lli->ctrla, lli->ctrlb, &lli->dscr);
@@ -440,7 +438,7 @@ static void atc_disable_chan_irq(struct at_dma *atdma, int chan_id)
  */
 static inline int atc_chan_is_enabled(struct at_dma_chan *atchan)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
+	struct at_dma *atdma = to_at_dma(atchan->vc.chan.device);
 
 	return !!(dma_readl(atdma, CHSR) & atchan->mask);
 }
@@ -464,18 +462,19 @@ static inline int atc_chan_is_cyclic(struct at_dma_chan *atchan)
 }
 
 /**
- * set_desc_eol - set end-of-link to descriptor so it will end transfer
+ * set_lli_eol - set end-of-link to descriptor so it will end transfer
  * @desc: descriptor, signle or at the end of a chain, to end chain on
+ * @i: index of the atmel scatter gather entry that is at the end of the chain.
  */
-static void set_desc_eol(struct at_desc *desc)
+static void set_lli_eol(struct at_desc *desc, unsigned int i)
 {
-	u32 ctrlb = desc->lli.ctrlb;
+	u32 ctrlb = desc->sg[i].lli->ctrlb;
 
 	ctrlb &= ~ATC_IEN;
 	ctrlb |= ATC_SRC_DSCR_DIS | ATC_DST_DSCR_DIS;
 
-	desc->lli.ctrlb = ctrlb;
-	desc->lli.dscr = 0;
+	desc->sg[i].lli->ctrlb = ctrlb;
+	desc->sg[i].lli->dscr = 0;
 }
 
 #define	ATC_DEFAULT_CFG		FIELD_PREP(ATC_FIFOCFG, ATC_FIFOCFG_HALFFIFO)
@@ -518,13 +517,6 @@ struct at_dma_slave {
 	u32			cfg;
 };
 
-/* prototypes */
-static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx);
-static void atc_issue_pending(struct dma_chan *chan);
-
-
-/*----------------------------------------------------------------------*/
-
 static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst,
 						size_t len)
 {
@@ -540,196 +532,72 @@ static inline unsigned int atc_get_xfer_width(dma_addr_t src, dma_addr_t dst,
 	return width;
 }
 
-static struct at_desc *atc_first_active(struct at_dma_chan *atchan)
+static void atdma_lli_chain(struct at_desc *desc, unsigned int i)
 {
-	return list_first_entry(&atchan->active_list,
-				struct at_desc, desc_node);
-}
+	struct atdma_sg *atdma_sg = &desc->sg[i];
 
-static struct at_desc *atc_first_queued(struct at_dma_chan *atchan)
-{
-	return list_first_entry(&atchan->queue,
-				struct at_desc, desc_node);
-}
-
-/**
- * atc_alloc_descriptor - allocate and return an initialized descriptor
- * @chan: the channel to allocate descriptors for
- * @gfp_flags: GFP allocation flags
- *
- * Note: The ack-bit is positioned in the descriptor flag at creation time
- *       to make initial allocation more convenient. This bit will be cleared
- *       and control will be given to client at usage time (during
- *       preparation functions).
- */
-static struct at_desc *atc_alloc_descriptor(struct dma_chan *chan,
-					    gfp_t gfp_flags)
-{
-	struct at_desc	*desc = NULL;
-	struct at_dma	*atdma = to_at_dma(chan->device);
-	dma_addr_t phys;
-
-	desc = dma_pool_zalloc(atdma->dma_desc_pool, gfp_flags, &phys);
-	if (desc) {
-		INIT_LIST_HEAD(&desc->tx_list);
-		dma_async_tx_descriptor_init(&desc->txd, chan);
-		/* txd.flags will be overwritten in prep functions */
-		desc->txd.flags = DMA_CTRL_ACK;
-		desc->txd.tx_submit = atc_tx_submit;
-		desc->txd.phys = phys;
-	}
-
-	return desc;
-}
-
-/**
- * atc_desc_get - get an unused descriptor from free_list
- * @atchan: channel we want a new descriptor for
- */
-static struct at_desc *atc_desc_get(struct at_dma_chan *atchan)
-{
-	struct at_desc *desc, *_desc;
-	struct at_desc *ret = NULL;
-	unsigned long flags;
-	unsigned int i = 0;
-
-	spin_lock_irqsave(&atchan->lock, flags);
-	list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
-		i++;
-		if (async_tx_test_ack(&desc->txd)) {
-			list_del(&desc->desc_node);
-			ret = desc;
-			break;
-		}
-		dev_dbg(chan2dev(&atchan->dma_chan),
-				"desc %p not ACKed\n", desc);
-	}
-	spin_unlock_irqrestore(&atchan->lock, flags);
-	dev_vdbg(chan2dev(&atchan->dma_chan),
-		"scanned %u descriptors on freelist\n", i);
-
-	/* no more descriptor available in initial pool: create one more */
-	if (!ret)
-		ret = atc_alloc_descriptor(&atchan->dma_chan, GFP_NOWAIT);
-
-	return ret;
-}
-
-/**
- * atc_desc_put - move a descriptor, including any children, to the free list
- * @atchan: channel we work on
- * @desc: descriptor, at the head of a chain, to move to free list
- */
-static void atc_desc_put(struct at_dma_chan *atchan, struct at_desc *desc)
-{
-	if (desc) {
-		struct at_desc *child;
-		unsigned long flags;
-
-		spin_lock_irqsave(&atchan->lock, flags);
-		list_for_each_entry(child, &desc->tx_list, desc_node)
-			dev_vdbg(chan2dev(&atchan->dma_chan),
-					"moving child desc %p to freelist\n",
-					child);
-		list_splice_init(&desc->tx_list, &atchan->free_list);
-		dev_vdbg(chan2dev(&atchan->dma_chan),
-			 "moving desc %p to freelist\n", desc);
-		list_add(&desc->desc_node, &atchan->free_list);
-		spin_unlock_irqrestore(&atchan->lock, flags);
-	}
-}
-
-/**
- * atc_desc_chain - build chain adding a descriptor
- * @first: address of first descriptor of the chain
- * @prev: address of previous descriptor of the chain
- * @desc: descriptor to queue
- *
- * Called from prep_* functions
- */
-static void atc_desc_chain(struct at_desc **first, struct at_desc **prev,
-			   struct at_desc *desc)
-{
-	if (!(*first)) {
-		*first = desc;
-	} else {
-		/* inform the HW lli about chaining */
-		(*prev)->lli.dscr = desc->txd.phys;
-		/* insert the link descriptor to the LD ring */
-		list_add_tail(&desc->desc_node,
-				&(*first)->tx_list);
-	}
-	*prev = desc;
+	if (i)
+		desc->sg[i - 1].lli->dscr = atdma_sg->lli_phys;
 }
 
 /**
  * atc_dostart - starts the DMA engine for real
  * @atchan: the channel we want to start
- * @first: first descriptor in the list we want to begin with
- *
- * Called with atchan->lock held and bh disabled
  */
-static void atc_dostart(struct at_dma_chan *atchan, struct at_desc *first)
+static void atc_dostart(struct at_dma_chan *atchan)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
+	struct virt_dma_desc *vd = vchan_next_desc(&atchan->vc);
+	struct at_desc *desc;
 
-	/* ASSERT:  channel is idle */
-	if (atc_chan_is_enabled(atchan)) {
-		dev_err(chan2dev(&atchan->dma_chan),
-			"BUG: Attempted to start non-idle channel\n");
-		dev_err(chan2dev(&atchan->dma_chan),
-			"  channel: s0x%x d0x%x ctrl0x%x:0x%x l0x%x\n",
-			channel_readl(atchan, SADDR),
-			channel_readl(atchan, DADDR),
-			channel_readl(atchan, CTRLA),
-			channel_readl(atchan, CTRLB),
-			channel_readl(atchan, DSCR));
-
-		/* The tasklet will hopefully advance the queue... */
+	if (!vd) {
+		atchan->desc = NULL;
 		return;
 	}
 
 	vdbg_dump_regs(atchan);
 
+	list_del(&vd->node);
+	atchan->desc = desc = to_atdma_desc(&vd->tx);
+
 	channel_writel(atchan, SADDR, 0);
 	channel_writel(atchan, DADDR, 0);
 	channel_writel(atchan, CTRLA, 0);
 	channel_writel(atchan, CTRLB, 0);
-	channel_writel(atchan, DSCR, first->txd.phys);
-	channel_writel(atchan, SPIP, FIELD_PREP(ATC_SPIP_HOLE,
-						first->src_hole) |
-		       FIELD_PREP(ATC_SPIP_BOUNDARY, first->boundary));
-	channel_writel(atchan, DPIP, FIELD_PREP(ATC_DPIP_HOLE,
-						first->dst_hole) |
-		       FIELD_PREP(ATC_DPIP_BOUNDARY, first->boundary));
+	channel_writel(atchan, DSCR, desc->sg[0].lli_phys);
+	channel_writel(atchan, SPIP,
+		       FIELD_PREP(ATC_SPIP_HOLE, desc->src_hole) |
+		       FIELD_PREP(ATC_SPIP_BOUNDARY, desc->boundary));
+	channel_writel(atchan, DPIP,
+		       FIELD_PREP(ATC_DPIP_HOLE, desc->dst_hole) |
+		       FIELD_PREP(ATC_DPIP_BOUNDARY, desc->boundary));
+
 	/* Don't allow CPU to reorder channel enable. */
 	wmb();
-	dma_writel(atdma, CHER, atchan->mask);
+	dma_writel(atchan->atdma, CHER, atchan->mask);
 
 	vdbg_dump_regs(atchan);
 }
 
-/*
- * atc_get_desc_by_cookie - get the descriptor of a cookie
- * @atchan: the DMA channel
- * @cookie: the cookie to get the descriptor for
- */
-static struct at_desc *atc_get_desc_by_cookie(struct at_dma_chan *atchan,
-						dma_cookie_t cookie)
+static void atdma_desc_free(struct virt_dma_desc *vd)
 {
-	struct at_desc *desc, *_desc;
+	struct at_dma *atdma = to_at_dma(vd->tx.chan->device);
+	struct at_desc *desc = to_atdma_desc(&vd->tx);
+	unsigned int i;
 
-	list_for_each_entry_safe(desc, _desc, &atchan->queue, desc_node) {
-		if (desc->txd.cookie == cookie)
-			return desc;
+	for (i = 0; i < desc->sglen; i++) {
+		if (desc->sg[i].lli)
+			dma_pool_free(atdma->lli_pool, desc->sg[i].lli,
+				      desc->sg[i].lli_phys);
 	}
 
-	list_for_each_entry_safe(desc, _desc, &atchan->active_list, desc_node) {
-		if (desc->txd.cookie == cookie)
-			return desc;
+	/* If the transfer was a memset, free our temporary buffer */
+	if (desc->memset_buffer) {
+		dma_pool_free(atdma->memset_pool, desc->memset_vaddr,
+			      desc->memset_paddr);
+		desc->memset_buffer = false;
 	}
 
-	return NULL;
+	kfree(desc);
 }
 
 /**
@@ -756,20 +624,19 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
 /**
  * atc_get_llis_residue - Get residue for a hardware linked list transfer
  *
- * Calculate the residue by removing the length of the child descriptors already
- * transferred from the total length. To get the current child descriptor we can
- * use the value of the channel's DSCR register and compare it against the value
- * of the hardware linked list structure of each child descriptor.
+ * Calculate the residue by removing the length of the Linked List Item (LLI)
+ * already transferred from the total length. To get the current LLI we can use
+ * the value of the channel's DSCR register and compare it against the DSCR
+ * value of each LLI.
  *
  * The CTRLA register provides us with the amount of data already read from the
- * source for the current child descriptor. So we can compute a more accurate
- * residue by also removing the number of bytes corresponding to this amount of
- * data.
+ * source for the LLI. So we can compute a more accurate residue by also
+ * removing the number of bytes corresponding to this amount of data.
  *
  * However, the DSCR and CTRLA registers cannot be read both atomically. Hence a
- * race condition may occur: the first read register may refer to one child
- * descriptor whereas the second read may refer to a later child descriptor in
- * the list because of the DMA transfer progression inbetween the two reads.
+ * race condition may occur: the first read register may refer to one LLI
+ * whereas the second read may refer to a later LLI in the list because of the
+ * DMA transfer progression inbetween the two reads.
  *
  * One solution could have been to pause the DMA transfer, read the DSCR and
  * CTRLA then resume the DMA transfer. Nonetheless, this approach presents some
@@ -786,12 +653,11 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
  * Then we'd rather use another solution: the DSCR is read a first time, the
  * CTRLA is read in turn, next the DSCR is read a second time. If the two
  * consecutive read values of the DSCR are the same then we assume both refers
- * to the very same child descriptor as well as the CTRLA value read inbetween
- * does. For cyclic tranfers, the assumption is that a full loop is "not so
- * fast". If the two DSCR values are different, we read again the CTRLA then the
- * DSCR till two consecutive read values from DSCR are equal or till the
- * maximum trials is reach. This algorithm is very unlikely not to find a stable
- * value for DSCR.
+ * to the very same LLI as well as the CTRLA value read inbetween does. For
+ * cyclic tranfers, the assumption is that a full loop is "not so fast". If the
+ * two DSCR values are different, we read again the CTRLA then the DSCR till two
+ * consecutive read values from DSCR are equal or till the maximum trials is
+ * reach. This algorithm is very unlikely not to find a stable value for DSCR.
  * @atchan: pointer to an atmel hdmac channel.
  * @desc: pointer to the descriptor for which the residue is calculated.
  * @residue: residue to be set to dma_tx_state.
@@ -800,7 +666,6 @@ static inline u32 atc_calc_bytes_left(u32 current_len, u32 ctrla)
 static int atc_get_llis_residue(struct at_dma_chan *atchan,
 				struct at_desc *desc, u32 *residue)
 {
-	struct at_desc *child;
 	u32 len, ctrla, dscr;
 	unsigned int i;
 
@@ -836,24 +701,25 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan,
 		return -ETIMEDOUT;
 
 	/* For the first descriptor we can be more accurate. */
-	if (desc->lli.dscr == dscr) {
+	if (desc->sg[0].lli->dscr == dscr) {
 		*residue = atc_calc_bytes_left(len, ctrla);
 		return 0;
 	}
+	len -= desc->sg[0].len;
 
-	len -= desc->len;
-	list_for_each_entry(child, &desc->tx_list, desc_node) {
-		if (child->lli.dscr == dscr)
+	for (i = 1; i < desc->sglen; i++) {
+		if (desc->sg[i].lli && desc->sg[i].lli->dscr == dscr)
 			break;
-		len -= child->len;
+		len -= desc->sg[i].len;
 	}
 
 	/*
-	 * For the current descriptor in the chain we can calculate the
-	 * remaining bytes using the channel's register.
+	 * For the current LLI in the chain we can calculate the remaining bytes
+	 * using the channel's CTRLA register.
 	 */
 	*residue = atc_calc_bytes_left(len, ctrla);
 	return 0;
+
 }
 
 /**
@@ -867,139 +733,42 @@ static int atc_get_llis_residue(struct at_dma_chan *atchan,
 static int atc_get_residue(struct dma_chan *chan, dma_cookie_t cookie,
 			   u32 *residue)
 {
-	struct at_dma_chan      *atchan = to_at_dma_chan(chan);
-	struct at_desc *desc_first = atc_first_active(atchan);
-	struct at_desc *desc;
+	struct at_dma_chan *atchan = to_at_dma_chan(chan);
+	struct virt_dma_desc *vd;
+	struct at_desc *desc = NULL;
 	u32 len, ctrla;
 
-	/*
-	 * If the cookie doesn't match to the currently running transfer then
-	 * we can return the total length of the associated DMA transfer,
-	 * because it is still queued.
-	 */
-	desc = atc_get_desc_by_cookie(atchan, cookie);
-	if (desc == NULL)
+	vd = vchan_find_desc(&atchan->vc, cookie);
+	if (vd)
+		desc = to_atdma_desc(&vd->tx);
+	else if (atchan->desc && atchan->desc->vd.tx.cookie == cookie)
+		desc = atchan->desc;
+
+	if (!desc)
 		return -EINVAL;
-	else if (desc != desc_first)
-		return desc->total_len;
 
-	if (desc_first->lli.dscr)
+	if (desc->sg[0].lli->dscr)
 		/* hardware linked list transfer */
-		return atc_get_llis_residue(atchan, desc_first, residue);
+		return atc_get_llis_residue(atchan, desc, residue);
 
 	/* single transfer */
-	len = desc_first->total_len;
+	len = desc->total_len;
 	ctrla = channel_readl(atchan, CTRLA);
 	*residue = atc_calc_bytes_left(len, ctrla);
 	return 0;
 }
 
-/**
- * atc_chain_complete - finish work for one transaction chain
- * @atchan: channel we work on
- * @desc: descriptor at the head of the chain we want do complete
- */
-static void
-atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc)
-{
-	struct dma_async_tx_descriptor	*txd = &desc->txd;
-	struct at_dma			*atdma = to_at_dma(atchan->dma_chan.device);
-	unsigned long flags;
-
-	dev_vdbg(chan2dev(&atchan->dma_chan),
-		"descriptor %u complete\n", txd->cookie);
-
-	spin_lock_irqsave(&atchan->lock, flags);
-
-	/* mark the descriptor as complete for non cyclic cases only */
-	if (!atc_chan_is_cyclic(atchan))
-		dma_cookie_complete(txd);
-
-	spin_unlock_irqrestore(&atchan->lock, flags);
-
-	dma_descriptor_unmap(txd);
-	/* for cyclic transfers,
-	 * no need to replay callback function while stopping */
-	if (!atc_chan_is_cyclic(atchan))
-		dmaengine_desc_get_callback_invoke(txd, NULL);
-
-	dma_run_dependencies(txd);
-
-	spin_lock_irqsave(&atchan->lock, flags);
-	/* move children to free_list */
-	list_splice_init(&desc->tx_list, &atchan->free_list);
-	/* add myself to free_list */
-	list_add(&desc->desc_node, &atchan->free_list);
-	spin_unlock_irqrestore(&atchan->lock, flags);
-
-	/* If the transfer was a memset, free our temporary buffer */
-	if (desc->memset_buffer) {
-		dma_pool_free(atdma->memset_pool, desc->memset_vaddr,
-			      desc->memset_paddr);
-		desc->memset_buffer = false;
-	}
-}
-
-/**
- * atc_advance_work - at the end of a transaction, move forward
- * @atchan: channel where the transaction ended
- */
-static void atc_advance_work(struct at_dma_chan *atchan)
-{
-	struct at_desc *desc;
-	unsigned long flags;
-
-	dev_vdbg(chan2dev(&atchan->dma_chan), "advance_work\n");
-
-	spin_lock_irqsave(&atchan->lock, flags);
-	if (atc_chan_is_enabled(atchan) || list_empty(&atchan->active_list))
-		return spin_unlock_irqrestore(&atchan->lock, flags);
-
-	desc = atc_first_active(atchan);
-	/* Remove the transfer node from the active list. */
-	list_del_init(&desc->desc_node);
-	spin_unlock_irqrestore(&atchan->lock, flags);
-	atc_chain_complete(atchan, desc);
-
-	/* advance work */
-	spin_lock_irqsave(&atchan->lock, flags);
-	if (!list_empty(&atchan->active_list)) {
-		desc = atc_first_queued(atchan);
-		list_move_tail(&desc->desc_node, &atchan->active_list);
-		atc_dostart(atchan, desc);
-	}
-	spin_unlock_irqrestore(&atchan->lock, flags);
-}
-
-
 /**
  * atc_handle_error - handle errors reported by DMA controller
- * @atchan: channel where error occurs
+ * @atchan: channel where error occurs.
+ * @i: channel index
  */
-static void atc_handle_error(struct at_dma_chan *atchan)
+static void atc_handle_error(struct at_dma_chan *atchan, unsigned int i)
 {
-	struct at_desc *bad_desc;
-	struct at_desc *desc;
-	struct at_desc *child;
-	unsigned long flags;
-
-	spin_lock_irqsave(&atchan->lock, flags);
-	/*
-	 * The descriptor currently at the head of the active list is
-	 * broked. Since we don't have any way to report errors, we'll
-	 * just have to scream loudly and try to carry on.
-	 */
-	bad_desc = atc_first_active(atchan);
-	list_del_init(&bad_desc->desc_node);
-
-	/* Try to restart the controller */
-	if (!list_empty(&atchan->active_list)) {
-		desc = atc_first_queued(atchan);
-		list_move_tail(&desc->desc_node, &atchan->active_list);
-		atc_dostart(atchan, desc);
-	}
+	struct at_desc *desc = atchan->desc;
 
-	spin_unlock_irqrestore(&atchan->lock, flags);
+	/* Disable channel on AHB error */
+	dma_writel(atchan->atdma, CHDR, AT_DMA_RES(i) | atchan->mask);
 
 	/*
 	 * KERN_CRITICAL may seem harsh, but since this only happens
@@ -1008,47 +777,37 @@ static void atc_handle_error(struct at_dma_chan *atchan)
 	 * controller flagged an error instead of scribbling over
 	 * random memory locations.
 	 */
-	dev_crit(chan2dev(&atchan->dma_chan),
-			"Bad descriptor submitted for DMA!\n");
-	dev_crit(chan2dev(&atchan->dma_chan),
-			"  cookie: %d\n", bad_desc->txd.cookie);
-	atc_dump_lli(atchan, &bad_desc->lli);
-	list_for_each_entry(child, &bad_desc->tx_list, desc_node)
-		atc_dump_lli(atchan, &child->lli);
-
-	/* Pretend the descriptor completed successfully */
-	atc_chain_complete(atchan, bad_desc);
-}
-
-/**
- * atc_handle_cyclic - at the end of a period, run callback function
- * @atchan: channel used for cyclic operations
- */
-static void atc_handle_cyclic(struct at_dma_chan *atchan)
-{
-	struct at_desc			*first = atc_first_active(atchan);
-	struct dma_async_tx_descriptor	*txd = &first->txd;
-
-	dev_vdbg(chan2dev(&atchan->dma_chan),
-			"new cyclic period llp 0x%08x\n",
-			channel_readl(atchan, DSCR));
-
-	dmaengine_desc_get_callback_invoke(txd, NULL);
+	dev_crit(chan2dev(&atchan->vc.chan), "Bad descriptor submitted for DMA!\n");
+	dev_crit(chan2dev(&atchan->vc.chan), "cookie: %d\n",
+		 desc->vd.tx.cookie);
+	for (i = 0; i < desc->sglen; i++)
+		atc_dump_lli(atchan, desc->sg[i].lli);
 }
 
-/*--  IRQ & Tasklet  ---------------------------------------------------*/
-
-static void atc_tasklet(struct tasklet_struct *t)
+static void atdma_handle_chan_done(struct at_dma_chan *atchan, u32 pending,
+				   unsigned int i)
 {
-	struct at_dma_chan *atchan = from_tasklet(atchan, t, tasklet);
+	struct at_desc *desc;
 
-	if (test_and_clear_bit(ATC_IS_ERROR, &atchan->status))
-		return atc_handle_error(atchan);
+	spin_lock(&atchan->vc.lock);
+	desc = atchan->desc;
 
-	if (atc_chan_is_cyclic(atchan))
-		return atc_handle_cyclic(atchan);
+	if (desc) {
+		if (pending & AT_DMA_ERR(i)) {
+			atc_handle_error(atchan, i);
+			/* Pretend the descriptor completed successfully */
+		}
 
-	atc_advance_work(atchan);
+		if (atc_chan_is_cyclic(atchan)) {
+			vchan_cyclic_callback(&desc->vd);
+		} else {
+			vchan_cookie_complete(&desc->vd);
+			atchan->desc = NULL;
+			if (!(atc_chan_is_enabled(atchan)))
+				atc_dostart(atchan);
+		}
+	}
+	spin_unlock(&atchan->vc.lock);
 }
 
 static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
@@ -1073,17 +832,10 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
 
 		for (i = 0; i < atdma->dma_device.chancnt; i++) {
 			atchan = &atdma->chan[i];
-			if (pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))) {
-				if (pending & AT_DMA_ERR(i)) {
-					/* Disable channel on AHB error */
-					dma_writel(atdma, CHDR,
-						AT_DMA_RES(i) | atchan->mask);
-					/* Give information to tasklet */
-					set_bit(ATC_IS_ERROR, &atchan->status);
-				}
-				tasklet_schedule(&atchan->tasklet);
-				ret = IRQ_HANDLED;
-			}
+			if (!(pending & (AT_DMA_BTC(i) | AT_DMA_ERR(i))))
+				continue;
+			atdma_handle_chan_done(atchan, pending, i);
+			ret = IRQ_HANDLED;
 		}
 
 	} while (pending);
@@ -1091,35 +843,7 @@ static irqreturn_t at_dma_interrupt(int irq, void *dev_id)
 	return ret;
 }
 
-
 /*--  DMA Engine API  --------------------------------------------------*/
-
-/**
- * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine
- * @tx: descriptor at the head of the transaction chain
- *
- * Queue chain if DMA engine is working already
- *
- * Cookie increment and adding to active_list or queue must be atomic
- */
-static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-	struct at_desc		*desc = txd_to_at_desc(tx);
-	struct at_dma_chan	*atchan = to_at_dma_chan(tx->chan);
-	dma_cookie_t		cookie;
-	unsigned long		flags;
-
-	spin_lock_irqsave(&atchan->lock, flags);
-	cookie = dma_cookie_assign(tx);
-
-	list_add_tail(&desc->desc_node, &atchan->queue);
-	spin_unlock_irqrestore(&atchan->lock, flags);
-
-	dev_vdbg(chan2dev(tx->chan), "tx_submit: queued %u\n",
-		 desc->txd.cookie);
-	return cookie;
-}
-
 /**
  * atc_prep_dma_interleaved - prepare memory to memory interleaved operation
  * @chan: the channel to prepare operation on
@@ -1131,9 +855,12 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 			 struct dma_interleaved_template *xt,
 			 unsigned long flags)
 {
+	struct at_dma		*atdma = to_at_dma(chan->device);
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct data_chunk	*first;
-	struct at_desc		*desc = NULL;
+	struct atdma_sg		*atdma_sg;
+	struct at_desc		*desc;
+	struct at_lli		*lli;
 	size_t			xfer_count;
 	unsigned int		dwidth;
 	u32			ctrla;
@@ -1172,8 +899,7 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 		len += chunk->size;
 	}
 
-	dwidth = atc_get_xfer_width(xt->src_start,
-				    xt->dst_start, len);
+	dwidth = atc_get_xfer_width(xt->src_start, xt->dst_start, len);
 
 	xfer_count = len >> dwidth;
 	if (xfer_count > ATC_BTSIZE_MAX) {
@@ -1190,32 +916,34 @@ atc_prep_dma_interleaved(struct dma_chan *chan,
 		ATC_SRC_PIP | ATC_DST_PIP |
 		FIELD_PREP(ATC_FC, ATC_FC_MEM2MEM);
 
-	/* create the transfer */
-	desc = atc_desc_get(atchan);
-	if (!desc) {
-		dev_err(chan2dev(chan),
-			"%s: couldn't allocate our descriptor\n", __func__);
+	desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC);
+	if (!desc)
+		return NULL;
+	desc->sglen = 1;
+
+	atdma_sg = desc->sg;
+	atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT,
+				       &atdma_sg->lli_phys);
+	if (!atdma_sg->lli) {
+		kfree(desc);
 		return NULL;
 	}
+	lli = atdma_sg->lli;
 
-	desc->lli.saddr = xt->src_start;
-	desc->lli.daddr = xt->dst_start;
-	desc->lli.ctrla = ctrla | xfer_count;
-	desc->lli.ctrlb = ctrlb;
+	lli->saddr = xt->src_start;
+	lli->daddr = xt->dst_start;
+	lli->ctrla = ctrla | xfer_count;
+	lli->ctrlb = ctrlb;
 
 	desc->boundary = first->size >> dwidth;
 	desc->dst_hole = (dmaengine_get_dst_icg(xt, first) >> dwidth) + 1;
 	desc->src_hole = (dmaengine_get_src_icg(xt, first) >> dwidth) + 1;
 
-	desc->txd.cookie = -EBUSY;
-	desc->total_len = desc->len = len;
-
-	/* set end-of-link to the last link descriptor of list*/
-	set_desc_eol(desc);
-
-	desc->txd.flags = flags; /* client is in control of this ack */
+	atdma_sg->len = len;
+	desc->total_len = len;
 
-	return &desc->txd;
+	set_lli_eol(desc, 0);
+	return vchan_tx_prep(&atchan->vc, &desc->vd, flags);
 }
 
 /**
@@ -1230,25 +958,32 @@ static struct dma_async_tx_descriptor *
 atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 		size_t len, unsigned long flags)
 {
+	struct at_dma		*atdma = to_at_dma(chan->device);
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_desc		*desc = NULL;
-	struct at_desc		*first = NULL;
-	struct at_desc		*prev = NULL;
 	size_t			xfer_count;
 	size_t			offset;
+	size_t			sg_len;
 	unsigned int		src_width;
 	unsigned int		dst_width;
+	unsigned int		i;
 	u32			ctrla;
 	u32			ctrlb;
 
-	dev_vdbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n",
-			&dest, &src, len, flags);
+	dev_dbg(chan2dev(chan), "prep_dma_memcpy: d%pad s%pad l0x%zx f0x%lx\n",
+		&dest, &src, len, flags);
 
 	if (unlikely(!len)) {
-		dev_dbg(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
+		dev_err(chan2dev(chan), "prep_dma_memcpy: length is zero!\n");
 		return NULL;
 	}
 
+	sg_len = DIV_ROUND_UP(len, ATC_BTSIZE_MAX);
+	desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC);
+	if (!desc)
+		return NULL;
+	desc->sglen = sg_len;
+
 	ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN |
 		FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_INCR) |
 		FIELD_PREP(ATC_DST_ADDR_MODE, ATC_DST_ADDR_MODE_INCR) |
@@ -1263,50 +998,49 @@ atc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
 	ctrla = FIELD_PREP(ATC_SRC_WIDTH, src_width) |
 		FIELD_PREP(ATC_DST_WIDTH, dst_width);
 
-	for (offset = 0; offset < len; offset += xfer_count << src_width) {
-		xfer_count = min_t(size_t, (len - offset) >> src_width,
-				ATC_BTSIZE_MAX);
+	for (offset = 0, i = 0; offset < len;
+	     offset += xfer_count << src_width, i++) {
+		struct atdma_sg *atdma_sg = &desc->sg[i];
+		struct at_lli *lli;
 
-		desc = atc_desc_get(atchan);
-		if (!desc)
+		atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT,
+					       &atdma_sg->lli_phys);
+		if (!atdma_sg->lli)
 			goto err_desc_get;
+		lli = atdma_sg->lli;
+
+		xfer_count = min_t(size_t, (len - offset) >> src_width,
+				   ATC_BTSIZE_MAX);
 
-		desc->lli.saddr = src + offset;
-		desc->lli.daddr = dest + offset;
-		desc->lli.ctrla = ctrla | xfer_count;
-		desc->lli.ctrlb = ctrlb;
+		lli->saddr = src + offset;
+		lli->daddr = dest + offset;
+		lli->ctrla = ctrla | xfer_count;
+		lli->ctrlb = ctrlb;
 
-		desc->txd.cookie = 0;
-		desc->len = xfer_count << src_width;
+		desc->sg[i].len = xfer_count << src_width;
 
-		atc_desc_chain(&first, &prev, desc);
+		atdma_lli_chain(desc, i);
 	}
 
-	/* First descriptor of the chain embedds additional information */
-	first->txd.cookie = -EBUSY;
-	first->total_len = len;
+	desc->total_len = len;
 
 	/* set end-of-link to the last link descriptor of list*/
-	set_desc_eol(desc);
-
-	first->txd.flags = flags; /* client is in control of this ack */
+	set_lli_eol(desc, i - 1);
 
-	return &first->txd;
+	return vchan_tx_prep(&atchan->vc, &desc->vd, flags);
 
 err_desc_get:
-	atc_desc_put(atchan, first);
+	atdma_desc_free(&desc->vd);
 	return NULL;
 }
 
-static struct at_desc *atc_create_memset_desc(struct dma_chan *chan,
-					      dma_addr_t psrc,
-					      dma_addr_t pdst,
-					      size_t len)
+static int atdma_create_memset_lli(struct dma_chan *chan,
+				   struct atdma_sg *atdma_sg,
+				   dma_addr_t psrc, dma_addr_t pdst, size_t len)
 {
-	struct at_dma_chan *atchan = to_at_dma_chan(chan);
-	struct at_desc *desc;
+	struct at_dma *atdma = to_at_dma(chan->device);
+	struct at_lli *lli;
 	size_t xfer_count;
-
 	u32 ctrla = FIELD_PREP(ATC_SRC_WIDTH, 2) | FIELD_PREP(ATC_DST_WIDTH, 2);
 	u32 ctrlb = ATC_DEFAULT_CTRLB | ATC_IEN |
 		    FIELD_PREP(ATC_SRC_ADDR_MODE, ATC_SRC_ADDR_MODE_FIXED) |
@@ -1315,27 +1049,24 @@ static struct at_desc *atc_create_memset_desc(struct dma_chan *chan,
 
 	xfer_count = len >> 2;
 	if (xfer_count > ATC_BTSIZE_MAX) {
-		dev_err(chan2dev(chan), "%s: buffer is too big\n",
-			__func__);
-		return NULL;
+		dev_err(chan2dev(chan), "%s: buffer is too big\n", __func__);
+		return -EINVAL;
 	}
 
-	desc = atc_desc_get(atchan);
-	if (!desc) {
-		dev_err(chan2dev(chan), "%s: can't get a descriptor\n",
-			__func__);
-		return NULL;
-	}
+	atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_NOWAIT,
+				       &atdma_sg->lli_phys);
+	if (!atdma_sg->lli)
+		return -ENOMEM;
+	lli = atdma_sg->lli;
 
-	desc->lli.saddr = psrc;
-	desc->lli.daddr = pdst;
-	desc->lli.ctrla = ctrla | xfer_count;
-	desc->lli.ctrlb = ctrlb;
+	lli->saddr = psrc;
+	lli->daddr = pdst;
+	lli->ctrla = ctrla | xfer_count;
+	lli->ctrlb = ctrlb;
 
-	desc->txd.cookie = 0;
-	desc->len = len;
+	atdma_sg->len = len;
 
-	return desc;
+	return 0;
 }
 
 /**
@@ -1350,11 +1081,13 @@ static struct dma_async_tx_descriptor *
 atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
 		    size_t len, unsigned long flags)
 {
+	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
 	struct at_desc		*desc;
 	void __iomem		*vaddr;
 	dma_addr_t		paddr;
 	char			fill_pattern;
+	int			ret;
 
 	dev_vdbg(chan2dev(chan), "%s: d%pad v0x%x l0x%zx f0x%lx\n", __func__,
 		&dest, value, len, flags);
@@ -1385,27 +1118,28 @@ atc_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
 		       (fill_pattern << 8) |
 		       fill_pattern;
 
-	desc = atc_create_memset_desc(chan, paddr, dest, len);
-	if (!desc) {
-		dev_err(chan2dev(chan), "%s: couldn't get a descriptor\n",
-			__func__);
+	desc = kzalloc(struct_size(desc, sg, 1), GFP_ATOMIC);
+	if (!desc)
 		goto err_free_buffer;
-	}
+	desc->sglen = 1;
+
+	ret = atdma_create_memset_lli(chan, desc->sg, paddr, dest, len);
+	if (ret)
+		goto err_free_desc;
 
 	desc->memset_paddr = paddr;
 	desc->memset_vaddr = vaddr;
 	desc->memset_buffer = true;
 
-	desc->txd.cookie = -EBUSY;
 	desc->total_len = len;
 
 	/* set end-of-link on the descriptor */
-	set_desc_eol(desc);
-
-	desc->txd.flags = flags;
+	set_lli_eol(desc, 0);
 
-	return &desc->txd;
+	return vchan_tx_prep(&atchan->vc, &desc->vd, flags);
 
+err_free_desc:
+	kfree(desc);
 err_free_buffer:
 	dma_pool_free(atdma->memset_pool, vaddr, paddr);
 	return NULL;
@@ -1419,12 +1153,13 @@ atc_prep_dma_memset_sg(struct dma_chan *chan,
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	struct at_desc		*desc = NULL, *first = NULL, *prev = NULL;
+	struct at_desc		*desc;
 	struct scatterlist	*sg;
 	void __iomem		*vaddr;
 	dma_addr_t		paddr;
 	size_t			total_len = 0;
 	int			i;
+	int			ret;
 
 	dev_vdbg(chan2dev(chan), "%s: v0x%x l0x%zx f0x%lx\n", __func__,
 		 value, sg_len, flags);
@@ -1443,6 +1178,11 @@ atc_prep_dma_memset_sg(struct dma_chan *chan,
 	}
 	*(u32*)vaddr = value;
 
+	desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC);
+	if (!desc)
+		goto err_free_dma_buf;
+	desc->sglen = sg_len;
+
 	for_each_sg(sgl, sg, sg_len, i) {
 		dma_addr_t dest = sg_dma_address(sg);
 		size_t len = sg_dma_len(sg);
@@ -1453,38 +1193,33 @@ atc_prep_dma_memset_sg(struct dma_chan *chan,
 		if (!is_dma_fill_aligned(chan->device, dest, 0, len)) {
 			dev_err(chan2dev(chan), "%s: buffer is not aligned\n",
 				__func__);
-			goto err_put_desc;
+			goto err_free_desc;
 		}
 
-		desc = atc_create_memset_desc(chan, paddr, dest, len);
-		if (!desc)
-			goto err_put_desc;
-
-		atc_desc_chain(&first, &prev, desc);
+		ret = atdma_create_memset_lli(chan, &desc->sg[i], paddr, dest,
+					      len);
+		if (ret)
+			goto err_free_desc;
 
+		atdma_lli_chain(desc, i);
 		total_len += len;
 	}
 
-	/*
-	 * Only set the buffer pointers on the last descriptor to
-	 * avoid free'ing while we have our transfer still going
-	 */
 	desc->memset_paddr = paddr;
 	desc->memset_vaddr = vaddr;
 	desc->memset_buffer = true;
 
-	first->txd.cookie = -EBUSY;
-	first->total_len = total_len;
+	desc->total_len = total_len;
 
 	/* set end-of-link on the descriptor */
-	set_desc_eol(desc);
-
-	first->txd.flags = flags;
+	set_lli_eol(desc, i - 1);
 
-	return &first->txd;
+	return vchan_tx_prep(&atchan->vc, &desc->vd, flags);
 
-err_put_desc:
-	atc_desc_put(atchan, first);
+err_free_desc:
+	atdma_desc_free(&desc->vd);
+err_free_dma_buf:
+	dma_pool_free(atdma->memset_pool, vaddr, paddr);
 	return NULL;
 }
 
@@ -1502,11 +1237,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		unsigned int sg_len, enum dma_transfer_direction direction,
 		unsigned long flags, void *context)
 {
+	struct at_dma		*atdma = to_at_dma(chan->device);
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
 	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
-	struct at_desc		*first = NULL;
-	struct at_desc		*prev = NULL;
+	struct at_desc		*desc;
 	u32			ctrla;
 	u32			ctrlb;
 	dma_addr_t		reg;
@@ -1526,6 +1261,11 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 		return NULL;
 	}
 
+	desc = kzalloc(struct_size(desc, sg, sg_len), GFP_ATOMIC);
+	if (!desc)
+		return NULL;
+	desc->sglen = sg_len;
+
 	ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) |
 		FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst);
 	ctrlb = ATC_IEN;
@@ -1542,13 +1282,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			 FIELD_PREP(ATC_DIF, atchan->per_if);
 		reg = sconfig->dst_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
-			struct at_desc	*desc;
+			struct atdma_sg *atdma_sg = &desc->sg[i];
+			struct at_lli *lli;
 			u32		len;
 			u32		mem;
 
-			desc = atc_desc_get(atchan);
-			if (!desc)
+			atdma_sg->lli = dma_pool_alloc(atdma->lli_pool,
+						       GFP_NOWAIT,
+						       &atdma_sg->lli_phys);
+			if (!atdma_sg->lli)
 				goto err_desc_get;
+			lli = atdma_sg->lli;
 
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
@@ -1561,16 +1305,18 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			if (unlikely(mem & 3 || len & 3))
 				mem_width = 0;
 
-			desc->lli.saddr = mem;
-			desc->lli.daddr = reg;
-			desc->lli.ctrla = ctrla |
-					  FIELD_PREP(ATC_SRC_WIDTH, mem_width) |
-					  len >> mem_width;
-			desc->lli.ctrlb = ctrlb;
-			desc->len = len;
+			lli->saddr = mem;
+			lli->daddr = reg;
+			lli->ctrla = ctrla |
+				     FIELD_PREP(ATC_SRC_WIDTH, mem_width) |
+				     len >> mem_width;
+			lli->ctrlb = ctrlb;
 
-			atc_desc_chain(&first, &prev, desc);
+			atdma_sg->len = len;
 			total_len += len;
+
+			desc->sg[i].len = len;
+			atdma_lli_chain(desc, i);
 		}
 		break;
 	case DMA_DEV_TO_MEM:
@@ -1585,13 +1331,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 
 		reg = sconfig->src_addr;
 		for_each_sg(sgl, sg, sg_len, i) {
-			struct at_desc	*desc;
+			struct atdma_sg *atdma_sg = &desc->sg[i];
+			struct at_lli *lli;
 			u32		len;
 			u32		mem;
 
-			desc = atc_desc_get(atchan);
-			if (!desc)
+			atdma_sg->lli = dma_pool_alloc(atdma->lli_pool,
+						       GFP_NOWAIT,
+						       &atdma_sg->lli_phys);
+			if (!atdma_sg->lli)
 				goto err_desc_get;
+			lli = atdma_sg->lli;
 
 			mem = sg_dma_address(sg);
 			len = sg_dma_len(sg);
@@ -1604,16 +1354,17 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 			if (unlikely(mem & 3 || len & 3))
 				mem_width = 0;
 
-			desc->lli.saddr = reg;
-			desc->lli.daddr = mem;
-			desc->lli.ctrla = ctrla |
-					  FIELD_PREP(ATC_DST_WIDTH, mem_width) |
-					  len >> reg_width;
-			desc->lli.ctrlb = ctrlb;
-			desc->len = len;
+			lli->saddr = reg;
+			lli->daddr = mem;
+			lli->ctrla = ctrla |
+				     FIELD_PREP(ATC_DST_WIDTH, mem_width) |
+				     len >> reg_width;
+			lli->ctrlb = ctrlb;
 
-			atc_desc_chain(&first, &prev, desc);
+			desc->sg[i].len = len;
 			total_len += len;
+
+			atdma_lli_chain(desc, i);
 		}
 		break;
 	default:
@@ -1621,21 +1372,16 @@ atc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
 	}
 
 	/* set end-of-link to the last link descriptor of list*/
-	set_desc_eol(prev);
-
-	/* First descriptor of the chain embedds additional information */
-	first->txd.cookie = -EBUSY;
-	first->total_len = total_len;
+	set_lli_eol(desc, i - 1);
 
-	/* first link descriptor of list is responsible of flags */
-	first->txd.flags = flags; /* client is in control of this ack */
+	desc->total_len = total_len;
 
-	return &first->txd;
+	return vchan_tx_prep(&atchan->vc, &desc->vd, flags);
 
 err_desc_get:
 	dev_err(chan2dev(chan), "not enough descriptors available\n");
 err:
-	atc_desc_put(atchan, first);
+	atdma_desc_free(&desc->vd);
 	return NULL;
 }
 
@@ -1665,54 +1411,59 @@ err_out:
  */
 static int
 atc_dma_cyclic_fill_desc(struct dma_chan *chan, struct at_desc *desc,
-		unsigned int period_index, dma_addr_t buf_addr,
+		unsigned int i, dma_addr_t buf_addr,
 		unsigned int reg_width, size_t period_len,
 		enum dma_transfer_direction direction)
 {
+	struct at_dma		*atdma = to_at_dma(chan->device);
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
-	u32			ctrla;
+	struct atdma_sg		*atdma_sg = &desc->sg[i];
+	struct at_lli		*lli;
 
-	/* prepare common CRTLA value */
-	ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) |
-		FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) |
-		FIELD_PREP(ATC_DST_WIDTH, reg_width) |
-		FIELD_PREP(ATC_SRC_WIDTH, reg_width) |
-		period_len >> reg_width;
+	atdma_sg->lli = dma_pool_alloc(atdma->lli_pool, GFP_ATOMIC,
+				       &atdma_sg->lli_phys);
+	if (!atdma_sg->lli)
+		return -ENOMEM;
+	lli = atdma_sg->lli;
 
 	switch (direction) {
 	case DMA_MEM_TO_DEV:
-		desc->lli.saddr = buf_addr + (period_len * period_index);
-		desc->lli.daddr = sconfig->dst_addr;
-		desc->lli.ctrla = ctrla;
-		desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE,
-					     ATC_DST_ADDR_MODE_FIXED) |
-				  FIELD_PREP(ATC_SRC_ADDR_MODE,
-					     ATC_SRC_ADDR_MODE_INCR) |
-				  FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) |
-				  FIELD_PREP(ATC_SIF, atchan->mem_if) |
-				  FIELD_PREP(ATC_DIF, atchan->per_if);
-		desc->len = period_len;
+		lli->saddr = buf_addr + (period_len * i);
+		lli->daddr = sconfig->dst_addr;
+		lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE,
+					ATC_DST_ADDR_MODE_FIXED) |
+			     FIELD_PREP(ATC_SRC_ADDR_MODE,
+					ATC_SRC_ADDR_MODE_INCR) |
+			     FIELD_PREP(ATC_FC, ATC_FC_MEM2PER) |
+			     FIELD_PREP(ATC_SIF, atchan->mem_if) |
+			     FIELD_PREP(ATC_DIF, atchan->per_if);
+
 		break;
 
 	case DMA_DEV_TO_MEM:
-		desc->lli.saddr = sconfig->src_addr;
-		desc->lli.daddr = buf_addr + (period_len * period_index);
-		desc->lli.ctrla = ctrla;
-		desc->lli.ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE,
-					     ATC_DST_ADDR_MODE_INCR) |
-				  FIELD_PREP(ATC_SRC_ADDR_MODE,
-					     ATC_SRC_ADDR_MODE_FIXED) |
-				  FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) |
-				  FIELD_PREP(ATC_SIF, atchan->per_if) |
-				  FIELD_PREP(ATC_DIF, atchan->mem_if);
-		desc->len = period_len;
+		lli->saddr = sconfig->src_addr;
+		lli->daddr = buf_addr + (period_len * i);
+		lli->ctrlb = FIELD_PREP(ATC_DST_ADDR_MODE,
+					ATC_DST_ADDR_MODE_INCR) |
+			     FIELD_PREP(ATC_SRC_ADDR_MODE,
+					ATC_SRC_ADDR_MODE_FIXED) |
+			     FIELD_PREP(ATC_FC, ATC_FC_PER2MEM) |
+			     FIELD_PREP(ATC_SIF, atchan->per_if) |
+			     FIELD_PREP(ATC_DIF, atchan->mem_if);
 		break;
 
 	default:
 		return -EINVAL;
 	}
 
+	lli->ctrla = FIELD_PREP(ATC_SCSIZE, sconfig->src_maxburst) |
+		     FIELD_PREP(ATC_DCSIZE, sconfig->dst_maxburst) |
+		     FIELD_PREP(ATC_DST_WIDTH, reg_width) |
+		     FIELD_PREP(ATC_SRC_WIDTH, reg_width) |
+		     period_len >> reg_width;
+	desc->sg[i].len = period_len;
+
 	return 0;
 }
 
@@ -1733,8 +1484,7 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma_slave	*atslave = chan->private;
 	struct dma_slave_config	*sconfig = &atchan->dma_sconfig;
-	struct at_desc		*first = NULL;
-	struct at_desc		*prev = NULL;
+	struct at_desc		*desc;
 	unsigned long		was_cyclic;
 	unsigned int		reg_width;
 	unsigned int		periods = buf_len / period_len;
@@ -1768,33 +1518,26 @@ atc_prep_dma_cyclic(struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 	if (atc_dma_cyclic_check_values(reg_width, buf_addr, period_len))
 		goto err_out;
 
+	desc = kzalloc(struct_size(desc, sg, periods), GFP_ATOMIC);
+	if (!desc)
+		goto err_out;
+	desc->sglen = periods;
+
 	/* build cyclic linked list */
 	for (i = 0; i < periods; i++) {
-		struct at_desc	*desc;
-
-		desc = atc_desc_get(atchan);
-		if (!desc)
-			goto err_desc_get;
-
 		if (atc_dma_cyclic_fill_desc(chan, desc, i, buf_addr,
 					     reg_width, period_len, direction))
-			goto err_desc_get;
-
-		atc_desc_chain(&first, &prev, desc);
+			goto err_fill_desc;
+		atdma_lli_chain(desc, i);
 	}
-
+	desc->total_len = buf_len;
 	/* lets make a cyclic list */
-	prev->lli.dscr = first->txd.phys;
-
-	/* First descriptor of the chain embedds additional information */
-	first->txd.cookie = -EBUSY;
-	first->total_len = buf_len;
+	desc->sg[i - 1].lli->dscr = desc->sg[0].lli_phys;
 
-	return &first->txd;
+	return vchan_tx_prep(&atchan->vc, &desc->vd, flags);
 
-err_desc_get:
-	dev_err(chan2dev(chan), "not enough descriptors available\n");
-	atc_desc_put(atchan, first);
+err_fill_desc:
+	atdma_desc_free(&desc->vd);
 err_out:
 	clear_bit(ATC_IS_CYCLIC, &atchan->status);
 	return NULL;
@@ -1823,17 +1566,17 @@ static int atc_pause(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	int			chan_id = atchan->dma_chan.chan_id;
+	int			chan_id = atchan->vc.chan.chan_id;
 	unsigned long		flags;
 
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
 
-	spin_lock_irqsave(&atchan->lock, flags);
+	spin_lock_irqsave(&atchan->vc.lock, flags);
 
 	dma_writel(atdma, CHER, AT_DMA_SUSP(chan_id));
 	set_bit(ATC_IS_PAUSED, &atchan->status);
 
-	spin_unlock_irqrestore(&atchan->lock, flags);
+	spin_unlock_irqrestore(&atchan->vc.lock, flags);
 
 	return 0;
 }
@@ -1842,7 +1585,7 @@ static int atc_resume(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	int			chan_id = atchan->dma_chan.chan_id;
+	int			chan_id = atchan->vc.chan.chan_id;
 	unsigned long		flags;
 
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
@@ -1850,12 +1593,12 @@ static int atc_resume(struct dma_chan *chan)
 	if (!atc_chan_is_paused(atchan))
 		return 0;
 
-	spin_lock_irqsave(&atchan->lock, flags);
+	spin_lock_irqsave(&atchan->vc.lock, flags);
 
 	dma_writel(atdma, CHDR, AT_DMA_RES(chan_id));
 	clear_bit(ATC_IS_PAUSED, &atchan->status);
 
-	spin_unlock_irqrestore(&atchan->lock, flags);
+	spin_unlock_irqrestore(&atchan->vc.lock, flags);
 
 	return 0;
 }
@@ -1864,9 +1607,11 @@ static int atc_terminate_all(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	int			chan_id = atchan->dma_chan.chan_id;
+	int			chan_id = atchan->vc.chan.chan_id;
 	unsigned long		flags;
 
+	LIST_HEAD(list);
+
 	dev_vdbg(chan2dev(chan), "%s\n", __func__);
 
 	/*
@@ -1875,7 +1620,7 @@ static int atc_terminate_all(struct dma_chan *chan)
 	 * channel. We still have to poll the channel enable bit due
 	 * to AHB/HSB limitations.
 	 */
-	spin_lock_irqsave(&atchan->lock, flags);
+	spin_lock_irqsave(&atchan->vc.lock, flags);
 
 	/* disabling channel: must also remove suspend state */
 	dma_writel(atdma, CHDR, AT_DMA_RES(chan_id) | atchan->mask);
@@ -1884,15 +1629,20 @@ static int atc_terminate_all(struct dma_chan *chan)
 	while (dma_readl(atdma, CHSR) & atchan->mask)
 		cpu_relax();
 
-	/* active_list entries will end up before queued entries */
-	list_splice_tail_init(&atchan->queue, &atchan->free_list);
-	list_splice_tail_init(&atchan->active_list, &atchan->free_list);
+	if (atchan->desc) {
+		vchan_terminate_vdesc(&atchan->desc->vd);
+		atchan->desc = NULL;
+	}
+
+	vchan_get_all_descriptors(&atchan->vc, &list);
 
 	clear_bit(ATC_IS_PAUSED, &atchan->status);
 	/* if channel dedicated to cyclic operations, free it */
 	clear_bit(ATC_IS_CYCLIC, &atchan->status);
 
-	spin_unlock_irqrestore(&atchan->lock, flags);
+	spin_unlock_irqrestore(&atchan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&atchan->vc, &list);
 
 	return 0;
 }
@@ -1922,9 +1672,10 @@ atc_tx_status(struct dma_chan *chan,
 	if (dma_status == DMA_COMPLETE || !txstate)
 		return dma_status;
 
-	spin_lock_irqsave(&atchan->lock, flags);
+	spin_lock_irqsave(&atchan->vc.lock, flags);
+	/*  Get number of bytes left in the active transactions */
 	ret = atc_get_residue(chan, cookie, &residue);
-	spin_unlock_irqrestore(&atchan->lock, flags);
+	spin_unlock_irqrestore(&atchan->vc.lock, flags);
 
 	if (unlikely(ret < 0)) {
 		dev_vdbg(chan2dev(chan), "get residual bytes error\n");
@@ -1939,27 +1690,17 @@ atc_tx_status(struct dma_chan *chan,
 	return dma_status;
 }
 
-/**
- * atc_issue_pending - takes the first transaction descriptor in the pending
- * queue and starts the transfer.
- * @chan: target DMA channel
- */
 static void atc_issue_pending(struct dma_chan *chan)
 {
 	struct at_dma_chan *atchan = to_at_dma_chan(chan);
-	struct at_desc *desc;
 	unsigned long flags;
 
-	dev_vdbg(chan2dev(chan), "issue_pending\n");
-
-	spin_lock_irqsave(&atchan->lock, flags);
-	if (atc_chan_is_enabled(atchan) || list_empty(&atchan->queue))
-		return spin_unlock_irqrestore(&atchan->lock, flags);
-
-	desc = atc_first_queued(atchan);
-	list_move_tail(&desc->desc_node, &atchan->active_list);
-	atc_dostart(atchan, desc);
-	spin_unlock_irqrestore(&atchan->lock, flags);
+	spin_lock_irqsave(&atchan->vc.lock, flags);
+	if (vchan_issue_pending(&atchan->vc) && !atchan->desc) {
+		if (!(atc_chan_is_enabled(atchan)))
+			atc_dostart(atchan);
+	}
+	spin_unlock_irqrestore(&atchan->vc.lock, flags);
 }
 
 /**
@@ -1972,9 +1713,7 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
 	struct at_dma		*atdma = to_at_dma(chan->device);
-	struct at_desc		*desc;
 	struct at_dma_slave	*atslave;
-	int			i;
 	u32			cfg;
 
 	dev_vdbg(chan2dev(chan), "alloc_chan_resources\n");
@@ -1985,11 +1724,6 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 		return -EIO;
 	}
 
-	if (!list_empty(&atchan->free_list)) {
-		dev_dbg(chan2dev(chan), "can't allocate channel resources (channel not freed from a previous use)\n");
-		return -EIO;
-	}
-
 	cfg = ATC_DEFAULT_CFG;
 
 	atslave = chan->private;
@@ -2005,26 +1739,10 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 			cfg = atslave->cfg;
 	}
 
-	/* Allocate initial pool of descriptors */
-	for (i = 0; i < init_nr_desc_per_channel; i++) {
-		desc = atc_alloc_descriptor(chan, GFP_KERNEL);
-		if (!desc) {
-			dev_err(atdma->dma_device.dev,
-				"Only %d initial descriptors\n", i);
-			break;
-		}
-		list_add_tail(&desc->desc_node, &atchan->free_list);
-	}
-
-	dma_cookie_init(chan);
-
 	/* channel parameters */
 	channel_writel(atchan, CFG, cfg);
 
-	dev_dbg(chan2dev(chan),
-		"alloc_chan_resources: allocated %d descriptors\n", i);
-
-	return i;
+	return 0;
 }
 
 /**
@@ -2034,22 +1752,10 @@ static int atc_alloc_chan_resources(struct dma_chan *chan)
 static void atc_free_chan_resources(struct dma_chan *chan)
 {
 	struct at_dma_chan	*atchan = to_at_dma_chan(chan);
-	struct at_dma		*atdma = to_at_dma(chan->device);
-	struct at_desc		*desc, *_desc;
-	LIST_HEAD(list);
 
-	/* ASSERT:  channel is idle */
-	BUG_ON(!list_empty(&atchan->active_list));
-	BUG_ON(!list_empty(&atchan->queue));
 	BUG_ON(atc_chan_is_enabled(atchan));
 
-	list_for_each_entry_safe(desc, _desc, &atchan->free_list, desc_node) {
-		dev_vdbg(chan2dev(chan), "  freeing descriptor %p\n", desc);
-		list_del(&desc->desc_node);
-		/* free link descriptor */
-		dma_pool_free(atdma->dma_desc_pool, desc, desc->txd.phys);
-	}
-	list_splice_init(&atchan->free_list, &list);
+	vchan_free_chan_resources(to_virt_chan(chan));
 	atchan->status = 0;
 
 	/*
@@ -2274,11 +1980,11 @@ static int __init at_dma_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, atdma);
 
 	/* create a pool of consistent memory blocks for hardware descriptors */
-	atdma->dma_desc_pool = dma_pool_create("at_hdmac_desc_pool",
-			&pdev->dev, sizeof(struct at_desc),
-			4 /* word alignment */, 0);
-	if (!atdma->dma_desc_pool) {
-		dev_err(&pdev->dev, "No memory for descriptors dma pool\n");
+	atdma->lli_pool = dma_pool_create("at_hdmac_lli_pool",
+					  &pdev->dev, sizeof(struct at_lli),
+					  4 /* word alignment */, 0);
+	if (!atdma->lli_pool) {
+		dev_err(&pdev->dev, "Unable to allocate DMA LLI descriptor pool\n");
 		err = -ENOMEM;
 		goto err_desc_pool_create;
 	}
@@ -2303,20 +2009,13 @@ static int __init at_dma_probe(struct platform_device *pdev)
 
 		atchan->mem_if = AT_DMA_MEM_IF;
 		atchan->per_if = AT_DMA_PER_IF;
-		atchan->dma_chan.device = &atdma->dma_device;
-		dma_cookie_init(&atchan->dma_chan);
-		list_add_tail(&atchan->dma_chan.device_node,
-				&atdma->dma_device.channels);
 
 		atchan->ch_regs = atdma->regs + ch_regs(i);
-		spin_lock_init(&atchan->lock);
 		atchan->mask = 1 << i;
 
-		INIT_LIST_HEAD(&atchan->active_list);
-		INIT_LIST_HEAD(&atchan->queue);
-		INIT_LIST_HEAD(&atchan->free_list);
-
-		tasklet_setup(&atchan->tasklet, atc_tasklet);
+		atchan->atdma = atdma;
+		atchan->vc.desc_free = atdma_desc_free;
+		vchan_init(&atchan->vc, &atdma->dma_device);
 		atc_enable_chan_irq(atdma, i);
 	}
 
@@ -2390,7 +2089,7 @@ err_of_dma_controller_register:
 err_dma_async_device_register:
 	dma_pool_destroy(atdma->memset_pool);
 err_memset_pool_create:
-	dma_pool_destroy(atdma->dma_desc_pool);
+	dma_pool_destroy(atdma->lli_pool);
 err_desc_pool_create:
 	free_irq(platform_get_irq(pdev, 0), atdma);
 err_irq:
@@ -2409,17 +2108,13 @@ static int at_dma_remove(struct platform_device *pdev)
 	dma_async_device_unregister(&atdma->dma_device);
 
 	dma_pool_destroy(atdma->memset_pool);
-	dma_pool_destroy(atdma->dma_desc_pool);
+	dma_pool_destroy(atdma->lli_pool);
 	free_irq(platform_get_irq(pdev, 0), atdma);
 
 	list_for_each_entry_safe(chan, _chan, &atdma->dma_device.channels,
 			device_node) {
-		struct at_dma_chan	*atchan = to_at_dma_chan(chan);
-
 		/* Disable interrupts */
 		atc_disable_chan_irq(atdma, chan->chan_id);
-
-		tasklet_kill(&atchan->tasklet);
 		list_del(&chan->device_node);
 	}
 
@@ -2453,7 +2148,7 @@ static int at_dma_prepare(struct device *dev)
 
 static void atc_suspend_cyclic(struct at_dma_chan *atchan)
 {
-	struct dma_chan	*chan = &atchan->dma_chan;
+	struct dma_chan	*chan = &atchan->vc.chan;
 
 	/* Channel should be paused by user
 	 * do it anyway even if it is not done already */
@@ -2494,7 +2189,7 @@ static int at_dma_suspend_noirq(struct device *dev)
 
 static void atc_resume_cyclic(struct at_dma_chan *atchan)
 {
-	struct at_dma	*atdma = to_at_dma(atchan->dma_chan.device);
+	struct at_dma	*atdma = to_at_dma(atchan->vc.chan.device);
 
 	/* restore channel status for cyclic descriptors list:
 	 * next descriptor in the cyclic list at the time of suspend */
@@ -2568,5 +2263,6 @@ module_exit(at_dma_exit);
 
 MODULE_DESCRIPTION("Atmel AHB DMA Controller driver");
 MODULE_AUTHOR("Nicolas Ferre <nicolas.ferre@atmel.com>");
+MODULE_AUTHOR("Tudor Ambarus <tudor.ambarus@microchip.com>");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:at_hdmac");
-- 
GitLab


From 00fb05ff87bc63a3e9000e3f7c15c86951aca76d Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Wed, 9 Nov 2022 21:05:54 +0100
Subject: [PATCH 1388/4122] usb: fotg2: add Gemini-specific handling

The Cortina Systems Gemini has bolted on a PHY inside the
silicon that can be handled by six bits in a MISC register in
the system controller.

If we are running on Gemini, look up a syscon regmap through
a phandle and enable VBUS and optionally the Mini-B connector.

If the device is flagged as "wakeup-source" using the standard
DT bindings, we also enable this in the global controller for
respective port.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221109200554.1957185-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/Kconfig        |  1 +
 drivers/usb/fotg210/fotg210-core.c | 80 ++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig
index 933c513b5728e..534206ee0d1db 100644
--- a/drivers/usb/fotg210/Kconfig
+++ b/drivers/usb/fotg210/Kconfig
@@ -5,6 +5,7 @@ config USB_FOTG210
 	depends on USB || USB_GADGET
 	depends on HAS_DMA && HAS_IOMEM
 	default ARCH_GEMINI
+	select MFD_SYSCON
 	help
 	  Faraday FOTG210 is a dual-mode USB controller that can act
 	  in both host controller and peripheral controller mode.
diff --git a/drivers/usb/fotg210/fotg210-core.c b/drivers/usb/fotg210/fotg210-core.c
index 3d07ee46f6d1d..8a54edf921ac2 100644
--- a/drivers/usb/fotg210/fotg210-core.c
+++ b/drivers/usb/fotg210/fotg210-core.c
@@ -5,15 +5,86 @@
  * whether to proceed with probing the host or the peripheral
  * driver.
  */
+#include <linux/bitops.h>
 #include <linux/device.h>
+#include <linux/mfd/syscon.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/regmap.h>
 #include <linux/usb.h>
 #include <linux/usb/otg.h>
 
 #include "fotg210.h"
 
+/*
+ * Gemini-specific initialization function, only executed on the
+ * Gemini SoC using the global misc control register.
+ *
+ * The gemini USB blocks are connected to either Mini-A (host mode) or
+ * Mini-B (peripheral mode) plugs. There is no role switch support on the
+ * Gemini SoC, just either-or.
+ */
+#define GEMINI_GLOBAL_MISC_CTRL		0x30
+#define GEMINI_MISC_USB0_WAKEUP		BIT(14)
+#define GEMINI_MISC_USB1_WAKEUP		BIT(15)
+#define GEMINI_MISC_USB0_VBUS_ON	BIT(22)
+#define GEMINI_MISC_USB1_VBUS_ON	BIT(23)
+#define GEMINI_MISC_USB0_MINI_B		BIT(29)
+#define GEMINI_MISC_USB1_MINI_B		BIT(30)
+
+static int fotg210_gemini_init(struct device *dev, struct resource *res,
+			       enum usb_dr_mode mode)
+{
+	struct device_node *np = dev->of_node;
+	struct regmap *map;
+	bool wakeup;
+	u32 mask, val;
+	int ret;
+
+	map = syscon_regmap_lookup_by_phandle(np, "syscon");
+	if (IS_ERR(map)) {
+		dev_err(dev, "no syscon\n");
+		return PTR_ERR(map);
+	}
+	wakeup = of_property_read_bool(np, "wakeup-source");
+
+	/*
+	 * Figure out if this is USB0 or USB1 by simply checking the
+	 * physical base address.
+	 */
+	mask = 0;
+	if (res->start == 0x69000000) {
+		mask = GEMINI_MISC_USB1_VBUS_ON | GEMINI_MISC_USB1_MINI_B |
+			GEMINI_MISC_USB1_WAKEUP;
+		if (mode == USB_DR_MODE_HOST)
+			val = GEMINI_MISC_USB1_VBUS_ON;
+		else
+			val = GEMINI_MISC_USB1_MINI_B;
+		if (wakeup)
+			val |= GEMINI_MISC_USB1_WAKEUP;
+	} else {
+		mask = GEMINI_MISC_USB0_VBUS_ON | GEMINI_MISC_USB0_MINI_B |
+			GEMINI_MISC_USB0_WAKEUP;
+		if (mode == USB_DR_MODE_HOST)
+			val = GEMINI_MISC_USB0_VBUS_ON;
+		else
+			val = GEMINI_MISC_USB0_MINI_B;
+		if (wakeup)
+			val |= GEMINI_MISC_USB0_WAKEUP;
+	}
+
+	ret = regmap_update_bits(map, GEMINI_GLOBAL_MISC_CTRL, mask, val);
+	if (ret) {
+		dev_err(dev, "failed to initialize Gemini PHY\n");
+		return ret;
+	}
+
+	dev_info(dev, "initialized Gemini PHY in %s mode\n",
+		 (mode == USB_DR_MODE_HOST) ? "host" : "gadget");
+	return 0;
+}
+
 static int fotg210_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -22,6 +93,15 @@ static int fotg210_probe(struct platform_device *pdev)
 
 	mode = usb_get_dr_mode(dev);
 
+	if (of_device_is_compatible(dev->of_node, "cortina,gemini-usb")) {
+		struct resource *res;
+
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		ret = fotg210_gemini_init(dev, res, mode);
+		if (ret)
+			return ret;
+	}
+
 	if (mode == USB_DR_MODE_PERIPHERAL)
 		ret = fotg210_udc_probe(pdev);
 	else
-- 
GitLab


From 46ed6026ca2181c917c8334a82e3eaf40a6234dd Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 11 Nov 2022 10:03:17 +0100
Subject: [PATCH 1389/4122] usb: fotg210-udc: Fix ages old endianness issues

The code in the FOTG210 driver isn't entirely endianness-agnostic
as reported by the kernel robot sparse testing. This came to
the surface while moving the files around.

The driver is only used on little-endian systems, so this causes
no real-world regression, but it is nice to be strict and have
some compile coverage also on big endian machines, so fix it
up with the right LE accessors.

Fixes: b84a8dee23fd ("usb: gadget: add Faraday fotg210_udc driver")
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/linux-usb/202211110910.0dJ7nZCn-lkp@intel.com/
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221111090317.94228-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-udc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index 7757aaa11d6f6..3c357ce42d3b1 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -630,10 +630,10 @@ static void fotg210_request_error(struct fotg210_udc *fotg210)
 static void fotg210_set_address(struct fotg210_udc *fotg210,
 				struct usb_ctrlrequest *ctrl)
 {
-	if (ctrl->wValue >= 0x0100) {
+	if (le16_to_cpu(ctrl->wValue) >= 0x0100) {
 		fotg210_request_error(fotg210);
 	} else {
-		fotg210_set_dev_addr(fotg210, ctrl->wValue);
+		fotg210_set_dev_addr(fotg210, le16_to_cpu(ctrl->wValue));
 		fotg210_set_cxdone(fotg210);
 	}
 }
@@ -714,17 +714,17 @@ static void fotg210_get_status(struct fotg210_udc *fotg210,
 
 	switch (ctrl->bRequestType & USB_RECIP_MASK) {
 	case USB_RECIP_DEVICE:
-		fotg210->ep0_data = 1 << USB_DEVICE_SELF_POWERED;
+		fotg210->ep0_data = cpu_to_le16(1 << USB_DEVICE_SELF_POWERED);
 		break;
 	case USB_RECIP_INTERFACE:
-		fotg210->ep0_data = 0;
+		fotg210->ep0_data = cpu_to_le16(0);
 		break;
 	case USB_RECIP_ENDPOINT:
 		epnum = ctrl->wIndex & USB_ENDPOINT_NUMBER_MASK;
 		if (epnum)
 			fotg210->ep0_data =
-				fotg210_is_epnstall(fotg210->ep[epnum])
-				<< USB_ENDPOINT_HALT;
+				cpu_to_le16(fotg210_is_epnstall(fotg210->ep[epnum])
+					    << USB_ENDPOINT_HALT);
 		else
 			fotg210_request_error(fotg210);
 		break;
-- 
GitLab


From 8836402d4b208b2211fc60538ff45d6bb3b73a64 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Thu, 10 Nov 2022 18:54:35 +0100
Subject: [PATCH 1390/4122] usb: Check !irq instead of irq == NO_IRQ

NO_IRQ is a relic from the old days. It is not used anymore in core
functions. By the way, function irq_of_parse_and_map() returns value 0
on error.

In some drivers, NO_IRQ is erroneously used to check the return of
irq_of_parse_and_map().

It is not a real bug today because the only architectures using the
drivers being fixed by this patch define NO_IRQ as 0, but there are
architectures which define NO_IRQ as -1. If one day those
architectures start using the non fixed drivers, there will be a
problem.

Long time ago Linus advocated for not using NO_IRQ, see
https://lkml.org/lkml/2005/11/21/221 . He re-iterated the same view
recently in https://lkml.org/lkml/2022/10/12/622

So test !irq instead of tesing irq == NO_IRQ.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Link: https://lore.kernel.org/r/13feefdf6b240817944e6441e26a8ddc1d81ced1.1668102802.git.christophe.leroy@csgroup.eu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci-grlib.c  | 2 +-
 drivers/usb/host/ehci-ppc-of.c | 2 +-
 drivers/usb/host/fhci-hcd.c    | 2 +-
 drivers/usb/host/ohci-ppc-of.c | 2 +-
 drivers/usb/host/uhci-grlib.c  | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/host/ehci-grlib.c b/drivers/usb/host/ehci-grlib.c
index a2c3b4ec8a8b5..0717f2ccf49d5 100644
--- a/drivers/usb/host/ehci-grlib.c
+++ b/drivers/usb/host/ehci-grlib.c
@@ -99,7 +99,7 @@ static int ehci_hcd_grlib_probe(struct platform_device *op)
 	hcd->rsrc_len = resource_size(&res);
 
 	irq = irq_of_parse_and_map(dn, 0);
-	if (irq == NO_IRQ) {
+	if (!irq) {
 		dev_err(&op->dev, "%s: irq_of_parse_and_map failed\n",
 			__FILE__);
 		rv = -EBUSY;
diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c
index 28a19693c19fe..62a0a193798c2 100644
--- a/drivers/usb/host/ehci-ppc-of.c
+++ b/drivers/usb/host/ehci-ppc-of.c
@@ -119,7 +119,7 @@ static int ehci_hcd_ppc_of_probe(struct platform_device *op)
 	hcd->rsrc_len = resource_size(&res);
 
 	irq = irq_of_parse_and_map(dn, 0);
-	if (irq == NO_IRQ) {
+	if (!irq) {
 		dev_err(&op->dev, "%s: irq_of_parse_and_map failed\n",
 			__FILE__);
 		rv = -EBUSY;
diff --git a/drivers/usb/host/fhci-hcd.c b/drivers/usb/host/fhci-hcd.c
index 95a44462bed04..64a64140c2fd2 100644
--- a/drivers/usb/host/fhci-hcd.c
+++ b/drivers/usb/host/fhci-hcd.c
@@ -676,7 +676,7 @@ static int of_fhci_probe(struct platform_device *ofdev)
 
 	/* USB Host interrupt. */
 	usb_irq = irq_of_parse_and_map(node, 0);
-	if (usb_irq == NO_IRQ) {
+	if (!usb_irq) {
 		dev_err(dev, "could not get usb irq\n");
 		ret = -EINVAL;
 		goto err_usb_irq;
diff --git a/drivers/usb/host/ohci-ppc-of.c b/drivers/usb/host/ohci-ppc-of.c
index 591f675cc9306..f2f6c832ec98c 100644
--- a/drivers/usb/host/ohci-ppc-of.c
+++ b/drivers/usb/host/ohci-ppc-of.c
@@ -120,7 +120,7 @@ static int ohci_hcd_ppc_of_probe(struct platform_device *op)
 	}
 
 	irq = irq_of_parse_and_map(dn, 0);
-	if (irq == NO_IRQ) {
+	if (!irq) {
 		dev_err(&op->dev, "%s: irq_of_parse_and_map failed\n",
 			__FILE__);
 		rv = -EBUSY;
diff --git a/drivers/usb/host/uhci-grlib.c b/drivers/usb/host/uhci-grlib.c
index 3ef6d52839e53..907d5f01edfd4 100644
--- a/drivers/usb/host/uhci-grlib.c
+++ b/drivers/usb/host/uhci-grlib.c
@@ -116,7 +116,7 @@ static int uhci_hcd_grlib_probe(struct platform_device *op)
 	hcd->rsrc_len = resource_size(&res);
 
 	irq = irq_of_parse_and_map(dn, 0);
-	if (irq == NO_IRQ) {
+	if (!irq) {
 		printk(KERN_ERR "%s: irq_of_parse_and_map failed\n", __FILE__);
 		rv = -EBUSY;
 		goto err_usb;
-- 
GitLab


From dd65a243a915ca319ed5fee9161a168c836fa2f2 Mon Sep 17 00:00:00 2001
From: Shuah Khan <skhan@linuxfoundation.org>
Date: Thu, 10 Nov 2022 12:47:38 -0700
Subject: [PATCH 1391/4122] usb/usbip: Fix v_recv_cmd_submit() to use PIPE_BULK
 define

Fix v_recv_cmd_submit() to use PIPE_BULK define instead of hard coded
values. This also fixes the following signed integer overflow error
reported by cppcheck. This is not an issue since pipe is unsigned int.
However, this change improves the code to use proper define.

drivers/usb/usbip/vudc_rx.c:152:26: error: Signed integer overflow for expression '3<<30'. [integerOverflow]
 urb_p->urb->pipe &= ~(3 << 30);

In addition, add a build time check for PIPE_BULK != 3 as the code path
depends on PIPE_BULK = 3.

Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20221110194738.38514-1-skhan@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usbip/vudc_rx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/usbip/vudc_rx.c b/drivers/usb/usbip/vudc_rx.c
index d4a2f30a75809..51bb70837b902 100644
--- a/drivers/usb/usbip/vudc_rx.c
+++ b/drivers/usb/usbip/vudc_rx.c
@@ -149,7 +149,9 @@ static int v_recv_cmd_submit(struct vudc *udc,
 	urb_p->urb->status = -EINPROGRESS;
 
 	/* FIXME: more pipe setup to please usbip_common */
-	urb_p->urb->pipe &= ~(3 << 30);
+	BUILD_BUG_ON_MSG(PIPE_BULK != 3, "PIPE_* doesn't range from 0 to 3");
+
+	urb_p->urb->pipe &= ~(PIPE_BULK << 30);
 	switch (urb_p->ep->type) {
 	case USB_ENDPOINT_XFER_BULK:
 		urb_p->urb->pipe |= (PIPE_BULK << 30);
-- 
GitLab


From 282a4b71816b6076029017a7bab3a9dcee12a920 Mon Sep 17 00:00:00 2001
From: Eli Billauer <eli.billauer@gmail.com>
Date: Sun, 30 Oct 2022 11:42:09 +0200
Subject: [PATCH 1392/4122] char: xillybus: Prevent use-after-free due to race
 condition

The driver for XillyUSB devices maintains a kref reference count on each
xillyusb_dev structure, which represents a physical device. This reference
count reaches zero when the device has been disconnected and there are no
open file descriptors that are related to the device. When this occurs,
kref_put() calls cleanup_dev(), which clears up the device's data,
including the structure itself.

However, when xillyusb_open() is called, this reference count becomes
tricky: This function needs to obtain the xillyusb_dev structure that
relates to the inode's major and minor (as there can be several such).
xillybus_find_inode() (which is defined in xillybus_class.c) is called
for this purpose. xillybus_find_inode() holds a mutex that is global in
xillybus_class.c to protect the list of devices, and releases this
mutex before returning. As a result, nothing protects the xillyusb_dev's
reference counter from being decremented to zero before xillyusb_open()
increments it on its own behalf. Hence the structure can be freed
due to a rare race condition.

To solve this, a mutex is added. It is locked by xillyusb_open() before
the call to xillybus_find_inode() and is released only after the kref
counter has been incremented on behalf of the newly opened inode. This
protects the kref reference counters of all xillyusb_dev structs from
being decremented by xillyusb_disconnect() during this time segment, as
the call to kref_put() in this function is done with the same lock held.

There is no need to hold the lock on other calls to kref_put(), because
if xillybus_find_inode() finds a struct, xillyusb_disconnect() has not
made the call to remove it, and hence not made its call to kref_put(),
which takes place afterwards. Hence preventing xillyusb_disconnect's
call to kref_put() is enough to ensure that the reference doesn't reach
zero before it's incremented by xillyusb_open().

It would have been more natural to increment the reference count in
xillybus_find_inode() of course, however this function is also called by
Xillybus' driver for PCIe / OF, which registers a completely different
structure. Therefore, xillybus_find_inode() treats these structures as
void pointers, and accordingly can't make any changes.

Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
Suggested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
Link: https://lore.kernel.org/r/20221030094209.65916-1-eli.billauer@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/xillybus/xillyusb.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/char/xillybus/xillyusb.c b/drivers/char/xillybus/xillyusb.c
index 39bcbfd908b46..5a5afa14ca8cb 100644
--- a/drivers/char/xillybus/xillyusb.c
+++ b/drivers/char/xillybus/xillyusb.c
@@ -184,6 +184,14 @@ struct xillyusb_dev {
 	struct mutex process_in_mutex; /* synchronize wakeup_all() */
 };
 
+/*
+ * kref_mutex is used in xillyusb_open() to prevent the xillyusb_dev
+ * struct from being freed during the gap between being found by
+ * xillybus_find_inode() and having its reference count incremented.
+ */
+
+static DEFINE_MUTEX(kref_mutex);
+
 /* FPGA to host opcodes */
 enum {
 	OPCODE_DATA = 0,
@@ -1237,9 +1245,16 @@ static int xillyusb_open(struct inode *inode, struct file *filp)
 	int rc;
 	int index;
 
+	mutex_lock(&kref_mutex);
+
 	rc = xillybus_find_inode(inode, (void **)&xdev, &index);
-	if (rc)
+	if (rc) {
+		mutex_unlock(&kref_mutex);
 		return rc;
+	}
+
+	kref_get(&xdev->kref);
+	mutex_unlock(&kref_mutex);
 
 	chan = &xdev->channels[index];
 	filp->private_data = chan;
@@ -1275,8 +1290,6 @@ static int xillyusb_open(struct inode *inode, struct file *filp)
 	    ((filp->f_mode & FMODE_WRITE) && chan->open_for_write))
 		goto unmutex_fail;
 
-	kref_get(&xdev->kref);
-
 	if (filp->f_mode & FMODE_READ)
 		chan->open_for_read = 1;
 
@@ -1413,6 +1426,7 @@ unopen:
 	return rc;
 
 unmutex_fail:
+	kref_put(&xdev->kref, cleanup_dev);
 	mutex_unlock(&chan->lock);
 	return rc;
 }
@@ -2227,7 +2241,9 @@ static void xillyusb_disconnect(struct usb_interface *interface)
 
 	xdev->dev = NULL;
 
+	mutex_lock(&kref_mutex);
 	kref_put(&xdev->kref, cleanup_dev);
+	mutex_unlock(&kref_mutex);
 }
 
 static struct usb_driver xillyusb_driver = {
-- 
GitLab


From 0266a177631d4c6b963b5b12dd986a8c5abdbf06 Mon Sep 17 00:00:00 2001
From: Long Li <longli@microsoft.com>
Date: Thu, 3 Nov 2022 12:16:30 -0700
Subject: [PATCH 1393/4122] RDMA/mana_ib: Add a driver for Microsoft Azure
 Network Adapter

Add a RDMA VF driver for Microsoft Azure Network Adapter (MANA).

Co-developed-by: Ajay Sharma <sharmaajay@microsoft.com>
Signed-off-by: Ajay Sharma <sharmaajay@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Signed-off-by: Long Li <longli@microsoft.com>
Link: https://lore.kernel.org/r/1667502990-2559-13-git-send-email-longli@linuxonhyperv.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 MAINTAINERS                             |   9 +
 drivers/infiniband/Kconfig              |   1 +
 drivers/infiniband/hw/Makefile          |   1 +
 drivers/infiniband/hw/mana/Kconfig      |  10 +
 drivers/infiniband/hw/mana/Makefile     |   4 +
 drivers/infiniband/hw/mana/cq.c         |  79 ++++
 drivers/infiniband/hw/mana/device.c     | 117 ++++++
 drivers/infiniband/hw/mana/main.c       | 521 ++++++++++++++++++++++++
 drivers/infiniband/hw/mana/mana_ib.h    | 162 ++++++++
 drivers/infiniband/hw/mana/mr.c         | 198 +++++++++
 drivers/infiniband/hw/mana/qp.c         | 506 +++++++++++++++++++++++
 drivers/infiniband/hw/mana/wq.c         | 115 ++++++
 include/net/mana/mana.h                 |   3 +
 include/uapi/rdma/ib_user_ioctl_verbs.h |   1 +
 include/uapi/rdma/mana-abi.h            |  66 +++
 15 files changed, 1793 insertions(+)
 create mode 100644 drivers/infiniband/hw/mana/Kconfig
 create mode 100644 drivers/infiniband/hw/mana/Makefile
 create mode 100644 drivers/infiniband/hw/mana/cq.c
 create mode 100644 drivers/infiniband/hw/mana/device.c
 create mode 100644 drivers/infiniband/hw/mana/main.c
 create mode 100644 drivers/infiniband/hw/mana/mana_ib.h
 create mode 100644 drivers/infiniband/hw/mana/mr.c
 create mode 100644 drivers/infiniband/hw/mana/qp.c
 create mode 100644 drivers/infiniband/hw/mana/wq.c
 create mode 100644 include/uapi/rdma/mana-abi.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 441a65d41eb44..4db8e4e02c05f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13669,6 +13669,15 @@ F:	drivers/scsi/smartpqi/smartpqi*.[ch]
 F:	include/linux/cciss*.h
 F:	include/uapi/linux/cciss*.h
 
+MICROSOFT MANA RDMA DRIVER
+M:	Long Li <longli@microsoft.com>
+M:	Ajay Sharma <sharmaajay@microsoft.com>
+L:	linux-rdma@vger.kernel.org
+S:	Supported
+F:	drivers/infiniband/hw/mana/
+F:	include/net/mana
+F:	include/uapi/rdma/mana-abi.h
+
 MICROSOFT SURFACE AGGREGATOR TABLET-MODE SWITCH
 M:	Maximilian Luz <luzmaximilian@gmail.com>
 L:	platform-driver-x86@vger.kernel.org
diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index aa36ac618e729..ccc874478f0b6 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -85,6 +85,7 @@ source "drivers/infiniband/hw/erdma/Kconfig"
 source "drivers/infiniband/hw/hfi1/Kconfig"
 source "drivers/infiniband/hw/hns/Kconfig"
 source "drivers/infiniband/hw/irdma/Kconfig"
+source "drivers/infiniband/hw/mana/Kconfig"
 source "drivers/infiniband/hw/mlx4/Kconfig"
 source "drivers/infiniband/hw/mlx5/Kconfig"
 source "drivers/infiniband/hw/mthca/Kconfig"
diff --git a/drivers/infiniband/hw/Makefile b/drivers/infiniband/hw/Makefile
index 6b3a88046125a..1211f4317a9f4 100644
--- a/drivers/infiniband/hw/Makefile
+++ b/drivers/infiniband/hw/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_INFINIBAND_QIB)		+= qib/
 obj-$(CONFIG_INFINIBAND_CXGB4)		+= cxgb4/
 obj-$(CONFIG_INFINIBAND_EFA)		+= efa/
 obj-$(CONFIG_INFINIBAND_IRDMA)		+= irdma/
+obj-$(CONFIG_MANA_INFINIBAND)		+= mana/
 obj-$(CONFIG_MLX4_INFINIBAND)		+= mlx4/
 obj-$(CONFIG_MLX5_INFINIBAND)		+= mlx5/
 obj-$(CONFIG_INFINIBAND_OCRDMA)		+= ocrdma/
diff --git a/drivers/infiniband/hw/mana/Kconfig b/drivers/infiniband/hw/mana/Kconfig
new file mode 100644
index 0000000000000..546640657bac2
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Kconfig
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config MANA_INFINIBAND
+	tristate "Microsoft Azure Network Adapter support"
+	depends on NETDEVICES && ETHERNET && PCI && MICROSOFT_MANA
+	help
+	  This driver provides low-level RDMA support for Microsoft Azure
+	  Network Adapter (MANA). MANA supports RDMA features that can be used
+	  for workloads (e.g. DPDK, MPI etc) that uses RDMA verbs to directly
+	  access hardware from user-mode processes in Microsoft Azure cloud
+	  environment.
diff --git a/drivers/infiniband/hw/mana/Makefile b/drivers/infiniband/hw/mana/Makefile
new file mode 100644
index 0000000000000..88655fe5e398a
--- /dev/null
+++ b/drivers/infiniband/hw/mana/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
+obj-$(CONFIG_MANA_INFINIBAND) += mana_ib.o
+
+mana_ib-y := device.o main.o wq.o qp.o cq.o mr.o
diff --git a/drivers/infiniband/hw/mana/cq.c b/drivers/infiniband/hw/mana/cq.c
new file mode 100644
index 0000000000000..d141cab8a1e69
--- /dev/null
+++ b/drivers/infiniband/hw/mana/cq.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		      struct ib_udata *udata)
+{
+	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+	struct ib_device *ibdev = ibcq->device;
+	struct mana_ib_create_cq ucmd = {};
+	struct mana_ib_dev *mdev;
+	int err;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+	if (udata->inlen < sizeof(ucmd))
+		return -EINVAL;
+
+	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+	if (err) {
+		ibdev_dbg(ibdev,
+			  "Failed to copy from udata for create cq, %d\n", err);
+		return err;
+	}
+
+	if (attr->cqe > MAX_SEND_BUFFERS_PER_QUEUE) {
+		ibdev_dbg(ibdev, "CQE %d exceeding limit\n", attr->cqe);
+		return -EINVAL;
+	}
+
+	cq->cqe = attr->cqe;
+	cq->umem = ib_umem_get(ibdev, ucmd.buf_addr, cq->cqe * COMP_ENTRY_SIZE,
+			       IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(cq->umem)) {
+		err = PTR_ERR(cq->umem);
+		ibdev_dbg(ibdev, "Failed to get umem for create cq, err %d\n",
+			  err);
+		return err;
+	}
+
+	err = mana_ib_gd_create_dma_region(mdev, cq->umem, &cq->gdma_region);
+	if (err) {
+		ibdev_dbg(ibdev,
+			  "Failed to create dma region for create cq, %d\n",
+			  err);
+		goto err_release_umem;
+	}
+
+	ibdev_dbg(ibdev,
+		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+		  err, cq->gdma_region);
+
+	/*
+	 * The CQ ID is not known at this time. The ID is generated at create_qp
+	 */
+
+	return 0;
+
+err_release_umem:
+	ib_umem_release(cq->umem);
+	return err;
+}
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
+{
+	struct mana_ib_cq *cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+	struct ib_device *ibdev = ibcq->device;
+	struct mana_ib_dev *mdev;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+	mana_ib_gd_destroy_dma_region(mdev, cq->gdma_region);
+	ib_umem_release(cq->umem);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mana/device.c b/drivers/infiniband/hw/mana/device.c
new file mode 100644
index 0000000000000..d4541b8707e4c
--- /dev/null
+++ b/drivers/infiniband/hw/mana/device.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+#include <net/mana/mana_auxiliary.h>
+
+MODULE_DESCRIPTION("Microsoft Azure Network Adapter IB driver");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(NET_MANA);
+
+static const struct ib_device_ops mana_ib_dev_ops = {
+	.owner = THIS_MODULE,
+	.driver_id = RDMA_DRIVER_MANA,
+	.uverbs_abi_ver = MANA_IB_UVERBS_ABI_VERSION,
+
+	.alloc_pd = mana_ib_alloc_pd,
+	.alloc_ucontext = mana_ib_alloc_ucontext,
+	.create_cq = mana_ib_create_cq,
+	.create_qp = mana_ib_create_qp,
+	.create_rwq_ind_table = mana_ib_create_rwq_ind_table,
+	.create_wq = mana_ib_create_wq,
+	.dealloc_pd = mana_ib_dealloc_pd,
+	.dealloc_ucontext = mana_ib_dealloc_ucontext,
+	.dereg_mr = mana_ib_dereg_mr,
+	.destroy_cq = mana_ib_destroy_cq,
+	.destroy_qp = mana_ib_destroy_qp,
+	.destroy_rwq_ind_table = mana_ib_destroy_rwq_ind_table,
+	.destroy_wq = mana_ib_destroy_wq,
+	.disassociate_ucontext = mana_ib_disassociate_ucontext,
+	.get_port_immutable = mana_ib_get_port_immutable,
+	.mmap = mana_ib_mmap,
+	.modify_qp = mana_ib_modify_qp,
+	.modify_wq = mana_ib_modify_wq,
+	.query_device = mana_ib_query_device,
+	.query_gid = mana_ib_query_gid,
+	.query_port = mana_ib_query_port,
+	.reg_user_mr = mana_ib_reg_user_mr,
+
+	INIT_RDMA_OBJ_SIZE(ib_cq, mana_ib_cq, ibcq),
+	INIT_RDMA_OBJ_SIZE(ib_pd, mana_ib_pd, ibpd),
+	INIT_RDMA_OBJ_SIZE(ib_qp, mana_ib_qp, ibqp),
+	INIT_RDMA_OBJ_SIZE(ib_ucontext, mana_ib_ucontext, ibucontext),
+	INIT_RDMA_OBJ_SIZE(ib_rwq_ind_table, mana_ib_rwq_ind_table,
+			   ib_ind_table),
+};
+
+static int mana_ib_probe(struct auxiliary_device *adev,
+			 const struct auxiliary_device_id *id)
+{
+	struct mana_adev *madev = container_of(adev, struct mana_adev, adev);
+	struct gdma_dev *mdev = madev->mdev;
+	struct mana_context *mc;
+	struct mana_ib_dev *dev;
+	int ret;
+
+	mc = mdev->driver_data;
+
+	dev = ib_alloc_device(mana_ib_dev, ib_dev);
+	if (!dev)
+		return -ENOMEM;
+
+	ib_set_device_ops(&dev->ib_dev, &mana_ib_dev_ops);
+
+	dev->ib_dev.phys_port_cnt = mc->num_ports;
+
+	ibdev_dbg(&dev->ib_dev, "mdev=%p id=%d num_ports=%d\n", mdev,
+		  mdev->dev_id.as_uint32, dev->ib_dev.phys_port_cnt);
+
+	dev->gdma_dev = mdev;
+	dev->ib_dev.node_type = RDMA_NODE_IB_CA;
+
+	/*
+	 * num_comp_vectors needs to set to the max MSIX index
+	 * when interrupts and event queues are implemented
+	 */
+	dev->ib_dev.num_comp_vectors = 1;
+	dev->ib_dev.dev.parent = mdev->gdma_context->dev;
+
+	ret = ib_register_device(&dev->ib_dev, "mana_%d",
+				 mdev->gdma_context->dev);
+	if (ret) {
+		ib_dealloc_device(&dev->ib_dev);
+		return ret;
+	}
+
+	dev_set_drvdata(&adev->dev, dev);
+
+	return 0;
+}
+
+static void mana_ib_remove(struct auxiliary_device *adev)
+{
+	struct mana_ib_dev *dev = dev_get_drvdata(&adev->dev);
+
+	ib_unregister_device(&dev->ib_dev);
+	ib_dealloc_device(&dev->ib_dev);
+}
+
+static const struct auxiliary_device_id mana_id_table[] = {
+	{
+		.name = "mana.rdma",
+	},
+	{},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mana_id_table);
+
+static struct auxiliary_driver mana_driver = {
+	.name = "rdma",
+	.probe = mana_ib_probe,
+	.remove = mana_ib_remove,
+	.id_table = mana_id_table,
+};
+
+module_auxiliary_driver(mana_driver);
diff --git a/drivers/infiniband/hw/mana/main.c b/drivers/infiniband/hw/mana/main.c
new file mode 100644
index 0000000000000..8b3bc302d6f3a
--- /dev/null
+++ b/drivers/infiniband/hw/mana/main.c
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+			 u32 port)
+{
+	struct gdma_dev *gd = dev->gdma_dev;
+	struct mana_port_context *mpc;
+	struct net_device *ndev;
+	struct mana_context *mc;
+
+	mc = gd->driver_data;
+	ndev = mc->ports[port];
+	mpc = netdev_priv(ndev);
+
+	mutex_lock(&pd->vport_mutex);
+
+	pd->vport_use_count--;
+	WARN_ON(pd->vport_use_count < 0);
+
+	if (!pd->vport_use_count)
+		mana_uncfg_vport(mpc);
+
+	mutex_unlock(&pd->vport_mutex);
+}
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port, struct mana_ib_pd *pd,
+		      u32 doorbell_id)
+{
+	struct gdma_dev *mdev = dev->gdma_dev;
+	struct mana_port_context *mpc;
+	struct mana_context *mc;
+	struct net_device *ndev;
+	int err;
+
+	mc = mdev->driver_data;
+	ndev = mc->ports[port];
+	mpc = netdev_priv(ndev);
+
+	mutex_lock(&pd->vport_mutex);
+
+	pd->vport_use_count++;
+	if (pd->vport_use_count > 1) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Skip as this PD is already configured vport\n");
+		mutex_unlock(&pd->vport_mutex);
+		return 0;
+	}
+
+	err = mana_cfg_vport(mpc, pd->pdn, doorbell_id);
+	if (err) {
+		pd->vport_use_count--;
+		mutex_unlock(&pd->vport_mutex);
+
+		ibdev_dbg(&dev->ib_dev, "Failed to configure vPort %d\n", err);
+		return err;
+	}
+
+	mutex_unlock(&pd->vport_mutex);
+
+	pd->tx_shortform_allowed = mpc->tx_shortform_allowed;
+	pd->tx_vp_offset = mpc->tx_vp_offset;
+
+	ibdev_dbg(&dev->ib_dev, "vport handle %llx pdid %x doorbell_id %x\n",
+		  mpc->port_handle, pd->pdn, doorbell_id);
+
+	return 0;
+}
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct ib_device *ibdev = ibpd->device;
+	struct gdma_create_pd_resp resp = {};
+	struct gdma_create_pd_req req = {};
+	enum gdma_pd_flags flags = 0;
+	struct mana_ib_dev *dev;
+	struct gdma_dev *mdev;
+	int err;
+
+	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mdev = dev->gdma_dev;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_PD, sizeof(req),
+			     sizeof(resp));
+
+	req.flags = flags;
+	err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+				   sizeof(resp), &resp);
+
+	if (err || resp.hdr.status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to get pd_id err %d status %u\n", err,
+			  resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	pd->pd_handle = resp.pd_handle;
+	pd->pdn = resp.pd_id;
+	ibdev_dbg(&dev->ib_dev, "pd_handle 0x%llx pd_id %d\n",
+		  pd->pd_handle, pd->pdn);
+
+	mutex_init(&pd->vport_mutex);
+	pd->vport_use_count = 0;
+	return 0;
+}
+
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct ib_device *ibdev = ibpd->device;
+	struct gdma_destory_pd_resp resp = {};
+	struct gdma_destroy_pd_req req = {};
+	struct mana_ib_dev *dev;
+	struct gdma_dev *mdev;
+	int err;
+
+	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	mdev = dev->gdma_dev;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_PD, sizeof(req),
+			     sizeof(resp));
+
+	req.pd_handle = pd->pd_handle;
+	err = mana_gd_send_request(mdev->gdma_context, sizeof(req), &req,
+				   sizeof(resp), &resp);
+
+	if (err || resp.hdr.status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to destroy pd_handle 0x%llx err %d status %u",
+			  pd->pd_handle, err, resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+	}
+
+	return err;
+}
+
+static int mana_gd_destroy_doorbell_page(struct gdma_context *gc,
+					 int doorbell_page)
+{
+	struct gdma_destroy_resource_range_req req = {};
+	struct gdma_resp_hdr resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_RESOURCE_RANGE,
+			     sizeof(req), sizeof(resp));
+
+	req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+	req.num_resources = 1;
+	req.allocated_resources = doorbell_page;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.status) {
+		dev_err(gc->dev,
+			"Failed to destroy doorbell page: ret %d, 0x%x\n",
+			err, resp.status);
+		return err ?: -EPROTO;
+	}
+
+	return 0;
+}
+
+static int mana_gd_allocate_doorbell_page(struct gdma_context *gc,
+					  int *doorbell_page)
+{
+	struct gdma_allocate_resource_range_req req = {};
+	struct gdma_allocate_resource_range_resp resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_ALLOCATE_RESOURCE_RANGE,
+			     sizeof(req), sizeof(resp));
+
+	req.resource_type = GDMA_RESOURCE_DOORBELL_PAGE;
+	req.num_resources = 1;
+	req.alignment = 1;
+
+	/* Have GDMA start searching from 0 */
+	req.allocated_resources = 0;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev,
+			"Failed to allocate doorbell page: ret %d, 0x%x\n",
+			err, resp.hdr.status);
+		return err ?: -EPROTO;
+	}
+
+	*doorbell_page = resp.allocated_resources;
+
+	return 0;
+}
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+			   struct ib_udata *udata)
+{
+	struct mana_ib_ucontext *ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
+	struct gdma_dev *dev;
+	int doorbell_page;
+	int ret;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	dev = mdev->gdma_dev;
+	gc = dev->gdma_context;
+
+	/* Allocate a doorbell page index */
+	ret = mana_gd_allocate_doorbell_page(gc, &doorbell_page);
+	if (ret) {
+		ibdev_dbg(ibdev, "Failed to allocate doorbell page %d\n", ret);
+		return ret;
+	}
+
+	ibdev_dbg(ibdev, "Doorbell page allocated %d\n", doorbell_page);
+
+	ucontext->doorbell = doorbell_page;
+
+	return 0;
+}
+
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
+{
+	struct mana_ib_ucontext *mana_ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
+	int ret;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gc = mdev->gdma_dev->gdma_context;
+
+	ret = mana_gd_destroy_doorbell_page(gc, mana_ucontext->doorbell);
+	if (ret)
+		ibdev_dbg(ibdev, "Failed to destroy doorbell page %d\n", ret);
+}
+
+static int
+mana_ib_gd_first_dma_region(struct mana_ib_dev *dev,
+			    struct gdma_context *gc,
+			    struct gdma_create_dma_region_req *create_req,
+			    size_t num_pages, mana_handle_t *gdma_region)
+{
+	struct gdma_create_dma_region_resp create_resp = {};
+	unsigned int create_req_msg_size;
+	int err;
+
+	create_req_msg_size =
+		struct_size(create_req, page_addr_list, num_pages);
+	create_req->page_addr_list_len = num_pages;
+
+	err = mana_gd_send_request(gc, create_req_msg_size, create_req,
+				   sizeof(create_resp), &create_resp);
+	if (err || create_resp.hdr.status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to create DMA region: %d, 0x%x\n",
+			  err, create_resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	*gdma_region = create_resp.dma_region_handle;
+	ibdev_dbg(&dev->ib_dev, "Created DMA region handle 0x%llx\n",
+		  *gdma_region);
+
+	return 0;
+}
+
+static int
+mana_ib_gd_add_dma_region(struct mana_ib_dev *dev, struct gdma_context *gc,
+			  struct gdma_dma_region_add_pages_req *add_req,
+			  unsigned int num_pages, u32 expected_status)
+{
+	unsigned int add_req_msg_size =
+		struct_size(add_req, page_addr_list, num_pages);
+	struct gdma_general_resp add_resp = {};
+	int err;
+
+	mana_gd_init_req_hdr(&add_req->hdr, GDMA_DMA_REGION_ADD_PAGES,
+			     add_req_msg_size, sizeof(add_resp));
+	add_req->page_addr_list_len = num_pages;
+
+	err = mana_gd_send_request(gc, add_req_msg_size, add_req,
+				   sizeof(add_resp), &add_resp);
+	if (err || add_resp.hdr.status != expected_status) {
+		ibdev_dbg(&dev->ib_dev,
+			  "Failed to create DMA region: %d, 0x%x\n",
+			  err, add_resp.hdr.status);
+
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	return 0;
+}
+
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+				 mana_handle_t *gdma_region)
+{
+	struct gdma_dma_region_add_pages_req *add_req = NULL;
+	size_t num_pages_processed = 0, num_pages_to_handle;
+	struct gdma_create_dma_region_req *create_req;
+	unsigned int create_req_msg_size;
+	struct hw_channel_context *hwc;
+	struct ib_block_iter biter;
+	size_t max_pgs_add_cmd = 0;
+	size_t max_pgs_create_cmd;
+	struct gdma_context *gc;
+	size_t num_pages_total;
+	struct gdma_dev *mdev;
+	unsigned long page_sz;
+	unsigned int tail = 0;
+	u64 *page_addr_list;
+	void *request_buf;
+	int err;
+
+	mdev = dev->gdma_dev;
+	gc = mdev->gdma_context;
+	hwc = gc->hwc.driver_data;
+
+	/* Hardware requires dma region to align to chosen page size */
+	page_sz = ib_umem_find_best_pgsz(umem, PAGE_SZ_BM, 0);
+	if (!page_sz) {
+		ibdev_dbg(&dev->ib_dev, "failed to find page size.\n");
+		return -ENOMEM;
+	}
+	num_pages_total = ib_umem_num_dma_blocks(umem, page_sz);
+
+	max_pgs_create_cmd =
+		(hwc->max_req_msg_size - sizeof(*create_req)) / sizeof(u64);
+	num_pages_to_handle =
+		min_t(size_t, num_pages_total, max_pgs_create_cmd);
+	create_req_msg_size =
+		struct_size(create_req, page_addr_list, num_pages_to_handle);
+
+	request_buf = kzalloc(hwc->max_req_msg_size, GFP_KERNEL);
+	if (!request_buf)
+		return -ENOMEM;
+
+	create_req = request_buf;
+	mana_gd_init_req_hdr(&create_req->hdr, GDMA_CREATE_DMA_REGION,
+			     create_req_msg_size,
+			     sizeof(struct gdma_create_dma_region_resp));
+
+	create_req->length = umem->length;
+	create_req->offset_in_page = umem->address & (page_sz - 1);
+	create_req->gdma_page_type = order_base_2(page_sz) - PAGE_SHIFT;
+	create_req->page_count = num_pages_total;
+
+	ibdev_dbg(&dev->ib_dev, "size_dma_region %lu num_pages_total %lu\n",
+		  umem->length, num_pages_total);
+
+	ibdev_dbg(&dev->ib_dev, "page_sz %lu offset_in_page %u\n",
+		  page_sz, create_req->offset_in_page);
+
+	ibdev_dbg(&dev->ib_dev, "num_pages_to_handle %lu, gdma_page_type %u",
+		  num_pages_to_handle, create_req->gdma_page_type);
+
+	page_addr_list = create_req->page_addr_list;
+	rdma_umem_for_each_dma_block(umem, &biter, page_sz) {
+		page_addr_list[tail++] = rdma_block_iter_dma_address(&biter);
+		if (tail < num_pages_to_handle)
+			continue;
+
+		if (!num_pages_processed) {
+			/* First create message */
+			err = mana_ib_gd_first_dma_region(dev, gc, create_req,
+							  tail, gdma_region);
+			if (err)
+				goto out;
+
+			max_pgs_add_cmd = (hwc->max_req_msg_size -
+				sizeof(*add_req)) / sizeof(u64);
+
+			add_req = request_buf;
+			add_req->dma_region_handle = *gdma_region;
+			add_req->reserved3 = 0;
+			page_addr_list = add_req->page_addr_list;
+		} else {
+			/* Subsequent create messages */
+			u32 expected_s = 0;
+
+			if (num_pages_processed + num_pages_to_handle <
+			    num_pages_total)
+				expected_s = GDMA_STATUS_MORE_ENTRIES;
+
+			err = mana_ib_gd_add_dma_region(dev, gc, add_req, tail,
+							expected_s);
+			if (err)
+				break;
+		}
+
+		num_pages_processed += tail;
+		tail = 0;
+
+		/* The remaining pages to create */
+		num_pages_to_handle =
+			min_t(size_t,
+			      num_pages_total - num_pages_processed,
+			      max_pgs_add_cmd);
+	}
+
+	if (err)
+		mana_ib_gd_destroy_dma_region(dev, *gdma_region);
+
+out:
+	kfree(request_buf);
+	return err;
+}
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev, u64 gdma_region)
+{
+	struct gdma_dev *mdev = dev->gdma_dev;
+	struct gdma_context *gc;
+
+	gc = mdev->gdma_context;
+	ibdev_dbg(&dev->ib_dev, "destroy dma region 0x%llx\n", gdma_region);
+
+	return mana_gd_destroy_dma_region(gc, gdma_region);
+}
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
+{
+	struct mana_ib_ucontext *mana_ucontext =
+		container_of(ibcontext, struct mana_ib_ucontext, ibucontext);
+	struct ib_device *ibdev = ibcontext->device;
+	struct mana_ib_dev *mdev;
+	struct gdma_context *gc;
+	phys_addr_t pfn;
+	pgprot_t prot;
+	int ret;
+
+	mdev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+	gc = mdev->gdma_dev->gdma_context;
+
+	if (vma->vm_pgoff != 0) {
+		ibdev_dbg(ibdev, "Unexpected vm_pgoff %lu\n", vma->vm_pgoff);
+		return -EINVAL;
+	}
+
+	/* Map to the page indexed by ucontext->doorbell */
+	pfn = (gc->phys_db_page_base +
+	       gc->db_page_size * mana_ucontext->doorbell) >>
+	      PAGE_SHIFT;
+	prot = pgprot_writecombine(vma->vm_page_prot);
+
+	ret = rdma_user_mmap_io(ibcontext, vma, pfn, gc->db_page_size, prot,
+				NULL);
+	if (ret)
+		ibdev_dbg(ibdev, "can't rdma_user_mmap_io ret %d\n", ret);
+	else
+		ibdev_dbg(ibdev, "mapped I/O pfn 0x%llx page_size %u, ret %d\n",
+			  pfn, gc->db_page_size, ret);
+
+	return ret;
+}
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+			       struct ib_port_immutable *immutable)
+{
+	/*
+	 * This version only support RAW_PACKET
+	 * other values need to be filled for other types
+	 */
+	immutable->core_cap_flags = RDMA_CORE_PORT_RAW_PACKET;
+
+	return 0;
+}
+
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+			 struct ib_udata *uhw)
+{
+	props->max_qp = MANA_MAX_NUM_QUEUES;
+	props->max_qp_wr = MAX_SEND_BUFFERS_PER_QUEUE;
+
+	/*
+	 * max_cqe could be potentially much bigger.
+	 * As this version of driver only support RAW QP, set it to the same
+	 * value as max_qp_wr
+	 */
+	props->max_cqe = MAX_SEND_BUFFERS_PER_QUEUE;
+
+	props->max_mr_size = MANA_IB_MAX_MR_SIZE;
+	props->max_mr = MANA_IB_MAX_MR;
+	props->max_send_sge = MAX_TX_WQE_SGL_ENTRIES;
+	props->max_recv_sge = MAX_RX_WQE_SGL_ENTRIES;
+
+	return 0;
+}
+
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+		       struct ib_port_attr *props)
+{
+	/* This version doesn't return port properties */
+	return 0;
+}
+
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+		      union ib_gid *gid)
+{
+	/* This version doesn't return GID properties */
+	return 0;
+}
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
+{
+}
diff --git a/drivers/infiniband/hw/mana/mana_ib.h b/drivers/infiniband/hw/mana/mana_ib.h
new file mode 100644
index 0000000000000..502cc8672eefa
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mana_ib.h
@@ -0,0 +1,162 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef _MANA_IB_H_
+#define _MANA_IB_H_
+
+#include <rdma/ib_verbs.h>
+#include <rdma/ib_mad.h>
+#include <rdma/ib_umem.h>
+#include <rdma/mana-abi.h>
+#include <rdma/uverbs_ioctl.h>
+
+#include <net/mana/mana.h>
+
+#define PAGE_SZ_BM                                                             \
+	(SZ_4K | SZ_8K | SZ_16K | SZ_32K | SZ_64K | SZ_128K | SZ_256K |        \
+	 SZ_512K | SZ_1M | SZ_2M)
+
+/* MANA doesn't have any limit for MR size */
+#define MANA_IB_MAX_MR_SIZE	U64_MAX
+
+/*
+ * The hardware limit of number of MRs is greater than maximum number of MRs
+ * that can possibly represent in 24 bits
+ */
+#define MANA_IB_MAX_MR		0xFFFFFFu
+
+struct mana_ib_dev {
+	struct ib_device ib_dev;
+	struct gdma_dev *gdma_dev;
+};
+
+struct mana_ib_wq {
+	struct ib_wq ibwq;
+	struct ib_umem *umem;
+	int wqe;
+	u32 wq_buf_size;
+	u64 gdma_region;
+	u64 id;
+	mana_handle_t rx_object;
+};
+
+struct mana_ib_pd {
+	struct ib_pd ibpd;
+	u32 pdn;
+	mana_handle_t pd_handle;
+
+	/* Mutex for sharing access to vport_use_count */
+	struct mutex vport_mutex;
+	int vport_use_count;
+
+	bool tx_shortform_allowed;
+	u32 tx_vp_offset;
+};
+
+struct mana_ib_mr {
+	struct ib_mr ibmr;
+	struct ib_umem *umem;
+	mana_handle_t mr_handle;
+};
+
+struct mana_ib_cq {
+	struct ib_cq ibcq;
+	struct ib_umem *umem;
+	int cqe;
+	u64 gdma_region;
+	u64 id;
+};
+
+struct mana_ib_qp {
+	struct ib_qp ibqp;
+
+	/* Work queue info */
+	struct ib_umem *sq_umem;
+	int sqe;
+	u64 sq_gdma_region;
+	u64 sq_id;
+	mana_handle_t tx_object;
+
+	/* The port on the IB device, starting with 1 */
+	u32 port;
+};
+
+struct mana_ib_ucontext {
+	struct ib_ucontext ibucontext;
+	u32 doorbell;
+};
+
+struct mana_ib_rwq_ind_table {
+	struct ib_rwq_ind_table ib_ind_table;
+};
+
+int mana_ib_gd_create_dma_region(struct mana_ib_dev *dev, struct ib_umem *umem,
+				 mana_handle_t *gdma_region);
+
+int mana_ib_gd_destroy_dma_region(struct mana_ib_dev *dev,
+				  mana_handle_t gdma_region);
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+				struct ib_wq_init_attr *init_attr,
+				struct ib_udata *udata);
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+		      u32 wq_attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata);
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+				 struct ib_rwq_ind_table_init_attr *init_attr,
+				 struct ib_udata *udata);
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl);
+
+struct ib_mr *mana_ib_get_dma_mr(struct ib_pd *ibpd, int access_flags);
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+				  u64 iova, int access_flags,
+				  struct ib_udata *udata);
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata);
+
+int mana_ib_create_qp(struct ib_qp *qp, struct ib_qp_init_attr *qp_init_attr,
+		      struct ib_udata *udata);
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata);
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
+int mana_ib_cfg_vport(struct mana_ib_dev *dev, u32 port_id,
+		      struct mana_ib_pd *pd, u32 doorbell_id);
+void mana_ib_uncfg_vport(struct mana_ib_dev *dev, struct mana_ib_pd *pd,
+			 u32 port);
+
+int mana_ib_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
+		      struct ib_udata *udata);
+
+int mana_ib_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata);
+
+int mana_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+int mana_ib_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata);
+
+int mana_ib_alloc_ucontext(struct ib_ucontext *ibcontext,
+			   struct ib_udata *udata);
+void mana_ib_dealloc_ucontext(struct ib_ucontext *ibcontext);
+
+int mana_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma);
+
+int mana_ib_get_port_immutable(struct ib_device *ibdev, u32 port_num,
+			       struct ib_port_immutable *immutable);
+int mana_ib_query_device(struct ib_device *ibdev, struct ib_device_attr *props,
+			 struct ib_udata *uhw);
+int mana_ib_query_port(struct ib_device *ibdev, u32 port,
+		       struct ib_port_attr *props);
+int mana_ib_query_gid(struct ib_device *ibdev, u32 port, int index,
+		      union ib_gid *gid);
+
+void mana_ib_disassociate_ucontext(struct ib_ucontext *ibcontext);
+
+#endif
diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
new file mode 100644
index 0000000000000..a56236cdd9eec
--- /dev/null
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+#define VALID_MR_FLAGS                                                         \
+	(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_READ)
+
+static enum gdma_mr_access_flags
+mana_ib_verbs_to_gdma_access_flags(int access_flags)
+{
+	enum gdma_mr_access_flags flags = GDMA_ACCESS_FLAG_LOCAL_READ;
+
+	if (access_flags & IB_ACCESS_LOCAL_WRITE)
+		flags |= GDMA_ACCESS_FLAG_LOCAL_WRITE;
+
+	if (access_flags & IB_ACCESS_REMOTE_WRITE)
+		flags |= GDMA_ACCESS_FLAG_REMOTE_WRITE;
+
+	if (access_flags & IB_ACCESS_REMOTE_READ)
+		flags |= GDMA_ACCESS_FLAG_REMOTE_READ;
+
+	return flags;
+}
+
+static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
+				struct gdma_create_mr_params *mr_params)
+{
+	struct gdma_create_mr_response resp = {};
+	struct gdma_create_mr_request req = {};
+	struct gdma_dev *mdev = dev->gdma_dev;
+	struct gdma_context *gc;
+	int err;
+
+	gc = mdev->gdma_context;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_CREATE_MR, sizeof(req),
+			     sizeof(resp));
+	req.pd_handle = mr_params->pd_handle;
+	req.mr_type = mr_params->mr_type;
+
+	switch (mr_params->mr_type) {
+	case GDMA_MR_TYPE_GVA:
+		req.gva.dma_region_handle = mr_params->gva.dma_region_handle;
+		req.gva.virtual_address = mr_params->gva.virtual_address;
+		req.gva.access_flags = mr_params->gva.access_flags;
+		break;
+
+	default:
+		ibdev_dbg(&dev->ib_dev,
+			  "invalid param (GDMA_MR_TYPE) passed, type %d\n",
+			  req.mr_type);
+		return -EINVAL;
+	}
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+
+	if (err || resp.hdr.status) {
+		ibdev_dbg(&dev->ib_dev, "Failed to create mr %d, %u", err,
+			  resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+
+		return err;
+	}
+
+	mr->ibmr.lkey = resp.lkey;
+	mr->ibmr.rkey = resp.rkey;
+	mr->mr_handle = resp.mr_handle;
+
+	return 0;
+}
+
+static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev,
+				 gdma_obj_handle_t mr_handle)
+{
+	struct gdma_destroy_mr_response resp = {};
+	struct gdma_destroy_mr_request req = {};
+	struct gdma_dev *mdev = dev->gdma_dev;
+	struct gdma_context *gc;
+	int err;
+
+	gc = mdev->gdma_context;
+
+	mana_gd_init_req_hdr(&req.hdr, GDMA_DESTROY_MR, sizeof(req),
+			     sizeof(resp));
+
+	req.mr_handle = mr_handle;
+
+	err = mana_gd_send_request(gc, sizeof(req), &req, sizeof(resp), &resp);
+	if (err || resp.hdr.status) {
+		dev_err(gc->dev, "Failed to destroy MR: %d, 0x%x\n", err,
+			resp.hdr.status);
+		if (!err)
+			err = -EPROTO;
+		return err;
+	}
+
+	return 0;
+}
+
+struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
+				  u64 iova, int access_flags,
+				  struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct gdma_create_mr_params mr_params = {};
+	struct ib_device *ibdev = ibpd->device;
+	gdma_obj_handle_t dma_region_handle;
+	struct mana_ib_dev *dev;
+	struct mana_ib_mr *mr;
+	int err;
+
+	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+	ibdev_dbg(ibdev,
+		  "start 0x%llx, iova 0x%llx length 0x%llx access_flags 0x%x",
+		  start, iova, length, access_flags);
+
+	if (access_flags & ~VALID_MR_FLAGS)
+		return ERR_PTR(-EINVAL);
+
+	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+	if (!mr)
+		return ERR_PTR(-ENOMEM);
+
+	mr->umem = ib_umem_get(ibdev, start, length, access_flags);
+	if (IS_ERR(mr->umem)) {
+		err = PTR_ERR(mr->umem);
+		ibdev_dbg(ibdev,
+			  "Failed to get umem for register user-mr, %d\n", err);
+		goto err_free;
+	}
+
+	err = mana_ib_gd_create_dma_region(dev, mr->umem, &dma_region_handle);
+	if (err) {
+		ibdev_dbg(ibdev, "Failed create dma region for user-mr, %d\n",
+			  err);
+		goto err_umem;
+	}
+
+	ibdev_dbg(ibdev,
+		  "mana_ib_gd_create_dma_region ret %d gdma_region %llx\n", err,
+		  dma_region_handle);
+
+	mr_params.pd_handle = pd->pd_handle;
+	mr_params.mr_type = GDMA_MR_TYPE_GVA;
+	mr_params.gva.dma_region_handle = dma_region_handle;
+	mr_params.gva.virtual_address = iova;
+	mr_params.gva.access_flags =
+		mana_ib_verbs_to_gdma_access_flags(access_flags);
+
+	err = mana_ib_gd_create_mr(dev, mr, &mr_params);
+	if (err)
+		goto err_dma_region;
+
+	/*
+	 * There is no need to keep track of dma_region_handle after MR is
+	 * successfully created. The dma_region_handle is tracked in the PF
+	 * as part of the lifecycle of this MR.
+	 */
+
+	return &mr->ibmr;
+
+err_dma_region:
+	mana_gd_destroy_dma_region(dev->gdma_dev->gdma_context,
+				   dma_region_handle);
+
+err_umem:
+	ib_umem_release(mr->umem);
+
+err_free:
+	kfree(mr);
+	return ERR_PTR(err);
+}
+
+int mana_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
+{
+	struct mana_ib_mr *mr = container_of(ibmr, struct mana_ib_mr, ibmr);
+	struct ib_device *ibdev = ibmr->device;
+	struct mana_ib_dev *dev;
+	int err;
+
+	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
+
+	err = mana_ib_gd_destroy_mr(dev, mr->mr_handle);
+	if (err)
+		return err;
+
+	if (mr->umem)
+		ib_umem_release(mr->umem);
+
+	kfree(mr);
+
+	return 0;
+}
diff --git a/drivers/infiniband/hw/mana/qp.c b/drivers/infiniband/hw/mana/qp.c
new file mode 100644
index 0000000000000..ea15ec77e3212
--- /dev/null
+++ b/drivers/infiniband/hw/mana/qp.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+static int mana_ib_cfg_vport_steering(struct mana_ib_dev *dev,
+				      struct net_device *ndev,
+				      mana_handle_t default_rxobj,
+				      mana_handle_t ind_table[],
+				      u32 log_ind_tbl_size, u32 rx_hash_key_len,
+				      u8 *rx_hash_key)
+{
+	struct mana_port_context *mpc = netdev_priv(ndev);
+	struct mana_cfg_rx_steer_req *req = NULL;
+	struct mana_cfg_rx_steer_resp resp = {};
+	mana_handle_t *req_indir_tab;
+	struct gdma_context *gc;
+	struct gdma_dev *mdev;
+	u32 req_buf_size;
+	int i, err;
+
+	mdev = dev->gdma_dev;
+	gc = mdev->gdma_context;
+
+	req_buf_size =
+		sizeof(*req) + sizeof(mana_handle_t) * MANA_INDIRECT_TABLE_SIZE;
+	req = kzalloc(req_buf_size, GFP_KERNEL);
+	if (!req)
+		return -ENOMEM;
+
+	mana_gd_init_req_hdr(&req->hdr, MANA_CONFIG_VPORT_RX, req_buf_size,
+			     sizeof(resp));
+
+	req->vport = mpc->port_handle;
+	req->rx_enable = 1;
+	req->update_default_rxobj = 1;
+	req->default_rxobj = default_rxobj;
+	req->hdr.dev_id = mdev->dev_id;
+
+	/* If there are more than 1 entries in indirection table, enable RSS */
+	if (log_ind_tbl_size)
+		req->rss_enable = true;
+
+	req->num_indir_entries = MANA_INDIRECT_TABLE_SIZE;
+	req->indir_tab_offset = sizeof(*req);
+	req->update_indir_tab = true;
+
+	req_indir_tab = (mana_handle_t *)(req + 1);
+	/* The ind table passed to the hardware must have
+	 * MANA_INDIRECT_TABLE_SIZE entries. Adjust the verb
+	 * ind_table to MANA_INDIRECT_TABLE_SIZE if required
+	 */
+	ibdev_dbg(&dev->ib_dev, "ind table size %u\n", 1 << log_ind_tbl_size);
+	for (i = 0; i < MANA_INDIRECT_TABLE_SIZE; i++) {
+		req_indir_tab[i] = ind_table[i % (1 << log_ind_tbl_size)];
+		ibdev_dbg(&dev->ib_dev, "index %u handle 0x%llx\n", i,
+			  req_indir_tab[i]);
+	}
+
+	req->update_hashkey = true;
+	if (rx_hash_key_len)
+		memcpy(req->hashkey, rx_hash_key, rx_hash_key_len);
+	else
+		netdev_rss_key_fill(req->hashkey, MANA_HASH_KEY_SIZE);
+
+	ibdev_dbg(&dev->ib_dev, "vport handle %llu default_rxobj 0x%llx\n",
+		  req->vport, default_rxobj);
+
+	err = mana_gd_send_request(gc, req_buf_size, req, sizeof(resp), &resp);
+	if (err) {
+		netdev_err(ndev, "Failed to configure vPort RX: %d\n", err);
+		goto out;
+	}
+
+	if (resp.hdr.status) {
+		netdev_err(ndev, "vPort RX configuration failed: 0x%x\n",
+			   resp.hdr.status);
+		err = -EPROTO;
+		goto out;
+	}
+
+	netdev_info(ndev, "Configured steering vPort %llu log_entries %u\n",
+		    mpc->port_handle, log_ind_tbl_size);
+
+out:
+	kfree(req);
+	return err;
+}
+
+static int mana_ib_create_qp_rss(struct ib_qp *ibqp, struct ib_pd *pd,
+				 struct ib_qp_init_attr *attr,
+				 struct ib_udata *udata)
+{
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+	struct mana_ib_dev *mdev =
+		container_of(pd->device, struct mana_ib_dev, ib_dev);
+	struct ib_rwq_ind_table *ind_tbl = attr->rwq_ind_tbl;
+	struct mana_ib_create_qp_rss_resp resp = {};
+	struct mana_ib_create_qp_rss ucmd = {};
+	struct gdma_dev *gd = mdev->gdma_dev;
+	mana_handle_t *mana_ind_table;
+	struct mana_port_context *mpc;
+	struct mana_context *mc;
+	struct net_device *ndev;
+	struct mana_ib_cq *cq;
+	struct mana_ib_wq *wq;
+	unsigned int ind_tbl_size;
+	struct ib_cq *ibcq;
+	struct ib_wq *ibwq;
+	int i = 0;
+	u32 port;
+	int ret;
+
+	mc = gd->driver_data;
+
+	if (!udata || udata->inlen < sizeof(ucmd))
+		return -EINVAL;
+
+	ret = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+	if (ret) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed copy from udata for create rss-qp, err %d\n",
+			  ret);
+		return ret;
+	}
+
+	if (attr->cap.max_recv_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Requested max_recv_wr %d exceeding limit\n",
+			  attr->cap.max_recv_wr);
+		return -EINVAL;
+	}
+
+	if (attr->cap.max_recv_sge > MAX_RX_WQE_SGL_ENTRIES) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Requested max_recv_sge %d exceeding limit\n",
+			  attr->cap.max_recv_sge);
+		return -EINVAL;
+	}
+
+	ind_tbl_size = 1 << ind_tbl->log_ind_tbl_size;
+	if (ind_tbl_size > MANA_INDIRECT_TABLE_SIZE) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Indirect table size %d exceeding limit\n",
+			  ind_tbl_size);
+		return -EINVAL;
+	}
+
+	if (ucmd.rx_hash_function != MANA_IB_RX_HASH_FUNC_TOEPLITZ) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "RX Hash function is not supported, %d\n",
+			  ucmd.rx_hash_function);
+		return -EINVAL;
+	}
+
+	/* IB ports start with 1, MANA start with 0 */
+	port = ucmd.port;
+	if (port < 1 || port > mc->num_ports) {
+		ibdev_dbg(&mdev->ib_dev, "Invalid port %u in creating qp\n",
+			  port);
+		return -EINVAL;
+	}
+	ndev = mc->ports[port - 1];
+	mpc = netdev_priv(ndev);
+
+	ibdev_dbg(&mdev->ib_dev, "rx_hash_function %d port %d\n",
+		  ucmd.rx_hash_function, port);
+
+	mana_ind_table = kcalloc(ind_tbl_size, sizeof(mana_handle_t),
+				 GFP_KERNEL);
+	if (!mana_ind_table) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	qp->port = port;
+
+	for (i = 0; i < ind_tbl_size; i++) {
+		struct mana_obj_spec wq_spec = {};
+		struct mana_obj_spec cq_spec = {};
+
+		ibwq = ind_tbl->ind_tbl[i];
+		wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+
+		ibcq = ibwq->cq;
+		cq = container_of(ibcq, struct mana_ib_cq, ibcq);
+
+		wq_spec.gdma_region = wq->gdma_region;
+		wq_spec.queue_size = wq->wq_buf_size;
+
+		cq_spec.gdma_region = cq->gdma_region;
+		cq_spec.queue_size = cq->cqe * COMP_ENTRY_SIZE;
+		cq_spec.modr_ctx_id = 0;
+		cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+
+		ret = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_RQ,
+					 &wq_spec, &cq_spec, &wq->rx_object);
+		if (ret)
+			goto fail;
+
+		/* The GDMA regions are now owned by the WQ object */
+		wq->gdma_region = GDMA_INVALID_DMA_REGION;
+		cq->gdma_region = GDMA_INVALID_DMA_REGION;
+
+		wq->id = wq_spec.queue_index;
+		cq->id = cq_spec.queue_index;
+
+		ibdev_dbg(&mdev->ib_dev,
+			  "ret %d rx_object 0x%llx wq id %llu cq id %llu\n",
+			  ret, wq->rx_object, wq->id, cq->id);
+
+		resp.entries[i].cqid = cq->id;
+		resp.entries[i].wqid = wq->id;
+
+		mana_ind_table[i] = wq->rx_object;
+	}
+	resp.num_entries = i;
+
+	ret = mana_ib_cfg_vport_steering(mdev, ndev, wq->rx_object,
+					 mana_ind_table,
+					 ind_tbl->log_ind_tbl_size,
+					 ucmd.rx_hash_key_len,
+					 ucmd.rx_hash_key);
+	if (ret)
+		goto fail;
+
+	ret = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (ret) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to copy to udata create rss-qp, %d\n",
+			  ret);
+		goto fail;
+	}
+
+	kfree(mana_ind_table);
+
+	return 0;
+
+fail:
+	while (i-- > 0) {
+		ibwq = ind_tbl->ind_tbl[i];
+		wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+		mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+	}
+
+	kfree(mana_ind_table);
+
+	return ret;
+}
+
+static int mana_ib_create_qp_raw(struct ib_qp *ibqp, struct ib_pd *ibpd,
+				 struct ib_qp_init_attr *attr,
+				 struct ib_udata *udata)
+{
+	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+	struct mana_ib_dev *mdev =
+		container_of(ibpd->device, struct mana_ib_dev, ib_dev);
+	struct mana_ib_cq *send_cq =
+		container_of(attr->send_cq, struct mana_ib_cq, ibcq);
+	struct mana_ib_ucontext *mana_ucontext =
+		rdma_udata_to_drv_context(udata, struct mana_ib_ucontext,
+					  ibucontext);
+	struct mana_ib_create_qp_resp resp = {};
+	struct gdma_dev *gd = mdev->gdma_dev;
+	struct mana_ib_create_qp ucmd = {};
+	struct mana_obj_spec wq_spec = {};
+	struct mana_obj_spec cq_spec = {};
+	struct mana_port_context *mpc;
+	struct mana_context *mc;
+	struct net_device *ndev;
+	struct ib_umem *umem;
+	int err;
+	u32 port;
+
+	mc = gd->driver_data;
+
+	if (!mana_ucontext || udata->inlen < sizeof(ucmd))
+		return -EINVAL;
+
+	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to copy from udata create qp-raw, %d\n", err);
+		return err;
+	}
+
+	/* IB ports start with 1, MANA Ethernet ports start with 0 */
+	port = ucmd.port;
+	if (ucmd.port > mc->num_ports)
+		return -EINVAL;
+
+	if (attr->cap.max_send_wr > MAX_SEND_BUFFERS_PER_QUEUE) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Requested max_send_wr %d exceeding limit\n",
+			  attr->cap.max_send_wr);
+		return -EINVAL;
+	}
+
+	if (attr->cap.max_send_sge > MAX_TX_WQE_SGL_ENTRIES) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Requested max_send_sge %d exceeding limit\n",
+			  attr->cap.max_send_sge);
+		return -EINVAL;
+	}
+
+	ndev = mc->ports[port - 1];
+	mpc = netdev_priv(ndev);
+	ibdev_dbg(&mdev->ib_dev, "port %u ndev %p mpc %p\n", port, ndev, mpc);
+
+	err = mana_ib_cfg_vport(mdev, port - 1, pd, mana_ucontext->doorbell);
+	if (err)
+		return -ENODEV;
+
+	qp->port = port;
+
+	ibdev_dbg(&mdev->ib_dev, "ucmd sq_buf_addr 0x%llx port %u\n",
+		  ucmd.sq_buf_addr, ucmd.port);
+
+	umem = ib_umem_get(ibpd->device, ucmd.sq_buf_addr, ucmd.sq_buf_size,
+			   IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(umem)) {
+		err = PTR_ERR(umem);
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to get umem for create qp-raw, err %d\n",
+			  err);
+		goto err_free_vport;
+	}
+	qp->sq_umem = umem;
+
+	err = mana_ib_gd_create_dma_region(mdev, qp->sq_umem,
+					   &qp->sq_gdma_region);
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to create dma region for create qp-raw, %d\n",
+			  err);
+		goto err_release_umem;
+	}
+
+	ibdev_dbg(&mdev->ib_dev,
+		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+		  err, qp->sq_gdma_region);
+
+	/* Create a WQ on the same port handle used by the Ethernet */
+	wq_spec.gdma_region = qp->sq_gdma_region;
+	wq_spec.queue_size = ucmd.sq_buf_size;
+
+	cq_spec.gdma_region = send_cq->gdma_region;
+	cq_spec.queue_size = send_cq->cqe * COMP_ENTRY_SIZE;
+	cq_spec.modr_ctx_id = 0;
+	cq_spec.attached_eq = GDMA_CQ_NO_EQ;
+
+	err = mana_create_wq_obj(mpc, mpc->port_handle, GDMA_SQ, &wq_spec,
+				 &cq_spec, &qp->tx_object);
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to create wq for create raw-qp, err %d\n",
+			  err);
+		goto err_destroy_dma_region;
+	}
+
+	/* The GDMA regions are now owned by the WQ object */
+	qp->sq_gdma_region = GDMA_INVALID_DMA_REGION;
+	send_cq->gdma_region = GDMA_INVALID_DMA_REGION;
+
+	qp->sq_id = wq_spec.queue_index;
+	send_cq->id = cq_spec.queue_index;
+
+	ibdev_dbg(&mdev->ib_dev,
+		  "ret %d qp->tx_object 0x%llx sq id %llu cq id %llu\n", err,
+		  qp->tx_object, qp->sq_id, send_cq->id);
+
+	resp.sqid = qp->sq_id;
+	resp.cqid = send_cq->id;
+	resp.tx_vp_offset = pd->tx_vp_offset;
+
+	err = ib_copy_to_udata(udata, &resp, sizeof(resp));
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed copy udata for create qp-raw, %d\n",
+			  err);
+		goto err_destroy_wq_obj;
+	}
+
+	return 0;
+
+err_destroy_wq_obj:
+	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+
+err_destroy_dma_region:
+	mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+
+err_release_umem:
+	ib_umem_release(umem);
+
+err_free_vport:
+	mana_ib_uncfg_vport(mdev, pd, port - 1);
+
+	return err;
+}
+
+int mana_ib_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attr,
+		      struct ib_udata *udata)
+{
+	switch (attr->qp_type) {
+	case IB_QPT_RAW_PACKET:
+		/* When rwq_ind_tbl is used, it's for creating WQs for RSS */
+		if (attr->rwq_ind_tbl)
+			return mana_ib_create_qp_rss(ibqp, ibqp->pd, attr,
+						     udata);
+
+		return mana_ib_create_qp_raw(ibqp, ibqp->pd, attr, udata);
+	default:
+		/* Creating QP other than IB_QPT_RAW_PACKET is not supported */
+		ibdev_dbg(ibqp->device, "Creating QP type %u not supported\n",
+			  attr->qp_type);
+	}
+
+	return -EINVAL;
+}
+
+int mana_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
+		      int attr_mask, struct ib_udata *udata)
+{
+	/* modify_qp is not supported by this version of the driver */
+	return -EOPNOTSUPP;
+}
+
+static int mana_ib_destroy_qp_rss(struct mana_ib_qp *qp,
+				  struct ib_rwq_ind_table *ind_tbl,
+				  struct ib_udata *udata)
+{
+	struct mana_ib_dev *mdev =
+		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+	struct gdma_dev *gd = mdev->gdma_dev;
+	struct mana_port_context *mpc;
+	struct mana_context *mc;
+	struct net_device *ndev;
+	struct mana_ib_wq *wq;
+	struct ib_wq *ibwq;
+	int i;
+
+	mc = gd->driver_data;
+	ndev = mc->ports[qp->port - 1];
+	mpc = netdev_priv(ndev);
+
+	for (i = 0; i < (1 << ind_tbl->log_ind_tbl_size); i++) {
+		ibwq = ind_tbl->ind_tbl[i];
+		wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+		ibdev_dbg(&mdev->ib_dev, "destroying wq->rx_object %llu\n",
+			  wq->rx_object);
+		mana_destroy_wq_obj(mpc, GDMA_RQ, wq->rx_object);
+	}
+
+	return 0;
+}
+
+static int mana_ib_destroy_qp_raw(struct mana_ib_qp *qp, struct ib_udata *udata)
+{
+	struct mana_ib_dev *mdev =
+		container_of(qp->ibqp.device, struct mana_ib_dev, ib_dev);
+	struct gdma_dev *gd = mdev->gdma_dev;
+	struct ib_pd *ibpd = qp->ibqp.pd;
+	struct mana_port_context *mpc;
+	struct mana_context *mc;
+	struct net_device *ndev;
+	struct mana_ib_pd *pd;
+
+	mc = gd->driver_data;
+	ndev = mc->ports[qp->port - 1];
+	mpc = netdev_priv(ndev);
+	pd = container_of(ibpd, struct mana_ib_pd, ibpd);
+
+	mana_destroy_wq_obj(mpc, GDMA_SQ, qp->tx_object);
+
+	if (qp->sq_umem) {
+		mana_ib_gd_destroy_dma_region(mdev, qp->sq_gdma_region);
+		ib_umem_release(qp->sq_umem);
+	}
+
+	mana_ib_uncfg_vport(mdev, pd, qp->port - 1);
+
+	return 0;
+}
+
+int mana_ib_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+{
+	struct mana_ib_qp *qp = container_of(ibqp, struct mana_ib_qp, ibqp);
+
+	switch (ibqp->qp_type) {
+	case IB_QPT_RAW_PACKET:
+		if (ibqp->rwq_ind_tbl)
+			return mana_ib_destroy_qp_rss(qp, ibqp->rwq_ind_tbl,
+						      udata);
+
+		return mana_ib_destroy_qp_raw(qp, udata);
+
+	default:
+		ibdev_dbg(ibqp->device, "Unexpected QP type %u\n",
+			  ibqp->qp_type);
+	}
+
+	return -ENOENT;
+}
diff --git a/drivers/infiniband/hw/mana/wq.c b/drivers/infiniband/hw/mana/wq.c
new file mode 100644
index 0000000000000..372d361510e0c
--- /dev/null
+++ b/drivers/infiniband/hw/mana/wq.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#include "mana_ib.h"
+
+struct ib_wq *mana_ib_create_wq(struct ib_pd *pd,
+				struct ib_wq_init_attr *init_attr,
+				struct ib_udata *udata)
+{
+	struct mana_ib_dev *mdev =
+		container_of(pd->device, struct mana_ib_dev, ib_dev);
+	struct mana_ib_create_wq ucmd = {};
+	struct mana_ib_wq *wq;
+	struct ib_umem *umem;
+	int err;
+
+	if (udata->inlen < sizeof(ucmd))
+		return ERR_PTR(-EINVAL);
+
+	err = ib_copy_from_udata(&ucmd, udata, min(sizeof(ucmd), udata->inlen));
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to copy from udata for create wq, %d\n", err);
+		return ERR_PTR(err);
+	}
+
+	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
+	if (!wq)
+		return ERR_PTR(-ENOMEM);
+
+	ibdev_dbg(&mdev->ib_dev, "ucmd wq_buf_addr 0x%llx\n", ucmd.wq_buf_addr);
+
+	umem = ib_umem_get(pd->device, ucmd.wq_buf_addr, ucmd.wq_buf_size,
+			   IB_ACCESS_LOCAL_WRITE);
+	if (IS_ERR(umem)) {
+		err = PTR_ERR(umem);
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to get umem for create wq, err %d\n", err);
+		goto err_free_wq;
+	}
+
+	wq->umem = umem;
+	wq->wqe = init_attr->max_wr;
+	wq->wq_buf_size = ucmd.wq_buf_size;
+	wq->rx_object = INVALID_MANA_HANDLE;
+
+	err = mana_ib_gd_create_dma_region(mdev, wq->umem, &wq->gdma_region);
+	if (err) {
+		ibdev_dbg(&mdev->ib_dev,
+			  "Failed to create dma region for create wq, %d\n",
+			  err);
+		goto err_release_umem;
+	}
+
+	ibdev_dbg(&mdev->ib_dev,
+		  "mana_ib_gd_create_dma_region ret %d gdma_region 0x%llx\n",
+		  err, wq->gdma_region);
+
+	/* WQ ID is returned at wq_create time, doesn't know the value yet */
+
+	return &wq->ibwq;
+
+err_release_umem:
+	ib_umem_release(umem);
+
+err_free_wq:
+	kfree(wq);
+
+	return ERR_PTR(err);
+}
+
+int mana_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
+		      u32 wq_attr_mask, struct ib_udata *udata)
+{
+	/* modify_wq is not supported by this version of the driver */
+	return -EOPNOTSUPP;
+}
+
+int mana_ib_destroy_wq(struct ib_wq *ibwq, struct ib_udata *udata)
+{
+	struct mana_ib_wq *wq = container_of(ibwq, struct mana_ib_wq, ibwq);
+	struct ib_device *ib_dev = ibwq->device;
+	struct mana_ib_dev *mdev;
+
+	mdev = container_of(ib_dev, struct mana_ib_dev, ib_dev);
+
+	mana_ib_gd_destroy_dma_region(mdev, wq->gdma_region);
+	ib_umem_release(wq->umem);
+
+	kfree(wq);
+
+	return 0;
+}
+
+int mana_ib_create_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_table,
+				 struct ib_rwq_ind_table_init_attr *init_attr,
+				 struct ib_udata *udata)
+{
+	/*
+	 * There is no additional data in ind_table to be maintained by this
+	 * driver, do nothing
+	 */
+	return 0;
+}
+
+int mana_ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *ib_rwq_ind_tbl)
+{
+	/*
+	 * There is no additional data in ind_table to be maintained by this
+	 * driver, do nothing
+	 */
+	return 0;
+}
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 713a8f8cca9a7..20212ffeefb9e 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -412,6 +412,9 @@ int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
 
 extern const struct ethtool_ops mana_ethtool_ops;
 
+/* A CQ can be created not associated with any EQ */
+#define GDMA_CQ_NO_EQ  0xffff
+
 struct mana_obj_spec {
 	u32 queue_index;
 	u64 gdma_region;
diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index 7dd56210226f5..e0c25537fd2e9 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -251,6 +251,7 @@ enum rdma_driver_id {
 	RDMA_DRIVER_EFA,
 	RDMA_DRIVER_SIW,
 	RDMA_DRIVER_ERDMA,
+	RDMA_DRIVER_MANA,
 };
 
 enum ib_uverbs_gid_type {
diff --git a/include/uapi/rdma/mana-abi.h b/include/uapi/rdma/mana-abi.h
new file mode 100644
index 0000000000000..5fcb31b37fb91
--- /dev/null
+++ b/include/uapi/rdma/mana-abi.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) */
+/*
+ * Copyright (c) 2022, Microsoft Corporation. All rights reserved.
+ */
+
+#ifndef MANA_ABI_USER_H
+#define MANA_ABI_USER_H
+
+#include <linux/types.h>
+#include <rdma/ib_user_ioctl_verbs.h>
+
+/*
+ * Increment this value if any changes that break userspace ABI
+ * compatibility are made.
+ */
+
+#define MANA_IB_UVERBS_ABI_VERSION 1
+
+struct mana_ib_create_cq {
+	__aligned_u64 buf_addr;
+};
+
+struct mana_ib_create_qp {
+	__aligned_u64 sq_buf_addr;
+	__u32 sq_buf_size;
+	__u32 port;
+};
+
+struct mana_ib_create_qp_resp {
+	__u32 sqid;
+	__u32 cqid;
+	__u32 tx_vp_offset;
+	__u32 reserved;
+};
+
+struct mana_ib_create_wq {
+	__aligned_u64 wq_buf_addr;
+	__u32 wq_buf_size;
+	__u32 reserved;
+};
+
+/* RX Hash function flags */
+enum mana_ib_rx_hash_function_flags {
+	MANA_IB_RX_HASH_FUNC_TOEPLITZ = 1 << 0,
+};
+
+struct mana_ib_create_qp_rss {
+	__aligned_u64 rx_hash_fields_mask;
+	__u8 rx_hash_function;
+	__u8 reserved[7];
+	__u32 rx_hash_key_len;
+	__u8 rx_hash_key[40];
+	__u32 port;
+};
+
+struct rss_resp_entry {
+	__u32 cqid;
+	__u32 wqid;
+};
+
+struct mana_ib_create_qp_rss_resp {
+	__aligned_u64 num_entries;
+	struct rss_resp_entry entries[64];
+};
+
+#endif
-- 
GitLab


From c5e7ee72862eeeee77fd71b99fa9064f830e0b00 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Fri, 23 Jul 2021 12:15:59 -0400
Subject: [PATCH 1394/4122] Revert "csky: Fixup CONFIG_DEBUG_RSEQ"

This reverts commit f36e0aab6f1f78d770ce859df3f07a9c5763ce5f.

The csky rseq support has been merged without ever notifying the rseq
maintainers, and without any of the required asssembler glue in the rseq
selftests, which means it is entirely untested.

It is also derived from a non-upstream riscv patch which has known bugs.

The assembly part of this revert should be carefully reviewed by the
architecture maintainer because it touches code which has changed since
the merge of the reverted patch.

The rseq selftests assembly glue should be introduced at the same time
as the architecture rseq support. Without the presence of any test, I
recommend reverting rseq support from csky for now.

Link: https://lore.kernel.org/lkml/1257037909.25426.1626705790861.JavaMail.zimbra@efficios.com/
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-csky@vger.kernel.org
---
 arch/csky/kernel/entry.S | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S
index 547b4cd1b24b4..a4b519fbb371f 100644
--- a/arch/csky/kernel/entry.S
+++ b/arch/csky/kernel/entry.S
@@ -50,11 +50,15 @@ ENTRY(csky_systemcall)
 	SAVE_ALL TRAP0_SIZE
 	zero_fp
 	context_tracking
+#ifdef CONFIG_RSEQ_DEBUG
+	mov     a0, sp
+	jbsr    rseq_syscall
+#endif
 	psrset  ee, ie
 
 	lrw     r9, __NR_syscalls
 	cmphs   syscallid, r9		/* Check nr of syscall */
-	bt      1f
+	bt      ret_from_exception
 
 	lrw     r9, sys_call_table
 	ixw     r9, syscallid
@@ -80,11 +84,6 @@ ENTRY(csky_systemcall)
 	jsr     syscallid
 #endif
 	stw     a0, (sp, LSAVE_A0)      /* Save return value */
-1:
-#ifdef CONFIG_DEBUG_RSEQ
-	mov	a0, sp
-	jbsr	rseq_syscall
-#endif
 	jmpi    ret_from_exception
 
 csky_syscall_trace:
@@ -113,10 +112,6 @@ csky_syscall_trace:
 	stw	a0, (sp, LSAVE_A0)	/* Save return value */
 
 1:
-#ifdef CONFIG_DEBUG_RSEQ
-	mov	a0, sp
-	jbsr	rseq_syscall
-#endif
 	mov     a0, sp                  /* right now, sp --> pt_regs */
 	jbsr    syscall_trace_exit
 	br	ret_from_exception
-- 
GitLab


From 7e2004906fb52257772be0ef262fba2d5eb1653b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Fri, 23 Jul 2021 12:16:00 -0400
Subject: [PATCH 1395/4122] Revert "csky: Add support for restartable sequence"

This reverts commit 9866d141a0977ace974400bf1f793dfc163409ce.

The csky rseq support has been merged without ever notifying the rseq
maintainers, and without any of the required asssembler glue in the rseq
selftests, which means it is entirely untested.

It is also derived from a non-upstream riscv patch which has known bugs.

The assembly part of this revert should be carefully reviewed by the
architecture maintainer because it touches code which has changed since
the merge of the reverted patch.

The rseq selftests assembly glue should be introduced at the same time
as the architecture rseq support. Without the presence of any test, I
recommend reverting rseq support from csky for now.

Link: https://lore.kernel.org/lkml/1257037909.25426.1626705790861.JavaMail.zimbra@efficios.com/
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-csky@vger.kernel.org
---
 arch/csky/Kconfig         | 1 -
 arch/csky/kernel/entry.S  | 4 ----
 arch/csky/kernel/signal.c | 2 --
 3 files changed, 7 deletions(-)

diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index e0ecd1cc81a94..dba02da6fa344 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -94,7 +94,6 @@ config CSKY
 	select HAVE_PERF_USER_STACK_DUMP
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_REGS_AND_STACK_ACCESS_API
-	select HAVE_RSEQ
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select MAY_HAVE_SPARSE_IRQ
diff --git a/arch/csky/kernel/entry.S b/arch/csky/kernel/entry.S
index a4b519fbb371f..c68cdcc76d60e 100644
--- a/arch/csky/kernel/entry.S
+++ b/arch/csky/kernel/entry.S
@@ -50,10 +50,6 @@ ENTRY(csky_systemcall)
 	SAVE_ALL TRAP0_SIZE
 	zero_fp
 	context_tracking
-#ifdef CONFIG_RSEQ_DEBUG
-	mov     a0, sp
-	jbsr    rseq_syscall
-#endif
 	psrset  ee, ie
 
 	lrw     r9, __NR_syscalls
diff --git a/arch/csky/kernel/signal.c b/arch/csky/kernel/signal.c
index b7b3685283d76..10da0fefd4319 100644
--- a/arch/csky/kernel/signal.c
+++ b/arch/csky/kernel/signal.c
@@ -179,8 +179,6 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
-	rseq_signal_deliver(ksig, regs);
-
 	/* Are we from a system call? */
 	if (in_syscall(regs)) {
 		/* Avoid additional syscall restarting via ret_from_exception */
-- 
GitLab


From 61c581a46a9668747d355436bd4b2505594539bd Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 3 Nov 2022 20:22:57 +0100
Subject: [PATCH 1396/4122] crypto: move gf128mul library into lib/crypto

The gf128mul library does not depend on the crypto API at all, so it can
be moved into lib/crypto. This will allow us to use it in other library
code in a subsequent patch without having to depend on CONFIG_CRYPTO.

While at it, change the Kconfig symbol name to align with other crypto
library implementations. However, the source file name is retained, as
it is reflected in the module .ko filename, and changing this might
break things for users.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/Kconfig           | 2 +-
 arch/arm64/crypto/Kconfig         | 2 +-
 crypto/Kconfig                    | 9 +++------
 crypto/Makefile                   | 1 -
 drivers/crypto/chelsio/Kconfig    | 2 +-
 lib/crypto/Kconfig                | 3 +++
 lib/crypto/Makefile               | 2 ++
 {crypto => lib/crypto}/gf128mul.c | 0
 8 files changed, 11 insertions(+), 10 deletions(-)
 rename {crypto => lib/crypto}/gf128mul.c (100%)

diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index 3858c4d4cb988..7b2b7d043d9be 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -18,7 +18,7 @@ config CRYPTO_GHASH_ARM_CE
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_CRYPTD
-	select CRYPTO_GF128MUL
+	select CRYPTO_LIB_GF128MUL
 	help
 	  GCM GHASH function (NIST SP800-38D)
 
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index 6793d5bc3ee53..6d06b448a66e0 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -6,8 +6,8 @@ config CRYPTO_GHASH_ARM64_CE
 	tristate "Hash functions: GHASH (ARMv8 Crypto Extensions)"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
-	select CRYPTO_GF128MUL
 	select CRYPTO_LIB_AES
+	select CRYPTO_LIB_GF128MUL
 	select CRYPTO_AEAD
 	help
 	  GCM GHASH function (NIST SP800-38D)
diff --git a/crypto/Kconfig b/crypto/Kconfig
index d779667671b23..9c86f70451576 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -175,9 +175,6 @@ config CRYPTO_MANAGER_EXTRA_TESTS
 	  This is intended for developer use only, as these tests take much
 	  longer to run than the normal self tests.
 
-config CRYPTO_GF128MUL
-	tristate
-
 config CRYPTO_NULL
 	tristate "Null algorithms"
 	select CRYPTO_NULL2
@@ -714,9 +711,9 @@ config CRYPTO_KEYWRAP
 
 config CRYPTO_LRW
 	tristate "LRW (Liskov Rivest Wagner)"
+	select CRYPTO_LIB_GF128MUL
 	select CRYPTO_SKCIPHER
 	select CRYPTO_MANAGER
-	select CRYPTO_GF128MUL
 	select CRYPTO_ECB
 	help
 	  LRW (Liskov Rivest Wagner) mode
@@ -926,8 +923,8 @@ config CRYPTO_CMAC
 
 config CRYPTO_GHASH
 	tristate "GHASH"
-	select CRYPTO_GF128MUL
 	select CRYPTO_HASH
+	select CRYPTO_LIB_GF128MUL
 	help
 	  GCM GHASH function (NIST SP800-38D)
 
@@ -967,8 +964,8 @@ config CRYPTO_MICHAEL_MIC
 
 config CRYPTO_POLYVAL
 	tristate
-	select CRYPTO_GF128MUL
 	select CRYPTO_HASH
+	select CRYPTO_LIB_GF128MUL
 	help
 	  POLYVAL hash function for HCTR2
 
diff --git a/crypto/Makefile b/crypto/Makefile
index 303b21c43df05..d0126c915834b 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -85,7 +85,6 @@ obj-$(CONFIG_CRYPTO_WP512) += wp512.o
 CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
 obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
 CFLAGS_blake2b_generic.o := -Wframe-larger-than=4096 #  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105930
-obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
 obj-$(CONFIG_CRYPTO_ECB) += ecb.o
 obj-$(CONFIG_CRYPTO_CBC) += cbc.o
 obj-$(CONFIG_CRYPTO_CFB) += cfb.o
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index f886401af13e7..5dd3f6a4781a2 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -3,11 +3,11 @@ config CRYPTO_DEV_CHELSIO
 	tristate "Chelsio Crypto Co-processor Driver"
 	depends on CHELSIO_T4
 	select CRYPTO_LIB_AES
+	select CRYPTO_LIB_GF128MUL
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
 	select CRYPTO_AUTHENC
-	select CRYPTO_GF128MUL
 	help
 	  The Chelsio Crypto Co-processor driver for T6 adapters.
 
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 7e9683e9f5c63..6767d86959de8 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -11,6 +11,9 @@ config CRYPTO_LIB_AES
 config CRYPTO_LIB_ARC4
 	tristate
 
+config CRYPTO_LIB_GF128MUL
+	tristate
+
 config CRYPTO_ARCH_HAVE_LIB_BLAKE2S
 	bool
 	help
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index c852f067ab060..7000eeb72286e 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -13,6 +13,8 @@ libaes-y					:= aes.o
 obj-$(CONFIG_CRYPTO_LIB_ARC4)			+= libarc4.o
 libarc4-y					:= arc4.o
 
+obj-$(CONFIG_CRYPTO_LIB_GF128MUL)		+= gf128mul.o
+
 # blake2s is used by the /dev/random driver which is always builtin
 obj-y						+= libblake2s.o
 libblake2s-y					:= blake2s.o
diff --git a/crypto/gf128mul.c b/lib/crypto/gf128mul.c
similarity index 100%
rename from crypto/gf128mul.c
rename to lib/crypto/gf128mul.c
-- 
GitLab


From b67ce439fef69a1a339cf2743c8198e8d90e6821 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 3 Nov 2022 20:22:58 +0100
Subject: [PATCH 1397/4122] crypto: lib/gf128mul - make gf128mul_lle time
 invariant

The gf128mul library has different variants with different
memory/performance tradeoffs, where the faster ones use 4k or 64k lookup
tables precomputed at runtime, which are based on one of the
multiplication factors, which is commonly the key for keyed hash
algorithms such as GHASH.

The slowest variant is gf128_mul_lle() [and its bbe/ble counterparts],
which does not use precomputed lookup tables, but it still relies on a
single u16[256] lookup table which is input independent. The use of such
a table may cause the execution time of gf128_mul_lle() to correlate
with the value of the inputs, which is generally something that must be
avoided for cryptographic algorithms. On top of that, the function uses
a sequence of if () statements that conditionally invoke be128_xor()
based on which bits are set in the second argument of the function,
which is usually a pointer to the multiplication factor that represents
the key.

In order to remove the correlation between the execution time of
gf128_mul_lle() and the value of its inputs, let's address the
identified shortcomings:
- add a time invariant version of gf128mul_x8_lle() that replaces the
  table lookup with the expression that is used at compile time to
  populate the lookup table;
- make the invocations of be128_xor() unconditional, but pass a zero
  vector as the third argument if the associated bit in the key is
  cleared.

The resulting code is likely to be significantly slower. However, given
that this is the slowest version already, making it even slower in order
to make it more secure is assumed to be justified.

The bbe and ble counterparts could receive the same treatment, but the
former is never used anywhere in the kernel, and the latter is only
used in the driver for a asynchronous crypto h/w accelerator (Chelsio),
where timing variances are unlikely to matter.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/crypto/gf128mul.c | 58 +++++++++++++++++++++++++++++--------------
 1 file changed, 39 insertions(+), 19 deletions(-)

diff --git a/lib/crypto/gf128mul.c b/lib/crypto/gf128mul.c
index a69ae3e6c16cb..8f8c45e0cdcf2 100644
--- a/lib/crypto/gf128mul.c
+++ b/lib/crypto/gf128mul.c
@@ -146,6 +146,17 @@ static void gf128mul_x8_lle(be128 *x)
 	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
 }
 
+/* time invariant version of gf128mul_x8_lle */
+static void gf128mul_x8_lle_ti(be128 *x)
+{
+	u64 a = be64_to_cpu(x->a);
+	u64 b = be64_to_cpu(x->b);
+	u64 _tt = xda_le(b & 0xff); /* avoid table lookup */
+
+	x->b = cpu_to_be64((b >> 8) | (a << 56));
+	x->a = cpu_to_be64((a >> 8) ^ (_tt << 48));
+}
+
 static void gf128mul_x8_bbe(be128 *x)
 {
 	u64 a = be64_to_cpu(x->a);
@@ -169,38 +180,47 @@ EXPORT_SYMBOL(gf128mul_x8_ble);
 
 void gf128mul_lle(be128 *r, const be128 *b)
 {
-	be128 p[8];
+	/*
+	 * The p array should be aligned to twice the size of its element type,
+	 * so that every even/odd pair is guaranteed to share a cacheline
+	 * (assuming a cacheline size of 32 bytes or more, which is by far the
+	 * most common). This ensures that each be128_xor() call in the loop
+	 * takes the same amount of time regardless of the value of 'ch', which
+	 * is derived from function parameter 'b', which is commonly used as a
+	 * key, e.g., for GHASH. The odd array elements are all set to zero,
+	 * making each be128_xor() a NOP if its associated bit in 'ch' is not
+	 * set, and this is equivalent to calling be128_xor() conditionally.
+	 * This approach aims to avoid leaking information about such keys
+	 * through execution time variances.
+	 *
+	 * Unfortunately, __aligned(16) or higher does not work on x86 for
+	 * variables on the stack so we need to perform the alignment by hand.
+	 */
+	be128 array[16 + 3] = {};
+	be128 *p = PTR_ALIGN(&array[0], 2 * sizeof(be128));
 	int i;
 
 	p[0] = *r;
 	for (i = 0; i < 7; ++i)
-		gf128mul_x_lle(&p[i + 1], &p[i]);
+		gf128mul_x_lle(&p[2 * i + 2], &p[2 * i]);
 
 	memset(r, 0, sizeof(*r));
 	for (i = 0;;) {
 		u8 ch = ((u8 *)b)[15 - i];
 
-		if (ch & 0x80)
-			be128_xor(r, r, &p[0]);
-		if (ch & 0x40)
-			be128_xor(r, r, &p[1]);
-		if (ch & 0x20)
-			be128_xor(r, r, &p[2]);
-		if (ch & 0x10)
-			be128_xor(r, r, &p[3]);
-		if (ch & 0x08)
-			be128_xor(r, r, &p[4]);
-		if (ch & 0x04)
-			be128_xor(r, r, &p[5]);
-		if (ch & 0x02)
-			be128_xor(r, r, &p[6]);
-		if (ch & 0x01)
-			be128_xor(r, r, &p[7]);
+		be128_xor(r, r, &p[ 0 + !(ch & 0x80)]);
+		be128_xor(r, r, &p[ 2 + !(ch & 0x40)]);
+		be128_xor(r, r, &p[ 4 + !(ch & 0x20)]);
+		be128_xor(r, r, &p[ 6 + !(ch & 0x10)]);
+		be128_xor(r, r, &p[ 8 + !(ch & 0x08)]);
+		be128_xor(r, r, &p[10 + !(ch & 0x04)]);
+		be128_xor(r, r, &p[12 + !(ch & 0x02)]);
+		be128_xor(r, r, &p[14 + !(ch & 0x01)]);
 
 		if (++i >= 16)
 			break;
 
-		gf128mul_x8_lle(r);
+		gf128mul_x8_lle_ti(r); /* use the time invariant version */
 	}
 }
 EXPORT_SYMBOL(gf128mul_lle);
-- 
GitLab


From 520af5da664a8edc4f4c1cd8e6e8488ecccdb7e5 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Thu, 3 Nov 2022 20:22:59 +0100
Subject: [PATCH 1398/4122] crypto: lib/aesgcm - Provide minimal library
 implementation

Implement a minimal library version of AES-GCM based on the existing
library implementations of AES and multiplication in GF(2^128). Using
these primitives, GCM can be implemented in a straight-forward manner.

GCM has a couple of sharp edges, i.e., the amount of input data
processed with the same initialization vector (IV) should be capped to
protect the counter from 32-bit rollover (or carry), and the size of the
authentication tag should be fixed for a given key. [0]

The former concern is addressed trivially, given that the function call
API uses 32-bit signed types for the input lengths. It is still up to
the caller to avoid IV reuse in general, but this is not something we
can police at the implementation level.

As for the latter concern, let's make the authentication tag size part
of the key schedule, and only permit it to be configured as part of the
key expansion routine.

Note that table based AES implementations are susceptible to known
plaintext timing attacks on the encryption key. The AES library already
attempts to mitigate this to some extent, but given that the counter
mode encryption used by GCM operates exclusively on known plaintext by
construction (the IV and therefore the initial counter value are known
to an attacker), let's take some extra care to mitigate this, by calling
the AES library with interrupts disabled.

[0] https://nvlpubs.nist.gov/nistpubs/legacy/sp/nistspecialpublication800-38d.pdf

Link: https://lore.kernel.org/all/c6fb9b25-a4b6-2e4a-2dd1-63adda055a49@amd.com/
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Nikunj A Dadhania <nikunj@amd.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/gcm.h |  22 ++
 lib/crypto/Kconfig   |   6 +
 lib/crypto/Makefile  |   3 +
 lib/crypto/aesgcm.c  | 727 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 758 insertions(+)
 create mode 100644 lib/crypto/aesgcm.c

diff --git a/include/crypto/gcm.h b/include/crypto/gcm.h
index 9d7eff04f2244..fd9df607a8364 100644
--- a/include/crypto/gcm.h
+++ b/include/crypto/gcm.h
@@ -3,6 +3,9 @@
 
 #include <linux/errno.h>
 
+#include <crypto/aes.h>
+#include <crypto/gf128mul.h>
+
 #define GCM_AES_IV_SIZE 12
 #define GCM_RFC4106_IV_SIZE 8
 #define GCM_RFC4543_IV_SIZE 8
@@ -60,4 +63,23 @@ static inline int crypto_ipsec_check_assoclen(unsigned int assoclen)
 
 	return 0;
 }
+
+struct aesgcm_ctx {
+	be128			ghash_key;
+	struct crypto_aes_ctx	aes_ctx;
+	unsigned int		authsize;
+};
+
+int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key,
+		     unsigned int keysize, unsigned int authsize);
+
+void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+		    int crypt_len, const u8 *assoc, int assoc_len,
+		    const u8 iv[GCM_AES_IV_SIZE], u8 *authtag);
+
+bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst,
+				 const u8 *src, int crypt_len, const u8 *assoc,
+				 int assoc_len, const u8 iv[GCM_AES_IV_SIZE],
+				 const u8 *authtag);
+
 #endif
diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig
index 6767d86959de8..45436bfc6dffe 100644
--- a/lib/crypto/Kconfig
+++ b/lib/crypto/Kconfig
@@ -8,6 +8,12 @@ config CRYPTO_LIB_UTILS
 config CRYPTO_LIB_AES
 	tristate
 
+config CRYPTO_LIB_AESGCM
+	tristate
+	select CRYPTO_LIB_AES
+	select CRYPTO_LIB_GF128MUL
+	select CRYPTO_LIB_UTILS
+
 config CRYPTO_LIB_ARC4
 	tristate
 
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 7000eeb72286e..6ec2d4543d9ca 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -10,6 +10,9 @@ obj-$(CONFIG_CRYPTO_LIB_CHACHA_GENERIC)		+= libchacha.o
 obj-$(CONFIG_CRYPTO_LIB_AES)			+= libaes.o
 libaes-y					:= aes.o
 
+obj-$(CONFIG_CRYPTO_LIB_AESGCM)			+= libaesgcm.o
+libaesgcm-y					:= aesgcm.o
+
 obj-$(CONFIG_CRYPTO_LIB_ARC4)			+= libarc4.o
 libarc4-y					:= arc4.o
 
diff --git a/lib/crypto/aesgcm.c b/lib/crypto/aesgcm.c
new file mode 100644
index 0000000000000..c632d6e17af87
--- /dev/null
+++ b/lib/crypto/aesgcm.c
@@ -0,0 +1,727 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Minimal library implementation of GCM
+ *
+ * Copyright 2022 Google LLC
+ */
+
+#include <linux/module.h>
+
+#include <crypto/algapi.h>
+#include <crypto/gcm.h>
+#include <crypto/ghash.h>
+
+#include <asm/irqflags.h>
+
+static void aesgcm_encrypt_block(const struct crypto_aes_ctx *ctx, void *dst,
+				 const void *src)
+{
+	unsigned long flags;
+
+	/*
+	 * In AES-GCM, both the GHASH key derivation and the CTR mode
+	 * encryption operate on known plaintext, making them susceptible to
+	 * timing attacks on the encryption key. The AES library already
+	 * mitigates this risk to some extent by pulling the entire S-box into
+	 * the caches before doing any substitutions, but this strategy is more
+	 * effective when running with interrupts disabled.
+	 */
+	local_irq_save(flags);
+	aes_encrypt(ctx, dst, src);
+	local_irq_restore(flags);
+}
+
+/**
+ * aesgcm_expandkey - Expands the AES and GHASH keys for the AES-GCM key
+ *		      schedule
+ *
+ * @ctx:	The data structure that will hold the AES-GCM key schedule
+ * @key:	The AES encryption input key
+ * @keysize:	The length in bytes of the input key
+ * @authsize:	The size in bytes of the GCM authentication tag
+ *
+ * Returns: 0 on success, or -EINVAL if @keysize or @authsize contain values
+ * that are not permitted by the GCM specification.
+ */
+int aesgcm_expandkey(struct aesgcm_ctx *ctx, const u8 *key,
+		     unsigned int keysize, unsigned int authsize)
+{
+	u8 kin[AES_BLOCK_SIZE] = {};
+	int ret;
+
+	ret = crypto_gcm_check_authsize(authsize) ?:
+	      aes_expandkey(&ctx->aes_ctx, key, keysize);
+	if (ret)
+		return ret;
+
+	ctx->authsize = authsize;
+	aesgcm_encrypt_block(&ctx->aes_ctx, &ctx->ghash_key, kin);
+
+	return 0;
+}
+EXPORT_SYMBOL(aesgcm_expandkey);
+
+static void aesgcm_ghash(be128 *ghash, const be128 *key, const void *src,
+			 int len)
+{
+	while (len > 0) {
+		crypto_xor((u8 *)ghash, src, min(len, GHASH_BLOCK_SIZE));
+		gf128mul_lle(ghash, key);
+
+		src += GHASH_BLOCK_SIZE;
+		len -= GHASH_BLOCK_SIZE;
+	}
+}
+
+static void aesgcm_mac(const struct aesgcm_ctx *ctx, const u8 *src, int src_len,
+		       const u8 *assoc, int assoc_len, __be32 *ctr, u8 *authtag)
+{
+	be128 tail = { cpu_to_be64(assoc_len * 8), cpu_to_be64(src_len * 8) };
+	u8 buf[AES_BLOCK_SIZE];
+	be128 ghash = {};
+
+	aesgcm_ghash(&ghash, &ctx->ghash_key, assoc, assoc_len);
+	aesgcm_ghash(&ghash, &ctx->ghash_key, src, src_len);
+	aesgcm_ghash(&ghash, &ctx->ghash_key, &tail, sizeof(tail));
+
+	ctr[3] = cpu_to_be32(1);
+	aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr);
+	crypto_xor_cpy(authtag, buf, (u8 *)&ghash, ctx->authsize);
+
+	memzero_explicit(&ghash, sizeof(ghash));
+	memzero_explicit(buf, sizeof(buf));
+}
+
+static void aesgcm_crypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+			 int len, __be32 *ctr)
+{
+	u8 buf[AES_BLOCK_SIZE];
+	unsigned int n = 2;
+
+	while (len > 0) {
+		/*
+		 * The counter increment below must not result in overflow or
+		 * carry into the next 32-bit word, as this could result in
+		 * inadvertent IV reuse, which must be avoided at all cost for
+		 * stream ciphers such as AES-CTR. Given the range of 'int
+		 * len', this cannot happen, so no explicit test is necessary.
+		 */
+		ctr[3] = cpu_to_be32(n++);
+		aesgcm_encrypt_block(&ctx->aes_ctx, buf, ctr);
+		crypto_xor_cpy(dst, src, buf, min(len, AES_BLOCK_SIZE));
+
+		dst += AES_BLOCK_SIZE;
+		src += AES_BLOCK_SIZE;
+		len -= AES_BLOCK_SIZE;
+	}
+	memzero_explicit(buf, sizeof(buf));
+}
+
+/**
+ * aesgcm_encrypt - Perform AES-GCM encryption on a block of data
+ *
+ * @ctx:	The AES-GCM key schedule
+ * @dst:	Pointer to the ciphertext output buffer
+ * @src:	Pointer the plaintext (may equal @dst for encryption in place)
+ * @crypt_len:	The size in bytes of the plaintext and ciphertext.
+ * @assoc:	Pointer to the associated data,
+ * @assoc_len:	The size in bytes of the associated data
+ * @iv:		The initialization vector (IV) to use for this block of data
+ *		(must be 12 bytes in size as per the GCM spec recommendation)
+ * @authtag:	The address of the buffer in memory where the authentication
+ *		tag should be stored. The buffer is assumed to have space for
+ *		@ctx->authsize bytes.
+ */
+void aesgcm_encrypt(const struct aesgcm_ctx *ctx, u8 *dst, const u8 *src,
+		    int crypt_len, const u8 *assoc, int assoc_len,
+		    const u8 iv[GCM_AES_IV_SIZE], u8 *authtag)
+{
+	__be32 ctr[4];
+
+	memcpy(ctr, iv, GCM_AES_IV_SIZE);
+
+	aesgcm_crypt(ctx, dst, src, crypt_len, ctr);
+	aesgcm_mac(ctx, dst, crypt_len, assoc, assoc_len, ctr, authtag);
+}
+EXPORT_SYMBOL(aesgcm_encrypt);
+
+/**
+ * aesgcm_decrypt - Perform AES-GCM decryption on a block of data
+ *
+ * @ctx:	The AES-GCM key schedule
+ * @dst:	Pointer to the plaintext output buffer
+ * @src:	Pointer the ciphertext (may equal @dst for decryption in place)
+ * @crypt_len:	The size in bytes of the plaintext and ciphertext.
+ * @assoc:	Pointer to the associated data,
+ * @assoc_len:	The size in bytes of the associated data
+ * @iv:		The initialization vector (IV) to use for this block of data
+ *		(must be 12 bytes in size as per the GCM spec recommendation)
+ * @authtag:	The address of the buffer in memory where the authentication
+ *		tag is stored.
+ *
+ * Returns: true on success, or false if the ciphertext failed authentication.
+ * On failure, no plaintext will be returned.
+ */
+bool __must_check aesgcm_decrypt(const struct aesgcm_ctx *ctx, u8 *dst,
+				 const u8 *src, int crypt_len, const u8 *assoc,
+				 int assoc_len, const u8 iv[GCM_AES_IV_SIZE],
+				 const u8 *authtag)
+{
+	u8 tagbuf[AES_BLOCK_SIZE];
+	__be32 ctr[4];
+
+	memcpy(ctr, iv, GCM_AES_IV_SIZE);
+
+	aesgcm_mac(ctx, src, crypt_len, assoc, assoc_len, ctr, tagbuf);
+	if (crypto_memneq(authtag, tagbuf, ctx->authsize)) {
+		memzero_explicit(tagbuf, sizeof(tagbuf));
+		return false;
+	}
+	aesgcm_crypt(ctx, dst, src, crypt_len, ctr);
+	return true;
+}
+EXPORT_SYMBOL(aesgcm_decrypt);
+
+MODULE_DESCRIPTION("Generic AES-GCM library");
+MODULE_AUTHOR("Ard Biesheuvel <ardb@kernel.org>");
+MODULE_LICENSE("GPL");
+
+#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+
+/*
+ * Test code below. Vectors taken from crypto/testmgr.h
+ */
+
+static const u8 __initconst ctext0[16] =
+	"\x58\xe2\xfc\xce\xfa\x7e\x30\x61"
+	"\x36\x7f\x1d\x57\xa4\xe7\x45\x5a";
+
+static const u8 __initconst ptext1[16];
+
+static const u8 __initconst ctext1[32] =
+	"\x03\x88\xda\xce\x60\xb6\xa3\x92"
+	"\xf3\x28\xc2\xb9\x71\xb2\xfe\x78"
+	"\xab\x6e\x47\xd4\x2c\xec\x13\xbd"
+	"\xf5\x3a\x67\xb2\x12\x57\xbd\xdf";
+
+static const u8 __initconst ptext2[64] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext2[80] =
+	"\x42\x83\x1e\xc2\x21\x77\x74\x24"
+	"\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
+	"\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
+	"\x35\xc1\x7e\x23\x29\xac\xa1\x2e"
+	"\x21\xd5\x14\xb2\x54\x66\x93\x1c"
+	"\x7d\x8f\x6a\x5a\xac\x84\xaa\x05"
+	"\x1b\xa3\x0b\x39\x6a\x0a\xac\x97"
+	"\x3d\x58\xe0\x91\x47\x3f\x59\x85"
+	"\x4d\x5c\x2a\xf3\x27\xcd\x64\xa6"
+	"\x2c\xf3\x5a\xbd\x2b\xa6\xfa\xb4";
+
+static const u8 __initconst ptext3[60] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext3[76] =
+	"\x42\x83\x1e\xc2\x21\x77\x74\x24"
+	"\x4b\x72\x21\xb7\x84\xd0\xd4\x9c"
+	"\xe3\xaa\x21\x2f\x2c\x02\xa4\xe0"
+	"\x35\xc1\x7e\x23\x29\xac\xa1\x2e"
+	"\x21\xd5\x14\xb2\x54\x66\x93\x1c"
+	"\x7d\x8f\x6a\x5a\xac\x84\xaa\x05"
+	"\x1b\xa3\x0b\x39\x6a\x0a\xac\x97"
+	"\x3d\x58\xe0\x91"
+	"\x5b\xc9\x4f\xbc\x32\x21\xa5\xdb"
+	"\x94\xfa\xe9\x5a\xe7\x12\x1a\x47";
+
+static const u8 __initconst ctext4[16] =
+	"\xcd\x33\xb2\x8a\xc7\x73\xf7\x4b"
+	"\xa0\x0e\xd1\xf3\x12\x57\x24\x35";
+
+static const u8 __initconst ctext5[32] =
+	"\x98\xe7\x24\x7c\x07\xf0\xfe\x41"
+	"\x1c\x26\x7e\x43\x84\xb0\xf6\x00"
+	"\x2f\xf5\x8d\x80\x03\x39\x27\xab"
+	"\x8e\xf4\xd4\x58\x75\x14\xf0\xfb";
+
+static const u8 __initconst ptext6[64] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext6[80] =
+	"\x39\x80\xca\x0b\x3c\x00\xe8\x41"
+	"\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
+	"\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
+	"\x62\x85\x93\xb4\x0c\xa1\xe1\x9c"
+	"\x7d\x77\x3d\x00\xc1\x44\xc5\x25"
+	"\xac\x61\x9d\x18\xc8\x4a\x3f\x47"
+	"\x18\xe2\x44\x8b\x2f\xe3\x24\xd9"
+	"\xcc\xda\x27\x10\xac\xad\xe2\x56"
+	"\x99\x24\xa7\xc8\x58\x73\x36\xbf"
+	"\xb1\x18\x02\x4d\xb8\x67\x4a\x14";
+
+static const u8 __initconst ctext7[16] =
+	"\x53\x0f\x8a\xfb\xc7\x45\x36\xb9"
+	"\xa9\x63\xb4\xf1\xc4\xcb\x73\x8b";
+
+static const u8 __initconst ctext8[32] =
+	"\xce\xa7\x40\x3d\x4d\x60\x6b\x6e"
+	"\x07\x4e\xc5\xd3\xba\xf3\x9d\x18"
+	"\xd0\xd1\xc8\xa7\x99\x99\x6b\xf0"
+	"\x26\x5b\x98\xb5\xd4\x8a\xb9\x19";
+
+static const u8 __initconst ptext9[64] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39\x1a\xaf\xd2\x55";
+
+static const u8 __initconst ctext9[80] =
+	"\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
+	"\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
+	"\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
+	"\x75\x98\xa2\xbd\x25\x55\xd1\xaa"
+	"\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d"
+	"\xa7\xb0\x8b\x10\x56\x82\x88\x38"
+	"\xc5\xf6\x1e\x63\x93\xba\x7a\x0a"
+	"\xbc\xc9\xf6\x62\x89\x80\x15\xad"
+	"\xb0\x94\xda\xc5\xd9\x34\x71\xbd"
+	"\xec\x1a\x50\x22\x70\xe3\xcc\x6c";
+
+static const u8 __initconst ptext10[60] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext10[76] =
+	"\x52\x2d\xc1\xf0\x99\x56\x7d\x07"
+	"\xf4\x7f\x37\xa3\x2a\x84\x42\x7d"
+	"\x64\x3a\x8c\xdc\xbf\xe5\xc0\xc9"
+	"\x75\x98\xa2\xbd\x25\x55\xd1\xaa"
+	"\x8c\xb0\x8e\x48\x59\x0d\xbb\x3d"
+	"\xa7\xb0\x8b\x10\x56\x82\x88\x38"
+	"\xc5\xf6\x1e\x63\x93\xba\x7a\x0a"
+	"\xbc\xc9\xf6\x62"
+	"\x76\xfc\x6e\xce\x0f\x4e\x17\x68"
+	"\xcd\xdf\x88\x53\xbb\x2d\x55\x1b";
+
+static const u8 __initconst ptext11[60] =
+	"\xd9\x31\x32\x25\xf8\x84\x06\xe5"
+	"\xa5\x59\x09\xc5\xaf\xf5\x26\x9a"
+	"\x86\xa7\xa9\x53\x15\x34\xf7\xda"
+	"\x2e\x4c\x30\x3d\x8a\x31\x8a\x72"
+	"\x1c\x3c\x0c\x95\x95\x68\x09\x53"
+	"\x2f\xcf\x0e\x24\x49\xa6\xb5\x25"
+	"\xb1\x6a\xed\xf5\xaa\x0d\xe6\x57"
+	"\xba\x63\x7b\x39";
+
+static const u8 __initconst ctext11[76] =
+	"\x39\x80\xca\x0b\x3c\x00\xe8\x41"
+	"\xeb\x06\xfa\xc4\x87\x2a\x27\x57"
+	"\x85\x9e\x1c\xea\xa6\xef\xd9\x84"
+	"\x62\x85\x93\xb4\x0c\xa1\xe1\x9c"
+	"\x7d\x77\x3d\x00\xc1\x44\xc5\x25"
+	"\xac\x61\x9d\x18\xc8\x4a\x3f\x47"
+	"\x18\xe2\x44\x8b\x2f\xe3\x24\xd9"
+	"\xcc\xda\x27\x10"
+	"\x25\x19\x49\x8e\x80\xf1\x47\x8f"
+	"\x37\xba\x55\xbd\x6d\x27\x61\x8c";
+
+static const u8 __initconst ptext12[719] =
+	"\x42\xc1\xcc\x08\x48\x6f\x41\x3f"
+	"\x2f\x11\x66\x8b\x2a\x16\xf0\xe0"
+	"\x58\x83\xf0\xc3\x70\x14\xc0\x5b"
+	"\x3f\xec\x1d\x25\x3c\x51\xd2\x03"
+	"\xcf\x59\x74\x1f\xb2\x85\xb4\x07"
+	"\xc6\x6a\x63\x39\x8a\x5b\xde\xcb"
+	"\xaf\x08\x44\xbd\x6f\x91\x15\xe1"
+	"\xf5\x7a\x6e\x18\xbd\xdd\x61\x50"
+	"\x59\xa9\x97\xab\xbb\x0e\x74\x5c"
+	"\x00\xa4\x43\x54\x04\x54\x9b\x3b"
+	"\x77\xec\xfd\x5c\xa6\xe8\x7b\x08"
+	"\xae\xe6\x10\x3f\x32\x65\xd1\xfc"
+	"\xa4\x1d\x2c\x31\xfb\x33\x7a\xb3"
+	"\x35\x23\xf4\x20\x41\xd4\xad\x82"
+	"\x8b\xa4\xad\x96\x1c\x20\x53\xbe"
+	"\x0e\xa6\xf4\xdc\x78\x49\x3e\x72"
+	"\xb1\xa9\xb5\x83\xcb\x08\x54\xb7"
+	"\xad\x49\x3a\xae\x98\xce\xa6\x66"
+	"\x10\x30\x90\x8c\x55\x83\xd7\x7c"
+	"\x8b\xe6\x53\xde\xd2\x6e\x18\x21"
+	"\x01\x52\xd1\x9f\x9d\xbb\x9c\x73"
+	"\x57\xcc\x89\x09\x75\x9b\x78\x70"
+	"\xed\x26\x97\x4d\xb4\xe4\x0c\xa5"
+	"\xfa\x70\x04\x70\xc6\x96\x1c\x7d"
+	"\x54\x41\x77\xa8\xe3\xb0\x7e\x96"
+	"\x82\xd9\xec\xa2\x87\x68\x55\xf9"
+	"\x8f\x9e\x73\x43\x47\x6a\x08\x36"
+	"\x93\x67\xa8\x2d\xde\xac\x41\xa9"
+	"\x5c\x4d\x73\x97\x0f\x70\x68\xfa"
+	"\x56\x4d\x00\xc2\x3b\x1f\xc8\xb9"
+	"\x78\x1f\x51\x07\xe3\x9a\x13\x4e"
+	"\xed\x2b\x2e\xa3\xf7\x44\xb2\xe7"
+	"\xab\x19\x37\xd9\xba\x76\x5e\xd2"
+	"\xf2\x53\x15\x17\x4c\x6b\x16\x9f"
+	"\x02\x66\x49\xca\x7c\x91\x05\xf2"
+	"\x45\x36\x1e\xf5\x77\xad\x1f\x46"
+	"\xa8\x13\xfb\x63\xb6\x08\x99\x63"
+	"\x82\xa2\xed\xb3\xac\xdf\x43\x19"
+	"\x45\xea\x78\x73\xd9\xb7\x39\x11"
+	"\xa3\x13\x7c\xf8\x3f\xf7\xad\x81"
+	"\x48\x2f\xa9\x5c\x5f\xa0\xf0\x79"
+	"\xa4\x47\x7d\x80\x20\x26\xfd\x63"
+	"\x0a\xc7\x7e\x6d\x75\x47\xff\x76"
+	"\x66\x2e\x8a\x6c\x81\x35\xaf\x0b"
+	"\x2e\x6a\x49\x60\xc1\x10\xe1\xe1"
+	"\x54\x03\xa4\x09\x0c\x37\x7a\x15"
+	"\x23\x27\x5b\x8b\x4b\xa5\x64\x97"
+	"\xae\x4a\x50\x73\x1f\x66\x1c\x5c"
+	"\x03\x25\x3c\x8d\x48\x58\x71\x34"
+	"\x0e\xec\x4e\x55\x1a\x03\x6a\xe5"
+	"\xb6\x19\x2b\x84\x2a\x20\xd1\xea"
+	"\x80\x6f\x96\x0e\x05\x62\xc7\x78"
+	"\x87\x79\x60\x38\x46\xb4\x25\x57"
+	"\x6e\x16\x63\xf8\xad\x6e\xd7\x42"
+	"\x69\xe1\x88\xef\x6e\xd5\xb4\x9a"
+	"\x3c\x78\x6c\x3b\xe5\xa0\x1d\x22"
+	"\x86\x5c\x74\x3a\xeb\x24\x26\xc7"
+	"\x09\xfc\x91\x96\x47\x87\x4f\x1a"
+	"\xd6\x6b\x2c\x18\x47\xc0\xb8\x24"
+	"\xa8\x5a\x4a\x9e\xcb\x03\xe7\x2a"
+	"\x09\xe6\x4d\x9c\x6d\x86\x60\xf5"
+	"\x2f\x48\x69\x37\x9f\xf2\xd2\xcb"
+	"\x0e\x5a\xdd\x6e\x8a\xfb\x6a\xfe"
+	"\x0b\x63\xde\x87\x42\x79\x8a\x68"
+	"\x51\x28\x9b\x7a\xeb\xaf\xb8\x2f"
+	"\x9d\xd1\xc7\x45\x90\x08\xc9\x83"
+	"\xe9\x83\x84\xcb\x28\x69\x09\x69"
+	"\xce\x99\x46\x00\x54\xcb\xd8\x38"
+	"\xf9\x53\x4a\xbf\x31\xce\x57\x15"
+	"\x33\xfa\x96\x04\x33\x42\xe3\xc0"
+	"\xb7\x54\x4a\x65\x7a\x7c\x02\xe6"
+	"\x19\x95\xd0\x0e\x82\x07\x63\xf9"
+	"\xe1\x2b\x2a\xfc\x55\x92\x52\xc9"
+	"\xb5\x9f\x23\x28\x60\xe7\x20\x51"
+	"\x10\xd3\xed\x6d\x9b\xab\xb8\xe2"
+	"\x5d\x9a\x34\xb3\xbe\x9c\x64\xcb"
+	"\x78\xc6\x91\x22\x40\x91\x80\xbe"
+	"\xd7\x78\x5c\x0e\x0a\xdc\x08\xe9"
+	"\x67\x10\xa4\x83\x98\x79\x23\xe7"
+	"\x92\xda\xa9\x22\x16\xb1\xe7\x78"
+	"\xa3\x1c\x6c\x8f\x35\x7c\x4d\x37"
+	"\x2f\x6e\x0b\x50\x5c\x34\xb9\xf9"
+	"\xe6\x3d\x91\x0d\x32\x95\xaa\x3d"
+	"\x48\x11\x06\xbb\x2d\xf2\x63\x88"
+	"\x3f\x73\x09\xe2\x45\x56\x31\x51"
+	"\xfa\x5e\x4e\x62\xf7\x90\xf9\xa9"
+	"\x7d\x7b\x1b\xb1\xc8\x26\x6e\x66"
+	"\xf6\x90\x9a\x7f\xf2\x57\xcc\x23"
+	"\x59\xfa\xfa\xaa\x44\x04\x01\xa7"
+	"\xa4\x78\xdb\x74\x3d\x8b\xb5";
+
+static const u8 __initconst ctext12[735] =
+	"\x84\x0b\xdb\xd5\xb7\xa8\xfe\x20"
+	"\xbb\xb1\x12\x7f\x41\xea\xb3\xc0"
+	"\xa2\xb4\x37\x19\x11\x58\xb6\x0b"
+	"\x4c\x1d\x38\x05\x54\xd1\x16\x73"
+	"\x8e\x1c\x20\x90\xa2\x9a\xb7\x74"
+	"\x47\xe6\xd8\xfc\x18\x3a\xb4\xea"
+	"\xd5\x16\x5a\x2c\x53\x01\x46\xb3"
+	"\x18\x33\x74\x6c\x50\xf2\xe8\xc0"
+	"\x73\xda\x60\x22\xeb\xe3\xe5\x9b"
+	"\x20\x93\x6c\x4b\x37\x99\xb8\x23"
+	"\x3b\x4e\xac\xe8\x5b\xe8\x0f\xb7"
+	"\xc3\x8f\xfb\x4a\x37\xd9\x39\x95"
+	"\x34\xf1\xdb\x8f\x71\xd9\xc7\x0b"
+	"\x02\xf1\x63\xfc\x9b\xfc\xc5\xab"
+	"\xb9\x14\x13\x21\xdf\xce\xaa\x88"
+	"\x44\x30\x1e\xce\x26\x01\x92\xf8"
+	"\x9f\x00\x4b\x0c\x4b\xf7\x5f\xe0"
+	"\x89\xca\x94\x66\x11\x21\x97\xca"
+	"\x3e\x83\x74\x2d\xdb\x4d\x11\xeb"
+	"\x97\xc2\x14\xff\x9e\x1e\xa0\x6b"
+	"\x08\xb4\x31\x2b\x85\xc6\x85\x6c"
+	"\x90\xec\x39\xc0\xec\xb3\xb5\x4e"
+	"\xf3\x9c\xe7\x83\x3a\x77\x0a\xf4"
+	"\x56\xfe\xce\x18\x33\x6d\x0b\x2d"
+	"\x33\xda\xc8\x05\x5c\xb4\x09\x2a"
+	"\xde\x6b\x52\x98\x01\xef\x36\x3d"
+	"\xbd\xf9\x8f\xa8\x3e\xaa\xcd\xd1"
+	"\x01\x2d\x42\x49\xc3\xb6\x84\xbb"
+	"\x48\x96\xe0\x90\x93\x6c\x48\x64"
+	"\xd4\xfa\x7f\x93\x2c\xa6\x21\xc8"
+	"\x7a\x23\x7b\xaa\x20\x56\x12\xae"
+	"\x16\x9d\x94\x0f\x54\xa1\xec\xca"
+	"\x51\x4e\xf2\x39\xf4\xf8\x5f\x04"
+	"\x5a\x0d\xbf\xf5\x83\xa1\x15\xe1"
+	"\xf5\x3c\xd8\x62\xa3\xed\x47\x89"
+	"\x85\x4c\xe5\xdb\xac\x9e\x17\x1d"
+	"\x0c\x09\xe3\x3e\x39\x5b\x4d\x74"
+	"\x0e\xf5\x34\xee\x70\x11\x4c\xfd"
+	"\xdb\x34\xb1\xb5\x10\x3f\x73\xb7"
+	"\xf5\xfa\xed\xb0\x1f\xa5\xcd\x3c"
+	"\x8d\x35\x83\xd4\x11\x44\x6e\x6c"
+	"\x5b\xe0\x0e\x69\xa5\x39\xe5\xbb"
+	"\xa9\x57\x24\x37\xe6\x1f\xdd\xcf"
+	"\x16\x2a\x13\xf9\x6a\x2d\x90\xa0"
+	"\x03\x60\x7a\xed\x69\xd5\x00\x8b"
+	"\x7e\x4f\xcb\xb9\xfa\x91\xb9\x37"
+	"\xc1\x26\xce\x90\x97\x22\x64\x64"
+	"\xc1\x72\x43\x1b\xf6\xac\xc1\x54"
+	"\x8a\x10\x9c\xdd\x8d\xd5\x8e\xb2"
+	"\xe4\x85\xda\xe0\x20\x5f\xf4\xb4"
+	"\x15\xb5\xa0\x8d\x12\x74\x49\x23"
+	"\x3a\xdf\x4a\xd3\xf0\x3b\x89\xeb"
+	"\xf8\xcc\x62\x7b\xfb\x93\x07\x41"
+	"\x61\x26\x94\x58\x70\xa6\x3c\xe4"
+	"\xff\x58\xc4\x13\x3d\xcb\x36\x6b"
+	"\x32\xe5\xb2\x6d\x03\x74\x6f\x76"
+	"\x93\x77\xde\x48\xc4\xfa\x30\x4a"
+	"\xda\x49\x80\x77\x0f\x1c\xbe\x11"
+	"\xc8\x48\xb1\xe5\xbb\xf2\x8a\xe1"
+	"\x96\x2f\x9f\xd1\x8e\x8a\x5c\xe2"
+	"\xf7\xd7\xd8\x54\xf3\x3f\xc4\x91"
+	"\xb8\xfb\x86\xdc\x46\x24\x91\x60"
+	"\x6c\x2f\xc9\x41\x37\x51\x49\x54"
+	"\x09\x81\x21\xf3\x03\x9f\x2b\xe3"
+	"\x1f\x39\x63\xaf\xf4\xd7\x53\x60"
+	"\xa7\xc7\x54\xf9\xee\xb1\xb1\x7d"
+	"\x75\x54\x65\x93\xfe\xb1\x68\x6b"
+	"\x57\x02\xf9\xbb\x0e\xf9\xf8\xbf"
+	"\x01\x12\x27\xb4\xfe\xe4\x79\x7a"
+	"\x40\x5b\x51\x4b\xdf\x38\xec\xb1"
+	"\x6a\x56\xff\x35\x4d\x42\x33\xaa"
+	"\x6f\x1b\xe4\xdc\xe0\xdb\x85\x35"
+	"\x62\x10\xd4\xec\xeb\xc5\x7e\x45"
+	"\x1c\x6f\x17\xca\x3b\x8e\x2d\x66"
+	"\x4f\x4b\x36\x56\xcd\x1b\x59\xaa"
+	"\xd2\x9b\x17\xb9\x58\xdf\x7b\x64"
+	"\x8a\xff\x3b\x9c\xa6\xb5\x48\x9e"
+	"\xaa\xe2\x5d\x09\x71\x32\x5f\xb6"
+	"\x29\xbe\xe7\xc7\x52\x7e\x91\x82"
+	"\x6b\x6d\x33\xe1\x34\x06\x36\x21"
+	"\x5e\xbe\x1e\x2f\x3e\xc1\xfb\xea"
+	"\x49\x2c\xb5\xca\xf7\xb0\x37\xea"
+	"\x1f\xed\x10\x04\xd9\x48\x0d\x1a"
+	"\x1c\xfb\xe7\x84\x0e\x83\x53\x74"
+	"\xc7\x65\xe2\x5c\xe5\xba\x73\x4c"
+	"\x0e\xe1\xb5\x11\x45\x61\x43\x46"
+	"\xaa\x25\x8f\xbd\x85\x08\xfa\x4c"
+	"\x15\xc1\xc0\xd8\xf5\xdc\x16\xbb"
+	"\x7b\x1d\xe3\x87\x57\xa7\x2a\x1d"
+	"\x38\x58\x9e\x8a\x43\xdc\x57"
+	"\xd1\x81\x7d\x2b\xe9\xff\x99\x3a"
+	"\x4b\x24\x52\x58\x55\xe1\x49\x14";
+
+static struct {
+	const u8	*ptext;
+	const u8	*ctext;
+
+	u8		key[AES_MAX_KEY_SIZE];
+	u8		iv[GCM_AES_IV_SIZE];
+	u8		assoc[20];
+
+	int		klen;
+	int		clen;
+	int		plen;
+	int		alen;
+} const aesgcm_tv[] __initconst = {
+	{ /* From McGrew & Viega - http://citeseer.ist.psu.edu/656989.html */
+		.klen	= 16,
+		.ctext	= ctext0,
+		.clen	= sizeof(ctext0),
+	}, {
+		.klen	= 16,
+		.ptext	= ptext1,
+		.plen	= sizeof(ptext1),
+		.ctext	= ctext1,
+		.clen	= sizeof(ctext1),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 16,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext2,
+		.plen	= sizeof(ptext2),
+		.ctext	= ctext2,
+		.clen	= sizeof(ctext2),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 16,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext3,
+		.plen	= sizeof(ptext3),
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= ctext3,
+		.clen	= sizeof(ctext3),
+	}, {
+		.klen	= 24,
+		.ctext	= ctext4,
+		.clen	= sizeof(ctext4),
+	}, {
+		.klen	= 24,
+		.ptext	= ptext1,
+		.plen	= sizeof(ptext1),
+		.ctext	= ctext5,
+		.clen	= sizeof(ctext5),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+		.klen	= 24,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext6,
+		.plen	= sizeof(ptext6),
+		.ctext	= ctext6,
+		.clen	= sizeof(ctext6),
+	}, {
+		.klen	= 32,
+		.ctext	= ctext7,
+		.clen	= sizeof(ctext7),
+	}, {
+		.klen	= 32,
+		.ptext	= ptext1,
+		.plen	= sizeof(ptext1),
+		.ctext	= ctext8,
+		.clen	= sizeof(ctext8),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 32,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext9,
+		.plen	= sizeof(ptext9),
+		.ctext	= ctext9,
+		.clen	= sizeof(ctext9),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08",
+		.klen	= 32,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext10,
+		.plen	= sizeof(ptext10),
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= ctext10,
+		.clen	= sizeof(ctext10),
+	}, {
+		.key	= "\xfe\xff\xe9\x92\x86\x65\x73\x1c"
+			  "\x6d\x6a\x8f\x94\x67\x30\x83\x08"
+			  "\xfe\xff\xe9\x92\x86\x65\x73\x1c",
+		.klen	= 24,
+		.iv	= "\xca\xfe\xba\xbe\xfa\xce\xdb\xad"
+			  "\xde\xca\xf8\x88",
+		.ptext	= ptext11,
+		.plen	= sizeof(ptext11),
+		.assoc	= "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xfe\xed\xfa\xce\xde\xad\xbe\xef"
+			  "\xab\xad\xda\xd2",
+		.alen	= 20,
+		.ctext	= ctext11,
+		.clen	= sizeof(ctext11),
+	}, {
+		.key	= "\x62\x35\xf8\x95\xfc\xa5\xeb\xf6"
+			  "\x0e\x92\x12\x04\xd3\xa1\x3f\x2e"
+			  "\x8b\x32\xcf\xe7\x44\xed\x13\x59"
+			  "\x04\x38\x77\xb0\xb9\xad\xb4\x38",
+		.klen	= 32,
+		.iv	= "\x00\xff\xff\xff\xff\x00\x00\xff"
+			  "\xff\xff\x00\xff",
+		.ptext	= ptext12,
+		.plen	= sizeof(ptext12),
+		.ctext	= ctext12,
+		.clen	= sizeof(ctext12),
+	}
+};
+
+static int __init libaesgcm_init(void)
+{
+	for (int i = 0; i < ARRAY_SIZE(aesgcm_tv); i++) {
+		u8 tagbuf[AES_BLOCK_SIZE];
+		int plen = aesgcm_tv[i].plen;
+		struct aesgcm_ctx ctx;
+		u8 buf[sizeof(ptext12)];
+
+		if (aesgcm_expandkey(&ctx, aesgcm_tv[i].key, aesgcm_tv[i].klen,
+				     aesgcm_tv[i].clen - plen)) {
+			pr_err("aesgcm_expandkey() failed on vector %d\n", i);
+			return -ENODEV;
+		}
+
+		if (!aesgcm_decrypt(&ctx, buf, aesgcm_tv[i].ctext, plen,
+				    aesgcm_tv[i].assoc, aesgcm_tv[i].alen,
+				    aesgcm_tv[i].iv, aesgcm_tv[i].ctext + plen)
+		    || memcmp(buf, aesgcm_tv[i].ptext, plen)) {
+			pr_err("aesgcm_decrypt() #1 failed on vector %d\n", i);
+			return -ENODEV;
+		}
+
+		/* encrypt in place */
+		aesgcm_encrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc,
+			       aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf);
+		if (memcmp(buf, aesgcm_tv[i].ctext, plen)) {
+			pr_err("aesgcm_encrypt() failed on vector %d\n", i);
+			return -ENODEV;
+		}
+
+		/* decrypt in place */
+		if (!aesgcm_decrypt(&ctx, buf, buf, plen, aesgcm_tv[i].assoc,
+				    aesgcm_tv[i].alen, aesgcm_tv[i].iv, tagbuf)
+		    || memcmp(buf, aesgcm_tv[i].ptext, plen)) {
+			pr_err("aesgcm_decrypt() #2 failed on vector %d\n", i);
+			return -ENODEV;
+		}
+	}
+	return 0;
+}
+module_init(libaesgcm_init);
+
+static void __exit libaesgcm_exit(void)
+{
+}
+module_exit(libaesgcm_exit);
+#endif
-- 
GitLab


From fb11cddfe24caad33667b5c9dd859562b2be6c75 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 4 Nov 2022 15:45:27 +0800
Subject: [PATCH 1399/4122] crypto: rockchip - Remove surplus dev_err() when
 using platform_get_irq()

There is no need to call the dev_err() function directly to print a
custom message when handling an error from either the platform_get_irq()
or platform_get_irq_byname() functions as both are going to display an
appropriate error message in case of a failure.

./drivers/crypto/rockchip/rk3288_crypto.c:351:2-9: line 351 is
redundant because platform_get_irq() already prints an error

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=2677
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Acked-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/rockchip/rk3288_crypto.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 6217e73ba4c41..9f6ba770a90a1 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -348,7 +348,6 @@ static int rk_crypto_probe(struct platform_device *pdev)
 
 	crypto_info->irq = platform_get_irq(pdev, 0);
 	if (crypto_info->irq < 0) {
-		dev_err(&pdev->dev, "control Interrupt is not available.\n");
 		err = crypto_info->irq;
 		goto err_crypto;
 	}
-- 
GitLab


From 557ffd5a4726f8b6f0dd1d4b632ae02c1c063233 Mon Sep 17 00:00:00 2001
From: Shashank Gupta <shashank.gupta@intel.com>
Date: Fri, 4 Nov 2022 13:21:07 -0400
Subject: [PATCH 1400/4122] crypto: qat - remove ADF_STATUS_PF_RUNNING flag
 from probe

The ADF_STATUS_PF_RUNNING bit is set after the successful initialization
of the communication between VF to PF in adf_vf2pf_notify_init().
So, it is not required to be set after the execution of the function
adf_dev_init().

Signed-off-by: Shashank Gupta <shashank.gupta@intel.com>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_c3xxxvf/adf_drv.c    | 2 --
 drivers/crypto/qat/qat_c62xvf/adf_drv.c     | 2 --
 drivers/crypto/qat/qat_dh895xccvf/adf_drv.c | 2 --
 3 files changed, 6 deletions(-)

diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
index fa18d8009f533..cf4ef83e186f8 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_drv.c
@@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto out_err_dev_shutdown;
 
-	set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
-
 	ret = adf_dev_start(accel_dev);
 	if (ret)
 		goto out_err_dev_stop;
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_drv.c b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
index 686ec752d0e9e..0e642c94b9293 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_drv.c
@@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto out_err_dev_shutdown;
 
-	set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
-
 	ret = adf_dev_start(accel_dev);
 	if (ret)
 		goto out_err_dev_stop;
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
index 18756b2e1c912..c1485e702b3eb 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_drv.c
@@ -177,8 +177,6 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto out_err_dev_shutdown;
 
-	set_bit(ADF_STATUS_PF_RUNNING, &accel_dev->status);
-
 	ret = adf_dev_start(accel_dev);
 	if (ret)
 		goto out_err_dev_stop;
-- 
GitLab


From 198acab1772f22f2e91f68a2fc1331e91dad780a Mon Sep 17 00:00:00 2001
From: Jim Quinlan <jim2101024@gmail.com>
Date: Tue, 11 Oct 2022 14:42:06 -0400
Subject: [PATCH 1401/4122] PCI: brcmstb: Enable Multi-MSI

We always wanted to enable Multi-MSI but didn't have a test device until
recently.  In addition, there are some devices out there that will ask for
multiple MSI but refuse to work if they are only granted one.

Link: https://lore.kernel.org/r/20221011184211.18128-2-jim2101024@gmail.com
Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/pci/controller/pcie-brcmstb.c | 26 +++++++++++++++-----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index 521acd632f1ac..a45ce7d61847a 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -445,7 +445,8 @@ static struct irq_chip brcm_msi_irq_chip = {
 
 static struct msi_domain_info brcm_msi_domain_info = {
 	/* Multi MSI is supported by the controller, but not by this driver */
-	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+	.flags	= (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+		   MSI_FLAG_MULTI_PCI_MSI),
 	.chip	= &brcm_msi_irq_chip,
 };
 
@@ -505,21 +506,23 @@ static struct irq_chip brcm_msi_bottom_irq_chip = {
 	.irq_ack                = brcm_msi_ack_irq,
 };
 
-static int brcm_msi_alloc(struct brcm_msi *msi)
+static int brcm_msi_alloc(struct brcm_msi *msi, unsigned int nr_irqs)
 {
 	int hwirq;
 
 	mutex_lock(&msi->lock);
-	hwirq = bitmap_find_free_region(msi->used, msi->nr, 0);
+	hwirq = bitmap_find_free_region(msi->used, msi->nr,
+					order_base_2(nr_irqs));
 	mutex_unlock(&msi->lock);
 
 	return hwirq;
 }
 
-static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq)
+static void brcm_msi_free(struct brcm_msi *msi, unsigned long hwirq,
+			  unsigned int nr_irqs)
 {
 	mutex_lock(&msi->lock);
-	bitmap_release_region(msi->used, hwirq, 0);
+	bitmap_release_region(msi->used, hwirq, order_base_2(nr_irqs));
 	mutex_unlock(&msi->lock);
 }
 
@@ -527,16 +530,17 @@ static int brcm_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 				 unsigned int nr_irqs, void *args)
 {
 	struct brcm_msi *msi = domain->host_data;
-	int hwirq;
+	int hwirq, i;
 
-	hwirq = brcm_msi_alloc(msi);
+	hwirq = brcm_msi_alloc(msi, nr_irqs);
 
 	if (hwirq < 0)
 		return hwirq;
 
-	irq_domain_set_info(domain, virq, (irq_hw_number_t)hwirq,
-			    &brcm_msi_bottom_irq_chip, domain->host_data,
-			    handle_edge_irq, NULL, NULL);
+	for (i = 0; i < nr_irqs; i++)
+		irq_domain_set_info(domain, virq + i, hwirq + i,
+				    &brcm_msi_bottom_irq_chip, domain->host_data,
+				    handle_edge_irq, NULL, NULL);
 	return 0;
 }
 
@@ -546,7 +550,7 @@ static void brcm_irq_domain_free(struct irq_domain *domain,
 	struct irq_data *d = irq_domain_get_irq_data(domain, virq);
 	struct brcm_msi *msi = irq_data_get_irq_chip_data(d);
 
-	brcm_msi_free(msi, d->hwirq);
+	brcm_msi_free(msi, d->hwirq, nr_irqs);
 }
 
 static const struct irq_domain_ops msi_domain_ops = {
-- 
GitLab


From 3ae140ad827b359bc4fa7c7985691c4c1e3ca8f4 Mon Sep 17 00:00:00 2001
From: Jim Quinlan <jim2101024@gmail.com>
Date: Tue, 11 Oct 2022 14:42:07 -0400
Subject: [PATCH 1402/4122] PCI: brcmstb: Wait for 100ms following PERST#
 deassert

Be prudent and give some time for power and clocks to become stable.  As
described in the PCIe CEM specification sections 2.2 and 2.2.1; as well as
PCIe r5.0, 6.6.1.

Link: https://lore.kernel.org/r/20221011184211.18128-3-jim2101024@gmail.com
Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/pci/controller/pcie-brcmstb.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index a45ce7d61847a..39b545713ba02 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -1037,8 +1037,15 @@ static int brcm_pcie_start_link(struct brcm_pcie *pcie)
 	pcie->perst_set(pcie, 0);
 
 	/*
-	 * Give the RC/EP time to wake up, before trying to configure RC.
-	 * Intermittently check status for link-up, up to a total of 100ms.
+	 * Wait for 100ms after PERST# deassertion; see PCIe CEM specification
+	 * sections 2.2, PCIe r5.0, 6.6.1.
+	 */
+	msleep(100);
+
+	/*
+	 * Give the RC/EP even more time to wake up, before trying to
+	 * configure RC.  Intermittently check status for link-up, up to a
+	 * total of 100ms.
 	 */
 	for (i = 0; i < 100 && !brcm_pcie_link_up(pcie); i += 5)
 		msleep(5);
-- 
GitLab


From ca5dcc76314d1fa6d7307fd3b95039b08d2f2b97 Mon Sep 17 00:00:00 2001
From: Jim Quinlan <jim2101024@gmail.com>
Date: Tue, 11 Oct 2022 14:42:08 -0400
Subject: [PATCH 1403/4122] PCI: brcmstb: Replace status loops with
 read_poll_timeout_atomic()

It would be nice to replace the PCIe link-up loop as well but
there are too many uses of this that do not poll (and the
read_poll_timeout uses "timeout==0" to loop forever).

Link: https://lore.kernel.org/r/20221011184211.18128-4-jim2101024@gmail.com
Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/pci/controller/pcie-brcmstb.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index 39b545713ba02..c7210cec1f582 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -9,6 +9,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/ioport.h>
 #include <linux/irqchip/chained_irq.h>
 #include <linux/irqdomain.h>
@@ -302,42 +303,34 @@ static u32 brcm_pcie_mdio_form_pkt(int port, int regad, int cmd)
 /* negative return value indicates error */
 static int brcm_pcie_mdio_read(void __iomem *base, u8 port, u8 regad, u32 *val)
 {
-	int tries;
 	u32 data;
+	int err;
 
 	writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_READ),
 		   base + PCIE_RC_DL_MDIO_ADDR);
 	readl(base + PCIE_RC_DL_MDIO_ADDR);
-
-	data = readl(base + PCIE_RC_DL_MDIO_RD_DATA);
-	for (tries = 0; !MDIO_RD_DONE(data) && tries < 10; tries++) {
-		udelay(10);
-		data = readl(base + PCIE_RC_DL_MDIO_RD_DATA);
-	}
-
+	err = readl_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_RD_DATA, data,
+					MDIO_RD_DONE(data), 10, 100);
 	*val = FIELD_GET(MDIO_DATA_MASK, data);
-	return MDIO_RD_DONE(data) ? 0 : -EIO;
+
+	return err;
 }
 
 /* negative return value indicates error */
 static int brcm_pcie_mdio_write(void __iomem *base, u8 port,
 				u8 regad, u16 wrdata)
 {
-	int tries;
 	u32 data;
+	int err;
 
 	writel(brcm_pcie_mdio_form_pkt(port, regad, MDIO_CMD_WRITE),
 		   base + PCIE_RC_DL_MDIO_ADDR);
 	readl(base + PCIE_RC_DL_MDIO_ADDR);
 	writel(MDIO_DATA_DONE_MASK | wrdata, base + PCIE_RC_DL_MDIO_WR_DATA);
 
-	data = readl(base + PCIE_RC_DL_MDIO_WR_DATA);
-	for (tries = 0; !MDIO_WT_DONE(data) && tries < 10; tries++) {
-		udelay(10);
-		data = readl(base + PCIE_RC_DL_MDIO_WR_DATA);
-	}
-
-	return MDIO_WT_DONE(data) ? 0 : -EIO;
+	err = readw_poll_timeout_atomic(base + PCIE_RC_DL_MDIO_WR_DATA, data,
+					MDIO_WT_DONE(data), 10, 100);
+	return err;
 }
 
 /*
-- 
GitLab


From 137b57413f569d558c1054e2a181313574eb9a87 Mon Sep 17 00:00:00 2001
From: Jim Quinlan <jim2101024@gmail.com>
Date: Tue, 11 Oct 2022 14:42:09 -0400
Subject: [PATCH 1404/4122] PCI: brcmstb: Drop needless 'inline' annotations

A number of inline functions are called rarely and/or are not
time-critical.  Take out the "inline" and let the compiler do its work.

Link: https://lore.kernel.org/r/20221011184211.18128-5-jim2101024@gmail.com
Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/pci/controller/pcie-brcmstb.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index c7210cec1f582..e3045f1eadbc5 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -723,7 +723,7 @@ static void __iomem *brcm7425_pcie_map_bus(struct pci_bus *bus,
 	return base + DATA_ADDR(pcie);
 }
 
-static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val)
+static void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val)
 {
 	u32 tmp, mask =  RGR1_SW_INIT_1_INIT_GENERIC_MASK;
 	u32 shift = RGR1_SW_INIT_1_INIT_GENERIC_SHIFT;
@@ -733,7 +733,7 @@ static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie,
 	writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
 }
 
-static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val)
+static void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val)
 {
 	u32 tmp, mask =  RGR1_SW_INIT_1_INIT_7278_MASK;
 	u32 shift = RGR1_SW_INIT_1_INIT_7278_SHIFT;
@@ -743,7 +743,7 @@ static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32
 	writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
 }
 
-static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val)
+static void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val)
 {
 	if (WARN_ONCE(!pcie->perst_reset, "missing PERST# reset controller\n"))
 		return;
@@ -754,7 +754,7 @@ static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val)
 		reset_control_deassert(pcie->perst_reset);
 }
 
-static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val)
+static void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val)
 {
 	u32 tmp;
 
@@ -764,7 +764,7 @@ static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val)
 	writel(tmp, pcie->base +  PCIE_MISC_PCIE_CTRL);
 }
 
-static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val)
+static void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val)
 {
 	u32 tmp;
 
@@ -773,7 +773,7 @@ static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val)
 	writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
 }
 
-static inline int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie,
+static int brcm_pcie_get_rc_bar2_size_and_offset(struct brcm_pcie *pcie,
 							u64 *rc_bar2_size,
 							u64 *rc_bar2_offset)
 {
-- 
GitLab


From 602fb860945fd6dce7989fcd3727d5fe4282f785 Mon Sep 17 00:00:00 2001
From: Jim Quinlan <jim2101024@gmail.com>
Date: Tue, 11 Oct 2022 14:42:10 -0400
Subject: [PATCH 1405/4122] PCI: brcmstb: Set RCB_{MPS,64B}_MODE bits

Set RCB_MPS mode bit so that data for PCIe read requests up to the size of
the Maximum Payload Size (MPS) are returned in one completion, and data for
PCIe read requests greater than the MPS are split at the specified Read
Completion Boundary setting.

Set RCB_64B so that the Read Compeletion Boundary is 64B.

Link: https://lore.kernel.org/r/20221011184211.18128-6-jim2101024@gmail.com
Signed-off-by: Jim Quinlan <jim2101024@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
---
 drivers/pci/controller/pcie-brcmstb.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pcie-brcmstb.c b/drivers/pci/controller/pcie-brcmstb.c
index e3045f1eadbc5..edf283e2b5dd0 100644
--- a/drivers/pci/controller/pcie-brcmstb.c
+++ b/drivers/pci/controller/pcie-brcmstb.c
@@ -53,6 +53,8 @@
 #define PCIE_RC_DL_MDIO_RD_DATA				0x1108
 
 #define PCIE_MISC_MISC_CTRL				0x4008
+#define  PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK	0x80
+#define  PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK	0x400
 #define  PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK		0x1000
 #define  PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK	0x2000
 #define  PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK	0x300000
@@ -900,11 +902,16 @@ static int brcm_pcie_setup(struct brcm_pcie *pcie)
 	else
 		burst = 0x2; /* 512 bytes */
 
-	/* Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN */
+	/*
+	 * Set SCB_MAX_BURST_SIZE, CFG_READ_UR_MODE, SCB_ACCESS_EN,
+	 * RCB_MPS_MODE, RCB_64B_MODE
+	 */
 	tmp = readl(base + PCIE_MISC_MISC_CTRL);
 	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_SCB_ACCESS_EN_MASK);
 	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_CFG_READ_UR_MODE_MASK);
 	u32p_replace_bits(&tmp, burst, PCIE_MISC_MISC_CTRL_MAX_BURST_SIZE_MASK);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_MPS_MODE_MASK);
+	u32p_replace_bits(&tmp, 1, PCIE_MISC_MISC_CTRL_PCIE_RCB_64B_MODE_MASK);
 	writel(tmp, base + PCIE_MISC_MISC_CTRL);
 
 	ret = brcm_pcie_get_rc_bar2_size_and_offset(pcie, &rc_bar2_size,
-- 
GitLab


From d899aa668498c07ff217b666ae9712990306e682 Mon Sep 17 00:00:00 2001
From: Nirmal Patel <nirmal.patel@linux.intel.com>
Date: Wed, 9 Nov 2022 07:26:52 -0700
Subject: [PATCH 1406/4122] PCI: vmd: Disable MSI remapping after suspend

MSI remapping is disabled by VMD driver for Intel's Icelake and
newer systems in order to improve performance by setting
VMCONFIG_MSI_REMAP. By design VMCONFIG_MSI_REMAP register is cleared
by firmware during boot. The same register gets cleared when system
is put in S3 power state. VMD driver needs to set this register again
in order to avoid interrupt issues with devices behind VMD if MSI
remapping was disabled before.

Link: https://lore.kernel.org/r/20221109142652.450998-1-nirmal.patel@linux.intel.com
Fixes: ee81ee84f873 ("PCI: vmd: Disable MSI-X remapping when possible")
Signed-off-by: Nirmal Patel <nirmal.patel@linux.intel.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Francisco Munoz <francisco.munoz.ruiz@linux.intel.com>
---
 drivers/pci/controller/vmd.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index e06e9f4fc50f7..98e0746e681c9 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -980,6 +980,11 @@ static int vmd_resume(struct device *dev)
 	struct vmd_dev *vmd = pci_get_drvdata(pdev);
 	int err, i;
 
+       if (vmd->irq_domain)
+               vmd_set_msi_remapping(vmd, true);
+       else
+               vmd_set_msi_remapping(vmd, false);
+
 	for (i = 0; i < vmd->msix_count; i++) {
 		err = devm_request_irq(dev, vmd->irqs[i].virq,
 				       vmd_irq, IRQF_NO_THREAD,
-- 
GitLab


From 3a936b2a5a581e50dd5cab20a48eb0055a527f02 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 2 Nov 2022 10:07:04 +0100
Subject: [PATCH 1407/4122] dt-bindings: PCI: qcom: Add SC8280XP/SA8540P
 interconnects

Add the missing SC8280XP/SA8540P "pcie-mem" and "cpu-pcie" interconnect
paths to the bindings.

Link: https://lore.kernel.org/r/20221102090705.23634-2-johan+linaro@kernel.org
Fixes: 76d777ae045e ("dt-bindings: PCI: qcom: Add SC8280XP to binding")
Fixes: 76c4207f4085 ("dt-bindings: PCI: qcom: Add SA8540P to binding")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
---
 .../devicetree/bindings/pci/qcom,pcie.yaml    | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml
index 54f07852d279e..2f851c804bb07 100644
--- a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml
@@ -62,6 +62,14 @@ properties:
     minItems: 3
     maxItems: 13
 
+  interconnects:
+    maxItems: 2
+
+  interconnect-names:
+    items:
+      - const: pcie-mem
+      - const: cpu-pcie
+
   resets:
     minItems: 1
     maxItems: 12
@@ -631,6 +639,18 @@ allOf:
           items:
             - const: pci # PCIe core reset
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,pcie-sa8540p
+              - qcom,pcie-sc8280xp
+    then:
+      required:
+        - interconnects
+        - interconnect-names
+
   - if:
       not:
         properties:
-- 
GitLab


From c4860af88d0cb1bb006df12615c5515ae509f73b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 2 Nov 2022 10:07:05 +0100
Subject: [PATCH 1408/4122] PCI: qcom: Add basic interconnect support

On Qualcomm platforms like SC8280XP and SA8540P, interconnect bandwidth
must be requested before enabling interconnect clocks.

Add basic support for managing an optional "pcie-mem" interconnect path
by setting a low constraint before enabling clocks and updating it after
the link is up.

Note that it is not possible for a controller driver to set anything but
a maximum peak bandwidth as expected average bandwidth will vary with
use case and actual use (and power policy?). This very much remains an
unresolved problem with the interconnect framework.

Also note that no constraint is set for the SC8280XP/SA8540P "cpu-pcie"
path for now as it is not clear what an appropriate constraint would be
(and the system does not crash when left unspecified).

Link: https://lore.kernel.org/r/20221102090705.23634-3-johan+linaro@kernel.org
Fixes: 70574511f3fc ("PCI: qcom: Add support for SC8280XP")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Brian Masney <bmasney@redhat.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Acked-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/pci/controller/dwc/pcie-qcom.c | 76 ++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)

diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
index f711acacaeaf8..ec230b988ba25 100644
--- a/drivers/pci/controller/dwc/pcie-qcom.c
+++ b/drivers/pci/controller/dwc/pcie-qcom.c
@@ -12,6 +12,7 @@
 #include <linux/crc8.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
+#include <linux/interconnect.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -223,6 +224,7 @@ struct qcom_pcie {
 	union qcom_pcie_resources res;
 	struct phy *phy;
 	struct gpio_desc *reset;
+	struct icc_path *icc_mem;
 	const struct qcom_pcie_cfg *cfg;
 };
 
@@ -1639,6 +1641,74 @@ static const struct dw_pcie_ops dw_pcie_ops = {
 	.start_link = qcom_pcie_start_link,
 };
 
+static int qcom_pcie_icc_init(struct qcom_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	int ret;
+
+	pcie->icc_mem = devm_of_icc_get(pci->dev, "pcie-mem");
+	if (IS_ERR(pcie->icc_mem))
+		return PTR_ERR(pcie->icc_mem);
+
+	/*
+	 * Some Qualcomm platforms require interconnect bandwidth constraints
+	 * to be set before enabling interconnect clocks.
+	 *
+	 * Set an initial peak bandwidth corresponding to single-lane Gen 1
+	 * for the pcie-mem path.
+	 */
+	ret = icc_set_bw(pcie->icc_mem, 0, MBps_to_icc(250));
+	if (ret) {
+		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
+			ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void qcom_pcie_icc_update(struct qcom_pcie *pcie)
+{
+	struct dw_pcie *pci = pcie->pci;
+	u32 offset, status, bw;
+	int speed, width;
+	int ret;
+
+	if (!pcie->icc_mem)
+		return;
+
+	offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+	status = readw(pci->dbi_base + offset + PCI_EXP_LNKSTA);
+
+	/* Only update constraints if link is up. */
+	if (!(status & PCI_EXP_LNKSTA_DLLLA))
+		return;
+
+	speed = FIELD_GET(PCI_EXP_LNKSTA_CLS, status);
+	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, status);
+
+	switch (speed) {
+	case 1:
+		bw = MBps_to_icc(250);
+		break;
+	case 2:
+		bw = MBps_to_icc(500);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		fallthrough;
+	case 3:
+		bw = MBps_to_icc(985);
+		break;
+	}
+
+	ret = icc_set_bw(pcie->icc_mem, 0, width * bw);
+	if (ret) {
+		dev_err(pci->dev, "failed to set interconnect bandwidth: %d\n",
+			ret);
+	}
+}
+
 static int qcom_pcie_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -1699,6 +1769,10 @@ static int qcom_pcie_probe(struct platform_device *pdev)
 		goto err_pm_runtime_put;
 	}
 
+	ret = qcom_pcie_icc_init(pcie);
+	if (ret)
+		goto err_pm_runtime_put;
+
 	ret = pcie->cfg->ops->get_resources(pcie);
 	if (ret)
 		goto err_pm_runtime_put;
@@ -1717,6 +1791,8 @@ static int qcom_pcie_probe(struct platform_device *pdev)
 		goto err_phy_exit;
 	}
 
+	qcom_pcie_icc_update(pcie);
+
 	return 0;
 
 err_phy_exit:
-- 
GitLab


From 2759ddf7535d63381f9b9b1412e4c46e13ed773a Mon Sep 17 00:00:00 2001
From: Shunsuke Mie <mie@igel.co.jp>
Date: Mon, 15 Aug 2022 11:50:06 +0900
Subject: [PATCH 1409/4122] PCI: endpoint: Fix Kconfig indent style

Change to follow the Kconfig style guide. This patch fixes to use tab
rather than space to indent, while help text is indented an additional
two spaces.

Link: https://lore.kernel.org/r/20220815025006.48167-1-mie@igel.co.jp
Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP")
Signed-off-by: Shunsuke Mie <mie@igel.co.jp>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
---
 drivers/pci/endpoint/functions/Kconfig | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/endpoint/functions/Kconfig b/drivers/pci/endpoint/functions/Kconfig
index 295a033ee9a27..9fd5608868718 100644
--- a/drivers/pci/endpoint/functions/Kconfig
+++ b/drivers/pci/endpoint/functions/Kconfig
@@ -27,13 +27,13 @@ config PCI_EPF_NTB
 	  If in doubt, say "N" to disable Endpoint NTB driver.
 
 config PCI_EPF_VNTB
-        tristate "PCI Endpoint NTB driver"
-        depends on PCI_ENDPOINT
-        depends on NTB
-        select CONFIGFS_FS
-        help
-          Select this configuration option to enable the Non-Transparent
-          Bridge (NTB) driver for PCIe Endpoint. NTB driver implements NTB
-          between PCI Root Port and PCIe Endpoint.
+	tristate "PCI Endpoint NTB driver"
+	depends on PCI_ENDPOINT
+	depends on NTB
+	select CONFIGFS_FS
+	help
+	  Select this configuration option to enable the Non-Transparent
+	  Bridge (NTB) driver for PCIe Endpoint. NTB driver implements NTB
+	  between PCI Root Port and PCIe Endpoint.
 
-          If in doubt, say "N" to disable Endpoint NTB driver.
+	  If in doubt, say "N" to disable Endpoint NTB driver.
-- 
GitLab


From ae6b9a65af480144da323436d90e149501ea8937 Mon Sep 17 00:00:00 2001
From: Sascha Hauer <s.hauer@pengutronix.de>
Date: Tue, 1 Nov 2022 10:57:14 +0100
Subject: [PATCH 1410/4122] PCI: imx6: Initialize PHY before deasserting core
 reset

When the PHY is the reference clock provider then it must be initialized
and powered on before the reset on the client is deasserted, otherwise
the link will never come up. The order was changed in cf236e0c0d59.
Restore the correct order to make the driver work again on boards where
the PHY provides the reference clock. This also changes the order for
boards where the Soc is the PHY reference clock divider, but this
shouldn't do any harm.

Link: https://lore.kernel.org/r/20221101095714.440001-1-s.hauer@pengutronix.de
Fixes: cf236e0c0d59 ("PCI: imx6: Do not hide PHY driver callbacks and refine the error handling")
Tested-by: Richard Zhu <hongxing.zhu@nxp.com>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
---
 drivers/pci/controller/dwc/pci-imx6.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/controller/dwc/pci-imx6.c b/drivers/pci/controller/dwc/pci-imx6.c
index 2616585ca5f8a..1dde5c579edc8 100644
--- a/drivers/pci/controller/dwc/pci-imx6.c
+++ b/drivers/pci/controller/dwc/pci-imx6.c
@@ -952,12 +952,6 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp)
 		}
 	}
 
-	ret = imx6_pcie_deassert_core_reset(imx6_pcie);
-	if (ret < 0) {
-		dev_err(dev, "pcie deassert core reset failed: %d\n", ret);
-		goto err_phy_off;
-	}
-
 	if (imx6_pcie->phy) {
 		ret = phy_power_on(imx6_pcie->phy);
 		if (ret) {
@@ -965,6 +959,13 @@ static int imx6_pcie_host_init(struct dw_pcie_rp *pp)
 			goto err_phy_off;
 		}
 	}
+
+	ret = imx6_pcie_deassert_core_reset(imx6_pcie);
+	if (ret < 0) {
+		dev_err(dev, "pcie deassert core reset failed: %d\n", ret);
+		goto err_phy_off;
+	}
+
 	imx6_setup_phy_mpll(imx6_pcie);
 
 	return 0;
-- 
GitLab


From 83f8a81dece8bc4237d8d94af357fb5df0083e63 Mon Sep 17 00:00:00 2001
From: Usama Arif <usama.arif@bytedance.com>
Date: Thu, 3 Nov 2022 13:12:10 +0000
Subject: [PATCH 1411/4122] KVM: arm64: Fix pvtime documentation

This includes table format and using reST labels for
cross-referencing to vcpu.rst.

Suggested-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Usama Arif <usama.arif@bytedance.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221103131210.3603385-1-usama.arif@bytedance.com
---
 Documentation/virt/kvm/arm/pvtime.rst   | 14 ++++++++------
 Documentation/virt/kvm/devices/vcpu.rst |  2 ++
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/Documentation/virt/kvm/arm/pvtime.rst b/Documentation/virt/kvm/arm/pvtime.rst
index 392521af7c905..e88b34e586be2 100644
--- a/Documentation/virt/kvm/arm/pvtime.rst
+++ b/Documentation/virt/kvm/arm/pvtime.rst
@@ -23,21 +23,23 @@ the PV_TIME_FEATURES hypercall should be probed using the SMCCC 1.1
 ARCH_FEATURES mechanism before calling it.
 
 PV_TIME_FEATURES
-    ============= ========    ==========
+
+    ============= ========    =================================================
     Function ID:  (uint32)    0xC5000020
     PV_call_id:   (uint32)    The function to query for support.
                               Currently only PV_TIME_ST is supported.
     Return value: (int64)     NOT_SUPPORTED (-1) or SUCCESS (0) if the relevant
                               PV-time feature is supported by the hypervisor.
-    ============= ========    ==========
+    ============= ========    =================================================
 
 PV_TIME_ST
-    ============= ========    ==========
+
+    ============= ========    ==============================================
     Function ID:  (uint32)    0xC5000021
     Return value: (int64)     IPA of the stolen time data structure for this
                               VCPU. On failure:
                               NOT_SUPPORTED (-1)
-    ============= ========    ==========
+    ============= ========    ==============================================
 
 The IPA returned by PV_TIME_ST should be mapped by the guest as normal memory
 with inner and outer write back caching attributes, in the inner shareable
@@ -76,5 +78,5 @@ It is advisable that one or more 64k pages are set aside for the purpose of
 these structures and not used for other purposes, this enables the guest to map
 the region using 64k pages and avoids conflicting attributes with other memory.
 
-For the user space interface see Documentation/virt/kvm/devices/vcpu.rst
-section "3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL".
+For the user space interface see
+:ref:`Documentation/virt/kvm/devices/vcpu.rst <kvm_arm_vcpu_pvtime_ctrl>`.
\ No newline at end of file
diff --git a/Documentation/virt/kvm/devices/vcpu.rst b/Documentation/virt/kvm/devices/vcpu.rst
index 716aa3edae14f..31f14ec4a65b6 100644
--- a/Documentation/virt/kvm/devices/vcpu.rst
+++ b/Documentation/virt/kvm/devices/vcpu.rst
@@ -171,6 +171,8 @@ configured values on other VCPUs.  Userspace should configure the interrupt
 numbers on at least one VCPU after creating all VCPUs and before running any
 VCPUs.
 
+.. _kvm_arm_vcpu_pvtime_ctrl:
+
 3. GROUP: KVM_ARM_VCPU_PVTIME_CTRL
 ==================================
 
-- 
GitLab


From e1b3253340029b06f5f648d8390807cba4e4ec23 Mon Sep 17 00:00:00 2001
From: Zhiyuan Dai <daizhiyuan@phytium.com.cn>
Date: Sun, 6 Nov 2022 20:30:40 +0800
Subject: [PATCH 1412/4122] KVM: arm64: Fix typo in comment

Fix typo in comment (nVHE/VHE).

Signed-off-by: Zhiyuan Dai <daizhiyuan@phytium.com.cn>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/1667737840-702-1-git-send-email-daizhiyuan@phytium.com.cn
---
 arch/arm64/kvm/hyp/vhe/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/vhe/Makefile b/arch/arm64/kvm/hyp/vhe/Makefile
index 96bec0ecf9dd9..3b9e5464b5b39 100644
--- a/arch/arm64/kvm/hyp/vhe/Makefile
+++ b/arch/arm64/kvm/hyp/vhe/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 #
-# Makefile for Kernel-based Virtual Machine module, HYP/nVHE part
+# Makefile for Kernel-based Virtual Machine module, HYP/VHE part
 #
 
 asflags-y := -D__KVM_VHE_HYPERVISOR__
-- 
GitLab


From 0f4f7ae10ee4e6403659b2d9ddf05424eecde45b Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:34 +0000
Subject: [PATCH 1413/4122] KVM: arm64: Move hyp refcount manipulation helpers
 to common header file
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We will soon need to manipulate 'struct hyp_page' refcounts from outside
page_alloc.c, so move the helpers to a common header file to allow them
to be reused easily.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-2-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/memory.h | 22 ++++++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/page_alloc.c     | 19 -------------------
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 592b7edb3edb4..9422900e5c6a0 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -38,6 +38,10 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
 #define hyp_page_to_virt(page)	__hyp_va(hyp_page_to_phys(page))
 #define hyp_page_to_pool(page)	(((struct hyp_page *)page)->pool)
 
+/*
+ * Refcounting for 'struct hyp_page'.
+ * hyp_pool::lock must be held if atomic access to the refcount is required.
+ */
 static inline int hyp_page_count(void *addr)
 {
 	struct hyp_page *p = hyp_virt_to_page(addr);
@@ -45,4 +49,22 @@ static inline int hyp_page_count(void *addr)
 	return p->refcount;
 }
 
+static inline void hyp_page_ref_inc(struct hyp_page *p)
+{
+	BUG_ON(p->refcount == USHRT_MAX);
+	p->refcount++;
+}
+
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+	BUG_ON(!p->refcount);
+	p->refcount--;
+	return (p->refcount == 0);
+}
+
+static inline void hyp_set_page_refcounted(struct hyp_page *p)
+{
+	BUG_ON(p->refcount);
+	p->refcount = 1;
+}
 #endif /* __KVM_HYP_MEMORY_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index d40f0b30b5347..1ded09fc9b104 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -144,25 +144,6 @@ static struct hyp_page *__hyp_extract_page(struct hyp_pool *pool,
 	return p;
 }
 
-static inline void hyp_page_ref_inc(struct hyp_page *p)
-{
-	BUG_ON(p->refcount == USHRT_MAX);
-	p->refcount++;
-}
-
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
-{
-	BUG_ON(!p->refcount);
-	p->refcount--;
-	return (p->refcount == 0);
-}
-
-static inline void hyp_set_page_refcounted(struct hyp_page *p)
-{
-	BUG_ON(p->refcount);
-	p->refcount = 1;
-}
-
 static void __hyp_put_page(struct hyp_pool *pool, struct hyp_page *p)
 {
 	if (hyp_page_ref_dec_and_test(p))
-- 
GitLab


From 72a5bc0f153ce8ca80e9abbd1d9adec7d586915a Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:35 +0000
Subject: [PATCH 1414/4122] KVM: arm64: Allow attaching of non-coalescable
 pages to a hyp pool

All the contiguous pages used to initialize a 'struct hyp_pool' are
considered coalescable, which means that the hyp page allocator will
actively try to merge them with their buddies on the hyp_put_page() path.
However, using hyp_put_page() on a page that is not part of the inital
memory range given to a hyp_pool() is currently unsupported.

In order to allow dynamically extending hyp pools at run-time, add a
check to __hyp_attach_page() to allow inserting 'external' pages into
the free-list of order 0. This will be necessary to allow lazy donation
of pages from the host to the hypervisor when allocating guest stage-2
page-table pages at EL2.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-3-will@kernel.org
---
 arch/arm64/kvm/hyp/nvhe/page_alloc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index 1ded09fc9b104..dad88e2035984 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -93,11 +93,16 @@ static inline struct hyp_page *node_to_page(struct list_head *node)
 static void __hyp_attach_page(struct hyp_pool *pool,
 			      struct hyp_page *p)
 {
+	phys_addr_t phys = hyp_page_to_phys(p);
 	unsigned short order = p->order;
 	struct hyp_page *buddy;
 
 	memset(hyp_page_to_virt(p), 0, PAGE_SIZE << p->order);
 
+	/* Skip coalescing for 'external' pages being freed into the pool. */
+	if (phys < pool->range_start || phys >= pool->range_end)
+		goto insert;
+
 	/*
 	 * Only the first struct hyp_page of a high-order page (otherwise known
 	 * as the 'head') should have p->order set. The non-head pages should
@@ -116,6 +121,7 @@ static void __hyp_attach_page(struct hyp_pool *pool,
 		p = min(p, buddy);
 	}
 
+insert:
 	/* Mark the new head, and insert it */
 	p->order = order;
 	page_add_to_list(p, &pool->free_area[order]);
-- 
GitLab


From 8e6bcc3a4502a0d8d065466efd888b6b59b85789 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:36 +0000
Subject: [PATCH 1415/4122] KVM: arm64: Back the hypervisor 'struct hyp_page'
 array for all memory

The EL2 'vmemmap' array in nVHE Protected mode is currently very sparse:
only memory pages owned by the hypervisor itself have a matching 'struct
hyp_page'. However, as the size of this struct has been reduced
significantly since its introduction, it appears that we can now afford
to back the vmemmap for all of memory.

Having an easily accessible 'struct hyp_page' for every physical page in
memory provides the hypervisor with a simple mechanism to store metadata
(e.g. a refcount) that wouldn't otherwise fit in the very limited number
of software bits available in the host stage-2 page-table entries. This
will be used in subsequent patches when pinning host memory pages for
use by the hypervisor at EL2.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-4-will@kernel.org
---
 arch/arm64/include/asm/kvm_pkvm.h    | 26 +++++++++++++++++++++++
 arch/arm64/kvm/hyp/include/nvhe/mm.h | 14 +------------
 arch/arm64/kvm/hyp/nvhe/mm.c         | 31 ++++++++++++++++++++++++----
 arch/arm64/kvm/hyp/nvhe/page_alloc.c |  4 +---
 arch/arm64/kvm/hyp/nvhe/setup.c      |  7 +++----
 arch/arm64/kvm/pkvm.c                | 18 ++--------------
 6 files changed, 60 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 9f4ad2a8df59c..8f7b8a2314bbb 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -14,6 +14,32 @@
 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
 extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
 
+static inline unsigned long
+hyp_vmemmap_memblock_size(struct memblock_region *reg, size_t vmemmap_entry_size)
+{
+	unsigned long nr_pages = reg->size >> PAGE_SHIFT;
+	unsigned long start, end;
+
+	start = (reg->base >> PAGE_SHIFT) * vmemmap_entry_size;
+	end = start + nr_pages * vmemmap_entry_size;
+	start = ALIGN_DOWN(start, PAGE_SIZE);
+	end = ALIGN(end, PAGE_SIZE);
+
+	return end - start;
+}
+
+static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
+{
+	unsigned long res = 0, i;
+
+	for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) {
+		res += hyp_vmemmap_memblock_size(&kvm_nvhe_sym(hyp_memory)[i],
+						 vmemmap_entry_size);
+	}
+
+	return res >> PAGE_SHIFT;
+}
+
 static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
 {
 	unsigned long total = 0, i;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index 42d8eb9bfe725..b2ee6d5df55b1 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -15,7 +15,7 @@ extern hyp_spinlock_t pkvm_pgd_lock;
 
 int hyp_create_idmap(u32 hyp_va_bits);
 int hyp_map_vectors(void);
-int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back);
+int hyp_back_vmemmap(phys_addr_t back);
 int pkvm_cpu_set_vector(enum arm64_hyp_spectre_vector slot);
 int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot);
 int pkvm_create_mappings_locked(void *from, void *to, enum kvm_pgtable_prot prot);
@@ -24,16 +24,4 @@ int __pkvm_create_private_mapping(phys_addr_t phys, size_t size,
 				  unsigned long *haddr);
 int pkvm_alloc_private_va_range(size_t size, unsigned long *haddr);
 
-static inline void hyp_vmemmap_range(phys_addr_t phys, unsigned long size,
-				     unsigned long *start, unsigned long *end)
-{
-	unsigned long nr_pages = size >> PAGE_SHIFT;
-	struct hyp_page *p = hyp_phys_to_page(phys);
-
-	*start = (unsigned long)p;
-	*end = *start + nr_pages * sizeof(struct hyp_page);
-	*start = ALIGN_DOWN(*start, PAGE_SIZE);
-	*end = ALIGN(*end, PAGE_SIZE);
-}
-
 #endif /* __KVM_HYP_MM_H */
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 96193cb31a399..d3a3b47181de0 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -129,13 +129,36 @@ int pkvm_create_mappings(void *from, void *to, enum kvm_pgtable_prot prot)
 	return ret;
 }
 
-int hyp_back_vmemmap(phys_addr_t phys, unsigned long size, phys_addr_t back)
+int hyp_back_vmemmap(phys_addr_t back)
 {
-	unsigned long start, end;
+	unsigned long i, start, size, end = 0;
+	int ret;
 
-	hyp_vmemmap_range(phys, size, &start, &end);
+	for (i = 0; i < hyp_memblock_nr; i++) {
+		start = hyp_memory[i].base;
+		start = ALIGN_DOWN((u64)hyp_phys_to_page(start), PAGE_SIZE);
+		/*
+		 * The begining of the hyp_vmemmap region for the current
+		 * memblock may already be backed by the page backing the end
+		 * the previous region, so avoid mapping it twice.
+		 */
+		start = max(start, end);
+
+		end = hyp_memory[i].base + hyp_memory[i].size;
+		end = PAGE_ALIGN((u64)hyp_phys_to_page(end));
+		if (start >= end)
+			continue;
+
+		size = end - start;
+		ret = __pkvm_create_mappings(start, size, back, PAGE_HYP);
+		if (ret)
+			return ret;
+
+		memset(hyp_phys_to_virt(back), 0, size);
+		back += size;
+	}
 
-	return __pkvm_create_mappings(start, end - start, back, PAGE_HYP);
+	return 0;
 }
 
 static void *__hyp_bp_vect_base;
diff --git a/arch/arm64/kvm/hyp/nvhe/page_alloc.c b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
index dad88e2035984..803ba3222e75f 100644
--- a/arch/arm64/kvm/hyp/nvhe/page_alloc.c
+++ b/arch/arm64/kvm/hyp/nvhe/page_alloc.c
@@ -236,10 +236,8 @@ int hyp_pool_init(struct hyp_pool *pool, u64 pfn, unsigned int nr_pages,
 
 	/* Init the vmemmap portion */
 	p = hyp_phys_to_page(phys);
-	for (i = 0; i < nr_pages; i++) {
-		p[i].order = 0;
+	for (i = 0; i < nr_pages; i++)
 		hyp_set_page_refcounted(&p[i]);
-	}
 
 	/* Attach the unused pages to the buddy tree */
 	for (i = reserved_pages; i < nr_pages; i++)
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index e8d4ea2fcfa09..579eb4f734768 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -31,12 +31,11 @@ static struct hyp_pool hpool;
 
 static int divide_memory_pool(void *virt, unsigned long size)
 {
-	unsigned long vstart, vend, nr_pages;
+	unsigned long nr_pages;
 
 	hyp_early_alloc_init(virt, size);
 
-	hyp_vmemmap_range(__hyp_pa(virt), size, &vstart, &vend);
-	nr_pages = (vend - vstart) >> PAGE_SHIFT;
+	nr_pages = hyp_vmemmap_pages(sizeof(struct hyp_page));
 	vmemmap_base = hyp_early_alloc_contig(nr_pages);
 	if (!vmemmap_base)
 		return -ENOMEM;
@@ -78,7 +77,7 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
-	ret = hyp_back_vmemmap(phys, size, hyp_virt_to_phys(vmemmap_base));
+	ret = hyp_back_vmemmap(hyp_virt_to_phys(vmemmap_base));
 	if (ret)
 		return ret;
 
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index ebecb7c045f43..34229425b25d3 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -53,7 +53,7 @@ static int __init register_memblock_regions(void)
 
 void __init kvm_hyp_reserve(void)
 {
-	u64 nr_pages, prev, hyp_mem_pages = 0;
+	u64 hyp_mem_pages = 0;
 	int ret;
 
 	if (!is_hyp_mode_available() || is_kernel_in_hyp_mode())
@@ -71,21 +71,7 @@ void __init kvm_hyp_reserve(void)
 
 	hyp_mem_pages += hyp_s1_pgtable_pages();
 	hyp_mem_pages += host_s2_pgtable_pages();
-
-	/*
-	 * The hyp_vmemmap needs to be backed by pages, but these pages
-	 * themselves need to be present in the vmemmap, so compute the number
-	 * of pages needed by looking for a fixed point.
-	 */
-	nr_pages = 0;
-	do {
-		prev = nr_pages;
-		nr_pages = hyp_mem_pages + prev;
-		nr_pages = DIV_ROUND_UP(nr_pages * STRUCT_HYP_PAGE_SIZE,
-					PAGE_SIZE);
-		nr_pages += __hyp_pgtable_max_pages(nr_pages);
-	} while (nr_pages != prev);
-	hyp_mem_pages += nr_pages;
+	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
 
 	/*
 	 * Try to allocate a PMD-aligned region to reduce TLB pressure once
-- 
GitLab


From 0d16d12eb26ef85602ef8a678d94825a66772774 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:37 +0000
Subject: [PATCH 1416/4122] KVM: arm64: Fix-up hyp stage-1 refcounts for all
 pages mapped at EL2

In order to allow unmapping arbitrary memory pages from the hypervisor
stage-1 page-table, fix-up the initial refcount for pages that have been
mapped before the 'vmemmap' array was up and running so that it
accurately accounts for all existing hypervisor mappings.

This is achieved by traversing the entire hypervisor stage-1 page-table
during initialisation of EL2 and updating the corresponding
'struct hyp_page' for each valid mapping.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-5-will@kernel.org
---
 arch/arm64/kvm/hyp/nvhe/setup.c | 62 +++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 579eb4f734768..8f2726d7e2010 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -185,12 +185,11 @@ static void hpool_put_page(void *addr)
 	hyp_put_page(&hpool, addr);
 }
 
-static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
-					 kvm_pte_t *ptep,
-					 enum kvm_pgtable_walk_flags flag,
-					 void * const arg)
+static int fix_host_ownership_walker(u64 addr, u64 end, u32 level,
+				     kvm_pte_t *ptep,
+				     enum kvm_pgtable_walk_flags flag,
+				     void * const arg)
 {
-	struct kvm_pgtable_mm_ops *mm_ops = arg;
 	enum kvm_pgtable_prot prot;
 	enum pkvm_page_state state;
 	kvm_pte_t pte = *ptep;
@@ -199,15 +198,6 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
 	if (!kvm_pte_valid(pte))
 		return 0;
 
-	/*
-	 * Fix-up the refcount for the page-table pages as the early allocator
-	 * was unable to access the hyp_vmemmap and so the buddy allocator has
-	 * initialised the refcount to '1'.
-	 */
-	mm_ops->get_page(ptep);
-	if (flag != KVM_PGTABLE_WALK_LEAF)
-		return 0;
-
 	if (level != (KVM_PGTABLE_MAX_LEVELS - 1))
 		return -EINVAL;
 
@@ -236,12 +226,30 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level,
 	return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
 }
 
-static int finalize_host_mappings(void)
+static int fix_hyp_pgtable_refcnt_walker(u64 addr, u64 end, u32 level,
+					 kvm_pte_t *ptep,
+					 enum kvm_pgtable_walk_flags flag,
+					 void * const arg)
+{
+	struct kvm_pgtable_mm_ops *mm_ops = arg;
+	kvm_pte_t pte = *ptep;
+
+	/*
+	 * Fix-up the refcount for the page-table pages as the early allocator
+	 * was unable to access the hyp_vmemmap and so the buddy allocator has
+	 * initialised the refcount to '1'.
+	 */
+	if (kvm_pte_valid(pte))
+		mm_ops->get_page(ptep);
+
+	return 0;
+}
+
+static int fix_host_ownership(void)
 {
 	struct kvm_pgtable_walker walker = {
-		.cb	= finalize_host_mappings_walker,
-		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
-		.arg	= pkvm_pgtable.mm_ops,
+		.cb	= fix_host_ownership_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
 	};
 	int i, ret;
 
@@ -257,6 +265,18 @@ static int finalize_host_mappings(void)
 	return 0;
 }
 
+static int fix_hyp_pgtable_refcnt(void)
+{
+	struct kvm_pgtable_walker walker = {
+		.cb	= fix_hyp_pgtable_refcnt_walker,
+		.flags	= KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
+		.arg	= pkvm_pgtable.mm_ops,
+	};
+
+	return kvm_pgtable_walk(&pkvm_pgtable, 0, BIT(pkvm_pgtable.ia_bits),
+				&walker);
+}
+
 void __noreturn __pkvm_init_finalise(void)
 {
 	struct kvm_host_data *host_data = this_cpu_ptr(&kvm_host_data);
@@ -286,7 +306,11 @@ void __noreturn __pkvm_init_finalise(void)
 	};
 	pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops;
 
-	ret = finalize_host_mappings();
+	ret = fix_host_ownership();
+	if (ret)
+		goto out;
+
+	ret = fix_hyp_pgtable_refcnt();
 	if (ret)
 		goto out;
 
-- 
GitLab


From 33bc332d4061e95db55594893c4f80105b1dd813 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:38 +0000
Subject: [PATCH 1417/4122] KVM: arm64: Unify identifiers used to distinguish
 host and hypervisor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 'pkvm_component_id' enum type provides constants to refer to the
host and the hypervisor, yet this information is duplicated by the
'pkvm_hyp_id' constant.

Remove the definition of 'pkvm_hyp_id' and move the 'pkvm_component_id'
type definition to 'mem_protect.h' so that it can be used outside of
the memory protection code, for example when initialising the owner for
hypervisor-owned pages.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-6-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 6 +++++-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 8 --------
 arch/arm64/kvm/hyp/nvhe/setup.c               | 2 +-
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 80e99836eac79..f5705a1e972ff 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -51,7 +51,11 @@ struct host_kvm {
 };
 extern struct host_kvm host_kvm;
 
-extern const u8 pkvm_hyp_id;
+/* This corresponds to page-table locking order */
+enum pkvm_component_id {
+	PKVM_ID_HOST,
+	PKVM_ID_HYP,
+};
 
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 1e78acf9662eb..ff86f5bd230f8 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -26,8 +26,6 @@ struct host_kvm host_kvm;
 
 static struct hyp_pool host_s2_pool;
 
-const u8 pkvm_hyp_id = 1;
-
 static void host_lock_component(void)
 {
 	hyp_spin_lock(&host_kvm.lock);
@@ -380,12 +378,6 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt)
 	BUG_ON(ret && ret != -EAGAIN);
 }
 
-/* This corresponds to locking order */
-enum pkvm_component_id {
-	PKVM_ID_HOST,
-	PKVM_ID_HYP,
-};
-
 struct pkvm_mem_transition {
 	u64				nr_pages;
 
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 8f2726d7e2010..0312c9c74a5a4 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -212,7 +212,7 @@ static int fix_host_ownership_walker(u64 addr, u64 end, u32 level,
 	state = pkvm_getstate(kvm_pgtable_hyp_pte_prot(pte));
 	switch (state) {
 	case PKVM_PAGE_OWNED:
-		return host_stage2_set_owner_locked(phys, PAGE_SIZE, pkvm_hyp_id);
+		return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
 	case PKVM_PAGE_SHARED_OWNED:
 		prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
 		break;
-- 
GitLab


From 1ed5c24c26f48ff61dc5d97c655769821f36a622 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:39 +0000
Subject: [PATCH 1418/4122] KVM: arm64: Implement do_donate() helper for
 donating memory

Transferring ownership information of a memory region from one component
to another can be achieved using a "donate" operation, which results
in the previous owner losing access to the underlying pages entirely
and the new owner having exclusive access to the page.

Implement a do_donate() helper, along the same lines as do_{un,}share,
and provide this functionality for the host-{to,from}-hyp cases as this
will later be used to donate/reclaim memory pages to store VM metadata
at EL2.

In a similar manner to the sharing transitions, permission checks are
performed by the hypervisor to ensure that the component initiating the
transition really is the owner of the page and also that the completer
does not currently have a page mapped at the target address.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Co-developed-by: Quentin Perret <qperret@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-7-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |   2 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 239 ++++++++++++++++++
 2 files changed, 241 insertions(+)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index f5705a1e972ff..c87b19b2d4680 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -60,6 +60,8 @@ enum pkvm_component_id {
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
 int __pkvm_host_unshare_hyp(u64 pfn);
+int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
+int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
 
 bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index ff86f5bd230f8..10069cd327873 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -391,6 +391,9 @@ struct pkvm_mem_transition {
 				/* Address in the completer's address space */
 				u64	completer_addr;
 			} host;
+			struct {
+				u64	completer_addr;
+			} hyp;
 		};
 	} initiator;
 
@@ -404,6 +407,10 @@ struct pkvm_mem_share {
 	const enum kvm_pgtable_prot		completer_prot;
 };
 
+struct pkvm_mem_donation {
+	const struct pkvm_mem_transition	tx;
+};
+
 struct check_walk_data {
 	enum pkvm_page_state	desired;
 	enum pkvm_page_state	(*get_page_state)(kvm_pte_t pte);
@@ -503,6 +510,46 @@ static int host_initiate_unshare(u64 *completer_addr,
 	return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED);
 }
 
+static int host_initiate_donation(u64 *completer_addr,
+				  const struct pkvm_mem_transition *tx)
+{
+	u8 owner_id = tx->completer.id;
+	u64 size = tx->nr_pages * PAGE_SIZE;
+
+	*completer_addr = tx->initiator.host.completer_addr;
+	return host_stage2_set_owner_locked(tx->initiator.addr, size, owner_id);
+}
+
+static bool __host_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
+{
+	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
+		 tx->initiator.id != PKVM_ID_HYP);
+}
+
+static int __host_ack_transition(u64 addr, const struct pkvm_mem_transition *tx,
+				 enum pkvm_page_state state)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+
+	if (__host_ack_skip_pgtable_check(tx))
+		return 0;
+
+	return __host_check_page_state_range(addr, size, state);
+}
+
+static int host_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	return __host_ack_transition(addr, tx, PKVM_NOPAGE);
+}
+
+static int host_complete_donation(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	u8 host_id = tx->completer.id;
+
+	return host_stage2_set_owner_locked(addr, size, host_id);
+}
+
 static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte)
 {
 	if (!kvm_pte_valid(pte))
@@ -523,6 +570,27 @@ static int __hyp_check_page_state_range(u64 addr, u64 size,
 	return check_page_state_range(&pkvm_pgtable, addr, size, &d);
 }
 
+static int hyp_request_donation(u64 *completer_addr,
+				const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	u64 addr = tx->initiator.addr;
+
+	*completer_addr = tx->initiator.hyp.completer_addr;
+	return __hyp_check_page_state_range(addr, size, PKVM_PAGE_OWNED);
+}
+
+static int hyp_initiate_donation(u64 *completer_addr,
+				 const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+	int ret;
+
+	*completer_addr = tx->initiator.hyp.completer_addr;
+	ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, tx->initiator.addr, size);
+	return (ret != size) ? -EFAULT : 0;
+}
+
 static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx)
 {
 	return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) ||
@@ -554,6 +622,16 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
 					    PKVM_PAGE_SHARED_BORROWED);
 }
 
+static int hyp_ack_donation(u64 addr, const struct pkvm_mem_transition *tx)
+{
+	u64 size = tx->nr_pages * PAGE_SIZE;
+
+	if (__hyp_ack_skip_pgtable_check(tx))
+		return 0;
+
+	return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE);
+}
+
 static int hyp_complete_share(u64 addr, const struct pkvm_mem_transition *tx,
 			      enum kvm_pgtable_prot perms)
 {
@@ -572,6 +650,15 @@ static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx)
 	return (ret != size) ? -EFAULT : 0;
 }
 
+static int hyp_complete_donation(u64 addr,
+				 const struct pkvm_mem_transition *tx)
+{
+	void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE);
+	enum kvm_pgtable_prot prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_OWNED);
+
+	return pkvm_create_mappings_locked(start, end, prot);
+}
+
 static int check_share(struct pkvm_mem_share *share)
 {
 	const struct pkvm_mem_transition *tx = &share->tx;
@@ -724,6 +811,94 @@ static int do_unshare(struct pkvm_mem_share *share)
 	return WARN_ON(__do_unshare(share));
 }
 
+static int check_donation(struct pkvm_mem_donation *donation)
+{
+	const struct pkvm_mem_transition *tx = &donation->tx;
+	u64 completer_addr;
+	int ret;
+
+	switch (tx->initiator.id) {
+	case PKVM_ID_HOST:
+		ret = host_request_owned_transition(&completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_request_donation(&completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	switch (tx->completer.id) {
+	case PKVM_ID_HOST:
+		ret = host_ack_donation(completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_ack_donation(completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int __do_donate(struct pkvm_mem_donation *donation)
+{
+	const struct pkvm_mem_transition *tx = &donation->tx;
+	u64 completer_addr;
+	int ret;
+
+	switch (tx->initiator.id) {
+	case PKVM_ID_HOST:
+		ret = host_initiate_donation(&completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_initiate_donation(&completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	if (ret)
+		return ret;
+
+	switch (tx->completer.id) {
+	case PKVM_ID_HOST:
+		ret = host_complete_donation(completer_addr, tx);
+		break;
+	case PKVM_ID_HYP:
+		ret = hyp_complete_donation(completer_addr, tx);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+/*
+ * do_donate():
+ *
+ * The page owner transfers ownership to another component, losing access
+ * as a consequence.
+ *
+ * Initiator: OWNED	=> NOPAGE
+ * Completer: NOPAGE	=> OWNED
+ */
+static int do_donate(struct pkvm_mem_donation *donation)
+{
+	int ret;
+
+	ret = check_donation(donation);
+	if (ret)
+		return ret;
+
+	return WARN_ON(__do_donate(donation));
+}
+
 int __pkvm_host_share_hyp(u64 pfn)
 {
 	int ret;
@@ -789,3 +964,67 @@ int __pkvm_host_unshare_hyp(u64 pfn)
 
 	return ret;
 }
+
+int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages)
+{
+	int ret;
+	u64 host_addr = hyp_pfn_to_phys(pfn);
+	u64 hyp_addr = (u64)__hyp_va(host_addr);
+	struct pkvm_mem_donation donation = {
+		.tx	= {
+			.nr_pages	= nr_pages,
+			.initiator	= {
+				.id	= PKVM_ID_HOST,
+				.addr	= host_addr,
+				.host	= {
+					.completer_addr = hyp_addr,
+				},
+			},
+			.completer	= {
+				.id	= PKVM_ID_HYP,
+			},
+		},
+	};
+
+	host_lock_component();
+	hyp_lock_component();
+
+	ret = do_donate(&donation);
+
+	hyp_unlock_component();
+	host_unlock_component();
+
+	return ret;
+}
+
+int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
+{
+	int ret;
+	u64 host_addr = hyp_pfn_to_phys(pfn);
+	u64 hyp_addr = (u64)__hyp_va(host_addr);
+	struct pkvm_mem_donation donation = {
+		.tx	= {
+			.nr_pages	= nr_pages,
+			.initiator	= {
+				.id	= PKVM_ID_HYP,
+				.addr	= hyp_addr,
+				.hyp	= {
+					.completer_addr = host_addr,
+				},
+			},
+			.completer	= {
+				.id	= PKVM_ID_HOST,
+			},
+		},
+	};
+
+	host_lock_component();
+	hyp_lock_component();
+
+	ret = do_donate(&donation);
+
+	hyp_unlock_component();
+	host_unlock_component();
+
+	return ret;
+}
-- 
GitLab


From 43c1ff8b75011bc3e3e923adf31ba815864a2494 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:40 +0000
Subject: [PATCH 1419/4122] KVM: arm64: Prevent the donation of no-map pages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Memory regions marked as "no-map" in the host device-tree routinely
include TrustZone carev-outs and DMA pools. Although donating such pages
to the hypervisor may not breach confidentiality, it could be used to
corrupt its state in uncontrollable ways. To prevent this, let's block
host-initiated memory transitions targeting "no-map" pages altogether in
nVHE protected mode as there should be no valid reason to do this in
current operation.

Thankfully, the pKVM EL2 hypervisor has a full copy of the host's list
of memblock regions, so we can easily check for the presence of the
MEMBLOCK_NOMAP flag on a region containing pages being donated from the
host.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-8-will@kernel.org
---
 arch/arm64/kvm/hyp/nvhe/mem_protect.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 10069cd327873..f7e3afaf9f119 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -193,7 +193,7 @@ struct kvm_mem_range {
 	u64 end;
 };
 
-static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
+static struct memblock_region *find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
 {
 	int cur, left = 0, right = hyp_memblock_nr;
 	struct memblock_region *reg;
@@ -216,18 +216,28 @@ static bool find_mem_range(phys_addr_t addr, struct kvm_mem_range *range)
 		} else {
 			range->start = reg->base;
 			range->end = end;
-			return true;
+			return reg;
 		}
 	}
 
-	return false;
+	return NULL;
 }
 
 bool addr_is_memory(phys_addr_t phys)
 {
 	struct kvm_mem_range range;
 
-	return find_mem_range(phys, &range);
+	return !!find_mem_range(phys, &range);
+}
+
+static bool addr_is_allowed_memory(phys_addr_t phys)
+{
+	struct memblock_region *reg;
+	struct kvm_mem_range range;
+
+	reg = find_mem_range(phys, &range);
+
+	return reg && !(reg->flags & MEMBLOCK_NOMAP);
 }
 
 static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
@@ -346,7 +356,7 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr
 static int host_stage2_idmap(u64 addr)
 {
 	struct kvm_mem_range range;
-	bool is_memory = find_mem_range(addr, &range);
+	bool is_memory = !!find_mem_range(addr, &range);
 	enum kvm_pgtable_prot prot;
 	int ret;
 
@@ -424,7 +434,7 @@ static int __check_page_state_visitor(u64 addr, u64 end, u32 level,
 	struct check_walk_data *d = arg;
 	kvm_pte_t pte = *ptep;
 
-	if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte)))
+	if (kvm_pte_valid(pte) && !addr_is_allowed_memory(kvm_pte_to_phys(pte)))
 		return -EINVAL;
 
 	return d->get_page_state(pte) == d->desired ? 0 : -EPERM;
-- 
GitLab


From 9926cfce8dcb880255f30ab9ac930add787e1ead Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:41 +0000
Subject: [PATCH 1420/4122] KVM: arm64: Add helpers to pin memory shared with
 the hypervisor at EL2

Add helpers allowing the hypervisor to check whether a range of pages
are currently shared by the host, and 'pin' them if so by blocking host
unshare operations until the memory has been unpinned.

This will allow the hypervisor to take references on host-provided
data-structures (e.g. 'struct kvm') with the guarantee that these pages
will remain in a stable state until the hypervisor decides to release
them, for example during guest teardown.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-9-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  3 ++
 arch/arm64/kvm/hyp/include/nvhe/memory.h      |  7 ++-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 48 +++++++++++++++++++
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index c87b19b2d4680..998bf165af711 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -69,6 +69,9 @@ int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
 int kvm_host_prepare_stage2(void *pgt_pool_base);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
+int hyp_pin_shared_mem(void *from, void *to);
+void hyp_unpin_shared_mem(void *from, void *to);
+
 static __always_inline void __load_host_stage2(void)
 {
 	if (static_branch_likely(&kvm_protected_mode_initialized))
diff --git a/arch/arm64/kvm/hyp/include/nvhe/memory.h b/arch/arm64/kvm/hyp/include/nvhe/memory.h
index 9422900e5c6a0..ab205c4d67748 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/memory.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/memory.h
@@ -55,10 +55,15 @@ static inline void hyp_page_ref_inc(struct hyp_page *p)
 	p->refcount++;
 }
 
-static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+static inline void hyp_page_ref_dec(struct hyp_page *p)
 {
 	BUG_ON(!p->refcount);
 	p->refcount--;
+}
+
+static inline int hyp_page_ref_dec_and_test(struct hyp_page *p)
+{
+	hyp_page_ref_dec(p);
 	return (p->refcount == 0);
 }
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index f7e3afaf9f119..83c2f67e1b582 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -625,6 +625,9 @@ static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx)
 {
 	u64 size = tx->nr_pages * PAGE_SIZE;
 
+	if (tx->initiator.id == PKVM_ID_HOST && hyp_page_count((void *)addr))
+		return -EBUSY;
+
 	if (__hyp_ack_skip_pgtable_check(tx))
 		return 0;
 
@@ -1038,3 +1041,48 @@ int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages)
 
 	return ret;
 }
+
+int hyp_pin_shared_mem(void *from, void *to)
+{
+	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
+	u64 end = PAGE_ALIGN((u64)to);
+	u64 size = end - start;
+	int ret;
+
+	host_lock_component();
+	hyp_lock_component();
+
+	ret = __host_check_page_state_range(__hyp_pa(start), size,
+					    PKVM_PAGE_SHARED_OWNED);
+	if (ret)
+		goto unlock;
+
+	ret = __hyp_check_page_state_range(start, size,
+					   PKVM_PAGE_SHARED_BORROWED);
+	if (ret)
+		goto unlock;
+
+	for (cur = start; cur < end; cur += PAGE_SIZE)
+		hyp_page_ref_inc(hyp_virt_to_page(cur));
+
+unlock:
+	hyp_unlock_component();
+	host_unlock_component();
+
+	return ret;
+}
+
+void hyp_unpin_shared_mem(void *from, void *to)
+{
+	u64 cur, start = ALIGN_DOWN((u64)from, PAGE_SIZE);
+	u64 end = PAGE_ALIGN((u64)to);
+
+	host_lock_component();
+	hyp_lock_component();
+
+	for (cur = start; cur < end; cur += PAGE_SIZE)
+		hyp_page_ref_dec(hyp_virt_to_page(cur));
+
+	hyp_unlock_component();
+	host_unlock_component();
+}
-- 
GitLab


From 4d968b12e6bbe4440f4f220c41d779e02df8af1a Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:42 +0000
Subject: [PATCH 1421/4122] KVM: arm64: Include asm/kvm_mmu.h in
 nvhe/mem_protect.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

nvhe/mem_protect.h refers to __load_stage2() in the definition of
__load_host_stage2() but doesn't include the relevant header.

Include asm/kvm_mmu.h in nvhe/mem_protect.h so that users of the latter
don't have to do this themselves.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-10-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 998bf165af711..3bea816296dc8 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -8,6 +8,7 @@
 #define __KVM_NVHE_MEM_PROTECT__
 #include <linux/kvm_host.h>
 #include <asm/kvm_hyp.h>
+#include <asm/kvm_mmu.h>
 #include <asm/kvm_pgtable.h>
 #include <asm/virt.h>
 #include <nvhe/spinlock.h>
-- 
GitLab


From 1c80002e3264552d8b9c0e162e09aa4087403716 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Thu, 10 Nov 2022 19:02:43 +0000
Subject: [PATCH 1422/4122] KVM: arm64: Add hyp_spinlock_t static initializer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce a static initializer macro for 'hyp_spinlock_t' so that it is
straightforward to instantiate global locks at EL2. This will be later
utilised for locking the VM table in the hypervisor.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-11-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/spinlock.h | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
index 4652fd04bdbee..7c7ea8c55405f 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/spinlock.h
@@ -28,9 +28,17 @@ typedef union hyp_spinlock {
 	};
 } hyp_spinlock_t;
 
+#define __HYP_SPIN_LOCK_INITIALIZER \
+	{ .__val = 0 }
+
+#define __HYP_SPIN_LOCK_UNLOCKED \
+	((hyp_spinlock_t) __HYP_SPIN_LOCK_INITIALIZER)
+
+#define DEFINE_HYP_SPINLOCK(x)	hyp_spinlock_t x = __HYP_SPIN_LOCK_UNLOCKED
+
 #define hyp_spin_lock_init(l)						\
 do {									\
-	*(l) = (hyp_spinlock_t){ .__val = 0 };				\
+	*(l) = __HYP_SPIN_LOCK_UNLOCKED;				\
 } while (0)
 
 static inline void hyp_spin_lock(hyp_spinlock_t *lock)
-- 
GitLab


From 5304002dc3754a5663d75c977bfa2d9e3c08906d Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:44 +0000
Subject: [PATCH 1423/4122] KVM: arm64: Rename 'host_kvm' to 'host_mmu'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In preparation for introducing VM and vCPU state at EL2, rename the
existing 'struct host_kvm' and its singleton 'host_kvm' instance to
'host_mmu' so as to avoid confusion between the structure tracking the
host stage-2 MMU state and the host instance of a 'struct kvm' for a
protected guest.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-12-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  6 +--
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 46 +++++++++----------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 3bea816296dc8..0a6d3e7f2a435 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -44,13 +44,13 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
 	return prot & PKVM_PAGE_STATE_PROT_MASK;
 }
 
-struct host_kvm {
+struct host_mmu {
 	struct kvm_arch arch;
 	struct kvm_pgtable pgt;
 	struct kvm_pgtable_mm_ops mm_ops;
 	hyp_spinlock_t lock;
 };
-extern struct host_kvm host_kvm;
+extern struct host_mmu host_mmu;
 
 /* This corresponds to page-table locking order */
 enum pkvm_component_id {
@@ -76,7 +76,7 @@ void hyp_unpin_shared_mem(void *from, void *to);
 static __always_inline void __load_host_stage2(void)
 {
 	if (static_branch_likely(&kvm_protected_mode_initialized))
-		__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
+		__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
 	else
 		write_sysreg(0, vttbr_el2);
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 83c2f67e1b582..06c6a24c0eae6 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -22,18 +22,18 @@
 #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
 
 extern unsigned long hyp_nr_cpus;
-struct host_kvm host_kvm;
+struct host_mmu host_mmu;
 
 static struct hyp_pool host_s2_pool;
 
 static void host_lock_component(void)
 {
-	hyp_spin_lock(&host_kvm.lock);
+	hyp_spin_lock(&host_mmu.lock);
 }
 
 static void host_unlock_component(void)
 {
-	hyp_spin_unlock(&host_kvm.lock);
+	hyp_spin_unlock(&host_mmu.lock);
 }
 
 static void hyp_lock_component(void)
@@ -88,7 +88,7 @@ static int prepare_s2_pool(void *pgt_pool_base)
 	if (ret)
 		return ret;
 
-	host_kvm.mm_ops = (struct kvm_pgtable_mm_ops) {
+	host_mmu.mm_ops = (struct kvm_pgtable_mm_ops) {
 		.zalloc_pages_exact = host_s2_zalloc_pages_exact,
 		.zalloc_page = host_s2_zalloc_page,
 		.phys_to_virt = hyp_phys_to_virt,
@@ -109,7 +109,7 @@ static void prepare_host_vtcr(void)
 	parange = kvm_get_parange(id_aa64mmfr0_el1_sys_val);
 	phys_shift = id_aa64mmfr0_parange_to_phys_shift(parange);
 
-	host_kvm.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
+	host_mmu.arch.vtcr = kvm_get_vtcr(id_aa64mmfr0_el1_sys_val,
 					  id_aa64mmfr1_el1_sys_val, phys_shift);
 }
 
@@ -117,25 +117,25 @@ static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot pr
 
 int kvm_host_prepare_stage2(void *pgt_pool_base)
 {
-	struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
+	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
 	int ret;
 
 	prepare_host_vtcr();
-	hyp_spin_lock_init(&host_kvm.lock);
-	mmu->arch = &host_kvm.arch;
+	hyp_spin_lock_init(&host_mmu.lock);
+	mmu->arch = &host_mmu.arch;
 
 	ret = prepare_s2_pool(pgt_pool_base);
 	if (ret)
 		return ret;
 
-	ret = __kvm_pgtable_stage2_init(&host_kvm.pgt, mmu,
-					&host_kvm.mm_ops, KVM_HOST_S2_FLAGS,
+	ret = __kvm_pgtable_stage2_init(&host_mmu.pgt, mmu,
+					&host_mmu.mm_ops, KVM_HOST_S2_FLAGS,
 					host_stage2_force_pte_cb);
 	if (ret)
 		return ret;
 
-	mmu->pgd_phys = __hyp_pa(host_kvm.pgt.pgd);
-	mmu->pgt = &host_kvm.pgt;
+	mmu->pgd_phys = __hyp_pa(host_mmu.pgt.pgd);
+	mmu->pgt = &host_mmu.pgt;
 	atomic64_set(&mmu->vmid.id, 0);
 
 	return 0;
@@ -143,19 +143,19 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
 
 int __pkvm_prot_finalize(void)
 {
-	struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu;
+	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
 	struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);
 
 	if (params->hcr_el2 & HCR_VM)
 		return -EPERM;
 
 	params->vttbr = kvm_get_vttbr(mmu);
-	params->vtcr = host_kvm.arch.vtcr;
+	params->vtcr = host_mmu.arch.vtcr;
 	params->hcr_el2 |= HCR_VM;
 	kvm_flush_dcache_to_poc(params, sizeof(*params));
 
 	write_sysreg(params->hcr_el2, hcr_el2);
-	__load_stage2(&host_kvm.arch.mmu, &host_kvm.arch);
+	__load_stage2(&host_mmu.arch.mmu, &host_mmu.arch);
 
 	/*
 	 * Make sure to have an ISB before the TLB maintenance below but only
@@ -173,7 +173,7 @@ int __pkvm_prot_finalize(void)
 
 static int host_stage2_unmap_dev_all(void)
 {
-	struct kvm_pgtable *pgt = &host_kvm.pgt;
+	struct kvm_pgtable *pgt = &host_mmu.pgt;
 	struct memblock_region *reg;
 	u64 addr = 0;
 	int i, ret;
@@ -258,7 +258,7 @@ static bool range_is_memory(u64 start, u64 end)
 static inline int __host_stage2_idmap(u64 start, u64 end,
 				      enum kvm_pgtable_prot prot)
 {
-	return kvm_pgtable_stage2_map(&host_kvm.pgt, start, end - start, start,
+	return kvm_pgtable_stage2_map(&host_mmu.pgt, start, end - start, start,
 				      prot, &host_s2_pool);
 }
 
@@ -271,7 +271,7 @@ static inline int __host_stage2_idmap(u64 start, u64 end,
 #define host_stage2_try(fn, ...)					\
 	({								\
 		int __ret;						\
-		hyp_assert_lock_held(&host_kvm.lock);			\
+		hyp_assert_lock_held(&host_mmu.lock);			\
 		__ret = fn(__VA_ARGS__);				\
 		if (__ret == -ENOMEM) {					\
 			__ret = host_stage2_unmap_dev_all();		\
@@ -294,8 +294,8 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
 	u32 level;
 	int ret;
 
-	hyp_assert_lock_held(&host_kvm.lock);
-	ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, &level);
+	hyp_assert_lock_held(&host_mmu.lock);
+	ret = kvm_pgtable_get_leaf(&host_mmu.pgt, addr, &pte, &level);
 	if (ret)
 		return ret;
 
@@ -327,7 +327,7 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
 
 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
 {
-	return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
+	return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
 			       addr, size, &host_s2_pool, owner_id);
 }
 
@@ -468,8 +468,8 @@ static int __host_check_page_state_range(u64 addr, u64 size,
 		.get_page_state	= host_get_page_state,
 	};
 
-	hyp_assert_lock_held(&host_kvm.lock);
-	return check_page_state_range(&host_kvm.pgt, addr, size, &d);
+	hyp_assert_lock_held(&host_mmu.lock);
+	return check_page_state_range(&host_mmu.pgt, addr, size, &d);
 }
 
 static int __host_set_page_state_range(u64 addr, u64 size,
-- 
GitLab


From a1ec5c70d3f63d8a143fb83cd7f53bd8ff2f72c8 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Thu, 10 Nov 2022 19:02:45 +0000
Subject: [PATCH 1424/4122] KVM: arm64: Add infrastructure to create and track
 pKVM instances at EL2

Introduce a global table (and lock) to track pKVM instances at EL2, and
provide hypercalls that can be used by the untrusted host to create and
destroy pKVM VMs and their vCPUs. pKVM VM/vCPU state is directly
accessible only by the trusted hypervisor (EL2).

Each pKVM VM is directly associated with an untrusted host KVM instance,
and is referenced by the host using an opaque handle. Future patches
will provide hypercalls to allow the host to initialize/set/get pKVM
VM/vCPU state using the opaque handle.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Co-developed-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
[maz: silence warning on unmap_donated_memory_noclear()]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-13-will@kernel.org
---
 arch/arm64/include/asm/kvm_asm.h              |   3 +
 arch/arm64/include/asm/kvm_host.h             |   8 +
 arch/arm64/include/asm/kvm_pgtable.h          |   8 +
 arch/arm64/include/asm/kvm_pkvm.h             |   8 +
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |   3 +
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h        |  58 +++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c            |  31 ++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         |  14 +
 arch/arm64/kvm/hyp/nvhe/pkvm.c                | 380 ++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/setup.c               |   8 +
 arch/arm64/kvm/hyp/pgtable.c                  |   9 +
 arch/arm64/kvm/pkvm.c                         |   1 +
 12 files changed, 531 insertions(+)
 create mode 100644 arch/arm64/kvm/hyp/include/nvhe/pkvm.h

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index 53035763e48e8..de52ba775d48f 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -76,6 +76,9 @@ enum __kvm_host_smccc_func {
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs,
 	__KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs,
 	__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps,
+	__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
+	__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
+	__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
 };
 
 #define DECLARE_KVM_VHE_SYM(sym)	extern char sym[]
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 45e2136322ba2..d3dd7ab9c79ea 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -115,6 +115,8 @@ struct kvm_smccc_features {
 	unsigned long vendor_hyp_bmap;
 };
 
+typedef unsigned int pkvm_handle_t;
+
 struct kvm_arch {
 	struct kvm_s2_mmu mmu;
 
@@ -166,6 +168,12 @@ struct kvm_arch {
 
 	/* Hypercall features firmware registers' descriptor */
 	struct kvm_smccc_features smccc_feat;
+
+	/*
+	 * For an untrusted host VM, 'pkvm_handle' is used to lookup
+	 * the associated pKVM instance in the hypervisor.
+	 */
+	pkvm_handle_t pkvm_handle;
 };
 
 struct kvm_vcpu_fault_info {
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 3252eb50ecfe5..15c389db19318 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -296,6 +296,14 @@ u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
  */
 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
 
+/**
+ * kvm_pgtable_stage2_pgd_size() - Helper to compute size of a stage-2 PGD
+ * @vtcr:	Content of the VTCR register.
+ *
+ * Return: the size (in bytes) of the stage-2 PGD
+ */
+size_t kvm_pgtable_stage2_pgd_size(u64 vtcr);
+
 /**
  * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  * @pgt:	Uninitialised page-table structure to initialise.
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index 8f7b8a2314bbb..f4e3133d65500 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -9,6 +9,9 @@
 #include <linux/memblock.h>
 #include <asm/kvm_pgtable.h>
 
+/* Maximum number of VMs that can co-exist under pKVM. */
+#define KVM_MAX_PVMS 255
+
 #define HYP_MEMBLOCK_REGIONS 128
 
 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
@@ -40,6 +43,11 @@ static inline unsigned long hyp_vmemmap_pages(size_t vmemmap_entry_size)
 	return res >> PAGE_SHIFT;
 }
 
+static inline unsigned long hyp_vm_table_pages(void)
+{
+	return PAGE_ALIGN(KVM_MAX_PVMS * sizeof(void *)) >> PAGE_SHIFT;
+}
+
 static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages)
 {
 	unsigned long total = 0, i;
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 0a6d3e7f2a435..ce9a796a85eee 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -11,6 +11,7 @@
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_pgtable.h>
 #include <asm/virt.h>
+#include <nvhe/pkvm.h>
 #include <nvhe/spinlock.h>
 
 /*
@@ -68,10 +69,12 @@ bool addr_is_memory(phys_addr_t phys);
 int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);
 int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id);
 int kvm_host_prepare_stage2(void *pgt_pool_base);
+int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd);
 void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
 int hyp_pin_shared_mem(void *from, void *to);
 void hyp_unpin_shared_mem(void *from, void *to);
+void reclaim_guest_pages(struct pkvm_hyp_vm *vm);
 
 static __always_inline void __load_host_stage2(void)
 {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
new file mode 100644
index 0000000000000..8c653a3b9501d
--- /dev/null
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Google LLC
+ * Author: Fuad Tabba <tabba@google.com>
+ */
+
+#ifndef __ARM64_KVM_NVHE_PKVM_H__
+#define __ARM64_KVM_NVHE_PKVM_H__
+
+#include <asm/kvm_pkvm.h>
+
+/*
+ * Holds the relevant data for maintaining the vcpu state completely at hyp.
+ */
+struct pkvm_hyp_vcpu {
+	struct kvm_vcpu vcpu;
+
+	/* Backpointer to the host's (untrusted) vCPU instance. */
+	struct kvm_vcpu *host_vcpu;
+};
+
+/*
+ * Holds the relevant data for running a protected vm.
+ */
+struct pkvm_hyp_vm {
+	struct kvm kvm;
+
+	/* Backpointer to the host's (untrusted) KVM instance. */
+	struct kvm *host_kvm;
+
+	/* The guest's stage-2 page-table managed by the hypervisor. */
+	struct kvm_pgtable pgt;
+
+	/*
+	 * The number of vcpus initialized and ready to run.
+	 * Modifying this is protected by 'vm_table_lock'.
+	 */
+	unsigned int nr_vcpus;
+
+	/* Array of the hyp vCPU structures for this VM. */
+	struct pkvm_hyp_vcpu *vcpus[];
+};
+
+static inline struct pkvm_hyp_vm *
+pkvm_hyp_vcpu_to_hyp_vm(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+	return container_of(hyp_vcpu->vcpu.kvm, struct pkvm_hyp_vm, kvm);
+}
+
+void pkvm_hyp_vm_table_init(void *tbl);
+
+int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
+		   unsigned long pgd_hva);
+int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
+		     unsigned long vcpu_hva);
+int __pkvm_teardown_vm(pkvm_handle_t handle);
+
+#endif /* __ARM64_KVM_NVHE_PKVM_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index 3cea4b6ac23ec..b5f3fcfe91357 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -15,6 +15,7 @@
 
 #include <nvhe/mem_protect.h>
 #include <nvhe/mm.h>
+#include <nvhe/pkvm.h>
 #include <nvhe/trap_handler.h>
 
 DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
@@ -191,6 +192,33 @@ static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt)
 	__pkvm_vcpu_init_traps(kern_hyp_va(vcpu));
 }
 
+static void handle___pkvm_init_vm(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(struct kvm *, host_kvm, host_ctxt, 1);
+	DECLARE_REG(unsigned long, vm_hva, host_ctxt, 2);
+	DECLARE_REG(unsigned long, pgd_hva, host_ctxt, 3);
+
+	host_kvm = kern_hyp_va(host_kvm);
+	cpu_reg(host_ctxt, 1) = __pkvm_init_vm(host_kvm, vm_hva, pgd_hva);
+}
+
+static void handle___pkvm_init_vcpu(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 2);
+	DECLARE_REG(unsigned long, vcpu_hva, host_ctxt, 3);
+
+	host_vcpu = kern_hyp_va(host_vcpu);
+	cpu_reg(host_ctxt, 1) = __pkvm_init_vcpu(handle, host_vcpu, vcpu_hva);
+}
+
+static void handle___pkvm_teardown_vm(struct kvm_cpu_context *host_ctxt)
+{
+	DECLARE_REG(pkvm_handle_t, handle, host_ctxt, 1);
+
+	cpu_reg(host_ctxt, 1) = __pkvm_teardown_vm(handle);
+}
+
 typedef void (*hcall_t)(struct kvm_cpu_context *);
 
 #define HANDLE_FUNC(x)	[__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x
@@ -220,6 +248,9 @@ static const hcall_t host_hcall[] = {
 	HANDLE_FUNC(__vgic_v3_save_aprs),
 	HANDLE_FUNC(__vgic_v3_restore_aprs),
 	HANDLE_FUNC(__pkvm_vcpu_init_traps),
+	HANDLE_FUNC(__pkvm_init_vm),
+	HANDLE_FUNC(__pkvm_init_vcpu),
+	HANDLE_FUNC(__pkvm_teardown_vm),
 };
 
 static void handle_host_hcall(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 06c6a24c0eae6..459957b3082ea 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -141,6 +141,20 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
 	return 0;
 }
 
+int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
+{
+	vm->pgt.pgd = pgd;
+	return 0;
+}
+
+void reclaim_guest_pages(struct pkvm_hyp_vm *vm)
+{
+	unsigned long nr_pages;
+
+	nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
+	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages));
+}
+
 int __pkvm_prot_finalize(void)
 {
 	struct kvm_s2_mmu *mmu = &host_mmu.arch.mmu;
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 85d3b7ae720fb..135c9a095eca2 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -7,6 +7,9 @@
 #include <linux/kvm_host.h>
 #include <linux/mm.h>
 #include <nvhe/fixed_config.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/memory.h>
+#include <nvhe/pkvm.h>
 #include <nvhe/trap_handler.h>
 
 /*
@@ -183,3 +186,380 @@ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
 	pvm_init_traps_aa64mmfr0(vcpu);
 	pvm_init_traps_aa64mmfr1(vcpu);
 }
+
+/*
+ * Start the VM table handle at the offset defined instead of at 0.
+ * Mainly for sanity checking and debugging.
+ */
+#define HANDLE_OFFSET 0x1000
+
+static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
+{
+	return handle - HANDLE_OFFSET;
+}
+
+static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
+{
+	return idx + HANDLE_OFFSET;
+}
+
+/*
+ * Spinlock for protecting state related to the VM table. Protects writes
+ * to 'vm_table' and 'nr_table_entries' as well as reads and writes to
+ * 'last_hyp_vcpu_lookup'.
+ */
+static DEFINE_HYP_SPINLOCK(vm_table_lock);
+
+/*
+ * The table of VM entries for protected VMs in hyp.
+ * Allocated at hyp initialization and setup.
+ */
+static struct pkvm_hyp_vm **vm_table;
+
+void pkvm_hyp_vm_table_init(void *tbl)
+{
+	WARN_ON(vm_table);
+	vm_table = tbl;
+}
+
+/*
+ * Return the hyp vm structure corresponding to the handle.
+ */
+static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
+{
+	unsigned int idx = vm_handle_to_idx(handle);
+
+	if (unlikely(idx >= KVM_MAX_PVMS))
+		return NULL;
+
+	return vm_table[idx];
+}
+
+static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
+{
+	if (host_vcpu)
+		hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
+}
+
+static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
+			     unsigned int nr_vcpus)
+{
+	int i;
+
+	for (i = 0; i < nr_vcpus; i++)
+		unpin_host_vcpu(hyp_vcpus[i]->host_vcpu);
+}
+
+static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
+			     unsigned int nr_vcpus)
+{
+	hyp_vm->host_kvm = host_kvm;
+	hyp_vm->kvm.created_vcpus = nr_vcpus;
+	hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
+}
+
+static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
+			      struct pkvm_hyp_vm *hyp_vm,
+			      struct kvm_vcpu *host_vcpu,
+			      unsigned int vcpu_idx)
+{
+	int ret = 0;
+
+	if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
+		return -EBUSY;
+
+	if (host_vcpu->vcpu_idx != vcpu_idx) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	hyp_vcpu->host_vcpu = host_vcpu;
+
+	hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
+	hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
+	hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;
+
+	hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
+done:
+	if (ret)
+		unpin_host_vcpu(host_vcpu);
+	return ret;
+}
+
+static int find_free_vm_table_entry(struct kvm *host_kvm)
+{
+	int i;
+
+	for (i = 0; i < KVM_MAX_PVMS; ++i) {
+		if (!vm_table[i])
+			return i;
+	}
+
+	return -ENOMEM;
+}
+
+/*
+ * Allocate a VM table entry and insert a pointer to the new vm.
+ *
+ * Return a unique handle to the protected VM on success,
+ * negative error code on failure.
+ */
+static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
+					   struct pkvm_hyp_vm *hyp_vm)
+{
+	struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
+	int idx;
+
+	hyp_assert_lock_held(&vm_table_lock);
+
+	/*
+	 * Initializing protected state might have failed, yet a malicious
+	 * host could trigger this function. Thus, ensure that 'vm_table'
+	 * exists.
+	 */
+	if (unlikely(!vm_table))
+		return -EINVAL;
+
+	idx = find_free_vm_table_entry(host_kvm);
+	if (idx < 0)
+		return idx;
+
+	hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx);
+
+	/* VMID 0 is reserved for the host */
+	atomic64_set(&mmu->vmid.id, idx + 1);
+
+	mmu->arch = &hyp_vm->kvm.arch;
+	mmu->pgt = &hyp_vm->pgt;
+
+	vm_table[idx] = hyp_vm;
+	return hyp_vm->kvm.arch.pkvm_handle;
+}
+
+/*
+ * Deallocate and remove the VM table entry corresponding to the handle.
+ */
+static void remove_vm_table_entry(pkvm_handle_t handle)
+{
+	hyp_assert_lock_held(&vm_table_lock);
+	vm_table[vm_handle_to_idx(handle)] = NULL;
+}
+
+static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
+{
+	return size_add(sizeof(struct pkvm_hyp_vm),
+		size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
+}
+
+static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
+{
+	void *va = (void *)kern_hyp_va(host_va);
+
+	if (!PAGE_ALIGNED(va))
+		return NULL;
+
+	if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
+				   PAGE_ALIGN(size) >> PAGE_SHIFT))
+		return NULL;
+
+	return va;
+}
+
+static void *map_donated_memory(unsigned long host_va, size_t size)
+{
+	void *va = map_donated_memory_noclear(host_va, size);
+
+	if (va)
+		memset(va, 0, size);
+
+	return va;
+}
+
+static void __unmap_donated_memory(void *va, size_t size)
+{
+	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
+				       PAGE_ALIGN(size) >> PAGE_SHIFT));
+}
+
+static void unmap_donated_memory(void *va, size_t size)
+{
+	if (!va)
+		return;
+
+	memset(va, 0, size);
+	__unmap_donated_memory(va, size);
+}
+
+static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size)
+{
+	if (!va)
+		return;
+
+	__unmap_donated_memory(va, size);
+}
+
+/*
+ * Initialize the hypervisor copy of the protected VM state using the
+ * memory donated by the host.
+ *
+ * Unmaps the donated memory from the host at stage 2.
+ *
+ * host_kvm: A pointer to the host's struct kvm.
+ * vm_hva: The host va of the area being donated for the VM state.
+ *	   Must be page aligned.
+ * pgd_hva: The host va of the area being donated for the stage-2 PGD for
+ *	    the VM. Must be page aligned. Its size is implied by the VM's
+ *	    VTCR.
+ *
+ * Return a unique handle to the protected VM on success,
+ * negative error code on failure.
+ */
+int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
+		   unsigned long pgd_hva)
+{
+	struct pkvm_hyp_vm *hyp_vm = NULL;
+	size_t vm_size, pgd_size;
+	unsigned int nr_vcpus;
+	void *pgd = NULL;
+	int ret;
+
+	ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
+	if (ret)
+		return ret;
+
+	nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
+	if (nr_vcpus < 1) {
+		ret = -EINVAL;
+		goto err_unpin_kvm;
+	}
+
+	vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
+	pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
+
+	ret = -ENOMEM;
+
+	hyp_vm = map_donated_memory(vm_hva, vm_size);
+	if (!hyp_vm)
+		goto err_remove_mappings;
+
+	pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
+	if (!pgd)
+		goto err_remove_mappings;
+
+	init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);
+
+	hyp_spin_lock(&vm_table_lock);
+	ret = insert_vm_table_entry(host_kvm, hyp_vm);
+	if (ret < 0)
+		goto err_unlock;
+
+	ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
+	if (ret)
+		goto err_remove_vm_table_entry;
+	hyp_spin_unlock(&vm_table_lock);
+
+	return hyp_vm->kvm.arch.pkvm_handle;
+
+err_remove_vm_table_entry:
+	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle);
+err_unlock:
+	hyp_spin_unlock(&vm_table_lock);
+err_remove_mappings:
+	unmap_donated_memory(hyp_vm, vm_size);
+	unmap_donated_memory(pgd, pgd_size);
+err_unpin_kvm:
+	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
+	return ret;
+}
+
+/*
+ * Initialize the hypervisor copy of the protected vCPU state using the
+ * memory donated by the host.
+ *
+ * handle: The handle for the protected vm.
+ * host_vcpu: A pointer to the corresponding host vcpu.
+ * vcpu_hva: The host va of the area being donated for the vcpu state.
+ *	     Must be page aligned. The size of the area must be equal to
+ *	     the page-aligned size of 'struct pkvm_hyp_vcpu'.
+ * Return 0 on success, negative error code on failure.
+ */
+int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
+		     unsigned long vcpu_hva)
+{
+	struct pkvm_hyp_vcpu *hyp_vcpu;
+	struct pkvm_hyp_vm *hyp_vm;
+	unsigned int idx;
+	int ret;
+
+	hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
+	if (!hyp_vcpu)
+		return -ENOMEM;
+
+	hyp_spin_lock(&vm_table_lock);
+
+	hyp_vm = get_vm_by_handle(handle);
+	if (!hyp_vm) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	idx = hyp_vm->nr_vcpus;
+	if (idx >= hyp_vm->kvm.created_vcpus) {
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx);
+	if (ret)
+		goto unlock;
+
+	hyp_vm->vcpus[idx] = hyp_vcpu;
+	hyp_vm->nr_vcpus++;
+unlock:
+	hyp_spin_unlock(&vm_table_lock);
+
+	if (ret)
+		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+
+	return ret;
+}
+
+int __pkvm_teardown_vm(pkvm_handle_t handle)
+{
+	struct pkvm_hyp_vm *hyp_vm;
+	struct kvm *host_kvm;
+	size_t vm_size;
+	int err;
+
+	hyp_spin_lock(&vm_table_lock);
+	hyp_vm = get_vm_by_handle(handle);
+	if (!hyp_vm) {
+		err = -ENOENT;
+		goto err_unlock;
+	}
+
+	if (WARN_ON(hyp_page_count(hyp_vm))) {
+		err = -EBUSY;
+		goto err_unlock;
+	}
+
+	/* Ensure the VMID is clean before it can be reallocated */
+	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
+	remove_vm_table_entry(handle);
+	hyp_spin_unlock(&vm_table_lock);
+
+	/* Reclaim guest pages (including page-table pages) */
+	reclaim_guest_pages(hyp_vm);
+	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);
+
+	/* Return the metadata pages to the host */
+	host_kvm = hyp_vm->host_kvm;
+	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
+	unmap_donated_memory(hyp_vm, vm_size);
+	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
+	return 0;
+
+err_unlock:
+	hyp_spin_unlock(&vm_table_lock);
+	return err;
+}
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0312c9c74a5a4..2be72fbe72799 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -16,6 +16,7 @@
 #include <nvhe/memory.h>
 #include <nvhe/mem_protect.h>
 #include <nvhe/mm.h>
+#include <nvhe/pkvm.h>
 #include <nvhe/trap_handler.h>
 
 unsigned long hyp_nr_cpus;
@@ -24,6 +25,7 @@ unsigned long hyp_nr_cpus;
 			 (unsigned long)__per_cpu_start)
 
 static void *vmemmap_base;
+static void *vm_table_base;
 static void *hyp_pgt_base;
 static void *host_s2_pgt_base;
 static struct kvm_pgtable_mm_ops pkvm_pgtable_mm_ops;
@@ -40,6 +42,11 @@ static int divide_memory_pool(void *virt, unsigned long size)
 	if (!vmemmap_base)
 		return -ENOMEM;
 
+	nr_pages = hyp_vm_table_pages();
+	vm_table_base = hyp_early_alloc_contig(nr_pages);
+	if (!vm_table_base)
+		return -ENOMEM;
+
 	nr_pages = hyp_s1_pgtable_pages();
 	hyp_pgt_base = hyp_early_alloc_contig(nr_pages);
 	if (!hyp_pgt_base)
@@ -314,6 +321,7 @@ void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
+	pkvm_hyp_vm_table_init(vm_table_base);
 out:
 	/*
 	 * We tail-called to here from handle___pkvm_init() and will not return,
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index cdf8e76b0be14..a1a27f88a312d 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -1200,6 +1200,15 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
 	return 0;
 }
 
+size_t kvm_pgtable_stage2_pgd_size(u64 vtcr)
+{
+	u32 ia_bits = VTCR_EL2_IPA(vtcr);
+	u32 sl0 = FIELD_GET(VTCR_EL2_SL0_MASK, vtcr);
+	u32 start_level = VTCR_EL2_TGRAN_SL0_BASE - sl0;
+
+	return kvm_pgd_pages(ia_bits, start_level) * PAGE_SIZE;
+}
+
 static int stage2_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
 			      enum kvm_pgtable_walk_flags flag,
 			      void * const arg)
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 34229425b25d3..71493136e59c2 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -71,6 +71,7 @@ void __init kvm_hyp_reserve(void)
 
 	hyp_mem_pages += hyp_s1_pgtable_pages();
 	hyp_mem_pages += host_s2_pgtable_pages();
+	hyp_mem_pages += hyp_vm_table_pages();
 	hyp_mem_pages += hyp_vmemmap_pages(STRUCT_HYP_PAGE_SIZE);
 
 	/*
-- 
GitLab


From 9d0c063a4d1d10ef8e6288899b8524413e40cfa0 Mon Sep 17 00:00:00 2001
From: Fuad Tabba <tabba@google.com>
Date: Thu, 10 Nov 2022 19:02:46 +0000
Subject: [PATCH 1425/4122] KVM: arm64: Instantiate pKVM hypervisor VM and vCPU
 structures from EL1

With the pKVM hypervisor at EL2 now offering hypercalls to the host for
creating and destroying VM and vCPU structures, plumb these in to the
existing arm64 KVM backend to ensure that the hypervisor data structures
are allocated and initialised on first vCPU run for a pKVM guest.

In the host, 'struct kvm_protected_vm' is introduced to hold the handle
of the pKVM VM instance as well as to track references to the memory
donated to the hypervisor so that it can be freed back to the host
allocator following VM teardown. The stage-2 page-table, hypervisor VM
and vCPU structures are allocated separately so as to avoid the need for
a large physically-contiguous allocation in the host at run-time.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-14-will@kernel.org
---
 arch/arm64/include/asm/kvm_host.h  |  14 ++-
 arch/arm64/include/asm/kvm_pkvm.h  |   4 +
 arch/arm64/kvm/arm.c               |  14 +++
 arch/arm64/kvm/hyp/hyp-constants.c |   3 +
 arch/arm64/kvm/hyp/nvhe/pkvm.c     |  15 +++-
 arch/arm64/kvm/pkvm.c              | 138 +++++++++++++++++++++++++++++
 6 files changed, 182 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index d3dd7ab9c79ea..467393e7331f1 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -117,6 +117,16 @@ struct kvm_smccc_features {
 
 typedef unsigned int pkvm_handle_t;
 
+struct kvm_protected_vm {
+	pkvm_handle_t handle;
+
+	struct {
+		void *pgd;
+		void *vm;
+		void *vcpus[KVM_MAX_VCPUS];
+	} hyp_donations;
+};
+
 struct kvm_arch {
 	struct kvm_s2_mmu mmu;
 
@@ -170,10 +180,10 @@ struct kvm_arch {
 	struct kvm_smccc_features smccc_feat;
 
 	/*
-	 * For an untrusted host VM, 'pkvm_handle' is used to lookup
+	 * For an untrusted host VM, 'pkvm.handle' is used to lookup
 	 * the associated pKVM instance in the hypervisor.
 	 */
-	pkvm_handle_t pkvm_handle;
+	struct kvm_protected_vm pkvm;
 };
 
 struct kvm_vcpu_fault_info {
diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h
index f4e3133d65500..01129b0d4c689 100644
--- a/arch/arm64/include/asm/kvm_pkvm.h
+++ b/arch/arm64/include/asm/kvm_pkvm.h
@@ -14,6 +14,10 @@
 
 #define HYP_MEMBLOCK_REGIONS 128
 
+int pkvm_init_host_vm(struct kvm *kvm);
+int pkvm_create_hyp_vm(struct kvm *kvm);
+void pkvm_destroy_hyp_vm(struct kvm *kvm);
+
 extern struct memblock_region kvm_nvhe_sym(hyp_memory)[];
 extern unsigned int kvm_nvhe_sym(hyp_memblock_nr);
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 94d33e296e10c..30d6fc5d3a93d 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -37,6 +37,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmu.h>
+#include <asm/kvm_pkvm.h>
 #include <asm/kvm_emulate.h>
 #include <asm/sections.h>
 
@@ -150,6 +151,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	ret = pkvm_init_host_vm(kvm);
+	if (ret)
+		goto out_free_stage2_pgd;
+
 	if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
 		ret = -ENOMEM;
 		goto out_free_stage2_pgd;
@@ -187,6 +192,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 
 	kvm_vgic_destroy(kvm);
 
+	if (is_protected_kvm_enabled())
+		pkvm_destroy_hyp_vm(kvm);
+
 	kvm_destroy_vcpus(kvm);
 
 	kvm_unshare_hyp(kvm, kvm + 1);
@@ -569,6 +577,12 @@ int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu)
 	if (ret)
 		return ret;
 
+	if (is_protected_kvm_enabled()) {
+		ret = pkvm_create_hyp_vm(kvm);
+		if (ret)
+			return ret;
+	}
+
 	if (!irqchip_in_kernel(kvm)) {
 		/*
 		 * Tell the rest of the code that there are userspace irqchip
diff --git a/arch/arm64/kvm/hyp/hyp-constants.c b/arch/arm64/kvm/hyp/hyp-constants.c
index b3742a6691e87..b257a3b4bfc5c 100644
--- a/arch/arm64/kvm/hyp/hyp-constants.c
+++ b/arch/arm64/kvm/hyp/hyp-constants.c
@@ -2,9 +2,12 @@
 
 #include <linux/kbuild.h>
 #include <nvhe/memory.h>
+#include <nvhe/pkvm.h>
 
 int main(void)
 {
 	DEFINE(STRUCT_HYP_PAGE_SIZE,	sizeof(struct hyp_page));
+	DEFINE(PKVM_HYP_VM_SIZE,	sizeof(struct pkvm_hyp_vm));
+	DEFINE(PKVM_HYP_VCPU_SIZE,	sizeof(struct pkvm_hyp_vcpu));
 	return 0;
 }
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 135c9a095eca2..2c73c4640e4dd 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -324,7 +324,7 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
 	if (idx < 0)
 		return idx;
 
-	hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx);
+	hyp_vm->kvm.arch.pkvm.handle = idx_to_vm_handle(idx);
 
 	/* VMID 0 is reserved for the host */
 	atomic64_set(&mmu->vmid.id, idx + 1);
@@ -333,7 +333,7 @@ static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
 	mmu->pgt = &hyp_vm->pgt;
 
 	vm_table[idx] = hyp_vm;
-	return hyp_vm->kvm.arch.pkvm_handle;
+	return hyp_vm->kvm.arch.pkvm.handle;
 }
 
 /*
@@ -458,10 +458,10 @@ int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
 		goto err_remove_vm_table_entry;
 	hyp_spin_unlock(&vm_table_lock);
 
-	return hyp_vm->kvm.arch.pkvm_handle;
+	return hyp_vm->kvm.arch.pkvm.handle;
 
 err_remove_vm_table_entry:
-	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle);
+	remove_vm_table_entry(hyp_vm->kvm.arch.pkvm.handle);
 err_unlock:
 	hyp_spin_unlock(&vm_table_lock);
 err_remove_mappings:
@@ -528,6 +528,7 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
 {
 	struct pkvm_hyp_vm *hyp_vm;
 	struct kvm *host_kvm;
+	unsigned int idx;
 	size_t vm_size;
 	int err;
 
@@ -553,6 +554,12 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
 	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);
 
 	/* Return the metadata pages to the host */
+	for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
+		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
+
+		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+	}
+
 	host_kvm = hyp_vm->host_kvm;
 	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
 	unmap_donated_memory(hyp_vm, vm_size);
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 71493136e59c2..8c443b915e439 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -6,6 +6,7 @@
 
 #include <linux/kvm_host.h>
 #include <linux/memblock.h>
+#include <linux/mutex.h>
 #include <linux/sort.h>
 
 #include <asm/kvm_pkvm.h>
@@ -94,3 +95,140 @@ void __init kvm_hyp_reserve(void)
 	kvm_info("Reserved %lld MiB at 0x%llx\n", hyp_mem_size >> 20,
 		 hyp_mem_base);
 }
+
+/*
+ * Allocates and donates memory for hypervisor VM structs at EL2.
+ *
+ * Allocates space for the VM state, which includes the hyp vm as well as
+ * the hyp vcpus.
+ *
+ * Stores an opaque handler in the kvm struct for future reference.
+ *
+ * Return 0 on success, negative error code on failure.
+ */
+static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
+{
+	size_t pgd_sz, hyp_vm_sz, hyp_vcpu_sz;
+	struct kvm_vcpu *host_vcpu;
+	pkvm_handle_t handle;
+	void *pgd, *hyp_vm;
+	unsigned long idx;
+	int ret;
+
+	if (host_kvm->created_vcpus < 1)
+		return -EINVAL;
+
+	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
+
+	/*
+	 * The PGD pages will be reclaimed using a hyp_memcache which implies
+	 * page granularity. So, use alloc_pages_exact() to get individual
+	 * refcounts.
+	 */
+	pgd = alloc_pages_exact(pgd_sz, GFP_KERNEL_ACCOUNT);
+	if (!pgd)
+		return -ENOMEM;
+
+	/* Allocate memory to donate to hyp for vm and vcpu pointers. */
+	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
+					size_mul(sizeof(void *),
+						 host_kvm->created_vcpus)));
+	hyp_vm = alloc_pages_exact(hyp_vm_sz, GFP_KERNEL_ACCOUNT);
+	if (!hyp_vm) {
+		ret = -ENOMEM;
+		goto free_pgd;
+	}
+
+	/* Donate the VM memory to hyp and let hyp initialize it. */
+	ret = kvm_call_hyp_nvhe(__pkvm_init_vm, host_kvm, hyp_vm, pgd);
+	if (ret < 0)
+		goto free_vm;
+
+	handle = ret;
+
+	host_kvm->arch.pkvm.handle = handle;
+	host_kvm->arch.pkvm.hyp_donations.pgd = pgd;
+	host_kvm->arch.pkvm.hyp_donations.vm = hyp_vm;
+
+	/* Donate memory for the vcpus at hyp and initialize it. */
+	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
+	kvm_for_each_vcpu(idx, host_vcpu, host_kvm) {
+		void *hyp_vcpu;
+
+		/* Indexing of the vcpus to be sequential starting at 0. */
+		if (WARN_ON(host_vcpu->vcpu_idx != idx)) {
+			ret = -EINVAL;
+			goto destroy_vm;
+		}
+
+		hyp_vcpu = alloc_pages_exact(hyp_vcpu_sz, GFP_KERNEL_ACCOUNT);
+		if (!hyp_vcpu) {
+			ret = -ENOMEM;
+			goto destroy_vm;
+		}
+
+		host_kvm->arch.pkvm.hyp_donations.vcpus[idx] = hyp_vcpu;
+
+		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
+					hyp_vcpu);
+		if (ret)
+			goto destroy_vm;
+	}
+
+	return 0;
+
+destroy_vm:
+	pkvm_destroy_hyp_vm(host_kvm);
+	return ret;
+free_vm:
+	free_pages_exact(hyp_vm, hyp_vm_sz);
+free_pgd:
+	free_pages_exact(pgd, pgd_sz);
+	return ret;
+}
+
+int pkvm_create_hyp_vm(struct kvm *host_kvm)
+{
+	int ret = 0;
+
+	mutex_lock(&host_kvm->lock);
+	if (!host_kvm->arch.pkvm.handle)
+		ret = __pkvm_create_hyp_vm(host_kvm);
+	mutex_unlock(&host_kvm->lock);
+
+	return ret;
+}
+
+void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
+{
+	unsigned long idx, nr_vcpus = host_kvm->created_vcpus;
+	size_t pgd_sz, hyp_vm_sz;
+
+	if (host_kvm->arch.pkvm.handle)
+		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
+					  host_kvm->arch.pkvm.handle));
+
+	host_kvm->arch.pkvm.handle = 0;
+
+	for (idx = 0; idx < nr_vcpus; ++idx) {
+		void *hyp_vcpu = host_kvm->arch.pkvm.hyp_donations.vcpus[idx];
+
+		if (!hyp_vcpu)
+			break;
+
+		free_pages_exact(hyp_vcpu, PAGE_ALIGN(PKVM_HYP_VCPU_SIZE));
+	}
+
+	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
+					size_mul(sizeof(void *), nr_vcpus)));
+	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
+
+	free_pages_exact(host_kvm->arch.pkvm.hyp_donations.vm, hyp_vm_sz);
+	free_pages_exact(host_kvm->arch.pkvm.hyp_donations.pgd, pgd_sz);
+}
+
+int pkvm_init_host_vm(struct kvm *host_kvm)
+{
+	mutex_init(&host_kvm->lock);
+	return 0;
+}
-- 
GitLab


From aa6948f82f0b7060fbbac21911dc7996b144ba3c Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:47 +0000
Subject: [PATCH 1426/4122] KVM: arm64: Add per-cpu fixmap infrastructure at
 EL2

Mapping pages in a guest page-table from within the pKVM hypervisor at
EL2 may require cache maintenance to ensure that the initialised page
contents is visible even to non-cacheable (e.g. MMU-off) accesses from
the guest.

In preparation for performing this maintenance at EL2, introduce a
per-vCPU fixmap which allows the pKVM hypervisor to map guest pages
temporarily into its stage-1 page-table for the purposes of cache
maintenance and, in future, poisoning on the reclaim path. The use of a
fixmap avoids the need for memory allocation or locking on the map()
path.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Co-developed-by: Will Deacon <will@kernel.org>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-15-will@kernel.org
---
 arch/arm64/include/asm/kvm_pgtable.h          |  14 +++
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |   2 +
 arch/arm64/kvm/hyp/include/nvhe/mm.h          |   4 +
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         |   1 -
 arch/arm64/kvm/hyp/nvhe/mm.c                  | 104 ++++++++++++++++++
 arch/arm64/kvm/hyp/nvhe/setup.c               |   4 +
 arch/arm64/kvm/hyp/pgtable.c                  |  12 --
 7 files changed, 128 insertions(+), 13 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 15c389db19318..34cb93f3c96de 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -42,6 +42,8 @@ typedef u64 kvm_pte_t;
 #define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
 #define KVM_PTE_ADDR_51_48		GENMASK(15, 12)
 
+#define KVM_PHYS_INVALID		(-1ULL)
+
 static inline bool kvm_pte_valid(kvm_pte_t pte)
 {
 	return pte & KVM_PTE_VALID;
@@ -57,6 +59,18 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
 	return pa;
 }
 
+static inline kvm_pte_t kvm_phys_to_pte(u64 pa)
+{
+	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
+
+	if (PAGE_SHIFT == 16) {
+		pa &= GENMASK(51, 48);
+		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
+	}
+
+	return pte;
+}
+
 static inline u64 kvm_granule_shift(u32 level)
 {
 	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index ce9a796a85eee..ef31a1872c93b 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -59,6 +59,8 @@ enum pkvm_component_id {
 	PKVM_ID_HYP,
 };
 
+extern unsigned long hyp_nr_cpus;
+
 int __pkvm_prot_finalize(void);
 int __pkvm_host_share_hyp(u64 pfn);
 int __pkvm_host_unshare_hyp(u64 pfn);
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mm.h b/arch/arm64/kvm/hyp/include/nvhe/mm.h
index b2ee6d5df55b1..d5ec972b5c1ec 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mm.h
@@ -13,6 +13,10 @@
 extern struct kvm_pgtable pkvm_pgtable;
 extern hyp_spinlock_t pkvm_pgd_lock;
 
+int hyp_create_pcpu_fixmap(void);
+void *hyp_fixmap_map(phys_addr_t phys);
+void hyp_fixmap_unmap(void);
+
 int hyp_create_idmap(u32 hyp_va_bits);
 int hyp_map_vectors(void);
 int hyp_back_vmemmap(phys_addr_t back);
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 459957b3082ea..8b4d3f0aa7a07 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -21,7 +21,6 @@
 
 #define KVM_HOST_S2_FLAGS (KVM_PGTABLE_S2_NOFWB | KVM_PGTABLE_S2_IDMAP)
 
-extern unsigned long hyp_nr_cpus;
 struct host_mmu host_mmu;
 
 static struct hyp_pool host_s2_pool;
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index d3a3b47181de0..5648ac21e62d0 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -14,6 +14,7 @@
 #include <nvhe/early_alloc.h>
 #include <nvhe/gfp.h>
 #include <nvhe/memory.h>
+#include <nvhe/mem_protect.h>
 #include <nvhe/mm.h>
 #include <nvhe/spinlock.h>
 
@@ -25,6 +26,12 @@ unsigned int hyp_memblock_nr;
 
 static u64 __io_map_base;
 
+struct hyp_fixmap_slot {
+	u64 addr;
+	kvm_pte_t *ptep;
+};
+static DEFINE_PER_CPU(struct hyp_fixmap_slot, fixmap_slots);
+
 static int __pkvm_create_mappings(unsigned long start, unsigned long size,
 				  unsigned long phys, enum kvm_pgtable_prot prot)
 {
@@ -212,6 +219,103 @@ int hyp_map_vectors(void)
 	return 0;
 }
 
+void *hyp_fixmap_map(phys_addr_t phys)
+{
+	struct hyp_fixmap_slot *slot = this_cpu_ptr(&fixmap_slots);
+	kvm_pte_t pte, *ptep = slot->ptep;
+
+	pte = *ptep;
+	pte &= ~kvm_phys_to_pte(KVM_PHYS_INVALID);
+	pte |= kvm_phys_to_pte(phys) | KVM_PTE_VALID;
+	WRITE_ONCE(*ptep, pte);
+	dsb(ishst);
+
+	return (void *)slot->addr;
+}
+
+static void fixmap_clear_slot(struct hyp_fixmap_slot *slot)
+{
+	kvm_pte_t *ptep = slot->ptep;
+	u64 addr = slot->addr;
+
+	WRITE_ONCE(*ptep, *ptep & ~KVM_PTE_VALID);
+
+	/*
+	 * Irritatingly, the architecture requires that we use inner-shareable
+	 * broadcast TLB invalidation here in case another CPU speculates
+	 * through our fixmap and decides to create an "amalagamation of the
+	 * values held in the TLB" due to the apparent lack of a
+	 * break-before-make sequence.
+	 *
+	 * https://lore.kernel.org/kvm/20221017115209.2099-1-will@kernel.org/T/#mf10dfbaf1eaef9274c581b81c53758918c1d0f03
+	 */
+	dsb(ishst);
+	__tlbi_level(vale2is, __TLBI_VADDR(addr, 0), (KVM_PGTABLE_MAX_LEVELS - 1));
+	dsb(ish);
+	isb();
+}
+
+void hyp_fixmap_unmap(void)
+{
+	fixmap_clear_slot(this_cpu_ptr(&fixmap_slots));
+}
+
+static int __create_fixmap_slot_cb(u64 addr, u64 end, u32 level, kvm_pte_t *ptep,
+				   enum kvm_pgtable_walk_flags flag,
+				   void * const arg)
+{
+	struct hyp_fixmap_slot *slot = per_cpu_ptr(&fixmap_slots, (u64)arg);
+
+	if (!kvm_pte_valid(*ptep) || level != KVM_PGTABLE_MAX_LEVELS - 1)
+		return -EINVAL;
+
+	slot->addr = addr;
+	slot->ptep = ptep;
+
+	/*
+	 * Clear the PTE, but keep the page-table page refcount elevated to
+	 * prevent it from ever being freed. This lets us manipulate the PTEs
+	 * by hand safely without ever needing to allocate memory.
+	 */
+	fixmap_clear_slot(slot);
+
+	return 0;
+}
+
+static int create_fixmap_slot(u64 addr, u64 cpu)
+{
+	struct kvm_pgtable_walker walker = {
+		.cb	= __create_fixmap_slot_cb,
+		.flags	= KVM_PGTABLE_WALK_LEAF,
+		.arg = (void *)cpu,
+	};
+
+	return kvm_pgtable_walk(&pkvm_pgtable, addr, PAGE_SIZE, &walker);
+}
+
+int hyp_create_pcpu_fixmap(void)
+{
+	unsigned long addr, i;
+	int ret;
+
+	for (i = 0; i < hyp_nr_cpus; i++) {
+		ret = pkvm_alloc_private_va_range(PAGE_SIZE, &addr);
+		if (ret)
+			return ret;
+
+		ret = kvm_pgtable_hyp_map(&pkvm_pgtable, addr, PAGE_SIZE,
+					  __hyp_pa(__hyp_bss_start), PAGE_HYP);
+		if (ret)
+			return ret;
+
+		ret = create_fixmap_slot(addr, i);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
 int hyp_create_idmap(u32 hyp_va_bits)
 {
 	unsigned long start, end;
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 2be72fbe72799..0f69c1393416d 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -321,6 +321,10 @@ void __noreturn __pkvm_init_finalise(void)
 	if (ret)
 		goto out;
 
+	ret = hyp_create_pcpu_fixmap();
+	if (ret)
+		goto out;
+
 	pkvm_hyp_vm_table_init(vm_table_base);
 out:
 	/*
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index a1a27f88a312d..2bcb2d5903bac 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -57,8 +57,6 @@ struct kvm_pgtable_walk_data {
 	u64				end;
 };
 
-#define KVM_PHYS_INVALID (-1ULL)
-
 static bool kvm_phys_is_valid(u64 phys)
 {
 	return phys < BIT(id_aa64mmfr0_parange_to_phys_shift(ID_AA64MMFR0_EL1_PARANGE_MAX));
@@ -122,16 +120,6 @@ static bool kvm_pte_table(kvm_pte_t pte, u32 level)
 	return FIELD_GET(KVM_PTE_TYPE, pte) == KVM_PTE_TYPE_TABLE;
 }
 
-static kvm_pte_t kvm_phys_to_pte(u64 pa)
-{
-	kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK;
-
-	if (PAGE_SHIFT == 16)
-		pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48);
-
-	return pte;
-}
-
 static kvm_pte_t *kvm_pte_follow(kvm_pte_t pte, struct kvm_pgtable_mm_ops *mm_ops)
 {
 	return mm_ops->phys_to_virt(kvm_pte_to_phys(pte));
-- 
GitLab


From 6c165223e9a6384aa1e934b90f2650e71adb972a Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:48 +0000
Subject: [PATCH 1427/4122] KVM: arm64: Initialise hypervisor copies of host
 symbols unconditionally
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The nVHE object at EL2 maintains its own copies of some host variables
so that, when pKVM is enabled, the host cannot directly modify the
hypervisor state. When running in normal nVHE mode, however, these
variables are still mirrored at EL2 but are not initialised.

Initialise the hypervisor symbols from the host copies regardless of
pKVM, ensuring that any reference to this data at EL2 with normal nVHE
will return a sensibly initialised value.

Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-16-will@kernel.org
---
 arch/arm64/kvm/arm.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 30d6fc5d3a93d..584626e11797f 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1884,11 +1884,8 @@ static int do_pkvm_init(u32 hyp_va_bits)
 	return ret;
 }
 
-static int kvm_hyp_init_protection(u32 hyp_va_bits)
+static void kvm_hyp_init_symbols(void)
 {
-	void *addr = phys_to_virt(hyp_mem_base);
-	int ret;
-
 	kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1);
 	kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1);
 	kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1);
@@ -1897,6 +1894,12 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits)
 	kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
+}
+
+static int kvm_hyp_init_protection(u32 hyp_va_bits)
+{
+	void *addr = phys_to_virt(hyp_mem_base);
+	int ret;
 
 	ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP);
 	if (ret)
@@ -2071,6 +2074,8 @@ static int init_hyp_mode(void)
 		cpu_prepare_hyp_mode(cpu);
 	}
 
+	kvm_hyp_init_symbols();
+
 	if (is_protected_kvm_enabled()) {
 		init_cpu_logical_map();
 
@@ -2078,9 +2083,7 @@ static int init_hyp_mode(void)
 			err = -ENODEV;
 			goto out_err;
 		}
-	}
 
-	if (is_protected_kvm_enabled()) {
 		err = kvm_hyp_init_protection(hyp_va_bits);
 		if (err) {
 			kvm_err("Failed to init hyp memory protection\n");
-- 
GitLab


From 13e248aab73d2f1c27b458ef09d38b44f3e5bf2e Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:49 +0000
Subject: [PATCH 1428/4122] KVM: arm64: Provide I-cache invalidation by virtual
 address at EL2

In preparation for handling cache maintenance of guest pages from within
the pKVM hypervisor at EL2, introduce an EL2 copy of icache_inval_pou()
which will later be plumbed into the stage-2 page-table cache
maintenance callbacks, ensuring that the initial contents of pages
mapped as executable into the guest stage-2 page-table is visible to the
instruction fetcher.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-17-will@kernel.org
---
 arch/arm64/include/asm/kvm_hyp.h |  1 +
 arch/arm64/kernel/image-vars.h   |  3 ---
 arch/arm64/kvm/arm.c             |  1 +
 arch/arm64/kvm/hyp/nvhe/cache.S  | 11 +++++++++++
 arch/arm64/kvm/hyp/nvhe/pkvm.c   |  3 +++
 5 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index aa7fa2a08f060..fd99cf09972de 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -123,4 +123,5 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
 
+extern unsigned long kvm_nvhe_sym(__icache_flags);
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 8151412653de2..7f4e43bfaade7 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -71,9 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
 /* Vectors installed by hyp-init on reset HVC. */
 KVM_NVHE_ALIAS(__hyp_stub_vectors);
 
-/* Kernel symbol used by icache_is_vpipt(). */
-KVM_NVHE_ALIAS(__icache_flags);
-
 /* VMID bits set by the KVM VMID allocator */
 KVM_NVHE_ALIAS(kvm_arm_vmid_bits);
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 584626e11797f..d99e93e6ddf71 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1894,6 +1894,7 @@ static void kvm_hyp_init_symbols(void)
 	kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
 	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
+	kvm_nvhe_sym(__icache_flags) = __icache_flags;
 }
 
 static int kvm_hyp_init_protection(u32 hyp_va_bits)
diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S
index 0c367eb5f4e28..85936c17ae407 100644
--- a/arch/arm64/kvm/hyp/nvhe/cache.S
+++ b/arch/arm64/kvm/hyp/nvhe/cache.S
@@ -12,3 +12,14 @@ SYM_FUNC_START(__pi_dcache_clean_inval_poc)
 	ret
 SYM_FUNC_END(__pi_dcache_clean_inval_poc)
 SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc)
+
+SYM_FUNC_START(__pi_icache_inval_pou)
+alternative_if ARM64_HAS_CACHE_DIC
+	isb
+	ret
+alternative_else_nop_endif
+
+	invalidate_icache_by_line x0, x1, x2, x3
+	ret
+SYM_FUNC_END(__pi_icache_inval_pou)
+SYM_FUNC_ALIAS(icache_inval_pou, __pi_icache_inval_pou)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 2c73c4640e4dd..0768307566d42 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -12,6 +12,9 @@
 #include <nvhe/pkvm.h>
 #include <nvhe/trap_handler.h>
 
+/* Used by icache_is_vpipt(). */
+unsigned long __icache_flags;
+
 /*
  * Set trap register values based on features in ID_AA64PFR0.
  */
-- 
GitLab


From 717a7eebac106a5cc5d5493f8eef9cf4ae6edf19 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:50 +0000
Subject: [PATCH 1429/4122] KVM: arm64: Add generic hyp_memcache helpers

The host at EL1 and the pKVM hypervisor at EL2 will soon need to
exchange memory pages dynamically for creating and destroying VM state.

Indeed, the hypervisor will rely on the host to donate memory pages it
can use to create guest stage-2 page-tables and to store VM and vCPU
metadata. In order to ease this process, introduce a
'struct hyp_memcache' which is essentially a linked list of available
pages, indexed by physical addresses so that it can be passed
meaningfully between the different virtual address spaces configured at
EL1 and EL2.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-18-will@kernel.org
---
 arch/arm64/include/asm/kvm_host.h             | 57 +++++++++++++++++++
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +
 arch/arm64/kvm/hyp/nvhe/mm.c                  | 33 +++++++++++
 arch/arm64/kvm/mmu.c                          | 26 +++++++++
 4 files changed, 118 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 467393e7331f1..835987e0f868d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -73,6 +73,63 @@ u32 __attribute_const__ kvm_target_cpu(void);
 int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
 void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
 
+struct kvm_hyp_memcache {
+	phys_addr_t head;
+	unsigned long nr_pages;
+};
+
+static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
+				     phys_addr_t *p,
+				     phys_addr_t (*to_pa)(void *virt))
+{
+	*p = mc->head;
+	mc->head = to_pa(p);
+	mc->nr_pages++;
+}
+
+static inline void *pop_hyp_memcache(struct kvm_hyp_memcache *mc,
+				     void *(*to_va)(phys_addr_t phys))
+{
+	phys_addr_t *p = to_va(mc->head);
+
+	if (!mc->nr_pages)
+		return NULL;
+
+	mc->head = *p;
+	mc->nr_pages--;
+
+	return p;
+}
+
+static inline int __topup_hyp_memcache(struct kvm_hyp_memcache *mc,
+				       unsigned long min_pages,
+				       void *(*alloc_fn)(void *arg),
+				       phys_addr_t (*to_pa)(void *virt),
+				       void *arg)
+{
+	while (mc->nr_pages < min_pages) {
+		phys_addr_t *p = alloc_fn(arg);
+
+		if (!p)
+			return -ENOMEM;
+		push_hyp_memcache(mc, p, to_pa);
+	}
+
+	return 0;
+}
+
+static inline void __free_hyp_memcache(struct kvm_hyp_memcache *mc,
+				       void (*free_fn)(void *virt, void *arg),
+				       void *(*to_va)(phys_addr_t phys),
+				       void *arg)
+{
+	while (mc->nr_pages)
+		free_fn(pop_hyp_memcache(mc, to_va), arg);
+}
+
+void free_hyp_memcache(struct kvm_hyp_memcache *mc);
+int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages);
+
 struct kvm_vmid {
 	atomic64_t id;
 };
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index ef31a1872c93b..420b87e755a42 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -77,6 +77,8 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 int hyp_pin_shared_mem(void *from, void *to);
 void hyp_unpin_shared_mem(void *from, void *to);
 void reclaim_guest_pages(struct pkvm_hyp_vm *vm);
+int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
+		    struct kvm_hyp_memcache *host_mc);
 
 static __always_inline void __load_host_stage2(void)
 {
diff --git a/arch/arm64/kvm/hyp/nvhe/mm.c b/arch/arm64/kvm/hyp/nvhe/mm.c
index 5648ac21e62d0..c80b2c007619a 100644
--- a/arch/arm64/kvm/hyp/nvhe/mm.c
+++ b/arch/arm64/kvm/hyp/nvhe/mm.c
@@ -340,3 +340,36 @@ int hyp_create_idmap(u32 hyp_va_bits)
 
 	return __pkvm_create_mappings(start, end - start, start, PAGE_HYP_EXEC);
 }
+
+static void *admit_host_page(void *arg)
+{
+	struct kvm_hyp_memcache *host_mc = arg;
+
+	if (!host_mc->nr_pages)
+		return NULL;
+
+	/*
+	 * The host still owns the pages in its memcache, so we need to go
+	 * through a full host-to-hyp donation cycle to change it. Fortunately,
+	 * __pkvm_host_donate_hyp() takes care of races for us, so if it
+	 * succeeds we're good to go.
+	 */
+	if (__pkvm_host_donate_hyp(hyp_phys_to_pfn(host_mc->head), 1))
+		return NULL;
+
+	return pop_hyp_memcache(host_mc, hyp_phys_to_virt);
+}
+
+/* Refill our local memcache by poping pages from the one provided by the host. */
+int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
+		    struct kvm_hyp_memcache *host_mc)
+{
+	struct kvm_hyp_memcache tmp = *host_mc;
+	int ret;
+
+	ret =  __topup_hyp_memcache(mc, min_pages, admit_host_page,
+				    hyp_virt_to_phys, &tmp);
+	*host_mc = tmp;
+
+	return ret;
+}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 60ee3d9f01f8c..18061163c607b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -807,6 +807,32 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	}
 }
 
+static void hyp_mc_free_fn(void *addr, void *unused)
+{
+	free_page((unsigned long)addr);
+}
+
+static void *hyp_mc_alloc_fn(void *unused)
+{
+	return (void *)__get_free_page(GFP_KERNEL_ACCOUNT);
+}
+
+void free_hyp_memcache(struct kvm_hyp_memcache *mc)
+{
+	if (is_protected_kvm_enabled())
+		__free_hyp_memcache(mc, hyp_mc_free_fn,
+				    kvm_host_va, NULL);
+}
+
+int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
+{
+	if (!is_protected_kvm_enabled())
+		return 0;
+
+	return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
+				    kvm_host_pa, NULL);
+}
+
 /**
  * kvm_phys_addr_ioremap - map a device range to guest IPA
  *
-- 
GitLab


From 315775ff7c6de497dd07c3f6eff499fb538783eb Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:51 +0000
Subject: [PATCH 1430/4122] KVM: arm64: Consolidate stage-2 initialisation into
 a single function

The initialisation of guest stage-2 page-tables is currently split
across two functions: kvm_init_stage2_mmu() and kvm_arm_setup_stage2().
That is presumably for historical reasons as kvm_arm_setup_stage2()
originates from the (now defunct) KVM port for 32-bit Arm.

Simplify this code path by merging both functions into one, taking care
to map the 'struct kvm' into the hypervisor stage-1 early on in order to
simplify the failure path.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Co-developed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-19-will@kernel.org
---
 arch/arm64/include/asm/kvm_arm.h  |  2 +-
 arch/arm64/include/asm/kvm_host.h |  2 --
 arch/arm64/include/asm/kvm_mmu.h  |  2 +-
 arch/arm64/kvm/arm.c              | 27 +++++++++++++--------------
 arch/arm64/kvm/mmu.c              | 27 ++++++++++++++++++++++++++-
 arch/arm64/kvm/reset.c            | 29 -----------------------------
 6 files changed, 41 insertions(+), 48 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 8aa8492dafc0f..89e63585dae4c 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -135,7 +135,7 @@
  * 40 bits wide (T0SZ = 24).  Systems with a PARange smaller than 40 bits are
  * not known to exist and will break with this configuration.
  *
- * The VTCR_EL2 is configured per VM and is initialised in kvm_arm_setup_stage2().
+ * The VTCR_EL2 is configured per VM and is initialised in kvm_init_stage2_mmu.
  *
  * Note that when using 4K pages, we concatenate two first level page tables
  * together. With 16K pages, we concatenate 16 first level page tables.
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 835987e0f868d..57218f0c449e0 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -990,8 +990,6 @@ int kvm_set_ipa_limit(void);
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
 
-int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type);
-
 static inline bool kvm_vm_is_protected(struct kvm *kvm)
 {
 	return false;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 7784081088e78..e4a7e63694998 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -166,7 +166,7 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 void free_hyp_pgds(void);
 
 void stage2_unmap_vm(struct kvm *kvm);
-int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu);
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
 void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu);
 int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 			  phys_addr_t pa, unsigned long size, bool writable);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index d99e93e6ddf71..f78eefa02f6bd 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -139,28 +139,24 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 {
 	int ret;
 
-	ret = kvm_arm_setup_stage2(kvm, type);
-	if (ret)
-		return ret;
-
-	ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu);
-	if (ret)
-		return ret;
-
 	ret = kvm_share_hyp(kvm, kvm + 1);
 	if (ret)
-		goto out_free_stage2_pgd;
+		return ret;
 
 	ret = pkvm_init_host_vm(kvm);
 	if (ret)
-		goto out_free_stage2_pgd;
+		goto err_unshare_kvm;
 
 	if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
 		ret = -ENOMEM;
-		goto out_free_stage2_pgd;
+		goto err_unshare_kvm;
 	}
 	cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
 
+	ret = kvm_init_stage2_mmu(kvm, &kvm->arch.mmu, type);
+	if (ret)
+		goto err_free_cpumask;
+
 	kvm_vgic_early_init(kvm);
 
 	/* The maximum number of VCPUs is limited by the host's GIC model */
@@ -169,9 +165,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	set_default_spectre(kvm);
 	kvm_arm_init_hypercalls(kvm);
 
-	return ret;
-out_free_stage2_pgd:
-	kvm_free_stage2_pgd(&kvm->arch.mmu);
+	return 0;
+
+err_free_cpumask:
+	free_cpumask_var(kvm->arch.supported_cpus);
+err_unshare_kvm:
+	kvm_unshare_hyp(kvm, kvm + 1);
 	return ret;
 }
 
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 18061163c607b..3e56c6393caeb 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -675,15 +675,40 @@ static struct kvm_pgtable_mm_ops kvm_s2_mm_ops = {
  * kvm_init_stage2_mmu - Initialise a S2 MMU structure
  * @kvm:	The pointer to the KVM structure
  * @mmu:	The pointer to the s2 MMU structure
+ * @type:	The machine type of the virtual machine
  *
  * Allocates only the stage-2 HW PGD level table(s).
  * Note we don't need locking here as this is only called when the VM is
  * created, which can only be done once.
  */
-int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
+int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type)
 {
+	u32 kvm_ipa_limit = get_kvm_ipa_limit();
 	int cpu, err;
 	struct kvm_pgtable *pgt;
+	u64 mmfr0, mmfr1;
+	u32 phys_shift;
+
+	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
+		return -EINVAL;
+
+	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
+	if (phys_shift) {
+		if (phys_shift > kvm_ipa_limit ||
+		    phys_shift < ARM64_MIN_PARANGE_BITS)
+			return -EINVAL;
+	} else {
+		phys_shift = KVM_PHYS_SHIFT;
+		if (phys_shift > kvm_ipa_limit) {
+			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
+				     current->comm);
+			return -EINVAL;
+		}
+	}
+
+	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
 
 	if (mmu->pgt != NULL) {
 		kvm_err("kvm_arch already initialized?\n");
diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c
index 5ae18472205a9..e0267f672b8ab 100644
--- a/arch/arm64/kvm/reset.c
+++ b/arch/arm64/kvm/reset.c
@@ -395,32 +395,3 @@ int kvm_set_ipa_limit(void)
 
 	return 0;
 }
-
-int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
-{
-	u64 mmfr0, mmfr1;
-	u32 phys_shift;
-
-	if (type & ~KVM_VM_TYPE_ARM_IPA_SIZE_MASK)
-		return -EINVAL;
-
-	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
-	if (phys_shift) {
-		if (phys_shift > kvm_ipa_limit ||
-		    phys_shift < ARM64_MIN_PARANGE_BITS)
-			return -EINVAL;
-	} else {
-		phys_shift = KVM_PHYS_SHIFT;
-		if (phys_shift > kvm_ipa_limit) {
-			pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
-				     current->comm);
-			return -EINVAL;
-		}
-	}
-
-	mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
-	mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
-	kvm->arch.vtcr = kvm_get_vtcr(mmfr0, mmfr1, phys_shift);
-
-	return 0;
-}
-- 
GitLab


From 60dfe093ec13b056856c672e1daa35134be38283 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:52 +0000
Subject: [PATCH 1431/4122] KVM: arm64: Instantiate guest stage-2 page-tables
 at EL2

Extend the initialisation of guest data structures within the pKVM
hypervisor at EL2 so that we instantiate a memory pool and a full
'struct kvm_s2_mmu' structure for each VM, with a stage-2 page-table
entirely independent from the one managed by the host at EL1.

The 'struct kvm_pgtable_mm_ops' used by the page-table code is populated
with a set of callbacks that can manage guest pages in the hypervisor
without any direct intervention from the host, allocating page-table
pages from the provided pool and returning these to the host on VM
teardown. To keep things simple, the stage-2 MMU for the guest is
configured identically to the host stage-2 in the VTCR register and so
the IPA size of the guest must match the PA size of the host.

For now, the new page-table is unused as there is no way for the host
to map anything into it. Yet.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-20-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h |   6 ++
 arch/arm64/kvm/hyp/nvhe/mem_protect.c  | 125 ++++++++++++++++++++++++-
 arch/arm64/kvm/mmu.c                   |   4 +-
 3 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index 8c653a3b9501d..d14dfbcb7da16 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -9,6 +9,9 @@
 
 #include <asm/kvm_pkvm.h>
 
+#include <nvhe/gfp.h>
+#include <nvhe/spinlock.h>
+
 /*
  * Holds the relevant data for maintaining the vcpu state completely at hyp.
  */
@@ -30,6 +33,9 @@ struct pkvm_hyp_vm {
 
 	/* The guest's stage-2 page-table managed by the hypervisor. */
 	struct kvm_pgtable pgt;
+	struct kvm_pgtable_mm_ops mm_ops;
+	struct hyp_pool pool;
+	hyp_spinlock_t lock;
 
 	/*
 	 * The number of vcpus initialized and ready to run.
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 8b4d3f0aa7a07..0162afba6dc48 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -25,6 +25,21 @@ struct host_mmu host_mmu;
 
 static struct hyp_pool host_s2_pool;
 
+static DEFINE_PER_CPU(struct pkvm_hyp_vm *, __current_vm);
+#define current_vm (*this_cpu_ptr(&__current_vm))
+
+static void guest_lock_component(struct pkvm_hyp_vm *vm)
+{
+	hyp_spin_lock(&vm->lock);
+	current_vm = vm;
+}
+
+static void guest_unlock_component(struct pkvm_hyp_vm *vm)
+{
+	current_vm = NULL;
+	hyp_spin_unlock(&vm->lock);
+}
+
 static void host_lock_component(void)
 {
 	hyp_spin_lock(&host_mmu.lock);
@@ -140,18 +155,124 @@ int kvm_host_prepare_stage2(void *pgt_pool_base)
 	return 0;
 }
 
+static bool guest_stage2_force_pte_cb(u64 addr, u64 end,
+				      enum kvm_pgtable_prot prot)
+{
+	return true;
+}
+
+static void *guest_s2_zalloc_pages_exact(size_t size)
+{
+	void *addr = hyp_alloc_pages(&current_vm->pool, get_order(size));
+
+	WARN_ON(size != (PAGE_SIZE << get_order(size)));
+	hyp_split_page(hyp_virt_to_page(addr));
+
+	return addr;
+}
+
+static void guest_s2_free_pages_exact(void *addr, unsigned long size)
+{
+	u8 order = get_order(size);
+	unsigned int i;
+
+	for (i = 0; i < (1 << order); i++)
+		hyp_put_page(&current_vm->pool, addr + (i * PAGE_SIZE));
+}
+
+static void *guest_s2_zalloc_page(void *mc)
+{
+	struct hyp_page *p;
+	void *addr;
+
+	addr = hyp_alloc_pages(&current_vm->pool, 0);
+	if (addr)
+		return addr;
+
+	addr = pop_hyp_memcache(mc, hyp_phys_to_virt);
+	if (!addr)
+		return addr;
+
+	memset(addr, 0, PAGE_SIZE);
+	p = hyp_virt_to_page(addr);
+	memset(p, 0, sizeof(*p));
+	p->refcount = 1;
+
+	return addr;
+}
+
+static void guest_s2_get_page(void *addr)
+{
+	hyp_get_page(&current_vm->pool, addr);
+}
+
+static void guest_s2_put_page(void *addr)
+{
+	hyp_put_page(&current_vm->pool, addr);
+}
+
+static void clean_dcache_guest_page(void *va, size_t size)
+{
+	__clean_dcache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
+	hyp_fixmap_unmap();
+}
+
+static void invalidate_icache_guest_page(void *va, size_t size)
+{
+	__invalidate_icache_guest_page(hyp_fixmap_map(__hyp_pa(va)), size);
+	hyp_fixmap_unmap();
+}
+
 int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
 {
-	vm->pgt.pgd = pgd;
+	struct kvm_s2_mmu *mmu = &vm->kvm.arch.mmu;
+	unsigned long nr_pages;
+	int ret;
+
+	nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
+	ret = hyp_pool_init(&vm->pool, hyp_virt_to_pfn(pgd), nr_pages, 0);
+	if (ret)
+		return ret;
+
+	hyp_spin_lock_init(&vm->lock);
+	vm->mm_ops = (struct kvm_pgtable_mm_ops) {
+		.zalloc_pages_exact	= guest_s2_zalloc_pages_exact,
+		.free_pages_exact	= guest_s2_free_pages_exact,
+		.zalloc_page		= guest_s2_zalloc_page,
+		.phys_to_virt		= hyp_phys_to_virt,
+		.virt_to_phys		= hyp_virt_to_phys,
+		.page_count		= hyp_page_count,
+		.get_page		= guest_s2_get_page,
+		.put_page		= guest_s2_put_page,
+		.dcache_clean_inval_poc	= clean_dcache_guest_page,
+		.icache_inval_pou	= invalidate_icache_guest_page,
+	};
+
+	guest_lock_component(vm);
+	ret = __kvm_pgtable_stage2_init(mmu->pgt, mmu, &vm->mm_ops, 0,
+					guest_stage2_force_pte_cb);
+	guest_unlock_component(vm);
+	if (ret)
+		return ret;
+
+	vm->kvm.arch.mmu.pgd_phys = __hyp_pa(vm->pgt.pgd);
+
 	return 0;
 }
 
 void reclaim_guest_pages(struct pkvm_hyp_vm *vm)
 {
+	void *pgd = vm->pgt.pgd;
 	unsigned long nr_pages;
 
 	nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
-	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(vm->pgt.pgd), nr_pages));
+
+	guest_lock_component(vm);
+	kvm_pgtable_stage2_destroy(&vm->pgt);
+	vm->kvm.arch.mmu.pgd_phys = 0ULL;
+	guest_unlock_component(vm);
+
+	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages));
 }
 
 int __pkvm_prot_finalize(void)
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 3e56c6393caeb..962f4472601b1 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -693,7 +693,9 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
 		return -EINVAL;
 
 	phys_shift = KVM_VM_TYPE_ARM_IPA_SIZE(type);
-	if (phys_shift) {
+	if (is_protected_kvm_enabled()) {
+		phys_shift = kvm_ipa_limit;
+	} else if (phys_shift) {
 		if (phys_shift > kvm_ipa_limit ||
 		    phys_shift < ARM64_MIN_PARANGE_BITS)
 			return -EINVAL;
-- 
GitLab


From f41dff4efb918db68923a826e966ca62c7c8e929 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:53 +0000
Subject: [PATCH 1432/4122] KVM: arm64: Return guest memory from EL2 via
 dedicated teardown memcache

Rather than relying on the host to free the previously-donated pKVM
hypervisor VM pages explicitly on teardown, introduce a dedicated
teardown memcache which allows the host to reclaim guest memory
resources without having to keep track of all of the allocations made by
the pKVM hypervisor at EL2.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Co-developed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
[maz: dropped __maybe_unused from unmap_donated_memory_noclear()]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-21-will@kernel.org
---
 arch/arm64/include/asm/kvm_host.h             |  7 +----
 arch/arm64/kvm/hyp/include/nvhe/mem_protect.h |  2 +-
 arch/arm64/kvm/hyp/nvhe/mem_protect.c         | 17 ++++++----
 arch/arm64/kvm/hyp/nvhe/pkvm.c                | 27 ++++++++++++----
 arch/arm64/kvm/pkvm.c                         | 31 ++++---------------
 5 files changed, 40 insertions(+), 44 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 57218f0c449e0..63307e7dc9c5b 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -176,12 +176,7 @@ typedef unsigned int pkvm_handle_t;
 
 struct kvm_protected_vm {
 	pkvm_handle_t handle;
-
-	struct {
-		void *pgd;
-		void *vm;
-		void *vcpus[KVM_MAX_VCPUS];
-	} hyp_donations;
+	struct kvm_hyp_memcache teardown_mc;
 };
 
 struct kvm_arch {
diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
index 420b87e755a42..b7bdbe63deed8 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h
@@ -76,7 +76,7 @@ void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt);
 
 int hyp_pin_shared_mem(void *from, void *to);
 void hyp_unpin_shared_mem(void *from, void *to);
-void reclaim_guest_pages(struct pkvm_hyp_vm *vm);
+void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc);
 int refill_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages,
 		    struct kvm_hyp_memcache *host_mc);
 
diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
index 0162afba6dc48..94cd48f7850e5 100644
--- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c
+++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c
@@ -260,19 +260,24 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
 	return 0;
 }
 
-void reclaim_guest_pages(struct pkvm_hyp_vm *vm)
+void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
 {
-	void *pgd = vm->pgt.pgd;
-	unsigned long nr_pages;
-
-	nr_pages = kvm_pgtable_stage2_pgd_size(vm->kvm.arch.vtcr) >> PAGE_SHIFT;
+	void *addr;
 
+	/* Dump all pgtable pages in the hyp_pool */
 	guest_lock_component(vm);
 	kvm_pgtable_stage2_destroy(&vm->pgt);
 	vm->kvm.arch.mmu.pgd_phys = 0ULL;
 	guest_unlock_component(vm);
 
-	WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(pgd), nr_pages));
+	/* Drain the hyp_pool into the memcache */
+	addr = hyp_alloc_pages(&vm->pool, 0);
+	while (addr) {
+		memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
+		push_hyp_memcache(mc, addr, hyp_virt_to_phys);
+		WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
+		addr = hyp_alloc_pages(&vm->pool, 0);
+	}
 }
 
 int __pkvm_prot_finalize(void)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 0768307566d42..81835c2f4c5a0 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -393,7 +393,7 @@ static void unmap_donated_memory(void *va, size_t size)
 	__unmap_donated_memory(va, size);
 }
 
-static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size)
+static void unmap_donated_memory_noclear(void *va, size_t size)
 {
 	if (!va)
 		return;
@@ -527,8 +527,21 @@ unlock:
 	return ret;
 }
 
+static void
+teardown_donated_memory(struct kvm_hyp_memcache *mc, void *addr, size_t size)
+{
+	size = PAGE_ALIGN(size);
+	memset(addr, 0, size);
+
+	for (void *start = addr; start < addr + size; start += PAGE_SIZE)
+		push_hyp_memcache(mc, start, hyp_virt_to_phys);
+
+	unmap_donated_memory_noclear(addr, size);
+}
+
 int __pkvm_teardown_vm(pkvm_handle_t handle)
 {
+	struct kvm_hyp_memcache *mc;
 	struct pkvm_hyp_vm *hyp_vm;
 	struct kvm *host_kvm;
 	unsigned int idx;
@@ -547,25 +560,27 @@ int __pkvm_teardown_vm(pkvm_handle_t handle)
 		goto err_unlock;
 	}
 
+	host_kvm = hyp_vm->host_kvm;
+
 	/* Ensure the VMID is clean before it can be reallocated */
 	__kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
 	remove_vm_table_entry(handle);
 	hyp_spin_unlock(&vm_table_lock);
 
 	/* Reclaim guest pages (including page-table pages) */
-	reclaim_guest_pages(hyp_vm);
+	mc = &host_kvm->arch.pkvm.teardown_mc;
+	reclaim_guest_pages(hyp_vm, mc);
 	unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);
 
-	/* Return the metadata pages to the host */
+	/* Push the metadata pages to the teardown memcache */
 	for (idx = 0; idx < hyp_vm->nr_vcpus; ++idx) {
 		struct pkvm_hyp_vcpu *hyp_vcpu = hyp_vm->vcpus[idx];
 
-		unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+		teardown_donated_memory(mc, hyp_vcpu, sizeof(*hyp_vcpu));
 	}
 
-	host_kvm = hyp_vm->host_kvm;
 	vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
-	unmap_donated_memory(hyp_vm, vm_size);
+	teardown_donated_memory(mc, hyp_vm, vm_size);
 	hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
 	return 0;
 
diff --git a/arch/arm64/kvm/pkvm.c b/arch/arm64/kvm/pkvm.c
index 8c443b915e439..cf56958b1492a 100644
--- a/arch/arm64/kvm/pkvm.c
+++ b/arch/arm64/kvm/pkvm.c
@@ -147,8 +147,6 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
 	handle = ret;
 
 	host_kvm->arch.pkvm.handle = handle;
-	host_kvm->arch.pkvm.hyp_donations.pgd = pgd;
-	host_kvm->arch.pkvm.hyp_donations.vm = hyp_vm;
 
 	/* Donate memory for the vcpus at hyp and initialize it. */
 	hyp_vcpu_sz = PAGE_ALIGN(PKVM_HYP_VCPU_SIZE);
@@ -167,12 +165,12 @@ static int __pkvm_create_hyp_vm(struct kvm *host_kvm)
 			goto destroy_vm;
 		}
 
-		host_kvm->arch.pkvm.hyp_donations.vcpus[idx] = hyp_vcpu;
-
 		ret = kvm_call_hyp_nvhe(__pkvm_init_vcpu, handle, host_vcpu,
 					hyp_vcpu);
-		if (ret)
+		if (ret) {
+			free_pages_exact(hyp_vcpu, hyp_vcpu_sz);
 			goto destroy_vm;
+		}
 	}
 
 	return 0;
@@ -201,30 +199,13 @@ int pkvm_create_hyp_vm(struct kvm *host_kvm)
 
 void pkvm_destroy_hyp_vm(struct kvm *host_kvm)
 {
-	unsigned long idx, nr_vcpus = host_kvm->created_vcpus;
-	size_t pgd_sz, hyp_vm_sz;
-
-	if (host_kvm->arch.pkvm.handle)
+	if (host_kvm->arch.pkvm.handle) {
 		WARN_ON(kvm_call_hyp_nvhe(__pkvm_teardown_vm,
 					  host_kvm->arch.pkvm.handle));
-
-	host_kvm->arch.pkvm.handle = 0;
-
-	for (idx = 0; idx < nr_vcpus; ++idx) {
-		void *hyp_vcpu = host_kvm->arch.pkvm.hyp_donations.vcpus[idx];
-
-		if (!hyp_vcpu)
-			break;
-
-		free_pages_exact(hyp_vcpu, PAGE_ALIGN(PKVM_HYP_VCPU_SIZE));
 	}
 
-	hyp_vm_sz = PAGE_ALIGN(size_add(PKVM_HYP_VM_SIZE,
-					size_mul(sizeof(void *), nr_vcpus)));
-	pgd_sz = kvm_pgtable_stage2_pgd_size(host_kvm->arch.vtcr);
-
-	free_pages_exact(host_kvm->arch.pkvm.hyp_donations.vm, hyp_vm_sz);
-	free_pages_exact(host_kvm->arch.pkvm.hyp_donations.pgd, pgd_sz);
+	host_kvm->arch.pkvm.handle = 0;
+	free_hyp_memcache(&host_kvm->arch.pkvm.teardown_mc);
 }
 
 int pkvm_init_host_vm(struct kvm *host_kvm)
-- 
GitLab


From fe41a7f8c0ee3ee2f682f8c28c7e1c5ff2be8a79 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:54 +0000
Subject: [PATCH 1433/4122] KVM: arm64: Unmap 'kvm_arm_hyp_percpu_base' from
 the host

When pKVM is enabled, the hypervisor at EL2 does not trust the host at
EL1 and must therefore prevent it from having unrestricted access to
internal hypervisor state.

The 'kvm_arm_hyp_percpu_base' array holds the offsets for hypervisor
per-cpu allocations, so move this this into the nVHE code where it
cannot be modified by the untrusted host at EL1.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-22-will@kernel.org
---
 arch/arm64/include/asm/kvm_asm.h  | 4 ++--
 arch/arm64/kernel/image-vars.h    | 3 ---
 arch/arm64/kvm/arm.c              | 9 ++++-----
 arch/arm64/kvm/hyp/nvhe/hyp-smp.c | 2 ++
 4 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index de52ba775d48f..43c3bc0f9544d 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -109,7 +109,7 @@ enum __kvm_host_smccc_func {
 #define per_cpu_ptr_nvhe_sym(sym, cpu)						\
 	({									\
 		unsigned long base, off;					\
-		base = kvm_arm_hyp_percpu_base[cpu];				\
+		base = kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu];		\
 		off = (unsigned long)&CHOOSE_NVHE_SYM(sym) -			\
 		      (unsigned long)&CHOOSE_NVHE_SYM(__per_cpu_start);		\
 		base ? (typeof(CHOOSE_NVHE_SYM(sym))*)(base + off) : NULL;	\
@@ -214,7 +214,7 @@ DECLARE_KVM_HYP_SYM(__kvm_hyp_vector);
 #define __kvm_hyp_init		CHOOSE_NVHE_SYM(__kvm_hyp_init)
 #define __kvm_hyp_vector	CHOOSE_HYP_SYM(__kvm_hyp_vector)
 
-extern unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
+extern unsigned long kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[];
 DECLARE_KVM_NVHE_SYM(__per_cpu_start);
 DECLARE_KVM_NVHE_SYM(__per_cpu_end);
 
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 7f4e43bfaade7..ae8f37f4aa8c6 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -89,9 +89,6 @@ KVM_NVHE_ALIAS(gic_nonsecure_priorities);
 KVM_NVHE_ALIAS(__start___kvm_ex_table);
 KVM_NVHE_ALIAS(__stop___kvm_ex_table);
 
-/* Array containing bases of nVHE per-CPU memory regions. */
-KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
-
 /* PMU available static key */
 #ifdef CONFIG_HW_PERF_EVENTS
 KVM_NVHE_ALIAS(kvm_arm_pmu_available);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index f78eefa02f6bd..25467f24803dd 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -51,7 +51,6 @@ DEFINE_STATIC_KEY_FALSE(kvm_protected_mode_initialized);
 DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector);
 
 DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
-unsigned long kvm_arm_hyp_percpu_base[NR_CPUS];
 DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 static bool vgic_present;
@@ -1857,13 +1856,13 @@ static void teardown_hyp_mode(void)
 	free_hyp_pgds();
 	for_each_possible_cpu(cpu) {
 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
-		free_pages(kvm_arm_hyp_percpu_base[cpu], nvhe_percpu_order());
+		free_pages(kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu], nvhe_percpu_order());
 	}
 }
 
 static int do_pkvm_init(u32 hyp_va_bits)
 {
-	void *per_cpu_base = kvm_ksym_ref(kvm_arm_hyp_percpu_base);
+	void *per_cpu_base = kvm_ksym_ref(kvm_nvhe_sym(kvm_arm_hyp_percpu_base));
 	int ret;
 
 	preempt_disable();
@@ -1967,7 +1966,7 @@ static int init_hyp_mode(void)
 
 		page_addr = page_address(page);
 		memcpy(page_addr, CHOOSE_NVHE_SYM(__per_cpu_start), nvhe_percpu_size());
-		kvm_arm_hyp_percpu_base[cpu] = (unsigned long)page_addr;
+		kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu] = (unsigned long)page_addr;
 	}
 
 	/*
@@ -2060,7 +2059,7 @@ static int init_hyp_mode(void)
 	}
 
 	for_each_possible_cpu(cpu) {
-		char *percpu_begin = (char *)kvm_arm_hyp_percpu_base[cpu];
+		char *percpu_begin = (char *)kvm_nvhe_sym(kvm_arm_hyp_percpu_base)[cpu];
 		char *percpu_end = percpu_begin + nvhe_percpu_size();
 
 		/* Map Hyp percpu pages */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
index 9f54833af4009..04d194583f1e4 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-smp.c
@@ -23,6 +23,8 @@ u64 cpu_logical_map(unsigned int cpu)
 	return hyp_cpu_logical_map[cpu];
 }
 
+unsigned long __ro_after_init kvm_arm_hyp_percpu_base[NR_CPUS];
+
 unsigned long __hyp_per_cpu_offset(unsigned int cpu)
 {
 	unsigned long *cpu_base_array;
-- 
GitLab


From 73f38ef2ae531b180685173e0923225551434fcb Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:55 +0000
Subject: [PATCH 1434/4122] KVM: arm64: Maintain a copy of 'kvm_arm_vmid_bits'
 at EL2

Sharing 'kvm_arm_vmid_bits' between EL1 and EL2 allows the host to
modify the variable arbitrarily, potentially leading to all sorts of
shenanians as this is used to configure the VTTBR register for the
guest stage-2.

In preparation for unmapping host sections entirely from EL2, maintain
a copy of 'kvm_arm_vmid_bits' in the pKVM hypervisor and initialise it
from the host value while it is still trusted.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-23-will@kernel.org
---
 arch/arm64/include/asm/kvm_hyp.h | 2 ++
 arch/arm64/kernel/image-vars.h   | 3 ---
 arch/arm64/kvm/arm.c             | 1 +
 arch/arm64/kvm/hyp/nvhe/pkvm.c   | 3 +++
 4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index fd99cf09972de..6797eafe7890b 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -124,4 +124,6 @@ extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val);
 extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val);
 
 extern unsigned long kvm_nvhe_sym(__icache_flags);
+extern unsigned int kvm_nvhe_sym(kvm_arm_vmid_bits);
+
 #endif /* __ARM64_KVM_HYP_H__ */
diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index ae8f37f4aa8c6..31ad75da4d589 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -71,9 +71,6 @@ KVM_NVHE_ALIAS(nvhe_hyp_panic_handler);
 /* Vectors installed by hyp-init on reset HVC. */
 KVM_NVHE_ALIAS(__hyp_stub_vectors);
 
-/* VMID bits set by the KVM VMID allocator */
-KVM_NVHE_ALIAS(kvm_arm_vmid_bits);
-
 /* Static keys which are set if a vGIC trap should be handled in hyp. */
 KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
 KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 25467f24803dd..1d4b8122d0108 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1893,6 +1893,7 @@ static void kvm_hyp_init_symbols(void)
 	kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
 	kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1);
 	kvm_nvhe_sym(__icache_flags) = __icache_flags;
+	kvm_nvhe_sym(kvm_arm_vmid_bits) = kvm_arm_vmid_bits;
 }
 
 static int kvm_hyp_init_protection(u32 hyp_va_bits)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 81835c2f4c5a0..ed6ceac1e8547 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -15,6 +15,9 @@
 /* Used by icache_is_vpipt(). */
 unsigned long __icache_flags;
 
+/* Used by kvm_get_vttbr(). */
+unsigned int kvm_arm_vmid_bits;
+
 /*
  * Set trap register values based on features in ID_AA64PFR0.
  */
-- 
GitLab


From 27eb26bfff5d358d42911d04bbecc62e659ec32b Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:56 +0000
Subject: [PATCH 1435/4122] KVM: arm64: Explicitly map 'kvm_vgic_global_state'
 at EL2

The pkvm hypervisor at EL2 may need to read the 'kvm_vgic_global_state'
variable from the host, for example when saving and restoring the state
of the virtual GIC.

Explicitly map 'kvm_vgic_global_state' in the stage-1 page-table of the
pKVM hypervisor rather than relying on mapping all of the host '.rodata'
section.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-24-will@kernel.org
---
 arch/arm64/kvm/hyp/nvhe/setup.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 0f69c1393416d..5a371ab236db7 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -161,6 +161,11 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	if (ret)
 		return ret;
 
+	ret = pkvm_create_mappings(&kvm_vgic_global_state,
+				   &kvm_vgic_global_state + 1, prot);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
-- 
GitLab


From 169cd0f8238f2598b85d2db2e15828e8f8da18e5 Mon Sep 17 00:00:00 2001
From: Quentin Perret <qperret@google.com>
Date: Thu, 10 Nov 2022 19:02:57 +0000
Subject: [PATCH 1436/4122] KVM: arm64: Don't unnecessarily map host kernel
 sections at EL2

We no longer need to map the host's '.rodata' and '.bss' sections in the
stage-1 page-table of the pKVM hypervisor at EL2, so remove those
mappings and avoid creating any future dependencies at EL2 on
host-controlled data structures.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Quentin Perret <qperret@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-25-will@kernel.org
---
 arch/arm64/kernel/image-vars.h  |  6 ------
 arch/arm64/kvm/hyp/nvhe/setup.c | 14 +++-----------
 2 files changed, 3 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h
index 31ad75da4d589..e3f88b5836a20 100644
--- a/arch/arm64/kernel/image-vars.h
+++ b/arch/arm64/kernel/image-vars.h
@@ -102,12 +102,6 @@ KVM_NVHE_ALIAS_HYP(__memcpy, __pi_memcpy);
 KVM_NVHE_ALIAS_HYP(__memset, __pi_memset);
 #endif
 
-/* Kernel memory sections */
-KVM_NVHE_ALIAS(__start_rodata);
-KVM_NVHE_ALIAS(__end_rodata);
-KVM_NVHE_ALIAS(__bss_start);
-KVM_NVHE_ALIAS(__bss_stop);
-
 /* Hyp memory sections */
 KVM_NVHE_ALIAS(__hyp_idmap_text_start);
 KVM_NVHE_ALIAS(__hyp_idmap_text_end);
diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c
index 5a371ab236db7..5cdf3fb09bb42 100644
--- a/arch/arm64/kvm/hyp/nvhe/setup.c
+++ b/arch/arm64/kvm/hyp/nvhe/setup.c
@@ -144,23 +144,15 @@ static int recreate_hyp_mappings(phys_addr_t phys, unsigned long size,
 	}
 
 	/*
-	 * Map the host's .bss and .rodata sections RO in the hypervisor, but
-	 * transfer the ownership from the host to the hypervisor itself to
-	 * make sure it can't be donated or shared with another entity.
+	 * Map the host sections RO in the hypervisor, but transfer the
+	 * ownership from the host to the hypervisor itself to make sure they
+	 * can't be donated or shared with another entity.
 	 *
 	 * The ownership transition requires matching changes in the host
 	 * stage-2. This will be done later (see finalize_host_mappings()) once
 	 * the hyp_vmemmap is addressable.
 	 */
 	prot = pkvm_mkstate(PAGE_HYP_RO, PKVM_PAGE_SHARED_OWNED);
-	ret = pkvm_create_mappings(__start_rodata, __end_rodata, prot);
-	if (ret)
-		return ret;
-
-	ret = pkvm_create_mappings(__hyp_bss_end, __bss_stop, prot);
-	if (ret)
-		return ret;
-
 	ret = pkvm_create_mappings(&kvm_vgic_global_state,
 				   &kvm_vgic_global_state + 1, prot);
 	if (ret)
-- 
GitLab


From be66e67f175096f283c9d5614c4991fc9e7ed975 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 10 Nov 2022 19:02:59 +0000
Subject: [PATCH 1437/4122] KVM: arm64: Use the pKVM hyp vCPU structure in
 handle___kvm_vcpu_run()

As a stepping stone towards deprivileging the host's access to the
guest's vCPU structures, introduce some naive flush/sync routines to
copy most of the host vCPU into the hyp vCPU on vCPU run and back
again on return to EL1.

This allows us to run using the pKVM hyp structures when KVM is
initialised in protected mode.

Tested-by: Vincent Donnefort <vdonnefort@google.com>
Co-developed-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221110190259.26861-27-will@kernel.org
---
 arch/arm64/kvm/hyp/include/nvhe/pkvm.h |  4 ++
 arch/arm64/kvm/hyp/nvhe/hyp-main.c     | 79 +++++++++++++++++++++++++-
 arch/arm64/kvm/hyp/nvhe/pkvm.c         | 28 +++++++++
 3 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
index d14dfbcb7da16..82b3d62538a61 100644
--- a/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
+++ b/arch/arm64/kvm/hyp/include/nvhe/pkvm.h
@@ -61,4 +61,8 @@ int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
 		     unsigned long vcpu_hva);
 int __pkvm_teardown_vm(pkvm_handle_t handle);
 
+struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
+					 unsigned int vcpu_idx);
+void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu);
+
 #endif /* __ARM64_KVM_NVHE_PKVM_H__ */
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index b5f3fcfe91357..728e01d4536b0 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -22,11 +22,86 @@ DEFINE_PER_CPU(struct kvm_nvhe_init_params, kvm_init_params);
 
 void __kvm_hyp_host_forward_smc(struct kvm_cpu_context *host_ctxt);
 
+static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+
+	hyp_vcpu->vcpu.arch.ctxt	= host_vcpu->arch.ctxt;
+
+	hyp_vcpu->vcpu.arch.sve_state	= kern_hyp_va(host_vcpu->arch.sve_state);
+	hyp_vcpu->vcpu.arch.sve_max_vl	= host_vcpu->arch.sve_max_vl;
+
+	hyp_vcpu->vcpu.arch.hw_mmu	= host_vcpu->arch.hw_mmu;
+
+	hyp_vcpu->vcpu.arch.hcr_el2	= host_vcpu->arch.hcr_el2;
+	hyp_vcpu->vcpu.arch.mdcr_el2	= host_vcpu->arch.mdcr_el2;
+	hyp_vcpu->vcpu.arch.cptr_el2	= host_vcpu->arch.cptr_el2;
+
+	hyp_vcpu->vcpu.arch.iflags	= host_vcpu->arch.iflags;
+	hyp_vcpu->vcpu.arch.fp_state	= host_vcpu->arch.fp_state;
+
+	hyp_vcpu->vcpu.arch.debug_ptr	= kern_hyp_va(host_vcpu->arch.debug_ptr);
+	hyp_vcpu->vcpu.arch.host_fpsimd_state = host_vcpu->arch.host_fpsimd_state;
+
+	hyp_vcpu->vcpu.arch.vsesr_el2	= host_vcpu->arch.vsesr_el2;
+
+	hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3 = host_vcpu->arch.vgic_cpu.vgic_v3;
+}
+
+static void sync_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+	struct kvm_vcpu *host_vcpu = hyp_vcpu->host_vcpu;
+	struct vgic_v3_cpu_if *hyp_cpu_if = &hyp_vcpu->vcpu.arch.vgic_cpu.vgic_v3;
+	struct vgic_v3_cpu_if *host_cpu_if = &host_vcpu->arch.vgic_cpu.vgic_v3;
+	unsigned int i;
+
+	host_vcpu->arch.ctxt		= hyp_vcpu->vcpu.arch.ctxt;
+
+	host_vcpu->arch.hcr_el2		= hyp_vcpu->vcpu.arch.hcr_el2;
+	host_vcpu->arch.cptr_el2	= hyp_vcpu->vcpu.arch.cptr_el2;
+
+	host_vcpu->arch.fault		= hyp_vcpu->vcpu.arch.fault;
+
+	host_vcpu->arch.iflags		= hyp_vcpu->vcpu.arch.iflags;
+	host_vcpu->arch.fp_state	= hyp_vcpu->vcpu.arch.fp_state;
+
+	host_cpu_if->vgic_hcr		= hyp_cpu_if->vgic_hcr;
+	for (i = 0; i < hyp_cpu_if->used_lrs; ++i)
+		host_cpu_if->vgic_lr[i] = hyp_cpu_if->vgic_lr[i];
+}
+
 static void handle___kvm_vcpu_run(struct kvm_cpu_context *host_ctxt)
 {
-	DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1);
+	DECLARE_REG(struct kvm_vcpu *, host_vcpu, host_ctxt, 1);
+	int ret;
+
+	host_vcpu = kern_hyp_va(host_vcpu);
+
+	if (unlikely(is_protected_kvm_enabled())) {
+		struct pkvm_hyp_vcpu *hyp_vcpu;
+		struct kvm *host_kvm;
+
+		host_kvm = kern_hyp_va(host_vcpu->kvm);
+		hyp_vcpu = pkvm_load_hyp_vcpu(host_kvm->arch.pkvm.handle,
+					      host_vcpu->vcpu_idx);
+		if (!hyp_vcpu) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		flush_hyp_vcpu(hyp_vcpu);
+
+		ret = __kvm_vcpu_run(&hyp_vcpu->vcpu);
+
+		sync_hyp_vcpu(hyp_vcpu);
+		pkvm_put_hyp_vcpu(hyp_vcpu);
+	} else {
+		/* The host is fully trusted, run its vCPU directly. */
+		ret = __kvm_vcpu_run(host_vcpu);
+	}
 
-	cpu_reg(host_ctxt, 1) =  __kvm_vcpu_run(kern_hyp_va(vcpu));
+out:
+	cpu_reg(host_ctxt, 1) =  ret;
 }
 
 static void handle___kvm_adjust_pc(struct kvm_cpu_context *host_ctxt)
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index ed6ceac1e8547..a06ece14a6d8e 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -241,6 +241,33 @@ static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
 	return vm_table[idx];
 }
 
+struct pkvm_hyp_vcpu *pkvm_load_hyp_vcpu(pkvm_handle_t handle,
+					 unsigned int vcpu_idx)
+{
+	struct pkvm_hyp_vcpu *hyp_vcpu = NULL;
+	struct pkvm_hyp_vm *hyp_vm;
+
+	hyp_spin_lock(&vm_table_lock);
+	hyp_vm = get_vm_by_handle(handle);
+	if (!hyp_vm || hyp_vm->nr_vcpus <= vcpu_idx)
+		goto unlock;
+
+	hyp_vcpu = hyp_vm->vcpus[vcpu_idx];
+	hyp_page_ref_inc(hyp_virt_to_page(hyp_vm));
+unlock:
+	hyp_spin_unlock(&vm_table_lock);
+	return hyp_vcpu;
+}
+
+void pkvm_put_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
+{
+	struct pkvm_hyp_vm *hyp_vm = pkvm_hyp_vcpu_to_hyp_vm(hyp_vcpu);
+
+	hyp_spin_lock(&vm_table_lock);
+	hyp_page_ref_dec(hyp_virt_to_page(hyp_vm));
+	hyp_spin_unlock(&vm_table_lock);
+}
+
 static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
 {
 	if (host_vcpu)
@@ -286,6 +313,7 @@ static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
 	hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;
 
 	hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
+	hyp_vcpu->vcpu.arch.cflags = READ_ONCE(host_vcpu->arch.cflags);
 done:
 	if (ret)
 		unpin_host_vcpu(host_vcpu);
-- 
GitLab


From e6ecb142429183cef4835f31d4134050ae660032 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Tue, 8 Nov 2022 17:59:34 -0800
Subject: [PATCH 1438/4122] f2fs: allow to read node block after shutdown

If block address is still alive, we should give a valid node block even after
shutdown. Otherwise, we can see zero data when reading out a file.

Cc: stable@vger.kernel.org
Fixes: 83a3bfdb5a8a ("f2fs: indicate shutdown f2fs to allow unmount successfully")
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/node.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 983572f238969..b9ee5a1176a07 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1360,8 +1360,7 @@ static int read_node_page(struct page *page, blk_opf_t op_flags)
 		return err;
 
 	/* NEW_ADDR can be seen, after cp_error drops some dirty node pages */
-	if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR) ||
-			is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
+	if (unlikely(ni.blk_addr == NULL_ADDR || ni.blk_addr == NEW_ADDR)) {
 		ClearPageUptodate(page);
 		return -ENOENT;
 	}
-- 
GitLab


From 225d6795abf47c3340214ca1b4c22728e463db4f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 25 Oct 2022 21:26:38 +0800
Subject: [PATCH 1439/4122] f2fs: add proc entry to show discard_plist info

This patch adds a new proc entry to show discard_plist
information in more detail, which is very helpful to
know the discard pend list count clearly.

Such as:

Discard pend list(Show diacrd_cmd count on each entry, .:not exist):
  0       390     156      85      67      46      37      26      14
  8        17      12       9       9       6      12      11      10
  16        5       9       2       4       8       3       4       1
  24        3       2       2       5       2       4       5       4
  32        3       3       2       3       .       3       3       1
  40        .       4       1       3       2       1       2       1
  48        1       .       1       1       .       1       1       .
  56        .       1       1       1       .       2       .       1
  64        1       2       .       .       .       .       .       .
  72        .       1       .       .       .       .       .       .
  80        3       1       .       .       1       1       .       .
  88        1       .       .       .       1       .       .       1
......

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/sysfs.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 032c03e09580d..97bf0dbb0974b 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -1252,6 +1252,44 @@ static int __maybe_unused victim_bits_seq_show(struct seq_file *seq,
 	return 0;
 }
 
+static int __maybe_unused discard_plist_seq_show(struct seq_file *seq,
+						void *offset)
+{
+	struct super_block *sb = seq->private;
+	struct f2fs_sb_info *sbi = F2FS_SB(sb);
+	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+	int i, count;
+
+	seq_puts(seq, "Discard pend list(Show diacrd_cmd count on each entry, .:not exist):\n");
+	if (!f2fs_realtime_discard_enable(sbi))
+		return 0;
+
+	if (dcc) {
+		mutex_lock(&dcc->cmd_lock);
+		for (i = 0; i < MAX_PLIST_NUM; i++) {
+			struct list_head *pend_list;
+			struct discard_cmd *dc, *tmp;
+
+			if (i % 8 == 0)
+				seq_printf(seq, "  %-3d", i);
+			count = 0;
+			pend_list = &dcc->pend_list[i];
+			list_for_each_entry_safe(dc, tmp, pend_list, list)
+				count++;
+			if (count)
+				seq_printf(seq, " %7d", count);
+			else
+				seq_puts(seq, "       .");
+			if (i % 8 == 7)
+				seq_putc(seq, '\n');
+		}
+		seq_putc(seq, '\n');
+		mutex_unlock(&dcc->cmd_lock);
+	}
+
+	return 0;
+}
+
 int __init f2fs_init_sysfs(void)
 {
 	int ret;
@@ -1322,6 +1360,8 @@ int f2fs_register_sysfs(struct f2fs_sb_info *sbi)
 #endif
 		proc_create_single_data("victim_bits", 0444, sbi->s_proc,
 				victim_bits_seq_show, sb);
+		proc_create_single_data("discard_plist_info", 0444, sbi->s_proc,
+				discard_plist_seq_show, sb);
 	}
 	return 0;
 put_feature_list_kobj:
@@ -1345,6 +1385,7 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
 		remove_proc_entry("segment_info", sbi->s_proc);
 		remove_proc_entry("segment_bits", sbi->s_proc);
 		remove_proc_entry("victim_bits", sbi->s_proc);
+		remove_proc_entry("discard_plist_info", sbi->s_proc);
 		remove_proc_entry(sbi->sb->s_id, f2fs_proc_root);
 	}
 
-- 
GitLab


From 4d8d45df2252980f800c1b2fde941a103a18a70e Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Mon, 31 Oct 2022 12:24:15 -0700
Subject: [PATCH 1440/4122] f2fs: correct i_size change for atomic writes

We need to make sure i_size doesn't change until atomic write commit is
successful and restore it when commit is failed.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h    |  8 ++++++++
 fs/f2fs/file.c    | 18 +++++++++++-------
 fs/f2fs/inode.c   |  5 ++++-
 fs/f2fs/segment.c | 14 ++++++++++----
 4 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 04ef4cce3d7f4..11c475beca2c1 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -768,6 +768,7 @@ enum {
 	FI_COMPRESS_RELEASED,	/* compressed blocks were released */
 	FI_ALIGNED_WRITE,	/* enable aligned write */
 	FI_COW_FILE,		/* indicate COW file */
+	FI_ATOMIC_COMMITTED,	/* indicate atomic commit completed except disk sync */
 	FI_MAX,			/* max flag, never be used */
 };
 
@@ -826,6 +827,7 @@ struct f2fs_inode_info {
 	unsigned int i_cluster_size;		/* cluster size */
 
 	unsigned int atomic_write_cnt;
+	loff_t original_i_size;		/* original i_size before atomic write */
 };
 
 static inline void get_extent_info(struct extent_info *ext,
@@ -3075,6 +3077,8 @@ static inline void f2fs_i_blocks_write(struct inode *inode,
 		set_inode_flag(inode, FI_AUTO_RECOVER);
 }
 
+static inline bool f2fs_is_atomic_file(struct inode *inode);
+
 static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
 {
 	bool clean = !is_inode_flag_set(inode, FI_DIRTY_INODE);
@@ -3084,6 +3088,10 @@ static inline void f2fs_i_size_write(struct inode *inode, loff_t i_size)
 		return;
 
 	i_size_write(inode, i_size);
+
+	if (f2fs_is_atomic_file(inode))
+		return;
+
 	f2fs_mark_inode_dirty_sync(inode, true);
 	if (clean || recover)
 		set_inode_flag(inode, FI_AUTO_RECOVER);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index c605a4f2bce21..28f586e779992 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2041,6 +2041,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 	struct f2fs_inode_info *fi = F2FS_I(inode);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct inode *pinode;
+	loff_t isize;
 	int ret;
 
 	if (!inode_owner_or_capable(mnt_userns, inode))
@@ -2099,7 +2100,12 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 		f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
 		goto out;
 	}
-	f2fs_i_size_write(fi->cow_inode, i_size_read(inode));
+
+	f2fs_write_inode(inode, NULL);
+
+	isize = i_size_read(inode);
+	fi->original_i_size = isize;
+	f2fs_i_size_write(fi->cow_inode, isize);
 
 	stat_inc_atomic_inode(inode);
 
@@ -2137,16 +2143,14 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
 
 	if (f2fs_is_atomic_file(inode)) {
 		ret = f2fs_commit_atomic_write(inode);
-		if (ret)
-			goto unlock_out;
-
-		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
 		if (!ret)
-			f2fs_abort_atomic_write(inode, false);
+			ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true);
+
+		f2fs_abort_atomic_write(inode, ret);
 	} else {
 		ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 1, false);
 	}
-unlock_out:
+
 	inode_unlock(inode);
 	mnt_drop_write_file(filp);
 	return ret;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 9f0d3864d9f13..577f109b4e1d9 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -621,9 +621,12 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
 	ri->i_uid = cpu_to_le32(i_uid_read(inode));
 	ri->i_gid = cpu_to_le32(i_gid_read(inode));
 	ri->i_links = cpu_to_le32(inode->i_nlink);
-	ri->i_size = cpu_to_le64(i_size_read(inode));
 	ri->i_blocks = cpu_to_le64(SECTOR_TO_BLOCK(inode->i_blocks) + 1);
 
+	if (!f2fs_is_atomic_file(inode) ||
+			is_inode_flag_set(inode, FI_ATOMIC_COMMITTED))
+		ri->i_size = cpu_to_le64(i_size_read(inode));
+
 	if (et) {
 		read_lock(&et->lock);
 		set_raw_extent(&et->largest, &ri->i_ext);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index aa4be7f25963d..8aa81238c770f 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -192,14 +192,18 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
 	if (!f2fs_is_atomic_file(inode))
 		return;
 
-	if (clean)
-		truncate_inode_pages_final(inode->i_mapping);
 	clear_inode_flag(fi->cow_inode, FI_COW_FILE);
 	iput(fi->cow_inode);
 	fi->cow_inode = NULL;
 	release_atomic_write_cnt(inode);
+	clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
 	clear_inode_flag(inode, FI_ATOMIC_FILE);
 	stat_dec_atomic_inode(inode);
+
+	if (clean) {
+		truncate_inode_pages_final(inode->i_mapping);
+		f2fs_i_size_write(inode, fi->original_i_size);
+	}
 }
 
 static int __replace_atomic_write_block(struct inode *inode, pgoff_t index,
@@ -335,10 +339,12 @@ next:
 	}
 
 out:
-	if (ret)
+	if (ret) {
 		sbi->revoked_atomic_block += fi->atomic_write_cnt;
-	else
+	} else {
 		sbi->committed_atomic_block += fi->atomic_write_cnt;
+		set_inode_flag(inode, FI_ATOMIC_COMMITTED);
+	}
 
 	__complete_revoke_list(inode, &revoke_list, ret ? true : false);
 
-- 
GitLab


From cc249e4cba9a6002c9d9e1438daf8440a160bc9e Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Sun, 6 Nov 2022 21:25:44 +0800
Subject: [PATCH 1441/4122] f2fs: fix to avoid accessing uninitialized spinlock

syzbot reports a kernel bug:

 __dump_stack lib/dump_stack.c:88 [inline]
 dump_stack_lvl+0x1e3/0x2cb lib/dump_stack.c:106
 assign_lock_key+0x22a/0x240 kernel/locking/lockdep.c:981
 register_lock_class+0x287/0x9b0 kernel/locking/lockdep.c:1294
 __lock_acquire+0xe4/0x1f60 kernel/locking/lockdep.c:4934
 lock_acquire+0x1a7/0x400 kernel/locking/lockdep.c:5668
 __raw_spin_lock include/linux/spinlock_api_smp.h:133 [inline]
 _raw_spin_lock+0x2a/0x40 kernel/locking/spinlock.c:154
 spin_lock include/linux/spinlock.h:350 [inline]
 f2fs_save_errors fs/f2fs/super.c:3868 [inline]
 f2fs_handle_error+0x29/0x230 fs/f2fs/super.c:3896
 f2fs_iget+0x215/0x4bb0 fs/f2fs/inode.c:516
 f2fs_fill_super+0x47d3/0x7b50 fs/f2fs/super.c:4222
 mount_bdev+0x26c/0x3a0 fs/super.c:1401
 legacy_get_tree+0xea/0x180 fs/fs_context.c:610
 vfs_get_tree+0x88/0x270 fs/super.c:1531
 do_new_mount+0x289/0xad0 fs/namespace.c:3040
 do_mount fs/namespace.c:3383 [inline]
 __do_sys_mount fs/namespace.c:3591 [inline]
 __se_sys_mount+0x2e3/0x3d0 fs/namespace.c:3568
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

F2FS-fs (loop1): Failed to read F2FS meta data inode

The root cause is if sbi->error_lock may be accessed before
its initialization, fix it.

Link: https://lore.kernel.org/linux-f2fs-devel/0000000000007edb6605ecbb6442@google.com/T/#u
Reported-by: syzbot+40642be9b7e0bb28e0df@syzkaller.appspotmail.com
Fixes: 95fa90c9e5a7 ("f2fs: support recording errors into superblock")
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a43d8a46a6e56..68a6c2eedcaca 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4196,6 +4196,9 @@ try_onemore:
 	if (err)
 		goto free_bio_info;
 
+	spin_lock_init(&sbi->error_lock);
+	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
+
 	init_f2fs_rwsem(&sbi->cp_rwsem);
 	init_f2fs_rwsem(&sbi->quota_sem);
 	init_waitqueue_head(&sbi->cp_wait);
@@ -4263,9 +4266,6 @@ try_onemore:
 		goto free_devices;
 	}
 
-	spin_lock_init(&sbi->error_lock);
-	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
-
 	sbi->total_valid_node_count =
 				le32_to_cpu(sbi->ckpt->valid_node_count);
 	percpu_counter_set(&sbi->total_valid_inode_count,
-- 
GitLab


From 59237a21776f70ffb0420611c23e7158e1317037 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Tue, 8 Nov 2022 22:33:21 +0800
Subject: [PATCH 1442/4122] f2fs: optimize iteration over sparse directories

Wei Chen reports a kernel bug as blew:

INFO: task syz-executor.0:29056 blocked for more than 143 seconds.
      Not tainted 5.15.0-rc5 #1
"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
task:syz-executor.0  state:D stack:14632 pid:29056 ppid:  6574 flags:0x00000004
Call Trace:
 __schedule+0x4a1/0x1720
 schedule+0x36/0xe0
 rwsem_down_write_slowpath+0x322/0x7a0
 fscrypt_ioctl_set_policy+0x11f/0x2a0
 __f2fs_ioctl+0x1a9f/0x5780
 f2fs_ioctl+0x89/0x3a0
 __x64_sys_ioctl+0xe8/0x140
 do_syscall_64+0x34/0xb0
 entry_SYSCALL_64_after_hwframe+0x44/0xae

Eric did some investigation on this issue, quoted from reply of Eric:

"Well, the quality of this bug report has a lot to be desired (not on
upstream kernel, reproducer is full of totally irrelevant stuff, not
sent to the mailing list of the filesystem whose disk image is being
fuzzed, etc.).  But what is going on is that f2fs_empty_dir() doesn't
consider the case of a directory with an extremely large i_size on a
malicious disk image.

Specifically, the reproducer mounts an f2fs image with a directory
that has an i_size of 14814520042850357248, then calls
FS_IOC_SET_ENCRYPTION_POLICY on it.

That results in a call to f2fs_empty_dir() to check whether the
directory is empty.  f2fs_empty_dir() then iterates through all
3616826182336513 blocks the directory allegedly contains to check
whether any contain anything.  i_rwsem is held during this, so
anything else that tries to take it will hang."

In order to solve this issue, let's use f2fs_get_next_page_offset()
to speed up iteration by skipping holes for all below functions:
- f2fs_empty_dir
- f2fs_readdir
- find_in_level

The way why we can speed up iteration was described in
'commit 3cf4574705b4 ("f2fs: introduce get_next_page_offset to speed
up SEEK_DATA")'.

Meanwhile, in f2fs_empty_dir(), let's use f2fs_find_data_page()
instead f2fs_get_lock_data_page(), due to i_rwsem was held in
caller of f2fs_empty_dir(), there shouldn't be any races, so it's
fine to not lock dentry page during lookuping dirents in the page.

Link: https://lore.kernel.org/lkml/536944df-a0ae-1dd8-148f-510b476e1347@kernel.org/T/
Reported-by: Wei Chen <harperchen1110@gmail.com>
Cc: Eric Biggers <ebiggers@google.com>
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c | 17 ++++++++++++-----
 fs/f2fs/dir.c  | 34 ++++++++++++++++++++++++----------
 fs/f2fs/f2fs.h |  5 +++--
 fs/f2fs/gc.c   |  4 ++--
 4 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a71e818cd67b4..9b47ded653d1c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1206,7 +1206,8 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 }
 
 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
-				     blk_opf_t op_flags, bool for_write)
+				     blk_opf_t op_flags, bool for_write,
+				     pgoff_t *next_pgofs)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct dnode_of_data dn;
@@ -1232,12 +1233,17 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
 
 	set_new_dnode(&dn, inode, NULL, NULL, 0);
 	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
-	if (err)
+	if (err) {
+		if (err == -ENOENT && next_pgofs)
+			*next_pgofs = f2fs_get_next_page_offset(&dn, index);
 		goto put_err;
+	}
 	f2fs_put_dnode(&dn);
 
 	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
 		err = -ENOENT;
+		if (next_pgofs)
+			*next_pgofs = index + 1;
 		goto put_err;
 	}
 	if (dn.data_blkaddr != NEW_ADDR &&
@@ -1281,7 +1287,8 @@ put_err:
 	return ERR_PTR(err);
 }
 
-struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
+struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
+					pgoff_t *next_pgofs)
 {
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
@@ -1291,7 +1298,7 @@ struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index)
 		return page;
 	f2fs_put_page(page, 0);
 
-	page = f2fs_get_read_data_page(inode, index, 0, false);
+	page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
 	if (IS_ERR(page))
 		return page;
 
@@ -1317,7 +1324,7 @@ struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
 	struct address_space *mapping = inode->i_mapping;
 	struct page *page;
 repeat:
-	page = f2fs_get_read_data_page(inode, index, 0, for_write);
+	page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
 	if (IS_ERR(page))
 		return page;
 
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 21960a899b6ad..030b7fd4142ff 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -340,6 +340,7 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 	unsigned int bidx, end_block;
 	struct page *dentry_page;
 	struct f2fs_dir_entry *de = NULL;
+	pgoff_t next_pgofs;
 	bool room = false;
 	int max_slots;
 
@@ -350,12 +351,13 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 			       le32_to_cpu(fname->hash) % nbucket);
 	end_block = bidx + nblock;
 
-	for (; bidx < end_block; bidx++) {
+	while (bidx < end_block) {
 		/* no need to allocate new dentry pages to all the indices */
-		dentry_page = f2fs_find_data_page(dir, bidx);
+		dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs);
 		if (IS_ERR(dentry_page)) {
 			if (PTR_ERR(dentry_page) == -ENOENT) {
 				room = true;
+				bidx = next_pgofs;
 				continue;
 			} else {
 				*res_page = dentry_page;
@@ -376,6 +378,8 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 		if (max_slots >= s)
 			room = true;
 		f2fs_put_page(dentry_page, 0);
+
+		bidx++;
 	}
 
 	if (!de && room && F2FS_I(dir)->chash != fname->hash) {
@@ -956,7 +960,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
 
 bool f2fs_empty_dir(struct inode *dir)
 {
-	unsigned long bidx;
+	unsigned long bidx = 0;
 	struct page *dentry_page;
 	unsigned int bit_pos;
 	struct f2fs_dentry_block *dentry_blk;
@@ -965,13 +969,17 @@ bool f2fs_empty_dir(struct inode *dir)
 	if (f2fs_has_inline_dentry(dir))
 		return f2fs_empty_inline_dir(dir);
 
-	for (bidx = 0; bidx < nblock; bidx++) {
-		dentry_page = f2fs_get_lock_data_page(dir, bidx, false);
+	while (bidx < nblock) {
+		pgoff_t next_pgofs;
+
+		dentry_page = f2fs_find_data_page(dir, bidx, &next_pgofs);
 		if (IS_ERR(dentry_page)) {
-			if (PTR_ERR(dentry_page) == -ENOENT)
+			if (PTR_ERR(dentry_page) == -ENOENT) {
+				bidx = next_pgofs;
 				continue;
-			else
+			} else {
 				return false;
+			}
 		}
 
 		dentry_blk = page_address(dentry_page);
@@ -983,10 +991,12 @@ bool f2fs_empty_dir(struct inode *dir)
 						NR_DENTRY_IN_BLOCK,
 						bit_pos);
 
-		f2fs_put_page(dentry_page, 1);
+		f2fs_put_page(dentry_page, 0);
 
 		if (bit_pos < NR_DENTRY_IN_BLOCK)
 			return false;
+
+		bidx++;
 	}
 	return true;
 }
@@ -1104,7 +1114,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 		goto out_free;
 	}
 
-	for (; n < npages; n++, ctx->pos = n * NR_DENTRY_IN_BLOCK) {
+	for (; n < npages; ctx->pos = n * NR_DENTRY_IN_BLOCK) {
+		pgoff_t next_pgofs;
 
 		/* allow readdir() to be interrupted */
 		if (fatal_signal_pending(current)) {
@@ -1118,11 +1129,12 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 			page_cache_sync_readahead(inode->i_mapping, ra, file, n,
 				min(npages - n, (pgoff_t)MAX_DIR_RA_PAGES));
 
-		dentry_page = f2fs_find_data_page(inode, n);
+		dentry_page = f2fs_find_data_page(inode, n, &next_pgofs);
 		if (IS_ERR(dentry_page)) {
 			err = PTR_ERR(dentry_page);
 			if (err == -ENOENT) {
 				err = 0;
+				n = next_pgofs;
 				continue;
 			} else {
 				goto out_free;
@@ -1141,6 +1153,8 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
 		}
 
 		f2fs_put_page(dentry_page, 0);
+
+		n++;
 	}
 out_free:
 	fscrypt_fname_free_buffer(&fstr);
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 11c475beca2c1..6a8cbf5bb1871 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3807,8 +3807,9 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn);
 int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index);
 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index);
 struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
-			blk_opf_t op_flags, bool for_write);
-struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index);
+			blk_opf_t op_flags, bool for_write, pgoff_t *next_pgofs);
+struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
+							pgoff_t *next_pgofs);
 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
 			bool for_write);
 struct page *f2fs_get_new_data_page(struct inode *inode,
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 6466db75af5d2..f1a46519a5fe3 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1562,8 +1562,8 @@ next_step:
 				continue;
 			}
 
-			data_page = f2fs_get_read_data_page(inode,
-						start_bidx, REQ_RAHEAD, true);
+			data_page = f2fs_get_read_data_page(inode, start_bidx,
+							REQ_RAHEAD, true, NULL);
 			f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
 			if (IS_ERR(data_page)) {
 				iput(inode);
-- 
GitLab


From 92b4cf5b48955a4bdd15fe4e2067db8ebd87f04c Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Wed, 9 Nov 2022 07:04:42 +0900
Subject: [PATCH 1443/4122] f2fs: initialize locks earlier in f2fs_fill_super()

syzbot is reporting lockdep warning at f2fs_handle_error() [1], for
spin_lock(&sbi->error_lock) is called before spin_lock_init() is called.
For safe locking in error handling, move initialization of locks (and
obvious structures) in f2fs_fill_super() to immediately after memory
allocation.

Link: https://syzkaller.appspot.com/bug?extid=40642be9b7e0bb28e0df [1]
Reported-by: syzbot <syzbot+40642be9b7e0bb28e0df@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+40642be9b7e0bb28e0df@syzkaller.appspotmail.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 38 ++++++++++++++++++++------------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 68a6c2eedcaca..8f4fc3ad67650 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4103,6 +4103,24 @@ try_onemore:
 
 	sbi->sb = sb;
 
+	/* initialize locks within allocated memory */
+	init_f2fs_rwsem(&sbi->gc_lock);
+	mutex_init(&sbi->writepages);
+	init_f2fs_rwsem(&sbi->cp_global_sem);
+	init_f2fs_rwsem(&sbi->node_write);
+	init_f2fs_rwsem(&sbi->node_change);
+	spin_lock_init(&sbi->stat_lock);
+	init_f2fs_rwsem(&sbi->cp_rwsem);
+	init_f2fs_rwsem(&sbi->quota_sem);
+	init_waitqueue_head(&sbi->cp_wait);
+	spin_lock_init(&sbi->error_lock);
+
+	for (i = 0; i < NR_INODE_TYPE; i++) {
+		INIT_LIST_HEAD(&sbi->inode_list[i]);
+		spin_lock_init(&sbi->inode_lock[i]);
+	}
+	mutex_init(&sbi->flush_lock);
+
 	/* Load the checksum driver */
 	sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
 	if (IS_ERR(sbi->s_chksum_driver)) {
@@ -4126,6 +4144,8 @@ try_onemore:
 	sb->s_fs_info = sbi;
 	sbi->raw_super = raw_super;
 
+	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
+
 	/* precompute checksum seed for metadata */
 	if (f2fs_sb_has_inode_chksum(sbi))
 		sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
@@ -4182,26 +4202,14 @@ try_onemore:
 
 	/* init f2fs-specific super block info */
 	sbi->valid_super_block = valid_super_block;
-	init_f2fs_rwsem(&sbi->gc_lock);
-	mutex_init(&sbi->writepages);
-	init_f2fs_rwsem(&sbi->cp_global_sem);
-	init_f2fs_rwsem(&sbi->node_write);
-	init_f2fs_rwsem(&sbi->node_change);
 
 	/* disallow all the data/node/meta page writes */
 	set_sbi_flag(sbi, SBI_POR_DOING);
-	spin_lock_init(&sbi->stat_lock);
 
 	err = f2fs_init_write_merge_io(sbi);
 	if (err)
 		goto free_bio_info;
 
-	spin_lock_init(&sbi->error_lock);
-	memcpy(sbi->errors, raw_super->s_errors, MAX_F2FS_ERRORS);
-
-	init_f2fs_rwsem(&sbi->cp_rwsem);
-	init_f2fs_rwsem(&sbi->quota_sem);
-	init_waitqueue_head(&sbi->cp_wait);
 	init_sb_info(sbi);
 
 	err = f2fs_init_iostat(sbi);
@@ -4279,12 +4287,6 @@ try_onemore:
 	limit_reserve_root(sbi);
 	adjust_unusable_cap_perc(sbi);
 
-	for (i = 0; i < NR_INODE_TYPE; i++) {
-		INIT_LIST_HEAD(&sbi->inode_list[i]);
-		spin_lock_init(&sbi->inode_lock[i]);
-	}
-	mutex_init(&sbi->flush_lock);
-
 	f2fs_init_extent_cache_info(sbi);
 
 	f2fs_init_ino_entry_info(sbi);
-- 
GitLab


From 967eaad1fed5f6335ea97a47d45214744dc57925 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 10 Nov 2022 17:15:01 +0800
Subject: [PATCH 1444/4122] f2fs: fix to set flush_merge opt and show
 noflush_merge

Some minor modifications to flush_merge and related parameters:

  1.The FLUSH_MERGE opt is set by default only in non-ro mode.
  2.When ro and merge are set at the same time, an error is reported.
  3.Display noflush_merge mount opt.

Suggested-by: Chao Yu <chao@kernel.org>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 8f4fc3ad67650..75027ff85cd93 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1353,6 +1353,12 @@ default_check:
 		return -EINVAL;
 	}
 
+	if ((f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb)) &&
+		test_opt(sbi, FLUSH_MERGE)) {
+		f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode");
+		return -EINVAL;
+	}
+
 	if (f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb)) {
 		f2fs_err(sbi, "Allow to mount readonly mode only");
 		return -EROFS;
@@ -1941,8 +1947,10 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_puts(seq, ",inline_dentry");
 	else
 		seq_puts(seq, ",noinline_dentry");
-	if (!f2fs_readonly(sbi->sb) && test_opt(sbi, FLUSH_MERGE))
+	if (test_opt(sbi, FLUSH_MERGE))
 		seq_puts(seq, ",flush_merge");
+	else
+		seq_puts(seq, ",noflush_merge");
 	if (test_opt(sbi, NOBARRIER))
 		seq_puts(seq, ",nobarrier");
 	else
@@ -2073,7 +2081,8 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, MERGE_CHECKPOINT);
 	F2FS_OPTION(sbi).unusable_cap = 0;
 	sbi->sb->s_flags |= SB_LAZYTIME;
-	set_opt(sbi, FLUSH_MERGE);
+	if (!f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb))
+		set_opt(sbi, FLUSH_MERGE);
 	if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
 		set_opt(sbi, DISCARD);
 	if (f2fs_sb_has_blkzoned(sbi)) {
-- 
GitLab


From 98b04dd0b4577894520493d96bc4623387767445 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 26 Oct 2022 02:11:21 -0400
Subject: [PATCH 1445/4122] PCI: Fix pci_device_is_present() for VFs by
 checking PF

pci_device_is_present() previously didn't work for VFs because it reads the
Vendor and Device ID, which are 0xffff for VFs, which looks like they
aren't present.  Check the PF instead.

Wei Gong reported that if virtio I/O is in progress when the driver is
unbound or "0" is written to /sys/.../sriov_numvfs, the virtio I/O
operation hangs, which may result in output like this:

  task:bash state:D stack:    0 pid: 1773 ppid:  1241 flags:0x00004002
  Call Trace:
   schedule+0x4f/0xc0
   blk_mq_freeze_queue_wait+0x69/0xa0
   blk_mq_freeze_queue+0x1b/0x20
   blk_cleanup_queue+0x3d/0xd0
   virtblk_remove+0x3c/0xb0 [virtio_blk]
   virtio_dev_remove+0x4b/0x80
   ...
   device_unregister+0x1b/0x60
   unregister_virtio_device+0x18/0x30
   virtio_pci_remove+0x41/0x80
   pci_device_remove+0x3e/0xb0

This happened because pci_device_is_present(VF) returned "false" in
virtio_pci_remove(), so it called virtio_break_device().  The broken vq
meant that vring_interrupt() skipped the vq.callback() that would have
completed the virtio I/O operation via virtblk_done().

[bhelgaas: commit log, simplify to always use pci_physfn(), add stable tag]
Link: https://lore.kernel.org/r/20221026060912.173250-1-mst@redhat.com
Reported-by: Wei Gong <gongwei833x@gmail.com>
Tested-by: Wei Gong <gongwei833x@gmail.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Cc: stable@vger.kernel.org
---
 drivers/pci/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 9f3cc829dfeea..fba95486caaf2 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -6447,6 +6447,8 @@ bool pci_device_is_present(struct pci_dev *pdev)
 {
 	u32 v;
 
+	/* Check PF if pdev is a VF, since VF Vendor/Device IDs are 0xffff */
+	pdev = pci_physfn(pdev);
 	if (pci_dev_is_disconnected(pdev))
 		return false;
 	return pci_bus_read_dev_vendor_id(pdev->bus, pdev->devfn, &v, 0);
-- 
GitLab


From c42edde5de3af6285fbb38c9d503a40ef491d10d Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:37 +0200
Subject: [PATCH 1446/4122] i2c: designware: Fix slave state machine for
 sequential reads

Some read types from I2C bus don't work correctly when testing the
i2c-designware-slave.c with the slave-eeprom backend. The same reads
work correctly when testing with a real 24c02 EEPROM chip.

In the following tests an i2c-designware-slave.c instance with the
slave-eeprom backend is configured to act as a simulated 24c02 at
address 0x65 on an I2C host bus 6:

1. i2cdump -y 6 0x65 b (OK)
   Random read. Each byte are read using a byte address write with a
   current address read in a same message.
2. i2cdump -y 6 0x65 c (OK, was NOK before commit 3b5f7f10ff6e when it
                        was repeating the 1st byte)
   Repeated current address read. One byte address write message
   followed by repeated current address read messages.
3. i2cdump -y 6 0x65 i (NOK, each 32 byte block repeats the 1st byte of
                        block)
   Sequential read using SMBus Block Read. For each 32 byte block a byte
   address write followed by 32 sequental reads in a same message.

These findings are explained because the implementation has had a
mismatch between hardware interrupts and what I2C slave events should be
sent after those interrupts. Despite that the case 1 happened to have
always the I2C slave events sent to a right order with a right data
between backend and the I2C bus.

Hardware generates the DW_IC_INTR_RD_REQ interrupt when another host is
attempting to read and for sequential reads after. DW_IC_INTR_RX_DONE
occurs when host does not acknowledge a transmitted byte which is an
indication the end of transmission.

Those interrupts do not match directly with I2C_SLAVE_READ_REQUESTED and
I2C_SLAVE_READ_PROCESSED events which is how the code was and is
practically using them. The slave-eeprom backend increases the buffer
index with the I2C_SLAVE_READ_PROCESSED event and returns the data from
current index when receiving only the I2C_SLAVE_READ_REQUESTED event.

That explains the repeated bytes in case 3 and also case 2 before
commit 3b5f7f10ff6e ("i2c: designware: slave should do WRITE_REQUESTED
before WRITE_RECEIVED").

Patch fixes the case 3 while keep cases 1 and 2 working with following
changes:

- First DW_IC_INTR_RD_REQ interrupt will change the state machine to
  read in progress state, send I2C_SLAVE_READ_REQUESTED event and
  transmit the first byte from backend
- Subsequent DW_IC_INTR_RD_REQ interrupts will send
  I2C_SLAVE_READ_PROCESSED events and transmit next bytes from backend
- STOP won't change the state machine. Otherwise case 2 won't work since
  we cannot distinguish current address read from sequentiel read
- DW_IC_INTR_RX_DONE interrupt is needless since there is no mechanism
  to inform it to a backend. It cannot be used to change state machine
  at the end of read either due the same reason than above
- Next host write to us will change the state machine from read to write
  in progress state
- STATUS_WRITE_IN_PROGRESS and STATUS_READ_IN_PROGRESS are considered
  now to be status flags not the state of the driver. This is how we
  treat them in i2c-designware-master.c

While at it do not test the return code from i2c_slave_event() for
I2C_SLAVE_READ_REQUESTED and I2C_SLAVE_READ_PROCESSED since it returns
always 0.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-core.h  |  1 -
 drivers/i2c/busses/i2c-designware-slave.c | 32 +++++++++++------------
 2 files changed, 16 insertions(+), 17 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 4d3a3b464ecd8..dbf6bdc5f01b3 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -103,7 +103,6 @@
 #define DW_IC_INTR_MASTER_MASK		(DW_IC_INTR_DEFAULT_MASK | \
 					 DW_IC_INTR_TX_EMPTY)
 #define DW_IC_INTR_SLAVE_MASK		(DW_IC_INTR_DEFAULT_MASK | \
-					 DW_IC_INTR_RX_DONE | \
 					 DW_IC_INTR_RX_UNDER | \
 					 DW_IC_INTR_RD_REQ)
 
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 0d15f4c1e9f7e..1eac4f4d5573b 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -173,8 +173,9 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
 		enabled, slave_activity, raw_stat, stat);
 
 	if (stat & DW_IC_INTR_RX_FULL) {
-		if (dev->status != STATUS_WRITE_IN_PROGRESS) {
-			dev->status = STATUS_WRITE_IN_PROGRESS;
+		if (!(dev->status & STATUS_WRITE_IN_PROGRESS)) {
+			dev->status |= STATUS_WRITE_IN_PROGRESS;
+			dev->status &= ~STATUS_READ_IN_PROGRESS;
 			i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_REQUESTED,
 					&val);
 		}
@@ -190,24 +191,23 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
 		if (slave_activity) {
 			regmap_read(dev->map, DW_IC_CLR_RD_REQ, &tmp);
 
-			dev->status = STATUS_READ_IN_PROGRESS;
-			if (!i2c_slave_event(dev->slave,
-					     I2C_SLAVE_READ_REQUESTED,
-					     &val))
-				regmap_write(dev->map, DW_IC_DATA_CMD, val);
+			if (!(dev->status & STATUS_READ_IN_PROGRESS)) {
+				i2c_slave_event(dev->slave,
+						I2C_SLAVE_READ_REQUESTED,
+						&val);
+				dev->status |= STATUS_READ_IN_PROGRESS;
+				dev->status &= ~STATUS_WRITE_IN_PROGRESS;
+			} else {
+				i2c_slave_event(dev->slave,
+						I2C_SLAVE_READ_PROCESSED,
+						&val);
+			}
+			regmap_write(dev->map, DW_IC_DATA_CMD, val);
 		}
 	}
 
-	if (stat & DW_IC_INTR_RX_DONE) {
-		if (!i2c_slave_event(dev->slave, I2C_SLAVE_READ_PROCESSED,
-				     &val))
-			regmap_read(dev->map, DW_IC_CLR_RX_DONE, &tmp);
-	}
-
-	if (stat & DW_IC_INTR_STOP_DET) {
-		dev->status = STATUS_IDLE;
+	if (stat & DW_IC_INTR_STOP_DET)
 		i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val);
-	}
 
 	return 1;
 }
-- 
GitLab


From dcf1bf648f94d651a3e6fb433e9ba0a3e43c1047 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:38 +0200
Subject: [PATCH 1447/4122] i2c: designware: Empty receive FIFO in slave
 interrupt handler

Writes from I2C bus often fail when testing the i2c-designware-slave.c
with the slave-eeprom backend. The same writes work correctly when
testing with a real 24c02 EEPROM chip.

In the tests below an i2c-designware-slave.c instance with the
slave-eeprom backend is configured to act as a simulated 24c02 at
address 0x65 on an I2C host bus 6.

1. i2cset -y 6 0x65 0x00 0x55

Single byte 0x55 write into address 0x00. No data goes into simulated
EEPROM. Debug prints from the i2c_dw_irq_handler_slave():

0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x714 : INTR_STAT=0x204
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4

2. i2ctransfer -y 6 w9@0x65 0x00 0xff-

Write 8 bytes with decrementing value starting from 0xff at address 0x00
and forward. Only some of the data goes into arbitrary addresses.
Content is something like below but varies:

00000000  f9 f8 00 00 00 00 00 00  00 00 00 00 00 00 00 00  |................|
00000050  00 00 00 00 00 00 ff fe  00 00 00 00 00 00 00 00  |................|
000000f0  00 00 00 00 00 00 00 00  00 00 00 00 00 fc fb fa  |................|

In this case debug prints were:

0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x1 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x714 : INTR_STAT=0x204
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x514 : INTR_STAT=0x4
0x1 STATUS SLAVE_ACTIVITY=0x0 : RAW_INTR_STAT=0x510 : INTR_STAT=0x0

Both cases show there is more data coming from the receive FIFO still
after detecting the STOP condition. This can be seen from interrupt
status bits DW_IC_INTR_STOP_DET (0x200) and DW_IC_INTR_RX_FULL (0x4).

Perhaps due interrupt latencies the receive FIFO is not read fast
enough, STOP detection happens synchronously when it occurs on the I2C
bus and the DW_IC_INTR_RX_FULL keeps coming as long as there are more
bytes in the receive FIFO.

Fix this by reading the receive FIFO completely empty whenever
DW_IC_INTR_RX_FULL occurs. Use RFNE, Receive FIFO Not Empty bit in the
DW_IC_STATUS register to loop through bytes in the FIFO.

While at it do not test the return code from i2c_slave_event() for the
I2C_SLAVE_WRITE_RECEIVED since to my understanding this hardware cannot
generate NACK to incoming bytes and debug print itself does not have
much value.

Reported-by: Tian Ye <tianye@sugon.com>
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-core.h  |  1 +
 drivers/i2c/busses/i2c-designware-slave.c | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index dbf6bdc5f01b3..6d1df28dd93b1 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -108,6 +108,7 @@
 
 #define DW_IC_STATUS_ACTIVITY		BIT(0)
 #define DW_IC_STATUS_TFE		BIT(2)
+#define DW_IC_STATUS_RFNE		BIT(3)
 #define DW_IC_STATUS_MASTER_ACTIVITY	BIT(5)
 #define DW_IC_STATUS_SLAVE_ACTIVITY	BIT(6)
 
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 1eac4f4d5573b..295774a69b67a 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -180,11 +180,13 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
 					&val);
 		}
 
-		regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
-		val = tmp;
-		if (!i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED,
-				     &val))
-			dev_vdbg(dev->dev, "Byte %X acked!", val);
+		do {
+			regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
+			val = tmp;
+			i2c_slave_event(dev->slave, I2C_SLAVE_WRITE_RECEIVED,
+					&val);
+			regmap_read(dev->map, DW_IC_STATUS, &tmp);
+		} while (tmp & DW_IC_STATUS_RFNE);
 	}
 
 	if (stat & DW_IC_INTR_RD_REQ) {
-- 
GitLab


From 4d827824b7bb29ce944172f6297db6d7a1ec37b3 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:39 +0200
Subject: [PATCH 1448/4122] i2c: designware: Define software status flags with
 BIT()

Define software status flags with a BIT() macro. While at it remove
STATUS_IDLE and replace its use with zero initialization and status
flags clearing with a mask.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-core.h   | 10 +++++-----
 drivers/i2c/busses/i2c-designware-master.c |  4 ++--
 drivers/i2c/busses/i2c-designware-slave.c  |  2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 6d1df28dd93b1..457e6966f85e0 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -123,12 +123,12 @@
 #define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
 
 /*
- * status codes
+ * Sofware status flags
  */
-#define STATUS_IDLE			0x0
-#define STATUS_ACTIVE			0x1
-#define STATUS_WRITE_IN_PROGRESS	0x2
-#define STATUS_READ_IN_PROGRESS		0x4
+#define STATUS_ACTIVE			BIT(0)
+#define STATUS_WRITE_IN_PROGRESS	BIT(1)
+#define STATUS_READ_IN_PROGRESS		BIT(2)
+#define STATUS_MASK			GENMASK(2, 0)
 
 /*
  * operation modes
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index dc3c5a15a95b9..1b7db2b58f311 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -574,7 +574,7 @@ i2c_dw_xfer(struct i2c_adapter *adap, struct i2c_msg msgs[], int num)
 	dev->msg_write_idx = 0;
 	dev->msg_read_idx = 0;
 	dev->msg_err = 0;
-	dev->status = STATUS_IDLE;
+	dev->status = 0;
 	dev->abort_source = 0;
 	dev->rx_outstanding = 0;
 
@@ -731,7 +731,7 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
 
 	if (stat & DW_IC_INTR_TX_ABRT) {
 		dev->cmd_err |= DW_IC_ERR_TX_ABRT;
-		dev->status = STATUS_IDLE;
+		dev->status &= ~STATUS_MASK;
 		dev->rx_outstanding = 0;
 
 		/*
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 295774a69b67a..84eb0bec70fa0 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -82,7 +82,7 @@ static int i2c_dw_reg_slave(struct i2c_client *slave)
 	dev->msg_write_idx = 0;
 	dev->msg_read_idx = 0;
 	dev->msg_err = 0;
-	dev->status = STATUS_IDLE;
+	dev->status = 0;
 	dev->abort_source = 0;
 	dev->rx_outstanding = 0;
 
-- 
GitLab


From 40015c67533548986eaba42a3d9ba9d004bee41e Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:40 +0200
Subject: [PATCH 1449/4122] i2c: designware: Remove needless initializations
 from i2c_dw_reg_slave()

These struct dw_i2c_dev members are not used in i2c-designware-slave.c
so remove re-initialization of them from i2c_dw_reg_slave().

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-slave.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 84eb0bec70fa0..421a604bf68f2 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -78,13 +78,7 @@ static int i2c_dw_reg_slave(struct i2c_client *slave)
 
 	__i2c_dw_enable(dev);
 
-	dev->cmd_err = 0;
-	dev->msg_write_idx = 0;
-	dev->msg_read_idx = 0;
-	dev->msg_err = 0;
 	dev->status = 0;
-	dev->abort_source = 0;
-	dev->rx_outstanding = 0;
 
 	return 0;
 }
-- 
GitLab


From 5cd69850308f7da3c2bfc117fca5fbfa3c530ade Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:41 +0200
Subject: [PATCH 1450/4122] i2c: designware: Remove unused completion code from
 i2c-designware-slave

Remove unused completion code from i2c-designware-slave.c. Used only in
i2c-designware-master.c.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-slave.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 421a604bf68f2..12f0417aa0ae3 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -214,8 +214,6 @@ static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id)
 	int ret;
 
 	ret = i2c_dw_irq_handler_slave(dev);
-	if (ret > 0)
-		complete(&dev->cmd_complete);
 
 	return IRQ_RETVAL(ret);
 }
@@ -242,8 +240,6 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev)
 	struct i2c_adapter *adap = &dev->adapter;
 	int ret;
 
-	init_completion(&dev->cmd_complete);
-
 	dev->init = i2c_dw_init_slave;
 	dev->disable = i2c_dw_disable;
 	dev->disable_int = i2c_dw_disable_int;
-- 
GitLab


From 4c7107c2974270ff369342f6f1bd5d3669c182f1 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:42 +0200
Subject: [PATCH 1451/4122] i2c: designware: Simplify slave interrupt handler
 nesting

Interrupt processing code in i2c-designware-slave.c is bit more readable
if not divided into another subroutine. Also explicit IRQ_NONE and
IRQ_HANDLED return values are more obvious.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-slave.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 12f0417aa0ae3..3c855cd45c34c 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -147,9 +147,9 @@ static u32 i2c_dw_read_clear_intrbits_slave(struct dw_i2c_dev *dev)
  * Interrupt service routine. This gets called whenever an I2C slave interrupt
  * occurs.
  */
-
-static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
+static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id)
 {
+	struct dw_i2c_dev *dev = dev_id;
 	u32 raw_stat, stat, enabled, tmp;
 	u8 val = 0, slave_activity;
 
@@ -159,7 +159,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
 	slave_activity = ((tmp & DW_IC_STATUS_SLAVE_ACTIVITY) >> 6);
 
 	if (!enabled || !(raw_stat & ~DW_IC_INTR_ACTIVITY) || !dev->slave)
-		return 0;
+		return IRQ_NONE;
 
 	stat = i2c_dw_read_clear_intrbits_slave(dev);
 	dev_dbg(dev->dev,
@@ -205,17 +205,7 @@ static int i2c_dw_irq_handler_slave(struct dw_i2c_dev *dev)
 	if (stat & DW_IC_INTR_STOP_DET)
 		i2c_slave_event(dev->slave, I2C_SLAVE_STOP, &val);
 
-	return 1;
-}
-
-static irqreturn_t i2c_dw_isr_slave(int this_irq, void *dev_id)
-{
-	struct dw_i2c_dev *dev = dev_id;
-	int ret;
-
-	ret = i2c_dw_irq_handler_slave(dev);
-
-	return IRQ_RETVAL(ret);
+	return IRQ_HANDLED;
 }
 
 static const struct i2c_algorithm i2c_dw_algo = {
-- 
GitLab


From cdbd2f169bf1aff9f679a3e07d62244fc4341968 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:43 +0200
Subject: [PATCH 1452/4122] i2c: designware: Do not process interrupt when
 device is suspended

Do not return with interrupt handled if host controller is off and thus
interrupt is originating from other device or is spurious.

Add a check to detect when controller is runtime suspended or
transitioning/reset. In latter case all raw interrupt status register
bits may read one. In both cases return IRQ_NONE to indicate interrupt
was not from this device.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-master.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 1b7db2b58f311..d6fcf955dfc0b 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -778,6 +778,8 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
 	dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
 	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
 		return IRQ_NONE;
+	if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0))
+		return IRQ_NONE;
 
 	i2c_dw_irq_handler_master(dev);
 
-- 
GitLab


From 184c475ace922e4029f157080be559d49d70009f Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:44 +0200
Subject: [PATCH 1453/4122] i2c: designware: Move debug print in i2c_dw_isr()

It is kind of needless to print interrupt status when code immediately
after that finds interrupt was not originating from this device.
Therefore move it after spurious interrupt detection.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-master.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index d6fcf955dfc0b..9c2c9d002dc34 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -775,11 +775,11 @@ static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
 
 	regmap_read(dev->map, DW_IC_ENABLE, &enabled);
 	regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat);
-	dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
 	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
 		return IRQ_NONE;
 	if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0))
 		return IRQ_NONE;
+	dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
 
 	i2c_dw_irq_handler_master(dev);
 
-- 
GitLab


From a92c3388b4ce34ee83f8ea398c1e00676a3dc467 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:45 +0200
Subject: [PATCH 1454/4122] i2c: designware: Simplify master interrupt handler
 nesting

In my opinion a few lines of spurious interrupt detection code can be
moved to the actual master interrupt handling function i2c_dw_isr()
without hurting readability.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-master.c | 33 ++++++++--------------
 1 file changed, 12 insertions(+), 21 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index 9c2c9d002dc34..dfb499e54c05d 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -711,9 +711,18 @@ static u32 i2c_dw_read_clear_intrbits(struct dw_i2c_dev *dev)
  * Interrupt service routine. This gets called whenever an I2C master interrupt
  * occurs.
  */
-static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
+static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
 {
-	u32 stat;
+	struct dw_i2c_dev *dev = dev_id;
+	u32 stat, enabled;
+
+	regmap_read(dev->map, DW_IC_ENABLE, &enabled);
+	regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat);
+	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
+		return IRQ_NONE;
+	if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0))
+		return IRQ_NONE;
+	dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
 
 	stat = i2c_dw_read_clear_intrbits(dev);
 
@@ -726,7 +735,7 @@ static int i2c_dw_irq_handler_master(struct dw_i2c_dev *dev)
 		 * the HW active).
 		 */
 		regmap_write(dev->map, DW_IC_INTR_MASK, 0);
-		return 0;
+		return IRQ_HANDLED;
 	}
 
 	if (stat & DW_IC_INTR_TX_ABRT) {
@@ -765,24 +774,6 @@ tx_aborted:
 		regmap_write(dev->map, DW_IC_INTR_MASK, stat);
 	}
 
-	return 0;
-}
-
-static irqreturn_t i2c_dw_isr(int this_irq, void *dev_id)
-{
-	struct dw_i2c_dev *dev = dev_id;
-	u32 stat, enabled;
-
-	regmap_read(dev->map, DW_IC_ENABLE, &enabled);
-	regmap_read(dev->map, DW_IC_RAW_INTR_STAT, &stat);
-	if (!enabled || !(stat & ~DW_IC_INTR_ACTIVITY))
-		return IRQ_NONE;
-	if (pm_runtime_suspended(dev->dev) || stat == GENMASK(31, 0))
-		return IRQ_NONE;
-	dev_dbg(dev->dev, "enabled=%#x stat=%#x\n", enabled, stat);
-
-	i2c_dw_irq_handler_master(dev);
-
 	return IRQ_HANDLED;
 }
 
-- 
GitLab


From fee61247b7f67a628bb24314b1a3a20d1f1c60f0 Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:46 +0200
Subject: [PATCH 1455/4122] i2c: designware: Remove common i2c_dw_disable_int()

Commit 90312351fd1e ("i2c: designware: MASTER mode as separated driver")
introduced disable_int pointer but there is no real use for it. Both
i2c-designware-master.c and i2c-designware-slave.c set it to the same
i2c_dw_disable_int() and scope is inside the same kernel module.

Since i2c_dw_disable_int() is just masking interrupts and the direct
DW_IC_INTR_MASK register write looks more clear in the code use that and
remove it from common code.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-common.c | 5 -----
 drivers/i2c/busses/i2c-designware-core.h   | 3 ---
 drivers/i2c/busses/i2c-designware-master.c | 9 ++++-----
 drivers/i2c/busses/i2c-designware-slave.c  | 3 +--
 4 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-common.c b/drivers/i2c/busses/i2c-designware-common.c
index c023b691441ea..a3240ece55b2b 100644
--- a/drivers/i2c/busses/i2c-designware-common.c
+++ b/drivers/i2c/busses/i2c-designware-common.c
@@ -625,10 +625,5 @@ void i2c_dw_disable(struct dw_i2c_dev *dev)
 	i2c_dw_release_lock(dev);
 }
 
-void i2c_dw_disable_int(struct dw_i2c_dev *dev)
-{
-	regmap_write(dev->map, DW_IC_INTR_MASK, 0);
-}
-
 MODULE_DESCRIPTION("Synopsys DesignWare I2C bus adapter core");
 MODULE_LICENSE("GPL");
diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 457e6966f85e0..49e5860b16651 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -232,7 +232,6 @@ struct reset_control;
  *	-1 if there is no semaphore.
  * @shared_with_punit: true if this bus is shared with the SoCs PUNIT
  * @disable: function to disable the controller
- * @disable_int: function to disable all interrupts
  * @init: function to initialize the I2C hardware
  * @set_sda_hold_time: callback to retrieve IP specific SDA hold timing
  * @mode: operation mode - DW_IC_MASTER or DW_IC_SLAVE
@@ -290,7 +289,6 @@ struct dw_i2c_dev {
 	int			semaphore_idx;
 	bool			shared_with_punit;
 	void			(*disable)(struct dw_i2c_dev *dev);
-	void			(*disable_int)(struct dw_i2c_dev *dev);
 	int			(*init)(struct dw_i2c_dev *dev);
 	int			(*set_sda_hold_time)(struct dw_i2c_dev *dev);
 	int			mode;
@@ -331,7 +329,6 @@ int i2c_dw_handle_tx_abort(struct dw_i2c_dev *dev);
 int i2c_dw_set_fifo_size(struct dw_i2c_dev *dev);
 u32 i2c_dw_func(struct i2c_adapter *adap);
 void i2c_dw_disable(struct dw_i2c_dev *dev);
-void i2c_dw_disable_int(struct dw_i2c_dev *dev);
 
 static inline void __i2c_dw_enable(struct dw_i2c_dev *dev)
 {
diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c
index dfb499e54c05d..45f569155bfe1 100644
--- a/drivers/i2c/busses/i2c-designware-master.c
+++ b/drivers/i2c/busses/i2c-designware-master.c
@@ -239,7 +239,7 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev)
 		     msgs[dev->msg_write_idx].addr | ic_tar);
 
 	/* Enforce disabled interrupts (due to HW issues) */
-	i2c_dw_disable_int(dev);
+	regmap_write(dev->map, DW_IC_INTR_MASK, 0);
 
 	/* Enable the adapter */
 	__i2c_dw_enable(dev);
@@ -299,7 +299,7 @@ static int amd_i2c_dw_xfer_quirk(struct i2c_adapter *adap, struct i2c_msg *msgs,
 	dev->msgs = msgs;
 	dev->msgs_num = num_msgs;
 	i2c_dw_xfer_init(dev);
-	i2c_dw_disable_int(dev);
+	regmap_write(dev->map, DW_IC_INTR_MASK, 0);
 
 	/* Initiate messages read/write transaction */
 	for (msg_wrt_idx = 0; msg_wrt_idx < num_msgs; msg_wrt_idx++) {
@@ -770,7 +770,7 @@ tx_aborted:
 	else if (unlikely(dev->flags & ACCESS_INTR_MASK)) {
 		/* Workaround to trigger pending interrupt */
 		regmap_read(dev->map, DW_IC_INTR_MASK, &stat);
-		i2c_dw_disable_int(dev);
+		regmap_write(dev->map, DW_IC_INTR_MASK, 0);
 		regmap_write(dev->map, DW_IC_INTR_MASK, stat);
 	}
 
@@ -871,7 +871,6 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev)
 
 	dev->init = i2c_dw_init_master;
 	dev->disable = i2c_dw_disable;
-	dev->disable_int = i2c_dw_disable_int;
 
 	ret = i2c_dw_init_regmap(dev);
 	if (ret)
@@ -910,7 +909,7 @@ int i2c_dw_probe_master(struct dw_i2c_dev *dev)
 	if (ret)
 		return ret;
 
-	i2c_dw_disable_int(dev);
+	regmap_write(dev->map, DW_IC_INTR_MASK, 0);
 	i2c_dw_release_lock(dev);
 
 	ret = devm_request_irq(dev->dev, dev->irq, i2c_dw_isr, irq_flags,
diff --git a/drivers/i2c/busses/i2c-designware-slave.c b/drivers/i2c/busses/i2c-designware-slave.c
index 3c855cd45c34c..c6d2e4c2ac23c 100644
--- a/drivers/i2c/busses/i2c-designware-slave.c
+++ b/drivers/i2c/busses/i2c-designware-slave.c
@@ -87,7 +87,7 @@ static int i2c_dw_unreg_slave(struct i2c_client *slave)
 {
 	struct dw_i2c_dev *dev = i2c_get_adapdata(slave->adapter);
 
-	dev->disable_int(dev);
+	regmap_write(dev->map, DW_IC_INTR_MASK, 0);
 	dev->disable(dev);
 	synchronize_irq(dev->irq);
 	dev->slave = NULL;
@@ -232,7 +232,6 @@ int i2c_dw_probe_slave(struct dw_i2c_dev *dev)
 
 	dev->init = i2c_dw_init_slave;
 	dev->disable = i2c_dw_disable;
-	dev->disable_int = i2c_dw_disable_int;
 
 	ret = i2c_dw_init_regmap(dev);
 	if (ret)
-- 
GitLab


From 966b7d3c738ab8d82363eff9e4c62e429697893e Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:47 +0200
Subject: [PATCH 1456/4122] i2c: designware: Align defines in
 i2c-designware-core.h

Align all defines to the same column.

Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-core.h | 230 +++++++++++------------
 1 file changed, 115 insertions(+), 115 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 49e5860b16651..0668888d557d5 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -18,12 +18,12 @@
 #include <linux/regmap.h>
 #include <linux/types.h>
 
-#define DW_IC_DEFAULT_FUNCTIONALITY (I2C_FUNC_I2C |			\
-					I2C_FUNC_SMBUS_BYTE |		\
-					I2C_FUNC_SMBUS_BYTE_DATA |	\
-					I2C_FUNC_SMBUS_WORD_DATA |	\
-					I2C_FUNC_SMBUS_BLOCK_DATA |	\
-					I2C_FUNC_SMBUS_I2C_BLOCK)
+#define DW_IC_DEFAULT_FUNCTIONALITY		(I2C_FUNC_I2C | \
+						 I2C_FUNC_SMBUS_BYTE | \
+						 I2C_FUNC_SMBUS_BYTE_DATA | \
+						 I2C_FUNC_SMBUS_WORD_DATA | \
+						 I2C_FUNC_SMBUS_BLOCK_DATA | \
+						 I2C_FUNC_SMBUS_I2C_BLOCK)
 
 #define DW_IC_CON_MASTER			BIT(0)
 #define DW_IC_CON_SPEED_STD			(1 << 1)
@@ -43,81 +43,81 @@
 /*
  * Registers offset
  */
-#define DW_IC_CON		0x00
-#define DW_IC_TAR		0x04
-#define DW_IC_SAR		0x08
-#define DW_IC_DATA_CMD		0x10
-#define DW_IC_SS_SCL_HCNT	0x14
-#define DW_IC_SS_SCL_LCNT	0x18
-#define DW_IC_FS_SCL_HCNT	0x1c
-#define DW_IC_FS_SCL_LCNT	0x20
-#define DW_IC_HS_SCL_HCNT	0x24
-#define DW_IC_HS_SCL_LCNT	0x28
-#define DW_IC_INTR_STAT		0x2c
-#define DW_IC_INTR_MASK		0x30
-#define DW_IC_RAW_INTR_STAT	0x34
-#define DW_IC_RX_TL		0x38
-#define DW_IC_TX_TL		0x3c
-#define DW_IC_CLR_INTR		0x40
-#define DW_IC_CLR_RX_UNDER	0x44
-#define DW_IC_CLR_RX_OVER	0x48
-#define DW_IC_CLR_TX_OVER	0x4c
-#define DW_IC_CLR_RD_REQ	0x50
-#define DW_IC_CLR_TX_ABRT	0x54
-#define DW_IC_CLR_RX_DONE	0x58
-#define DW_IC_CLR_ACTIVITY	0x5c
-#define DW_IC_CLR_STOP_DET	0x60
-#define DW_IC_CLR_START_DET	0x64
-#define DW_IC_CLR_GEN_CALL	0x68
-#define DW_IC_ENABLE		0x6c
-#define DW_IC_STATUS		0x70
-#define DW_IC_TXFLR		0x74
-#define DW_IC_RXFLR		0x78
-#define DW_IC_SDA_HOLD		0x7c
-#define DW_IC_TX_ABRT_SOURCE	0x80
-#define DW_IC_ENABLE_STATUS	0x9c
-#define DW_IC_CLR_RESTART_DET	0xa8
-#define DW_IC_COMP_PARAM_1	0xf4
-#define DW_IC_COMP_VERSION	0xf8
-#define DW_IC_SDA_HOLD_MIN_VERS	0x3131312A
-#define DW_IC_COMP_TYPE		0xfc
-#define DW_IC_COMP_TYPE_VALUE	0x44570140
-
-#define DW_IC_INTR_RX_UNDER	BIT(0)
-#define DW_IC_INTR_RX_OVER	BIT(1)
-#define DW_IC_INTR_RX_FULL	BIT(2)
-#define DW_IC_INTR_TX_OVER	BIT(3)
-#define DW_IC_INTR_TX_EMPTY	BIT(4)
-#define DW_IC_INTR_RD_REQ	BIT(5)
-#define DW_IC_INTR_TX_ABRT	BIT(6)
-#define DW_IC_INTR_RX_DONE	BIT(7)
-#define DW_IC_INTR_ACTIVITY	BIT(8)
-#define DW_IC_INTR_STOP_DET	BIT(9)
-#define DW_IC_INTR_START_DET	BIT(10)
-#define DW_IC_INTR_GEN_CALL	BIT(11)
-#define DW_IC_INTR_RESTART_DET	BIT(12)
-
-#define DW_IC_INTR_DEFAULT_MASK		(DW_IC_INTR_RX_FULL | \
-					 DW_IC_INTR_TX_ABRT | \
-					 DW_IC_INTR_STOP_DET)
-#define DW_IC_INTR_MASTER_MASK		(DW_IC_INTR_DEFAULT_MASK | \
-					 DW_IC_INTR_TX_EMPTY)
-#define DW_IC_INTR_SLAVE_MASK		(DW_IC_INTR_DEFAULT_MASK | \
-					 DW_IC_INTR_RX_UNDER | \
-					 DW_IC_INTR_RD_REQ)
-
-#define DW_IC_STATUS_ACTIVITY		BIT(0)
-#define DW_IC_STATUS_TFE		BIT(2)
-#define DW_IC_STATUS_RFNE		BIT(3)
-#define DW_IC_STATUS_MASTER_ACTIVITY	BIT(5)
-#define DW_IC_STATUS_SLAVE_ACTIVITY	BIT(6)
-
-#define DW_IC_SDA_HOLD_RX_SHIFT		16
-#define DW_IC_SDA_HOLD_RX_MASK		GENMASK(23, 16)
-
-#define DW_IC_ERR_TX_ABRT	0x1
-
-#define DW_IC_TAR_10BITADDR_MASTER	BIT(12)
+#define DW_IC_CON				0x00
+#define DW_IC_TAR				0x04
+#define DW_IC_SAR				0x08
+#define DW_IC_DATA_CMD				0x10
+#define DW_IC_SS_SCL_HCNT			0x14
+#define DW_IC_SS_SCL_LCNT			0x18
+#define DW_IC_FS_SCL_HCNT			0x1c
+#define DW_IC_FS_SCL_LCNT			0x20
+#define DW_IC_HS_SCL_HCNT			0x24
+#define DW_IC_HS_SCL_LCNT			0x28
+#define DW_IC_INTR_STAT				0x2c
+#define DW_IC_INTR_MASK				0x30
+#define DW_IC_RAW_INTR_STAT			0x34
+#define DW_IC_RX_TL				0x38
+#define DW_IC_TX_TL				0x3c
+#define DW_IC_CLR_INTR				0x40
+#define DW_IC_CLR_RX_UNDER			0x44
+#define DW_IC_CLR_RX_OVER			0x48
+#define DW_IC_CLR_TX_OVER			0x4c
+#define DW_IC_CLR_RD_REQ			0x50
+#define DW_IC_CLR_TX_ABRT			0x54
+#define DW_IC_CLR_RX_DONE			0x58
+#define DW_IC_CLR_ACTIVITY			0x5c
+#define DW_IC_CLR_STOP_DET			0x60
+#define DW_IC_CLR_START_DET			0x64
+#define DW_IC_CLR_GEN_CALL			0x68
+#define DW_IC_ENABLE				0x6c
+#define DW_IC_STATUS				0x70
+#define DW_IC_TXFLR				0x74
+#define DW_IC_RXFLR				0x78
+#define DW_IC_SDA_HOLD				0x7c
+#define DW_IC_TX_ABRT_SOURCE			0x80
+#define DW_IC_ENABLE_STATUS			0x9c
+#define DW_IC_CLR_RESTART_DET			0xa8
+#define DW_IC_COMP_PARAM_1			0xf4
+#define DW_IC_COMP_VERSION			0xf8
+#define DW_IC_SDA_HOLD_MIN_VERS			0x3131312A
+#define DW_IC_COMP_TYPE				0xfc
+#define DW_IC_COMP_TYPE_VALUE			0x44570140
+
+#define DW_IC_INTR_RX_UNDER			BIT(0)
+#define DW_IC_INTR_RX_OVER			BIT(1)
+#define DW_IC_INTR_RX_FULL			BIT(2)
+#define DW_IC_INTR_TX_OVER			BIT(3)
+#define DW_IC_INTR_TX_EMPTY			BIT(4)
+#define DW_IC_INTR_RD_REQ			BIT(5)
+#define DW_IC_INTR_TX_ABRT			BIT(6)
+#define DW_IC_INTR_RX_DONE			BIT(7)
+#define DW_IC_INTR_ACTIVITY			BIT(8)
+#define DW_IC_INTR_STOP_DET			BIT(9)
+#define DW_IC_INTR_START_DET			BIT(10)
+#define DW_IC_INTR_GEN_CALL			BIT(11)
+#define DW_IC_INTR_RESTART_DET			BIT(12)
+
+#define DW_IC_INTR_DEFAULT_MASK			(DW_IC_INTR_RX_FULL | \
+						 DW_IC_INTR_TX_ABRT | \
+						 DW_IC_INTR_STOP_DET)
+#define DW_IC_INTR_MASTER_MASK			(DW_IC_INTR_DEFAULT_MASK | \
+						 DW_IC_INTR_TX_EMPTY)
+#define DW_IC_INTR_SLAVE_MASK			(DW_IC_INTR_DEFAULT_MASK | \
+						 DW_IC_INTR_RX_UNDER | \
+						 DW_IC_INTR_RD_REQ)
+
+#define DW_IC_STATUS_ACTIVITY			BIT(0)
+#define DW_IC_STATUS_TFE			BIT(2)
+#define DW_IC_STATUS_RFNE			BIT(3)
+#define DW_IC_STATUS_MASTER_ACTIVITY		BIT(5)
+#define DW_IC_STATUS_SLAVE_ACTIVITY		BIT(6)
+
+#define DW_IC_SDA_HOLD_RX_SHIFT			16
+#define DW_IC_SDA_HOLD_RX_MASK			GENMASK(23, 16)
+
+#define DW_IC_ERR_TX_ABRT			0x1
+
+#define DW_IC_TAR_10BITADDR_MASTER		BIT(12)
 
 #define DW_IC_COMP_PARAM_1_SPEED_MODE_HIGH	(BIT(2) | BIT(3))
 #define DW_IC_COMP_PARAM_1_SPEED_MODE_MASK	GENMASK(3, 2)
@@ -125,16 +125,16 @@
 /*
  * Sofware status flags
  */
-#define STATUS_ACTIVE			BIT(0)
-#define STATUS_WRITE_IN_PROGRESS	BIT(1)
-#define STATUS_READ_IN_PROGRESS		BIT(2)
-#define STATUS_MASK			GENMASK(2, 0)
+#define STATUS_ACTIVE				BIT(0)
+#define STATUS_WRITE_IN_PROGRESS		BIT(1)
+#define STATUS_READ_IN_PROGRESS			BIT(2)
+#define STATUS_MASK				GENMASK(2, 0)
 
 /*
  * operation modes
  */
-#define DW_IC_MASTER		0
-#define DW_IC_SLAVE		1
+#define DW_IC_MASTER				0
+#define DW_IC_SLAVE				1
 
 /*
  * Hardware abort codes from the DW_IC_TX_ABRT_SOURCE register
@@ -142,20 +142,20 @@
  * Only expected abort codes are listed here
  * refer to the datasheet for the full list
  */
-#define ABRT_7B_ADDR_NOACK	0
-#define ABRT_10ADDR1_NOACK	1
-#define ABRT_10ADDR2_NOACK	2
-#define ABRT_TXDATA_NOACK	3
-#define ABRT_GCALL_NOACK	4
-#define ABRT_GCALL_READ		5
-#define ABRT_SBYTE_ACKDET	7
-#define ABRT_SBYTE_NORSTRT	9
-#define ABRT_10B_RD_NORSTRT	10
-#define ABRT_MASTER_DIS		11
-#define ARB_LOST		12
-#define ABRT_SLAVE_FLUSH_TXFIFO	13
-#define ABRT_SLAVE_ARBLOST	14
-#define ABRT_SLAVE_RD_INTX	15
+#define ABRT_7B_ADDR_NOACK			0
+#define ABRT_10ADDR1_NOACK			1
+#define ABRT_10ADDR2_NOACK			2
+#define ABRT_TXDATA_NOACK			3
+#define ABRT_GCALL_NOACK			4
+#define ABRT_GCALL_READ				5
+#define ABRT_SBYTE_ACKDET			7
+#define ABRT_SBYTE_NORSTRT			9
+#define ABRT_10B_RD_NORSTRT			10
+#define ABRT_MASTER_DIS				11
+#define ARB_LOST				12
+#define ABRT_SLAVE_FLUSH_TXFIFO			13
+#define ABRT_SLAVE_ARBLOST			14
+#define ABRT_SLAVE_RD_INTX			15
 
 #define DW_IC_TX_ABRT_7B_ADDR_NOACK		BIT(ABRT_7B_ADDR_NOACK)
 #define DW_IC_TX_ABRT_10ADDR1_NOACK		BIT(ABRT_10ADDR1_NOACK)
@@ -172,11 +172,11 @@
 #define DW_IC_RX_ABRT_SLAVE_ARBLOST		BIT(ABRT_SLAVE_ARBLOST)
 #define DW_IC_RX_ABRT_SLAVE_FLUSH_TXFIFO	BIT(ABRT_SLAVE_FLUSH_TXFIFO)
 
-#define DW_IC_TX_ABRT_NOACK		(DW_IC_TX_ABRT_7B_ADDR_NOACK | \
-					 DW_IC_TX_ABRT_10ADDR1_NOACK | \
-					 DW_IC_TX_ABRT_10ADDR2_NOACK | \
-					 DW_IC_TX_ABRT_TXDATA_NOACK | \
-					 DW_IC_TX_ABRT_GCALL_NOACK)
+#define DW_IC_TX_ABRT_NOACK			(DW_IC_TX_ABRT_7B_ADDR_NOACK | \
+						 DW_IC_TX_ABRT_10ADDR1_NOACK | \
+						 DW_IC_TX_ABRT_10ADDR2_NOACK | \
+						 DW_IC_TX_ABRT_TXDATA_NOACK | \
+						 DW_IC_TX_ABRT_GCALL_NOACK)
 
 struct clk;
 struct device;
@@ -295,21 +295,21 @@ struct dw_i2c_dev {
 	struct i2c_bus_recovery_info rinfo;
 };
 
-#define ACCESS_INTR_MASK	BIT(0)
-#define ACCESS_NO_IRQ_SUSPEND	BIT(1)
-#define ARBITRATION_SEMAPHORE	BIT(2)
+#define ACCESS_INTR_MASK			BIT(0)
+#define ACCESS_NO_IRQ_SUSPEND			BIT(1)
+#define ARBITRATION_SEMAPHORE			BIT(2)
 
-#define MODEL_MSCC_OCELOT	BIT(8)
-#define MODEL_BAIKAL_BT1	BIT(9)
-#define MODEL_AMD_NAVI_GPU	BIT(10)
-#define MODEL_MASK		GENMASK(11, 8)
+#define MODEL_MSCC_OCELOT			BIT(8)
+#define MODEL_BAIKAL_BT1			BIT(9)
+#define MODEL_AMD_NAVI_GPU			BIT(10)
+#define MODEL_MASK				GENMASK(11, 8)
 
 /*
  * Enable UCSI interrupt by writing 0xd at register
  * offset 0x474 specified in hardware specification.
  */
-#define AMD_UCSI_INTR_REG	0x474
-#define AMD_UCSI_INTR_EN	0xd
+#define AMD_UCSI_INTR_REG			0x474
+#define AMD_UCSI_INTR_EN			0xd
 
 struct i2c_dw_semaphore_callbacks {
 	int	(*probe)(struct dw_i2c_dev *dev);
-- 
GitLab


From 4bae6da1cbf4658a92e6f58da30bf536746d1e5d Mon Sep 17 00:00:00 2001
From: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Date: Mon, 7 Nov 2022 15:42:48 +0200
Subject: [PATCH 1457/4122] i2c: designware: Add comment to custom register
 value constants

DW_IC_COMP_VERSION register contains the ASCII representation of the
Synopsys component version. Here 0x3131312A == "111*" means version
1.11* required for DW_IC_SDA_HOLD register availability where '*' means
any letter starting from 'a'.

DW_IC_COMP_TYPE is constant and is derived from two ASCII letters "DW"
followed by a 16-bit unsigned number.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-designware-core.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 0668888d557d5..95ebc5eaa5d12 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -79,9 +79,9 @@
 #define DW_IC_CLR_RESTART_DET			0xa8
 #define DW_IC_COMP_PARAM_1			0xf4
 #define DW_IC_COMP_VERSION			0xf8
-#define DW_IC_SDA_HOLD_MIN_VERS			0x3131312A
+#define DW_IC_SDA_HOLD_MIN_VERS			0x3131312A /* "111*" == v1.11* */
 #define DW_IC_COMP_TYPE				0xfc
-#define DW_IC_COMP_TYPE_VALUE			0x44570140
+#define DW_IC_COMP_TYPE_VALUE			0x44570140 /* "DW" + 0x0140 */
 
 #define DW_IC_INTR_RX_UNDER			BIT(0)
 #define DW_IC_INTR_RX_OVER			BIT(1)
-- 
GitLab


From c57351a75d013c30e4a726aef1ad441676a99da4 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gshan@redhat.com>
Date: Sat, 12 Nov 2022 17:43:22 +0800
Subject: [PATCH 1458/4122] KVM: Push dirty information unconditionally to
 backup bitmap

In mark_page_dirty_in_slot(), we bail out when no running vcpu exists
and a running vcpu context is strictly required by architecture. It may
cause backwards compatible issue. Currently, saving vgic/its tables is
the only known case where no running vcpu context is expected. We may
have other unknown cases where no running vcpu context exists and it's
reported by the warning message and we bail out without pushing the
dirty information to the backup bitmap. For this, the application is
going to enable the backup bitmap for the unknown cases. However, the
dirty information can't be pushed to the backup bitmap even though the
backup bitmap is enabled for those unknown cases in the application,
until the unknown cases are added to the allowed list of non-running
vcpu context with extra code changes to the host kernel.

In order to make the new application, where the backup bitmap has been
enabled, to work with the unchanged host, we continue to push the dirty
information to the backup bitmap instead of bailing out early. With the
added check on 'memslot->dirty_bitmap' to mark_page_dirty_in_slot(), the
kernel crash is avoided silently by the combined conditions: no running
vcpu context, kvm_arch_allow_write_without_running_vcpu() returns 'true',
and the backup bitmap (KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP) isn't enabled
yet.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221112094322.21911-1-gshan@redhat.com
---
 virt/kvm/kvm_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index be40d1ce6e913..0fa541ba8ab54 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3308,8 +3308,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
 	if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm))
 		return;
 
-	if (WARN_ON_ONCE(!kvm_arch_allow_write_without_running_vcpu(kvm) && !vcpu))
-		return;
+	WARN_ON_ONCE(!vcpu && !kvm_arch_allow_write_without_running_vcpu(kvm));
 #endif
 
 	if (memslot && kvm_slot_dirty_track_enabled(memslot)) {
@@ -3318,7 +3317,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm,
 
 		if (kvm->dirty_ring_size && vcpu)
 			kvm_dirty_ring_push(vcpu, slot, rel_gfn);
-		else
+		else if (memslot->dirty_bitmap)
 			set_bit_le(rel_gfn, memslot->dirty_bitmap);
 	}
 }
-- 
GitLab


From 8502bee5584235943c4d371597c740d6779991db Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Thu, 10 Nov 2022 17:23:42 +0800
Subject: [PATCH 1459/4122] i2c: imx: use
 devm_platform_get_and_ioremap_resource()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Reviewed-by: Mukesh Ojha <quic_mojha@quicinc.com>
Reviewed-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-imx.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/i2c/busses/i2c-imx.c b/drivers/i2c/busses/i2c-imx.c
index 3082183bd66a4..1ce0cf7a323fa 100644
--- a/drivers/i2c/busses/i2c-imx.c
+++ b/drivers/i2c/busses/i2c-imx.c
@@ -1449,8 +1449,7 @@ static int i2c_imx_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
-- 
GitLab


From e826192cc26bd69746bbf22e6bdf72b87cb3d97b Mon Sep 17 00:00:00 2001
From: Arminder Singh <arminders208@outlook.com>
Date: Sat, 5 Nov 2022 07:56:49 -0400
Subject: [PATCH 1460/4122] i2c: /pasemi: PASemi I2C controller IRQ enablement

This patch adds IRQ support to the PASemi I2C controller driver to
increase the performace of I2C transactions on platforms with PASemi I2C
controllers. While primarily intended for Apple silicon platforms, this
patch should also help in enabling IRQ support for older PASemi hardware
as well should the need arise.

This version of the patch has been tested on an M1 Ultra Mac Studio,
as well as an M1 MacBook Pro, and userspace launches successfully
while using the IRQ path for I2C transactions.

Signed-off-by: Arminder Singh <arminders208@outlook.com>
Reviewed-by: Sven Peter <sven@svenpeter.dev>
Reviewed-by: Hector Martin <marcan@marcan.st>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-pasemi-core.c     | 32 ++++++++++++++++++++----
 drivers/i2c/busses/i2c-pasemi-core.h     |  5 ++++
 drivers/i2c/busses/i2c-pasemi-platform.c |  6 +++++
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pasemi-core.c b/drivers/i2c/busses/i2c-pasemi-core.c
index 9028ffb58cc07..7d54a9f34c74b 100644
--- a/drivers/i2c/busses/i2c-pasemi-core.c
+++ b/drivers/i2c/busses/i2c-pasemi-core.c
@@ -21,6 +21,7 @@
 #define REG_MTXFIFO	0x00
 #define REG_MRXFIFO	0x04
 #define REG_SMSTA	0x14
+#define REG_IMASK	0x18
 #define REG_CTL		0x1c
 #define REG_REV		0x28
 
@@ -66,6 +67,7 @@ static void pasemi_reset(struct pasemi_smbus *smbus)
 		val |= CTL_EN;
 
 	reg_write(smbus, REG_CTL, val);
+	reinit_completion(&smbus->irq_completion);
 }
 
 static void pasemi_smb_clear(struct pasemi_smbus *smbus)
@@ -78,14 +80,21 @@ static void pasemi_smb_clear(struct pasemi_smbus *smbus)
 
 static int pasemi_smb_waitready(struct pasemi_smbus *smbus)
 {
-	int timeout = 10;
+	int timeout = 100;
 	unsigned int status;
 
-	status = reg_read(smbus, REG_SMSTA);
-
-	while (!(status & SMSTA_XEN) && timeout--) {
-		msleep(1);
+	if (smbus->use_irq) {
+		reinit_completion(&smbus->irq_completion);
+		reg_write(smbus, REG_IMASK, SMSTA_XEN | SMSTA_MTN);
+		wait_for_completion_timeout(&smbus->irq_completion, msecs_to_jiffies(100));
+		reg_write(smbus, REG_IMASK, 0);
 		status = reg_read(smbus, REG_SMSTA);
+	} else {
+		status = reg_read(smbus, REG_SMSTA);
+		while (!(status & SMSTA_XEN) && timeout--) {
+			msleep(1);
+			status = reg_read(smbus, REG_SMSTA);
+		}
 	}
 
 	/* Got NACK? */
@@ -344,10 +353,14 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus)
 
 	/* set up the sysfs linkage to our parent device */
 	smbus->adapter.dev.parent = smbus->dev;
+	smbus->use_irq = 0;
+	init_completion(&smbus->irq_completion);
 
 	if (smbus->hw_rev != PASEMI_HW_REV_PCI)
 		smbus->hw_rev = reg_read(smbus, REG_REV);
 
+	reg_write(smbus, REG_IMASK, 0);
+
 	pasemi_reset(smbus);
 
 	error = devm_i2c_add_adapter(smbus->dev, &smbus->adapter);
@@ -356,3 +369,12 @@ int pasemi_i2c_common_probe(struct pasemi_smbus *smbus)
 
 	return 0;
 }
+
+irqreturn_t pasemi_irq_handler(int irq, void *dev_id)
+{
+	struct pasemi_smbus *smbus = dev_id;
+
+	reg_write(smbus, REG_IMASK, 0);
+	complete(&smbus->irq_completion);
+	return IRQ_HANDLED;
+}
diff --git a/drivers/i2c/busses/i2c-pasemi-core.h b/drivers/i2c/busses/i2c-pasemi-core.h
index 4655124a37f3a..88821f4e8a9f8 100644
--- a/drivers/i2c/busses/i2c-pasemi-core.h
+++ b/drivers/i2c/busses/i2c-pasemi-core.h
@@ -7,6 +7,7 @@
 #include <linux/i2c-smbus.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
+#include <linux/completion.h>
 
 #define PASEMI_HW_REV_PCI -1
 
@@ -16,6 +17,10 @@ struct pasemi_smbus {
 	void __iomem		*ioaddr;
 	unsigned int		 clk_div;
 	int			 hw_rev;
+	int			 use_irq;
+	struct completion	 irq_completion;
 };
 
 int pasemi_i2c_common_probe(struct pasemi_smbus *smbus);
+
+irqreturn_t pasemi_irq_handler(int irq, void *dev_id);
diff --git a/drivers/i2c/busses/i2c-pasemi-platform.c b/drivers/i2c/busses/i2c-pasemi-platform.c
index 88a54aaf7e3c3..e35945a91dbef 100644
--- a/drivers/i2c/busses/i2c-pasemi-platform.c
+++ b/drivers/i2c/busses/i2c-pasemi-platform.c
@@ -49,6 +49,7 @@ static int pasemi_platform_i2c_probe(struct platform_device *pdev)
 	struct pasemi_smbus *smbus;
 	u32 frequency;
 	int error;
+	int irq_num;
 
 	data = devm_kzalloc(dev, sizeof(struct pasemi_platform_i2c_data),
 			    GFP_KERNEL);
@@ -82,6 +83,11 @@ static int pasemi_platform_i2c_probe(struct platform_device *pdev)
 	if (error)
 		goto out_clk_disable;
 
+	irq_num = platform_get_irq(pdev, 0);
+	error = devm_request_irq(smbus->dev, irq_num, pasemi_irq_handler, 0, "pasemi_apple_i2c", (void *)smbus);
+
+	if (!error)
+		smbus->use_irq = 1;
 	platform_set_drvdata(pdev, data);
 
 	return 0;
-- 
GitLab


From dc901d98b1fe6e52ab81cd3e0879379168e06daa Mon Sep 17 00:00:00 2001
From: Fenghua Yu <fenghua.yu@intel.com>
Date: Thu, 10 Nov 2022 17:27:15 -0800
Subject: [PATCH 1461/4122] dmaengine: idxd: Fix crc_val field for completion
 record

The crc_val in the completion record should be 64 bits and not 32 bits.

Fixes: 4ac823e9cd85 ("dmaengine: idxd: fix delta_rec and crc size field for completion record")
Reported-by: Nirav N Shah <nirav.n.shah@intel.com>
Signed-off-by: Fenghua Yu <fenghua.yu@intel.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Link: https://lore.kernel.org/r/20221111012715.2031481-1-fenghua.yu@intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 include/uapi/linux/idxd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index 2b9e7feba3f32..1d553bedbdb51 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -295,7 +295,7 @@ struct dsa_completion_record {
 		};
 
 		uint32_t	delta_rec_size;
-		uint32_t	crc_val;
+		uint64_t	crc_val;
 
 		/* DIF check & strip */
 		struct {
-- 
GitLab


From 22c354cf3fec6aa52cf2df6685b33ce5f265edf8 Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Wed, 9 Nov 2022 12:12:27 +0100
Subject: [PATCH 1462/4122] dt-bindings: dmaengine: qcom: gpi: add compatible
 for SM6375

Document the compatible for GPI DMA controller on SM6375 SoC.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221109111236.46003-3-konrad.dybcio@linaro.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/dma/qcom,gpi.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
index 232895fa1d8d2..e7ba1c47a88ea 100644
--- a/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
+++ b/Documentation/devicetree/bindings/dma/qcom,gpi.yaml
@@ -26,6 +26,7 @@ properties:
           - enum:
               - qcom,sc7280-gpi-dma
               - qcom,sm6115-gpi-dma
+              - qcom,sm6375-gpi-dma
               - qcom,sm8350-gpi-dma
               - qcom,sm8450-gpi-dma
           - const: qcom,sm6350-gpi-dma
-- 
GitLab


From 444eef7d5695393f214d83180f3e4bb99621cd07 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 13 Nov 2022 21:34:02 +0100
Subject: [PATCH 1463/4122] dmaengine: idxd: Remove linux/msi.h include

Nothing in this file needs anything from linux/msi.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Vinod Koul <vkoul@kernel.org>
Cc: dmaengine@vger.kernel.org
Link: https://lore.kernel.org/r/20221113202428.573536003@linutronix.de
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/idxd/device.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 6f44fa8f78a5d..06f5d3783d771 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -7,7 +7,6 @@
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/dmaengine.h>
 #include <linux/irq.h>
-#include <linux/msi.h>
 #include <uapi/linux/idxd.h>
 #include "../dmaengine.h"
 #include "idxd.h"
-- 
GitLab


From d57b2a65cde743a490a848236641fe9aa5536a9b Mon Sep 17 00:00:00 2001
From: Akhil R <akhilrajeev@nvidia.com>
Date: Thu, 10 Nov 2022 22:47:46 +0530
Subject: [PATCH 1464/4122] dt-bindings: dmaengine: Add dma-channel-mask to
 Tegra GPCDMA

Add dma-channel-mask property in Tegra GPCDMA document.

The property would help to specify the channels to be used in
kernel and reserve few for the firmware. This was previously
achieved by limiting the channel number to 31 in the driver.
This is wrong and does not align with the hardware. Correct
this and set the max interrupts to 32.

Signed-off-by: Akhil R <akhilrajeev@nvidia.com>
Acked-by: Thierry Reding <treding@nvidia.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221110171748.40304-2-akhilrajeev@nvidia.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml   | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml b/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml
index c8894476b6abf..851bd50ee67fe 100644
--- a/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/nvidia,tegra186-gpc-dma.yaml
@@ -39,7 +39,7 @@ properties:
       Should contain all of the per-channel DMA interrupts in
       ascending order with respect to the DMA channel index.
     minItems: 1
-    maxItems: 31
+    maxItems: 32
 
   resets:
     maxItems: 1
@@ -52,6 +52,9 @@ properties:
 
   dma-coherent: true
 
+  dma-channel-mask:
+    maxItems: 1
+
 required:
   - compatible
   - reg
@@ -60,6 +63,7 @@ required:
   - reset-names
   - "#dma-cells"
   - iommus
+  - dma-channel-mask
 
 additionalProperties: false
 
@@ -108,5 +112,6 @@ examples:
         #dma-cells = <1>;
         iommus = <&smmu TEGRA186_SID_GPCDMA_0>;
         dma-coherent;
+        dma-channel-mask = <0xfffffffe>;
     };
 ...
-- 
GitLab


From 3a0c95b61385f583424f44e79c15f1bdf050776d Mon Sep 17 00:00:00 2001
From: Akhil R <akhilrajeev@nvidia.com>
Date: Thu, 10 Nov 2022 22:47:48 +0530
Subject: [PATCH 1465/4122] dmaengine: tegra: Add support for dma-channel-mask

Add support for dma-channel-mask so that only the specified channels
are used. This helps to reserve some channels for the firmware.

This was initially achieved by limiting the channel number to 31 in
the driver and adjusting the register address to skip channel0 which
was reserved for a firmware. This is wrong and does not align with
the hardware.

Now, with this change, the driver can align more to the actual hardware
which has 32 channels. But this implies that there will be a break in the
ABI and the device tree need to be updated along with this change for the
driver to pickup the right interrupt corresponding to the channel

Reviewed-by: Jon Hunter <jonathanh@nvidia.com>
Link: https://lore.kernel.org/all/Y2EFoG1H9YpfxRjs@orome/
Signed-off-by: Akhil R <akhilrajeev@nvidia.com>
Link: https://lore.kernel.org/r/20221110171748.40304-4-akhilrajeev@nvidia.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/tegra186-gpc-dma.c | 37 +++++++++++++++++++++++++++-------
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/dma/tegra186-gpc-dma.c b/drivers/dma/tegra186-gpc-dma.c
index fa9bda4a2bc6f..1d1180db6d4ec 100644
--- a/drivers/dma/tegra186-gpc-dma.c
+++ b/drivers/dma/tegra186-gpc-dma.c
@@ -161,7 +161,10 @@
 #define TEGRA_GPCDMA_BURST_COMPLETION_TIMEOUT	5000 /* 5 msec */
 
 /* Channel base address offset from GPCDMA base address */
-#define TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET	0x20000
+#define TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET	0x10000
+
+/* Default channel mask reserving channel0 */
+#define TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK	0xfffffffe
 
 struct tegra_dma;
 struct tegra_dma_channel;
@@ -246,6 +249,7 @@ struct tegra_dma {
 	const struct tegra_dma_chip_data *chip_data;
 	unsigned long sid_m2d_reserved;
 	unsigned long sid_d2m_reserved;
+	u32 chan_mask;
 	void __iomem *base_addr;
 	struct device *dev;
 	struct dma_device dma_dev;
@@ -1288,7 +1292,7 @@ static struct dma_chan *tegra_dma_of_xlate(struct of_phandle_args *dma_spec,
 }
 
 static const struct tegra_dma_chip_data tegra186_dma_chip_data = {
-	.nr_channels = 31,
+	.nr_channels = 32,
 	.channel_reg_size = SZ_64K,
 	.max_dma_count = SZ_1G,
 	.hw_support_pause = false,
@@ -1296,7 +1300,7 @@ static const struct tegra_dma_chip_data tegra186_dma_chip_data = {
 };
 
 static const struct tegra_dma_chip_data tegra194_dma_chip_data = {
-	.nr_channels = 31,
+	.nr_channels = 32,
 	.channel_reg_size = SZ_64K,
 	.max_dma_count = SZ_1G,
 	.hw_support_pause = true,
@@ -1304,7 +1308,7 @@ static const struct tegra_dma_chip_data tegra194_dma_chip_data = {
 };
 
 static const struct tegra_dma_chip_data tegra234_dma_chip_data = {
-	.nr_channels = 31,
+	.nr_channels = 32,
 	.channel_reg_size = SZ_64K,
 	.max_dma_count = SZ_1G,
 	.hw_support_pause = true,
@@ -1380,15 +1384,28 @@ static int tegra_dma_probe(struct platform_device *pdev)
 	}
 	stream_id = iommu_spec->ids[0] & 0xffff;
 
+	ret = device_property_read_u32(&pdev->dev, "dma-channel-mask",
+				       &tdma->chan_mask);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			 "Missing dma-channel-mask property, using default channel mask %#x\n",
+			 TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK);
+		tdma->chan_mask = TEGRA_GPCDMA_DEFAULT_CHANNEL_MASK;
+	}
+
 	INIT_LIST_HEAD(&tdma->dma_dev.channels);
 	for (i = 0; i < cdata->nr_channels; i++) {
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
 
+		/* Check for channel mask */
+		if (!(tdma->chan_mask & BIT(i)))
+			continue;
+
 		tdc->irq = platform_get_irq(pdev, i);
 		if (tdc->irq < 0)
 			return tdc->irq;
 
-		tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADD_OFFSET +
+		tdc->chan_base_offset = TEGRA_GPCDMA_CHANNEL_BASE_ADDR_OFFSET +
 					i * cdata->channel_reg_size;
 		snprintf(tdc->name, sizeof(tdc->name), "gpcdma.%d", i);
 		tdc->tdma = tdma;
@@ -1449,8 +1466,8 @@ static int tegra_dma_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	dev_info(&pdev->dev, "GPC DMA driver register %d channels\n",
-		 cdata->nr_channels);
+	dev_info(&pdev->dev, "GPC DMA driver register %lu channels\n",
+		 hweight_long(tdma->chan_mask));
 
 	return 0;
 }
@@ -1473,6 +1490,9 @@ static int __maybe_unused tegra_dma_pm_suspend(struct device *dev)
 	for (i = 0; i < tdma->chip_data->nr_channels; i++) {
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
 
+		if (!(tdma->chan_mask & BIT(i)))
+			continue;
+
 		if (tdc->dma_desc) {
 			dev_err(tdma->dev, "channel %u busy\n", i);
 			return -EBUSY;
@@ -1492,6 +1512,9 @@ static int __maybe_unused tegra_dma_pm_resume(struct device *dev)
 	for (i = 0; i < tdma->chip_data->nr_channels; i++) {
 		struct tegra_dma_channel *tdc = &tdma->channels[i];
 
+		if (!(tdma->chan_mask & BIT(i)))
+			continue;
+
 		tegra_dma_program_sid(tdc, tdc->stream_id);
 	}
 
-- 
GitLab


From 3574cfdca28543e2e8db649297cd6659ea8e4bb8 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@nvidia.com>
Date: Fri, 11 Nov 2022 11:55:29 +0200
Subject: [PATCH 1466/4122] RDMA/mana: Remove redefinition of basic u64 type

gdma_obj_handle_t is no more than redefinition of basic
u64 type. Remove such obfuscation.

Link: https://lore.kernel.org/r/3c1e821279e6a165d058655d2343722d6650e776.1668160486.git.leonro@nvidia.com
Acked-by: Long Li <longli@microsoft.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
 drivers/infiniband/hw/mana/mr.c               |  5 ++-
 .../net/ethernet/microsoft/mana/gdma_main.c   |  3 +-
 include/net/mana/gdma.h                       | 31 +++++++++----------
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/drivers/infiniband/hw/mana/mr.c b/drivers/infiniband/hw/mana/mr.c
index a56236cdd9eec..351207c60eb65 100644
--- a/drivers/infiniband/hw/mana/mr.c
+++ b/drivers/infiniband/hw/mana/mr.c
@@ -73,8 +73,7 @@ static int mana_ib_gd_create_mr(struct mana_ib_dev *dev, struct mana_ib_mr *mr,
 	return 0;
 }
 
-static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev,
-				 gdma_obj_handle_t mr_handle)
+static int mana_ib_gd_destroy_mr(struct mana_ib_dev *dev, u64 mr_handle)
 {
 	struct gdma_destroy_mr_response resp = {};
 	struct gdma_destroy_mr_request req = {};
@@ -108,9 +107,9 @@ struct ib_mr *mana_ib_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length,
 	struct mana_ib_pd *pd = container_of(ibpd, struct mana_ib_pd, ibpd);
 	struct gdma_create_mr_params mr_params = {};
 	struct ib_device *ibdev = ibpd->device;
-	gdma_obj_handle_t dma_region_handle;
 	struct mana_ib_dev *dev;
 	struct mana_ib_mr *mr;
+	u64 dma_region_handle;
 	int err;
 
 	dev = container_of(ibdev, struct mana_ib_dev, ib_dev);
diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c
index 46a7d1e6ece98..69224ff8efb60 100644
--- a/drivers/net/ethernet/microsoft/mana/gdma_main.c
+++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c
@@ -671,8 +671,7 @@ free_q:
 	return err;
 }
 
-int mana_gd_destroy_dma_region(struct gdma_context *gc,
-			       gdma_obj_handle_t dma_region_handle)
+int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle)
 {
 	struct gdma_destroy_dma_region_req req = {};
 	struct gdma_general_resp resp = {};
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
index 221adc96340cb..a9fdae14d24c0 100644
--- a/include/net/mana/gdma.h
+++ b/include/net/mana/gdma.h
@@ -65,8 +65,6 @@ enum {
 	GDMA_DEVICE_MANA	= 2,
 };
 
-typedef u64 gdma_obj_handle_t;
-
 struct gdma_resource {
 	/* Protect the bitmap */
 	spinlock_t lock;
@@ -200,7 +198,7 @@ struct gdma_mem_info {
 	u64 length;
 
 	/* Allocated by the PF driver */
-	gdma_obj_handle_t dma_region_handle;
+	u64 dma_region_handle;
 };
 
 #define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
@@ -624,7 +622,7 @@ struct gdma_create_queue_req {
 	u32 reserved1;
 	u32 pdid;
 	u32 doolbell_id;
-	gdma_obj_handle_t gdma_region;
+	u64 gdma_region;
 	u32 reserved2;
 	u32 queue_size;
 	u32 log2_throttle_limit;
@@ -699,14 +697,14 @@ struct gdma_create_dma_region_req {
 
 struct gdma_create_dma_region_resp {
 	struct gdma_resp_hdr hdr;
-	gdma_obj_handle_t dma_region_handle;
+	u64 dma_region_handle;
 }; /* HW DATA */
 
 /* GDMA_DMA_REGION_ADD_PAGES */
 struct gdma_dma_region_add_pages_req {
 	struct gdma_req_hdr hdr;
 
-	gdma_obj_handle_t dma_region_handle;
+	u64 dma_region_handle;
 
 	u32 page_addr_list_len;
 	u32 reserved3;
@@ -718,7 +716,7 @@ struct gdma_dma_region_add_pages_req {
 struct gdma_destroy_dma_region_req {
 	struct gdma_req_hdr hdr;
 
-	gdma_obj_handle_t dma_region_handle;
+	u64 dma_region_handle;
 }; /* HW DATA */
 
 enum gdma_pd_flags {
@@ -733,14 +731,14 @@ struct gdma_create_pd_req {
 
 struct gdma_create_pd_resp {
 	struct gdma_resp_hdr hdr;
-	gdma_obj_handle_t pd_handle;
+	u64 pd_handle;
 	u32 pd_id;
 	u32 reserved;
 };/* HW DATA */
 
 struct gdma_destroy_pd_req {
 	struct gdma_req_hdr hdr;
-	gdma_obj_handle_t pd_handle;
+	u64 pd_handle;
 };/* HW DATA */
 
 struct gdma_destory_pd_resp {
@@ -756,11 +754,11 @@ enum gdma_mr_type {
 };
 
 struct gdma_create_mr_params {
-	gdma_obj_handle_t pd_handle;
+	u64 pd_handle;
 	enum gdma_mr_type mr_type;
 	union {
 		struct {
-			gdma_obj_handle_t dma_region_handle;
+			u64 dma_region_handle;
 			u64 virtual_address;
 			enum gdma_mr_access_flags access_flags;
 		} gva;
@@ -769,13 +767,13 @@ struct gdma_create_mr_params {
 
 struct gdma_create_mr_request {
 	struct gdma_req_hdr hdr;
-	gdma_obj_handle_t pd_handle;
+	u64 pd_handle;
 	enum gdma_mr_type mr_type;
 	u32 reserved_1;
 
 	union {
 		struct {
-			gdma_obj_handle_t dma_region_handle;
+			u64 dma_region_handle;
 			u64 virtual_address;
 			enum gdma_mr_access_flags access_flags;
 		} gva;
@@ -786,14 +784,14 @@ struct gdma_create_mr_request {
 
 struct gdma_create_mr_response {
 	struct gdma_resp_hdr hdr;
-	gdma_obj_handle_t mr_handle;
+	u64 mr_handle;
 	u32 lkey;
 	u32 rkey;
 };/* HW DATA */
 
 struct gdma_destroy_mr_request {
 	struct gdma_req_hdr hdr;
-	gdma_obj_handle_t mr_handle;
+	u64 mr_handle;
 };/* HW DATA */
 
 struct gdma_destroy_mr_response {
@@ -827,7 +825,6 @@ void mana_gd_free_memory(struct gdma_mem_info *gmi);
 int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
 			 u32 resp_len, void *resp);
 
-int mana_gd_destroy_dma_region(struct gdma_context *gc,
-			       gdma_obj_handle_t dma_region_handle);
+int mana_gd_destroy_dma_region(struct gdma_context *gc, u64 dma_region_handle);
 
 #endif /* _GDMA_H */
-- 
GitLab


From f613facc82cfd4b02b937b14872f24e27da4b909 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Sun, 13 Nov 2022 21:10:27 +0200
Subject: [PATCH 1467/4122] mfd: vexpress-sysreg: Fix resource compound literal
 assignments

Since DEFINE_RES_*() macros were converted to provide a compound literal
the user doesn't need to repeat it. Moreover, it may not be compiled.

Fixes: 52c4d11f1dce ("resource: Convert DEFINE_RES_NAMED() to be compound literal")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221113191027.2327-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mfd/vexpress-sysreg.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/mfd/vexpress-sysreg.c b/drivers/mfd/vexpress-sysreg.c
index aaf24af287dd3..eab82619ec314 100644
--- a/drivers/mfd/vexpress-sysreg.c
+++ b/drivers/mfd/vexpress-sysreg.c
@@ -61,35 +61,27 @@ static struct mfd_cell vexpress_sysreg_cells[] = {
 		.name = "basic-mmio-gpio",
 		.of_compatible = "arm,vexpress-sysreg,sys_led",
 		.num_resources = 1,
-		.resources = (struct resource []) {
-			DEFINE_RES_MEM_NAMED(SYS_LED, 0x4, "dat"),
-		},
+		.resources = &DEFINE_RES_MEM_NAMED(SYS_LED, 0x4, "dat"),
 		.platform_data = &vexpress_sysreg_sys_led_pdata,
 		.pdata_size = sizeof(vexpress_sysreg_sys_led_pdata),
 	}, {
 		.name = "basic-mmio-gpio",
 		.of_compatible = "arm,vexpress-sysreg,sys_mci",
 		.num_resources = 1,
-		.resources = (struct resource []) {
-			DEFINE_RES_MEM_NAMED(SYS_MCI, 0x4, "dat"),
-		},
+		.resources = &DEFINE_RES_MEM_NAMED(SYS_MCI, 0x4, "dat"),
 		.platform_data = &vexpress_sysreg_sys_mci_pdata,
 		.pdata_size = sizeof(vexpress_sysreg_sys_mci_pdata),
 	}, {
 		.name = "basic-mmio-gpio",
 		.of_compatible = "arm,vexpress-sysreg,sys_flash",
 		.num_resources = 1,
-		.resources = (struct resource []) {
-			DEFINE_RES_MEM_NAMED(SYS_FLASH, 0x4, "dat"),
-		},
+		.resources = &DEFINE_RES_MEM_NAMED(SYS_FLASH, 0x4, "dat"),
 		.platform_data = &vexpress_sysreg_sys_flash_pdata,
 		.pdata_size = sizeof(vexpress_sysreg_sys_flash_pdata),
 	}, {
 		.name = "vexpress-syscfg",
 		.num_resources = 1,
-		.resources = (struct resource []) {
-			DEFINE_RES_MEM(SYS_MISC, 0x4c),
-		},
+		.resources = &DEFINE_RES_MEM(SYS_MISC, 0x4c),
 	}
 };
 
-- 
GitLab


From e8433659bf701b6f53a6c2168262c9b2c363c049 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 14 Nov 2022 10:03:38 +0100
Subject: [PATCH 1468/4122] Revert "drivers: bus: simple-pm-bus: Use clocks"

This reverts commit 882cf4c913d730a74175db039d941005b883de38 as it
breaks the build in linux-next.

Link: https://lore.kernel.org/r/20221114181752.08a850f0@canb.auug.org.au
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Liu Ying <victor.liu@nxp.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/bus/simple-pm-bus.c | 48 -------------------------------------
 1 file changed, 48 deletions(-)

diff --git a/drivers/bus/simple-pm-bus.c b/drivers/bus/simple-pm-bus.c
index d7b043fefde9e..6b8d6257ed8a4 100644
--- a/drivers/bus/simple-pm-bus.c
+++ b/drivers/bus/simple-pm-bus.c
@@ -8,24 +8,17 @@
  * for more details.
  */
 
-#include <linux/clk.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 
-struct simple_pm_bus {
-	struct clk_bulk_data *clks;
-	int num_clks;
-};
-
 static int simple_pm_bus_probe(struct platform_device *pdev)
 {
 	const struct device *dev = &pdev->dev;
 	const struct of_dev_auxdata *lookup = dev_get_platdata(dev);
 	struct device_node *np = dev->of_node;
 	const struct of_device_id *match;
-	struct simple_pm_bus *bus;
 
 	/*
 	 * Allow user to use driver_override to bind this driver to a
@@ -51,16 +44,6 @@ static int simple_pm_bus_probe(struct platform_device *pdev)
 			return -ENODEV;
 	}
 
-	bus = devm_kzalloc(&pdev->dev, sizeof(*bus), GFP_KERNEL);
-	if (!bus)
-		return -ENOMEM;
-
-	bus->num_clks = devm_clk_bulk_get_all(&pdev->dev, &bus->clks);
-	if (bus->num_clks < 0)
-		return dev_err_probe(&pdev->dev, bus->num_clks, "failed to get clocks\n");
-
-	dev_set_drvdata(&pdev->dev, bus);
-
 	dev_dbg(&pdev->dev, "%s\n", __func__);
 
 	pm_runtime_enable(&pdev->dev);
@@ -84,36 +67,6 @@ static int simple_pm_bus_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int simple_pm_bus_runtime_suspend(struct device *dev)
-{
-	struct simple_pm_bus *bus = dev_get_drvdata(dev);
-
-	clk_bulk_disable_unprepare(bus->num_clks, bus->clks);
-
-	return 0;
-}
-
-static int simple_pm_bus_runtime_resume(struct device *dev)
-{
-	struct simple_pm_bus *bus = dev_get_drvdata(dev);
-	int ret;
-
-	ret = clk_bulk_prepare_enable(bus->num_clks, bus->clks);
-	if (ret) {
-		dev_err(dev, "failed to enable clocks: %d\n", ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static const struct dev_pm_ops simple_pm_bus_pm_ops = {
-	SET_RUNTIME_PM_OPS(simple_pm_bus_runtime_suspend,
-			   simple_pm_bus_runtime_resume, NULL)
-	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
-				      pm_runtime_force_resume)
-};
-
 #define ONLY_BUS	((void *) 1) /* Match if the device is only a bus. */
 
 static const struct of_device_id simple_pm_bus_of_match[] = {
@@ -132,7 +85,6 @@ static struct platform_driver simple_pm_bus_driver = {
 	.driver = {
 		.name = "simple-pm-bus",
 		.of_match_table = simple_pm_bus_of_match,
-		.pm = &simple_pm_bus_pm_ops,
 	},
 };
 
-- 
GitLab


From 1d26a55fbeb9c24bb24fa84595c56efee8783f35 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 6 Sep 2022 13:43:00 -0700
Subject: [PATCH 1469/4122] PCI: histb: Switch to using gpiod API

This patch switches the driver away from legacy gpio/of_gpio API to
gpiod API, and removes use of of_get_named_gpio_flags() which I want to
make private to gpiolib.

Link: https://lore.kernel.org/r/20220906204301.3736813-1-dmitry.torokhov@gmail.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/pci/controller/dwc/pcie-histb.c | 39 ++++++++++++-------------
 1 file changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-histb.c b/drivers/pci/controller/dwc/pcie-histb.c
index e2b80f10030d0..43c27812dd6d4 100644
--- a/drivers/pci/controller/dwc/pcie-histb.c
+++ b/drivers/pci/controller/dwc/pcie-histb.c
@@ -10,11 +10,11 @@
 
 #include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/pci.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
@@ -60,7 +60,7 @@ struct histb_pcie {
 	struct reset_control *sys_reset;
 	struct reset_control *bus_reset;
 	void __iomem *ctrl;
-	int reset_gpio;
+	struct gpio_desc *reset_gpio;
 	struct regulator *vpcie;
 };
 
@@ -212,8 +212,8 @@ static void histb_pcie_host_disable(struct histb_pcie *hipcie)
 	clk_disable_unprepare(hipcie->sys_clk);
 	clk_disable_unprepare(hipcie->bus_clk);
 
-	if (gpio_is_valid(hipcie->reset_gpio))
-		gpio_set_value_cansleep(hipcie->reset_gpio, 0);
+	if (hipcie->reset_gpio)
+		gpiod_set_value_cansleep(hipcie->reset_gpio, 1);
 
 	if (hipcie->vpcie)
 		regulator_disable(hipcie->vpcie);
@@ -235,8 +235,8 @@ static int histb_pcie_host_enable(struct dw_pcie_rp *pp)
 		}
 	}
 
-	if (gpio_is_valid(hipcie->reset_gpio))
-		gpio_set_value_cansleep(hipcie->reset_gpio, 1);
+	if (hipcie->reset_gpio)
+		gpiod_set_value_cansleep(hipcie->reset_gpio, 0);
 
 	ret = clk_prepare_enable(hipcie->bus_clk);
 	if (ret) {
@@ -298,10 +298,7 @@ static int histb_pcie_probe(struct platform_device *pdev)
 	struct histb_pcie *hipcie;
 	struct dw_pcie *pci;
 	struct dw_pcie_rp *pp;
-	struct device_node *np = pdev->dev.of_node;
 	struct device *dev = &pdev->dev;
-	enum of_gpio_flags of_flags;
-	unsigned long flag = GPIOF_DIR_OUT;
 	int ret;
 
 	hipcie = devm_kzalloc(dev, sizeof(*hipcie), GFP_KERNEL);
@@ -336,17 +333,19 @@ static int histb_pcie_probe(struct platform_device *pdev)
 		hipcie->vpcie = NULL;
 	}
 
-	hipcie->reset_gpio = of_get_named_gpio_flags(np,
-				"reset-gpios", 0, &of_flags);
-	if (of_flags & OF_GPIO_ACTIVE_LOW)
-		flag |= GPIOF_ACTIVE_LOW;
-	if (gpio_is_valid(hipcie->reset_gpio)) {
-		ret = devm_gpio_request_one(dev, hipcie->reset_gpio,
-				flag, "PCIe device power control");
-		if (ret) {
-			dev_err(dev, "unable to request gpio\n");
-			return ret;
-		}
+	hipcie->reset_gpio = devm_gpiod_get_optional(dev, "reset",
+						     GPIOD_OUT_HIGH);
+	ret = PTR_ERR_OR_ZERO(hipcie->reset_gpio);
+	if (ret) {
+		dev_err(dev, "unable to request reset gpio: %d\n", ret);
+		return ret;
+	}
+
+	ret = gpiod_set_consumer_name(hipcie->reset_gpio,
+				      "PCIe device power control");
+	if (ret) {
+		dev_err(dev, "unable to set reset gpio name: %d\n", ret);
+		return ret;
 	}
 
 	hipcie->aux_clk = devm_clk_get(dev, "aux");
-- 
GitLab


From 4529992c947401adac53111cf15958a7ea97b1ef Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 10 Nov 2022 19:25:06 -0800
Subject: [PATCH 1470/4122] interconnect: qcom: osm-l3: Use
 platform-independent node ids

The identifiers used for nodes needs to be unique in the running system,
but defining them per platform results in a lot of duplicated
definitions and prevents us from using generic compatibles.

As these identifiers are not exposed outside the kernel, change to use
driver-local numbers, picked completely at random.

Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Tested-by: Steev Klimaszewski <steev@kali.org>
Reviewed-by: Sibi Sankar <quic_sibis@quicinc.com>
Link: https://lore.kernel.org/r/20221111032515.3460-2-quic_bjorande@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/osm-l3.c | 87 +++++++++++-------------------
 1 file changed, 30 insertions(+), 57 deletions(-)

diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index ddbdf0943f94e..d23769844419c 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -74,6 +74,11 @@ struct qcom_osm_l3_desc {
 	unsigned int reg_perf_state;
 };
 
+enum {
+	OSM_L3_MASTER_NODE = 10000,
+	OSM_L3_SLAVE_NODE,
+};
+
 #define DEFINE_QNODE(_name, _id, _buswidth, ...)			\
 	static const struct qcom_osm_l3_node _name = {			\
 		.name = #_name,						\
@@ -83,97 +88,65 @@ struct qcom_osm_l3_desc {
 		.links = { __VA_ARGS__ },				\
 	}
 
-DEFINE_QNODE(sdm845_osm_apps_l3, SDM845_MASTER_OSM_L3_APPS, 16, SDM845_SLAVE_OSM_L3);
-DEFINE_QNODE(sdm845_osm_l3, SDM845_SLAVE_OSM_L3, 16);
+DEFINE_QNODE(osm_l3_master, OSM_L3_MASTER_NODE, 16, OSM_L3_SLAVE_NODE);
+DEFINE_QNODE(osm_l3_slave, OSM_L3_SLAVE_NODE, 16);
+
+static const struct qcom_osm_l3_node * const osm_l3_nodes[] = {
+	[MASTER_OSM_L3_APPS] = &osm_l3_master,
+	[SLAVE_OSM_L3] = &osm_l3_slave,
+};
+
+DEFINE_QNODE(epss_l3_master, OSM_L3_MASTER_NODE, 32, OSM_L3_SLAVE_NODE);
+DEFINE_QNODE(epss_l3_slave, OSM_L3_SLAVE_NODE, 32);
 
-static const struct qcom_osm_l3_node * const sdm845_osm_l3_nodes[] = {
-	[MASTER_OSM_L3_APPS] = &sdm845_osm_apps_l3,
-	[SLAVE_OSM_L3] = &sdm845_osm_l3,
+static const struct qcom_osm_l3_node * const epss_l3_nodes[] = {
+	[MASTER_EPSS_L3_APPS] = &epss_l3_master,
+	[SLAVE_EPSS_L3_SHARED] = &epss_l3_slave,
 };
 
 static const struct qcom_osm_l3_desc sdm845_icc_osm_l3 = {
-	.nodes = sdm845_osm_l3_nodes,
-	.num_nodes = ARRAY_SIZE(sdm845_osm_l3_nodes),
+	.nodes = osm_l3_nodes,
+	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
 	.lut_row_size = OSM_LUT_ROW_SIZE,
 	.reg_freq_lut = OSM_REG_FREQ_LUT,
 	.reg_perf_state = OSM_REG_PERF_STATE,
 };
 
-DEFINE_QNODE(sc7180_osm_apps_l3, SC7180_MASTER_OSM_L3_APPS, 16, SC7180_SLAVE_OSM_L3);
-DEFINE_QNODE(sc7180_osm_l3, SC7180_SLAVE_OSM_L3, 16);
-
-static const struct qcom_osm_l3_node * const sc7180_osm_l3_nodes[] = {
-	[MASTER_OSM_L3_APPS] = &sc7180_osm_apps_l3,
-	[SLAVE_OSM_L3] = &sc7180_osm_l3,
-};
-
 static const struct qcom_osm_l3_desc sc7180_icc_osm_l3 = {
-	.nodes = sc7180_osm_l3_nodes,
-	.num_nodes = ARRAY_SIZE(sc7180_osm_l3_nodes),
+	.nodes = osm_l3_nodes,
+	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
 	.lut_row_size = OSM_LUT_ROW_SIZE,
 	.reg_freq_lut = OSM_REG_FREQ_LUT,
 	.reg_perf_state = OSM_REG_PERF_STATE,
 };
 
-DEFINE_QNODE(sc7280_epss_apps_l3, SC7280_MASTER_EPSS_L3_APPS, 32, SC7280_SLAVE_EPSS_L3);
-DEFINE_QNODE(sc7280_epss_l3, SC7280_SLAVE_EPSS_L3, 32);
-
-static const struct qcom_osm_l3_node * const sc7280_epss_l3_nodes[] = {
-	[MASTER_EPSS_L3_APPS] = &sc7280_epss_apps_l3,
-	[SLAVE_EPSS_L3_SHARED] = &sc7280_epss_l3,
-};
-
 static const struct qcom_osm_l3_desc sc7280_icc_epss_l3 = {
-	.nodes = sc7280_epss_l3_nodes,
-	.num_nodes = ARRAY_SIZE(sc7280_epss_l3_nodes),
+	.nodes = epss_l3_nodes,
+	.num_nodes = ARRAY_SIZE(epss_l3_nodes),
 	.lut_row_size = EPSS_LUT_ROW_SIZE,
 	.reg_freq_lut = EPSS_REG_FREQ_LUT,
 	.reg_perf_state = EPSS_REG_PERF_STATE,
 };
 
-DEFINE_QNODE(sc8180x_osm_apps_l3, SC8180X_MASTER_OSM_L3_APPS, 32, SC8180X_SLAVE_OSM_L3);
-DEFINE_QNODE(sc8180x_osm_l3, SC8180X_SLAVE_OSM_L3, 32);
-
-static const struct qcom_osm_l3_node * const sc8180x_osm_l3_nodes[] = {
-	[MASTER_OSM_L3_APPS] = &sc8180x_osm_apps_l3,
-	[SLAVE_OSM_L3] = &sc8180x_osm_l3,
-};
-
 static const struct qcom_osm_l3_desc sc8180x_icc_osm_l3 = {
-	.nodes = sc8180x_osm_l3_nodes,
-	.num_nodes = ARRAY_SIZE(sc8180x_osm_l3_nodes),
+	.nodes = osm_l3_nodes,
+	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
 	.lut_row_size = OSM_LUT_ROW_SIZE,
 	.reg_freq_lut = OSM_REG_FREQ_LUT,
 	.reg_perf_state = OSM_REG_PERF_STATE,
 };
 
-DEFINE_QNODE(sm8150_osm_apps_l3, SM8150_MASTER_OSM_L3_APPS, 32, SM8150_SLAVE_OSM_L3);
-DEFINE_QNODE(sm8150_osm_l3, SM8150_SLAVE_OSM_L3, 32);
-
-static const struct qcom_osm_l3_node * const sm8150_osm_l3_nodes[] = {
-	[MASTER_OSM_L3_APPS] = &sm8150_osm_apps_l3,
-	[SLAVE_OSM_L3] = &sm8150_osm_l3,
-};
-
 static const struct qcom_osm_l3_desc sm8150_icc_osm_l3 = {
-	.nodes = sm8150_osm_l3_nodes,
-	.num_nodes = ARRAY_SIZE(sm8150_osm_l3_nodes),
+	.nodes = osm_l3_nodes,
+	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
 	.lut_row_size = OSM_LUT_ROW_SIZE,
 	.reg_freq_lut = OSM_REG_FREQ_LUT,
 	.reg_perf_state = OSM_REG_PERF_STATE,
 };
 
-DEFINE_QNODE(sm8250_epss_apps_l3, SM8250_MASTER_EPSS_L3_APPS, 32, SM8250_SLAVE_EPSS_L3);
-DEFINE_QNODE(sm8250_epss_l3, SM8250_SLAVE_EPSS_L3, 32);
-
-static const struct qcom_osm_l3_node * const sm8250_epss_l3_nodes[] = {
-	[MASTER_EPSS_L3_APPS] = &sm8250_epss_apps_l3,
-	[SLAVE_EPSS_L3_SHARED] = &sm8250_epss_l3,
-};
-
 static const struct qcom_osm_l3_desc sm8250_icc_epss_l3 = {
-	.nodes = sm8250_epss_l3_nodes,
-	.num_nodes = ARRAY_SIZE(sm8250_epss_l3_nodes),
+	.nodes = epss_l3_nodes,
+	.num_nodes = ARRAY_SIZE(epss_l3_nodes),
 	.lut_row_size = EPSS_LUT_ROW_SIZE,
 	.reg_freq_lut = EPSS_REG_FREQ_LUT,
 	.reg_perf_state = EPSS_REG_PERF_STATE,
-- 
GitLab


From d623264f62d40ca2d2380437f8a6318a2a9e4c66 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 10 Nov 2022 19:25:07 -0800
Subject: [PATCH 1471/4122] interconnect: qcom: osm-l3: Squash common
 descriptors

Each platform defines their own OSM L3 descriptor, but in practice
there's only two: one for OSM and one for EPSS. Remove the duplicated
definitions.

Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Tested-by: Steev Klimaszewski <steev@kali.org>
Reviewed-by: Sibi Sankar <quic_sibis@quicinc.com>
Link: https://lore.kernel.org/r/20221111032515.3460-3-quic_bjorande@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/osm-l3.c | 48 +++++-------------------------
 1 file changed, 8 insertions(+), 40 deletions(-)

diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index d23769844419c..7d68442532412 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -104,7 +104,7 @@ static const struct qcom_osm_l3_node * const epss_l3_nodes[] = {
 	[SLAVE_EPSS_L3_SHARED] = &epss_l3_slave,
 };
 
-static const struct qcom_osm_l3_desc sdm845_icc_osm_l3 = {
+static const struct qcom_osm_l3_desc osm_l3 = {
 	.nodes = osm_l3_nodes,
 	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
 	.lut_row_size = OSM_LUT_ROW_SIZE,
@@ -112,39 +112,7 @@ static const struct qcom_osm_l3_desc sdm845_icc_osm_l3 = {
 	.reg_perf_state = OSM_REG_PERF_STATE,
 };
 
-static const struct qcom_osm_l3_desc sc7180_icc_osm_l3 = {
-	.nodes = osm_l3_nodes,
-	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
-	.lut_row_size = OSM_LUT_ROW_SIZE,
-	.reg_freq_lut = OSM_REG_FREQ_LUT,
-	.reg_perf_state = OSM_REG_PERF_STATE,
-};
-
-static const struct qcom_osm_l3_desc sc7280_icc_epss_l3 = {
-	.nodes = epss_l3_nodes,
-	.num_nodes = ARRAY_SIZE(epss_l3_nodes),
-	.lut_row_size = EPSS_LUT_ROW_SIZE,
-	.reg_freq_lut = EPSS_REG_FREQ_LUT,
-	.reg_perf_state = EPSS_REG_PERF_STATE,
-};
-
-static const struct qcom_osm_l3_desc sc8180x_icc_osm_l3 = {
-	.nodes = osm_l3_nodes,
-	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
-	.lut_row_size = OSM_LUT_ROW_SIZE,
-	.reg_freq_lut = OSM_REG_FREQ_LUT,
-	.reg_perf_state = OSM_REG_PERF_STATE,
-};
-
-static const struct qcom_osm_l3_desc sm8150_icc_osm_l3 = {
-	.nodes = osm_l3_nodes,
-	.num_nodes = ARRAY_SIZE(osm_l3_nodes),
-	.lut_row_size = OSM_LUT_ROW_SIZE,
-	.reg_freq_lut = OSM_REG_FREQ_LUT,
-	.reg_perf_state = OSM_REG_PERF_STATE,
-};
-
-static const struct qcom_osm_l3_desc sm8250_icc_epss_l3 = {
+static const struct qcom_osm_l3_desc epss_l3 = {
 	.nodes = epss_l3_nodes,
 	.num_nodes = ARRAY_SIZE(epss_l3_nodes),
 	.lut_row_size = EPSS_LUT_ROW_SIZE,
@@ -317,12 +285,12 @@ err:
 }
 
 static const struct of_device_id osm_l3_of_match[] = {
-	{ .compatible = "qcom,sc7180-osm-l3", .data = &sc7180_icc_osm_l3 },
-	{ .compatible = "qcom,sc7280-epss-l3", .data = &sc7280_icc_epss_l3 },
-	{ .compatible = "qcom,sdm845-osm-l3", .data = &sdm845_icc_osm_l3 },
-	{ .compatible = "qcom,sm8150-osm-l3", .data = &sm8150_icc_osm_l3 },
-	{ .compatible = "qcom,sc8180x-osm-l3", .data = &sc8180x_icc_osm_l3 },
-	{ .compatible = "qcom,sm8250-epss-l3", .data = &sm8250_icc_epss_l3 },
+	{ .compatible = "qcom,sc7180-osm-l3", .data = &osm_l3 },
+	{ .compatible = "qcom,sc7280-epss-l3", .data = &epss_l3 },
+	{ .compatible = "qcom,sdm845-osm-l3", .data = &osm_l3 },
+	{ .compatible = "qcom,sm8150-osm-l3", .data = &osm_l3 },
+	{ .compatible = "qcom,sc8180x-osm-l3", .data = &osm_l3 },
+	{ .compatible = "qcom,sm8250-epss-l3", .data = &epss_l3 },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, osm_l3_of_match);
-- 
GitLab


From 9235253ec73dfd71cc83d154693476930fc8dd77 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 10 Nov 2022 19:25:08 -0800
Subject: [PATCH 1472/4122] interconnect: qcom: osm-l3: Add per-core EPSS L3
 support

The EPSS instance in e.g. SM8350 and SC8280XP has per-core L3 voting
enabled. In this configuration, the "shared" vote is done using the
REG_L3_VOTE register instead of PERF_STATE.

Rename epss_l3 to clarify that it's affecting the PERF_STATE register
and add a new L3_VOTE description. Given platform lineage it's assumed
that the L3_VOTE-based case will be the predominant one, so use this for
a new generic qcom,epss-l3 compatible.

While adding the EPSS generic, also add qcom,osm-l3.

Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Tested-by: Steev Klimaszewski <steev@kali.org>
Reviewed-by: Sibi Sankar <quic_sibis@quicinc.com>
Link: https://lore.kernel.org/r/20221111032515.3460-4-quic_bjorande@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/osm-l3.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index 7d68442532412..469be732a00bb 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -34,6 +34,7 @@
 
 /* EPSS Register offsets */
 #define EPSS_LUT_ROW_SIZE		4
+#define EPSS_REG_L3_VOTE		0x90
 #define EPSS_REG_FREQ_LUT		0x100
 #define EPSS_REG_PERF_STATE		0x320
 
@@ -112,7 +113,7 @@ static const struct qcom_osm_l3_desc osm_l3 = {
 	.reg_perf_state = OSM_REG_PERF_STATE,
 };
 
-static const struct qcom_osm_l3_desc epss_l3 = {
+static const struct qcom_osm_l3_desc epss_l3_perf_state = {
 	.nodes = epss_l3_nodes,
 	.num_nodes = ARRAY_SIZE(epss_l3_nodes),
 	.lut_row_size = EPSS_LUT_ROW_SIZE,
@@ -120,6 +121,14 @@ static const struct qcom_osm_l3_desc epss_l3 = {
 	.reg_perf_state = EPSS_REG_PERF_STATE,
 };
 
+static const struct qcom_osm_l3_desc epss_l3_l3_vote = {
+	.nodes = epss_l3_nodes,
+	.num_nodes = ARRAY_SIZE(epss_l3_nodes),
+	.lut_row_size = EPSS_LUT_ROW_SIZE,
+	.reg_freq_lut = EPSS_REG_FREQ_LUT,
+	.reg_perf_state = EPSS_REG_L3_VOTE,
+};
+
 static int qcom_osm_l3_set(struct icc_node *src, struct icc_node *dst)
 {
 	struct qcom_osm_l3_icc_provider *qp;
@@ -285,12 +294,14 @@ err:
 }
 
 static const struct of_device_id osm_l3_of_match[] = {
+	{ .compatible = "qcom,epss-l3", .data = &epss_l3_l3_vote },
+	{ .compatible = "qcom,osm-l3", .data = &osm_l3 },
 	{ .compatible = "qcom,sc7180-osm-l3", .data = &osm_l3 },
-	{ .compatible = "qcom,sc7280-epss-l3", .data = &epss_l3 },
+	{ .compatible = "qcom,sc7280-epss-l3", .data = &epss_l3_perf_state },
 	{ .compatible = "qcom,sdm845-osm-l3", .data = &osm_l3 },
 	{ .compatible = "qcom,sm8150-osm-l3", .data = &osm_l3 },
 	{ .compatible = "qcom,sc8180x-osm-l3", .data = &osm_l3 },
-	{ .compatible = "qcom,sm8250-epss-l3", .data = &epss_l3 },
+	{ .compatible = "qcom,sm8250-epss-l3", .data = &epss_l3_perf_state },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, osm_l3_of_match);
-- 
GitLab


From b6bcef163ae0c4329187eea8431a735a60b1d7bb Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 10 Nov 2022 19:25:09 -0800
Subject: [PATCH 1473/4122] interconnect: qcom: osm-l3: Simplify osm_l3_set()

The aggregation over votes for all nodes in the provider will always
only find the bandwidth votes for the destination side of the path.
Further more, the average kBps value will always be 0.

Simplify the logic by directly looking at the destination node's peak
bandwidth request.

Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Tested-by: Steev Klimaszewski <steev@kali.org>
Reviewed-by: Sibi Sankar <quic_sibis@quicinc.com>
Link: https://lore.kernel.org/r/20221111032515.3460-5-quic_bjorande@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/osm-l3.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/interconnect/qcom/osm-l3.c b/drivers/interconnect/qcom/osm-l3.c
index 469be732a00bb..5fa1710874258 100644
--- a/drivers/interconnect/qcom/osm-l3.c
+++ b/drivers/interconnect/qcom/osm-l3.c
@@ -134,22 +134,14 @@ static int qcom_osm_l3_set(struct icc_node *src, struct icc_node *dst)
 	struct qcom_osm_l3_icc_provider *qp;
 	struct icc_provider *provider;
 	const struct qcom_osm_l3_node *qn;
-	struct icc_node *n;
 	unsigned int index;
-	u32 agg_peak = 0;
-	u32 agg_avg = 0;
 	u64 rate;
 
 	qn = src->data;
 	provider = src->provider;
 	qp = to_osm_l3_provider(provider);
 
-	list_for_each_entry(n, &provider->nodes, node_list)
-		provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
-				    &agg_avg, &agg_peak);
-
-	rate = max(agg_avg, agg_peak);
-	rate = icc_units_to_bps(rate);
+	rate = icc_units_to_bps(dst->peak_bw);
 	do_div(rate, qn->buswidth);
 
 	for (index = 0; index < qp->max_state - 1; index++) {
-- 
GitLab


From 6313f4b5a438023c0d20960f19df16483cbbb8d7 Mon Sep 17 00:00:00 2001
From: Melody Olvera <quic_molvera@quicinc.com>
Date: Wed, 26 Oct 2022 12:05:33 -0700
Subject: [PATCH 1474/4122] dt-bindings: arm-smmu: Add 'compatible' for QDU1000
 and QRU1000

Add compatible bindings for Qualcomm QDU1000 and QRU1000 platforms.

Signed-off-by: Melody Olvera <quic_molvera@quicinc.com>
Link: https://lore.kernel.org/r/20221026190534.4004945-2-quic_molvera@quicinc.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 9066e6df1ba10..6258302ed9e75 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -34,6 +34,7 @@ properties:
         items:
           - enum:
               - qcom,qcm2290-smmu-500
+              - qcom,qdu1000-smmu-500
               - qcom,sc7180-smmu-500
               - qcom,sc7280-smmu-500
               - qcom,sc8180x-smmu-500
-- 
GitLab


From 7b52f53ce1914f5b3542665ff6a373ee858161c9 Mon Sep 17 00:00:00 2001
From: Melody Olvera <quic_molvera@quicinc.com>
Date: Wed, 26 Oct 2022 12:05:34 -0700
Subject: [PATCH 1475/4122] drivers: arm-smmu-impl: Add QDU1000 and QRU1000
 iommu implementation

Add compatible for Qualcomm QDU1000 and QRU1000 SoCs to add iommu
support for them.

Signed-off-by: Melody Olvera <quic_molvera@quicinc.com>
Link: https://lore.kernel.org/r/20221026190534.4004945-3-quic_molvera@quicinc.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index b2708de25ea34..0580a381a04ba 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -426,6 +426,7 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,msm8998-smmu-v2" },
 	{ .compatible = "qcom,qcm2290-smmu-500" },
+	{ .compatible = "qcom,qdu1000-smmu-500" },
 	{ .compatible = "qcom,sc7180-smmu-500" },
 	{ .compatible = "qcom,sc7280-smmu-500" },
 	{ .compatible = "qcom,sc8180x-smmu-500" },
-- 
GitLab


From 728b22a57232a1738d87b2148908a81615240a37 Mon Sep 17 00:00:00 2001
From: Adam Skladowski <a39.skl@gmail.com>
Date: Sun, 30 Oct 2022 11:42:54 +0200
Subject: [PATCH 1476/4122] dt-bindings: arm-smmu: Add compatible for Qualcomm
 SM6115

Add compatible for the Qualcomm SM6115 platform to the ARM SMMU
DeviceTree binding.

Signed-off-by: Adam Skladowski <a39.skl@gmail.com>
Signed-off-by: Iskren Chernev <iskren.chernev@gmail.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221030094258.486428-5-iskren.chernev@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 6258302ed9e75..406b98d090f62 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -42,6 +42,7 @@ properties:
               - qcom,sdm845-smmu-500
               - qcom,sdx55-smmu-500
               - qcom,sdx65-smmu-500
+              - qcom,sm6115-smmu-500
               - qcom,sm6350-smmu-500
               - qcom,sm6375-smmu-500
               - qcom,sm8150-smmu-500
-- 
GitLab


From 2fd6e1ad7e199c1ef54341a54fa4e11edc31b63c Mon Sep 17 00:00:00 2001
From: Adam Skladowski <a39.skl@gmail.com>
Date: Sun, 30 Oct 2022 11:42:55 +0200
Subject: [PATCH 1477/4122] iommu/arm-smmu-qcom: Add SM6115 support

Add the Qualcomm SM6115 platform to the list of compatible,
this target uses MMU500 for both APSS and GPU.

Signed-off-by: Adam Skladowski <a39.skl@gmail.com>
Signed-off-by: Iskren Chernev <iskren.chernev@gmail.com>
Link: https://lore.kernel.org/r/20221030094258.486428-6-iskren.chernev@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 0580a381a04ba..0f4eaf217983d 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -433,6 +433,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,sc8280xp-smmu-500" },
 	{ .compatible = "qcom,sdm630-smmu-v2" },
 	{ .compatible = "qcom,sdm845-smmu-500" },
+	{ .compatible = "qcom,sm6115-smmu-500" },
 	{ .compatible = "qcom,sm6125-smmu-500" },
 	{ .compatible = "qcom,sm6350-smmu-500" },
 	{ .compatible = "qcom,sm6375-smmu-500" },
-- 
GitLab


From 8d3a9ec6ae2886305fdd03652592b3c7ffea672b Mon Sep 17 00:00:00 2001
From: Richard Acayan <mailingradian@gmail.com>
Date: Thu, 10 Nov 2022 19:36:06 -0500
Subject: [PATCH 1478/4122] dt-bindings: iommu: arm-smmu: add sdm670 compatible

The Snapdragon 670 needs the IOMMU for GENI I2C. Add a compatible string in
the documentation to represent its support.

Signed-off-by: Richard Acayan <mailingradian@gmail.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221111003606.126795-2-mailingradian@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 406b98d090f62..3ade2dbca70ee 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -39,6 +39,7 @@ properties:
               - qcom,sc7280-smmu-500
               - qcom,sc8180x-smmu-500
               - qcom,sc8280xp-smmu-500
+              - qcom,sdm670-smmu-500
               - qcom,sdm845-smmu-500
               - qcom,sdx55-smmu-500
               - qcom,sdx65-smmu-500
-- 
GitLab


From 4ea0be1f0db588afdb2d4d94dfa921b01f34a3c3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:07 -0800
Subject: [PATCH 1479/4122] perf stat: Increase metric length to align outputs

When perf stat is called with very detailed events, the output doesn't
align well like below:

  $ sudo perf stat -a -ddd sleep 1

   Performance counter stats for 'system wide':

          8,020.23 msec cpu-clock                        #    7.997 CPUs utilized
             3,970      context-switches                 #  494.998 /sec
               169      cpu-migrations                   #   21.072 /sec
               586      page-faults                      #   73.065 /sec
       649,568,060      cycles                           #    0.081 GHz                      (30.42%)
       304,044,345      instructions                     #    0.47  insn per cycle           (38.40%)
        60,313,022      branches                         #    7.520 M/sec                    (38.89%)
         2,766,919      branch-misses                    #    4.59% of all branches          (39.26%)
        74,422,951      L1-dcache-loads                  #    9.279 M/sec                    (39.39%)
         8,025,568      L1-dcache-load-misses            #   10.78% of all L1-dcache accesses  (39.22%)
         3,314,995      LLC-loads                        #  413.329 K/sec                    (30.83%)
         1,225,619      LLC-load-misses                  #   36.97% of all LL-cache accesses  (30.45%)
   <not supported>      L1-icache-loads
        20,420,493      L1-icache-load-misses            #    0.00% of all L1-icache accesses  (30.29%)
        58,017,947      dTLB-loads                       #    7.234 M/sec                    (30.37%)
           704,677      dTLB-load-misses                 #    1.21% of all dTLB cache accesses  (30.27%)
           234,225      iTLB-loads                       #   29.204 K/sec                    (30.29%)
           417,166      iTLB-load-misses                 #  178.10% of all iTLB cache accesses  (30.32%)
   <not supported>      L1-dcache-prefetches
   <not supported>      L1-dcache-prefetch-misses

       1.002947355 seconds time elapsed

Increase the METRIC_LEN by 3 so that it can align properly.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 657434cd29eea..576283afa319c 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -218,7 +218,7 @@ struct outstate {
 	struct evsel *evsel;
 };
 
-#define METRIC_LEN  35
+#define METRIC_LEN  38
 
 static void new_line_std(struct perf_stat_config *config __maybe_unused,
 			 void *ctx)
-- 
GitLab


From 81a02c6577ecfee7056ccafbd028984d0d670c0c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:08 -0800
Subject: [PATCH 1480/4122] perf stat: Clear screen only if output file is a
 tty

The --interval-clear option makes perf stat to clear the terminal at
each interval.  But it doesn't need to clear the screen when it saves
to a file.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 576283afa319c..ccb804546d5a3 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -890,7 +890,7 @@ static void print_interval(struct perf_stat_config *config,
 	FILE *output = config->output;
 	static int num_print_interval;
 
-	if (config->interval_clear)
+	if (config->interval_clear && isatty(fileno(output)))
 		puts(CONSOLE_CLEAR);
 
 	if (!config->iostat_run && !config->json_output)
-- 
GitLab


From f4e55f88da923f39f0b76edc3da3c52d0b72d429 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:09 -0800
Subject: [PATCH 1481/4122] perf stat: Move common code in
 print_metric_headers()

The struct perf_stat_output_ctx is set in a loop with the same values.
Move the code out of the loop and keep the loop minimal.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index ccb804546d5a3..173f4715189c9 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -835,11 +835,16 @@ static void print_metric_headers(struct perf_stat_config *config,
 				 struct evlist *evlist,
 				 const char *prefix, bool no_indent)
 {
-	struct perf_stat_output_ctx out;
 	struct evsel *counter;
 	struct outstate os = {
 		.fh = config->output
 	};
+	struct perf_stat_output_ctx out = {
+		.ctx = &os,
+		.print_metric = print_metric_header,
+		.new_line = new_line_metric,
+		.force_header = true,
+	};
 	bool first = true;
 
 	if (config->json_output && !config->interval)
@@ -863,13 +868,11 @@ static void print_metric_headers(struct perf_stat_config *config,
 	/* Print metrics headers only */
 	evlist__for_each_entry(evlist, counter) {
 		os.evsel = counter;
-		out.ctx = &os;
-		out.print_metric = print_metric_header;
+
 		if (!first && config->json_output)
 			fprintf(config->output, ", ");
 		first = false;
-		out.new_line = new_line_metric;
-		out.force_header = true;
+
 		perf_stat__print_shadow_stats(config, counter, 0,
 					      0,
 					      &out,
-- 
GitLab


From fdc7d6082459aa6705dd39b827214f6cfc1fa054 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:10 -0800
Subject: [PATCH 1482/4122] perf stat: Fix --metric-only --json output

Currently it prints all metric headers for JSON output.  But actually it
skips some metrics with valid_only_metric().  So the output looks like:

  $ perf stat --metric-only --json true
  {"unit" : "CPUs utilized", "unit" : "/sec", "unit" : "/sec", "unit" : "/sec", "unit" : "GHz", "unit" : "insn per cycle", "unit" : "/sec", "unit" : "branch-misses of all branches"}
  {"metric-value" : "3.861"}{"metric-value" : "0.79"}{"metric-value" : "3.04"}

As you can see there are 8 units in the header but only 3 metric-values
are there.  It should skip the unused headers as well.  Also each unit
should be printed as a separate object like metric values.

With this patch:

  $ perf stat --metric-only --json true
  {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"}
  {"metric-value" : "4.166"}{"metric-value" : "0.73"}{"metric-value" : "2.96"}

Fixes: df936cadfb58ba93 ("perf stat: Add JSON output option")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Claire Jensen <cjense@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 173f4715189c9..5c10cf49fd12c 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -430,12 +430,12 @@ static void print_metric_header(struct perf_stat_config *config,
 	    os->evsel->priv != os->evsel->evlist->selected->priv)
 		return;
 
-	if (!valid_only_metric(unit) && !config->json_output)
+	if (!valid_only_metric(unit))
 		return;
 	unit = fixunit(tbuf, os->evsel, unit);
 
 	if (config->json_output)
-		fprintf(os->fh, "\"unit\" : \"%s\"", unit);
+		fprintf(os->fh, "{\"unit\" : \"%s\"}", unit);
 	else if (config->csv_output)
 		fprintf(os->fh, "%s%s", unit, config->csv_sep);
 	else
@@ -845,10 +845,6 @@ static void print_metric_headers(struct perf_stat_config *config,
 		.new_line = new_line_metric,
 		.force_header = true,
 	};
-	bool first = true;
-
-	if (config->json_output && !config->interval)
-		fprintf(config->output, "{");
 
 	if (prefix && !config->json_output)
 		fprintf(config->output, "%s", prefix);
@@ -869,18 +865,12 @@ static void print_metric_headers(struct perf_stat_config *config,
 	evlist__for_each_entry(evlist, counter) {
 		os.evsel = counter;
 
-		if (!first && config->json_output)
-			fprintf(config->output, ", ");
-		first = false;
-
 		perf_stat__print_shadow_stats(config, counter, 0,
 					      0,
 					      &out,
 					      &config->metric_events,
 					      &rt_stat);
 	}
-	if (config->json_output)
-		fprintf(config->output, "}");
 	fputc('\n', config->output);
 }
 
@@ -952,14 +942,8 @@ static void print_interval(struct perf_stat_config *config,
 		}
 	}
 
-	if ((num_print_interval == 0 || config->interval_clear)
-			 && metric_only && !config->json_output)
+	if ((num_print_interval == 0 || config->interval_clear) && metric_only)
 		print_metric_headers(config, evlist, " ", true);
-	if ((num_print_interval == 0 || config->interval_clear)
-			 && metric_only && config->json_output) {
-		fprintf(output, "{");
-		print_metric_headers(config, evlist, " ", true);
-	}
 	if (++num_print_interval == 25)
 		num_print_interval = 0;
 }
-- 
GitLab


From 6d0a7e394eabd456f70c2f2f8e958b2343074360 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:11 -0800
Subject: [PATCH 1483/4122] perf stat: Do not indent headers for JSON

Currently --metric-only with --json indents header lines.  This is not
needed for JSON.

  $ perf stat -aA --metric-only -j true
        {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"}
  {"cpu" : "0", {"metric-value" : "0.101"}{"metric-value" : "0.86"}{"metric-value" : "1.91"}
  {"cpu" : "1", {"metric-value" : "0.102"}{"metric-value" : "0.87"}{"metric-value" : "2.02"}
  {"cpu" : "2", {"metric-value" : "0.085"}{"metric-value" : "1.02"}{"metric-value" : "1.69"}
  ...

Note that the other lines are broken JSON, but it will be handled later.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 5c10cf49fd12c..99bc0db8f068a 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -849,7 +849,7 @@ static void print_metric_headers(struct perf_stat_config *config,
 	if (prefix && !config->json_output)
 		fprintf(config->output, "%s", prefix);
 
-	if (!config->csv_output && !no_indent)
+	if (!config->csv_output && !config->json_output && !no_indent)
 		fprintf(config->output, "%*s",
 			aggr_header_lens[config->aggr_mode], "");
 	if (config->csv_output) {
-- 
GitLab


From 1cc7642abba7b281ecd836bfb56fc6dedac32555 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:12 -0800
Subject: [PATCH 1484/4122] perf stat: Add header for interval in JSON output

It missed to print a matching header line for intervals.

Before:

  # perf stat -a -e cycles,instructions --metric-only -j -I 500
  {"unit" : "insn per cycle"}
  {"interval" : 0.500544283}{"metric-value" : "1.96"}
  ^C

After:

  # perf stat -a -e cycles,instructions --metric-only -j -I 500
  {"unit" : "sec"}{"unit" : "insn per cycle"}
  {"interval" : 0.500515681}{"metric-value" : "2.31"}
  ^C

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 99bc0db8f068a..2a08c66121ec5 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -858,6 +858,10 @@ static void print_metric_headers(struct perf_stat_config *config,
 		if (!config->iostat_run)
 			fputs(aggr_header_csv[config->aggr_mode], config->output);
 	}
+	if (config->json_output) {
+		if (config->interval)
+			fputs("{\"unit\" : \"sec\"}", config->output);
+	}
 	if (config->iostat_run)
 		iostat_print_header_prefix(config);
 
-- 
GitLab


From f1db5a1d1d4de248534567fd95efa570bc693f73 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:13 -0800
Subject: [PATCH 1485/4122] perf stat: Fix condition in print_interval()

The num_print_interval and config->interval_clear should be checked
together like other places like later in the function.  Otherwise,
the --interval-clear option could print the headers for the CSV or
JSON output unnecessarily.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-9-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 2a08c66121ec5..7b2ec400813e9 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -900,8 +900,8 @@ static void print_interval(struct perf_stat_config *config,
 		sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long)
 				 ts->tv_sec, ts->tv_nsec);
 
-	if ((num_print_interval == 0 && !config->csv_output && !config->json_output)
-			 || config->interval_clear) {
+	if ((num_print_interval == 0 || config->interval_clear) &&
+			!config->csv_output && !config->json_output) {
 		switch (config->aggr_mode) {
 		case AGGR_NODE:
 			fprintf(output, "#           time node   cpus");
-- 
GitLab


From 20e2e31779377b36ada04c06d572ce0b4e234bb1 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 7 Nov 2022 13:33:14 -0800
Subject: [PATCH 1486/4122] perf stat: Consolidate condition to print metrics

The pm variable holds an appropriate function to print metrics for CSV
anf JSON already.  So we can combine the if statement to simplify the
code a little bit.  This also matches to the above condition for non-CSV
and non-JSON case.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221107213314.3239159-10-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 7b2ec400813e9..cb2a116bded6d 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -600,9 +600,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 			pm(config, &os, NULL, NULL, "", 0);
 		print_noise(config, counter, noise);
 		print_running(config, run, ena);
-		if (config->csv_output)
-			pm(config, &os, NULL, NULL, "", 0);
-		else if (config->json_output)
+		if (config->csv_output || config->json_output)
 			pm(config, &os, NULL, NULL, "", 0);
 		return;
 	}
-- 
GitLab


From dbf88f74358338cc444933346a0a57635fbb4c94 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:26 +0300
Subject: [PATCH 1487/4122] dt-bindings: arm-smmu: Add missing Qualcomm SMMU
 compatibles

Add missing compatibles used for Adreno SMMU on sc7280 and sm8450
platforms and for the Qualcomm v2 SMMU used on SDM630 platform.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-2-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 3ade2dbca70ee..aa863811996f1 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -28,6 +28,7 @@ properties:
           - enum:
               - qcom,msm8996-smmu-v2
               - qcom,msm8998-smmu-v2
+              - qcom,sdm630-smmu-v2
           - const: qcom,smmu-v2
 
       - description: Qcom SoCs implementing "arm,mmu-500"
@@ -51,10 +52,20 @@ properties:
               - qcom,sm8350-smmu-500
               - qcom,sm8450-smmu-500
           - const: arm,mmu-500
+
+      - description: Qcom Adreno GPUs implementing "arm,smmu-500"
+        items:
+          - enum:
+              - qcom,sc7280-smmu-500
+              - qcom,sm8250-smmu-500
+          - const: qcom,adreno-smmu
+          - const: arm,mmu-500
       - description: Qcom Adreno GPUs implementing "arm,smmu-v2"
         items:
           - enum:
+              - qcom,msm8996-smmu-v2
               - qcom,sc7180-smmu-v2
+              - qcom,sdm630-smmu-v2
               - qcom,sdm845-smmu-v2
           - const: qcom,adreno-smmu
           - const: qcom,smmu-v2
-- 
GitLab


From 982295bfe36925919ab61aab0657528541a2aa83 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:27 +0300
Subject: [PATCH 1488/4122] dt-bindings: arm-smmu: fix clocks/clock-names
 schema

Rework clocks/clock-names properties schema to properly describe
possible usage cases.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-3-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../devicetree/bindings/iommu/arm,smmu.yaml   | 129 ++++++++++++++++--
 1 file changed, 121 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index aa863811996f1..99f34a40cfe20 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -161,16 +161,12 @@ properties:
       present in such cases.
 
   clock-names:
-    items:
-      - const: bus
-      - const: iface
+    minItems: 1
+    maxItems: 7
 
   clocks:
-    items:
-      - description: bus clock required for downstream bus access and for the
-          smmu ptw
-      - description: interface clock required to access smmu's registers
-          through the TCU's programming interface.
+    minItems: 1
+    maxItems: 7
 
   power-domains:
     maxItems: 1
@@ -220,6 +216,123 @@ allOf:
         reg:
           maxItems: 1
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,msm8998-smmu-v2
+              - qcom,sdm630-smmu-v2
+    then:
+      anyOf:
+        - properties:
+            clock-names:
+              items:
+                - const: bus
+            clocks:
+              items:
+                - description: bus clock required for downstream bus access and for
+                    the smmu ptw
+        - properties:
+            clock-names:
+              items:
+                - const: iface
+                - const: mem
+                - const: mem_iface
+            clocks:
+              items:
+                - description: interface clock required to access smmu's registers
+                    through the TCU's programming interface.
+                - description: bus clock required for memory access
+                - description: bus clock required for GPU memory access
+        - properties:
+            clock-names:
+              items:
+                - const: iface-mm
+                - const: iface-smmu
+                - const: bus-mm
+                - const: bus-smmu
+            clocks:
+              items:
+                - description: interface clock required to access mnoc's registers
+                    through the TCU's programming interface.
+                - description: interface clock required to access smmu's registers
+                    through the TCU's programming interface.
+                - description: bus clock required for downstream bus access
+                - description: bus clock required for the smmu ptw
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,msm8996-smmu-v2
+              - qcom,sc7180-smmu-v2
+              - qcom,sdm845-smmu-v2
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: bus
+            - const: iface
+
+        clocks:
+          items:
+            - description: bus clock required for downstream bus access and for
+                the smmu ptw
+            - description: interface clock required to access smmu's registers
+                through the TCU's programming interface.
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,sc7280-smmu-500
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: gcc_gpu_memnoc_gfx_clk
+            - const: gcc_gpu_snoc_dvm_gfx_clk
+            - const: gpu_cc_ahb_clk
+            - const: gpu_cc_hlos1_vote_gpu_smmu_clk
+            - const: gpu_cc_cx_gmu_clk
+            - const: gpu_cc_hub_cx_int_clk
+            - const: gpu_cc_hub_aon_clk
+
+        clocks:
+          items:
+            - description: GPU memnoc_gfx clock
+            - description: GPU snoc_dvm_gfx clock
+            - description: GPU ahb clock
+            - description: GPU hlos1_vote_GPU smmu clock
+            - description: GPU cx_gmu clock
+            - description: GPU hub_cx_int clock
+            - description: GPU hub_aon clock
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,sm8150-smmu-500
+              - qcom,sm8250-smmu-500
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: ahb
+            - const: bus
+            - const: iface
+
+        clocks:
+          items:
+            - description: bus clock required for AHB bus access
+            - description: bus clock required for downstream bus access and for
+                the smmu ptw
+            - description: interface clock required to access smmu's registers
+                through the TCU's programming interface.
+
 examples:
   - |+
     /* SMMU with stream matching or stream indexing */
-- 
GitLab


From 3a12e8c065362f0d900a4a93ee60565253d7fde7 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:28 +0300
Subject: [PATCH 1489/4122] dt-bindings: arm-smmu: add special case for Google
 Cheza platform

Cheza fw does not properly program the GPU aperture to allow the
GPU to update the SMMU pagetables for context switches. The board file
works around this by dropping the "qcom,adreno-smmu" compat string.
Add this usecase to arm,smmu.yaml schema.

Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-4-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 99f34a40cfe20..29a8b3ff8fa00 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -69,6 +69,10 @@ properties:
               - qcom,sdm845-smmu-v2
           - const: qcom,adreno-smmu
           - const: qcom,smmu-v2
+      - description: Qcom Adreno GPUs on Google Cheza platform
+        items:
+          - const: qcom,sdm845-smmu-v2
+          - const: qcom,smmu-v2
       - description: Marvell SoCs implementing "arm,mmu-500"
         items:
           - const: marvell,ap806-smmu-500
-- 
GitLab


From 6c84bbd103d85696af9cc0f746c01f9b2847637e Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:29 +0300
Subject: [PATCH 1490/4122] dt-bindings: arm-smmu: Add generic qcom,smmu-500
 bindings

Add generic bindings for the Qualcomm variant of the ARM MMU-500. It is
expected that all future platforms will use the generic qcom,smmu-500
compat string in addition to SoC-specific and the generic arm,mmu-500
ones. Older bindings are now described as deprecated.

Note: I have split the sdx55 and sdx65 from the legacy bindings. They
are not supported by the qcom SMMU implementation. I can suppose that
they are using the generic implementation rather than the
Qualcomm-speicific one.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-5-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../devicetree/bindings/iommu/arm,smmu.yaml   | 29 ++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 29a8b3ff8fa00..28f5720824cd3 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -31,7 +31,7 @@ properties:
               - qcom,sdm630-smmu-v2
           - const: qcom,smmu-v2
 
-      - description: Qcom SoCs implementing "arm,mmu-500"
+      - description: Qcom SoCs implementing "qcom,smmu-500" and "arm,mmu-500"
         items:
           - enum:
               - qcom,qcm2290-smmu-500
@@ -42,8 +42,35 @@ properties:
               - qcom,sc8280xp-smmu-500
               - qcom,sdm670-smmu-500
               - qcom,sdm845-smmu-500
+              - qcom,sm6115-smmu-500
+              - qcom,sm6350-smmu-500
+              - qcom,sm6375-smmu-500
+              - qcom,sm8150-smmu-500
+              - qcom,sm8250-smmu-500
+              - qcom,sm8350-smmu-500
+              - qcom,sm8450-smmu-500
+          - const: qcom,smmu-500
+          - const: arm,mmu-500
+
+      - description: Qcom SoCs implementing "arm,mmu-500" (non-qcom implementation)
+        deprecated: true
+        items:
+          - enum:
               - qcom,sdx55-smmu-500
               - qcom,sdx65-smmu-500
+          - const: arm,mmu-500
+
+      - description: Qcom SoCs implementing "arm,mmu-500" (legacy binding)
+        deprecated: true
+        items:
+          # Do not add additional SoC to this list. Instead use two previous lists.
+          - enum:
+              - qcom,qcm2290-smmu-500
+              - qcom,sc7180-smmu-500
+              - qcom,sc7280-smmu-500
+              - qcom,sc8180x-smmu-500
+              - qcom,sc8280xp-smmu-500
+              - qcom,sdm845-smmu-500
               - qcom,sm6115-smmu-500
               - qcom,sm6350-smmu-500
               - qcom,sm6375-smmu-500
-- 
GitLab


From 4c1d0ad153f8bca09776da6031639d3b965d849a Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:30 +0300
Subject: [PATCH 1491/4122] iommu/arm-smmu-qcom: Move implementation data into
 match data

In preparation to rework of the implementation and configuration
details, make qcom_smmu_create() accept new qcom_smmu_match_data
structure pointer. Make implementation a field in this struct.

Reviewed-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Tested-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-6-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 62 ++++++++++++++--------
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h |  4 ++
 2 files changed, 44 insertions(+), 22 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 0f4eaf217983d..a7bd49e44bcac 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -405,10 +405,18 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
 };
 
 static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
-		const struct arm_smmu_impl *impl)
+		const struct qcom_smmu_match_data *data)
 {
+	const struct arm_smmu_impl *impl;
 	struct qcom_smmu *qsmmu;
 
+	if (!data)
+		return ERR_PTR(-EINVAL);
+
+	impl = data->impl;
+	if (!impl)
+		return smmu;
+
 	/* Check to make sure qcom_scm has finished probing */
 	if (!qcom_scm_is_available())
 		return ERR_PTR(-EPROBE_DEFER);
@@ -423,24 +431,32 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 	return &qsmmu->smmu;
 }
 
+static const struct qcom_smmu_match_data qcom_smmu_data = {
+	.impl = &qcom_smmu_impl,
+};
+
+static const struct qcom_smmu_match_data qcom_adreno_smmu_data = {
+	.impl = &qcom_adreno_smmu_impl,
+};
+
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
-	{ .compatible = "qcom,msm8998-smmu-v2" },
-	{ .compatible = "qcom,qcm2290-smmu-500" },
-	{ .compatible = "qcom,qdu1000-smmu-500" },
-	{ .compatible = "qcom,sc7180-smmu-500" },
-	{ .compatible = "qcom,sc7280-smmu-500" },
-	{ .compatible = "qcom,sc8180x-smmu-500" },
-	{ .compatible = "qcom,sc8280xp-smmu-500" },
-	{ .compatible = "qcom,sdm630-smmu-v2" },
-	{ .compatible = "qcom,sdm845-smmu-500" },
-	{ .compatible = "qcom,sm6115-smmu-500" },
-	{ .compatible = "qcom,sm6125-smmu-500" },
-	{ .compatible = "qcom,sm6350-smmu-500" },
-	{ .compatible = "qcom,sm6375-smmu-500" },
-	{ .compatible = "qcom,sm8150-smmu-500" },
-	{ .compatible = "qcom,sm8250-smmu-500" },
-	{ .compatible = "qcom,sm8350-smmu-500" },
-	{ .compatible = "qcom,sm8450-smmu-500" },
+	{ .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data  },
+	{ .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_data },
 	{ }
 };
 
@@ -455,12 +471,13 @@ static struct acpi_platform_list qcom_acpi_platlist[] = {
 struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
 {
 	const struct device_node *np = smmu->dev->of_node;
+	const struct of_device_id *match;
 
 #ifdef CONFIG_ACPI
 	if (np == NULL) {
 		/* Match platform for ACPI boot */
 		if (acpi_match_platform_list(qcom_acpi_platlist) >= 0)
-			return qcom_smmu_create(smmu, &qcom_smmu_impl);
+			return qcom_smmu_create(smmu, &qcom_smmu_data);
 	}
 #endif
 
@@ -471,10 +488,11 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
 	 * features if the order is changed.
 	 */
 	if (of_device_is_compatible(np, "qcom,adreno-smmu"))
-		return qcom_smmu_create(smmu, &qcom_adreno_smmu_impl);
+		return qcom_smmu_create(smmu, &qcom_adreno_smmu_data);
 
-	if (of_match_node(qcom_smmu_impl_of_match, np))
-		return qcom_smmu_create(smmu, &qcom_smmu_impl);
+	match = of_match_node(qcom_smmu_impl_of_match, np);
+	if (match)
+		return qcom_smmu_create(smmu, match->data);
 
 	return smmu;
 }
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
index 99ec8f8629a0d..2424f10b7110b 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
@@ -14,6 +14,10 @@ struct qcom_smmu {
 	u32 stall_enabled;
 };
 
+struct qcom_smmu_match_data {
+	const struct arm_smmu_impl *impl;
+};
+
 #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
 void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu);
 const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu);
-- 
GitLab


From 30b912a03d91727d75ae14f277b64aca8fb915e4 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:31 +0300
Subject: [PATCH 1492/4122] iommu/arm-smmu-qcom: Move the qcom,adreno-smmu
 check into qcom_smmu_create

Move special handling of qcom,adreno-smmu into qcom_smmu_create()
function. This allows us to further customize the Adreno SMMU
implementation.

Note, this also adds two entries to the qcom_smmu_impl_of_match table.
They were used with the qcom,adreno-smmu compat and were handled by the
removed clause.

Reviewed-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Tested-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-7-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 32 ++++++++++++----------
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h |  1 +
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index a7bd49e44bcac..e611941277728 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -407,13 +407,18 @@ static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
 static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		const struct qcom_smmu_match_data *data)
 {
+	const struct device_node *np = smmu->dev->of_node;
 	const struct arm_smmu_impl *impl;
 	struct qcom_smmu *qsmmu;
 
 	if (!data)
 		return ERR_PTR(-EINVAL);
 
-	impl = data->impl;
+	if (np && of_device_is_compatible(np, "qcom,adreno-smmu"))
+		impl = data->adreno_impl;
+	else
+		impl = data->impl;
+
 	if (!impl)
 		return smmu;
 
@@ -431,15 +436,22 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 	return &qsmmu->smmu;
 }
 
-static const struct qcom_smmu_match_data qcom_smmu_data = {
-	.impl = &qcom_smmu_impl,
+/*
+ * It is not yet possible to use MDP SMMU with the bypass quirk on the msm8996,
+ * there are not enough context banks.
+ */
+static const struct qcom_smmu_match_data msm8996_smmu_data = {
+	.impl = NULL,
+	.adreno_impl = &qcom_adreno_smmu_impl,
 };
 
-static const struct qcom_smmu_match_data qcom_adreno_smmu_data = {
-	.impl = &qcom_adreno_smmu_impl,
+static const struct qcom_smmu_match_data qcom_smmu_data = {
+	.impl = &qcom_smmu_impl,
+	.adreno_impl = &qcom_adreno_smmu_impl,
 };
 
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
+	{ .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data },
 	{ .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data  },
@@ -448,6 +460,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data },
@@ -481,15 +494,6 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
 	}
 #endif
 
-	/*
-	 * Do not change this order of implementation, i.e., first adreno
-	 * smmu impl and then apss smmu since we can have both implementing
-	 * arm,mmu-500 in which case we will miss setting adreno smmu specific
-	 * features if the order is changed.
-	 */
-	if (of_device_is_compatible(np, "qcom,adreno-smmu"))
-		return qcom_smmu_create(smmu, &qcom_adreno_smmu_data);
-
 	match = of_match_node(qcom_smmu_impl_of_match, np);
 	if (match)
 		return qcom_smmu_create(smmu, match->data);
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
index 2424f10b7110b..424d8d342ce06 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
@@ -16,6 +16,7 @@ struct qcom_smmu {
 
 struct qcom_smmu_match_data {
 	const struct arm_smmu_impl *impl;
+	const struct arm_smmu_impl *adreno_impl;
 };
 
 #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
-- 
GitLab


From 417b76adcf1d141666866eba5afdd42953f66e2f Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:32 +0300
Subject: [PATCH 1493/4122] iommu/arm-smmu-qcom: provide separate
 implementation for SDM845-smmu-500

There is only one platform, which needs special care in the reset
function, the SDM845. Add special handler for sdm845 and drop the
qcom_smmu500_reset() function.

Reviewed-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Tested-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-8-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 37 +++++++++++++---------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index e611941277728..6dc7fa9187991 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -361,6 +361,8 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
 {
 	int ret;
 
+	arm_mmu500_reset(smmu);
+
 	/*
 	 * To address performance degradation in non-real time clients,
 	 * such as USB and UFS, turn off wait-for-safe on sdm845 based boards,
@@ -374,23 +376,20 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
 	return ret;
 }
 
-static int qcom_smmu500_reset(struct arm_smmu_device *smmu)
-{
-	const struct device_node *np = smmu->dev->of_node;
-
-	arm_mmu500_reset(smmu);
-
-	if (of_device_is_compatible(np, "qcom,sdm845-smmu-500"))
-		return qcom_sdm845_smmu500_reset(smmu);
-
-	return 0;
-}
-
 static const struct arm_smmu_impl qcom_smmu_impl = {
 	.init_context = qcom_smmu_init_context,
 	.cfg_probe = qcom_smmu_cfg_probe,
 	.def_domain_type = qcom_smmu_def_domain_type,
-	.reset = qcom_smmu500_reset,
+	.reset = arm_mmu500_reset,
+	.write_s2cr = qcom_smmu_write_s2cr,
+	.tlb_sync = qcom_smmu_tlb_sync,
+};
+
+static const struct arm_smmu_impl sdm845_smmu_500_impl = {
+	.init_context = qcom_smmu_init_context,
+	.cfg_probe = qcom_smmu_cfg_probe,
+	.def_domain_type = qcom_smmu_def_domain_type,
+	.reset = qcom_sdm845_smmu500_reset,
 	.write_s2cr = qcom_smmu_write_s2cr,
 	.tlb_sync = qcom_smmu_tlb_sync,
 };
@@ -398,7 +397,7 @@ static const struct arm_smmu_impl qcom_smmu_impl = {
 static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
 	.init_context = qcom_adreno_smmu_init_context,
 	.def_domain_type = qcom_smmu_def_domain_type,
-	.reset = qcom_smmu500_reset,
+	.reset = arm_mmu500_reset,
 	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
 	.write_sctlr = qcom_adreno_smmu_write_sctlr,
 	.tlb_sync = qcom_smmu_tlb_sync,
@@ -450,6 +449,14 @@ static const struct qcom_smmu_match_data qcom_smmu_data = {
 	.adreno_impl = &qcom_adreno_smmu_impl,
 };
 
+static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
+	.impl = &sdm845_smmu_500_impl,
+	/*
+	 * No need for adreno impl here. On sdm845 the Adreno SMMU is handled
+	 * by the separate sdm845-smmu-v2 device.
+	 */
+};
+
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data },
 	{ .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data },
@@ -461,7 +468,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sdm845-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data },
 	{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data },
-- 
GitLab


From 4172dda2b30a9a0e628e81d2a3bc9a6ef0936774 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:33 +0300
Subject: [PATCH 1494/4122] iommu/arm-smmu-qcom: Merge table from
 arm-smmu-qcom-debug into match data

There is little point in having a separate match table in
arm-smmu-qcom-debug.c. Merge it into the main match data table in
arm-smmu-qcom.c

Note, this also enables debug support for qdu1000, sm6115, sm6375 and
ACPI-based sc8180x systems, since these SoCs are expected to support
tlb_sync debug.

Reviewed-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Tested-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-9-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 .../iommu/arm/arm-smmu/arm-smmu-qcom-debug.c  | 91 -------------------
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c    | 50 ++++++----
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h    | 16 +++-
 3 files changed, 45 insertions(+), 112 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
index 6eed8e67a0ca8..74e9ef2fd5804 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom-debug.c
@@ -10,16 +10,6 @@
 #include "arm-smmu.h"
 #include "arm-smmu-qcom.h"
 
-enum qcom_smmu_impl_reg_offset {
-	QCOM_SMMU_TBU_PWR_STATUS,
-	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
-	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
-};
-
-struct qcom_smmu_config {
-	const u32 *reg_offset;
-};
-
 void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
 {
 	int ret;
@@ -59,84 +49,3 @@ void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu)
 			tbu_pwr_status, sync_inv_ack, sync_inv_progress);
 	}
 }
-
-/* Implementation Defined Register Space 0 register offsets */
-static const u32 qcom_smmu_impl0_reg_offset[] = {
-	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
-	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
-	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
-};
-
-static const struct qcom_smmu_config qcm2290_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sc7180_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sc7280_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sc8180x_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sc8280xp_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sm6125_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sm6350_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sm8150_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sm8250_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sm8350_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct qcom_smmu_config sm8450_smmu_cfg = {
-	.reg_offset = qcom_smmu_impl0_reg_offset,
-};
-
-static const struct of_device_id __maybe_unused qcom_smmu_impl_debug_match[] = {
-	{ .compatible = "qcom,msm8998-smmu-v2" },
-	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcm2290_smmu_cfg },
-	{ .compatible = "qcom,sc7180-smmu-500", .data = &sc7180_smmu_cfg },
-	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_cfg},
-	{ .compatible = "qcom,sc8180x-smmu-500", .data = &sc8180x_smmu_cfg },
-	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &sc8280xp_smmu_cfg },
-	{ .compatible = "qcom,sdm630-smmu-v2" },
-	{ .compatible = "qcom,sdm845-smmu-500" },
-	{ .compatible = "qcom,sm6125-smmu-500", .data = &sm6125_smmu_cfg},
-	{ .compatible = "qcom,sm6350-smmu-500", .data = &sm6350_smmu_cfg},
-	{ .compatible = "qcom,sm8150-smmu-500", .data = &sm8150_smmu_cfg },
-	{ .compatible = "qcom,sm8250-smmu-500", .data = &sm8250_smmu_cfg },
-	{ .compatible = "qcom,sm8350-smmu-500", .data = &sm8350_smmu_cfg },
-	{ .compatible = "qcom,sm8450-smmu-500", .data = &sm8450_smmu_cfg },
-	{ }
-};
-
-const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu)
-{
-	const struct of_device_id *match;
-	const struct device_node *np = smmu->dev->of_node;
-
-	match = of_match_node(qcom_smmu_impl_debug_match, np);
-	if (!match)
-		return NULL;
-
-	return match->data;
-}
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 6dc7fa9187991..1843bcd81402e 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -430,11 +430,22 @@ static struct arm_smmu_device *qcom_smmu_create(struct arm_smmu_device *smmu,
 		return ERR_PTR(-ENOMEM);
 
 	qsmmu->smmu.impl = impl;
-	qsmmu->cfg = qcom_smmu_impl_data(smmu);
+	qsmmu->cfg = data->cfg;
 
 	return &qsmmu->smmu;
 }
 
+/* Implementation Defined Register Space 0 register offsets */
+static const u32 qcom_smmu_impl0_reg_offset[] = {
+	[QCOM_SMMU_TBU_PWR_STATUS]		= 0x2204,
+	[QCOM_SMMU_STATS_SYNC_INV_TBU_ACK]	= 0x25dc,
+	[QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR]	= 0x2670,
+};
+
+static const struct qcom_smmu_config qcom_smmu_impl0_cfg = {
+	.reg_offset = qcom_smmu_impl0_reg_offset,
+};
+
 /*
  * It is not yet possible to use MDP SMMU with the bypass quirk on the msm8996,
  * there are not enough context banks.
@@ -455,28 +466,35 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
 	 * No need for adreno impl here. On sdm845 the Adreno SMMU is handled
 	 * by the separate sdm845-smmu-v2 device.
 	 */
+	/* Also no debug configuration. */
+};
+
+static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
+	.impl = &qcom_smmu_impl,
+	.adreno_impl = &qcom_adreno_smmu_impl,
+	.cfg = &qcom_smmu_impl0_cfg,
 };
 
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data },
 	{ .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_data  },
-	{ .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data  },
+	{ .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data },
 	{ .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data },
-	{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data},
+	{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ }
 };
 
@@ -497,7 +515,7 @@ struct arm_smmu_device *qcom_smmu_impl_init(struct arm_smmu_device *smmu)
 	if (np == NULL) {
 		/* Match platform for ACPI boot */
 		if (acpi_match_platform_list(qcom_acpi_platlist) >= 0)
-			return qcom_smmu_create(smmu, &qcom_smmu_data);
+			return qcom_smmu_create(smmu, &qcom_smmu_500_impl0_data);
 	}
 #endif
 
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
index 424d8d342ce06..593910567b884 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.h
@@ -14,20 +14,26 @@ struct qcom_smmu {
 	u32 stall_enabled;
 };
 
+enum qcom_smmu_impl_reg_offset {
+	QCOM_SMMU_TBU_PWR_STATUS,
+	QCOM_SMMU_STATS_SYNC_INV_TBU_ACK,
+	QCOM_SMMU_MMU2QSS_AND_SAFE_WAIT_CNTR,
+};
+
+struct qcom_smmu_config {
+	const u32 *reg_offset;
+};
+
 struct qcom_smmu_match_data {
+	const struct qcom_smmu_config *cfg;
 	const struct arm_smmu_impl *impl;
 	const struct arm_smmu_impl *adreno_impl;
 };
 
 #ifdef CONFIG_ARM_SMMU_QCOM_DEBUG
 void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu);
-const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu);
 #else
 static inline void qcom_smmu_tlb_sync_debug(struct arm_smmu_device *smmu) { }
-static inline const void *qcom_smmu_impl_data(struct arm_smmu_device *smmu)
-{
-	return NULL;
-}
 #endif
 
 #endif /* _ARM_SMMU_QCOM_H */
-- 
GitLab


From b4c6ee515c426f5fffc3e25772a03e44655d6e1c Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:34 +0300
Subject: [PATCH 1495/4122] iommu/arm-smmu-qcom: Stop using mmu500 reset for v2
 MMUs

The arm_mmu500_reset() writes into registers specific for MMU500. For
the generic ARM SMMU v2 these registers (sACR) are defined as
'implementation defined'. Downstream Qualcomm driver for SMMUv2 doesn't
touch them.

Reviewed-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Tested-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-10-dmitry.baryshkov@linaro.org
[will: Remove unused 'qcom_smmu_data' stucture]
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 38 +++++++++++++++-------
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 1843bcd81402e..07372db4184e1 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -376,7 +376,15 @@ static int qcom_sdm845_smmu500_reset(struct arm_smmu_device *smmu)
 	return ret;
 }
 
-static const struct arm_smmu_impl qcom_smmu_impl = {
+static const struct arm_smmu_impl qcom_smmu_v2_impl = {
+	.init_context = qcom_smmu_init_context,
+	.cfg_probe = qcom_smmu_cfg_probe,
+	.def_domain_type = qcom_smmu_def_domain_type,
+	.write_s2cr = qcom_smmu_write_s2cr,
+	.tlb_sync = qcom_smmu_tlb_sync,
+};
+
+static const struct arm_smmu_impl qcom_smmu_500_impl = {
 	.init_context = qcom_smmu_init_context,
 	.cfg_probe = qcom_smmu_cfg_probe,
 	.def_domain_type = qcom_smmu_def_domain_type,
@@ -394,7 +402,15 @@ static const struct arm_smmu_impl sdm845_smmu_500_impl = {
 	.tlb_sync = qcom_smmu_tlb_sync,
 };
 
-static const struct arm_smmu_impl qcom_adreno_smmu_impl = {
+static const struct arm_smmu_impl qcom_adreno_smmu_v2_impl = {
+	.init_context = qcom_adreno_smmu_init_context,
+	.def_domain_type = qcom_smmu_def_domain_type,
+	.alloc_context_bank = qcom_adreno_smmu_alloc_context_bank,
+	.write_sctlr = qcom_adreno_smmu_write_sctlr,
+	.tlb_sync = qcom_smmu_tlb_sync,
+};
+
+static const struct arm_smmu_impl qcom_adreno_smmu_500_impl = {
 	.init_context = qcom_adreno_smmu_init_context,
 	.def_domain_type = qcom_smmu_def_domain_type,
 	.reset = arm_mmu500_reset,
@@ -452,12 +468,12 @@ static const struct qcom_smmu_config qcom_smmu_impl0_cfg = {
  */
 static const struct qcom_smmu_match_data msm8996_smmu_data = {
 	.impl = NULL,
-	.adreno_impl = &qcom_adreno_smmu_impl,
+	.adreno_impl = &qcom_adreno_smmu_v2_impl,
 };
 
-static const struct qcom_smmu_match_data qcom_smmu_data = {
-	.impl = &qcom_smmu_impl,
-	.adreno_impl = &qcom_adreno_smmu_impl,
+static const struct qcom_smmu_match_data qcom_smmu_v2_data = {
+	.impl = &qcom_smmu_v2_impl,
+	.adreno_impl = &qcom_adreno_smmu_v2_impl,
 };
 
 static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
@@ -470,22 +486,22 @@ static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
 };
 
 static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
-	.impl = &qcom_smmu_impl,
-	.adreno_impl = &qcom_adreno_smmu_impl,
+	.impl = &qcom_smmu_500_impl,
+	.adreno_impl = &qcom_adreno_smmu_500_impl,
 	.cfg = &qcom_smmu_impl0_cfg,
 };
 
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data },
-	{ .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_v2_data },
 	{ .compatible = "qcom,qcm2290-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data  },
 	{ .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data },
-	{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_data },
-	{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_data },
+	{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data },
+	{ .compatible = "qcom,sdm845-smmu-v2", .data = &qcom_smmu_v2_data },
 	{ .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data },
 	{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data},
 	{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data },
-- 
GitLab


From 80b71080720e34eaf06642c372d4c11d046baf27 Mon Sep 17 00:00:00 2001
From: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Date: Mon, 14 Nov 2022 20:06:35 +0300
Subject: [PATCH 1496/4122] iommu/arm-smmu-qcom: Add generic qcom,smmu-500
 match entry

Add generic qcom,smmu-500 compatibility string. Newer platforms should
use this generic entry rather than declaring per-SoC entries.

Reviewed-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Tested-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114170635.1406534-11-dmitry.baryshkov@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 07372db4184e1..c94daf88c5051 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -491,6 +491,10 @@ static const struct qcom_smmu_match_data qcom_smmu_500_impl0_data = {
 	.cfg = &qcom_smmu_impl0_cfg,
 };
 
+/*
+ * Do not add any more qcom,SOC-smmu-500 entries to this list, unless they need
+ * special handling and can not be covered by the qcom,smmu-500 entry.
+ */
 static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,msm8996-smmu-v2", .data = &msm8996_smmu_data },
 	{ .compatible = "qcom,msm8998-smmu-v2", .data = &qcom_smmu_v2_data },
@@ -511,6 +515,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sm8450-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ }
 };
 
-- 
GitLab


From 4e016f969529f2aec0545e90119e7eb3cb124c46 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Sun, 6 Nov 2022 19:46:18 +0200
Subject: [PATCH 1497/4122] vfio: Add an option to get migration data size

Add an option to get migration data size by introducing a new migration
feature named VFIO_DEVICE_FEATURE_MIG_DATA_SIZE.

Upon VFIO_DEVICE_FEATURE_GET the estimated data length that will be
required to complete STOP_COPY is returned.

This option may better enable user space to consider before moving to
STOP_COPY whether it can meet the downtime SLA based on the returned
data.

The patch also includes the implementation for mlx5 and hisi for this
new option to make it feature complete for the existing drivers in this
area.

Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Longfang Liu <liulongfang@huawei.com>
Link: https://lore.kernel.org/r/20221106174630.25909-2-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    |  9 ++++++
 drivers/vfio/pci/mlx5/main.c                  | 18 +++++++++++
 drivers/vfio/pci/vfio_pci_core.c              |  3 +-
 drivers/vfio/vfio_main.c                      | 32 +++++++++++++++++++
 include/linux/vfio.h                          |  5 +++
 include/uapi/linux/vfio.h                     | 13 ++++++++
 6 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 39eeca18a0f7c..0c0c0c7f05215 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -957,6 +957,14 @@ hisi_acc_vfio_pci_set_device_state(struct vfio_device *vdev,
 	return res;
 }
 
+static int
+hisi_acc_vfio_pci_get_data_size(struct vfio_device *vdev,
+				unsigned long *stop_copy_length)
+{
+	*stop_copy_length = sizeof(struct acc_vf_data);
+	return 0;
+}
+
 static int
 hisi_acc_vfio_pci_get_device_state(struct vfio_device *vdev,
 				   enum vfio_device_mig_state *curr_state)
@@ -1213,6 +1221,7 @@ static void hisi_acc_vfio_pci_close_device(struct vfio_device *core_vdev)
 static const struct vfio_migration_ops hisi_acc_vfio_pci_migrn_state_ops = {
 	.migration_set_state = hisi_acc_vfio_pci_set_device_state,
 	.migration_get_state = hisi_acc_vfio_pci_get_device_state,
+	.migration_get_data_size = hisi_acc_vfio_pci_get_data_size,
 };
 
 static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 457138b92f134..6e9cf2aacc527 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -512,6 +512,23 @@ mlx5vf_pci_set_device_state(struct vfio_device *vdev,
 	return res;
 }
 
+static int mlx5vf_pci_get_data_size(struct vfio_device *vdev,
+				    unsigned long *stop_copy_length)
+{
+	struct mlx5vf_pci_core_device *mvdev = container_of(
+		vdev, struct mlx5vf_pci_core_device, core_device.vdev);
+	size_t state_size;
+	int ret;
+
+	mutex_lock(&mvdev->state_mutex);
+	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
+						    &state_size);
+	if (!ret)
+		*stop_copy_length = state_size;
+	mlx5vf_state_mutex_unlock(mvdev);
+	return ret;
+}
+
 static int mlx5vf_pci_get_device_state(struct vfio_device *vdev,
 				       enum vfio_device_mig_state *curr_state)
 {
@@ -577,6 +594,7 @@ static void mlx5vf_pci_close_device(struct vfio_device *core_vdev)
 static const struct vfio_migration_ops mlx5vf_pci_mig_ops = {
 	.migration_set_state = mlx5vf_pci_set_device_state,
 	.migration_get_state = mlx5vf_pci_get_device_state,
+	.migration_get_data_size = mlx5vf_pci_get_data_size,
 };
 
 static const struct vfio_log_ops mlx5vf_pci_log_ops = {
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 9be2d5be5d959..189d4930c276d 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -2127,7 +2127,8 @@ int vfio_pci_core_register_device(struct vfio_pci_core_device *vdev)
 
 	if (vdev->vdev.mig_ops) {
 		if (!(vdev->vdev.mig_ops->migration_get_state &&
-		      vdev->vdev.mig_ops->migration_set_state) ||
+		      vdev->vdev.mig_ops->migration_set_state &&
+		      vdev->vdev.mig_ops->migration_get_data_size) ||
 		    !(vdev->vdev.migration_flags & VFIO_MIGRATION_STOP_COPY))
 			return -EINVAL;
 	}
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 9835757e2bee4..662e267a3e13d 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1242,6 +1242,34 @@ out_copy:
 	return 0;
 }
 
+static int
+vfio_ioctl_device_feature_migration_data_size(struct vfio_device *device,
+					      u32 flags, void __user *arg,
+					      size_t argsz)
+{
+	struct vfio_device_feature_mig_data_size data_size = {};
+	unsigned long stop_copy_length;
+	int ret;
+
+	if (!device->mig_ops)
+		return -ENOTTY;
+
+	ret = vfio_check_feature(flags, argsz, VFIO_DEVICE_FEATURE_GET,
+				 sizeof(data_size));
+	if (ret != 1)
+		return ret;
+
+	ret = device->mig_ops->migration_get_data_size(device, &stop_copy_length);
+	if (ret)
+		return ret;
+
+	data_size.stop_copy_length = stop_copy_length;
+	if (copy_to_user(arg, &data_size, sizeof(data_size)))
+		return -EFAULT;
+
+	return 0;
+}
+
 static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
 					       u32 flags, void __user *arg,
 					       size_t argsz)
@@ -1469,6 +1497,10 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
 		return vfio_ioctl_device_feature_logging_report(
 			device, feature.flags, arg->data,
 			feature.argsz - minsz);
+	case VFIO_DEVICE_FEATURE_MIG_DATA_SIZE:
+		return vfio_ioctl_device_feature_migration_data_size(
+			device, feature.flags, arg->data,
+			feature.argsz - minsz);
 	default:
 		if (unlikely(!device->ops->device_feature))
 			return -EINVAL;
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e7480154825ec..43b67e46a2cb5 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -107,6 +107,9 @@ struct vfio_device_ops {
  * @migration_get_state: Optional callback to get the migration state for
  *         devices that support migration. It's mandatory for
  *         VFIO_DEVICE_FEATURE_MIGRATION migration support.
+ * @migration_get_data_size: Optional callback to get the estimated data
+ *          length that will be required to complete stop copy. It's mandatory for
+ *          VFIO_DEVICE_FEATURE_MIGRATION migration support.
  */
 struct vfio_migration_ops {
 	struct file *(*migration_set_state)(
@@ -114,6 +117,8 @@ struct vfio_migration_ops {
 		enum vfio_device_mig_state new_state);
 	int (*migration_get_state)(struct vfio_device *device,
 				   enum vfio_device_mig_state *curr_state);
+	int (*migration_get_data_size)(struct vfio_device *device,
+				       unsigned long *stop_copy_length);
 };
 
 /**
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index d7d8e0922376c..3e45dbaf190ef 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -1128,6 +1128,19 @@ struct vfio_device_feature_dma_logging_report {
 
 #define VFIO_DEVICE_FEATURE_DMA_LOGGING_REPORT 8
 
+/*
+ * Upon VFIO_DEVICE_FEATURE_GET read back the estimated data length that will
+ * be required to complete stop copy.
+ *
+ * Note: Can be called on each device state.
+ */
+
+struct vfio_device_feature_mig_data_size {
+	__aligned_u64 stop_copy_length;
+};
+
+#define VFIO_DEVICE_FEATURE_MIG_DATA_SIZE 9
+
 /* -------- API for Type1 VFIO IOMMU -------- */
 
 /**
-- 
GitLab


From 2f5d8cef45c30edcf3972d345f606df563d3a48e Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Sun, 6 Nov 2022 19:46:19 +0200
Subject: [PATCH 1498/4122] vfio/mlx5: Fix a typo in
 mlx5vf_cmd_load_vhca_state()

Fix a typo in mlx5vf_cmd_load_vhca_state() to use the 'load' memory
layout.

As in/out sizes are equal for save and load commands there wasn't any
functional issue.

Fixes: f1d98f346ee3 ("vfio/mlx5: Expose migration commands over mlx5 device")
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20221106174630.25909-3-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index c604b70437a5d..0848bc905d3ea 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -378,8 +378,8 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 			       struct mlx5_vf_migration_file *migf)
 {
 	struct mlx5_core_dev *mdev;
-	u32 out[MLX5_ST_SZ_DW(save_vhca_state_out)] = {};
-	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
+	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
 	u32 pdn, mkey;
 	int err;
 
-- 
GitLab


From a80e0e156ca6d7c339e314d15db2f038755da6c3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 11 Nov 2022 19:22:43 -0800
Subject: [PATCH 1499/4122] perf stat: Fix summary output in CSV with
 --metric-only

It should not print "summary" for each event when --metric-only is set.

Before:

  $ sudo perf stat -a --per-socket --summary -x, --metric-only true
   time,socket,cpusGhz,insn per cycle,branch-misses of all branches,
       0.000709079,S0,8,0.893,2.40,0.45,
  S0,8,         summary,         summary,         summary,         summary,         summary,0.893,         summary,2.40,         summary,         summary,0.45,

After:

  $ sudo perf stat -a --per-socket --summary -x, --metric-only true
   time,socket,cpusGHz,insn per cycle,branch-misses of all branches,
       0.000882297,S0,8,0.598,1.64,0.64,
           summary,S0,8,0.598,1.64,0.64,

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221112032244.1077370-11-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index cc206781c57cc..6ea7314032706 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -549,7 +549,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 	}
 
 	if (!config->no_csv_summary && config->csv_output &&
-	    config->summary && !config->interval) {
+	    config->summary && !config->interval && !config->metric_only) {
 		fprintf(config->output, "%16s%s", "summary", config->csv_sep);
 	}
 
@@ -732,8 +732,13 @@ static void print_aggr(struct perf_stat_config *config,
 	 * Without each counter has its own line.
 	 */
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		if (prefix && metric_only)
-			fprintf(output, "%s", prefix);
+		if (metric_only) {
+			if (prefix)
+				fprintf(output, "%s", prefix);
+			else if (config->summary && !config->no_csv_summary &&
+				 config->csv_output && !config->interval)
+				fprintf(output, "%16s%s", "summary", config->csv_sep);
+		}
 
 		first = true;
 		evlist__for_each_entry(evlist, counter) {
-- 
GitLab


From 7565f9617efac0c0c8e2dbd08dbe0695d56684f5 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 11 Nov 2022 19:22:44 -0800
Subject: [PATCH 1500/4122] perf stat: Add missing separator in the CSV header

It should have a comma after 'cpus' for socket and die aggregation mode.
The output of the following command shows the issue.

  $ sudo perf stat -a --per-socket -x, --metric-only -I1 true

Before:

                  +--- here
                  V
   time,socket,cpusGhz,insn per cycle,branch-misses of all branches,
       0.000908461,S0,8,0.950,1.65,1.21,

After:

   time,socket,cpus,GHz,insn per cycle,branch-misses of all branches,
       0.000683094,S0,8,0.593,2.00,0.60,

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221112032244.1077370-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 6ea7314032706..2a3c1e0098b9a 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -828,8 +828,8 @@ static int aggr_header_lens[] = {
 
 static const char *aggr_header_csv[] = {
 	[AGGR_CORE] 	= 	"core,cpus,",
-	[AGGR_DIE] 	= 	"die,cpus",
-	[AGGR_SOCKET] 	= 	"socket,cpus",
+	[AGGR_DIE] 	= 	"die,cpus,",
+	[AGGR_SOCKET] 	= 	"socket,cpus,",
 	[AGGR_NONE] 	= 	"cpu,",
 	[AGGR_THREAD] 	= 	"comm-pid,",
 	[AGGR_NODE] 	= 	"node,",
-- 
GitLab


From 913a144164d8f09fab7e4175d693168b29d5843b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 2 Nov 2022 09:02:33 -0700
Subject: [PATCH 1501/4122] HSI: ssi_protocol: Fix return type of
 ssip_pn_xmit()

With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG),
indirect call targets are validated against the expected function
pointer prototype to make sure the call target is valid to help mitigate
ROP attacks. If they are not identical, there is a failure at run time,
which manifests as either a kernel panic or thread getting killed. A
proposed warning in clang aims to catch these at compile time, which
reveals:

  drivers/hsi/clients/ssi_protocol.c:1053:20: error: incompatible function pointer types initializing 'netdev_tx_t (*)(struct sk_buff *, struct net_device *)' (aka 'enum netdev_tx (*)(struct sk_buff *, struct net_device *)') with an expression of type 'int (struct sk_buff *, struct net_device *)' [-Werror,-Wincompatible-function-pointer-types-strict]
          .ndo_start_xmit = ssip_pn_xmit,
                            ^~~~~~~~~~~~
  1 error generated.

->ndo_start_xmit() in 'struct net_device_ops' expects a return type of
'netdev_tx_t', not 'int'. Adjust the return type of ssip_pn_xmit() to
match the prototype's to resolve the warning and CFI failure.
Additionally, use the enum 'NETDEV_TX_OK' instead of a raw '0' for the
return value of ssip_pn_xmit().

Link: https://github.com/ClangBuiltLinux/linux/issues/1750
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/hsi/clients/ssi_protocol.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/hsi/clients/ssi_protocol.c b/drivers/hsi/clients/ssi_protocol.c
index 274ad8443f8c2..38e572faff435 100644
--- a/drivers/hsi/clients/ssi_protocol.c
+++ b/drivers/hsi/clients/ssi_protocol.c
@@ -968,7 +968,7 @@ static void ssip_xmit_work(struct work_struct *work)
 	ssip_xmit(cl);
 }
 
-static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hsi_client *cl = to_hsi_client(dev->dev.parent);
 	struct ssi_protocol *ssi = hsi_client_drvdata(cl);
@@ -1027,7 +1027,7 @@ static int ssip_pn_xmit(struct sk_buff *skb, struct net_device *dev)
 	dev->stats.tx_packets++;
 	dev->stats.tx_bytes += skb->len;
 
-	return 0;
+	return NETDEV_TX_OK;
 drop2:
 	hsi_free_msg(msg);
 drop:
@@ -1035,7 +1035,7 @@ drop:
 inc_dropped:
 	dev->stats.tx_dropped++;
 
-	return 0;
+	return NETDEV_TX_OK;
 }
 
 /* CMT reset event handler */
-- 
GitLab


From f5181c35ed7ba0ceb6e42872aad1334d994b0175 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 1 Nov 2022 11:41:18 +0800
Subject: [PATCH 1502/4122] HSI: omap_ssi_core: fix unbalanced
 pm_runtime_disable()

In error label 'out1' path in ssi_probe(), the pm_runtime_enable()
has not been called yet, so pm_runtime_disable() is not needed.

Fixes: b209e047bc74 ("HSI: Introduce OMAP SSI driver")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/hsi/controllers/omap_ssi_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c
index eb98201583185..b23a576ed88a5 100644
--- a/drivers/hsi/controllers/omap_ssi_core.c
+++ b/drivers/hsi/controllers/omap_ssi_core.c
@@ -536,9 +536,9 @@ out3:
 	device_for_each_child(&pd->dev, NULL, ssi_remove_ports);
 out2:
 	ssi_remove_controller(ssi);
+	pm_runtime_disable(&pd->dev);
 out1:
 	platform_set_drvdata(pd, NULL);
-	pm_runtime_disable(&pd->dev);
 
 	return err;
 }
-- 
GitLab


From dac153f2802db1ad46207283cb9b2aae3d707a45 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Mon, 7 Nov 2022 10:51:34 +0200
Subject: [PATCH 1503/4122] RDMA/restrack: Release MR restrack when delete

The MR restrack also needs to be released when delete it, otherwise it
cause memory leak as the task struct won't be released.

Fixes: 13ef5539def7 ("RDMA/restrack: Count references to the verbs objects")
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/703db18e8d4ef628691fb93980a709be673e62e3.1667810736.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/restrack.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/infiniband/core/restrack.c b/drivers/infiniband/core/restrack.c
index 1f935d9f61785..01a499a8b88db 100644
--- a/drivers/infiniband/core/restrack.c
+++ b/drivers/infiniband/core/restrack.c
@@ -343,8 +343,6 @@ void rdma_restrack_del(struct rdma_restrack_entry *res)
 	rt = &dev->res[res->type];
 
 	old = xa_erase(&rt->xa, res->id);
-	if (res->type == RDMA_RESTRACK_MR)
-		return;
 	WARN_ON(old != res);
 
 out:
-- 
GitLab


From 5e15ff29b156bbbdeadae230c8ecd5ecd8ca2477 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Mon, 7 Nov 2022 10:51:35 +0200
Subject: [PATCH 1504/4122] RDMA/core: Make sure "ib_port" is valid when access
 sysfs node

The "ib_port" structure must be set before adding the sysfs kobject,
and reset after removing it, otherwise it may crash when accessing
the sysfs node:
  Unable to handle kernel NULL pointer dereference at virtual address 0000000000000050
  Mem abort info:
    ESR = 0x96000006
    Exception class = DABT (current EL), IL = 32 bits
    SET = 0, FnV = 0
    EA = 0, S1PTW = 0
  Data abort info:
    ISV = 0, ISS = 0x00000006
    CM = 0, WnR = 0
  user pgtable: 4k pages, 48-bit VAs, pgdp = 00000000e85f5ba5
  [0000000000000050] pgd=0000000848fd9003, pud=000000085b387003, pmd=0000000000000000
  Internal error: Oops: 96000006 [#2] PREEMPT SMP
  Modules linked in: ib_umad(O) mlx5_ib(O) nfnetlink_cttimeout(E) nfnetlink(E) act_gact(E) cls_flower(E) sch_ingress(E) openvswitch(E) nsh(E) nf_nat_ipv6(E) nf_nat_ipv4(E) nf_conncount(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) mst_pciconf(O) ipmi_devintf(E) ipmi_msghandler(E) ipmb_dev_int(OE) mlx5_core(O) mlxfw(O) mlxdevm(O) auxiliary(O) ib_uverbs(O) ib_core(O) mlx_compat(O) psample(E) sbsa_gwdt(E) uio_pdrv_genirq(E) uio(E) mlxbf_pmc(OE) mlxbf_gige(OE) mlxbf_tmfifo(OE) gpio_mlxbf2(OE) pwr_mlxbf(OE) mlx_trio(OE) i2c_mlxbf(OE) mlx_bootctl(OE) bluefield_edac(OE) knem(O) ip_tables(E) ipv6(E) crc_ccitt(E) [last unloaded: mst_pci]
  Process grep (pid: 3372, stack limit = 0x0000000022055c92)
  CPU: 5 PID: 3372 Comm: grep Tainted: G      D    OE     4.19.161-mlnx.47.gadcd9e3 #1
  Hardware name: https://www.mellanox.com BlueField SoC/BlueField SoC, BIOS BlueField:3.9.2-15-ga2403ab Sep  8 2022
  pstate: 40000005 (nZcv daif -PAN -UAO)
  pc : hw_stat_port_show+0x4c/0x80 [ib_core]
  lr : port_attr_show+0x40/0x58 [ib_core]
  sp : ffff000029f43b50
  x29: ffff000029f43b50 x28: 0000000019375000
  x27: ffff8007b821a540 x26: ffff000029f43e30
  x25: 0000000000008000 x24: ffff000000eaa958
  x23: 0000000000001000 x22: ffff8007a4ce3000
  x21: ffff8007baff8000 x20: ffff8007b9066ac0
  x19: ffff8007bae97578 x18: 0000000000000000
  x17: 0000000000000000 x16: 0000000000000000
  x15: 0000000000000000 x14: 0000000000000000
  x13: 0000000000000000 x12: 0000000000000000
  x11: 0000000000000000 x10: 0000000000000000
  x9 : 0000000000000000 x8 : ffff8007a4ce4000
  x7 : 0000000000000000 x6 : 000000000000003f
  x5 : ffff000000e6a280 x4 : ffff8007a4ce3000
  x3 : 0000000000000000 x2 : aaaaaaaaaaaaaaab
  x1 : ffff8007b9066a10 x0 : ffff8007baff8000
  Call trace:
   hw_stat_port_show+0x4c/0x80 [ib_core]
   port_attr_show+0x40/0x58 [ib_core]
   sysfs_kf_seq_show+0x8c/0x150
   kernfs_seq_show+0x44/0x50
   seq_read+0x1b4/0x45c
   kernfs_fop_read+0x148/0x1d8
   __vfs_read+0x58/0x180
   vfs_read+0x94/0x154
   ksys_read+0x68/0xd8
   __arm64_sys_read+0x28/0x34
   el0_svc_common+0x88/0x18c
   el0_svc_handler+0x78/0x94
   el0_svc+0x8/0xe8
  Code: f2955562 aa1603e4 aa1503e0 f9405683 (f9402861)

Fixes: d8a5883814b9 ("RDMA/core: Replace the ib_port_data hw_stats pointers with a ib_port pointer")
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/88867e705c42c1cd2011e45201c25eecdb9fef94.1667810736.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/sysfs.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 84c53bd2a52db..ee59d73915689 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -1213,6 +1213,9 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
 	p->port_num = port_num;
 	kobject_init(&p->kobj, &port_type);
 
+	if (device->port_data && is_full_dev)
+		device->port_data[port_num].sysfs = p;
+
 	cur_group = p->groups_list;
 	ret = alloc_port_table_group("gids", &p->groups[0], p->attrs_list,
 				     attr->gid_tbl_len, show_port_gid);
@@ -1258,9 +1261,6 @@ static struct ib_port *setup_port(struct ib_core_device *coredev, int port_num,
 	}
 
 	list_add_tail(&p->kobj.entry, &coredev->port_list);
-	if (device->port_data && is_full_dev)
-		device->port_data[port_num].sysfs = p;
-
 	return p;
 
 err_groups:
@@ -1268,6 +1268,8 @@ err_groups:
 err_del:
 	kobject_del(&p->kobj);
 err_put:
+	if (device->port_data && is_full_dev)
+		device->port_data[port_num].sysfs = NULL;
 	kobject_put(&p->kobj);
 	return ERR_PTR(ret);
 }
@@ -1276,14 +1278,17 @@ static void destroy_port(struct ib_core_device *coredev, struct ib_port *port)
 {
 	bool is_full_dev = &port->ibdev->coredev == coredev;
 
-	if (port->ibdev->port_data &&
-	    port->ibdev->port_data[port->port_num].sysfs == port)
-		port->ibdev->port_data[port->port_num].sysfs = NULL;
 	list_del(&port->kobj.entry);
 	if (is_full_dev)
 		sysfs_remove_groups(&port->kobj, port->ibdev->ops.port_groups);
+
 	sysfs_remove_groups(&port->kobj, port->groups_list);
 	kobject_del(&port->kobj);
+
+	if (port->ibdev->port_data &&
+	    port->ibdev->port_data[port->port_num].sysfs == port)
+		port->ibdev->port_data[port->port_num].sysfs = NULL;
+
 	kobject_put(&port->kobj);
 }
 
-- 
GitLab


From ecacb3751f254572af0009b9501e2cdc83a30b6a Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Mon, 7 Nov 2022 10:51:36 +0200
Subject: [PATCH 1505/4122] RDMA/nldev: Return "-EAGAIN" if the cm_id isn't
 from expected port

When filling a cm_id entry, return "-EAGAIN" instead of 0 if the cm_id
doesn'the have the same port as requested, otherwise an incomplete entry
may be returned, which causes "rdam res show cm_id" to return an error.

For example on a machine with two rdma devices with "rping -C 1 -v -s"
running background, the "rdma" command fails:
  $ rdma -V
  rdma utility, iproute2-5.19.0
  $ rdma res show cm_id
  link mlx5_0/- cm-idn 0 state LISTEN ps TCP pid 28056 comm rping src-addr 0.0.0.0:7174
  error: Protocol not available

While with this fix it succeeds:
  $ rdma res show cm_id
  link mlx5_0/- cm-idn 0 state LISTEN ps TCP pid 26395 comm rping src-addr 0.0.0.0:7174
  link mlx5_1/- cm-idn 0 state LISTEN ps TCP pid 26395 comm rping src-addr 0.0.0.0:7174

Fixes: 00313983cda6 ("RDMA/nldev: provide detailed CM_ID information")
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Link: https://lore.kernel.org/r/a08e898cdac5e28428eb749a99d9d981571b8ea7.1667810736.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/nldev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index b92358f606d00..2be76a3fdd87a 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -552,7 +552,7 @@ static int fill_res_cm_id_entry(struct sk_buff *msg, bool has_cap_net_admin,
 	struct rdma_cm_id *cm_id = &id_priv->id;
 
 	if (port && port != cm_id->port_num)
-		return 0;
+		return -EAGAIN;
 
 	if (cm_id->port_num &&
 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, cm_id->port_num))
-- 
GitLab


From 93e3f45a26310e3f3f8558be40df411e23ab742c Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:39 +0530
Subject: [PATCH 1506/4122] powerpc: Fix __WARN_FLAGS() for use with Objtool

Commit 1e688dd2a3d675 ("powerpc/bug: Provide better flexibility to
WARN_ON/__WARN_FLAGS() with asm goto") updated __WARN_FLAGS() to use asm
goto, and added a call to 'unreachable()' after the asm goto for optimal
code generation. With CONFIG_OBJTOOL enabled, 'annotate_unreachable()'
statement in 'unreachable()' tries to note down the location of the
subsequent instruction in a separate elf section to aid code flow
analysis. However, on powerpc, this results in gcc emitting a call to a
symbol of size 0. This results in objtool complaining of "unannotated
intra-function call" since the target symbol is not a valid function
call destination.

Objtool wants this annotation for code flow analysis, which we are not
yet enabling on powerpc. As such, expand the call to 'unreachable()' in
__WARN_FLAGS() without annotate_unreachable():
        barrier_before_unreachable();
        __builtin_unreachable();

This still results in optimal code generation for __WARN_FLAGS(), while
getting rid of the objtool warning.

We still need barrier_before_unreachable() to work around gcc bugs 82365
and 106751:
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365
- https://gcc.gnu.org/bugzilla/show_bug.cgi?id=106751

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-2-sv@linux.ibm.com
---
 arch/powerpc/include/asm/bug.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h
index 61a4736355c24..ef42adb44aa3f 100644
--- a/arch/powerpc/include/asm/bug.h
+++ b/arch/powerpc/include/asm/bug.h
@@ -99,7 +99,8 @@
 	__label__ __label_warn_on;				\
 								\
 	WARN_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags), __label_warn_on); \
-	unreachable();						\
+	barrier_before_unreachable();				\
+	__builtin_unreachable();				\
 								\
 __label_warn_on:						\
 	break;							\
-- 
GitLab


From 01f2cf0b990e58ae89142f57c7e02d33621311d2 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:40 +0530
Subject: [PATCH 1507/4122] powerpc: Override __ALIGN and __ALIGN_STR macros

In a subsequent patch, we would want to annotate powerpc assembly functions
with SYM_FUNC_START_LOCAL macro. This macro depends on __ALIGN macro.

The default expansion of __ALIGN macro is:
        #define __ALIGN      .align 4,0x90

So, override __ALIGN and __ALIGN_STR macros to use the same alignment as
that of the existing _GLOBAL macro. Also, do not pad with 0x90, because
repeated 0x90s are not a nop or trap on powerpc.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-3-sv@linux.ibm.com
---
 arch/powerpc/include/asm/linkage.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/linkage.h b/arch/powerpc/include/asm/linkage.h
index b71b9582e7546..b88d1d2cf3044 100644
--- a/arch/powerpc/include/asm/linkage.h
+++ b/arch/powerpc/include/asm/linkage.h
@@ -4,6 +4,9 @@
 
 #include <asm/types.h>
 
+#define __ALIGN		.align 2
+#define __ALIGN_STR	".align 2"
+
 #ifdef CONFIG_PPC64_ELF_ABI_V1
 #define cond_syscall(x) \
 	asm ("\t.weak " #x "\n\t.set " #x ", sys_ni_syscall\n"		\
-- 
GitLab


From 29a011fc79e625b2b02f25262657f7c4c59ae9f7 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:41 +0530
Subject: [PATCH 1508/4122] powerpc: Fix objtool unannotated intra-function
 call warnings

Objtool throws unannotated intra-function call warnings in the following
assembly files:

arch/powerpc/kernel/vector.o: warning: objtool: .text+0x53c: unannotated intra-function call

arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x60: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x124: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x5d4: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x5dc: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xcb8: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xd0c: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0x1030: unannotated intra-function call

arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x358: unannotated intra-function call
arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x728: unannotated intra-function call
arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x4d94: unannotated intra-function call
arch/powerpc/kernel/head_64.o: warning: objtool: .text+0x4ec4: unannotated intra-function call

arch/powerpc/kvm/book3s_hv_interrupts.o: warning: objtool: .text+0x6c: unannotated intra-function call
arch/powerpc/kernel/misc_64.o: warning: objtool: .text+0x64: unannotated intra-function call

Objtool does not add STT_NOTYPE symbols with size 0 to the rbtree, which
is why find_call_destination() function is not able to find the
destination symbol for 'bl' instruction. For such symbols, objtool is
throwing unannotated intra-function call warnings in assembly files. Fix
these warnings by annotating those symbols with SYM_FUNC_START_LOCAL and
SYM_FUNC_END macros, inorder to set symbol type to STT_FUNC and symbol
size accordingly.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-4-sv@linux.ibm.com
---
 arch/powerpc/kernel/exceptions-64s.S    |  4 +++-
 arch/powerpc/kernel/head_64.S           |  7 +++++--
 arch/powerpc/kernel/misc_64.S           |  4 +++-
 arch/powerpc/kernel/vector.S            |  4 +++-
 arch/powerpc/kvm/book3s_hv_interrupts.S |  4 +++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 22 +++++++++++++++-------
 6 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 5381a43e50fef..77201ad9f329d 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -13,6 +13,7 @@
  *
  */
 
+#include <linux/linkage.h>
 #include <asm/hw_irq.h>
 #include <asm/exception-64s.h>
 #include <asm/ptrace.h>
@@ -3112,7 +3113,7 @@ _GLOBAL(enable_machine_check)
 	blr
 
 /* MSR[RI] should be clear because this uses SRR[01] */
-disable_machine_check:
+SYM_FUNC_START_LOCAL(disable_machine_check)
 	mflr	r0
 	bcl	20,31,$+4
 0:	mflr	r3
@@ -3125,3 +3126,4 @@ disable_machine_check:
 	RFI_TO_KERNEL
 1:	mtlr	r0
 	blr
+SYM_FUNC_END(disable_machine_check)
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index dedcc6fe2263a..874efd25cc454 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -18,6 +18,7 @@
  *  variants.
  */
 
+#include <linux/linkage.h>
 #include <linux/threads.h>
 #include <linux/init.h>
 #include <asm/reg.h>
@@ -462,7 +463,7 @@ generic_secondary_common_init:
  * Assumes we're mapped EA == RA if the MMU is on.
  */
 #ifdef CONFIG_PPC_BOOK3S
-__mmu_off:
+SYM_FUNC_START_LOCAL(__mmu_off)
 	mfmsr	r3
 	andi.	r0,r3,MSR_IR|MSR_DR
 	beqlr
@@ -473,6 +474,7 @@ __mmu_off:
 	sync
 	rfid
 	b	.	/* prevent speculative execution */
+SYM_FUNC_END(__mmu_off)
 #endif
 
 
@@ -869,7 +871,7 @@ _GLOBAL(start_secondary_resume)
 /*
  * This subroutine clobbers r11 and r12
  */
-enable_64b_mode:
+SYM_FUNC_START_LOCAL(enable_64b_mode)
 	mfmsr	r11			/* grab the current MSR */
 #ifdef CONFIG_PPC_BOOK3E_64
 	oris	r11,r11,0x8000		/* CM bit set, we'll set ICM later */
@@ -881,6 +883,7 @@ enable_64b_mode:
 	isync
 #endif
 	blr
+SYM_FUNC_END(enable_64b_mode)
 
 /*
  * This puts the TOC pointer into r2, offset by 0x8000 (as expected
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 36184cada00b1..c61a7ba446a8a 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -9,6 +9,7 @@
  * PPC64 updates by Dave Engebretsen (engebret@us.ibm.com)
  */
 
+#include <linux/linkage.h>
 #include <linux/sys.h>
 #include <asm/unistd.h>
 #include <asm/errno.h>
@@ -353,7 +354,7 @@ _GLOBAL(kexec_smp_wait)
  *
  * don't overwrite r3 here, it is live for kexec_wait above.
  */
-real_mode:	/* assume normal blr return */
+SYM_FUNC_START_LOCAL(real_mode)	/* assume normal blr return */
 #ifdef CONFIG_PPC_BOOK3E_64
 	/* Create an identity mapping. */
 	b	kexec_create_tlb
@@ -370,6 +371,7 @@ real_mode:	/* assume normal blr return */
 	mtspr	SPRN_SRR0,r11
 	rfid
 #endif
+SYM_FUNC_END(real_mode)
 
 /*
  * kexec_sequence(newstack, start, image, control, clear_all(),
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index 5cf64740edb82..ffe5d90abe179 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/reg.h>
@@ -185,7 +186,7 @@ fphalf:
  * Internal routine to enable floating point and set FPSCR to 0.
  * Don't call it from C; it doesn't use the normal calling convention.
  */
-fpenable:
+SYM_FUNC_START_LOCAL(fpenable)
 #ifdef CONFIG_PPC32
 	stwu	r1,-64(r1)
 #else
@@ -202,6 +203,7 @@ fpenable:
 	mffs	fr31
 	MTFSF_L(fr1)
 	blr
+SYM_FUNC_END(fpenable)
 
 fpdisable:
 	mtlr	r12
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 59d89e4b154a6..c0deeea7eef30 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -9,6 +9,7 @@
  * Authors: Alexander Graf <agraf@suse.de>
  */
 
+#include <linux/linkage.h>
 #include <asm/ppc_asm.h>
 #include <asm/kvm_asm.h>
 #include <asm/reg.h>
@@ -107,7 +108,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
 /*
  * void kvmhv_save_host_pmu(void)
  */
-kvmhv_save_host_pmu:
+SYM_FUNC_START_LOCAL(kvmhv_save_host_pmu)
 BEGIN_FTR_SECTION
 	/* Work around P8 PMAE bug */
 	li	r3, -1
@@ -154,3 +155,4 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	stw	r8, HSTATE_PMC5(r13)
 	stw	r9, HSTATE_PMC6(r13)
 31:	blr
+SYM_FUNC_END(kvmhv_save_host_pmu)
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 37f50861dd98f..a69d36cbf43be 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -10,6 +10,7 @@
  * Authors: Alexander Graf <agraf@suse.de>
  */
 
+#include <linux/linkage.h>
 #include <asm/ppc_asm.h>
 #include <asm/code-patching-asm.h>
 #include <asm/kvm_asm.h>
@@ -2358,7 +2359,7 @@ hmi_realmode:
  * This routine calls kvmppc_read_intr, a C function, if an external
  * interrupt is pending.
  */
-kvmppc_check_wake_reason:
+SYM_FUNC_START_LOCAL(kvmppc_check_wake_reason)
 	mfspr	r6, SPRN_SRR1
 BEGIN_FTR_SECTION
 	rlwinm	r6, r6, 45-31, 0xf	/* extract wake reason field (P8) */
@@ -2427,6 +2428,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	addi	r1, r1, PPC_MIN_STKFRM
 	mtlr	r0
 	blr
+SYM_FUNC_END(kvmppc_check_wake_reason)
 
 /*
  * Save away FP, VMX and VSX registers.
@@ -2434,7 +2436,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  * N.B. r30 and r31 are volatile across this function,
  * thus it is not callable from C.
  */
-kvmppc_save_fp:
+SYM_FUNC_START_LOCAL(kvmppc_save_fp)
 	mflr	r30
 	mr	r31,r3
 	mfmsr	r5
@@ -2462,6 +2464,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	stw	r6,VCPU_VRSAVE(r31)
 	mtlr	r30
 	blr
+SYM_FUNC_END(kvmppc_save_fp)
 
 /*
  * Load up FP, VMX and VSX registers
@@ -2469,7 +2472,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
  * N.B. r30 and r31 are volatile across this function,
  * thus it is not callable from C.
  */
-kvmppc_load_fp:
+SYM_FUNC_START_LOCAL(kvmppc_load_fp)
 	mflr	r30
 	mr	r31,r4
 	mfmsr	r9
@@ -2498,6 +2501,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	mtlr	r30
 	mr	r4,r31
 	blr
+SYM_FUNC_END(kvmppc_load_fp)
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 /*
@@ -2746,7 +2750,7 @@ kvmppc_bad_host_intr:
  *   r9 has a vcpu pointer (in)
  *   r0 is used as a scratch register
  */
-kvmppc_msr_interrupt:
+SYM_FUNC_START_LOCAL(kvmppc_msr_interrupt)
 	rldicl	r0, r11, 64 - MSR_TS_S_LG, 62
 	cmpwi	r0, 2 /* Check if we are in transactional state..  */
 	ld	r11, VCPU_INTR_MSR(r9)
@@ -2755,13 +2759,14 @@ kvmppc_msr_interrupt:
 	li	r0, 1
 1:	rldimi	r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
 	blr
+SYM_FUNC_END(kvmppc_msr_interrupt)
 
 /*
  * void kvmhv_load_guest_pmu(struct kvm_vcpu *vcpu)
  *
  * Load up guest PMU state.  R3 points to the vcpu struct.
  */
-kvmhv_load_guest_pmu:
+SYM_FUNC_START_LOCAL(kvmhv_load_guest_pmu)
 	mr	r4, r3
 	mflr	r0
 	li	r3, 1
@@ -2811,13 +2816,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	isync
 	mtlr	r0
 	blr
+SYM_FUNC_END(kvmhv_load_guest_pmu)
 
 /*
  * void kvmhv_load_host_pmu(void)
  *
  * Reload host PMU state saved in the PACA by kvmhv_save_host_pmu.
  */
-kvmhv_load_host_pmu:
+SYM_FUNC_START_LOCAL(kvmhv_load_host_pmu)
 	mflr	r0
 	lbz	r4, PACA_PMCINUSE(r13) /* is the host using the PMU? */
 	cmpwi	r4, 0
@@ -2859,6 +2865,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 	isync
 	mtlr	r0
 23:	blr
+SYM_FUNC_END(kvmhv_load_host_pmu)
 
 /*
  * void kvmhv_save_guest_pmu(struct kvm_vcpu *vcpu, bool pmu_in_use)
@@ -2866,7 +2873,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
  * Save guest PMU state into the vcpu struct.
  * r3 = vcpu, r4 = full save flag (PMU in use flag set in VPA)
  */
-kvmhv_save_guest_pmu:
+SYM_FUNC_START_LOCAL(kvmhv_save_guest_pmu)
 	mr	r9, r3
 	mr	r8, r4
 BEGIN_FTR_SECTION
@@ -2942,6 +2949,7 @@ BEGIN_FTR_SECTION
 	mtspr	SPRN_MMCRS, r4
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 22:	blr
+SYM_FUNC_END(kvmhv_save_guest_pmu)
 
 /*
  * This works around a hardware bug on POWER8E processors, where
-- 
GitLab


From 8d0c21b50655bfe136a76cf384495ba1f9c87224 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:42 +0530
Subject: [PATCH 1509/4122] powerpc: Curb objtool unannotated intra-function
 call warnings

objtool throws the following unannotated intra-function call warnings:
arch/powerpc/kernel/entry_64.o: warning: objtool: .text+0x4: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xe64: unannotated intra-function call
arch/powerpc/kvm/book3s_hv_rmhandlers.o: warning: objtool: .text+0xee4: unannotated intra-function call

Fix these warnings by annotating intra-function calls, using
ANNOTATE_INTRA_FUNCTION_CALL macro, to indicate that the branch targets
are valid.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-5-sv@linux.ibm.com
---
 arch/powerpc/kernel/entry_64.S          | 2 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 3e2e37e6ecabe..1bf1121e17f1c 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -14,6 +14,7 @@
  *  code, and exception/interrupt return code for PowerPC.
  */
 
+#include <linux/objtool.h>
 #include <linux/errno.h>
 #include <linux/err.h>
 #include <asm/cache.h>
@@ -73,6 +74,7 @@ flush_branch_caches:
 
 	// Flush the link stack
 	.rept 64
+	ANNOTATE_INTRA_FUNCTION_CALL
 	bl	.+4
 	.endr
 	b	1f
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a69d36cbf43be..96b65b5301569 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -11,6 +11,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/objtool.h>
 #include <asm/ppc_asm.h>
 #include <asm/code-patching-asm.h>
 #include <asm/kvm_asm.h>
@@ -1523,12 +1524,14 @@ kvm_flush_link_stack:
 
 	/* Flush the link stack. On Power8 it's up to 32 entries in size. */
 	.rept 32
+	ANNOTATE_INTRA_FUNCTION_CALL
 	bl	.+4
 	.endr
 
 	/* And on Power9 it's up to 64. */
 BEGIN_FTR_SECTION
 	.rept 32
+	ANNOTATE_INTRA_FUNCTION_CALL
 	bl	.+4
 	.endr
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-- 
GitLab


From 07445ae1c26367928311e13f2a821ae94410da7e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 15 Nov 2022 11:16:24 +0100
Subject: [PATCH 1510/4122] gpiolib: of: change of_find_gpio() to accept device
 node

In preparation of switching all OF-based GPIO lookups to go through
of_find_gpio() let's change it to accept device node as its argument as
we do not always have access to device structure.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 7 +++----
 drivers/gpio/gpiolib-of.h | 4 ++--
 drivers/gpio/gpiolib.c    | 5 +++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 4be3c21aa7188..596b8e21700ef 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -605,7 +605,7 @@ static const of_find_gpio_quirk of_find_gpio_quirks[] = {
 	NULL
 };
 
-struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
+struct gpio_desc *of_find_gpio(struct device_node *np, const char *con_id,
 			       unsigned int idx, unsigned long *flags)
 {
 	char prop_name[32]; /* 32 is max size of property name */
@@ -623,8 +623,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 			snprintf(prop_name, sizeof(prop_name), "%s",
 				 gpio_suffixes[i]);
 
-		desc = of_get_named_gpiod_flags(dev->of_node, prop_name, idx,
-						&of_flags);
+		desc = of_get_named_gpiod_flags(np, prop_name, idx, &of_flags);
 
 		if (!gpiod_not_found(desc))
 			break;
@@ -632,7 +631,7 @@ struct gpio_desc *of_find_gpio(struct device *dev, const char *con_id,
 
 	/* Properly named GPIO was not found, try workarounds */
 	for (q = of_find_gpio_quirks; gpiod_not_found(desc) && *q; q++)
-		desc = (*q)(dev->of_node, con_id, idx, &of_flags);
+		desc = (*q)(np, con_id, idx, &of_flags);
 
 	if (IS_ERR(desc))
 		return desc;
diff --git a/drivers/gpio/gpiolib-of.h b/drivers/gpio/gpiolib-of.h
index 2c32a332ede59..bd4131ff61d36 100644
--- a/drivers/gpio/gpiolib-of.h
+++ b/drivers/gpio/gpiolib-of.h
@@ -7,7 +7,7 @@ struct gpio_chip;
 enum of_gpio_flags;
 
 #ifdef CONFIG_OF_GPIO
-struct gpio_desc *of_find_gpio(struct device *dev,
+struct gpio_desc *of_find_gpio(struct device_node *np,
 			       const char *con_id,
 			       unsigned int idx,
 			       unsigned long *lookupflags);
@@ -16,7 +16,7 @@ void of_gpiochip_remove(struct gpio_chip *gc);
 int of_gpio_get_count(struct device *dev, const char *con_id);
 void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev);
 #else
-static inline struct gpio_desc *of_find_gpio(struct device *dev,
+static inline struct gpio_desc *of_find_gpio(struct device_node *np,
 					     const char *con_id,
 					     unsigned int idx,
 					     unsigned long *lookupflags)
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 11fb7ec883e9f..a80fc8abb03f3 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -4122,14 +4122,15 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 	int ret;
 	/* Maybe we have a device name, maybe not */
 	const char *devname = dev ? dev_name(dev) : "?";
-	const struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL;
+	struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL;
 
 	dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id);
 
 	/* Using device tree? */
 	if (is_of_node(fwnode)) {
 		dev_dbg(dev, "using device tree for GPIO lookup\n");
-		desc = of_find_gpio(dev, con_id, idx, &lookupflags);
+		desc = of_find_gpio(to_of_node(fwnode),
+				    con_id, idx, &lookupflags);
 	} else if (is_acpi_node(fwnode)) {
 		dev_dbg(dev, "using ACPI for GPIO lookup\n");
 		desc = acpi_find_gpio(dev, con_id, idx, &flags, &lookupflags);
-- 
GitLab


From 2b6bce80ae70b91134a5731d85076042ae90c300 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 11 Nov 2022 14:19:04 -0800
Subject: [PATCH 1511/4122] gpiolib: acpi: change acpi_find_gpio() to accept
 firmware node

In preparation of switching all ACPI-based GPIO lookups to go through
acpi_find_gpio() let's change it to accept device node as its argument
as we do not always have access to device structure.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 8 ++++++--
 drivers/gpio/gpiolib-acpi.h | 4 ++--
 drivers/gpio/gpiolib.c      | 3 ++-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index a7d2358736fe7..61b311e4560ff 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -906,18 +906,22 @@ static bool acpi_can_fallback_to_crs(struct acpi_device *adev,
 	return con_id == NULL;
 }
 
-struct gpio_desc *acpi_find_gpio(struct device *dev,
+struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 				 const char *con_id,
 				 unsigned int idx,
 				 enum gpiod_flags *dflags,
 				 unsigned long *lookupflags)
 {
-	struct acpi_device *adev = ACPI_COMPANION(dev);
+	struct acpi_device *adev;
 	struct acpi_gpio_info info;
 	struct gpio_desc *desc;
 	char propname[32];
 	int i;
 
+	adev = to_acpi_device_node(fwnode);
+	if (!adev)
+		return ERR_PTR(-ENODEV);
+
 	/* Try first from _DSD */
 	for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) {
 		if (con_id) {
diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h
index 1ac6816839dbc..9fc34830639ce 100644
--- a/drivers/gpio/gpiolib-acpi.h
+++ b/drivers/gpio/gpiolib-acpi.h
@@ -48,7 +48,7 @@ int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags,
 int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
 					struct acpi_gpio_info *info);
 
-struct gpio_desc *acpi_find_gpio(struct device *dev,
+struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 				 const char *con_id,
 				 unsigned int idx,
 				 enum gpiod_flags *dflags,
@@ -83,7 +83,7 @@ acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
 }
 
 static inline struct gpio_desc *
-acpi_find_gpio(struct device *dev, const char *con_id,
+acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id,
 	       unsigned int idx, enum gpiod_flags *dflags,
 	       unsigned long *lookupflags)
 {
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index a80fc8abb03f3..e874bb0ef685e 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -4133,7 +4133,8 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 				    con_id, idx, &lookupflags);
 	} else if (is_acpi_node(fwnode)) {
 		dev_dbg(dev, "using ACPI for GPIO lookup\n");
-		desc = acpi_find_gpio(dev, con_id, idx, &flags, &lookupflags);
+		desc = acpi_find_gpio(fwnode,
+				      con_id, idx, &flags, &lookupflags);
 	}
 
 	/*
-- 
GitLab


From 16ba046e86e93f42117efe7ca7a7940b83c60afc Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 11 Nov 2022 14:19:05 -0800
Subject: [PATCH 1512/4122] gpiolib: acpi: teach acpi_find_gpio() to handle
 data-only nodes

In preparation of switching all ACPI-based GPIO lookups to go through
acpi_find_gpio() we need to make sure it can handle data-only ACPI
nodes, same as existing acpi_node_get_gpiod().

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 76 ++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 26 deletions(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 61b311e4560ff..bd36fac20ea0d 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -864,8 +864,9 @@ static int acpi_gpio_property_lookup(struct fwnode_handle *fwnode,
  * function only returns the first.
  */
 static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev,
-					  const char *propname, int index,
-					  struct acpi_gpio_info *info)
+						 const char *propname,
+						 int index,
+						 struct acpi_gpio_info *info)
 {
 	struct acpi_gpio_lookup lookup;
 	int ret;
@@ -896,6 +897,44 @@ static struct gpio_desc *acpi_get_gpiod_by_index(struct acpi_device *adev,
 	return ret ? ERR_PTR(ret) : lookup.desc;
 }
 
+/**
+ * acpi_get_gpiod_from_data() - get a GPIO descriptor from ACPI data node
+ * @fwnode: pointer to an ACPI firmware node to get the GPIO information from
+ * @propname: Property name of the GPIO
+ * @index: index of GpioIo/GpioInt resource (starting from %0)
+ * @info: info pointer to fill in (optional)
+ *
+ * This function uses the property-based GPIO lookup to get to the GPIO
+ * resource with the relevant information from a data-only ACPI firmware node
+ * and uses that to obtain the GPIO descriptor to return.
+ *
+ * If the GPIO cannot be translated or there is an error an ERR_PTR is
+ * returned.
+ */
+static struct gpio_desc *acpi_get_gpiod_from_data(struct fwnode_handle *fwnode,
+						  const char *propname,
+						  int index,
+						  struct acpi_gpio_info *info)
+{
+	struct acpi_gpio_lookup lookup;
+	int ret;
+
+	if (!is_acpi_data_node(fwnode))
+		return ERR_PTR(-ENODEV);
+
+	if (!propname)
+		return ERR_PTR(-EINVAL);
+
+	lookup.index = index;
+
+	ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = acpi_gpio_resource_lookup(&lookup, info);
+	return ret ? ERR_PTR(ret) : lookup.desc;
+}
+
 static bool acpi_can_fallback_to_crs(struct acpi_device *adev,
 				     const char *con_id)
 {
@@ -912,16 +951,12 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 				 enum gpiod_flags *dflags,
 				 unsigned long *lookupflags)
 {
-	struct acpi_device *adev;
+	struct acpi_device *adev = to_acpi_device_node(fwnode);
 	struct acpi_gpio_info info;
 	struct gpio_desc *desc;
 	char propname[32];
 	int i;
 
-	adev = to_acpi_device_node(fwnode);
-	if (!adev)
-		return ERR_PTR(-ENODEV);
-
 	/* Try first from _DSD */
 	for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) {
 		if (con_id) {
@@ -932,7 +967,12 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 				 gpio_suffixes[i]);
 		}
 
-		desc = acpi_get_gpiod_by_index(adev, propname, idx, &info);
+		if (adev)
+			desc = acpi_get_gpiod_by_index(adev,
+						       propname, idx, &info);
+		else
+			desc = acpi_get_gpiod_from_data(fwnode,
+						        propname, idx, &info);
 		if (!IS_ERR(desc))
 			break;
 		if (PTR_ERR(desc) == -EPROBE_DEFER)
@@ -941,7 +981,7 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 
 	/* Then from plain _CRS GPIOs */
 	if (IS_ERR(desc)) {
-		if (!acpi_can_fallback_to_crs(adev, con_id))
+		if (!adev || !acpi_can_fallback_to_crs(adev, con_id))
 			return ERR_PTR(-ENOENT);
 
 		desc = acpi_get_gpiod_by_index(adev, NULL, idx, &info);
@@ -979,29 +1019,13 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
 				      const char *propname, int index,
 				      struct acpi_gpio_info *info)
 {
-	struct acpi_gpio_lookup lookup;
 	struct acpi_device *adev;
-	int ret;
 
 	adev = to_acpi_device_node(fwnode);
 	if (adev)
 		return acpi_get_gpiod_by_index(adev, propname, index, info);
 
-	if (!is_acpi_data_node(fwnode))
-		return ERR_PTR(-ENODEV);
-
-	if (!propname)
-		return ERR_PTR(-EINVAL);
-
-	memset(&lookup, 0, sizeof(lookup));
-	lookup.index = index;
-
-	ret = acpi_gpio_property_lookup(fwnode, propname, index, &lookup);
-	if (ret)
-		return ERR_PTR(ret);
-
-	ret = acpi_gpio_resource_lookup(&lookup, info);
-	return ret ? ERR_PTR(ret) : lookup.desc;
+	return acpi_get_gpiod_from_data(fwnode, propname, index, info);
 }
 
 /**
-- 
GitLab


From b7452d670fdef8974e18754342fe6f68e20c2567 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 15 Nov 2022 11:20:47 +0100
Subject: [PATCH 1513/4122] gpiolib: acpi: avoid leaking ACPI details into
 upper gpiolib layers

There is no need for the generic parts of GPIOLIB to be aware of
implementation details of ACPI-bases lookups.

Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c | 51 +++++++++++++++++++++++++++++++------
 drivers/gpio/gpiolib-acpi.h | 46 +++------------------------------
 drivers/gpio/gpiolib.c      |  8 ++----
 3 files changed, 48 insertions(+), 57 deletions(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index bd36fac20ea0d..1d69b707cbb18 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -89,6 +89,30 @@ struct acpi_gpio_chip {
 	struct list_head deferred_req_irqs_list_entry;
 };
 
+/**
+ * struct acpi_gpio_info - ACPI GPIO specific information
+ * @adev: reference to ACPI device which consumes GPIO resource
+ * @flags: GPIO initialization flags
+ * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo
+ * @pin_config: pin bias as provided by ACPI
+ * @polarity: interrupt polarity as provided by ACPI
+ * @triggering: triggering type as provided by ACPI
+ * @wake_capable: wake capability as provided by ACPI
+ * @debounce: debounce timeout as provided by ACPI
+ * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping
+ */
+struct acpi_gpio_info {
+	struct acpi_device *adev;
+	enum gpiod_flags flags;
+	bool gpioint;
+	int pin_config;
+	int polarity;
+	int triggering;
+	bool wake_capable;
+	unsigned int debounce;
+	unsigned int quirks;
+};
+
 /*
  * For GPIO chips which call acpi_gpiochip_request_interrupts() before late_init
  * (so builtin drivers) we register the ACPI GpioInt IRQ handlers from a
@@ -670,8 +694,8 @@ __acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, enum gpiod_flags update)
 	return ret;
 }
 
-int
-acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info)
+static int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags,
+				        struct acpi_gpio_info *info)
 {
 	struct device *dev = &info->adev->dev;
 	enum gpiod_flags old = *flags;
@@ -690,8 +714,8 @@ acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *inf
 	return ret;
 }
 
-int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
-					struct acpi_gpio_info *info)
+static int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
+					       struct acpi_gpio_info *info)
 {
 	switch (info->pin_config) {
 	case ACPI_PIN_CONFIG_PULLUP:
@@ -1005,7 +1029,8 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
  * @fwnode: pointer to an ACPI firmware node to get the GPIO information from
  * @propname: Property name of the GPIO
  * @index: index of GpioIo/GpioInt resource (starting from %0)
- * @info: info pointer to fill in (optional)
+ * @lflags: bitmask of gpio_lookup_flags GPIO_* values
+ * @dflags: gpiod initialization flags
  *
  * If @fwnode is an ACPI device object, call acpi_get_gpiod_by_index() for it.
  * Otherwise (i.e. it is a data-only non-device object), use the property-based
@@ -1017,15 +1042,25 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
  */
 struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
 				      const char *propname, int index,
-				      struct acpi_gpio_info *info)
+				      unsigned long *lflags,
+				      enum gpiod_flags *dflags)
 {
+	struct acpi_gpio_info info;
 	struct acpi_device *adev;
+	struct gpio_desc *desc;
 
 	adev = to_acpi_device_node(fwnode);
 	if (adev)
-		return acpi_get_gpiod_by_index(adev, propname, index, info);
+		desc = acpi_get_gpiod_by_index(adev, propname, index, &info);
+	else
+		desc = acpi_get_gpiod_from_data(fwnode, propname, index, &info);
 
-	return acpi_get_gpiod_from_data(fwnode, propname, index, info);
+	if (!IS_ERR(desc)) {
+		acpi_gpio_update_gpiod_flags(dflags, &info);
+		acpi_gpio_update_gpiod_lookup_flags(lflags, &info);
+	}
+
+	return desc;
 }
 
 /**
diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h
index 9fc34830639ce..42adaab518c5b 100644
--- a/drivers/gpio/gpiolib-acpi.h
+++ b/drivers/gpio/gpiolib-acpi.h
@@ -10,30 +10,6 @@
 
 struct acpi_device;
 
-/**
- * struct acpi_gpio_info - ACPI GPIO specific information
- * @adev: reference to ACPI device which consumes GPIO resource
- * @flags: GPIO initialization flags
- * @gpioint: if %true this GPIO is of type GpioInt otherwise type is GpioIo
- * @pin_config: pin bias as provided by ACPI
- * @polarity: interrupt polarity as provided by ACPI
- * @triggering: triggering type as provided by ACPI
- * @wake_capable: wake capability as provided by ACPI
- * @debounce: debounce timeout as provided by ACPI
- * @quirks: Linux specific quirks as provided by struct acpi_gpio_mapping
- */
-struct acpi_gpio_info {
-	struct acpi_device *adev;
-	enum gpiod_flags flags;
-	bool gpioint;
-	int pin_config;
-	int polarity;
-	int triggering;
-	bool wake_capable;
-	unsigned int debounce;
-	unsigned int quirks;
-};
-
 #ifdef CONFIG_ACPI
 void acpi_gpiochip_add(struct gpio_chip *chip);
 void acpi_gpiochip_remove(struct gpio_chip *chip);
@@ -43,11 +19,6 @@ void acpi_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev);
 void acpi_gpiochip_request_interrupts(struct gpio_chip *chip);
 void acpi_gpiochip_free_interrupts(struct gpio_chip *chip);
 
-int acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags,
-				 struct acpi_gpio_info *info);
-int acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
-					struct acpi_gpio_info *info);
-
 struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 				 const char *con_id,
 				 unsigned int idx,
@@ -55,7 +26,8 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 				 unsigned long *lookupflags);
 struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
 				      const char *propname, int index,
-				      struct acpi_gpio_info *info);
+				      unsigned long *lflags,
+				      enum gpiod_flags *dflags);
 
 int acpi_gpio_count(struct device *dev, const char *con_id);
 #else
@@ -70,18 +42,6 @@ acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { }
 static inline void
 acpi_gpiochip_free_interrupts(struct gpio_chip *chip) { }
 
-static inline int
-acpi_gpio_update_gpiod_flags(enum gpiod_flags *flags, struct acpi_gpio_info *info)
-{
-	return 0;
-}
-static inline int
-acpi_gpio_update_gpiod_lookup_flags(unsigned long *lookupflags,
-				    struct acpi_gpio_info *info)
-{
-	return 0;
-}
-
 static inline struct gpio_desc *
 acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id,
 	       unsigned int idx, enum gpiod_flags *dflags,
@@ -91,7 +51,7 @@ acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id,
 }
 static inline struct gpio_desc *
 acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname,
-		    int index, struct acpi_gpio_info *info)
+		    int index, unsigned long *lflags, enum gpiod_flags *dflags)
 {
 	return ERR_PTR(-ENXIO);
 }
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index e874bb0ef685e..b213f5f93ae09 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -3890,14 +3890,10 @@ static struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
 					      label);
 		return desc;
 	} else if (is_acpi_node(fwnode)) {
-		struct acpi_gpio_info info;
-
-		desc = acpi_node_get_gpiod(fwnode, propname, index, &info);
+		desc = acpi_node_get_gpiod(fwnode, propname, index,
+					   &lflags, &dflags);
 		if (IS_ERR(desc))
 			return desc;
-
-		acpi_gpio_update_gpiod_flags(&dflags, &info);
-		acpi_gpio_update_gpiod_lookup_flags(&lflags, &info);
 	} else {
 		return ERR_PTR(-EINVAL);
 	}
-- 
GitLab


From 8eb1f71e7acca4f92cf9cf83030cbb8ec2524025 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 11 Nov 2022 14:19:07 -0800
Subject: [PATCH 1514/4122] gpiolib: consolidate GPIO lookups

Ensure that all paths to obtain/look up GPIOD from generic
consumer-visible APIs go through the new gpiod_find_and_request()
helper, so that we can easily extend it with support for new firmware
mechanisms.

The only exception is OF-specific [devm_]gpiod_get_from_of_node() API
that is still being used by a couple of drivers and will be removed as
soon as patches converting them to use generic fwnode/device APIs are
accepted.

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-acpi.c |  39 -------
 drivers/gpio/gpiolib-acpi.h |  10 --
 drivers/gpio/gpiolib.c      | 204 ++++++++++++++----------------------
 3 files changed, 76 insertions(+), 177 deletions(-)

diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c
index 1d69b707cbb18..c99c94e5483f6 100644
--- a/drivers/gpio/gpiolib-acpi.c
+++ b/drivers/gpio/gpiolib-acpi.c
@@ -1024,45 +1024,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 	return desc;
 }
 
-/**
- * acpi_node_get_gpiod() - get a GPIO descriptor from ACPI resources
- * @fwnode: pointer to an ACPI firmware node to get the GPIO information from
- * @propname: Property name of the GPIO
- * @index: index of GpioIo/GpioInt resource (starting from %0)
- * @lflags: bitmask of gpio_lookup_flags GPIO_* values
- * @dflags: gpiod initialization flags
- *
- * If @fwnode is an ACPI device object, call acpi_get_gpiod_by_index() for it.
- * Otherwise (i.e. it is a data-only non-device object), use the property-based
- * GPIO lookup to get to the GPIO resource with the relevant information and use
- * that to obtain the GPIO descriptor to return.
- *
- * If the GPIO cannot be translated or there is an error an ERR_PTR is
- * returned.
- */
-struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
-				      const char *propname, int index,
-				      unsigned long *lflags,
-				      enum gpiod_flags *dflags)
-{
-	struct acpi_gpio_info info;
-	struct acpi_device *adev;
-	struct gpio_desc *desc;
-
-	adev = to_acpi_device_node(fwnode);
-	if (adev)
-		desc = acpi_get_gpiod_by_index(adev, propname, index, &info);
-	else
-		desc = acpi_get_gpiod_from_data(fwnode, propname, index, &info);
-
-	if (!IS_ERR(desc)) {
-		acpi_gpio_update_gpiod_flags(dflags, &info);
-		acpi_gpio_update_gpiod_lookup_flags(lflags, &info);
-	}
-
-	return desc;
-}
-
 /**
  * acpi_dev_gpio_irq_wake_get_by() - Find GpioInt and translate it to Linux IRQ number
  * @adev: pointer to a ACPI device to get IRQ from
diff --git a/drivers/gpio/gpiolib-acpi.h b/drivers/gpio/gpiolib-acpi.h
index 42adaab518c5b..d2b5dab5c5bff 100644
--- a/drivers/gpio/gpiolib-acpi.h
+++ b/drivers/gpio/gpiolib-acpi.h
@@ -24,10 +24,6 @@ struct gpio_desc *acpi_find_gpio(struct fwnode_handle *fwnode,
 				 unsigned int idx,
 				 enum gpiod_flags *dflags,
 				 unsigned long *lookupflags);
-struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
-				      const char *propname, int index,
-				      unsigned long *lflags,
-				      enum gpiod_flags *dflags);
 
 int acpi_gpio_count(struct device *dev, const char *con_id);
 #else
@@ -49,12 +45,6 @@ acpi_find_gpio(struct fwnode_handle *fwnode, const char *con_id,
 {
 	return ERR_PTR(-ENOENT);
 }
-static inline struct gpio_desc *
-acpi_node_get_gpiod(struct fwnode_handle *fwnode, const char *propname,
-		    int index, unsigned long *lflags, enum gpiod_flags *dflags)
-{
-	return ERR_PTR(-ENXIO);
-}
 static inline int acpi_gpio_count(struct device *dev, const char *con_id)
 {
 	return -ENODEV;
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index b213f5f93ae09..7f739096c4cf7 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -366,7 +366,7 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc)
 static int devprop_gpiochip_set_names(struct gpio_chip *chip)
 {
 	struct gpio_device *gdev = chip->gpiodev;
-	struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev);
+	const struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev);
 	const char **names;
 	int ret, i;
 	int count;
@@ -3853,58 +3853,84 @@ static int platform_gpio_count(struct device *dev, const char *con_id)
 	return count;
 }
 
-/**
- * fwnode_get_named_gpiod - obtain a GPIO from firmware node
- * @fwnode:	handle of the firmware node
- * @propname:	name of the firmware property representing the GPIO
- * @index:	index of the GPIO to obtain for the consumer
- * @dflags:	GPIO initialization flags
- * @label:	label to attach to the requested GPIO
- *
- * This function can be used for drivers that get their configuration
- * from opaque firmware.
- *
- * The function properly finds the corresponding GPIO using whatever is the
- * underlying firmware interface and then makes sure that the GPIO
- * descriptor is requested before it is returned to the caller.
- *
- * Returns:
- * On successful request the GPIO pin is configured in accordance with
- * provided @dflags.
- *
- * In case of error an ERR_PTR() is returned.
- */
-static struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
-						const char *propname, int index,
-						enum gpiod_flags dflags,
-						const char *label)
+static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode,
+					      struct device *consumer,
+					      const char *con_id,
+					      unsigned int idx,
+					      enum gpiod_flags *flags,
+					      unsigned long *lookupflags)
 {
-	unsigned long lflags = GPIO_LOOKUP_FLAGS_DEFAULT;
-	struct gpio_desc *desc = ERR_PTR(-ENODEV);
-	int ret;
+	struct gpio_desc *desc = ERR_PTR(-ENOENT);
 
 	if (is_of_node(fwnode)) {
-		desc = gpiod_get_from_of_node(to_of_node(fwnode),
-					      propname, index,
-					      dflags,
-					      label);
-		return desc;
+		dev_dbg(consumer, "using DT '%pfw' for '%s' GPIO lookup\n",
+			fwnode, con_id);
+		desc = of_find_gpio(to_of_node(fwnode), con_id, idx, lookupflags);
 	} else if (is_acpi_node(fwnode)) {
-		desc = acpi_node_get_gpiod(fwnode, propname, index,
-					   &lflags, &dflags);
-		if (IS_ERR(desc))
-			return desc;
-	} else {
-		return ERR_PTR(-EINVAL);
+		dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n",
+			fwnode, con_id);
+		desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags);
 	}
 
-	/* Currently only ACPI takes this path */
+	return desc;
+}
+
+static struct gpio_desc *gpiod_find_and_request(struct device *consumer,
+						struct fwnode_handle *fwnode,
+						const char *con_id,
+						unsigned int idx,
+						enum gpiod_flags flags,
+						const char *label,
+						bool platform_lookup_allowed)
+{
+	struct gpio_desc *desc = ERR_PTR(-ENOENT);
+	unsigned long lookupflags;
+	int ret;
+
+	if (!IS_ERR_OR_NULL(fwnode))
+		desc = gpiod_find_by_fwnode(fwnode, consumer, con_id, idx,
+					    &flags, &lookupflags);
+
+	if (gpiod_not_found(desc) && platform_lookup_allowed) {
+		/*
+		 * Either we are not using DT or ACPI, or their lookup did not
+		 * return a result. In that case, use platform lookup as a
+		 * fallback.
+		 */
+		dev_dbg(consumer, "using lookup tables for GPIO lookup\n");
+		desc = gpiod_find(consumer, con_id, idx, &lookupflags);
+	}
+
+	if (IS_ERR(desc)) {
+		dev_dbg(consumer, "No GPIO consumer %s found\n", con_id);
+		return desc;
+	}
+
+	/*
+	 * If a connection label was passed use that, else attempt to use
+	 * the device name as label
+	 */
 	ret = gpiod_request(desc, label);
-	if (ret)
-		return ERR_PTR(ret);
+	if (ret) {
+		if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
+			return ERR_PTR(ret);
+
+		/*
+		 * This happens when there are several consumers for
+		 * the same GPIO line: we just return here without
+		 * further initialization. It is a bit of a hack.
+		 * This is necessary to support fixed regulators.
+		 *
+		 * FIXME: Make this more sane and safe.
+		 */
+		dev_info(consumer,
+			 "nonexclusive access to GPIO for %s\n", con_id);
+		return desc;
+	}
 
-	ret = gpiod_configure_flags(desc, propname, lflags, dflags);
+	ret = gpiod_configure_flags(desc, con_id, lookupflags, flags);
 	if (ret < 0) {
+		dev_dbg(consumer, "setup of GPIO %s failed\n", con_id);
 		gpiod_put(desc);
 		return ERR_PTR(ret);
 	}
@@ -3937,29 +3963,12 @@ static struct gpio_desc *fwnode_get_named_gpiod(struct fwnode_handle *fwnode,
  * In case of error an ERR_PTR() is returned.
  */
 struct gpio_desc *fwnode_gpiod_get_index(struct fwnode_handle *fwnode,
-					 const char *con_id, int index,
+					 const char *con_id,
+					 int index,
 					 enum gpiod_flags flags,
 					 const char *label)
 {
-	struct gpio_desc *desc;
-	char prop_name[32]; /* 32 is max size of property name */
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(gpio_suffixes); i++) {
-		if (con_id)
-			snprintf(prop_name, sizeof(prop_name), "%s-%s",
-					    con_id, gpio_suffixes[i]);
-		else
-			snprintf(prop_name, sizeof(prop_name), "%s",
-					    gpio_suffixes[i]);
-
-		desc = fwnode_get_named_gpiod(fwnode, prop_name, index, flags,
-					      label);
-		if (!gpiod_not_found(desc))
-			break;
-	}
-
-	return desc;
+	return gpiod_find_and_request(NULL, fwnode, con_id, index, flags, label, false);
 }
 EXPORT_SYMBOL_GPL(fwnode_gpiod_get_index);
 
@@ -4113,72 +4122,11 @@ struct gpio_desc *__must_check gpiod_get_index(struct device *dev,
 					       unsigned int idx,
 					       enum gpiod_flags flags)
 {
-	unsigned long lookupflags = GPIO_LOOKUP_FLAGS_DEFAULT;
-	struct gpio_desc *desc = NULL;
-	int ret;
-	/* Maybe we have a device name, maybe not */
-	const char *devname = dev ? dev_name(dev) : "?";
 	struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL;
+	const char *devname = dev ? dev_name(dev) : "?";
+	const char *label = con_id ?: devname;
 
-	dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id);
-
-	/* Using device tree? */
-	if (is_of_node(fwnode)) {
-		dev_dbg(dev, "using device tree for GPIO lookup\n");
-		desc = of_find_gpio(to_of_node(fwnode),
-				    con_id, idx, &lookupflags);
-	} else if (is_acpi_node(fwnode)) {
-		dev_dbg(dev, "using ACPI for GPIO lookup\n");
-		desc = acpi_find_gpio(fwnode,
-				      con_id, idx, &flags, &lookupflags);
-	}
-
-	/*
-	 * Either we are not using DT or ACPI, or their lookup did not return
-	 * a result. In that case, use platform lookup as a fallback.
-	 */
-	if (!desc || gpiod_not_found(desc)) {
-		dev_dbg(dev, "using lookup tables for GPIO lookup\n");
-		desc = gpiod_find(dev, con_id, idx, &lookupflags);
-	}
-
-	if (IS_ERR(desc)) {
-		dev_dbg(dev, "No GPIO consumer %s found\n", con_id);
-		return desc;
-	}
-
-	/*
-	 * If a connection label was passed use that, else attempt to use
-	 * the device name as label
-	 */
-	ret = gpiod_request(desc, con_id ?: devname);
-	if (ret) {
-		if (!(ret == -EBUSY && flags & GPIOD_FLAGS_BIT_NONEXCLUSIVE))
-			return ERR_PTR(ret);
-
-		/*
-		 * This happens when there are several consumers for
-		 * the same GPIO line: we just return here without
-		 * further initialization. It is a bit of a hack.
-		 * This is necessary to support fixed regulators.
-		 *
-		 * FIXME: Make this more sane and safe.
-		 */
-		dev_info(dev, "nonexclusive access to GPIO for %s\n", con_id ?: devname);
-		return desc;
-	}
-
-	ret = gpiod_configure_flags(desc, con_id, lookupflags, flags);
-	if (ret < 0) {
-		dev_dbg(dev, "setup of GPIO %s failed\n", con_id);
-		gpiod_put(desc);
-		return ERR_PTR(ret);
-	}
-
-	blocking_notifier_call_chain(&desc->gdev->notifier,
-				     GPIOLINE_CHANGED_REQUESTED, desc);
-
-	return desc;
+	return gpiod_find_and_request(dev, fwnode, con_id, idx, flags, label, true);
 }
 EXPORT_SYMBOL_GPL(gpiod_get_index);
 
-- 
GitLab


From e7f9ff5dc90c3826231343439c35c6b7e9e57378 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 11 Nov 2022 14:19:08 -0800
Subject: [PATCH 1515/4122] gpiolib: add support for software nodes

Now that static device properties understand notion of child nodes and
references, let's teach gpiolib to handle them:

- GPIOs are represented as a references to software nodes representing
  gpiochip
- references must have 2 arguments - GPIO number within the chip and
  GPIO flags (GPIO_ACTIVE_LOW/GPIO_ACTIVE_HIGH, etc)
- a new PROPERTY_ENTRY_GPIO() macro is supplied to ensure the above
- name of the software node representing gpiochip must match label of
  the gpiochip, as we use it to locate gpiochip structure at runtime

The following illustrates use of software nodes to describe a "System"
button that is currently specified via use of gpio_keys_platform_data
in arch/mips/alchemy/board-mtx1.c. It follows bindings specified in
Documentation/devicetree/bindings/input/gpio-keys.yaml.

static const struct software_node mxt1_gpiochip2_node = {
	.name = "alchemy-gpio2",
};

static const struct property_entry mtx1_gpio_button_props[] = {
	PROPERTY_ENTRY_U32("linux,code", BTN_0),
	PROPERTY_ENTRY_STRING("label", "System button"),
	PROPERTY_ENTRY_GPIO("gpios", &mxt1_gpiochip2_node, 7, GPIO_ACTIVE_LOW),
	{ }
};

Similarly, arch/arm/mach-tegra/board-paz00.c can be converted to:

static const struct software_node tegra_gpiochip_node = {
	.name = "tegra-gpio",
};

static struct property_entry wifi_rfkill_prop[] __initdata = {
	PROPERTY_ENTRY_STRING("name", "wifi_rfkill"),
	PROPERTY_ENTRY_STRING("type", "wlan"),
	PROPERTY_ENTRY_GPIO("reset-gpios",
			    &tegra_gpiochip_node, 25, GPIO_ACTIVE_HIGH);
	PROPERTY_ENTRY_GPIO("shutdown-gpios",
			    &tegra_gpiochip_node, 85, GPIO_ACTIVE_HIGH);
	{ },
};

static struct platform_device wifi_rfkill_device = {
	.name	= "rfkill_gpio",
	.id	= -1,
};

...

	software_node_register(&tegra_gpiochip_node);
	device_create_managed_software_node(&wifi_rfkill_device.dev,
					    wifi_rfkill_prop, NULL);

Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/Makefile         |   1 +
 drivers/gpio/gpiolib-swnode.c | 123 ++++++++++++++++++++++++++++++++++
 drivers/gpio/gpiolib-swnode.h |  14 ++++
 drivers/gpio/gpiolib.c        |   7 ++
 include/linux/gpio/property.h |  11 +++
 5 files changed, 156 insertions(+)
 create mode 100644 drivers/gpio/gpiolib-swnode.c
 create mode 100644 drivers/gpio/gpiolib-swnode.h
 create mode 100644 include/linux/gpio/property.h

diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile
index 8629e9eaf79e1..010587025fc87 100644
--- a/drivers/gpio/Makefile
+++ b/drivers/gpio/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_OF_GPIO)		+= gpiolib-of.o
 obj-$(CONFIG_GPIO_CDEV)		+= gpiolib-cdev.o
 obj-$(CONFIG_GPIO_SYSFS)	+= gpiolib-sysfs.o
 obj-$(CONFIG_GPIO_ACPI)		+= gpiolib-acpi.o
+obj-$(CONFIG_GPIOLIB)		+= gpiolib-swnode.o
 
 # Device drivers. Generally keep list sorted alphabetically
 obj-$(CONFIG_GPIO_REGMAP)	+= gpio-regmap.o
diff --git a/drivers/gpio/gpiolib-swnode.c b/drivers/gpio/gpiolib-swnode.c
new file mode 100644
index 0000000000000..dd9ccac214d19
--- /dev/null
+++ b/drivers/gpio/gpiolib-swnode.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Software Node helpers for the GPIO API
+ *
+ * Copyright 2022 Google LLC
+ */
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/gpio/consumer.h>
+#include <linux/gpio/driver.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/property.h>
+#include <linux/string.h>
+
+#include "gpiolib.h"
+#include "gpiolib-swnode.h"
+
+static void swnode_format_propname(const char *con_id, char *propname,
+				   size_t max_size)
+{
+	/*
+	 * Note we do not need to try both -gpios and -gpio suffixes,
+	 * as, unlike OF and ACPI, we can fix software nodes to conform
+	 * to the proper binding.
+	 */
+	if (con_id)
+		snprintf(propname, max_size, "%s-gpios", con_id);
+	else
+		strscpy(propname, "gpios", max_size);
+}
+
+static int swnode_gpiochip_match_name(struct gpio_chip *chip, void *data)
+{
+	return !strcmp(chip->label, data);
+}
+
+static struct gpio_chip *swnode_get_chip(struct fwnode_handle *fwnode)
+{
+	const struct software_node *chip_node;
+	struct gpio_chip *chip;
+
+	chip_node = to_software_node(fwnode);
+	if (!chip_node || !chip_node->name)
+		return ERR_PTR(-EINVAL);
+
+	chip = gpiochip_find((void *)chip_node->name, swnode_gpiochip_match_name);
+	return chip ?: ERR_PTR(-EPROBE_DEFER);
+}
+
+struct gpio_desc *swnode_find_gpio(struct fwnode_handle *fwnode,
+				   const char *con_id, unsigned int idx,
+				   unsigned long *flags)
+{
+	const struct software_node *swnode;
+	struct fwnode_reference_args args;
+	struct gpio_chip *chip;
+	struct gpio_desc *desc;
+	char propname[32]; /* 32 is max size of property name */
+	int error;
+
+	swnode = to_software_node(fwnode);
+	if (!swnode)
+		return ERR_PTR(-EINVAL);
+
+	swnode_format_propname(con_id, propname, sizeof(propname));
+
+	/*
+	 * We expect all swnode-described GPIOs have GPIO number and
+	 * polarity arguments, hence nargs is set to 2.
+	 */
+	error = fwnode_property_get_reference_args(fwnode, propname, NULL, 2, idx, &args);
+	if (error) {
+		pr_debug("%s: can't parse '%s' property of node '%pfwP[%d]'\n",
+			__func__, propname, fwnode, idx);
+		return ERR_PTR(error);
+	}
+
+	chip = swnode_get_chip(args.fwnode);
+	fwnode_handle_put(args.fwnode);
+	if (IS_ERR(chip))
+		return ERR_CAST(chip);
+
+	desc = gpiochip_get_desc(chip, args.args[0]);
+	*flags = args.args[1]; /* We expect native GPIO flags */
+
+	pr_debug("%s: parsed '%s' property of node '%pfwP[%d]' - status (%d)\n",
+		 __func__, propname, fwnode, idx, PTR_ERR_OR_ZERO(desc));
+
+	return desc;
+}
+
+/**
+ * swnode_gpio_count - count the GPIOs associated with a device / function
+ * @fwnode:	firmware node of the GPIO consumer, can be %NULL for
+ *		system-global GPIOs
+ * @con_id:	function within the GPIO consumer
+ *
+ * Return:
+ * The number of GPIOs associated with a device / function or %-ENOENT,
+ * if no GPIO has been assigned to the requested function.
+ */
+int swnode_gpio_count(const struct fwnode_handle *fwnode, const char *con_id)
+{
+	struct fwnode_reference_args args;
+	char propname[32];
+	int count;
+
+	swnode_format_propname(con_id, propname, sizeof(propname));
+
+	/*
+	 * This is not very efficient, but GPIO lists usually have only
+	 * 1 or 2 entries.
+	 */
+	count = 0;
+	while (fwnode_property_get_reference_args(fwnode, propname, NULL, 0,
+						  count, &args) == 0) {
+		fwnode_handle_put(args.fwnode);
+		count++;
+	}
+
+	return count ?: -ENOENT;
+}
diff --git a/drivers/gpio/gpiolib-swnode.h b/drivers/gpio/gpiolib-swnode.h
new file mode 100644
index 0000000000000..af849e56f6bce
--- /dev/null
+++ b/drivers/gpio/gpiolib-swnode.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef GPIOLIB_SWNODE_H
+#define GPIOLIB_SWNODE_H
+
+struct fwnode_handle;
+struct gpio_desc;
+
+struct gpio_desc *swnode_find_gpio(struct fwnode_handle *fwnode,
+				   const char *con_id, unsigned int idx,
+				   unsigned long *flags);
+int swnode_gpio_count(const struct fwnode_handle *fwnode, const char *con_id);
+
+#endif /* GPIOLIB_SWNODE_H */
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 7f739096c4cf7..7936d54a2e302 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -26,6 +26,7 @@
 #include "gpiolib.h"
 #include "gpiolib-of.h"
 #include "gpiolib-acpi.h"
+#include "gpiolib-swnode.h"
 #include "gpiolib-cdev.h"
 #include "gpiolib-sysfs.h"
 
@@ -3870,6 +3871,10 @@ static struct gpio_desc *gpiod_find_by_fwnode(struct fwnode_handle *fwnode,
 		dev_dbg(consumer, "using ACPI '%pfw' for '%s' GPIO lookup\n",
 			fwnode, con_id);
 		desc = acpi_find_gpio(fwnode, con_id, idx, flags, lookupflags);
+	} else if (is_software_node(fwnode)) {
+		dev_dbg(consumer, "using swnode '%pfw' for '%s' GPIO lookup\n",
+			fwnode, con_id);
+		desc = swnode_find_gpio(fwnode, con_id, idx, lookupflags);
 	}
 
 	return desc;
@@ -3987,6 +3992,8 @@ int gpiod_count(struct device *dev, const char *con_id)
 		count = of_gpio_get_count(dev, con_id);
 	else if (is_acpi_node(fwnode))
 		count = acpi_gpio_count(dev, con_id);
+	else if (is_software_node(fwnode))
+		count = swnode_gpio_count(fwnode, con_id);
 
 	if (count < 0)
 		count = platform_gpio_count(dev, con_id);
diff --git a/include/linux/gpio/property.h b/include/linux/gpio/property.h
new file mode 100644
index 0000000000000..6c75c8bd44a0b
--- /dev/null
+++ b/include/linux/gpio/property.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0+
+#ifndef __LINUX_GPIO_PROPERTY_H
+#define __LINUX_GPIO_PROPERTY_H
+
+#include <dt-bindings/gpio/gpio.h> /* for GPIO_* flags */
+#include <linux/property.h>
+
+#define PROPERTY_ENTRY_GPIO(_name_, _chip_node_, _idx_, _flags_) \
+	PROPERTY_ENTRY_REF(_name_, _chip_node_, _idx_, _flags_)
+
+#endif /* __LINUX_GPIO_PROPERTY_H */
-- 
GitLab


From 77289b2f5aa3535a2e49b448c6afb36f5526016a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 14 Nov 2022 20:46:25 +0200
Subject: [PATCH 1516/4122] gpiolib: of: Prepare of_mm_gpiochip_add_data() for
 fwnode

GPIO library is getting rid of of_node, fwnode should be utilized instead.
Prepare of_mm_gpiochip_add_data() for fwnode.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index 596b8e21700ef..c9b0c9fdeca8a 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -934,8 +934,8 @@ int of_mm_gpiochip_add_data(struct device_node *np,
 	if (mm_gc->save_regs)
 		mm_gc->save_regs(mm_gc);
 
-	of_node_put(mm_gc->gc.of_node);
-	mm_gc->gc.of_node = of_node_get(np);
+	fwnode_handle_put(mm_gc->gc.fwnode);
+	mm_gc->gc.fwnode = fwnode_handle_get(of_fwnode_handle(np));
 
 	ret = gpiochip_add_data(gc, data);
 	if (ret)
-- 
GitLab


From a431803852de00d8d3c143b19f5690254225538f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 14 Nov 2022 20:46:26 +0200
Subject: [PATCH 1517/4122] gpiolib: of: Drop redundant check in
 of_mm_gpiochip_remove()

The callers never call the function with invalid pointer.
Moreover, compiler quite likely dropped that check anyway
because we use that pointer before the check.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index c9b0c9fdeca8a..a6871859a59de 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -961,9 +961,6 @@ void of_mm_gpiochip_remove(struct of_mm_gpio_chip *mm_gc)
 {
 	struct gpio_chip *gc = &mm_gc->gc;
 
-	if (!mm_gc)
-		return;
-
 	gpiochip_remove(gc);
 	iounmap(mm_gc->regs);
 	kfree(gc->label);
-- 
GitLab


From f87f6e5b4539639460ab105e597e5190c9b2500f Mon Sep 17 00:00:00 2001
From: Chen Lin <chen45464546@163.com>
Date: Fri, 4 Nov 2022 06:21:21 +0800
Subject: [PATCH 1518/4122] iommu/arm-smmu: Warn once when the perfetcher
 errata patch fails to apply

Default reset value of secure banked register SMMU_sACR.cache_lock is 1.
If it is not been set to 0 by secure software(eg: atf), the non-secure
linux cannot clear ARM_MMU500_ACTLR_CPRE bit. In this situation,
the prefetcher errata is not applied successfully, warn once.

Signed-off-by: Chen Lin <chen45464546@163.com>
Link: https://lore.kernel.org/r/20221103222121.3051-1-chen45464546@163.com
[will: Tweaked wording of diagnostic]
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-impl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
index 658f3cc832781..9dc772f2cbb27 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-impl.c
@@ -136,6 +136,9 @@ int arm_mmu500_reset(struct arm_smmu_device *smmu)
 		reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
 		reg &= ~ARM_MMU500_ACTLR_CPRE;
 		arm_smmu_cb_write(smmu, i, ARM_SMMU_CB_ACTLR, reg);
+		reg = arm_smmu_cb_read(smmu, i, ARM_SMMU_CB_ACTLR);
+		if (reg & ARM_MMU500_ACTLR_CPRE)
+			dev_warn_once(smmu->dev, "Failed to disable prefetcher [errata #841119 and #826419], check ACR.CACHE_LOCK\n");
 	}
 
 	return 0;
-- 
GitLab


From e5f4afbe395f1248e7501d470118a2a947fe87e6 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:14 -0800
Subject: [PATCH 1519/4122] perf pmu: Remove mostly unused 'struct perf_pmu'
 'is_hybrid' member

Replace usage with perf_pmu__is_hybrid().

Suggested-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c |  5 +----
 tools/perf/util/evsel.h |  2 +-
 tools/perf/util/pmu.c   |  3 +--
 tools/perf/util/pmu.h   |  1 -
 tools/perf/util/stat.c  | 11 +++--------
 5 files changed, 6 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index cdde5b5f8ad2b..ca6abb64c91d2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -3124,11 +3124,8 @@ void evsel__zero_per_pkg(struct evsel *evsel)
 	}
 }
 
-bool evsel__is_hybrid(struct evsel *evsel)
+bool evsel__is_hybrid(const struct evsel *evsel)
 {
-	if (evsel->pmu)
-		return evsel->pmu->is_hybrid;
-
 	return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name);
 }
 
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 989865e16aadd..467bb0b32fef8 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -498,7 +498,7 @@ struct perf_env *evsel__env(struct evsel *evsel);
 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
 
 void evsel__zero_per_pkg(struct evsel *evsel);
-bool evsel__is_hybrid(struct evsel *evsel);
+bool evsel__is_hybrid(const struct evsel *evsel);
 struct evsel *evsel__leader(struct evsel *evsel);
 bool evsel__has_leader(struct evsel *evsel, struct evsel *leader);
 bool evsel__is_leader(struct evsel *evsel);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 6a86e6af0903c..48e7be6f3baa1 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -980,7 +980,6 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name)
 	pmu->is_uncore = pmu_is_uncore(name);
 	if (pmu->is_uncore)
 		pmu->id = pmu_id(name);
-	pmu->is_hybrid = is_hybrid;
 	pmu->max_precise = pmu_max_precise(name);
 	pmu_add_cpu_aliases(&aliases, pmu);
 	pmu_add_sys_aliases(&aliases, pmu);
@@ -992,7 +991,7 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name)
 	list_splice(&aliases, &pmu->aliases);
 	list_add_tail(&pmu->list, &pmus);
 
-	if (pmu->is_hybrid)
+	if (is_hybrid)
 		list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus);
 
 	pmu->default_config = perf_pmu__get_default_config(pmu);
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 68e15c38ae710..0d556d02ce525 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -40,7 +40,6 @@ struct perf_pmu {
 	__u32 type;
 	bool selectable;
 	bool is_uncore;
-	bool is_hybrid;
 	bool auxtrace;
 	int max_precise;
 	struct perf_event_attr *default_config;
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 3a432a949d469..acf0edf5fdd1a 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -604,15 +604,10 @@ static void evsel__merge_aliases(struct evsel *evsel)
 	}
 }
 
-static bool evsel__should_merge_hybrid(struct evsel *evsel, struct perf_stat_config *config)
+static bool evsel__should_merge_hybrid(const struct evsel *evsel,
+				       const struct perf_stat_config *config)
 {
-	struct perf_pmu *pmu;
-
-	if (!config->hybrid_merge)
-		return false;
-
-	pmu = evsel__find_pmu(evsel);
-	return pmu && pmu->is_hybrid;
+	return config->hybrid_merge && evsel__is_hybrid(evsel);
 }
 
 static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config)
-- 
GitLab


From fe13d43d07393f46957c2fc09f09f097f7969a5d Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:15 -0800
Subject: [PATCH 1520/4122] perf pmu: Add data structure documentation

Add documentation to 'struct perf_pmu' and the associated structs of
'perf_pmu_alias' and 'perf_pmu_format'.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c |  16 ++++++
 tools/perf/util/pmu.h | 122 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 132 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 48e7be6f3baa1..057e1528c32fe 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -31,10 +31,26 @@
 
 struct perf_pmu perf_pmu__fake;
 
+/**
+ * struct perf_pmu_format - Values from a format file read from
+ * <sysfs>/devices/cpu/format/ held in struct perf_pmu.
+ *
+ * For example, the contents of <sysfs>/devices/cpu/format/event may be
+ * "config:0-7" and will be represented here as name="event",
+ * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set.
+ */
 struct perf_pmu_format {
+	/** @name: The modifier/file name. */
 	char *name;
+	/**
+	 * @value : Which config value the format relates to. Supported values
+	 * are from PERF_PMU_FORMAT_VALUE_CONFIG to
+	 * PERF_PMU_FORMAT_VALUE_CONFIG_END.
+	 */
 	int value;
+	/** @bits: Which config bits are set by this format value. */
 	DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS);
+	/** @list: Element on list within struct perf_pmu. */
 	struct list_head list;
 };
 
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 0d556d02ce525..ee02e1ef91878 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -33,30 +33,101 @@ struct perf_pmu_caps {
 	struct list_head list;
 };
 
+/**
+ * struct perf_pmu - hi
+ */
 struct perf_pmu {
+	/** @name: The name of the PMU such as "cpu". */
 	char *name;
+	/**
+	 * @alias_name: Optional alternate name for the PMU determined in
+	 * architecture specific code.
+	 */
 	char *alias_name;
+	/**
+	 * @id: Optional PMU identifier read from
+	 * <sysfs>/bus/event_source/devices/<name>/identifier.
+	 */
 	char *id;
+	/**
+	 * @type: Perf event attributed type value, read from
+	 * <sysfs>/bus/event_source/devices/<name>/type.
+	 */
 	__u32 type;
+	/**
+	 * @selectable: Can the PMU name be selected as if it were an event?
+	 */
 	bool selectable;
+	/**
+	 * @is_uncore: Is the PMU not within the CPU core? Determined by the
+	 * presence of <sysfs>/bus/event_source/devices/<name>/cpumask.
+	 */
 	bool is_uncore;
+	/**
+	 * @auxtrace: Are events auxiliary events? Determined in architecture
+	 * specific code.
+	 */
 	bool auxtrace;
+	/**
+	 * @max_precise: Number of levels of :ppp precision supported by the
+	 * PMU, read from
+	 * <sysfs>/bus/event_source/devices/<name>/caps/max_precise.
+	 */
 	int max_precise;
+	/**
+	 * @default_config: Optional default perf_event_attr determined in
+	 * architecture specific code.
+	 */
 	struct perf_event_attr *default_config;
+	/**
+	 * @cpus: Empty or the contents of either of:
+	 * <sysfs>/bus/event_source/devices/<name>/cpumask.
+	 * <sysfs>/bus/event_source/devices/<cpu>/cpus.
+	 */
 	struct perf_cpu_map *cpus;
-	struct list_head format;  /* HEAD struct perf_pmu_format -> list */
-	struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */
+	/**
+	 * @format: Holds the contents of files read from
+	 * <sysfs>/bus/event_source/devices/<name>/format/. The contents specify
+	 * which event parameter changes what config, config1 or config2 bits.
+	 */
+	struct list_head format;
+	/**
+	 * @aliases: List of struct perf_pmu_alias. Each alias corresponds to an
+	 * event read from <sysfs>/bus/event_source/devices/<name>/events/ or
+	 * from json events in pmu-events.c.
+	 */
+	struct list_head aliases;
+	/** @caps_initialized: Has the list caps been initialized? */
 	bool caps_initialized;
+	/** @nr_caps: The length of the list caps. */
 	u32 nr_caps;
-	struct list_head caps;    /* HEAD struct perf_pmu_caps -> list */
-	struct list_head list;    /* ELEM */
+	/**
+	 * @caps: Holds the contents of files read from
+	 * <sysfs>/bus/event_source/devices/<name>/caps/.
+	 *
+	 * The contents are pairs of the filename with the value of its
+	 * contents, for example, max_precise (see above) may have a value of 3.
+	 */
+	struct list_head caps;
+	/** @list: Element on pmus list in pmu.c. */
+	struct list_head list;
+	/** @hybrid_list: Element on perf_pmu__hybrid_pmus. */
 	struct list_head hybrid_list;
 
+	/**
+	 * @missing_features: Features to inhibit when events on this PMU are
+	 * opened.
+	 */
 	struct {
+		/**
+		 * @exclude_guest: Disables perf_event_attr exclude_guest and
+		 * exclude_host.
+		 */
 		bool exclude_guest;
 	} missing_features;
 };
 
+/** @perf_pmu__fake: A special global PMU used for testing. */
 extern struct perf_pmu perf_pmu__fake;
 
 struct perf_pmu_info {
@@ -70,21 +141,60 @@ struct perf_pmu_info {
 
 #define UNIT_MAX_LEN	31 /* max length for event unit name */
 
+/**
+ * struct perf_pmu_alias - An event either read from sysfs or builtin in
+ * pmu-events.c, created by parsing the pmu-events json files.
+ */
 struct perf_pmu_alias {
+	/** @name: Name of the event like "mem-loads". */
 	char *name;
+	/** @desc: Optional short description of the event. */
 	char *desc;
+	/** @long_desc: Optional long description. */
 	char *long_desc;
+	/**
+	 * @topic: Optional topic such as cache or pipeline, particularly for
+	 * json events.
+	 */
 	char *topic;
+	/**
+	 * @str: Comma separated parameter list like
+	 * "event=0xcd,umask=0x1,ldlat=0x3".
+	 */
 	char *str;
-	struct list_head terms; /* HEAD struct parse_events_term -> list */
-	struct list_head list;  /* ELEM */
+	/** @terms: Owned list of the original parsed parameters. */
+	struct list_head terms;
+	/** @list: List element of struct perf_pmu aliases. */
+	struct list_head list;
+	/** @unit: Units for the event, such as bytes or cache lines. */
 	char unit[UNIT_MAX_LEN+1];
+	/** @scale: Value to scale read counter values by. */
 	double scale;
+	/**
+	 * @per_pkg: Does the file
+	 * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or
+	 * equivalent json value exist and have the value 1.
+	 */
 	bool per_pkg;
+	/**
+	 * @snapshot: Does the file
+	 * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot
+	 * exist and have the value 1.
+	 */
 	bool snapshot;
+	/**
+	 * @deprecated: Is the event hidden and so not shown in perf list by
+	 * default.
+	 */
 	bool deprecated;
+	/**
+	 * @metric_expr: A metric expression associated with an event. Doing
+	 * this makes little sense due to scale and unit applying to both.
+	 */
 	char *metric_expr;
+	/** @metric_name: A name for the metric. unit applying to both. */
 	char *metric_name;
+	/** @pmu_name: The name copied from struct perf_pmu. */
 	char *pmu_name;
 };
 
-- 
GitLab


From 1504b6f97bad166b484d6f27dc99746fdca5f467 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:16 -0800
Subject: [PATCH 1521/4122] tools lib api fs tracing_path: Add scandir
 alphasort

tracing_events__opendir() allows iteration over files in
<debugfs>/tracing/events but with an arbitrary sort order.

Add a scandir alternative where the results are alphabetically sorted.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/fs/tracing_path.c | 16 ++++++++++++++++
 tools/lib/api/fs/tracing_path.h |  1 +
 2 files changed, 17 insertions(+)

diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c
index 5afb11b30fca4..b8e457c841aba 100644
--- a/tools/lib/api/fs/tracing_path.c
+++ b/tools/lib/api/fs/tracing_path.c
@@ -113,6 +113,22 @@ DIR *tracing_events__opendir(void)
 	return dir;
 }
 
+int tracing_events__scandir_alphasort(struct dirent ***namelist)
+{
+	char *path = get_tracing_file("events");
+	int ret;
+
+	if (!path) {
+		*namelist = NULL;
+		return 0;
+	}
+
+	ret = scandir(path, namelist, NULL, alphasort);
+	put_events_file(path);
+
+	return ret;
+}
+
 int tracing_path__strerror_open_tp(int err, char *buf, size_t size,
 				   const char *sys, const char *name)
 {
diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h
index a19136b086dce..fc6347c11deb3 100644
--- a/tools/lib/api/fs/tracing_path.h
+++ b/tools/lib/api/fs/tracing_path.h
@@ -6,6 +6,7 @@
 #include <dirent.h>
 
 DIR *tracing_events__opendir(void);
+int tracing_events__scandir_alphasort(struct dirent ***namelist);
 
 void tracing_path_set(const char *mountpoint);
 const char *tracing_path_mount(void);
-- 
GitLab


From d74060c03368063c64e63004319e2a39930c4297 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:17 -0800
Subject: [PATCH 1522/4122] perf tracepoint: Sort events in iterator

In print_tracepoint_events() use tracing_events__scandir_alphasort() and
scandir alphasort so that the subsystem and events are sorted and don't
need a secondary qsort. Locally this results in the following change:

...
   ext4:ext4_zero_range                               [Tracepoint event]
-  fib6:fib6_table_lookup                             [Tracepoint event]
   fib:fib_table_lookup                               [Tracepoint event]
+  fib6:fib6_table_lookup                             [Tracepoint event]
   filelock:break_lease_block                         [Tracepoint event]
...

ie fib6 now is after fib and not before it. This is more consistent
with how numbers are more generally sorted, such as:

...
  syscalls:sys_enter_renameat                        [Tracepoint event]
  syscalls:sys_enter_renameat2                       [Tracepoint event]
...

and so an improvement over the qsort approach.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/print-events.c | 108 +++++++++++----------------------
 1 file changed, 37 insertions(+), 71 deletions(-)

diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index 6df947df1c0fc..dae927250826a 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -66,26 +66,21 @@ static int cmp_string(const void *a, const void *b)
 void print_tracepoint_events(const char *subsys_glob,
 			     const char *event_glob, bool name_only)
 {
-	DIR *sys_dir, *evt_dir;
-	struct dirent *sys_dirent, *evt_dirent;
-	char evt_path[MAXPATHLEN];
-	char *dir_path;
-	char **evt_list = NULL;
-	unsigned int evt_i = 0, evt_num = 0;
-	bool evt_num_known = false;
-
-restart:
-	sys_dir = tracing_events__opendir();
-	if (!sys_dir)
-		return;
-
-	if (evt_num_known) {
-		evt_list = zalloc(sizeof(char *) * evt_num);
-		if (!evt_list)
-			goto out_close_sys_dir;
-	}
+	struct dirent **sys_namelist = NULL;
+	bool printed = false;
+	int sys_items = tracing_events__scandir_alphasort(&sys_namelist);
+
+	for (int i = 0; i < sys_items; i++) {
+		struct dirent *sys_dirent = sys_namelist[i];
+		struct dirent **evt_namelist = NULL;
+		char *dir_path;
+		int evt_items;
+
+		if (sys_dirent->d_type != DT_DIR ||
+		    !strcmp(sys_dirent->d_name, ".") ||
+		    !strcmp(sys_dirent->d_name, ".."))
+			continue;
 
-	for_each_subsystem(sys_dir, sys_dirent) {
 		if (subsys_glob != NULL &&
 		    !strglobmatch(sys_dirent->d_name, subsys_glob))
 			continue;
@@ -93,69 +88,40 @@ restart:
 		dir_path = get_events_file(sys_dirent->d_name);
 		if (!dir_path)
 			continue;
-		evt_dir = opendir(dir_path);
-		if (!evt_dir)
-			goto next;
 
-		for_each_event(dir_path, evt_dir, evt_dirent) {
-			if (event_glob != NULL &&
-			    !strglobmatch(evt_dirent->d_name, event_glob))
+		evt_items = scandir(dir_path, &evt_namelist, NULL, alphasort);
+		for (int j = 0; j < evt_items; j++) {
+			struct dirent *evt_dirent = evt_namelist[j];
+			char evt_path[MAXPATHLEN];
+
+			if (evt_dirent->d_type != DT_DIR ||
+			    !strcmp(evt_dirent->d_name, ".") ||
+			    !strcmp(evt_dirent->d_name, ".."))
 				continue;
 
-			if (!evt_num_known) {
-				evt_num++;
+			if (tp_event_has_id(dir_path, evt_dirent) != 0)
+				continue;
+
+			if (event_glob != NULL &&
+			    !strglobmatch(evt_dirent->d_name, event_glob))
 				continue;
-			}
 
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent->d_name, evt_dirent->d_name);
-
-			evt_list[evt_i] = strdup(evt_path);
-			if (evt_list[evt_i] == NULL) {
-				put_events_file(dir_path);
-				goto out_close_evt_dir;
+			if (name_only)
+				printf("%s ", evt_path);
+			else {
+				printf("  %-50s [%s]\n", evt_path,
+				       event_type_descriptors[PERF_TYPE_TRACEPOINT]);
 			}
-			evt_i++;
+			printed = true;
 		}
-		closedir(evt_dir);
-next:
-		put_events_file(dir_path);
+		free(dir_path);
+		free(evt_namelist);
 	}
-	closedir(sys_dir);
-
-	if (!evt_num_known) {
-		evt_num_known = true;
-		goto restart;
-	}
-	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
-	evt_i = 0;
-	while (evt_i < evt_num) {
-		if (name_only) {
-			printf("%s ", evt_list[evt_i++]);
-			continue;
-		}
-		printf("  %-50s [%s]\n", evt_list[evt_i++],
-				event_type_descriptors[PERF_TYPE_TRACEPOINT]);
-	}
-	if (evt_num && pager_in_use())
+	free(sys_namelist);
+	if (printed && pager_in_use())
 		printf("\n");
-
-out_free:
-	evt_num = evt_i;
-	for (evt_i = 0; evt_i < evt_num; evt_i++)
-		zfree(&evt_list[evt_i]);
-	zfree(&evt_list);
-	return;
-
-out_close_evt_dir:
-	closedir(evt_dir);
-out_close_sys_dir:
-	closedir(sys_dir);
-
-	printf("FATAL: not enough memory to print %s\n",
-			event_type_descriptors[PERF_TYPE_TRACEPOINT]);
-	if (evt_list)
-		goto out_free;
 }
 
 void print_sdt_events(const char *subsys_glob, const char *event_glob,
-- 
GitLab


From ca0fe62413f3f536201d788672cd6cf7c4ed52dd Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:18 -0800
Subject: [PATCH 1523/4122] perf list: Generalize limiting to a PMU name

Deprecate the --cputype option and add a --unit option where '--unit
cpu_atom' behaves like '--cputype atom'. The --unit option can be used
with arbitrary PMUs, for example:

```
$ perf list --unit msr pmu

List of pre-defined events (to be used in -e or -M):

  msr/aperf/                                         [Kernel PMU event]
  msr/cpu_thermal_margin/                            [Kernel PMU event]
  msr/mperf/                                         [Kernel PMU event]
  msr/pperf/                                         [Kernel PMU event]
  msr/smi/                                           [Kernel PMU event]
  msr/tsc/                                           [Kernel PMU event]
```

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-list.txt |  6 +++---
 tools/perf/builtin-list.c              | 18 ++++++++++++------
 tools/perf/util/metricgroup.c          |  3 ++-
 tools/perf/util/pmu.c                  |  4 +---
 4 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 57384a97c04f4..44a819af573de 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -39,9 +39,9 @@ any extra expressions computed by perf stat.
 --deprecated::
 Print deprecated events. By default the deprecated events are hidden.
 
---cputype::
-Print events applying cpu with this type for hybrid platform
-(e.g. --cputype core or --cputype atom)
+--unit::
+Print PMU events and metrics limited to the specific PMU name.
+(e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom)
 
 [[EVENT_MODIFIERS]]
 EVENT MODIFIERS
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 58e1ec1654ef4..cc84ced6da266 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -21,7 +21,6 @@
 
 static bool desc_flag = true;
 static bool details_flag;
-static const char *hybrid_type;
 
 int cmd_list(int argc, const char **argv)
 {
@@ -30,6 +29,8 @@ int cmd_list(int argc, const char **argv)
 	bool long_desc_flag = false;
 	bool deprecated = false;
 	char *pmu_name = NULL;
+	const char *hybrid_name = NULL;
+	const char *unit_name = NULL;
 	struct option list_options[] = {
 		OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
 		OPT_BOOLEAN('d', "desc", &desc_flag,
@@ -40,9 +41,10 @@ int cmd_list(int argc, const char **argv)
 			    "Print information on the perf event names and expressions used internally by events."),
 		OPT_BOOLEAN(0, "deprecated", &deprecated,
 			    "Print deprecated events."),
-		OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type",
-			   "Print events applying cpu with this type for hybrid platform "
-			   "(e.g. core or atom)"),
+		OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
+			   "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
+		OPT_STRING(0, "unit", &unit_name, "PMU name",
+			   "Limit PMU or metric printing to the specified PMU."),
 		OPT_INCR(0, "debug", &verbose,
 			     "Enable debugging output"),
 		OPT_END()
@@ -53,6 +55,8 @@ int cmd_list(int argc, const char **argv)
 	};
 
 	set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN);
+	/* Hide hybrid flag for the more generic 'unit' flag. */
+	set_option_flag(list_options, 0, "cputype", PARSE_OPT_HIDDEN);
 
 	argc = parse_options(argc, argv, list_options, list_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);
@@ -62,8 +66,10 @@ int cmd_list(int argc, const char **argv)
 	if (!raw_dump && pager_in_use())
 		printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
 
-	if (hybrid_type) {
-		pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type);
+	if (unit_name)
+		pmu_name = strdup(unit_name);
+	else if (hybrid_name) {
+		pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_name);
 		if (!pmu_name)
 			pr_warning("WARNING: hybrid cputype is not supported!\n");
 	}
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 4c98ac29ee13f..1943fed9b6d92 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -556,11 +556,12 @@ static int metricgroup__print_callback(const struct pmu_event *pe,
 				       void *vdata)
 {
 	struct metricgroup_print_data *data = vdata;
+	const char *pmu = pe->pmu ?: "cpu";
 
 	if (!pe->metric_expr)
 		return 0;
 
-	if (data->pmu_name && perf_pmu__is_hybrid(pe->pmu) && strcmp(data->pmu_name, pe->pmu))
+	if (data->pmu_name && strcmp(data->pmu_name, pmu))
 		return 0;
 
 	return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter,
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 057e1528c32fe..e6790175307b4 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1695,10 +1695,8 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 	pmu = NULL;
 	j = 0;
 	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
-		if (pmu_name && perf_pmu__is_hybrid(pmu->name) &&
-		    strcmp(pmu_name, pmu->name)) {
+		if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name))
 			continue;
-		}
 
 		list_for_each_entry(alias, &pmu->aliases, list) {
 			char *name = alias->desc ? alias->name :
-- 
GitLab


From 3301b3fe9bdcfcb109e44e31160e830fe558faec Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:19 -0800
Subject: [PATCH 1524/4122] perf list: Simplify cache event printing

The current code computes an array of cache names then sorts and prints
them. Use a strlist to create a list of names that is sorted. Keep the
hybrid names, it is unclear how to generalize it, but drop the
computation of evt_pmus that is never used.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kang Minchul <tegongkang@gmail.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-7-irogers@google.com
[ Fixed up clash with cf9f67b36303de65 ("perf print-events: Remove redundant comparison with zero")]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/print-events.c | 130 +++++++--------------------------
 1 file changed, 27 insertions(+), 103 deletions(-)

diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index dae927250826a..ff7793944246e 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -206,135 +206,59 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob,
 
 int print_hwcache_events(const char *event_glob, bool name_only)
 {
-	unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0;
-	char name[64], new_name[128];
-	char **evt_list = NULL, **evt_pmus = NULL;
-	bool evt_num_known = false;
-	struct perf_pmu *pmu = NULL;
-
-	if (perf_pmu__has_hybrid()) {
-		npmus = perf_pmu__hybrid_pmu_num();
-		evt_pmus = zalloc(sizeof(char *) * npmus);
-		if (!evt_pmus)
-			goto out_enomem;
-	}
+	struct strlist *evt_name_list = strlist__new(NULL, NULL);
+	struct str_node *nd;
 
-restart:
-	if (evt_num_known) {
-		evt_list = zalloc(sizeof(char *) * evt_num);
-		if (!evt_list)
-			goto out_enomem;
+	if (!evt_name_list) {
+		pr_debug("Failed to allocate new strlist for hwcache events\n");
+		return -ENOMEM;
 	}
-
-	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
-		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
+	for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
+		for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
 			/* skip invalid cache type */
 			if (!evsel__is_cache_op_valid(type, op))
 				continue;
 
-			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
-				unsigned int hybrid_supported = 0, j;
-				bool supported;
+			for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+				struct perf_pmu *pmu = NULL;
+				char name[64];
 
 				__evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
 				if (event_glob != NULL && !strglobmatch(name, event_glob))
 					continue;
 
 				if (!perf_pmu__has_hybrid()) {
-					if (!is_event_supported(PERF_TYPE_HW_CACHE,
-								type | (op << 8) | (i << 16))) {
-						continue;
-					}
-				} else {
-					perf_pmu__for_each_hybrid_pmu(pmu) {
-						if (!evt_num_known) {
-							evt_num++;
-							continue;
-						}
-
-						supported = is_event_supported(
-							PERF_TYPE_HW_CACHE,
-							type | (op << 8) | (i << 16) |
-							((__u64)pmu->type << PERF_PMU_TYPE_SHIFT));
-						if (supported) {
-							snprintf(new_name, sizeof(new_name),
-								 "%s/%s/", pmu->name, name);
-							evt_pmus[hybrid_supported] =
-								strdup(new_name);
-							hybrid_supported++;
-						}
-					}
-
-					if (hybrid_supported == 0)
-						continue;
-				}
-
-				if (!evt_num_known) {
-					evt_num++;
+					if (is_event_supported(PERF_TYPE_HW_CACHE,
+							       type | (op << 8) | (i << 16)))
+						strlist__add(evt_name_list, name);
 					continue;
 				}
-
-				if ((hybrid_supported == 0) ||
-				    (hybrid_supported == npmus)) {
-					evt_list[evt_i] = strdup(name);
-					for (j = 0; j < npmus; j++)
-						zfree(&evt_pmus[j]);
-				} else {
-					for (j = 0; j < hybrid_supported; j++) {
-						evt_list[evt_i++] = evt_pmus[j];
-						evt_pmus[j] = NULL;
+				perf_pmu__for_each_hybrid_pmu(pmu) {
+					if (is_event_supported(PERF_TYPE_HW_CACHE,
+					    type | (op << 8) | (i << 16) |
+					    ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) {
+						char new_name[128];
+							snprintf(new_name, sizeof(new_name),
+								 "%s/%s/", pmu->name, name);
+							strlist__add(evt_name_list, new_name);
 					}
-					continue;
 				}
-
-				if (evt_list[evt_i] == NULL)
-					goto out_enomem;
-				evt_i++;
 			}
 		}
 	}
 
-	if (!evt_num_known) {
-		evt_num_known = true;
-		goto restart;
-	}
-
-	for (evt_i = 0; evt_i < evt_num; evt_i++) {
-		if (!evt_list[evt_i])
-			break;
-	}
-
-	evt_num = evt_i;
-	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
-	evt_i = 0;
-	while (evt_i < evt_num) {
+	strlist__for_each_entry(nd, evt_name_list) {
 		if (name_only) {
-			printf("%s ", evt_list[evt_i++]);
+			printf("%s ", nd->s);
 			continue;
 		}
-		printf("  %-50s [%s]\n", evt_list[evt_i++],
-				event_type_descriptors[PERF_TYPE_HW_CACHE]);
+		printf("  %-50s [%s]\n", nd->s, event_type_descriptors[PERF_TYPE_HW_CACHE]);
 	}
-	if (evt_num && pager_in_use())
+	if (!strlist__empty(evt_name_list) && pager_in_use())
 		printf("\n");
 
-out_free:
-	evt_num = evt_i;
-	for (evt_i = 0; evt_i < evt_num; evt_i++)
-		zfree(&evt_list[evt_i]);
-	zfree(&evt_list);
-
-	for (evt_i = 0; evt_i < npmus; evt_i++)
-		zfree(&evt_pmus[evt_i]);
-	zfree(&evt_pmus);
-	return evt_num;
-
-out_enomem:
-	printf("FATAL: not enough memory to print %s\n",
-		event_type_descriptors[PERF_TYPE_HW_CACHE]);
-	if (evt_list)
-		goto out_free;
-	return evt_num;
+	strlist__delete(evt_name_list);
+	return 0;
 }
 
 static void print_tool_event(const struct event_symbol *syms, const char *event_glob,
-- 
GitLab


From de3752a7d6c4435087ce7cb8537bfa987b93ec8b Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:20 -0800
Subject: [PATCH 1525/4122] perf list: Simplify symbol event printing

The current code computes an array of symbol names then sorts and prints
them. Use a strlist to create a list of names that is sorted and then
print it.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-8-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/print-events.c | 79 +++++++++-------------------------
 1 file changed, 21 insertions(+), 58 deletions(-)

diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index ff7793944246e..d53dba033597b 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -52,14 +52,6 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
 	},
 };
 
-static int cmp_string(const void *a, const void *b)
-{
-	const char * const *as = a;
-	const char * const *bs = b;
-
-	return strcmp(*as, *bs);
-}
-
 /*
  * Print the events from <debugfs_mount_point>/tracing/events
  */
@@ -298,77 +290,48 @@ void print_symbol_events(const char *event_glob, unsigned int type,
 			 struct event_symbol *syms, unsigned int max,
 			 bool name_only)
 {
-	unsigned int i, evt_i = 0, evt_num = 0;
-	char name[MAX_NAME_LEN];
-	char **evt_list = NULL;
-	bool evt_num_known = false;
-
-restart:
-	if (evt_num_known) {
-		evt_list = zalloc(sizeof(char *) * evt_num);
-		if (!evt_list)
-			goto out_enomem;
-		syms -= max;
-	}
+	struct strlist *evt_name_list = strlist__new(NULL, NULL);
+	struct str_node *nd;
 
-	for (i = 0; i < max; i++, syms++) {
+	if (!evt_name_list) {
+		pr_debug("Failed to allocate new strlist for symbol events\n");
+		return;
+	}
+	for (unsigned int i = 0; i < max; i++) {
 		/*
 		 * New attr.config still not supported here, the latest
 		 * example was PERF_COUNT_SW_CGROUP_SWITCHES
 		 */
-		if (syms->symbol == NULL)
+		if (syms[i].symbol == NULL)
 			continue;
 
-		if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) ||
-		      (syms->alias && strglobmatch(syms->alias, event_glob))))
+		if (event_glob != NULL && !(strglobmatch(syms[i].symbol, event_glob) ||
+		      (syms[i].alias && strglobmatch(syms[i].alias, event_glob))))
 			continue;
 
 		if (!is_event_supported(type, i))
 			continue;
 
-		if (!evt_num_known) {
-			evt_num++;
-			continue;
-		}
-
-		if (!name_only && strlen(syms->alias))
-			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
-		else
-			strlcpy(name, syms->symbol, MAX_NAME_LEN);
+		if (strlen(syms[i].alias)) {
+			char name[MAX_NAME_LEN];
 
-		evt_list[evt_i] = strdup(name);
-		if (evt_list[evt_i] == NULL)
-			goto out_enomem;
-		evt_i++;
+			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias);
+			strlist__add(evt_name_list, name);
+		} else
+			strlist__add(evt_name_list, syms[i].symbol);
 	}
 
-	if (!evt_num_known) {
-		evt_num_known = true;
-		goto restart;
-	}
-	qsort(evt_list, evt_num, sizeof(char *), cmp_string);
-	evt_i = 0;
-	while (evt_i < evt_num) {
+	strlist__for_each_entry(nd, evt_name_list) {
 		if (name_only) {
-			printf("%s ", evt_list[evt_i++]);
+			printf("%s ", nd->s);
 			continue;
 		}
-		printf("  %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]);
+		printf("  %-50s [%s]\n", nd->s, event_type_descriptors[type]);
 	}
-	if (evt_num && pager_in_use())
+	if (!strlist__empty(evt_name_list) && pager_in_use())
 		printf("\n");
 
-out_free:
-	evt_num = evt_i;
-	for (evt_i = 0; evt_i < evt_num; evt_i++)
-		zfree(&evt_list[evt_i]);
-	zfree(&evt_list);
-	return;
-
-out_enomem:
-	printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]);
-	if (evt_list)
-		goto out_free;
+	strlist__delete(evt_name_list);
 }
 
 /*
-- 
GitLab


From eb2d4514a5971444f67ac8b95bcd63e7702fa6bf Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:21 -0800
Subject: [PATCH 1526/4122] perf pmu: Restructure print_pmu_events() to avoid
 memory allocations

Previously print_pmu_events() would compute the values to be printed,
place them in struct sevent, sort them and then print them.

Modify the code so that struct sevent holds just the PMU and event, sort
these and then in the main print loop calculate aliases for names, etc.

This avoids memory allocations for copied values as they are computed
then printed.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-9-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 208 ++++++++++++++++++++++--------------------
 1 file changed, 110 insertions(+), 98 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e6790175307b4..075c82dd1347f 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1553,8 +1553,8 @@ static int sub_non_neg(int a, int b)
 	return a - b;
 }
 
-static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
-			  struct perf_pmu_alias *alias)
+static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
+			  const struct perf_pmu_alias *alias)
 {
 	struct parse_events_term *term;
 	int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name);
@@ -1579,51 +1579,67 @@ static char *format_alias(char *buf, int len, struct perf_pmu *pmu,
 	return buf;
 }
 
-static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu,
-			     struct perf_pmu_alias *alias)
+static char *format_alias_or(char *buf, int len, const struct perf_pmu *pmu,
+			     const struct perf_pmu_alias *alias)
 {
 	snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name);
 	return buf;
 }
 
+/** Struct for ordering events as output in perf list. */
 struct sevent {
-	char *name;
-	char *desc;
-	char *topic;
-	char *str;
-	char *pmu;
-	char *metric_expr;
-	char *metric_name;
-	int is_cpu;
+	/** PMU for event. */
+	const struct perf_pmu *pmu;
+	/**
+	 * Optional event for name, desc, etc. If not present then this is a
+	 * selectable PMU and the event name is shown as "//".
+	 */
+	const struct perf_pmu_alias *event;
+	/** Is the PMU for the CPU? */
+	bool is_cpu;
 };
 
 static int cmp_sevent(const void *a, const void *b)
 {
 	const struct sevent *as = a;
 	const struct sevent *bs = b;
+	const char *a_pmu_name, *b_pmu_name;
+	const char *a_name = "//", *a_desc = NULL, *a_topic = "";
+	const char *b_name = "//", *b_desc = NULL, *b_topic = "";
 	int ret;
 
-	/* Put extra events last */
-	if (!!as->desc != !!bs->desc)
-		return !!as->desc - !!bs->desc;
-	if (as->topic && bs->topic) {
-		int n = strcmp(as->topic, bs->topic);
-
-		if (n)
-			return n;
+	if (as->event) {
+		a_name = as->event->name;
+		a_desc = as->event->desc;
+		a_topic = as->event->topic ?: "";
 	}
+	if (bs->event) {
+		b_name = bs->event->name;
+		b_desc = bs->event->desc;
+		b_topic = bs->event->topic ?: "";
+	}
+	/* Put extra events last. */
+	if (!!a_desc != !!b_desc)
+		return !!a_desc - !!b_desc;
+
+	/* Order by topics. */
+	ret = strcmp(a_topic, b_topic);
+	if (ret)
+		return ret;
 
 	/* Order CPU core events to be first */
 	if (as->is_cpu != bs->is_cpu)
 		return bs->is_cpu - as->is_cpu;
 
-	ret = strcmp(as->name, bs->name);
-	if (!ret) {
-		if (as->pmu && bs->pmu)
-			return strcmp(as->pmu, bs->pmu);
-	}
+	/* Order by PMU name. */
+	a_pmu_name = as->pmu->name ?: "";
+	b_pmu_name = bs->pmu->name ?: "";
+	ret = strcmp(a_pmu_name, b_pmu_name);
+	if (ret)
+		return ret;
 
-	return ret;
+	/* Order by event name. */
+	return strcmp(a_name, b_name);
 }
 
 static void wordwrap(char *s, int start, int max, int corr)
@@ -1655,16 +1671,18 @@ bool is_pmu_core(const char *name)
 static bool pmu_alias_is_duplicate(struct sevent *alias_a,
 				   struct sevent *alias_b)
 {
-	/* Different names -> never duplicates */
-	if (strcmp(alias_a->name, alias_b->name))
-		return false;
+	const char *a_pmu_name, *b_pmu_name;
+	const char *a_name = alias_a->event ? alias_a->event->name : "//";
+	const char *b_name = alias_b->event ? alias_b->event->name : "//";
 
-	/* Don't remove duplicates for hybrid PMUs */
-	if (perf_pmu__is_hybrid(alias_a->pmu) &&
-	    perf_pmu__is_hybrid(alias_b->pmu))
+	/* Different names -> never duplicates */
+	if (strcmp(a_name, b_name))
 		return false;
 
-	return true;
+	/* Don't remove duplicates for different PMUs */
+	a_pmu_name = alias_a->pmu->name ?: "";
+	b_pmu_name = alias_b->pmu->name ?: "";
+	return strcmp(a_pmu_name, b_pmu_name) == 0;
 }
 
 void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
@@ -1690,110 +1708,104 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 			len++;
 	}
 	aliases = zalloc(sizeof(struct sevent) * len);
-	if (!aliases)
-		goto out_enomem;
+	if (!aliases) {
+		pr_err("FATAL: not enough memory to print PMU events\n");
+		return;
+	}
 	pmu = NULL;
 	j = 0;
 	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+		bool is_cpu;
+
 		if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name))
 			continue;
 
-		list_for_each_entry(alias, &pmu->aliases, list) {
-			char *name = alias->desc ? alias->name :
-				format_alias(buf, sizeof(buf), pmu, alias);
-			bool is_cpu = is_pmu_core(pmu->name) ||
-				      perf_pmu__is_hybrid(pmu->name);
+		is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name);
 
+		list_for_each_entry(alias, &pmu->aliases, list) {
 			if (alias->deprecated && !deprecated)
 				continue;
 
 			if (event_glob != NULL &&
-			    !(strglobmatch_nocase(name, event_glob) ||
-			      (!is_cpu && strglobmatch_nocase(alias->name,
-						       event_glob)) ||
+			    !(strglobmatch_nocase(alias->name, event_glob) ||
+			      (!is_cpu &&
+			       strglobmatch_nocase(alias->name, event_glob)) ||
 			      (alias->topic &&
 			       strglobmatch_nocase(alias->topic, event_glob))))
 				continue;
 
-			if (is_cpu && !name_only && !alias->desc)
-				name = format_alias_or(buf, sizeof(buf), pmu, alias);
-
-			aliases[j].name = name;
-			if (is_cpu && !name_only && !alias->desc)
-				aliases[j].name = format_alias_or(buf,
-								  sizeof(buf),
-								  pmu, alias);
-			aliases[j].name = strdup(aliases[j].name);
-			if (!aliases[j].name)
-				goto out_enomem;
-
-			aliases[j].desc = long_desc ? alias->long_desc :
-						alias->desc;
-			aliases[j].topic = alias->topic;
-			aliases[j].str = alias->str;
-			aliases[j].pmu = pmu->name;
-			aliases[j].metric_expr = alias->metric_expr;
-			aliases[j].metric_name = alias->metric_name;
+			aliases[j].event = alias;
+			aliases[j].pmu = pmu;
 			aliases[j].is_cpu = is_cpu;
 			j++;
 		}
 		if (pmu->selectable &&
 		    (event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
-			char *s;
-			if (asprintf(&s, "%s//", pmu->name) < 0)
-				goto out_enomem;
-			aliases[j].name = s;
+			aliases[j].event = NULL;
+			aliases[j].pmu = pmu;
+			aliases[j].is_cpu = is_cpu;
 			j++;
 		}
 	}
 	len = j;
 	qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
 	for (j = 0; j < len; j++) {
+		char *name, *desc;
+
 		/* Skip duplicates */
 		if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
 			continue;
 
+		if (!aliases[j].event) {
+			/* A selectable event. */
+			snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name);
+			name = buf;
+		} else if (aliases[j].event->desc) {
+			name = aliases[j].event->name;
+		} else {
+			if (!name_only && aliases[j].is_cpu) {
+				name = format_alias_or(buf, sizeof(buf), aliases[j].pmu,
+						       aliases[j].event);
+			} else {
+				name = format_alias(buf, sizeof(buf), aliases[j].pmu,
+						    aliases[j].event);
+			}
+		}
 		if (name_only) {
-			printf("%s ", aliases[j].name);
+			printf("%s ", name);
 			continue;
 		}
-		if (aliases[j].desc && !quiet_flag) {
-			if (numdesc++ == 0)
-				printf("\n");
-			if (aliases[j].topic && (!topic ||
-					strcmp(topic, aliases[j].topic))) {
-				printf("%s%s:\n", topic ? "\n" : "",
-						aliases[j].topic);
-				topic = aliases[j].topic;
-			}
-			printf("  %-50s\n", aliases[j].name);
-			printf("%*s", 8, "[");
-			wordwrap(aliases[j].desc, 8, columns, 0);
-			printf("]\n");
-			if (details_flag) {
-				printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str);
-				if (aliases[j].metric_name)
-					printf(" MetricName: %s", aliases[j].metric_name);
-				if (aliases[j].metric_expr)
-					printf(" MetricExpr: %s", aliases[j].metric_expr);
-				putchar('\n');
-			}
-		} else
-			printf("  %-50s [Kernel PMU event]\n", aliases[j].name);
 		printed++;
+		if (!aliases[j].event || !aliases[j].event->desc || quiet_flag) {
+			printf("  %-50s [Kernel PMU event]\n", name);
+			continue;
+		}
+		if (numdesc++ == 0)
+			printf("\n");
+		if (aliases[j].event->topic && (!topic ||
+						strcmp(topic, aliases[j].event->topic))) {
+			printf("%s%s:\n", topic ? "\n" : "", aliases[j].event->topic);
+			topic = aliases[j].event->topic;
+		}
+		printf("  %-50s\n", name);
+		printf("%*s", 8, "[");
+		desc = long_desc ? aliases[j].event->long_desc : aliases[j].event->desc;
+		wordwrap(desc, 8, columns, 0);
+		printf("]\n");
+		if (details_flag) {
+			printf("%*s%s/%s/ ", 8, "", aliases[j].pmu->name, aliases[j].event->str);
+			if (aliases[j].event->metric_name)
+				printf(" MetricName: %s", aliases[j].event->metric_name);
+			if (aliases[j].event->metric_expr)
+				printf(" MetricExpr: %s", aliases[j].event->metric_expr);
+			putchar('\n');
+		}
 	}
 	if (printed && pager_in_use())
 		printf("\n");
-out_free:
-	for (j = 0; j < len; j++)
-		zfree(&aliases[j].name);
+
 	zfree(&aliases);
 	return;
-
-out_enomem:
-	printf("FATAL: not enough memory to print PMU events\n");
-	if (aliases)
-		goto out_free;
 }
 
 bool pmu_have_event(const char *pname, const char *name)
-- 
GitLab


From ddf07bd874be791a63fca5ac0e3def1e15f2338f Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Fri, 11 Nov 2022 12:37:32 +0100
Subject: [PATCH 1527/4122] gpiolib: of: Use correct fwnode for DT-probed chips

The OF node store in chip->fwnode is used to explicitly override the FW
node for a GPIO chip. For chips that use the default FW node (i.e. that
of their parent device), this will be NULL and cause the chip not to be
fully registered.

Instead, use the GPIO device's FW node, which is set to either the node
of the parent device or the explicit override in chip->fwnode.

Fixes: 8afe82550240 ("gpiolib: of: Prepare of_gpiochip_add() / of_gpiochip_remove() for fwnode")
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Tested-by: Robert Marko <robimarko@gmail.com>
Tested-by: Andrew Halaney <ahalaney@redhat.com>
Reviewed-by: Brian Masney <bmasney@redhat.com>
Tested-by: Brian Masney <bmasney@redhat.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib-of.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib-of.c b/drivers/gpio/gpiolib-of.c
index a6871859a59de..4fff7258ee41a 100644
--- a/drivers/gpio/gpiolib-of.c
+++ b/drivers/gpio/gpiolib-of.c
@@ -1063,7 +1063,7 @@ int of_gpiochip_add(struct gpio_chip *chip)
 	struct device_node *np;
 	int ret;
 
-	np = to_of_node(chip->fwnode);
+	np = to_of_node(dev_fwnode(&chip->gpiodev->dev));
 	if (!np)
 		return 0;
 
-- 
GitLab


From 739be9b6a84b23c40b0fb534b749602fb8285e70 Mon Sep 17 00:00:00 2001
From: Aidan MacDonald <aidanmacdonald.0x0@gmail.com>
Date: Sat, 12 Nov 2022 15:29:28 +0000
Subject: [PATCH 1528/4122] gpio: sl28cpld: Replace irqchip mask_invert with
 unmask_base

Remove use of the deprecated mask_invert flag. Inverted mask
registers (where a '1' bit enables an IRQ) can be described more
directly as an unmask register.

Signed-off-by: Aidan MacDonald <aidanmacdonald.0x0@gmail.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sl28cpld.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/gpio/gpio-sl28cpld.c b/drivers/gpio/gpio-sl28cpld.c
index 52404736ac861..2195f88c20485 100644
--- a/drivers/gpio/gpio-sl28cpld.c
+++ b/drivers/gpio/gpio-sl28cpld.c
@@ -70,8 +70,7 @@ static int sl28cpld_gpio_irq_init(struct platform_device *pdev,
 	irq_chip->num_irqs = ARRAY_SIZE(sl28cpld_gpio_irqs);
 	irq_chip->num_regs = 1;
 	irq_chip->status_base = base + GPIO_REG_IP;
-	irq_chip->mask_base = base + GPIO_REG_IE;
-	irq_chip->mask_invert = true;
+	irq_chip->unmask_base = base + GPIO_REG_IE;
 	irq_chip->ack_base = base + GPIO_REG_IP;
 
 	ret = devm_regmap_add_irq_chip_fwnode(dev, dev_fwnode(dev),
-- 
GitLab


From e67ad9354a9b7621341adec4ac2c63d5269f835d Mon Sep 17 00:00:00 2001
From: Albert Zhou <albert.zhou.50@gmail.com>
Date: Tue, 15 Nov 2022 22:38:56 +1100
Subject: [PATCH 1529/4122] PCI: pciehp: Enable by default if USB4 enabled

Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug.  Enable
pciehp by default if USB4 is enabled.

[bhelgaas: squash, update subject, commit logs, tidy whitespace]
Link: https://lore.kernel.org/r/20221115113857.35800-2-albert.zhou.50@gmail.com
Link: https://lore.kernel.org/r/20221115113857.35800-3-albert.zhou.50@gmail.com
Signed-off-by: Albert Zhou <albert.zhou.50@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
---
 drivers/pci/hotplug/Kconfig | 3 +++
 drivers/pci/pcie/Kconfig    | 8 ++++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/hotplug/Kconfig b/drivers/pci/hotplug/Kconfig
index 840a84bb5ee26..48113b210cf93 100644
--- a/drivers/pci/hotplug/Kconfig
+++ b/drivers/pci/hotplug/Kconfig
@@ -6,11 +6,14 @@
 menuconfig HOTPLUG_PCI
 	bool "Support for PCI Hotplug"
 	depends on PCI && SYSFS
+	default y if USB4
 	help
 	  Say Y here if you have a motherboard with a PCI Hotplug controller.
 	  This allows you to add and remove PCI cards while the machine is
 	  powered up and running.
 
+	  Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug.
+
 	  When in doubt, say N.
 
 if HOTPLUG_PCI
diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig
index 788ac8df3f9db..228652a59f27e 100644
--- a/drivers/pci/pcie/Kconfig
+++ b/drivers/pci/pcie/Kconfig
@@ -4,6 +4,7 @@
 #
 config PCIEPORTBUS
 	bool "PCI Express Port Bus support"
+	default y if USB4
 	help
 	  This enables PCI Express Port Bus support. Users can then enable
 	  support for Native Hot-Plug, Advanced Error Reporting, Power
@@ -15,9 +16,12 @@ config PCIEPORTBUS
 config HOTPLUG_PCI_PCIE
 	bool "PCI Express Hotplug driver"
 	depends on HOTPLUG_PCI && PCIEPORTBUS
+	default y if USB4
 	help
-	  Say Y here if you have a motherboard that supports PCI Express Native
-	  Hotplug
+	  Say Y here if you have a motherboard that supports PCIe native
+	  hotplug.
+
+	  Thunderbolt/USB4 PCIe tunneling depends on native PCIe hotplug.
 
 	  When in doubt, say N.
 
-- 
GitLab


From 65d9cc3fd0e7f98964622557c0c94240e68441e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:06:58 +0200
Subject: [PATCH 1530/4122] rtc: abx80x: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in .probe(). The device_id array has to move up for that
to work.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-2-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-abx80x.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c
index 9b0138d07232d..e7f325ced9402 100644
--- a/drivers/rtc/rtc-abx80x.c
+++ b/drivers/rtc/rtc-abx80x.c
@@ -673,13 +673,28 @@ static int abx80x_setup_watchdog(struct abx80x_priv *priv)
 }
 #endif
 
-static int abx80x_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static const struct i2c_device_id abx80x_id[] = {
+	{ "abx80x", ABX80X },
+	{ "ab0801", AB0801 },
+	{ "ab0803", AB0803 },
+	{ "ab0804", AB0804 },
+	{ "ab0805", AB0805 },
+	{ "ab1801", AB1801 },
+	{ "ab1803", AB1803 },
+	{ "ab1804", AB1804 },
+	{ "ab1805", AB1805 },
+	{ "rv1805", RV1805 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, abx80x_id);
+
+static int abx80x_probe(struct i2c_client *client)
 {
 	struct device_node *np = client->dev.of_node;
 	struct abx80x_priv *priv;
 	int i, data, err, trickle_cfg = -EINVAL;
 	char buf[7];
+	const struct i2c_device_id *id = i2c_match_id(abx80x_id, client);
 	unsigned int part = id->driver_data;
 	unsigned int partnumber;
 	unsigned int majrev, minrev;
@@ -847,21 +862,6 @@ static int abx80x_probe(struct i2c_client *client,
 	return devm_rtc_register_device(priv->rtc);
 }
 
-static const struct i2c_device_id abx80x_id[] = {
-	{ "abx80x", ABX80X },
-	{ "ab0801", AB0801 },
-	{ "ab0803", AB0803 },
-	{ "ab0804", AB0804 },
-	{ "ab0805", AB0805 },
-	{ "ab1801", AB1801 },
-	{ "ab1803", AB1803 },
-	{ "ab1804", AB1804 },
-	{ "ab1805", AB1805 },
-	{ "rv1805", RV1805 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, abx80x_id);
-
 #ifdef CONFIG_OF
 static const struct of_device_id abx80x_of_match[] = {
 	{
@@ -914,7 +914,7 @@ static struct i2c_driver abx80x_driver = {
 		.name	= "rtc-abx80x",
 		.of_match_table = of_match_ptr(abx80x_of_match),
 	},
-	.probe		= abx80x_probe,
+	.probe_new	= abx80x_probe,
 	.id_table	= abx80x_id,
 };
 
-- 
GitLab


From 2611e6d743be8f1ad2216622f39e12c63352a457 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:07:00 +0200
Subject: [PATCH 1531/4122] rtc: isl1208: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in .probe().

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-4-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-isl1208.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-isl1208.c b/drivers/rtc/rtc-isl1208.c
index f448a525333e1..73cc6aaf9b8b7 100644
--- a/drivers/rtc/rtc-isl1208.c
+++ b/drivers/rtc/rtc-isl1208.c
@@ -797,7 +797,7 @@ static int isl1208_setup_irq(struct i2c_client *client, int irq)
 }
 
 static int
-isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
+isl1208_probe(struct i2c_client *client)
 {
 	int rc = 0;
 	struct isl1208_state *isl1208;
@@ -821,6 +821,8 @@ isl1208_probe(struct i2c_client *client, const struct i2c_device_id *id)
 		if (!isl1208->config)
 			return -ENODEV;
 	} else {
+		const struct i2c_device_id *id = i2c_match_id(isl1208_id, client);
+
 		if (id->driver_data >= ISL_LAST_ID)
 			return -ENODEV;
 		isl1208->config = &isl1208_configs[id->driver_data];
@@ -906,7 +908,7 @@ static struct i2c_driver isl1208_driver = {
 		.name = "rtc-isl1208",
 		.of_match_table = of_match_ptr(isl1208_of_match),
 	},
-	.probe = isl1208_probe,
+	.probe_new = isl1208_probe,
 	.id_table = isl1208_id,
 };
 
-- 
GitLab


From 67db6f0515d15e6becbd59bc5da4ca2d03d51789 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:07:01 +0200
Subject: [PATCH 1532/4122] rtc: m41t80: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in .probe().

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-5-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-m41t80.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index e0b4d3794320b..d3144ffdebb58 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -876,8 +876,7 @@ static struct notifier_block wdt_notifier = {
  *****************************************************************************
  */
 
-static int m41t80_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int m41t80_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int rc = 0;
@@ -897,11 +896,13 @@ static int m41t80_probe(struct i2c_client *client,
 		return -ENOMEM;
 
 	m41t80_data->client = client;
-	if (client->dev.of_node)
+	if (client->dev.of_node) {
 		m41t80_data->features = (unsigned long)
 			of_device_get_match_data(&client->dev);
-	else
+	} else {
+		const struct i2c_device_id *id = i2c_match_id(m41t80_id, client);
 		m41t80_data->features = id->driver_data;
+	}
 	i2c_set_clientdata(client, m41t80_data);
 
 	m41t80_data->rtc =  devm_rtc_allocate_device(&client->dev);
@@ -1007,7 +1008,7 @@ static struct i2c_driver m41t80_driver = {
 		.of_match_table = of_match_ptr(m41t80_of_match),
 		.pm = &m41t80_pm,
 	},
-	.probe = m41t80_probe,
+	.probe_new = m41t80_probe,
 	.remove = m41t80_remove,
 	.id_table = m41t80_id,
 };
-- 
GitLab


From c050dedb875c2eee920010c1dba458ce5bf8a171 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:07:02 +0200
Subject: [PATCH 1533/4122] rtc: nct3018y: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-6-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-nct3018y.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-nct3018y.c b/drivers/rtc/rtc-nct3018y.c
index d43acd3920ed3..0a3b14c95d902 100644
--- a/drivers/rtc/rtc-nct3018y.c
+++ b/drivers/rtc/rtc-nct3018y.c
@@ -452,8 +452,7 @@ static const struct rtc_class_ops nct3018y_rtc_ops = {
 	.ioctl		= nct3018y_ioctl,
 };
 
-static int nct3018y_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int nct3018y_probe(struct i2c_client *client)
 {
 	struct nct3018y *nct3018y;
 	int err, flags;
@@ -541,7 +540,7 @@ static struct i2c_driver nct3018y_driver = {
 		.name	= "rtc-nct3018y",
 		.of_match_table = of_match_ptr(nct3018y_of_match),
 	},
-	.probe		= nct3018y_probe,
+	.probe_new	= nct3018y_probe,
 	.id_table	= nct3018y_id,
 };
 
-- 
GitLab


From 5418e595f30bf4fde83ebb0121417c0c95cff98e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:07:03 +0200
Subject: [PATCH 1534/4122] rtc: pcf2127: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in .probe(). The device_id array has to move up for that
to work.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-7-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf2127.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/rtc/rtc-pcf2127.c b/drivers/rtc/rtc-pcf2127.c
index 63b275b014bd6..87f4fc9df68b4 100644
--- a/drivers/rtc/rtc-pcf2127.c
+++ b/drivers/rtc/rtc-pcf2127.c
@@ -885,9 +885,17 @@ static const struct regmap_bus pcf2127_i2c_regmap = {
 
 static struct i2c_driver pcf2127_i2c_driver;
 
-static int pcf2127_i2c_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static const struct i2c_device_id pcf2127_i2c_id[] = {
+	{ "pcf2127", 1 },
+	{ "pcf2129", 0 },
+	{ "pca2129", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id);
+
+static int pcf2127_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_match_id(pcf2127_i2c_id, client);
 	struct regmap *regmap;
 	static const struct regmap_config config = {
 		.reg_bits = 8,
@@ -910,20 +918,12 @@ static int pcf2127_i2c_probe(struct i2c_client *client,
 			     pcf2127_i2c_driver.driver.name, id->driver_data);
 }
 
-static const struct i2c_device_id pcf2127_i2c_id[] = {
-	{ "pcf2127", 1 },
-	{ "pcf2129", 0 },
-	{ "pca2129", 0 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, pcf2127_i2c_id);
-
 static struct i2c_driver pcf2127_i2c_driver = {
 	.driver		= {
 		.name	= "rtc-pcf2127-i2c",
 		.of_match_table = of_match_ptr(pcf2127_of_match),
 	},
-	.probe		= pcf2127_i2c_probe,
+	.probe_new	= pcf2127_i2c_probe,
 	.id_table	= pcf2127_i2c_id,
 };
 
-- 
GitLab


From 8d94da6678702fac20ed3e1421b8b1ad03368a8e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:07:04 +0200
Subject: [PATCH 1535/4122] rtc: rs5c372: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in .probe().

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-8-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rs5c372.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 9562c477e1c96..5047afefcceb7 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -791,8 +791,7 @@ static int rs5c_oscillator_setup(struct rs5c372 *rs5c372)
 	return 0;
 }
 
-static int rs5c372_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int rs5c372_probe(struct i2c_client *client)
 {
 	int err = 0;
 	int smbus_mode = 0;
@@ -826,11 +825,13 @@ static int rs5c372_probe(struct i2c_client *client,
 
 	rs5c372->client = client;
 	i2c_set_clientdata(client, rs5c372);
-	if (client->dev.of_node)
+	if (client->dev.of_node) {
 		rs5c372->type = (enum rtc_type)
 			of_device_get_match_data(&client->dev);
-	else
+	} else {
+		const struct i2c_device_id *id = i2c_match_id(rs5c372_id, client);
 		rs5c372->type = id->driver_data;
+	}
 
 	/* we read registers 0x0f then 0x00-0x0f; skip the first one */
 	rs5c372->regs = &rs5c372->buf[1];
@@ -920,7 +921,7 @@ static struct i2c_driver rs5c372_driver = {
 		.name	= "rtc-rs5c372",
 		.of_match_table = of_match_ptr(rs5c372_of_match),
 	},
-	.probe		= rs5c372_probe,
+	.probe_new	= rs5c372_probe,
 	.remove		= rs5c372_remove,
 	.id_table	= rs5c372_id,
 };
-- 
GitLab


From 84c2fb386f71d364cf00ec9f4596a2b279e3ca54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:07:05 +0200
Subject: [PATCH 1536/4122] rtc: rv8803: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in .probe(). The device_id array has to move up for that
to work.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-9-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rv8803.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/rtc/rtc-rv8803.c b/drivers/rtc/rtc-rv8803.c
index 3527a0521e9b2..b581b6d5ad731 100644
--- a/drivers/rtc/rtc-rv8803.c
+++ b/drivers/rtc/rtc-rv8803.c
@@ -576,8 +576,16 @@ static int rv8803_regs_configure(struct rv8803_data *rv8803)
 	return 0;
 }
 
-static int rv8803_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static const struct i2c_device_id rv8803_id[] = {
+	{ "rv8803", rv_8803 },
+	{ "rv8804", rx_8804 },
+	{ "rx8803", rx_8803 },
+	{ "rx8900", rx_8900 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, rv8803_id);
+
+static int rv8803_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct rv8803_data *rv8803;
@@ -605,11 +613,14 @@ static int rv8803_probe(struct i2c_client *client,
 
 	mutex_init(&rv8803->flags_lock);
 	rv8803->client = client;
-	if (client->dev.of_node)
+	if (client->dev.of_node) {
 		rv8803->type = (enum rv8803_type)
 			of_device_get_match_data(&client->dev);
-	else
+	} else {
+		const struct i2c_device_id *id = i2c_match_id(rv8803_id, client);
+
 		rv8803->type = id->driver_data;
+	}
 	i2c_set_clientdata(client, rv8803);
 
 	flags = rv8803_read_reg(client, RV8803_FLAG);
@@ -666,15 +677,6 @@ static int rv8803_probe(struct i2c_client *client,
 	return 0;
 }
 
-static const struct i2c_device_id rv8803_id[] = {
-	{ "rv8803", rv_8803 },
-	{ "rv8804", rx_8804 },
-	{ "rx8803", rx_8803 },
-	{ "rx8900", rx_8900 },
-	{ }
-};
-MODULE_DEVICE_TABLE(i2c, rv8803_id);
-
 static const __maybe_unused struct of_device_id rv8803_of_match[] = {
 	{
 		.compatible = "microcrystal,rv8803",
@@ -701,7 +703,7 @@ static struct i2c_driver rv8803_driver = {
 		.name = "rtc-rv8803",
 		.of_match_table = of_match_ptr(rv8803_of_match),
 	},
-	.probe		= rv8803_probe,
+	.probe_new	= rv8803_probe,
 	.id_table	= rv8803_id,
 };
 module_i2c_driver(rv8803_driver);
-- 
GitLab


From 8ffb7733e162d26393523bd95c5bfffa6e09baf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 21 Oct 2022 15:07:06 +0200
Subject: [PATCH 1537/4122] rtc: rx8025: Convert to .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in .probe().

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221021130706.178687-10-u.kleine-koenig@pengutronix.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rx8025.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index dde86f3e2a4bd..77d3cb08b5ec2 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -519,9 +519,9 @@ static const struct attribute_group rx8025_attr_group = {
 	.attrs	= rx8025_attrs,
 };
 
-static int rx8025_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int rx8025_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_match_id(rx8025_id, client);
 	struct i2c_adapter *adapter = client->adapter;
 	struct rx8025_data *rx8025;
 	int err = 0;
@@ -580,7 +580,7 @@ static struct i2c_driver rx8025_driver = {
 	.driver = {
 		.name = "rtc-rx8025",
 	},
-	.probe		= rx8025_probe,
+	.probe_new	= rx8025_probe,
 	.id_table	= rx8025_id,
 };
 
-- 
GitLab


From 9800f24f7bd5b99fb4fc4ce981427102e2e15a1c Mon Sep 17 00:00:00 2001
From: Yushan Zhou <katrinzhou@tencent.com>
Date: Mon, 7 Nov 2022 17:25:44 +0800
Subject: [PATCH 1538/4122] rtc: rzn1: Check return value in rzn1_rtc_probe

The rzn1_rtc_probe() function utilizes devm_pm_runtime_enable()
but wasn't checking the return value. Fix it by adding missing
check.

Fixes: deeb4b5393e1 ("rtc: rzn1: Add new RTC driver")

Signed-off-by: Yushan Zhou <katrinzhou@tencent.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/r/20221107092544.3721053-1-zys.zljxml@gmail.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rzn1.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-rzn1.c b/drivers/rtc/rtc-rzn1.c
index ac788799c8e3e..0d36bc50197c1 100644
--- a/drivers/rtc/rtc-rzn1.c
+++ b/drivers/rtc/rtc-rzn1.c
@@ -355,7 +355,9 @@ static int rzn1_rtc_probe(struct platform_device *pdev)
 	set_bit(RTC_FEATURE_ALARM_RES_MINUTE, rtc->rtcdev->features);
 	clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->rtcdev->features);
 
-	devm_pm_runtime_enable(&pdev->dev);
+	ret = devm_pm_runtime_enable(&pdev->dev);
+	if (ret < 0)
+		return ret;
 	ret = pm_runtime_resume_and_get(&pdev->dev);
 	if (ret < 0)
 		return ret;
-- 
GitLab


From f2fa14b0b586bad3ff2c0d908b8a44b22eaebe15 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 8 Nov 2022 13:02:53 +0100
Subject: [PATCH 1539/4122] dt-bindings: rtc: qcom-pm8xxx: document
 qcom,pm8921-rtc as fallback of qcom,pm8018-rtc

The PM8018 RTC is used as compatible with PM8921 RTC on the MDM9615,
document this situation.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20220928-mdm9615-dt-schema-fixes-v5-1-bbb120c6766a@linaro.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
index 23ab5bb4f3956..0a7aa29563c1c 100644
--- a/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/qcom-pm8xxx-rtc.yaml
@@ -11,12 +11,16 @@ maintainers:
 
 properties:
   compatible:
-    enum:
-      - qcom,pm8058-rtc
-      - qcom,pm8921-rtc
-      - qcom,pm8941-rtc
-      - qcom,pm8018-rtc
-      - qcom,pmk8350-rtc
+    oneOf:
+      - enum:
+          - qcom,pm8058-rtc
+          - qcom,pm8921-rtc
+          - qcom,pm8941-rtc
+          - qcom,pmk8350-rtc
+      - items:
+          - enum:
+              - qcom,pm8018-rtc
+          - const: qcom,pm8921-rtc
 
   reg:
     minItems: 1
-- 
GitLab


From 741a2830734bc22238e5c248630311e401481ecc Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 8 Nov 2022 13:02:54 +0100
Subject: [PATCH 1540/4122] rtc: pm8xxx: drop unused pm8018 compatible

The PM8018 compatible is always used with PM8921 fallback, so PM8018
compatible can be safely removed from device ID table

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Link: https://lore.kernel.org/r/20220928-mdm9615-dt-schema-fixes-v5-2-bbb120c6766a@linaro.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pm8xxx.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/rtc/rtc-pm8xxx.c b/drivers/rtc/rtc-pm8xxx.c
index dc6d1476baa59..716e5d9ad74d1 100644
--- a/drivers/rtc/rtc-pm8xxx.c
+++ b/drivers/rtc/rtc-pm8xxx.c
@@ -461,7 +461,6 @@ static const struct pm8xxx_rtc_regs pmk8350_regs = {
  */
 static const struct of_device_id pm8xxx_id_table[] = {
 	{ .compatible = "qcom,pm8921-rtc", .data = &pm8921_regs },
-	{ .compatible = "qcom,pm8018-rtc", .data = &pm8921_regs },
 	{ .compatible = "qcom,pm8058-rtc", .data = &pm8058_regs },
 	{ .compatible = "qcom,pm8941-rtc", .data = &pm8941_regs },
 	{ .compatible = "qcom,pmk8350-rtc", .data = &pmk8350_regs },
-- 
GitLab


From f27efee663701f0e93351cf052677214fed40a42 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Fri, 28 Oct 2022 17:54:00 -0700
Subject: [PATCH 1541/4122] rtc: cros-ec: Limit RTC alarm range if needed

RTC chips on some older Chromebooks can only handle alarms less than 24
hours in the future. Attempts to set an alarm beyond that range fails.
The most severe impact of this limitation is that suspend requests fail
if alarmtimer_suspend() tries to set an alarm for more than 24 hours
in the future.

Try to set the real-time alarm to just below 24 hours if setting it to
a larger value fails to work around the problem. While not perfect, it
is better than just failing the call. A similar workaround is already
implemented in the rtc-tps6586x driver.

Drop error messages in cros_ec_rtc_get() and cros_ec_rtc_set() since the
calling code also logs an error and to avoid spurious error messages if
setting the alarm ultimately succeeds.

Cc: Brian Norris <briannorris@chromium.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Commit: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Tzung-Bi Shih <tzungbi@kernel.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Tested-by: Brian Norris <briannorris@chromium.org>
Link: https://lore.kernel.org/r/20221029005400.2712577-1-linux@roeck-us.net
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-cros-ec.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/rtc/rtc-cros-ec.c b/drivers/rtc/rtc-cros-ec.c
index 887f5193e253d..a3ec066d80667 100644
--- a/drivers/rtc/rtc-cros-ec.c
+++ b/drivers/rtc/rtc-cros-ec.c
@@ -14,6 +14,8 @@
 
 #define DRV_NAME	"cros-ec-rtc"
 
+#define SECS_PER_DAY	(24 * 60 * 60)
+
 /**
  * struct cros_ec_rtc - Driver data for EC RTC
  *
@@ -43,13 +45,8 @@ static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command,
 	msg.msg.insize = sizeof(msg.data);
 
 	ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg);
-	if (ret < 0) {
-		dev_err(cros_ec->dev,
-			"error getting %s from EC: %d\n",
-			command == EC_CMD_RTC_GET_VALUE ? "time" : "alarm",
-			ret);
+	if (ret < 0)
 		return ret;
-	}
 
 	*response = msg.data.time;
 
@@ -59,7 +56,7 @@ static int cros_ec_rtc_get(struct cros_ec_device *cros_ec, u32 command,
 static int cros_ec_rtc_set(struct cros_ec_device *cros_ec, u32 command,
 			   u32 param)
 {
-	int ret = 0;
+	int ret;
 	struct {
 		struct cros_ec_command msg;
 		struct ec_response_rtc data;
@@ -71,13 +68,8 @@ static int cros_ec_rtc_set(struct cros_ec_device *cros_ec, u32 command,
 	msg.data.time = param;
 
 	ret = cros_ec_cmd_xfer_status(cros_ec, &msg.msg);
-	if (ret < 0) {
-		dev_err(cros_ec->dev, "error setting %s on EC: %d\n",
-			command == EC_CMD_RTC_SET_VALUE ? "time" : "alarm",
-			ret);
+	if (ret < 0)
 		return ret;
-	}
-
 	return 0;
 }
 
@@ -190,8 +182,21 @@ static int cros_ec_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 	ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM, alarm_offset);
 	if (ret < 0) {
-		dev_err(dev, "error setting alarm: %d\n", ret);
-		return ret;
+		if (ret == -EINVAL && alarm_offset >= SECS_PER_DAY) {
+			/*
+			 * RTC chips on some older Chromebooks can only handle
+			 * alarms up to 24h in the future. Try to set an alarm
+			 * below that limit to avoid suspend failures.
+			 */
+			ret = cros_ec_rtc_set(cros_ec, EC_CMD_RTC_SET_ALARM,
+					      SECS_PER_DAY - 1);
+		}
+
+		if (ret < 0) {
+			dev_err(dev, "error setting alarm in %u seconds: %d\n",
+				alarm_offset, ret);
+			return ret;
+		}
 	}
 
 	return 0;
-- 
GitLab


From 5dc8356830428656c3a00dd702fb9102fe43550f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 13 Sep 2022 22:49:05 +0800
Subject: [PATCH 1542/4122] rtc: ds1302: remove unnecessary spi_set_drvdata()

Remove unnecessary spi_set_drvdata() in ds1302_remove(), the driver_data
will be set to NULL in device_unbind_cleanup() after calling ->remove().
After this, ds1302_remove() is an empty function, so remove it too.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20220913144905.2004924-1-yangyingliang@huawei.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-ds1302.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index 6d66ab5a8b176..ecc7d03079320 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -185,11 +185,6 @@ static int ds1302_probe(struct spi_device *spi)
 	return 0;
 }
 
-static void ds1302_remove(struct spi_device *spi)
-{
-	spi_set_drvdata(spi, NULL);
-}
-
 #ifdef CONFIG_OF
 static const struct of_device_id ds1302_dt_ids[] = {
 	{ .compatible = "maxim,ds1302", },
@@ -208,7 +203,6 @@ static struct spi_driver ds1302_driver = {
 	.driver.name	= "rtc-ds1302",
 	.driver.of_match_table = of_match_ptr(ds1302_dt_ids),
 	.probe		= ds1302_probe,
-	.remove		= ds1302_remove,
 	.id_table	= ds1302_spi_ids,
 };
 
-- 
GitLab


From eb633de6abcb3003a8a2c03377d39a91a8d57d20 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 19 Sep 2022 16:38:12 +0800
Subject: [PATCH 1543/4122] rtc: s3c: Switch to use dev_err_probe() helper

In the probe path, dev_err() can be replace with dev_err_probe()
which will check if error code is -EPROBE_DEFER and prints the
error name.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20220919083812.755082-1-yangyingliang@huawei.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-s3c.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index db529733c9c46..8fc5efde3e0b3 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -429,14 +429,9 @@ static int s3c_rtc_probe(struct platform_device *pdev)
 		return PTR_ERR(info->base);
 
 	info->rtc_clk = devm_clk_get(&pdev->dev, "rtc");
-	if (IS_ERR(info->rtc_clk)) {
-		ret = PTR_ERR(info->rtc_clk);
-		if (ret != -EPROBE_DEFER)
-			dev_err(&pdev->dev, "failed to find rtc clock\n");
-		else
-			dev_dbg(&pdev->dev, "probe deferred due to missing rtc clk\n");
-		return ret;
-	}
+	if (IS_ERR(info->rtc_clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(info->rtc_clk),
+				     "failed to find rtc clock\n");
 	ret = clk_prepare_enable(info->rtc_clk);
 	if (ret)
 		return ret;
-- 
GitLab


From 97e78b64d138021c837a30bfa78201caf27c16b9 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 19 Oct 2022 17:29:34 +0200
Subject: [PATCH 1544/4122] rtc: remove davinci rtc driver

The Davinci dm365 SoC support was removed, so the rtc driver
has no remaining users.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Kevin Hilman <khilman@baylibre.com>
Link: https://lore.kernel.org/r/20221019152947.3857217-9-arnd@kernel.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/Kconfig       |  10 -
 drivers/rtc/Makefile      |   1 -
 drivers/rtc/rtc-davinci.c | 512 --------------------------------------
 3 files changed, 523 deletions(-)
 delete mode 100644 drivers/rtc/rtc-davinci.c

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index bb63edb507da4..b45fd08d51dc2 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -1351,16 +1351,6 @@ config RTC_DRV_ASM9260
 	  This driver can also be built as a module. If so, the module
 	  will be called rtc-asm9260.
 
-config RTC_DRV_DAVINCI
-	tristate "TI DaVinci RTC"
-	depends on ARCH_DAVINCI_DM365 || COMPILE_TEST
-	help
-	  If you say yes here you get support for the RTC on the
-	  DaVinci platforms (DM365).
-
-	  This driver can also be built as a module. If so, the module
-	  will be called rtc-davinci.
-
 config RTC_DRV_DIGICOLOR
 	tristate "Conexant Digicolor RTC"
 	depends on ARCH_DIGICOLOR || COMPILE_TEST
diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile
index aab22bc634321..791994eb913d9 100644
--- a/drivers/rtc/Makefile
+++ b/drivers/rtc/Makefile
@@ -44,7 +44,6 @@ obj-$(CONFIG_RTC_DRV_CROS_EC)	+= rtc-cros-ec.o
 obj-$(CONFIG_RTC_DRV_DA9052)	+= rtc-da9052.o
 obj-$(CONFIG_RTC_DRV_DA9055)	+= rtc-da9055.o
 obj-$(CONFIG_RTC_DRV_DA9063)	+= rtc-da9063.o
-obj-$(CONFIG_RTC_DRV_DAVINCI)	+= rtc-davinci.o
 obj-$(CONFIG_RTC_DRV_DIGICOLOR)	+= rtc-digicolor.o
 obj-$(CONFIG_RTC_DRV_DM355EVM)	+= rtc-dm355evm.o
 obj-$(CONFIG_RTC_DRV_DS1216)	+= rtc-ds1216.o
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
deleted file mode 100644
index 6bef0f2353da4..0000000000000
--- a/drivers/rtc/rtc-davinci.c
+++ /dev/null
@@ -1,512 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DaVinci Power Management and Real Time Clock Driver for TI platforms
- *
- * Copyright (C) 2009 Texas Instruments, Inc
- *
- * Author: Miguel Aguilar <miguel.aguilar@ridgerun.com>
- */
-#include <linux/kernel.h>
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/delay.h>
-#include <linux/spinlock.h>
-#include <linux/rtc.h>
-#include <linux/bcd.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-
-/*
- * The DaVinci RTC is a simple RTC with the following
- * Sec: 0 - 59 : BCD count
- * Min: 0 - 59 : BCD count
- * Hour: 0 - 23 : BCD count
- * Day: 0 - 0x7FFF(32767) : Binary count ( Over 89 years )
- */
-
-/* PRTC interface registers */
-#define DAVINCI_PRTCIF_PID		0x00
-#define PRTCIF_CTLR			0x04
-#define PRTCIF_LDATA			0x08
-#define PRTCIF_UDATA			0x0C
-#define PRTCIF_INTEN			0x10
-#define PRTCIF_INTFLG			0x14
-
-/* PRTCIF_CTLR bit fields */
-#define PRTCIF_CTLR_BUSY		BIT(31)
-#define PRTCIF_CTLR_SIZE		BIT(25)
-#define PRTCIF_CTLR_DIR			BIT(24)
-#define PRTCIF_CTLR_BENU_MSB		BIT(23)
-#define PRTCIF_CTLR_BENU_3RD_BYTE	BIT(22)
-#define PRTCIF_CTLR_BENU_2ND_BYTE	BIT(21)
-#define PRTCIF_CTLR_BENU_LSB		BIT(20)
-#define PRTCIF_CTLR_BENU_MASK		(0x00F00000)
-#define PRTCIF_CTLR_BENL_MSB		BIT(19)
-#define PRTCIF_CTLR_BENL_3RD_BYTE	BIT(18)
-#define PRTCIF_CTLR_BENL_2ND_BYTE	BIT(17)
-#define PRTCIF_CTLR_BENL_LSB		BIT(16)
-#define PRTCIF_CTLR_BENL_MASK		(0x000F0000)
-
-/* PRTCIF_INTEN bit fields */
-#define PRTCIF_INTEN_RTCSS		BIT(1)
-#define PRTCIF_INTEN_RTCIF		BIT(0)
-#define PRTCIF_INTEN_MASK		(PRTCIF_INTEN_RTCSS \
-					| PRTCIF_INTEN_RTCIF)
-
-/* PRTCIF_INTFLG bit fields */
-#define PRTCIF_INTFLG_RTCSS		BIT(1)
-#define PRTCIF_INTFLG_RTCIF		BIT(0)
-#define PRTCIF_INTFLG_MASK		(PRTCIF_INTFLG_RTCSS \
-					| PRTCIF_INTFLG_RTCIF)
-
-/* PRTC subsystem registers */
-#define PRTCSS_RTC_INTC_EXTENA1		(0x0C)
-#define PRTCSS_RTC_CTRL			(0x10)
-#define PRTCSS_RTC_WDT			(0x11)
-#define PRTCSS_RTC_TMR0			(0x12)
-#define PRTCSS_RTC_TMR1			(0x13)
-#define PRTCSS_RTC_CCTRL		(0x14)
-#define PRTCSS_RTC_SEC			(0x15)
-#define PRTCSS_RTC_MIN			(0x16)
-#define PRTCSS_RTC_HOUR			(0x17)
-#define PRTCSS_RTC_DAY0			(0x18)
-#define PRTCSS_RTC_DAY1			(0x19)
-#define PRTCSS_RTC_AMIN			(0x1A)
-#define PRTCSS_RTC_AHOUR		(0x1B)
-#define PRTCSS_RTC_ADAY0		(0x1C)
-#define PRTCSS_RTC_ADAY1		(0x1D)
-#define PRTCSS_RTC_CLKC_CNT		(0x20)
-
-/* PRTCSS_RTC_INTC_EXTENA1 */
-#define PRTCSS_RTC_INTC_EXTENA1_MASK	(0x07)
-
-/* PRTCSS_RTC_CTRL bit fields */
-#define PRTCSS_RTC_CTRL_WDTBUS		BIT(7)
-#define PRTCSS_RTC_CTRL_WEN		BIT(6)
-#define PRTCSS_RTC_CTRL_WDRT		BIT(5)
-#define PRTCSS_RTC_CTRL_WDTFLG		BIT(4)
-#define PRTCSS_RTC_CTRL_TE		BIT(3)
-#define PRTCSS_RTC_CTRL_TIEN		BIT(2)
-#define PRTCSS_RTC_CTRL_TMRFLG		BIT(1)
-#define PRTCSS_RTC_CTRL_TMMD		BIT(0)
-
-/* PRTCSS_RTC_CCTRL bit fields */
-#define PRTCSS_RTC_CCTRL_CALBUSY	BIT(7)
-#define PRTCSS_RTC_CCTRL_DAEN		BIT(5)
-#define PRTCSS_RTC_CCTRL_HAEN		BIT(4)
-#define PRTCSS_RTC_CCTRL_MAEN		BIT(3)
-#define PRTCSS_RTC_CCTRL_ALMFLG		BIT(2)
-#define PRTCSS_RTC_CCTRL_AIEN		BIT(1)
-#define PRTCSS_RTC_CCTRL_CAEN		BIT(0)
-
-static DEFINE_SPINLOCK(davinci_rtc_lock);
-
-struct davinci_rtc {
-	struct rtc_device		*rtc;
-	void __iomem			*base;
-	int				irq;
-};
-
-static inline void rtcif_write(struct davinci_rtc *davinci_rtc,
-			       u32 val, u32 addr)
-{
-	writel(val, davinci_rtc->base + addr);
-}
-
-static inline u32 rtcif_read(struct davinci_rtc *davinci_rtc, u32 addr)
-{
-	return readl(davinci_rtc->base + addr);
-}
-
-static inline void rtcif_wait(struct davinci_rtc *davinci_rtc)
-{
-	while (rtcif_read(davinci_rtc, PRTCIF_CTLR) & PRTCIF_CTLR_BUSY)
-		cpu_relax();
-}
-
-static inline void rtcss_write(struct davinci_rtc *davinci_rtc,
-			       unsigned long val, u8 addr)
-{
-	rtcif_wait(davinci_rtc);
-
-	rtcif_write(davinci_rtc, PRTCIF_CTLR_BENL_LSB | addr, PRTCIF_CTLR);
-	rtcif_write(davinci_rtc, val, PRTCIF_LDATA);
-
-	rtcif_wait(davinci_rtc);
-}
-
-static inline u8 rtcss_read(struct davinci_rtc *davinci_rtc, u8 addr)
-{
-	rtcif_wait(davinci_rtc);
-
-	rtcif_write(davinci_rtc, PRTCIF_CTLR_DIR | PRTCIF_CTLR_BENL_LSB | addr,
-		    PRTCIF_CTLR);
-
-	rtcif_wait(davinci_rtc);
-
-	return rtcif_read(davinci_rtc, PRTCIF_LDATA);
-}
-
-static inline void davinci_rtcss_calendar_wait(struct davinci_rtc *davinci_rtc)
-{
-	while (rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL) &
-	       PRTCSS_RTC_CCTRL_CALBUSY)
-		cpu_relax();
-}
-
-static irqreturn_t davinci_rtc_interrupt(int irq, void *class_dev)
-{
-	struct davinci_rtc *davinci_rtc = class_dev;
-	unsigned long events = 0;
-	u32 irq_flg;
-	u8 alm_irq, tmr_irq;
-	u8 rtc_ctrl, rtc_cctrl;
-	int ret = IRQ_NONE;
-
-	irq_flg = rtcif_read(davinci_rtc, PRTCIF_INTFLG) &
-		  PRTCIF_INTFLG_RTCSS;
-
-	alm_irq = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL) &
-		  PRTCSS_RTC_CCTRL_ALMFLG;
-
-	tmr_irq = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL) &
-		  PRTCSS_RTC_CTRL_TMRFLG;
-
-	if (irq_flg) {
-		if (alm_irq) {
-			events |= RTC_IRQF | RTC_AF;
-			rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL);
-			rtc_cctrl |=  PRTCSS_RTC_CCTRL_ALMFLG;
-			rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL);
-		} else if (tmr_irq) {
-			events |= RTC_IRQF | RTC_PF;
-			rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL);
-			rtc_ctrl |=  PRTCSS_RTC_CTRL_TMRFLG;
-			rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
-		}
-
-		rtcif_write(davinci_rtc, PRTCIF_INTFLG_RTCSS,
-				    PRTCIF_INTFLG);
-		rtc_update_irq(davinci_rtc->rtc, 1, events);
-
-		ret = IRQ_HANDLED;
-	}
-
-	return ret;
-}
-
-static int
-davinci_rtc_ioctl(struct device *dev, unsigned int cmd, unsigned long arg)
-{
-	struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
-	u8 rtc_ctrl;
-	unsigned long flags;
-	int ret = 0;
-
-	spin_lock_irqsave(&davinci_rtc_lock, flags);
-
-	rtc_ctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CTRL);
-
-	switch (cmd) {
-	case RTC_WIE_ON:
-		rtc_ctrl |= PRTCSS_RTC_CTRL_WEN | PRTCSS_RTC_CTRL_WDTFLG;
-		break;
-	case RTC_WIE_OFF:
-		rtc_ctrl &= ~PRTCSS_RTC_CTRL_WEN;
-		break;
-	default:
-		ret = -ENOIOCTLCMD;
-	}
-
-	rtcss_write(davinci_rtc, rtc_ctrl, PRTCSS_RTC_CTRL);
-
-	spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
-	return ret;
-}
-
-static void convertfromdays(u16 days, struct rtc_time *tm)
-{
-	int tmp_days, year, mon;
-
-	for (year = 2000;; year++) {
-		tmp_days = rtc_year_days(1, 12, year);
-		if (days >= tmp_days)
-			days -= tmp_days;
-		else {
-			for (mon = 0;; mon++) {
-				tmp_days = rtc_month_days(mon, year);
-				if (days >= tmp_days) {
-					days -= tmp_days;
-				} else {
-					tm->tm_year = year - 1900;
-					tm->tm_mon = mon;
-					tm->tm_mday = days + 1;
-					break;
-				}
-			}
-			break;
-		}
-	}
-}
-
-static void convert2days(u16 *days, struct rtc_time *tm)
-{
-	int i;
-	*days = 0;
-
-	for (i = 2000; i < 1900 + tm->tm_year; i++)
-		*days += rtc_year_days(1, 12, i);
-
-	*days += rtc_year_days(tm->tm_mday, tm->tm_mon, 1900 + tm->tm_year);
-}
-
-static int davinci_rtc_read_time(struct device *dev, struct rtc_time *tm)
-{
-	struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
-	u16 days = 0;
-	u8 day0, day1;
-	unsigned long flags;
-
-	spin_lock_irqsave(&davinci_rtc_lock, flags);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	tm->tm_sec = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_SEC));
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	tm->tm_min = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_MIN));
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	tm->tm_hour = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_HOUR));
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	day0 = rtcss_read(davinci_rtc, PRTCSS_RTC_DAY0);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	day1 = rtcss_read(davinci_rtc, PRTCSS_RTC_DAY1);
-
-	spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
-	days |= day1;
-	days <<= 8;
-	days |= day0;
-
-	convertfromdays(days, tm);
-
-	return 0;
-}
-
-static int davinci_rtc_set_time(struct device *dev, struct rtc_time *tm)
-{
-	struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
-	u16 days;
-	u8 rtc_cctrl;
-	unsigned long flags;
-
-	convert2days(&days, tm);
-
-	spin_lock_irqsave(&davinci_rtc_lock, flags);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, bin2bcd(tm->tm_sec), PRTCSS_RTC_SEC);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, bin2bcd(tm->tm_min), PRTCSS_RTC_MIN);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, bin2bcd(tm->tm_hour), PRTCSS_RTC_HOUR);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, days & 0xFF, PRTCSS_RTC_DAY0);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, (days & 0xFF00) >> 8, PRTCSS_RTC_DAY1);
-
-	rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL);
-	rtc_cctrl |= PRTCSS_RTC_CCTRL_CAEN;
-	rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL);
-
-	spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
-	return 0;
-}
-
-static int davinci_rtc_alarm_irq_enable(struct device *dev,
-					unsigned int enabled)
-{
-	struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
-	unsigned long flags;
-	u8 rtc_cctrl = rtcss_read(davinci_rtc, PRTCSS_RTC_CCTRL);
-
-	spin_lock_irqsave(&davinci_rtc_lock, flags);
-
-	if (enabled)
-		rtc_cctrl |= PRTCSS_RTC_CCTRL_DAEN |
-			     PRTCSS_RTC_CCTRL_HAEN |
-			     PRTCSS_RTC_CCTRL_MAEN |
-			     PRTCSS_RTC_CCTRL_ALMFLG |
-			     PRTCSS_RTC_CCTRL_AIEN;
-	else
-		rtc_cctrl &= ~PRTCSS_RTC_CCTRL_AIEN;
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, rtc_cctrl, PRTCSS_RTC_CCTRL);
-
-	spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
-	return 0;
-}
-
-static int davinci_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
-	u16 days = 0;
-	u8 day0, day1;
-	unsigned long flags;
-
-	alm->time.tm_sec = 0;
-
-	spin_lock_irqsave(&davinci_rtc_lock, flags);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	alm->time.tm_min = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_AMIN));
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	alm->time.tm_hour = bcd2bin(rtcss_read(davinci_rtc, PRTCSS_RTC_AHOUR));
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	day0 = rtcss_read(davinci_rtc, PRTCSS_RTC_ADAY0);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	day1 = rtcss_read(davinci_rtc, PRTCSS_RTC_ADAY1);
-
-	spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-	days |= day1;
-	days <<= 8;
-	days |= day0;
-
-	convertfromdays(days, &alm->time);
-
-	alm->pending = !!(rtcss_read(davinci_rtc,
-			  PRTCSS_RTC_CCTRL) &
-			PRTCSS_RTC_CCTRL_AIEN);
-	alm->enabled = alm->pending && device_may_wakeup(dev);
-
-	return 0;
-}
-
-static int davinci_rtc_set_alarm(struct device *dev, struct rtc_wkalrm *alm)
-{
-	struct davinci_rtc *davinci_rtc = dev_get_drvdata(dev);
-	unsigned long flags;
-	u16 days;
-
-	convert2days(&days, &alm->time);
-
-	spin_lock_irqsave(&davinci_rtc_lock, flags);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, bin2bcd(alm->time.tm_min), PRTCSS_RTC_AMIN);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, bin2bcd(alm->time.tm_hour), PRTCSS_RTC_AHOUR);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, days & 0xFF, PRTCSS_RTC_ADAY0);
-
-	davinci_rtcss_calendar_wait(davinci_rtc);
-	rtcss_write(davinci_rtc, (days & 0xFF00) >> 8, PRTCSS_RTC_ADAY1);
-
-	spin_unlock_irqrestore(&davinci_rtc_lock, flags);
-
-	return 0;
-}
-
-static const struct rtc_class_ops davinci_rtc_ops = {
-	.ioctl			= davinci_rtc_ioctl,
-	.read_time		= davinci_rtc_read_time,
-	.set_time		= davinci_rtc_set_time,
-	.alarm_irq_enable	= davinci_rtc_alarm_irq_enable,
-	.read_alarm		= davinci_rtc_read_alarm,
-	.set_alarm		= davinci_rtc_set_alarm,
-};
-
-static int __init davinci_rtc_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct davinci_rtc *davinci_rtc;
-	int ret = 0;
-
-	davinci_rtc = devm_kzalloc(&pdev->dev, sizeof(struct davinci_rtc), GFP_KERNEL);
-	if (!davinci_rtc)
-		return -ENOMEM;
-
-	davinci_rtc->irq = platform_get_irq(pdev, 0);
-	if (davinci_rtc->irq < 0)
-		return davinci_rtc->irq;
-
-	davinci_rtc->base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(davinci_rtc->base))
-		return PTR_ERR(davinci_rtc->base);
-
-	platform_set_drvdata(pdev, davinci_rtc);
-
-	davinci_rtc->rtc = devm_rtc_allocate_device(&pdev->dev);
-	if (IS_ERR(davinci_rtc->rtc))
-		return PTR_ERR(davinci_rtc->rtc);
-
-	davinci_rtc->rtc->ops = &davinci_rtc_ops;
-	davinci_rtc->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
-	davinci_rtc->rtc->range_max = RTC_TIMESTAMP_BEGIN_2000 + (1 << 16) * 86400ULL - 1;
-
-	rtcif_write(davinci_rtc, PRTCIF_INTFLG_RTCSS, PRTCIF_INTFLG);
-	rtcif_write(davinci_rtc, 0, PRTCIF_INTEN);
-	rtcss_write(davinci_rtc, 0, PRTCSS_RTC_INTC_EXTENA1);
-
-	rtcss_write(davinci_rtc, 0, PRTCSS_RTC_CTRL);
-	rtcss_write(davinci_rtc, 0, PRTCSS_RTC_CCTRL);
-
-	ret = devm_request_irq(dev, davinci_rtc->irq, davinci_rtc_interrupt,
-			  0, "davinci_rtc", davinci_rtc);
-	if (ret < 0) {
-		dev_err(dev, "unable to register davinci RTC interrupt\n");
-		return ret;
-	}
-
-	/* Enable interrupts */
-	rtcif_write(davinci_rtc, PRTCIF_INTEN_RTCSS, PRTCIF_INTEN);
-	rtcss_write(davinci_rtc, PRTCSS_RTC_INTC_EXTENA1_MASK,
-			    PRTCSS_RTC_INTC_EXTENA1);
-
-	rtcss_write(davinci_rtc, PRTCSS_RTC_CCTRL_CAEN, PRTCSS_RTC_CCTRL);
-
-	device_init_wakeup(&pdev->dev, 0);
-
-	return devm_rtc_register_device(davinci_rtc->rtc);
-}
-
-static int __exit davinci_rtc_remove(struct platform_device *pdev)
-{
-	struct davinci_rtc *davinci_rtc = platform_get_drvdata(pdev);
-
-	device_init_wakeup(&pdev->dev, 0);
-
-	rtcif_write(davinci_rtc, 0, PRTCIF_INTEN);
-
-	return 0;
-}
-
-static struct platform_driver davinci_rtc_driver = {
-	.remove		= __exit_p(davinci_rtc_remove),
-	.driver		= {
-		.name = "rtc_davinci",
-	},
-};
-
-module_platform_driver_probe(davinci_rtc_driver, davinci_rtc_probe);
-
-MODULE_AUTHOR("Miguel Aguilar <miguel.aguilar@ridgerun.com>");
-MODULE_DESCRIPTION("Texas Instruments DaVinci PRTC Driver");
-MODULE_LICENSE("GPL");
-- 
GitLab


From 1ff56edf137a2f034fee9b9a398ddcd8cb7a5a34 Mon Sep 17 00:00:00 2001
From: Zhang Jianhua <chris.zjh@huawei.com>
Date: Tue, 6 Sep 2022 22:30:37 +0800
Subject: [PATCH 1545/4122] rtc: fsl-ftm-alarm: Use module_platform_driver
 replace device_initcall

The ftm_rtc_driver has been registered while module init, however there
is not unregister step for module exit, now use the macro
module_platform_driver replace device_initcall, which can register and
unregister platform driver automatically.

Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com>
Link: https://lore.kernel.org/r/20220906143037.1455317-1-chris.zjh@huawei.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-fsl-ftm-alarm.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/rtc/rtc-fsl-ftm-alarm.c b/drivers/rtc/rtc-fsl-ftm-alarm.c
index c0df49fb978ce..3d7c4077fe1c6 100644
--- a/drivers/rtc/rtc-fsl-ftm-alarm.c
+++ b/drivers/rtc/rtc-fsl-ftm-alarm.c
@@ -327,12 +327,7 @@ static struct platform_driver ftm_rtc_driver = {
 	},
 };
 
-static int __init ftm_alarm_init(void)
-{
-	return platform_driver_register(&ftm_rtc_driver);
-}
-
-device_initcall(ftm_alarm_init);
+module_platform_driver(ftm_rtc_driver);
 
 MODULE_DESCRIPTION("NXP/Freescale FlexTimer alarm driver");
 MODULE_AUTHOR("Biwen Li <biwen.li@nxp.com>");
-- 
GitLab


From c69bffe199270ce001d5764985a8e414c7e05fee Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Mon, 24 Oct 2022 18:55:49 +0200
Subject: [PATCH 1546/4122] dt-bindings: rtc: convert hym8563 bindings to
 json-schema

Convert RTC binding for Haoyu Microelectronics HYM8563 to Device Tree
Schema format.

Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221024165549.74574-7-sebastian.reichel@collabora.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../devicetree/bindings/rtc/haoyu,hym8563.txt | 30 ----------
 .../bindings/rtc/haoyu,hym8563.yaml           | 56 +++++++++++++++++++
 2 files changed, 56 insertions(+), 30 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt
 create mode 100644 Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml

diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt
deleted file mode 100644
index a8934fe2ab4c1..0000000000000
--- a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.txt
+++ /dev/null
@@ -1,30 +0,0 @@
-Haoyu Microelectronics HYM8563 Real Time Clock
-
-The HYM8563 provides basic rtc and alarm functionality
-as well as a clock output of up to 32kHz.
-
-Required properties:
-- compatible: should be: "haoyu,hym8563"
-- reg: i2c address
-- #clock-cells: the value should be 0
-
-Optional properties:
-- clock-output-names: From common clock binding
-- interrupts: rtc alarm/event interrupt
-
-Example:
-
-hym8563: hym8563@51 {
-	compatible = "haoyu,hym8563";
-	reg = <0x51>;
-
-	interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
-
-	#clock-cells = <0>;
-};
-
-device {
-...
-	clocks = <&hym8563>;
-...
-};
diff --git a/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml
new file mode 100644
index 0000000000000..0b9f39ef0edc3
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/haoyu,hym8563.yaml
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/haoyu,hym8563.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Haoyu Microelectronics HYM8563 RTC
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+properties:
+  compatible:
+    const: haoyu,hym8563
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  "#clock-cells":
+    const: 0
+
+  clock-output-names:
+    description: From common clock binding to override the default output clock name.
+    maxItems: 1
+
+  wakeup-source:
+    description: Enables wake up of host system on alarm.
+
+allOf:
+  - $ref: rtc.yaml
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - "#clock-cells"
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        rtc@51 {
+            compatible = "haoyu,hym8563";
+            reg = <0x51>;
+            interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
+            #clock-cells = <0>;
+        };
+    };
-- 
GitLab


From f8513363b0b7155afaee09cca777fc608dc957e3 Mon Sep 17 00:00:00 2001
From: ye xingchen <ye.xingchen@zte.com.cn>
Date: Mon, 5 Sep 2022 09:01:19 +0000
Subject: [PATCH 1547/4122] rtc: s35390a: Remove the unneeded result variable

Return the value s35390a_set_reg() directly instead of storing it in
another redundant variable.

Reported-by: Zeal Robot <zealci@zte.com.cn>
Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Link: https://lore.kernel.org/r/20220905090119.335121-1-ye.xingchen@zte.com.cn
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-s35390a.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-s35390a.c b/drivers/rtc/rtc-s35390a.c
index 81d97b1d31591..b18daaf72b17d 100644
--- a/drivers/rtc/rtc-s35390a.c
+++ b/drivers/rtc/rtc-s35390a.c
@@ -211,7 +211,7 @@ static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct i2c_client *client = to_i2c_client(dev);
 	struct s35390a	*s35390a = i2c_get_clientdata(client);
-	int i, err;
+	int i;
 	char buf[7], status;
 
 	dev_dbg(&client->dev, "%s: tm is secs=%d, mins=%d, hours=%d mday=%d, "
@@ -234,9 +234,7 @@ static int s35390a_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	for (i = 0; i < 7; ++i)
 		buf[i] = bitrev8(buf[i]);
 
-	err = s35390a_set_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf));
-
-	return err;
+	return s35390a_set_reg(s35390a, S35390A_CMD_TIME1, buf, sizeof(buf));
 }
 
 static int s35390a_rtc_read_time(struct device *dev, struct rtc_time *tm)
-- 
GitLab


From 8d816c1eaa752546fa3221f5027af0a667ff0c8f Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 4 Nov 2022 12:02:25 +0100
Subject: [PATCH 1548/4122] rtc: isl12022: add support for temperature sensor

The isl12022 has built-in temperature compensation effective over the
range -40C to +85C. It exposes the average of the last two temperature
measurements as a 10-bit value in half-Kelvins. Make this available
via the hwmon framework.

Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Link: https://lore.kernel.org/r/20221104110225.2219761-1-linux@rasmusvillemoes.dk
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-isl12022.c | 94 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/drivers/rtc/rtc-isl12022.c b/drivers/rtc/rtc-isl12022.c
index ca677c4265e6c..a3b0de3393f57 100644
--- a/drivers/rtc/rtc-isl12022.c
+++ b/drivers/rtc/rtc-isl12022.c
@@ -17,6 +17,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/regmap.h>
+#include <linux/hwmon.h>
 
 /* ISL register offsets */
 #define ISL12022_REG_SC		0x00
@@ -30,6 +31,9 @@
 #define ISL12022_REG_SR		0x07
 #define ISL12022_REG_INT	0x08
 
+#define ISL12022_REG_BETA	0x0d
+#define ISL12022_REG_TEMP_L	0x28
+
 /* ISL register bits */
 #define ISL12022_HR_MIL		(1 << 7)	/* military or 24 hour time */
 
@@ -38,6 +42,7 @@
 
 #define ISL12022_INT_WRTC	(1 << 6)
 
+#define ISL12022_BETA_TSE	(1 << 7)
 
 static struct i2c_driver isl12022_driver;
 
@@ -46,6 +51,93 @@ struct isl12022 {
 	struct regmap *regmap;
 };
 
+static umode_t isl12022_hwmon_is_visible(const void *data,
+					 enum hwmon_sensor_types type,
+					 u32 attr, int channel)
+{
+	if (type == hwmon_temp && attr == hwmon_temp_input)
+		return 0444;
+
+	return 0;
+}
+
+/*
+ * A user-initiated temperature conversion is not started by this function,
+ * so the temperature is updated once every ~60 seconds.
+ */
+static int isl12022_hwmon_read_temp(struct device *dev, long *mC)
+{
+	struct isl12022 *isl12022 = dev_get_drvdata(dev);
+	struct regmap *regmap = isl12022->regmap;
+	u8 temp_buf[2];
+	int temp, ret;
+
+	ret = regmap_bulk_read(regmap, ISL12022_REG_TEMP_L,
+			       temp_buf, sizeof(temp_buf));
+	if (ret)
+		return ret;
+	/*
+	 * Temperature is represented as a 10-bit number, unit half-Kelvins.
+	 */
+	temp = (temp_buf[1] << 8) | temp_buf[0];
+	temp *= 500;
+	temp -= 273000;
+
+	*mC = temp;
+
+	return 0;
+}
+
+static int isl12022_hwmon_read(struct device *dev,
+			       enum hwmon_sensor_types type,
+			       u32 attr, int channel, long *val)
+{
+	if (type == hwmon_temp && attr == hwmon_temp_input)
+		return isl12022_hwmon_read_temp(dev, val);
+
+	return -EOPNOTSUPP;
+}
+
+static const struct hwmon_channel_info *isl12022_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(temp, HWMON_T_INPUT),
+	NULL
+};
+
+static const struct hwmon_ops isl12022_hwmon_ops = {
+	.is_visible = isl12022_hwmon_is_visible,
+	.read = isl12022_hwmon_read,
+};
+
+static const struct hwmon_chip_info isl12022_hwmon_chip_info = {
+	.ops = &isl12022_hwmon_ops,
+	.info = isl12022_hwmon_info,
+};
+
+static void isl12022_hwmon_register(struct device *dev)
+{
+	struct isl12022 *isl12022;
+	struct device *hwmon;
+	int ret;
+
+	if (!IS_REACHABLE(CONFIG_HWMON))
+		return;
+
+	isl12022 = dev_get_drvdata(dev);
+
+	ret = regmap_update_bits(isl12022->regmap, ISL12022_REG_BETA,
+				 ISL12022_BETA_TSE, ISL12022_BETA_TSE);
+	if (ret) {
+		dev_warn(dev, "unable to enable temperature sensor\n");
+		return;
+	}
+
+	hwmon = devm_hwmon_device_register_with_info(dev, "isl12022", isl12022,
+						     &isl12022_hwmon_chip_info,
+						     NULL);
+	if (IS_ERR(hwmon))
+		dev_warn(dev, "unable to register hwmon device: %pe\n", hwmon);
+}
+
 /*
  * In the routines that deal directly with the isl12022 hardware, we use
  * rtc_time -- month 0-11, hour 0-23, yr = calendar year-epoch.
@@ -160,6 +252,8 @@ static int isl12022_probe(struct i2c_client *client)
 		return PTR_ERR(isl12022->regmap);
 	}
 
+	isl12022_hwmon_register(&client->dev);
+
 	isl12022->rtc = devm_rtc_allocate_device(&client->dev);
 	if (IS_ERR(isl12022->rtc))
 		return PTR_ERR(isl12022->rtc);
-- 
GitLab


From e59b3c730b44f042540319d62cba73054fd928c8 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 6 Nov 2022 09:00:51 +0100
Subject: [PATCH 1549/4122] rtc: Include <linux/kstrtox.h> when appropriate

The kstrto<something>() functions have been moved from kernel.h to
kstrtox.h.

So, include the latter directly in the appropriate files.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Link: https://lore.kernel.org/r/786421fd0435a32206288904a1f879436a717529.1667721637.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-abx80x.c   | 1 +
 drivers/rtc/rtc-bq32k.c    | 1 +
 drivers/rtc/rtc-ds1307.c   | 1 +
 drivers/rtc/rtc-rv3029c2.c | 1 +
 drivers/rtc/rtc-rx8025.c   | 1 +
 drivers/rtc/sysfs.c        | 1 +
 6 files changed, 6 insertions(+)

diff --git a/drivers/rtc/rtc-abx80x.c b/drivers/rtc/rtc-abx80x.c
index e7f325ced9402..2e0e6432901b8 100644
--- a/drivers/rtc/rtc-abx80x.c
+++ b/drivers/rtc/rtc-abx80x.c
@@ -12,6 +12,7 @@
 
 #include <linux/bcd.h>
 #include <linux/i2c.h>
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/rtc.h>
diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c
index 6d6a55efb9cc7..967ddc6bf76d6 100644
--- a/drivers/rtc/rtc-bq32k.c
+++ b/drivers/rtc/rtc-bq32k.c
@@ -13,6 +13,7 @@
 #include <linux/i2c.h>
 #include <linux/rtc.h>
 #include <linux/init.h>
+#include <linux/kstrtox.h>
 #include <linux/errno.h>
 #include <linux/bcd.h>
 
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index d51565bcc1896..7c2276cf5514a 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -11,6 +11,7 @@
 #include <linux/bcd.h>
 #include <linux/i2c.h>
 #include <linux/init.h>
+#include <linux/kstrtox.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/property.h>
diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c
index eb483a30bd92f..e4fdd47ae066c 100644
--- a/drivers/rtc/rtc-rv3029c2.c
+++ b/drivers/rtc/rtc-rv3029c2.c
@@ -17,6 +17,7 @@
 #include <linux/of.h>
 #include <linux/hwmon.h>
 #include <linux/hwmon-sysfs.h>
+#include <linux/kstrtox.h>
 #include <linux/regmap.h>
 
 /* Register map */
diff --git a/drivers/rtc/rtc-rx8025.c b/drivers/rtc/rtc-rx8025.c
index 77d3cb08b5ec2..331c20d4d843c 100644
--- a/drivers/rtc/rtc-rx8025.c
+++ b/drivers/rtc/rtc-rx8025.c
@@ -19,6 +19,7 @@
 #include <linux/bitops.h>
 #include <linux/i2c.h>
 #include <linux/kernel.h>
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/rtc.h>
 
diff --git a/drivers/rtc/sysfs.c b/drivers/rtc/sysfs.c
index 00f1945bcb7e1..e3062c4d3f2ce 100644
--- a/drivers/rtc/sysfs.c
+++ b/drivers/rtc/sysfs.c
@@ -6,6 +6,7 @@
  * Author: Alessandro Zummo <a.zummo@towertech.it>
  */
 
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 #include <linux/rtc.h>
 
-- 
GitLab


From 4dfe05bdc1ade79b943d4979a2e2a8b5ef68fbb5 Mon Sep 17 00:00:00 2001
From: Ian Abbott <abbotti@mev.co.uk>
Date: Thu, 27 Oct 2022 17:32:49 +0100
Subject: [PATCH 1550/4122] rtc: ds1347: fix value written to century register

In `ds1347_set_time()`, the wrong value is being written to the
`DS1347_CENTURY_REG` register.  It needs to be converted to BCD.  Fix
it.

Fixes: 147dae76dbb9 ("rtc: ds1347: handle century register")
Cc: <stable@vger.kernel.org> # v5.5+
Signed-off-by: Ian Abbott <abbotti@mev.co.uk>
Link: https://lore.kernel.org/r/20221027163249.447416-1-abbotti@mev.co.uk
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-ds1347.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-ds1347.c b/drivers/rtc/rtc-ds1347.c
index 157bf5209ac40..a40c1a52df659 100644
--- a/drivers/rtc/rtc-ds1347.c
+++ b/drivers/rtc/rtc-ds1347.c
@@ -112,7 +112,7 @@ static int ds1347_set_time(struct device *dev, struct rtc_time *dt)
 		return err;
 
 	century = (dt->tm_year / 100) + 19;
-	err = regmap_write(map, DS1347_CENTURY_REG, century);
+	err = regmap_write(map, DS1347_CENTURY_REG, bin2bcd(century));
 	if (err)
 		return err;
 
-- 
GitLab


From 60da73808298ff2cfa9f165d55eb3d7aa7078601 Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Thu, 10 Nov 2022 17:08:10 +0800
Subject: [PATCH 1551/4122] rtc: class: Fix potential memleak in
 devm_rtc_allocate_device()

devm_rtc_allocate_device() will alloc a rtc_device first, and then run
dev_set_name(). If dev_set_name() failed, the rtc_device will memleak.
Move devm_add_action_or_reset() in front of dev_set_name() to prevent
memleak.

unreferenced object 0xffff888110a53000 (size 2048):
  comm "python3", pid 470, jiffies 4296078308 (age 58.882s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 08 30 a5 10 81 88 ff ff  .........0......
    08 30 a5 10 81 88 ff ff 00 00 00 00 00 00 00 00  .0..............
  backtrace:
    [<000000004aac0364>] kmalloc_trace+0x21/0x110
    [<000000000ff02202>] devm_rtc_allocate_device+0xd4/0x400
    [<000000001bdf5639>] devm_rtc_device_register+0x1a/0x80
    [<00000000351bf81c>] rx4581_probe+0xdd/0x110 [rtc_rx4581]
    [<00000000f0eba0ae>] spi_probe+0xde/0x130
    [<00000000bff89ee8>] really_probe+0x175/0x3f0
    [<00000000128e8d84>] __driver_probe_device+0xe6/0x170
    [<00000000ee5bf913>] device_driver_attach+0x32/0x80
    [<00000000f3f28f92>] bind_store+0x10b/0x1a0
    [<000000009ff812d8>] drv_attr_store+0x49/0x70
    [<000000008139c323>] sysfs_kf_write+0x8d/0xb0
    [<00000000b6146e01>] kernfs_fop_write_iter+0x214/0x2d0
    [<00000000ecbe3895>] vfs_write+0x61a/0x7d0
    [<00000000aa2196ea>] ksys_write+0xc8/0x190
    [<0000000046a600f5>] do_syscall_64+0x37/0x90
    [<00000000541a336f>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: 24d23181e43d ("rtc: class: check return value when calling dev_set_name()")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Reviewed-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221110090810.11225-1-shangxiaojing@huawei.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/class.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index e48223c00c672..e5b7b48cffac0 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -374,11 +374,11 @@ struct rtc_device *devm_rtc_allocate_device(struct device *dev)
 
 	rtc->id = id;
 	rtc->dev.parent = dev;
-	err = dev_set_name(&rtc->dev, "rtc%d", id);
+	err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc);
 	if (err)
 		return ERR_PTR(err);
 
-	err = devm_add_action_or_reset(dev, devm_rtc_release_device, rtc);
+	err = dev_set_name(&rtc->dev, "rtc%d", id);
 	if (err)
 		return ERR_PTR(err);
 
-- 
GitLab


From 508ccdfb86b21da37ad091003a4d4567709d5dfb Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Nov 2022 13:07:08 +0100
Subject: [PATCH 1552/4122] rtc: cmos: Call cmos_wake_setup() from
 cmos_do_probe()

Notice that cmos_wake_setup() is the only user of acpi_rtc_info and it
can operate on the cmos_rtc variable directly, so it need not set the
platform_data pointer before cmos_do_probe() is called.  Instead, it
can be called by cmos_do_probe() in the case when the platform_data
pointer is not set to implement the default behavior (which is to use
the FADT information as long as ACPI support is enabled).

Modify the code accordingly.

While at it, drop a comment that doesn't really match the code it is
supposed to be describing.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/4803444.31r3eYUQgx@kreacher
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-cmos.c | 47 ++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 25 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 58cc2bae2f8a0..a84262265d6d9 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -744,6 +744,8 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
 		return IRQ_NONE;
 }
 
+static void cmos_wake_setup(struct device *dev);
+
 #ifdef	CONFIG_PNP
 #define	INITSECTION
 
@@ -827,19 +829,27 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 		if (info->address_space)
 			address_space = info->address_space;
 
-		if (info->rtc_day_alarm && info->rtc_day_alarm < 128)
-			cmos_rtc.day_alrm = info->rtc_day_alarm;
-		if (info->rtc_mon_alarm && info->rtc_mon_alarm < 128)
-			cmos_rtc.mon_alrm = info->rtc_mon_alarm;
-		if (info->rtc_century && info->rtc_century < 128)
-			cmos_rtc.century = info->rtc_century;
+		cmos_rtc.day_alrm = info->rtc_day_alarm;
+		cmos_rtc.mon_alrm = info->rtc_mon_alarm;
+		cmos_rtc.century = info->rtc_century;
 
 		if (info->wake_on && info->wake_off) {
 			cmos_rtc.wake_on = info->wake_on;
 			cmos_rtc.wake_off = info->wake_off;
 		}
+	} else {
+		cmos_wake_setup(dev);
 	}
 
+	if (cmos_rtc.day_alrm >= 128)
+		cmos_rtc.day_alrm = 0;
+
+	if (cmos_rtc.mon_alrm >= 128)
+		cmos_rtc.mon_alrm = 0;
+
+	if (cmos_rtc.century >= 128)
+		cmos_rtc.century = 0;
+
 	cmos_rtc.dev = dev;
 	dev_set_drvdata(dev, &cmos_rtc);
 
@@ -1275,13 +1285,6 @@ static void use_acpi_alarm_quirks(void)
 static inline void use_acpi_alarm_quirks(void) { }
 #endif
 
-/* Every ACPI platform has a mc146818 compatible "cmos rtc".  Here we find
- * its device node and pass extra config data.  This helps its driver use
- * capabilities that the now-obsolete mc146818 didn't have, and informs it
- * that this board's RTC is wakeup-capable (per ACPI spec).
- */
-static struct cmos_rtc_board_info acpi_rtc_info;
-
 static void cmos_wake_setup(struct device *dev)
 {
 	if (acpi_disabled)
@@ -1289,26 +1292,23 @@ static void cmos_wake_setup(struct device *dev)
 
 	use_acpi_alarm_quirks();
 
-	acpi_rtc_info.wake_on = rtc_wake_on;
-	acpi_rtc_info.wake_off = rtc_wake_off;
+	cmos_rtc.wake_on = rtc_wake_on;
+	cmos_rtc.wake_off = rtc_wake_off;
 
-	/* workaround bug in some ACPI tables */
+	/* ACPI tables bug workaround. */
 	if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) {
 		dev_dbg(dev, "bogus FADT month_alarm (%d)\n",
 			acpi_gbl_FADT.month_alarm);
 		acpi_gbl_FADT.month_alarm = 0;
 	}
 
-	acpi_rtc_info.rtc_day_alarm = acpi_gbl_FADT.day_alarm;
-	acpi_rtc_info.rtc_mon_alarm = acpi_gbl_FADT.month_alarm;
-	acpi_rtc_info.rtc_century = acpi_gbl_FADT.century;
+	cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm;
+	cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm;
+	cmos_rtc.century = acpi_gbl_FADT.century;
 
-	/* NOTE:  S4_RTC_WAKE is NOT currently useful to Linux */
 	if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE)
 		dev_info(dev, "RTC can wake from S4\n");
 
-	dev->platform_data = &acpi_rtc_info;
-
 	/* RTC always wakes from S1/S2/S3, and often S4/STD */
 	device_init_wakeup(dev, 1);
 }
@@ -1359,8 +1359,6 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 {
 	int irq, ret;
 
-	cmos_wake_setup(&pnp->dev);
-
 	if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) {
 		irq = 0;
 #ifdef CONFIG_X86
@@ -1468,7 +1466,6 @@ static int __init cmos_platform_probe(struct platform_device *pdev)
 	int irq, ret;
 
 	cmos_of_init(pdev);
-	cmos_wake_setup(&pdev->dev);
 
 	if (RTC_IOMAPPED)
 		resource = platform_get_resource(pdev, IORESOURCE_IO, 0);
-- 
GitLab


From 375bbba09692fe4c5218eddee8e312dd733fa846 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Nov 2022 13:09:07 +0100
Subject: [PATCH 1553/4122] rtc: cmos: Call rtc_wake_setup() from
 cmos_do_probe()

To reduce code duplication, move the invocation of rtc_wake_setup()
into cmos_do_probe() and simplify the callers of the latter.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/2143522.irdbgypaU6@kreacher
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-cmos.c | 28 ++++++++++++----------------
 1 file changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index a84262265d6d9..583116994a37f 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -744,6 +744,7 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
 		return IRQ_NONE;
 }
 
+static inline void rtc_wake_setup(struct device *dev);
 static void cmos_wake_setup(struct device *dev);
 
 #ifdef	CONFIG_PNP
@@ -938,6 +939,13 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	nvmem_cfg.size = address_space - NVRAM_OFFSET;
 	devm_rtc_nvmem_register(cmos_rtc.rtc, &nvmem_cfg);
 
+	/*
+	 * Everything has gone well so far, so by default register a handler for
+	 * the ACPI RTC fixed event.
+	 */
+	if (!info)
+		rtc_wake_setup(dev);
+
 	dev_info(dev, "%s%s, %d bytes nvram%s\n",
 		 !is_valid_irq(rtc_irq) ? "no alarms" :
 		 cmos_rtc.mon_alrm ? "alarms up to one year" :
@@ -1357,7 +1365,7 @@ static void rtc_wake_setup(struct device *dev)
 
 static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 {
-	int irq, ret;
+	int irq;
 
 	if (pnp_port_start(pnp, 0) == 0x70 && !pnp_irq_valid(pnp, 0)) {
 		irq = 0;
@@ -1373,13 +1381,7 @@ static int cmos_pnp_probe(struct pnp_dev *pnp, const struct pnp_device_id *id)
 		irq = pnp_irq(pnp, 0);
 	}
 
-	ret = cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq);
-	if (ret)
-		return ret;
-
-	rtc_wake_setup(&pnp->dev);
-
-	return 0;
+	return cmos_do_probe(&pnp->dev, pnp_get_resource(pnp, IORESOURCE_IO, 0), irq);
 }
 
 static void cmos_pnp_remove(struct pnp_dev *pnp)
@@ -1463,7 +1465,7 @@ static inline void cmos_of_init(struct platform_device *pdev) {}
 static int __init cmos_platform_probe(struct platform_device *pdev)
 {
 	struct resource *resource;
-	int irq, ret;
+	int irq;
 
 	cmos_of_init(pdev);
 
@@ -1475,13 +1477,7 @@ static int __init cmos_platform_probe(struct platform_device *pdev)
 	if (irq < 0)
 		irq = -1;
 
-	ret = cmos_do_probe(&pdev->dev, resource, irq);
-	if (ret)
-		return ret;
-
-	rtc_wake_setup(&pdev->dev);
-
-	return 0;
+	return cmos_do_probe(&pdev->dev, resource, irq);
 }
 
 static int cmos_platform_remove(struct platform_device *pdev)
-- 
GitLab


From dca4d3b71c8a09a16951add656711fbd6f5bfbb0 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Nov 2022 13:09:32 +0100
Subject: [PATCH 1554/4122] rtc: cmos: Eliminate forward declarations of some
 functions

Reorder the ACPI-related code before cmos_do_probe() so as to eliminate
excessive forward declarations of some functions.

While at it, for consistency, add the inline modifier to the
definitions of empty stub static funtions and remove it from the
corresponding definitions of functions with non-empty bodies.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/13157911.uLZWGnKmhe@kreacher
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-cmos.c | 304 ++++++++++++++++++++---------------------
 1 file changed, 149 insertions(+), 155 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 583116994a37f..2a21d8281aa61 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -744,8 +744,155 @@ static irqreturn_t cmos_interrupt(int irq, void *p)
 		return IRQ_NONE;
 }
 
-static inline void rtc_wake_setup(struct device *dev);
-static void cmos_wake_setup(struct device *dev);
+#ifdef	CONFIG_ACPI
+
+#include <linux/acpi.h>
+
+static u32 rtc_handler(void *context)
+{
+	struct device *dev = context;
+	struct cmos_rtc *cmos = dev_get_drvdata(dev);
+	unsigned char rtc_control = 0;
+	unsigned char rtc_intr;
+	unsigned long flags;
+
+
+	/*
+	 * Always update rtc irq when ACPI is used as RTC Alarm.
+	 * Or else, ACPI SCI is enabled during suspend/resume only,
+	 * update rtc irq in that case.
+	 */
+	if (cmos_use_acpi_alarm())
+		cmos_interrupt(0, (void *)cmos->rtc);
+	else {
+		/* Fix me: can we use cmos_interrupt() here as well? */
+		spin_lock_irqsave(&rtc_lock, flags);
+		if (cmos_rtc.suspend_ctrl)
+			rtc_control = CMOS_READ(RTC_CONTROL);
+		if (rtc_control & RTC_AIE) {
+			cmos_rtc.suspend_ctrl &= ~RTC_AIE;
+			CMOS_WRITE(rtc_control, RTC_CONTROL);
+			rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
+			rtc_update_irq(cmos->rtc, 1, rtc_intr);
+		}
+		spin_unlock_irqrestore(&rtc_lock, flags);
+	}
+
+	pm_wakeup_hard_event(dev);
+	acpi_clear_event(ACPI_EVENT_RTC);
+	acpi_disable_event(ACPI_EVENT_RTC, 0);
+	return ACPI_INTERRUPT_HANDLED;
+}
+
+static void rtc_wake_setup(struct device *dev)
+{
+	if (acpi_disabled)
+		return;
+
+	acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev);
+	/*
+	 * After the RTC handler is installed, the Fixed_RTC event should
+	 * be disabled. Only when the RTC alarm is set will it be enabled.
+	 */
+	acpi_clear_event(ACPI_EVENT_RTC);
+	acpi_disable_event(ACPI_EVENT_RTC, 0);
+}
+
+static void rtc_wake_on(struct device *dev)
+{
+	acpi_clear_event(ACPI_EVENT_RTC);
+	acpi_enable_event(ACPI_EVENT_RTC, 0);
+}
+
+static void rtc_wake_off(struct device *dev)
+{
+	acpi_disable_event(ACPI_EVENT_RTC, 0);
+}
+
+#ifdef CONFIG_X86
+/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */
+static void use_acpi_alarm_quirks(void)
+{
+	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
+		return;
+
+	if (!is_hpet_enabled())
+		return;
+
+	if (dmi_get_bios_year() < 2015)
+		return;
+
+	use_acpi_alarm = true;
+}
+#else
+static inline void use_acpi_alarm_quirks(void) { }
+#endif
+
+static void cmos_wake_setup(struct device *dev)
+{
+	if (acpi_disabled)
+		return;
+
+	use_acpi_alarm_quirks();
+
+	cmos_rtc.wake_on = rtc_wake_on;
+	cmos_rtc.wake_off = rtc_wake_off;
+
+	/* ACPI tables bug workaround. */
+	if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) {
+		dev_dbg(dev, "bogus FADT month_alarm (%d)\n",
+			acpi_gbl_FADT.month_alarm);
+		acpi_gbl_FADT.month_alarm = 0;
+	}
+
+	cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm;
+	cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm;
+	cmos_rtc.century = acpi_gbl_FADT.century;
+
+	if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE)
+		dev_info(dev, "RTC can wake from S4\n");
+
+	/* RTC always wakes from S1/S2/S3, and often S4/STD */
+	device_init_wakeup(dev, 1);
+}
+
+static void cmos_check_acpi_rtc_status(struct device *dev,
+					      unsigned char *rtc_control)
+{
+	struct cmos_rtc *cmos = dev_get_drvdata(dev);
+	acpi_event_status rtc_status;
+	acpi_status status;
+
+	if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC)
+		return;
+
+	status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status);
+	if (ACPI_FAILURE(status)) {
+		dev_err(dev, "Could not get RTC status\n");
+	} else if (rtc_status & ACPI_EVENT_FLAG_SET) {
+		unsigned char mask;
+		*rtc_control &= ~RTC_AIE;
+		CMOS_WRITE(*rtc_control, RTC_CONTROL);
+		mask = CMOS_READ(RTC_INTR_FLAGS);
+		rtc_update_irq(cmos->rtc, 1, mask);
+	}
+}
+
+#else /* !CONFIG_ACPI */
+
+static inline void rtc_wake_setup(struct device *dev)
+{
+}
+
+static inline void cmos_wake_setup(struct device *dev)
+{
+}
+
+static inline void cmos_check_acpi_rtc_status(struct device *dev,
+					      unsigned char *rtc_control)
+{
+}
+#endif /* CONFIG_ACPI */
 
 #ifdef	CONFIG_PNP
 #define	INITSECTION
@@ -1140,9 +1287,6 @@ static void cmos_check_wkalrm(struct device *dev)
 	}
 }
 
-static void cmos_check_acpi_rtc_status(struct device *dev,
-				       unsigned char *rtc_control);
-
 static int __maybe_unused cmos_resume(struct device *dev)
 {
 	struct cmos_rtc	*cmos = dev_get_drvdata(dev);
@@ -1209,156 +1353,6 @@ static SIMPLE_DEV_PM_OPS(cmos_pm_ops, cmos_suspend, cmos_resume);
  * predate even PNPBIOS should set up platform_bus devices.
  */
 
-#ifdef	CONFIG_ACPI
-
-#include <linux/acpi.h>
-
-static u32 rtc_handler(void *context)
-{
-	struct device *dev = context;
-	struct cmos_rtc *cmos = dev_get_drvdata(dev);
-	unsigned char rtc_control = 0;
-	unsigned char rtc_intr;
-	unsigned long flags;
-
-
-	/*
-	 * Always update rtc irq when ACPI is used as RTC Alarm.
-	 * Or else, ACPI SCI is enabled during suspend/resume only,
-	 * update rtc irq in that case.
-	 */
-	if (cmos_use_acpi_alarm())
-		cmos_interrupt(0, (void *)cmos->rtc);
-	else {
-		/* Fix me: can we use cmos_interrupt() here as well? */
-		spin_lock_irqsave(&rtc_lock, flags);
-		if (cmos_rtc.suspend_ctrl)
-			rtc_control = CMOS_READ(RTC_CONTROL);
-		if (rtc_control & RTC_AIE) {
-			cmos_rtc.suspend_ctrl &= ~RTC_AIE;
-			CMOS_WRITE(rtc_control, RTC_CONTROL);
-			rtc_intr = CMOS_READ(RTC_INTR_FLAGS);
-			rtc_update_irq(cmos->rtc, 1, rtc_intr);
-		}
-		spin_unlock_irqrestore(&rtc_lock, flags);
-	}
-
-	pm_wakeup_hard_event(dev);
-	acpi_clear_event(ACPI_EVENT_RTC);
-	acpi_disable_event(ACPI_EVENT_RTC, 0);
-	return ACPI_INTERRUPT_HANDLED;
-}
-
-static inline void rtc_wake_setup(struct device *dev)
-{
-	if (acpi_disabled)
-		return;
-
-	acpi_install_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler, dev);
-	/*
-	 * After the RTC handler is installed, the Fixed_RTC event should
-	 * be disabled. Only when the RTC alarm is set will it be enabled.
-	 */
-	acpi_clear_event(ACPI_EVENT_RTC);
-	acpi_disable_event(ACPI_EVENT_RTC, 0);
-}
-
-static void rtc_wake_on(struct device *dev)
-{
-	acpi_clear_event(ACPI_EVENT_RTC);
-	acpi_enable_event(ACPI_EVENT_RTC, 0);
-}
-
-static void rtc_wake_off(struct device *dev)
-{
-	acpi_disable_event(ACPI_EVENT_RTC, 0);
-}
-
-#ifdef CONFIG_X86
-/* Enable use_acpi_alarm mode for Intel platforms no earlier than 2015 */
-static void use_acpi_alarm_quirks(void)
-{
-	if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
-		return;
-
-	if (!is_hpet_enabled())
-		return;
-
-	if (dmi_get_bios_year() < 2015)
-		return;
-
-	use_acpi_alarm = true;
-}
-#else
-static inline void use_acpi_alarm_quirks(void) { }
-#endif
-
-static void cmos_wake_setup(struct device *dev)
-{
-	if (acpi_disabled)
-		return;
-
-	use_acpi_alarm_quirks();
-
-	cmos_rtc.wake_on = rtc_wake_on;
-	cmos_rtc.wake_off = rtc_wake_off;
-
-	/* ACPI tables bug workaround. */
-	if (acpi_gbl_FADT.month_alarm && !acpi_gbl_FADT.day_alarm) {
-		dev_dbg(dev, "bogus FADT month_alarm (%d)\n",
-			acpi_gbl_FADT.month_alarm);
-		acpi_gbl_FADT.month_alarm = 0;
-	}
-
-	cmos_rtc.day_alrm = acpi_gbl_FADT.day_alarm;
-	cmos_rtc.mon_alrm = acpi_gbl_FADT.month_alarm;
-	cmos_rtc.century = acpi_gbl_FADT.century;
-
-	if (acpi_gbl_FADT.flags & ACPI_FADT_S4_RTC_WAKE)
-		dev_info(dev, "RTC can wake from S4\n");
-
-	/* RTC always wakes from S1/S2/S3, and often S4/STD */
-	device_init_wakeup(dev, 1);
-}
-
-static void cmos_check_acpi_rtc_status(struct device *dev,
-				       unsigned char *rtc_control)
-{
-	struct cmos_rtc *cmos = dev_get_drvdata(dev);
-	acpi_event_status rtc_status;
-	acpi_status status;
-
-	if (acpi_gbl_FADT.flags & ACPI_FADT_FIXED_RTC)
-		return;
-
-	status = acpi_get_event_status(ACPI_EVENT_RTC, &rtc_status);
-	if (ACPI_FAILURE(status)) {
-		dev_err(dev, "Could not get RTC status\n");
-	} else if (rtc_status & ACPI_EVENT_FLAG_SET) {
-		unsigned char mask;
-		*rtc_control &= ~RTC_AIE;
-		CMOS_WRITE(*rtc_control, RTC_CONTROL);
-		mask = CMOS_READ(RTC_INTR_FLAGS);
-		rtc_update_irq(cmos->rtc, 1, mask);
-	}
-}
-
-#else
-
-static void cmos_wake_setup(struct device *dev)
-{
-}
-
-static void cmos_check_acpi_rtc_status(struct device *dev,
-				       unsigned char *rtc_control)
-{
-}
-
-static void rtc_wake_setup(struct device *dev)
-{
-}
-#endif
-
 #ifdef	CONFIG_PNP
 
 #include <linux/pnp.h>
-- 
GitLab


From d13e9ad9f5146f066a5c5a1cc993d09e4fb21ead Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Nov 2022 13:12:00 +0100
Subject: [PATCH 1555/4122] rtc: cmos: Rename ACPI-related functions

The names of rtc_wake_setup() and cmos_wake_setup() don't indicate
that these functions are ACPI-related, which is the case, and the
former doesn't really reflect the role of the function.

Rename them to acpi_rtc_event_setup() and acpi_cmos_wake_setup(),
respectively, to address this shortcoming.

No intentional functional impact.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/3225614.44csPzL39Z@kreacher
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-cmos.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 2a21d8281aa61..039486bfedf48 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -784,7 +784,7 @@ static u32 rtc_handler(void *context)
 	return ACPI_INTERRUPT_HANDLED;
 }
 
-static void rtc_wake_setup(struct device *dev)
+static void acpi_rtc_event_setup(struct device *dev)
 {
 	if (acpi_disabled)
 		return;
@@ -828,7 +828,7 @@ static void use_acpi_alarm_quirks(void)
 static inline void use_acpi_alarm_quirks(void) { }
 #endif
 
-static void cmos_wake_setup(struct device *dev)
+static void acpi_cmos_wake_setup(struct device *dev)
 {
 	if (acpi_disabled)
 		return;
@@ -880,11 +880,11 @@ static void cmos_check_acpi_rtc_status(struct device *dev,
 
 #else /* !CONFIG_ACPI */
 
-static inline void rtc_wake_setup(struct device *dev)
+static inline void acpi_rtc_event_setup(struct device *dev)
 {
 }
 
-static inline void cmos_wake_setup(struct device *dev)
+static inline void acpi_cmos_wake_setup(struct device *dev)
 {
 }
 
@@ -986,7 +986,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 			cmos_rtc.wake_off = info->wake_off;
 		}
 	} else {
-		cmos_wake_setup(dev);
+		acpi_cmos_wake_setup(dev);
 	}
 
 	if (cmos_rtc.day_alrm >= 128)
@@ -1091,7 +1091,7 @@ cmos_do_probe(struct device *dev, struct resource *ports, int rtc_irq)
 	 * the ACPI RTC fixed event.
 	 */
 	if (!info)
-		rtc_wake_setup(dev);
+		acpi_rtc_event_setup(dev);
 
 	dev_info(dev, "%s%s, %d bytes nvram%s\n",
 		 !is_valid_irq(rtc_irq) ? "no alarms" :
-- 
GitLab


From 83ebb7b3036d151ee39a4a752018665648fc3bd4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 9 Nov 2022 13:15:36 +0100
Subject: [PATCH 1556/4122] rtc: cmos: Disable ACPI RTC event on removal

Make cmos_do_remove() drop the ACPI RTC fixed event handler so as to
prevent it from operating on stale data in case the event triggers
after driver removal.

Fixes: 311ee9c151ad ("rtc: cmos: allow using ACPI for RTC alarm instead of HPET")
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/2224609.iZASKD2KPV@kreacher
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-cmos.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 039486bfedf48..00e2ca7374ecf 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -798,6 +798,14 @@ static void acpi_rtc_event_setup(struct device *dev)
 	acpi_disable_event(ACPI_EVENT_RTC, 0);
 }
 
+static void acpi_rtc_event_cleanup(void)
+{
+	if (acpi_disabled)
+		return;
+
+	acpi_remove_fixed_event_handler(ACPI_EVENT_RTC, rtc_handler);
+}
+
 static void rtc_wake_on(struct device *dev)
 {
 	acpi_clear_event(ACPI_EVENT_RTC);
@@ -884,6 +892,10 @@ static inline void acpi_rtc_event_setup(struct device *dev)
 {
 }
 
+static inline void acpi_rtc_event_cleanup(void)
+{
+}
+
 static inline void acpi_cmos_wake_setup(struct device *dev)
 {
 }
@@ -1138,6 +1150,9 @@ static void cmos_do_remove(struct device *dev)
 			hpet_unregister_irq_handler(cmos_interrupt);
 	}
 
+	if (!dev_get_platdata(dev))
+		acpi_rtc_event_cleanup();
+
 	cmos->rtc = NULL;
 
 	ports = cmos->iomem;
-- 
GitLab


From 0462681e207ccc44778a77b3297af728b1cf5b9f Mon Sep 17 00:00:00 2001
From: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Date: Sun, 6 Nov 2022 12:59:15 +0100
Subject: [PATCH 1557/4122] rtc: snvs: Allow a time difference on clock
 register read

On an iMX6ULL the following message appears when a wakealarm is set:

echo 0 > /sys/class/rtc/rtc1/wakealarm
rtc rtc1: Timeout trying to get valid LPSRT Counter read

This does not always happen but is reproducible quite often (7 out of 10
times). The problem appears because the iMX6ULL is not able to read the
registers within one 32kHz clock cycle which is the base clock of the
RTC. Therefore, this patch allows a difference of up to 320 cycles
(10ms). 10ms was chosen to be big enough even on systems with less cpu
power (e.g. iMX6ULL). According to the reference manual a difference is
fine:
- If the two consecutive reads are similar, the value is correct.
The values have to be similar, not equal.

Fixes: cd7f3a249dbe ("rtc: snvs: Add timeouts to avoid kernel lockups")
Reviewed-by: Francesco Dolcini <francesco.dolcini@toradex.com>
Signed-off-by: Stefan Eichenberger <stefan.eichenberger@toradex.com>
Signed-off-by: Francesco Dolcini <francesco@dolcini.it>
Link: https://lore.kernel.org/r/20221106115915.7930-1-francesco@dolcini.it
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-snvs.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
index bd929b0e7d7de..d82acf1af1fae 100644
--- a/drivers/rtc/rtc-snvs.c
+++ b/drivers/rtc/rtc-snvs.c
@@ -32,6 +32,14 @@
 #define SNVS_LPPGDR_INIT	0x41736166
 #define CNTR_TO_SECS_SH		15
 
+/* The maximum RTC clock cycles that are allowed to pass between two
+ * consecutive clock counter register reads. If the values are corrupted a
+ * bigger difference is expected. The RTC frequency is 32kHz. With 320 cycles
+ * we end at 10ms which should be enough for most cases. If it once takes
+ * longer than expected we do a retry.
+ */
+#define MAX_RTC_READ_DIFF_CYCLES	320
+
 struct snvs_rtc_data {
 	struct rtc_device *rtc;
 	struct regmap *regmap;
@@ -56,6 +64,7 @@ static u64 rtc_read_lpsrt(struct snvs_rtc_data *data)
 static u32 rtc_read_lp_counter(struct snvs_rtc_data *data)
 {
 	u64 read1, read2;
+	s64 diff;
 	unsigned int timeout = 100;
 
 	/* As expected, the registers might update between the read of the LSB
@@ -66,7 +75,8 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data)
 	do {
 		read2 = read1;
 		read1 = rtc_read_lpsrt(data);
-	} while (read1 != read2 && --timeout);
+		diff = read1 - read2;
+	} while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout);
 	if (!timeout)
 		dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n");
 
@@ -78,13 +88,15 @@ static u32 rtc_read_lp_counter(struct snvs_rtc_data *data)
 static int rtc_read_lp_counter_lsb(struct snvs_rtc_data *data, u32 *lsb)
 {
 	u32 count1, count2;
+	s32 diff;
 	unsigned int timeout = 100;
 
 	regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1);
 	do {
 		count2 = count1;
 		regmap_read(data->regmap, data->offset + SNVS_LPSRTCLR, &count1);
-	} while (count1 != count2 && --timeout);
+		diff = count1 - count2;
+	} while (((diff < 0) || (diff > MAX_RTC_READ_DIFF_CYCLES)) && --timeout);
 	if (!timeout) {
 		dev_err(&data->rtc->dev, "Timeout trying to get valid LPSRT Counter read\n");
 		return -ETIMEDOUT;
-- 
GitLab


From 60da2d11fcbc043304910e4d2ca82f9bab953e63 Mon Sep 17 00:00:00 2001
From: Bernard Metzler <bmt@zurich.ibm.com>
Date: Tue, 15 Nov 2022 18:07:47 +0100
Subject: [PATCH 1558/4122] RDMA/siw: Set defined status for work completion
 with undefined status

A malicious user may write undefined values into memory mapped completion
queue elements status or opcode. Undefined status or opcode values will
result in out-of-bounds access to an array mapping siw internal
representation of opcode and status to RDMA core representation when
reaping CQ elements. While siw detects those undefined values, it did not
correctly set completion status to a defined value, thus defeating the
whole purpose of the check.

This bug leads to the following Smatch static checker warning:

	drivers/infiniband/sw/siw/siw_cq.c:96 siw_reap_cqe()
	error: buffer overflow 'map_cqe_status' 10 <= 21

Fixes: bdf1da5df9da ("RDMA/siw: Fix immediate work request flush to completion queue")
Link: https://lore.kernel.org/r/20221115170747.1263298-1-bmt@zurich.ibm.com
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_cq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_cq.c b/drivers/infiniband/sw/siw/siw_cq.c
index acc7bcd538b53..403029de6b92d 100644
--- a/drivers/infiniband/sw/siw/siw_cq.c
+++ b/drivers/infiniband/sw/siw/siw_cq.c
@@ -88,9 +88,9 @@ int siw_reap_cqe(struct siw_cq *cq, struct ib_wc *wc)
 
 			if (opcode >= SIW_NUM_OPCODES) {
 				opcode = 0;
-				status = IB_WC_GENERAL_ERR;
+				status = SIW_WC_GENERAL_ERR;
 			} else if (status >= SIW_NUM_WC_STATUS) {
-				status = IB_WC_GENERAL_ERR;
+				status = SIW_WC_GENERAL_ERR;
 			}
 			wc->opcode = map_wc_opcode[opcode];
 			wc->status = map_cqe_status[status].ib;
-- 
GitLab


From 2d08a893b87cf9b2f9dbb3afaff60ca4530d55a2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Nov 2022 20:17:07 +0000
Subject: [PATCH 1559/4122] x86/debug: Include percpu.h in debugreg.h to get
 DECLARE_PER_CPU() et al
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include percpu.h to pick up the definition of DECLARE_PER_CPU() and
friends instead of relying on the parent to provide the #include.  E.g.
swapping the order of includes in arch/x86/kvm/vmx/nested.c (simulating
KVM code movement being done for other purposes) results in build errors:

  In file included from arch/x86/kvm/vmx/nested.c:3:
  arch/x86/include/asm/debugreg.h:9:32: error: unknown type name âcpu_dr7â=99
      9 | DECLARE_PER_CPU(unsigned long, cpu_dr7);
        |                                ^~~~~~~

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221110201707.1976032-1-seanjc@google.com
---
 arch/x86/include/asm/debugreg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h
index cfdf307ddc012..b049d950612fd 100644
--- a/arch/x86/include/asm/debugreg.h
+++ b/arch/x86/include/asm/debugreg.h
@@ -2,8 +2,8 @@
 #ifndef _ASM_X86_DEBUGREG_H
 #define _ASM_X86_DEBUGREG_H
 
-
 #include <linux/bug.h>
+#include <linux/percpu.h>
 #include <uapi/asm/debugreg.h>
 
 DECLARE_PER_CPU(unsigned long, cpu_dr7);
-- 
GitLab


From 24c94060fc9b4e0f19e6e018869db46db21d6bc7 Mon Sep 17 00:00:00 2001
From: Brian Masney <bmasney@redhat.com>
Date: Mon, 14 Nov 2022 15:29:43 -0500
Subject: [PATCH 1560/4122] gpiolib: ensure that fwnode is properly set

Note that this is a RFC patch and not meant to be merged. I looked into
a problem with linux-next-20221110 on the Qualcomm SA8540P automotive
board (sc8280xp) where the UFS host controller would fail to probe due
to repeated probe deferrals when trying to get reset-gpios via
devm_gpiod_get_optional().

of_get_named_gpiod_flags() returns -EPROBE_DEFER, which is caused by
of_gpiochip_match_node_and_xlate() returning 0 since the of_xlate function
pointer is not set for the qcom,sc8280xp-tlmm pinctrl driver. The
pinctrl driver doesn't define one, so of_gpiochip_add() should
automatically setup of_gpio_simple_xlate() on it's behalf. This doesn't
happen since the fwnode member on the struct gpiochip is set to null
when of_gpiochip_add() is called. Let's work around this by ensuring
that it's set if available.

Note that this broke sometime within the last few weeks within
linux-next and I haven't bisected this. I'm posting this in the hopes
that someone may know offhand which patch(es) may have broken this.

Signed-off-by: Brian Masney <bmasney@redhat.com>
Tested-by: Marijn Suijten <marijn.suijten@somainline.org>
Tested-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Tested-by: Steev Klimaszewski <steev@kali.org> #Lenovo Thinkpad X13s
Tested-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 7936d54a2e302..51afdc6ac9198 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -679,7 +679,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	 * Assign fwnode depending on the result of the previous calls,
 	 * if none of them succeed, assign it to the parent's one.
 	 */
-	gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode;
+	gc->fwnode = gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode;
 
 	gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL);
 	if (gdev->id < 0) {
-- 
GitLab


From 40059212f99c31f26c69763e560325e59eac02c6 Mon Sep 17 00:00:00 2001
From: Nishanth Menon <nm@ti.com>
Date: Tue, 15 Nov 2022 17:10:21 -0600
Subject: [PATCH 1561/4122] dt-bindings: gpio: gpio-davinci: Increase maxItems
 in gpio-line-names

gpio-line-names really depends on ti,ngpios. However, the maximum value
we have seen across the board is on K2G and da850 platforms where it can
be upto 144.

Link: https://lore.kernel.org/linux-arm-kernel/20221115200357.qa2rvw3clbz7unzq@symptom/T/#u
Fixes: c830b87a761b ("dt-bindings: gpio: gpio-davinci: Convert to json-schema")
Reported-by: Robert Nelson <robertcnelson@gmail.com>
Signed-off-by: Nishanth Menon <nm@ti.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 Documentation/devicetree/bindings/gpio/gpio-davinci.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml b/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml
index f32e09ef937c0..10e56cf306dba 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-davinci.yaml
@@ -35,7 +35,7 @@ properties:
   gpio-line-names:
     description: strings describing the names of each gpio line.
     minItems: 1
-    maxItems: 100
+    maxItems: 144
 
   "#gpio-cells":
     const: 2
-- 
GitLab


From f5bc4428cc6e096170d35fc79a640d90e33dd9c3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:09 -0800
Subject: [PATCH 1562/4122] perf stat: Clear screen only if output file is a
 tty

The --interval-clear option makes perf stat to clear the terminal at
each interval.  But it doesn't need to clear the screen when it saves
to a file.  Make it fail when it's enabled with the output options.

  $ perf stat -I 1 --interval-clear -o myfile true
  --interval-clear does not work with output

   Usage: perf stat [<options>] [<command>]

      -o, --output <file>   output file name
          --log-fd <n>      log output to fd, instead of stderr
          --interval-clear  clear screen in between new interval

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d5e1670bca204..1d79801f4e844 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -2403,6 +2403,14 @@ int cmd_stat(int argc, const char **argv)
 		}
 	}
 
+	if (stat_config.interval_clear && !isatty(fileno(output))) {
+		fprintf(stderr, "--interval-clear does not work with output\n");
+		parse_options_usage(stat_usage, stat_options, "o", 1);
+		parse_options_usage(NULL, stat_options, "log-fd", 0);
+		parse_options_usage(NULL, stat_options, "interval-clear", 0);
+		return -1;
+	}
+
 	stat_config.output = output;
 
 	/*
-- 
GitLab


From 31bf6aea997674a030be02999560d8e6f7a6b974 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:10 -0800
Subject: [PATCH 1563/4122] perf stat: Split print_running() function

To make the code more obvious and hopefully simpler, factor out the
code for each output mode - stdio, CSV, JSON.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 37 +++++++++++++++++++++++++---------
 1 file changed, 27 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 2a3c1e0098b9a..281b811f8574d 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -25,24 +25,41 @@
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
 
-static void print_running(struct perf_stat_config *config,
-			  u64 run, u64 ena)
+static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena)
+{
+	if (run != ena)
+		fprintf(config->output, "  (%.2f%%)", 100.0 * run / ena);
+}
+
+static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena)
 {
+	double enabled_percent = 100;
+
+	if (run != ena)
+		enabled_percent = 100 * run / ena;
+	fprintf(config->output, "%s%" PRIu64 "%s%.2f",
+		config->csv_sep, run, config->csv_sep, enabled_percent);
+}
 
+static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena)
+{
 	double enabled_percent = 100;
 
 	if (run != ena)
 		enabled_percent = 100 * run / ena;
+	fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ",
+		run, enabled_percent);
+}
+
+static void print_running(struct perf_stat_config *config,
+			  u64 run, u64 ena)
+{
 	if (config->json_output)
-		fprintf(config->output,
-			"\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ",
-			run, enabled_percent);
+		print_running_json(config, run, ena);
 	else if (config->csv_output)
-		fprintf(config->output,
-			"%s%" PRIu64 "%s%.2f", config->csv_sep,
-			run, config->csv_sep, enabled_percent);
-	else if (run != ena)
-		fprintf(config->output, "  (%.2f%%)", 100.0 * run / ena);
+		print_running_csv(config, run, ena);
+	else
+		print_running_std(config, run, ena);
 }
 
 static void print_noise_pct(struct perf_stat_config *config,
-- 
GitLab


From def99d60df6f21b4c36d23f0071ec8a73f39f28b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:11 -0800
Subject: [PATCH 1564/4122] perf stat: Split print_noise_pct() function

Likewise, split print_noise_pct() for each output mode.  Although it's
a tiny function, more logic will be added soon so it'd be better split
it and treat it in the same way.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 27 +++++++++++++++++++++++----
 1 file changed, 23 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 281b811f8574d..a230f65efa624 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -62,17 +62,36 @@ static void print_running(struct perf_stat_config *config,
 		print_running_std(config, run, ena);
 }
 
+static void print_noise_pct_std(struct perf_stat_config *config,
+				double pct)
+{
+	if (pct)
+		fprintf(config->output, "  ( +-%6.2f%% )", pct);
+}
+
+static void print_noise_pct_csv(struct perf_stat_config *config,
+				double pct)
+{
+	fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
+}
+
+static void print_noise_pct_json(struct perf_stat_config *config,
+				 double pct)
+{
+	fprintf(config->output, "\"variance\" : %.2f, ", pct);
+}
+
 static void print_noise_pct(struct perf_stat_config *config,
 			    double total, double avg)
 {
 	double pct = rel_stddev_stats(total, avg);
 
 	if (config->json_output)
-		fprintf(config->output, "\"variance\" : %.2f, ", pct);
+		print_noise_pct_json(config, pct);
 	else if (config->csv_output)
-		fprintf(config->output, "%s%.2f%%", config->csv_sep, pct);
-	else if (pct)
-		fprintf(config->output, "  ( +-%6.2f%% )", pct);
+		print_noise_pct_csv(config, pct);
+	else
+		print_noise_pct_std(config, pct);
 }
 
 static void print_noise(struct perf_stat_config *config,
-- 
GitLab


From 41cb875242e71bf1bf5539a724bc65a0b470bb12 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:12 -0800
Subject: [PATCH 1565/4122] perf stat: Split print_cgroup() function

Likewise, split print_cgroup() for each output mode.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a230f65efa624..af2a561eb20c2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -106,15 +106,32 @@ static void print_noise(struct perf_stat_config *config,
 	print_noise_pct(config, stddev_stats(&ps->res_stats), avg);
 }
 
+static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name)
+{
+	fprintf(config->output, " %s", cgrp_name);
+}
+
+static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name)
+{
+	fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+}
+
+static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name)
+{
+	fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
+}
+
 static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
 {
 	if (nr_cgroups) {
 		const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name  : "";
 
 		if (config->json_output)
-			fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
+			print_cgroup_json(config, cgrp_name);
+		if (config->csv_output)
+			print_cgroup_csv(config, cgrp_name);
 		else
-			fprintf(config->output, "%s%s", config->csv_sep, cgrp_name);
+			print_cgroup_std(config, cgrp_name);
 	}
 }
 
-- 
GitLab


From 33b2e2c2ad68489bc0c97660ef1e6bf7df0c9d7d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:13 -0800
Subject: [PATCH 1566/4122] perf stat: Split aggr_printout() function

The aggr_printout() function is to print aggr_id and count (nr).
Split it for each output mode to simplify the code.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 220 ++++++++++++++++++---------------
 1 file changed, 121 insertions(+), 99 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index af2a561eb20c2..ed421f6d512f4 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -135,124 +135,135 @@ static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
 	}
 }
 
-
-static void aggr_printout(struct perf_stat_config *config,
-			  struct evsel *evsel, struct aggr_cpu_id id, int nr)
+static void print_aggr_id_std(struct perf_stat_config *config,
+			      struct evsel *evsel, struct aggr_cpu_id id, int nr)
 {
+	FILE *output = config->output;
 
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+		fprintf(output, "S%d-D%d-C%*d %*d ",
+			id.socket, id.die, -8, id.core, 4, nr);
+		break;
+	case AGGR_DIE:
+		fprintf(output, "S%d-D%*d %*d ",
+			id.socket, -8, id.die, 4, nr);
+		break;
+	case AGGR_SOCKET:
+		fprintf(output, "S%*d %*d ",
+			-5, id.socket, 4, nr);
+		break;
+	case AGGR_NODE:
+		fprintf(output, "N%*d %*d ",
+			-5, id.node, 4, nr);
+		break;
+	case AGGR_NONE:
+		if (evsel->percore && !config->percore_show_thread) {
+			fprintf(output, "S%d-D%d-C%*d ",
+				id.socket, id.die, -3, id.core);
+		} else if (id.cpu.cpu > -1) {
+			fprintf(output, "CPU%*d ",
+				-7, id.cpu.cpu);
+		}
+		break;
+	case AGGR_THREAD:
+		fprintf(output, "%*s-%*d ",
+			16, perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+			-8, perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+		break;
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	case AGGR_MAX:
+	default:
+		break;
+	}
+}
 
-	if (config->json_output && !config->interval)
-		fprintf(config->output, "{");
+static void print_aggr_id_csv(struct perf_stat_config *config,
+			      struct evsel *evsel, struct aggr_cpu_id id, int nr)
+{
+	FILE *output = config->output;
+	const char *sep = config->csv_sep;
 
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
-		if (config->json_output) {
-			fprintf(config->output,
-				"\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
-				id.socket,
-				id.die,
-				id.core,
-				nr);
-		} else {
-			fprintf(config->output, "S%d-D%d-C%*d%s%*d%s",
-				id.socket,
-				id.die,
-				config->csv_output ? 0 : -8,
-				id.core,
-				config->csv_sep,
-				config->csv_output ? 0 : 4,
-				nr,
-				config->csv_sep);
-		}
+		fprintf(output, "S%d-D%d-C%d%s%d%s",
+			id.socket, id.die, id.core, sep, nr, sep);
 		break;
 	case AGGR_DIE:
-		if (config->json_output) {
-			fprintf(config->output,
-				"\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
-				id.socket,
-				id.die,
-				nr);
-		} else {
-			fprintf(config->output, "S%d-D%*d%s%*d%s",
-				id.socket,
-				config->csv_output ? 0 : -8,
-				id.die,
-				config->csv_sep,
-				config->csv_output ? 0 : 4,
-				nr,
-				config->csv_sep);
-		}
+		fprintf(output, "S%d-D%d%s%d%s",
+			id.socket, id.die, sep, nr, sep);
 		break;
 	case AGGR_SOCKET:
-		if (config->json_output) {
-			fprintf(config->output,
-				"\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
-				id.socket,
-				nr);
-		} else {
-			fprintf(config->output, "S%*d%s%*d%s",
-				config->csv_output ? 0 : -5,
-				id.socket,
-				config->csv_sep,
-				config->csv_output ? 0 : 4,
-				nr,
-				config->csv_sep);
-		}
+		fprintf(output, "S%d%s%d%s",
+			id.socket, sep, nr, sep);
 		break;
 	case AGGR_NODE:
-		if (config->json_output) {
-			fprintf(config->output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
-				id.node,
-				nr);
-		} else {
-			fprintf(config->output, "N%*d%s%*d%s",
-				config->csv_output ? 0 : -5,
-				id.node,
-				config->csv_sep,
-				config->csv_output ? 0 : 4,
-				nr,
-				config->csv_sep);
-		}
+		fprintf(output, "N%d%s%d%s",
+			id.node, sep, nr, sep);
 		break;
 	case AGGR_NONE:
-		if (config->json_output) {
-			if (evsel->percore && !config->percore_show_thread) {
-				fprintf(config->output, "\"core\" : \"S%d-D%d-C%d\"",
-					id.socket,
-					id.die,
-					id.core);
-			} else if (id.cpu.cpu > -1) {
-				fprintf(config->output, "\"cpu\" : \"%d\", ",
-					id.cpu.cpu);
-			}
-		} else {
-			if (evsel->percore && !config->percore_show_thread) {
-				fprintf(config->output, "S%d-D%d-C%*d%s",
-					id.socket,
-					id.die,
-					config->csv_output ? 0 : -3,
-					id.core, config->csv_sep);
-			} else if (id.cpu.cpu > -1) {
-				fprintf(config->output, "CPU%*d%s",
-					config->csv_output ? 0 : -7,
-					id.cpu.cpu, config->csv_sep);
-			}
+		if (evsel->percore && !config->percore_show_thread) {
+			fprintf(output, "S%d-D%d-C%d%s",
+				id.socket, id.die, id.core, sep);
+		} else if (id.cpu.cpu > -1) {
+			fprintf(output, "CPU%d%s",
+				id.cpu.cpu, sep);
 		}
 		break;
 	case AGGR_THREAD:
-		if (config->json_output) {
-			fprintf(config->output, "\"thread\" : \"%s-%d\", ",
-				perf_thread_map__comm(evsel->core.threads, id.thread_idx),
-				perf_thread_map__pid(evsel->core.threads, id.thread_idx));
-		} else {
-			fprintf(config->output, "%*s-%*d%s",
-				config->csv_output ? 0 : 16,
-				perf_thread_map__comm(evsel->core.threads, id.thread_idx),
-				config->csv_output ? 0 : -8,
-				perf_thread_map__pid(evsel->core.threads, id.thread_idx),
-				config->csv_sep);
+		fprintf(output, "%s-%d%s",
+			perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+			perf_thread_map__pid(evsel->core.threads, id.thread_idx),
+			sep);
+		break;
+	case AGGR_GLOBAL:
+	case AGGR_UNSET:
+	case AGGR_MAX:
+	default:
+		break;
+	}
+}
+
+static void print_aggr_id_json(struct perf_stat_config *config,
+			       struct evsel *evsel, struct aggr_cpu_id id, int nr)
+{
+	FILE *output = config->output;
+
+	if (!config->interval)
+		fputc('{', output);
+
+	switch (config->aggr_mode) {
+	case AGGR_CORE:
+		fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
+			id.socket, id.die, id.core, nr);
+		break;
+	case AGGR_DIE:
+		fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
+			id.socket, id.die, nr);
+		break;
+	case AGGR_SOCKET:
+		fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
+			id.socket, nr);
+		break;
+	case AGGR_NODE:
+		fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
+			id.node, nr);
+		break;
+	case AGGR_NONE:
+		if (evsel->percore && !config->percore_show_thread) {
+			fprintf(output, "\"core\" : \"S%d-D%d-C%d\"",
+				id.socket, id.die, id.core);
+		} else if (id.cpu.cpu > -1) {
+			fprintf(output, "\"cpu\" : \"%d\", ",
+				id.cpu.cpu);
 		}
 		break;
+	case AGGR_THREAD:
+		fprintf(output, "\"thread\" : \"%s-%d\", ",
+			perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+			perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+		break;
 	case AGGR_GLOBAL:
 	case AGGR_UNSET:
 	case AGGR_MAX:
@@ -261,6 +272,17 @@ static void aggr_printout(struct perf_stat_config *config,
 	}
 }
 
+static void aggr_printout(struct perf_stat_config *config,
+			  struct evsel *evsel, struct aggr_cpu_id id, int nr)
+{
+	if (config->json_output)
+		print_aggr_id_json(config, evsel, id, nr);
+	else if (config->csv_output)
+		print_aggr_id_csv(config, evsel, id, nr);
+	else
+		print_aggr_id_std(config, evsel, id, nr);
+}
+
 struct outstate {
 	FILE *fh;
 	bool newline;
-- 
GitLab


From c2019f844eacd06decc435bdcee9eab1e1a6f4ec Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:14 -0800
Subject: [PATCH 1567/4122] perf stat: Factor out print_counter_value()
 function

And split it for each output mode like others.  I believe it makes the
code simpler and more intuitive.  Now abs_printout() becomes just to
call sub-functions.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 81 ++++++++++++++++++++++------------
 1 file changed, 53 insertions(+), 28 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index ed421f6d512f4..a72c7442ff3dd 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -517,46 +517,71 @@ static void print_metric_header(struct perf_stat_config *config,
 		fprintf(os->fh, "%*s ", config->metric_only_len, unit);
 }
 
-static void abs_printout(struct perf_stat_config *config,
-			 struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
+static void print_counter_value_std(struct perf_stat_config *config,
+				    struct evsel *evsel, double avg)
 {
 	FILE *output = config->output;
 	double sc =  evsel->scale;
 	const char *fmt;
 
-	if (config->csv_output) {
-		fmt = floor(sc) != sc ?  "%.2f%s" : "%.0f%s";
-	} else {
-		if (config->big_num)
-			fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s";
-		else
-			fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s";
-	}
+	if (config->big_num)
+		fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f ";
+	else
+		fmt = floor(sc) != sc ? "%18.2f " : "%18.0f ";
 
-	aggr_printout(config, evsel, id, nr);
+	fprintf(output, fmt, avg);
 
-	if (config->json_output)
-		fprintf(output, "\"counter-value\" : \"%f\", ", avg);
-	else
-		fprintf(output, fmt, avg, config->csv_sep);
+	if (evsel->unit)
+		fprintf(output, "%-*s ", config->unit_width, evsel->unit);
 
-	if (config->json_output) {
-		if (evsel->unit) {
-			fprintf(output, "\"unit\" : \"%s\", ",
-				evsel->unit);
-		}
-	} else {
-		if (evsel->unit)
-			fprintf(output, "%-*s%s",
-				config->csv_output ? 0 : config->unit_width,
-				evsel->unit, config->csv_sep);
-	}
+	fprintf(output, "%-*s", 32, evsel__name(evsel));
+}
 
+static void print_counter_value_csv(struct perf_stat_config *config,
+				    struct evsel *evsel, double avg)
+{
+	FILE *output = config->output;
+	double sc =  evsel->scale;
+	const char *sep = config->csv_sep;
+	const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
+
+	fprintf(output, fmt, avg, sep);
+
+	if (evsel->unit)
+		fprintf(output, "%s%s", evsel->unit, sep);
+
+	fprintf(output, "%s", evsel__name(evsel));
+}
+
+static void print_counter_value_json(struct perf_stat_config *config,
+				     struct evsel *evsel, double avg)
+{
+	FILE *output = config->output;
+
+	fprintf(output, "\"counter-value\" : \"%f\", ", avg);
+
+	if (evsel->unit)
+		fprintf(output, "\"unit\" : \"%s\", ", evsel->unit);
+
+	fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel));
+}
+
+static void print_counter_value(struct perf_stat_config *config,
+				struct evsel *evsel, double avg)
+{
 	if (config->json_output)
-		fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel));
+		print_counter_value_json(config, evsel, avg);
+	else if (config->csv_output)
+		print_counter_value_csv(config, evsel, avg);
 	else
-		fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel));
+		print_counter_value_std(config, evsel, avg);
+}
 
+static void abs_printout(struct perf_stat_config *config,
+			 struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
+{
+	aggr_printout(config, evsel, id, nr);
+	print_counter_value(config, evsel, avg);
 	print_cgroup(config, evsel);
 }
 
-- 
GitLab


From d6aeb861b1fb26f55c1b5be4b6e900ad1e734516 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:15 -0800
Subject: [PATCH 1568/4122] perf stat: Handle bad events in abs_printout()

In the printout() function, it checks if the event is bad (i.e. not
counted or not supported) and print the result.  But it does the same
what abs_printout() is doing.  So add an argument to indicate the value
is ok or not and use the same function in both cases.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 68 ++++++++++++++--------------------
 1 file changed, 27 insertions(+), 41 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index a72c7442ff3dd..fe54838932894 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -518,18 +518,22 @@ static void print_metric_header(struct perf_stat_config *config,
 }
 
 static void print_counter_value_std(struct perf_stat_config *config,
-				    struct evsel *evsel, double avg)
+				    struct evsel *evsel, double avg, bool ok)
 {
 	FILE *output = config->output;
 	double sc =  evsel->scale;
 	const char *fmt;
+	const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
 
 	if (config->big_num)
 		fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f ";
 	else
 		fmt = floor(sc) != sc ? "%18.2f " : "%18.0f ";
 
-	fprintf(output, fmt, avg);
+	if (ok)
+		fprintf(output, fmt, avg);
+	else
+		fprintf(output, "%18s ", bad_count);
 
 	if (evsel->unit)
 		fprintf(output, "%-*s ", config->unit_width, evsel->unit);
@@ -538,14 +542,18 @@ static void print_counter_value_std(struct perf_stat_config *config,
 }
 
 static void print_counter_value_csv(struct perf_stat_config *config,
-				    struct evsel *evsel, double avg)
+				    struct evsel *evsel, double avg, bool ok)
 {
 	FILE *output = config->output;
 	double sc =  evsel->scale;
 	const char *sep = config->csv_sep;
 	const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s";
+	const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
 
-	fprintf(output, fmt, avg, sep);
+	if (ok)
+		fprintf(output, fmt, avg, sep);
+	else
+		fprintf(output, "%s%s", bad_count, sep);
 
 	if (evsel->unit)
 		fprintf(output, "%s%s", evsel->unit, sep);
@@ -554,11 +562,15 @@ static void print_counter_value_csv(struct perf_stat_config *config,
 }
 
 static void print_counter_value_json(struct perf_stat_config *config,
-				     struct evsel *evsel, double avg)
+				     struct evsel *evsel, double avg, bool ok)
 {
 	FILE *output = config->output;
+	const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
 
-	fprintf(output, "\"counter-value\" : \"%f\", ", avg);
+	if (ok)
+		fprintf(output, "\"counter-value\" : \"%f\", ", avg);
+	else
+		fprintf(output, "\"counter-value\" : \"%s\", ", bad_count);
 
 	if (evsel->unit)
 		fprintf(output, "\"unit\" : \"%s\", ", evsel->unit);
@@ -567,21 +579,22 @@ static void print_counter_value_json(struct perf_stat_config *config,
 }
 
 static void print_counter_value(struct perf_stat_config *config,
-				struct evsel *evsel, double avg)
+				struct evsel *evsel, double avg, bool ok)
 {
 	if (config->json_output)
-		print_counter_value_json(config, evsel, avg);
+		print_counter_value_json(config, evsel, avg, ok);
 	else if (config->csv_output)
-		print_counter_value_csv(config, evsel, avg);
+		print_counter_value_csv(config, evsel, avg, ok);
 	else
-		print_counter_value_std(config, evsel, avg);
+		print_counter_value_std(config, evsel, avg, ok);
 }
 
 static void abs_printout(struct perf_stat_config *config,
-			 struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg)
+			 struct aggr_cpu_id id, int nr,
+			 struct evsel *evsel, double avg, bool ok)
 {
 	aggr_printout(config, evsel, id, nr);
-	print_counter_value(config, evsel, avg);
+	print_counter_value(config, evsel, avg, ok);
 	print_cgroup(config, evsel);
 }
 
@@ -658,17 +671,8 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 			pm(config, &os, NULL, "", "", 0);
 			return;
 		}
-		aggr_printout(config, counter, id, nr);
 
-		if (config->json_output) {
-			fprintf(config->output, "\"counter-value\" : \"%s\", ",
-					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED);
-		} else {
-			fprintf(config->output, "%*s%s",
-				config->csv_output ? 0 : 18,
-				counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
-				config->csv_sep);
-		}
+		abs_printout(config, id, nr, counter, uval, /*ok=*/false);
 
 		if (counter->supported) {
 			if (!evlist__has_hybrid(counter->evlist)) {
@@ -678,24 +682,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 			}
 		}
 
-		if (config->json_output) {
-			fprintf(config->output, "\"unit\" : \"%s\", ", counter->unit);
-		} else {
-			fprintf(config->output, "%-*s%s",
-				config->csv_output ? 0 : config->unit_width,
-				counter->unit, config->csv_sep);
-		}
-
-		if (config->json_output) {
-			fprintf(config->output, "\"event\" : \"%s\", ",
-				evsel__name(counter));
-		} else {
-			fprintf(config->output, "%*s",
-				 config->csv_output ? 0 : -25, evsel__name(counter));
-		}
-
-		print_cgroup(config, counter);
-
 		if (!config->csv_output && !config->json_output)
 			pm(config, &os, NULL, NULL, "", 0);
 		print_noise(config, counter, noise);
@@ -706,7 +692,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 	}
 
 	if (!config->metric_only)
-		abs_printout(config, id, nr, counter, uval);
+		abs_printout(config, id, nr, counter, uval, /*ok=*/true);
 
 	out.print_metric = pm;
 	out.new_line = nl;
-- 
GitLab


From df46a3c92b510b7776fed14728a271f8836ef19b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:16 -0800
Subject: [PATCH 1569/4122] perf stat: Add before_metric argument

Unfortunately, event running time, percentage and noise data are printed
in different positions in normal output than CSV/JSON.  I think it's
better to put such details in where it actually prints.

So add before_metric argument to print_noise() and print_running() and
call them twice before and after the metric.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-9-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 82 +++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 40 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index fe54838932894..bf3f2f9d5deec 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -52,14 +52,18 @@ static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena
 }
 
 static void print_running(struct perf_stat_config *config,
-			  u64 run, u64 ena)
+			  u64 run, u64 ena, bool before_metric)
 {
-	if (config->json_output)
-		print_running_json(config, run, ena);
-	else if (config->csv_output)
-		print_running_csv(config, run, ena);
-	else
-		print_running_std(config, run, ena);
+	if (config->json_output) {
+		if (before_metric)
+			print_running_json(config, run, ena);
+	} else if (config->csv_output) {
+		if (before_metric)
+			print_running_csv(config, run, ena);
+	} else {
+		if (!before_metric)
+			print_running_std(config, run, ena);
+	}
 }
 
 static void print_noise_pct_std(struct perf_stat_config *config,
@@ -82,20 +86,24 @@ static void print_noise_pct_json(struct perf_stat_config *config,
 }
 
 static void print_noise_pct(struct perf_stat_config *config,
-			    double total, double avg)
+			    double total, double avg, bool before_metric)
 {
 	double pct = rel_stddev_stats(total, avg);
 
-	if (config->json_output)
-		print_noise_pct_json(config, pct);
-	else if (config->csv_output)
-		print_noise_pct_csv(config, pct);
-	else
-		print_noise_pct_std(config, pct);
+	if (config->json_output) {
+		if (before_metric)
+			print_noise_pct_json(config, pct);
+	} else if (config->csv_output) {
+		if (before_metric)
+			print_noise_pct_csv(config, pct);
+	} else {
+		if (!before_metric)
+			print_noise_pct_std(config, pct);
+	}
 }
 
 static void print_noise(struct perf_stat_config *config,
-			struct evsel *evsel, double avg)
+			struct evsel *evsel, double avg, bool before_metric)
 {
 	struct perf_stat_evsel *ps;
 
@@ -103,7 +111,7 @@ static void print_noise(struct perf_stat_config *config,
 		return;
 
 	ps = evsel->stats;
-	print_noise_pct(config, stddev_stats(&ps->res_stats), avg);
+	print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric);
 }
 
 static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name)
@@ -637,6 +645,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 	};
 	print_metric_t pm;
 	new_line_t nl;
+	bool ok = true;
 
 	if (config->csv_output) {
 		static const int aggr_fields[AGGR_MAX] = {
@@ -672,7 +681,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 			return;
 		}
 
-		abs_printout(config, id, nr, counter, uval, /*ok=*/false);
+		ok = false;
 
 		if (counter->supported) {
 			if (!evlist__has_hybrid(counter->evlist)) {
@@ -681,37 +690,30 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 					config->print_mixed_hw_group_error = 1;
 			}
 		}
-
-		if (!config->csv_output && !config->json_output)
-			pm(config, &os, NULL, NULL, "", 0);
-		print_noise(config, counter, noise);
-		print_running(config, run, ena);
-		if (config->csv_output || config->json_output)
-			pm(config, &os, NULL, NULL, "", 0);
-		return;
 	}
 
-	if (!config->metric_only)
-		abs_printout(config, id, nr, counter, uval, /*ok=*/true);
-
 	out.print_metric = pm;
 	out.new_line = nl;
 	out.ctx = &os;
 	out.force_header = false;
 
-	if (config->csv_output && !config->metric_only) {
-		print_noise(config, counter, noise);
-		print_running(config, run, ena);
-	} else if (config->json_output && !config->metric_only) {
-		print_noise(config, counter, noise);
-		print_running(config, run, ena);
+	if (!config->metric_only) {
+		abs_printout(config, id, nr, counter, uval, ok);
+
+		print_noise(config, counter, noise, /*before_metric=*/true);
+		print_running(config, run, ena, /*before_metric=*/true);
+	}
+
+	if (ok) {
+		perf_stat__print_shadow_stats(config, counter, uval, map_idx,
+					      &out, &config->metric_events, st);
+	} else {
+		pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
 	}
 
-	perf_stat__print_shadow_stats(config, counter, uval, map_idx,
-				&out, &config->metric_events, st);
-	if (!config->csv_output && !config->metric_only && !config->json_output) {
-		print_noise(config, counter, noise);
-		print_running(config, run, ena);
+	if (!config->metric_only) {
+		print_noise(config, counter, noise, /*before_metric=*/false);
+		print_running(config, run, ena, /*before_metric=*/false);
 	}
 }
 
@@ -1151,7 +1153,7 @@ static void print_footer(struct perf_stat_config *config)
 		fprintf(output, " %17.*f +- %.*f seconds time elapsed",
 			precision, avg, precision, sd);
 
-		print_noise_pct(config, sd, avg);
+		print_noise_pct(config, sd, avg, /*before_metric=*/false);
 	}
 	fprintf(output, "\n\n");
 
-- 
GitLab


From 8d500292bd55c05130c96f6e84cc3e6ba3ebed99 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:17 -0800
Subject: [PATCH 1570/4122] perf stat: Align cgroup names

We don't know how long cgroup name is, but at least we can align short
ones like below.

  $ perf stat -a --for-each-cgroup system.slice,user.slice true

   Performance counter stats for 'system wide':

           0.13 msec cpu-clock         system.slice  #    0.010 CPUs utilized
              4      context-switches  system.slice  #   31.989 K/sec
              1      cpu-migrations    system.slice  #    7.997 K/sec
              0      page-faults       system.slice  #    0.000 /sec
        450,673      cycles            system.slice  #    3.604 GHz             (92.41%)
        161,216      instructions      system.slice  #    0.36  insn per cycle  (92.41%)
         32,678      branches          system.slice  #  261.332 M/sec           (92.41%)
          2,628      branch-misses     system.slice  #    8.04% of all branches (92.41%)
          14.29 msec cpu-clock         user.slice    #    1.163 CPUs utilized
             35      context-switches  user.slice    #    2.449 K/sec
             12      cpu-migrations    user.slice    #  839.691 /sec
             57      page-faults       user.slice    #    3.989 K/sec
     49,683,026      cycles            user.slice    #    3.477 GHz             (99.38%)
    110,790,266      instructions      user.slice    #    2.23  insn per cycle  (99.38%)
     24,552,255      branches          user.slice    #    1.718 G/sec           (99.38%)
        127,779      branch-misses     user.slice    #    0.52% of all branches (99.38%)

    0.012289431 seconds time elapsed

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-10-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bf3f2f9d5deec..e66f766a3d786 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -116,7 +116,7 @@ static void print_noise(struct perf_stat_config *config,
 
 static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name)
 {
-	fprintf(config->output, " %s", cgrp_name);
+	fprintf(config->output, " %-16s", cgrp_name);
 }
 
 static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name)
-- 
GitLab


From b2d9832e00a0d93ad6127fa8ccf2b8e0cfe67397 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:18 -0800
Subject: [PATCH 1571/4122] perf stat: Split print_metric_headers() function

The print_metric_headers() shows metric headers a little bit for each
mode.  Split it out to make the code clearer.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-11-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 52 ++++++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index e66f766a3d786..bb2791459f5f0 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -924,6 +924,37 @@ static const char *aggr_header_csv[] = {
 	[AGGR_GLOBAL] 	=	""
 };
 
+static void print_metric_headers_std(struct perf_stat_config *config,
+				     const char *prefix, bool no_indent)
+{
+	if (prefix)
+		fprintf(config->output, "%s", prefix);
+	if (!no_indent) {
+		fprintf(config->output, "%*s",
+			aggr_header_lens[config->aggr_mode], "");
+	}
+}
+
+static void print_metric_headers_csv(struct perf_stat_config *config,
+				     const char *prefix,
+				     bool no_indent __maybe_unused)
+{
+	if (prefix)
+		fprintf(config->output, "%s", prefix);
+	if (config->interval)
+		fputs("time,", config->output);
+	if (!config->iostat_run)
+		fputs(aggr_header_csv[config->aggr_mode], config->output);
+}
+
+static void print_metric_headers_json(struct perf_stat_config *config,
+				      const char *prefix __maybe_unused,
+				      bool no_indent __maybe_unused)
+{
+	if (config->interval)
+		fputs("{\"unit\" : \"sec\"}", config->output);
+}
+
 static void print_metric_headers(struct perf_stat_config *config,
 				 struct evlist *evlist,
 				 const char *prefix, bool no_indent)
@@ -939,22 +970,13 @@ static void print_metric_headers(struct perf_stat_config *config,
 		.force_header = true,
 	};
 
-	if (prefix && !config->json_output)
-		fprintf(config->output, "%s", prefix);
+	if (config->json_output)
+		print_metric_headers_json(config, prefix, no_indent);
+	else if (config->csv_output)
+		print_metric_headers_csv(config, prefix, no_indent);
+	else
+		print_metric_headers_std(config, prefix, no_indent);
 
-	if (!config->csv_output && !config->json_output && !no_indent)
-		fprintf(config->output, "%*s",
-			aggr_header_lens[config->aggr_mode], "");
-	if (config->csv_output) {
-		if (config->interval)
-			fputs("time,", config->output);
-		if (!config->iostat_run)
-			fputs(aggr_header_csv[config->aggr_mode], config->output);
-	}
-	if (config->json_output) {
-		if (config->interval)
-			fputs("{\"unit\" : \"sec\"}", config->output);
-	}
 	if (config->iostat_run)
 		iostat_print_header_prefix(config);
 
-- 
GitLab


From 208cbcd21bf57c80ceaf90fcec16cfdfb55a0a4d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:19 -0800
Subject: [PATCH 1572/4122] perf stat: Factor out prepare_interval()

This logic does not print the time directly, but it just puts the
timestamp in the buffer as a prefix.  To reduce the confusion, factor
out the code into a separate function.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 39 +++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 15 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bb2791459f5f0..c234be656db93 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -993,9 +993,25 @@ static void print_metric_headers(struct perf_stat_config *config,
 	fputc('\n', config->output);
 }
 
+static void prepare_interval(struct perf_stat_config *config,
+			     char *prefix, struct timespec *ts)
+{
+	if (config->iostat_run)
+		return;
+
+	if (!config->json_output)
+		sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec,
+				 ts->tv_nsec, config->csv_sep);
+	else if (!config->metric_only)
+		sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long)
+				 ts->tv_sec, ts->tv_nsec);
+	else
+		sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long)
+				 ts->tv_sec, ts->tv_nsec);
+}
+
 static void print_interval(struct perf_stat_config *config,
-			   struct evlist *evlist,
-			   char *prefix, struct timespec *ts)
+			   struct evlist *evlist)
 {
 	bool metric_only = config->metric_only;
 	unsigned int unit_width = config->unit_width;
@@ -1005,16 +1021,6 @@ static void print_interval(struct perf_stat_config *config,
 	if (config->interval_clear && isatty(fileno(output)))
 		puts(CONSOLE_CLEAR);
 
-	if (!config->iostat_run && !config->json_output)
-		sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec,
-				 ts->tv_nsec, config->csv_sep);
-	if (!config->iostat_run && config->json_output && !config->metric_only)
-		sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long)
-				 ts->tv_sec, ts->tv_nsec);
-	if (!config->iostat_run && config->json_output && config->metric_only)
-		sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long)
-				 ts->tv_sec, ts->tv_nsec);
-
 	if ((num_print_interval == 0 || config->interval_clear) &&
 			!config->csv_output && !config->json_output) {
 		switch (config->aggr_mode) {
@@ -1252,10 +1258,13 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	if (config->iostat_run)
 		evlist->selected = evlist__first(evlist);
 
-	if (interval)
-		print_interval(config, evlist, prefix = buf, ts);
-	else
+	if (interval) {
+		prefix = buf;
+		prepare_interval(config, prefix, ts);
+		print_interval(config, evlist);
+	} else {
 		print_header(config, _target, argc, argv);
+	}
 
 	if (metric_only) {
 		static int num_print_iv;
-- 
GitLab


From 33c4ed47990f3eaaa732d3fcc2760e6165c16c40 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:20 -0800
Subject: [PATCH 1573/4122] perf stat: Cleanup interval print alignment

Instead of using magic values, define symbolic constants and use them.
Also add aggr_header_std[] array to simplify aggr_mode handling.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-13-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 165 ++++++++++++++++++---------------
 1 file changed, 91 insertions(+), 74 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index c234be656db93..f983432aaddde 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -25,6 +25,45 @@
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
 
+#define METRIC_LEN   38
+#define EVNAME_LEN   32
+#define COUNTS_LEN   18
+#define INTERVAL_LEN 16
+#define CGROUP_LEN   16
+#define COMM_LEN     16
+#define PID_LEN       7
+#define CPUS_LEN      4
+
+static int aggr_header_lens[] = {
+	[AGGR_CORE] 	= 18,
+	[AGGR_DIE] 	= 12,
+	[AGGR_SOCKET] 	= 6,
+	[AGGR_NODE] 	= 6,
+	[AGGR_NONE] 	= 6,
+	[AGGR_THREAD] 	= 16,
+	[AGGR_GLOBAL] 	= 0,
+};
+
+static const char *aggr_header_csv[] = {
+	[AGGR_CORE] 	= 	"core,cpus,",
+	[AGGR_DIE] 	= 	"die,cpus,",
+	[AGGR_SOCKET] 	= 	"socket,cpus,",
+	[AGGR_NONE] 	= 	"cpu,",
+	[AGGR_THREAD] 	= 	"comm-pid,",
+	[AGGR_NODE] 	= 	"node,",
+	[AGGR_GLOBAL] 	=	""
+};
+
+static const char *aggr_header_std[] = {
+	[AGGR_CORE] 	= 	"core",
+	[AGGR_DIE] 	= 	"die",
+	[AGGR_SOCKET] 	= 	"socket",
+	[AGGR_NONE] 	= 	"cpu",
+	[AGGR_THREAD] 	= 	"comm-pid",
+	[AGGR_NODE] 	= 	"node",
+	[AGGR_GLOBAL] 	=	""
+};
+
 static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena)
 {
 	if (run != ena)
@@ -116,7 +155,7 @@ static void print_noise(struct perf_stat_config *config,
 
 static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name)
 {
-	fprintf(config->output, " %-16s", cgrp_name);
+	fprintf(config->output, " %-*s", CGROUP_LEN, cgrp_name);
 }
 
 static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name)
@@ -147,44 +186,46 @@ static void print_aggr_id_std(struct perf_stat_config *config,
 			      struct evsel *evsel, struct aggr_cpu_id id, int nr)
 {
 	FILE *output = config->output;
+	int idx = config->aggr_mode;
+	char buf[128];
 
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
-		fprintf(output, "S%d-D%d-C%*d %*d ",
-			id.socket, id.die, -8, id.core, 4, nr);
+		snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core);
 		break;
 	case AGGR_DIE:
-		fprintf(output, "S%d-D%*d %*d ",
-			id.socket, -8, id.die, 4, nr);
+		snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die);
 		break;
 	case AGGR_SOCKET:
-		fprintf(output, "S%*d %*d ",
-			-5, id.socket, 4, nr);
+		snprintf(buf, sizeof(buf), "S%d", id.socket);
 		break;
 	case AGGR_NODE:
-		fprintf(output, "N%*d %*d ",
-			-5, id.node, 4, nr);
+		snprintf(buf, sizeof(buf), "N%d", id.node);
 		break;
 	case AGGR_NONE:
 		if (evsel->percore && !config->percore_show_thread) {
-			fprintf(output, "S%d-D%d-C%*d ",
-				id.socket, id.die, -3, id.core);
+			snprintf(buf, sizeof(buf), "S%d-D%d-C%d ",
+				id.socket, id.die, id.core);
+			fprintf(output, "%-*s ",
+				aggr_header_lens[AGGR_CORE], buf);
 		} else if (id.cpu.cpu > -1) {
-			fprintf(output, "CPU%*d ",
-				-7, id.cpu.cpu);
+			fprintf(output, "CPU%-*d ",
+				aggr_header_lens[AGGR_NONE] - 3, id.cpu.cpu);
 		}
-		break;
+		return;
 	case AGGR_THREAD:
-		fprintf(output, "%*s-%*d ",
-			16, perf_thread_map__comm(evsel->core.threads, id.thread_idx),
-			-8, perf_thread_map__pid(evsel->core.threads, id.thread_idx));
-		break;
+		fprintf(output, "%*s-%-*d ",
+			COMM_LEN, perf_thread_map__comm(evsel->core.threads, id.thread_idx),
+			PID_LEN, perf_thread_map__pid(evsel->core.threads, id.thread_idx));
+		return;
 	case AGGR_GLOBAL:
 	case AGGR_UNSET:
 	case AGGR_MAX:
 	default:
-		break;
+		return;
 	}
+
+	fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, nr);
 }
 
 static void print_aggr_id_csv(struct perf_stat_config *config,
@@ -301,8 +342,6 @@ struct outstate {
 	struct evsel *evsel;
 };
 
-#define METRIC_LEN  38
-
 static void new_line_std(struct perf_stat_config *config __maybe_unused,
 			 void *ctx)
 {
@@ -534,19 +573,19 @@ static void print_counter_value_std(struct perf_stat_config *config,
 	const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED;
 
 	if (config->big_num)
-		fmt = floor(sc) != sc ? "%'18.2f " : "%'18.0f ";
+		fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f ";
 	else
-		fmt = floor(sc) != sc ? "%18.2f " : "%18.0f ";
+		fmt = floor(sc) != sc ? "%*.2f " : "%*.0f ";
 
 	if (ok)
-		fprintf(output, fmt, avg);
+		fprintf(output, fmt, COUNTS_LEN, avg);
 	else
-		fprintf(output, "%18s ", bad_count);
+		fprintf(output, "%*s ", COUNTS_LEN, bad_count);
 
 	if (evsel->unit)
 		fprintf(output, "%-*s ", config->unit_width, evsel->unit);
 
-	fprintf(output, "%-*s", 32, evsel__name(evsel));
+	fprintf(output, "%-*s", EVNAME_LEN, evsel__name(evsel));
 }
 
 static void print_counter_value_csv(struct perf_stat_config *config,
@@ -904,34 +943,19 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 	}
 }
 
-static int aggr_header_lens[] = {
-	[AGGR_CORE] = 24,
-	[AGGR_DIE] = 18,
-	[AGGR_SOCKET] = 12,
-	[AGGR_NONE] = 6,
-	[AGGR_THREAD] = 24,
-	[AGGR_NODE] = 6,
-	[AGGR_GLOBAL] = 0,
-};
-
-static const char *aggr_header_csv[] = {
-	[AGGR_CORE] 	= 	"core,cpus,",
-	[AGGR_DIE] 	= 	"die,cpus,",
-	[AGGR_SOCKET] 	= 	"socket,cpus,",
-	[AGGR_NONE] 	= 	"cpu,",
-	[AGGR_THREAD] 	= 	"comm-pid,",
-	[AGGR_NODE] 	= 	"node,",
-	[AGGR_GLOBAL] 	=	""
-};
-
 static void print_metric_headers_std(struct perf_stat_config *config,
 				     const char *prefix, bool no_indent)
 {
 	if (prefix)
 		fprintf(config->output, "%s", prefix);
+
 	if (!no_indent) {
-		fprintf(config->output, "%*s",
-			aggr_header_lens[config->aggr_mode], "");
+		int len = aggr_header_lens[config->aggr_mode];
+
+		if (nr_cgroups)
+			len += CGROUP_LEN + 1;
+
+		fprintf(config->output, "%*s", len, "");
 	}
 }
 
@@ -1025,46 +1049,39 @@ static void print_interval(struct perf_stat_config *config,
 			!config->csv_output && !config->json_output) {
 		switch (config->aggr_mode) {
 		case AGGR_NODE:
-			fprintf(output, "#           time node   cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			break;
 		case AGGR_SOCKET:
-			fprintf(output, "#           time socket cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			break;
 		case AGGR_DIE:
-			fprintf(output, "#           time die          cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			break;
 		case AGGR_CORE:
-			fprintf(output, "#           time core            cpus");
-			if (!metric_only)
-				fprintf(output, "             counts %*s events\n", unit_width, "unit");
+			fprintf(output, "#%*s %-*s cpus",
+				INTERVAL_LEN - 1, "time",
+				aggr_header_lens[config->aggr_mode],
+				aggr_header_std[config->aggr_mode]);
 			break;
 		case AGGR_NONE:
-			fprintf(output, "#           time CPU    ");
-			if (!metric_only)
-				fprintf(output, "                counts %*s events\n", unit_width, "unit");
+			fprintf(output, "#%*s %-*s",
+				INTERVAL_LEN - 1, "time",
+				aggr_header_lens[config->aggr_mode],
+				aggr_header_std[config->aggr_mode]);
 			break;
 		case AGGR_THREAD:
-			fprintf(output, "#           time             comm-pid");
-			if (!metric_only)
-				fprintf(output, "                  counts %*s events\n", unit_width, "unit");
+			fprintf(output, "#%*s %*s-%-*s",
+				INTERVAL_LEN - 1, "time",
+				COMM_LEN, "comm", PID_LEN, "pid");
 			break;
 		case AGGR_GLOBAL:
 		default:
-			if (!config->iostat_run) {
-				fprintf(output, "#           time");
-				if (!metric_only)
-					fprintf(output, "             counts %*s events\n", unit_width, "unit");
-			}
+			if (!config->iostat_run)
+				fprintf(output, "#%*s",
+					INTERVAL_LEN - 1, "time");
 		case AGGR_UNSET:
 		case AGGR_MAX:
 			break;
 		}
+
+		if (!metric_only) {
+			fprintf(output, " %*s %*s events\n",
+				COUNTS_LEN, "counts", unit_width, "unit");
+		}
 	}
 
 	if ((num_print_interval == 0 || config->interval_clear) && metric_only)
-- 
GitLab


From 6108712c07422f8ececb163ace7fad2ae0e9a24c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:21 -0800
Subject: [PATCH 1574/4122] perf stat: Remove impossible condition

The print would run only if metric_only is not set, but it's already in a
block that says it's in metric_only case.  And there's no place to change
the setting.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-14-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index f983432aaddde..cc8bb6d07dcb2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1292,9 +1292,6 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 			num_print_iv = 0;
 		if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run)
 			fprintf(config->output, "%s", prefix);
-
-		if (config->json_output && !config->metric_only)
-			fprintf(config->output, "}");
 	}
 
 	switch (config->aggr_mode) {
-- 
GitLab


From 4c86b664f4cce6940c187ecfc019f434f8dbd4bc Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:22 -0800
Subject: [PATCH 1575/4122] perf stat: Rework header display

There are print_header() and print_interval() to print header lines before
actual counter values.  Also print_metric_headers() needs to be called for
the metric-only case.

Let's move all these logics to a single place including num_print_iv to
refresh the headers for interval mode.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-15-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 185 +++++++++++++++++++--------------
 1 file changed, 106 insertions(+), 79 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index cc8bb6d07dcb2..f97817628478d 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1034,94 +1034,129 @@ static void prepare_interval(struct perf_stat_config *config,
 				 ts->tv_sec, ts->tv_nsec);
 }
 
-static void print_interval(struct perf_stat_config *config,
-			   struct evlist *evlist)
+static void print_header_interval_std(struct perf_stat_config *config,
+				      struct target *_target __maybe_unused,
+				      struct evlist *evlist,
+				      int argc __maybe_unused,
+				      const char **argv __maybe_unused)
 {
-	bool metric_only = config->metric_only;
-	unsigned int unit_width = config->unit_width;
 	FILE *output = config->output;
-	static int num_print_interval;
 
-	if (config->interval_clear && isatty(fileno(output)))
-		puts(CONSOLE_CLEAR);
+	switch (config->aggr_mode) {
+	case AGGR_NODE:
+	case AGGR_SOCKET:
+	case AGGR_DIE:
+	case AGGR_CORE:
+		fprintf(output, "#%*s %-*s cpus",
+			INTERVAL_LEN - 1, "time",
+			aggr_header_lens[config->aggr_mode],
+			aggr_header_std[config->aggr_mode]);
+		break;
+	case AGGR_NONE:
+		fprintf(output, "#%*s %-*s",
+			INTERVAL_LEN - 1, "time",
+			aggr_header_lens[config->aggr_mode],
+			aggr_header_std[config->aggr_mode]);
+		break;
+	case AGGR_THREAD:
+		fprintf(output, "#%*s %*s-%-*s",
+			INTERVAL_LEN - 1, "time",
+			COMM_LEN, "comm", PID_LEN, "pid");
+		break;
+	case AGGR_GLOBAL:
+	default:
+		if (!config->iostat_run)
+			fprintf(output, "#%*s",
+				INTERVAL_LEN - 1, "time");
+	case AGGR_UNSET:
+	case AGGR_MAX:
+		break;
+	}
 
-	if ((num_print_interval == 0 || config->interval_clear) &&
-			!config->csv_output && !config->json_output) {
-		switch (config->aggr_mode) {
-		case AGGR_NODE:
-		case AGGR_SOCKET:
-		case AGGR_DIE:
-		case AGGR_CORE:
-			fprintf(output, "#%*s %-*s cpus",
-				INTERVAL_LEN - 1, "time",
-				aggr_header_lens[config->aggr_mode],
-				aggr_header_std[config->aggr_mode]);
-			break;
-		case AGGR_NONE:
-			fprintf(output, "#%*s %-*s",
-				INTERVAL_LEN - 1, "time",
-				aggr_header_lens[config->aggr_mode],
-				aggr_header_std[config->aggr_mode]);
-			break;
-		case AGGR_THREAD:
-			fprintf(output, "#%*s %*s-%-*s",
-				INTERVAL_LEN - 1, "time",
-				COMM_LEN, "comm", PID_LEN, "pid");
-			break;
-		case AGGR_GLOBAL:
-		default:
-			if (!config->iostat_run)
-				fprintf(output, "#%*s",
-					INTERVAL_LEN - 1, "time");
-		case AGGR_UNSET:
-		case AGGR_MAX:
-			break;
-		}
+	if (config->metric_only)
+		print_metric_headers(config, evlist, " ", true);
+	else
+		fprintf(output, " %*s %*s events\n",
+			COUNTS_LEN, "counts", config->unit_width, "unit");
+}
 
-		if (!metric_only) {
-			fprintf(output, " %*s %*s events\n",
-				COUNTS_LEN, "counts", unit_width, "unit");
-		}
-	}
+static void print_header_std(struct perf_stat_config *config,
+			     struct target *_target, struct evlist *evlist,
+			     int argc, const char **argv)
+{
+	FILE *output = config->output;
+	int i;
+
+	fprintf(output, "\n");
+	fprintf(output, " Performance counter stats for ");
+	if (_target->bpf_str)
+		fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
+	else if (_target->system_wide)
+		fprintf(output, "\'system wide");
+	else if (_target->cpu_list)
+		fprintf(output, "\'CPU(s) %s", _target->cpu_list);
+	else if (!target__has_task(_target)) {
+		fprintf(output, "\'%s", argv ? argv[0] : "pipe");
+		for (i = 1; argv && (i < argc); i++)
+			fprintf(output, " %s", argv[i]);
+	} else if (_target->pid)
+		fprintf(output, "process id \'%s", _target->pid);
+	else
+		fprintf(output, "thread id \'%s", _target->tid);
 
-	if ((num_print_interval == 0 || config->interval_clear) && metric_only)
+	fprintf(output, "\'");
+	if (config->run_count > 1)
+		fprintf(output, " (%d runs)", config->run_count);
+	fprintf(output, ":\n\n");
+
+	if (config->metric_only)
+		print_metric_headers(config, evlist, " ", false);
+}
+
+static void print_header_csv(struct perf_stat_config *config,
+			     struct target *_target __maybe_unused,
+			     struct evlist *evlist,
+			     int argc __maybe_unused,
+			     const char **argv __maybe_unused)
+{
+	if (config->metric_only)
+		print_metric_headers(config, evlist, " ", true);
+}
+static void print_header_json(struct perf_stat_config *config,
+			      struct target *_target __maybe_unused,
+			      struct evlist *evlist,
+			      int argc __maybe_unused,
+			      const char **argv __maybe_unused)
+{
+	if (config->metric_only)
 		print_metric_headers(config, evlist, " ", true);
-	if (++num_print_interval == 25)
-		num_print_interval = 0;
 }
 
 static void print_header(struct perf_stat_config *config,
 			 struct target *_target,
+			 struct evlist *evlist,
 			 int argc, const char **argv)
 {
-	FILE *output = config->output;
-	int i;
+	static int num_print_iv;
 
 	fflush(stdout);
 
-	if (!config->csv_output && !config->json_output) {
-		fprintf(output, "\n");
-		fprintf(output, " Performance counter stats for ");
-		if (_target->bpf_str)
-			fprintf(output, "\'BPF program(s) %s", _target->bpf_str);
-		else if (_target->system_wide)
-			fprintf(output, "\'system wide");
-		else if (_target->cpu_list)
-			fprintf(output, "\'CPU(s) %s", _target->cpu_list);
-		else if (!target__has_task(_target)) {
-			fprintf(output, "\'%s", argv ? argv[0] : "pipe");
-			for (i = 1; argv && (i < argc); i++)
-				fprintf(output, " %s", argv[i]);
-		} else if (_target->pid)
-			fprintf(output, "process id \'%s", _target->pid);
-		else
-			fprintf(output, "thread id \'%s", _target->tid);
+	if (config->interval_clear)
+		puts(CONSOLE_CLEAR);
 
-		fprintf(output, "\'");
-		if (config->run_count > 1)
-			fprintf(output, " (%d runs)", config->run_count);
-		fprintf(output, ":\n\n");
+	if (num_print_iv == 0 || config->interval_clear) {
+		if (config->json_output)
+			print_header_json(config, _target, evlist, argc, argv);
+		else if (config->csv_output)
+			print_header_csv(config, _target, evlist, argc, argv);
+		else if (config->interval)
+			print_header_interval_std(config, _target, evlist, argc, argv);
+		else
+			print_header_std(config, _target, evlist, argc, argv);
 	}
+
+	if (num_print_iv++ == 25)
+		num_print_iv = 0;
 }
 
 static int get_precision(double num)
@@ -1278,18 +1313,10 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	if (interval) {
 		prefix = buf;
 		prepare_interval(config, prefix, ts);
-		print_interval(config, evlist);
-	} else {
-		print_header(config, _target, argc, argv);
 	}
 
+	print_header(config, _target, evlist, argc, argv);
 	if (metric_only) {
-		static int num_print_iv;
-
-		if (num_print_iv == 0 && !interval)
-			print_metric_headers(config, evlist, prefix, false);
-		if (num_print_iv++ == 25)
-			num_print_iv = 0;
 		if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run)
 			fprintf(config->output, "%s", prefix);
 	}
-- 
GitLab


From 453279d5739f12cf27b604c84c061a8a7a3937da Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:23 -0800
Subject: [PATCH 1576/4122] perf stat: Move condition to print_footer()

Likewise, I think it'd better to have the control inside the function, and keep
the higher level function clearer.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-16-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index f97817628478d..73cf898060c06 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1205,6 +1205,9 @@ static void print_footer(struct perf_stat_config *config)
 	double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC;
 	FILE *output = config->output;
 
+	if (config->interval || config->csv_output || config->json_output)
+		return;
+
 	if (!config->null_run)
 		fprintf(output, "\n");
 
@@ -1359,8 +1362,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 		break;
 	}
 
-	if (!interval && !config->csv_output && !config->json_output)
-		print_footer(config);
+	print_footer(config);
 
 	fflush(config->output);
 }
-- 
GitLab


From 2cf38236d94605586da0ed0f9ec432bf0eb415c3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:24 -0800
Subject: [PATCH 1577/4122] perf stat: Factor out prefix display

The prefix is needed for interval mode to print timestamp at the
beginning of each line.  But the it's tricky for the metric only
mode since it doesn't print every evsel and combines the metrics
into a single line.

So it needed to pass 'first' argument to print_counter_aggrdata()
to determine if the current event is being printed at first.  This
makes the code hard to read.

Let's move the logic out of the function and do it in the outer
print loop.  This would enable further cleanups later.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-17-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 43 ++++++++++++----------------------
 1 file changed, 15 insertions(+), 28 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 73cf898060c06..bb40ed29300d0 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -805,8 +805,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun
 
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
-				   char *prefix, bool metric_only,
-				   bool *first)
+				   char *prefix, bool metric_only)
 {
 	FILE *output = config->output;
 	u64 ena, run, val;
@@ -825,10 +824,6 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	ena = aggr->counts.ena;
 	run = aggr->counts.run;
 
-	if (*first && metric_only) {
-		*first = false;
-		aggr_printout(config, counter, id, aggr->nr);
-	}
 	if (prefix && !metric_only)
 		fprintf(output, "%s", prefix);
 
@@ -849,7 +844,6 @@ static void print_aggr(struct perf_stat_config *config,
 	FILE *output = config->output;
 	struct evsel *counter;
 	int s;
-	bool first;
 
 	if (!config->aggr_map || !config->aggr_get_id)
 		return;
@@ -860,21 +854,23 @@ static void print_aggr(struct perf_stat_config *config,
 	 */
 	for (s = 0; s < config->aggr_map->nr; s++) {
 		if (metric_only) {
+			struct perf_stat_aggr *aggr;
+			struct aggr_cpu_id id = config->aggr_map->map[s];
+
 			if (prefix)
 				fprintf(output, "%s", prefix);
-			else if (config->summary && !config->no_csv_summary &&
-				 config->csv_output && !config->interval)
-				fprintf(output, "%16s%s", "summary", config->csv_sep);
+
+			counter = evlist__first(evlist);
+			aggr = &counter->stats->aggr[s];
+			aggr_printout(config, counter, id, aggr->nr);
 		}
 
-		first = true;
 		evlist__for_each_entry(evlist, counter) {
 			if (counter->merged_stat)
 				continue;
 
-			print_counter_aggrdata(config, counter, s,
-					       prefix, metric_only,
-					       &first);
+			print_counter_aggrdata(config, counter, s, prefix,
+					       metric_only);
 		}
 		if (metric_only)
 			fputc('\n', output);
@@ -885,7 +881,6 @@ static void print_counter(struct perf_stat_config *config,
 			  struct evsel *counter, char *prefix)
 {
 	bool metric_only = config->metric_only;
-	bool first = false;
 	int s;
 
 	/* AGGR_THREAD doesn't have config->aggr_get_id */
@@ -896,9 +891,8 @@ static void print_counter(struct perf_stat_config *config,
 		return;
 
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		print_counter_aggrdata(config, counter, s,
-				       prefix, metric_only,
-				       &first);
+		print_counter_aggrdata(config, counter, s, prefix,
+				       metric_only);
 	}
 }
 
@@ -1260,7 +1254,6 @@ static void print_percore(struct perf_stat_config *config,
 	FILE *output = config->output;
 	struct cpu_aggr_map *core_map;
 	int s, c, i;
-	bool first = true;
 
 	if (!config->aggr_map || !config->aggr_get_id)
 		return;
@@ -1288,11 +1281,7 @@ static void print_percore(struct perf_stat_config *config,
 		if (found)
 			continue;
 
-		if (prefix && metric_only)
-			fprintf(output, "%s", prefix);
-
-		print_counter_aggrdata(config, counter, s,
-				       prefix, metric_only, &first);
+		print_counter_aggrdata(config, counter, s, prefix, metric_only);
 
 		core_map->map[c++] = core_id;
 	}
@@ -1319,10 +1308,6 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	}
 
 	print_header(config, _target, evlist, argc, argv);
-	if (metric_only) {
-		if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run)
-			fprintf(config->output, "%s", prefix);
-	}
 
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
@@ -1337,6 +1322,8 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 			iostat_print_counters(evlist, config, ts, prefix = buf,
 					      print_counter);
 		else {
+			if (prefix && metric_only)
+				fprintf(config->output, "%s", prefix);
 			evlist__for_each_entry(evlist, counter) {
 				print_counter(config, counter, prefix);
 			}
-- 
GitLab


From 78670daefd33c8e0a30b121a2a271a90bca57130 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:25 -0800
Subject: [PATCH 1578/4122] perf stat: Factor out print_metric_{begin,end}()

For the metric-only case, add new functions to handle the start and the
end of each metric display.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-18-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 56 +++++++++++++++++++++-------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index bb40ed29300d0..7a0673be720b8 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -836,12 +836,39 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 		fputc('\n', output);
 }
 
+static void print_metric_begin(struct perf_stat_config *config,
+			       struct evlist *evlist,
+			       char *prefix, int aggr_idx)
+{
+	struct perf_stat_aggr *aggr;
+	struct aggr_cpu_id id;
+	struct evsel *evsel;
+
+	if (!config->metric_only)
+		return;
+
+	if (prefix)
+		fprintf(config->output, "%s", prefix);
+
+	evsel = evlist__first(evlist);
+	id = config->aggr_map->map[aggr_idx];
+	aggr = &evsel->stats->aggr[aggr_idx];
+	aggr_printout(config, evsel, id, aggr->nr);
+}
+
+static void print_metric_end(struct perf_stat_config *config)
+{
+	if (!config->metric_only)
+		return;
+
+	fputc('\n', config->output);
+}
+
 static void print_aggr(struct perf_stat_config *config,
 		       struct evlist *evlist,
 		       char *prefix)
 {
 	bool metric_only = config->metric_only;
-	FILE *output = config->output;
 	struct evsel *counter;
 	int s;
 
@@ -853,17 +880,7 @@ static void print_aggr(struct perf_stat_config *config,
 	 * Without each counter has its own line.
 	 */
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		if (metric_only) {
-			struct perf_stat_aggr *aggr;
-			struct aggr_cpu_id id = config->aggr_map->map[s];
-
-			if (prefix)
-				fprintf(output, "%s", prefix);
-
-			counter = evlist__first(evlist);
-			aggr = &counter->stats->aggr[s];
-			aggr_printout(config, counter, id, aggr->nr);
-		}
+		print_metric_begin(config, evlist, prefix, s);
 
 		evlist__for_each_entry(evlist, counter) {
 			if (counter->merged_stat)
@@ -872,8 +889,7 @@ static void print_aggr(struct perf_stat_config *config,
 			print_counter_aggrdata(config, counter, s, prefix,
 					       metric_only);
 		}
-		if (metric_only)
-			fputc('\n', output);
+		print_metric_end(config);
 	}
 }
 
@@ -919,9 +935,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 
 			id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
 			if (first) {
-				if (prefix)
-					fputs(prefix, config->output);
-				aggr_printout(config, counter, id, 0);
+				print_metric_begin(config, evlist, prefix, counter_idx);
 				first = false;
 			}
 			val = ps->aggr[counter_idx].counts.val;
@@ -933,7 +947,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 				 run, ena, 1.0, &rt_stat, counter_idx);
 		}
 		if (!first)
-			fputc('\n', config->output);
+			print_metric_end(config);
 	}
 }
 
@@ -1322,13 +1336,11 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 			iostat_print_counters(evlist, config, ts, prefix = buf,
 					      print_counter);
 		else {
-			if (prefix && metric_only)
-				fprintf(config->output, "%s", prefix);
+			print_metric_begin(config, evlist, prefix, /*aggr_idx=*/0);
 			evlist__for_each_entry(evlist, counter) {
 				print_counter(config, counter, prefix);
 			}
-			if (metric_only)
-				fputc('\n', config->output);
+			print_metric_end(config);
 		}
 		break;
 	case AGGR_NONE:
-- 
GitLab


From 67f8b7eb4e9195d037b60f1614880234aca15fee Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:26 -0800
Subject: [PATCH 1579/4122] perf stat: Support --for-each-cgroup and
 --metric-only

When we have events for each cgroup, the metric should be printed for
each cgroup separately.  Add print_cgroup_counter() to handle that
situation properly.

Also change print_metric_headers() not to print duplicate headers
by checking cgroups.

  $ perf stat -a --for-each-cgroup system.slice,user.slice --metric-only sleep 1

   Performance counter stats for 'system wide':

                                     GHz       insn per cycle branch-misses of all branches
   system.slice                   3.792                0.61                                3.24%
   user.slice                     3.661                2.32                                0.37%

         1.016111516 seconds time elapsed

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-19-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 58 +++++++++++++++++++++++++++-------
 1 file changed, 47 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 7a0673be720b8..cf25ed99b5df9 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -168,10 +168,10 @@ static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_
 	fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name);
 }
 
-static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel)
+static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp)
 {
-	if (nr_cgroups) {
-		const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name  : "";
+	if (nr_cgroups || config->cgroup_list) {
+		const char *cgrp_name = cgrp ? cgrp->name  : "";
 
 		if (config->json_output)
 			print_cgroup_json(config, cgrp_name);
@@ -340,6 +340,7 @@ struct outstate {
 	int  nr;
 	struct aggr_cpu_id id;
 	struct evsel *evsel;
+	struct cgroup *cgrp;
 };
 
 static void new_line_std(struct perf_stat_config *config __maybe_unused,
@@ -552,6 +553,9 @@ static void print_metric_header(struct perf_stat_config *config,
 	    os->evsel->priv != os->evsel->evlist->selected->priv)
 		return;
 
+	if (os->evsel->cgrp != os->cgrp)
+		return;
+
 	if (!valid_only_metric(unit))
 		return;
 	unit = fixunit(tbuf, os->evsel, unit);
@@ -642,7 +646,7 @@ static void abs_printout(struct perf_stat_config *config,
 {
 	aggr_printout(config, evsel, id, nr);
 	print_counter_value(config, evsel, avg, ok);
-	print_cgroup(config, evsel);
+	print_cgroup(config, evsel->cgrp);
 }
 
 static bool is_mixed_hw_group(struct evsel *counter)
@@ -838,7 +842,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 
 static void print_metric_begin(struct perf_stat_config *config,
 			       struct evlist *evlist,
-			       char *prefix, int aggr_idx)
+			       char *prefix, int aggr_idx,
+			       struct cgroup *cgrp)
 {
 	struct perf_stat_aggr *aggr;
 	struct aggr_cpu_id id;
@@ -854,6 +859,8 @@ static void print_metric_begin(struct perf_stat_config *config,
 	id = config->aggr_map->map[aggr_idx];
 	aggr = &evsel->stats->aggr[aggr_idx];
 	aggr_printout(config, evsel, id, aggr->nr);
+
+	print_cgroup(config, cgrp);
 }
 
 static void print_metric_end(struct perf_stat_config *config)
@@ -880,7 +887,7 @@ static void print_aggr(struct perf_stat_config *config,
 	 * Without each counter has its own line.
 	 */
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		print_metric_begin(config, evlist, prefix, s);
+		print_metric_begin(config, evlist, prefix, s, /*cgrp=*/NULL);
 
 		evlist__for_each_entry(evlist, counter) {
 			if (counter->merged_stat)
@@ -935,7 +942,8 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 
 			id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
 			if (first) {
-				print_metric_begin(config, evlist, prefix, counter_idx);
+				print_metric_begin(config, evlist, prefix,
+						   counter_idx, /*cgrp=*/NULL);
 				first = false;
 			}
 			val = ps->aggr[counter_idx].counts.val;
@@ -960,7 +968,7 @@ static void print_metric_headers_std(struct perf_stat_config *config,
 	if (!no_indent) {
 		int len = aggr_header_lens[config->aggr_mode];
 
-		if (nr_cgroups)
+		if (nr_cgroups || config->cgroup_list)
 			len += CGROUP_LEN + 1;
 
 		fprintf(config->output, "%*s", len, "");
@@ -1012,6 +1020,9 @@ static void print_metric_headers(struct perf_stat_config *config,
 	if (config->iostat_run)
 		iostat_print_header_prefix(config);
 
+	if (config->cgroup_list)
+		os.cgrp = evlist__first(evlist)->cgrp;
+
 	/* Print metrics headers only */
 	evlist__for_each_entry(evlist, counter) {
 		os.evsel = counter;
@@ -1305,6 +1316,28 @@ static void print_percore(struct perf_stat_config *config,
 		fputc('\n', output);
 }
 
+static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist,
+				 char *prefix)
+{
+	struct cgroup *cgrp = NULL;
+	struct evsel *counter;
+
+	evlist__for_each_entry(evlist, counter) {
+		if (cgrp != counter->cgrp) {
+			if (cgrp != NULL)
+				print_metric_end(config);
+
+			cgrp = counter->cgrp;
+			print_metric_begin(config, evlist, prefix,
+					   /*aggr_idx=*/0, cgrp);
+		}
+
+		print_counter(config, counter, prefix);
+	}
+	if (cgrp)
+		print_metric_end(config);
+}
+
 void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
 			    struct target *_target, struct timespec *ts, int argc, const char **argv)
 {
@@ -1332,11 +1365,14 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 		break;
 	case AGGR_THREAD:
 	case AGGR_GLOBAL:
-		if (config->iostat_run)
+		if (config->iostat_run) {
 			iostat_print_counters(evlist, config, ts, prefix = buf,
 					      print_counter);
-		else {
-			print_metric_begin(config, evlist, prefix, /*aggr_idx=*/0);
+		} else if (config->cgroup_list) {
+			print_cgroup_counter(config, evlist, prefix);
+		} else {
+			print_metric_begin(config, evlist, prefix,
+					   /*aggr_idx=*/0, /*cgrp=*/NULL);
 			evlist__for_each_entry(evlist, counter) {
 				print_counter(config, counter, prefix);
 			}
-- 
GitLab


From 4dd7ff4a0311eee3ac946f0824442de94b34c42e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 14 Nov 2022 15:02:27 -0800
Subject: [PATCH 1580/4122] perf stat: Add print_aggr_cgroup() for
 --for-each-cgroup and --topdown

Normally, --for-each-cgroup only works with AGGR_GLOBAL.  However
the --topdown on some cpu (e.g. Intel Skylake) converts it to the
AGGR_CORE internally.

To support those machines, add print_aggr_cgroup and handle the events
like in print_cgroup_events().

  $ perf stat -a --for-each-cgroup system.slice,user.slice --topdown sleep 1
  nmi_watchdog enabled with topdown. May give wrong results.
  Disable with echo 0 > /proc/sys/kernel/nmi_watchdog

   Performance counter stats for 'system wide':

                               retiring   bad speculation  frontend bound  backend bound
  S0-D0-C0  2  system.slice       49.0%            -46.6%           31.4%
  S0-D0-C1  2  system.slice       55.5%              8.0%           45.5%          -9.0%
  S0-D0-C2  2  system.slice       87.8%             22.1%           30.3%         -40.3%
  S0-D0-C3  2  system.slice       53.3%            -11.9%           45.2%          13.4%
  S0-D0-C0  2  user.slice        123.5%              4.0%           48.5%         -75.9%
  S0-D0-C1  2  user.slice         19.9%              6.5%           89.9%         -16.3%
  S0-D0-C2  2  user.slice         29.9%              7.9%           71.3%          -9.1
  S0-D0-C3  2  user.slice         28.0%              7.2%           43.3%          21.5%

         1.004136937 seconds time elapsed

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221114230227.1255976-20-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 41 +++++++++++++++++++++++++++++++++-
 1 file changed, 40 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index cf25ed99b5df9..f5501760ff2e1 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -900,6 +900,42 @@ static void print_aggr(struct perf_stat_config *config,
 	}
 }
 
+static void print_aggr_cgroup(struct perf_stat_config *config,
+			      struct evlist *evlist,
+			      char *prefix)
+{
+	bool metric_only = config->metric_only;
+	struct evsel *counter, *evsel;
+	struct cgroup *cgrp = NULL;
+	int s;
+
+	if (!config->aggr_map || !config->aggr_get_id)
+		return;
+
+	evlist__for_each_entry(evlist, evsel) {
+		if (cgrp == evsel->cgrp)
+			continue;
+
+		cgrp = evsel->cgrp;
+
+		for (s = 0; s < config->aggr_map->nr; s++) {
+			print_metric_begin(config, evlist, prefix, s, cgrp);
+
+			evlist__for_each_entry(evlist, counter) {
+				if (counter->merged_stat)
+					continue;
+
+				if (counter->cgrp != cgrp)
+					continue;
+
+				print_counter_aggrdata(config, counter, s, prefix,
+						       metric_only);
+			}
+			print_metric_end(config);
+		}
+	}
+}
+
 static void print_counter(struct perf_stat_config *config,
 			  struct evsel *counter, char *prefix)
 {
@@ -1361,7 +1397,10 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	case AGGR_DIE:
 	case AGGR_SOCKET:
 	case AGGR_NODE:
-		print_aggr(config, evlist, prefix);
+		if (config->cgroup_list)
+			print_aggr_cgroup(config, evlist, prefix);
+		else
+			print_aggr(config, evlist, prefix);
 		break;
 	case AGGR_THREAD:
 	case AGGR_GLOBAL:
-- 
GitLab


From a6efaa2c89bf35c3cb7f21dee221dabf20dccf59 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:01 -0800
Subject: [PATCH 1581/4122] tools lib api: Add install target

This allows libapi to be installed as a dependency.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cmc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Cc: nicolas schier <nicolas@fjasle.eu>
Link: http://lore.kernel.org/lkml/20221109184914.1357295-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/Makefile | 49 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index e21e1b40b5257..6629d0fd01306 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -15,6 +15,16 @@ LD ?= $(CROSS_COMPILE)ld
 
 MAKEFLAGS += --no-print-directory
 
+INSTALL = install
+
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
 LIBFILE = $(OUTPUT)libapi.a
 
 CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
@@ -45,10 +55,23 @@ RM = rm -f
 
 API_IN := $(OUTPUT)libapi-in.o
 
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
 all:
 
 export srctree OUTPUT CC LD CFLAGS V
 include $(srctree)/tools/build/Makefile.include
+include $(srctree)/tools/scripts/Makefile.include
 
 all: fixdep $(LIBFILE)
 
@@ -58,6 +81,32 @@ $(API_IN): FORCE
 $(LIBFILE): $(API_IN)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN)
 
+define do_install_mkdir
+	if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+	fi
+endef
+
+define do_install
+	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+	fi;                                             \
+	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: $(LIBFILE)
+	$(call QUIET_INSTALL, $(LIBFILE)) \
+		$(call do_install_mkdir,$(libdir_SQ)); \
+		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,cpu.h,$(prefix)/include/api,644); \
+		$(call do_install,debug.h,$(prefix)/include/api,644); \
+		$(call do_install,io.h,$(prefix)/include/api,644);
+
+install: install_lib install_headers
+
 clean:
 	$(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \
 	find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
-- 
GitLab


From 630ae80ea1dd253609cb50cff87f3248f901aca3 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:02 -0800
Subject: [PATCH 1582/4122] tools lib subcmd: Add install target

This allows libsubcmd to be installed as a dependency.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/subcmd/Makefile | 49 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index 8f1a09cdfd17e..e96566f8991cc 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -17,6 +17,15 @@ RM = rm -f
 
 MAKEFLAGS += --no-print-directory
 
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
 LIBFILE = $(OUTPUT)libsubcmd.a
 
 CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC
@@ -48,6 +57,18 @@ CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
 
 SUBCMD_IN := $(OUTPUT)libsubcmd-in.o
 
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
 all:
 
 export srctree OUTPUT CC LD CFLAGS V
@@ -61,6 +82,34 @@ $(SUBCMD_IN): FORCE
 $(LIBFILE): $(SUBCMD_IN)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN)
 
+define do_install_mkdir
+	if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+	fi
+endef
+
+define do_install
+	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+	fi;                                             \
+	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: $(LIBFILE)
+	$(call QUIET_INSTALL, $(LIBFILE)) \
+		$(call do_install_mkdir,$(libdir_SQ)); \
+		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \
+		$(call do_install,help.h,$(prefix)/include/subcmd,644); \
+		$(call do_install,pager.h,$(prefix)/include/subcmd,644); \
+		$(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \
+		$(call do_install,run-command.h,$(prefix)/include/subcmd,644);
+
+install: install_lib install_headers
+
 clean:
 	$(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \
 	find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
-- 
GitLab


From 911920b06e6be3faf87450affb0dbbc6117fd0a5 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:03 -0800
Subject: [PATCH 1583/4122] perf build: Install libsubcmd locally when building

The perf build currently has a '-Itools/lib' on the CC command
line. This causes issues as the libapi, libsubcmd, libtraceevent,
libbpf headers are all found via this path, making it impossible to
override include behavior. Change the libsubcmd build mirroring the
libbpf build, so that it is installed in a directory along with its
headers. A later change will modify the include behavior.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/.gitignore    |  1 +
 tools/perf/Makefile.perf | 24 ++++++++++++++++--------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index fd7a6ff9e7aae..413ca7e4a02a8 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -40,6 +40,7 @@ pmu-events/pmu-events.c
 pmu-events/jevents
 feature/
 libbpf/
+libsubcmd/
 fixdep
 libtraceevent-dynamic-list
 Documentation/doc.dep
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 67819f9056116..4ec6b95a1c49a 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -244,7 +244,7 @@ else # force_fixdep
 LIB_DIR         = $(srctree)/tools/lib/api/
 TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
 LIBBPF_DIR      = $(srctree)/tools/lib/bpf/
-SUBCMD_DIR      = $(srctree)/tools/lib/subcmd/
+LIBSUBCMD_DIR   = $(srctree)/tools/lib/subcmd/
 LIBPERF_DIR     = $(srctree)/tools/lib/perf/
 DOC_DIR         = $(srctree)/tools/perf/Documentation/
 
@@ -294,7 +294,6 @@ strip-libs = $(filter-out -l%,$(1))
 ifneq ($(OUTPUT),)
   TE_PATH=$(OUTPUT)
   PLUGINS_PATH=$(OUTPUT)
-  SUBCMD_PATH=$(OUTPUT)
   LIBPERF_PATH=$(OUTPUT)
 ifneq ($(subdir),)
   API_PATH=$(OUTPUT)/../lib/api/
@@ -305,7 +304,6 @@ else
   TE_PATH=$(TRACE_EVENT_DIR)
   PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
   API_PATH=$(LIB_DIR)
-  SUBCMD_PATH=$(SUBCMD_DIR)
   LIBPERF_PATH=$(LIBPERF_DIR)
 endif
 
@@ -332,7 +330,14 @@ LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
 LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
 LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
 
-LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a
+ifneq ($(OUTPUT),)
+  LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd
+else
+  LIBSUBCMD_OUTPUT = $(CURDIR)/libsubcmd
+endif
+LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT)
+LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include
+LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a
 
 LIBPERF = $(LIBPERF_PATH)libperf.a
 export LIBPERF
@@ -849,11 +854,14 @@ $(LIBPERF)-clean:
 	$(call QUIET_CLEAN, libperf)
 	$(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null
 
-$(LIBSUBCMD): FORCE
-	$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a
+$(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT)
+	$(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \
+		DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \
+		$@ install_headers
 
 $(LIBSUBCMD)-clean:
-	$(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean
+	$(call QUIET_CLEAN, libsubcmd)
+	$(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT)
 
 help:
 	@echo 'Perf make targets:'
@@ -1039,7 +1047,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
 
-$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
+$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT):
 	$(Q)$(MKDIR) -p $@
 
 ifdef BUILD_BPF_SKEL
-- 
GitLab


From 00314c9bca8faad495e4e4f702491ecd37444784 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:04 -0800
Subject: [PATCH 1584/4122] perf build: Install libapi locally when building

The perf build currently has a '-Itools/lib' on the CC command line.
This causes issues as the libapi, libsubcmd, libtraceevent, libbpf
headers are all found via this path, making it impossible to override
include behavior.

Change the libapi build mirroring the libbpf and libsubcmd build, so
that it is installed in a directory along with its headers. A later
change will modify the include behavior.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/.gitignore    |  1 +
 tools/perf/Makefile.perf | 27 +++++++++++++++------------
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 413ca7e4a02a8..4a8a76da861d4 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -39,6 +39,7 @@ trace/beauty/generated/
 pmu-events/pmu-events.c
 pmu-events/jevents
 feature/
+libapi/
 libbpf/
 libsubcmd/
 fixdep
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4ec6b95a1c49a..f8c712e26a693 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -241,7 +241,7 @@ sub-make: fixdep
 
 else # force_fixdep
 
-LIB_DIR         = $(srctree)/tools/lib/api/
+LIBAPI_DIR      = $(srctree)/tools/lib/api/
 TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
 LIBBPF_DIR      = $(srctree)/tools/lib/bpf/
 LIBSUBCMD_DIR   = $(srctree)/tools/lib/subcmd/
@@ -295,15 +295,9 @@ ifneq ($(OUTPUT),)
   TE_PATH=$(OUTPUT)
   PLUGINS_PATH=$(OUTPUT)
   LIBPERF_PATH=$(OUTPUT)
-ifneq ($(subdir),)
-  API_PATH=$(OUTPUT)/../lib/api/
-else
-  API_PATH=$(OUTPUT)
-endif
 else
   TE_PATH=$(TRACE_EVENT_DIR)
   PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
-  API_PATH=$(LIB_DIR)
   LIBPERF_PATH=$(LIBPERF_DIR)
 endif
 
@@ -318,7 +312,14 @@ LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list
 DYNAMIC_LIST_LDFLAGS               = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
 LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS))
 
-LIBAPI = $(API_PATH)libapi.a
+ifneq ($(OUTPUT),)
+  LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi
+else
+  LIBAPI_OUTPUT = $(CURDIR)/libapi
+endif
+LIBAPI_DESTDIR = $(LIBAPI_OUTPUT)
+LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include
+LIBAPI = $(LIBAPI_OUTPUT)/libapi.a
 export LIBAPI
 
 ifneq ($(OUTPUT),)
@@ -831,12 +832,14 @@ $(LIBTRACEEVENT)-clean:
 install-traceevent-plugins: libtraceevent_plugins
 	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
 
-$(LIBAPI): FORCE
-	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a
+$(LIBAPI): FORCE | $(LIBAPI_OUTPUT)
+	$(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \
+		DESTDIR=$(LIBAPI_DESTDIR) prefix= \
+		$@ install_headers
 
 $(LIBAPI)-clean:
 	$(call QUIET_CLEAN, libapi)
-	$(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+	$(Q)$(RM) -r -- $(LIBAPI_OUTPUT)
 
 $(LIBBPF): FORCE | $(LIBBPF_OUTPUT)
 	$(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \
@@ -1047,7 +1050,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
 
-$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT):
+$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT):
 	$(Q)$(MKDIR) -p $@
 
 ifdef BUILD_BPF_SKEL
-- 
GitLab


From 91009a3a9913f2755f8b9616a2124a51147c375c Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:05 -0800
Subject: [PATCH 1585/4122] perf build: Install libperf locally when building

The perf build currently has a '-Itools/lib' on the CC command
line. This causes issues as the libapi, libsubcmd, libtraceevent,
libbpf headers are all found via this path, making it impossible to
override include behavior.

Change the libperf build mirroring the libbpf, libsubcmd and libapi
build, so that it is installed in a directory along with its headers. A
later change will modify the include behavior.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/.gitignore    |  1 +
 tools/perf/Makefile.perf | 21 ++++++++++++++-------
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 4a8a76da861d4..b5f10d5603f1d 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -41,6 +41,7 @@ pmu-events/jevents
 feature/
 libapi/
 libbpf/
+libperf/
 libsubcmd/
 fixdep
 libtraceevent-dynamic-list
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index f8c712e26a693..57aec5a972704 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -294,11 +294,9 @@ strip-libs = $(filter-out -l%,$(1))
 ifneq ($(OUTPUT),)
   TE_PATH=$(OUTPUT)
   PLUGINS_PATH=$(OUTPUT)
-  LIBPERF_PATH=$(OUTPUT)
 else
   TE_PATH=$(TRACE_EVENT_DIR)
   PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
-  LIBPERF_PATH=$(LIBPERF_DIR)
 endif
 
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
@@ -340,7 +338,14 @@ LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT)
 LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include
 LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a
 
-LIBPERF = $(LIBPERF_PATH)libperf.a
+ifneq ($(OUTPUT),)
+  LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf
+else
+  LIBPERF_OUTPUT = $(CURDIR)/libperf
+endif
+LIBPERF_DESTDIR = $(LIBPERF_OUTPUT)
+LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include
+LIBPERF = $(LIBPERF_OUTPUT)/libperf.a
 export LIBPERF
 
 # python extension build directories
@@ -850,12 +855,14 @@ $(LIBBPF)-clean:
 	$(call QUIET_CLEAN, libbpf)
 	$(Q)$(RM) -r -- $(LIBBPF_OUTPUT)
 
-$(LIBPERF): FORCE
-	$(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a
+$(LIBPERF): FORCE | $(LIBPERF_OUTPUT)
+	$(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \
+		DESTDIR=$(LIBPERF_DESTDIR) prefix= \
+		$@ install_headers
 
 $(LIBPERF)-clean:
 	$(call QUIET_CLEAN, libperf)
-	$(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null
+	$(Q)$(RM) -r -- $(LIBPERF_OUTPUT)
 
 $(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT)
 	$(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \
@@ -1050,7 +1057,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
 
-$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBSUBCMD_OUTPUT):
+$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT):
 	$(Q)$(MKDIR) -p $@
 
 ifdef BUILD_BPF_SKEL
-- 
GitLab


From ef019df01e207971200ffcb06559f791980668a1 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:06 -0800
Subject: [PATCH 1586/4122] perf build: Install libtraceevent locally when
 building

The perf build currently has a '-Itools/lib' on the CC command
line. This causes issues as the libapi, libsubcmd, libtraceevent,
libbpf headers are all found via this path, making it impossible to
override include behavior.

Change the libtraceevent build mirroring the libbpf, libsubcmd, libapi
and libperf build, so that it is installed in a directory along with its
headers. A later change will modify the include behavior.

Similarly, the plugins are now installed into libtraceevent_plugins
except they have no header files.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-7-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/.gitignore    |  3 ++-
 tools/perf/Makefile.perf | 57 ++++++++++++++++++++++++++--------------
 2 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index b5f10d5603f1d..bc165f4e48308 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -43,6 +43,7 @@ libapi/
 libbpf/
 libperf/
 libsubcmd/
+libtraceevent/
+libtraceevent_plugins/
 fixdep
-libtraceevent-dynamic-list
 Documentation/doc.dep
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 57aec5a972704..6e5e3f7730b58 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -242,7 +242,8 @@ sub-make: fixdep
 else # force_fixdep
 
 LIBAPI_DIR      = $(srctree)/tools/lib/api/
-TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/
+LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins
 LIBBPF_DIR      = $(srctree)/tools/lib/bpf/
 LIBSUBCMD_DIR   = $(srctree)/tools/lib/subcmd/
 LIBPERF_DIR     = $(srctree)/tools/lib/perf/
@@ -292,16 +293,17 @@ grep-libs = $(filter -l%,$(1))
 strip-libs = $(filter-out -l%,$(1))
 
 ifneq ($(OUTPUT),)
-  TE_PATH=$(OUTPUT)
-  PLUGINS_PATH=$(OUTPUT)
+  LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent
 else
-  TE_PATH=$(TRACE_EVENT_DIR)
-  PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/
+  LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent
 endif
-
-LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins
+LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT)
+LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT)
+LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include
+LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a
 export LIBTRACEEVENT
-LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list
+LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list
 
 #
 # The static build has no dynsym table, so this does not work for
@@ -821,21 +823,33 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))'
 
-$(LIBTRACEEVENT): FORCE
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
-
-libtraceevent_plugins: FORCE
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
-
-$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
+$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT)
+	$(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \
+		DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \
+		$@ install_headers
 
 $(LIBTRACEEVENT)-clean:
 	$(call QUIET_CLEAN, libtraceevent)
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+	$(Q)$(RM) -r -- $(LIBTRACEEVENT_OUTPUT)
+
+libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT)
+	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
+		DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
+		plugins
+
+libtraceevent_plugins-clean:
+	$(call QUIET_CLEAN, libtraceevent_plugins)
+	$(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT)
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
+		DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
+		$(LIBTRACEEVENT_FLAGS) $@
 
 install-traceevent-plugins: libtraceevent_plugins
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins
+	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
+		DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
+		$(LIBTRACEEVENT_FLAGS) install
 
 $(LIBAPI): FORCE | $(LIBAPI_OUTPUT)
 	$(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \
@@ -1060,6 +1074,11 @@ SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
 $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT):
 	$(Q)$(MKDIR) -p $@
 
+ifndef LIBTRACEEVENT_DYNAMIC
+$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT):
+	$(Q)$(MKDIR) -p $@
+endif
+
 ifdef BUILD_BPF_SKEL
 BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
 BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE)
@@ -1102,7 +1121,7 @@ endif # BUILD_BPF_SKEL
 bpf-skel-clean:
 	$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
 
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
 	$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
-- 
GitLab


From 8d1f68bd76a6517c132fd1dd223c85c5e1defa49 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:07 -0800
Subject: [PATCH 1587/4122] tools lib api: Add missing install headers

Headers necessary for the perf build.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-8-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/Makefile | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 6629d0fd01306..3e5ef1e0e8908 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -103,7 +103,10 @@ install_headers:
 	$(call QUIET_INSTALL, headers) \
 		$(call do_install,cpu.h,$(prefix)/include/api,644); \
 		$(call do_install,debug.h,$(prefix)/include/api,644); \
-		$(call do_install,io.h,$(prefix)/include/api,644);
+		$(call do_install,io.h,$(prefix)/include/api,644); \
+		$(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \
+		$(call do_install,fs/fs.h,$(prefix)/include/api/fs,644);
+		$(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644);
 
 install: install_lib install_headers
 
-- 
GitLab


From a6e8caf5db2e1db8e2ec51f87b9749bd28f6b969 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:08 -0800
Subject: [PATCH 1588/4122] tools lib perf: Add missing install headers

Headers necessary for the perf build. Note, internal headers are also
installed as these are necessary for the build.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-9-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/perf/Makefile | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 21df023a2103e..1badc0a046768 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -189,13 +189,21 @@ install_lib: libs
 
 install_headers:
 	$(call QUIET_INSTALL, headers) \
+		$(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644);
+		$(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \
+		$(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \
+		$(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \
+		$(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \
+		$(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \
+		$(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \
+		$(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \
+		$(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644);
 
 install_pkgconfig: $(LIBPERF_PC)
 	$(call QUIET_INSTALL, $(LIBPERF_PC)) \
-- 
GitLab


From 160be157eaba2a37233ff2d27093e5915b6b084e Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:09 -0800
Subject: [PATCH 1589/4122] tool lib symbol: Add Makefile/Build

Add sufficient Makefile for libsymbol to be built as a dependency and
header files installed.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-10-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/symbol/Build    |   1 +
 tools/lib/symbol/Makefile | 115 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 116 insertions(+)
 create mode 100644 tools/lib/symbol/Build
 create mode 100644 tools/lib/symbol/Makefile

diff --git a/tools/lib/symbol/Build b/tools/lib/symbol/Build
new file mode 100644
index 0000000000000..9b9a9c78d3c98
--- /dev/null
+++ b/tools/lib/symbol/Build
@@ -0,0 +1 @@
+libsymbol-y += kallsyms.o
diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile
new file mode 100644
index 0000000000000..4c1d6b53032d2
--- /dev/null
+++ b/tools/lib/symbol/Makefile
@@ -0,0 +1,115 @@
+# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.include
+include ../../scripts/utilities.mak		# QUIET_CLEAN
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+CC ?= $(CROSS_COMPILE)gcc
+AR ?= $(CROSS_COMPILE)ar
+LD ?= $(CROSS_COMPILE)ld
+
+MAKEFLAGS += --no-print-directory
+
+INSTALL = install
+
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+LIBFILE = $(OUTPUT)libsymbol.a
+
+CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS)
+CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC
+
+ifeq ($(DEBUG),0)
+ifeq ($(CC_NO_CLANG), 0)
+  CFLAGS += -O3
+else
+  CFLAGS += -O6
+endif
+endif
+
+ifeq ($(DEBUG),0)
+  CFLAGS += -D_FORTIFY_SOURCE
+endif
+
+# Treat warnings as errors unless directed not to
+ifneq ($(WERROR),0)
+  CFLAGS += -Werror
+endif
+
+CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+
+CFLAGS += -I$(srctree)/tools/lib
+CFLAGS += -I$(srctree)/tools/include
+
+RM = rm -f
+
+SYMBOL_IN := $(OUTPUT)libsymbol-in.o
+
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
+prefix ?=
+libdir = $(prefix)/$(libdir_relative)
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+
+all:
+
+export srctree OUTPUT CC LD CFLAGS V
+include $(srctree)/tools/build/Makefile.include
+include $(srctree)/tools/scripts/Makefile.include
+
+all: fixdep $(LIBFILE)
+
+$(SYMBOL_IN): FORCE
+	$(MAKE) $(build)=libsymbol V=1
+
+$(LIBFILE): $(SYMBOL_IN)
+	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN)
+
+define do_install_mkdir
+	if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
+	fi
+endef
+
+define do_install
+	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
+		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+	fi;                                             \
+	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: $(LIBFILE)
+	$(call QUIET_INSTALL, $(LIBFILE)) \
+		$(call do_install_mkdir,$(libdir_SQ)); \
+		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
+
+install_headers:
+	$(call QUIET_INSTALL, headers) \
+		$(call do_install,kallsyms.h,$(prefix)/include/symbol,644);
+
+install: install_lib install_headers
+
+clean:
+	$(call QUIET_CLEAN, libsymbol) $(RM) $(LIBFILE); \
+	find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM)
+
+FORCE:
+
+.PHONY: clean FORCE
-- 
GitLab


From 84bec6f0b31fb2ac8c5e4b0ded7727e6ec1115db Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:10 -0800
Subject: [PATCH 1590/4122] perf build: Install libsymbol locally when building

The perf build currently has a '-Itools/lib' on the CC command
line. This causes issues as the libapi, libsubcmd, libtraceevent,
libbpf and libsymbol headers are all found via this path, making it
impossible to override include behavior.

Change the libsymbol build mirroring the libbpf, libsubcmd, libapi,
libperf and libtraceevent build, so that it is installed in a directory
along with its headers.

A later change will modify the include behavior.  Don't build kallsyms.o
as part of util as this will lead to duplicate definitions. Add
kallsym's directory to the MANIFEST rather than individual files, so
that the Build and Makefile are added to a source tar ball.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-11-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/.gitignore    |  1 +
 tools/perf/MANIFEST      |  3 +--
 tools/perf/Makefile.perf | 25 ++++++++++++++++++++++---
 tools/perf/util/Build    |  5 -----
 4 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index bc165f4e48308..05806ecfc33c1 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -43,6 +43,7 @@ libapi/
 libbpf/
 libperf/
 libsubcmd/
+libsymbol/
 libtraceevent/
 libtraceevent_plugins/
 fixdep
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index f5d72f936a6ba..c8e8e05b4ff1f 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -13,8 +13,7 @@ tools/lib/ctype.c
 tools/lib/hweight.c
 tools/lib/rbtree.c
 tools/lib/string.c
-tools/lib/symbol/kallsyms.c
-tools/lib/symbol/kallsyms.h
+tools/lib/symbol
 tools/lib/find_bit.c
 tools/lib/bitmap.c
 tools/lib/list_sort.c
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6e5e3f7730b58..6c1a2a3ccc381 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -246,6 +246,7 @@ LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/
 LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins
 LIBBPF_DIR      = $(srctree)/tools/lib/bpf/
 LIBSUBCMD_DIR   = $(srctree)/tools/lib/subcmd/
+LIBSYMBOL_DIR   = $(srctree)/tools/lib/symbol/
 LIBPERF_DIR     = $(srctree)/tools/lib/perf/
 DOC_DIR         = $(srctree)/tools/perf/Documentation/
 
@@ -340,6 +341,15 @@ LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT)
 LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include
 LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a
 
+ifneq ($(OUTPUT),)
+  LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol
+else
+  LIBSYMBOL_OUTPUT = $(CURDIR)/libsymbol
+endif
+LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT)
+LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include
+LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a
+
 ifneq ($(OUTPUT),)
   LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf
 else
@@ -398,7 +408,7 @@ endif
 
 export PERL_PATH
 
-PERFLIBS = $(LIBAPI) $(LIBSUBCMD) $(LIBPERF)
+PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL)
 ifndef NO_LIBBPF
   ifndef LIBBPF_DYNAMIC
     PERFLIBS += $(LIBBPF)
@@ -887,6 +897,15 @@ $(LIBSUBCMD)-clean:
 	$(call QUIET_CLEAN, libsubcmd)
 	$(Q)$(RM) -r -- $(LIBSUBCMD_OUTPUT)
 
+$(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT)
+	$(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \
+		DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \
+		$@ install_headers
+
+$(LIBSYMBOL)-clean:
+	$(call QUIET_CLEAN, libsymbol)
+	$(Q)$(RM) -r -- $(LIBSYMBOL_OUTPUT)
+
 help:
 	@echo 'Perf make targets:'
 	@echo '  doc		- make *all* documentation (see below)'
@@ -1071,7 +1090,7 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h
 SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
 
-$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT):
+$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
 	$(Q)$(MKDIR) -p $@
 
 ifndef LIBTRACEEVENT_DYNAMIC
@@ -1121,7 +1140,7 @@ endif # BUILD_BPF_SKEL
 bpf-skel-clean:
 	$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
 
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
 	$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 47a7db3ad0a1f..ab37f588ee8b3 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -19,7 +19,6 @@ perf-y += perf_event_attr_fprintf.o
 perf-y += evswitch.o
 perf-y += find_bit.o
 perf-y += get_current_dir_name.o
-perf-y += kallsyms.o
 perf-y += levenshtein.o
 perf-y += llvm-utils.o
 perf-y += mmap.o
@@ -294,10 +293,6 @@ CFLAGS_expr.o          += -Wno-redundant-decls
 CFLAGS_header.o        += -include $(OUTPUT)PERF-VERSION-FILE
 CFLAGS_arm-spe.o       += -I$(srctree)/tools/arch/arm64/include/
 
-$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE
-	$(call rule_mkdir)
-	$(call if_changed_dep,cc_o_c)
-
 $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE
 	$(call rule_mkdir)
 	$(call if_changed_dep,cc_o_c)
-- 
GitLab


From bd560973c5d3b2a37fb13d769b34385ed320d547 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:11 -0800
Subject: [PATCH 1591/4122] perf expr: Tidy hashmap dependency

hashmap.h comes from libbpf but isn't installed with its
headers. Always use the header file of the code in util. Change the
hashmap.h dependency in expr.h to a forward declaration, add the
necessary header file includes in the C files.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-12-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/expr.c       | 1 +
 tools/perf/tests/pmu-events.c | 1 +
 tools/perf/util/bpf-loader.c  | 4 ----
 tools/perf/util/evsel.c       | 4 ----
 tools/perf/util/expr.c        | 1 +
 tools/perf/util/expr.h        | 7 +------
 tools/perf/util/metricgroup.c | 1 +
 tools/perf/util/stat-shadow.c | 1 +
 tools/perf/util/stat.c        | 4 ----
 9 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c
index 6512f5e22045a..b6667501ebb45 100644
--- a/tools/perf/tests/expr.c
+++ b/tools/perf/tests/expr.c
@@ -2,6 +2,7 @@
 #include "util/cputopo.h"
 #include "util/debug.h"
 #include "util/expr.h"
+#include "util/hashmap.h"
 #include "util/header.h"
 #include "util/smt.h"
 #include "tests.h"
diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c
index 5d0d3b239a68b..f7b9dbbad97f5 100644
--- a/tools/perf/tests/pmu-events.c
+++ b/tools/perf/tests/pmu-events.c
@@ -12,6 +12,7 @@
 #include <perf/evlist.h>
 #include "util/evlist.h"
 #include "util/expr.h"
+#include "util/hashmap.h"
 #include "util/parse-events.h"
 #include "metricgroup.h"
 #include "stat.h"
diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c
index f4adeccdbbcb4..b3c8174360bf3 100644
--- a/tools/perf/util/bpf-loader.c
+++ b/tools/perf/util/bpf-loader.c
@@ -27,11 +27,7 @@
 #include "util.h"
 #include "llvm-utils.h"
 #include "c++/clang-c.h"
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
 #include "util/hashmap.h"
-#endif
 #include "asm/bug.h"
 
 #include <internal/xyarray.h>
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ca6abb64c91d2..ca554f8202f94 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -46,11 +46,7 @@
 #include "string2.h"
 #include "memswap.h"
 #include "util.h"
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
 #include "util/hashmap.h"
-#endif
 #include "pmu-hybrid.h"
 #include "off_cpu.h"
 #include "../perf-sys.h"
diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c
index aaacf514dc09c..140f2acdb3259 100644
--- a/tools/perf/util/expr.c
+++ b/tools/perf/util/expr.c
@@ -11,6 +11,7 @@
 #include "expr.h"
 #include "expr-bison.h"
 #include "expr-flex.h"
+#include "util/hashmap.h"
 #include "smt.h"
 #include "tsc.h"
 #include <linux/err.h>
diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h
index d6c1668dc1a08..029271540fb0d 100644
--- a/tools/perf/util/expr.h
+++ b/tools/perf/util/expr.h
@@ -2,12 +2,7 @@
 #ifndef PARSE_CTX_H
 #define PARSE_CTX_H 1
 
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
-#include "util/hashmap.h"
-#endif
-
+struct hashmap;
 struct metric_ref;
 
 struct expr_scanner_ctx {
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index 1943fed9b6d92..cf9e2452d3229 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -28,6 +28,7 @@
 #include "util.h"
 #include <asm/bug.h>
 #include "cgroup.h"
+#include "util/hashmap.h"
 
 struct metric_event *metricgroup__lookup(struct rblist *metric_events,
 					 struct evsel *evsel,
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index 07b29fe272c79..9bde9224a97ce 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -14,6 +14,7 @@
 #include "units.h"
 #include <linux/zalloc.h>
 #include "iostat.h"
+#include "util/hashmap.h"
 
 /*
  * AGGR_GLOBAL: Use CPU 0
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index acf0edf5fdd1a..673f017a211f6 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -14,11 +14,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
-#ifdef HAVE_LIBBPF_SUPPORT
-#include <bpf/hashmap.h>
-#else
 #include "util/hashmap.h"
-#endif
 #include <linux/zalloc.h>
 
 void update_stats(struct stats *stats, u64 val)
-- 
GitLab


From fd3f518fc1140622e752ac51d0ff18bb74f1d180 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:12 -0800
Subject: [PATCH 1592/4122] perf thread_map: Reduce exposure of libperf
 internal API

Remove unnecessary include of internal threadmap.h and refcount.h in
thread_map.h. Switch to using public APIs when possible or including
the internal header file in the C file. Fix a transitive dependency in
openat-syscall.c broken by the clean up.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-13-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c                              | 1 +
 tools/perf/builtin-trace.c                             | 4 ++--
 tools/perf/tests/openat-syscall.c                      | 1 +
 tools/perf/tests/thread-map.c                          | 1 +
 tools/perf/util/bpf_counter.c                          | 2 +-
 tools/perf/util/evsel.c                                | 1 +
 tools/perf/util/python.c                               | 6 +++---
 tools/perf/util/scripting-engines/trace-event-python.c | 2 +-
 tools/perf/util/thread_map.c                           | 1 +
 tools/perf/util/thread_map.h                           | 2 --
 10 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 1d79801f4e844..d040fbcdcc5a7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -93,6 +93,7 @@
 
 #include <linux/ctype.h>
 #include <perf/evlist.h>
+#include <internal/threadmap.h>
 
 #define DEFAULT_SEPARATOR	" "
 #define FREEZE_ON_SMI_PATH	"devices/cpu/freeze_on_smi"
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c93b359abc312..3257da5cad231 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -4095,8 +4095,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	}
 
 	trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 ||
-				  evlist->core.threads->nr > 1 ||
-				  evlist__first(evlist)->core.attr.inherit;
+		perf_thread_map__nr(evlist->core.threads) > 1 ||
+		evlist__first(evlist)->core.attr.inherit;
 
 	/*
 	 * Now that we already used evsel->core.attr to ask the kernel to setup the
diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c
index 7e05b8b5cc95f..131b62271bfa2 100644
--- a/tools/perf/tests/openat-syscall.c
+++ b/tools/perf/tests/openat-syscall.c
@@ -7,6 +7,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
+#include <unistd.h>
 #include "thread_map.h"
 #include "evsel.h"
 #include "debug.h"
diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c
index e413c1387fcba..74308c1368fe1 100644
--- a/tools/perf/tests/thread-map.c
+++ b/tools/perf/tests/thread-map.c
@@ -11,6 +11,7 @@
 #include "util/synthetic-events.h"
 #include <linux/zalloc.h>
 #include <perf/event.h>
+#include <internal/threadmap.h>
 
 struct perf_sample;
 struct perf_tool;
diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c
index ef1c15e4aeba5..eeee899fcf34f 100644
--- a/tools/perf/util/bpf_counter.c
+++ b/tools/perf/util/bpf_counter.c
@@ -561,7 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target)
 
 		if (filter_type == BPERF_FILTER_PID ||
 		    filter_type == BPERF_FILTER_TGID)
-			key = evsel->core.threads->map[i].pid;
+			key = perf_thread_map__pid(evsel->core.threads, i);
 		else if (filter_type == BPERF_FILTER_CPU)
 			key = evsel->core.cpus->map[i].cpu;
 		else
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ca554f8202f94..45f4f08399ae1 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -53,6 +53,7 @@
 #include "util/parse-branch-options.h"
 #include <internal/xyarray.h>
 #include <internal/lib.h>
+#include <internal/threadmap.h>
 
 #include <linux/ctype.h>
 
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 5be5fa2391de2..b5941c74a0d64 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -718,17 +718,17 @@ static Py_ssize_t pyrf_thread_map__length(PyObject *obj)
 {
 	struct pyrf_thread_map *pthreads = (void *)obj;
 
-	return pthreads->threads->nr;
+	return perf_thread_map__nr(pthreads->threads);
 }
 
 static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i)
 {
 	struct pyrf_thread_map *pthreads = (void *)obj;
 
-	if (i >= pthreads->threads->nr)
+	if (i >= perf_thread_map__nr(pthreads->threads))
 		return NULL;
 
-	return Py_BuildValue("i", pthreads->threads->map[i]);
+	return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i));
 }
 
 static PySequenceMethods pyrf_thread_map__sequence_methods = {
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 1985d1a42a224..1cf65db8f861d 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -1654,7 +1654,7 @@ static void python_process_stat(struct perf_stat_config *config,
 	struct perf_cpu_map *cpus = counter->core.cpus;
 	int cpu, thread;
 
-	for (thread = 0; thread < threads->nr; thread++) {
+	for (thread = 0; thread < perf_thread_map__nr(threads); thread++) {
 		for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) {
 			process_stat(counter, perf_cpu_map__cpu(cpus, cpu),
 				     perf_thread_map__pid(threads, thread), tstamp,
diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index c9bfe4696943b..e848579e61a86 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -18,6 +18,7 @@
 #include "thread_map.h"
 #include "debug.h"
 #include "event.h"
+#include <internal/threadmap.h>
 
 /* Skip "." and ".." directories */
 static int filter(const struct dirent *dir)
diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h
index 3bb860a32b8e9..00ec05fc1656d 100644
--- a/tools/perf/util/thread_map.h
+++ b/tools/perf/util/thread_map.h
@@ -4,8 +4,6 @@
 
 #include <sys/types.h>
 #include <stdio.h>
-#include <linux/refcount.h>
-#include <internal/threadmap.h>
 #include <perf/threadmap.h>
 
 struct perf_record_thread_map;
-- 
GitLab


From c5c4f72ad4faab641cb852fdd890e8a64cb39f24 Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Thu, 3 Nov 2022 12:17:13 -0700
Subject: [PATCH 1593/4122] KVM: selftests: Add missing break between -e and -g
 option in dirty_log_perf_test

Passing -e option (Run VCPUs while dirty logging is being disabled) in
dirty_log_perf_test also unintentionally enables -g (Do not enable
KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2). Add break between two switch case
logic.

Fixes: cfe12e64b065 ("KVM: selftests: Add an option to run vCPUs while disabling dirty logging")
Signed-off-by: Vipin Sharma <vipinsh@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221103191719.1559407-2-vipinsh@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index f99e39a672d3f..56e08da3a87f4 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -411,6 +411,7 @@ int main(int argc, char *argv[])
 		case 'e':
 			/* 'e' is for evil. */
 			run_vcpus_while_disabling_dirty_logging = true;
+			break;
 		case 'g':
 			dirty_log_manual_caps = 0;
 			break;
-- 
GitLab


From 0eb88a4121861ce3d5f925a183abb13ad954dbe6 Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Thu, 3 Nov 2022 12:17:14 -0700
Subject: [PATCH 1594/4122] KVM: selftests: Put command line options in
 alphabetical order in dirty_log_perf_test

There are 13 command line options and they are not in any order. Put
them in alphabetical order to make it easy to add new options.

No functional change intended.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Reviewed-by: Wei Wang <wei.w.wang@intel.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221103191719.1559407-3-vipinsh@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/dirty_log_perf_test.c       | 36 ++++++++++---------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 56e08da3a87f4..5bb6954b23587 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -406,50 +406,52 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "eghi:p:m:nb:f:v:os:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "b:ef:ghi:m:nop:s:v:x:")) != -1) {
 		switch (opt) {
+		case 'b':
+			guest_percpu_mem_size = parse_size(optarg);
+			break;
 		case 'e':
 			/* 'e' is for evil. */
 			run_vcpus_while_disabling_dirty_logging = true;
 			break;
+		case 'f':
+			p.wr_fract = atoi(optarg);
+			TEST_ASSERT(p.wr_fract >= 1,
+				    "Write fraction cannot be less than one");
+			break;
 		case 'g':
 			dirty_log_manual_caps = 0;
 			break;
+		case 'h':
+			help(argv[0]);
+			break;
 		case 'i':
 			p.iterations = atoi(optarg);
 			break;
-		case 'p':
-			p.phys_offset = strtoull(optarg, NULL, 0);
-			break;
 		case 'm':
 			guest_modes_cmdline(optarg);
 			break;
 		case 'n':
 			perf_test_args.nested = true;
 			break;
-		case 'b':
-			guest_percpu_mem_size = parse_size(optarg);
+		case 'o':
+			p.partition_vcpu_memory_access = false;
 			break;
-		case 'f':
-			p.wr_fract = atoi(optarg);
-			TEST_ASSERT(p.wr_fract >= 1,
-				    "Write fraction cannot be less than one");
+		case 'p':
+			p.phys_offset = strtoull(optarg, NULL, 0);
+			break;
+		case 's':
+			p.backing_src = parse_backing_src_type(optarg);
 			break;
 		case 'v':
 			nr_vcpus = atoi(optarg);
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
-		case 'o':
-			p.partition_vcpu_memory_access = false;
-			break;
-		case 's':
-			p.backing_src = parse_backing_src_type(optarg);
-			break;
 		case 'x':
 			p.slots = atoi(optarg);
 			break;
-		case 'h':
 		default:
 			help(argv[0]);
 			break;
-- 
GitLab


From 018ea2d71a43372cb984021f03514dc6dd3d46df Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Thu, 3 Nov 2022 12:17:15 -0700
Subject: [PATCH 1595/4122] KVM: selftests: Add atoi_paranoid() to catch errors
 missed by atoi()

atoi() doesn't detect errors. There is no way to know that a 0 return
is correct conversion or due to an error.

Introduce atoi_paranoid() to detect errors and provide correct
conversion. Replace all atoi() calls with atoi_paranoid().

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Suggested-by: David Matlack <dmatlack@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221103191719.1559407-4-vipinsh@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/aarch64/arch_timer.c        |  8 ++++----
 .../selftests/kvm/aarch64/debug-exceptions.c  |  2 +-
 .../testing/selftests/kvm/aarch64/vgic_irq.c  |  6 +++---
 .../selftests/kvm/access_tracking_perf_test.c |  2 +-
 .../selftests/kvm/demand_paging_test.c        |  2 +-
 .../selftests/kvm/dirty_log_perf_test.c       |  8 ++++----
 .../testing/selftests/kvm/include/test_util.h |  2 ++
 .../selftests/kvm/kvm_page_table_test.c       |  2 +-
 tools/testing/selftests/kvm/lib/test_util.c   | 19 +++++++++++++++++++
 .../selftests/kvm/max_guest_memory_test.c     |  6 +++---
 .../kvm/memslot_modification_stress_test.c    |  6 +++---
 .../testing/selftests/kvm/memslot_perf_test.c | 10 +++++-----
 .../selftests/kvm/set_memory_region_test.c    |  2 +-
 .../selftests/kvm/x86_64/nx_huge_pages_test.c |  4 ++--
 14 files changed, 50 insertions(+), 29 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 574eb73f0e904..251e7ff048831 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -414,7 +414,7 @@ static bool parse_args(int argc, char *argv[])
 	while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) {
 		switch (opt) {
 		case 'n':
-			test_args.nr_vcpus = atoi(optarg);
+			test_args.nr_vcpus = atoi_paranoid(optarg);
 			if (test_args.nr_vcpus <= 0) {
 				pr_info("Positive value needed for -n\n");
 				goto err;
@@ -425,21 +425,21 @@ static bool parse_args(int argc, char *argv[])
 			}
 			break;
 		case 'i':
-			test_args.nr_iter = atoi(optarg);
+			test_args.nr_iter = atoi_paranoid(optarg);
 			if (test_args.nr_iter <= 0) {
 				pr_info("Positive value needed for -i\n");
 				goto err;
 			}
 			break;
 		case 'p':
-			test_args.timer_period_ms = atoi(optarg);
+			test_args.timer_period_ms = atoi_paranoid(optarg);
 			if (test_args.timer_period_ms <= 0) {
 				pr_info("Positive value needed for -p\n");
 				goto err;
 			}
 			break;
 		case 'm':
-			test_args.migration_freq_ms = atoi(optarg);
+			test_args.migration_freq_ms = atoi_paranoid(optarg);
 			if (test_args.migration_freq_ms < 0) {
 				pr_info("0 or positive value needed for -m\n");
 				goto err;
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 947bd201435ce..19fffdf19c9f9 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -423,7 +423,7 @@ int main(int argc, char *argv[])
 	while ((opt = getopt(argc, argv, "i:")) != -1) {
 		switch (opt) {
 		case 'i':
-			ss_iteration = atoi(optarg);
+			ss_iteration = atoi_paranoid(optarg);
 			break;
 		case 'h':
 		default:
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index 17417220a083f..ae90b718070aa 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -824,16 +824,16 @@ int main(int argc, char **argv)
 	while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
 		switch (opt) {
 		case 'n':
-			nr_irqs = atoi(optarg);
+			nr_irqs = atoi_paranoid(optarg);
 			if (nr_irqs > 1024 || nr_irqs % 32)
 				help(argv[0]);
 			break;
 		case 'e':
-			eoi_split = (bool)atoi(optarg);
+			eoi_split = (bool)atoi_paranoid(optarg);
 			default_args = false;
 			break;
 		case 'l':
-			level_sensitive = (bool)atoi(optarg);
+			level_sensitive = (bool)atoi_paranoid(optarg);
 			default_args = false;
 			break;
 		case 'h':
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 76c583a07ea22..c6bcc5301e2c2 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -368,7 +368,7 @@ int main(int argc, char *argv[])
 			params.vcpu_memory_bytes = parse_size(optarg);
 			break;
 		case 'v':
-			params.nr_vcpus = atoi(optarg);
+			params.nr_vcpus = atoi_paranoid(optarg);
 			break;
 		case 'o':
 			overlap_memory_access = true;
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 779ae54f89c40..82597fb04146f 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -427,7 +427,7 @@ int main(int argc, char *argv[])
 			p.src_type = parse_backing_src_type(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi(optarg);
+			nr_vcpus = atoi_paranoid(optarg);
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 5bb6954b23587..ecda802b78ffa 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -416,7 +416,7 @@ int main(int argc, char *argv[])
 			run_vcpus_while_disabling_dirty_logging = true;
 			break;
 		case 'f':
-			p.wr_fract = atoi(optarg);
+			p.wr_fract = atoi_paranoid(optarg);
 			TEST_ASSERT(p.wr_fract >= 1,
 				    "Write fraction cannot be less than one");
 			break;
@@ -427,7 +427,7 @@ int main(int argc, char *argv[])
 			help(argv[0]);
 			break;
 		case 'i':
-			p.iterations = atoi(optarg);
+			p.iterations = atoi_paranoid(optarg);
 			break;
 		case 'm':
 			guest_modes_cmdline(optarg);
@@ -445,12 +445,12 @@ int main(int argc, char *argv[])
 			p.backing_src = parse_backing_src_type(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi(optarg);
+			nr_vcpus = atoi_paranoid(optarg);
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
 		case 'x':
-			p.slots = atoi(optarg);
+			p.slots = atoi_paranoid(optarg);
 			break;
 		default:
 			help(argv[0]);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index befc754ce9b3b..feae428637599 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -152,4 +152,6 @@ static inline void *align_ptr_up(void *x, size_t size)
 	return (void *)align_up((unsigned long)x, size);
 }
 
+int atoi_paranoid(const char *num_str);
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index f42c6ac6d71db..ea7feb69bb88a 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -461,7 +461,7 @@ int main(int argc, char *argv[])
 			p.test_mem_size = parse_size(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi(optarg);
+			nr_vcpus = atoi_paranoid(optarg);
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 6d23878bbfe1a..c2d9c68277793 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -334,3 +334,22 @@ long get_run_delay(void)
 
 	return val[1];
 }
+
+int atoi_paranoid(const char *num_str)
+{
+	char *end_ptr;
+	long num;
+
+	errno = 0;
+	num = strtol(num_str, &end_ptr, 0);
+	TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str);
+	TEST_ASSERT(num_str != end_ptr,
+		    "strtol(\"%s\") didn't find a valid integer.", num_str);
+	TEST_ASSERT(*end_ptr == '\0',
+		    "strtol(\"%s\") failed to parse trailing characters \"%s\".",
+		    num_str, end_ptr);
+	TEST_ASSERT(num >= INT_MIN && num <= INT_MAX,
+		    "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX);
+
+	return num;
+}
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 9a6e4f3ad6b57..1595b73dc09a8 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -193,15 +193,15 @@ int main(int argc, char *argv[])
 	while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) {
 		switch (opt) {
 		case 'c':
-			nr_vcpus = atoi(optarg);
+			nr_vcpus = atoi_paranoid(optarg);
 			TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0");
 			break;
 		case 'm':
-			max_mem = atoi(optarg) * size_1gb;
+			max_mem = atoi_paranoid(optarg) * size_1gb;
 			TEST_ASSERT(max_mem > 0, "memory size must be >0");
 			break;
 		case 's':
-			slot_size = atoi(optarg) * size_1gb;
+			slot_size = atoi_paranoid(optarg) * size_1gb;
 			TEST_ASSERT(slot_size > 0, "slot size must be >0");
 			break;
 		case 'H':
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index bb1d17a1171bc..7d19a27d80d2d 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -158,7 +158,7 @@ int main(int argc, char *argv[])
 			guest_modes_cmdline(optarg);
 			break;
 		case 'd':
-			p.memslot_modification_delay = strtoul(optarg, NULL, 0);
+			p.memslot_modification_delay = atoi_paranoid(optarg);
 			TEST_ASSERT(p.memslot_modification_delay >= 0,
 				    "A negative delay is not supported.");
 			break;
@@ -166,7 +166,7 @@ int main(int argc, char *argv[])
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi(optarg);
+			nr_vcpus = atoi_paranoid(optarg);
 			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d",
 				    max_vcpus);
@@ -175,7 +175,7 @@ int main(int argc, char *argv[])
 			p.partition_vcpu_memory_access = false;
 			break;
 		case 'i':
-			p.nr_memslot_modifications = atoi(optarg);
+			p.nr_memslot_modifications = atoi_paranoid(optarg);
 			break;
 		case 'h':
 		default:
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 44995446d942a..4bae9e3f5ca1e 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -885,21 +885,21 @@ static bool parse_args(int argc, char *argv[],
 			map_unmap_verify = true;
 			break;
 		case 's':
-			targs->nslots = atoi(optarg);
+			targs->nslots = atoi_paranoid(optarg);
 			if (targs->nslots <= 0 && targs->nslots != -1) {
 				pr_info("Slot count cap has to be positive or -1 for no cap\n");
 				return false;
 			}
 			break;
 		case 'f':
-			targs->tfirst = atoi(optarg);
+			targs->tfirst = atoi_paranoid(optarg);
 			if (targs->tfirst < 0) {
 				pr_info("First test to run has to be non-negative\n");
 				return false;
 			}
 			break;
 		case 'e':
-			targs->tlast = atoi(optarg);
+			targs->tlast = atoi_paranoid(optarg);
 			if (targs->tlast < 0 || targs->tlast >= NTESTS) {
 				pr_info("Last test to run has to be non-negative and less than %zu\n",
 					NTESTS);
@@ -907,14 +907,14 @@ static bool parse_args(int argc, char *argv[],
 			}
 			break;
 		case 'l':
-			targs->seconds = atoi(optarg);
+			targs->seconds = atoi_paranoid(optarg);
 			if (targs->seconds < 0) {
 				pr_info("Test length in seconds has to be non-negative\n");
 				return false;
 			}
 			break;
 		case 'r':
-			targs->runs = atoi(optarg);
+			targs->runs = atoi_paranoid(optarg);
 			if (targs->runs <= 0) {
 				pr_info("Runs per test has to be positive\n");
 				return false;
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 0d55f508d5953..c366949c8362c 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -407,7 +407,7 @@ int main(int argc, char *argv[])
 
 #ifdef __x86_64__
 	if (argc > 1)
-		loops = atoi(argv[1]);
+		loops = atoi_paranoid(argv[1]);
 	else
 		loops = 10;
 
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 59ffe7fd354f8..354b6902849c5 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -241,10 +241,10 @@ int main(int argc, char **argv)
 	while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
 		switch (opt) {
 		case 'p':
-			reclaim_period_ms = atoi(optarg);
+			reclaim_period_ms = atoi_paranoid(optarg);
 			break;
 		case 't':
-			token = atoi(optarg);
+			token = atoi_paranoid(optarg);
 			break;
 		case 'r':
 			reboot_permissions = true;
-- 
GitLab


From 69a62e2004b8bc3f9572f88a592b168345a6bbf9 Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Thu, 3 Nov 2022 12:17:16 -0700
Subject: [PATCH 1596/4122] KVM: selftests: Use SZ_* macros from sizes.h in
 max_guest_memory_test.c

Replace size_1gb defined in max_guest_memory_test.c with the SZ_1G,
SZ_2G and SZ_4G from linux/sizes.h header file.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221103191719.1559407-5-vipinsh@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/max_guest_memory_test.c        | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 1595b73dc09a8..8056dc5831b52 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -11,6 +11,7 @@
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
 #include <linux/atomic.h>
+#include <linux/sizes.h>
 
 #include "kvm_util.h"
 #include "test_util.h"
@@ -162,8 +163,7 @@ int main(int argc, char *argv[])
 	 * just below the 4gb boundary.  This test could create memory at
 	 * 1gb-3gb,but it's simpler to skip straight to 4gb.
 	 */
-	const uint64_t size_1gb = (1 << 30);
-	const uint64_t start_gpa = (4ull * size_1gb);
+	const uint64_t start_gpa = SZ_4G;
 	const int first_slot = 1;
 
 	struct timespec time_start, time_run1, time_reset, time_run2;
@@ -180,13 +180,13 @@ int main(int argc, char *argv[])
 	 * are quite common for x86, requires changing only max_mem (KVM allows
 	 * 32k memslots, 32k * 2gb == ~64tb of guest memory).
 	 */
-	slot_size = 2 * size_1gb;
+	slot_size = SZ_2G;
 
 	max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
 	TEST_ASSERT(max_slots > first_slot, "KVM is broken");
 
 	/* All KVM MMUs should be able to survive a 128gb guest. */
-	max_mem = 128 * size_1gb;
+	max_mem = 128ull * SZ_1G;
 
 	calc_default_nr_vcpus();
 
@@ -197,11 +197,11 @@ int main(int argc, char *argv[])
 			TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0");
 			break;
 		case 'm':
-			max_mem = atoi_paranoid(optarg) * size_1gb;
+			max_mem = 1ull * atoi_paranoid(optarg) * SZ_1G;
 			TEST_ASSERT(max_mem > 0, "memory size must be >0");
 			break;
 		case 's':
-			slot_size = atoi_paranoid(optarg) * size_1gb;
+			slot_size = 1ull * atoi_paranoid(optarg) * SZ_1G;
 			TEST_ASSERT(slot_size > 0, "slot size must be >0");
 			break;
 		case 'H':
@@ -245,7 +245,7 @@ int main(int argc, char *argv[])
 
 #ifdef __x86_64__
 		/* Identity map memory in the guest using 1gb pages. */
-		for (i = 0; i < slot_size; i += size_1gb)
+		for (i = 0; i < slot_size; i += SZ_1G)
 			__virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
 #else
 		for (i = 0; i < slot_size; i += vm->page_size)
@@ -260,7 +260,7 @@ int main(int argc, char *argv[])
 	vcpus = NULL;
 
 	pr_info("Running with %lugb of guest memory and %u vCPUs\n",
-		(gpa - start_gpa) / size_1gb, nr_vcpus);
+		(gpa - start_gpa) / SZ_1G, nr_vcpus);
 
 	rendezvous_with_vcpus(&time_start, "spawning");
 	rendezvous_with_vcpus(&time_run1, "run 1");
-- 
GitLab


From c15bdebb32ddc73faac5e5180d6997b360e81619 Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Thu, 3 Nov 2022 12:17:17 -0700
Subject: [PATCH 1597/4122] KVM: selftests: Shorten the test args in
 memslot_modification_stress_test.c

Change test args memslot_modification_delay and nr_memslot_modifications
to delay and nr_iterations for simplicity.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221103191719.1559407-6-vipinsh@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../kvm/memslot_modification_stress_test.c     | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 7d19a27d80d2d..3a67d3637f481 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -87,8 +87,8 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
 }
 
 struct test_params {
-	useconds_t memslot_modification_delay;
-	uint64_t nr_memslot_modifications;
+	useconds_t delay;
+	uint64_t nr_iterations;
 	bool partition_vcpu_memory_access;
 };
 
@@ -107,8 +107,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	pr_info("Started all vCPUs\n");
 
-	add_remove_memslot(vm, p->memslot_modification_delay,
-			   p->nr_memslot_modifications);
+	add_remove_memslot(vm, p->delay, p->nr_iterations);
 
 	run_vcpus = false;
 
@@ -144,9 +143,8 @@ int main(int argc, char *argv[])
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 	int opt;
 	struct test_params p = {
-		.memslot_modification_delay = 0,
-		.nr_memslot_modifications =
-			DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS,
+		.delay = 0,
+		.nr_iterations = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS,
 		.partition_vcpu_memory_access = true
 	};
 
@@ -158,8 +156,8 @@ int main(int argc, char *argv[])
 			guest_modes_cmdline(optarg);
 			break;
 		case 'd':
-			p.memslot_modification_delay = atoi_paranoid(optarg);
-			TEST_ASSERT(p.memslot_modification_delay >= 0,
+			p.delay = atoi_paranoid(optarg);
+			TEST_ASSERT(p.delay >= 0,
 				    "A negative delay is not supported.");
 			break;
 		case 'b':
@@ -175,7 +173,7 @@ int main(int argc, char *argv[])
 			p.partition_vcpu_memory_access = false;
 			break;
 		case 'i':
-			p.nr_memslot_modifications = atoi_paranoid(optarg);
+			p.nr_iterations = atoi_paranoid(optarg);
 			break;
 		case 'h':
 		default:
-- 
GitLab


From 0001725d0f9b5d749540021befb67c117d566416 Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Thu, 3 Nov 2022 12:17:18 -0700
Subject: [PATCH 1598/4122] KVM: selftests: Add atoi_positive() and
 atoi_non_negative() for input validation

Many KVM selftests take command line arguments which are supposed to be
positive (>0) or non-negative (>=0). Some tests do these validation and
some missed adding the check.

Add atoi_positive() and atoi_non_negative() to validate inputs in
selftests before proceeding to use those values.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221103191719.1559407-7-vipinsh@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/aarch64/arch_timer.c        | 25 ++++---------------
 .../selftests/kvm/aarch64/debug-exceptions.c  |  2 +-
 .../testing/selftests/kvm/aarch64/vgic_irq.c  |  2 +-
 .../selftests/kvm/access_tracking_perf_test.c |  2 +-
 .../selftests/kvm/demand_paging_test.c        |  4 +--
 .../selftests/kvm/dirty_log_perf_test.c       | 12 ++++-----
 .../testing/selftests/kvm/include/test_util.h | 16 ++++++++++++
 .../selftests/kvm/kvm_page_table_test.c       |  4 +--
 .../selftests/kvm/max_guest_memory_test.c     |  9 +++----
 .../kvm/memslot_modification_stress_test.c    | 10 +++-----
 .../testing/selftests/kvm/memslot_perf_test.c | 22 ++++------------
 .../selftests/kvm/set_memory_region_test.c    |  2 +-
 .../selftests/kvm/x86_64/nx_huge_pages_test.c |  3 +--
 13 files changed, 47 insertions(+), 66 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 251e7ff048831..9409617fce9c4 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -414,36 +414,21 @@ static bool parse_args(int argc, char *argv[])
 	while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) {
 		switch (opt) {
 		case 'n':
-			test_args.nr_vcpus = atoi_paranoid(optarg);
-			if (test_args.nr_vcpus <= 0) {
-				pr_info("Positive value needed for -n\n");
-				goto err;
-			} else if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
+			test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+			if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
 				pr_info("Max allowed vCPUs: %u\n",
 					KVM_MAX_VCPUS);
 				goto err;
 			}
 			break;
 		case 'i':
-			test_args.nr_iter = atoi_paranoid(optarg);
-			if (test_args.nr_iter <= 0) {
-				pr_info("Positive value needed for -i\n");
-				goto err;
-			}
+			test_args.nr_iter = atoi_positive("Number of iterations", optarg);
 			break;
 		case 'p':
-			test_args.timer_period_ms = atoi_paranoid(optarg);
-			if (test_args.timer_period_ms <= 0) {
-				pr_info("Positive value needed for -p\n");
-				goto err;
-			}
+			test_args.timer_period_ms = atoi_positive("Periodicity", optarg);
 			break;
 		case 'm':
-			test_args.migration_freq_ms = atoi_paranoid(optarg);
-			if (test_args.migration_freq_ms < 0) {
-				pr_info("0 or positive value needed for -m\n");
-				goto err;
-			}
+			test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
 			break;
 		case 'h':
 		default:
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 19fffdf19c9f9..878c334607e1a 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -423,7 +423,7 @@ int main(int argc, char *argv[])
 	while ((opt = getopt(argc, argv, "i:")) != -1) {
 		switch (opt) {
 		case 'i':
-			ss_iteration = atoi_paranoid(optarg);
+			ss_iteration = atoi_positive("Number of iterations", optarg);
 			break;
 		case 'h':
 		default:
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index ae90b718070aa..4ead42a072b7d 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -824,7 +824,7 @@ int main(int argc, char **argv)
 	while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
 		switch (opt) {
 		case 'n':
-			nr_irqs = atoi_paranoid(optarg);
+			nr_irqs = atoi_non_negative("Number of IRQs", optarg);
 			if (nr_irqs > 1024 || nr_irqs % 32)
 				help(argv[0]);
 			break;
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index c6bcc5301e2c2..a81e7a7ae18fe 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -368,7 +368,7 @@ int main(int argc, char *argv[])
 			params.vcpu_memory_bytes = parse_size(optarg);
 			break;
 		case 'v':
-			params.nr_vcpus = atoi_paranoid(optarg);
+			params.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
 			break;
 		case 'o':
 			overlap_memory_access = true;
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 82597fb04146f..0c98181fa2489 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -427,8 +427,8 @@ int main(int argc, char *argv[])
 			p.src_type = parse_backing_src_type(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi_paranoid(optarg);
-			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+			TEST_ASSERT(nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
 		case 'o':
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index ecda802b78ffa..4d639683b8efa 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -416,9 +416,7 @@ int main(int argc, char *argv[])
 			run_vcpus_while_disabling_dirty_logging = true;
 			break;
 		case 'f':
-			p.wr_fract = atoi_paranoid(optarg);
-			TEST_ASSERT(p.wr_fract >= 1,
-				    "Write fraction cannot be less than one");
+			p.wr_fract = atoi_positive("Write fraction", optarg);
 			break;
 		case 'g':
 			dirty_log_manual_caps = 0;
@@ -427,7 +425,7 @@ int main(int argc, char *argv[])
 			help(argv[0]);
 			break;
 		case 'i':
-			p.iterations = atoi_paranoid(optarg);
+			p.iterations = atoi_positive("Number of iterations", optarg);
 			break;
 		case 'm':
 			guest_modes_cmdline(optarg);
@@ -445,12 +443,12 @@ int main(int argc, char *argv[])
 			p.backing_src = parse_backing_src_type(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi_paranoid(optarg);
-			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+			TEST_ASSERT(nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
 		case 'x':
-			p.slots = atoi_paranoid(optarg);
+			p.slots = atoi_positive("Number of slots", optarg);
 			break;
 		default:
 			help(argv[0]);
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index feae428637599..3be98e81189a7 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -154,4 +154,20 @@ static inline void *align_ptr_up(void *x, size_t size)
 
 int atoi_paranoid(const char *num_str);
 
+static inline uint32_t atoi_positive(const char *name, const char *num_str)
+{
+	int num = atoi_paranoid(num_str);
+
+	TEST_ASSERT(num > 0, "%s must be greater than 0, got '%s'", name, num_str);
+	return num;
+}
+
+static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
+{
+	int num = atoi_paranoid(num_str);
+
+	TEST_ASSERT(num >= 0, "%s must be non-negative, got '%s'", name, num_str);
+	return num;
+}
+
 #endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index ea7feb69bb88a..696b366be06b4 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -461,8 +461,8 @@ int main(int argc, char *argv[])
 			p.test_mem_size = parse_size(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi_paranoid(optarg);
-			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+			TEST_ASSERT(nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
 		case 's':
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 8056dc5831b52..feaf2be20ff25 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -193,16 +193,13 @@ int main(int argc, char *argv[])
 	while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) {
 		switch (opt) {
 		case 'c':
-			nr_vcpus = atoi_paranoid(optarg);
-			TEST_ASSERT(nr_vcpus > 0, "number of vcpus must be >0");
+			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
 			break;
 		case 'm':
-			max_mem = 1ull * atoi_paranoid(optarg) * SZ_1G;
-			TEST_ASSERT(max_mem > 0, "memory size must be >0");
+			max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G;
 			break;
 		case 's':
-			slot_size = 1ull * atoi_paranoid(optarg) * SZ_1G;
-			TEST_ASSERT(slot_size > 0, "slot size must be >0");
+			slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G;
 			break;
 		case 'H':
 			hugepages = true;
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 3a67d3637f481..4bdfc910ba4d3 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -156,16 +156,14 @@ int main(int argc, char *argv[])
 			guest_modes_cmdline(optarg);
 			break;
 		case 'd':
-			p.delay = atoi_paranoid(optarg);
-			TEST_ASSERT(p.delay >= 0,
-				    "A negative delay is not supported.");
+			p.delay = atoi_non_negative("Delay", optarg);
 			break;
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
 		case 'v':
-			nr_vcpus = atoi_paranoid(optarg);
-			TEST_ASSERT(nr_vcpus > 0 && nr_vcpus <= max_vcpus,
+			nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+			TEST_ASSERT(nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d",
 				    max_vcpus);
 			break;
@@ -173,7 +171,7 @@ int main(int argc, char *argv[])
 			p.partition_vcpu_memory_access = false;
 			break;
 		case 'i':
-			p.nr_iterations = atoi_paranoid(optarg);
+			p.nr_iterations = atoi_positive("Number of iterations", optarg);
 			break;
 		case 'h':
 		default:
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 4bae9e3f5ca1e..330aaef1c02fb 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -892,33 +892,21 @@ static bool parse_args(int argc, char *argv[],
 			}
 			break;
 		case 'f':
-			targs->tfirst = atoi_paranoid(optarg);
-			if (targs->tfirst < 0) {
-				pr_info("First test to run has to be non-negative\n");
-				return false;
-			}
+			targs->tfirst = atoi_non_negative("First test", optarg);
 			break;
 		case 'e':
-			targs->tlast = atoi_paranoid(optarg);
-			if (targs->tlast < 0 || targs->tlast >= NTESTS) {
+			targs->tlast = atoi_non_negative("Last test", optarg);
+			if (targs->tlast >= NTESTS) {
 				pr_info("Last test to run has to be non-negative and less than %zu\n",
 					NTESTS);
 				return false;
 			}
 			break;
 		case 'l':
-			targs->seconds = atoi_paranoid(optarg);
-			if (targs->seconds < 0) {
-				pr_info("Test length in seconds has to be non-negative\n");
-				return false;
-			}
+			targs->seconds = atoi_non_negative("Test length", optarg);
 			break;
 		case 'r':
-			targs->runs = atoi_paranoid(optarg);
-			if (targs->runs <= 0) {
-				pr_info("Runs per test has to be positive\n");
-				return false;
-			}
+			targs->runs = atoi_positive("Runs per test", optarg);
 			break;
 		}
 	}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index c366949c8362c..85c16f09a50e1 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -407,7 +407,7 @@ int main(int argc, char *argv[])
 
 #ifdef __x86_64__
 	if (argc > 1)
-		loops = atoi_paranoid(argv[1]);
+		loops = atoi_positive("Number of iterations", argv[1]);
 	else
 		loops = 10;
 
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 354b6902849c5..ea0978f22db86 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -241,7 +241,7 @@ int main(int argc, char **argv)
 	while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
 		switch (opt) {
 		case 'p':
-			reclaim_period_ms = atoi_paranoid(optarg);
+			reclaim_period_ms = atoi_non_negative("Reclaim period", optarg);
 			break;
 		case 't':
 			token = atoi_paranoid(optarg);
@@ -257,7 +257,6 @@ int main(int argc, char **argv)
 	}
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
-	TEST_REQUIRE(reclaim_period_ms > 0);
 
 	__TEST_REQUIRE(token == MAGIC_TOKEN,
 		       "This test must be run with the magic token %d.\n"
-- 
GitLab


From d886724ea81c6a4dc5e37d4ee09287a31ab8335e Mon Sep 17 00:00:00 2001
From: Vipin Sharma <vipinsh@google.com>
Date: Thu, 3 Nov 2022 12:17:19 -0700
Subject: [PATCH 1599/4122] KVM: selftests: Allowing running
 dirty_log_perf_test on specific CPUs

Add a command line option, -c, to pin vCPUs to physical CPUs (pCPUs),
i.e.  to force vCPUs to run on specific pCPUs.

Requirement to implement this feature came in discussion on the patch
"Make page tables for eager page splitting NUMA aware"
https://lore.kernel.org/lkml/YuhPT2drgqL+osLl@google.com/

This feature is useful as it provides a way to analyze performance based
on the vCPUs and dirty log worker locations, like on the different NUMA
nodes or on the same NUMA nodes.

To keep things simple, implementation is intentionally very limited,
either all of the vCPUs will be pinned followed by an optional main
thread or nothing will be pinned.

Signed-off-by: Vipin Sharma <vipinsh@google.com>
Suggested-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221103191719.1559407-8-vipinsh@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/dirty_log_perf_test.c       | 25 ++++++++-
 .../selftests/kvm/include/kvm_util_base.h     |  4 ++
 .../selftests/kvm/include/perf_test_util.h    |  4 ++
 tools/testing/selftests/kvm/lib/kvm_util.c    | 54 +++++++++++++++++++
 .../selftests/kvm/lib/perf_test_util.c        |  8 ++-
 5 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 4d639683b8efa..0612158329aa0 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -353,7 +353,7 @@ static void help(char *name)
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
 	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
-	       "[-x memslots]\n", name);
+	       "[-x memslots] [-c physical cpus to run test on]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
@@ -383,6 +383,17 @@ static void help(char *name)
 	backing_src_help("-s");
 	printf(" -x: Split the memory region into this number of memslots.\n"
 	       "     (default: 1)\n");
+	printf(" -c: Pin tasks to physical CPUs.  Takes a list of comma separated\n"
+	       "     values (target pCPU), one for each vCPU, plus an optional\n"
+	       "     entry for the main application task (specified via entry\n"
+	       "     <nr_vcpus + 1>).  If used, entries must be provided for all\n"
+	       "     vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
+	       "     E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
+	       "     vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
+	       "         ./dirty_log_perf_test -v 3 -c 22,23,24,50\n\n"
+	       "     To leave the application task unpinned, drop the final entry:\n\n"
+	       "         ./dirty_log_perf_test -v 3 -c 22,23,24\n\n"
+	       "     (default: no pinning)\n");
 	puts("");
 	exit(0);
 }
@@ -390,6 +401,7 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+	const char *pcpu_list = NULL;
 	struct test_params p = {
 		.iterations = TEST_HOST_LOOP_N,
 		.wr_fract = 1,
@@ -406,11 +418,14 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "b:ef:ghi:m:nop:s:v:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:s:v:x:")) != -1) {
 		switch (opt) {
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
+		case 'c':
+			pcpu_list = optarg;
+			break;
 		case 'e':
 			/* 'e' is for evil. */
 			run_vcpus_while_disabling_dirty_logging = true;
@@ -456,6 +471,12 @@ int main(int argc, char *argv[])
 		}
 	}
 
+	if (pcpu_list) {
+		kvm_parse_vcpu_pinning(pcpu_list, perf_test_args.vcpu_to_pcpu,
+				       nr_vcpus);
+		perf_test_args.pin_vcpus = true;
+	}
+
 	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
 
 	pr_info("Test iterations: %"PRIu64"\n",	p.iterations);
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index e42a09cd24a04..3bf2333ef95d2 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -688,6 +688,10 @@ static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
 
 struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
 
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+			    int nr_vcpus);
+
 unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
 unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
 unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index eaa88df0555a8..849c875dd0ffa 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -39,6 +39,10 @@ struct perf_test_args {
 
 	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
 	bool nested;
+	/* True if all vCPUs are pinned to pCPUs */
+	bool pin_vcpus;
+	/* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
+	uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];
 
 	struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
 };
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index f1cb1627161fd..3b7710fb37840 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -11,6 +11,7 @@
 #include "processor.h"
 
 #include <assert.h>
+#include <sched.h>
 #include <sys/mman.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -443,6 +444,59 @@ struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
 	return vm_vcpu_recreate(vm, 0);
 }
 
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
+{
+	cpu_set_t mask;
+	int r;
+
+	CPU_ZERO(&mask);
+	CPU_SET(pcpu, &mask);
+	r = sched_setaffinity(0, sizeof(mask), &mask);
+	TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.\n", pcpu);
+}
+
+static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
+{
+	uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);
+
+	TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
+		    "Not allowed to run on pCPU '%d', check cgroups?\n", pcpu);
+	return pcpu;
+}
+
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+			    int nr_vcpus)
+{
+	cpu_set_t allowed_mask;
+	char *cpu, *cpu_list;
+	char delim[2] = ",";
+	int i, r;
+
+	cpu_list = strdup(pcpus_string);
+	TEST_ASSERT(cpu_list, "strdup() allocation failed.\n");
+
+	r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
+	TEST_ASSERT(!r, "sched_getaffinity() failed");
+
+	cpu = strtok(cpu_list, delim);
+
+	/* 1. Get all pcpus for vcpus. */
+	for (i = 0; i < nr_vcpus; i++) {
+		TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'\n", i);
+		vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
+		cpu = strtok(NULL, delim);
+	}
+
+	/* 2. Check if the main worker needs to be pinned. */
+	if (cpu) {
+		kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+		cpu = strtok(NULL, delim);
+	}
+
+	TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
+	free(cpu_list);
+}
+
 /*
  * Userspace Memory Region Find
  *
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 9618b37c66f7e..3a1d0a44419b0 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -2,6 +2,8 @@
 /*
  * Copyright (C) 2020, Google LLC.
  */
+#define _GNU_SOURCE
+
 #include <inttypes.h>
 
 #include "kvm_util.h"
@@ -243,6 +245,10 @@ void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_v
 static void *vcpu_thread_main(void *data)
 {
 	struct vcpu_thread *vcpu = data;
+	int vcpu_idx = vcpu->vcpu_idx;
+
+	if (perf_test_args.pin_vcpus)
+		kvm_pin_this_task_to_pcpu(perf_test_args.vcpu_to_pcpu[vcpu_idx]);
 
 	WRITE_ONCE(vcpu->running, true);
 
@@ -255,7 +261,7 @@ static void *vcpu_thread_main(void *data)
 	while (!READ_ONCE(all_vcpu_threads_running))
 		;
 
-	vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu->vcpu_idx]);
+	vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu_idx]);
 
 	return NULL;
 }
-- 
GitLab


From b31f21a7e97eee501db86714868d84377e68e4df Mon Sep 17 00:00:00 2001
From: Colton Lewis <coltonlewis@google.com>
Date: Mon, 7 Nov 2022 18:22:05 +0000
Subject: [PATCH 1600/4122] KVM: selftests: implement random number generator
 for guest code

Implement random number generator for guest code to randomize parts
of the test, making it less predictable and a more accurate reflection
of reality.

The random number generator chosen is the Park-Miller Linear
Congruential Generator, a fancy name for a basic and well-understood
random number generator entirely sufficient for this purpose.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221107182208.479157-2-coltonlewis@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/include/test_util.h |  7 +++++++
 tools/testing/selftests/kvm/lib/test_util.c     | 17 +++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 3be98e81189a7..80d6416f3012e 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -77,6 +77,13 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
 struct timespec timespec_elapsed(struct timespec start);
 struct timespec timespec_div(struct timespec ts, int divisor);
 
+struct guest_random_state {
+	uint32_t seed;
+};
+
+struct guest_random_state new_guest_random_state(uint32_t seed);
+uint32_t guest_random_u32(struct guest_random_state *state);
+
 enum vm_mem_backing_src_type {
 	VM_MEM_SRC_ANONYMOUS,
 	VM_MEM_SRC_ANONYMOUS_THP,
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index c2d9c68277793..5c22fa4c2825e 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -17,6 +17,23 @@
 
 #include "test_util.h"
 
+/*
+ * Random number generator that is usable from guest code. This is the
+ * Park-Miller LCG using standard constants.
+ */
+
+struct guest_random_state new_guest_random_state(uint32_t seed)
+{
+	struct guest_random_state s = {.seed = seed};
+	return s;
+}
+
+uint32_t guest_random_u32(struct guest_random_state *state)
+{
+	state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1);
+	return state->seed;
+}
+
 /*
  * Parses "[0-9]+[kmgt]?".
  */
-- 
GitLab


From f11aa24bdbc66a10378d28ee962b95426e8d2a09 Mon Sep 17 00:00:00 2001
From: Colton Lewis <coltonlewis@google.com>
Date: Mon, 7 Nov 2022 18:22:06 +0000
Subject: [PATCH 1601/4122] KVM: selftests: create -r argument to specify
 random seed

Create a -r argument to specify a random seed. If no argument is
provided, the seed defaults to 1. The random seed is set with
perf_test_set_random_seed() and must be set before guest_code runs to
apply.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221107182208.479157-3-coltonlewis@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c    | 12 ++++++++++--
 tools/testing/selftests/kvm/include/perf_test_util.h |  2 ++
 tools/testing/selftests/kvm/lib/perf_test_util.c     |  6 ++++++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 0612158329aa0..eb63ca12b519d 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -132,6 +132,7 @@ struct test_params {
 	bool partition_vcpu_memory_access;
 	enum vm_mem_backing_src_type backing_src;
 	int slots;
+	uint32_t random_seed;
 };
 
 static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
@@ -225,6 +226,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 				 p->slots, p->backing_src,
 				 p->partition_vcpu_memory_access);
 
+	pr_info("Random seed: %u\n", p->random_seed);
+	perf_test_set_random_seed(vm, p->random_seed);
 	perf_test_set_wr_fract(vm, p->wr_fract);
 
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
@@ -352,7 +355,7 @@ static void help(char *name)
 {
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
-	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
+	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]"
 	       "[-x memslots] [-c physical cpus to run test on]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
@@ -380,6 +383,7 @@ static void help(char *name)
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
+	printf(" -r: specify the starting random seed.\n");
 	backing_src_help("-s");
 	printf(" -x: Split the memory region into this number of memslots.\n"
 	       "     (default: 1)\n");
@@ -408,6 +412,7 @@ int main(int argc, char *argv[])
 		.partition_vcpu_memory_access = true,
 		.backing_src = DEFAULT_VM_MEM_SRC,
 		.slots = 1,
+		.random_seed = 1,
 	};
 	int opt;
 
@@ -418,7 +423,7 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:s:v:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:r:s:v:x:")) != -1) {
 		switch (opt) {
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
@@ -454,6 +459,9 @@ int main(int argc, char *argv[])
 		case 'p':
 			p.phys_offset = strtoull(optarg, NULL, 0);
 			break;
+		case 'r':
+			p.random_seed = atoi_positive("Random seed", optarg);
+			break;
 		case 's':
 			p.backing_src = parse_backing_src_type(optarg);
 			break;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 849c875dd0ffa..5a0e48b625a20 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -35,6 +35,7 @@ struct perf_test_args {
 	uint64_t gpa;
 	uint64_t size;
 	uint64_t guest_page_size;
+	uint32_t random_seed;
 	int wr_fract;
 
 	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
@@ -56,6 +57,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 void perf_test_destroy_vm(struct kvm_vm *vm);
 
 void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
+void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
 
 void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
 void perf_test_join_vcpu_threads(int vcpus);
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 3a1d0a44419b0..d48ee4f604f0a 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -231,6 +231,12 @@ void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
 	sync_global_to_guest(vm, perf_test_args);
 }
 
+void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
+{
+	perf_test_args.random_seed = random_seed;
+	sync_global_to_guest(vm, perf_test_args.random_seed);
+}
+
 uint64_t __weak perf_test_nested_pages(int nr_vcpus)
 {
 	return 0;
-- 
GitLab


From 6864c6442f4dfa02c7cf48199cf3ea6bb1fe74ed Mon Sep 17 00:00:00 2001
From: Colton Lewis <coltonlewis@google.com>
Date: Mon, 7 Nov 2022 18:22:07 +0000
Subject: [PATCH 1602/4122] KVM: selftests: randomize which pages are written
 vs read

Randomize which pages are written vs read using the random number
generator.

Change the variable wr_fract and associated function calls to
write_percent that now operates as a percentage from 0 to 100 where X
means each page has an X% chance of being written. Change the -f
argument to -w to reflect the new variable semantics. Keep the same
default of 100% writes.

Population always uses 100% writes to ensure all memory is actually
populated and not just mapped to the zero page. The prevents expensive
copy-on-write faults from occurring during the dirty memory iterations
below, which would pollute the performance results.

Each vCPU calculates its own random seed by adding its index to the
seed provided.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221107182208.479157-4-coltonlewis@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/access_tracking_perf_test.c |  2 +-
 .../selftests/kvm/dirty_log_perf_test.c       | 37 +++++++++++++------
 .../selftests/kvm/include/perf_test_util.h    |  4 +-
 .../selftests/kvm/lib/perf_test_util.c        | 13 ++++---
 4 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index a81e7a7ae18fe..c0cdf07de1471 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -279,7 +279,7 @@ static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *descripti
 static void access_memory(struct kvm_vm *vm, int nr_vcpus,
 			  enum access_type access, const char *description)
 {
-	perf_test_set_wr_fract(vm, (access == ACCESS_READ) ? INT_MAX : 1);
+	perf_test_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100);
 	iteration_work = ITERATION_ACCESS_MEMORY;
 	run_iteration(vm, nr_vcpus, description);
 }
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index eb63ca12b519d..e9ce50fe7295c 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -128,10 +128,10 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 struct test_params {
 	unsigned long iterations;
 	uint64_t phys_offset;
-	int wr_fract;
 	bool partition_vcpu_memory_access;
 	enum vm_mem_backing_src_type backing_src;
 	int slots;
+	uint32_t write_percent;
 	uint32_t random_seed;
 };
 
@@ -228,7 +228,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	pr_info("Random seed: %u\n", p->random_seed);
 	perf_test_set_random_seed(vm, p->random_seed);
-	perf_test_set_wr_fract(vm, p->wr_fract);
+	perf_test_set_write_percent(vm, p->write_percent);
 
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
@@ -251,6 +251,14 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	for (i = 0; i < nr_vcpus; i++)
 		vcpu_last_completed_iteration[i] = -1;
 
+	/*
+	 * Use 100% writes during the population phase to ensure all
+	 * memory is actually populated and not just mapped to the zero
+	 * page. The prevents expensive copy-on-write faults from
+	 * occurring during the dirty memory iterations below, which
+	 * would pollute the performance results.
+	 */
+	perf_test_set_write_percent(vm, 100);
 	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
 	/* Allow the vCPUs to populate memory */
@@ -272,6 +280,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
+	perf_test_set_write_percent(vm, p->write_percent);
+
 	while (iteration < p->iterations) {
 		/*
 		 * Incrementing the iteration number will start the vCPUs
@@ -356,7 +366,7 @@ static void help(char *name)
 	puts("");
 	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
 	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]"
-	       "[-x memslots] [-c physical cpus to run test on]\n", name);
+	       "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name);
 	puts("");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
@@ -376,10 +386,6 @@ static void help(char *name)
 	printf(" -b: specify the size of the memory region which should be\n"
 	       "     dirtied by each vCPU. e.g. 10M or 3G.\n"
 	       "     (default: 1G)\n");
-	printf(" -f: specify the fraction of pages which should be written to\n"
-	       "     as opposed to simply read, in the form\n"
-	       "     1/<fraction of pages to write>.\n"
-	       "     (default: 1 i.e. all pages are written to.)\n");
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
@@ -387,6 +393,11 @@ static void help(char *name)
 	backing_src_help("-s");
 	printf(" -x: Split the memory region into this number of memslots.\n"
 	       "     (default: 1)\n");
+	printf(" -w: specify the percentage of pages which should be written to\n"
+	       "     as an integer from 0-100 inclusive. This is probabalistic,\n"
+	       "     so -w X means each page has an X%% chance of writing\n"
+	       "     and a (100-X)%% chance of reading.\n"
+	       "     (default: 100 i.e. all pages are written to.)\n");
 	printf(" -c: Pin tasks to physical CPUs.  Takes a list of comma separated\n"
 	       "     values (target pCPU), one for each vCPU, plus an optional\n"
 	       "     entry for the main application task (specified via entry\n"
@@ -408,11 +419,11 @@ int main(int argc, char *argv[])
 	const char *pcpu_list = NULL;
 	struct test_params p = {
 		.iterations = TEST_HOST_LOOP_N,
-		.wr_fract = 1,
 		.partition_vcpu_memory_access = true,
 		.backing_src = DEFAULT_VM_MEM_SRC,
 		.slots = 1,
 		.random_seed = 1,
+		.write_percent = 100,
 	};
 	int opt;
 
@@ -423,7 +434,7 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "b:c:ef:ghi:m:nop:r:s:v:x:")) != -1) {
+	while ((opt = getopt(argc, argv, "b:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
 		switch (opt) {
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
@@ -435,9 +446,6 @@ int main(int argc, char *argv[])
 			/* 'e' is for evil. */
 			run_vcpus_while_disabling_dirty_logging = true;
 			break;
-		case 'f':
-			p.wr_fract = atoi_positive("Write fraction", optarg);
-			break;
 		case 'g':
 			dirty_log_manual_caps = 0;
 			break;
@@ -470,6 +478,11 @@ int main(int argc, char *argv[])
 			TEST_ASSERT(nr_vcpus <= max_vcpus,
 				    "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
 			break;
+		case 'w':
+			p.write_percent = atoi_non_negative("Write percentage", optarg);
+			TEST_ASSERT(p.write_percent <= 100,
+				    "Write percentage must be between 0 and 100");
+			break;
 		case 'x':
 			p.slots = atoi_positive("Number of slots", optarg);
 			break;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 5a0e48b625a20..6470ca0fec4c0 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -36,7 +36,7 @@ struct perf_test_args {
 	uint64_t size;
 	uint64_t guest_page_size;
 	uint32_t random_seed;
-	int wr_fract;
+	uint32_t write_percent;
 
 	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
 	bool nested;
@@ -56,7 +56,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 				   bool partition_vcpu_memory_access);
 void perf_test_destroy_vm(struct kvm_vm *vm);
 
-void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract);
+void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
 void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
 
 void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index d48ee4f604f0a..15000f71cdeea 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -48,10 +48,13 @@ void perf_test_guest_code(uint32_t vcpu_idx)
 {
 	struct perf_test_args *pta = &perf_test_args;
 	struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx];
+	struct guest_random_state rand_state;
 	uint64_t gva;
 	uint64_t pages;
 	int i;
 
+	rand_state = new_guest_random_state(pta->random_seed + vcpu_idx);
+
 	gva = vcpu_args->gva;
 	pages = vcpu_args->pages;
 
@@ -62,7 +65,7 @@ void perf_test_guest_code(uint32_t vcpu_idx)
 		for (i = 0; i < pages; i++) {
 			uint64_t addr = gva + (i * pta->guest_page_size);
 
-			if (i % pta->wr_fract == 0)
+			if (guest_random_u32(&rand_state) % 100 < pta->write_percent)
 				*(uint64_t *)addr = 0x0123456789ABCDEF;
 			else
 				READ_ONCE(*(uint64_t *)addr);
@@ -123,7 +126,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
 	/* By default vCPUs will write to memory. */
-	pta->wr_fract = 1;
+	pta->write_percent = 100;
 
 	/*
 	 * Snapshot the non-huge page size.  This is used by the guest code to
@@ -225,10 +228,10 @@ void perf_test_destroy_vm(struct kvm_vm *vm)
 	kvm_vm_free(vm);
 }
 
-void perf_test_set_wr_fract(struct kvm_vm *vm, int wr_fract)
+void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
 {
-	perf_test_args.wr_fract = wr_fract;
-	sync_global_to_guest(vm, perf_test_args);
+	perf_test_args.write_percent = write_percent;
+	sync_global_to_guest(vm, perf_test_args.write_percent);
 }
 
 void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
-- 
GitLab


From c967a4752ac66cc0ef8c0b1f4914151ca8758709 Mon Sep 17 00:00:00 2001
From: Colton Lewis <coltonlewis@google.com>
Date: Mon, 7 Nov 2022 18:22:08 +0000
Subject: [PATCH 1603/4122] KVM: selftests: randomize page access order

Create the ability to randomize page access order with the -a
argument. This includes the possibility that the same pages may be hit
multiple times during an iteration or not at all.

Population has random access as false to ensure all pages will be
touched by population and avoid page faults in late dirty memory that
would pollute the test results.

Signed-off-by: Colton Lewis <coltonlewis@google.com>
Reviewed-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221107182208.479157-5-coltonlewis@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 11 +++++++++--
 .../selftests/kvm/include/perf_test_util.h        |  3 +++
 tools/testing/selftests/kvm/lib/perf_test_util.c  | 15 ++++++++++++++-
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index e9ce50fe7295c..47cbda3580fd6 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -133,6 +133,7 @@ struct test_params {
 	int slots;
 	uint32_t write_percent;
 	uint32_t random_seed;
+	bool random_access;
 };
 
 static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
@@ -259,6 +260,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	 * would pollute the performance results.
 	 */
 	perf_test_set_write_percent(vm, 100);
+	perf_test_set_random_access(vm, false);
 	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
 	/* Allow the vCPUs to populate memory */
@@ -281,6 +283,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
 	perf_test_set_write_percent(vm, p->write_percent);
+	perf_test_set_random_access(vm, p->random_access);
 
 	while (iteration < p->iterations) {
 		/*
@@ -364,10 +367,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 static void help(char *name)
 {
 	puts("");
-	printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
+	printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] "
 	       "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]"
 	       "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name);
 	puts("");
+	printf(" -a: access memory randomly rather than in order.\n");
 	printf(" -i: specify iteration counts (default: %"PRIu64")\n",
 	       TEST_HOST_LOOP_N);
 	printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n"
@@ -434,8 +438,11 @@ int main(int argc, char *argv[])
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "b:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
+	while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
 		switch (opt) {
+		case 'a':
+			p.random_access = true;
+			break;
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index 6470ca0fec4c0..75ca679059dc2 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -40,6 +40,8 @@ struct perf_test_args {
 
 	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
 	bool nested;
+	/* Randomize which pages are accessed by the guest. */
+	bool random_access;
 	/* True if all vCPUs are pinned to pCPUs */
 	bool pin_vcpus;
 	/* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
@@ -58,6 +60,7 @@ void perf_test_destroy_vm(struct kvm_vm *vm);
 
 void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
 void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
+void perf_test_set_random_access(struct kvm_vm *vm, bool random_access);
 
 void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
 void perf_test_join_vcpu_threads(int vcpus);
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 15000f71cdeea..3a9a3ea01a973 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -51,6 +51,8 @@ void perf_test_guest_code(uint32_t vcpu_idx)
 	struct guest_random_state rand_state;
 	uint64_t gva;
 	uint64_t pages;
+	uint64_t addr;
+	uint64_t page;
 	int i;
 
 	rand_state = new_guest_random_state(pta->random_seed + vcpu_idx);
@@ -63,7 +65,12 @@ void perf_test_guest_code(uint32_t vcpu_idx)
 
 	while (true) {
 		for (i = 0; i < pages; i++) {
-			uint64_t addr = gva + (i * pta->guest_page_size);
+			if (pta->random_access)
+				page = guest_random_u32(&rand_state) % pages;
+			else
+				page = i;
+
+			addr = gva + (page * pta->guest_page_size);
 
 			if (guest_random_u32(&rand_state) % 100 < pta->write_percent)
 				*(uint64_t *)addr = 0x0123456789ABCDEF;
@@ -240,6 +247,12 @@ void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
 	sync_global_to_guest(vm, perf_test_args.random_seed);
 }
 
+void perf_test_set_random_access(struct kvm_vm *vm, bool random_access)
+{
+	perf_test_args.random_access = random_access;
+	sync_global_to_guest(vm, perf_test_args.random_access);
+}
+
 uint64_t __weak perf_test_nested_pages(int nr_vcpus)
 {
 	return 0;
-- 
GitLab


From 9fda6753c9dd4594e2c66c56b48b56a326ee686f Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 12 Oct 2022 09:57:27 -0700
Subject: [PATCH 1604/4122] KVM: selftests: Rename perf_test_util.[ch] to
 memstress.[ch]

Rename the perf_test_util.[ch] files to memstress.[ch]. Symbols are
renamed in the following commit to reduce the amount of churn here in
hopes of playiing nice with git's file rename detection.

The name "memstress" was chosen to better describe the functionality
proveded by this library, which is to create and run a VM that
reads/writes to guest memory on all vCPUs in parallel.

"memstress" also contains the same number of chracters as "perf_test",
making it a drop-in replacement in symbols, e.g. function names, without
impacting line lengths. Also the lack of underscore between "mem" and
"stress" makes it clear "memstress" is a noun.

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221012165729.3505266-2-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/Makefile                      | 4 ++--
 tools/testing/selftests/kvm/access_tracking_perf_test.c   | 2 +-
 tools/testing/selftests/kvm/demand_paging_test.c          | 2 +-
 tools/testing/selftests/kvm/dirty_log_perf_test.c         | 2 +-
 .../kvm/include/{perf_test_util.h => memstress.h}         | 8 ++++----
 .../selftests/kvm/lib/{perf_test_util.c => memstress.c}   | 2 +-
 .../kvm/lib/x86_64/{perf_test_util.c => memstress.c}      | 4 ++--
 .../selftests/kvm/memslot_modification_stress_test.c      | 4 ++--
 8 files changed, 14 insertions(+), 14 deletions(-)
 rename tools/testing/selftests/kvm/include/{perf_test_util.h => memstress.h} (91%)
 rename tools/testing/selftests/kvm/lib/{perf_test_util.c => memstress.c} (99%)
 rename tools/testing/selftests/kvm/lib/x86_64/{perf_test_util.c => memstress.c} (97%)

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 0172eb6cb6eee..a00253b79040b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -43,7 +43,7 @@ LIBKVM += lib/elf.c
 LIBKVM += lib/guest_modes.c
 LIBKVM += lib/io.c
 LIBKVM += lib/kvm_util.c
-LIBKVM += lib/perf_test_util.c
+LIBKVM += lib/memstress.c
 LIBKVM += lib/rbtree.c
 LIBKVM += lib/sparsebit.c
 LIBKVM += lib/test_util.c
@@ -52,7 +52,7 @@ LIBKVM_STRING += lib/string_override.c
 
 LIBKVM_x86_64 += lib/x86_64/apic.c
 LIBKVM_x86_64 += lib/x86_64/handlers.S
-LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
+LIBKVM_x86_64 += lib/x86_64/memstress.c
 LIBKVM_x86_64 += lib/x86_64/processor.c
 LIBKVM_x86_64 += lib/x86_64/svm.c
 LIBKVM_x86_64 += lib/x86_64/ucall.c
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index c0cdf07de1471..534d18cc4a6af 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -44,7 +44,7 @@
 
 #include "kvm_util.h"
 #include "test_util.h"
-#include "perf_test_util.h"
+#include "memstress.h"
 #include "guest_modes.h"
 
 /* Global variable used to synchronize all of the vCPU threads. */
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 0c98181fa2489..37501e83d1d87 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -20,7 +20,7 @@
 
 #include "kvm_util.h"
 #include "test_util.h"
-#include "perf_test_util.h"
+#include "memstress.h"
 #include "guest_modes.h"
 
 #ifdef __NR_userfaultfd
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 47cbda3580fd6..d2bac493da5dd 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -16,7 +16,7 @@
 
 #include "kvm_util.h"
 #include "test_util.h"
-#include "perf_test_util.h"
+#include "memstress.h"
 #include "guest_modes.h"
 
 #ifdef __aarch64__
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/memstress.h
similarity index 91%
rename from tools/testing/selftests/kvm/include/perf_test_util.h
rename to tools/testing/selftests/kvm/include/memstress.h
index 75ca679059dc2..64a523e061259 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -1,12 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * tools/testing/selftests/kvm/include/perf_test_util.h
+ * tools/testing/selftests/kvm/include/memstress.h
  *
  * Copyright (C) 2020, Google LLC.
  */
 
-#ifndef SELFTEST_KVM_PERF_TEST_UTIL_H
-#define SELFTEST_KVM_PERF_TEST_UTIL_H
+#ifndef SELFTEST_KVM_MEMSTRESS_H
+#define SELFTEST_KVM_MEMSTRESS_H
 
 #include <pthread.h>
 
@@ -69,4 +69,4 @@ void perf_test_guest_code(uint32_t vcpu_id);
 uint64_t perf_test_nested_pages(int nr_vcpus);
 void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
 
-#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
+#endif /* SELFTEST_KVM_MEMSTRESS_H */
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/memstress.c
similarity index 99%
rename from tools/testing/selftests/kvm/lib/perf_test_util.c
rename to tools/testing/selftests/kvm/lib/memstress.c
index 3a9a3ea01a973..72f88e5851dd6 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -7,7 +7,7 @@
 #include <inttypes.h>
 
 #include "kvm_util.h"
-#include "perf_test_util.h"
+#include "memstress.h"
 #include "processor.h"
 
 struct perf_test_args perf_test_args;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
similarity index 97%
rename from tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
rename to tools/testing/selftests/kvm/lib/x86_64/memstress.c
index 0f344a7c89c4b..0bb717ac2cc57 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
- * x86_64-specific extensions to perf_test_util.c.
+ * x86_64-specific extensions to memstress.c.
  *
  * Copyright (C) 2022, Google, Inc.
  */
@@ -11,7 +11,7 @@
 
 #include "test_util.h"
 #include "kvm_util.h"
-#include "perf_test_util.h"
+#include "memstress.h"
 #include "processor.h"
 #include "vmx.h"
 
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 4bdfc910ba4d3..0490bd4606e50 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -21,7 +21,7 @@
 #include <linux/bitops.h>
 #include <linux/userfaultfd.h>
 
-#include "perf_test_util.h"
+#include "memstress.h"
 #include "processor.h"
 #include "test_util.h"
 #include "guest_modes.h"
@@ -72,7 +72,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
 	int i;
 
 	/*
-	 * Add the dummy memslot just below the perf_test_util memslot, which is
+	 * Add the dummy memslot just below the memstress memslot, which is
 	 * at the top of the guest physical address space.
 	 */
 	gpa = perf_test_args.gpa - pages * vm->page_size;
-- 
GitLab


From a008a3351feaffdcf97a2bcf90b789626585258b Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 12 Oct 2022 09:57:28 -0700
Subject: [PATCH 1605/4122] KVM: selftests: Rename pta (short for
 perf_test_args) to args

Rename the local variables "pta" (which is short for perf_test_args) for
args. "pta" is not an obvious acronym and using "args" mirrors
"vcpu_args".

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221012165729.3505266-3-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/lib/memstress.c | 60 ++++++++++-----------
 1 file changed, 30 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 72f88e5851dd6..255f77d863304 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -46,8 +46,8 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
  */
 void perf_test_guest_code(uint32_t vcpu_idx)
 {
-	struct perf_test_args *pta = &perf_test_args;
-	struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_idx];
+	struct perf_test_args *args = &perf_test_args;
+	struct perf_test_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx];
 	struct guest_random_state rand_state;
 	uint64_t gva;
 	uint64_t pages;
@@ -55,7 +55,7 @@ void perf_test_guest_code(uint32_t vcpu_idx)
 	uint64_t page;
 	int i;
 
-	rand_state = new_guest_random_state(pta->random_seed + vcpu_idx);
+	rand_state = new_guest_random_state(args->random_seed + vcpu_idx);
 
 	gva = vcpu_args->gva;
 	pages = vcpu_args->pages;
@@ -65,14 +65,14 @@ void perf_test_guest_code(uint32_t vcpu_idx)
 
 	while (true) {
 		for (i = 0; i < pages; i++) {
-			if (pta->random_access)
+			if (args->random_access)
 				page = guest_random_u32(&rand_state) % pages;
 			else
 				page = i;
 
-			addr = gva + (page * pta->guest_page_size);
+			addr = gva + (page * args->guest_page_size);
 
-			if (guest_random_u32(&rand_state) % 100 < pta->write_percent)
+			if (guest_random_u32(&rand_state) % 100 < args->write_percent)
 				*(uint64_t *)addr = 0x0123456789ABCDEF;
 			else
 				READ_ONCE(*(uint64_t *)addr);
@@ -87,12 +87,12 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
 			   uint64_t vcpu_memory_bytes,
 			   bool partition_vcpu_memory_access)
 {
-	struct perf_test_args *pta = &perf_test_args;
+	struct perf_test_args *args = &perf_test_args;
 	struct perf_test_vcpu_args *vcpu_args;
 	int i;
 
 	for (i = 0; i < nr_vcpus; i++) {
-		vcpu_args = &pta->vcpu_args[i];
+		vcpu_args = &args->vcpu_args[i];
 
 		vcpu_args->vcpu = vcpus[i];
 		vcpu_args->vcpu_idx = i;
@@ -101,20 +101,20 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
 			vcpu_args->gva = guest_test_virt_mem +
 					 (i * vcpu_memory_bytes);
 			vcpu_args->pages = vcpu_memory_bytes /
-					   pta->guest_page_size;
-			vcpu_args->gpa = pta->gpa + (i * vcpu_memory_bytes);
+					   args->guest_page_size;
+			vcpu_args->gpa = args->gpa + (i * vcpu_memory_bytes);
 		} else {
 			vcpu_args->gva = guest_test_virt_mem;
 			vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) /
-					   pta->guest_page_size;
-			vcpu_args->gpa = pta->gpa;
+					   args->guest_page_size;
+			vcpu_args->gpa = args->gpa;
 		}
 
 		vcpu_args_set(vcpus[i], 1, i);
 
 		pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
 			 i, vcpu_args->gpa, vcpu_args->gpa +
-			 (vcpu_args->pages * pta->guest_page_size));
+			 (vcpu_args->pages * args->guest_page_size));
 	}
 }
 
@@ -123,7 +123,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 				   enum vm_mem_backing_src_type backing_src,
 				   bool partition_vcpu_memory_access)
 {
-	struct perf_test_args *pta = &perf_test_args;
+	struct perf_test_args *args = &perf_test_args;
 	struct kvm_vm *vm;
 	uint64_t guest_num_pages, slot0_pages = 0;
 	uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
@@ -133,20 +133,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
 
 	/* By default vCPUs will write to memory. */
-	pta->write_percent = 100;
+	args->write_percent = 100;
 
 	/*
 	 * Snapshot the non-huge page size.  This is used by the guest code to
 	 * access/dirty pages at the logging granularity.
 	 */
-	pta->guest_page_size = vm_guest_mode_params[mode].page_size;
+	args->guest_page_size = vm_guest_mode_params[mode].page_size;
 
 	guest_num_pages = vm_adjust_num_guest_pages(mode,
-				(nr_vcpus * vcpu_memory_bytes) / pta->guest_page_size);
+				(nr_vcpus * vcpu_memory_bytes) / args->guest_page_size);
 
 	TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
 		    "Guest memory size is not host page size aligned.");
-	TEST_ASSERT(vcpu_memory_bytes % pta->guest_page_size == 0,
+	TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0,
 		    "Guest memory size is not guest page size aligned.");
 	TEST_ASSERT(guest_num_pages % slots == 0,
 		    "Guest memory cannot be evenly divided into %d slots.",
@@ -156,7 +156,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	 * If using nested, allocate extra pages for the nested page tables and
 	 * in-memory data structures.
 	 */
-	if (pta->nested)
+	if (args->nested)
 		slot0_pages += perf_test_nested_pages(nr_vcpus);
 
 	/*
@@ -167,7 +167,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages,
 				    perf_test_guest_code, vcpus);
 
-	pta->vm = vm;
+	args->vm = vm;
 
 	/* Put the test region at the top guest physical memory. */
 	region_end_gfn = vm->max_gfn + 1;
@@ -177,8 +177,8 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	 * When running vCPUs in L2, restrict the test region to 48 bits to
 	 * avoid needing 5-level page tables to identity map L2.
 	 */
-	if (pta->nested)
-		region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
+	if (args->nested)
+		region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size);
 #endif
 	/*
 	 * If there should be more memory in the guest test region than there
@@ -190,20 +190,20 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 		    " nr_vcpus: %d wss: %" PRIx64 "]\n",
 		    guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes);
 
-	pta->gpa = (region_end_gfn - guest_num_pages - 1) * pta->guest_page_size;
-	pta->gpa = align_down(pta->gpa, backing_src_pagesz);
+	args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
+	args->gpa = align_down(args->gpa, backing_src_pagesz);
 #ifdef __s390x__
 	/* Align to 1M (segment size) */
-	pta->gpa = align_down(pta->gpa, 1 << 20);
+	args->gpa = align_down(args->gpa, 1 << 20);
 #endif
-	pta->size = guest_num_pages * pta->guest_page_size;
+	args->size = guest_num_pages * args->guest_page_size;
 	pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
-		pta->gpa, pta->gpa + pta->size);
+		args->gpa, args->gpa + args->size);
 
 	/* Add extra memory slots for testing */
 	for (i = 0; i < slots; i++) {
 		uint64_t region_pages = guest_num_pages / slots;
-		vm_paddr_t region_start = pta->gpa + region_pages * pta->guest_page_size * i;
+		vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i;
 
 		vm_userspace_mem_region_add(vm, backing_src, region_start,
 					    PERF_TEST_MEM_SLOT_INDEX + i,
@@ -211,12 +211,12 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	}
 
 	/* Do mapping for the demand paging memory slot */
-	virt_map(vm, guest_test_virt_mem, pta->gpa, guest_num_pages);
+	virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages);
 
 	perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes,
 			      partition_vcpu_memory_access);
 
-	if (pta->nested) {
+	if (args->nested) {
 		pr_info("Configuring vCPUs to run in L2 (nested).\n");
 		perf_test_setup_nested(vm, nr_vcpus, vcpus);
 	}
-- 
GitLab


From 7812d80c0f89c2b610558e09647736b6632beb08 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 12 Oct 2022 09:57:29 -0700
Subject: [PATCH 1606/4122] KVM: selftests: Rename perf_test_util symbols to
 memstress

Replace the perf_test_ prefix on symbol names with memstress_ to match
the new file name.

"memstress" better describes the functionality proveded by this library,
which is to provide functionality for creating and running a VM that
stresses VM memory by reading and writing to guest memory on all vCPUs
in parallel.

"memstress" also contains the same number of chracters as "perf_test",
making it a drop-in replacement in symbols, e.g. function names, without
impacting line lengths. Also the lack of underscore between "mem" and
"stress" makes it clear "memstress" is a noun.

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221012165729.3505266-4-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/access_tracking_perf_test.c | 18 ++---
 .../selftests/kvm/demand_paging_test.c        | 18 ++---
 .../selftests/kvm/dirty_log_perf_test.c       | 34 +++++-----
 .../testing/selftests/kvm/include/memstress.h | 30 ++++----
 tools/testing/selftests/kvm/lib/memstress.c   | 68 +++++++++----------
 .../selftests/kvm/lib/x86_64/memstress.c      | 32 ++++-----
 .../kvm/memslot_modification_stress_test.c    | 12 ++--
 7 files changed, 106 insertions(+), 106 deletions(-)

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 534d18cc4a6af..02d3587cab0a3 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -126,7 +126,7 @@ static void mark_page_idle(int page_idle_fd, uint64_t pfn)
 }
 
 static void mark_vcpu_memory_idle(struct kvm_vm *vm,
-				  struct perf_test_vcpu_args *vcpu_args)
+				  struct memstress_vcpu_args *vcpu_args)
 {
 	int vcpu_idx = vcpu_args->vcpu_idx;
 	uint64_t base_gva = vcpu_args->gva;
@@ -148,7 +148,7 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
 	TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
 
 	for (page = 0; page < pages; page++) {
-		uint64_t gva = base_gva + page * perf_test_args.guest_page_size;
+		uint64_t gva = base_gva + page * memstress_args.guest_page_size;
 		uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
 
 		if (!pfn) {
@@ -220,10 +220,10 @@ static bool spin_wait_for_next_iteration(int *current_iteration)
 	return true;
 }
 
-static void vcpu_thread_main(struct perf_test_vcpu_args *vcpu_args)
+static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
 {
 	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
-	struct kvm_vm *vm = perf_test_args.vm;
+	struct kvm_vm *vm = memstress_args.vm;
 	int vcpu_idx = vcpu_args->vcpu_idx;
 	int current_iteration = 0;
 
@@ -279,7 +279,7 @@ static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *descripti
 static void access_memory(struct kvm_vm *vm, int nr_vcpus,
 			  enum access_type access, const char *description)
 {
-	perf_test_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100);
+	memstress_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100);
 	iteration_work = ITERATION_ACCESS_MEMORY;
 	run_iteration(vm, nr_vcpus, description);
 }
@@ -303,10 +303,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct kvm_vm *vm;
 	int nr_vcpus = params->nr_vcpus;
 
-	vm = perf_test_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
+	vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
 				 params->backing_src, !overlap_memory_access);
 
-	perf_test_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
+	memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
 
 	pr_info("\n");
 	access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
@@ -324,8 +324,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	/* Set done to signal the vCPU threads to exit */
 	done = true;
 
-	perf_test_join_vcpu_threads(nr_vcpus);
-	perf_test_destroy_vm(vm);
+	memstress_join_vcpu_threads(nr_vcpus);
+	memstress_destroy_vm(vm);
 }
 
 static void help(char *name)
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 37501e83d1d87..3a977ddf07b20 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -42,7 +42,7 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 static size_t demand_paging_size;
 static char *guest_data_prototype;
 
-static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
 {
 	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
 	int vcpu_idx = vcpu_args->vcpu_idx;
@@ -285,7 +285,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct kvm_vm *vm;
 	int r, i;
 
-	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
+	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
 				 p->src_type, p->partition_vcpu_memory_access);
 
 	demand_paging_size = get_backing_src_pagesz(p->src_type);
@@ -307,11 +307,11 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 		TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
 
 		for (i = 0; i < nr_vcpus; i++) {
-			struct perf_test_vcpu_args *vcpu_args;
+			struct memstress_vcpu_args *vcpu_args;
 			void *vcpu_hva;
 			void *vcpu_alias;
 
-			vcpu_args = &perf_test_args.vcpu_args[i];
+			vcpu_args = &memstress_args.vcpu_args[i];
 
 			/* Cache the host addresses of the region */
 			vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
@@ -329,17 +329,17 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 					    pipefds[i * 2], p->uffd_mode,
 					    p->uffd_delay, &uffd_args[i],
 					    vcpu_hva, vcpu_alias,
-					    vcpu_args->pages * perf_test_args.guest_page_size);
+					    vcpu_args->pages * memstress_args.guest_page_size);
 		}
 	}
 
 	pr_info("Finished creating vCPUs and starting uffd threads\n");
 
 	clock_gettime(CLOCK_MONOTONIC, &start);
-	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
+	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
 	pr_info("Started all vCPUs\n");
 
-	perf_test_join_vcpu_threads(nr_vcpus);
+	memstress_join_vcpu_threads(nr_vcpus);
 	ts_diff = timespec_elapsed(start);
 	pr_info("All vCPU threads joined\n");
 
@@ -358,10 +358,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	pr_info("Total guest execution time: %ld.%.9lds\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 	pr_info("Overall demand paging rate: %f pgs/sec\n",
-		perf_test_args.vcpu_args[0].pages * nr_vcpus /
+		memstress_args.vcpu_args[0].pages * nr_vcpus /
 		((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
 
-	perf_test_destroy_vm(vm);
+	memstress_destroy_vm(vm);
 
 	free(guest_data_prototype);
 	if (p->uffd_mode) {
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index d2bac493da5dd..c33e89012ae6f 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -67,7 +67,7 @@ static bool host_quit;
 static int iteration;
 static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
-static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
 {
 	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
 	int vcpu_idx = vcpu_args->vcpu_idx;
@@ -141,7 +141,7 @@ static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
 	int i;
 
 	for (i = 0; i < slots; i++) {
-		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
 		int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
 
 		vm_mem_region_set_flags(vm, slot, flags);
@@ -163,7 +163,7 @@ static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots
 	int i;
 
 	for (i = 0; i < slots; i++) {
-		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
 
 		kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
 	}
@@ -175,7 +175,7 @@ static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
 	int i;
 
 	for (i = 0; i < slots; i++) {
-		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
+		int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
 
 		kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
 	}
@@ -223,13 +223,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct timespec clear_dirty_log_total = (struct timespec){0};
 	int i;
 
-	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
 				 p->slots, p->backing_src,
 				 p->partition_vcpu_memory_access);
 
 	pr_info("Random seed: %u\n", p->random_seed);
-	perf_test_set_random_seed(vm, p->random_seed);
-	perf_test_set_write_percent(vm, p->write_percent);
+	memstress_set_random_seed(vm, p->random_seed);
+	memstress_set_write_percent(vm, p->write_percent);
 
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
@@ -259,9 +259,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	 * occurring during the dirty memory iterations below, which
 	 * would pollute the performance results.
 	 */
-	perf_test_set_write_percent(vm, 100);
-	perf_test_set_random_access(vm, false);
-	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
+	memstress_set_write_percent(vm, 100);
+	memstress_set_random_access(vm, false);
+	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
 	/* Allow the vCPUs to populate memory */
 	pr_debug("Starting iteration %d - Populating\n", iteration);
@@ -282,8 +282,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
 		ts_diff.tv_sec, ts_diff.tv_nsec);
 
-	perf_test_set_write_percent(vm, p->write_percent);
-	perf_test_set_random_access(vm, p->random_access);
+	memstress_set_write_percent(vm, p->write_percent);
+	memstress_set_random_access(vm, p->random_access);
 
 	while (iteration < p->iterations) {
 		/*
@@ -345,7 +345,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	 * wait for them to exit.
 	 */
 	host_quit = true;
-	perf_test_join_vcpu_threads(nr_vcpus);
+	memstress_join_vcpu_threads(nr_vcpus);
 
 	avg = timespec_div(get_dirty_log_total, p->iterations);
 	pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
@@ -361,7 +361,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	free_bitmaps(bitmaps, p->slots);
 	arch_cleanup_vm(vm);
-	perf_test_destroy_vm(vm);
+	memstress_destroy_vm(vm);
 }
 
 static void help(char *name)
@@ -466,7 +466,7 @@ int main(int argc, char *argv[])
 			guest_modes_cmdline(optarg);
 			break;
 		case 'n':
-			perf_test_args.nested = true;
+			memstress_args.nested = true;
 			break;
 		case 'o':
 			p.partition_vcpu_memory_access = false;
@@ -500,9 +500,9 @@ int main(int argc, char *argv[])
 	}
 
 	if (pcpu_list) {
-		kvm_parse_vcpu_pinning(pcpu_list, perf_test_args.vcpu_to_pcpu,
+		kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu,
 				       nr_vcpus);
-		perf_test_args.pin_vcpus = true;
+		memstress_args.pin_vcpus = true;
 	}
 
 	TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
index 64a523e061259..bbd2a302df100 100644
--- a/tools/testing/selftests/kvm/include/memstress.h
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -17,9 +17,9 @@
 
 #define DEFAULT_PER_VCPU_MEM_SIZE	(1 << 30) /* 1G */
 
-#define PERF_TEST_MEM_SLOT_INDEX	1
+#define MEMSTRESS_MEM_SLOT_INDEX	1
 
-struct perf_test_vcpu_args {
+struct memstress_vcpu_args {
 	uint64_t gpa;
 	uint64_t gva;
 	uint64_t pages;
@@ -29,7 +29,7 @@ struct perf_test_vcpu_args {
 	int vcpu_idx;
 };
 
-struct perf_test_args {
+struct memstress_args {
 	struct kvm_vm *vm;
 	/* The starting address and size of the guest test region. */
 	uint64_t gpa;
@@ -47,26 +47,26 @@ struct perf_test_args {
 	/* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
 	uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];
 
-	struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
+	struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS];
 };
 
-extern struct perf_test_args perf_test_args;
+extern struct memstress_args memstress_args;
 
-struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
+struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 				   uint64_t vcpu_memory_bytes, int slots,
 				   enum vm_mem_backing_src_type backing_src,
 				   bool partition_vcpu_memory_access);
-void perf_test_destroy_vm(struct kvm_vm *vm);
+void memstress_destroy_vm(struct kvm_vm *vm);
 
-void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
-void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
-void perf_test_set_random_access(struct kvm_vm *vm, bool random_access);
+void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
+void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
+void memstress_set_random_access(struct kvm_vm *vm, bool random_access);
 
-void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
-void perf_test_join_vcpu_threads(int vcpus);
-void perf_test_guest_code(uint32_t vcpu_id);
+void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *));
+void memstress_join_vcpu_threads(int vcpus);
+void memstress_guest_code(uint32_t vcpu_id);
 
-uint64_t perf_test_nested_pages(int nr_vcpus);
-void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
+uint64_t memstress_nested_pages(int nr_vcpus);
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
 
 #endif /* SELFTEST_KVM_MEMSTRESS_H */
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 255f77d863304..503da78c558dd 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -10,7 +10,7 @@
 #include "memstress.h"
 #include "processor.h"
 
-struct perf_test_args perf_test_args;
+struct memstress_args memstress_args;
 
 /*
  * Guest virtual memory offset of the testing memory slot.
@@ -33,7 +33,7 @@ struct vcpu_thread {
 static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS];
 
 /* The function run by each vCPU thread, as provided by the test. */
-static void (*vcpu_thread_fn)(struct perf_test_vcpu_args *);
+static void (*vcpu_thread_fn)(struct memstress_vcpu_args *);
 
 /* Set to true once all vCPU threads are up and running. */
 static bool all_vcpu_threads_running;
@@ -44,10 +44,10 @@ static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
  * Continuously write to the first 8 bytes of each page in the
  * specified region.
  */
-void perf_test_guest_code(uint32_t vcpu_idx)
+void memstress_guest_code(uint32_t vcpu_idx)
 {
-	struct perf_test_args *args = &perf_test_args;
-	struct perf_test_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx];
+	struct memstress_args *args = &memstress_args;
+	struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx];
 	struct guest_random_state rand_state;
 	uint64_t gva;
 	uint64_t pages;
@@ -82,13 +82,13 @@ void perf_test_guest_code(uint32_t vcpu_idx)
 	}
 }
 
-void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
+void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
 			   struct kvm_vcpu *vcpus[],
 			   uint64_t vcpu_memory_bytes,
 			   bool partition_vcpu_memory_access)
 {
-	struct perf_test_args *args = &perf_test_args;
-	struct perf_test_vcpu_args *vcpu_args;
+	struct memstress_args *args = &memstress_args;
+	struct memstress_vcpu_args *vcpu_args;
 	int i;
 
 	for (i = 0; i < nr_vcpus; i++) {
@@ -118,12 +118,12 @@ void perf_test_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
 	}
 }
 
-struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
+struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 				   uint64_t vcpu_memory_bytes, int slots,
 				   enum vm_mem_backing_src_type backing_src,
 				   bool partition_vcpu_memory_access)
 {
-	struct perf_test_args *args = &perf_test_args;
+	struct memstress_args *args = &memstress_args;
 	struct kvm_vm *vm;
 	uint64_t guest_num_pages, slot0_pages = 0;
 	uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
@@ -157,7 +157,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	 * in-memory data structures.
 	 */
 	if (args->nested)
-		slot0_pages += perf_test_nested_pages(nr_vcpus);
+		slot0_pages += memstress_nested_pages(nr_vcpus);
 
 	/*
 	 * Pass guest_num_pages to populate the page tables for test memory.
@@ -165,7 +165,7 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 	 * effect as KVM allows aliasing HVAs in meslots.
 	 */
 	vm = __vm_create_with_vcpus(mode, nr_vcpus, slot0_pages + guest_num_pages,
-				    perf_test_guest_code, vcpus);
+				    memstress_guest_code, vcpus);
 
 	args->vm = vm;
 
@@ -206,59 +206,59 @@ struct kvm_vm *perf_test_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 		vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i;
 
 		vm_userspace_mem_region_add(vm, backing_src, region_start,
-					    PERF_TEST_MEM_SLOT_INDEX + i,
+					    MEMSTRESS_MEM_SLOT_INDEX + i,
 					    region_pages, 0);
 	}
 
 	/* Do mapping for the demand paging memory slot */
 	virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages);
 
-	perf_test_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes,
+	memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes,
 			      partition_vcpu_memory_access);
 
 	if (args->nested) {
 		pr_info("Configuring vCPUs to run in L2 (nested).\n");
-		perf_test_setup_nested(vm, nr_vcpus, vcpus);
+		memstress_setup_nested(vm, nr_vcpus, vcpus);
 	}
 
 	ucall_init(vm, NULL);
 
 	/* Export the shared variables to the guest. */
-	sync_global_to_guest(vm, perf_test_args);
+	sync_global_to_guest(vm, memstress_args);
 
 	return vm;
 }
 
-void perf_test_destroy_vm(struct kvm_vm *vm)
+void memstress_destroy_vm(struct kvm_vm *vm)
 {
 	ucall_uninit(vm);
 	kvm_vm_free(vm);
 }
 
-void perf_test_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
+void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
 {
-	perf_test_args.write_percent = write_percent;
-	sync_global_to_guest(vm, perf_test_args.write_percent);
+	memstress_args.write_percent = write_percent;
+	sync_global_to_guest(vm, memstress_args.write_percent);
 }
 
-void perf_test_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
+void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
 {
-	perf_test_args.random_seed = random_seed;
-	sync_global_to_guest(vm, perf_test_args.random_seed);
+	memstress_args.random_seed = random_seed;
+	sync_global_to_guest(vm, memstress_args.random_seed);
 }
 
-void perf_test_set_random_access(struct kvm_vm *vm, bool random_access)
+void memstress_set_random_access(struct kvm_vm *vm, bool random_access)
 {
-	perf_test_args.random_access = random_access;
-	sync_global_to_guest(vm, perf_test_args.random_access);
+	memstress_args.random_access = random_access;
+	sync_global_to_guest(vm, memstress_args.random_access);
 }
 
-uint64_t __weak perf_test_nested_pages(int nr_vcpus)
+uint64_t __weak memstress_nested_pages(int nr_vcpus)
 {
 	return 0;
 }
 
-void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus)
+void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus)
 {
 	pr_info("%s() not support on this architecture, skipping.\n", __func__);
 	exit(KSFT_SKIP);
@@ -269,8 +269,8 @@ static void *vcpu_thread_main(void *data)
 	struct vcpu_thread *vcpu = data;
 	int vcpu_idx = vcpu->vcpu_idx;
 
-	if (perf_test_args.pin_vcpus)
-		kvm_pin_this_task_to_pcpu(perf_test_args.vcpu_to_pcpu[vcpu_idx]);
+	if (memstress_args.pin_vcpus)
+		kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
 
 	WRITE_ONCE(vcpu->running, true);
 
@@ -283,13 +283,13 @@ static void *vcpu_thread_main(void *data)
 	while (!READ_ONCE(all_vcpu_threads_running))
 		;
 
-	vcpu_thread_fn(&perf_test_args.vcpu_args[vcpu_idx]);
+	vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]);
 
 	return NULL;
 }
 
-void perf_test_start_vcpu_threads(int nr_vcpus,
-				  void (*vcpu_fn)(struct perf_test_vcpu_args *))
+void memstress_start_vcpu_threads(int nr_vcpus,
+				  void (*vcpu_fn)(struct memstress_vcpu_args *))
 {
 	int i;
 
@@ -313,7 +313,7 @@ void perf_test_start_vcpu_threads(int nr_vcpus,
 	WRITE_ONCE(all_vcpu_threads_running, true);
 }
 
-void perf_test_join_vcpu_threads(int nr_vcpus)
+void memstress_join_vcpu_threads(int nr_vcpus)
 {
 	int i;
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
index 0bb717ac2cc57..2b3b47e4a973e 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
@@ -15,21 +15,21 @@
 #include "processor.h"
 #include "vmx.h"
 
-void perf_test_l2_guest_code(uint64_t vcpu_id)
+void memstress_l2_guest_code(uint64_t vcpu_id)
 {
-	perf_test_guest_code(vcpu_id);
+	memstress_guest_code(vcpu_id);
 	vmcall();
 }
 
-extern char perf_test_l2_guest_entry[];
+extern char memstress_l2_guest_entry[];
 __asm__(
-"perf_test_l2_guest_entry:"
+"memstress_l2_guest_entry:"
 "	mov (%rsp), %rdi;"
-"	call perf_test_l2_guest_code;"
+"	call memstress_l2_guest_code;"
 "	ud2;"
 );
 
-static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
@@ -42,14 +42,14 @@ static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
 
 	rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
 	*rsp = vcpu_id;
-	prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp);
+	prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
 
 	GUEST_ASSERT(!vmlaunch());
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 	GUEST_DONE();
 }
 
-uint64_t perf_test_nested_pages(int nr_vcpus)
+uint64_t memstress_nested_pages(int nr_vcpus)
 {
 	/*
 	 * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
@@ -59,7 +59,7 @@ uint64_t perf_test_nested_pages(int nr_vcpus)
 	return 513 + 10 * nr_vcpus;
 }
 
-void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
 {
 	uint64_t start, end;
 
@@ -72,12 +72,12 @@ void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
 	 */
 	nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
 
-	start = align_down(perf_test_args.gpa, PG_SIZE_1G);
-	end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G);
+	start = align_down(memstress_args.gpa, PG_SIZE_1G);
+	end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
 	nested_identity_map_1g(vmx, vm, start, end - start);
 }
 
-void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
 {
 	struct vmx_pages *vmx, *vmx0 = NULL;
 	struct kvm_regs regs;
@@ -90,7 +90,7 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc
 		vmx = vcpu_alloc_vmx(vm, &vmx_gva);
 
 		if (vcpu_id == 0) {
-			perf_test_setup_ept(vmx, vm);
+			memstress_setup_ept(vmx, vm);
 			vmx0 = vmx;
 		} else {
 			/* Share the same EPT table across all vCPUs. */
@@ -100,11 +100,11 @@ void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc
 		}
 
 		/*
-		 * Override the vCPU to run perf_test_l1_guest_code() which will
-		 * bounce it into L2 before calling perf_test_guest_code().
+		 * Override the vCPU to run memstress_l1_guest_code() which will
+		 * bounce it into L2 before calling memstress_guest_code().
 		 */
 		vcpu_regs_get(vcpus[vcpu_id], &regs);
-		regs.rip = (unsigned long) perf_test_l1_guest_code;
+		regs.rip = (unsigned long) memstress_l1_guest_code;
 		vcpu_regs_set(vcpus[vcpu_id], &regs);
 		vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
 	}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 0490bd4606e50..d07e921bfcc53 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -36,7 +36,7 @@ static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 
 static bool run_vcpus = true;
 
-static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
 {
 	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
 	struct kvm_run *run;
@@ -75,7 +75,7 @@ static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
 	 * Add the dummy memslot just below the memstress memslot, which is
 	 * at the top of the guest physical address space.
 	 */
-	gpa = perf_test_args.gpa - pages * vm->page_size;
+	gpa = memstress_args.gpa - pages * vm->page_size;
 
 	for (i = 0; i < nr_modifications; i++) {
 		usleep(delay);
@@ -97,13 +97,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	struct test_params *p = arg;
 	struct kvm_vm *vm;
 
-	vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
+	vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
 				 VM_MEM_SRC_ANONYMOUS,
 				 p->partition_vcpu_memory_access);
 
 	pr_info("Finished creating vCPUs\n");
 
-	perf_test_start_vcpu_threads(nr_vcpus, vcpu_worker);
+	memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
 
 	pr_info("Started all vCPUs\n");
 
@@ -111,10 +111,10 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	run_vcpus = false;
 
-	perf_test_join_vcpu_threads(nr_vcpus);
+	memstress_join_vcpu_threads(nr_vcpus);
 	pr_info("All vCPU threads joined\n");
 
-	perf_test_destroy_vm(vm);
+	memstress_destroy_vm(vm);
 }
 
 static void help(char *name)
-- 
GitLab


From c4d9d95f84d7f4758db76f358b03a0d6809c929b Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 9 Nov 2022 10:49:13 -0800
Subject: [PATCH 1607/4122] perf cpumap: Tidy libperf includes

Use public API when possible, don't include internal API in header
files in evsel.h. Fix any related breakages.

Committer note:

There was one missing case, when building for arm64:

  arch/arm64/util/pmu.c: In function 'pmu_events_table__find':
  arch/arm64/util/pmu.c:18:30: error: invalid use of undefined type 'struct perf_cpu_map'
     18 |                 if (pmu->cpus->nr != cpu__max_cpu().cpu)
        |                              ^~

Fix it by adding one more exception, including <internal/cpumap.h>

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-14-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/util/pmu.c | 1 +
 tools/perf/tests/cpumap.c        | 2 +-
 tools/perf/util/auxtrace.h       | 2 +-
 tools/perf/util/cpumap.c         | 1 +
 tools/perf/util/cpumap.h         | 2 +-
 tools/perf/util/evsel.h          | 2 --
 6 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c
index f849b1e88d433..477e513972a40 100644
--- a/tools/perf/arch/arm64/util/pmu.c
+++ b/tools/perf/arch/arm64/util/pmu.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 
+#include <internal/cpumap.h>
 #include "../../../util/cpumap.h"
 #include "../../../util/pmu.h"
 
diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c
index 7c873c6ae3eb9..3150fc1fed6f5 100644
--- a/tools/perf/tests/cpumap.c
+++ b/tools/perf/tests/cpumap.c
@@ -6,7 +6,7 @@
 #include "util/synthetic-events.h"
 #include <string.h>
 #include <linux/bitops.h>
-#include <perf/cpumap.h>
+#include <internal/cpumap.h>
 #include "debug.h"
 
 struct machine;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 6a0f9b98f059b..2cf63d3778313 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -15,7 +15,7 @@
 #include <linux/list.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
-#include <internal/cpumap.h>
+#include <perf/cpumap.h>
 #include <asm/bitsperlong.h>
 #include <asm/barrier.h>
 
diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c
index 6e3fcf523de93..5e564974fba4f 100644
--- a/tools/perf/util/cpumap.c
+++ b/tools/perf/util/cpumap.c
@@ -12,6 +12,7 @@
 
 #include <linux/ctype.h>
 #include <linux/zalloc.h>
+#include <internal/cpumap.h>
 
 static struct perf_cpu max_cpu_num;
 static struct perf_cpu max_present_cpu_num;
diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h
index da28b3146ef9f..c2f5824a3a22b 100644
--- a/tools/perf/util/cpumap.h
+++ b/tools/perf/util/cpumap.h
@@ -4,8 +4,8 @@
 
 #include <stdbool.h>
 #include <stdio.h>
-#include <internal/cpumap.h>
 #include <perf/cpumap.h>
+#include <linux/refcount.h>
 
 /** Identify where counts are aggregated, -1 implies not to aggregate. */
 struct aggr_cpu_id {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 467bb0b32fef8..f3485799ddf9e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -10,8 +10,6 @@
 #include <internal/evsel.h>
 #include <perf/evsel.h>
 #include "symbol_conf.h"
-#include <internal/cpumap.h>
-#include <perf/cpumap.h>
 
 struct bpf_object;
 struct cgroup;
-- 
GitLab


From 746bd29e348f99b44c14cb2b2a57f1d3efb66538 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 15 Nov 2022 23:22:11 -0800
Subject: [PATCH 1608/4122] perf build: Use tools/lib headers from install path

Switch -I from tools/lib to the install path for the tools/lib
libraries. Add the include_headers build targets to prepare target, as
well as pmu-events.c compilation that dependes on libperf.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221109184914.1357295-15-irogers@google.com
Link: http://lore.kernel.org/lkml/20221116072211.2837834-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config |  2 --
 tools/perf/Makefile.perf   | 16 ++++++++++++++--
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index a7f6c0669fae4..9cc3c48f32881 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -349,7 +349,6 @@ ifeq ($(DEBUG),0)
   endif
 endif
 
-INC_FLAGS += -I$(srctree)/tools/lib/perf/include
 INC_FLAGS += -I$(src-perf)/util/include
 INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include
 INC_FLAGS += -I$(srctree)/tools/include/
@@ -367,7 +366,6 @@ endif
 
 INC_FLAGS += -I$(src-perf)/util
 INC_FLAGS += -I$(src-perf)
-INC_FLAGS += -I$(srctree)/tools/lib/
 
 CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
 
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6c1a2a3ccc381..8c0df762fb02f 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -305,6 +305,7 @@ LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include
 LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a
 export LIBTRACEEVENT
 LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list
+CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include
 
 #
 # The static build has no dynsym table, so this does not work for
@@ -322,6 +323,7 @@ LIBAPI_DESTDIR = $(LIBAPI_OUTPUT)
 LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include
 LIBAPI = $(LIBAPI_OUTPUT)/libapi.a
 export LIBAPI
+CFLAGS += -I$(LIBAPI_OUTPUT)/include
 
 ifneq ($(OUTPUT),)
   LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf
@@ -331,6 +333,7 @@ endif
 LIBBPF_DESTDIR = $(LIBBPF_OUTPUT)
 LIBBPF_INCLUDE = $(LIBBPF_DESTDIR)/include
 LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a
+CFLAGS += -I$(LIBBPF_OUTPUT)/include
 
 ifneq ($(OUTPUT),)
   LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd
@@ -340,6 +343,7 @@ endif
 LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT)
 LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include
 LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a
+CFLAGS += -I$(LIBSUBCMD_OUTPUT)/include
 
 ifneq ($(OUTPUT),)
   LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol
@@ -349,6 +353,7 @@ endif
 LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT)
 LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include
 LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a
+CFLAGS += -I$(LIBSYMBOL_OUTPUT)/include
 
 ifneq ($(OUTPUT),)
   LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf
@@ -359,6 +364,7 @@ LIBPERF_DESTDIR = $(LIBPERF_OUTPUT)
 LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include
 LIBPERF = $(LIBPERF_OUTPUT)/libperf.a
 export LIBPERF
+CFLAGS += -I$(LIBPERF_OUTPUT)/include
 
 # python extension build directories
 PYTHON_EXTBUILD     := $(OUTPUT)python_ext_build/
@@ -691,14 +697,14 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 $(PERF_IN): prepare FORCE
 	$(Q)$(MAKE) $(build)=perf
 
-$(PMU_EVENTS_IN): FORCE
+$(PMU_EVENTS_IN): FORCE prepare
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
 
 $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
 		$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
 
-$(GTK_IN): FORCE
+$(GTK_IN): FORCE prepare
 	$(Q)$(MAKE) $(build)=gtk
 
 $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS)
@@ -774,6 +780,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(rename_flags_array) \
 	$(arch_errno_name_array) \
 	$(sync_file_range_arrays) \
+	$(LIBAPI) \
+	$(LIBBPF) \
+	$(LIBPERF) \
+	$(LIBSUBCMD) \
+	$(LIBSYMBOL) \
+	$(LIBTRACEEVENT) \
 	bpf-skel
 
 $(OUTPUT)%.o: %.c prepare FORCE
-- 
GitLab


From 35fef9b471c70413f8277984920129ddf601f5e9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 16 Nov 2022 15:40:46 -0300
Subject: [PATCH 1609/4122] libperf: Remove recursive perf/cpumap.h include
 from perf/cpumap.h

It just hits the header guard, becoming a no-op, ditch it.

Acked-by: Ian Rogers <irogers@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/perf/include/perf/cpumap.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 03aceb72a783c..98e463ec15a7f 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -3,7 +3,6 @@
 #define __LIBPERF_CPUMAP_H
 
 #include <perf/core.h>
-#include <perf/cpumap.h>
 #include <stdio.h>
 #include <stdbool.h>
 
-- 
GitLab


From 63a3bf5e8d9e79ce456c8f73d4395a5a51d841b1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 16 Nov 2022 15:43:34 -0300
Subject: [PATCH 1610/4122] libperf: Add missing 'struct perf_cpu_map' forward
 declaration to perf/cpumap.h

The perf/cpumap.h header is getting the 'struct perf_cpu_map' forward
declaration by luck, add it.

Acked-by: Ian Rogers <irogers@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/perf/include/perf/cpumap.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h
index 98e463ec15a7f..3f43f770cdac5 100644
--- a/tools/lib/perf/include/perf/cpumap.h
+++ b/tools/lib/perf/include/perf/cpumap.h
@@ -11,6 +11,8 @@ struct perf_cpu {
 	int cpu;
 };
 
+struct perf_cpu_map;
+
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);
 LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list);
-- 
GitLab


From 86fdd15e10e404e70ecb2a3bff24d70356d42b36 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Tue, 15 Nov 2022 10:37:12 +0800
Subject: [PATCH 1611/4122] iio: fix memory leak in
 iio_device_register_eventset()

When iio_device_register_sysfs_group() returns failed,
iio_device_register_eventset() needs to free attrs array.

Otherwise, kmemleak would scan & report memory leak as below:

unreferenced object 0xffff88810a1cc3c0 (size 32):
  comm "100-i2c-vcnl302", pid 728, jiffies 4295052307 (age 156.027s)
  backtrace:
    __kmalloc+0x46/0x1b0
    iio_device_register_eventset at drivers/iio/industrialio-event.c:541
    __iio_device_register at drivers/iio/industrialio-core.c:1959
    __devm_iio_device_register at drivers/iio/industrialio-core.c:2040

Fixes: 32f171724e5c ("iio: core: rework iio device group creation")
Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Link: https://lore.kernel.org/r/20221115023712.3726854-1-zengheng4@huawei.com
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-event.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index 3d78da2531a9a..727e2ef66aa4b 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -556,7 +556,7 @@ int iio_device_register_eventset(struct iio_dev *indio_dev)
 
 	ret = iio_device_register_sysfs_group(indio_dev, &ev_int->group);
 	if (ret)
-		goto error_free_setup_event_lines;
+		goto error_free_group_attrs;
 
 	ev_int->ioctl_handler.ioctl = iio_event_ioctl;
 	iio_device_ioctl_handler_register(&iio_dev_opaque->indio_dev,
@@ -564,6 +564,8 @@ int iio_device_register_eventset(struct iio_dev *indio_dev)
 
 	return 0;
 
+error_free_group_attrs:
+	kfree(ev_int->group.attrs);
 error_free_setup_event_lines:
 	iio_free_chan_devattr_list(&ev_int->dev_attr_list);
 	kfree(ev_int);
-- 
GitLab


From a4ff8e7a71601321f7bf7b58ede664dc0d774274 Mon Sep 17 00:00:00 2001
From: Li Ming <ming4.li@intel.com>
Date: Wed, 16 Nov 2022 09:56:37 +0800
Subject: [PATCH 1612/4122] PCI/DOE: Fix maximum data object length
 miscalculation

Per PCIe r6.0, sec 6.30.1, a data object Length of 0x0 indicates 2^18
DWORDs (256K DW or 1MB) being transferred.  Adjust the value of data object
length for this case on both sending side and receiving side.

Don't bother checking whether Length is greater than SZ_1M because all
values of the 18-bit Length field are valid, and it is impossible to
represent anything larger than SZ_1M:

  0x00000    256K DW (1M bytes)
  0x00001       1 DW (4 bytes)
  ...
  0x3ffff  256K-1 DW (1M - 4 bytes)

[bhelgaas: commit log]
Link: https://lore.kernel.org/r/20221116015637.3299664-1-ming4.li@intel.com
Fixes: 9d24322e887b ("PCI/DOE: Add DOE mailbox support functions")
Signed-off-by: Li Ming <ming4.li@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
Cc: stable@vger.kernel.org	# v6.0+
---
 drivers/pci/doe.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/doe.c b/drivers/pci/doe.c
index e402f05068a53..66d9ab2886468 100644
--- a/drivers/pci/doe.c
+++ b/drivers/pci/doe.c
@@ -29,6 +29,9 @@
 #define PCI_DOE_FLAG_CANCEL	0
 #define PCI_DOE_FLAG_DEAD	1
 
+/* Max data object length is 2^18 dwords */
+#define PCI_DOE_MAX_LENGTH	(1 << 18)
+
 /**
  * struct pci_doe_mb - State for a single DOE mailbox
  *
@@ -107,6 +110,7 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
 {
 	struct pci_dev *pdev = doe_mb->pdev;
 	int offset = doe_mb->cap_offset;
+	size_t length;
 	u32 val;
 	int i;
 
@@ -123,15 +127,20 @@ static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
 	if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
 		return -EIO;
 
+	/* Length is 2 DW of header + length of payload in DW */
+	length = 2 + task->request_pl_sz / sizeof(u32);
+	if (length > PCI_DOE_MAX_LENGTH)
+		return -EIO;
+	if (length == PCI_DOE_MAX_LENGTH)
+		length = 0;
+
 	/* Write DOE Header */
 	val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->prot.vid) |
 		FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->prot.type);
 	pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
-	/* Length is 2 DW of header + length of payload in DW */
 	pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
 			       FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
-					  2 + task->request_pl_sz /
-						sizeof(u32)));
+					  length));
 	for (i = 0; i < task->request_pl_sz / sizeof(u32); i++)
 		pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
 				       task->request_pl[i]);
@@ -178,7 +187,10 @@ static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *tas
 	pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
 
 	length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val);
-	if (length > SZ_1M || length < 2)
+	/* A value of 0x0 indicates max data object length */
+	if (!length)
+		length = PCI_DOE_MAX_LENGTH;
+	if (length < 2)
 		return -EIO;
 
 	/* First 2 dwords have already been read */
-- 
GitLab


From 60cfac17d0a1c28cd41959e95ba1e0ecc47165e7 Mon Sep 17 00:00:00 2001
From: Vincent Whitchurch <vincent.whitchurch@axis.com>
Date: Tue, 1 Mar 2022 14:12:20 +0100
Subject: [PATCH 1613/4122] rtc: pcf8563: clear RTC_FEATURE_ALARM if no irq

If there is no IRQ hooked up, clear RTC_FEATURE_ALARM to make the core
ensure that userspace is made aware that alarms are not supported.

Signed-off-by: Vincent Whitchurch <vincent.whitchurch@axis.com>
Link: https://lore.kernel.org/r/20220301131220.4011810-1-vincent.whitchurch@axis.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf8563.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/rtc/rtc-pcf8563.c b/drivers/rtc/rtc-pcf8563.c
index 11fa9788558be..0a7fd94784651 100644
--- a/drivers/rtc/rtc-pcf8563.c
+++ b/drivers/rtc/rtc-pcf8563.c
@@ -567,6 +567,8 @@ static int pcf8563_probe(struct i2c_client *client)
 								client->irq);
 			return err;
 		}
+	} else {
+		clear_bit(RTC_FEATURE_ALARM, pcf8563->rtc->features);
 	}
 
 	err = devm_rtc_register_device(pcf8563->rtc);
-- 
GitLab


From eec79501cce6e8965e92174760c6a9e92d78a038 Mon Sep 17 00:00:00 2001
From: Riwen Lu <luriwen@kylinos.cn>
Date: Wed, 10 Aug 2022 15:01:09 +0800
Subject: [PATCH 1614/4122] rtc: efi: Add wakeup support

Add wakeup support for rtc-efi, so we can wakeup from S3/S4/S5 through
rtcwake.

Signed-off-by: Riwen Lu <luriwen@kylinos.cn>
Link: https://lore.kernel.org/r/TYWP286MB260191455377CEBD2336557EB1659@TYWP286MB2601.JPNP286.PROD.OUTLOOK.COM
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-efi.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index 11850c2880ad4..e991cccdb6e9c 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -271,6 +271,8 @@ static int __init efi_rtc_probe(struct platform_device *dev)
 	clear_bit(RTC_FEATURE_UPDATE_INTERRUPT, rtc->features);
 	set_bit(RTC_FEATURE_ALARM_WAKEUP_ONLY, rtc->features);
 
+	device_init_wakeup(&dev->dev, true);
+
 	return devm_rtc_register_device(rtc);
 }
 
-- 
GitLab


From fe0157ba679dc95407dd5eae6550a4ceaea75040 Mon Sep 17 00:00:00 2001
From: paulmn <paulmn@axis.com>
Date: Mon, 29 Aug 2022 14:46:39 +0200
Subject: [PATCH 1615/4122] rtc: pcf8523: fix for stop bit

Bugfix for an issue detected when a goldcap capacitor gets
fully discharged due to a long absence of the power supply,
and then recharges again. The RTC failed to continue to keep
the real-time clock.

This was caused by the incorrect handling of the STOP bit in
the RTC internal register.  This fix solves the problem.

Signed-off-by: paulmn <paulmn@axis.com>
Link: https://lore.kernel.org/r/20220829124639.10906-1-paulmn@axis.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf8523.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c
index 6174b3fd4b985..92de99f11a7a5 100644
--- a/drivers/rtc/rtc-pcf8523.c
+++ b/drivers/rtc/rtc-pcf8523.c
@@ -99,24 +99,24 @@ static irqreturn_t pcf8523_irq(int irq, void *dev_id)
 static int pcf8523_rtc_read_time(struct device *dev, struct rtc_time *tm)
 {
 	struct pcf8523 *pcf8523 = dev_get_drvdata(dev);
-	u8 regs[7];
+	u8 regs[10];
 	int err;
 
-	err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_SECONDS, regs,
+	err = regmap_bulk_read(pcf8523->regmap, PCF8523_REG_CONTROL1, regs,
 			       sizeof(regs));
 	if (err < 0)
 		return err;
 
-	if (regs[0] & PCF8523_SECONDS_OS)
+	if ((regs[0] & PCF8523_CONTROL1_STOP) || (regs[3] & PCF8523_SECONDS_OS))
 		return -EINVAL;
 
-	tm->tm_sec = bcd2bin(regs[0] & 0x7f);
-	tm->tm_min = bcd2bin(regs[1] & 0x7f);
-	tm->tm_hour = bcd2bin(regs[2] & 0x3f);
-	tm->tm_mday = bcd2bin(regs[3] & 0x3f);
-	tm->tm_wday = regs[4] & 0x7;
-	tm->tm_mon = bcd2bin(regs[5] & 0x1f) - 1;
-	tm->tm_year = bcd2bin(regs[6]) + 100;
+	tm->tm_sec = bcd2bin(regs[3] & 0x7f);
+	tm->tm_min = bcd2bin(regs[4] & 0x7f);
+	tm->tm_hour = bcd2bin(regs[5] & 0x3f);
+	tm->tm_mday = bcd2bin(regs[6] & 0x3f);
+	tm->tm_wday = regs[7] & 0x7;
+	tm->tm_mon = bcd2bin(regs[8] & 0x1f) - 1;
+	tm->tm_year = bcd2bin(regs[9]) + 100;
 
 	return 0;
 }
-- 
GitLab


From a6ceee26fd5ed9b5bd37322b1ca88e4548cee4a3 Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Wed, 21 Sep 2022 09:41:41 +0200
Subject: [PATCH 1616/4122] rtc: pcf85063: Fix reading alarm

If the alarms are disabled the topmost bit (AEN_*) is set in the alarm
registers. This is also interpreted in BCD number leading to this warning:
rtc rtc0: invalid alarm value: 2022-09-21T80:80:80

Fix this by masking alarm enabling and reserved bits.

Fixes: 05cb3a56ee8c ("rtc: pcf85063: add alarm support")
Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/20220921074141.3903104-1-alexander.stein@ew.tq-group.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf85063.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index 095891999da11..99f9cc57c7b30 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -169,10 +169,10 @@ static int pcf85063_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
 	if (ret)
 		return ret;
 
-	alrm->time.tm_sec = bcd2bin(buf[0]);
-	alrm->time.tm_min = bcd2bin(buf[1]);
-	alrm->time.tm_hour = bcd2bin(buf[2]);
-	alrm->time.tm_mday = bcd2bin(buf[3]);
+	alrm->time.tm_sec = bcd2bin(buf[0] & 0x7f);
+	alrm->time.tm_min = bcd2bin(buf[1] & 0x7f);
+	alrm->time.tm_hour = bcd2bin(buf[2] & 0x3f);
+	alrm->time.tm_mday = bcd2bin(buf[3] & 0x3f);
 
 	ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &val);
 	if (ret)
-- 
GitLab


From 9a48b4a6fd512bdaed7e38ba844be743163d49c6 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:13 -0800
Subject: [PATCH 1617/4122] xfs: fully initialize xfs_da_args in
 xchk_directory_blocks

While running the online fsck test suite, I noticed the following
assertion in the kernel log (edited for brevity):

XFS: Assertion failed: 0, file: fs/xfs/xfs_health.c, line: 571
------------[ cut here ]------------
WARNING: CPU: 3 PID: 11667 at fs/xfs/xfs_message.c:104 assfail+0x46/0x4a [xfs]
CPU: 3 PID: 11667 Comm: xfs_scrub Tainted: G        W         5.19.0-rc7-xfsx #rc7 6e6475eb29fd9dda3181f81b7ca7ff961d277a40
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
RIP: 0010:assfail+0x46/0x4a [xfs]
Call Trace:
 <TASK>
 xfs_dir2_isblock+0xcc/0xe0
 xchk_directory_blocks+0xc7/0x420
 xchk_directory+0x53/0xb0
 xfs_scrub_metadata+0x2b6/0x6b0
 xfs_scrubv_metadata+0x35e/0x4d0
 xfs_ioc_scrubv_metadata+0x111/0x160
 xfs_file_ioctl+0x4ec/0xef0
 __x64_sys_ioctl+0x82/0xa0
 do_syscall_64+0x2b/0x80
 entry_SYSCALL_64_after_hwframe+0x46/0xb0

This assertion triggers in xfs_dirattr_mark_sick when the caller passes
in a whichfork value that is neither of XFS_{DATA,ATTR}_FORK.  The cause
of this is that xchk_directory_blocks only partially initializes the
xfs_da_args structure that is passed to xfs_dir2_isblock.  If the data
fork is not correct, the XFS_IS_CORRUPT clause will trigger.  My
development branch reports this failure to the health monitoring
subsystem, which accesses the uninitialized args->whichfork field,
leading the the assertion tripping.  We really shouldn't be passing
random stack contents around, so the solution here is to force the
compiler to zero-initialize the struct.

Found by fuzzing u3.bmx[0].blockcount = middlebit on xfs/1554.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/dir.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index 5c87800ab223d..d1b0f23c2c593 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -666,7 +666,12 @@ xchk_directory_blocks(
 	struct xfs_scrub	*sc)
 {
 	struct xfs_bmbt_irec	got;
-	struct xfs_da_args	args;
+	struct xfs_da_args	args = {
+		.dp		= sc ->ip,
+		.whichfork	= XFS_DATA_FORK,
+		.geo		= sc->mp->m_dir_geo,
+		.trans		= sc->tp,
+	};
 	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK);
 	struct xfs_mount	*mp = sc->mp;
 	xfs_fileoff_t		leaf_lblk;
@@ -689,9 +694,6 @@ xchk_directory_blocks(
 	free_lblk = XFS_B_TO_FSB(mp, XFS_DIR2_FREE_OFFSET);
 
 	/* Is this a block dir? */
-	args.dp = sc->ip;
-	args.geo = mp->m_dir_geo;
-	args.trans = sc->tp;
 	error = xfs_dir2_isblock(&args, &is_block);
 	if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, lblk, &error))
 		goto out;
-- 
GitLab


From be1317fdb8d4e3ccbac43e199b360c248c600d99 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:14 -0800
Subject: [PATCH 1618/4122] xfs: don't track the AGFL buffer in the scrub AG
 context

While scrubbing an allocation group, we don't need to hold the AGFL
buffer as part of the scrub context.  All that is necessary to lock an
AG is to hold the AGI and AGF buffers, so fix all the existing users of
the AGFL buffer to grab them only when necessary.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/agheader.c        | 47 +++++++++++++++++++++-------------
 fs/xfs/scrub/agheader_repair.c |  1 -
 fs/xfs/scrub/common.c          |  8 ------
 fs/xfs/scrub/repair.c          | 11 ++++----
 fs/xfs/scrub/scrub.h           |  1 -
 5 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index b7b838bd4ba43..af284baa6f4cb 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -609,9 +609,16 @@ out:
 /* AGFL */
 
 struct xchk_agfl_info {
-	unsigned int		sz_entries;
+	/* Number of AGFL entries that the AGF claims are in use. */
+	unsigned int		agflcount;
+
+	/* Number of AGFL entries that we found. */
 	unsigned int		nr_entries;
+
+	/* Buffer to hold AGFL entries for extent checking. */
 	xfs_agblock_t		*entries;
+
+	struct xfs_buf		*agfl_bp;
 	struct xfs_scrub	*sc;
 };
 
@@ -641,10 +648,10 @@ xchk_agfl_block(
 	struct xfs_scrub	*sc = sai->sc;
 
 	if (xfs_verify_agbno(sc->sa.pag, agbno) &&
-	    sai->nr_entries < sai->sz_entries)
+	    sai->nr_entries < sai->agflcount)
 		sai->entries[sai->nr_entries++] = agbno;
 	else
-		xchk_block_set_corrupt(sc, sc->sa.agfl_bp);
+		xchk_block_set_corrupt(sc, sai->agfl_bp);
 
 	xchk_agfl_block_xref(sc, agbno);
 
@@ -696,19 +703,26 @@ int
 xchk_agfl(
 	struct xfs_scrub	*sc)
 {
-	struct xchk_agfl_info	sai;
+	struct xchk_agfl_info	sai = {
+		.sc		= sc,
+	};
 	struct xfs_agf		*agf;
 	xfs_agnumber_t		agno = sc->sm->sm_agno;
-	unsigned int		agflcount;
 	unsigned int		i;
 	int			error;
 
+	/* Lock the AGF and AGI so that nobody can touch this AG. */
 	error = xchk_ag_read_headers(sc, agno, &sc->sa);
 	if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
-		goto out;
+		return error;
 	if (!sc->sa.agf_bp)
 		return -EFSCORRUPTED;
-	xchk_buffer_recheck(sc, sc->sa.agfl_bp);
+
+	/* Try to read the AGFL, and verify its structure if we get it. */
+	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &sai.agfl_bp);
+	if (!xchk_process_error(sc, agno, XFS_AGFL_BLOCK(sc->mp), &error))
+		return error;
+	xchk_buffer_recheck(sc, sai.agfl_bp);
 
 	xchk_agfl_xref(sc);
 
@@ -717,24 +731,21 @@ xchk_agfl(
 
 	/* Allocate buffer to ensure uniqueness of AGFL entries. */
 	agf = sc->sa.agf_bp->b_addr;
-	agflcount = be32_to_cpu(agf->agf_flcount);
-	if (agflcount > xfs_agfl_size(sc->mp)) {
+	sai.agflcount = be32_to_cpu(agf->agf_flcount);
+	if (sai.agflcount > xfs_agfl_size(sc->mp)) {
 		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 		goto out;
 	}
-	memset(&sai, 0, sizeof(sai));
-	sai.sc = sc;
-	sai.sz_entries = agflcount;
-	sai.entries = kmem_zalloc(sizeof(xfs_agblock_t) * agflcount,
-			KM_MAYFAIL);
+	sai.entries = kvcalloc(sai.agflcount, sizeof(xfs_agblock_t),
+			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!sai.entries) {
 		error = -ENOMEM;
 		goto out;
 	}
 
 	/* Check the blocks in the AGFL. */
-	error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr,
-			sc->sa.agfl_bp, xchk_agfl_block, &sai);
+	error = xfs_agfl_walk(sc->mp, sc->sa.agf_bp->b_addr, sai.agfl_bp,
+			xchk_agfl_block, &sai);
 	if (error == -ECANCELED) {
 		error = 0;
 		goto out_free;
@@ -742,7 +753,7 @@ xchk_agfl(
 	if (error)
 		goto out_free;
 
-	if (agflcount != sai.nr_entries) {
+	if (sai.agflcount != sai.nr_entries) {
 		xchk_block_set_corrupt(sc, sc->sa.agf_bp);
 		goto out_free;
 	}
@@ -758,7 +769,7 @@ xchk_agfl(
 	}
 
 out_free:
-	kmem_free(sai.entries);
+	kvfree(sai.entries);
 out:
 	return error;
 }
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 1b0b4e243f771..2e75ff9b5b2e9 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -697,7 +697,6 @@ xrep_agfl(
 	 * freespace overflow to the freespace btrees.
 	 */
 	sc->sa.agf_bp = agf_bp;
-	sc->sa.agfl_bp = agfl_bp;
 	error = xrep_roll_ag_trans(sc);
 	if (error)
 		goto err;
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 9bbbf20f401b3..ad70f29233c36 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -424,10 +424,6 @@ xchk_ag_read_headers(
 	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGF))
 		return error;
 
-	error = xfs_alloc_read_agfl(sa->pag, sc->tp, &sa->agfl_bp);
-	if (error && want_ag_read_header_failure(sc, XFS_SCRUB_TYPE_AGFL))
-		return error;
-
 	return 0;
 }
 
@@ -515,10 +511,6 @@ xchk_ag_free(
 	struct xchk_ag		*sa)
 {
 	xchk_ag_btcur_free(sa);
-	if (sa->agfl_bp) {
-		xfs_trans_brelse(sc->tp, sa->agfl_bp);
-		sa->agfl_bp = NULL;
-	}
 	if (sa->agf_bp) {
 		xfs_trans_brelse(sc->tp, sa->agf_bp);
 		sa->agf_bp = NULL;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index c18bd039fce9e..2ada7fc1c3983 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -126,8 +126,6 @@ xrep_roll_ag_trans(
 		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
 	if (sc->sa.agf_bp)
 		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
-	if (sc->sa.agfl_bp)
-		xfs_trans_bhold(sc->tp, sc->sa.agfl_bp);
 
 	/*
 	 * Roll the transaction.  We still own the buffer and the buffer lock
@@ -145,8 +143,6 @@ xrep_roll_ag_trans(
 		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
 	if (sc->sa.agf_bp)
 		xfs_trans_bjoin(sc->tp, sc->sa.agf_bp);
-	if (sc->sa.agfl_bp)
-		xfs_trans_bjoin(sc->tp, sc->sa.agfl_bp);
 
 	return 0;
 }
@@ -498,6 +494,7 @@ xrep_put_freelist(
 	struct xfs_scrub	*sc,
 	xfs_agblock_t		agbno)
 {
+	struct xfs_buf		*agfl_bp;
 	int			error;
 
 	/* Make sure there's space on the freelist. */
@@ -516,8 +513,12 @@ xrep_put_freelist(
 		return error;
 
 	/* Put the block on the AGFL. */
+	error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp);
+	if (error)
+		return error;
+
 	error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp,
-			sc->sa.agfl_bp, agbno, 0);
+			agfl_bp, agbno, 0);
 	if (error)
 		return error;
 	xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 3de5287e98d84..151567f883665 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -39,7 +39,6 @@ struct xchk_ag {
 
 	/* AG btree roots */
 	struct xfs_buf		*agf_bp;
-	struct xfs_buf		*agfl_bp;
 	struct xfs_buf		*agi_bp;
 
 	/* AG btrees */
-- 
GitLab


From 3e59c0103e66d6e687a8b47fd70169542aba938e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:14 -0800
Subject: [PATCH 1619/4122] xfs: log the AGI/AGF buffers when rolling
 transactions during an AG repair

Currently, the only way to lock an allocation group is to hold the AGI
and AGF buffers.  If a repair needs to roll the transaction while
repairing some AG metadata, it maintains that lock by holding the two
buffers across the transaction roll and joins them afterwards.

However, repair is not like other parts of XFS that employ the bhold -
roll - bjoin sequence because it's possible that the AGI or AGF buffers
are not actually dirty before the roll.  This presents two problems --
First, we need to redirty those buffers to keep them moving along in the
log to avoid pinning the log tail.  Second, a clean buffer log item can
detach from the buffer.  If this happens, the buffer type state is
discarded along with the bli and must be reattached before the next time
the buffer is logged.   If it is not, the logging code will complain and
log recovery will not work properly.

An earlier version of this patch tried to fix the second problem by
re-setting the buffer type in the bli after joining the buffer to the
new transaction, but that looked weird and didn't solve the first
problem.  Instead, solve both problems by logging the buffer before
rolling the transaction.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/repair.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 2ada7fc1c3983..22335619c84e7 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -121,24 +121,36 @@ xrep_roll_ag_trans(
 {
 	int			error;
 
-	/* Keep the AG header buffers locked so we can keep going. */
-	if (sc->sa.agi_bp)
+	/*
+	 * Keep the AG header buffers locked while we roll the transaction.
+	 * Ensure that both AG buffers are dirty and held when we roll the
+	 * transaction so that they move forward in the log without losing the
+	 * bli (and hence the bli type) when the transaction commits.
+	 *
+	 * Normal code would never hold clean buffers across a roll, but repair
+	 * needs both buffers to maintain a total lock on the AG.
+	 */
+	if (sc->sa.agi_bp) {
+		xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, XFS_AGI_MAGICNUM);
 		xfs_trans_bhold(sc->tp, sc->sa.agi_bp);
-	if (sc->sa.agf_bp)
+	}
+
+	if (sc->sa.agf_bp) {
+		xfs_alloc_log_agf(sc->tp, sc->sa.agf_bp, XFS_AGF_MAGICNUM);
 		xfs_trans_bhold(sc->tp, sc->sa.agf_bp);
+	}
 
 	/*
-	 * Roll the transaction.  We still own the buffer and the buffer lock
-	 * regardless of whether or not the roll succeeds.  If the roll fails,
-	 * the buffers will be released during teardown on our way out of the
-	 * kernel.  If it succeeds, we join them to the new transaction and
-	 * move on.
+	 * Roll the transaction.  We still hold the AG header buffers locked
+	 * regardless of whether or not that succeeds.  On failure, the buffers
+	 * will be released during teardown on our way out of the kernel.  If
+	 * successful, join the buffers to the new transaction and move on.
 	 */
 	error = xfs_trans_roll(&sc->tp);
 	if (error)
 		return error;
 
-	/* Join AG headers to the new transaction. */
+	/* Join the AG headers to the new transaction. */
 	if (sc->sa.agi_bp)
 		xfs_trans_bjoin(sc->tp, sc->sa.agi_bp);
 	if (sc->sa.agf_bp)
-- 
GitLab


From 48ff40458f871fb19e7b1b40e0e5084b8751d9cb Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:15 -0800
Subject: [PATCH 1620/4122] xfs: standardize GFP flags usage in online scrub

Memory allocation usage is the same throughout online fsck -- we want
kernel memory, we have to be able to back out if we can't allocate
memory, and we don't want to spray dmesg with memory allocation failure
reports.  Standardize the GFP flag usage and document these requirements.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/agheader.c | 2 +-
 fs/xfs/scrub/attr.c     | 9 ++++-----
 fs/xfs/scrub/scrub.h    | 9 +++++++++
 fs/xfs/scrub/symlink.c  | 2 +-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index af284baa6f4cb..4dd52b15f09c6 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -737,7 +737,7 @@ xchk_agfl(
 		goto out;
 	}
 	sai.entries = kvcalloc(sai.agflcount, sizeof(xfs_agblock_t),
-			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+			       XCHK_GFP_FLAGS);
 	if (!sai.entries) {
 		error = -ENOMEM;
 		goto out;
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index b6f0c9f3f1245..11b2593a2be79 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -79,7 +79,8 @@ xchk_setup_xattr(
 	 * without the inode lock held, which means we can sleep.
 	 */
 	if (sc->flags & XCHK_TRY_HARDER) {
-		error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX, GFP_KERNEL);
+		error = xchk_setup_xattr_buf(sc, XATTR_SIZE_MAX,
+				XCHK_GFP_FLAGS);
 		if (error)
 			return error;
 	}
@@ -138,8 +139,7 @@ xchk_xattr_listent(
 	 * doesn't work, we overload the seen_enough variable to convey
 	 * the error message back to the main scrub function.
 	 */
-	error = xchk_setup_xattr_buf(sx->sc, valuelen,
-			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+	error = xchk_setup_xattr_buf(sx->sc, valuelen, XCHK_GFP_FLAGS);
 	if (error == -ENOMEM)
 		error = -EDEADLOCK;
 	if (error) {
@@ -324,8 +324,7 @@ xchk_xattr_block(
 		return 0;
 
 	/* Allocate memory for block usage checking. */
-	error = xchk_setup_xattr_buf(ds->sc, 0,
-			GFP_KERNEL | __GFP_RETRY_MAYFAIL);
+	error = xchk_setup_xattr_buf(ds->sc, 0, XCHK_GFP_FLAGS);
 	if (error == -ENOMEM)
 		return -EDEADLOCK;
 	if (error)
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 151567f883665..a0f097b8acb0a 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -8,6 +8,15 @@
 
 struct xfs_scrub;
 
+/*
+ * Standard flags for allocating memory within scrub.  NOFS context is
+ * configured by the process allocation scope.  Scrub and repair must be able
+ * to back out gracefully if there isn't enough memory.  Force-cast to avoid
+ * complaints from static checkers.
+ */
+#define XCHK_GFP_FLAGS	((__force gfp_t)(GFP_KERNEL | __GFP_NOWARN | \
+					 __GFP_RETRY_MAYFAIL))
+
 /* Type info and names for the scrub types. */
 enum xchk_type {
 	ST_NONE = 1,	/* disabled */
diff --git a/fs/xfs/scrub/symlink.c b/fs/xfs/scrub/symlink.c
index 75311f8daeeb8..c1c99ffe7408a 100644
--- a/fs/xfs/scrub/symlink.c
+++ b/fs/xfs/scrub/symlink.c
@@ -21,7 +21,7 @@ xchk_setup_symlink(
 	struct xfs_scrub	*sc)
 {
 	/* Allocate the buffer without the inode lock held. */
-	sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, GFP_KERNEL);
+	sc->buf = kvzalloc(XFS_SYMLINK_MAXLEN + 1, XCHK_GFP_FLAGS);
 	if (!sc->buf)
 		return -ENOMEM;
 
-- 
GitLab


From b255fab0f80cc65a334fcd90cd278673cddbc988 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:14 -0800
Subject: [PATCH 1621/4122] xfs: make AGFL repair function avoid crosslinked
 blocks

Teach the AGFL repair function to check each block of the proposed AGFL
against the rmap btree.  If the rmapbt finds any mappings that are not
OWN_AG, strike that block from the list.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/agheader_repair.c | 78 ++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 12 deletions(-)

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 2e75ff9b5b2e9..82ceb60ea5fcd 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -442,12 +442,18 @@ out_revert:
 /* AGFL */
 
 struct xrep_agfl {
+	/* Bitmap of alleged AGFL blocks that we're not going to add. */
+	struct xbitmap		crossed;
+
 	/* Bitmap of other OWN_AG metadata blocks. */
 	struct xbitmap		agmetablocks;
 
 	/* Bitmap of free space. */
 	struct xbitmap		*freesp;
 
+	/* rmapbt cursor for finding crosslinked blocks */
+	struct xfs_btree_cur	*rmap_cur;
+
 	struct xfs_scrub	*sc;
 };
 
@@ -477,6 +483,41 @@ xrep_agfl_walk_rmap(
 	return xbitmap_set_btcur_path(&ra->agmetablocks, cur);
 }
 
+/* Strike out the blocks that are cross-linked according to the rmapbt. */
+STATIC int
+xrep_agfl_check_extent(
+	struct xrep_agfl	*ra,
+	uint64_t		start,
+	uint64_t		len)
+{
+	xfs_agblock_t		agbno = XFS_FSB_TO_AGBNO(ra->sc->mp, start);
+	xfs_agblock_t		last_agbno = agbno + len - 1;
+	int			error;
+
+	ASSERT(XFS_FSB_TO_AGNO(ra->sc->mp, start) == ra->sc->sa.pag->pag_agno);
+
+	while (agbno <= last_agbno) {
+		bool		other_owners;
+
+		error = xfs_rmap_has_other_keys(ra->rmap_cur, agbno, 1,
+				&XFS_RMAP_OINFO_AG, &other_owners);
+		if (error)
+			return error;
+
+		if (other_owners) {
+			error = xbitmap_set(&ra->crossed, agbno, 1);
+			if (error)
+				return error;
+		}
+
+		if (xchk_should_terminate(ra->sc, &error))
+			return error;
+		agbno++;
+	}
+
+	return 0;
+}
+
 /*
  * Map out all the non-AGFL OWN_AG space in this AG so that we can deduce
  * which blocks belong to the AGFL.
@@ -496,44 +537,58 @@ xrep_agfl_collect_blocks(
 	struct xrep_agfl	ra;
 	struct xfs_mount	*mp = sc->mp;
 	struct xfs_btree_cur	*cur;
+	struct xbitmap_range	*br, *n;
 	int			error;
 
 	ra.sc = sc;
 	ra.freesp = agfl_extents;
 	xbitmap_init(&ra.agmetablocks);
+	xbitmap_init(&ra.crossed);
 
 	/* Find all space used by the free space btrees & rmapbt. */
 	cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
 	error = xfs_rmap_query_all(cur, xrep_agfl_walk_rmap, &ra);
-	if (error)
-		goto err;
 	xfs_btree_del_cursor(cur, error);
+	if (error)
+		goto out_bmp;
 
 	/* Find all blocks currently being used by the bnobt. */
 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
 			sc->sa.pag, XFS_BTNUM_BNO);
 	error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
-	if (error)
-		goto err;
 	xfs_btree_del_cursor(cur, error);
+	if (error)
+		goto out_bmp;
 
 	/* Find all blocks currently being used by the cntbt. */
 	cur = xfs_allocbt_init_cursor(mp, sc->tp, agf_bp,
 			sc->sa.pag, XFS_BTNUM_CNT);
 	error = xbitmap_set_btblocks(&ra.agmetablocks, cur);
-	if (error)
-		goto err;
-
 	xfs_btree_del_cursor(cur, error);
+	if (error)
+		goto out_bmp;
 
 	/*
 	 * Drop the freesp meta blocks that are in use by btrees.
 	 * The remaining blocks /should/ be AGFL blocks.
 	 */
 	error = xbitmap_disunion(agfl_extents, &ra.agmetablocks);
-	xbitmap_destroy(&ra.agmetablocks);
 	if (error)
-		return error;
+		goto out_bmp;
+
+	/* Strike out the blocks that are cross-linked. */
+	ra.rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, agf_bp, sc->sa.pag);
+	for_each_xbitmap_extent(br, n, agfl_extents) {
+		error = xrep_agfl_check_extent(&ra, br->start, br->len);
+		if (error)
+			break;
+	}
+	xfs_btree_del_cursor(ra.rmap_cur, error);
+	if (error)
+		goto out_bmp;
+	error = xbitmap_disunion(agfl_extents, &ra.crossed);
+	if (error)
+		goto out_bmp;
 
 	/*
 	 * Calculate the new AGFL size.  If we found more blocks than fit in
@@ -541,11 +596,10 @@ xrep_agfl_collect_blocks(
 	 */
 	*flcount = min_t(uint64_t, xbitmap_hweight(agfl_extents),
 			 xfs_agfl_size(mp));
-	return 0;
 
-err:
+out_bmp:
+	xbitmap_destroy(&ra.crossed);
 	xbitmap_destroy(&ra.agmetablocks);
-	xfs_btree_del_cursor(cur, error);
 	return error;
 }
 
-- 
GitLab


From a7a0f9a5503f4da3b6489583ce4ef9abc0ab2475 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:16 -0800
Subject: [PATCH 1622/4122] xfs: return EINTR when a fatal signal terminates
 scrub

If the program calling online fsck is terminated with a fatal signal,
bail out to userspace by returning EINTR, not EAGAIN.  EAGAIN is used by
scrubbers to indicate that we should try again with more resources
locked, and not to indicate that the operation was cancelled.  The
miswiring is mostly harmless, but it shows up in the trace data.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 454145db10e71..b73648d81d230 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -25,7 +25,7 @@ xchk_should_terminate(
 
 	if (fatal_signal_pending(current)) {
 		if (*error == 0)
-			*error = -EAGAIN;
+			*error = -EINTR;
 		return true;
 	}
 	return false;
-- 
GitLab


From 0a713bd41ea2b19904232b9c5278012c4361bc04 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:17 -0800
Subject: [PATCH 1623/4122] xfs: fix return code when fatal signal encountered
 during dquot scrub

If the scrub process is sent a fatal signal while we're checking dquots,
the predicate for this will set the error code to -EINTR.  Don't then
squash that into -ECANCELED, because the wrong errno turns up in the
trace output.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/quota.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 21b4c90068591..0b643ff32b222 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -84,7 +84,7 @@ xchk_quota_item(
 	int			error = 0;
 
 	if (xchk_should_terminate(sc, &error))
-		return -ECANCELED;
+		return error;
 
 	/*
 	 * Except for the root dquot, the actual dquot we got must either have
-- 
GitLab


From fcd2a43488d5a211aec94e28369b2a72c28258a2 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:15 -0800
Subject: [PATCH 1624/4122] xfs: initialize the check_owner object fully

Initialize the check_owner list head so that we don't corrupt the list.
Reduce the scope of the object pointer.

Fixes: 858333dcf021 ("xfs: check btree block ownership with bnobt/rmapbt when scrubbing btree")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/btree.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 2f4519590dc14..075ff3071122e 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -408,7 +408,6 @@ xchk_btree_check_owner(
 	struct xfs_buf		*bp)
 {
 	struct xfs_btree_cur	*cur = bs->cur;
-	struct check_owner	*co;
 
 	/*
 	 * In theory, xfs_btree_get_block should only give us a null buffer
@@ -431,10 +430,14 @@ xchk_btree_check_owner(
 	 * later scanning.
 	 */
 	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
+		struct check_owner	*co;
+
 		co = kmem_alloc(sizeof(struct check_owner),
 				KM_MAYFAIL);
 		if (!co)
 			return -ENOMEM;
+
+		INIT_LIST_HEAD(&co->list);
 		co->level = level;
 		co->daddr = xfs_buf_daddr(bp);
 		list_add_tail(&co->list, &bs->to_check);
-- 
GitLab


From 6bf2f87915970160ded16c310e2e8887deff97a2 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:17 -0800
Subject: [PATCH 1625/4122] xfs: don't retry repairs harder when EAGAIN is
 returned

Repair functions will not return EAGAIN -- if they were not able to
obtain resources, they should return EDEADLOCK (like the rest of online
fsck) to signal that we need to grab all the resources and try again.
Hence we don't need to deal with this case except as a debugging
assertion.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/repair.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 22335619c84e7..7323bd9fddfb2 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -61,7 +61,6 @@ xrep_attempt(
 		sc->flags |= XREP_ALREADY_FIXED;
 		return -EAGAIN;
 	case -EDEADLOCK:
-	case -EAGAIN:
 		/* Tell the caller to try again having grabbed all the locks. */
 		if (!(sc->flags & XCHK_TRY_HARDER)) {
 			sc->flags |= XCHK_TRY_HARDER;
@@ -74,6 +73,11 @@ xrep_attempt(
 		 */
 		return -EFSCORRUPTED;
 	default:
+		/*
+		 * EAGAIN tells the caller to re-scrub, so we cannot return
+		 * that here.
+		 */
+		ASSERT(error != -EAGAIN);
 		return error;
 	}
 }
-- 
GitLab


From 306195f355bbdcc3eff6cffac05bcd93a5e419ed Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:16 -0800
Subject: [PATCH 1626/4122] xfs: pivot online scrub away from kmem.[ch]

Convert all the online scrub code to use the Linux slab allocator
functions directly instead of going through the kmem wrappers.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/agheader_repair.c |  2 +-
 fs/xfs/scrub/attr.c            |  2 +-
 fs/xfs/scrub/bitmap.c          | 11 ++++++-----
 fs/xfs/scrub/btree.c           |  9 ++++-----
 fs/xfs/scrub/dabtree.c         |  4 ++--
 fs/xfs/scrub/fscounters.c      |  2 +-
 fs/xfs/scrub/refcount.c        | 12 ++++++------
 fs/xfs/scrub/scrub.c           |  6 +++---
 8 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 82ceb60ea5fcd..d75d82151eeba 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -685,7 +685,7 @@ xrep_agfl_init_header(
 		if (br->len)
 			break;
 		list_del(&br->list);
-		kmem_free(br);
+		kfree(br);
 	}
 
 	/* Write new AGFL to disk. */
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 11b2593a2be79..31529b9bf3891 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -49,7 +49,7 @@ xchk_setup_xattr_buf(
 	if (ab) {
 		if (sz <= ab->sz)
 			return 0;
-		kmem_free(ab);
+		kvfree(ab);
 		sc->buf = NULL;
 	}
 
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index b89bf9de9b1c5..a255f09e9f0a6 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -10,6 +10,7 @@
 #include "xfs_trans_resv.h"
 #include "xfs_mount.h"
 #include "xfs_btree.h"
+#include "scrub/scrub.h"
 #include "scrub/bitmap.h"
 
 /*
@@ -25,7 +26,7 @@ xbitmap_set(
 {
 	struct xbitmap_range	*bmr;
 
-	bmr = kmem_alloc(sizeof(struct xbitmap_range), KM_MAYFAIL);
+	bmr = kmalloc(sizeof(struct xbitmap_range), XCHK_GFP_FLAGS);
 	if (!bmr)
 		return -ENOMEM;
 
@@ -47,7 +48,7 @@ xbitmap_destroy(
 
 	for_each_xbitmap_extent(bmr, n, bitmap) {
 		list_del(&bmr->list);
-		kmem_free(bmr);
+		kfree(bmr);
 	}
 }
 
@@ -174,15 +175,15 @@ xbitmap_disunion(
 			/* Total overlap, just delete ex. */
 			lp = lp->next;
 			list_del(&br->list);
-			kmem_free(br);
+			kfree(br);
 			break;
 		case 0:
 			/*
 			 * Deleting from the middle: add the new right extent
 			 * and then shrink the left extent.
 			 */
-			new_br = kmem_alloc(sizeof(struct xbitmap_range),
-					KM_MAYFAIL);
+			new_br = kmalloc(sizeof(struct xbitmap_range),
+					XCHK_GFP_FLAGS);
 			if (!new_br) {
 				error = -ENOMEM;
 				goto out;
diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
index 075ff3071122e..0fd36d5b46467 100644
--- a/fs/xfs/scrub/btree.c
+++ b/fs/xfs/scrub/btree.c
@@ -432,8 +432,7 @@ xchk_btree_check_owner(
 	if (cur->bc_btnum == XFS_BTNUM_BNO || cur->bc_btnum == XFS_BTNUM_RMAP) {
 		struct check_owner	*co;
 
-		co = kmem_alloc(sizeof(struct check_owner),
-				KM_MAYFAIL);
+		co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS);
 		if (!co)
 			return -ENOMEM;
 
@@ -652,7 +651,7 @@ xchk_btree(
 		xchk_btree_set_corrupt(sc, cur, 0);
 		return 0;
 	}
-	bs = kmem_zalloc(cur_sz, KM_NOFS | KM_MAYFAIL);
+	bs = kzalloc(cur_sz, XCHK_GFP_FLAGS);
 	if (!bs)
 		return -ENOMEM;
 	bs->cur = cur;
@@ -743,9 +742,9 @@ out:
 			error = xchk_btree_check_block_owner(bs, co->level,
 					co->daddr);
 		list_del(&co->list);
-		kmem_free(co);
+		kfree(co);
 	}
-	kmem_free(bs);
+	kfree(bs);
 
 	return error;
 }
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 84fe3d33d6995..d17cee1770854 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -486,7 +486,7 @@ xchk_da_btree(
 		return 0;
 
 	/* Set up initial da state. */
-	ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL);
+	ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
 	if (!ds)
 		return -ENOMEM;
 	ds->dargs.dp = sc->ip;
@@ -591,6 +591,6 @@ out:
 
 out_state:
 	xfs_da_state_free(ds->state);
-	kmem_free(ds);
+	kfree(ds);
 	return error;
 }
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 6a6f8fe7f87c0..3c56f5890da4f 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -116,7 +116,7 @@ xchk_setup_fscounters(
 	struct xchk_fscounters	*fsc;
 	int			error;
 
-	sc->buf = kmem_zalloc(sizeof(struct xchk_fscounters), 0);
+	sc->buf = kzalloc(sizeof(struct xchk_fscounters), XCHK_GFP_FLAGS);
 	if (!sc->buf)
 		return -ENOMEM;
 	fsc = sc->buf;
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index a26ee0f24ef2a..d9c1b3cea4a52 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -127,8 +127,8 @@ xchk_refcountbt_rmap_check(
 		 * is healthy each rmap_irec we see will be in agbno order
 		 * so we don't need insertion sort here.
 		 */
-		frag = kmem_alloc(sizeof(struct xchk_refcnt_frag),
-				KM_MAYFAIL);
+		frag = kmalloc(sizeof(struct xchk_refcnt_frag),
+				XCHK_GFP_FLAGS);
 		if (!frag)
 			return -ENOMEM;
 		memcpy(&frag->rm, rec, sizeof(frag->rm));
@@ -215,7 +215,7 @@ xchk_refcountbt_process_rmap_fragments(
 				continue;
 			}
 			list_del(&frag->list);
-			kmem_free(frag);
+			kfree(frag);
 			nr++;
 		}
 
@@ -257,11 +257,11 @@ done:
 	/* Delete fragments and work list. */
 	list_for_each_entry_safe(frag, n, &worklist, list) {
 		list_del(&frag->list);
-		kmem_free(frag);
+		kfree(frag);
 	}
 	list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
 		list_del(&frag->list);
-		kmem_free(frag);
+		kfree(frag);
 	}
 }
 
@@ -306,7 +306,7 @@ xchk_refcountbt_xref_rmap(
 out_free:
 	list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
 		list_del(&frag->list);
-		kmem_free(frag);
+		kfree(frag);
 	}
 }
 
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 2e8e400f10a9a..07a7a75f987fc 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -174,7 +174,7 @@ xchk_teardown(
 	if (sc->flags & XCHK_REAPING_DISABLED)
 		xchk_start_reaping(sc);
 	if (sc->buf) {
-		kmem_free(sc->buf);
+		kvfree(sc->buf);
 		sc->buf = NULL;
 	}
 	return error;
@@ -467,7 +467,7 @@ xfs_scrub_metadata(
 	xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
  "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
 
-	sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL);
+	sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
 	if (!sc) {
 		error = -ENOMEM;
 		goto out;
@@ -557,7 +557,7 @@ out_nofix:
 out_teardown:
 	error = xchk_teardown(sc, error);
 out_sc:
-	kmem_free(sc);
+	kfree(sc);
 out:
 	trace_xchk_done(XFS_I(file_inode(file)), sm, error);
 	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
-- 
GitLab


From 9e13975bb0620c2bfa1a4d2943e7eb8514f7708e Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:18 -0800
Subject: [PATCH 1627/4122] xfs: load rtbitmap and rtsummary extent mapping
 btrees at mount time

It turns out that GETFSMAP and online fsck have had a bug for years due
to their use of ILOCK_SHARED to coordinate their linear scans of the
realtime bitmap.  If the bitmap file's data fork happens to be in BTREE
format and the scan occurs immediately after mounting, the incore bmbt
will not be populated, leading to ASSERTs tripping over the incorrect
inode state.  Because the bitmap scans always lock bitmap buffers in
increasing order of file offset, it is appropriate for these two callers
to take a shared ILOCK to improve scalability.

To fix this problem, load both data and attr fork state into memory when
mounting the realtime inodes.  Realtime metadata files aren't supposed
to have an attr fork so the second step is likely a nop.

On most filesystems this is unlikely since the rtbitmap data fork is
usually in extents format, but it's possible to craft a filesystem that
will by fragmenting the free space in the data section and growfsing the
rt section.

Fixes: 4c934c7dd60c ("xfs: report realtime space information via the rtbitmap")
Also-Fixes: 46d9bfb5e706 ("xfs: cross-reference the realtime bitmap")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_rtalloc.c | 56 ++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 52 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 292d5e54a92cc..b0846204c436d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1325,6 +1325,41 @@ xfs_rtalloc_reinit_frextents(
 	return 0;
 }
 
+/*
+ * Read in the bmbt of an rt metadata inode so that we never have to load them
+ * at runtime.  This enables the use of shared ILOCKs for rtbitmap scans.  Use
+ * an empty transaction to avoid deadlocking on loops in the bmbt.
+ */
+static inline int
+xfs_rtmount_iread_extents(
+	struct xfs_inode	*ip,
+	unsigned int		lock_class)
+{
+	struct xfs_trans	*tp;
+	int			error;
+
+	error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+	if (error)
+		return error;
+
+	xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class);
+
+	error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+	if (error)
+		goto out_unlock;
+
+	if (xfs_inode_has_attr_fork(ip)) {
+		error = xfs_iread_extents(tp, ip, XFS_ATTR_FORK);
+		if (error)
+			goto out_unlock;
+	}
+
+out_unlock:
+	xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class);
+	xfs_trans_cancel(tp);
+	return error;
+}
+
 /*
  * Get the bitmap and summary inodes and the summary cache into the mount
  * structure at mount time.
@@ -1342,14 +1377,27 @@ xfs_rtmount_inodes(
 		return error;
 	ASSERT(mp->m_rbmip != NULL);
 
+	error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
+	if (error)
+		goto out_rele_bitmap;
+
 	error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
-	if (error) {
-		xfs_irele(mp->m_rbmip);
-		return error;
-	}
+	if (error)
+		goto out_rele_bitmap;
 	ASSERT(mp->m_rsumip != NULL);
+
+	error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
+	if (error)
+		goto out_rele_summary;
+
 	xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
 	return 0;
+
+out_rele_summary:
+	xfs_irele(mp->m_rsumip);
+out_rele_bitmap:
+	xfs_irele(mp->m_rbmip);
+	return error;
 }
 
 void
-- 
GitLab


From 11f97e684583469fc342a561387cc44fac4f9b1f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:19 -0800
Subject: [PATCH 1628/4122] xfs: skip fscounters comparisons when the scan is
 incomplete

If any part of the per-AG summary counter scan loop aborts without
collecting all of the data we need, the scrubber's observation data will
be invalid.  Set the incomplete flag so that we abort the scrub without
reporting false corruptions.  Document the data dependency here too.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/fscounters.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 3c56f5890da4f..eeb36ac2c6d2c 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -138,6 +138,18 @@ xchk_setup_fscounters(
 	return xchk_trans_alloc(sc, 0);
 }
 
+/*
+ * Part 1: Collecting filesystem summary counts.  For each AG, we add its
+ * summary counts (total inodes, free inodes, free data blocks) to an incore
+ * copy of the overall filesystem summary counts.
+ *
+ * To avoid false corruption reports in part 2, any failure in this part must
+ * set the INCOMPLETE flag even when a negative errno is returned.  This care
+ * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
+ * ECANCELED) that are absorbed into a scrub state flag update by
+ * xchk_*_process_error.
+ */
+
 /* Count free space btree blocks manually for pre-lazysbcount filesystems. */
 static int
 xchk_fscount_btreeblks(
@@ -225,8 +237,10 @@ retry:
 	}
 	if (pag)
 		xfs_perag_put(pag);
-	if (error)
+	if (error) {
+		xchk_set_incomplete(sc);
 		return error;
+	}
 
 	/*
 	 * The global incore space reservation is taken from the incore
@@ -267,6 +281,11 @@ retry:
 	return 0;
 }
 
+/*
+ * Part 2: Comparing filesystem summary counters.  All we have to do here is
+ * sum the percpu counters and compare them to what we've observed.
+ */
+
 /*
  * Is the @counter reasonably close to the @expected value?
  *
-- 
GitLab


From 93b0c58ed04b6cbe45354f23bb5628fff31f9084 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:17 -0800
Subject: [PATCH 1629/4122] xfs: don't return -EFSCORRUPTED from repair when
 resources cannot be grabbed

If we tried to repair something but the repair failed with -EDEADLOCK,
that means that the repair function couldn't grab some resource it
needed and wants us to try again.  If we try again (with TRY_HARDER) but
still can't get all the resources we need, the repair fails and errors
remain on the filesystem.

Right now, repair returns the -EDEADLOCK to the caller as -EFSCORRUPTED,
which results in XFS_SCRUB_OFLAG_CORRUPT being passed out to userspace.
This is not correct because repair has not determined that anything is
corrupt.  If the repair had been invoked on an object that could be
optimized but wasn't corrupt (OFLAG_PREEN), the inability to grab
resources will be reported to userspace as corrupt metadata, and users
will be unnecessarily alarmed that their suboptimal metadata turned into
a corruption.

Fix this by returning zero so that the results of the actual scrub will
be copied back out to userspace.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/repair.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 7323bd9fddfb2..4b92f9253ccd2 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -69,9 +69,9 @@ xrep_attempt(
 		/*
 		 * We tried harder but still couldn't grab all the resources
 		 * we needed to fix it.  The corruption has not been fixed,
-		 * so report back to userspace.
+		 * so exit to userspace with the scan's output flags unchanged.
 		 */
-		return -EFSCORRUPTED;
+		return 0;
 	default:
 		/*
 		 * EAGAIN tells the caller to re-scrub, so we cannot return
-- 
GitLab


From 5f369dc5b4eb2becbdfd08924dcaf00e391f4ea1 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:18 -0800
Subject: [PATCH 1630/4122] xfs: make rtbitmap ILOCKing consistent when
 scanning the rt bitmap file

xfs_rtalloc_query_range scans the realtime bitmap file in order of
increasing file offset, so this caller can take ILOCK_SHARED on the rt
bitmap inode instead of ILOCK_EXCL.  This isn't going to yield any
practical benefits at mount time, but we'd like to make the locking
usage consistent around xfs_rtalloc_query_all calls.  Make all the
places we do this use the same xfs_ilock lockflags for consistency.

Fixes: 4c934c7dd60c ("xfs: report realtime space information via the rtbitmap")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_fsmap.c   | 4 ++--
 fs/xfs/xfs_rtalloc.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index d8337274c74d0..88a88506ffffe 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -524,7 +524,7 @@ xfs_getfsmap_rtdev_rtbitmap_query(
 	struct xfs_mount		*mp = tp->t_mountp;
 	int				error;
 
-	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED);
+	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
 
 	/*
 	 * Set up query parameters to return free rtextents covering the range
@@ -551,7 +551,7 @@ xfs_getfsmap_rtdev_rtbitmap_query(
 	if (error)
 		goto err;
 err:
-	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED);
+	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index b0846204c436d..16534e9873f69 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1311,10 +1311,10 @@ xfs_rtalloc_reinit_frextents(
 	uint64_t		val = 0;
 	int			error;
 
-	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
+	xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
 	error = xfs_rtalloc_query_all(mp, NULL, xfs_rtalloc_count_frextent,
 			&val);
-	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_EXCL);
+	xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
 	if (error)
 		return error;
 
-- 
GitLab


From e74331d6fa2c21a8ecccfe0648dad5193b83defe Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:19 -0800
Subject: [PATCH 1631/4122] xfs: online checking of the free rt extent count

Teach the summary count checker to count the number of free realtime
extents and compare that to the superblock copy.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/fscounters.c | 86 ++++++++++++++++++++++++++++++++++++++-
 fs/xfs/scrub/scrub.h      |  8 ----
 2 files changed, 84 insertions(+), 10 deletions(-)

diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index eeb36ac2c6d2c..4777e7b89fdc6 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -14,6 +14,8 @@
 #include "xfs_health.h"
 #include "xfs_btree.h"
 #include "xfs_ag.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 #include "scrub/trace.h"
@@ -43,6 +45,16 @@
  * our tolerance for mismatch between expected and actual counter values.
  */
 
+struct xchk_fscounters {
+	struct xfs_scrub	*sc;
+	uint64_t		icount;
+	uint64_t		ifree;
+	uint64_t		fdblocks;
+	uint64_t		frextents;
+	unsigned long long	icount_min;
+	unsigned long long	icount_max;
+};
+
 /*
  * Since the expected value computation is lockless but only browses incore
  * values, the percpu counters should be fairly close to each other.  However,
@@ -120,6 +132,7 @@ xchk_setup_fscounters(
 	if (!sc->buf)
 		return -ENOMEM;
 	fsc = sc->buf;
+	fsc->sc = sc;
 
 	xfs_icount_range(sc->mp, &fsc->icount_min, &fsc->icount_max);
 
@@ -281,6 +294,59 @@ retry:
 	return 0;
 }
 
+#ifdef CONFIG_XFS_RT
+STATIC int
+xchk_fscount_add_frextent(
+	struct xfs_mount		*mp,
+	struct xfs_trans		*tp,
+	const struct xfs_rtalloc_rec	*rec,
+	void				*priv)
+{
+	struct xchk_fscounters		*fsc = priv;
+	int				error = 0;
+
+	fsc->frextents += rec->ar_extcount;
+
+	xchk_should_terminate(fsc->sc, &error);
+	return error;
+}
+
+/* Calculate the number of free realtime extents from the realtime bitmap. */
+STATIC int
+xchk_fscount_count_frextents(
+	struct xfs_scrub	*sc,
+	struct xchk_fscounters	*fsc)
+{
+	struct xfs_mount	*mp = sc->mp;
+	int			error;
+
+	fsc->frextents = 0;
+	if (!xfs_has_realtime(mp))
+		return 0;
+
+	xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+	error = xfs_rtalloc_query_all(sc->mp, sc->tp,
+			xchk_fscount_add_frextent, fsc);
+	if (error) {
+		xchk_set_incomplete(sc);
+		goto out_unlock;
+	}
+
+out_unlock:
+	xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+	return error;
+}
+#else
+STATIC int
+xchk_fscount_count_frextents(
+	struct xfs_scrub	*sc,
+	struct xchk_fscounters	*fsc)
+{
+	fsc->frextents = 0;
+	return 0;
+}
+#endif /* CONFIG_XFS_RT */
+
 /*
  * Part 2: Comparing filesystem summary counters.  All we have to do here is
  * sum the percpu counters and compare them to what we've observed.
@@ -352,16 +418,17 @@ xchk_fscounters(
 {
 	struct xfs_mount	*mp = sc->mp;
 	struct xchk_fscounters	*fsc = sc->buf;
-	int64_t			icount, ifree, fdblocks;
+	int64_t			icount, ifree, fdblocks, frextents;
 	int			error;
 
 	/* Snapshot the percpu counters. */
 	icount = percpu_counter_sum(&mp->m_icount);
 	ifree = percpu_counter_sum(&mp->m_ifree);
 	fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+	frextents = percpu_counter_sum(&mp->m_frextents);
 
 	/* No negative values, please! */
-	if (icount < 0 || ifree < 0 || fdblocks < 0)
+	if (icount < 0 || ifree < 0 || fdblocks < 0 || frextents < 0)
 		xchk_set_corrupt(sc);
 
 	/* See if icount is obviously wrong. */
@@ -372,6 +439,10 @@ xchk_fscounters(
 	if (fdblocks > mp->m_sb.sb_dblocks)
 		xchk_set_corrupt(sc);
 
+	/* See if frextents is obviously wrong. */
+	if (frextents > mp->m_sb.sb_rextents)
+		xchk_set_corrupt(sc);
+
 	/*
 	 * If ifree exceeds icount by more than the minimum variance then
 	 * something's probably wrong with the counters.
@@ -386,6 +457,13 @@ xchk_fscounters(
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
 		return 0;
 
+	/* Count the free extents counter for rt volumes. */
+	error = xchk_fscount_count_frextents(sc, fsc);
+	if (!xchk_process_error(sc, 0, XFS_SB_BLOCK(mp), &error))
+		return error;
+	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
+		return 0;
+
 	/* Compare the in-core counters with whatever we counted. */
 	if (!xchk_fscount_within_range(sc, icount, &mp->m_icount, fsc->icount))
 		xchk_set_corrupt(sc);
@@ -397,5 +475,9 @@ xchk_fscounters(
 			fsc->fdblocks))
 		xchk_set_corrupt(sc);
 
+	if (!xchk_fscount_within_range(sc, frextents, &mp->m_frextents,
+			fsc->frextents))
+		xchk_set_corrupt(sc);
+
 	return 0;
 }
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index a0f097b8acb0a..b4d391b4c9385 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -169,12 +169,4 @@ void xchk_xref_is_used_rt_space(struct xfs_scrub *sc, xfs_rtblock_t rtbno,
 # define xchk_xref_is_used_rt_space(sc, rtbno, len) do { } while (0)
 #endif
 
-struct xchk_fscounters {
-	uint64_t		icount;
-	uint64_t		ifree;
-	uint64_t		fdblocks;
-	unsigned long long	icount_min;
-	unsigned long long	icount_max;
-};
-
 #endif	/* __XFS_SCRUB_SCRUB_H__ */
-- 
GitLab


From 033985b6fe875a7a971cf4e3941e1f3085ba037c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:19 -0800
Subject: [PATCH 1632/4122] xfs: fix perag loop in xchk_bmap_check_rmaps

sparse complains that we can return an uninitialized error from this
function and that pag could be uninitialized.  We know that there are no
zero-AG filesystems and hence we had to call xchk_bmap_check_ag_rmaps at
least once, so this is not actually possible, but I'm too worn out from
automated complaints from unsophisticated AIs so let's just fix this and
move on to more interesting problems, eh?

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/bmap.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index f0b9cb6506fdd..cb203e083a4c8 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -599,14 +599,14 @@ xchk_bmap_check_rmaps(
 
 	for_each_perag(sc->mp, agno, pag) {
 		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
-		if (error)
-			break;
-		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-			break;
+		if (error ||
+		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
+			xfs_perag_put(pag);
+			return error;
+		}
 	}
-	if (pag)
-		xfs_perag_put(pag);
-	return error;
+
+	return 0;
 }
 
 /*
-- 
GitLab


From 6a5777865eebee1b53d7ae0fd2fa9ec2c6596df6 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:20 -0800
Subject: [PATCH 1633/4122] xfs: teach scrub to check for adjacent bmaps when
 rmap larger than bmap

When scrub is checking file fork mappings against rmap records and
the rmap record starts before or ends after the bmap record, check the
adjacent bmap records to make sure that they're adjacent to the one
we're checking.  This helps us to detect cases where the rmaps cover
territory that the bmaps do not.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/bmap.c | 74 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 72 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index cb203e083a4c8..a4a156dcaa8a0 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -90,6 +90,7 @@ out:
 
 struct xchk_bmap_info {
 	struct xfs_scrub	*sc;
+	struct xfs_iext_cursor	icur;
 	xfs_fileoff_t		lastoff;
 	bool			is_rt;
 	bool			is_shared;
@@ -146,6 +147,48 @@ xchk_bmap_get_rmap(
 	return has_rmap;
 }
 
+static inline bool
+xchk_bmap_has_prev(
+	struct xchk_bmap_info	*info,
+	struct xfs_bmbt_irec	*irec)
+{
+	struct xfs_bmbt_irec	got;
+	struct xfs_ifork	*ifp;
+
+	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
+
+	if (!xfs_iext_peek_prev_extent(ifp, &info->icur, &got))
+		return false;
+	if (got.br_startoff + got.br_blockcount != irec->br_startoff)
+		return false;
+	if (got.br_startblock + got.br_blockcount != irec->br_startblock)
+		return false;
+	if (got.br_state != irec->br_state)
+		return false;
+	return true;
+}
+
+static inline bool
+xchk_bmap_has_next(
+	struct xchk_bmap_info	*info,
+	struct xfs_bmbt_irec	*irec)
+{
+	struct xfs_bmbt_irec	got;
+	struct xfs_ifork	*ifp;
+
+	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);
+
+	if (!xfs_iext_peek_next_extent(ifp, &info->icur, &got))
+		return false;
+	if (irec->br_startoff + irec->br_blockcount != got.br_startoff)
+		return false;
+	if (irec->br_startblock + irec->br_blockcount != got.br_startblock)
+		return false;
+	if (got.br_state != irec->br_state)
+		return false;
+	return true;
+}
+
 /* Make sure that we have rmapbt records for this extent. */
 STATIC void
 xchk_bmap_xref_rmap(
@@ -214,6 +257,34 @@ xchk_bmap_xref_rmap(
 	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
 		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
+
+	/*
+	 * If the rmap starts before this bmbt record, make sure there's a bmbt
+	 * record for the previous offset that is contiguous with this mapping.
+	 * Skip this for CoW fork extents because the refcount btree (and not
+	 * the inode) is the ondisk owner for those extents.
+	 */
+	if (info->whichfork != XFS_COW_FORK && rmap.rm_startblock < agbno &&
+	    !xchk_bmap_has_prev(info, irec)) {
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+		return;
+	}
+
+	/*
+	 * If the rmap ends after this bmbt record, make sure there's a bmbt
+	 * record for the next offset that is contiguous with this mapping.
+	 * Skip this for CoW fork extents because the refcount btree (and not
+	 * the inode) is the ondisk owner for those extents.
+	 */
+	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
+	if (info->whichfork != XFS_COW_FORK &&
+	    rmap_end > agbno + irec->br_blockcount &&
+	    !xchk_bmap_has_next(info, irec)) {
+		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+		return;
+	}
 }
 
 /* Cross-reference a single rtdev extent record. */
@@ -626,7 +697,6 @@ xchk_bmap(
 	struct xfs_inode	*ip = sc->ip;
 	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
 	xfs_fileoff_t		endoff;
-	struct xfs_iext_cursor	icur;
 	int			error = 0;
 
 	/* Non-existent forks can be ignored. */
@@ -690,7 +760,7 @@ xchk_bmap(
 	/* Scrub extent records. */
 	info.lastoff = 0;
 	ifp = xfs_ifork_ptr(ip, whichfork);
-	for_each_xfs_iext(ifp, &icur, &irec) {
+	for_each_xfs_iext(ifp, &info.icur, &irec) {
 		if (xchk_should_terminate(sc, &error) ||
 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 			goto out;
-- 
GitLab


From 830ffa09fb130d31f111848a75b65506fdac623d Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:20 -0800
Subject: [PATCH 1634/4122] xfs: block map scrub should handle incore delalloc
 reservations

Enhance the block map scrubber to check delayed allocation reservations.
Though there are no physical space allocations to check, we do need to
make sure that the range of file offsets being mapped are correct, and
to bump the lastoff cursor so that key order checking works correctly.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/bmap.c | 55 +++++++++++++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index a4a156dcaa8a0..fa8f22ed057a7 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -368,14 +368,13 @@ xchk_bmap_dirattr_extent(
 }
 
 /* Scrub a single extent record. */
-STATIC int
+STATIC void
 xchk_bmap_iextent(
 	struct xfs_inode	*ip,
 	struct xchk_bmap_info	*info,
 	struct xfs_bmbt_irec	*irec)
 {
 	struct xfs_mount	*mp = info->sc->mp;
-	int			error = 0;
 
 	/*
 	 * Check for out-of-order extents.  This record could have come
@@ -396,14 +395,6 @@ xchk_bmap_iextent(
 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
-	/*
-	 * Check for delalloc extents.  We never iterate the ones in the
-	 * in-core extent scan, and we should never see these in the bmbt.
-	 */
-	if (isnullstartblock(irec->br_startblock))
-		xchk_fblock_set_corrupt(info->sc, info->whichfork,
-				irec->br_startoff);
-
 	/* Make sure the extent points to a valid place. */
 	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
@@ -424,15 +415,12 @@ xchk_bmap_iextent(
 				irec->br_startoff);
 
 	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
-		return 0;
+		return;
 
 	if (info->is_rt)
 		xchk_bmap_rt_iextent_xref(ip, info, irec);
 	else
 		xchk_bmap_iextent_xref(ip, info, irec);
-
-	info->lastoff = irec->br_startoff + irec->br_blockcount;
-	return error;
 }
 
 /* Scrub a bmbt record. */
@@ -680,6 +668,33 @@ xchk_bmap_check_rmaps(
 	return 0;
 }
 
+/* Scrub a delalloc reservation from the incore extent map tree. */
+STATIC void
+xchk_bmap_iextent_delalloc(
+	struct xfs_inode	*ip,
+	struct xchk_bmap_info	*info,
+	struct xfs_bmbt_irec	*irec)
+{
+	struct xfs_mount	*mp = info->sc->mp;
+
+	/*
+	 * Check for out-of-order extents.  This record could have come
+	 * from the incore list, for which there is no ordering check.
+	 */
+	if (irec->br_startoff < info->lastoff)
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
+	/* Make sure the extent points to a valid place. */
+	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+}
+
 /*
  * Scrub an inode fork's block mappings.
  *
@@ -764,16 +779,18 @@ xchk_bmap(
 		if (xchk_should_terminate(sc, &error) ||
 		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 			goto out;
-		if (isnullstartblock(irec.br_startblock))
-			continue;
+
 		if (irec.br_startoff >= endoff) {
 			xchk_fblock_set_corrupt(sc, whichfork,
 					irec.br_startoff);
 			goto out;
 		}
-		error = xchk_bmap_iextent(ip, &info, &irec);
-		if (error)
-			goto out;
+
+		if (isnullstartblock(irec.br_startblock))
+			xchk_bmap_iextent_delalloc(ip, &info, &irec);
+		else
+			xchk_bmap_iextent(ip, &info, &irec);
+		info.lastoff = irec.br_startoff + irec.br_blockcount;
 	}
 
 	error = xchk_bmap_check_rmaps(sc, whichfork);
-- 
GitLab


From f23c40443d1c2af87c99c1424f9e43bbd7307f92 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:21 -0800
Subject: [PATCH 1635/4122] xfs: check quota files for unwritten extents

Teach scrub to flag quota files containing unwritten extents.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/quota.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/scrub/quota.c b/fs/xfs/scrub/quota.c
index 0b643ff32b222..9eeac85653948 100644
--- a/fs/xfs/scrub/quota.c
+++ b/fs/xfs/scrub/quota.c
@@ -14,6 +14,7 @@
 #include "xfs_inode.h"
 #include "xfs_quota.h"
 #include "xfs_qm.h"
+#include "xfs_bmap.h"
 #include "scrub/scrub.h"
 #include "scrub/common.h"
 
@@ -189,11 +190,12 @@ xchk_quota_data_fork(
 	for_each_xfs_iext(ifp, &icur, &irec) {
 		if (xchk_should_terminate(sc, &error))
 			break;
+
 		/*
-		 * delalloc extents or blocks mapped above the highest
+		 * delalloc/unwritten extents or blocks mapped above the highest
 		 * quota id shouldn't happen.
 		 */
-		if (isnullstartblock(irec.br_startblock) ||
+		if (!xfs_bmap_is_written_extent(&irec) ||
 		    irec.br_startoff > max_dqid_off ||
 		    irec.br_startoff + irec.br_blockcount - 1 > max_dqid_off) {
 			xchk_fblock_set_corrupt(sc, XFS_DATA_FORK,
-- 
GitLab


From 31785537010a91a0d1dc403e5d049a38a3d4a30b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:21 -0800
Subject: [PATCH 1636/4122] xfs: check that CoW fork extents are not shared

Ensure that extents in an inode's CoW fork are not marked as shared in
the refcount btree.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/bmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index fa8f22ed057a7..4bb6672b02ba1 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -335,6 +335,8 @@ xchk_bmap_iextent_xref(
 	case XFS_COW_FORK:
 		xchk_xref_is_cow_staging(info->sc, agbno,
 				irec->br_blockcount);
+		xchk_xref_is_not_shared(info->sc, agbno,
+				irec->br_blockcount);
 		break;
 	}
 
-- 
GitLab


From 5eef46358fae1a6018d9f886a3ecd30e843728dd Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Sun, 6 Nov 2022 17:03:22 -0800
Subject: [PATCH 1637/4122] xfs: teach scrub to flag non-extents format cow
 forks

CoW forks only exist in memory, which means that they can only ever have
an incore extent tree.  Hence they must always be FMT_EXTENTS, so check
this when we're scrubbing them.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/bmap.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 4bb6672b02ba1..d50d0eab196ab 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -748,6 +748,8 @@ xchk_bmap(
 	case XFS_DINODE_FMT_DEV:
 	case XFS_DINODE_FMT_LOCAL:
 		/* No mappings to check. */
+		if (whichfork == XFS_COW_FORK)
+			xchk_fblock_set_corrupt(sc, whichfork, 0);
 		goto out;
 	case XFS_DINODE_FMT_EXTENTS:
 		break;
-- 
GitLab


From bd5ab5f9874109586cbae5bc98e1f9ff574627e2 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 16 Nov 2022 16:08:03 -0800
Subject: [PATCH 1638/4122] xfs: don't warn about files that are exactly
 s_maxbytes long

We can handle files that are exactly s_maxbytes bytes long; we just
can't handle anything larger than that.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 51820b40ab1c8..7a2f38e5202c3 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -365,7 +365,7 @@ xchk_dinode(
 	 * pagecache can't cache all the blocks in this file due to
 	 * overly large offsets, flag the inode for admin review.
 	 */
-	if (isize >= mp->m_super->s_maxbytes)
+	if (isize > mp->m_super->s_maxbytes)
 		xchk_ino_set_warning(sc, ino);
 
 	/* di_nblocks */
-- 
GitLab


From f36b954a1f1bf06b5746fea7ecf0fa639ac65324 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 16 Nov 2022 16:08:03 -0800
Subject: [PATCH 1639/4122] xfs: check inode core when scrubbing metadata files

Metadata files (e.g. realtime bitmaps and quota files) do not show up in
the bulkstat output, which means that scrub-by-handle does not work;
they can only be checked through a specific scrub type.  Therefore, each
scrub type calls xchk_metadata_inode_forks to check the metadata for
whatever's in the file.

Unfortunately, that function doesn't actually check the inode record
itself.  Refactor the function a bit to make that happen.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/scrub/common.c | 40 ++++++++++++++++++++++++++++++++++------
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index ad70f29233c36..613260b04a3dc 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -781,6 +781,33 @@ xchk_buffer_recheck(
 	trace_xchk_block_error(sc, xfs_buf_daddr(bp), fa);
 }
 
+static inline int
+xchk_metadata_inode_subtype(
+	struct xfs_scrub	*sc,
+	unsigned int		scrub_type)
+{
+	__u32			smtype = sc->sm->sm_type;
+	int			error;
+
+	sc->sm->sm_type = scrub_type;
+
+	switch (scrub_type) {
+	case XFS_SCRUB_TYPE_INODE:
+		error = xchk_inode(sc);
+		break;
+	case XFS_SCRUB_TYPE_BMBTD:
+		error = xchk_bmap_data(sc);
+		break;
+	default:
+		ASSERT(0);
+		error = -EFSCORRUPTED;
+		break;
+	}
+
+	sc->sm->sm_type = smtype;
+	return error;
+}
+
 /*
  * Scrub the attr/data forks of a metadata inode.  The metadata inode must be
  * pointed to by sc->ip and the ILOCK must be held.
@@ -789,13 +816,17 @@ int
 xchk_metadata_inode_forks(
 	struct xfs_scrub	*sc)
 {
-	__u32			smtype;
 	bool			shared;
 	int			error;
 
 	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
 		return 0;
 
+	/* Check the inode record. */
+	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_INODE);
+	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+		return error;
+
 	/* Metadata inodes don't live on the rt device. */
 	if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) {
 		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
@@ -815,10 +846,7 @@ xchk_metadata_inode_forks(
 	}
 
 	/* Invoke the data fork scrubber. */
-	smtype = sc->sm->sm_type;
-	sc->sm->sm_type = XFS_SCRUB_TYPE_BMBTD;
-	error = xchk_bmap_data(sc);
-	sc->sm->sm_type = smtype;
+	error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
 	if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
 		return error;
 
@@ -833,7 +861,7 @@ xchk_metadata_inode_forks(
 			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
 	}
 
-	return error;
+	return 0;
 }
 
 /*
-- 
GitLab


From 1cec8bbc1764964de24d19983fbf9fee6ce3c09d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Nov 2022 00:23:49 +0000
Subject: [PATCH 1640/4122] KVM: arm64: selftests: Disable single-step with
 correct KVM define

Disable single-step by setting debug.control to KVM_GUESTDBG_ENABLE,
not to SINGLE_STEP_DISABLE.  The latter is an arbitrary test enum that
just happens to have the same value as KVM_GUESTDBG_ENABLE, and so
effectively disables single-step debug.

No functional change intended.

Cc: Reiji Watanabe <reijiw@google.com>
Fixes: b18e4d4aebdd ("KVM: arm64: selftests: Add a test case for KVM_GUESTDBG_SINGLESTEP")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221117002350.2178351-2-seanjc@google.com
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
---
 tools/testing/selftests/kvm/aarch64/debug-exceptions.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 878c334607e1a..0316f225d36a6 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -369,7 +369,7 @@ void test_single_step_from_userspace(int test_cnt)
 						KVM_GUESTDBG_SINGLESTEP;
 				ss_enable = true;
 			} else {
-				debug.control = SINGLE_STEP_DISABLE;
+				debug.control = KVM_GUESTDBG_ENABLE;
 				ss_enable = false;
 			}
 
-- 
GitLab


From b3d937722de0e64eebe267451a0e3d5ed5107ef7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 17 Nov 2022 00:23:50 +0000
Subject: [PATCH 1641/4122] KVM: arm64: selftests: Disable single-step without
 relying on ucall()

Automatically disable single-step when the guest reaches the end of the
verified section instead of using an explicit ucall() to ask userspace to
disable single-step.  An upcoming change to implement a pool-based scheme
for ucall() will add an atomic operation (bit test and set) in the guest
ucall code, and if the compiler generate "old school" atomics, e.g.

  40e57c:       c85f7c20        ldxr    x0, [x1]
  40e580:       aa100011        orr     x17, x0, x16
  40e584:       c80ffc31        stlxr   w15, x17, [x1]
  40e588:       35ffffaf        cbnz    w15, 40e57c <__aarch64_ldset8_sync+0x1c>

the guest will hang as the local exclusive monitor is reset by eret,
i.e. the stlxr will always fail due to the debug exception taken to EL2.

Link: https://lore.kernel.org/all/20221006003409.649993-8-seanjc@google.com
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Marc Zyngier <maz@kernel.org>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221117002350.2178351-3-seanjc@google.com
Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 28 ++++++++++---------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index 0316f225d36a6..a3c2216b65fb3 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -241,7 +241,6 @@ static void guest_svc_handler(struct ex_regs *regs)
 
 enum single_step_op {
 	SINGLE_STEP_ENABLE = 0,
-	SINGLE_STEP_DISABLE = 1,
 };
 
 static void guest_code_ss(int test_cnt)
@@ -258,7 +257,7 @@ static void guest_code_ss(int test_cnt)
 		GUEST_SYNC(SINGLE_STEP_ENABLE);
 
 		/*
-		 * The userspace will veriry that the pc is as expected during
+		 * The userspace will verify that the pc is as expected during
 		 * single step execution between iter_ss_begin and iter_ss_end.
 		 */
 		asm volatile("iter_ss_begin:nop\n");
@@ -268,11 +267,9 @@ static void guest_code_ss(int test_cnt)
 		bvr = read_sysreg(dbgbvr0_el1);
 		wvr = read_sysreg(dbgwvr0_el1);
 
+		/* Userspace disables Single Step when the end is nigh. */
 		asm volatile("iter_ss_end:\n");
 
-		/* Disable Single Step execution */
-		GUEST_SYNC(SINGLE_STEP_DISABLE);
-
 		GUEST_ASSERT(bvr == w_bvr);
 		GUEST_ASSERT(wvr == w_wvr);
 	}
@@ -364,15 +361,12 @@ void test_single_step_from_userspace(int test_cnt)
 			TEST_ASSERT(cmd == UCALL_SYNC,
 				    "Unexpected ucall cmd 0x%lx", cmd);
 
-			if (uc.args[1] == SINGLE_STEP_ENABLE) {
-				debug.control = KVM_GUESTDBG_ENABLE |
-						KVM_GUESTDBG_SINGLESTEP;
-				ss_enable = true;
-			} else {
-				debug.control = KVM_GUESTDBG_ENABLE;
-				ss_enable = false;
-			}
+			TEST_ASSERT(uc.args[1] == SINGLE_STEP_ENABLE,
+				    "Unexpected ucall action 0x%lx", uc.args[1]);
 
+			debug.control = KVM_GUESTDBG_ENABLE |
+					KVM_GUESTDBG_SINGLESTEP;
+			ss_enable = true;
 			vcpu_guest_debug_set(vcpu, &debug);
 			continue;
 		}
@@ -385,6 +379,14 @@ void test_single_step_from_userspace(int test_cnt)
 			    "Unexpected pc 0x%lx (expected 0x%lx)",
 			    pc, test_pc);
 
+		if ((pc + 4) == (uint64_t)&iter_ss_end) {
+			test_pc = 0;
+			debug.control = KVM_GUESTDBG_ENABLE;
+			ss_enable = false;
+			vcpu_guest_debug_set(vcpu, &debug);
+			continue;
+		}
+
 		/*
 		 * If the current pc is between iter_ss_bgin and
 		 * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
-- 
GitLab


From 7046638192d52416adbfc273c36950f0e3311191 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:34:03 +0000
Subject: [PATCH 1642/4122] KVM: selftests: Consolidate common code for
 populating ucall struct

Make ucall() a common helper that populates struct ucall, and only calls
into arch code to make the actually call out to userspace.

Rename all arch-specific helpers to make it clear they're arch-specific,
and to avoid collisions with common helpers (one more on its way...)

Add WRITE_ONCE() to stores in ucall() code (as already done to aarch64
code in commit 9e2f6498efbb ("selftests: KVM: Handle compiler
optimizations in ucall")) to prevent clang optimizations breaking ucalls.

Cc: Colton Lewis <coltonlewis@google.com>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Tested-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006003409.649993-2-seanjc@google.com
---
 tools/testing/selftests/kvm/Makefile          |  1 +
 .../selftests/kvm/include/ucall_common.h      | 23 ++++++++++++++++---
 .../testing/selftests/kvm/lib/aarch64/ucall.c | 22 ++++--------------
 tools/testing/selftests/kvm/lib/riscv/ucall.c | 23 ++++---------------
 tools/testing/selftests/kvm/lib/s390x/ucall.c | 23 ++++---------------
 .../testing/selftests/kvm/lib/ucall_common.c  | 20 ++++++++++++++++
 .../testing/selftests/kvm/lib/x86_64/ucall.c  | 23 ++++---------------
 7 files changed, 61 insertions(+), 74 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/lib/ucall_common.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a00253b79040b..6e2a683629c77 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -47,6 +47,7 @@ LIBKVM += lib/memstress.c
 LIBKVM += lib/rbtree.c
 LIBKVM += lib/sparsebit.c
 LIBKVM += lib/test_util.c
+LIBKVM += lib/ucall_common.c
 
 LIBKVM_STRING += lib/string_override.c
 
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index ee79d180e07e4..5a85f5318bbe5 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -24,10 +24,27 @@ struct ucall {
 	uint64_t args[UCALL_MAX_ARGS];
 };
 
-void ucall_init(struct kvm_vm *vm, void *arg);
-void ucall_uninit(struct kvm_vm *vm);
+void ucall_arch_init(struct kvm_vm *vm, void *arg);
+void ucall_arch_uninit(struct kvm_vm *vm);
+void ucall_arch_do_ucall(vm_vaddr_t uc);
+uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
+
 void ucall(uint64_t cmd, int nargs, ...);
-uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
+
+static inline void ucall_init(struct kvm_vm *vm, void *arg)
+{
+	ucall_arch_init(vm, arg);
+}
+
+static inline void ucall_uninit(struct kvm_vm *vm)
+{
+	ucall_arch_uninit(vm);
+}
+
+static inline uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+	return ucall_arch_get_ucall(vcpu, uc);
+}
 
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
 				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index ed237b7446907..3630708c32d62 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -21,7 +21,7 @@ static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
 	return true;
 }
 
-void ucall_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_init(struct kvm_vm *vm, void *arg)
 {
 	vm_paddr_t gpa, start, end, step, offset;
 	unsigned int bits;
@@ -64,30 +64,18 @@ void ucall_init(struct kvm_vm *vm, void *arg)
 	TEST_FAIL("Can't find a ucall mmio address");
 }
 
-void ucall_uninit(struct kvm_vm *vm)
+void ucall_arch_uninit(struct kvm_vm *vm)
 {
 	ucall_exit_mmio_addr = 0;
 	sync_global_to_guest(vm, ucall_exit_mmio_addr);
 }
 
-void ucall(uint64_t cmd, int nargs, ...)
+void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
-	struct ucall uc = {};
-	va_list va;
-	int i;
-
-	WRITE_ONCE(uc.cmd, cmd);
-	nargs = min(nargs, UCALL_MAX_ARGS);
-
-	va_start(va, nargs);
-	for (i = 0; i < nargs; ++i)
-		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
-	va_end(va);
-
-	WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
+	WRITE_ONCE(*ucall_exit_mmio_addr, uc);
 }
 
-uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 {
 	struct kvm_run *run = vcpu->run;
 	struct ucall ucall = {};
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 087b9740bc8fc..b1598f418c1f8 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -10,11 +10,11 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-void ucall_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_init(struct kvm_vm *vm, void *arg)
 {
 }
 
-void ucall_uninit(struct kvm_vm *vm)
+void ucall_arch_uninit(struct kvm_vm *vm)
 {
 }
 
@@ -44,27 +44,14 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
 	return ret;
 }
 
-void ucall(uint64_t cmd, int nargs, ...)
+void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
-	struct ucall uc = {
-		.cmd = cmd,
-	};
-	va_list va;
-	int i;
-
-	nargs = min(nargs, UCALL_MAX_ARGS);
-
-	va_start(va, nargs);
-	for (i = 0; i < nargs; ++i)
-		uc.args[i] = va_arg(va, uint64_t);
-	va_end(va);
-
 	sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
 		  KVM_RISCV_SELFTESTS_SBI_UCALL,
-		  (vm_vaddr_t)&uc, 0, 0, 0, 0, 0);
+		  uc, 0, 0, 0, 0, 0);
 }
 
-uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 {
 	struct kvm_run *run = vcpu->run;
 	struct ucall ucall = {};
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index 73dc4e21190ff..114cb4af295f5 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -6,34 +6,21 @@
  */
 #include "kvm_util.h"
 
-void ucall_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_init(struct kvm_vm *vm, void *arg)
 {
 }
 
-void ucall_uninit(struct kvm_vm *vm)
+void ucall_arch_uninit(struct kvm_vm *vm)
 {
 }
 
-void ucall(uint64_t cmd, int nargs, ...)
+void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
-	struct ucall uc = {
-		.cmd = cmd,
-	};
-	va_list va;
-	int i;
-
-	nargs = min(nargs, UCALL_MAX_ARGS);
-
-	va_start(va, nargs);
-	for (i = 0; i < nargs; ++i)
-		uc.args[i] = va_arg(va, uint64_t);
-	va_end(va);
-
 	/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
-	asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
+	asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
 }
 
-uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 {
 	struct kvm_run *run = vcpu->run;
 	struct ucall ucall = {};
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
new file mode 100644
index 0000000000000..2395c7f1d5435
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "kvm_util.h"
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+	struct ucall uc = {};
+	va_list va;
+	int i;
+
+	WRITE_ONCE(uc.cmd, cmd);
+
+	nargs = min(nargs, UCALL_MAX_ARGS);
+
+	va_start(va, nargs);
+	for (i = 0; i < nargs; ++i)
+		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+	va_end(va);
+
+	ucall_arch_do_ucall((vm_vaddr_t)&uc);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index e5f0f9e0d3ee0..9f532dba10035 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -8,34 +8,21 @@
 
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
-void ucall_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_init(struct kvm_vm *vm, void *arg)
 {
 }
 
-void ucall_uninit(struct kvm_vm *vm)
+void ucall_arch_uninit(struct kvm_vm *vm)
 {
 }
 
-void ucall(uint64_t cmd, int nargs, ...)
+void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
-	struct ucall uc = {
-		.cmd = cmd,
-	};
-	va_list va;
-	int i;
-
-	nargs = min(nargs, UCALL_MAX_ARGS);
-
-	va_start(va, nargs);
-	for (i = 0; i < nargs; ++i)
-		uc.args[i] = va_arg(va, uint64_t);
-	va_end(va);
-
 	asm volatile("in %[port], %%al"
-		: : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
+		: : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory");
 }
 
-uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 {
 	struct kvm_run *run = vcpu->run;
 	struct ucall ucall = {};
-- 
GitLab


From ef38871eb22879438d2af8642ed7a52c1616f410 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:34:04 +0000
Subject: [PATCH 1643/4122] KVM: selftests: Consolidate boilerplate code in
 get_ucall()

Consolidate the actual copying of a ucall struct from guest=>host into
the common get_ucall().  Return a host virtual address instead of a guest
virtual address even though the addr_gva2hva() part could be moved to
get_ucall() too.  Conceptually, get_ucall() is invoked from the host and
should return a host virtual address (and returning NULL for "nothing to
see here" is far superior to returning 0).

Use pointer shenanigans instead of an unnecessary bounce buffer when the
caller of get_ucall() provides a valid pointer.

Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Tested-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006003409.649993-3-seanjc@google.com
---
 .../selftests/kvm/include/ucall_common.h      |  8 ++------
 .../testing/selftests/kvm/lib/aarch64/ucall.c | 14 +++-----------
 tools/testing/selftests/kvm/lib/riscv/ucall.c | 19 +++----------------
 tools/testing/selftests/kvm/lib/s390x/ucall.c | 16 +++-------------
 .../testing/selftests/kvm/lib/ucall_common.c  | 19 +++++++++++++++++++
 .../testing/selftests/kvm/lib/x86_64/ucall.c  | 16 +++-------------
 6 files changed, 33 insertions(+), 59 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 5a85f5318bbe5..63bfc60be995e 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -27,9 +27,10 @@ struct ucall {
 void ucall_arch_init(struct kvm_vm *vm, void *arg);
 void ucall_arch_uninit(struct kvm_vm *vm);
 void ucall_arch_do_ucall(vm_vaddr_t uc);
-uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
 
 void ucall(uint64_t cmd, int nargs, ...);
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
 
 static inline void ucall_init(struct kvm_vm *vm, void *arg)
 {
@@ -41,11 +42,6 @@ static inline void ucall_uninit(struct kvm_vm *vm)
 	ucall_arch_uninit(vm);
 }
 
-static inline uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
-{
-	return ucall_arch_get_ucall(vcpu, uc);
-}
-
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
 				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 3630708c32d62..f214f5cc53d3b 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -75,13 +75,9 @@ void ucall_arch_do_ucall(vm_vaddr_t uc)
 	WRITE_ONCE(*ucall_exit_mmio_addr, uc);
 }
 
-uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	struct ucall ucall = {};
-
-	if (uc)
-		memset(uc, 0, sizeof(*uc));
 
 	if (run->exit_reason == KVM_EXIT_MMIO &&
 	    run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
@@ -90,12 +86,8 @@ uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 		TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
 			    "Unexpected ucall exit mmio address access");
 		memcpy(&gva, run->mmio.data, sizeof(gva));
-		memcpy(&ucall, addr_gva2hva(vcpu->vm, gva), sizeof(ucall));
-
-		vcpu_run_complete_io(vcpu);
-		if (uc)
-			memcpy(uc, &ucall, sizeof(ucall));
+		return addr_gva2hva(vcpu->vm, gva);
 	}
 
-	return ucall.cmd;
+	return NULL;
 }
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index b1598f418c1f8..37e091d4366e5 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -51,27 +51,15 @@ void ucall_arch_do_ucall(vm_vaddr_t uc)
 		  uc, 0, 0, 0, 0, 0);
 }
 
-uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	struct ucall ucall = {};
-
-	if (uc)
-		memset(uc, 0, sizeof(*uc));
 
 	if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
 	    run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
 		switch (run->riscv_sbi.function_id) {
 		case KVM_RISCV_SELFTESTS_SBI_UCALL:
-			memcpy(&ucall,
-			       addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]),
-			       sizeof(ucall));
-
-			vcpu_run_complete_io(vcpu);
-			if (uc)
-				memcpy(uc, &ucall, sizeof(ucall));
-
-			break;
+			return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]);
 		case KVM_RISCV_SELFTESTS_SBI_UNEXP:
 			vcpu_dump(stderr, vcpu, 2);
 			TEST_ASSERT(0, "Unexpected trap taken by guest");
@@ -80,6 +68,5 @@ uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 			break;
 		}
 	}
-
-	return ucall.cmd;
+	return NULL;
 }
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index 114cb4af295f5..0f695a031d35d 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -20,13 +20,9 @@ void ucall_arch_do_ucall(vm_vaddr_t uc)
 	asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
 }
 
-uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	struct ucall ucall = {};
-
-	if (uc)
-		memset(uc, 0, sizeof(*uc));
 
 	if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
 	    run->s390_sieic.icptcode == 4 &&
@@ -34,13 +30,7 @@ uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
 	    (run->s390_sieic.ipb >> 16) == 0x501) {
 		int reg = run->s390_sieic.ipa & 0xf;
 
-		memcpy(&ucall, addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]),
-		       sizeof(ucall));
-
-		vcpu_run_complete_io(vcpu);
-		if (uc)
-			memcpy(uc, &ucall, sizeof(ucall));
+		return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]);
 	}
-
-	return ucall.cmd;
+	return NULL;
 }
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index 2395c7f1d5435..ced4808607463 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -18,3 +18,22 @@ void ucall(uint64_t cmd, int nargs, ...)
 
 	ucall_arch_do_ucall((vm_vaddr_t)&uc);
 }
+
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+	struct ucall ucall;
+	void *addr;
+
+	if (!uc)
+		uc = &ucall;
+
+	addr = ucall_arch_get_ucall(vcpu);
+	if (addr) {
+		memcpy(uc, addr, sizeof(*uc));
+		vcpu_run_complete_io(vcpu);
+	} else {
+		memset(uc, 0, sizeof(*uc));
+	}
+
+	return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index 9f532dba10035..ead9946399abf 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -22,25 +22,15 @@ void ucall_arch_do_ucall(vm_vaddr_t uc)
 		: : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory");
 }
 
-uint64_t ucall_arch_get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
-	struct ucall ucall = {};
-
-	if (uc)
-		memset(uc, 0, sizeof(*uc));
 
 	if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
 		struct kvm_regs regs;
 
 		vcpu_regs_get(vcpu, &regs);
-		memcpy(&ucall, addr_gva2hva(vcpu->vm, (vm_vaddr_t)regs.rdi),
-		       sizeof(ucall));
-
-		vcpu_run_complete_io(vcpu);
-		if (uc)
-			memcpy(uc, &ucall, sizeof(ucall));
+		return addr_gva2hva(vcpu->vm, regs.rdi);
 	}
-
-	return ucall.cmd;
+	return NULL;
 }
-- 
GitLab


From dc88244bf5488b04fb7bbe47d8d9c38ff8f7dbb4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:34:05 +0000
Subject: [PATCH 1644/4122] KVM: selftests: Automatically do init_ucall() for
 non-barebones VMs

Do init_ucall() automatically during VM creation to kill two (three?)
birds with one stone.

First, initializing ucall immediately after VM creations allows forcing
aarch64's MMIO ucall address to immediately follow memslot0.  This is
still somewhat fragile as tests could clobber the MMIO address with a
new memslot, but it's safe-ish since tests have to be conversative when
accounting for memslot0.  And this can be hardened in the future by
creating a read-only memslot for the MMIO page (KVM ARM exits with MMIO
if the guest writes to a read-only memslot).  Add a TODO to document that
selftests can and should use a memslot for the ucall MMIO (doing so
requires yet more rework because tests assumes thay can use all memslots
except memslot0).

Second, initializing ucall for all VMs prepares for making ucall
initialization meaningful on all architectures.  aarch64 is currently the
only arch that needs to do any setup, but that will change in the future
by switching to a pool-based implementation (instead of the current
stack-based approach).

Lastly, defining the ucall MMIO address from common code will simplify
switching all architectures (except s390) to a common MMIO-based ucall
implementation (if there's ever sufficient motivation to do so).

Cc: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Tested-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006003409.649993-4-seanjc@google.com
---
 .../selftests/kvm/aarch64/aarch32_id_regs.c   |  2 -
 .../selftests/kvm/aarch64/arch_timer.c        |  1 -
 .../selftests/kvm/aarch64/debug-exceptions.c  |  2 -
 .../selftests/kvm/aarch64/hypercalls.c        |  1 -
 .../testing/selftests/kvm/aarch64/psci_test.c |  1 -
 .../testing/selftests/kvm/aarch64/vgic_init.c |  2 -
 .../testing/selftests/kvm/aarch64/vgic_irq.c  |  1 -
 tools/testing/selftests/kvm/dirty_log_test.c  |  2 -
 .../selftests/kvm/include/ucall_common.h      |  6 +--
 .../selftests/kvm/kvm_page_table_test.c       |  1 -
 .../testing/selftests/kvm/lib/aarch64/ucall.c | 54 ++-----------------
 tools/testing/selftests/kvm/lib/kvm_util.c    | 11 ++++
 tools/testing/selftests/kvm/lib/memstress.c   |  2 -
 tools/testing/selftests/kvm/lib/riscv/ucall.c |  2 +-
 tools/testing/selftests/kvm/lib/s390x/ucall.c |  2 +-
 .../testing/selftests/kvm/lib/x86_64/ucall.c  |  2 +-
 .../testing/selftests/kvm/memslot_perf_test.c |  1 -
 tools/testing/selftests/kvm/rseq_test.c       |  1 -
 tools/testing/selftests/kvm/steal_time.c      |  1 -
 .../kvm/system_counter_offset_test.c          |  1 -
 20 files changed, 20 insertions(+), 76 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
index 6f9c1f19c7f64..03f6b3af6b4d7 100644
--- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
@@ -158,8 +158,6 @@ int main(void)
 
 	TEST_REQUIRE(vcpu_aarch64_only(vcpu));
 
-	ucall_init(vm, NULL);
-
 	test_user_raz_wi(vcpu);
 	test_user_raz_invariant(vcpu);
 	test_guest_raz(vcpu);
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 9409617fce9c4..54016cdd8a098 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -375,7 +375,6 @@ static struct kvm_vm *test_vm_create(void)
 	for (i = 0; i < nr_vcpus; i++)
 		vcpu_init_descriptor_tables(vcpus[i]);
 
-	ucall_init(vm, NULL);
 	test_init_timer_irq(vm);
 	gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
 	__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index a3c2216b65fb3..d86c4e4d1c826 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -292,7 +292,6 @@ static void test_guest_debug_exceptions(void)
 	int stage;
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	ucall_init(vm, NULL);
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vcpu);
@@ -343,7 +342,6 @@ void test_single_step_from_userspace(int test_cnt)
 	struct kvm_guest_debug debug = {};
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
-	ucall_init(vm, NULL);
 	run = vcpu->run;
 	vcpu_args_set(vcpu, 1, test_cnt);
 
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
index a39da3fe49525..3dceecfd1f628 100644
--- a/tools/testing/selftests/kvm/aarch64/hypercalls.c
+++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c
@@ -236,7 +236,6 @@ static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
 
 	vm = vm_create_with_one_vcpu(vcpu, guest_code);
 
-	ucall_init(vm, NULL);
 	steal_time_init(*vcpu);
 
 	return vm;
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index e0b9e81a3e091..cfa36f3879484 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -79,7 +79,6 @@ static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
 	struct kvm_vm *vm;
 
 	vm = vm_create(2);
-	ucall_init(vm, NULL);
 
 	vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
 	init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
index 9c131d977a1b5..eef816b80993f 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c
@@ -68,8 +68,6 @@ static void guest_code(void)
 /* we don't want to assert on run execution, hence that helper */
 static int run_vcpu(struct kvm_vcpu *vcpu)
 {
-	ucall_init(vcpu->vm, NULL);
-
 	return __vcpu_run(vcpu) ? -errno : 0;
 }
 
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index 4ead42a072b7d..e0310ebc313ca 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -756,7 +756,6 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
 	print_args(&args);
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	ucall_init(vm, NULL);
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vcpu);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index b5234d6efbe15..b458a2701634b 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -756,8 +756,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	/* Cache the HVA pointer of the region */
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
 
-	ucall_init(vm, NULL);
-
 	/* Export the shared variables to the guest */
 	sync_global_to_guest(vm, host_page_size);
 	sync_global_to_guest(vm, guest_page_size);
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 63bfc60be995e..8077a6d8b1ba5 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -24,7 +24,7 @@ struct ucall {
 	uint64_t args[UCALL_MAX_ARGS];
 };
 
-void ucall_arch_init(struct kvm_vm *vm, void *arg);
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
 void ucall_arch_uninit(struct kvm_vm *vm);
 void ucall_arch_do_ucall(vm_vaddr_t uc);
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
@@ -32,9 +32,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
 void ucall(uint64_t cmd, int nargs, ...);
 uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
 
-static inline void ucall_init(struct kvm_vm *vm, void *arg)
+static inline void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
-	ucall_arch_init(vm, arg);
+	ucall_arch_init(vm, mmio_gpa);
 }
 
 static inline void ucall_uninit(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 696b366be06b4..3db32d56787e8 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -289,7 +289,6 @@ static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
 	host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
 
 	/* Export shared structure test_args to guest */
-	ucall_init(vm, NULL);
 	sync_global_to_guest(vm, test_args);
 
 	ret = sem_init(&test_stage_updated, 0, 0);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index f214f5cc53d3b..f02ae27c3e43a 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -8,60 +8,12 @@
 
 static vm_vaddr_t *ucall_exit_mmio_addr;
 
-static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
-	if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
-		return false;
+	virt_pg_map(vm, mmio_gpa, mmio_gpa);
 
-	virt_pg_map(vm, gpa, gpa);
-
-	ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
+	ucall_exit_mmio_addr = (vm_vaddr_t *)mmio_gpa;
 	sync_global_to_guest(vm, ucall_exit_mmio_addr);
-
-	return true;
-}
-
-void ucall_arch_init(struct kvm_vm *vm, void *arg)
-{
-	vm_paddr_t gpa, start, end, step, offset;
-	unsigned int bits;
-	bool ret;
-
-	if (arg) {
-		gpa = (vm_paddr_t)arg;
-		ret = ucall_mmio_init(vm, gpa);
-		TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
-		return;
-	}
-
-	/*
-	 * Find an address within the allowed physical and virtual address
-	 * spaces, that does _not_ have a KVM memory region associated with
-	 * it. Identity mapping an address like this allows the guest to
-	 * access it, but as KVM doesn't know what to do with it, it
-	 * will assume it's something userspace handles and exit with
-	 * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
-	 * Here we start with a guess that the addresses around 5/8th
-	 * of the allowed space are unmapped and then work both down and
-	 * up from there in 1/16th allowed space sized steps.
-	 *
-	 * Note, we need to use VA-bits - 1 when calculating the allowed
-	 * virtual address space for an identity mapping because the upper
-	 * half of the virtual address space is the two's complement of the
-	 * lower and won't match physical addresses.
-	 */
-	bits = vm->va_bits - 1;
-	bits = min(vm->pa_bits, bits);
-	end = 1ul << bits;
-	start = end * 5 / 8;
-	step = end / 16;
-	for (offset = 0; offset < end - start; offset += step) {
-		if (ucall_mmio_init(vm, start - offset))
-			return;
-		if (ucall_mmio_init(vm, start + offset))
-			return;
-	}
-	TEST_FAIL("Can't find a ucall mmio address");
 }
 
 void ucall_arch_uninit(struct kvm_vm *vm)
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 3b7710fb37840..07c8edd4e5486 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -335,15 +335,26 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
 {
 	uint64_t nr_pages = vm_nr_pages_required(mode, nr_runnable_vcpus,
 						 nr_extra_pages);
+	struct userspace_mem_region *slot0;
 	struct kvm_vm *vm;
 
 	vm = ____vm_create(mode, nr_pages);
 
 	kvm_vm_elf_load(vm, program_invocation_name);
 
+	/*
+	 * TODO: Add proper defines to protect the library's memslots, and then
+	 * carve out memslot1 for the ucall MMIO address.  KVM treats writes to
+	 * read-only memslots as MMIO, and creating a read-only memslot for the
+	 * MMIO region would prevent silently clobbering the MMIO region.
+	 */
+	slot0 = memslot2region(vm, 0);
+	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
 #ifdef __x86_64__
 	vm_create_irqchip(vm);
 #endif
+
 	return vm;
 }
 
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index 503da78c558dd..b66404e56a3f8 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -221,8 +221,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 		memstress_setup_nested(vm, nr_vcpus, vcpus);
 	}
 
-	ucall_init(vm, NULL);
-
 	/* Export the shared variables to the guest. */
 	sync_global_to_guest(vm, memstress_args);
 
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 37e091d4366e5..c58ecb8a0981b 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -10,7 +10,7 @@
 #include "kvm_util.h"
 #include "processor.h"
 
-void ucall_arch_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 }
 
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index 0f695a031d35d..208f0f04299bb 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -6,7 +6,7 @@
  */
 #include "kvm_util.h"
 
-void ucall_arch_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 }
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index ead9946399abf..016a0487cf723 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -8,7 +8,7 @@
 
 #define UCALL_PIO_PORT ((uint16_t)0x1000)
 
-void ucall_arch_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 }
 
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index 330aaef1c02fb..d771262ea584b 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -277,7 +277,6 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	TEST_ASSERT(data->hva_slots, "malloc() fail");
 
 	data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
-	ucall_init(data->vm, NULL);
 
 	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
 		max_mem_slots - 1, data->pages_per_slot, rempages);
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 6f88da7e60be6..0e9e2b48a51f7 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -224,7 +224,6 @@ int main(int argc, char *argv[])
 	 * CPU affinity.
 	 */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-	ucall_init(vm, NULL);
 
 	pthread_create(&migration_thread, NULL, migration_worker,
 		       (void *)(unsigned long)syscall(SYS_gettid));
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index db8967f1a17bc..c87f387120736 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -266,7 +266,6 @@ int main(int ac, char **av)
 	gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
 	virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
-	ucall_init(vm, NULL);
 
 	TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
 
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c
index 1c274933912bb..7f5b330b6a1b1 100644
--- a/tools/testing/selftests/kvm/system_counter_offset_test.c
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -121,7 +121,6 @@ int main(void)
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_main);
 	check_preconditions(vcpu);
-	ucall_init(vm, NULL);
 
 	enter_guest(vcpu);
 	kvm_vm_free(vm);
-- 
GitLab


From cf4694be2b2cf74945e50d39a02ea2307c4495f4 Mon Sep 17 00:00:00 2001
From: Peter Gonda <pgonda@google.com>
Date: Thu, 6 Oct 2022 00:34:06 +0000
Subject: [PATCH 1645/4122] tools: Add atomic_test_and_set_bit()

Add x86 and generic implementations of atomic_test_and_set_bit() to allow
KVM selftests to atomically manage bitmaps.

Note, the generic version is taken from arch_test_and_set_bit() as of
commit 415d83249709 ("locking/atomic: Make test_and_*_bit() ordered on
failure").

Signed-off-by: Peter Gonda <pgonda@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006003409.649993-5-seanjc@google.com
---
 tools/arch/x86/include/asm/atomic.h    |  7 +++++++
 tools/include/asm-generic/atomic-gcc.h | 12 ++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
index 1f5e26aae9fc5..01cc27ec4520d 100644
--- a/tools/arch/x86/include/asm/atomic.h
+++ b/tools/arch/x86/include/asm/atomic.h
@@ -8,6 +8,7 @@
 
 #define LOCK_PREFIX "\n\tlock; "
 
+#include <asm/asm.h>
 #include <asm/cmpxchg.h>
 
 /*
@@ -70,4 +71,10 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return cmpxchg(&v->counter, old, new);
 }
 
+static inline int atomic_test_and_set_bit(long nr, unsigned long *addr)
+{
+	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c");
+
+}
+
 #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
index 4c1966f7c77a7..6daa68bf5b9e0 100644
--- a/tools/include/asm-generic/atomic-gcc.h
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -4,6 +4,7 @@
 
 #include <linux/compiler.h>
 #include <linux/types.h>
+#include <linux/bitops.h>
 
 /*
  * Atomic operations that C can't guarantee us.  Useful for
@@ -69,4 +70,15 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
 	return cmpxchg(&(v)->counter, oldval, newval);
 }
 
+static inline int atomic_test_and_set_bit(long nr, unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	long old;
+
+	addr += BIT_WORD(nr);
+
+	old = __sync_fetch_and_or(addr, mask);
+	return !!(old & mask);
+}
+
 #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
-- 
GitLab


From 03b4750533fc6519845ac2ca0e1d88a81ac260a1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:34:07 +0000
Subject: [PATCH 1646/4122] KVM: selftests: Make arm64's MMIO ucall multi-VM
 friendly

Fix a mostly-theoretical bug where ARM's ucall MMIO setup could result in
different VMs stomping on each other by cloberring the global pointer.

Fix the most obvious issue by saving the MMIO gpa into the VM.

A more subtle bug is that creating VMs in parallel (on multiple tasks)
could result in a VM using the wrong address.  Synchronizing a global to
a guest effectively snapshots the value on a per-VM basis, i.e. the
"global" is already prepped to work with multiple VMs, but setting the
global in the host is not thread-safe.  To fix that bug, add
write_guest_global() to allow stuffing a VM's copy of a "global" without
modifying the host value.

Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Tested-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006003409.649993-6-seanjc@google.com
---
 .../selftests/kvm/include/kvm_util_base.h     | 15 +++++++++++++++
 .../testing/selftests/kvm/lib/aarch64/ucall.c | 19 ++++++++++++++-----
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 3bf2333ef95d2..a7047e0767d33 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -16,6 +16,7 @@
 #include <linux/kvm.h>
 #include "linux/rbtree.h"
 
+#include <asm/atomic.h>
 
 #include <sys/ioctl.h>
 
@@ -81,6 +82,7 @@ struct kvm_vm {
 	struct sparsebit *vpages_mapped;
 	bool has_irqchip;
 	bool pgd_created;
+	vm_paddr_t ucall_mmio_addr;
 	vm_paddr_t pgd;
 	vm_vaddr_t gdt;
 	vm_vaddr_t tss;
@@ -722,6 +724,19 @@ kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
 	memcpy(&(g), _p, sizeof(g));				\
 })
 
+/*
+ * Write a global value, but only in the VM's (guest's) domain.  Primarily used
+ * for "globals" that hold per-VM values (VMs always duplicate code and global
+ * data into their own region of physical memory), but can be used anytime it's
+ * undesirable to change the host's copy of the global.
+ */
+#define write_guest_global(vm, g, val) ({			\
+	typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g));	\
+	typeof(g) _val = val;					\
+								\
+	memcpy(_p, &(_val), sizeof(g));				\
+})
+
 void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
 
 void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index f02ae27c3e43a..1c38bd260f90b 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -6,20 +6,29 @@
  */
 #include "kvm_util.h"
 
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
 static vm_vaddr_t *ucall_exit_mmio_addr;
 
+static void ucall_set_mmio_addr(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+	vm->ucall_mmio_addr = mmio_gpa;
+
+	write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa);
+}
+
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 	virt_pg_map(vm, mmio_gpa, mmio_gpa);
 
-	ucall_exit_mmio_addr = (vm_vaddr_t *)mmio_gpa;
-	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+	ucall_set_mmio_addr(vm, mmio_gpa);
 }
 
 void ucall_arch_uninit(struct kvm_vm *vm)
 {
-	ucall_exit_mmio_addr = 0;
-	sync_global_to_guest(vm, ucall_exit_mmio_addr);
+	ucall_set_mmio_addr(vm, (vm_paddr_t)NULL);
 }
 
 void ucall_arch_do_ucall(vm_vaddr_t uc)
@@ -32,7 +41,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 	struct kvm_run *run = vcpu->run;
 
 	if (run->exit_reason == KVM_EXIT_MMIO &&
-	    run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
+	    run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
 		vm_vaddr_t gva;
 
 		TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
-- 
GitLab


From 28a65567acb51759079adf5c6e3fcd047cda8120 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:34:08 +0000
Subject: [PATCH 1647/4122] KVM: selftests: Drop now-unnecessary ucall_uninit()

Drop ucall_uninit() and ucall_arch_uninit() now that ARM doesn't modify
the host's copy of ucall_exit_mmio_addr, i.e. now that there's no need to
reset the pointer before potentially creating a new VM.  The few calls to
ucall_uninit() are all immediately followed by kvm_vm_free(), and that is
likely always going to hold true, i.e. it's extremely unlikely a test
will want to effectively disable ucall in the middle of a test.

Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Tested-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006003409.649993-7-seanjc@google.com
---
 .../selftests/kvm/aarch64/aarch32_id_regs.c        |  1 -
 tools/testing/selftests/kvm/dirty_log_test.c       |  1 -
 tools/testing/selftests/kvm/include/ucall_common.h |  6 ------
 tools/testing/selftests/kvm/kvm_page_table_test.c  |  1 -
 tools/testing/selftests/kvm/lib/aarch64/ucall.c    | 14 ++------------
 tools/testing/selftests/kvm/lib/memstress.c        |  1 -
 tools/testing/selftests/kvm/lib/riscv/ucall.c      |  4 ----
 tools/testing/selftests/kvm/lib/s390x/ucall.c      |  4 ----
 tools/testing/selftests/kvm/lib/x86_64/ucall.c     |  4 ----
 9 files changed, 2 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
index 03f6b3af6b4d7..b1d2158c0b6de 100644
--- a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
@@ -162,6 +162,5 @@ int main(void)
 	test_user_raz_invariant(vcpu);
 	test_guest_raz(vcpu);
 
-	ucall_uninit(vm);
 	kvm_vm_free(vm);
 }
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index b458a2701634b..a38c4369fb8ed 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -811,7 +811,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 	free(bmap);
 	free(host_bmap_track);
-	ucall_uninit(vm);
 	kvm_vm_free(vm);
 }
 
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 8077a6d8b1ba5..2662a4352a8c0 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -25,7 +25,6 @@ struct ucall {
 };
 
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
-void ucall_arch_uninit(struct kvm_vm *vm);
 void ucall_arch_do_ucall(vm_vaddr_t uc);
 void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
 
@@ -37,11 +36,6 @@ static inline void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 	ucall_arch_init(vm, mmio_gpa);
 }
 
-static inline void ucall_uninit(struct kvm_vm *vm)
-{
-	ucall_arch_uninit(vm);
-}
-
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
 				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 3db32d56787e8..b3b00be1ef824 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -416,7 +416,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	TEST_ASSERT(ret == 0, "Error in sem_destroy");
 
 	free(vcpu_threads);
-	ucall_uninit(vm);
 	kvm_vm_free(vm);
 }
 
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 1c38bd260f90b..21d73afcb14fb 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -12,23 +12,13 @@
  */
 static vm_vaddr_t *ucall_exit_mmio_addr;
 
-static void ucall_set_mmio_addr(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-	vm->ucall_mmio_addr = mmio_gpa;
-
-	write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa);
-}
-
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 	virt_pg_map(vm, mmio_gpa, mmio_gpa);
 
-	ucall_set_mmio_addr(vm, mmio_gpa);
-}
+	vm->ucall_mmio_addr = mmio_gpa;
 
-void ucall_arch_uninit(struct kvm_vm *vm)
-{
-	ucall_set_mmio_addr(vm, (vm_paddr_t)NULL);
+	write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gpa);
 }
 
 void ucall_arch_do_ucall(vm_vaddr_t uc)
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index b66404e56a3f8..2de8a5d527b3a 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -229,7 +229,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
 
 void memstress_destroy_vm(struct kvm_vm *vm)
 {
-	ucall_uninit(vm);
 	kvm_vm_free(vm);
 }
 
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index c58ecb8a0981b..78acdb084ab0f 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -14,10 +14,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 }
 
-void ucall_arch_uninit(struct kvm_vm *vm)
-{
-}
-
 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
 			unsigned long arg1, unsigned long arg2,
 			unsigned long arg3, unsigned long arg4,
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index 208f0f04299bb..cbee520a26f2a 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -10,10 +10,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 }
 
-void ucall_arch_uninit(struct kvm_vm *vm)
-{
-}
-
 void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
 	/* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index 016a0487cf723..eb8bf55b359a5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -12,10 +12,6 @@ void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 {
 }
 
-void ucall_arch_uninit(struct kvm_vm *vm)
-{
-}
-
 void ucall_arch_do_ucall(vm_vaddr_t uc)
 {
 	asm volatile("in %[port], %%al"
-- 
GitLab


From 426729b2cf2e02ff4bd5c988832c044c8b77f4c7 Mon Sep 17 00:00:00 2001
From: Peter Gonda <pgonda@google.com>
Date: Thu, 6 Oct 2022 00:34:09 +0000
Subject: [PATCH 1648/4122] KVM: selftests: Add ucall pool based implementation

To play nice with guests whose stack memory is encrypted, e.g. AMD SEV,
introduce a new "ucall pool" implementation that passes the ucall struct
via dedicated memory (which can be mapped shared, a.k.a. as plain text).

Because not all architectures have access to the vCPU index in the guest,
use a bitmap with atomic accesses to track which entries in the pool are
free/used.  A list+lock could also work in theory, but synchronizing the
individual pointers to the guest would be a mess.

Note, there's no need to rewalk the bitmap to ensure success.  If all
vCPUs are simply allocating, success is guaranteed because there are
enough entries for all vCPUs.  If one or more vCPUs are freeing and then
reallocating, success is guaranteed because vCPUs _always_ walk the
bitmap from 0=>N; if vCPU frees an entry and then wins a race to
re-allocate, then either it will consume the entry it just freed (bit is
the first free bit), or the losing vCPU is guaranteed to see the freed
bit (winner consumes an earlier bit, which the loser hasn't yet visited).

Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Signed-off-by: Peter Gonda <pgonda@google.com>
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006003409.649993-8-seanjc@google.com
---
 .../selftests/kvm/include/ucall_common.h      |  9 ++-
 .../testing/selftests/kvm/lib/aarch64/ucall.c |  7 +-
 tools/testing/selftests/kvm/lib/riscv/ucall.c |  2 +-
 tools/testing/selftests/kvm/lib/s390x/ucall.c |  2 +-
 .../testing/selftests/kvm/lib/ucall_common.c  | 72 +++++++++++++++++--
 .../testing/selftests/kvm/lib/x86_64/ucall.c  |  2 +-
 6 files changed, 77 insertions(+), 17 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index 2662a4352a8c0..bdd373189a777 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -22,6 +22,9 @@ enum {
 struct ucall {
 	uint64_t cmd;
 	uint64_t args[UCALL_MAX_ARGS];
+
+	/* Host virtual address of this struct. */
+	struct ucall *hva;
 };
 
 void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
@@ -30,11 +33,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
 
 void ucall(uint64_t cmd, int nargs, ...);
 uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
-
-static inline void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
-{
-	ucall_arch_init(vm, mmio_gpa);
-}
+void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
 
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
 				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index 21d73afcb14fb..562c16dfbb002 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -32,12 +32,9 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 
 	if (run->exit_reason == KVM_EXIT_MMIO &&
 	    run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
-		vm_vaddr_t gva;
-
-		TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+		TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
 			    "Unexpected ucall exit mmio address access");
-		memcpy(&gva, run->mmio.data, sizeof(gva));
-		return addr_gva2hva(vcpu->vm, gva);
+		return (void *)(*((uint64_t *)run->mmio.data));
 	}
 
 	return NULL;
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 78acdb084ab0f..9a3476a2dfca4 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -55,7 +55,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 	    run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
 		switch (run->riscv_sbi.function_id) {
 		case KVM_RISCV_SELFTESTS_SBI_UCALL:
-			return addr_gva2hva(vcpu->vm, run->riscv_sbi.args[0]);
+			return (void *)run->riscv_sbi.args[0];
 		case KVM_RISCV_SELFTESTS_SBI_UNEXP:
 			vcpu_dump(stderr, vcpu, 2);
 			TEST_ASSERT(0, "Unexpected trap taken by guest");
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index cbee520a26f2a..a7f02dc372cf7 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -26,7 +26,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 	    (run->s390_sieic.ipb >> 16) == 0x501) {
 		int reg = run->s390_sieic.ipa & 0xf;
 
-		return addr_gva2hva(vcpu->vm, run->s.regs.gprs[reg]);
+		return (void *)run->s.regs.gprs[reg];
 	}
 	return NULL;
 }
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index ced4808607463..fcae96461e460 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -1,22 +1,86 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #include "kvm_util.h"
+#include "linux/types.h"
+#include "linux/bitmap.h"
+#include "linux/atomic.h"
+
+struct ucall_header {
+	DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
+	struct ucall ucalls[KVM_MAX_VCPUS];
+};
+
+/*
+ * ucall_pool holds per-VM values (global data is duplicated by each VM), it
+ * must not be accessed from host code.
+ */
+static struct ucall_header *ucall_pool;
+
+void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+	struct ucall_header *hdr;
+	struct ucall *uc;
+	vm_vaddr_t vaddr;
+	int i;
+
+	vaddr = vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR);
+	hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
+	memset(hdr, 0, sizeof(*hdr));
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		uc = &hdr->ucalls[i];
+		uc->hva = uc;
+	}
+
+	write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr);
+
+	ucall_arch_init(vm, mmio_gpa);
+}
+
+static struct ucall *ucall_alloc(void)
+{
+	struct ucall *uc;
+	int i;
+
+	GUEST_ASSERT(ucall_pool);
+
+	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+		if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) {
+			uc = &ucall_pool->ucalls[i];
+			memset(uc->args, 0, sizeof(uc->args));
+			return uc;
+		}
+	}
+
+	GUEST_ASSERT(0);
+	return NULL;
+}
+
+static void ucall_free(struct ucall *uc)
+{
+	/* Beware, here be pointer arithmetic.  */
+	clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
+}
 
 void ucall(uint64_t cmd, int nargs, ...)
 {
-	struct ucall uc = {};
+	struct ucall *uc;
 	va_list va;
 	int i;
 
-	WRITE_ONCE(uc.cmd, cmd);
+	uc = ucall_alloc();
+
+	WRITE_ONCE(uc->cmd, cmd);
 
 	nargs = min(nargs, UCALL_MAX_ARGS);
 
 	va_start(va, nargs);
 	for (i = 0; i < nargs; ++i)
-		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
+		WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
 	va_end(va);
 
-	ucall_arch_do_ucall((vm_vaddr_t)&uc);
+	ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+	ucall_free(uc);
 }
 
 uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index eb8bf55b359a5..4d41dc63cc9ed 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -26,7 +26,7 @@ void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
 		struct kvm_regs regs;
 
 		vcpu_regs_get(vcpu, &regs);
-		return addr_gva2hva(vcpu->vm, regs.rdi);
+		return (void *)regs.rdi;
 	}
 	return NULL;
 }
-- 
GitLab


From 9a6418dacd241169df9e0eeefc7980b3f9b40794 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 28 Sep 2022 22:34:58 +0100
Subject: [PATCH 1649/4122] KVM: selftests: Fix spelling mistake "begining" ->
 "beginning"

There is a spelling mistake in an assert message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Link: https://lore.kernel.org/r/20220928213458.64089-1-colin.i.king@gmail.com
[sean: fix an ironic typo in the changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/lib/elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index 9f54c098d9d08..d71a9a5974de5 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -138,7 +138,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
 		offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
 		offset_rv = lseek(fd, offset, SEEK_SET);
 		TEST_ASSERT(offset_rv == offset,
-			"Failed to seek to begining of program header %u,\n"
+			"Failed to seek to beginning of program header %u,\n"
 			"  filename: %s\n"
 			"  rv: %jd errno: %i",
 			n1, filename, (intmax_t) offset_rv, errno);
-- 
GitLab


From 816c54b74742ac1a74a74de9355ab982d11e63e6 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:45:06 +0000
Subject: [PATCH 1650/4122] KVM: selftests: Drop helpers to read/write page
 table entries

Drop vm_{g,s}et_page_table_entry() and instead expose the "inner"
helper (was _vm_get_page_table_entry()) that returns a _pointer_ to the
PTE, i.e. let tests directly modify PTEs instead of bouncing through
helpers that just make life difficult.

Opportunsitically use BIT_ULL() in emulator_error_test, and use the
MAXPHYADDR define to set the "rogue" GPA bit instead of open coding the
same value.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006004512.666529-2-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  |  6 ++----
 .../selftests/kvm/lib/x86_64/processor.c      | 21 ++-----------------
 .../kvm/x86_64/emulator_error_test.c          |  6 ++++--
 3 files changed, 8 insertions(+), 25 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index e8ca0d8a6a7e0..30d5df1ebaad0 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -827,10 +827,8 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
 
 bool kvm_is_tdp_enabled(void);
 
-uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-				 uint64_t vaddr);
-void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-			     uint64_t vaddr, uint64_t pte);
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+				  uint64_t vaddr);
 
 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 		       uint64_t a3);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 39c4409ef56a6..90b35998b0f3a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -241,9 +241,8 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	}
 }
 
-static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm,
-					  struct kvm_vcpu *vcpu,
-					  uint64_t vaddr)
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+				  uint64_t vaddr)
 {
 	uint16_t index[4];
 	uint64_t *pml4e, *pdpe, *pde;
@@ -313,22 +312,6 @@ static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm,
 	return &pte[index[0]];
 }
 
-uint64_t vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-				 uint64_t vaddr)
-{
-	uint64_t *pte = _vm_get_page_table_entry(vm, vcpu, vaddr);
-
-	return *(uint64_t *)pte;
-}
-
-void vm_set_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-			     uint64_t vaddr, uint64_t pte)
-{
-	uint64_t *new_pte = _vm_get_page_table_entry(vm, vcpu, vaddr);
-
-	*(uint64_t *)new_pte = pte;
-}
-
 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
 	uint64_t *pml4e, *pml4e_start;
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
index 236e11755ba67..bde247f3c8a16 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -152,8 +152,9 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
-	uint64_t gpa, pte;
+	uint64_t *pte;
 	uint64_t *hva;
+	uint64_t gpa;
 	int rc;
 
 	/* Tell stdout not to buffer its content */
@@ -178,8 +179,9 @@ int main(int argc, char *argv[])
 	virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
 	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
 	memset(hva, 0, PAGE_SIZE);
+
 	pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA);
-	vm_set_page_table_entry(vm, vcpu, MEM_REGION_GVA, pte | (1ull << 36));
+	*pte |= BIT_ULL(MAXPHYADDR);
 
 	vcpu_run(vcpu);
 	process_exit_on_emulation_error(vcpu);
-- 
GitLab


From 751f280017b697d98936618a21ca3defdc03a9f4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:45:07 +0000
Subject: [PATCH 1651/4122] KVM: selftests: Drop reserved bit checks from PTE
 accessor

Drop the reserved bit checks from the helper to retrieve a PTE, there's
very little value in sanity checking the constructed page tables as any
will quickly be noticed in the form of an unexpected #PF.  The checks
also place unnecessary restrictions on the usage of the helpers, e.g. if
a test _wanted_ to set reserved bits for whatever reason.

Removing the NX check in particular allows for the removal of the @vcpu
param, which will in turn allow the helper to be reused nearly verbatim
for addr_gva2gpa().

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006004512.666529-3-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  |  3 +--
 .../selftests/kvm/lib/x86_64/processor.c      | 26 +------------------
 .../kvm/x86_64/emulator_error_test.c          |  2 +-
 3 files changed, 3 insertions(+), 28 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 30d5df1ebaad0..53d52a5ace48b 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -827,8 +827,7 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
 
 bool kvm_is_tdp_enabled(void);
 
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-				  uint64_t vaddr);
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
 
 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
 		       uint64_t a3);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 90b35998b0f3a..9e196837a794b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -241,29 +241,11 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	}
 }
 
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
-				  uint64_t vaddr)
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
 {
 	uint16_t index[4];
 	uint64_t *pml4e, *pdpe, *pde;
 	uint64_t *pte;
-	struct kvm_sregs sregs;
-	uint64_t rsvd_mask = 0;
-
-	/* Set the high bits in the reserved mask. */
-	if (vm->pa_bits < 52)
-		rsvd_mask = GENMASK_ULL(51, vm->pa_bits);
-
-	/*
-	 * SDM vol 3, fig 4-11 "Formats of CR3 and Paging-Structure Entries
-	 * with 4-Level Paging and 5-Level Paging".
-	 * If IA32_EFER.NXE = 0 and the P flag of a paging-structure entry is 1,
-	 * the XD flag (bit 63) is reserved.
-	 */
-	vcpu_sregs_get(vcpu, &sregs);
-	if ((sregs.efer & EFER_NX) == 0) {
-		rsvd_mask |= PTE_NX_MASK;
-	}
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -286,24 +268,18 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
 	pml4e = addr_gpa2hva(vm, vm->pgd);
 	TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
 		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT((pml4e[index[3]] & (rsvd_mask | PTE_LARGE_MASK)) == 0,
-		"Unexpected reserved bits set.");
 
 	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
 	TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
 		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
 	TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
 		"Expected pdpe to map a pde not a 1-GByte page.");
-	TEST_ASSERT((pdpe[index[2]] & rsvd_mask) == 0,
-		"Unexpected reserved bits set.");
 
 	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
 	TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
 		"Expected pde to be present for gva: 0x%08lx", vaddr);
 	TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
 		"Expected pde to map a pte not a 2-MByte page.");
-	TEST_ASSERT((pde[index[1]] & rsvd_mask) == 0,
-		"Unexpected reserved bits set.");
 
 	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
 	TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
index bde247f3c8a16..1abb347357545 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
 	hva = addr_gpa2hva(vm, MEM_REGION_GPA);
 	memset(hva, 0, PAGE_SIZE);
 
-	pte = vm_get_page_table_entry(vm, vcpu, MEM_REGION_GVA);
+	pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
 	*pte |= BIT_ULL(MAXPHYADDR);
 
 	vcpu_run(vcpu);
-- 
GitLab


From 91add12d384c650d243b8ccdee1f2ddea3c9a85d Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:45:08 +0000
Subject: [PATCH 1652/4122] KVM: selftests: Remove useless shifts when creating
 guest page tables

Remove the pointless shift from GPA=>GFN and immediately back to
GFN=>GPA when creating guest page tables.  Ignore the other walkers
that have a similar pattern for the moment, they will be converted
to use virt_get_pte() in the near future.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006004512.666529-4-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h    |  3 ++-
 .../selftests/kvm/lib/x86_64/processor.c        | 17 ++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 53d52a5ace48b..9676a34647580 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -177,7 +177,8 @@ struct kvm_x86_cpu_feature {
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
 #define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
-#define PTE_GET_PFN(pte)        (((pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
+#define PTE_GET_PA(pte)		((pte) & PHYSICAL_PAGE_MASK)
+#define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)
 
 /* General Registers in 64-Bit Mode */
 struct gpr64_regs {
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 9e196837a794b..324bf24564a1a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -131,23 +131,23 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 	}
 }
 
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_pfn, uint64_t vaddr,
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_gpa, uint64_t vaddr,
 			  int level)
 {
-	uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
+	uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
 	int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
 
 	return &page_table[index];
 }
 
 static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
-				       uint64_t pt_pfn,
+				       uint64_t pt_gpa,
 				       uint64_t vaddr,
 				       uint64_t paddr,
 				       int current_level,
 				       int target_level)
 {
-	uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
+	uint64_t *pte = virt_get_pte(vm, pt_gpa, vaddr, current_level);
 
 	if (!(*pte & PTE_PRESENT_MASK)) {
 		*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
@@ -197,21 +197,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
 	 * Allocate upper level page tables, if not already present.  Return
 	 * early if a hugepage was created.
 	 */
-	pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
-				      vaddr, paddr, PG_LEVEL_512G, level);
+	pml4e = virt_create_upper_pte(vm, vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
 	if (*pml4e & PTE_LARGE_MASK)
 		return;
 
-	pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
+	pdpe = virt_create_upper_pte(vm, PTE_GET_PA(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
 	if (*pdpe & PTE_LARGE_MASK)
 		return;
 
-	pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
+	pde = virt_create_upper_pte(vm, PTE_GET_PA(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
 	if (*pde & PTE_LARGE_MASK)
 		return;
 
 	/* Fill in page table entry. */
-	pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
+	pte = virt_get_pte(vm, PTE_GET_PA(*pde), vaddr, PG_LEVEL_4K);
 	TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
 		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
 	*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
-- 
GitLab


From ed0b58fc6f0bdde360c28314e0faedc9a0a6c3de Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:45:09 +0000
Subject: [PATCH 1653/4122] KVM: selftests: Verify parent PTE is PRESENT when
 getting child PTE

Verify the parent PTE is PRESENT when getting a child via virt_get_pte()
so that the helper can be used for getting PTEs/GPAs without losing
sanity checks that the walker isn't wandering into the weeds.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006004512.666529-5-seanjc@google.com
---
 .../selftests/kvm/lib/x86_64/processor.c      | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 324bf24564a1a..c9649f19aca17 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -131,23 +131,28 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm)
 	}
 }
 
-static void *virt_get_pte(struct kvm_vm *vm, uint64_t pt_gpa, uint64_t vaddr,
-			  int level)
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
+			  uint64_t vaddr, int level)
 {
+	uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
 	uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
 	int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
 
+	TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+		    "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
+		    level + 1, vaddr);
+
 	return &page_table[index];
 }
 
 static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
-				       uint64_t pt_gpa,
+				       uint64_t *parent_pte,
 				       uint64_t vaddr,
 				       uint64_t paddr,
 				       int current_level,
 				       int target_level)
 {
-	uint64_t *pte = virt_get_pte(vm, pt_gpa, vaddr, current_level);
+	uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
 
 	if (!(*pte & PTE_PRESENT_MASK)) {
 		*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
@@ -197,20 +202,20 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
 	 * Allocate upper level page tables, if not already present.  Return
 	 * early if a hugepage was created.
 	 */
-	pml4e = virt_create_upper_pte(vm, vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
+	pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
 	if (*pml4e & PTE_LARGE_MASK)
 		return;
 
-	pdpe = virt_create_upper_pte(vm, PTE_GET_PA(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
+	pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
 	if (*pdpe & PTE_LARGE_MASK)
 		return;
 
-	pde = virt_create_upper_pte(vm, PTE_GET_PA(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
+	pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
 	if (*pde & PTE_LARGE_MASK)
 		return;
 
 	/* Fill in page table entry. */
-	pte = virt_get_pte(vm, PTE_GET_PA(*pde), vaddr, PG_LEVEL_4K);
+	pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
 	TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
 		    "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
 	*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
-- 
GitLab


From 99d51c6eef2dadc204363ab3bf58c91d02f895be Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:45:10 +0000
Subject: [PATCH 1654/4122] KVM: selftests: Use virt_get_pte() when getting PTE
 pointer

Use virt_get_pte() in vm_get_page_table_entry() instead of open coding
equivalent code.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006004512.666529-6-seanjc@google.com
---
 .../selftests/kvm/lib/x86_64/processor.c      | 29 ++++---------------
 1 file changed, 6 insertions(+), 23 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index c9649f19aca17..09b550fd88152 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -247,9 +247,7 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 
 uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
 {
-	uint16_t index[4];
 	uint64_t *pml4e, *pdpe, *pde;
-	uint64_t *pte;
 
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
@@ -264,32 +262,17 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
 	TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
 		"Canonical check failed.  The virtual address is invalid.");
 
-	index[0] = (vaddr >> 12) & 0x1ffu;
-	index[1] = (vaddr >> 21) & 0x1ffu;
-	index[2] = (vaddr >> 30) & 0x1ffu;
-	index[3] = (vaddr >> 39) & 0x1ffu;
-
-	pml4e = addr_gpa2hva(vm, vm->pgd);
-	TEST_ASSERT(pml4e[index[3]] & PTE_PRESENT_MASK,
-		"Expected pml4e to be present for gva: 0x%08lx", vaddr);
+	pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
 
-	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
-	TEST_ASSERT(pdpe[index[2]] & PTE_PRESENT_MASK,
-		"Expected pdpe to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT(!(pdpe[index[2]] & PTE_LARGE_MASK),
+	pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
+	TEST_ASSERT(!(*pdpe & PTE_LARGE_MASK),
 		"Expected pdpe to map a pde not a 1-GByte page.");
 
-	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
-	TEST_ASSERT(pde[index[1]] & PTE_PRESENT_MASK,
-		"Expected pde to be present for gva: 0x%08lx", vaddr);
-	TEST_ASSERT(!(pde[index[1]] & PTE_LARGE_MASK),
+	pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
+	TEST_ASSERT(!(*pde & PTE_LARGE_MASK),
 		"Expected pde to map a pte not a 2-MByte page.");
 
-	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
-	TEST_ASSERT(pte[index[0]] & PTE_PRESENT_MASK,
-		"Expected pte to be present for gva: 0x%08lx", vaddr);
-
-	return &pte[index[0]];
+	return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
 }
 
 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-- 
GitLab


From efe91dc307d00766911fbcb5021bdc3a1cf9c79e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:45:11 +0000
Subject: [PATCH 1655/4122] KVM: selftests: Use vm_get_page_table_entry() in
 addr_arch_gva2gpa()

Use vm_get_page_table_entry() in addr_arch_gva2gpa() to get the leaf PTE
instead of manually walking page tables.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006004512.666529-7-seanjc@google.com
---
 .../selftests/kvm/lib/x86_64/processor.c      | 38 ++-----------------
 1 file changed, 4 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 09b550fd88152..053f64191122b 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -458,41 +458,11 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 
 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
-	uint16_t index[4];
-	uint64_t *pml4e, *pdpe, *pde;
-	uint64_t *pte;
-
-	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
-		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
-	index[0] = (gva >> 12) & 0x1ffu;
-	index[1] = (gva >> 21) & 0x1ffu;
-	index[2] = (gva >> 30) & 0x1ffu;
-	index[3] = (gva >> 39) & 0x1ffu;
-
-	if (!vm->pgd_created)
-		goto unmapped_gva;
-	pml4e = addr_gpa2hva(vm, vm->pgd);
-	if (!(pml4e[index[3]] & PTE_PRESENT_MASK))
-		goto unmapped_gva;
-
-	pdpe = addr_gpa2hva(vm, PTE_GET_PFN(pml4e[index[3]]) * vm->page_size);
-	if (!(pdpe[index[2]] & PTE_PRESENT_MASK))
-		goto unmapped_gva;
-
-	pde = addr_gpa2hva(vm, PTE_GET_PFN(pdpe[index[2]]) * vm->page_size);
-	if (!(pde[index[1]] & PTE_PRESENT_MASK))
-		goto unmapped_gva;
-
-	pte = addr_gpa2hva(vm, PTE_GET_PFN(pde[index[1]]) * vm->page_size);
-	if (!(pte[index[0]] & PTE_PRESENT_MASK))
-		goto unmapped_gva;
-
-	return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK);
+	uint64_t *pte = vm_get_page_table_entry(vm, gva);
 
-unmapped_gva:
-	TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
-	exit(EXIT_FAILURE);
+	TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+		    "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
+	return PTE_GET_PA(*pte) | (gva & ~PAGE_MASK);
 }
 
 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
-- 
GitLab


From 96b69958c77d84e49c06ebe2e3502e4c1620e3c0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:45:12 +0000
Subject: [PATCH 1656/4122] KVM: selftests: Play nice with huge pages when
 getting PTEs/GPAs

Play nice with huge pages when getting PTEs and translating GVAs to GPAs,
there's no reason to disallow using huge pages in selftests.  Use
PG_LEVEL_NONE to indicate that the caller doesn't care about the mapping
level and just wants to get the pte+level.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006004512.666529-8-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 11 ++++-
 .../selftests/kvm/lib/x86_64/processor.c      | 45 ++++++++++++++++---
 2 files changed, 47 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 9676a34647580..e000e35c948fa 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -172,11 +172,16 @@ struct kvm_x86_cpu_feature {
 #define PTE_GLOBAL_MASK         BIT_ULL(8)
 #define PTE_NX_MASK             BIT_ULL(63)
 
+#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
+
 #define PAGE_SHIFT		12
 #define PAGE_SIZE		(1ULL << PAGE_SHIFT)
-#define PAGE_MASK		(~(PAGE_SIZE-1))
+#define PAGE_MASK		(~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
+
+#define HUGEPAGE_SHIFT(x)	(PAGE_SHIFT + (((x) - 1) * 9))
+#define HUGEPAGE_SIZE(x)	(1UL << HUGEPAGE_SHIFT(x))
+#define HUGEPAGE_MASK(x)	(~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
 
-#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)
 #define PTE_GET_PA(pte)		((pte) & PHYSICAL_PAGE_MASK)
 #define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)
 
@@ -828,6 +833,8 @@ static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
 
 bool kvm_is_tdp_enabled(void);
 
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+				    int *level);
 uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
 
 uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 053f64191122b..efa20d0f99271 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -245,10 +245,26 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 	}
 }
 
-uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+{
+	if (*pte & PTE_LARGE_MASK) {
+		TEST_ASSERT(*level == PG_LEVEL_NONE ||
+			    *level == current_level,
+			    "Unexpected hugepage at level %d\n", current_level);
+		*level = current_level;
+	}
+
+	return *level == current_level;
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+				    int *level)
 {
 	uint64_t *pml4e, *pdpe, *pde;
 
+	TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+		    "Invalid PG_LEVEL_* '%d'", *level);
+
 	TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
 		"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
 	TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
@@ -263,18 +279,27 @@ uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
 		"Canonical check failed.  The virtual address is invalid.");
 
 	pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
+	if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
+		return pml4e;
 
 	pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
-	TEST_ASSERT(!(*pdpe & PTE_LARGE_MASK),
-		"Expected pdpe to map a pde not a 1-GByte page.");
+	if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
+		return pdpe;
 
 	pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
-	TEST_ASSERT(!(*pde & PTE_LARGE_MASK),
-		"Expected pde to map a pte not a 2-MByte page.");
+	if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
+		return pde;
 
 	return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
 }
 
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
+{
+	int level = PG_LEVEL_4K;
+
+	return __vm_get_page_table_entry(vm, vaddr, &level);
+}
+
 void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 {
 	uint64_t *pml4e, *pml4e_start;
@@ -458,11 +483,17 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
 
 vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
 {
-	uint64_t *pte = vm_get_page_table_entry(vm, gva);
+	int level = PG_LEVEL_NONE;
+	uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
 
 	TEST_ASSERT(*pte & PTE_PRESENT_MASK,
 		    "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
-	return PTE_GET_PA(*pte) | (gva & ~PAGE_MASK);
+
+	/*
+	 * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
+	 * address bits to be zero.
+	 */
+	return PTE_GET_PA(*pte) | (gva & ~HUGEPAGE_MASK(level));
 }
 
 static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
-- 
GitLab


From 197ebb713ad04518ffef6d966954b519a0805100 Mon Sep 17 00:00:00 2001
From: Vishal Annapurve <vannapurve@google.com>
Date: Tue, 15 Nov 2022 21:38:43 +0000
Subject: [PATCH 1657/4122] KVM: selftests: move common startup logic to
 kvm_util.c

Consolidate common startup logic in one place by implementing a single
setup function with __attribute((constructor)) for all selftests within
kvm_util.c.

This allows moving logic like:
        /* Tell stdout not to buffer its content */
        setbuf(stdout, NULL);
to a single file for all selftests.

This will also allow any required setup at entry in future to be done in
common main function.

Link: https://lore.kernel.org/lkml/Ywa9T+jKUpaHLu%2Fl@google.com
Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Reviewed-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Link: https://lore.kernel.org/r/20221115213845.3348210-2-vannapurve@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/aarch64/arch_timer.c            | 3 ---
 tools/testing/selftests/kvm/aarch64/hypercalls.c            | 2 --
 tools/testing/selftests/kvm/aarch64/vgic_irq.c              | 3 ---
 tools/testing/selftests/kvm/lib/kvm_util.c                  | 6 ++++++
 tools/testing/selftests/kvm/memslot_perf_test.c             | 3 ---
 tools/testing/selftests/kvm/rseq_test.c                     | 3 ---
 tools/testing/selftests/kvm/s390x/memop.c                   | 2 --
 tools/testing/selftests/kvm/s390x/resets.c                  | 2 --
 tools/testing/selftests/kvm/s390x/sync_regs_test.c          | 3 ---
 tools/testing/selftests/kvm/set_memory_region_test.c        | 3 ---
 tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c    | 3 ---
 tools/testing/selftests/kvm/x86_64/emulator_error_test.c    | 3 ---
 tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c           | 3 ---
 tools/testing/selftests/kvm/x86_64/platform_info_test.c     | 3 ---
 tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c  | 3 ---
 tools/testing/selftests/kvm/x86_64/set_sregs_test.c         | 3 ---
 .../selftests/kvm/x86_64/svm_nested_soft_inject_test.c      | 3 ---
 tools/testing/selftests/kvm/x86_64/sync_regs_test.c         | 3 ---
 tools/testing/selftests/kvm/x86_64/userspace_io_test.c      | 3 ---
 .../testing/selftests/kvm/x86_64/userspace_msr_exit_test.c  | 3 ---
 20 files changed, 6 insertions(+), 54 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 54016cdd8a098..f2a96779716a8 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -446,9 +446,6 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vm *vm;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	if (!parse_args(argc, argv))
 		exit(KSFT_SKIP);
 
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
index 3dceecfd1f628..bef1499fb4654 100644
--- a/tools/testing/selftests/kvm/aarch64/hypercalls.c
+++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c
@@ -305,8 +305,6 @@ static void test_run(void)
 
 int main(void)
 {
-	setbuf(stdout, NULL);
-
 	test_run();
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index e0310ebc313ca..90d854e0fcffe 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -817,9 +817,6 @@ int main(int argc, char **argv)
 	int opt;
 	bool eoi_split = false;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
 		switch (opt) {
 		case 'n':
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 07c8edd4e5486..575a0c38d1c0c 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -2086,3 +2086,9 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
 		break;
 	}
 }
+
+void __attribute((constructor)) kvm_selftest_init(void)
+{
+	/* Tell stdout not to buffer its content. */
+	setbuf(stdout, NULL);
+}
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index d771262ea584b..36b20abfb948e 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -994,9 +994,6 @@ int main(int argc, char *argv[])
 	struct test_result rbestslottime;
 	int tctr;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	if (!parse_args(argc, argv, &targs))
 		return -1;
 
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 0e9e2b48a51f7..3045fdf9bdf59 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -205,9 +205,6 @@ int main(int argc, char *argv[])
 	struct kvm_vcpu *vcpu;
 	u32 cpu, rseq_cpu;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
 	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
 		    strerror(errno));
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 9113696d5178a..3fd81e58f40c2 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -760,8 +760,6 @@ int main(int argc, char *argv[])
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
 
-	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */
-
 	ksft_print_header();
 
 	ksft_set_plan(ARRAY_SIZE(testlist));
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
index 19486084eb309..e41e2cb8ffa97 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -296,8 +296,6 @@ int main(int argc, char *argv[])
 	bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
 	int idx;
 
-	setbuf(stdout, NULL);	/* Tell stdout not to buffer its content */
-
 	ksft_print_header();
 	ksft_set_plan(ARRAY_SIZE(testlist));
 
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index 3fdb6e2598ebe..2ddde41c44ba9 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -231,9 +231,6 @@ int main(int argc, char *argv[])
 
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	ksft_print_header();
 
 	ksft_set_plan(ARRAY_SIZE(testlist));
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index 85c16f09a50e1..2ef1d1b72ce43 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -392,9 +392,6 @@ int main(int argc, char *argv[])
 	int i, loops;
 #endif
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 #ifdef __x86_64__
 	/*
 	 * FIXME: the zero-memslot test fails on aarch64 and s390x because
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 4208487652f8e..1027a671c7d3a 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -57,9 +57,6 @@ int main(int argc, char *argv[])
 
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	run = vcpu->run;
 
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
index 1abb347357545..d945e571e7a0c 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
@@ -157,9 +157,6 @@ int main(int argc, char *argv[])
 	uint64_t gpa;
 	int rc;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index e804eb08dff90..5c27efbf405e1 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -134,9 +134,6 @@ int main(int argc, char *argv[])
 	const struct kvm_cpuid2 *hv_cpuid_entries;
 	struct kvm_vcpu *vcpu;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 76417c7d687bd..310a104d94f06 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -72,9 +72,6 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	uint64_t msr_platform_info;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index ea4e259a1e2ef..a6ffa245c8971 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -447,9 +447,6 @@ int main(int argc, char *argv[])
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
 
 	TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 2bb08bf2125dd..a284fcef6ed74 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -82,9 +82,6 @@ int main(int argc, char *argv[])
 	uint64_t cr4;
 	int rc;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	/*
 	 * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
 	 * use it to verify all supported CR4 bits can be set prior to defining
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
index e637d77360123..e497ace629c19 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -194,9 +194,6 @@ done:
 
 int main(int argc, char *argv[])
 {
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
 
 	TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index 9b6db0b0b13e9..d2f9b5bdfab20 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -90,9 +90,6 @@ int main(int argc, char *argv[])
 	struct kvm_vcpu_events events;
 	int rv, cap;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
 	TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
 	TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
index 7316521428f86..91076c9787b41 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -56,9 +56,6 @@ int main(int argc, char *argv[])
 	struct kvm_vm *vm;
 	struct ucall uc;
 
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 	run = vcpu->run;
 
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index fae95089e6551..25fa55344a10e 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -818,9 +818,6 @@ static void test_user_exit_msr_flags(void)
 
 int main(int argc, char *argv[])
 {
-	/* Tell stdout not to buffer its content */
-	setbuf(stdout, NULL);
-
 	test_msr_filter_allow();
 
 	test_msr_filter_deny();
-- 
GitLab


From e1ab31245c4efe973db4dcd6c82a41c2aec09b27 Mon Sep 17 00:00:00 2001
From: Vishal Annapurve <vannapurve@google.com>
Date: Tue, 15 Nov 2022 21:38:44 +0000
Subject: [PATCH 1658/4122] KVM: selftests: Add arch specific initialization

Introduce arch specific API: kvm_selftest_arch_init to allow each arch to
handle initialization before running any selftest logic.

Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Reviewed-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Link: https://lore.kernel.org/r/20221115213845.3348210-3-vannapurve@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/include/kvm_util_base.h      |  7 +++++++
 .../selftests/kvm/lib/aarch64/processor.c      | 18 +++++++++---------
 tools/testing/selftests/kvm/lib/kvm_util.c     |  6 ++++++
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index a7047e0767d33..c58eec6d44a7e 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -857,4 +857,11 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
 	return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
 }
 
+/*
+ * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * to allow for arch-specific setup that is common to all tests, e.g. computing
+ * the default guest "mode".
+ */
+void kvm_selftest_arch_init(void);
+
 #endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 6f5551368944e..0de4aabc0c764 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -495,15 +495,6 @@ void aarch64_get_supported_page_sizes(uint32_t ipa,
 	close(kvm_fd);
 }
 
-/*
- * arm64 doesn't have a true default mode, so start by computing the
- * available IPA space and page sizes early.
- */
-void __attribute__((constructor)) init_guest_modes(void)
-{
-       guest_modes_append_default();
-}
-
 void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
 	       uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
 	       uint64_t arg6, struct arm_smccc_res *res)
@@ -528,3 +519,12 @@ void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
 		       [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6)
 		     : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7");
 }
+
+void kvm_selftest_arch_init(void)
+{
+	/*
+	 * arm64 doesn't have a true default mode, so start by computing the
+	 * available IPA space and page sizes early.
+	 */
+	guest_modes_append_default();
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 575a0c38d1c0c..db62680d59180 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -2087,8 +2087,14 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
 	}
 }
 
+__weak void kvm_selftest_arch_init(void)
+{
+}
+
 void __attribute((constructor)) kvm_selftest_init(void)
 {
 	/* Tell stdout not to buffer its content. */
 	setbuf(stdout, NULL);
+
+	kvm_selftest_arch_init();
 }
-- 
GitLab


From 2115713cfab05dd6efd48ad6bf56e67f556dcec5 Mon Sep 17 00:00:00 2001
From: Vishal Annapurve <vannapurve@google.com>
Date: Tue, 15 Nov 2022 21:38:45 +0000
Subject: [PATCH 1659/4122] KVM: selftests: Add arch specific post vm creation
 hook

Add arch specific API kvm_arch_vm_post_create to perform any required setup
after VM creation.

Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Reviewed-by: Peter Gonda <pgonda@google.com>
Signed-off-by: Vishal Annapurve <vannapurve@google.com>
Link: https://lore.kernel.org/r/20221115213845.3348210-4-vannapurve@google.com
[sean: place x86's implementation by vm_arch_vcpu_add()]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/include/kvm_util_base.h | 2 ++
 tools/testing/selftests/kvm/lib/kvm_util.c          | 8 +++++---
 tools/testing/selftests/kvm/lib/x86_64/processor.c  | 5 +++++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index c58eec6d44a7e..228212ede05eb 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -864,4 +864,6 @@ static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
  */
 void kvm_selftest_arch_init(void);
 
+void kvm_arch_vm_post_create(struct kvm_vm *vm);
+
 #endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index db62680d59180..5ac8f207ed92d 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -351,9 +351,7 @@ struct kvm_vm *__vm_create(enum vm_guest_mode mode, uint32_t nr_runnable_vcpus,
 	slot0 = memslot2region(vm, 0);
 	ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
 
-#ifdef __x86_64__
-	vm_create_irqchip(vm);
-#endif
+	kvm_arch_vm_post_create(vm);
 
 	return vm;
 }
@@ -2087,6 +2085,10 @@ void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
 	}
 }
 
+__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+}
+
 __weak void kvm_selftest_arch_init(void)
 {
 }
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index efa20d0f99271..999576146d69d 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -586,6 +586,11 @@ void __vm_xsave_require_permission(int bit, const char *name)
 		    bitmask);
 }
 
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+	vm_create_irqchip(vm);
+}
+
 struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
 				  void *guest_code)
 {
-- 
GitLab


From 376bc1b458c9a6db347c70f02fa6eb8b5f187455 Mon Sep 17 00:00:00 2001
From: Gautam Menghani <gautammenghani201@gmail.com>
Date: Mon, 17 Oct 2022 23:28:19 +0530
Subject: [PATCH 1660/4122] KVM: selftests: Don't assume vcpu->id is '0' in
 xAPIC state test

In xapic_state_test's test_icr(), explicitly skip iterations that would
match vcpu->id instead of assuming vcpu->id is '0', so that IPIs are
are correctly sent to non-existent vCPUs.

Suggested-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/kvm/YyoZr9rXSSMEtdh5@google.com
Signed-off-by: Gautam Menghani <gautammenghani201@gmail.com>
Link: https://lore.kernel.org/r/20221017175819.12672-1-gautammenghani201@gmail.com
[sean: massage shortlog and changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/x86_64/xapic_state_test.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
index 6f7a5ef667186..d7d37dae3eebf 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -114,7 +114,9 @@ static void test_icr(struct xapic_vcpu *x)
 	 * vCPUs, not vcpu.id + 1.  Arbitrarily use vector 0xff.
 	 */
 	icr = APIC_INT_ASSERT | 0xff;
-	for (i = vcpu->id + 1; i < 0xff; i++) {
+	for (i = 0; i < 0xff; i++) {
+		if (i == vcpu->id)
+			continue;
 		for (j = 0; j < 8; j++)
 			__test_icr(x, i << (32 + 24) | icr | (j << 8));
 	}
-- 
GitLab


From 52d3a4fb5be1579b0fb8d48f8abaf9a9c7db1083 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:45 -0700
Subject: [PATCH 1661/4122] KVM: selftests: Rename emulator_error_test to
 smaller_maxphyaddr_emulation_test

Rename emulator_error_test to smaller_maxphyaddr_emulation_test and
update the comment at the top of the file to document that this is
explicitly a test to validate that KVM emulates instructions in response
to an EPT violation when emulating a smaller MAXPHYADDR.

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-2-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/.gitignore                         | 2 +-
 tools/testing/selftests/kvm/Makefile                           | 2 +-
 ...ulator_error_test.c => smaller_maxphyaddr_emulation_test.c} | 3 ++-
 3 files changed, 4 insertions(+), 3 deletions(-)
 rename tools/testing/selftests/kvm/x86_64/{emulator_error_test.c => smaller_maxphyaddr_emulation_test.c} (96%)

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 2f0d705db9dba..053e5d34cd030 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -17,7 +17,6 @@
 /x86_64/cr4_cpuid_sync_test
 /x86_64/debug_regs
 /x86_64/evmcs_test
-/x86_64/emulator_error_test
 /x86_64/fix_hypercall_test
 /x86_64/get_msr_index_features
 /x86_64/kvm_clock_test
@@ -36,6 +35,7 @@
 /x86_64/set_boot_cpu_id
 /x86_64/set_sregs_test
 /x86_64/sev_migrate_tests
+/x86_64/smaller_maxphyaddr_emulation_test
 /x86_64/smm_test
 /x86_64/state_test
 /x86_64/svm_vmcall_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 6e2a683629c77..cff3a7ff87829 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -82,7 +82,6 @@ TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
-TEST_GEN_PROGS_x86_64 += x86_64/emulator_error_test
 TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
@@ -97,6 +96,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
 TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
diff --git a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
similarity index 96%
rename from tools/testing/selftests/kvm/x86_64/emulator_error_test.c
rename to tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index d945e571e7a0c..6fe5af8bd47ef 100644
--- a/tools/testing/selftests/kvm/x86_64/emulator_error_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -2,7 +2,8 @@
 /*
  * Copyright (C) 2020, Google LLC.
  *
- * Tests for KVM_CAP_EXIT_ON_EMULATION_FAILURE capability.
+ * Test that KVM emulates instructions in response to EPT violations when
+ * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
  */
 
 #define _GNU_SOURCE /* for program_invocation_short_name */
-- 
GitLab


From 48e59373398a554098863bc7a3d1350cd0d5c4d0 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:46 -0700
Subject: [PATCH 1662/4122] KVM: selftests: Explicitly require instructions
 bytes

Hard-code the flds instruction and assert the exact instruction bytes
are present in run->emulation_failure. The test already requires the
instruction bytes to be present because that's the only way the test
will advance the RIP past the flds and get to GUEST_DONE().

Note that KVM does not necessarily return exactly 2 bytes in
run->emulation_failure since it may not know the exact instruction
length in all cases. So just assert that
run->emulation_failure.insn_size is at least 2.

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-3-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../smaller_maxphyaddr_emulation_test.c       | 68 ++++++-------------
 1 file changed, 20 insertions(+), 48 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index 6fe5af8bd47ef..6f7891b0e193b 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -19,41 +19,20 @@
 #define MEM_REGION_SLOT	10
 #define MEM_REGION_SIZE PAGE_SIZE
 
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
+
 static void guest_code(void)
 {
-	__asm__ __volatile__("flds (%[addr])"
-			     :: [addr]"r"(MEM_REGION_GVA));
+	__asm__ __volatile__(FLDS_MEM_EAX :: "a"(MEM_REGION_GVA));
 
 	GUEST_DONE();
 }
 
-/*
- * Accessors to get R/M, REG, and Mod bits described in the SDM vol 2,
- * figure 2-2 "Table Interpretation of ModR/M Byte (C8H)".
- */
-#define GET_RM(insn_byte) (insn_byte & 0x7)
-#define GET_REG(insn_byte) ((insn_byte & 0x38) >> 3)
-#define GET_MOD(insn_byte) ((insn_byte & 0xc) >> 6)
-
-/* Ensure we are dealing with a simple 2-byte flds instruction. */
-static bool is_flds(uint8_t *insn_bytes, uint8_t insn_size)
-{
-	return insn_size >= 2 &&
-	       insn_bytes[0] == 0xd9 &&
-	       GET_REG(insn_bytes[1]) == 0x0 &&
-	       GET_MOD(insn_bytes[1]) == 0x0 &&
-	       /* Ensure there is no SIB byte. */
-	       GET_RM(insn_bytes[1]) != 0x4 &&
-	       /* Ensure there is no displacement byte. */
-	       GET_RM(insn_bytes[1]) != 0x5;
-}
-
 static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *run = vcpu->run;
 	struct kvm_regs regs;
 	uint8_t *insn_bytes;
-	uint8_t insn_size;
 	uint64_t flags;
 
 	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
@@ -65,30 +44,23 @@ static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu)
 		    "Unexpected suberror: %u",
 		    run->emulation_failure.suberror);
 
-	if (run->emulation_failure.ndata >= 1) {
-		flags = run->emulation_failure.flags;
-		if ((flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES) &&
-		    run->emulation_failure.ndata >= 3) {
-			insn_size = run->emulation_failure.insn_size;
-			insn_bytes = run->emulation_failure.insn_bytes;
-
-			TEST_ASSERT(insn_size <= 15 && insn_size > 0,
-				    "Unexpected instruction size: %u",
-				    insn_size);
-
-			TEST_ASSERT(is_flds(insn_bytes, insn_size),
-				    "Unexpected instruction.  Expected 'flds' (0xd9 /0)");
-
-			/*
-			 * If is_flds() succeeded then the instruction bytes
-			 * contained an flds instruction that is 2-bytes in
-			 * length (ie: no prefix, no SIB, no displacement).
-			 */
-			vcpu_regs_get(vcpu, &regs);
-			regs.rip += 2;
-			vcpu_regs_set(vcpu, &regs);
-		}
-	}
+	flags = run->emulation_failure.flags;
+	TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+		    flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+		    "run->emulation_failure is missing instruction bytes");
+
+	TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+		    "Expected a 2-byte opcode for 'flds', got %d bytes",
+		    run->emulation_failure.insn_size);
+
+	insn_bytes = run->emulation_failure.insn_bytes;
+	TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+		    "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n",
+		    insn_bytes[0], insn_bytes[1]);
+
+	vcpu_regs_get(vcpu, &regs);
+	regs.rip += 2;
+	vcpu_regs_set(vcpu, &regs);
 }
 
 static void do_guest_assert(struct ucall *uc)
-- 
GitLab


From 50824c6eee39eb2d7a60d665b4b245552e852705 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:47 -0700
Subject: [PATCH 1663/4122] KVM: selftests: Delete dead ucall code

Delete a bunch of code related to ucall handling from
smaller_maxphyaddr_emulation_test. The only thing
smaller_maxphyaddr_emulation_test needs to check is that the vCPU exits
with UCALL_DONE after the second vcpu_run().

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-4-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../smaller_maxphyaddr_emulation_test.c       | 61 +------------------
 1 file changed, 1 insertion(+), 60 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index 6f7891b0e193b..b72aeefda70c6 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -63,64 +63,6 @@ static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu)
 	vcpu_regs_set(vcpu, &regs);
 }
 
-static void do_guest_assert(struct ucall *uc)
-{
-	REPORT_GUEST_ASSERT(*uc);
-}
-
-static void check_for_guest_assert(struct kvm_vcpu *vcpu)
-{
-	struct ucall uc;
-
-	if (vcpu->run->exit_reason == KVM_EXIT_IO &&
-	    get_ucall(vcpu, &uc) == UCALL_ABORT) {
-		do_guest_assert(&uc);
-	}
-}
-
-static void process_ucall_done(struct kvm_vcpu *vcpu)
-{
-	struct kvm_run *run = vcpu->run;
-	struct ucall uc;
-
-	check_for_guest_assert(vcpu);
-
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s)",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
-
-	TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
-		    "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
-		    uc.cmd, UCALL_DONE);
-}
-
-static uint64_t process_ucall(struct kvm_vcpu *vcpu)
-{
-	struct kvm_run *run = vcpu->run;
-	struct ucall uc;
-
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
-		    "Unexpected exit reason: %u (%s)",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
-
-	switch (get_ucall(vcpu, &uc)) {
-	case UCALL_SYNC:
-		break;
-	case UCALL_ABORT:
-		do_guest_assert(&uc);
-		break;
-	case UCALL_DONE:
-		process_ucall_done(vcpu);
-		break;
-	default:
-		TEST_ASSERT(false, "Unexpected ucall");
-	}
-
-	return uc.cmd;
-}
-
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
@@ -156,8 +98,7 @@ int main(int argc, char *argv[])
 	vcpu_run(vcpu);
 	process_exit_on_emulation_error(vcpu);
 	vcpu_run(vcpu);
-
-	TEST_ASSERT(process_ucall(vcpu) == UCALL_DONE, "Expected UCALL_DONE");
+	ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 
 	kvm_vm_free(vm);
 
-- 
GitLab


From 19a2b32f5d242844e49764cf88256cfd53de0082 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:48 -0700
Subject: [PATCH 1664/4122] KVM: selftests: Move flds instruction emulation
 failure handling to header

Move the flds instruction emulation failure handling code to a header
so it can be re-used in an upcoming test.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-5-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/x86_64/flds_emulation.h     | 55 +++++++++++++++++++
 .../smaller_maxphyaddr_emulation_test.c       | 44 ++-------------
 2 files changed, 59 insertions(+), 40 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/flds_emulation.h

diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
new file mode 100644
index 0000000000000..e43a7df25f2c5
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_FLDS_EMULATION_H
+#define SELFTEST_KVM_FLDS_EMULATION_H
+
+#include "kvm_util.h"
+
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
+
+/*
+ * flds is an instruction that the KVM instruction emulator is known not to
+ * support. This can be used in guest code along with a mechanism to force
+ * KVM to emulate the instruction (e.g. by providing an MMIO address) to
+ * exercise emulation failures.
+ */
+static inline void flds(uint64_t address)
+{
+	__asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
+}
+
+static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+	struct kvm_run *run = vcpu->run;
+	struct kvm_regs regs;
+	uint8_t *insn_bytes;
+	uint64_t flags;
+
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
+		    "Unexpected exit reason: %u (%s)",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+		    "Unexpected suberror: %u",
+		    run->emulation_failure.suberror);
+
+	flags = run->emulation_failure.flags;
+	TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+		    flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+		    "run->emulation_failure is missing instruction bytes");
+
+	TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+		    "Expected a 2-byte opcode for 'flds', got %d bytes",
+		    run->emulation_failure.insn_size);
+
+	insn_bytes = run->emulation_failure.insn_bytes;
+	TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+		    "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n",
+		    insn_bytes[0], insn_bytes[1]);
+
+	vcpu_regs_get(vcpu, &regs);
+	regs.rip += 2;
+	vcpu_regs_set(vcpu, &regs);
+}
+
+#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index b72aeefda70c6..a8d0811789176 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -8,6 +8,8 @@
 
 #define _GNU_SOURCE /* for program_invocation_short_name */
 
+#include "flds_emulation.h"
+
 #include "test_util.h"
 #include "kvm_util.h"
 #include "vmx.h"
@@ -19,50 +21,12 @@
 #define MEM_REGION_SLOT	10
 #define MEM_REGION_SIZE PAGE_SIZE
 
-#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
-
 static void guest_code(void)
 {
-	__asm__ __volatile__(FLDS_MEM_EAX :: "a"(MEM_REGION_GVA));
-
+	flds(MEM_REGION_GVA);
 	GUEST_DONE();
 }
 
-static void process_exit_on_emulation_error(struct kvm_vcpu *vcpu)
-{
-	struct kvm_run *run = vcpu->run;
-	struct kvm_regs regs;
-	uint8_t *insn_bytes;
-	uint64_t flags;
-
-	TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
-		    "Unexpected exit reason: %u (%s)",
-		    run->exit_reason,
-		    exit_reason_str(run->exit_reason));
-
-	TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
-		    "Unexpected suberror: %u",
-		    run->emulation_failure.suberror);
-
-	flags = run->emulation_failure.flags;
-	TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
-		    flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
-		    "run->emulation_failure is missing instruction bytes");
-
-	TEST_ASSERT(run->emulation_failure.insn_size >= 2,
-		    "Expected a 2-byte opcode for 'flds', got %d bytes",
-		    run->emulation_failure.insn_size);
-
-	insn_bytes = run->emulation_failure.insn_bytes;
-	TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
-		    "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x\n",
-		    insn_bytes[0], insn_bytes[1]);
-
-	vcpu_regs_get(vcpu, &regs);
-	regs.rip += 2;
-	vcpu_regs_set(vcpu, &regs);
-}
-
 int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
@@ -96,7 +60,7 @@ int main(int argc, char *argv[])
 	*pte |= BIT_ULL(MAXPHYADDR);
 
 	vcpu_run(vcpu);
-	process_exit_on_emulation_error(vcpu);
+	handle_flds_emulation_failure_exit(vcpu);
 	vcpu_run(vcpu);
 	ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
 
-- 
GitLab


From d6ecfe976ac342e5c3249b9439da762f65e98015 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:49 -0700
Subject: [PATCH 1665/4122] KVM: x86/mmu: Use BIT{,_ULL}() for PFERR masks

Use the preferred BIT() and BIT_ULL() to construct the PFERR masks
rather than open-coding the bit shifting.

No functional change intended.

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-6-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/include/asm/kvm_host.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 81114a376c4ee..598eb3b9ae440 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -256,16 +256,16 @@ enum x86_intercept_stage;
 #define PFERR_GUEST_PAGE_BIT 33
 #define PFERR_IMPLICIT_ACCESS_BIT 48
 
-#define PFERR_PRESENT_MASK (1U << PFERR_PRESENT_BIT)
-#define PFERR_WRITE_MASK (1U << PFERR_WRITE_BIT)
-#define PFERR_USER_MASK (1U << PFERR_USER_BIT)
-#define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
-#define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
-#define PFERR_PK_MASK (1U << PFERR_PK_BIT)
-#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT)
-#define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
-#define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
-#define PFERR_IMPLICIT_ACCESS (1ULL << PFERR_IMPLICIT_ACCESS_BIT)
+#define PFERR_PRESENT_MASK	BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK	BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK		BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK		BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK	BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK		BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK		BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK	BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK	BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS	BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
 
 #define PFERR_NESTED_GUEST_PAGE (PFERR_GUEST_PAGE_MASK |	\
 				 PFERR_WRITE_MASK |		\
-- 
GitLab


From 77f7813cc2b9d95166a3539a5203663d5bbb0fd0 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:50 -0700
Subject: [PATCH 1666/4122] KVM: selftests: Copy KVM PFERR masks into selftests

Copy KVM's macros for page fault error masks into processor.h so they
can be used in selftests.

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-7-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../selftests/kvm/include/x86_64/processor.h  | 23 +++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index e000e35c948fa..159ea618d90f8 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -887,4 +887,27 @@ void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 #define XSTATE_XTILE_DATA_MASK		(1ULL << XSTATE_XTILE_DATA_BIT)
 #define XFEATURE_XTILE_MASK		(XSTATE_XTILE_CFG_MASK | \
 					XSTATE_XTILE_DATA_MASK)
+
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
+#define PFERR_IMPLICIT_ACCESS_BIT 48
+
+#define PFERR_PRESENT_MASK	BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK	BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK		BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK		BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK	BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK		BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK		BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK	BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK	BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS	BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+
 #endif /* SELFTEST_KVM_PROCESSOR_H */
-- 
GitLab


From f2e5b53b4ba9bc10d3febc3682bdf22e946bf6eb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 2 Nov 2022 11:46:51 -0700
Subject: [PATCH 1667/4122] KVM: selftests: Avoid JMP in non-faulting path of
 KVM_ASM_SAFE()

Clear R9 in the non-faulting path of KVM_ASM_SAFE() and fall through to
to a common load of "vector" to effectively load "vector" with '0' to
reduce the code footprint of the asm blob, to reduce the runtime overhead
of the non-faulting path (when "vector" is stored in a register), and so
that additional output constraints that are valid if and only if a fault
occur are loaded even in the non-faulting case.

A future patch will add a 64-bit output for the error code, and if its
output is not explicitly loaded with _something_, the user of the asm
blob can end up technically consuming uninitialized data.  Using a
common path to load the output constraints will allow using an existing
scratch register, e.g. r10, to hold the error code in the faulting path,
while also guaranteeing the error code is initialized with deterministic
data in the non-faulting patch (r10 is loaded with the RIP of
to-be-executed instruction).

Consuming the error code when a fault doesn't occur would obviously be a
test bug, but there's no guarantee the compiler will detect uninitialized
consumption.  And conversely, it's theoretically possible that the
compiler might throw a false positive on uninitialized data, e.g. if the
compiler can't determine that the non-faulting path won't touch the error
code.

Alternatively, the error code could be explicitly loaded in the
non-faulting path, but loading a 64-bit memory|register output operand
with an explicitl value requires a sign-extended "MOV imm32, r/m64",
which isn't exactly straightforward and has a largish code footprint.
And loading the error code with what is effectively garbage (from a
scratch register) avoids having to choose an arbitrary value for the
non-faulting case.

Opportunistically remove a rogue asterisk in the block comment.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-8-dmatlack@google.com
---
 tools/testing/selftests/kvm/include/x86_64/processor.h | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 159ea618d90f8..6f4c727d68188 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -770,7 +770,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
  * for recursive faults when accessing memory in the handler.  The downside to
  * using registers is that it restricts what registers can be used by the actual
  * instruction.  But, selftests are 64-bit only, making register* pressure a
- * minor concern.  Use r9-r11 as they are volatile, i.e. don't need* to be saved
+ * minor concern.  Use r9-r11 as they are volatile, i.e. don't need to be saved
  * by the callee, and except for r11 are not implicit parameters to any
  * instructions.  Ideally, fixup would use r8-r10 and thus avoid implicit
  * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
@@ -792,11 +792,9 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 	"lea 1f(%%rip), %%r10\n\t"				\
 	"lea 2f(%%rip), %%r11\n\t"				\
 	"1: " insn "\n\t"					\
-	"movb $0, %[vector]\n\t"				\
-	"jmp 3f\n\t"						\
+	"xor %%r9, %%r9\n\t"					\
 	"2:\n\t"						\
-	"mov  %%r9b, %[vector]\n\t"				\
-	"3:\n\t"
+	"mov  %%r9b, %[vector]\n\t"
 
 #define KVM_ASM_SAFE_OUTPUTS(v)	[vector] "=qm"(v)
 #define KVM_ASM_SAFE_CLOBBERS	"r9", "r10", "r11"
-- 
GitLab


From b9635930f0a73c1ef7b465121896c3fb2e3b77cd Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 2 Nov 2022 11:46:52 -0700
Subject: [PATCH 1668/4122] KVM: selftests: Provide error code as a
 KVM_ASM_SAFE() output

Provide the error code on a fault in KVM_ASM_SAFE(), e.g. to allow tests
to assert that #PF generates the correct error code without needing to
manually install a #PF handler.  Use r10 as the scratch register for the
error code, as it's already clobbered by the asm blob (loaded with the
RIP of the to-be-executed instruction).  Deliberately load the output
"error_code" even in the non-faulting path so that error_code is always
initialized with deterministic data (the aforementioned RIP), i.e to
ensure a selftest won't end up with uninitialized consumption regardless
of how KVM_ASM_SAFE() is used.

Don't clear r10 in the non-faulting case and instead load error code with
the RIP (see above).  The error code is valid if and only if an exception
occurs, and '0' isn't necessarily a better "invalid" value, e.g. '0'
could result in false passes for a buggy test.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-9-dmatlack@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 39 +++++++++++++------
 .../selftests/kvm/lib/x86_64/processor.c      |  1 +
 .../selftests/kvm/x86_64/hyperv_features.c    |  3 +-
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 6f4c727d68188..6093d0d53b4e6 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -786,6 +786,7 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
  *
  * REGISTER OUTPUTS:
  * r9  = exception vector (non-zero)
+ * r10 = error code
  */
 #define KVM_ASM_SAFE(insn)					\
 	"mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"	\
@@ -794,29 +795,43 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 	"1: " insn "\n\t"					\
 	"xor %%r9, %%r9\n\t"					\
 	"2:\n\t"						\
-	"mov  %%r9b, %[vector]\n\t"
+	"mov  %%r9b, %[vector]\n\t"				\
+	"mov  %%r10, %[error_code]\n\t"
 
-#define KVM_ASM_SAFE_OUTPUTS(v)	[vector] "=qm"(v)
+#define KVM_ASM_SAFE_OUTPUTS(v, ec)	[vector] "=qm"(v), [error_code] "=rm"(ec)
 #define KVM_ASM_SAFE_CLOBBERS	"r9", "r10", "r11"
 
-#define kvm_asm_safe(insn, inputs...)			\
-({							\
-	uint8_t vector;					\
-							\
-	asm volatile(KVM_ASM_SAFE(insn)			\
-		     : KVM_ASM_SAFE_OUTPUTS(vector)	\
-		     : inputs				\
-		     : KVM_ASM_SAFE_CLOBBERS);		\
-	vector;						\
+#define kvm_asm_safe(insn, inputs...)					\
+({									\
+	uint64_t ign_error_code;					\
+	uint8_t vector;							\
+									\
+	asm volatile(KVM_ASM_SAFE(insn)					\
+		     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)	\
+		     : inputs						\
+		     : KVM_ASM_SAFE_CLOBBERS);				\
+	vector;								\
+})
+
+#define kvm_asm_safe_ec(insn, error_code, inputs...)			\
+({									\
+	uint8_t vector;							\
+									\
+	asm volatile(KVM_ASM_SAFE(insn)					\
+		     : KVM_ASM_SAFE_OUTPUTS(vector, error_code)		\
+		     : inputs						\
+		     : KVM_ASM_SAFE_CLOBBERS);				\
+	vector;								\
 })
 
 static inline uint8_t rdmsr_safe(uint32_t msr, uint64_t *val)
 {
+	uint64_t error_code;
 	uint8_t vector;
 	uint32_t a, d;
 
 	asm volatile(KVM_ASM_SAFE("rdmsr")
-		     : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector)
+		     : "=a"(a), "=d"(d), KVM_ASM_SAFE_OUTPUTS(vector, error_code)
 		     : "c"(msr)
 		     : KVM_ASM_SAFE_CLOBBERS);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 999576146d69d..4623874a805bc 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1068,6 +1068,7 @@ static bool kvm_fixup_exception(struct ex_regs *regs)
 
 	regs->rip = regs->r11;
 	regs->r9 = regs->vector;
+	regs->r10 = regs->error_code;
 	return true;
 }
 
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 05b32e550a802..2b6d455acf8ae 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -18,6 +18,7 @@
 static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
 				vm_vaddr_t output_address, uint64_t *hv_status)
 {
+	uint64_t error_code;
 	uint8_t vector;
 
 	/* Note both the hypercall and the "asm safe" clobber r9-r11. */
@@ -25,7 +26,7 @@ static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
 		     KVM_ASM_SAFE("vmcall")
 		     : "=a" (*hv_status),
 		       "+c" (control), "+d" (input_address),
-		       KVM_ASM_SAFE_OUTPUTS(vector)
+		       KVM_ASM_SAFE_OUTPUTS(vector, error_code)
 		     : [output_address] "r"(output_address),
 		       "a" (-EFAULT)
 		     : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
-- 
GitLab


From a323845d6c3d2f667274bde8145906580359fe06 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:53 -0700
Subject: [PATCH 1669/4122] KVM: selftests: Expect #PF(RSVD) when TDP is
 disabled

Change smaller_maxphyaddr_emulation_test to expect a #PF(RSVD), rather
than an emulation failure, when TDP is disabled. KVM only needs to
emulate instructions to emulate a smaller guest.MAXPHYADDR when TDP is
enabled.

Fixes: 39bbcc3a4e39 ("selftests: kvm: Allows userspace to handle emulation errors.")
Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-10-dmatlack@google.com
[sean: massage comment to talk about having to emulate due to MAXPHYADDR]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 .../smaller_maxphyaddr_emulation_test.c       | 51 +++++++++++++++++--
 1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index a8d0811789176..06edf00a97d61 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -21,9 +21,27 @@
 #define MEM_REGION_SLOT	10
 #define MEM_REGION_SIZE PAGE_SIZE
 
-static void guest_code(void)
+static void guest_code(bool tdp_enabled)
 {
-	flds(MEM_REGION_GVA);
+	uint64_t error_code;
+	uint64_t vector;
+
+	vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
+
+	/*
+	 * When TDP is enabled, flds will trigger an emulation failure, exit to
+	 * userspace, and then the selftest host "VMM" skips the instruction.
+	 *
+	 * When TDP is disabled, no instruction emulation is required so flds
+	 * should generate #PF(RSVD).
+	 */
+	if (tdp_enabled) {
+		GUEST_ASSERT(!vector);
+	} else {
+		GUEST_ASSERT_EQ(vector, PF_VECTOR);
+		GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
+	}
+
 	GUEST_DONE();
 }
 
@@ -31,6 +49,7 @@ int main(int argc, char *argv[])
 {
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
+	struct ucall uc;
 	uint64_t *pte;
 	uint64_t *hva;
 	uint64_t gpa;
@@ -39,6 +58,10 @@ int main(int argc, char *argv[])
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
 
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vcpu);
 
 	vcpu_set_cpuid_maxphyaddr(vcpu, MAXPHYADDR);
 
@@ -60,9 +83,27 @@ int main(int argc, char *argv[])
 	*pte |= BIT_ULL(MAXPHYADDR);
 
 	vcpu_run(vcpu);
-	handle_flds_emulation_failure_exit(vcpu);
-	vcpu_run(vcpu);
-	ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+	/*
+	 * When TDP is enabled, KVM must emulate in response the guest physical
+	 * address that is illegal from the guest's perspective, but is legal
+	 * from hardware's perspeective.  This should result in an emulation
+	 * failure exit to userspace since KVM doesn't support emulating flds.
+	 */
+	if (kvm_is_tdp_enabled()) {
+		handle_flds_emulation_failure_exit(vcpu);
+		vcpu_run(vcpu);
+	}
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		break;
+	case UCALL_DONE:
+		break;
+	default:
+		TEST_FAIL("Unrecognized ucall: %lu\n", uc.cmd);
+	}
 
 	kvm_vm_free(vm);
 
-- 
GitLab


From 3ae5b759c3c033b4ca2b7ef19836622902151517 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 2 Nov 2022 11:46:54 -0700
Subject: [PATCH 1670/4122] KVM: selftests: Add a test for
 KVM_CAP_EXIT_ON_EMULATION_FAILURE

Add a selftest to exercise the KVM_CAP_EXIT_ON_EMULATION_FAILURE
capability.

This capability is also exercised through
smaller_maxphyaddr_emulation_test, but that test requires
allow_smaller_maxphyaddr=Y, which is off by default on Intel when ept=Y
and unconditionally disabled on AMD when npt=Y. This new test ensures
that KVM_CAP_EXIT_ON_EMULATION_FAILURE is exercised independent of
allow_smaller_maxphyaddr.

Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20221102184654.282799-11-dmatlack@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/.gitignore        |  1 +
 tools/testing/selftests/kvm/Makefile          |  1 +
 .../x86_64/exit_on_emulation_failure_test.c   | 45 +++++++++++++++++++
 3 files changed, 47 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 053e5d34cd030..bef984e4c39d9 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -17,6 +17,7 @@
 /x86_64/cr4_cpuid_sync_test
 /x86_64/debug_regs
 /x86_64/evmcs_test
+/x86_64/exit_on_emulation_failure_test
 /x86_64/fix_hypercall_test
 /x86_64/get_msr_index_features
 /x86_64/kvm_clock_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index cff3a7ff87829..487248c67decf 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -82,6 +82,7 @@ TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
 TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
new file mode 100644
index 0000000000000..37c61f712fd5c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ *
+ * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "flds_emulation.h"
+
+#include "test_util.h"
+
+#define MMIO_GPA	0x700000000
+#define MMIO_GVA	MMIO_GPA
+
+static void guest_code(void)
+{
+	/* Execute flds with an MMIO address to force KVM to emulate it. */
+	flds(MMIO_GVA);
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vcpu *vcpu;
+	struct kvm_vm *vm;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
+
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+	vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+	virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
+
+	vcpu_run(vcpu);
+	handle_flds_emulation_failure_exit(vcpu);
+	vcpu_run(vcpu);
+	ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+	kvm_vm_free(vm);
+	return 0;
+}
-- 
GitLab


From 3bd396353d18b4f4e4f9953e5f5c46b0045a5477 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:14 +0000
Subject: [PATCH 1671/4122] KVM: selftests: Add X86_FEATURE_PAE and use it calc
 "fallback" MAXPHYADDR

Add X86_FEATURE_PAE and use it to guesstimate the MAXPHYADDR when the
MAXPHYADDR CPUID entry isn't supported.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-2-seanjc@google.com
---
 tools/testing/selftests/kvm/include/x86_64/processor.h | 1 +
 tools/testing/selftests/kvm/lib/x86_64/processor.c     | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 6093d0d53b4e6..4f928fe3ad179 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -89,6 +89,7 @@ struct kvm_x86_cpu_feature {
 #define	X86_FEATURE_XSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
 #define	X86_FEATURE_OSXSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
 #define	X86_FEATURE_RDRAND		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define X86_FEATURE_PAE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
 #define	X86_FEATURE_MCE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
 #define	X86_FEATURE_APIC		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
 #define	X86_FEATURE_CLFLUSH		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 4623874a805bc..061e001c8a48f 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1012,12 +1012,10 @@ bool is_amd_cpu(void)
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
 {
 	const struct kvm_cpuid_entry2 *entry;
-	bool pae;
 
 	/* SDM 4.1.4 */
 	if (kvm_get_cpuid_max_extended() < 0x80000008) {
-		pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
-		*pa_bits = pae ? 36 : 32;
+		*pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
 		*va_bits = 32;
 	} else {
 		entry = kvm_get_supported_cpuid_entry(0x80000008);
-- 
GitLab


From ee3795536664e514196cbe7396d3eb4c9925de98 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:15 +0000
Subject: [PATCH 1672/4122] KVM: selftests: Refactor X86_FEATURE_* framework to
 prep for X86_PROPERTY_*

Refactor the X86_FEATURE_* framework to prepare for extending the core
logic to support "properties".  The "feature" framework allows querying a
single CPUID bit to detect the presence of a feature; the "property"
framework will extend the idea to allow querying a value, i.e. to get a
value that is a set of contiguous bits in a CPUID leaf.

Opportunistically add static asserts to ensure features are fully defined
at compile time, and to try and catch mistakes in the definition of
features.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-3-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 38 ++++++++++++-------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 4f928fe3ad179..52e12d40e66c2 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -63,16 +63,21 @@ struct kvm_x86_cpu_feature {
 	u8	reg;
 	u8	bit;
 };
-#define	KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)	\
-({							\
-	struct kvm_x86_cpu_feature feature = {		\
-		.function = fn,				\
-		.index = idx,				\
-		.reg = KVM_CPUID_##gpr,			\
-		.bit = __bit,				\
-	};						\
-							\
-	feature;					\
+#define	KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)				\
+({										\
+	struct kvm_x86_cpu_feature feature = {					\
+		.function = fn,							\
+		.index = idx,							\
+		.reg = KVM_CPUID_##gpr,						\
+		.bit = __bit,							\
+	};									\
+										\
+	static_assert((fn & 0xc0000000) == 0 ||					\
+		      (fn & 0xc0000000) == 0x40000000 ||			\
+		      (fn & 0xc0000000) == 0x80000000 ||			\
+		      (fn & 0xc0000000) == 0xc0000000);				\
+	static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));	\
+	feature;								\
 })
 
 /*
@@ -432,15 +437,22 @@ static inline void cpuid(uint32_t function,
 	return __cpuid(function, 0, eax, ebx, ecx, edx);
 }
 
-static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
+				      uint8_t reg, uint8_t lo, uint8_t hi)
 {
 	uint32_t gprs[4];
 
-	__cpuid(feature.function, feature.index,
+	__cpuid(function, index,
 		&gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
 		&gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
 
-	return gprs[feature.reg] & BIT(feature.bit);
+	return (gprs[reg] & GENMASK(hi, lo)) >> lo;
+}
+
+static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+	return __this_cpu_has(feature.function, feature.index,
+			      feature.reg, feature.bit, feature.bit);
 }
 
 #define SET_XMM(__var, __xmm) \
-- 
GitLab


From 53a7dc0f215ec91b098e3e6d7bb4bb9cef43a99a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:16 +0000
Subject: [PATCH 1673/4122] KVM: selftests: Add X86_PROPERTY_* framework to
 retrieve CPUID values

Introduce X86_PROPERTY_* to allow retrieving values/properties from CPUID
leafs, e.g. MAXPHYADDR from CPUID.0x80000008.  Use the same core code as
X86_FEATURE_*, the primary difference is that properties are multi-bit
values, whereas features enumerate a single bit.

Add this_cpu_has_p() to allow querying whether or not a property exists
based on the maximum leaf associated with the property, e.g. MAXPHYADDR
doesn't exist if the max leaf for 0x8000_xxxx is less than 0x8000_0008.

Use the new property infrastructure in vm_compute_max_gfn() to prove
that the code works as intended.  Future patches will convert additional
selftests code.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-4-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 68 +++++++++++++++++++
 .../selftests/kvm/lib/x86_64/processor.c      | 18 +++--
 2 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 52e12d40e66c2..df1ba5af89ed3 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -168,6 +168,48 @@ struct kvm_x86_cpu_feature {
 #define X86_FEATURE_KVM_HC_MAP_GPA_RANGE	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
 #define X86_FEATURE_KVM_MIGRATION_CONTROL	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
 
+/*
+ * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
+ * value/property as opposed to a single-bit feature.  Again, pack the info
+ * into a 64-bit value to pass by value with no overhead.
+ */
+struct kvm_x86_cpu_property {
+	u32	function;
+	u8	index;
+	u8	reg;
+	u8	lo_bit;
+	u8	hi_bit;
+};
+#define	KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit)			\
+({										\
+	struct kvm_x86_cpu_property property = {				\
+		.function = fn,							\
+		.index = idx,							\
+		.reg = KVM_CPUID_##gpr,						\
+		.lo_bit = low_bit,						\
+		.hi_bit = high_bit,						\
+	};									\
+										\
+	static_assert(low_bit < high_bit);					\
+	static_assert((fn & 0xc0000000) == 0 ||					\
+		      (fn & 0xc0000000) == 0x40000000 ||			\
+		      (fn & 0xc0000000) == 0x80000000 ||			\
+		      (fn & 0xc0000000) == 0xc0000000);				\
+	static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));	\
+	property;								\
+})
+
+#define X86_PROPERTY_MAX_BASIC_LEAF		KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+
+#define X86_PROPERTY_MAX_KVM_LEAF		KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
+
+#define X86_PROPERTY_MAX_EXT_LEAF		KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
+#define X86_PROPERTY_MAX_PHY_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
+#define X86_PROPERTY_PHYS_ADDR_REDUCTION	KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+
+#define X86_PROPERTY_MAX_CENTAUR_LEAF		KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
+
+
 /* Page table bitfield declarations */
 #define PTE_PRESENT_MASK        BIT_ULL(0)
 #define PTE_WRITABLE_MASK       BIT_ULL(1)
@@ -455,6 +497,32 @@ static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
 			      feature.reg, feature.bit, feature.bit);
 }
 
+static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
+{
+	return __this_cpu_has(property.function, property.index,
+			      property.reg, property.lo_bit, property.hi_bit);
+}
+
+static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+	uint32_t max_leaf;
+
+	switch (property.function & 0xc0000000) {
+	case 0:
+		max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+		break;
+	case 0x40000000:
+		max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+		break;
+	case 0x80000000:
+		max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+		break;
+	case 0xc0000000:
+		max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+	}
+	return max_leaf >= property.function;
+}
+
 #define SET_XMM(__var, __xmm) \
 	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 061e001c8a48f..23321c1d0631e 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1229,7 +1229,8 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
 {
 	const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
 	unsigned long ht_gfn, max_gfn, max_pfn;
-	uint32_t eax, ebx, ecx, edx, max_ext_leaf;
+	uint32_t eax, ebx, ecx, edx;
+	uint8_t maxphyaddr;
 
 	max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
 
@@ -1252,17 +1253,14 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
 	 * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX.  Use
 	 * the old conservative value if MAXPHYADDR is not enumerated.
 	 */
-	cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
-	max_ext_leaf = eax;
-	if (max_ext_leaf < 0x80000008)
+	if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
 		goto done;
 
-	cpuid(0x80000008, &eax, &ebx, &ecx, &edx);
-	max_pfn = (1ULL << ((eax & 0xff) - vm->page_shift)) - 1;
-	if (max_ext_leaf >= 0x8000001f) {
-		cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);
-		max_pfn >>= (ebx >> 6) & 0x3f;
-	}
+	maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+	max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
+
+	if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
+		max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
 
 	ht_gfn = max_pfn - num_ht_pages;
 done:
-- 
GitLab


From d80ddad2a8e042e6499d69d5a45a17051092e161 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:17 +0000
Subject: [PATCH 1674/4122] KVM: selftests: Use X86_PROPERTY_MAX_KVM_LEAF in
 CPUID test

Use X86_PROPERTY_MAX_KVM_LEAF to replace the equivalent open coded check
on KVM's maximum paravirt CPUID leaf.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-5-seanjc@google.com
---
 tools/testing/selftests/kvm/x86_64/cpuid_test.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
index a6aeee2e62e4f..2fc3ad9c887e2 100644
--- a/tools/testing/selftests/kvm/x86_64/cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -43,15 +43,6 @@ static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
 
 }
 
-static void test_cpuid_40000000(struct kvm_cpuid2 *guest_cpuid)
-{
-	u32 eax, ebx, ecx, edx;
-
-	cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
-
-	GUEST_ASSERT(eax == 0x40000001);
-}
-
 static void guest_main(struct kvm_cpuid2 *guest_cpuid)
 {
 	GUEST_SYNC(1);
@@ -60,7 +51,7 @@ static void guest_main(struct kvm_cpuid2 *guest_cpuid)
 
 	GUEST_SYNC(2);
 
-	test_cpuid_40000000(guest_cpuid);
+	GUEST_ASSERT(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF) == 0x40000001);
 
 	GUEST_DONE();
 }
-- 
GitLab


From a29e6e383b0d0d59a93ebbf6e93d3d41b905d336 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:18 +0000
Subject: [PATCH 1675/4122] KVM: selftests: Refactor kvm_cpuid_has() to prep
 for X86_PROPERTY_* support

Refactor kvm_cpuid_has() to prepare for extending X86_PROPERTY_* support
to KVM as well as "this CPU".

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-6-seanjc@google.com
---
 .../selftests/kvm/lib/x86_64/processor.c      | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 23321c1d0631e..710e5851a8635 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -652,8 +652,9 @@ const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
 	return cpuid;
 }
 
-bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
-		   struct kvm_x86_cpu_feature feature)
+static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+			      uint32_t function, uint32_t index,
+			      uint8_t reg, uint8_t lo, uint8_t hi)
 {
 	const struct kvm_cpuid_entry2 *entry;
 	int i;
@@ -666,12 +667,18 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
 		 * order, but kvm_x86_cpu_feature matches that mess, so yay
 		 * pointer shenanigans!
 		 */
-		if (entry->function == feature.function &&
-		    entry->index == feature.index)
-			return (&entry->eax)[feature.reg] & BIT(feature.bit);
+		if (entry->function == function && entry->index == index)
+			return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
 	}
 
-	return false;
+	return 0;
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+		   struct kvm_x86_cpu_feature feature)
+{
+	return __kvm_cpu_has(cpuid, feature.function, feature.index,
+			     feature.reg, feature.bit, feature.bit);
 }
 
 uint64_t kvm_get_feature_msr(uint64_t msr_index)
-- 
GitLab


From 40854713e3254f7a4fc4a92388309140e51e046c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:19 +0000
Subject: [PATCH 1676/4122] KVM: selftests: Add kvm_cpu_*() support for
 X86_PROPERTY_*

Extent X86_PROPERTY_* support to KVM, i.e. add kvm_cpu_property() and
kvm_cpu_has_p(), and use the new helpers in kvm_get_cpu_address_width().

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-7-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 34 ++++++++++++++++---
 .../selftests/kvm/lib/x86_64/processor.c      | 17 ++++++----
 2 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index df1ba5af89ed3..1eb76268c9f73 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -205,6 +205,7 @@ struct kvm_x86_cpu_property {
 
 #define X86_PROPERTY_MAX_EXT_LEAF		KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
 #define X86_PROPERTY_MAX_PHY_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
+#define X86_PROPERTY_MAX_VIRT_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
 #define X86_PROPERTY_PHYS_ADDR_REDUCTION	KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
 
 #define X86_PROPERTY_MAX_CENTAUR_LEAF		KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
@@ -703,6 +704,34 @@ static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
 	return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
 }
 
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+			    struct kvm_x86_cpu_property property);
+
+static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
+{
+	return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
+}
+
+static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+	uint32_t max_leaf;
+
+	switch (property.function & 0xc0000000) {
+	case 0:
+		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+		break;
+	case 0x40000000:
+		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+		break;
+	case 0x80000000:
+		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+		break;
+	case 0xc0000000:
+		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+	}
+	return max_leaf >= property.function;
+}
+
 static inline size_t kvm_cpuid2_size(int nr_entries)
 {
 	return sizeof(struct kvm_cpuid2) +
@@ -815,11 +844,6 @@ static inline uint32_t kvm_get_cpuid_max_basic(void)
 	return kvm_get_supported_cpuid_entry(0)->eax;
 }
 
-static inline uint32_t kvm_get_cpuid_max_extended(void)
-{
-	return kvm_get_supported_cpuid_entry(0x80000000)->eax;
-}
-
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
 bool vm_is_unrestricted_guest(struct kvm_vm *vm);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 710e5851a8635..af23c70ac129e 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -681,6 +681,13 @@ bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
 			     feature.reg, feature.bit, feature.bit);
 }
 
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+			    struct kvm_x86_cpu_property property)
+{
+	return __kvm_cpu_has(cpuid, property.function, property.index,
+			     property.reg, property.lo_bit, property.hi_bit);
+}
+
 uint64_t kvm_get_feature_msr(uint64_t msr_index)
 {
 	struct {
@@ -1018,16 +1025,12 @@ bool is_amd_cpu(void)
 
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
 {
-	const struct kvm_cpuid_entry2 *entry;
-
-	/* SDM 4.1.4 */
-	if (kvm_get_cpuid_max_extended() < 0x80000008) {
+	if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
 		*pa_bits == kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
 		*va_bits = 32;
 	} else {
-		entry = kvm_get_supported_cpuid_entry(0x80000008);
-		*pa_bits = entry->eax & 0xff;
-		*va_bits = (entry->eax >> 8) & 0xff;
+		*pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+		*va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
 	}
 }
 
-- 
GitLab


From 5dc19f1c7dd302f48a9f7fe7f29bb186d3477795 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:20 +0000
Subject: [PATCH 1677/4122] KVM: selftests: Convert AMX test to use
 X86_PROPRETY_XXX

Add and use x86 "properties" for the myriad AMX CPUID values that are
validated by the AMX test.  Drop most of the test's single-usage
helpers so that the asserts more precisely capture what check failed.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-8-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  |   9 ++
 tools/testing/selftests/kvm/x86_64/amx_test.c | 101 ++++--------------
 2 files changed, 31 insertions(+), 79 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 1eb76268c9f73..841ef8c4ba003 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -200,6 +200,15 @@ struct kvm_x86_cpu_property {
 })
 
 #define X86_PROPERTY_MAX_BASIC_LEAF		KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0	KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE		KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
+#define X86_PROPERTY_XSTATE_TILE_SIZE		KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
+#define X86_PROPERTY_XSTATE_TILE_OFFSET		KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
+#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES	KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
+#define X86_PROPERTY_AMX_BYTES_PER_TILE		KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
+#define X86_PROPERTY_AMX_BYTES_PER_ROW		KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
+#define X86_PROPERTY_AMX_NR_TILE_REGS		KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
+#define X86_PROPERTY_AMX_MAX_ROWS		KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0,  15)
 
 #define X86_PROPERTY_MAX_KVM_LEAF		KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
 
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index dadcbad10a1dc..21de6ae42086d 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -39,11 +39,6 @@
 #define XFEATURE_MASK_XTILEDATA		(1 << XFEATURE_XTILEDATA)
 #define XFEATURE_MASK_XTILE		(XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
 
-#define TILE_CPUID			0x1d
-#define XSTATE_CPUID			0xd
-#define TILE_PALETTE_CPUID_SUBLEAVE	0x1
-#define XSTATE_USER_STATE_SUBLEAVE	0x0
-
 #define XSAVE_HDR_OFFSET		512
 
 struct xsave_data {
@@ -129,71 +124,26 @@ static bool check_xsave_supports_xtile(void)
 	return __xgetbv(0) & XFEATURE_MASK_XTILE;
 }
 
-static bool enum_xtile_config(void)
-{
-	u32 eax, ebx, ecx, edx;
-
-	__cpuid(TILE_CPUID, TILE_PALETTE_CPUID_SUBLEAVE, &eax, &ebx, &ecx, &edx);
-	if (!eax || !ebx || !ecx)
-		return false;
-
-	xtile.max_names = ebx >> 16;
-	if (xtile.max_names < NUM_TILES)
-		return false;
-
-	xtile.bytes_per_tile = eax >> 16;
-	if (xtile.bytes_per_tile < TILE_SIZE)
-		return false;
-
-	xtile.bytes_per_row = ebx;
-	xtile.max_rows = ecx;
-
-	return true;
-}
-
-static bool enum_xsave_tile(void)
-{
-	u32 eax, ebx, ecx, edx;
-
-	__cpuid(XSTATE_CPUID, XFEATURE_XTILEDATA, &eax, &ebx, &ecx, &edx);
-	if (!eax || !ebx)
-		return false;
-
-	xtile.xsave_offset = ebx;
-	xtile.xsave_size = eax;
-
-	return true;
-}
-
-static bool check_xsave_size(void)
-{
-	u32 eax, ebx, ecx, edx;
-	bool valid = false;
-
-	__cpuid(XSTATE_CPUID, XSTATE_USER_STATE_SUBLEAVE, &eax, &ebx, &ecx, &edx);
-	if (ebx && ebx <= XSAVE_SIZE)
-		valid = true;
-
-	return valid;
-}
-
-static bool check_xtile_info(void)
+static void check_xtile_info(void)
 {
-	bool ret = false;
-
-	if (!check_xsave_size())
-		return ret;
-
-	if (!enum_xsave_tile())
-		return ret;
-
-	if (!enum_xtile_config())
-		return ret;
+	GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
+	GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
 
-	if (sizeof(struct tile_data) >= xtile.xsave_size)
-		ret = true;
+	xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
+	GUEST_ASSERT(xtile.xsave_offset == 2816);
+	xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
+	GUEST_ASSERT(xtile.xsave_size == 8192);
+	GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
 
-	return ret;
+	GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
+	xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
+	GUEST_ASSERT(xtile.max_names == 8);
+	xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
+	GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+	xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
+	GUEST_ASSERT(xtile.bytes_per_row == 64);
+	xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
+	GUEST_ASSERT(xtile.max_rows == 16);
 }
 
 static void set_tilecfg(struct tile_config *cfg)
@@ -238,16 +188,8 @@ static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
 {
 	init_regs();
 	check_cpuid_xsave();
-	GUEST_ASSERT(check_xsave_supports_xtile());
-	GUEST_ASSERT(check_xtile_info());
-
-	/* check xtile configs */
-	GUEST_ASSERT(xtile.xsave_offset == 2816);
-	GUEST_ASSERT(xtile.xsave_size == 8192);
-	GUEST_ASSERT(xtile.max_names == 8);
-	GUEST_ASSERT(xtile.bytes_per_tile == 1024);
-	GUEST_ASSERT(xtile.bytes_per_row == 64);
-	GUEST_ASSERT(xtile.max_rows == 16);
+	check_xsave_supports_xtile();
+	check_xtile_info();
 	GUEST_SYNC(1);
 
 	/* xfd=0, enable amx */
@@ -317,8 +259,9 @@ int main(int argc, char *argv[])
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
 
-	/* Get xsave/restore max size */
-	xsave_restore_size = kvm_get_supported_cpuid_entry(0xd)->ecx;
+	TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
+		    "KVM should enumerate max XSAVE size when XSAVE is supported");
+	xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
 
 	run = vcpu->run;
 	vcpu_regs_get(vcpu, &regs1);
-- 
GitLab


From 4feb9d21a407318129b6ea3a6735f1866439b9ab Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:21 +0000
Subject: [PATCH 1678/4122] KVM: selftests: Convert vmx_pmu_caps_test to use
 X86_PROPERTY_*

Add X86_PROPERTY_PMU_VERSION and use it in vmx_pmu_caps_test to replace
open coded versions of the same functionality.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-9-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  |  6 ++----
 .../selftests/kvm/x86_64/vmx_pmu_caps_test.c  | 19 ++-----------------
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 841ef8c4ba003..848dfbc9866b1 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -200,6 +200,8 @@ struct kvm_x86_cpu_property {
 })
 
 #define X86_PROPERTY_MAX_BASIC_LEAF		KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+#define X86_PROPERTY_PMU_VERSION		KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
+
 #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0	KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
 #define X86_PROPERTY_XSTATE_MAX_SIZE		KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
 #define X86_PROPERTY_XSTATE_TILE_SIZE		KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
@@ -848,10 +850,6 @@ static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index,
 	TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
 }
 
-static inline uint32_t kvm_get_cpuid_max_basic(void)
-{
-	return kvm_get_supported_cpuid_entry(0)->eax;
-}
 
 void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
 bool vm_is_unrestricted_guest(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
index 069589c52f419..c280ba1e65724 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -20,16 +20,6 @@
 #define PMU_CAP_FW_WRITES	(1ULL << 13)
 #define PMU_CAP_LBR_FMT		0x3f
 
-union cpuid10_eax {
-	struct {
-		unsigned int version_id:8;
-		unsigned int num_counters:8;
-		unsigned int bit_width:8;
-		unsigned int mask_length:8;
-	} split;
-	unsigned int full;
-};
-
 union perf_capabilities {
 	struct {
 		u64	lbr_format:6;
@@ -53,11 +43,9 @@ static void guest_code(void)
 
 int main(int argc, char *argv[])
 {
-	const struct kvm_cpuid_entry2 *entry_a_0;
 	struct kvm_vm *vm;
 	struct kvm_vcpu *vcpu;
 	int ret;
-	union cpuid10_eax eax;
 	union perf_capabilities host_cap;
 	uint64_t val;
 
@@ -69,11 +57,8 @@ int main(int argc, char *argv[])
 
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
 
-	TEST_REQUIRE(kvm_get_cpuid_max_basic() >= 0xa);
-	entry_a_0 = kvm_get_supported_cpuid_entry(0xa);
-
-	eax.full = entry_a_0->eax;
-	__TEST_REQUIRE(eax.split.version_id, "PMU is not supported by the vCPU");
+	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
 
 	/* testcase 1, set capabilities when we have PDCM bit */
 	vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES);
-- 
GitLab


From 5228c02a4c541a065ec071ea7ec2c1c76f3723dd Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:22 +0000
Subject: [PATCH 1679/4122] KVM: selftests: Add PMU feature framework, use in
 PMU event filter test

Add an X86_PMU_FEATURE_* framework to simplify probing architectural
events on Intel PMUs, which require checking the length of a bit vector
and the _absence_ of a "feature" bit.  Add helpers for both KVM and
"this CPU", and use the newfangled magic (along with X86_PROPERTY_*)
to  clean up pmu_event_filter_test.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-10-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 41 +++++++++++++++
 .../kvm/x86_64/pmu_event_filter_test.c        | 51 +++----------------
 2 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 848dfbc9866b1..343393308005c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -201,6 +201,8 @@ struct kvm_x86_cpu_property {
 
 #define X86_PROPERTY_MAX_BASIC_LEAF		KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
 #define X86_PROPERTY_PMU_VERSION		KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
+#define X86_PROPERTY_PMU_NR_GP_COUNTERS		KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH	KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
 
 #define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0	KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
 #define X86_PROPERTY_XSTATE_MAX_SIZE		KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
@@ -221,6 +223,29 @@ struct kvm_x86_cpu_property {
 
 #define X86_PROPERTY_MAX_CENTAUR_LEAF		KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
 
+/*
+ * Intel's architectural PMU events are bizarre.  They have a "feature" bit
+ * that indicates the feature is _not_ supported, and a property that states
+ * the length of the bit mask of unsupported features.  A feature is supported
+ * if the size of the bit mask is larger than the "unavailable" bit, and said
+ * bit is not set.
+ *
+ * Wrap the "unavailable" feature to simplify checking whether or not a given
+ * architectural event is supported.
+ */
+struct kvm_x86_pmu_feature {
+	struct kvm_x86_cpu_feature anti_feature;
+};
+#define	KVM_X86_PMU_FEATURE(name, __bit)					\
+({										\
+	struct kvm_x86_pmu_feature feature = {					\
+		.anti_feature = KVM_X86_CPU_FEATURE(0xa, 0, EBX, __bit),	\
+	};									\
+										\
+	feature;								\
+})
+
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED	KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5)
 
 /* Page table bitfield declarations */
 #define PTE_PRESENT_MASK        BIT_ULL(0)
@@ -535,6 +560,14 @@ static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
 	return max_leaf >= property.function;
 }
 
+static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+	uint32_t nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+
+	return nr_bits > feature.anti_feature.bit &&
+	       !this_cpu_has(feature.anti_feature);
+}
+
 #define SET_XMM(__var, __xmm) \
 	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
 
@@ -743,6 +776,14 @@ static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
 	return max_leaf >= property.function;
 }
 
+static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+	uint32_t nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+
+	return nr_bits > feature.anti_feature.bit &&
+	       !kvm_cpu_has(feature.anti_feature);
+}
+
 static inline size_t kvm_cpuid2_size(int nr_entries)
 {
 	return sizeof(struct kvm_cpuid2) +
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index a6ffa245c8971..201c4ea44ca97 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -21,29 +21,6 @@
 #define ARCH_PERFMON_EVENTSEL_OS			(1ULL << 17)
 #define ARCH_PERFMON_EVENTSEL_ENABLE			(1ULL << 22)
 
-union cpuid10_eax {
-	struct {
-		unsigned int version_id:8;
-		unsigned int num_counters:8;
-		unsigned int bit_width:8;
-		unsigned int mask_length:8;
-	} split;
-	unsigned int full;
-};
-
-union cpuid10_ebx {
-	struct {
-		unsigned int no_unhalted_core_cycles:1;
-		unsigned int no_instructions_retired:1;
-		unsigned int no_unhalted_reference_cycles:1;
-		unsigned int no_llc_reference:1;
-		unsigned int no_llc_misses:1;
-		unsigned int no_branch_instruction_retired:1;
-		unsigned int no_branch_misses_retired:1;
-	} split;
-	unsigned int full;
-};
-
 /* End of stuff taken from perf_event.h. */
 
 /* Oddly, this isn't in perf_event.h. */
@@ -380,30 +357,16 @@ static void test_pmu_config_disable(void (*guest_code)(void))
 }
 
 /*
- * Check for a non-zero PMU version, at least one general-purpose
- * counter per logical processor, an EBX bit vector of length greater
- * than 5, and EBX[5] clear.
- */
-static bool check_intel_pmu_leaf(const struct kvm_cpuid_entry2 *entry)
-{
-	union cpuid10_eax eax = { .full = entry->eax };
-	union cpuid10_ebx ebx = { .full = entry->ebx };
-
-	return eax.split.version_id && eax.split.num_counters > 0 &&
-		eax.split.mask_length > ARCH_PERFMON_BRANCHES_RETIRED &&
-		!ebx.split.no_branch_instruction_retired;
-}
-
-/*
- * Note that CPUID leaf 0xa is Intel-specific. This leaf should be
- * clear on AMD hardware.
+ * On Intel, check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, and support for counting the number of branch
+ * instructions retired.
  */
 static bool use_intel_pmu(void)
 {
-	const struct kvm_cpuid_entry2 *entry;
-
-	entry = kvm_get_supported_cpuid_entry(0xa);
-	return is_intel_cpu() && check_intel_pmu_leaf(entry);
+	return is_intel_cpu() &&
+	       kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
+	       kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
+	       kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
 }
 
 static bool is_zen1(uint32_t eax)
-- 
GitLab


From 24f3f9898e3c463b39dac8a03870c628dab8176e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:23 +0000
Subject: [PATCH 1680/4122] KVM: selftests: Add dedicated helpers for getting
 x86 Family and Model

Add dedicated helpers for getting x86's Family and Model, which are the
last holdouts that "need" raw access to CPUID information.  FMS info is
a mess and requires not only splicing together multiple values, but
requires doing so conditional in the Family case.

Provide wrappers to reduce the odds of copy+paste errors, but mostly to
allow for the eventual removal of kvm_get_supported_cpuid_entry().

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-11-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 52 +++++++++++++------
 .../selftests/kvm/lib/x86_64/processor.c      |  4 +-
 2 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 343393308005c..5d0acf8a96332 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -247,6 +247,23 @@ struct kvm_x86_pmu_feature {
 
 #define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED	KVM_X86_PMU_FEATURE(BRANCH_INSNS_RETIRED, 5)
 
+static inline unsigned int x86_family(unsigned int eax)
+{
+	unsigned int x86;
+
+	x86 = (eax >> 8) & 0xf;
+
+	if (x86 == 0xf)
+		x86 += (eax >> 20) & 0xff;
+
+	return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+	return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
+
 /* Page table bitfield declarations */
 #define PTE_PRESENT_MASK        BIT_ULL(0)
 #define PTE_WRITABLE_MASK       BIT_ULL(1)
@@ -516,6 +533,24 @@ static inline void cpuid(uint32_t function,
 	return __cpuid(function, 0, eax, ebx, ecx, edx);
 }
 
+static inline uint32_t this_cpu_fms(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	cpuid(1, &eax, &ebx, &ecx, &edx);
+	return eax;
+}
+
+static inline uint32_t this_cpu_family(void)
+{
+	return x86_family(this_cpu_fms());
+}
+
+static inline uint32_t this_cpu_model(void)
+{
+	return x86_model(this_cpu_fms());
+}
+
 static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
 				      uint8_t reg, uint8_t lo, uint8_t hi)
 {
@@ -658,23 +693,6 @@ static inline void cpu_relax(void)
 bool is_intel_cpu(void);
 bool is_amd_cpu(void);
 
-static inline unsigned int x86_family(unsigned int eax)
-{
-	unsigned int x86;
-
-	x86 = (eax >> 8) & 0xf;
-
-	if (x86 == 0xf)
-		x86 += (eax >> 20) & 0xff;
-
-	return x86;
-}
-
-static inline unsigned int x86_model(unsigned int eax)
-{
-	return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
-}
-
 struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
 void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
 void kvm_x86_state_cleanup(struct kvm_x86_state *state);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index af23c70ac129e..65e87b5acb6ff 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -1239,7 +1239,6 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
 {
 	const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
 	unsigned long ht_gfn, max_gfn, max_pfn;
-	uint32_t eax, ebx, ecx, edx;
 	uint8_t maxphyaddr;
 
 	max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
@@ -1254,8 +1253,7 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
 
 	/* Before family 17h, the HyperTransport area is just below 1T.  */
 	ht_gfn = (1 << 28) - num_ht_pages;
-	cpuid(1, &eax, &ebx, &ecx, &edx);
-	if (x86_family(eax) < 0x17)
+	if (this_cpu_family() < 0x17)
 		goto done;
 
 	/*
-- 
GitLab


From 074e9d4c9c6046f4605c9e37e90cff404119c525 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:24 +0000
Subject: [PATCH 1681/4122] KVM: selftests: Add and use KVM helpers for x86
 Family and Model

Add KVM variants of the x86 Family and Model helpers, and use them in the
PMU event filter test.  Open code the retrieval of KVM's supported CPUID
entry 0x1.0 in anticipation of dropping kvm_get_supported_cpuid_entry().

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-12-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  | 19 +++++++++++++--
 .../kvm/x86_64/pmu_event_filter_test.c        | 23 +++++++++----------
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 5d0acf8a96332..4366dbcc1bcdc 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -754,10 +754,27 @@ static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
 	vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
 }
 
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+					       uint32_t function, uint32_t index);
 const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
 const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
 const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
 
+static inline uint32_t kvm_cpu_fms(void)
+{
+	return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
+}
+
+static inline uint32_t kvm_cpu_family(void)
+{
+	return x86_family(kvm_cpu_fms());
+}
+
+static inline uint32_t kvm_cpu_model(void)
+{
+	return x86_model(kvm_cpu_fms());
+}
+
 bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
 		   struct kvm_x86_cpu_feature feature);
 
@@ -825,8 +842,6 @@ static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
 	return cpuid;
 }
 
-const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
-					       uint32_t function, uint32_t index);
 void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
 void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
 
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 201c4ea44ca97..2de98fce7eddc 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -369,20 +369,19 @@ static bool use_intel_pmu(void)
 	       kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
 }
 
-static bool is_zen1(uint32_t eax)
+static bool is_zen1(uint32_t family, uint32_t model)
 {
-	return x86_family(eax) == 0x17 && x86_model(eax) <= 0x0f;
+	return family == 0x17 && model <= 0x0f;
 }
 
-static bool is_zen2(uint32_t eax)
+static bool is_zen2(uint32_t family, uint32_t model)
 {
-	return x86_family(eax) == 0x17 &&
-		x86_model(eax) >= 0x30 && x86_model(eax) <= 0x3f;
+	return family == 0x17 && model >= 0x30 && model <= 0x3f;
 }
 
-static bool is_zen3(uint32_t eax)
+static bool is_zen3(uint32_t family, uint32_t model)
 {
-	return x86_family(eax) == 0x19 && x86_model(eax) <= 0x0f;
+	return family == 0x19 && model <= 0x0f;
 }
 
 /*
@@ -395,13 +394,13 @@ static bool is_zen3(uint32_t eax)
  */
 static bool use_amd_pmu(void)
 {
-	const struct kvm_cpuid_entry2 *entry;
+	uint32_t family = kvm_cpu_family();
+	uint32_t model = kvm_cpu_model();
 
-	entry = kvm_get_supported_cpuid_entry(1);
 	return is_amd_cpu() &&
-		(is_zen1(entry->eax) ||
-		 is_zen2(entry->eax) ||
-		 is_zen3(entry->eax));
+		(is_zen1(family, model) ||
+		 is_zen2(family, model) ||
+		 is_zen3(family, model));
 }
 
 int main(int argc, char *argv[])
-- 
GitLab


From b941ba2380ccf51e048dc58ff0e6bdf11828f6d9 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:51:25 +0000
Subject: [PATCH 1682/4122] KVM: selftests: Drop helpers for getting specific
 KVM supported CPUID entry

Drop kvm_get_supported_cpuid_entry() and its inner helper now that all
known usage can use X86_FEATURE_*, X86_PROPERTY_*, X86_PMU_FEATURE_*, or
the dedicated Family/Model helpers.  Providing "raw" access to CPUID
leafs is undesirable as it encourages open coding CPUID checks, which is
often error prone and not self-documenting.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006005125.680782-13-seanjc@google.com
---
 .../testing/selftests/kvm/include/x86_64/processor.h  | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 4366dbcc1bcdc..481f44c683aa1 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -902,17 +902,6 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
 	vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
 }
 
-static inline const struct kvm_cpuid_entry2 *__kvm_get_supported_cpuid_entry(uint32_t function,
-									     uint32_t index)
-{
-	return get_cpuid_entry(kvm_get_supported_cpuid(), function, index);
-}
-
-static inline const struct kvm_cpuid_entry2 *kvm_get_supported_cpuid_entry(uint32_t function)
-{
-	return __kvm_get_supported_cpuid_entry(function, 0);
-}
-
 uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
 int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
 
-- 
GitLab


From 5c107f7085f45e071bbcf13006fffccd8e5de0e1 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 16 Nov 2022 12:46:31 -0800
Subject: [PATCH 1683/4122] KVM: selftests: Assert in prepare_eptp() that nEPT
 is supported

Now that a VM isn't needed to check for nEPT support, assert that KVM
supports nEPT in prepare_eptp() instead of skipping the test, and push
the TEST_REQUIRE() check out to individual tests.  The require+assert are
somewhat redundant and will incur some amount of ongoing maintenance
burden, but placing the "require" logic in the test makes it easier to
find/understand a test's requirements and in this case, provides a very
strong hint that the test cares about nEPT.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20220927165209.930904-1-dmatlack@google.com
[sean: rebase on merged code, write changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/lib/x86_64/memstress.c      | 1 +
 tools/testing/selftests/kvm/lib/x86_64/vmx.c            | 2 +-
 tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
index 2b3b47e4a973e..d61e623afc8cf 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/memstress.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
@@ -85,6 +85,7 @@ void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vc
 	int vcpu_id;
 
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_cpu_has_ept());
 
 	for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
 		vmx = vcpu_alloc_vmx(vm, &vmx_gva);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 6800fc9aeef01..3e4ea846366cb 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -559,7 +559,7 @@ bool kvm_cpu_has_ept(void)
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot)
 {
-	TEST_REQUIRE(kvm_cpu_has_ept());
+	TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
 
 	vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
 	vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 2d8c23d639f77..f0456fb031b1e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -78,6 +78,7 @@ int main(int argc, char *argv[])
 	bool done = false;
 
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+	TEST_REQUIRE(kvm_cpu_has_ept());
 
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
-- 
GitLab


From ecb89a51724b3cd89c13ba7364e82f9879b68dcf Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 16 Nov 2022 12:42:28 -0800
Subject: [PATCH 1684/4122] KVM: selftests: Check for KVM nEPT support using
 "feature" MSRs

When checking for nEPT support in KVM, use kvm_get_feature_msr() instead
of vcpu_get_msr() to retrieve KVM's default TRUE_PROCBASED_CTLS and
PROCBASED_CTLS2 MSR values, i.e. don't require a VM+vCPU to query nEPT
support.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Matlack <dmatlack@google.com>
Link: https://lore.kernel.org/r/20220927165209.930904-1-dmatlack@google.com
[sean: rebase on merged code, write changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 tools/testing/selftests/kvm/include/x86_64/vmx.h |  2 +-
 tools/testing/selftests/kvm/lib/x86_64/vmx.c     | 12 ++++--------
 2 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 71b290b6469d6..e9c96b49966a6 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -572,7 +572,7 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
 			uint32_t memslot);
 void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
 			    uint64_t addr, uint64_t size);
-bool kvm_vm_has_ept(struct kvm_vm *vm);
+bool kvm_cpu_has_ept(void);
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot);
 void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index d21049c38fc56..6800fc9aeef01 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -544,26 +544,22 @@ void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
 	__nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
 }
 
-bool kvm_vm_has_ept(struct kvm_vm *vm)
+bool kvm_cpu_has_ept(void)
 {
-	struct kvm_vcpu *vcpu;
 	uint64_t ctrl;
 
-	vcpu = list_first_entry(&vm->vcpus, struct kvm_vcpu, list);
-	TEST_ASSERT(vcpu, "Cannot determine EPT support without vCPUs.\n");
-
-	ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+	ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
 	if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
 		return false;
 
-	ctrl = vcpu_get_msr(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+	ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
 	return ctrl & SECONDARY_EXEC_ENABLE_EPT;
 }
 
 void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
 		  uint32_t eptp_memslot)
 {
-	TEST_REQUIRE(kvm_vm_has_ept(vm));
+	TEST_REQUIRE(kvm_cpu_has_ept());
 
 	vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
 	vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
-- 
GitLab


From 2653d53345bda90604f673bb211dd060a5a5c232 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 16 Nov 2022 19:20:20 -0800
Subject: [PATCH 1685/4122] xfs: fix incorrect error-out in xfs_remove

Clean up resources if resetting the dotdot entry doesn't succeed.
Observed through code inspection.

Fixes: 5838d0356bb3 ("xfs: reset child dir '..' entry when unlinking child")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Andrey Albershteyn <aalbersh@redhat.com>
---
 fs/xfs/xfs_inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index aa303be11576f..d354ea2b74f96 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2479,7 +2479,7 @@ xfs_remove(
 			error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
 					tp->t_mountp->m_sb.sb_rootino, 0);
 			if (error)
-				return error;
+				goto out_trans_cancel;
 		}
 	} else {
 		/*
-- 
GitLab


From 59f6ab40fd8735c9a1a15401610a31cc06a0bbd6 Mon Sep 17 00:00:00 2001
From: Long Li <leo.lilong@huawei.com>
Date: Wed, 16 Nov 2022 19:20:20 -0800
Subject: [PATCH 1686/4122] xfs: fix sb write verify for lazysbcount

When lazysbcount is enabled, fsstress and loop mount/unmount test report
the following problems:

XFS (loop0): SB summary counter sanity check failed
XFS (loop0): Metadata corruption detected at xfs_sb_write_verify+0x13b/0x460,
	xfs_sb block 0x0
XFS (loop0): Unmount and run xfs_repair
XFS (loop0): First 128 bytes of corrupted metadata buffer:
00000000: 58 46 53 42 00 00 10 00 00 00 00 00 00 28 00 00  XFSB.........(..
00000010: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
00000020: 69 fb 7c cd 5f dc 44 af 85 74 e0 cc d4 e3 34 5a  i.|._.D..t....4Z
00000030: 00 00 00 00 00 20 00 06 00 00 00 00 00 00 00 80  ..... ..........
00000040: 00 00 00 00 00 00 00 81 00 00 00 00 00 00 00 82  ................
00000050: 00 00 00 01 00 0a 00 00 00 00 00 04 00 00 00 00  ................
00000060: 00 00 0a 00 b4 b5 02 00 02 00 00 08 00 00 00 00  ................
00000070: 00 00 00 00 00 00 00 00 0c 09 09 03 14 00 00 19  ................
XFS (loop0): Corruption of in-memory data (0x8) detected at _xfs_buf_ioapply
	+0xe1e/0x10e0 (fs/xfs/xfs_buf.c:1580).  Shutting down filesystem.
XFS (loop0): Please unmount the filesystem and rectify the problem(s)
XFS (loop0): log mount/recovery failed: error -117
XFS (loop0): log mount failed

This corruption will shutdown the file system and the file system will
no longer be mountable. The following script can reproduce the problem,
but it may take a long time.

 #!/bin/bash

 device=/dev/sda
 testdir=/mnt/test
 round=0

 function fail()
 {
	 echo "$*"
	 exit 1
 }

 mkdir -p $testdir
 while [ $round -lt 10000 ]
 do
	 echo "******* round $round ********"
	 mkfs.xfs -f $device
	 mount $device $testdir || fail "mount failed!"
	 fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null &
	 sleep 4
	 killall -w fsstress
	 umount $testdir
	 xfs_repair -e $device > /dev/null
	 if [ $? -eq 2 ];then
		 echo "ERR CODE 2: Dirty log exception during repair."
		 exit 1
	 fi
	 round=$(($round+1))
 done

With lazysbcount is enabled, There is no additional lock protection for
reading m_ifree and m_icount in xfs_log_sb(), if other cpu modifies the
m_ifree, this will make the m_ifree greater than m_icount. For example,
consider the following sequence and ifreedelta is postive:

 CPU0				 CPU1
 xfs_log_sb			 xfs_trans_unreserve_and_mod_sb
 ----------			 ------------------------------
 percpu_counter_sum(&mp->m_icount)
				 percpu_counter_add_batch(&mp->m_icount,
						idelta, XFS_ICOUNT_BATCH)
				 percpu_counter_add(&mp->m_ifree, ifreedelta);
 percpu_counter_sum(&mp->m_ifree)

After this, incorrect inode count (sb_ifree > sb_icount) will be writen to
the log. In the subsequent writing of sb, incorrect inode count (sb_ifree >
sb_icount) will fail to pass the boundary check in xfs_validate_sb_write()
that cause the file system shutdown.

When lazysbcount is enabled, we don't need to guarantee that Lazy sb
counters are completely correct, but we do need to guarantee that sb_ifree
<= sb_icount. On the other hand, the constraint that m_ifree <= m_icount
must be satisfied any time that there /cannot/ be other threads allocating
or freeing inode chunks. If the constraint is violated under these
circumstances, sb_i{count,free} (the ondisk superblock inode counters)
maybe incorrect and need to be marked sick at unmount, the count will
be rebuilt on the next mount.

Fixes: 8756a5af1819 ("libxfs: add more bounds checking to sb sanity checks")
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_sb.c |  4 +++-
 fs/xfs/xfs_mount.c     | 15 +++++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index a20cade590e9f..1eeecf2eb2a77 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -972,7 +972,9 @@ xfs_log_sb(
 	 */
 	if (xfs_has_lazysbcount(mp)) {
 		mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
-		mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+		mp->m_sb.sb_ifree = min_t(uint64_t,
+				percpu_counter_sum(&mp->m_ifree),
+				mp->m_sb.sb_icount);
 		mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
 	}
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index e8bb3c2e847e1..fb87ffb48f7fe 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -538,6 +538,20 @@ xfs_check_summary_counts(
 	return 0;
 }
 
+static void
+xfs_unmount_check(
+	struct xfs_mount	*mp)
+{
+	if (xfs_is_shutdown(mp))
+		return;
+
+	if (percpu_counter_sum(&mp->m_ifree) >
+			percpu_counter_sum(&mp->m_icount)) {
+		xfs_alert(mp, "ifree/icount mismatch at unmount");
+		xfs_fs_mark_sick(mp, XFS_SICK_FS_COUNTERS);
+	}
+}
+
 /*
  * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
  * internal inode structures can be sitting in the CIL and AIL at this point,
@@ -1077,6 +1091,7 @@ xfs_unmountfs(
 	if (error)
 		xfs_warn(mp, "Unable to free reserved block pool. "
 				"Freespace may not be correct on next mount.");
+	xfs_unmount_check(mp);
 
 	xfs_log_unmount(mp);
 	xfs_da_unmount(mp);
-- 
GitLab


From 64c80dfd04d1dd2ecf550542c8f3f41b54b20207 Mon Sep 17 00:00:00 2001
From: Lukas Herbolt <lukas@herbolt.com>
Date: Wed, 16 Nov 2022 19:20:21 -0800
Subject: [PATCH 1687/4122] xfs: Print XFS UUID on mount and umount events.

As of now only device names are printed out over __xfs_printk().
The device names are not persistent across reboots which in case
of searching for origin of corruption brings another task to properly
identify the devices. This patch add XFS UUID upon every mount/umount
event which will make the identification much easier.

Signed-off-by: Lukas Herbolt <lukas@herbolt.com>
[sandeen: rebase onto current upstream kernel]
Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_log.c   | 10 ++++++----
 fs/xfs/xfs_super.c |  2 +-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index f02a0dd522b3d..0141d9907d31b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -644,12 +644,14 @@ xfs_log_mount(
 	int		min_logfsbs;
 
 	if (!xfs_has_norecovery(mp)) {
-		xfs_notice(mp, "Mounting V%d Filesystem",
-			   XFS_SB_VERSION_NUM(&mp->m_sb));
+		xfs_notice(mp, "Mounting V%d Filesystem %pU",
+			   XFS_SB_VERSION_NUM(&mp->m_sb),
+			   &mp->m_sb.sb_uuid);
 	} else {
 		xfs_notice(mp,
-"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
-			   XFS_SB_VERSION_NUM(&mp->m_sb));
+"Mounting V%d filesystem %pU in no-recovery mode. Filesystem will be inconsistent.",
+			   XFS_SB_VERSION_NUM(&mp->m_sb),
+			   &mp->m_sb.sb_uuid);
 		ASSERT(xfs_is_readonly(mp));
 	}
 
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index ee4b429a2f2c9..0c4b73e9b29d2 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1110,7 +1110,7 @@ xfs_fs_put_super(
 	if (!sb->s_fs_info)
 		return;
 
-	xfs_notice(mp, "Unmounting Filesystem");
+	xfs_notice(mp, "Unmounting Filesystem %pU", &mp->m_sb.sb_uuid);
 	xfs_filestream_unmount(mp);
 	xfs_unmountfs(mp);
 
-- 
GitLab


From 4f44e519b6a945068755708119cca5b74d01d1f6 Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Mon, 14 Nov 2022 19:16:59 -0600
Subject: [PATCH 1688/4122] RDMA/irdma: Fix inline for multiple SGE's

Currently, inline send and inline write assume a single
SGE and only copy data from the first one. Add support
for multiple SGE's.

Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs")
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Link: https://lore.kernel.org/r/20221115011701.1379-2-shiraz.saleem@intel.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/irdma/uk.c    | 149 ++++++++++++++++++----------
 drivers/infiniband/hw/irdma/user.h  |  19 +---
 drivers/infiniband/hw/irdma/verbs.c |  55 ++++------
 3 files changed, 119 insertions(+), 104 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index a6e5d350a94ce..1a57ed9d77ff3 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -566,21 +566,37 @@ static void irdma_set_mw_bind_wqe_gen_1(__le64 *wqe,
 
 /**
  * irdma_copy_inline_data_gen_1 - Copy inline data to wqe
- * @dest: pointer to wqe
- * @src: pointer to inline data
- * @len: length of inline data to copy
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: Total inline data length
  * @polarity: compatibility parameter
  */
-static void irdma_copy_inline_data_gen_1(u8 *dest, u8 *src, u32 len,
-					 u8 polarity)
+static void irdma_copy_inline_data_gen_1(u8 *wqe, struct ib_sge *sge_list,
+					 u32 num_sges, u8 polarity)
 {
-	if (len <= 16) {
-		memcpy(dest, src, len);
-	} else {
-		memcpy(dest, src, 16);
-		src += 16;
-		dest = dest + 32;
-		memcpy(dest, src, len - 16);
+	u32 quanta_bytes_remaining = 16;
+	int i;
+
+	for (i = 0; i < num_sges; i++) {
+		u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+		u32 sge_len = sge_list[i].length;
+
+		while (sge_len) {
+			u32 bytes_copied;
+
+			bytes_copied = min(sge_len, quanta_bytes_remaining);
+			memcpy(wqe, cur_sge, bytes_copied);
+			wqe += bytes_copied;
+			cur_sge += bytes_copied;
+			quanta_bytes_remaining -= bytes_copied;
+			sge_len -= bytes_copied;
+
+			if (!quanta_bytes_remaining) {
+				/* Remaining inline bytes reside after hdr */
+				wqe += 16;
+				quanta_bytes_remaining = 32;
+			}
+		}
 	}
 }
 
@@ -612,35 +628,51 @@ static void irdma_set_mw_bind_wqe(__le64 *wqe,
 
 /**
  * irdma_copy_inline_data - Copy inline data to wqe
- * @dest: pointer to wqe
- * @src: pointer to inline data
- * @len: length of inline data to copy
+ * @wqe: pointer to wqe
+ * @sge_list: table of pointers to inline data
+ * @num_sges: number of SGE's
  * @polarity: polarity of wqe valid bit
  */
-static void irdma_copy_inline_data(u8 *dest, u8 *src, u32 len, u8 polarity)
+static void irdma_copy_inline_data(u8 *wqe, struct ib_sge *sge_list,
+				   u32 num_sges, u8 polarity)
 {
 	u8 inline_valid = polarity << IRDMA_INLINE_VALID_S;
-	u32 copy_size;
-
-	dest += 8;
-	if (len <= 8) {
-		memcpy(dest, src, len);
-		return;
-	}
-
-	*((u64 *)dest) = *((u64 *)src);
-	len -= 8;
-	src += 8;
-	dest += 24; /* point to additional 32 byte quanta */
-
-	while (len) {
-		copy_size = len < 31 ? len : 31;
-		memcpy(dest, src, copy_size);
-		*(dest + 31) = inline_valid;
-		len -= copy_size;
-		dest += 32;
-		src += copy_size;
+	u32 quanta_bytes_remaining = 8;
+	bool first_quanta = true;
+	int i;
+
+	wqe += 8;
+
+	for (i = 0; i < num_sges; i++) {
+		u8 *cur_sge = (u8 *)(uintptr_t)sge_list[i].addr;
+		u32 sge_len = sge_list[i].length;
+
+		while (sge_len) {
+			u32 bytes_copied;
+
+			bytes_copied = min(sge_len, quanta_bytes_remaining);
+			memcpy(wqe, cur_sge, bytes_copied);
+			wqe += bytes_copied;
+			cur_sge += bytes_copied;
+			quanta_bytes_remaining -= bytes_copied;
+			sge_len -= bytes_copied;
+
+			if (!quanta_bytes_remaining) {
+				quanta_bytes_remaining = 31;
+
+				/* Remaining inline bytes reside after hdr */
+				if (first_quanta) {
+					first_quanta = false;
+					wqe += 16;
+				} else {
+					*wqe = inline_valid;
+					wqe++;
+				}
+			}
+		}
 	}
+	if (!first_quanta && quanta_bytes_remaining < 31)
+		*(wqe + quanta_bytes_remaining) = inline_valid;
 }
 
 /**
@@ -679,20 +711,27 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
 			       struct irdma_post_sq_info *info, bool post_sq)
 {
 	__le64 *wqe;
-	struct irdma_inline_rdma_write *op_info;
+	struct irdma_rdma_write *op_info;
 	u64 hdr = 0;
 	u32 wqe_idx;
 	bool read_fence = false;
+	u32 i, total_size = 0;
 	u16 quanta;
 
 	info->push_wqe = qp->push_db ? true : false;
-	op_info = &info->op.inline_rdma_write;
+	op_info = &info->op.rdma_write;
+
+	if (unlikely(qp->max_sq_frag_cnt < op_info->num_lo_sges))
+		return -EINVAL;
 
-	if (op_info->len > qp->max_inline_data)
+	for (i = 0; i < op_info->num_lo_sges; i++)
+		total_size += op_info->lo_sg_list[i].length;
+
+	if (unlikely(total_size > qp->max_inline_data))
 		return -EINVAL;
 
-	quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
-	wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
+	quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+	wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
 					 info);
 	if (!wqe)
 		return -ENOMEM;
@@ -705,7 +744,7 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
 
 	hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, op_info->rem_addr.lkey) |
 	      FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
-	      FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) |
+	      FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
 	      FIELD_PREP(IRDMAQPSQ_REPORTRTT, info->report_rtt ? 1 : 0) |
 	      FIELD_PREP(IRDMAQPSQ_INLINEDATAFLAG, 1) |
 	      FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG, info->imm_data_valid ? 1 : 0) |
@@ -719,7 +758,8 @@ int irdma_uk_inline_rdma_write(struct irdma_qp_uk *qp,
 		set_64bit_val(wqe, 0,
 			      FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
 
-	qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len,
+	qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->lo_sg_list,
+					op_info->num_lo_sges,
 					qp->swqe_polarity);
 	dma_wmb(); /* make sure WQE is populated before valid bit is set */
 
@@ -745,20 +785,27 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
 			 struct irdma_post_sq_info *info, bool post_sq)
 {
 	__le64 *wqe;
-	struct irdma_post_inline_send *op_info;
+	struct irdma_post_send *op_info;
 	u64 hdr;
 	u32 wqe_idx;
 	bool read_fence = false;
+	u32 i, total_size = 0;
 	u16 quanta;
 
 	info->push_wqe = qp->push_db ? true : false;
-	op_info = &info->op.inline_send;
+	op_info = &info->op.send;
 
-	if (op_info->len > qp->max_inline_data)
+	if (unlikely(qp->max_sq_frag_cnt < op_info->num_sges))
 		return -EINVAL;
 
-	quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(op_info->len);
-	wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, op_info->len,
+	for (i = 0; i < op_info->num_sges; i++)
+		total_size += op_info->sg_list[i].length;
+
+	if (unlikely(total_size > qp->max_inline_data))
+		return -EINVAL;
+
+	quanta = qp->wqe_ops.iw_inline_data_size_to_quanta(total_size);
+	wqe = irdma_qp_get_next_send_wqe(qp, &wqe_idx, quanta, total_size,
 					 info);
 	if (!wqe)
 		return -ENOMEM;
@@ -773,7 +820,7 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
 	hdr = FIELD_PREP(IRDMAQPSQ_REMSTAG, info->stag_to_inv) |
 	      FIELD_PREP(IRDMAQPSQ_AHID, op_info->ah_id) |
 	      FIELD_PREP(IRDMAQPSQ_OPCODE, info->op_type) |
-	      FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, op_info->len) |
+	      FIELD_PREP(IRDMAQPSQ_INLINEDATALEN, total_size) |
 	      FIELD_PREP(IRDMAQPSQ_IMMDATAFLAG,
 			 (info->imm_data_valid ? 1 : 0)) |
 	      FIELD_PREP(IRDMAQPSQ_REPORTRTT, (info->report_rtt ? 1 : 0)) |
@@ -789,8 +836,8 @@ int irdma_uk_inline_send(struct irdma_qp_uk *qp,
 	if (info->imm_data_valid)
 		set_64bit_val(wqe, 0,
 			      FIELD_PREP(IRDMAQPSQ_IMMDATA, info->imm_data));
-	qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->data, op_info->len,
-					qp->swqe_polarity);
+	qp->wqe_ops.iw_copy_inline_data((u8 *)wqe, op_info->sg_list,
+					op_info->num_sges, qp->swqe_polarity);
 
 	dma_wmb(); /* make sure WQE is populated before valid bit is set */
 
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
index 2ef61923c9268..424d4aa8cdcd5 100644
--- a/drivers/infiniband/hw/irdma/user.h
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -173,14 +173,6 @@ struct irdma_post_send {
 	u32 ah_id;
 };
 
-struct irdma_post_inline_send {
-	void *data;
-	u32 len;
-	u32 qkey;
-	u32 dest_qp;
-	u32 ah_id;
-};
-
 struct irdma_post_rq_info {
 	u64 wr_id;
 	struct ib_sge *sg_list;
@@ -193,12 +185,6 @@ struct irdma_rdma_write {
 	struct ib_sge rem_addr;
 };
 
-struct irdma_inline_rdma_write {
-	void *data;
-	u32 len;
-	struct ib_sge rem_addr;
-};
-
 struct irdma_rdma_read {
 	struct ib_sge *lo_sg_list;
 	u32 num_lo_sges;
@@ -241,8 +227,6 @@ struct irdma_post_sq_info {
 		struct irdma_rdma_read rdma_read;
 		struct irdma_bind_window bind_window;
 		struct irdma_inv_local_stag inv_local_stag;
-		struct irdma_inline_rdma_write inline_rdma_write;
-		struct irdma_post_inline_send inline_send;
 	} op;
 };
 
@@ -291,7 +275,8 @@ int irdma_uk_stag_local_invalidate(struct irdma_qp_uk *qp,
 				   bool post_sq);
 
 struct irdma_wqe_uk_ops {
-	void (*iw_copy_inline_data)(u8 *dest, u8 *src, u32 len, u8 polarity);
+	void (*iw_copy_inline_data)(u8 *dest, struct ib_sge *sge_list,
+				    u32 num_sges, u8 polarity);
 	u16 (*iw_inline_data_size_to_quanta)(u32 data_size);
 	void (*iw_set_fragment)(__le64 *wqe, u32 offset, struct ib_sge *sge,
 				u8 valid);
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 434241789f123..e252f431e2aca 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -3136,30 +3136,20 @@ static int irdma_post_send(struct ib_qp *ibqp,
 				info.stag_to_inv = ib_wr->ex.invalidate_rkey;
 			}
 
-			if (ib_wr->send_flags & IB_SEND_INLINE) {
-				info.op.inline_send.data = (void *)(unsigned long)
-							   ib_wr->sg_list[0].addr;
-				info.op.inline_send.len = ib_wr->sg_list[0].length;
-				if (iwqp->ibqp.qp_type == IB_QPT_UD ||
-				    iwqp->ibqp.qp_type == IB_QPT_GSI) {
-					ah = to_iwah(ud_wr(ib_wr)->ah);
-					info.op.inline_send.ah_id = ah->sc_ah.ah_info.ah_idx;
-					info.op.inline_send.qkey = ud_wr(ib_wr)->remote_qkey;
-					info.op.inline_send.dest_qp = ud_wr(ib_wr)->remote_qpn;
-				}
+			info.op.send.num_sges = ib_wr->num_sge;
+			info.op.send.sg_list = ib_wr->sg_list;
+			if (iwqp->ibqp.qp_type == IB_QPT_UD ||
+			    iwqp->ibqp.qp_type == IB_QPT_GSI) {
+				ah = to_iwah(ud_wr(ib_wr)->ah);
+				info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
+				info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
+				info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
+			}
+
+			if (ib_wr->send_flags & IB_SEND_INLINE)
 				err = irdma_uk_inline_send(ukqp, &info, false);
-			} else {
-				info.op.send.num_sges = ib_wr->num_sge;
-				info.op.send.sg_list = ib_wr->sg_list;
-				if (iwqp->ibqp.qp_type == IB_QPT_UD ||
-				    iwqp->ibqp.qp_type == IB_QPT_GSI) {
-					ah = to_iwah(ud_wr(ib_wr)->ah);
-					info.op.send.ah_id = ah->sc_ah.ah_info.ah_idx;
-					info.op.send.qkey = ud_wr(ib_wr)->remote_qkey;
-					info.op.send.dest_qp = ud_wr(ib_wr)->remote_qpn;
-				}
+			else
 				err = irdma_uk_send(ukqp, &info, false);
-			}
 			break;
 		case IB_WR_RDMA_WRITE_WITH_IMM:
 			if (ukqp->qp_caps & IRDMA_WRITE_WITH_IMM) {
@@ -3176,22 +3166,15 @@ static int irdma_post_send(struct ib_qp *ibqp,
 			else
 				info.op_type = IRDMA_OP_TYPE_RDMA_WRITE;
 
-			if (ib_wr->send_flags & IB_SEND_INLINE) {
-				info.op.inline_rdma_write.data = (void *)(uintptr_t)ib_wr->sg_list[0].addr;
-				info.op.inline_rdma_write.len =
-						ib_wr->sg_list[0].length;
-				info.op.inline_rdma_write.rem_addr.addr =
-						rdma_wr(ib_wr)->remote_addr;
-				info.op.inline_rdma_write.rem_addr.lkey =
-						rdma_wr(ib_wr)->rkey;
+			info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
+			info.op.rdma_write.lo_sg_list = ib_wr->sg_list;
+			info.op.rdma_write.rem_addr.addr =
+				rdma_wr(ib_wr)->remote_addr;
+			info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+			if (ib_wr->send_flags & IB_SEND_INLINE)
 				err = irdma_uk_inline_rdma_write(ukqp, &info, false);
-			} else {
-				info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
-				info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
-				info.op.rdma_write.rem_addr.addr = rdma_wr(ib_wr)->remote_addr;
-				info.op.rdma_write.rem_addr.lkey = rdma_wr(ib_wr)->rkey;
+			else
 				err = irdma_uk_rdma_write(ukqp, &info, false);
-			}
 			break;
 		case IB_WR_RDMA_READ_WITH_INV:
 			inv_stag = true;
-- 
GitLab


From 24419777e9431137d5923a747f546facb1e49b1f Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Mon, 14 Nov 2022 19:17:00 -0600
Subject: [PATCH 1689/4122] RDMA/irdma: Fix RQ completion opcode

The opcode written by HW, in the RQ CQE, is the
RoCEv2/iWARP protocol opcode from the received
packet and not the SW opcode as currently assumed.
Fix this by returning the raw operation type and
queue type in the CQE to irdma_process_cqe and add
2 helpers set_ib_wc_op_sq set_ib_wc_op_rq to map
IRDMA HW op types to IB op types.

Note that for iWARP, only Write with Immediate is
supported so the opcode can only be IB_WC_RECV_RDMA_WITH_IMM
when there is immediate data present.

Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs")
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Link: https://lore.kernel.org/r/20221115011701.1379-3-shiraz.saleem@intel.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/irdma/uk.c    | 21 +++++-------
 drivers/infiniband/hw/irdma/user.h  |  1 +
 drivers/infiniband/hw/irdma/utils.c |  2 ++
 drivers/infiniband/hw/irdma/verbs.c | 39 ++++-----------------
 drivers/infiniband/hw/irdma/verbs.h | 53 +++++++++++++++++++++++++++++
 5 files changed, 71 insertions(+), 45 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/uk.c b/drivers/infiniband/hw/irdma/uk.c
index 1a57ed9d77ff3..16183e894da77 100644
--- a/drivers/infiniband/hw/irdma/uk.c
+++ b/drivers/infiniband/hw/irdma/uk.c
@@ -1049,11 +1049,10 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
 	__le64 *cqe;
 	struct irdma_qp_uk *qp;
 	struct irdma_ring *pring = NULL;
-	u32 wqe_idx, q_type;
+	u32 wqe_idx;
 	int ret_code;
 	bool move_cq_head = true;
 	u8 polarity;
-	u8 op_type;
 	bool ext_valid;
 	__le64 *ext_cqe;
 
@@ -1121,7 +1120,7 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
 		info->ud_vlan_valid = false;
 	}
 
-	q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
+	info->q_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
 	info->error = (bool)FIELD_GET(IRDMA_CQ_ERROR, qword3);
 	info->push_dropped = (bool)FIELD_GET(IRDMACQ_PSHDROP, qword3);
 	info->ipv4 = (bool)FIELD_GET(IRDMACQ_IPV4, qword3);
@@ -1160,8 +1159,9 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
 	}
 	wqe_idx = (u32)FIELD_GET(IRDMA_CQ_WQEIDX, qword3);
 	info->qp_handle = (irdma_qp_handle)(unsigned long)qp;
+	info->op_type = (u8)FIELD_GET(IRDMA_CQ_SQ, qword3);
 
-	if (q_type == IRDMA_CQE_QTYPE_RQ) {
+	if (info->q_type == IRDMA_CQE_QTYPE_RQ) {
 		u32 array_idx;
 
 		array_idx = wqe_idx / qp->rq_wqe_size_multiplier;
@@ -1181,10 +1181,6 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
 
 		info->bytes_xfered = (u32)FIELD_GET(IRDMACQ_PAYLDLEN, qword0);
 
-		if (info->imm_valid)
-			info->op_type = IRDMA_OP_TYPE_REC_IMM;
-		else
-			info->op_type = IRDMA_OP_TYPE_REC;
 		if (qword3 & IRDMACQ_STAG) {
 			info->stag_invalid_set = true;
 			info->inv_stag = (u32)FIELD_GET(IRDMACQ_INVSTAG, qword2);
@@ -1242,17 +1238,18 @@ int irdma_uk_cq_poll_cmpl(struct irdma_cq_uk *cq,
 				sw_wqe = qp->sq_base[tail].elem;
 				get_64bit_val(sw_wqe, 24,
 					      &wqe_qword);
-				op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, wqe_qword);
-				info->op_type = op_type;
+				info->op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE,
+							      wqe_qword);
 				IRDMA_RING_SET_TAIL(qp->sq_ring,
 						    tail + qp->sq_wrtrk_array[tail].quanta);
-				if (op_type != IRDMAQP_OP_NOP) {
+				if (info->op_type != IRDMAQP_OP_NOP) {
 					info->wr_id = qp->sq_wrtrk_array[tail].wrid;
 					info->bytes_xfered = qp->sq_wrtrk_array[tail].wr_len;
 					break;
 				}
 			} while (1);
-			if (op_type == IRDMA_OP_TYPE_BIND_MW && info->minor_err == FLUSH_PROT_ERR)
+			if (info->op_type == IRDMA_OP_TYPE_BIND_MW &&
+			    info->minor_err == FLUSH_PROT_ERR)
 				info->minor_err = FLUSH_MW_BIND_ERR;
 			qp->sq_flush_seen = true;
 			if (!IRDMA_RING_MORE_WORK(qp->sq_ring))
diff --git a/drivers/infiniband/hw/irdma/user.h b/drivers/infiniband/hw/irdma/user.h
index 424d4aa8cdcd5..d0cdf609f5e06 100644
--- a/drivers/infiniband/hw/irdma/user.h
+++ b/drivers/infiniband/hw/irdma/user.h
@@ -245,6 +245,7 @@ struct irdma_cq_poll_info {
 	u16 ud_vlan;
 	u8 ud_smac[6];
 	u8 op_type;
+	u8 q_type;
 	bool stag_invalid_set:1; /* or L_R_Key set */
 	bool push_dropped:1;
 	bool error:1;
diff --git a/drivers/infiniband/hw/irdma/utils.c b/drivers/infiniband/hw/irdma/utils.c
index 8dfc9e154d733..445e69e864097 100644
--- a/drivers/infiniband/hw/irdma/utils.c
+++ b/drivers/infiniband/hw/irdma/utils.c
@@ -2591,6 +2591,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
 			sw_wqe = qp->sq_base[wqe_idx].elem;
 			get_64bit_val(sw_wqe, 24, &wqe_qword);
 			cmpl->cpi.op_type = (u8)FIELD_GET(IRDMAQPSQ_OPCODE, IRDMAQPSQ_OPCODE);
+			cmpl->cpi.q_type = IRDMA_CQE_QTYPE_SQ;
 			/* remove the SQ WR by moving SQ tail*/
 			IRDMA_RING_SET_TAIL(*sq_ring,
 				sq_ring->tail + qp->sq_wrtrk_array[sq_ring->tail].quanta);
@@ -2629,6 +2630,7 @@ void irdma_generate_flush_completions(struct irdma_qp *iwqp)
 
 			cmpl->cpi.wr_id = qp->rq_wrid_array[wqe_idx];
 			cmpl->cpi.op_type = IRDMA_OP_TYPE_REC;
+			cmpl->cpi.q_type = IRDMA_CQE_QTYPE_RQ;
 			/* remove the RQ WR by moving RQ tail */
 			IRDMA_RING_SET_TAIL(*rq_ring, rq_ring->tail + 1);
 			ibdev_dbg(iwqp->iwrcq->ibcq.device,
diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index e252f431e2aca..01d0dc4b56490 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -3334,7 +3334,6 @@ static enum ib_wc_status irdma_flush_err_to_ib_wc_status(enum irdma_flush_opcode
 static void irdma_process_cqe(struct ib_wc *entry,
 			      struct irdma_cq_poll_info *cq_poll_info)
 {
-	struct irdma_qp *iwqp;
 	struct irdma_sc_qp *qp;
 
 	entry->wc_flags = 0;
@@ -3342,7 +3341,6 @@ static void irdma_process_cqe(struct ib_wc *entry,
 	entry->wr_id = cq_poll_info->wr_id;
 
 	qp = cq_poll_info->qp_handle;
-	iwqp = qp->qp_uk.back_qp;
 	entry->qp = qp->qp_uk.back_qp;
 
 	if (cq_poll_info->error) {
@@ -3375,42 +3373,17 @@ static void irdma_process_cqe(struct ib_wc *entry,
 		}
 	}
 
-	switch (cq_poll_info->op_type) {
-	case IRDMA_OP_TYPE_RDMA_WRITE:
-	case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
-		entry->opcode = IB_WC_RDMA_WRITE;
-		break;
-	case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
-	case IRDMA_OP_TYPE_RDMA_READ:
-		entry->opcode = IB_WC_RDMA_READ;
-		break;
-	case IRDMA_OP_TYPE_SEND_INV:
-	case IRDMA_OP_TYPE_SEND_SOL:
-	case IRDMA_OP_TYPE_SEND_SOL_INV:
-	case IRDMA_OP_TYPE_SEND:
-		entry->opcode = IB_WC_SEND;
-		break;
-	case IRDMA_OP_TYPE_FAST_REG_NSMR:
-		entry->opcode = IB_WC_REG_MR;
-		break;
-	case IRDMA_OP_TYPE_INV_STAG:
-		entry->opcode = IB_WC_LOCAL_INV;
-		break;
-	case IRDMA_OP_TYPE_REC_IMM:
-	case IRDMA_OP_TYPE_REC:
-		entry->opcode = cq_poll_info->op_type == IRDMA_OP_TYPE_REC_IMM ?
-			IB_WC_RECV_RDMA_WITH_IMM : IB_WC_RECV;
+	if (cq_poll_info->q_type == IRDMA_CQE_QTYPE_SQ) {
+		set_ib_wc_op_sq(cq_poll_info, entry);
+	} else {
+		set_ib_wc_op_rq(cq_poll_info, entry,
+				qp->qp_uk.qp_caps & IRDMA_SEND_WITH_IMM ?
+				true : false);
 		if (qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_UD &&
 		    cq_poll_info->stag_invalid_set) {
 			entry->ex.invalidate_rkey = cq_poll_info->inv_stag;
 			entry->wc_flags |= IB_WC_WITH_INVALIDATE;
 		}
-		break;
-	default:
-		ibdev_err(&iwqp->iwdev->ibdev,
-			  "Invalid opcode = %d in CQE\n", cq_poll_info->op_type);
-		entry->status = IB_WC_GENERAL_ERR;
-		return;
 	}
 
 	if (qp->qp_uk.qp_type == IRDMA_QP_TYPE_ROCE_UD) {
diff --git a/drivers/infiniband/hw/irdma/verbs.h b/drivers/infiniband/hw/irdma/verbs.h
index 4309b7159f42c..a536e9fa85ebf 100644
--- a/drivers/infiniband/hw/irdma/verbs.h
+++ b/drivers/infiniband/hw/irdma/verbs.h
@@ -232,6 +232,59 @@ static inline u16 irdma_fw_minor_ver(struct irdma_sc_dev *dev)
 	return (u16)FIELD_GET(IRDMA_FW_VER_MINOR, dev->feature_info[IRDMA_FEATURE_FW_INFO]);
 }
 
+static inline void set_ib_wc_op_sq(struct irdma_cq_poll_info *cq_poll_info,
+				   struct ib_wc *entry)
+{
+	switch (cq_poll_info->op_type) {
+	case IRDMA_OP_TYPE_RDMA_WRITE:
+	case IRDMA_OP_TYPE_RDMA_WRITE_SOL:
+		entry->opcode = IB_WC_RDMA_WRITE;
+		break;
+	case IRDMA_OP_TYPE_RDMA_READ_INV_STAG:
+	case IRDMA_OP_TYPE_RDMA_READ:
+		entry->opcode = IB_WC_RDMA_READ;
+		break;
+	case IRDMA_OP_TYPE_SEND_SOL:
+	case IRDMA_OP_TYPE_SEND_SOL_INV:
+	case IRDMA_OP_TYPE_SEND_INV:
+	case IRDMA_OP_TYPE_SEND:
+		entry->opcode = IB_WC_SEND;
+		break;
+	case IRDMA_OP_TYPE_FAST_REG_NSMR:
+		entry->opcode = IB_WC_REG_MR;
+		break;
+	case IRDMA_OP_TYPE_INV_STAG:
+		entry->opcode = IB_WC_LOCAL_INV;
+		break;
+	default:
+		entry->status = IB_WC_GENERAL_ERR;
+	}
+}
+
+static inline void set_ib_wc_op_rq(struct irdma_cq_poll_info *cq_poll_info,
+				   struct ib_wc *entry, bool send_imm_support)
+{
+	/**
+	 * iWARP does not support sendImm, so the presence of Imm data
+	 * must be WriteImm.
+	 */
+	if (!send_imm_support) {
+		entry->opcode = cq_poll_info->imm_valid ?
+					IB_WC_RECV_RDMA_WITH_IMM :
+					IB_WC_RECV;
+		return;
+	}
+
+	switch (cq_poll_info->op_type) {
+	case IB_OPCODE_RDMA_WRITE_ONLY_WITH_IMMEDIATE:
+	case IB_OPCODE_RDMA_WRITE_LAST_WITH_IMMEDIATE:
+		entry->opcode = IB_WC_RECV_RDMA_WITH_IMM;
+		break;
+	default:
+		entry->opcode = IB_WC_RECV;
+	}
+}
+
 void irdma_mcast_mac(u32 *ip_addr, u8 *mac, bool ipv4);
 int irdma_ib_register_device(struct irdma_device *iwdev);
 void irdma_ib_unregister_device(struct irdma_device *iwdev);
-- 
GitLab


From 8f7e2daa6336f9f4b6f8a4715a809674606df16b Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Mon, 14 Nov 2022 19:17:01 -0600
Subject: [PATCH 1690/4122] RDMA/irdma: Do not request 2-level PBLEs for CQ
 alloc

When allocating PBLE's for a large CQ, it is possible
that a 2-level PBLE is returned which would cause the
CQ allocation to fail since 1-level is assumed and checked for.
Fix this by requesting a level one PBLE only.

Fixes: b48c24c2d710 ("RDMA/irdma: Implement device supported verb APIs")
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Link: https://lore.kernel.org/r/20221115011701.1379-4-shiraz.saleem@intel.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/irdma/verbs.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index 01d0dc4b56490..dc3f5f3fee901 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -2329,9 +2329,10 @@ static bool irdma_check_mr_contiguous(struct irdma_pble_alloc *palloc,
  * @rf: RDMA PCI function
  * @iwmr: mr pointer for this memory registration
  * @use_pbles: flag if to use pble's
+ * @lvl_1_only: request only level 1 pble if true
  */
 static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
-			     bool use_pbles)
+			     bool use_pbles, bool lvl_1_only)
 {
 	struct irdma_pbl *iwpbl = &iwmr->iwpbl;
 	struct irdma_pble_alloc *palloc = &iwpbl->pble_alloc;
@@ -2342,7 +2343,7 @@ static int irdma_setup_pbles(struct irdma_pci_f *rf, struct irdma_mr *iwmr,
 
 	if (use_pbles) {
 		status = irdma_get_pble(rf->pble_rsrc, palloc, iwmr->page_cnt,
-					false);
+					lvl_1_only);
 		if (status)
 			return status;
 
@@ -2385,16 +2386,10 @@ static int irdma_handle_q_mem(struct irdma_device *iwdev,
 	bool ret = true;
 
 	pg_size = iwmr->page_size;
-	err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
+	err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, true);
 	if (err)
 		return err;
 
-	if (use_pbles && palloc->level != PBLE_LEVEL_1) {
-		irdma_free_pble(iwdev->rf->pble_rsrc, palloc);
-		iwpbl->pbl_allocated = false;
-		return -ENOMEM;
-	}
-
 	if (use_pbles)
 		arr = palloc->level1.addr;
 
@@ -2870,7 +2865,7 @@ static struct ib_mr *irdma_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
 	case IRDMA_MEMREG_TYPE_MEM:
 		use_pbles = (iwmr->page_cnt != 1);
 
-		err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles);
+		err = irdma_setup_pbles(iwdev->rf, iwmr, use_pbles, false);
 		if (err)
 			goto error;
 
-- 
GitLab


From d7115727e32e94c77a5441892e74ae0339afa1a5 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:38 +0800
Subject: [PATCH 1691/4122] RDMA/rtrs-srv: Refactor rtrs_srv_rdma_cm_handler

The RDMA_CM_EVENT_CONNECT_REQUEST is quite different to other types,
let's check it separately at the beginning of routine, then we can
avoid the indentation accordingly.

Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-2-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-srv.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 22d7ba05e9fe8..5fe3699cb8ff9 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -1950,22 +1950,21 @@ static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id,
 {
 	struct rtrs_srv_path *srv_path = NULL;
 	struct rtrs_path *s = NULL;
+	struct rtrs_con *c = NULL;
 
-	if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) {
-		struct rtrs_con *c = cm_id->context;
-
-		s = c->path;
-		srv_path = to_srv_path(s);
-	}
-
-	switch (ev->event) {
-	case RDMA_CM_EVENT_CONNECT_REQUEST:
+	if (ev->event == RDMA_CM_EVENT_CONNECT_REQUEST)
 		/*
 		 * In case of error cma.c will destroy cm_id,
 		 * see cma_process_remove()
 		 */
 		return rtrs_rdma_connect(cm_id, ev->param.conn.private_data,
 					  ev->param.conn.private_data_len);
+
+	c = cm_id->context;
+	s = c->path;
+	srv_path = to_srv_path(s);
+
+	switch (ev->event) {
 	case RDMA_CM_EVENT_ESTABLISHED:
 		/* Nothing here */
 		break;
-- 
GitLab


From 0f597ac618d04beb9de997fda59a29c9d3818fb2 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:39 +0800
Subject: [PATCH 1692/4122] RDMA/rtrs-srv: Refactor the handling of failure
 case in map_cont_bufs

Let's call unmap_cont_bufs when failure happens, and also only update
mrs_num after everything is settled which means we can remove 'mri'.

Acked-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-3-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-srv.c | 47 +++++++++++---------------
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 5fe3699cb8ff9..b877dd57b6b92 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -561,9 +561,11 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
 {
 	struct rtrs_srv_sess *srv = srv_path->srv;
 	struct rtrs_path *ss = &srv_path->s;
-	int i, mri, err, mrs_num;
+	int i, err, mrs_num;
 	unsigned int chunk_bits;
 	int chunks_per_mr = 1;
+	struct ib_mr *mr;
+	struct sg_table *sgt;
 
 	/*
 	 * Here we map queue_depth chunks to MR.  Firstly we have to
@@ -586,16 +588,14 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
 	if (!srv_path->mrs)
 		return -ENOMEM;
 
-	srv_path->mrs_num = mrs_num;
-
-	for (mri = 0; mri < mrs_num; mri++) {
-		struct rtrs_srv_mr *srv_mr = &srv_path->mrs[mri];
-		struct sg_table *sgt = &srv_mr->sgt;
+	for (srv_path->mrs_num = 0; srv_path->mrs_num < mrs_num;
+	     srv_path->mrs_num++) {
+		struct rtrs_srv_mr *srv_mr = &srv_path->mrs[srv_path->mrs_num];
 		struct scatterlist *s;
-		struct ib_mr *mr;
 		int nr, nr_sgt, chunks;
 
-		chunks = chunks_per_mr * mri;
+		sgt = &srv_mr->sgt;
+		chunks = chunks_per_mr * srv_path->mrs_num;
 		if (!always_invalidate)
 			chunks_per_mr = min_t(int, chunks_per_mr,
 					      srv->queue_depth - chunks);
@@ -644,31 +644,24 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
 
 		ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey));
 		srv_mr->mr = mr;
-
-		continue;
-err:
-		while (mri--) {
-			srv_mr = &srv_path->mrs[mri];
-			sgt = &srv_mr->sgt;
-			mr = srv_mr->mr;
-			rtrs_iu_free(srv_mr->iu, srv_path->s.dev->ib_dev, 1);
-dereg_mr:
-			ib_dereg_mr(mr);
-unmap_sg:
-			ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
-					sgt->nents, DMA_BIDIRECTIONAL);
-free_sg:
-			sg_free_table(sgt);
-		}
-		kfree(srv_path->mrs);
-
-		return err;
 	}
 
 	chunk_bits = ilog2(srv->queue_depth - 1) + 1;
 	srv_path->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits);
 
 	return 0;
+
+dereg_mr:
+	ib_dereg_mr(mr);
+unmap_sg:
+	ib_dma_unmap_sg(srv_path->s.dev->ib_dev, sgt->sgl,
+			sgt->nents, DMA_BIDIRECTIONAL);
+free_sg:
+	sg_free_table(sgt);
+err:
+	unmap_cont_bufs(srv_path);
+
+	return err;
 }
 
 static void rtrs_srv_hb_err_handler(struct rtrs_con *c)
-- 
GitLab


From 102d2f70ec0999a5cde181f1ccbe8a81cba45b10 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:40 +0800
Subject: [PATCH 1693/4122] RDMA/rtrs-srv: Correct the checking of ib_map_mr_sg

We should check with nr_sgt, also the only successful case is that
all sg elements are mapped, so make it explicitly.

Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-4-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-srv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index b877dd57b6b92..581c850e71d64 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -622,7 +622,7 @@ static int map_cont_bufs(struct rtrs_srv_path *srv_path)
 		}
 		nr = ib_map_mr_sg(mr, sgt->sgl, nr_sgt,
 				  NULL, max_chunk_size);
-		if (nr < 0 || nr < sgt->nents) {
+		if (nr != nr_sgt) {
 			err = nr < 0 ? nr : -EINVAL;
 			goto dereg_mr;
 		}
-- 
GitLab


From f5708e6699c230f64736107c90b63a53bdc0a613 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:41 +0800
Subject: [PATCH 1694/4122] RDMA/rtrs-clt: Correct the checking of ib_map_mr_sg

We should check with count, also the only successful case is that
all sg elements are mapped, so make it explicitly.

Acked-by: Jack Wang <jinpu.wang@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-5-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-clt.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-clt.c b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
index 8546b8816524c..be7c8480f9472 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-clt.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-clt.c
@@ -1064,10 +1064,8 @@ static int rtrs_map_sg_fr(struct rtrs_clt_io_req *req, size_t count)
 
 	/* Align the MR to a 4K page size to match the block virt boundary */
 	nr = ib_map_mr_sg(req->mr, req->sglist, count, NULL, SZ_4K);
-	if (nr < 0)
-		return nr;
-	if (nr < req->sg_cnt)
-		return -EINVAL;
+	if (nr != count)
+		return nr < 0 ? nr : -EINVAL;
 	ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
 
 	return nr;
-- 
GitLab


From a4399563356c86eafeadcdc155d5d93320713b6e Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:42 +0800
Subject: [PATCH 1695/4122] RDMA/rtrs-srv: Remove outdated comments from
 create_con

Remove the orphan comments.

Acked-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-6-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-srv.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv.c b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
index 581c850e71d64..d1703e2c0b82f 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv.c
@@ -1671,12 +1671,6 @@ static int create_con(struct rtrs_srv_path *srv_path,
 				      srv->queue_depth * (1 + 2) + 1);
 
 		max_recv_wr = srv->queue_depth + 1;
-		/*
-		 * If we have all receive requests posted and
-		 * all write requests posted and each read request
-		 * requires an invalidate request + drain
-		 * and qp gets into error state.
-		 */
 	}
 	cq_num = max_send_wr + max_recv_wr;
 	atomic_set(&con->c.sq_wr_avail, max_send_wr);
-- 
GitLab


From 7526198f27107278c600a3aee41f1a77ed84dd78 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:43 +0800
Subject: [PATCH 1696/4122] RDMA/rtrs: Clean up rtrs_rdma_dev_pd_ops

Let's remove them since the three members are not used.

Acked-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-7-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-pri.h |  3 ---
 drivers/infiniband/ulp/rtrs/rtrs.c     | 22 ++++------------------
 2 files changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-pri.h b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
index a2420eecaf5a1..ab25619261d28 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-pri.h
+++ b/drivers/infiniband/ulp/rtrs/rtrs-pri.h
@@ -68,10 +68,7 @@ enum {
 struct rtrs_ib_dev;
 
 struct rtrs_rdma_dev_pd_ops {
-	struct rtrs_ib_dev *(*alloc)(void);
-	void (*free)(struct rtrs_ib_dev *dev);
 	int (*init)(struct rtrs_ib_dev *dev);
-	void (*deinit)(struct rtrs_ib_dev *dev);
 };
 
 struct rtrs_rdma_dev_pd {
diff --git a/drivers/infiniband/ulp/rtrs/rtrs.c b/drivers/infiniband/ulp/rtrs/rtrs.c
index ed324b47d93ae..4bf9d868cc522 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs.c
@@ -557,7 +557,6 @@ EXPORT_SYMBOL(rtrs_addr_to_sockaddr);
 void rtrs_rdma_dev_pd_init(enum ib_pd_flags pd_flags,
 			    struct rtrs_rdma_dev_pd *pool)
 {
-	WARN_ON(pool->ops && (!pool->ops->alloc ^ !pool->ops->free));
 	INIT_LIST_HEAD(&pool->list);
 	mutex_init(&pool->mutex);
 	pool->pd_flags = pd_flags;
@@ -583,15 +582,8 @@ static void dev_free(struct kref *ref)
 	list_del(&dev->entry);
 	mutex_unlock(&pool->mutex);
 
-	if (pool->ops && pool->ops->deinit)
-		pool->ops->deinit(dev);
-
 	ib_dealloc_pd(dev->ib_pd);
-
-	if (pool->ops && pool->ops->free)
-		pool->ops->free(dev);
-	else
-		kfree(dev);
+	kfree(dev);
 }
 
 int rtrs_ib_dev_put(struct rtrs_ib_dev *dev)
@@ -618,11 +610,8 @@ rtrs_ib_dev_find_or_add(struct ib_device *ib_dev,
 			goto out_unlock;
 	}
 	mutex_unlock(&pool->mutex);
-	if (pool->ops && pool->ops->alloc)
-		dev = pool->ops->alloc();
-	else
-		dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-	if (IS_ERR_OR_NULL(dev))
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev)
 		goto out_err;
 
 	kref_init(&dev->ref);
@@ -644,10 +633,7 @@ out_unlock:
 out_free_pd:
 	ib_dealloc_pd(dev->ib_pd);
 out_free_dev:
-	if (pool->ops && pool->ops->free)
-		pool->ops->free(dev);
-	else
-		kfree(dev);
+	kfree(dev);
 out_err:
 	return NULL;
 }
-- 
GitLab


From 6af4609c18b3aa69209d022b9c00e6db78c57ae5 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:44 +0800
Subject: [PATCH 1697/4122] RDMA/rtrs-srv: Fix several issues in
 rtrs_srv_destroy_path_files

There are several issues in the function which is supposed to be paired
with rtrs_srv_create_path_files.

1. rtrs_srv_stats_attr_group is not removed though it is created in
   rtrs_srv_create_stats_files.

2. it makes more sense to check kobj_stats.state_in_sysfs before destroy
   kobj_stats instead of rely on kobj.state_in_sysfs.

3. kobject_init_and_add is used for both kobjs (srv_path->kobj and
   srv_path->stats->kobj_stats), however we missed to call kobject_del
   for srv_path->kobj which was called in free_path.

4. rtrs_srv_destroy_once_sysfs_root_folders is independent of either
   kobj or kobj_stats.

Acked-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-8-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index 2a3c9ac64a42e..da8e205ce3311 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -304,12 +304,18 @@ destroy_root:
 
 void rtrs_srv_destroy_path_files(struct rtrs_srv_path *srv_path)
 {
-	if (srv_path->kobj.state_in_sysfs) {
+	if (srv_path->stats->kobj_stats.state_in_sysfs) {
+		sysfs_remove_group(&srv_path->stats->kobj_stats,
+				   &rtrs_srv_stats_attr_group);
 		kobject_del(&srv_path->stats->kobj_stats);
 		kobject_put(&srv_path->stats->kobj_stats);
+	}
+
+	if (srv_path->kobj.state_in_sysfs) {
 		sysfs_remove_group(&srv_path->kobj, &rtrs_srv_path_attr_group);
+		kobject_del(&srv_path->kobj);
 		kobject_put(&srv_path->kobj);
-
-		rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
 	}
+
+	rtrs_srv_destroy_once_sysfs_root_folders(srv_path);
 }
-- 
GitLab


From 34a046f08b62fb855ac590c1f4dfb1934f1fdb64 Mon Sep 17 00:00:00 2001
From: Guoqing Jiang <guoqing.jiang@linux.dev>
Date: Thu, 17 Nov 2022 18:19:45 +0800
Subject: [PATCH 1698/4122] RDMA/rtrs-srv: Remove kobject_del from
 rtrs_srv_destroy_once_sysfs_root_folders

The kobj_paths which is created dynamically by kobject_create_and_add,
and per the comment above kobject_create_and_add, we only need to call
kobject_put which is not same as other kobjs such as stats->kobj_stats
and srv_path->kobj.

Acked-by: Md Haris Iqbal <haris.iqbal@ionos.com>
Signed-off-by: Guoqing Jiang <guoqing.jiang@linux.dev>
Link: https://lore.kernel.org/r/20221117101945.6317-9-guoqing.jiang@linux.dev
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
index da8e205ce3311..c76ba29da1e20 100644
--- a/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
+++ b/drivers/infiniband/ulp/rtrs/rtrs-srv-sysfs.c
@@ -203,7 +203,6 @@ rtrs_srv_destroy_once_sysfs_root_folders(struct rtrs_srv_path *srv_path)
 
 	mutex_lock(&srv->paths_mutex);
 	if (!--srv->dev_ref) {
-		kobject_del(srv->kobj_paths);
 		kobject_put(srv->kobj_paths);
 		mutex_unlock(&srv->paths_mutex);
 		device_del(&srv->dev);
-- 
GitLab


From 5562c6a9657e1bffd4d66df841db142c596f411c Mon Sep 17 00:00:00 2001
From: Slark Xiao <slark_xiao@163.com>
Date: Mon, 7 Nov 2022 19:27:00 +0800
Subject: [PATCH 1699/4122] bus: mhi: host: pci_generic: Add definition for
 some VIDs

To make code neat and for convenience purpose, add definition for some
VIDs. Adding it locally until these VIDs are used in multiple places.

Signed-off-by: Slark Xiao <slark_xiao@163.com>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Link: https://lore.kernel.org/r/20221107112700.773-1-slark_xiao@163.com
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/bus/mhi/host/pci_generic.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/bus/mhi/host/pci_generic.c b/drivers/bus/mhi/host/pci_generic.c
index b58a303678964..f39657f71483c 100644
--- a/drivers/bus/mhi/host/pci_generic.c
+++ b/drivers/bus/mhi/host/pci_generic.c
@@ -24,6 +24,10 @@
 
 #define HEALTH_CHECK_PERIOD (HZ * 2)
 
+/* PCI VID definitions */
+#define PCI_VENDOR_ID_THALES	0x1269
+#define PCI_VENDOR_ID_QUECTEL	0x1eac
+
 /**
  * struct mhi_pci_dev_info - MHI PCI device specific information
  * @config: MHI controller configuration
@@ -559,11 +563,11 @@ static const struct pci_device_id mhi_pci_id_table[] = {
 		.driver_data = (kernel_ulong_t) &mhi_telit_fn990_info },
 	{ PCI_DEVICE(PCI_VENDOR_ID_QCOM, 0x0308),
 		.driver_data = (kernel_ulong_t) &mhi_qcom_sdx65_info },
-	{ PCI_DEVICE(0x1eac, 0x1001), /* EM120R-GL (sdx24) */
+	{ PCI_DEVICE(PCI_VENDOR_ID_QUECTEL, 0x1001), /* EM120R-GL (sdx24) */
 		.driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info },
-	{ PCI_DEVICE(0x1eac, 0x1002), /* EM160R-GL (sdx24) */
+	{ PCI_DEVICE(PCI_VENDOR_ID_QUECTEL, 0x1002), /* EM160R-GL (sdx24) */
 		.driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info },
-	{ PCI_DEVICE(0x1eac, 0x2001), /* EM120R-GL for FCCL (sdx24) */
+	{ PCI_DEVICE(PCI_VENDOR_ID_QUECTEL, 0x2001), /* EM120R-GL for FCCL (sdx24) */
 		.driver_data = (kernel_ulong_t) &mhi_quectel_em1xx_info },
 	/* T99W175 (sdx55), Both for eSIM and Non-eSIM */
 	{ PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0ab),
@@ -587,16 +591,16 @@ static const struct pci_device_id mhi_pci_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_FOXCONN, 0xe0d9),
 		.driver_data = (kernel_ulong_t) &mhi_foxconn_sdx65_info },
 	/* MV31-W (Cinterion) */
-	{ PCI_DEVICE(0x1269, 0x00b3),
+	{ PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00b3),
 		.driver_data = (kernel_ulong_t) &mhi_mv31_info },
 	/* MV31-W (Cinterion), based on new baseline */
-	{ PCI_DEVICE(0x1269, 0x00b4),
+	{ PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00b4),
 		.driver_data = (kernel_ulong_t) &mhi_mv31_info },
 	/* MV32-WA (Cinterion) */
-	{ PCI_DEVICE(0x1269, 0x00ba),
+	{ PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00ba),
 		.driver_data = (kernel_ulong_t) &mhi_mv32_info },
 	/* MV32-WB (Cinterion) */
-	{ PCI_DEVICE(0x1269, 0x00bb),
+	{ PCI_DEVICE(PCI_VENDOR_ID_THALES, 0x00bb),
 		.driver_data = (kernel_ulong_t) &mhi_mv32_info },
 	/* T99W175 (sdx55), HP variant */
 	{ PCI_DEVICE(0x03f0, 0x0a6c),
-- 
GitLab


From c70edc06773976f4e6ccfe250030a73c2896e131 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 10 Nov 2022 19:25:10 -0800
Subject: [PATCH 1700/4122] dt-bindings: interconnect: Add sm8350, sc8280xp and
 generic OSM L3 compatibles

Add EPSS L3 compatibles for sm8350 and sc8280xp, but while at it also
introduce generic compatible for both qcom,osm-l3 and qcom,epss-l3.

Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Tested-by: Steev Klimaszewski <steev@kali.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Sibi Sankar <quic_sibis@quicinc.com>
Link: https://lore.kernel.org/r/20221111032515.3460-6-quic_bjorande@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 .../bindings/interconnect/qcom,osm-l3.yaml    | 24 ++++++++++++-------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
index bf538c0c5a811..aadae4424ba95 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,osm-l3.yaml
@@ -16,13 +16,21 @@ description:
 
 properties:
   compatible:
-    enum:
-      - qcom,sc7180-osm-l3
-      - qcom,sc7280-epss-l3
-      - qcom,sc8180x-osm-l3
-      - qcom,sdm845-osm-l3
-      - qcom,sm8150-osm-l3
-      - qcom,sm8250-epss-l3
+    oneOf:
+      - items:
+          - enum:
+              - qcom,sc7180-osm-l3
+              - qcom,sc8180x-osm-l3
+              - qcom,sdm845-osm-l3
+              - qcom,sm8150-osm-l3
+          - const: qcom,osm-l3
+      - items:
+          - enum:
+              - qcom,sc7280-epss-l3
+              - qcom,sc8280xp-epss-l3
+              - qcom,sm8250-epss-l3
+              - qcom,sm8350-epss-l3
+          - const: qcom,epss-l3
 
   reg:
     maxItems: 1
@@ -56,7 +64,7 @@ examples:
     #define RPMH_CXO_CLK        0
 
     osm_l3: interconnect@17d41000 {
-      compatible = "qcom,sdm845-osm-l3";
+      compatible = "qcom,sdm845-osm-l3", "qcom,osm-l3";
       reg = <0x17d41000 0x1400>;
 
       clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>;
-- 
GitLab


From 2d710b00f22f3fcbc4e0189524bbf36731d9baf4 Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <quic_bjorande@quicinc.com>
Date: Thu, 10 Nov 2022 19:25:14 -0800
Subject: [PATCH 1701/4122] dt-bindings: interconnect: qcom,msm8998-bwmon: Add
 sc8280xp bwmon instances

The sc8280xp platform has two BWMON instances, one v4 and one v5. Extend
the existing qcom,msm8998-bwmon and qcom,sc7280-llcc-bwmon to describe
these.

Signed-off-by: Bjorn Andersson <quic_bjorande@quicinc.com>
Tested-by: Steev Klimaszewski <steev@kali.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Sibi Sankar <quic_sibis@quicinc.com>
Link: https://lore.kernel.org/r/20221111032515.3460-10-quic_bjorande@quicinc.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 .../devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml
index be29e0b80995e..0c720dbde36e1 100644
--- a/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml
+++ b/Documentation/devicetree/bindings/interconnect/qcom,msm8998-bwmon.yaml
@@ -25,9 +25,14 @@ properties:
       - items:
           - enum:
               - qcom,sc7280-cpu-bwmon
+              - qcom,sc8280xp-cpu-bwmon
               - qcom,sdm845-bwmon
           - const: qcom,msm8998-bwmon
       - const: qcom,msm8998-bwmon       # BWMON v4
+      - items:
+          - enum:
+              - qcom,sc8280xp-llcc-bwmon
+          - const: qcom,sc7280-llcc-bwmon
       - const: qcom,sc7280-llcc-bwmon   # BWMON v5
       - const: qcom,sdm845-llcc-bwmon   # BWMON v5
 
-- 
GitLab


From d017eeabd5092565c3dd1c8a7b00ba724c33c18f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:17 +0000
Subject: [PATCH 1702/4122] arm64: Add ID_DFR0_EL1.PerfMon values for PMUv3p7
 and IMP_DEF

Align the ID_DFR0_EL1.PerfMon values with ID_AA64DFR0_EL1.PMUver.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-2-maz@kernel.org
---
 arch/arm64/include/asm/sysreg.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h
index 7d301700d1a93..84f59ce1dc6db 100644
--- a/arch/arm64/include/asm/sysreg.h
+++ b/arch/arm64/include/asm/sysreg.h
@@ -698,6 +698,8 @@
 #define ID_DFR0_PERFMON_8_1		0x4
 #define ID_DFR0_PERFMON_8_4		0x5
 #define ID_DFR0_PERFMON_8_5		0x6
+#define ID_DFR0_PERFMON_8_7		0x7
+#define ID_DFR0_PERFMON_IMP_DEF		0xf
 
 #define ID_ISAR4_SWP_FRAC_SHIFT		28
 #define ID_ISAR4_PSR_M_SHIFT		24
-- 
GitLab


From bead02204e9806807bb290137b1ccabfcb4b16fd Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:18 +0000
Subject: [PATCH 1703/4122] KVM: arm64: PMU: Align chained counter
 implementation with architecture pseudocode

Ricardo recently pointed out that the PMU chained counter emulation
in KVM wasn't quite behaving like the one on actual hardware, in
the sense that a chained counter would expose an overflow on
both halves of a chained counter, while KVM would only expose the
overflow on the top half.

The difference is subtle, but significant. What does the architecture
say (DDI0087 H.a):

- Up to PMUv3p4, all counters but the cycle counter are 32bit

- A 32bit counter that overflows generates a CHAIN event on the
  adjacent counter after exposing its own overflow status

- The CHAIN event is accounted if the counter is correctly
  configured (CHAIN event selected and counter enabled)

This all means that our current implementation (which uses 64bit
perf events) prevents us from emulating this overflow on the lower half.

How to fix this? By implementing the above, to the letter.

This largely results in code deletion, removing the notions of
"counter pair", "chained counters", and "canonical counter".
The code is further restructured to make the CHAIN handling similar
to SWINC, as the two are now extremely similar in behaviour.

Reported-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Link: https://lore.kernel.org/r/20221113163832.3154370-3-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 320 ++++++++++----------------------------
 include/kvm/arm_pmu.h     |   2 -
 2 files changed, 86 insertions(+), 236 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 0003c7d37533a..57765be69bea0 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -15,16 +15,14 @@
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_vgic.h>
 
+#define PERF_ATTR_CFG1_COUNTER_64BIT	BIT(0)
+
 DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
 
 static LIST_HEAD(arm_pmus);
 static DEFINE_MUTEX(arm_pmus_lock);
 
 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
-static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx);
-static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc);
-
-#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
 
 static u32 kvm_pmu_event_mask(struct kvm *kvm)
 {
@@ -57,6 +55,11 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
 		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
 }
 
+static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
+{
+	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX);
+}
+
 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu;
@@ -69,91 +72,22 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
 }
 
 /**
- * kvm_pmu_pmc_is_chained - determine if the pmc is chained
- * @pmc: The PMU counter pointer
- */
-static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
-{
-	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
-
-	return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
-}
-
-/**
- * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
- * @select_idx: The counter index
- */
-static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
-{
-	return select_idx & 0x1;
-}
-
-/**
- * kvm_pmu_get_canonical_pmc - obtain the canonical pmc
- * @pmc: The PMU counter pointer
- *
- * When a pair of PMCs are chained together we use the low counter (canonical)
- * to hold the underlying perf event.
- */
-static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
-{
-	if (kvm_pmu_pmc_is_chained(pmc) &&
-	    kvm_pmu_idx_is_high_counter(pmc->idx))
-		return pmc - 1;
-
-	return pmc;
-}
-static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc)
-{
-	if (kvm_pmu_idx_is_high_counter(pmc->idx))
-		return pmc - 1;
-	else
-		return pmc + 1;
-}
-
-/**
- * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
+ * kvm_pmu_get_counter_value - get PMU counter value
  * @vcpu: The vcpu pointer
  * @select_idx: The counter index
  */
-static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
-{
-	u64 eventsel, reg;
-
-	select_idx |= 0x1;
-
-	if (select_idx == ARMV8_PMU_CYCLE_IDX)
-		return false;
-
-	reg = PMEVTYPER0_EL0 + select_idx;
-	eventsel = __vcpu_sys_reg(vcpu, reg) & kvm_pmu_event_mask(vcpu->kvm);
-
-	return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
-}
-
-/**
- * kvm_pmu_get_pair_counter_value - get PMU counter value
- * @vcpu: The vcpu pointer
- * @pmc: The PMU counter pointer
- */
-static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
-					  struct kvm_pmc *pmc)
+u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 {
-	u64 counter, counter_high, reg, enabled, running;
-
-	if (kvm_pmu_pmc_is_chained(pmc)) {
-		pmc = kvm_pmu_get_canonical_pmc(pmc);
-		reg = PMEVCNTR0_EL0 + pmc->idx;
+	u64 counter, reg, enabled, running;
+	struct kvm_pmu *pmu = &vcpu->arch.pmu;
+	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
 
-		counter = __vcpu_sys_reg(vcpu, reg);
-		counter_high = __vcpu_sys_reg(vcpu, reg + 1);
+	if (!kvm_vcpu_has_pmu(vcpu))
+		return 0;
 
-		counter = lower_32_bits(counter) | (counter_high << 32);
-	} else {
-		reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
-		      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
-		counter = __vcpu_sys_reg(vcpu, reg);
-	}
+	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
+		? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
+	counter = __vcpu_sys_reg(vcpu, reg);
 
 	/*
 	 * The real counter value is equal to the value of counter register plus
@@ -163,29 +97,7 @@ static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
 		counter += perf_event_read_value(pmc->perf_event, &enabled,
 						 &running);
 
-	return counter;
-}
-
-/**
- * kvm_pmu_get_counter_value - get PMU counter value
- * @vcpu: The vcpu pointer
- * @select_idx: The counter index
- */
-u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
-{
-	u64 counter;
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
-
-	if (!kvm_vcpu_has_pmu(vcpu))
-		return 0;
-
-	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
-
-	if (kvm_pmu_pmc_is_chained(pmc) &&
-	    kvm_pmu_idx_is_high_counter(select_idx))
-		counter = upper_32_bits(counter);
-	else if (select_idx != ARMV8_PMU_CYCLE_IDX)
+	if (select_idx != ARMV8_PMU_CYCLE_IDX)
 		counter = lower_32_bits(counter);
 
 	return counter;
@@ -218,7 +130,6 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
  */
 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
 {
-	pmc = kvm_pmu_get_canonical_pmc(pmc);
 	if (pmc->perf_event) {
 		perf_event_disable(pmc->perf_event);
 		perf_event_release_kernel(pmc->perf_event);
@@ -236,11 +147,10 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 {
 	u64 counter, reg, val;
 
-	pmc = kvm_pmu_get_canonical_pmc(pmc);
 	if (!pmc->perf_event)
 		return;
 
-	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
+	counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
 
 	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
 		reg = PMCCNTR_EL0;
@@ -252,9 +162,6 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 
 	__vcpu_sys_reg(vcpu, reg) = val;
 
-	if (kvm_pmu_pmc_is_chained(pmc))
-		__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
-
 	kvm_pmu_release_perf_event(pmc);
 }
 
@@ -285,8 +192,6 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	for_each_set_bit(i, &mask, 32)
 		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
-
-	bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
 }
 
 /**
@@ -340,12 +245,9 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 
 		pmc = &pmu->pmc[i];
 
-		/* A change in the enable state may affect the chain state */
-		kvm_pmu_update_pmc_chained(vcpu, i);
-		kvm_pmu_create_perf_event(vcpu, i);
-
-		/* At this point, pmc must be the canonical */
-		if (pmc->perf_event) {
+		if (!pmc->perf_event) {
+			kvm_pmu_create_perf_event(vcpu, i);
+		} else {
 			perf_event_enable(pmc->perf_event);
 			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
 				kvm_debug("fail to enable perf event\n");
@@ -375,11 +277,6 @@ void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 
 		pmc = &pmu->pmc[i];
 
-		/* A change in the enable state may affect the chain state */
-		kvm_pmu_update_pmc_chained(vcpu, i);
-		kvm_pmu_create_perf_event(vcpu, i);
-
-		/* At this point, pmc must be the canonical */
 		if (pmc->perf_event)
 			perf_event_disable(pmc->perf_event);
 	}
@@ -484,6 +381,48 @@ static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
 	kvm_vcpu_kick(vcpu);
 }
 
+/*
+ * Perform an increment on any of the counters described in @mask,
+ * generating the overflow if required, and propagate it as a chained
+ * event if possible.
+ */
+static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
+				      unsigned long mask, u32 event)
+{
+	int i;
+
+	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
+		return;
+
+	/* Weed out disabled counters */
+	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
+
+	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
+		u64 type, reg;
+
+		/* Filter on event type */
+		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
+		type &= kvm_pmu_event_mask(vcpu->kvm);
+		if (type != event)
+			continue;
+
+		/* Increment this counter */
+		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+		reg = lower_32_bits(reg);
+		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
+
+		if (reg) /* No overflow? move on */
+			continue;
+
+		/* Mark overflow */
+		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
+
+		if (kvm_pmu_counter_can_chain(vcpu, i))
+			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
+						  ARMV8_PMUV3_PERFCTR_CHAIN);
+	}
+}
+
 /**
  * When the perf event overflows, set the overflow status and inform the vcpu.
  */
@@ -514,6 +453,10 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 
 	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
 
+	if (kvm_pmu_counter_can_chain(vcpu, idx))
+		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
+					  ARMV8_PMUV3_PERFCTR_CHAIN);
+
 	if (kvm_pmu_overflow_status(vcpu)) {
 		kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 
@@ -533,50 +476,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
  */
 void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
 {
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	int i;
-
-	if (!kvm_vcpu_has_pmu(vcpu))
-		return;
-
-	if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
-		return;
-
-	/* Weed out disabled counters */
-	val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
-
-	for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) {
-		u64 type, reg;
-
-		if (!(val & BIT(i)))
-			continue;
-
-		/* PMSWINC only applies to ... SW_INC! */
-		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
-		type &= kvm_pmu_event_mask(vcpu->kvm);
-		if (type != ARMV8_PMUV3_PERFCTR_SW_INCR)
-			continue;
-
-		/* increment this even SW_INC counter */
-		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
-		reg = lower_32_bits(reg);
-		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
-
-		if (reg) /* no overflow on the low part */
-			continue;
-
-		if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) {
-			/* increment the high counter */
-			reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1;
-			reg = lower_32_bits(reg);
-			__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg;
-			if (!reg) /* mark overflow on the high counter */
-				__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1);
-		} else {
-			/* mark overflow on low counter */
-			__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
-		}
-	}
+	kvm_pmu_counter_increment(vcpu, val, ARMV8_PMUV3_PERFCTR_SW_INCR);
 }
 
 /**
@@ -625,18 +525,11 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 {
 	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
 	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc;
+	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
 	struct perf_event *event;
 	struct perf_event_attr attr;
 	u64 eventsel, counter, reg, data;
 
-	/*
-	 * For chained counters the event type and filtering attributes are
-	 * obtained from the low/even counter. We also use this counter to
-	 * determine if the event is enabled/disabled.
-	 */
-	pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
-
 	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
 	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
 	data = __vcpu_sys_reg(vcpu, reg);
@@ -647,8 +540,12 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	else
 		eventsel = data & kvm_pmu_event_mask(vcpu->kvm);
 
-	/* Software increment event doesn't need to be backed by a perf event */
-	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
+	/*
+	 * Neither SW increment nor chained events need to be backed
+	 * by a perf event.
+	 */
+	if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR ||
+	    eventsel == ARMV8_PMUV3_PERFCTR_CHAIN)
 		return;
 
 	/*
@@ -670,30 +567,21 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	attr.exclude_host = 1; /* Don't count host events */
 	attr.config = eventsel;
 
-	counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
+	counter = kvm_pmu_get_counter_value(vcpu, select_idx);
 
-	if (kvm_pmu_pmc_is_chained(pmc)) {
-		/**
-		 * The initial sample period (overflow count) of an event. For
-		 * chained counters we only support overflow interrupts on the
-		 * high counter.
-		 */
+	/*
+	 * If counting with a 64bit counter, advertise it to the perf
+	 * code, carefully dealing with the initial sample period.
+	 */
+	if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
+		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
 		attr.sample_period = (-counter) & GENMASK(63, 0);
-		attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
-
-		event = perf_event_create_kernel_counter(&attr, -1, current,
-							 kvm_pmu_perf_overflow,
-							 pmc + 1);
 	} else {
-		/* The initial sample period (overflow count) of an event. */
-		if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
-			attr.sample_period = (-counter) & GENMASK(63, 0);
-		else
-			attr.sample_period = (-counter) & GENMASK(31, 0);
+		attr.sample_period = (-counter) & GENMASK(31, 0);
+	}
 
-		event = perf_event_create_kernel_counter(&attr, -1, current,
+	event = perf_event_create_kernel_counter(&attr, -1, current,
 						 kvm_pmu_perf_overflow, pmc);
-	}
 
 	if (IS_ERR(event)) {
 		pr_err_once("kvm: pmu event creation failed %ld\n",
@@ -704,41 +592,6 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	pmc->perf_event = event;
 }
 
-/**
- * kvm_pmu_update_pmc_chained - update chained bitmap
- * @vcpu: The vcpu pointer
- * @select_idx: The number of selected counter
- *
- * Update the chained bitmap based on the event type written in the
- * typer register and the enable state of the odd register.
- */
-static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
-{
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc;
-	bool new_state, old_state;
-
-	old_state = kvm_pmu_pmc_is_chained(pmc);
-	new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) &&
-		    kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1);
-
-	if (old_state == new_state)
-		return;
-
-	canonical_pmc = kvm_pmu_get_canonical_pmc(pmc);
-	kvm_pmu_stop_counter(vcpu, canonical_pmc);
-	if (new_state) {
-		/*
-		 * During promotion from !chained to chained we must ensure
-		 * the adjacent counter is stopped and its event destroyed
-		 */
-		kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc));
-		set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
-		return;
-	}
-	clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
-}
-
 /**
  * kvm_pmu_set_counter_event_type - set selected counter to monitor some event
  * @vcpu: The vcpu pointer
@@ -766,7 +619,6 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 
 	__vcpu_sys_reg(vcpu, reg) = data & mask;
 
-	kvm_pmu_update_pmc_chained(vcpu, select_idx);
 	kvm_pmu_create_perf_event(vcpu, select_idx);
 }
 
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index c0b868ce6a8f2..96b192139a23a 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -11,7 +11,6 @@
 #include <asm/perf_event.h>
 
 #define ARMV8_PMU_CYCLE_IDX		(ARMV8_PMU_MAX_COUNTERS - 1)
-#define ARMV8_PMU_MAX_COUNTER_PAIRS	((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
 
 #ifdef CONFIG_HW_PERF_EVENTS
 
@@ -29,7 +28,6 @@ struct kvm_pmu {
 	struct irq_work overflow_work;
 	struct kvm_pmu_events events;
 	struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
-	DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
 	int irq_num;
 	bool created;
 	bool irq_level;
-- 
GitLab


From c423f01633eb948ba6f8c98872b4119685e007fb Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Sat, 24 Sep 2022 09:50:43 +0800
Subject: [PATCH 1704/4122] interconnect: qcom: icc-rpm: Remove redundant
 dev_err call

devm_ioremap_resource() prints error message in itself. Remove the
dev_err call to avoid redundant error message.

Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20220924015043.25130-1-shangxiaojing@huawei.com
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/icc-rpm.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
index 39e43b9575998..ba6781f54ab73 100644
--- a/drivers/interconnect/qcom/icc-rpm.c
+++ b/drivers/interconnect/qcom/icc-rpm.c
@@ -477,11 +477,8 @@ int qnoc_probe(struct platform_device *pdev)
 		}
 
 		mmio = devm_ioremap_resource(dev, res);
-
-		if (IS_ERR(mmio)) {
-			dev_err(dev, "Cannot ioremap interconnect bus resource\n");
+		if (IS_ERR(mmio))
 			return PTR_ERR(mmio);
-		}
 
 		qp->regmap = devm_regmap_init_mmio(dev, mmio, desc->regmap_cfg);
 		if (IS_ERR(qp->regmap)) {
-- 
GitLab


From f24227a640344f894522045f74bb2decbdc4f55e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 27 Oct 2022 11:48:46 -0400
Subject: [PATCH 1705/4122] interconnect: qcom: sc7180: fix dropped const of
 qcom_icc_bcm

Pointers to struct qcom_icc_bcm are const, but the change was dropped
during merge.

Fixes: 016fca59f95f ("Merge branch 'icc-const' into icc-next")
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221027154848.293523-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/sc7180.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c
index 35cd448efdfbe..82d5e8a8c19ea 100644
--- a/drivers/interconnect/qcom/sc7180.c
+++ b/drivers/interconnect/qcom/sc7180.c
@@ -369,7 +369,7 @@ static const struct qcom_icc_desc sc7180_gem_noc = {
 	.num_bcms = ARRAY_SIZE(gem_noc_bcms),
 };
 
-static struct qcom_icc_bcm *mc_virt_bcms[] = {
+static struct qcom_icc_bcm * const mc_virt_bcms[] = {
 	&bcm_acv,
 	&bcm_mc0,
 };
-- 
GitLab


From 7870c7076aa07d9caaf53652d6b5a3cd74b1d157 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 27 Oct 2022 11:48:47 -0400
Subject: [PATCH 1706/4122] interconnect: qcom: sc7180: drop double space

Drop double white-space.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221027154848.293523-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/sc7180.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/interconnect/qcom/sc7180.c b/drivers/interconnect/qcom/sc7180.c
index 82d5e8a8c19ea..ef4e13fb49831 100644
--- a/drivers/interconnect/qcom/sc7180.c
+++ b/drivers/interconnect/qcom/sc7180.c
@@ -443,7 +443,7 @@ static struct qcom_icc_node * const qup_virt_nodes[] = {
 	[SLAVE_QUP_CORE_1] = &qup_core_slave_2,
 };
 
-static const  struct qcom_icc_desc sc7180_qup_virt = {
+static const struct qcom_icc_desc sc7180_qup_virt = {
 	.nodes = qup_virt_nodes,
 	.num_nodes = ARRAY_SIZE(qup_virt_nodes),
 	.bcms = qup_virt_bcms,
-- 
GitLab


From c1c537cf30bc539d8f6fa4ac315a8def23fd4ae8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 27 Oct 2022 11:48:48 -0400
Subject: [PATCH 1707/4122] interconnect: qcom: sc8180x: constify pointer to
 qcom_icc_node

Pointers to struct qcom_icc_node are const.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221027154848.293523-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Georgi Djakov <djakov@kernel.org>
---
 drivers/interconnect/qcom/sc8180x.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/interconnect/qcom/sc8180x.c b/drivers/interconnect/qcom/sc8180x.c
index 8e32ca958824c..0f515bf10bd71 100644
--- a/drivers/interconnect/qcom/sc8180x.c
+++ b/drivers/interconnect/qcom/sc8180x.c
@@ -1889,7 +1889,7 @@ static struct qcom_icc_bcm * const qup_virt_bcms[] = {
 	&bcm_qup0,
 };
 
-static struct qcom_icc_node *qup_virt_nodes[] = {
+static struct qcom_icc_node * const qup_virt_nodes[] = {
 	[MASTER_QUP_CORE_0] = &mas_qup_core_0,
 	[MASTER_QUP_CORE_1] = &mas_qup_core_1,
 	[MASTER_QUP_CORE_2] = &mas_qup_core_2,
-- 
GitLab


From acdd8a4e13a008a83c6da88bb53eecbecda9714c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:19 +0000
Subject: [PATCH 1708/4122] KVM: arm64: PMU: Always advertise the CHAIN event

Even when the underlying HW doesn't offer the CHAIN event
(which happens with QEMU), we can always support it as we're
in control of the counter overflow.

Always advertise the event via PMCEID0_EL0.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-4-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 57765be69bea0..69b67ab3c4bfa 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -701,6 +701,8 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 
 	if (!pmceid1) {
 		val = read_sysreg(pmceid0_el0);
+		/* always support CHAIN */
+		val |= BIT(ARMV8_PMUV3_PERFCTR_CHAIN);
 		base = 0;
 	} else {
 		val = read_sysreg(pmceid1_el0);
-- 
GitLab


From c82d28cbf1d4f9fe174041b4485c635cb970afa7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:20 +0000
Subject: [PATCH 1709/4122] KVM: arm64: PMU: Distinguish between 64bit counter
 and 64bit overflow

The PMU architecture makes a subtle difference between a 64bit
counter and a counter that has a 64bit overflow. This is for example
the case of the cycle counter, which can generate an overflow on
a 32bit boundary if PMCR_EL0.LC==0 despite the accumulation being
done on 64 bits.

Use this distinction in the few cases where it matters in the code,
as we will reuse this with PMUv3p5 long counters.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-5-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 43 ++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 69b67ab3c4bfa..d050143326b58 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -50,6 +50,11 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
  * @select_idx: The counter index
  */
 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
+{
+	return (select_idx == ARMV8_PMU_CYCLE_IDX);
+}
+
+static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx)
 {
 	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
 		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
@@ -57,7 +62,8 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
 
 static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
 {
-	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX);
+	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX &&
+		!kvm_pmu_idx_has_64bit_overflow(vcpu, idx));
 }
 
 static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
@@ -97,7 +103,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 		counter += perf_event_read_value(pmc->perf_event, &enabled,
 						 &running);
 
-	if (select_idx != ARMV8_PMU_CYCLE_IDX)
+	if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
 		counter = lower_32_bits(counter);
 
 	return counter;
@@ -423,6 +429,23 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
 	}
 }
 
+/* Compute the sample period for a given counter value */
+static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter)
+{
+	u64 val;
+
+	if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
+		if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx))
+			val = -(counter & GENMASK(31, 0));
+		else
+			val = (-counter) & GENMASK(63, 0);
+	} else {
+		val = (-counter) & GENMASK(31, 0);
+	}
+
+	return val;
+}
+
 /**
  * When the perf event overflows, set the overflow status and inform the vcpu.
  */
@@ -442,10 +465,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 	 * Reset the sample period to the architectural limit,
 	 * i.e. the point where the counter overflows.
 	 */
-	period = -(local64_read(&perf_event->count));
-
-	if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
-		period &= GENMASK(31, 0);
+	period = compute_period(vcpu, idx, local64_read(&perf_event->count));
 
 	local64_set(&perf_event->hw.period_left, 0);
 	perf_event->attr.sample_period = period;
@@ -571,14 +591,13 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 
 	/*
 	 * If counting with a 64bit counter, advertise it to the perf
-	 * code, carefully dealing with the initial sample period.
+	 * code, carefully dealing with the initial sample period
+	 * which also depends on the overflow.
 	 */
-	if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
+	if (kvm_pmu_idx_is_64bit(vcpu, select_idx))
 		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
-		attr.sample_period = (-counter) & GENMASK(63, 0);
-	} else {
-		attr.sample_period = (-counter) & GENMASK(31, 0);
-	}
+
+	attr.sample_period = compute_period(vcpu, select_idx, counter);
 
 	event = perf_event_create_kernel_counter(&attr, -1, current,
 						 kvm_pmu_perf_overflow, pmc);
-- 
GitLab


From 001d85bd6c039d3662a4f33a5d212ef3e0438b27 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:21 +0000
Subject: [PATCH 1710/4122] KVM: arm64: PMU: Narrow the overflow checking when
 required

For 64bit counters that overflow on a 32bit boundary, make
sure we only check the bottom 32bit to generate a CHAIN event.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Link: https://lore.kernel.org/r/20221113163832.3154370-6-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index d050143326b58..9e6bc7edc4de0 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -417,7 +417,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
 		reg = lower_32_bits(reg);
 		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
 
-		if (reg) /* No overflow? move on */
+		/* No overflow? move on */
+		if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg))
 			continue;
 
 		/* Mark overflow */
-- 
GitLab


From 0f1e172b54f7574ca6aa46b851b332896add955f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:22 +0000
Subject: [PATCH 1711/4122] KVM: arm64: PMU: Only narrow counters that are not
 64bit wide

The current PMU emulation sometimes narrows counters to 32bit
if the counter isn't the cycle counter. As this is going to
change with PMUv3p5 where the counters are all 64bit, fix
the couple of cases where this happens unconditionally.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Reviewed-by: Reiji Watanabe <reijiw@google.com>
Link: https://lore.kernel.org/r/20221113163832.3154370-7-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 9e6bc7edc4de0..1fab889dbc74f 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -151,20 +151,17 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
  */
 static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 {
-	u64 counter, reg, val;
+	u64 reg, val;
 
 	if (!pmc->perf_event)
 		return;
 
-	counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
+	val = kvm_pmu_get_counter_value(vcpu, pmc->idx);
 
-	if (pmc->idx == ARMV8_PMU_CYCLE_IDX) {
+	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
 		reg = PMCCNTR_EL0;
-		val = counter;
-	} else {
+	else
 		reg = PMEVCNTR0_EL0 + pmc->idx;
-		val = lower_32_bits(counter);
-	}
 
 	__vcpu_sys_reg(vcpu, reg) = val;
 
@@ -414,7 +411,8 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
 
 		/* Increment this counter */
 		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
-		reg = lower_32_bits(reg);
+		if (!kvm_pmu_idx_is_64bit(vcpu, i))
+			reg = lower_32_bits(reg);
 		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
 
 		/* No overflow? move on */
-- 
GitLab


From 0cb9c3c87a9d3287eaf353936e6846d885102439 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:23 +0000
Subject: [PATCH 1712/4122] KVM: arm64: PMU: Add counter_index_to_*reg()
 helpers

In order to reduce the boilerplate code, add two helpers returning
the counter register index (resp. the event register) in the vcpu
register file from the counter index.

Reviewed-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-8-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 1fab889dbc74f..faab0f57a45de 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -77,6 +77,16 @@ static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
 	return container_of(vcpu_arch, struct kvm_vcpu, arch);
 }
 
+static u32 counter_index_to_reg(u64 idx)
+{
+	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + idx;
+}
+
+static u32 counter_index_to_evtreg(u64 idx)
+{
+	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
+}
+
 /**
  * kvm_pmu_get_counter_value - get PMU counter value
  * @vcpu: The vcpu pointer
@@ -91,8 +101,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 	if (!kvm_vcpu_has_pmu(vcpu))
 		return 0;
 
-	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
-		? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
+	reg = counter_index_to_reg(select_idx);
 	counter = __vcpu_sys_reg(vcpu, reg);
 
 	/*
@@ -122,8 +131,7 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
 	if (!kvm_vcpu_has_pmu(vcpu))
 		return;
 
-	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
-	      ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
+	reg = counter_index_to_reg(select_idx);
 	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
 
 	/* Recreate the perf event to reflect the updated sample_period */
@@ -158,10 +166,7 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
 
 	val = kvm_pmu_get_counter_value(vcpu, pmc->idx);
 
-	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
-		reg = PMCCNTR_EL0;
-	else
-		reg = PMEVCNTR0_EL0 + pmc->idx;
+	reg = counter_index_to_reg(pmc->idx);
 
 	__vcpu_sys_reg(vcpu, reg) = val;
 
@@ -404,16 +409,16 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
 		u64 type, reg;
 
 		/* Filter on event type */
-		type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i);
+		type = __vcpu_sys_reg(vcpu, counter_index_to_evtreg(i));
 		type &= kvm_pmu_event_mask(vcpu->kvm);
 		if (type != event)
 			continue;
 
 		/* Increment this counter */
-		reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1;
+		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
 		if (!kvm_pmu_idx_is_64bit(vcpu, i))
 			reg = lower_32_bits(reg);
-		__vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg;
+		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
 
 		/* No overflow? move on */
 		if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg))
@@ -549,8 +554,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	struct perf_event_attr attr;
 	u64 eventsel, counter, reg, data;
 
-	reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
-	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
+	reg = counter_index_to_evtreg(select_idx);
 	data = __vcpu_sys_reg(vcpu, reg);
 
 	kvm_pmu_stop_counter(vcpu, pmc);
@@ -632,8 +636,7 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
 	mask |= kvm_pmu_event_mask(vcpu->kvm);
 
-	reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
-	      ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
+	reg = counter_index_to_evtreg(select_idx);
 
 	__vcpu_sys_reg(vcpu, reg) = data & mask;
 
-- 
GitLab


From 9917264d74d9063341968a8e071266358496777b Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:24 +0000
Subject: [PATCH 1713/4122] KVM: arm64: PMU: Simplify setting a counter to a
 specific value

kvm_pmu_set_counter_value() is pretty odd, as it tries to update
the counter value while taking into account the value that is
currently held by the running perf counter.

This is not only complicated, this is quite wrong. Nowhere in
the architecture is it said that the counter would be offset
by something that is pending. The counter should be updated
with the value set by SW, and start counting from there if
required.

Remove the odd computation and just assign the provided value
after having released the perf event (which is then restarted).

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-9-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index faab0f57a45de..ea0c8411641fd 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -23,6 +23,7 @@ static LIST_HEAD(arm_pmus);
 static DEFINE_MUTEX(arm_pmus_lock);
 
 static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
+static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
 
 static u32 kvm_pmu_event_mask(struct kvm *kvm)
 {
@@ -131,8 +132,10 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
 	if (!kvm_vcpu_has_pmu(vcpu))
 		return;
 
+	kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]);
+
 	reg = counter_index_to_reg(select_idx);
-	__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
+	__vcpu_sys_reg(vcpu, reg) = val;
 
 	/* Recreate the perf event to reflect the updated sample_period */
 	kvm_pmu_create_perf_event(vcpu, select_idx);
-- 
GitLab


From c4b33d28ea51c7d194b19a41c96a4f973cc0a280 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 9 Nov 2022 10:59:05 -0800
Subject: [PATCH 1714/4122] KVM: x86/mmu: Split huge pages mapped by the TDP
 MMU on fault

Now that the TDP MMU has a mechanism to split huge pages, use it in the
fault path when a huge page needs to be replaced with a mapping at a
lower level.

This change reduces the negative performance impact of NX HugePages.
Prior to this change if a vCPU executed from a huge page and NX
HugePages was enabled, the vCPU would take a fault, zap the huge page,
and mapping the faulting address at 4KiB with execute permissions
enabled. The rest of the memory would be left *unmapped* and have to be
faulted back in by the guest upon access (read, write, or execute). If
guest is backed by 1GiB, a single execute instruction can zap an entire
GiB of its physical address space.

For example, it can take a VM longer to execute from its memory than to
populate that memory in the first place:

$ ./execute_perf_test -s anonymous_hugetlb_1gb -v96

Populating memory             : 2.748378795s
Executing from memory         : 2.899670885s

With this change, such faults split the huge page instead of zapping it,
which avoids the non-present faults on the rest of the huge page:

$ ./execute_perf_test -s anonymous_hugetlb_1gb -v96

Populating memory             : 2.729544474s
Executing from memory         : 0.111965688s   <---

This change also reduces the performance impact of dirty logging when
eager_page_split=N. eager_page_split=N (abbreviated "eps=N" below) can
be desirable for read-heavy workloads, as it avoids allocating memory to
split huge pages that are never written and avoids increasing the TLB
miss cost on reads of those pages.

             | Config: ept=Y, tdp_mmu=Y, 5% writes           |
             | Iteration 1 dirty memory time                 |
             | --------------------------------------------- |
vCPU Count   | eps=N (Before) | eps=N (After) | eps=Y        |
------------ | -------------- | ------------- | ------------ |
2            | 0.332305091s   | 0.019615027s  | 0.006108211s |
4            | 0.353096020s   | 0.019452131s  | 0.006214670s |
8            | 0.453938562s   | 0.019748246s  | 0.006610997s |
16           | 0.719095024s   | 0.019972171s  | 0.007757889s |
32           | 1.698727124s   | 0.021361615s  | 0.012274432s |
64           | 2.630673582s   | 0.031122014s  | 0.016994683s |
96           | 3.016535213s   | 0.062608739s  | 0.044760838s |

Eager page splitting remains beneficial for write-heavy workloads, but
the gap is now reduced.

             | Config: ept=Y, tdp_mmu=Y, 100% writes         |
             | Iteration 1 dirty memory time                 |
             | --------------------------------------------- |
vCPU Count   | eps=N (Before) | eps=N (After) | eps=Y        |
------------ | -------------- | ------------- | ------------ |
2            | 0.317710329s   | 0.296204596s  | 0.058689782s |
4            | 0.337102375s   | 0.299841017s  | 0.060343076s |
8            | 0.386025681s   | 0.297274460s  | 0.060399702s |
16           | 0.791462524s   | 0.298942578s  | 0.062508699s |
32           | 1.719646014s   | 0.313101996s  | 0.075984855s |
64           | 2.527973150s   | 0.455779206s  | 0.079789363s |
96           | 2.681123208s   | 0.673778787s  | 0.165386739s |

Further study is needed to determine if the remaining gap is acceptable
for customer workloads or if eager_page_split=N still requires a-priori
knowledge of the VM workload, especially when considering these costs
extrapolated out to large VMs with e.g. 416 vCPUs and 12TB RAM.

Signed-off-by: David Matlack <dmatlack@google.com>
Reviewed-by: Mingwei Zhang <mizhang@google.com>
Message-Id: <20221109185905.486172-3-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 73 ++++++++++++++++++--------------------
 1 file changed, 35 insertions(+), 38 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 4e5b3ae824c16..e085967754276 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1146,6 +1146,9 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
 	return 0;
 }
 
+static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
+				   struct kvm_mmu_page *sp, bool shared);
+
 /*
  * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
  * page tables and SPTEs to translate the faulting guest physical address.
@@ -1171,49 +1174,42 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		if (iter.level == fault->goal_level)
 			break;
 
-		/*
-		 * If there is an SPTE mapping a large page at a higher level
-		 * than the target, that SPTE must be cleared and replaced
-		 * with a non-leaf SPTE.
-		 */
+		/* Step down into the lower level page table if it exists. */
 		if (is_shadow_present_pte(iter.old_spte) &&
-		    is_large_pte(iter.old_spte)) {
-			if (tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
-				break;
+		    !is_large_pte(iter.old_spte))
+			continue;
 
-			/*
-			 * The iter must explicitly re-read the spte here
-			 * because the new value informs the !present
-			 * path below.
-			 */
-			iter.old_spte = kvm_tdp_mmu_read_spte(iter.sptep);
-		}
+		/*
+		 * If SPTE has been frozen by another thread, just give up and
+		 * retry, avoiding unnecessary page table allocation and free.
+		 */
+		if (is_removed_spte(iter.old_spte))
+			break;
 
-		if (!is_shadow_present_pte(iter.old_spte)) {
-			/*
-			 * If SPTE has been frozen by another thread, just
-			 * give up and retry, avoiding unnecessary page table
-			 * allocation and free.
-			 */
-			if (is_removed_spte(iter.old_spte))
-				break;
+		/*
+		 * The SPTE is either non-present or points to a huge page that
+		 * needs to be split.
+		 */
+		sp = tdp_mmu_alloc_sp(vcpu);
+		tdp_mmu_init_child_sp(sp, &iter);
 
-			sp = tdp_mmu_alloc_sp(vcpu);
-			tdp_mmu_init_child_sp(sp, &iter);
+		sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
 
-			sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
+		if (is_shadow_present_pte(iter.old_spte))
+			ret = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
+		else
+			ret = tdp_mmu_link_sp(kvm, &iter, sp, true);
 
-			if (tdp_mmu_link_sp(kvm, &iter, sp, true)) {
-				tdp_mmu_free_sp(sp);
-				break;
-			}
+		if (ret) {
+			tdp_mmu_free_sp(sp);
+			break;
+		}
 
-			if (fault->huge_page_disallowed &&
-			    fault->req_level >= iter.level) {
-				spin_lock(&kvm->arch.tdp_mmu_pages_lock);
-				track_possible_nx_huge_page(kvm, sp);
-				spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
-			}
+		if (fault->huge_page_disallowed &&
+		    fault->req_level >= iter.level) {
+			spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+			track_possible_nx_huge_page(kvm, sp);
+			spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
 		}
 	}
 
@@ -1477,6 +1473,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
 	return sp;
 }
 
+/* Note, the caller is responsible for initializing @sp. */
 static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
 				   struct kvm_mmu_page *sp, bool shared)
 {
@@ -1484,8 +1481,6 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
 	const int level = iter->level;
 	int ret, i;
 
-	tdp_mmu_init_child_sp(sp, iter);
-
 	/*
 	 * No need for atomics when writing to sp->spt since the page table has
 	 * not been linked in yet and thus is not reachable from any other CPU.
@@ -1561,6 +1556,8 @@ retry:
 				continue;
 		}
 
+		tdp_mmu_init_child_sp(sp, &iter);
+
 		if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared))
 			goto retry;
 
-- 
GitLab


From 63d28a25e04cb48e6bd15141506645ac99d9f8b2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 17 Nov 2022 11:05:51 -0500
Subject: [PATCH 1715/4122] KVM: x86/mmu: simplify kvm_tdp_mmu_map flow when
 guest has to retry

A removed SPTE is never present, hence the "if" in kvm_tdp_mmu_map
only fails in the exact same conditions that the earlier loop
tested in order to issue a  "break". So, instead of checking twice the
condition (upper level SPTEs could not be created or was frozen), just
exit the loop with a goto---the usual poor-man C replacement for RAII
early returns.

While at it, do not use the "ret" variable for return values of
functions that do not return a RET_PF_* enum.  This is clearer
and also makes it possible to initialize ret to RET_PF_RETRY.

Suggested-by: Robert Hoo <robert.hu@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/tdp_mmu.c | 40 ++++++++++++++++++--------------------
 1 file changed, 19 insertions(+), 21 deletions(-)

diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index e085967754276..771210ce51811 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -1159,7 +1159,7 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	struct kvm *kvm = vcpu->kvm;
 	struct tdp_iter iter;
 	struct kvm_mmu_page *sp;
-	int ret;
+	int ret = RET_PF_RETRY;
 
 	kvm_mmu_hugepage_adjust(vcpu, fault);
 
@@ -1168,23 +1168,25 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 	rcu_read_lock();
 
 	tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
+		int r;
+
 		if (fault->nx_huge_page_workaround_enabled)
 			disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
 
 		if (iter.level == fault->goal_level)
 			break;
 
-		/* Step down into the lower level page table if it exists. */
-		if (is_shadow_present_pte(iter.old_spte) &&
-		    !is_large_pte(iter.old_spte))
-			continue;
-
 		/*
 		 * If SPTE has been frozen by another thread, just give up and
 		 * retry, avoiding unnecessary page table allocation and free.
 		 */
 		if (is_removed_spte(iter.old_spte))
-			break;
+			goto retry;
+
+		/* Step down into the lower level page table if it exists. */
+		if (is_shadow_present_pte(iter.old_spte) &&
+		    !is_large_pte(iter.old_spte))
+			continue;
 
 		/*
 		 * The SPTE is either non-present or points to a huge page that
@@ -1196,13 +1198,17 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
 
 		if (is_shadow_present_pte(iter.old_spte))
-			ret = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
+			r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
 		else
-			ret = tdp_mmu_link_sp(kvm, &iter, sp, true);
+			r = tdp_mmu_link_sp(kvm, &iter, sp, true);
 
-		if (ret) {
+		/*
+		 * Also force the guest to retry the access if the upper level SPTEs
+		 * aren't in place.
+		 */
+		if (r) {
 			tdp_mmu_free_sp(sp);
-			break;
+			goto retry;
 		}
 
 		if (fault->huge_page_disallowed &&
@@ -1213,18 +1219,10 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
 		}
 	}
 
-	/*
-	 * Force the guest to retry the access if the upper level SPTEs aren't
-	 * in place, or if the target leaf SPTE is frozen by another CPU.
-	 */
-	if (iter.level != fault->goal_level || is_removed_spte(iter.old_spte)) {
-		rcu_read_unlock();
-		return RET_PF_RETRY;
-	}
-
 	ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
-	rcu_read_unlock();
 
+retry:
+	rcu_read_unlock();
 	return ret;
 }
 
-- 
GitLab


From eb298605705a5c6b3d61c754e3c80ac8ef8e8724 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Thu, 3 Nov 2022 13:44:21 -0700
Subject: [PATCH 1716/4122] KVM: x86/mmu: Do not recover dirty-tracked NX Huge
 Pages

Do not recover (i.e. zap) an NX Huge Page that is being dirty tracked,
as it will just be faulted back in at the same 4KiB granularity when
accessed by a vCPU. This may need to be changed if KVM ever supports
2MiB (or larger) dirty tracking granularity, or faulting huge pages
during dirty tracking for reads/executes. However for now, these zaps
are entirely wasteful.

In order to check if this commit increases the CPU usage of the NX
recovery worker thread I used a modified version of execute_perf_test
[1] that supports splitting guest memory into multiple slots and reports
/proc/pid/schedstat:se.sum_exec_runtime for the NX recovery worker just
before tearing down the VM. The goal was to force a large number of NX
Huge Page recoveries and see if the recovery worker used any more CPU.

Test Setup:

  echo 1000 > /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
  echo 10 > /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio

Test Command:

  ./execute_perf_test -v64 -s anonymous_hugetlb_1gb -x 16 -o

        | kvm-nx-lpage-re:se.sum_exec_runtime      |
        | ---------------------------------------- |
Run     | Before             | After               |
------- | ------------------ | ------------------- |
1       | 730.084105         | 724.375314          |
2       | 728.751339         | 740.581988          |
3       | 736.264720         | 757.078163          |

Comparing the median results, this commit results in about a 1% increase
CPU usage of the NX recovery worker when testing a VM with 16 slots.
However, the effect is negligible with the default halving time of NX
pages, which is 1 hour rather than 10 seconds given by period_ms = 1000,
ratio = 10.

[1] https://lore.kernel.org/kvm/20221019234050.3919566-2-dmatlack@google.com/

Signed-off-by: David Matlack <dmatlack@google.com>
Message-Id: <20221103204421.1146958-1-dmatlack@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 93c389eaf4711..cfff74685a259 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6841,6 +6841,7 @@ static int set_nx_huge_pages_recovery_param(const char *val, const struct kernel
 static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 {
 	unsigned long nx_lpage_splits = kvm->stat.nx_lpage_splits;
+	struct kvm_memory_slot *slot;
 	int rcu_idx;
 	struct kvm_mmu_page *sp;
 	unsigned int ratio;
@@ -6875,7 +6876,21 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 				      struct kvm_mmu_page,
 				      possible_nx_huge_page_link);
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
-		if (is_tdp_mmu_page(sp))
+		WARN_ON_ONCE(!sp->role.direct);
+
+		slot = gfn_to_memslot(kvm, sp->gfn);
+		WARN_ON_ONCE(!slot);
+
+		/*
+		 * Unaccount and do not attempt to recover any NX Huge Pages
+		 * that are being dirty tracked, as they would just be faulted
+		 * back in as 4KiB pages. The NX Huge Pages in this slot will be
+		 * recovered, along with all the other huge pages in the slot,
+		 * when dirty logging is disabled.
+		 */
+		if (slot && kvm_slot_dirty_track_enabled(slot))
+			unaccount_nx_huge_page(kvm, sp);
+		else if (is_tdp_mmu_page(sp))
 			flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
 		else
 			kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
-- 
GitLab


From effae0e3d9e1139d583e9b5d050f4f948825b8a3 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Tue, 15 Nov 2022 10:51:33 +0000
Subject: [PATCH 1717/4122] riscv: Kconfig: Enable cpufreq kconfig menu

Enable cpufreq kconfig menu for RISC-V.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221115105135.1180490-2-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 7cd981f96f48b..3b41165a8b100 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -684,6 +684,8 @@ menu "CPU Power Management"
 
 source "drivers/cpuidle/Kconfig"
 
+source "drivers/cpufreq/Kconfig"
+
 endmenu # "CPU Power Management"
 
 source "arch/riscv/kvm/Kconfig"
-- 
GitLab


From 729c287e9f7481d630b69c73960e2ac990cd04fc Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 8 Nov 2022 10:36:20 -0800
Subject: [PATCH 1718/4122] scsi: lpfc: Remove redundant pointer 'lp'

Pointer lp is being initialized and incremented but the result is never
read. The pointer is redundant and can be removed.

Once lp is removed, pcmd is not longer used. So remove pcmd as well

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: James Smart <jsmart2021@gmail.com>
Link: https://lore.kernel.org/r/20221108183620.93978-1-jsmart2021@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_els.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 2b03210264bb0..9326340d4226a 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -9172,15 +9172,10 @@ static int
 lpfc_els_rcv_farpr(struct lpfc_vport *vport, struct lpfc_iocbq *cmdiocb,
 		   struct lpfc_nodelist  *ndlp)
 {
-	struct lpfc_dmabuf *pcmd;
-	uint32_t *lp;
 	uint32_t did;
 
 	did = get_job_els_rsp64_did(vport->phba, cmdiocb);
-	pcmd = cmdiocb->cmd_dmabuf;
-	lp = (uint32_t *)pcmd->virt;
 
-	lp++;
 	/* FARP-RSP received from DID <did> */
 	lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
 			 "0600 FARP-RSP received from DID x%x\n", did);
-- 
GitLab


From b27ac2faa2fc0b2677cf1cbd270af734a1f5fd95 Mon Sep 17 00:00:00 2001
From: Don Brace <don.brace@microchip.com>
Date: Tue, 8 Nov 2022 13:21:38 -0600
Subject: [PATCH 1719/4122] scsi: smartpqi: Convert to host_tagset

Add support for host_tagset.

Also move the reserved command slots to the end of the pool to eliminate an
addition operation for every SCSI request.

This patch was originally authored by Hannes Reinecke here:

Link: https://lore.kernel.org/linux-block/20191126131009.71726-8-hare@suse.de/

But we NAKed this patch because we wanted to fully test multipath
failover operations.

Suggested-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Mahesh Rajashekhara <Mahesh.Rajashekhara@microchip.com>
Reviewed-by: Mike McGowen <Mike.McGowen@microchip.com>
Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793529811.322537.3294617845448383948.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi.h      |  3 --
 drivers/scsi/smartpqi/smartpqi_init.c | 68 +++++++++++++++++----------
 2 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index e550b12e525a1..8cdf4d2476dd6 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1307,7 +1307,6 @@ struct pqi_ctrl_info {
 	dma_addr_t	error_buffer_dma_handle;
 	size_t		sg_chain_buffer_length;
 	unsigned int	num_queue_groups;
-	u16		max_hw_queue_index;
 	u16		num_elements_per_iq;
 	u16		num_elements_per_oq;
 	u16		max_inbound_iu_length_per_firmware;
@@ -1369,8 +1368,6 @@ struct pqi_ctrl_info {
 	u64		sas_address;
 
 	struct pqi_io_request *io_request_pool;
-	u16		next_io_request_slot;
-
 	struct pqi_event events[PQI_NUM_SUPPORTED_EVENTS];
 	struct work_struct event_work;
 
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index b971fbe3b3a17..651dca535b3bc 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -678,23 +678,36 @@ static inline void pqi_reinit_io_request(struct pqi_io_request *io_request)
 	io_request->raid_bypass = false;
 }
 
-static struct pqi_io_request *pqi_alloc_io_request(
-	struct pqi_ctrl_info *ctrl_info)
+static inline struct pqi_io_request *pqi_alloc_io_request(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd)
 {
 	struct pqi_io_request *io_request;
-	u16 i = ctrl_info->next_io_request_slot;	/* benignly racy */
+	u16 i;
 
-	while (1) {
+	if (scmd) { /* SML I/O request */
+		u32 blk_tag = blk_mq_unique_tag(scsi_cmd_to_rq(scmd));
+
+		i = blk_mq_unique_tag_to_tag(blk_tag);
 		io_request = &ctrl_info->io_request_pool[i];
-		if (atomic_inc_return(&io_request->refcount) == 1)
-			break;
-		atomic_dec(&io_request->refcount);
-		i = (i + 1) % ctrl_info->max_io_slots;
+		if (atomic_inc_return(&io_request->refcount) > 1) {
+			atomic_dec(&io_request->refcount);
+			return NULL;
+		}
+	} else { /* IOCTL or driver internal request */
+		/*
+		 * benignly racy - may have to wait for an open slot.
+		 * command slot range is scsi_ml_can_queue -
+		 *         [scsi_ml_can_queue + (PQI_RESERVED_IO_SLOTS - 1)]
+		 */
+		i = 0;
+		while (1) {
+			io_request = &ctrl_info->io_request_pool[ctrl_info->scsi_ml_can_queue + i];
+			if (atomic_inc_return(&io_request->refcount) == 1)
+				break;
+			atomic_dec(&io_request->refcount);
+			i = (i + 1) % PQI_RESERVED_IO_SLOTS;
+		}
 	}
 
-	/* benignly racy */
-	ctrl_info->next_io_request_slot = (i + 1) % ctrl_info->max_io_slots;
-
 	pqi_reinit_io_request(io_request);
 
 	return io_request;
@@ -4586,7 +4599,7 @@ static int pqi_submit_raid_request_synchronous(struct pqi_ctrl_info *ctrl_info,
 		goto out;
 	}
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, NULL);
 
 	put_unaligned_le16(io_request->index,
 		&(((struct pqi_raid_path_request *)request)->request_id));
@@ -5233,7 +5246,6 @@ static void pqi_calculate_queue_resources(struct pqi_ctrl_info *ctrl_info)
 	}
 
 	ctrl_info->num_queue_groups = num_queue_groups;
-	ctrl_info->max_hw_queue_index = num_queue_groups - 1;
 
 	/*
 	 * Make sure that the max. inbound IU length is an even multiple
@@ -5567,7 +5579,9 @@ static inline int pqi_raid_submit_scsi_cmd(struct pqi_ctrl_info *ctrl_info,
 {
 	struct pqi_io_request *io_request;
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, scmd);
+	if (!io_request)
+		return SCSI_MLQUEUE_HOST_BUSY;
 
 	return pqi_raid_submit_scsi_cmd_with_io_request(ctrl_info, io_request,
 		device, scmd, queue_group);
@@ -5671,7 +5685,9 @@ static int pqi_aio_submit_io(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *device;
 
 	device = scmd->device->hostdata;
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, scmd);
+	if (!io_request)
+		return SCSI_MLQUEUE_HOST_BUSY;
 	io_request->io_complete_callback = pqi_aio_io_complete;
 	io_request->scmd = scmd;
 	io_request->raid_bypass = raid_bypass;
@@ -5743,7 +5759,10 @@ static  int pqi_aio_submit_r1_write_io(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_io_request *io_request;
 	struct pqi_aio_r1_path_request *r1_request;
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, scmd);
+	if (!io_request)
+		return SCSI_MLQUEUE_HOST_BUSY;
+
 	io_request->io_complete_callback = pqi_aio_io_complete;
 	io_request->scmd = scmd;
 	io_request->raid_bypass = true;
@@ -5801,7 +5820,9 @@ static int pqi_aio_submit_r56_write_io(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_io_request *io_request;
 	struct pqi_aio_r56_path_request *r56_request;
 
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, scmd);
+	if (!io_request)
+		return SCSI_MLQUEUE_HOST_BUSY;
 	io_request->io_complete_callback = pqi_aio_io_complete;
 	io_request->scmd = scmd;
 	io_request->raid_bypass = true;
@@ -5860,13 +5881,10 @@ static int pqi_aio_submit_r56_write_io(struct pqi_ctrl_info *ctrl_info,
 static inline u16 pqi_get_hw_queue(struct pqi_ctrl_info *ctrl_info,
 	struct scsi_cmnd *scmd)
 {
-	u16 hw_queue;
-
-	hw_queue = blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(scsi_cmd_to_rq(scmd)));
-	if (hw_queue > ctrl_info->max_hw_queue_index)
-		hw_queue = 0;
-
-	return hw_queue;
+	/*
+	 * We are setting host_tagset = 1 during init.
+	 */
+	return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(scsi_cmd_to_rq(scmd)));
 }
 
 static inline bool pqi_is_bypass_eligible_request(struct scsi_cmnd *scmd)
@@ -6268,7 +6286,7 @@ static int pqi_lun_reset(struct pqi_ctrl_info *ctrl_info, struct scsi_cmnd *scmd
 	struct pqi_scsi_dev *device;
 
 	device = scmd->device->hostdata;
-	io_request = pqi_alloc_io_request(ctrl_info);
+	io_request = pqi_alloc_io_request(ctrl_info, NULL);
 	io_request->io_complete_callback = pqi_lun_reset_complete;
 	io_request->context = &wait;
 
-- 
GitLab


From 0b93cf2a9097b1c3d75642ef878ba87f15f03043 Mon Sep 17 00:00:00 2001
From: Mike McGowen <mike.mcgowen@microchip.com>
Date: Tue, 8 Nov 2022 13:21:43 -0600
Subject: [PATCH 1720/4122] scsi: smartpqi: Add new controller PCI IDs

All PCI ID entries in Hex.
Add PCI IDs for ByteDance controllers:
                                            VID  / DID  / SVID / SDID
                                            ----   ----   ----   ----
    ByteHBA JGH43024-8                      9005 / 028f / 1e93 / 1000
    ByteHBA JGH43034-8                      9005 / 028f / 1e93 / 1001
    ByteHBA JGH44014-8                      9005 / 028f / 1e93 / 1002

Add PCI IDs for new Inspur controllers:
                                            VID  / DID  / SVID / SDID
                                            ----   ----   ----   ----
    INSPUR RT0800M7E                        9005 / 028f / 1bd4 / 0086
    INSPUR RT0800M7H                        9005 / 028f / 1bd4 / 0087
    INSPUR RT0804M7R                        9005 / 028f / 1bd4 / 0088
    INSPUR RT0808M7R                        9005 / 028f / 1bd4 / 0089

Add PCI IDs for new FAB A controllers:
                                            VID  / DID  / SVID / SDID
                                            ----   ----   ----   ----
    Adaptec SmartRAID 3254-16e /e           9005 / 028f / 9005 / 1475
    Adaptec HBA 1200-16e                    9005 / 028f / 9005 / 14c3
    Adaptec HBA 1200-8e                     9005 / 028f / 9005 / 14c4

Add H3C controller PCI IDs:
                                            VID  / DID  / SVID / SDID
                                            ----   ----   ----   ----
    H3C H4508-Mf-8i                         9005 / 028f / 193d / 110b

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Signed-off-by: Mike McGowen <mike.mcgowen@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793530327.322537.6056884426657539311.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 44 +++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 651dca535b3bc..6cda12078130b 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -9320,6 +9320,10 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       0x193d, 0x1109)
 	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x193d, 0x110b)
+	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       0x193d, 0x8460)
@@ -9420,6 +9424,22 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       0x1bd4, 0x0072)
 	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1bd4, 0x0086)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1bd4, 0x0087)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1bd4, 0x0088)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       0x1bd4, 0x0089)
+	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       0x19e5, 0xd227)
@@ -9668,6 +9688,10 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_VENDOR_ID_ADAPTEC2, 0x1474)
 	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       PCI_VENDOR_ID_ADAPTEC2, 0x1475)
+	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_VENDOR_ID_ADAPTEC2, 0x1480)
@@ -9724,6 +9748,14 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_VENDOR_ID_ADAPTEC2, 0x14c2)
 	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       PCI_VENDOR_ID_ADAPTEC2, 0x14c3)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+			       PCI_VENDOR_ID_ADAPTEC2, 0x14c4)
+	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_VENDOR_ID_ADAPTEC2, 0x14d0)
@@ -9960,6 +9992,18 @@ static const struct pci_device_id pqi_pci_id_table[] = {
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_VENDOR_ID_LENOVO, 0x0623)
 	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+				0x1e93, 0x1000)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+				0x1e93, 0x1001)
+	},
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
+				0x1e93, 0x1002)
+	},
 	{
 		PCI_DEVICE_SUB(PCI_VENDOR_ID_ADAPTEC2, 0x028f,
 			       PCI_ANY_ID, PCI_ANY_ID)
-- 
GitLab


From 7c56850637ea820a89ce2f52fca66c5ae12d0f0a Mon Sep 17 00:00:00 2001
From: Kevin Barnett <kevin.barnett@microchip.com>
Date: Tue, 8 Nov 2022 13:21:48 -0600
Subject: [PATCH 1721/4122] scsi: smartpqi: Correct max LUN number

Correct maximum LUN number for multi-actuator devices.

When multi-actuator support was added to smartpqi, the maximum number of
LUNs supported for multi-actuator devices was supposed to be changed from
unlimited to 256, but the setting was inadvertently left at unlimited.

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793530842.322537.816949081443241857.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 6cda12078130b..33059355f9cd8 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -7423,7 +7423,6 @@ static int pqi_register_scsi(struct pqi_ctrl_info *ctrl_info)
 	shost->max_channel = PQI_MAX_BUS;
 	shost->max_cmd_len = MAX_COMMAND_SIZE;
 	shost->max_lun = PQI_MAX_LUNS_PER_DEVICE;
-	shost->max_lun = ~0;
 	shost->max_id = ~0;
 	shost->max_sectors = ctrl_info->max_sectors;
 	shost->can_queue = ctrl_info->scsi_ml_can_queue;
-- 
GitLab


From cbe42ac15698a23b204a9b5c66eb0067b22cbd42 Mon Sep 17 00:00:00 2001
From: Kevin Barnett <kevin.barnett@microchip.com>
Date: Tue, 8 Nov 2022 13:21:53 -0600
Subject: [PATCH 1722/4122] scsi: smartpqi: Change sysfs raid_level attribute
 to N/A for controllers

Change the sysfs raid_level attribute from "RAID-0" to N/A.

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Mike McGowan <mike.mcgowan@microchip.com>
Signed-off-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793531357.322537.8639138137605612362.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 33059355f9cd8..20fc6c8044ac5 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -7255,7 +7255,7 @@ static ssize_t pqi_raid_level_show(struct device *dev,
 		return -ENODEV;
 	}
 
-	if (pqi_is_logical_device(device))
+	if (pqi_is_logical_device(device) && device->devtype == TYPE_DISK)
 		raid_level = pqi_raid_level_to_string(device->raid_level);
 	else
 		raid_level = "N/A";
-- 
GitLab


From cc9befcbbb5ebce77726f938508700d913530035 Mon Sep 17 00:00:00 2001
From: Kumar Meiyappan <Kumar.Meiyappan@microchip.com>
Date: Tue, 8 Nov 2022 13:21:58 -0600
Subject: [PATCH 1723/4122] scsi: smartpqi: Correct device removal for
 multi-actuator devices

Correct device count for multi-actuator drives which can cause kernel
panics.

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Mike Mcgowan <mike.mcgowan@microchip.com>
Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Kumar Meiyappan <Kumar.Meiyappan@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793531872.322537.9003385780343419275.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi.h      |  2 +-
 drivers/scsi/smartpqi/smartpqi_init.c | 33 +++++++++++++++++++--------
 2 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/scsi/smartpqi/smartpqi.h b/drivers/scsi/smartpqi/smartpqi.h
index 8cdf4d2476dd6..af27bb0f3133e 100644
--- a/drivers/scsi/smartpqi/smartpqi.h
+++ b/drivers/scsi/smartpqi/smartpqi.h
@@ -1130,7 +1130,7 @@ struct pqi_scsi_dev {
 	u8	phy_id;
 	u8	ncq_prio_enable;
 	u8	ncq_prio_support;
-	u8	multi_lun_device_lun_count;
+	u8	lun_count;
 	bool	raid_bypass_configured;	/* RAID bypass configured */
 	bool	raid_bypass_enabled;	/* RAID bypass enabled */
 	u32	next_bypass_group[RAID_MAP_MAX_DATA_DISKS_PER_ROW];
diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index 20fc6c8044ac5..e82f4de46ea7b 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -1623,9 +1623,7 @@ static int pqi_get_physical_device_info(struct pqi_ctrl_info *ctrl_info,
 		&id_phys->alternate_paths_phys_connector,
 		sizeof(device->phys_connector));
 	device->bay = id_phys->phys_bay_in_box;
-	device->multi_lun_device_lun_count = id_phys->multi_lun_device_lun_count;
-	if (!device->multi_lun_device_lun_count)
-		device->multi_lun_device_lun_count = 1;
+	device->lun_count = id_phys->multi_lun_device_lun_count;
 	if ((id_phys->even_more_flags & PQI_DEVICE_PHY_MAP_SUPPORTED) &&
 		id_phys->phy_count)
 		device->phy_id =
@@ -1759,7 +1757,7 @@ out:
 	return offline;
 }
 
-static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info,
+static int pqi_get_device_info_phys_logical(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *device,
 	struct bmic_identify_physical_device *id_phys)
 {
@@ -1776,6 +1774,20 @@ static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info,
 	return rc;
 }
 
+static int pqi_get_device_info(struct pqi_ctrl_info *ctrl_info,
+	struct pqi_scsi_dev *device,
+	struct bmic_identify_physical_device *id_phys)
+{
+	int rc;
+
+	rc = pqi_get_device_info_phys_logical(ctrl_info, device, id_phys);
+
+	if (rc == 0 && device->lun_count == 0)
+		device->lun_count = 1;
+
+	return rc;
+}
+
 static void pqi_show_volume_status(struct pqi_ctrl_info *ctrl_info,
 	struct pqi_scsi_dev *device)
 {
@@ -1910,7 +1922,7 @@ static inline void pqi_remove_device(struct pqi_ctrl_info *ctrl_info, struct pqi
 	int rc;
 	int lun;
 
-	for (lun = 0; lun < device->multi_lun_device_lun_count; lun++) {
+	for (lun = 0; lun < device->lun_count; lun++) {
 		rc = pqi_device_wait_for_pending_io(ctrl_info, device, lun,
 			PQI_REMOVE_DEVICE_PENDING_IO_TIMEOUT_MSECS);
 		if (rc)
@@ -2089,6 +2101,7 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info,
 	existing_device->sas_address = new_device->sas_address;
 	existing_device->queue_depth = new_device->queue_depth;
 	existing_device->device_offline = false;
+	existing_device->lun_count = new_device->lun_count;
 
 	if (pqi_is_logical_device(existing_device)) {
 		existing_device->is_external_raid_device = new_device->is_external_raid_device;
@@ -2121,10 +2134,6 @@ static void pqi_scsi_update_device(struct pqi_ctrl_info *ctrl_info,
 		existing_device->phy_connected_dev_type = new_device->phy_connected_dev_type;
 		memcpy(existing_device->box, new_device->box, sizeof(existing_device->box));
 		memcpy(existing_device->phys_connector, new_device->phys_connector, sizeof(existing_device->phys_connector));
-
-		existing_device->multi_lun_device_lun_count = new_device->multi_lun_device_lun_count;
-		if (existing_device->multi_lun_device_lun_count == 0)
-			existing_device->multi_lun_device_lun_count = 1;
 	}
 }
 
@@ -6502,6 +6511,12 @@ static void pqi_slave_destroy(struct scsi_device *sdev)
 		return;
 	}
 
+	device->lun_count--;
+	if (device->lun_count > 0) {
+		mutex_unlock(&ctrl_info->scan_mutex);
+		return;
+	}
+
 	spin_lock_irqsave(&ctrl_info->scsi_device_list_lock, flags);
 	list_del(&device->scsi_device_list_entry);
 	spin_unlock_irqrestore(&ctrl_info->scsi_device_list_lock, flags);
-- 
GitLab


From 14063fb625c4541f48ff0dc7ae005b0d5a159c3f Mon Sep 17 00:00:00 2001
From: Gilbert Wu <Gilbert.Wu@microchip.com>
Date: Tue, 8 Nov 2022 13:22:03 -0600
Subject: [PATCH 1724/4122] scsi: smartpqi: Add controller cache flush during
 rmmod

Add in a call to flush the controller cache during driver removal.

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Mike Mcgowan <mike.mcgowan@microchip.com>
Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Gilbert Wu <Gilbert.Wu@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793532388.322537.878022136408270892.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index e82f4de46ea7b..e9c924ac1bb2e 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -9040,6 +9040,7 @@ static void pqi_pci_remove(struct pci_dev *pci_dev)
 {
 	struct pqi_ctrl_info *ctrl_info;
 	u16 vendor_id;
+	int rc;
 
 	ctrl_info = pci_get_drvdata(pci_dev);
 	if (!ctrl_info)
@@ -9051,6 +9052,13 @@ static void pqi_pci_remove(struct pci_dev *pci_dev)
 	else
 		ctrl_info->ctrl_removal_state = PQI_CTRL_GRACEFUL_REMOVAL;
 
+	if (ctrl_info->ctrl_removal_state == PQI_CTRL_GRACEFUL_REMOVAL) {
+		rc = pqi_flush_cache(ctrl_info, RESTART);
+		if (rc)
+			dev_err(&pci_dev->dev,
+				"unable to flush controller cache during remove\n");
+	}
+
 	pqi_remove_ctrl(ctrl_info);
 }
 
-- 
GitLab


From 921800a1deeaa832e4303e9335a31b4234c41ac1 Mon Sep 17 00:00:00 2001
From: Don Brace <don.brace@microchip.com>
Date: Tue, 8 Nov 2022 13:22:09 -0600
Subject: [PATCH 1725/4122] scsi: smartpqi: Initialize feature section info

Initialize features to 0 before processing.

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Mike Mcgowan <mike.mcgowan@microchip.com>
Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793532902.322537.2436075977808555348.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index e9c924ac1bb2e..fb4a33decde18 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -8004,7 +8004,7 @@ static int pqi_process_config_table(struct pqi_ctrl_info *ctrl_info)
 	struct pqi_config_table *config_table;
 	struct pqi_config_table_section_header *section;
 	struct pqi_config_table_section_info section_info;
-	struct pqi_config_table_section_info feature_section_info;
+	struct pqi_config_table_section_info feature_section_info = {0};
 
 	table_length = ctrl_info->config_table_length;
 	if (table_length == 0)
-- 
GitLab


From 2ae45329a956ff86ff8bec36463b6f49d2ca9bea Mon Sep 17 00:00:00 2001
From: Don Brace <don.brace@microchip.com>
Date: Tue, 8 Nov 2022 13:22:14 -0600
Subject: [PATCH 1726/4122] scsi: smartpqi: Change version to 2.1.20-035

Reviewed-by: Scott Benesh <scott.benesh@microchip.com>
Reviewed-by: Scott Teel <scott.teel@microchip.com>
Reviewed-by: Gerry Morong <gerry.morong@microchip.com>
Reviewed-by: Kevin Barnett <kevin.barnett@microchip.com>
Signed-off-by: Don Brace <don.brace@microchip.com>
Link: https://lore.kernel.org/r/166793533417.322537.3074216622272955440.stgit@brunhilda
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/smartpqi/smartpqi_init.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/smartpqi/smartpqi_init.c b/drivers/scsi/smartpqi/smartpqi_init.c
index fb4a33decde18..d0446d4d44656 100644
--- a/drivers/scsi/smartpqi/smartpqi_init.c
+++ b/drivers/scsi/smartpqi/smartpqi_init.c
@@ -33,11 +33,11 @@
 #define BUILD_TIMESTAMP
 #endif
 
-#define DRIVER_VERSION		"2.1.18-045"
+#define DRIVER_VERSION		"2.1.20-035"
 #define DRIVER_MAJOR		2
 #define DRIVER_MINOR		1
-#define DRIVER_RELEASE		18
-#define DRIVER_REVISION		45
+#define DRIVER_RELEASE		20
+#define DRIVER_REVISION		35
 
 #define DRIVER_NAME		"Microchip SmartPQI Driver (v" \
 				DRIVER_VERSION BUILD_TIMESTAMP ")"
-- 
GitLab


From 216e179724c1d9f57a8ababf8bd7aaabef67f01b Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Fri, 11 Nov 2022 02:05:25 -0800
Subject: [PATCH 1727/4122] scsi: scsi_debug: Fix a warning in
 resp_write_scat()

As 'lbdof_blen' is coming from user, if the size in kzalloc() is >=
MAX_ORDER then we hit a warning.

Call trace:

sg_ioctl
 sg_ioctl_common
   scsi_ioctl
    sg_scsi_ioctl
     blk_execute_rq
      blk_mq_sched_insert_request
       blk_mq_run_hw_queue
        __blk_mq_delay_run_hw_queue
         __blk_mq_run_hw_queue
          blk_mq_sched_dispatch_requests
           __blk_mq_sched_dispatch_requests
            blk_mq_dispatch_rq_list
             scsi_queue_rq
              scsi_dispatch_cmd
               scsi_debug_queuecommand
                schedule_resp
                 resp_write_scat

If you try to allocate a memory larger than(>=) MAX_ORDER, then kmalloc()
will definitely fail.  It creates a stack trace and messes up dmesg.  The
user controls the size here so if they specify a too large size it will
fail.

Add __GFP_NOWARN in order to avoid too large allocation warning.  This is
detected by static analysis using smatch.

Fixes: 481b5e5c7949 ("scsi: scsi_debug: add resp_write_scat function")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Link: https://lore.kernel.org/r/20221111100526.1790533-1-harshit.m.mogalapalli@oracle.com
Acked-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 697fc57bc711f..273224d29ce94 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -3778,7 +3778,7 @@ static int resp_write_scat(struct scsi_cmnd *scp,
 		mk_sense_buffer(scp, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0);
 		return illegal_condition_result;
 	}
-	lrdp = kzalloc(lbdof_blen, GFP_ATOMIC);
+	lrdp = kzalloc(lbdof_blen, GFP_ATOMIC | __GFP_NOWARN);
 	if (lrdp == NULL)
 		return SCSI_MLQUEUE_HOST_BUSY;
 	if (sdebug_verbose)
-- 
GitLab


From b29e91385ce2d3aae70906f80f517f9b93d97a7b Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Fri, 11 Nov 2022 15:43:10 +0800
Subject: [PATCH 1728/4122] scsi: lpfc: Use memset_startat() helper

Use memset_startat() helper to simplify the code, no functional changes in
this patch.

Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Link: https://lore.kernel.org/r/20221111074310.132125-1-xiujianfeng@huawei.com
Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index d25afc9dde142..35c9404d5e49d 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1373,7 +1373,6 @@ static void
 __lpfc_sli_release_iocbq_s4(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
 {
 	struct lpfc_sglq *sglq;
-	size_t start_clean = offsetof(struct lpfc_iocbq, wqe);
 	unsigned long iflag = 0;
 	struct lpfc_sli_ring *pring;
 
@@ -1430,7 +1429,7 @@ out:
 	/*
 	 * Clean all volatile data fields, preserve iotag and node struct.
 	 */
-	memset((char *)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
+	memset_startat(iocbq, 0, wqe);
 	iocbq->sli4_lxritag = NO_XRI;
 	iocbq->sli4_xritag = NO_XRI;
 	iocbq->cmd_flag &= ~(LPFC_IO_NVME | LPFC_IO_NVMET | LPFC_IO_CMF |
@@ -1453,12 +1452,11 @@ out:
 static void
 __lpfc_sli_release_iocbq_s3(struct lpfc_hba *phba, struct lpfc_iocbq *iocbq)
 {
-	size_t start_clean = offsetof(struct lpfc_iocbq, iocb);
 
 	/*
 	 * Clean all volatile data fields, preserve iotag and node struct.
 	 */
-	memset((char*)iocbq + start_clean, 0, sizeof(*iocbq) - start_clean);
+	memset_startat(iocbq, 0, iocb);
 	iocbq->sli4_xritag = NO_XRI;
 	list_add_tail(&iocbq->list, &phba->lpfc_iocb_list);
 }
-- 
GitLab


From 0824050682aef5151ade16129b3a0498a07ca6c9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 11 Nov 2022 17:08:24 +0000
Subject: [PATCH 1729/4122] scsi: libfc: Remove redundant variable ev_qual

Variable ev_qual is being assigned and modified but the end result is never
used. The variable is redundant and can be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221111170824.558250-1-colin.i.king@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libfc/fc_disc.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c
index 942fc60f7c21c..0f32ded246d0b 100644
--- a/drivers/scsi/libfc/fc_disc.c
+++ b/drivers/scsi/libfc/fc_disc.c
@@ -75,7 +75,6 @@ static void fc_disc_recv_rscn_req(struct fc_disc *disc, struct fc_frame *fp)
 	struct fc_seq_els_data rjt_data;
 	unsigned int len;
 	int redisc = 0;
-	enum fc_els_rscn_ev_qual ev_qual;
 	enum fc_els_rscn_addr_fmt fmt;
 	LIST_HEAD(disc_ports);
 	struct fc_disc_port *dp, *next;
@@ -107,8 +106,6 @@ static void fc_disc_recv_rscn_req(struct fc_disc *disc, struct fc_frame *fp)
 		goto reject;
 
 	for (pp = (void *)(rp + 1); len > 0; len -= sizeof(*pp), pp++) {
-		ev_qual = pp->rscn_page_flags >> ELS_RSCN_EV_QUAL_BIT;
-		ev_qual &= ELS_RSCN_EV_QUAL_MASK;
 		fmt = pp->rscn_page_flags >> ELS_RSCN_ADDR_FMT_BIT;
 		fmt &= ELS_RSCN_ADDR_FMT_MASK;
 		/*
-- 
GitLab


From c4c5fa35563a47957fa4f9c299ca1c6aadc27d50 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 15 Nov 2022 14:25:16 -0600
Subject: [PATCH 1730/4122] scsi: bfa: Replace one-element array with
 flexible-array member

One-element arrays are deprecated, and we are replacing them with flexible
array members instead. So, replace one-element array with flexible-array
member in struct fdmi_attr_s.

Important to mention is that doing a build before/after this patch results
in no binary output differences.

This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines
on memcpy() and help us make progress towards globally enabling
-fstrict-flex-arrays=3 [1].

Link: https://github.com/KSPP/linux/issues/209
Link: https://github.com/KSPP/linux/issues/79
Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1]
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/Y3P1rEEBq7HzJygq@work
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/bfa/bfa_fc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/bfa/bfa_fc.h b/drivers/scsi/bfa/bfa_fc.h
index 0314e4b9e1fbb..a12d693065cef 100644
--- a/drivers/scsi/bfa/bfa_fc.h
+++ b/drivers/scsi/bfa/bfa_fc.h
@@ -1548,7 +1548,7 @@ enum fdmi_port_attribute_type {
 struct fdmi_attr_s {
 	__be16        type;
 	__be16        len;
-	u8         value[1];
+	u8         value[];
 };
 
 /*
-- 
GitLab


From 2c1a0a7584f5084f3ec79f86c9a54ee4c55307c4 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Tue, 15 Nov 2022 17:19:16 -0800
Subject: [PATCH 1731/4122] scsi: lpfc: Fix WQ|CQ|EQ resource check

Adapter configurations with limited EQ resources may fail to initialize.

Firmware resources are queried in lpfc_sli4_read_config().  The driver
parameters cfg_irq_chann and cfg_hdw_queue are adjusted from defaults if
constrained by firmware resources.

The minimum resource check includes a special allocation for queues such as
ELS, MBOX, NVME LS. However the additional reservation was also incorrectly
applied to EQ resources.

Reordered WQ|CQ|EQ resource checks to apply the special allocation
adjustment to WQ and CQ resources only.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221116011921.105995-2-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index b49c39569386a..a6e32ecd4151c 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -10092,17 +10092,15 @@ lpfc_sli4_read_config(struct lpfc_hba *phba)
 		qmin = phba->sli4_hba.max_cfg_param.max_wq;
 		if (phba->sli4_hba.max_cfg_param.max_cq < qmin)
 			qmin = phba->sli4_hba.max_cfg_param.max_cq;
-		if (phba->sli4_hba.max_cfg_param.max_eq < qmin)
-			qmin = phba->sli4_hba.max_cfg_param.max_eq;
 		/*
-		 * Whats left after this can go toward NVME / FCP.
-		 * The minus 4 accounts for ELS, NVME LS, MBOX
-		 * plus one extra. When configured for
-		 * NVMET, FCP io channel WQs are not created.
+		 * Reserve 4 (ELS, NVME LS, MBOX, plus one extra) and
+		 * the remainder can be used for NVME / FCP.
 		 */
 		qmin -= 4;
+		if (phba->sli4_hba.max_cfg_param.max_eq < qmin)
+			qmin = phba->sli4_hba.max_cfg_param.max_eq;
 
-		/* Check to see if there is enough for NVME */
+		/* Check to see if there is enough for default cfg */
 		if ((phba->cfg_irq_chann > qmin) ||
 		    (phba->cfg_hdw_queue > qmin)) {
 			lpfc_printf_log(phba, KERN_ERR, LOG_TRACE_EVENT,
-- 
GitLab


From ae696255d655bec673e5a5707f37ff6a098e89c2 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Tue, 15 Nov 2022 17:19:17 -0800
Subject: [PATCH 1732/4122] scsi: lpfc: Correct bandwidth logging during
 receipt of congestion sync WCQE

The lpfc_cmf_timer adjusts phba->cmf_link_byte_count periodically and can
artifically inflate bandwidth percent.

During bandwidth calculation, correct for this by setting a cap of logging
a maximum of 100%.

Bandwidth calculation is only used for display under LOG_CGN_MGMT so there
is no expectation of impacts on performance.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221116011921.105995-3-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_sli.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index 35c9404d5e49d..182aaae603868 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -1846,6 +1846,12 @@ lpfc_cmf_sync_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 				  phba->cmf_link_byte_count);
 		bwpcent = div64_u64(bw * 100 + slop,
 				    phba->cmf_link_byte_count);
+		/* Because of bytes adjustment due to shorter timer in
+		 * lpfc_cmf_timer() the cmf_link_byte_count can be shorter and
+		 * may seem like BW is above 100%.
+		 */
+		if (bwpcent > 100)
+			bwpcent = 100;
 
 		if (phba->cmf_max_bytes_per_interval < bw &&
 		    bwpcent > 95)
-- 
GitLab


From d99af587d59ca39747b4328dad0b193655835c90 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Tue, 15 Nov 2022 17:19:18 -0800
Subject: [PATCH 1733/4122] scsi: lpfc: Fix MI capability display in cmf_info
 sysfs attribute

The dynamic mi_ver value holds the currently configured MI setting.  mi_ver
was being displayed as part of the cmf_info sysfs attribute, when the
output string meant to display MI capabilities instead.

Add a mi_cap member in the lpfc_pc_sli4_params structure that will store MI
capabilities during initialization so that cmf_info prints out capabilities
instead of current configuration.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221116011921.105995-4-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_attr.c | 2 +-
 drivers/scsi/lpfc/lpfc_init.c | 3 +++
 drivers/scsi/lpfc/lpfc_sli4.h | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 030ad1d59cbd6..77e1b2911cb41 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -134,7 +134,7 @@ lpfc_cmf_info_show(struct device *dev, struct device_attribute *attr,
 	scnprintf(tmp, sizeof(tmp),
 		  "Congestion Mgmt Info: E2Eattr %d Ver %d "
 		  "CMF %d cnt %d\n",
-		  phba->sli4_hba.pc_sli4_params.mi_ver,
+		  phba->sli4_hba.pc_sli4_params.mi_cap,
 		  cp ? cp->cgn_info_version : 0,
 		  phba->sli4_hba.pc_sli4_params.cmf, phba->cmf_timer_cnt);
 
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index a6e32ecd4151c..a119c06742b82 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -699,6 +699,8 @@ lpfc_sli4_refresh_params(struct lpfc_hba *phba)
 		return rc;
 	}
 	mbx_sli4_parameters = &mqe->un.get_sli4_parameters.sli4_parameters;
+	phba->sli4_hba.pc_sli4_params.mi_cap =
+		bf_get(cfg_mi_ver, mbx_sli4_parameters);
 
 	/* Are we forcing MI off via module parameter? */
 	if (phba->cfg_enable_mi)
@@ -13839,6 +13841,7 @@ lpfc_get_sli4_parameters(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
 					   mbx_sli4_parameters);
 	phba->sli4_hba.extents_in_use = bf_get(cfg_ext, mbx_sli4_parameters);
 	phba->sli4_hba.rpi_hdrs_in_use = bf_get(cfg_hdrr, mbx_sli4_parameters);
+	sli4_params->mi_cap = bf_get(cfg_mi_ver, mbx_sli4_parameters);
 
 	/* Check for Extended Pre-Registered SGL support */
 	phba->cfg_xpsgl = bf_get(cfg_xpsgl, mbx_sli4_parameters);
diff --git a/drivers/scsi/lpfc/lpfc_sli4.h b/drivers/scsi/lpfc/lpfc_sli4.h
index cbb1aa1cf025b..f927c2a25d546 100644
--- a/drivers/scsi/lpfc/lpfc_sli4.h
+++ b/drivers/scsi/lpfc/lpfc_sli4.h
@@ -556,6 +556,7 @@ struct lpfc_pc_sli4_params {
 #define LPFC_MIB3_SUPPORT	3
 	uint16_t mi_value;
 #define LPFC_DFLT_MIB_VAL	2
+	uint8_t mi_cap;
 	uint8_t mib_bde_cnt;
 	uint8_t cmf;
 	uint8_t cqv;
-- 
GitLab


From 97f256913c5d8a633efe4f11d4ed2d6a3ea42635 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Tue, 15 Nov 2022 17:19:19 -0800
Subject: [PATCH 1734/4122] scsi: lpfc: Fix crash involving race between FLOGI
 timeout and devloss handler

When a FLOGI completes with a sequence timeout error, a freed kref ptr
dereference crash can occur due to a timing race involving ndlp referencing
in lpfc_dev_loss_tmo_callbk.

Fix by ensuring the driver accounts for an outstanding FLOGI when dev_loss
is active.  Also, don't remove the HBA_FLOGI_OUTSTANDING flag when the
FLOGI is retried to allow the driver to handle the reference counts
correctly in lpfc_dev_loss_tmo_handler.

Reported-by: Dietmar Hahn <dietmar.hahn@fujitsu.com>
Tested-by: Dietmar Hahn <dietmar.hahn@fujitsu.com>
Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221116011921.105995-5-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_els.c     | 36 +++++++++++++++++++++++++++-----
 drivers/scsi/lpfc/lpfc_hbadisc.c | 36 +++++++++++++++++++++++---------
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/lpfc/lpfc_els.c b/drivers/scsi/lpfc/lpfc_els.c
index 9326340d4226a..919741bbe267b 100644
--- a/drivers/scsi/lpfc/lpfc_els.c
+++ b/drivers/scsi/lpfc/lpfc_els.c
@@ -952,6 +952,7 @@ lpfc_cmpl_els_flogi(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
 	uint16_t fcf_index;
 	int rc;
 	u32 ulp_status, ulp_word4, tmo;
+	bool flogi_in_retry = false;
 
 	/* Check to see if link went down during discovery */
 	if (lpfc_els_chk_latt(vport)) {
@@ -1022,8 +1023,23 @@ stop_rr_fcf_flogi:
 					 phba->hba_flag, phba->fcf.fcf_flag);
 
 		/* Check for retry */
-		if (lpfc_els_retry(phba, cmdiocb, rspiocb))
+		if (lpfc_els_retry(phba, cmdiocb, rspiocb)) {
+			/* Address a timing race with dev_loss.  If dev_loss
+			 * is active on this FPort node, put the initial ref
+			 * count back to stop premature node release actions.
+			 */
+			lpfc_check_nlp_post_devloss(vport, ndlp);
+			flogi_in_retry = true;
 			goto out;
+		}
+
+		/* The FLOGI will not be retried.  If the FPort node is not
+		 * registered with the SCSI transport, remove the initial
+		 * reference to trigger node release.
+		 */
+		if (!(ndlp->nlp_flag & NLP_IN_DEV_LOSS) &&
+		    !(ndlp->fc4_xpt_flags & SCSI_XPT_REGD))
+			lpfc_nlp_put(ndlp);
 
 		lpfc_printf_vlog(vport, KERN_WARNING, LOG_TRACE_EVENT,
 				 "0150 FLOGI failure Status:x%x/x%x "
@@ -1086,7 +1102,7 @@ stop_rr_fcf_flogi:
 	spin_unlock_irq(shost->host_lock);
 
 	/*
-	 * The FLogI succeeded.  Sync the data for the CPU before
+	 * The FLOGI succeeded.  Sync the data for the CPU before
 	 * accessing it.
 	 */
 	prsp = list_get_first(&pcmd->list, struct lpfc_dmabuf, list);
@@ -1108,6 +1124,12 @@ stop_rr_fcf_flogi:
 		vport->phba->pport->vmid_flag |= (LPFC_VMID_ISSUE_QFPA |
 						  LPFC_VMID_TYPE_PRIO);
 
+	/*
+	 * Address a timing race with dev_loss.  If dev_loss is active on
+	 * this FPort node, put the initial ref count back to stop premature
+	 * node release actions.
+	 */
+	lpfc_check_nlp_post_devloss(vport, ndlp);
 	if (vport->port_state == LPFC_FLOGI) {
 		/*
 		 * If Common Service Parameters indicate Nport
@@ -1198,7 +1220,9 @@ flogifail:
 		lpfc_issue_clear_la(phba, vport);
 	}
 out:
-	phba->hba_flag &= ~HBA_FLOGI_OUTSTANDING;
+	if (!flogi_in_retry)
+		phba->hba_flag &= ~HBA_FLOGI_OUTSTANDING;
+
 	lpfc_els_free_iocb(phba, cmdiocb);
 	lpfc_nlp_put(ndlp);
 }
@@ -1365,15 +1389,17 @@ lpfc_issue_els_flogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
 		return 1;
 	}
 
+	/* Avoid race with FLOGI completion and hba_flags. */
+	phba->hba_flag |= (HBA_FLOGI_ISSUED | HBA_FLOGI_OUTSTANDING);
+
 	rc = lpfc_issue_fabric_iocb(phba, elsiocb);
 	if (rc == IOCB_ERROR) {
+		phba->hba_flag &= ~(HBA_FLOGI_ISSUED | HBA_FLOGI_OUTSTANDING);
 		lpfc_els_free_iocb(phba, elsiocb);
 		lpfc_nlp_put(ndlp);
 		return 1;
 	}
 
-	phba->hba_flag |= (HBA_FLOGI_ISSUED | HBA_FLOGI_OUTSTANDING);
-
 	/* Clear external loopback plug detected flag */
 	phba->link_flag &= ~LS_EXTERNAL_LOOPBACK;
 
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index d38ebd7281b9b..80375d73b7320 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -426,10 +426,6 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
 	name = (uint8_t *)&ndlp->nlp_portname;
 	phba = vport->phba;
 
-	spin_lock_irqsave(&ndlp->lock, iflags);
-	ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
-	spin_unlock_irqrestore(&ndlp->lock, iflags);
-
 	if (phba->sli_rev == LPFC_SLI_REV4)
 		fcf_inuse = lpfc_fcf_inuse(phba);
 
@@ -451,22 +447,36 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
 				 *name, *(name+1), *(name+2), *(name+3),
 				 *(name+4), *(name+5), *(name+6), *(name+7),
 				 ndlp->nlp_DID);
+
+		spin_lock_irqsave(&ndlp->lock, iflags);
+		ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
+		spin_unlock_irqrestore(&ndlp->lock, iflags);
 		return fcf_inuse;
 	}
 
 	/* Fabric nodes are done. */
 	if (ndlp->nlp_type & NLP_FABRIC) {
 		spin_lock_irqsave(&ndlp->lock, iflags);
-		/* In massive vport configuration settings, it's possible
-		 * dev_loss_tmo fired during node recovery.  So, check if
-		 * fabric nodes are in discovery states outstanding.
+
+		/* In massive vport configuration settings or when the FLOGI
+		 * completes with a sequence timeout, it's possible
+		 * dev_loss_tmo fired during node recovery.  The driver has to
+		 * account for this race to allow for recovery and keep
+		 * the reference counting correct.
 		 */
 		switch (ndlp->nlp_DID) {
 		case Fabric_DID:
 			fc_vport = vport->fc_vport;
-			if (fc_vport &&
-			    fc_vport->vport_state == FC_VPORT_INITIALIZING)
-				recovering = true;
+			if (fc_vport) {
+				/* NPIV path. */
+				if (fc_vport->vport_state ==
+				    FC_VPORT_INITIALIZING)
+					recovering = true;
+			} else {
+				/* Physical port path. */
+				if (phba->hba_flag & HBA_FLOGI_OUTSTANDING)
+					recovering = true;
+			}
 			break;
 		case Fabric_Cntl_DID:
 			if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND)
@@ -514,6 +524,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
 			return fcf_inuse;
 		}
 
+		spin_lock_irqsave(&ndlp->lock, iflags);
+		ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
+		spin_unlock_irqrestore(&ndlp->lock, iflags);
 		lpfc_nlp_put(ndlp);
 		return fcf_inuse;
 	}
@@ -552,6 +565,9 @@ lpfc_dev_loss_tmo_handler(struct lpfc_nodelist *ndlp)
 		return fcf_inuse;
 	}
 
+	spin_lock_irqsave(&ndlp->lock, iflags);
+	ndlp->nlp_flag &= ~NLP_IN_DEV_LOSS;
+	spin_unlock_irqrestore(&ndlp->lock, iflags);
 	if (!(ndlp->fc4_xpt_flags & NVME_XPT_REGD))
 		lpfc_disc_state_machine(vport, ndlp, NULL, NLP_EVT_DEVICE_RM);
 
-- 
GitLab


From 435d6b6564568b82ec74ee7d4d0bcb1f24ccb5f5 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Thu, 17 Nov 2022 10:16:32 -0700
Subject: [PATCH 1735/4122] vmlinux.lds.h: fix BOUNDED_SECTION_(PRE|POST)_LABEL
 macros

Commit 2f465b921bb8 ("vmlinux.lds.h: place optional header space in BOUNDED_SECTION")

added BOUNDED_SECTION_(PRE|POST)_LABEL macros, encapsulating the basic
boilerplate to KEEP/pack records into a section, and to mark the begin
and end of the section with linker-symbols.

But it tried to do extra, adding KEEP(*(.gnu.linkonce.##_sec_)) to
optionally reserve a header record in front of the data.  It wrongly
placed the KEEP after the linker-symbol starting the section,
so if a header was added, it would wind up in the data.

Moving the KEEP to the "correct" place proved brittle, and too clever
by half.  The obvious safe fix is to remove the KEEP and restore the
plain old boilerplate.  The header can be added later, with separate
macros.

Also, the macro var-names: _s_, _e_ are nearly invisible, change them
to more obvious names: _BEGIN_, _END_

Fixes: 2f465b921bb8 ("vmlinux.lds.h: place optional header space in BOUNDED_SECTION")
Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20221117171633.923628-2-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/vmlinux.lds.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index b3ca56ac163f7..c17f947852530 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -193,17 +193,15 @@
 # endif
 #endif
 
-#define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _s_, _e_)		\
-	_s_##_label_ = .;						\
-	KEEP(*(.gnu.linkonce.##_sec_))					\
+#define BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _BEGIN_, _END_)	\
+	_BEGIN_##_label_ = .;						\
 	KEEP(*(_sec_))							\
-	_e_##_label_ = .;
+	_END_##_label_ = .;
 
-#define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _s_, _e_)		\
-	_label_##_s_ = .;						\
-	KEEP(*(.gnu.linkonce.##_sec_))					\
+#define BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _BEGIN_, _END_)	\
+	_label_##_BEGIN_ = .;						\
 	KEEP(*(_sec_))							\
-	_label_##_e_ = .;
+	_label_##_END_ = .;
 
 #define BOUNDED_SECTION_BY(_sec_, _label_)				\
 	BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, __start, __stop)
-- 
GitLab


From 1d926e259d8f8195fdfaeea7951149001894b473 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Thu, 17 Nov 2022 10:16:33 -0700
Subject: [PATCH 1736/4122] vmlinux.lds.h: add HEADERED_SECTION_* macros

These macros elaborate on BOUNDED_SECTION_(PRE|POST)_LABEL macros,
prepending an optional KEEP(.gnu.linkonce##_sec_) reservation, and a
linker-symbol to address it.

This allows a developer to define a header struct (which must fit with
the section's base struct-type), and could contain:

1- fields whose value is common to the entire set of data-records.
   This allows the header & data structs to specialize, complement
   each other, and shrink.

2- an uplink pointer to an organizing struct
   which refs other related/sub data-tables
   header record is addressable via the extern'd header linker-symbol

Once the linker-symbols created by the macro are ref'd extern in code,
that code can compute a record's index (ptr - start) in the "primary"
table, then use it to index into the related/sub tables.  Adding a
primary.map_* field foreach sub-table would then allow deduplication
and remapping of that sub-table.

This is aimed at dyndbg's struct _ddebug __dyndbg[] section, whose 3
columns: function, file, module are 50%, 90%, 100% redundant.  The
module column is fully recoverable after dynamic_debug_init() saves it
to each ddebug_table.module as the builtin __dyndbg[] table is parsed.

Given that those 3 columns use 24/56 of a _ddebug record, a dyndbg=y
kernel with ~5k callsites could reduce kernel memory substantially.
Returning that memory to the kernel buddy-allocator? is then possible.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20221117171633.923628-3-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/asm-generic/vmlinux.lds.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c17f947852530..c9a475a30803e 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -208,6 +208,21 @@
 
 #define BOUNDED_SECTION(_sec)	 BOUNDED_SECTION_BY(_sec, _sec)
 
+#define HEADERED_SECTION_PRE_LABEL(_sec_, _label_, _BEGIN_, _END_, _HDR_) \
+	_HDR_##_label_	= .;						\
+	KEEP(*(.gnu.linkonce.##_sec_))					\
+	BOUNDED_SECTION_PRE_LABEL(_sec_, _label_, _BEGIN_, _END_)
+
+#define HEADERED_SECTION_POST_LABEL(_sec_, _label_, _BEGIN_, _END_, _HDR_) \
+	_label_##_HDR_ = .;						\
+	KEEP(*(.gnu.linkonce.##_sec_))					\
+	BOUNDED_SECTION_POST_LABEL(_sec_, _label_, _BEGIN_, _END_)
+
+#define HEADERED_SECTION_BY(_sec_, _label_)				\
+	HEADERED_SECTION_PRE_LABEL(_sec_, _label_, __start, __stop)
+
+#define HEADERED_SECTION(_sec)	 HEADERED_SECTION_BY(_sec, _sec)
+
 #ifdef CONFIG_TRACE_BRANCH_PROFILING
 #define LIKELY_PROFILE()						\
 	BOUNDED_SECTION_BY(_ftrace_annotated_branch, _annotated_branch_profile)
-- 
GitLab


From a3f3e97a039cab5b7efc754dc2907ab5044c7c88 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Thu, 10 Nov 2022 10:09:27 +0100
Subject: [PATCH 1737/4122] staging: r8188eu: rename three functions

Prefix the names of the following functions with the driver name. The
original names are bad for the global namespace. While at it, convert
is_IBSS_empty() to all lower case to follow kernel coding style.

is_client_associated_to_ap()
is_client_associated_to_ibss()
is_IBSS_empty()

Suggested-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221110090927.17274-1-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 14 +++++++-------
 drivers/staging/r8188eu/core/rtw_wlan_util.c   |  8 ++++----
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  6 +++---
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index be33489d3dfdd..161cb67f78828 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -5831,7 +5831,7 @@ void rtw_mlme_site_survey_done(struct adapter *adapter)
 	int res;
 	u8 reg;
 
-	if ((is_client_associated_to_ap(adapter)) ||
+	if ((r8188eu_is_client_associated_to_ap(adapter)) ||
 	    ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE)) {
 		/* enable to rx data frame */
 		rtw_write16(adapter, REG_RXFLTMAP2, 0xFFFF);
@@ -5982,7 +5982,7 @@ void site_survey(struct adapter *padapter)
 			Restore_DM_Func_Flag(padapter);
 			/* Switch_DM_Func(padapter, DYNAMIC_ALL_FUNC_ENABLE, true); */
 
-			if (is_client_associated_to_ap(padapter))
+			if (r8188eu_is_client_associated_to_ap(padapter))
 				issue_nulldata(padapter, NULL, 0, 3, 500);
 
 			rtw_mlme_site_survey_done(padapter);
@@ -6952,7 +6952,7 @@ void mlmeext_sta_del_event_callback(struct adapter *padapter)
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
 
-	if (is_client_associated_to_ap(padapter) || is_IBSS_empty(padapter)) {
+	if (r8188eu_is_client_associated_to_ap(padapter) || r8188eu_is_ibss_empty(padapter)) {
 		mlme_disconnect(padapter);
 		rtw_set_bssid(padapter, null_addr);
 
@@ -7025,7 +7025,7 @@ void linked_status_chk(struct adapter *padapter)
 
 	rtl8188e_sreset_linked_status_check(padapter);
 
-	if (is_client_associated_to_ap(padapter)) {
+	if (r8188eu_is_client_associated_to_ap(padapter)) {
 		/* linked infrastructure client mode */
 
 		int tx_chk = _SUCCESS, rx_chk = _SUCCESS;
@@ -7097,7 +7097,7 @@ void linked_status_chk(struct adapter *padapter)
 				pmlmeinfo->link_count = 0;
 			}
 		} /* end of if ((psta = rtw_get_stainfo(pstapriv, passoc_res->network.MacAddress)) != NULL) */
-	} else if (is_client_associated_to_ibss(padapter)) {
+	} else if (r8188eu_is_client_associated_to_ibss(padapter)) {
 		/* linked IBSS mode */
 		/* for each assoc list entry to check the rx pkt counter */
 		for (i = IBSS_START_MAC_ID; i < NUM_STA; i++) {
@@ -7415,7 +7415,7 @@ u8 disconnect_hdl(struct adapter *padapter, unsigned char *pbuf)
 	u8 val8;
 	int res;
 
-	if (is_client_associated_to_ap(padapter))
+	if (r8188eu_is_client_associated_to_ap(padapter))
 		issue_deauth_ex(padapter, pnetwork->MacAddress, WLAN_REASON_DEAUTH_LEAVING, param->deauth_timeout_ms / 100, 100);
 
 	mlme_disconnect(padapter);
@@ -7527,7 +7527,7 @@ u8 sitesurvey_cmd_hdl(struct adapter *padapter, u8 *pbuf)
 		pmlmeext->sitesurvey_res.scan_mode = pparm->scan_mode;
 
 		/* issue null data if associating to the AP */
-		if (is_client_associated_to_ap(padapter)) {
+		if (r8188eu_is_client_associated_to_ap(padapter)) {
 			pmlmeext->sitesurvey_res.state = SCAN_TXNULL;
 
 			issue_nulldata(padapter, NULL, 1, 3, 500);
diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c b/drivers/staging/r8188eu/core/rtw_wlan_util.c
index c95438a12b598..965bb7da4cce3 100644
--- a/drivers/staging/r8188eu/core/rtw_wlan_util.c
+++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c
@@ -331,7 +331,7 @@ u16 get_beacon_interval(struct wlan_bssid_ex *bss)
 	return le16_to_cpu(val);
 }
 
-bool is_client_associated_to_ap(struct adapter *padapter)
+bool r8188eu_is_client_associated_to_ap(struct adapter *padapter)
 {
 	struct mlme_ext_priv	*pmlmeext;
 	struct mlme_ext_info	*pmlmeinfo;
@@ -348,7 +348,7 @@ bool is_client_associated_to_ap(struct adapter *padapter)
 	return false;
 }
 
-bool is_client_associated_to_ibss(struct adapter *padapter)
+bool r8188eu_is_client_associated_to_ibss(struct adapter *padapter)
 {
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
@@ -359,7 +359,7 @@ bool is_client_associated_to_ibss(struct adapter *padapter)
 	return false;
 }
 
-bool is_IBSS_empty(struct adapter *padapter)
+bool r8188eu_is_ibss_empty(struct adapter *padapter)
 {
 	unsigned int i;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
@@ -893,7 +893,7 @@ int rtw_check_bcn_info(struct adapter  *Adapter, u8 *pframe, u32 packet_len)
 	unsigned short	ht_cap_info;
 	unsigned char	ht_info_infos_0;
 
-	if (!is_client_associated_to_ap(Adapter))
+	if (!r8188eu_is_client_associated_to_ap(Adapter))
 		return true;
 
 	len = packet_len - sizeof(struct ieee80211_hdr_3addr);
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 9c0af4704607b..c46fc1a530859 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -431,9 +431,9 @@ void update_network(struct wlan_bssid_ex *dst, struct wlan_bssid_ex *src,
 u8 *get_my_bssid(struct wlan_bssid_ex *pnetwork);
 u16 get_beacon_interval(struct wlan_bssid_ex *bss);
 
-bool is_client_associated_to_ap(struct adapter *padapter);
-bool is_client_associated_to_ibss(struct adapter *padapter);
-bool is_IBSS_empty(struct adapter *padapter);
+bool r8188eu_is_client_associated_to_ap(struct adapter *padapter);
+bool r8188eu_is_client_associated_to_ibss(struct adapter *padapter);
+bool r8188eu_is_ibss_empty(struct adapter *padapter);
 
 unsigned char check_assoc_AP(u8 *pframe, uint len);
 
-- 
GitLab


From 9b35a6926c613457e0dce179871028a0aa6efb06 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Fri, 11 Nov 2022 09:37:30 +0100
Subject: [PATCH 1738/4122] staging: r8188eu: convert aes_cipher() to void

The function aes_cipher() returns always _SUCCESS and its callers
do not use the return value. So we can convert the return type to
void and get rid of another use of _SUCCESS.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221111083733.3144-2-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_security.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_security.c b/drivers/staging/r8188eu/core/rtw_security.c
index 5bba57d18b5fc..780019ce1b983 100644
--- a/drivers/staging/r8188eu/core/rtw_security.c
+++ b/drivers/staging/r8188eu/core/rtw_security.c
@@ -954,7 +954,7 @@ static void bitwise_xor(u8 *ina, u8 *inb, u8 *out)
 
 }
 
-static int aes_cipher(u8 *key, uint hdrlen, u8 *pframe, uint plen)
+static void aes_cipher(u8 *key, uint hdrlen, u8 *pframe, uint plen)
 {
 	uint	qc_exists, a4_exists, i, j, payload_remainder,
 		num_blocks, payload_index;
@@ -1083,8 +1083,6 @@ static int aes_cipher(u8 *key, uint hdrlen, u8 *pframe, uint plen)
 	bitwise_xor(aes_out, padded_buffer, chain_buffer);
 	for (j = 0; j < 8; j++)
 		pframe[payload_index++] = chain_buffer[j];
-
-	return _SUCCESS;
 }
 
 u32 rtw_aes_encrypt(struct adapter *padapter, struct xmit_frame *pxmitframe)
-- 
GitLab


From 8985814bb3732aaf7ce5bf892d2827bd4d854fc7 Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Fri, 11 Nov 2022 09:37:31 +0100
Subject: [PATCH 1739/4122] staging: r8188eu: convert rtw_xmit_resource_alloc()
 to common error logic

Convert the function rtw_xmit_resource_alloc() away from returning
_FAIL or _SUCCESS which uses inverted error logic. Use the common
error logic instead. Return 0 for success and negative values for
failure.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221111083733.3144-3-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_xmit.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c
index bd6c1a401c590..c0fda6ce7c56c 100644
--- a/drivers/staging/r8188eu/core/rtw_xmit.c
+++ b/drivers/staging/r8188eu/core/rtw_xmit.c
@@ -38,7 +38,7 @@ static int rtw_xmit_resource_alloc(struct adapter *padapter, struct xmit_buf *px
 {
 	pxmitbuf->pallocated_buf = kzalloc(alloc_sz, GFP_KERNEL);
 	if (!pxmitbuf->pallocated_buf)
-		return _FAIL;
+		return -ENOMEM;
 
 	pxmitbuf->pbuf = (u8 *)ALIGN((size_t)(pxmitbuf->pallocated_buf), XMITBUF_ALIGN_SZ);
 	pxmitbuf->dma_transfer_addr = 0;
@@ -46,10 +46,10 @@ static int rtw_xmit_resource_alloc(struct adapter *padapter, struct xmit_buf *px
 	pxmitbuf->pxmit_urb = usb_alloc_urb(0, GFP_KERNEL);
 	if (!pxmitbuf->pxmit_urb) {
 		kfree(pxmitbuf->pallocated_buf);
-		return _FAIL;
+		return -ENOMEM;
 	}
 
-	return _SUCCESS;
+	return 0;
 }
 
 static void rtw_xmit_resource_free(struct adapter *padapter, struct xmit_buf *pxmitbuf,
@@ -151,12 +151,12 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 		pxmitbuf->ext_tag = false;
 
 		/* Tx buf allocation may fail sometimes, so sleep and retry. */
-		res = rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ));
-		if (res == _FAIL) {
+		if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) {
 			msleep(10);
-			res = rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ));
-			if (res == _FAIL)
+			if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) {
+				res = _FAIL;
 				goto free_xmitbuf;
+			}
 		}
 
 		pxmitbuf->flags = XMIT_VO_QUEUE;
@@ -188,8 +188,7 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 		pxmitbuf->padapter = padapter;
 		pxmitbuf->ext_tag = true;
 
-		res = rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ);
-		if (res == _FAIL) {
+		if (rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ)) {
 			res = _FAIL;
 			goto free_xmit_extbuf;
 		}
-- 
GitLab


From 64ce3acd0a50b3ad7ff345fe11bbb3d39cab216b Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Fri, 11 Nov 2022 09:37:32 +0100
Subject: [PATCH 1740/4122] staging: r8188eu: convert _rtw_init_xmit_priv() to
 common error logic

Convert the function _rtw_init_xmit_priv() away from returning _FAIL
or _SUCCESS which uses inverted error logic. Use the common error
logic instead. Return 0 for success and negative values for failure.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221111083733.3144-4-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_xmit.c    | 28 +++++++---------------
 drivers/staging/r8188eu/include/rtw_xmit.h |  2 +-
 drivers/staging/r8188eu/os_dep/os_intfs.c  |  2 +-
 3 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_xmit.c b/drivers/staging/r8188eu/core/rtw_xmit.c
index c0fda6ce7c56c..34494f08c0cdf 100644
--- a/drivers/staging/r8188eu/core/rtw_xmit.c
+++ b/drivers/staging/r8188eu/core/rtw_xmit.c
@@ -59,12 +59,11 @@ static void rtw_xmit_resource_free(struct adapter *padapter, struct xmit_buf *px
 	kfree(pxmitbuf->pallocated_buf);
 }
 
-s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
+int _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 {
 	int i;
 	struct xmit_buf *pxmitbuf;
 	struct xmit_frame *pxframe;
-	int	res = _SUCCESS;
 	u32 max_xmit_extbuf_size = MAX_XMIT_EXTBUF_SZ;
 	u32 num_xmit_extbuf = NR_XMIT_EXTBUFF;
 
@@ -97,7 +96,6 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 
 	if (!pxmitpriv->pallocated_frame_buf) {
 		pxmitpriv->pxmit_frame_buf = NULL;
-		res = _FAIL;
 		goto exit;
 	}
 	pxmitpriv->pxmit_frame_buf = (u8 *)ALIGN((size_t)(pxmitpriv->pallocated_frame_buf), 4);
@@ -132,10 +130,8 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 
 	pxmitpriv->pallocated_xmitbuf = vzalloc(NR_XMITBUFF * sizeof(struct xmit_buf) + 4);
 
-	if (!pxmitpriv->pallocated_xmitbuf) {
-		res = _FAIL;
+	if (!pxmitpriv->pallocated_xmitbuf)
 		goto free_frame_buf;
-	}
 
 	pxmitpriv->pxmitbuf = (u8 *)ALIGN((size_t)(pxmitpriv->pallocated_xmitbuf), 4);
 	/* pxmitpriv->pxmitbuf = pxmitpriv->pallocated_xmitbuf + 4 - */
@@ -153,10 +149,8 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 		/* Tx buf allocation may fail sometimes, so sleep and retry. */
 		if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) {
 			msleep(10);
-			if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ))) {
-				res = _FAIL;
+			if (rtw_xmit_resource_alloc(padapter, pxmitbuf, (MAX_XMITBUF_SZ + XMITBUF_ALIGN_SZ)))
 				goto free_xmitbuf;
-			}
 		}
 
 		pxmitbuf->flags = XMIT_VO_QUEUE;
@@ -172,10 +166,8 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 
 	pxmitpriv->pallocated_xmit_extbuf = vzalloc(num_xmit_extbuf * sizeof(struct xmit_buf) + 4);
 
-	if (!pxmitpriv->pallocated_xmit_extbuf) {
-		res = _FAIL;
+	if (!pxmitpriv->pallocated_xmit_extbuf)
 		goto free_xmitbuf;
-	}
 
 	pxmitpriv->pxmit_extbuf = (u8 *)ALIGN((size_t)(pxmitpriv->pallocated_xmit_extbuf), 4);
 
@@ -188,10 +180,8 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 		pxmitbuf->padapter = padapter;
 		pxmitbuf->ext_tag = true;
 
-		if (rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ)) {
-			res = _FAIL;
+		if (rtw_xmit_resource_alloc(padapter, pxmitbuf, max_xmit_extbuf_size + XMITBUF_ALIGN_SZ))
 			goto free_xmit_extbuf;
-		}
 
 		list_add_tail(&pxmitbuf->list, &pxmitpriv->free_xmit_extbuf_queue.queue);
 		pxmitbuf++;
@@ -199,10 +189,8 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 
 	pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf;
 
-	if (rtw_alloc_hwxmits(padapter)) {
-		res = _FAIL;
+	if (rtw_alloc_hwxmits(padapter))
 		goto free_xmit_extbuf;
-	}
 
 	rtw_init_hwxmits(pxmitpriv->hwxmits, pxmitpriv->hwxmit_entry);
 
@@ -225,7 +213,7 @@ s32	_rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter)
 
 	rtl8188eu_init_xmit_priv(padapter);
 
-	return _SUCCESS;
+	return 0;
 
 free_xmit_extbuf:
 	pxmitbuf = (struct xmit_buf *)pxmitpriv->pxmit_extbuf;
@@ -245,7 +233,7 @@ free_xmitbuf:
 free_frame_buf:
 	vfree(pxmitpriv->pallocated_frame_buf);
 exit:
-	return res;
+	return -ENOMEM;
 }
 
 static void rtw_pkt_complete(struct adapter *padapter, struct sk_buff *pkt)
diff --git a/drivers/staging/r8188eu/include/rtw_xmit.h b/drivers/staging/r8188eu/include/rtw_xmit.h
index cff065554608d..6e7ebea5362d8 100644
--- a/drivers/staging/r8188eu/include/rtw_xmit.h
+++ b/drivers/staging/r8188eu/include/rtw_xmit.h
@@ -351,7 +351,7 @@ s32 rtw_txframes_pending(struct adapter *padapter);
 s32 rtw_txframes_sta_ac_pending(struct adapter *padapter,
 				struct pkt_attrib *pattrib);
 void rtw_init_hwxmits(struct hw_xmit *phwxmit, int entry);
-s32 _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter);
+int _rtw_init_xmit_priv(struct xmit_priv *pxmitpriv, struct adapter *padapter);
 void _rtw_free_xmit_priv(struct xmit_priv *pxmitpriv);
 int rtw_alloc_hwxmits(struct adapter *padapter);
 void rtw_free_hwxmits(struct adapter *padapter);
diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index 66556e07ed93d..44eb95a7682c8 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -480,7 +480,7 @@ u8 rtw_init_drv_sw(struct adapter *padapter)
 
 	init_mlme_ext_priv(padapter);
 
-	if (_rtw_init_xmit_priv(&padapter->xmitpriv, padapter) == _FAIL) {
+	if (_rtw_init_xmit_priv(&padapter->xmitpriv, padapter)) {
 		dev_err(dvobj_to_dev(padapter->dvobj), "_rtw_init_xmit_priv failed\n");
 		goto free_mlme_ext;
 	}
-- 
GitLab


From 7209757311d6eec4c531feba31b170c33122491e Mon Sep 17 00:00:00 2001
From: Michael Straube <straube.linux@gmail.com>
Date: Fri, 11 Nov 2022 09:37:33 +0100
Subject: [PATCH 1741/4122] staging: r8188eu: binstallGrpkey is set to _FAIL

The variable binstallGrpkey is set to _FAIL which is defined as 0.
Use false to set the variable to get rid of another use of _FAIL.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Michael Straube <straube.linux@gmail.com>
Link: https://lore.kernel.org/r/20221111083733.3144-5-straube.linux@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/os_dep/os_intfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/os_dep/os_intfs.c b/drivers/staging/r8188eu/os_dep/os_intfs.c
index 44eb95a7682c8..2f59bb9947960 100644
--- a/drivers/staging/r8188eu/os_dep/os_intfs.c
+++ b/drivers/staging/r8188eu/os_dep/os_intfs.c
@@ -405,7 +405,7 @@ static void rtw_init_default_value(struct adapter *padapter)
 	pmlmepriv->htpriv.ampdu_enable = false;/* set to disabled */
 
 	/* security_priv */
-	psecuritypriv->binstallGrpkey = _FAIL;
+	psecuritypriv->binstallGrpkey = false;
 	psecuritypriv->sw_encrypt = pregistrypriv->software_encrypt;
 	psecuritypriv->sw_decrypt = pregistrypriv->software_decrypt;
 	psecuritypriv->dot11AuthAlgrthm = dot11AuthAlgrthm_Open; /* open system */
-- 
GitLab


From b397fc48c5d9b70ad17ffeaf8607a4f49055c0d9 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:34:54 +0100
Subject: [PATCH 1742/4122] staging: rtl8192e: Rename bTxDisableRate..,
 RegMaxLPSAwa.. and bTxUseD..

Rename variable bTxDisableRateFallBack to tx_dis_rate_fallback,
RegMaxLPSAwakeIntvl to reg_max_lps_awake_intvl and
bTxUseDriverAssingedRate to tx_use_drv_assinged_rate to avoid CamelCase
which is not accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/092eb2dc73d37daf851ea9ef9cb7e4df6f766845.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8192e/rtl8192e/r8192E_dev.c    |  4 ++--
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c  | 10 ++++----
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c    |  2 +-
 drivers/staging/rtl8192e/rtl819x_HTProc.c     |  4 ++--
 drivers/staging/rtl8192e/rtllib.h             | 10 ++++----
 drivers/staging/rtl8192e/rtllib_softmac.c     | 14 +++++------
 drivers/staging/rtl8192e/rtllib_tx.c          | 24 +++++++++----------
 7 files changed, 34 insertions(+), 34 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 210b7ecc273ab..39aac83d89604 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -1136,8 +1136,8 @@ void  rtl92e_fill_tx_desc(struct net_device *dev, struct tx_desc *pdesc,
 							  cb_desc->priority);
 	pdesc->TxFWInfoSize = sizeof(struct tx_fwinfo_8190pci);
 
-	pdesc->DISFB = cb_desc->bTxDisableRateFallBack;
-	pdesc->USERATE = cb_desc->bTxUseDriverAssingedRate;
+	pdesc->DISFB = cb_desc->tx_dis_rate_fallback;
+	pdesc->USERATE = cb_desc->tx_use_drv_assinged_rate;
 
 	pdesc->FirstSeg = 1;
 	pdesc->LastSeg = 1;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 213aac943ef76..952fe8d7a7be4 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -494,8 +494,8 @@ static void _rtl92e_prepare_beacon(struct tasklet_struct *t)
 	tcb_desc->queue_index = BEACON_QUEUE;
 	tcb_desc->data_rate = 2;
 	tcb_desc->RATRIndex = 7;
-	tcb_desc->bTxDisableRateFallBack = 1;
-	tcb_desc->bTxUseDriverAssingedRate = 1;
+	tcb_desc->tx_dis_rate_fallback = 1;
+	tcb_desc->tx_use_drv_assinged_rate = 1;
 	skb_push(pnewskb, priv->rtllib->tx_headroom);
 
 	pdesc = &ring->desc[0];
@@ -822,7 +822,7 @@ static void _rtl92e_init_priv_constant(struct net_device *dev)
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
 					&priv->rtllib->PowerSaveControl;
 
-	pPSC->RegMaxLPSAwakeIntvl = 5;
+	pPSC->reg_max_lps_awake_intvl = 5;
 }
 
 static void _rtl92e_init_priv_variable(struct net_device *dev)
@@ -1538,8 +1538,8 @@ static int _rtl92e_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	tcb_desc->RATRIndex = 7;
-	tcb_desc->bTxDisableRateFallBack = 1;
-	tcb_desc->bTxUseDriverAssingedRate = 1;
+	tcb_desc->tx_dis_rate_fallback = 1;
+	tcb_desc->tx_use_drv_assinged_rate = 1;
 	tcb_desc->bTxEnableFwCalcDur = 1;
 	skb_push(skb, priv->rtllib->tx_headroom);
 	ret = _rtl92e_tx(dev, skb);
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index 12eea4fcb9dde..ffef63e8dcaee 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -196,7 +196,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev,
 	netdev_info(dev, "%s(): set lps awake interval ! extra is %d\n",
 		    __func__, *extra);
 
-	pPSC->RegMaxLPSAwakeIntvl = *extra;
+	pPSC->reg_max_lps_awake_intvl = *extra;
 	mutex_unlock(&priv->wx_mutex);
 	return 0;
 }
diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index 2c0a8d5c8f279..7fad983ad4be1 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -91,8 +91,8 @@ void HTUpdateDefaultSetting(struct rtllib_device *ieee)
 	pHTInfo->self_mimo_ps = 3;
 	if (pHTInfo->self_mimo_ps == 2)
 		pHTInfo->self_mimo_ps = 3;
-	ieee->bTxDisableRateFallBack = 0;
-	ieee->bTxUseDriverAssingedRate = 0;
+	ieee->tx_dis_rate_fallback = 0;
+	ieee->tx_use_drv_assinged_rate = 0;
 
 	ieee->bTxEnableFwCalcDur = 1;
 
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index e0fa87b12cebb..ca9e016ffa9fe 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -126,8 +126,8 @@ struct cb_desc {
 	u8 bFirstSeg:1;
 	u8 bLastSeg:1;
 	u8 bEncrypt:1;
-	u8 bTxDisableRateFallBack:1;
-	u8 bTxUseDriverAssingedRate:1;
+	u8 tx_dis_rate_fallback:1;
+	u8 tx_use_drv_assinged_rate:1;
 	u8 bHwSec:1;
 
 	u8 nStuckCount;
@@ -1256,7 +1256,7 @@ struct rt_pwr_save_ctrl {
 
 	bool				bLeisurePs;
 	u8				LpsIdleCount;
-	u8				RegMaxLPSAwakeIntvl;
+	u8				reg_max_lps_awake_intvl;
 	u8				LPSAwakeIntvl;
 
 	u32				CurPsLevel;
@@ -1441,8 +1441,8 @@ struct rtllib_device {
 	u8	RegHTSuppRateSet[16];
 	u8	HTCurrentOperaRate;
 	u8	HTHighestOperaRate;
-	u8	bTxDisableRateFallBack;
-	u8	bTxUseDriverAssingedRate;
+	u8	tx_dis_rate_fallback;
+	u8	tx_use_drv_assinged_rate;
 	u8	bTxEnableFwCalcDur;
 	atomic_t	atm_swbw;
 
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 1253de4818053..604d4fd7e2cde 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -220,8 +220,8 @@ inline void softmac_mgmt_xmit(struct sk_buff *skb, struct rtllib_device *ieee)
 
 	tcb_desc->data_rate = MgntQuery_MgntFrameTxRate(ieee);
 	tcb_desc->RATRIndex = 7;
-	tcb_desc->bTxDisableRateFallBack = 1;
-	tcb_desc->bTxUseDriverAssingedRate = 1;
+	tcb_desc->tx_dis_rate_fallback = 1;
+	tcb_desc->tx_use_drv_assinged_rate = 1;
 	if (single) {
 		if (ieee->queue_stop) {
 			enqueue_mgmt(ieee, skb);
@@ -298,8 +298,8 @@ softmac_ps_mgmt_xmit(struct sk_buff *skb,
 
 	tcb_desc->data_rate = MgntQuery_MgntFrameTxRate(ieee);
 	tcb_desc->RATRIndex = 7;
-	tcb_desc->bTxDisableRateFallBack = 1;
-	tcb_desc->bTxUseDriverAssingedRate = 1;
+	tcb_desc->tx_dis_rate_fallback = 1;
+	tcb_desc->tx_use_drv_assinged_rate = 1;
 	if (single) {
 		if (type != RTLLIB_FTYPE_CTL) {
 			header->seq_ctl = cpu_to_le16(ieee->seq_ctrl[0] << 4);
@@ -1996,12 +1996,12 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time)
 
 			if (pPSC->LPSAwakeIntvl == 0)
 				pPSC->LPSAwakeIntvl = 1;
-			if (pPSC->RegMaxLPSAwakeIntvl == 0)
+			if (pPSC->reg_max_lps_awake_intvl == 0)
 				MaxPeriod = 1;
-			else if (pPSC->RegMaxLPSAwakeIntvl == 0xFF)
+			else if (pPSC->reg_max_lps_awake_intvl == 0xFF)
 				MaxPeriod = ieee->current_network.dtim_period;
 			else
-				MaxPeriod = pPSC->RegMaxLPSAwakeIntvl;
+				MaxPeriod = pPSC->reg_max_lps_awake_intvl;
 			pPSC->LPSAwakeIntvl = (pPSC->LPSAwakeIntvl >=
 					       MaxPeriod) ? MaxPeriod :
 					       (pPSC->LPSAwakeIntvl + 1);
diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index 101f441291451..be3779efcd233 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -477,13 +477,13 @@ NO_PROTECTION:
 static void rtllib_txrate_selectmode(struct rtllib_device *ieee,
 				     struct cb_desc *tcb_desc)
 {
-	if (ieee->bTxDisableRateFallBack)
-		tcb_desc->bTxDisableRateFallBack = true;
+	if (ieee->tx_dis_rate_fallback)
+		tcb_desc->tx_dis_rate_fallback = true;
 
-	if (ieee->bTxUseDriverAssingedRate)
-		tcb_desc->bTxUseDriverAssingedRate = true;
-	if (!tcb_desc->bTxDisableRateFallBack ||
-	    !tcb_desc->bTxUseDriverAssingedRate) {
+	if (ieee->tx_use_drv_assinged_rate)
+		tcb_desc->tx_use_drv_assinged_rate = true;
+	if (!tcb_desc->tx_dis_rate_fallback ||
+	    !tcb_desc->tx_use_drv_assinged_rate) {
 		if (ieee->iw_mode == IW_MODE_INFRA ||
 		    ieee->iw_mode == IW_MODE_ADHOC)
 			tcb_desc->RATRIndex = 0;
@@ -886,14 +886,14 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 			    HT_IOT_ACT_WA_IOT_Broadcom) {
 				tcb_desc->data_rate =
 					 MgntQuery_TxRateExcludeCCKRates(ieee);
-				tcb_desc->bTxDisableRateFallBack = false;
+				tcb_desc->tx_dis_rate_fallback = false;
 			} else {
 				tcb_desc->data_rate = ieee->basic_rate;
-				tcb_desc->bTxDisableRateFallBack = 1;
+				tcb_desc->tx_dis_rate_fallback = 1;
 			}
 
 			tcb_desc->RATRIndex = 7;
-			tcb_desc->bTxUseDriverAssingedRate = 1;
+			tcb_desc->tx_use_drv_assinged_rate = 1;
 		} else {
 			if (is_multicast_ether_addr(header.addr1))
 				tcb_desc->bMulticast = 1;
@@ -910,14 +910,14 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 				    HT_IOT_ACT_WA_IOT_Broadcom) {
 					tcb_desc->data_rate =
 					   MgntQuery_TxRateExcludeCCKRates(ieee);
-					tcb_desc->bTxDisableRateFallBack = false;
+					tcb_desc->tx_dis_rate_fallback = false;
 				} else {
 					tcb_desc->data_rate = MGN_1M;
-					tcb_desc->bTxDisableRateFallBack = 1;
+					tcb_desc->tx_dis_rate_fallback = 1;
 				}
 
 				tcb_desc->RATRIndex = 7;
-				tcb_desc->bTxUseDriverAssingedRate = 1;
+				tcb_desc->tx_use_drv_assinged_rate = 1;
 				tcb_desc->bdhcp = 1;
 			}
 
-- 
GitLab


From c7bf6d33d095637fb78ba484a181d17bc1a73a68 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:35:12 +0100
Subject: [PATCH 1743/4122] staging: rtl8192e: Rename Regdot11HTOper..,
 bSupportM.. and PowerSaveCo..

Rename variable Regdot11HTOperationalRateSet to
reg_dot11ht_oper_rate_set, bSupportMode to support_mode and
PowerSaveControl to pwr_save_ctrl to avoid CamelCase which is not accepted
by checkpatch. Fix unnecessary parentheses warning from checkpatch when
used with this variables.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/1ef7fd22b4a037c4d1f8685065ce7916b6f0930b.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8192e/rtl8192e/r8192E_phy.c    |  2 +-
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c  | 30 +++++++++----------
 drivers/staging/rtl8192e/rtl8192e/rtl_ps.c    | 10 +++----
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c    |  4 +--
 drivers/staging/rtl8192e/rtl819x_HTProc.c     |  2 +-
 drivers/staging/rtl8192e/rtllib.h             |  4 +--
 drivers/staging/rtl8192e/rtllib_softmac.c     | 10 +++----
 7 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
index 58da2dab55bd6..b14497423cbf7 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
@@ -1304,7 +1304,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					(&(priv->rtllib->PowerSaveControl));
+					(&priv->rtllib->pwr_save_ctrl);
 	bool bResult = true;
 	u8	i = 0, QueueID = 0;
 	struct rtl8192_tx_ring  *ring = NULL;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 952fe8d7a7be4..16e15b8e1b7a6 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -607,13 +607,13 @@ static void _rtl92e_refresh_support_rate(struct r8192_priv *priv)
 
 	if (ieee->mode == WIRELESS_MODE_N_24G ||
 	    ieee->mode == WIRELESS_MODE_N_5G) {
-		memcpy(ieee->Regdot11HTOperationalRateSet,
+		memcpy(ieee->reg_dot11ht_oper_rate_set,
 		       ieee->RegHTSuppRateSet, 16);
 		memcpy(ieee->Regdot11TxHTOperationalRateSet,
 		       ieee->RegHTSuppRateSet, 16);
 
 	} else {
-		memset(ieee->Regdot11HTOperationalRateSet, 0, 16);
+		memset(ieee->reg_dot11ht_oper_rate_set, 0, 16);
 	}
 }
 
@@ -642,19 +642,19 @@ static u8 _rtl92e_get_supported_wireless_mode(struct net_device *dev)
 void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	u8 bSupportMode = _rtl92e_get_supported_wireless_mode(dev);
+	u8 support_mode = _rtl92e_get_supported_wireless_mode(dev);
 
 	if ((wireless_mode == WIRELESS_MODE_AUTO) ||
-	    ((wireless_mode & bSupportMode) == 0)) {
-		if (bSupportMode & WIRELESS_MODE_N_24G) {
+	    ((wireless_mode & support_mode) == 0)) {
+		if (support_mode & WIRELESS_MODE_N_24G) {
 			wireless_mode = WIRELESS_MODE_N_24G;
-		} else if (bSupportMode & WIRELESS_MODE_N_5G) {
+		} else if (support_mode & WIRELESS_MODE_N_5G) {
 			wireless_mode = WIRELESS_MODE_N_5G;
-		} else if ((bSupportMode & WIRELESS_MODE_A)) {
+		} else if ((support_mode & WIRELESS_MODE_A)) {
 			wireless_mode = WIRELESS_MODE_A;
-		} else if ((bSupportMode & WIRELESS_MODE_G)) {
+		} else if ((support_mode & WIRELESS_MODE_G)) {
 			wireless_mode = WIRELESS_MODE_G;
-		} else if ((bSupportMode & WIRELESS_MODE_B)) {
+		} else if ((support_mode & WIRELESS_MODE_B)) {
 			wireless_mode = WIRELESS_MODE_B;
 		} else {
 			netdev_info(dev,
@@ -683,7 +683,7 @@ static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					(&priv->rtllib->PowerSaveControl);
+					(&priv->rtllib->pwr_save_ctrl);
 	bool init_status;
 
 	priv->bdisable_nic = false;
@@ -820,7 +820,7 @@ static void _rtl92e_init_priv_constant(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					&priv->rtllib->PowerSaveControl;
+					&priv->rtllib->pwr_save_ctrl;
 
 	pPSC->reg_max_lps_awake_intvl = 5;
 }
@@ -877,7 +877,7 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rf_change_in_progress = false;
 	priv->bHwRfOffAction = 0;
 	priv->SetRFPowerStateInProgress = false;
-	priv->rtllib->PowerSaveControl.bLeisurePs = true;
+	priv->rtllib->pwr_save_ctrl.bLeisurePs = true;
 	priv->rtllib->LPSDelayCnt = 0;
 	priv->rtllib->sta_sleep = LPS_IS_WAKE;
 	priv->rtllib->rf_power_state = rf_on;
@@ -1272,7 +1272,7 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 	static u8 check_reset_cnt;
 	unsigned long flags;
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					(&priv->rtllib->PowerSaveControl);
+					(&priv->rtllib->pwr_save_ctrl);
 	bool bBusyTraffic = false;
 	bool	bHigherBusyTraffic = false;
 	bool	bHigherBusyRxTraffic = false;
@@ -1295,7 +1295,7 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 		     RTLLIB_NOLINK) &&
 		     (ieee->rf_power_state == rf_on) && !ieee->is_set_key &&
 		     (!ieee->proto_stoppping) && !ieee->wx_set_enc) {
-			if ((ieee->PowerSaveControl.ReturnPoint ==
+			if ((ieee->pwr_save_ctrl.ReturnPoint ==
 			     IPS_CALLBACK_NONE) &&
 			     (!ieee->bNetPromiscuousMode)) {
 				rtl92e_ips_enter(dev);
@@ -2422,7 +2422,7 @@ bool rtl92e_enable_nic(struct net_device *dev)
 	bool init_status = true;
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					(&priv->rtllib->PowerSaveControl);
+					(&priv->rtllib->pwr_save_ctrl);
 
 	if (!priv->up) {
 		netdev_warn(dev, "%s(): Driver is already down!\n", __func__);
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
index 1501f7be8eee9..8a6b7f58ebfdd 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
@@ -101,7 +101,7 @@ static void _rtl92e_ps_update_rf_state(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					&(priv->rtllib->PowerSaveControl);
+					&priv->rtllib->pwr_save_ctrl;
 
 	pPSC->bSwRfProcessing = true;
 	rtl92e_set_rf_state(dev, pPSC->eInactivePowerState, RF_CHANGE_BY_IPS);
@@ -113,7 +113,7 @@ void rtl92e_ips_enter(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					&(priv->rtllib->PowerSaveControl);
+					&priv->rtllib->pwr_save_ctrl;
 	enum rt_rf_power_state rt_state;
 
 	rt_state = priv->rtllib->rf_power_state;
@@ -129,7 +129,7 @@ void rtl92e_ips_leave(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					&(priv->rtllib->PowerSaveControl);
+					&priv->rtllib->pwr_save_ctrl;
 	enum rt_rf_power_state rt_state;
 
 	rt_state = priv->rtllib->rf_power_state;
@@ -207,7 +207,7 @@ void rtl92e_leisure_ps_enter(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					&(priv->rtllib->PowerSaveControl);
+					&priv->rtllib->pwr_save_ctrl;
 
 	if (!((priv->rtllib->iw_mode == IW_MODE_INFRA) &&
 	    (priv->rtllib->state == RTLLIB_LINKED))
@@ -233,7 +233,7 @@ void rtl92e_leisure_ps_leave(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					&(priv->rtllib->PowerSaveControl);
+					&priv->rtllib->pwr_save_ctrl;
 
 	if (pPSC->bLeisurePs) {
 		if (priv->rtllib->ps != RTLLIB_PS_DISABLED) {
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index ffef63e8dcaee..d68f8f5902c8d 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -160,7 +160,7 @@ static int _rtl92e_wx_adapter_power_status(struct net_device *dev,
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					(&priv->rtllib->PowerSaveControl);
+					(&priv->rtllib->pwr_save_ctrl);
 	struct rtllib_device *ieee = priv->rtllib;
 
 	mutex_lock(&priv->wx_mutex);
@@ -189,7 +189,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev,
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
-					(&priv->rtllib->PowerSaveControl);
+					(&priv->rtllib->pwr_save_ctrl);
 
 	mutex_lock(&priv->wx_mutex);
 
diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index 7fad983ad4be1..52f41562dd824 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -323,7 +323,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap,
 		pCapELE->MPDUDensity	= 0;
 	}
 
-	memcpy(pCapELE->MCS, ieee->Regdot11HTOperationalRateSet, 16);
+	memcpy(pCapELE->MCS, ieee->reg_dot11ht_oper_rate_set, 16);
 	memset(&pCapELE->ExtHTCapInfo, 0, 2);
 	memset(pCapELE->TxBFCap, 0, 4);
 
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index ca9e016ffa9fe..d80883b211cea 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1435,7 +1435,7 @@ struct rtllib_device {
 	struct rt_hi_throughput *pHTInfo;
 
 	spinlock_t reorder_spinlock;
-	u8	Regdot11HTOperationalRateSet[16];
+	u8	reg_dot11ht_oper_rate_set[16];
 	u8	Regdot11TxHTOperationalRateSet[16];
 	u8	dot11HTOperationalRateSet[16];
 	u8	RegHTSuppRateSet[16];
@@ -1645,7 +1645,7 @@ struct rtllib_device {
 
 	struct rt_link_detect LinkDetectInfo;
 	bool bIsAggregateFrame;
-	struct rt_pwr_save_ctrl PowerSaveControl;
+	struct rt_pwr_save_ctrl pwr_save_ctrl;
 
 	/* used if IEEE_SOFTMAC_TX_QUEUE is set */
 	struct tx_pending tx_pending;
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 604d4fd7e2cde..498febc88b15e 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -1504,7 +1504,7 @@ static void rtllib_associate_complete_wq(void *data)
 				     container_of_work_rsl(data,
 				     struct rtllib_device,
 				     associate_complete_wq);
-	struct rt_pwr_save_ctrl *pPSC = &(ieee->PowerSaveControl);
+	struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl;
 
 	netdev_info(ieee->dev, "Associated successfully with %pM\n",
 		    ieee->current_network.bssid);
@@ -1960,7 +1960,7 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time)
 {
 	int timeout;
 	u8 dtim;
-	struct rt_pwr_save_ctrl *pPSC = &(ieee->PowerSaveControl);
+	struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl;
 
 	if (ieee->LPSDelayCnt) {
 		ieee->LPSDelayCnt--;
@@ -2984,9 +2984,9 @@ int rtllib_softmac_init(struct rtllib_device *ieee)
 	ieee->ps = RTLLIB_PS_DISABLED;
 	ieee->sta_sleep = LPS_IS_WAKE;
 
-	ieee->Regdot11HTOperationalRateSet[0] = 0xff;
-	ieee->Regdot11HTOperationalRateSet[1] = 0xff;
-	ieee->Regdot11HTOperationalRateSet[4] = 0x01;
+	ieee->reg_dot11ht_oper_rate_set[0] = 0xff;
+	ieee->reg_dot11ht_oper_rate_set[1] = 0xff;
+	ieee->reg_dot11ht_oper_rate_set[4] = 0x01;
 
 	ieee->Regdot11TxHTOperationalRateSet[0] = 0xff;
 	ieee->Regdot11TxHTOperationalRateSet[1] = 0xff;
-- 
GitLab


From ca25401e21ab40539a5ed4b86aecc3698f89b86f Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:35:27 +0100
Subject: [PATCH 1744/4122] staging: rtl8192e: Rename Regdot11TxHT..,
 dot11HTOpera.. and RegHTSuppRa..

Rename variable Regdot11TxHTOperationalRateSet to
reg_dot11tx_ht_oper_rate_set, dot11HTOperationalRateSet to
dot11ht_oper_rate_set and RegHTSuppRateSet to reg_ht_supp_rate_set to
avoid CamelCase which is not accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/0c87e7ffc94be1c26f6400f5e12419f2df0418a3.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c |  2 +-
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   |  6 +++---
 drivers/staging/rtl8192e/rtl819x_HTProc.c      | 14 +++++++-------
 drivers/staging/rtl8192e/rtllib.h              |  6 +++---
 drivers/staging/rtl8192e/rtllib_softmac.c      | 10 +++++-----
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 39aac83d89604..bbaf20fbfde05 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -1958,7 +1958,7 @@ void rtl92e_update_ratr_table(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	struct rtllib_device *ieee = priv->rtllib;
-	u8 *pMcsRate = ieee->dot11HTOperationalRateSet;
+	u8 *pMcsRate = ieee->dot11ht_oper_rate_set;
 	u32 ratr_value = 0;
 	u16 rate_config = 0;
 	u8 rate_index = 0;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 16e15b8e1b7a6..d5eec6d6b644f 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -608,9 +608,9 @@ static void _rtl92e_refresh_support_rate(struct r8192_priv *priv)
 	if (ieee->mode == WIRELESS_MODE_N_24G ||
 	    ieee->mode == WIRELESS_MODE_N_5G) {
 		memcpy(ieee->reg_dot11ht_oper_rate_set,
-		       ieee->RegHTSuppRateSet, 16);
-		memcpy(ieee->Regdot11TxHTOperationalRateSet,
-		       ieee->RegHTSuppRateSet, 16);
+		       ieee->reg_ht_supp_rate_set, 16);
+		memcpy(ieee->reg_dot11tx_ht_oper_rate_set,
+		       ieee->reg_ht_supp_rate_set, 16);
 
 	} else {
 		memset(ieee->reg_dot11ht_oper_rate_set, 0, 16);
diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index 52f41562dd824..2092ebf717a1f 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -489,7 +489,7 @@ static u8 HTFilterMCSRate(struct rtllib_device *ieee, u8 *pSupportMCS,
 	u8 i;
 
 	for (i = 0; i <= 15; i++)
-		pOperateMCS[i] = ieee->Regdot11TxHTOperationalRateSet[i] &
+		pOperateMCS[i] = ieee->reg_dot11tx_ht_oper_rate_set[i] &
 				 pSupportMCS[i];
 
 	HT_PickMCSRate(ieee, pOperateMCS);
@@ -604,7 +604,7 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 
 	HTIOTActDetermineRaFunc(ieee, ((pPeerHTCap->MCS[1]) != 0));
 
-	HTFilterMCSRate(ieee, pPeerHTCap->MCS, ieee->dot11HTOperationalRateSet);
+	HTFilterMCSRate(ieee, pPeerHTCap->MCS, ieee->dot11ht_oper_rate_set);
 
 	pHTInfo->peer_mimo_ps = pPeerHTCap->MimoPwrSave;
 	if (pHTInfo->peer_mimo_ps == MIMO_PS_STATIC)
@@ -612,7 +612,7 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 	else
 		pMcsFilter = MCS_FILTER_ALL;
 	ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee,
-						       ieee->dot11HTOperationalRateSet,
+						       ieee->dot11ht_oper_rate_set,
 						       pMcsFilter);
 	ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate;
 
@@ -661,7 +661,7 @@ void HTInitializeHTInfo(struct rtllib_device *ieee)
 	pHTInfo->iot_ra_func = 0;
 
 	{
-		u8 *RegHTSuppRateSets = &ieee->RegHTSuppRateSet[0];
+		u8 *RegHTSuppRateSets = &ieee->reg_ht_supp_rate_set[0];
 
 		RegHTSuppRateSets[0] = 0xFF;
 		RegHTSuppRateSets[1] = 0xFF;
@@ -796,10 +796,10 @@ void HTUseDefaultSetting(struct rtllib_device *ieee)
 
 		pHTInfo->current_mpdu_density = pHTInfo->current_mpdu_density;
 
-		HTFilterMCSRate(ieee, ieee->Regdot11TxHTOperationalRateSet,
-				ieee->dot11HTOperationalRateSet);
+		HTFilterMCSRate(ieee, ieee->reg_dot11tx_ht_oper_rate_set,
+				ieee->dot11ht_oper_rate_set);
 		ieee->HTHighestOperaRate = HTGetHighestMCSRate(ieee,
-							       ieee->dot11HTOperationalRateSet,
+							       ieee->dot11ht_oper_rate_set,
 							       MCS_FILTER_ALL);
 		ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate;
 
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index d80883b211cea..487aa337eafa4 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1436,9 +1436,9 @@ struct rtllib_device {
 
 	spinlock_t reorder_spinlock;
 	u8	reg_dot11ht_oper_rate_set[16];
-	u8	Regdot11TxHTOperationalRateSet[16];
-	u8	dot11HTOperationalRateSet[16];
-	u8	RegHTSuppRateSet[16];
+	u8	reg_dot11tx_ht_oper_rate_set[16];
+	u8	dot11ht_oper_rate_set[16];
+	u8	reg_ht_supp_rate_set[16];
 	u8	HTCurrentOperaRate;
 	u8	HTHighestOperaRate;
 	u8	tx_dis_rate_fallback;
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 498febc88b15e..09cc3b3068cc8 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -1532,7 +1532,7 @@ static void rtllib_associate_complete_wq(void *data)
 			    "Successfully associated, ht not enabled(%d, %d)\n",
 			    ieee->pHTInfo->bCurrentHTSupport,
 			    ieee->pHTInfo->bEnableHT);
-		memset(ieee->dot11HTOperationalRateSet, 0, 16);
+		memset(ieee->dot11ht_oper_rate_set, 0, 16);
 	}
 	ieee->LinkDetectInfo.SlotNum = 2 * (1 +
 				       ieee->current_network.beacon_interval /
@@ -1728,7 +1728,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee,
 					netdev_info(ieee->dev,
 						    "Using B rates\n");
 				}
-				memset(ieee->dot11HTOperationalRateSet, 0, 16);
+				memset(ieee->dot11ht_oper_rate_set, 0, 16);
 				ieee->state = RTLLIB_LINKED;
 			}
 		}
@@ -2988,9 +2988,9 @@ int rtllib_softmac_init(struct rtllib_device *ieee)
 	ieee->reg_dot11ht_oper_rate_set[1] = 0xff;
 	ieee->reg_dot11ht_oper_rate_set[4] = 0x01;
 
-	ieee->Regdot11TxHTOperationalRateSet[0] = 0xff;
-	ieee->Regdot11TxHTOperationalRateSet[1] = 0xff;
-	ieee->Regdot11TxHTOperationalRateSet[4] = 0x01;
+	ieee->reg_dot11tx_ht_oper_rate_set[0] = 0xff;
+	ieee->reg_dot11tx_ht_oper_rate_set[1] = 0xff;
+	ieee->reg_dot11tx_ht_oper_rate_set[4] = 0x01;
 
 	ieee->FirstIe_InScan = false;
 	ieee->actscanning = false;
-- 
GitLab


From ca0298c09a5ec55954c0c0c1585e6a5643b8d3ca Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:35:34 +0100
Subject: [PATCH 1745/4122] staging: rtl8192e: Rename pPSC

Rename variable pPSC to psc to avoid CamelCase which is not accepted by
checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/47ded8a906e55d6f09b51cd8f2dfb78b7b92c1cc.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8192e/rtl8192e/r8192E_phy.c    | 14 +++----
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c  | 16 ++++----
 drivers/staging/rtl8192e/rtl8192e/rtl_ps.c    | 32 ++++++++--------
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c    | 10 ++---
 drivers/staging/rtl8192e/rtllib.h             | 10 ++---
 drivers/staging/rtl8192e/rtllib_softmac.c     | 38 +++++++++----------
 6 files changed, 60 insertions(+), 60 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
index b14497423cbf7..c357adf95a3d0 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
@@ -1303,7 +1303,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 				       enum rt_rf_power_state rf_power_state)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					(&priv->rtllib->pwr_save_ctrl);
 	bool bResult = true;
 	u8	i = 0, QueueID = 0;
@@ -1318,7 +1318,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 		switch (rf_power_state) {
 		case rf_on:
 			if ((priv->rtllib->rf_power_state == rf_off) &&
-			     RT_IN_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC)) {
+			     RT_IN_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC)) {
 				bool rtstatus;
 				u32 InitilizeCount = 3;
 
@@ -1335,7 +1335,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 					return false;
 				}
 
-				RT_CLEAR_PS_LEVEL(pPSC,
+				RT_CLEAR_PS_LEVEL(psc,
 						  RT_RF_OFF_LEVL_HALT_NIC);
 			} else {
 				rtl92e_writeb(dev, ANAPAR, 0x37);
@@ -1399,11 +1399,11 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 					break;
 			}
 
-			if (pPSC->RegRfPsLevel & RT_RF_OFF_LEVL_HALT_NIC &&
-			    !RT_IN_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC)) {
+			if (psc->RegRfPsLevel & RT_RF_OFF_LEVL_HALT_NIC &&
+			    !RT_IN_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC)) {
 				rtl92e_disable_nic(dev);
-				RT_SET_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC);
-			} else if (!(pPSC->RegRfPsLevel &
+				RT_SET_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC);
+			} else if (!(psc->RegRfPsLevel &
 				   RT_RF_OFF_LEVL_HALT_NIC)) {
 				rtl92e_set_rf_off(dev);
 			}
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index d5eec6d6b644f..144bc7c9abd52 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -682,7 +682,7 @@ void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode)
 static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					(&priv->rtllib->pwr_save_ctrl);
 	bool init_status;
 
@@ -700,7 +700,7 @@ static int _rtl92e_sta_up(struct net_device *dev, bool is_silent_reset)
 		return -1;
 	}
 
-	RT_CLEAR_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC);
+	RT_CLEAR_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC);
 	priv->bfirst_init = false;
 
 	if (priv->polling_timer_on == 0)
@@ -819,10 +819,10 @@ static void _rtl92e_init_priv_handler(struct net_device *dev)
 static void _rtl92e_init_priv_constant(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					&priv->rtllib->pwr_save_ctrl;
 
-	pPSC->reg_max_lps_awake_intvl = 5;
+	psc->reg_max_lps_awake_intvl = 5;
 }
 
 static void _rtl92e_init_priv_variable(struct net_device *dev)
@@ -1271,7 +1271,7 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 	enum reset_type ResetType = RESET_TYPE_NORESET;
 	static u8 check_reset_cnt;
 	unsigned long flags;
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					(&priv->rtllib->pwr_save_ctrl);
 	bool bBusyTraffic = false;
 	bool	bHigherBusyTraffic = false;
@@ -1389,7 +1389,7 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 
 	spin_lock_irqsave(&priv->tx_lock, flags);
 	if ((check_reset_cnt++ >= 3) && (!ieee->is_roaming) &&
-	    (!priv->rf_change_in_progress) && (!pPSC->bSwRfProcessing)) {
+	    (!priv->rf_change_in_progress) && (!psc->bSwRfProcessing)) {
 		ResetType = _rtl92e_if_check_reset(dev);
 		check_reset_cnt = 3;
 	}
@@ -2421,7 +2421,7 @@ bool rtl92e_enable_nic(struct net_device *dev)
 {
 	bool init_status = true;
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					(&priv->rtllib->pwr_save_ctrl);
 
 	if (!priv->up) {
@@ -2437,7 +2437,7 @@ bool rtl92e_enable_nic(struct net_device *dev)
 		priv->bdisable_nic = false;
 		return false;
 	}
-	RT_CLEAR_PS_LEVEL(pPSC, RT_RF_OFF_LEVL_HALT_NIC);
+	RT_CLEAR_PS_LEVEL(psc, RT_RF_OFF_LEVL_HALT_NIC);
 	priv->bfirst_init = false;
 
 	rtl92e_irq_enable(dev);
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
index 8a6b7f58ebfdd..ef4f736ce3255 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_ps.c
@@ -100,27 +100,27 @@ void rtl92e_enter_sleep(struct net_device *dev, u64 time)
 static void _rtl92e_ps_update_rf_state(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					&priv->rtllib->pwr_save_ctrl;
 
-	pPSC->bSwRfProcessing = true;
-	rtl92e_set_rf_state(dev, pPSC->eInactivePowerState, RF_CHANGE_BY_IPS);
+	psc->bSwRfProcessing = true;
+	rtl92e_set_rf_state(dev, psc->eInactivePowerState, RF_CHANGE_BY_IPS);
 
-	pPSC->bSwRfProcessing = false;
+	psc->bSwRfProcessing = false;
 }
 
 void rtl92e_ips_enter(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					&priv->rtllib->pwr_save_ctrl;
 	enum rt_rf_power_state rt_state;
 
 	rt_state = priv->rtllib->rf_power_state;
-	if (rt_state == rf_on && !pPSC->bSwRfProcessing &&
+	if (rt_state == rf_on && !psc->bSwRfProcessing &&
 		(priv->rtllib->state != RTLLIB_LINKED) &&
 		(priv->rtllib->iw_mode != IW_MODE_MASTER)) {
-		pPSC->eInactivePowerState = rf_off;
+		psc->eInactivePowerState = rf_off;
 		_rtl92e_ps_update_rf_state(dev);
 	}
 }
@@ -128,14 +128,14 @@ void rtl92e_ips_enter(struct net_device *dev)
 void rtl92e_ips_leave(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					&priv->rtllib->pwr_save_ctrl;
 	enum rt_rf_power_state rt_state;
 
 	rt_state = priv->rtllib->rf_power_state;
-	if (rt_state != rf_on  && !pPSC->bSwRfProcessing &&
+	if (rt_state != rf_on  && !psc->bSwRfProcessing &&
 	    priv->rtllib->rf_off_reason <= RF_CHANGE_BY_IPS) {
-		pPSC->eInactivePowerState = rf_on;
+		psc->eInactivePowerState = rf_on;
 		_rtl92e_ps_update_rf_state(dev);
 	}
 }
@@ -206,7 +206,7 @@ static bool _rtl92e_ps_set_mode(struct net_device *dev, u8 rtPsMode)
 void rtl92e_leisure_ps_enter(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					&priv->rtllib->pwr_save_ctrl;
 
 	if (!((priv->rtllib->iw_mode == IW_MODE_INFRA) &&
@@ -215,8 +215,8 @@ void rtl92e_leisure_ps_enter(struct net_device *dev)
 	    (priv->rtllib->iw_mode == IW_MODE_MASTER))
 		return;
 
-	if (pPSC->bLeisurePs) {
-		if (pPSC->LpsIdleCount >= RT_CHECK_FOR_HANG_PERIOD) {
+	if (psc->bLeisurePs) {
+		if (psc->LpsIdleCount >= RT_CHECK_FOR_HANG_PERIOD) {
 
 			if (priv->rtllib->ps == RTLLIB_PS_DISABLED) {
 				if (priv->rtllib->SetFwCmdHandler)
@@ -225,17 +225,17 @@ void rtl92e_leisure_ps_enter(struct net_device *dev)
 							 RTLLIB_PS_UNICAST);
 			}
 		} else
-			pPSC->LpsIdleCount++;
+			psc->LpsIdleCount++;
 	}
 }
 
 void rtl92e_leisure_ps_leave(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					&priv->rtllib->pwr_save_ctrl;
 
-	if (pPSC->bLeisurePs) {
+	if (psc->bLeisurePs) {
 		if (priv->rtllib->ps != RTLLIB_PS_DISABLED) {
 			_rtl92e_ps_set_mode(dev, RTLLIB_PS_DISABLED);
 			if (priv->rtllib->SetFwCmdHandler)
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index d68f8f5902c8d..b200d53c8e3d2 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -159,7 +159,7 @@ static int _rtl92e_wx_adapter_power_status(struct net_device *dev,
 					   union iwreq_data *wrqu, char *extra)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					(&priv->rtllib->pwr_save_ctrl);
 	struct rtllib_device *ieee = priv->rtllib;
 
@@ -167,13 +167,13 @@ static int _rtl92e_wx_adapter_power_status(struct net_device *dev,
 
 	if (*extra || priv->force_lps) {
 		priv->ps_force = false;
-		pPSC->bLeisurePs = true;
+		psc->bLeisurePs = true;
 	} else {
 		if (priv->rtllib->state == RTLLIB_LINKED)
 			rtl92e_leisure_ps_leave(dev);
 
 		priv->ps_force = true;
-		pPSC->bLeisurePs = false;
+		psc->bLeisurePs = false;
 		ieee->ps = *extra;
 	}
 
@@ -188,7 +188,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev,
 					     char *extra)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_pwr_save_ctrl *pPSC = (struct rt_pwr_save_ctrl *)
+	struct rt_pwr_save_ctrl *psc = (struct rt_pwr_save_ctrl *)
 					(&priv->rtllib->pwr_save_ctrl);
 
 	mutex_lock(&priv->wx_mutex);
@@ -196,7 +196,7 @@ static int _rtl92e_wx_set_lps_awake_interval(struct net_device *dev,
 	netdev_info(dev, "%s(): set lps awake interval ! extra is %d\n",
 		    __func__, *extra);
 
-	pPSC->reg_max_lps_awake_intvl = *extra;
+	psc->reg_max_lps_awake_intvl = *extra;
 	mutex_unlock(&priv->wx_mutex);
 	return 0;
 }
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index 487aa337eafa4..6b7b2c887c347 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -111,11 +111,11 @@ static inline void *netdev_priv_rsl(struct net_device *dev)
 #define SUPPORT_CKIP_MIC			0x08
 #define SUPPORT_CKIP_PK			0x10
 #define	RT_RF_OFF_LEVL_HALT_NIC		BIT3
-#define	RT_IN_PS_LEVEL(pPSC, _PS_FLAG)		\
-	((pPSC->CurPsLevel & _PS_FLAG) ? true : false)
-#define	RT_CLEAR_PS_LEVEL(pPSC, _PS_FLAG)	\
-	(pPSC->CurPsLevel &= (~(_PS_FLAG)))
-#define	RT_SET_PS_LEVEL(pPSC, _PS_FLAG)	(pPSC->CurPsLevel |= _PS_FLAG)
+#define	RT_IN_PS_LEVEL(psc, _PS_FLAG)		\
+	((psc->CurPsLevel & _PS_FLAG) ? true : false)
+#define	RT_CLEAR_PS_LEVEL(psc, _PS_FLAG)	\
+	(psc->CurPsLevel &= (~(_PS_FLAG)))
+#define	RT_SET_PS_LEVEL(psc, _PS_FLAG)	(psc->CurPsLevel |= _PS_FLAG)
 
 /* defined for skb cb field */
 /* At most 28 byte */
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 09cc3b3068cc8..ee4173a6ade02 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -1504,7 +1504,7 @@ static void rtllib_associate_complete_wq(void *data)
 				     container_of_work_rsl(data,
 				     struct rtllib_device,
 				     associate_complete_wq);
-	struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl;
+	struct rt_pwr_save_ctrl *psc = &ieee->pwr_save_ctrl;
 
 	netdev_info(ieee->dev, "Associated successfully with %pM\n",
 		    ieee->current_network.bssid);
@@ -1542,7 +1542,7 @@ static void rtllib_associate_complete_wq(void *data)
 		ieee->LinkDetectInfo.NumRecvBcnInPeriod = 1;
 		ieee->LinkDetectInfo.NumRecvDataInPeriod = 1;
 	}
-	pPSC->LpsIdleCount = 0;
+	psc->LpsIdleCount = 0;
 	ieee->link_change(ieee->dev);
 
 	if (ieee->is_silent_reset) {
@@ -1960,7 +1960,7 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time)
 {
 	int timeout;
 	u8 dtim;
-	struct rt_pwr_save_ctrl *pPSC = &ieee->pwr_save_ctrl;
+	struct rt_pwr_save_ctrl *psc = &ieee->pwr_save_ctrl;
 
 	if (ieee->LPSDelayCnt) {
 		ieee->LPSDelayCnt--;
@@ -1990,21 +1990,21 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time)
 
 	if (time) {
 		if (ieee->bAwakePktSent) {
-			pPSC->LPSAwakeIntvl = 1;
+			psc->LPSAwakeIntvl = 1;
 		} else {
 			u8 MaxPeriod = 1;
 
-			if (pPSC->LPSAwakeIntvl == 0)
-				pPSC->LPSAwakeIntvl = 1;
-			if (pPSC->reg_max_lps_awake_intvl == 0)
+			if (psc->LPSAwakeIntvl == 0)
+				psc->LPSAwakeIntvl = 1;
+			if (psc->reg_max_lps_awake_intvl == 0)
 				MaxPeriod = 1;
-			else if (pPSC->reg_max_lps_awake_intvl == 0xFF)
+			else if (psc->reg_max_lps_awake_intvl == 0xFF)
 				MaxPeriod = ieee->current_network.dtim_period;
 			else
-				MaxPeriod = pPSC->reg_max_lps_awake_intvl;
-			pPSC->LPSAwakeIntvl = (pPSC->LPSAwakeIntvl >=
+				MaxPeriod = psc->reg_max_lps_awake_intvl;
+			psc->LPSAwakeIntvl = (psc->LPSAwakeIntvl >=
 					       MaxPeriod) ? MaxPeriod :
-					       (pPSC->LPSAwakeIntvl + 1);
+					       (psc->LPSAwakeIntvl + 1);
 		}
 		{
 			u8 LPSAwakeIntvl_tmp = 0;
@@ -2012,23 +2012,23 @@ static short rtllib_sta_ps_sleep(struct rtllib_device *ieee, u64 *time)
 			u8 count = ieee->current_network.tim.tim_count;
 
 			if (count == 0) {
-				if (pPSC->LPSAwakeIntvl > period)
+				if (psc->LPSAwakeIntvl > period)
 					LPSAwakeIntvl_tmp = period +
-						 (pPSC->LPSAwakeIntvl -
+						 (psc->LPSAwakeIntvl -
 						 period) -
-						 ((pPSC->LPSAwakeIntvl-period) %
+						 ((psc->LPSAwakeIntvl-period) %
 						 period);
 				else
-					LPSAwakeIntvl_tmp = pPSC->LPSAwakeIntvl;
+					LPSAwakeIntvl_tmp = psc->LPSAwakeIntvl;
 
 			} else {
-				if (pPSC->LPSAwakeIntvl >
+				if (psc->LPSAwakeIntvl >
 				    ieee->current_network.tim.tim_count)
 					LPSAwakeIntvl_tmp = count +
-					(pPSC->LPSAwakeIntvl - count) -
-					((pPSC->LPSAwakeIntvl-count)%period);
+					(psc->LPSAwakeIntvl - count) -
+					((psc->LPSAwakeIntvl-count)%period);
 				else
-					LPSAwakeIntvl_tmp = pPSC->LPSAwakeIntvl;
+					LPSAwakeIntvl_tmp = psc->LPSAwakeIntvl;
 			}
 
 		*time = ieee->current_network.last_dtim_sta_time
-- 
GitLab


From ab4bcf795e46de03772c583e5180dfaa67eb0293 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:35:43 +0100
Subject: [PATCH 1746/4122] staging: rtl8192e: Rename RFInProgres.., bEnableHT
 and RegChannelPlan

Rename variable RFInProgressTimeOut to rf_in_progress_timeout, bEnableHT
to enable_ht and RegChannelPlan to reg_chnl_plan to avoid CamelCase which
is not accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/aa3b8fd4a51fc9b1c32566cd079590bf11a9190d.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c |  4 ++--
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   | 12 ++++++------
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h   |  2 +-
 drivers/staging/rtl8192e/rtl819x_HT.h          |  2 +-
 drivers/staging/rtl8192e/rtl819x_HTProc.c      |  4 ++--
 drivers/staging/rtl8192e/rtllib_softmac.c      | 14 +++++++-------
 drivers/staging/rtl8192e/rtllib_softmac_wx.c   |  2 +-
 drivers/staging/rtl8192e/rtllib_tx.c           |  8 ++++----
 8 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index bbaf20fbfde05..d0ce898370737 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -474,10 +474,10 @@ static void _rtl92e_read_eeprom_info(struct net_device *dev)
 
 	priv->rf_chip = RF_8256;
 
-	if (priv->RegChannelPlan == 0xf)
+	if (priv->reg_chnl_plan == 0xf)
 		priv->ChannelPlan = priv->eeprom_ChannelPlan;
 	else
-		priv->ChannelPlan = priv->RegChannelPlan;
+		priv->ChannelPlan = priv->reg_chnl_plan;
 
 	if (priv->eeprom_vid == 0x1186 &&  priv->eeprom_did == 0x3304)
 		priv->CustomerID =  RT_CID_DLINK;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 144bc7c9abd52..1bf5e760d87ae 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -672,9 +672,9 @@ void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode)
 
 	if ((wireless_mode == WIRELESS_MODE_N_24G) ||
 	    (wireless_mode == WIRELESS_MODE_N_5G)) {
-		priv->rtllib->pHTInfo->bEnableHT = 1;
+		priv->rtllib->pHTInfo->enable_ht = 1;
 	} else {
-		priv->rtllib->pHTInfo->bEnableHT = 0;
+		priv->rtllib->pHTInfo->enable_ht = 0;
 	}
 	_rtl92e_refresh_support_rate(priv);
 }
@@ -723,7 +723,7 @@ static int _rtl92e_sta_down(struct net_device *dev, bool shutdownrf)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
 	unsigned long flags = 0;
-	u8 RFInProgressTimeOut = 0;
+	u8 rf_in_progress_timeout = 0;
 
 	if (priv->up == 0)
 		return -1;
@@ -755,12 +755,12 @@ static int _rtl92e_sta_down(struct net_device *dev, bool shutdownrf)
 	spin_lock_irqsave(&priv->rf_ps_lock, flags);
 	while (priv->rf_change_in_progress) {
 		spin_unlock_irqrestore(&priv->rf_ps_lock, flags);
-		if (RFInProgressTimeOut > 100) {
+		if (rf_in_progress_timeout > 100) {
 			spin_lock_irqsave(&priv->rf_ps_lock, flags);
 			break;
 		}
 		mdelay(1);
-		RFInProgressTimeOut++;
+		rf_in_progress_timeout++;
 		spin_lock_irqsave(&priv->rf_ps_lock, flags);
 	}
 	priv->rf_change_in_progress = true;
@@ -845,7 +845,7 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rxringcount = MAX_RX_COUNT;
 	priv->irq_enabled = 0;
 	priv->chan = 1;
-	priv->RegChannelPlan = 0xf;
+	priv->reg_chnl_plan = 0xf;
 	priv->rtllib->mode = WIRELESS_MODE_AUTO;
 	priv->rtllib->iw_mode = IW_MODE_INFRA;
 	priv->rtllib->bNetPromiscuousMode = false;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 1ae3c77e2fefc..98c750730f871 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -467,7 +467,7 @@ struct r8192_priv {
 
 	bool bTXPowerDataReadFromEEPORM;
 
-	u16 RegChannelPlan;
+	u16 reg_chnl_plan;
 	u16 ChannelPlan;
 	u8 bHwRfOffAction;
 
diff --git a/drivers/staging/rtl8192e/rtl819x_HT.h b/drivers/staging/rtl8192e/rtl819x_HT.h
index 76bc9c5a6d83d..22e4f126ed565 100644
--- a/drivers/staging/rtl8192e/rtl819x_HT.h
+++ b/drivers/staging/rtl8192e/rtl819x_HT.h
@@ -96,7 +96,7 @@ enum ht_aggre_mode {
 
 
 struct rt_hi_throughput {
-	u8				bEnableHT;
+	u8				enable_ht;
 	u8				bCurrentHTSupport;
 
 	u8				bRegBW40MHz;
diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index 2092ebf717a1f..2c4c1cb4cbed2 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -690,7 +690,7 @@ void HTResetSelfAndSavePeerSetting(struct rtllib_device *ieee,
 	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
 	u8	bIOTAction = 0;
 
-	/* unmark bEnableHT flag here is the same reason why unmarked in
+	/* unmark enable_ht flag here is the same reason why unmarked in
 	 * function rtllib_softmac_new_net. WB 2008.09.10
 	 */
 	if (pNetwork->bssht.bd_support_ht) {
@@ -776,7 +776,7 @@ void HTUseDefaultSetting(struct rtllib_device *ieee)
 {
 	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
 
-	if (pHTInfo->bEnableHT) {
+	if (pHTInfo->enable_ht) {
 		pHTInfo->bCurrentHTSupport = true;
 		pHTInfo->bCurSuppCCK = pHTInfo->bRegSuppCCK;
 
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index ee4173a6ade02..ac8132d8c45ac 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -1179,11 +1179,11 @@ rtllib_association_req(struct rtllib_network *beacon,
 	if ((ieee->rtllib_ap_sec_type &&
 	    (ieee->rtllib_ap_sec_type(ieee) & SEC_ALG_TKIP)) ||
 	    ieee->bForcedBgMode) {
-		ieee->pHTInfo->bEnableHT = 0;
+		ieee->pHTInfo->enable_ht = 0;
 		ieee->mode = WIRELESS_MODE_G;
 	}
 
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) {
+	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
 		ht_cap_buf = (u8 *)&(ieee->pHTInfo->SelfHTCap);
 		ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap);
 		HTConstructCapabilityElement(ieee, ht_cap_buf, &ht_cap_len,
@@ -1324,7 +1324,7 @@ rtllib_association_req(struct rtllib_network *beacon,
 		memcpy(tag, osCcxVerNum.Octet, osCcxVerNum.Length);
 		tag += osCcxVerNum.Length;
 	}
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) {
+	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
 		if (ieee->pHTInfo->ePeerHTSpecVer != HT_SPEC_VER_EWC) {
 			tag = skb_put(skb, ht_cap_len);
 			*tag++ = MFIE_TYPE_HT_CAP;
@@ -1358,7 +1358,7 @@ rtllib_association_req(struct rtllib_network *beacon,
 		rtllib_TURBO_Info(ieee, &tag);
 	}
 
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) {
+	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
 		if (ieee->pHTInfo->ePeerHTSpecVer == HT_SPEC_VER_EWC) {
 			tag = skb_put(skb, ht_cap_len);
 			*tag++ = MFIE_TYPE_GENERIC;
@@ -1524,14 +1524,14 @@ static void rtllib_associate_complete_wq(void *data)
 		ieee->SetWirelessMode(ieee->dev, IEEE_B);
 		netdev_info(ieee->dev, "Using B rates:%d\n", ieee->rate);
 	}
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT) {
+	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
 		netdev_info(ieee->dev, "Successfully associated, ht enabled\n");
 		HTOnAssocRsp(ieee);
 	} else {
 		netdev_info(ieee->dev,
 			    "Successfully associated, ht not enabled(%d, %d)\n",
 			    ieee->pHTInfo->bCurrentHTSupport,
-			    ieee->pHTInfo->bEnableHT);
+			    ieee->pHTInfo->enable_ht);
 		memset(ieee->dot11ht_oper_rate_set, 0, 16);
 	}
 	ieee->LinkDetectInfo.SlotNum = 2 * (1 +
@@ -1684,7 +1684,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee,
 				    ieee->current_network.ssid,
 				    ieee->current_network.channel,
 				    ieee->current_network.qos_data.supported,
-				    ieee->pHTInfo->bEnableHT,
+				    ieee->pHTInfo->enable_ht,
 				    ieee->current_network.bssht.bd_support_ht,
 				    ieee->current_network.mode,
 				    ieee->current_network.flags);
diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
index fdf867a5dd7a2..7bac30b87c93d 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
@@ -359,7 +359,7 @@ void rtllib_wx_sync_scan_wq(void *data)
 	if (ieee->ScanOperationBackupHandler)
 		ieee->ScanOperationBackupHandler(ieee->dev, SCAN_OPT_BACKUP);
 
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->bEnableHT &&
+	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht &&
 	    ieee->pHTInfo->bCurBW40MHz) {
 		b40M = 1;
 		chan_offset = ieee->pHTInfo->CurSTAExtChnlOffset;
diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index be3779efcd233..8b01cfe033813 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -273,7 +273,7 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee,
 	if (rtllib_act_scanning(ieee, false))
 		return;
 
-	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->bEnableHT)
+	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht)
 		return;
 	if (!IsQoSDataFrame(skb->data))
 		return;
@@ -354,7 +354,7 @@ static void rtllib_query_HTCapShortGI(struct rtllib_device *ieee,
 
 	tcb_desc->bUseShortGI		= false;
 
-	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->bEnableHT)
+	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht)
 		return;
 
 	if (pHTInfo->forced_short_gi) {
@@ -375,7 +375,7 @@ static void rtllib_query_BandwidthMode(struct rtllib_device *ieee,
 
 	tcb_desc->bPacketBW = false;
 
-	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->bEnableHT)
+	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht)
 		return;
 
 	if (tcb_desc->bMulticast || tcb_desc->bBroadcast)
@@ -438,7 +438,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee,
 			tcb_desc->rts_rate = MGN_24M;
 			break;
 		}
-		if (pHTInfo->bCurrentHTSupport  && pHTInfo->bEnableHT) {
+		if (pHTInfo->bCurrentHTSupport  && pHTInfo->enable_ht) {
 			u8 HTOpMode = pHTInfo->current_op_mode;
 
 			if ((pHTInfo->bCurBW40MHz && (HTOpMode == 2 ||
-- 
GitLab


From 7e5cfa3c77d324fa235e94cc4363b14624ff2c45 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:35:49 +0100
Subject: [PATCH 1747/4122] staging: rtl8192e: Rename LinkDetectInfo

Rename variable LinkDetectInfo to link_detect_info to avoid CamelCase
which is not accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/48d043893fa755490e810af204e5b7ad2ba606de.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 58 ++++++++++----------
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c   |  2 +-
 drivers/staging/rtl8192e/rtllib.h            |  2 +-
 drivers/staging/rtl8192e/rtllib_rx.c         | 14 ++---
 drivers/staging/rtl8192e/rtllib_softmac.c    | 26 ++++-----
 drivers/staging/rtl8192e/rtllib_softmac_wx.c |  8 +--
 6 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 1bf5e760d87ae..3379b4229a85f 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -1250,15 +1250,15 @@ static void _rtl92e_update_rxcounts(struct r8192_priv *priv, u32 *TotalRxBcnNum,
 	*TotalRxBcnNum = 0;
 	*TotalRxDataNum = 0;
 
-	SlotIndex = (priv->rtllib->LinkDetectInfo.SlotIndex++) %
-			(priv->rtllib->LinkDetectInfo.SlotNum);
-	priv->rtllib->LinkDetectInfo.RxBcnNum[SlotIndex] =
-			priv->rtllib->LinkDetectInfo.NumRecvBcnInPeriod;
-	priv->rtllib->LinkDetectInfo.RxDataNum[SlotIndex] =
-			priv->rtllib->LinkDetectInfo.NumRecvDataInPeriod;
-	for (i = 0; i < priv->rtllib->LinkDetectInfo.SlotNum; i++) {
-		*TotalRxBcnNum += priv->rtllib->LinkDetectInfo.RxBcnNum[i];
-		*TotalRxDataNum += priv->rtllib->LinkDetectInfo.RxDataNum[i];
+	SlotIndex = (priv->rtllib->link_detect_info.SlotIndex++) %
+			(priv->rtllib->link_detect_info.SlotNum);
+	priv->rtllib->link_detect_info.RxBcnNum[SlotIndex] =
+			priv->rtllib->link_detect_info.NumRecvBcnInPeriod;
+	priv->rtllib->link_detect_info.RxDataNum[SlotIndex] =
+			priv->rtllib->link_detect_info.NumRecvDataInPeriod;
+	for (i = 0; i < priv->rtllib->link_detect_info.SlotNum; i++) {
+		*TotalRxBcnNum += priv->rtllib->link_detect_info.RxBcnNum[i];
+		*TotalRxDataNum += priv->rtllib->link_detect_info.RxDataNum[i];
 	}
 }
 
@@ -1304,22 +1304,22 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 	}
 	if ((ieee->state == RTLLIB_LINKED) && (ieee->iw_mode ==
 	     IW_MODE_INFRA) && (!ieee->bNetPromiscuousMode)) {
-		if (ieee->LinkDetectInfo.NumRxOkInPeriod > 100 ||
-		ieee->LinkDetectInfo.NumTxOkInPeriod > 100)
+		if (ieee->link_detect_info.NumRxOkInPeriod > 100 ||
+		ieee->link_detect_info.NumTxOkInPeriod > 100)
 			bBusyTraffic = true;
 
-		if (ieee->LinkDetectInfo.NumRxOkInPeriod > 4000 ||
-		    ieee->LinkDetectInfo.NumTxOkInPeriod > 4000) {
+		if (ieee->link_detect_info.NumRxOkInPeriod > 4000 ||
+		    ieee->link_detect_info.NumTxOkInPeriod > 4000) {
 			bHigherBusyTraffic = true;
-			if (ieee->LinkDetectInfo.NumRxOkInPeriod > 5000)
+			if (ieee->link_detect_info.NumRxOkInPeriod > 5000)
 				bHigherBusyRxTraffic = true;
 			else
 				bHigherBusyRxTraffic = false;
 		}
 
-		if (((ieee->LinkDetectInfo.NumRxUnicastOkInPeriod +
-		    ieee->LinkDetectInfo.NumTxOkInPeriod) > 8) ||
-		    (ieee->LinkDetectInfo.NumRxUnicastOkInPeriod > 2))
+		if (((ieee->link_detect_info.NumRxUnicastOkInPeriod +
+		    ieee->link_detect_info.NumTxOkInPeriod) > 8) ||
+		    (ieee->link_detect_info.NumRxUnicastOkInPeriod > 2))
 			bEnterPS = false;
 		else
 			bEnterPS = true;
@@ -1336,13 +1336,13 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 		rtl92e_leisure_ps_leave(dev);
 	}
 
-	ieee->LinkDetectInfo.NumRxOkInPeriod = 0;
-	ieee->LinkDetectInfo.NumTxOkInPeriod = 0;
-	ieee->LinkDetectInfo.NumRxUnicastOkInPeriod = 0;
-	ieee->LinkDetectInfo.bBusyTraffic = bBusyTraffic;
+	ieee->link_detect_info.NumRxOkInPeriod = 0;
+	ieee->link_detect_info.NumTxOkInPeriod = 0;
+	ieee->link_detect_info.NumRxUnicastOkInPeriod = 0;
+	ieee->link_detect_info.bBusyTraffic = bBusyTraffic;
 
-	ieee->LinkDetectInfo.bHigherBusyTraffic = bHigherBusyTraffic;
-	ieee->LinkDetectInfo.bHigherBusyRxTraffic = bHigherBusyRxTraffic;
+	ieee->link_detect_info.bHigherBusyTraffic = bHigherBusyTraffic;
+	ieee->link_detect_info.bHigherBusyRxTraffic = bHigherBusyRxTraffic;
 
 	if (ieee->state == RTLLIB_LINKED && ieee->iw_mode == IW_MODE_INFRA) {
 		u32	TotalRxBcnNum = 0;
@@ -1383,8 +1383,8 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 
 			priv->check_roaming_cnt = 0;
 		}
-		ieee->LinkDetectInfo.NumRecvBcnInPeriod = 0;
-		ieee->LinkDetectInfo.NumRecvDataInPeriod = 0;
+		ieee->link_detect_info.NumRecvBcnInPeriod = 0;
+		ieee->link_detect_info.NumRecvDataInPeriod = 0;
 	}
 
 	spin_lock_irqsave(&priv->tx_lock, flags);
@@ -2213,25 +2213,25 @@ static irqreturn_t _rtl92e_irq(int irq, void *netdev)
 
 	if (inta & IMR_BKDOK) {
 		priv->stats.txbkokint++;
-		priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++;
+		priv->rtllib->link_detect_info.NumTxOkInPeriod++;
 		_rtl92e_tx_isr(dev, BK_QUEUE);
 	}
 
 	if (inta & IMR_BEDOK) {
 		priv->stats.txbeokint++;
-		priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++;
+		priv->rtllib->link_detect_info.NumTxOkInPeriod++;
 		_rtl92e_tx_isr(dev, BE_QUEUE);
 	}
 
 	if (inta & IMR_VIDOK) {
 		priv->stats.txviokint++;
-		priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++;
+		priv->rtllib->link_detect_info.NumTxOkInPeriod++;
 		_rtl92e_tx_isr(dev, VI_QUEUE);
 	}
 
 	if (inta & IMR_VODOK) {
 		priv->stats.txvookint++;
-		priv->rtllib->LinkDetectInfo.NumTxOkInPeriod++;
+		priv->rtllib->link_detect_info.NumTxOkInPeriod++;
 		_rtl92e_tx_isr(dev, VO_QUEUE);
 	}
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index b200d53c8e3d2..4f16c3d079cc0 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -393,7 +393,7 @@ static int _rtl92e_wx_set_scan(struct net_device *dev,
 	rt_state = priv->rtllib->rf_power_state;
 	if (!priv->up)
 		return -ENETDOWN;
-	if (priv->rtllib->LinkDetectInfo.bBusyTraffic == true)
+	if (priv->rtllib->link_detect_info.bBusyTraffic == true)
 		return -EAGAIN;
 
 	if (wrqu->data.flags & IW_SCAN_THIS_ESSID) {
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index 6b7b2c887c347..b5313c1ea32d9 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1643,7 +1643,7 @@ struct rtllib_device {
 	struct bandwidth_autoswitch bandwidth_auto_switch;
 	bool FwRWRF;
 
-	struct rt_link_detect LinkDetectInfo;
+	struct rt_link_detect link_detect_info;
 	bool bIsAggregateFrame;
 	struct rt_pwr_save_ctrl pwr_save_ctrl;
 
diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index 46d75e925ee9b..f9a35fcb1d1b2 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -1211,9 +1211,9 @@ static void rtllib_rx_check_leave_lps(struct rtllib_device *ieee, u8 unicast,
 	if (unicast) {
 
 		if (ieee->state == RTLLIB_LINKED) {
-			if (((ieee->LinkDetectInfo.NumRxUnicastOkInPeriod +
-			    ieee->LinkDetectInfo.NumTxOkInPeriod) > 8) ||
-			    (ieee->LinkDetectInfo.NumRxUnicastOkInPeriod > 2)) {
+			if (((ieee->link_detect_info.NumRxUnicastOkInPeriod +
+			    ieee->link_detect_info.NumTxOkInPeriod) > 8) ||
+			    (ieee->link_detect_info.NumRxUnicastOkInPeriod > 2)) {
 				if (ieee->LeisurePSLeave)
 					ieee->LeisurePSLeave(ieee->dev);
 			}
@@ -1355,8 +1355,8 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb,
 
 	/* Update statstics for AP roaming */
 	if (!bToOtherSTA) {
-		ieee->LinkDetectInfo.NumRecvDataInPeriod++;
-		ieee->LinkDetectInfo.NumRxOkInPeriod++;
+		ieee->link_detect_info.NumRecvDataInPeriod++;
+		ieee->link_detect_info.NumRxOkInPeriod++;
 	}
 
 	/* Data frame - extract src/dst addresses */
@@ -1437,7 +1437,7 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb,
 		else
 			nr_subframes = 1;
 		if (unicast)
-			ieee->LinkDetectInfo.NumRxUnicastOkInPeriod += nr_subframes;
+			ieee->link_detect_info.NumRxUnicastOkInPeriod += nr_subframes;
 		rtllib_rx_check_leave_lps(ieee, unicast, nr_subframes);
 	}
 
@@ -2620,7 +2620,7 @@ static inline void rtllib_process_probe_response(
 		}
 		if (is_beacon(frame_ctl)) {
 			if (ieee->state >= RTLLIB_LINKED)
-				ieee->LinkDetectInfo.NumRecvBcnInPeriod++;
+				ieee->link_detect_info.NumRecvBcnInPeriod++;
 		}
 	}
 	list_for_each_entry(target, &ieee->network_list, list) {
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index ac8132d8c45ac..1d583e73d7533 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -1534,13 +1534,13 @@ static void rtllib_associate_complete_wq(void *data)
 			    ieee->pHTInfo->enable_ht);
 		memset(ieee->dot11ht_oper_rate_set, 0, 16);
 	}
-	ieee->LinkDetectInfo.SlotNum = 2 * (1 +
+	ieee->link_detect_info.SlotNum = 2 * (1 +
 				       ieee->current_network.beacon_interval /
 				       500);
-	if (ieee->LinkDetectInfo.NumRecvBcnInPeriod == 0 ||
-	    ieee->LinkDetectInfo.NumRecvDataInPeriod == 0) {
-		ieee->LinkDetectInfo.NumRecvBcnInPeriod = 1;
-		ieee->LinkDetectInfo.NumRecvDataInPeriod = 1;
+	if (ieee->link_detect_info.NumRecvBcnInPeriod == 0 ||
+	    ieee->link_detect_info.NumRecvDataInPeriod == 0) {
+		ieee->link_detect_info.NumRecvBcnInPeriod = 1;
+		ieee->link_detect_info.NumRecvDataInPeriod = 1;
 	}
 	psc->LpsIdleCount = 0;
 	ieee->link_change(ieee->dev);
@@ -2369,7 +2369,7 @@ rtllib_rx_deauth(struct rtllib_device *ieee, struct sk_buff *skb)
 		ieee->state = RTLLIB_ASSOCIATING;
 		ieee->softmac_stats.reassoc++;
 		ieee->is_roaming = true;
-		ieee->LinkDetectInfo.bBusyTraffic = false;
+		ieee->link_detect_info.bBusyTraffic = false;
 		rtllib_disassociate(ieee);
 		RemovePeerTS(ieee, header->addr2);
 		if (ieee->LedControlHandler != NULL)
@@ -2963,13 +2963,13 @@ int rtllib_softmac_init(struct rtllib_device *ieee)
 	if (!ieee->dot11d_info)
 		return -ENOMEM;
 
-	ieee->LinkDetectInfo.SlotIndex = 0;
-	ieee->LinkDetectInfo.SlotNum = 2;
-	ieee->LinkDetectInfo.NumRecvBcnInPeriod = 0;
-	ieee->LinkDetectInfo.NumRecvDataInPeriod = 0;
-	ieee->LinkDetectInfo.NumTxOkInPeriod = 0;
-	ieee->LinkDetectInfo.NumRxOkInPeriod = 0;
-	ieee->LinkDetectInfo.NumRxUnicastOkInPeriod = 0;
+	ieee->link_detect_info.SlotIndex = 0;
+	ieee->link_detect_info.SlotNum = 2;
+	ieee->link_detect_info.NumRecvBcnInPeriod = 0;
+	ieee->link_detect_info.NumRecvDataInPeriod = 0;
+	ieee->link_detect_info.NumTxOkInPeriod = 0;
+	ieee->link_detect_info.NumRxOkInPeriod = 0;
+	ieee->link_detect_info.NumRxUnicastOkInPeriod = 0;
 	ieee->bIsAggregateFrame = false;
 	ieee->assoc_id = 0;
 	ieee->queue_stop = 0;
diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
index 7bac30b87c93d..e02e7d9566b24 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
@@ -391,10 +391,10 @@ void rtllib_wx_sync_scan_wq(void *data)
 	/* Notify AP that I wake up again */
 	rtllib_sta_ps_send_null_frame(ieee, 0);
 
-	if (ieee->LinkDetectInfo.NumRecvBcnInPeriod == 0 ||
-	    ieee->LinkDetectInfo.NumRecvDataInPeriod == 0) {
-		ieee->LinkDetectInfo.NumRecvBcnInPeriod = 1;
-		ieee->LinkDetectInfo.NumRecvDataInPeriod = 1;
+	if (ieee->link_detect_info.NumRecvBcnInPeriod == 0 ||
+	    ieee->link_detect_info.NumRecvDataInPeriod == 0) {
+		ieee->link_detect_info.NumRecvBcnInPeriod = 1;
+		ieee->link_detect_info.NumRecvDataInPeriod = 1;
 	}
 
 	if (ieee->data_hard_resume)
-- 
GitLab


From 8e5d08ceb8db933bdb1f60772984ffc166d2eb55 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:35:55 +0100
Subject: [PATCH 1748/4122] staging: rtl8192e: Rename bNetPromisc..,
 IntelPromiscu.. and bPromiscu..

Rename variable bNetPromiscuousMode to net_promiscuous_md,
IntelPromiscuousModeInfo to intel_promiscuous_md_info and bPromiscuousOn
to promiscuous_on to avoid CamelCase which is not accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/69d9998a30ce2286c3ae6cb4510174e1255b3f9e.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8192e/rtl8192e/r8192E_dev.c    |  2 +-
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c  | 10 +++++-----
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c    | 20 +++++++++----------
 drivers/staging/rtl8192e/rtllib.h             |  6 +++---
 drivers/staging/rtl8192e/rtllib_rx.c          |  8 ++++----
 drivers/staging/rtl8192e/rtllib_softmac.c     |  4 ++--
 6 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index d0ce898370737..2ccd1e0542c24 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -859,7 +859,7 @@ void rtl92e_link_change(struct net_device *dev)
 
 		reg = rtl92e_readl(dev, RCR);
 		if (priv->rtllib->state == RTLLIB_LINKED) {
-			if (ieee->IntelPromiscuousModeInfo.bPromiscuousOn)
+			if (ieee->intel_promiscuous_md_info.promiscuous_on)
 				;
 			else
 				priv->ReceiveConfig = reg |= RCR_CBSSID;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 3379b4229a85f..94b8ed2e3489d 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -848,9 +848,9 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->reg_chnl_plan = 0xf;
 	priv->rtllib->mode = WIRELESS_MODE_AUTO;
 	priv->rtllib->iw_mode = IW_MODE_INFRA;
-	priv->rtllib->bNetPromiscuousMode = false;
-	priv->rtllib->IntelPromiscuousModeInfo.bPromiscuousOn = false;
-	priv->rtllib->IntelPromiscuousModeInfo.bFilterSourceStationFrame =
+	priv->rtllib->net_promiscuous_md = false;
+	priv->rtllib->intel_promiscuous_md_info.promiscuous_on = false;
+	priv->rtllib->intel_promiscuous_md_info.bFilterSourceStationFrame =
 								 false;
 	priv->rtllib->ieee_up = 0;
 	priv->retry_rts = DEFAULT_RETRY_RTS;
@@ -1297,13 +1297,13 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 		     (!ieee->proto_stoppping) && !ieee->wx_set_enc) {
 			if ((ieee->pwr_save_ctrl.ReturnPoint ==
 			     IPS_CALLBACK_NONE) &&
-			     (!ieee->bNetPromiscuousMode)) {
+			     (!ieee->net_promiscuous_md)) {
 				rtl92e_ips_enter(dev);
 			}
 		}
 	}
 	if ((ieee->state == RTLLIB_LINKED) && (ieee->iw_mode ==
-	     IW_MODE_INFRA) && (!ieee->bNetPromiscuousMode)) {
+	     IW_MODE_INFRA) && (!ieee->net_promiscuous_md)) {
 		if (ieee->link_detect_info.NumRxOkInPeriod > 100 ||
 		ieee->link_detect_info.NumTxOkInPeriod > 100)
 			bBusyTraffic = true;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index 4f16c3d079cc0..c31e5b572e9e2 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -251,7 +251,7 @@ static int _rtl92e_wx_set_mode(struct net_device *dev,
 	rt_state = priv->rtllib->rf_power_state;
 	mutex_lock(&priv->wx_mutex);
 	if (wrqu->mode == IW_MODE_ADHOC || wrqu->mode == IW_MODE_MONITOR ||
-	    ieee->bNetPromiscuousMode) {
+	    ieee->net_promiscuous_md) {
 		if (rt_state == rf_off) {
 			if (priv->rtllib->rf_off_reason >
 			    RF_CHANGE_BY_IPS) {
@@ -1014,28 +1014,28 @@ static int _rtl92e_wx_set_promisc_mode(struct net_device *dev,
 	u32 info_buf[3];
 
 	u32 oid;
-	u32 bPromiscuousOn;
+	u32 promiscuous_on;
 	u32 bFilterSourceStationFrame;
 
 	if (copy_from_user(info_buf, wrqu->data.pointer, sizeof(info_buf)))
 		return -EFAULT;
 
 	oid = info_buf[0];
-	bPromiscuousOn = info_buf[1];
+	promiscuous_on = info_buf[1];
 	bFilterSourceStationFrame = info_buf[2];
 
 	if (oid == OID_RT_INTEL_PROMISCUOUS_MODE) {
-		ieee->IntelPromiscuousModeInfo.bPromiscuousOn =
-					(bPromiscuousOn) ? (true) : (false);
-		ieee->IntelPromiscuousModeInfo.bFilterSourceStationFrame =
+		ieee->intel_promiscuous_md_info.promiscuous_on =
+					(promiscuous_on) ? (true) : (false);
+		ieee->intel_promiscuous_md_info.bFilterSourceStationFrame =
 			(bFilterSourceStationFrame) ? (true) : (false);
-		(bPromiscuousOn) ?
+		(promiscuous_on) ?
 		(rtllib_EnableIntelPromiscuousMode(dev, false)) :
 		(rtllib_DisableIntelPromiscuousMode(dev, false));
 
 		netdev_info(dev,
 			    "=======>%s(), on = %d, filter src sta = %d\n",
-			    __func__, bPromiscuousOn,
+			    __func__, promiscuous_on,
 			    bFilterSourceStationFrame);
 	} else {
 		return -1;
@@ -1054,8 +1054,8 @@ static int _rtl92e_wx_get_promisc_mode(struct net_device *dev,
 	mutex_lock(&priv->wx_mutex);
 
 	snprintf(extra, 45, "PromiscuousMode:%d, FilterSrcSTAFrame:%d",
-		 ieee->IntelPromiscuousModeInfo.bPromiscuousOn,
-		 ieee->IntelPromiscuousModeInfo.bFilterSourceStationFrame);
+		 ieee->intel_promiscuous_md_info.promiscuous_on,
+		 ieee->intel_promiscuous_md_info.bFilterSourceStationFrame);
 	wrqu->data.length = strlen(extra) + 1;
 
 	mutex_unlock(&priv->wx_mutex);
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index b5313c1ea32d9..493759cc6ccf1 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1384,7 +1384,7 @@ struct rt_pmkid_list {
 };
 
 struct rt_intel_promisc_mode {
-	bool bPromiscuousOn;
+	bool promiscuous_on;
 	bool bFilterSourceStationFrame;
 };
 
@@ -1470,8 +1470,8 @@ struct rtllib_device {
 	int scan_age;
 
 	int iw_mode; /* operating mode (IW_MODE_*) */
-	bool bNetPromiscuousMode;
-	struct rt_intel_promisc_mode IntelPromiscuousModeInfo;
+	bool net_promiscuous_md;
+	struct rt_intel_promisc_mode intel_promiscuous_md_info;
 
 	spinlock_t lock;
 	spinlock_t wpax_suitlist_lock;
diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index f9a35fcb1d1b2..ccb61d8decd3c 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -999,8 +999,8 @@ static int rtllib_rx_data_filter(struct rtllib_device *ieee, u16 fc,
 	}
 
 	/* Filter packets sent by an STA that will be forwarded by AP */
-	if (ieee->IntelPromiscuousModeInfo.bPromiscuousOn  &&
-		ieee->IntelPromiscuousModeInfo.bFilterSourceStationFrame) {
+	if (ieee->intel_promiscuous_md_info.promiscuous_on  &&
+		ieee->intel_promiscuous_md_info.bFilterSourceStationFrame) {
 		if ((fc & RTLLIB_FCTL_TODS) && !(fc & RTLLIB_FCTL_FROMDS) &&
 		    !ether_addr_equal(dst, ieee->current_network.bssid) &&
 		    ether_addr_equal(bssid, ieee->current_network.bssid)) {
@@ -1011,7 +1011,7 @@ static int rtllib_rx_data_filter(struct rtllib_device *ieee, u16 fc,
 	/* Nullfunc frames may have PS-bit set, so they must be passed to
 	 * hostap_handle_sta_rx() before being dropped here.
 	 */
-	if (!ieee->IntelPromiscuousModeInfo.bPromiscuousOn) {
+	if (!ieee->intel_promiscuous_md_info.promiscuous_on) {
 		if (stype != RTLLIB_STYPE_DATA &&
 		    stype != RTLLIB_STYPE_DATA_CFACK &&
 		    stype != RTLLIB_STYPE_DATA_CFPOLL &&
@@ -1317,7 +1317,7 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb,
 	multicast = is_multicast_ether_addr(hdr->addr1);
 	unicast = !multicast;
 	if (unicast && !ether_addr_equal(dev->dev_addr, hdr->addr1)) {
-		if (ieee->bNetPromiscuousMode)
+		if (ieee->net_promiscuous_md)
 			bToOtherSTA = true;
 		else
 			goto rx_dropped;
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 1d583e73d7533..8b1d88061125a 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -445,7 +445,7 @@ void rtllib_EnableIntelPromiscuousMode(struct net_device *dev,
 	ieee->SetHwRegHandler(dev, HW_VAR_CECHK_BSSID,
 			     (u8 *)&bFilterOutNonAssociatedBSSID);
 
-	ieee->bNetPromiscuousMode = true;
+	ieee->net_promiscuous_md = true;
 }
 EXPORT_SYMBOL(rtllib_EnableIntelPromiscuousMode);
 
@@ -466,7 +466,7 @@ void rtllib_DisableIntelPromiscuousMode(struct net_device *dev,
 	ieee->SetHwRegHandler(dev, HW_VAR_CECHK_BSSID,
 			     (u8 *)&bFilterOutNonAssociatedBSSID);
 
-	ieee->bNetPromiscuousMode = false;
+	ieee->net_promiscuous_md = false;
 }
 EXPORT_SYMBOL(rtllib_DisableIntelPromiscuousMode);
 
-- 
GitLab


From 62310f889a73ea2089f31209a426307887435e86 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:36:02 +0100
Subject: [PATCH 1749/4122] staging: rtl8192e: Rename bFilterSour..,
 CCKPresentAt.. and ResetProg..

Rename variable bFilterSourceStationFrame to fltr_src_sta_frame,
CCKPresentAttentuation to cck_present_attn and ResetProgress to
rst_progress to avoid CamelCase which is not accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/25ab52350a4a3249a1f76b28eea10c44e2f9552d.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8192e/rtl8192e/r8192E_dev.c    | 12 +++++------
 .../staging/rtl8192e/rtl8192e/r8192E_phy.c    | 20 +++++++++----------
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c  | 14 ++++++-------
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h  |  4 ++--
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.c    | 20 +++++++++----------
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c    | 12 +++++------
 drivers/staging/rtl8192e/rtllib.h             |  2 +-
 drivers/staging/rtl8192e/rtllib_rx.c          |  2 +-
 8 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 2ccd1e0542c24..0eeb9b2daefce 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -617,7 +617,7 @@ bool rtl92e_start_adapter(struct net_device *dev)
 start:
 	rtl92e_reset_desc_ring(dev);
 	priv->Rf_Mode = RF_OP_By_SW_3wire;
-	if (priv->ResetProgress == RESET_TYPE_NORESET) {
+	if (priv->rst_progress == RESET_TYPE_NORESET) {
 		rtl92e_writeb(dev, ANAPAR, 0x37);
 		mdelay(500);
 	}
@@ -650,7 +650,7 @@ start:
 	}
 
 	priv->LoopbackMode = RTL819X_NO_LOOPBACK;
-	if (priv->ResetProgress == RESET_TYPE_NORESET) {
+	if (priv->rst_progress == RESET_TYPE_NORESET) {
 		ulRegRead = rtl92e_readl(dev, CPU_GEN);
 		if (priv->LoopbackMode == RTL819X_NO_LOOPBACK)
 			ulRegRead = (ulRegRead & CPU_GEN_NO_LOOPBACK_MSK) |
@@ -699,7 +699,7 @@ start:
 
 	rtl92e_writeb(dev, ACK_TIMEOUT, 0x30);
 
-	if (priv->ResetProgress == RESET_TYPE_NORESET)
+	if (priv->rst_progress == RESET_TYPE_NORESET)
 		rtl92e_set_wireless_mode(dev, priv->rtllib->mode);
 	rtl92e_cam_reset(dev);
 	{
@@ -739,7 +739,7 @@ start:
 		}
 	}
 
-	if (priv->ResetProgress == RESET_TYPE_NORESET) {
+	if (priv->rst_progress == RESET_TYPE_NORESET) {
 		rtStatus = rtl92e_config_phy(dev);
 		if (!rtStatus) {
 			netdev_info(dev, "RF Config failed\n");
@@ -766,7 +766,7 @@ start:
 	else
 		priv->Rf_Mode = RF_OP_By_SW_3wire;
 
-	if (priv->ResetProgress == RESET_TYPE_NORESET) {
+	if (priv->rst_progress == RESET_TYPE_NORESET) {
 		rtl92e_dm_init_txpower_tracking(dev);
 
 		if (priv->IC_Cut >= IC_VersionCut_D) {
@@ -795,7 +795,7 @@ start:
 			}
 			priv->CCKPresentAttentuation_40Mdefault = 0;
 			priv->CCKPresentAttentuation_difference = 0;
-			priv->CCKPresentAttentuation =
+			priv->cck_present_attn =
 				  priv->CCKPresentAttentuation_20Mdefault;
 			priv->btxpower_tracking = false;
 		}
diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
index c357adf95a3d0..ac3c7f047ca2d 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
@@ -1008,16 +1008,16 @@ static void _rtl92e_cck_tx_power_track_bw_switch_tssi(struct net_device *dev)
 
 	switch (priv->CurrentChannelBW) {
 	case HT_CHANNEL_WIDTH_20:
-		priv->CCKPresentAttentuation =
+		priv->cck_present_attn =
 			priv->CCKPresentAttentuation_20Mdefault +
 			    priv->CCKPresentAttentuation_difference;
 
-		if (priv->CCKPresentAttentuation >
+		if (priv->cck_present_attn >
 		    (CCKTxBBGainTableLength-1))
-			priv->CCKPresentAttentuation =
+			priv->cck_present_attn =
 					 CCKTxBBGainTableLength-1;
-		if (priv->CCKPresentAttentuation < 0)
-			priv->CCKPresentAttentuation = 0;
+		if (priv->cck_present_attn < 0)
+			priv->cck_present_attn = 0;
 
 		if (priv->rtllib->current_network.channel == 14 &&
 		    !priv->bcck_in_ch14) {
@@ -1033,16 +1033,16 @@ static void _rtl92e_cck_tx_power_track_bw_switch_tssi(struct net_device *dev)
 		break;
 
 	case HT_CHANNEL_WIDTH_20_40:
-		priv->CCKPresentAttentuation =
+		priv->cck_present_attn =
 			priv->CCKPresentAttentuation_40Mdefault +
 			priv->CCKPresentAttentuation_difference;
 
-		if (priv->CCKPresentAttentuation >
+		if (priv->cck_present_attn >
 		    (CCKTxBBGainTableLength - 1))
-			priv->CCKPresentAttentuation =
+			priv->cck_present_attn =
 					 CCKTxBBGainTableLength-1;
-		if (priv->CCKPresentAttentuation < 0)
-			priv->CCKPresentAttentuation = 0;
+		if (priv->cck_present_attn < 0)
+			priv->cck_present_attn = 0;
 
 		if (priv->rtllib->current_network.channel == 14 &&
 		    !priv->bcck_in_ch14) {
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 94b8ed2e3489d..1c4985dcc5eb1 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -850,7 +850,7 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rtllib->iw_mode = IW_MODE_INFRA;
 	priv->rtllib->net_promiscuous_md = false;
 	priv->rtllib->intel_promiscuous_md_info.promiscuous_on = false;
-	priv->rtllib->intel_promiscuous_md_info.bFilterSourceStationFrame =
+	priv->rtllib->intel_promiscuous_md_info.fltr_src_sta_frame =
 								 false;
 	priv->rtllib->ieee_up = 0;
 	priv->retry_rts = DEFAULT_RETRY_RTS;
@@ -861,11 +861,11 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->promisc = (dev->flags & IFF_PROMISC) ? 1 : 0;
 	priv->bcck_in_ch14 = false;
 	priv->bfsync_processing  = false;
-	priv->CCKPresentAttentuation = 0;
+	priv->cck_present_attn = 0;
 	priv->rfa_txpowertrackingindex = 0;
 	priv->rfc_txpowertrackingindex = 0;
 	priv->CckPwEnl = 6;
-	priv->ResetProgress = RESET_TYPE_NORESET;
+	priv->rst_progress = RESET_TYPE_NORESET;
 	priv->force_reset = false;
 	memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32);
 
@@ -1135,8 +1135,8 @@ static void _rtl92e_if_silent_reset(struct net_device *dev)
 	struct rtllib_device *ieee = priv->rtllib;
 	unsigned long flag;
 
-	if (priv->ResetProgress == RESET_TYPE_NORESET) {
-		priv->ResetProgress = RESET_TYPE_SILENT;
+	if (priv->rst_progress == RESET_TYPE_NORESET) {
+		priv->rst_progress = RESET_TYPE_SILENT;
 
 		spin_lock_irqsave(&priv->rf_ps_lock, flag);
 		if (priv->rf_change_in_progress) {
@@ -1233,7 +1233,7 @@ RESET_START:
 		rtl92e_cam_restore(dev);
 		rtl92e_dm_restore_state(dev);
 END:
-		priv->ResetProgress = RESET_TYPE_NORESET;
+		priv->rst_progress = RESET_TYPE_NORESET;
 		priv->reset_count++;
 		priv->bResetInProgress = false;
 
@@ -1396,7 +1396,7 @@ static void _rtl92e_watchdog_wq_cb(void *data)
 	spin_unlock_irqrestore(&priv->tx_lock, flags);
 
 	if (ResetType == RESET_TYPE_NORMAL) {
-		priv->ResetProgress = RESET_TYPE_NORMAL;
+		priv->rst_progress = RESET_TYPE_NORMAL;
 		return;
 	}
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 98c750730f871..dfbc9fbcc1297 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -483,7 +483,7 @@ struct r8192_priv {
 	u8 CCKPresentAttentuation_20Mdefault;
 	u8 CCKPresentAttentuation_40Mdefault;
 	s8 CCKPresentAttentuation_difference;
-	s8 CCKPresentAttentuation;
+	s8 cck_present_attn;
 	long undecorated_smoothed_pwdb;
 
 	u32 MCSTxPowerLevelOriginalOffset[6];
@@ -536,7 +536,7 @@ struct r8192_priv {
 
 	u32		reset_count;
 
-	enum reset_type ResetProgress;
+	enum reset_type rst_progress;
 	u16		TxCounter;
 	u16		RxCounter;
 	bool		bResetInProgress;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
index 767c746fc73d9..234e85a25d459 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
@@ -267,7 +267,7 @@ static void _rtl92e_dm_check_ac_dc_power(struct net_device *dev)
 			"PATH=/usr/bin:/bin",
 			 NULL};
 
-	if (priv->ResetProgress == RESET_TYPE_SILENT)
+	if (priv->rst_progress == RESET_TYPE_SILENT)
 		return;
 	if (priv->rtllib->state != RTLLIB_LINKED)
 		return;
@@ -716,21 +716,21 @@ static void _rtl92e_dm_tx_power_tracking_callback_tssi(struct net_device *dev)
 			}
 
 			if (priv->CurrentChannelBW == HT_CHANNEL_WIDTH_20)
-				priv->CCKPresentAttentuation =
+				priv->cck_present_attn =
 					 priv->CCKPresentAttentuation_20Mdefault +
 					 priv->CCKPresentAttentuation_difference;
 			else
-				priv->CCKPresentAttentuation =
+				priv->cck_present_attn =
 					 priv->CCKPresentAttentuation_40Mdefault +
 					 priv->CCKPresentAttentuation_difference;
 
-			if (priv->CCKPresentAttentuation > (CCKTxBBGainTableLength-1))
-				priv->CCKPresentAttentuation = CCKTxBBGainTableLength-1;
-			if (priv->CCKPresentAttentuation < 0)
-				priv->CCKPresentAttentuation = 0;
+			if (priv->cck_present_attn > (CCKTxBBGainTableLength-1))
+				priv->cck_present_attn = CCKTxBBGainTableLength-1;
+			if (priv->cck_present_attn < 0)
+				priv->cck_present_attn = 0;
 
-			if (priv->CCKPresentAttentuation > -1 &&
-			    priv->CCKPresentAttentuation < CCKTxBBGainTableLength) {
+			if (priv->cck_present_attn > -1 &&
+			    priv->cck_present_attn < CCKTxBBGainTableLength) {
 				if (priv->rtllib->current_network.channel == 14 &&
 				    !priv->bcck_in_ch14) {
 					priv->bcck_in_ch14 = true;
@@ -963,7 +963,7 @@ static void _rtl92e_dm_cck_tx_power_adjust_tssi(struct net_device *dev,
 {
 	u32 TempVal;
 	struct r8192_priv *priv = rtllib_priv(dev);
-	u8 attenuation = priv->CCKPresentAttentuation;
+	u8 attenuation = priv->cck_present_attn;
 
 	TempVal = 0;
 	if (!bInCH14) {
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index c31e5b572e9e2..7ff14aa9f4762 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -1015,20 +1015,20 @@ static int _rtl92e_wx_set_promisc_mode(struct net_device *dev,
 
 	u32 oid;
 	u32 promiscuous_on;
-	u32 bFilterSourceStationFrame;
+	u32 fltr_src_sta_frame;
 
 	if (copy_from_user(info_buf, wrqu->data.pointer, sizeof(info_buf)))
 		return -EFAULT;
 
 	oid = info_buf[0];
 	promiscuous_on = info_buf[1];
-	bFilterSourceStationFrame = info_buf[2];
+	fltr_src_sta_frame = info_buf[2];
 
 	if (oid == OID_RT_INTEL_PROMISCUOUS_MODE) {
 		ieee->intel_promiscuous_md_info.promiscuous_on =
 					(promiscuous_on) ? (true) : (false);
-		ieee->intel_promiscuous_md_info.bFilterSourceStationFrame =
-			(bFilterSourceStationFrame) ? (true) : (false);
+		ieee->intel_promiscuous_md_info.fltr_src_sta_frame =
+			(fltr_src_sta_frame) ? (true) : (false);
 		(promiscuous_on) ?
 		(rtllib_EnableIntelPromiscuousMode(dev, false)) :
 		(rtllib_DisableIntelPromiscuousMode(dev, false));
@@ -1036,7 +1036,7 @@ static int _rtl92e_wx_set_promisc_mode(struct net_device *dev,
 		netdev_info(dev,
 			    "=======>%s(), on = %d, filter src sta = %d\n",
 			    __func__, promiscuous_on,
-			    bFilterSourceStationFrame);
+			    fltr_src_sta_frame);
 	} else {
 		return -1;
 	}
@@ -1055,7 +1055,7 @@ static int _rtl92e_wx_get_promisc_mode(struct net_device *dev,
 
 	snprintf(extra, 45, "PromiscuousMode:%d, FilterSrcSTAFrame:%d",
 		 ieee->intel_promiscuous_md_info.promiscuous_on,
-		 ieee->intel_promiscuous_md_info.bFilterSourceStationFrame);
+		 ieee->intel_promiscuous_md_info.fltr_src_sta_frame);
 	wrqu->data.length = strlen(extra) + 1;
 
 	mutex_unlock(&priv->wx_mutex);
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index 493759cc6ccf1..9c81ca38f4b1d 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1385,7 +1385,7 @@ struct rt_pmkid_list {
 
 struct rt_intel_promisc_mode {
 	bool promiscuous_on;
-	bool bFilterSourceStationFrame;
+	bool fltr_src_sta_frame;
 };
 
 
diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index ccb61d8decd3c..75190c389ccf1 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -1000,7 +1000,7 @@ static int rtllib_rx_data_filter(struct rtllib_device *ieee, u16 fc,
 
 	/* Filter packets sent by an STA that will be forwarded by AP */
 	if (ieee->intel_promiscuous_md_info.promiscuous_on  &&
-		ieee->intel_promiscuous_md_info.bFilterSourceStationFrame) {
+		ieee->intel_promiscuous_md_info.fltr_src_sta_frame) {
 		if ((fc & RTLLIB_FCTL_TODS) && !(fc & RTLLIB_FCTL_FROMDS) &&
 		    !ether_addr_equal(dst, ieee->current_network.bssid) &&
 		    ether_addr_equal(bssid, ieee->current_network.bssid)) {
-- 
GitLab


From 41fa3d42a3210f09771a10d4e30896c6e3062a82 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:36:09 +0100
Subject: [PATCH 1750/4122] staging: rtl8192e: Rename InterruptLog, RxCounter
 and bHwRfOffAction

Rename variable InterruptLog to int_log, RxCounter to rx_ctr and
bHwRfOffAction to hw_rf_off_action to avoid CamelCase which is not
accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/82ea07ddd894ac9b863ce90ddb9ba78065bd1f4e.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 6 +++---
 drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c | 4 ++--
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   | 8 ++++----
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h   | 6 +++---
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.c     | 2 +-
 5 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 0eeb9b2daefce..e530f917fd231 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -1928,7 +1928,7 @@ void rtl92e_stop_adapter(struct net_device *dev, bool reset)
 	if (!reset) {
 		mdelay(150);
 
-		priv->bHwRfOffAction = 2;
+		priv->hw_rf_off_action = 2;
 
 		if (!priv->rtllib->bSupportRemoteWakeUp) {
 			rtl92e_set_rf_off(dev);
@@ -2129,7 +2129,7 @@ bool rtl92e_is_rx_stuck(struct net_device *dev)
 
 	SlotIndex = (priv->SilentResetRxSlotIndex++)%SilentResetRxSoltNum;
 
-	if (priv->RxCounter == RegRxCounter) {
+	if (priv->rx_ctr == RegRxCounter) {
 		priv->SilentResetRxStuckEvent[SlotIndex] = 1;
 
 		for (i = 0; i < SilentResetRxSoltNum; i++)
@@ -2147,7 +2147,7 @@ bool rtl92e_is_rx_stuck(struct net_device *dev)
 		priv->SilentResetRxStuckEvent[SlotIndex] = 0;
 	}
 
-	priv->RxCounter = RegRxCounter;
+	priv->rx_ctr = RegRxCounter;
 
 	return bStuck;
 }
diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
index ac3c7f047ca2d..a813eded4cb3f 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_phy.c
@@ -1342,7 +1342,7 @@ static bool _rtl92e_set_rf_power_state(struct net_device *dev,
 				mdelay(1);
 				rtl92e_set_bb_reg(dev, rFPGA0_AnalogParameter1,
 						 0x4, 0x1);
-				priv->bHwRfOffAction = 0;
+				priv->hw_rf_off_action = 0;
 
 				rtl92e_set_bb_reg(dev, rFPGA0_XA_RFInterfaceOE,
 						  BIT4, 0x1);
@@ -1450,7 +1450,7 @@ bool rtl92e_set_rf_power_state(struct net_device *dev,
 	bool bResult = false;
 
 	if (rf_power_state == priv->rtllib->rf_power_state &&
-	    priv->bHwRfOffAction == 0) {
+	    priv->hw_rf_off_action == 0) {
 		return bResult;
 	}
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 1c4985dcc5eb1..43601ec8d9032 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -869,13 +869,13 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->force_reset = false;
 	memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32);
 
-	memset(&priv->InterruptLog, 0, sizeof(struct log_int_8190));
-	priv->RxCounter = 0;
+	memset(&priv->int_log, 0, sizeof(struct log_int_8190));
+	priv->rx_ctr = 0;
 	priv->rtllib->wx_set_enc = 0;
 	priv->hw_radio_off = false;
 	priv->rtllib->rf_off_reason = 0;
 	priv->rf_change_in_progress = false;
-	priv->bHwRfOffAction = 0;
+	priv->hw_rf_off_action = 0;
 	priv->SetRFPowerStateInProgress = false;
 	priv->rtllib->pwr_save_ctrl.bLeisurePs = true;
 	priv->rtllib->LPSDelayCnt = 0;
@@ -2189,7 +2189,7 @@ static irqreturn_t _rtl92e_irq(int irq, void *netdev)
 
 	if (inta & IMR_ROK) {
 		priv->stats.rxint++;
-		priv->InterruptLog.nIMR_ROK++;
+		priv->int_log.nIMR_ROK++;
 		tasklet_schedule(&priv->irq_rx_tasklet);
 	}
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index dfbc9fbcc1297..b1656d4ecbad6 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -330,7 +330,7 @@ struct r8192_priv {
 
 	struct work_struct				reset_wq;
 
-	struct log_int_8190 InterruptLog;
+	struct log_int_8190 int_log;
 
 	enum rt_customer_id CustomerID;
 
@@ -469,7 +469,7 @@ struct r8192_priv {
 
 	u16 reg_chnl_plan;
 	u16 ChannelPlan;
-	u8 bHwRfOffAction;
+	u8 hw_rf_off_action;
 
 	bool rf_change_in_progress;
 	bool SetRFPowerStateInProgress;
@@ -538,7 +538,7 @@ struct r8192_priv {
 
 	enum reset_type rst_progress;
 	u16		TxCounter;
-	u16		RxCounter;
+	u16		rx_ctr;
 	bool		bResetInProgress;
 	bool		force_reset;
 	bool		force_lps;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
index 234e85a25d459..e0f6f1405c17e 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
@@ -1693,7 +1693,7 @@ static void _rtl92e_dm_check_rf_ctrl_gpio(void *data)
 
 	if (bActuallySet) {
 		mdelay(1000);
-		priv->bHwRfOffAction = 1;
+		priv->hw_rf_off_action = 1;
 		rtl92e_set_rf_state(dev, rf_power_state_to_set, RF_CHANGE_BY_HW);
 		if (priv->hw_radio_off)
 			argv[1] = "RFOFF";
-- 
GitLab


From ccdbe14b77a5e39496baf632e157f9daf322dd27 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Sun, 13 Nov 2022 05:36:15 +0100
Subject: [PATCH 1751/4122] staging: rtl8192e: Rename pHTInfo

Rename variable pHTInfo to ht_info to avoid CamelCase which is not
accepted by checkpatch.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/88cdc0ef393c92cb2102a66893c5320e8c8606df.1668313325.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/rtl8192e/rtl8192e/r8192E_dev.c    |  10 +-
 drivers/staging/rtl8192e/rtl8192e/rtl_cam.c   |   2 +-
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c  |  12 +-
 drivers/staging/rtl8192e/rtl8192e/rtl_dm.c    |  50 +--
 drivers/staging/rtl8192e/rtl8192e/rtl_wx.c    |   2 +-
 drivers/staging/rtl8192e/rtl819x_BAProc.c     |  20 +-
 drivers/staging/rtl8192e/rtl819x_HTProc.c     | 354 +++++++++---------
 drivers/staging/rtl8192e/rtl819x_TSProc.c     |   2 +-
 drivers/staging/rtl8192e/rtllib.h             |   4 +-
 drivers/staging/rtl8192e/rtllib_module.c      |   6 +-
 drivers/staging/rtl8192e/rtllib_rx.c          |  12 +-
 drivers/staging/rtl8192e/rtllib_softmac.c     |  78 ++--
 drivers/staging/rtl8192e/rtllib_softmac_wx.c  |   8 +-
 drivers/staging/rtl8192e/rtllib_tx.c          |  54 +--
 14 files changed, 307 insertions(+), 307 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index e530f917fd231..b9c846015d286 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -1978,7 +1978,7 @@ void rtl92e_update_ratr_table(struct net_device *dev)
 		break;
 	case IEEE_N_24G:
 	case IEEE_N_5G:
-		if (ieee->pHTInfo->peer_mimo_ps == 0) {
+		if (ieee->ht_info->peer_mimo_ps == 0) {
 			ratr_value &= 0x0007F007;
 		} else {
 			if (priv->rf_type == RF_1T2R)
@@ -1991,11 +1991,11 @@ void rtl92e_update_ratr_table(struct net_device *dev)
 		break;
 	}
 	ratr_value &= 0x0FFFFFFF;
-	if (ieee->pHTInfo->cur_tx_bw40mhz &&
-	    ieee->pHTInfo->bCurShortGI40MHz)
+	if (ieee->ht_info->cur_tx_bw40mhz &&
+	    ieee->ht_info->bCurShortGI40MHz)
 		ratr_value |= 0x80000000;
-	else if (!ieee->pHTInfo->cur_tx_bw40mhz &&
-		  ieee->pHTInfo->bCurShortGI20MHz)
+	else if (!ieee->ht_info->cur_tx_bw40mhz &&
+		  ieee->ht_info->bCurShortGI20MHz)
 		ratr_value |= 0x80000000;
 	rtl92e_writel(dev, RATR0+rate_index*4, ratr_value);
 	rtl92e_writeb(dev, UFWP, 1);
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
index 9d8d4837e6b20..a4d65b4d99c28 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_cam.c
@@ -41,7 +41,7 @@ void rtl92e_enable_hw_security_config(struct net_device *dev)
 	}
 
 	ieee->hwsec_active = 1;
-	if ((ieee->pHTInfo->iot_action & HT_IOT_ACT_PURE_N_MODE) || !hwwep) {
+	if ((ieee->ht_info->iot_action & HT_IOT_ACT_PURE_N_MODE) || !hwwep) {
 		ieee->hwsec_active = 0;
 		SECR_value &= ~SCR_RxDecEnable;
 	}
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 43601ec8d9032..399ee9783f994 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -307,7 +307,7 @@ static void _rtl92e_update_cap(struct net_device *dev, u16 cap)
 		u8	cur_slot_time = priv->slot_time;
 
 		if ((cap & WLAN_CAPABILITY_SHORT_SLOT_TIME) &&
-		   (!priv->rtllib->pHTInfo->current_rt2rt_long_slot_time)) {
+		   (!priv->rtllib->ht_info->current_rt2rt_long_slot_time)) {
 			if (cur_slot_time != SHORT_SLOT_TIME) {
 				slot_time_val = SHORT_SLOT_TIME;
 				priv->rtllib->SetHwRegHandler(dev,
@@ -339,10 +339,10 @@ static void _rtl92e_update_beacon(void *data)
 	struct rtllib_device *ieee = priv->rtllib;
 	struct rtllib_network *net = &ieee->current_network;
 
-	if (ieee->pHTInfo->bCurrentHTSupport)
+	if (ieee->ht_info->bCurrentHTSupport)
 		HT_update_self_and_peer_setting(ieee, net);
-	ieee->pHTInfo->current_rt2rt_long_slot_time = net->bssht.bd_rt2rt_long_slot_time;
-	ieee->pHTInfo->RT2RT_HT_Mode = net->bssht.rt2rt_ht_mode;
+	ieee->ht_info->current_rt2rt_long_slot_time = net->bssht.bd_rt2rt_long_slot_time;
+	ieee->ht_info->RT2RT_HT_Mode = net->bssht.rt2rt_ht_mode;
 	_rtl92e_update_cap(dev, net->capability);
 }
 
@@ -672,9 +672,9 @@ void rtl92e_set_wireless_mode(struct net_device *dev, u8 wireless_mode)
 
 	if ((wireless_mode == WIRELESS_MODE_N_24G) ||
 	    (wireless_mode == WIRELESS_MODE_N_5G)) {
-		priv->rtllib->pHTInfo->enable_ht = 1;
+		priv->rtllib->ht_info->enable_ht = 1;
 	} else {
-		priv->rtllib->pHTInfo->enable_ht = 0;
+		priv->rtllib->ht_info->enable_ht = 0;
 	}
 	_rtl92e_refresh_support_rate(priv);
 }
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
index e0f6f1405c17e..a18393c8a8339 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_dm.c
@@ -321,7 +321,7 @@ void rtl92e_init_adaptive_rate(struct net_device *dev)
 static void _rtl92e_dm_check_rate_adaptive(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo;
+	struct rt_hi_throughput *ht_info = priv->rtllib->ht_info;
 	struct rate_adaptive *pra = &priv->rate_adaptive;
 	u32 currentRATR, targetRATR = 0;
 	u32 LowRSSIThreshForRA = 0, HighRSSIThreshForRA = 0;
@@ -340,10 +340,10 @@ static void _rtl92e_dm_check_rate_adaptive(struct net_device *dev)
 
 	if (priv->rtllib->state == RTLLIB_LINKED) {
 
-		bshort_gi_enabled = (pHTInfo->cur_tx_bw40mhz &&
-				     pHTInfo->bCurShortGI40MHz) ||
-				    (!pHTInfo->cur_tx_bw40mhz &&
-				     pHTInfo->bCurShortGI20MHz);
+		bshort_gi_enabled = (ht_info->cur_tx_bw40mhz &&
+				     ht_info->bCurShortGI40MHz) ||
+				    (!ht_info->cur_tx_bw40mhz &&
+				     ht_info->bCurShortGI20MHz);
 
 		pra->upper_rssi_threshold_ratr =
 				(pra->upper_rssi_threshold_ratr & (~BIT31)) |
@@ -1532,7 +1532,7 @@ void rtl92e_dm_init_edca_turbo(struct net_device *dev)
 static void _rtl92e_dm_check_edca_turbo(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv(dev);
-	struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo;
+	struct rt_hi_throughput *ht_info = priv->rtllib->ht_info;
 
 	static unsigned long lastTxOkCnt;
 	static unsigned long lastRxOkCnt;
@@ -1543,18 +1543,18 @@ static void _rtl92e_dm_check_edca_turbo(struct net_device *dev)
 		goto dm_CheckEdcaTurbo_EXIT;
 	if (priv->rtllib->state != RTLLIB_LINKED)
 		goto dm_CheckEdcaTurbo_EXIT;
-	if (priv->rtllib->pHTInfo->iot_action & HT_IOT_ACT_DISABLE_EDCA_TURBO)
+	if (priv->rtllib->ht_info->iot_action & HT_IOT_ACT_DISABLE_EDCA_TURBO)
 		goto dm_CheckEdcaTurbo_EXIT;
 
 	if (!priv->rtllib->bis_any_nonbepkts) {
 		curTxOkCnt = priv->stats.txbytesunicast - lastTxOkCnt;
 		curRxOkCnt = priv->stats.rxbytesunicast - lastRxOkCnt;
-		if (pHTInfo->iot_action & HT_IOT_ACT_EDCA_BIAS_ON_RX) {
+		if (ht_info->iot_action & HT_IOT_ACT_EDCA_BIAS_ON_RX) {
 			if (curTxOkCnt > 4*curRxOkCnt) {
 				if (priv->bis_cur_rdlstate ||
 				    !priv->bcurrent_turbo_EDCA) {
 					rtl92e_writel(dev, EDCAPARA_BE,
-						      edca_setting_UL[pHTInfo->IOTPeer]);
+						      edca_setting_UL[ht_info->IOTPeer]);
 					priv->bis_cur_rdlstate = false;
 				}
 			} else {
@@ -1562,10 +1562,10 @@ static void _rtl92e_dm_check_edca_turbo(struct net_device *dev)
 				    !priv->bcurrent_turbo_EDCA) {
 					if (priv->rtllib->mode == WIRELESS_MODE_G)
 						rtl92e_writel(dev, EDCAPARA_BE,
-							      edca_setting_DL_GMode[pHTInfo->IOTPeer]);
+							      edca_setting_DL_GMode[ht_info->IOTPeer]);
 					else
 						rtl92e_writel(dev, EDCAPARA_BE,
-							      edca_setting_DL[pHTInfo->IOTPeer]);
+							      edca_setting_DL[ht_info->IOTPeer]);
 					priv->bis_cur_rdlstate = true;
 				}
 			}
@@ -1576,17 +1576,17 @@ static void _rtl92e_dm_check_edca_turbo(struct net_device *dev)
 				    !priv->bcurrent_turbo_EDCA) {
 					if (priv->rtllib->mode == WIRELESS_MODE_G)
 						rtl92e_writel(dev, EDCAPARA_BE,
-							      edca_setting_DL_GMode[pHTInfo->IOTPeer]);
+							      edca_setting_DL_GMode[ht_info->IOTPeer]);
 					else
 						rtl92e_writel(dev, EDCAPARA_BE,
-							      edca_setting_DL[pHTInfo->IOTPeer]);
+							      edca_setting_DL[ht_info->IOTPeer]);
 					priv->bis_cur_rdlstate = true;
 				}
 			} else {
 				if (priv->bis_cur_rdlstate ||
 				    !priv->bcurrent_turbo_EDCA) {
 					rtl92e_writel(dev, EDCAPARA_BE,
-						      edca_setting_UL[pHTInfo->IOTPeer]);
+						      edca_setting_UL[ht_info->IOTPeer]);
 					priv->bis_cur_rdlstate = false;
 				}
 
@@ -1621,23 +1621,23 @@ static void _rtl92e_dm_init_cts_to_self(struct net_device *dev)
 static void _rtl92e_dm_cts_to_self(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv((struct net_device *)dev);
-	struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo;
+	struct rt_hi_throughput *ht_info = priv->rtllib->ht_info;
 	static unsigned long lastTxOkCnt;
 	static unsigned long lastRxOkCnt;
 	unsigned long curTxOkCnt = 0;
 	unsigned long curRxOkCnt = 0;
 
 	if (!priv->rtllib->bCTSToSelfEnable) {
-		pHTInfo->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF;
+		ht_info->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF;
 		return;
 	}
-	if (pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM) {
+	if (ht_info->IOTPeer == HT_IOT_PEER_BROADCOM) {
 		curTxOkCnt = priv->stats.txbytesunicast - lastTxOkCnt;
 		curRxOkCnt = priv->stats.rxbytesunicast - lastRxOkCnt;
 		if (curRxOkCnt > 4*curTxOkCnt)
-			pHTInfo->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF;
+			ht_info->iot_action &= ~HT_IOT_ACT_FORCED_CTS2SELF;
 		else
-			pHTInfo->iot_action |= HT_IOT_ACT_FORCED_CTS2SELF;
+			ht_info->iot_action |= HT_IOT_ACT_FORCED_CTS2SELF;
 
 		lastTxOkCnt = priv->stats.txbytesunicast;
 		lastRxOkCnt = priv->stats.rxbytesunicast;
@@ -1648,10 +1648,10 @@ static void _rtl92e_dm_cts_to_self(struct net_device *dev)
 static void _rtl92e_dm_init_wa_broadcom_iot(struct net_device *dev)
 {
 	struct r8192_priv *priv = rtllib_priv((struct net_device *)dev);
-	struct rt_hi_throughput *pHTInfo = priv->rtllib->pHTInfo;
+	struct rt_hi_throughput *ht_info = priv->rtllib->ht_info;
 
-	pHTInfo->bWAIotBroadcom = false;
-	pHTInfo->WAIotTH = WAIotTHVal;
+	ht_info->bWAIotBroadcom = false;
+	ht_info->WAIotTH = WAIotTHVal;
 }
 
 static void _rtl92e_dm_check_rf_ctrl_gpio(void *data)
@@ -1992,7 +1992,7 @@ static void _rtl92e_dm_fsync_timer_callback(struct timer_list *t)
 
 	if (priv->rtllib->state == RTLLIB_LINKED &&
 	    priv->rtllib->bfsync_enable &&
-	    (priv->rtllib->pHTInfo->iot_action & HT_IOT_ACT_CDD_FSYNC)) {
+	    (priv->rtllib->ht_info->iot_action & HT_IOT_ACT_CDD_FSYNC)) {
 		u32 rate_bitmap;
 
 		for (rate_index = 0; rate_index <= 27; rate_index++) {
@@ -2163,7 +2163,7 @@ static void _rtl92e_dm_check_fsync(struct net_device *dev)
 	static u32 reset_cnt;
 
 	if (priv->rtllib->state == RTLLIB_LINKED &&
-	    priv->rtllib->pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM) {
+	    priv->rtllib->ht_info->IOTPeer == HT_IOT_PEER_BROADCOM) {
 		if (priv->rtllib->bfsync_enable == 0) {
 			switch (priv->rtllib->fsync_state) {
 			case Default_Fsync:
@@ -2288,7 +2288,7 @@ static void _rtl92e_dm_dynamic_tx_power(struct net_device *dev)
 		priv->bDynamicTxLowPower = false;
 		return;
 	}
-	if ((priv->rtllib->pHTInfo->IOTPeer == HT_IOT_PEER_ATHEROS) &&
+	if ((priv->rtllib->ht_info->IOTPeer == HT_IOT_PEER_ATHEROS) &&
 	    (priv->rtllib->mode == IEEE_G)) {
 		txhipower_threshold = TX_POWER_ATHEROAP_THRESH_HIGH;
 		txlowpower_threshold = TX_POWER_ATHEROAP_THRESH_LOW;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
index 7ff14aa9f4762..bf0030144e5d8 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
@@ -915,7 +915,7 @@ static int _rtl92e_wx_set_encode_ext(struct net_device *dev,
 					 key, 0);
 		} else {
 			if ((ieee->pairwise_key_type == KEY_TYPE_CCMP) &&
-			     ieee->pHTInfo->bCurrentHTSupport)
+			     ieee->ht_info->bCurrentHTSupport)
 				rtl92e_writeb(dev, 0x173, 1);
 			rtl92e_set_key(dev, 4, idx, alg,
 				       (u8 *)ieee->ap_mac_addr, 0, key);
diff --git a/drivers/staging/rtl8192e/rtl819x_BAProc.c b/drivers/staging/rtl8192e/rtl819x_BAProc.c
index 7c0369319f97f..acc19514bca60 100644
--- a/drivers/staging/rtl8192e/rtl819x_BAProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_BAProc.c
@@ -244,13 +244,13 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb)
 	pBaStartSeqCtrl = (union sequence_control *)(req + 7);
 
 	if (!ieee->current_network.qos_data.active ||
-	    !ieee->pHTInfo->bCurrentHTSupport ||
-	    (ieee->pHTInfo->iot_action & HT_IOT_ACT_REJECT_ADDBA_REQ)) {
+	    !ieee->ht_info->bCurrentHTSupport ||
+	    (ieee->ht_info->iot_action & HT_IOT_ACT_REJECT_ADDBA_REQ)) {
 		rc = ADDBA_STATUS_REFUSED;
 		netdev_warn(ieee->dev,
 			    "Failed to reply on ADDBA_REQ as some capability is not ready(%d, %d)\n",
 			    ieee->current_network.qos_data.active,
-			    ieee->pHTInfo->bCurrentHTSupport);
+			    ieee->ht_info->bCurrentHTSupport);
 		goto OnADDBAReq_Fail;
 	}
 	if (!GetTs(ieee, (struct ts_common_info **)&pTS, dst,
@@ -277,7 +277,7 @@ int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb)
 	pBA->ba_start_seq_ctrl = *pBaStartSeqCtrl;
 
 	if (ieee->GetHalfNmodeSupportByAPsHandler(ieee->dev) ||
-	   (ieee->pHTInfo->iot_action & HT_IOT_ACT_ALLOW_PEER_AGG_ONE_PKT))
+	   (ieee->ht_info->iot_action & HT_IOT_ACT_ALLOW_PEER_AGG_ONE_PKT))
 		pBA->ba_param_set.field.buffer_size = 1;
 	else
 		pBA->ba_param_set.field.buffer_size = 32;
@@ -326,13 +326,13 @@ int rtllib_rx_ADDBARsp(struct rtllib_device *ieee, struct sk_buff *skb)
 	pBaTimeoutVal = (u16 *)(tag + 7);
 
 	if (!ieee->current_network.qos_data.active ||
-	    !ieee->pHTInfo->bCurrentHTSupport ||
-	    !ieee->pHTInfo->bCurrentAMPDUEnable) {
+	    !ieee->ht_info->bCurrentHTSupport ||
+	    !ieee->ht_info->bCurrentAMPDUEnable) {
 		netdev_warn(ieee->dev,
 			    "reject to ADDBA_RSP as some capability is not ready(%d, %d, %d)\n",
 			    ieee->current_network.qos_data.active,
-			    ieee->pHTInfo->bCurrentHTSupport,
-			    ieee->pHTInfo->bCurrentAMPDUEnable);
+			    ieee->ht_info->bCurrentHTSupport,
+			    ieee->ht_info->bCurrentAMPDUEnable);
 		ReasonCode = DELBA_REASON_UNKNOWN_BA;
 		goto OnADDBARsp_Reject;
 	}
@@ -413,11 +413,11 @@ int rtllib_rx_DELBA(struct rtllib_device *ieee, struct sk_buff *skb)
 	}
 
 	if (!ieee->current_network.qos_data.active ||
-	    !ieee->pHTInfo->bCurrentHTSupport) {
+	    !ieee->ht_info->bCurrentHTSupport) {
 		netdev_warn(ieee->dev,
 			    "received DELBA while QOS or HT is not supported(%d, %d)\n",
 			    ieee->current_network. qos_data.active,
-			    ieee->pHTInfo->bCurrentHTSupport);
+			    ieee->ht_info->bCurrentHTSupport);
 		return -1;
 	}
 
diff --git a/drivers/staging/rtl8192e/rtl819x_HTProc.c b/drivers/staging/rtl8192e/rtl819x_HTProc.c
index 2c4c1cb4cbed2..fe30a291e64c9 100644
--- a/drivers/staging/rtl8192e/rtl819x_HTProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_HTProc.c
@@ -69,48 +69,48 @@ static u8 LINKSYS_MARVELL_4400N[3] = {0x00, 0x14, 0xa4};
 
 void HTUpdateDefaultSetting(struct rtllib_device *ieee)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
-	pHTInfo->bRegShortGI20MHz = 1;
-	pHTInfo->bRegShortGI40MHz = 1;
+	ht_info->bRegShortGI20MHz = 1;
+	ht_info->bRegShortGI40MHz = 1;
 
-	pHTInfo->bRegBW40MHz = 1;
+	ht_info->bRegBW40MHz = 1;
 
-	if (pHTInfo->bRegBW40MHz)
-		pHTInfo->bRegSuppCCK = 1;
+	if (ht_info->bRegBW40MHz)
+		ht_info->bRegSuppCCK = 1;
 	else
-		pHTInfo->bRegSuppCCK = true;
+		ht_info->bRegSuppCCK = true;
 
-	pHTInfo->nAMSDU_MaxSize = 7935UL;
-	pHTInfo->bAMSDU_Support = 0;
+	ht_info->nAMSDU_MaxSize = 7935UL;
+	ht_info->bAMSDU_Support = 0;
 
-	pHTInfo->bAMPDUEnable = 1;
-	pHTInfo->AMPDU_Factor = 2;
-	pHTInfo->MPDU_Density = 0;
+	ht_info->bAMPDUEnable = 1;
+	ht_info->AMPDU_Factor = 2;
+	ht_info->MPDU_Density = 0;
 
-	pHTInfo->self_mimo_ps = 3;
-	if (pHTInfo->self_mimo_ps == 2)
-		pHTInfo->self_mimo_ps = 3;
+	ht_info->self_mimo_ps = 3;
+	if (ht_info->self_mimo_ps == 2)
+		ht_info->self_mimo_ps = 3;
 	ieee->tx_dis_rate_fallback = 0;
 	ieee->tx_use_drv_assinged_rate = 0;
 
 	ieee->bTxEnableFwCalcDur = 1;
 
-	pHTInfo->reg_rt2rt_aggregation = 1;
+	ht_info->reg_rt2rt_aggregation = 1;
 
-	pHTInfo->reg_rx_reorder_enable = 1;
-	pHTInfo->rx_reorder_win_size = 64;
-	pHTInfo->rx_reorder_pending_time = 30;
+	ht_info->reg_rx_reorder_enable = 1;
+	ht_info->rx_reorder_win_size = 64;
+	ht_info->rx_reorder_pending_time = 30;
 }
 
 static u16 HTMcsToDataRate(struct rtllib_device *ieee, u8 nMcsRate)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
-	u8	is40MHz = (pHTInfo->bCurBW40MHz) ? 1 : 0;
-	u8	isShortGI = (pHTInfo->bCurBW40MHz) ?
-			    ((pHTInfo->bCurShortGI40MHz) ? 1 : 0) :
-			    ((pHTInfo->bCurShortGI20MHz) ? 1 : 0);
+	u8	is40MHz = (ht_info->bCurBW40MHz) ? 1 : 0;
+	u8	isShortGI = (ht_info->bCurBW40MHz) ?
+			    ((ht_info->bCurShortGI40MHz) ? 1 : 0) :
+			    ((ht_info->bCurShortGI20MHz) ? 1 : 0);
 	return MCS_DATA_RATE[is40MHz][isShortGI][(nMcsRate & 0x7f)];
 }
 
@@ -166,45 +166,45 @@ bool IsHTHalfNmodeAPs(struct rtllib_device *ieee)
 
 static void HTIOTPeerDetermine(struct rtllib_device *ieee)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	struct rtllib_network *net = &ieee->current_network;
 
 	if (net->bssht.bd_rt2rt_aggregation) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_REALTEK;
+		ht_info->IOTPeer = HT_IOT_PEER_REALTEK;
 		if (net->bssht.rt2rt_ht_mode & RT_HT_CAP_USE_92SE)
-			pHTInfo->IOTPeer = HT_IOT_PEER_REALTEK_92SE;
+			ht_info->IOTPeer = HT_IOT_PEER_REALTEK_92SE;
 		if (net->bssht.rt2rt_ht_mode & RT_HT_CAP_USE_SOFTAP)
-			pHTInfo->IOTPeer = HT_IOT_PEER_92U_SOFTAP;
+			ht_info->IOTPeer = HT_IOT_PEER_92U_SOFTAP;
 	} else if (net->broadcom_cap_exist) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_BROADCOM;
+		ht_info->IOTPeer = HT_IOT_PEER_BROADCOM;
 	} else if (!memcmp(net->bssid, UNKNOWN_BORADCOM, 3) ||
 		 !memcmp(net->bssid, LINKSYSWRT330_LINKSYSWRT300_BROADCOM, 3) ||
 		 !memcmp(net->bssid, LINKSYSWRT350_LINKSYSWRT150_BROADCOM, 3)) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_BROADCOM;
+		ht_info->IOTPeer = HT_IOT_PEER_BROADCOM;
 	} else if ((memcmp(net->bssid, BELKINF5D8233V1_RALINK, 3) == 0) ||
 		 (memcmp(net->bssid, BELKINF5D82334V3_RALINK, 3) == 0) ||
 		 (memcmp(net->bssid, PCI_RALINK, 3) == 0) ||
 		 (memcmp(net->bssid, EDIMAX_RALINK, 3) == 0) ||
 		 (memcmp(net->bssid, AIRLINK_RALINK, 3) == 0) ||
 		  net->ralink_cap_exist) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_RALINK;
+		ht_info->IOTPeer = HT_IOT_PEER_RALINK;
 	} else if ((net->atheros_cap_exist) ||
 		(memcmp(net->bssid, DLINK_ATHEROS_1, 3) == 0) ||
 		(memcmp(net->bssid, DLINK_ATHEROS_2, 3) == 0)) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_ATHEROS;
+		ht_info->IOTPeer = HT_IOT_PEER_ATHEROS;
 	} else if ((memcmp(net->bssid, CISCO_BROADCOM, 3) == 0) ||
 		  net->cisco_cap_exist) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_CISCO;
+		ht_info->IOTPeer = HT_IOT_PEER_CISCO;
 	} else if ((memcmp(net->bssid, LINKSYS_MARVELL_4400N, 3) == 0) ||
 		  net->marvell_cap_exist) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_MARVELL;
+		ht_info->IOTPeer = HT_IOT_PEER_MARVELL;
 	} else if (net->airgo_cap_exist) {
-		pHTInfo->IOTPeer = HT_IOT_PEER_AIRGO;
+		ht_info->IOTPeer = HT_IOT_PEER_AIRGO;
 	} else {
-		pHTInfo->IOTPeer = HT_IOT_PEER_UNKNOWN;
+		ht_info->IOTPeer = HT_IOT_PEER_UNKNOWN;
 	}
 
-	netdev_dbg(ieee->dev, "IOTPEER: %x\n", pHTInfo->IOTPeer);
+	netdev_dbg(ieee->dev, "IOTPEER: %x\n", ht_info->IOTPeer);
 }
 
 static u8 HTIOTActIsDisableMCS14(struct rtllib_device *ieee, u8 *PeerMacAddr)
@@ -233,7 +233,7 @@ static u8 HTIOTActIsMgntUseCCK6M(struct rtllib_device *ieee,
 {
 	u8	retValue = 0;
 
-	if (ieee->pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM)
+	if (ieee->ht_info->IOTPeer == HT_IOT_PEER_BROADCOM)
 		retValue = 1;
 
 	return retValue;
@@ -243,40 +243,40 @@ static u8 HTIOTActIsCCDFsync(struct rtllib_device *ieee)
 {
 	u8	retValue = 0;
 
-	if (ieee->pHTInfo->IOTPeer == HT_IOT_PEER_BROADCOM)
+	if (ieee->ht_info->IOTPeer == HT_IOT_PEER_BROADCOM)
 		retValue = 1;
 	return retValue;
 }
 
 static void HTIOTActDetermineRaFunc(struct rtllib_device *ieee, bool bPeerRx2ss)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
-	pHTInfo->iot_ra_func &= HT_IOT_RAFUNC_DISABLE_ALL;
+	ht_info->iot_ra_func &= HT_IOT_RAFUNC_DISABLE_ALL;
 
-	if (pHTInfo->IOTPeer == HT_IOT_PEER_RALINK && !bPeerRx2ss)
-		pHTInfo->iot_ra_func |= HT_IOT_RAFUNC_PEER_1R;
+	if (ht_info->IOTPeer == HT_IOT_PEER_RALINK && !bPeerRx2ss)
+		ht_info->iot_ra_func |= HT_IOT_RAFUNC_PEER_1R;
 
-	if (pHTInfo->iot_action & HT_IOT_ACT_AMSDU_ENABLE)
-		pHTInfo->iot_ra_func |= HT_IOT_RAFUNC_TX_AMSDU;
+	if (ht_info->iot_action & HT_IOT_ACT_AMSDU_ENABLE)
+		ht_info->iot_ra_func |= HT_IOT_RAFUNC_TX_AMSDU;
 }
 
-void HTResetIOTSetting(struct rt_hi_throughput *pHTInfo)
+void HTResetIOTSetting(struct rt_hi_throughput *ht_info)
 {
-	pHTInfo->iot_action = 0;
-	pHTInfo->IOTPeer = HT_IOT_PEER_UNKNOWN;
-	pHTInfo->iot_ra_func = 0;
+	ht_info->iot_action = 0;
+	ht_info->IOTPeer = HT_IOT_PEER_UNKNOWN;
+	ht_info->iot_ra_func = 0;
 }
 
 void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap,
 				  u8 *len, u8 IsEncrypt, bool bAssoc)
 {
-	struct rt_hi_throughput *pHT = ieee->pHTInfo;
+	struct rt_hi_throughput *pHT = ieee->ht_info;
 	struct ht_capab_ele *pCapELE = NULL;
 
 	if (!posHTCap || !pHT) {
 		netdev_warn(ieee->dev,
-			    "%s(): posHTCap and pHTInfo are null\n", __func__);
+			    "%s(): posHTCap and ht_info are null\n", __func__);
 		return;
 	}
 	memset(posHTCap, 0, *len);
@@ -352,7 +352,7 @@ void HTConstructCapabilityElement(struct rtllib_device *ieee, u8 *posHTCap,
 void HTConstructInfoElement(struct rtllib_device *ieee, u8 *posHTInfo,
 			    u8 *len, u8 IsEncrypt)
 {
-	struct rt_hi_throughput *pHT = ieee->pHTInfo;
+	struct rt_hi_throughput *pHT = ieee->ht_info;
 	struct ht_info_ele *pHTInfoEle = (struct ht_info_ele *)posHTInfo;
 
 	if (!posHTInfo || !pHTInfoEle) {
@@ -509,7 +509,7 @@ void HTSetConnectBwMode(struct rtllib_device *ieee,
 
 void HTOnAssocRsp(struct rtllib_device *ieee)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	struct ht_capab_ele *pPeerHTCap = NULL;
 	struct ht_info_ele *pPeerHTInfo = NULL;
 	u16 nMaxAMSDUSize = 0;
@@ -518,22 +518,22 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 	static const u8 EWC11NHTCap[] = { 0x00, 0x90, 0x4c, 0x33 };
 	static const u8 EWC11NHTInfo[] = { 0x00, 0x90, 0x4c, 0x34 };
 
-	if (!pHTInfo->bCurrentHTSupport) {
+	if (!ht_info->bCurrentHTSupport) {
 		netdev_warn(ieee->dev, "%s(): HT_DISABLE\n", __func__);
 		return;
 	}
 	netdev_dbg(ieee->dev, "%s(): HT_ENABLE\n", __func__);
 
-	if (!memcmp(pHTInfo->PeerHTCapBuf, EWC11NHTCap, sizeof(EWC11NHTCap)))
-		pPeerHTCap = (struct ht_capab_ele *)(&pHTInfo->PeerHTCapBuf[4]);
+	if (!memcmp(ht_info->PeerHTCapBuf, EWC11NHTCap, sizeof(EWC11NHTCap)))
+		pPeerHTCap = (struct ht_capab_ele *)(&ht_info->PeerHTCapBuf[4]);
 	else
-		pPeerHTCap = (struct ht_capab_ele *)(pHTInfo->PeerHTCapBuf);
+		pPeerHTCap = (struct ht_capab_ele *)(ht_info->PeerHTCapBuf);
 
-	if (!memcmp(pHTInfo->PeerHTInfoBuf, EWC11NHTInfo, sizeof(EWC11NHTInfo)))
+	if (!memcmp(ht_info->PeerHTInfoBuf, EWC11NHTInfo, sizeof(EWC11NHTInfo)))
 		pPeerHTInfo = (struct ht_info_ele *)
-			     (&pHTInfo->PeerHTInfoBuf[4]);
+			     (&ht_info->PeerHTInfoBuf[4]);
 	else
-		pPeerHTInfo = (struct ht_info_ele *)(pHTInfo->PeerHTInfoBuf);
+		pPeerHTInfo = (struct ht_info_ele *)(ht_info->PeerHTInfoBuf);
 
 #ifdef VERBOSE_DEBUG
 	print_hex_dump_bytes("%s: ", __func__, DUMP_PREFIX_NONE,
@@ -541,63 +541,63 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 #endif
 	HTSetConnectBwMode(ieee, (enum ht_channel_width)(pPeerHTCap->ChlWidth),
 			   (enum ht_extchnl_offset)(pPeerHTInfo->ExtChlOffset));
-	pHTInfo->cur_tx_bw40mhz = ((pPeerHTInfo->RecommemdedTxWidth == 1) ?
+	ht_info->cur_tx_bw40mhz = ((pPeerHTInfo->RecommemdedTxWidth == 1) ?
 				 true : false);
 
-	pHTInfo->bCurShortGI20MHz = ((pHTInfo->bRegShortGI20MHz) ?
+	ht_info->bCurShortGI20MHz = ((ht_info->bRegShortGI20MHz) ?
 				    ((pPeerHTCap->ShortGI20Mhz == 1) ?
 				    true : false) : false);
-	pHTInfo->bCurShortGI40MHz = ((pHTInfo->bRegShortGI40MHz) ?
+	ht_info->bCurShortGI40MHz = ((ht_info->bRegShortGI40MHz) ?
 				     ((pPeerHTCap->ShortGI40Mhz == 1) ?
 				     true : false) : false);
 
-	pHTInfo->bCurSuppCCK = ((pHTInfo->bRegSuppCCK) ?
+	ht_info->bCurSuppCCK = ((ht_info->bRegSuppCCK) ?
 			       ((pPeerHTCap->DssCCk == 1) ? true :
 			       false) : false);
 
-	pHTInfo->bCurrent_AMSDU_Support = pHTInfo->bAMSDU_Support;
+	ht_info->bCurrent_AMSDU_Support = ht_info->bAMSDU_Support;
 
 	nMaxAMSDUSize = (pPeerHTCap->MaxAMSDUSize == 0) ? 3839 : 7935;
 
-	if (pHTInfo->nAMSDU_MaxSize > nMaxAMSDUSize)
-		pHTInfo->nCurrent_AMSDU_MaxSize = nMaxAMSDUSize;
+	if (ht_info->nAMSDU_MaxSize > nMaxAMSDUSize)
+		ht_info->nCurrent_AMSDU_MaxSize = nMaxAMSDUSize;
 	else
-		pHTInfo->nCurrent_AMSDU_MaxSize = pHTInfo->nAMSDU_MaxSize;
+		ht_info->nCurrent_AMSDU_MaxSize = ht_info->nAMSDU_MaxSize;
 
-	pHTInfo->bCurrentAMPDUEnable = pHTInfo->bAMPDUEnable;
+	ht_info->bCurrentAMPDUEnable = ht_info->bAMPDUEnable;
 	if (ieee->rtllib_ap_sec_type &&
 	    (ieee->rtllib_ap_sec_type(ieee) & (SEC_ALG_WEP | SEC_ALG_TKIP))) {
-		if ((pHTInfo->IOTPeer == HT_IOT_PEER_ATHEROS) ||
-		    (pHTInfo->IOTPeer == HT_IOT_PEER_UNKNOWN))
-			pHTInfo->bCurrentAMPDUEnable = false;
+		if ((ht_info->IOTPeer == HT_IOT_PEER_ATHEROS) ||
+		    (ht_info->IOTPeer == HT_IOT_PEER_UNKNOWN))
+			ht_info->bCurrentAMPDUEnable = false;
 	}
 
-	if (!pHTInfo->reg_rt2rt_aggregation) {
-		if (pHTInfo->AMPDU_Factor > pPeerHTCap->MaxRxAMPDUFactor)
-			pHTInfo->CurrentAMPDUFactor =
+	if (!ht_info->reg_rt2rt_aggregation) {
+		if (ht_info->AMPDU_Factor > pPeerHTCap->MaxRxAMPDUFactor)
+			ht_info->CurrentAMPDUFactor =
 						 pPeerHTCap->MaxRxAMPDUFactor;
 		else
-			pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor;
+			ht_info->CurrentAMPDUFactor = ht_info->AMPDU_Factor;
 
 	} else {
 		if (ieee->current_network.bssht.bd_rt2rt_aggregation) {
 			if (ieee->pairwise_key_type != KEY_TYPE_NA)
-				pHTInfo->CurrentAMPDUFactor =
+				ht_info->CurrentAMPDUFactor =
 						 pPeerHTCap->MaxRxAMPDUFactor;
 			else
-				pHTInfo->CurrentAMPDUFactor = HT_AGG_SIZE_64K;
+				ht_info->CurrentAMPDUFactor = HT_AGG_SIZE_64K;
 		} else {
-			pHTInfo->CurrentAMPDUFactor = min_t(u32, pPeerHTCap->MaxRxAMPDUFactor,
+			ht_info->CurrentAMPDUFactor = min_t(u32, pPeerHTCap->MaxRxAMPDUFactor,
 							    HT_AGG_SIZE_32K);
 		}
 	}
-	pHTInfo->current_mpdu_density = max_t(u8, pHTInfo->MPDU_Density,
+	ht_info->current_mpdu_density = max_t(u8, ht_info->MPDU_Density,
 					      pPeerHTCap->MPDUDensity);
-	if (pHTInfo->iot_action & HT_IOT_ACT_TX_USE_AMSDU_8K) {
-		pHTInfo->bCurrentAMPDUEnable = false;
-		pHTInfo->ForcedAMSDUMode = HT_AGG_FORCE_ENABLE;
+	if (ht_info->iot_action & HT_IOT_ACT_TX_USE_AMSDU_8K) {
+		ht_info->bCurrentAMPDUEnable = false;
+		ht_info->ForcedAMSDUMode = HT_AGG_FORCE_ENABLE;
 	}
-	pHTInfo->cur_rx_reorder_enable = pHTInfo->reg_rx_reorder_enable;
+	ht_info->cur_rx_reorder_enable = ht_info->reg_rx_reorder_enable;
 
 	if (pPeerHTCap->MCS[0] == 0)
 		pPeerHTCap->MCS[0] = 0xff;
@@ -606,8 +606,8 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 
 	HTFilterMCSRate(ieee, pPeerHTCap->MCS, ieee->dot11ht_oper_rate_set);
 
-	pHTInfo->peer_mimo_ps = pPeerHTCap->MimoPwrSave;
-	if (pHTInfo->peer_mimo_ps == MIMO_PS_STATIC)
+	ht_info->peer_mimo_ps = pPeerHTCap->MimoPwrSave;
+	if (ht_info->peer_mimo_ps == MIMO_PS_STATIC)
 		pMcsFilter = MCS_FILTER_1SS;
 	else
 		pMcsFilter = MCS_FILTER_ALL;
@@ -616,49 +616,49 @@ void HTOnAssocRsp(struct rtllib_device *ieee)
 						       pMcsFilter);
 	ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate;
 
-	pHTInfo->current_op_mode = pPeerHTInfo->OptMode;
+	ht_info->current_op_mode = pPeerHTInfo->OptMode;
 }
 
 void HTInitializeHTInfo(struct rtllib_device *ieee)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
-	pHTInfo->bCurrentHTSupport = false;
+	ht_info->bCurrentHTSupport = false;
 
-	pHTInfo->bCurBW40MHz = false;
-	pHTInfo->cur_tx_bw40mhz = false;
+	ht_info->bCurBW40MHz = false;
+	ht_info->cur_tx_bw40mhz = false;
 
-	pHTInfo->bCurShortGI20MHz = false;
-	pHTInfo->bCurShortGI40MHz = false;
-	pHTInfo->forced_short_gi = false;
+	ht_info->bCurShortGI20MHz = false;
+	ht_info->bCurShortGI40MHz = false;
+	ht_info->forced_short_gi = false;
 
-	pHTInfo->bCurSuppCCK = true;
+	ht_info->bCurSuppCCK = true;
 
-	pHTInfo->bCurrent_AMSDU_Support = false;
-	pHTInfo->nCurrent_AMSDU_MaxSize = pHTInfo->nAMSDU_MaxSize;
-	pHTInfo->current_mpdu_density = pHTInfo->MPDU_Density;
-	pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor;
+	ht_info->bCurrent_AMSDU_Support = false;
+	ht_info->nCurrent_AMSDU_MaxSize = ht_info->nAMSDU_MaxSize;
+	ht_info->current_mpdu_density = ht_info->MPDU_Density;
+	ht_info->CurrentAMPDUFactor = ht_info->AMPDU_Factor;
 
-	memset((void *)(&pHTInfo->SelfHTCap), 0,
-	       sizeof(pHTInfo->SelfHTCap));
-	memset((void *)(&pHTInfo->SelfHTInfo), 0,
-	       sizeof(pHTInfo->SelfHTInfo));
-	memset((void *)(&pHTInfo->PeerHTCapBuf), 0,
-	       sizeof(pHTInfo->PeerHTCapBuf));
-	memset((void *)(&pHTInfo->PeerHTInfoBuf), 0,
-	       sizeof(pHTInfo->PeerHTInfoBuf));
+	memset((void *)(&ht_info->SelfHTCap), 0,
+	       sizeof(ht_info->SelfHTCap));
+	memset((void *)(&ht_info->SelfHTInfo), 0,
+	       sizeof(ht_info->SelfHTInfo));
+	memset((void *)(&ht_info->PeerHTCapBuf), 0,
+	       sizeof(ht_info->PeerHTCapBuf));
+	memset((void *)(&ht_info->PeerHTInfoBuf), 0,
+	       sizeof(ht_info->PeerHTInfoBuf));
 
-	pHTInfo->sw_bw_in_progress = false;
+	ht_info->sw_bw_in_progress = false;
 
-	pHTInfo->ePeerHTSpecVer = HT_SPEC_VER_IEEE;
+	ht_info->ePeerHTSpecVer = HT_SPEC_VER_IEEE;
 
-	pHTInfo->current_rt2rt_aggregation = false;
-	pHTInfo->current_rt2rt_long_slot_time = false;
-	pHTInfo->RT2RT_HT_Mode = (enum rt_ht_capability)0;
+	ht_info->current_rt2rt_aggregation = false;
+	ht_info->current_rt2rt_long_slot_time = false;
+	ht_info->RT2RT_HT_Mode = (enum rt_ht_capability)0;
 
-	pHTInfo->IOTPeer = 0;
-	pHTInfo->iot_action = 0;
-	pHTInfo->iot_ra_func = 0;
+	ht_info->IOTPeer = 0;
+	ht_info->iot_action = 0;
+	ht_info->iot_ra_func = 0;
 
 	{
 		u8 *RegHTSuppRateSets = &ieee->reg_ht_supp_rate_set[0];
@@ -687,114 +687,114 @@ void HTInitializeBssDesc(struct bss_ht *pBssHT)
 void HTResetSelfAndSavePeerSetting(struct rtllib_device *ieee,
 				   struct rtllib_network *pNetwork)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	u8	bIOTAction = 0;
 
 	/* unmark enable_ht flag here is the same reason why unmarked in
 	 * function rtllib_softmac_new_net. WB 2008.09.10
 	 */
 	if (pNetwork->bssht.bd_support_ht) {
-		pHTInfo->bCurrentHTSupport = true;
-		pHTInfo->ePeerHTSpecVer = pNetwork->bssht.bd_ht_spec_ver;
+		ht_info->bCurrentHTSupport = true;
+		ht_info->ePeerHTSpecVer = pNetwork->bssht.bd_ht_spec_ver;
 
 		if (pNetwork->bssht.bd_ht_cap_len > 0 &&
-		    pNetwork->bssht.bd_ht_cap_len <= sizeof(pHTInfo->PeerHTCapBuf))
-			memcpy(pHTInfo->PeerHTCapBuf,
+		    pNetwork->bssht.bd_ht_cap_len <= sizeof(ht_info->PeerHTCapBuf))
+			memcpy(ht_info->PeerHTCapBuf,
 			       pNetwork->bssht.bd_ht_cap_buf,
 			       pNetwork->bssht.bd_ht_cap_len);
 
 		if (pNetwork->bssht.bd_ht_info_len > 0 &&
 		    pNetwork->bssht.bd_ht_info_len <=
-		    sizeof(pHTInfo->PeerHTInfoBuf))
-			memcpy(pHTInfo->PeerHTInfoBuf,
+		    sizeof(ht_info->PeerHTInfoBuf))
+			memcpy(ht_info->PeerHTInfoBuf,
 			       pNetwork->bssht.bd_ht_info_buf,
 			       pNetwork->bssht.bd_ht_info_len);
 
-		if (pHTInfo->reg_rt2rt_aggregation) {
-			pHTInfo->current_rt2rt_aggregation =
+		if (ht_info->reg_rt2rt_aggregation) {
+			ht_info->current_rt2rt_aggregation =
 				 pNetwork->bssht.bd_rt2rt_aggregation;
-			pHTInfo->current_rt2rt_long_slot_time =
+			ht_info->current_rt2rt_long_slot_time =
 				 pNetwork->bssht.bd_rt2rt_long_slot_time;
-			pHTInfo->RT2RT_HT_Mode = pNetwork->bssht.rt2rt_ht_mode;
+			ht_info->RT2RT_HT_Mode = pNetwork->bssht.rt2rt_ht_mode;
 		} else {
-			pHTInfo->current_rt2rt_aggregation = false;
-			pHTInfo->current_rt2rt_long_slot_time = false;
-			pHTInfo->RT2RT_HT_Mode = (enum rt_ht_capability)0;
+			ht_info->current_rt2rt_aggregation = false;
+			ht_info->current_rt2rt_long_slot_time = false;
+			ht_info->RT2RT_HT_Mode = (enum rt_ht_capability)0;
 		}
 
 		HTIOTPeerDetermine(ieee);
 
-		pHTInfo->iot_action = 0;
+		ht_info->iot_action = 0;
 		bIOTAction = HTIOTActIsDisableMCS14(ieee, pNetwork->bssid);
 		if (bIOTAction)
-			pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_MCS14;
+			ht_info->iot_action |= HT_IOT_ACT_DISABLE_MCS14;
 
 		bIOTAction = HTIOTActIsDisableMCS15(ieee);
 		if (bIOTAction)
-			pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_MCS15;
+			ht_info->iot_action |= HT_IOT_ACT_DISABLE_MCS15;
 
 		bIOTAction = HTIOTActIsDisableMCSTwoSpatialStream(ieee);
 		if (bIOTAction)
-			pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_ALL_2SS;
+			ht_info->iot_action |= HT_IOT_ACT_DISABLE_ALL_2SS;
 
 		bIOTAction = HTIOTActIsDisableEDCATurbo(ieee, pNetwork->bssid);
 		if (bIOTAction)
-			pHTInfo->iot_action |= HT_IOT_ACT_DISABLE_EDCA_TURBO;
+			ht_info->iot_action |= HT_IOT_ACT_DISABLE_EDCA_TURBO;
 
 		bIOTAction = HTIOTActIsMgntUseCCK6M(ieee, pNetwork);
 		if (bIOTAction)
-			pHTInfo->iot_action |= HT_IOT_ACT_MGNT_USE_CCK_6M;
+			ht_info->iot_action |= HT_IOT_ACT_MGNT_USE_CCK_6M;
 		bIOTAction = HTIOTActIsCCDFsync(ieee);
 		if (bIOTAction)
-			pHTInfo->iot_action |= HT_IOT_ACT_CDD_FSYNC;
+			ht_info->iot_action |= HT_IOT_ACT_CDD_FSYNC;
 	} else {
-		pHTInfo->bCurrentHTSupport = false;
-		pHTInfo->current_rt2rt_aggregation = false;
-		pHTInfo->current_rt2rt_long_slot_time = false;
-		pHTInfo->RT2RT_HT_Mode = (enum rt_ht_capability)0;
+		ht_info->bCurrentHTSupport = false;
+		ht_info->current_rt2rt_aggregation = false;
+		ht_info->current_rt2rt_long_slot_time = false;
+		ht_info->RT2RT_HT_Mode = (enum rt_ht_capability)0;
 
-		pHTInfo->iot_action = 0;
-		pHTInfo->iot_ra_func = 0;
+		ht_info->iot_action = 0;
+		ht_info->iot_ra_func = 0;
 	}
 }
 
 void HT_update_self_and_peer_setting(struct rtllib_device *ieee,
 				     struct rtllib_network *pNetwork)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	struct ht_info_ele *pPeerHTInfo =
 		 (struct ht_info_ele *)pNetwork->bssht.bd_ht_info_buf;
 
-	if (pHTInfo->bCurrentHTSupport) {
+	if (ht_info->bCurrentHTSupport) {
 		if (pNetwork->bssht.bd_ht_info_len != 0)
-			pHTInfo->current_op_mode = pPeerHTInfo->OptMode;
+			ht_info->current_op_mode = pPeerHTInfo->OptMode;
 	}
 }
 EXPORT_SYMBOL(HT_update_self_and_peer_setting);
 
 void HTUseDefaultSetting(struct rtllib_device *ieee)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
-	if (pHTInfo->enable_ht) {
-		pHTInfo->bCurrentHTSupport = true;
-		pHTInfo->bCurSuppCCK = pHTInfo->bRegSuppCCK;
+	if (ht_info->enable_ht) {
+		ht_info->bCurrentHTSupport = true;
+		ht_info->bCurSuppCCK = ht_info->bRegSuppCCK;
 
-		pHTInfo->bCurBW40MHz = pHTInfo->bRegBW40MHz;
-		pHTInfo->bCurShortGI20MHz = pHTInfo->bRegShortGI20MHz;
+		ht_info->bCurBW40MHz = ht_info->bRegBW40MHz;
+		ht_info->bCurShortGI20MHz = ht_info->bRegShortGI20MHz;
 
-		pHTInfo->bCurShortGI40MHz = pHTInfo->bRegShortGI40MHz;
+		ht_info->bCurShortGI40MHz = ht_info->bRegShortGI40MHz;
 
 		if (ieee->iw_mode == IW_MODE_ADHOC)
 			ieee->current_network.qos_data.active =
 				 ieee->current_network.qos_data.supported;
-		pHTInfo->bCurrent_AMSDU_Support = pHTInfo->bAMSDU_Support;
-		pHTInfo->nCurrent_AMSDU_MaxSize = pHTInfo->nAMSDU_MaxSize;
+		ht_info->bCurrent_AMSDU_Support = ht_info->bAMSDU_Support;
+		ht_info->nCurrent_AMSDU_MaxSize = ht_info->nAMSDU_MaxSize;
 
-		pHTInfo->bCurrentAMPDUEnable = pHTInfo->bAMPDUEnable;
-		pHTInfo->CurrentAMPDUFactor = pHTInfo->AMPDU_Factor;
+		ht_info->bCurrentAMPDUEnable = ht_info->bAMPDUEnable;
+		ht_info->CurrentAMPDUFactor = ht_info->AMPDU_Factor;
 
-		pHTInfo->current_mpdu_density = pHTInfo->current_mpdu_density;
+		ht_info->current_mpdu_density = ht_info->current_mpdu_density;
 
 		HTFilterMCSRate(ieee, ieee->reg_dot11tx_ht_oper_rate_set,
 				ieee->dot11ht_oper_rate_set);
@@ -804,13 +804,13 @@ void HTUseDefaultSetting(struct rtllib_device *ieee)
 		ieee->HTCurrentOperaRate = ieee->HTHighestOperaRate;
 
 	} else {
-		pHTInfo->bCurrentHTSupport = false;
+		ht_info->bCurrentHTSupport = false;
 	}
 }
 
 u8 HTCCheck(struct rtllib_device *ieee, u8 *pFrame)
 {
-	if (ieee->pHTInfo->bCurrentHTSupport) {
+	if (ieee->ht_info->bCurrentHTSupport) {
 		if ((IsQoSDataFrame(pFrame) && Frame_Order(pFrame)) == 1) {
 			netdev_dbg(ieee->dev, "HT CONTROL FILED EXIST!!\n");
 			return true;
@@ -821,13 +821,13 @@ u8 HTCCheck(struct rtllib_device *ieee, u8 *pFrame)
 
 static void HTSetConnectBwModeCallback(struct rtllib_device *ieee)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
-	if (pHTInfo->bCurBW40MHz) {
-		if (pHTInfo->CurSTAExtChnlOffset == HT_EXTCHNL_OFFSET_UPPER)
+	if (ht_info->bCurBW40MHz) {
+		if (ht_info->CurSTAExtChnlOffset == HT_EXTCHNL_OFFSET_UPPER)
 			ieee->set_chan(ieee->dev,
 				       ieee->current_network.channel + 2);
-		else if (pHTInfo->CurSTAExtChnlOffset ==
+		else if (ht_info->CurSTAExtChnlOffset ==
 			 HT_EXTCHNL_OFFSET_LOWER)
 			ieee->set_chan(ieee->dev,
 				       ieee->current_network.channel - 2);
@@ -836,29 +836,29 @@ static void HTSetConnectBwModeCallback(struct rtllib_device *ieee)
 				       ieee->current_network.channel);
 
 		ieee->SetBWModeHandler(ieee->dev, HT_CHANNEL_WIDTH_20_40,
-				       pHTInfo->CurSTAExtChnlOffset);
+				       ht_info->CurSTAExtChnlOffset);
 	} else {
 		ieee->set_chan(ieee->dev, ieee->current_network.channel);
 		ieee->SetBWModeHandler(ieee->dev, HT_CHANNEL_WIDTH_20,
 				       HT_EXTCHNL_OFFSET_NO_EXT);
 	}
 
-	pHTInfo->sw_bw_in_progress = false;
+	ht_info->sw_bw_in_progress = false;
 }
 
 void HTSetConnectBwMode(struct rtllib_device *ieee,
 			enum ht_channel_width bandwidth,
 			enum ht_extchnl_offset Offset)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
-	if (!pHTInfo->bRegBW40MHz)
+	if (!ht_info->bRegBW40MHz)
 		return;
 
 	if (ieee->GetHalfNmodeSupportByAPsHandler(ieee->dev))
 		bandwidth = HT_CHANNEL_WIDTH_20;
 
-	if (pHTInfo->sw_bw_in_progress) {
+	if (ht_info->sw_bw_in_progress) {
 		pr_info("%s: sw_bw_in_progress!!\n", __func__);
 		return;
 	}
@@ -868,21 +868,21 @@ void HTSetConnectBwMode(struct rtllib_device *ieee,
 			Offset = HT_EXTCHNL_OFFSET_NO_EXT;
 		if (Offset == HT_EXTCHNL_OFFSET_UPPER ||
 		    Offset == HT_EXTCHNL_OFFSET_LOWER) {
-			pHTInfo->bCurBW40MHz = true;
-			pHTInfo->CurSTAExtChnlOffset = Offset;
+			ht_info->bCurBW40MHz = true;
+			ht_info->CurSTAExtChnlOffset = Offset;
 		} else {
-			pHTInfo->bCurBW40MHz = false;
-			pHTInfo->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT;
+			ht_info->bCurBW40MHz = false;
+			ht_info->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT;
 		}
 	} else {
-		pHTInfo->bCurBW40MHz = false;
-		pHTInfo->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT;
+		ht_info->bCurBW40MHz = false;
+		ht_info->CurSTAExtChnlOffset = HT_EXTCHNL_OFFSET_NO_EXT;
 	}
 
-	netdev_dbg(ieee->dev, "%s():pHTInfo->bCurBW40MHz:%x\n", __func__,
-		   pHTInfo->bCurBW40MHz);
+	netdev_dbg(ieee->dev, "%s():ht_info->bCurBW40MHz:%x\n", __func__,
+		   ht_info->bCurBW40MHz);
 
-	pHTInfo->sw_bw_in_progress = true;
+	ht_info->sw_bw_in_progress = true;
 
 	HTSetConnectBwModeCallback(ieee);
 }
diff --git a/drivers/staging/rtl8192e/rtl819x_TSProc.c b/drivers/staging/rtl8192e/rtl819x_TSProc.c
index 05c7e822f3727..68c131afc2ba4 100644
--- a/drivers/staging/rtl8192e/rtl819x_TSProc.c
+++ b/drivers/staging/rtl8192e/rtl819x_TSProc.c
@@ -83,7 +83,7 @@ static void RxPktPendingTimeout(struct timer_list *t)
 	if (bPktInBuf && (pRxTs->rx_timeout_indicate_seq == 0xffff)) {
 		pRxTs->rx_timeout_indicate_seq = pRxTs->rx_indicate_seq;
 		mod_timer(&pRxTs->rx_pkt_pending_timer,  jiffies +
-			  msecs_to_jiffies(ieee->pHTInfo->rx_reorder_pending_time)
+			  msecs_to_jiffies(ieee->ht_info->rx_reorder_pending_time)
 			  );
 	}
 	spin_unlock_irqrestore(&(ieee->reorder_spinlock), flags);
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index 9c81ca38f4b1d..7119c9c5e1fef 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1432,7 +1432,7 @@ struct rtllib_device {
 	RT_RF_CHANGE_SOURCE rf_off_reason;
 	bool is_set_key;
 	bool wx_set_enc;
-	struct rt_hi_throughput *pHTInfo;
+	struct rt_hi_throughput *ht_info;
 
 	spinlock_t reorder_spinlock;
 	u8	reg_dot11ht_oper_rate_set[16];
@@ -2089,7 +2089,7 @@ u8 HTGetHighestMCSRate(struct rtllib_device *ieee, u8 *pMCSRateSet,
 extern u8 MCS_FILTER_ALL[];
 extern u16 MCS_DATA_RATE[2][2][77];
 u8 HTCCheck(struct rtllib_device *ieee, u8 *pFrame);
-void HTResetIOTSetting(struct rt_hi_throughput *pHTInfo);
+void HTResetIOTSetting(struct rt_hi_throughput *ht_info);
 bool IsHTHalfNmodeAPs(struct rtllib_device *ieee);
 u16  TxCountToDataRate(struct rtllib_device *ieee, u8 nDataRate);
 int rtllib_rx_ADDBAReq(struct rtllib_device *ieee, struct sk_buff *skb);
diff --git a/drivers/staging/rtl8192e/rtllib_module.c b/drivers/staging/rtl8192e/rtllib_module.c
index ce8b73f437a3d..d6a4d6b4ec578 100644
--- a/drivers/staging/rtl8192e/rtllib_module.c
+++ b/drivers/staging/rtl8192e/rtllib_module.c
@@ -125,8 +125,8 @@ struct net_device *alloc_rtllib(int sizeof_priv)
 	if (err)
 		goto free_crypt_info;
 
-	ieee->pHTInfo = kzalloc(sizeof(struct rt_hi_throughput), GFP_KERNEL);
-	if (!ieee->pHTInfo)
+	ieee->ht_info = kzalloc(sizeof(struct rt_hi_throughput), GFP_KERNEL);
+	if (!ieee->ht_info)
 		goto free_softmac;
 
 	HTUpdateDefaultSetting(ieee);
@@ -160,7 +160,7 @@ void free_rtllib(struct net_device *dev)
 	struct rtllib_device *ieee = (struct rtllib_device *)
 				      netdev_priv_rsl(dev);
 
-	kfree(ieee->pHTInfo);
+	kfree(ieee->ht_info);
 	rtllib_softmac_free(ieee);
 
 	lib80211_crypt_info_free(&ieee->crypt_info);
diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index 75190c389ccf1..6a0f5bbb99ef4 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -567,9 +567,9 @@ static void RxReorderIndicatePacket(struct rtllib_device *ieee,
 				    struct rtllib_rxb *prxb,
 				    struct rx_ts_record *pTS, u16 SeqNum)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	struct rx_reorder_entry *pReorderEntry = NULL;
-	u8 WinSize = pHTInfo->rx_reorder_win_size;
+	u8 WinSize = ht_info->rx_reorder_win_size;
 	u16 WinEnd = 0;
 	u8 index = 0;
 	bool bMatchWinStart = false, bPktInBuf = false;
@@ -591,7 +591,7 @@ static void RxReorderIndicatePacket(struct rtllib_device *ieee,
 		netdev_dbg(ieee->dev,
 			   "Packet Drop! IndicateSeq: %d, NewSeq: %d\n",
 			   pTS->rx_indicate_seq, SeqNum);
-		pHTInfo->rx_reorder_drop_counter++;
+		ht_info->rx_reorder_drop_counter++;
 		{
 			int i;
 
@@ -755,7 +755,7 @@ static void RxReorderIndicatePacket(struct rtllib_device *ieee,
 		netdev_dbg(ieee->dev, "%s(): SET rx timeout timer\n", __func__);
 		pTS->rx_timeout_indicate_seq = pTS->rx_indicate_seq;
 		mod_timer(&pTS->rx_pkt_pending_timer, jiffies +
-			  msecs_to_jiffies(pHTInfo->rx_reorder_pending_time));
+			  msecs_to_jiffies(ht_info->rx_reorder_pending_time));
 	}
 	spin_unlock_irqrestore(&(ieee->reorder_spinlock), flags);
 }
@@ -924,7 +924,7 @@ static int rtllib_rx_check_duplicate(struct rtllib_device *ieee,
 	sc = le16_to_cpu(hdr->seq_ctl);
 	frag = WLAN_GET_SEQ_FRAG(sc);
 
-	if (!ieee->pHTInfo->cur_rx_reorder_enable ||
+	if (!ieee->ht_info->cur_rx_reorder_enable ||
 		!ieee->current_network.qos_data.active ||
 		!IsDataFrame(skb->data) ||
 		IsLegacyDataFrame(skb->data)) {
@@ -1442,7 +1442,7 @@ static int rtllib_rx_InfraAdhoc(struct rtllib_device *ieee, struct sk_buff *skb,
 	}
 
 	/* Indicate packets to upper layer or Rx Reorder */
-	if (!ieee->pHTInfo->cur_rx_reorder_enable || pTS == NULL || bToOtherSTA)
+	if (!ieee->ht_info->cur_rx_reorder_enable || pTS == NULL || bToOtherSTA)
 		rtllib_rx_indicate_pkt_legacy(ieee, rx_stats, rxb, dst, src);
 	else
 		RxReorderIndicatePacket(ieee, rxb, pTS, SeqNum);
diff --git a/drivers/staging/rtl8192e/rtllib_softmac.c b/drivers/staging/rtl8192e/rtllib_softmac.c
index 8b1d88061125a..2552aa089700f 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac.c
@@ -176,10 +176,10 @@ u8 MgntQuery_TxRateExcludeCCKRates(struct rtllib_device *ieee)
 
 static u8 MgntQuery_MgntFrameTxRate(struct rtllib_device *ieee)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	u8 rate;
 
-	if (pHTInfo->iot_action & HT_IOT_ACT_MGNT_USE_CCK_6M)
+	if (ht_info->iot_action & HT_IOT_ACT_MGNT_USE_CCK_6M)
 		rate = 0x0c;
 	else
 		rate = ieee->basic_rate & 0x7f;
@@ -187,7 +187,7 @@ static u8 MgntQuery_MgntFrameTxRate(struct rtllib_device *ieee)
 	if (rate == 0) {
 		if (ieee->mode == IEEE_A ||
 		   ieee->mode == IEEE_N_5G ||
-		   (ieee->mode == IEEE_N_24G && !pHTInfo->bCurSuppCCK))
+		   (ieee->mode == IEEE_N_24G && !ht_info->bCurSuppCCK))
 			rate = 0x0c;
 		else
 			rate = 0x02;
@@ -829,7 +829,7 @@ static struct sk_buff *rtllib_probe_resp(struct rtllib_device *ieee,
 	u8 tmp_ht_cap_len = 0;
 	u8 *tmp_ht_info_buf = NULL;
 	u8 tmp_ht_info_len = 0;
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	u8 *tmp_generic_ie_buf = NULL;
 	u8 tmp_generic_ie_len = 0;
 
@@ -843,7 +843,7 @@ static struct sk_buff *rtllib_probe_resp(struct rtllib_device *ieee,
 
 	if ((ieee->current_network.mode == IEEE_G) ||
 	   (ieee->current_network.mode == IEEE_N_24G &&
-	   ieee->pHTInfo->bCurSuppCCK)) {
+	   ieee->ht_info->bCurSuppCCK)) {
 		erp_len = 3;
 		erpinfo_content = 0;
 		if (ieee->current_network.buseprotection)
@@ -854,20 +854,20 @@ static struct sk_buff *rtllib_probe_resp(struct rtllib_device *ieee,
 	crypt = ieee->crypt_info.crypt[ieee->crypt_info.tx_keyidx];
 	encrypt = ieee->host_encrypt && crypt && crypt->ops &&
 		((strcmp(crypt->ops->name, "R-WEP") == 0 || wpa_ie_len));
-	if (ieee->pHTInfo->bCurrentHTSupport) {
-		tmp_ht_cap_buf = (u8 *)&(ieee->pHTInfo->SelfHTCap);
-		tmp_ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap);
-		tmp_ht_info_buf = (u8 *)&(ieee->pHTInfo->SelfHTInfo);
-		tmp_ht_info_len = sizeof(ieee->pHTInfo->SelfHTInfo);
+	if (ieee->ht_info->bCurrentHTSupport) {
+		tmp_ht_cap_buf = (u8 *)&(ieee->ht_info->SelfHTCap);
+		tmp_ht_cap_len = sizeof(ieee->ht_info->SelfHTCap);
+		tmp_ht_info_buf = (u8 *)&(ieee->ht_info->SelfHTInfo);
+		tmp_ht_info_len = sizeof(ieee->ht_info->SelfHTInfo);
 		HTConstructCapabilityElement(ieee, tmp_ht_cap_buf,
 					     &tmp_ht_cap_len, encrypt, false);
 		HTConstructInfoElement(ieee, tmp_ht_info_buf, &tmp_ht_info_len,
 				       encrypt);
 
-		if (pHTInfo->reg_rt2rt_aggregation) {
-			tmp_generic_ie_buf = ieee->pHTInfo->sz_rt2rt_agg_buf;
+		if (ht_info->reg_rt2rt_aggregation) {
+			tmp_generic_ie_buf = ieee->ht_info->sz_rt2rt_agg_buf;
 			tmp_generic_ie_len =
-				 sizeof(ieee->pHTInfo->sz_rt2rt_agg_buf);
+				 sizeof(ieee->ht_info->sz_rt2rt_agg_buf);
 			HTConstructRT2RTAggElement(ieee, tmp_generic_ie_buf,
 						   &tmp_generic_ie_len);
 		}
@@ -1179,19 +1179,19 @@ rtllib_association_req(struct rtllib_network *beacon,
 	if ((ieee->rtllib_ap_sec_type &&
 	    (ieee->rtllib_ap_sec_type(ieee) & SEC_ALG_TKIP)) ||
 	    ieee->bForcedBgMode) {
-		ieee->pHTInfo->enable_ht = 0;
+		ieee->ht_info->enable_ht = 0;
 		ieee->mode = WIRELESS_MODE_G;
 	}
 
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
-		ht_cap_buf = (u8 *)&(ieee->pHTInfo->SelfHTCap);
-		ht_cap_len = sizeof(ieee->pHTInfo->SelfHTCap);
+	if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) {
+		ht_cap_buf = (u8 *)&(ieee->ht_info->SelfHTCap);
+		ht_cap_len = sizeof(ieee->ht_info->SelfHTCap);
 		HTConstructCapabilityElement(ieee, ht_cap_buf, &ht_cap_len,
 					     encrypt, true);
-		if (ieee->pHTInfo->current_rt2rt_aggregation) {
-			realtek_ie_buf = ieee->pHTInfo->sz_rt2rt_agg_buf;
+		if (ieee->ht_info->current_rt2rt_aggregation) {
+			realtek_ie_buf = ieee->ht_info->sz_rt2rt_agg_buf;
 			realtek_ie_len =
-				 sizeof(ieee->pHTInfo->sz_rt2rt_agg_buf);
+				 sizeof(ieee->ht_info->sz_rt2rt_agg_buf);
 			HTConstructRT2RTAggElement(ieee, realtek_ie_buf,
 						   &realtek_ie_len);
 		}
@@ -1324,8 +1324,8 @@ rtllib_association_req(struct rtllib_network *beacon,
 		memcpy(tag, osCcxVerNum.Octet, osCcxVerNum.Length);
 		tag += osCcxVerNum.Length;
 	}
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
-		if (ieee->pHTInfo->ePeerHTSpecVer != HT_SPEC_VER_EWC) {
+	if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) {
+		if (ieee->ht_info->ePeerHTSpecVer != HT_SPEC_VER_EWC) {
 			tag = skb_put(skb, ht_cap_len);
 			*tag++ = MFIE_TYPE_HT_CAP;
 			*tag++ = ht_cap_len - 2;
@@ -1358,8 +1358,8 @@ rtllib_association_req(struct rtllib_network *beacon,
 		rtllib_TURBO_Info(ieee, &tag);
 	}
 
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
-		if (ieee->pHTInfo->ePeerHTSpecVer == HT_SPEC_VER_EWC) {
+	if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) {
+		if (ieee->ht_info->ePeerHTSpecVer == HT_SPEC_VER_EWC) {
 			tag = skb_put(skb, ht_cap_len);
 			*tag++ = MFIE_TYPE_GENERIC;
 			*tag++ = ht_cap_len - 2;
@@ -1367,7 +1367,7 @@ rtllib_association_req(struct rtllib_network *beacon,
 			tag += ht_cap_len - 2;
 		}
 
-		if (ieee->pHTInfo->current_rt2rt_aggregation) {
+		if (ieee->ht_info->current_rt2rt_aggregation) {
 			tag = skb_put(skb, realtek_ie_len);
 			*tag++ = MFIE_TYPE_GENERIC;
 			*tag++ = realtek_ie_len - 2;
@@ -1524,14 +1524,14 @@ static void rtllib_associate_complete_wq(void *data)
 		ieee->SetWirelessMode(ieee->dev, IEEE_B);
 		netdev_info(ieee->dev, "Using B rates:%d\n", ieee->rate);
 	}
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht) {
+	if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht) {
 		netdev_info(ieee->dev, "Successfully associated, ht enabled\n");
 		HTOnAssocRsp(ieee);
 	} else {
 		netdev_info(ieee->dev,
 			    "Successfully associated, ht not enabled(%d, %d)\n",
-			    ieee->pHTInfo->bCurrentHTSupport,
-			    ieee->pHTInfo->enable_ht);
+			    ieee->ht_info->bCurrentHTSupport,
+			    ieee->ht_info->enable_ht);
 		memset(ieee->dot11ht_oper_rate_set, 0, 16);
 	}
 	ieee->link_detect_info.SlotNum = 2 * (1 +
@@ -1684,7 +1684,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee,
 				    ieee->current_network.ssid,
 				    ieee->current_network.channel,
 				    ieee->current_network.qos_data.supported,
-				    ieee->pHTInfo->enable_ht,
+				    ieee->ht_info->enable_ht,
 				    ieee->current_network.bssht.bd_support_ht,
 				    ieee->current_network.mode,
 				    ieee->current_network.flags);
@@ -1693,7 +1693,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee,
 			   !(ieee->softmac_features & IEEE_SOFTMAC_SCAN))
 				rtllib_stop_scan_syncro(ieee);
 
-			HTResetIOTSetting(ieee->pHTInfo);
+			HTResetIOTSetting(ieee->ht_info);
 			ieee->wmm_acm = 0;
 			if (ieee->iw_mode == IW_MODE_INFRA) {
 				/* Join the network for the first time */
@@ -1703,7 +1703,7 @@ inline void rtllib_softmac_new_net(struct rtllib_device *ieee,
 					HTResetSelfAndSavePeerSetting(ieee,
 						 &(ieee->current_network));
 				else
-					ieee->pHTInfo->bCurrentHTSupport =
+					ieee->ht_info->bCurrentHTSupport =
 								 false;
 
 				ieee->state = RTLLIB_ASSOCIATING;
@@ -1893,7 +1893,7 @@ static inline u16 assoc_parse(struct rtllib_device *ieee, struct sk_buff *skb,
 	   ((ieee->mode == IEEE_G) &&
 	   (ieee->current_network.mode == IEEE_N_24G) &&
 	   (ieee->AsocRetryCount++ < (RT_ASOC_RETRY_LIMIT-1)))) {
-		ieee->pHTInfo->iot_action |= HT_IOT_ACT_PURE_N_MODE;
+		ieee->ht_info->iot_action |= HT_IOT_ACT_PURE_N_MODE;
 	} else {
 		ieee->AsocRetryCount = 0;
 	}
@@ -2100,7 +2100,7 @@ static void rtllib_sta_wakeup(struct rtllib_device *ieee, short nl)
 {
 	if (ieee->sta_sleep == LPS_IS_WAKE) {
 		if (nl) {
-			if (ieee->pHTInfo->iot_action &
+			if (ieee->ht_info->iot_action &
 			    HT_IOT_ACT_NULL_DATA_POWER_SAVING) {
 				ieee->ack_tx_to_ieee = 1;
 				rtllib_sta_ps_send_null_frame(ieee, 0);
@@ -2116,7 +2116,7 @@ static void rtllib_sta_wakeup(struct rtllib_device *ieee, short nl)
 	if (ieee->sta_sleep == LPS_IS_SLEEP)
 		ieee->sta_wake_up(ieee->dev);
 	if (nl) {
-		if (ieee->pHTInfo->iot_action &
+		if (ieee->ht_info->iot_action &
 		    HT_IOT_ACT_NULL_DATA_POWER_SAVING) {
 			ieee->ack_tx_to_ieee = 1;
 			rtllib_sta_ps_send_null_frame(ieee, 0);
@@ -2151,7 +2151,7 @@ void rtllib_ps_tx_ack(struct rtllib_device *ieee, short success)
 
 		if ((ieee->sta_sleep == LPS_IS_WAKE) && !success) {
 			spin_lock_irqsave(&ieee->mgmt_tx_lock, flags2);
-			if (ieee->pHTInfo->iot_action &
+			if (ieee->ht_info->iot_action &
 			    HT_IOT_ACT_NULL_DATA_POWER_SAVING)
 				rtllib_sta_ps_send_null_frame(ieee, 0);
 			else
@@ -2235,10 +2235,10 @@ rtllib_rx_assoc_resp(struct rtllib_device *ieee, struct sk_buff *skb,
 					kfree(network);
 					return 1;
 				}
-				memcpy(ieee->pHTInfo->PeerHTCapBuf,
+				memcpy(ieee->ht_info->PeerHTCapBuf,
 				       network->bssht.bd_ht_cap_buf,
 				       network->bssht.bd_ht_cap_len);
-				memcpy(ieee->pHTInfo->PeerHTInfoBuf,
+				memcpy(ieee->ht_info->PeerHTInfoBuf,
 				       network->bssht.bd_ht_info_buf,
 				       network->bssht.bd_ht_info_len);
 				if (ieee->handle_assoc_response != NULL)
@@ -2295,7 +2295,7 @@ static void rtllib_rx_auth_resp(struct rtllib_device *ieee, struct sk_buff *skb)
 	if (ieee->open_wep || !challenge) {
 		ieee->state = RTLLIB_ASSOCIATING_AUTHENTICATED;
 		ieee->softmac_stats.rx_auth_rs_ok++;
-		if (!(ieee->pHTInfo->iot_action & HT_IOT_ACT_PURE_N_MODE)) {
+		if (!(ieee->ht_info->iot_action & HT_IOT_ACT_PURE_N_MODE)) {
 			if (!ieee->GetNmodeSupportBySecCfg(ieee->dev)) {
 				if (IsHTHalfNmodeAPs(ieee)) {
 					bSupportNmode = true;
@@ -2669,7 +2669,7 @@ static void rtllib_start_ibss_wq(void *data)
 	if ((ieee->mode == IEEE_N_24G) || (ieee->mode == IEEE_N_5G))
 		HTUseDefaultSetting(ieee);
 	else
-		ieee->pHTInfo->bCurrentHTSupport = false;
+		ieee->ht_info->bCurrentHTSupport = false;
 
 	ieee->SetHwRegHandler(ieee->dev, HW_VAR_MEDIA_STATUS,
 			      (u8 *)(&ieee->state));
diff --git a/drivers/staging/rtl8192e/rtllib_softmac_wx.c b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
index e02e7d9566b24..63edf68c0b49a 100644
--- a/drivers/staging/rtl8192e/rtllib_softmac_wx.c
+++ b/drivers/staging/rtl8192e/rtllib_softmac_wx.c
@@ -359,11 +359,11 @@ void rtllib_wx_sync_scan_wq(void *data)
 	if (ieee->ScanOperationBackupHandler)
 		ieee->ScanOperationBackupHandler(ieee->dev, SCAN_OPT_BACKUP);
 
-	if (ieee->pHTInfo->bCurrentHTSupport && ieee->pHTInfo->enable_ht &&
-	    ieee->pHTInfo->bCurBW40MHz) {
+	if (ieee->ht_info->bCurrentHTSupport && ieee->ht_info->enable_ht &&
+	    ieee->ht_info->bCurBW40MHz) {
 		b40M = 1;
-		chan_offset = ieee->pHTInfo->CurSTAExtChnlOffset;
-		bandwidth = (enum ht_channel_width)ieee->pHTInfo->bCurBW40MHz;
+		chan_offset = ieee->ht_info->CurSTAExtChnlOffset;
+		bandwidth = (enum ht_channel_width)ieee->ht_info->bCurBW40MHz;
 		ieee->SetBWModeHandler(ieee->dev, HT_CHANNEL_WIDTH_20,
 				       HT_EXTCHNL_OFFSET_NO_EXT);
 	}
diff --git a/drivers/staging/rtl8192e/rtllib_tx.c b/drivers/staging/rtl8192e/rtllib_tx.c
index 8b01cfe033813..9ab8ee46ef668 100644
--- a/drivers/staging/rtl8192e/rtllib_tx.c
+++ b/drivers/staging/rtl8192e/rtllib_tx.c
@@ -266,14 +266,14 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee,
 				    struct sk_buff *skb,
 				    struct cb_desc *tcb_desc)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 	struct tx_ts_record *pTxTs = NULL;
 	struct rtllib_hdr_1addr *hdr = (struct rtllib_hdr_1addr *)skb->data;
 
 	if (rtllib_act_scanning(ieee, false))
 		return;
 
-	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht)
+	if (!ht_info->bCurrentHTSupport || !ht_info->enable_ht)
 		return;
 	if (!IsQoSDataFrame(skb->data))
 		return;
@@ -283,12 +283,12 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee,
 	if (tcb_desc->bdhcp || ieee->CntAfterLink < 2)
 		return;
 
-	if (pHTInfo->iot_action & HT_IOT_ACT_TX_NO_AGGREGATION)
+	if (ht_info->iot_action & HT_IOT_ACT_TX_NO_AGGREGATION)
 		return;
 
 	if (!ieee->GetNmodeSupportBySecCfg(ieee->dev))
 		return;
-	if (pHTInfo->bCurrentAMPDUEnable) {
+	if (ht_info->bCurrentAMPDUEnable) {
 		if (!GetTs(ieee, (struct ts_common_info **)(&pTxTs), hdr->addr1,
 			   skb->priority, TX_DIR, true)) {
 			netdev_info(ieee->dev, "%s: can't get TS\n", __func__);
@@ -313,19 +313,19 @@ static void rtllib_tx_query_agg_cap(struct rtllib_device *ieee,
 		}
 		if (ieee->iw_mode == IW_MODE_INFRA) {
 			tcb_desc->bAMPDUEnable = true;
-			tcb_desc->ampdu_factor = pHTInfo->CurrentAMPDUFactor;
-			tcb_desc->ampdu_density = pHTInfo->current_mpdu_density;
+			tcb_desc->ampdu_factor = ht_info->CurrentAMPDUFactor;
+			tcb_desc->ampdu_density = ht_info->current_mpdu_density;
 		}
 	}
 FORCED_AGG_SETTING:
-	switch (pHTInfo->ForcedAMPDUMode) {
+	switch (ht_info->ForcedAMPDUMode) {
 	case HT_AGG_AUTO:
 		break;
 
 	case HT_AGG_FORCE_ENABLE:
 		tcb_desc->bAMPDUEnable = true;
-		tcb_desc->ampdu_density = pHTInfo->forced_mpdu_density;
-		tcb_desc->ampdu_factor = pHTInfo->forced_ampdu_factor;
+		tcb_desc->ampdu_density = ht_info->forced_mpdu_density;
+		tcb_desc->ampdu_factor = ht_info->forced_ampdu_factor;
 		break;
 
 	case HT_AGG_FORCE_DISABLE:
@@ -350,32 +350,32 @@ static void rtllib_query_ShortPreambleMode(struct rtllib_device *ieee,
 static void rtllib_query_HTCapShortGI(struct rtllib_device *ieee,
 				      struct cb_desc *tcb_desc)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
 	tcb_desc->bUseShortGI		= false;
 
-	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht)
+	if (!ht_info->bCurrentHTSupport || !ht_info->enable_ht)
 		return;
 
-	if (pHTInfo->forced_short_gi) {
+	if (ht_info->forced_short_gi) {
 		tcb_desc->bUseShortGI = true;
 		return;
 	}
 
-	if (pHTInfo->bCurBW40MHz && pHTInfo->bCurShortGI40MHz)
+	if (ht_info->bCurBW40MHz && ht_info->bCurShortGI40MHz)
 		tcb_desc->bUseShortGI = true;
-	else if (!pHTInfo->bCurBW40MHz && pHTInfo->bCurShortGI20MHz)
+	else if (!ht_info->bCurBW40MHz && ht_info->bCurShortGI20MHz)
 		tcb_desc->bUseShortGI = true;
 }
 
 static void rtllib_query_BandwidthMode(struct rtllib_device *ieee,
 				       struct cb_desc *tcb_desc)
 {
-	struct rt_hi_throughput *pHTInfo = ieee->pHTInfo;
+	struct rt_hi_throughput *ht_info = ieee->ht_info;
 
 	tcb_desc->bPacketBW = false;
 
-	if (!pHTInfo->bCurrentHTSupport || !pHTInfo->enable_ht)
+	if (!ht_info->bCurrentHTSupport || !ht_info->enable_ht)
 		return;
 
 	if (tcb_desc->bMulticast || tcb_desc->bBroadcast)
@@ -383,7 +383,7 @@ static void rtllib_query_BandwidthMode(struct rtllib_device *ieee,
 
 	if ((tcb_desc->data_rate & 0x80) == 0)
 		return;
-	if (pHTInfo->bCurBW40MHz && pHTInfo->cur_tx_bw40mhz &&
+	if (ht_info->bCurBW40MHz && ht_info->cur_tx_bw40mhz &&
 	    !ieee->bandwidth_auto_switch.bforced_tx20Mhz)
 		tcb_desc->bPacketBW = true;
 }
@@ -392,7 +392,7 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee,
 					struct cb_desc *tcb_desc,
 					struct sk_buff *skb)
 {
-	struct rt_hi_throughput *pHTInfo;
+	struct rt_hi_throughput *ht_info;
 
 	tcb_desc->bRTSSTBC			= false;
 	tcb_desc->bRTSUseShortGI		= false;
@@ -418,15 +418,15 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee,
 		return;
 	}
 
-	pHTInfo = ieee->pHTInfo;
+	ht_info = ieee->ht_info;
 
 	while (true) {
-		if (pHTInfo->iot_action & HT_IOT_ACT_FORCED_CTS2SELF) {
+		if (ht_info->iot_action & HT_IOT_ACT_FORCED_CTS2SELF) {
 			tcb_desc->bCTSEnable	= true;
 			tcb_desc->rts_rate  =	MGN_24M;
 			tcb_desc->bRTSEnable = true;
 			break;
-		} else if (pHTInfo->iot_action & (HT_IOT_ACT_FORCED_RTS |
+		} else if (ht_info->iot_action & (HT_IOT_ACT_FORCED_RTS |
 			   HT_IOT_ACT_PURE_N_MODE)) {
 			tcb_desc->bRTSEnable = true;
 			tcb_desc->rts_rate  =	MGN_24M;
@@ -438,12 +438,12 @@ static void rtllib_query_protectionmode(struct rtllib_device *ieee,
 			tcb_desc->rts_rate = MGN_24M;
 			break;
 		}
-		if (pHTInfo->bCurrentHTSupport  && pHTInfo->enable_ht) {
-			u8 HTOpMode = pHTInfo->current_op_mode;
+		if (ht_info->bCurrentHTSupport  && ht_info->enable_ht) {
+			u8 HTOpMode = ht_info->current_op_mode;
 
-			if ((pHTInfo->bCurBW40MHz && (HTOpMode == 2 ||
+			if ((ht_info->bCurBW40MHz && (HTOpMode == 2 ||
 						      HTOpMode == 3)) ||
-			     (!pHTInfo->bCurBW40MHz && HTOpMode == 3)) {
+			     (!ht_info->bCurBW40MHz && HTOpMode == 3)) {
 				tcb_desc->rts_rate = MGN_24M;
 				tcb_desc->bRTSEnable = true;
 				break;
@@ -882,7 +882,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 		tcb_desc->priority = skb->priority;
 
 		if (ether_type == ETH_P_PAE) {
-			if (ieee->pHTInfo->iot_action &
+			if (ieee->ht_info->iot_action &
 			    HT_IOT_ACT_WA_IOT_Broadcom) {
 				tcb_desc->data_rate =
 					 MgntQuery_TxRateExcludeCCKRates(ieee);
@@ -906,7 +906,7 @@ static int rtllib_xmit_inter(struct sk_buff *skb, struct net_device *dev)
 				tcb_desc->data_rate = rtllib_current_rate(ieee);
 
 			if (bdhcp) {
-				if (ieee->pHTInfo->iot_action &
+				if (ieee->ht_info->iot_action &
 				    HT_IOT_ACT_WA_IOT_Broadcom) {
 					tcb_desc->data_rate =
 					   MgntQuery_TxRateExcludeCCKRates(ieee);
-- 
GitLab


From 8610e98f0b48a7f9974cb12c5f501433c4afa958 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Wed, 16 Nov 2022 14:50:59 -0600
Subject: [PATCH 1752/4122] PCI: Drop of_match_ptr() to avoid unused variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We have stubs for most OF interfaces even when CONFIG_OF is not set, so we
allow building of most controller drivers in that case for compile testing.

When CONFIG_OF is not set, "of_match_ptr(<match_table>)" compiles to NULL,
which leaves <match_table> unused, resulting in errors like this:

  $ make W=1
  drivers/pci/controller/pci-xgene.c:636:34: error: ‘xgene_pcie_match_table’ defined but not used [-Werror=unused-const-variable=]

Drop of_match_ptr() to avoid the unused variable warning.

See also 1dff012f636d ("PCI: Drop of_match_ptr() to avoid unused
variables").

Link: https://lore.kernel.org/r/20221025191339.667614-2-helgaas@kernel.org
Link: https://lore.kernel.org/r/20221116205100.1136224-2-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/controller/pci-ftpci100.c | 2 +-
 drivers/pci/controller/pci-v3-semi.c  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/pci-ftpci100.c b/drivers/pci/controller/pci-ftpci100.c
index 0cfd9d5a497c9..ecd3009df586d 100644
--- a/drivers/pci/controller/pci-ftpci100.c
+++ b/drivers/pci/controller/pci-ftpci100.c
@@ -553,7 +553,7 @@ static const struct of_device_id faraday_pci_of_match[] = {
 static struct platform_driver faraday_pci_driver = {
 	.driver = {
 		.name = "ftpci100",
-		.of_match_table = of_match_ptr(faraday_pci_of_match),
+		.of_match_table = faraday_pci_of_match,
 		.suppress_bind_attrs = true,
 	},
 	.probe  = faraday_pci_probe,
diff --git a/drivers/pci/controller/pci-v3-semi.c b/drivers/pci/controller/pci-v3-semi.c
index 784fcf35599c6..ca44b0c83d1bf 100644
--- a/drivers/pci/controller/pci-v3-semi.c
+++ b/drivers/pci/controller/pci-v3-semi.c
@@ -901,7 +901,7 @@ static const struct of_device_id v3_pci_of_match[] = {
 static struct platform_driver v3_pci_driver = {
 	.driver = {
 		.name = "pci-v3-semi",
-		.of_match_table = of_match_ptr(v3_pci_of_match),
+		.of_match_table = v3_pci_of_match,
 		.suppress_bind_attrs = true,
 	},
 	.probe  = v3_pci_probe,
-- 
GitLab


From 1aff514e1d2bd47854dbbdf867970b9d463d4c57 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 31 Oct 2022 15:43:37 +0800
Subject: [PATCH 1753/4122] HSI: omap_ssi_core: fix possible memory leak in
 ssi_probe()

If ssi_add_controller() returns error, it should call hsi_put_controller()
to give up the reference that was set in hsi_alloc_controller(), so that
it can call hsi_controller_release() to free controller and ports that
allocated in hsi_alloc_controller().

Fixes: b209e047bc74 ("HSI: Introduce OMAP SSI driver")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/hsi/controllers/omap_ssi_core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c
index b23a576ed88a5..052cf3e92dd69 100644
--- a/drivers/hsi/controllers/omap_ssi_core.c
+++ b/drivers/hsi/controllers/omap_ssi_core.c
@@ -502,8 +502,10 @@ static int ssi_probe(struct platform_device *pd)
 	platform_set_drvdata(pd, ssi);
 
 	err = ssi_add_controller(ssi, pd);
-	if (err < 0)
+	if (err < 0) {
+		hsi_put_controller(ssi);
 		goto out1;
+	}
 
 	pm_runtime_enable(&pd->dev);
 
-- 
GitLab


From 8acbca3a92b859e3dfe0538254acd5bd5b4632b1 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 11 Sep 2022 13:56:59 +0200
Subject: [PATCH 1754/4122] headers: Remove some left-over license text in
 include/uapi/linux/hsi/

Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX
license identifier to uapi header files with a license")

When the SPDX-License-Identifier tag has been added, the corresponding
license text has not been removed.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Kai Vehmanen <kvcontact@nosignal.fi>
Acked-by: Peter Ujfalusi <peter.ujfalusi@gmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 include/uapi/linux/hsi/cs-protocol.h | 14 --------------
 include/uapi/linux/hsi/hsi_char.h    | 14 --------------
 2 files changed, 28 deletions(-)

diff --git a/include/uapi/linux/hsi/cs-protocol.h b/include/uapi/linux/hsi/cs-protocol.h
index c7f6e7672cb59..07c3bfb674637 100644
--- a/include/uapi/linux/hsi/cs-protocol.h
+++ b/include/uapi/linux/hsi/cs-protocol.h
@@ -6,20 +6,6 @@
  *
  * Contact: Kai Vehmanen <kai.vehmanen@nokia.com>
  * Original author: Peter Ujfalusi <peter.ujfalusi@nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #ifndef _CS_PROTOCOL_H
diff --git a/include/uapi/linux/hsi/hsi_char.h b/include/uapi/linux/hsi/hsi_char.h
index 91623b0398b1b..5ef72f0daf947 100644
--- a/include/uapi/linux/hsi/hsi_char.h
+++ b/include/uapi/linux/hsi/hsi_char.h
@@ -5,20 +5,6 @@
  * Copyright (C) 2010 Nokia Corporation. All rights reserved.
  *
  * Contact: Andras Domokos <andras.domokos at nokia.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
  */
 
 #ifndef __HSI_CHAR_H
-- 
GitLab


From 5b79480ce1978864ac3f06f2134dfa3b6691fe74 Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Thu, 17 Nov 2022 16:32:19 +0800
Subject: [PATCH 1755/4122] power: supply: fix residue sysfs file in error
 handle route of __power_supply_register()

If device_add() succeeds, we should call device_del() when want to
get rid of it, so move it into proper jump symbol.

Otherwise, when __power_supply_register() returns fail and goto
wakeup_init_failed to exit, there is still residue device file in sysfs.
When attempt to probe device again, sysfs would complain as below:

sysfs: cannot create duplicate filename '/devices/platform/i2c/i2c-0/0-001c/power_supply/adp5061'
Call Trace:
 dump_stack_lvl+0x68/0x85
 sysfs_warn_dup.cold+0x1c/0x29
 sysfs_create_dir_ns+0x1b1/0x1d0
 kobject_add_internal+0x143/0x390
 kobject_add+0x108/0x170

Fixes: 80c6463e2fa3 ("power_supply: Fix Oops from NULL pointer dereference from wakeup_source_activate")
Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/power_supply_core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 9035e349bf53c..00cb19b46001c 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -1386,8 +1386,8 @@ create_triggers_failed:
 register_cooler_failed:
 	psy_unregister_thermal(psy);
 register_thermal_failed:
-	device_del(dev);
 wakeup_init_failed:
+	device_del(dev);
 device_add_failed:
 check_supplies_failed:
 dev_set_name_failed:
-- 
GitLab


From 332d7d0c6dd7db50109d304802e0d1a9d086188f Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Date: Wed, 16 Nov 2022 12:05:56 +0200
Subject: [PATCH 1756/4122] power: supply: 88pm860x: simplify using devm

Use devm variants for requesting threaded IRQ and for power-supply
registration. Clean up error path and remove the .remove-callback.

Signed-off-by: Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/88pm860x_charger.c | 38 ++++++-------------------
 1 file changed, 9 insertions(+), 29 deletions(-)

diff --git a/drivers/power/supply/88pm860x_charger.c b/drivers/power/supply/88pm860x_charger.c
index f21ce52fbc040..2b9fcb7e71d79 100644
--- a/drivers/power/supply/88pm860x_charger.c
+++ b/drivers/power/supply/88pm860x_charger.c
@@ -690,8 +690,7 @@ static int pm860x_charger_probe(struct platform_device *pdev)
 	    (chip->id == CHIP_PM8607) ? chip->companion : chip->client;
 	if (!info->i2c_8606) {
 		dev_err(&pdev->dev, "Missed I2C address of 88PM8606!\n");
-		ret = -EINVAL;
-		goto out;
+		return -EINVAL;
 	}
 	info->dev = &pdev->dev;
 
@@ -704,44 +703,26 @@ static int pm860x_charger_probe(struct platform_device *pdev)
 	psy_cfg.drv_data = info;
 	psy_cfg.supplied_to = pm860x_supplied_to;
 	psy_cfg.num_supplicants = ARRAY_SIZE(pm860x_supplied_to);
-	info->usb = power_supply_register(&pdev->dev, &pm860x_charger_desc,
-					  &psy_cfg);
+	info->usb = devm_power_supply_register(&pdev->dev, &pm860x_charger_desc,
+					       &psy_cfg);
 	if (IS_ERR(info->usb)) {
-		ret = PTR_ERR(info->usb);
-		goto out;
+		return PTR_ERR(info->usb);
 	}
 
 	pm860x_init_charger(info);
 
 	for (i = 0; i < ARRAY_SIZE(info->irq); i++) {
-		ret = request_threaded_irq(info->irq[i], NULL,
-			pm860x_irq_descs[i].handler,
-			IRQF_ONESHOT, pm860x_irq_descs[i].name, info);
+		ret = devm_request_threaded_irq(&pdev->dev, info->irq[i], NULL,
+						pm860x_irq_descs[i].handler,
+						IRQF_ONESHOT,
+						pm860x_irq_descs[i].name, info);
 		if (ret < 0) {
 			dev_err(chip->dev, "Failed to request IRQ: #%d: %d\n",
 				info->irq[i], ret);
-			goto out_irq;
+			return ret;
 		}
 	}
 	return 0;
-
-out_irq:
-	power_supply_unregister(info->usb);
-	while (--i >= 0)
-		free_irq(info->irq[i], info);
-out:
-	return ret;
-}
-
-static int pm860x_charger_remove(struct platform_device *pdev)
-{
-	struct pm860x_charger_info *info = platform_get_drvdata(pdev);
-	int i;
-
-	power_supply_unregister(info->usb);
-	for (i = 0; i < info->irq_nums; i++)
-		free_irq(info->irq[i], info);
-	return 0;
 }
 
 static struct platform_driver pm860x_charger_driver = {
@@ -749,7 +730,6 @@ static struct platform_driver pm860x_charger_driver = {
 		   .name = "88pm860x-charger",
 	},
 	.probe = pm860x_charger_probe,
-	.remove = pm860x_charger_remove,
 };
 module_platform_driver(pm860x_charger_driver);
 
-- 
GitLab


From 1c137323e9a2a970b4a5bf8cf3c50e0ea1cefbeb Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:43 +0530
Subject: [PATCH 1757/4122] crypto: vmx: Skip objtool from running on
 aesp8-ppc.o

With objtool enabled, below warnings are seen when trying to build:
  drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: aes_p8_set_encrypt_key+0x44: unannotated intra-function call
  drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: .text+0x2448: unannotated intra-function call
  drivers/crypto/vmx/aesp8-ppc.o: warning: objtool: .text+0x2d68: unannotated intra-function call

Skip objtool from running on drivers/crypto/vmx/aesp8-ppc.o file for the
following reasons:

- Since this file comes from OpenSSL, and since it is a perl file which
  generates a .S file, it may not be the best choice to make too many
  code changes to such files, unless absolutely necessary.

- As far as the objtool --mcount functionality is concerned, we do not
  have to run objtool on this file because there are no calls to
  _mcount().

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-6-sv@linux.ibm.com
---
 drivers/crypto/vmx/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
index 2560cfea1dec2..7b41f0da6807f 100644
--- a/drivers/crypto/vmx/Makefile
+++ b/drivers/crypto/vmx/Makefile
@@ -9,3 +9,5 @@ targets += aesp8-ppc.S ghashp8-ppc.S
 
 $(obj)/aesp8-ppc.S $(obj)/ghashp8-ppc.S: $(obj)/%.S: $(src)/%.pl FORCE
 	$(call if_changed,perl)
+
+OBJECT_FILES_NON_STANDARD_aesp8-ppc.o := y
-- 
GitLab


From 2da37761671b5bdedbe04e6469cfa57cd6b6ae45 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 14 Nov 2022 23:27:44 +0530
Subject: [PATCH 1758/4122] powerpc/32: Fix objtool unannotated intra-function
 call warnings

Fix several annotations in assembly files on PPC32.

[Sathvika Vasireddy: Changed subject line and removed Kconfig change to
 enable objtool, as it is a part of "objtool/powerpc: Enable objtool to
 be built on ppc" patch in this series.]

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-7-sv@linux.ibm.com
---
 arch/powerpc/kernel/cpu_setup_6xx.S          | 26 ++++++++++++------
 arch/powerpc/kernel/cpu_setup_e500.S         |  8 ++++--
 arch/powerpc/kernel/entry_32.S               |  9 ++++--
 arch/powerpc/kernel/head_40x.S               |  5 +++-
 arch/powerpc/kernel/head_85xx.S              |  5 +++-
 arch/powerpc/kernel/head_8xx.S               |  5 +++-
 arch/powerpc/kernel/head_book3s_32.S         | 29 ++++++++++++++------
 arch/powerpc/kernel/swsusp_32.S              |  5 +++-
 arch/powerpc/kvm/fpu.S                       | 17 ++++++++----
 arch/powerpc/platforms/52xx/lite5200_sleep.S | 15 +++++++---
 10 files changed, 89 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/kernel/cpu_setup_6xx.S b/arch/powerpc/kernel/cpu_setup_6xx.S
index f8b5ff64b604f..f29ce3dd6140f 100644
--- a/arch/powerpc/kernel/cpu_setup_6xx.S
+++ b/arch/powerpc/kernel/cpu_setup_6xx.S
@@ -4,6 +4,8 @@
  *    Copyright (C) 2003 Benjamin Herrenschmidt (benh@kernel.crashing.org)
  */
 
+#include <linux/linkage.h>
+
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cputable.h>
@@ -81,7 +83,7 @@ _GLOBAL(__setup_cpu_745x)
 	blr
 
 /* Enable caches for 603's, 604, 750 & 7400 */
-setup_common_caches:
+SYM_FUNC_START_LOCAL(setup_common_caches)
 	mfspr	r11,SPRN_HID0
 	andi.	r0,r11,HID0_DCE
 	ori	r11,r11,HID0_ICE|HID0_DCE
@@ -95,11 +97,12 @@ setup_common_caches:
 	sync
 	isync
 	blr
+SYM_FUNC_END(setup_common_caches)
 
 /* 604, 604e, 604ev, ...
  * Enable superscalar execution & branch history table
  */
-setup_604_hid0:
+SYM_FUNC_START_LOCAL(setup_604_hid0)
 	mfspr	r11,SPRN_HID0
 	ori	r11,r11,HID0_SIED|HID0_BHTE
 	ori	r8,r11,HID0_BTCD
@@ -110,6 +113,7 @@ setup_604_hid0:
 	sync
 	isync
 	blr
+SYM_FUNC_END(setup_604_hid0)
 
 /* 7400 <= rev 2.7 and 7410 rev = 1.0 suffer from some
  * erratas we work around here.
@@ -125,13 +129,14 @@ setup_604_hid0:
  * needed once we have applied workaround #5 (though it's
  * not set by Apple's firmware at least).
  */
-setup_7400_workarounds:
+SYM_FUNC_START_LOCAL(setup_7400_workarounds)
 	mfpvr	r3
 	rlwinm	r3,r3,0,20,31
 	cmpwi	0,r3,0x0207
 	ble	1f
 	blr
-setup_7410_workarounds:
+SYM_FUNC_END(setup_7400_workarounds)
+SYM_FUNC_START_LOCAL(setup_7410_workarounds)
 	mfpvr	r3
 	rlwinm	r3,r3,0,20,31
 	cmpwi	0,r3,0x0100
@@ -151,6 +156,7 @@ setup_7410_workarounds:
 	sync
 	isync
 	blr
+SYM_FUNC_END(setup_7410_workarounds)
 
 /* 740/750/7400/7410
  * Enable Store Gathering (SGE), Address Broadcast (ABE),
@@ -158,7 +164,7 @@ setup_7410_workarounds:
  * Dynamic Power Management (DPM), Speculative (SPD)
  * Clear Instruction cache throttling (ICTC)
  */
-setup_750_7400_hid0:
+SYM_FUNC_START_LOCAL(setup_750_7400_hid0)
 	mfspr	r11,SPRN_HID0
 	ori	r11,r11,HID0_SGE | HID0_ABE | HID0_BHTE | HID0_BTIC
 	oris	r11,r11,HID0_DPM@h
@@ -177,12 +183,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
 	sync
 	isync
 	blr
+SYM_FUNC_END(setup_750_7400_hid0)
 
 /* 750cx specific
  * Looks like we have to disable NAP feature for some PLL settings...
  * (waiting for confirmation)
  */
-setup_750cx:
+SYM_FUNC_START_LOCAL(setup_750cx)
 	mfspr	r10, SPRN_HID1
 	rlwinm	r10,r10,4,28,31
 	cmpwi	cr0,r10,7
@@ -196,11 +203,13 @@ setup_750cx:
 	andc	r6,r6,r7
 	stw	r6,CPU_SPEC_FEATURES(r4)
 	blr
+SYM_FUNC_END(setup_750cx)
 
 /* 750fx specific
  */
-setup_750fx:
+SYM_FUNC_START_LOCAL(setup_750fx)
 	blr
+SYM_FUNC_END(setup_750fx)
 
 /* MPC 745x
  * Enable Store Gathering (SGE), Branch Folding (FOLD)
@@ -212,7 +221,7 @@ setup_750fx:
  * Clear Instruction cache throttling (ICTC)
  * Enable L2 HW prefetch
  */
-setup_745x_specifics:
+SYM_FUNC_START_LOCAL(setup_745x_specifics)
 	/* We check for the presence of an L3 cache setup by
 	 * the firmware. If any, we disable NAP capability as
 	 * it's known to be bogus on rev 2.1 and earlier
@@ -270,6 +279,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NO_DPM)
 	sync
 	isync
 	blr
+SYM_FUNC_END(setup_745x_specifics)
 
 /*
  * Initialize the FPU registers. This is needed to work around an errata
diff --git a/arch/powerpc/kernel/cpu_setup_e500.S b/arch/powerpc/kernel/cpu_setup_e500.S
index 2ab25161b0adc..077cfccc3461e 100644
--- a/arch/powerpc/kernel/cpu_setup_e500.S
+++ b/arch/powerpc/kernel/cpu_setup_e500.S
@@ -8,6 +8,8 @@
  * Benjamin Herrenschmidt <benh@kernel.crashing.org>
  */
 
+#include <linux/linkage.h>
+
 #include <asm/page.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
@@ -274,7 +276,7 @@ _GLOBAL(flush_dcache_L1)
 
 	blr
 
-has_L2_cache:
+SYM_FUNC_START_LOCAL(has_L2_cache)
 	/* skip L2 cache on P2040/P2040E as they have no L2 cache */
 	mfspr	r3, SPRN_SVR
 	/* shift right by 8 bits and clear E bit of SVR */
@@ -290,9 +292,10 @@ has_L2_cache:
 1:
 	li	r3, 0
 	blr
+SYM_FUNC_END(has_L2_cache)
 
 /* flush backside L2 cache */
-flush_backside_L2_cache:
+SYM_FUNC_START_LOCAL(flush_backside_L2_cache)
 	mflr	r10
 	bl	has_L2_cache
 	mtlr	r10
@@ -313,6 +316,7 @@ flush_backside_L2_cache:
 	bne	1b
 2:
 	blr
+SYM_FUNC_END(flush_backside_L2_cache)
 
 _GLOBAL(cpu_down_flush_e500v2)
 	mflr r0
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 3fc7c9886bb70..5e0763be15490 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -18,6 +18,8 @@
 #include <linux/err.h>
 #include <linux/sys.h>
 #include <linux/threads.h>
+#include <linux/linkage.h>
+
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -74,17 +76,18 @@ _ASM_NOKPROBE_SYMBOL(prepare_transfer_to_handler)
 #endif /* CONFIG_PPC_BOOK3S_32 || CONFIG_PPC_E500 */
 
 #if defined(CONFIG_PPC_KUEP) && defined(CONFIG_PPC_BOOK3S_32)
-	.globl	__kuep_lock
-__kuep_lock:
+SYM_FUNC_START(__kuep_lock)
 	lwz	r9, THREAD+THSR0(r2)
 	update_user_segments_by_4 r9, r10, r11, r12
 	blr
+SYM_FUNC_END(__kuep_lock)
 
-__kuep_unlock:
+SYM_FUNC_START_LOCAL(__kuep_unlock)
 	lwz	r9, THREAD+THSR0(r2)
 	rlwinm  r9,r9,0,~SR_NX
 	update_user_segments_by_4 r9, r10, r11, r12
 	blr
+SYM_FUNC_END(__kuep_unlock)
 
 .macro	kuep_lock
 	bl	__kuep_lock
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 088f500896c78..9110fe9d6747c 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -28,6 +28,8 @@
 #include <linux/init.h>
 #include <linux/pgtable.h>
 #include <linux/sizes.h>
+#include <linux/linkage.h>
+
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -662,7 +664,7 @@ start_here:
  * kernel initialization.  This maps the first 32 MBytes of memory 1:1
  * virtual to physical and more importantly sets the cache mode.
  */
-initial_mmu:
+SYM_FUNC_START_LOCAL(initial_mmu)
 	tlbia			/* Invalidate all TLB entries */
 	isync
 
@@ -711,6 +713,7 @@ initial_mmu:
 	mtspr	SPRN_EVPR,r0
 
 	blr
+SYM_FUNC_END(initial_mmu)
 
 _GLOBAL(abort)
         mfspr   r13,SPRN_DBCR0
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index 52c0ab416326a..6be3cc36b716c 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -29,6 +29,8 @@
 #include <linux/init.h>
 #include <linux/threads.h>
 #include <linux/pgtable.h>
+#include <linux/linkage.h>
+
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -885,7 +887,7 @@ KernelSPE:
  * Translate the effec addr in r3 to phys addr. The phys addr will be put
  * into r3(higher 32bit) and r4(lower 32bit)
  */
-get_phys_addr:
+SYM_FUNC_START_LOCAL(get_phys_addr)
 	mfmsr	r8
 	mfspr	r9,SPRN_PID
 	rlwinm	r9,r9,16,0x3fff0000	/* turn PID into MAS6[SPID] */
@@ -907,6 +909,7 @@ get_phys_addr:
 	mfspr	r3,SPRN_MAS7
 #endif
 	blr
+SYM_FUNC_END(get_phys_addr)
 
 /*
  * Global functions
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 0b05f2be66b9f..c94ed5a08c935 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -18,6 +18,8 @@
 #include <linux/magic.h>
 #include <linux/pgtable.h>
 #include <linux/sizes.h>
+#include <linux/linkage.h>
+
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -625,7 +627,7 @@ start_here:
  * 24 Mbytes of data, and the 512k IMMR space.  Anything not covered by
  * these mappings is mapped by page tables.
  */
-initial_mmu:
+SYM_FUNC_START_LOCAL(initial_mmu)
 	li	r8, 0
 	mtspr	SPRN_MI_CTR, r8		/* remove PINNED ITLB entries */
 	lis	r10, MD_TWAM@h
@@ -686,6 +688,7 @@ initial_mmu:
 #endif
 	mtspr	SPRN_DER, r8
 	blr
+SYM_FUNC_END(initial_mmu)
 
 _GLOBAL(mmu_pin_tlb)
 	lis	r9, (1f - PAGE_OFFSET)@h
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 519b606951675..4af12447dc0b6 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -18,6 +18,8 @@
 
 #include <linux/init.h>
 #include <linux/pgtable.h>
+#include <linux/linkage.h>
+
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -877,7 +879,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
  * Load stuff into the MMU.  Intended to be called with
  * IR=0 and DR=0.
  */
-early_hash_table:
+SYM_FUNC_START_LOCAL(early_hash_table)
 	sync			/* Force all PTE updates to finish */
 	isync
 	tlbia			/* Clear all TLB entries */
@@ -888,8 +890,9 @@ early_hash_table:
 	ori	r6, r6, 3	/* 256kB table */
 	mtspr	SPRN_SDR1, r6
 	blr
+SYM_FUNC_END(early_hash_table)
 
-load_up_mmu:
+SYM_FUNC_START_LOCAL(load_up_mmu)
 	sync			/* Force all PTE updates to finish */
 	isync
 	tlbia			/* Clear all TLB entries */
@@ -918,6 +921,7 @@ BEGIN_MMU_FTR_SECTION
 	LOAD_BAT(7,r3,r4,r5)
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	blr
+SYM_FUNC_END(load_up_mmu)
 
 _GLOBAL(load_segment_registers)
 	li	r0, NUM_USER_SEGMENTS /* load up user segment register values */
@@ -1028,7 +1032,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
  * this makes sure it's done.
  *  -- Cort
  */
-clear_bats:
+SYM_FUNC_START_LOCAL(clear_bats)
 	li	r10,0
 
 	mtspr	SPRN_DBAT0U,r10
@@ -1072,6 +1076,7 @@ BEGIN_MMU_FTR_SECTION
 	mtspr	SPRN_IBAT7L,r10
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	blr
+SYM_FUNC_END(clear_bats)
 
 _GLOBAL(update_bats)
 	lis	r4, 1f@h
@@ -1108,15 +1113,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
 	mtspr	SPRN_SRR1, r6
 	rfi
 
-flush_tlbs:
+SYM_FUNC_START_LOCAL(flush_tlbs)
 	lis	r10, 0x40
 1:	addic.	r10, r10, -0x1000
 	tlbie	r10
 	bgt	1b
 	sync
 	blr
+SYM_FUNC_END(flush_tlbs)
 
-mmu_off:
+SYM_FUNC_START_LOCAL(mmu_off)
  	addi	r4, r3, __after_mmu_off - _start
 	mfmsr	r3
 	andi.	r0,r3,MSR_DR|MSR_IR		/* MMU enabled? */
@@ -1128,9 +1134,10 @@ mmu_off:
 	mtspr	SPRN_SRR1,r3
 	sync
 	rfi
+SYM_FUNC_END(mmu_off)
 
 /* We use one BAT to map up to 256M of RAM at _PAGE_OFFSET */
-initial_bats:
+SYM_FUNC_START_LOCAL(initial_bats)
 	lis	r11,PAGE_OFFSET@h
 	tophys(r8,r11)
 #ifdef CONFIG_SMP
@@ -1146,9 +1153,10 @@ initial_bats:
 	mtspr	SPRN_IBAT0U,r11
 	isync
 	blr
+SYM_FUNC_END(initial_bats)
 
 #ifdef CONFIG_BOOTX_TEXT
-setup_disp_bat:
+SYM_FUNC_START_LOCAL(setup_disp_bat)
 	/*
 	 * setup the display bat prepared for us in prom.c
 	 */
@@ -1164,10 +1172,11 @@ setup_disp_bat:
 	mtspr	SPRN_DBAT3L,r8
 	mtspr	SPRN_DBAT3U,r11
 	blr
+SYM_FUNC_END(setup_disp_bat)
 #endif /* CONFIG_BOOTX_TEXT */
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_CPM
-setup_cpm_bat:
+SYM_FUNC_START_LOCAL(setup_cpm_bat)
 	lis	r8, 0xf000
 	ori	r8, r8,	0x002a
 	mtspr	SPRN_DBAT1L, r8
@@ -1177,10 +1186,11 @@ setup_cpm_bat:
 	mtspr	SPRN_DBAT1U, r11
 
 	blr
+SYM_FUNC_END(setup_cpm_bat)
 #endif
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_USBGECKO
-setup_usbgecko_bat:
+SYM_FUNC_START_LOCAL(setup_usbgecko_bat)
 	/* prepare a BAT for early io */
 #if defined(CONFIG_GAMECUBE)
 	lis	r8, 0x0c00
@@ -1199,6 +1209,7 @@ setup_usbgecko_bat:
 	mtspr	SPRN_DBAT1L, r8
 	mtspr	SPRN_DBAT1U, r11
 	blr
+SYM_FUNC_END(setup_usbgecko_bat)
 #endif
 
 	.data
diff --git a/arch/powerpc/kernel/swsusp_32.S b/arch/powerpc/kernel/swsusp_32.S
index e0cbd63007f21..ffb79326483c0 100644
--- a/arch/powerpc/kernel/swsusp_32.S
+++ b/arch/powerpc/kernel/swsusp_32.S
@@ -1,5 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #include <linux/threads.h>
+#include <linux/linkage.h>
+
 #include <asm/processor.h>
 #include <asm/page.h>
 #include <asm/cputable.h>
@@ -400,7 +402,7 @@ _ASM_NOKPROBE_SYMBOL(swsusp_arch_resume)
 /* FIXME:This construct is actually not useful since we don't shut
  * down the instruction MMU, we could just flip back MSR-DR on.
  */
-turn_on_mmu:
+SYM_FUNC_START_LOCAL(turn_on_mmu)
 	mflr	r4
 	mtsrr0	r4
 	mtsrr1	r3
@@ -408,4 +410,5 @@ turn_on_mmu:
 	isync
 	rfi
 _ASM_NOKPROBE_SYMBOL(turn_on_mmu)
+SYM_FUNC_END(turn_on_mmu)
 
diff --git a/arch/powerpc/kvm/fpu.S b/arch/powerpc/kvm/fpu.S
index 315c94946bad1..b68e7f26a81f4 100644
--- a/arch/powerpc/kvm/fpu.S
+++ b/arch/powerpc/kvm/fpu.S
@@ -6,6 +6,8 @@
  */
 
 #include <linux/pgtable.h>
+#include <linux/linkage.h>
+
 #include <asm/reg.h>
 #include <asm/page.h>
 #include <asm/mmu.h>
@@ -110,18 +112,22 @@ FPS_THREE_IN(fsel)
  * R8 = (double*)&param3 [load_three]
  * LR = instruction call function
  */
-fpd_load_three:
+SYM_FUNC_START_LOCAL(fpd_load_three)
 	lfd	2,0(r8)			/* load param3 */
-fpd_load_two:
+SYM_FUNC_START_LOCAL(fpd_load_two)
 	lfd	1,0(r7)			/* load param2 */
-fpd_load_one:
+SYM_FUNC_START_LOCAL(fpd_load_one)
 	lfd	0,0(r6)			/* load param1 */
-fpd_load_none:
+SYM_FUNC_START_LOCAL(fpd_load_none)
 	lfd	3,0(r3)			/* load up fpscr value */
 	MTFSF_L(3)
 	lwz	r6, 0(r4)		/* load cr */
 	mtcr	r6
 	blr
+SYM_FUNC_END(fpd_load_none)
+SYM_FUNC_END(fpd_load_one)
+SYM_FUNC_END(fpd_load_two)
+SYM_FUNC_END(fpd_load_three)
 
 /*
  * End of double instruction processing
@@ -131,13 +137,14 @@ fpd_load_none:
  * R5 = (double*)&result
  * LR = caller of instruction call function
  */
-fpd_return:
+SYM_FUNC_START_LOCAL(fpd_return)
 	mfcr	r6
 	stfd	0,0(r5)			/* save result */
 	mffs	0
 	stfd	0,0(r3)			/* save new fpscr value */
 	stw	r6,0(r4)		/* save new cr value */
 	blr
+SYM_FUNC_END(fpd_return)
 
 /*
  * Double operation with no input operand
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index afee8b1515a8e..0b12647e7b420 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -1,4 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/linkage.h>
+
 #include <asm/reg.h>
 #include <asm/ppc_asm.h>
 #include <asm/processor.h>
@@ -178,7 +180,8 @@ sram_code:
 
 
 	/* local udelay in sram is needed */
-  udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
+SYM_FUNC_START_LOCAL(udelay)
+	/* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
 	mullw	r12, r12, r11
 	mftb	r13	/* start */
 	add	r12, r13, r12 /* end */
@@ -187,6 +190,7 @@ sram_code:
 	cmp	cr0, r13, r12
 	blt	1b
 	blr
+SYM_FUNC_END(udelay)
 
 sram_code_end:
 
@@ -271,7 +275,7 @@ _ASM_NOKPROBE_SYMBOL(lite5200_wakeup)
 	SAVE_SR(n+2, addr+2);	\
 	SAVE_SR(n+3, addr+3);
 
-save_regs:
+SYM_FUNC_START_LOCAL(save_regs)
 	stw	r0, 0(r4)
 	stw	r1, 0x4(r4)
 	stw	r2, 0x8(r4)
@@ -317,6 +321,7 @@ save_regs:
 	SAVE_SPRN(TBRU,  0x5b)
 
 	blr
+SYM_FUNC_END(save_regs)
 
 
 /* restore registers */
@@ -336,7 +341,7 @@ save_regs:
 	LOAD_SR(n+2, addr+2);	\
 	LOAD_SR(n+3, addr+3);
 
-restore_regs:
+SYM_FUNC_START_LOCAL(restore_regs)
 	lis	r4, registers@h
 	ori	r4, r4, registers@l
 
@@ -393,6 +398,7 @@ restore_regs:
 
 	blr
 _ASM_NOKPROBE_SYMBOL(restore_regs)
+SYM_FUNC_END(restore_regs)
 
 
@@ -403,7 +409,7 @@ _ASM_NOKPROBE_SYMBOL(restore_regs)
  * Flush data cache
  * Do this by just reading lots of stuff into the cache.
  */
-flush_data_cache:
+SYM_FUNC_START_LOCAL(flush_data_cache)
 	lis	r3,CONFIG_KERNEL_START@h
 	ori	r3,r3,CONFIG_KERNEL_START@l
 	li	r4,NUM_CACHE_LINES
@@ -413,3 +419,4 @@ flush_data_cache:
 	addi	r3,r3,L1_CACHE_BYTES	/* Next line, please */
 	bdnz	1b
 	blr
+SYM_FUNC_END(flush_data_cache)
-- 
GitLab


From d0160bd5d389da247fb5affb6a35ea393d22fedb Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:45 +0530
Subject: [PATCH 1759/4122] powerpc/vdso: Skip objtool from running on VDSO
 files

Do not run objtool on VDSO files, by using OBJECT_FILES_NON_STANDARD.

Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-8-sv@linux.ibm.com
---
 arch/powerpc/kernel/vdso/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile
index a2e7b0ce5b191..6a977b0d8ffc3 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -102,3 +102,5 @@ quiet_cmd_vdso64ld_and_check = VDSO64L $@
       cmd_vdso64ld_and_check = $(VDSOCC) $(c_flags) $(CC64FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) -z noexecstack ; $(cmd_vdso_check)
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(VDSOCC) $(a_flags) $(CC64FLAGS) $(AS64FLAGS) -c -o $@ $<
+
+OBJECT_FILES_NON_STANDARD := y
-- 
GitLab


From efb11fdb3e1a9f694fa12b70b21e69e55ec59c36 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 14 Nov 2022 23:27:46 +0530
Subject: [PATCH 1760/4122] objtool: Fix SEGFAULT

find_insn() will return NULL in case of failure. Check insn in order
to avoid a kernel Oops for NULL pointer dereference.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-9-sv@linux.ibm.com
---
 tools/objtool/check.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 43ec14c29a60c..8427af808221b 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -207,7 +207,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
 		return false;
 
 	insn = find_insn(file, func->sec, func->offset);
-	if (!insn->func)
+	if (!insn || !insn->func)
 		return false;
 
 	func_for_each_insn(file, func, insn) {
-- 
GitLab


From 0646c28b417b7fe307c9da72ca1c508e43b57dc0 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 14 Nov 2022 23:27:47 +0530
Subject: [PATCH 1761/4122] objtool: Use target file endianness instead of a
 compiled constant

Some architectures like powerpc support both endianness, it's
therefore not possible to fix the endianness via arch/endianness.h
because there is no easy way to get the target endianness at
build time.

Use the endianness recorded in the file objtool is working on.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-10-sv@linux.ibm.com
---
 .../arch/x86/include/arch/endianness.h        |  9 ------
 tools/objtool/check.c                         |  2 +-
 tools/objtool/include/objtool/endianness.h    | 32 +++++++++----------
 tools/objtool/orc_dump.c                      | 11 +++++--
 tools/objtool/orc_gen.c                       |  4 +--
 tools/objtool/special.c                       |  3 +-
 6 files changed, 30 insertions(+), 31 deletions(-)
 delete mode 100644 tools/objtool/arch/x86/include/arch/endianness.h

diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h
deleted file mode 100644
index 7c362527da205..0000000000000
--- a/tools/objtool/arch/x86/include/arch/endianness.h
+++ /dev/null
@@ -1,9 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ARCH_ENDIANNESS_H
-#define _ARCH_ENDIANNESS_H
-
-#include <endian.h>
-
-#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN
-
-#endif /* _ARCH_ENDIANNESS_H */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 8427af808221b..ad5dab1757014 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2100,7 +2100,7 @@ static int read_unwind_hints(struct objtool_file *file)
 			return -1;
 		}
 
-		cfi.cfa.offset = bswap_if_needed(hint->sp_offset);
+		cfi.cfa.offset = bswap_if_needed(file->elf, hint->sp_offset);
 		cfi.type = hint->type;
 		cfi.end = hint->end;
 
diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h
index 10241341eff35..4d2aa9b0fe2fd 100644
--- a/tools/objtool/include/objtool/endianness.h
+++ b/tools/objtool/include/objtool/endianness.h
@@ -2,33 +2,33 @@
 #ifndef _OBJTOOL_ENDIANNESS_H
 #define _OBJTOOL_ENDIANNESS_H
 
-#include <arch/endianness.h>
 #include <linux/kernel.h>
 #include <endian.h>
-
-#ifndef __TARGET_BYTE_ORDER
-#error undefined arch __TARGET_BYTE_ORDER
-#endif
-
-#if __BYTE_ORDER != __TARGET_BYTE_ORDER
-#define __NEED_BSWAP 1
-#else
-#define __NEED_BSWAP 0
-#endif
+#include <objtool/elf.h>
 
 /*
- * Does a byte swap if target endianness doesn't match the host, i.e. cross
+ * Does a byte swap if target file endianness doesn't match the host, i.e. cross
  * compilation for little endian on big endian and vice versa.
  * To be used for multi-byte values conversion, which are read from / about
  * to be written to a target native endianness ELF file.
  */
-#define bswap_if_needed(val)						\
+static inline bool need_bswap(struct elf *elf)
+{
+	return (__BYTE_ORDER == __LITTLE_ENDIAN) ^
+	       (elf->ehdr.e_ident[EI_DATA] == ELFDATA2LSB);
+}
+
+#define bswap_if_needed(elf, val)					\
 ({									\
 	__typeof__(val) __ret;						\
+	bool __need_bswap = need_bswap(elf);				\
 	switch (sizeof(val)) {						\
-	case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break;	\
-	case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break;	\
-	case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break;	\
+	case 8:								\
+		__ret = __need_bswap ? bswap_64(val) : (val); break;	\
+	case 4:								\
+		__ret = __need_bswap ? bswap_32(val) : (val); break;	\
+	case 2:								\
+		__ret = __need_bswap ? bswap_16(val) : (val); break;	\
 	default:							\
 		BUILD_BUG(); break;					\
 	}								\
diff --git a/tools/objtool/orc_dump.c b/tools/objtool/orc_dump.c
index f5a8508c42d6d..4f1211fec82ce 100644
--- a/tools/objtool/orc_dump.c
+++ b/tools/objtool/orc_dump.c
@@ -76,6 +76,7 @@ int orc_dump(const char *_objname)
 	GElf_Rela rela;
 	GElf_Sym sym;
 	Elf_Data *data, *symtab = NULL, *rela_orc_ip = NULL;
+	struct elf dummy_elf = {};
 
 
 	objname = _objname;
@@ -94,6 +95,12 @@ int orc_dump(const char *_objname)
 		return -1;
 	}
 
+	if (!elf64_getehdr(elf)) {
+		WARN_ELF("elf64_getehdr");
+		return -1;
+	}
+	memcpy(&dummy_elf.ehdr, elf64_getehdr(elf), sizeof(dummy_elf.ehdr));
+
 	if (elf_getshdrnum(elf, &nr_sections)) {
 		WARN_ELF("elf_getshdrnum");
 		return -1;
@@ -198,11 +205,11 @@ int orc_dump(const char *_objname)
 
 		printf(" sp:");
 
-		print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset));
+		print_reg(orc[i].sp_reg, bswap_if_needed(&dummy_elf, orc[i].sp_offset));
 
 		printf(" bp:");
 
-		print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset));
+		print_reg(orc[i].bp_reg, bswap_if_needed(&dummy_elf, orc[i].bp_offset));
 
 		printf(" type:%s end:%d\n",
 		       orc_type_name(orc[i].type), orc[i].end);
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index dd3c64af9db23..1f22b7ebae588 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -97,8 +97,8 @@ static int write_orc_entry(struct elf *elf, struct section *orc_sec,
 	/* populate ORC data */
 	orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
 	memcpy(orc, o, sizeof(*orc));
-	orc->sp_offset = bswap_if_needed(orc->sp_offset);
-	orc->bp_offset = bswap_if_needed(orc->bp_offset);
+	orc->sp_offset = bswap_if_needed(elf, orc->sp_offset);
+	orc->bp_offset = bswap_if_needed(elf, orc->bp_offset);
 
 	/* populate reloc for ip */
 	if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32,
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index e2223dd91c379..9c8d827f69afb 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -87,7 +87,8 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
 	if (entry->feature) {
 		unsigned short feature;
 
-		feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf +
+		feature = bswap_if_needed(elf,
+					  *(unsigned short *)(sec->data->d_buf +
 							      offset +
 							      entry->feature));
 		arch_handle_alternative(feature, alt);
-- 
GitLab


From 86ea7f361537f825a699e86fdc9e49be19f128d1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Mon, 14 Nov 2022 23:27:48 +0530
Subject: [PATCH 1762/4122] objtool: Use target file class size instead of a
 compiled constant

In order to allow using objtool on cross-built kernels,
determine size of long from elf data instead of using
sizeof(long) at build time.

For the time being this covers only mcount.

[Sathvika Vasireddy: Rename variable "size" to "addrsize" and function
"elf_class_size()" to "elf_class_addrsize()", and modify
create_mcount_loc_sections() function to follow reverse christmas tree
format to order local variable declarations.]

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-11-sv@linux.ibm.com
---
 tools/objtool/check.c               | 18 ++++++++++--------
 tools/objtool/elf.c                 |  8 ++++++--
 tools/objtool/include/objtool/elf.h |  8 ++++++++
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index ad5dab1757014..b64518c7c7b48 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -852,9 +852,9 @@ static int create_ibt_endbr_seal_sections(struct objtool_file *file)
 
 static int create_mcount_loc_sections(struct objtool_file *file)
 {
-	struct section *sec;
-	unsigned long *loc;
+	int addrsize = elf_class_addrsize(file->elf);
 	struct instruction *insn;
+	struct section *sec;
 	int idx;
 
 	sec = find_section_by_name(file->elf, "__mcount_loc");
@@ -871,23 +871,25 @@ static int create_mcount_loc_sections(struct objtool_file *file)
 	list_for_each_entry(insn, &file->mcount_loc_list, call_node)
 		idx++;
 
-	sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx);
+	sec = elf_create_section(file->elf, "__mcount_loc", 0, addrsize, idx);
 	if (!sec)
 		return -1;
 
+	sec->sh.sh_addralign = addrsize;
+
 	idx = 0;
 	list_for_each_entry(insn, &file->mcount_loc_list, call_node) {
+		void *loc;
 
-		loc = (unsigned long *)sec->data->d_buf + idx;
-		memset(loc, 0, sizeof(unsigned long));
+		loc = sec->data->d_buf + idx;
+		memset(loc, 0, addrsize);
 
-		if (elf_add_reloc_to_insn(file->elf, sec,
-					  idx * sizeof(unsigned long),
+		if (elf_add_reloc_to_insn(file->elf, sec, idx,
 					  R_X86_64_64,
 					  insn->sec, insn->offset))
 			return -1;
 
-		idx++;
+		idx += addrsize;
 	}
 
 	return 0;
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 7e24b09b1163a..33739865735b5 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -1129,6 +1129,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
 {
 	char *relocname;
 	struct section *sec;
+	int addrsize = elf_class_addrsize(elf);
 
 	relocname = malloc(strlen(base->name) + strlen(".rela") + 1);
 	if (!relocname) {
@@ -1138,7 +1139,10 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
 	strcpy(relocname, ".rela");
 	strcat(relocname, base->name);
 
-	sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
+	if (addrsize == sizeof(u32))
+		sec = elf_create_section(elf, relocname, 0, sizeof(Elf32_Rela), 0);
+	else
+		sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
 	free(relocname);
 	if (!sec)
 		return NULL;
@@ -1147,7 +1151,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
 	sec->base = base;
 
 	sec->sh.sh_type = SHT_RELA;
-	sec->sh.sh_addralign = 8;
+	sec->sh.sh_addralign = addrsize;
 	sec->sh.sh_link = find_section_by_name(elf, ".symtab")->idx;
 	sec->sh.sh_info = base->idx;
 	sec->sh.sh_flags = SHF_INFO_LINK;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index 16f4067b82aea..78b3aa2e546d5 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -142,6 +142,14 @@ static inline bool has_multiple_files(struct elf *elf)
 	return elf->num_files > 1;
 }
 
+static inline int elf_class_addrsize(struct elf *elf)
+{
+	if (elf->ehdr.e_ident[EI_CLASS] == ELFCLASS32)
+		return sizeof(u32);
+	else
+		return sizeof(u64);
+}
+
 struct elf *elf_open_read(const char *name, int flags);
 struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
 
-- 
GitLab


From 280981d6994e0700abd36647b141e73059851e66 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:49 +0530
Subject: [PATCH 1763/4122] objtool: Add --mnop as an option to --mcount

Some architectures (powerpc) may not support ftrace locations being nop'ed
out at build time. Introduce CONFIG_HAVE_OBJTOOL_NOP_MCOUNT for objtool, as
a means for architectures to enable nop'ing of ftrace locations. Add --mnop
as an option to objtool --mcount, to indicate support for the same.

Also, make sure that --mnop can be passed as an option to objtool only when
--mcount is passed.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-12-sv@linux.ibm.com
---
 Makefile                                |  4 +++-
 arch/x86/Kconfig                        |  1 +
 kernel/trace/Kconfig                    |  7 +++++++
 scripts/Makefile.lib                    |  3 +++
 tools/objtool/builtin-check.c           | 14 ++++++++++++++
 tools/objtool/check.c                   | 19 ++++++++++---------
 tools/objtool/include/objtool/builtin.h |  1 +
 7 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index d148a55bfd0f5..53c2b715d0bf0 100644
--- a/Makefile
+++ b/Makefile
@@ -933,7 +933,9 @@ ifdef CONFIG_FTRACE_MCOUNT_USE_CC
   endif
 endif
 ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
-  CC_FLAGS_USING	+= -DCC_USING_NOP_MCOUNT
+  ifdef CONFIG_HAVE_OBJTOOL_NOP_MCOUNT
+    CC_FLAGS_USING	+= -DCC_USING_NOP_MCOUNT
+  endif
 endif
 ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
   ifdef CONFIG_HAVE_C_RECORDMCOUNT
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 67745ceab0dbc..4be7c06a5d189 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -195,6 +195,7 @@ config X86
 	select HAVE_CONTEXT_TRACKING_USER_OFFSTACK	if HAVE_CONTEXT_TRACKING_USER
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
+	select HAVE_OBJTOOL_NOP_MCOUNT		if HAVE_OBJTOOL_MCOUNT
 	select HAVE_BUILDTIME_MCOUNT_SORT
 	select HAVE_DEBUG_KMEMLEAK
 	select HAVE_DMA_CONTIGUOUS
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e9e95c790b8ee..2b782321376ac 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -82,6 +82,13 @@ config HAVE_OBJTOOL_MCOUNT
 	help
 	  Arch supports objtool --mcount
 
+config HAVE_OBJTOOL_NOP_MCOUNT
+	bool
+	help
+	  Arch supports the objtool options --mcount with --mnop.
+	  An architecture can select this if it wants to enable nop'ing
+	  of ftrace locations.
+
 config HAVE_C_RECORDMCOUNT
 	bool
 	help
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 3aa384cec76b8..658f541c27827 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -256,6 +256,9 @@ objtool-args-$(CONFIG_HAVE_JUMP_LABEL_HACK)		+= --hacks=jump_label
 objtool-args-$(CONFIG_HAVE_NOINSTR_HACK)		+= --hacks=noinstr
 objtool-args-$(CONFIG_X86_KERNEL_IBT)			+= --ibt
 objtool-args-$(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL)	+= --mcount
+ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
+objtool-args-$(CONFIG_HAVE_OBJTOOL_NOP_MCOUNT)		+= --mnop
+endif
 objtool-args-$(CONFIG_UNWINDER_ORC)			+= --orc
 objtool-args-$(CONFIG_RETPOLINE)			+= --retpoline
 objtool-args-$(CONFIG_RETHUNK)				+= --rethunk
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index 24fbe803a0d32..9bd347d3c2449 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -82,6 +82,7 @@ const struct option check_options[] = {
 	OPT_BOOLEAN(0, "dry-run", &opts.dryrun, "don't write modifications"),
 	OPT_BOOLEAN(0, "link", &opts.link, "object is a linked object"),
 	OPT_BOOLEAN(0, "module", &opts.module, "object is part of a kernel module"),
+	OPT_BOOLEAN(0, "mnop", &opts.mnop, "nop out mcount call sites"),
 	OPT_BOOLEAN(0, "no-unreachable", &opts.no_unreachable, "skip 'unreachable instruction' warnings"),
 	OPT_BOOLEAN(0, "sec-address", &opts.sec_address, "print section addresses in warnings"),
 	OPT_BOOLEAN(0, "stats", &opts.stats, "print statistics"),
@@ -150,6 +151,16 @@ static bool opts_valid(void)
 	return false;
 }
 
+static bool mnop_opts_valid(void)
+{
+	if (opts.mnop && !opts.mcount) {
+		ERROR("--mnop requires --mcount");
+		return false;
+	}
+
+	return true;
+}
+
 static bool link_opts_valid(struct objtool_file *file)
 {
 	if (opts.link)
@@ -198,6 +209,9 @@ int objtool_run(int argc, const char **argv)
 	if (!file)
 		return 1;
 
+	if (!mnop_opts_valid())
+		return 1;
+
 	if (!link_opts_valid(file))
 		return 1;
 
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b64518c7c7b48..71cf4b4ba1da2 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -1256,17 +1256,18 @@ static void annotate_call_site(struct objtool_file *file,
 	if (opts.mcount && sym->fentry) {
 		if (sibling)
 			WARN_FUNC("Tail call to __fentry__ !?!?", insn->sec, insn->offset);
+		if (opts.mnop) {
+			if (reloc) {
+				reloc->type = R_NONE;
+				elf_write_reloc(file->elf, reloc);
+			}
 
-		if (reloc) {
-			reloc->type = R_NONE;
-			elf_write_reloc(file->elf, reloc);
-		}
-
-		elf_write_insn(file->elf, insn->sec,
-			       insn->offset, insn->len,
-			       arch_nop_insn(insn->len));
+			elf_write_insn(file->elf, insn->sec,
+				       insn->offset, insn->len,
+				       arch_nop_insn(insn->len));
 
-		insn->type = INSN_NOP;
+			insn->type = INSN_NOP;
+		}
 
 		list_add_tail(&insn->call_node, &file->mcount_loc_list);
 		return;
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 42a52f1a0add5..0785707c5a924 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -31,6 +31,7 @@ struct opts {
 	bool backup;
 	bool dryrun;
 	bool link;
+	bool mnop;
 	bool module;
 	bool no_unreachable;
 	bool sec_address;
-- 
GitLab


From de6fbcedf5abce4c321eeb15d7d286b79804b8b6 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:50 +0530
Subject: [PATCH 1764/4122] objtool: Read special sections with alts only when
 specific options are selected

Call add_special_section_alts() only when stackval or orc or uaccess or
noinstr options are passed to objtool.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-13-sv@linux.ibm.com
---
 tools/objtool/check.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 71cf4b4ba1da2..752a6ffd5c4c9 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2392,9 +2392,11 @@ static int decode_sections(struct objtool_file *file)
 	 * Must be before add_jump_destinations(), which depends on 'func'
 	 * being set for alternatives, to enable proper sibling call detection.
 	 */
-	ret = add_special_section_alts(file);
-	if (ret)
-		return ret;
+	if (opts.stackval || opts.orc || opts.uaccess || opts.noinstr) {
+		ret = add_special_section_alts(file);
+		if (ret)
+			return ret;
+	}
 
 	ret = add_jump_destinations(file);
 	if (ret)
-- 
GitLab


From c1449735211dd8c4c2d54fa0ece6890ecbd74e24 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:51 +0530
Subject: [PATCH 1765/4122] objtool: Use macros to define arch specific reloc
 types

Make relocation types architecture specific.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-14-sv@linux.ibm.com
---
 tools/objtool/arch/x86/include/arch/elf.h | 2 ++
 tools/objtool/check.c                     | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/include/arch/elf.h b/tools/objtool/arch/x86/include/arch/elf.h
index 69cc4264b28a8..ac14987cf6875 100644
--- a/tools/objtool/arch/x86/include/arch/elf.h
+++ b/tools/objtool/arch/x86/include/arch/elf.h
@@ -2,5 +2,7 @@
 #define _OBJTOOL_ARCH_ELF
 
 #define R_NONE R_X86_64_NONE
+#define R_ABS64 R_X86_64_64
+#define R_ABS32 R_X86_64_32
 
 #endif /* _OBJTOOL_ARCH_ELF */
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 752a6ffd5c4c9..2d7153b5d5d16 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -885,7 +885,7 @@ static int create_mcount_loc_sections(struct objtool_file *file)
 		memset(loc, 0, addrsize);
 
 		if (elf_add_reloc_to_insn(file->elf, sec, idx,
-					  R_X86_64_64,
+					  addrsize == sizeof(u64) ? R_ABS64 : R_ABS32,
 					  insn->sec, insn->offset))
 			return -1;
 
-- 
GitLab


From 4ca993d498987332ceeedee5380101b84accaf35 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:52 +0530
Subject: [PATCH 1766/4122] objtool: Add arch specific function
 arch_ftrace_match()

Add architecture specific function to look for relocation records
pointing to architecture specific symbols.

Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-15-sv@linux.ibm.com
---
 tools/objtool/arch/x86/decode.c      | 5 +++++
 tools/objtool/check.c                | 2 +-
 tools/objtool/include/objtool/arch.h | 2 ++
 3 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 1c253b4b7ce00..af7ad09c926c4 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -23,6 +23,11 @@
 #include <objtool/builtin.h>
 #include <arch/elf.h>
 
+int arch_ftrace_match(char *name)
+{
+	return !strcmp(name, "__fentry__");
+}
+
 static int is_x86_64(const struct elf *elf)
 {
 	switch (elf->ehdr.e_machine) {
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 2d7153b5d5d16..7580c66ca5c8e 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -2316,7 +2316,7 @@ static int classify_symbols(struct objtool_file *file)
 			if (arch_is_rethunk(func))
 				func->return_thunk = true;
 
-			if (!strcmp(func->name, "__fentry__"))
+			if (arch_ftrace_match(func->name))
 				func->fentry = true;
 
 			if (is_profiling_func(func->name))
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index beb2f3aa94ffc..5149330f400f5 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -69,6 +69,8 @@ struct stack_op {
 
 struct instruction;
 
+int arch_ftrace_match(char *name);
+
 void arch_initial_func_cfi_state(struct cfi_init_state *state);
 
 int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
-- 
GitLab


From e52ec98c5ab18c0710ea22bf52f45e60a725adaf Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:53 +0530
Subject: [PATCH 1767/4122] objtool/powerpc: Enable objtool to be built on ppc

This patch adds [stub] implementations for required functions, inorder
to enable objtool build on powerpc.

[Christophe Leroy: powerpc: Add missing asm/asm.h for objtool,
Use local variables for type and imm in arch_decode_instruction(),
Adapt len for prefixed instructions.]

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-16-sv@linux.ibm.com
---
 arch/powerpc/Kconfig                          |  1 +
 arch/powerpc/include/asm/asm.h                |  7 ++
 tools/objtool/arch/powerpc/Build              |  2 +
 tools/objtool/arch/powerpc/decode.c           | 85 +++++++++++++++++++
 .../arch/powerpc/include/arch/cfi_regs.h      | 11 +++
 tools/objtool/arch/powerpc/include/arch/elf.h |  8 ++
 .../arch/powerpc/include/arch/special.h       | 21 +++++
 tools/objtool/arch/powerpc/special.c          | 19 +++++
 8 files changed, 154 insertions(+)
 create mode 100644 arch/powerpc/include/asm/asm.h
 create mode 100644 tools/objtool/arch/powerpc/Build
 create mode 100644 tools/objtool/arch/powerpc/decode.c
 create mode 100644 tools/objtool/arch/powerpc/include/arch/cfi_regs.h
 create mode 100644 tools/objtool/arch/powerpc/include/arch/elf.h
 create mode 100644 tools/objtool/arch/powerpc/include/arch/special.h
 create mode 100644 tools/objtool/arch/powerpc/special.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 699df27b0e2fc..12e6c16be54e8 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -238,6 +238,7 @@ config PPC
 	select HAVE_MOD_ARCH_SPECIFIC
 	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
 	select HAVE_OPTPROBES
+	select HAVE_OBJTOOL			if PPC32 || MPROFILE_KERNEL
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
 	select HAVE_PERF_REGS
diff --git a/arch/powerpc/include/asm/asm.h b/arch/powerpc/include/asm/asm.h
new file mode 100644
index 0000000000000..86f46b604e9a6
--- /dev/null
+++ b/arch/powerpc/include/asm/asm.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_ASM_H
+#define _ASM_POWERPC_ASM_H
+
+#define _ASM_PTR	" .long "
+
+#endif /* _ASM_POWERPC_ASM_H */
diff --git a/tools/objtool/arch/powerpc/Build b/tools/objtool/arch/powerpc/Build
new file mode 100644
index 0000000000000..d24d5636a5b84
--- /dev/null
+++ b/tools/objtool/arch/powerpc/Build
@@ -0,0 +1,2 @@
+objtool-y += decode.o
+objtool-y += special.o
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
new file mode 100644
index 0000000000000..dcd0975cad6ba
--- /dev/null
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <objtool/check.h>
+#include <objtool/elf.h>
+#include <objtool/arch.h>
+#include <objtool/warn.h>
+#include <objtool/builtin.h>
+#include <objtool/endianness.h>
+
+unsigned long arch_dest_reloc_offset(int addend)
+{
+	return addend;
+}
+
+bool arch_callee_saved_reg(unsigned char reg)
+{
+	return false;
+}
+
+int arch_decode_hint_reg(u8 sp_reg, int *base)
+{
+	exit(-1);
+}
+
+const char *arch_nop_insn(int len)
+{
+	exit(-1);
+}
+
+const char *arch_ret_insn(int len)
+{
+	exit(-1);
+}
+
+int arch_decode_instruction(struct objtool_file *file, const struct section *sec,
+			    unsigned long offset, unsigned int maxlen,
+			    unsigned int *len, enum insn_type *type,
+			    unsigned long *immediate,
+			    struct list_head *ops_list)
+{
+	unsigned int opcode;
+	enum insn_type typ;
+	unsigned long imm;
+	u32 insn;
+
+	insn = bswap_if_needed(file->elf, *(u32 *)(sec->data->d_buf + offset));
+	opcode = insn >> 26;
+	typ = INSN_OTHER;
+	imm = 0;
+
+	if (opcode == 1)
+		*len = 8;
+	else
+		*len = 4;
+
+	*type = typ;
+	*immediate = imm;
+
+	return 0;
+}
+
+unsigned long arch_jump_destination(struct instruction *insn)
+{
+	return insn->offset + insn->immediate;
+}
+
+void arch_initial_func_cfi_state(struct cfi_init_state *state)
+{
+	int i;
+
+	for (i = 0; i < CFI_NUM_REGS; i++) {
+		state->regs[i].base = CFI_UNDEFINED;
+		state->regs[i].offset = 0;
+	}
+
+	/* initial CFA (call frame address) */
+	state->cfa.base = CFI_SP;
+	state->cfa.offset = 0;
+
+	/* initial LR (return address) */
+	state->regs[CFI_RA].base = CFI_CFA;
+	state->regs[CFI_RA].offset = 0;
+}
diff --git a/tools/objtool/arch/powerpc/include/arch/cfi_regs.h b/tools/objtool/arch/powerpc/include/arch/cfi_regs.h
new file mode 100644
index 0000000000000..59638ebeafc82
--- /dev/null
+++ b/tools/objtool/arch/powerpc/include/arch/cfi_regs.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _OBJTOOL_CFI_REGS_H
+#define _OBJTOOL_CFI_REGS_H
+
+#define CFI_BP 1
+#define CFI_SP CFI_BP
+#define CFI_RA 32
+#define CFI_NUM_REGS 33
+
+#endif
diff --git a/tools/objtool/arch/powerpc/include/arch/elf.h b/tools/objtool/arch/powerpc/include/arch/elf.h
new file mode 100644
index 0000000000000..3c8ebb7d2a6bb
--- /dev/null
+++ b/tools/objtool/arch/powerpc/include/arch/elf.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _OBJTOOL_ARCH_ELF
+#define _OBJTOOL_ARCH_ELF
+
+#define R_NONE R_PPC_NONE
+
+#endif /* _OBJTOOL_ARCH_ELF */
diff --git a/tools/objtool/arch/powerpc/include/arch/special.h b/tools/objtool/arch/powerpc/include/arch/special.h
new file mode 100644
index 0000000000000..ffef9ada7133d
--- /dev/null
+++ b/tools/objtool/arch/powerpc/include/arch/special.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _PPC_ARCH_SPECIAL_H
+#define _PPC_ARCH_SPECIAL_H
+
+#define EX_ENTRY_SIZE 8
+#define EX_ORIG_OFFSET 0
+#define EX_NEW_OFFSET 4
+
+#define JUMP_ENTRY_SIZE 16
+#define JUMP_ORIG_OFFSET 0
+#define JUMP_NEW_OFFSET 4
+#define JUMP_KEY_OFFSET 8
+
+#define ALT_ENTRY_SIZE 12
+#define ALT_ORIG_OFFSET 0
+#define ALT_NEW_OFFSET 4
+#define ALT_FEATURE_OFFSET 8
+#define ALT_ORIG_LEN_OFFSET 10
+#define ALT_NEW_LEN_OFFSET 11
+
+#endif /* _PPC_ARCH_SPECIAL_H */
diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c
new file mode 100644
index 0000000000000..d33868147196a
--- /dev/null
+++ b/tools/objtool/arch/powerpc/special.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <string.h>
+#include <stdlib.h>
+#include <objtool/special.h>
+#include <objtool/builtin.h>
+
+
+bool arch_support_alt_relocation(struct special_alt *special_alt,
+				 struct instruction *insn,
+				 struct reloc *reloc)
+{
+	exit(-1);
+}
+
+struct reloc *arch_find_switch_table(struct objtool_file *file,
+				    struct instruction *insn)
+{
+	exit(-1);
+}
-- 
GitLab


From c984aef8c8326035570ff6e01d0ff9e79a5dfa76 Mon Sep 17 00:00:00 2001
From: Sathvika Vasireddy <sv@linux.ibm.com>
Date: Mon, 14 Nov 2022 23:27:54 +0530
Subject: [PATCH 1768/4122] objtool/powerpc: Add --mcount specific
 implementation

This patch enables objtool --mcount on powerpc, and adds implementation
specific to powerpc.

Tested-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Acked-by: Josh Poimboeuf <jpoimboe@kernel.org>
Signed-off-by: Sathvika Vasireddy <sv@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114175754.1131267-17-sv@linux.ibm.com
---
 arch/powerpc/Kconfig                          |  1 +
 tools/objtool/arch/powerpc/decode.c           | 16 ++++++++++++++++
 tools/objtool/arch/powerpc/include/arch/elf.h |  2 ++
 3 files changed, 19 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 12e6c16be54e8..9c07068ba5e5d 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -239,6 +239,7 @@ config PPC
 	select HAVE_NMI				if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
 	select HAVE_OPTPROBES
 	select HAVE_OBJTOOL			if PPC32 || MPROFILE_KERNEL
+	select HAVE_OBJTOOL_MCOUNT		if HAVE_OBJTOOL
 	select HAVE_PERF_EVENTS
 	select HAVE_PERF_EVENTS_NMI		if PPC64
 	select HAVE_PERF_REGS
diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index dcd0975cad6ba..01cade98b49e8 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -9,6 +9,11 @@
 #include <objtool/builtin.h>
 #include <objtool/endianness.h>
 
+int arch_ftrace_match(char *name)
+{
+	return !strcmp(name, "_mcount");
+}
+
 unsigned long arch_dest_reloc_offset(int addend)
 {
 	return addend;
@@ -50,6 +55,17 @@ int arch_decode_instruction(struct objtool_file *file, const struct section *sec
 	typ = INSN_OTHER;
 	imm = 0;
 
+	switch (opcode) {
+	case 18: /* b[l][a] */
+		if ((insn & 3) == 1) /* bl */
+			typ = INSN_CALL;
+
+		imm = insn & 0x3fffffc;
+		if (imm & 0x2000000)
+			imm -= 0x4000000;
+		break;
+	}
+
 	if (opcode == 1)
 		*len = 8;
 	else
diff --git a/tools/objtool/arch/powerpc/include/arch/elf.h b/tools/objtool/arch/powerpc/include/arch/elf.h
index 3c8ebb7d2a6bb..73f9ae172fe55 100644
--- a/tools/objtool/arch/powerpc/include/arch/elf.h
+++ b/tools/objtool/arch/powerpc/include/arch/elf.h
@@ -4,5 +4,7 @@
 #define _OBJTOOL_ARCH_ELF
 
 #define R_NONE R_PPC_NONE
+#define R_ABS64 R_PPC64_ADDR64
+#define R_ABS32 R_PPC_ADDR32
 
 #endif /* _OBJTOOL_ARCH_ELF */
-- 
GitLab


From 16bdbae394280f1d97933d919023eccbf0b564bd Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 7 Nov 2022 13:24:55 +0100
Subject: [PATCH 1769/4122] hwrng: core - treat default_quality as a maximum
 and default to 1024

Most hw_random devices return entropy which is assumed to be of full
quality, but driver authors don't bother setting the quality knob. Some
hw_random devices return less than full quality entropy, and then driver
authors set the quality knob. Therefore, the entropy crediting should be
opt-out rather than opt-in per-driver, to reflect the actual reality on
the ground.

For example, the two Raspberry Pi RNG drivers produce full entropy
randomness, and both EDK2 and U-Boot's drivers for these treat them as
such. The result is that EFI then uses these numbers and passes the to
Linux, and Linux credits them as boot, thereby initializing the RNG.
Yet, in Linux, the quality knob was never set to anything, and so on the
chance that Linux is booted without EFI, nothing is ever credited.
That's annoying.

The same pattern appears to repeat itself throughout various drivers. In
fact, very very few drivers have bothered setting quality=1024.

Looking at the git history of existing drivers and corresponding mailing
list discussion, this conclusion tracks. There's been a decent amount of
discussion about drivers that set quality < 1024 -- somebody read and
interepreted a datasheet, or made some back of the envelope calculation
somehow. But there's been very little, if any, discussion about most
drivers where the quality is just set to 1024 or unset (or set to 1000
when the authors misunderstood the API and assumed it was base-10 rather
than base-2); in both cases the intent was fairly clear of, "this is a
hardware random device; it's fine."

So let's invert this logic. A hw_random struct's quality knob now
controls the maximum quality a driver can produce, or 0 to specify 1024.
Then, the module-wide switch called "default_quality" is changed to
represent the maximum quality of any driver. By default it's 1024, and
the quality of any particular driver is then given by:

    min(default_quality, rng->quality ?: 1024);

This way, the user can still turn this off for weird reasons (and we can
replace whatever driver-specific disabling hacks existed in the past),
yet we get proper crediting for relevant RNGs.

Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/um/drivers/random.c                          | 1 -
 drivers/char/hw_random/cavium-rng-vf.c            | 1 -
 drivers/char/hw_random/cn10k-rng.c                | 1 -
 drivers/char/hw_random/core.c                     | 9 +++------
 drivers/char/hw_random/mpfs-rng.c                 | 1 -
 drivers/char/hw_random/npcm-rng.c                 | 1 -
 drivers/char/hw_random/s390-trng.c                | 1 -
 drivers/char/hw_random/timeriomem-rng.c           | 2 --
 drivers/char/hw_random/virtio-rng.c               | 1 -
 drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c | 1 -
 drivers/crypto/atmel-sha204a.c                    | 1 -
 drivers/crypto/caam/caamrng.c                     | 1 -
 drivers/firmware/turris-mox-rwtm.c                | 1 -
 drivers/s390/crypto/zcrypt_api.c                  | 6 ------
 drivers/usb/misc/chaoskey.c                       | 1 -
 include/linux/hw_random.h                         | 2 +-
 16 files changed, 4 insertions(+), 27 deletions(-)

diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index 32b3341fe9707..da985e0dc69a5 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -82,7 +82,6 @@ static int __init rng_init (void)
 	sigio_broken(random_fd);
 	hwrng.name = RNG_MODULE_NAME;
 	hwrng.read = rng_dev_read;
-	hwrng.quality = 1024;
 
 	err = hwrng_register(&hwrng);
 	if (err) {
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
index 7c55f4cf4a8ba..c99c54cd99c67 100644
--- a/drivers/char/hw_random/cavium-rng-vf.c
+++ b/drivers/char/hw_random/cavium-rng-vf.c
@@ -225,7 +225,6 @@ static int cavium_rng_probe_vf(struct	pci_dev		*pdev,
 		return -ENOMEM;
 
 	rng->ops.read    = cavium_rng_read;
-	rng->ops.quality = 1000;
 
 	pci_set_drvdata(pdev, rng);
 
diff --git a/drivers/char/hw_random/cn10k-rng.c b/drivers/char/hw_random/cn10k-rng.c
index a01e9307737c5..c1193f85982c3 100644
--- a/drivers/char/hw_random/cn10k-rng.c
+++ b/drivers/char/hw_random/cn10k-rng.c
@@ -145,7 +145,6 @@ static int cn10k_rng_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		return -ENOMEM;
 
 	rng->ops.read    = cn10k_rng_read;
-	rng->ops.quality = 1000;
 	rng->ops.priv = (unsigned long)rng;
 
 	reset_rng_health_state(rng);
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index cc002b0c2f0c3..afde685f5e0a4 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -41,14 +41,14 @@ static DEFINE_MUTEX(reading_mutex);
 static int data_avail;
 static u8 *rng_buffer, *rng_fillbuf;
 static unsigned short current_quality;
-static unsigned short default_quality; /* = 0; default to "off" */
+static unsigned short default_quality = 1024; /* default to maximum */
 
 module_param(current_quality, ushort, 0644);
 MODULE_PARM_DESC(current_quality,
 		 "current hwrng entropy estimation per 1024 bits of input -- obsolete, use rng_quality instead");
 module_param(default_quality, ushort, 0644);
 MODULE_PARM_DESC(default_quality,
-		 "default entropy content of hwrng per 1024 bits of input");
+		 "default maximum entropy content of hwrng per 1024 bits of input");
 
 static void drop_current_rng(void);
 static int hwrng_init(struct hwrng *rng);
@@ -170,10 +170,7 @@ static int hwrng_init(struct hwrng *rng)
 	reinit_completion(&rng->cleanup_done);
 
 skip_init:
-	if (!rng->quality)
-		rng->quality = default_quality;
-	if (rng->quality > 1024)
-		rng->quality = 1024;
+	rng->quality = min_t(u16, min_t(u16, default_quality, 1024), rng->quality ?: 1024);
 	current_quality = rng->quality; /* obsolete */
 
 	return 0;
diff --git a/drivers/char/hw_random/mpfs-rng.c b/drivers/char/hw_random/mpfs-rng.c
index 5813da617a485..c6972734ae62e 100644
--- a/drivers/char/hw_random/mpfs-rng.c
+++ b/drivers/char/hw_random/mpfs-rng.c
@@ -78,7 +78,6 @@ static int mpfs_rng_probe(struct platform_device *pdev)
 
 	rng_priv->rng.read = mpfs_rng_read;
 	rng_priv->rng.name = pdev->name;
-	rng_priv->rng.quality = 1024;
 
 	platform_set_drvdata(pdev, rng_priv);
 
diff --git a/drivers/char/hw_random/npcm-rng.c b/drivers/char/hw_random/npcm-rng.c
index 5bf7f370f9859..9903d0357e06e 100644
--- a/drivers/char/hw_random/npcm-rng.c
+++ b/drivers/char/hw_random/npcm-rng.c
@@ -111,7 +111,6 @@ static int npcm_rng_probe(struct platform_device *pdev)
 	priv->rng.name = pdev->name;
 	priv->rng.read = npcm_rng_read;
 	priv->rng.priv = (unsigned long)&pdev->dev;
-	priv->rng.quality = 1000;
 	priv->clkp = (u32)(uintptr_t)of_device_get_match_data(&pdev->dev);
 
 	writel(NPCM_RNG_M1ROSEL, priv->base + NPCM_RNGMODE_REG);
diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c
index 795853dfc46b7..cffa326ddc8d3 100644
--- a/drivers/char/hw_random/s390-trng.c
+++ b/drivers/char/hw_random/s390-trng.c
@@ -191,7 +191,6 @@ static struct hwrng trng_hwrng_dev = {
 	.name		= "s390-trng",
 	.data_read	= trng_hwrng_data_read,
 	.read		= trng_hwrng_read,
-	.quality	= 1024,
 };
 
 
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index 8ea1fc831eb7b..26f322d19a883 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -145,8 +145,6 @@ static int timeriomem_rng_probe(struct platform_device *pdev)
 		if (!of_property_read_u32(pdev->dev.of_node,
 						"quality", &i))
 			priv->rng_ops.quality = i;
-		else
-			priv->rng_ops.quality = 0;
 	} else {
 		period = pdata->period;
 		priv->rng_ops.quality = pdata->quality;
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index a6f3a8a2aca6d..f7690e0f92ede 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -148,7 +148,6 @@ static int probe_common(struct virtio_device *vdev)
 		.cleanup = virtio_cleanup,
 		.priv = (unsigned long)vi,
 		.name = vi->name,
-		.quality = 1000,
 	};
 	vdev->priv = vi;
 
diff --git a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
index c4b0a8b588429..e2b9b91046941 100644
--- a/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
+++ b/drivers/crypto/allwinner/sun8i-ce/sun8i-ce-trng.c
@@ -108,7 +108,6 @@ int sun8i_ce_hwrng_register(struct sun8i_ce_dev *ce)
 	}
 	ce->trng.name = "sun8i Crypto Engine TRNG";
 	ce->trng.read = sun8i_ce_trng_read;
-	ce->trng.quality = 1000;
 
 	ret = hwrng_register(&ce->trng);
 	if (ret)
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index a84b657598c6e..c0103e7fc2e75 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -107,7 +107,6 @@ static int atmel_sha204a_probe(struct i2c_client *client,
 
 	i2c_priv->hwrng.name = dev_name(&client->dev);
 	i2c_priv->hwrng.read = atmel_sha204a_rng_read;
-	i2c_priv->hwrng.quality = 1024;
 
 	ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng);
 	if (ret)
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 77d048dfe5d06..1f0e820509767 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -246,7 +246,6 @@ int caam_rng_init(struct device *ctrldev)
 	ctx->rng.cleanup = caam_cleanup;
 	ctx->rng.read    = caam_read;
 	ctx->rng.priv    = (unsigned long)ctx;
-	ctx->rng.quality = 1024;
 
 	dev_info(ctrldev, "registering rng-caam\n");
 
diff --git a/drivers/firmware/turris-mox-rwtm.c b/drivers/firmware/turris-mox-rwtm.c
index c2d34dc8ba462..6ea5789a89e2b 100644
--- a/drivers/firmware/turris-mox-rwtm.c
+++ b/drivers/firmware/turris-mox-rwtm.c
@@ -528,7 +528,6 @@ static int turris_mox_rwtm_probe(struct platform_device *pdev)
 	rwtm->hwrng.name = DRIVER_NAME "_hwrng";
 	rwtm->hwrng.read = mox_hwrng_read;
 	rwtm->hwrng.priv = (unsigned long) rwtm;
-	rwtm->hwrng.quality = 1024;
 
 	ret = devm_hwrng_register(dev, &rwtm->hwrng);
 	if (ret < 0) {
diff --git a/drivers/s390/crypto/zcrypt_api.c b/drivers/s390/crypto/zcrypt_api.c
index f94b43ce9a658..4bf36e53fe3e4 100644
--- a/drivers/s390/crypto/zcrypt_api.c
+++ b/drivers/s390/crypto/zcrypt_api.c
@@ -53,10 +53,6 @@ MODULE_LICENSE("GPL");
 EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_req);
 EXPORT_TRACEPOINT_SYMBOL(s390_zcrypt_rep);
 
-static int zcrypt_hwrng_seed = 1;
-module_param_named(hwrng_seed, zcrypt_hwrng_seed, int, 0440);
-MODULE_PARM_DESC(hwrng_seed, "Turn on/off hwrng auto seed, default is 1 (on).");
-
 DEFINE_SPINLOCK(zcrypt_list_lock);
 LIST_HEAD(zcrypt_card_list);
 
@@ -2063,8 +2059,6 @@ int zcrypt_rng_device_add(void)
 			goto out;
 		}
 		zcrypt_rng_buffer_index = 0;
-		if (!zcrypt_hwrng_seed)
-			zcrypt_rng_dev.quality = 0;
 		rc = hwrng_register(&zcrypt_rng_dev);
 		if (rc)
 			goto out_free;
diff --git a/drivers/usb/misc/chaoskey.c b/drivers/usb/misc/chaoskey.c
index 87067c3d6109b..6fb5140e29b9d 100644
--- a/drivers/usb/misc/chaoskey.c
+++ b/drivers/usb/misc/chaoskey.c
@@ -200,7 +200,6 @@ static int chaoskey_probe(struct usb_interface *interface,
 
 	dev->hwrng.name = dev->name ? dev->name : chaoskey_driver.name;
 	dev->hwrng.read = chaoskey_rng_read;
-	dev->hwrng.quality = 1024;
 
 	dev->hwrng_registered = (hwrng_register(&dev->hwrng) == 0);
 	if (!dev->hwrng_registered)
diff --git a/include/linux/hw_random.h b/include/linux/hw_random.h
index 77c2885c4c130..8a3115516a1ba 100644
--- a/include/linux/hw_random.h
+++ b/include/linux/hw_random.h
@@ -34,7 +34,7 @@
  * @priv:		Private data, for use by the RNG driver.
  * @quality:		Estimation of true entropy in RNG's bitstream
  *			(in bits of entropy per 1024 bits of input;
- *			valid values: 1 to 1024, or 0 for unknown).
+ *			valid values: 1 to 1024, or 0 for maximum).
  */
 struct hwrng {
 	const char *name;
-- 
GitLab


From 7cdc5e6bcd02ab45d0a2991b35861b448e355276 Mon Sep 17 00:00:00 2001
From: Tomas Marek <tomas.marek@elrest.cz>
Date: Tue, 8 Nov 2022 07:42:40 +0100
Subject: [PATCH 1770/4122] hwrng: stm32 - rename readl return value

Use a more meaningful name for the readl return value variable.

Link: https://lore.kernel.org/all/Y1J3QwynPFIlfrIv@loth.rohan.me.apana.org.au/

Signed-off-by: Tomas Marek <tomas.marek@elrest.cz>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/stm32-rng.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/char/hw_random/stm32-rng.c b/drivers/char/hw_random/stm32-rng.c
index 366edda4848b6..a6731cf0627ac 100644
--- a/drivers/char/hw_random/stm32-rng.c
+++ b/drivers/char/hw_random/stm32-rng.c
@@ -49,13 +49,13 @@ static int stm32_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 		/* Manage timeout which is based on timer and take */
 		/* care of initial delay time when enabling rng	*/
 		if (!sr && wait) {
-			int ret;
+			int err;
 
-			ret = readl_relaxed_poll_timeout_atomic(priv->base
+			err = readl_relaxed_poll_timeout_atomic(priv->base
 								   + RNG_SR,
 								   sr, sr,
 								   10, 50000);
-			if (ret)
+			if (err)
 				dev_err((struct device *)priv->rng.priv,
 					"%s: timeout %x!\n", __func__, sr);
 		}
-- 
GitLab


From 4f1c596df706c9aca662b6c214fad84047ae2a97 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Tue, 8 Nov 2022 16:29:12 +0800
Subject: [PATCH 1771/4122] crypto: ccree - Remove debugfs when
 platform_driver_register failed

When platform_driver_register failed, we need to remove debugfs,
which will caused a resource leak, fix it.

Failed logs as follows:
[   32.606488] debugfs: Directory 'ccree' with parent '/' already present!

Fixes: 4c3f97276e15 ("crypto: ccree - introduce CryptoCell driver")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccree/cc_driver.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index cadead18b59e8..d489c6f808925 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -651,9 +651,17 @@ static struct platform_driver ccree_driver = {
 
 static int __init ccree_init(void)
 {
+	int rc;
+
 	cc_debugfs_global_init();
 
-	return platform_driver_register(&ccree_driver);
+	rc = platform_driver_register(&ccree_driver);
+	if (rc) {
+		cc_debugfs_global_fini();
+		return rc;
+	}
+
+	return 0;
 }
 module_init(ccree_init);
 
-- 
GitLab


From 824db5cd1ec9b95c254fc317c3999f6b53e98b12 Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Thu, 10 Nov 2022 18:42:04 +0800
Subject: [PATCH 1772/4122] crypto: arm64 - Fix unused variable compilation
 warnings of cpu_feature

The cpu feature defined by MODULE_DEVICE_TABLE is only referenced when
compiling as a module, and the warning of unused variable will be
encountered when compiling with intree. The warning can be removed by
adding the __maybe_unused flag.

Fixes: 03c9a333fef1 ("crypto: arm64/ghash - add NEON accelerated fallback for 64-bit PMULL")
Fixes: ae1b83c7d572 ("crypto: arm64/sm4 - add CE implementation for GCM mode")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/ghash-ce-glue.c   | 2 +-
 arch/arm64/crypto/sm4-ce-gcm-glue.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 15794fe21a0b2..e5e9adc1fcf4e 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -508,7 +508,7 @@ static void __exit ghash_ce_mod_exit(void)
 		crypto_unregister_shash(&ghash_alg);
 }
 
-static const struct cpu_feature ghash_cpu_feature[] = {
+static const struct cpu_feature __maybe_unused ghash_cpu_feature[] = {
 	{ cpu_feature(PMULL) }, { }
 };
 MODULE_DEVICE_TABLE(cpu, ghash_cpu_feature);
diff --git a/arch/arm64/crypto/sm4-ce-gcm-glue.c b/arch/arm64/crypto/sm4-ce-gcm-glue.c
index e90ea0f17beb2..c450a2025ca9a 100644
--- a/arch/arm64/crypto/sm4-ce-gcm-glue.c
+++ b/arch/arm64/crypto/sm4-ce-gcm-glue.c
@@ -271,7 +271,7 @@ static void __exit sm4_ce_gcm_exit(void)
 	crypto_unregister_aead(&sm4_gcm_alg);
 }
 
-static const struct cpu_feature sm4_ce_gcm_cpu_feature[] = {
+static const struct cpu_feature __maybe_unused sm4_ce_gcm_cpu_feature[] = {
 	{ cpu_feature(PMULL) },
 	{}
 };
-- 
GitLab


From 3a58c231172537f7b0e19d93ed33decd04f80eab Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 11 Nov 2022 17:59:17 +0800
Subject: [PATCH 1773/4122] crypto: cryptd - Use request context instead of
 stack for sub-request

cryptd is buggy as it tries to use sync_skcipher without going
through the proper sync_skcipher interface.  In fact it doesn't
even need sync_skcipher since it's already a proper skcipher and
can easily access the request context instead of using something
off the stack.

Fixes: 36b3875a97b8 ("crypto: cryptd - Remove VLA usage of skcipher")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/cryptd.c | 36 +++++++++++++++++++-----------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 668095eca0faf..ca3a40fc7da91 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -68,11 +68,12 @@ struct aead_instance_ctx {
 
 struct cryptd_skcipher_ctx {
 	refcount_t refcnt;
-	struct crypto_sync_skcipher *child;
+	struct crypto_skcipher *child;
 };
 
 struct cryptd_skcipher_request_ctx {
 	crypto_completion_t complete;
+	struct skcipher_request req;
 };
 
 struct cryptd_hash_ctx {
@@ -227,13 +228,13 @@ static int cryptd_skcipher_setkey(struct crypto_skcipher *parent,
 				  const u8 *key, unsigned int keylen)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(parent);
-	struct crypto_sync_skcipher *child = ctx->child;
+	struct crypto_skcipher *child = ctx->child;
 
-	crypto_sync_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
-	crypto_sync_skcipher_set_flags(child,
-				       crypto_skcipher_get_flags(parent) &
-					 CRYPTO_TFM_REQ_MASK);
-	return crypto_sync_skcipher_setkey(child, key, keylen);
+	crypto_skcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(child,
+				  crypto_skcipher_get_flags(parent) &
+				  CRYPTO_TFM_REQ_MASK);
+	return crypto_skcipher_setkey(child, key, keylen);
 }
 
 static void cryptd_skcipher_complete(struct skcipher_request *req, int err)
@@ -258,13 +259,13 @@ static void cryptd_skcipher_encrypt(struct crypto_async_request *base,
 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_sync_skcipher *child = ctx->child;
-	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
+	struct skcipher_request *subreq = &rctx->req;
+	struct crypto_skcipher *child = ctx->child;
 
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 
-	skcipher_request_set_sync_tfm(subreq, child);
+	skcipher_request_set_tfm(subreq, child);
 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
 				      NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -286,13 +287,13 @@ static void cryptd_skcipher_decrypt(struct crypto_async_request *base,
 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct crypto_sync_skcipher *child = ctx->child;
-	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, child);
+	struct skcipher_request *subreq = &rctx->req;
+	struct crypto_skcipher *child = ctx->child;
 
 	if (unlikely(err == -EINPROGRESS))
 		goto out;
 
-	skcipher_request_set_sync_tfm(subreq, child);
+	skcipher_request_set_tfm(subreq, child);
 	skcipher_request_set_callback(subreq, CRYPTO_TFM_REQ_MAY_SLEEP,
 				      NULL, NULL);
 	skcipher_request_set_crypt(subreq, req->src, req->dst, req->cryptlen,
@@ -343,9 +344,10 @@ static int cryptd_skcipher_init_tfm(struct crypto_skcipher *tfm)
 	if (IS_ERR(cipher))
 		return PTR_ERR(cipher);
 
-	ctx->child = (struct crypto_sync_skcipher *)cipher;
+	ctx->child = cipher;
 	crypto_skcipher_set_reqsize(
-		tfm, sizeof(struct cryptd_skcipher_request_ctx));
+		tfm, sizeof(struct cryptd_skcipher_request_ctx) +
+		     crypto_skcipher_reqsize(cipher));
 	return 0;
 }
 
@@ -353,7 +355,7 @@ static void cryptd_skcipher_exit_tfm(struct crypto_skcipher *tfm)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	crypto_free_sync_skcipher(ctx->child);
+	crypto_free_skcipher(ctx->child);
 }
 
 static void cryptd_skcipher_free(struct skcipher_instance *inst)
@@ -931,7 +933,7 @@ struct crypto_skcipher *cryptd_skcipher_child(struct cryptd_skcipher *tfm)
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
 
-	return &ctx->child->base;
+	return ctx->child;
 }
 EXPORT_SYMBOL_GPL(cryptd_skcipher_child);
 
-- 
GitLab


From cc7710d0d4ebc6998f04035cde4f32c5ddbe9d7f Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Fri, 11 Nov 2022 18:00:36 +0800
Subject: [PATCH 1774/4122] crypto: hisilicon/qm - add missing pci_dev_put() in
 q_num_set()

pci_get_device() will increase the reference count for the returned
pci_dev. We need to use pci_dev_put() to decrease the reference count
before q_num_set() returns.

Fixes: c8b4b477079d ("crypto: hisilicon - add HiSilicon HPRE accelerator")
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Reviewed-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/hisi_acc_qm.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index e230c7c46110a..c3618255b1504 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -384,14 +384,14 @@ struct hisi_qp {
 static inline int q_num_set(const char *val, const struct kernel_param *kp,
 			    unsigned int device)
 {
-	struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
-					      device, NULL);
+	struct pci_dev *pdev;
 	u32 n, q_num;
 	int ret;
 
 	if (!val)
 		return -EINVAL;
 
+	pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI, device, NULL);
 	if (!pdev) {
 		q_num = min_t(u32, QM_QNUM_V1, QM_QNUM_V2);
 		pr_info("No device found currently, suppose queue number is %u\n",
@@ -401,6 +401,8 @@ static inline int q_num_set(const char *val, const struct kernel_param *kp,
 			q_num = QM_QNUM_V1;
 		else
 			q_num = QM_QNUM_V2;
+
+		pci_dev_put(pdev);
 	}
 
 	ret = kstrtou32(val, 10, &n);
-- 
GitLab


From e6cb02bd0a52457e486a752da5db7b67f2540c16 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 11 Nov 2022 18:05:41 +0800
Subject: [PATCH 1775/4122] crypto: skcipher - Allow sync algorithms with large
 request contexts

Some sync algorithms may require a large amount of temporary
space during its operations.  There is no reason why they should
be limited just because some legacy users want to place all
temporary data on the stack.

Such algorithms can now set a flag to indicate that they need
extra request context, which will cause them to be invisible
to users that go through the sync_skcipher interface.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/skcipher.c                  | 2 +-
 include/crypto/internal/skcipher.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 418211180ceec..0ecab31cfe79b 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -763,7 +763,7 @@ struct crypto_sync_skcipher *crypto_alloc_sync_skcipher(
 	struct crypto_skcipher *tfm;
 
 	/* Only sync algorithms allowed. */
-	mask |= CRYPTO_ALG_ASYNC;
+	mask |= CRYPTO_ALG_ASYNC | CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE;
 
 	tfm = crypto_alloc_tfm(alg_name, &crypto_skcipher_type, type, mask);
 
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index a2339f80a6159..2a97540156bbf 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -14,6 +14,14 @@
 #include <linux/list.h>
 #include <linux/types.h>
 
+/*
+ * Set this if your algorithm is sync but needs a reqsize larger
+ * than MAX_SYNC_SKCIPHER_REQSIZE.
+ *
+ * Reuse bit that is specific to hash algorithms.
+ */
+#define CRYPTO_ALG_SKCIPHER_REQSIZE_LARGE CRYPTO_ALG_OPTIONAL_KEY
+
 struct aead_request;
 struct rtattr;
 
-- 
GitLab


From 7bbbc9d81be588ae4fb28b5b202e4421dbfef197 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 12 Nov 2022 02:12:50 +0000
Subject: [PATCH 1776/4122] crypto: hisilicon/qm - delete redundant null
 assignment operations

There is no security data in the pointer. It is only a value transferred
as a structure. It makes no sense to zero a variable that is on the stack.
So not need to set the pointer to null.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 363a02810a164..849dc80a71185 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -1773,7 +1773,6 @@ static void dfx_regs_uninit(struct hisi_qm *qm,
 		dregs[i].regs = NULL;
 	}
 	kfree(dregs);
-	dregs = NULL;
 }
 
 /**
-- 
GitLab


From b40b62ed7b0ffe8eb2e6fe8bcfb47027c9a93e93 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 12 Nov 2022 02:12:51 +0000
Subject: [PATCH 1777/4122] crypto: hisilicon/qm - modify the process of regs
 dfx

The last register logic and different register logic are combined.
Use "u32" instead of 'int' in the regs function input parameter to
simplify some checks.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_main.c |   7 +-
 drivers/crypto/hisilicon/qm.c             | 151 ++++++++++++----------
 drivers/crypto/hisilicon/sec2/sec_main.c  |   7 +-
 drivers/crypto/hisilicon/zip/zip_main.c   |   7 +-
 include/linux/hisi_acc_qm.h               |   8 +-
 5 files changed, 99 insertions(+), 81 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index baf1faec7046f..923f9c2792654 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -1101,8 +1101,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
 
 	qm->debug.sqe_mask_offset = HPRE_SQE_MASK_OFFSET;
 	qm->debug.sqe_mask_len = HPRE_SQE_MASK_LEN;
-	ret = hisi_qm_diff_regs_init(qm, hpre_diff_regs,
-				ARRAY_SIZE(hpre_diff_regs));
+	ret = hisi_qm_regs_debugfs_init(qm, hpre_diff_regs, ARRAY_SIZE(hpre_diff_regs));
 	if (ret) {
 		dev_warn(dev, "Failed to init HPRE diff regs!\n");
 		goto debugfs_remove;
@@ -1121,7 +1120,7 @@ static int hpre_debugfs_init(struct hisi_qm *qm)
 	return 0;
 
 failed_to_create:
-	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+	hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
 debugfs_remove:
 	debugfs_remove_recursive(qm->debug.debug_root);
 	return ret;
@@ -1129,7 +1128,7 @@ debugfs_remove:
 
 static void hpre_debugfs_exit(struct hisi_qm *qm)
 {
-	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
+	hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hpre_diff_regs));
 
 	debugfs_remove_recursive(qm->debug.debug_root);
 }
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 849dc80a71185..441466df7c6d5 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -1722,8 +1722,21 @@ static int qm_regs_show(struct seq_file *s, void *unused)
 
 DEFINE_SHOW_ATTRIBUTE(qm_regs);
 
+static void dfx_regs_uninit(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, int reg_len)
+{
+	int i;
+
+	/* Setting the pointer is NULL to prevent double free */
+	for (i = 0; i < reg_len; i++) {
+		kfree(dregs[i].regs);
+		dregs[i].regs = NULL;
+	}
+	kfree(dregs);
+}
+
 static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
-	const struct dfx_diff_registers *cregs, int reg_len)
+	const struct dfx_diff_registers *cregs, u32 reg_len)
 {
 	struct dfx_diff_registers *diff_regs;
 	u32 j, base_offset;
@@ -1762,64 +1775,107 @@ alloc_error:
 	return ERR_PTR(-ENOMEM);
 }
 
-static void dfx_regs_uninit(struct hisi_qm *qm,
-		struct dfx_diff_registers *dregs, int reg_len)
+static int qm_diff_regs_init(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, u32 reg_len)
+{
+	qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+	if (IS_ERR(qm->debug.qm_diff_regs))
+		return PTR_ERR(qm->debug.qm_diff_regs);
+
+	qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
+	if (IS_ERR(qm->debug.acc_diff_regs)) {
+		dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+		return PTR_ERR(qm->debug.acc_diff_regs);
+	}
+
+	return 0;
+}
+
+static void qm_last_regs_uninit(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+
+	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+		return;
+
+	kfree(debug->qm_last_words);
+	debug->qm_last_words = NULL;
+}
+
+static int qm_last_regs_init(struct hisi_qm *qm)
 {
+	int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
+	struct qm_debug *debug = &qm->debug;
 	int i;
 
-	/* Setting the pointer is NULL to prevent double free */
-	for (i = 0; i < reg_len; i++) {
-		kfree(dregs[i].regs);
-		dregs[i].regs = NULL;
+	if (qm->fun_type == QM_HW_VF)
+		return 0;
+
+	debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
+	if (!debug->qm_last_words)
+		return -ENOMEM;
+
+	for (i = 0; i < dfx_regs_num; i++) {
+		debug->qm_last_words[i] = readl_relaxed(qm->io_base +
+			qm_dfx_regs[i].offset);
 	}
-	kfree(dregs);
+
+	return 0;
+}
+
+static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len)
+{
+	dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
+	dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
 }
 
 /**
- * hisi_qm_diff_regs_init() - Allocate memory for registers.
+ * hisi_qm_regs_debugfs_init() - Allocate memory for registers.
  * @qm: device qm handle.
  * @dregs: diff registers handle.
  * @reg_len: diff registers region length.
  */
-int hisi_qm_diff_regs_init(struct hisi_qm *qm,
-		struct dfx_diff_registers *dregs, int reg_len)
+int hisi_qm_regs_debugfs_init(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, u32 reg_len)
 {
-	if (!qm || !dregs || reg_len <= 0)
+	int ret;
+
+	if (!qm || !dregs)
 		return -EINVAL;
 
 	if (qm->fun_type != QM_HW_PF)
 		return 0;
 
-	qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs,
-						ARRAY_SIZE(qm_diff_regs));
-	if (IS_ERR(qm->debug.qm_diff_regs))
-		return PTR_ERR(qm->debug.qm_diff_regs);
+	ret = qm_last_regs_init(qm);
+	if (ret) {
+		dev_info(&qm->pdev->dev, "failed to init qm words memory!\n");
+		return ret;
+	}
 
-	qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
-	if (IS_ERR(qm->debug.acc_diff_regs)) {
-		dfx_regs_uninit(qm, qm->debug.qm_diff_regs,
-				ARRAY_SIZE(qm_diff_regs));
-		return PTR_ERR(qm->debug.acc_diff_regs);
+	ret = qm_diff_regs_init(qm, dregs, reg_len);
+	if (ret) {
+		qm_last_regs_uninit(qm);
+		return ret;
 	}
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_init);
+EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init);
 
 /**
- * hisi_qm_diff_regs_uninit() - Free memory for registers.
+ * hisi_qm_regs_debugfs_uninit() - Free memory for registers.
  * @qm: device qm handle.
  * @reg_len: diff registers region length.
  */
-void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len)
+void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len)
 {
-	if (!qm  || reg_len <= 0 || qm->fun_type != QM_HW_PF)
+	if (!qm || qm->fun_type != QM_HW_PF)
 		return;
 
-	dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
-	dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+	qm_diff_regs_uninit(qm, reg_len);
+	qm_last_regs_uninit(qm);
 }
-EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit);
+EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit);
 
 /**
  * hisi_qm_acc_diff_regs_dump() - Dump registers's value.
@@ -1829,12 +1885,12 @@ EXPORT_SYMBOL_GPL(hisi_qm_diff_regs_uninit);
  * @regs_len: diff registers region length.
  */
 void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
-	struct dfx_diff_registers *dregs, int regs_len)
+	struct dfx_diff_registers *dregs, u32 regs_len)
 {
 	u32 j, val, base_offset;
 	int i, ret;
 
-	if (!qm || !s || !dregs || regs_len <= 0)
+	if (!qm || !s || !dregs)
 		return;
 
 	ret = hisi_qm_get_dfx_access(qm);
@@ -3719,17 +3775,6 @@ static void hisi_qm_set_state(struct hisi_qm *qm, u8 state)
 		writel(state, qm->io_base + QM_VF_STATE);
 }
 
-static void qm_last_regs_uninit(struct hisi_qm *qm)
-{
-	struct qm_debug *debug = &qm->debug;
-
-	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
-		return;
-
-	kfree(debug->qm_last_words);
-	debug->qm_last_words = NULL;
-}
-
 static void hisi_qm_unint_work(struct hisi_qm *qm)
 {
 	destroy_workqueue(qm->wq);
@@ -3760,8 +3805,6 @@ static void hisi_qm_memory_uninit(struct hisi_qm *qm)
  */
 void hisi_qm_uninit(struct hisi_qm *qm)
 {
-	qm_last_regs_uninit(qm);
-
 	qm_cmd_uninit(qm);
 	hisi_qm_unint_work(qm);
 	down_write(&qm->qps_lock);
@@ -6339,26 +6382,6 @@ err_destroy_idr:
 	return ret;
 }
 
-static void qm_last_regs_init(struct hisi_qm *qm)
-{
-	int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
-	struct qm_debug *debug = &qm->debug;
-	int i;
-
-	if (qm->fun_type == QM_HW_VF)
-		return;
-
-	debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int),
-								GFP_KERNEL);
-	if (!debug->qm_last_words)
-		return;
-
-	for (i = 0; i < dfx_regs_num; i++) {
-		debug->qm_last_words[i] = readl_relaxed(qm->io_base +
-			qm_dfx_regs[i].offset);
-	}
-}
-
 /**
  * hisi_qm_init() - Initialize configures about qm.
  * @qm: The qm needing init.
@@ -6407,8 +6430,6 @@ int hisi_qm_init(struct hisi_qm *qm)
 	qm_cmd_init(qm);
 	atomic_set(&qm->status.flags, QM_INIT);
 
-	qm_last_regs_init(qm);
-
 	return 0;
 
 err_free_qm_memory:
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 6eb8a16ba0a70..4e24735d95bae 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -899,8 +899,7 @@ static int sec_debugfs_init(struct hisi_qm *qm)
 	qm->debug.sqe_mask_offset = SEC_SQE_MASK_OFFSET;
 	qm->debug.sqe_mask_len = SEC_SQE_MASK_LEN;
 
-	ret = hisi_qm_diff_regs_init(qm, sec_diff_regs,
-				ARRAY_SIZE(sec_diff_regs));
+	ret = hisi_qm_regs_debugfs_init(qm, sec_diff_regs, ARRAY_SIZE(sec_diff_regs));
 	if (ret) {
 		dev_warn(dev, "Failed to init SEC diff regs!\n");
 		goto debugfs_remove;
@@ -915,7 +914,7 @@ static int sec_debugfs_init(struct hisi_qm *qm)
 	return 0;
 
 failed_to_create:
-	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+	hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
 debugfs_remove:
 	debugfs_remove_recursive(sec_debugfs_root);
 	return ret;
@@ -923,7 +922,7 @@ debugfs_remove:
 
 static void sec_debugfs_exit(struct hisi_qm *qm)
 {
-	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
+	hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(sec_diff_regs));
 
 	debugfs_remove_recursive(qm->debug.debug_root);
 }
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index c863435e8c75a..1549bec3aea59 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -849,8 +849,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
 	qm->debug.sqe_mask_offset = HZIP_SQE_MASK_OFFSET;
 	qm->debug.sqe_mask_len = HZIP_SQE_MASK_LEN;
 	qm->debug.debug_root = dev_d;
-	ret = hisi_qm_diff_regs_init(qm, hzip_diff_regs,
-				ARRAY_SIZE(hzip_diff_regs));
+	ret = hisi_qm_regs_debugfs_init(qm, hzip_diff_regs, ARRAY_SIZE(hzip_diff_regs));
 	if (ret) {
 		dev_warn(dev, "Failed to init ZIP diff regs!\n");
 		goto debugfs_remove;
@@ -869,7 +868,7 @@ static int hisi_zip_debugfs_init(struct hisi_qm *qm)
 	return 0;
 
 failed_to_create:
-	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+	hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
 debugfs_remove:
 	debugfs_remove_recursive(hzip_debugfs_root);
 	return ret;
@@ -895,7 +894,7 @@ static void hisi_zip_debug_regs_clear(struct hisi_qm *qm)
 
 static void hisi_zip_debugfs_exit(struct hisi_qm *qm)
 {
-	hisi_qm_diff_regs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
+	hisi_qm_regs_debugfs_uninit(qm, ARRAY_SIZE(hzip_diff_regs));
 
 	debugfs_remove_recursive(qm->debug.debug_root);
 
diff --git a/include/linux/hisi_acc_qm.h b/include/linux/hisi_acc_qm.h
index c3618255b1504..be3aedaa96dc1 100644
--- a/include/linux/hisi_acc_qm.h
+++ b/include/linux/hisi_acc_qm.h
@@ -471,11 +471,11 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen);
 int hisi_qm_sriov_configure(struct pci_dev *pdev, int num_vfs);
 void hisi_qm_dev_err_init(struct hisi_qm *qm);
 void hisi_qm_dev_err_uninit(struct hisi_qm *qm);
-int hisi_qm_diff_regs_init(struct hisi_qm *qm,
-		struct dfx_diff_registers *dregs, int reg_len);
-void hisi_qm_diff_regs_uninit(struct hisi_qm *qm, int reg_len);
+int hisi_qm_regs_debugfs_init(struct hisi_qm *qm,
+			  struct dfx_diff_registers *dregs, u32 reg_len);
+void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len);
 void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
-		struct dfx_diff_registers *dregs, int regs_len);
+				struct dfx_diff_registers *dregs, u32 regs_len);
 
 pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
 					  pci_channel_state_t state);
-- 
GitLab


From 94476b2b6d60bc926a585ae62e1bf69bd22c1dff Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 12 Nov 2022 02:12:52 +0000
Subject: [PATCH 1778/4122] crypto: hisilicon/qm - split a debugfs.c from qm

Considering that the qm feature and debugfs feature are independent.
The code related to debugfs is getting larger and larger. It should be
separate as a debugfs file. So move some debugfs code to new file from
qm file. The qm code logic is not modified. And maintainability is
enhanced.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/Makefile    |    2 +-
 drivers/crypto/hisilicon/debugfs.c   | 1097 +++++++++++++++++++++
 drivers/crypto/hisilicon/qm.c        | 1348 ++------------------------
 drivers/crypto/hisilicon/qm_common.h |   87 ++
 4 files changed, 1277 insertions(+), 1257 deletions(-)
 create mode 100644 drivers/crypto/hisilicon/debugfs.c
 create mode 100644 drivers/crypto/hisilicon/qm_common.h

diff --git a/drivers/crypto/hisilicon/Makefile b/drivers/crypto/hisilicon/Makefile
index 1e89269a2e4b0..8595a5a5d2288 100644
--- a/drivers/crypto/hisilicon/Makefile
+++ b/drivers/crypto/hisilicon/Makefile
@@ -3,6 +3,6 @@ obj-$(CONFIG_CRYPTO_DEV_HISI_HPRE) += hpre/
 obj-$(CONFIG_CRYPTO_DEV_HISI_SEC) += sec/
 obj-$(CONFIG_CRYPTO_DEV_HISI_SEC2) += sec2/
 obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += hisi_qm.o
-hisi_qm-objs = qm.o sgl.o
+hisi_qm-objs = qm.o sgl.o debugfs.o
 obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/
 obj-$(CONFIG_CRYPTO_DEV_HISI_TRNG) += trng/
diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c
new file mode 100644
index 0000000000000..13bec8b2d7237
--- /dev/null
+++ b/drivers/crypto/hisilicon/debugfs.c
@@ -0,0 +1,1097 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 HiSilicon Limited. */
+#include <linux/hisi_acc_qm.h>
+#include "qm_common.h"
+
+#define QM_DFX_BASE			0x0100000
+#define QM_DFX_STATE1			0x0104000
+#define QM_DFX_STATE2			0x01040C8
+#define QM_DFX_COMMON			0x0000
+#define QM_DFX_BASE_LEN			0x5A
+#define QM_DFX_STATE1_LEN		0x2E
+#define QM_DFX_STATE2_LEN		0x11
+#define QM_DFX_COMMON_LEN		0xC3
+#define QM_DFX_REGS_LEN			4UL
+#define QM_DBG_TMP_BUF_LEN		22
+#define CURRENT_FUN_MASK		GENMASK(5, 0)
+#define CURRENT_Q_MASK			GENMASK(31, 16)
+#define QM_SQE_ADDR_MASK		GENMASK(7, 0)
+
+#define QM_DFX_MB_CNT_VF		0x104010
+#define QM_DFX_DB_CNT_VF		0x104020
+#define QM_DFX_SQE_CNT_VF_SQN		0x104030
+#define QM_DFX_CQE_CNT_VF_CQN		0x104040
+#define QM_DFX_QN_SHIFT			16
+#define QM_DFX_CNT_CLR_CE		0x100118
+#define QM_DBG_WRITE_LEN		1024
+
+static const char * const qm_debug_file_name[] = {
+	[CURRENT_QM]   = "current_qm",
+	[CURRENT_Q]    = "current_q",
+	[CLEAR_ENABLE] = "clear_enable",
+};
+
+struct qm_dfx_item {
+	const char *name;
+	u32 offset;
+};
+
+static struct qm_dfx_item qm_dfx_files[] = {
+	{"err_irq", offsetof(struct qm_dfx, err_irq_cnt)},
+	{"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)},
+	{"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)},
+	{"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)},
+	{"mb_err", offsetof(struct qm_dfx, mb_err_cnt)},
+};
+
+#define CNT_CYC_REGS_NUM		10
+static const struct debugfs_reg32 qm_dfx_regs[] = {
+	/* XXX_CNT are reading clear register */
+	{"QM_ECC_1BIT_CNT               ",  0x104000ull},
+	{"QM_ECC_MBIT_CNT               ",  0x104008ull},
+	{"QM_DFX_MB_CNT                 ",  0x104018ull},
+	{"QM_DFX_DB_CNT                 ",  0x104028ull},
+	{"QM_DFX_SQE_CNT                ",  0x104038ull},
+	{"QM_DFX_CQE_CNT                ",  0x104048ull},
+	{"QM_DFX_SEND_SQE_TO_ACC_CNT    ",  0x104050ull},
+	{"QM_DFX_WB_SQE_FROM_ACC_CNT    ",  0x104058ull},
+	{"QM_DFX_ACC_FINISH_CNT         ",  0x104060ull},
+	{"QM_DFX_CQE_ERR_CNT            ",  0x1040b4ull},
+	{"QM_DFX_FUNS_ACTIVE_ST         ",  0x200ull},
+	{"QM_ECC_1BIT_INF               ",  0x104004ull},
+	{"QM_ECC_MBIT_INF               ",  0x10400cull},
+	{"QM_DFX_ACC_RDY_VLD0           ",  0x1040a0ull},
+	{"QM_DFX_ACC_RDY_VLD1           ",  0x1040a4ull},
+	{"QM_DFX_AXI_RDY_VLD            ",  0x1040a8ull},
+	{"QM_DFX_FF_ST0                 ",  0x1040c8ull},
+	{"QM_DFX_FF_ST1                 ",  0x1040ccull},
+	{"QM_DFX_FF_ST2                 ",  0x1040d0ull},
+	{"QM_DFX_FF_ST3                 ",  0x1040d4ull},
+	{"QM_DFX_FF_ST4                 ",  0x1040d8ull},
+	{"QM_DFX_FF_ST5                 ",  0x1040dcull},
+	{"QM_DFX_FF_ST6                 ",  0x1040e0ull},
+	{"QM_IN_IDLE_ST                 ",  0x1040e4ull},
+};
+
+static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
+	{"QM_DFX_FUNS_ACTIVE_ST         ",  0x200ull},
+};
+
+/* define the QM's dfx regs region and region length */
+static struct dfx_diff_registers qm_diff_regs[] = {
+	{
+		.reg_offset = QM_DFX_BASE,
+		.reg_len = QM_DFX_BASE_LEN,
+	}, {
+		.reg_offset = QM_DFX_STATE1,
+		.reg_len = QM_DFX_STATE1_LEN,
+	}, {
+		.reg_offset = QM_DFX_STATE2,
+		.reg_len = QM_DFX_STATE2_LEN,
+	}, {
+		.reg_offset = QM_DFX_COMMON,
+		.reg_len = QM_DFX_COMMON_LEN,
+	},
+};
+
+static struct hisi_qm *file_to_qm(struct debugfs_file *file)
+{
+	struct qm_debug *debug = file->debug;
+
+	return container_of(debug, struct hisi_qm, debug);
+}
+
+static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
+			   size_t count, loff_t *pos)
+{
+	char buf[QM_DBG_READ_LEN];
+	int len;
+
+	len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n",
+			"Please echo help to cmd to get help information");
+
+	return simple_read_from_buffer(buffer, count, pos, buf, len);
+}
+
+static void dump_show(struct hisi_qm *qm, void *info,
+		     unsigned int info_size, char *info_name)
+{
+	struct device *dev = &qm->pdev->dev;
+	u8 *info_curr = info;
+	u32 i;
+#define BYTE_PER_DW	4
+
+	dev_info(dev, "%s DUMP\n", info_name);
+	for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) {
+		pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW,
+			*(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr));
+	}
+}
+
+static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
+{
+	struct device *dev = &qm->pdev->dev;
+	struct qm_sqc *sqc, *sqc_curr;
+	dma_addr_t sqc_dma;
+	u32 qp_id;
+	int ret;
+
+	if (!s)
+		return -EINVAL;
+
+	ret = kstrtou32(s, 0, &qp_id);
+	if (ret || qp_id >= qm->qp_num) {
+		dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
+		return -EINVAL;
+	}
+
+	sqc = hisi_qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma);
+	if (IS_ERR(sqc))
+		return PTR_ERR(sqc);
+
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 1);
+	if (ret) {
+		down_read(&qm->qps_lock);
+		if (qm->sqc) {
+			sqc_curr = qm->sqc + qp_id;
+
+			dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC");
+		}
+		up_read(&qm->qps_lock);
+
+		goto free_ctx;
+	}
+
+	dump_show(qm, sqc, sizeof(*sqc), "SQC");
+
+free_ctx:
+	hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma);
+	return 0;
+}
+
+static int qm_cqc_dump(struct hisi_qm *qm, const char *s)
+{
+	struct device *dev = &qm->pdev->dev;
+	struct qm_cqc *cqc, *cqc_curr;
+	dma_addr_t cqc_dma;
+	u32 qp_id;
+	int ret;
+
+	if (!s)
+		return -EINVAL;
+
+	ret = kstrtou32(s, 0, &qp_id);
+	if (ret || qp_id >= qm->qp_num) {
+		dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
+		return -EINVAL;
+	}
+
+	cqc = hisi_qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma);
+	if (IS_ERR(cqc))
+		return PTR_ERR(cqc);
+
+	ret = hisi_qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 1);
+	if (ret) {
+		down_read(&qm->qps_lock);
+		if (qm->cqc) {
+			cqc_curr = qm->cqc + qp_id;
+
+			dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC");
+		}
+		up_read(&qm->qps_lock);
+
+		goto free_ctx;
+	}
+
+	dump_show(qm, cqc, sizeof(*cqc), "CQC");
+
+free_ctx:
+	hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma);
+	return 0;
+}
+
+static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
+			    int cmd, char *name)
+{
+	struct device *dev = &qm->pdev->dev;
+	dma_addr_t xeqc_dma;
+	void *xeqc;
+	int ret;
+
+	if (strsep(&s, " ")) {
+		dev_err(dev, "Please do not input extra characters!\n");
+		return -EINVAL;
+	}
+
+	xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma);
+	if (IS_ERR(xeqc))
+		return PTR_ERR(xeqc);
+
+	ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
+	if (ret)
+		goto err_free_ctx;
+
+	dump_show(qm, xeqc, size, name);
+
+err_free_ctx:
+	hisi_qm_ctx_free(qm, size, xeqc, &xeqc_dma);
+	return ret;
+}
+
+static int q_dump_param_parse(struct hisi_qm *qm, char *s,
+			      u32 *e_id, u32 *q_id, u16 q_depth)
+{
+	struct device *dev = &qm->pdev->dev;
+	unsigned int qp_num = qm->qp_num;
+	char *presult;
+	int ret;
+
+	presult = strsep(&s, " ");
+	if (!presult) {
+		dev_err(dev, "Please input qp number!\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtou32(presult, 0, q_id);
+	if (ret || *q_id >= qp_num) {
+		dev_err(dev, "Please input qp num (0-%u)", qp_num - 1);
+		return -EINVAL;
+	}
+
+	presult = strsep(&s, " ");
+	if (!presult) {
+		dev_err(dev, "Please input sqe number!\n");
+		return -EINVAL;
+	}
+
+	ret = kstrtou32(presult, 0, e_id);
+	if (ret || *e_id >= q_depth) {
+		dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1);
+		return -EINVAL;
+	}
+
+	if (strsep(&s, " ")) {
+		dev_err(dev, "Please do not input extra characters!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int qm_sq_dump(struct hisi_qm *qm, char *s)
+{
+	u16 sq_depth = qm->qp_array->cq_depth;
+	void *sqe, *sqe_curr;
+	struct hisi_qp *qp;
+	u32 qp_id, sqe_id;
+	int ret;
+
+	ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth);
+	if (ret)
+		return ret;
+
+	sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL);
+	if (!sqe)
+		return -ENOMEM;
+
+	qp = &qm->qp_array[qp_id];
+	memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth);
+	sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size);
+	memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
+	       qm->debug.sqe_mask_len);
+
+	dump_show(qm, sqe_curr, qm->sqe_size, "SQE");
+
+	kfree(sqe);
+
+	return 0;
+}
+
+static int qm_cq_dump(struct hisi_qm *qm, char *s)
+{
+	struct qm_cqe *cqe_curr;
+	struct hisi_qp *qp;
+	u32 qp_id, cqe_id;
+	int ret;
+
+	ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth);
+	if (ret)
+		return ret;
+
+	qp = &qm->qp_array[qp_id];
+	cqe_curr = qp->cqe + cqe_id;
+	dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE");
+
+	return 0;
+}
+
+static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s,
+			  size_t size, char *name)
+{
+	struct device *dev = &qm->pdev->dev;
+	void *xeqe;
+	u32 xeqe_id;
+	int ret;
+
+	if (!s)
+		return -EINVAL;
+
+	ret = kstrtou32(s, 0, &xeqe_id);
+	if (ret)
+		return -EINVAL;
+
+	if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) {
+		dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1);
+		return -EINVAL;
+	} else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) {
+		dev_err(dev, "Please input aeqe num (0-%u)", qm->eq_depth - 1);
+		return -EINVAL;
+	}
+
+	down_read(&qm->qps_lock);
+
+	if (qm->eqe && !strcmp(name, "EQE")) {
+		xeqe = qm->eqe + xeqe_id;
+	} else if (qm->aeqe && !strcmp(name, "AEQE")) {
+		xeqe = qm->aeqe + xeqe_id;
+	} else {
+		ret = -EINVAL;
+		goto err_unlock;
+	}
+
+	dump_show(qm, xeqe, size, name);
+
+err_unlock:
+	up_read(&qm->qps_lock);
+	return ret;
+}
+
+static int qm_dbg_help(struct hisi_qm *qm, char *s)
+{
+	struct device *dev = &qm->pdev->dev;
+
+	if (strsep(&s, " ")) {
+		dev_err(dev, "Please do not input extra characters!\n");
+		return -EINVAL;
+	}
+
+	dev_info(dev, "available commands:\n");
+	dev_info(dev, "sqc <num>\n");
+	dev_info(dev, "cqc <num>\n");
+	dev_info(dev, "eqc\n");
+	dev_info(dev, "aeqc\n");
+	dev_info(dev, "sq <num> <e>\n");
+	dev_info(dev, "cq <num> <e>\n");
+	dev_info(dev, "eq <e>\n");
+	dev_info(dev, "aeq <e>\n");
+
+	return 0;
+}
+
+static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf)
+{
+	struct device *dev = &qm->pdev->dev;
+	char *presult, *s, *s_tmp;
+	int ret;
+
+	s = kstrdup(cmd_buf, GFP_KERNEL);
+	if (!s)
+		return -ENOMEM;
+
+	s_tmp = s;
+	presult = strsep(&s, " ");
+	if (!presult) {
+		ret = -EINVAL;
+		goto err_buffer_free;
+	}
+
+	if (!strcmp(presult, "sqc"))
+		ret = qm_sqc_dump(qm, s);
+	else if (!strcmp(presult, "cqc"))
+		ret = qm_cqc_dump(qm, s);
+	else if (!strcmp(presult, "eqc"))
+		ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc),
+				       QM_MB_CMD_EQC, "EQC");
+	else if (!strcmp(presult, "aeqc"))
+		ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc),
+				       QM_MB_CMD_AEQC, "AEQC");
+	else if (!strcmp(presult, "sq"))
+		ret = qm_sq_dump(qm, s);
+	else if (!strcmp(presult, "cq"))
+		ret = qm_cq_dump(qm, s);
+	else if (!strcmp(presult, "eq"))
+		ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE");
+	else if (!strcmp(presult, "aeq"))
+		ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE");
+	else if (!strcmp(presult, "help"))
+		ret = qm_dbg_help(qm, s);
+	else
+		ret = -EINVAL;
+
+	if (ret)
+		dev_info(dev, "Please echo help\n");
+
+err_buffer_free:
+	kfree(s_tmp);
+
+	return ret;
+}
+
+static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
+			    size_t count, loff_t *pos)
+{
+	struct hisi_qm *qm = filp->private_data;
+	char *cmd_buf, *cmd_buf_tmp;
+	int ret;
+
+	if (*pos)
+		return 0;
+
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret)
+		return ret;
+
+	/* Judge if the instance is being reset. */
+	if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) {
+		ret = 0;
+		goto put_dfx_access;
+	}
+
+	if (count > QM_DBG_WRITE_LEN) {
+		ret = -ENOSPC;
+		goto put_dfx_access;
+	}
+
+	cmd_buf = memdup_user_nul(buffer, count);
+	if (IS_ERR(cmd_buf)) {
+		ret = PTR_ERR(cmd_buf);
+		goto put_dfx_access;
+	}
+
+	cmd_buf_tmp = strchr(cmd_buf, '\n');
+	if (cmd_buf_tmp) {
+		*cmd_buf_tmp = '\0';
+		count = cmd_buf_tmp - cmd_buf + 1;
+	}
+
+	ret = qm_cmd_write_dump(qm, cmd_buf);
+	if (ret) {
+		kfree(cmd_buf);
+		goto put_dfx_access;
+	}
+
+	kfree(cmd_buf);
+
+	ret = count;
+
+put_dfx_access:
+	hisi_qm_put_dfx_access(qm);
+	return ret;
+}
+
+static const struct file_operations qm_cmd_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = qm_cmd_read,
+	.write = qm_cmd_write,
+};
+
+/**
+ * hisi_qm_regs_dump() - Dump registers's value.
+ * @s: debugfs file handle.
+ * @regset: accelerator registers information.
+ *
+ * Dump accelerator registers.
+ */
+void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
+{
+	struct pci_dev *pdev = to_pci_dev(regset->dev);
+	struct hisi_qm *qm = pci_get_drvdata(pdev);
+	const struct debugfs_reg32 *regs = regset->regs;
+	int regs_len = regset->nregs;
+	int i, ret;
+	u32 val;
+
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret)
+		return;
+
+	for (i = 0; i < regs_len; i++) {
+		val = readl(regset->base + regs[i].offset);
+		seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
+	}
+
+	hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
+
+static int qm_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+	struct debugfs_regset32 regset;
+
+	if (qm->fun_type == QM_HW_PF) {
+		regset.regs = qm_dfx_regs;
+		regset.nregs = ARRAY_SIZE(qm_dfx_regs);
+	} else {
+		regset.regs = qm_vf_dfx_regs;
+		regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs);
+	}
+
+	regset.base = qm->io_base;
+	regset.dev = &qm->pdev->dev;
+
+	hisi_qm_regs_dump(s, &regset);
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(qm_regs);
+
+static u32 current_q_read(struct hisi_qm *qm)
+{
+	return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
+}
+
+static int current_q_write(struct hisi_qm *qm, u32 val)
+{
+	u32 tmp;
+
+	if (val >= qm->debug.curr_qm_qp_num)
+		return -EINVAL;
+
+	tmp = val << QM_DFX_QN_SHIFT |
+	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK);
+	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+	tmp = val << QM_DFX_QN_SHIFT |
+	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK);
+	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	return 0;
+}
+
+static u32 clear_enable_read(struct hisi_qm *qm)
+{
+	return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+
+/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
+static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
+{
+	if (rd_clr_ctrl > 1)
+		return -EINVAL;
+
+	writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+	return 0;
+}
+
+static u32 current_qm_read(struct hisi_qm *qm)
+{
+	return readl(qm->io_base + QM_DFX_MB_CNT_VF);
+}
+
+static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num)
+{
+	u32 remain_q_num, vfq_num;
+	u32 num_vfs = qm->vfs_num;
+
+	vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs;
+	if (vfq_num >= qm->max_qp_num)
+		return qm->max_qp_num;
+
+	remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs;
+	if (vfq_num + remain_q_num <= qm->max_qp_num)
+		return fun_num == num_vfs ? vfq_num + remain_q_num : vfq_num;
+
+	/*
+	 * if vfq_num + remain_q_num > max_qp_num, the last VFs,
+	 * each with one more queue.
+	 */
+	return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num;
+}
+
+static int current_qm_write(struct hisi_qm *qm, u32 val)
+{
+	u32 tmp;
+
+	if (val > qm->vfs_num)
+		return -EINVAL;
+
+	/* According PF or VF Dev ID to calculation curr_qm_qp_num and store */
+	if (!val)
+		qm->debug.curr_qm_qp_num = qm->qp_num;
+	else
+		qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val);
+
+	writel(val, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(val, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	return 0;
+}
+
+static ssize_t qm_debug_read(struct file *filp, char __user *buf,
+			     size_t count, loff_t *pos)
+{
+	struct debugfs_file *file = filp->private_data;
+	enum qm_debug_file index = file->index;
+	struct hisi_qm *qm = file_to_qm(file);
+	char tbuf[QM_DBG_TMP_BUF_LEN];
+	u32 val;
+	int ret;
+
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret)
+		return ret;
+
+	mutex_lock(&file->lock);
+	switch (index) {
+	case CURRENT_QM:
+		val = current_qm_read(qm);
+		break;
+	case CURRENT_Q:
+		val = current_q_read(qm);
+		break;
+	case CLEAR_ENABLE:
+		val = clear_enable_read(qm);
+		break;
+	default:
+		goto err_input;
+	}
+	mutex_unlock(&file->lock);
+
+	hisi_qm_put_dfx_access(qm);
+	ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
+	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+err_input:
+	mutex_unlock(&file->lock);
+	hisi_qm_put_dfx_access(qm);
+	return -EINVAL;
+}
+
+static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
+			      size_t count, loff_t *pos)
+{
+	struct debugfs_file *file = filp->private_data;
+	enum qm_debug_file index = file->index;
+	struct hisi_qm *qm = file_to_qm(file);
+	unsigned long val;
+	char tbuf[QM_DBG_TMP_BUF_LEN];
+	int len, ret;
+
+	if (*pos != 0)
+		return 0;
+
+	if (count >= QM_DBG_TMP_BUF_LEN)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf,
+				     count);
+	if (len < 0)
+		return len;
+
+	tbuf[len] = '\0';
+	if (kstrtoul(tbuf, 0, &val))
+		return -EFAULT;
+
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret)
+		return ret;
+
+	mutex_lock(&file->lock);
+	switch (index) {
+	case CURRENT_QM:
+		ret = current_qm_write(qm, val);
+		break;
+	case CURRENT_Q:
+		ret = current_q_write(qm, val);
+		break;
+	case CLEAR_ENABLE:
+		ret = clear_enable_write(qm, val);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	mutex_unlock(&file->lock);
+
+	hisi_qm_put_dfx_access(qm);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static const struct file_operations qm_debug_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = qm_debug_read,
+	.write = qm_debug_write,
+};
+
+static void dfx_regs_uninit(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, int reg_len)
+{
+	int i;
+
+	/* Setting the pointer is NULL to prevent double free */
+	for (i = 0; i < reg_len; i++) {
+		kfree(dregs[i].regs);
+		dregs[i].regs = NULL;
+	}
+	kfree(dregs);
+}
+
+static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
+	const struct dfx_diff_registers *cregs, u32 reg_len)
+{
+	struct dfx_diff_registers *diff_regs;
+	u32 j, base_offset;
+	int i;
+
+	diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL);
+	if (!diff_regs)
+		return ERR_PTR(-ENOMEM);
+
+	for (i = 0; i < reg_len; i++) {
+		if (!cregs[i].reg_len)
+			continue;
+
+		diff_regs[i].reg_offset = cregs[i].reg_offset;
+		diff_regs[i].reg_len = cregs[i].reg_len;
+		diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len,
+					 GFP_KERNEL);
+		if (!diff_regs[i].regs)
+			goto alloc_error;
+
+		for (j = 0; j < diff_regs[i].reg_len; j++) {
+			base_offset = diff_regs[i].reg_offset +
+					j * QM_DFX_REGS_LEN;
+			diff_regs[i].regs[j] = readl(qm->io_base + base_offset);
+		}
+	}
+
+	return diff_regs;
+
+alloc_error:
+	while (i > 0) {
+		i--;
+		kfree(diff_regs[i].regs);
+	}
+	kfree(diff_regs);
+	return ERR_PTR(-ENOMEM);
+}
+
+static int qm_diff_regs_init(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, u32 reg_len)
+{
+	qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+	if (IS_ERR(qm->debug.qm_diff_regs))
+		return PTR_ERR(qm->debug.qm_diff_regs);
+
+	qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
+	if (IS_ERR(qm->debug.acc_diff_regs)) {
+		dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+		return PTR_ERR(qm->debug.acc_diff_regs);
+	}
+
+	return 0;
+}
+
+static void qm_last_regs_uninit(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+
+	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+		return;
+
+	kfree(debug->qm_last_words);
+	debug->qm_last_words = NULL;
+}
+
+static int qm_last_regs_init(struct hisi_qm *qm)
+{
+	int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
+	struct qm_debug *debug = &qm->debug;
+	int i;
+
+	if (qm->fun_type == QM_HW_VF)
+		return 0;
+
+	debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
+	if (!debug->qm_last_words)
+		return -ENOMEM;
+
+	for (i = 0; i < dfx_regs_num; i++) {
+		debug->qm_last_words[i] = readl_relaxed(qm->io_base +
+			qm_dfx_regs[i].offset);
+	}
+
+	return 0;
+}
+
+static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len)
+{
+	dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
+	dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
+}
+
+/**
+ * hisi_qm_regs_debugfs_init() - Allocate memory for registers.
+ * @qm: device qm handle.
+ * @dregs: diff registers handle.
+ * @reg_len: diff registers region length.
+ */
+int hisi_qm_regs_debugfs_init(struct hisi_qm *qm,
+		struct dfx_diff_registers *dregs, u32 reg_len)
+{
+	int ret;
+
+	if (!qm || !dregs)
+		return -EINVAL;
+
+	if (qm->fun_type != QM_HW_PF)
+		return 0;
+
+	ret = qm_last_regs_init(qm);
+	if (ret) {
+		dev_info(&qm->pdev->dev, "failed to init qm words memory!\n");
+		return ret;
+	}
+
+	ret = qm_diff_regs_init(qm, dregs, reg_len);
+	if (ret) {
+		qm_last_regs_uninit(qm);
+		return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init);
+
+/**
+ * hisi_qm_regs_debugfs_uninit() - Free memory for registers.
+ * @qm: device qm handle.
+ * @reg_len: diff registers region length.
+ */
+void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len)
+{
+	if (!qm || qm->fun_type != QM_HW_PF)
+		return;
+
+	qm_diff_regs_uninit(qm, reg_len);
+	qm_last_regs_uninit(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit);
+
+/**
+ * hisi_qm_acc_diff_regs_dump() - Dump registers's value.
+ * @qm: device qm handle.
+ * @s: Debugfs file handle.
+ * @dregs: diff registers handle.
+ * @regs_len: diff registers region length.
+ */
+void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
+	struct dfx_diff_registers *dregs, u32 regs_len)
+{
+	u32 j, val, base_offset;
+	int i, ret;
+
+	if (!qm || !s || !dregs)
+		return;
+
+	ret = hisi_qm_get_dfx_access(qm);
+	if (ret)
+		return;
+
+	down_read(&qm->qps_lock);
+	for (i = 0; i < regs_len; i++) {
+		if (!dregs[i].reg_len)
+			continue;
+
+		for (j = 0; j < dregs[i].reg_len; j++) {
+			base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN;
+			val = readl(qm->io_base + base_offset);
+			if (val != dregs[i].regs[j])
+				seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n",
+					   base_offset, dregs[i].regs[j], val);
+		}
+	}
+	up_read(&qm->qps_lock);
+
+	hisi_qm_put_dfx_access(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump);
+
+void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm)
+{
+	struct qm_debug *debug = &qm->debug;
+	struct pci_dev *pdev = qm->pdev;
+	u32 val;
+	int i;
+
+	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) {
+		val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset);
+		if (debug->qm_last_words[i] != val)
+			pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
+			qm_dfx_regs[i].name, debug->qm_last_words[i], val);
+	}
+}
+
+static int qm_diff_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+
+	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs,
+					ARRAY_SIZE(qm_diff_regs));
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
+
+static ssize_t qm_status_read(struct file *filp, char __user *buffer,
+			      size_t count, loff_t *pos)
+{
+	struct hisi_qm *qm = filp->private_data;
+	char buf[QM_DBG_READ_LEN];
+	int val, len;
+
+	val = atomic_read(&qm->status.flags);
+	len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]);
+
+	return simple_read_from_buffer(buffer, count, pos, buf, len);
+}
+
+static const struct file_operations qm_status_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = qm_status_read,
+};
+
+static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
+				   enum qm_debug_file index)
+{
+	struct debugfs_file *file = qm->debug.files + index;
+
+	debugfs_create_file(qm_debug_file_name[index], 0600, dir, file,
+			    &qm_debug_fops);
+
+	file->index = index;
+	mutex_init(&file->lock);
+	file->debug = &qm->debug;
+}
+
+static int qm_debugfs_atomic64_set(void *data, u64 val)
+{
+	if (val)
+		return -EINVAL;
+
+	atomic64_set((atomic64_t *)data, 0);
+
+	return 0;
+}
+
+static int qm_debugfs_atomic64_get(void *data, u64 *val)
+{
+	*val = atomic64_read((atomic64_t *)data);
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
+			 qm_debugfs_atomic64_set, "%llu\n");
+
+/**
+ * hisi_qm_debug_init() - Initialize qm related debugfs files.
+ * @qm: The qm for which we want to add debugfs files.
+ *
+ * Create qm related debugfs files.
+ */
+void hisi_qm_debug_init(struct hisi_qm *qm)
+{
+	struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
+	struct qm_dfx *dfx = &qm->debug.dfx;
+	struct dentry *qm_d;
+	void *data;
+	int i;
+
+	qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
+	qm->debug.qm_d = qm_d;
+
+	/* only show this in PF */
+	if (qm->fun_type == QM_HW_PF) {
+		qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
+		for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
+			qm_create_debugfs_file(qm, qm->debug.qm_d, i);
+	}
+
+	if (qm_regs)
+		debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
+					qm, &qm_diff_regs_fops);
+
+	debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
+
+	debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
+
+	debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
+			&qm_status_fops);
+	for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
+		data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
+		debugfs_create_file(qm_dfx_files[i].name,
+			0644,
+			qm_d,
+			data,
+			&qm_atomic64_ops);
+	}
+
+	if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps))
+		hisi_qm_set_algqos_init(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
+
+/**
+ * hisi_qm_debug_regs_clear() - clear qm debug related registers.
+ * @qm: The qm for which we want to clear its debug registers.
+ */
+void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
+{
+	const struct debugfs_reg32 *regs;
+	int i;
+
+	/* clear current_qm */
+	writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	/* clear current_q */
+	writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+	writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	/*
+	 * these registers are reading and clearing, so clear them after
+	 * reading them.
+	 */
+	writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+	regs = qm_dfx_regs;
+	for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
+		readl(qm->io_base + regs->offset);
+		regs++;
+	}
+
+	/* clear clear_enable */
+	writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 441466df7c6d5..36d70b9f6117c 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <uapi/misc/uacce/hisi_qm.h>
 #include <linux/hisi_acc_qm.h>
+#include "qm_common.h"
 
 /* eq/aeq irq enable */
 #define QM_VF_AEQ_INT_SOURCE		0x0
@@ -119,8 +120,6 @@
 #define QM_SQC_VFT_NUM_SHIFT_V2		45
 #define QM_SQC_VFT_NUM_MASK_v2		GENMASK(9, 0)
 
-#define QM_DFX_CNT_CLR_CE		0x100118
-
 #define QM_ABNORMAL_INT_SOURCE		0x100000
 #define QM_ABNORMAL_INT_MASK		0x100004
 #define QM_ABNORMAL_INT_MASK_VALUE	0x7fff
@@ -187,14 +186,6 @@
 #define QM_VF_RESET_WAIT_TIMEOUT_US    \
 	(QM_VF_RESET_WAIT_US * QM_VF_RESET_WAIT_CNT)
 
-#define QM_DFX_MB_CNT_VF		0x104010
-#define QM_DFX_DB_CNT_VF		0x104020
-#define QM_DFX_SQE_CNT_VF_SQN		0x104030
-#define QM_DFX_CQE_CNT_VF_CQN		0x104040
-#define QM_DFX_QN_SHIFT			16
-#define CURRENT_FUN_MASK		GENMASK(5, 0)
-#define CURRENT_Q_MASK			GENMASK(31, 16)
-
 #define POLL_PERIOD			10
 #define POLL_TIMEOUT			1000
 #define WAIT_PERIOD_US_MAX		200
@@ -211,19 +202,15 @@
 #define QMC_ALIGN(sz)			ALIGN(sz, 32)
 
 #define QM_DBG_READ_LEN		256
-#define QM_DBG_WRITE_LEN		1024
-#define QM_DBG_TMP_BUF_LEN		22
 #define QM_PCI_COMMAND_INVALID		~0
 #define QM_RESET_STOP_TX_OFFSET		1
 #define QM_RESET_STOP_RX_OFFSET		2
 
 #define WAIT_PERIOD			20
 #define REMOVE_WAIT_DELAY		10
-#define QM_SQE_ADDR_MASK		GENMASK(7, 0)
 
 #define QM_DRIVER_REMOVING		0
 #define QM_RST_SCHED			1
-#define QM_RESETTING			2
 #define QM_QOS_PARAM_NUM		2
 #define QM_QOS_VAL_NUM			1
 #define QM_QOS_BDF_PARAM_NUM		4
@@ -250,15 +237,6 @@
 #define QM_QOS_MIN_CIR_B		100
 #define QM_QOS_MAX_CIR_U		6
 #define QM_QOS_MAX_CIR_S		11
-#define QM_DFX_BASE		0x0100000
-#define QM_DFX_STATE1		0x0104000
-#define QM_DFX_STATE2		0x01040C8
-#define QM_DFX_COMMON		0x0000
-#define QM_DFX_BASE_LEN		0x5A
-#define QM_DFX_STATE1_LEN		0x2E
-#define QM_DFX_STATE2_LEN		0x11
-#define QM_DFX_COMMON_LEN		0xC3
-#define QM_DFX_REGS_LEN		4UL
 #define QM_AUTOSUSPEND_DELAY		3000
 
 #define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
@@ -368,73 +346,6 @@ static const struct hisi_qm_cap_info qm_basic_info[] = {
 	{QM_VF_IRQ_NUM_CAP,     0x311c,   0,  GENMASK(15, 0), 0x1,       0x2,       0x3},
 };
 
-struct qm_cqe {
-	__le32 rsvd0;
-	__le16 cmd_id;
-	__le16 rsvd1;
-	__le16 sq_head;
-	__le16 sq_num;
-	__le16 rsvd2;
-	__le16 w7;
-};
-
-struct qm_eqe {
-	__le32 dw0;
-};
-
-struct qm_aeqe {
-	__le32 dw0;
-};
-
-struct qm_sqc {
-	__le16 head;
-	__le16 tail;
-	__le32 base_l;
-	__le32 base_h;
-	__le32 dw3;
-	__le16 w8;
-	__le16 rsvd0;
-	__le16 pasid;
-	__le16 w11;
-	__le16 cq_num;
-	__le16 w13;
-	__le32 rsvd1;
-};
-
-struct qm_cqc {
-	__le16 head;
-	__le16 tail;
-	__le32 base_l;
-	__le32 base_h;
-	__le32 dw3;
-	__le16 w8;
-	__le16 rsvd0;
-	__le16 pasid;
-	__le16 w11;
-	__le32 dw6;
-	__le32 rsvd1;
-};
-
-struct qm_eqc {
-	__le16 head;
-	__le16 tail;
-	__le32 base_l;
-	__le32 base_h;
-	__le32 dw3;
-	__le32 rsvd[2];
-	__le32 dw6;
-};
-
-struct qm_aeqc {
-	__le16 head;
-	__le16 tail;
-	__le32 base_l;
-	__le32 base_h;
-	__le32 dw3;
-	__le32 rsvd[2];
-	__le32 dw6;
-};
-
 struct qm_mailbox {
 	__le16 w0;
 	__le16 queue_num;
@@ -467,25 +378,6 @@ struct hisi_qm_hw_ops {
 	int (*set_msi)(struct hisi_qm *qm, bool set);
 };
 
-struct qm_dfx_item {
-	const char *name;
-	u32 offset;
-};
-
-static struct qm_dfx_item qm_dfx_files[] = {
-	{"err_irq", offsetof(struct qm_dfx, err_irq_cnt)},
-	{"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)},
-	{"abnormal_irq", offsetof(struct qm_dfx, abnormal_irq_cnt)},
-	{"create_qp_err", offsetof(struct qm_dfx, create_qp_err_cnt)},
-	{"mb_err", offsetof(struct qm_dfx, mb_err_cnt)},
-};
-
-static const char * const qm_debug_file_name[] = {
-	[CURRENT_QM]   = "current_qm",
-	[CURRENT_Q]    = "current_q",
-	[CLEAR_ENABLE] = "clear_enable",
-};
-
 struct hisi_qm_hw_error {
 	u32 int_msk;
 	const char *msg;
@@ -510,23 +402,6 @@ static const struct hisi_qm_hw_error qm_hw_error[] = {
 	{ /* sentinel */ }
 };
 
-/* define the QM's dfx regs region and region length */
-static struct dfx_diff_registers qm_diff_regs[] = {
-	{
-		.reg_offset = QM_DFX_BASE,
-		.reg_len = QM_DFX_BASE_LEN,
-	}, {
-		.reg_offset = QM_DFX_STATE1,
-		.reg_len = QM_DFX_STATE1_LEN,
-	}, {
-		.reg_offset = QM_DFX_STATE2,
-		.reg_len = QM_DFX_STATE2_LEN,
-	}, {
-		.reg_offset = QM_DFX_COMMON,
-		.reg_len = QM_DFX_COMMON_LEN,
-	},
-};
-
 static const char * const qm_db_timeout[] = {
 	"sq", "cq", "eq", "aeq",
 };
@@ -535,10 +410,6 @@ static const char * const qm_fifo_overflow[] = {
 	"cq", "eq", "aeq",
 };
 
-static const char * const qm_s[] = {
-	"init", "start", "close", "stop",
-};
-
 static const char * const qp_s[] = {
 	"none", "init", "start", "stop", "close",
 };
@@ -1332,1046 +1203,150 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
 				(QM_SHAPER_CBS_B << QM_SHAPER_FACTOR_CBS_B_SHIFT) |
 				(factor->cbs_s << QM_SHAPER_FACTOR_CBS_S_SHIFT);
 			}
-			break;
-		}
-	}
-
-	writel(lower_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_L);
-	writel(upper_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_H);
-}
-
-static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
-			     u32 fun_num, u32 base, u32 number)
-{
-	struct qm_shaper_factor *factor = NULL;
-	unsigned int val;
-	int ret;
-
-	if (type == SHAPER_VFT && test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps))
-		factor = &qm->factor[fun_num];
-
-	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
-					 val & BIT(0), POLL_PERIOD,
-					 POLL_TIMEOUT);
-	if (ret)
-		return ret;
-
-	writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR);
-	writel(type, qm->io_base + QM_VFT_CFG_TYPE);
-	if (type == SHAPER_VFT)
-		fun_num |= base << QM_SHAPER_VFT_OFFSET;
-
-	writel(fun_num, qm->io_base + QM_VFT_CFG);
-
-	qm_vft_data_cfg(qm, type, base, number, factor);
-
-	writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
-	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
-
-	return readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
-					  val & BIT(0), POLL_PERIOD,
-					  POLL_TIMEOUT);
-}
-
-static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num)
-{
-	u32 qos = qm->factor[fun_num].func_qos;
-	int ret, i;
-
-	ret = qm_get_shaper_para(qos * QM_QOS_RATE, &qm->factor[fun_num]);
-	if (ret) {
-		dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n");
-		return ret;
-	}
-	writel(qm->type_rate, qm->io_base + QM_SHAPER_CFG);
-	for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) {
-		/* The base number of queue reuse for different alg type */
-		ret = qm_set_vft_common(qm, SHAPER_VFT, fun_num, i, 1);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-/* The config should be conducted after qm_dev_mem_reset() */
-static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
-			      u32 number)
-{
-	int ret, i;
-
-	for (i = SQC_VFT; i <= CQC_VFT; i++) {
-		ret = qm_set_vft_common(qm, i, fun_num, base, number);
-		if (ret)
-			return ret;
-	}
-
-	/* init default shaper qos val */
-	if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) {
-		ret = qm_shaper_init_vft(qm, fun_num);
-		if (ret)
-			goto back_sqc_cqc;
-	}
-
-	return 0;
-back_sqc_cqc:
-	for (i = SQC_VFT; i <= CQC_VFT; i++)
-		qm_set_vft_common(qm, i, fun_num, 0, 0);
-
-	return ret;
-}
-
-static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
-{
-	u64 sqc_vft;
-	int ret;
-
-	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
-	if (ret)
-		return ret;
-
-	sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
-		  ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
-	*base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
-	*number = (QM_SQC_VFT_NUM_MASK_v2 &
-		   (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
-
-	return 0;
-}
-
-static int qm_get_vf_qp_num(struct hisi_qm *qm, u32 fun_num)
-{
-	u32 remain_q_num, vfq_num;
-	u32 num_vfs = qm->vfs_num;
-
-	vfq_num = (qm->ctrl_qp_num - qm->qp_num) / num_vfs;
-	if (vfq_num >= qm->max_qp_num)
-		return qm->max_qp_num;
-
-	remain_q_num = (qm->ctrl_qp_num - qm->qp_num) % num_vfs;
-	if (vfq_num + remain_q_num <= qm->max_qp_num)
-		return fun_num == num_vfs ? vfq_num + remain_q_num : vfq_num;
-
-	/*
-	 * if vfq_num + remain_q_num > max_qp_num, the last VFs,
-	 * each with one more queue.
-	 */
-	return fun_num + remain_q_num > num_vfs ? vfq_num + 1 : vfq_num;
-}
-
-static struct hisi_qm *file_to_qm(struct debugfs_file *file)
-{
-	struct qm_debug *debug = file->debug;
-
-	return container_of(debug, struct hisi_qm, debug);
-}
-
-static u32 current_q_read(struct hisi_qm *qm)
-{
-	return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
-}
-
-static int current_q_write(struct hisi_qm *qm, u32 val)
-{
-	u32 tmp;
-
-	if (val >= qm->debug.curr_qm_qp_num)
-		return -EINVAL;
-
-	tmp = val << QM_DFX_QN_SHIFT |
-	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK);
-	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
-
-	tmp = val << QM_DFX_QN_SHIFT |
-	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK);
-	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
-	return 0;
-}
-
-static u32 clear_enable_read(struct hisi_qm *qm)
-{
-	return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
-}
-
-/* rd_clr_ctrl 1 enable read clear, otherwise 0 disable it */
-static int clear_enable_write(struct hisi_qm *qm, u32 rd_clr_ctrl)
-{
-	if (rd_clr_ctrl > 1)
-		return -EINVAL;
-
-	writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE);
-
-	return 0;
-}
-
-static u32 current_qm_read(struct hisi_qm *qm)
-{
-	return readl(qm->io_base + QM_DFX_MB_CNT_VF);
-}
-
-static int current_qm_write(struct hisi_qm *qm, u32 val)
-{
-	u32 tmp;
-
-	if (val > qm->vfs_num)
-		return -EINVAL;
-
-	/* According PF or VF Dev ID to calculation curr_qm_qp_num and store */
-	if (!val)
-		qm->debug.curr_qm_qp_num = qm->qp_num;
-	else
-		qm->debug.curr_qm_qp_num = qm_get_vf_qp_num(qm, val);
-
-	writel(val, qm->io_base + QM_DFX_MB_CNT_VF);
-	writel(val, qm->io_base + QM_DFX_DB_CNT_VF);
-
-	tmp = val |
-	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK);
-	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
-
-	tmp = val |
-	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK);
-	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
-	return 0;
-}
-
-static ssize_t qm_debug_read(struct file *filp, char __user *buf,
-			     size_t count, loff_t *pos)
-{
-	struct debugfs_file *file = filp->private_data;
-	enum qm_debug_file index = file->index;
-	struct hisi_qm *qm = file_to_qm(file);
-	char tbuf[QM_DBG_TMP_BUF_LEN];
-	u32 val;
-	int ret;
-
-	ret = hisi_qm_get_dfx_access(qm);
-	if (ret)
-		return ret;
-
-	mutex_lock(&file->lock);
-	switch (index) {
-	case CURRENT_QM:
-		val = current_qm_read(qm);
-		break;
-	case CURRENT_Q:
-		val = current_q_read(qm);
-		break;
-	case CLEAR_ENABLE:
-		val = clear_enable_read(qm);
-		break;
-	default:
-		goto err_input;
-	}
-	mutex_unlock(&file->lock);
-
-	hisi_qm_put_dfx_access(qm);
-	ret = scnprintf(tbuf, QM_DBG_TMP_BUF_LEN, "%u\n", val);
-	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
-
-err_input:
-	mutex_unlock(&file->lock);
-	hisi_qm_put_dfx_access(qm);
-	return -EINVAL;
-}
-
-static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
-			      size_t count, loff_t *pos)
-{
-	struct debugfs_file *file = filp->private_data;
-	enum qm_debug_file index = file->index;
-	struct hisi_qm *qm = file_to_qm(file);
-	unsigned long val;
-	char tbuf[QM_DBG_TMP_BUF_LEN];
-	int len, ret;
-
-	if (*pos != 0)
-		return 0;
-
-	if (count >= QM_DBG_TMP_BUF_LEN)
-		return -ENOSPC;
-
-	len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf,
-				     count);
-	if (len < 0)
-		return len;
-
-	tbuf[len] = '\0';
-	if (kstrtoul(tbuf, 0, &val))
-		return -EFAULT;
-
-	ret = hisi_qm_get_dfx_access(qm);
-	if (ret)
-		return ret;
-
-	mutex_lock(&file->lock);
-	switch (index) {
-	case CURRENT_QM:
-		ret = current_qm_write(qm, val);
-		break;
-	case CURRENT_Q:
-		ret = current_q_write(qm, val);
-		break;
-	case CLEAR_ENABLE:
-		ret = clear_enable_write(qm, val);
-		break;
-	default:
-		ret = -EINVAL;
-	}
-	mutex_unlock(&file->lock);
-
-	hisi_qm_put_dfx_access(qm);
-
-	if (ret)
-		return ret;
-
-	return count;
-}
-
-static const struct file_operations qm_debug_fops = {
-	.owner = THIS_MODULE,
-	.open = simple_open,
-	.read = qm_debug_read,
-	.write = qm_debug_write,
-};
-
-#define CNT_CYC_REGS_NUM		10
-static const struct debugfs_reg32 qm_dfx_regs[] = {
-	/* XXX_CNT are reading clear register */
-	{"QM_ECC_1BIT_CNT               ",  0x104000ull},
-	{"QM_ECC_MBIT_CNT               ",  0x104008ull},
-	{"QM_DFX_MB_CNT                 ",  0x104018ull},
-	{"QM_DFX_DB_CNT                 ",  0x104028ull},
-	{"QM_DFX_SQE_CNT                ",  0x104038ull},
-	{"QM_DFX_CQE_CNT                ",  0x104048ull},
-	{"QM_DFX_SEND_SQE_TO_ACC_CNT    ",  0x104050ull},
-	{"QM_DFX_WB_SQE_FROM_ACC_CNT    ",  0x104058ull},
-	{"QM_DFX_ACC_FINISH_CNT         ",  0x104060ull},
-	{"QM_DFX_CQE_ERR_CNT            ",  0x1040b4ull},
-	{"QM_DFX_FUNS_ACTIVE_ST         ",  0x200ull},
-	{"QM_ECC_1BIT_INF               ",  0x104004ull},
-	{"QM_ECC_MBIT_INF               ",  0x10400cull},
-	{"QM_DFX_ACC_RDY_VLD0           ",  0x1040a0ull},
-	{"QM_DFX_ACC_RDY_VLD1           ",  0x1040a4ull},
-	{"QM_DFX_AXI_RDY_VLD            ",  0x1040a8ull},
-	{"QM_DFX_FF_ST0                 ",  0x1040c8ull},
-	{"QM_DFX_FF_ST1                 ",  0x1040ccull},
-	{"QM_DFX_FF_ST2                 ",  0x1040d0ull},
-	{"QM_DFX_FF_ST3                 ",  0x1040d4ull},
-	{"QM_DFX_FF_ST4                 ",  0x1040d8ull},
-	{"QM_DFX_FF_ST5                 ",  0x1040dcull},
-	{"QM_DFX_FF_ST6                 ",  0x1040e0ull},
-	{"QM_IN_IDLE_ST                 ",  0x1040e4ull},
-};
-
-static const struct debugfs_reg32 qm_vf_dfx_regs[] = {
-	{"QM_DFX_FUNS_ACTIVE_ST         ",  0x200ull},
-};
-
-/**
- * hisi_qm_regs_dump() - Dump registers's value.
- * @s: debugfs file handle.
- * @regset: accelerator registers information.
- *
- * Dump accelerator registers.
- */
-void hisi_qm_regs_dump(struct seq_file *s, struct debugfs_regset32 *regset)
-{
-	struct pci_dev *pdev = to_pci_dev(regset->dev);
-	struct hisi_qm *qm = pci_get_drvdata(pdev);
-	const struct debugfs_reg32 *regs = regset->regs;
-	int regs_len = regset->nregs;
-	int i, ret;
-	u32 val;
-
-	ret = hisi_qm_get_dfx_access(qm);
-	if (ret)
-		return;
-
-	for (i = 0; i < regs_len; i++) {
-		val = readl(regset->base + regs[i].offset);
-		seq_printf(s, "%s= 0x%08x\n", regs[i].name, val);
-	}
-
-	hisi_qm_put_dfx_access(qm);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_regs_dump);
-
-static int qm_regs_show(struct seq_file *s, void *unused)
-{
-	struct hisi_qm *qm = s->private;
-	struct debugfs_regset32 regset;
-
-	if (qm->fun_type == QM_HW_PF) {
-		regset.regs = qm_dfx_regs;
-		regset.nregs = ARRAY_SIZE(qm_dfx_regs);
-	} else {
-		regset.regs = qm_vf_dfx_regs;
-		regset.nregs = ARRAY_SIZE(qm_vf_dfx_regs);
-	}
-
-	regset.base = qm->io_base;
-	regset.dev = &qm->pdev->dev;
-
-	hisi_qm_regs_dump(s, &regset);
-
-	return 0;
-}
-
-DEFINE_SHOW_ATTRIBUTE(qm_regs);
-
-static void dfx_regs_uninit(struct hisi_qm *qm,
-		struct dfx_diff_registers *dregs, int reg_len)
-{
-	int i;
-
-	/* Setting the pointer is NULL to prevent double free */
-	for (i = 0; i < reg_len; i++) {
-		kfree(dregs[i].regs);
-		dregs[i].regs = NULL;
-	}
-	kfree(dregs);
-}
-
-static struct dfx_diff_registers *dfx_regs_init(struct hisi_qm *qm,
-	const struct dfx_diff_registers *cregs, u32 reg_len)
-{
-	struct dfx_diff_registers *diff_regs;
-	u32 j, base_offset;
-	int i;
-
-	diff_regs = kcalloc(reg_len, sizeof(*diff_regs), GFP_KERNEL);
-	if (!diff_regs)
-		return ERR_PTR(-ENOMEM);
-
-	for (i = 0; i < reg_len; i++) {
-		if (!cregs[i].reg_len)
-			continue;
-
-		diff_regs[i].reg_offset = cregs[i].reg_offset;
-		diff_regs[i].reg_len = cregs[i].reg_len;
-		diff_regs[i].regs = kcalloc(QM_DFX_REGS_LEN, cregs[i].reg_len,
-					 GFP_KERNEL);
-		if (!diff_regs[i].regs)
-			goto alloc_error;
-
-		for (j = 0; j < diff_regs[i].reg_len; j++) {
-			base_offset = diff_regs[i].reg_offset +
-					j * QM_DFX_REGS_LEN;
-			diff_regs[i].regs[j] = readl(qm->io_base + base_offset);
-		}
-	}
-
-	return diff_regs;
-
-alloc_error:
-	while (i > 0) {
-		i--;
-		kfree(diff_regs[i].regs);
-	}
-	kfree(diff_regs);
-	return ERR_PTR(-ENOMEM);
-}
-
-static int qm_diff_regs_init(struct hisi_qm *qm,
-		struct dfx_diff_registers *dregs, u32 reg_len)
-{
-	qm->debug.qm_diff_regs = dfx_regs_init(qm, qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
-	if (IS_ERR(qm->debug.qm_diff_regs))
-		return PTR_ERR(qm->debug.qm_diff_regs);
-
-	qm->debug.acc_diff_regs = dfx_regs_init(qm, dregs, reg_len);
-	if (IS_ERR(qm->debug.acc_diff_regs)) {
-		dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
-		return PTR_ERR(qm->debug.acc_diff_regs);
-	}
-
-	return 0;
-}
-
-static void qm_last_regs_uninit(struct hisi_qm *qm)
-{
-	struct qm_debug *debug = &qm->debug;
-
-	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
-		return;
-
-	kfree(debug->qm_last_words);
-	debug->qm_last_words = NULL;
-}
-
-static int qm_last_regs_init(struct hisi_qm *qm)
-{
-	int dfx_regs_num = ARRAY_SIZE(qm_dfx_regs);
-	struct qm_debug *debug = &qm->debug;
-	int i;
-
-	if (qm->fun_type == QM_HW_VF)
-		return 0;
-
-	debug->qm_last_words = kcalloc(dfx_regs_num, sizeof(unsigned int), GFP_KERNEL);
-	if (!debug->qm_last_words)
-		return -ENOMEM;
-
-	for (i = 0; i < dfx_regs_num; i++) {
-		debug->qm_last_words[i] = readl_relaxed(qm->io_base +
-			qm_dfx_regs[i].offset);
-	}
-
-	return 0;
-}
-
-static void qm_diff_regs_uninit(struct hisi_qm *qm, u32 reg_len)
-{
-	dfx_regs_uninit(qm, qm->debug.acc_diff_regs, reg_len);
-	dfx_regs_uninit(qm, qm->debug.qm_diff_regs, ARRAY_SIZE(qm_diff_regs));
-}
-
-/**
- * hisi_qm_regs_debugfs_init() - Allocate memory for registers.
- * @qm: device qm handle.
- * @dregs: diff registers handle.
- * @reg_len: diff registers region length.
- */
-int hisi_qm_regs_debugfs_init(struct hisi_qm *qm,
-		struct dfx_diff_registers *dregs, u32 reg_len)
-{
-	int ret;
-
-	if (!qm || !dregs)
-		return -EINVAL;
-
-	if (qm->fun_type != QM_HW_PF)
-		return 0;
-
-	ret = qm_last_regs_init(qm);
-	if (ret) {
-		dev_info(&qm->pdev->dev, "failed to init qm words memory!\n");
-		return ret;
-	}
-
-	ret = qm_diff_regs_init(qm, dregs, reg_len);
-	if (ret) {
-		qm_last_regs_uninit(qm);
-		return ret;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_init);
-
-/**
- * hisi_qm_regs_debugfs_uninit() - Free memory for registers.
- * @qm: device qm handle.
- * @reg_len: diff registers region length.
- */
-void hisi_qm_regs_debugfs_uninit(struct hisi_qm *qm, u32 reg_len)
-{
-	if (!qm || qm->fun_type != QM_HW_PF)
-		return;
-
-	qm_diff_regs_uninit(qm, reg_len);
-	qm_last_regs_uninit(qm);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_regs_debugfs_uninit);
-
-/**
- * hisi_qm_acc_diff_regs_dump() - Dump registers's value.
- * @qm: device qm handle.
- * @s: Debugfs file handle.
- * @dregs: diff registers handle.
- * @regs_len: diff registers region length.
- */
-void hisi_qm_acc_diff_regs_dump(struct hisi_qm *qm, struct seq_file *s,
-	struct dfx_diff_registers *dregs, u32 regs_len)
-{
-	u32 j, val, base_offset;
-	int i, ret;
-
-	if (!qm || !s || !dregs)
-		return;
-
-	ret = hisi_qm_get_dfx_access(qm);
-	if (ret)
-		return;
-
-	down_read(&qm->qps_lock);
-	for (i = 0; i < regs_len; i++) {
-		if (!dregs[i].reg_len)
-			continue;
-
-		for (j = 0; j < dregs[i].reg_len; j++) {
-			base_offset = dregs[i].reg_offset + j * QM_DFX_REGS_LEN;
-			val = readl(qm->io_base + base_offset);
-			if (val != dregs[i].regs[j])
-				seq_printf(s, "0x%08x = 0x%08x ---> 0x%08x\n",
-					   base_offset, dregs[i].regs[j], val);
-		}
-	}
-	up_read(&qm->qps_lock);
-
-	hisi_qm_put_dfx_access(qm);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_acc_diff_regs_dump);
-
-static int qm_diff_regs_show(struct seq_file *s, void *unused)
-{
-	struct hisi_qm *qm = s->private;
-
-	hisi_qm_acc_diff_regs_dump(qm, s, qm->debug.qm_diff_regs,
-					ARRAY_SIZE(qm_diff_regs));
-
-	return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(qm_diff_regs);
-
-static ssize_t qm_cmd_read(struct file *filp, char __user *buffer,
-			   size_t count, loff_t *pos)
-{
-	char buf[QM_DBG_READ_LEN];
-	int len;
-
-	len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n",
-			"Please echo help to cmd to get help information");
-
-	return simple_read_from_buffer(buffer, count, pos, buf, len);
-}
-
-static void *qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
-			  dma_addr_t *dma_addr)
-{
-	struct device *dev = &qm->pdev->dev;
-	void *ctx_addr;
-
-	ctx_addr = kzalloc(ctx_size, GFP_KERNEL);
-	if (!ctx_addr)
-		return ERR_PTR(-ENOMEM);
-
-	*dma_addr = dma_map_single(dev, ctx_addr, ctx_size, DMA_FROM_DEVICE);
-	if (dma_mapping_error(dev, *dma_addr)) {
-		dev_err(dev, "DMA mapping error!\n");
-		kfree(ctx_addr);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	return ctx_addr;
-}
-
-static void qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
-			const void *ctx_addr, dma_addr_t *dma_addr)
-{
-	struct device *dev = &qm->pdev->dev;
-
-	dma_unmap_single(dev, *dma_addr, ctx_size, DMA_FROM_DEVICE);
-	kfree(ctx_addr);
-}
-
-static void dump_show(struct hisi_qm *qm, void *info,
-		     unsigned int info_size, char *info_name)
-{
-	struct device *dev = &qm->pdev->dev;
-	u8 *info_curr = info;
-	u32 i;
-#define BYTE_PER_DW	4
-
-	dev_info(dev, "%s DUMP\n", info_name);
-	for (i = 0; i < info_size; i += BYTE_PER_DW, info_curr += BYTE_PER_DW) {
-		pr_info("DW%u: %02X%02X %02X%02X\n", i / BYTE_PER_DW,
-			*(info_curr + 3), *(info_curr + 2), *(info_curr + 1), *(info_curr));
-	}
-}
-
-static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
-{
-	return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
-}
-
-static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
-{
-	return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
-}
-
-static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
-{
-	struct device *dev = &qm->pdev->dev;
-	struct qm_sqc *sqc, *sqc_curr;
-	dma_addr_t sqc_dma;
-	u32 qp_id;
-	int ret;
-
-	if (!s)
-		return -EINVAL;
-
-	ret = kstrtou32(s, 0, &qp_id);
-	if (ret || qp_id >= qm->qp_num) {
-		dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
-		return -EINVAL;
-	}
-
-	sqc = qm_ctx_alloc(qm, sizeof(*sqc), &sqc_dma);
-	if (IS_ERR(sqc))
-		return PTR_ERR(sqc);
-
-	ret = qm_dump_sqc_raw(qm, sqc_dma, qp_id);
-	if (ret) {
-		down_read(&qm->qps_lock);
-		if (qm->sqc) {
-			sqc_curr = qm->sqc + qp_id;
-
-			dump_show(qm, sqc_curr, sizeof(*sqc), "SOFT SQC");
-		}
-		up_read(&qm->qps_lock);
-
-		goto free_ctx;
-	}
-
-	dump_show(qm, sqc, sizeof(*sqc), "SQC");
-
-free_ctx:
-	qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma);
-	return 0;
-}
-
-static int qm_cqc_dump(struct hisi_qm *qm, const char *s)
-{
-	struct device *dev = &qm->pdev->dev;
-	struct qm_cqc *cqc, *cqc_curr;
-	dma_addr_t cqc_dma;
-	u32 qp_id;
-	int ret;
-
-	if (!s)
-		return -EINVAL;
-
-	ret = kstrtou32(s, 0, &qp_id);
-	if (ret || qp_id >= qm->qp_num) {
-		dev_err(dev, "Please input qp num (0-%u)", qm->qp_num - 1);
-		return -EINVAL;
-	}
-
-	cqc = qm_ctx_alloc(qm, sizeof(*cqc), &cqc_dma);
-	if (IS_ERR(cqc))
-		return PTR_ERR(cqc);
-
-	ret = qm_dump_cqc_raw(qm, cqc_dma, qp_id);
-	if (ret) {
-		down_read(&qm->qps_lock);
-		if (qm->cqc) {
-			cqc_curr = qm->cqc + qp_id;
-
-			dump_show(qm, cqc_curr, sizeof(*cqc), "SOFT CQC");
+			break;
 		}
-		up_read(&qm->qps_lock);
-
-		goto free_ctx;
 	}
 
-	dump_show(qm, cqc, sizeof(*cqc), "CQC");
-
-free_ctx:
-	qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma);
-	return 0;
+	writel(lower_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_L);
+	writel(upper_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_H);
 }
 
-static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
-			    int cmd, char *name)
+static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
+			     u32 fun_num, u32 base, u32 number)
 {
-	struct device *dev = &qm->pdev->dev;
-	dma_addr_t xeqc_dma;
-	void *xeqc;
+	struct qm_shaper_factor *factor = NULL;
+	unsigned int val;
 	int ret;
 
-	if (strsep(&s, " ")) {
-		dev_err(dev, "Please do not input extra characters!\n");
-		return -EINVAL;
-	}
-
-	xeqc = qm_ctx_alloc(qm, size, &xeqc_dma);
-	if (IS_ERR(xeqc))
-		return PTR_ERR(xeqc);
+	if (type == SHAPER_VFT && test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps))
+		factor = &qm->factor[fun_num];
 
-	ret = hisi_qm_mb(qm, cmd, xeqc_dma, 0, 1);
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), POLL_PERIOD,
+					 POLL_TIMEOUT);
 	if (ret)
-		goto err_free_ctx;
+		return ret;
 
-	dump_show(qm, xeqc, size, name);
+	writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR);
+	writel(type, qm->io_base + QM_VFT_CFG_TYPE);
+	if (type == SHAPER_VFT)
+		fun_num |= base << QM_SHAPER_VFT_OFFSET;
 
-err_free_ctx:
-	qm_ctx_free(qm, size, xeqc, &xeqc_dma);
-	return ret;
-}
+	writel(fun_num, qm->io_base + QM_VFT_CFG);
 
-static int q_dump_param_parse(struct hisi_qm *qm, char *s,
-			      u32 *e_id, u32 *q_id, u16 q_depth)
-{
-	struct device *dev = &qm->pdev->dev;
-	unsigned int qp_num = qm->qp_num;
-	char *presult;
-	int ret;
+	qm_vft_data_cfg(qm, type, base, number, factor);
 
-	presult = strsep(&s, " ");
-	if (!presult) {
-		dev_err(dev, "Please input qp number!\n");
-		return -EINVAL;
-	}
+	writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
 
-	ret = kstrtou32(presult, 0, q_id);
-	if (ret || *q_id >= qp_num) {
-		dev_err(dev, "Please input qp num (0-%u)", qp_num - 1);
-		return -EINVAL;
-	}
+	return readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					  val & BIT(0), POLL_PERIOD,
+					  POLL_TIMEOUT);
+}
 
-	presult = strsep(&s, " ");
-	if (!presult) {
-		dev_err(dev, "Please input sqe number!\n");
-		return -EINVAL;
-	}
+static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num)
+{
+	u32 qos = qm->factor[fun_num].func_qos;
+	int ret, i;
 
-	ret = kstrtou32(presult, 0, e_id);
-	if (ret || *e_id >= q_depth) {
-		dev_err(dev, "Please input sqe num (0-%u)", q_depth - 1);
-		return -EINVAL;
+	ret = qm_get_shaper_para(qos * QM_QOS_RATE, &qm->factor[fun_num]);
+	if (ret) {
+		dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n");
+		return ret;
 	}
-
-	if (strsep(&s, " ")) {
-		dev_err(dev, "Please do not input extra characters!\n");
-		return -EINVAL;
+	writel(qm->type_rate, qm->io_base + QM_SHAPER_CFG);
+	for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) {
+		/* The base number of queue reuse for different alg type */
+		ret = qm_set_vft_common(qm, SHAPER_VFT, fun_num, i, 1);
+		if (ret)
+			return ret;
 	}
 
 	return 0;
 }
 
-static int qm_sq_dump(struct hisi_qm *qm, char *s)
+/* The config should be conducted after qm_dev_mem_reset() */
+static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
+			      u32 number)
 {
-	u16 sq_depth = qm->qp_array->cq_depth;
-	void *sqe, *sqe_curr;
-	struct hisi_qp *qp;
-	u32 qp_id, sqe_id;
-	int ret;
-
-	ret = q_dump_param_parse(qm, s, &sqe_id, &qp_id, sq_depth);
-	if (ret)
-		return ret;
-
-	sqe = kzalloc(qm->sqe_size * sq_depth, GFP_KERNEL);
-	if (!sqe)
-		return -ENOMEM;
-
-	qp = &qm->qp_array[qp_id];
-	memcpy(sqe, qp->sqe, qm->sqe_size * sq_depth);
-	sqe_curr = sqe + (u32)(sqe_id * qm->sqe_size);
-	memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
-	       qm->debug.sqe_mask_len);
+	int ret, i;
 
-	dump_show(qm, sqe_curr, qm->sqe_size, "SQE");
+	for (i = SQC_VFT; i <= CQC_VFT; i++) {
+		ret = qm_set_vft_common(qm, i, fun_num, base, number);
+		if (ret)
+			return ret;
+	}
 
-	kfree(sqe);
+	/* init default shaper qos val */
+	if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps)) {
+		ret = qm_shaper_init_vft(qm, fun_num);
+		if (ret)
+			goto back_sqc_cqc;
+	}
 
 	return 0;
+back_sqc_cqc:
+	for (i = SQC_VFT; i <= CQC_VFT; i++)
+		qm_set_vft_common(qm, i, fun_num, 0, 0);
+
+	return ret;
 }
 
-static int qm_cq_dump(struct hisi_qm *qm, char *s)
+static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
 {
-	struct qm_cqe *cqe_curr;
-	struct hisi_qp *qp;
-	u32 qp_id, cqe_id;
+	u64 sqc_vft;
 	int ret;
 
-	ret = q_dump_param_parse(qm, s, &cqe_id, &qp_id, qm->qp_array->cq_depth);
+	ret = hisi_qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
 	if (ret)
 		return ret;
 
-	qp = &qm->qp_array[qp_id];
-	cqe_curr = qp->cqe + cqe_id;
-	dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE");
+	sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		  ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
+	*base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+	*number = (QM_SQC_VFT_NUM_MASK_v2 &
+		   (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
 
 	return 0;
 }
 
-static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s,
-			  size_t size, char *name)
+void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
+			  dma_addr_t *dma_addr)
 {
 	struct device *dev = &qm->pdev->dev;
-	void *xeqe;
-	u32 xeqe_id;
-	int ret;
-
-	if (!s)
-		return -EINVAL;
-
-	ret = kstrtou32(s, 0, &xeqe_id);
-	if (ret)
-		return -EINVAL;
-
-	if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) {
-		dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1);
-		return -EINVAL;
-	} else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) {
-		dev_err(dev, "Please input aeqe num (0-%u)", qm->eq_depth - 1);
-		return -EINVAL;
-	}
-
-	down_read(&qm->qps_lock);
-
-	if (qm->eqe && !strcmp(name, "EQE")) {
-		xeqe = qm->eqe + xeqe_id;
-	} else if (qm->aeqe && !strcmp(name, "AEQE")) {
-		xeqe = qm->aeqe + xeqe_id;
-	} else {
-		ret = -EINVAL;
-		goto err_unlock;
-	}
-
-	dump_show(qm, xeqe, size, name);
-
-err_unlock:
-	up_read(&qm->qps_lock);
-	return ret;
-}
+	void *ctx_addr;
 
-static int qm_dbg_help(struct hisi_qm *qm, char *s)
-{
-	struct device *dev = &qm->pdev->dev;
+	ctx_addr = kzalloc(ctx_size, GFP_KERNEL);
+	if (!ctx_addr)
+		return ERR_PTR(-ENOMEM);
 
-	if (strsep(&s, " ")) {
-		dev_err(dev, "Please do not input extra characters!\n");
-		return -EINVAL;
+	*dma_addr = dma_map_single(dev, ctx_addr, ctx_size, DMA_FROM_DEVICE);
+	if (dma_mapping_error(dev, *dma_addr)) {
+		dev_err(dev, "DMA mapping error!\n");
+		kfree(ctx_addr);
+		return ERR_PTR(-ENOMEM);
 	}
 
-	dev_info(dev, "available commands:\n");
-	dev_info(dev, "sqc <num>\n");
-	dev_info(dev, "cqc <num>\n");
-	dev_info(dev, "eqc\n");
-	dev_info(dev, "aeqc\n");
-	dev_info(dev, "sq <num> <e>\n");
-	dev_info(dev, "cq <num> <e>\n");
-	dev_info(dev, "eq <e>\n");
-	dev_info(dev, "aeq <e>\n");
-
-	return 0;
+	return ctx_addr;
 }
 
-static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf)
+void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
+			const void *ctx_addr, dma_addr_t *dma_addr)
 {
 	struct device *dev = &qm->pdev->dev;
-	char *presult, *s, *s_tmp;
-	int ret;
-
-	s = kstrdup(cmd_buf, GFP_KERNEL);
-	if (!s)
-		return -ENOMEM;
-
-	s_tmp = s;
-	presult = strsep(&s, " ");
-	if (!presult) {
-		ret = -EINVAL;
-		goto err_buffer_free;
-	}
-
-	if (!strcmp(presult, "sqc"))
-		ret = qm_sqc_dump(qm, s);
-	else if (!strcmp(presult, "cqc"))
-		ret = qm_cqc_dump(qm, s);
-	else if (!strcmp(presult, "eqc"))
-		ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc),
-				       QM_MB_CMD_EQC, "EQC");
-	else if (!strcmp(presult, "aeqc"))
-		ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc),
-				       QM_MB_CMD_AEQC, "AEQC");
-	else if (!strcmp(presult, "sq"))
-		ret = qm_sq_dump(qm, s);
-	else if (!strcmp(presult, "cq"))
-		ret = qm_cq_dump(qm, s);
-	else if (!strcmp(presult, "eq"))
-		ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE");
-	else if (!strcmp(presult, "aeq"))
-		ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE");
-	else if (!strcmp(presult, "help"))
-		ret = qm_dbg_help(qm, s);
-	else
-		ret = -EINVAL;
-
-	if (ret)
-		dev_info(dev, "Please echo help\n");
-
-err_buffer_free:
-	kfree(s_tmp);
 
-	return ret;
+	dma_unmap_single(dev, *dma_addr, ctx_size, DMA_FROM_DEVICE);
+	kfree(ctx_addr);
 }
 
-static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
-			    size_t count, loff_t *pos)
+static int qm_dump_sqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
 {
-	struct hisi_qm *qm = filp->private_data;
-	char *cmd_buf, *cmd_buf_tmp;
-	int ret;
-
-	if (*pos)
-		return 0;
-
-	ret = hisi_qm_get_dfx_access(qm);
-	if (ret)
-		return ret;
-
-	/* Judge if the instance is being reset. */
-	if (unlikely(atomic_read(&qm->status.flags) == QM_STOP)) {
-		ret = 0;
-		goto put_dfx_access;
-	}
-
-	if (count > QM_DBG_WRITE_LEN) {
-		ret = -ENOSPC;
-		goto put_dfx_access;
-	}
-
-	cmd_buf = memdup_user_nul(buffer, count);
-	if (IS_ERR(cmd_buf)) {
-		ret = PTR_ERR(cmd_buf);
-		goto put_dfx_access;
-	}
-
-	cmd_buf_tmp = strchr(cmd_buf, '\n');
-	if (cmd_buf_tmp) {
-		*cmd_buf_tmp = '\0';
-		count = cmd_buf_tmp - cmd_buf + 1;
-	}
-
-	ret = qm_cmd_write_dump(qm, cmd_buf);
-	if (ret) {
-		kfree(cmd_buf);
-		goto put_dfx_access;
-	}
-
-	kfree(cmd_buf);
-
-	ret = count;
-
-put_dfx_access:
-	hisi_qm_put_dfx_access(qm);
-	return ret;
+	return hisi_qm_mb(qm, QM_MB_CMD_SQC, dma_addr, qp_id, 1);
 }
 
-static const struct file_operations qm_cmd_fops = {
-	.owner = THIS_MODULE,
-	.open = simple_open,
-	.read = qm_cmd_read,
-	.write = qm_cmd_write,
-};
-
-static void qm_create_debugfs_file(struct hisi_qm *qm, struct dentry *dir,
-				   enum qm_debug_file index)
+static int qm_dump_cqc_raw(struct hisi_qm *qm, dma_addr_t dma_addr, u16 qp_id)
 {
-	struct debugfs_file *file = qm->debug.files + index;
-
-	debugfs_create_file(qm_debug_file_name[index], 0600, dir, file,
-			    &qm_debug_fops);
-
-	file->index = index;
-	mutex_init(&file->lock);
-	file->debug = &qm->debug;
+	return hisi_qm_mb(qm, QM_MB_CMD_CQC, dma_addr, qp_id, 1);
 }
 
 static void qm_hw_error_init_v1(struct hisi_qm *qm)
@@ -3155,7 +2130,7 @@ static int qm_drain_qp(struct hisi_qp *qp)
 		return ret;
 	}
 
-	addr = qm_ctx_alloc(qm, size, &dma_addr);
+	addr = hisi_qm_ctx_alloc(qm, size, &dma_addr);
 	if (IS_ERR(addr)) {
 		dev_err(dev, "Failed to alloc ctx for sqc and cqc!\n");
 		return -ENOMEM;
@@ -3190,7 +2165,7 @@ static int qm_drain_qp(struct hisi_qp *qp)
 		usleep_range(WAIT_PERIOD_US_MIN, WAIT_PERIOD_US_MAX);
 	}
 
-	qm_ctx_free(qm, size, addr, &dma_addr);
+	hisi_qm_ctx_free(qm, size, addr, &dma_addr);
 
 	return ret;
 }
@@ -4173,45 +3148,6 @@ err_unlock:
 }
 EXPORT_SYMBOL_GPL(hisi_qm_stop);
 
-static ssize_t qm_status_read(struct file *filp, char __user *buffer,
-			      size_t count, loff_t *pos)
-{
-	struct hisi_qm *qm = filp->private_data;
-	char buf[QM_DBG_READ_LEN];
-	int val, len;
-
-	val = atomic_read(&qm->status.flags);
-	len = scnprintf(buf, QM_DBG_READ_LEN, "%s\n", qm_s[val]);
-
-	return simple_read_from_buffer(buffer, count, pos, buf, len);
-}
-
-static const struct file_operations qm_status_fops = {
-	.owner = THIS_MODULE,
-	.open = simple_open,
-	.read = qm_status_read,
-};
-
-static int qm_debugfs_atomic64_set(void *data, u64 val)
-{
-	if (val)
-		return -EINVAL;
-
-	atomic64_set((atomic64_t *)data, 0);
-
-	return 0;
-}
-
-static int qm_debugfs_atomic64_get(void *data, u64 *val)
-{
-	*val = atomic64_read((atomic64_t *)data);
-
-	return 0;
-}
-
-DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
-			 qm_debugfs_atomic64_set, "%llu\n");
-
 static void qm_hw_error_init(struct hisi_qm *qm)
 {
 	if (!qm->ops->hw_error_init) {
@@ -4732,7 +3668,7 @@ static const struct file_operations qm_algqos_fops = {
  *
  * Create function qos debugfs files, VF ping PF to get function qos.
  */
-static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
+void hisi_qm_set_algqos_init(struct hisi_qm *qm)
 {
 	if (qm->fun_type == QM_HW_PF)
 		debugfs_create_file("alg_qos", 0644, qm->debug.debug_root,
@@ -4742,88 +3678,6 @@ static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
 				    qm, &qm_algqos_fops);
 }
 
-/**
- * hisi_qm_debug_init() - Initialize qm related debugfs files.
- * @qm: The qm for which we want to add debugfs files.
- *
- * Create qm related debugfs files.
- */
-void hisi_qm_debug_init(struct hisi_qm *qm)
-{
-	struct dfx_diff_registers *qm_regs = qm->debug.qm_diff_regs;
-	struct qm_dfx *dfx = &qm->debug.dfx;
-	struct dentry *qm_d;
-	void *data;
-	int i;
-
-	qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
-	qm->debug.qm_d = qm_d;
-
-	/* only show this in PF */
-	if (qm->fun_type == QM_HW_PF) {
-		qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
-		for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
-			qm_create_debugfs_file(qm, qm->debug.qm_d, i);
-	}
-
-	if (qm_regs)
-		debugfs_create_file("diff_regs", 0444, qm->debug.qm_d,
-					qm, &qm_diff_regs_fops);
-
-	debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
-
-	debugfs_create_file("cmd", 0600, qm->debug.qm_d, qm, &qm_cmd_fops);
-
-	debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
-			&qm_status_fops);
-	for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
-		data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
-		debugfs_create_file(qm_dfx_files[i].name,
-			0644,
-			qm_d,
-			data,
-			&qm_atomic64_ops);
-	}
-
-	if (test_bit(QM_SUPPORT_FUNC_QOS, &qm->caps))
-		hisi_qm_set_algqos_init(qm);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
-
-/**
- * hisi_qm_debug_regs_clear() - clear qm debug related registers.
- * @qm: The qm for which we want to clear its debug registers.
- */
-void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
-{
-	const struct debugfs_reg32 *regs;
-	int i;
-
-	/* clear current_qm */
-	writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
-	writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
-
-	/* clear current_q */
-	writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
-	writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
-	/*
-	 * these registers are reading and clearing, so clear them after
-	 * reading them.
-	 */
-	writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
-
-	regs = qm_dfx_regs;
-	for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
-		readl(qm->io_base + regs->offset);
-		regs++;
-	}
-
-	/* clear clear_enable */
-	writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
-
 static void hisi_qm_init_vf_qos(struct hisi_qm *qm, int total_func)
 {
 	int i;
@@ -5462,24 +4316,6 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
 	return 0;
 }
 
-static void qm_show_last_dfx_regs(struct hisi_qm *qm)
-{
-	struct qm_debug *debug = &qm->debug;
-	struct pci_dev *pdev = qm->pdev;
-	u32 val;
-	int i;
-
-	if (qm->fun_type == QM_HW_VF || !debug->qm_last_words)
-		return;
-
-	for (i = 0; i < ARRAY_SIZE(qm_dfx_regs); i++) {
-		val = readl_relaxed(qm->io_base + qm_dfx_regs[i].offset);
-		if (debug->qm_last_words[i] != val)
-			pci_info(pdev, "%s \t= 0x%08x => 0x%08x\n",
-			qm_dfx_regs[i].name, debug->qm_last_words[i], val);
-	}
-}
-
 static int qm_controller_reset(struct hisi_qm *qm)
 {
 	struct pci_dev *pdev = qm->pdev;
@@ -5495,7 +4331,7 @@ static int qm_controller_reset(struct hisi_qm *qm)
 		return ret;
 	}
 
-	qm_show_last_dfx_regs(qm);
+	hisi_qm_show_last_dfx_regs(qm);
 	if (qm->err_ini->show_last_dfx_regs)
 		qm->err_ini->show_last_dfx_regs(qm);
 
diff --git a/drivers/crypto/hisilicon/qm_common.h b/drivers/crypto/hisilicon/qm_common.h
new file mode 100644
index 0000000000000..1406a422d4551
--- /dev/null
+++ b/drivers/crypto/hisilicon/qm_common.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 HiSilicon Limited. */
+#ifndef QM_COMMON_H
+#define QM_COMMON_H
+
+#define QM_DBG_READ_LEN		256
+#define QM_RESETTING		2
+
+struct qm_cqe {
+	__le32 rsvd0;
+	__le16 cmd_id;
+	__le16 rsvd1;
+	__le16 sq_head;
+	__le16 sq_num;
+	__le16 rsvd2;
+	__le16 w7;
+};
+
+struct qm_eqe {
+	__le32 dw0;
+};
+
+struct qm_aeqe {
+	__le32 dw0;
+};
+
+struct qm_sqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le16 w8;
+	__le16 rsvd0;
+	__le16 pasid;
+	__le16 w11;
+	__le16 cq_num;
+	__le16 w13;
+	__le32 rsvd1;
+};
+
+struct qm_cqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le16 w8;
+	__le16 rsvd0;
+	__le16 pasid;
+	__le16 w11;
+	__le32 dw6;
+	__le32 rsvd1;
+};
+
+struct qm_eqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le32 rsvd[2];
+	__le32 dw6;
+};
+
+struct qm_aeqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le32 rsvd[2];
+	__le32 dw6;
+};
+
+static const char * const qm_s[] = {
+	"init", "start", "close", "stop",
+};
+
+void *hisi_qm_ctx_alloc(struct hisi_qm *qm, size_t ctx_size,
+			dma_addr_t *dma_addr);
+void hisi_qm_ctx_free(struct hisi_qm *qm, size_t ctx_size,
+		      const void *ctx_addr, dma_addr_t *dma_addr);
+void hisi_qm_show_last_dfx_regs(struct hisi_qm *qm);
+void hisi_qm_set_algqos_init(struct hisi_qm *qm);
+
+#endif
-- 
GitLab


From 9c75609842f091fa814153d21243b07988dc8ef3 Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 12 Nov 2022 02:12:53 +0000
Subject: [PATCH 1779/4122] crypto: hisilicon/qm - the command dump process is
 modified

Reduce the function complexity by use the function table in the
process of dumping queue. The function input parameters are
unified. And maintainability is enhanced.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/debugfs.c | 130 ++++++++++++++++++++---------
 1 file changed, 90 insertions(+), 40 deletions(-)

diff --git a/drivers/crypto/hisilicon/debugfs.c b/drivers/crypto/hisilicon/debugfs.c
index 13bec8b2d7237..2cc1591949db7 100644
--- a/drivers/crypto/hisilicon/debugfs.c
+++ b/drivers/crypto/hisilicon/debugfs.c
@@ -36,6 +36,12 @@ struct qm_dfx_item {
 	u32 offset;
 };
 
+struct qm_cmd_dump_item {
+	const char *cmd;
+	char *info_name;
+	int (*dump_fn)(struct hisi_qm *qm, char *cmd, char *info_name);
+};
+
 static struct qm_dfx_item qm_dfx_files[] = {
 	{"err_irq", offsetof(struct qm_dfx, err_irq_cnt)},
 	{"aeq_irq", offsetof(struct qm_dfx, aeq_irq_cnt)},
@@ -128,7 +134,7 @@ static void dump_show(struct hisi_qm *qm, void *info,
 	}
 }
 
-static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
+static int qm_sqc_dump(struct hisi_qm *qm, char *s, char *name)
 {
 	struct device *dev = &qm->pdev->dev;
 	struct qm_sqc *sqc, *sqc_curr;
@@ -162,14 +168,14 @@ static int qm_sqc_dump(struct hisi_qm *qm, const char *s)
 		goto free_ctx;
 	}
 
-	dump_show(qm, sqc, sizeof(*sqc), "SQC");
+	dump_show(qm, sqc, sizeof(*sqc), name);
 
 free_ctx:
 	hisi_qm_ctx_free(qm, sizeof(*sqc), sqc, &sqc_dma);
 	return 0;
 }
 
-static int qm_cqc_dump(struct hisi_qm *qm, const char *s)
+static int qm_cqc_dump(struct hisi_qm *qm, char *s, char *name)
 {
 	struct device *dev = &qm->pdev->dev;
 	struct qm_cqc *cqc, *cqc_curr;
@@ -203,26 +209,35 @@ static int qm_cqc_dump(struct hisi_qm *qm, const char *s)
 		goto free_ctx;
 	}
 
-	dump_show(qm, cqc, sizeof(*cqc), "CQC");
+	dump_show(qm, cqc, sizeof(*cqc), name);
 
 free_ctx:
 	hisi_qm_ctx_free(qm, sizeof(*cqc), cqc, &cqc_dma);
 	return 0;
 }
 
-static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, size_t size,
-			    int cmd, char *name)
+static int qm_eqc_aeqc_dump(struct hisi_qm *qm, char *s, char *name)
 {
 	struct device *dev = &qm->pdev->dev;
 	dma_addr_t xeqc_dma;
+	size_t size;
 	void *xeqc;
 	int ret;
+	u8 cmd;
 
 	if (strsep(&s, " ")) {
 		dev_err(dev, "Please do not input extra characters!\n");
 		return -EINVAL;
 	}
 
+	if (!strcmp(name, "EQC")) {
+		cmd = QM_MB_CMD_EQC;
+		size = sizeof(struct qm_eqc);
+	} else {
+		cmd = QM_MB_CMD_AEQC;
+		size = sizeof(struct qm_aeqc);
+	}
+
 	xeqc = hisi_qm_ctx_alloc(qm, size, &xeqc_dma);
 	if (IS_ERR(xeqc))
 		return PTR_ERR(xeqc);
@@ -278,7 +293,7 @@ static int q_dump_param_parse(struct hisi_qm *qm, char *s,
 	return 0;
 }
 
-static int qm_sq_dump(struct hisi_qm *qm, char *s)
+static int qm_sq_dump(struct hisi_qm *qm, char *s, char *name)
 {
 	u16 sq_depth = qm->qp_array->cq_depth;
 	void *sqe, *sqe_curr;
@@ -300,14 +315,14 @@ static int qm_sq_dump(struct hisi_qm *qm, char *s)
 	memset(sqe_curr + qm->debug.sqe_mask_offset, QM_SQE_ADDR_MASK,
 	       qm->debug.sqe_mask_len);
 
-	dump_show(qm, sqe_curr, qm->sqe_size, "SQE");
+	dump_show(qm, sqe_curr, qm->sqe_size, name);
 
 	kfree(sqe);
 
 	return 0;
 }
 
-static int qm_cq_dump(struct hisi_qm *qm, char *s)
+static int qm_cq_dump(struct hisi_qm *qm, char *s, char *name)
 {
 	struct qm_cqe *cqe_curr;
 	struct hisi_qp *qp;
@@ -320,15 +335,16 @@ static int qm_cq_dump(struct hisi_qm *qm, char *s)
 
 	qp = &qm->qp_array[qp_id];
 	cqe_curr = qp->cqe + cqe_id;
-	dump_show(qm, cqe_curr, sizeof(struct qm_cqe), "CQE");
+	dump_show(qm, cqe_curr, sizeof(struct qm_cqe), name);
 
 	return 0;
 }
 
-static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s,
-			  size_t size, char *name)
+static int qm_eq_aeq_dump(struct hisi_qm *qm, char *s, char *name)
 {
 	struct device *dev = &qm->pdev->dev;
+	u16 xeq_depth;
+	size_t size;
 	void *xeqe;
 	u32 xeqe_id;
 	int ret;
@@ -340,11 +356,16 @@ static int qm_eq_aeq_dump(struct hisi_qm *qm, const char *s,
 	if (ret)
 		return -EINVAL;
 
-	if (!strcmp(name, "EQE") && xeqe_id >= qm->eq_depth) {
-		dev_err(dev, "Please input eqe num (0-%u)", qm->eq_depth - 1);
-		return -EINVAL;
-	} else if (!strcmp(name, "AEQE") && xeqe_id >= qm->aeq_depth) {
-		dev_err(dev, "Please input aeqe num (0-%u)", qm->eq_depth - 1);
+	if (!strcmp(name, "EQE")) {
+		xeq_depth = qm->eq_depth;
+		size = sizeof(struct qm_eqe);
+	} else {
+		xeq_depth = qm->aeq_depth;
+		size = sizeof(struct qm_aeqe);
+	}
+
+	if (xeqe_id >= xeq_depth) {
+		dev_err(dev, "Please input eqe or aeqe num (0-%u)", xeq_depth - 1);
 		return -EINVAL;
 	}
 
@@ -388,11 +409,47 @@ static int qm_dbg_help(struct hisi_qm *qm, char *s)
 	return 0;
 }
 
+static const struct qm_cmd_dump_item qm_cmd_dump_table[] = {
+	{
+		.cmd = "sqc",
+		.info_name = "SQC",
+		.dump_fn = qm_sqc_dump,
+	}, {
+		.cmd = "cqc",
+		.info_name = "CQC",
+		.dump_fn = qm_cqc_dump,
+	}, {
+		.cmd = "eqc",
+		.info_name = "EQC",
+		.dump_fn = qm_eqc_aeqc_dump,
+	}, {
+		.cmd = "aeqc",
+		.info_name = "AEQC",
+		.dump_fn = qm_eqc_aeqc_dump,
+	}, {
+		.cmd = "sq",
+		.info_name = "SQE",
+		.dump_fn = qm_sq_dump,
+	}, {
+		.cmd = "cq",
+		.info_name = "CQE",
+		.dump_fn = qm_cq_dump,
+	}, {
+		.cmd = "eq",
+		.info_name = "EQE",
+		.dump_fn = qm_eq_aeq_dump,
+	}, {
+		.cmd = "aeq",
+		.info_name = "AEQE",
+		.dump_fn = qm_eq_aeq_dump,
+	},
+};
+
 static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf)
 {
 	struct device *dev = &qm->pdev->dev;
 	char *presult, *s, *s_tmp;
-	int ret;
+	int table_size, i, ret;
 
 	s = kstrdup(cmd_buf, GFP_KERNEL);
 	if (!s)
@@ -405,31 +462,24 @@ static int qm_cmd_write_dump(struct hisi_qm *qm, const char *cmd_buf)
 		goto err_buffer_free;
 	}
 
-	if (!strcmp(presult, "sqc"))
-		ret = qm_sqc_dump(qm, s);
-	else if (!strcmp(presult, "cqc"))
-		ret = qm_cqc_dump(qm, s);
-	else if (!strcmp(presult, "eqc"))
-		ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_eqc),
-				       QM_MB_CMD_EQC, "EQC");
-	else if (!strcmp(presult, "aeqc"))
-		ret = qm_eqc_aeqc_dump(qm, s, sizeof(struct qm_aeqc),
-				       QM_MB_CMD_AEQC, "AEQC");
-	else if (!strcmp(presult, "sq"))
-		ret = qm_sq_dump(qm, s);
-	else if (!strcmp(presult, "cq"))
-		ret = qm_cq_dump(qm, s);
-	else if (!strcmp(presult, "eq"))
-		ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_eqe), "EQE");
-	else if (!strcmp(presult, "aeq"))
-		ret = qm_eq_aeq_dump(qm, s, sizeof(struct qm_aeqe), "AEQE");
-	else if (!strcmp(presult, "help"))
+	if (!strcmp(presult, "help")) {
 		ret = qm_dbg_help(qm, s);
-	else
-		ret = -EINVAL;
+		goto err_buffer_free;
+	}
 
-	if (ret)
+	table_size = ARRAY_SIZE(qm_cmd_dump_table);
+	for (i = 0; i < table_size; i++) {
+		if (!strcmp(presult, qm_cmd_dump_table[i].cmd)) {
+			ret = qm_cmd_dump_table[i].dump_fn(qm, s,
+				qm_cmd_dump_table[i].info_name);
+			break;
+		}
+	}
+
+	if (i == table_size) {
 		dev_info(dev, "Please echo help\n");
+		ret = -EINVAL;
+	}
 
 err_buffer_free:
 	kfree(s_tmp);
-- 
GitLab


From 2132d4efaa66388f1f79c79a920908a22464686b Mon Sep 17 00:00:00 2001
From: Kai Ye <yekai13@huawei.com>
Date: Sat, 12 Nov 2022 08:51:04 +0000
Subject: [PATCH 1780/4122] crypto: hisilicon/sec - fix spelling mistake
 'ckeck' -> 'check'

There are a couple of spelling mistakes in sec2. Fix them.

Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 84ae8ddd1a131..a2630ddc02944 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -2009,7 +2009,7 @@ static int sec_aead_sha512_ctx_init(struct crypto_aead *tfm)
 	return sec_aead_ctx_init(tfm, "sha512");
 }
 
-static int sec_skcipher_cryptlen_ckeck(struct sec_ctx *ctx,
+static int sec_skcipher_cryptlen_check(struct sec_ctx *ctx,
 	struct sec_req *sreq)
 {
 	u32 cryptlen = sreq->c_req.sk_req->cryptlen;
@@ -2071,7 +2071,7 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
 		}
 		return 0;
 	} else if (c_alg == SEC_CALG_AES || c_alg == SEC_CALG_SM4) {
-		return sec_skcipher_cryptlen_ckeck(ctx, sreq);
+		return sec_skcipher_cryptlen_check(ctx, sreq);
 	}
 
 	dev_err(dev, "skcipher algorithm error!\n");
-- 
GitLab


From 75df46b598b5b46b0857ee7d2410deaf215e23d1 Mon Sep 17 00:00:00 2001
From: Wenkai Lin <linwenkai6@hisilicon.com>
Date: Sat, 12 Nov 2022 08:51:05 +0000
Subject: [PATCH 1781/4122] crypto: hisilicon/sec - remove continuous blank
 lines

Fix that put two or more continuous blank lines inside function.

Signed-off-by: Wenkai Lin <linwenkai6@hisilicon.com>
Signed-off-by: Kai Ye <yekai13@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/sec2/sec_crypto.c | 1 -
 drivers/crypto/hisilicon/sec2/sec_main.c   | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index a2630ddc02944..f5bfc9755a4a3 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -283,7 +283,6 @@ static int sec_bd_send(struct sec_ctx *ctx, struct sec_req *req)
 
 	spin_lock_bh(&qp_ctx->req_lock);
 	ret = hisi_qp_send(qp_ctx->qp, &req->sec_sqe);
-
 	if (ctx->fake_req_limit <=
 	    atomic_read(&qp_ctx->qp->qp_status.used) && !ret) {
 		list_add_tail(&req->backlog_head, &qp_ctx->backlog);
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 4e24735d95bae..93572c0d4faa3 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -427,7 +427,6 @@ static void sec_set_endian(struct hisi_qm *qm)
 	if (!IS_ENABLED(CONFIG_64BIT))
 		reg |= BIT(1);
 
-
 	if (!IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN))
 		reg |= BIT(0);
 
-- 
GitLab


From 5a47cb4df38bee861de37c12aaa4ef5510dd533b Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Thu, 17 Nov 2022 10:44:21 +0100
Subject: [PATCH 1782/4122] dt-bindings: arm-smmu: Add SM6350 GPU SMMUv2

SM6350 has a qcom,smmu-v2-style SMMU just for Adreno and friends.
Document it.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221117094422.11000-2-konrad.dybcio@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/devicetree/bindings/iommu/arm,smmu.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index 28f5720824cd3..b28c5c2b0ff23 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -94,6 +94,7 @@ properties:
               - qcom,sc7180-smmu-v2
               - qcom,sdm630-smmu-v2
               - qcom,sdm845-smmu-v2
+              - qcom,sm6350-smmu-v2
           - const: qcom,adreno-smmu
           - const: qcom,smmu-v2
       - description: Qcom Adreno GPUs on Google Cheza platform
@@ -346,6 +347,7 @@ allOf:
         compatible:
           contains:
             enum:
+              - qcom,sm6350-smmu-v2
               - qcom,sm8150-smmu-500
               - qcom,sm8250-smmu-500
     then:
-- 
GitLab


From 3811a7283a0a07fa84ccde69b3d48115d34e79af Mon Sep 17 00:00:00 2001
From: Konrad Dybcio <konrad.dybcio@linaro.org>
Date: Thu, 17 Nov 2022 10:44:22 +0100
Subject: [PATCH 1783/4122] iommu/arm-smmu-qcom: Add SM6350 SMMUv2

SM6350 uses a qcom,smmu-v2-style SMMU just for Adreno and friends.
Add a compatible for it.

Signed-off-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Link: https://lore.kernel.org/r/20221117094422.11000-3-konrad.dybcio@linaro.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index c94daf88c5051..91d404deb1155 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -509,6 +509,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,sdm845-smmu-500", .data = &sdm845_smmu_500_data },
 	{ .compatible = "qcom,sm6115-smmu-500", .data = &qcom_smmu_500_impl0_data},
 	{ .compatible = "qcom,sm6125-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sm6350-smmu-v2", .data = &qcom_smmu_v2_data },
 	{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data },
-- 
GitLab


From 6c7b2202e4d11572ab23a89aeec49005b94bb966 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 17 Nov 2022 12:25:02 -0500
Subject: [PATCH 1784/4122] KVM: x86: avoid memslot check in NX hugepage
 recovery if it cannot succeed

Since gfn_to_memslot() is relatively expensive, it helps to
skip it if it the memslot cannot possibly have dirty logging
enabled.  In order to do this, add to struct kvm a counter
of the number of log-page memslots.  While the correct value
can only be read with slots_lock taken, the NX recovery thread
is content with using an approximate value.  Therefore, the
counter is an atomic_t.

Based on https://lore.kernel.org/kvm/20221027200316.2221027-2-dmatlack@google.com/
by David Matlack.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu/mmu.c   | 22 +++++++++++++++++++---
 include/linux/kvm_host.h |  5 +++++
 virt/kvm/kvm_main.c      |  8 ++++++++
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index cfff74685a259..4736d7849c60f 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6878,16 +6878,32 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
 		WARN_ON_ONCE(!sp->nx_huge_page_disallowed);
 		WARN_ON_ONCE(!sp->role.direct);
 
-		slot = gfn_to_memslot(kvm, sp->gfn);
-		WARN_ON_ONCE(!slot);
-
 		/*
 		 * Unaccount and do not attempt to recover any NX Huge Pages
 		 * that are being dirty tracked, as they would just be faulted
 		 * back in as 4KiB pages. The NX Huge Pages in this slot will be
 		 * recovered, along with all the other huge pages in the slot,
 		 * when dirty logging is disabled.
+		 *
+		 * Since gfn_to_memslot() is relatively expensive, it helps to
+		 * skip it if it the test cannot possibly return true.  On the
+		 * other hand, if any memslot has logging enabled, chances are
+		 * good that all of them do, in which case unaccount_nx_huge_page()
+		 * is much cheaper than zapping the page.
+		 *
+		 * If a memslot update is in progress, reading an incorrect value
+		 * of kvm->nr_memslots_dirty_logging is not a problem: if it is
+		 * becoming zero, gfn_to_memslot() will be done unnecessarily; if
+		 * it is becoming nonzero, the page will be zapped unnecessarily.
+		 * Either way, this only affects efficiency in racy situations,
+		 * and not correctness.
 		 */
+		slot = NULL;
+		if (atomic_read(&kvm->nr_memslots_dirty_logging)) {
+			slot = gfn_to_memslot(kvm, sp->gfn);
+			WARN_ON_ONCE(!slot);
+		}
+
 		if (slot && kvm_slot_dirty_track_enabled(slot))
 			unaccount_nx_huge_page(kvm, sp);
 		else if (is_tdp_mmu_page(sp))
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index e6e66c5e56f24..6f0f389f5f9cd 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -722,6 +722,11 @@ struct kvm {
 	/* The current active memslot set for each address space */
 	struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct xarray vcpu_array;
+	/*
+	 * Protected by slots_lock, but can be read outside if an
+	 * incorrect answer is acceptable.
+	 */
+	atomic_t nr_memslots_dirty_logging;
 
 	/* Used to wait for completion of MMU notifiers.  */
 	spinlock_t mn_invalidate_lock;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 43bbe4fde078f..1782c4555d94f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1641,6 +1641,8 @@ static void kvm_commit_memory_region(struct kvm *kvm,
 				     const struct kvm_memory_slot *new,
 				     enum kvm_mr_change change)
 {
+	int old_flags = old ? old->flags : 0;
+	int new_flags = new ? new->flags : 0;
 	/*
 	 * Update the total number of memslot pages before calling the arch
 	 * hook so that architectures can consume the result directly.
@@ -1650,6 +1652,12 @@ static void kvm_commit_memory_region(struct kvm *kvm,
 	else if (change == KVM_MR_CREATE)
 		kvm->nr_memslot_pages += new->npages;
 
+	if ((old_flags ^ new_flags) & KVM_MEM_LOG_DIRTY_PAGES) {
+		int change = (new_flags & KVM_MEM_LOG_DIRTY_PAGES) ? 1 : -1;
+		atomic_set(&kvm->nr_memslots_dirty_logging,
+			   atomic_read(&kvm->nr_memslots_dirty_logging) + change);
+	}
+
 	kvm_arch_commit_memory_region(kvm, old, new, change);
 
 	switch (change) {
-- 
GitLab


From 74c8e6bffbe10c4470139496f930c0b0752c85c9 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 29 Oct 2022 00:47:34 -0700
Subject: [PATCH 1785/4122] driver core: Add __alloc_size hint to devm
 allocators

Mark the devm_*alloc()-family of allocations with appropriate
__alloc_size()/__realloc_size() hints so the compiler can attempt to
reason about buffer lengths from allocations.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Rasmus Villemoes <rasmus.villemoes@prevas.dk>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Nishanth Menon <nm@ti.com>
Cc: Michael Kelley <mikelley@microsoft.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Won Chung <wonchung@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20221029074734.gonna.276-kees@kernel.org
---
 include/linux/device.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 424b55df02727..5e4cd857e74f8 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -197,9 +197,9 @@ void devres_remove_group(struct device *dev, void *id);
 int devres_release_group(struct device *dev, void *id);
 
 /* managed devm_k.alloc/kfree for device drivers */
-void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __malloc;
+void *devm_kmalloc(struct device *dev, size_t size, gfp_t gfp) __alloc_size(2);
 void *devm_krealloc(struct device *dev, void *ptr, size_t size,
-		    gfp_t gfp) __must_check;
+		    gfp_t gfp) __must_check __realloc_size(3);
 __printf(3, 0) char *devm_kvasprintf(struct device *dev, gfp_t gfp,
 				     const char *fmt, va_list ap) __malloc;
 __printf(3, 4) char *devm_kasprintf(struct device *dev, gfp_t gfp,
@@ -226,7 +226,8 @@ static inline void *devm_kcalloc(struct device *dev,
 void devm_kfree(struct device *dev, const void *p);
 char *devm_kstrdup(struct device *dev, const char *s, gfp_t gfp) __malloc;
 const char *devm_kstrdup_const(struct device *dev, const char *s, gfp_t gfp);
-void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp);
+void *devm_kmemdup(struct device *dev, const void *src, size_t len, gfp_t gfp)
+	__realloc_size(3);
 
 unsigned long devm_get_free_pages(struct device *dev,
 				  gfp_t gfp_mask, unsigned int order);
-- 
GitLab


From 96d845a67b7e406cfed7880a724c8ca6121e022e Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 2 Nov 2022 08:42:15 -0700
Subject: [PATCH 1786/4122] drm/fsl-dcu: Fix return type of
 fsl_dcu_drm_connector_mode_valid()

With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG),
indirect call targets are validated against the expected function
pointer prototype to make sure the call target is valid to help mitigate
ROP attacks. If they are not identical, there is a failure at run time,
which manifests as either a kernel panic or thread getting killed. A
proposed warning in clang aims to catch these at compile time, which
reveals:

  drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c:74:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict]
          .mode_valid = fsl_dcu_drm_connector_mode_valid,
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  1 error generated.

->mode_valid() in 'struct drm_connector_helper_funcs' expects a return
type of 'enum drm_mode_status', not 'int'. Adjust the return type of
fsl_dcu_drm_connector_mode_valid() to match the prototype's to resolve
the warning and CFI failure.

Link: https://github.com/ClangBuiltLinux/linux/issues/1750
Reported-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221102154215.78059-1-nathan@kernel.org
---
 drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index 4d4a715b429d1..2c2b92324a2e9 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -60,8 +60,9 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector)
 	return drm_panel_get_modes(fsl_connector->panel, connector);
 }
 
-static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
+static enum drm_mode_status
+fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector,
+				 struct drm_display_mode *mode)
 {
 	if (mode->hdisplay & 0xf)
 		return MODE_ERROR;
-- 
GitLab


From 0ad811cc08a937d875cbad0149c1bab17f84ba05 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Wed, 2 Nov 2022 08:56:23 -0700
Subject: [PATCH 1787/4122] drm/sti: Fix return type of
 sti_{dvo,hda,hdmi}_connector_mode_valid()

With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG),
indirect call targets are validated against the expected function
pointer prototype to make sure the call target is valid to help mitigate
ROP attacks. If they are not identical, there is a failure at run time,
which manifests as either a kernel panic or thread getting killed. A
proposed warning in clang aims to catch these at compile time, which
reveals:

  drivers/gpu/drm/sti/sti_hda.c:637:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict]
          .mode_valid = sti_hda_connector_mode_valid,
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
  drivers/gpu/drm/sti/sti_dvo.c:376:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict]
          .mode_valid = sti_dvo_connector_mode_valid,
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~
  drivers/gpu/drm/sti/sti_hdmi.c:1035:16: error: incompatible function pointer types initializing 'enum drm_mode_status (*)(struct drm_connector *, struct drm_display_mode *)' with an expression of type 'int (struct drm_connector *, struct drm_display_mode *)' [-Werror,-Wincompatible-function-pointer-types-strict]
          .mode_valid = sti_hdmi_connector_mode_valid,
                        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~

->mode_valid() in 'struct drm_connector_helper_funcs' expects a return
type of 'enum drm_mode_status', not 'int'. Adjust the return type of
sti_{dvo,hda,hdmi}_connector_mode_valid() to match the prototype's to
resolve the warning and CFI failure.

Link: https://github.com/ClangBuiltLinux/linux/issues/1750
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221102155623.3042869-1-nathan@kernel.org
---
 drivers/gpu/drm/sti/sti_dvo.c  | 5 +++--
 drivers/gpu/drm/sti/sti_hda.c  | 5 +++--
 drivers/gpu/drm/sti/sti_hdmi.c | 5 +++--
 3 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index b6ee8a82e656c..076d5f30a09cc 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -346,8 +346,9 @@ static int sti_dvo_connector_get_modes(struct drm_connector *connector)
 
 #define CLK_TOLERANCE_HZ 50
 
-static int sti_dvo_connector_mode_valid(struct drm_connector *connector,
-					struct drm_display_mode *mode)
+static enum drm_mode_status
+sti_dvo_connector_mode_valid(struct drm_connector *connector,
+			     struct drm_display_mode *mode)
 {
 	int target = mode->clock * 1000;
 	int target_min = target - CLK_TOLERANCE_HZ;
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index 03cc401ed5934..a53b5a15c2a9d 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -601,8 +601,9 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector)
 
 #define CLK_TOLERANCE_HZ 50
 
-static int sti_hda_connector_mode_valid(struct drm_connector *connector,
-					struct drm_display_mode *mode)
+static enum drm_mode_status
+sti_hda_connector_mode_valid(struct drm_connector *connector,
+			     struct drm_display_mode *mode)
 {
 	int target = mode->clock * 1000;
 	int target_min = target - CLK_TOLERANCE_HZ;
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index cb82622877d20..09e0cadb63683 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -1004,8 +1004,9 @@ fail:
 
 #define CLK_TOLERANCE_HZ 50
 
-static int sti_hdmi_connector_mode_valid(struct drm_connector *connector,
-					struct drm_display_mode *mode)
+static enum drm_mode_status
+sti_hdmi_connector_mode_valid(struct drm_connector *connector,
+			      struct drm_display_mode *mode)
 {
 	int target = mode->clock * 1000;
 	int target_min = target - CLK_TOLERANCE_HZ;
-- 
GitLab


From 089fe572a2e0a89e36a455d299d801770293d08f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 1 Nov 2022 15:53:39 +0100
Subject: [PATCH 1788/4122] x86/hyperv: Move VMCB enlightenment definitions to
 hyperv-tlfs.h

Move Hyper-V's VMCB enlightenment definitions to the TLFS header; the
definitions come directly from the TLFS[*], not from KVM.

No functional change intended.

[*] https://learn.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/datatypes/hv_svm_enlightened_vmcb_fields

[vitaly: rename VMCB_HV_ -> HV_VMCB_ to match the rest of
hyperv-tlfs.h, keep svm/hyperv.h]

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-2-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h            | 22 +++++++++++++++++++
 arch/x86/kvm/svm/hyperv.h                     | 22 -------------------
 arch/x86/kvm/svm/nested.c                     |  2 +-
 arch/x86/kvm/svm/svm_onhyperv.c               |  2 +-
 arch/x86/kvm/svm/svm_onhyperv.h               |  4 ++--
 .../selftests/kvm/x86_64/hyperv_svm_test.c    |  6 ++---
 6 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 3089ec352743b..245a806a97170 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -598,6 +598,28 @@ struct hv_enlightened_vmcs {
 
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL			0xFFFF
 
+/*
+ * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose
+ * SVM enlightenments to guests.
+ */
+struct hv_enlightenments {
+	struct __packed hv_enlightenments_control {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 enlightened_npt_tlb: 1;
+		u32 reserved:29;
+	} __packed hv_enlightenments_control;
+	u32 hv_vp_id;
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB.
+ */
+#define HV_VMCB_NESTED_ENLIGHTENMENTS		31
+
 struct hv_partition_assist_pg {
 	u32 tlb_lock_count;
 };
diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h
index 7d6d97968fb98..c59544cdf03b7 100644
--- a/arch/x86/kvm/svm/hyperv.h
+++ b/arch/x86/kvm/svm/hyperv.h
@@ -10,26 +10,4 @@
 
 #include "../hyperv.h"
 
-/*
- * Hyper-V uses the software reserved 32 bytes in VMCB
- * control area to expose SVM enlightenments to guests.
- */
-struct hv_enlightenments {
-	struct __packed hv_enlightenments_control {
-		u32 nested_flush_hypercall:1;
-		u32 msr_bitmap:1;
-		u32 enlightened_npt_tlb: 1;
-		u32 reserved:29;
-	} __packed hv_enlightenments_control;
-	u32 hv_vp_id;
-	u64 hv_vm_id;
-	u64 partition_assist_page;
-	u64 reserved;
-} __packed;
-
-/*
- * Hyper-V uses the software reserved clean bit in VMCB
- */
-#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW
-
 #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 3aa9184d1e4ed..a2f25bffff488 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -195,7 +195,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 	if (!svm->nested.force_msr_bitmap_recalc &&
 	    kvm_hv_hypercall_enabled(&svm->vcpu) &&
 	    hve->hv_enlightenments_control.msr_bitmap &&
-	    (svm->nested.ctl.clean & BIT(VMCB_HV_NESTED_ENLIGHTENMENTS)))
+	    (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
 		goto set_msrpm_base_pa;
 
 	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
index 8cdc62c74a964..ed5e793925441 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.c
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -32,7 +32,7 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
 	hve->hv_vm_id = (unsigned long)vcpu->kvm;
 	if (!hve->hv_enlightenments_control.nested_flush_hypercall) {
 		hve->hv_enlightenments_control.nested_flush_hypercall = 1;
-		vmcb_mark_dirty(to_svm(vcpu)->vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+		vmcb_mark_dirty(to_svm(vcpu)->vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS);
 	}
 
 	return 0;
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index e2fc593804657..66e61a73caebc 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -64,7 +64,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
 		(struct hv_enlightenments *)vmcb->control.reserved_sw;
 
 	if (hve->hv_enlightenments_control.msr_bitmap)
-		vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+		vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS);
 }
 
 static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
@@ -76,7 +76,7 @@ static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
 
 	if (hve->hv_vp_id != vp_index) {
 		hve->hv_vp_id = vp_index;
-		vmcb_mark_dirty(vmcb, VMCB_HV_NESTED_ENLIGHTENMENTS);
+		vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS);
 	}
 }
 #else
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index a380ad7bb9b34..5060fcfe17601 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -39,7 +39,7 @@ struct hv_enlightenments {
 /*
  * Hyper-V uses the software reserved clean bit in VMCB
  */
-#define VMCB_HV_NESTED_ENLIGHTENMENTS (1U << 31)
+#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
 
 void l2_guest_code(void)
 {
@@ -98,14 +98,14 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
 	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
 	set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
 	/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
-	vmcb->control.clean |= VMCB_HV_NESTED_ENLIGHTENMENTS;
+	vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
 	run_guest(vmcb, svm->vmcb_gpa);
 	/* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
 	vmcb->save.rip += 3; /* vmcall */
 
 	/* Now tell KVM we've changed MSR-Bitmap */
-	vmcb->control.clean &= ~VMCB_HV_NESTED_ENLIGHTENMENTS;
+	vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
 	run_guest(vmcb, svm->vmcb_gpa);
 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
 	vmcb->save.rip += 2; /* rdmsr */
-- 
GitLab


From 381fc63ac0754e05d3921e9d399b89dfdfd2b2e5 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 1 Nov 2022 15:53:40 +0100
Subject: [PATCH 1789/4122] KVM: selftests: Move "struct hv_enlightenments" to
 x86_64/svm.h

Move Hyper-V's VMCB "struct hv_enlightenments" to the svm.h header so
that the struct can be referenced in "struct vmcb_control_area".
Alternatively, a dedicated header for SVM+Hyper-V could be added, a la
x86_64/evmcs.h, but it doesn't appear that Hyper-V will end up needing
a wholesale replacement for the VMCB.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-3-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/x86_64/svm.h | 17 +++++++++++++++++
 .../selftests/kvm/x86_64/hyperv_svm_test.c     | 18 ------------------
 2 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
index c8343ff84f7f7..89ce2c6b57fe0 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -58,6 +58,23 @@ enum {
 	INTERCEPT_RDPRU,
 };
 
+struct hv_enlightenments {
+	struct __packed hv_enlightenments_control {
+		u32 nested_flush_hypercall:1;
+		u32 msr_bitmap:1;
+		u32 enlightened_npt_tlb: 1;
+		u32 reserved:29;
+	} __packed hv_enlightenments_control;
+	u32 hv_vp_id;
+	u64 hv_vm_id;
+	u64 partition_assist_page;
+	u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
 
 struct __attribute__ ((__packed__)) vmcb_control_area {
 	u32 intercept_cr;
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 5060fcfe17601..2fd64b419928a 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -23,24 +23,6 @@
 
 #define L2_GUEST_STACK_SIZE 256
 
-struct hv_enlightenments {
-	struct __packed hv_enlightenments_control {
-		u32 nested_flush_hypercall:1;
-		u32 msr_bitmap:1;
-		u32 enlightened_npt_tlb: 1;
-		u32 reserved:29;
-	} __packed hv_enlightenments_control;
-	u32 hv_vp_id;
-	u64 hv_vm_id;
-	u64 partition_assist_page;
-	u64 reserved;
-} __packed;
-
-/*
- * Hyper-V uses the software reserved clean bit in VMCB
- */
-#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
-
 void l2_guest_code(void)
 {
 	GUEST_SYNC(3);
-- 
GitLab


From 68ae7c7bc56a4504ed5efde7c2f8d6024148a35e Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 1 Nov 2022 15:53:41 +0100
Subject: [PATCH 1790/4122] KVM: SVM: Add a proper field for Hyper-V VMCB
 enlightenments

Add a union to provide hv_enlightenments side-by-side with the sw_reserved
bytes that Hyper-V's enlightenments overlay.  Casting sw_reserved
everywhere is messy, confusing, and unnecessarily unsafe.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-4-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/svm.h                        |  7 ++++++-
 arch/x86/kvm/svm/nested.c                         |  9 ++++-----
 arch/x86/kvm/svm/svm.h                            |  5 ++++-
 arch/x86/kvm/svm/svm_onhyperv.c                   |  2 +-
 arch/x86/kvm/svm/svm_onhyperv.h                   | 15 +++++++--------
 tools/testing/selftests/kvm/include/x86_64/svm.h  |  5 ++++-
 .../selftests/kvm/x86_64/hyperv_svm_test.c        |  3 +--
 7 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index 4352b46dd20c9..b37249e7c6607 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -5,6 +5,8 @@
 #include <uapi/asm/svm.h>
 #include <uapi/asm/kvm.h>
 
+#include <asm/hyperv-tlfs.h>
+
 /*
  * 32-bit intercept words in the VMCB Control Area, starting
  * at Byte offset 000h.
@@ -161,7 +163,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	 * Offset 0x3e0, 32 bytes reserved
 	 * for use by hypervisor/software.
 	 */
-	u8 reserved_sw[32];
+	union {
+		struct hv_enlightenments hv_enlightenments;
+		u8 reserved_sw[32];
+	};
 };
 
 
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index a2f25bffff488..622fe00c3acf1 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -180,8 +180,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
  */
 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
-	struct hv_enlightenments *hve =
-		(struct hv_enlightenments *)svm->nested.ctl.reserved_sw;
+	struct hv_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
 	int i;
 
 	/*
@@ -370,8 +369,8 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
 	/* Hyper-V extensions (Enlightened VMCB) */
 	if (kvm_hv_hypercall_enabled(vcpu)) {
 		to->clean = from->clean;
-		memcpy(to->reserved_sw, from->reserved_sw,
-		       sizeof(struct hv_enlightenments));
+		memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
+		       sizeof(to->hv_enlightenments));
 	}
 }
 
@@ -1488,7 +1487,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
 	dst->virt_ext              = from->virt_ext;
 	dst->pause_filter_count   = from->pause_filter_count;
 	dst->pause_filter_thresh  = from->pause_filter_thresh;
-	/* 'clean' and 'reserved_sw' are not changed by KVM */
+	/* 'clean' and 'hv_enlightenments' are not changed by KVM */
 }
 
 static int svm_get_nested_state(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 199a2ecef1cec..a5af367502e42 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -151,7 +151,10 @@ struct vmcb_ctrl_area_cached {
 	u64 nested_cr3;
 	u64 virt_ext;
 	u32 clean;
-	u8 reserved_sw[32];
+	union {
+		struct hv_enlightenments hv_enlightenments;
+		u8 reserved_sw[32];
+	};
 };
 
 struct svm_nested_state {
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
index ed5e793925441..422d00fee24ab 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.c
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -26,7 +26,7 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
 	if (!*p_hv_pa_pg)
 		return -ENOMEM;
 
-	hve = (struct hv_enlightenments *)to_svm(vcpu)->vmcb->control.reserved_sw;
+	hve = &to_svm(vcpu)->vmcb->control.hv_enlightenments;
 
 	hve->partition_assist_page = __pa(*p_hv_pa_pg);
 	hve->hv_vm_id = (unsigned long)vcpu->kvm;
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 66e61a73caebc..5c664dd7bee21 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -17,8 +17,10 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu);
 
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
-	struct hv_enlightenments *hve =
-		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
+
+	BUILD_BUG_ON(sizeof(vmcb->control.hv_enlightenments) !=
+		     sizeof(vmcb->control.reserved_sw));
 
 	if (npt_enabled &&
 	    ms_hyperv.nested_features & HV_X64_NESTED_ENLIGHTENED_TLB)
@@ -60,18 +62,15 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
 		struct kvm_vcpu *vcpu)
 {
 	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
-	struct hv_enlightenments *hve =
-		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
 
 	if (hve->hv_enlightenments_control.msr_bitmap)
 		vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS);
 }
 
-static inline void svm_hv_update_vp_id(struct vmcb *vmcb,
-		struct kvm_vcpu *vcpu)
+static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu)
 {
-	struct hv_enlightenments *hve =
-		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
 	u32 vp_index = kvm_hv_get_vpindex(vcpu);
 
 	if (hve->hv_vp_id != vp_index) {
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
index 89ce2c6b57fe0..6e1527aa34191 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -123,7 +123,10 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	 * Offset 0x3e0, 32 bytes reserved
 	 * for use by hypervisor/software.
 	 */
-	u8 reserved_sw[32];
+	union {
+		struct hv_enlightenments hv_enlightenments;
+		u8 reserved_sw[32];
+	};
 };
 
 
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 2fd64b419928a..8ef6a4c83cb1e 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -46,8 +46,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
 {
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 	struct vmcb *vmcb = svm->vmcb;
-	struct hv_enlightenments *hve =
-		(struct hv_enlightenments *)vmcb->control.reserved_sw;
+	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
 
 	GUEST_SYNC(1);
 
-- 
GitLab


From 26b516bb39215cf60aa1fb55d0a6fd73058698fa Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 1 Nov 2022 15:53:42 +0100
Subject: [PATCH 1791/4122] x86/hyperv: KVM: Rename "hv_enlightenments" to
 "hv_vmcb_enlightenments"

Now that KVM isn't littered with "struct hv_enlightenments" casts, rename
the struct to "hv_vmcb_enlightenments" to highlight the fact that the
struct is specifically for SVM's VMCB.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-5-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h                   | 2 +-
 arch/x86/include/asm/svm.h                           | 2 +-
 arch/x86/kvm/svm/nested.c                            | 2 +-
 arch/x86/kvm/svm/svm.h                               | 2 +-
 arch/x86/kvm/svm/svm_onhyperv.c                      | 2 +-
 arch/x86/kvm/svm/svm_onhyperv.h                      | 6 +++---
 tools/testing/selftests/kvm/include/x86_64/svm.h     | 4 ++--
 tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c | 2 +-
 8 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 245a806a97170..c5e0e5a06c0dc 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -602,7 +602,7 @@ struct hv_enlightened_vmcs {
  * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose
  * SVM enlightenments to guests.
  */
-struct hv_enlightenments {
+struct hv_vmcb_enlightenments {
 	struct __packed hv_enlightenments_control {
 		u32 nested_flush_hypercall:1;
 		u32 msr_bitmap:1;
diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h
index b37249e7c6607..cb1ee53ad3b18 100644
--- a/arch/x86/include/asm/svm.h
+++ b/arch/x86/include/asm/svm.h
@@ -164,7 +164,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	 * for use by hypervisor/software.
 	 */
 	union {
-		struct hv_enlightenments hv_enlightenments;
+		struct hv_vmcb_enlightenments hv_enlightenments;
 		u8 reserved_sw[32];
 	};
 };
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 622fe00c3acf1..cc8f47e7e294d 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -180,7 +180,7 @@ void recalc_intercepts(struct vcpu_svm *svm)
  */
 static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
 {
-	struct hv_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
+	struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
 	int i;
 
 	/*
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index a5af367502e42..4826e6cc611bf 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -152,7 +152,7 @@ struct vmcb_ctrl_area_cached {
 	u64 virt_ext;
 	u32 clean;
 	union {
-		struct hv_enlightenments hv_enlightenments;
+		struct hv_vmcb_enlightenments hv_enlightenments;
 		u8 reserved_sw[32];
 	};
 };
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
index 422d00fee24ab..52c73a8be72b1 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.c
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -16,7 +16,7 @@
 
 int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
 {
-	struct hv_enlightenments *hve;
+	struct hv_vmcb_enlightenments *hve;
 	struct hv_partition_assist_pg **p_hv_pa_pg =
 			&to_kvm_hv(vcpu->kvm)->hv_pa_pg;
 
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index 5c664dd7bee21..d5cb2c62e3557 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -17,7 +17,7 @@ int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu);
 
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
-	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
+	struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
 
 	BUILD_BUG_ON(sizeof(vmcb->control.hv_enlightenments) !=
 		     sizeof(vmcb->control.reserved_sw));
@@ -62,7 +62,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
 		struct kvm_vcpu *vcpu)
 {
 	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
-	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
+	struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
 
 	if (hve->hv_enlightenments_control.msr_bitmap)
 		vmcb_mark_dirty(vmcb, HV_VMCB_NESTED_ENLIGHTENMENTS);
@@ -70,7 +70,7 @@ static inline void svm_hv_vmcb_dirty_nested_enlightenments(
 
 static inline void svm_hv_update_vp_id(struct vmcb *vmcb, struct kvm_vcpu *vcpu)
 {
-	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
+	struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
 	u32 vp_index = kvm_hv_get_vpindex(vcpu);
 
 	if (hve->hv_vp_id != vp_index) {
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
index 6e1527aa34191..483e6ae12f69e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -58,7 +58,7 @@ enum {
 	INTERCEPT_RDPRU,
 };
 
-struct hv_enlightenments {
+struct hv_vmcb_enlightenments {
 	struct __packed hv_enlightenments_control {
 		u32 nested_flush_hypercall:1;
 		u32 msr_bitmap:1;
@@ -124,7 +124,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 	 * for use by hypervisor/software.
 	 */
 	union {
-		struct hv_enlightenments hv_enlightenments;
+		struct hv_vmcb_enlightenments hv_enlightenments;
 		u8 reserved_sw[32];
 	};
 };
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 8ef6a4c83cb1e..1c3fc38b4f151 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -46,7 +46,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
 {
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 	struct vmcb *vmcb = svm->vmcb;
-	struct hv_enlightenments *hve = &vmcb->control.hv_enlightenments;
+	struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
 
 	GUEST_SYNC(1);
 
-- 
GitLab


From b83237ad2167a0f9a43b909adb42623941b741b8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:43 +0100
Subject: [PATCH 1792/4122] KVM: x86: Rename 'enable_direct_tlbflush' to
 'enable_l2_tlb_flush'

To make terminology between Hyper-V-on-KVM and KVM-on-Hyper-V consistent,
rename 'enable_direct_tlbflush' to 'enable_l2_tlb_flush'. The change
eliminates the use of confusing 'direct' and adds the missing underscore.

No functional change.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-6-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm-x86-ops.h | 2 +-
 arch/x86/include/asm/kvm_host.h    | 2 +-
 arch/x86/kvm/svm/svm_onhyperv.c    | 2 +-
 arch/x86/kvm/svm/svm_onhyperv.h    | 6 +++---
 arch/x86/kvm/vmx/vmx.c             | 6 +++---
 arch/x86/kvm/x86.c                 | 6 +++---
 6 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index ea58e67e9a670..abccd51dcfca1 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -125,7 +125,7 @@ KVM_X86_OP_OPTIONAL(guest_memory_reclaimed)
 KVM_X86_OP(get_msr_feature)
 KVM_X86_OP(can_emulate_instruction)
 KVM_X86_OP(apic_init_signal_blocked)
-KVM_X86_OP_OPTIONAL(enable_direct_tlbflush)
+KVM_X86_OP_OPTIONAL(enable_l2_tlb_flush)
 KVM_X86_OP_OPTIONAL(migrate_timers)
 KVM_X86_OP(msr_filter_changed)
 KVM_X86_OP(complete_emulated_msr)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 598eb3b9ae440..a413f841e8308 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1652,7 +1652,7 @@ struct kvm_x86_ops {
 					void *insn, int insn_len);
 
 	bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
-	int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
+	int (*enable_l2_tlb_flush)(struct kvm_vcpu *vcpu);
 
 	void (*migrate_timers)(struct kvm_vcpu *vcpu);
 	void (*msr_filter_changed)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm/svm_onhyperv.c b/arch/x86/kvm/svm/svm_onhyperv.c
index 52c73a8be72b1..26a89d0da93e4 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.c
+++ b/arch/x86/kvm/svm/svm_onhyperv.c
@@ -14,7 +14,7 @@
 #include "kvm_onhyperv.h"
 #include "svm_onhyperv.h"
 
-int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
 {
 	struct hv_vmcb_enlightenments *hve;
 	struct hv_partition_assist_pg **p_hv_pa_pg =
diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h
index d5cb2c62e3557..45faf84476cec 100644
--- a/arch/x86/kvm/svm/svm_onhyperv.h
+++ b/arch/x86/kvm/svm/svm_onhyperv.h
@@ -13,7 +13,7 @@
 
 static struct kvm_x86_ops svm_x86_ops;
 
-int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu);
+int svm_hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu);
 
 static inline void svm_hv_init_vmcb(struct vmcb *vmcb)
 {
@@ -53,8 +53,8 @@ static inline void svm_hv_hardware_setup(void)
 
 			vp_ap->nested_control.features.directhypercall = 1;
 		}
-		svm_x86_ops.enable_direct_tlbflush =
-				svm_hv_enable_direct_tlbflush;
+		svm_x86_ops.enable_l2_tlb_flush =
+				svm_hv_enable_l2_tlb_flush;
 	}
 }
 
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index aca88524fd1ec..5806ab88851e4 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -527,7 +527,7 @@ static unsigned long host_idt_base;
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
 
-static int hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu)
+static int hv_enable_l2_tlb_flush(struct kvm_vcpu *vcpu)
 {
 	struct hv_enlightened_vmcs *evmcs;
 	struct hv_partition_assist_pg **p_hv_pa_pg =
@@ -8520,8 +8520,8 @@ static int __init vmx_init(void)
 		}
 
 		if (ms_hyperv.nested_features & HV_X64_NESTED_DIRECT_FLUSH)
-			vmx_x86_ops.enable_direct_tlbflush
-				= hv_enable_direct_tlbflush;
+			vmx_x86_ops.enable_l2_tlb_flush
+				= hv_enable_l2_tlb_flush;
 
 	} else {
 		enlightened_vmcs = false;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 404325a13dc2e..838ed4ea7e4d8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4481,7 +4481,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 			kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0;
 		break;
 	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
-		r = kvm_x86_ops.enable_direct_tlbflush != NULL;
+		r = kvm_x86_ops.enable_l2_tlb_flush != NULL;
 		break;
 	case KVM_CAP_HYPERV_ENLIGHTENED_VMCS:
 		r = kvm_x86_ops.nested_ops->enable_evmcs != NULL;
@@ -5494,10 +5494,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 		}
 		return r;
 	case KVM_CAP_HYPERV_DIRECT_TLBFLUSH:
-		if (!kvm_x86_ops.enable_direct_tlbflush)
+		if (!kvm_x86_ops.enable_l2_tlb_flush)
 			return -ENOTTY;
 
-		return static_call(kvm_x86_enable_direct_tlbflush)(vcpu);
+		return static_call(kvm_x86_enable_l2_tlb_flush)(vcpu);
 
 	case KVM_CAP_HYPERV_ENFORCE_CPUID:
 		return kvm_hv_set_enforce_cpuid(vcpu, cap->args[0]);
-- 
GitLab


From a789aeba419647c44d7e7320de20fea037c211d0 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:44 +0100
Subject: [PATCH 1793/4122] KVM: VMX: Rename "vmx/evmcs.{ch}" to
 "vmx/hyperv.{ch}"

To conform with SVM, rename VMX specific Hyper-V files from "evmcs.{ch}"
to "hyperv.{ch}". While Enlightened VMCS is a lion's share of these
files, some stuff (e.g. enlightened MSR bitmap, the upcoming Hyper-V
L2 TLB flush, ...) goes beyond that.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-7-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Makefile                  | 2 +-
 arch/x86/kvm/vmx/{evmcs.c => hyperv.c} | 3 +--
 arch/x86/kvm/vmx/{evmcs.h => hyperv.h} | 8 +++++---
 arch/x86/kvm/vmx/nested.c              | 1 -
 arch/x86/kvm/vmx/vmx.c                 | 1 -
 arch/x86/kvm/vmx/vmx_ops.h             | 2 +-
 6 files changed, 8 insertions(+), 9 deletions(-)
 rename arch/x86/kvm/vmx/{evmcs.c => hyperv.c} (99%)
 rename arch/x86/kvm/vmx/{evmcs.h => hyperv.h} (98%)

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index b8a494b6a5ec9..4cf407563feea 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -23,7 +23,7 @@ kvm-$(CONFIG_KVM_XEN)	+= xen.o
 kvm-$(CONFIG_KVM_SMM)	+= smm.o
 
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
-			   vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
+			   vmx/hyperv.o vmx/nested.o vmx/posted_intr.o
 kvm-intel-$(CONFIG_X86_SGX_KVM)	+= vmx/sgx.o
 
 kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/hyperv.c
similarity index 99%
rename from arch/x86/kvm/vmx/evmcs.c
rename to arch/x86/kvm/vmx/hyperv.c
index d8b23c96d6272..5e239158174ea 100644
--- a/arch/x86/kvm/vmx/evmcs.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -3,9 +3,8 @@
 #include <linux/errno.h>
 #include <linux/smp.h>
 
-#include "../hyperv.h"
 #include "../cpuid.h"
-#include "evmcs.h"
+#include "hyperv.h"
 #include "vmcs.h"
 #include "vmx.h"
 #include "trace.h"
diff --git a/arch/x86/kvm/vmx/evmcs.h b/arch/x86/kvm/vmx/hyperv.h
similarity index 98%
rename from arch/x86/kvm/vmx/evmcs.h
rename to arch/x86/kvm/vmx/hyperv.h
index 6f746ef3c0386..99a151af7a817 100644
--- a/arch/x86/kvm/vmx/evmcs.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __KVM_X86_VMX_EVMCS_H
-#define __KVM_X86_VMX_EVMCS_H
+#ifndef __KVM_X86_VMX_HYPERV_H
+#define __KVM_X86_VMX_HYPERV_H
 
 #include <linux/jump_label.h>
 
@@ -8,6 +8,8 @@
 #include <asm/mshyperv.h>
 #include <asm/vmx.h>
 
+#include "../hyperv.h"
+
 #include "capabilities.h"
 #include "vmcs.h"
 #include "vmcs12.h"
@@ -242,4 +244,4 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
 
-#endif /* __KVM_X86_VMX_EVMCS_H */
+#endif /* __KVM_X86_VMX_HYPERV_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 7924dea936781..048b2c3e3b3fc 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -7,7 +7,6 @@
 #include <asm/mmu_context.h>
 
 #include "cpuid.h"
-#include "evmcs.h"
 #include "hyperv.h"
 #include "mmu.h"
 #include "nested.h"
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 5806ab88851e4..cea8c07f52298 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -51,7 +51,6 @@
 
 #include "capabilities.h"
 #include "cpuid.h"
-#include "evmcs.h"
 #include "hyperv.h"
 #include "kvm_onhyperv.h"
 #include "irq.h"
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index ec268df83ed67..f6f23c7397dc5 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -6,7 +6,7 @@
 
 #include <asm/vmx.h>
 
-#include "evmcs.h"
+#include "hyperv.h"
 #include "vmcs.h"
 #include "../x86.h"
 
-- 
GitLab


From e94cea0930195adee67c93140ad9f95e430b2be8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 1 Nov 2022 15:53:45 +0100
Subject: [PATCH 1794/4122] KVM: x86: Move clearing of TLB_FLUSH_CURRENT to
 kvm_vcpu_flush_tlb_all()

Clear KVM_REQ_TLB_FLUSH_CURRENT in kvm_vcpu_flush_tlb_all() instead of in
its sole caller that processes KVM_REQ_TLB_FLUSH.  Regardless of why/when
kvm_vcpu_flush_tlb_all() is called, flushing "all" TLB entries also
flushes "current" TLB entries.

Ideally, there will never be another caller of kvm_vcpu_flush_tlb_all(),
and moving the handling "requires" extra work to document the ordering
requirement, but future Hyper-V paravirt TLB flushing support will add
similar logic for flush "guest" (Hyper-V can flush a subset of "guest"
entries).  And in the Hyper-V case, KVM needs to do more than just clear
the request, the queue of GPAs to flush also needs to purged, and doing
all only in the request path is undesirable as kvm_vcpu_flush_tlb_guest()
does have multiple callers (though it's unlikely KVM's paravirt TLB flush
will coincide with Hyper-V's paravirt TLB flush).

Move the logic even though it adds extra "work" so that KVM will be
consistent with how flush requests are processed when the Hyper-V support
lands.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-8-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 838ed4ea7e4d8..7fc5508c0b4a9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3399,6 +3399,9 @@ static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu)
 {
 	++vcpu->stat.tlb_flush;
 	static_call(kvm_x86_flush_tlb_all)(vcpu);
+
+	/* Flushing all ASIDs flushes the current ASID... */
+	kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
 }
 
 static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
@@ -10236,12 +10239,15 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 			kvm_mmu_sync_roots(vcpu);
 		if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu))
 			kvm_mmu_load_pgd(vcpu);
-		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+
+		/*
+		 * Note, the order matters here, as flushing "all" TLB entries
+		 * also flushes the "current" TLB entries, i.e. servicing the
+		 * flush "all" will clear any request to flush "current".
+		 */
+		if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu))
 			kvm_vcpu_flush_tlb_all(vcpu);
 
-			/* Flushing all ASIDs flushes the current ASID... */
-			kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-		}
 		kvm_service_local_tlb_flush_requests(vcpu);
 
 		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
-- 
GitLab


From adc43caa0a25746e1a9dabbab241abd01120dbfe Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:46 +0100
Subject: [PATCH 1795/4122] KVM: x86: hyper-v: Resurrect dedicated
 KVM_REQ_HV_TLB_FLUSH flag

In preparation to implementing fine-grained Hyper-V TLB flush and
L2 TLB flush, resurrect dedicated KVM_REQ_HV_TLB_FLUSH request bit. As
KVM_REQ_TLB_FLUSH_GUEST is a stronger operation, clear KVM_REQ_HV_TLB_FLUSH
request in kvm_vcpu_flush_tlb_guest().

The flush itself is temporary handled by kvm_vcpu_flush_tlb_guest().

No functional change intended.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-9-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 2 ++
 arch/x86/kvm/hyperv.c           | 4 ++--
 arch/x86/kvm/svm/svm.c          | 7 +++++++
 arch/x86/kvm/x86.c              | 9 +++++++++
 4 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a413f841e8308..0b85230a0e0a7 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -110,6 +110,8 @@
 	KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_MMU_FREE_OBSOLETE_ROOTS \
 	KVM_ARCH_REQ_FLAGS(31, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_HV_TLB_FLUSH \
+	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 0adf4a437e85f..3c0f639f6a05e 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1870,11 +1870,11 @@ do_flush:
 	 * analyze it here, flush TLB regardless of the specified address space.
 	 */
 	if (all_cpus) {
-		kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH_GUEST);
+		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
 	} else {
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
-		kvm_make_vcpus_request_mask(kvm, KVM_REQ_TLB_FLUSH_GUEST, vcpu_mask);
+		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
 	}
 
 ret_success:
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7efc4fdaa446b..4ea6ddd998994 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3722,6 +3722,13 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
 
+	/*
+	 * Unlike VMX, SVM doesn't provide a way to flush only NPT TLB entries.
+	 * A TLB flush for the current ASID flushes both "host" and "guest" TLB
+	 * entries, and thus is a superset of Hyper-V's fine grained flushing.
+	 */
+	kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+
 	/*
 	 * Flush only the current ASID even if the TLB flush was invoked via
 	 * kvm_flush_remote_tlbs().  Although flushing remote TLBs requires all
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7fc5508c0b4a9..12e49e8566d49 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3420,6 +3420,12 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 	}
 
 	static_call(kvm_x86_flush_tlb_guest)(vcpu);
+
+	/*
+	 * Flushing all "guest" TLB is always a superset of Hyper-V's fine
+	 * grained flushing.
+	 */
+	kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
 }
 
 
@@ -10250,6 +10256,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		kvm_service_local_tlb_flush_requests(vcpu);
 
+		if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+			kvm_vcpu_flush_tlb_guest(vcpu);
+
 		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
 			vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS;
 			r = 0;
-- 
GitLab


From 0823570f01989d3703751f66534a138d4fae062e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:47 +0100
Subject: [PATCH 1796/4122] KVM: x86: hyper-v: Introduce TLB flush fifo

To allow flushing individual GVAs instead of always flushing the whole
VPID a per-vCPU structure to pass the requests is needed. Use standard
'kfifo' to queue two types of entries: individual GVA (GFN + up to 4095
following GFNs in the lower 12 bits) and 'flush all'.

The size of the fifo is arbitrarily set to '16'.

Note, kvm_hv_flush_tlb() only queues 'flush all' entries for now and
kvm_hv_vcpu_flush_tlb() doesn't actually read the fifo just resets the
queue before returning -EOPNOTSUPP (which triggers full TLB flush) so
the functional change is very small but the infrastructure is prepared
to handle individual GVA flush requests.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-10-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 20 ++++++++++++++
 arch/x86/kvm/hyperv.c           | 47 +++++++++++++++++++++++++++++++++
 arch/x86/kvm/hyperv.h           | 15 +++++++++++
 arch/x86/kvm/svm/svm.c          |  2 +-
 arch/x86/kvm/x86.c              | 11 ++++++--
 5 files changed, 92 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 0b85230a0e0a7..3e35dcf40dc7c 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
 #include <linux/clocksource.h>
 #include <linux/irqbypass.h>
 #include <linux/hyperv.h>
+#include <linux/kfifo.h>
 
 #include <asm/apic.h>
 #include <asm/pvclock-abi.h>
@@ -618,6 +619,23 @@ struct kvm_vcpu_hv_synic {
 	bool dont_zero_synic_pages;
 };
 
+/* The maximum number of entries on the TLB flush fifo. */
+#define KVM_HV_TLB_FLUSH_FIFO_SIZE (16)
+/*
+ * Note: the following 'magic' entry is made up by KVM to avoid putting
+ * anything besides GVA on the TLB flush fifo. It is theoretically possible
+ * to observe a request to flush 4095 PFNs starting from 0xfffffffffffff000
+ * which will look identical. KVM's action to 'flush everything' instead of
+ * flushing these particular addresses is, however, fully legitimate as
+ * flushing more than requested is always OK.
+ */
+#define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)
+
+struct kvm_vcpu_hv_tlb_flush_fifo {
+	spinlock_t write_lock;
+	DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
+};
+
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
 	struct kvm_vcpu *vcpu;
@@ -639,6 +657,8 @@ struct kvm_vcpu_hv {
 		u32 nested_eax; /* HYPERV_CPUID_NESTED_FEATURES.EAX */
 		u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
 	} cpuid_cache;
+
+	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo;
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 3c0f639f6a05e..9d9a5ff2d54bd 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -29,6 +29,7 @@
 #include <linux/kvm_host.h>
 #include <linux/highmem.h>
 #include <linux/sched/cputime.h>
+#include <linux/spinlock.h>
 #include <linux/eventfd.h>
 
 #include <asm/apicdef.h>
@@ -954,6 +955,9 @@ int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 
 	hv_vcpu->vp_index = vcpu->vcpu_idx;
 
+	INIT_KFIFO(hv_vcpu->tlb_flush_fifo.entries);
+	spin_lock_init(&hv_vcpu->tlb_flush_fifo.write_lock);
+
 	return 0;
 }
 
@@ -1783,6 +1787,37 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
 			      var_cnt * sizeof(*sparse_banks));
 }
 
+static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;
+
+	if (!hv_vcpu)
+		return;
+
+	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+
+	kfifo_in_spinlocked_noirqsave(&tlb_flush_fifo->entries, &flush_all_entry,
+				      1, &tlb_flush_fifo->write_lock);
+}
+
+int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	if (!hv_vcpu)
+		return -EINVAL;
+
+	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+
+	kfifo_reset_out(&tlb_flush_fifo->entries);
+
+	/* Precise flushing isn't implemented yet. */
+	return -EOPNOTSUPP;
+}
+
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 {
 	struct kvm *kvm = vcpu->kvm;
@@ -1791,6 +1826,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 	u64 valid_bank_mask;
 	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
+	struct kvm_vcpu *v;
+	unsigned long i;
 	bool all_cpus;
 
 	/*
@@ -1870,10 +1907,20 @@ do_flush:
 	 * analyze it here, flush TLB regardless of the specified address space.
 	 */
 	if (all_cpus) {
+		kvm_for_each_vcpu(i, v, kvm)
+			hv_tlb_flush_enqueue(v);
+
 		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
 	} else {
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
+		for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) {
+			v = kvm_get_vcpu(kvm, i);
+			if (!v)
+				continue;
+			hv_tlb_flush_enqueue(v);
+		}
+
 		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
 	}
 
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 1030b1b505523..f79edf9234cd4 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -151,4 +151,19 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
 int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		     struct kvm_cpuid_entry2 __user *entries);
 
+static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+		return;
+
+	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+
+	kfifo_reset_out(&tlb_flush_fifo->entries);
+}
+
+int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
+
 #endif
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 4ea6ddd998994..91352d6928452 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3727,7 +3727,7 @@ static void svm_flush_tlb_current(struct kvm_vcpu *vcpu)
 	 * A TLB flush for the current ASID flushes both "host" and "guest" TLB
 	 * entries, and thus is a superset of Hyper-V's fine grained flushing.
 	 */
-	kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+	kvm_hv_vcpu_purge_flush_tlb(vcpu);
 
 	/*
 	 * Flush only the current ASID even if the TLB flush was invoked via
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 12e49e8566d49..72ac6bf05c8b4 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3425,7 +3425,7 @@ static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu)
 	 * Flushing all "guest" TLB is always a superset of Hyper-V's fine
 	 * grained flushing.
 	 */
-	kvm_clear_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+	kvm_hv_vcpu_purge_flush_tlb(vcpu);
 }
 
 
@@ -10256,7 +10256,14 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 
 		kvm_service_local_tlb_flush_requests(vcpu);
 
-		if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+		/*
+		 * Fall back to a "full" guest flush if Hyper-V's precise
+		 * flushing fails.  Note, Hyper-V's flushing is per-vCPU, but
+		 * the flushes are considered "remote" and not "local" because
+		 * the requests can be initiated from other vCPUs.
+		 */
+		if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu) &&
+		    kvm_hv_vcpu_flush_tlb(vcpu))
 			kvm_vcpu_flush_tlb_guest(vcpu);
 
 		if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) {
-- 
GitLab


From 56b5354fd8f9173de2e1614864e5fb7bec8c50c4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 1 Nov 2022 15:53:48 +0100
Subject: [PATCH 1797/4122] KVM: x86: hyper-v: Add helper to read hypercall
 data for array

Move the guts of kvm_get_sparse_vp_set() to a helper so that the code for
reading a guest-provided array can be reused in the future, e.g. for
getting a list of virtual addresses whose TLB entries need to be flushed.

Opportunisticaly swap the order of the data and XMM adjustment so that
the XMM/gpa offsets are bundled together.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-11-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 53 +++++++++++++++++++++++++++----------------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 9d9a5ff2d54bd..3ba7e2d2fbbdc 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1753,38 +1753,51 @@ struct kvm_hv_hcall {
 	sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS];
 };
 
-static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
-				 int consumed_xmm_halves,
-				 u64 *sparse_banks, gpa_t offset)
-{
-	u16 var_cnt;
-	int i;
 
-	if (hc->var_cnt > 64)
-		return -EINVAL;
-
-	/* Ignore banks that cannot possibly contain a legal VP index. */
-	var_cnt = min_t(u16, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS);
+static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
+			      u16 orig_cnt, u16 cnt_cap, u64 *data,
+			      int consumed_xmm_halves, gpa_t offset)
+{
+	/*
+	 * Preserve the original count when ignoring entries via a "cap", KVM
+	 * still needs to validate the guest input (though the non-XMM path
+	 * punts on the checks).
+	 */
+	u16 cnt = min(orig_cnt, cnt_cap);
+	int i, j;
 
 	if (hc->fast) {
 		/*
 		 * Each XMM holds two sparse banks, but do not count halves that
 		 * have already been consumed for hypercall parameters.
 		 */
-		if (hc->var_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
+		if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - consumed_xmm_halves)
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
-		for (i = 0; i < var_cnt; i++) {
-			int j = i + consumed_xmm_halves;
+
+		for (i = 0; i < cnt; i++) {
+			j = i + consumed_xmm_halves;
 			if (j % 2)
-				sparse_banks[i] = sse128_hi(hc->xmm[j / 2]);
+				data[i] = sse128_hi(hc->xmm[j / 2]);
 			else
-				sparse_banks[i] = sse128_lo(hc->xmm[j / 2]);
+				data[i] = sse128_lo(hc->xmm[j / 2]);
 		}
 		return 0;
 	}
 
-	return kvm_read_guest(kvm, hc->ingpa + offset, sparse_banks,
-			      var_cnt * sizeof(*sparse_banks));
+	return kvm_read_guest(kvm, hc->ingpa + offset, data,
+			      cnt * sizeof(*data));
+}
+
+static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
+				 u64 *sparse_banks, int consumed_xmm_halves,
+				 gpa_t offset)
+{
+	if (hc->var_cnt > 64)
+		return -EINVAL;
+
+	/* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
+	return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS,
+				  sparse_banks, consumed_xmm_halves, offset);
 }
 
 static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
@@ -1895,7 +1908,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		if (!hc->var_cnt)
 			goto ret_success;
 
-		if (kvm_get_sparse_vp_set(kvm, hc, 2, sparse_banks,
+		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 2,
 					  offsetof(struct hv_tlb_flush_ex,
 						   hv_vp_set.bank_contents)))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
@@ -2006,7 +2019,7 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		if (!hc->var_cnt)
 			goto ret_success;
 
-		if (kvm_get_sparse_vp_set(kvm, hc, 1, sparse_banks,
+		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 1,
 					  offsetof(struct hv_send_ipi_ex,
 						   vp_set.bank_contents)))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
-- 
GitLab


From 260970862c88b4130e9e12be023c7e2c2d37a966 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:49 +0100
Subject: [PATCH 1798/4122] KVM: x86: hyper-v: Handle
 HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls gently

Currently, HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX} calls are handled
the exact same way as HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE{,EX}: by
flushing the whole VPID and this is sub-optimal. Switch to handling
these requests with 'flush_tlb_gva()' hooks instead. Use the newly
introduced TLB flush fifo to queue the requests.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-12-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 111 ++++++++++++++++++++++++++++++++++++------
 1 file changed, 95 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 3ba7e2d2fbbdc..6868c478617c0 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1800,7 +1800,14 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
 				  sparse_banks, consumed_xmm_halves, offset);
 }
 
-static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
+static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[],
+					int consumed_xmm_halves, gpa_t offset)
+{
+	return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt,
+				  entries, consumed_xmm_halves, offset);
+}
+
+static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count)
 {
 	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
@@ -1811,24 +1818,64 @@ static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu)
 
 	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
 
-	kfifo_in_spinlocked_noirqsave(&tlb_flush_fifo->entries, &flush_all_entry,
-				      1, &tlb_flush_fifo->write_lock);
+	spin_lock(&tlb_flush_fifo->write_lock);
+
+	/*
+	 * All entries should fit on the fifo leaving one free for 'flush all'
+	 * entry in case another request comes in. In case there's not enough
+	 * space, just put 'flush all' entry there.
+	 */
+	if (count && entries && count < kfifo_avail(&tlb_flush_fifo->entries)) {
+		WARN_ON(kfifo_in(&tlb_flush_fifo->entries, entries, count) != count);
+		goto out_unlock;
+	}
+
+	/*
+	 * Note: full fifo always contains 'flush all' entry, no need to check the
+	 * return value.
+	 */
+	kfifo_in(&tlb_flush_fifo->entries, &flush_all_entry, 1);
+
+out_unlock:
+	spin_unlock(&tlb_flush_fifo->write_lock);
 }
 
 int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u64 entries[KVM_HV_TLB_FLUSH_FIFO_SIZE];
+	int i, j, count;
+	gva_t gva;
 
-	if (!hv_vcpu)
+	if (!tdp_enabled || !hv_vcpu)
 		return -EINVAL;
 
 	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
 
+	count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE);
+
+	for (i = 0; i < count; i++) {
+		if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
+			goto out_flush_all;
+
+		/*
+		 * Lower 12 bits of 'address' encode the number of additional
+		 * pages to flush.
+		 */
+		gva = entries[i] & PAGE_MASK;
+		for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++)
+			static_call(kvm_x86_flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE);
+
+		++vcpu->stat.tlb_flush;
+	}
+	return 0;
+
+out_flush_all:
 	kfifo_reset_out(&tlb_flush_fifo->entries);
 
-	/* Precise flushing isn't implemented yet. */
-	return -EOPNOTSUPP;
+	/* Fall back to full flush. */
+	return -ENOSPC;
 }
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
@@ -1837,11 +1884,21 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
 	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
+	/*
+	 * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
+	 * entries on the TLB flush fifo. The last entry, however, needs to be
+	 * always left free for 'flush all' entry which gets placed when
+	 * there is not enough space to put all the requested entries.
+	 */
+	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1];
+	u64 *tlb_flush_entries;
 	u64 valid_bank_mask;
 	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	struct kvm_vcpu *v;
 	unsigned long i;
 	bool all_cpus;
+	int consumed_xmm_halves = 0;
+	gpa_t data_offset;
 
 	/*
 	 * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the
@@ -1857,10 +1914,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 			flush.address_space = hc->ingpa;
 			flush.flags = hc->outgpa;
 			flush.processor_mask = sse128_lo(hc->xmm[0]);
+			consumed_xmm_halves = 1;
 		} else {
 			if (unlikely(kvm_read_guest(kvm, hc->ingpa,
 						    &flush, sizeof(flush))))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
+			data_offset = sizeof(flush);
 		}
 
 		trace_kvm_hv_flush_tlb(flush.processor_mask,
@@ -1884,10 +1943,12 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 			flush_ex.flags = hc->outgpa;
 			memcpy(&flush_ex.hv_vp_set,
 			       &hc->xmm[0], sizeof(hc->xmm[0]));
+			consumed_xmm_halves = 2;
 		} else {
 			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
 						    sizeof(flush_ex))))
 				return HV_STATUS_INVALID_HYPERCALL_INPUT;
+			data_offset = sizeof(flush_ex);
 		}
 
 		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
@@ -1902,26 +1963,44 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		if (hc->var_cnt != hweight64(valid_bank_mask))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
-		if (all_cpus)
-			goto do_flush;
+		if (!all_cpus) {
+			if (!hc->var_cnt)
+				goto ret_success;
 
-		if (!hc->var_cnt)
-			goto ret_success;
+			if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks,
+						  consumed_xmm_halves, data_offset))
+				return HV_STATUS_INVALID_HYPERCALL_INPUT;
+		}
+
+		/*
+		 * Hyper-V TLFS doesn't explicitly forbid non-empty sparse vCPU
+		 * banks (and, thus, non-zero 'var_cnt') for the 'all vCPUs'
+		 * case (HV_GENERIC_SET_ALL).  Always adjust data_offset and
+		 * consumed_xmm_halves to make sure TLB flush entries are read
+		 * from the correct offset.
+		 */
+		data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
+		consumed_xmm_halves += hc->var_cnt;
+	}
 
-		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks, 2,
-					  offsetof(struct hv_tlb_flush_ex,
-						   hv_vp_set.bank_contents)))
+	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
+	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX ||
+	    hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) {
+		tlb_flush_entries = NULL;
+	} else {
+		if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries,
+						consumed_xmm_halves, data_offset))
 			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+		tlb_flush_entries = __tlb_flush_entries;
 	}
 
-do_flush:
 	/*
 	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
 	 * analyze it here, flush TLB regardless of the specified address space.
 	 */
 	if (all_cpus) {
 		kvm_for_each_vcpu(i, v, kvm)
-			hv_tlb_flush_enqueue(v);
+			hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
 
 		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
 	} else {
@@ -1931,7 +2010,7 @@ do_flush:
 			v = kvm_get_vcpu(kvm, i);
 			if (!v)
 				continue;
-			hv_tlb_flush_enqueue(v);
+			hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
 		}
 
 		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
-- 
GitLab


From f84fcb66568c0b00626f7f03e28db7d0dcba8098 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:50 +0100
Subject: [PATCH 1799/4122] KVM: x86: hyper-v: Expose support for extended gva
 ranges for flush hypercalls

Extended GVA ranges support bit seems to indicate whether lower 12
bits of GVA can be used to specify up to 4095 additional consequent
GVAs to flush. This is somewhat described in TLFS.

Previously, KVM was handling HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST{,EX}
requests by flushing the whole VPID so technically, extended GVA
ranges were already supported. As such requests are handled more
gently now, advertizing support for extended ranges starts making
sense to reduce the size of TLB flush requests.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-13-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h | 2 ++
 arch/x86/kvm/hyperv.c              | 1 +
 2 files changed, 3 insertions(+)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index c5e0e5a06c0dc..6639979302ab7 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -61,6 +61,8 @@
 #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE		BIT(10)
 /* Support for debug MSRs available */
 #define HV_FEATURE_DEBUG_MSRS_AVAILABLE			BIT(11)
+/* Support for extended gva ranges for flush hypercalls available */
+#define HV_FEATURE_EXT_GVA_RANGES_FLUSH			BIT(14)
 /*
  * Support for returning hypercall output block via XMM
  * registers is available
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 6868c478617c0..fca9c51891f5f 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2641,6 +2641,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 			ent->ebx |= HV_DEBUGGING;
 			ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
 			ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
+			ent->edx |= HV_FEATURE_EXT_GVA_RANGES_FLUSH;
 
 			/*
 			 * Direct Synthetic timers only make sense with in-kernel
-- 
GitLab


From aee738236dca0d0870789138ec494e15d6303566 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:51 +0100
Subject: [PATCH 1800/4122] KVM: x86: Prepare kvm_hv_flush_tlb() to handle L2's
 GPAs

To handle L2 TLB flush requests, KVM needs to translate the specified
L2 GPA to L1 GPA to read hypercall arguments from there.

No functional change as KVM doesn't handle VMCALL/VMMCALL from L2 yet.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-14-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index fca9c51891f5f..cb145987f5b85 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -23,6 +23,7 @@
 #include "ioapic.h"
 #include "cpuid.h"
 #include "hyperv.h"
+#include "mmu.h"
 #include "xen.h"
 
 #include <linux/cpu.h>
@@ -1908,6 +1909,19 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	 */
 	BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > 64);
 
+	/*
+	 * 'Slow' hypercall's first parameter is the address in guest's memory
+	 * where hypercall parameters are placed. This is either a GPA or a
+	 * nested GPA when KVM is handling the call from L2 ('direct' TLB
+	 * flush).  Translate the address here so the memory can be uniformly
+	 * read with kvm_read_guest().
+	 */
+	if (!hc->fast && is_guest_mode(vcpu)) {
+		hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL);
+		if (unlikely(hc->ingpa == INVALID_GPA))
+			return HV_STATUS_INVALID_HYPERCALL_INPUT;
+	}
+
 	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST ||
 	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) {
 		if (hc->fast) {
-- 
GitLab


From bd19c94a19b09b563a20862c651859f6e3d73847 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:52 +0100
Subject: [PATCH 1801/4122] x86/hyperv: Introduce
 HV_MAX_SPARSE_VCPU_BANKS/HV_VCPUS_PER_SPARSE_BANK constants

It may not come clear from where the magical '64' value used in
__cpumask_to_vpset() come from. Moreover, '64' means both the maximum
sparse bank number as well as the number of vCPUs per bank. Add defines
to make things clear. These defines are also going to be used by KVM.

No functional change.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-15-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/asm-generic/hyperv-tlfs.h |  5 +++++
 include/asm-generic/mshyperv.h    | 11 ++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index fdce7a4cfc6ff..020ca9bdbb79a 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -399,6 +399,11 @@ struct hv_vpset {
 	u64 bank_contents[];
 } __packed;
 
+/* The maximum number of sparse vCPU banks which can be encoded by 'struct hv_vpset' */
+#define HV_MAX_SPARSE_VCPU_BANKS (64)
+/* The number of vCPUs in one sparse bank */
+#define HV_VCPUS_PER_SPARSE_BANK (64)
+
 /* HvCallSendSyntheticClusterIpi hypercall */
 struct hv_send_ipi {
 	u32 vector;
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215b..d55d2833a37b7 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -211,9 +211,10 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
 {
 	int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;
 	int this_cpu = smp_processor_id();
+	int max_vcpu_bank = hv_max_vp_index / HV_VCPUS_PER_SPARSE_BANK;
 
-	/* valid_bank_mask can represent up to 64 banks */
-	if (hv_max_vp_index / 64 >= 64)
+	/* vpset.valid_bank_mask can represent up to HV_MAX_SPARSE_VCPU_BANKS banks */
+	if (max_vcpu_bank >= HV_MAX_SPARSE_VCPU_BANKS)
 		return 0;
 
 	/*
@@ -221,7 +222,7 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
 	 * structs are not cleared between calls, we risk flushing unneeded
 	 * vCPUs otherwise.
 	 */
-	for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
+	for (vcpu_bank = 0; vcpu_bank <= max_vcpu_bank; vcpu_bank++)
 		vpset->bank_contents[vcpu_bank] = 0;
 
 	/*
@@ -233,8 +234,8 @@ static inline int __cpumask_to_vpset(struct hv_vpset *vpset,
 		vcpu = hv_cpu_number_to_vp_number(cpu);
 		if (vcpu == VP_INVAL)
 			return -1;
-		vcpu_bank = vcpu / 64;
-		vcpu_offset = vcpu % 64;
+		vcpu_bank = vcpu / HV_VCPUS_PER_SPARSE_BANK;
+		vcpu_offset = vcpu % HV_VCPUS_PER_SPARSE_BANK;
 		__set_bit(vcpu_offset, (unsigned long *)
 			  &vpset->bank_contents[vcpu_bank]);
 		if (vcpu_bank >= nr_bank)
-- 
GitLab


From ca7372aca7f4b2f1b29a9941053999d224d1e7c7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:53 +0100
Subject: [PATCH 1802/4122] KVM: x86: hyper-v: Use
 HV_MAX_SPARSE_VCPU_BANKS/HV_VCPUS_PER_SPARSE_BANK instead of raw '64'

It may not be clear from where the '64' limit for the maximum sparse
bank number comes from, use HV_MAX_SPARSE_VCPU_BANKS define instead.
Use HV_VCPUS_PER_SPARSE_BANK in KVM_HV_MAX_SPARSE_VCPU_SET_BITS's
definition. Opportunistically adjust the comment around BUILD_BUG_ON().

No functional change.

Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-16-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index cb145987f5b85..2fceb85687c13 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -40,7 +40,7 @@
 #include "irq.h"
 #include "fpu.h"
 
-#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, 64)
+#define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK)
 
 static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
 				bool vcpu_kick);
@@ -1793,7 +1793,7 @@ static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
 				 u64 *sparse_banks, int consumed_xmm_halves,
 				 gpa_t offset)
 {
-	if (hc->var_cnt > 64)
+	if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS)
 		return -EINVAL;
 
 	/* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
@@ -1902,12 +1902,11 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	gpa_t data_offset;
 
 	/*
-	 * The Hyper-V TLFS doesn't allow more than 64 sparse banks, e.g. the
-	 * valid mask is a u64.  Fail the build if KVM's max allowed number of
-	 * vCPUs (>4096) would exceed this limit, KVM will additional changes
-	 * for Hyper-V support to avoid setting the guest up to fail.
+	 * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS
+	 * sparse banks. Fail the build if KVM's max allowed number of
+	 * vCPUs (>4096) exceeds this limit.
 	 */
-	BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > 64);
+	BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > HV_MAX_SPARSE_VCPU_BANKS);
 
 	/*
 	 * 'Slow' hypercall's first parameter is the address in guest's memory
-- 
GitLab


From b6c2c22fa7012616b3039c9f559bf01195137b9d Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:54 +0100
Subject: [PATCH 1803/4122] KVM: x86: hyper-v: Don't use
 sparse_set_to_vcpu_mask() in kvm_hv_send_ipi()

Get rid of on-stack allocation of vcpu_mask and optimize kvm_hv_send_ipi()
for a smaller number of vCPUs in the request. When Hyper-V TLB flush
is in  use, HvSendSyntheticClusterIpi{,Ex} calls are not commonly used to
send IPIs to a large number of vCPUs (and are rarely used in general).

Introduce hv_is_vp_in_sparse_set() to directly check if the specified
VP_ID is present in sparse vCPU set.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-17-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 2fceb85687c13..0bfa59838e0a5 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1741,6 +1741,28 @@ static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
 	}
 }
 
+static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_banks[])
+{
+	int valid_bit_nr = vp_id / HV_VCPUS_PER_SPARSE_BANK;
+	unsigned long sbank;
+
+	if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask))
+		return false;
+
+	/*
+	 * The index into the sparse bank is the number of preceding bits in
+	 * the valid mask.  Optimize for VMs with <64 vCPUs by skipping the
+	 * fancy math if there can't possibly be preceding bits.
+	 */
+	if (valid_bit_nr)
+		sbank = hweight64(valid_bank_mask & GENMASK_ULL(valid_bit_nr - 1, 0));
+	else
+		sbank = 0;
+
+	return test_bit(vp_id % HV_VCPUS_PER_SPARSE_BANK,
+			(unsigned long *)&sparse_banks[sbank]);
+}
+
 struct kvm_hv_hcall {
 	u64 param;
 	u64 ingpa;
@@ -2035,8 +2057,8 @@ ret_success:
 		((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
 }
 
-static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
-				 unsigned long *vcpu_bitmap)
+static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
+				    u64 *sparse_banks, u64 valid_bank_mask)
 {
 	struct kvm_lapic_irq irq = {
 		.delivery_mode = APIC_DM_FIXED,
@@ -2046,7 +2068,9 @@ static void kvm_send_ipi_to_many(struct kvm *kvm, u32 vector,
 	unsigned long i;
 
 	kvm_for_each_vcpu(i, vcpu, kvm) {
-		if (vcpu_bitmap && !test_bit(i, vcpu_bitmap))
+		if (sparse_banks &&
+		    !hv_is_vp_in_sparse_set(kvm_hv_get_vpindex(vcpu),
+					    valid_bank_mask, sparse_banks))
 			continue;
 
 		/* We fail only when APIC is disabled */
@@ -2059,7 +2083,6 @@ static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
-	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
 	u64 valid_bank_mask;
 	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	u32 vector;
@@ -2121,13 +2144,10 @@ check_and_send_ipi:
 	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
 		return HV_STATUS_INVALID_HYPERCALL_INPUT;
 
-	if (all_cpus) {
-		kvm_send_ipi_to_many(kvm, vector, NULL);
-	} else {
-		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
-
-		kvm_send_ipi_to_many(kvm, vector, vcpu_mask);
-	}
+	if (all_cpus)
+		kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0);
+	else
+		kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask);
 
 ret_success:
 	return HV_STATUS_SUCCESS;
-- 
GitLab


From 53ca765a041d5a24650d3f01bced791be5d72df7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:55 +0100
Subject: [PATCH 1804/4122] KVM: x86: hyper-v: Create a separate fifo for L2
 TLB flush

To handle L2 TLB flush requests, KVM needs to use a separate fifo from
regular (L1) Hyper-V TLB flush requests: e.g. when a request to flush
something in L2 is made, the target vCPU can transition from L2 to L1,
receive a request to flush a GVA for L1 and then try to enter L2 back.
The first request needs to be processed at this point. Similarly,
requests to flush GVAs in L1 must wait until L2 exits to L1.

No functional change as KVM doesn't handle L2 TLB flush requests from
L2 yet.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-18-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  8 +++++++-
 arch/x86/kvm/hyperv.c           | 11 +++++++----
 arch/x86/kvm/hyperv.h           | 19 ++++++++++++++++---
 3 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3e35dcf40dc7c..89f9c98ff4456 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -631,6 +631,12 @@ struct kvm_vcpu_hv_synic {
  */
 #define KVM_HV_TLB_FLUSHALL_ENTRY  ((u64)-1)
 
+enum hv_tlb_flush_fifos {
+	HV_L1_TLB_FLUSH_FIFO,
+	HV_L2_TLB_FLUSH_FIFO,
+	HV_NR_TLB_FLUSH_FIFOS,
+};
+
 struct kvm_vcpu_hv_tlb_flush_fifo {
 	spinlock_t write_lock;
 	DECLARE_KFIFO(entries, u64, KVM_HV_TLB_FLUSH_FIFO_SIZE);
@@ -658,7 +664,7 @@ struct kvm_vcpu_hv {
 		u32 nested_ebx; /* HYPERV_CPUID_NESTED_FEATURES.EBX */
 	} cpuid_cache;
 
-	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo;
+	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 0bfa59838e0a5..9898463103032 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -956,8 +956,10 @@ int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
 
 	hv_vcpu->vp_index = vcpu->vcpu_idx;
 
-	INIT_KFIFO(hv_vcpu->tlb_flush_fifo.entries);
-	spin_lock_init(&hv_vcpu->tlb_flush_fifo.write_lock);
+	for (i = 0; i < HV_NR_TLB_FLUSH_FIFOS; i++) {
+		INIT_KFIFO(hv_vcpu->tlb_flush_fifo[i].entries);
+		spin_lock_init(&hv_vcpu->tlb_flush_fifo[i].write_lock);
+	}
 
 	return 0;
 }
@@ -1839,7 +1841,8 @@ static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count)
 	if (!hv_vcpu)
 		return;
 
-	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+	/* kvm_hv_flush_tlb() is not ready to handle requests for L2s yet */
+	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo[HV_L1_TLB_FLUSH_FIFO];
 
 	spin_lock(&tlb_flush_fifo->write_lock);
 
@@ -1874,7 +1877,7 @@ int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
 	if (!tdp_enabled || !hv_vcpu)
 		return -EINVAL;
 
-	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+	tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu));
 
 	count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE);
 
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index f79edf9234cd4..8942e8c6c912e 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -22,6 +22,7 @@
 #define __ARCH_X86_KVM_HYPERV_H__
 
 #include <linux/kvm_host.h>
+#include "x86.h"
 
 /* "Hv#1" signature */
 #define HYPERV_CPUID_SIGNATURE_EAX 0x31237648
@@ -151,15 +152,27 @@ int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args);
 int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 		     struct kvm_cpuid_entry2 __user *entries);
 
+static inline struct kvm_vcpu_hv_tlb_flush_fifo *kvm_hv_get_tlb_flush_fifo(struct kvm_vcpu *vcpu,
+									   bool is_guest_mode)
+{
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	int i = is_guest_mode ? HV_L2_TLB_FLUSH_FIFO :
+				HV_L1_TLB_FLUSH_FIFO;
+
+	/* KVM does not handle L2 TLB flush requests yet */
+	WARN_ON_ONCE(i != HV_L1_TLB_FLUSH_FIFO);
+
+	return &hv_vcpu->tlb_flush_fifo[i];
+}
+
 static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
-	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
-	if (!hv_vcpu || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
+	if (!to_hv_vcpu(vcpu) || !kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu))
 		return;
 
-	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo;
+	tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu));
 
 	kfifo_reset_out(&tlb_flush_fifo->entries);
 }
-- 
GitLab


From 7d5e88d301f84a7b64602dbe3640f288223095ea Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:56 +0100
Subject: [PATCH 1805/4122] KVM: x86: hyper-v: Use preallocated buffer in
 'struct kvm_vcpu_hv' instead of on-stack 'sparse_banks'

To make kvm_hv_flush_tlb() ready to handle L2 TLB flush requests, KVM needs
to allow for all 64 sparse vCPU banks regardless of KVM_MAX_VCPUs as L1
may use vCPU overcommit for L2. To avoid growing on-stack allocation, make
'sparse_banks' part of per-vCPU 'struct kvm_vcpu_hv' which is allocated
dynamically.

Note: sparse_set_to_vcpu_mask() can't currently be used to handle L2
requests as KVM does not keep L2 VM_ID -> L2 VCPU_ID -> L1 vCPU mappings,
i.e. its vp_bitmap array is still bounded by the number of L1 vCPUs and so
can remain an on-stack allocation.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-19-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h | 3 +++
 arch/x86/kvm/hyperv.c           | 6 ++++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 89f9c98ff4456..4596f19f927b1 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -665,6 +665,9 @@ struct kvm_vcpu_hv {
 	} cpuid_cache;
 
 	struct kvm_vcpu_hv_tlb_flush_fifo tlb_flush_fifo[HV_NR_TLB_FLUSH_FIFOS];
+
+	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
+	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 9898463103032..058e14564389d 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -1906,6 +1906,8 @@ out_flush_all:
 
 static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 {
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u64 *sparse_banks = hv_vcpu->sparse_banks;
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
@@ -1919,7 +1921,6 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1];
 	u64 *tlb_flush_entries;
 	u64 valid_bank_mask;
-	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	struct kvm_vcpu *v;
 	unsigned long i;
 	bool all_cpus;
@@ -2083,11 +2084,12 @@ static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
 
 static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 {
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u64 *sparse_banks = hv_vcpu->sparse_banks;
 	struct kvm *kvm = vcpu->kvm;
 	struct hv_send_ipi_ex send_ipi_ex;
 	struct hv_send_ipi send_ipi;
 	u64 valid_bank_mask;
-	u64 sparse_banks[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
 	u32 vector;
 	bool all_cpus;
 
-- 
GitLab


From 38edb45231832ef2aa191e2f3f77e30ad0bb4b61 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:57 +0100
Subject: [PATCH 1806/4122] KVM: nVMX: Keep track of hv_vm_id/hv_vp_id when
 eVMCS is in use

To handle L2 TLB flush requests, KVM needs to keep track of L2's VM_ID/
VP_IDs which are set by L1 hypervisor. 'Partition assist page' address is
also needed to handle post-flush exit to L1 upon request.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-20-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  6 ++++++
 arch/x86/kvm/vmx/nested.c       | 15 +++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4596f19f927b1..63dad1e129698 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -668,6 +668,12 @@ struct kvm_vcpu_hv {
 
 	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
 	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
+
+	struct {
+		u64 pa_page_gpa;
+		u64 vm_id;
+		u32 vp_id;
+	} nested;
 };
 
 /* Xen HVM per vcpu emulation context */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 048b2c3e3b3fc..cce68fd5befbc 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -225,6 +225,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
 
 static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
 {
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
@@ -233,6 +234,12 @@ static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
 	}
 
 	vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
+
+	if (hv_vcpu) {
+		hv_vcpu->nested.pa_page_gpa = INVALID_GPA;
+		hv_vcpu->nested.vm_id = 0;
+		hv_vcpu->nested.vp_id = 0;
+	}
 }
 
 static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
@@ -1557,11 +1564,19 @@ static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields
 {
 	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
 	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(&vmx->vcpu);
 
 	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */
 	vmcs12->tpr_threshold = evmcs->tpr_threshold;
 	vmcs12->guest_rip = evmcs->guest_rip;
 
+	if (unlikely(!(hv_clean_fields &
+		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL))) {
+		hv_vcpu->nested.pa_page_gpa = evmcs->partition_assist_page;
+		hv_vcpu->nested.vm_id = evmcs->hv_vm_id;
+		hv_vcpu->nested.vp_id = evmcs->hv_vp_id;
+	}
+
 	if (unlikely(!(hv_clean_fields &
 		       HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC))) {
 		vmcs12->guest_rsp = evmcs->guest_rsp;
-- 
GitLab


From e45aa2444d280747d27d4d98685d761125c4e364 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:58 +0100
Subject: [PATCH 1807/4122] KVM: nSVM: Keep track of Hyper-V hv_vm_id/hv_vp_id

Similar to nSVM, KVM needs to know L2's VM_ID/VP_ID and Partition
assist page address to handle L2 TLB flush requests.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-21-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/svm/hyperv.h | 15 +++++++++++++++
 arch/x86/kvm/svm/nested.c |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h
index c59544cdf03b7..e97d80974e720 100644
--- a/arch/x86/kvm/svm/hyperv.h
+++ b/arch/x86/kvm/svm/hyperv.h
@@ -9,5 +9,20 @@
 #include <asm/mshyperv.h>
 
 #include "../hyperv.h"
+#include "svm.h"
+
+static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	if (!hv_vcpu)
+		return;
+
+	hv_vcpu->nested.pa_page_gpa = hve->partition_assist_page;
+	hv_vcpu->nested.vm_id = hve->hv_vm_id;
+	hv_vcpu->nested.vp_id = hve->hv_vp_id;
+}
 
 #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index cc8f47e7e294d..aa36c349da430 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -800,6 +800,8 @@ int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
 	if (kvm_vcpu_apicv_active(vcpu))
 		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
 
+	nested_svm_hv_update_vm_vp_ids(vcpu);
+
 	return 0;
 }
 
-- 
GitLab


From b0c9c25e46252a576a974dd659f2396774e0dbb1 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:53:59 +0100
Subject: [PATCH 1808/4122] KVM: x86: Introduce
 .hv_inject_synthetic_vmexit_post_tlb_flush() nested hook

Hyper-V supports injecting synthetic L2->L1 exit after performing
L2 TLB flush operation but the procedure is vendor specific. Introduce
.hv_inject_synthetic_vmexit_post_tlb_flush nested hook for it.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-22-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/Makefile           |  3 ++-
 arch/x86/kvm/svm/hyperv.c       | 11 +++++++++++
 arch/x86/kvm/svm/hyperv.h       |  2 ++
 arch/x86/kvm/svm/nested.c       |  1 +
 arch/x86/kvm/vmx/hyperv.c       |  4 ++++
 arch/x86/kvm/vmx/hyperv.h       |  1 +
 arch/x86/kvm/vmx/nested.c       |  1 +
 8 files changed, 23 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kvm/svm/hyperv.c

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 63dad1e129698..ebf90f4f1a21e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1722,6 +1722,7 @@ struct kvm_x86_nested_ops {
 	int (*enable_evmcs)(struct kvm_vcpu *vcpu,
 			    uint16_t *vmcs_version);
 	uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu);
+	void (*hv_inject_synthetic_vmexit_post_tlb_flush)(struct kvm_vcpu *vcpu);
 };
 
 struct kvm_x86_init_ops {
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 4cf407563feea..80e3fe184d17e 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -26,7 +26,8 @@ kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/hyperv.o vmx/nested.o vmx/posted_intr.o
 kvm-intel-$(CONFIG_X86_SGX_KVM)	+= vmx/sgx.o
 
-kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
+kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \
+			   svm/sev.o svm/hyperv.o
 
 ifdef CONFIG_HYPERV
 kvm-amd-y		+= svm/svm_onhyperv.o
diff --git a/arch/x86/kvm/svm/hyperv.c b/arch/x86/kvm/svm/hyperv.c
new file mode 100644
index 0000000000000..911f51021af1f
--- /dev/null
+++ b/arch/x86/kvm/svm/hyperv.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD SVM specific code for Hyper-V on KVM.
+ *
+ * Copyright 2022 Red Hat, Inc. and/or its affiliates.
+ */
+#include "hyperv.h"
+
+void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu)
+{
+}
diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h
index e97d80974e720..7564bdf652e47 100644
--- a/arch/x86/kvm/svm/hyperv.h
+++ b/arch/x86/kvm/svm/hyperv.h
@@ -25,4 +25,6 @@ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu)
 	hv_vcpu->nested.vp_id = hve->hv_vp_id;
 }
 
+void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
+
 #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index aa36c349da430..748e4de40c8fa 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -1730,4 +1730,5 @@ struct kvm_x86_nested_ops svm_nested_ops = {
 	.get_nested_state_pages = svm_get_nested_state_pages,
 	.get_state = svm_get_nested_state,
 	.set_state = svm_set_nested_state,
+	.hv_inject_synthetic_vmexit_post_tlb_flush = svm_hv_inject_synthetic_vmexit_post_tlb_flush,
 };
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index 5e239158174ea..f05464db4fdc1 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -506,3 +506,7 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 
 	return 0;
 }
+
+void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu)
+{
+}
diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
index 99a151af7a817..8efaffe9215bf 100644
--- a/arch/x86/kvm/vmx/hyperv.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -243,5 +243,6 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			uint16_t *vmcs_version);
 void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
+void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
 
 #endif /* __KVM_X86_VMX_HYPERV_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index cce68fd5befbc..396712d13211f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -6995,4 +6995,5 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
 	.write_log_dirty = nested_vmx_write_pml_buffer,
 	.enable_evmcs = nested_enable_evmcs,
 	.get_evmcs_version = nested_get_evmcs_version,
+	.hv_inject_synthetic_vmexit_post_tlb_flush = vmx_hv_inject_synthetic_vmexit_post_tlb_flush,
 };
-- 
GitLab


From 3c9eb0655fc03fb5e84f1db334ebc832d9c5ac31 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:00 +0100
Subject: [PATCH 1809/4122] KVM: x86: hyper-v: Introduce
 kvm_hv_is_tlb_flush_hcall()

The newly introduced helper checks whether vCPU is performing a
Hyper-V TLB flush hypercall. This is required to filter out L2 TLB
flush hypercalls for processing.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-23-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.h | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 8942e8c6c912e..5f9c76b45f468 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -177,6 +177,23 @@ static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu)
 	kfifo_reset_out(&tlb_flush_fifo->entries);
 }
 
+static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	u16 code;
+
+	if (!hv_vcpu)
+		return false;
+
+	code = is_64_bit_hypercall(vcpu) ? kvm_rcx_read(vcpu) :
+					   kvm_rax_read(vcpu);
+
+	return (code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
+		code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST ||
+		code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX ||
+		code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX);
+}
+
 int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
 
 #endif
-- 
GitLab


From c58a318f6090efe06e6702b8882e2026f44f620e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:01 +0100
Subject: [PATCH 1810/4122] KVM: x86: hyper-v: L2 TLB flush

Handle L2 TLB flush requests by going through all vCPUs and checking
whether there are vCPUs running the same VM_ID with a VP_ID specified
in the requests. Perform synthetic exit to L2 upon finish.

Note, while checking VM_ID/VP_ID of running vCPUs seem to be a bit
racy, we count on the fact that KVM flushes the whole L2 VPID upon
transition. Also, KVM_REQ_HV_TLB_FLUSH request needs to be done upon
transition between L1 and L2 to make sure all pending requests are
always processed.

For the reference, Hyper-V TLFS refers to the feature as "Direct
Virtual Flush".

Note, nVMX/nSVM code does not handle VMCALL/VMMCALL from L2 yet.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-24-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 80 ++++++++++++++++++++++++++++++++++++-------
 arch/x86/kvm/hyperv.h |  3 --
 arch/x86/kvm/trace.h  | 21 +++++++-----
 3 files changed, 80 insertions(+), 24 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 058e14564389d..3715a6f026a23 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -34,6 +34,7 @@
 #include <linux/eventfd.h>
 
 #include <asm/apicdef.h>
+#include <asm/mshyperv.h>
 #include <trace/events/kvm.h>
 
 #include "trace.h"
@@ -1832,18 +1833,16 @@ static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc
 				  entries, consumed_xmm_halves, offset);
 }
 
-static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu, u64 *entries, int count)
+static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu,
+				 struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo,
+				 u64 *entries, int count)
 {
-	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 	u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;
 
 	if (!hv_vcpu)
 		return;
 
-	/* kvm_hv_flush_tlb() is not ready to handle requests for L2s yet */
-	tlb_flush_fifo = &hv_vcpu->tlb_flush_fifo[HV_L1_TLB_FLUSH_FIFO];
-
 	spin_lock(&tlb_flush_fifo->write_lock);
 
 	/*
@@ -1912,6 +1911,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	struct hv_tlb_flush_ex flush_ex;
 	struct hv_tlb_flush flush;
 	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
+	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
 	/*
 	 * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
 	 * entries on the TLB flush fifo. The last entry, however, needs to be
@@ -1962,7 +1962,8 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		}
 
 		trace_kvm_hv_flush_tlb(flush.processor_mask,
-				       flush.address_space, flush.flags);
+				       flush.address_space, flush.flags,
+				       is_guest_mode(vcpu));
 
 		valid_bank_mask = BIT_ULL(0);
 		sparse_banks[0] = flush.processor_mask;
@@ -1993,7 +1994,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
 					  flush_ex.hv_vp_set.format,
 					  flush_ex.address_space,
-					  flush_ex.flags);
+					  flush_ex.flags, is_guest_mode(vcpu));
 
 		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
 		all_cpus = flush_ex.hv_vp_set.format !=
@@ -2037,19 +2038,57 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
 	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
 	 * analyze it here, flush TLB regardless of the specified address space.
 	 */
-	if (all_cpus) {
-		kvm_for_each_vcpu(i, v, kvm)
-			hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
+	if (all_cpus && !is_guest_mode(vcpu)) {
+		kvm_for_each_vcpu(i, v, kvm) {
+			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
+			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
+					     tlb_flush_entries, hc->rep_cnt);
+		}
 
 		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
-	} else {
+	} else if (!is_guest_mode(vcpu)) {
 		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);
 
 		for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) {
 			v = kvm_get_vcpu(kvm, i);
 			if (!v)
 				continue;
-			hv_tlb_flush_enqueue(v, tlb_flush_entries, hc->rep_cnt);
+			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
+			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
+					     tlb_flush_entries, hc->rep_cnt);
+		}
+
+		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
+	} else {
+		struct kvm_vcpu_hv *hv_v;
+
+		bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
+
+		kvm_for_each_vcpu(i, v, kvm) {
+			hv_v = to_hv_vcpu(v);
+
+			/*
+			 * The following check races with nested vCPUs entering/exiting
+			 * and/or migrating between L1's vCPUs, however the only case when
+			 * KVM *must* flush the TLB is when the target L2 vCPU keeps
+			 * running on the same L1 vCPU from the moment of the request until
+			 * kvm_hv_flush_tlb() returns. TLB is fully flushed in all other
+			 * cases, e.g. when the target L2 vCPU migrates to a different L1
+			 * vCPU or when the corresponding L1 vCPU temporary switches to a
+			 * different L2 vCPU while the request is being processed.
+			 */
+			if (!hv_v || hv_v->nested.vm_id != hv_vcpu->nested.vm_id)
+				continue;
+
+			if (!all_cpus &&
+			    !hv_is_vp_in_sparse_set(hv_v->nested.vp_id, valid_bank_mask,
+						    sparse_banks))
+				continue;
+
+			__set_bit(i, vcpu_mask);
+			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, true);
+			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
+					     tlb_flush_entries, hc->rep_cnt);
 		}
 
 		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
@@ -2239,10 +2278,25 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
 
 static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
 {
+	u32 tlb_lock_count = 0;
+	int ret;
+
+	if (hv_result_success(result) && is_guest_mode(vcpu) &&
+	    kvm_hv_is_tlb_flush_hcall(vcpu) &&
+	    kvm_read_guest(vcpu->kvm, to_hv_vcpu(vcpu)->nested.pa_page_gpa,
+			   &tlb_lock_count, sizeof(tlb_lock_count)))
+		result = HV_STATUS_INVALID_HYPERCALL_INPUT;
+
 	trace_kvm_hv_hypercall_done(result);
 	kvm_hv_hypercall_set_result(vcpu, result);
 	++vcpu->stat.hypercalls;
-	return kvm_skip_emulated_instruction(vcpu);
+
+	ret = kvm_skip_emulated_instruction(vcpu);
+
+	if (tlb_lock_count)
+		kvm_x86_ops.nested_ops->hv_inject_synthetic_vmexit_post_tlb_flush(vcpu);
+
+	return ret;
 }
 
 static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 5f9c76b45f468..7706e203ff43e 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -159,9 +159,6 @@ static inline struct kvm_vcpu_hv_tlb_flush_fifo *kvm_hv_get_tlb_flush_fifo(struc
 	int i = is_guest_mode ? HV_L2_TLB_FLUSH_FIFO :
 				HV_L1_TLB_FLUSH_FIFO;
 
-	/* KVM does not handle L2 TLB flush requests yet */
-	WARN_ON_ONCE(i != HV_L1_TLB_FLUSH_FIFO);
-
 	return &hv_vcpu->tlb_flush_fifo[i];
 }
 
diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index bc25589ad5886..09f3392dd8300 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -1547,38 +1547,41 @@ TRACE_EVENT(kvm_hv_timer_state,
  * Tracepoint for kvm_hv_flush_tlb.
  */
 TRACE_EVENT(kvm_hv_flush_tlb,
-	TP_PROTO(u64 processor_mask, u64 address_space, u64 flags),
-	TP_ARGS(processor_mask, address_space, flags),
+	TP_PROTO(u64 processor_mask, u64 address_space, u64 flags, bool guest_mode),
+	TP_ARGS(processor_mask, address_space, flags, guest_mode),
 
 	TP_STRUCT__entry(
 		__field(u64, processor_mask)
 		__field(u64, address_space)
 		__field(u64, flags)
+		__field(bool, guest_mode)
 	),
 
 	TP_fast_assign(
 		__entry->processor_mask = processor_mask;
 		__entry->address_space = address_space;
 		__entry->flags = flags;
+		__entry->guest_mode = guest_mode;
 	),
 
-	TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx",
+	TP_printk("processor_mask 0x%llx address_space 0x%llx flags 0x%llx %s",
 		  __entry->processor_mask, __entry->address_space,
-		  __entry->flags)
+		  __entry->flags, __entry->guest_mode ? "(L2)" : "")
 );
 
 /*
  * Tracepoint for kvm_hv_flush_tlb_ex.
  */
 TRACE_EVENT(kvm_hv_flush_tlb_ex,
-	TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags),
-	TP_ARGS(valid_bank_mask, format, address_space, flags),
+	TP_PROTO(u64 valid_bank_mask, u64 format, u64 address_space, u64 flags, bool guest_mode),
+	TP_ARGS(valid_bank_mask, format, address_space, flags, guest_mode),
 
 	TP_STRUCT__entry(
 		__field(u64, valid_bank_mask)
 		__field(u64, format)
 		__field(u64, address_space)
 		__field(u64, flags)
+		__field(bool, guest_mode)
 	),
 
 	TP_fast_assign(
@@ -1586,12 +1589,14 @@ TRACE_EVENT(kvm_hv_flush_tlb_ex,
 		__entry->format = format;
 		__entry->address_space = address_space;
 		__entry->flags = flags;
+		__entry->guest_mode = guest_mode;
 	),
 
 	TP_printk("valid_bank_mask 0x%llx format 0x%llx "
-		  "address_space 0x%llx flags 0x%llx",
+		  "address_space 0x%llx flags 0x%llx %s",
 		  __entry->valid_bank_mask, __entry->format,
-		  __entry->address_space, __entry->flags)
+		  __entry->address_space, __entry->flags,
+		  __entry->guest_mode ? "(L2)" : "")
 );
 
 /*
-- 
GitLab


From d4baf1a9a572910d7b4cd63d23bf4be89b7648bd Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:02 +0100
Subject: [PATCH 1811/4122] KVM: x86: hyper-v: Introduce fast
 guest_hv_cpuid_has_l2_tlb_flush() check

Introduce a helper to quickly check if KVM needs to handle VMCALL/VMMCALL
from L2 in L0 to process L2 TLB flush requests.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-25-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 7706e203ff43e..bd698eb2bda10 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -174,6 +174,14 @@ static inline void kvm_hv_vcpu_purge_flush_tlb(struct kvm_vcpu *vcpu)
 	kfifo_reset_out(&tlb_flush_fifo->entries);
 }
 
+static inline bool guest_hv_cpuid_has_l2_tlb_flush(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	return hv_vcpu &&
+		(hv_vcpu->cpuid_cache.nested_eax & HV_X64_NESTED_DIRECT_FLUSH);
+}
+
 static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
-- 
GitLab


From 046f5756c49106471bc98bd32b87a62d0717ddda Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:03 +0100
Subject: [PATCH 1812/4122] KVM: nVMX: hyper-v: Cache VP assist page in 'struct
 kvm_vcpu_hv'

In preparation to enabling L2 TLB flush, cache VP assist page in
'struct kvm_vcpu_hv'. While on it, rename nested_enlightened_vmentry()
to nested_get_evmptr() and make it return eVMCS GPA directly.

No functional change intended.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-26-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/hyperv.c           | 10 ++++++----
 arch/x86/kvm/hyperv.h           |  3 +--
 arch/x86/kvm/vmx/hyperv.c       | 21 +++++++--------------
 arch/x86/kvm/vmx/hyperv.h       |  2 +-
 arch/x86/kvm/vmx/nested.c       |  6 +++---
 6 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ebf90f4f1a21e..d1013c4f673ca 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -669,6 +669,8 @@ struct kvm_vcpu_hv {
 	/* Preallocated buffer for handling hypercalls passing sparse vCPU set */
 	u64 sparse_banks[HV_MAX_SPARSE_VCPU_BANKS];
 
+	struct hv_vp_assist_page vp_assist_page;
+
 	struct {
 		u64 pa_page_gpa;
 		u64 vm_id;
diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 3715a6f026a23..ce245e37d08fc 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -900,13 +900,15 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
 
-bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
-			    struct hv_vp_assist_page *assist_page)
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
 {
-	if (!kvm_hv_assist_page_enabled(vcpu))
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	if (!hv_vcpu || !kvm_hv_assist_page_enabled(vcpu))
 		return false;
+
 	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
-				      assist_page, sizeof(*assist_page));
+				      &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page));
 }
 EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
 
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index bd698eb2bda10..81313e418b80a 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -108,8 +108,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
-bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu,
-			    struct hv_vp_assist_page *assist_page);
+bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu);
 
 static inline struct kvm_vcpu_hv_stimer *to_hv_stimer(struct kvm_vcpu *vcpu,
 						      int timer_index)
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index f05464db4fdc1..bceca1a99804f 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -321,24 +321,17 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = {
 };
 const unsigned int nr_evmcs_1_fields = ARRAY_SIZE(vmcs_field_to_evmcs_1);
 
-bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
+u64 nested_get_evmptr(struct kvm_vcpu *vcpu)
 {
-	struct hv_vp_assist_page assist_page;
-
-	*evmcs_gpa = -1ull;
-
-	if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
-		return false;
-
-	if (unlikely(!assist_page.enlighten_vmentry))
-		return false;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
-	if (unlikely(!evmptr_is_valid(assist_page.current_nested_vmcs)))
-		return false;
+	if (unlikely(!kvm_hv_get_assist_page(vcpu)))
+		return EVMPTR_INVALID;
 
-	*evmcs_gpa = assist_page.current_nested_vmcs;
+	if (unlikely(!hv_vcpu->vp_assist_page.enlighten_vmentry))
+		return EVMPTR_INVALID;
 
-	return true;
+	return hv_vcpu->vp_assist_page.current_nested_vmcs;
 }
 
 uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
index 8efaffe9215bf..8bf366730d333 100644
--- a/arch/x86/kvm/vmx/hyperv.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -237,7 +237,7 @@ enum nested_evmptrld_status {
 	EVMPTRLD_ERROR,
 };
 
-bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
+u64 nested_get_evmptr(struct kvm_vcpu *vcpu);
 uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
 int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			uint16_t *vmcs_version);
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 396712d13211f..38e6cb8abe627 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1992,7 +1992,8 @@ static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
 	if (likely(!guest_cpuid_has_evmcs(vcpu)))
 		return EVMPTRLD_DISABLED;
 
-	if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
+	evmcs_gpa = nested_get_evmptr(vcpu);
+	if (!evmptr_is_valid(evmcs_gpa)) {
 		nested_release_evmcs(vcpu);
 		return EVMPTRLD_DISABLED;
 	}
@@ -5221,7 +5222,6 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	u32 zero = 0;
 	gpa_t vmptr;
-	u64 evmcs_gpa;
 	int r;
 
 	if (!nested_vmx_check_permission(vcpu))
@@ -5247,7 +5247,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
 	 * vmx->nested.hv_evmcs but this shouldn't be a problem.
 	 */
 	if (likely(!guest_cpuid_has_evmcs(vcpu) ||
-		   !nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
+		   !evmptr_is_valid(nested_get_evmptr(vcpu)))) {
 		if (vmptr == vmx->nested.current_vmptr)
 			nested_release_vmcs12(vcpu);
 
-- 
GitLab


From c30e9bc8b606077142969a807ada42ca921e605a Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:04 +0100
Subject: [PATCH 1813/4122] KVM: nVMX: hyper-v: Enable L2 TLB flush

Enable L2 TLB flush feature on nVMX when:
- Enlightened VMCS is in use.
- The feature flag is enabled in eVMCS.
- The feature flag is enabled in partition assist page.

Perform synthetic vmexit to L1 after processing TLB flush call upon
request (HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH).

Note: nested_evmcs_l2_tlb_flush_enabled() uses cached VP assist page copy
which gets updated from nested_vmx_handle_enlightened_vmptrld(). This is
also guaranteed to happen post migration with eVMCS backed L2 running.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-27-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h |  9 +++++++++
 arch/x86/kvm/vmx/hyperv.c          | 17 +++++++++++++++++
 arch/x86/kvm/vmx/hyperv.h          |  1 +
 arch/x86/kvm/vmx/nested.c          | 20 ++++++++++++++++++++
 4 files changed, 47 insertions(+)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6639979302ab7..b25c6792d409c 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -600,6 +600,15 @@ struct hv_enlightened_vmcs {
 
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL			0xFFFF
 
+/*
+ * Note, Hyper-V isn't actually stealing bit 28 from Intel, just abusing it by
+ * pairing it with architecturally impossible exit reasons.  Bit 28 is set only
+ * on SMI exits to a SMI transfer monitor (STM) and if and only if a MTF VM-Exit
+ * is pending.  I.e. it will never be set by hardware for non-SMI exits (there
+ * are only three), nor will it ever be set unless the VMM is an STM.
+ */
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH		0x10000031
+
 /*
  * Hyper-V uses the software reserved 32 bytes in VMCB control area to expose
  * SVM enlightenments to guests.
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index bceca1a99804f..04a0bba58c7de 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -5,6 +5,7 @@
 
 #include "../cpuid.h"
 #include "hyperv.h"
+#include "nested.h"
 #include "vmcs.h"
 #include "vmx.h"
 #include "trace.h"
@@ -500,6 +501,22 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
+{
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
+
+	if (!hv_vcpu || !evmcs)
+		return false;
+
+	if (!evmcs->hv_enlightenments_control.nested_flush_hypercall)
+		return false;
+
+	return hv_vcpu->vp_assist_page.nested_control.features.directhypercall;
+}
+
 void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu)
 {
+	nested_vmx_vmexit(vcpu, HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH, 0, 0);
 }
diff --git a/arch/x86/kvm/vmx/hyperv.h b/arch/x86/kvm/vmx/hyperv.h
index 8bf366730d333..571e7929d14e7 100644
--- a/arch/x86/kvm/vmx/hyperv.h
+++ b/arch/x86/kvm/vmx/hyperv.h
@@ -243,6 +243,7 @@ int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 			uint16_t *vmcs_version);
 void nested_evmcs_filter_control_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata);
 int nested_evmcs_check_controls(struct vmcs12 *vmcs12);
+bool nested_evmcs_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu);
 void vmx_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
 
 #endif /* __KVM_X86_VMX_HYPERV_H */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 38e6cb8abe627..b28be793de298 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -1132,6 +1132,15 @@ static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
+	/*
+	 * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
+	 * L2's VP_ID upon request from the guest. Make sure we check for
+	 * pending entries in the right FIFO upon L1/L2 transition as these
+	 * requests are put by other vCPUs asynchronously.
+	 */
+	if (to_hv_vcpu(vcpu) && enable_ept)
+		kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+
 	/*
 	 * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
 	 * for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
@@ -3267,6 +3276,12 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 
 static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * Note: nested_get_evmcs_page() also updates 'vp_assist_page' copy
+	 * in 'struct kvm_vcpu_hv' in case eVMCS is in use, this is mandatory
+	 * to make nested_evmcs_l2_tlb_flush_enabled() work correctly post
+	 * migration.
+	 */
 	if (!nested_get_evmcs_page(vcpu)) {
 		pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
 				     __func__);
@@ -6144,6 +6159,11 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
 		 * Handle L2's bus locks in L0 directly.
 		 */
 		return true;
+	case EXIT_REASON_VMCALL:
+		/* Hyper-V L2 TLB flush hypercall is handled by L0 */
+		return guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
+			nested_evmcs_l2_tlb_flush_enabled(vcpu) &&
+			kvm_hv_is_tlb_flush_hcall(vcpu);
 	default:
 		break;
 	}
-- 
GitLab


From b415d8d417bbe5403626b74e1041101ac23d602f Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:05 +0100
Subject: [PATCH 1814/4122] KVM: x86: Make kvm_hv_get_assist_page() return
 0/-errno

Convert kvm_hv_get_assist_page() to return 'int' and propagate possible
errors from kvm_read_guest_cached().

Suggested-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-28-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c     | 8 ++++----
 arch/x86/kvm/hyperv.h     | 2 +-
 arch/x86/kvm/vmx/hyperv.c | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index ce245e37d08fc..15880da73a7b1 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -900,15 +900,15 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
 
-bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
+int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
 	if (!hv_vcpu || !kvm_hv_assist_page_enabled(vcpu))
-		return false;
+		return -EFAULT;
 
-	return !kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
-				      &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page));
+	return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
+				     &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page));
 }
 EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
 
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 81313e418b80a..5157622c2fb3a 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -108,7 +108,7 @@ int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu);
-bool kvm_hv_get_assist_page(struct kvm_vcpu *vcpu);
+int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu);
 
 static inline struct kvm_vcpu_hv_stimer *to_hv_stimer(struct kvm_vcpu *vcpu,
 						      int timer_index)
diff --git a/arch/x86/kvm/vmx/hyperv.c b/arch/x86/kvm/vmx/hyperv.c
index 04a0bba58c7de..ae03d1fe03552 100644
--- a/arch/x86/kvm/vmx/hyperv.c
+++ b/arch/x86/kvm/vmx/hyperv.c
@@ -326,7 +326,7 @@ u64 nested_get_evmptr(struct kvm_vcpu *vcpu)
 {
 	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 
-	if (unlikely(!kvm_hv_get_assist_page(vcpu)))
+	if (unlikely(kvm_hv_get_assist_page(vcpu)))
 		return EVMPTR_INVALID;
 
 	if (unlikely(!hv_vcpu->vp_assist_page.enlighten_vmentry))
-- 
GitLab


From 3f4a812edf5cb0a50e65fbdfafdb3e688da18f16 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:06 +0100
Subject: [PATCH 1815/4122] KVM: nSVM: hyper-v: Enable L2 TLB flush

Implement Hyper-V L2 TLB flush for nSVM. The feature needs to be enabled
both in extended 'nested controls' in VMCB and VP assist page.
According to Hyper-V TLFS, synthetic vmexit to L1 is performed with
- HV_SVM_EXITCODE_ENL exit_code.
- HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH exit_info_1.

Note: VP assist page is cached in 'struct kvm_vcpu_hv' so
recalc_intercepts() doesn't need to read from guest's memory. KVM
needs to update the case upon each VMRUN and after svm_set_nested_state
(svm_get_nested_state_pages()) to handle the case when the guest got
migrated while L2 was running.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-29-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/hyperv-tlfs.h |  4 ++++
 arch/x86/kvm/hyperv.h              | 11 ++++++++++
 arch/x86/kvm/svm/hyperv.c          |  7 ++++++
 arch/x86/kvm/svm/hyperv.h          | 15 +++++++++++++
 arch/x86/kvm/svm/nested.c          | 35 ++++++++++++++++++++++++++++--
 5 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b25c6792d409c..e3efaf6e6b628 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -631,6 +631,10 @@ struct hv_vmcb_enlightenments {
  */
 #define HV_VMCB_NESTED_ENLIGHTENMENTS		31
 
+/* Synthetic VM-Exit */
+#define HV_SVM_EXITCODE_ENL			0xf0000000
+#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH	(1)
+
 struct hv_partition_assist_pg {
 	u32 tlb_lock_count;
 };
diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h
index 5157622c2fb3a..9f96414a31c52 100644
--- a/arch/x86/kvm/hyperv.h
+++ b/arch/x86/kvm/hyperv.h
@@ -198,6 +198,17 @@ static inline bool kvm_hv_is_tlb_flush_hcall(struct kvm_vcpu *vcpu)
 		code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX);
 }
 
+static inline int kvm_hv_verify_vp_assist(struct kvm_vcpu *vcpu)
+{
+	if (!to_hv_vcpu(vcpu))
+		return 0;
+
+	if (!kvm_hv_assist_page_enabled(vcpu))
+		return 0;
+
+	return kvm_hv_get_assist_page(vcpu);
+}
+
 int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/arch/x86/kvm/svm/hyperv.c b/arch/x86/kvm/svm/hyperv.c
index 911f51021af1f..088f6429b24ce 100644
--- a/arch/x86/kvm/svm/hyperv.c
+++ b/arch/x86/kvm/svm/hyperv.c
@@ -8,4 +8,11 @@
 
 void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu)
 {
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->vmcb->control.exit_code = HV_SVM_EXITCODE_ENL;
+	svm->vmcb->control.exit_code_hi = 0;
+	svm->vmcb->control.exit_info_1 = HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH;
+	svm->vmcb->control.exit_info_2 = 0;
+	nested_svm_vmexit(svm);
 }
diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h
index 7564bdf652e47..02f4784b5d446 100644
--- a/arch/x86/kvm/svm/hyperv.h
+++ b/arch/x86/kvm/svm/hyperv.h
@@ -25,6 +25,21 @@ static inline void nested_svm_hv_update_vm_vp_ids(struct kvm_vcpu *vcpu)
 	hv_vcpu->nested.vp_id = hve->hv_vp_id;
 }
 
+static inline bool nested_svm_l2_tlb_flush_enabled(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+	struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
+	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
+
+	if (!hv_vcpu)
+		return false;
+
+	if (!hve->hv_enlightenments_control.nested_flush_hypercall)
+		return false;
+
+	return hv_vcpu->vp_assist_page.nested_control.features.directhypercall;
+}
+
 void svm_hv_inject_synthetic_vmexit_post_tlb_flush(struct kvm_vcpu *vcpu);
 
 #endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 748e4de40c8fa..bc9cd7086fa97 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -150,8 +150,12 @@ void recalc_intercepts(struct vcpu_svm *svm)
 		vmcb_clr_intercept(c, INTERCEPT_VINTR);
 	}
 
-	/* We don't want to see VMMCALLs from a nested guest */
-	vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
+	/*
+	 * We want to see VMMCALLs from a nested guest only when Hyper-V L2 TLB
+	 * flush feature is enabled.
+	 */
+	if (!nested_svm_l2_tlb_flush_enabled(&svm->vcpu))
+		vmcb_clr_intercept(c, INTERCEPT_VMMCALL);
 
 	for (i = 0; i < MAX_INTERCEPT; i++)
 		c->intercepts[i] |= g->intercepts[i];
@@ -473,6 +477,15 @@ static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
 
 static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * KVM_REQ_HV_TLB_FLUSH flushes entries from either L1's VP_ID or
+	 * L2's VP_ID upon request from the guest. Make sure we check for
+	 * pending entries in the right FIFO upon L1/L2 transition as these
+	 * requests are put by other vCPUs asynchronously.
+	 */
+	if (to_hv_vcpu(vcpu) && npt_enabled)
+		kvm_make_request(KVM_REQ_HV_TLB_FLUSH, vcpu);
+
 	/*
 	 * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
 	 * things to fix before this can be conditional:
@@ -824,6 +837,13 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
+	/* This fails when VP assist page is enabled but the supplied GPA is bogus */
+	ret = kvm_hv_verify_vp_assist(vcpu);
+	if (ret) {
+		kvm_inject_gp(vcpu, 0);
+		return ret;
+	}
+
 	vmcb12_gpa = svm->vmcb->save.rax;
 	ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
 	if (ret == -EINVAL) {
@@ -1421,6 +1441,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu)
 int nested_svm_exit_special(struct vcpu_svm *svm)
 {
 	u32 exit_code = svm->vmcb->control.exit_code;
+	struct kvm_vcpu *vcpu = &svm->vcpu;
 
 	switch (exit_code) {
 	case SVM_EXIT_INTR:
@@ -1439,6 +1460,13 @@ int nested_svm_exit_special(struct vcpu_svm *svm)
 			return NESTED_EXIT_HOST;
 		break;
 	}
+	case SVM_EXIT_VMMCALL:
+		/* Hyper-V L2 TLB flush hypercall is handled by L0 */
+		if (guest_hv_cpuid_has_l2_tlb_flush(vcpu) &&
+		    nested_svm_l2_tlb_flush_enabled(vcpu) &&
+		    kvm_hv_is_tlb_flush_hcall(vcpu))
+			return NESTED_EXIT_HOST;
+		break;
 	default:
 		break;
 	}
@@ -1719,6 +1747,9 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 		return false;
 	}
 
+	if (kvm_hv_verify_vp_assist(vcpu))
+		return false;
+
 	return true;
 }
 
-- 
GitLab


From f4de6a1fa3ee81197239603756fc5c4259e5ef1b Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:07 +0100
Subject: [PATCH 1816/4122] KVM: x86: Expose Hyper-V L2 TLB flush feature

With both nSVM and nVMX implementations in place, KVM can now expose
Hyper-V L2 TLB flush feature to userspace.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-30-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 15880da73a7b1..2c7f2a26421e8 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -2779,6 +2779,7 @@ int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
 
 		case HYPERV_CPUID_NESTED_FEATURES:
 			ent->eax = evmcs_ver;
+			ent->eax |= HV_X64_NESTED_DIRECT_FLUSH;
 			ent->eax |= HV_X64_NESTED_MSR_BITMAP;
 			ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL;
 			break;
-- 
GitLab


From 676a863ce605caca3a559bdb8e40f640c15f1fde Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:08 +0100
Subject: [PATCH 1817/4122] KVM: selftests: Better XMM read/write helpers

set_xmm()/get_xmm() helpers are fairly useless as they only read 64 bits
from 128-bit registers. Moreover, these helpers are not used. Borrow
_kvm_read_sse_reg()/_kvm_write_sse_reg() from KVM limiting them to
XMM0-XMM8 for now.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-31-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/processor.h  | 70 ++++++++++---------
 1 file changed, 36 insertions(+), 34 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index f838ac5865dc5..a10f39affa45e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -603,71 +603,73 @@ static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
 	       !this_cpu_has(feature.anti_feature);
 }
 
-#define SET_XMM(__var, __xmm) \
-	asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+typedef u32		__attribute__((vector_size(16))) sse128_t;
+#define __sse128_u	union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
+#define sse128_lo(x)	({ __sse128_u t; t.vec = x; t.as_u64[0]; })
+#define sse128_hi(x)	({ __sse128_u t; t.vec = x; t.as_u64[1]; })
 
-static inline void set_xmm(int n, unsigned long val)
+static inline void read_sse_reg(int reg, sse128_t *data)
 {
-	switch (n) {
+	switch (reg) {
 	case 0:
-		SET_XMM(val, xmm0);
+		asm("movdqa %%xmm0, %0" : "=m"(*data));
 		break;
 	case 1:
-		SET_XMM(val, xmm1);
+		asm("movdqa %%xmm1, %0" : "=m"(*data));
 		break;
 	case 2:
-		SET_XMM(val, xmm2);
+		asm("movdqa %%xmm2, %0" : "=m"(*data));
 		break;
 	case 3:
-		SET_XMM(val, xmm3);
+		asm("movdqa %%xmm3, %0" : "=m"(*data));
 		break;
 	case 4:
-		SET_XMM(val, xmm4);
+		asm("movdqa %%xmm4, %0" : "=m"(*data));
 		break;
 	case 5:
-		SET_XMM(val, xmm5);
+		asm("movdqa %%xmm5, %0" : "=m"(*data));
 		break;
 	case 6:
-		SET_XMM(val, xmm6);
+		asm("movdqa %%xmm6, %0" : "=m"(*data));
 		break;
 	case 7:
-		SET_XMM(val, xmm7);
+		asm("movdqa %%xmm7, %0" : "=m"(*data));
 		break;
+	default:
+		BUG();
 	}
 }
 
-#define GET_XMM(__xmm)							\
-({									\
-	unsigned long __val;						\
-	asm volatile("movq %%"#__xmm", %0" : "=r"(__val));		\
-	__val;								\
-})
-
-static inline unsigned long get_xmm(int n)
+static inline void write_sse_reg(int reg, const sse128_t *data)
 {
-	assert(n >= 0 && n <= 7);
-
-	switch (n) {
+	switch (reg) {
 	case 0:
-		return GET_XMM(xmm0);
+		asm("movdqa %0, %%xmm0" : : "m"(*data));
+		break;
 	case 1:
-		return GET_XMM(xmm1);
+		asm("movdqa %0, %%xmm1" : : "m"(*data));
+		break;
 	case 2:
-		return GET_XMM(xmm2);
+		asm("movdqa %0, %%xmm2" : : "m"(*data));
+		break;
 	case 3:
-		return GET_XMM(xmm3);
+		asm("movdqa %0, %%xmm3" : : "m"(*data));
+		break;
 	case 4:
-		return GET_XMM(xmm4);
+		asm("movdqa %0, %%xmm4" : : "m"(*data));
+		break;
 	case 5:
-		return GET_XMM(xmm5);
+		asm("movdqa %0, %%xmm5" : : "m"(*data));
+		break;
 	case 6:
-		return GET_XMM(xmm6);
+		asm("movdqa %0, %%xmm6" : : "m"(*data));
+		break;
 	case 7:
-		return GET_XMM(xmm7);
+		asm("movdqa %0, %%xmm7" : : "m"(*data));
+		break;
+	default:
+		BUG();
 	}
-
-	/* never reached */
-	return 0;
 }
 
 static inline void cpu_relax(void)
-- 
GitLab


From c05a0a71c5d0aea010af19f21ccdc0d576066790 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:09 +0100
Subject: [PATCH 1818/4122] KVM: selftests: Move HYPERV_LINUX_OS_ID definition
 to a common header

HYPERV_LINUX_OS_ID needs to be written to HV_X64_MSR_GUEST_OS_ID by
each Hyper-V specific selftest.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-32-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/hyperv.h  | 3 +++
 tools/testing/selftests/kvm/x86_64/hyperv_features.c | 6 ++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index b66910702c0a7..f0a8a93694b2b 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -185,4 +185,7 @@
 /* hypercall options */
 #define HV_HYPERCALL_FAST_BIT		BIT(16)
 
+/* Proper HV_X64_MSR_GUEST_OS_ID value */
+#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
+
 #endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 2b6d455acf8ae..6558cc61cf69f 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -13,8 +13,6 @@
 #include "processor.h"
 #include "hyperv.h"
 
-#define LINUX_OS_ID ((u64)0x8100 << 48)
-
 static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
 				vm_vaddr_t output_address, uint64_t *hv_status)
 {
@@ -72,7 +70,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 
 	GUEST_ASSERT(hcall->control);
 
-	wrmsr(HV_X64_MSR_GUEST_OS_ID, LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
 	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
 
 	if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
@@ -170,7 +168,7 @@ static void guest_test_msrs_access(void)
 			 */
 			msr->idx = HV_X64_MSR_GUEST_OS_ID;
 			msr->write = 1;
-			msr->write_val = LINUX_OS_ID;
+			msr->write_val = HYPERV_LINUX_OS_ID;
 			msr->available = 1;
 			break;
 		case 3:
-- 
GitLab


From caf4110fbaa89a20733facb062381e89961ce698 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:10 +0100
Subject: [PATCH 1819/4122] KVM: selftests: Move the function doing Hyper-V
 hypercall to a common header

All Hyper-V specific tests issuing hypercalls need this.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-33-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/hyperv.h     | 19 ++++++++++++++++++
 .../selftests/kvm/x86_64/hyperv_features.c    | 20 +------------------
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index f0a8a93694b2b..7ed8f4f5f7d89 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -185,6 +185,25 @@
 /* hypercall options */
 #define HV_HYPERCALL_FAST_BIT		BIT(16)
 
+static inline uint8_t hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+				       vm_vaddr_t output_address,
+				       uint64_t *hv_status)
+{
+	uint64_t error_code;
+	uint8_t vector;
+
+	/* Note both the hypercall and the "asm safe" clobber r9-r11. */
+	asm volatile("mov %[output_address], %%r8\n\t"
+		     KVM_ASM_SAFE("vmcall")
+		     : "=a" (*hv_status),
+		       "+c" (control), "+d" (input_address),
+		       KVM_ASM_SAFE_OUTPUTS(vector, error_code)
+		     : [output_address] "r"(output_address),
+		       "a" (-EFAULT)
+		     : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+	return vector;
+}
+
 /* Proper HV_X64_MSR_GUEST_OS_ID value */
 #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
 
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 6558cc61cf69f..5ff4ff2365bb3 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -13,24 +13,6 @@
 #include "processor.h"
 #include "hyperv.h"
 
-static inline uint8_t hypercall(u64 control, vm_vaddr_t input_address,
-				vm_vaddr_t output_address, uint64_t *hv_status)
-{
-	uint64_t error_code;
-	uint8_t vector;
-
-	/* Note both the hypercall and the "asm safe" clobber r9-r11. */
-	asm volatile("mov %[output_address], %%r8\n\t"
-		     KVM_ASM_SAFE("vmcall")
-		     : "=a" (*hv_status),
-		       "+c" (control), "+d" (input_address),
-		       KVM_ASM_SAFE_OUTPUTS(vector, error_code)
-		     : [output_address] "r"(output_address),
-		       "a" (-EFAULT)
-		     : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
-	return vector;
-}
-
 struct msr_data {
 	uint32_t idx;
 	bool available;
@@ -80,7 +62,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 		input = output = 0;
 	}
 
-	vector = hypercall(hcall->control, input, output, &res);
+	vector = hyperv_hypercall(hcall->control, input, output, &res);
 	if (hcall->ud_expected) {
 		GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
 	} else {
-- 
GitLab


From 998489245d8469c21f1517c4bbe192e0ef3c3374 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:11 +0100
Subject: [PATCH 1820/4122] KVM: selftests: Hyper-V PV IPI selftest

Introduce a selftest for Hyper-V PV IPI hypercalls
(HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx).

The test creates one 'sender' vCPU and two 'receiver' vCPU and then
issues various combinations of send IPI hypercalls in both 'normal'
and 'fast' (with XMM input where necessary) mode. Later, the test
checks whether IPIs were delivered to the expected destination vCPU[s].

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-34-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/include/x86_64/hyperv.h     |  35 +-
 .../selftests/kvm/x86_64/hyperv_features.c    |   2 +-
 .../testing/selftests/kvm/x86_64/hyperv_ipi.c | 314 ++++++++++++++++++
 5 files changed, 349 insertions(+), 4 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_ipi.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 582e2e198fbf5..3b3218cb46ed6 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -25,6 +25,7 @@
 /x86_64/hyperv_clock
 /x86_64/hyperv_cpuid
 /x86_64/hyperv_features
+/x86_64/hyperv_ipi
 /x86_64/hyperv_svm_test
 /x86_64/max_vcpuid_cap_test
 /x86_64/mmio_warning_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index f62dcfcda618c..4095b1212f08c 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -87,6 +87,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index 7ed8f4f5f7d89..c757e40011731 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -9,6 +9,8 @@
 #ifndef SELFTEST_KVM_HYPERV_H
 #define SELFTEST_KVM_HYPERV_H
 
+#include "processor.h"
+
 #define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS	0x40000000
 #define HYPERV_CPUID_INTERFACE			0x40000001
 #define HYPERV_CPUID_VERSION			0x40000002
@@ -184,10 +186,15 @@
 
 /* hypercall options */
 #define HV_HYPERCALL_FAST_BIT		BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET	17
 
-static inline uint8_t hyperv_hypercall(u64 control, vm_vaddr_t input_address,
-				       vm_vaddr_t output_address,
-				       uint64_t *hv_status)
+/*
+ * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
+ * is set to the hypercall status (if no exception occurred).
+ */
+static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+					 vm_vaddr_t output_address,
+					 uint64_t *hv_status)
 {
 	uint64_t error_code;
 	uint8_t vector;
@@ -204,6 +211,28 @@ static inline uint8_t hyperv_hypercall(u64 control, vm_vaddr_t input_address,
 	return vector;
 }
 
+/* Issue a Hyper-V hypercall and assert that it succeeded. */
+static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+				    vm_vaddr_t output_address)
+{
+	uint64_t hv_status;
+	uint8_t vector;
+
+	vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
+
+	GUEST_ASSERT(!vector);
+	GUEST_ASSERT((hv_status & 0xffff) == 0);
+}
+
+/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
+static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
+{
+	int i;
+
+	for (i = 0; i < n_sse_regs; i++)
+		write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
+}
+
 /* Proper HV_X64_MSR_GUEST_OS_ID value */
 #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
 
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index 5ff4ff2365bb3..3163c3e8db0a7 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -62,7 +62,7 @@ static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
 		input = output = 0;
 	}
 
-	vector = hyperv_hypercall(hcall->control, input, output, &res);
+	vector = __hyperv_hypercall(hcall->control, input, output, &res);
 	if (hcall->ud_expected) {
 		GUEST_ASSERT_2(vector == UD_VECTOR, hcall->control, vector);
 	} else {
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
new file mode 100644
index 0000000000000..8b791eac7d5a3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define RECEIVER_VCPU_ID_1 2
+#define RECEIVER_VCPU_ID_2 65
+
+#define IPI_VECTOR	 0xfe
+
+static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+
+struct hv_vpset {
+	u64 format;
+	u64 valid_bank_mask;
+	u64 bank_contents[2];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+	HV_GENERIC_SET_SPARSE_4K,
+	HV_GENERIC_SET_ALL,
+};
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+	u32 vector;
+	u32 reserved;
+	u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+	u32 vector;
+	u32 reserved;
+	struct hv_vpset vp_set;
+};
+
+static inline void hv_init(vm_vaddr_t pgs_gpa)
+{
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+}
+
+static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+	u32 vcpu_id;
+
+	x2apic_enable();
+	hv_init(pgs_gpa);
+
+	vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+	/* Signal sender vCPU we're ready */
+	ipis_rcvd[vcpu_id] = (u64)-1;
+
+	for (;;)
+		asm volatile("sti; hlt; cli");
+}
+
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+	u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+	ipis_rcvd[vcpu_id]++;
+	wrmsr(HV_X64_MSR_EOI, 1);
+}
+
+static inline void nop_loop(void)
+{
+	int i;
+
+	for (i = 0; i < 100000000; i++)
+		asm volatile("nop");
+}
+
+static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+	struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
+	struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
+	int stage = 1, ipis_expected[2] = {0};
+
+	hv_init(pgs_gpa);
+	GUEST_SYNC(stage++);
+
+	/* Wait for receiver vCPUs to come up */
+	while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
+		nop_loop();
+	ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
+
+	/* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+	ipi->vector = IPI_VECTOR;
+	ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
+	hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+	GUEST_SYNC(stage++);
+	/* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+	hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
+			 IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+	GUEST_SYNC(stage++);
+
+	/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+	memset(hcall_page, 0, 4096);
+	ipi_ex->vector = IPI_VECTOR;
+	ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+	ipi_ex->vp_set.valid_bank_mask = 1 << 0;
+	ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+	hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+			 pgs_gpa, pgs_gpa + 4096);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+	GUEST_SYNC(stage++);
+	/* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+	hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+	hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+			 (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+			 IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+	GUEST_SYNC(stage++);
+
+	/* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+	memset(hcall_page, 0, 4096);
+	ipi_ex->vector = IPI_VECTOR;
+	ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+	ipi_ex->vp_set.valid_bank_mask = 1 << 1;
+	ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
+	hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+			 pgs_gpa, pgs_gpa + 4096);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+	GUEST_SYNC(stage++);
+	/* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+	hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+	hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+			 (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+			 IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+	GUEST_SYNC(stage++);
+
+	/* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
+	memset(hcall_page, 0, 4096);
+	ipi_ex->vector = IPI_VECTOR;
+	ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+	ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
+	ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+	ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
+	hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+			 pgs_gpa, pgs_gpa + 4096);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+	GUEST_SYNC(stage++);
+	/* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
+	hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+	hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+			 (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+			 IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+	GUEST_SYNC(stage++);
+
+	/* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
+	memset(hcall_page, 0, 4096);
+	ipi_ex->vector = IPI_VECTOR;
+	ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
+	hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+	GUEST_SYNC(stage++);
+	/*
+	 * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
+	 * Nothing to write anything to XMM regs.
+	 */
+	hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
+			 IPI_VECTOR, HV_GENERIC_SET_ALL);
+	nop_loop();
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+	GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+	GUEST_SYNC(stage++);
+
+	GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+	int old, r;
+
+	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+	TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+		    vcpu->id, r);
+
+	vcpu_run(vcpu);
+
+	TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
+
+	return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+	void *retval;
+	int r;
+
+	r = pthread_cancel(thread);
+	TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+		    vcpu->id, r);
+
+	r = pthread_join(thread, &retval);
+	TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+		    vcpu->id, r);
+	TEST_ASSERT(retval == PTHREAD_CANCELED,
+		    "expected retval=%p, got %p", PTHREAD_CANCELED,
+		    retval);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu[3];
+	unsigned int exit_reason;
+	vm_vaddr_t hcall_page;
+	pthread_t threads[2];
+	int stage = 1, r;
+	struct ucall uc;
+
+	vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+	/* Hypercall input/output */
+	hcall_page = vm_vaddr_alloc_pages(vm, 2);
+	memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+	vm_init_descriptor_tables(vm);
+
+	vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
+	vcpu_init_descriptor_tables(vcpu[1]);
+	vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+	vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
+	vcpu_set_hv_cpuid(vcpu[1]);
+
+	vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
+	vcpu_init_descriptor_tables(vcpu[2]);
+	vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+	vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
+	vcpu_set_hv_cpuid(vcpu[2]);
+
+	vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+	vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+	vcpu_set_hv_cpuid(vcpu[0]);
+
+	r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+	TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+	r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+	TEST_ASSERT(!r, "pthread_create failed errno=%d", errno);
+
+	while (true) {
+		vcpu_run(vcpu[0]);
+
+		exit_reason = vcpu[0]->run->exit_reason;
+		TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    exit_reason, exit_reason_str(exit_reason));
+
+		switch (get_ucall(vcpu[0], &uc)) {
+		case UCALL_SYNC:
+			TEST_ASSERT(uc.args[1] == stage,
+				    "Unexpected stage: %ld (%d expected)\n",
+				    uc.args[1], stage);
+			break;
+		case UCALL_DONE:
+			goto done;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		stage++;
+	}
+
+done:
+	cancel_join_vcpu_thread(threads[0], vcpu[1]);
+	cancel_join_vcpu_thread(threads[1], vcpu[2]);
+	kvm_vm_free(vm);
+
+	return r;
+}
-- 
GitLab


From 56fc7732031d9999a35cb43b3de30d52ae30fd3f Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:12 +0100
Subject: [PATCH 1821/4122] KVM: selftests: Fill in vm->vpages_mapped bitmap in
 virt_map() too

Similar to vm_vaddr_alloc(), virt_map() needs to reflect the mapping
in vm->vpages_mapped.

While on it, remove unneeded code wrapping in vm_vaddr_alloc().

Reviewed-by: Andrew Jones <andrew.jones@linux.dev>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-35-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/kvm_util.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 5ac8f207ed92d..a3ff96ec02355 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1319,8 +1319,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
 
 		virt_pg_map(vm, vaddr, paddr);
 
-		sparsebit_set(vm->vpages_mapped,
-			vaddr >> vm->page_shift);
+		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
 	}
 
 	return vaddr_start;
@@ -1393,6 +1392,8 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
 		virt_pg_map(vm, vaddr, paddr);
 		vaddr += page_size;
 		paddr += page_size;
+
+		sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
 	}
 }
 
-- 
GitLab


From 2d4a5f91837f4a75cf02010b9a52bfe52d0efd40 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:13 +0100
Subject: [PATCH 1822/4122] KVM: selftests: Export vm_vaddr_unused_gap() to
 make it possible to request unmapped ranges

Currently, tests can only request a new vaddr range by using
vm_vaddr_alloc()/vm_vaddr_alloc_page()/vm_vaddr_alloc_pages() but
these functions allocate and map physical pages too. Make it possible
to request unmapped range too.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-36-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/kvm_util_base.h | 1 +
 tools/testing/selftests/kvm/lib/kvm_util.c          | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 228212ede05eb..c7685c7038ff0 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -385,6 +385,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
 vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index a3ff96ec02355..1d26a21601785 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1214,8 +1214,8 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
  * TEST_ASSERT failure occurs for invalid input or no area of at least
  * sz unallocated bytes >= vaddr_min is available.
  */
-static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
-				      vm_vaddr_t vaddr_min)
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+			       vm_vaddr_t vaddr_min)
 {
 	uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
 
-- 
GitLab


From 8f649b57856b855f8a28724321e7ae72e31d513a Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 18 Nov 2022 13:58:51 -0800
Subject: [PATCH 1823/4122] IB/hfi1: Replace 1-element array with singleton

Zero-length arrays are deprecated[1] and are being replaced with flexible
array members in support of the ongoing efforts to tighten the
FORTIFY_SOURCE routines on memcpy(), correctly instrument array indexing
with UBSAN_BOUNDS, and to globally enable -fstrict-flex-arrays=3.

Replace zero-length array with flexible-array member "lvs" in struct
opa_port_data_counters_msg and struct opa_port_error_counters64_msg.

Additionally, the "port" member of several structs is defined as a
single-element, but is only ever accessed at index 0. Replace it with a
singleton so that flexible array usage is sane.

This results in no differences in binary output.

[1] https://github.com/KSPP/linux/issues/78

Link: https://lore.kernel.org/r/20221118215847.never.416-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hfi1/mad.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/mad.c b/drivers/infiniband/hw/hfi1/mad.c
index 4146a2113a954..e5e783c45810b 100644
--- a/drivers/infiniband/hw/hfi1/mad.c
+++ b/drivers/infiniband/hw/hfi1/mad.c
@@ -2437,9 +2437,9 @@ struct opa_port_data_counters_msg {
 			__be64 port_vl_xmit_wait_data;
 			__be64 port_vl_rcv_bubble;
 			__be64 port_vl_mark_fecn;
-		} vls[0];
+		} vls[];
 		/* array size defined by #bits set in vl_select_mask*/
-	} port[1]; /* array size defined by  #ports in attribute modifier */
+	} port;
 };
 
 struct opa_port_error_counters64_msg {
@@ -2470,9 +2470,9 @@ struct opa_port_error_counters64_msg {
 		u8 reserved3[7];
 		struct _vls_ectrs {
 			__be64 port_vl_xmit_discards;
-		} vls[0];
+		} vls[];
 		/* array size defined by #bits set in vl_select_mask */
-	} port[1]; /* array size defined by #ports in attribute modifier */
+	} port;
 };
 
 struct opa_port_error_info_msg {
@@ -2543,7 +2543,7 @@ struct opa_port_error_info_msg {
 			u8 error_info;
 		} __packed fm_config_ei;
 		__u32 reserved9;
-	} port[1]; /* actual array size defined by #ports in attr modifier */
+	} port;
 };
 
 /* opa_port_error_info_msg error_info_select_mask bit definitions */
@@ -2966,7 +2966,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 	}
 
 	/* Sanity check */
-	response_data_size = struct_size(req, port[0].vls, num_vls);
+	response_data_size = struct_size(req, port.vls, num_vls);
 
 	if (response_data_size > sizeof(pmp->data)) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -2986,7 +2986,7 @@ static int pma_get_opa_datacounters(struct opa_pma_mad *pmp,
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	rsp = &req->port[0];
+	rsp = &req->port;
 	memset(rsp, 0, sizeof(*rsp));
 
 	rsp->port_number = port;
@@ -3182,7 +3182,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	response_data_size = struct_size(req, port[0].vls, num_vls);
+	response_data_size = struct_size(req, port.vls, num_vls);
 
 	if (response_data_size > sizeof(pmp->data)) {
 		pmp->mad_hdr.status |= IB_SMP_INVALID_FIELD;
@@ -3201,7 +3201,7 @@ static int pma_get_opa_porterrors(struct opa_pma_mad *pmp,
 		return reply((struct ib_mad_hdr *)pmp);
 	}
 
-	rsp = &req->port[0];
+	rsp = &req->port;
 
 	ibp = to_iport(ibdev, port_num);
 	ppd = ppd_from_ibp(ibp);
@@ -3340,7 +3340,7 @@ static int pma_get_opa_errorinfo(struct opa_pma_mad *pmp,
 	u64 reg;
 
 	req = (struct opa_port_error_info_msg *)pmp->data;
-	rsp = &req->port[0];
+	rsp = &req->port;
 
 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
@@ -3590,7 +3590,7 @@ static int pma_set_opa_errorinfo(struct opa_pma_mad *pmp,
 	u32 error_info_select;
 
 	req = (struct opa_port_error_info_msg *)pmp->data;
-	rsp = &req->port[0];
+	rsp = &req->port;
 
 	num_ports = OPA_AM_NPORT(be32_to_cpu(pmp->mad_hdr.attr_mod));
 	num_pslm = hweight64(be64_to_cpu(req->port_select_mask[3]));
-- 
GitLab


From 8e1a76493be9868fef34f977296a69769dcdfa6f Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@linux.dev>
Date: Fri, 11 Nov 2022 21:35:37 -0500
Subject: [PATCH 1824/4122] RDMA/rxe: Remove reliable datagram support

The rdma_rxe driver does not actually support the reliable datagram
transport but contains a variable with RD opcodes in driver code.  And
this variable is never used. So remove it.

Link: https://lore.kernel.org/r/20221112023537.432912-1-yanjun.zhu@intel.com
Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_hdr.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index e432f9e37795e..804594b76040a 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -742,7 +742,6 @@ enum aeth_syndrome {
 	AETH_NAK_INVALID_REQ	= 0x61,
 	AETH_NAK_REM_ACC_ERR	= 0x62,
 	AETH_NAK_REM_OP_ERR	= 0x63,
-	AETH_NAK_INV_RD_REQ	= 0x64,
 };
 
 static inline u8 __aeth_syn(void *arg)
-- 
GitLab


From 7d984dac8f6bf4ebd3398af82b357e1d181ecaac Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Sun, 30 Oct 2022 03:04:33 +0000
Subject: [PATCH 1825/4122] RDMA/rxe: Fix mr->map double free

rxe_mr_cleanup() which tries to free mr->map again will be called when
rxe_mr_init_user() fails:

   CPU: 0 PID: 4917 Comm: rdma_flush_serv Kdump: loaded Not tainted 6.1.0-rc1-roce-flush+ #25
   Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.16.0-0-gd239552ce722-prebuilt.qemu.org 04/01/2014
   Call Trace:
    <TASK>
    dump_stack_lvl+0x45/0x5d
    panic+0x19e/0x349
    end_report.part.0+0x54/0x7c
    kasan_report.cold+0xa/0xf
    rxe_mr_cleanup+0x9d/0xf0 [rdma_rxe]
    __rxe_cleanup+0x10a/0x1e0 [rdma_rxe]
    rxe_reg_user_mr+0xb7/0xd0 [rdma_rxe]
    ib_uverbs_reg_mr+0x26a/0x480 [ib_uverbs]
    ib_uverbs_handler_UVERBS_METHOD_INVOKE_WRITE+0x1a2/0x250 [ib_uverbs]
    ib_uverbs_cmd_verbs+0x1397/0x15a0 [ib_uverbs]

This issue was firstly exposed since commit b18c7da63fcb ("RDMA/rxe: Fix
memory leak in error path code") and then we fixed it in commit
8ff5f5d9d8cf ("RDMA/rxe: Prevent double freeing rxe_map_set()") but this
fix was reverted together at last by commit 1e75550648da (Revert
"RDMA/rxe: Create duplicate mapping tables for FMRs")

Simply let rxe_mr_cleanup() always handle freeing the mr->map once it is
successfully allocated.

Fixes: 1e75550648da ("Revert "RDMA/rxe: Create duplicate mapping tables for FMRs"")
Link: https://lore.kernel.org/r/1667099073-2-1-git-send-email-lizhijian@fujitsu.com
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index cd846cf82a84c..b1423000e4bcd 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -97,6 +97,7 @@ err2:
 		kfree(mr->map[i]);
 
 	kfree(mr->map);
+	mr->map = NULL;
 err1:
 	return -ENOMEM;
 }
@@ -120,7 +121,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 	int			num_buf;
 	void			*vaddr;
 	int err;
-	int i;
 
 	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
 	if (IS_ERR(umem)) {
@@ -159,9 +159,8 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 			if (!vaddr) {
 				rxe_dbg_mr(mr, "Unable to get virtual address\n");
 				err = -ENOMEM;
-				goto err_cleanup_map;
+				goto err_release_umem;
 			}
-
 			buf->addr = (uintptr_t)vaddr;
 			buf->size = PAGE_SIZE;
 			num_buf++;
@@ -178,10 +177,6 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 
 	return 0;
 
-err_cleanup_map:
-	for (i = 0; i < mr->num_map; i++)
-		kfree(mr->map[i]);
-	kfree(mr->map);
 err_release_umem:
 	ib_umem_release(umem);
 err_out:
-- 
GitLab


From 8eaa6f7d569b4a22bfc1b0a3fdfeeb401feb65a4 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Tue, 8 Nov 2022 21:38:46 +0800
Subject: [PATCH 1826/4122] RDMA/hns: Fix ext_sge num error when post send

In the HNS ROCE driver, The sge is divided into standard sge and extended
sge.  There are 2 standard sge in RC/XRC, and the UD standard sge is 0.
In the scenario of RC SQ inline, if the data does not exceed 32bytes, the
standard sge will be used. If it exceeds, only the extended sge will be
used to fill the data.

Currently, when filling the extended sge, max_gs is directly used as the
number of the extended sge, which did not subtract the number of standard
sge.  There is a logical error. The new algorithm subtracts the number of
standard sge from max_gs to get the actual number of extended sge.

Fixes: 30b707886aeb ("RDMA/hns: Support inline data in extented sge space for RC")
Link: https://lore.kernel.org/r/20221108133847.2304539-2-xuhaoyue1@hisilicon.com
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 1ead35fb031b0..dcb59c05edfd8 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -188,20 +188,29 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
 	hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
 }
 
+static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
+{
+	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
+		return 0;
+
+	return HNS_ROCE_SGE_IN_WQE;
+}
+
 static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
 				 const struct ib_send_wr *wr,
 				 unsigned int *sge_idx, u32 msg_len)
 {
 	struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
-	unsigned int ext_sge_sz = qp->sq.max_gs * HNS_ROCE_SGE_SIZE;
 	unsigned int left_len_in_pg;
 	unsigned int idx = *sge_idx;
+	unsigned int std_sge_num;
 	unsigned int i = 0;
 	unsigned int len;
 	void *addr;
 	void *dseg;
 
-	if (msg_len > ext_sge_sz) {
+	std_sge_num = get_std_sge_num(qp);
+	if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) {
 		ibdev_err(ibdev,
 			  "no enough extended sge space for inline data.\n");
 		return -EINVAL;
-- 
GitLab


From 0c5e259b06a8efc69f929ad777ea49281bb58e37 Mon Sep 17 00:00:00 2001
From: Luoyouming <luoyouming@huawei.com>
Date: Tue, 8 Nov 2022 21:38:47 +0800
Subject: [PATCH 1827/4122] RDMA/hns: Fix incorrect sge nums calculation

The user usually configures the number of sge through the max_send_sge
parameter when creating qp, and configures the maximum size of inline data
that can be sent through max_inline_data. Inline uses sge to fill data to
send. Expect the following:

1) When the sge space cannot hold inline data, the sge space needs to be
   expanded to accommodate all inline data

2) When the sge space is enough to accommodate inline data, the upper
   limit of inline data can be increased so that users can send larger
   inline data

Currently case one is not implemented. When the inline data is larger than
the sge space, an error of insufficient sge space occurs.  This part of
the code needs to be reimplemented according to the expected rules. The
calculation method of sge num is modified to take the maximum value of
max_send_sge and the sge for max_inline_data to solve this problem.

Fixes: 05201e01be93 ("RDMA/hns: Refactor process of setting extended sge")
Fixes: 30b707886aeb ("RDMA/hns: Support inline data in extented sge space for RC")
Link: https://lore.kernel.org/r/20221108133847.2304539-3-xuhaoyue1@hisilicon.com
Signed-off-by: Luoyouming <luoyouming@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_device.h |   3 +
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c  |  12 +--
 drivers/infiniband/hw/hns/hns_roce_main.c   |  18 +++-
 drivers/infiniband/hw/hns/hns_roce_qp.c     | 107 ++++++++++++++++----
 include/uapi/rdma/hns-abi.h                 |  15 +++
 5 files changed, 125 insertions(+), 30 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 723e55a7de8d9..f701cc86896b3 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -202,6 +202,7 @@ struct hns_roce_ucontext {
 	struct list_head	page_list;
 	struct mutex		page_mutex;
 	struct hns_user_mmap_entry *db_mmap_entry;
+	u32			config;
 };
 
 struct hns_roce_pd {
@@ -334,6 +335,7 @@ struct hns_roce_wq {
 	u32		head;
 	u32		tail;
 	void __iomem	*db_reg;
+	u32		ext_sge_cnt;
 };
 
 struct hns_roce_sge {
@@ -635,6 +637,7 @@ struct hns_roce_qp {
 	struct list_head	rq_node; /* all recv qps are on a list */
 	struct list_head	sq_node; /* all send qps are on a list */
 	struct hns_user_mmap_entry *dwqe_mmap_entry;
+	u32			config;
 };
 
 struct hns_roce_ib_iboe {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index dcb59c05edfd8..939811867249f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -188,14 +188,6 @@ static void set_atomic_seg(const struct ib_send_wr *wr,
 	hr_reg_write(rc_sq_wqe, RC_SEND_WQE_SGE_NUM, valid_num_sge);
 }
 
-static unsigned int get_std_sge_num(struct hns_roce_qp *qp)
-{
-	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
-		return 0;
-
-	return HNS_ROCE_SGE_IN_WQE;
-}
-
 static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
 				 const struct ib_send_wr *wr,
 				 unsigned int *sge_idx, u32 msg_len)
@@ -203,14 +195,12 @@ static int fill_ext_sge_inl_data(struct hns_roce_qp *qp,
 	struct ib_device *ibdev = &(to_hr_dev(qp->ibqp.device))->ib_dev;
 	unsigned int left_len_in_pg;
 	unsigned int idx = *sge_idx;
-	unsigned int std_sge_num;
 	unsigned int i = 0;
 	unsigned int len;
 	void *addr;
 	void *dseg;
 
-	std_sge_num = get_std_sge_num(qp);
-	if (msg_len > (qp->sq.max_gs - std_sge_num) * HNS_ROCE_SGE_SIZE) {
+	if (msg_len > qp->sq.ext_sge_cnt * HNS_ROCE_SGE_SIZE) {
 		ibdev_err(ibdev,
 			  "no enough extended sge space for inline data.\n");
 		return -EINVAL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index dcf89689a4c62..8ba68ac12388d 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -354,10 +354,11 @@ static int hns_roce_alloc_uar_entry(struct ib_ucontext *uctx)
 static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 				   struct ib_udata *udata)
 {
-	int ret;
 	struct hns_roce_ucontext *context = to_hr_ucontext(uctx);
-	struct hns_roce_ib_alloc_ucontext_resp resp = {};
 	struct hns_roce_dev *hr_dev = to_hr_dev(uctx->device);
+	struct hns_roce_ib_alloc_ucontext_resp resp = {};
+	struct hns_roce_ib_alloc_ucontext ucmd = {};
+	int ret;
 
 	if (!hr_dev->active)
 		return -EAGAIN;
@@ -365,6 +366,19 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
 	resp.qp_tab_size = hr_dev->caps.num_qps;
 	resp.srq_tab_size = hr_dev->caps.num_srqs;
 
+	ret = ib_copy_from_udata(&ucmd, udata,
+				 min(udata->inlen, sizeof(ucmd)));
+	if (ret)
+		return ret;
+
+	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+		context->config = ucmd.config & HNS_ROCE_EXSGE_FLAGS;
+
+	if (context->config & HNS_ROCE_EXSGE_FLAGS) {
+		resp.config |= HNS_ROCE_RSP_EXSGE_FLAGS;
+		resp.max_inline_data = hr_dev->caps.max_sq_inline;
+	}
+
 	ret = hns_roce_uar_alloc(hr_dev, &context->uar);
 	if (ret)
 		goto error_fail_uar_alloc;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index f0bd82a18069a..0ae335fb205ca 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -476,38 +476,109 @@ static int set_rq_size(struct hns_roce_dev *hr_dev, struct ib_qp_cap *cap,
 	return 0;
 }
 
-static u32 get_wqe_ext_sge_cnt(struct hns_roce_qp *qp)
+static u32 get_max_inline_data(struct hns_roce_dev *hr_dev,
+			       struct ib_qp_cap *cap)
 {
-	/* GSI/UD QP only has extended sge */
-	if (qp->ibqp.qp_type == IB_QPT_GSI || qp->ibqp.qp_type == IB_QPT_UD)
-		return qp->sq.max_gs;
-
-	if (qp->sq.max_gs > HNS_ROCE_SGE_IN_WQE)
-		return qp->sq.max_gs - HNS_ROCE_SGE_IN_WQE;
+	if (cap->max_inline_data) {
+		cap->max_inline_data = roundup_pow_of_two(cap->max_inline_data);
+		return min(cap->max_inline_data,
+			   hr_dev->caps.max_sq_inline);
+	}
 
 	return 0;
 }
 
+static void update_inline_data(struct hns_roce_qp *hr_qp,
+			       struct ib_qp_cap *cap)
+{
+	u32 sge_num = hr_qp->sq.ext_sge_cnt;
+
+	if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+		if (!(hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+		      hr_qp->ibqp.qp_type == IB_QPT_UD))
+			sge_num = max((u32)HNS_ROCE_SGE_IN_WQE, sge_num);
+
+		cap->max_inline_data = max(cap->max_inline_data,
+					   sge_num * HNS_ROCE_SGE_SIZE);
+	}
+
+	hr_qp->max_inline_data = cap->max_inline_data;
+}
+
+static u32 get_sge_num_from_max_send_sge(bool is_ud_or_gsi,
+					 u32 max_send_sge)
+{
+	unsigned int std_sge_num;
+	unsigned int min_sge;
+
+	std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+	min_sge = is_ud_or_gsi ? 1 : 0;
+	return max_send_sge > std_sge_num ? (max_send_sge - std_sge_num) :
+				min_sge;
+}
+
+static unsigned int get_sge_num_from_max_inl_data(bool is_ud_or_gsi,
+						  u32 max_inline_data)
+{
+	unsigned int inline_sge;
+
+	inline_sge = roundup_pow_of_two(max_inline_data) / HNS_ROCE_SGE_SIZE;
+
+	/*
+	 * if max_inline_data less than
+	 * HNS_ROCE_SGE_IN_WQE * HNS_ROCE_SGE_SIZE,
+	 * In addition to ud's mode, no need to extend sge.
+	 */
+	if (!is_ud_or_gsi && inline_sge <= HNS_ROCE_SGE_IN_WQE)
+		inline_sge = 0;
+
+	return inline_sge;
+}
+
 static void set_ext_sge_param(struct hns_roce_dev *hr_dev, u32 sq_wqe_cnt,
 			      struct hns_roce_qp *hr_qp, struct ib_qp_cap *cap)
 {
+	bool is_ud_or_gsi = (hr_qp->ibqp.qp_type == IB_QPT_GSI ||
+				hr_qp->ibqp.qp_type == IB_QPT_UD);
+	unsigned int std_sge_num;
+	u32 inline_ext_sge = 0;
+	u32 ext_wqe_sge_cnt;
 	u32 total_sge_cnt;
-	u32 wqe_sge_cnt;
+
+	cap->max_inline_data = get_max_inline_data(hr_dev, cap);
 
 	hr_qp->sge.sge_shift = HNS_ROCE_SGE_SHIFT;
+	std_sge_num = is_ud_or_gsi ? 0 : HNS_ROCE_SGE_IN_WQE;
+	ext_wqe_sge_cnt = get_sge_num_from_max_send_sge(is_ud_or_gsi,
+							cap->max_send_sge);
 
-	hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+	if (hr_qp->config & HNS_ROCE_EXSGE_FLAGS) {
+		inline_ext_sge = max(ext_wqe_sge_cnt,
+				     get_sge_num_from_max_inl_data(is_ud_or_gsi,
+							 cap->max_inline_data));
+		hr_qp->sq.ext_sge_cnt = inline_ext_sge ?
+					roundup_pow_of_two(inline_ext_sge) : 0;
 
-	wqe_sge_cnt = get_wqe_ext_sge_cnt(hr_qp);
+		hr_qp->sq.max_gs = max(1U, (hr_qp->sq.ext_sge_cnt + std_sge_num));
+		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+
+		ext_wqe_sge_cnt = hr_qp->sq.ext_sge_cnt;
+	} else {
+		hr_qp->sq.max_gs = max(1U, cap->max_send_sge);
+		hr_qp->sq.max_gs = min(hr_qp->sq.max_gs, hr_dev->caps.max_sq_sg);
+		hr_qp->sq.ext_sge_cnt = hr_qp->sq.max_gs;
+	}
 
 	/* If the number of extended sge is not zero, they MUST use the
 	 * space of HNS_HW_PAGE_SIZE at least.
 	 */
-	if (wqe_sge_cnt) {
-		total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * wqe_sge_cnt);
+	if (ext_wqe_sge_cnt) {
+		total_sge_cnt = roundup_pow_of_two(sq_wqe_cnt * ext_wqe_sge_cnt);
 		hr_qp->sge.sge_cnt = max(total_sge_cnt,
 				(u32)HNS_HW_PAGE_SIZE / HNS_ROCE_SGE_SIZE);
 	}
+
+	update_inline_data(hr_qp, cap);
 }
 
 static int check_sq_size_with_integrity(struct hns_roce_dev *hr_dev,
@@ -556,6 +627,7 @@ static int set_user_sq_size(struct hns_roce_dev *hr_dev,
 
 	hr_qp->sq.wqe_shift = ucmd->log_sq_stride;
 	hr_qp->sq.wqe_cnt = cnt;
+	cap->max_send_sge = hr_qp->sq.max_gs;
 
 	return 0;
 }
@@ -986,13 +1058,9 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			struct hns_roce_ib_create_qp *ucmd)
 {
 	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_ucontext *uctx;
 	int ret;
 
-	if (init_attr->cap.max_inline_data > hr_dev->caps.max_sq_inline)
-		init_attr->cap.max_inline_data = hr_dev->caps.max_sq_inline;
-
-	hr_qp->max_inline_data = init_attr->cap.max_inline_data;
-
 	if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
 		hr_qp->sq_signal_bits = IB_SIGNAL_ALL_WR;
 	else
@@ -1015,12 +1083,17 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
 			return ret;
 		}
 
+		uctx = rdma_udata_to_drv_context(udata, struct hns_roce_ucontext,
+						 ibucontext);
+		hr_qp->config = uctx->config;
 		ret = set_user_sq_size(hr_dev, &init_attr->cap, hr_qp, ucmd);
 		if (ret)
 			ibdev_err(ibdev,
 				  "failed to set user SQ size, ret = %d.\n",
 				  ret);
 	} else {
+		if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+			hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
 		ret = set_kernel_sq_size(hr_dev, &init_attr->cap, hr_qp);
 		if (ret)
 			ibdev_err(ibdev,
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index f6fde06db4b4e..745790ce3c261 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -85,11 +85,26 @@ struct hns_roce_ib_create_qp_resp {
 	__aligned_u64 dwqe_mmap_key;
 };
 
+enum {
+	HNS_ROCE_EXSGE_FLAGS = 1 << 0,
+};
+
+enum {
+	HNS_ROCE_RSP_EXSGE_FLAGS = 1 << 0,
+};
+
 struct hns_roce_ib_alloc_ucontext_resp {
 	__u32	qp_tab_size;
 	__u32	cqe_size;
 	__u32	srq_tab_size;
 	__u32	reserved;
+	__u32	config;
+	__u32	max_inline_data;
+};
+
+struct hns_roce_ib_alloc_ucontext {
+	__u32 config;
+	__u32 reserved;
 };
 
 struct hns_roce_ib_alloc_pd_resp {
-- 
GitLab


From 5f18e9f8868c6d4eae71678e7ebd4977b7d8c8cf Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@amd.com>
Date: Mon, 19 Sep 2022 10:56:37 -0500
Subject: [PATCH 1828/4122] iommu/amd: Fix ivrs_acpihid cmdline parsing code

The second (UID) strcmp in acpi_dev_hid_uid_match considers
"0" and "00" different, which can prevent device registration.

Have the AMD IOMMU driver's ivrs_acpihid parsing code remove
any leading zeroes to make the UID strcmp succeed.  Now users
can safely specify "AMDxxxxx:00" or "AMDxxxxx:0" and expect
the same behaviour.

Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Signed-off-by: Kim Phillips <kim.phillips@amd.com>
Cc: stable@vger.kernel.org
Cc: Suravee Suthikulpanit <Suravee.Suthikulpanit@amd.com>
Cc: Joerg Roedel <jroedel@suse.de>
Link: https://lore.kernel.org/r/20220919155638.391481-1-kim.phillips@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/init.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 1a2d425bf5687..d14da30b8706f 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3488,6 +3488,13 @@ static int __init parse_ivrs_acpihid(char *str)
 		return 1;
 	}
 
+	/*
+	 * Ignore leading zeroes after ':', so e.g., AMDI0095:00
+	 * will match AMDI0095:0 in the second strcmp in acpi_dev_hid_uid_match
+	 */
+	while (*uid == '0' && *(uid + 1))
+		uid++;
+
 	i = early_acpihid_map_size++;
 	memcpy(early_acpihid_map[i].hid, hid, strlen(hid));
 	memcpy(early_acpihid_map[i].uid, uid, strlen(uid));
-- 
GitLab


From 1198d2316dc4265a97d0e8445a22c7a6d17580a4 Mon Sep 17 00:00:00 2001
From: Kim Phillips <kim.phillips@amd.com>
Date: Mon, 19 Sep 2022 10:56:38 -0500
Subject: [PATCH 1829/4122] iommu/amd: Fix ill-formed ivrs_ioapic, ivrs_hpet
 and ivrs_acpihid options

Currently, these options cause the following libkmod error:

libkmod: ERROR ../libkmod/libkmod-config.c:489 kcmdline_parse_result: \
	Ignoring bad option on kernel command line while parsing module \
	name: 'ivrs_xxxx[XX:XX'

Fix by introducing a new parameter format for these options and
throw a warning for the deprecated format.

Users are still allowed to omit the PCI Segment if zero.

Adding a Link: to the reason why we're modding the syntax parsing
in the driver and not in libkmod.

Fixes: ca3bf5d47cec ("iommu/amd: Introduces ivrs_acpihid kernel parameter")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/linux-modules/20200310082308.14318-2-lucas.demarchi@intel.com/
Reported-by: Kim Phillips <kim.phillips@amd.com>
Co-developed-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Signed-off-by: Kim Phillips <kim.phillips@amd.com>
Link: https://lore.kernel.org/r/20220919155638.391481-2-kim.phillips@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 .../admin-guide/kernel-parameters.txt         | 27 +++++--
 drivers/iommu/amd/init.c                      | 79 +++++++++++++------
 2 files changed, 76 insertions(+), 30 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a465d5242774a..bb1c62314f9ec 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2300,7 +2300,13 @@
 			Provide an override to the IOAPIC-ID<->DEVICE-ID
 			mapping provided in the IVRS ACPI table.
 			By default, PCI segment is 0, and can be omitted.
-			For example:
+
+			For example, to map IOAPIC-ID decimal 10 to
+			PCI segment 0x1 and PCI device 00:14.0,
+			write the parameter as:
+				ivrs_ioapic=10@0001:00:14.0
+
+			Deprecated formats:
 			* To map IOAPIC-ID decimal 10 to PCI device 00:14.0
 			  write the parameter as:
 				ivrs_ioapic[10]=00:14.0
@@ -2312,7 +2318,13 @@
 			Provide an override to the HPET-ID<->DEVICE-ID
 			mapping provided in the IVRS ACPI table.
 			By default, PCI segment is 0, and can be omitted.
-			For example:
+
+			For example, to map HPET-ID decimal 10 to
+			PCI segment 0x1 and PCI device 00:14.0,
+			write the parameter as:
+				ivrs_hpet=10@0001:00:14.0
+
+			Deprecated formats:
 			* To map HPET-ID decimal 0 to PCI device 00:14.0
 			  write the parameter as:
 				ivrs_hpet[0]=00:14.0
@@ -2323,15 +2335,20 @@
 	ivrs_acpihid	[HW,X86-64]
 			Provide an override to the ACPI-HID:UID<->DEVICE-ID
 			mapping provided in the IVRS ACPI table.
+			By default, PCI segment is 0, and can be omitted.
 
 			For example, to map UART-HID:UID AMD0020:0 to
 			PCI segment 0x1 and PCI device ID 00:14.5,
 			write the parameter as:
-				ivrs_acpihid[0001:00:14.5]=AMD0020:0
+				ivrs_acpihid=AMD0020:0@0001:00:14.5
 
-			By default, PCI segment is 0, and can be omitted.
-			For example, PCI device 00:14.5 write the parameter as:
+			Deprecated formats:
+			* To map UART-HID:UID AMD0020:0 to PCI segment is 0,
+			  PCI device ID 00:14.5, write the parameter as:
 				ivrs_acpihid[00:14.5]=AMD0020:0
+			* To map UART-HID:UID AMD0020:0 to PCI segment 0x1 and
+			  PCI device ID 00:14.5, write the parameter as:
+				ivrs_acpihid[0001:00:14.5]=AMD0020:0
 
 	js=		[HW,JOY] Analog joystick
 			See Documentation/input/joydev/joystick.rst.
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index d14da30b8706f..34029d1161073 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -3402,18 +3402,24 @@ static int __init parse_amd_iommu_options(char *str)
 static int __init parse_ivrs_ioapic(char *str)
 {
 	u32 seg = 0, bus, dev, fn;
-	int ret, id, i;
+	int id, i;
 	u32 devid;
 
-	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
-	if (ret != 4) {
-		ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
-		if (ret != 5) {
-			pr_err("Invalid command line: ivrs_ioapic%s\n", str);
-			return 1;
-		}
+	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
+	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
+		goto found;
+
+	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
+	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
+		pr_warn("ivrs_ioapic%s option format deprecated; use ivrs_ioapic=%d@%04x:%02x:%02x.%d instead\n",
+			str, id, seg, bus, dev, fn);
+		goto found;
 	}
 
+	pr_err("Invalid command line: ivrs_ioapic%s\n", str);
+	return 1;
+
+found:
 	if (early_ioapic_map_size == EARLY_MAP_SIZE) {
 		pr_err("Early IOAPIC map overflow - ignoring ivrs_ioapic%s\n",
 			str);
@@ -3434,18 +3440,24 @@ static int __init parse_ivrs_ioapic(char *str)
 static int __init parse_ivrs_hpet(char *str)
 {
 	u32 seg = 0, bus, dev, fn;
-	int ret, id, i;
+	int id, i;
 	u32 devid;
 
-	ret = sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn);
-	if (ret != 4) {
-		ret = sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn);
-		if (ret != 5) {
-			pr_err("Invalid command line: ivrs_hpet%s\n", str);
-			return 1;
-		}
+	if (sscanf(str, "=%d@%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
+	    sscanf(str, "=%d@%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5)
+		goto found;
+
+	if (sscanf(str, "[%d]=%x:%x.%x", &id, &bus, &dev, &fn) == 4 ||
+	    sscanf(str, "[%d]=%x:%x:%x.%x", &id, &seg, &bus, &dev, &fn) == 5) {
+		pr_warn("ivrs_hpet%s option format deprecated; use ivrs_hpet=%d@%04x:%02x:%02x.%d instead\n",
+			str, id, seg, bus, dev, fn);
+		goto found;
 	}
 
+	pr_err("Invalid command line: ivrs_hpet%s\n", str);
+	return 1;
+
+found:
 	if (early_hpet_map_size == EARLY_MAP_SIZE) {
 		pr_err("Early HPET map overflow - ignoring ivrs_hpet%s\n",
 			str);
@@ -3466,19 +3478,36 @@ static int __init parse_ivrs_hpet(char *str)
 static int __init parse_ivrs_acpihid(char *str)
 {
 	u32 seg = 0, bus, dev, fn;
-	char *hid, *uid, *p;
+	char *hid, *uid, *p, *addr;
 	char acpiid[ACPIHID_UID_LEN + ACPIHID_HID_LEN] = {0};
-	int ret, i;
-
-	ret = sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid);
-	if (ret != 4) {
-		ret = sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid);
-		if (ret != 5) {
-			pr_err("Invalid command line: ivrs_acpihid(%s)\n", str);
-			return 1;
+	int i;
+
+	addr = strchr(str, '@');
+	if (!addr) {
+		if (sscanf(str, "[%x:%x.%x]=%s", &bus, &dev, &fn, acpiid) == 4 ||
+		    sscanf(str, "[%x:%x:%x.%x]=%s", &seg, &bus, &dev, &fn, acpiid) == 5) {
+			pr_warn("ivrs_acpihid%s option format deprecated; use ivrs_acpihid=%s@%04x:%02x:%02x.%d instead\n",
+				str, acpiid, seg, bus, dev, fn);
+			goto found;
 		}
+		goto not_found;
 	}
 
+	/* We have the '@', make it the terminator to get just the acpiid */
+	*addr++ = 0;
+
+	if (sscanf(str, "=%s", acpiid) != 1)
+		goto not_found;
+
+	if (sscanf(addr, "%x:%x.%x", &bus, &dev, &fn) == 3 ||
+	    sscanf(addr, "%x:%x:%x.%x", &seg, &bus, &dev, &fn) == 4)
+		goto found;
+
+not_found:
+	pr_err("Invalid command line: ivrs_acpihid%s\n", str);
+	return 1;
+
+found:
 	p = acpiid;
 	hid = strsep(&p, ":");
 	uid = p;
-- 
GitLab


From 73b6924cdebc899de9b719e1319aa86c6bed4acf Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 29 Oct 2022 18:35:50 +0800
Subject: [PATCH 1830/4122] iommu/mediatek: Check return value after calling
 platform_get_resource()

platform_get_resource() may return NULL pointer, we need check its
return value to avoid null-ptr-deref in resource_size().

Fixes: 42d57fc58aeb ("iommu/mediatek: Initialise/Remove for multi bank dev")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221029103550.3774365-1-yangyingliang@huawei.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 2ab2ecfe01f80..2d14dc846b839 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1173,6 +1173,8 @@ static int mtk_iommu_probe(struct platform_device *pdev)
 
 	banks_num = data->plat_data->banks_num;
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
 	if (resource_size(res) < banks_num * MTK_IOMMU_BANK_SZ) {
 		dev_err(dev, "banknr %d. res %pR is not enough.\n", banks_num, res);
 		return -EINVAL;
-- 
GitLab


From 59a316fdc4d564dc5e811321a8b20a444fc0094c Mon Sep 17 00:00:00 2001
From: Fabien Parent <fparent@baylibre.com>
Date: Wed, 2 Nov 2022 16:18:07 +0100
Subject: [PATCH 1831/4122] dt-bindings: iommu: mediatek: add binding
 documentation for MT8365 SoC

Add IOMMU binding documentation for the MT8365 SoC.

Signed-off-by: Fabien Parent <fparent@baylibre.com>
Signed-off-by: Markus Schneider-Pargmann <msp@baylibre.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Yong Wu <yong.wu@mediatek.com>
Signed-off-by: Alexandre Mergnat <amergnat@baylibre.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221001-iommu-support-v6-1-be4fe8da254b@baylibre.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 .../bindings/iommu/mediatek,iommu.yaml        |  2 +
 .../memory/mediatek,mt8365-larb-port.h        | 90 +++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 include/dt-bindings/memory/mediatek,mt8365-larb-port.h

diff --git a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
index 839e3be0bf3ca..5b6395bc10e0c 100644
--- a/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
+++ b/Documentation/devicetree/bindings/iommu/mediatek,iommu.yaml
@@ -82,6 +82,7 @@ properties:
           - mediatek,mt8195-iommu-vdo        # generation two
           - mediatek,mt8195-iommu-vpp        # generation two
           - mediatek,mt8195-iommu-infra      # generation two
+          - mediatek,mt8365-m4u  # generation two
 
       - description: mt7623 generation one
         items:
@@ -132,6 +133,7 @@ properties:
       dt-binding/memory/mt8186-memory-port.h for mt8186,
       dt-binding/memory/mt8192-larb-port.h for mt8192.
       dt-binding/memory/mt8195-memory-port.h for mt8195.
+      dt-binding/memory/mediatek,mt8365-larb-port.h for mt8365.
 
   power-domains:
     maxItems: 1
diff --git a/include/dt-bindings/memory/mediatek,mt8365-larb-port.h b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h
new file mode 100644
index 0000000000000..56d5a5dd519e7
--- /dev/null
+++ b/include/dt-bindings/memory/mediatek,mt8365-larb-port.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Yong Wu <yong.wu@mediatek.com>
+ */
+#ifndef _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_
+#define _DT_BINDINGS_MEMORY_MT8365_LARB_PORT_H_
+
+#include <dt-bindings/memory/mtk-memory-port.h>
+
+#define M4U_LARB0_ID			0
+#define M4U_LARB1_ID			1
+#define M4U_LARB2_ID			2
+#define M4U_LARB3_ID			3
+
+/* larb0 */
+#define M4U_PORT_DISP_OVL0		MTK_M4U_ID(M4U_LARB0_ID, 0)
+#define M4U_PORT_DISP_OVL0_2L		MTK_M4U_ID(M4U_LARB0_ID, 1)
+#define M4U_PORT_DISP_RDMA0		MTK_M4U_ID(M4U_LARB0_ID, 2)
+#define M4U_PORT_DISP_WDMA0		MTK_M4U_ID(M4U_LARB0_ID, 3)
+#define M4U_PORT_DISP_RDMA1		MTK_M4U_ID(M4U_LARB0_ID, 4)
+#define M4U_PORT_MDP_RDMA0		MTK_M4U_ID(M4U_LARB0_ID, 5)
+#define M4U_PORT_MDP_WROT1		MTK_M4U_ID(M4U_LARB0_ID, 6)
+#define M4U_PORT_MDP_WROT0		MTK_M4U_ID(M4U_LARB0_ID, 7)
+#define M4U_PORT_MDP_RDMA1		MTK_M4U_ID(M4U_LARB0_ID, 8)
+#define M4U_PORT_DISP_FAKE0		MTK_M4U_ID(M4U_LARB0_ID, 9)
+#define M4U_PORT_APU_READ		MTK_M4U_ID(M4U_LARB0_ID, 10)
+#define M4U_PORT_APU_WRITE		MTK_M4U_ID(M4U_LARB0_ID, 11)
+
+/* larb1 */
+#define M4U_PORT_VENC_RCPU		MTK_M4U_ID(M4U_LARB1_ID, 0)
+#define M4U_PORT_VENC_REC		MTK_M4U_ID(M4U_LARB1_ID, 1)
+#define M4U_PORT_VENC_BSDMA		MTK_M4U_ID(M4U_LARB1_ID, 2)
+#define M4U_PORT_VENC_SV_COMV		MTK_M4U_ID(M4U_LARB1_ID, 3)
+#define M4U_PORT_VENC_RD_COMV		MTK_M4U_ID(M4U_LARB1_ID, 4)
+#define M4U_PORT_VENC_NBM_RDMA		MTK_M4U_ID(M4U_LARB1_ID, 5)
+#define M4U_PORT_VENC_NBM_RDMA_LITE	MTK_M4U_ID(M4U_LARB1_ID, 6)
+#define M4U_PORT_JPGENC_Y_RDMA		MTK_M4U_ID(M4U_LARB1_ID, 7)
+#define M4U_PORT_JPGENC_C_RDMA		MTK_M4U_ID(M4U_LARB1_ID, 8)
+#define M4U_PORT_JPGENC_Q_TABLE		MTK_M4U_ID(M4U_LARB1_ID, 9)
+#define M4U_PORT_JPGENC_BSDMA		MTK_M4U_ID(M4U_LARB1_ID, 10)
+#define M4U_PORT_JPGDEC_WDMA		MTK_M4U_ID(M4U_LARB1_ID, 11)
+#define M4U_PORT_JPGDEC_BSDMA		MTK_M4U_ID(M4U_LARB1_ID, 12)
+#define M4U_PORT_VENC_NBM_WDMA		MTK_M4U_ID(M4U_LARB1_ID, 13)
+#define M4U_PORT_VENC_NBM_WDMA_LITE	MTK_M4U_ID(M4U_LARB1_ID, 14)
+#define M4U_PORT_VENC_CUR_LUMA		MTK_M4U_ID(M4U_LARB1_ID, 15)
+#define M4U_PORT_VENC_CUR_CHROMA	MTK_M4U_ID(M4U_LARB1_ID, 16)
+#define M4U_PORT_VENC_REF_LUMA		MTK_M4U_ID(M4U_LARB1_ID, 17)
+#define M4U_PORT_VENC_REF_CHROMA	MTK_M4U_ID(M4U_LARB1_ID, 18)
+
+/* larb2 */
+#define M4U_PORT_CAM_IMGO		MTK_M4U_ID(M4U_LARB2_ID, 0)
+#define M4U_PORT_CAM_RRZO		MTK_M4U_ID(M4U_LARB2_ID, 1)
+#define M4U_PORT_CAM_AAO		MTK_M4U_ID(M4U_LARB2_ID, 2)
+#define M4U_PORT_CAM_LCS		MTK_M4U_ID(M4U_LARB2_ID, 3)
+#define M4U_PORT_CAM_ESFKO		MTK_M4U_ID(M4U_LARB2_ID, 4)
+#define M4U_PORT_CAM_CAM_SV0		MTK_M4U_ID(M4U_LARB2_ID, 5)
+#define M4U_PORT_CAM_CAM_SV1		MTK_M4U_ID(M4U_LARB2_ID, 6)
+#define M4U_PORT_CAM_LSCI		MTK_M4U_ID(M4U_LARB2_ID, 7)
+#define M4U_PORT_CAM_LSCI_D		MTK_M4U_ID(M4U_LARB2_ID, 8)
+#define M4U_PORT_CAM_AFO		MTK_M4U_ID(M4U_LARB2_ID, 9)
+#define M4U_PORT_CAM_SPARE		MTK_M4U_ID(M4U_LARB2_ID, 10)
+#define M4U_PORT_CAM_BPCI		MTK_M4U_ID(M4U_LARB2_ID, 11)
+#define M4U_PORT_CAM_BPCI_D		MTK_M4U_ID(M4U_LARB2_ID, 12)
+#define M4U_PORT_CAM_UFDI		MTK_M4U_ID(M4U_LARB2_ID, 13)
+#define M4U_PORT_CAM_IMGI		MTK_M4U_ID(M4U_LARB2_ID, 14)
+#define M4U_PORT_CAM_IMG2O		MTK_M4U_ID(M4U_LARB2_ID, 15)
+#define M4U_PORT_CAM_IMG3O		MTK_M4U_ID(M4U_LARB2_ID, 16)
+#define M4U_PORT_CAM_WPE0_I		MTK_M4U_ID(M4U_LARB2_ID, 17)
+#define M4U_PORT_CAM_WPE1_I		MTK_M4U_ID(M4U_LARB2_ID, 18)
+#define M4U_PORT_CAM_WPE_O		MTK_M4U_ID(M4U_LARB2_ID, 19)
+#define M4U_PORT_CAM_FD0_I		MTK_M4U_ID(M4U_LARB2_ID, 20)
+#define M4U_PORT_CAM_FD1_I		MTK_M4U_ID(M4U_LARB2_ID, 21)
+#define M4U_PORT_CAM_FD0_O		MTK_M4U_ID(M4U_LARB2_ID, 22)
+#define M4U_PORT_CAM_FD1_O		MTK_M4U_ID(M4U_LARB2_ID, 23)
+
+/* larb3 */
+#define M4U_PORT_HW_VDEC_MC_EXT		MTK_M4U_ID(M4U_LARB3_ID, 0)
+#define M4U_PORT_HW_VDEC_UFO_EXT	MTK_M4U_ID(M4U_LARB3_ID, 1)
+#define M4U_PORT_HW_VDEC_PP_EXT		MTK_M4U_ID(M4U_LARB3_ID, 2)
+#define M4U_PORT_HW_VDEC_PRED_RD_EXT	MTK_M4U_ID(M4U_LARB3_ID, 3)
+#define M4U_PORT_HW_VDEC_PRED_WR_EXT	MTK_M4U_ID(M4U_LARB3_ID, 4)
+#define M4U_PORT_HW_VDEC_PPWRAP_EXT	MTK_M4U_ID(M4U_LARB3_ID, 5)
+#define M4U_PORT_HW_VDEC_TILE_EXT	MTK_M4U_ID(M4U_LARB3_ID, 6)
+#define M4U_PORT_HW_VDEC_VLD_EXT	MTK_M4U_ID(M4U_LARB3_ID, 7)
+#define M4U_PORT_HW_VDEC_VLD2_EXT	MTK_M4U_ID(M4U_LARB3_ID, 8)
+#define M4U_PORT_HW_VDEC_AVC_MV_EXT	MTK_M4U_ID(M4U_LARB3_ID, 9)
+#define M4U_PORT_HW_VDEC_RG_CTRL_DMA_EXT MTK_M4U_ID(M4U_LARB3_ID, 10)
+
+#endif
-- 
GitLab


From 65df7d824f82f4dd3552b5a62ae8db07f25e423f Mon Sep 17 00:00:00 2001
From: Fabien Parent <fparent@baylibre.com>
Date: Wed, 2 Nov 2022 16:18:08 +0100
Subject: [PATCH 1832/4122] iommu/mediatek: add support for 6-bit encoded port
 IDs

Until now the port ID was always encoded as a 5-bit data. On MT8365,
the port ID is encoded as a 6-bit data. This requires to add extra
macro F_MMU_INT_ID_LARB_ID_EXT, and F_MMU_INT_ID_PORT_ID_EXT in order
to support 6-bit encoded port IDs.

Signed-off-by: Fabien Parent <fparent@baylibre.com>
Signed-off-by: Markus Schneider-Pargmann <msp@baylibre.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Yong Wu <yong.wu@mediatek.com>
Signed-off-by: Alexandre Mergnat <amergnat@baylibre.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221001-iommu-support-v6-2-be4fe8da254b@baylibre.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 2d14dc846b839..885ba5233b99f 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -108,8 +108,12 @@
 #define F_MMU_INT_ID_SUB_COMM_ID(a)		(((a) >> 7) & 0x3)
 #define F_MMU_INT_ID_COMM_ID_EXT(a)		(((a) >> 10) & 0x7)
 #define F_MMU_INT_ID_SUB_COMM_ID_EXT(a)		(((a) >> 7) & 0x7)
+/* Macro for 5 bits length port ID field (default) */
 #define F_MMU_INT_ID_LARB_ID(a)			(((a) >> 7) & 0x7)
 #define F_MMU_INT_ID_PORT_ID(a)			(((a) >> 2) & 0x1f)
+/* Macro for 6 bits length port ID field */
+#define F_MMU_INT_ID_LARB_ID_WID_6(a)		(((a) >> 8) & 0x7)
+#define F_MMU_INT_ID_PORT_ID_WID_6(a)		(((a) >> 2) & 0x3f)
 
 #define MTK_PROTECT_PA_ALIGN			256
 #define MTK_IOMMU_BANK_SZ			0x1000
@@ -139,6 +143,7 @@
 #define IFA_IOMMU_PCIE_SUPPORT		BIT(16)
 #define PGTABLE_PA_35_EN		BIT(17)
 #define TF_PORT_TO_ADDR_MT8173		BIT(18)
+#define INT_ID_PORT_WIDTH_6		BIT(19)
 
 #define MTK_IOMMU_HAS_FLAG_MASK(pdata, _x, mask)	\
 				((((pdata)->flags) & (mask)) == (_x))
@@ -441,14 +446,19 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
 	fault_pa |= (u64)pa34_32 << 32;
 
 	if (MTK_IOMMU_IS_TYPE(plat_data, MTK_IOMMU_TYPE_MM)) {
-		fault_port = F_MMU_INT_ID_PORT_ID(regval);
 		if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_2BITS)) {
 			fault_larb = F_MMU_INT_ID_COMM_ID(regval);
 			sub_comm = F_MMU_INT_ID_SUB_COMM_ID(regval);
+			fault_port = F_MMU_INT_ID_PORT_ID(regval);
 		} else if (MTK_IOMMU_HAS_FLAG(plat_data, HAS_SUB_COMM_3BITS)) {
 			fault_larb = F_MMU_INT_ID_COMM_ID_EXT(regval);
 			sub_comm = F_MMU_INT_ID_SUB_COMM_ID_EXT(regval);
+			fault_port = F_MMU_INT_ID_PORT_ID(regval);
+		} else if (MTK_IOMMU_HAS_FLAG(plat_data, INT_ID_PORT_WIDTH_6)) {
+			fault_port = F_MMU_INT_ID_PORT_ID_WID_6(regval);
+			fault_larb = F_MMU_INT_ID_LARB_ID_WID_6(regval);
 		} else {
+			fault_port = F_MMU_INT_ID_PORT_ID(regval);
 			fault_larb = F_MMU_INT_ID_LARB_ID(regval);
 		}
 		fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm];
-- 
GitLab


From 3cd0e4a34d5a9bcff90e0c104800700346e42658 Mon Sep 17 00:00:00 2001
From: Fabien Parent <fparent@baylibre.com>
Date: Wed, 2 Nov 2022 16:18:09 +0100
Subject: [PATCH 1833/4122] iommu/mediatek: add support for MT8365 SoC

Add IOMMU support for MT8365 SoC.

Signed-off-by: Fabien Parent <fparent@baylibre.com>
Reviewed-by: Amjad Ouled-Ameur <aouledameur@baylibre.com>
Tested-by: Amjad Ouled-Ameur <aouledameur@baylibre.com>
Signed-off-by: Markus Schneider-Pargmann <msp@baylibre.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Yong Wu <yong.wu@mediatek.com>
Signed-off-by: Alexandre Mergnat <amergnat@baylibre.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221001-iommu-support-v6-3-be4fe8da254b@baylibre.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 885ba5233b99f..c80f33dd2d434 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -170,6 +170,7 @@ enum mtk_iommu_plat {
 	M4U_MT8186,
 	M4U_MT8192,
 	M4U_MT8195,
+	M4U_MT8365,
 };
 
 struct mtk_iommu_iova_region {
@@ -1528,6 +1529,17 @@ static const struct mtk_iommu_plat_data mt8195_data_vpp = {
 			   {4, MTK_INVALID_LARBID, MTK_INVALID_LARBID, MTK_INVALID_LARBID, 6}},
 };
 
+static const struct mtk_iommu_plat_data mt8365_data = {
+	.m4u_plat	= M4U_MT8365,
+	.flags		= RESET_AXI | INT_ID_PORT_WIDTH_6,
+	.inv_sel_reg	= REG_MMU_INV_SEL_GEN1,
+	.banks_num	= 1,
+	.banks_enable	= {true},
+	.iova_region	= single_domain,
+	.iova_region_nr	= ARRAY_SIZE(single_domain),
+	.larbid_remap	= {{0}, {1}, {2}, {3}, {4}, {5}}, /* Linear mapping. */
+};
+
 static const struct of_device_id mtk_iommu_of_ids[] = {
 	{ .compatible = "mediatek,mt2712-m4u", .data = &mt2712_data},
 	{ .compatible = "mediatek,mt6779-m4u", .data = &mt6779_data},
@@ -1540,6 +1552,7 @@ static const struct of_device_id mtk_iommu_of_ids[] = {
 	{ .compatible = "mediatek,mt8195-iommu-infra", .data = &mt8195_data_infra},
 	{ .compatible = "mediatek,mt8195-iommu-vdo",   .data = &mt8195_data_vdo},
 	{ .compatible = "mediatek,mt8195-iommu-vpp",   .data = &mt8195_data_vpp},
+	{ .compatible = "mediatek,mt8365-m4u", .data = &mt8365_data},
 	{}
 };
 
-- 
GitLab


From 01657bc14a3990c665375f77978631fee77b1fce Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Fri, 4 Nov 2022 19:51:43 +0000
Subject: [PATCH 1834/4122] iommu: Avoid races around device probe

We currently have 3 different ways that __iommu_probe_device() may be
called, but no real guarantee that multiple callers can't tread on each
other, especially once asynchronous driver probe gets involved. It would
likely have taken a fair bit of luck to hit this previously, but commit
57365a04c921 ("iommu: Move bus setup to IOMMU device registration") ups
the odds since now it's not just omap-iommu that may trigger multiple
bus_iommu_probe() calls in parallel if probing asynchronously.

Add a lock to ensure we can't try to double-probe a device, and also
close some possible race windows to make sure we're truly robust against
trying to double-initialise a group via two different member devices.

Reported-by: Brian Norris <briannorris@chromium.org>
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Tested-by: Brian Norris <briannorris@chromium.org>
Fixes: 57365a04c921 ("iommu: Move bus setup to IOMMU device registration")
Link: https://lore.kernel.org/r/1946ef9f774851732eed78760a78ec40dbc6d178.1667591503.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/iommu.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6ca377f4fbf9e..7c99d8eb3182a 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -306,13 +306,23 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 	const struct iommu_ops *ops = dev->bus->iommu_ops;
 	struct iommu_device *iommu_dev;
 	struct iommu_group *group;
+	static DEFINE_MUTEX(iommu_probe_device_lock);
 	int ret;
 
 	if (!ops)
 		return -ENODEV;
-
-	if (!dev_iommu_get(dev))
-		return -ENOMEM;
+	/*
+	 * Serialise to avoid races between IOMMU drivers registering in
+	 * parallel and/or the "replay" calls from ACPI/OF code via client
+	 * driver probe. Once the latter have been cleaned up we should
+	 * probably be able to use device_lock() here to minimise the scope,
+	 * but for now enforcing a simple global ordering is fine.
+	 */
+	mutex_lock(&iommu_probe_device_lock);
+	if (!dev_iommu_get(dev)) {
+		ret = -ENOMEM;
+		goto err_unlock;
+	}
 
 	if (!try_module_get(ops->owner)) {
 		ret = -EINVAL;
@@ -333,11 +343,14 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
 		ret = PTR_ERR(group);
 		goto out_release;
 	}
-	iommu_group_put(group);
 
+	mutex_lock(&group->mutex);
 	if (group_list && !group->default_domain && list_empty(&group->entry))
 		list_add_tail(&group->entry, group_list);
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
 
+	mutex_unlock(&iommu_probe_device_lock);
 	iommu_device_link(iommu_dev, dev);
 
 	return 0;
@@ -352,6 +365,9 @@ out_module_put:
 err_free:
 	dev_iommu_free(dev);
 
+err_unlock:
+	mutex_unlock(&iommu_probe_device_lock);
+
 	return ret;
 }
 
@@ -1824,11 +1840,11 @@ int bus_iommu_probe(struct bus_type *bus)
 		return ret;
 
 	list_for_each_entry_safe(group, next, &group_list, entry) {
+		mutex_lock(&group->mutex);
+
 		/* Remove item from the list */
 		list_del_init(&group->entry);
 
-		mutex_lock(&group->mutex);
-
 		/* Try to allocate default domain */
 		probe_alloc_default_domain(bus, group);
 
-- 
GitLab


From 59bbf596791b89c7f88fdcac29dfc39c1221d25d Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:28:59 +0100
Subject: [PATCH 1835/4122] iommu/s390: Make attach succeed even if the device
 is in error state

If a zPCI device is in the error state while switching IOMMU domains
zpci_register_ioat() will fail and we would end up with the device not
attached to any domain. In this state since zdev->dma_table == NULL
a reset via zpci_hot_reset_device() would wrongfully re-initialize the
device for DMA API usage using zpci_dma_init_device(). As automatic
recovery is currently disabled while attached to an IOMMU domain this
only affects slot resets triggered through other means but will affect
automatic recovery once we switch to using dma-iommu.

Additionally with that switch common code expects attaching to the
default domain to always work so zpci_register_ioat() should only fail
if there is no chance to recover anyway, e.g. if the device has been
unplugged.

Improve the robustness of attach by specifically looking at the status
returned by zpci_mod_fc() to determine if the device is unavailable and
in this case simply ignore the error. Once the device is reset
zpci_hot_reset_device() will then correctly set the domain's DMA
translation tables.

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Reviewed-by: Matthew Rosato <mjrosato@linux.ibm.com>
Link: https://lore.kernel.org/r/20221109142903.4080275-2-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/s390/include/asm/pci.h |  2 +-
 arch/s390/kvm/pci.c         |  6 ++++--
 arch/s390/pci/pci.c         | 11 ++++++-----
 arch/s390/pci/pci_dma.c     |  3 ++-
 drivers/iommu/s390-iommu.c  |  9 +++++++--
 5 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 15f8714ca9b7c..07361e2fd8c51 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -221,7 +221,7 @@ void zpci_device_reserved(struct zpci_dev *zdev);
 bool zpci_is_device_configured(struct zpci_dev *zdev);
 
 int zpci_hot_reset_device(struct zpci_dev *zdev);
-int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
+int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64, u8 *);
 int zpci_unregister_ioat(struct zpci_dev *, u8);
 void zpci_remove_reserved_devices(void);
 void zpci_update_fh(struct zpci_dev *zdev, u32 fh);
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index c50c1645c0aec..03964c0e1fdf3 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -434,6 +434,7 @@ static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
 static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
 {
 	struct zpci_dev *zdev = opaque;
+	u8 status;
 	int rc;
 
 	if (!zdev)
@@ -486,7 +487,7 @@ static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm)
 
 	/* Re-register the IOMMU that was already created */
 	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-				virt_to_phys(zdev->dma_table));
+				virt_to_phys(zdev->dma_table), &status);
 	if (rc)
 		goto clear_gisa;
 
@@ -516,6 +517,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
 {
 	struct zpci_dev *zdev = opaque;
 	struct kvm *kvm;
+	u8 status;
 
 	if (!zdev)
 		return;
@@ -554,7 +556,7 @@ static void kvm_s390_pci_unregister_kvm(void *opaque)
 
 	/* Re-register the IOMMU that was already created */
 	zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-			   virt_to_phys(zdev->dma_table));
+			   virt_to_phys(zdev->dma_table), &status);
 
 out:
 	spin_lock(&kvm->arch.kzdev_list_lock);
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 73cdc55393847..a703dcd94a689 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -116,20 +116,20 @@ EXPORT_SYMBOL_GPL(pci_proc_domain);
 
 /* Modify PCI: Register I/O address translation parameters */
 int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
-		       u64 base, u64 limit, u64 iota)
+		       u64 base, u64 limit, u64 iota, u8 *status)
 {
 	u64 req = ZPCI_CREATE_REQ(zdev->fh, dmaas, ZPCI_MOD_FC_REG_IOAT);
 	struct zpci_fib fib = {0};
-	u8 cc, status;
+	u8 cc;
 
 	WARN_ON_ONCE(iota & 0x3fff);
 	fib.pba = base;
 	fib.pal = limit;
 	fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
 	fib.gd = zdev->gisa;
-	cc = zpci_mod_fc(req, &fib, &status);
+	cc = zpci_mod_fc(req, &fib, status);
 	if (cc)
-		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, status);
+		zpci_dbg(3, "reg ioat fid:%x, cc:%d, status:%d\n", zdev->fid, cc, *status);
 	return cc;
 }
 EXPORT_SYMBOL_GPL(zpci_register_ioat);
@@ -764,6 +764,7 @@ EXPORT_SYMBOL_GPL(zpci_disable_device);
  */
 int zpci_hot_reset_device(struct zpci_dev *zdev)
 {
+	u8 status;
 	int rc;
 
 	zpci_dbg(3, "rst fid:%x, fh:%x\n", zdev->fid, zdev->fh);
@@ -787,7 +788,7 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
 
 	if (zdev->dma_table)
 		rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-					virt_to_phys(zdev->dma_table));
+					virt_to_phys(zdev->dma_table), &status);
 	else
 		rc = zpci_dma_init_device(zdev);
 	if (rc) {
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 227cf0a62800b..dee825ee73052 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -547,6 +547,7 @@ static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 	
 int zpci_dma_init_device(struct zpci_dev *zdev)
 {
+	u8 status;
 	int rc;
 
 	/*
@@ -598,7 +599,7 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 
 	}
 	if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-			       virt_to_phys(zdev->dma_table))) {
+			       virt_to_phys(zdev->dma_table), &status)) {
 		rc = -EIO;
 		goto free_bitmap;
 	}
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 7fb512bece9a8..e2c886bc43761 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -98,6 +98,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	unsigned long flags;
+	u8 status;
 	int cc;
 
 	if (!zdev)
@@ -113,8 +114,12 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 		zpci_dma_exit_device(zdev);
 
 	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-				virt_to_phys(s390_domain->dma_table));
-	if (cc)
+				virt_to_phys(s390_domain->dma_table), &status);
+	/*
+	 * If the device is undergoing error recovery the reset code
+	 * will re-establish the new domain.
+	 */
+	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
 		return -EIO;
 	zdev->dma_table = s390_domain->dma_table;
 
-- 
GitLab


From c228f5a043370ef02867e4f0aab1bdc8422500e6 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:29:00 +0100
Subject: [PATCH 1836/4122] iommu/s390: Add I/O TLB ops

Currently s390-iommu does an I/O TLB flush (RPCIT) for every update of
the I/O translation table explicitly. For one this is wasteful since
RPCIT can be skipped after a mapping operation if zdev->tlb_refresh is
unset. Moreover we can do a single RPCIT for a range of pages including
whne doing lazy unmapping.

Thankfully both of these optimizations can be achieved by implementing
the IOMMU operations common code provides for the different types of I/O
tlb flushes:

 * flush_iotlb_all: Flushes the I/O TLB for the entire IOVA space
 * iotlb_sync:  Flushes the I/O TLB for a range of pages that can be
   gathered up, for example to implement lazy unmapping.
 * iotlb_sync_map: Flushes the I/O TLB after a mapping operation

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221109142903.4080275-3-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 67 +++++++++++++++++++++++++++++++-------
 1 file changed, 56 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index e2c886bc43761..9771bce86e94c 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -199,14 +199,63 @@ static void s390_iommu_release_device(struct device *dev)
 		__s390_iommu_detach_device(zdev);
 }
 
+static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	struct zpci_dev *zdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+		zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
+				   zdev->end_dma - zdev->start_dma + 1);
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+}
+
+static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
+				  struct iommu_iotlb_gather *gather)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	size_t size = gather->end - gather->start + 1;
+	struct zpci_dev *zdev;
+	unsigned long flags;
+
+	/* If gather was never added to there is nothing to flush */
+	if (!gather->end)
+		return;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+		zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
+				   size);
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+}
+
+static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
+				      unsigned long iova, size_t size)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	struct zpci_dev *zdev;
+	unsigned long flags;
+
+	spin_lock_irqsave(&s390_domain->list_lock, flags);
+	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+		if (!zdev->tlb_refresh)
+			continue;
+		zpci_refresh_trans((u64)zdev->fh << 32,
+				   iova, size);
+	}
+	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+}
+
 static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 				   phys_addr_t pa, dma_addr_t dma_addr,
 				   unsigned long nr_pages, int flags)
 {
 	phys_addr_t page_addr = pa & PAGE_MASK;
-	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags, i;
-	struct zpci_dev *zdev;
 	unsigned long *entry;
 	int rc = 0;
 
@@ -225,15 +274,6 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 		dma_addr += PAGE_SIZE;
 	}
 
-	spin_lock(&s390_domain->list_lock);
-	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
-		rc = zpci_refresh_trans((u64)zdev->fh << 32,
-					start_dma_addr, nr_pages * PAGE_SIZE);
-		if (rc)
-			break;
-	}
-	spin_unlock(&s390_domain->list_lock);
-
 undo_cpu_trans:
 	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
 		flags = ZPCI_PTE_INVALID;
@@ -340,6 +380,8 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
 	if (rc)
 		return 0;
 
+	iommu_iotlb_gather_add_range(gather, iova, size);
+
 	return size;
 }
 
@@ -384,6 +426,9 @@ static const struct iommu_ops s390_iommu_ops = {
 		.detach_dev	= s390_iommu_detach_device,
 		.map_pages	= s390_iommu_map_pages,
 		.unmap_pages	= s390_iommu_unmap_pages,
+		.flush_iotlb_all = s390_iommu_flush_iotlb_all,
+		.iotlb_sync      = s390_iommu_iotlb_sync,
+		.iotlb_sync_map  = s390_iommu_iotlb_sync_map,
 		.iova_to_phys	= s390_iommu_iova_to_phys,
 		.free		= s390_domain_free,
 	}
-- 
GitLab


From 2ba8336dab5fb81452aea9c21dfc870050a017f3 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:29:01 +0100
Subject: [PATCH 1837/4122] iommu/s390: Use RCU to allow concurrent domain_list
 iteration

The s390_domain->devices list is only added to when new devices are
attached but is iterated through in read-only fashion for every mapping
operation as well as for I/O TLB flushes and thus in performance
critical code causing contention on the s390_domain->list_lock.
Fortunately such a read-mostly linked list is a standard use case for
RCU. This change closely follows the example fpr RCU protected list
given in Documentation/RCU/listRCU.rst.

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221109142903.4080275-4-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/s390/include/asm/pci.h |  1 +
 arch/s390/pci/pci.c         |  2 +-
 drivers/iommu/s390-iommu.c  | 44 +++++++++++++++++++++++--------------
 3 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 07361e2fd8c51..e4c3e4e04d306 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -119,6 +119,7 @@ struct zpci_dev {
 	struct list_head entry;		/* list of all zpci_devices, needed for hotplug, etc. */
 	struct list_head iommu_list;
 	struct kref kref;
+	struct rcu_head rcu;
 	struct hotplug_slot hotplug_slot;
 
 	enum zpci_state state;
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index a703dcd94a689..ef38b1514c77a 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -996,7 +996,7 @@ void zpci_release_device(struct kref *kref)
 		break;
 	}
 	zpci_dbg(3, "rem fid:%x\n", zdev->fid);
-	kfree(zdev);
+	kfree_rcu(zdev, rcu);
 }
 
 int zpci_report_error(struct pci_dev *pdev,
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 9771bce86e94c..cf5dcbcea4e03 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -10,6 +10,8 @@
 #include <linux/iommu.h>
 #include <linux/iommu-helper.h>
 #include <linux/sizes.h>
+#include <linux/rculist.h>
+#include <linux/rcupdate.h>
 #include <asm/pci_dma.h>
 
 static const struct iommu_ops s390_iommu_ops;
@@ -20,6 +22,7 @@ struct s390_domain {
 	unsigned long		*dma_table;
 	spinlock_t		dma_table_lock;
 	spinlock_t		list_lock;
+	struct rcu_head		rcu;
 };
 
 static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
@@ -61,18 +64,28 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 
 	spin_lock_init(&s390_domain->dma_table_lock);
 	spin_lock_init(&s390_domain->list_lock);
-	INIT_LIST_HEAD(&s390_domain->devices);
+	INIT_LIST_HEAD_RCU(&s390_domain->devices);
 
 	return &s390_domain->domain;
 }
 
+static void s390_iommu_rcu_free_domain(struct rcu_head *head)
+{
+	struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);
+
+	dma_cleanup_tables(s390_domain->dma_table);
+	kfree(s390_domain);
+}
+
 static void s390_domain_free(struct iommu_domain *domain)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 
+	rcu_read_lock();
 	WARN_ON(!list_empty(&s390_domain->devices));
-	dma_cleanup_tables(s390_domain->dma_table);
-	kfree(s390_domain);
+	rcu_read_unlock();
+
+	call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
 }
 
 static void __s390_iommu_detach_device(struct zpci_dev *zdev)
@@ -84,7 +97,7 @@ static void __s390_iommu_detach_device(struct zpci_dev *zdev)
 		return;
 
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	list_del_init(&zdev->iommu_list);
+	list_del_rcu(&zdev->iommu_list);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
 	zpci_unregister_ioat(zdev, 0);
@@ -127,7 +140,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 	zdev->s390_domain = s390_domain;
 
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	list_add(&zdev->iommu_list, &s390_domain->devices);
+	list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
 	return 0;
@@ -203,14 +216,13 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev;
-	unsigned long flags;
 
-	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
 		zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
 				   zdev->end_dma - zdev->start_dma + 1);
 	}
-	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+	rcu_read_unlock();
 }
 
 static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
@@ -219,18 +231,17 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	size_t size = gather->end - gather->start + 1;
 	struct zpci_dev *zdev;
-	unsigned long flags;
 
 	/* If gather was never added to there is nothing to flush */
 	if (!gather->end)
 		return;
 
-	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
 		zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
 				   size);
 	}
-	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+	rcu_read_unlock();
 }
 
 static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
@@ -238,16 +249,15 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	struct zpci_dev *zdev;
-	unsigned long flags;
 
-	spin_lock_irqsave(&s390_domain->list_lock, flags);
-	list_for_each_entry(zdev, &s390_domain->devices, iommu_list) {
+	rcu_read_lock();
+	list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
 		if (!zdev->tlb_refresh)
 			continue;
 		zpci_refresh_trans((u64)zdev->fh << 32,
 				   iova, size);
 	}
-	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
+	rcu_read_unlock();
 }
 
 static int s390_iommu_update_trans(struct s390_domain *s390_domain,
-- 
GitLab


From 08955af0600303455f57fe2f2a26f24f9b496b49 Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:29:02 +0100
Subject: [PATCH 1838/4122] iommu/s390: Optimize IOMMU table walking

When invalidating existing table entries for unmap there is no need to
know the physical address beforehand so don't do an extra walk of the
IOMMU table to get it. Also when invalidating entries not finding an
entry indicates an invalid unmap and not a lack of memory we also don't
need to undo updates in this case. Implement this by splitting
s390_iommu_update_trans() in a variant for validating and one for
invalidating translations.

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221109142903.4080275-5-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/s390-iommu.c | 69 ++++++++++++++++++++++++--------------
 1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index cf5dcbcea4e03..2b9a3e3bc6066 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -260,14 +260,14 @@ static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
 	rcu_read_unlock();
 }
 
-static int s390_iommu_update_trans(struct s390_domain *s390_domain,
-				   phys_addr_t pa, dma_addr_t dma_addr,
-				   unsigned long nr_pages, int flags)
+static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
+				     phys_addr_t pa, dma_addr_t dma_addr,
+				     unsigned long nr_pages, int flags)
 {
 	phys_addr_t page_addr = pa & PAGE_MASK;
 	unsigned long irq_flags, i;
 	unsigned long *entry;
-	int rc = 0;
+	int rc;
 
 	if (!nr_pages)
 		return 0;
@@ -275,7 +275,7 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
 	for (i = 0; i < nr_pages; i++) {
 		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
-		if (!entry) {
+		if (unlikely(!entry)) {
 			rc = -ENOMEM;
 			goto undo_cpu_trans;
 		}
@@ -283,19 +283,43 @@ static int s390_iommu_update_trans(struct s390_domain *s390_domain,
 		page_addr += PAGE_SIZE;
 		dma_addr += PAGE_SIZE;
 	}
+	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
+
+	return 0;
 
 undo_cpu_trans:
-	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
-		flags = ZPCI_PTE_INVALID;
-		while (i-- > 0) {
-			page_addr -= PAGE_SIZE;
-			dma_addr -= PAGE_SIZE;
-			entry = dma_walk_cpu_trans(s390_domain->dma_table,
-						   dma_addr);
-			if (!entry)
-				break;
-			dma_update_cpu_trans(entry, page_addr, flags);
+	while (i-- > 0) {
+		dma_addr -= PAGE_SIZE;
+		entry = dma_walk_cpu_trans(s390_domain->dma_table,
+					   dma_addr);
+		if (!entry)
+			break;
+		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
+	}
+	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
+
+	return rc;
+}
+
+static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
+				       dma_addr_t dma_addr, unsigned long nr_pages)
+{
+	unsigned long irq_flags, i;
+	unsigned long *entry;
+	int rc = 0;
+
+	if (!nr_pages)
+		return 0;
+
+	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
+	for (i = 0; i < nr_pages; i++) {
+		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
+		if (unlikely(!entry)) {
+			rc = -EINVAL;
+			break;
 		}
+		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
+		dma_addr += PAGE_SIZE;
 	}
 	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
@@ -308,8 +332,8 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
 				int prot, gfp_t gfp, size_t *mapped)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
-	int flags = ZPCI_PTE_VALID, rc = 0;
 	size_t size = pgcount << __ffs(pgsize);
+	int flags = ZPCI_PTE_VALID, rc = 0;
 
 	if (pgsize != SZ_4K)
 		return -EINVAL;
@@ -327,8 +351,8 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
 	if (!(prot & IOMMU_WRITE))
 		flags |= ZPCI_TABLE_PROTECTED;
 
-	rc = s390_iommu_update_trans(s390_domain, paddr, iova,
-				     pgcount, flags);
+	rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
+				       pgcount, flags);
 	if (!rc)
 		*mapped = size;
 
@@ -373,20 +397,13 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
 	size_t size = pgcount << __ffs(pgsize);
-	int flags = ZPCI_PTE_INVALID;
-	phys_addr_t paddr;
 	int rc;
 
 	if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
 	    (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
 		return 0;
 
-	paddr = s390_iommu_iova_to_phys(domain, iova);
-	if (!paddr)
-		return 0;
-
-	rc = s390_iommu_update_trans(s390_domain, paddr, iova,
-				     pgcount, flags);
+	rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
 	if (rc)
 		return 0;
 
-- 
GitLab


From 21c1f9021f0e7d28c3edfcc70e1ca1926ea3774e Mon Sep 17 00:00:00 2001
From: Niklas Schnelle <schnelle@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:29:03 +0100
Subject: [PATCH 1839/4122] s390/pci: use lock-free I/O translation updates

I/O translation tables on s390 use 8 byte page table entries and tables
which are allocated lazily but only freed when the entire I/O
translation table is torn down. Also each IOVA can at any time only
translate to one physical address Furthermore I/O table accesses by the
IOMMU hardware are cache coherent. With a bit of care we can thus use
atomic updates to manipulate the translation table without having to use
a global lock at all. This is done analogous to the existing I/O
translation table handling code used on Intel and AMD x86 systems.

Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Link: https://lore.kernel.org/r/20221109142903.4080275-6-schnelle@linux.ibm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 arch/s390/include/asm/pci.h |  1 -
 arch/s390/pci/pci_dma.c     | 74 ++++++++++++++++++++++---------------
 drivers/iommu/s390-iommu.c  | 37 +++++++------------
 3 files changed, 58 insertions(+), 54 deletions(-)

diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index e4c3e4e04d306..b248694e00247 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -157,7 +157,6 @@ struct zpci_dev {
 
 	/* DMA stuff */
 	unsigned long	*dma_table;
-	spinlock_t	dma_table_lock;
 	int		tlb_refresh;
 
 	spinlock_t	iommu_bitmap_lock;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index dee825ee73052..ea478d11fbd13 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -63,37 +63,55 @@ static void dma_free_page_table(void *table)
 	kmem_cache_free(dma_page_table_cache, table);
 }
 
-static unsigned long *dma_get_seg_table_origin(unsigned long *entry)
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep)
 {
+	unsigned long old_rte, rte;
 	unsigned long *sto;
 
-	if (reg_entry_isvalid(*entry))
-		sto = get_rt_sto(*entry);
-	else {
+	rte = READ_ONCE(*rtep);
+	if (reg_entry_isvalid(rte)) {
+		sto = get_rt_sto(rte);
+	} else {
 		sto = dma_alloc_cpu_table();
 		if (!sto)
 			return NULL;
 
-		set_rt_sto(entry, virt_to_phys(sto));
-		validate_rt_entry(entry);
-		entry_clr_protected(entry);
+		set_rt_sto(&rte, virt_to_phys(sto));
+		validate_rt_entry(&rte);
+		entry_clr_protected(&rte);
+
+		old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
+		if (old_rte != ZPCI_TABLE_INVALID) {
+			/* Somone else was faster, use theirs */
+			dma_free_cpu_table(sto);
+			sto = get_rt_sto(old_rte);
+		}
 	}
 	return sto;
 }
 
-static unsigned long *dma_get_page_table_origin(unsigned long *entry)
+static unsigned long *dma_get_page_table_origin(unsigned long *step)
 {
+	unsigned long old_ste, ste;
 	unsigned long *pto;
 
-	if (reg_entry_isvalid(*entry))
-		pto = get_st_pto(*entry);
-	else {
+	ste = READ_ONCE(*step);
+	if (reg_entry_isvalid(ste)) {
+		pto = get_st_pto(ste);
+	} else {
 		pto = dma_alloc_page_table();
 		if (!pto)
 			return NULL;
-		set_st_pto(entry, virt_to_phys(pto));
-		validate_st_entry(entry);
-		entry_clr_protected(entry);
+		set_st_pto(&ste, virt_to_phys(pto));
+		validate_st_entry(&ste);
+		entry_clr_protected(&ste);
+
+		old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
+		if (old_ste != ZPCI_TABLE_INVALID) {
+			/* Somone else was faster, use theirs */
+			dma_free_page_table(pto);
+			pto = get_st_pto(old_ste);
+		}
 	}
 	return pto;
 }
@@ -117,19 +135,24 @@ unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr)
 	return &pto[px];
 }
 
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags)
+void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
 {
+	unsigned long pte;
+
+	pte = READ_ONCE(*ptep);
 	if (flags & ZPCI_PTE_INVALID) {
-		invalidate_pt_entry(entry);
+		invalidate_pt_entry(&pte);
 	} else {
-		set_pt_pfaa(entry, page_addr);
-		validate_pt_entry(entry);
+		set_pt_pfaa(&pte, page_addr);
+		validate_pt_entry(&pte);
 	}
 
 	if (flags & ZPCI_TABLE_PROTECTED)
-		entry_set_protected(entry);
+		entry_set_protected(&pte);
 	else
-		entry_clr_protected(entry);
+		entry_clr_protected(&pte);
+
+	xchg(ptep, pte);
 }
 
 static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
@@ -137,18 +160,14 @@ static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
 {
 	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	phys_addr_t page_addr = (pa & PAGE_MASK);
-	unsigned long irq_flags;
 	unsigned long *entry;
 	int i, rc = 0;
 
 	if (!nr_pages)
 		return -EINVAL;
 
-	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
-	if (!zdev->dma_table) {
-		rc = -EINVAL;
-		goto out_unlock;
-	}
+	if (!zdev->dma_table)
+		return -EINVAL;
 
 	for (i = 0; i < nr_pages; i++) {
 		entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr);
@@ -173,8 +192,6 @@ undo_cpu_trans:
 			dma_update_cpu_trans(entry, page_addr, flags);
 		}
 	}
-out_unlock:
-	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
 	return rc;
 }
 
@@ -558,7 +575,6 @@ int zpci_dma_init_device(struct zpci_dev *zdev)
 	WARN_ON(zdev->s390_domain);
 
 	spin_lock_init(&zdev->iommu_bitmap_lock);
-	spin_lock_init(&zdev->dma_table_lock);
 
 	zdev->dma_table = dma_alloc_cpu_table();
 	if (!zdev->dma_table) {
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 2b9a3e3bc6066..ed33c6cce0836 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -20,7 +20,6 @@ struct s390_domain {
 	struct iommu_domain	domain;
 	struct list_head	devices;
 	unsigned long		*dma_table;
-	spinlock_t		dma_table_lock;
 	spinlock_t		list_lock;
 	struct rcu_head		rcu;
 };
@@ -62,7 +61,6 @@ static struct iommu_domain *s390_domain_alloc(unsigned domain_type)
 	s390_domain->domain.geometry.aperture_start = 0;
 	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
 
-	spin_lock_init(&s390_domain->dma_table_lock);
 	spin_lock_init(&s390_domain->list_lock);
 	INIT_LIST_HEAD_RCU(&s390_domain->devices);
 
@@ -265,14 +263,10 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 				     unsigned long nr_pages, int flags)
 {
 	phys_addr_t page_addr = pa & PAGE_MASK;
-	unsigned long irq_flags, i;
 	unsigned long *entry;
+	unsigned long i;
 	int rc;
 
-	if (!nr_pages)
-		return 0;
-
-	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
 	for (i = 0; i < nr_pages; i++) {
 		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
 		if (unlikely(!entry)) {
@@ -283,7 +277,6 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 		page_addr += PAGE_SIZE;
 		dma_addr += PAGE_SIZE;
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
 	return 0;
 
@@ -296,7 +289,6 @@ undo_cpu_trans:
 			break;
 		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
 	return rc;
 }
@@ -304,14 +296,10 @@ undo_cpu_trans:
 static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
 				       dma_addr_t dma_addr, unsigned long nr_pages)
 {
-	unsigned long irq_flags, i;
 	unsigned long *entry;
+	unsigned long i;
 	int rc = 0;
 
-	if (!nr_pages)
-		return 0;
-
-	spin_lock_irqsave(&s390_domain->dma_table_lock, irq_flags);
 	for (i = 0; i < nr_pages; i++) {
 		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr);
 		if (unlikely(!entry)) {
@@ -321,7 +309,6 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
 		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
 		dma_addr += PAGE_SIZE;
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, irq_flags);
 
 	return rc;
 }
@@ -363,7 +350,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 					   dma_addr_t iova)
 {
 	struct s390_domain *s390_domain = to_s390_domain(domain);
-	unsigned long *sto, *pto, *rto, flags;
+	unsigned long *rto, *sto, *pto;
+	unsigned long ste, pte, rte;
 	unsigned int rtx, sx, px;
 	phys_addr_t phys = 0;
 
@@ -376,16 +364,17 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
 	px = calc_px(iova);
 	rto = s390_domain->dma_table;
 
-	spin_lock_irqsave(&s390_domain->dma_table_lock, flags);
-	if (rto && reg_entry_isvalid(rto[rtx])) {
-		sto = get_rt_sto(rto[rtx]);
-		if (sto && reg_entry_isvalid(sto[sx])) {
-			pto = get_st_pto(sto[sx]);
-			if (pto && pt_entry_isvalid(pto[px]))
-				phys = pto[px] & ZPCI_PTE_ADDR_MASK;
+	rte = READ_ONCE(rto[rtx]);
+	if (reg_entry_isvalid(rte)) {
+		sto = get_rt_sto(rte);
+		ste = READ_ONCE(sto[sx]);
+		if (reg_entry_isvalid(ste)) {
+			pto = get_st_pto(ste);
+			pte = READ_ONCE(pto[px]);
+			if (pt_entry_isvalid(pte))
+				phys = pte & ZPCI_PTE_ADDR_MASK;
 		}
 	}
-	spin_unlock_irqrestore(&s390_domain->dma_table_lock, flags);
 
 	return phys;
 }
-- 
GitLab


From bbc4d205d93f52ee18dfa7858d51489c0506547f Mon Sep 17 00:00:00 2001
From: Marek Szyprowski <m.szyprowski@samsung.com>
Date: Thu, 10 Nov 2022 16:44:07 +0100
Subject: [PATCH 1840/4122] iommu/exynos: Fix driver initialization sequence

Registering a SYSMMU platform driver might directly trigger initializing
IOMMU domains and performing the initial mappings. Also the IOMMU core
might use the IOMMU hardware once it has been registered with
iommu_device_register() function. Ensure that all driver resources are
allocated and initialized before the driver advertise its presence to the
platform bus and the IOMMU subsystem.

Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Reviewed-by: Sam Protsenko <semen.protsenko@linaro.org>
Link: https://lore.kernel.org/r/20221110154407.26531-1-m.szyprowski@samsung.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/exynos-iommu.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c
index 45fd4850bacbd..b0cde22119875 100644
--- a/drivers/iommu/exynos-iommu.c
+++ b/drivers/iommu/exynos-iommu.c
@@ -708,10 +708,6 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev);
-	if (ret)
-		goto err_iommu_register;
-
 	platform_set_drvdata(pdev, data);
 
 	if (PG_ENT_SHIFT < 0) {
@@ -743,11 +739,13 @@ static int exynos_sysmmu_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(dev);
 
+	ret = iommu_device_register(&data->iommu, &exynos_iommu_ops, dev);
+	if (ret)
+		goto err_dma_set_mask;
+
 	return 0;
 
 err_dma_set_mask:
-	iommu_device_unregister(&data->iommu);
-err_iommu_register:
 	iommu_device_sysfs_remove(&data->iommu);
 	return ret;
 }
@@ -1432,12 +1430,6 @@ static int __init exynos_iommu_init(void)
 		return -ENOMEM;
 	}
 
-	ret = platform_driver_register(&exynos_sysmmu_driver);
-	if (ret) {
-		pr_err("%s: Failed to register driver\n", __func__);
-		goto err_reg_driver;
-	}
-
 	zero_lv2_table = kmem_cache_zalloc(lv2table_kmem_cache, GFP_KERNEL);
 	if (zero_lv2_table == NULL) {
 		pr_err("%s: Failed to allocate zero level2 page table\n",
@@ -1446,10 +1438,16 @@ static int __init exynos_iommu_init(void)
 		goto err_zero_lv2;
 	}
 
+	ret = platform_driver_register(&exynos_sysmmu_driver);
+	if (ret) {
+		pr_err("%s: Failed to register driver\n", __func__);
+		goto err_reg_driver;
+	}
+
 	return 0;
-err_zero_lv2:
-	platform_driver_unregister(&exynos_sysmmu_driver);
 err_reg_driver:
+	platform_driver_unregister(&exynos_sysmmu_driver);
+err_zero_lv2:
 	kmem_cache_destroy(lv2table_kmem_cache);
 	return ret;
 }
-- 
GitLab


From b577f7e679b763b706032e7a65c7b3a05c5f2184 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:35 +0000
Subject: [PATCH 1841/4122] iommu/mediatek-v1: Update to {map,unmap}_pages

Now that the core API has a proper notion of multi-page mappings, clean
up the old pgsize_bitmap hack by implementing the new interfaces
instead. This also brings a slight simplification since we no longer
need to worry about rolling back partial mappings on failure.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/768e90ff0c2d61e4723049c1349d8bac58daa437.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu_v1.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c
index 6e0e65831eb70..69682ee068d2b 100644
--- a/drivers/iommu/mtk_iommu_v1.c
+++ b/drivers/iommu/mtk_iommu_v1.c
@@ -327,44 +327,42 @@ static void mtk_iommu_v1_detach_device(struct iommu_domain *domain, struct devic
 }
 
 static int mtk_iommu_v1_map(struct iommu_domain *domain, unsigned long iova,
-			    phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+			    phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			    int prot, gfp_t gfp, size_t *mapped)
 {
 	struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain);
-	unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT;
 	unsigned long flags;
 	unsigned int i;
 	u32 *pgt_base_iova = dom->pgt_va + (iova  >> MT2701_IOMMU_PAGE_SHIFT);
 	u32 pabase = (u32)paddr;
-	int map_size = 0;
 
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	for (i = 0; i < page_num; i++) {
-		if (pgt_base_iova[i]) {
-			memset(pgt_base_iova, 0, i * sizeof(u32));
+	for (i = 0; i < pgcount; i++) {
+		if (pgt_base_iova[i])
 			break;
-		}
 		pgt_base_iova[i] = pabase | F_DESC_VALID | F_DESC_NONSEC;
 		pabase += MT2701_IOMMU_PAGE_SIZE;
-		map_size += MT2701_IOMMU_PAGE_SIZE;
 	}
 
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
-	mtk_iommu_v1_tlb_flush_range(dom->data, iova, size);
+	*mapped = i * MT2701_IOMMU_PAGE_SIZE;
+	mtk_iommu_v1_tlb_flush_range(dom->data, iova, *mapped);
 
-	return map_size == size ? 0 : -EEXIST;
+	return i == pgcount ? 0 : -EEXIST;
 }
 
 static size_t mtk_iommu_v1_unmap(struct iommu_domain *domain, unsigned long iova,
-				 size_t size, struct iommu_iotlb_gather *gather)
+				 size_t pgsize, size_t pgcount,
+				 struct iommu_iotlb_gather *gather)
 {
 	struct mtk_iommu_v1_domain *dom = to_mtk_domain(domain);
 	unsigned long flags;
 	u32 *pgt_base_iova = dom->pgt_va + (iova  >> MT2701_IOMMU_PAGE_SHIFT);
-	unsigned int page_num = size >> MT2701_IOMMU_PAGE_SHIFT;
+	size_t size = pgcount * MT2701_IOMMU_PAGE_SIZE;
 
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	memset(pgt_base_iova, 0, page_num * sizeof(u32));
+	memset(pgt_base_iova, 0, pgcount * sizeof(u32));
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
 	mtk_iommu_v1_tlb_flush_range(dom->data, iova, size);
@@ -586,13 +584,13 @@ static const struct iommu_ops mtk_iommu_v1_ops = {
 	.release_device	= mtk_iommu_v1_release_device,
 	.def_domain_type = mtk_iommu_v1_def_domain_type,
 	.device_group	= generic_device_group,
-	.pgsize_bitmap	= ~0UL << MT2701_IOMMU_PAGE_SHIFT,
+	.pgsize_bitmap	= MT2701_IOMMU_PAGE_SIZE,
 	.owner          = THIS_MODULE,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= mtk_iommu_v1_attach_device,
 		.detach_dev	= mtk_iommu_v1_detach_device,
-		.map		= mtk_iommu_v1_map,
-		.unmap		= mtk_iommu_v1_unmap,
+		.map_pages	= mtk_iommu_v1_map,
+		.unmap_pages	= mtk_iommu_v1_unmap,
 		.iova_to_phys	= mtk_iommu_v1_iova_to_phys,
 		.free		= mtk_iommu_v1_domain_free,
 	}
-- 
GitLab


From a05d5857cec3efef02af557dcb5ed257364356e6 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:36 +0000
Subject: [PATCH 1842/4122] iommu/sprd: Update to {map,unmap}_pages

Now that the core API has a proper notion of multi-page mappings, clean
up the old pgsize_bitmap hack by implementing the new interfaces
instead. This time we'll get the return values for unmaps correct too.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/9026464e8380b92d10d09103e215eb4306a5df7c.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/sprd-iommu.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index e027933755989..219bfa11f7f48 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -271,10 +271,11 @@ static void sprd_iommu_detach_device(struct iommu_domain *domain,
 }
 
 static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova,
-			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+			  phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			  int prot, gfp_t gfp, size_t *mapped)
 {
 	struct sprd_iommu_domain *dom = to_sprd_domain(domain);
-	unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT;
+	size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE;
 	unsigned long flags;
 	unsigned int i;
 	u32 *pgt_base_iova;
@@ -296,35 +297,37 @@ static int sprd_iommu_map(struct iommu_domain *domain, unsigned long iova,
 	pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT);
 
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	for (i = 0; i < page_num; i++) {
+	for (i = 0; i < pgcount; i++) {
 		pgt_base_iova[i] = pabase >> SPRD_IOMMU_PAGE_SHIFT;
 		pabase += SPRD_IOMMU_PAGE_SIZE;
 	}
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
+	*mapped = size;
 	return 0;
 }
 
 static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			size_t size, struct iommu_iotlb_gather *iotlb_gather)
+			       size_t pgsize, size_t pgcount,
+			       struct iommu_iotlb_gather *iotlb_gather)
 {
 	struct sprd_iommu_domain *dom = to_sprd_domain(domain);
 	unsigned long flags;
 	u32 *pgt_base_iova;
-	unsigned int page_num = size >> SPRD_IOMMU_PAGE_SHIFT;
+	size_t size = pgcount * SPRD_IOMMU_PAGE_SIZE;
 	unsigned long start = domain->geometry.aperture_start;
 	unsigned long end = domain->geometry.aperture_end;
 
 	if (iova < start || (iova + size) > (end + 1))
-		return -EINVAL;
+		return 0;
 
 	pgt_base_iova = dom->pgt_va + ((iova - start) >> SPRD_IOMMU_PAGE_SHIFT);
 
 	spin_lock_irqsave(&dom->pgtlock, flags);
-	memset(pgt_base_iova, 0, page_num * sizeof(u32));
+	memset(pgt_base_iova, 0, pgcount * sizeof(u32));
 	spin_unlock_irqrestore(&dom->pgtlock, flags);
 
-	return 0;
+	return size;
 }
 
 static void sprd_iommu_sync_map(struct iommu_domain *domain,
@@ -407,13 +410,13 @@ static const struct iommu_ops sprd_iommu_ops = {
 	.probe_device	= sprd_iommu_probe_device,
 	.device_group	= sprd_iommu_device_group,
 	.of_xlate	= sprd_iommu_of_xlate,
-	.pgsize_bitmap	= ~0UL << SPRD_IOMMU_PAGE_SHIFT,
+	.pgsize_bitmap	= SPRD_IOMMU_PAGE_SIZE,
 	.owner		= THIS_MODULE,
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= sprd_iommu_attach_device,
 		.detach_dev	= sprd_iommu_detach_device,
-		.map		= sprd_iommu_map,
-		.unmap		= sprd_iommu_unmap,
+		.map_pages	= sprd_iommu_map,
+		.unmap_pages	= sprd_iommu_unmap,
 		.iotlb_sync_map	= sprd_iommu_sync_map,
 		.iotlb_sync	= sprd_iommu_sync,
 		.iova_to_phys	= sprd_iommu_iova_to_phys,
-- 
GitLab


From 85637380dad6d97071018cba6f2aa90667f716b3 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:37 +0000
Subject: [PATCH 1843/4122] iommu/mediatek: Update to {map,unmap}_pages

Update map/unmap to the new multi-page interfaces, which is dead easy
since we just pass them through to io-pgtable anyway.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/25b65b71e7e5d1006469aee48bab07ca87227bfa.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index b383c8327f9cb..6b8ad85b50ed3 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -711,7 +711,8 @@ static void mtk_iommu_detach_device(struct iommu_domain *domain,
 }
 
 static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
-			 phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+			 phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			 int prot, gfp_t gfp, size_t *mapped)
 {
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
 
@@ -720,17 +721,17 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
 		paddr |= BIT_ULL(32);
 
 	/* Synchronize with the tlb_lock */
-	return dom->iop->map(dom->iop, iova, paddr, size, prot, gfp);
+	return dom->iop->map_pages(dom->iop, iova, paddr, pgsize, pgcount, prot, gfp, mapped);
 }
 
 static size_t mtk_iommu_unmap(struct iommu_domain *domain,
-			      unsigned long iova, size_t size,
+			      unsigned long iova, size_t pgsize, size_t pgcount,
 			      struct iommu_iotlb_gather *gather)
 {
 	struct mtk_iommu_domain *dom = to_mtk_domain(domain);
 
-	iommu_iotlb_gather_add_range(gather, iova, size);
-	return dom->iop->unmap(dom->iop, iova, size, gather);
+	iommu_iotlb_gather_add_range(gather, iova, pgsize * pgcount);
+	return dom->iop->unmap_pages(dom->iop, iova, pgsize, pgcount, gather);
 }
 
 static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain)
@@ -938,8 +939,8 @@ static const struct iommu_ops mtk_iommu_ops = {
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= mtk_iommu_attach_device,
 		.detach_dev	= mtk_iommu_detach_device,
-		.map		= mtk_iommu_map,
-		.unmap		= mtk_iommu_unmap,
+		.map_pages	= mtk_iommu_map,
+		.unmap_pages	= mtk_iommu_unmap,
 		.flush_iotlb_all = mtk_iommu_flush_iotlb_all,
 		.iotlb_sync	= mtk_iommu_iotlb_sync,
 		.iotlb_sync_map	= mtk_iommu_sync_map,
-- 
GitLab


From 8b35cdcf9bf82098dd15ed02a2a51cdf5f5ca090 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:38 +0000
Subject: [PATCH 1844/4122] iommu/msm: Update to {map,unmap}_pages

Update map/unmap to the new multi-page interfaces, which is dead easy
since we just pass them through to io-pgtable anyway.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/24a8f522710ddd6bbac4da154aa28799e939ebe4.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/msm_iommu.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index 16179a9a72830..c60624910872c 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -471,14 +471,16 @@ fail:
 }
 
 static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
-			 phys_addr_t pa, size_t len, int prot, gfp_t gfp)
+			 phys_addr_t pa, size_t pgsize, size_t pgcount,
+			 int prot, gfp_t gfp, size_t *mapped)
 {
 	struct msm_priv *priv = to_msm_priv(domain);
 	unsigned long flags;
 	int ret;
 
 	spin_lock_irqsave(&priv->pgtlock, flags);
-	ret = priv->iop->map(priv->iop, iova, pa, len, prot, GFP_ATOMIC);
+	ret = priv->iop->map_pages(priv->iop, iova, pa, pgsize, pgcount, prot,
+				   GFP_ATOMIC, mapped);
 	spin_unlock_irqrestore(&priv->pgtlock, flags);
 
 	return ret;
@@ -493,16 +495,18 @@ static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
 }
 
 static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			      size_t len, struct iommu_iotlb_gather *gather)
+			      size_t pgsize, size_t pgcount,
+			      struct iommu_iotlb_gather *gather)
 {
 	struct msm_priv *priv = to_msm_priv(domain);
 	unsigned long flags;
+	size_t ret;
 
 	spin_lock_irqsave(&priv->pgtlock, flags);
-	len = priv->iop->unmap(priv->iop, iova, len, gather);
+	ret = priv->iop->unmap_pages(priv->iop, iova, pgsize, pgcount, gather);
 	spin_unlock_irqrestore(&priv->pgtlock, flags);
 
-	return len;
+	return ret;
 }
 
 static phys_addr_t msm_iommu_iova_to_phys(struct iommu_domain *domain,
@@ -679,8 +683,8 @@ static struct iommu_ops msm_iommu_ops = {
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= msm_iommu_attach_dev,
 		.detach_dev	= msm_iommu_detach_dev,
-		.map		= msm_iommu_map,
-		.unmap		= msm_iommu_unmap,
+		.map_pages	= msm_iommu_map,
+		.unmap_pages	= msm_iommu_unmap,
 		/*
 		 * Nothing is needed here, the barrier to guarantee
 		 * completion of the tlb sync operation is implicitly
-- 
GitLab


From 0a17bbab2330aecd026696d4decc2636bd31e790 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:39 +0000
Subject: [PATCH 1845/4122] iommu/ipmmu-vmsa: Update to {map,unmap}_pages

Update map/unmap to the new multi-page interfaces, which is dead easy
since we just pass them through to io-pgtable anyway. Since these are
domain ops now, the domain is inherently valid (not to mention that
container_of() wouldn't return NULL anyway), so garbage-collect that
check in the process.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/ad859ccc24720d72f8eafd03817c1fc11255ddc1.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/ipmmu-vmsa.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c
index 22230cc15dcd1..a003bd5fc65c1 100644
--- a/drivers/iommu/ipmmu-vmsa.c
+++ b/drivers/iommu/ipmmu-vmsa.c
@@ -659,22 +659,22 @@ static void ipmmu_detach_device(struct iommu_domain *io_domain,
 }
 
 static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova,
-		     phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+		     phys_addr_t paddr, size_t pgsize, size_t pgcount,
+		     int prot, gfp_t gfp, size_t *mapped)
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 
-	if (!domain)
-		return -ENODEV;
-
-	return domain->iop->map(domain->iop, iova, paddr, size, prot, gfp);
+	return domain->iop->map_pages(domain->iop, iova, paddr, pgsize, pgcount,
+				      prot, gfp, mapped);
 }
 
 static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova,
-			  size_t size, struct iommu_iotlb_gather *gather)
+			  size_t pgsize, size_t pgcount,
+			  struct iommu_iotlb_gather *gather)
 {
 	struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain);
 
-	return domain->iop->unmap(domain->iop, iova, size, gather);
+	return domain->iop->unmap_pages(domain->iop, iova, pgsize, pgcount, gather);
 }
 
 static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain)
@@ -877,8 +877,8 @@ static const struct iommu_ops ipmmu_ops = {
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= ipmmu_attach_device,
 		.detach_dev	= ipmmu_detach_device,
-		.map		= ipmmu_map,
-		.unmap		= ipmmu_unmap,
+		.map_pages	= ipmmu_map,
+		.unmap_pages	= ipmmu_unmap,
 		.flush_iotlb_all = ipmmu_flush_iotlb_all,
 		.iotlb_sync	= ipmmu_iotlb_sync,
 		.iova_to_phys	= ipmmu_iova_to_phys,
-- 
GitLab


From fa8ce5743039bc7ea5cb4217423efaebe381fc54 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:40 +0000
Subject: [PATCH 1846/4122] iommu/qcom: Update to {map,unmap}_pages

Update map/unmap to the new multi-page interfaces, which is dead easy
since we just pass them through to io-pgtable anyway.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/ccff9a133d12ec938741720be6baf5d788b71ea0.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/arm/arm-smmu/qcom_iommu.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
index bfd7b51eb5dbf..270c3d9128bab 100644
--- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c
+++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c
@@ -410,7 +410,8 @@ static void qcom_iommu_detach_dev(struct iommu_domain *domain, struct device *de
 }
 
 static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
-			  phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+			  phys_addr_t paddr, size_t pgsize, size_t pgcount,
+			  int prot, gfp_t gfp, size_t *mapped)
 {
 	int ret;
 	unsigned long flags;
@@ -421,13 +422,14 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova,
 		return -ENODEV;
 
 	spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags);
-	ret = ops->map(ops, iova, paddr, size, prot, GFP_ATOMIC);
+	ret = ops->map_pages(ops, iova, paddr, pgsize, pgcount, prot, GFP_ATOMIC, mapped);
 	spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags);
 	return ret;
 }
 
 static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
-			       size_t size, struct iommu_iotlb_gather *gather)
+			       size_t pgsize, size_t pgcount,
+			       struct iommu_iotlb_gather *gather)
 {
 	size_t ret;
 	unsigned long flags;
@@ -444,7 +446,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
 	 */
 	pm_runtime_get_sync(qcom_domain->iommu->dev);
 	spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags);
-	ret = ops->unmap(ops, iova, size, gather);
+	ret = ops->unmap_pages(ops, iova, pgsize, pgcount, gather);
 	spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags);
 	pm_runtime_put_sync(qcom_domain->iommu->dev);
 
@@ -582,8 +584,8 @@ static const struct iommu_ops qcom_iommu_ops = {
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev	= qcom_iommu_attach_dev,
 		.detach_dev	= qcom_iommu_detach_dev,
-		.map		= qcom_iommu_map,
-		.unmap		= qcom_iommu_unmap,
+		.map_pages	= qcom_iommu_map,
+		.unmap_pages	= qcom_iommu_unmap,
 		.flush_iotlb_all = qcom_iommu_flush_iotlb_all,
 		.iotlb_sync	= qcom_iommu_iotlb_sync,
 		.iova_to_phys	= qcom_iommu_iova_to_phys,
-- 
GitLab


From 99cbb8e436344ddd0554108a3d8afb7ce5c4994e Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:41 +0000
Subject: [PATCH 1847/4122] iommu/io-pgtable-arm: Remove map/unmap

With all users now calling {map,unmap}_pages, remove the wrappers.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/162e58e83ed42f78c3fbefe78c9b5410dd1dc412.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgtable-arm.c | 42 ++++++++++++----------------------
 1 file changed, 15 insertions(+), 27 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 0ba817e863465..72dcdd468cf30 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -360,7 +360,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova,
 		max_entries = ARM_LPAE_PTES_PER_TABLE(data) - map_idx_start;
 		num_entries = min_t(int, pgcount, max_entries);
 		ret = arm_lpae_init_pte(data, iova, paddr, prot, lvl, num_entries, ptep);
-		if (!ret && mapped)
+		if (!ret)
 			*mapped += num_entries * size;
 
 		return ret;
@@ -496,13 +496,6 @@ static int arm_lpae_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 	return ret;
 }
 
-static int arm_lpae_map(struct io_pgtable_ops *ops, unsigned long iova,
-			phys_addr_t paddr, size_t size, int iommu_prot, gfp_t gfp)
-{
-	return arm_lpae_map_pages(ops, iova, paddr, size, 1, iommu_prot, gfp,
-				  NULL);
-}
-
 static void __arm_lpae_free_pgtable(struct arm_lpae_io_pgtable *data, int lvl,
 				    arm_lpae_iopte *ptep)
 {
@@ -682,12 +675,6 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
 				data->start_level, ptep);
 }
 
-static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			     size_t size, struct iommu_iotlb_gather *gather)
-{
-	return arm_lpae_unmap_pages(ops, iova, size, 1, gather);
-}
-
 static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 					 unsigned long iova)
 {
@@ -799,9 +786,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 	data->pgd_bits = va_bits - (data->bits_per_level * (levels - 1));
 
 	data->iop.ops = (struct io_pgtable_ops) {
-		.map		= arm_lpae_map,
 		.map_pages	= arm_lpae_map_pages,
-		.unmap		= arm_lpae_unmap,
 		.unmap_pages	= arm_lpae_unmap_pages,
 		.iova_to_phys	= arm_lpae_iova_to_phys,
 	};
@@ -1176,7 +1161,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
 
 	int i, j;
 	unsigned long iova;
-	size_t size;
+	size_t size, mapped;
 	struct io_pgtable_ops *ops;
 
 	selftest_running = true;
@@ -1209,15 +1194,16 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
 		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
 			size = 1UL << j;
 
-			if (ops->map(ops, iova, iova, size, IOMMU_READ |
-							    IOMMU_WRITE |
-							    IOMMU_NOEXEC |
-							    IOMMU_CACHE, GFP_KERNEL))
+			if (ops->map_pages(ops, iova, iova, size, 1,
+					   IOMMU_READ | IOMMU_WRITE |
+					   IOMMU_NOEXEC | IOMMU_CACHE,
+					   GFP_KERNEL, &mapped))
 				return __FAIL(ops, i);
 
 			/* Overlapping mappings */
-			if (!ops->map(ops, iova, iova + size, size,
-				      IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
+			if (!ops->map_pages(ops, iova, iova + size, size, 1,
+					    IOMMU_READ | IOMMU_NOEXEC,
+					    GFP_KERNEL, &mapped))
 				return __FAIL(ops, i);
 
 			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
@@ -1228,11 +1214,12 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
 
 		/* Partial unmap */
 		size = 1UL << __ffs(cfg->pgsize_bitmap);
-		if (ops->unmap(ops, SZ_1G + size, size, NULL) != size)
+		if (ops->unmap_pages(ops, SZ_1G + size, size, 1, NULL) != size)
 			return __FAIL(ops, i);
 
 		/* Remap of partial unmap */
-		if (ops->map(ops, SZ_1G + size, size, size, IOMMU_READ, GFP_KERNEL))
+		if (ops->map_pages(ops, SZ_1G + size, size, size, 1,
+				   IOMMU_READ, GFP_KERNEL, &mapped))
 			return __FAIL(ops, i);
 
 		if (ops->iova_to_phys(ops, SZ_1G + size + 42) != (size + 42))
@@ -1243,14 +1230,15 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg)
 		for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) {
 			size = 1UL << j;
 
-			if (ops->unmap(ops, iova, size, NULL) != size)
+			if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
 				return __FAIL(ops, i);
 
 			if (ops->iova_to_phys(ops, iova + 42))
 				return __FAIL(ops, i);
 
 			/* Remap full block */
-			if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
+			if (ops->map_pages(ops, iova, iova, size, 1,
+					   IOMMU_WRITE, GFP_KERNEL, &mapped))
 				return __FAIL(ops, i);
 
 			if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
-- 
GitLab


From b9bf41e249f8c8bf79389cec9d29faf03f79aad2 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:42 +0000
Subject: [PATCH 1848/4122] iommu/io-pgtable-arm-v7s: Remove map/unmap

With all users now calling {map,unmap}_pages, remove the wrappers.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/98481dd7e3576b74149ce2de8f217338ee1dd490.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/io-pgtable-arm-v7s.c | 41 +++++++++++-------------------
 1 file changed, 15 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index ba3115fd0f86a..75f244a3e12df 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -564,8 +564,7 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 
 		iova += pgsize;
 		paddr += pgsize;
-		if (mapped)
-			*mapped += pgsize;
+		*mapped += pgsize;
 	}
 	/*
 	 * Synchronise all PTE updates for the new mapping before there's
@@ -576,12 +575,6 @@ static int arm_v7s_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 	return ret;
 }
 
-static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova,
-		       phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
-{
-	return arm_v7s_map_pages(ops, iova, paddr, size, 1, prot, gfp, NULL);
-}
-
 static void arm_v7s_free_pgtable(struct io_pgtable *iop)
 {
 	struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop);
@@ -764,12 +757,6 @@ static size_t arm_v7s_unmap_pages(struct io_pgtable_ops *ops, unsigned long iova
 	return unmapped;
 }
 
-static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova,
-			    size_t size, struct iommu_iotlb_gather *gather)
-{
-	return arm_v7s_unmap_pages(ops, iova, size, 1, gather);
-}
-
 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops,
 					unsigned long iova)
 {
@@ -842,9 +829,7 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
 		goto out_free_data;
 
 	data->iop.ops = (struct io_pgtable_ops) {
-		.map		= arm_v7s_map,
 		.map_pages	= arm_v7s_map_pages,
-		.unmap		= arm_v7s_unmap,
 		.unmap_pages	= arm_v7s_unmap_pages,
 		.iova_to_phys	= arm_v7s_iova_to_phys,
 	};
@@ -954,6 +939,7 @@ static int __init arm_v7s_do_selftests(void)
 	};
 	unsigned int iova, size, iova_start;
 	unsigned int i, loopnr = 0;
+	size_t mapped;
 
 	selftest_running = true;
 
@@ -984,15 +970,16 @@ static int __init arm_v7s_do_selftests(void)
 	iova = 0;
 	for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
 		size = 1UL << i;
-		if (ops->map(ops, iova, iova, size, IOMMU_READ |
-						    IOMMU_WRITE |
-						    IOMMU_NOEXEC |
-						    IOMMU_CACHE, GFP_KERNEL))
+		if (ops->map_pages(ops, iova, iova, size, 1,
+				   IOMMU_READ | IOMMU_WRITE |
+				   IOMMU_NOEXEC | IOMMU_CACHE,
+				   GFP_KERNEL, &mapped))
 			return __FAIL(ops);
 
 		/* Overlapping mappings */
-		if (!ops->map(ops, iova, iova + size, size,
-			      IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL))
+		if (!ops->map_pages(ops, iova, iova + size, size, 1,
+				    IOMMU_READ | IOMMU_NOEXEC, GFP_KERNEL,
+				    &mapped))
 			return __FAIL(ops);
 
 		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
@@ -1007,11 +994,12 @@ static int __init arm_v7s_do_selftests(void)
 	size = 1UL << __ffs(cfg.pgsize_bitmap);
 	while (i < loopnr) {
 		iova_start = i * SZ_16M;
-		if (ops->unmap(ops, iova_start + size, size, NULL) != size)
+		if (ops->unmap_pages(ops, iova_start + size, size, 1, NULL) != size)
 			return __FAIL(ops);
 
 		/* Remap of partial unmap */
-		if (ops->map(ops, iova_start + size, size, size, IOMMU_READ, GFP_KERNEL))
+		if (ops->map_pages(ops, iova_start + size, size, size, 1,
+				   IOMMU_READ, GFP_KERNEL, &mapped))
 			return __FAIL(ops);
 
 		if (ops->iova_to_phys(ops, iova_start + size + 42)
@@ -1025,14 +1013,15 @@ static int __init arm_v7s_do_selftests(void)
 	for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) {
 		size = 1UL << i;
 
-		if (ops->unmap(ops, iova, size, NULL) != size)
+		if (ops->unmap_pages(ops, iova, size, 1, NULL) != size)
 			return __FAIL(ops);
 
 		if (ops->iova_to_phys(ops, iova + 42))
 			return __FAIL(ops);
 
 		/* Remap full block */
-		if (ops->map(ops, iova, iova, size, IOMMU_WRITE, GFP_KERNEL))
+		if (ops->map_pages(ops, iova, iova, size, 1, IOMMU_WRITE,
+				   GFP_KERNEL, &mapped))
 			return __FAIL(ops);
 
 		if (ops->iova_to_phys(ops, iova + 42) != (iova + 42))
-- 
GitLab


From b169a180bef26679b44484ad24b7d8ae32623a10 Mon Sep 17 00:00:00 2001
From: Robin Murphy <robin.murphy@arm.com>
Date: Tue, 15 Nov 2022 15:26:43 +0000
Subject: [PATCH 1849/4122] iommu/io-pgtable: Remove map/unmap

With all users now calling {map,unmap}_pages, retire the redundant
single-page callbacks.

Signed-off-by: Robin Murphy <robin.murphy@arm.com>
Acked-by: Will Deacon <will@kernel.org>
Link: https://lore.kernel.org/r/a5a3cbf95c3279982e378cc43dad830322a59868.1668100209.git.robin.murphy@arm.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 include/linux/io-pgtable.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 1f068dfdb140c..1b7a44b35616c 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -150,9 +150,7 @@ struct io_pgtable_cfg {
 /**
  * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
  *
- * @map:          Map a physically contiguous memory region.
  * @map_pages:    Map a physically contiguous range of pages of the same size.
- * @unmap:        Unmap a physically contiguous memory region.
  * @unmap_pages:  Unmap a range of virtually contiguous pages of the same size.
  * @iova_to_phys: Translate iova to physical address.
  *
@@ -160,13 +158,9 @@ struct io_pgtable_cfg {
  * the same names.
  */
 struct io_pgtable_ops {
-	int (*map)(struct io_pgtable_ops *ops, unsigned long iova,
-		   phys_addr_t paddr, size_t size, int prot, gfp_t gfp);
 	int (*map_pages)(struct io_pgtable_ops *ops, unsigned long iova,
 			 phys_addr_t paddr, size_t pgsize, size_t pgcount,
 			 int prot, gfp_t gfp, size_t *mapped);
-	size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova,
-			size_t size, struct iommu_iotlb_gather *gather);
 	size_t (*unmap_pages)(struct io_pgtable_ops *ops, unsigned long iova,
 			      size_t pgsize, size_t pgcount,
 			      struct iommu_iotlb_gather *gather);
-- 
GitLab


From 6cf0981c2233f97d56938d9d61845383d6eb227c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 18 Nov 2022 17:36:04 +0800
Subject: [PATCH 1850/4122] iommu/amd: Fix pci device refcount leak in
 ppr_notifier()

As comment of pci_get_domain_bus_and_slot() says, it returns
a pci device with refcount increment, when finish using it,
the caller must decrement the reference count by calling
pci_dev_put(). So call it before returning from ppr_notifier()
to avoid refcount leak.

Fixes: daae2d25a477 ("iommu/amd: Don't copy GCR3 table root pointer")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221118093604.216371-1-yangyingliang@huawei.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/iommu_v2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 6a1f02c62dffc..9f7fab49a5a90 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -587,6 +587,7 @@ out_drop_state:
 	put_device_state(dev_state);
 
 out:
+	pci_dev_put(pdev);
 	return ret;
 }
 
-- 
GitLab


From b09b56734fae28be9332021ae3e84c9b05020fda Mon Sep 17 00:00:00 2001
From: Denis Arefev <arefev@swemel.ru>
Date: Fri, 18 Nov 2022 13:42:52 +0300
Subject: [PATCH 1851/4122] iommu/amd: Check return value of
 mmu_notifier_register()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Аdded a return value check for the function
mmu_notifier_register().

Return value of a function 'mmu_notifier_register'
called at iommu_v2.c:642 is not checked,
 but it is usually checked for this function

Found by Linux Verification Center (linuxtesting.org) with SVACE.

Signed-off-by: Denis Arefev <arefev@swemel.ru>
Link: https://lore.kernel.org/r/20221118104252.122809-1-arefev@swemel.ru
[joro: Fix commit message ]
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/iommu_v2.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 9f7fab49a5a90..864e4ffb6aa94 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -640,7 +640,9 @@ int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
 	if (pasid_state->mm == NULL)
 		goto out_free;
 
-	mmu_notifier_register(&pasid_state->mn, mm);
+	ret = mmu_notifier_register(&pasid_state->mn, mm);
+	if (ret)
+		goto out_free;
 
 	ret = set_pasid_state(dev_state, pasid_state, pasid);
 	if (ret)
-- 
GitLab


From 26d2d0594d7016dbcbce4038aa202c2858d5a944 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:25 +0000
Subject: [PATCH 1852/4122] KVM: arm64: PMU: Do not let AArch32 change the
 counters' top 32 bits

Even when using PMUv3p5 (which implies 64bit counters), there is
no way for AArch32 to write to the top 32 bits of the counters.
The only way to influence these bits (other than by counting
events) is by writing PMCR.P==1.

Make sure we obey the architecture and preserve the top 32 bits
on a counter update.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-10-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 35 +++++++++++++++++++++++++++--------
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index ea0c8411641fd..7a945fa6dd033 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -119,13 +119,8 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 	return counter;
 }
 
-/**
- * kvm_pmu_set_counter_value - set PMU counter value
- * @vcpu: The vcpu pointer
- * @select_idx: The counter index
- * @val: The counter value
- */
-void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
+static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val,
+				bool force)
 {
 	u64 reg;
 
@@ -135,12 +130,36 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
 	kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]);
 
 	reg = counter_index_to_reg(select_idx);
+
+	if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX &&
+	    !force) {
+		/*
+		 * Even with PMUv3p5, AArch32 cannot write to the top
+		 * 32bit of the counters. The only possible course of
+		 * action is to use PMCR.P, which will reset them to
+		 * 0 (the only use of the 'force' parameter).
+		 */
+		val  = __vcpu_sys_reg(vcpu, reg) & GENMASK(63, 32);
+		val |= lower_32_bits(val);
+	}
+
 	__vcpu_sys_reg(vcpu, reg) = val;
 
 	/* Recreate the perf event to reflect the updated sample_period */
 	kvm_pmu_create_perf_event(vcpu, select_idx);
 }
 
+/**
+ * kvm_pmu_set_counter_value - set PMU counter value
+ * @vcpu: The vcpu pointer
+ * @select_idx: The counter index
+ * @val: The counter value
+ */
+void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
+{
+	kvm_pmu_set_counter(vcpu, select_idx, val, false);
+}
+
 /**
  * kvm_pmu_release_perf_event - remove the perf event
  * @pmc: The PMU counter pointer
@@ -533,7 +552,7 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
 		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
 		for_each_set_bit(i, &mask, 32)
-			kvm_pmu_set_counter_value(vcpu, i, 0);
+			kvm_pmu_set_counter(vcpu, i, 0, true);
 	}
 }
 
-- 
GitLab


From 3d0dba5764b94308b8c4257ad64e383f11ce0c92 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:26 +0000
Subject: [PATCH 1853/4122] KVM: arm64: PMU: Move the ID_AA64DFR0_EL1.PMUver
 limit to VM creation

As further patches will enable the selection of a PMU revision
from userspace, sample the supported PMU revision at VM creation
time, rather than building each time the ID_AA64DFR0_EL1 register
is accessed.

This shouldn't result in any change in behaviour.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-11-maz@kernel.org
---
 arch/arm64/include/asm/kvm_host.h |  4 ++++
 arch/arm64/kvm/arm.c              |  6 ++++++
 arch/arm64/kvm/pmu-emul.c         | 11 ++++++++++
 arch/arm64/kvm/sys_regs.c         | 36 ++++++++++++++++++++++++-------
 include/kvm/arm_pmu.h             |  6 ++++++
 5 files changed, 55 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 45e2136322ba2..cc44e3bc528d8 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -163,6 +163,10 @@ struct kvm_arch {
 
 	u8 pfr0_csv2;
 	u8 pfr0_csv3;
+	struct {
+		u8 imp:4;
+		u8 unimp:4;
+	} dfr0_pmuver;
 
 	/* Hypercall features firmware registers' descriptor */
 	struct kvm_smccc_features smccc_feat;
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 94d33e296e10c..f956aab438c7a 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -164,6 +164,12 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	set_default_spectre(kvm);
 	kvm_arm_init_hypercalls(kvm);
 
+	/*
+	 * Initialise the default PMUver before there is a chance to
+	 * create an actual PMU.
+	 */
+	kvm->arch.dfr0_pmuver.imp = kvm_arm_pmu_get_pmuver_limit();
+
 	return ret;
 out_free_stage2_pgd:
 	kvm_free_stage2_pgd(&kvm->arch.mmu);
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 7a945fa6dd033..94ca2d17a4e46 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -1047,3 +1047,14 @@ int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
 
 	return -ENXIO;
 }
+
+u8 kvm_arm_pmu_get_pmuver_limit(void)
+{
+	u64 tmp;
+
+	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
+	tmp = cpuid_feature_cap_perfmon_field(tmp,
+					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
+					      ID_AA64DFR0_EL1_PMUVer_V3P4);
+	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
+}
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index f4a7c5abcbca4..297b4fcbf9696 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1062,6 +1062,27 @@ static bool access_arch_timer(struct kvm_vcpu *vcpu,
 	return true;
 }
 
+static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu)
+{
+	if (kvm_vcpu_has_pmu(vcpu))
+		return vcpu->kvm->arch.dfr0_pmuver.imp;
+
+	return vcpu->kvm->arch.dfr0_pmuver.unimp;
+}
+
+static u8 pmuver_to_perfmon(u8 pmuver)
+{
+	switch (pmuver) {
+	case ID_AA64DFR0_EL1_PMUVer_IMP:
+		return ID_DFR0_PERFMON_8_0;
+	case ID_AA64DFR0_EL1_PMUVer_IMP_DEF:
+		return ID_DFR0_PERFMON_IMP_DEF;
+	default:
+		/* Anything ARMv8.1+ and NI have the same value. For now. */
+		return pmuver;
+	}
+}
+
 /* Read a sanitised cpufeature ID register by sys_reg_desc */
 static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r)
 {
@@ -1111,18 +1132,17 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r
 		/* Limit debug to ARMv8.0 */
 		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer);
 		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), 6);
-		/* Limit guests to PMUv3 for ARMv8.4 */
-		val = cpuid_feature_cap_perfmon_field(val,
-						      ID_AA64DFR0_EL1_PMUVer_SHIFT,
-						      kvm_vcpu_has_pmu(vcpu) ? ID_AA64DFR0_EL1_PMUVer_V3P4 : 0);
+		/* Set PMUver to the required version */
+		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer),
+				  vcpu_pmuver(vcpu));
 		/* Hide SPE from guests */
 		val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMSVer);
 		break;
 	case SYS_ID_DFR0_EL1:
-		/* Limit guests to PMUv3 for ARMv8.4 */
-		val = cpuid_feature_cap_perfmon_field(val,
-						      ID_DFR0_PERFMON_SHIFT,
-						      kvm_vcpu_has_pmu(vcpu) ? ID_DFR0_PERFMON_8_4 : 0);
+		val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON);
+		val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_DFR0_PERFMON),
+				  pmuver_to_perfmon(vcpu_pmuver(vcpu)));
 		break;
 	}
 
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 96b192139a23a..812f729c91087 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -89,6 +89,8 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
 			vcpu->arch.pmu.events = *kvm_get_pmu_events();	\
 	} while (0)
 
+u8 kvm_arm_pmu_get_pmuver_limit(void);
+
 #else
 struct kvm_pmu {
 };
@@ -154,6 +156,10 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
+static inline u8 kvm_arm_pmu_get_pmuver_limit(void)
+{
+	return 0;
+}
 
 #endif
 
-- 
GitLab


From 60e651ff1f48bfdf8fec80d35510bd89ecf8c766 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:27 +0000
Subject: [PATCH 1854/4122] KVM: arm64: PMU: Allow ID_AA64DFR0_EL1.PMUver to be
 set from userspace

Allow userspace to write ID_AA64DFR0_EL1, on the condition that only
the PMUver field can be altered and be at most the one that was
initially computed for the guest.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-12-maz@kernel.org
---
 arch/arm64/kvm/sys_regs.c | 42 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 297b4fcbf9696..49585258ae6c0 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1242,6 +1242,45 @@ static int set_id_aa64pfr0_el1(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
+			       const struct sys_reg_desc *rd,
+			       u64 val)
+{
+	u8 pmuver, host_pmuver;
+	bool valid_pmu;
+
+	host_pmuver = kvm_arm_pmu_get_pmuver_limit();
+
+	/*
+	 * Allow AA64DFR0_EL1.PMUver to be set from userspace as long
+	 * as it doesn't promise more than what the HW gives us. We
+	 * allow an IMPDEF PMU though, only if no PMU is supported
+	 * (KVM backward compatibility handling).
+	 */
+	pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), val);
+	if ((pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF && pmuver > host_pmuver))
+		return -EINVAL;
+
+	valid_pmu = (pmuver != 0 && pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF);
+
+	/* Make sure view register and PMU support do match */
+	if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
+		return -EINVAL;
+
+	/* We can only differ with PMUver, and anything else is an error */
+	val ^= read_id_reg(vcpu, rd);
+	val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer);
+	if (val)
+		return -EINVAL;
+
+	if (valid_pmu)
+		vcpu->kvm->arch.dfr0_pmuver.imp = pmuver;
+	else
+		vcpu->kvm->arch.dfr0_pmuver.unimp = pmuver;
+
+	return 0;
+}
+
 /*
  * cpufeature ID register user accessors
  *
@@ -1503,7 +1542,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	ID_UNALLOCATED(4,7),
 
 	/* CRm=5 */
-	ID_SANITISED(ID_AA64DFR0_EL1),
+	{ SYS_DESC(SYS_ID_AA64DFR0_EL1), .access = access_id_reg,
+	  .get_user = get_id_reg, .set_user = set_id_aa64dfr0_el1, },
 	ID_SANITISED(ID_AA64DFR1_EL1),
 	ID_UNALLOCATED(5,2),
 	ID_UNALLOCATED(5,3),
-- 
GitLab


From d82e0dfdfda73f91e7282e1083a2cd7cd366ea87 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:28 +0000
Subject: [PATCH 1855/4122] KVM: arm64: PMU: Allow ID_DFR0_EL1.PerfMon to be
 set from userspace

Allow userspace to write ID_DFR0_EL1, on the condition that only
the PerfMon field can be altered and be something that is compatible
with what was computed for the AArch64 view of the guest.

Reviewed-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-13-maz@kernel.org
---
 arch/arm64/kvm/sys_regs.c | 57 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 49585258ae6c0..b8ac58723459c 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -1070,6 +1070,19 @@ static u8 vcpu_pmuver(const struct kvm_vcpu *vcpu)
 	return vcpu->kvm->arch.dfr0_pmuver.unimp;
 }
 
+static u8 perfmon_to_pmuver(u8 perfmon)
+{
+	switch (perfmon) {
+	case ID_DFR0_PERFMON_8_0:
+		return ID_AA64DFR0_EL1_PMUVer_IMP;
+	case ID_DFR0_PERFMON_IMP_DEF:
+		return ID_AA64DFR0_EL1_PMUVer_IMP_DEF;
+	default:
+		/* Anything ARMv8.1+ and NI have the same value. For now. */
+		return perfmon;
+	}
+}
+
 static u8 pmuver_to_perfmon(u8 pmuver)
 {
 	switch (pmuver) {
@@ -1281,6 +1294,46 @@ static int set_id_aa64dfr0_el1(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int set_id_dfr0_el1(struct kvm_vcpu *vcpu,
+			   const struct sys_reg_desc *rd,
+			   u64 val)
+{
+	u8 perfmon, host_perfmon;
+	bool valid_pmu;
+
+	host_perfmon = pmuver_to_perfmon(kvm_arm_pmu_get_pmuver_limit());
+
+	/*
+	 * Allow DFR0_EL1.PerfMon to be set from userspace as long as
+	 * it doesn't promise more than what the HW gives us on the
+	 * AArch64 side (as everything is emulated with that), and
+	 * that this is a PMUv3.
+	 */
+	perfmon = FIELD_GET(ARM64_FEATURE_MASK(ID_DFR0_PERFMON), val);
+	if ((perfmon != ID_DFR0_PERFMON_IMP_DEF && perfmon > host_perfmon) ||
+	    (perfmon != 0 && perfmon < ID_DFR0_PERFMON_8_0))
+		return -EINVAL;
+
+	valid_pmu = (perfmon != 0 && perfmon != ID_DFR0_PERFMON_IMP_DEF);
+
+	/* Make sure view register and PMU support do match */
+	if (kvm_vcpu_has_pmu(vcpu) != valid_pmu)
+		return -EINVAL;
+
+	/* We can only differ with PerfMon, and anything else is an error */
+	val ^= read_id_reg(vcpu, rd);
+	val &= ~ARM64_FEATURE_MASK(ID_DFR0_PERFMON);
+	if (val)
+		return -EINVAL;
+
+	if (valid_pmu)
+		vcpu->kvm->arch.dfr0_pmuver.imp = perfmon_to_pmuver(perfmon);
+	else
+		vcpu->kvm->arch.dfr0_pmuver.unimp = perfmon_to_pmuver(perfmon);
+
+	return 0;
+}
+
 /*
  * cpufeature ID register user accessors
  *
@@ -1502,7 +1555,9 @@ static const struct sys_reg_desc sys_reg_descs[] = {
 	/* CRm=1 */
 	AA32_ID_SANITISED(ID_PFR0_EL1),
 	AA32_ID_SANITISED(ID_PFR1_EL1),
-	AA32_ID_SANITISED(ID_DFR0_EL1),
+	{ SYS_DESC(SYS_ID_DFR0_EL1), .access = access_id_reg,
+	  .get_user = get_id_reg, .set_user = set_id_dfr0_el1,
+	  .visibility = aa32_id_visibility, },
 	ID_HIDDEN(ID_AFR0_EL1),
 	AA32_ID_SANITISED(ID_MMFR0_EL1),
 	AA32_ID_SANITISED(ID_MMFR1_EL1),
-- 
GitLab


From 11af4c37165e36a6090172ded5d06acdf15206da Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:29 +0000
Subject: [PATCH 1856/4122] KVM: arm64: PMU: Implement PMUv3p5 long counter
 support

PMUv3p5 (which is mandatory with ARMv8.5) comes with some extra
features:

- All counters are 64bit

- The overflow point is controlled by the PMCR_EL0.LP bit

Add the required checks in the helpers that control counter
width and overflow, as well as the sysreg handling for the LP
bit. A new kvm_pmu_is_3p5() helper makes it easy to spot the
PMUv3p5 specific handling.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-14-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 8 +++++---
 arch/arm64/kvm/sys_regs.c | 4 ++++
 include/kvm/arm_pmu.h     | 7 +++++++
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 94ca2d17a4e46..7e25ff73cbba8 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -52,13 +52,15 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
  */
 static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
 {
-	return (select_idx == ARMV8_PMU_CYCLE_IDX);
+	return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu));
 }
 
 static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx)
 {
-	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
-		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
+	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0);
+
+	return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
+	       (select_idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
 }
 
 static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index b8ac58723459c..67eac0f747be8 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -654,6 +654,8 @@ static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 	       | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
 	if (!kvm_supports_32bit_el0())
 		val |= ARMV8_PMU_PMCR_LC;
+	if (!kvm_pmu_is_3p5(vcpu))
+		val &= ~ARMV8_PMU_PMCR_LP;
 	__vcpu_sys_reg(vcpu, r->reg) = val;
 }
 
@@ -703,6 +705,8 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		val |= p->regval & ARMV8_PMU_PMCR_MASK;
 		if (!kvm_supports_32bit_el0())
 			val |= ARMV8_PMU_PMCR_LC;
+		if (!kvm_pmu_is_3p5(vcpu))
+			val &= ~ARMV8_PMU_PMCR_LP;
 		__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
 		kvm_pmu_handle_pmcr(vcpu, val);
 		kvm_vcpu_pmu_restore_guest(vcpu);
diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h
index 812f729c91087..628775334d5ef 100644
--- a/include/kvm/arm_pmu.h
+++ b/include/kvm/arm_pmu.h
@@ -89,6 +89,12 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu);
 			vcpu->arch.pmu.events = *kvm_get_pmu_events();	\
 	} while (0)
 
+/*
+ * Evaluates as true when emulating PMUv3p5, and false otherwise.
+ */
+#define kvm_pmu_is_3p5(vcpu)						\
+	(vcpu->kvm->arch.dfr0_pmuver.imp >= ID_AA64DFR0_EL1_PMUVer_V3P5)
+
 u8 kvm_arm_pmu_get_pmuver_limit(void);
 
 #else
@@ -153,6 +159,7 @@ static inline u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
 }
 
 #define kvm_vcpu_has_pmu(vcpu)		({ false; })
+#define kvm_pmu_is_3p5(vcpu)		({ false; })
 static inline void kvm_pmu_update_vcpu_events(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_guest(struct kvm_vcpu *vcpu) {}
 static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
-- 
GitLab


From 1f7c978282855d6b2abd608064004c74902e791d Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:30 +0000
Subject: [PATCH 1857/4122] KVM: arm64: PMU: Allow PMUv3p5 to be exposed to the
 guest

Now that the infrastructure is in place, bump the PMU support up
to PMUv3p5.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-15-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 7e25ff73cbba8..be881ae671336 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -1057,6 +1057,6 @@ u8 kvm_arm_pmu_get_pmuver_limit(void)
 	tmp = read_sanitised_ftr_reg(SYS_ID_AA64DFR0_EL1);
 	tmp = cpuid_feature_cap_perfmon_field(tmp,
 					      ID_AA64DFR0_EL1_PMUVer_SHIFT,
-					      ID_AA64DFR0_EL1_PMUVer_V3P4);
+					      ID_AA64DFR0_EL1_PMUVer_V3P5);
 	return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), tmp);
 }
-- 
GitLab


From 9bad925dd741408825590eccc495d073cc246de0 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:31 +0000
Subject: [PATCH 1858/4122] KVM: arm64: PMU: Simplify vcpu computation on perf
 overflow notification

The way we compute the target vcpu on getting an overflow is
a bit odd, as we use the PMC array as an anchor for kvm_pmc_to_vcpu,
while we could directly compute the correct address.

Get rid of the intermediate step and directly compute the target
vcpu.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-16-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index be881ae671336..49a0046604979 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -405,11 +405,8 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
 static void kvm_pmu_perf_overflow_notify_vcpu(struct irq_work *work)
 {
 	struct kvm_vcpu *vcpu;
-	struct kvm_pmu *pmu;
-
-	pmu = container_of(work, struct kvm_pmu, overflow_work);
-	vcpu = kvm_pmc_to_vcpu(pmu->pmc);
 
+	vcpu = container_of(work, struct kvm_vcpu, arch.pmu.overflow_work);
 	kvm_vcpu_kick(vcpu);
 }
 
-- 
GitLab


From d56bdce586e7fabd2b3339f476e0e4c059b24e19 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sun, 13 Nov 2022 16:38:32 +0000
Subject: [PATCH 1859/4122] KVM: arm64: PMU: Make kvm_pmc the main data
 structure

The PMU code has historically been torn between referencing a counter
as a pair vcpu+index or as the PMC pointer.

Given that it is pretty easy to go from one representation to
the other, standardise on the latter which, IMHO, makes the
code slightly more readable. YMMV.

Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221113163832.3154370-17-maz@kernel.org
---
 arch/arm64/kvm/pmu-emul.c | 174 +++++++++++++++++++-------------------
 1 file changed, 87 insertions(+), 87 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 49a0046604979..3295dea34f4c9 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -22,9 +22,19 @@ DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
 static LIST_HEAD(arm_pmus);
 static DEFINE_MUTEX(arm_pmus_lock);
 
-static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
+static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc);
 static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc);
 
+static struct kvm_vcpu *kvm_pmc_to_vcpu(const struct kvm_pmc *pmc)
+{
+	return container_of(pmc, struct kvm_vcpu, arch.pmu.pmc[pmc->idx]);
+}
+
+static struct kvm_pmc *kvm_vcpu_idx_to_pmc(struct kvm_vcpu *vcpu, int cnt_idx)
+{
+	return &vcpu->arch.pmu.pmc[cnt_idx];
+}
+
 static u32 kvm_pmu_event_mask(struct kvm *kvm)
 {
 	unsigned int pmuver;
@@ -46,38 +56,27 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
 }
 
 /**
- * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
- * @vcpu: The vcpu pointer
- * @select_idx: The counter index
+ * kvm_pmc_is_64bit - determine if counter is 64bit
+ * @pmc: counter context
  */
-static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
+static bool kvm_pmc_is_64bit(struct kvm_pmc *pmc)
 {
-	return (select_idx == ARMV8_PMU_CYCLE_IDX || kvm_pmu_is_3p5(vcpu));
+	return (pmc->idx == ARMV8_PMU_CYCLE_IDX ||
+		kvm_pmu_is_3p5(kvm_pmc_to_vcpu(pmc)));
 }
 
-static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx)
+static bool kvm_pmc_has_64bit_overflow(struct kvm_pmc *pmc)
 {
-	u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0);
+	u64 val = __vcpu_sys_reg(kvm_pmc_to_vcpu(pmc), PMCR_EL0);
 
-	return (select_idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
-	       (select_idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
+	return (pmc->idx < ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LP)) ||
+	       (pmc->idx == ARMV8_PMU_CYCLE_IDX && (val & ARMV8_PMU_PMCR_LC));
 }
 
-static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
+static bool kvm_pmu_counter_can_chain(struct kvm_pmc *pmc)
 {
-	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX &&
-		!kvm_pmu_idx_has_64bit_overflow(vcpu, idx));
-}
-
-static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
-{
-	struct kvm_pmu *pmu;
-	struct kvm_vcpu_arch *vcpu_arch;
-
-	pmc -= pmc->idx;
-	pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
-	vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
-	return container_of(vcpu_arch, struct kvm_vcpu, arch);
+	return (!(pmc->idx & 1) && (pmc->idx + 1) < ARMV8_PMU_CYCLE_IDX &&
+		!kvm_pmc_has_64bit_overflow(pmc));
 }
 
 static u32 counter_index_to_reg(u64 idx)
@@ -90,21 +89,12 @@ static u32 counter_index_to_evtreg(u64 idx)
 	return (idx == ARMV8_PMU_CYCLE_IDX) ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + idx;
 }
 
-/**
- * kvm_pmu_get_counter_value - get PMU counter value
- * @vcpu: The vcpu pointer
- * @select_idx: The counter index
- */
-u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
+static u64 kvm_pmu_get_pmc_value(struct kvm_pmc *pmc)
 {
+	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
 	u64 counter, reg, enabled, running;
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
-
-	if (!kvm_vcpu_has_pmu(vcpu))
-		return 0;
 
-	reg = counter_index_to_reg(select_idx);
+	reg = counter_index_to_reg(pmc->idx);
 	counter = __vcpu_sys_reg(vcpu, reg);
 
 	/*
@@ -115,25 +105,35 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 		counter += perf_event_read_value(pmc->perf_event, &enabled,
 						 &running);
 
-	if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
+	if (!kvm_pmc_is_64bit(pmc))
 		counter = lower_32_bits(counter);
 
 	return counter;
 }
 
-static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val,
-				bool force)
+/**
+ * kvm_pmu_get_counter_value - get PMU counter value
+ * @vcpu: The vcpu pointer
+ * @select_idx: The counter index
+ */
+u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
 {
-	u64 reg;
-
 	if (!kvm_vcpu_has_pmu(vcpu))
-		return;
+		return 0;
 
-	kvm_pmu_release_perf_event(&vcpu->arch.pmu.pmc[select_idx]);
+	return kvm_pmu_get_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx));
+}
 
-	reg = counter_index_to_reg(select_idx);
+static void kvm_pmu_set_pmc_value(struct kvm_pmc *pmc, u64 val, bool force)
+{
+	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
+	u64 reg;
 
-	if (vcpu_mode_is_32bit(vcpu) && select_idx != ARMV8_PMU_CYCLE_IDX &&
+	kvm_pmu_release_perf_event(pmc);
+
+	reg = counter_index_to_reg(pmc->idx);
+
+	if (vcpu_mode_is_32bit(vcpu) && pmc->idx != ARMV8_PMU_CYCLE_IDX &&
 	    !force) {
 		/*
 		 * Even with PMUv3p5, AArch32 cannot write to the top
@@ -148,7 +148,7 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val,
 	__vcpu_sys_reg(vcpu, reg) = val;
 
 	/* Recreate the perf event to reflect the updated sample_period */
-	kvm_pmu_create_perf_event(vcpu, select_idx);
+	kvm_pmu_create_perf_event(pmc);
 }
 
 /**
@@ -159,7 +159,10 @@ static void kvm_pmu_set_counter(struct kvm_vcpu *vcpu, u64 select_idx, u64 val,
  */
 void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
 {
-	kvm_pmu_set_counter(vcpu, select_idx, val, false);
+	if (!kvm_vcpu_has_pmu(vcpu))
+		return;
+
+	kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, select_idx), val, false);
 }
 
 /**
@@ -181,14 +184,15 @@ static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
  *
  * If this counter has been configured to monitor some event, release it here.
  */
-static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
+static void kvm_pmu_stop_counter(struct kvm_pmc *pmc)
 {
+	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
 	u64 reg, val;
 
 	if (!pmc->perf_event)
 		return;
 
-	val = kvm_pmu_get_counter_value(vcpu, pmc->idx);
+	val = kvm_pmu_get_pmc_value(pmc);
 
 	reg = counter_index_to_reg(pmc->idx);
 
@@ -219,11 +223,10 @@ void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu)
 void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
 {
 	unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
 	int i;
 
 	for_each_set_bit(i, &mask, 32)
-		kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
+		kvm_pmu_stop_counter(kvm_vcpu_idx_to_pmc(vcpu, i));
 }
 
 /**
@@ -234,10 +237,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
 void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
 	int i;
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
 
 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
-		kvm_pmu_release_perf_event(&pmu->pmc[i]);
+		kvm_pmu_release_perf_event(kvm_vcpu_idx_to_pmc(vcpu, i));
 	irq_work_sync(&vcpu->arch.pmu.overflow_work);
 }
 
@@ -262,9 +264,6 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
 void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 {
 	int i;
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc;
-
 	if (!kvm_vcpu_has_pmu(vcpu))
 		return;
 
@@ -272,13 +271,15 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 		return;
 
 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
+		struct kvm_pmc *pmc;
+
 		if (!(val & BIT(i)))
 			continue;
 
-		pmc = &pmu->pmc[i];
+		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
 
 		if (!pmc->perf_event) {
-			kvm_pmu_create_perf_event(vcpu, i);
+			kvm_pmu_create_perf_event(pmc);
 		} else {
 			perf_event_enable(pmc->perf_event);
 			if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
@@ -297,17 +298,17 @@ void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
 {
 	int i;
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc;
 
 	if (!kvm_vcpu_has_pmu(vcpu) || !val)
 		return;
 
 	for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
+		struct kvm_pmc *pmc;
+
 		if (!(val & BIT(i)))
 			continue;
 
-		pmc = &pmu->pmc[i];
+		pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
 
 		if (pmc->perf_event)
 			perf_event_disable(pmc->perf_event);
@@ -427,6 +428,7 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
 	mask &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 
 	for_each_set_bit(i, &mask, ARMV8_PMU_CYCLE_IDX) {
+		struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, i);
 		u64 type, reg;
 
 		/* Filter on event type */
@@ -437,30 +439,30 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
 
 		/* Increment this counter */
 		reg = __vcpu_sys_reg(vcpu, counter_index_to_reg(i)) + 1;
-		if (!kvm_pmu_idx_is_64bit(vcpu, i))
+		if (!kvm_pmc_is_64bit(pmc))
 			reg = lower_32_bits(reg);
 		__vcpu_sys_reg(vcpu, counter_index_to_reg(i)) = reg;
 
 		/* No overflow? move on */
-		if (kvm_pmu_idx_has_64bit_overflow(vcpu, i) ? reg : lower_32_bits(reg))
+		if (kvm_pmc_has_64bit_overflow(pmc) ? reg : lower_32_bits(reg))
 			continue;
 
 		/* Mark overflow */
 		__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i);
 
-		if (kvm_pmu_counter_can_chain(vcpu, i))
+		if (kvm_pmu_counter_can_chain(pmc))
 			kvm_pmu_counter_increment(vcpu, BIT(i + 1),
 						  ARMV8_PMUV3_PERFCTR_CHAIN);
 	}
 }
 
 /* Compute the sample period for a given counter value */
-static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter)
+static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
 {
 	u64 val;
 
-	if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
-		if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx))
+	if (kvm_pmc_is_64bit(pmc)) {
+		if (!kvm_pmc_has_64bit_overflow(pmc))
 			val = -(counter & GENMASK(31, 0));
 		else
 			val = (-counter) & GENMASK(63, 0);
@@ -490,7 +492,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 	 * Reset the sample period to the architectural limit,
 	 * i.e. the point where the counter overflows.
 	 */
-	period = compute_period(vcpu, idx, local64_read(&perf_event->count));
+	period = compute_period(pmc, local64_read(&perf_event->count));
 
 	local64_set(&perf_event->hw.period_left, 0);
 	perf_event->attr.sample_period = period;
@@ -498,7 +500,7 @@ static void kvm_pmu_perf_overflow(struct perf_event *perf_event,
 
 	__vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx);
 
-	if (kvm_pmu_counter_can_chain(vcpu, idx))
+	if (kvm_pmu_counter_can_chain(pmc))
 		kvm_pmu_counter_increment(vcpu, BIT(idx + 1),
 					  ARMV8_PMUV3_PERFCTR_CHAIN);
 
@@ -551,34 +553,33 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 		unsigned long mask = kvm_pmu_valid_counter_mask(vcpu);
 		mask &= ~BIT(ARMV8_PMU_CYCLE_IDX);
 		for_each_set_bit(i, &mask, 32)
-			kvm_pmu_set_counter(vcpu, i, 0, true);
+			kvm_pmu_set_pmc_value(kvm_vcpu_idx_to_pmc(vcpu, i), 0, true);
 	}
 }
 
-static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
+static bool kvm_pmu_counter_is_enabled(struct kvm_pmc *pmc)
 {
+	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
 	return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) &&
-	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
+	       (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(pmc->idx));
 }
 
 /**
  * kvm_pmu_create_perf_event - create a perf event for a counter
- * @vcpu: The vcpu pointer
- * @select_idx: The number of selected counter
+ * @pmc: Counter context
  */
-static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
+static void kvm_pmu_create_perf_event(struct kvm_pmc *pmc)
 {
+	struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
 	struct arm_pmu *arm_pmu = vcpu->kvm->arch.arm_pmu;
-	struct kvm_pmu *pmu = &vcpu->arch.pmu;
-	struct kvm_pmc *pmc = &pmu->pmc[select_idx];
 	struct perf_event *event;
 	struct perf_event_attr attr;
-	u64 eventsel, counter, reg, data;
+	u64 eventsel, reg, data;
 
-	reg = counter_index_to_evtreg(select_idx);
+	reg = counter_index_to_evtreg(pmc->idx);
 	data = __vcpu_sys_reg(vcpu, reg);
 
-	kvm_pmu_stop_counter(vcpu, pmc);
+	kvm_pmu_stop_counter(pmc);
 	if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
 		eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
 	else
@@ -604,24 +605,22 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 	attr.type = arm_pmu->pmu.type;
 	attr.size = sizeof(attr);
 	attr.pinned = 1;
-	attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
+	attr.disabled = !kvm_pmu_counter_is_enabled(pmc);
 	attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
 	attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
 	attr.exclude_hv = 1; /* Don't count EL2 events */
 	attr.exclude_host = 1; /* Don't count host events */
 	attr.config = eventsel;
 
-	counter = kvm_pmu_get_counter_value(vcpu, select_idx);
-
 	/*
 	 * If counting with a 64bit counter, advertise it to the perf
 	 * code, carefully dealing with the initial sample period
 	 * which also depends on the overflow.
 	 */
-	if (kvm_pmu_idx_is_64bit(vcpu, select_idx))
+	if (kvm_pmc_is_64bit(pmc))
 		attr.config1 |= PERF_ATTR_CFG1_COUNTER_64BIT;
 
-	attr.sample_period = compute_period(vcpu, select_idx, counter);
+	attr.sample_period = compute_period(pmc, kvm_pmu_get_pmc_value(pmc));
 
 	event = perf_event_create_kernel_counter(&attr, -1, current,
 						 kvm_pmu_perf_overflow, pmc);
@@ -648,6 +647,7 @@ static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 				    u64 select_idx)
 {
+	struct kvm_pmc *pmc = kvm_vcpu_idx_to_pmc(vcpu, select_idx);
 	u64 reg, mask;
 
 	if (!kvm_vcpu_has_pmu(vcpu))
@@ -657,11 +657,11 @@ void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
 	mask &= ~ARMV8_PMU_EVTYPE_EVENT;
 	mask |= kvm_pmu_event_mask(vcpu->kvm);
 
-	reg = counter_index_to_evtreg(select_idx);
+	reg = counter_index_to_evtreg(pmc->idx);
 
 	__vcpu_sys_reg(vcpu, reg) = data & mask;
 
-	kvm_pmu_create_perf_event(vcpu, select_idx);
+	kvm_pmu_create_perf_event(pmc);
 }
 
 void kvm_host_pmu_init(struct arm_pmu *pmu)
-- 
GitLab


From 28927f6c483d4a4c9ba8050f2a0e5af1b3557105 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Fri, 7 Oct 2022 17:34:36 +0800
Subject: [PATCH 1860/4122] watchdog: mtk_wdt: Add support for MT6795 Helio X10
 watchdog and toprgu

Add support for the toprgu reset controller and watchdog for the
MediaTek MT6795 SoC.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Co-developed-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Signed-off-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221005113517.70628-6-angelogioacchino.delregno@collabora.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/mtk_wdt.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index e977875367926..5fa42b7d4b4d5 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -10,6 +10,7 @@
  */
 
 #include <dt-bindings/reset/mt2712-resets.h>
+#include <dt-bindings/reset/mediatek,mt6795-resets.h>
 #include <dt-bindings/reset/mt7986-resets.h>
 #include <dt-bindings/reset/mt8183-resets.h>
 #include <dt-bindings/reset/mt8186-resets.h>
@@ -78,6 +79,10 @@ static const struct mtk_wdt_data mt2712_data = {
 	.toprgu_sw_rst_num = MT2712_TOPRGU_SW_RST_NUM,
 };
 
+static const struct mtk_wdt_data mt6795_data = {
+	.toprgu_sw_rst_num = MT6795_TOPRGU_SW_RST_NUM,
+};
+
 static const struct mtk_wdt_data mt7986_data = {
 	.toprgu_sw_rst_num = MT7986_TOPRGU_SW_RST_NUM,
 };
@@ -426,6 +431,7 @@ static int mtk_wdt_resume(struct device *dev)
 static const struct of_device_id mtk_wdt_dt_ids[] = {
 	{ .compatible = "mediatek,mt2712-wdt", .data = &mt2712_data },
 	{ .compatible = "mediatek,mt6589-wdt" },
+	{ .compatible = "mediatek,mt6795-wdt", .data = &mt6795_data },
 	{ .compatible = "mediatek,mt7986-wdt", .data = &mt7986_data },
 	{ .compatible = "mediatek,mt8183-wdt", .data = &mt8183_data },
 	{ .compatible = "mediatek,mt8186-wdt", .data = &mt8186_data },
-- 
GitLab


From 52f46a6aea59caf63798b2dd09050d4806398890 Mon Sep 17 00:00:00 2001
From: Runyang Chen <runyang.chen@mediatek.com>
Date: Wed, 26 Oct 2022 14:33:25 +0800
Subject: [PATCH 1861/4122] dt-bindings: watchdog: Add compatible for MediaTek
 MT8188

Add dt-binding documentation of watchdog for MediaTek MT8188 Soc

Signed-off-by: Runyang Chen <runyang.chen@mediatek.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20221026063327.20037-2-Runyang.Chen@mediatek.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/devicetree/bindings/watchdog/mtk-wdt.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
index 762c62e428ef2..b900c85d4560d 100644
--- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
+++ b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
@@ -17,6 +17,7 @@ Required properties:
 	"mediatek,mt7986-wdt", "mediatek,mt6589-wdt": for MT7986
 	"mediatek,mt8183-wdt": for MT8183
 	"mediatek,mt8186-wdt", "mediatek,mt6589-wdt": for MT8186
+	"mediatek,mt8188-wdt", "mediatek,mt6589-wdt": for MT8188
 	"mediatek,mt8516-wdt", "mediatek,mt6589-wdt": for MT8516
 	"mediatek,mt8192-wdt": for MT8192
 	"mediatek,mt8195-wdt", "mediatek,mt6589-wdt": for MT8195
-- 
GitLab


From fea58041af4c1a8ac2e8b461a772599e205d0d1f Mon Sep 17 00:00:00 2001
From: Runyang Chen <runyang.chen@mediatek.com>
Date: Wed, 26 Oct 2022 14:33:26 +0800
Subject: [PATCH 1862/4122] dt-bindings: reset: mt8188: add toprgu
 reset-controller header file

Add toprgu reset-controller header file for MT8188

Signed-off-by: Runyang Chen <runyang.chen@mediatek.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221026063327.20037-3-Runyang.Chen@mediatek.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 include/dt-bindings/reset/mt8188-resets.h | 36 +++++++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 include/dt-bindings/reset/mt8188-resets.h

diff --git a/include/dt-bindings/reset/mt8188-resets.h b/include/dt-bindings/reset/mt8188-resets.h
new file mode 100644
index 0000000000000..377cdfda82a98
--- /dev/null
+++ b/include/dt-bindings/reset/mt8188-resets.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)*/
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ * Author: Runyang Chen <runyang.chen@mediatek.com>
+ */
+
+#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8188
+#define _DT_BINDINGS_RESET_CONTROLLER_MT8188
+
+#define MT8188_TOPRGU_CONN_MCU_SW_RST          0
+#define MT8188_TOPRGU_INFRA_GRST_SW_RST        1
+#define MT8188_TOPRGU_IPU0_SW_RST              2
+#define MT8188_TOPRGU_IPU1_SW_RST              3
+#define MT8188_TOPRGU_IPU2_SW_RST              4
+#define MT8188_TOPRGU_AUD_ASRC_SW_RST          5
+#define MT8188_TOPRGU_INFRA_SW_RST             6
+#define MT8188_TOPRGU_MMSYS_SW_RST             7
+#define MT8188_TOPRGU_MFG_SW_RST               8
+#define MT8188_TOPRGU_VENC_SW_RST              9
+#define MT8188_TOPRGU_VDEC_SW_RST              10
+#define MT8188_TOPRGU_CAM_VCORE_SW_RST         11
+#define MT8188_TOPRGU_SCP_SW_RST               12
+#define MT8188_TOPRGU_APMIXEDSYS_SW_RST        13
+#define MT8188_TOPRGU_AUDIO_SW_RST             14
+#define MT8188_TOPRGU_CAMSYS_SW_RST            15
+#define MT8188_TOPRGU_MJC_SW_RST               16
+#define MT8188_TOPRGU_PERI_SW_RST              17
+#define MT8188_TOPRGU_PERI_AO_SW_RST           18
+#define MT8188_TOPRGU_PCIE_SW_RST              19
+#define MT8188_TOPRGU_ADSPSYS_SW_RST           21
+#define MT8188_TOPRGU_DPTX_SW_RST              22
+#define MT8188_TOPRGU_SPMI_MST_SW_RST          23
+
+#define MT8188_TOPRGU_SW_RST_NUM               24
+
+#endif  /* _DT_BINDINGS_RESET_CONTROLLER_MT8188 */
-- 
GitLab


From bc7313652a6370df2f73e146483abfa5a69b85cf Mon Sep 17 00:00:00 2001
From: Runyang Chen <runyang.chen@mediatek.com>
Date: Wed, 26 Oct 2022 14:33:27 +0800
Subject: [PATCH 1863/4122] watchdog: mediatek: mt8188: add wdt support

Support MT8188 watchdog device.

Signed-off-by: Runyang Chen <runyang.chen@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221026063327.20037-4-Runyang.Chen@mediatek.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/mtk_wdt.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/watchdog/mtk_wdt.c b/drivers/watchdog/mtk_wdt.c
index 5fa42b7d4b4d5..3e6212591e697 100644
--- a/drivers/watchdog/mtk_wdt.c
+++ b/drivers/watchdog/mtk_wdt.c
@@ -14,6 +14,7 @@
 #include <dt-bindings/reset/mt7986-resets.h>
 #include <dt-bindings/reset/mt8183-resets.h>
 #include <dt-bindings/reset/mt8186-resets.h>
+#include <dt-bindings/reset/mt8188-resets.h>
 #include <dt-bindings/reset/mt8192-resets.h>
 #include <dt-bindings/reset/mt8195-resets.h>
 #include <linux/delay.h>
@@ -95,6 +96,10 @@ static const struct mtk_wdt_data mt8186_data = {
 	.toprgu_sw_rst_num = MT8186_TOPRGU_SW_RST_NUM,
 };
 
+static const struct mtk_wdt_data mt8188_data = {
+	.toprgu_sw_rst_num = MT8188_TOPRGU_SW_RST_NUM,
+};
+
 static const struct mtk_wdt_data mt8192_data = {
 	.toprgu_sw_rst_num = MT8192_TOPRGU_SW_RST_NUM,
 };
@@ -435,6 +440,7 @@ static const struct of_device_id mtk_wdt_dt_ids[] = {
 	{ .compatible = "mediatek,mt7986-wdt", .data = &mt7986_data },
 	{ .compatible = "mediatek,mt8183-wdt", .data = &mt8183_data },
 	{ .compatible = "mediatek,mt8186-wdt", .data = &mt8186_data },
+	{ .compatible = "mediatek,mt8188-wdt", .data = &mt8188_data },
 	{ .compatible = "mediatek,mt8192-wdt", .data = &mt8192_data },
 	{ .compatible = "mediatek,mt8195-wdt", .data = &mt8195_data },
 	{ /* sentinel */ }
-- 
GitLab


From 4d1363a46cdfcc00460adf1f0fcb81bb5ba69d94 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Fri, 7 Oct 2022 17:34:34 +0800
Subject: [PATCH 1864/4122] dt-bindings: watchdog: mediatek: Convert mtk-wdt to
 json-schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert the MediaTek watchdog bindings to schema.

The original binding only had 4 without a fallback but there is a reset
controller on the "mediatek,mt7986-wdt", "mediatek,mt8186-wdt",
"mediatek,mt8188-wdt" and "mediatek,mt8195-wdt" Since there is no reset
controller for the mt6589, we remove "mediatek,mt6589-wdt" as a
fallback.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Co-developed-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Signed-off-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Link: https://lore.kernel.org/r/20221101090116.27130-6-allen-kh.cheng@mediatek.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 .../bindings/watchdog/mediatek,mtk-wdt.yaml   | 78 +++++++++++++++++++
 .../devicetree/bindings/watchdog/mtk-wdt.txt  | 43 ----------
 2 files changed, 78 insertions(+), 43 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
 delete mode 100644 Documentation/devicetree/bindings/watchdog/mtk-wdt.txt

diff --git a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
new file mode 100644
index 0000000000000..b0d71fa28ac31
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
@@ -0,0 +1,78 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/mediatek,mtk-wdt.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek SoCs Watchdog timer
+
+maintainers:
+  - Matthias Brugger <matthias.bgg@gmail.com>
+
+description:
+  The watchdog supports a pre-timeout interrupt that fires
+  timeout-sec/2 before the expiry.
+
+allOf:
+  - $ref: watchdog.yaml#
+
+properties:
+  compatible:
+    oneOf:
+      - enum:
+          - mediatek,mt2712-wdt
+          - mediatek,mt6589-wdt
+          - mediatek,mt7986-wdt
+          - mediatek,mt8183-wdt
+          - mediatek,mt8186-wdt
+          - mediatek,mt8188-wdt
+          - mediatek,mt8192-wdt
+          - mediatek,mt8195-wdt
+      - items:
+          - enum:
+              - mediatek,mt2701-wdt
+              - mediatek,mt6582-wdt
+              - mediatek,mt6797-wdt
+              - mediatek,mt7622-wdt
+              - mediatek,mt7623-wdt
+              - mediatek,mt7629-wdt
+              - mediatek,mt8516-wdt
+          - const: mediatek,mt6589-wdt
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    items:
+      - description: Watchdog pre-timeout (bark) interrupt
+
+  mediatek,disable-extrst:
+    description: Disable sending output reset signal
+    type: boolean
+
+  '#reset-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    soc {
+        #address-cells = <2>;
+        #size-cells = <2>;
+
+        watchdog: watchdog@10007000 {
+            compatible = "mediatek,mt8183-wdt";
+            reg = <0 0x10007000 0 0x100>;
+            interrupts = <GIC_SPI 139 IRQ_TYPE_LEVEL_LOW>;
+            mediatek,disable-extrst;
+            timeout-sec = <10>;
+            #reset-cells = <1>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt b/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
deleted file mode 100644
index b900c85d4560d..0000000000000
--- a/Documentation/devicetree/bindings/watchdog/mtk-wdt.txt
+++ /dev/null
@@ -1,43 +0,0 @@
-Mediatek SoCs Watchdog timer
-
-The watchdog supports a pre-timeout interrupt that fires timeout-sec/2
-before the expiry.
-
-Required properties:
-
-- compatible should contain:
-	"mediatek,mt2701-wdt", "mediatek,mt6589-wdt": for MT2701
-	"mediatek,mt2712-wdt": for MT2712
-	"mediatek,mt6582-wdt", "mediatek,mt6589-wdt": for MT6582
-	"mediatek,mt6589-wdt": for MT6589
-	"mediatek,mt6797-wdt", "mediatek,mt6589-wdt": for MT6797
-	"mediatek,mt7622-wdt", "mediatek,mt6589-wdt": for MT7622
-	"mediatek,mt7623-wdt", "mediatek,mt6589-wdt": for MT7623
-	"mediatek,mt7629-wdt", "mediatek,mt6589-wdt": for MT7629
-	"mediatek,mt7986-wdt", "mediatek,mt6589-wdt": for MT7986
-	"mediatek,mt8183-wdt": for MT8183
-	"mediatek,mt8186-wdt", "mediatek,mt6589-wdt": for MT8186
-	"mediatek,mt8188-wdt", "mediatek,mt6589-wdt": for MT8188
-	"mediatek,mt8516-wdt", "mediatek,mt6589-wdt": for MT8516
-	"mediatek,mt8192-wdt": for MT8192
-	"mediatek,mt8195-wdt", "mediatek,mt6589-wdt": for MT8195
-
-- reg : Specifies base physical address and size of the registers.
-
-Optional properties:
-- mediatek,disable-extrst: disable send output reset signal
-- interrupts: Watchdog pre-timeout (bark) interrupt.
-- timeout-sec: contains the watchdog timeout in seconds.
-- #reset-cells: Should be 1.
-
-Example:
-
-watchdog: watchdog@10007000 {
-	compatible = "mediatek,mt8183-wdt",
-		     "mediatek,mt6589-wdt";
-	mediatek,disable-extrst;
-	reg = <0 0x10007000 0 0x100>;
-	interrupts = <GIC_SPI 139 IRQ_TYPE_NONE>;
-	timeout-sec = <10>;
-	#reset-cells = <1>;
-};
-- 
GitLab


From c389e1f5068f13aa9891fead50dc83a747426bc6 Mon Sep 17 00:00:00 2001
From: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Date: Fri, 7 Oct 2022 17:34:35 +0800
Subject: [PATCH 1865/4122] dt-bindings: watchdog: mediatek,mtk-wdt: Add
 compatible for MT6795
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the mediatek,mt6795-wdt compatible.

Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Co-developed-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Signed-off-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Link: https://lore.kernel.org/r/20221101090116.27130-7-allen-kh.cheng@mediatek.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
index b0d71fa28ac31..36606524d8691 100644
--- a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
@@ -22,6 +22,7 @@ properties:
       - enum:
           - mediatek,mt2712-wdt
           - mediatek,mt6589-wdt
+          - mediatek,mt6795-wdt
           - mediatek,mt7986-wdt
           - mediatek,mt8183-wdt
           - mediatek,mt8186-wdt
-- 
GitLab


From 1d8e67ecf114ef4140a1df7f1581e0e2cab6739a Mon Sep 17 00:00:00 2001
From: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Date: Fri, 7 Oct 2022 17:34:37 +0800
Subject: [PATCH 1866/4122] dt-bindings: watchdog: mediatek,mtk-wdt: Add
 compatible for MT8173
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add the mediatek,mt8173-wdt compatible using mediatek,mt6589-wdt as
fallback.

Signed-off-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Link: https://lore.kernel.org/r/20221101090116.27130-8-allen-kh.cheng@mediatek.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
index 36606524d8691..b3605608410c6 100644
--- a/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/mediatek,mtk-wdt.yaml
@@ -37,6 +37,7 @@ properties:
               - mediatek,mt7622-wdt
               - mediatek,mt7623-wdt
               - mediatek,mt7629-wdt
+              - mediatek,mt8173-wdt
               - mediatek,mt8516-wdt
           - const: mediatek,mt6589-wdt
 
-- 
GitLab


From 08435c2aab3aea369182bdec3c71ab78b15f8c82 Mon Sep 17 00:00:00 2001
From: Thomas Kastner <thomas.kastner@advantech.com>
Date: Wed, 19 Oct 2022 09:39:03 +0200
Subject: [PATCH 1867/4122] watchdog: Add Advantech EC watchdog driver

This patch adds the 'advantech_ec_wdt' kernel module which provides
WDT support for Advantech platforms with ITE based Embedded Controller.

Signed-off-by: Thomas Kastner <thomas.kastner@advantech.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Tested-by: Thomas Kastner <thomas.kastner@advantech.com>
Link: https://lore.kernel.org/r/Y0+pl/26e3pcEUPk@EIS-S230
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/Kconfig            |   7 +
 drivers/watchdog/Makefile           |   1 +
 drivers/watchdog/advantech_ec_wdt.c | 205 ++++++++++++++++++++++++++++
 3 files changed, 213 insertions(+)
 create mode 100644 drivers/watchdog/advantech_ec_wdt.c

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index b64bc49c7f30e..0bc40b763b065 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1055,6 +1055,13 @@ config ADVANTECH_WDT
 	  feature. More information can be found at
 	  <https://www.advantech.com.tw/products/>
 
+config ADVANTECH_EC_WDT
+	tristate "Advantech Embedded Controller Watchdog Timer"
+	depends on X86
+	help
+		This driver supports Advantech products with ITE based Embedded Controller.
+		It does not support Advantech products with other ECs or without EC.
+
 config ALIM1535_WDT
 	tristate "ALi M1535 PMU Watchdog Timer"
 	depends on X86 && PCI
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index d41e5f830ae7f..9cbf6580f16c9 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_SUNPLUS_WATCHDOG) += sunplus_wdt.o
 # X86 (i386 + ia64 + x86_64) Architecture
 obj-$(CONFIG_ACQUIRE_WDT) += acquirewdt.o
 obj-$(CONFIG_ADVANTECH_WDT) += advantechwdt.o
+obj-$(CONFIG_ADVANTECH_EC_WDT) += advantech_ec_wdt.o
 obj-$(CONFIG_ALIM1535_WDT) += alim1535_wdt.o
 obj-$(CONFIG_ALIM7101_WDT) += alim7101_wdt.o
 obj-$(CONFIG_EBC_C384_WDT) += ebc-c384_wdt.o
diff --git a/drivers/watchdog/advantech_ec_wdt.c b/drivers/watchdog/advantech_ec_wdt.c
new file mode 100644
index 0000000000000..7c380f90ca583
--- /dev/null
+++ b/drivers/watchdog/advantech_ec_wdt.c
@@ -0,0 +1,205 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *	Advantech Embedded Controller Watchdog Driver
+ *
+ *	This driver supports Advantech products with ITE based Embedded Controller.
+ *	It does not support Advantech products with other ECs or without EC.
+ *
+ *	Copyright (C) 2022 Advantech Europe B.V.
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/isa.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/watchdog.h>
+
+#define DRIVER_NAME		"advantech_ec_wdt"
+
+/* EC IO region */
+#define EC_BASE_ADDR		0x299
+#define EC_ADDR_EXTENT		2
+
+/* EC minimum IO access delay in ms */
+#define EC_MIN_DELAY		10
+
+/* EC interface definitions */
+#define EC_ADDR_CMD		(EC_BASE_ADDR + 1)
+#define EC_ADDR_DATA		EC_BASE_ADDR
+#define EC_CMD_EC_PROBE		0x30
+#define EC_CMD_COMM		0x89
+#define EC_CMD_WDT_START	0x28
+#define EC_CMD_WDT_STOP		0x29
+#define EC_CMD_WDT_RESET	0x2A
+#define EC_DAT_EN_DLY_H		0x58
+#define EC_DAT_EN_DLY_L		0x59
+#define EC_DAT_RST_DLY_H	0x5E
+#define EC_DAT_RST_DLY_L	0x5F
+#define EC_MAGIC		0x95
+
+/* module parameters */
+#define MIN_TIME		1
+#define MAX_TIME		6000 /* 100 minutes */
+#define DEFAULT_TIME		60
+
+static unsigned int timeout;
+static ktime_t ec_timestamp;
+
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout,
+		 "Default Watchdog timer setting (" __MODULE_STRING(DEFAULT_TIME) "s). The range is from " __MODULE_STRING(MIN_TIME) " to " __MODULE_STRING(MAX_TIME) ".");
+
+static void adv_ec_wdt_timing_gate(void)
+{
+	ktime_t time_cur, time_delta;
+
+	/* ensure minimum delay between IO accesses*/
+	time_cur = ktime_get();
+	time_delta = ktime_to_ms(ktime_sub(time_cur, ec_timestamp));
+	if (time_delta < EC_MIN_DELAY) {
+		time_delta = EC_MIN_DELAY - time_delta;
+		usleep_range(time_delta * 1000, (time_delta + 1) * 1000);
+	}
+	ec_timestamp = ktime_get();
+}
+
+static void adv_ec_wdt_outb(unsigned char value, unsigned short port)
+{
+	adv_ec_wdt_timing_gate();
+	outb(value, port);
+}
+
+static unsigned char adv_ec_wdt_inb(unsigned short port)
+{
+	adv_ec_wdt_timing_gate();
+	return inb(port);
+}
+
+static int adv_ec_wdt_ping(struct watchdog_device *wdd)
+{
+	adv_ec_wdt_outb(EC_CMD_WDT_RESET, EC_ADDR_CMD);
+	return 0;
+}
+
+static int adv_ec_wdt_set_timeout(struct watchdog_device *wdd, unsigned int t)
+{
+	unsigned int val;
+
+	/* scale time to EC 100 ms base */
+	val = t * 10;
+
+	/* reset enable delay, just in case it was set by BIOS etc. */
+	adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD);
+	adv_ec_wdt_outb(EC_DAT_EN_DLY_H, EC_ADDR_DATA);
+	adv_ec_wdt_outb(0, EC_ADDR_DATA);
+
+	adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD);
+	adv_ec_wdt_outb(EC_DAT_EN_DLY_L, EC_ADDR_DATA);
+	adv_ec_wdt_outb(0, EC_ADDR_DATA);
+
+	/* set reset delay */
+	adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD);
+	adv_ec_wdt_outb(EC_DAT_RST_DLY_H, EC_ADDR_DATA);
+	adv_ec_wdt_outb(val >> 8, EC_ADDR_DATA);
+
+	adv_ec_wdt_outb(EC_CMD_COMM, EC_ADDR_CMD);
+	adv_ec_wdt_outb(EC_DAT_RST_DLY_L, EC_ADDR_DATA);
+	adv_ec_wdt_outb(val & 0xFF, EC_ADDR_DATA);
+
+	wdd->timeout = t;
+	return 0;
+}
+
+static int adv_ec_wdt_start(struct watchdog_device *wdd)
+{
+	adv_ec_wdt_set_timeout(wdd, wdd->timeout);
+	adv_ec_wdt_outb(EC_CMD_WDT_START, EC_ADDR_CMD);
+
+	return 0;
+}
+
+static int adv_ec_wdt_stop(struct watchdog_device *wdd)
+{
+	adv_ec_wdt_outb(EC_CMD_WDT_STOP, EC_ADDR_CMD);
+
+	return 0;
+}
+
+static const struct watchdog_info adv_ec_wdt_info = {
+	.identity =	DRIVER_NAME,
+	.options =	WDIOF_SETTIMEOUT |
+			WDIOF_MAGICCLOSE |
+			WDIOF_KEEPALIVEPING,
+};
+
+static const struct watchdog_ops adv_ec_wdt_ops = {
+	.owner =	THIS_MODULE,
+	.start =	adv_ec_wdt_start,
+	.stop =		adv_ec_wdt_stop,
+	.ping =		adv_ec_wdt_ping,
+	.set_timeout =	adv_ec_wdt_set_timeout,
+};
+
+static struct watchdog_device adv_ec_wdt_dev = {
+	.info =		&adv_ec_wdt_info,
+	.ops =		&adv_ec_wdt_ops,
+	.min_timeout =	MIN_TIME,
+	.max_timeout =	MAX_TIME,
+	.timeout =	DEFAULT_TIME,
+};
+
+static int adv_ec_wdt_probe(struct device *dev, unsigned int id)
+{
+	if (!devm_request_region(dev, EC_BASE_ADDR, EC_ADDR_EXTENT, dev_name(dev))) {
+		dev_err(dev, "Unable to lock port addresses (0x%X-0x%X)\n",
+			EC_BASE_ADDR, EC_BASE_ADDR + EC_ADDR_EXTENT);
+		return -EBUSY;
+	}
+
+	watchdog_init_timeout(&adv_ec_wdt_dev, timeout, dev);
+	watchdog_stop_on_reboot(&adv_ec_wdt_dev);
+	watchdog_stop_on_unregister(&adv_ec_wdt_dev);
+
+	return devm_watchdog_register_device(dev, &adv_ec_wdt_dev);
+}
+
+static struct isa_driver adv_ec_wdt_driver = {
+	.probe		= adv_ec_wdt_probe,
+	.driver		= {
+	.name		= DRIVER_NAME,
+	},
+};
+
+static int __init adv_ec_wdt_init(void)
+{
+	unsigned int val;
+
+	/* quick probe for EC */
+	if (!request_region(EC_BASE_ADDR, EC_ADDR_EXTENT, DRIVER_NAME))
+		return -EBUSY;
+
+	adv_ec_wdt_outb(EC_CMD_EC_PROBE, EC_ADDR_CMD);
+	val = adv_ec_wdt_inb(EC_ADDR_DATA);
+	release_region(EC_BASE_ADDR, EC_ADDR_EXTENT);
+
+	if (val != EC_MAGIC)
+		return -ENODEV;
+
+	return isa_register_driver(&adv_ec_wdt_driver, 1);
+}
+
+static void __exit adv_ec_wdt_exit(void)
+{
+	isa_unregister_driver(&adv_ec_wdt_driver);
+}
+
+module_init(adv_ec_wdt_init);
+module_exit(adv_ec_wdt_exit);
+
+MODULE_AUTHOR("Thomas Kastner <thomas.kastner@advantech.com>");
+MODULE_DESCRIPTION("Advantech Embedded Controller Watchdog Device Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("20221019");
+MODULE_ALIAS("isa:" DRIVER_NAME);
-- 
GitLab


From b49e2a3cfb84290b878999ade1410a3edb65706c Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Oct 2022 19:50:44 +0100
Subject: [PATCH 1868/4122] watchdog: at91rm9200: Remove #ifdef guards for PM
 related functions

Use the pm_ptr() macro to handle the .suspend/.resume callbacks.

This macro allows the suspend and resume functions to be automatically
dropped by the compiler when CONFIG_SUSPEND is disabled, without having
to use #ifdef guards. Not using #ifdef guards means that the code is
always compiled independently of any Kconfig option, and thanks to that
bugs and regressions are easier to catch.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Link: https://lore.kernel.org/r/20221020185047.1001522-2-paul@crapouillou.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/at91rm9200_wdt.c | 11 ++---------
 drivers/watchdog/db8500_wdt.c     |  9 ++-------
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/drivers/watchdog/at91rm9200_wdt.c b/drivers/watchdog/at91rm9200_wdt.c
index 6d751eb8191d6..5126454bb8613 100644
--- a/drivers/watchdog/at91rm9200_wdt.c
+++ b/drivers/watchdog/at91rm9200_wdt.c
@@ -278,8 +278,6 @@ static void at91wdt_shutdown(struct platform_device *pdev)
 	at91_wdt_stop();
 }
 
-#ifdef CONFIG_PM
-
 static int at91wdt_suspend(struct platform_device *pdev, pm_message_t message)
 {
 	at91_wdt_stop();
@@ -293,11 +291,6 @@ static int at91wdt_resume(struct platform_device *pdev)
 	return 0;
 }
 
-#else
-#define at91wdt_suspend NULL
-#define at91wdt_resume	NULL
-#endif
-
 static const struct of_device_id at91_wdt_dt_ids[] = {
 	{ .compatible = "atmel,at91rm9200-wdt" },
 	{ /* sentinel */ }
@@ -308,8 +301,8 @@ static struct platform_driver at91wdt_driver = {
 	.probe		= at91wdt_probe,
 	.remove		= at91wdt_remove,
 	.shutdown	= at91wdt_shutdown,
-	.suspend	= at91wdt_suspend,
-	.resume		= at91wdt_resume,
+	.suspend	= pm_ptr(at91wdt_suspend),
+	.resume		= pm_ptr(at91wdt_resume),
 	.driver		= {
 		.name	= "atmel_st_watchdog",
 		.of_match_table = at91_wdt_dt_ids,
diff --git a/drivers/watchdog/db8500_wdt.c b/drivers/watchdog/db8500_wdt.c
index 6ed8b63d310d1..97148ac0aa54a 100644
--- a/drivers/watchdog/db8500_wdt.c
+++ b/drivers/watchdog/db8500_wdt.c
@@ -105,7 +105,6 @@ static int db8500_wdt_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
 static int db8500_wdt_suspend(struct platform_device *pdev,
 			     pm_message_t state)
 {
@@ -130,15 +129,11 @@ static int db8500_wdt_resume(struct platform_device *pdev)
 	}
 	return 0;
 }
-#else
-#define db8500_wdt_suspend NULL
-#define db8500_wdt_resume NULL
-#endif
 
 static struct platform_driver db8500_wdt_driver = {
 	.probe		= db8500_wdt_probe,
-	.suspend	= db8500_wdt_suspend,
-	.resume		= db8500_wdt_resume,
+	.suspend	= pm_ptr(db8500_wdt_suspend),
+	.resume		= pm_ptr(db8500_wdt_resume),
 	.driver		= {
 		.name	= "db8500_wdt",
 	},
-- 
GitLab


From d36eda79c600518fb6bc8ad9e3f2f5f201ec1fb9 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Oct 2022 19:50:45 +0100
Subject: [PATCH 1869/4122] watchdog: twl4030: Remove #ifdef guards for PM
 related functions

Use the pm_ptr() macro to handle the .suspend/.resume callbacks.

This macro allows the suspend and resume functions to be automatically
dropped by the compiler when CONFIG_SUSPEND is disabled, without having
to use #ifdef guards. Not using #ifdef guards means that the code is
always compiled independently of any Kconfig option, and thanks to that
bugs and regressions are easier to catch.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221020185047.1001522-3-paul@crapouillou.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/twl4030_wdt.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/watchdog/twl4030_wdt.c b/drivers/watchdog/twl4030_wdt.c
index 36b4a660928d3..09d17e20f4a76 100644
--- a/drivers/watchdog/twl4030_wdt.c
+++ b/drivers/watchdog/twl4030_wdt.c
@@ -81,7 +81,6 @@ static int twl4030_wdt_probe(struct platform_device *pdev)
 	return devm_watchdog_register_device(dev, wdt);
 }
 
-#ifdef CONFIG_PM
 static int twl4030_wdt_suspend(struct platform_device *pdev, pm_message_t state)
 {
 	struct watchdog_device *wdt = platform_get_drvdata(pdev);
@@ -99,10 +98,6 @@ static int twl4030_wdt_resume(struct platform_device *pdev)
 
 	return 0;
 }
-#else
-#define twl4030_wdt_suspend        NULL
-#define twl4030_wdt_resume         NULL
-#endif
 
 static const struct of_device_id twl_wdt_of_match[] = {
 	{ .compatible = "ti,twl4030-wdt", },
@@ -112,8 +107,8 @@ MODULE_DEVICE_TABLE(of, twl_wdt_of_match);
 
 static struct platform_driver twl4030_wdt_driver = {
 	.probe		= twl4030_wdt_probe,
-	.suspend	= twl4030_wdt_suspend,
-	.resume		= twl4030_wdt_resume,
+	.suspend	= pm_ptr(twl4030_wdt_suspend),
+	.resume		= pm_ptr(twl4030_wdt_resume),
 	.driver		= {
 		.name		= "twl4030_wdt",
 		.of_match_table	= twl_wdt_of_match,
-- 
GitLab


From 0327476d6ef32c347e1590e6215616adc847afe1 Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Oct 2022 19:50:46 +0100
Subject: [PATCH 1870/4122] watchdog: omap: Remove #ifdef guards for PM related
 functions

Use the pm_ptr() macro to handle the .suspend/.resume callbacks.

This macro allows the suspend and resume functions to be automatically
dropped by the compiler when CONFIG_SUSPEND is disabled, without having
to use #ifdef guards. Not using #ifdef guards means that the code is
always compiled independently of any Kconfig option, and thanks to that
bugs and regressions are easier to catch.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221020185047.1001522-4-paul@crapouillou.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/omap_wdt.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/watchdog/omap_wdt.c b/drivers/watchdog/omap_wdt.c
index 74d785b2b478f..e75aa86f63cbb 100644
--- a/drivers/watchdog/omap_wdt.c
+++ b/drivers/watchdog/omap_wdt.c
@@ -316,8 +316,6 @@ static int omap_wdt_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef	CONFIG_PM
-
 /* REVISIT ... not clear this is the best way to handle system suspend; and
  * it's very inappropriate for selective device suspend (e.g. suspending this
  * through sysfs rather than by stopping the watchdog daemon).  Also, this
@@ -353,11 +351,6 @@ static int omap_wdt_resume(struct platform_device *pdev)
 	return 0;
 }
 
-#else
-#define	omap_wdt_suspend	NULL
-#define	omap_wdt_resume		NULL
-#endif
-
 static const struct of_device_id omap_wdt_of_match[] = {
 	{ .compatible = "ti,omap3-wdt", },
 	{},
@@ -368,8 +361,8 @@ static struct platform_driver omap_wdt_driver = {
 	.probe		= omap_wdt_probe,
 	.remove		= omap_wdt_remove,
 	.shutdown	= omap_wdt_shutdown,
-	.suspend	= omap_wdt_suspend,
-	.resume		= omap_wdt_resume,
+	.suspend	= pm_ptr(omap_wdt_suspend),
+	.resume		= pm_ptr(omap_wdt_resume),
 	.driver		= {
 		.name	= "omap_wdt",
 		.of_match_table = omap_wdt_of_match,
-- 
GitLab


From 758f46c2e67c4901ea49e684e12adb698b1b2bbd Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Thu, 20 Oct 2022 19:50:47 +0100
Subject: [PATCH 1871/4122] watchdog: kempld: Remove #ifdef guards for PM
 related functions

Use the pm_ptr() macro to handle the .suspend/.resume callbacks.

This macro allows the suspend and resume functions to be automatically
dropped by the compiler when CONFIG_SUSPEND is disabled, without having
to use #ifdef guards. Not using #ifdef guards means that the code is
always compiled independently of any Kconfig option, and thanks to that
bugs and regressions are easier to catch.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221020185047.1001522-5-paul@crapouillou.net
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/kempld_wdt.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/watchdog/kempld_wdt.c b/drivers/watchdog/kempld_wdt.c
index 40bd518ed8738..e6c7a29066804 100644
--- a/drivers/watchdog/kempld_wdt.c
+++ b/drivers/watchdog/kempld_wdt.c
@@ -75,9 +75,7 @@ struct kempld_wdt_data {
 	struct watchdog_device		wdd;
 	unsigned int			pretimeout;
 	struct kempld_wdt_stage		stage[KEMPLD_WDT_MAX_STAGES];
-#ifdef CONFIG_PM
 	u8				pm_status_store;
-#endif
 };
 
 #define DEFAULT_TIMEOUT		30 /* seconds */
@@ -495,7 +493,6 @@ static int kempld_wdt_probe(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM
 /* Disable watchdog if it is active during suspend */
 static int kempld_wdt_suspend(struct platform_device *pdev,
 				pm_message_t message)
@@ -531,18 +528,14 @@ static int kempld_wdt_resume(struct platform_device *pdev)
 	else
 		return kempld_wdt_stop(wdd);
 }
-#else
-#define kempld_wdt_suspend	NULL
-#define kempld_wdt_resume	NULL
-#endif
 
 static struct platform_driver kempld_wdt_driver = {
 	.driver		= {
 		.name	= "kempld-wdt",
 	},
 	.probe		= kempld_wdt_probe,
-	.suspend	= kempld_wdt_suspend,
-	.resume		= kempld_wdt_resume,
+	.suspend	= pm_ptr(kempld_wdt_suspend),
+	.resume		= pm_ptr(kempld_wdt_resume),
 };
 
 module_platform_driver(kempld_wdt_driver);
-- 
GitLab


From 47c008050aec3e9a13af29dd74cd8b4c112bc07b Mon Sep 17 00:00:00 2001
From: Marcus Folkesson <marcus.folkesson@gmail.com>
Date: Fri, 28 Oct 2022 09:50:19 +0200
Subject: [PATCH 1872/4122] watchdog: rn5t618: add support for read out
 bootstatus

The PMIC does store the power-off factor internally.
Read it out and report it as bootstatus.

Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Acked-by: Lee Jones <lee@kernel.org>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221028075019.2757812-1-marcus.folkesson@gmail.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/rn5t618_wdt.c | 12 ++++++++++++
 include/linux/mfd/rn5t618.h    |  9 +++++++++
 2 files changed, 21 insertions(+)

diff --git a/drivers/watchdog/rn5t618_wdt.c b/drivers/watchdog/rn5t618_wdt.c
index 6e524c8e26a8f..40d8ebd8c0acd 100644
--- a/drivers/watchdog/rn5t618_wdt.c
+++ b/drivers/watchdog/rn5t618_wdt.c
@@ -144,6 +144,8 @@ static int rn5t618_wdt_probe(struct platform_device *pdev)
 	struct rn5t618 *rn5t618 = dev_get_drvdata(dev->parent);
 	struct rn5t618_wdt *wdt;
 	int min_timeout, max_timeout;
+	int ret;
+	unsigned int val;
 
 	wdt = devm_kzalloc(dev, sizeof(struct rn5t618_wdt), GFP_KERNEL);
 	if (!wdt)
@@ -160,6 +162,16 @@ static int rn5t618_wdt_probe(struct platform_device *pdev)
 	wdt->wdt_dev.timeout = max_timeout;
 	wdt->wdt_dev.parent = dev;
 
+	/* Read out previous power-off factor */
+	ret = regmap_read(wdt->rn5t618->regmap, RN5T618_POFFHIS, &val);
+	if (ret)
+		return ret;
+
+	if (val & RN5T618_POFFHIS_VINDET)
+		wdt->wdt_dev.bootstatus = WDIOF_POWERUNDER;
+	else if (val & RN5T618_POFFHIS_WDG)
+		wdt->wdt_dev.bootstatus = WDIOF_CARDRESET;
+
 	watchdog_set_drvdata(&wdt->wdt_dev, wdt);
 	watchdog_init_timeout(&wdt->wdt_dev, timeout, dev);
 	watchdog_set_nowayout(&wdt->wdt_dev, nowayout);
diff --git a/include/linux/mfd/rn5t618.h b/include/linux/mfd/rn5t618.h
index 8aa0bda1af4fa..aacb6d51e99cb 100644
--- a/include/linux/mfd/rn5t618.h
+++ b/include/linux/mfd/rn5t618.h
@@ -227,6 +227,15 @@
 #define RN5T618_WATCHDOG_WDOGTIM_S	0
 #define RN5T618_PWRIRQ_IR_WDOG		BIT(6)
 
+#define RN5T618_POFFHIS_PWRON		BIT(0)
+#define RN5T618_POFFHIS_TSHUT		BIT(1)
+#define RN5T618_POFFHIS_VINDET		BIT(2)
+#define RN5T618_POFFHIS_IODET		BIT(3)
+#define RN5T618_POFFHIS_CPU		BIT(4)
+#define RN5T618_POFFHIS_WDG		BIT(5)
+#define RN5T618_POFFHIS_DCLIM		BIT(6)
+#define RN5T618_POFFHIS_N_OE		BIT(7)
+
 enum {
 	RN5T618_DCDC1,
 	RN5T618_DCDC2,
-- 
GitLab


From ef9b7bf52c2f47f0a9bf988543c577b92c92d15e Mon Sep 17 00:00:00 2001
From: Mika Westerberg <mika.westerberg@linux.intel.com>
Date: Fri, 28 Oct 2022 09:27:50 +0300
Subject: [PATCH 1873/4122] watchdog: iTCO_wdt: Set NO_REBOOT if the watchdog
 is not already running
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Daniel reported that the commit 1ae3e78c0820 ("watchdog: iTCO_wdt: No
need to stop the timer in probe") makes QEMU implementation of the iTCO
watchdog not to trigger reboot anymore when NO_REBOOT flag is initially
cleared using this option (in QEMU command line):

  -global ICH9-LPC.noreboot=false

The problem with the commit is that it left the unconditional setting of
NO_REBOOT that is not cleared anymore when the kernel keeps pinging the
watchdog (as opposed to the previous code that called iTCO_wdt_stop()
that cleared it).

Fix this so that we only set NO_REBOOT if the watchdog was not initially
running.

Fixes: 1ae3e78c0820 ("watchdog: iTCO_wdt: No need to stop the timer in probe")
Reported-by: Daniel P. Berrangé <berrange@redhat.com>
Signed-off-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Tested-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221028062750.45451-1-mika.westerberg@linux.intel.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/iTCO_wdt.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index 34693f11385f6..e937b4dd28be7 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -423,14 +423,18 @@ static unsigned int iTCO_wdt_get_timeleft(struct watchdog_device *wd_dev)
 	return time_left;
 }
 
-static void iTCO_wdt_set_running(struct iTCO_wdt_private *p)
+/* Returns true if the watchdog was running */
+static bool iTCO_wdt_set_running(struct iTCO_wdt_private *p)
 {
 	u16 val;
 
-	/* Bit 11: TCO Timer Halt -> 0 = The TCO timer is * enabled */
+	/* Bit 11: TCO Timer Halt -> 0 = The TCO timer is enabled */
 	val = inw(TCO1_CNT(p));
-	if (!(val & BIT(11)))
+	if (!(val & BIT(11))) {
 		set_bit(WDOG_HW_RUNNING, &p->wddev.status);
+		return true;
+	}
+	return false;
 }
 
 /*
@@ -518,9 +522,6 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
 		return -ENODEV;	/* Cannot reset NO_REBOOT bit */
 	}
 
-	/* Set the NO_REBOOT bit to prevent later reboots, just for sure */
-	p->update_no_reboot_bit(p->no_reboot_priv, true);
-
 	if (turn_SMI_watchdog_clear_off >= p->iTCO_version) {
 		/*
 		 * Bit 13: TCO_EN -> 0
@@ -572,7 +573,13 @@ static int iTCO_wdt_probe(struct platform_device *pdev)
 	watchdog_set_drvdata(&p->wddev, p);
 	platform_set_drvdata(pdev, p);
 
-	iTCO_wdt_set_running(p);
+	if (!iTCO_wdt_set_running(p)) {
+		/*
+		 * If the watchdog was not running set NO_REBOOT now to
+		 * prevent later reboots.
+		 */
+		p->update_no_reboot_bit(p->no_reboot_priv, true);
+	}
 
 	/* Check that the heartbeat value is within it's range;
 	   if not reset to the default */
-- 
GitLab


From 9ec0b7e06835b857f892feb2fe6121db1393425d Mon Sep 17 00:00:00 2001
From: Eddie James <eajames@linux.ibm.com>
Date: Tue, 1 Nov 2022 15:53:37 -0500
Subject: [PATCH 1874/4122] watchdog: aspeed: Enable pre-timeout interrupt

Enable the core pre-timeout interrupt on AST2500 and AST2600.

Signed-off-by: Eddie James <eajames@linux.ibm.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221101205338.577427-2-eajames@linux.ibm.com
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Wim Van Sebroeck <wim@linux-watchdog.org>
---
 drivers/watchdog/aspeed_wdt.c | 104 ++++++++++++++++++++++++++++------
 1 file changed, 88 insertions(+), 16 deletions(-)

diff --git a/drivers/watchdog/aspeed_wdt.c b/drivers/watchdog/aspeed_wdt.c
index 0cff2adfbfc96..86b5331bc4911 100644
--- a/drivers/watchdog/aspeed_wdt.c
+++ b/drivers/watchdog/aspeed_wdt.c
@@ -5,11 +5,14 @@
  * Joel Stanley <joel@jms.id.au>
  */
 
+#include <linux/bits.h>
 #include <linux/delay.h>
+#include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/watchdog.h>
 
@@ -18,28 +21,41 @@ module_param(nowayout, bool, 0);
 MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
 				__MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
 
+struct aspeed_wdt_config {
+	u32 ext_pulse_width_mask;
+	u32 irq_shift;
+	u32 irq_mask;
+};
+
 struct aspeed_wdt {
 	struct watchdog_device	wdd;
 	void __iomem		*base;
 	u32			ctrl;
-};
-
-struct aspeed_wdt_config {
-	u32 ext_pulse_width_mask;
+	const struct aspeed_wdt_config *cfg;
 };
 
 static const struct aspeed_wdt_config ast2400_config = {
 	.ext_pulse_width_mask = 0xff,
+	.irq_shift = 0,
+	.irq_mask = 0,
 };
 
 static const struct aspeed_wdt_config ast2500_config = {
 	.ext_pulse_width_mask = 0xfffff,
+	.irq_shift = 12,
+	.irq_mask = GENMASK(31, 12),
+};
+
+static const struct aspeed_wdt_config ast2600_config = {
+	.ext_pulse_width_mask = 0xfffff,
+	.irq_shift = 0,
+	.irq_mask = GENMASK(31, 10),
 };
 
 static const struct of_device_id aspeed_wdt_of_table[] = {
 	{ .compatible = "aspeed,ast2400-wdt", .data = &ast2400_config },
 	{ .compatible = "aspeed,ast2500-wdt", .data = &ast2500_config },
-	{ .compatible = "aspeed,ast2600-wdt", .data = &ast2500_config },
+	{ .compatible = "aspeed,ast2600-wdt", .data = &ast2600_config },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
@@ -58,6 +74,7 @@ MODULE_DEVICE_TABLE(of, aspeed_wdt_of_table);
 #define   WDT_CTRL_RESET_SYSTEM		BIT(1)
 #define   WDT_CTRL_ENABLE		BIT(0)
 #define WDT_TIMEOUT_STATUS	0x10
+#define   WDT_TIMEOUT_STATUS_IRQ		BIT(2)
 #define   WDT_TIMEOUT_STATUS_BOOT_SECONDARY	BIT(1)
 #define WDT_CLEAR_TIMEOUT_STATUS	0x14
 #define   WDT_CLEAR_TIMEOUT_AND_BOOT_CODE_SELECTION	BIT(0)
@@ -160,6 +177,26 @@ static int aspeed_wdt_set_timeout(struct watchdog_device *wdd,
 	return 0;
 }
 
+static int aspeed_wdt_set_pretimeout(struct watchdog_device *wdd,
+				     unsigned int pretimeout)
+{
+	struct aspeed_wdt *wdt = to_aspeed_wdt(wdd);
+	u32 actual = pretimeout * WDT_RATE_1MHZ;
+	u32 s = wdt->cfg->irq_shift;
+	u32 m = wdt->cfg->irq_mask;
+
+	wdd->pretimeout = pretimeout;
+	wdt->ctrl &= ~m;
+	if (pretimeout)
+		wdt->ctrl |= ((actual << s) & m) | WDT_CTRL_WDT_INTR;
+	else
+		wdt->ctrl &= ~WDT_CTRL_WDT_INTR;
+
+	writel(wdt->ctrl, wdt->base + WDT_CTRL);
+
+	return 0;
+}
+
 static int aspeed_wdt_restart(struct watchdog_device *wdd,
 			      unsigned long action, void *data)
 {
@@ -232,6 +269,7 @@ static const struct watchdog_ops aspeed_wdt_ops = {
 	.stop		= aspeed_wdt_stop,
 	.ping		= aspeed_wdt_ping,
 	.set_timeout	= aspeed_wdt_set_timeout,
+	.set_pretimeout = aspeed_wdt_set_pretimeout,
 	.restart	= aspeed_wdt_restart,
 	.owner		= THIS_MODULE,
 };
@@ -243,10 +281,29 @@ static const struct watchdog_info aspeed_wdt_info = {
 	.identity	= KBUILD_MODNAME,
 };
 
+static const struct watchdog_info aspeed_wdt_pretimeout_info = {
+	.options	= WDIOF_KEEPALIVEPING
+			| WDIOF_PRETIMEOUT
+			| WDIOF_MAGICCLOSE
+			| WDIOF_SETTIMEOUT,
+	.identity	= KBUILD_MODNAME,
+};
+
+static irqreturn_t aspeed_wdt_irq(int irq, void *arg)
+{
+	struct watchdog_device *wdd = arg;
+	struct aspeed_wdt *wdt = to_aspeed_wdt(wdd);
+	u32 status = readl(wdt->base + WDT_TIMEOUT_STATUS);
+
+	if (status & WDT_TIMEOUT_STATUS_IRQ)
+		watchdog_notify_pretimeout(wdd);
+
+	return IRQ_HANDLED;
+}
+
 static int aspeed_wdt_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	const struct aspeed_wdt_config *config;
 	const struct of_device_id *ofdid;
 	struct aspeed_wdt *wdt;
 	struct device_node *np;
@@ -259,11 +316,33 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
 	if (!wdt)
 		return -ENOMEM;
 
+	np = dev->of_node;
+
+	ofdid = of_match_node(aspeed_wdt_of_table, np);
+	if (!ofdid)
+		return -EINVAL;
+	wdt->cfg = ofdid->data;
+
 	wdt->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(wdt->base))
 		return PTR_ERR(wdt->base);
 
 	wdt->wdd.info = &aspeed_wdt_info;
+
+	if (wdt->cfg->irq_mask) {
+		int irq = platform_get_irq_optional(pdev, 0);
+
+		if (irq > 0) {
+			ret = devm_request_irq(dev, irq, aspeed_wdt_irq,
+					       IRQF_SHARED, dev_name(dev),
+					       wdt);
+			if (ret)
+				return ret;
+
+			wdt->wdd.info = &aspeed_wdt_pretimeout_info;
+		}
+	}
+
 	wdt->wdd.ops = &aspeed_wdt_ops;
 	wdt->wdd.max_hw_heartbeat_ms = WDT_MAX_TIMEOUT_MS;
 	wdt->wdd.parent = dev;
@@ -273,13 +352,6 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
 
 	watchdog_set_nowayout(&wdt->wdd, nowayout);
 
-	np = dev->of_node;
-
-	ofdid = of_match_node(aspeed_wdt_of_table, np);
-	if (!ofdid)
-		return -EINVAL;
-	config = ofdid->data;
-
 	/*
 	 * On clock rates:
 	 *  - ast2400 wdt can run at PCLK, or 1MHz
@@ -331,7 +403,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
 		(of_device_is_compatible(np, "aspeed,ast2600-wdt"))) {
 		u32 reg = readl(wdt->base + WDT_RESET_WIDTH);
 
-		reg &= config->ext_pulse_width_mask;
+		reg &= wdt->cfg->ext_pulse_width_mask;
 		if (of_property_read_bool(np, "aspeed,ext-active-high"))
 			reg |= WDT_ACTIVE_HIGH_MAGIC;
 		else
@@ -339,7 +411,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
 
 		writel(reg, wdt->base + WDT_RESET_WIDTH);
 
-		reg &= config->ext_pulse_width_mask;
+		reg &= wdt->cfg->ext_pulse_width_mask;
 		if (of_property_read_bool(np, "aspeed,ext-push-pull"))
 			reg |= WDT_PUSH_PULL_MAGIC;
 		else
@@ -349,7 +421,7 @@ static int aspeed_wdt_probe(struct platform_device *pdev)
 	}
 
 	if (!of_property_read_u32(np, "aspeed,ext-pulse-duration", &duration)) {
-		u32 max_duration = config->ext_pulse_width_mask + 1;
+		u32 max_duration = wdt->cfg->ext_pulse_width_mask + 1;
 
 		if (duration == 0 || duration > max_duration) {
 			dev_err(dev, "Invalid pulse duration: %uus\n",
-- 
GitLab


From 586cb1d65cc44371115600bc981626725c864029 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 16 Nov 2022 16:43:51 -0800
Subject: [PATCH 1875/4122] tools lib api: Clean up install_headers

Add missing backslash that caused an install command to always appear
in build output. Make the install headers more specific.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221117004356.279422-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 3e5ef1e0e8908..3649c7f7ea651 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -100,12 +100,12 @@ install_lib: $(LIBFILE)
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
 install_headers:
-	$(call QUIET_INSTALL, headers) \
+	$(call QUIET_INSTALL, libapi_headers) \
 		$(call do_install,cpu.h,$(prefix)/include/api,644); \
 		$(call do_install,debug.h,$(prefix)/include/api,644); \
 		$(call do_install,io.h,$(prefix)/include/api,644); \
 		$(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \
-		$(call do_install,fs/fs.h,$(prefix)/include/api/fs,644);
+		$(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \
 		$(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644);
 
 install: install_lib install_headers
-- 
GitLab


From daa45f3f3577556801c7b0b2df85eed1289fbcb6 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 16 Nov 2022 16:43:52 -0800
Subject: [PATCH 1876/4122] tools lib bpf: Avoid install_headers make warning

The perf build makes the install_headers target, however, as there is
no action for this target a warning is always produced of:

make[3]: Nothing to be done for 'install_headers'.

Solve this by adding a display of 'INSTALL libbpf_headers'.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221117004356.279422-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/bpf/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 4c904ef0b47e9..7f5f7d2ebe1f1 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -255,6 +255,7 @@ $(INSTALL_GEN_HDRS): $(INSTALL_PFX)/%.h: $(OUTPUT)%.h
 		$(call do_install,$<,$(prefix)/include/bpf,644)
 
 install_headers: $(BPF_GENERATED) $(INSTALL_SRC_HDRS) $(INSTALL_GEN_HDRS)
+	$(call QUIET_INSTALL, libbpf_headers)
 
 install_pkgconfig: $(PC_FILE)
 	$(call QUIET_INSTALL, $(PC_FILE)) \
-- 
GitLab


From 806dda31b856d83d8ec211aa9831bac5f978271e Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 16 Nov 2022 16:43:53 -0800
Subject: [PATCH 1877/4122] tools lib symbol: Clean up build output

Missing @ when building libsymbol. Make the install echo specific to
installing the libsymbol headers.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221117004356.279422-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/symbol/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile
index 4c1d6b53032d2..ea8707b3442a7 100644
--- a/tools/lib/symbol/Makefile
+++ b/tools/lib/symbol/Makefile
@@ -77,7 +77,7 @@ include $(srctree)/tools/scripts/Makefile.include
 all: fixdep $(LIBFILE)
 
 $(SYMBOL_IN): FORCE
-	$(MAKE) $(build)=libsymbol V=1
+	@$(MAKE) $(build)=libsymbol
 
 $(LIBFILE): $(SYMBOL_IN)
 	$(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN)
@@ -101,7 +101,7 @@ install_lib: $(LIBFILE)
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
 install_headers:
-	$(call QUIET_INSTALL, headers) \
+	$(call QUIET_INSTALL, libsymbol_headers) \
 		$(call do_install,kallsyms.h,$(prefix)/include/symbol,644);
 
 install: install_lib install_headers
-- 
GitLab


From e8951bfb4cb325a6b80310790dc78ac9b4a147eb Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 16 Nov 2022 16:43:54 -0800
Subject: [PATCH 1878/4122] tools lib perf: Make install_headers clearer

Add libperf to the name so that this install_headers build appears
different to similar targets in different libraries.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221117004356.279422-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/perf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 1badc0a046768..a90fb8c6bed4d 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -188,7 +188,7 @@ install_lib: libs
 		cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
 
 install_headers:
-	$(call QUIET_INSTALL, headers) \
+	$(call QUIET_INSTALL, libperf_headers) \
 		$(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \
 		$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
-- 
GitLab


From 77dce6890a2a715b186bdc149c843571a5bb47df Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 16 Nov 2022 16:43:55 -0800
Subject: [PATCH 1879/4122] tools lib subcmd: Make install_headers clearer

Add libsubcmd to the name so that this install_headers build appears
different to similar targets in different libraries.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221117004356.279422-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/subcmd/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index e96566f8991cc..9a316d8b89df5 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -101,7 +101,7 @@ install_lib: $(LIBFILE)
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
 install_headers:
-	$(call QUIET_INSTALL, headers) \
+	$(call QUIET_INSTALL, libsubcmd_headers) \
 		$(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \
 		$(call do_install,help.h,$(prefix)/include/subcmd,644); \
 		$(call do_install,pager.h,$(prefix)/include/subcmd,644); \
-- 
GitLab


From e664f31e21a2d201507704f302ab32f498871b11 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 16 Nov 2022 16:43:56 -0800
Subject: [PATCH 1880/4122] tools lib traceevent: Make install_headers clearer

Add libtraceevent to the name so that this install_headers build
appears different to similar targets in different libraries. Add ;
after kbuffer.h install target for consistency.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Fastabend <john.fastabend@gmail.com>
Cc: KP Singh <kpsingh@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Martin KaFai Lau <martin.lau@linux.dev>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Yonghong Song <yhs@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221117004356.279422-7-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index c874c017c6369..98dfd4badea3b 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -234,11 +234,11 @@ install_pkgconfig:
 		$(call do_install_pkgconfig_file,$(prefix))
 
 install_headers:
-	$(call QUIET_INSTALL, headers) \
+	$(call QUIET_INSTALL, traceevent_headers) \
 		$(call do_install,event-parse.h,$(includedir_SQ),644); \
 		$(call do_install,event-utils.h,$(includedir_SQ),644); \
 		$(call do_install,trace-seq.h,$(includedir_SQ),644); \
-		$(call do_install,kbuffer.h,$(includedir_SQ),644)
+		$(call do_install,kbuffer.h,$(includedir_SQ),644);
 
 install: install_lib
 
-- 
GitLab


From f215054d749b17c56e014fdca2fcc592dac4529c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:43 -0800
Subject: [PATCH 1881/4122] perf test: Add -w/--workload option

The -w/--workload option is to run a simple workload used by testing.
This adds a basic framework to run the workloads and 'noploop' workload
as an example.

  $ perf test -w noploop

The noploop does a loop doing nothing (NOP) for a second by default.
It can have an optional argument to specify the time in seconds.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/Build               |  2 ++
 tools/perf/tests/builtin-test.c      | 24 +++++++++++++++++++++
 tools/perf/tests/tests.h             | 22 +++++++++++++++++++
 tools/perf/tests/workloads/Build     |  3 +++
 tools/perf/tests/workloads/noploop.c | 32 ++++++++++++++++++++++++++++
 5 files changed, 83 insertions(+)
 create mode 100644 tools/perf/tests/workloads/Build
 create mode 100644 tools/perf/tests/workloads/noploop.c

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 2064a640facbe..11b69023011b0 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -103,3 +103,5 @@ endif
 CFLAGS_attr.o         += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
 CFLAGS_python-use.o   += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))"
 CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls
+
+perf-y += workloads/
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 7122eae1d98d9..ce641ccfcf814 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -118,6 +118,10 @@ static struct test_suite **tests[] = {
 	arch_tests,
 };
 
+static struct test_workload *workloads[] = {
+	&workload__noploop,
+};
+
 static int num_subtests(const struct test_suite *t)
 {
 	int num;
@@ -475,6 +479,21 @@ static int perf_test__list(int argc, const char **argv)
 	return 0;
 }
 
+static int run_workload(const char *work, int argc, const char **argv)
+{
+	unsigned int i = 0;
+	struct test_workload *twl;
+
+	for (i = 0; i < ARRAY_SIZE(workloads); i++) {
+		twl = workloads[i];
+		if (!strcmp(twl->name, work))
+			return twl->func(argc, argv);
+	}
+
+	pr_info("No workload found: %s\n", work);
+	return -1;
+}
+
 int cmd_test(int argc, const char **argv)
 {
 	const char *test_usage[] = {
@@ -482,12 +501,14 @@ int cmd_test(int argc, const char **argv)
 	NULL,
 	};
 	const char *skip = NULL;
+	const char *workload = NULL;
 	const struct option test_options[] = {
 	OPT_STRING('s', "skip", &skip, "tests", "tests to skip"),
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show symbol address, etc)"),
 	OPT_BOOLEAN('F', "dont-fork", &dont_fork,
 		    "Do not fork for testcase"),
+	OPT_STRING('w', "workload", &workload, "work", "workload to run for testing"),
 	OPT_END()
 	};
 	const char * const test_subcommands[] = { "list", NULL };
@@ -504,6 +525,9 @@ int cmd_test(int argc, const char **argv)
 	if (argc >= 1 && !strcmp(argv[0], "list"))
 		return perf_test__list(argc - 1, argv + 1);
 
+	if (workload)
+		return run_workload(workload, argc, argv);
+
 	symbol_conf.priv_size = sizeof(int);
 	symbol_conf.sort_by_name = true;
 	symbol_conf.try_vmlinux_path = true;
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 5bbb8f6a48fcb..d315d0d6fc977 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -180,4 +180,26 @@ int test__arch_unwind_sample(struct perf_sample *sample,
 DECLARE_SUITE(vectors_page);
 #endif
 
+/*
+ * Define test workloads to be used in test suites.
+ */
+typedef int (*workload_fnptr)(int argc, const char **argv);
+
+struct test_workload {
+	const char	*name;
+	workload_fnptr	func;
+};
+
+#define DECLARE_WORKLOAD(work) \
+	extern struct test_workload workload__##work
+
+#define DEFINE_WORKLOAD(work) \
+struct test_workload workload__##work = {	\
+	.name = #work,				\
+	.func = work,				\
+}
+
+/* The list of test workloads */
+DECLARE_WORKLOAD(noploop);
+
 #endif /* TESTS_H */
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
new file mode 100644
index 0000000000000..f98e968d4633a
--- /dev/null
+++ b/tools/perf/tests/workloads/Build
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+perf-y += noploop.o
diff --git a/tools/perf/tests/workloads/noploop.c b/tools/perf/tests/workloads/noploop.c
new file mode 100644
index 0000000000000..940ea5910a84c
--- /dev/null
+++ b/tools/perf/tests/workloads/noploop.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+#include "../tests.h"
+
+static volatile sig_atomic_t done;
+
+static void sighandler(int sig __maybe_unused)
+{
+	done = 1;
+}
+
+static int noploop(int argc, const char **argv)
+{
+	int sec = 1;
+
+	if (argc > 0)
+		sec = atoi(argv[0]);
+
+	signal(SIGINT, sighandler);
+	signal(SIGALRM, sighandler);
+	alarm(sec);
+
+	while (!done)
+		continue;
+
+	return 0;
+}
+
+DEFINE_WORKLOAD(noploop);
-- 
GitLab


From 24e733b29f13284aac30c4d1fb9f19201951d770 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:44 -0800
Subject: [PATCH 1882/4122] perf test: Replace pipe test workload with noploop

So that it can get rid of requirement of a compiler.
Also define and use more local symbols to ease future changes.

  $ sudo ./perf test -v pipe
   87: perf pipe recording and injection test                          :
  --- start ---
  test child forked, pid 748003
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.000 MB - ]
      748014   748014       -1 |perf
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.000 MB - ]
      99.83%  perf     perf                  [.] noploop
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.000 MB - ]
      99.85%  perf     perf                  [.] noploop
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.160 MB /tmp/perf.data.2XYPdw (4007 samples) ]
      99.83%  perf     perf                  [.] noploop
  test child finished with 0
  ---- end ----
  perf pipe recording and injection test: Ok

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/pipe_test.sh | 55 ++++++-----------------------
 1 file changed, 10 insertions(+), 45 deletions(-)

diff --git a/tools/perf/tests/shell/pipe_test.sh b/tools/perf/tests/shell/pipe_test.sh
index 1b32b4f283911..8dd115dd35a7e 100755
--- a/tools/perf/tests/shell/pipe_test.sh
+++ b/tools/perf/tests/shell/pipe_test.sh
@@ -2,68 +2,33 @@
 # perf pipe recording and injection test
 # SPDX-License-Identifier: GPL-2.0
 
-# skip if there's no compiler
-if ! [ -x "$(command -v cc)" ]; then
-	echo "failed: no compiler, install gcc"
-	exit 2
-fi
-
-file=$(mktemp /tmp/test.file.XXXXXX)
 data=$(mktemp /tmp/perf.data.XXXXXX)
+prog="perf test -w noploop"
+task="perf"
+sym="noploop"
 
-cat <<EOF | cc -o ${file} -x c -
-#include <signal.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-volatile int done;
-
-void sigalrm(int sig) {
-	done = 1;
-}
-
-__attribute__((noinline)) void noploop(void) {
-	while (!done)
-		continue;
-}
-
-int main(int argc, char *argv[]) {
-	int sec = 1;
-
-	if (argc > 1)
-		sec = atoi(argv[1]);
-
-	signal(SIGALRM, sigalrm);
-	alarm(sec);
-
-	noploop();
-	return 0;
-}
-EOF
-
-
-if ! perf record -e task-clock:u -o - ${file} | perf report -i - --task | grep test.file; then
+if ! perf record -e task-clock:u -o - ${prog} | perf report -i - --task | grep ${task}; then
 	echo "cannot find the test file in the perf report"
 	exit 1
 fi
 
-if ! perf record -e task-clock:u -o - ${file} | perf inject -b | perf report -i - | grep noploop; then
+if ! perf record -e task-clock:u -o - ${prog} | perf inject -b | perf report -i - | grep ${sym}; then
 	echo "cannot find noploop function in pipe #1"
 	exit 1
 fi
 
-perf record -e task-clock:u -o - ${file} | perf inject -b -o ${data}
-if ! perf report -i ${data} | grep noploop; then
+perf record -e task-clock:u -o - ${prog} | perf inject -b -o ${data}
+if ! perf report -i ${data} | grep ${sym}; then
 	echo "cannot find noploop function in pipe #2"
 	exit 1
 fi
 
-perf record -e task-clock:u -o ${data} ${file}
-if ! perf inject -b -i ${data} | perf report -i - | grep noploop; then
+perf record -e task-clock:u -o ${data} ${prog}
+if ! perf inject -b -i ${data} | perf report -i - | grep ${sym}; then
 	echo "cannot find noploop function in pipe #3"
 	exit 1
 fi
 
 
-rm -f ${file} ${data} ${data}.old
+rm -f ${data} ${data}.old
 exit 0
-- 
GitLab


From 69b352927885b17f03d3ee4ee38f580699af107a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:45 -0800
Subject: [PATCH 1883/4122] perf test: Add 'thloop' test workload

The thloop is similar to noploop but runs in two threads.  This is
needed to verify perf record --per-thread to handle multi-threaded
programs properly.

  $ perf test -w thloop

It also takes an optional argument to specify runtime in seconds
(default: 1).

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c     |  1 +
 tools/perf/tests/tests.h            |  1 +
 tools/perf/tests/workloads/Build    |  1 +
 tools/perf/tests/workloads/thloop.c | 53 +++++++++++++++++++++++++++++
 4 files changed, 56 insertions(+)
 create mode 100644 tools/perf/tests/workloads/thloop.c

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index ce641ccfcf814..161f38476e77b 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -120,6 +120,7 @@ static struct test_suite **tests[] = {
 
 static struct test_workload *workloads[] = {
 	&workload__noploop,
+	&workload__thloop,
 };
 
 static int num_subtests(const struct test_suite *t)
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index d315d0d6fc977..e6edfeeadaeba 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -201,5 +201,6 @@ struct test_workload workload__##work = {	\
 
 /* The list of test workloads */
 DECLARE_WORKLOAD(noploop);
+DECLARE_WORKLOAD(thloop);
 
 #endif /* TESTS_H */
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index f98e968d4633a..b8964b1099c0e 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 perf-y += noploop.o
+perf-y += thloop.o
diff --git a/tools/perf/tests/workloads/thloop.c b/tools/perf/tests/workloads/thloop.c
new file mode 100644
index 0000000000000..29193b75717ef
--- /dev/null
+++ b/tools/perf/tests/workloads/thloop.c
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <pthread.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+#include "../tests.h"
+
+static volatile sig_atomic_t done;
+static volatile unsigned count;
+
+/* We want to check this symbol in perf report */
+noinline void test_loop(void);
+
+static void sighandler(int sig __maybe_unused)
+{
+	done = 1;
+}
+
+noinline void test_loop(void)
+{
+	while (!done)
+		count++;
+}
+
+static void *thfunc(void *arg)
+{
+	void (*loop_fn)(void) = arg;
+
+	loop_fn();
+	return NULL;
+}
+
+static int thloop(int argc, const char **argv)
+{
+	int sec = 1;
+	pthread_t th;
+
+	if (argc > 0)
+		sec = atoi(argv[0]);
+
+	signal(SIGINT, sighandler);
+	signal(SIGALRM, sighandler);
+	alarm(sec);
+
+	pthread_create(&th, NULL, thfunc, test_loop);
+	test_loop();
+	pthread_join(th, NULL);
+
+	return 0;
+}
+
+DEFINE_WORKLOAD(thloop);
-- 
GitLab


From 0b8ff0ba2744f364e8f5fb695ae323bae0ecfd19 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:46 -0800
Subject: [PATCH 1884/4122] perf test: Replace record test workload with thloop

So that it can get rid of requirements for a compiler.

  $ sudo ./perf test -v 92
   92: perf record tests                                               :
  --- start ---
  test child forked, pid 740204
  Basic --per-thread mode test
  Basic --per-thread mode test [Success]
  Register capture test
  Register capture test [Success]
  Basic --system-wide mode test
  Basic --system-wide mode test [Success]
  Basic target workload test
  Basic target workload test [Success]
  test child finished with 0
  ---- end ----
  perf record tests: Ok

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 59 ++------------------------------
 1 file changed, 3 insertions(+), 56 deletions(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index e93b3a8871fe3..4dff89e3a3fd5 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -9,17 +9,13 @@ shelldir=$(dirname "$0")
 
 err=0
 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
-testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX)
+testprog="perf test -w thloop"
 testsym="test_loop"
 
 cleanup() {
   rm -rf "${perfdata}"
   rm -rf "${perfdata}".old
 
-  if [ "${testprog}" != "true" ]; then
-    rm -f "${testprog}"
-  fi
-
   trap - EXIT TERM INT
 }
 
@@ -29,53 +25,6 @@ trap_cleanup() {
 }
 trap trap_cleanup EXIT TERM INT
 
-build_test_program() {
-  if ! [ -x "$(command -v cc)" ]; then
-    # No CC found. Fall back to 'true'
-    testprog=true
-    testsym=true
-    return
-  fi
-
-  echo "Build a test program"
-  cat <<EOF | cc -o ${testprog} -xc - -pthread
-#include <stdio.h>
-#include <stdlib.h>
-#include <pthread.h>
-
-void test_loop(void) {
-  volatile int count = 1000000;
-
-  while (count--)
-    continue;
-}
-
-void *thfunc(void *arg) {
-  int forever = *(int *)arg;
-
-  do {
-    test_loop();
-  } while (forever);
-
-  return NULL;
-}
-
-int main(int argc, char *argv[]) {
-  pthread_t th;
-  int forever = 0;
-
-  if (argc > 1)
-    forever = atoi(argv[1]);
-
-  pthread_create(&th, NULL, thfunc, &forever);
-  test_loop();
-  pthread_join(th, NULL);
-
-  return 0;
-}
-EOF
-}
-
 test_per_thread() {
   echo "Basic --per-thread mode test"
   if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null
@@ -96,8 +45,8 @@ test_per_thread() {
     return
   fi
 
-  # run the test program in background (forever)
-  ${testprog} 1 &
+  # run the test program in background (for 30 seconds)
+  ${testprog} 30 &
   TESTPID=$!
 
   rm -f "${perfdata}"
@@ -205,8 +154,6 @@ test_workload() {
   echo "Basic target workload test [Success]"
 }
 
-build_test_program
-
 test_per_thread
 test_register_capture
 test_system_wide
-- 
GitLab


From 02c70e915967c372d80f6015ef56744285350251 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:07 +0100
Subject: [PATCH 1885/4122] power: supply: adp5061: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/adp5061.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/adp5061.c b/drivers/power/supply/adp5061.c
index fcf8ff0bc974b..840db629a46c3 100644
--- a/drivers/power/supply/adp5061.c
+++ b/drivers/power/supply/adp5061.c
@@ -694,8 +694,7 @@ static const struct power_supply_desc adp5061_desc = {
 	.num_properties		= ARRAY_SIZE(adp5061_props),
 };
 
-static int adp5061_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int adp5061_probe(struct i2c_client *client)
 {
 	struct power_supply_config psy_cfg = {};
 	struct adp5061_state *st;
@@ -737,7 +736,7 @@ static struct i2c_driver adp5061_driver = {
 	.driver = {
 		.name = KBUILD_MODNAME,
 	},
-	.probe = adp5061_probe,
+	.probe_new = adp5061_probe,
 	.id_table = adp5061_id,
 };
 module_i2c_driver(adp5061_driver);
-- 
GitLab


From 31c050513c7ac1e455d0f1b1b6a882d96c94d5da Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:08 +0100
Subject: [PATCH 1886/4122] power: supply: bq2415x: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq2415x_charger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq2415x_charger.c b/drivers/power/supply/bq2415x_charger.c
index 6b99e1c675b87..d2cb7431dced0 100644
--- a/drivers/power/supply/bq2415x_charger.c
+++ b/drivers/power/supply/bq2415x_charger.c
@@ -1520,9 +1520,9 @@ static int bq2415x_power_supply_init(struct bq2415x_device *bq)
 }
 
 /* main bq2415x probe function */
-static int bq2415x_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int bq2415x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int ret;
 	int num;
 	char *name = NULL;
@@ -1780,7 +1780,7 @@ static struct i2c_driver bq2415x_driver = {
 		.of_match_table = of_match_ptr(bq2415x_of_match_table),
 		.acpi_match_table = ACPI_PTR(bq2415x_i2c_acpi_match),
 	},
-	.probe = bq2415x_probe,
+	.probe_new = bq2415x_probe,
 	.remove = bq2415x_remove,
 	.id_table = bq2415x_i2c_id_table,
 };
-- 
GitLab


From 31731754b9257a1e2ebad60f270ecbe089d2ebd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:09 +0100
Subject: [PATCH 1887/4122] power: supply: bq24190: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq24190_charger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq24190_charger.c b/drivers/power/supply/bq24190_charger.c
index 2274679c5ddd2..2b2c3a4391c19 100644
--- a/drivers/power/supply/bq24190_charger.c
+++ b/drivers/power/supply/bq24190_charger.c
@@ -1767,9 +1767,9 @@ static int bq24190_get_config(struct bq24190_dev_info *bdi)
 	return 0;
 }
 
-static int bq24190_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int bq24190_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct i2c_adapter *adapter = client->adapter;
 	struct device *dev = &client->dev;
 	struct power_supply_config charger_cfg = {}, battery_cfg = {};
@@ -2032,7 +2032,7 @@ static const struct of_device_id bq24190_of_match[] = {
 MODULE_DEVICE_TABLE(of, bq24190_of_match);
 
 static struct i2c_driver bq24190_driver = {
-	.probe		= bq24190_probe,
+	.probe_new	= bq24190_probe,
 	.remove		= bq24190_remove,
 	.shutdown	= bq24190_shutdown,
 	.id_table	= bq24190_i2c_ids,
-- 
GitLab


From 924668b4ed02dc6c134f6f3b30bd4c22f8dd4a82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:10 +0100
Subject: [PATCH 1888/4122] power: supply: bq24257: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq24257_charger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq24257_charger.c b/drivers/power/supply/bq24257_charger.c
index a309bbedfe521..ab4c49788c586 100644
--- a/drivers/power/supply/bq24257_charger.c
+++ b/drivers/power/supply/bq24257_charger.c
@@ -947,9 +947,9 @@ static int bq24257_fw_probe(struct bq24257_device *bq)
 	return 0;
 }
 
-static int bq24257_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int bq24257_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct i2c_adapter *adapter = client->adapter;
 	struct device *dev = &client->dev;
 	const struct acpi_device_id *acpi_id;
@@ -1167,7 +1167,7 @@ static struct i2c_driver bq24257_driver = {
 		.acpi_match_table = ACPI_PTR(bq24257_acpi_match),
 		.pm = &bq24257_pm,
 	},
-	.probe = bq24257_probe,
+	.probe_new = bq24257_probe,
 	.remove = bq24257_remove,
 	.id_table = bq24257_i2c_ids,
 };
-- 
GitLab


From aaf5339e295baaee2b03c2dc45ac9dc8c2573a76 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:11 +0100
Subject: [PATCH 1889/4122] power: supply: bq24735: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq24735-charger.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq24735-charger.c b/drivers/power/supply/bq24735-charger.c
index 3ce36d09c017c..cfca3a82d5a8d 100644
--- a/drivers/power/supply/bq24735-charger.c
+++ b/drivers/power/supply/bq24735-charger.c
@@ -352,8 +352,7 @@ static struct bq24735_platform *bq24735_parse_dt_data(struct i2c_client *client)
 	return pdata;
 }
 
-static int bq24735_charger_probe(struct i2c_client *client,
-				 const struct i2c_device_id *id)
+static int bq24735_charger_probe(struct i2c_client *client)
 {
 	int ret;
 	struct bq24735 *charger;
@@ -506,7 +505,7 @@ static struct i2c_driver bq24735_charger_driver = {
 		.name = "bq24735-charger",
 		.of_match_table = bq24735_match_ids,
 	},
-	.probe = bq24735_charger_probe,
+	.probe_new = bq24735_charger_probe,
 	.id_table = bq24735_charger_id,
 };
 
-- 
GitLab


From ed4e2c7570a7b0972f449b2961e2efb9b48849b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:12 +0100
Subject: [PATCH 1890/4122] power: supply: bq2515x: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq2515x_charger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq2515x_charger.c b/drivers/power/supply/bq2515x_charger.c
index 4f76ad9c2f18e..da224ae8dc615 100644
--- a/drivers/power/supply/bq2515x_charger.c
+++ b/drivers/power/supply/bq2515x_charger.c
@@ -1078,9 +1078,9 @@ static const struct regmap_config bq25155_regmap_config = {
 	.volatile_reg		= bq2515x_volatile_register,
 };
 
-static int bq2515x_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int bq2515x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct bq2515x_device *bq2515x;
 	struct power_supply_config charger_cfg = {};
@@ -1158,7 +1158,7 @@ static struct i2c_driver bq2515x_driver = {
 		.name = "bq2515x-charger",
 		.of_match_table = bq2515x_of_match,
 	},
-	.probe = bq2515x_probe,
+	.probe_new = bq2515x_probe,
 	.id_table = bq2515x_i2c_ids,
 };
 module_i2c_driver(bq2515x_driver);
-- 
GitLab


From fb94ef2efa1c3e2af52d8144b68eb9c90b0b31ed Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:13 +0100
Subject: [PATCH 1891/4122] power: supply: bq256xx: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq256xx_charger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq256xx_charger.c b/drivers/power/supply/bq256xx_charger.c
index 01ad84fd147c8..db13e288e439a 100644
--- a/drivers/power/supply/bq256xx_charger.c
+++ b/drivers/power/supply/bq256xx_charger.c
@@ -1619,9 +1619,9 @@ static int bq256xx_parse_dt(struct bq256xx_device *bq,
 	return 0;
 }
 
-static int bq256xx_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int bq256xx_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct bq256xx_device *bq;
 	struct power_supply_config psy_cfg = { };
@@ -1744,7 +1744,7 @@ static struct i2c_driver bq256xx_driver = {
 		.of_match_table = bq256xx_of_match,
 		.acpi_match_table = bq256xx_acpi_match,
 	},
-	.probe = bq256xx_probe,
+	.probe_new = bq256xx_probe,
 	.id_table = bq256xx_i2c_ids,
 };
 module_i2c_driver(bq256xx_driver);
-- 
GitLab


From c5cddca2351b291c8787b45cd046b1dfeb86979f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:14 +0100
Subject: [PATCH 1892/4122] power: supply: bq25890: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index f0362dcb935e9..bfdd2213ba69a 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -1315,8 +1315,7 @@ static int bq25890_fw_probe(struct bq25890_device *bq)
 	return 0;
 }
 
-static int bq25890_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int bq25890_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 	struct bq25890_device *bq;
@@ -1515,7 +1514,7 @@ static struct i2c_driver bq25890_driver = {
 		.acpi_match_table = ACPI_PTR(bq25890_acpi_match),
 		.pm = &bq25890_pm,
 	},
-	.probe = bq25890_probe,
+	.probe_new = bq25890_probe,
 	.remove = bq25890_remove,
 	.shutdown = bq25890_shutdown,
 	.id_table = bq25890_i2c_ids,
-- 
GitLab


From 79fc7c26602f990e49471df1d237466b9530ccdb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:15 +0100
Subject: [PATCH 1893/4122] power: supply: bq25980: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25980_charger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq25980_charger.c b/drivers/power/supply/bq25980_charger.c
index 9339f56492827..a59d9762bc916 100644
--- a/drivers/power/supply/bq25980_charger.c
+++ b/drivers/power/supply/bq25980_charger.c
@@ -1207,9 +1207,9 @@ static int bq25980_parse_dt(struct bq25980_device *bq)
 	return 0;
 }
 
-static int bq25980_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int bq25980_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct bq25980_device *bq;
 	int ret;
@@ -1287,7 +1287,7 @@ static struct i2c_driver bq25980_driver = {
 		.name = "bq25980-charger",
 		.of_match_table = bq25980_of_match,
 	},
-	.probe = bq25980_probe,
+	.probe_new = bq25980_probe,
 	.id_table = bq25980_i2c_ids,
 };
 module_i2c_driver(bq25980_driver);
-- 
GitLab


From 67f56c79a5723cbdd9dd7bbb1a0375895c2d122f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:16 +0100
Subject: [PATCH 1894/4122] power: supply: bq27xxx: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq27xxx_battery_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/bq27xxx_battery_i2c.c b/drivers/power/supply/bq27xxx_battery_i2c.c
index 94b00bb89c177..f8768997333bc 100644
--- a/drivers/power/supply/bq27xxx_battery_i2c.c
+++ b/drivers/power/supply/bq27xxx_battery_i2c.c
@@ -136,9 +136,9 @@ static int bq27xxx_battery_i2c_bulk_write(struct bq27xxx_device_info *di,
 	return 0;
 }
 
-static int bq27xxx_battery_i2c_probe(struct i2c_client *client,
-				     const struct i2c_device_id *id)
+static int bq27xxx_battery_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct bq27xxx_device_info *di;
 	int ret;
 	char *name;
@@ -295,7 +295,7 @@ static struct i2c_driver bq27xxx_battery_i2c_driver = {
 		.name = "bq27xxx-battery",
 		.of_match_table = of_match_ptr(bq27xxx_battery_i2c_of_match_table),
 	},
-	.probe = bq27xxx_battery_i2c_probe,
+	.probe_new = bq27xxx_battery_i2c_probe,
 	.remove = bq27xxx_battery_i2c_remove,
 	.id_table = bq27xxx_i2c_id_table,
 };
-- 
GitLab


From 433e380226799bfcd823adca1c5e1aec8e2ef72a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:17 +0100
Subject: [PATCH 1895/4122] power: supply: ds2782: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/ds2782_battery.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/ds2782_battery.c b/drivers/power/supply/ds2782_battery.c
index d78cd05402f66..9b9619246902c 100644
--- a/drivers/power/supply/ds2782_battery.c
+++ b/drivers/power/supply/ds2782_battery.c
@@ -368,9 +368,9 @@ static const struct ds278x_battery_ops ds278x_ops[] = {
 	}
 };
 
-static int ds278x_battery_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int ds278x_battery_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ds278x_platform_data *pdata = client->dev.platform_data;
 	struct power_supply_config psy_cfg = {};
 	struct ds278x_info *info;
@@ -458,7 +458,7 @@ static struct i2c_driver ds278x_battery_driver = {
 		.name	= "ds2782-battery",
 		.pm	= &ds278x_battery_pm_ops,
 	},
-	.probe		= ds278x_battery_probe,
+	.probe_new	= ds278x_battery_probe,
 	.remove		= ds278x_battery_remove,
 	.id_table	= ds278x_id,
 };
-- 
GitLab


From d9cafca1f200ab4bce13a42ffae82718aba29eb9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:18 +0100
Subject: [PATCH 1896/4122] power: supply: lp8727: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/lp8727_charger.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/power/supply/lp8727_charger.c b/drivers/power/supply/lp8727_charger.c
index 384a374b52c19..e6c21377d53c2 100644
--- a/drivers/power/supply/lp8727_charger.c
+++ b/drivers/power/supply/lp8727_charger.c
@@ -540,7 +540,7 @@ static struct lp8727_platform_data *lp8727_parse_dt(struct device *dev)
 }
 #endif
 
-static int lp8727_probe(struct i2c_client *cl, const struct i2c_device_id *id)
+static int lp8727_probe(struct i2c_client *cl)
 {
 	struct lp8727_chg *pchg;
 	struct lp8727_platform_data *pdata;
@@ -615,7 +615,7 @@ static struct i2c_driver lp8727_driver = {
 		   .name = "lp8727",
 		   .of_match_table = of_match_ptr(lp8727_dt_ids),
 		   },
-	.probe = lp8727_probe,
+	.probe_new = lp8727_probe,
 	.remove = lp8727_remove,
 	.id_table = lp8727_ids,
 };
-- 
GitLab


From 66d9e8fc2bacffb80300d496c54d2fc072229656 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:19 +0100
Subject: [PATCH 1897/4122] power: supply: ltc2941: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/ltc2941-battery-gauge.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/ltc2941-battery-gauge.c b/drivers/power/supply/ltc2941-battery-gauge.c
index 657305214d68d..d3fb428259839 100644
--- a/drivers/power/supply/ltc2941-battery-gauge.c
+++ b/drivers/power/supply/ltc2941-battery-gauge.c
@@ -439,8 +439,7 @@ static enum power_supply_property ltc294x_properties[] = {
 	POWER_SUPPLY_PROP_CURRENT_NOW,
 };
 
-static int ltc294x_i2c_probe(struct i2c_client *client,
-	const struct i2c_device_id *id)
+static int ltc294x_i2c_probe(struct i2c_client *client)
 {
 	struct power_supply_config psy_cfg = {};
 	struct ltc294x_info *info;
@@ -636,7 +635,7 @@ static struct i2c_driver ltc294x_driver = {
 		.of_match_table = ltc294x_i2c_of_match,
 		.pm	= LTC294X_PM_OPS,
 	},
-	.probe		= ltc294x_i2c_probe,
+	.probe_new	= ltc294x_i2c_probe,
 	.shutdown	= ltc294x_i2c_shutdown,
 	.id_table	= ltc294x_i2c_id,
 };
-- 
GitLab


From 97bdbe0d04b183e5680cf0a487fb0abd4de85a0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:20 +0100
Subject: [PATCH 1898/4122] power: supply: ltc4162-l: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/ltc4162-l-charger.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/ltc4162-l-charger.c b/drivers/power/supply/ltc4162-l-charger.c
index 1a5cb4405ee3c..db2bb52335705 100644
--- a/drivers/power/supply/ltc4162-l-charger.c
+++ b/drivers/power/supply/ltc4162-l-charger.c
@@ -819,8 +819,7 @@ static void ltc4162l_clear_interrupts(struct ltc4162l_info *info)
 	regmap_write(info->regmap, LTC4162L_CHARGE_STATUS_ALERTS_REG, 0);
 }
 
-static int ltc4162l_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ltc4162l_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct device *dev = &client->dev;
@@ -916,7 +915,7 @@ static const struct of_device_id ltc4162l_of_match[] = {
 MODULE_DEVICE_TABLE(of, ltc4162l_of_match);
 
 static struct i2c_driver ltc4162l_driver = {
-	.probe		= ltc4162l_probe,
+	.probe_new	= ltc4162l_probe,
 	.alert		= ltc4162l_alert,
 	.id_table	= ltc4162l_i2c_id_table,
 	.driver = {
-- 
GitLab


From b17018dee05a145e428d1de12d962d25d5f8837d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:21 +0100
Subject: [PATCH 1899/4122] power: supply: max14656: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/max14656_charger_detector.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/max14656_charger_detector.c b/drivers/power/supply/max14656_charger_detector.c
index fc36828895bf6..0d0180fcfa63b 100644
--- a/drivers/power/supply/max14656_charger_detector.c
+++ b/drivers/power/supply/max14656_charger_detector.c
@@ -234,8 +234,7 @@ static enum power_supply_property max14656_battery_props[] = {
 	POWER_SUPPLY_PROP_MANUFACTURER,
 };
 
-static int max14656_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int max14656_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct device *dev = &client->dev;
@@ -317,7 +316,7 @@ static struct i2c_driver max14656_i2c_driver = {
 		.name	= "max14656",
 		.of_match_table = max14656_match_table,
 	},
-	.probe		= max14656_probe,
+	.probe_new	= max14656_probe,
 	.id_table	= max14656_id,
 };
 module_i2c_driver(max14656_i2c_driver);
-- 
GitLab


From a07fca69378c26ea034826feb0011256e25ad237 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:22 +0100
Subject: [PATCH 1900/4122] power: supply: max17040: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/max17040_battery.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/max17040_battery.c b/drivers/power/supply/max17040_battery.c
index a9aef1e8b186e..d1075959dd466 100644
--- a/drivers/power/supply/max17040_battery.c
+++ b/drivers/power/supply/max17040_battery.c
@@ -430,9 +430,9 @@ static const struct power_supply_desc max17040_battery_desc = {
 	.num_properties		= ARRAY_SIZE(max17040_battery_props),
 };
 
-static int max17040_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int max17040_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct i2c_adapter *adapter = client->adapter;
 	struct power_supply_config psy_cfg = {};
 	struct max17040_chip *chip;
@@ -599,7 +599,7 @@ static struct i2c_driver max17040_i2c_driver = {
 		.of_match_table = max17040_of_match,
 		.pm	= MAX17040_PM_OPS,
 	},
-	.probe		= max17040_probe,
+	.probe_new	= max17040_probe,
 	.id_table	= max17040_id,
 };
 module_i2c_driver(max17040_i2c_driver);
-- 
GitLab


From d9ac265b57b8625e4ce1b1bd6a6baa862d51c1f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:23 +0100
Subject: [PATCH 1901/4122] power: supply: max17042_battery: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/max17042_battery.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/max17042_battery.c b/drivers/power/supply/max17042_battery.c
index ab031bbfbe785..89cabe8ed3b06 100644
--- a/drivers/power/supply/max17042_battery.c
+++ b/drivers/power/supply/max17042_battery.c
@@ -1031,9 +1031,9 @@ static const struct power_supply_desc max17042_no_current_sense_psy_desc = {
 	.num_properties	= ARRAY_SIZE(max17042_battery_props) - 2,
 };
 
-static int max17042_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int max17042_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct i2c_adapter *adapter = client->adapter;
 	const struct power_supply_desc *max17042_desc = &max17042_psy_desc;
 	struct power_supply_config psy_cfg = {};
@@ -1220,7 +1220,7 @@ static struct i2c_driver max17042_i2c_driver = {
 		.of_match_table = of_match_ptr(max17042_dt_match),
 		.pm	= &max17042_pm_ops,
 	},
-	.probe		= max17042_probe,
+	.probe_new	= max17042_probe,
 	.id_table	= max17042_id,
 };
 module_i2c_driver(max17042_i2c_driver);
-- 
GitLab


From f40ec8bc0888ee785317e0aa4ffa239f73b03d97 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:24 +0100
Subject: [PATCH 1902/4122] power: supply: rt5033_battery: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/rt5033_battery.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/rt5033_battery.c b/drivers/power/supply/rt5033_battery.c
index 736dec608ff6d..5c04cf3052193 100644
--- a/drivers/power/supply/rt5033_battery.c
+++ b/drivers/power/supply/rt5033_battery.c
@@ -112,8 +112,7 @@ static const struct power_supply_desc rt5033_battery_desc = {
 	.num_properties	= ARRAY_SIZE(rt5033_battery_props),
 };
 
-static int rt5033_battery_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int rt5033_battery_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct power_supply_config psy_cfg = {};
@@ -173,7 +172,7 @@ static struct i2c_driver rt5033_battery_driver = {
 		.name = "rt5033-battery",
 		.of_match_table = rt5033_battery_of_match,
 	},
-	.probe = rt5033_battery_probe,
+	.probe_new = rt5033_battery_probe,
 	.remove = rt5033_battery_remove,
 	.id_table = rt5033_battery_id,
 };
-- 
GitLab


From 2adfc4370ebb745380a44c3e5418486a32a9ba67 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:25 +0100
Subject: [PATCH 1903/4122] power: supply: rt9455: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/rt9455_charger.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/rt9455_charger.c b/drivers/power/supply/rt9455_charger.c
index 72962286d7045..31fb6526a1fdf 100644
--- a/drivers/power/supply/rt9455_charger.c
+++ b/drivers/power/supply/rt9455_charger.c
@@ -1581,8 +1581,7 @@ static const struct regmap_config rt9455_regmap_config = {
 	.cache_type	= REGCACHE_RBTREE,
 };
 
-static int rt9455_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int rt9455_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct device *dev = &client->dev;
@@ -1738,7 +1737,7 @@ MODULE_DEVICE_TABLE(acpi, rt9455_i2c_acpi_match);
 #endif
 
 static struct i2c_driver rt9455_driver = {
-	.probe		= rt9455_probe,
+	.probe_new	= rt9455_probe,
 	.remove		= rt9455_remove,
 	.id_table	= rt9455_i2c_id_table,
 	.driver = {
-- 
GitLab


From ef3f6e07d55aeb32880fd24993de4efa9ec09c41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:26 +0100
Subject: [PATCH 1904/4122] power: supply: sbs: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/sbs-charger.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/sbs-charger.c b/drivers/power/supply/sbs-charger.c
index b08f7d0c41815..75ebcbf0a7883 100644
--- a/drivers/power/supply/sbs-charger.c
+++ b/drivers/power/supply/sbs-charger.c
@@ -162,8 +162,7 @@ static const struct power_supply_desc sbs_desc = {
 	.get_property = sbs_get_property,
 };
 
-static int sbs_probe(struct i2c_client *client,
-		     const struct i2c_device_id *id)
+static int sbs_probe(struct i2c_client *client)
 {
 	struct power_supply_config psy_cfg = {};
 	struct sbs_info *chip;
@@ -241,7 +240,7 @@ static const struct i2c_device_id sbs_id[] = {
 MODULE_DEVICE_TABLE(i2c, sbs_id);
 
 static struct i2c_driver sbs_driver = {
-	.probe		= sbs_probe,
+	.probe_new	= sbs_probe,
 	.id_table	= sbs_id,
 	.driver = {
 		.name	= "sbs-charger",
-- 
GitLab


From 02d1a40141a7b9d9cb8ef151c14e7d7aeaa56966 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:27 +0100
Subject: [PATCH 1905/4122] power: supply: sbs-manager: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/sbs-manager.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/sbs-manager.c b/drivers/power/supply/sbs-manager.c
index 71ec8f74f8359..bde977391fd4c 100644
--- a/drivers/power/supply/sbs-manager.c
+++ b/drivers/power/supply/sbs-manager.c
@@ -315,9 +315,9 @@ static void sbsm_del_mux_adapter(void *data)
 	i2c_mux_del_adapters(sbsm->muxc);
 }
 
-static int sbsm_probe(struct i2c_client *client,
-		      const struct i2c_device_id *id)
+static int sbsm_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct i2c_adapter *adapter = client->adapter;
 	struct sbsm_data *data;
 	struct device *dev = &client->dev;
@@ -409,7 +409,7 @@ static struct i2c_driver sbsm_driver = {
 		.name = "sbsm",
 		.of_match_table = of_match_ptr(sbsm_dt_ids),
 	},
-	.probe		= sbsm_probe,
+	.probe_new	= sbsm_probe,
 	.alert		= sbsm_alert,
 	.id_table	= sbsm_ids
 };
-- 
GitLab


From 6d43a4b0b2f8ab6c9893e2673a133d1798b230e1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:28 +0100
Subject: [PATCH 1906/4122] power: supply: smb347: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/smb347-charger.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/smb347-charger.c b/drivers/power/supply/smb347-charger.c
index 996a82f8a2a1d..b5f0383102824 100644
--- a/drivers/power/supply/smb347-charger.c
+++ b/drivers/power/supply/smb347-charger.c
@@ -1528,9 +1528,9 @@ static const struct regulator_desc smb347_usb_vbus_regulator_desc = {
 	.n_voltages	= 1,
 };
 
-static int smb347_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int smb347_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct power_supply_config mains_usb_cfg = {};
 	struct regulator_config usb_rdev_cfg = {};
 	struct device *dev = &client->dev;
@@ -1629,7 +1629,7 @@ static struct i2c_driver smb347_driver = {
 		.name = "smb347",
 		.of_match_table = smb3xx_of_match,
 	},
-	.probe = smb347_probe,
+	.probe_new = smb347_probe,
 	.remove = smb347_remove,
 	.shutdown = smb347_shutdown,
 	.id_table = smb347_id,
-- 
GitLab


From 8a4f891b2a10f6bbc7bac256b2f745f03d2b7185 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:29 +0100
Subject: [PATCH 1907/4122] power: supply: ucs1002: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/ucs1002_power.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/ucs1002_power.c b/drivers/power/supply/ucs1002_power.c
index ef673ec3db568..836d44c9fb74f 100644
--- a/drivers/power/supply/ucs1002_power.c
+++ b/drivers/power/supply/ucs1002_power.c
@@ -532,8 +532,7 @@ static const struct regulator_desc ucs1002_regulator_descriptor = {
 	.n_voltages	= 1,
 };
 
-static int ucs1002_probe(struct i2c_client *client,
-			 const struct i2c_device_id *dev_id)
+static int ucs1002_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 	struct power_supply_config charger_config = {};
@@ -681,7 +680,7 @@ static struct i2c_driver ucs1002_driver = {
 		   .name = "ucs1002",
 		   .of_match_table = ucs1002_of_match,
 	},
-	.probe = ucs1002_probe,
+	.probe_new = ucs1002_probe,
 };
 module_i2c_driver(ucs1002_driver);
 
-- 
GitLab


From 922bde5a095540fe3870245e4f0b625a20967ea4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:44:30 +0100
Subject: [PATCH 1908/4122] power: supply: z2_battery: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/z2_battery.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/power/supply/z2_battery.c b/drivers/power/supply/z2_battery.c
index 1897c29848600..eb01b01bf593b 100644
--- a/drivers/power/supply/z2_battery.c
+++ b/drivers/power/supply/z2_battery.c
@@ -176,8 +176,7 @@ static int z2_batt_ps_init(struct z2_charger *charger, int props)
 	return 0;
 }
 
-static int z2_batt_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int z2_batt_probe(struct i2c_client *client)
 {
 	int ret = 0;
 	int props = 1;	/* POWER_SUPPLY_PROP_PRESENT */
@@ -306,7 +305,7 @@ static struct i2c_driver z2_batt_driver = {
 		.name	= "z2-battery",
 		.pm	= Z2_BATTERY_PM_OPS
 	},
-	.probe		= z2_batt_probe,
+	.probe_new	= z2_batt_probe,
 	.remove		= z2_batt_remove,
 	.id_table	= z2_batt_id,
 };
-- 
GitLab


From 0d2573a2b7838a4f6934c2835e6730b38df4bcc9 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Wed, 9 Nov 2022 14:30:55 +0100
Subject: [PATCH 1909/4122] modpost: Join broken long printed messages

Breaking long printed messages in multiple lines makes it very hard to
look up where they originated from.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/mod/file2alias.c | 18 +++++++-----------
 scripts/mod/modpost.c    |  8 +++-----
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 80d973144fded..7df23905fdf1c 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -140,25 +140,22 @@ static void device_id_check(const char *modname, const char *device_id,
 	int i;
 
 	if (size % id_size || size < id_size) {
-		fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo "
-		      "of the size of "
-		      "section __mod_%s__<identifier>_device_table=%lu.\n"
-		      "Fix definition of struct %s_device_id "
-		      "in mod_devicetable.h\n",
+		fatal("%s: sizeof(struct %s_device_id)=%lu is not a modulo of the size of section __mod_%s__<identifier>_device_table=%lu.\n"
+		      "Fix definition of struct %s_device_id in mod_devicetable.h\n",
 		      modname, device_id, id_size, device_id, size, device_id);
 	}
 	/* Verify last one is a terminator */
 	for (i = 0; i < id_size; i++ ) {
 		if (*(uint8_t*)(symval+size-id_size+i)) {
-			fprintf(stderr,"%s: struct %s_device_id is %lu bytes.  "
-				"The last of %lu is:\n",
+			fprintf(stderr,
+				"%s: struct %s_device_id is %lu bytes.  The last of %lu is:\n",
 				modname, device_id, id_size, size / id_size);
 			for (i = 0; i < id_size; i++ )
 				fprintf(stderr,"0x%02x ",
 					*(uint8_t*)(symval+size-id_size+i) );
 			fprintf(stderr,"\n");
-			fatal("%s: struct %s_device_id is not terminated "
-				"with a NULL entry!\n", modname, device_id);
+			fatal("%s: struct %s_device_id is not terminated with a NULL entry!\n",
+			      modname, device_id);
 		}
 	}
 }
@@ -1154,8 +1151,7 @@ static int do_amba_entry(const char *filename,
 	DEF_FIELD(symval, amba_id, mask);
 
 	if ((id & mask) != id)
-		fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: "
-		      "id=0x%08X, mask=0x%08X.  Please fix this driver.\n",
+		fatal("%s: Masked-off bit(s) of AMBA device ID are non-zero: id=0x%08X, mask=0x%08X.  Please fix this driver.\n",
 		      filename, id, mask);
 
 	p += sprintf(alias, "amba:d");
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 2c80da0220c32..56d856f2e5115 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -519,9 +519,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
 		int nobits = sechdrs[i].sh_type == SHT_NOBITS;
 
 		if (!nobits && sechdrs[i].sh_offset > info->size) {
-			fatal("%s is truncated. sechdrs[i].sh_offset=%lu > "
-			      "sizeof(*hrd)=%zu\n", filename,
-			      (unsigned long)sechdrs[i].sh_offset,
+			fatal("%s is truncated. sechdrs[i].sh_offset=%lu > sizeof(*hrd)=%zu\n",
+			      filename, (unsigned long)sechdrs[i].sh_offset,
 			      sizeof(*hdr));
 			return 0;
 		}
@@ -1355,8 +1354,7 @@ static void report_extable_warnings(const char* modname, struct elf_info* elf,
 	get_pretty_name(is_function(tosym),
 			&to_pretty_name, &to_pretty_name_p);
 
-	warn("%s(%s+0x%lx): Section mismatch in reference"
-	     " from the %s %s%s to the %s %s:%s%s\n",
+	warn("%s(%s+0x%lx): Section mismatch in reference from the %s %s%s to the %s %s:%s%s\n",
 	     modname, fromsec, (long)r->r_offset, from_pretty_name,
 	     fromsym_name, from_pretty_name_p,
 	     to_pretty_name, tosec, tosym_name, to_pretty_name_p);
-- 
GitLab


From 9f8fe647797a4bc049bc7cceaf3a63584678ba04 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Thu, 10 Nov 2022 11:59:05 -0800
Subject: [PATCH 1910/4122] Makefile.debug: support for -gz=zstd

Make DEBUG_INFO_COMPRESSED a choice; DEBUG_INFO_COMPRESSED_NONE is the
default, DEBUG_INFO_COMPRESSED_ZLIB uses zlib,
DEBUG_INFO_COMPRESSED_ZSTD uses zstd.

This renames the existing KConfig option DEBUG_INFO_COMPRESSED to
DEBUG_INFO_COMPRESSED_ZLIB so users upgrading may need to reset the new
Kconfigs.

Some quick N=1 measurements with du, /usr/bin/time -v, and bloaty:

clang-16, x86_64 defconfig plus
CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_NONE=y:
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:55.43
488M vmlinux
27.6%   136Mi   0.0%       0    .debug_info
 6.1%  30.2Mi   0.0%       0    .debug_str_offsets
 3.5%  17.2Mi   0.0%       0    .debug_line
 3.3%  16.3Mi   0.0%       0    .debug_loclists
 0.9%  4.62Mi   0.0%       0    .debug_str

clang-16, x86_64 defconfig plus
CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_ZLIB=y:
Elapsed (wall clock) time (h:mm:ss or m:ss): 1:00.35
385M vmlinux
21.8%  85.4Mi   0.0%       0    .debug_info
 2.1%  8.26Mi   0.0%       0    .debug_str_offsets
 2.1%  8.24Mi   0.0%       0    .debug_loclists
 1.9%  7.48Mi   0.0%       0    .debug_line
 0.5%  1.94Mi   0.0%       0    .debug_str

clang-16, x86_64 defconfig plus
CONFIG_DEBUG_INFO=y CONFIG_DEBUG_INFO_COMPRESSED_ZSTD=y:
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:59.69
373M vmlinux
21.4%  81.4Mi   0.0%       0    .debug_info
 2.3%  8.85Mi   0.0%       0    .debug_loclists
 1.5%  5.71Mi   0.0%       0    .debug_line
 0.5%  1.95Mi   0.0%       0    .debug_str_offsets
 0.4%  1.62Mi   0.0%       0    .debug_str

That's only a 3.11% overall binary size savings over zlib, but at no
performance regression.

Link: https://maskray.me/blog/2022-09-09-zstd-compressed-debug-sections
Link: https://maskray.me/blog/2022-01-23-compressed-debug-sections
Suggested-by: Sedat Dilek (DHL Supply Chain) <sedat.dilek@dhl.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 lib/Kconfig.debug      | 29 +++++++++++++++++++++++++++--
 scripts/Makefile.debug |  6 +++++-
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c3c0b077ade33..d93dbe5a1d143 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -312,8 +312,21 @@ config DEBUG_INFO_REDUCED
 	  DEBUG_INFO build and compile times are reduced too.
 	  Only works with newer gcc versions.
 
-config DEBUG_INFO_COMPRESSED
-	bool "Compressed debugging information"
+choice
+	prompt "Compressed Debug information"
+	help
+	  Compress the resulting debug info. Results in smaller debug info sections,
+	  but requires that consumers are able to decompress the results.
+
+	  If unsure, choose DEBUG_INFO_COMPRESSED_NONE.
+
+config DEBUG_INFO_COMPRESSED_NONE
+	bool "Don't compress debug information"
+	help
+	  Don't compress debug info sections.
+
+config DEBUG_INFO_COMPRESSED_ZLIB
+	bool "Compress debugging information with zlib"
 	depends on $(cc-option,-gz=zlib)
 	depends on $(ld-option,--compress-debug-sections=zlib)
 	help
@@ -327,6 +340,18 @@ config DEBUG_INFO_COMPRESSED
 	  preferable to setting $KDEB_COMPRESS to "none" which would be even
 	  larger.
 
+config DEBUG_INFO_COMPRESSED_ZSTD
+	bool "Compress debugging information with zstd"
+	depends on $(cc-option,-gz=zstd)
+	depends on $(ld-option,--compress-debug-sections=zstd)
+	help
+	  Compress the debug information using zstd.  This may provide better
+	  compression than zlib, for about the same time costs, but requires newer
+	  toolchain support.  Requires GCC 13.0+ or Clang 16.0+, binutils 2.40+, and
+	  zstd.
+
+endchoice # "Compressed Debug information"
+
 config DEBUG_INFO_SPLIT
 	bool "Produce split debuginfo in .dwo files"
 	depends on $(cc-option,-gsplit-dwarf)
diff --git a/scripts/Makefile.debug b/scripts/Makefile.debug
index 332c486f705f7..059ff38fe0cb3 100644
--- a/scripts/Makefile.debug
+++ b/scripts/Makefile.debug
@@ -27,10 +27,14 @@ else
 DEBUG_RUSTFLAGS	+= -Cdebuginfo=2
 endif
 
-ifdef CONFIG_DEBUG_INFO_COMPRESSED
+ifdef CONFIG_DEBUG_INFO_COMPRESSED_ZLIB
 DEBUG_CFLAGS	+= -gz=zlib
 KBUILD_AFLAGS	+= -gz=zlib
 KBUILD_LDFLAGS	+= --compress-debug-sections=zlib
+else ifdef CONFIG_DEBUG_INFO_COMPRESSED_ZSTD
+DEBUG_CFLAGS	+= -gz=zstd
+KBUILD_AFLAGS	+= -gz=zstd
+KBUILD_LDFLAGS	+= --compress-debug-sections=zstd
 endif
 
 KBUILD_CFLAGS	+= $(DEBUG_CFLAGS)
-- 
GitLab


From 30daacc571d1416f24abd4cc49910ff9322a8cf6 Mon Sep 17 00:00:00 2001
From: KaiLong Wang <wangkailong@jari.cn>
Date: Sun, 13 Nov 2022 17:29:50 +0800
Subject: [PATCH 1911/4122] modpost: fix array_size.cocci warning

Fix following coccicheck warning:

scripts/mod/sumversion.c:219:48-49: WARNING: Use ARRAY_SIZE
scripts/mod/sumversion.c:156:48-49: WARNING: Use ARRAY_SIZE

Signed-off-by: KaiLong Wang <wangkailong@jari.cn>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/mod/sumversion.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/mod/sumversion.c b/scripts/mod/sumversion.c
index 6bf9caca09684..31066bfdba04e 100644
--- a/scripts/mod/sumversion.c
+++ b/scripts/mod/sumversion.c
@@ -153,7 +153,7 @@ static void md4_transform(uint32_t *hash, uint32_t const *in)
 
 static inline void md4_transform_helper(struct md4_ctx *ctx)
 {
-	le32_to_cpu_array(ctx->block, sizeof(ctx->block) / sizeof(uint32_t));
+	le32_to_cpu_array(ctx->block, ARRAY_SIZE(ctx->block));
 	md4_transform(ctx->hash, ctx->block);
 }
 
@@ -216,7 +216,7 @@ static void md4_final_ascii(struct md4_ctx *mctx, char *out, unsigned int len)
 	le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
 			  sizeof(uint64_t)) / sizeof(uint32_t));
 	md4_transform(mctx->hash, mctx->block);
-	cpu_to_le32_array(mctx->hash, sizeof(mctx->hash) / sizeof(uint32_t));
+	cpu_to_le32_array(mctx->hash, ARRAY_SIZE(mctx->hash));
 
 	snprintf(out, len, "%08X%08X%08X%08X",
 		 mctx->hash[0], mctx->hash[1], mctx->hash[2], mctx->hash[3]);
-- 
GitLab


From 1791360cb37ff5ef797afe9006cb315ebb7e969e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 13 Nov 2022 19:59:41 +0900
Subject: [PATCH 1912/4122] kconfig: remove unneeded variable in
 get_prompt_str()

The variable 'accessible' is redundant.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/menu.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/scripts/kconfig/menu.c b/scripts/kconfig/menu.c
index 109325f31bef3..b90fff833588a 100644
--- a/scripts/kconfig/menu.c
+++ b/scripts/kconfig/menu.c
@@ -724,10 +724,8 @@ static void get_prompt_str(struct gstr *r, struct property *prop,
 
 	menu = prop->menu;
 	for (i = 0; menu != &rootmenu && i < 8; menu = menu->parent) {
-		bool accessible = menu_is_visible(menu);
-
 		submenu[i++] = menu;
-		if (location == NULL && accessible)
+		if (location == NULL && menu_is_visible(menu))
 			location = menu;
 	}
 	if (head && location) {
-- 
GitLab


From 4d980fd111237ab64705b982f61f284c2a7885e5 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 13 Nov 2022 19:59:42 +0900
Subject: [PATCH 1913/4122] kconfig: remove const qualifier from str_get()

update_text() apparently edits the buffer returned by str_get().
(and there is no reason why it shouldn't)

Remove 'const' quailifier and casting.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/lkc.h   | 2 +-
 scripts/kconfig/mconf.c | 4 ++--
 scripts/kconfig/util.c  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
index 6ac2eabe109d2..e7118d62a45fc 100644
--- a/scripts/kconfig/lkc.h
+++ b/scripts/kconfig/lkc.h
@@ -76,7 +76,7 @@ struct gstr str_new(void);
 void str_free(struct gstr *gs);
 void str_append(struct gstr *gs, const char *s);
 void str_printf(struct gstr *gs, const char *fmt, ...);
-const char *str_get(struct gstr *gs);
+char *str_get(struct gstr *gs);
 
 /* menu.c */
 void _menu_init(void);
diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index 9d3cf510562f8..d7f7e1bf7dd4f 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -440,8 +440,8 @@ again:
 
 		res = get_relations_str(sym_arr, &head);
 		set_subtitle();
-		dres = show_textbox_ext("Search Results", (char *)
-					str_get(&res), 0, 0, keys, &vscroll,
+		dres = show_textbox_ext("Search Results", str_get(&res), 0, 0,
+					keys, &vscroll,
 					&hscroll, &update_text, (void *)
 					&data);
 		again = false;
diff --git a/scripts/kconfig/util.c b/scripts/kconfig/util.c
index 29585394df71d..b78f114ad48cc 100644
--- a/scripts/kconfig/util.c
+++ b/scripts/kconfig/util.c
@@ -74,7 +74,7 @@ void str_printf(struct gstr *gs, const char *fmt, ...)
 }
 
 /* Retrieve value of growable string */
-const char *str_get(struct gstr *gs)
+char *str_get(struct gstr *gs)
 {
 	return gs->s;
 }
-- 
GitLab


From be5ea98983efe2a2c5156c3b43e35a076d5b640d Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 13 Nov 2022 19:59:43 +0900
Subject: [PATCH 1914/4122] kconfig: remove redundant (void *) cast in
 search_conf()

The (void *) cast is redundant because the last argument of
show_textbox_ext() is an opaque pointer.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/kconfig/mconf.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
index d7f7e1bf7dd4f..9c549683c6275 100644
--- a/scripts/kconfig/mconf.c
+++ b/scripts/kconfig/mconf.c
@@ -441,8 +441,7 @@ again:
 		res = get_relations_str(sym_arr, &head);
 		set_subtitle();
 		dres = show_textbox_ext("Search Results", str_get(&res), 0, 0,
-					keys, &vscroll,
-					&hscroll, &update_text, (void *)
+					keys, &vscroll, &hscroll, &update_text,
 					&data);
 		again = false;
 		for (i = 0; i < JUMP_NB && keys[i]; i++)
-- 
GitLab


From f8f4dc7685c72c8ef86420566a38a4f786613851 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 15 Nov 2022 03:10:55 +0900
Subject: [PATCH 1915/4122] scripts/jobserver-exec: parse the last
 --jobserver-auth= option

In the GNU Make manual, the section "Sharing Job Slots with GNU make"
says:

    Be aware that the MAKEFLAGS variable may contain multiple instances
    of the --jobserver-auth= option. Only the last instance is relevant.

Take the last element of the array, not the first.

Link: https://www.gnu.org/software/make/manual/html_node/Job-Slots.html
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
---
 scripts/jobserver-exec | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/jobserver-exec b/scripts/jobserver-exec
index 8762887a970ce..4192855f5b8b5 100755
--- a/scripts/jobserver-exec
+++ b/scripts/jobserver-exec
@@ -23,7 +23,9 @@ try:
 	opts = [x for x in flags.split(" ") if x.startswith("--jobserver")]
 
 	# Parse out R,W file descriptor numbers and set them nonblocking.
-	fds = opts[0].split("=", 1)[1]
+	# If the MAKEFLAGS variable contains multiple instances of the
+	# --jobserver-auth= option, the last one is relevant.
+	fds = opts[-1].split("=", 1)[1]
 	reader, writer = [int(x) for x in fds.split(",", 1)]
 	# Open a private copy of reader to avoid setting nonblocking
 	# on an unexpecting process with the same reader fd.
-- 
GitLab


From 5724ac5589ad93d35d95a845222f566175c681b8 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 17 Nov 2022 10:30:33 +0900
Subject: [PATCH 1916/4122] kbuild: deb-pkg: get rid of |flex:native workaround
 from Build-Depends

"| flex:native" was a workaround (suggested by Ben, see Link) because
"MultiArch: foreign" was missing in the flex package on some old distros
when commit e3a22850664f ("deb-pkg: generate correct build dependencies")
was applied.

It seems fixing the flex package has been completed. Get rid of the
workaround.

Link: https://lore.kernel.org/linux-kbuild/ab49b0582ef12b14b1a68877263b81813e2492a2.camel@decadent.org.uk/
Link: https://wiki.debian.org/CrossBuildPackagingGuidelines
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Ben Hutchings <ben@decadent.org.uk>
---
 scripts/package/mkdebian | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/package/mkdebian b/scripts/package/mkdebian
index a3ac5a716e9fc..6cf383225b8b5 100755
--- a/scripts/package/mkdebian
+++ b/scripts/package/mkdebian
@@ -175,7 +175,7 @@ Section: kernel
 Priority: optional
 Maintainer: $maintainer
 Rules-Requires-Root: no
-Build-Depends: bc, rsync, kmod, cpio, bison, flex | flex:native $extra_build_depends
+Build-Depends: bc, rsync, kmod, cpio, bison, flex $extra_build_depends
 Homepage: https://www.kernel.org/
 
 Package: $packagename-$version
-- 
GitLab


From 9e7726a8a08a65ed48e2749ef62ec4970bdf851f Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:15 +0100
Subject: [PATCH 1917/4122] KVM: selftests: Hyper-V PV TLB flush selftest

Introduce a selftest for Hyper-V PV TLB flush hypercalls
(HvFlushVirtualAddressSpace/HvFlushVirtualAddressSpaceEx,
HvFlushVirtualAddressList/HvFlushVirtualAddressListEx).

The test creates one 'sender' vCPU and two 'worker' vCPU which do busy
loop reading from a certain GVA checking the observed value. Sender
vCPU swaos the data page with another page filled with a different value.
The expectation for workers is also altered. Without TLB flush on worker
vCPUs, they may continue to observe old value. To guard against accidental
TLB flushes for worker vCPUs the test is repeated 100 times.

Hyper-V TLB flush hypercalls are tested in both 'normal' and 'XMM
fast' modes.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-38-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore        |   1 +
 tools/testing/selftests/kvm/Makefile          |   1 +
 .../selftests/kvm/include/x86_64/hyperv.h     |   1 +
 .../selftests/kvm/x86_64/hyperv_tlb_flush.c   | 690 ++++++++++++++++++
 4 files changed, 693 insertions(+)
 create mode 100644 tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 3b3218cb46ed6..dc7e28cf2da07 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -27,6 +27,7 @@
 /x86_64/hyperv_features
 /x86_64/hyperv_ipi
 /x86_64/hyperv_svm_test
+/x86_64/hyperv_tlb_flush
 /x86_64/max_vcpuid_cap_test
 /x86_64/mmio_warning_test
 /x86_64/monitor_mwait_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 4095b1212f08c..058b15213c5dd 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -89,6 +89,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index c757e40011731..ae945f7408352 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -187,6 +187,7 @@
 /* hypercall options */
 #define HV_HYPERCALL_FAST_BIT		BIT(16)
 #define HV_HYPERCALL_VARHEAD_OFFSET	17
+#define HV_HYPERCALL_REP_COMP_OFFSET	32
 
 /*
  * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
new file mode 100644
index 0000000000000..68f97ff720a74
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <asm/barrier.h>
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define WORKER_VCPU_ID_1 2
+#define WORKER_VCPU_ID_2 65
+
+#define NTRY 100
+#define NTEST_PAGES 2
+
+struct hv_vpset {
+	u64 format;
+	u64 valid_bank_mask;
+	u64 bank_contents[];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+	HV_GENERIC_SET_SPARSE_4K,
+	HV_GENERIC_SET_ALL,
+};
+
+#define HV_FLUSH_ALL_PROCESSORS			BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES	BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY	BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT	BIT(3)
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+	u64 address_space;
+	u64 flags;
+	u64 processor_mask;
+	u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+	u64 address_space;
+	u64 flags;
+	struct hv_vpset hv_vp_set;
+	u64 gva_list[];
+} __packed;
+
+/*
+ * Pass the following info to 'workers' and 'sender'
+ * - Hypercall page's GVA
+ * - Hypercall page's GPA
+ * - Test pages GVA
+ * - GVAs of the test pages' PTEs
+ */
+struct test_data {
+	vm_vaddr_t hcall_gva;
+	vm_paddr_t hcall_gpa;
+	vm_vaddr_t test_pages;
+	vm_vaddr_t test_pages_pte[NTEST_PAGES];
+};
+
+/* 'Worker' vCPU code checking the contents of the test page */
+static void worker_guest_code(vm_vaddr_t test_data)
+{
+	struct test_data *data = (struct test_data *)test_data;
+	u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+	void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
+	u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
+	u64 expected, val;
+
+	x2apic_enable();
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+
+	for (;;) {
+		cpu_relax();
+
+		expected = READ_ONCE(*this_cpu);
+
+		/*
+		 * Make sure the value in the test page is read after reading
+		 * the expectation for the first time. Pairs with wmb() in
+		 * prepare_to_test().
+		 */
+		rmb();
+
+		val = READ_ONCE(*(u64 *)data->test_pages);
+
+		/*
+		 * Make sure the value in the test page is read after before
+		 * reading the expectation for the second time. Pairs with wmb()
+		 * post_test().
+		 */
+		rmb();
+
+		/*
+		 * '0' indicates the sender is between iterations, wait until
+		 * the sender is ready for this vCPU to start checking again.
+		 */
+		if (!expected)
+			continue;
+
+		/*
+		 * Re-read the per-vCPU byte to ensure the sender didn't move
+		 * onto a new iteration.
+		 */
+		if (expected != READ_ONCE(*this_cpu))
+			continue;
+
+		GUEST_ASSERT(val == expected);
+	}
+}
+
+/*
+ * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
+ * test page. '0' means don't check.
+ */
+static void set_expected_val(void *addr, u64 val, int vcpu_id)
+{
+	void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
+
+	*(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
+}
+
+/*
+ * Update PTEs swapping two test pages.
+ * TODO: use swap()/xchg() when these are provided.
+ */
+static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+{
+	uint64_t tmp = *(uint64_t *)pte_gva1;
+
+	*(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
+	*(uint64_t *)pte_gva2 = tmp;
+}
+
+/*
+ * TODO: replace the silly NOP loop with a proper udelay() implementation.
+ */
+static inline void do_delay(void)
+{
+	int i;
+
+	for (i = 0; i < 1000000; i++)
+		asm volatile("nop");
+}
+
+/*
+ * Prepare to test: 'disable' workers by setting the expectation to '0',
+ * clear hypercall input page and then swap two test pages.
+ */
+static inline void prepare_to_test(struct test_data *data)
+{
+	/* Clear hypercall input page */
+	memset((void *)data->hcall_gva, 0, PAGE_SIZE);
+
+	/* 'Disable' workers */
+	set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
+	set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
+
+	/* Make sure workers are 'disabled' before we swap PTEs. */
+	wmb();
+
+	/* Make sure workers have enough time to notice */
+	do_delay();
+
+	/* Swap test page mappings */
+	swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
+}
+
+/*
+ * Finalize the test: check hypercall resule set the expected val for
+ * 'worker' CPUs and give them some time to test.
+ */
+static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
+{
+	/* Make sure we change the expectation after swapping PTEs */
+	wmb();
+
+	/* Set the expectation for workers, '0' means don't test */
+	set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
+	set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
+
+	/* Make sure workers have enough time to test */
+	do_delay();
+}
+
+#define TESTVAL1 0x0101010101010101
+#define TESTVAL2 0x0202020202020202
+
+/* Main vCPU doing the test */
+static void sender_guest_code(vm_vaddr_t test_data)
+{
+	struct test_data *data = (struct test_data *)test_data;
+	struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
+	struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
+	vm_paddr_t hcall_gpa = data->hcall_gpa;
+	int i, stage = 1;
+
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
+
+	/* "Slow" hypercalls */
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+				 hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+		flush->gva_list[0] = (u64)data->test_pages;
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			HV_FLUSH_ALL_PROCESSORS;
+		flush->processor_mask = 0;
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+				 hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			HV_FLUSH_ALL_PROCESSORS;
+		flush->gva_list[0] = (u64)data->test_pages;
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+				 (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		/* bank_contents and gva_list occupy the same space, thus [1] */
+		flush_ex->gva_list[1] = (u64)data->test_pages;
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+				 (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+			BIT_ULL(WORKER_VCPU_ID_1 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+				 (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+			BIT_ULL(WORKER_VCPU_ID_2 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		/* bank_contents and gva_list occupy the same space, thus [2] */
+		flush_ex->gva_list[2] = (u64)data->test_pages;
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+				 (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+		flush_ex->gva_list[0] = (u64)data->test_pages;
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 hcall_gpa, hcall_gpa + PAGE_SIZE);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	/* "Fast" hypercalls */
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+		hyperv_write_xmm_input(&flush->processor_mask, 1);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+				 HV_HYPERCALL_FAST_BIT, 0x0,
+				 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+		flush->gva_list[0] = (u64)data->test_pages;
+		hyperv_write_xmm_input(&flush->processor_mask, 1);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+				 HV_HYPERCALL_FAST_BIT |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		hyperv_write_xmm_input(&flush->processor_mask, 1);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+				 HV_HYPERCALL_FAST_BIT, 0x0,
+				 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+				 HV_FLUSH_ALL_PROCESSORS);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush->gva_list[0] = (u64)data->test_pages;
+		hyperv_write_xmm_input(&flush->processor_mask, 1);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+				 HV_HYPERCALL_FAST_BIT |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
+				 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+				 HV_FLUSH_ALL_PROCESSORS);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+				 HV_HYPERCALL_FAST_BIT |
+				 (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+				 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		/* bank_contents and gva_list occupy the same space, thus [1] */
+		flush_ex->gva_list[1] = (u64)data->test_pages;
+		hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+				 HV_HYPERCALL_FAST_BIT |
+				 (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+			BIT_ULL(WORKER_VCPU_ID_1 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+				 HV_HYPERCALL_FAST_BIT |
+				 (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+				 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, i % 2 ? TESTVAL1 :
+			  TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+		flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+			BIT_ULL(WORKER_VCPU_ID_2 / 64);
+		flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+		flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+		/* bank_contents and gva_list occupy the same space, thus [2] */
+		flush_ex->gva_list[2] = (u64)data->test_pages;
+		hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+				 HV_HYPERCALL_FAST_BIT |
+				 (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+		hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+				 HV_HYPERCALL_FAST_BIT,
+				 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_SYNC(stage++);
+
+	/* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+	for (i = 0; i < NTRY; i++) {
+		prepare_to_test(data);
+		flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+		flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+		flush_ex->gva_list[0] = (u64)data->test_pages;
+		hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+		hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+				 HV_HYPERCALL_FAST_BIT |
+				 (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+				 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+		post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+			  i % 2 ? TESTVAL1 : TESTVAL2);
+	}
+
+	GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+	struct ucall uc;
+	int old;
+	int r;
+	unsigned int exit_reason;
+
+	r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+	TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+		    vcpu->id, r);
+
+	vcpu_run(vcpu);
+	exit_reason = vcpu->run->exit_reason;
+
+	TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+		    "vCPU %u exited with unexpected exit reason %u-%s, expected KVM_EXIT_IO",
+		    vcpu->id, exit_reason, exit_reason_str(exit_reason));
+
+	switch (get_ucall(vcpu, &uc)) {
+	case UCALL_ABORT:
+		REPORT_GUEST_ASSERT(uc);
+		/* NOT REACHED */
+	default:
+		TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
+	}
+
+	return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+	void *retval;
+	int r;
+
+	r = pthread_cancel(thread);
+	TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+		    vcpu->id, r);
+
+	r = pthread_join(thread, &retval);
+	TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+		    vcpu->id, r);
+	TEST_ASSERT(retval == PTHREAD_CANCELED,
+		    "expected retval=%p, got %p", PTHREAD_CANCELED,
+		    retval);
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+	struct kvm_vcpu *vcpu[3];
+	unsigned int exit_reason;
+	pthread_t threads[2];
+	vm_vaddr_t test_data_page, gva;
+	vm_paddr_t gpa;
+	uint64_t *pte;
+	struct test_data *data;
+	struct ucall uc;
+	int stage = 1, r, i;
+
+	vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+	/* Test data page */
+	test_data_page = vm_vaddr_alloc_page(vm);
+	data = (struct test_data *)addr_gva2hva(vm, test_data_page);
+
+	/* Hypercall input/output */
+	data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+	data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
+	memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
+
+	/*
+	 * Test pages: the first one is filled with '0x01's, the second with '0x02's
+	 * and the test will swap their mappings. The third page keeps the indication
+	 * about the current state of mappings.
+	 */
+	data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+	for (i = 0; i < NTEST_PAGES; i++)
+		memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
+		       (u8)(i + 1), PAGE_SIZE);
+	set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
+	set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
+
+	/*
+	 * Get PTE pointers for test pages and map them inside the guest.
+	 * Use separate page for each PTE for simplicity.
+	 */
+	gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+	for (i = 0; i < NTEST_PAGES; i++) {
+		pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+		gpa = addr_hva2gpa(vm, pte);
+		__virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+		data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+	}
+
+	/*
+	 * Sender vCPU which performs the test: swaps test pages, sets expectation
+	 * for 'workers' and issues TLB flush hypercalls.
+	 */
+	vcpu_args_set(vcpu[0], 1, test_data_page);
+	vcpu_set_hv_cpuid(vcpu[0]);
+
+	/* Create worker vCPUs which check the contents of the test pages */
+	vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
+	vcpu_args_set(vcpu[1], 1, test_data_page);
+	vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
+	vcpu_set_hv_cpuid(vcpu[1]);
+
+	vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
+	vcpu_args_set(vcpu[2], 1, test_data_page);
+	vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
+	vcpu_set_hv_cpuid(vcpu[2]);
+
+	r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+	TEST_ASSERT(!r, "pthread_create() failed");
+
+	r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+	TEST_ASSERT(!r, "pthread_create() failed");
+
+	while (true) {
+		vcpu_run(vcpu[0]);
+		exit_reason = vcpu[0]->run->exit_reason;
+
+		TEST_ASSERT(exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    exit_reason, exit_reason_str(exit_reason));
+
+		switch (get_ucall(vcpu[0], &uc)) {
+		case UCALL_SYNC:
+			TEST_ASSERT(uc.args[1] == stage,
+				    "Unexpected stage: %ld (%d expected)\n",
+				    uc.args[1], stage);
+			break;
+		case UCALL_ABORT:
+			REPORT_GUEST_ASSERT(uc);
+			/* NOT REACHED */
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
+
+		stage++;
+	}
+
+done:
+	cancel_join_vcpu_thread(threads[0], vcpu[1]);
+	cancel_join_vcpu_thread(threads[1], vcpu[2]);
+	kvm_vm_free(vm);
+
+	return 0;
+}
-- 
GitLab


From 1ad51c0c0cdd5315405d1c93a345635451c245bb Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:16 +0100
Subject: [PATCH 1918/4122] KVM: selftests: Sync 'struct hv_enlightened_vmcs'
 definition with hyperv-tlfs.h

'struct hv_enlightened_vmcs' definition in selftests is not '__packed'
and so we rely on the compiler doing the right padding. This is not
obvious so it seems beneficial to use the same definition as in kernel.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-39-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/evmcs.h | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index 58db74f68af25..4b6840df29793 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -41,6 +41,8 @@ struct hv_enlightened_vmcs {
 	u16 host_gs_selector;
 	u16 host_tr_selector;
 
+	u16 padding16_1;
+
 	u64 host_ia32_pat;
 	u64 host_ia32_efer;
 
@@ -159,7 +161,7 @@ struct hv_enlightened_vmcs {
 	u64 ept_pointer;
 
 	u16 virtual_processor_id;
-	u16 padding16[3];
+	u16 padding16_2[3];
 
 	u64 padding64_2[5];
 	u64 guest_physical_address;
@@ -195,13 +197,13 @@ struct hv_enlightened_vmcs {
 	u64 guest_rip;
 
 	u32 hv_clean_fields;
-	u32 hv_padding_32;
+	u32 padding32_1;
 	u32 hv_synthetic_controls;
 	struct {
 		u32 nested_flush_hypercall:1;
 		u32 msr_bitmap:1;
 		u32 reserved:30;
-	} hv_enlightenments_control;
+	}  __packed hv_enlightenments_control;
 	u32 hv_vp_id;
 	u32 padding32_2;
 	u64 hv_vm_id;
@@ -222,7 +224,7 @@ struct hv_enlightened_vmcs {
 	u64 host_ssp;
 	u64 host_ia32_int_ssp_table_addr;
 	u64 padding64_6;
-};
+} __packed;
 
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE                     0
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP                BIT(0)
-- 
GitLab


From d7b14a868ac2122459b2d702fe05c75c48a687bf Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:17 +0100
Subject: [PATCH 1919/4122] KVM: selftests: Sync 'struct hv_vp_assist_page'
 definition with hyperv-tlfs.h

'struct hv_vp_assist_page' definition doesn't match TLFS. Also, define
'struct hv_nested_enlightenments_control' and use it instead of opaque
'__u64'.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-40-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/evmcs.h      | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index 4b6840df29793..efdc62704f276 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -20,14 +20,26 @@
 
 extern bool enable_evmcs;
 
+struct hv_nested_enlightenments_control {
+	struct {
+		__u32 directhypercall:1;
+		__u32 reserved:31;
+	} features;
+	struct {
+		__u32 reserved;
+	} hypercallControls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
 struct hv_vp_assist_page {
 	__u32 apic_assist;
-	__u32 reserved;
-	__u64 vtl_control[2];
-	__u64 nested_enlightenments_control[2];
-	__u32 enlighten_vmentry;
+	__u32 reserved1;
+	__u64 vtl_control[3];
+	struct hv_nested_enlightenments_control nested_control;
+	__u8 enlighten_vmentry;
+	__u8 reserved2[7];
 	__u64 current_nested_vmcs;
-};
+} __packed;
 
 struct hv_enlightened_vmcs {
 	u32 revision_id;
-- 
GitLab


From e8f3d23c02d09210a980ef211b3e8a99d44cb602 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:18 +0100
Subject: [PATCH 1920/4122] KVM: selftests: Move Hyper-V VP assist page
 enablement out of evmcs.h

Hyper-V VP assist page is not eVMCS specific, it is also used for
enlightened nSVM. Move the code to vendor neutral place.

Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-41-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/Makefile          |  1 +
 .../selftests/kvm/include/x86_64/evmcs.h      | 40 +------------------
 .../selftests/kvm/include/x86_64/hyperv.h     | 31 ++++++++++++++
 .../testing/selftests/kvm/lib/x86_64/hyperv.c | 21 ++++++++++
 .../testing/selftests/kvm/x86_64/evmcs_test.c |  1 +
 5 files changed, 56 insertions(+), 38 deletions(-)
 create mode 100644 tools/testing/selftests/kvm/lib/x86_64/hyperv.c

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 058b15213c5dd..246d52e9df60d 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -53,6 +53,7 @@ LIBKVM_STRING += lib/string_override.c
 
 LIBKVM_x86_64 += lib/x86_64/apic.c
 LIBKVM_x86_64 += lib/x86_64/handlers.S
+LIBKVM_x86_64 += lib/x86_64/hyperv.c
 LIBKVM_x86_64 += lib/x86_64/memstress.c
 LIBKVM_x86_64 += lib/x86_64/processor.c
 LIBKVM_x86_64 += lib/x86_64/svm.c
diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index efdc62704f276..2530b5aeb4bae 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -10,6 +10,7 @@
 #define SELFTEST_KVM_EVMCS_H
 
 #include <stdint.h>
+#include "hyperv.h"
 #include "vmx.h"
 
 #define u16 uint16_t
@@ -20,27 +21,6 @@
 
 extern bool enable_evmcs;
 
-struct hv_nested_enlightenments_control {
-	struct {
-		__u32 directhypercall:1;
-		__u32 reserved:31;
-	} features;
-	struct {
-		__u32 reserved;
-	} hypercallControls;
-} __packed;
-
-/* Define virtual processor assist page structure. */
-struct hv_vp_assist_page {
-	__u32 apic_assist;
-	__u32 reserved1;
-	__u64 vtl_control[3];
-	struct hv_nested_enlightenments_control nested_control;
-	__u8 enlighten_vmentry;
-	__u8 reserved2[7];
-	__u64 current_nested_vmcs;
-} __packed;
-
 struct hv_enlightened_vmcs {
 	u32 revision_id;
 	u32 abort;
@@ -257,29 +237,13 @@ struct hv_enlightened_vmcs {
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL    BIT(15)
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL                      0xFFFF
 
-#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
-		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
-
 extern struct hv_enlightened_vmcs *current_evmcs;
-extern struct hv_vp_assist_page *current_vp_assist;
 
 int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
 
-static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+static inline void evmcs_enable(void)
 {
-	u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
-		HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
-	wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
-
-	current_vp_assist = vp_assist;
-
 	enable_evmcs = true;
-
-	return 0;
 }
 
 static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index ae945f7408352..ba38fa347cba3 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -237,4 +237,35 @@ static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
 /* Proper HV_X64_MSR_GUEST_OS_ID value */
 #define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
 
+#define HV_X64_MSR_VP_ASSIST_PAGE		0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE	0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT	12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK	\
+		(~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_nested_enlightenments_control {
+	struct {
+		__u32 directhypercall:1;
+		__u32 reserved:31;
+	} features;
+	struct {
+		__u32 reserved;
+	} hypercallControls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+	__u32 apic_assist;
+	__u32 reserved1;
+	__u64 vtl_control[3];
+	struct hv_nested_enlightenments_control nested_control;
+	__u8 enlighten_vmentry;
+	__u8 reserved2[7];
+	__u64 current_nested_vmcs;
+} __packed;
+
+extern struct hv_vp_assist_page *current_vp_assist;
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
+
 #endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
new file mode 100644
index 0000000000000..32dc0afd9e5bf
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V specific functions.
+ *
+ * Copyright (C) 2021, Red Hat Inc.
+ */
+#include <stdint.h>
+#include "processor.h"
+#include "hyperv.h"
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+	uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+		HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+	wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+	current_vp_assist = vp_assist;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 99bc202243d23..9007fb04343bc 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -79,6 +79,7 @@ void guest_code(struct vmx_pages *vmx_pages)
 	GUEST_SYNC(2);
 
 	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
+	evmcs_enable();
 
 	GUEST_ASSERT(vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
-- 
GitLab


From cd8f11bd6bbd00565cf39c6335cf2788795fdca7 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:19 +0100
Subject: [PATCH 1921/4122] KVM: selftests: Split off load_evmcs() from
 load_vmcs()

In preparation to putting Hyper-V specific test pages to a dedicated
struct, move eVMCS load logic from load_vmcs(). Tests call load_vmcs()
directly and the only one which needs 'enlightened' version is
evmcs_test so there's not much gain in having this merged.

Temporary pass both GPA and HVA to load_evmcs().

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-42-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/evmcs.h      | 10 ++++++
 tools/testing/selftests/kvm/lib/x86_64/vmx.c  | 32 +++++++------------
 .../testing/selftests/kvm/x86_64/evmcs_test.c |  4 +--
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index 2530b5aeb4bae..59b60d45b8f6a 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -256,6 +256,16 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
 	return 0;
 }
 
+static inline bool load_evmcs(uint64_t enlightened_vmcs_gpa, void *enlightened_vmcs)
+{
+	if (evmcs_vmptrld(enlightened_vmcs_gpa, enlightened_vmcs))
+		return false;
+
+	current_evmcs->revision_id = EVMCS_VERSION;
+
+	return true;
+}
+
 static inline int evmcs_vmptrst(uint64_t *value)
 {
 	*value = current_vp_assist->current_nested_vmcs &
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 3e4ea846366cb..318ee4658f0b9 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -171,26 +171,18 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
 
 bool load_vmcs(struct vmx_pages *vmx)
 {
-	if (!enable_evmcs) {
-		/* Load a VMCS. */
-		*(uint32_t *)(vmx->vmcs) = vmcs_revision();
-		if (vmclear(vmx->vmcs_gpa))
-			return false;
-
-		if (vmptrld(vmx->vmcs_gpa))
-			return false;
-
-		/* Setup shadow VMCS, do not load it yet. */
-		*(uint32_t *)(vmx->shadow_vmcs) =
-			vmcs_revision() | 0x80000000ul;
-		if (vmclear(vmx->shadow_vmcs_gpa))
-			return false;
-	} else {
-		if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
-				  vmx->enlightened_vmcs))
-			return false;
-		current_evmcs->revision_id = EVMCS_VERSION;
-	}
+	/* Load a VMCS. */
+	*(uint32_t *)(vmx->vmcs) = vmcs_revision();
+	if (vmclear(vmx->vmcs_gpa))
+		return false;
+
+	if (vmptrld(vmx->vmcs_gpa))
+		return false;
+
+	/* Setup shadow VMCS, do not load it yet. */
+	*(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+	if (vmclear(vmx->shadow_vmcs_gpa))
+		return false;
 
 	return true;
 }
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 9007fb04343bc..5a4c8b1873aab 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -81,10 +81,10 @@ void guest_code(struct vmx_pages *vmx_pages)
 	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
 	evmcs_enable();
 
-	GUEST_ASSERT(vmx_pages->vmcs_gpa);
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
 	GUEST_SYNC(3);
-	GUEST_ASSERT(load_vmcs(vmx_pages));
+	GUEST_ASSERT(load_evmcs(vmx_pages->enlightened_vmcs_gpa,
+				vmx_pages->enlightened_vmcs));
 	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
 
 	GUEST_SYNC(4);
-- 
GitLab


From 2dc458b8622182ac4d89de793c548cf04f632801 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:20 +0100
Subject: [PATCH 1922/4122] KVM: selftests: Create a vendor independent helper
 to allocate Hyper-V specific test pages

There's no need to pollute VMX and SVM code with Hyper-V specific
stuff and allocate Hyper-V specific test pages for all test as only
few really need them. Create a dedicated struct and an allocation
helper.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-43-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/evmcs.h      |  4 ++--
 .../selftests/kvm/include/x86_64/hyperv.h     | 15 +++++++++++++
 .../selftests/kvm/include/x86_64/vmx.h        |  8 -------
 .../testing/selftests/kvm/lib/x86_64/hyperv.c | 20 +++++++++++++++++
 tools/testing/selftests/kvm/lib/x86_64/vmx.c  | 12 ----------
 .../testing/selftests/kvm/x86_64/evmcs_test.c | 22 +++++++++----------
 6 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index 59b60d45b8f6a..94d6059e9a12d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -256,9 +256,9 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
 	return 0;
 }
 
-static inline bool load_evmcs(uint64_t enlightened_vmcs_gpa, void *enlightened_vmcs)
+static inline bool load_evmcs(struct hyperv_test_pages *hv)
 {
-	if (evmcs_vmptrld(enlightened_vmcs_gpa, enlightened_vmcs))
+	if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
 		return false;
 
 	current_evmcs->revision_id = EVMCS_VERSION;
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index ba38fa347cba3..becdd8245e846 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -268,4 +268,19 @@ extern struct hv_vp_assist_page *current_vp_assist;
 
 int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
 
+struct hyperv_test_pages {
+	/* VP assist page */
+	void *vp_assist_hva;
+	uint64_t vp_assist_gpa;
+	void *vp_assist;
+
+	/* Enlightened VMCS */
+	void *enlightened_vmcs_hva;
+	uint64_t enlightened_vmcs_gpa;
+	void *enlightened_vmcs;
+};
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+						       vm_vaddr_t *p_hv_pages_gva);
+
 #endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index e9c96b49966a6..ef784bd6dfc2d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -517,14 +517,6 @@ struct vmx_pages {
 	uint64_t vmwrite_gpa;
 	void *vmwrite;
 
-	void *vp_assist_hva;
-	uint64_t vp_assist_gpa;
-	void *vp_assist;
-
-	void *enlightened_vmcs_hva;
-	uint64_t enlightened_vmcs_gpa;
-	void *enlightened_vmcs;
-
 	void *eptp_hva;
 	uint64_t eptp_gpa;
 	void *eptp;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
index 32dc0afd9e5bf..a2fc083c65efe 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
@@ -8,6 +8,26 @@
 #include "processor.h"
 #include "hyperv.h"
 
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+						       vm_vaddr_t *p_hv_pages_gva)
+{
+	vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+	struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
+
+	/* Setup of a region of guest memory for the VP Assist page. */
+	hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+	hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
+	hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
+
+	/* Setup of a region of guest memory for the enlightened VMCS. */
+	hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+	hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
+	hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
+
+	*p_hv_pages_gva = hv_pages_gva;
+	return hv;
+}
+
 int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
 {
 	uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index 318ee4658f0b9..59d97531c9b17 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -109,18 +109,6 @@ vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
 	vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
 	memset(vmx->vmwrite_hva, 0, getpagesize());
 
-	/* Setup of a region of guest memory for the VP Assist page. */
-	vmx->vp_assist = (void *)vm_vaddr_alloc_page(vm);
-	vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
-	vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
-
-	/* Setup of a region of guest memory for the enlightened VMCS. */
-	vmx->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
-	vmx->enlightened_vmcs_hva =
-		addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
-	vmx->enlightened_vmcs_gpa =
-		addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
-
 	*p_vmx_gva = vmx_gva;
 	return vmx;
 }
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 5a4c8b1873aab..74f076ba574b3 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -68,7 +68,7 @@ void l2_guest_code(void)
 	vmcall();
 }
 
-void guest_code(struct vmx_pages *vmx_pages)
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
@@ -78,23 +78,22 @@ void guest_code(struct vmx_pages *vmx_pages)
 	GUEST_SYNC(1);
 	GUEST_SYNC(2);
 
-	enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
+	enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
 	evmcs_enable();
 
 	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
 	GUEST_SYNC(3);
-	GUEST_ASSERT(load_evmcs(vmx_pages->enlightened_vmcs_gpa,
-				vmx_pages->enlightened_vmcs));
-	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_ASSERT(load_evmcs(hv_pages));
+	GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
 
 	GUEST_SYNC(4);
-	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
 
 	prepare_vmcs(vmx_pages, l2_guest_code,
 		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
 	GUEST_SYNC(5);
-	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
 	current_evmcs->revision_id = -1u;
 	GUEST_ASSERT(vmlaunch());
 	current_evmcs->revision_id = EVMCS_VERSION;
@@ -104,7 +103,7 @@ void guest_code(struct vmx_pages *vmx_pages)
 		PIN_BASED_NMI_EXITING);
 
 	GUEST_ASSERT(!vmlaunch());
-	GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
+	GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
 
 	/*
 	 * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
@@ -152,7 +151,7 @@ void guest_code(struct vmx_pages *vmx_pages)
 	GUEST_SYNC(11);
 
 	/* Try enlightened vmptrld with an incorrect GPA */
-	evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
+	evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
 	GUEST_ASSERT(vmlaunch());
 	GUEST_ASSERT(ud_count == 1);
 	GUEST_DONE();
@@ -199,7 +198,7 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t vmx_pages_gva = 0;
+	vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
 
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
@@ -217,7 +216,8 @@ int main(int argc, char *argv[])
 	vcpu_enable_evmcs(vcpu);
 
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
-	vcpu_args_set(vcpu, 1, vmx_pages_gva);
+	vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+	vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva);
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vcpu);
-- 
GitLab


From 6c15c3c46520374e1a144942e5228f963f5eb2d5 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:21 +0100
Subject: [PATCH 1923/4122] KVM: selftests: Allocate Hyper-V partition assist
 page

In preparation to testing Hyper-V L2 TLB flush hypercalls, allocate
so-called Partition assist page.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-44-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/include/x86_64/hyperv.h | 5 +++++
 tools/testing/selftests/kvm/lib/x86_64/hyperv.c     | 5 +++++
 2 files changed, 10 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
index becdd8245e846..9218bb5f44bfd 100644
--- a/tools/testing/selftests/kvm/include/x86_64/hyperv.h
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -274,6 +274,11 @@ struct hyperv_test_pages {
 	uint64_t vp_assist_gpa;
 	void *vp_assist;
 
+	/* Partition assist page */
+	void *partition_assist_hva;
+	uint64_t partition_assist_gpa;
+	void *partition_assist;
+
 	/* Enlightened VMCS */
 	void *enlightened_vmcs_hva;
 	uint64_t enlightened_vmcs_gpa;
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
index a2fc083c65efe..efb7e7a1354dc 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
@@ -19,6 +19,11 @@ struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
 	hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
 	hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
 
+	/* Setup of a region of guest memory for the partition assist page. */
+	hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+	hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
+	hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
+
 	/* Setup of a region of guest memory for the enlightened VMCS. */
 	hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
 	hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
-- 
GitLab


From 8fda37cf3d41f1dfb0667c4b10e3dd01d17735b8 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:22 +0100
Subject: [PATCH 1924/4122] KVM: selftests: Stuff RAX/RCX with 'safe' values in
 vmmcall()/vmcall()

vmmcall()/vmcall() are used to exit from L2 to L1 and no concrete hypercall
ABI is currenty followed. With the introduction of Hyper-V L2 TLB flush
it becomes (theoretically) possible that L0 will take responsibility for
handling the call and no L1 exit will happen. Prevent this by stuffing RAX
(KVM ABI) and RCX (Hyper-V ABI) with 'safe' values.

While on it, convert vmmcall() to 'static inline', make it setup stack
frame and move to include/x86_64/svm_util.h.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-45-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/processor.h      |  5 -----
 .../selftests/kvm/include/x86_64/svm_util.h       | 14 ++++++++++++++
 tools/testing/selftests/kvm/include/x86_64/vmx.h  | 15 ++++++++++-----
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index a10f39affa45e..5d310abe6c3f4 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -677,11 +677,6 @@ static inline void cpu_relax(void)
 	asm volatile("rep; nop" ::: "memory");
 }
 
-#define vmmcall()		\
-	__asm__ __volatile__(	\
-		"vmmcall\n"	\
-		)
-
 #define ud2()			\
 	__asm__ __volatile__(	\
 		"ud2\n"	\
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index 7aee6244ab6ac..044f0f872ba9d 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -32,6 +32,20 @@ struct svm_test_data {
 	uint64_t msr_gpa;
 };
 
+static inline void vmmcall(void)
+{
+	/*
+	 * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+	 * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+	 * use of this function is to exit to L1 from L2.  Clobber all other
+	 * GPRs as L1 doesn't correctly preserve them during vmexits.
+	 */
+	__asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
+			     : : "a"(0xdeadbeef), "c"(0xbeefdead)
+			     : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+			       "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
 #define stgi()			\
 	__asm__ __volatile__(	\
 		"stgi\n"	\
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index ef784bd6dfc2d..5f0c0a29c556e 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -437,11 +437,16 @@ static inline int vmresume(void)
 
 static inline void vmcall(void)
 {
-	/* Currently, L1 destroys our GPRs during vmexits.  */
-	__asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
-			     "rax", "rbx", "rcx", "rdx",
-			     "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
-			     "r13", "r14", "r15");
+	/*
+	 * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+	 * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+	 * use of this function is to exit to L1 from L2.  Clobber all other
+	 * GPRs as L1 doesn't correctly preserve them during vmexits.
+	 */
+	__asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
+			     : : "a"(0xdeadbeef), "c"(0xbeefdead)
+			     : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+			       "r10", "r11", "r12", "r13", "r14", "r15");
 }
 
 static inline int vmread(uint64_t encoding, uint64_t *value)
-- 
GitLab


From 75ee7505feae16bbfbed62115e04f762047c4765 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:23 +0100
Subject: [PATCH 1925/4122] KVM: selftests: Introduce rdmsr_from_l2() and use
 it for MSR-Bitmap tests

Hyper-V MSR-Bitmap tests do RDMSR from L2 to exit to L1. While 'evmcs_test'
correctly clobbers all GPRs (which are not preserved), 'hyperv_svm_test'
does not. Introduce a more generic rdmsr_from_l2() to avoid code
duplication and remove hardcoding of MSRs.  Do not put it in common code
because it is really just a selftests bug rather than a processor
feature that requires it.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-46-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/x86_64/evmcs_test.c | 27 +++++++------------
 .../selftests/kvm/x86_64/hyperv_svm_test.c    | 17 +++++++++---
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 74f076ba574b3..58fa98512c248 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -30,22 +30,15 @@ static void guest_nmi_handler(struct ex_regs *regs)
 {
 }
 
-/* Exits to L1 destroy GRPs! */
-static inline void rdmsr_fs_base(void)
+static inline void rdmsr_from_l2(uint32_t msr)
 {
-	__asm__ __volatile__ ("mov $0xc0000100, %%rcx; rdmsr" : : :
-			      "rax", "rbx", "rcx", "rdx",
-			      "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
-			      "r13", "r14", "r15");
-}
-static inline void rdmsr_gs_base(void)
-{
-	__asm__ __volatile__ ("mov $0xc0000101, %%rcx; rdmsr" : : :
-			      "rax", "rbx", "rcx", "rdx",
-			      "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
-			      "r13", "r14", "r15");
+	/* Currently, L1 doesn't preserve GPRs during vmexits. */
+	__asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+			      "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+			      "r10", "r11", "r12", "r13", "r14", "r15");
 }
 
+/* Exit to L1 from L2 with RDMSR instruction */
 void l2_guest_code(void)
 {
 	GUEST_SYNC(7);
@@ -58,11 +51,11 @@ void l2_guest_code(void)
 	vmcall();
 
 	/* MSR-Bitmap tests */
-	rdmsr_fs_base(); /* intercepted */
-	rdmsr_fs_base(); /* intercepted */
-	rdmsr_gs_base(); /* not intercepted */
+	rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+	rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+	rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
 	vmcall();
-	rdmsr_gs_base(); /* intercepted */
+	rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
 
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 1c3fc38b4f151..3c9a2a1b4cfd8 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -23,6 +23,15 @@
 
 #define L2_GUEST_STACK_SIZE 256
 
+/* Exit to L1 from L2 with RDMSR instruction */
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+	/* Currently, L1 doesn't preserve GPRs during vmexits. */
+	__asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+			      "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+			      "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
 void l2_guest_code(void)
 {
 	GUEST_SYNC(3);
@@ -30,11 +39,11 @@ void l2_guest_code(void)
 	vmmcall();
 
 	/* MSR-Bitmap tests */
-	rdmsr(MSR_FS_BASE); /* intercepted */
-	rdmsr(MSR_FS_BASE); /* intercepted */
-	rdmsr(MSR_GS_BASE); /* not intercepted */
+	rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+	rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+	rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
 	vmmcall();
-	rdmsr(MSR_GS_BASE); /* intercepted */
+	rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
 
 	GUEST_SYNC(5);
 
-- 
GitLab


From 4b5d8b222bf185bda25b56de403afde7b6d3c466 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:24 +0100
Subject: [PATCH 1926/4122] KVM: selftests: evmcs_test: Introduce L2 TLB flush
 test

Enable Hyper-V L2 TLB flush and check that Hyper-V TLB flush hypercalls
from L2 don't exit to L1 unless 'TlbLockCount' is set in the
Partition assist page.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-47-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/evmcs.h      |  2 +
 .../testing/selftests/kvm/x86_64/evmcs_test.c | 50 ++++++++++++++++++-
 2 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index 94d6059e9a12d..901caf0e0939b 100644
--- a/tools/testing/selftests/kvm/include/x86_64/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -237,6 +237,8 @@ struct hv_enlightened_vmcs {
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL    BIT(15)
 #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL                      0xFFFF
 
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
+
 extern struct hv_enlightened_vmcs *current_evmcs;
 
 int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
index 58fa98512c248..ba09d300c9539 100644
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -16,6 +16,7 @@
 
 #include "kvm_util.h"
 
+#include "hyperv.h"
 #include "vmx.h"
 
 static int ud_count;
@@ -41,6 +42,8 @@ static inline void rdmsr_from_l2(uint32_t msr)
 /* Exit to L1 from L2 with RDMSR instruction */
 void l2_guest_code(void)
 {
+	u64 unused;
+
 	GUEST_SYNC(7);
 
 	GUEST_SYNC(8);
@@ -57,15 +60,31 @@ void l2_guest_code(void)
 	vmcall();
 	rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
 
+	/* L2 TLB flush tests */
+	hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+			 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
+	rdmsr_from_l2(MSR_FS_BASE);
+	/*
+	 * Note: hypercall status (RAX) is not preserved correctly by L1 after
+	 * synthetic vmexit, use unchecked version.
+	 */
+	__hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+			   HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
+			   &unused);
+
 	/* Done, exit to L1 and never come back.  */
 	vmcall();
 }
 
-void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages)
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
+		vm_vaddr_t hv_hcall_page_gpa)
 {
 #define L2_GUEST_STACK_SIZE 64
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
+
 	x2apic_enable();
 
 	GUEST_SYNC(1);
@@ -95,7 +114,17 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages)
 	vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
 		PIN_BASED_NMI_EXITING);
 
+	/* L2 TLB flush setup */
+	current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
+	current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+	current_evmcs->hv_vm_id = 1;
+	current_evmcs->hv_vp_id = 1;
+	current_vp_assist->nested_control.features.directhypercall = 1;
+	*(u32 *)(hv_pages->partition_assist) = 0;
+
 	GUEST_ASSERT(!vmlaunch());
+	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+	GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
 	GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
 
 	/*
@@ -139,6 +168,18 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages)
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
 	current_evmcs->guest_rip += 2; /* rdmsr */
 
+	/*
+	 * L2 TLB flush test. First VMCALL should be handled directly by L0,
+	 * no VMCALL exit expected.
+	 */
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+	current_evmcs->guest_rip += 2; /* rdmsr */
+	/* Enable synthetic vmexit */
+	*(u32 *)(hv_pages->partition_assist) = 1;
+	GUEST_ASSERT(!vmresume());
+	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
+
 	GUEST_ASSERT(!vmresume());
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
 	GUEST_SYNC(11);
@@ -192,6 +233,7 @@ static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
 int main(int argc, char *argv[])
 {
 	vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+	vm_vaddr_t hcall_page;
 
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
@@ -205,12 +247,16 @@ int main(int argc, char *argv[])
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
 
+	hcall_page = vm_vaddr_alloc_pages(vm, 1);
+	memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
+
 	vcpu_set_hv_cpuid(vcpu);
 	vcpu_enable_evmcs(vcpu);
 
 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
 	vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
-	vcpu_args_set(vcpu, 2, vmx_pages_gva, hv_pages_gva);
+	vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+	vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
 
 	vm_init_descriptor_tables(vm);
 	vcpu_init_descriptor_tables(vcpu);
-- 
GitLab


From 9c2e881945dca4904e8817acf4f0a928570bd400 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:25 +0100
Subject: [PATCH 1927/4122] KVM: selftests: hyperv_svm_test: Introduce L2 TLB
 flush test

Enable Hyper-V L2 TLB flush and check that Hyper-V TLB flush hypercalls
from L2 don't exit to L1 unless 'TlbLockCount' is set in the Partition
assist page.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-48-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/include/x86_64/svm.h        |  4 ++
 .../selftests/kvm/x86_64/hyperv_svm_test.c    | 59 +++++++++++++++++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
index 483e6ae12f69e..4803e10560558 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -76,6 +76,10 @@ struct hv_vmcb_enlightenments {
  */
 #define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
 
+/* Synthetic VM-Exit */
+#define HV_SVM_EXITCODE_ENL			0xf0000000
+#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH	(1)
+
 struct __attribute__ ((__packed__)) vmcb_control_area {
 	u32 intercept_cr;
 	u32 intercept_dr;
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 3c9a2a1b4cfd8..3b3cc94ba8e47 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -34,6 +34,8 @@ static inline void rdmsr_from_l2(uint32_t msr)
 
 void l2_guest_code(void)
 {
+	u64 unused;
+
 	GUEST_SYNC(3);
 	/* Exit to L1 */
 	vmmcall();
@@ -47,11 +49,28 @@ void l2_guest_code(void)
 
 	GUEST_SYNC(5);
 
+	/* L2 TLB flush tests */
+	hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+			 HV_HYPERCALL_FAST_BIT, 0x0,
+			 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			 HV_FLUSH_ALL_PROCESSORS);
+	rdmsr_from_l2(MSR_FS_BASE);
+	/*
+	 * Note: hypercall status (RAX) is not preserved correctly by L1 after
+	 * synthetic vmexit, use unchecked version.
+	 */
+	__hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+			   HV_HYPERCALL_FAST_BIT, 0x0,
+			   HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+			   HV_FLUSH_ALL_PROCESSORS, &unused);
+
 	/* Done, exit to L1 and never come back.  */
 	vmmcall();
 }
 
-static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
+						    struct hyperv_test_pages *hv_pages,
+						    vm_vaddr_t pgs_gpa)
 {
 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 	struct vmcb *vmcb = svm->vmcb;
@@ -59,13 +78,23 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
 
 	GUEST_SYNC(1);
 
-	wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+	wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+	enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
 
 	GUEST_ASSERT(svm->vmcb_gpa);
 	/* Prepare for L2 execution. */
 	generic_svm_setup(svm, l2_guest_code,
 			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
 
+	/* L2 TLB flush setup */
+	hve->partition_assist_page = hv_pages->partition_assist_gpa;
+	hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+	hve->hv_vm_id = 1;
+	hve->hv_vp_id = 1;
+	current_vp_assist->nested_control.features.directhypercall = 1;
+	*(u32 *)(hv_pages->partition_assist) = 0;
+
 	GUEST_SYNC(2);
 	run_guest(vmcb, svm->vmcb_gpa);
 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
@@ -100,6 +129,20 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
 	vmcb->save.rip += 2; /* rdmsr */
 
+
+	/*
+	 * L2 TLB flush test. First VMCALL should be handled directly by L0,
+	 * no VMCALL exit expected.
+	 */
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+	vmcb->save.rip += 2; /* rdmsr */
+	/* Enable synthetic vmexit */
+	*(u32 *)(hv_pages->partition_assist) = 1;
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
+	GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
+
 	run_guest(vmcb, svm->vmcb_gpa);
 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
 	GUEST_SYNC(6);
@@ -109,8 +152,8 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm)
 
 int main(int argc, char *argv[])
 {
-	vm_vaddr_t nested_gva = 0;
-
+	vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
+	vm_vaddr_t hcall_page;
 	struct kvm_vcpu *vcpu;
 	struct kvm_vm *vm;
 	struct kvm_run *run;
@@ -124,7 +167,13 @@ int main(int argc, char *argv[])
 	vcpu_set_hv_cpuid(vcpu);
 	run = vcpu->run;
 	vcpu_alloc_svm(vm, &nested_gva);
-	vcpu_args_set(vcpu, 1, nested_gva);
+	vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+
+	hcall_page = vm_vaddr_alloc_pages(vm, 1);
+	memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
+
+	vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+	vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
 
 	for (stage = 1;; stage++) {
 		vcpu_run(vcpu);
-- 
GitLab


From 0fa32dad1e78629cb42999dacd82489503fdf4c2 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Tue, 1 Nov 2022 15:54:26 +0100
Subject: [PATCH 1928/4122] KVM: selftests: Rename 'evmcs_test' to
 'hyperv_evmcs'

Conform to the rest of Hyper-V emulation selftests which have 'hyperv'
prefix. Get rid of '_test' suffix as well as the purpose of this code
is fairly obvious.

Reviewed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20221101145426.251680-49-vkuznets@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/.gitignore                          | 2 +-
 tools/testing/selftests/kvm/Makefile                            | 2 +-
 .../selftests/kvm/x86_64/{evmcs_test.c => hyperv_evmcs.c}       | 0
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename tools/testing/selftests/kvm/x86_64/{evmcs_test.c => hyperv_evmcs.c} (100%)

diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index dc7e28cf2da07..082855d94c722 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -16,7 +16,6 @@
 /x86_64/cpuid_test
 /x86_64/cr4_cpuid_sync_test
 /x86_64/debug_regs
-/x86_64/evmcs_test
 /x86_64/exit_on_emulation_failure_test
 /x86_64/fix_hypercall_test
 /x86_64/get_msr_index_features
@@ -24,6 +23,7 @@
 /x86_64/kvm_pv_test
 /x86_64/hyperv_clock
 /x86_64/hyperv_cpuid
+/x86_64/hyperv_evmcs
 /x86_64/hyperv_features
 /x86_64/hyperv_ipi
 /x86_64/hyperv_svm_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 246d52e9df60d..2275ba861e0e5 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -82,11 +82,11 @@ TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
 TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
-TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
 TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
similarity index 100%
rename from tools/testing/selftests/kvm/x86_64/evmcs_test.c
rename to tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
-- 
GitLab


From 28b4b0596343d19d140da059eee0e5c2b5328731 Mon Sep 17 00:00:00 2001
From: Long Li <leo.lilong@huawei.com>
Date: Thu, 17 Nov 2022 13:02:56 -0800
Subject: [PATCH 1929/4122] xfs: fix incorrect i_nlink caused by inode racing

The following error occurred during the fsstress test:

XFS: Assertion failed: VFS_I(ip)->i_nlink >= 2, file: fs/xfs/xfs_inode.c, line: 2452

The problem was that inode race condition causes incorrect i_nlink to be
written to disk, and then it is read into memory. Consider the following
call graph, inodes that are marked as both XFS_IFLUSHING and
XFS_IRECLAIMABLE, i_nlink will be reset to 1 and then restored to original
value in xfs_reinit_inode(). Therefore, the i_nlink of directory on disk
may be set to 1.

  xfsaild
      xfs_inode_item_push
          xfs_iflush_cluster
              xfs_iflush
                  xfs_inode_to_disk

  xfs_iget
      xfs_iget_cache_hit
          xfs_iget_recycle
              xfs_reinit_inode
                  inode_init_always

xfs_reinit_inode() needs to hold the ILOCK_EXCL as it is changing internal
inode state and can race with other RCU protected inode lookups. On the
read side, xfs_iflush_cluster() grabs the ILOCK_SHARED while under rcu +
ip->i_flags_lock, and so xfs_iflush/xfs_inode_to_disk() are protected from
racing inode updates (during transactions) by that lock.

Fixes: ff7bebeb91f8 ("xfs: refactor the inode recycling code") # goes further back than this
Signed-off-by: Long Li <leo.lilong@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_icache.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index eae7427062cf9..f35e2cee52655 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -342,6 +342,9 @@ xfs_iget_recycle(
 
 	trace_xfs_iget_recycle(ip);
 
+	if (!xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+		return -EAGAIN;
+
 	/*
 	 * We need to make it look like the inode is being reclaimed to prevent
 	 * the actual reclaim workers from stomping over us while we recycle
@@ -355,6 +358,7 @@ xfs_iget_recycle(
 
 	ASSERT(!rwsem_is_locked(&inode->i_rwsem));
 	error = xfs_reinit_inode(mp, inode);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	if (error) {
 		/*
 		 * Re-initializing the inode failed, and we are in deep
@@ -518,6 +522,8 @@ xfs_iget_cache_hit(
 	if (ip->i_flags & XFS_IRECLAIMABLE) {
 		/* Drops i_flags_lock and RCU read lock. */
 		error = xfs_iget_recycle(pag, ip);
+		if (error == -EAGAIN)
+			goto out_skip;
 		if (error)
 			return error;
 	} else {
-- 
GitLab


From 2a402120a8d413238999a67ebff5b7dca0e5d14c Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Wed, 16 Nov 2022 10:45:35 +0100
Subject: [PATCH 1930/4122] IB/isert: use the ISCSI_LOGIN_CURRENT_STAGE macro

Use the proper macro to get the current_stage value.

Link: https://lore.kernel.org/r/20221116094535.138298-1-mlombard@redhat.com
Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/ulp/isert/ib_isert.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c
index b360a1527cd10..75404885cf981 100644
--- a/drivers/infiniband/ulp/isert/ib_isert.c
+++ b/drivers/infiniband/ulp/isert/ib_isert.c
@@ -993,9 +993,8 @@ isert_rx_login_req(struct isert_conn *isert_conn)
 		 * login request PDU.
 		 */
 		login->leading_connection = (!login_req->tsih) ? 1 : 0;
-		login->current_stage =
-			(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
-			 >> 2;
+		login->current_stage = ISCSI_LOGIN_CURRENT_STAGE(
+				login_req->flags);
 		login->version_min	= login_req->min_version;
 		login->version_max	= login_req->max_version;
 		memcpy(login->isid, login_req->isid, 6);
-- 
GitLab


From 5e5ff73c2e5863f93fc5fd78d178cd8f2af12464 Mon Sep 17 00:00:00 2001
From: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Date: Mon, 17 Oct 2022 20:04:50 +0530
Subject: [PATCH 1931/4122] asm-generic/io: Add _RET_IP_ to MMIO trace for more
 accurate debug info

Due to compiler optimizations like inlining, there are cases where
MMIO traces using _THIS_IP_ for caller information might not be
sufficient to provide accurate debug traces.

1) With optimizations (Seen with GCC):

In this case, _THIS_IP_ works fine and prints the caller information
since it will be inlined into the caller and we get the debug traces
on who made the MMIO access, for ex:

rwmmio_read: qcom_smmu_tlb_sync+0xe0/0x1b0 width=32 addr=0xffff8000087447f4
rwmmio_post_read: qcom_smmu_tlb_sync+0xe0/0x1b0 width=32 val=0x0 addr=0xffff8000087447f4

2) Without optimizations (Seen with Clang):

_THIS_IP_ will not be sufficient in this case as it will print only
the MMIO accessors itself which is of not much use since it is not
inlined as below for example:

rwmmio_read: readl+0x4/0x80 width=32 addr=0xffff8000087447f4
rwmmio_post_read: readl+0x48/0x80 width=32 val=0x4 addr=0xffff8000087447f4

So in order to handle this second case as well irrespective of the compiler
optimizations, add _RET_IP_ to MMIO trace to make it provide more accurate
debug information in all these scenarios.

Before:

rwmmio_read: readl+0x4/0x80 width=32 addr=0xffff8000087447f4
rwmmio_post_read: readl+0x48/0x80 width=32 val=0x4 addr=0xffff8000087447f4

After:

rwmmio_read: qcom_smmu_tlb_sync+0xe0/0x1b0 -> readl+0x4/0x80 width=32 addr=0xffff8000087447f4
rwmmio_post_read: qcom_smmu_tlb_sync+0xe0/0x1b0 -> readl+0x4/0x80 width=32 val=0x0 addr=0xffff8000087447f4

Fixes: 210031971cdd ("asm-generic/io: Add logging support for MMIO accessors")
Signed-off-by: Sai Prakash Ranjan <quic_saipraka@quicinc.com>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/asm-generic/io.h      | 80 +++++++++++++++++------------------
 include/trace/events/rwmmio.h | 43 ++++++++++++-------
 lib/trace_readwrite.c         | 16 +++----
 3 files changed, 75 insertions(+), 64 deletions(-)

diff --git a/include/asm-generic/io.h b/include/asm-generic/io.h
index a68f8fbf423be..4c44a29b5e8ef 100644
--- a/include/asm-generic/io.h
+++ b/include/asm-generic/io.h
@@ -80,24 +80,24 @@ DECLARE_TRACEPOINT(rwmmio_read);
 DECLARE_TRACEPOINT(rwmmio_post_read);
 
 void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
-		    unsigned long caller_addr);
+		    unsigned long caller_addr, unsigned long caller_addr0);
 void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
-			 unsigned long caller_addr);
+			 unsigned long caller_addr, unsigned long caller_addr0);
 void log_read_mmio(u8 width, const volatile void __iomem *addr,
-		   unsigned long caller_addr);
+		   unsigned long caller_addr, unsigned long caller_addr0);
 void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr,
-			unsigned long caller_addr);
+			unsigned long caller_addr, unsigned long caller_addr0);
 
 #else
 
 static inline void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
-				  unsigned long caller_addr) {}
+				  unsigned long caller_addr, unsigned long caller_addr0) {}
 static inline void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
-				       unsigned long caller_addr) {}
+				       unsigned long caller_addr, unsigned long caller_addr0) {}
 static inline void log_read_mmio(u8 width, const volatile void __iomem *addr,
-				 unsigned long caller_addr) {}
+				 unsigned long caller_addr, unsigned long caller_addr0) {}
 static inline void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr,
-				      unsigned long caller_addr) {}
+				      unsigned long caller_addr, unsigned long caller_addr0) {}
 
 #endif /* CONFIG_TRACE_MMIO_ACCESS */
 
@@ -188,11 +188,11 @@ static inline u8 readb(const volatile void __iomem *addr)
 {
 	u8 val;
 
-	log_read_mmio(8, addr, _THIS_IP_);
+	log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
 	__io_br();
 	val = __raw_readb(addr);
 	__io_ar(val);
-	log_post_read_mmio(val, 8, addr, _THIS_IP_);
+	log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -203,11 +203,11 @@ static inline u16 readw(const volatile void __iomem *addr)
 {
 	u16 val;
 
-	log_read_mmio(16, addr, _THIS_IP_);
+	log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
 	__io_br();
 	val = __le16_to_cpu((__le16 __force)__raw_readw(addr));
 	__io_ar(val);
-	log_post_read_mmio(val, 16, addr, _THIS_IP_);
+	log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -218,11 +218,11 @@ static inline u32 readl(const volatile void __iomem *addr)
 {
 	u32 val;
 
-	log_read_mmio(32, addr, _THIS_IP_);
+	log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
 	__io_br();
 	val = __le32_to_cpu((__le32 __force)__raw_readl(addr));
 	__io_ar(val);
-	log_post_read_mmio(val, 32, addr, _THIS_IP_);
+	log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -234,11 +234,11 @@ static inline u64 readq(const volatile void __iomem *addr)
 {
 	u64 val;
 
-	log_read_mmio(64, addr, _THIS_IP_);
+	log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
 	__io_br();
 	val = __le64_to_cpu(__raw_readq(addr));
 	__io_ar(val);
-	log_post_read_mmio(val, 64, addr, _THIS_IP_);
+	log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -248,11 +248,11 @@ static inline u64 readq(const volatile void __iomem *addr)
 #define writeb writeb
 static inline void writeb(u8 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 8, addr, _THIS_IP_);
+	log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
 	__io_bw();
 	__raw_writeb(value, addr);
 	__io_aw();
-	log_post_write_mmio(value, 8, addr, _THIS_IP_);
+	log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 
@@ -260,11 +260,11 @@ static inline void writeb(u8 value, volatile void __iomem *addr)
 #define writew writew
 static inline void writew(u16 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 16, addr, _THIS_IP_);
+	log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
 	__io_bw();
 	__raw_writew((u16 __force)cpu_to_le16(value), addr);
 	__io_aw();
-	log_post_write_mmio(value, 16, addr, _THIS_IP_);
+	log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 
@@ -272,11 +272,11 @@ static inline void writew(u16 value, volatile void __iomem *addr)
 #define writel writel
 static inline void writel(u32 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 32, addr, _THIS_IP_);
+	log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
 	__io_bw();
 	__raw_writel((u32 __force)__cpu_to_le32(value), addr);
 	__io_aw();
-	log_post_write_mmio(value, 32, addr, _THIS_IP_);
+	log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 
@@ -285,11 +285,11 @@ static inline void writel(u32 value, volatile void __iomem *addr)
 #define writeq writeq
 static inline void writeq(u64 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 64, addr, _THIS_IP_);
+	log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
 	__io_bw();
 	__raw_writeq(__cpu_to_le64(value), addr);
 	__io_aw();
-	log_post_write_mmio(value, 64, addr, _THIS_IP_);
+	log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 #endif /* CONFIG_64BIT */
@@ -305,9 +305,9 @@ static inline u8 readb_relaxed(const volatile void __iomem *addr)
 {
 	u8 val;
 
-	log_read_mmio(8, addr, _THIS_IP_);
+	log_read_mmio(8, addr, _THIS_IP_, _RET_IP_);
 	val = __raw_readb(addr);
-	log_post_read_mmio(val, 8, addr, _THIS_IP_);
+	log_post_read_mmio(val, 8, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -318,9 +318,9 @@ static inline u16 readw_relaxed(const volatile void __iomem *addr)
 {
 	u16 val;
 
-	log_read_mmio(16, addr, _THIS_IP_);
+	log_read_mmio(16, addr, _THIS_IP_, _RET_IP_);
 	val = __le16_to_cpu(__raw_readw(addr));
-	log_post_read_mmio(val, 16, addr, _THIS_IP_);
+	log_post_read_mmio(val, 16, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -331,9 +331,9 @@ static inline u32 readl_relaxed(const volatile void __iomem *addr)
 {
 	u32 val;
 
-	log_read_mmio(32, addr, _THIS_IP_);
+	log_read_mmio(32, addr, _THIS_IP_, _RET_IP_);
 	val = __le32_to_cpu(__raw_readl(addr));
-	log_post_read_mmio(val, 32, addr, _THIS_IP_);
+	log_post_read_mmio(val, 32, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -344,9 +344,9 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr)
 {
 	u64 val;
 
-	log_read_mmio(64, addr, _THIS_IP_);
+	log_read_mmio(64, addr, _THIS_IP_, _RET_IP_);
 	val = __le64_to_cpu(__raw_readq(addr));
-	log_post_read_mmio(val, 64, addr, _THIS_IP_);
+	log_post_read_mmio(val, 64, addr, _THIS_IP_, _RET_IP_);
 	return val;
 }
 #endif
@@ -355,9 +355,9 @@ static inline u64 readq_relaxed(const volatile void __iomem *addr)
 #define writeb_relaxed writeb_relaxed
 static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 8, addr, _THIS_IP_);
+	log_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
 	__raw_writeb(value, addr);
-	log_post_write_mmio(value, 8, addr, _THIS_IP_);
+	log_post_write_mmio(value, 8, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 
@@ -365,9 +365,9 @@ static inline void writeb_relaxed(u8 value, volatile void __iomem *addr)
 #define writew_relaxed writew_relaxed
 static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 16, addr, _THIS_IP_);
+	log_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
 	__raw_writew(cpu_to_le16(value), addr);
-	log_post_write_mmio(value, 16, addr, _THIS_IP_);
+	log_post_write_mmio(value, 16, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 
@@ -375,9 +375,9 @@ static inline void writew_relaxed(u16 value, volatile void __iomem *addr)
 #define writel_relaxed writel_relaxed
 static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 32, addr, _THIS_IP_);
+	log_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
 	__raw_writel(__cpu_to_le32(value), addr);
-	log_post_write_mmio(value, 32, addr, _THIS_IP_);
+	log_post_write_mmio(value, 32, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 
@@ -385,9 +385,9 @@ static inline void writel_relaxed(u32 value, volatile void __iomem *addr)
 #define writeq_relaxed writeq_relaxed
 static inline void writeq_relaxed(u64 value, volatile void __iomem *addr)
 {
-	log_write_mmio(value, 64, addr, _THIS_IP_);
+	log_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
 	__raw_writeq(__cpu_to_le64(value), addr);
-	log_post_write_mmio(value, 64, addr, _THIS_IP_);
+	log_post_write_mmio(value, 64, addr, _THIS_IP_, _RET_IP_);
 }
 #endif
 
diff --git a/include/trace/events/rwmmio.h b/include/trace/events/rwmmio.h
index de41159216c19..a43e5dd7436b5 100644
--- a/include/trace/events/rwmmio.h
+++ b/include/trace/events/rwmmio.h
@@ -12,12 +12,14 @@
 
 DECLARE_EVENT_CLASS(rwmmio_rw_template,
 
-	TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr),
+	TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width,
+		 volatile void __iomem *addr),
 
-	TP_ARGS(caller, val, width, addr),
+	TP_ARGS(caller, caller0, val, width, addr),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, caller)
+		__field(unsigned long, caller0)
 		__field(unsigned long, addr)
 		__field(u64, val)
 		__field(u8, width)
@@ -25,56 +27,64 @@ DECLARE_EVENT_CLASS(rwmmio_rw_template,
 
 	TP_fast_assign(
 		__entry->caller = caller;
+		__entry->caller0 = caller0;
 		__entry->val = val;
 		__entry->addr = (unsigned long)addr;
 		__entry->width = width;
 	),
 
-	TP_printk("%pS width=%d val=%#llx addr=%#lx",
-		(void *)__entry->caller, __entry->width,
+	TP_printk("%pS -> %pS width=%d val=%#llx addr=%#lx",
+		(void *)__entry->caller0, (void *)__entry->caller, __entry->width,
 		__entry->val, __entry->addr)
 );
 
 DEFINE_EVENT(rwmmio_rw_template, rwmmio_write,
-	TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr),
-	TP_ARGS(caller, val, width, addr)
+	TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width,
+		 volatile void __iomem *addr),
+	TP_ARGS(caller, caller0, val, width, addr)
 );
 
 DEFINE_EVENT(rwmmio_rw_template, rwmmio_post_write,
-	TP_PROTO(unsigned long caller, u64 val, u8 width, volatile void __iomem *addr),
-	TP_ARGS(caller, val, width, addr)
+	TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width,
+		 volatile void __iomem *addr),
+	TP_ARGS(caller, caller0, val, width, addr)
 );
 
 TRACE_EVENT(rwmmio_read,
 
-	TP_PROTO(unsigned long caller, u8 width, const volatile void __iomem *addr),
+	TP_PROTO(unsigned long caller, unsigned long caller0, u8 width,
+		 const volatile void __iomem *addr),
 
-	TP_ARGS(caller, width, addr),
+	TP_ARGS(caller, caller0, width, addr),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, caller)
+		__field(unsigned long, caller0)
 		__field(unsigned long, addr)
 		__field(u8, width)
 	),
 
 	TP_fast_assign(
 		__entry->caller = caller;
+		__entry->caller0 = caller0;
 		__entry->addr = (unsigned long)addr;
 		__entry->width = width;
 	),
 
-	TP_printk("%pS width=%d addr=%#lx",
-		 (void *)__entry->caller, __entry->width, __entry->addr)
+	TP_printk("%pS -> %pS width=%d addr=%#lx",
+		 (void *)__entry->caller0, (void *)__entry->caller, __entry->width, __entry->addr)
 );
 
 TRACE_EVENT(rwmmio_post_read,
 
-	TP_PROTO(unsigned long caller, u64 val, u8 width, const volatile void __iomem *addr),
+	TP_PROTO(unsigned long caller, unsigned long caller0, u64 val, u8 width,
+		 const volatile void __iomem *addr),
 
-	TP_ARGS(caller, val, width, addr),
+	TP_ARGS(caller, caller0, val, width, addr),
 
 	TP_STRUCT__entry(
 		__field(unsigned long, caller)
+		__field(unsigned long, caller0)
 		__field(unsigned long, addr)
 		__field(u64, val)
 		__field(u8, width)
@@ -82,13 +92,14 @@ TRACE_EVENT(rwmmio_post_read,
 
 	TP_fast_assign(
 		__entry->caller = caller;
+		__entry->caller0 = caller0;
 		__entry->val = val;
 		__entry->addr = (unsigned long)addr;
 		__entry->width = width;
 	),
 
-	TP_printk("%pS width=%d val=%#llx addr=%#lx",
-		 (void *)__entry->caller, __entry->width,
+	TP_printk("%pS -> %pS width=%d val=%#llx addr=%#lx",
+		 (void *)__entry->caller0, (void *)__entry->caller, __entry->width,
 		 __entry->val, __entry->addr)
 );
 
diff --git a/lib/trace_readwrite.c b/lib/trace_readwrite.c
index 88637038b30c8..62b4e8b3c733b 100644
--- a/lib/trace_readwrite.c
+++ b/lib/trace_readwrite.c
@@ -14,33 +14,33 @@
 
 #ifdef CONFIG_TRACE_MMIO_ACCESS
 void log_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
-		    unsigned long caller_addr)
+		    unsigned long caller_addr, unsigned long caller_addr0)
 {
-	trace_rwmmio_write(caller_addr, val, width, addr);
+	trace_rwmmio_write(caller_addr, caller_addr0, val, width, addr);
 }
 EXPORT_SYMBOL_GPL(log_write_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_write);
 
 void log_post_write_mmio(u64 val, u8 width, volatile void __iomem *addr,
-			 unsigned long caller_addr)
+			 unsigned long caller_addr, unsigned long caller_addr0)
 {
-	trace_rwmmio_post_write(caller_addr, val, width, addr);
+	trace_rwmmio_post_write(caller_addr, caller_addr0, val, width, addr);
 }
 EXPORT_SYMBOL_GPL(log_post_write_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_write);
 
 void log_read_mmio(u8 width, const volatile void __iomem *addr,
-		   unsigned long caller_addr)
+		   unsigned long caller_addr, unsigned long caller_addr0)
 {
-	trace_rwmmio_read(caller_addr, width, addr);
+	trace_rwmmio_read(caller_addr, caller_addr0, width, addr);
 }
 EXPORT_SYMBOL_GPL(log_read_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_read);
 
 void log_post_read_mmio(u64 val, u8 width, const volatile void __iomem *addr,
-			unsigned long caller_addr)
+			unsigned long caller_addr, unsigned long caller_addr0)
 {
-	trace_rwmmio_post_read(caller_addr, val, width, addr);
+	trace_rwmmio_post_read(caller_addr, caller_addr0, val, width, addr);
 }
 EXPORT_SYMBOL_GPL(log_post_read_mmio);
 EXPORT_TRACEPOINT_SYMBOL_GPL(rwmmio_post_read);
-- 
GitLab


From 2d9cd957d40c3ac491b358e7cff0515bb07a3a9c Mon Sep 17 00:00:00 2001
From: Zeng Heng <zengheng4@huawei.com>
Date: Mon, 21 Nov 2022 10:00:29 +0800
Subject: [PATCH 1932/4122] PCI: Check for alloc failure in pci_request_irq()

When kvasprintf() fails to allocate memory, it returns a NULL pointer.
Return error from pci_request_irq() so we don't dereference it.

[bhelgaas: commit log]
Fixes: 704e8953d3e9 ("PCI/irq: Add pci_request_irq() and pci_free_irq() helpers")
Link: https://lore.kernel.org/r/20221121020029.3759444-1-zengheng4@huawei.com
Signed-off-by: Zeng Heng <zengheng4@huawei.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 drivers/pci/irq.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/irq.c b/drivers/pci/irq.c
index 12ecd0aaa28d6..0050e8f6814ed 100644
--- a/drivers/pci/irq.c
+++ b/drivers/pci/irq.c
@@ -44,6 +44,8 @@ int pci_request_irq(struct pci_dev *dev, unsigned int nr, irq_handler_t handler,
 	va_start(ap, fmt);
 	devname = kvasprintf(GFP_KERNEL, fmt, ap);
 	va_end(ap);
+	if (!devname)
+		return -ENOMEM;
 
 	ret = request_threaded_irq(pci_irq_vector(dev, nr), handler, thread_fn,
 				   irqflags, devname, dev_id);
-- 
GitLab


From 9b51d072da1d27e1193e84708201c48e385ad912 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Thu, 17 Nov 2022 21:15:46 +0800
Subject: [PATCH 1933/4122] RDMA/hfi: Decrease PCI device reference count in
 error path

pci_get_device() will increase the reference count for the returned
pci_dev, and also decrease the reference count for the input parameter
*from* if it is not NULL.

If we break out the loop in node_affinity_init() with 'dev' not NULL, we
need to call pci_dev_put() to decrease the reference count. Add missing
pci_dev_put() in error path.

Fixes: c513de490f80 ("IB/hfi1: Invalid NUMA node information can cause a divide by zero")
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Link: https://lore.kernel.org/r/20221117131546.113280-1-wangxiongfeng2@huawei.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/hfi1/affinity.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/hfi1/affinity.c b/drivers/infiniband/hw/hfi1/affinity.c
index 877f8e84a672a..77ee77d4000fb 100644
--- a/drivers/infiniband/hw/hfi1/affinity.c
+++ b/drivers/infiniband/hw/hfi1/affinity.c
@@ -177,6 +177,8 @@ out:
 	for (node = 0; node < node_affinity.num_possible_nodes; node++)
 		hfi1_per_node_cntr[node] = 1;
 
+	pci_dev_put(dev);
+
 	return 0;
 }
 
-- 
GitLab


From 8e96729fc26c8967db45a3fb7a60387619f77a22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 21 Nov 2022 18:22:36 +0100
Subject: [PATCH 1934/4122] crypto: ccree - Make cc_debugfs_global_fini()
 available for module init function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ccree_init() calls cc_debugfs_global_fini(), the former is an init
function and the latter an exit function though.

A modular build emits:

	WARNING: modpost: drivers/crypto/ccree/ccree.o: section mismatch in reference: init_module (section: .init.text) -> cc_debugfs_global_fini (section: .exit.text)

(with CONFIG_DEBUG_SECTION_MISMATCH=y).

Fixes: 4f1c596df706 ("crypto: ccree - Remove debugfs when platform_driver_register failed")
Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccree/cc_debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/ccree/cc_debugfs.c b/drivers/crypto/ccree/cc_debugfs.c
index 7083767602fcf..8f008f024f8f1 100644
--- a/drivers/crypto/ccree/cc_debugfs.c
+++ b/drivers/crypto/ccree/cc_debugfs.c
@@ -55,7 +55,7 @@ void __init cc_debugfs_global_init(void)
 	cc_debugfs_dir = debugfs_create_dir("ccree", NULL);
 }
 
-void __exit cc_debugfs_global_fini(void)
+void cc_debugfs_global_fini(void)
 {
 	debugfs_remove(cc_debugfs_dir);
 }
-- 
GitLab


From 357057ee55d3c99a5de5abe8150f7bca04f8e53b Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Thu, 17 Nov 2022 11:59:14 +0800
Subject: [PATCH 1935/4122] staging: vme_user: Fix possible UAF in
 tsi148_dma_list_add

Smatch report warning as follows:

drivers/staging/vme_user/vme_tsi148.c:1757 tsi148_dma_list_add() warn:
  '&entry->list' not removed from list

In tsi148_dma_list_add(), the error path "goto err_dma" will not
remove entry->list from list->entries, but entry will be freed,
then list traversal may cause UAF.

Fix by removeing it from list->entries before free().

Fixes: b2383c90a9d6 ("vme: tsi148: fix first DMA item mapping")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Link: https://lore.kernel.org/r/20221117035914.2954454-1-cuigaosheng1@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vme_user/vme_tsi148.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/vme_user/vme_tsi148.c b/drivers/staging/vme_user/vme_tsi148.c
index 020e0b3bce64b..0171f46d1848f 100644
--- a/drivers/staging/vme_user/vme_tsi148.c
+++ b/drivers/staging/vme_user/vme_tsi148.c
@@ -1751,6 +1751,7 @@ static int tsi148_dma_list_add(struct vme_dma_list *list,
 	return 0;
 
 err_dma:
+	list_del(&entry->list);
 err_dest:
 err_source:
 err_align:
-- 
GitLab


From 2b7962bd05163f5b20fb5f933092b997debf8ed6 Mon Sep 17 00:00:00 2001
From: Brent Pappas <bpappas@pappasbrent.com>
Date: Thu, 17 Nov 2022 14:54:43 -0500
Subject: [PATCH 1936/4122] staging: gdm724x: Replace macro GDM_TTY_READY with
 static inline function

Replace the macro GDM_TTY_READY with a static inline function
to follow the Linux kernel coding style.

Signed-off-by: Brent Pappas <bpappas@pappasbrent.com>
Link: https://lore.kernel.org/r/20221117195443.19616-1-bpappas@pappasbrent.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/gdm724x/gdm_tty.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/staging/gdm724x/gdm_tty.c b/drivers/staging/gdm724x/gdm_tty.c
index cc6d80554c982..e1a84d6020f48 100644
--- a/drivers/staging/gdm724x/gdm_tty.c
+++ b/drivers/staging/gdm724x/gdm_tty.c
@@ -21,7 +21,10 @@
 
 #define MUX_TX_MAX_SIZE 2048
 
-#define GDM_TTY_READY(gdm) (gdm && gdm->tty_dev && gdm->port.count)
+static inline bool gdm_tty_ready(struct gdm *gdm)
+{
+	return gdm && gdm->tty_dev && gdm->port.count;
+}
 
 static struct tty_driver *gdm_driver[TTY_MAX_COUNT];
 static struct gdm *gdm_table[TTY_MAX_COUNT][GDM_TTY_MINOR];
@@ -113,7 +116,7 @@ static int gdm_tty_recv_complete(void *data,
 {
 	struct gdm *gdm = tty_dev->gdm[index];
 
-	if (!GDM_TTY_READY(gdm)) {
+	if (!gdm_tty_ready(gdm)) {
 		if (complete == RECV_PACKET_PROCESS_COMPLETE)
 			gdm->tty_dev->recv_func(gdm->tty_dev->priv_dev,
 						gdm_tty_recv_complete);
@@ -140,7 +143,7 @@ static void gdm_tty_send_complete(void *arg)
 {
 	struct gdm *gdm = arg;
 
-	if (!GDM_TTY_READY(gdm))
+	if (!gdm_tty_ready(gdm))
 		return;
 
 	tty_port_tty_wakeup(&gdm->port);
@@ -154,7 +157,7 @@ static int gdm_tty_write(struct tty_struct *tty, const unsigned char *buf,
 	int sent_len = 0;
 	int sending_len = 0;
 
-	if (!GDM_TTY_READY(gdm))
+	if (!gdm_tty_ready(gdm))
 		return -ENODEV;
 
 	if (!len)
@@ -181,7 +184,7 @@ static unsigned int gdm_tty_write_room(struct tty_struct *tty)
 {
 	struct gdm *gdm = tty->driver_data;
 
-	if (!GDM_TTY_READY(gdm))
+	if (!gdm_tty_ready(gdm))
 		return 0;
 
 	return WRITE_SIZE;
-- 
GitLab


From 733611730676de202fade0cb73792c17d5aa9903 Mon Sep 17 00:00:00 2001
From: Umang Jain <umang.jain@ideasonboard.com>
Date: Fri, 18 Nov 2022 14:12:42 +0530
Subject: [PATCH 1937/4122] Revert "staging: mmal-vchiq: Avoid use of bool in
 structures"

This reverts commit 640e77466e69d9c28de227bc76881f5501f532ca.

In commit 7967656ffbfa ("coding-style: Clarify the expectations around
bool") the check to dis-allow bool structure members was removed from
checkpatch.pl. It promotes bool structure members to store boolean
values. This enhances code readability.

Signed-off-by: Umang Jain <umang.jain@ideasonboard.com>
Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
Link: https://lore.kernel.org/r/20221118084244.199909-2-umang.jain@ideasonboard.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../staging/vc04_services/vchiq-mmal/mmal-vchiq.c    | 12 ++++++------
 .../staging/vc04_services/vchiq-mmal/mmal-vchiq.h    |  4 ++--
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
index cb921c94996a1..4abb6178cb9ff 100644
--- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
+++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
@@ -863,9 +863,9 @@ static int port_info_get(struct vchiq_mmal_instance *instance,
 		goto release_msg;
 
 	if (rmsg->u.port_info_get_reply.port.is_enabled == 0)
-		port->enabled = 0;
+		port->enabled = false;
 	else
-		port->enabled = 1;
+		port->enabled = true;
 
 	/* copy the values out of the message */
 	port->handle = rmsg->u.port_info_get_reply.port_handle;
@@ -1304,7 +1304,7 @@ static int port_disable(struct vchiq_mmal_instance *instance,
 	if (!port->enabled)
 		return 0;
 
-	port->enabled = 0;
+	port->enabled = false;
 
 	ret = port_action_port(instance, port,
 			       MMAL_MSG_PORT_ACTION_TYPE_DISABLE);
@@ -1359,7 +1359,7 @@ static int port_enable(struct vchiq_mmal_instance *instance,
 	if (ret)
 		goto done;
 
-	port->enabled = 1;
+	port->enabled = true;
 
 	if (port->buffer_cb) {
 		/* send buffer headers to videocore */
@@ -1531,7 +1531,7 @@ int vchiq_mmal_port_connect_tunnel(struct vchiq_mmal_instance *instance,
 			pr_err("failed disconnecting src port\n");
 			goto release_unlock;
 		}
-		src->connected->enabled = 0;
+		src->connected->enabled = false;
 		src->connected = NULL;
 	}
 
@@ -1799,7 +1799,7 @@ int vchiq_mmal_component_disable(struct vchiq_mmal_instance *instance,
 
 	ret = disable_component(instance, component);
 	if (ret == 0)
-		component->enabled = 0;
+		component->enabled = false;
 
 	mutex_unlock(&instance->vchiq_mutex);
 
diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h
index 6006e29232b3a..6d984cf5a83ac 100644
--- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h
+++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h
@@ -48,7 +48,7 @@ typedef void (*vchiq_mmal_buffer_cb)(
 		int status, struct mmal_buffer *buffer);
 
 struct vchiq_mmal_port {
-	u32 enabled:1;
+	bool enabled;
 	u32 handle;
 	u32 type; /* port type, cached to use on port info set */
 	u32 index; /* port index, cached to use on port info set */
@@ -83,7 +83,7 @@ struct vchiq_mmal_port {
 
 struct vchiq_mmal_component {
 	u32 in_use:1;
-	u32 enabled:1;
+	bool enabled;
 	u32 handle;  /* VideoCore handle for component */
 	u32 inputs;  /* Number of input ports */
 	u32 outputs; /* Number of output ports */
-- 
GitLab


From c0012a39cf6c7197ad93da0bdba7245c094f8469 Mon Sep 17 00:00:00 2001
From: Umang Jain <umang.jain@ideasonboard.com>
Date: Fri, 18 Nov 2022 14:12:43 +0530
Subject: [PATCH 1938/4122] vc04_services: mmal-vchiq: Use bool for
 vchiq_mmal_component.in_use

In commit 7967656ffbfa ("coding-style: Clarify the expectations around
bool") the check to dis-allow bool structure members was removed from
checkpatch.pl. It promotes bool structure members to store boolean
values. This enhances code readability.

Signed-off-by: Umang Jain <umang.jain@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Link: https://lore.kernel.org/r/20221118084244.199909-3-umang.jain@ideasonboard.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c | 6 +++---
 drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
index 4abb6178cb9ff..294b184d4a495 100644
--- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
+++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.c
@@ -1648,7 +1648,7 @@ int vchiq_mmal_component_init(struct vchiq_mmal_instance *instance,
 	for (idx = 0; idx < VCHIQ_MMAL_MAX_COMPONENTS; idx++) {
 		if (!instance->component[idx].in_use) {
 			component = &instance->component[idx];
-			component->in_use = 1;
+			component->in_use = true;
 			break;
 		}
 	}
@@ -1724,7 +1724,7 @@ release_component:
 	destroy_component(instance, component);
 unlock:
 	if (component)
-		component->in_use = 0;
+		component->in_use = false;
 	mutex_unlock(&instance->vchiq_mutex);
 
 	return ret;
@@ -1747,7 +1747,7 @@ int vchiq_mmal_component_finalise(struct vchiq_mmal_instance *instance,
 
 	ret = destroy_component(instance, component);
 
-	component->in_use = 0;
+	component->in_use = false;
 
 	mutex_unlock(&instance->vchiq_mutex);
 
diff --git a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h
index 6d984cf5a83ac..09f030919d4e2 100644
--- a/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h
+++ b/drivers/staging/vc04_services/vchiq-mmal/mmal-vchiq.h
@@ -82,7 +82,7 @@ struct vchiq_mmal_port {
 };
 
 struct vchiq_mmal_component {
-	u32 in_use:1;
+	bool in_use;
 	bool enabled;
 	u32 handle;  /* VideoCore handle for component */
 	u32 inputs;  /* Number of input ports */
-- 
GitLab


From f198d34759eb3d110d37bb42f6c39cd90bd0b0cb Mon Sep 17 00:00:00 2001
From: Umang Jain <umang.jain@ideasonboard.com>
Date: Fri, 18 Nov 2022 14:12:44 +0530
Subject: [PATCH 1939/4122] vc04_services: bcm2835-camera: Use bool values for
 mmal_fmt.remove_padding

mmal_fmt.remove_padding is defined as a boolean type hence, use boolean
values for it instead of 0/1 integers. This enhances code readability.

Signed-off-by: Umang Jain <umang.jain@ideasonboard.com>
Reviewed-by: Dave Stevenson <dave.stevenson@raspberrypi.com>
Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
Link: https://lore.kernel.org/r/20221118084244.199909-4-umang.jain@ideasonboard.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bcm2835-camera/bcm2835-camera.c           | 30 +++++++++----------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c
index fd456d1f70611..797ebe2a973a1 100644
--- a/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c
+++ b/drivers/staging/vc04_services/bcm2835-camera/bcm2835-camera.c
@@ -87,21 +87,21 @@ static struct mmal_fmt formats[] = {
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
-		.remove_padding = 1,
+		.remove_padding = true,
 	}, {
 		.fourcc = V4L2_PIX_FMT_YUYV,
 		.mmal = MMAL_ENCODING_YUYV,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_RGB24,
 		.mmal = MMAL_ENCODING_RGB24,
 		.depth = 24,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 3,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_JPEG,
 		.flags = V4L2_FMT_FLAG_COMPRESSED,
@@ -109,7 +109,7 @@ static struct mmal_fmt formats[] = {
 		.depth = 8,
 		.mmal_component = COMP_IMAGE_ENCODE,
 		.ybbp = 0,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_H264,
 		.flags = V4L2_FMT_FLAG_COMPRESSED,
@@ -117,7 +117,7 @@ static struct mmal_fmt formats[] = {
 		.depth = 8,
 		.mmal_component = COMP_VIDEO_ENCODE,
 		.ybbp = 0,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_MJPEG,
 		.flags = V4L2_FMT_FLAG_COMPRESSED,
@@ -125,63 +125,63 @@ static struct mmal_fmt formats[] = {
 		.depth = 8,
 		.mmal_component = COMP_VIDEO_ENCODE,
 		.ybbp = 0,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_YVYU,
 		.mmal = MMAL_ENCODING_YVYU,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_VYUY,
 		.mmal = MMAL_ENCODING_VYUY,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_UYVY,
 		.mmal = MMAL_ENCODING_UYVY,
 		.depth = 16,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 2,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_NV12,
 		.mmal = MMAL_ENCODING_NV12,
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
-		.remove_padding = 1,
+		.remove_padding = true,
 	}, {
 		.fourcc = V4L2_PIX_FMT_BGR24,
 		.mmal = MMAL_ENCODING_BGR24,
 		.depth = 24,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 3,
-		.remove_padding = 0,
+		.remove_padding = false,
 	}, {
 		.fourcc = V4L2_PIX_FMT_YVU420,
 		.mmal = MMAL_ENCODING_YV12,
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
-		.remove_padding = 1,
+		.remove_padding = true,
 	}, {
 		.fourcc = V4L2_PIX_FMT_NV21,
 		.mmal = MMAL_ENCODING_NV21,
 		.depth = 12,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 1,
-		.remove_padding = 1,
+		.remove_padding = true,
 	}, {
 		.fourcc = V4L2_PIX_FMT_BGR32,
 		.mmal = MMAL_ENCODING_BGRA,
 		.depth = 32,
 		.mmal_component = COMP_CAMERA,
 		.ybbp = 4,
-		.remove_padding = 0,
+		.remove_padding = false,
 	},
 };
 
@@ -1147,7 +1147,7 @@ static int mmal_setup_components(struct bcm2835_mmal_dev *dev,
 	struct vchiq_mmal_port *port = NULL, *camera_port = NULL;
 	struct vchiq_mmal_component *encode_component = NULL;
 	struct mmal_fmt *mfmt = get_format(f);
-	u32 remove_padding;
+	bool remove_padding;
 
 	if (!mfmt)
 		return -EINVAL;
-- 
GitLab


From 9a2c1d64c8eb4fab0387c0943eb6666b246f96aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:03 +0100
Subject: [PATCH 1940/4122] staging: most: i2c: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-570-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/most/i2c/i2c.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/most/i2c/i2c.c b/drivers/staging/most/i2c/i2c.c
index 285a071f02be1..df53a4c4f8504 100644
--- a/drivers/staging/most/i2c/i2c.c
+++ b/drivers/staging/most/i2c/i2c.c
@@ -284,7 +284,7 @@ static irqreturn_t most_irq_handler(int irq, void *_dev)
  *
  * Register the i2c client device as a MOST interface
  */
-static int i2c_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int i2c_probe(struct i2c_client *client)
 {
 	struct hdm_i2c *dev;
 	int ret, i;
@@ -359,7 +359,7 @@ static struct i2c_driver i2c_driver = {
 	.driver = {
 		.name = "hdm_i2c",
 	},
-	.probe = i2c_probe,
+	.probe_new = i2c_probe,
 	.remove = i2c_remove,
 	.id_table = i2c_id,
 };
-- 
GitLab


From b62649822e8c03ec1319f9d33c753106b8c80bcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:04 +0100
Subject: [PATCH 1941/4122] staging: olpc_dcon: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-571-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/olpc_dcon/olpc_dcon.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c
index 4fb9b9f107996..2fba52e0bd7bd 100644
--- a/drivers/staging/olpc_dcon/olpc_dcon.c
+++ b/drivers/staging/olpc_dcon/olpc_dcon.c
@@ -579,7 +579,7 @@ static int dcon_detect(struct i2c_client *client, struct i2c_board_info *info)
 	return 0;
 }
 
-static int dcon_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int dcon_probe(struct i2c_client *client)
 {
 	struct dcon_priv *dcon;
 	int rc, i, j;
@@ -779,7 +779,7 @@ static struct i2c_driver dcon_driver = {
 	},
 	.class = I2C_CLASS_DDC | I2C_CLASS_HWMON,
 	.id_table = dcon_idtable,
-	.probe = dcon_probe,
+	.probe_new = dcon_probe,
 	.remove = dcon_remove,
 	.detect = dcon_detect,
 	.address_list = normal_i2c,
-- 
GitLab


From 9dadff066244543780e5d9ee406b3ec7af19e22c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:02 +0100
Subject: [PATCH 1942/4122] staging: iio: ade7854: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-569-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/iio/meter/ade7854-i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/iio/meter/ade7854-i2c.c b/drivers/staging/iio/meter/ade7854-i2c.c
index a9a06e8dda51f..70f64b68f5b9c 100644
--- a/drivers/staging/iio/meter/ade7854-i2c.c
+++ b/drivers/staging/iio/meter/ade7854-i2c.c
@@ -109,8 +109,7 @@ unlock:
 	return ret;
 }
 
-static int ade7854_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int ade7854_i2c_probe(struct i2c_client *client)
 {
 	struct ade7854_state *st;
 	struct iio_dev *indio_dev;
@@ -141,7 +140,7 @@ static struct i2c_driver ade7854_i2c_driver = {
 	.driver = {
 		.name = "ade7854",
 	},
-	.probe    = ade7854_i2c_probe,
+	.probe_new = ade7854_i2c_probe,
 	.id_table = ade7854_id,
 };
 module_i2c_driver(ade7854_i2c_driver);
-- 
GitLab


From ec62b4424174f41bdcedd08d12d7bed80088453d Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 22 Nov 2022 08:29:43 +0800
Subject: [PATCH 1943/4122] iommu/vt-d: Allocate pasid table in device probe
 path

Whether or not a domain is attached to the device, the pasid table should
always be valid as long as it has been probed. This moves the pasid table
allocation from the domain attaching device path to device probe path and
frees it in the device release path.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20221118132451.114406-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 996a8b5ee5ee6..6b8a24f68da8d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2475,13 +2475,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
 
 	/* PASID table is mandatory for a PCI device in scalable mode. */
 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
-		ret = intel_pasid_alloc_table(dev);
-		if (ret) {
-			dev_err(dev, "PASID table allocation failed\n");
-			dmar_remove_one_dev_info(dev);
-			return ret;
-		}
-
 		/* Setup the PASID entry for requests without PASID: */
 		if (hw_pass_through && domain_type_is_si(domain))
 			ret = intel_pasid_setup_pass_through(iommu, domain,
@@ -4106,7 +4099,6 @@ static void dmar_remove_one_dev_info(struct device *dev)
 
 		iommu_disable_dev_iotlb(info);
 		domain_context_clear(info);
-		intel_pasid_free_table(info->dev);
 	}
 
 	spin_lock_irqsave(&domain->lock, flags);
@@ -4466,6 +4458,7 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
 	struct device_domain_info *info;
 	struct intel_iommu *iommu;
 	u8 bus, devfn;
+	int ret;
 
 	iommu = device_to_iommu(dev, &bus, &devfn);
 	if (!iommu || !iommu->iommu.ops)
@@ -4509,6 +4502,16 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
 
 	dev_iommu_priv_set(dev, info);
 
+	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
+		ret = intel_pasid_alloc_table(dev);
+		if (ret) {
+			dev_err(dev, "PASID table allocation failed\n");
+			dev_iommu_priv_set(dev, NULL);
+			kfree(info);
+			return ERR_PTR(ret);
+		}
+	}
+
 	return &iommu->iommu;
 }
 
@@ -4517,6 +4520,7 @@ static void intel_iommu_release_device(struct device *dev)
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 
 	dmar_remove_one_dev_info(dev);
+	intel_pasid_free_table(dev);
 	dev_iommu_priv_set(dev, NULL);
 	kfree(info);
 	set_dma_ops(dev, NULL);
-- 
GitLab


From c7be17c2903d4acbf9aa372bfb6e2a418387fce0 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 22 Nov 2022 08:29:44 +0800
Subject: [PATCH 1944/4122] iommu/vt-d: Add device_block_translation() helper

If domain attaching to device fails, the IOMMU driver should bring the
device to blocking DMA state. The upper layer is expected to recover it
by attaching a new domain. Use device_block_translation() in the error
path of dev_attach to make the behavior specific.

The difference between device_block_translation() and the previous
dmar_remove_one_dev_info() is that, in the scalable mode, it is the
RID2PASID entry instead of context entry being cleared. As a result,
enabling PCI capabilities is moved up.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20221118132451.114406-3-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 44 ++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 6b8a24f68da8d..6aafb86ef5c34 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -277,7 +277,7 @@ static LIST_HEAD(dmar_satc_units);
 #define for_each_rmrr_units(rmrr) \
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 
-static void dmar_remove_one_dev_info(struct device *dev);
+static void device_block_translation(struct device *dev);
 
 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
@@ -1400,7 +1400,7 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
 {
 	struct pci_dev *pdev;
 
-	if (!info || !dev_is_pci(info->dev))
+	if (!dev_is_pci(info->dev))
 		return;
 
 	pdev = to_pci_dev(info->dev);
@@ -2045,7 +2045,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	} else {
 		iommu_flush_write_buffer(iommu);
 	}
-	iommu_enable_pci_caps(info);
 
 	ret = 0;
 
@@ -2487,7 +2486,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
 					dev, PASID_RID2PASID);
 		if (ret) {
 			dev_err(dev, "Setup RID2PASID failed\n");
-			dmar_remove_one_dev_info(dev);
+			device_block_translation(dev);
 			return ret;
 		}
 	}
@@ -2495,10 +2494,12 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
 	ret = domain_context_mapping(domain, dev);
 	if (ret) {
 		dev_err(dev, "Domain context map failed\n");
-		dmar_remove_one_dev_info(dev);
+		device_block_translation(dev);
 		return ret;
 	}
 
+	iommu_enable_pci_caps(info);
+
 	return 0;
 }
 
@@ -4109,6 +4110,37 @@ static void dmar_remove_one_dev_info(struct device *dev)
 	info->domain = NULL;
 }
 
+/*
+ * Clear the page table pointer in context or pasid table entries so that
+ * all DMA requests without PASID from the device are blocked. If the page
+ * table has been set, clean up the data structures.
+ */
+static void device_block_translation(struct device *dev)
+{
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+	struct intel_iommu *iommu = info->iommu;
+	unsigned long flags;
+
+	iommu_disable_dev_iotlb(info);
+	if (!dev_is_real_dma_subdevice(dev)) {
+		if (sm_supported(iommu))
+			intel_pasid_tear_down_entry(iommu, dev,
+						    PASID_RID2PASID, false);
+		else
+			domain_context_clear(info);
+	}
+
+	if (!info->domain)
+		return;
+
+	spin_lock_irqsave(&info->domain->lock, flags);
+	list_del(&info->link);
+	spin_unlock_irqrestore(&info->domain->lock, flags);
+
+	domain_detach_iommu(info->domain, iommu);
+	info->domain = NULL;
+}
+
 static int md_domain_init(struct dmar_domain *domain, int guest_width)
 {
 	int adjust_width;
@@ -4232,7 +4264,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		struct device_domain_info *info = dev_iommu_priv_get(dev);
 
 		if (info->domain)
-			dmar_remove_one_dev_info(dev);
+			device_block_translation(dev);
 	}
 
 	ret = prepare_domain_attach_device(domain, dev);
-- 
GitLab


From 35a99c54dd60103930db4a472dd15f232e754867 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 22 Nov 2022 08:29:45 +0800
Subject: [PATCH 1945/4122] iommu/vt-d: Add blocking domain support

The Intel IOMMU hardwares support blocking DMA transactions by clearing
the translation table entries. This implements a real blocking domain to
avoid using an empty UNMANAGED domain. The detach_dev callback of the
domain ops is not used in any path. Remove it to avoid dead code as well.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20221118132451.114406-4-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 6aafb86ef5c34..25c772e8106f3 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -278,6 +278,7 @@ static LIST_HEAD(dmar_satc_units);
 	list_for_each_entry(rmrr, &dmar_rmrr_units, list)
 
 static void device_block_translation(struct device *dev);
+static void intel_iommu_domain_free(struct iommu_domain *domain);
 
 int dmar_disabled = !IS_ENABLED(CONFIG_INTEL_IOMMU_DEFAULT_ON);
 int intel_iommu_sm = IS_ENABLED(CONFIG_INTEL_IOMMU_SCALABLE_MODE_DEFAULT_ON);
@@ -4162,12 +4163,28 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	return 0;
 }
 
+static int blocking_domain_attach_dev(struct iommu_domain *domain,
+				      struct device *dev)
+{
+	device_block_translation(dev);
+	return 0;
+}
+
+static struct iommu_domain blocking_domain = {
+	.ops = &(const struct iommu_domain_ops) {
+		.attach_dev	= blocking_domain_attach_dev,
+		.free		= intel_iommu_domain_free
+	}
+};
+
 static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 {
 	struct dmar_domain *dmar_domain;
 	struct iommu_domain *domain;
 
 	switch (type) {
+	case IOMMU_DOMAIN_BLOCKED:
+		return &blocking_domain;
 	case IOMMU_DOMAIN_DMA:
 	case IOMMU_DOMAIN_DMA_FQ:
 	case IOMMU_DOMAIN_UNMANAGED:
@@ -4200,7 +4217,7 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 
 static void intel_iommu_domain_free(struct iommu_domain *domain)
 {
-	if (domain != &si_domain->domain)
+	if (domain != &si_domain->domain && domain != &blocking_domain)
 		domain_exit(to_dmar_domain(domain));
 }
 
@@ -4274,12 +4291,6 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 	return domain_add_dev_info(to_dmar_domain(domain), dev);
 }
 
-static void intel_iommu_detach_device(struct iommu_domain *domain,
-				      struct device *dev)
-{
-	dmar_remove_one_dev_info(dev);
-}
-
 static int intel_iommu_map(struct iommu_domain *domain,
 			   unsigned long iova, phys_addr_t hpa,
 			   size_t size, int iommu_prot, gfp_t gfp)
@@ -4767,7 +4778,6 @@ const struct iommu_ops intel_iommu_ops = {
 #endif
 	.default_domain_ops = &(const struct iommu_domain_ops) {
 		.attach_dev		= intel_iommu_attach_device,
-		.detach_dev		= intel_iommu_detach_device,
 		.map_pages		= intel_iommu_map_pages,
 		.unmap_pages		= intel_iommu_unmap_pages,
 		.iotlb_sync_map		= intel_iommu_iotlb_sync_map,
-- 
GitLab


From ba502132f5430d66f768569f2af32b8f268322a8 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 22 Nov 2022 08:29:46 +0800
Subject: [PATCH 1946/4122] iommu/vt-d: Rename iommu_disable_dev_iotlb()

Rename iommu_disable_dev_iotlb() to iommu_disable_pci_caps() to pair with
iommu_enable_pci_caps().

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20221118132451.114406-5-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 25c772e8106f3..a5885665ccef1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1441,7 +1441,7 @@ static void iommu_enable_pci_caps(struct device_domain_info *info)
 	}
 }
 
-static void iommu_disable_dev_iotlb(struct device_domain_info *info)
+static void iommu_disable_pci_caps(struct device_domain_info *info)
 {
 	struct pci_dev *pdev;
 
@@ -4099,7 +4099,7 @@ static void dmar_remove_one_dev_info(struct device *dev)
 			intel_pasid_tear_down_entry(iommu, info->dev,
 					PASID_RID2PASID, false);
 
-		iommu_disable_dev_iotlb(info);
+		iommu_disable_pci_caps(info);
 		domain_context_clear(info);
 	}
 
@@ -4122,7 +4122,7 @@ static void device_block_translation(struct device *dev)
 	struct intel_iommu *iommu = info->iommu;
 	unsigned long flags;
 
-	iommu_disable_dev_iotlb(info);
+	iommu_disable_pci_caps(info);
 	if (!dev_is_real_dma_subdevice(dev)) {
 		if (sm_supported(iommu))
 			intel_pasid_tear_down_entry(iommu, dev,
-- 
GitLab


From a8204479f284a9d21c22e2fd7c9f7564b5828553 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 22 Nov 2022 08:29:47 +0800
Subject: [PATCH 1947/4122] iommu/vt-d: Rename domain_add_dev_info()

dmar_domain_attach_device() is more meaningful according to what this
helper does.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20221118132451.114406-6-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index a5885665ccef1..3bd79ae238f2d 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -2453,7 +2453,8 @@ static int __init si_domain_init(int hw)
 	return 0;
 }
 
-static int domain_add_dev_info(struct dmar_domain *domain, struct device *dev)
+static int dmar_domain_attach_device(struct dmar_domain *domain,
+				     struct device *dev)
 {
 	struct device_domain_info *info = dev_iommu_priv_get(dev);
 	struct intel_iommu *iommu;
@@ -4288,7 +4289,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 	if (ret)
 		return ret;
 
-	return domain_add_dev_info(to_dmar_domain(domain), dev);
+	return dmar_domain_attach_device(to_dmar_domain(domain), dev);
 }
 
 static int intel_iommu_map(struct iommu_domain *domain,
-- 
GitLab


From b1cf1563f3b7396a2cb76b12b3bcdd7046b46372 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 22 Nov 2022 08:29:48 +0800
Subject: [PATCH 1948/4122] iommu/vt-d: Remove unnecessary
 domain_context_mapped()

The device_domain_info::domain accurately records the domain attached to
the device. It is unnecessary to check whether the context is present in
the attach_dev path. Remove it to make the code neat.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20221118132451.114406-7-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 47 +++----------------------------------
 1 file changed, 3 insertions(+), 44 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3bd79ae238f2d..3b37f1b3b6deb 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -780,19 +780,6 @@ static void domain_flush_cache(struct dmar_domain *domain,
 		clflush_cache_range(addr, size);
 }
 
-static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
-{
-	struct context_entry *context;
-	int ret = 0;
-
-	spin_lock(&iommu->lock);
-	context = iommu_context_addr(iommu, bus, devfn, 0);
-	if (context)
-		ret = context_present(context);
-	spin_unlock(&iommu->lock);
-	return ret;
-}
-
 static void free_context_table(struct intel_iommu *iommu)
 {
 	struct context_entry *context;
@@ -2097,30 +2084,6 @@ domain_context_mapping(struct dmar_domain *domain, struct device *dev)
 				      &domain_context_mapping_cb, &data);
 }
 
-static int domain_context_mapped_cb(struct pci_dev *pdev,
-				    u16 alias, void *opaque)
-{
-	struct intel_iommu *iommu = opaque;
-
-	return !device_context_mapped(iommu, PCI_BUS_NUM(alias), alias & 0xff);
-}
-
-static int domain_context_mapped(struct device *dev)
-{
-	struct intel_iommu *iommu;
-	u8 bus, devfn;
-
-	iommu = device_to_iommu(dev, &bus, &devfn);
-	if (!iommu)
-		return -ENODEV;
-
-	if (!dev_is_pci(dev))
-		return device_context_mapped(iommu, bus, devfn);
-
-	return !pci_for_each_dma_alias(to_pci_dev(dev),
-				       domain_context_mapped_cb, iommu);
-}
-
 /* Returns a number of VTD pages, but aligned to MM page size */
 static inline unsigned long aligned_nrpages(unsigned long host_addr,
 					    size_t size)
@@ -4269,6 +4232,7 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 static int intel_iommu_attach_device(struct iommu_domain *domain,
 				     struct device *dev)
 {
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
 	int ret;
 
 	if (domain->type == IOMMU_DOMAIN_UNMANAGED &&
@@ -4277,13 +4241,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 		return -EPERM;
 	}
 
-	/* normally dev is not mapped */
-	if (unlikely(domain_context_mapped(dev))) {
-		struct device_domain_info *info = dev_iommu_priv_get(dev);
-
-		if (info->domain)
-			device_block_translation(dev);
-	}
+	if (info->domain)
+		device_block_translation(dev);
 
 	ret = prepare_domain_attach_device(domain, dev);
 	if (ret)
-- 
GitLab


From e5b0feb4361a4830b9133f57ed13923d70409b69 Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 22 Nov 2022 08:29:49 +0800
Subject: [PATCH 1949/4122] iommu/vt-d: Use real field for indication of first
 level

The dmar_domain uses bit field members to indicate the behaviors. Add
a bit field for using first level and remove the flags member to avoid
duplication.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Link: https://lore.kernel.org/r/20221118132451.114406-8-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/intel/iommu.c | 25 ++++++++++---------------
 drivers/iommu/intel/iommu.h | 15 +++++----------
 2 files changed, 15 insertions(+), 25 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3b37f1b3b6deb..a3db7ac3d60c1 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -383,11 +383,6 @@ static inline int domain_type_is_si(struct dmar_domain *domain)
 	return domain->domain.type == IOMMU_DOMAIN_IDENTITY;
 }
 
-static inline bool domain_use_first_level(struct dmar_domain *domain)
-{
-	return domain->flags & DOMAIN_FLAG_USE_FIRST_LEVEL;
-}
-
 static inline int domain_pfn_supported(struct dmar_domain *domain,
 				       unsigned long pfn)
 {
@@ -501,7 +496,7 @@ static int domain_update_iommu_superpage(struct dmar_domain *domain,
 	rcu_read_lock();
 	for_each_active_iommu(iommu, drhd) {
 		if (iommu != skip) {
-			if (domain && domain_use_first_level(domain)) {
+			if (domain && domain->use_first_level) {
 				if (!cap_fl1gp_support(iommu->cap))
 					mask = 0x1;
 			} else {
@@ -579,7 +574,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain)
 	 * paging and 57-bits with 5-level paging). Hence, skip bit
 	 * [N-1].
 	 */
-	if (domain_use_first_level(domain))
+	if (domain->use_first_level)
 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw - 1);
 	else
 		domain->domain.geometry.aperture_end = __DOMAIN_MAX_ADDR(domain->gaw);
@@ -947,7 +942,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 
 			domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
 			pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
-			if (domain_use_first_level(domain))
+			if (domain->use_first_level)
 				pteval |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
 
 			if (cmpxchg64(&pte->val, 0ULL, pteval))
@@ -1498,7 +1493,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu,
 	if (ih)
 		ih = 1 << 6;
 
-	if (domain_use_first_level(domain)) {
+	if (domain->use_first_level) {
 		qi_flush_piotlb(iommu, did, PASID_RID2PASID, addr, pages, ih);
 	} else {
 		unsigned long bitmask = aligned_pages - 1;
@@ -1552,7 +1547,7 @@ static inline void __mapping_notify_one(struct intel_iommu *iommu,
 	 * It's a non-present to present mapping. Only flush if caching mode
 	 * and second level.
 	 */
-	if (cap_caching_mode(iommu->cap) && !domain_use_first_level(domain))
+	if (cap_caching_mode(iommu->cap) && !domain->use_first_level)
 		iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1);
 	else
 		iommu_flush_write_buffer(iommu);
@@ -1568,7 +1563,7 @@ static void intel_flush_iotlb_all(struct iommu_domain *domain)
 		struct intel_iommu *iommu = info->iommu;
 		u16 did = domain_id_iommu(dmar_domain, iommu);
 
-		if (domain_use_first_level(dmar_domain))
+		if (dmar_domain->use_first_level)
 			qi_flush_piotlb(iommu, did, PASID_RID2PASID, 0, -1, 0);
 		else
 			iommu->flush.flush_iotlb(iommu, did, 0, 0,
@@ -1741,7 +1736,7 @@ static struct dmar_domain *alloc_domain(unsigned int type)
 
 	domain->nid = NUMA_NO_NODE;
 	if (first_level_by_default(type))
-		domain->flags |= DOMAIN_FLAG_USE_FIRST_LEVEL;
+		domain->use_first_level = true;
 	domain->has_iotlb_device = false;
 	INIT_LIST_HEAD(&domain->devices);
 	spin_lock_init(&domain->lock);
@@ -2173,7 +2168,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
 
 	attr = prot & (DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP);
 	attr |= DMA_FL_PTE_PRESENT;
-	if (domain_use_first_level(domain)) {
+	if (domain->use_first_level) {
 		attr |= DMA_FL_PTE_XD | DMA_FL_PTE_US | DMA_FL_PTE_ACCESS;
 		if (prot & DMA_PTE_WRITE)
 			attr |= DMA_FL_PTE_DIRTY;
@@ -2443,7 +2438,7 @@ static int dmar_domain_attach_device(struct dmar_domain *domain,
 		if (hw_pass_through && domain_type_is_si(domain))
 			ret = intel_pasid_setup_pass_through(iommu, domain,
 					dev, PASID_RID2PASID);
-		else if (domain_use_first_level(domain))
+		else if (domain->use_first_level)
 			ret = domain_setup_first_level(iommu, domain, dev,
 					PASID_RID2PASID);
 		else
@@ -4412,7 +4407,7 @@ static void domain_set_force_snooping(struct dmar_domain *domain)
 	 * Second level page table supports per-PTE snoop control. The
 	 * iommu_map() interface will handle this by setting SNP bit.
 	 */
-	if (!domain_use_first_level(domain)) {
+	if (!domain->use_first_level) {
 		domain->set_pte_snp = true;
 		return;
 	}
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 92023dff9513a..30b0d72aeb6c4 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -517,14 +517,6 @@ struct context_entry {
 	u64 hi;
 };
 
-/*
- * When VT-d works in the scalable mode, it allows DMA translation to
- * happen through either first level or second level page table. This
- * bit marks that the DMA translation for the domain goes through the
- * first level page table, otherwise, it goes through the second level.
- */
-#define DOMAIN_FLAG_USE_FIRST_LEVEL		BIT(1)
-
 struct iommu_domain_info {
 	struct intel_iommu *iommu;
 	unsigned int refcnt;		/* Refcount of devices per iommu */
@@ -541,6 +533,11 @@ struct dmar_domain {
 	u8 iommu_coherency: 1;		/* indicate coherency of iommu access */
 	u8 force_snooping : 1;		/* Create IOPTEs with snoop control */
 	u8 set_pte_snp:1;
+	u8 use_first_level:1;		/* DMA translation for the domain goes
+					 * through the first level page table,
+					 * otherwise, goes through the second
+					 * level.
+					 */
 
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
@@ -550,8 +547,6 @@ struct dmar_domain {
 
 	/* adjusted guest address width, 0 is level 2 30-bit */
 	int		agaw;
-
-	int		flags;		/* flags to find out type of domain */
 	int		iommu_superpage;/* Level of superpages supported:
 					   0 == 4KiB (no superpages), 1 == 2MiB,
 					   2 == 1GiB, 3 == 512GiB, 4 == 1TiB */
-- 
GitLab


From 3a5154c723ba5ceb9ce374a7307e03263c03fd29 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 18 Nov 2022 18:22:20 +0000
Subject: [PATCH 1950/4122] KVM: arm64: Take a pointer to walker data in
 kvm_dereference_pteref()

Rather than passing through the state of the KVM_PGTABLE_WALK_SHARED
flag, just take a pointer to the whole walker structure instead. Move
around struct kvm_pgtable and the RCU indirection such that the
associated ifdeffery remains in one place while ensuring the walker +
flags definitions precede their use.

No functional change intended.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118182222.3932898-2-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h | 144 ++++++++++++++-------------
 arch/arm64/kvm/hyp/pgtable.c         |   6 +-
 2 files changed, 76 insertions(+), 74 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index a874ce0ce7b51..f23af693e3c52 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -37,54 +37,6 @@ static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
-/*
- * RCU cannot be used in a non-kernel context such as the hyp. As such, page
- * table walkers used in hyp do not call into RCU and instead use other
- * synchronization mechanisms (such as a spinlock).
- */
-#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
-
-typedef kvm_pte_t *kvm_pteref_t;
-
-static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
-{
-	return pteref;
-}
-
-static inline void kvm_pgtable_walk_begin(void) {}
-static inline void kvm_pgtable_walk_end(void) {}
-
-static inline bool kvm_pgtable_walk_lock_held(void)
-{
-	return true;
-}
-
-#else
-
-typedef kvm_pte_t __rcu *kvm_pteref_t;
-
-static inline kvm_pte_t *kvm_dereference_pteref(kvm_pteref_t pteref, bool shared)
-{
-	return rcu_dereference_check(pteref, !shared);
-}
-
-static inline void kvm_pgtable_walk_begin(void)
-{
-	rcu_read_lock();
-}
-
-static inline void kvm_pgtable_walk_end(void)
-{
-	rcu_read_unlock();
-}
-
-static inline bool kvm_pgtable_walk_lock_held(void)
-{
-	return rcu_read_lock_held();
-}
-
-#endif
-
 #define KVM_PTE_VALID			BIT(0)
 
 #define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
@@ -212,29 +164,6 @@ enum kvm_pgtable_prot {
 typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
 					   enum kvm_pgtable_prot prot);
 
-/**
- * struct kvm_pgtable - KVM page-table.
- * @ia_bits:		Maximum input address size, in bits.
- * @start_level:	Level at which the page-table walk starts.
- * @pgd:		Pointer to the first top-level entry of the page-table.
- * @mm_ops:		Memory management callbacks.
- * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
- * @flags:		Stage-2 page-table flags.
- * @force_pte_cb:	Function that returns true if page level mappings must
- *			be used instead of block mappings.
- */
-struct kvm_pgtable {
-	u32					ia_bits;
-	u32					start_level;
-	kvm_pteref_t				pgd;
-	struct kvm_pgtable_mm_ops		*mm_ops;
-
-	/* Stage-2 only */
-	struct kvm_s2_mmu			*mmu;
-	enum kvm_pgtable_stage2_flags		flags;
-	kvm_pgtable_force_pte_cb_t		force_pte_cb;
-};
-
 /**
  * enum kvm_pgtable_walk_flags - Flags to control a depth-first page-table walk.
  * @KVM_PGTABLE_WALK_LEAF:		Visit leaf entries, including invalid
@@ -285,6 +214,79 @@ struct kvm_pgtable_walker {
 	const enum kvm_pgtable_walk_flags	flags;
 };
 
+/*
+ * RCU cannot be used in a non-kernel context such as the hyp. As such, page
+ * table walkers used in hyp do not call into RCU and instead use other
+ * synchronization mechanisms (such as a spinlock).
+ */
+#if defined(__KVM_NVHE_HYPERVISOR__) || defined(__KVM_VHE_HYPERVISOR__)
+
+typedef kvm_pte_t *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+						kvm_pteref_t pteref)
+{
+	return pteref;
+}
+
+static inline void kvm_pgtable_walk_begin(void) {}
+static inline void kvm_pgtable_walk_end(void) {}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+	return true;
+}
+
+#else
+
+typedef kvm_pte_t __rcu *kvm_pteref_t;
+
+static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walker,
+						kvm_pteref_t pteref)
+{
+	return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
+}
+
+static inline void kvm_pgtable_walk_begin(void)
+{
+	rcu_read_lock();
+}
+
+static inline void kvm_pgtable_walk_end(void)
+{
+	rcu_read_unlock();
+}
+
+static inline bool kvm_pgtable_walk_lock_held(void)
+{
+	return rcu_read_lock_held();
+}
+
+#endif
+
+/**
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits:		Maximum input address size, in bits.
+ * @start_level:	Level at which the page-table walk starts.
+ * @pgd:		Pointer to the first top-level entry of the page-table.
+ * @mm_ops:		Memory management callbacks.
+ * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags:		Stage-2 page-table flags.
+ * @force_pte_cb:	Function that returns true if page level mappings must
+ *			be used instead of block mappings.
+ */
+struct kvm_pgtable {
+	u32					ia_bits;
+	u32					start_level;
+	kvm_pteref_t				pgd;
+	struct kvm_pgtable_mm_ops		*mm_ops;
+
+	/* Stage-2 only */
+	struct kvm_s2_mmu			*mmu;
+	enum kvm_pgtable_stage2_flags		flags;
+	kvm_pgtable_force_pte_cb_t		force_pte_cb;
+};
+
 /**
  * kvm_pgtable_hyp_init() - Initialise a hypervisor stage-1 page-table.
  * @pgt:	Uninitialised page-table structure to initialise.
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 5bca9610d040d..b5b91a8828365 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -188,7 +188,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
 				      kvm_pteref_t pteref, u32 level)
 {
 	enum kvm_pgtable_walk_flags flags = data->walker->flags;
-	kvm_pte_t *ptep = kvm_dereference_pteref(pteref, flags & KVM_PGTABLE_WALK_SHARED);
+	kvm_pte_t *ptep = kvm_dereference_pteref(data->walker, pteref);
 	struct kvm_pgtable_visit_ctx ctx = {
 		.ptep	= ptep,
 		.old	= READ_ONCE(*ptep),
@@ -558,7 +558,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt)
 	};
 
 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
-	pgt->mm_ops->put_page(kvm_dereference_pteref(pgt->pgd, false));
+	pgt->mm_ops->put_page(kvm_dereference_pteref(&walker, pgt->pgd));
 	pgt->pgd = NULL;
 }
 
@@ -1241,7 +1241,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
 
 	WARN_ON(kvm_pgtable_walk(pgt, 0, BIT(pgt->ia_bits), &walker));
 	pgd_sz = kvm_pgd_pages(pgt->ia_bits, pgt->start_level) * PAGE_SIZE;
-	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(pgt->pgd, false), pgd_sz);
+	pgt->mm_ops->free_pages_exact(kvm_dereference_pteref(&walker, pgt->pgd), pgd_sz);
 	pgt->pgd = NULL;
 }
 
-- 
GitLab


From b7833bf202e3068abb77c642a0843f696e9c8d38 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 18 Nov 2022 18:22:21 +0000
Subject: [PATCH 1951/4122] KVM: arm64: Don't acquire RCU read lock for
 exclusive table walks

Marek reported a BUG resulting from the recent parallel faults changes,
as the hyp stage-1 map walker attempted to allocate table memory while
holding the RCU read lock:

  BUG: sleeping function called from invalid context at
  include/linux/sched/mm.h:274
  in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1, name: swapper/0
  preempt_count: 0, expected: 0
  RCU nest depth: 1, expected: 0
  2 locks held by swapper/0/1:
    #0: ffff80000a8a44d0 (kvm_hyp_pgd_mutex){+.+.}-{3:3}, at:
  __create_hyp_mappings+0x80/0xc4
    #1: ffff80000a927720 (rcu_read_lock){....}-{1:2}, at:
  kvm_pgtable_walk+0x0/0x1f4
  CPU: 2 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc3+ #5918
  Hardware name: Raspberry Pi 3 Model B (DT)
  Call trace:
    dump_backtrace.part.0+0xe4/0xf0
    show_stack+0x18/0x40
    dump_stack_lvl+0x8c/0xb8
    dump_stack+0x18/0x34
    __might_resched+0x178/0x220
    __might_sleep+0x48/0xa0
    prepare_alloc_pages+0x178/0x1a0
    __alloc_pages+0x9c/0x109c
    alloc_page_interleave+0x1c/0xc4
    alloc_pages+0xec/0x160
    get_zeroed_page+0x1c/0x44
    kvm_hyp_zalloc_page+0x14/0x20
    hyp_map_walker+0xd4/0x134
    kvm_pgtable_visitor_cb.isra.0+0x38/0x5c
    __kvm_pgtable_walk+0x1a4/0x220
    kvm_pgtable_walk+0x104/0x1f4
    kvm_pgtable_hyp_map+0x80/0xc4
    __create_hyp_mappings+0x9c/0xc4
    kvm_mmu_init+0x144/0x1cc
    kvm_arch_init+0xe4/0xef4
    kvm_init+0x3c/0x3d0
    arm_init+0x20/0x30
    do_one_initcall+0x74/0x400
    kernel_init_freeable+0x2e0/0x350
    kernel_init+0x24/0x130
    ret_from_fork+0x10/0x20

Since the hyp stage-1 table walkers are serialized by kvm_hyp_pgd_mutex,
RCU protection really doesn't add anything. Don't acquire the RCU read
lock for an exclusive walk.

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118182222.3932898-3-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h | 14 ++++++++------
 arch/arm64/kvm/hyp/pgtable.c         |  4 ++--
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index f23af693e3c52..4b6b52ebc11c3 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -229,8 +229,8 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
 	return pteref;
 }
 
-static inline void kvm_pgtable_walk_begin(void) {}
-static inline void kvm_pgtable_walk_end(void) {}
+static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) {}
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
 
 static inline bool kvm_pgtable_walk_lock_held(void)
 {
@@ -247,14 +247,16 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
 	return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
 }
 
-static inline void kvm_pgtable_walk_begin(void)
+static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
 {
-	rcu_read_lock();
+	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+		rcu_read_lock();
 }
 
-static inline void kvm_pgtable_walk_end(void)
+static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
 {
-	rcu_read_unlock();
+	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+		rcu_read_unlock();
 }
 
 static inline bool kvm_pgtable_walk_lock_held(void)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index b5b91a8828365..d6f3753cb87ed 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -289,9 +289,9 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	};
 	int r;
 
-	kvm_pgtable_walk_begin();
+	kvm_pgtable_walk_begin(walker);
 	r = _kvm_pgtable_walk(pgt, &walk_data);
-	kvm_pgtable_walk_end();
+	kvm_pgtable_walk_end(walker);
 
 	return r;
 }
-- 
GitLab


From 5e806c5812e8012a83496cf96bdba266b3aec428 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 18 Nov 2022 18:22:22 +0000
Subject: [PATCH 1952/4122] KVM: arm64: Reject shared table walks in the hyp
 code

Exclusive table walks are the only supported table walk in the hyp, as
there is no construct like RCU available in the hypervisor code. Reject
any attempt to do a shared table walk by returning an error and allowing
the caller to clean up the mess.

Suggested-by: Will Deacon <will@kernel.org>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Acked-by: Will Deacon <will@kernel.org>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118182222.3932898-4-oliver.upton@linux.dev
---
 arch/arm64/include/asm/kvm_pgtable.h | 17 +++++++++++++++--
 arch/arm64/kvm/hyp/pgtable.c         |  5 ++++-
 2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 4b6b52ebc11c3..d5cb01f8dc066 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -229,7 +229,18 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
 	return pteref;
 }
 
-static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker) {}
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+{
+	/*
+	 * Due to the lack of RCU (or a similar protection scheme), only
+	 * non-shared table walkers are allowed in the hypervisor.
+	 */
+	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
+		return -EPERM;
+
+	return 0;
+}
+
 static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker) {}
 
 static inline bool kvm_pgtable_walk_lock_held(void)
@@ -247,10 +258,12 @@ static inline kvm_pte_t *kvm_dereference_pteref(struct kvm_pgtable_walker *walke
 	return rcu_dereference_check(pteref, !(walker->flags & KVM_PGTABLE_WALK_SHARED));
 }
 
-static inline void kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
+static inline int kvm_pgtable_walk_begin(struct kvm_pgtable_walker *walker)
 {
 	if (walker->flags & KVM_PGTABLE_WALK_SHARED)
 		rcu_read_lock();
+
+	return 0;
 }
 
 static inline void kvm_pgtable_walk_end(struct kvm_pgtable_walker *walker)
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index d6f3753cb87ed..58dbe0ab567fe 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -289,7 +289,10 @@ int kvm_pgtable_walk(struct kvm_pgtable *pgt, u64 addr, u64 size,
 	};
 	int r;
 
-	kvm_pgtable_walk_begin(walker);
+	r = kvm_pgtable_walk_begin(walker);
+	if (r)
+		return r;
+
 	r = _kvm_pgtable_walk(pgt, &walk_data);
 	kvm_pgtable_walk_end(walker);
 
-- 
GitLab


From 9907526d25c4ad8a6e3006487a544140776ba005 Mon Sep 17 00:00:00 2001
From: Mustafa Ismail <mustafa.ismail@intel.com>
Date: Mon, 21 Nov 2022 18:44:10 -0600
Subject: [PATCH 1953/4122] RDMA/irdma: Initialize net_type before checking it

The av->net_type is not initialized before it is checked in
irdma_modify_qp_roce. This leads to an incorrect update to the ARP cache
and QP context. RoCEv2 connections might fail as result.

Set the net_type using rdma_gid_attr_network_type.

Fixes: 80005c43d4c8 ("RDMA/irdma: Use net_type to check network type")
Signed-off-by: Mustafa Ismail <mustafa.ismail@intel.com>
Signed-off-by: Shiraz Saleem <shiraz.saleem@intel.com>
Link: https://lore.kernel.org/r/20221122004410.1471-1-shiraz.saleem@intel.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/irdma/verbs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/infiniband/hw/irdma/verbs.c b/drivers/infiniband/hw/irdma/verbs.c
index dc3f5f3fee901..f6973ea55eda7 100644
--- a/drivers/infiniband/hw/irdma/verbs.c
+++ b/drivers/infiniband/hw/irdma/verbs.c
@@ -1213,6 +1213,7 @@ int irdma_modify_qp_roce(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		av->attrs = attr->ah_attr;
 		rdma_gid2ip((struct sockaddr *)&av->sgid_addr, &sgid_attr->gid);
 		rdma_gid2ip((struct sockaddr *)&av->dgid_addr, &attr->ah_attr.grh.dgid);
+		av->net_type = rdma_gid_attr_network_type(sgid_attr);
 		if (av->net_type == RDMA_NETWORK_IPV6) {
 			__be32 *daddr =
 				av->dgid_addr.saddr_in6.sin6_addr.in6_u.u6_addr32;
-- 
GitLab


From a115aa00b18f7b8982b8f458149632caf64a862a Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 19 Nov 2022 15:08:34 +0800
Subject: [PATCH 1954/4122] RDMA/hns: fix memory leak in hns_roce_alloc_mr()

When hns_roce_mr_enable() failed in hns_roce_alloc_mr(), mr_key is not
released. Compiled test only.

Fixes: 9b2cf76c9f05 ("RDMA/hns: Optimize PBL buffer allocation process")
Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Link: https://lore.kernel.org/r/20221119070834.48502-1-shaozhengchao@huawei.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/hns/hns_roce_mr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_mr.c b/drivers/infiniband/hw/hns/hns_roce_mr.c
index 845ac7d3831f4..37a5cf62f88b4 100644
--- a/drivers/infiniband/hw/hns/hns_roce_mr.c
+++ b/drivers/infiniband/hw/hns/hns_roce_mr.c
@@ -392,10 +392,10 @@ struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
 
 	return &mr->ibmr;
 
-err_key:
-	free_mr_key(hr_dev, mr);
 err_pbl:
 	free_mr_pbl(hr_dev, mr);
+err_key:
+	free_mr_key(hr_dev, mr);
 err_free:
 	kfree(mr);
 	return ERR_PTR(ret);
-- 
GitLab


From a2430b25c31840a6dcbf95c65415d5fee2984dbc Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sat, 19 Nov 2022 04:15:50 +0900
Subject: [PATCH 1955/4122] kbuild: add kbuild-file macro

While building, installing, cleaning, Kbuild visits sub-directories
and includes 'Kbuild' or 'Makefile' that exists there.

Add 'kbuild-file' macro, and reuse it from scripts/Makefie.*

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
Reviewed-by: Alexander Lobakin <alobakin@pm.me>
Tested-by: Alexander Lobakin <alobakin@pm.me>
---
 scripts/Kbuild.include       | 5 +++++
 scripts/Makefile.asm-generic | 6 +++---
 scripts/Makefile.build       | 6 +-----
 scripts/Makefile.clean       | 5 +----
 scripts/Makefile.dtbinst     | 2 +-
 scripts/Makefile.modpost     | 2 +-
 6 files changed, 12 insertions(+), 14 deletions(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 2bc08ace38a3b..cbe28744637b7 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -40,6 +40,11 @@ escsq = $(subst $(squote),'\$(squote)',$1)
 # Quote a string to pass it to C files. foo => '"foo"'
 stringify = $(squote)$(quote)$1$(quote)$(squote)
 
+###
+# The path to Kbuild or Makefile. Kbuild has precedence over Makefile.
+kbuild-dir = $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
+kbuild-file = $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile)
+
 ###
 # Easy method for doing a status message
        kecho := :
diff --git a/scripts/Makefile.asm-generic b/scripts/Makefile.asm-generic
index 1d501c57f9eff..8d01b37b76775 100644
--- a/scripts/Makefile.asm-generic
+++ b/scripts/Makefile.asm-generic
@@ -10,15 +10,15 @@ PHONY := all
 all:
 
 src := $(subst /generated,,$(obj))
--include $(src)/Kbuild
+
+include $(srctree)/scripts/Kbuild.include
+-include $(kbuild-file)
 
 # $(generic)/Kbuild lists mandatory-y. Exclude um since it is a special case.
 ifneq ($(SRCARCH),um)
 include $(srctree)/$(generic)/Kbuild
 endif
 
-include $(srctree)/scripts/Kbuild.include
-
 redundant := $(filter $(mandatory-y) $(generated-y), $(generic-y))
 redundant += $(foreach f, $(generic-y), $(if $(wildcard $(srctree)/$(src)/$(f)),$(f)))
 redundant := $(sort $(redundant))
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 41f3602fc8de7..37cf88d076e87 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -38,11 +38,7 @@ subdir-ccflags-y :=
 
 include $(srctree)/scripts/Kbuild.include
 include $(srctree)/scripts/Makefile.compiler
-
-# The filename Kbuild has precedence over Makefile
-kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
-include $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile)
-
+include $(kbuild-file)
 include $(srctree)/scripts/Makefile.lib
 
 # Do not include hostprogs rules unless needed.
diff --git a/scripts/Makefile.clean b/scripts/Makefile.clean
index 878cec6489595..3649900696ddd 100644
--- a/scripts/Makefile.clean
+++ b/scripts/Makefile.clean
@@ -9,10 +9,7 @@ PHONY := __clean
 __clean:
 
 include $(srctree)/scripts/Kbuild.include
-
-# The filename Kbuild has precedence over Makefile
-kbuild-dir := $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
-include $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile)
+include $(kbuild-file)
 
 # Figure out what we need to build from the various variables
 # ==========================================================================
diff --git a/scripts/Makefile.dtbinst b/scripts/Makefile.dtbinst
index 190d781e84f4b..2ab936e4179da 100644
--- a/scripts/Makefile.dtbinst
+++ b/scripts/Makefile.dtbinst
@@ -15,7 +15,7 @@ __dtbs_install:
 
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
-include $(src)/Makefile
+include $(kbuild-file)
 
 dtbs    := $(addprefix $(dst)/, $(dtb-y) $(if $(CONFIG_OF_ALL_DTBS),$(dtb-)))
 subdirs := $(addprefix $(obj)/, $(subdir-y) $(subdir-m))
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index e41dee64d429c..55a72f5eb76dd 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -93,7 +93,7 @@ obj := $(KBUILD_EXTMOD)
 src := $(obj)
 
 # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS
-include $(or $(wildcard $(src)/Kbuild), $(src)/Makefile)
+include $(kbuild-file)
 
 module.symvers-if-present := $(wildcard Module.symvers)
 output-symdump := $(KBUILD_EXTMOD)/Module.symvers
-- 
GitLab


From 598afa050403ddbb015ad4d9f8e6b911c3c93d33 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sat, 19 Nov 2022 04:15:51 +0900
Subject: [PATCH 1956/4122] kbuild: warn objects shared among multiple modules

If an object is shared among multiple modules, and some of them are
configured as 'm', but the others as 'y', the shared object is built
as modular, then linked to the modules and vmlinux. This is a potential
issue because the expected CFLAGS are different between modules and
builtins.

Commit 637a642f5ca5 ("zstd: Fixing mixed module-builtin objects")
reported that this could be even more fatal in some cases such as
Clang LTO.

That commit fixed lib/zlib/zstd_{compress,decompress}, but there are
still more instances of breakage.

This commit adds a W=1 warning for shared objects, so that the kbuild
test robot, which provides build tests with W=1, will avoid a new
breakage slipping in.

Quick compile tests on v6.1-rc4 detected the following:

scripts/Makefile.build:252: ./drivers/block/rnbd/Makefile: rnbd-common.o is added to multiple modules: rnbd-client rnbd-server
scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: cn10k_cpt.o is added to multiple modules: rvu_cptpf rvu_cptvf
scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: otx2_cptlf.o is added to multiple modules: rvu_cptpf rvu_cptvf
scripts/Makefile.build:252: ./drivers/crypto/marvell/octeontx2/Makefile: otx2_cpt_mbox_common.o is added to multiple modules: rvu_cptpf rvu_cptvf
scripts/Makefile.build:252: ./drivers/edac/Makefile: skx_common.o is added to multiple modules: i10nm_edac skx_edac
scripts/Makefile.build:252: ./drivers/gpu/drm/bridge/imx/Makefile: imx-ldb-helper.o is added to multiple modules: imx8qm-ldb imx8qxp-ldb
scripts/Makefile.build:252: ./drivers/mfd/Makefile: rsmu_core.o is added to multiple modules: rsmu-i2c rsmu-spi
scripts/Makefile.build:252: ./drivers/mtd/tests/Makefile: mtd_test.o is added to multiple modules: mtd_nandbiterrs mtd_oobtest mtd_pagetest mtd_readtest mtd_speedtest mtd_stresstest mtd_subpagetest mtd_torturetest
scripts/Makefile.build:252: ./drivers/net/dsa/ocelot/Makefile: felix.o is added to multiple modules: mscc_felix mscc_seville
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn23xx_pf_device.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn23xx_vf_device.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn66xx_device.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: cn68xx_device.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: lio_core.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: lio_ethtool.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_device.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_droq.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_mailbox.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_mem_ops.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: octeon_nic.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: request_manager.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/cavium/liquidio/Makefile: response_manager.o is added to multiple modules: liquidio liquidio_vf
scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/dpaa2/Makefile: dpaa2-mac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch
scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/dpaa2/Makefile: dpmac.o is added to multiple modules: fsl-dpaa2-eth fsl-dpaa2-switch
scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc_cbdr.o is added to multiple modules: fsl-enetc fsl-enetc-vf
scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc_ethtool.o is added to multiple modules: fsl-enetc fsl-enetc-vf
scripts/Makefile.build:252: ./drivers/net/ethernet/freescale/enetc/Makefile: enetc.o is added to multiple modules: fsl-enetc fsl-enetc-vf
scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_cmd.o is added to multiple modules: hclge hclgevf
scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_rss.o is added to multiple modules: hclge hclgevf
scripts/Makefile.build:252: ./drivers/net/ethernet/hisilicon/hns3/Makefile: hns3_common/hclge_comm_tqp_stats.o is added to multiple modules: hclge hclgevf
scripts/Makefile.build:252: ./drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_dcbnl.o is added to multiple modules: rvu_nicpf rvu_nicvf
scripts/Makefile.build:252: ./drivers/net/ethernet/marvell/octeontx2/nic/Makefile: otx2_devlink.o is added to multiple modules: rvu_nicpf rvu_nicvf
scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_ale.o is added to multiple modules: keystone_netcp keystone_netcp_ethss ti_cpsw ti_cpsw_new
scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_ethtool.o is added to multiple modules: ti_cpsw ti_cpsw_new
scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_priv.o is added to multiple modules: ti_cpsw ti_cpsw_new
scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: cpsw_sl.o is added to multiple modules: ti_cpsw ti_cpsw_new
scripts/Makefile.build:252: ./drivers/net/ethernet/ti/Makefile: davinci_cpdma.o is added to multiple modules: ti_cpsw ti_cpsw_new ti_davinci_emac
scripts/Makefile.build:252: ./drivers/platform/x86/intel/int3472/Makefile: common.o is added to multiple modules: intel_skl_int3472_discrete intel_skl_int3472_tps68470
scripts/Makefile.build:252: ./sound/soc/codecs/Makefile: wcd-clsh-v2.o is added to multiple modules: snd-soc-wcd9335 snd-soc-wcd934x snd-soc-wcd938x

Once all the warnings are fixed, it can become an error without the
W= option.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Alexander Lobakin <alobakin@pm.me>
Tested-by: Alexander Lobakin <alobakin@pm.me>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
---
 scripts/Makefile.build | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 37cf88d076e87..799df12b53f39 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -222,6 +222,10 @@ endif
 
 cmd_check_local_export = $(srctree)/scripts/check-local-export $@
 
+ifneq ($(findstring 1, $(KBUILD_EXTRA_WARN)),)
+cmd_warn_shared_object = $(if $(word 2, $(modname-multi)),$(warning $(kbuild-file): $*.o is added to multiple modules: $(modname-multi)))
+endif
+
 define rule_cc_o_c
 	$(call cmd_and_fixdep,cc_o_c)
 	$(call cmd,gen_ksymdeps)
@@ -231,6 +235,7 @@ define rule_cc_o_c
 	$(call cmd,gen_objtooldep)
 	$(call cmd,gen_symversions_c)
 	$(call cmd,record_mcount)
+	$(call cmd,warn_shared_object)
 endef
 
 define rule_as_o_S
@@ -239,6 +244,7 @@ define rule_as_o_S
 	$(call cmd,check_local_export)
 	$(call cmd,gen_objtooldep)
 	$(call cmd,gen_symversions_S)
+	$(call cmd,warn_shared_object)
 endef
 
 # Built-in and composite module parts
-- 
GitLab


From 73f5fc5f884ad0c5f7d57f66303af64f9f002526 Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Mon, 21 Nov 2022 08:20:22 +0000
Subject: [PATCH 1957/4122] iommu/fsl_pamu: Fix resource leak in
 fsl_pamu_probe()

The fsl_pamu_probe() returns directly when create_csd() failed, leaving
irq and memories unreleased.
Fix by jumping to error if create_csd() returns error.

Fixes: 695093e38c3e ("iommu/fsl: Freescale PAMU driver and iommu implementation.")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Link: https://lore.kernel.org/r/20221121082022.19091-1-yuancan@huawei.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/fsl_pamu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
index 1b53d2da2c191..1a8c85d54123f 100644
--- a/drivers/iommu/fsl_pamu.c
+++ b/drivers/iommu/fsl_pamu.c
@@ -868,7 +868,7 @@ static int fsl_pamu_probe(struct platform_device *pdev)
 		ret = create_csd(ppaact_phys, mem_size, csd_port_id);
 		if (ret) {
 			dev_err(dev, "could not create coherence subdomain\n");
-			return ret;
+			goto error;
 		}
 	}
 
-- 
GitLab


From ddacd6ef44cac60c4fb8cd1a994fb13e32c1c761 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Fri, 11 Nov 2022 15:48:21 +0100
Subject: [PATCH 1958/4122] usb: fotg210: Fix Kconfig for USB host modules

The kernel robot reports a link failure when activating the
FOTG210 host subdriver with =y on a system where the USB host
core is a module (CONFIG_USB=m).

This is a bit of special case, so mimic the Kconfig incantations
from DWC3: let the subdrivers for host or peripheral depend
on the host or gadget support being =y or the same as the
FOTG210 core itself.

This should ensure that either:

- The host (CONFIG_USB) or gadget (CONFIG_GADGET) is compiled
  in and then the FOTG210 can be either module or compiled
  in.

- The host or gadget is modular, and then the FOTG210 module
  must be a module too, or we cannot resolve the symbols
  at link time.

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/linux-usb/202211112132.0BUPGKCd-lkp@intel.com/
Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221111144821.113665-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig
index 534206ee0d1db..2db6ac9f80746 100644
--- a/drivers/usb/fotg210/Kconfig
+++ b/drivers/usb/fotg210/Kconfig
@@ -14,7 +14,7 @@ if USB_FOTG210
 
 config USB_FOTG210_HCD
 	bool "Faraday FOTG210 USB Host Controller support"
-	depends on USB
+	depends on USB=y || USB=USB_FOTG210
 	help
 	  Faraday FOTG210 is an OTG controller which can be configured as
 	  an USB2.0 host. It is designed to meet USB2.0 EHCI specification
@@ -24,7 +24,7 @@ config USB_FOTG210_HCD
 	  module will be called fotg210-hcd.
 
 config USB_FOTG210_UDC
-	depends on USB_GADGET
+	depends on USB_GADGET=y || USB_GADGET=USB_FOTG210
 	bool "Faraday FOTG210 USB Peripheral Controller support"
 	help
 	   Faraday USB2.0 OTG controller which can be configured as
-- 
GitLab


From 6d36e0e1a14ac9a382c7a157bce5354fd8b68134 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 21 Nov 2022 16:22:19 +0100
Subject: [PATCH 1959/4122] usb: USB_FOTG210 should depend on ARCH_GEMINI

The Faraday Technology FOTG210 USB2 Dual Role Controller is only present
on Cortina Systems Gemini SoCs.  Hence add a dependency on ARCH_GEMINI,
to prevent asking the user about its drivers when configuring a kernel
without Cortina Systems Gemini SoC support.

Fixes: 1dd33a9f1b95ab59 ("usb: fotg210: Collect pieces of dual mode controller")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/a989b3b798ecaf3b45f35160e30e605636d66a77.1669044086.git.geert+renesas@glider.be
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/fotg210/Kconfig b/drivers/usb/fotg210/Kconfig
index 2db6ac9f80746..2b05968735baa 100644
--- a/drivers/usb/fotg210/Kconfig
+++ b/drivers/usb/fotg210/Kconfig
@@ -4,6 +4,7 @@ config USB_FOTG210
 	tristate "Faraday FOTG210 USB2 Dual Role controller"
 	depends on USB || USB_GADGET
 	depends on HAS_DMA && HAS_IOMEM
+	depends on ARCH_GEMINI || COMPILE_TEST
 	default ARCH_GEMINI
 	select MFD_SYSCON
 	help
-- 
GitLab


From d40eaada4209959264be63b21e18e15030db0a38 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 14 Nov 2022 12:51:58 +0100
Subject: [PATCH 1960/4122] fotg210-udc: Use dev pointer in probe and
 dev_messages

Add a local struct device *dev pointer and use dev_err()
etc to report status.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221114115201.302887-1-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-udc.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index 3c357ce42d3b1..b3106e4b31944 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -1091,6 +1091,7 @@ int fotg210_udc_probe(struct platform_device *pdev)
 	struct resource *res, *ires;
 	struct fotg210_udc *fotg210 = NULL;
 	struct fotg210_ep *_ep[FOTG210_MAX_NUM_EP];
+	struct device *dev = &pdev->dev;
 	int ret = 0;
 	int i;
 
@@ -1122,7 +1123,7 @@ int fotg210_udc_probe(struct platform_device *pdev)
 
 	fotg210->reg = ioremap(res->start, resource_size(res));
 	if (fotg210->reg == NULL) {
-		pr_err("ioremap error.\n");
+		dev_err(dev, "ioremap error\n");
 		goto err_alloc;
 	}
 
@@ -1133,8 +1134,8 @@ int fotg210_udc_probe(struct platform_device *pdev)
 	fotg210->gadget.ops = &fotg210_gadget_ops;
 
 	fotg210->gadget.max_speed = USB_SPEED_HIGH;
-	fotg210->gadget.dev.parent = &pdev->dev;
-	fotg210->gadget.dev.dma_mask = pdev->dev.dma_mask;
+	fotg210->gadget.dev.parent = dev;
+	fotg210->gadget.dev.dma_mask = dev->dma_mask;
 	fotg210->gadget.name = udc_name;
 
 	INIT_LIST_HEAD(&fotg210->gadget.ep_list);
@@ -1180,15 +1181,15 @@ int fotg210_udc_probe(struct platform_device *pdev)
 	ret = request_irq(ires->start, fotg210_irq, IRQF_SHARED,
 			  udc_name, fotg210);
 	if (ret < 0) {
-		pr_err("request_irq error (%d)\n", ret);
+		dev_err(dev, "request_irq error (%d)\n", ret);
 		goto err_req;
 	}
 
-	ret = usb_add_gadget_udc(&pdev->dev, &fotg210->gadget);
+	ret = usb_add_gadget_udc(dev, &fotg210->gadget);
 	if (ret)
 		goto err_add_udc;
 
-	dev_info(&pdev->dev, "version %s\n", DRIVER_VERSION);
+	dev_info(dev, "version %s\n", DRIVER_VERSION);
 
 	return 0;
 
-- 
GitLab


From 5f217ccd520f155c2e3b3dd95627140dd5ec947e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 14 Nov 2022 12:51:59 +0100
Subject: [PATCH 1961/4122] fotg210-udc: Support optional external PHY

This adds support for an optional external PHY to the FOTG210
UDC driver.

Tested with the GPIO VBUS PHY driver on the Gemini SoC.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221114115201.302887-2-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-udc.c | 72 +++++++++++++++++++++++++++++++
 drivers/usb/fotg210/fotg210-udc.h |  2 +
 2 files changed, 74 insertions(+)

diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index b3106e4b31944..4026103330e1d 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -15,6 +15,8 @@
 #include <linux/platform_device.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/usb/otg.h>
+#include <linux/usb/phy.h>
 
 #include "fotg210.h"
 #include "fotg210-udc.h"
@@ -1008,11 +1010,19 @@ static int fotg210_udc_start(struct usb_gadget *g,
 {
 	struct fotg210_udc *fotg210 = gadget_to_fotg210(g);
 	u32 value;
+	int ret;
 
 	/* hook up the driver */
 	driver->driver.bus = NULL;
 	fotg210->driver = driver;
 
+	if (!IS_ERR_OR_NULL(fotg210->phy)) {
+		ret = otg_set_peripheral(fotg210->phy->otg,
+					 &fotg210->gadget);
+		if (ret)
+			dev_err(fotg210->dev, "can't bind to phy\n");
+	}
+
 	/* enable device global interrupt */
 	value = ioread32(fotg210->reg + FOTG210_DMCR);
 	value |= DMCR_GLINT_EN;
@@ -1054,6 +1064,9 @@ static int fotg210_udc_stop(struct usb_gadget *g)
 	struct fotg210_udc *fotg210 = gadget_to_fotg210(g);
 	unsigned long	flags;
 
+	if (!IS_ERR_OR_NULL(fotg210->phy))
+		return otg_set_peripheral(fotg210->phy->otg, NULL);
+
 	spin_lock_irqsave(&fotg210->lock, flags);
 
 	fotg210_init(fotg210);
@@ -1069,12 +1082,50 @@ static const struct usb_gadget_ops fotg210_gadget_ops = {
 	.udc_stop		= fotg210_udc_stop,
 };
 
+/**
+ * fotg210_phy_event - Called by phy upon VBus event
+ * @nb: notifier block
+ * @action: phy action, is vbus connect or disconnect
+ * @data: the usb_gadget structure in fotg210
+ *
+ * Called by the USB Phy when a cable connect or disconnect is sensed.
+ *
+ * Returns NOTIFY_OK or NOTIFY_DONE
+ */
+static int fotg210_phy_event(struct notifier_block *nb, unsigned long action,
+			     void *data)
+{
+	struct usb_gadget *gadget = data;
+
+	if (!gadget)
+		return NOTIFY_DONE;
+
+	switch (action) {
+	case USB_EVENT_VBUS:
+		usb_gadget_vbus_connect(gadget);
+		return NOTIFY_OK;
+	case USB_EVENT_NONE:
+		usb_gadget_vbus_disconnect(gadget);
+		return NOTIFY_OK;
+	default:
+		return NOTIFY_DONE;
+	}
+}
+
+static struct notifier_block fotg210_phy_notifier = {
+	.notifier_call = fotg210_phy_event,
+};
+
 int fotg210_udc_remove(struct platform_device *pdev)
 {
 	struct fotg210_udc *fotg210 = platform_get_drvdata(pdev);
 	int i;
 
 	usb_del_gadget_udc(&fotg210->gadget);
+	if (!IS_ERR_OR_NULL(fotg210->phy)) {
+		usb_unregister_notifier(fotg210->phy, &fotg210_phy_notifier);
+		usb_put_phy(fotg210->phy);
+	}
 	iounmap(fotg210->reg);
 	free_irq(platform_get_irq(pdev, 0), fotg210);
 
@@ -1114,6 +1165,22 @@ int fotg210_udc_probe(struct platform_device *pdev)
 	if (fotg210 == NULL)
 		goto err;
 
+	fotg210->dev = dev;
+
+	fotg210->phy = devm_usb_get_phy_by_phandle(dev->parent, "usb-phy", 0);
+	if (IS_ERR(fotg210->phy)) {
+		ret = PTR_ERR(fotg210->phy);
+		if (ret == -EPROBE_DEFER)
+			goto err;
+		dev_info(dev, "no PHY found\n");
+		fotg210->phy = NULL;
+	} else {
+		ret = usb_phy_init(fotg210->phy);
+		if (ret)
+			goto err;
+		dev_info(dev, "found and initialized PHY\n");
+	}
+
 	for (i = 0; i < FOTG210_MAX_NUM_EP; i++) {
 		_ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL);
 		if (_ep[i] == NULL)
@@ -1185,6 +1252,9 @@ int fotg210_udc_probe(struct platform_device *pdev)
 		goto err_req;
 	}
 
+	if (!IS_ERR_OR_NULL(fotg210->phy))
+		usb_register_notifier(fotg210->phy, &fotg210_phy_notifier);
+
 	ret = usb_add_gadget_udc(dev, &fotg210->gadget);
 	if (ret)
 		goto err_add_udc;
@@ -1194,6 +1264,8 @@ int fotg210_udc_probe(struct platform_device *pdev)
 	return 0;
 
 err_add_udc:
+	if (!IS_ERR_OR_NULL(fotg210->phy))
+		usb_unregister_notifier(fotg210->phy, &fotg210_phy_notifier);
 	free_irq(ires->start, fotg210);
 
 err_req:
diff --git a/drivers/usb/fotg210/fotg210-udc.h b/drivers/usb/fotg210/fotg210-udc.h
index 08c32957503be..e3067d22a895c 100644
--- a/drivers/usb/fotg210/fotg210-udc.h
+++ b/drivers/usb/fotg210/fotg210-udc.h
@@ -234,6 +234,8 @@ struct fotg210_udc {
 
 	unsigned long		irq_trigger;
 
+	struct device			*dev;
+	struct usb_phy			*phy;
 	struct usb_gadget		gadget;
 	struct usb_gadget_driver	*driver;
 
-- 
GitLab


From 718a38d092ec920dd84a5e25510cc6721f527c3e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 14 Nov 2022 12:52:00 +0100
Subject: [PATCH 1962/4122] fotg210-udc: Handle PCLK

This adds optional handling of the peripheral clock PCLK.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221114115201.302887-3-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-udc.c | 30 ++++++++++++++++++++++++++++--
 drivers/usb/fotg210/fotg210-udc.h |  1 +
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index 4026103330e1d..de0f72ca103c5 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -15,6 +15,7 @@
 #include <linux/platform_device.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/gadget.h>
+#include <linux/clk.h>
 #include <linux/usb/otg.h>
 #include <linux/usb/phy.h>
 
@@ -1132,6 +1133,10 @@ int fotg210_udc_remove(struct platform_device *pdev)
 	fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req);
 	for (i = 0; i < FOTG210_MAX_NUM_EP; i++)
 		kfree(fotg210->ep[i]);
+
+	if (!IS_ERR(fotg210->pclk))
+		clk_disable_unprepare(fotg210->pclk);
+
 	kfree(fotg210);
 
 	return 0;
@@ -1167,17 +1172,34 @@ int fotg210_udc_probe(struct platform_device *pdev)
 
 	fotg210->dev = dev;
 
+	/* It's OK not to supply this clock */
+	fotg210->pclk = devm_clk_get(dev, "PCLK");
+	if (!IS_ERR(fotg210->pclk)) {
+		ret = clk_prepare_enable(fotg210->pclk);
+		if (ret) {
+			dev_err(dev, "failed to enable PCLK\n");
+			return ret;
+		}
+	} else if (PTR_ERR(fotg210->pclk) == -EPROBE_DEFER) {
+		/*
+		 * Percolate deferrals, for anything else,
+		 * just live without the clocking.
+		 */
+		ret = -EPROBE_DEFER;
+		goto err;
+	}
+
 	fotg210->phy = devm_usb_get_phy_by_phandle(dev->parent, "usb-phy", 0);
 	if (IS_ERR(fotg210->phy)) {
 		ret = PTR_ERR(fotg210->phy);
 		if (ret == -EPROBE_DEFER)
-			goto err;
+			goto err_pclk;
 		dev_info(dev, "no PHY found\n");
 		fotg210->phy = NULL;
 	} else {
 		ret = usb_phy_init(fotg210->phy);
 		if (ret)
-			goto err;
+			goto err_pclk;
 		dev_info(dev, "found and initialized PHY\n");
 	}
 
@@ -1277,6 +1299,10 @@ err_map:
 err_alloc:
 	for (i = 0; i < FOTG210_MAX_NUM_EP; i++)
 		kfree(fotg210->ep[i]);
+err_pclk:
+	if (!IS_ERR(fotg210->pclk))
+		clk_disable_unprepare(fotg210->pclk);
+
 	kfree(fotg210);
 
 err:
diff --git a/drivers/usb/fotg210/fotg210-udc.h b/drivers/usb/fotg210/fotg210-udc.h
index e3067d22a895c..fadb57ca8d786 100644
--- a/drivers/usb/fotg210/fotg210-udc.h
+++ b/drivers/usb/fotg210/fotg210-udc.h
@@ -231,6 +231,7 @@ struct fotg210_ep {
 struct fotg210_udc {
 	spinlock_t		lock; /* protect the struct */
 	void __iomem		*reg;
+	struct clk		*pclk;
 
 	unsigned long		irq_trigger;
 
-- 
GitLab


From f8b729ce97f66807f6b958e891888d0b1ed20a9e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Mon, 14 Nov 2022 12:52:01 +0100
Subject: [PATCH 1963/4122] fotg210-udc: Get IRQ using platform_get_irq()

The platform_get_irq() is necessary to use to get dynamic
IRQ resolution when instantiating the device from the
device tree. IRQs are not passed as resources in that
case.

Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221114115201.302887-4-linus.walleij@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-udc.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index de0f72ca103c5..44dfe66e189cd 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -1144,10 +1144,11 @@ int fotg210_udc_remove(struct platform_device *pdev)
 
 int fotg210_udc_probe(struct platform_device *pdev)
 {
-	struct resource *res, *ires;
+	struct resource *res;
 	struct fotg210_udc *fotg210 = NULL;
 	struct fotg210_ep *_ep[FOTG210_MAX_NUM_EP];
 	struct device *dev = &pdev->dev;
+	int irq;
 	int ret = 0;
 	int i;
 
@@ -1157,9 +1158,9 @@ int fotg210_udc_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	ires = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
-	if (!ires) {
-		pr_err("platform_get_resource IORESOURCE_IRQ error.\n");
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		pr_err("could not get irq\n");
 		return -ENODEV;
 	}
 
@@ -1189,7 +1190,7 @@ int fotg210_udc_probe(struct platform_device *pdev)
 		goto err;
 	}
 
-	fotg210->phy = devm_usb_get_phy_by_phandle(dev->parent, "usb-phy", 0);
+	fotg210->phy = devm_usb_get_phy_by_phandle(dev, "usb-phy", 0);
 	if (IS_ERR(fotg210->phy)) {
 		ret = PTR_ERR(fotg210->phy);
 		if (ret == -EPROBE_DEFER)
@@ -1267,7 +1268,7 @@ int fotg210_udc_probe(struct platform_device *pdev)
 
 	fotg210_disable_unplug(fotg210);
 
-	ret = request_irq(ires->start, fotg210_irq, IRQF_SHARED,
+	ret = request_irq(irq, fotg210_irq, IRQF_SHARED,
 			  udc_name, fotg210);
 	if (ret < 0) {
 		dev_err(dev, "request_irq error (%d)\n", ret);
@@ -1288,7 +1289,7 @@ int fotg210_udc_probe(struct platform_device *pdev)
 err_add_udc:
 	if (!IS_ERR_OR_NULL(fotg210->phy))
 		usb_unregister_notifier(fotg210->phy, &fotg210_phy_notifier);
-	free_irq(ires->start, fotg210);
+	free_irq(irq, fotg210);
 
 err_req:
 	fotg210_ep_free_request(&fotg210->ep[0]->ep, fotg210->ep0_req);
-- 
GitLab


From 202f785b1863d8feef53f6489afd9abcb744e7bf Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Mon, 14 Nov 2022 21:38:04 +0100
Subject: [PATCH 1964/4122] usb: fotg210-udc: Remove a useless assignment

There is no need to use an intermediate array for these memory allocations,
so, axe it.

While at it, turn a '== NULL' into a shorter '!' when testing memory
allocation failure.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/deab9696fc4000499470e7ccbca7c36fca17bd4e.1668458274.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-udc.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index 44dfe66e189cd..b9ea6c6d931c6 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -1146,7 +1146,6 @@ int fotg210_udc_probe(struct platform_device *pdev)
 {
 	struct resource *res;
 	struct fotg210_udc *fotg210 = NULL;
-	struct fotg210_ep *_ep[FOTG210_MAX_NUM_EP];
 	struct device *dev = &pdev->dev;
 	int irq;
 	int ret = 0;
@@ -1205,10 +1204,9 @@ int fotg210_udc_probe(struct platform_device *pdev)
 	}
 
 	for (i = 0; i < FOTG210_MAX_NUM_EP; i++) {
-		_ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL);
-		if (_ep[i] == NULL)
+		fotg210->ep[i] = kzalloc(sizeof(struct fotg210_ep), GFP_KERNEL);
+		if (!fotg210->ep[i])
 			goto err_alloc;
-		fotg210->ep[i] = _ep[i];
 	}
 
 	fotg210->reg = ioremap(res->start, resource_size(res));
-- 
GitLab


From 488c2c67463cc704715e9d4b68c9edfcc20f299d Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Tue, 15 Nov 2022 11:31:53 +0100
Subject: [PATCH 1965/4122] MAINTAINERS: rectify entry for MICROCHIP USB251XB
 DRIVER

Commit fff61d4ccf3d ("dt-bindings: usb: usb251xb: Convert to YAML schema")
converts usb251xb.txt to usb251xb.yaml, but misses to adjust its reference
in MAINTAINERS.

Hence, ./scripts/get_maintainer.pl --self-test=patterns complains about a
broken reference.

Repair this file reference in MICROCHIP USB251XB DRIVER.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20221115103153.28502-1-lukas.bulwahn@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2a8c456c184e0..8cb2c2b7d6cf8 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13644,7 +13644,7 @@ MICROCHIP USB251XB DRIVER
 M:	Richard Leitner <richard.leitner@skidata.com>
 L:	linux-usb@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/usb/usb251xb.txt
+F:	Documentation/devicetree/bindings/usb/usb251xb.yaml
 F:	drivers/usb/misc/usb251xb.c
 
 MICROCHIP USBA UDC DRIVER
-- 
GitLab


From 7b462b05e47adaf11358f5c2c24db85c487b613e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 11 Nov 2022 16:57:24 +0000
Subject: [PATCH 1966/4122] usb: ftdi-elan: remove variable l

Variable l is just being accumulated and it's never used
anywhere else. The variable and the addition are redundant so
remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221111165724.557152-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/ftdi-elan.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index 33b35788bd0ba..8ce191e3a4c06 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -1624,7 +1624,6 @@ wait:if (ftdi->disconnected > 0) {
 			char data[30 *3 + 4];
 			char *d = data;
 			int m = (sizeof(data) - 1) / 3 - 1;
-			int l = 0;
 			struct u132_target *target = &ftdi->target[ed];
 			struct u132_command *command = &ftdi->command[
 				COMMAND_MASK & ftdi->command_next];
@@ -1647,7 +1646,6 @@ wait:if (ftdi->disconnected > 0) {
 				} else if (i++ < m) {
 					int w = sprintf(d, " %02X", *b++);
 					d += w;
-					l += w;
 				} else
 					d += sprintf(d, " ..");
 			}
-- 
GitLab


From b6ddd180e3d9f92c1e482b3cdeec7dda086b1341 Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 14 Nov 2022 17:59:24 +0100
Subject: [PATCH 1967/4122] usb: typec: Check for ops->exit instead of
 ops->enter in altmode_exit

typec_altmode_exit checks if ops->enter is not NULL but then calls
ops->exit a few lines below. Fix that and check for the function
pointer it's about to call instead.

Fixes: 8a37d87d72f0 ("usb: typec: Bus type for alternate modes")
Signed-off-by: Sven Peter <sven@svenpeter.dev>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221114165924.33487-1-sven@svenpeter.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/bus.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/bus.c b/drivers/usb/typec/bus.c
index 26ea2fdec17dc..31c2a3130cadb 100644
--- a/drivers/usb/typec/bus.c
+++ b/drivers/usb/typec/bus.c
@@ -134,7 +134,7 @@ int typec_altmode_exit(struct typec_altmode *adev)
 	if (!adev || !adev->active)
 		return 0;
 
-	if (!pdev->ops || !pdev->ops->enter)
+	if (!pdev->ops || !pdev->ops->exit)
 		return -EOPNOTSUPP;
 
 	/* Moving to USB Safe State */
-- 
GitLab


From 6552ba4cd0841c23486368ed4feb2229e0abd1b3 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Wed, 16 Nov 2022 17:06:00 +0200
Subject: [PATCH 1968/4122] dt-bindings: usb: dwc3: Add SM8550 compatible

Document the SM8550 dwc3 compatible.

Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221116150600.3011160-1-abel.vesa@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/qcom,dwc3.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml
index a6e6abb4dfa9b..a3f8a3f498529 100644
--- a/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml
+++ b/Documentation/devicetree/bindings/usb/qcom,dwc3.yaml
@@ -39,6 +39,7 @@ properties:
           - qcom,sm8250-dwc3
           - qcom,sm8350-dwc3
           - qcom,sm8450-dwc3
+          - qcom,sm8550-dwc3
       - const: qcom,dwc3
 
   reg:
@@ -301,6 +302,7 @@ allOf:
               - qcom,sm8150-dwc3
               - qcom,sm8250-dwc3
               - qcom,sm8450-dwc3
+              - qcom,sm8550-dwc3
     then:
       properties:
         clocks:
@@ -358,6 +360,7 @@ allOf:
               - qcom,sm8250-dwc3
               - qcom,sm8350-dwc3
               - qcom,sm8450-dwc3
+              - qcom,sm8550-dwc3
     then:
       properties:
         interrupts:
-- 
GitLab


From 0384e87e3fec735e47f1c133c796f32ef7a72a9b Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 21 Nov 2022 14:24:16 +0800
Subject: [PATCH 1969/4122] usb: typec: tcpci: fix of node refcount leak in
 tcpci_register_port()

I got the following report while doing device(mt6370-tcpc) load
test with CONFIG_OF_UNITTEST and CONFIG_OF_DYNAMIC enabled:

  OF: ERROR: memory leak, expected refcount 1 instead of 2,
  of_node_get()/of_node_put() unbalanced - destroy cset entry:
  attach overlay node /i2c/pmic@34/tcpc/connector

The 'fwnode' set in tcpci_parse_config() which is called
in tcpci_register_port(), its node refcount is increased
in device_get_named_child_node(). It needs be put while
exiting, so call fwnode_handle_put() in the error path of
tcpci_register_port() and in tcpci_unregister_port() to
avoid leak.

Fixes: 5e85a04c8c0d ("usb: typec: add fwnode to tcpc")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221121062416.1026192-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index b2bfcebe218f0..72f8d1e876004 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -794,8 +794,10 @@ struct tcpci *tcpci_register_port(struct device *dev, struct tcpci_data *data)
 		return ERR_PTR(err);
 
 	tcpci->port = tcpm_register_port(tcpci->dev, &tcpci->tcpc);
-	if (IS_ERR(tcpci->port))
+	if (IS_ERR(tcpci->port)) {
+		fwnode_handle_put(tcpci->tcpc.fwnode);
 		return ERR_CAST(tcpci->port);
+	}
 
 	return tcpci;
 }
@@ -804,6 +806,7 @@ EXPORT_SYMBOL_GPL(tcpci_register_port);
 void tcpci_unregister_port(struct tcpci *tcpci)
 {
 	tcpm_unregister_port(tcpci->port);
+	fwnode_handle_put(tcpci->tcpc.fwnode);
 }
 EXPORT_SYMBOL_GPL(tcpci_unregister_port);
 
-- 
GitLab


From e99e1a7d6f88b9c54dc32671bac29f26e58bde80 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Fri, 18 Nov 2022 19:01:16 +0800
Subject: [PATCH 1970/4122] usb: host: xhci-mtk: omit shared hcd if either root
 hub has no ports

There is error log when add a usb3 root hub without ports:
"hub 4-0:1.0: config failed, hub doesn't have any ports! (err -19)"

so omit the shared hcd if either of the root hubs has no ports, but
usually there is no usb3 port.

Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20221118110116.20165-1-chunfeng.yun@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mtk.c | 72 +++++++++++++++++++++++--------------
 1 file changed, 46 insertions(+), 26 deletions(-)

diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index 01705e559c422..cff3c4aea0362 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -485,6 +485,7 @@ static int xhci_mtk_probe(struct platform_device *pdev)
 	const struct hc_driver *driver;
 	struct xhci_hcd *xhci;
 	struct resource *res;
+	struct usb_hcd *usb3_hcd;
 	struct usb_hcd *hcd;
 	int ret = -ENODEV;
 	int wakeup_irq;
@@ -593,6 +594,7 @@ static int xhci_mtk_probe(struct platform_device *pdev)
 
 	xhci = hcd_to_xhci(hcd);
 	xhci->main_hcd = hcd;
+	xhci->allow_single_roothub = 1;
 
 	/*
 	 * imod_interval is the interrupt moderation value in nanoseconds.
@@ -602,24 +604,29 @@ static int xhci_mtk_probe(struct platform_device *pdev)
 	xhci->imod_interval = 5000;
 	device_property_read_u32(dev, "imod-interval-ns", &xhci->imod_interval);
 
-	xhci->shared_hcd = usb_create_shared_hcd(driver, dev,
-			dev_name(dev), hcd);
-	if (!xhci->shared_hcd) {
-		ret = -ENOMEM;
-		goto disable_device_wakeup;
-	}
-
 	ret = usb_add_hcd(hcd, irq, IRQF_SHARED);
 	if (ret)
-		goto put_usb3_hcd;
+		goto disable_device_wakeup;
 
-	if (HCC_MAX_PSA(xhci->hcc_params) >= 4 &&
+	if (!xhci_has_one_roothub(xhci)) {
+		xhci->shared_hcd = usb_create_shared_hcd(driver, dev,
+							 dev_name(dev), hcd);
+		if (!xhci->shared_hcd) {
+			ret = -ENOMEM;
+			goto dealloc_usb2_hcd;
+		}
+	}
+
+	usb3_hcd = xhci_get_usb3_hcd(xhci);
+	if (usb3_hcd && HCC_MAX_PSA(xhci->hcc_params) >= 4 &&
 	    !(xhci->quirks & XHCI_BROKEN_STREAMS))
-		xhci->shared_hcd->can_do_streams = 1;
+		usb3_hcd->can_do_streams = 1;
 
-	ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED);
-	if (ret)
-		goto dealloc_usb2_hcd;
+	if (xhci->shared_hcd) {
+		ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED);
+		if (ret)
+			goto put_usb3_hcd;
+	}
 
 	if (wakeup_irq > 0) {
 		ret = dev_pm_set_dedicated_wake_irq_reverse(dev, wakeup_irq);
@@ -641,13 +648,13 @@ dealloc_usb3_hcd:
 	usb_remove_hcd(xhci->shared_hcd);
 	xhci->shared_hcd = NULL;
 
-dealloc_usb2_hcd:
-	usb_remove_hcd(hcd);
-
 put_usb3_hcd:
-	xhci_mtk_sch_exit(mtk);
 	usb_put_hcd(xhci->shared_hcd);
 
+dealloc_usb2_hcd:
+	xhci_mtk_sch_exit(mtk);
+	usb_remove_hcd(hcd);
+
 disable_device_wakeup:
 	device_init_wakeup(dev, false);
 
@@ -679,10 +686,15 @@ static int xhci_mtk_remove(struct platform_device *pdev)
 	dev_pm_clear_wake_irq(dev);
 	device_init_wakeup(dev, false);
 
-	usb_remove_hcd(shared_hcd);
-	xhci->shared_hcd = NULL;
+	if (shared_hcd) {
+		usb_remove_hcd(shared_hcd);
+		xhci->shared_hcd = NULL;
+	}
 	usb_remove_hcd(hcd);
-	usb_put_hcd(shared_hcd);
+
+	if (shared_hcd)
+		usb_put_hcd(shared_hcd);
+
 	usb_put_hcd(hcd);
 	xhci_mtk_sch_exit(mtk);
 	clk_bulk_disable_unprepare(BULK_CLKS_NUM, mtk->clks);
@@ -700,13 +712,16 @@ static int __maybe_unused xhci_mtk_suspend(struct device *dev)
 	struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
 	struct usb_hcd *hcd = mtk->hcd;
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+	struct usb_hcd *shared_hcd = xhci->shared_hcd;
 	int ret;
 
 	xhci_dbg(xhci, "%s: stop port polling\n", __func__);
 	clear_bit(HCD_FLAG_POLL_RH, &hcd->flags);
 	del_timer_sync(&hcd->rh_timer);
-	clear_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
-	del_timer_sync(&xhci->shared_hcd->rh_timer);
+	if (shared_hcd) {
+		clear_bit(HCD_FLAG_POLL_RH, &shared_hcd->flags);
+		del_timer_sync(&shared_hcd->rh_timer);
+	}
 
 	ret = xhci_mtk_host_disable(mtk);
 	if (ret)
@@ -718,8 +733,10 @@ static int __maybe_unused xhci_mtk_suspend(struct device *dev)
 
 restart_poll_rh:
 	xhci_dbg(xhci, "%s: restart port polling\n", __func__);
-	set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
-	usb_hcd_poll_rh_status(xhci->shared_hcd);
+	if (shared_hcd) {
+		set_bit(HCD_FLAG_POLL_RH, &shared_hcd->flags);
+		usb_hcd_poll_rh_status(shared_hcd);
+	}
 	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
 	usb_hcd_poll_rh_status(hcd);
 	return ret;
@@ -730,6 +747,7 @@ static int __maybe_unused xhci_mtk_resume(struct device *dev)
 	struct xhci_hcd_mtk *mtk = dev_get_drvdata(dev);
 	struct usb_hcd *hcd = mtk->hcd;
 	struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+	struct usb_hcd *shared_hcd = xhci->shared_hcd;
 	int ret;
 
 	usb_wakeup_set(mtk, false);
@@ -742,8 +760,10 @@ static int __maybe_unused xhci_mtk_resume(struct device *dev)
 		goto disable_clks;
 
 	xhci_dbg(xhci, "%s: restart port polling\n", __func__);
-	set_bit(HCD_FLAG_POLL_RH, &xhci->shared_hcd->flags);
-	usb_hcd_poll_rh_status(xhci->shared_hcd);
+	if (shared_hcd) {
+		set_bit(HCD_FLAG_POLL_RH, &shared_hcd->flags);
+		usb_hcd_poll_rh_status(shared_hcd);
+	}
 	set_bit(HCD_FLAG_POLL_RH, &hcd->flags);
 	usb_hcd_poll_rh_status(hcd);
 	return 0;
-- 
GitLab


From 19c220e9ab00f50edefb9667e3101e84a5112df2 Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 14 Nov 2022 18:44:46 +0100
Subject: [PATCH 1971/4122] usb: typec: tipd: Cleanup resources if
 devm_tps6598_psy_register fails

We can't just return if devm_tps6598_psy_register fails since previous
resources are not devres managed and have yet to be cleaned up.

Fixes: 10eb0b6ac63a ("usb: typec: tps6598x: Export some power supply properties")
Signed-off-by: Sven Peter <sven@svenpeter.dev>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221114174449.34634-1-sven@svenpeter.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tipd/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 2a77bab948f54..83a7a82e55f10 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -814,7 +814,7 @@ static int tps6598x_probe(struct i2c_client *client)
 
 	ret = devm_tps6598_psy_register(tps);
 	if (ret)
-		return ret;
+		goto err_role_put;
 
 	tps->port = typec_register_port(&client->dev, &typec_cap);
 	if (IS_ERR(tps->port)) {
-- 
GitLab


From 782c70edc4852a5d39be12377a85501546236212 Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 14 Nov 2022 18:44:47 +0100
Subject: [PATCH 1972/4122] usb: typec: tipd: Fix spurious fwnode_handle_put in
 error path

The err_role_put error path always calls fwnode_handle_put to release
the fwnode. This path can be reached after probe itself has already
released that fwnode though. Fix that by moving fwnode_handle_put in the
happy path to the very end.

Fixes: 18a6c866bb19 ("usb: typec: tps6598x: Add USB role switching logic")
Signed-off-by: Sven Peter <sven@svenpeter.dev>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221114174449.34634-2-sven@svenpeter.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tipd/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 83a7a82e55f10..59059310ba748 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -821,7 +821,6 @@ static int tps6598x_probe(struct i2c_client *client)
 		ret = PTR_ERR(tps->port);
 		goto err_role_put;
 	}
-	fwnode_handle_put(fwnode);
 
 	if (status & TPS_STATUS_PLUG_PRESENT) {
 		ret = tps6598x_read16(tps, TPS_REG_POWER_STATUS, &tps->pwr_status);
@@ -845,6 +844,7 @@ static int tps6598x_probe(struct i2c_client *client)
 	}
 
 	i2c_set_clientdata(client, tps);
+	fwnode_handle_put(fwnode);
 
 	return 0;
 
-- 
GitLab


From 4c8f27ba9ede0118cac9d775204f9b0ecdb877b0 Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 14 Nov 2022 18:44:48 +0100
Subject: [PATCH 1973/4122] usb: typec: tipd: Fix typec_unregister_port error
 paths

typec_unregister_port is only called for some error paths after
typec_register_port was successful. Ensure it's called in all
cases.

Fixes: 92440202a880 ("usb: typec: tipd: Only update power status on IRQ")
Signed-off-by: Sven Peter <sven@svenpeter.dev>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221114174449.34634-3-sven@svenpeter.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tipd/core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 59059310ba748..195c9c16f817f 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -826,7 +826,7 @@ static int tps6598x_probe(struct i2c_client *client)
 		ret = tps6598x_read16(tps, TPS_REG_POWER_STATUS, &tps->pwr_status);
 		if (ret < 0) {
 			dev_err(tps->dev, "failed to read power status: %d\n", ret);
-			goto err_role_put;
+			goto err_unregister_port;
 		}
 		ret = tps6598x_connect(tps, status);
 		if (ret)
@@ -839,8 +839,7 @@ static int tps6598x_probe(struct i2c_client *client)
 					dev_name(&client->dev), tps);
 	if (ret) {
 		tps6598x_disconnect(tps, 0);
-		typec_unregister_port(tps->port);
-		goto err_role_put;
+		goto err_unregister_port;
 	}
 
 	i2c_set_clientdata(client, tps);
@@ -848,6 +847,8 @@ static int tps6598x_probe(struct i2c_client *client)
 
 	return 0;
 
+err_unregister_port:
+	typec_unregister_port(tps->port);
 err_role_put:
 	usb_role_switch_put(tps->role_sw);
 err_fwnode_put:
-- 
GitLab


From 53a256ea9596ec78a9f5dd51f2b49c2355b15d6e Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 14 Nov 2022 18:44:49 +0100
Subject: [PATCH 1974/4122] usb: typec: tipd: Move tps6598x_disconnect error
 path to its own label

While the code currently correctly calls tps6598x_disconnect before jumping
to the error cleanup label it's inconsistent compared to all the other cleanup
actions and prone to introduce bugs if any more resources are added.

Signed-off-by: Sven Peter <sven@svenpeter.dev>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221114174449.34634-4-sven@svenpeter.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tipd/core.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 195c9c16f817f..982bd2cad931d 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -837,16 +837,16 @@ static int tps6598x_probe(struct i2c_client *client)
 					irq_handler,
 					IRQF_SHARED | IRQF_ONESHOT,
 					dev_name(&client->dev), tps);
-	if (ret) {
-		tps6598x_disconnect(tps, 0);
-		goto err_unregister_port;
-	}
+	if (ret)
+		goto err_disconnect;
 
 	i2c_set_clientdata(client, tps);
 	fwnode_handle_put(fwnode);
 
 	return 0;
 
+err_disconnect:
+	tps6598x_disconnect(tps, 0);
 err_unregister_port:
 	typec_unregister_port(tps->port);
 err_role_put:
-- 
GitLab


From ffbe2feac59b37c8dc536727552b4f375e1b9aec Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 18 Nov 2022 12:25:32 +0200
Subject: [PATCH 1975/4122] usb: musb: omap2430: Fix probe regression for
 missing resources

Probe for omap2430 glue layer is now broken for interrupt resources in
all cases.

Commit  239071064732 ("partially Revert "usb: musb: Set the DT node on the
child device"") broke probing for SoCs using ti-sysc interconnect target
module as the dt node is not found.

Commit a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from
DT core") caused omap3 to fail with error "-ENXIO: IRQ mc not found" as
the IRQ resources are no longer automatically populated from devicetree.

Let's fix the issues by calling device_set_of_node_from_dev() only if the
SoC has been updated to probe with ti-sysc. And for legacy SoCs, let's
populate the resources manually as needed.

Note that once we have updated the SoCs to probe with proper devicetree
data in all cases, this is no longer needed. But doing that requires
patching both devicetree and SoC code, so let's fix the probe issues first.

Fixes: a1a2b7125e10 ("of/platform: Drop static setup of IRQ resource from DT core")
Fixes: 239071064732 ("partially Revert "usb: musb: Set the DT node on the child device"")
Cc: H. Nikolaus Schaller <hns@goldelico.com>
Reported-by: Sicelo Mhlongo <absicsz@gmail.com>
Tested-by: Sicelo Mhlongo <absicsz@gmail.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Link: https://lore.kernel.org/r/20221118102532.34458-1-tony@atomide.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/omap2430.c | 54 +++++++++++++++++++++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/drivers/usb/musb/omap2430.c b/drivers/usb/musb/omap2430.c
index f571a65ae6ee2..476f55d1fec30 100644
--- a/drivers/usb/musb/omap2430.c
+++ b/drivers/usb/musb/omap2430.c
@@ -15,6 +15,7 @@
 #include <linux/list.h>
 #include <linux/io.h>
 #include <linux/of.h>
+#include <linux/of_irq.h>
 #include <linux/platform_device.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
@@ -310,6 +311,7 @@ static int omap2430_probe(struct platform_device *pdev)
 	struct device_node		*control_node;
 	struct platform_device		*control_pdev;
 	int				ret = -ENOMEM, val;
+	bool				populate_irqs = false;
 
 	if (!np)
 		return -ENODEV;
@@ -328,6 +330,18 @@ static int omap2430_probe(struct platform_device *pdev)
 	musb->dev.dma_mask		= &omap2430_dmamask;
 	musb->dev.coherent_dma_mask	= omap2430_dmamask;
 
+	/*
+	 * Legacy SoCs using omap_device get confused if node is moved
+	 * because of interconnect properties mixed into the node.
+	 */
+	if (of_get_property(np, "ti,hwmods", NULL)) {
+		dev_warn(&pdev->dev, "please update to probe with ti-sysc\n");
+		populate_irqs = true;
+	} else {
+		device_set_of_node_from_dev(&musb->dev, &pdev->dev);
+	}
+	of_node_put(np);
+
 	glue->dev			= &pdev->dev;
 	glue->musb			= musb;
 	glue->status			= MUSB_UNKNOWN;
@@ -389,6 +403,46 @@ static int omap2430_probe(struct platform_device *pdev)
 		goto err2;
 	}
 
+	if (populate_irqs) {
+		struct resource musb_res[3];
+		struct resource *res;
+		int i = 0;
+
+		memset(musb_res, 0, sizeof(*musb_res) * ARRAY_SIZE(musb_res));
+
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+		if (!res)
+			goto err2;
+
+		musb_res[i].start = res->start;
+		musb_res[i].end = res->end;
+		musb_res[i].flags = res->flags;
+		musb_res[i].name = res->name;
+		i++;
+
+		ret = of_irq_get_byname(np, "mc");
+		if (ret > 0) {
+			musb_res[i].start = ret;
+			musb_res[i].flags = IORESOURCE_IRQ;
+			musb_res[i].name = "mc";
+			i++;
+		}
+
+		ret = of_irq_get_byname(np, "dma");
+		if (ret > 0) {
+			musb_res[i].start = ret;
+			musb_res[i].flags = IORESOURCE_IRQ;
+			musb_res[i].name = "dma";
+			i++;
+		}
+
+		ret = platform_device_add_resources(musb, musb_res, i);
+		if (ret) {
+			dev_err(&pdev->dev, "failed to add IRQ resources\n");
+			goto err2;
+		}
+	}
+
 	ret = platform_device_add_data(musb, pdata, sizeof(*pdata));
 	if (ret) {
 		dev_err(&pdev->dev, "failed to add platform_data\n");
-- 
GitLab


From 3205054dc6fe2425ff24827a51fdf7cbbb528680 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Wed, 16 Nov 2022 12:04:44 +0100
Subject: [PATCH 1976/4122] usb: dwc3: improve the config dependency of
 USB_DWC3_XILINX

A request to Manish Narani (see Link) asked for clarification of the
reference to the config ARCH_VERSAL in the support of Xilinx SoCs with
DesignWare Core USB3 IP.

As there is no response, clean up the reference to the non-existing config
symbol. While at it, follow up on Felipe Balbi's request to add the
alternative COMPILE_TEST dependency.

Link: https://lore.kernel.org/all/CAKXUXMwgWfX8+OvY0aCwRNukencwJERAZzU7p4eOLXQ2zv6rAg@mail.gmail.com/

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Link: https://lore.kernel.org/r/20221116110444.8340-1-lukas.bulwahn@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/dwc3/Kconfig b/drivers/usb/dwc3/Kconfig
index 03ededa86da1f..b2f72b0e75c6b 100644
--- a/drivers/usb/dwc3/Kconfig
+++ b/drivers/usb/dwc3/Kconfig
@@ -152,11 +152,11 @@ config USB_DWC3_IMX8MP
 
 config USB_DWC3_XILINX
 	tristate "Xilinx Platforms"
-	depends on (ARCH_ZYNQMP || ARCH_VERSAL) && OF
+	depends on (ARCH_ZYNQMP || COMPILE_TEST) && OF
 	default USB_DWC3
 	help
 	  Support Xilinx SoCs with DesignWare Core USB3 IP.
-	  This driver handles both ZynqMP and Versal SoC operations.
+	  This driver handles ZynqMP SoC operations.
 	  Say 'Y' or 'M' if you have one such device.
 
 config USB_DWC3_AM62
-- 
GitLab


From 581c848b610dbf3fe1ed4d85fd53d0743c61faba Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 20 Nov 2022 15:15:09 +0100
Subject: [PATCH 1977/4122] extcon: usbc-tusb320: Update state on probe even if
 no IRQ pending
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently this driver triggers extcon and typec state update in its
probe function, to read out current state reported by the chip and
report the correct state to upper layers. This synchronization is
performed correctly, but only in case the chip indicates a pending
interrupt in reg09 register.

This fails to cover the situation where all interrupts reported by
the chip were already handled by Linux before reboot, then the system
rebooted, and then Linux starts again. In this case, the TUSB320 no
longer reports any interrupts in reg09, and the state update does not
perform any update as it depends on that interrupt indication.

Fix this by turning tusb320_irq_handler() into a thin wrapper around
tusb320_state_update_handler(), where the later now contains the bulk
of the code of tusb320_irq_handler(), but adds new function parameter
"force_update". The "force_update" parameter can be used by the probe
function to assure that the state synchronization is always performed,
independent of the interrupt indicated in reg09. The interrupt handler
tusb320_irq_handler() callback uses force_update=false to avoid state
updates on potential spurious interrupts and retain current behavior.

Fixes: 06bc4ca115cdd ("extcon: Add driver for TI TUSB320")
Signed-off-by: Marek Vasut <marex@denx.de>
Reviewed-by: Alvin Šipraga <alsi@bang-olufsen.dk>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221120141509.81012-1-marex@denx.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/extcon/extcon-usbc-tusb320.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c
index 2a120d8d3c272..9dfa545427ca1 100644
--- a/drivers/extcon/extcon-usbc-tusb320.c
+++ b/drivers/extcon/extcon-usbc-tusb320.c
@@ -313,9 +313,9 @@ static void tusb320_typec_irq_handler(struct tusb320_priv *priv, u8 reg9)
 		typec_set_pwr_opmode(port, TYPEC_PWR_MODE_USB);
 }
 
-static irqreturn_t tusb320_irq_handler(int irq, void *dev_id)
+static irqreturn_t tusb320_state_update_handler(struct tusb320_priv *priv,
+						bool force_update)
 {
-	struct tusb320_priv *priv = dev_id;
 	unsigned int reg;
 
 	if (regmap_read(priv->regmap, TUSB320_REG9, &reg)) {
@@ -323,7 +323,7 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id)
 		return IRQ_NONE;
 	}
 
-	if (!(reg & TUSB320_REG9_INTERRUPT_STATUS))
+	if (!force_update && !(reg & TUSB320_REG9_INTERRUPT_STATUS))
 		return IRQ_NONE;
 
 	tusb320_extcon_irq_handler(priv, reg);
@@ -340,6 +340,13 @@ static irqreturn_t tusb320_irq_handler(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t tusb320_irq_handler(int irq, void *dev_id)
+{
+	struct tusb320_priv *priv = dev_id;
+
+	return tusb320_state_update_handler(priv, false);
+}
+
 static const struct regmap_config tusb320_regmap_config = {
 	.reg_bits = 8,
 	.val_bits = 8,
@@ -466,7 +473,7 @@ static int tusb320_probe(struct i2c_client *client,
 		return ret;
 
 	/* update initial state */
-	tusb320_irq_handler(client->irq, priv);
+	tusb320_state_update_handler(priv, true);
 
 	/* Reset chip to its default state */
 	ret = tusb320_reset(priv);
@@ -477,7 +484,7 @@ static int tusb320_probe(struct i2c_client *client,
 		 * State and polarity might change after a reset, so update
 		 * them again and make sure the interrupt status bit is cleared.
 		 */
-		tusb320_irq_handler(client->irq, priv);
+		tusb320_state_update_handler(priv, true);
 
 	ret = devm_request_threaded_irq(priv->dev, client->irq, NULL,
 					tusb320_irq_handler,
-- 
GitLab


From afdc12887f2b2ecf20d065a7d81ad29824155083 Mon Sep 17 00:00:00 2001
From: Jiantao Zhang <water.zhangjiantao@huawei.com>
Date: Mon, 21 Nov 2022 13:08:05 +0000
Subject: [PATCH 1978/4122] USB: gadget: Fix use-after-free during usb config
 switch

In the process of switching USB config from rndis to other config,
if the hardware does not support the ->pullup callback, or the
hardware encounters a low probability fault, both of them may cause
the ->pullup callback to fail, which will then cause a system panic
(use after free).

The gadget drivers sometimes need to be unloaded regardless of the
hardware's behavior.

Analysis as follows:
=======================================================================
(1) write /config/usb_gadget/g1/UDC "none"

gether_disconnect+0x2c/0x1f8
rndis_disable+0x4c/0x74
composite_disconnect+0x74/0xb0
configfs_composite_disconnect+0x60/0x7c
usb_gadget_disconnect+0x70/0x124
usb_gadget_unregister_driver+0xc8/0x1d8
gadget_dev_desc_UDC_store+0xec/0x1e4

(2) rm /config/usb_gadget/g1/configs/b.1/f1

rndis_deregister+0x28/0x54
rndis_free+0x44/0x7c
usb_put_function+0x14/0x1c
config_usb_cfg_unlink+0xc4/0xe0
configfs_unlink+0x124/0x1c8
vfs_unlink+0x114/0x1dc

(3) rmdir /config/usb_gadget/g1/functions/rndis.gs4

panic+0x1fc/0x3d0
do_page_fault+0xa8/0x46c
do_mem_abort+0x3c/0xac
el1_sync_handler+0x40/0x78
0xffffff801138f880
rndis_close+0x28/0x34
eth_stop+0x74/0x110
dev_close_many+0x48/0x194
rollback_registered_many+0x118/0x814
unregister_netdev+0x20/0x30
gether_cleanup+0x1c/0x38
rndis_attr_release+0xc/0x14
kref_put+0x74/0xb8
configfs_rmdir+0x314/0x374

If gadget->ops->pullup() return an error, function rndis_close() will be
called, then it will causes a use-after-free problem.
=======================================================================

Fixes: 0a55187a1ec8 ("USB: gadget core: Issue ->disconnect() callback from usb_gadget_disconnect()")
Signed-off-by: Jiantao Zhang <water.zhangjiantao@huawei.com>
Signed-off-by: TaoXue <xuetao09@huawei.com>
Link: https://lore.kernel.org/r/20221121130805.10735-1-water.zhangjiantao@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/core.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index c63c0c2cf649d..bf9878e1a72a8 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -734,13 +734,13 @@ int usb_gadget_disconnect(struct usb_gadget *gadget)
 	}
 
 	ret = gadget->ops->pullup(gadget, 0);
-	if (!ret) {
+	if (!ret)
 		gadget->connected = 0;
-		mutex_lock(&udc_lock);
-		if (gadget->udc->driver)
-			gadget->udc->driver->disconnect(gadget);
-		mutex_unlock(&udc_lock);
-	}
+
+	mutex_lock(&udc_lock);
+	if (gadget->udc->driver)
+		gadget->udc->driver->disconnect(gadget);
+	mutex_unlock(&udc_lock);
 
 out:
 	trace_usb_gadget_disconnect(gadget, ret);
-- 
GitLab


From 05b2e347a58385b8b00051fef61f10a512b5aa20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:18 +0100
Subject: [PATCH 1979/4122] usb: typec: ucsi: stm32g0: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-585-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi_stm32g0.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi_stm32g0.c b/drivers/usb/typec/ucsi/ucsi_stm32g0.c
index 7b92f0c8de708..93fead0096b7b 100644
--- a/drivers/usb/typec/ucsi/ucsi_stm32g0.c
+++ b/drivers/usb/typec/ucsi/ucsi_stm32g0.c
@@ -626,7 +626,7 @@ static int ucsi_stm32g0_probe_bootloader(struct ucsi *ucsi)
 	return 0;
 }
 
-static int ucsi_stm32g0_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int ucsi_stm32g0_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 	struct ucsi_stm32g0 *g0;
@@ -763,7 +763,7 @@ static struct i2c_driver ucsi_stm32g0_i2c_driver = {
 		.of_match_table = of_match_ptr(ucsi_stm32g0_typec_of_match),
 		.pm = pm_sleep_ptr(&ucsi_stm32g0_pm_ops),
 	},
-	.probe = ucsi_stm32g0_probe,
+	.probe_new = ucsi_stm32g0_probe,
 	.remove = ucsi_stm32g0_remove,
 	.id_table = ucsi_stm32g0_typec_i2c_devid
 };
-- 
GitLab


From d24182b10cd5c734499c6185e9c63403ee1de5ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:17 +0100
Subject: [PATCH 1980/4122] usb: typec: ucsi/ucsi_ccg: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-584-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi_ccg.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi_ccg.c b/drivers/usb/typec/ucsi/ucsi_ccg.c
index 835f1c4372ba2..46441f1477f28 100644
--- a/drivers/usb/typec/ucsi/ucsi_ccg.c
+++ b/drivers/usb/typec/ucsi/ucsi_ccg.c
@@ -1338,8 +1338,7 @@ static struct attribute *ucsi_ccg_attrs[] = {
 };
 ATTRIBUTE_GROUPS(ucsi_ccg);
 
-static int ucsi_ccg_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int ucsi_ccg_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 	struct ucsi_ccg *uc;
@@ -1482,7 +1481,7 @@ static struct i2c_driver ucsi_ccg_driver = {
 		.dev_groups = ucsi_ccg_groups,
 		.acpi_match_table = amd_i2c_ucsi_match,
 	},
-	.probe = ucsi_ccg_probe,
+	.probe_new = ucsi_ccg_probe,
 	.remove = ucsi_ccg_remove,
 	.id_table = ucsi_ccg_device_id,
 };
-- 
GitLab


From f02586d70aeb491a17c179c857aa4a361760bf0c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:16 +0100
Subject: [PATCH 1981/4122] usb: typec: tcpm/tcpci_rt1711h: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221118224540.619276-583-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpci_rt1711h.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpci_rt1711h.c b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
index 7b217c712c11a..a0e9e3fe8564c 100644
--- a/drivers/usb/typec/tcpm/tcpci_rt1711h.c
+++ b/drivers/usb/typec/tcpm/tcpci_rt1711h.c
@@ -327,8 +327,7 @@ static int rt1711h_check_revision(struct i2c_client *i2c, struct rt1711h_chip *c
 	return ret;
 }
 
-static int rt1711h_probe(struct i2c_client *client,
-			 const struct i2c_device_id *i2c_id)
+static int rt1711h_probe(struct i2c_client *client)
 {
 	int ret;
 	struct rt1711h_chip *chip;
@@ -413,7 +412,7 @@ static struct i2c_driver rt1711h_i2c_driver = {
 		.name = "rt1711h",
 		.of_match_table = of_match_ptr(rt1711h_of_match),
 	},
-	.probe = rt1711h_probe,
+	.probe_new = rt1711h_probe,
 	.remove = rt1711h_remove,
 	.id_table = rt1711h_id,
 };
-- 
GitLab


From c852ec1c0192ff0ce032a1f0bbf23be01c98479d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:15 +0100
Subject: [PATCH 1982/4122] usb: typec: tcpm/tcpci_maxim: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-582-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpci_maxim.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpci_maxim.c b/drivers/usb/typec/tcpm/tcpci_maxim.c
index 03f89e6f1a78e..83e140ffcc3e3 100644
--- a/drivers/usb/typec/tcpm/tcpci_maxim.c
+++ b/drivers/usb/typec/tcpm/tcpci_maxim.c
@@ -438,7 +438,7 @@ static int tcpci_init(struct tcpci *tcpci, struct tcpci_data *data)
 	return -1;
 }
 
-static int max_tcpci_probe(struct i2c_client *client, const struct i2c_device_id *i2c_id)
+static int max_tcpci_probe(struct i2c_client *client)
 {
 	int ret;
 	struct max_tcpci_chip *chip;
@@ -519,7 +519,7 @@ static struct i2c_driver max_tcpci_i2c_driver = {
 		.name = "maxtcpc",
 		.of_match_table = of_match_ptr(max_tcpci_of_match),
 	},
-	.probe = max_tcpci_probe,
+	.probe_new = max_tcpci_probe,
 	.remove = max_tcpci_remove,
 	.id_table = max_tcpci_id,
 };
-- 
GitLab


From bdd0400d0f7245091ca8eff781825e93db1a135f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:14 +0100
Subject: [PATCH 1983/4122] usb: typec: tcpm/tcpci: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-581-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/tcpci.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/tcpm/tcpci.c b/drivers/usb/typec/tcpm/tcpci.c
index 72f8d1e876004..fe781a38dc826 100644
--- a/drivers/usb/typec/tcpm/tcpci.c
+++ b/drivers/usb/typec/tcpm/tcpci.c
@@ -810,8 +810,7 @@ void tcpci_unregister_port(struct tcpci *tcpci)
 }
 EXPORT_SYMBOL_GPL(tcpci_unregister_port);
 
-static int tcpci_probe(struct i2c_client *client,
-		       const struct i2c_device_id *i2c_id)
+static int tcpci_probe(struct i2c_client *client)
 {
 	struct tcpci_chip *chip;
 	int err;
@@ -881,7 +880,7 @@ static struct i2c_driver tcpci_i2c_driver = {
 		.name = "tcpci",
 		.of_match_table = of_match_ptr(tcpci_of_match),
 	},
-	.probe = tcpci_probe,
+	.probe_new = tcpci_probe,
 	.remove = tcpci_remove,
 	.id_table = tcpci_id,
 };
-- 
GitLab


From 3646730ee44f42dd91619e30c917c0140853a948 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:13 +0100
Subject: [PATCH 1984/4122] usb: typec: tcpm/fusb302: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221118224540.619276-580-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tcpm/fusb302.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/tcpm/fusb302.c b/drivers/usb/typec/tcpm/fusb302.c
index 721b2a548084c..1ffce00d94b46 100644
--- a/drivers/usb/typec/tcpm/fusb302.c
+++ b/drivers/usb/typec/tcpm/fusb302.c
@@ -1677,8 +1677,7 @@ static struct fwnode_handle *fusb302_fwnode_get(struct device *dev)
 	return fwnode;
 }
 
-static int fusb302_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int fusb302_probe(struct i2c_client *client)
 {
 	struct fusb302_chip *chip;
 	struct i2c_adapter *adapter = client->adapter;
@@ -1837,7 +1836,7 @@ static struct i2c_driver fusb302_driver = {
 		   .pm = &fusb302_pm_ops,
 		   .of_match_table = of_match_ptr(fusb302_dt_match),
 		   },
-	.probe = fusb302_probe,
+	.probe_new = fusb302_probe,
 	.remove = fusb302_remove,
 	.id_table = fusb302_i2c_device_id,
 };
-- 
GitLab


From b5583ea8b9ea659241e3f0cb8c9ca56f9a9630b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:12 +0100
Subject: [PATCH 1985/4122] usb: typec: hd3ss3220: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-579-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/hd3ss3220.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/hd3ss3220.c b/drivers/usb/typec/hd3ss3220.c
index 2a58185fb14c4..f128664cb130a 100644
--- a/drivers/usb/typec/hd3ss3220.c
+++ b/drivers/usb/typec/hd3ss3220.c
@@ -148,8 +148,7 @@ static const struct regmap_config config = {
 	.max_register = 0x0A,
 };
 
-static int hd3ss3220_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int hd3ss3220_probe(struct i2c_client *client)
 {
 	struct typec_capability typec_cap = { };
 	struct hd3ss3220 *hd3ss3220;
@@ -264,7 +263,7 @@ static struct i2c_driver hd3ss3220_driver = {
 		.name = "hd3ss3220",
 		.of_match_table = of_match_ptr(dev_ids),
 	},
-	.probe = hd3ss3220_probe,
+	.probe_new = hd3ss3220_probe,
 	.remove =  hd3ss3220_remove,
 };
 
-- 
GitLab


From cfb8e41ae81311eef73fc50e401e93543204de3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:11 +0100
Subject: [PATCH 1986/4122] usb: typec: anx7411: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-578-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/anx7411.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/typec/anx7411.c b/drivers/usb/typec/anx7411.c
index b8f3b75fd7eb9..3d5edce270a44 100644
--- a/drivers/usb/typec/anx7411.c
+++ b/drivers/usb/typec/anx7411.c
@@ -1440,8 +1440,7 @@ static int anx7411_psy_register(struct anx7411_data *ctx)
 	return PTR_ERR_OR_ZERO(ctx->psy);
 }
 
-static int anx7411_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int anx7411_i2c_probe(struct i2c_client *client)
 {
 	struct anx7411_data *plat;
 	struct device *dev = &client->dev;
@@ -1585,7 +1584,7 @@ static struct i2c_driver anx7411_driver = {
 		.of_match_table = anx_match_table,
 		.pm = &anx7411_pm_ops,
 	},
-	.probe = anx7411_i2c_probe,
+	.probe_new = anx7411_i2c_probe,
 	.remove = anx7411_i2c_remove,
 
 	.id_table = anx7411_id,
-- 
GitLab


From 9f7cc30769ac0681669b963ede092f12afe829b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:10 +0100
Subject: [PATCH 1987/4122] usb: phy: isp1301: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-577-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-isp1301.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/phy/phy-isp1301.c b/drivers/usb/phy/phy-isp1301.c
index c2777a5c1f4ed..f4ee14d985853 100644
--- a/drivers/usb/phy/phy-isp1301.c
+++ b/drivers/usb/phy/phy-isp1301.c
@@ -92,8 +92,7 @@ static int isp1301_phy_set_vbus(struct usb_phy *phy, int on)
 	return 0;
 }
 
-static int isp1301_probe(struct i2c_client *client,
-			 const struct i2c_device_id *i2c_id)
+static int isp1301_probe(struct i2c_client *client)
 {
 	struct isp1301 *isp;
 	struct usb_phy *phy;
@@ -133,7 +132,7 @@ static struct i2c_driver isp1301_driver = {
 		.name = DRV_NAME,
 		.of_match_table = isp1301_of_match,
 	},
-	.probe = isp1301_probe,
+	.probe_new = isp1301_probe,
 	.remove = isp1301_remove,
 	.id_table = isp1301_id,
 };
-- 
GitLab


From c3ed6965fe7400c97d806edb282ad08810acdee9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:09 +0100
Subject: [PATCH 1988/4122] usb: isp1301-omap: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-576-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/phy/phy-isp1301-omap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/phy/phy-isp1301-omap.c b/drivers/usb/phy/phy-isp1301-omap.c
index e5d3f206097c7..931610b76f3d8 100644
--- a/drivers/usb/phy/phy-isp1301-omap.c
+++ b/drivers/usb/phy/phy-isp1301-omap.c
@@ -1471,7 +1471,7 @@ isp1301_start_hnp(struct usb_otg *otg)
 /*-------------------------------------------------------------------------*/
 
 static int
-isp1301_probe(struct i2c_client *i2c, const struct i2c_device_id *id)
+isp1301_probe(struct i2c_client *i2c)
 {
 	int			status;
 	struct isp1301		*isp;
@@ -1616,7 +1616,7 @@ static struct i2c_driver isp1301_driver = {
 	.driver = {
 		.name	= "isp1301_omap",
 	},
-	.probe		= isp1301_probe,
+	.probe_new	= isp1301_probe,
 	.remove		= isp1301_remove,
 	.id_table	= isp1301_id,
 };
-- 
GitLab


From d4468280d8bcd68a7303efcb5a404efb680de1fc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:08 +0100
Subject: [PATCH 1989/4122] usb: usb4604: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-575-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/usb4604.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/misc/usb4604.c b/drivers/usb/misc/usb4604.c
index 2142af9bbdecd..6b5e77231efa1 100644
--- a/drivers/usb/misc/usb4604.c
+++ b/drivers/usb/misc/usb4604.c
@@ -97,8 +97,7 @@ static int usb4604_probe(struct usb4604 *hub)
 	return usb4604_switch_mode(hub, hub->mode);
 }
 
-static int usb4604_i2c_probe(struct i2c_client *i2c,
-			     const struct i2c_device_id *id)
+static int usb4604_i2c_probe(struct i2c_client *i2c)
 {
 	struct usb4604 *hub;
 
@@ -155,7 +154,7 @@ static struct i2c_driver usb4604_i2c_driver = {
 		.pm = pm_ptr(&usb4604_i2c_pm_ops),
 		.of_match_table = of_match_ptr(usb4604_of_match),
 	},
-	.probe		= usb4604_i2c_probe,
+	.probe_new	= usb4604_i2c_probe,
 	.id_table	= usb4604_id,
 };
 module_i2c_driver(usb4604_i2c_driver);
-- 
GitLab


From 4b1e537ad367b415f30fc37c1f4403ddd12a88d5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:07 +0100
Subject: [PATCH 1990/4122] usb: misc: usb3503: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-574-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/usb3503.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/misc/usb3503.c b/drivers/usb/misc/usb3503.c
index c70ca475c7c7e..bd47c4437ca46 100644
--- a/drivers/usb/misc/usb3503.c
+++ b/drivers/usb/misc/usb3503.c
@@ -280,8 +280,7 @@ err_clk:
 	return err;
 }
 
-static int usb3503_i2c_probe(struct i2c_client *i2c,
-			     const struct i2c_device_id *id)
+static int usb3503_i2c_probe(struct i2c_client *i2c)
 {
 	struct usb3503 *hub;
 	int err;
@@ -400,7 +399,7 @@ static struct i2c_driver usb3503_i2c_driver = {
 		.pm = pm_ptr(&usb3503_i2c_pm_ops),
 		.of_match_table = of_match_ptr(usb3503_of_match),
 	},
-	.probe		= usb3503_i2c_probe,
+	.probe_new	= usb3503_i2c_probe,
 	.remove		= usb3503_i2c_remove,
 	.id_table	= usb3503_id,
 };
-- 
GitLab


From 907140462eb511f3d98aa89c0665da1b618d3545 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:06 +0100
Subject: [PATCH 1991/4122] usb: usb251xb: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Richard Leitner <richard.leitner@skidata.com>
Link: https://lore.kernel.org/r/20221118224540.619276-573-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/usb251xb.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/misc/usb251xb.c b/drivers/usb/misc/usb251xb.c
index 54337d72bb9ff..e3abe67a155d0 100644
--- a/drivers/usb/misc/usb251xb.c
+++ b/drivers/usb/misc/usb251xb.c
@@ -699,8 +699,7 @@ static int usb251xb_probe(struct usb251xb *hub)
 	return 0;
 }
 
-static int usb251xb_i2c_probe(struct i2c_client *i2c,
-			      const struct i2c_device_id *id)
+static int usb251xb_i2c_probe(struct i2c_client *i2c)
 {
 	struct usb251xb *hub;
 
@@ -758,7 +757,7 @@ static struct i2c_driver usb251xb_i2c_driver = {
 		.of_match_table = of_match_ptr(usb251xb_of_match),
 		.pm = &usb251xb_pm_ops,
 	},
-	.probe    = usb251xb_i2c_probe,
+	.probe_new = usb251xb_i2c_probe,
 	.id_table = usb251xb_id,
 };
 
-- 
GitLab


From 02a476d932287cf3096f78962ccb70d94d6203c6 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 21 Nov 2022 10:46:45 +0100
Subject: [PATCH 1992/4122] kobject: make kobject_get_ownership() take a
 constant kobject *

The call, kobject_get_ownership(), does not modify the kobject passed
into it, so make it const.  This propagates down into the kobj_type
function callbacks so make the kobject passed into them also const,
ensuring that nothing in the kobject is being changed here.

This helps make it more obvious what calls and callbacks do, and do not,
modify structures passed to them.

Cc: Trond Myklebust <trond.myklebust@hammerspace.com>
Cc: Anna Schumaker <anna@kernel.org>
Cc: Roopa Prabhu <roopa@nvidia.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Chuck Lever <chuck.lever@oracle.com>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: linux-nfs@vger.kernel.org
Cc: bridge@lists.linux-foundation.org
Cc: netdev@vger.kernel.org
Acked-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
Link: https://lore.kernel.org/r/20221121094649.1556002-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/class.c    | 2 +-
 drivers/base/core.c     | 8 ++++----
 fs/nfs/sysfs.c          | 4 ++--
 include/linux/kobject.h | 8 ++++----
 lib/kobject.c           | 4 ++--
 net/bridge/br_if.c      | 2 +-
 net/core/net-sysfs.c    | 8 ++++----
 net/sunrpc/sysfs.c      | 8 ++++----
 8 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 8ceafb7d0203b..86ec554cfe604 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -62,7 +62,7 @@ static void class_release(struct kobject *kobj)
 	kfree(cp);
 }
 
-static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj)
+static const struct kobj_ns_type_operations *class_child_ns_type(const struct kobject *kobj)
 {
 	struct subsys_private *cp = to_subsys_private(kobj);
 	struct class *class = cp->class;
diff --git a/drivers/base/core.c b/drivers/base/core.c
index ab01828fe6c1f..a79b99ecf4d8b 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2335,7 +2335,7 @@ static void device_release(struct kobject *kobj)
 	kfree(p);
 }
 
-static const void *device_namespace(struct kobject *kobj)
+static const void *device_namespace(const struct kobject *kobj)
 {
 	const struct device *dev = kobj_to_dev(kobj);
 	const void *ns = NULL;
@@ -2346,7 +2346,7 @@ static const void *device_namespace(struct kobject *kobj)
 	return ns;
 }
 
-static void device_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+static void device_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid)
 {
 	const struct device *dev = kobj_to_dev(kobj);
 
@@ -2986,9 +2986,9 @@ static void class_dir_release(struct kobject *kobj)
 }
 
 static const
-struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj)
+struct kobj_ns_type_operations *class_dir_child_ns_type(const struct kobject *kobj)
 {
-	struct class_dir *dir = to_class_dir(kobj);
+	const struct class_dir *dir = to_class_dir(kobj);
 	return dir->class->ns_type;
 }
 
diff --git a/fs/nfs/sysfs.c b/fs/nfs/sysfs.c
index a6f7403669631..67a87800b3a97 100644
--- a/fs/nfs/sysfs.c
+++ b/fs/nfs/sysfs.c
@@ -26,7 +26,7 @@ static void nfs_netns_object_release(struct kobject *kobj)
 }
 
 static const struct kobj_ns_type_operations *nfs_netns_object_child_ns_type(
-		struct kobject *kobj)
+		const struct kobject *kobj)
 {
 	return &net_ns_type_operations;
 }
@@ -130,7 +130,7 @@ static void nfs_netns_client_release(struct kobject *kobj)
 	kfree(c);
 }
 
-static const void *nfs_netns_client_namespace(struct kobject *kobj)
+static const void *nfs_netns_client_namespace(const struct kobject *kobj)
 {
 	return container_of(kobj, struct nfs_netns_client, kobject)->net;
 }
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index fc40fc81aeb1d..d978dbceb50d1 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -113,7 +113,7 @@ extern struct kobject * __must_check kobject_get_unless_zero(
 extern void kobject_put(struct kobject *kobj);
 
 extern const void *kobject_namespace(struct kobject *kobj);
-extern void kobject_get_ownership(struct kobject *kobj,
+extern void kobject_get_ownership(const struct kobject *kobj,
 				  kuid_t *uid, kgid_t *gid);
 extern char *kobject_get_path(const struct kobject *kobj, gfp_t flag);
 
@@ -121,9 +121,9 @@ struct kobj_type {
 	void (*release)(struct kobject *kobj);
 	const struct sysfs_ops *sysfs_ops;
 	const struct attribute_group **default_groups;
-	const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
-	const void *(*namespace)(struct kobject *kobj);
-	void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid);
+	const struct kobj_ns_type_operations *(*child_ns_type)(const struct kobject *kobj);
+	const void *(*namespace)(const struct kobject *kobj);
+	void (*get_ownership)(const struct kobject *kobj, kuid_t *uid, kgid_t *gid);
 };
 
 struct kobj_uevent_env {
diff --git a/lib/kobject.c b/lib/kobject.c
index ba1017cd67d16..26e744a46d247 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -45,7 +45,7 @@ const void *kobject_namespace(struct kobject *kobj)
  * representation of given kobject. Normally used to adjust ownership of
  * objects in a container.
  */
-void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+void kobject_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid)
 {
 	*uid = GLOBAL_ROOT_UID;
 	*gid = GLOBAL_ROOT_GID;
@@ -907,7 +907,7 @@ static void kset_release(struct kobject *kobj)
 	kfree(kset);
 }
 
-static void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+static void kset_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid)
 {
 	if (kobj->parent)
 		kobject_get_ownership(kobj->parent, uid, gid);
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 228fd5b20f109..ad13b48e3e08a 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -262,7 +262,7 @@ static void release_nbp(struct kobject *kobj)
 	kfree(p);
 }
 
-static void brport_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid)
+static void brport_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t *gid)
 {
 	struct net_bridge_port *p = kobj_to_brport(kobj);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a8c5a7cd9701d..9cfc80b8ed25f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1020,7 +1020,7 @@ static void rx_queue_release(struct kobject *kobj)
 	netdev_put(queue->dev, &queue->dev_tracker);
 }
 
-static const void *rx_queue_namespace(struct kobject *kobj)
+static const void *rx_queue_namespace(const struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
 	struct device *dev = &queue->dev->dev;
@@ -1032,7 +1032,7 @@ static const void *rx_queue_namespace(struct kobject *kobj)
 	return ns;
 }
 
-static void rx_queue_get_ownership(struct kobject *kobj,
+static void rx_queue_get_ownership(const struct kobject *kobj,
 				   kuid_t *uid, kgid_t *gid)
 {
 	const struct net *net = rx_queue_namespace(kobj);
@@ -1623,7 +1623,7 @@ static void netdev_queue_release(struct kobject *kobj)
 	netdev_put(queue->dev, &queue->dev_tracker);
 }
 
-static const void *netdev_queue_namespace(struct kobject *kobj)
+static const void *netdev_queue_namespace(const struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 	struct device *dev = &queue->dev->dev;
@@ -1635,7 +1635,7 @@ static const void *netdev_queue_namespace(struct kobject *kobj)
 	return ns;
 }
 
-static void netdev_queue_get_ownership(struct kobject *kobj,
+static void netdev_queue_get_ownership(const struct kobject *kobj,
 				       kuid_t *uid, kgid_t *gid)
 {
 	const struct net *net = netdev_queue_namespace(kobj);
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index c1f559892ae8a..1e05a2d723f4c 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -31,7 +31,7 @@ static void rpc_sysfs_object_release(struct kobject *kobj)
 }
 
 static const struct kobj_ns_type_operations *
-rpc_sysfs_object_child_ns_type(struct kobject *kobj)
+rpc_sysfs_object_child_ns_type(const struct kobject *kobj)
 {
 	return &net_ns_type_operations;
 }
@@ -381,17 +381,17 @@ static void rpc_sysfs_xprt_release(struct kobject *kobj)
 	kfree(xprt);
 }
 
-static const void *rpc_sysfs_client_namespace(struct kobject *kobj)
+static const void *rpc_sysfs_client_namespace(const struct kobject *kobj)
 {
 	return container_of(kobj, struct rpc_sysfs_client, kobject)->net;
 }
 
-static const void *rpc_sysfs_xprt_switch_namespace(struct kobject *kobj)
+static const void *rpc_sysfs_xprt_switch_namespace(const struct kobject *kobj)
 {
 	return container_of(kobj, struct rpc_sysfs_xprt_switch, kobject)->net;
 }
 
-static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj)
+static const void *rpc_sysfs_xprt_namespace(const struct kobject *kobj)
 {
 	return container_of(kobj, struct rpc_sysfs_xprt,
 			    kobject)->xprt->xprt_net;
-- 
GitLab


From 542aa24646ca20ccedb70829a95254ce602cdcbd Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 21 Nov 2022 10:46:46 +0100
Subject: [PATCH 1993/4122] kobject: make kobject_namespace take a const *

kobject_namespace() should take a const *kobject as it does not modify
the kobject passed to it.  Change that, and the functions
kobj_child_ns_ops() and kobj_ns_ops() needed to also be changed to const
*.

Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221121094649.1556002-2-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kobject.h    | 2 +-
 include/linux/kobject_ns.h | 4 ++--
 lib/kobject.c              | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index d978dbceb50d1..5a2d58e10bf55 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -112,7 +112,7 @@ extern struct kobject * __must_check kobject_get_unless_zero(
 						struct kobject *kobj);
 extern void kobject_put(struct kobject *kobj);
 
-extern const void *kobject_namespace(struct kobject *kobj);
+extern const void *kobject_namespace(const struct kobject *kobj);
 extern void kobject_get_ownership(const struct kobject *kobj,
 				  kuid_t *uid, kgid_t *gid);
 extern char *kobject_get_path(const struct kobject *kobj, gfp_t flag);
diff --git a/include/linux/kobject_ns.h b/include/linux/kobject_ns.h
index 2b5b64256cf4b..be707748e7cec 100644
--- a/include/linux/kobject_ns.h
+++ b/include/linux/kobject_ns.h
@@ -47,8 +47,8 @@ struct kobj_ns_type_operations {
 
 int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
 int kobj_ns_type_registered(enum kobj_ns_type type);
-const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
-const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
+const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent);
+const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj);
 
 bool kobj_ns_current_may_mount(enum kobj_ns_type type);
 void *kobj_ns_grab_current(enum kobj_ns_type type);
diff --git a/lib/kobject.c b/lib/kobject.c
index 26e744a46d247..6e0bf03f4f364 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -25,7 +25,7 @@
  * and thus @kobj should have a namespace tag associated with it.  Returns
  * %NULL otherwise.
  */
-const void *kobject_namespace(struct kobject *kobj)
+const void *kobject_namespace(const struct kobject *kobj)
 {
 	const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
 
@@ -1039,7 +1039,7 @@ int kobj_ns_type_registered(enum kobj_ns_type type)
 	return registered;
 }
 
-const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
+const struct kobj_ns_type_operations *kobj_child_ns_ops(const struct kobject *parent)
 {
 	const struct kobj_ns_type_operations *ops = NULL;
 
@@ -1049,7 +1049,7 @@ const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
 	return ops;
 }
 
-const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
+const struct kobj_ns_type_operations *kobj_ns_ops(const struct kobject *kobj)
 {
 	return kobj_child_ns_ops(kobj->parent);
 }
-- 
GitLab


From c45a88bb3f6cdaeb29d8ee98463610ad815721ab Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 21 Nov 2022 10:46:47 +0100
Subject: [PATCH 1994/4122] kobject: kset_uevent_ops: make filter() callback
 take a const *
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The filter() callback in struct kset_uevent_ops does not modify the
kobject passed into it, so make the pointer const to enforce this
restriction.  When doing so, fix up all existing filter() callbacks to
have the correct signature to preserve the build.

Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: linux-media@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: linaro-mm-sig@lists.linaro.org
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Acked-by: Christian König <christian.koenig@amd.com> for the changes to
Link: https://lore.kernel.org/r/20221121094649.1556002-3-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/bus.c                    | 2 +-
 drivers/base/core.c                   | 4 ++--
 drivers/dma-buf/dma-buf-sysfs-stats.c | 2 +-
 include/linux/kobject.h               | 2 +-
 kernel/params.c                       | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index 7ca47e5b3c1f4..4ec6dbab73be5 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -163,7 +163,7 @@ static struct kobj_type bus_ktype = {
 	.release	= bus_release,
 };
 
-static int bus_uevent_filter(struct kobject *kobj)
+static int bus_uevent_filter(const struct kobject *kobj)
 {
 	const struct kobj_type *ktype = get_ktype(kobj);
 
diff --git a/drivers/base/core.c b/drivers/base/core.c
index a79b99ecf4d8b..005a2b092f3ee 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2362,12 +2362,12 @@ static struct kobj_type device_ktype = {
 };
 
 
-static int dev_uevent_filter(struct kobject *kobj)
+static int dev_uevent_filter(const struct kobject *kobj)
 {
 	const struct kobj_type *ktype = get_ktype(kobj);
 
 	if (ktype == &device_ktype) {
-		struct device *dev = kobj_to_dev(kobj);
+		const struct device *dev = kobj_to_dev(kobj);
 		if (dev->bus)
 			return 1;
 		if (dev->class)
diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c
index 2bba0babcb62b..f69d68122b9b1 100644
--- a/drivers/dma-buf/dma-buf-sysfs-stats.c
+++ b/drivers/dma-buf/dma-buf-sysfs-stats.c
@@ -132,7 +132,7 @@ void dma_buf_stats_teardown(struct dma_buf *dmabuf)
 
 
 /* Statistics files do not need to send uevents. */
-static int dmabuf_sysfs_uevent_filter(struct kobject *kobj)
+static int dmabuf_sysfs_uevent_filter(const struct kobject *kobj)
 {
 	return 0;
 }
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 5a2d58e10bf55..640f59d4b3de7 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -135,7 +135,7 @@ struct kobj_uevent_env {
 };
 
 struct kset_uevent_ops {
-	int (* const filter)(struct kobject *kobj);
+	int (* const filter)(const struct kobject *kobj);
 	const char *(* const name)(struct kobject *kobj);
 	int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env);
 };
diff --git a/kernel/params.c b/kernel/params.c
index 5b92310425c50..d2237209cedac 100644
--- a/kernel/params.c
+++ b/kernel/params.c
@@ -926,7 +926,7 @@ static const struct sysfs_ops module_sysfs_ops = {
 	.store = module_attr_store,
 };
 
-static int uevent_filter(struct kobject *kobj)
+static int uevent_filter(const struct kobject *kobj)
 {
 	const struct kobj_type *ktype = get_ktype(kobj);
 
-- 
GitLab


From a53d1acc978321734a8fd7388f2c050a7219ab69 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 21 Nov 2022 10:46:48 +0100
Subject: [PATCH 1995/4122] kobject: kset_uevent_ops: make name() callback take
 a const *

The name() callback in struct kset_uevent_ops does not modify the
kobject passed into it, so make the pointer const to enforce this
restriction.  When doing so, fix up the single existing name() callback
to have the correct signature to preserve the build.

Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20221121094649.1556002-4-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/core.c     | 4 ++--
 include/linux/kobject.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 005a2b092f3ee..a3e14143ec0cf 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -2376,9 +2376,9 @@ static int dev_uevent_filter(const struct kobject *kobj)
 	return 0;
 }
 
-static const char *dev_uevent_name(struct kobject *kobj)
+static const char *dev_uevent_name(const struct kobject *kobj)
 {
-	struct device *dev = kobj_to_dev(kobj);
+	const struct device *dev = kobj_to_dev(kobj);
 
 	if (dev->bus)
 		return dev->bus->name;
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 640f59d4b3de7..58a5b75612e35 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -136,7 +136,7 @@ struct kobj_uevent_env {
 
 struct kset_uevent_ops {
 	int (* const filter)(const struct kobject *kobj);
-	const char *(* const name)(struct kobject *kobj);
+	const char *(* const name)(const struct kobject *kobj);
 	int (* const uevent)(struct kobject *kobj, struct kobj_uevent_env *env);
 };
 
-- 
GitLab


From 9f041c5d8296b3a04cf3ead473a124fb538490dc Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 21 Nov 2022 10:46:49 +0100
Subject: [PATCH 1996/4122] driver core: pass a const * into of_device_uevent()

of_device_uevent() does not modify the struct device * passed into it,
so make it a const * to enforce this.  Also the documentation for the
function was really wrong so fix that up at the same time.

Cc: Rob Herring <robh+dt@kernel.org>
Cc: Frank Rowand <frowand.list@gmail.com>
Cc: devicetree@vger.kernel.org
Link: https://lore.kernel.org/r/20221121094649.1556002-5-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/of/device.c       | 6 +++---
 include/linux/of_device.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/of/device.c b/drivers/of/device.c
index 8cefe5a7d04e2..c674a13c30558 100644
--- a/drivers/of/device.c
+++ b/drivers/of/device.c
@@ -332,10 +332,10 @@ EXPORT_SYMBOL_GPL(of_device_modalias);
 
 /**
  * of_device_uevent - Display OF related uevent information
- * @dev:	Device to apply DMA configuration
- * @env:	Kernel object's userspace event reference
+ * @dev:	Device to display the uevent information for
+ * @env:	Kernel object's userspace event reference to fill up
  */
-void of_device_uevent(struct device *dev, struct kobj_uevent_env *env)
+void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
 	const char *compat, *type;
 	struct alias_prop *app;
diff --git a/include/linux/of_device.h b/include/linux/of_device.h
index 1a803e4335d30..ab7d557d541d5 100644
--- a/include/linux/of_device.h
+++ b/include/linux/of_device.h
@@ -35,7 +35,7 @@ extern const void *of_device_get_match_data(const struct device *dev);
 extern ssize_t of_device_modalias(struct device *dev, char *str, ssize_t len);
 extern int of_device_request_module(struct device *dev);
 
-extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env);
+extern void of_device_uevent(const struct device *dev, struct kobj_uevent_env *env);
 extern int of_device_uevent_modalias(struct device *dev, struct kobj_uevent_env *env);
 
 static inline struct device_node *of_cpu_device_node_get(int cpu)
@@ -64,7 +64,7 @@ static inline int of_driver_match_device(struct device *dev,
 	return 0;
 }
 
-static inline void of_device_uevent(struct device *dev,
+static inline void of_device_uevent(const struct device *dev,
 			struct kobj_uevent_env *env) { }
 
 static inline const void *of_device_get_match_data(const struct device *dev)
-- 
GitLab


From 947d66b68f3c4e7cf8f3f3500807b9d2a0de28ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ilpo=20J=C3=A4rvinen?= <ilpo.jarvinen@linux.intel.com>
Date: Fri, 11 Nov 2022 16:25:02 +0200
Subject: [PATCH 1997/4122] n_tty: Rename tail to old_tail in n_tty_read()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The local tail variable in n_tty_read() is used for one purpose, it
keeps the old tail. Thus, rename it appropriately to improve code
readability.

Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/22b37499-ff9a-7fc1-f6e0-58411328d122@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/n_tty.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c
index 597019690ae62..c8f56c9b1a1c8 100644
--- a/drivers/tty/n_tty.c
+++ b/drivers/tty/n_tty.c
@@ -2130,7 +2130,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
 	ssize_t retval = 0;
 	long timeout;
 	bool packet;
-	size_t tail;
+	size_t old_tail;
 
 	/*
 	 * Is this a continuation of a read started earler?
@@ -2193,7 +2193,7 @@ static ssize_t n_tty_read(struct tty_struct *tty, struct file *file,
 	}
 
 	packet = tty->ctrl.packet;
-	tail = ldata->read_tail;
+	old_tail = ldata->read_tail;
 
 	add_wait_queue(&tty->read_wait, &wait);
 	while (nr) {
@@ -2282,7 +2282,7 @@ more_to_be_read:
 		if (time)
 			timeout = time;
 	}
-	if (tail != ldata->read_tail)
+	if (old_tail != ldata->read_tail)
 		n_tty_kick_worker(tty);
 	up_read(&tty->termios_rwsem);
 
-- 
GitLab


From cbdf6759e5b798b35ceafbae50fb7dd2340c9751 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 15 Nov 2022 08:17:21 +0100
Subject: [PATCH 1998/4122] tty: serial: altera_jtaguart: remove flag from
 altera_jtaguart_rx_chars()

TTY_NORMAL is the only value it contains, so remove the variable and use
the constant instead.

Cc: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Link: https://lore.kernel.org/r/20221115071724.5185-1-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/altera_jtaguart.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index aa49553fac584..8d17297115841 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -126,18 +126,17 @@ static void altera_jtaguart_set_termios(struct uart_port *port,
 static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp)
 {
 	struct uart_port *port = &pp->port;
-	unsigned char ch, flag;
+	unsigned char ch;
 	unsigned long status;
 
 	while ((status = readl(port->membase + ALTERA_JTAGUART_DATA_REG)) &
 	       ALTERA_JTAGUART_DATA_RVALID_MSK) {
 		ch = status & ALTERA_JTAGUART_DATA_DATA_MSK;
-		flag = TTY_NORMAL;
 		port->icount.rx++;
 
 		if (uart_handle_sysrq_char(port, ch))
 			continue;
-		uart_insert_char(port, 0, 0, ch, flag);
+		uart_insert_char(port, 0, 0, ch, TTY_NORMAL);
 	}
 
 	tty_flip_buffer_push(&port->state->port);
-- 
GitLab


From 070fa1d2bf089937559183320c4066c065312665 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 15 Nov 2022 08:17:22 +0100
Subject: [PATCH 1999/4122] tty: serial: altera_jtaguart: remove unused
 altera_jtaguart::sigs

Nothing uses struct altera_jtaguart::sigs. Remove it.

Cc: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Link: https://lore.kernel.org/r/20221115071724.5185-2-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/altera_jtaguart.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index 8d17297115841..b83eade64b220 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -55,7 +55,6 @@
  */
 struct altera_jtaguart {
 	struct uart_port port;
-	unsigned int sigs;	/* Local copy of line sigs */
 	unsigned long imr;	/* Local IMR mirror */
 };
 
-- 
GitLab


From 4e2b16a62d9975c4f6135e6a79a8e00cbad812d6 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 15 Nov 2022 08:17:23 +0100
Subject: [PATCH 2000/4122] tty: serial: altera_jtaguart: use
 uart_port::read_status_mask

Instead of self-defined struct altera_jtaguart::imr, use preexisting
uart_port::read_status_mask.

Note that imr was ulong. But there is no reason for that, its values are
uints. And readl/writel's are used to read it/write to it.

Cc: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Link: https://lore.kernel.org/r/20221115071724.5185-3-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/altera_jtaguart.c | 41 +++++++++++-----------------
 1 file changed, 16 insertions(+), 25 deletions(-)

diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index b83eade64b220..6808abd277853 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -55,7 +55,6 @@
  */
 struct altera_jtaguart {
 	struct uart_port port;
-	unsigned long imr;	/* Local IMR mirror */
 };
 
 static unsigned int altera_jtaguart_tx_space(struct uart_port *port, u32 *ctlp)
@@ -84,29 +83,23 @@ static void altera_jtaguart_set_mctrl(struct uart_port *port, unsigned int sigs)
 
 static void altera_jtaguart_start_tx(struct uart_port *port)
 {
-	struct altera_jtaguart *pp =
-	    container_of(port, struct altera_jtaguart, port);
-
-	pp->imr |= ALTERA_JTAGUART_CONTROL_WE_MSK;
-	writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	port->read_status_mask |= ALTERA_JTAGUART_CONTROL_WE_MSK;
+	writel(port->read_status_mask,
+			port->membase + ALTERA_JTAGUART_CONTROL_REG);
 }
 
 static void altera_jtaguart_stop_tx(struct uart_port *port)
 {
-	struct altera_jtaguart *pp =
-	    container_of(port, struct altera_jtaguart, port);
-
-	pp->imr &= ~ALTERA_JTAGUART_CONTROL_WE_MSK;
-	writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	port->read_status_mask &= ~ALTERA_JTAGUART_CONTROL_WE_MSK;
+	writel(port->read_status_mask,
+			port->membase + ALTERA_JTAGUART_CONTROL_REG);
 }
 
 static void altera_jtaguart_stop_rx(struct uart_port *port)
 {
-	struct altera_jtaguart *pp =
-	    container_of(port, struct altera_jtaguart, port);
-
-	pp->imr &= ~ALTERA_JTAGUART_CONTROL_RE_MSK;
-	writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	port->read_status_mask &= ~ALTERA_JTAGUART_CONTROL_RE_MSK;
+	writel(port->read_status_mask,
+			port->membase + ALTERA_JTAGUART_CONTROL_REG);
 }
 
 static void altera_jtaguart_break_ctl(struct uart_port *port, int break_state)
@@ -163,7 +156,7 @@ static irqreturn_t altera_jtaguart_interrupt(int irq, void *data)
 	unsigned int isr;
 
 	isr = (readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) >>
-	       ALTERA_JTAGUART_CONTROL_RI_OFF) & pp->imr;
+	       ALTERA_JTAGUART_CONTROL_RI_OFF) & port->read_status_mask;
 
 	spin_lock(&port->lock);
 
@@ -187,8 +180,6 @@ static void altera_jtaguart_config_port(struct uart_port *port, int flags)
 
 static int altera_jtaguart_startup(struct uart_port *port)
 {
-	struct altera_jtaguart *pp =
-	    container_of(port, struct altera_jtaguart, port);
 	unsigned long flags;
 	int ret;
 
@@ -203,8 +194,9 @@ static int altera_jtaguart_startup(struct uart_port *port)
 	spin_lock_irqsave(&port->lock, flags);
 
 	/* Enable RX interrupts now */
-	pp->imr = ALTERA_JTAGUART_CONTROL_RE_MSK;
-	writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	port->read_status_mask = ALTERA_JTAGUART_CONTROL_RE_MSK;
+	writel(port->read_status_mask,
+			port->membase + ALTERA_JTAGUART_CONTROL_REG);
 
 	spin_unlock_irqrestore(&port->lock, flags);
 
@@ -213,15 +205,14 @@ static int altera_jtaguart_startup(struct uart_port *port)
 
 static void altera_jtaguart_shutdown(struct uart_port *port)
 {
-	struct altera_jtaguart *pp =
-	    container_of(port, struct altera_jtaguart, port);
 	unsigned long flags;
 
 	spin_lock_irqsave(&port->lock, flags);
 
 	/* Disable all interrupts now */
-	pp->imr = 0;
-	writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	port->read_status_mask = 0;
+	writel(port->read_status_mask,
+			port->membase + ALTERA_JTAGUART_CONTROL_REG);
 
 	spin_unlock_irqrestore(&port->lock, flags);
 
-- 
GitLab


From 4d167f635a4d33f6b645f60c2a265f93668fdd8d Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 15 Nov 2022 08:17:24 +0100
Subject: [PATCH 2001/4122] tty: serial: altera_jtaguart: remove struct
 altera_jtaguart

It contains only struct uart_port, so no need for another structure.
Remove it and convert the rest to use struct uart_port directly.

Cc: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Reviewed-by: Tobias Klauser <tklauser@distanz.ch>
Link: https://lore.kernel.org/r/20221115071724.5185-4-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/altera_jtaguart.c | 29 +++++++++-------------------
 1 file changed, 9 insertions(+), 20 deletions(-)

diff --git a/drivers/tty/serial/altera_jtaguart.c b/drivers/tty/serial/altera_jtaguart.c
index 6808abd277853..9f843d1cee40e 100644
--- a/drivers/tty/serial/altera_jtaguart.c
+++ b/drivers/tty/serial/altera_jtaguart.c
@@ -50,13 +50,6 @@
 #define ALTERA_JTAGUART_CONTROL_AC_MSK		0x00000400
 #define ALTERA_JTAGUART_CONTROL_WSPACE_MSK	0xFFFF0000
 
-/*
- * Local per-uart structure.
- */
-struct altera_jtaguart {
-	struct uart_port port;
-};
-
 static unsigned int altera_jtaguart_tx_space(struct uart_port *port, u32 *ctlp)
 {
 	u32 ctl = readl(port->membase + ALTERA_JTAGUART_CONTROL_REG);
@@ -115,9 +108,8 @@ static void altera_jtaguart_set_termios(struct uart_port *port,
 		tty_termios_copy_hw(termios, old);
 }
 
-static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp)
+static void altera_jtaguart_rx_chars(struct uart_port *port)
 {
-	struct uart_port *port = &pp->port;
 	unsigned char ch;
 	unsigned long status;
 
@@ -134,9 +126,8 @@ static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp)
 	tty_flip_buffer_push(&port->state->port);
 }
 
-static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp)
+static void altera_jtaguart_tx_chars(struct uart_port *port)
 {
-	struct uart_port *port = &pp->port;
 	unsigned int count;
 	u8 ch;
 
@@ -151,8 +142,6 @@ static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp)
 static irqreturn_t altera_jtaguart_interrupt(int irq, void *data)
 {
 	struct uart_port *port = data;
-	struct altera_jtaguart *pp =
-	    container_of(port, struct altera_jtaguart, port);
 	unsigned int isr;
 
 	isr = (readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) >>
@@ -161,9 +150,9 @@ static irqreturn_t altera_jtaguart_interrupt(int irq, void *data)
 	spin_lock(&port->lock);
 
 	if (isr & ALTERA_JTAGUART_CONTROL_RE_MSK)
-		altera_jtaguart_rx_chars(pp);
+		altera_jtaguart_rx_chars(port);
 	if (isr & ALTERA_JTAGUART_CONTROL_WE_MSK)
-		altera_jtaguart_tx_chars(pp);
+		altera_jtaguart_tx_chars(port);
 
 	spin_unlock(&port->lock);
 
@@ -265,7 +254,7 @@ static const struct uart_ops altera_jtaguart_ops = {
 };
 
 #define ALTERA_JTAGUART_MAXPORTS 1
-static struct altera_jtaguart altera_jtaguart_ports[ALTERA_JTAGUART_MAXPORTS];
+static struct uart_port altera_jtaguart_ports[ALTERA_JTAGUART_MAXPORTS];
 
 #if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE)
 
@@ -308,7 +297,7 @@ static void altera_jtaguart_console_putc(struct uart_port *port, unsigned char c
 static void altera_jtaguart_console_write(struct console *co, const char *s,
 					  unsigned int count)
 {
-	struct uart_port *port = &(altera_jtaguart_ports + co->index)->port;
+	struct uart_port *port = &altera_jtaguart_ports[co->index];
 
 	uart_console_write(port, s, count, altera_jtaguart_console_putc);
 }
@@ -320,7 +309,7 @@ static int __init altera_jtaguart_console_setup(struct console *co,
 
 	if (co->index < 0 || co->index >= ALTERA_JTAGUART_MAXPORTS)
 		return -EINVAL;
-	port = &altera_jtaguart_ports[co->index].port;
+	port = &altera_jtaguart_ports[co->index];
 	if (port->membase == NULL)
 		return -ENODEV;
 	return 0;
@@ -400,7 +389,7 @@ static int altera_jtaguart_probe(struct platform_device *pdev)
 	if (i >= ALTERA_JTAGUART_MAXPORTS)
 		return -EINVAL;
 
-	port = &altera_jtaguart_ports[i].port;
+	port = &altera_jtaguart_ports[i];
 
 	res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (res_mem)
@@ -444,7 +433,7 @@ static int altera_jtaguart_remove(struct platform_device *pdev)
 	if (i == -1)
 		i = 0;
 
-	port = &altera_jtaguart_ports[i].port;
+	port = &altera_jtaguart_ports[i];
 	uart_remove_one_port(&altera_jtaguart_driver, port);
 	iounmap(port->membase);
 
-- 
GitLab


From 94cdb9f33698478b0e7062586633c42c6158a786 Mon Sep 17 00:00:00 2001
From: Jiamei Xie <jiamei.xie@arm.com>
Date: Thu, 17 Nov 2022 18:32:37 +0800
Subject: [PATCH 2002/4122] serial: amba-pl011: avoid SBSA UART accessing DMACR
 register

Chapter "B Generic UART" in "ARM Server Base System Architecture" [1]
documentation describes a generic UART interface. Such generic UART
does not support DMA. In current code, sbsa_uart_pops and
amba_pl011_pops share the same stop_rx operation, which will invoke
pl011_dma_rx_stop, leading to an access of the DMACR register. This
commit adds a using_rx_dma check in pl011_dma_rx_stop to avoid the
access to DMACR register for SBSA UARTs which does not support DMA.

When the kernel enables DMA engine with "CONFIG_DMA_ENGINE=y", Linux
SBSA PL011 driver will access PL011 DMACR register in some functions.
For most real SBSA Pl011 hardware implementations, the DMACR write
behaviour will be ignored. So these DMACR operations will not cause
obvious problems. But for some virtual SBSA PL011 hardware, like Xen
virtual SBSA PL011 (vpl011) device, the behaviour might be different.
Xen vpl011 emulation will inject a data abort to guest, when guest is
accessing an unimplemented UART register. As Xen VPL011 is SBSA
compatible, it will not implement DMACR register. So when Linux SBSA
PL011 driver access DMACR register, it will get an unhandled data abort
fault and the application will get a segmentation fault:
Unhandled fault at 0xffffffc00944d048
Mem abort info:
  ESR = 0x96000000
  EC = 0x25: DABT (current EL), IL = 32 bits
  SET = 0, FnV = 0
  EA = 0, S1PTW = 0
  FSC = 0x00: ttbr address size fault
Data abort info:
  ISV = 0, ISS = 0x00000000
  CM = 0, WnR = 0
swapper pgtable: 4k pages, 39-bit VAs, pgdp=0000000020e2e000
[ffffffc00944d048] pgd=100000003ffff803, p4d=100000003ffff803, pud=100000003ffff803, pmd=100000003fffa803, pte=006800009c090f13
Internal error: ttbr address size fault: 96000000 [#1] PREEMPT SMP
...
Call trace:
 pl011_stop_rx+0x70/0x80
 tty_port_shutdown+0x7c/0xb4
 tty_port_close+0x60/0xcc
 uart_close+0x34/0x8c
 tty_release+0x144/0x4c0
 __fput+0x78/0x220
 ____fput+0x1c/0x30
 task_work_run+0x88/0xc0
 do_notify_resume+0x8d0/0x123c
 el0_svc+0xa8/0xc0
 el0t_64_sync_handler+0xa4/0x130
 el0t_64_sync+0x1a0/0x1a4
Code: b9000083 b901f001 794038a0 8b000042 (b9000041)
---[ end trace 83dd93df15c3216f ]---
note: bootlogd[132] exited with preempt_count 1
/etc/rcS.d/S07bootlogd: line 47: 132 Segmentation fault start-stop-daemon

This has been discussed in the Xen community, and we think it should fix
this in Linux. See [2] for more information.

[1] https://developer.arm.com/documentation/den0094/c/?lang=en
[2] https://lists.xenproject.org/archives/html/xen-devel/2022-11/msg00543.html

Fixes: 0dd1e247fd39 (drivers: PL011: add support for the ARM SBSA generic UART)
Signed-off-by: Jiamei Xie <jiamei.xie@arm.com>
Reviewed-by: Andre Przywara <andre.przywara@arm.com>
Link: https://lore.kernel.org/r/20221117103237.86856-1-jiamei.xie@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/amba-pl011.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 6d8552506091c..6b9deb4211b5e 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1044,6 +1044,9 @@ static void pl011_dma_rx_callback(void *data)
  */
 static inline void pl011_dma_rx_stop(struct uart_amba_port *uap)
 {
+	if (!uap->using_rx_dma)
+		return;
+
 	/* FIXME.  Just disable the DMA enable */
 	uap->dmacr &= ~UART011_RXDMAE;
 	pl011_write(uap->dmacr, uap, REG_DMACR);
-- 
GitLab


From 032d5a71ed378ffc6a2d41a187d8488a4f9fe415 Mon Sep 17 00:00:00 2001
From: delisun <delisun@pateo.com.cn>
Date: Thu, 10 Nov 2022 10:01:08 +0800
Subject: [PATCH 2003/4122] serial: pl011: Do not clear RX FIFO & RX interrupt
 in unthrottle.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Clearing the RX FIFO will cause data loss.
Copy the pl011_enabl_interrupts implementation, and remove the clear
interrupt and FIFO part of the code.

Fixes: 211565b10099 ("serial: pl011: UPSTAT_AUTORTS requires .throttle/unthrottle")
Signed-off-by: delisun <delisun@pateo.com.cn>
Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Link: https://lore.kernel.org/r/20221110020108.7700-1-delisun@pateo.com.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/amba-pl011.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c
index 6b9deb4211b5e..d75c39f4622b7 100644
--- a/drivers/tty/serial/amba-pl011.c
+++ b/drivers/tty/serial/amba-pl011.c
@@ -1830,8 +1830,17 @@ static void pl011_enable_interrupts(struct uart_amba_port *uap)
 static void pl011_unthrottle_rx(struct uart_port *port)
 {
 	struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port);
+	unsigned long flags;
 
-	pl011_enable_interrupts(uap);
+	spin_lock_irqsave(&uap->port.lock, flags);
+
+	uap->im = UART011_RTIM;
+	if (!pl011_dma_rx_running(uap))
+		uap->im |= UART011_RXIM;
+
+	pl011_write(uap->im, uap, REG_IMSC);
+
+	spin_unlock_irqrestore(&uap->port.lock, flags);
 }
 
 static int pl011_startup(struct uart_port *port)
-- 
GitLab


From b30e66863c60bb5c7da2151a65afb2ee419b9df4 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 11 Nov 2022 15:28:38 -0600
Subject: [PATCH 2004/4122] dt-bindings: serial: xlnx,opb-uartlite: Drop
 'contains' from 'xlnx,use-parity'

'contains' applies to arrays, but 'xlnx,use-parity' is a scalar. So drop
'contains' from the 'if' schema.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221111212838.4103828-1-robh@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/serial/xlnx,opb-uartlite.yaml          | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml b/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml
index f7617b88c7c36..2f4390e8d4e88 100644
--- a/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml
+++ b/Documentation/devicetree/bindings/serial/xlnx,opb-uartlite.yaml
@@ -67,8 +67,7 @@ allOf:
   - if:
       properties:
         xlnx,use-parity:
-          contains:
-            const: 1
+          const: 1
     then:
       required:
         - xlnx,odd-parity
-- 
GitLab


From 0d114e9ff940ebad8e88267013bf96c605a6b336 Mon Sep 17 00:00:00 2001
From: Valentin Caron <valentin.caron@foss.st.com>
Date: Fri, 18 Nov 2022 18:06:02 +0100
Subject: [PATCH 2005/4122] serial: stm32: move dma_request_chan() before
 clk_prepare_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If dma_request_chan() returns a PROBE_DEFER error, clk_disable_unprepare()
will be called and USART clock will be disabled. But early console can be
still active on the same USART.

While moving dma_request_chan() before clk_prepare_enable(), the clock
won't be taken in case of a DMA PROBE_DEFER error, and so it doesn't need
to be disabled. Then USART is still clocked for early console.

Fixes: a7770a4bfcf4 ("serial: stm32: defer probe for dma devices")
Reported-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Valentin Caron <valentin.caron@foss.st.com>
Link: https://lore.kernel.org/r/20221118170602.1057863-1-valentin.caron@foss.st.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/stm32-usart.c | 47 ++++++++++++++++----------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
index 24def72b25653..a1490033aa164 100644
--- a/drivers/tty/serial/stm32-usart.c
+++ b/drivers/tty/serial/stm32-usart.c
@@ -1680,22 +1680,10 @@ static int stm32_usart_serial_probe(struct platform_device *pdev)
 	if (!stm32port->info)
 		return -EINVAL;
 
-	ret = stm32_usart_init_port(stm32port, pdev);
-	if (ret)
-		return ret;
-
-	if (stm32port->wakeup_src) {
-		device_set_wakeup_capable(&pdev->dev, true);
-		ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq);
-		if (ret)
-			goto err_deinit_port;
-	}
-
 	stm32port->rx_ch = dma_request_chan(&pdev->dev, "rx");
-	if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER) {
-		ret = -EPROBE_DEFER;
-		goto err_wakeirq;
-	}
+	if (PTR_ERR(stm32port->rx_ch) == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
 	/* Fall back in interrupt mode for any non-deferral error */
 	if (IS_ERR(stm32port->rx_ch))
 		stm32port->rx_ch = NULL;
@@ -1709,6 +1697,17 @@ static int stm32_usart_serial_probe(struct platform_device *pdev)
 	if (IS_ERR(stm32port->tx_ch))
 		stm32port->tx_ch = NULL;
 
+	ret = stm32_usart_init_port(stm32port, pdev);
+	if (ret)
+		goto err_dma_tx;
+
+	if (stm32port->wakeup_src) {
+		device_set_wakeup_capable(&pdev->dev, true);
+		ret = dev_pm_set_wake_irq(&pdev->dev, stm32port->port.irq);
+		if (ret)
+			goto err_deinit_port;
+	}
+
 	if (stm32port->rx_ch && stm32_usart_of_dma_rx_probe(stm32port, pdev)) {
 		/* Fall back in interrupt mode */
 		dma_release_channel(stm32port->rx_ch);
@@ -1745,19 +1744,11 @@ err_port:
 	pm_runtime_set_suspended(&pdev->dev);
 	pm_runtime_put_noidle(&pdev->dev);
 
-	if (stm32port->tx_ch) {
+	if (stm32port->tx_ch)
 		stm32_usart_of_dma_tx_remove(stm32port, pdev);
-		dma_release_channel(stm32port->tx_ch);
-	}
-
 	if (stm32port->rx_ch)
 		stm32_usart_of_dma_rx_remove(stm32port, pdev);
 
-err_dma_rx:
-	if (stm32port->rx_ch)
-		dma_release_channel(stm32port->rx_ch);
-
-err_wakeirq:
 	if (stm32port->wakeup_src)
 		dev_pm_clear_wake_irq(&pdev->dev);
 
@@ -1767,6 +1758,14 @@ err_deinit_port:
 
 	stm32_usart_deinit_port(stm32port);
 
+err_dma_tx:
+	if (stm32port->tx_ch)
+		dma_release_channel(stm32port->tx_ch);
+
+err_dma_rx:
+	if (stm32port->rx_ch)
+		dma_release_channel(stm32port->rx_ch);
+
 	return ret;
 }
 
-- 
GitLab


From 24ce048b0d4d4d8542c26459e53be4b7840d374c Mon Sep 17 00:00:00 2001
From: Paul Fulghum <paulkf@microgate.com>
Date: Tue, 15 Nov 2022 09:38:32 -0800
Subject: [PATCH 2006/4122] tty: synclink_gt: unwind actions in error path of
 net device open

Resent again, last attempt still altered the plain text.

Zhengchao Shao <shaozhengchao@huawei.com> identified by inspection bugs in the error path of hdlcdev_open() in synclink_gt.c

The function did not fully unwind actions in the error path. The use of try_module_get()/module_put() is unnecessary, potentially hazardous and is removed. The synclink_gt driver is already pinned any point the net device is registered, a requirement for calling this entry point.

The call hdlc_open() to init the generic HDLC layer is moved to after driver level init/checks and proper rollback of previous actions is added. This is a more sensible ordering as the most common error paths are at the driver level and the driver level rollbacks require less processing than hdlc_open()/hdlc_close().

This has been tested with supported hardware.

Signed-off-by:Paul Fulghum <paulkf@microgate.com>

Link: https://lore.kernel.org/r/7599F007-8985-4469-BE00-52BD1530210E@microgate.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/synclink_gt.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 25e9befdda3a8..72b76cdde534d 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -1433,16 +1433,8 @@ static int hdlcdev_open(struct net_device *dev)
 	int rc;
 	unsigned long flags;
 
-	if (!try_module_get(THIS_MODULE))
-		return -EBUSY;
-
 	DBGINFO(("%s hdlcdev_open\n", dev->name));
 
-	/* generic HDLC layer open processing */
-	rc = hdlc_open(dev);
-	if (rc)
-		return rc;
-
 	/* arbitrate between network and tty opens */
 	spin_lock_irqsave(&info->netlock, flags);
 	if (info->port.count != 0 || info->netcount != 0) {
@@ -1461,6 +1453,16 @@ static int hdlcdev_open(struct net_device *dev)
 		return rc;
 	}
 
+	/* generic HDLC layer open processing */
+	rc = hdlc_open(dev);
+	if (rc) {
+		shutdown(info);
+		spin_lock_irqsave(&info->netlock, flags);
+		info->netcount = 0;
+		spin_unlock_irqrestore(&info->netlock, flags);
+		return rc;
+	}
+
 	/* assert RTS and DTR, apply hardware settings */
 	info->signals |= SerialSignal_RTS | SerialSignal_DTR;
 	program_hw(info);
@@ -1506,7 +1508,6 @@ static int hdlcdev_close(struct net_device *dev)
 	info->netcount=0;
 	spin_unlock_irqrestore(&info->netlock, flags);
 
-	module_put(THIS_MODULE);
 	return 0;
 }
 
-- 
GitLab


From 96e8298945010d4b0a0c21841566401848a42afc Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 14 Nov 2022 17:18:13 +0200
Subject: [PATCH 2007/4122] serdev: Replace poll loop by readx_poll_timeout()
 macro

The readx_poll_timeout() consolidates the necessary code under
macro. Replace current code with it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221114151813.37294-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/serdev.h | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/include/linux/serdev.h b/include/linux/serdev.h
index 66f624fc618ca..5f6bfe4f6d955 100644
--- a/include/linux/serdev.h
+++ b/include/linux/serdev.h
@@ -7,6 +7,7 @@
 
 #include <linux/types.h>
 #include <linux/device.h>
+#include <linux/iopoll.h>
 #include <linux/uaccess.h>
 #include <linux/termios.h>
 #include <linux/delay.h>
@@ -279,18 +280,10 @@ static inline bool serdev_device_get_cts(struct serdev_device *serdev)
 
 static inline int serdev_device_wait_for_cts(struct serdev_device *serdev, bool state, int timeout_ms)
 {
-	unsigned long timeout;
 	bool signal;
 
-	timeout = jiffies + msecs_to_jiffies(timeout_ms);
-	while (time_is_after_jiffies(timeout)) {
-		signal = serdev_device_get_cts(serdev);
-		if (signal == state)
-			return 0;
-		usleep_range(1000, 2000);
-	}
-
-	return -ETIMEDOUT;
+	return readx_poll_timeout(serdev_device_get_cts, serdev, signal, signal == state,
+				  2000, timeout_ms * 1000);
 }
 
 static inline int serdev_device_set_rts(struct serdev_device *serdev, bool enable)
-- 
GitLab


From 4f5cb8c5e9151c678fc2be533c070c6f7522940e Mon Sep 17 00:00:00 2001
From: Sherry Sun <sherry.sun@nxp.com>
Date: Thu, 10 Nov 2022 19:38:57 +0800
Subject: [PATCH 2008/4122] tty: serial: fsl_lpuart: enable wakeup source for
 lpuart

LPUART supports both synchronous wakeup and asynchronous wakeup(wakeup
the system when the UART clocks are shut-off), the synchronous wakeup is
configured by UARTCTRL_RIE interrupt, and the asynchronous wakeup is
configured by UARTBAUD_RXEDGIE interrupt.

Add lpuart_uport_is_active() to determine if the uart port needs to get
into the suspend states, also add lpuart_suspend_noirq() and
lpuart_resume_noirq() to enable and disable the wakeup irq bits if the
uart port needs to be set as wakeup source.

When use lpuart with DMA mode, it still needs to switch to the cpu mode
in .suspend() that enable cpu interrupts RIE and RXEDGIE as wakeup
source, after system resume back, needs to setup DMA again, .resume()
will share the HW setup code with .startup(), so abstract the same code
to the api like lpuart32_hw_setup().

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Link: https://lore.kernel.org/r/20221110113859.8485-2-sherry.sun@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 281 +++++++++++++++++++++++---------
 1 file changed, 200 insertions(+), 81 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index bd685491eead2..07c1524ef0087 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -18,6 +18,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
+#include <linux/pinctrl/consumer.h>
 #include <linux/serial_core.h>
 #include <linux/slab.h>
 #include <linux/tty_flip.h>
@@ -1627,10 +1628,23 @@ err:
 	sport->lpuart_dma_rx_use = false;
 }
 
+static void lpuart_hw_setup(struct lpuart_port *sport)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	lpuart_setup_watermark_enable(sport);
+
+	lpuart_rx_dma_startup(sport);
+	lpuart_tx_dma_startup(sport);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+}
+
 static int lpuart_startup(struct uart_port *port)
 {
 	struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
-	unsigned long flags;
 	unsigned char temp;
 
 	/* determine FIFO size and enable FIFO mode */
@@ -1644,15 +1658,7 @@ static int lpuart_startup(struct uart_port *port)
 					    UARTPFIFO_FIFOSIZE_MASK);
 
 	lpuart_request_dma(sport);
-
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	lpuart_setup_watermark_enable(sport);
-
-	lpuart_rx_dma_startup(sport);
-	lpuart_tx_dma_startup(sport);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
+	lpuart_hw_setup(sport);
 
 	return 0;
 }
@@ -1675,10 +1681,25 @@ static void lpuart32_configure(struct lpuart_port *sport)
 	lpuart32_write(&sport->port, temp, UARTCTRL);
 }
 
+static void lpuart32_hw_setup(struct lpuart_port *sport)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sport->port.lock, flags);
+
+	lpuart32_setup_watermark_enable(sport);
+
+	lpuart_rx_dma_startup(sport);
+	lpuart_tx_dma_startup(sport);
+
+	lpuart32_configure(sport);
+
+	spin_unlock_irqrestore(&sport->port.lock, flags);
+}
+
 static int lpuart32_startup(struct uart_port *port)
 {
 	struct lpuart_port *sport = container_of(port, struct lpuart_port, port);
-	unsigned long flags;
 	unsigned long temp;
 
 	/* determine FIFO size */
@@ -1703,17 +1724,8 @@ static int lpuart32_startup(struct uart_port *port)
 	}
 
 	lpuart_request_dma(sport);
+	lpuart32_hw_setup(sport);
 
-	spin_lock_irqsave(&sport->port.lock, flags);
-
-	lpuart32_setup_watermark_enable(sport);
-
-	lpuart_rx_dma_startup(sport);
-	lpuart_tx_dma_startup(sport);
-
-	lpuart32_configure(sport);
-
-	spin_unlock_irqrestore(&sport->port.lock, flags);
 	return 0;
 }
 
@@ -2766,97 +2778,204 @@ static int lpuart_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int __maybe_unused lpuart_suspend(struct device *dev)
+static void serial_lpuart_enable_wakeup(struct lpuart_port *sport, bool on)
 {
-	struct lpuart_port *sport = dev_get_drvdata(dev);
-	unsigned long temp;
-	bool irq_wake;
+	unsigned int val, baud;
 
 	if (lpuart_is_32(sport)) {
-		/* disable Rx/Tx and interrupts */
-		temp = lpuart32_read(&sport->port, UARTCTRL);
-		temp &= ~(UARTCTRL_TE | UARTCTRL_TIE | UARTCTRL_TCIE);
-		lpuart32_write(&sport->port, temp, UARTCTRL);
+		val = lpuart32_read(&sport->port, UARTCTRL);
+		baud = lpuart32_read(&sport->port, UARTBAUD);
+		if (on) {
+			/* set rx_watermark to 0 in wakeup source mode */
+			lpuart32_write(&sport->port, 0, UARTWATER);
+			val |= UARTCTRL_RIE;
+			/* clear RXEDGIF flag before enable RXEDGIE interrupt */
+			lpuart32_write(&sport->port, UARTSTAT_RXEDGIF, UARTSTAT);
+			baud |= UARTBAUD_RXEDGIE;
+		} else {
+			val &= ~UARTCTRL_RIE;
+			baud &= ~UARTBAUD_RXEDGIE;
+		}
+		lpuart32_write(&sport->port, val, UARTCTRL);
+		lpuart32_write(&sport->port, baud, UARTBAUD);
 	} else {
-		/* disable Rx/Tx and interrupts */
-		temp = readb(sport->port.membase + UARTCR2);
-		temp &= ~(UARTCR2_TE | UARTCR2_TIE | UARTCR2_TCIE);
-		writeb(temp, sport->port.membase + UARTCR2);
+		val = readb(sport->port.membase + UARTCR2);
+		if (on)
+			val |= UARTCR2_RIE;
+		else
+			val &= ~UARTCR2_RIE;
+		writeb(val, sport->port.membase + UARTCR2);
 	}
+}
 
-	uart_suspend_port(&lpuart_reg, &sport->port);
+static bool lpuart_uport_is_active(struct lpuart_port *sport)
+{
+	struct tty_port *port = &sport->port.state->port;
+	struct tty_struct *tty;
+	struct device *tty_dev;
+	int may_wake = 0;
 
-	/* uart_suspend_port() might set wakeup flag */
-	irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq));
+	tty = tty_port_tty_get(port);
+	if (tty) {
+		tty_dev = tty->dev;
+		may_wake = device_may_wakeup(tty_dev);
+		tty_kref_put(tty);
+	}
 
-	if (sport->lpuart_dma_rx_use) {
-		/*
-		 * EDMA driver during suspend will forcefully release any
-		 * non-idle DMA channels. If port wakeup is enabled or if port
-		 * is console port or 'no_console_suspend' is set the Rx DMA
-		 * cannot resume as expected, hence gracefully release the
-		 * Rx DMA path before suspend and start Rx DMA path on resume.
-		 */
-		if (irq_wake) {
-			del_timer_sync(&sport->lpuart_timer);
-			lpuart_dma_rx_free(&sport->port);
-		}
+	if ((tty_port_initialized(port) && may_wake) ||
+	    (!console_suspend_enabled && uart_console(&sport->port)))
+		return true;
+
+	return false;
+}
+
+static int __maybe_unused lpuart_suspend_noirq(struct device *dev)
+{
+	struct lpuart_port *sport = dev_get_drvdata(dev);
+	bool irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq));
+
+	if (lpuart_uport_is_active(sport))
+		serial_lpuart_enable_wakeup(sport, !!irq_wake);
+
+	pinctrl_pm_select_sleep_state(dev);
+
+	return 0;
+}
+
+static int __maybe_unused lpuart_resume_noirq(struct device *dev)
+{
+	struct lpuart_port *sport = dev_get_drvdata(dev);
+	unsigned int val;
+
+	pinctrl_pm_select_default_state(dev);
+
+	if (lpuart_uport_is_active(sport)) {
+		serial_lpuart_enable_wakeup(sport, false);
 
-		/* Disable Rx DMA to use UART port as wakeup source */
+		/* clear the wakeup flags */
 		if (lpuart_is_32(sport)) {
-			temp = lpuart32_read(&sport->port, UARTBAUD);
-			lpuart32_write(&sport->port, temp & ~UARTBAUD_RDMAE,
-				       UARTBAUD);
-		} else {
-			writeb(readb(sport->port.membase + UARTCR5) &
-			       ~UARTCR5_RDMAS, sport->port.membase + UARTCR5);
+			val = lpuart32_read(&sport->port, UARTSTAT);
+			lpuart32_write(&sport->port, val, UARTSTAT);
 		}
 	}
 
-	if (sport->lpuart_dma_tx_use) {
-		sport->dma_tx_in_progress = false;
-		dmaengine_terminate_all(sport->dma_tx_chan);
-	}
-
-	if (sport->port.suspended && !irq_wake)
-		lpuart_disable_clks(sport);
-
 	return 0;
 }
 
-static int __maybe_unused lpuart_resume(struct device *dev)
+static int __maybe_unused lpuart_suspend(struct device *dev)
 {
 	struct lpuart_port *sport = dev_get_drvdata(dev);
-	bool irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq));
+	unsigned long temp, flags;
 
-	if (sport->port.suspended && !irq_wake)
-		lpuart_enable_clks(sport);
+	uart_suspend_port(&lpuart_reg, &sport->port);
 
-	if (lpuart_is_32(sport))
-		lpuart32_setup_watermark_enable(sport);
-	else
-		lpuart_setup_watermark_enable(sport);
+	if (lpuart_uport_is_active(sport)) {
+		spin_lock_irqsave(&sport->port.lock, flags);
+		if (lpuart_is_32(sport)) {
+			/* disable Rx/Tx and interrupts */
+			temp = lpuart32_read(&sport->port, UARTCTRL);
+			temp &= ~(UARTCTRL_TE | UARTCTRL_TIE | UARTCTRL_TCIE);
+			lpuart32_write(&sport->port, temp, UARTCTRL);
+		} else {
+			/* disable Rx/Tx and interrupts */
+			temp = readb(sport->port.membase + UARTCR2);
+			temp &= ~(UARTCR2_TE | UARTCR2_TIE | UARTCR2_TCIE);
+			writeb(temp, sport->port.membase + UARTCR2);
+		}
+		spin_unlock_irqrestore(&sport->port.lock, flags);
 
-	if (sport->lpuart_dma_rx_use) {
-		if (irq_wake) {
-			if (!lpuart_start_rx_dma(sport))
-				rx_dma_timer_init(sport);
-			else
-				sport->lpuart_dma_rx_use = false;
+		if (sport->lpuart_dma_rx_use) {
+			/*
+			 * EDMA driver during suspend will forcefully release any
+			 * non-idle DMA channels. If port wakeup is enabled or if port
+			 * is console port or 'no_console_suspend' is set the Rx DMA
+			 * cannot resume as expected, hence gracefully release the
+			 * Rx DMA path before suspend and start Rx DMA path on resume.
+			 */
+			del_timer_sync(&sport->lpuart_timer);
+			lpuart_dma_rx_free(&sport->port);
+
+			/* Disable Rx DMA to use UART port as wakeup source */
+			spin_lock_irqsave(&sport->port.lock, flags);
+			if (lpuart_is_32(sport)) {
+				temp = lpuart32_read(&sport->port, UARTBAUD);
+				lpuart32_write(&sport->port, temp & ~UARTBAUD_RDMAE,
+					       UARTBAUD);
+			} else {
+				writeb(readb(sport->port.membase + UARTCR5) &
+				       ~UARTCR5_RDMAS, sport->port.membase + UARTCR5);
+			}
+			spin_unlock_irqrestore(&sport->port.lock, flags);
+		}
+
+		if (sport->lpuart_dma_tx_use) {
+			spin_lock_irqsave(&sport->port.lock, flags);
+			if (lpuart_is_32(sport)) {
+				temp = lpuart32_read(&sport->port, UARTBAUD);
+				temp &= ~UARTBAUD_TDMAE;
+				lpuart32_write(&sport->port, temp, UARTBAUD);
+			} else {
+				temp = readb(sport->port.membase + UARTCR5);
+				temp &= ~UARTCR5_TDMAS;
+				writeb(temp, sport->port.membase + UARTCR5);
+			}
+			spin_unlock_irqrestore(&sport->port.lock, flags);
+			sport->dma_tx_in_progress = false;
+			dmaengine_terminate_all(sport->dma_tx_chan);
 		}
 	}
 
-	lpuart_tx_dma_startup(sport);
+	return 0;
+}
 
-	if (lpuart_is_32(sport))
-		lpuart32_configure(sport);
+static void lpuart_console_fixup(struct lpuart_port *sport)
+{
+	struct tty_port *port = &sport->port.state->port;
+	struct uart_port *uport = &sport->port;
+	struct ktermios termios;
+
+	/* i.MX7ULP enter VLLS mode that lpuart module power off and registers
+	 * all lost no matter the port is wakeup source.
+	 * For console port, console baud rate setting lost and print messy
+	 * log when enable the console port as wakeup source. To avoid the
+	 * issue happen, user should not enable uart port as wakeup source
+	 * in VLLS mode, or restore console setting here.
+	 */
+	if (is_imx7ulp_lpuart(sport) && lpuart_uport_is_active(sport) &&
+	    console_suspend_enabled && uart_console(&sport->port)) {
+
+		mutex_lock(&port->mutex);
+		memset(&termios, 0, sizeof(struct ktermios));
+		termios.c_cflag = uport->cons->cflag;
+		if (port->tty && termios.c_cflag == 0)
+			termios = port->tty->termios;
+		uport->ops->set_termios(uport, &termios, NULL);
+		mutex_unlock(&port->mutex);
+	}
+}
+
+static int __maybe_unused lpuart_resume(struct device *dev)
+{
+	struct lpuart_port *sport = dev_get_drvdata(dev);
+
+	if (lpuart_uport_is_active(sport)) {
+		if (lpuart_is_32(sport))
+			lpuart32_hw_setup(sport);
+		else
+			lpuart_hw_setup(sport);
+	}
 
+	lpuart_console_fixup(sport);
 	uart_resume_port(&lpuart_reg, &sport->port);
 
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(lpuart_pm_ops, lpuart_suspend, lpuart_resume);
+static const struct dev_pm_ops lpuart_pm_ops = {
+	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq,
+				      lpuart_resume_noirq)
+	SET_SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume)
+};
 
 static struct platform_driver lpuart_driver = {
 	.probe		= lpuart_probe,
-- 
GitLab


From 43543e6f539b3e646348c253059f75e27d63c94d Mon Sep 17 00:00:00 2001
From: Sherry Sun <sherry.sun@nxp.com>
Date: Thu, 10 Nov 2022 19:38:58 +0800
Subject: [PATCH 2009/4122] tty: serial: fsl_lpuart: Add runtime pm support

Add runtime pm support to manage the lpuart clock.

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Link: https://lore.kernel.org/r/20221110113859.8485-3-sherry.sun@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 60 +++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 07c1524ef0087..9e04728bcc0b9 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -19,6 +19,7 @@
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/pinctrl/consumer.h>
+#include <linux/pm_runtime.h>
 #include <linux/serial_core.h>
 #include <linux/slab.h>
 #include <linux/tty_flip.h>
@@ -233,6 +234,7 @@
 
 /* Rx DMA timeout in ms, which is used to calculate Rx ring buffer size */
 #define DMA_RX_TIMEOUT		(10)
+#define UART_AUTOSUSPEND_TIMEOUT	3000
 
 #define DRIVER_NAME	"fsl-lpuart"
 #define DEV_NAME	"ttyLP"
@@ -793,6 +795,20 @@ static void lpuart32_start_tx(struct uart_port *port)
 	}
 }
 
+static void
+lpuart_uart_pm(struct uart_port *port, unsigned int state, unsigned int oldstate)
+{
+	switch (state) {
+	case UART_PM_STATE_OFF:
+		pm_runtime_mark_last_busy(port->dev);
+		pm_runtime_put_autosuspend(port->dev);
+		break;
+	default:
+		pm_runtime_get_sync(port->dev);
+		break;
+	}
+}
+
 /* return TIOCSER_TEMT when transmitter is not busy */
 static unsigned int lpuart_tx_empty(struct uart_port *port)
 {
@@ -2229,6 +2245,7 @@ static const struct uart_ops lpuart_pops = {
 	.startup	= lpuart_startup,
 	.shutdown	= lpuart_shutdown,
 	.set_termios	= lpuart_set_termios,
+	.pm		= lpuart_uart_pm,
 	.type		= lpuart_type,
 	.request_port	= lpuart_request_port,
 	.release_port	= lpuart_release_port,
@@ -2253,6 +2270,7 @@ static const struct uart_ops lpuart32_pops = {
 	.startup	= lpuart32_startup,
 	.shutdown	= lpuart32_shutdown,
 	.set_termios	= lpuart32_set_termios,
+	.pm		= lpuart_uart_pm,
 	.type		= lpuart_type,
 	.request_port	= lpuart_request_port,
 	.release_port	= lpuart_release_port,
@@ -2733,6 +2751,11 @@ static int lpuart_probe(struct platform_device *pdev)
 		handler = lpuart_int;
 	}
 
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, UART_AUTOSUSPEND_TIMEOUT);
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	ret = lpuart_global_reset(sport);
 	if (ret)
 		goto failed_reset;
@@ -2757,6 +2780,9 @@ failed_irq_request:
 failed_attach_port:
 failed_get_rs485:
 failed_reset:
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	lpuart_disable_clks(sport);
 	return ret;
 }
@@ -2775,9 +2801,30 @@ static int lpuart_remove(struct platform_device *pdev)
 	if (sport->dma_rx_chan)
 		dma_release_channel(sport->dma_rx_chan);
 
+	pm_runtime_disable(&pdev->dev);
+	pm_runtime_set_suspended(&pdev->dev);
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	return 0;
 }
 
+static int __maybe_unused lpuart_runtime_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct lpuart_port *sport = platform_get_drvdata(pdev);
+
+	lpuart_disable_clks(sport);
+
+	return 0;
+};
+
+static int __maybe_unused lpuart_runtime_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct lpuart_port *sport = platform_get_drvdata(pdev);
+
+	return lpuart_enable_clks(sport);
+};
+
 static void serial_lpuart_enable_wakeup(struct lpuart_port *sport, bool on)
 {
 	unsigned int val, baud;
@@ -2923,6 +2970,10 @@ static int __maybe_unused lpuart_suspend(struct device *dev)
 			sport->dma_tx_in_progress = false;
 			dmaengine_terminate_all(sport->dma_tx_chan);
 		}
+	} else if (pm_runtime_active(sport->port.dev)) {
+		lpuart_disable_clks(sport);
+		pm_runtime_disable(sport->port.dev);
+		pm_runtime_set_suspended(sport->port.dev);
 	}
 
 	return 0;
@@ -2957,12 +3008,19 @@ static void lpuart_console_fixup(struct lpuart_port *sport)
 static int __maybe_unused lpuart_resume(struct device *dev)
 {
 	struct lpuart_port *sport = dev_get_drvdata(dev);
+	int ret;
 
 	if (lpuart_uport_is_active(sport)) {
 		if (lpuart_is_32(sport))
 			lpuart32_hw_setup(sport);
 		else
 			lpuart_hw_setup(sport);
+	} else if (pm_runtime_active(sport->port.dev)) {
+		ret = lpuart_enable_clks(sport);
+		if (ret)
+			return ret;
+		pm_runtime_set_active(sport->port.dev);
+		pm_runtime_enable(sport->port.dev);
 	}
 
 	lpuart_console_fixup(sport);
@@ -2972,6 +3030,8 @@ static int __maybe_unused lpuart_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops lpuart_pm_ops = {
+	SET_RUNTIME_PM_OPS(lpuart_runtime_suspend,
+			   lpuart_runtime_resume, NULL)
 	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq,
 				      lpuart_resume_noirq)
 	SET_SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume)
-- 
GitLab


From 22cf92bb3908e1bbc22b03371e9e67e7bd455e0f Mon Sep 17 00:00:00 2001
From: Sherry Sun <sherry.sun@nxp.com>
Date: Thu, 10 Nov 2022 19:38:59 +0800
Subject: [PATCH 2010/4122] tty: serial: fsl_lpuart: Use pm_ptr() to avoid need
 to make pm __maybe_unused

Use pm_ptr() to remove the need to mark the pm functions as
__maybe_unused when the kernel is built without CONFIG_PM support.

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Link: https://lore.kernel.org/r/20221110113859.8485-4-sherry.sun@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index 9e04728bcc0b9..c1c8aa3e0fac8 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -2807,7 +2807,7 @@ static int lpuart_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static int __maybe_unused lpuart_runtime_suspend(struct device *dev)
+static int lpuart_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct lpuart_port *sport = platform_get_drvdata(pdev);
@@ -2817,7 +2817,7 @@ static int __maybe_unused lpuart_runtime_suspend(struct device *dev)
 	return 0;
 };
 
-static int __maybe_unused lpuart_runtime_resume(struct device *dev)
+static int lpuart_runtime_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct lpuart_port *sport = platform_get_drvdata(pdev);
@@ -2876,7 +2876,7 @@ static bool lpuart_uport_is_active(struct lpuart_port *sport)
 	return false;
 }
 
-static int __maybe_unused lpuart_suspend_noirq(struct device *dev)
+static int lpuart_suspend_noirq(struct device *dev)
 {
 	struct lpuart_port *sport = dev_get_drvdata(dev);
 	bool irq_wake = irqd_is_wakeup_set(irq_get_irq_data(sport->port.irq));
@@ -2889,7 +2889,7 @@ static int __maybe_unused lpuart_suspend_noirq(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused lpuart_resume_noirq(struct device *dev)
+static int lpuart_resume_noirq(struct device *dev)
 {
 	struct lpuart_port *sport = dev_get_drvdata(dev);
 	unsigned int val;
@@ -2909,7 +2909,7 @@ static int __maybe_unused lpuart_resume_noirq(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused lpuart_suspend(struct device *dev)
+static int lpuart_suspend(struct device *dev)
 {
 	struct lpuart_port *sport = dev_get_drvdata(dev);
 	unsigned long temp, flags;
@@ -3005,7 +3005,7 @@ static void lpuart_console_fixup(struct lpuart_port *sport)
 	}
 }
 
-static int __maybe_unused lpuart_resume(struct device *dev)
+static int lpuart_resume(struct device *dev)
 {
 	struct lpuart_port *sport = dev_get_drvdata(dev);
 	int ret;
@@ -3030,11 +3030,11 @@ static int __maybe_unused lpuart_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops lpuart_pm_ops = {
-	SET_RUNTIME_PM_OPS(lpuart_runtime_suspend,
+	RUNTIME_PM_OPS(lpuart_runtime_suspend,
 			   lpuart_runtime_resume, NULL)
-	SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq,
+	NOIRQ_SYSTEM_SLEEP_PM_OPS(lpuart_suspend_noirq,
 				      lpuart_resume_noirq)
-	SET_SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume)
+	SYSTEM_SLEEP_PM_OPS(lpuart_suspend, lpuart_resume)
 };
 
 static struct platform_driver lpuart_driver = {
@@ -3043,7 +3043,7 @@ static struct platform_driver lpuart_driver = {
 	.driver		= {
 		.name	= "fsl-lpuart",
 		.of_match_table = lpuart_dt_ids,
-		.pm	= &lpuart_pm_ops,
+		.pm	= pm_ptr(&lpuart_pm_ops),
 	},
 };
 
-- 
GitLab


From 8be3a7bf773700534a6e8f87f6ed2ed111254be5 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Tue, 22 Nov 2022 19:45:59 +0800
Subject: [PATCH 2011/4122] serial: pch: Fix PCI device refcount leak in
 pch_request_dma()

As comment of pci_get_slot() says, it returns a pci_device with its
refcount increased. The caller must decrement the reference count by
calling pci_dev_put().

Since 'dma_dev' is only used to filter the channel in filter(), we can
call pci_dev_put() before exiting from pch_request_dma(). Add the
missing pci_dev_put() for the normal and error path.

Fixes: 3c6a483275f4 ("Serial: EG20T: add PCH_UART driver")
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Link: https://lore.kernel.org/r/20221122114559.27692-1-wangxiongfeng2@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/pch_uart.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/tty/serial/pch_uart.c b/drivers/tty/serial/pch_uart.c
index c76719c0f453a..3d54a43768cd2 100644
--- a/drivers/tty/serial/pch_uart.c
+++ b/drivers/tty/serial/pch_uart.c
@@ -694,6 +694,7 @@ static void pch_request_dma(struct uart_port *port)
 	if (!chan) {
 		dev_err(priv->port.dev, "%s:dma_request_channel FAILS(Tx)\n",
 			__func__);
+		pci_dev_put(dma_dev);
 		return;
 	}
 	priv->chan_tx = chan;
@@ -710,6 +711,7 @@ static void pch_request_dma(struct uart_port *port)
 			__func__);
 		dma_release_channel(priv->chan_tx);
 		priv->chan_tx = NULL;
+		pci_dev_put(dma_dev);
 		return;
 	}
 
@@ -717,6 +719,8 @@ static void pch_request_dma(struct uart_port *port)
 	priv->rx_buf_virt = dma_alloc_coherent(port->dev, port->fifosize,
 				    &priv->rx_buf_dma, GFP_KERNEL);
 	priv->chan_rx = chan;
+
+	pci_dev_put(dma_dev);
 }
 
 static void pch_dma_rx_complete(void *arg)
-- 
GitLab


From 05f5747414c6ecb8a7f9b0c1dc10bcffa6dfb5ba Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 21 Nov 2022 19:15:18 +0100
Subject: [PATCH 2012/4122] PCI/portdrv: Set PCIE_PORT_SERVICE_HP for Root and
 Downstream Ports only

It is reported that on some systems pciehp binds to an Upstream Port and
attempts to operate it which causes devices below the Port to disappear
from the bus.

This happens because acpiphp sets dev->is_hotplug_bridge for that Port
(after receiving a Device Check notification on it from the platform
firmware via ACPI) during the enumeration of PCI devices.

get_port_device_capability() sees that dev->is_hotplug_bridge is set and
adds PCIE_PORT_SERVICE_HP to Port services (which allows pciehp to bind to
the Port in question) without consulting the PCIe type, which should be
either Root Port or Downstream Port for the hotplug capability to be
present.

Per PCIe r6.0, sec 7.5.3.2, the Slot Implemented bit is only valid for
Downstream Ports (including Root Ports), and PCIe hotplug depends on the
Slot Capabilities / Control / Status registers.

Make get_port_device_capability() more robust by adding a PCIe type check
to it before adding PCIE_PORT_SERVICE_HP to Port services which helps to
avoid the problem.

[bhelgaas: add spec citation]
Suggested-by: Lukas Wunner <lukas@wunner.de>
Link: https://lore.kernel.org/r/4786090.31r3eYUQgx@kreacher
Reported-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
---
 drivers/pci/pcie/portdrv_core.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/pci/pcie/portdrv_core.c b/drivers/pci/pcie/portdrv_core.c
index 1ac7fec47d6fb..98f0126aaa903 100644
--- a/drivers/pci/pcie/portdrv_core.c
+++ b/drivers/pci/pcie/portdrv_core.c
@@ -209,6 +209,8 @@ static int get_port_device_capability(struct pci_dev *dev)
 	int services = 0;
 
 	if (dev->is_hotplug_bridge &&
+	    (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+	     pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) &&
 	    (pcie_ports_native || host->native_pcie_hotplug)) {
 		services |= PCIE_PORT_SERVICE_HP;
 
-- 
GitLab


From c63a3be76df678b173c59f1d5dc19a21b2d1c753 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 21 Nov 2022 19:16:57 +0100
Subject: [PATCH 2013/4122] PCI: acpiphp: Avoid setting is_hotplug_bridge for
 PCIe Upstream Ports

It is reported that on some systems pciehp binds to an Upstream Port and
attempts to operate it which causes devices below the Port to disappear
from the bus.

This happens because acpiphp sets dev->is_hotplug_bridge for that Port
(after receiving a Device Check notification on it from the platform
firmware via ACPI) during the enumeration of PCI devices.

get_port_device_capability() sees that dev->is_hotplug_bridge is set and
adds PCIE_PORT_SERVICE_HP to Port services, which allows pciehp to bind to
the Port in question.

Even though this particular problem can be addressed by making the
portdrv_core checks more robust, it also causes power management to work
differently on the affected systems which generally is not desirable (PCIe
Ports with dev->is_hotplug_bridge set have to pass additional tests to be
allowed to go into the D3hot/cold power states which affects runtime PM of
devices below these Ports).

For this reason, amend check_hotplug_bridge() with a PCIe type check to
prevent it from setting dev->is_hotplug_bridge for Upstream Ports.

Suggested-by: Lukas Wunner <lukas@wunner.de>
Link: https://lore.kernel.org/r/2262230.ElGaqSPkdT@kreacher
Reported-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Tested-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
---
 drivers/pci/hotplug/acpiphp_glue.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c
index 6efa3d8db9a56..5b1f271c6034b 100644
--- a/drivers/pci/hotplug/acpiphp_glue.c
+++ b/drivers/pci/hotplug/acpiphp_glue.c
@@ -411,6 +411,14 @@ static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev)
 	if (dev->is_hotplug_bridge)
 		return;
 
+	/*
+	 * In the PCIe case, only Root Ports and Downstream Ports are capable of
+	 * accommodating hotplug devices, so avoid marking Upstream Ports as
+	 * "hotplug bridges".
+	 */
+	if (pci_is_pcie(dev) && pci_pcie_type(dev) == PCI_EXP_TYPE_UPSTREAM)
+		return;
+
 	list_for_each_entry(func, &slot->funcs, sibling) {
 		if (PCI_FUNC(dev->devfn) == func->function) {
 			dev->is_hotplug_bridge = 1;
-- 
GitLab


From cb6562c380832a930ffd1722ac9d479b454aed4e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 22 Nov 2022 15:37:39 -0400
Subject: [PATCH 2014/4122] RDMA/rxe: Do not NULL deref on debugging failure
 path

Correct the mistake, mr is obviously NULL in this code path.

Fixes: 2778b72b1df0 ("RDMA/rxe: Replace pr_xxx by rxe_dbg_xxx in rxe_mr.c")
Link: https://lore.kernel.org/r/Y3eeJW0AdyJYhYyQ@kili
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index b1423000e4bcd..b7c9ff1ddf0e1 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -519,7 +519,7 @@ int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
 
 	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
 	if (!mr) {
-		rxe_dbg_mr(mr, "No MR for key %#x\n", key);
+		rxe_dbg_qp(qp, "No MR for key %#x\n", key);
 		ret = -EINVAL;
 		goto err;
 	}
-- 
GitLab


From f67376d801499f4fa0838c18c1efcad8840e550d Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Tue, 22 Nov 2022 23:14:37 +0800
Subject: [PATCH 2015/4122] RDMA/rxe: Fix NULL-ptr-deref in rxe_qp_do_cleanup()
 when socket create failed

There is a null-ptr-deref when mount.cifs over rdma:

  BUG: KASAN: null-ptr-deref in rxe_qp_do_cleanup+0x2f3/0x360 [rdma_rxe]
  Read of size 8 at addr 0000000000000018 by task mount.cifs/3046

  CPU: 2 PID: 3046 Comm: mount.cifs Not tainted 6.1.0-rc5+ #62
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc3
  Call Trace:
   <TASK>
   dump_stack_lvl+0x34/0x44
   kasan_report+0xad/0x130
   rxe_qp_do_cleanup+0x2f3/0x360 [rdma_rxe]
   execute_in_process_context+0x25/0x90
   __rxe_cleanup+0x101/0x1d0 [rdma_rxe]
   rxe_create_qp+0x16a/0x180 [rdma_rxe]
   create_qp.part.0+0x27d/0x340
   ib_create_qp_kernel+0x73/0x160
   rdma_create_qp+0x100/0x230
   _smbd_get_connection+0x752/0x20f0
   smbd_get_connection+0x21/0x40
   cifs_get_tcp_session+0x8ef/0xda0
   mount_get_conns+0x60/0x750
   cifs_mount+0x103/0xd00
   cifs_smb3_do_mount+0x1dd/0xcb0
   smb3_get_tree+0x1d5/0x300
   vfs_get_tree+0x41/0xf0
   path_mount+0x9b3/0xdd0
   __x64_sys_mount+0x190/0x1d0
   do_syscall_64+0x35/0x80
   entry_SYSCALL_64_after_hwframe+0x46/0xb0

The root cause of the issue is the socket create failed in
rxe_qp_init_req().

So move the reset rxe_qp_do_cleanup() after the NULL ptr check.

Fixes: 8700e3e7c485 ("Soft RoCE driver")
Link: https://lore.kernel.org/r/20221122151437.1057671-1-zhangxiaoxu5@huawei.com
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_qp.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_qp.c b/drivers/infiniband/sw/rxe/rxe_qp.c
index 46f6c74ce00e1..ab72db68b58f6 100644
--- a/drivers/infiniband/sw/rxe/rxe_qp.c
+++ b/drivers/infiniband/sw/rxe/rxe_qp.c
@@ -817,12 +817,12 @@ static void rxe_qp_do_cleanup(struct work_struct *work)
 	if (qp->resp.mr)
 		rxe_put(qp->resp.mr);
 
-	if (qp_type(qp) == IB_QPT_RC)
-		sk_dst_reset(qp->sk->sk);
-
 	free_rd_atomic_resources(qp);
 
 	if (qp->sk) {
+		if (qp_type(qp) == IB_QPT_RC)
+			sk_dst_reset(qp->sk->sk);
+
 		kernel_sock_shutdown(qp->sk, SHUT_RDWR);
 		sock_release(qp->sk);
 	}
-- 
GitLab


From 9676f40618df9f8e1ab681486021d6c0df86c5fa Mon Sep 17 00:00:00 2001
From: Ian Cowan <ian@linux.cowan.aero>
Date: Sat, 12 Nov 2022 09:28:57 -0500
Subject: [PATCH 2016/4122] PCI: shpchp: Remove unused get_mode1_ECC_cap
 callback

The ->get_mode1_ECC_cap callback in the shpchp_hpc_ops struct is never
called, so remove it.

[bhelgaas: squash]
Link: https://lore.kernel.org/r/20221112142859.319733-2-ian@linux.cowan.aero
Link: https://lore.kernel.org/r/20221112142859.319733-3-ian@linux.cowan.aero
Link: https://lore.kernel.org/r/20221112142859.319733-4-ian@linux.cowan.aero
Signed-off-by: Ian Cowan <ian@linux.cowan.aero>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/hotplug/TODO         |  3 ---
 drivers/pci/hotplug/shpchp.h     |  1 -
 drivers/pci/hotplug/shpchp_hpc.c | 18 ------------------
 3 files changed, 22 deletions(-)

diff --git a/drivers/pci/hotplug/TODO b/drivers/pci/hotplug/TODO
index 88f217c82b4ff..fdb8dd6ea24dc 100644
--- a/drivers/pci/hotplug/TODO
+++ b/drivers/pci/hotplug/TODO
@@ -58,9 +58,6 @@ shpchp:
   pciehp with commit 82a9e79ef132 ("PCI: pciehp: remove hpc_ops").  Clarify
   if there was a specific reason not to apply the same change to shpchp.
 
-* The ->get_mode1_ECC_cap callback in shpchp_hpc_ops is never invoked.
-  Why was it introduced?  Can it be removed?
-
 * The hardirq handler shpc_isr() queues events on a workqueue.  It can be
   simplified by converting it to threaded IRQ handling.  Use pciehp as a
   template.
diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h
index 6e85885b554c5..3a97f455336e6 100644
--- a/drivers/pci/hotplug/shpchp.h
+++ b/drivers/pci/hotplug/shpchp.h
@@ -311,7 +311,6 @@ struct hpc_ops {
 	int (*get_latch_status)(struct slot *slot, u8 *status);
 	int (*get_adapter_status)(struct slot *slot, u8 *status);
 	int (*get_adapter_speed)(struct slot *slot, enum pci_bus_speed *speed);
-	int (*get_mode1_ECC_cap)(struct slot *slot, u8 *mode);
 	int (*get_prog_int)(struct slot *slot, u8 *prog_int);
 	int (*query_power_fault)(struct slot *slot);
 	void (*green_led_on)(struct slot *slot);
diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c
index bd7557ca49108..48e4daefc44af 100644
--- a/drivers/pci/hotplug/shpchp_hpc.c
+++ b/drivers/pci/hotplug/shpchp_hpc.c
@@ -489,23 +489,6 @@ static int hpc_get_adapter_speed(struct slot *slot, enum pci_bus_speed *value)
 	return retval;
 }
 
-static int hpc_get_mode1_ECC_cap(struct slot *slot, u8 *mode)
-{
-	int retval = 0;
-	struct controller *ctrl = slot->ctrl;
-	u16 sec_bus_status = shpc_readw(ctrl, SEC_BUS_CONFIG);
-	u8 pi = shpc_readb(ctrl, PROG_INTERFACE);
-
-	if (pi == 2) {
-		*mode = (sec_bus_status & 0x0100) >> 8;
-	} else {
-		retval = -1;
-	}
-
-	ctrl_dbg(ctrl, "Mode 1 ECC cap = %d\n", *mode);
-	return retval;
-}
-
 static int hpc_query_power_fault(struct slot *slot)
 {
 	struct controller *ctrl = slot->ctrl;
@@ -900,7 +883,6 @@ static const struct hpc_ops shpchp_hpc_ops = {
 	.get_adapter_status		= hpc_get_adapter_status,
 
 	.get_adapter_speed		= hpc_get_adapter_speed,
-	.get_mode1_ECC_cap		= hpc_get_mode1_ECC_cap,
 	.get_prog_int			= hpc_get_prog_int,
 
 	.query_power_fault		= hpc_query_power_fault,
-- 
GitLab


From 198dd8aedee6a7d2de0dfa739f9a008a938f6848 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 23 Nov 2022 12:40:11 +1100
Subject: [PATCH 2017/4122] xfs: punching delalloc extents on write failure is
 racy

xfs_buffered_write_iomap_end() has a comment about the safety of
punching delalloc extents based holding the IOLOCK_EXCL. This
comment is wrong, and punching delalloc extents is not race free.

When we punch out a delalloc extent after a write failure in
xfs_buffered_write_iomap_end(), we punch out the page cache with
truncate_pagecache_range() before we punch out the delalloc extents.
At this point, we only hold the IOLOCK_EXCL, so there is nothing
stopping mmap() write faults racing with this cleanup operation,
reinstantiating a folio over the range we are about to punch and
hence requiring the delalloc extent to be kept.

If this race condition is hit, we can end up with a dirty page in
the page cache that has no delalloc extent or space reservation
backing it. This leads to bad things happening at writeback time.

To avoid this race condition, we need the page cache truncation to
be atomic w.r.t. the extent manipulation. We can do this by holding
the mapping->invalidate_lock exclusively across this operation -
this will prevent new pages from being inserted into the page cache
whilst we are removing the pages and the backing extent and space
reservation.

Taking the mapping->invalidate_lock exclusively in the buffered
write IO path is safe - it naturally nests inside the IOLOCK (see
truncate and fallocate paths). iomap_zero_range() can be called from
under the mapping->invalidate_lock (from the truncate path via
either xfs_zero_eof() or xfs_truncate_page(), but iomap_zero_iter()
will not instantiate new delalloc pages (because it skips holes) and
hence will not ever need to punch out delalloc extents on failure.

Fix the locking issue, and clean up the code logic a little to avoid
unnecessary work if we didn't allocate the delalloc extent or wrote
the entire region we allocated.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_iomap.c | 41 +++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 5cea069a38b4e..a2e45ea1b0cb3 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1147,6 +1147,10 @@ xfs_buffered_write_iomap_end(
 		written = 0;
 	}
 
+	/* If we didn't reserve the blocks, we're not allowed to punch them. */
+	if (!(iomap->flags & IOMAP_F_NEW))
+		return 0;
+
 	/*
 	 * start_fsb refers to the first unused block after a short write. If
 	 * nothing was written, round offset down to point at the first block in
@@ -1158,27 +1162,28 @@ xfs_buffered_write_iomap_end(
 		start_fsb = XFS_B_TO_FSB(mp, offset + written);
 	end_fsb = XFS_B_TO_FSB(mp, offset + length);
 
+	/* Nothing to do if we've written the entire delalloc extent */
+	if (start_fsb >= end_fsb)
+		return 0;
+
 	/*
-	 * Trim delalloc blocks if they were allocated by this write and we
-	 * didn't manage to write the whole range.
-	 *
-	 * We don't need to care about racing delalloc as we hold i_mutex
-	 * across the reserve/allocate/unreserve calls. If there are delalloc
-	 * blocks in the range, they are ours.
+	 * Lock the mapping to avoid races with page faults re-instantiating
+	 * folios and dirtying them via ->page_mkwrite between the page cache
+	 * truncation and the delalloc extent removal. Failing to do this can
+	 * leave dirty pages with no space reservation in the cache.
 	 */
-	if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) {
-		truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
-					 XFS_FSB_TO_B(mp, end_fsb) - 1);
-
-		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-					       end_fsb - start_fsb);
-		if (error && !xfs_is_shutdown(mp)) {
-			xfs_alert(mp, "%s: unable to clean up ino %lld",
-				__func__, ip->i_ino);
-			return error;
-		}
+	filemap_invalidate_lock(inode->i_mapping);
+	truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
+				 XFS_FSB_TO_B(mp, end_fsb) - 1);
+
+	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+				       end_fsb - start_fsb);
+	filemap_invalidate_unlock(inode->i_mapping);
+	if (error && !xfs_is_shutdown(mp)) {
+		xfs_alert(mp, "%s: unable to clean up ino %lld",
+			__func__, ip->i_ino);
+		return error;
 	}
-
 	return 0;
 }
 
-- 
GitLab


From b71f889c18ada210a97aa3eb5e00c0de552234c6 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 23 Nov 2022 12:40:12 +1100
Subject: [PATCH 2018/4122] xfs: use byte ranges for write cleanup ranges

xfs_buffered_write_iomap_end() currently converts the byte ranges
passed to it to filesystem blocks to pass them to the bmap code to
punch out delalloc blocks, but then has to convert filesytem
blocks back to byte ranges for page cache truncate.

We're about to make the page cache truncate go away and replace it
with a page cache walk, so having to convert everything to/from/to
filesystem blocks is messy and error-prone. It is much easier to
pass around byte ranges and convert to page indexes and/or
filesystem blocks only where those units are needed.

In preparation for the page cache walk being added, add a helper
that converts byte ranges to filesystem blocks and calls
xfs_bmap_punch_delalloc_range() and convert
xfs_buffered_write_iomap_end() to calculate limits in byte ranges.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_iomap.c | 40 +++++++++++++++++++++++++---------------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index a2e45ea1b0cb3..7bb55dbc19d35 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1120,6 +1120,20 @@ out_unlock:
 	return error;
 }
 
+static int
+xfs_buffered_write_delalloc_punch(
+	struct inode		*inode,
+	loff_t			start_byte,
+	loff_t			end_byte)
+{
+	struct xfs_mount	*mp = XFS_M(inode->i_sb);
+	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
+	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
+
+	return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb,
+				end_fsb - start_fsb);
+}
+
 static int
 xfs_buffered_write_iomap_end(
 	struct inode		*inode,
@@ -1129,10 +1143,9 @@ xfs_buffered_write_iomap_end(
 	unsigned		flags,
 	struct iomap		*iomap)
 {
-	struct xfs_inode	*ip = XFS_I(inode);
-	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fileoff_t		start_fsb;
-	xfs_fileoff_t		end_fsb;
+	struct xfs_mount	*mp = XFS_M(inode->i_sb);
+	loff_t			start_byte;
+	loff_t			end_byte;
 	int			error = 0;
 
 	if (iomap->type != IOMAP_DELALLOC)
@@ -1157,13 +1170,13 @@ xfs_buffered_write_iomap_end(
 	 * the range.
 	 */
 	if (unlikely(!written))
-		start_fsb = XFS_B_TO_FSBT(mp, offset);
+		start_byte = round_down(offset, mp->m_sb.sb_blocksize);
 	else
-		start_fsb = XFS_B_TO_FSB(mp, offset + written);
-	end_fsb = XFS_B_TO_FSB(mp, offset + length);
+		start_byte = round_up(offset + written, mp->m_sb.sb_blocksize);
+	end_byte = round_up(offset + length, mp->m_sb.sb_blocksize);
 
 	/* Nothing to do if we've written the entire delalloc extent */
-	if (start_fsb >= end_fsb)
+	if (start_byte >= end_byte)
 		return 0;
 
 	/*
@@ -1173,15 +1186,12 @@ xfs_buffered_write_iomap_end(
 	 * leave dirty pages with no space reservation in the cache.
 	 */
 	filemap_invalidate_lock(inode->i_mapping);
-	truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb),
-				 XFS_FSB_TO_B(mp, end_fsb) - 1);
-
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-				       end_fsb - start_fsb);
+	truncate_pagecache_range(inode, start_byte, end_byte - 1);
+	error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte);
 	filemap_invalidate_unlock(inode->i_mapping);
 	if (error && !xfs_is_shutdown(mp)) {
-		xfs_alert(mp, "%s: unable to clean up ino %lld",
-			__func__, ip->i_ino);
+		xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
+			__func__, XFS_I(inode)->i_ino);
 		return error;
 	}
 	return 0;
-- 
GitLab


From 9c7babf94a0d686b552e53aded8d4703d1b8b92b Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Wed, 23 Nov 2022 12:44:38 +1100
Subject: [PATCH 2019/4122] xfs,iomap: move delalloc punching to iomap

Because that's what Christoph wants for this error handling path
only XFS uses.

It requires a new iomap export for handling errors over delalloc
ranges. This is basically the XFS code as is stands, but even though
Christoph wants this as iomap funcitonality, we still have
to call it from the filesystem specific ->iomap_end callback, and
call into the iomap code with yet another filesystem specific
callback to punch the delalloc extent within the defined ranges.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 60 ++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_iomap.c     | 47 ++++++---------------------------
 include/linux/iomap.h  |  4 +++
 3 files changed, 72 insertions(+), 39 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 91ee0b308e13d..734b761a1e4ab 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -832,6 +832,66 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
 
+/*
+ * When a short write occurs, the filesystem may need to remove reserved space
+ * that was allocated in ->iomap_begin from it's ->iomap_end method. For
+ * filesystems that use delayed allocation, we need to punch out delalloc
+ * extents from the range that are not dirty in the page cache. As the write can
+ * race with page faults, there can be dirty pages over the delalloc extent
+ * outside the range of a short write but still within the delalloc extent
+ * allocated for this iomap.
+ *
+ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
+ * simplify range iterations, but converts them back to {offset,len} tuples for
+ * the punch callback.
+ */
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+		struct iomap *iomap, loff_t pos, loff_t length,
+		ssize_t written,
+		int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+{
+	loff_t			start_byte;
+	loff_t			end_byte;
+	int			blocksize = i_blocksize(inode);
+	int			error = 0;
+
+	if (iomap->type != IOMAP_DELALLOC)
+		return 0;
+
+	/* If we didn't reserve the blocks, we're not allowed to punch them. */
+	if (!(iomap->flags & IOMAP_F_NEW))
+		return 0;
+
+	/*
+	 * start_byte refers to the first unused block after a short write. If
+	 * nothing was written, round offset down to point at the first block in
+	 * the range.
+	 */
+	if (unlikely(!written))
+		start_byte = round_down(pos, blocksize);
+	else
+		start_byte = round_up(pos + written, blocksize);
+	end_byte = round_up(pos + length, blocksize);
+
+	/* Nothing to do if we've written the entire delalloc extent */
+	if (start_byte >= end_byte)
+		return 0;
+
+	/*
+	 * Lock the mapping to avoid races with page faults re-instantiating
+	 * folios and dirtying them via ->page_mkwrite between the page cache
+	 * truncation and the delalloc extent removal. Failing to do this can
+	 * leave dirty pages with no space reservation in the cache.
+	 */
+	filemap_invalidate_lock(inode->i_mapping);
+	truncate_pagecache_range(inode, start_byte, end_byte - 1);
+	error = punch(inode, start_byte, end_byte - start_byte);
+	filemap_invalidate_unlock(inode->i_mapping);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
+
 static loff_t iomap_unshare_iter(struct iomap_iter *iter)
 {
 	struct iomap *iomap = &iter->iomap;
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 7bb55dbc19d35..ea96e8a348687 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1123,12 +1123,12 @@ out_unlock:
 static int
 xfs_buffered_write_delalloc_punch(
 	struct inode		*inode,
-	loff_t			start_byte,
-	loff_t			end_byte)
+	loff_t			offset,
+	loff_t			length)
 {
 	struct xfs_mount	*mp = XFS_M(inode->i_sb);
-	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
-	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
+	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, offset);
+	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
 
 	return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb,
 				end_fsb - start_fsb);
@@ -1143,13 +1143,9 @@ xfs_buffered_write_iomap_end(
 	unsigned		flags,
 	struct iomap		*iomap)
 {
-	struct xfs_mount	*mp = XFS_M(inode->i_sb);
-	loff_t			start_byte;
-	loff_t			end_byte;
-	int			error = 0;
 
-	if (iomap->type != IOMAP_DELALLOC)
-		return 0;
+	struct xfs_mount	*mp = XFS_M(inode->i_sb);
+	int			error;
 
 	/*
 	 * Behave as if the write failed if drop writes is enabled. Set the NEW
@@ -1160,35 +1156,8 @@ xfs_buffered_write_iomap_end(
 		written = 0;
 	}
 
-	/* If we didn't reserve the blocks, we're not allowed to punch them. */
-	if (!(iomap->flags & IOMAP_F_NEW))
-		return 0;
-
-	/*
-	 * start_fsb refers to the first unused block after a short write. If
-	 * nothing was written, round offset down to point at the first block in
-	 * the range.
-	 */
-	if (unlikely(!written))
-		start_byte = round_down(offset, mp->m_sb.sb_blocksize);
-	else
-		start_byte = round_up(offset + written, mp->m_sb.sb_blocksize);
-	end_byte = round_up(offset + length, mp->m_sb.sb_blocksize);
-
-	/* Nothing to do if we've written the entire delalloc extent */
-	if (start_byte >= end_byte)
-		return 0;
-
-	/*
-	 * Lock the mapping to avoid races with page faults re-instantiating
-	 * folios and dirtying them via ->page_mkwrite between the page cache
-	 * truncation and the delalloc extent removal. Failing to do this can
-	 * leave dirty pages with no space reservation in the cache.
-	 */
-	filemap_invalidate_lock(inode->i_mapping);
-	truncate_pagecache_range(inode, start_byte, end_byte - 1);
-	error = xfs_buffered_write_delalloc_punch(inode, start_byte, end_byte);
-	filemap_invalidate_unlock(inode->i_mapping);
+	error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
+			length, written, &xfs_buffered_write_delalloc_punch);
 	if (error && !xfs_is_shutdown(mp)) {
 		xfs_alert(mp, "%s: unable to clean up ino 0x%llx",
 			__func__, XFS_I(inode)->i_ino);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 238a03087e17e..0698c4b8ce0e2 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -226,6 +226,10 @@ static inline const struct iomap *iomap_iter_srcmap(const struct iomap_iter *i)
 
 ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
 		const struct iomap_ops *ops);
+int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
+		struct iomap *iomap, loff_t pos, loff_t length, ssize_t written,
+		int (*punch)(struct inode *inode, loff_t pos, loff_t length));
+
 int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops);
 void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
 bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count);
-- 
GitLab


From 9124a26401483bf2b13a99cb4317dce3f677060f Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 29 Sep 2022 01:58:59 -0700
Subject: [PATCH 2020/4122] kunit/fortify: Validate __alloc_size attribute
 results

Validate the effect of the __alloc_size attribute on allocators. If the
compiler doesn't support __builtin_dynamic_object_size(), skip the
associated tests.

(For GCC, just remove the "--make_options" line below...)

$ ./tools/testing/kunit/kunit.py run --arch x86_64 \
        --kconfig_add CONFIG_FORTIFY_SOURCE=y \
	--make_options LLVM=1
        fortify
...
[15:16:30] ================== fortify (10 subtests) ===================
[15:16:30] [PASSED] known_sizes_test
[15:16:30] [PASSED] control_flow_split_test
[15:16:30] [PASSED] alloc_size_kmalloc_const_test
[15:16:30] [PASSED] alloc_size_kmalloc_dynamic_test
[15:16:30] [PASSED] alloc_size_vmalloc_const_test
[15:16:30] [PASSED] alloc_size_vmalloc_dynamic_test
[15:16:30] [PASSED] alloc_size_kvmalloc_const_test
[15:16:30] [PASSED] alloc_size_kvmalloc_dynamic_test
[15:16:30] [PASSED] alloc_size_devm_kmalloc_const_test
[15:16:30] [PASSED] alloc_size_devm_kmalloc_dynamic_test
[15:16:30] ===================== [PASSED] fortify =====================
[15:16:30] ============================================================
[15:16:30] Testing complete. Ran 10 tests: passed: 10
[15:16:31] Elapsed time: 8.348s total, 0.002s configuring, 6.923s building, 1.075s running

For earlier GCC prior to version 12, the dynamic tests will be skipped:

[15:18:59] ================== fortify (10 subtests) ===================
[15:18:59] [PASSED] known_sizes_test
[15:18:59] [PASSED] control_flow_split_test
[15:18:59] [PASSED] alloc_size_kmalloc_const_test
[15:18:59] [SKIPPED] alloc_size_kmalloc_dynamic_test
[15:18:59] [PASSED] alloc_size_vmalloc_const_test
[15:18:59] [SKIPPED] alloc_size_vmalloc_dynamic_test
[15:18:59] [PASSED] alloc_size_kvmalloc_const_test
[15:18:59] [SKIPPED] alloc_size_kvmalloc_dynamic_test
[15:18:59] [PASSED] alloc_size_devm_kmalloc_const_test
[15:18:59] [SKIPPED] alloc_size_devm_kmalloc_dynamic_test
[15:18:59] ===================== [PASSED] fortify =====================
[15:18:59] ============================================================
[15:18:59] Testing complete. Ran 10 tests: passed: 6, skipped: 4
[15:18:59] Elapsed time: 11.965s total, 0.002s configuring, 10.540s building, 1.068s running

Cc: David Gow <davidgow@google.com>
Cc: linux-hardening@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 lib/Makefile        |   1 +
 lib/fortify_kunit.c | 255 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 256 insertions(+)

diff --git a/lib/Makefile b/lib/Makefile
index 322178b9f7fbc..2f0454b931dc8 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -378,6 +378,7 @@ CFLAGS_overflow_kunit.o = $(call cc-disable-warning, tautological-constant-out-o
 obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
 CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
 obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
+CFLAGS_fortify_kunit.o += $(call cc-disable-warning, unsequenced)
 obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o
 obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o
 obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
diff --git a/lib/fortify_kunit.c b/lib/fortify_kunit.c
index 409af07f340af..c8c33cbaae9ec 100644
--- a/lib/fortify_kunit.c
+++ b/lib/fortify_kunit.c
@@ -16,7 +16,10 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <kunit/test.h>
+#include <linux/device.h>
+#include <linux/slab.h>
 #include <linux/string.h>
+#include <linux/vmalloc.h>
 
 static const char array_of_10[] = "this is 10";
 static const char *ptr_of_11 = "this is 11!";
@@ -60,9 +63,261 @@ static void control_flow_split_test(struct kunit *test)
 	KUNIT_EXPECT_EQ(test, want_minus_one(pick), SIZE_MAX);
 }
 
+#define KUNIT_EXPECT_BOS(test, p, expected, name)			\
+	KUNIT_EXPECT_EQ_MSG(test, __builtin_object_size(p, 1),		\
+		expected,						\
+		"__alloc_size() not working with __bos on " name "\n")
+
+#if !__has_builtin(__builtin_dynamic_object_size)
+#define KUNIT_EXPECT_BDOS(test, p, expected, name)			\
+	/* Silence "unused variable 'expected'" warning. */		\
+	KUNIT_EXPECT_EQ(test, expected, expected)
+#else
+#define KUNIT_EXPECT_BDOS(test, p, expected, name)			\
+	KUNIT_EXPECT_EQ_MSG(test, __builtin_dynamic_object_size(p, 1),	\
+		expected,						\
+		"__alloc_size() not working with __bdos on " name "\n")
+#endif
+
+/* If the execpted size is a constant value, __bos can see it. */
+#define check_const(_expected, alloc, free)		do {		\
+	size_t expected = (_expected);					\
+	void *p = alloc;						\
+	KUNIT_EXPECT_TRUE_MSG(test, p != NULL, #alloc " failed?!\n");	\
+	KUNIT_EXPECT_BOS(test, p, expected, #alloc);			\
+	KUNIT_EXPECT_BDOS(test, p, expected, #alloc);			\
+	free;								\
+} while (0)
+
+/* If the execpted size is NOT a constant value, __bos CANNOT see it. */
+#define check_dynamic(_expected, alloc, free)		do {		\
+	size_t expected = (_expected);					\
+	void *p = alloc;						\
+	KUNIT_EXPECT_TRUE_MSG(test, p != NULL, #alloc " failed?!\n");	\
+	KUNIT_EXPECT_BOS(test, p, SIZE_MAX, #alloc);			\
+	KUNIT_EXPECT_BDOS(test, p, expected, #alloc);			\
+	free;								\
+} while (0)
+
+/* Assortment of constant-value kinda-edge cases. */
+#define CONST_TEST_BODY(TEST_alloc)	do {				\
+	/* Special-case vmalloc()-family to skip 0-sized allocs. */	\
+	if (strcmp(#TEST_alloc, "TEST_vmalloc") != 0)			\
+		TEST_alloc(check_const, 0, 0);				\
+	TEST_alloc(check_const, 1, 1);					\
+	TEST_alloc(check_const, 128, 128);				\
+	TEST_alloc(check_const, 1023, 1023);				\
+	TEST_alloc(check_const, 1025, 1025);				\
+	TEST_alloc(check_const, 4096, 4096);				\
+	TEST_alloc(check_const, 4097, 4097);				\
+} while (0)
+
+static volatile size_t zero_size;
+static volatile size_t unknown_size = 50;
+
+#if !__has_builtin(__builtin_dynamic_object_size)
+#define DYNAMIC_TEST_BODY(TEST_alloc)					\
+	kunit_skip(test, "Compiler is missing __builtin_dynamic_object_size() support\n")
+#else
+#define DYNAMIC_TEST_BODY(TEST_alloc)	do {				\
+	size_t size = unknown_size;					\
+									\
+	/*								\
+	 * Expected size is "size" in each test, before it is then	\
+	 * internally incremented in each test.	Requires we disable	\
+	 * -Wunsequenced.						\
+	 */								\
+	TEST_alloc(check_dynamic, size, size++);			\
+	/* Make sure incrementing actually happened. */			\
+	KUNIT_EXPECT_NE(test, size, unknown_size);			\
+} while (0)
+#endif
+
+#define DEFINE_ALLOC_SIZE_TEST_PAIR(allocator)				\
+static void alloc_size_##allocator##_const_test(struct kunit *test)	\
+{									\
+	CONST_TEST_BODY(TEST_##allocator);				\
+}									\
+static void alloc_size_##allocator##_dynamic_test(struct kunit *test)	\
+{									\
+	DYNAMIC_TEST_BODY(TEST_##allocator);				\
+}
+
+#define TEST_kmalloc(checker, expected_size, alloc_size)	do {	\
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;				\
+	void *orig;							\
+	size_t len;							\
+									\
+	checker(expected_size, kmalloc(alloc_size, gfp),		\
+		kfree(p));						\
+	checker(expected_size,						\
+		kmalloc_node(alloc_size, gfp, NUMA_NO_NODE),		\
+		kfree(p));						\
+	checker(expected_size, kzalloc(alloc_size, gfp),		\
+		kfree(p));						\
+	checker(expected_size,						\
+		kzalloc_node(alloc_size, gfp, NUMA_NO_NODE),		\
+		kfree(p));						\
+	checker(expected_size, kcalloc(1, alloc_size, gfp),		\
+		kfree(p));						\
+	checker(expected_size, kcalloc(alloc_size, 1, gfp),		\
+		kfree(p));						\
+	checker(expected_size,						\
+		kcalloc_node(1, alloc_size, gfp, NUMA_NO_NODE),		\
+		kfree(p));						\
+	checker(expected_size,						\
+		kcalloc_node(alloc_size, 1, gfp, NUMA_NO_NODE),		\
+		kfree(p));						\
+	checker(expected_size, kmalloc_array(1, alloc_size, gfp),	\
+		kfree(p));						\
+	checker(expected_size, kmalloc_array(alloc_size, 1, gfp),	\
+		kfree(p));						\
+	checker(expected_size,						\
+		kmalloc_array_node(1, alloc_size, gfp, NUMA_NO_NODE),	\
+		kfree(p));						\
+	checker(expected_size,						\
+		kmalloc_array_node(alloc_size, 1, gfp, NUMA_NO_NODE),	\
+		kfree(p));						\
+	checker(expected_size, __kmalloc(alloc_size, gfp),		\
+		kfree(p));						\
+	checker(expected_size,						\
+		__kmalloc_node(alloc_size, gfp, NUMA_NO_NODE),		\
+		kfree(p));						\
+									\
+	orig = kmalloc(alloc_size, gfp);				\
+	KUNIT_EXPECT_TRUE(test, orig != NULL);				\
+	checker((expected_size) * 2,					\
+		krealloc(orig, (alloc_size) * 2, gfp),			\
+		kfree(p));						\
+	orig = kmalloc(alloc_size, gfp);				\
+	KUNIT_EXPECT_TRUE(test, orig != NULL);				\
+	checker((expected_size) * 2,					\
+		krealloc_array(orig, 1, (alloc_size) * 2, gfp),		\
+		kfree(p));						\
+	orig = kmalloc(alloc_size, gfp);				\
+	KUNIT_EXPECT_TRUE(test, orig != NULL);				\
+	checker((expected_size) * 2,					\
+		krealloc_array(orig, (alloc_size) * 2, 1, gfp),		\
+		kfree(p));						\
+									\
+	len = 11;							\
+	/* Using memdup() with fixed size, so force unknown length. */	\
+	if (!__builtin_constant_p(expected_size))			\
+		len += zero_size;					\
+	checker(len, kmemdup("hello there", len, gfp), kfree(p));	\
+} while (0)
+DEFINE_ALLOC_SIZE_TEST_PAIR(kmalloc)
+
+/* Sizes are in pages, not bytes. */
+#define TEST_vmalloc(checker, expected_pages, alloc_pages)	do {	\
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;				\
+	checker((expected_pages) * PAGE_SIZE,				\
+		vmalloc((alloc_pages) * PAGE_SIZE),	   vfree(p));	\
+	checker((expected_pages) * PAGE_SIZE,				\
+		vzalloc((alloc_pages) * PAGE_SIZE),	   vfree(p));	\
+	checker((expected_pages) * PAGE_SIZE,				\
+		__vmalloc((alloc_pages) * PAGE_SIZE, gfp), vfree(p));	\
+} while (0)
+DEFINE_ALLOC_SIZE_TEST_PAIR(vmalloc)
+
+/* Sizes are in pages (and open-coded for side-effects), not bytes. */
+#define TEST_kvmalloc(checker, expected_pages, alloc_pages)	do {	\
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;				\
+	size_t prev_size;						\
+	void *orig;							\
+									\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvmalloc((alloc_pages) * PAGE_SIZE, gfp),		\
+		vfree(p));						\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvmalloc_node((alloc_pages) * PAGE_SIZE, gfp, NUMA_NO_NODE), \
+		vfree(p));						\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvzalloc((alloc_pages) * PAGE_SIZE, gfp),		\
+		vfree(p));						\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvzalloc_node((alloc_pages) * PAGE_SIZE, gfp, NUMA_NO_NODE), \
+		vfree(p));						\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvcalloc(1, (alloc_pages) * PAGE_SIZE, gfp),		\
+		vfree(p));						\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvcalloc((alloc_pages) * PAGE_SIZE, 1, gfp),		\
+		vfree(p));						\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvmalloc_array(1, (alloc_pages) * PAGE_SIZE, gfp),	\
+		vfree(p));						\
+	checker((expected_pages) * PAGE_SIZE,				\
+		kvmalloc_array((alloc_pages) * PAGE_SIZE, 1, gfp),	\
+		vfree(p));						\
+									\
+	prev_size = (expected_pages) * PAGE_SIZE;			\
+	orig = kvmalloc(prev_size, gfp);				\
+	KUNIT_EXPECT_TRUE(test, orig != NULL);				\
+	checker(((expected_pages) * PAGE_SIZE) * 2,			\
+		kvrealloc(orig, prev_size,				\
+			  ((alloc_pages) * PAGE_SIZE) * 2, gfp),	\
+		kvfree(p));						\
+} while (0)
+DEFINE_ALLOC_SIZE_TEST_PAIR(kvmalloc)
+
+#define TEST_devm_kmalloc(checker, expected_size, alloc_size)	do {	\
+	gfp_t gfp = GFP_KERNEL | __GFP_NOWARN;				\
+	const char dev_name[] = "fortify-test";				\
+	struct device *dev;						\
+	void *orig;							\
+	size_t len;							\
+									\
+	/* Create dummy device for devm_kmalloc()-family tests. */	\
+	dev = root_device_register(dev_name);				\
+	KUNIT_ASSERT_FALSE_MSG(test, IS_ERR(dev),			\
+			       "Cannot register test device\n");	\
+									\
+	checker(expected_size, devm_kmalloc(dev, alloc_size, gfp),	\
+		devm_kfree(dev, p));					\
+	checker(expected_size, devm_kzalloc(dev, alloc_size, gfp),	\
+		devm_kfree(dev, p));					\
+	checker(expected_size,						\
+		devm_kmalloc_array(dev, 1, alloc_size, gfp),		\
+		devm_kfree(dev, p));					\
+	checker(expected_size,						\
+		devm_kmalloc_array(dev, alloc_size, 1, gfp),		\
+		devm_kfree(dev, p));					\
+	checker(expected_size,						\
+		devm_kcalloc(dev, 1, alloc_size, gfp),			\
+		devm_kfree(dev, p));					\
+	checker(expected_size,						\
+		devm_kcalloc(dev, alloc_size, 1, gfp),			\
+		devm_kfree(dev, p));					\
+									\
+	orig = devm_kmalloc(dev, alloc_size, gfp);			\
+	KUNIT_EXPECT_TRUE(test, orig != NULL);				\
+	checker((expected_size) * 2,					\
+		devm_krealloc(dev, orig, (alloc_size) * 2, gfp),	\
+		devm_kfree(dev, p));					\
+									\
+	len = 4;							\
+	/* Using memdup() with fixed size, so force unknown length. */	\
+	if (!__builtin_constant_p(expected_size))			\
+		len += zero_size;					\
+	checker(len, devm_kmemdup(dev, "Ohai", len, gfp),		\
+		devm_kfree(dev, p));					\
+									\
+	device_unregister(dev);						\
+} while (0)
+DEFINE_ALLOC_SIZE_TEST_PAIR(devm_kmalloc)
+
 static struct kunit_case fortify_test_cases[] = {
 	KUNIT_CASE(known_sizes_test),
 	KUNIT_CASE(control_flow_split_test),
+	KUNIT_CASE(alloc_size_kmalloc_const_test),
+	KUNIT_CASE(alloc_size_kmalloc_dynamic_test),
+	KUNIT_CASE(alloc_size_vmalloc_const_test),
+	KUNIT_CASE(alloc_size_vmalloc_dynamic_test),
+	KUNIT_CASE(alloc_size_kvmalloc_const_test),
+	KUNIT_CASE(alloc_size_kvmalloc_dynamic_test),
+	KUNIT_CASE(alloc_size_devm_kmalloc_const_test),
+	KUNIT_CASE(alloc_size_devm_kmalloc_dynamic_test),
 	{}
 };
 
-- 
GitLab


From 1307c5d33cce8a41dd77c2571e4df65a5b627feb Mon Sep 17 00:00:00 2001
From: Gabriel Somlo <gsomlo@gmail.com>
Date: Tue, 22 Nov 2022 15:04:26 -0500
Subject: [PATCH 2021/4122] serial: altera_uart: fix locking in polling mode

Since altera_uart_interrupt() may also be called from
a poll timer in "serving_softirq" context, use
spin_[lock_irqsave|unlock_irqrestore] variants, which
are appropriate for both softirq and hardware interrupt
contexts.

Fixes: 2f8b9c15cd88 ("altera_uart: Add support for polling mode (IRQ-less)")
Signed-off-by: Gabriel Somlo <gsomlo@gmail.com>
Link: https://lore.kernel.org/r/20221122200426.888349-1-gsomlo@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/altera_uart.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/tty/serial/altera_uart.c b/drivers/tty/serial/altera_uart.c
index 316074bb23e9f..9ce3d24af5360 100644
--- a/drivers/tty/serial/altera_uart.c
+++ b/drivers/tty/serial/altera_uart.c
@@ -259,16 +259,17 @@ static irqreturn_t altera_uart_interrupt(int irq, void *data)
 {
 	struct uart_port *port = data;
 	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
 	unsigned int isr;
 
 	isr = altera_uart_readl(port, ALTERA_UART_STATUS_REG) & pp->imr;
 
-	spin_lock(&port->lock);
+	spin_lock_irqsave(&port->lock, flags);
 	if (isr & ALTERA_UART_STATUS_RRDY_MSK)
 		altera_uart_rx_chars(port);
 	if (isr & ALTERA_UART_STATUS_TRDY_MSK)
 		altera_uart_tx_chars(port);
-	spin_unlock(&port->lock);
+	spin_unlock_irqrestore(&port->lock, flags);
 
 	return IRQ_RETVAL(isr);
 }
-- 
GitLab


From 1a6ec673fb627c26e2267ca0a03849f91dbd9b40 Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Wed, 23 Nov 2022 06:12:12 +0000
Subject: [PATCH 2022/4122] serial: sunsab: Fix error handling in sunsab_init()

The sunsab_init() returns the platform_driver_register() directly without
checking its return value, if platform_driver_register() failed, the
allocated sunsab_ports is leaked.
Fix by free sunsab_ports and set it to NULL when platform_driver_register()
failed.

Fixes: c4d37215a824 ("[SERIAL] sunsab: Convert to of_driver framework.")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Link: https://lore.kernel.org/r/20221123061212.52593-1-yuancan@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/sunsab.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/tty/serial/sunsab.c b/drivers/tty/serial/sunsab.c
index 94db67f21abf2..48b39fdb0397a 100644
--- a/drivers/tty/serial/sunsab.c
+++ b/drivers/tty/serial/sunsab.c
@@ -1131,7 +1131,13 @@ static int __init sunsab_init(void)
 		}
 	}
 
-	return platform_driver_register(&sab_driver);
+	err = platform_driver_register(&sab_driver);
+	if (err) {
+		kfree(sunsab_ports);
+		sunsab_ports = NULL;
+	}
+
+	return err;
 }
 
 static void __exit sunsab_exit(void)
-- 
GitLab


From 8682ab0eea89c300ebb120c02ead3999ca5560a8 Mon Sep 17 00:00:00 2001
From: Sherry Sun <sherry.sun@nxp.com>
Date: Wed, 23 Nov 2022 10:36:19 +0800
Subject: [PATCH 2023/4122] tty: serial: fsl_lpuart: switch to new
 dmaengine_terminate_* API

Convert dmaengine_terminate_all() calls to synchronous and asynchronous
versions where appropriate.

Signed-off-by: Sherry Sun <sherry.sun@nxp.com>
Link: https://lore.kernel.org/r/20221123023619.30173-1-sherry.sun@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/fsl_lpuart.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/tty/serial/fsl_lpuart.c b/drivers/tty/serial/fsl_lpuart.c
index c1c8aa3e0fac8..5e69fb73f570f 100644
--- a/drivers/tty/serial/fsl_lpuart.c
+++ b/drivers/tty/serial/fsl_lpuart.c
@@ -583,7 +583,7 @@ static void lpuart_flush_buffer(struct uart_port *port)
 				sport->dma_tx_nents, DMA_TO_DEVICE);
 			sport->dma_tx_in_progress = false;
 		}
-		dmaengine_terminate_all(chan);
+		dmaengine_terminate_async(chan);
 	}
 
 	if (lpuart_is_32(sport)) {
@@ -1327,7 +1327,7 @@ static void lpuart_dma_rx_free(struct uart_port *port)
 					struct lpuart_port, port);
 	struct dma_chan *chan = sport->dma_rx_chan;
 
-	dmaengine_terminate_all(chan);
+	dmaengine_terminate_sync(chan);
 	dma_unmap_sg(chan->device->dev, &sport->rx_sgl, 1, DMA_FROM_DEVICE);
 	kfree(sport->rx_ring.buf);
 	sport->rx_ring.tail = 0;
@@ -1757,7 +1757,7 @@ static void lpuart_dma_shutdown(struct lpuart_port *sport)
 		if (wait_event_interruptible_timeout(sport->dma_wait,
 			!sport->dma_tx_in_progress, msecs_to_jiffies(300)) <= 0) {
 			sport->dma_tx_in_progress = false;
-			dmaengine_terminate_all(sport->dma_tx_chan);
+			dmaengine_terminate_sync(sport->dma_tx_chan);
 		}
 		sport->lpuart_dma_tx_use = false;
 	}
@@ -2968,7 +2968,7 @@ static int lpuart_suspend(struct device *dev)
 			}
 			spin_unlock_irqrestore(&sport->port.lock, flags);
 			sport->dma_tx_in_progress = false;
-			dmaengine_terminate_all(sport->dma_tx_chan);
+			dmaengine_terminate_sync(sport->dma_tx_chan);
 		}
 	} else if (pm_runtime_active(sport->port.dev)) {
 		lpuart_disable_clks(sport);
-- 
GitLab


From 94ec165c9f98189ce9aa50cfcb7181ba23f92eb7 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 23 Nov 2022 09:27:35 +0100
Subject: [PATCH 2024/4122] serial: atmel: cleanup atmel_start+stop_tx()

Define local variables holding information about whether pdc or dma is
used in the HW. These are retested several times by calls to
atmel_use_pdc_tx() and atmel_use_dma_tx(). So to make the code more
readable, simply cache the values.

This is also a preparatory patch for the next one (where is_pdc is used
once more in atmel_stop_tx()).

Cc: Richard Genoud <richard.genoud@gmail.com>
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Claudiu Beznea <claudiu.beznea@microchip.com>
Cc: linux-arm-kernel@lists.infradead.org
Reported-by: Michael Walle <michael@walle.cc>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221123082736.24566-1-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 4ca04676c4066..65f63dccfd728 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -552,8 +552,9 @@ static u_int atmel_get_mctrl(struct uart_port *port)
 static void atmel_stop_tx(struct uart_port *port)
 {
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+	bool is_pdc = atmel_use_pdc_tx(port);
 
-	if (atmel_use_pdc_tx(port)) {
+	if (is_pdc) {
 		/* disable PDC transmit */
 		atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
 	}
@@ -572,7 +573,6 @@ static void atmel_stop_tx(struct uart_port *port)
 	if (atmel_uart_is_half_duplex(port))
 		if (!atomic_read(&atmel_port->tasklet_shutdown))
 			atmel_start_rx(port);
-
 }
 
 /*
@@ -581,20 +581,22 @@ static void atmel_stop_tx(struct uart_port *port)
 static void atmel_start_tx(struct uart_port *port)
 {
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
+	bool is_pdc = atmel_use_pdc_tx(port);
+	bool is_dma = is_pdc || atmel_use_dma_tx(port);
 
-	if (atmel_use_pdc_tx(port) && (atmel_uart_readl(port, ATMEL_PDC_PTSR)
+	if (is_pdc && (atmel_uart_readl(port, ATMEL_PDC_PTSR)
 				       & ATMEL_PDC_TXTEN))
 		/* The transmitter is already running.  Yes, we
 		   really need this.*/
 		return;
 
-	if (atmel_use_pdc_tx(port) || atmel_use_dma_tx(port))
-		if (atmel_uart_is_half_duplex(port))
-			atmel_stop_rx(port);
+	if (is_dma && atmel_uart_is_half_duplex(port))
+		atmel_stop_rx(port);
 
-	if (atmel_use_pdc_tx(port))
+	if (is_pdc) {
 		/* re-enable PDC transmit */
 		atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTEN);
+	}
 
 	/* Enable interrupts */
 	atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask);
-- 
GitLab


From 6373ab4dfee731deec62b4452ea641611feff9b3 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Wed, 23 Nov 2022 09:27:36 +0100
Subject: [PATCH 2025/4122] serial: atmel: don't stop the transmitter when
 doing PIO

Writing ATMEL_US_TXDIS to ATMEL_US_CR makes the transmitter NOT to send
the just queued character. This means when the character is last and
uart calls ops->stop_tx(), the character is not sent at all.

The usart datasheet is not much specific on this, it just says the
transmitter is stopped. But apparently, the character is dropped. So
we should stop the transmitter only for DMA and PDC transfers to not
send any more characters. For PIO, this is unexpected and deviates from
other drivers. In particular, the below referenced commit broke TX as it
added a call to ->stop_tx() after the very last character written to the
transmitter.

So fix this by limiting the write of ATMEL_US_TXDIS to DMA transfers
only.

Even there, I don't know if it is correctly implemented. Are all the
queued characters sent once ->start_tx() is called? Anyone tested flow
control -- be it hard (RTSCTS) or the soft (XOFF/XON) one?

Fixes: 2d141e683e9a ("tty: serial: use uart_port_tx() helper")
Cc: Richard Genoud <richard.genoud@gmail.com>
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Claudiu Beznea <claudiu.beznea@microchip.com>
Cc: linux-arm-kernel@lists.infradead.org
Reported-by: Michael Walle <michael@walle.cc>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221123082736.24566-2-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/atmel_serial.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/tty/serial/atmel_serial.c b/drivers/tty/serial/atmel_serial.c
index 65f63dccfd728..f1c06e12efa0b 100644
--- a/drivers/tty/serial/atmel_serial.c
+++ b/drivers/tty/serial/atmel_serial.c
@@ -553,19 +553,22 @@ static void atmel_stop_tx(struct uart_port *port)
 {
 	struct atmel_uart_port *atmel_port = to_atmel_uart_port(port);
 	bool is_pdc = atmel_use_pdc_tx(port);
+	bool is_dma = is_pdc || atmel_use_dma_tx(port);
 
 	if (is_pdc) {
 		/* disable PDC transmit */
 		atmel_uart_writel(port, ATMEL_PDC_PTCR, ATMEL_PDC_TXTDIS);
 	}
 
-	/*
-	 * Disable the transmitter.
-	 * This is mandatory when DMA is used, otherwise the DMA buffer
-	 * is fully transmitted.
-	 */
-	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS);
-	atmel_port->tx_stopped = true;
+	if (is_dma) {
+		/*
+		 * Disable the transmitter.
+		 * This is mandatory when DMA is used, otherwise the DMA buffer
+		 * is fully transmitted.
+		 */
+		atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXDIS);
+		atmel_port->tx_stopped = true;
+	}
 
 	/* Disable interrupts */
 	atmel_uart_writel(port, ATMEL_US_IDR, atmel_port->tx_done_mask);
@@ -601,9 +604,11 @@ static void atmel_start_tx(struct uart_port *port)
 	/* Enable interrupts */
 	atmel_uart_writel(port, ATMEL_US_IER, atmel_port->tx_done_mask);
 
-	/* re-enable the transmitter */
-	atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
-	atmel_port->tx_stopped = false;
+	if (is_dma) {
+		/* re-enable the transmitter */
+		atmel_uart_writel(port, ATMEL_US_CR, ATMEL_US_TXEN);
+		atmel_port->tx_stopped = false;
+	}
 }
 
 /*
-- 
GitLab


From fb491d5500a7ca551e49bc32d9b19d226023f68d Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Fri, 11 Nov 2022 18:06:27 +0100
Subject: [PATCH 2026/4122] KVM: s390: pv: asynchronous destroy for reboot

Until now, destroying a protected guest was an entirely synchronous
operation that could potentially take a very long time, depending on
the size of the guest, due to the time needed to clean up the address
space from protected pages.

This patch implements an asynchronous destroy mechanism, that allows a
protected guest to reboot significantly faster than previously.

This is achieved by clearing the pages of the old guest in background.
In case of reboot, the new guest will be able to run in the same
address space almost immediately.

The old protected guest is then only destroyed when all of its memory
has been destroyed or otherwise made non protected.

Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl:

KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for
later asynchronous teardown. The current KVM VM will then continue
immediately as non-protected. If a protected VM had already been
set aside for asynchronous teardown, but without starting the teardown
process, this call will fail. There can be at most one VM set aside at
any time. Once it is set aside, the protected VM only exists in the
context of the Ultravisor, it is not associated with the KVM VM
anymore. Its protected CPUs have already been destroyed, but not its
memory. This command can be issued again immediately after starting
KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion.

KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously
set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the
KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace
from a separate thread. If a fatal signal is received (or if the
process terminates naturally), the command will terminate immediately
without completing. All protected VMs whose teardown was interrupted
will be put in the need_cleanup list. The rest of the normal KVM
teardown process will take care of properly cleaning up all remaining
protected VMs, including the ones on the need_cleanup list.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/kvm_host.h |   2 +
 arch/s390/kvm/kvm-s390.c         |  49 ++++-
 arch/s390/kvm/kvm-s390.h         |   3 +
 arch/s390/kvm/pv.c               | 295 +++++++++++++++++++++++++++++--
 include/uapi/linux/kvm.h         |   2 +
 5 files changed, 333 insertions(+), 18 deletions(-)

diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 21f1339a41974..d67ce719d16a9 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -942,6 +942,8 @@ struct kvm_s390_pv {
 	unsigned long stor_base;
 	void *stor_var;
 	bool dumping;
+	void *set_aside;
+	struct list_head need_cleanup;
 	struct mmu_notifier mmu_notifier;
 };
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bd6e0201bfe5f..f0abaaf7eea4c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -209,6 +209,8 @@ unsigned int diag9c_forwarding_hz;
 module_param(diag9c_forwarding_hz, uint, 0644);
 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 
+static int async_destroy;
+
 /*
  * For now we handle at most 16 double words as this is what the s390 base
  * kernel handles and stores in the prefix page. If we ever need to go beyond
@@ -2504,9 +2506,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd,
 
 static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 {
+	const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM);
+	void __user *argp = (void __user *)cmd->data;
 	int r = 0;
 	u16 dummy;
-	void __user *argp = (void __user *)cmd->data;
+
+	if (need_lock)
+		mutex_lock(&kvm->lock);
 
 	switch (cmd->cmd) {
 	case KVM_PV_ENABLE: {
@@ -2540,6 +2546,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
 		break;
 	}
+	case KVM_PV_ASYNC_CLEANUP_PREPARE:
+		r = -EINVAL;
+		if (!kvm_s390_pv_is_protected(kvm) || !async_destroy)
+			break;
+
+		r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc);
+		/*
+		 * If a CPU could not be destroyed, destroy VM will also fail.
+		 * There is no point in trying to destroy it. Instead return
+		 * the rc and rrc from the first CPU that failed destroying.
+		 */
+		if (r)
+			break;
+		r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc);
+
+		/* no need to block service interrupts any more */
+		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
+		break;
+	case KVM_PV_ASYNC_CLEANUP_PERFORM:
+		r = -EINVAL;
+		if (!async_destroy)
+			break;
+		/* kvm->lock must not be held; this is asserted inside the function. */
+		r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc);
+		break;
 	case KVM_PV_DISABLE: {
 		r = -EINVAL;
 		if (!kvm_s390_pv_is_protected(kvm))
@@ -2553,7 +2584,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 		 */
 		if (r)
 			break;
-		r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc);
+		r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc);
 
 		/* no need to block service interrupts any more */
 		clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs);
@@ -2703,6 +2734,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
 	default:
 		r = -ENOTTY;
 	}
+	if (need_lock)
+		mutex_unlock(&kvm->lock);
+
 	return r;
 }
 
@@ -2907,9 +2941,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
 			r = -EINVAL;
 			break;
 		}
-		mutex_lock(&kvm->lock);
+		/* must be called without kvm->lock */
 		r = kvm_s390_handle_pv(kvm, &args);
-		mutex_unlock(&kvm->lock);
 		if (copy_to_user(argp, &args, sizeof(args))) {
 			r = -EFAULT;
 			break;
@@ -3228,6 +3261,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	kvm_s390_vsie_init(kvm);
 	if (use_gisa)
 		kvm_s390_gisa_init(kvm);
+	INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup);
+	kvm->arch.pv.set_aside = NULL;
 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
 
 	return 0;
@@ -3272,11 +3307,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 	/*
 	 * We are already at the end of life and kvm->lock is not taken.
 	 * This is ok as the file descriptor is closed by now and nobody
-	 * can mess with the pv state. To avoid lockdep_assert_held from
-	 * complaining we do not use kvm_s390_pv_is_protected.
+	 * can mess with the pv state.
 	 */
-	if (kvm_s390_pv_get_handle(kvm))
-		kvm_s390_pv_deinit_vm(kvm, &rc, &rrc);
+	kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc);
 	/*
 	 * Remove the mmu notifier only when the whole KVM VM is torn down,
 	 * and only if one was registered to begin with. If the VM is
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index a60d1e5c44cd1..826754937ae4a 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -244,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm)
 /* implemented in pv.c */
 int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
 int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc);
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc);
 int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
 int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc);
 int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc,
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 48c4f57d5d76f..5f958fcf62836 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -18,6 +18,29 @@
 #include <linux/mmu_notifier.h>
 #include "kvm-s390.h"
 
+/**
+ * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to
+ * be destroyed
+ *
+ * @list: list head for the list of leftover VMs
+ * @old_gmap_table: the gmap table of the leftover protected VM
+ * @handle: the handle of the leftover protected VM
+ * @stor_var: pointer to the variable storage of the leftover protected VM
+ * @stor_base: address of the base storage of the leftover protected VM
+ *
+ * Represents a protected VM that is still registered with the Ultravisor,
+ * but which does not correspond any longer to an active KVM VM. It should
+ * be destroyed at some point later, either asynchronously or when the
+ * process terminates.
+ */
+struct pv_vm_to_be_destroyed {
+	struct list_head list;
+	unsigned long old_gmap_table;
+	u64 handle;
+	void *stor_var;
+	unsigned long stor_base;
+};
+
 static void kvm_s390_clear_pv_state(struct kvm *kvm)
 {
 	kvm->arch.pv.handle = 0;
@@ -161,23 +184,150 @@ out_err:
 	return -ENOMEM;
 }
 
-/* this should not fail, but if it does, we must not free the donated memory */
-int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+/**
+ * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM.
+ * @kvm: the KVM that was associated with this leftover protected VM
+ * @leftover: details about the leftover protected VM that needs a clean up
+ * @rc: the RC code of the Destroy Secure Configuration UVC
+ * @rrc: the RRC code of the Destroy Secure Configuration UVC
+ *
+ * Destroy one leftover protected VM.
+ * On success, kvm->mm->context.protected_count will be decremented atomically
+ * and all other resources used by the VM will be freed.
+ *
+ * Return: 0 in case of success, otherwise 1
+ */
+static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
+					    struct pv_vm_to_be_destroyed *leftover,
+					    u16 *rc, u16 *rrc)
 {
 	int cc;
 
-	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
-			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
-	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
+	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
+	if (cc)
+		return cc;
 	/*
-	 * if the mm still has a mapping, make all its pages accessible
-	 * before destroying the guest
+	 * Intentionally leak unusable memory. If the UVC fails, the memory
+	 * used for the VM and its metadata is permanently unusable.
+	 * This can only happen in case of a serious KVM or hardware bug; it
+	 * is not expected to happen in normal operation.
 	 */
-	if (mmget_not_zero(kvm->mm)) {
-		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
-		mmput(kvm->mm);
+	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
+	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
+	vfree(leftover->stor_var);
+	atomic_dec(&kvm->mm->context.protected_count);
+	return 0;
+}
+
+/**
+ * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory.
+ * @kvm: the VM whose memory is to be cleared.
+ *
+ * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ */
+static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
+{
+	const unsigned long pages_2g = SZ_2G / PAGE_SIZE;
+	struct kvm_memory_slot *slot;
+	unsigned long len;
+	int srcu_idx;
+
+	srcu_idx = srcu_read_lock(&kvm->srcu);
+
+	/* Take the memslot containing guest absolute address 0 */
+	slot = gfn_to_memslot(kvm, 0);
+	/* Clear all slots or parts thereof that are below 2GB */
+	while (slot && slot->base_gfn < pages_2g) {
+		len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE;
+		s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len);
+		/* Take the next memslot */
+		slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages);
+	}
+
+	srcu_read_unlock(&kvm->srcu, srcu_idx);
+}
+
+/**
+ * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
+ * @kvm: the VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Set aside the protected VM for a subsequent teardown. The VM will be able
+ * to continue immediately as a non-secure VM, and the information needed to
+ * properly tear down the protected VM is set aside. If another protected VM
+ * was already set aside without starting its teardown, this function will
+ * fail.
+ * The CPUs of the protected VM need to be destroyed beforehand.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 in case of success, -EINVAL if another protected VM was already set
+ * aside, -ENOMEM if the system ran out of memory.
+ */
+int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *priv;
+
+	lockdep_assert_held(&kvm->lock);
+	/*
+	 * If another protected VM was already prepared for teardown, refuse.
+	 * A normal deinitialization has to be performed instead.
+	 */
+	if (kvm->arch.pv.set_aside)
+		return -EINVAL;
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->stor_var = kvm->arch.pv.stor_var;
+	priv->stor_base = kvm->arch.pv.stor_base;
+	priv->handle = kvm_s390_pv_get_handle(kvm);
+	priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	if (s390_replace_asce(kvm->arch.gmap)) {
+		kfree(priv);
+		return -ENOMEM;
 	}
 
+	kvm_s390_destroy_lower_2g(kvm);
+	kvm_s390_clear_pv_state(kvm);
+	kvm->arch.pv.set_aside = priv;
+
+	*rc = UVC_RC_EXECUTED;
+	*rrc = 42;
+	return 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM
+ * @kvm: the KVM whose protected VM needs to be deinitialized
+ * @rc: the RC code of the UVC
+ * @rrc: the RRC code of the UVC
+ *
+ * Deinitialize the current protected VM. This function will destroy and
+ * cleanup the current protected VM, but it will not cleanup the guest
+ * memory. This function should only be called when the protected VM has
+ * just been created and therefore does not have any guest memory, or when
+ * the caller cleans up the guest memory separately.
+ *
+ * This function should not fail, but if it does, the donated memory must
+ * not be freed.
+ *
+ * Context: kvm->lock needs to be held
+ *
+ * Return: 0 in case of success, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	int cc;
+
+	cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm),
+			   UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
 	if (!cc) {
 		atomic_dec(&kvm->mm->context.protected_count);
 		kvm_s390_pv_dealloc_vm(kvm);
@@ -191,6 +341,131 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
 	return cc ? -EIO : 0;
 }
 
+/**
+ * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated
+ * with a specific KVM.
+ * @kvm: the KVM to be cleaned up
+ * @rc: the RC code of the first failing UVC
+ * @rrc: the RRC code of the first failing UVC
+ *
+ * This function will clean up all protected VMs associated with a KVM.
+ * This includes the active one, the one prepared for deinitialization with
+ * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list.
+ *
+ * Context: kvm->lock needs to be held unless being called from
+ * kvm_arch_destroy_vm.
+ *
+ * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO
+ */
+int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *cur;
+	bool need_zap = false;
+	u16 _rc, _rrc;
+	int cc = 0;
+
+	/* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */
+	atomic_inc(&kvm->mm->context.protected_count);
+
+	*rc = 1;
+	/* If the current VM is protected, destroy it */
+	if (kvm_s390_pv_get_handle(kvm)) {
+		cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc);
+		need_zap = true;
+	}
+
+	/* If a previous protected VM was set aside, put it in the need_cleanup list */
+	if (kvm->arch.pv.set_aside) {
+		list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup);
+		kvm->arch.pv.set_aside = NULL;
+	}
+
+	/* Cleanup all protected VMs in the need_cleanup list */
+	while (!list_empty(&kvm->arch.pv.need_cleanup)) {
+		cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list);
+		need_zap = true;
+		if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) {
+			cc = 1;
+			/*
+			 * Only return the first error rc and rrc, so make
+			 * sure it is not overwritten. All destroys will
+			 * additionally be reported via KVM_UV_EVENT().
+			 */
+			if (*rc == UVC_RC_EXECUTED) {
+				*rc = _rc;
+				*rrc = _rrc;
+			}
+		}
+		list_del(&cur->list);
+		kfree(cur);
+	}
+
+	/*
+	 * If the mm still has a mapping, try to mark all its pages as
+	 * accessible. The counter should not reach zero before this
+	 * cleanup has been performed.
+	 */
+	if (need_zap && mmget_not_zero(kvm->mm)) {
+		s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE);
+		mmput(kvm->mm);
+	}
+
+	/* Now the counter can safely reach 0 */
+	atomic_dec(&kvm->mm->context.protected_count);
+	return cc ? -EIO : 0;
+}
+
+/**
+ * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM.
+ * @kvm: the VM previously associated with the protected VM
+ * @rc: return value for the RC field of the UVCB
+ * @rrc: return value for the RRC field of the UVCB
+ *
+ * Tear down the protected VM that had been previously prepared for teardown
+ * using kvm_s390_pv_set_aside_vm. Ideally this should be called by
+ * userspace asynchronously from a separate thread.
+ *
+ * Context: kvm->lock must not be held.
+ *
+ * Return: 0 in case of success, -EINVAL if no protected VM had been
+ * prepared for asynchronous teardowm, -EIO in case of other errors.
+ */
+int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct pv_vm_to_be_destroyed *p;
+	int ret = 0;
+
+	lockdep_assert_not_held(&kvm->lock);
+	mutex_lock(&kvm->lock);
+	p = kvm->arch.pv.set_aside;
+	kvm->arch.pv.set_aside = NULL;
+	mutex_unlock(&kvm->lock);
+	if (!p)
+		return -EINVAL;
+
+	/* When a fatal signal is received, stop immediately */
+	if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX))
+		goto done;
+	if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc))
+		ret = -EIO;
+	kfree(p);
+	p = NULL;
+done:
+	/*
+	 * p is not NULL if we aborted because of a fatal signal, in which
+	 * case queue the leftover for later cleanup.
+	 */
+	if (p) {
+		mutex_lock(&kvm->lock);
+		list_add(&p->list, &kvm->arch.pv.need_cleanup);
+		mutex_unlock(&kvm->lock);
+		/* Did not finish, but pretend things went well */
+		*rc = UVC_RC_EXECUTED;
+		*rrc = 42;
+	}
+	return ret;
+}
+
 static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
 					     struct mm_struct *mm)
 {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0d5d4419139ae..b3701b23ca189 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1740,6 +1740,8 @@ enum pv_cmd_id {
 	KVM_PV_UNSHARE_ALL,
 	KVM_PV_INFO,
 	KVM_PV_DUMP,
+	KVM_PV_ASYNC_CLEANUP_PREPARE,
+	KVM_PV_ASYNC_CLEANUP_PERFORM,
 };
 
 struct kvm_pv_cmd {
-- 
GitLab


From d9459922a15ce7a20a85b38a976494ac7f445732 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Fri, 11 Nov 2022 18:06:28 +0100
Subject: [PATCH 2027/4122] KVM: s390: pv: api documentation for asynchronous
 destroy

Add documentation for the new commands added to the KVM_S390_PV_COMMAND
ioctl.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-3-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-3-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 Documentation/virt/kvm/api.rst | 41 ++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eee9f857a986f..9175d41e80816 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5163,10 +5163,13 @@ KVM_PV_ENABLE
   =====      =============================
 
 KVM_PV_DISABLE
-  Deregister the VM from the Ultravisor and reclaim the memory that
-  had been donated to the Ultravisor, making it usable by the kernel
-  again.  All registered VCPUs are converted back to non-protected
-  ones.
+  Deregister the VM from the Ultravisor and reclaim the memory that had
+  been donated to the Ultravisor, making it usable by the kernel again.
+  All registered VCPUs are converted back to non-protected ones. If a
+  previous protected VM had been prepared for asynchonous teardown with
+  KVM_PV_ASYNC_CLEANUP_PREPARE and not subsequently torn down with
+  KVM_PV_ASYNC_CLEANUP_PERFORM, it will be torn down in this call
+  together with the current protected VM.
 
 KVM_PV_VM_SET_SEC_PARMS
   Pass the image header from VM memory to the Ultravisor in
@@ -5289,6 +5292,36 @@ KVM_PV_DUMP
     authentication tag all of which are needed to decrypt the dump at a
     later time.
 
+KVM_PV_ASYNC_CLEANUP_PREPARE
+  :Capability: KVM_CAP_S390_PROTECTED_ASYNC_DISABLE
+
+  Prepare the current protected VM for asynchronous teardown. Most
+  resources used by the current protected VM will be set aside for a
+  subsequent asynchronous teardown. The current protected VM will then
+  resume execution immediately as non-protected. There can be at most
+  one protected VM prepared for asynchronous teardown at any time. If
+  a protected VM had already been prepared for teardown without
+  subsequently calling KVM_PV_ASYNC_CLEANUP_PERFORM, this call will
+  fail. In that case, the userspace process should issue a normal
+  KVM_PV_DISABLE. The resources set aside with this call will need to
+  be cleaned up with a subsequent call to KVM_PV_ASYNC_CLEANUP_PERFORM
+  or KVM_PV_DISABLE, otherwise they will be cleaned up when KVM
+  terminates. KVM_PV_ASYNC_CLEANUP_PREPARE can be called again as soon
+  as cleanup starts, i.e. before KVM_PV_ASYNC_CLEANUP_PERFORM finishes.
+
+KVM_PV_ASYNC_CLEANUP_PERFORM
+  :Capability: KVM_CAP_S390_PROTECTED_ASYNC_DISABLE
+
+  Tear down the protected VM previously prepared for teardown with
+  KVM_PV_ASYNC_CLEANUP_PREPARE. The resources that had been set aside
+  will be freed during the execution of this command. This PV command
+  should ideally be issued by userspace from a separate thread. If a
+  fatal signal is received (or the process terminates naturally), the
+  command will terminate immediately without completing, and the normal
+  KVM shutdown procedure will take care of cleaning up all remaining
+  protected VMs, including the ones whose teardown was interrupted by
+  process termination.
+
 4.126 KVM_XEN_HVM_SET_ATTR
 --------------------------
 
-- 
GitLab


From 8c516b25d6e9c70e6d76627932b14b0ef03a82c4 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Fri, 11 Nov 2022 18:06:29 +0100
Subject: [PATCH 2028/4122] KVM: s390: pv: add
 KVM_CAP_S390_PROTECTED_ASYNC_DISABLE

Add KVM_CAP_S390_PROTECTED_ASYNC_DISABLE to signal that the
KVM_PV_ASYNC_DISABLE and KVM_PV_ASYNC_DISABLE_PREPARE commands for the
KVM_S390_PV_COMMAND ioctl are available.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-4-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-4-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 3 +++
 include/uapi/linux/kvm.h | 1 +
 2 files changed, 4 insertions(+)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f0abaaf7eea4c..b6cc7d2935c0c 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -618,6 +618,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_BPB:
 		r = test_facility(82);
 		break;
+	case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE:
+		r = async_destroy && is_prot_virt_host();
+		break;
 	case KVM_CAP_S390_PROTECTED:
 		r = is_prot_virt_host();
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index b3701b23ca189..d3f86a2808580 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_ZPCI_OP 221
 #define KVM_CAP_S390_CPU_TOPOLOGY 222
 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223
+#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
GitLab


From afe20eb8df9108e4be9bdec88a4f90d2de863ca2 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Fri, 11 Nov 2022 18:06:30 +0100
Subject: [PATCH 2029/4122] KVM: s390: pv: avoid export before import if
 possible

If the appropriate UV feature bit is set, there is no need to perform
an export before import.

The misc feature indicates, among other things, that importing a shared
page from a different protected VM will automatically also transfer its
ownership.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-5-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-5-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kernel/uv.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index f9810d2a267c6..9f18a4af9c131 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -255,6 +255,13 @@ static int make_secure_pte(pte_t *ptep, unsigned long addr,
  */
 static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
 {
+	/*
+	 * The misc feature indicates, among other things, that importing a
+	 * shared page from a different protected VM will automatically also
+	 * transfer its ownership.
+	 */
+	if (test_bit_inv(BIT_UV_FEAT_MISC, &uv_info.uv_feature_indications))
+		return false;
 	if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
 		return false;
 	return atomic_read(&mm->context.protected_count) > 1;
-- 
GitLab


From f7866f582b1c9d80d1a3bd0953170185668c52ca Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Fri, 11 Nov 2022 18:06:31 +0100
Subject: [PATCH 2030/4122] KVM: s390: pv: support for Destroy fast UVC

Add support for the Destroy Secure Configuration Fast Ultravisor call,
and take advantage of it for asynchronous destroy.

When supported, the protected guest is destroyed immediately using the
new UVC, leaving only the memory to be cleaned up asynchronously.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-6-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-6-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/include/asm/uv.h | 10 +++++++
 arch/s390/kvm/pv.c         | 61 +++++++++++++++++++++++++++++++++-----
 2 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index be3ef9dd69726..28a9ad57b6f16 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -34,6 +34,7 @@
 #define UVC_CMD_INIT_UV			0x000f
 #define UVC_CMD_CREATE_SEC_CONF		0x0100
 #define UVC_CMD_DESTROY_SEC_CONF	0x0101
+#define UVC_CMD_DESTROY_SEC_CONF_FAST	0x0102
 #define UVC_CMD_CREATE_SEC_CPU		0x0120
 #define UVC_CMD_DESTROY_SEC_CPU		0x0121
 #define UVC_CMD_CONV_TO_SEC_STOR	0x0200
@@ -81,6 +82,7 @@ enum uv_cmds_inst {
 	BIT_UVC_CMD_UNSHARE_ALL = 20,
 	BIT_UVC_CMD_PIN_PAGE_SHARED = 21,
 	BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22,
+	BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23,
 	BIT_UVC_CMD_DUMP_INIT = 24,
 	BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25,
 	BIT_UVC_CMD_DUMP_CPU = 26,
@@ -230,6 +232,14 @@ struct uv_cb_nodata {
 	u64 reserved20[4];
 } __packed __aligned(8);
 
+/* Destroy Configuration Fast */
+struct uv_cb_destroy_fast {
+	struct uv_cb_header header;
+	u64 reserved08[2];
+	u64 handle;
+	u64 reserved20[5];
+} __packed __aligned(8);
+
 /* Set Shared Access */
 struct uv_cb_share {
 	struct uv_cb_header header;
diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c
index 5f958fcf62836..e032ebbf51b97 100644
--- a/arch/s390/kvm/pv.c
+++ b/arch/s390/kvm/pv.c
@@ -203,6 +203,9 @@ static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
 {
 	int cc;
 
+	/* It used the destroy-fast UVC, nothing left to do here */
+	if (!leftover->handle)
+		goto done_fast;
 	cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc);
 	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc);
 	WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc);
@@ -217,6 +220,7 @@ static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm,
 	free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len));
 	free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER);
 	vfree(leftover->stor_var);
+done_fast:
 	atomic_dec(&kvm->mm->context.protected_count);
 	return 0;
 }
@@ -250,6 +254,36 @@ static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
 	srcu_read_unlock(&kvm->srcu, srcu_idx);
 }
 
+static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc)
+{
+	struct uv_cb_destroy_fast uvcb = {
+		.header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST,
+		.header.len = sizeof(uvcb),
+		.handle = kvm_s390_pv_get_handle(kvm),
+	};
+	int cc;
+
+	cc = uv_call_sched(0, (u64)&uvcb);
+	if (rc)
+		*rc = uvcb.header.rc;
+	if (rrc)
+		*rrc = uvcb.header.rrc;
+	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+	KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x",
+		     uvcb.header.rc, uvcb.header.rrc);
+	WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x",
+		  kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc);
+	/* Inteded memory leak on "impossible" error */
+	if (!cc)
+		kvm_s390_pv_dealloc_vm(kvm);
+	return cc ? -EIO : 0;
+}
+
+static inline bool is_destroy_fast_available(void)
+{
+	return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list);
+}
+
 /**
  * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown.
  * @kvm: the VM
@@ -271,6 +305,7 @@ static void kvm_s390_destroy_lower_2g(struct kvm *kvm)
 int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
 {
 	struct pv_vm_to_be_destroyed *priv;
+	int res = 0;
 
 	lockdep_assert_held(&kvm->lock);
 	/*
@@ -283,14 +318,21 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc)
 	if (!priv)
 		return -ENOMEM;
 
-	priv->stor_var = kvm->arch.pv.stor_var;
-	priv->stor_base = kvm->arch.pv.stor_base;
-	priv->handle = kvm_s390_pv_get_handle(kvm);
-	priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
-	WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
-	if (s390_replace_asce(kvm->arch.gmap)) {
+	if (is_destroy_fast_available()) {
+		res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc);
+	} else {
+		priv->stor_var = kvm->arch.pv.stor_var;
+		priv->stor_base = kvm->arch.pv.stor_base;
+		priv->handle = kvm_s390_pv_get_handle(kvm);
+		priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table;
+		WRITE_ONCE(kvm->arch.gmap->guest_handle, 0);
+		if (s390_replace_asce(kvm->arch.gmap))
+			res = -ENOMEM;
+	}
+
+	if (res) {
 		kfree(priv);
-		return -ENOMEM;
+		return res;
 	}
 
 	kvm_s390_destroy_lower_2g(kvm);
@@ -471,6 +513,7 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
 {
 	struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier);
 	u16 dummy;
+	int r;
 
 	/*
 	 * No locking is needed since this is the last thread of the last user of this
@@ -479,7 +522,9 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription,
 	 * unregistered. This means that if this notifier runs, then the
 	 * struct kvm is still valid.
 	 */
-	kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+	r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy);
+	if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm))
+		kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy);
 }
 
 static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = {
-- 
GitLab


From cc726886079febfa2384d77486d7f3a11f951ea9 Mon Sep 17 00:00:00 2001
From: Claudio Imbrenda <imbrenda@linux.ibm.com>
Date: Fri, 11 Nov 2022 18:06:32 +0100
Subject: [PATCH 2031/4122] KVM: s390: pv: module parameter to fence
 asynchronous destroy

Add the module parameter "async_destroy", to allow the asynchronous
destroy mechanism to be switched off. This might be useful for
debugging purposes.

The parameter is enabled by default since the feature is opt-in anyway.

Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
Reviewed-by: Steffen Eiden <seiden@linux.ibm.com>
Reviewed-by: Nico Boehr <nrb@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111170632.77622-7-imbrenda@linux.ibm.com
Message-Id: <20221111170632.77622-7-imbrenda@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b6cc7d2935c0c..8a0c884bf7375 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -209,7 +209,13 @@ unsigned int diag9c_forwarding_hz;
 module_param(diag9c_forwarding_hz, uint, 0644);
 MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off");
 
-static int async_destroy;
+/*
+ * allow asynchronous deinit for protected guests; enable by default since
+ * the feature is opt-in anyway
+ */
+static int async_destroy = 1;
+module_param(async_destroy, int, 0444);
+MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests");
 
 /*
  * For now we handle at most 16 double words as this is what the s390 base
-- 
GitLab


From dbec280045f8ca568de2a88ac4712a35f82f2cb1 Mon Sep 17 00:00:00 2001
From: Nico Boehr <nrb@linux.ibm.com>
Date: Fri, 18 Nov 2022 11:04:29 +0100
Subject: [PATCH 2032/4122] s390/vfio-ap: GISA: sort out physical vs virtual
 pointers usage

Fix virtual vs physical address confusion (which currently are the same)
for the GISA when enabling the IRQ.

Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
Link: https://lore.kernel.org/r/20221118100429.70453-1-nrb@linux.ibm.com
Message-Id: <20221118100429.70453-1-nrb@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 0b4cc8c597ae6..205a001058589 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -429,7 +429,7 @@ static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
 
 	aqic_gisa.isc = nisc;
 	aqic_gisa.ir = 1;
-	aqic_gisa.gisa = (uint64_t)gisa >> 4;
+	aqic_gisa.gisa = virt_to_phys(gisa) >> 4;
 
 	status = ap_aqic(q->apqn, aqic_gisa, h_nib);
 	switch (status.response_code) {
-- 
GitLab


From 99b63f55dc514a357c2ecf25e9aab149879329f0 Mon Sep 17 00:00:00 2001
From: Heiko Carstens <hca@linux.ibm.com>
Date: Fri, 18 Nov 2022 16:11:33 +0100
Subject: [PATCH 2033/4122] KVM: s390: remove unused gisa_clear_ipm_gisc()
 function

clang warns about an unused function:
arch/s390/kvm/interrupt.c:317:20:
  error: unused function 'gisa_clear_ipm_gisc' [-Werror,-Wunused-function]
static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)

Remove gisa_clear_ipm_gisc(), since it is unused and get rid of this
warning.

Signed-off-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Link: https://lore.kernel.org/r/20221118151133.2974602-1-hca@linux.ibm.com
Signed-off-by: Christian Borntraeger <borntraeger@linux.ibm.com>
Signed-off-by: Janosch Frank <frankja@linux.ibm.com>
---
 arch/s390/kvm/interrupt.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index ab569faf0df24..1dae78deddf28 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -314,11 +314,6 @@ static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa)
 	return READ_ONCE(gisa->ipm);
 }
 
-static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
-{
-	clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
-}
-
 static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc)
 {
 	return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa);
-- 
GitLab


From a39818a3fb2bf12ae945a7c5fba8c5d9048a0e96 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Wed, 23 Nov 2022 21:26:10 +1100
Subject: [PATCH 2034/4122] objtool/powerpc: Implement arch_pc_relative_reloc()

Provide an implementation for arch_pc_relative_reloc(). It is needed to
pass the build once 61c6065ef7ec ("objtool: Allow !PC relative
relocations") is merged.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 tools/objtool/arch/powerpc/decode.c  | 9 +++++++++
 tools/objtool/include/objtool/arch.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/tools/objtool/arch/powerpc/decode.c b/tools/objtool/arch/powerpc/decode.c
index 01cade98b49e8..9c653805a08a9 100644
--- a/tools/objtool/arch/powerpc/decode.c
+++ b/tools/objtool/arch/powerpc/decode.c
@@ -82,6 +82,15 @@ unsigned long arch_jump_destination(struct instruction *insn)
 	return insn->offset + insn->immediate;
 }
 
+bool arch_pc_relative_reloc(struct reloc *reloc)
+{
+	/*
+	 * The powerpc build only allows certain relocation types, see
+	 * relocs_check.sh, and none of those accepted are PC relative.
+	 */
+	return false;
+}
+
 void arch_initial_func_cfi_state(struct cfi_init_state *state)
 {
 	int i;
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 5149330f400f5..4ecb480131c7c 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -95,4 +95,6 @@ bool arch_is_rethunk(struct symbol *sym);
 
 int arch_rewrite_retpolines(struct objtool_file *file);
 
+bool arch_pc_relative_reloc(struct reloc *reloc);
+
 #endif /* _ARCH_H */
-- 
GitLab


From 41522f7442905814c654dbe2ca7b8d3605c7e0cc Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:47 -0800
Subject: [PATCH 2035/4122] perf test: Add 'leafloop' test workload

The leafloop workload is to run an infinite loop in the test_leaf
function.  This is needed for the ARM fp callgraph test to verify if it
gets the correct callchains.

  $ perf test -w leafloop

Committer notes:

Add a:

  -U_FORTIFY_SOURCE

to the leafloop CFLAGS as the main perf flags set it and it requires
building with optimization, and this new test has a -O0.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c       |  1 +
 tools/perf/tests/tests.h              |  1 +
 tools/perf/tests/workloads/Build      |  3 +++
 tools/perf/tests/workloads/leafloop.c | 34 +++++++++++++++++++++++++++
 4 files changed, 39 insertions(+)
 create mode 100644 tools/perf/tests/workloads/leafloop.c

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 161f38476e77b..0ed5ac452f6ee 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -121,6 +121,7 @@ static struct test_suite **tests[] = {
 static struct test_workload *workloads[] = {
 	&workload__noploop,
 	&workload__thloop,
+	&workload__leafloop,
 };
 
 static int num_subtests(const struct test_suite *t)
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index e6edfeeadaeba..86804dd6452b7 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -202,5 +202,6 @@ struct test_workload workload__##work = {	\
 /* The list of test workloads */
 DECLARE_WORKLOAD(noploop);
 DECLARE_WORKLOAD(thloop);
+DECLARE_WORKLOAD(leafloop);
 
 #endif /* TESTS_H */
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index b8964b1099c0e..03dc675a4a7c8 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -2,3 +2,6 @@
 
 perf-y += noploop.o
 perf-y += thloop.o
+perf-y += leafloop.o
+
+CFLAGS_leafloop.o         = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/leafloop.c b/tools/perf/tests/workloads/leafloop.c
new file mode 100644
index 0000000000000..1bf5cc97649b0
--- /dev/null
+++ b/tools/perf/tests/workloads/leafloop.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include "../tests.h"
+
+/* We want to check these symbols in perf script */
+noinline void leaf(volatile int b);
+noinline void parent(volatile int b);
+
+static volatile int a;
+
+noinline void leaf(volatile int b)
+{
+	for (;;)
+		a += b;
+}
+
+noinline void parent(volatile int b)
+{
+	leaf(b);
+}
+
+static int leafloop(int argc, const char **argv)
+{
+	int c = 1;
+
+	if (argc > 0)
+		c = atoi(argv[0]);
+
+	parent(c);
+	return 0;
+}
+
+DEFINE_WORKLOAD(leafloop);
-- 
GitLab


From 7cf0b4a73a4a4f36bb4ef53d066b811b7621c635 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:48 -0800
Subject: [PATCH 2036/4122] perf test: Replace arm callgraph fp test workload
 with leafloop

So that it can get rid of requirement of a compiler.

Reviewed-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../perf/tests/shell/test_arm_callgraph_fp.sh | 34 ++-----------------
 1 file changed, 3 insertions(+), 31 deletions(-)

diff --git a/tools/perf/tests/shell/test_arm_callgraph_fp.sh b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
index ec108d45d3c61..e61d8deaa0c41 100755
--- a/tools/perf/tests/shell/test_arm_callgraph_fp.sh
+++ b/tools/perf/tests/shell/test_arm_callgraph_fp.sh
@@ -4,44 +4,16 @@
 
 lscpu | grep -q "aarch64" || exit 2
 
-if ! [ -x "$(command -v cc)" ]; then
-	echo "failed: no compiler, install gcc"
-	exit 2
-fi
-
 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
-TEST_PROGRAM_SOURCE=$(mktemp /tmp/test_program.XXXXX.c)
-TEST_PROGRAM=$(mktemp /tmp/test_program.XXXXX)
+TEST_PROGRAM="perf test -w leafloop"
 
 cleanup_files()
 {
 	rm -f $PERF_DATA
-	rm -f $TEST_PROGRAM_SOURCE
-	rm -f $TEST_PROGRAM
 }
 
 trap cleanup_files exit term int
 
-cat << EOF > $TEST_PROGRAM_SOURCE
-int a = 0;
-void leaf(void) {
-  for (;;)
-    a += a;
-}
-void parent(void) {
-  leaf();
-}
-int main(void) {
-  parent();
-  return 0;
-}
-EOF
-
-echo " + Compiling test program ($TEST_PROGRAM)..."
-
-CFLAGS="-g -O0 -fno-inline -fno-omit-frame-pointer"
-cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1
-
 # Add a 1 second delay to skip samples that are not in the leaf() function
 perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null &
 PID=$!
@@ -58,11 +30,11 @@ wait $PID
 # program
 # 	728 leaf
 # 	753 parent
-# 	76c main
+# 	76c leafloop
 # ...
 
 perf script -i $PERF_DATA -F comm,ip,sym | head -n4
 perf script -i $PERF_DATA -F comm,ip,sym | head -n4 | \
 	awk '{ if ($2 != "") sym[i++] = $2 } END { if (sym[0] != "leaf" ||
 						       sym[1] != "parent" ||
-						       sym[2] != "main") exit 1 }'
+						       sym[2] != "leafloop") exit 1 }'
-- 
GitLab


From 39281709a6e2301ac4c6ac7015c7793392ca2dfe Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:49 -0800
Subject: [PATCH 2037/4122] perf test: Add 'sqrtloop' test workload

The sqrtloop creates a child process to run an infinite loop calling
sqrt() with rand().  This is needed for ARM SPE fork test.

  $ perf test -w sqrtloop

It can take an optional argument to specify how long it will run in
seconds (default: 1).

Committer notes:

Explicitely ignored the sqrt() return to fix the build on systems where
the compiler complains it isn't being used.

And added a sqrtloop specific CFLAGS to disable optimizations to make
this a bit more robust wrt dead code elimination.

Doing that a -U_FORTIFY_SOURCE needs to be added, as -O0 is incompatible
with it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c       |  1 +
 tools/perf/tests/tests.h              |  1 +
 tools/perf/tests/workloads/Build      |  2 ++
 tools/perf/tests/workloads/sqrtloop.c | 45 +++++++++++++++++++++++++++
 4 files changed, 49 insertions(+)
 create mode 100644 tools/perf/tests/workloads/sqrtloop.c

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 0ed5ac452f6ee..9acb7a93eeb97 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -122,6 +122,7 @@ static struct test_workload *workloads[] = {
 	&workload__noploop,
 	&workload__thloop,
 	&workload__leafloop,
+	&workload__sqrtloop,
 };
 
 static int num_subtests(const struct test_suite *t)
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 86804dd6452b7..18c40319e67c7 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -203,5 +203,6 @@ struct test_workload workload__##work = {	\
 DECLARE_WORKLOAD(noploop);
 DECLARE_WORKLOAD(thloop);
 DECLARE_WORKLOAD(leafloop);
+DECLARE_WORKLOAD(sqrtloop);
 
 #endif /* TESTS_H */
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index 03dc675a4a7c8..2312a338f01c0 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -3,5 +3,7 @@
 perf-y += noploop.o
 perf-y += thloop.o
 perf-y += leafloop.o
+perf-y += sqrtloop.o
 
+CFLAGS_sqrtloop.o         = -g -O0 -fno-inline -U_FORTIFY_SOURCE
 CFLAGS_leafloop.o         = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/sqrtloop.c b/tools/perf/tests/workloads/sqrtloop.c
new file mode 100644
index 0000000000000..ccc94c6a6676a
--- /dev/null
+++ b/tools/perf/tests/workloads/sqrtloop.c
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <math.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <linux/compiler.h>
+#include <sys/wait.h>
+#include "../tests.h"
+
+static volatile sig_atomic_t done;
+
+static void sighandler(int sig __maybe_unused)
+{
+	done = 1;
+}
+
+static int __sqrtloop(int sec)
+{
+	signal(SIGALRM, sighandler);
+	alarm(sec);
+
+	while (!done)
+		(void)sqrt(rand());
+	return 0;
+}
+
+static int sqrtloop(int argc, const char **argv)
+{
+	int sec = 1;
+
+	if (argc > 0)
+		sec = atoi(argv[0]);
+
+	switch (fork()) {
+	case 0:
+		return __sqrtloop(sec);
+	case -1:
+		return -1;
+	default:
+		wait(NULL);
+	}
+	return 0;
+}
+
+DEFINE_WORKLOAD(sqrtloop);
-- 
GitLab


From e011979ec4c3482b68be05e2fdf98fefce4cd75b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:50 -0800
Subject: [PATCH 2038/4122] perf test: Replace arm spe fork test workload with
 sqrtloop

So that it can get rid of requirement of a compiler.  I've also removed
killall as it'll kill perf process now and run the test workload for 10
sec instead.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: James Clark <james.clark@arm.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-9-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/test_arm_spe_fork.sh | 44 +--------------------
 1 file changed, 1 insertion(+), 43 deletions(-)

diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh
index c920d3583d30a..da810e1b2b9e7 100755
--- a/tools/perf/tests/shell/test_arm_spe_fork.sh
+++ b/tools/perf/tests/shell/test_arm_spe_fork.sh
@@ -11,14 +11,7 @@ skip_if_no_arm_spe_event() {
 
 skip_if_no_arm_spe_event || exit 2
 
-# skip if there's no compiler
-if ! [ -x "$(command -v cc)" ]; then
-	echo "failed: no compiler, install gcc"
-	exit 2
-fi
-
-TEST_PROGRAM_SOURCE=$(mktemp /tmp/__perf_test.program.XXXXX.c)
-TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX)
+TEST_PROGRAM="perf test -w sqrtloop 10"
 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 PERF_RECORD_LOG=$(mktemp /tmp/__perf_test.log.XXXXX)
 
@@ -27,43 +20,10 @@ cleanup_files()
 	echo "Cleaning up files..."
 	rm -f ${PERF_RECORD_LOG}
 	rm -f ${PERF_DATA}
-	rm -f ${TEST_PROGRAM_SOURCE}
-	rm -f ${TEST_PROGRAM}
 }
 
 trap cleanup_files exit term int
 
-# compile test program
-cat << EOF > $TEST_PROGRAM_SOURCE
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <sys/wait.h>
-
-int workload() {
-  while (1)
-    sqrt(rand());
-  return 0;
-}
-
-int main() {
-  switch (fork()) {
-    case 0:
-      return workload();
-    case -1:
-      return 1;
-    default:
-      wait(NULL);
-  }
-  return 0;
-}
-EOF
-
-echo "Compiling test program..."
-CFLAGS="-lm"
-cc $TEST_PROGRAM_SOURCE $CFLAGS -o $TEST_PROGRAM || exit 1
-
 echo "Recording workload..."
 perf record -o ${PERF_DATA} -e arm_spe/period=65536/ -vvv -- $TEST_PROGRAM > ${PERF_RECORD_LOG} 2>&1 &
 PERFPID=$!
@@ -78,8 +38,6 @@ echo Log lines after 1 second = $log1
 
 kill $PERFPID
 wait $PERFPID
-# test program may leave an orphan process running the workload
-killall $(basename $TEST_PROGRAM)
 
 if [ "$log0" = "$log1" ];
 then
-- 
GitLab


From a104f0ea99d846df19aad8a5476eb9bc39fa42ca Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:51 -0800
Subject: [PATCH 2039/4122] perf test: Add 'brstack' test workload

The brstack is to run different kinds of branches repeatedly.  This is
necessary for brstack test case to verify if it has correct branch info.

  $ perf test -w brstack

I renamed the internal functions to have brstack_ prefix as it's too
generic name.

Add a -U_FORTIFY_SOURCE to the brstack CFLAGS, as the main perf flags
set it and it requires building with optimization, and this new test has
a -O0.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-10-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c      |  1 +
 tools/perf/tests/tests.h             |  1 +
 tools/perf/tests/workloads/Build     |  2 ++
 tools/perf/tests/workloads/brstack.c | 40 ++++++++++++++++++++++++++++
 4 files changed, 44 insertions(+)
 create mode 100644 tools/perf/tests/workloads/brstack.c

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 9acb7a93eeb97..69fa56939309b 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -123,6 +123,7 @@ static struct test_workload *workloads[] = {
 	&workload__thloop,
 	&workload__leafloop,
 	&workload__sqrtloop,
+	&workload__brstack,
 };
 
 static int num_subtests(const struct test_suite *t)
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 18c40319e67c7..dc96f59cac2ef 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -204,5 +204,6 @@ DECLARE_WORKLOAD(noploop);
 DECLARE_WORKLOAD(thloop);
 DECLARE_WORKLOAD(leafloop);
 DECLARE_WORKLOAD(sqrtloop);
+DECLARE_WORKLOAD(brstack);
 
 #endif /* TESTS_H */
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index 2312a338f01c0..ae06a5538b171 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -4,6 +4,8 @@ perf-y += noploop.o
 perf-y += thloop.o
 perf-y += leafloop.o
 perf-y += sqrtloop.o
+perf-y += brstack.o
 
 CFLAGS_sqrtloop.o         = -g -O0 -fno-inline -U_FORTIFY_SOURCE
 CFLAGS_leafloop.o         = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
+CFLAGS_brstack.o          = -g -O0 -fno-inline -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/brstack.c b/tools/perf/tests/workloads/brstack.c
new file mode 100644
index 0000000000000..0b60bd37b9d1a
--- /dev/null
+++ b/tools/perf/tests/workloads/brstack.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
+#include "../tests.h"
+
+#define BENCH_RUNS 999999
+
+static volatile int cnt;
+
+static void brstack_bar(void) {
+}				/* return */
+
+static void brstack_foo(void) {
+	brstack_bar();		/* call */
+}				/* return */
+
+static void brstack_bench(void) {
+	void (*brstack_foo_ind)(void) = brstack_foo;
+
+	if ((cnt++) % 3)	/* branch (cond) */
+		brstack_foo();	/* call */
+	brstack_bar();		/* call */
+	brstack_foo_ind();	/* call (ind) */
+}
+
+static int brstack(int argc, const char **argv)
+{
+	int num_loops = BENCH_RUNS;
+
+	if (argc > 0)
+		num_loops = atoi(argv[0]);
+
+	while (1) {
+		if ((cnt++) > num_loops)
+			break;
+		brstack_bench();/* call */
+	}			/* branch (uncond) */
+	return 0;
+}
+
+DEFINE_WORKLOAD(brstack);
-- 
GitLab


From 7bc1dd96cf48e1b44773698e7c97481f5f455f6c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:52 -0800
Subject: [PATCH 2040/4122] perf test: Replace brstack test workload

So that it can get rid of requirement of a compiler.  Also rename the
symbols to match with the perf test workload.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: James Clark <james.clark@arm.com>
Acked-by: German Gomez <german.gomez@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-11-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/test_brstack.sh | 66 +++++---------------------
 1 file changed, 12 insertions(+), 54 deletions(-)

diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index d7ff5c4b4da4c..5856639b565b1 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -4,13 +4,6 @@
 # SPDX-License-Identifier: GPL-2.0
 # German Gomez <german.gomez@arm.com>, 2022
 
-# we need a C compiler to build the test programs
-# so bail if none is found
-if ! [ -x "$(command -v cc)" ]; then
-	echo "failed: no compiler, install gcc"
-	exit 2
-fi
-
 # skip the test if the hardware doesn't support branch stack sampling
 # and if the architecture doesn't support filter types: any,save_type,u
 if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev/null 2>&1 ; then
@@ -19,6 +12,7 @@ if ! perf record -o- --no-buildid --branch-filter any,save_type,u -- true > /dev
 fi
 
 TMPDIR=$(mktemp -d /tmp/__perf_test.program.XXXXX)
+TESTPROG="perf test -w brstack"
 
 cleanup() {
 	rm -rf $TMPDIR
@@ -26,57 +20,24 @@ cleanup() {
 
 trap cleanup exit term int
 
-gen_test_program() {
-	# generate test program
-	cat << EOF > $1
-#define BENCH_RUNS 999999
-int cnt;
-void bar(void) {
-}			/* return */
-void foo(void) {
-	bar();		/* call */
-}			/* return */
-void bench(void) {
-  void (*foo_ind)(void) = foo;
-  if ((cnt++) % 3)	/* branch (cond) */
-    foo();		/* call */
-  bar();		/* call */
-  foo_ind();		/* call (ind) */
-}
-int main(void)
-{
-  int cnt = 0;
-  while (1) {
-    if ((cnt++) > BENCH_RUNS)
-      break;
-    bench();		/* call */
-  }			/* branch (uncond) */
-  return 0;
-}
-EOF
-}
-
 test_user_branches() {
 	echo "Testing user branch stack sampling"
 
-	gen_test_program "$TEMPDIR/program.c"
-	cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out
-
-	perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1
+	perf record -o $TMPDIR/perf.data --branch-filter any,save_type,u -- ${TESTPROG} > /dev/null 2>&1
 	perf script -i $TMPDIR/perf.data --fields brstacksym | xargs -n1 > $TMPDIR/perf.script
 
 	# example of branch entries:
-	# 	foo+0x14/bar+0x40/P/-/-/0/CALL
+	# 	brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL
 
 	set -x
-	egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/IND_CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^foo\+[^ ]*/bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^bench\+[^ ]*/foo\+[^ ]*/CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^bench\+[^ ]*/bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^bar\+[^ ]*/foo\+[^ ]*/RET$"		$TMPDIR/perf.script
-	egrep -m1 "^foo\+[^ ]*/bench\+[^ ]*/RET$"	$TMPDIR/perf.script
-	egrep -m1 "^bench\+[^ ]*/bench\+[^ ]*/COND$"	$TMPDIR/perf.script
-	egrep -m1 "^main\+[^ ]*/main\+[^ ]*/UNCOND$"	$TMPDIR/perf.script
+	egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	egrep -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$"		$TMPDIR/perf.script
+	egrep -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$"	$TMPDIR/perf.script
+	egrep -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$"	$TMPDIR/perf.script
+	egrep -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$"		$TMPDIR/perf.script
 	set +x
 
 	# some branch types are still not being tested:
@@ -91,10 +52,7 @@ test_filter() {
 
 	echo "Testing branch stack filtering permutation ($filter,$expect)"
 
-	gen_test_program "$TEMPDIR/program.c"
-	cc -fno-inline -g "$TEMPDIR/program.c" -o $TMPDIR/a.out
-
-	perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- $TMPDIR/a.out > /dev/null 2>&1
+	perf record -o $TMPDIR/perf.data --branch-filter $filter,save_type,u -- ${TESTPROG} > /dev/null 2>&1
 	perf script -i $TMPDIR/perf.data --fields brstack | xargs -n1 > $TMPDIR/perf.script
 
 	# fail if we find any branch type that doesn't match any of the expected ones
-- 
GitLab


From 3dfc01fe9d12a1e832f49deab37279faa8a9ebc8 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:53 -0800
Subject: [PATCH 2041/4122] perf test: Add 'datasym' test workload

The datasym workload is to check if perf mem command gets the data
addresses precisely.  This is needed for data symbol test.

  $ perf test -w datasym

I had to keep the buf1 in the data section, otherwise it could end
up in the BSS and was mmaped as a separate //anon region, then it
was not symbolized at all.  It needs to be fixed separately.

Committer notes:

Add a -U _FORTIFY_SOURCE to the datasym CFLAGS, as the main perf flags
set it and it requires building with optimization, and this new test has
a -O0.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c      |  1 +
 tools/perf/tests/tests.h             |  1 +
 tools/perf/tests/workloads/Build     |  2 ++
 tools/perf/tests/workloads/datasym.c | 24 ++++++++++++++++++++++++
 4 files changed, 28 insertions(+)
 create mode 100644 tools/perf/tests/workloads/datasym.c

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 69fa56939309b..4c6ae59a4dfd7 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -124,6 +124,7 @@ static struct test_workload *workloads[] = {
 	&workload__leafloop,
 	&workload__sqrtloop,
 	&workload__brstack,
+	&workload__datasym,
 };
 
 static int num_subtests(const struct test_suite *t)
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index dc96f59cac2ef..e15f24cfc9094 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -205,5 +205,6 @@ DECLARE_WORKLOAD(thloop);
 DECLARE_WORKLOAD(leafloop);
 DECLARE_WORKLOAD(sqrtloop);
 DECLARE_WORKLOAD(brstack);
+DECLARE_WORKLOAD(datasym);
 
 #endif /* TESTS_H */
diff --git a/tools/perf/tests/workloads/Build b/tools/perf/tests/workloads/Build
index ae06a5538b171..a1f34d5861e36 100644
--- a/tools/perf/tests/workloads/Build
+++ b/tools/perf/tests/workloads/Build
@@ -5,7 +5,9 @@ perf-y += thloop.o
 perf-y += leafloop.o
 perf-y += sqrtloop.o
 perf-y += brstack.o
+perf-y += datasym.o
 
 CFLAGS_sqrtloop.o         = -g -O0 -fno-inline -U_FORTIFY_SOURCE
 CFLAGS_leafloop.o         = -g -O0 -fno-inline -fno-omit-frame-pointer -U_FORTIFY_SOURCE
 CFLAGS_brstack.o          = -g -O0 -fno-inline -U_FORTIFY_SOURCE
+CFLAGS_datasym.o          = -g -O0 -fno-inline -U_FORTIFY_SOURCE
diff --git a/tools/perf/tests/workloads/datasym.c b/tools/perf/tests/workloads/datasym.c
new file mode 100644
index 0000000000000..ddd40bc63448a
--- /dev/null
+++ b/tools/perf/tests/workloads/datasym.c
@@ -0,0 +1,24 @@
+#include <linux/compiler.h>
+#include "../tests.h"
+
+typedef struct _buf {
+	char data1;
+	char reserved[55];
+	char data2;
+} buf __attribute__((aligned(64)));
+
+static buf buf1 = {
+	/* to have this in the data section */
+	.reserved[0] = 1,
+};
+
+static int datasym(int argc __maybe_unused, const char **argv __maybe_unused)
+{
+	for (;;) {
+		buf1.data1++;
+		buf1.data2 += buf1.data1;
+	}
+	return 0;
+}
+
+DEFINE_WORKLOAD(datasym);
-- 
GitLab


From 0b77fe474696aaaa592d52c7316c135401337aec Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 16 Nov 2022 15:38:54 -0800
Subject: [PATCH 2042/4122] perf test: Replace data symbol test workload with
 datasym

So that it can get rid of requirement of a compiler.

  $ sudo ./perf test -v 109
  109: Test data symbol                                                :
  --- start ---
  test child forked, pid 844526
  Recording workload...
  [ perf record: Woken up 2 times to write data ]
  [ perf record: Captured and wrote 0.354 MB /tmp/__perf_test.perf.data.GFeZO (4847 samples) ]
  Cleaning up files...
  test child finished with 0
  ---- end ----
  Test data symbol: Ok

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: James Clark <james.clark@arm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221116233854.1596378-13-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/test_data_symbol.sh | 29 +---------------------
 1 file changed, 1 insertion(+), 28 deletions(-)

diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh
index cd6eb54d235d8..d871e6c743efd 100755
--- a/tools/perf/tests/shell/test_data_symbol.sh
+++ b/tools/perf/tests/shell/test_data_symbol.sh
@@ -11,13 +11,7 @@ skip_if_no_mem_event() {
 
 skip_if_no_mem_event || exit 2
 
-# skip if there's no compiler
-if ! [ -x "$(command -v cc)" ]; then
-	echo "skip: no compiler, install gcc"
-	exit 2
-fi
-
-TEST_PROGRAM=$(mktemp /tmp/__perf_test.program.XXXXX)
+TEST_PROGRAM="perf test -w datasym"
 PERF_DATA=$(mktemp /tmp/__perf_test.perf.data.XXXXX)
 
 check_result() {
@@ -45,31 +39,10 @@ cleanup_files()
 {
 	echo "Cleaning up files..."
 	rm -f ${PERF_DATA}
-	rm -f ${TEST_PROGRAM}
 }
 
 trap cleanup_files exit term int
 
-# compile test program
-echo "Compiling test program..."
-cat << EOF | cc -o ${TEST_PROGRAM} -x c -
-typedef struct _buf {
-	char data1;
-	char reserved[55];
-	char data2;
-} buf __attribute__((aligned(64)));
-
-static buf buf1;
-
-int main(void) {
-	for (;;) {
-		buf1.data1++;
-		buf1.data2 += buf1.data1;
-	}
-	return 0;
-}
-EOF
-
 echo "Recording workload..."
 
 # perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support
-- 
GitLab


From a3720e969c6de39210809ca9aaebec81919d6c6c Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Wed, 16 Nov 2022 14:46:31 -0800
Subject: [PATCH 2043/4122] perf build: Fix LIBTRACEEVENT_DYNAMIC

The tools/lib includes fixes break LIBTRACEVENT_DYNAMIC as the makefile
erroneously had dependencies on building libtraceevent even when not
linking with it. This change fixes the issues with LIBTRACEEVENT_DYNAMIC
by making the built files optional.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andrii Nakryiko <andrii.nakryiko@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221116224631.207631-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 25 ++++++++++++++++++++++---
 tools/perf/util/setup.py |  3 ++-
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 8c0df762fb02f..a17a6ea85e817 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -242,8 +242,10 @@ sub-make: fixdep
 else # force_fixdep
 
 LIBAPI_DIR      = $(srctree)/tools/lib/api/
+ifndef LIBTRACEEVENT_DYNAMIC
 LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/
 LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins
+endif
 LIBBPF_DIR      = $(srctree)/tools/lib/bpf/
 LIBSUBCMD_DIR   = $(srctree)/tools/lib/subcmd/
 LIBSYMBOL_DIR   = $(srctree)/tools/lib/symbol/
@@ -293,6 +295,7 @@ SCRIPT_SH += perf-iostat.sh
 grep-libs = $(filter -l%,$(1))
 strip-libs = $(filter-out -l%,$(1))
 
+ifndef LIBTRACEEVENT_DYNAMIC
 ifneq ($(OUTPUT),)
   LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent
 else
@@ -306,13 +309,16 @@ LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a
 export LIBTRACEEVENT
 LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list
 CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include
-
 #
 # The static build has no dynsym table, so this does not work for
 # static build. Looks like linker starts to scream about that now
 # (in Fedora 26) so we need to switch it off for static build.
 DYNAMIC_LIST_LDFLAGS               = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
 LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS))
+else
+LIBTRACEEVENT_DYNAMIC_LIST =
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS =
+endif
 
 ifneq ($(OUTPUT),)
   LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi
@@ -375,7 +381,11 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
 python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so
 
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+ifndef LIBTRACEEVENT_DYNAMIC
 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
+else
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI)
+endif
 
 SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
 
@@ -785,9 +795,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(LIBPERF) \
 	$(LIBSUBCMD) \
 	$(LIBSYMBOL) \
-	$(LIBTRACEEVENT) \
 	bpf-skel
 
+ifndef LIBTRACEEVENT_DYNAMIC
+prepare: $(LIBTRACEEVENT)
+endif
+
 $(OUTPUT)%.o: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
@@ -843,6 +856,7 @@ endif
 
 $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
+ifndef LIBTRACEEVENT_DYNAMIC
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))'
 
 $(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT)
@@ -872,6 +886,7 @@ install-traceevent-plugins: libtraceevent_plugins
 	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
 		DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
 		$(LIBTRACEEVENT_FLAGS) install
+endif
 
 $(LIBAPI): FORCE | $(LIBAPI_OUTPUT)
 	$(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \
@@ -1152,7 +1167,7 @@ endif # BUILD_BPF_SKEL
 bpf-skel-clean:
 	$(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
 
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean
+clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean
 	$(call QUIET_CLEAN, core-objs)  $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS)
 	$(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
 	$(Q)$(RM) $(OUTPUT).config-detected
@@ -1192,6 +1207,10 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea
 	$(call QUIET_CLEAN, Documentation) \
 	$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null
 
+ifndef LIBTRACEEVENT_DYNAMIC
+clean:: $(LIBTRACEEVENT)-clean libtraceevent_plugins-clean
+endif
+
 #
 # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
 # file if defined, with no further action.
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 5b1e6468d5e8b..43e7ca40b2ec4 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -77,7 +77,8 @@ perf = Extension('perf',
 		  include_dirs = ['util/include'],
 		  libraries = extra_libraries,
 		  extra_compile_args = cflags,
-		  extra_objects = [libtraceevent, libapikfs, libperf],
+		  extra_objects = [ x for x in [libtraceevent, libapikfs, libperf]
+                                    if x is not None],
                  )
 
 setup(name='perf',
-- 
GitLab


From e5c6109f4813246aa21e2c441e3cde549efa1f18 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 14 Nov 2022 13:07:22 -0800
Subject: [PATCH 2044/4122] perf list: Reorganize to use callbacks to allow
 honouring command line options
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rather than controlling the list output with passed flags, add
callbacks that are called when an event or metric are
encountered. State is passed to the callback so that command line
options can be respected, alternatively the callbacks can be changed.

Fix a few bugs:
 - wordwrap to columns metric descriptions and expressions;
 - remove unnecessary whitespace after PMU event names;
 - the metric filter is a glob but matched using strstr which will
   always fail, switch to using a proper globmatch,
 - the detail flag gives details for extra kernel PMU events like
   branch-instructions.

In metricgroup.c switch from struct mep being a rbtree of metricgroups
containing a list of metrics, to the tree directly containing all the
metrics. In general the alias for a name is passed to the print
routine rather than being contained in the name with OR.

Committer notes:

Check the asprint() return to address this on fedora 36:

  util/print-events.c: In function ‘print_sdt_events’:
  util/print-events.c:183:33: error: ignoring return value of ‘asprintf’ declared with attribute ‘warn_unused_result’ [-Werror=unused-result]
    183 |                                 asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid);
        |                                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  cc1: all warnings being treated as errors

  $ gcc --version | head -1
  gcc (GCC) 12.2.1 20220819 (Red Hat 12.2.1-2)
  $

Fix ps.pmu_glob setting when dealing with *:* events, it was being left
with a freed pointer that then at the end of cmd_list() would be double
freed.

Check if pmu_name is NULL in default_print_event() before calling
strglobmatch(pmu_name, ...) to avoid a segfault.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-10-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-list.c      | 333 +++++++++++++++++++++++++-----
 tools/perf/util/metricgroup.c  | 243 ++++++----------------
 tools/perf/util/metricgroup.h  |   4 +-
 tools/perf/util/pmu.c          | 145 +++++--------
 tools/perf/util/pmu.h          |   5 +-
 tools/perf/util/print-events.c | 364 +++++++++++++++++----------------
 tools/perf/util/print-events.h |  42 ++--
 7 files changed, 624 insertions(+), 512 deletions(-)

diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index cc84ced6da266..0c84fdb3ad37c 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -15,31 +15,240 @@
 #include "util/pmu-hybrid.h"
 #include "util/debug.h"
 #include "util/metricgroup.h"
+#include "util/string2.h"
+#include "util/strlist.h"
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
 #include <stdio.h>
 
-static bool desc_flag = true;
-static bool details_flag;
+/**
+ * struct print_state - State and configuration passed to the default_print
+ * functions.
+ */
+struct print_state {
+	/**
+	 * @pmu_glob: Optionally restrict PMU and metric matching to PMU or
+	 * debugfs subsystem name.
+	 */
+	char *pmu_glob;
+	/** @event_glob: Optional pattern matching glob. */
+	char *event_glob;
+	/** @name_only: Print event or metric names only. */
+	bool name_only;
+	/** @desc: Print the event or metric description. */
+	bool desc;
+	/** @long_desc: Print longer event or metric description. */
+	bool long_desc;
+	/** @deprecated: Print deprecated events or metrics. */
+	bool deprecated;
+	/**
+	 * @detailed: Print extra information on the perf event such as names
+	 * and expressions used internally by events.
+	 */
+	bool detailed;
+	/** @metrics: Controls printing of metric and metric groups. */
+	bool metrics;
+	/** @metricgroups: Controls printing of metric and metric groups. */
+	bool metricgroups;
+	/** @last_topic: The last printed event topic. */
+	char *last_topic;
+	/** @last_metricgroups: The last printed metric group. */
+	char *last_metricgroups;
+	/** @visited_metrics: Metrics that are printed to avoid duplicates. */
+	struct strlist *visited_metrics;
+};
+
+static void default_print_start(void *ps)
+{
+	struct print_state *print_state = ps;
+
+	if (!print_state->name_only && pager_in_use())
+		printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
+}
+
+static void default_print_end(void *print_state __maybe_unused) {}
+
+static void wordwrap(const char *s, int start, int max, int corr)
+{
+	int column = start;
+	int n;
+
+	while (*s) {
+		int wlen = strcspn(s, " \t");
+
+		if (column + wlen >= max && column > start) {
+			printf("\n%*s", start, "");
+			column = start + corr;
+		}
+		n = printf("%s%.*s", column > start ? " " : "", wlen, s);
+		if (n <= 0)
+			break;
+		s += wlen;
+		column += n;
+		s = skip_spaces(s);
+	}
+}
+
+static void default_print_event(void *ps, const char *pmu_name, const char *topic,
+				const char *event_name, const char *event_alias,
+				const char *scale_unit __maybe_unused,
+				bool deprecated, const char *event_type_desc,
+				const char *desc, const char *long_desc,
+				const char *encoding_desc,
+				const char *metric_name, const char *metric_expr)
+{
+	struct print_state *print_state = ps;
+	int pos;
+
+	if (deprecated && !print_state->deprecated)
+		return;
+
+	if (print_state->pmu_glob && pmu_name && !strglobmatch(pmu_name, print_state->pmu_glob))
+		return;
+
+	if (print_state->event_glob &&
+	    (!event_name || !strglobmatch(event_name, print_state->event_glob)) &&
+	    (!event_alias || !strglobmatch(event_alias, print_state->event_glob)) &&
+	    (!topic || !strglobmatch_nocase(topic, print_state->event_glob)))
+		return;
+
+	if (print_state->name_only) {
+		if (event_alias && strlen(event_alias))
+			printf("%s ", event_alias);
+		else
+			printf("%s ", event_name);
+		return;
+	}
+
+	if (strcmp(print_state->last_topic, topic ?: "")) {
+		if (topic)
+			printf("\n%s:\n", topic);
+		free(print_state->last_topic);
+		print_state->last_topic = strdup(topic ?: "");
+	}
+
+	if (event_alias && strlen(event_alias))
+		pos = printf("  %s OR %s", event_name, event_alias);
+	else
+		pos = printf("  %s", event_name);
+
+	if (!topic && event_type_desc) {
+		for (; pos < 53; pos++)
+			putchar(' ');
+		printf("[%s]\n", event_type_desc);
+	} else
+		putchar('\n');
+
+	if (desc && print_state->desc) {
+		printf("%*s", 8, "[");
+		wordwrap(desc, 8, pager_get_columns(), 0);
+		printf("]\n");
+	}
+
+	if (long_desc && print_state->long_desc) {
+		printf("%*s", 8, "[");
+		wordwrap(long_desc, 8, pager_get_columns(), 0);
+		printf("]\n");
+	}
+
+	if (print_state->detailed && encoding_desc) {
+		printf("%*s%s", 8, "", encoding_desc);
+		if (metric_name)
+			printf(" MetricName: %s", metric_name);
+		if (metric_expr)
+			printf(" MetricExpr: %s", metric_expr);
+		putchar('\n');
+	}
+}
+
+static void default_print_metric(void *ps,
+				const char *group,
+				const char *name,
+				const char *desc,
+				const char *long_desc,
+				const char *expr,
+				const char *unit __maybe_unused)
+{
+	struct print_state *print_state = ps;
+
+	if (print_state->event_glob &&
+	    (!print_state->metrics || !name || !strglobmatch(name, print_state->event_glob)) &&
+	    (!print_state->metricgroups || !group || !strglobmatch(group, print_state->event_glob)))
+		return;
+
+	if (!print_state->name_only && !print_state->last_metricgroups) {
+		if (print_state->metricgroups) {
+			printf("\nMetric Groups:\n");
+			if (!print_state->metrics)
+				putchar('\n');
+		} else {
+			printf("\nMetrics:\n\n");
+		}
+	}
+	if (!print_state->last_metricgroups ||
+	    strcmp(print_state->last_metricgroups, group ?: "")) {
+		if (group && print_state->metricgroups) {
+			if (print_state->name_only)
+				printf("%s ", group);
+			else if (print_state->metrics)
+				printf("\n%s:\n", group);
+			else
+				printf("%s\n", group);
+		}
+		free(print_state->last_metricgroups);
+		print_state->last_metricgroups = strdup(group ?: "");
+	}
+	if (!print_state->metrics)
+		return;
+
+	if (print_state->name_only) {
+		if (print_state->metrics &&
+		    !strlist__has_entry(print_state->visited_metrics, name)) {
+			printf("%s ", name);
+			strlist__add(print_state->visited_metrics, name);
+		}
+		return;
+	}
+	printf("  %s\n", name);
+
+	if (desc && print_state->desc) {
+		printf("%*s", 8, "[");
+		wordwrap(desc, 8, pager_get_columns(), 0);
+		printf("]\n");
+	}
+	if (long_desc && print_state->long_desc) {
+		printf("%*s", 8, "[");
+		wordwrap(long_desc, 8, pager_get_columns(), 0);
+		printf("]\n");
+	}
+	if (expr && print_state->detailed) {
+		printf("%*s", 8, "[");
+		wordwrap(expr, 8, pager_get_columns(), 0);
+		printf("]\n");
+	}
+}
 
 int cmd_list(int argc, const char **argv)
 {
 	int i, ret = 0;
-	bool raw_dump = false;
-	bool long_desc_flag = false;
-	bool deprecated = false;
-	char *pmu_name = NULL;
+	struct print_state ps = {};
+	struct print_callbacks print_cb = {
+		.print_start = default_print_start,
+		.print_end = default_print_end,
+		.print_event = default_print_event,
+		.print_metric = default_print_metric,
+	};
 	const char *hybrid_name = NULL;
 	const char *unit_name = NULL;
 	struct option list_options[] = {
-		OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"),
-		OPT_BOOLEAN('d', "desc", &desc_flag,
+		OPT_BOOLEAN(0, "raw-dump", &ps.name_only, "Dump raw events"),
+		OPT_BOOLEAN('d', "desc", &ps.desc,
 			    "Print extra event descriptions. --no-desc to not print."),
-		OPT_BOOLEAN('v', "long-desc", &long_desc_flag,
+		OPT_BOOLEAN('v', "long-desc", &ps.long_desc,
 			    "Print longer event descriptions."),
-		OPT_BOOLEAN(0, "details", &details_flag,
+		OPT_BOOLEAN(0, "details", &ps.detailed,
 			    "Print information on the perf event names and expressions used internally by events."),
-		OPT_BOOLEAN(0, "deprecated", &deprecated,
+		OPT_BOOLEAN(0, "deprecated", &ps.deprecated,
 			    "Print deprecated events."),
 		OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
 			   "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
@@ -63,20 +272,28 @@ int cmd_list(int argc, const char **argv)
 
 	setup_pager();
 
-	if (!raw_dump && pager_in_use())
-		printf("\nList of pre-defined events (to be used in -e or -M):\n\n");
+	if (!ps.name_only)
+		setup_pager();
 
+	ps.desc = !ps.long_desc;
+	ps.last_topic = strdup("");
+	assert(ps.last_topic);
+	ps.visited_metrics = strlist__new(NULL, NULL);
+	assert(ps.visited_metrics);
 	if (unit_name)
-		pmu_name = strdup(unit_name);
+		ps.pmu_glob = strdup(unit_name);
 	else if (hybrid_name) {
-		pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_name);
-		if (!pmu_name)
+		ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name);
+		if (!ps.pmu_glob)
 			pr_warning("WARNING: hybrid cputype is not supported!\n");
 	}
 
+	print_cb.print_start(&ps);
+
 	if (argc == 0) {
-		print_events(NULL, raw_dump, !desc_flag, long_desc_flag,
-				details_flag, deprecated, pmu_name);
+		ps.metrics = true;
+		ps.metricgroups = true;
+		print_events(&print_cb, &ps);
 		goto out;
 	}
 
@@ -84,31 +301,35 @@ int cmd_list(int argc, const char **argv)
 		char *sep, *s;
 
 		if (strcmp(argv[i], "tracepoint") == 0)
-			print_tracepoint_events(NULL, NULL, raw_dump);
+			print_tracepoint_events(&print_cb, &ps);
 		else if (strcmp(argv[i], "hw") == 0 ||
 			 strcmp(argv[i], "hardware") == 0)
-			print_symbol_events(NULL, PERF_TYPE_HARDWARE,
-					event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
+			print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE,
+					event_symbols_hw, PERF_COUNT_HW_MAX);
 		else if (strcmp(argv[i], "sw") == 0 ||
 			 strcmp(argv[i], "software") == 0) {
-			print_symbol_events(NULL, PERF_TYPE_SOFTWARE,
-					event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
-			print_tool_events(NULL, raw_dump);
+			print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE,
+					event_symbols_sw, PERF_COUNT_SW_MAX);
+			print_tool_events(&print_cb, &ps);
 		} else if (strcmp(argv[i], "cache") == 0 ||
 			 strcmp(argv[i], "hwcache") == 0)
-			print_hwcache_events(NULL, raw_dump);
+			print_hwcache_events(&print_cb, &ps);
 		else if (strcmp(argv[i], "pmu") == 0)
-			print_pmu_events(NULL, raw_dump, !desc_flag,
-						long_desc_flag, details_flag,
-						deprecated, pmu_name);
+			print_pmu_events(&print_cb, &ps);
 		else if (strcmp(argv[i], "sdt") == 0)
-			print_sdt_events(NULL, NULL, raw_dump);
-		else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0)
-			metricgroup__print(true, false, NULL, raw_dump, details_flag, pmu_name);
-		else if (strcmp(argv[i], "metricgroup") == 0 || strcmp(argv[i], "metricgroups") == 0)
-			metricgroup__print(false, true, NULL, raw_dump, details_flag, pmu_name);
-		else if ((sep = strchr(argv[i], ':')) != NULL) {
+			print_sdt_events(&print_cb, &ps);
+		else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) {
+			ps.metricgroups = false;
+			ps.metrics = true;
+			metricgroup__print(&print_cb, &ps);
+		} else if (strcmp(argv[i], "metricgroup") == 0 ||
+			   strcmp(argv[i], "metricgroups") == 0) {
+			ps.metricgroups = true;
+			ps.metrics = false;
+			metricgroup__print(&print_cb, &ps);
+		} else if ((sep = strchr(argv[i], ':')) != NULL) {
 			int sep_idx;
+			char *old_pmu_glob = ps.pmu_glob;
 
 			sep_idx = sep - argv[i];
 			s = strdup(argv[i]);
@@ -118,34 +339,42 @@ int cmd_list(int argc, const char **argv)
 			}
 
 			s[sep_idx] = '\0';
-			print_tracepoint_events(s, s + sep_idx + 1, raw_dump);
-			print_sdt_events(s, s + sep_idx + 1, raw_dump);
-			metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name);
+			ps.pmu_glob = s;
+			ps.event_glob = s + sep_idx + 1;
+			print_tracepoint_events(&print_cb, &ps);
+			print_sdt_events(&print_cb, &ps);
+			ps.metrics = true;
+			ps.metricgroups = true;
+			metricgroup__print(&print_cb, &ps);
 			free(s);
+			ps.pmu_glob = old_pmu_glob;
 		} else {
 			if (asprintf(&s, "*%s*", argv[i]) < 0) {
 				printf("Critical: Not enough memory! Trying to continue...\n");
 				continue;
 			}
-			print_symbol_events(s, PERF_TYPE_HARDWARE,
-					    event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump);
-			print_symbol_events(s, PERF_TYPE_SOFTWARE,
-					    event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump);
-			print_tool_events(s, raw_dump);
-			print_hwcache_events(s, raw_dump);
-			print_pmu_events(s, raw_dump, !desc_flag,
-						long_desc_flag,
-						details_flag,
-						deprecated,
-						pmu_name);
-			print_tracepoint_events(NULL, s, raw_dump);
-			print_sdt_events(NULL, s, raw_dump);
-			metricgroup__print(true, true, s, raw_dump, details_flag, pmu_name);
+			ps.event_glob = s;
+			print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE,
+					event_symbols_hw, PERF_COUNT_HW_MAX);
+			print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE,
+					event_symbols_sw, PERF_COUNT_SW_MAX);
+			print_tool_events(&print_cb, &ps);
+			print_hwcache_events(&print_cb, &ps);
+			print_pmu_events(&print_cb, &ps);
+			print_tracepoint_events(&print_cb, &ps);
+			print_sdt_events(&print_cb, &ps);
+			ps.metrics = true;
+			ps.metricgroups = true;
+			metricgroup__print(&print_cb, &ps);
 			free(s);
 		}
 	}
 
 out:
-	free(pmu_name);
+	print_cb.print_end(&ps);
+	free(ps.pmu_glob);
+	free(ps.last_topic);
+	free(ps.last_metricgroups);
+	strlist__delete(ps.visited_metrics);
 	return ret;
 }
diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c
index cf9e2452d3229..6eac7a60ed270 100644
--- a/tools/perf/util/metricgroup.c
+++ b/tools/perf/util/metricgroup.c
@@ -12,6 +12,7 @@
 #include "strbuf.h"
 #include "pmu.h"
 #include "pmu-hybrid.h"
+#include "print-events.h"
 #include "expr.h"
 #include "rblist.h"
 #include <string.h>
@@ -353,51 +354,65 @@ static bool match_pe_metric(const struct pmu_event *pe, const char *metric)
 	       match_metric(pe->metric_name, metric);
 }
 
+/** struct mep - RB-tree node for building printing information. */
 struct mep {
+	/** nd - RB-tree element. */
 	struct rb_node nd;
-	const char *name;
-	struct strlist *metrics;
+	/** @metric_group: Owned metric group name, separated others with ';'. */
+	char *metric_group;
+	const char *metric_name;
+	const char *metric_desc;
+	const char *metric_long_desc;
+	const char *metric_expr;
+	const char *metric_unit;
 };
 
 static int mep_cmp(struct rb_node *rb_node, const void *entry)
 {
 	struct mep *a = container_of(rb_node, struct mep, nd);
 	struct mep *b = (struct mep *)entry;
+	int ret;
 
-	return strcmp(a->name, b->name);
+	ret = strcmp(a->metric_group, b->metric_group);
+	if (ret)
+		return ret;
+
+	return strcmp(a->metric_name, b->metric_name);
 }
 
-static struct rb_node *mep_new(struct rblist *rl __maybe_unused,
-					const void *entry)
+static struct rb_node *mep_new(struct rblist *rl __maybe_unused, const void *entry)
 {
 	struct mep *me = malloc(sizeof(struct mep));
 
 	if (!me)
 		return NULL;
+
 	memcpy(me, entry, sizeof(struct mep));
-	me->name = strdup(me->name);
-	if (!me->name)
-		goto out_me;
-	me->metrics = strlist__new(NULL, NULL);
-	if (!me->metrics)
-		goto out_name;
 	return &me->nd;
-out_name:
-	zfree(&me->name);
-out_me:
+}
+
+static void mep_delete(struct rblist *rl __maybe_unused,
+		       struct rb_node *nd)
+{
+	struct mep *me = container_of(nd, struct mep, nd);
+
+	zfree(&me->metric_group);
 	free(me);
-	return NULL;
 }
 
-static struct mep *mep_lookup(struct rblist *groups, const char *name)
+static struct mep *mep_lookup(struct rblist *groups, const char *metric_group,
+			      const char *metric_name)
 {
 	struct rb_node *nd;
 	struct mep me = {
-		.name = name
+		.metric_group = strdup(metric_group),
+		.metric_name = metric_name,
 	};
 	nd = rblist__find(groups, &me);
-	if (nd)
+	if (nd) {
+		free(me.metric_group);
 		return container_of(nd, struct mep, nd);
+	}
 	rblist__add_node(groups, &me);
 	nd = rblist__find(groups, &me);
 	if (nd)
@@ -405,107 +420,37 @@ static struct mep *mep_lookup(struct rblist *groups, const char *name)
 	return NULL;
 }
 
-static void mep_delete(struct rblist *rl __maybe_unused,
-		       struct rb_node *nd)
-{
-	struct mep *me = container_of(nd, struct mep, nd);
-
-	strlist__delete(me->metrics);
-	zfree(&me->name);
-	free(me);
-}
-
-static void metricgroup__print_strlist(struct strlist *metrics, bool raw)
-{
-	struct str_node *sn;
-	int n = 0;
-
-	strlist__for_each_entry (sn, metrics) {
-		if (raw)
-			printf("%s%s", n > 0 ? " " : "", sn->s);
-		else
-			printf("  %s\n", sn->s);
-		n++;
-	}
-	if (raw)
-		putchar('\n');
-}
-
-static int metricgroup__print_pmu_event(const struct pmu_event *pe,
-					bool metricgroups, char *filter,
-					bool raw, bool details,
-					struct rblist *groups,
-					struct strlist *metriclist)
+static int metricgroup__add_to_mep_groups(const struct pmu_event *pe,
+					struct rblist *groups)
 {
 	const char *g;
 	char *omg, *mg;
 
-	g = pe->metric_group;
-	if (!g && pe->metric_name) {
-		if (pe->name)
-			return 0;
-		g = "No_group";
-	}
-
-	if (!g)
-		return 0;
-
-	mg = strdup(g);
-
+	mg = strdup(pe->metric_group ?: "No_group");
 	if (!mg)
 		return -ENOMEM;
 	omg = mg;
 	while ((g = strsep(&mg, ";")) != NULL) {
 		struct mep *me;
-		char *s;
 
 		g = skip_spaces(g);
-		if (*g == 0)
-			g = "No_group";
-		if (filter && !strstr(g, filter))
-			continue;
-		if (raw)
-			s = (char *)pe->metric_name;
-		else {
-			if (asprintf(&s, "%s\n%*s%s]",
-				     pe->metric_name, 8, "[", pe->desc) < 0)
-				return -1;
-			if (details) {
-				if (asprintf(&s, "%s\n%*s%s]",
-					     s, 8, "[", pe->metric_expr) < 0)
-					return -1;
-			}
-		}
-
-		if (!s)
-			continue;
+		if (strlen(g))
+			me = mep_lookup(groups, g, pe->metric_name);
+		else
+			me = mep_lookup(groups, "No_group", pe->metric_name);
 
-		if (!metricgroups) {
-			strlist__add(metriclist, s);
-		} else {
-			me = mep_lookup(groups, g);
-			if (!me)
-				continue;
-			strlist__add(me->metrics, s);
+		if (me) {
+			me->metric_desc = pe->desc;
+			me->metric_long_desc = pe->long_desc;
+			me->metric_expr = pe->metric_expr;
+			me->metric_unit = pe->unit;
 		}
-
-		if (!raw)
-			free(s);
 	}
 	free(omg);
 
 	return 0;
 }
 
-struct metricgroup_print_sys_idata {
-	struct strlist *metriclist;
-	char *filter;
-	struct rblist *groups;
-	bool metricgroups;
-	bool raw;
-	bool details;
-};
-
 struct metricgroup_iter_data {
 	pmu_event_iter_fn fn;
 	void *data;
@@ -528,61 +473,26 @@ static int metricgroup__sys_event_iter(const struct pmu_event *pe,
 
 		return d->fn(pe, table, d->data);
 	}
-
 	return 0;
 }
 
-static int metricgroup__print_sys_event_iter(const struct pmu_event *pe,
-					     const struct pmu_events_table *table __maybe_unused,
-					     void *data)
-{
-	struct metricgroup_print_sys_idata *d = data;
-
-	return metricgroup__print_pmu_event(pe, d->metricgroups, d->filter, d->raw,
-				     d->details, d->groups, d->metriclist);
-}
-
-struct metricgroup_print_data {
-	const char *pmu_name;
-	struct strlist *metriclist;
-	char *filter;
-	struct rblist *groups;
-	bool metricgroups;
-	bool raw;
-	bool details;
-};
-
-static int metricgroup__print_callback(const struct pmu_event *pe,
-				       const struct pmu_events_table *table __maybe_unused,
-				       void *vdata)
+static int metricgroup__add_to_mep_groups_callback(const struct pmu_event *pe,
+						const struct pmu_events_table *table __maybe_unused,
+						void *vdata)
 {
-	struct metricgroup_print_data *data = vdata;
-	const char *pmu = pe->pmu ?: "cpu";
+	struct rblist *groups = vdata;
 
-	if (!pe->metric_expr)
-		return 0;
-
-	if (data->pmu_name && strcmp(data->pmu_name, pmu))
+	if (!pe->metric_name)
 		return 0;
 
-	return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter,
-					    data->raw, data->details, data->groups,
-					    data->metriclist);
+	return metricgroup__add_to_mep_groups(pe, groups);
 }
 
-void metricgroup__print(bool metrics, bool metricgroups, char *filter,
-			bool raw, bool details, const char *pmu_name)
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state)
 {
 	struct rblist groups;
-	struct rb_node *node, *next;
-	struct strlist *metriclist = NULL;
 	const struct pmu_events_table *table;
-
-	if (!metricgroups) {
-		metriclist = strlist__new(NULL, NULL);
-		if (!metriclist)
-			return;
-	}
+	struct rb_node *node, *next;
 
 	rblist__init(&groups);
 	groups.node_new = mep_new;
@@ -590,56 +500,31 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter,
 	groups.node_delete = mep_delete;
 	table = pmu_events_table__find();
 	if (table) {
-		struct metricgroup_print_data data = {
-			.pmu_name = pmu_name,
-			.metriclist = metriclist,
-			.metricgroups = metricgroups,
-			.filter = filter,
-			.raw = raw,
-			.details = details,
-			.groups = &groups,
-		};
-
 		pmu_events_table_for_each_event(table,
-						metricgroup__print_callback,
-						&data);
+						metricgroup__add_to_mep_groups_callback,
+						&groups);
 	}
 	{
 		struct metricgroup_iter_data data = {
-			.fn = metricgroup__print_sys_event_iter,
-			.data = (void *) &(struct metricgroup_print_sys_idata){
-				.metriclist = metriclist,
-				.metricgroups = metricgroups,
-				.filter = filter,
-				.raw = raw,
-				.details = details,
-				.groups = &groups,
-			},
+			.fn = metricgroup__add_to_mep_groups_callback,
+			.data = &groups,
 		};
-
 		pmu_for_each_sys_event(metricgroup__sys_event_iter, &data);
 	}
 
-	if (!filter || !rblist__empty(&groups)) {
-		if (metricgroups && !raw)
-			printf("\nMetric Groups:\n\n");
-		else if (metrics && !raw)
-			printf("\nMetrics:\n\n");
-	}
-
 	for (node = rb_first_cached(&groups.entries); node; node = next) {
 		struct mep *me = container_of(node, struct mep, nd);
 
-		if (metricgroups)
-			printf("%s%s%s", me->name, metrics && !raw ? ":" : "", raw ? " " : "\n");
-		if (metrics)
-			metricgroup__print_strlist(me->metrics, raw);
+		print_cb->print_metric(print_state,
+				me->metric_group,
+				me->metric_name,
+				me->metric_desc,
+				me->metric_long_desc,
+				me->metric_expr,
+				me->metric_unit);
 		next = rb_next(node);
 		rblist__remove_node(&groups, node);
 	}
-	if (!metricgroups)
-		metricgroup__print_strlist(metriclist, raw);
-	strlist__delete(metriclist);
 }
 
 static const char *code_characters = ",-=@";
diff --git a/tools/perf/util/metricgroup.h b/tools/perf/util/metricgroup.h
index 732d3a0d33341..0013cf582173c 100644
--- a/tools/perf/util/metricgroup.h
+++ b/tools/perf/util/metricgroup.h
@@ -10,6 +10,7 @@
 struct evlist;
 struct evsel;
 struct option;
+struct print_callbacks;
 struct rblist;
 struct cgroup;
 
@@ -78,8 +79,7 @@ int metricgroup__parse_groups_test(struct evlist *evlist,
 				   bool metric_no_merge,
 				   struct rblist *metric_events);
 
-void metricgroup__print(bool metrics, bool groups, char *filter,
-			bool raw, bool details, const char *pmu_name);
+void metricgroup__print(const struct print_callbacks *print_cb, void *print_state);
 bool metricgroup__has_metric(const char *metric);
 int arch_get_runtimeparam(const struct pmu_event *pe __maybe_unused);
 void metricgroup__rblist_exit(struct rblist *metric_events);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 075c82dd1347f..e9a4f31926bf4 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -23,6 +23,7 @@
 #include "evsel.h"
 #include "pmu.h"
 #include "parse-events.h"
+#include "print-events.h"
 #include "header.h"
 #include "string2.h"
 #include "strbuf.h"
@@ -1579,13 +1580,6 @@ static char *format_alias(char *buf, int len, const struct perf_pmu *pmu,
 	return buf;
 }
 
-static char *format_alias_or(char *buf, int len, const struct perf_pmu *pmu,
-			     const struct perf_pmu_alias *alias)
-{
-	snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name);
-	return buf;
-}
-
 /** Struct for ordering events as output in perf list. */
 struct sevent {
 	/** PMU for event. */
@@ -1629,7 +1623,7 @@ static int cmp_sevent(const void *a, const void *b)
 
 	/* Order CPU core events to be first */
 	if (as->is_cpu != bs->is_cpu)
-		return bs->is_cpu - as->is_cpu;
+		return as->is_cpu ? -1 : 1;
 
 	/* Order by PMU name. */
 	a_pmu_name = as->pmu->name ?: "";
@@ -1642,27 +1636,6 @@ static int cmp_sevent(const void *a, const void *b)
 	return strcmp(a_name, b_name);
 }
 
-static void wordwrap(char *s, int start, int max, int corr)
-{
-	int column = start;
-	int n;
-
-	while (*s) {
-		int wlen = strcspn(s, " \t");
-
-		if (column + wlen >= max && column > start) {
-			printf("\n%*s", start, "");
-			column = start + corr;
-		}
-		n = printf("%s%.*s", column > start ? " " : "", wlen, s);
-		if (n <= 0)
-			break;
-		s += wlen;
-		column += n;
-		s = skip_spaces(s);
-	}
-}
-
 bool is_pmu_core(const char *name)
 {
 	return !strcmp(name, "cpu") || is_arm_pmu_core(name);
@@ -1685,24 +1658,19 @@ static bool pmu_alias_is_duplicate(struct sevent *alias_a,
 	return strcmp(a_pmu_name, b_pmu_name) == 0;
 }
 
-void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
-			bool long_desc, bool details_flag, bool deprecated,
-			const char *pmu_name)
+void print_pmu_events(const struct print_callbacks *print_cb, void *print_state)
 {
 	struct perf_pmu *pmu;
-	struct perf_pmu_alias *alias;
+	struct perf_pmu_alias *event;
 	char buf[1024];
 	int printed = 0;
 	int len, j;
 	struct sevent *aliases;
-	int numdesc = 0;
-	int columns = pager_get_columns();
-	char *topic = NULL;
 
 	pmu = NULL;
 	len = 0;
 	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
-		list_for_each_entry(alias, &pmu->aliases, list)
+		list_for_each_entry(event, &pmu->aliases, list)
 			len++;
 		if (pmu->selectable)
 			len++;
@@ -1715,32 +1683,15 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 	pmu = NULL;
 	j = 0;
 	while ((pmu = perf_pmu__scan(pmu)) != NULL) {
-		bool is_cpu;
+		bool is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name);
 
-		if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name))
-			continue;
-
-		is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name);
-
-		list_for_each_entry(alias, &pmu->aliases, list) {
-			if (alias->deprecated && !deprecated)
-				continue;
-
-			if (event_glob != NULL &&
-			    !(strglobmatch_nocase(alias->name, event_glob) ||
-			      (!is_cpu &&
-			       strglobmatch_nocase(alias->name, event_glob)) ||
-			      (alias->topic &&
-			       strglobmatch_nocase(alias->topic, event_glob))))
-				continue;
-
-			aliases[j].event = alias;
+		list_for_each_entry(event, &pmu->aliases, list) {
+			aliases[j].event = event;
 			aliases[j].pmu = pmu;
 			aliases[j].is_cpu = is_cpu;
 			j++;
 		}
-		if (pmu->selectable &&
-		    (event_glob == NULL || strglobmatch(pmu->name, event_glob))) {
+		if (pmu->selectable) {
 			aliases[j].event = NULL;
 			aliases[j].pmu = pmu;
 			aliases[j].is_cpu = is_cpu;
@@ -1750,7 +1701,12 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 	len = j;
 	qsort(aliases, len, sizeof(struct sevent), cmp_sevent);
 	for (j = 0; j < len; j++) {
-		char *name, *desc;
+		const char *name, *alias = NULL, *scale_unit = NULL,
+			*desc = NULL, *long_desc = NULL,
+			*encoding_desc = NULL, *topic = NULL,
+			*metric_name = NULL, *metric_expr = NULL;
+		bool deprecated = false;
+		size_t buf_used;
 
 		/* Skip duplicates */
 		if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1]))
@@ -1758,48 +1714,51 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag,
 
 		if (!aliases[j].event) {
 			/* A selectable event. */
-			snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name);
+			buf_used = snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name) + 1;
 			name = buf;
-		} else if (aliases[j].event->desc) {
-			name = aliases[j].event->name;
 		} else {
-			if (!name_only && aliases[j].is_cpu) {
-				name = format_alias_or(buf, sizeof(buf), aliases[j].pmu,
-						       aliases[j].event);
+			if (aliases[j].event->desc) {
+				name = aliases[j].event->name;
+				buf_used = 0;
 			} else {
 				name = format_alias(buf, sizeof(buf), aliases[j].pmu,
 						    aliases[j].event);
+				if (aliases[j].is_cpu) {
+					alias = name;
+					name = aliases[j].event->name;
+				}
+				buf_used = strlen(buf) + 1;
 			}
-		}
-		if (name_only) {
-			printf("%s ", name);
-			continue;
-		}
-		printed++;
-		if (!aliases[j].event || !aliases[j].event->desc || quiet_flag) {
-			printf("  %-50s [Kernel PMU event]\n", name);
-			continue;
-		}
-		if (numdesc++ == 0)
-			printf("\n");
-		if (aliases[j].event->topic && (!topic ||
-						strcmp(topic, aliases[j].event->topic))) {
-			printf("%s%s:\n", topic ? "\n" : "", aliases[j].event->topic);
+			if (strlen(aliases[j].event->unit) || aliases[j].event->scale != 1.0) {
+				scale_unit = buf + buf_used;
+				buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+						"%G%s", aliases[j].event->scale,
+						aliases[j].event->unit) + 1;
+			}
+			desc = aliases[j].event->desc;
+			long_desc = aliases[j].event->long_desc;
 			topic = aliases[j].event->topic;
+			encoding_desc = buf + buf_used;
+			buf_used += snprintf(buf + buf_used, sizeof(buf) - buf_used,
+					"%s/%s/", aliases[j].pmu->name,
+					aliases[j].event->str) + 1;
+			metric_name = aliases[j].event->metric_name;
+			metric_expr = aliases[j].event->metric_expr;
+			deprecated = aliases[j].event->deprecated;
 		}
-		printf("  %-50s\n", name);
-		printf("%*s", 8, "[");
-		desc = long_desc ? aliases[j].event->long_desc : aliases[j].event->desc;
-		wordwrap(desc, 8, columns, 0);
-		printf("]\n");
-		if (details_flag) {
-			printf("%*s%s/%s/ ", 8, "", aliases[j].pmu->name, aliases[j].event->str);
-			if (aliases[j].event->metric_name)
-				printf(" MetricName: %s", aliases[j].event->metric_name);
-			if (aliases[j].event->metric_expr)
-				printf(" MetricExpr: %s", aliases[j].event->metric_expr);
-			putchar('\n');
-		}
+		print_cb->print_event(print_state,
+				aliases[j].pmu->name,
+				topic,
+				name,
+				alias,
+				scale_unit,
+				deprecated,
+				"Kernel PMU event",
+				desc,
+				long_desc,
+				encoding_desc,
+				metric_name,
+				metric_expr);
 	}
 	if (printed && pager_in_use())
 		printf("\n");
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index ee02e1ef91878..69ca0004f94f6 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -12,6 +12,7 @@
 
 struct evsel_config_term;
 struct perf_cpu_map;
+struct print_callbacks;
 
 enum {
 	PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -225,9 +226,7 @@ void perf_pmu__del_formats(struct list_head *formats);
 struct perf_pmu *perf_pmu__scan(struct perf_pmu *pmu);
 
 bool is_pmu_core(const char *name);
-void print_pmu_events(const char *event_glob, bool name_only, bool quiet,
-		      bool long_desc, bool details_flag,
-		      bool deprecated, const char *pmu_name);
+void print_pmu_events(const struct print_callbacks *print_cb, void *print_state);
 bool pmu_have_event(const char *pname, const char *name);
 
 int perf_pmu__scan_file(struct perf_pmu *pmu, const char *name, const char *fmt, ...) __scanf(3, 4);
diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c
index d53dba033597b..2646ae18d9f9b 100644
--- a/tools/perf/util/print-events.c
+++ b/tools/perf/util/print-events.c
@@ -28,6 +28,7 @@
 
 #define MAX_NAME_LEN 100
 
+/** Strings corresponding to enum perf_type_id. */
 static const char * const event_type_descriptors[] = {
 	"Hardware event",
 	"Software event",
@@ -55,11 +56,9 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = {
 /*
  * Print the events from <debugfs_mount_point>/tracing/events
  */
-void print_tracepoint_events(const char *subsys_glob,
-			     const char *event_glob, bool name_only)
+void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state)
 {
 	struct dirent **sys_namelist = NULL;
-	bool printed = false;
 	int sys_items = tracing_events__scandir_alphasort(&sys_namelist);
 
 	for (int i = 0; i < sys_items; i++) {
@@ -73,10 +72,6 @@ void print_tracepoint_events(const char *subsys_glob,
 		    !strcmp(sys_dirent->d_name, ".."))
 			continue;
 
-		if (subsys_glob != NULL &&
-		    !strglobmatch(sys_dirent->d_name, subsys_glob))
-			continue;
-
 		dir_path = get_events_file(sys_dirent->d_name);
 		if (!dir_path)
 			continue;
@@ -94,41 +89,41 @@ void print_tracepoint_events(const char *subsys_glob,
 			if (tp_event_has_id(dir_path, evt_dirent) != 0)
 				continue;
 
-			if (event_glob != NULL &&
-			    !strglobmatch(evt_dirent->d_name, event_glob))
-				continue;
-
 			snprintf(evt_path, MAXPATHLEN, "%s:%s",
 				 sys_dirent->d_name, evt_dirent->d_name);
-			if (name_only)
-				printf("%s ", evt_path);
-			else {
-				printf("  %-50s [%s]\n", evt_path,
-				       event_type_descriptors[PERF_TYPE_TRACEPOINT]);
-			}
-			printed = true;
+			print_cb->print_event(print_state,
+					/*topic=*/NULL,
+					/*pmu_name=*/NULL,
+					evt_path,
+					/*event_alias=*/NULL,
+					/*scale_unit=*/NULL,
+					/*deprecated=*/false,
+					"Tracepoint event",
+					/*desc=*/NULL,
+					/*long_desc=*/NULL,
+					/*encoding_desc=*/NULL,
+					/*metric_name=*/NULL,
+					/*metric_expr=*/NULL);
 		}
 		free(dir_path);
 		free(evt_namelist);
 	}
 	free(sys_namelist);
-	if (printed && pager_in_use())
-		printf("\n");
 }
 
-void print_sdt_events(const char *subsys_glob, const char *event_glob,
-		      bool name_only)
+void print_sdt_events(const struct print_callbacks *print_cb, void *print_state)
 {
-	struct probe_cache *pcache;
-	struct probe_cache_entry *ent;
 	struct strlist *bidlist, *sdtlist;
-	struct strlist_config cfg = {.dont_dupstr = true};
-	struct str_node *nd, *nd2;
-	char *buf, *path, *ptr = NULL;
-	bool show_detail = false;
-	int ret;
-
-	sdtlist = strlist__new(NULL, &cfg);
+	struct str_node *bid_nd, *sdt_name, *next_sdt_name;
+	const char *last_sdt_name = NULL;
+
+	/*
+	 * The implicitly sorted sdtlist will hold the tracepoint name followed
+	 * by @<buildid>. If the tracepoint name is unique (determined by
+	 * looking at the adjacent nodes) the @<buildid> is dropped otherwise
+	 * the executable path and buildid are added to the name.
+	 */
+	sdtlist = strlist__new(NULL, NULL);
 	if (!sdtlist) {
 		pr_debug("Failed to allocate new strlist for SDT\n");
 		return;
@@ -138,65 +133,78 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob,
 		pr_debug("Failed to get buildids: %d\n", errno);
 		return;
 	}
-	strlist__for_each_entry(nd, bidlist) {
-		pcache = probe_cache__new(nd->s, NULL);
+	strlist__for_each_entry(bid_nd, bidlist) {
+		struct probe_cache *pcache;
+		struct probe_cache_entry *ent;
+
+		pcache = probe_cache__new(bid_nd->s, NULL);
 		if (!pcache)
 			continue;
 		list_for_each_entry(ent, &pcache->entries, node) {
-			if (!ent->sdt)
-				continue;
-			if (subsys_glob &&
-			    !strglobmatch(ent->pev.group, subsys_glob))
-				continue;
-			if (event_glob &&
-			    !strglobmatch(ent->pev.event, event_glob))
-				continue;
-			ret = asprintf(&buf, "%s:%s@%s", ent->pev.group,
-					ent->pev.event, nd->s);
-			if (ret > 0)
-				strlist__add(sdtlist, buf);
+			char buf[1024];
+
+			snprintf(buf, sizeof(buf), "%s:%s@%s",
+				 ent->pev.group, ent->pev.event, bid_nd->s);
+			strlist__add(sdtlist, buf);
 		}
 		probe_cache__delete(pcache);
 	}
 	strlist__delete(bidlist);
 
-	strlist__for_each_entry(nd, sdtlist) {
-		buf = strchr(nd->s, '@');
-		if (buf)
-			*(buf++) = '\0';
-		if (name_only) {
-			printf("%s ", nd->s);
-			continue;
-		}
-		nd2 = strlist__next(nd);
-		if (nd2) {
-			ptr = strchr(nd2->s, '@');
-			if (ptr)
-				*ptr = '\0';
-			if (strcmp(nd->s, nd2->s) == 0)
-				show_detail = true;
+	strlist__for_each_entry(sdt_name, sdtlist) {
+		bool show_detail = false;
+		char *bid = strchr(sdt_name->s, '@');
+		char *evt_name = NULL;
+
+		if (bid)
+			*(bid++) = '\0';
+
+		if (last_sdt_name && !strcmp(last_sdt_name, sdt_name->s)) {
+			show_detail = true;
+		} else {
+			next_sdt_name = strlist__next(sdt_name);
+			if (next_sdt_name) {
+				char *bid2 = strchr(next_sdt_name->s, '@');
+
+				if (bid2)
+					*bid2 = '\0';
+				if (strcmp(sdt_name->s, next_sdt_name->s) == 0)
+					show_detail = true;
+				if (bid2)
+					*bid2 = '@';
+			}
 		}
+		last_sdt_name = sdt_name->s;
+
 		if (show_detail) {
-			path = build_id_cache__origname(buf);
-			ret = asprintf(&buf, "%s@%s(%.12s)", nd->s, path, buf);
-			if (ret > 0) {
-				printf("  %-50s [%s]\n", buf, "SDT event");
-				free(buf);
+			char *path = build_id_cache__origname(bid);
+
+			if (path) {
+				if (asprintf(&evt_name, "%s@%s(%.12s)", sdt_name->s, path, bid) < 0)
+					evt_name = NULL;
+				free(path);
 			}
-			free(path);
-		} else
-			printf("  %-50s [%s]\n", nd->s, "SDT event");
-		if (nd2) {
-			if (strcmp(nd->s, nd2->s) != 0)
-				show_detail = false;
-			if (ptr)
-				*ptr = '@';
 		}
+		print_cb->print_event(print_state,
+				/*topic=*/NULL,
+				/*pmu_name=*/NULL,
+				evt_name ?: sdt_name->s,
+				/*event_alias=*/NULL,
+				/*deprecated=*/false,
+				/*scale_unit=*/NULL,
+				"SDT event",
+				/*desc=*/NULL,
+				/*long_desc=*/NULL,
+				/*encoding_desc=*/NULL,
+				/*metric_name=*/NULL,
+				/*metric_expr=*/NULL);
+
+		free(evt_name);
 	}
 	strlist__delete(sdtlist);
 }
 
-int print_hwcache_events(const char *event_glob, bool name_only)
+int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state)
 {
 	struct strlist *evt_name_list = strlist__new(NULL, NULL);
 	struct str_node *nd;
@@ -216,9 +224,6 @@ int print_hwcache_events(const char *event_glob, bool name_only)
 				char name[64];
 
 				__evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name));
-				if (event_glob != NULL && !strglobmatch(name, event_glob))
-					continue;
-
 				if (!perf_pmu__has_hybrid()) {
 					if (is_event_supported(PERF_TYPE_HW_CACHE,
 							       type | (op << 8) | (i << 16)))
@@ -240,55 +245,47 @@ int print_hwcache_events(const char *event_glob, bool name_only)
 	}
 
 	strlist__for_each_entry(nd, evt_name_list) {
-		if (name_only) {
-			printf("%s ", nd->s);
-			continue;
-		}
-		printf("  %-50s [%s]\n", nd->s, event_type_descriptors[PERF_TYPE_HW_CACHE]);
+		print_cb->print_event(print_state,
+				"cache",
+				/*pmu_name=*/NULL,
+				nd->s,
+				/*event_alias=*/NULL,
+				/*scale_unit=*/NULL,
+				/*deprecated=*/false,
+				event_type_descriptors[PERF_TYPE_HW_CACHE],
+				/*desc=*/NULL,
+				/*long_desc=*/NULL,
+				/*encoding_desc=*/NULL,
+				/*metric_name=*/NULL,
+				/*metric_expr=*/NULL);
 	}
-	if (!strlist__empty(evt_name_list) && pager_in_use())
-		printf("\n");
-
 	strlist__delete(evt_name_list);
 	return 0;
 }
 
-static void print_tool_event(const struct event_symbol *syms, const char *event_glob,
-			     bool name_only)
-{
-	if (syms->symbol == NULL)
-		return;
-
-	if (event_glob && !(strglobmatch(syms->symbol, event_glob) ||
-	      (syms->alias && strglobmatch(syms->alias, event_glob))))
-		return;
-
-	if (name_only)
-		printf("%s ", syms->symbol);
-	else {
-		char name[MAX_NAME_LEN];
-
-		if (syms->alias && strlen(syms->alias))
-			snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias);
-		else
-			strlcpy(name, syms->symbol, MAX_NAME_LEN);
-		printf("  %-50s [%s]\n", name, "Tool event");
-	}
-}
-
-void print_tool_events(const char *event_glob, bool name_only)
+void print_tool_events(const struct print_callbacks *print_cb, void *print_state)
 {
 	// Start at 1 because the first enum entry means no tool event.
-	for (int i = 1; i < PERF_TOOL_MAX; ++i)
-		print_tool_event(event_symbols_tool + i, event_glob, name_only);
-
-	if (pager_in_use())
-		printf("\n");
+	for (int i = 1; i < PERF_TOOL_MAX; ++i) {
+		print_cb->print_event(print_state,
+				"tool",
+				/*pmu_name=*/NULL,
+				event_symbols_tool[i].symbol,
+				event_symbols_tool[i].alias,
+				/*scale_unit=*/NULL,
+				/*deprecated=*/false,
+				"Tool event",
+				/*desc=*/NULL,
+				/*long_desc=*/NULL,
+				/*encoding_desc=*/NULL,
+				/*metric_name=*/NULL,
+				/*metric_expr=*/NULL);
+	}
 }
 
-void print_symbol_events(const char *event_glob, unsigned int type,
-			 struct event_symbol *syms, unsigned int max,
-			 bool name_only)
+void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
+			 unsigned int type, const struct event_symbol *syms,
+			 unsigned int max)
 {
 	struct strlist *evt_name_list = strlist__new(NULL, NULL);
 	struct str_node *nd;
@@ -305,10 +302,6 @@ void print_symbol_events(const char *event_glob, unsigned int type,
 		if (syms[i].symbol == NULL)
 			continue;
 
-		if (event_glob != NULL && !(strglobmatch(syms[i].symbol, event_glob) ||
-		      (syms[i].alias && strglobmatch(syms[i].alias, event_glob))))
-			continue;
-
 		if (!is_event_supported(type, i))
 			continue;
 
@@ -322,63 +315,92 @@ void print_symbol_events(const char *event_glob, unsigned int type,
 	}
 
 	strlist__for_each_entry(nd, evt_name_list) {
-		if (name_only) {
-			printf("%s ", nd->s);
-			continue;
+		char *alias = strstr(nd->s, " OR ");
+
+		if (alias) {
+			*alias = '\0';
+			alias += 4;
 		}
-		printf("  %-50s [%s]\n", nd->s, event_type_descriptors[type]);
+		print_cb->print_event(print_state,
+				/*topic=*/NULL,
+				/*pmu_name=*/NULL,
+				nd->s,
+				alias,
+				/*scale_unit=*/NULL,
+				/*deprecated=*/false,
+				event_type_descriptors[type],
+				/*desc=*/NULL,
+				/*long_desc=*/NULL,
+				/*encoding_desc=*/NULL,
+				/*metric_name=*/NULL,
+				/*metric_expr=*/NULL);
 	}
-	if (!strlist__empty(evt_name_list) && pager_in_use())
-		printf("\n");
-
 	strlist__delete(evt_name_list);
 }
 
 /*
  * Print the help text for the event symbols:
  */
-void print_events(const char *event_glob, bool name_only, bool quiet_flag,
-			bool long_desc, bool details_flag, bool deprecated,
-			const char *pmu_name)
+void print_events(const struct print_callbacks *print_cb, void *print_state)
 {
-	print_symbol_events(event_glob, PERF_TYPE_HARDWARE,
-			    event_symbols_hw, PERF_COUNT_HW_MAX, name_only);
-
-	print_symbol_events(event_glob, PERF_TYPE_SOFTWARE,
-			    event_symbols_sw, PERF_COUNT_SW_MAX, name_only);
-	print_tool_events(event_glob, name_only);
-
-	print_hwcache_events(event_glob, name_only);
-
-	print_pmu_events(event_glob, name_only, quiet_flag, long_desc,
-			details_flag, deprecated, pmu_name);
-
-	if (event_glob != NULL)
-		return;
-
-	if (!name_only) {
-		printf("  %-50s [%s]\n",
-		       "rNNN",
-		       event_type_descriptors[PERF_TYPE_RAW]);
-		printf("  %-50s [%s]\n",
-		       "cpu/t1=v1[,t2=v2,t3 ...]/modifier",
-		       event_type_descriptors[PERF_TYPE_RAW]);
-		if (pager_in_use())
-			printf("   (see 'man perf-list' on how to encode it)\n\n");
-
-		printf("  %-50s [%s]\n",
-		       "mem:<addr>[/len][:access]",
-			event_type_descriptors[PERF_TYPE_BREAKPOINT]);
-		if (pager_in_use())
-			printf("\n");
-	}
-
-	print_tracepoint_events(NULL, NULL, name_only);
-
-	print_sdt_events(NULL, NULL, name_only);
-
-	metricgroup__print(true, true, NULL, name_only, details_flag,
-			   pmu_name);
-
-	print_libpfm_events(name_only, long_desc);
+	print_symbol_events(print_cb, print_state, PERF_TYPE_HARDWARE,
+			event_symbols_hw, PERF_COUNT_HW_MAX);
+	print_symbol_events(print_cb, print_state, PERF_TYPE_SOFTWARE,
+			event_symbols_sw, PERF_COUNT_SW_MAX);
+
+	print_tool_events(print_cb, print_state);
+
+	print_hwcache_events(print_cb, print_state);
+
+	print_pmu_events(print_cb, print_state);
+
+	print_cb->print_event(print_state,
+			/*topic=*/NULL,
+			/*pmu_name=*/NULL,
+			"rNNN",
+			/*event_alias=*/NULL,
+			/*scale_unit=*/NULL,
+			/*deprecated=*/false,
+			event_type_descriptors[PERF_TYPE_RAW],
+			/*desc=*/NULL,
+			/*long_desc=*/NULL,
+			/*encoding_desc=*/NULL,
+			/*metric_name=*/NULL,
+			/*metric_expr=*/NULL);
+
+	print_cb->print_event(print_state,
+			/*topic=*/NULL,
+			/*pmu_name=*/NULL,
+			"cpu/t1=v1[,t2=v2,t3 ...]/modifier",
+			/*event_alias=*/NULL,
+			/*scale_unit=*/NULL,
+			/*deprecated=*/false,
+			event_type_descriptors[PERF_TYPE_RAW],
+			"(see 'man perf-list' on how to encode it)",
+			/*long_desc=*/NULL,
+			/*encoding_desc=*/NULL,
+			/*metric_name=*/NULL,
+			/*metric_expr=*/NULL);
+
+	print_cb->print_event(print_state,
+			/*topic=*/NULL,
+			/*pmu_name=*/NULL,
+			"mem:<addr>[/len][:access]",
+			/*scale_unit=*/NULL,
+			/*event_alias=*/NULL,
+			/*deprecated=*/false,
+			event_type_descriptors[PERF_TYPE_BREAKPOINT],
+			/*desc=*/NULL,
+			/*long_desc=*/NULL,
+			/*encoding_desc=*/NULL,
+			/*metric_name=*/NULL,
+			/*metric_expr=*/NULL);
+
+	print_tracepoint_events(print_cb, print_state);
+
+	print_sdt_events(print_cb, print_state);
+
+	metricgroup__print(print_cb, print_state);
+
+	print_libpfm_events(print_cb, print_state);
 }
diff --git a/tools/perf/util/print-events.h b/tools/perf/util/print-events.h
index 1da9910d83a60..c237e53c44870 100644
--- a/tools/perf/util/print-events.h
+++ b/tools/perf/util/print-events.h
@@ -2,21 +2,39 @@
 #ifndef __PERF_PRINT_EVENTS_H
 #define __PERF_PRINT_EVENTS_H
 
+#include <linux/perf_event.h>
 #include <stdbool.h>
 
 struct event_symbol;
 
-void print_events(const char *event_glob, bool name_only, bool quiet_flag,
-		  bool long_desc, bool details_flag, bool deprecated,
-		  const char *pmu_name);
-int print_hwcache_events(const char *event_glob, bool name_only);
-void print_sdt_events(const char *subsys_glob, const char *event_glob,
-		      bool name_only);
-void print_symbol_events(const char *event_glob, unsigned int type,
-			 struct event_symbol *syms, unsigned int max,
-			 bool name_only);
-void print_tool_events(const char *event_glob, bool name_only);
-void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
-			     bool name_only);
+struct print_callbacks {
+	void (*print_start)(void *print_state);
+	void (*print_end)(void *print_state);
+	void (*print_event)(void *print_state, const char *topic,
+			const char *pmu_name,
+			const char *event_name, const char *event_alias,
+			const char *scale_unit,
+			bool deprecated, const char *event_type_desc,
+			const char *desc, const char *long_desc,
+			const char *encoding_desc,
+			const char *metric_name, const char *metric_expr);
+	void (*print_metric)(void *print_state,
+			const char *group,
+			const char *name,
+			const char *desc,
+			const char *long_desc,
+			const char *expr,
+			const char *unit);
+};
+
+/** Print all events, the default when no options are specified. */
+void print_events(const struct print_callbacks *print_cb, void *print_state);
+int print_hwcache_events(const struct print_callbacks *print_cb, void *print_state);
+void print_sdt_events(const struct print_callbacks *print_cb, void *print_state);
+void print_symbol_events(const struct print_callbacks *print_cb, void *print_state,
+			 unsigned int type, const struct event_symbol *syms,
+			 unsigned int max);
+void print_tool_events(const struct print_callbacks *print_cb, void *print_state);
+void print_tracepoint_events(const struct print_callbacks *print_cb, void *print_state);
 
 #endif /* __PERF_PRINT_EVENTS_H */
-- 
GitLab


From 6ed249441a7d3ead8e81cc926e68d5e7ae031032 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 17 Nov 2022 13:43:37 -0300
Subject: [PATCH 2045/4122] perf list: Add JSON output option

Output events and metrics in a JSON format by overriding the print
callbacks. Currently other command line options aren't supported and
metrics are repeated once per metric group.

Committer testing:

  $ perf list cache

  List of pre-defined events (to be used in -e or -M):

    L1-dcache-load-misses                              [Hardware cache event]
    L1-dcache-loads                                    [Hardware cache event]
    L1-dcache-prefetches                               [Hardware cache event]
    L1-icache-load-misses                              [Hardware cache event]
    L1-icache-loads                                    [Hardware cache event]
    branch-load-misses                                 [Hardware cache event]
    branch-loads                                       [Hardware cache event]
    dTLB-load-misses                                   [Hardware cache event]
    dTLB-loads                                         [Hardware cache event]
    iTLB-load-misses                                   [Hardware cache event]
    iTLB-loads                                         [Hardware cache event]
  $ perf list --json cache
  [
  {
          "Unit": "cache",
          "EventName": "L1-dcache-load-misses",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "L1-dcache-loads",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "L1-dcache-prefetches",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "L1-icache-load-misses",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "L1-icache-loads",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "branch-load-misses",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "branch-loads",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "dTLB-load-misses",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "dTLB-loads",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "iTLB-load-misses",
          "EventType": "Hardware cache event"
  },
  {
          "Unit": "cache",
          "EventName": "iTLB-loads",
          "EventType": "Hardware cache event"
  }
  ]
  $

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/20221114210723.2749751-11-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-list.txt |   4 +
 tools/perf/builtin-list.c              | 308 +++++++++++++++++++------
 2 files changed, 245 insertions(+), 67 deletions(-)

diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 44a819af573de..c5a3cb0f57c7c 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -43,6 +43,10 @@ Print deprecated events. By default the deprecated events are hidden.
 Print PMU events and metrics limited to the specific PMU name.
 (e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom)
 
+-j::
+--json::
+Output in JSON format.
+
 [[EVENT_MODIFIERS]]
 EVENT MODIFIERS
 ---------------
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 0c84fdb3ad37c..84fa2d050eacb 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -19,6 +19,8 @@
 #include "util/strlist.h"
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
+#include <linux/zalloc.h>
+#include <stdarg.h>
 #include <stdio.h>
 
 /**
@@ -228,10 +230,176 @@ static void default_print_metric(void *ps,
 	}
 }
 
+struct json_print_state {
+	/** Should a separator be printed prior to the next item? */
+	bool need_sep;
+};
+
+static void json_print_start(void *print_state __maybe_unused)
+{
+	printf("[\n");
+}
+
+static void json_print_end(void *ps)
+{
+	struct json_print_state *print_state = ps;
+
+	printf("%s]\n", print_state->need_sep ? "\n" : "");
+}
+
+static void fix_escape_printf(const char *fmt, ...)
+{
+	va_list args;
+	char buf[2048];
+	size_t buf_pos = 0;
+
+	va_start(args, fmt);
+	for (size_t fmt_pos = 0; fmt_pos < strlen(fmt); fmt_pos++) {
+		switch (fmt[fmt_pos]) {
+		case '%': {
+			const char *s = va_arg(args, const char*);
+
+			fmt_pos++;
+			assert(fmt[fmt_pos] == 's');
+			for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) {
+				switch (s[s_pos]) {
+				case '\\':
+					__fallthrough;
+				case '\"':
+					buf[buf_pos++] = '\\';
+					assert(buf_pos < sizeof(buf));
+					__fallthrough;
+				default:
+					buf[buf_pos++] = s[s_pos];
+					assert(buf_pos < sizeof(buf));
+					break;
+				}
+			}
+			break;
+		}
+		default:
+			buf[buf_pos++] = fmt[fmt_pos];
+			assert(buf_pos < sizeof(buf));
+			break;
+		}
+	}
+	va_end(args);
+	buf[buf_pos] = '\0';
+	fputs(buf, stdout);
+}
+
+static void json_print_event(void *ps, const char *pmu_name, const char *topic,
+			     const char *event_name, const char *event_alias,
+			     const char *scale_unit,
+			     bool deprecated, const char *event_type_desc,
+			     const char *desc, const char *long_desc,
+			     const char *encoding_desc,
+			     const char *metric_name, const char *metric_expr)
+{
+	struct json_print_state *print_state = ps;
+	bool need_sep = false;
+
+	printf("%s{\n", print_state->need_sep ? ",\n" : "");
+	print_state->need_sep = true;
+	if (pmu_name) {
+		fix_escape_printf("\t\"Unit\": \"%s\"", pmu_name);
+		need_sep = true;
+	}
+	if (topic) {
+		fix_escape_printf("%s\t\"Topic\": \"%s\"", need_sep ? ",\n" : "", topic);
+		need_sep = true;
+	}
+	if (event_name) {
+		fix_escape_printf("%s\t\"EventName\": \"%s\"", need_sep ? ",\n" : "", event_name);
+		need_sep = true;
+	}
+	if (event_alias && strlen(event_alias)) {
+		fix_escape_printf("%s\t\"EventAlias\": \"%s\"", need_sep ? ",\n" : "", event_alias);
+		need_sep = true;
+	}
+	if (scale_unit && strlen(scale_unit)) {
+		fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "",
+				  scale_unit);
+		need_sep = true;
+	}
+	if (event_type_desc) {
+		fix_escape_printf("%s\t\"EventType\": \"%s\"", need_sep ? ",\n" : "",
+				  event_type_desc);
+		need_sep = true;
+	}
+	if (deprecated) {
+		fix_escape_printf("%s\t\"Deprecated\": \"%s\"", need_sep ? ",\n" : "",
+				  deprecated ? "1" : "0");
+		need_sep = true;
+	}
+	if (desc) {
+		fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc);
+		need_sep = true;
+	}
+	if (long_desc) {
+		fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "",
+				  long_desc);
+		need_sep = true;
+	}
+	if (encoding_desc) {
+		fix_escape_printf("%s\t\"Encoding\": \"%s\"", need_sep ? ",\n" : "", encoding_desc);
+		need_sep = true;
+	}
+	if (metric_name) {
+		fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", metric_name);
+		need_sep = true;
+	}
+	if (metric_expr) {
+		fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", metric_expr);
+		need_sep = true;
+	}
+	printf("%s}", need_sep ? "\n" : "");
+}
+
+static void json_print_metric(void *ps __maybe_unused, const char *group,
+			      const char *name, const char *desc,
+			      const char *long_desc, const char *expr,
+			      const char *unit)
+{
+	struct json_print_state *print_state = ps;
+	bool need_sep = false;
+
+	printf("%s{\n", print_state->need_sep ? ",\n" : "");
+	print_state->need_sep = true;
+	if (group) {
+		fix_escape_printf("\t\"MetricGroup\": \"%s\"", group);
+		need_sep = true;
+	}
+	if (name) {
+		fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", name);
+		need_sep = true;
+	}
+	if (expr) {
+		fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", expr);
+		need_sep = true;
+	}
+	if (unit) {
+		fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", unit);
+		need_sep = true;
+	}
+	if (desc) {
+		fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc);
+		need_sep = true;
+	}
+	if (long_desc) {
+		fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "",
+				  long_desc);
+		need_sep = true;
+	}
+	printf("%s}", need_sep ? "\n" : "");
+}
+
 int cmd_list(int argc, const char **argv)
 {
 	int i, ret = 0;
-	struct print_state ps = {};
+	struct print_state default_ps = {};
+	struct print_state json_ps = {};
+	void *ps = &default_ps;
 	struct print_callbacks print_cb = {
 		.print_start = default_print_start,
 		.print_end = default_print_end,
@@ -240,15 +408,17 @@ int cmd_list(int argc, const char **argv)
 	};
 	const char *hybrid_name = NULL;
 	const char *unit_name = NULL;
+	bool json = false;
 	struct option list_options[] = {
-		OPT_BOOLEAN(0, "raw-dump", &ps.name_only, "Dump raw events"),
-		OPT_BOOLEAN('d', "desc", &ps.desc,
+		OPT_BOOLEAN(0, "raw-dump", &default_ps.name_only, "Dump raw events"),
+		OPT_BOOLEAN('j', "json", &json, "JSON encode events and metrics"),
+		OPT_BOOLEAN('d', "desc", &default_ps.desc,
 			    "Print extra event descriptions. --no-desc to not print."),
-		OPT_BOOLEAN('v', "long-desc", &ps.long_desc,
+		OPT_BOOLEAN('v', "long-desc", &default_ps.long_desc,
 			    "Print longer event descriptions."),
-		OPT_BOOLEAN(0, "details", &ps.detailed,
+		OPT_BOOLEAN(0, "details", &default_ps.detailed,
 			    "Print information on the perf event names and expressions used internally by events."),
-		OPT_BOOLEAN(0, "deprecated", &ps.deprecated,
+		OPT_BOOLEAN(0, "deprecated", &default_ps.deprecated,
 			    "Print deprecated events."),
 		OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type",
 			   "Limit PMU or metric printing to the given hybrid PMU (e.g. core or atom)."),
@@ -272,28 +442,37 @@ int cmd_list(int argc, const char **argv)
 
 	setup_pager();
 
-	if (!ps.name_only)
+	if (!default_ps.name_only)
 		setup_pager();
 
-	ps.desc = !ps.long_desc;
-	ps.last_topic = strdup("");
-	assert(ps.last_topic);
-	ps.visited_metrics = strlist__new(NULL, NULL);
-	assert(ps.visited_metrics);
-	if (unit_name)
-		ps.pmu_glob = strdup(unit_name);
-	else if (hybrid_name) {
-		ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name);
-		if (!ps.pmu_glob)
-			pr_warning("WARNING: hybrid cputype is not supported!\n");
+	if (json) {
+		print_cb = (struct print_callbacks){
+			.print_start = json_print_start,
+			.print_end = json_print_end,
+			.print_event = json_print_event,
+			.print_metric = json_print_metric,
+		};
+		ps = &json_ps;
+	} else {
+		default_ps.desc = !default_ps.long_desc;
+		default_ps.last_topic = strdup("");
+		assert(default_ps.last_topic);
+		default_ps.visited_metrics = strlist__new(NULL, NULL);
+		assert(default_ps.visited_metrics);
+		if (unit_name)
+			default_ps.pmu_glob = strdup(unit_name);
+		else if (hybrid_name) {
+			default_ps.pmu_glob = perf_pmu__hybrid_type_to_pmu(hybrid_name);
+			if (!default_ps.pmu_glob)
+				pr_warning("WARNING: hybrid cputype is not supported!\n");
+		}
 	}
-
-	print_cb.print_start(&ps);
+	print_cb.print_start(ps);
 
 	if (argc == 0) {
-		ps.metrics = true;
-		ps.metricgroups = true;
-		print_events(&print_cb, &ps);
+		default_ps.metrics = true;
+		default_ps.metricgroups = true;
+		print_events(&print_cb, ps);
 		goto out;
 	}
 
@@ -301,80 +480,75 @@ int cmd_list(int argc, const char **argv)
 		char *sep, *s;
 
 		if (strcmp(argv[i], "tracepoint") == 0)
-			print_tracepoint_events(&print_cb, &ps);
+			print_tracepoint_events(&print_cb, ps);
 		else if (strcmp(argv[i], "hw") == 0 ||
 			 strcmp(argv[i], "hardware") == 0)
-			print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE,
+			print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
 					event_symbols_hw, PERF_COUNT_HW_MAX);
 		else if (strcmp(argv[i], "sw") == 0 ||
 			 strcmp(argv[i], "software") == 0) {
-			print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE,
+			print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE,
 					event_symbols_sw, PERF_COUNT_SW_MAX);
-			print_tool_events(&print_cb, &ps);
+			print_tool_events(&print_cb, ps);
 		} else if (strcmp(argv[i], "cache") == 0 ||
 			 strcmp(argv[i], "hwcache") == 0)
-			print_hwcache_events(&print_cb, &ps);
+			print_hwcache_events(&print_cb, ps);
 		else if (strcmp(argv[i], "pmu") == 0)
-			print_pmu_events(&print_cb, &ps);
+			print_pmu_events(&print_cb, ps);
 		else if (strcmp(argv[i], "sdt") == 0)
-			print_sdt_events(&print_cb, &ps);
+			print_sdt_events(&print_cb, ps);
 		else if (strcmp(argv[i], "metric") == 0 || strcmp(argv[i], "metrics") == 0) {
-			ps.metricgroups = false;
-			ps.metrics = true;
-			metricgroup__print(&print_cb, &ps);
+			default_ps.metricgroups = false;
+			default_ps.metrics = true;
+			metricgroup__print(&print_cb, ps);
 		} else if (strcmp(argv[i], "metricgroup") == 0 ||
 			   strcmp(argv[i], "metricgroups") == 0) {
-			ps.metricgroups = true;
-			ps.metrics = false;
-			metricgroup__print(&print_cb, &ps);
+			default_ps.metricgroups = true;
+			default_ps.metrics = false;
+			metricgroup__print(&print_cb, ps);
 		} else if ((sep = strchr(argv[i], ':')) != NULL) {
-			int sep_idx;
-			char *old_pmu_glob = ps.pmu_glob;
+			char *old_pmu_glob = default_ps.pmu_glob;
 
-			sep_idx = sep - argv[i];
-			s = strdup(argv[i]);
-			if (s == NULL) {
+			default_ps.event_glob = strdup(argv[i]);
+			if (!default_ps.event_glob) {
 				ret = -1;
 				goto out;
 			}
 
-			s[sep_idx] = '\0';
-			ps.pmu_glob = s;
-			ps.event_glob = s + sep_idx + 1;
-			print_tracepoint_events(&print_cb, &ps);
-			print_sdt_events(&print_cb, &ps);
-			ps.metrics = true;
-			ps.metricgroups = true;
-			metricgroup__print(&print_cb, &ps);
-			free(s);
-			ps.pmu_glob = old_pmu_glob;
+			print_tracepoint_events(&print_cb, ps);
+			print_sdt_events(&print_cb, ps);
+			default_ps.metrics = true;
+			default_ps.metricgroups = true;
+			metricgroup__print(&print_cb, ps);
+			zfree(&default_ps.event_glob);
+			default_ps.pmu_glob = old_pmu_glob;
 		} else {
 			if (asprintf(&s, "*%s*", argv[i]) < 0) {
 				printf("Critical: Not enough memory! Trying to continue...\n");
 				continue;
 			}
-			ps.event_glob = s;
-			print_symbol_events(&print_cb, &ps, PERF_TYPE_HARDWARE,
+			default_ps.event_glob = s;
+			print_symbol_events(&print_cb, ps, PERF_TYPE_HARDWARE,
 					event_symbols_hw, PERF_COUNT_HW_MAX);
-			print_symbol_events(&print_cb, &ps, PERF_TYPE_SOFTWARE,
+			print_symbol_events(&print_cb, ps, PERF_TYPE_SOFTWARE,
 					event_symbols_sw, PERF_COUNT_SW_MAX);
-			print_tool_events(&print_cb, &ps);
-			print_hwcache_events(&print_cb, &ps);
-			print_pmu_events(&print_cb, &ps);
-			print_tracepoint_events(&print_cb, &ps);
-			print_sdt_events(&print_cb, &ps);
-			ps.metrics = true;
-			ps.metricgroups = true;
-			metricgroup__print(&print_cb, &ps);
+			print_tool_events(&print_cb, ps);
+			print_hwcache_events(&print_cb, ps);
+			print_pmu_events(&print_cb, ps);
+			print_tracepoint_events(&print_cb, ps);
+			print_sdt_events(&print_cb, ps);
+			default_ps.metrics = true;
+			default_ps.metricgroups = true;
+			metricgroup__print(&print_cb, ps);
 			free(s);
 		}
 	}
 
 out:
-	print_cb.print_end(&ps);
-	free(ps.pmu_glob);
-	free(ps.last_topic);
-	free(ps.last_metricgroups);
-	strlist__delete(ps.visited_metrics);
+	print_cb.print_end(ps);
+	free(default_ps.pmu_glob);
+	free(default_ps.last_topic);
+	free(default_ps.last_metricgroups);
+	strlist__delete(default_ps.visited_metrics);
 	return ret;
 }
-- 
GitLab


From eadcab4c7a66e1df03d32da0db55d89fd9343fcc Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 21 Nov 2022 07:52:33 +0000
Subject: [PATCH 2046/4122] perf trace: Use macro RAW_SYSCALL_ARGS_NUM to
 replace number

This patch defines a macro RAW_SYSCALL_ARGS_NUM to replace the open
coded number '6'.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221121075237.127706-2-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 3257da5cad231..22008a31684b6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -88,6 +88,8 @@
 # define F_LINUX_SPECIFIC_BASE	1024
 #endif
 
+#define RAW_SYSCALL_ARGS_NUM	6
+
 /*
  * strtoul: Go from a string to a value, i.e. for msr: MSR_FS_BASE to 0xc0000100
  */
@@ -108,7 +110,7 @@ struct syscall_fmt {
 		const char *sys_enter,
 			   *sys_exit;
 	}	   bpf_prog_name;
-	struct syscall_arg_fmt arg[6];
+	struct syscall_arg_fmt arg[RAW_SYSCALL_ARGS_NUM];
 	u8	   nr_args;
 	bool	   errpid;
 	bool	   timeout;
@@ -1229,7 +1231,7 @@ struct syscall {
  */
 struct bpf_map_syscall_entry {
 	bool	enabled;
-	u16	string_args_len[6];
+	u16	string_args_len[RAW_SYSCALL_ARGS_NUM];
 };
 
 /*
@@ -1661,7 +1663,7 @@ static int syscall__alloc_arg_fmts(struct syscall *sc, int nr_args)
 {
 	int idx;
 
-	if (nr_args == 6 && sc->fmt && sc->fmt->nr_args != 0)
+	if (nr_args == RAW_SYSCALL_ARGS_NUM && sc->fmt && sc->fmt->nr_args != 0)
 		nr_args = sc->fmt->nr_args;
 
 	sc->arg_fmt = calloc(nr_args, sizeof(*sc->arg_fmt));
@@ -1812,7 +1814,8 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
 	}
 
-	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ? 6 : sc->tp_format->format.nr_fields))
+	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
+					RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
 		return -ENOMEM;
 
 	if (IS_ERR(sc->tp_format))
-- 
GitLab


From d4223e1776c30b2ce8d0e6eaadcbf696e60fca3c Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 21 Nov 2022 07:52:34 +0000
Subject: [PATCH 2047/4122] perf trace: Return error if a system call doesn't
 exist

When a system call is not detected, the reason is either because the
system call ID is out of scope or failure to find the corresponding path
in the sysfs, trace__read_syscall_info() returns zero.  Finally, without
returning an error value it introduces confusion for the caller.

This patch lets the function trace__read_syscall_info() to return
-EEXIST when a system call doesn't exist.

Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: bpf@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221121075237.127706-3-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 22008a31684b6..bd5513b15cde8 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1796,11 +1796,11 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 #endif
 	sc = trace->syscalls.table + id;
 	if (sc->nonexistent)
-		return 0;
+		return -EEXIST;
 
 	if (name == NULL) {
 		sc->nonexistent = true;
-		return 0;
+		return -EEXIST;
 	}
 
 	sc->name = name;
-- 
GitLab


From 03e9a5d8eb552a1bf692a9c8a5ecd50f4e428006 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 21 Nov 2022 07:52:35 +0000
Subject: [PATCH 2048/4122] perf trace: Handle failure when trace point folder
 is missed

On Arm64 a case is perf tools fails to find the corresponding trace
point folder for system calls listed in the table 'syscalltbl_arm64',
e.g. the generated system call table contains "lookup_dcookie" but we
cannot find out the matched trace point folder for it.

We need to figure out if there have any issue for the generated system
call table, on the other hand, we need to handle the case when trace
point folder is missed under sysfs, this patch sets the flag
syscall::nonexistent as true and returns the error from
trace__read_syscall_info().

Another problem is for trace__syscall_info(), it returns two different
values if a system call doesn't exist: at the first time calling
trace__syscall_info() it returns NULL when the system call doesn't exist,
later if call trace__syscall_info() again for the same missed system
call, it returns pointer of syscall.  trace__syscall_info() checks the
condition 'syscalls.table[id].name == NULL', but the name will be
assigned in the first invoking even the system call is not found.

So checking system call's name in trace__syscall_info() is not the right
thing to do, this patch simply checks flag syscall::nonexistent to make
decision if a system call exists or not, finally trace__syscall_info()
returns the consistent result (NULL) if a system call doesn't existed.

Fixes: b8b1033fcaa091d8 ("perf trace: Mark syscall ids that are not allocated to avoid unnecessary error messages")
Signed-off-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: bpf@vger.kernel.org
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221121075237.127706-4-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index bd5513b15cde8..071e7598391f1 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1814,13 +1814,19 @@ static int trace__read_syscall_info(struct trace *trace, int id)
 		sc->tp_format = trace_event__tp_format("syscalls", tp_name);
 	}
 
+	/*
+	 * Fails to read trace point format via sysfs node, so the trace point
+	 * doesn't exist.  Set the 'nonexistent' flag as true.
+	 */
+	if (IS_ERR(sc->tp_format)) {
+		sc->nonexistent = true;
+		return PTR_ERR(sc->tp_format);
+	}
+
 	if (syscall__alloc_arg_fmts(sc, IS_ERR(sc->tp_format) ?
 					RAW_SYSCALL_ARGS_NUM : sc->tp_format->format.nr_fields))
 		return -ENOMEM;
 
-	if (IS_ERR(sc->tp_format))
-		return PTR_ERR(sc->tp_format);
-
 	sc->args = sc->tp_format->format.fields;
 	/*
 	 * We need to check and discard the first variable '__syscall_nr'
@@ -2137,11 +2143,8 @@ static struct syscall *trace__syscall_info(struct trace *trace,
 	    (err = trace__read_syscall_info(trace, id)) != 0)
 		goto out_cant_read;
 
-	if (trace->syscalls.table[id].name == NULL) {
-		if (trace->syscalls.table[id].nonexistent)
-			return NULL;
+	if (trace->syscalls.table && trace->syscalls.table[id].nonexistent)
 		goto out_cant_read;
-	}
 
 	return &trace->syscalls.table[id];
 
-- 
GitLab


From 9bc427a0613da358f56fe499c690d97ce5d1af26 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 21 Nov 2022 07:52:36 +0000
Subject: [PATCH 2049/4122] perf augmented_raw_syscalls: Remove unused variable
 'syscall'

The local variable 'syscall' is not used anymore, remove it.

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221121075237.127706-5-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/examples/bpf/augmented_raw_syscalls.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 7dc24c9173a75..4203f92c063bf 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -389,7 +389,6 @@ int sys_enter(struct syscall_enter_args *args)
 	 * initial, non-augmented raw_syscalls:sys_enter payload.
 	 */
 	unsigned int len = sizeof(augmented_args->args);
-	struct syscall *syscall;
 
 	if (pid_filter__has(&pids_filtered, getpid()))
 		return 0;
-- 
GitLab


From 8daf87f5922730468c98ae588573386042bc2992 Mon Sep 17 00:00:00 2001
From: Leo Yan <leo.yan@linaro.org>
Date: Mon, 21 Nov 2022 07:52:37 +0000
Subject: [PATCH 2050/4122] perf trace: Remove unused bpf map 'syscalls'

augmented_raw_syscalls.c defines the bpf map 'syscalls' which is
initialized by perf tool in user space to indicate which system calls
are enabled for tracing, on the other flip eBPF program relies on the
map to filter out the trace events which are not enabled.

The map also includes a field 'string_args_len[6]' which presents the
string length if the corresponding argument is a string type.

Now the map 'syscalls' is not used, bpf program doesn't use it as filter
anymore, this is replaced by using the function bpf_tail_call() and
PROG_ARRAY syscalls map.  And we don't need to explicitly set the string
length anymore, bpf_probe_read_str() is smart to copy the string and
return string length.

Therefore, it's safe to remove the bpf map 'syscalls'.

To consolidate the code, this patch removes the definition of map
'syscalls' from augmented_raw_syscalls.c and drops code for using
the map in the perf trace.

Note, since function trace__set_ev_qualifier_bpf_filter() is removed,
calling trace__init_syscall_bpf_progs() from it is also removed.  We
don't need to worry it because trace__init_syscall_bpf_progs() is
still invoked from trace__init_syscalls_bpf_prog_array_maps() for
initialization the system call's bpf program callback.

After:

  # perf trace -e examples/bpf/augmented_raw_syscalls.c,open* --max-events 10 perf stat --quiet sleep 0.001
  openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libelf.so.1", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libdw.so.1", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind.so.8", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libunwind-aarch64.so.8", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libcrypto.so.3", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libslang.so.2", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libperl.so.5.34", O_RDONLY|O_CLOEXEC) = 3
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3

  # perf trace -e examples/bpf/augmented_raw_syscalls.c --max-events 10 perf stat --quiet sleep 0.001
  ... [continued]: execve())             = 0
  brk(NULL)                               = 0xaaaab1d28000
  faccessat(-100, "/etc/ld.so.preload", 4) = -1 ENOENT (No such file or directory)
  openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
  close(3</usr/lib/aarch64-linux-gnu/libcrypto.so.3>) = 0
  openat(AT_FDCWD, "/lib/aarch64-linux-gnu/libm.so.6", O_RDONLY|O_CLOEXEC) = 3
  read(3</usr/lib/aarch64-linux-gnu/libcrypto.so.3>, 0xfffff33f70d0, 832) = 832
  munmap(0xffffb5519000, 28672)           = 0
  munmap(0xffffb55b7000, 32880)           = 0
  mprotect(0xffffb55a6000, 61440, PROT_NONE) = 0

Signed-off-by: Leo Yan <leo.yan@linaro.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221121075237.127706-6-leo.yan@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c                    | 101 ------------------
 .../examples/bpf/augmented_raw_syscalls.c     |  17 ---
 2 files changed, 118 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 071e7598391f1..543c379d2a57a 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -122,7 +122,6 @@ struct trace {
 	struct syscalltbl	*sctbl;
 	struct {
 		struct syscall  *table;
-		struct bpf_map  *map;
 		struct { // per syscall BPF_MAP_TYPE_PROG_ARRAY
 			struct bpf_map  *sys_enter,
 					*sys_exit;
@@ -1224,16 +1223,6 @@ struct syscall {
 	struct syscall_arg_fmt *arg_fmt;
 };
 
-/*
- * Must match what is in the BPF program:
- *
- * tools/perf/examples/bpf/augmented_raw_syscalls.c
- */
-struct bpf_map_syscall_entry {
-	bool	enabled;
-	u16	string_args_len[RAW_SYSCALL_ARGS_NUM];
-};
-
 /*
  * We need to have this 'calculated' boolean because in some cases we really
  * don't know what is the duration of a syscall, for instance, when we start
@@ -3259,7 +3248,6 @@ static void trace__set_bpf_map_filtered_pids(struct trace *trace)
 
 static void trace__set_bpf_map_syscalls(struct trace *trace)
 {
-	trace->syscalls.map = trace__find_bpf_map_by_name(trace, "syscalls");
 	trace->syscalls.prog_array.sys_enter = trace__find_bpf_map_by_name(trace, "syscalls_sys_enter");
 	trace->syscalls.prog_array.sys_exit  = trace__find_bpf_map_by_name(trace, "syscalls_sys_exit");
 }
@@ -3339,80 +3327,6 @@ static int trace__bpf_prog_sys_exit_fd(struct trace *trace, int id)
 	return sc ? bpf_program__fd(sc->bpf_prog.sys_exit) : bpf_program__fd(trace->syscalls.unaugmented_prog);
 }
 
-static void trace__init_bpf_map_syscall_args(struct trace *trace, int id, struct bpf_map_syscall_entry *entry)
-{
-	struct syscall *sc = trace__syscall_info(trace, NULL, id);
-	int arg = 0;
-
-	if (sc == NULL)
-		goto out;
-
-	for (; arg < sc->nr_args; ++arg) {
-		entry->string_args_len[arg] = 0;
-		if (sc->arg_fmt[arg].scnprintf == SCA_FILENAME) {
-			/* Should be set like strace -s strsize */
-			entry->string_args_len[arg] = PATH_MAX;
-		}
-	}
-out:
-	for (; arg < 6; ++arg)
-		entry->string_args_len[arg] = 0;
-}
-static int trace__set_ev_qualifier_bpf_filter(struct trace *trace)
-{
-	int fd = bpf_map__fd(trace->syscalls.map);
-	struct bpf_map_syscall_entry value = {
-		.enabled = !trace->not_ev_qualifier,
-	};
-	int err = 0;
-	size_t i;
-
-	for (i = 0; i < trace->ev_qualifier_ids.nr; ++i) {
-		int key = trace->ev_qualifier_ids.entries[i];
-
-		if (value.enabled) {
-			trace__init_bpf_map_syscall_args(trace, key, &value);
-			trace__init_syscall_bpf_progs(trace, key);
-		}
-
-		err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-static int __trace__init_syscalls_bpf_map(struct trace *trace, bool enabled)
-{
-	int fd = bpf_map__fd(trace->syscalls.map);
-	struct bpf_map_syscall_entry value = {
-		.enabled = enabled,
-	};
-	int err = 0, key;
-
-	for (key = 0; key < trace->sctbl->syscalls.nr_entries; ++key) {
-		if (enabled)
-			trace__init_bpf_map_syscall_args(trace, key, &value);
-
-		err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
-		if (err)
-			break;
-	}
-
-	return err;
-}
-
-static int trace__init_syscalls_bpf_map(struct trace *trace)
-{
-	bool enabled = true;
-
-	if (trace->ev_qualifier_ids.nr)
-		enabled = trace->not_ev_qualifier;
-
-	return __trace__init_syscalls_bpf_map(trace, enabled);
-}
-
 static struct bpf_program *trace__find_usable_bpf_prog_entry(struct trace *trace, struct syscall *sc)
 {
 	struct tep_format_field *field, *candidate_field;
@@ -3627,16 +3541,6 @@ static void trace__set_bpf_map_syscalls(struct trace *trace __maybe_unused)
 {
 }
 
-static int trace__set_ev_qualifier_bpf_filter(struct trace *trace __maybe_unused)
-{
-	return 0;
-}
-
-static int trace__init_syscalls_bpf_map(struct trace *trace __maybe_unused)
-{
-	return 0;
-}
-
 static struct bpf_program *trace__find_bpf_program_by_title(struct trace *trace __maybe_unused,
 							    const char *name __maybe_unused)
 {
@@ -3670,8 +3574,6 @@ static bool trace__only_augmented_syscalls_evsels(struct trace *trace)
 
 static int trace__set_ev_qualifier_filter(struct trace *trace)
 {
-	if (trace->syscalls.map)
-		return trace__set_ev_qualifier_bpf_filter(trace);
 	if (trace->syscalls.events.sys_enter)
 		return trace__set_ev_qualifier_tp_filter(trace);
 	return 0;
@@ -4045,9 +3947,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 	if (err < 0)
 		goto out_error_mem;
 
-	if (trace->syscalls.map)
-		trace__init_syscalls_bpf_map(trace);
-
 	if (trace->syscalls.prog_array.sys_enter)
 		trace__init_syscalls_bpf_prog_array_maps(trace);
 
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 4203f92c063bf..9a03189d33d38 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -37,23 +37,6 @@ struct __augmented_syscalls__ {
 	__uint(max_entries, __NR_CPUS__);
 } __augmented_syscalls__ SEC(".maps");
 
-/*
- * string_args_len: one per syscall arg, 0 means not a string or don't copy it,
- * 		    PATH_MAX for copying everything, any other value to limit
- * 		    it a la 'strace -s strsize'.
- */
-struct syscall {
-	bool	enabled;
-	__u16	string_args_len[6];
-};
-
-struct syscalls {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__type(key, int);
-	__type(value, struct syscall);
-	__uint(max_entries, 512);
-} syscalls SEC(".maps");
-
 /*
  * What to augment at entry?
  *
-- 
GitLab


From 7d54a4acd8c1de3ea70d31424757dcdb7f0a231a Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Mon, 21 Nov 2022 15:57:47 +0530
Subject: [PATCH 2051/4122] perf test: Skip watchpoint tests if no watchpoints
 available

On IBM Power9, perf watchpoint tests fail since no hardware breakpoints
are available. Detect this by checking the error returned by
perf_event_open() and skip the tests in that case.

Reported-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Ian Rogers <irogers@google.com>
Reviewed-by: Kajol Jain<kjain@linux.ibm.com>
Tested-by: Kajol Jain<kjain@linux.ibm.com>
Link: https://lore.kernel.org/r/20221121102747.208289-1-naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-perf-users@vger.kernel.org
---
 tools/perf/tests/wp.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/tools/perf/tests/wp.c b/tools/perf/tests/wp.c
index 56455da30341b..cc8719609b19e 100644
--- a/tools/perf/tests/wp.c
+++ b/tools/perf/tests/wp.c
@@ -59,8 +59,10 @@ static int __event(int wp_type, void *wp_addr, unsigned long wp_len)
 	get__perf_event_attr(&attr, wp_type, wp_addr, wp_len);
 	fd = sys_perf_event_open(&attr, 0, -1, -1,
 				 perf_event_open_cloexec_flag());
-	if (fd < 0)
+	if (fd < 0) {
+		fd = -errno;
 		pr_debug("failed opening event %x\n", attr.bp_type);
+	}
 
 	return fd;
 }
@@ -77,7 +79,7 @@ static int test__wp_ro(struct test_suite *test __maybe_unused,
 
 	fd = __event(HW_BREAKPOINT_R, (void *)&data1, sizeof(data1));
 	if (fd < 0)
-		return -1;
+		return fd == -ENODEV ? TEST_SKIP : -1;
 
 	tmp = data1;
 	WP_TEST_ASSERT_VAL(fd, "RO watchpoint", 1);
@@ -101,7 +103,7 @@ static int test__wp_wo(struct test_suite *test __maybe_unused,
 
 	fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
 	if (fd < 0)
-		return -1;
+		return fd == -ENODEV ? TEST_SKIP : -1;
 
 	tmp = data1;
 	WP_TEST_ASSERT_VAL(fd, "WO watchpoint", 0);
@@ -126,7 +128,7 @@ static int test__wp_rw(struct test_suite *test __maybe_unused,
 	fd = __event(HW_BREAKPOINT_R | HW_BREAKPOINT_W, (void *)&data1,
 		     sizeof(data1));
 	if (fd < 0)
-		return -1;
+		return fd == -ENODEV ? TEST_SKIP : -1;
 
 	tmp = data1;
 	WP_TEST_ASSERT_VAL(fd, "RW watchpoint", 1);
@@ -150,7 +152,7 @@ static int test__wp_modify(struct test_suite *test __maybe_unused, int subtest _
 
 	fd = __event(HW_BREAKPOINT_W, (void *)&data1, sizeof(data1));
 	if (fd < 0)
-		return -1;
+		return fd == -ENODEV ? TEST_SKIP : -1;
 
 	data1 = tmp;
 	WP_TEST_ASSERT_VAL(fd, "Modify watchpoint", 1);
-- 
GitLab


From 19030564ab116757e3270a567fd9d5b20b411d74 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 18 Nov 2022 16:27:50 -0800
Subject: [PATCH 2052/4122] perf inject: Set PERF_RECORD_MISC_BUILD_ID_SIZE

With perf inject -b, it synthesizes build-id event for DSOs.  But it
missed to set the size and resulted in having trailing zeros.

As perf record sets the size in write_build_id(), let's set the size
here as well.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221119002750.1568027-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/synthetic-events.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index cccd293b53124..0645795ff0806 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -2218,8 +2218,9 @@ int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16
 	len = pos->long_name_len + 1;
 	len = PERF_ALIGN(len, NAME_ALIGN);
 	memcpy(&ev.build_id.build_id, pos->bid.data, sizeof(pos->bid.data));
+	ev.build_id.size = pos->bid.size;
 	ev.build_id.header.type = PERF_RECORD_HEADER_BUILD_ID;
-	ev.build_id.header.misc = misc;
+	ev.build_id.header.misc = misc | PERF_RECORD_MISC_BUILD_ID_SIZE;
 	ev.build_id.pid = machine->pid;
 	ev.build_id.header.size = sizeof(ev.build_id) + len;
 	memcpy(&ev.build_id.filename, pos->long_name, pos->long_name_len);
-- 
GitLab


From 2e9f5bda2f036aa38312fbeb99ff7e19c0221578 Mon Sep 17 00:00:00 2001
From: Michael Petlan <mpetlan@redhat.com>
Date: Tue, 22 Nov 2022 09:31:21 +0100
Subject: [PATCH 2053/4122] perf test: Fix record test on KVM guests

Using precise flag with br_inst_retired.near_call causes the test fail
on KVM guests, even when the guests have PMU forwarding enabled and the
event itself is supported.

Remove the precise flag in order to make the test work on KVM guests.

Signed-off-by: Michael Petlan <mpetlan@redhat.com>
Acked-by: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20221122083121.6012-1-mpetlan@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/record.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh
index 4dff89e3a3fd5..4fbc74805d526 100755
--- a/tools/perf/tests/shell/record.sh
+++ b/tools/perf/tests/shell/record.sh
@@ -83,7 +83,7 @@ test_register_capture() {
     echo "Register capture test [Skipped missing registers]"
     return
   fi
-  if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \
+  if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call \
     -c 1000 --per-thread ${testprog} 2> /dev/null \
     | perf script -F ip,sym,iregs -i - 2> /dev/null \
     | grep -q "DI:"
-- 
GitLab


From 7b7c22ccdf275018f715af95b7d052e8d4c6c690 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Mon, 21 Nov 2022 11:30:18 +0000
Subject: [PATCH 2054/4122] MAINTAINERS: Update John Garry's email address for
 arm64 perf tooling

Update my address.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Acked-by: Will Deacon <will@kernel.org>
Cc: Ian Rogers <irogers@google.com>
Link: https://lore.kernel.org/r/20221121113018.1899426-1-john.g.garry@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2585e7edc3358..170f1763557a2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16200,7 +16200,7 @@ F:	tools/lib/perf/
 F:	tools/perf/
 
 PERFORMANCE EVENTS TOOLING ARM64
-R:	John Garry <john.garry@huawei.com>
+R:	John Garry <john.g.garry@oracle.com>
 R:	Will Deacon <will@kernel.org>
 R:	James Clark <james.clark@arm.com>
 R:	Mike Leach <mike.leach@linaro.org>
-- 
GitLab


From c66a36af7ba3a628453da1d91f42ee64fb36ea5a Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 18 Nov 2022 11:01:09 -0800
Subject: [PATCH 2055/4122] perf lock contention: Do not use BPF task local
 storage

It caused some troubles when a lock inside kmalloc is contended
because task local storage would allocate memory using kmalloc.
It'd create a recusion and even crash in my system.

There could be a couple of workarounds but I think the simplest
one is to use a pre-allocated hash map.  We could fix the task
local storage to use the safe BPF allocator, but it takes time
so let's change this until it happens actually.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Chris Li <chriscli@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221118190109.1512674-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_lock_contention.c         |  1 +
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 34 ++++++++++++-------
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 0deec1178778e..4db9ad3d50c41 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -39,6 +39,7 @@ int lock_contention_prepare(struct lock_contention *con)
 	bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
 	bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
 	bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
+	bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
 
 	if (target__has_cpu(target))
 		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 1bb8628e7c9f0..9681cb59b0df4 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -40,10 +40,10 @@ struct {
 
 /* maintain timestamp at the beginning of contention */
 struct {
-	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
-	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__uint(type, BPF_MAP_TYPE_HASH);
 	__type(key, int);
 	__type(value, struct tstamp_data);
+	__uint(max_entries, MAX_ENTRIES);
 } tstamp SEC(".maps");
 
 /* actual lock contention statistics */
@@ -103,18 +103,28 @@ static inline int can_record(void)
 SEC("tp_btf/contention_begin")
 int contention_begin(u64 *ctx)
 {
-	struct task_struct *curr;
+	__u32 pid;
 	struct tstamp_data *pelem;
 
 	if (!enabled || !can_record())
 		return 0;
 
-	curr = bpf_get_current_task_btf();
-	pelem = bpf_task_storage_get(&tstamp, curr, NULL,
-				     BPF_LOCAL_STORAGE_GET_F_CREATE);
-	if (!pelem || pelem->lock)
+	pid = bpf_get_current_pid_tgid();
+	pelem = bpf_map_lookup_elem(&tstamp, &pid);
+	if (pelem && pelem->lock)
 		return 0;
 
+	if (pelem == NULL) {
+		struct tstamp_data zero = {};
+
+		bpf_map_update_elem(&tstamp, &pid, &zero, BPF_ANY);
+		pelem = bpf_map_lookup_elem(&tstamp, &pid);
+		if (pelem == NULL) {
+			lost++;
+			return 0;
+		}
+	}
+
 	pelem->timestamp = bpf_ktime_get_ns();
 	pelem->lock = (__u64)ctx[0];
 	pelem->flags = (__u32)ctx[1];
@@ -128,7 +138,7 @@ int contention_begin(u64 *ctx)
 SEC("tp_btf/contention_end")
 int contention_end(u64 *ctx)
 {
-	struct task_struct *curr;
+	__u32 pid;
 	struct tstamp_data *pelem;
 	struct contention_key key;
 	struct contention_data *data;
@@ -137,8 +147,8 @@ int contention_end(u64 *ctx)
 	if (!enabled)
 		return 0;
 
-	curr = bpf_get_current_task_btf();
-	pelem = bpf_task_storage_get(&tstamp, curr, NULL, 0);
+	pid = bpf_get_current_pid_tgid();
+	pelem = bpf_map_lookup_elem(&tstamp, &pid);
 	if (!pelem || pelem->lock != ctx[0])
 		return 0;
 
@@ -156,7 +166,7 @@ int contention_end(u64 *ctx)
 		};
 
 		bpf_map_update_elem(&lock_stat, &key, &first, BPF_NOEXIST);
-		pelem->lock = 0;
+		bpf_map_delete_elem(&tstamp, &pid);
 		return 0;
 	}
 
@@ -169,7 +179,7 @@ int contention_end(u64 *ctx)
 	if (data->min_time > duration)
 		data->min_time = duration;
 
-	pelem->lock = 0;
+	bpf_map_delete_elem(&tstamp, &pid);
 	return 0;
 }
 
-- 
GitLab


From 8749c27895a369a99e4a21709b3e3bec4785778f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 23 Sep 2022 22:39:13 +0800
Subject: [PATCH 2056/4122] habanalabs: fix return value check in
 hl_fw_get_sec_attest_data()

If hl_cpu_accessible_dma_pool_alloc() fails, we should check
'req_cpu_addr', fix it.

Fixes: 0c88760f8f5e ("habanalabs/gaudi2: add secured attestation info uapi")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 2de6a9bd564de..f18e53bbba6bb 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -2983,7 +2983,7 @@ static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void
 	int rc;
 
 	req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr);
-	if (!data) {
+	if (!req_cpu_addr) {
 		dev_err(hdev->dev,
 			"Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id);
 		return -ENOMEM;
-- 
GitLab


From a925d90b365aa38565191857bddc3c12d80fda96 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 23 Aug 2022 15:14:14 +0300
Subject: [PATCH 2057/4122] habanalabs: allow control device open during reset

Monitoring apps would like to query device state at any time so we
should allow it also during reset because it doesn't involve
accessing the h/w.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c       | 22 +++++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h   |  2 ++
 .../misc/habanalabs/common/habanalabs_drv.c   |  4 ++--
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 233d8b46c831f..1aaaa2004e345 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -355,6 +355,28 @@ bool hl_device_operational(struct hl_device *hdev,
 	}
 }
 
+bool hl_ctrl_device_operational(struct hl_device *hdev,
+		enum hl_device_status *status)
+{
+	enum hl_device_status current_status;
+
+	current_status = hl_device_status(hdev);
+	if (status)
+		*status = current_status;
+
+	switch (current_status) {
+	case HL_DEVICE_STATUS_MALFUNCTION:
+		return false;
+	case HL_DEVICE_STATUS_IN_RESET:
+	case HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE:
+	case HL_DEVICE_STATUS_NEEDS_RESET:
+	case HL_DEVICE_STATUS_OPERATIONAL:
+	case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
+	default:
+		return true;
+	}
+}
+
 static void hpriv_release(struct kref *ref)
 {
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 58c95b13be69a..2ffb8378f5657 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3496,6 +3496,8 @@ int hl_device_open(struct inode *inode, struct file *filp);
 int hl_device_open_ctrl(struct inode *inode, struct file *filp);
 bool hl_device_operational(struct hl_device *hdev,
 		enum hl_device_status *status);
+bool hl_ctrl_device_operational(struct hl_device *hdev,
+		enum hl_device_status *status);
 enum hl_device_status hl_device_status(struct hl_device *hdev);
 int hl_device_set_debug_mode(struct hl_device *hdev, struct hl_ctx *ctx, bool enable);
 int hl_hw_queues_create(struct hl_device *hdev);
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 112632afe7d53..3ee44ea58d5cc 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -270,9 +270,9 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
 
 	mutex_lock(&hdev->fpriv_ctrl_list_lock);
 
-	if (!hl_device_operational(hdev, NULL)) {
+	if (!hl_ctrl_device_operational(hdev, NULL)) {
 		dev_dbg_ratelimited(hdev->dev_ctrl,
-			"Can't open %s because it is disabled or in reset\n",
+			"Can't open %s because it is disabled\n",
 			dev_name(hdev->dev_ctrl));
 		rc = -EPERM;
 		goto out_err;
-- 
GitLab


From ea73ef14ddf93b8b1ae6ce1963846f43a81bb510 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 22 Sep 2022 12:30:32 +0300
Subject: [PATCH 2058/4122] habanalabs: Use simplified API for p2p dist calc

Use the simplified API that calculates distance between two devices.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index ef28f3b37b932..99b1d6ce26aef 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -1689,7 +1689,7 @@ static int hl_dmabuf_attach(struct dma_buf *dmabuf,
 	hl_dmabuf = dmabuf->priv;
 	hdev = hl_dmabuf->ctx->hdev;
 
-	rc = pci_p2pdma_distance_many(hdev->pdev, &attachment->dev, 1, true);
+	rc = pci_p2pdma_distance(hdev->pdev, attachment->dev, true);
 
 	if (rc < 0)
 		attachment->peer2peer = false;
-- 
GitLab


From 52d5e5469526216bcc418f26a2796d5af6226023 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 19 Sep 2022 18:51:59 +0300
Subject: [PATCH 2059/4122] habanalabs: refactor razwi event notification

This event notification was compatible only with gaudi, where razwi
and page fault happens together.

To make it compatible with all ASICs, this refactor contains:

1. Razwi notification will only notify about razwi info.
   New notification will be added in future patch, to retrieve data
   about page fault error.

2. Changed razwi info structure to support all ASICs.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c       | 22 +++++++
 drivers/misc/habanalabs/common/habanalabs.h   | 31 ++-------
 .../misc/habanalabs/common/habanalabs_drv.c   |  2 +-
 .../misc/habanalabs/common/habanalabs_ioctl.c | 12 +---
 drivers/misc/habanalabs/gaudi/gaudi.c         | 64 +++++++++----------
 include/uapi/misc/habanalabs.h                | 45 ++++++++-----
 6 files changed, 90 insertions(+), 86 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 1aaaa2004e345..30ddaaae67e51 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2253,3 +2253,25 @@ inline void hl_wreg(struct hl_device *hdev, u32 reg, u32 val)
 {
 	writel(val, hdev->rmmio + reg);
 }
+
+void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
+			u8 flags)
+{
+	if (num_of_engines > HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR) {
+		dev_err(hdev->dev,
+				"Number of possible razwi initiators (%u) exceeded limit (%u)\n",
+				num_of_engines, HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR);
+		return;
+	}
+
+	/* In case it's the first razwi since the device was opened, capture its parameters */
+	if (atomic_cmpxchg(&hdev->captured_err_info.razwi_info_recorded, 0, 1))
+		return;
+
+	hdev->captured_err_info.razwi.timestamp = ktime_to_ns(ktime_get());
+	hdev->captured_err_info.razwi.addr = addr;
+	hdev->captured_err_info.razwi.num_of_possible_engines = num_of_engines;
+	memcpy(&hdev->captured_err_info.razwi.engine_id[0], &engine_id[0],
+			num_of_engines * sizeof(u16));
+	hdev->captured_err_info.razwi.flags = flags;
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 2ffb8378f5657..cdc50c2c4de8a 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2925,30 +2925,6 @@ struct cs_timeout_info {
 	u64		seq;
 };
 
-/**
- * struct razwi_info - info about last razwi error occurred.
- * @timestamp: razwi timestamp.
- * @write_enable: if set writing to razwi parameters in the structure is enabled.
- *                otherwise - disabled, so the first (root cause) razwi will not be overwritten.
- * @addr: address that caused razwi.
- * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does
- *               not have engine id it will be set to U16_MAX.
- * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible
- *               engines which one them caused the razwi. In that case, it will contain the
- *               second possible engine id, otherwise it will be set to U16_MAX.
- * @non_engine_initiator: in case the initiator of the razwi does not have engine id.
- * @type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX.
- */
-struct razwi_info {
-	ktime_t		timestamp;
-	atomic_t	write_enable;
-	u64		addr;
-	u16		engine_id_1;
-	u16		engine_id_2;
-	u8		non_engine_initiator;
-	u8		type;
-};
-
 #define MAX_QMAN_STREAMS_INFO		4
 #define OPCODE_INFO_MAX_ADDR_SIZE	8
 /**
@@ -2985,11 +2961,14 @@ struct undefined_opcode_info {
  * struct hl_error_info - holds information collected during an error.
  * @cs_timeout: CS timeout error information.
  * @razwi: razwi information.
+ * @razwi_info_recorded: if set writing to razwi information is enabled.
+ *                otherwise - disabled, so the first (root cause) razwi will not be overwritten.
  * @undef_opcode: undefined opcode information
  */
 struct hl_error_info {
 	struct cs_timeout_info		cs_timeout;
-	struct razwi_info		razwi;
+	struct hl_info_razwi_event	razwi;
+	atomic_t			razwi_info_recorded;
 	struct undefined_opcode_info	undef_opcode;
 };
 
@@ -3800,6 +3779,8 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
 		      struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp,
 		      void *args);
 __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...);
+void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
+			u8 flags);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 3ee44ea58d5cc..d87434b9bc164 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -212,7 +212,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 	hl_debugfs_add_file(hpriv);
 
 	atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
-	atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
+	atomic_set(&hdev->captured_err_info.razwi_info_recorded, 0);
 	hdev->captured_err_info.undef_opcode.write_enable = true;
 
 	hdev->open_counter++;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 43afe40966e50..6aef4e24d122d 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -603,20 +603,14 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 {
 	struct hl_device *hdev = hpriv->hdev;
 	u32 max_size = args->return_size;
-	struct hl_info_razwi_event info = {0};
+	struct hl_info_razwi_event *info = &hdev->captured_err_info.razwi;
 	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
 
 	if ((!max_size) || (!out))
 		return -EINVAL;
 
-	info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
-	info.addr = hdev->captured_err_info.razwi.addr;
-	info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
-	info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
-	info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
-	info.error_type = hdev->captured_err_info.razwi.type;
-
-	return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
+	return copy_to_user(out, info, min_t(size_t, max_size, sizeof(struct hl_info_razwi_event)))
+				? -EFAULT : 0;
 }
 
 static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 92560414e8433..f856ac51fde1c 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6505,8 +6505,8 @@ event_not_supported:
 }
 
 static const char *gaudi_get_razwi_initiator_dma_name(struct hl_device *hdev, u32 x_y,
-							bool is_write, s32 *engine_id_1,
-							s32 *engine_id_2)
+							bool is_write, u16 *engine_id_1,
+							u16 *engine_id_2)
 {
 	u32 dma_id[2], dma_offset, err_cause[2], mask, i;
 
@@ -6603,7 +6603,7 @@ unknown_initiator:
 }
 
 static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool is_write,
-							u32 *engine_id_1, u32 *engine_id_2)
+							u16 *engine_id_1, u16 *engine_id_2)
 {
 	u32 val, x_y, axi_id;
 
@@ -6719,8 +6719,8 @@ static const char *gaudi_get_razwi_initiator_name(struct hl_device *hdev, bool i
 	return "unknown initiator";
 }
 
-static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_id_1,
-						u32 *engine_id_2)
+static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_id_1,
+						u16 *engine_id_2, bool *is_read, bool *is_write)
 {
 
 	if (RREG32(mmMMU_UP_RAZWI_WRITE_VLD)) {
@@ -6728,6 +6728,7 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_i
 			"RAZWI event caused by illegal write of %s\n",
 			gaudi_get_razwi_initiator_name(hdev, true, engine_id_1, engine_id_2));
 		WREG32(mmMMU_UP_RAZWI_WRITE_VLD, 0);
+		*is_write = true;
 	}
 
 	if (RREG32(mmMMU_UP_RAZWI_READ_VLD)) {
@@ -6735,10 +6736,11 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u32 *engine_i
 			"RAZWI event caused by illegal read of %s\n",
 			gaudi_get_razwi_initiator_name(hdev, false, engine_id_1, engine_id_2));
 		WREG32(mmMMU_UP_RAZWI_READ_VLD, 0);
+		*is_read = true;
 	}
 }
 
-static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u8 *type)
+static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 val;
@@ -6753,8 +6755,6 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr
 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
 
 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
-		*type = HL_RAZWI_PAGE_FAULT;
-
 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
 	}
 
@@ -6765,7 +6765,6 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr
 		*addr |= RREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE_VA);
 
 		dev_err_ratelimited(hdev->dev, "MMU access error on va 0x%llx\n", *addr);
-		*type = HL_RAZWI_MMU_ACCESS_ERROR;
 
 		WREG32(mmMMU_UP_ACCESS_ERROR_CAPTURE, 0);
 	}
@@ -7302,46 +7301,41 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
 					bool razwi)
 {
-	u32 engine_id_1, engine_id_2;
+	bool is_read = false, is_write = false;
+	u16 engine_id[2], num_of_razwi_eng = 0;
 	char desc[64] = "";
 	u64 razwi_addr = 0;
-	u8 razwi_type;
-	int rc;
+	u8 razwi_flags = 0;
 
 	/*
 	 * Init engine id by default as not valid and only if razwi initiated from engine with
 	 * engine id it will get valid value.
-	 * Init razwi type to default, will be changed only if razwi caused by page fault of
-	 * MMU access error
 	 */
-	engine_id_1 = U16_MAX;
-	engine_id_2 = U16_MAX;
-	razwi_type = U8_MAX;
+	engine_id[0] = HL_RAZWI_NA_ENG_ID;
+	engine_id[1] = HL_RAZWI_NA_ENG_ID;
 
 	gaudi_get_event_desc(event_type, desc, sizeof(desc));
 	dev_err_ratelimited(hdev->dev, "Received H/W interrupt %d [\"%s\"]\n",
 		event_type, desc);
 
 	if (razwi) {
-		gaudi_print_and_get_razwi_info(hdev, &engine_id_1, &engine_id_2);
-		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type);
-
-		/* In case it's the first razwi, save its parameters*/
-		rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0);
-		if (rc) {
-			hdev->captured_err_info.razwi.timestamp = ktime_get();
-			hdev->captured_err_info.razwi.addr = razwi_addr;
-			hdev->captured_err_info.razwi.engine_id_1 = engine_id_1;
-			hdev->captured_err_info.razwi.engine_id_2 = engine_id_2;
-			/*
-			 * If first engine id holds non valid value the razwi initiator
-			 * does not have engine id
-			 */
-			hdev->captured_err_info.razwi.non_engine_initiator =
-									(engine_id_1 == U16_MAX);
-			hdev->captured_err_info.razwi.type = razwi_type;
-
+		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
+						&is_write);
+		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr);
+
+		if (is_read)
+			razwi_flags |= HL_RAZWI_READ;
+		if (is_write)
+			razwi_flags |= HL_RAZWI_WRITE;
+
+		if (engine_id[0] != HL_RAZWI_NA_ENG_ID) {
+			if (engine_id[1] != HL_RAZWI_NA_ENG_ID)
+				num_of_razwi_eng = 2;
+			else
+				num_of_razwi_eng = 1;
 		}
+
+		hl_capture_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags);
 	}
 }
 
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index e00ebe05097d7..d6f84cb35e3d8 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -1071,31 +1071,44 @@ struct hl_info_cs_timeout_event {
 	__u64 seq;
 };
 
-#define HL_RAZWI_PAGE_FAULT 0
-#define HL_RAZWI_MMU_ACCESS_ERROR 1
+#define HL_RAZWI_NA_ENG_ID U16_MAX
+#define HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR 128
+#define HL_RAZWI_READ		BIT(0)
+#define HL_RAZWI_WRITE		BIT(1)
+#define HL_RAZWI_LBW		BIT(2)
+#define HL_RAZWI_HBW		BIT(3)
+#define HL_RAZWI_RR		BIT(4)
+#define HL_RAZWI_ADDR_DEC	BIT(5)
 
 /**
  * struct hl_info_razwi_event - razwi information.
  * @timestamp: timestamp of razwi.
  * @addr: address which accessing it caused razwi.
- * @engine_id_1: engine id of the razwi initiator, if it was initiated by engine that does not
- *               have engine id it will be set to U16_MAX.
- * @engine_id_2: second engine id of razwi initiator. Might happen that razwi have 2 possible
- *               engines which one them caused the razwi. In that case, it will contain the
- *               second possible engine id, otherwise it will be set to U16_MAX.
- * @no_engine_id: if razwi initiator does not have engine id, this field will be set to 1,
- *                otherwise 0.
- * @error_type: cause of razwi, page fault or access error, otherwise it will be set to U8_MAX.
- * @pad: padding to 64 bit.
+ * @engine_id: engine id of the razwi initiator, if it was initiated by engine that does not
+ *             have engine id it will be set to HL_RAZWI_NA_ENG_ID. If there are several possible
+ *             engines which caused the razwi, it will hold all of them.
+ * @num_of_possible_engines: contains number of possible engine ids. In some asics, razwi indication
+ *                           might be common for several engines and there is no way to get the
+ *                           exact engine. In this way, engine_id array will be filled with all
+ *                           possible engines caused this razwi. Also, there might be possibility
+ *                           in gaudi, where we don't indication on specific engine, in that case
+ *                           the value of this parameter will be zero.
+ * @flags: bitmask for additional data: HL_RAZWI_READ - razwi caused by read operation
+ *                                      HL_RAZWI_WRITE - razwi caused by write operation
+ *                                      HL_RAZWI_LBW - razwi caused by lbw fabric transaction
+ *                                      HL_RAZWI_HBW - razwi caused by hbw fabric transaction
+ *                                      HL_RAZWI_RR - razwi caused by range register
+ *                                      HL_RAZWI_ADDR_DEC - razwi caused by address decode error
+ *         Note: this data is not supported by all asics, in that case the relevant bits will not
+ *               be set.
  */
 struct hl_info_razwi_event {
 	__s64 timestamp;
 	__u64 addr;
-	__u16 engine_id_1;
-	__u16 engine_id_2;
-	__u8 no_engine_id;
-	__u8 error_type;
-	__u8 pad[2];
+	__u16 engine_id[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR];
+	__u16 num_of_possible_engines;
+	__u8 flags;
+	__u8 pad[5];
 };
 
 #define MAX_QMAN_STREAMS_INFO		4
-- 
GitLab


From 0502df9bbea0bd0e77c4a283e4cc34801038899a Mon Sep 17 00:00:00 2001
From: Bharat Jauhari <bjauhari@habana.ai>
Date: Tue, 27 Sep 2022 14:38:38 +0300
Subject: [PATCH 2060/4122] habanalabs: use lower_32_bits()

This fixes sparse warning on doing cast to 32-bits

Signed-off-by: Bharat Jauhari <bjauhari@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index cdc50c2c4de8a..f4b3fa4b09767 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2528,7 +2528,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 				break; \
 			(val) = __elbi_read; \
 		} else {\
-			(val) = RREG32((u32)(addr)); \
+			(val) = RREG32(lower_32_bits(addr)); \
 		} \
 		if (cond) \
 			break; \
@@ -2539,7 +2539,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 					break; \
 				(val) = __elbi_read; \
 			} else {\
-				(val) = RREG32((u32)(addr)); \
+				(val) = RREG32(lower_32_bits(addr)); \
 			} \
 			break; \
 		} \
@@ -2594,7 +2594,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 				if (__rc) \
 					break; \
 			} else { \
-				__read_val = RREG32((u32)(addr_arr)[__arr_idx]); \
+				__read_val = RREG32(lower_32_bits(addr_arr[__arr_idx])); \
 			} \
 			if (__read_val == (expected_val))	\
 				__elem_bitmask &= ~BIT_ULL(__arr_idx);	\
-- 
GitLab


From 6d1c567f2ac66391edf5423247f27c82f6b82d86 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 22 Sep 2022 15:25:46 +0300
Subject: [PATCH 2061/4122] habanalabs/gaudi2: fix module ID for RAZWI handling

RAZWI is optionally handled as part of the generic QM SEI error
handling, but it always uses PDMA as the module ID.
Fix it to use the suitable module ID according to the specific event.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 65e6cae6100a4..b3685978f6aea 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -7602,6 +7602,7 @@ static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base)
 static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
 					struct hl_eq_razwi_info *razwi_info)
 {
+	enum razwi_event_sources module;
 	u64 qman_base;
 	u8 index;
 
@@ -7611,9 +7612,11 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
 		qman_base = mmDCORE0_TPC0_QM_BASE +
 				(index / NUM_OF_TPC_PER_DCORE) * DCORE_OFFSET +
 				(index % NUM_OF_TPC_PER_DCORE) * DCORE_TPC_OFFSET;
+		module = RAZWI_TPC;
 		break;
 	case GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
 		qman_base = mmDCORE0_TPC6_QM_BASE;
+		module = RAZWI_TPC;
 		break;
 	case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE:
 	case GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE:
@@ -7623,16 +7626,19 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
 		qman_base = mmDCORE0_MME_QM_BASE + index * DCORE_OFFSET;
+		module = RAZWI_MME;
 		break;
 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
 		index = event_type - GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP;
 		qman_base = mmPDMA0_QM_BASE + index * PDMA_OFFSET;
+		module = RAZWI_PDMA;
 		break;
 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
 		qman_base = mmROT0_QM_BASE + index * ROT_OFFSET;
+		module = RAZWI_ROT;
 		break;
 	default:
 		return;
@@ -7647,7 +7653,7 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
 
 	/* check if RAZWI happened */
 	if (razwi_info)
-		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, 0, 0, razwi_info);
+		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info);
 }
 
 static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
-- 
GitLab


From dd600db47ba60c3c69d4d24c73c43133c6040118 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Sun, 18 Sep 2022 21:37:31 +0300
Subject: [PATCH 2062/4122] habanalabs: add page fault info uapi

Only the first page fault will be saved.
Besides the address which caused the page fault, the driver captures
all of the mmu user mappings.
User can retrieve this data via the new uapi (new opcode in INFO ioctl).

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c       | 58 +++++++++++++++++++
 drivers/misc/habanalabs/common/habanalabs.h   | 22 ++++++-
 .../misc/habanalabs/common/habanalabs_drv.c   |  1 +
 .../misc/habanalabs/common/habanalabs_ioctl.c | 42 ++++++++++++++
 drivers/misc/habanalabs/gaudi/gaudi.c         |  2 +
 include/uapi/misc/habanalabs.h                | 31 ++++++++++
 6 files changed, 155 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 30ddaaae67e51..5dc6c77b47211 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -12,6 +12,7 @@
 
 #include <linux/pci.h>
 #include <linux/hwmon.h>
+#include <linux/vmalloc.h>
 
 #include <trace/events/habanalabs.h>
 
@@ -2199,6 +2200,8 @@ void hl_device_fini(struct hl_device *hdev)
 
 	hl_mmu_fini(hdev);
 
+	vfree(hdev->captured_err_info.pgf_info.user_mappings);
+
 	hl_eq_fini(hdev, &hdev->event_queue);
 
 	kfree(hdev->shadow_cs_queue);
@@ -2275,3 +2278,58 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
 			num_of_engines * sizeof(u16));
 	hdev->captured_err_info.razwi.flags = flags;
 }
+static void hl_capture_user_mappings(struct hl_device *hdev)
+{
+	struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info;
+	struct hl_vm_hash_node *hnode;
+	struct hl_userptr *userptr;
+	struct hl_ctx *ctx;
+	u32 map_idx = 0;
+	int i;
+
+	ctx = hl_get_compute_ctx(hdev);
+	if (!ctx) {
+		dev_err(hdev->dev, "Can't get user context for user mappings\n");
+		return;
+	}
+
+	mutex_lock(&ctx->mem_hash_lock);
+	hash_for_each(ctx->mem_hash, i, hnode, node)
+	pgf_info->num_of_user_mappings++;
+
+	if (!pgf_info->num_of_user_mappings)
+		goto finish;
+
+	/* In case we already allocated in previous session, need to release it before
+	 * allocating new buffer.
+	 */
+	vfree(pgf_info->user_mappings);
+	pgf_info->user_mappings =
+			vmalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping));
+	if (!pgf_info->user_mappings) {
+		pgf_info->num_of_user_mappings = 0;
+		goto finish;
+	}
+
+	hash_for_each(ctx->mem_hash, i, hnode, node) {
+		userptr = hnode->ptr;
+		pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
+		pgf_info->user_mappings[map_idx].size = userptr->size;
+		map_idx++;
+	}
+finish:
+	mutex_unlock(&ctx->mem_hash_lock);
+	hl_ctx_put(ctx);
+}
+
+void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu)
+{
+	/* Capture only the first page fault */
+	if (atomic_cmpxchg(&hdev->captured_err_info.pgf_info_recorded, 0, 1))
+		return;
+
+	hdev->captured_err_info.pgf_info.pgf.timestamp = ktime_to_ns(ktime_get());
+	hdev->captured_err_info.pgf_info.pgf.addr = addr;
+	hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id;
+	hl_capture_user_mappings(hdev);
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index f4b3fa4b09767..1489240d5a3ad 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2957,19 +2957,38 @@ struct undefined_opcode_info {
 	bool write_enable;
 };
 
+/**
+ * struct page_fault_info - info about page fault
+ * @pgf_info: page fault information.
+ * @user_mappings: buffer containing user mappings.
+ * @num_of_user_mappings: number of user mappings.
+ */
+struct page_fault_info {
+	struct hl_page_fault_info	pgf;
+	struct hl_user_mapping		*user_mappings;
+	u64				num_of_user_mappings;
+};
+
 /**
  * struct hl_error_info - holds information collected during an error.
  * @cs_timeout: CS timeout error information.
  * @razwi: razwi information.
  * @razwi_info_recorded: if set writing to razwi information is enabled.
- *                otherwise - disabled, so the first (root cause) razwi will not be overwritten.
+ *                       otherwise - disabled, so the first (root cause) razwi will not be
+ *                       overwritten.
  * @undef_opcode: undefined opcode information
+ * @pgf_info: page fault information.
+ * @pgf_info_recorded: if set writing to page fault information is enabled.
+ *                     otherwise - disabled, so the first (root cause) page fault will not be
+ *                     overwritten.
  */
 struct hl_error_info {
 	struct cs_timeout_info		cs_timeout;
 	struct hl_info_razwi_event	razwi;
 	atomic_t			razwi_info_recorded;
 	struct undefined_opcode_info	undef_opcode;
+	struct page_fault_info		pgf_info;
+	atomic_t			pgf_info_recorded;
 };
 
 /**
@@ -3781,6 +3800,7 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
 __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...);
 void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
 			u8 flags);
+void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index d87434b9bc164..714994725224f 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -213,6 +213,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
 	atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
 	atomic_set(&hdev->captured_err_info.razwi_info_recorded, 0);
+	atomic_set(&hdev->captured_err_info.pgf_info_recorded, 0);
 	hdev->captured_err_info.undef_opcode.write_enable = true;
 
 	hdev->open_counter++;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 6aef4e24d122d..cac2c7fb14f1b 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -778,6 +778,42 @@ static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
 	return rc;
 }
 
+static int page_fault_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	struct hl_device *hdev = hpriv->hdev;
+	u32 max_size = args->return_size;
+	struct hl_page_fault_info *info = &hdev->captured_err_info.pgf_info.pgf;
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+
+	if ((!max_size) || (!out))
+		return -EINVAL;
+
+	return copy_to_user(out, info, min_t(size_t, max_size, sizeof(struct hl_page_fault_info)))
+				? -EFAULT : 0;
+}
+
+static int user_mappings_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+	void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+	u32 user_buf_size = args->return_size;
+	struct hl_device *hdev = hpriv->hdev;
+	struct page_fault_info *pgf_info;
+	u64 actual_size;
+
+	pgf_info = &hdev->captured_err_info.pgf_info;
+	args->array_size = pgf_info->num_of_user_mappings;
+
+	if (!out)
+		return -EINVAL;
+
+	actual_size = pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping);
+	if (user_buf_size < actual_size)
+		return -ENOMEM;
+
+	return copy_to_user(out, pgf_info->user_mappings, min_t(size_t, user_buf_size, actual_size))
+				? -EFAULT : 0;
+}
+
 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 				struct device *dev)
 {
@@ -837,6 +873,12 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_GET_EVENTS:
 		return events_info(hpriv, args);
 
+	case HL_INFO_PAGE_FAULT_EVENT:
+		return page_fault_info(hpriv, args);
+
+	case HL_INFO_USER_MAPPINGS:
+		return user_mappings_info(hpriv, args);
+
 	default:
 		break;
 	}
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index f856ac51fde1c..1a99f7be8b609 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6755,6 +6755,8 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr
 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
 
 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
+		hl_capture_page_fault(hdev, *addr, 0, true);
+
 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
 	}
 
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index d6f84cb35e3d8..2b794f54e2ed3 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -778,6 +778,9 @@ enum hl_server_type {
  * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd
  * HL_INFO_GET_EVENTS         - Retrieve the last occurred events
  * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information.
+ * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic.
+ * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault.
+ * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event.
  */
 #define HL_INFO_HW_IP_INFO			0
 #define HL_INFO_HW_EVENTS			1
@@ -809,6 +812,8 @@ enum hl_server_type {
 #define HL_INFO_GET_EVENTS			30
 #define HL_INFO_UNDEFINED_OPCODE_EVENT		31
 #define HL_INFO_ENGINE_STATUS			32
+#define HL_INFO_PAGE_FAULT_EVENT		33
+#define HL_INFO_USER_MAPPINGS			34
 
 #define HL_INFO_VERSION_MAX_LEN			128
 #define HL_INFO_CARD_NAME_MAX_LEN		16
@@ -1187,6 +1192,29 @@ struct hl_info_sec_attest {
 	__u8 pad0[2];
 };
 
+/**
+ * struct hl_page_fault_info - page fault information.
+ * @timestamp: timestamp of page fault.
+ * @addr: address which accessing it caused page fault.
+ * @engine_id: engine id which caused the page fault, supported only in gaudi3.
+ */
+struct hl_page_fault_info {
+	__s64 timestamp;
+	__u64 addr;
+	__u16 engine_id;
+	__u8 pad[6];
+};
+
+/**
+ * struct hl_user_mapping - user mapping information.
+ * @dev_va: device virtual address.
+ * @size: virtual address mapping size.
+ */
+struct hl_user_mapping {
+	__u64 dev_va;
+	__u64 size;
+};
+
 enum gaudi_dcores {
 	HL_GAUDI_WS_DCORE,
 	HL_GAUDI_WN_DCORE,
@@ -1213,6 +1241,8 @@ enum gaudi_dcores {
  *                           needed, hence updating this variable so user will know the exact amount
  *                           of bytes copied by the kernel to the buffer.
  * @sec_attest_nonce: Nonce number used for attestation report.
+ * @array_size: Number of array members copied to user buffer.
+ *              Relevant for HL_INFO_USER_MAPPINGS info ioctl.
  * @pad: Padding to 64 bit.
  */
 struct hl_info_args {
@@ -1228,6 +1258,7 @@ struct hl_info_args {
 		__u32 eventfd;
 		__u32 user_buffer_actual_size;
 		__u32 sec_attest_nonce;
+		__u32 array_size;
 	};
 
 	__u32 pad;
-- 
GitLab


From 189b203ebbea181d678b4d8bf3547eb78c8ae44a Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Wed, 28 Sep 2022 11:38:00 +0300
Subject: [PATCH 2063/4122] habanalabs: replace 'pf' to 'prefetch'

pf was an abbreviation for prefetch but because pf already stands
for 'physical function', we decided to change it to 'prefetch'.

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 14 ++++++-------
 drivers/misc/habanalabs/common/habanalabs.h |  8 ++++----
 drivers/misc/habanalabs/common/mmu/mmu.c    | 22 ++++++++++-----------
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 5dc6c77b47211..bf675cf39f717 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -783,8 +783,8 @@ static int device_early_init(struct hl_device *hdev)
 		goto free_cs_cmplt_wq;
 	}
 
-	hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
-	if (!hdev->pf_wq) {
+	hdev->prefetch_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0);
+	if (!hdev->prefetch_wq) {
 		dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n");
 		rc = -ENOMEM;
 		goto free_ts_free_wq;
@@ -794,7 +794,7 @@ static int device_early_init(struct hl_device *hdev)
 					GFP_KERNEL);
 	if (!hdev->hl_chip_info) {
 		rc = -ENOMEM;
-		goto free_pf_wq;
+		goto free_prefetch_wq;
 	}
 
 	rc = hl_mmu_if_set_funcs(hdev);
@@ -833,8 +833,8 @@ free_cb_mgr:
 	hl_mem_mgr_fini(&hdev->kernel_mem_mgr);
 free_chip_info:
 	kfree(hdev->hl_chip_info);
-free_pf_wq:
-	destroy_workqueue(hdev->pf_wq);
+free_prefetch_wq:
+	destroy_workqueue(hdev->prefetch_wq);
 free_ts_free_wq:
 	destroy_workqueue(hdev->ts_free_obj_wq);
 free_cs_cmplt_wq:
@@ -877,7 +877,7 @@ static void device_early_fini(struct hl_device *hdev)
 
 	kfree(hdev->hl_chip_info);
 
-	destroy_workqueue(hdev->pf_wq);
+	destroy_workqueue(hdev->prefetch_wq);
 	destroy_workqueue(hdev->ts_free_obj_wq);
 	destroy_workqueue(hdev->cs_cmplt_wq);
 	destroy_workqueue(hdev->eq_wq);
@@ -1076,7 +1076,7 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r
 	hl_cs_rollback_all(hdev, skip_wq_flush);
 
 	/* flush the MMU prefetch workqueue */
-	flush_workqueue(hdev->pf_wq);
+	flush_workqueue(hdev->prefetch_wq);
 
 	/* Release all pending user interrupts, each pending user interrupt
 	 * holds a reference to user context
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 1489240d5a3ad..6d8ce4a1dbb15 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2811,7 +2811,7 @@ struct hl_mmu_funcs {
 
 /**
  * struct hl_prefetch_work - prefetch work structure handler
- * @pf_work: actual work struct.
+ * @prefetch_work: actual work struct.
  * @ctx: compute context.
  * @va: virtual address to pre-fetch.
  * @size: pre-fetch size.
@@ -2819,7 +2819,7 @@ struct hl_mmu_funcs {
  * @asid: ASID for maintenance operation.
  */
 struct hl_prefetch_work {
-	struct work_struct	pf_work;
+	struct work_struct	prefetch_work;
 	struct hl_ctx		*ctx;
 	u64			va;
 	u64			size;
@@ -3060,7 +3060,7 @@ struct hl_reset_info {
  * @cs_cmplt_wq: work queue of CS completions for executing work in process
  *               context.
  * @ts_free_obj_wq: work queue for timestamp registration objects release.
- * @pf_wq: work queue for MMU pre-fetch operations.
+ * @prefetch_wq: work queue for MMU pre-fetch operations.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @cs_mirror_list: CS mirror list for TDR.
@@ -3231,7 +3231,7 @@ struct hl_device {
 	struct workqueue_struct		*eq_wq;
 	struct workqueue_struct		*cs_cmplt_wq;
 	struct workqueue_struct		*ts_free_obj_wq;
-	struct workqueue_struct		*pf_wq;
+	struct workqueue_struct		*prefetch_wq;
 	struct hl_ctx			*kernel_ctx;
 	struct hl_hw_queue		*kernel_queues;
 	struct list_head		cs_mirror_list;
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index cf8946266615c..589179f8cd41b 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -699,7 +699,7 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard,
 
 static void hl_mmu_prefetch_work_function(struct work_struct *work)
 {
-	struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
+	struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, prefetch_work);
 	struct hl_ctx *ctx = pfw->ctx;
 	struct hl_device *hdev = ctx->hdev;
 
@@ -723,25 +723,25 @@ put_ctx:
 
 int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size)
 {
-	struct hl_prefetch_work *handle_pf_work;
+	struct hl_prefetch_work *handle_prefetch_work;
 
-	handle_pf_work = kmalloc(sizeof(*handle_pf_work), GFP_KERNEL);
-	if (!handle_pf_work)
+	handle_prefetch_work = kmalloc(sizeof(*handle_prefetch_work), GFP_KERNEL);
+	if (!handle_prefetch_work)
 		return -ENOMEM;
 
-	INIT_WORK(&handle_pf_work->pf_work, hl_mmu_prefetch_work_function);
-	handle_pf_work->ctx = ctx;
-	handle_pf_work->va = va;
-	handle_pf_work->size = size;
-	handle_pf_work->flags = flags;
-	handle_pf_work->asid = asid;
+	INIT_WORK(&handle_prefetch_work->prefetch_work, hl_mmu_prefetch_work_function);
+	handle_prefetch_work->ctx = ctx;
+	handle_prefetch_work->va = va;
+	handle_prefetch_work->size = size;
+	handle_prefetch_work->flags = flags;
+	handle_prefetch_work->asid = asid;
 
 	/*
 	 * as actual prefetch is done in a WQ we must get the context (and put it
 	 * at the end of the work function)
 	 */
 	hl_ctx_get(ctx);
-	queue_work(ctx->hdev->pf_wq, &handle_pf_work->pf_work);
+	queue_work(ctx->hdev->prefetch_wq, &handle_prefetch_work->prefetch_work);
 
 	return 0;
 }
-- 
GitLab


From 16448d644404351e685466ab14e7e043ad67673c Mon Sep 17 00:00:00 2001
From: Koby Elbaz <kelbaz@habana.ai>
Date: Wed, 28 Sep 2022 15:56:13 +0300
Subject: [PATCH 2064/4122] habanalabs/gaudi2: remove privileged MME clock
 configuration

Privileged MME clock configuration is removed as it is done by the f/w.

Signed-off-by: Koby Elbaz <kelbaz@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index b3685978f6aea..cb048920ffc8a 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -4535,7 +4535,7 @@ static void gaudi2_init_mme_acc(struct hl_device *hdev, u32 reg_base)
 static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
 							bool config_qman_only)
 {
-	u32 queue_id_base, reg_base, clk_en_addr = 0;
+	u32 queue_id_base, reg_base;
 
 	switch (dcore_id) {
 	case 0:
@@ -4543,23 +4543,18 @@ static void gaudi2_init_dcore_mme(struct hl_device *hdev, int dcore_id,
 		break;
 	case 1:
 		queue_id_base = GAUDI2_QUEUE_ID_DCORE1_MME_0_0;
-		clk_en_addr = mmDCORE1_MME_CTRL_LO_QM_SLV_CLK_EN;
 		break;
 	case 2:
 		queue_id_base = GAUDI2_QUEUE_ID_DCORE2_MME_0_0;
 		break;
 	case 3:
 		queue_id_base = GAUDI2_QUEUE_ID_DCORE3_MME_0_0;
-		clk_en_addr = mmDCORE3_MME_CTRL_LO_QM_SLV_CLK_EN;
 		break;
 	default:
 		dev_err(hdev->dev, "Invalid dcore id %u\n", dcore_id);
 		return;
 	}
 
-	if (clk_en_addr && !(hdev->fw_components & FW_TYPE_BOOT_CPU))
-		WREG32(clk_en_addr, 0x1);
-
 	if (!config_qman_only) {
 		reg_base = gaudi2_mme_acc_blocks_bases[dcore_id];
 		gaudi2_init_mme_acc(hdev, reg_base);
-- 
GitLab


From 5731b6e6f08a4a3adf944fd0436f77f3e9ce1725 Mon Sep 17 00:00:00 2001
From: Tal Cohen <talcohen@habana.ai>
Date: Wed, 28 Sep 2022 18:33:19 +0300
Subject: [PATCH 2065/4122] habanalabs/gaudi2: add device unavailable
 notification

Device unavailable notifies the user that there isn't an option to
retrieve debug information from the device.
When a critical device error occurs and the f/w performs the device
reset, a device unavailable notification shall be sent to the user
process.

Signed-off-by: Tal Cohen <talcohen@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index cb048920ffc8a..e9c4ec429baee 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8576,7 +8576,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 {
 	u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY;
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
-	bool reset_required = false, skip_reset = false;
+	bool reset_required = false, skip_reset = false, is_critical = false;
 	int index, sbte_index;
 	u64 event_mask = 0;
 	u16 event_type;
@@ -8602,6 +8602,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
+		is_critical = eq_entry->ecc_data.is_critical;
 		break;
 
 	case GAUDI2_EVENT_TPC0_QM ... GAUDI2_EVENT_PDMA1_QM:
@@ -8976,9 +8977,16 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	return;
 
 reset_device:
-	if (hdev->hard_reset_on_fw_events) {
+	if (hdev->asic_prop.fw_security_enabled && is_critical) {
+		reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
+
+		/* notify on device unavailable while the reset triggered by fw */
+		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
+					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
 		hl_device_reset(hdev, reset_flags);
+	} else if (hdev->hard_reset_on_fw_events) {
 		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
+		hl_device_reset(hdev, reset_flags);
 	} else {
 		if (!gaudi2_irq_map_table[event_type].msg)
 			hl_fw_unmask_irq(hdev, event_type);
-- 
GitLab


From 3a83ebc521b2e57af070b5667c60ac2d50347658 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 14:09:32 +0300
Subject: [PATCH 2066/4122] habanalabs: skip idle status check if reset on
 device release

If reset upon device release is enabled, there is no need to check the
device idle status in hpriv_release(), because device is going to be
reset in any case.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index bf675cf39f717..e60ed0c8a9db4 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -398,16 +398,14 @@ static void hpriv_release(struct kref *ref)
 	mutex_destroy(&hpriv->ctx_lock);
 	mutex_destroy(&hpriv->restore_phase_mutex);
 
-	if ((!hdev->pldm) && (hdev->pdev) &&
-			(!hdev->asic_funcs->is_device_idle(hdev,
-				idle_mask,
-				HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL))) {
-		dev_err(hdev->dev,
-			"device not idle after user context is closed (0x%llx_%llx)\n",
-			idle_mask[1], idle_mask[0]);
+	/* No need for idle status check if device is going to be reset in any case */
+	if (!hdev->reset_upon_device_release && hdev->pdev && !hdev->pldm)
+		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
+							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
 
-		device_is_idle = false;
-	}
+	if (!device_is_idle)
+		dev_err(hdev->dev, "device not idle after user context is closed (0x%llx_%llx)\n",
+			idle_mask[1], idle_mask[0]);
 
 	/* We need to remove the user from the list to make sure the reset process won't
 	 * try to kill the user process. Because, if we got here, it means there are no
-- 
GitLab


From 51236cd95e7bcea41e57fb2cf238312be21dcf58 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 14:19:21 +0300
Subject: [PATCH 2067/4122] habanalabs: allow unregistering eventfd when device
 non-operational

Unregistering eventfd is for releasing host resources and doesn't
involve an access to the device. As such, there is no reason to disallow
it when device isn't operational.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index cac2c7fb14f1b..5ce5c42e27313 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -879,6 +879,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_USER_MAPPINGS:
 		return user_mappings_info(hpriv, args);
 
+	case HL_INFO_UNREGISTER_EVENTFD:
+		return eventfd_unregister(hpriv, args);
+
 	default:
 		break;
 	}
@@ -935,9 +938,6 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
 	case HL_INFO_REGISTER_EVENTFD:
 		return eventfd_register(hpriv, args);
 
-	case HL_INFO_UNREGISTER_EVENTFD:
-		return eventfd_unregister(hpriv, args);
-
 	case HL_INFO_ENGINE_STATUS:
 		return engine_status_info(hpriv, args);
 
-- 
GitLab


From 1eebb259290b1be5398fec953bdd7923a5cbf33e Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 14:36:27 +0300
Subject: [PATCH 2068/4122] habanalabs: move reset workqueue to be under
 hl_device

'struct hl_device_reset_work' is used as a wrapper for the reset work
and its parameters, including the reset workqueue on which it runs.
In a future commit, another reset related work with similar parameters
is going to be added, but it won't use the reset workqueue.

As in any case there is a single reset workqueue, and to allow the resue
of this structure, move the reset workqueue to 'struct hl_device'.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 15 ++++++---------
 drivers/misc/habanalabs/common/habanalabs.h | 12 ++++++------
 2 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index e60ed0c8a9db4..e9b373a8cdade 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -684,9 +684,8 @@ static void device_hard_reset_pending(struct work_struct *work)
 			"Could not reset device. will try again in %u seconds",
 			HL_PENDING_RESET_PER_SEC);
 
-		queue_delayed_work(device_reset_work->wq,
-			&device_reset_work->reset_work,
-			msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
+		queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
+					msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
 	}
 }
 
@@ -801,9 +800,8 @@ static int device_early_init(struct hl_device *hdev)
 
 	hl_mem_mgr_init(hdev->dev, &hdev->kernel_mem_mgr);
 
-	hdev->device_reset_work.wq =
-			create_singlethread_workqueue("hl_device_reset");
-	if (!hdev->device_reset_work.wq) {
+	hdev->reset_wq = create_singlethread_workqueue("hl_device_reset");
+	if (!hdev->reset_wq) {
 		rc = -ENOMEM;
 		dev_err(hdev->dev, "Failed to create device reset WQ\n");
 		goto free_cb_mgr;
@@ -879,7 +877,7 @@ static void device_early_fini(struct hl_device *hdev)
 	destroy_workqueue(hdev->ts_free_obj_wq);
 	destroy_workqueue(hdev->cs_cmplt_wq);
 	destroy_workqueue(hdev->eq_wq);
-	destroy_workqueue(hdev->device_reset_work.wq);
+	destroy_workqueue(hdev->reset_wq);
 
 	for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
 		destroy_workqueue(hdev->cq_wq[i]);
@@ -1460,8 +1458,7 @@ again:
 		 * Because the reset function can't run from heartbeat work,
 		 * we need to call the reset function from a dedicated work.
 		 */
-		queue_delayed_work(hdev->device_reset_work.wq,
-			&hdev->device_reset_work.reset_work, 0);
+		queue_delayed_work(hdev->reset_wq, &hdev->device_reset_work.reset_work, 0);
 
 		return 0;
 	}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 6d8ce4a1dbb15..4913197c433e6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2682,17 +2682,15 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 struct hwmon_chip_info;
 
 /**
- * struct hl_device_reset_work - reset workqueue task wrapper.
- * @wq: work queue for device reset procedure.
+ * struct hl_device_reset_work - reset work wrapper.
  * @reset_work: reset work to be done.
  * @hdev: habanalabs device structure.
  * @flags: reset flags.
  */
 struct hl_device_reset_work {
-	struct workqueue_struct		*wq;
-	struct delayed_work		reset_work;
-	struct hl_device		*hdev;
-	u32				flags;
+	struct delayed_work	reset_work;
+	struct hl_device	*hdev;
+	u32			flags;
 };
 
 /**
@@ -3061,6 +3059,7 @@ struct hl_reset_info {
  *               context.
  * @ts_free_obj_wq: work queue for timestamp registration objects release.
  * @prefetch_wq: work queue for MMU pre-fetch operations.
+ * @reset_wq: work queue for device reset procedure.
  * @kernel_ctx: Kernel driver context structure.
  * @kernel_queues: array of hl_hw_queue.
  * @cs_mirror_list: CS mirror list for TDR.
@@ -3232,6 +3231,7 @@ struct hl_device {
 	struct workqueue_struct		*cs_cmplt_wq;
 	struct workqueue_struct		*ts_free_obj_wq;
 	struct workqueue_struct		*prefetch_wq;
+	struct workqueue_struct		*reset_wq;
 	struct hl_ctx			*kernel_ctx;
 	struct hl_hw_queue		*kernel_queues;
 	struct list_head		cs_mirror_list;
-- 
GitLab


From 17f3f42af2bcddc38ff08b355e007f3b6d5ce70c Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Thu, 29 Sep 2022 10:21:28 +0300
Subject: [PATCH 2069/4122] habanalabs: handle HBM MMU when capturing page
 fault data

In case of HBM MMU page fault, capture its relevant mappings.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 29 ++++++++++++++++++-------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index e9b373a8cdade..b8b32285720de 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2273,15 +2273,20 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
 			num_of_engines * sizeof(u16));
 	hdev->captured_err_info.razwi.flags = flags;
 }
-static void hl_capture_user_mappings(struct hl_device *hdev)
+static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu)
 {
 	struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info;
+	struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
 	struct hl_vm_hash_node *hnode;
 	struct hl_userptr *userptr;
+	enum vm_type *vm_type;
 	struct hl_ctx *ctx;
 	u32 map_idx = 0;
 	int i;
 
+	/* Reset previous session count*/
+	pgf_info->num_of_user_mappings = 0;
+
 	ctx = hl_get_compute_ctx(hdev);
 	if (!ctx) {
 		dev_err(hdev->dev, "Can't get user context for user mappings\n");
@@ -2290,7 +2295,7 @@ static void hl_capture_user_mappings(struct hl_device *hdev)
 
 	mutex_lock(&ctx->mem_hash_lock);
 	hash_for_each(ctx->mem_hash, i, hnode, node)
-	pgf_info->num_of_user_mappings++;
+		pgf_info->num_of_user_mappings++;
 
 	if (!pgf_info->num_of_user_mappings)
 		goto finish;
@@ -2300,17 +2305,25 @@ static void hl_capture_user_mappings(struct hl_device *hdev)
 	 */
 	vfree(pgf_info->user_mappings);
 	pgf_info->user_mappings =
-			vmalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping));
+			vzalloc(pgf_info->num_of_user_mappings * sizeof(struct hl_user_mapping));
 	if (!pgf_info->user_mappings) {
 		pgf_info->num_of_user_mappings = 0;
 		goto finish;
 	}
 
 	hash_for_each(ctx->mem_hash, i, hnode, node) {
-		userptr = hnode->ptr;
-		pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
-		pgf_info->user_mappings[map_idx].size = userptr->size;
-		map_idx++;
+		vm_type = hnode->ptr;
+		if ((*vm_type == VM_TYPE_USERPTR) && (is_pmmu)) {
+			userptr = hnode->ptr;
+			pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
+			pgf_info->user_mappings[map_idx].size = userptr->size;
+			map_idx++;
+		} else if ((*vm_type == VM_TYPE_PHYS_PACK) && (!is_pmmu)) {
+			phys_pg_pack = hnode->ptr;
+			pgf_info->user_mappings[map_idx].dev_va = hnode->vaddr;
+			pgf_info->user_mappings[map_idx].size = phys_pg_pack->total_size;
+			map_idx++;
+		}
 	}
 finish:
 	mutex_unlock(&ctx->mem_hash_lock);
@@ -2326,5 +2339,5 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is
 	hdev->captured_err_info.pgf_info.pgf.timestamp = ktime_to_ns(ktime_get());
 	hdev->captured_err_info.pgf_info.pgf.addr = addr;
 	hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id;
-	hl_capture_user_mappings(hdev);
+	hl_capture_user_mappings(hdev, is_pmmu);
 }
-- 
GitLab


From 15ac503cdc0d9a1275d82a926c673359cf69ebef Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Wed, 28 Sep 2022 22:14:55 +0300
Subject: [PATCH 2070/4122] habanalabs/gaudi2: capture RAZWI information

Added function to calculate possible engines which caused
RAZWI (read-only zero, write ignored), from a given router id or
module index.

When getting RAZWI via PSOC IP, first the router id is calculated
and then the possible engines that caused the RAZWI are calculated.

There is a possibility that the RAZWI initiator is not an engine. In
that case, it will not be included in possible engines as it
doesn't have an engine id.

RAZWI information is captured when receiving event from engine or via
PSOC IP.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 255 ++++++++++++++++++++++--
 include/uapi/misc/habanalabs.h          |   4 +
 2 files changed, 242 insertions(+), 17 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index e9c4ec429baee..1058c8a0e644a 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -128,6 +128,8 @@
 #define GAUDI2_VDEC_MSIX_ENTRIES		(GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM - \
 							GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 1)
 
+#define ENGINE_ID_DCORE_OFFSET (GAUDI2_DCORE1_ENGINE_ID_EDMA_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0)
+
 enum hl_pmmu_fatal_cause {
 	LATENCY_RD_OUT_FIFO_OVERRUN,
 	LATENCY_WR_OUT_FIFO_OVERRUN,
@@ -7092,9 +7094,12 @@ static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *q
 
 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
-			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info)
+			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
+			enum gaudi2_engine_id id)
 {
 	u32 razwi_hi, razwi_lo, razwi_xy;
+	u16 eng_id = id;
+	u8 rd_wr_flag;
 
 	if (is_write) {
 		if (read_razwi_regs) {
@@ -7106,6 +7111,7 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg);
 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg);
 		}
+		rd_wr_flag = HL_RAZWI_WRITE;
 	} else {
 		if (read_razwi_regs) {
 			razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI);
@@ -7116,8 +7122,12 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 			razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg);
 			razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg);
 		}
+		rd_wr_flag = HL_RAZWI_READ;
 	}
 
+	hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
+				rd_wr_flag | HL_RAZWI_HBW);
+
 	dev_err_ratelimited(hdev->dev,
 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
 		name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy);
@@ -7125,9 +7135,12 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 
 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
-			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info)
+			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
+			enum gaudi2_engine_id id)
 {
 	u32 razwi_addr, razwi_xy;
+	u16 eng_id = id;
+	u8 rd_wr_flag;
 
 	if (is_write) {
 		if (read_razwi_regs) {
@@ -7138,9 +7151,7 @@ static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_aw_razwi_id_reg);
 		}
 
-		dev_err_ratelimited(hdev->dev,
-			"%s-RAZWI SHARED RR LBW WR error, mstr_if 0x%llx, captured address 0x%x, Initiator coordinates 0x%x\n",
-			name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy);
+		rd_wr_flag = HL_RAZWI_WRITE;
 	} else {
 		if (read_razwi_regs) {
 			razwi_addr = RREG32(rtr_mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI);
@@ -7150,9 +7161,57 @@ static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
 			razwi_xy = le32_to_cpu(razwi_info->lbw.rr_ar_razwi_id_reg);
 		}
 
-		dev_err_ratelimited(hdev->dev,
-			"%s-RAZWI SHARED RR LBW AR error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
-			name, rtr_mstr_if_base_addr, razwi_addr, razwi_xy);
+		rd_wr_flag = HL_RAZWI_READ;
+	}
+
+	hl_capture_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW);
+	dev_err_ratelimited(hdev->dev,
+				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
+				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
+						razwi_xy);
+}
+
+static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
+						enum razwi_event_sources module, u8 module_idx)
+{
+	switch (module) {
+	case RAZWI_TPC:
+		if (module_idx == (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES))
+			return GAUDI2_DCORE0_ENGINE_ID_TPC_6;
+		return (((module_idx / NUM_OF_TPC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
+				(module_idx % NUM_OF_TPC_PER_DCORE) +
+				(GAUDI2_DCORE0_ENGINE_ID_TPC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
+
+	case RAZWI_MME:
+		return ((GAUDI2_DCORE0_ENGINE_ID_MME - GAUDI2_DCORE0_ENGINE_ID_EDMA_0) +
+			(module_idx * ENGINE_ID_DCORE_OFFSET));
+
+	case RAZWI_EDMA:
+		return (((module_idx / NUM_OF_EDMA_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
+			(module_idx % NUM_OF_EDMA_PER_DCORE));
+
+	case RAZWI_PDMA:
+		return (GAUDI2_ENGINE_ID_PDMA_0 + module_idx);
+
+	case RAZWI_NIC:
+		return (GAUDI2_ENGINE_ID_NIC0_0 + (NIC_NUMBER_OF_QM_PER_MACRO * module_idx));
+
+	case RAZWI_DEC:
+		if (module_idx == 8)
+			return GAUDI2_PCIE_ENGINE_ID_DEC_0;
+
+		if (module_idx == 9)
+			return GAUDI2_PCIE_ENGINE_ID_DEC_1;
+					;
+		return (((module_idx / NUM_OF_DEC_PER_DCORE) * ENGINE_ID_DCORE_OFFSET) +
+				(module_idx % NUM_OF_DEC_PER_DCORE) +
+				(GAUDI2_DCORE0_ENGINE_ID_DEC_0 - GAUDI2_DCORE0_ENGINE_ID_EDMA_0));
+
+	case RAZWI_ROT:
+		return GAUDI2_ENGINE_ID_ROT_0 + module_idx;
+
+	default:
+		return GAUDI2_ENGINE_ID_SIZE;
 	}
 }
 
@@ -7165,7 +7224,7 @@ static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
 				u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info)
 {
 	bool via_sft = false, read_razwi_regs = false;
-	u32 rtr_id, dcore_id, dcore_rtr_id, sft_id;
+	u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
 	u64 rtr_mstr_if_base_addr;
 	u32 hbw_shrd_aw = 0, hbw_shrd_ar = 0;
 	u32 lbw_shrd_aw = 0, lbw_shrd_ar = 0;
@@ -7299,9 +7358,11 @@ dump_info:
 	if (!hbw_shrd_aw && !hbw_shrd_ar && !lbw_shrd_aw && !lbw_shrd_ar)
 		return;
 
+	eng_id = gaudi2_razwi_calc_engine_id(hdev, module, module_idx);
 	if (hbw_shrd_aw) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
-						initiator_name, read_razwi_regs, razwi_info);
+						initiator_name, read_razwi_regs, razwi_info,
+						eng_id);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7310,7 +7371,8 @@ dump_info:
 
 	if (hbw_shrd_ar) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
-						initiator_name, read_razwi_regs, razwi_info);
+						initiator_name, read_razwi_regs, razwi_info,
+						eng_id);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7319,7 +7381,8 @@ dump_info:
 
 	if (lbw_shrd_aw) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
-						initiator_name, read_razwi_regs, razwi_info);
+						initiator_name, read_razwi_regs, razwi_info,
+						eng_id);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7328,7 +7391,8 @@ dump_info:
 
 	if (lbw_shrd_ar) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
-						initiator_name, read_razwi_regs, razwi_info);
+						initiator_name, read_razwi_regs, razwi_info,
+						eng_id);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7450,25 +7514,175 @@ static const char *gaudi2_get_initiators_name(u32 rtr_id)
 	}
 }
 
+static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
+{
+	switch (rtr_id) {
+	case DCORE0_RTR0:
+		engines[0] = GAUDI2_DCORE0_ENGINE_ID_DEC_0;
+		engines[1] = GAUDI2_DCORE0_ENGINE_ID_DEC_1;
+		engines[2] = GAUDI2_PCIE_ENGINE_ID_DEC_0;
+		engines[3] = GAUDI2_PCIE_ENGINE_ID_DEC_1;
+		engines[4] = GAUDI2_DCORE0_ENGINE_ID_TPC_6;
+		engines[5] = GAUDI2_ENGINE_ID_PDMA_0;
+		engines[6] = GAUDI2_ENGINE_ID_PDMA_1;
+		engines[7] = GAUDI2_ENGINE_ID_PCIE;
+		engines[8] = GAUDI2_DCORE0_ENGINE_ID_EDMA_0;
+		engines[9] = GAUDI2_DCORE1_ENGINE_ID_EDMA_0;
+		engines[10] = GAUDI2_ENGINE_ID_PSOC;
+		return 11;
+
+	case DCORE0_RTR1:
+		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_0;
+		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_1;
+		return 2;
+
+	case DCORE0_RTR2:
+		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_2;
+		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_3;
+		return 2;
+
+	case DCORE0_RTR3:
+		engines[0] = GAUDI2_DCORE0_ENGINE_ID_TPC_4;
+		engines[1] = GAUDI2_DCORE0_ENGINE_ID_TPC_5;
+		return 2;
+
+	case DCORE0_RTR4:
+	case DCORE0_RTR5:
+	case DCORE0_RTR6:
+	case DCORE0_RTR7:
+		engines[0] = GAUDI2_DCORE0_ENGINE_ID_MME;
+		return 1;
+
+	case DCORE1_RTR0:
+	case DCORE1_RTR1:
+	case DCORE1_RTR2:
+	case DCORE1_RTR3:
+		engines[0] = GAUDI2_DCORE1_ENGINE_ID_MME;
+		return 1;
+
+	case DCORE1_RTR4:
+		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_4;
+		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_5;
+		return 2;
+
+	case DCORE1_RTR5:
+		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_2;
+		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_3;
+		return 2;
+
+	case DCORE1_RTR6:
+		engines[0] = GAUDI2_DCORE1_ENGINE_ID_TPC_0;
+		engines[1] = GAUDI2_DCORE1_ENGINE_ID_TPC_1;
+		return 2;
+
+	case DCORE1_RTR7:
+		engines[0] = GAUDI2_DCORE1_ENGINE_ID_DEC_0;
+		engines[1] = GAUDI2_DCORE1_ENGINE_ID_DEC_1;
+		engines[2] = GAUDI2_ENGINE_ID_NIC0_0;
+		engines[3] = GAUDI2_ENGINE_ID_NIC1_0;
+		engines[4] = GAUDI2_ENGINE_ID_NIC2_0;
+		engines[5] = GAUDI2_ENGINE_ID_NIC3_0;
+		engines[6] = GAUDI2_ENGINE_ID_NIC4_0;
+		engines[7] = GAUDI2_ENGINE_ID_ARC_FARM;
+		engines[8] = GAUDI2_ENGINE_ID_KDMA;
+		engines[9] = GAUDI2_DCORE0_ENGINE_ID_EDMA_1;
+		engines[10] = GAUDI2_DCORE1_ENGINE_ID_EDMA_1;
+		return 11;
+
+	case DCORE2_RTR0:
+		engines[0] = GAUDI2_DCORE2_ENGINE_ID_DEC_0;
+		engines[1] = GAUDI2_DCORE2_ENGINE_ID_DEC_1;
+		engines[2] = GAUDI2_ENGINE_ID_NIC5_0;
+		engines[3] = GAUDI2_ENGINE_ID_NIC6_0;
+		engines[4] = GAUDI2_ENGINE_ID_NIC7_0;
+		engines[5] = GAUDI2_ENGINE_ID_NIC8_0;
+		engines[6] = GAUDI2_DCORE2_ENGINE_ID_EDMA_0;
+		engines[7] = GAUDI2_DCORE3_ENGINE_ID_EDMA_0;
+		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
+		return 9;
+
+	case DCORE2_RTR1:
+		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_4;
+		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_5;
+		return 2;
+
+	case DCORE2_RTR2:
+		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_2;
+		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_3;
+		return 2;
+
+	case DCORE2_RTR3:
+		engines[0] = GAUDI2_DCORE2_ENGINE_ID_TPC_0;
+		engines[1] = GAUDI2_DCORE2_ENGINE_ID_TPC_1;
+		return 2;
+
+	case DCORE2_RTR4:
+	case DCORE2_RTR5:
+	case DCORE2_RTR6:
+	case DCORE2_RTR7:
+		engines[0] = GAUDI2_DCORE2_ENGINE_ID_MME;
+		return 1;
+	case DCORE3_RTR0:
+	case DCORE3_RTR1:
+	case DCORE3_RTR2:
+	case DCORE3_RTR3:
+		engines[0] = GAUDI2_DCORE3_ENGINE_ID_MME;
+		return 1;
+	case DCORE3_RTR4:
+		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_0;
+		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_1;
+		return 2;
+	case DCORE3_RTR5:
+		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_2;
+		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_3;
+		return 2;
+	case DCORE3_RTR6:
+		engines[0] = GAUDI2_DCORE3_ENGINE_ID_TPC_4;
+		engines[1] = GAUDI2_DCORE3_ENGINE_ID_TPC_5;
+		return 2;
+	case DCORE3_RTR7:
+		engines[0] = GAUDI2_DCORE3_ENGINE_ID_DEC_0;
+		engines[1] = GAUDI2_DCORE3_ENGINE_ID_DEC_1;
+		engines[2] = GAUDI2_ENGINE_ID_NIC9_0;
+		engines[3] = GAUDI2_ENGINE_ID_NIC10_0;
+		engines[4] = GAUDI2_ENGINE_ID_NIC11_0;
+		engines[5] = GAUDI2_DCORE2_ENGINE_ID_EDMA_1;
+		engines[6] = GAUDI2_DCORE3_ENGINE_ID_EDMA_1;
+		engines[7] = GAUDI2_ENGINE_ID_ROT_1;
+		engines[8] = GAUDI2_ENGINE_ID_ROT_0;
+		return 9;
+	default:
+		return 0;
+	}
+}
+
 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
 							u64 rtr_ctrl_base_addr, bool is_write)
 {
+	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
 	u32 razwi_hi, razwi_lo;
+	u8 rd_wr_flag;
+
+	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
 
 	if (is_write) {
 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI);
 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO);
+		rd_wr_flag = HL_RAZWI_WRITE;
 
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1);
 	} else {
 		razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI);
 		razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO);
+		rd_wr_flag = HL_RAZWI_READ;
 
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
 	}
 
+	hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
+				rd_wr_flag | HL_RAZWI_HBW);
 	dev_err_ratelimited(hdev->dev,
 		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
 		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
@@ -7480,20 +7694,27 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u
 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
 							u64 rtr_ctrl_base_addr, bool is_write)
 {
+	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
 	u32 razwi_addr;
+	u8 rd_wr_flag;
+
+	num_of_eng = gaudi2_get_razwi_initiators(rtr_id, &engines[0]);
 
 	if (is_write) {
 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR);
+		rd_wr_flag = HL_RAZWI_WRITE;
 
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1);
 	} else {
 		razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR);
+		rd_wr_flag = HL_RAZWI_READ;
 
 		/* Clear set indication */
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
 	}
 
+	hl_capture_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW);
 	dev_err_ratelimited(hdev->dev,
 		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
 		is_write ? "WR" : "RD", rtr_id, razwi_addr);
@@ -7974,28 +8195,28 @@ static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev)
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
-							NULL);
+							NULL, GAUDI2_ENGINE_ID_PCIE);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
-							NULL);
+							NULL, GAUDI2_ENGINE_ID_PCIE);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
-							NULL);
+							NULL, GAUDI2_ENGINE_ID_PCIE);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
-							NULL);
+							NULL, GAUDI2_ENGINE_ID_PCIE);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 }
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 2b794f54e2ed3..a4ceee6818986 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -597,6 +597,10 @@ enum gaudi2_engine_id {
 	GAUDI2_ENGINE_ID_NIC10_1,
 	GAUDI2_ENGINE_ID_NIC11_0,
 	GAUDI2_ENGINE_ID_NIC11_1,
+	GAUDI2_ENGINE_ID_PCIE,
+	GAUDI2_ENGINE_ID_PSOC,
+	GAUDI2_ENGINE_ID_ARC_FARM,
+	GAUDI2_ENGINE_ID_KDMA,
 	GAUDI2_ENGINE_ID_SIZE
 };
 
-- 
GitLab


From 4f11694f27582fa0875c4be7d133e0ae88ad36f8 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Thu, 29 Sep 2022 10:28:36 +0300
Subject: [PATCH 2071/4122] habanalabs/gaudi2: capture page fault data

Capture page fault data when it happens.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 1058c8a0e644a..a4e3586f1a125 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8286,6 +8286,7 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool
 
 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
 				is_pmmu ? "PMMU" : "HMMU", addr);
+	hl_capture_page_fault(hdev, addr, 0, is_pmmu);
 
 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
 }
-- 
GitLab


From 27cd39afde454ca8f9a438cfc84d676e96b36bd7 Mon Sep 17 00:00:00 2001
From: Tal Cohen <talcohen@habana.ai>
Date: Mon, 3 Oct 2022 13:55:50 +0300
Subject: [PATCH 2072/4122] habanalabs: verify no zero event is sent

The event notifier mechanism should not raise an empty
event (event equals zero).

Signed-off-by: Tal Cohen <talcohen@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 5 +++++
 drivers/misc/habanalabs/gaudi/gaudi.c   | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index b8b32285720de..9b54d1df53022 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1746,6 +1746,11 @@ void hl_notifier_event_send_all(struct hl_device *hdev, u64 event_mask)
 {
 	struct hl_fpriv	*hpriv;
 
+	if (!event_mask) {
+		dev_warn(hdev->dev, "Skip sending zero event");
+		return;
+	}
+
 	mutex_lock(&hdev->fpriv_list_lock);
 
 	list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 1a99f7be8b609..337123f735014 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7945,7 +7945,9 @@ reset_device:
 	/* despite reset doesn't execute. a notification on
 	 * occurred event needs to be sent here
 	 */
-	hl_notifier_event_send_all(hdev, event_mask);
+	if (event_mask)
+		hl_notifier_event_send_all(hdev, event_mask);
+
 	if (reset_required)
 		hl_device_reset(hdev, flags);
 	else
-- 
GitLab


From dc8d243caea8056bd2580b0f1703fe019d3b4419 Mon Sep 17 00:00:00 2001
From: Dilip Puri <dilipp@habana.ai>
Date: Wed, 12 Oct 2022 11:06:48 +0300
Subject: [PATCH 2073/4122] habanalabs/gaudi2: unsecure CBU_EARLY_BRESP
 registers

NIC ARCs need to have access to CBU_EARLY_BRESP, hence we unsecure
those registers.

Signed-off-by: Dilip Puri <dilipp@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2_security.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
index c6906fb142290..768c2f3dc9003 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2_security.c
@@ -1764,6 +1764,7 @@ static const struct range gaudi2_pb_nic0_qm_arc_aux0_unsecured_regs[] = {
 	{mmNIC0_QM_ARC_AUX0_CLUSTER_NUM, mmNIC0_QM_ARC_AUX0_WAKE_UP_EVENT},
 	{mmNIC0_QM_ARC_AUX0_ARC_RST_REQ, mmNIC0_QM_ARC_AUX0_CID_OFFSET_7},
 	{mmNIC0_QM_ARC_AUX0_SCRATCHPAD_0, mmNIC0_QM_ARC_AUX0_INFLIGHT_LBU_RD_CNT},
+	{mmNIC0_QM_ARC_AUX0_CBU_EARLY_BRESP_EN, mmNIC0_QM_ARC_AUX0_CBU_EARLY_BRESP_EN},
 	{mmNIC0_QM_ARC_AUX0_LBU_EARLY_BRESP_EN, mmNIC0_QM_ARC_AUX0_LBU_EARLY_BRESP_EN},
 	{mmNIC0_QM_ARC_AUX0_DCCM_QUEUE_BASE_ADDR_0, mmNIC0_QM_ARC_AUX0_DCCM_QUEUE_ALERT_MSG},
 	{mmNIC0_QM_ARC_AUX0_DCCM_Q_PUSH_FIFO_CNT, mmNIC0_QM_ARC_AUX0_QMAN_ARC_CQ_SHADOW_CI},
-- 
GitLab


From 24fdfb359cadd222de8ba9d2d6a3f4dfc514878a Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Tue, 18 Oct 2022 08:51:33 +0300
Subject: [PATCH 2074/4122] habanalabs: fix using freed pointer

The code uses the pointer for trace purpose (without actually
dereference it) but still get static analysis warning.
This patch eliminate the warning.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 9b54d1df53022..dd01be5c4ba3e 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -135,6 +135,9 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
 					dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
 					const char *caller)
 {
+	/* this is needed to avoid warning on using freed pointer */
+	u64 store_cpu_addr = (u64) (uintptr_t) cpu_addr;
+
 	switch (alloc_type) {
 	case DMA_ALLOC_COHERENT:
 		hdev->asic_funcs->asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle);
@@ -147,7 +150,7 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
 		break;
 	}
 
-	trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller);
+	trace_habanalabs_dma_free(hdev->dev, store_cpu_addr, dma_handle, size, caller);
 }
 
 void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
-- 
GitLab


From e325d5dbf34500fd42d5847d5b8c4e097f8030af Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Wed, 14 Sep 2022 08:53:29 +0300
Subject: [PATCH 2075/4122] habanalabs: allow setting HBM BAR to other regions

Up until now the use-case in the driver was that the HBM is accessed
using the HBM BAR, yet the BAR sometimes cannot cover the whole HBM and
so we needed to set the BAR to other HBM offset.
Now we are facing the need to access other PCI memory regions that can
be covered by the HBM BAR.
To answer that we are allowing the caller to determine if the HBM BAR
need to be set or not regardless of the PCI memory region.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 29 ++++++++++++---------
 drivers/misc/habanalabs/common/habanalabs.h |  2 ++
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index dd01be5c4ba3e..0026fe42b3d2c 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -32,6 +32,7 @@ enum dma_alloc_type {
  * @hdev: pointer to habanalabs device structure.
  * @addr: the address the caller wants to access.
  * @region: the PCI region.
+ * @new_bar_region_base: the new BAR region base address.
  *
  * @return: the old BAR base address on success, U64_MAX for failure.
  *	    The caller should set it back to the old address after use.
@@ -41,7 +42,8 @@ enum dma_alloc_type {
  * This function can be called also if the bar doesn't need to be set,
  * in that case it just won't change the base.
  */
-static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region)
+static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region,
+				u64 *new_bar_region_base)
 {
 	struct asic_fixed_properties *prop = &hdev->asic_prop;
 	u64 bar_base_addr, old_base;
@@ -55,27 +57,28 @@ static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_regi
 	old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
 
 	/* in case of success we need to update the new BAR base */
-	if (old_base != U64_MAX)
-		region->region_base = bar_base_addr;
+	if ((old_base != U64_MAX) && new_bar_region_base)
+		*new_bar_region_base = bar_base_addr;
 
 	return old_base;
 }
 
-static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
-	enum debugfs_access_type acc_type, enum pci_region region_type)
+int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
+	enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar)
 {
 	struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
+	u64 old_base = 0, rc, new_bar_region_base = 0;
 	void __iomem *acc_addr;
-	u64 old_base = 0, rc;
 
-	if (region_type == PCI_REGION_DRAM) {
-		old_base = hl_set_dram_bar(hdev, addr, region);
+	if (set_dram_bar) {
+		old_base = hl_set_dram_bar(hdev, addr, region, &new_bar_region_base);
 		if (old_base == U64_MAX)
 			return -EIO;
 	}
 
-	acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base +
-			region->offset_in_bar;
+	acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
+			(addr - new_bar_region_base);
+
 	switch (acc_type) {
 	case DEBUGFS_READ8:
 		*val = readb(acc_addr);
@@ -97,8 +100,8 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
 		break;
 	}
 
-	if (region_type == PCI_REGION_DRAM) {
-		rc = hl_set_dram_bar(hdev, old_base, region);
+	if (set_dram_bar) {
+		rc = hl_set_dram_bar(hdev, old_base, region, NULL);
 		if (rc == U64_MAX)
 			return -EIO;
 	}
@@ -283,7 +286,7 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
 	case PCI_REGION_SRAM:
 	case PCI_REGION_DRAM:
 		return hl_access_sram_dram_region(hdev, addr, val, acc_type,
-			region_type);
+				region_type, (region_type == PCI_REGION_DRAM));
 	default:
 		return -EFAULT;
 	}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 4913197c433e6..c8347eac09ed0 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3486,6 +3486,8 @@ void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_
 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
 				enum dma_data_direction dir);
+int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
+	enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar);
 int hl_access_cfg_region(struct hl_device *hdev, u64 addr, u64 *val,
 	enum debugfs_access_type acc_type);
 int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
-- 
GitLab


From 5ad06bb1d2c073c8b071016226fb9ebe2163e660 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 20 Oct 2022 11:29:03 +0300
Subject: [PATCH 2076/4122] habanalabs/gaudi2: remove configurations to access
 the MSI-X doorbell

The virtual MSI-X doorbell is supported now in F/W, so all
configurations to access the PCIE_DBI MSI-X doorbell can be removed.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 34 +++----------------------
 1 file changed, 3 insertions(+), 31 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index a4e3586f1a125..9208f69dd7f81 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -4473,23 +4473,9 @@ static void gaudi2_init_sm(struct hl_device *hdev)
 	reg_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_MON_CONFIG_CQ_EN_MASK, 1);
 	WREG32(mmDCORE0_SYNC_MNGR_OBJS_MON_CONFIG_0 + (4 * i), reg_val);
 
-	/* Init CQ0 DB */
-	/* Configure the monitor to trigger MSI-X interrupt */
-	/* TODO:
-	 * Remove the if statement when virtual MSI-X doorbell is supported in simulator (SW-93022)
-	 * and in F/W (SW-93024).
-	 */
-	if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
-		u64 msix_db_reg = CFG_BASE + mmPCIE_DBI_MSIX_DOORBELL_OFF;
-
-		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(msix_db_reg));
-		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(msix_db_reg));
-	} else {
-		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0,
-				lower_32_bits(gaudi2->virt_msix_db_dma_addr));
-		WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0,
-				upper_32_bits(gaudi2->virt_msix_db_dma_addr));
-	}
+	/* Init CQ0 DB - configure the monitor to trigger MSI-X interrupt */
+	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0, lower_32_bits(gaudi2->virt_msix_db_dma_addr));
+	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0, upper_32_bits(gaudi2->virt_msix_db_dma_addr));
 	WREG32(mmDCORE0_SYNC_MNGR_GLBL_LBW_DATA_0, GAUDI2_IRQ_NUM_COMPLETION);
 
 	for (i = 0 ; i < GAUDI2_RESERVED_CQ_NUMBER ; i++) {
@@ -4657,20 +4643,6 @@ static void gaudi2_init_vdec_brdg_ctrl(struct hl_device *hdev, u64 base_addr, u3
 {
 	u32 sob_id;
 
-	/* TODO:
-	 * Remove when virtual MSI-X doorbell is supported in simulator (SW-93022) and in F/W
-	 * (SW-93024).
-	 */
-	if (!hdev->pdev || hdev->asic_prop.fw_security_enabled) {
-		u32 interrupt_id = GAUDI2_IRQ_NUM_DCORE0_DEC0_NRM + 2 * decoder_id;
-
-		WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
-		WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_WDATA, interrupt_id);
-		WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_AWADDR, mmPCIE_DBI_MSIX_DOORBELL_OFF);
-		WREG32(base_addr + BRDG_CTRL_ABNRM_MSIX_LBW_WDATA, interrupt_id + 1);
-		return;
-	}
-
 	/* VCMD normal interrupt */
 	sob_id = GAUDI2_RESERVED_SOB_DEC_NRM_FIRST + decoder_id;
 	WREG32(base_addr + BRDG_CTRL_NRM_MSIX_LBW_AWADDR,
-- 
GitLab


From 6bcb2d05a59b3534821a194f8642808ae56f2d10 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Wed, 19 Oct 2022 20:24:55 +0300
Subject: [PATCH 2077/4122] habanalabs: fix user mappings calculation in case
 of page fault

As there are 2 types of user mappings, pmmu and hmmu, calculate
only the relevant mappings for the requested type.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 0026fe42b3d2c..0e88396744a1e 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2305,8 +2305,13 @@ static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu)
 	}
 
 	mutex_lock(&ctx->mem_hash_lock);
-	hash_for_each(ctx->mem_hash, i, hnode, node)
-		pgf_info->num_of_user_mappings++;
+	hash_for_each(ctx->mem_hash, i, hnode, node) {
+		vm_type = hnode->ptr;
+		if (((*vm_type == VM_TYPE_USERPTR) && is_pmmu) ||
+				((*vm_type == VM_TYPE_PHYS_PACK) && !is_pmmu))
+			pgf_info->num_of_user_mappings++;
+
+	}
 
 	if (!pgf_info->num_of_user_mappings)
 		goto finish;
-- 
GitLab


From d1e0ac37ed41e581c030a8fffe4ad1d0bb987872 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Sun, 23 Oct 2022 14:46:08 +0300
Subject: [PATCH 2078/4122] habanalabs: avoid divide by zero in device
 utilization

Currently there is no verification whether the divisor is legal.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 0e88396744a1e..b71303ba11d0e 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -987,11 +987,16 @@ static void device_late_fini(struct hl_device *hdev)
 
 int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
 {
-	u64 max_power, curr_power, dc_power, dividend;
+	u64 max_power, curr_power, dc_power, dividend, divisor;
 	int rc;
 
 	max_power = hdev->max_power;
 	dc_power = hdev->asic_prop.dc_power_default;
+	divisor = max_power - dc_power;
+	if (!divisor) {
+		dev_warn(hdev->dev, "device utilization is not supported\n");
+		return -EOPNOTSUPP;
+	}
 	rc = hl_fw_cpucp_power_get(hdev, &curr_power);
 
 	if (rc)
@@ -1000,7 +1005,7 @@ int hl_device_utilization(struct hl_device *hdev, u32 *utilization)
 	curr_power = clamp(curr_power, dc_power, max_power);
 
 	dividend = (curr_power - dc_power) * 100;
-	*utilization = (u32) div_u64(dividend, (max_power - dc_power));
+	*utilization = (u32) div_u64(dividend, divisor);
 
 	return 0;
 }
-- 
GitLab


From a88a6f5f5cdfce21aaf988370287e0e78970c8ad Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 15:08:13 +0300
Subject: [PATCH 2079/4122] habanalabs: add support for graceful hard reset

Calling hl_device_reset() for a hard reset will lead to a quite
immediate device reset and to killing user process.
For resets that follow errors, it disables the option to debug the
errors on both the device side and the user application side.

This patch adds a 'graceful hard reset' option and a new
hl_device_cond_reset() function.
Under some conditions, mainly if there is no user process or if he is
not registered to driver notifications, this function will execute hard
reset as usual.
Otherwise, the reset will be postponed and a notification will be sent
to user, to let him perform post-error actions and then to release the
device, after which reset will take place.

If device is not released by user in some defined time, a watchdog work
will execute the reset in any case.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 141 ++++++++++++++++++--
 drivers/misc/habanalabs/common/habanalabs.h |  14 +-
 2 files changed, 140 insertions(+), 15 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index b71303ba11d0e..bcd9599249714 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -16,7 +16,9 @@
 
 #include <trace/events/habanalabs.h>
 
-#define HL_RESET_DELAY_USEC		10000	/* 10ms */
+#define HL_RESET_DELAY_USEC			10000	/* 10ms */
+
+#define HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC	5
 
 enum dma_alloc_type {
 	DMA_ALLOC_COHERENT,
@@ -387,7 +389,7 @@ bool hl_ctrl_device_operational(struct hl_device *hdev,
 static void hpriv_release(struct kref *ref)
 {
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
-	bool device_is_idle = true;
+	bool reset_device, device_is_idle = true;
 	struct hl_fpriv *hpriv;
 	struct hl_device *hdev;
 
@@ -404,14 +406,20 @@ static void hpriv_release(struct kref *ref)
 	mutex_destroy(&hpriv->ctx_lock);
 	mutex_destroy(&hpriv->restore_phase_mutex);
 
-	/* No need for idle status check if device is going to be reset in any case */
-	if (!hdev->reset_upon_device_release && hdev->pdev && !hdev->pldm)
+	/* Device should be reset if reset-upon-device-release is enabled, or if there is a pending
+	 * reset that waits for device release.
+	 */
+	reset_device = hdev->reset_upon_device_release || hdev->reset_info.watchdog_active;
+
+	/* Unless device is reset in any case, check idle status and reset if device is not idle */
+	if (!reset_device && hdev->pdev && !hdev->pldm)
 		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
 							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
-
-	if (!device_is_idle)
+	if (!device_is_idle) {
 		dev_err(hdev->dev, "device not idle after user context is closed (0x%llx_%llx)\n",
 			idle_mask[1], idle_mask[0]);
+		reset_device = true;
+	}
 
 	/* We need to remove the user from the list to make sure the reset process won't
 	 * try to kill the user process. Because, if we got here, it means there are no
@@ -426,9 +434,10 @@ static void hpriv_release(struct kref *ref)
 	list_del(&hpriv->dev_node);
 	mutex_unlock(&hdev->fpriv_list_lock);
 
-	if (!device_is_idle || hdev->reset_upon_device_release) {
+	if (reset_device) {
 		hl_device_reset(hdev, HL_DRV_RESET_DEV_RELEASE);
 	} else {
+		/* Scrubbing is handled within hl_device_reset(), so here need to do it directly */
 		int rc = hdev->asic_funcs->scrub_device_mem(hdev);
 
 		if (rc)
@@ -695,6 +704,20 @@ static void device_hard_reset_pending(struct work_struct *work)
 	}
 }
 
+static void device_release_watchdog_func(struct work_struct *work)
+{
+	struct hl_device_reset_work *device_release_watchdog_work =
+				container_of(work, struct hl_device_reset_work, reset_work.work);
+	struct hl_device *hdev = device_release_watchdog_work->hdev;
+	u32 flags;
+
+	dev_dbg(hdev->dev, "Device wasn't released in time. Initiate device reset.\n");
+
+	flags = device_release_watchdog_work->flags | HL_DRV_RESET_FROM_WD_THR;
+
+	hl_device_reset(hdev, flags);
+}
+
 /*
  * device_early_init - do some early initialization for the habanalabs device
  *
@@ -813,11 +836,14 @@ static int device_early_init(struct hl_device *hdev)
 		goto free_cb_mgr;
 	}
 
-	INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work,
-			device_hard_reset_pending);
+	INIT_DELAYED_WORK(&hdev->device_reset_work.reset_work, device_hard_reset_pending);
 	hdev->device_reset_work.hdev = hdev;
 	hdev->device_fini_pending = 0;
 
+	INIT_DELAYED_WORK(&hdev->device_release_watchdog_work.reset_work,
+				device_release_watchdog_func);
+	hdev->device_release_watchdog_work.hdev = hdev;
+
 	mutex_init(&hdev->send_cpu_message_lock);
 	mutex_init(&hdev->debug_lock);
 	INIT_LIST_HEAD(&hdev->cs_mirror_list);
@@ -1367,8 +1393,8 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
 	bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false,
-			reset_upon_device_release = false, schedule_hard_reset = false,
-			skip_wq_flush, delay_reset;
+			reset_upon_device_release = false, schedule_hard_reset = false, delay_reset,
+			from_dev_release, from_watchdog_thread;
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
 	struct hl_ctx *ctx;
 	int i, rc;
@@ -1381,8 +1407,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
 	hard_reset = !!(flags & HL_DRV_RESET_HARD);
 	from_hard_reset_thread = !!(flags & HL_DRV_RESET_FROM_RESET_THR);
 	fw_reset = !!(flags & HL_DRV_RESET_BYPASS_REQ_TO_FW);
-	skip_wq_flush = !!(flags & HL_DRV_RESET_DEV_RELEASE);
+	from_dev_release = !!(flags & HL_DRV_RESET_DEV_RELEASE);
 	delay_reset = !!(flags & HL_DRV_RESET_DELAY);
+	from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR);
 
 	if (!hard_reset && !hdev->asic_prop.supports_compute_reset) {
 		hard_instead_soft = true;
@@ -1439,6 +1466,23 @@ do_reset:
 
 		spin_unlock(&hdev->reset_info.lock);
 
+		/* Cancel the device release watchdog work if required.
+		 * In case of reset-upon-device-release while the release watchdog work is
+		 * scheduled, do hard-reset instead of compute-reset.
+		 */
+		if ((hard_reset || from_dev_release) && hdev->reset_info.watchdog_active) {
+			hdev->reset_info.watchdog_active = 0;
+			if (!from_watchdog_thread)
+				cancel_delayed_work_sync(
+						&hdev->device_release_watchdog_work.reset_work);
+
+			if (from_dev_release) {
+				flags |= HL_DRV_RESET_HARD;
+				flags &= ~HL_DRV_RESET_DEV_RELEASE;
+				hard_reset = true;
+			}
+		}
+
 		if (delay_reset)
 			usleep_range(HL_RESET_DELAY_USEC, HL_RESET_DELAY_USEC << 1);
 
@@ -1474,7 +1518,7 @@ again:
 		return 0;
 	}
 
-	cleanup_resources(hdev, hard_reset, fw_reset, skip_wq_flush);
+	cleanup_resources(hdev, hard_reset, fw_reset, from_dev_release);
 
 kill_processes:
 	if (hard_reset) {
@@ -1735,6 +1779,73 @@ out_err:
 	return rc;
 }
 
+/*
+ * hl_device_cond_reset() - conditionally reset the device.
+ * @hdev: pointer to habanalabs device structure.
+ * @reset_flags: reset flags.
+ * @event_mask: events to notify user about.
+ *
+ * Conditionally reset the device, or alternatively schedule a watchdog work to reset the device
+ * unless another reset precedes it.
+ */
+int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask)
+{
+	struct hl_ctx *ctx = NULL;
+
+	/* Device release watchdog is only for hard reset */
+	if (!(flags & HL_DRV_RESET_HARD) && hdev->asic_prop.allow_inference_soft_reset)
+		goto device_reset;
+
+	/* F/W reset cannot be postponed */
+	if (flags & HL_DRV_RESET_BYPASS_REQ_TO_FW)
+		goto device_reset;
+
+	/* Device release watchdog is relevant only if user exists and gets a reset notification */
+	if (!(event_mask & HL_NOTIFIER_EVENT_DEVICE_RESET)) {
+		dev_err(hdev->dev, "Resetting device without a reset indication to user\n");
+		goto device_reset;
+	}
+
+	ctx = hl_get_compute_ctx(hdev);
+	if (!ctx || !ctx->hpriv->notifier_event.eventfd)
+		goto device_reset;
+
+	/* Schedule the device release watchdog work unless reset is already in progress or if the
+	 * work is already scheduled.
+	 */
+	spin_lock(&hdev->reset_info.lock);
+	if (hdev->reset_info.in_reset) {
+		spin_unlock(&hdev->reset_info.lock);
+		goto device_reset;
+	}
+
+	if (hdev->reset_info.watchdog_active)
+		goto out;
+
+	hdev->device_release_watchdog_work.flags = flags;
+	dev_dbg(hdev->dev, "Device is going to be reset in %u sec unless being released\n",
+		hdev->device_release_watchdog_timeout_sec);
+	schedule_delayed_work(&hdev->device_release_watchdog_work.reset_work,
+				msecs_to_jiffies(hdev->device_release_watchdog_timeout_sec * 1000));
+	hdev->reset_info.watchdog_active = 1;
+out:
+	spin_unlock(&hdev->reset_info.lock);
+
+	hl_notifier_event_send_all(hdev, event_mask);
+
+	hl_ctx_put(ctx);
+
+	return 0;
+
+device_reset:
+	if (event_mask)
+		hl_notifier_event_send_all(hdev, event_mask);
+	if (ctx)
+		hl_ctx_put(ctx);
+
+	return hl_device_reset(hdev, flags);
+}
+
 static void hl_notifier_event_send(struct hl_notifier_event *notifier_event, u64 event_mask)
 {
 	mutex_lock(&notifier_event->lock);
@@ -1932,6 +2043,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
 	hdev->asic_funcs->state_dump_init(hdev);
 
+	hdev->device_release_watchdog_timeout_sec = HL_DEVICE_RELEASE_WATCHDOG_TIMEOUT_SEC;
+
 	hdev->memory_scrub_val = MEM_SCRUB_DEFAULT_VAL;
 	hl_debugfs_add_device(hdev);
 
@@ -2152,6 +2265,8 @@ void hl_device_fini(struct hl_device *hdev)
 		}
 	}
 
+	cancel_delayed_work_sync(&hdev->device_release_watchdog_work.reset_work);
+
 	/* Disable PCI access from device F/W so it won't send us additional
 	 * interrupts. We disable MSI/MSI-X at the halt_engines function and we
 	 * can't have the F/W sending us interrupts after that. We need to
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index c8347eac09ed0..bfaaa9daa7501 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -191,6 +191,9 @@ enum hl_mmu_enablement {
  *
  * - HL_DRV_RESET_DELAY
  *       Set if a delay should be added before the reset
+ *
+ * - HL_DRV_RESET_FROM_WD_THR
+ *       Set if the caller is the device release watchdog thread
  */
 
 #define HL_DRV_RESET_HARD		(1 << 0)
@@ -201,6 +204,7 @@ enum hl_mmu_enablement {
 #define HL_DRV_RESET_BYPASS_REQ_TO_FW	(1 << 5)
 #define HL_DRV_RESET_FW_FATAL_ERR	(1 << 6)
 #define HL_DRV_RESET_DELAY		(1 << 7)
+#define HL_DRV_RESET_FROM_WD_THR	(1 << 8)
 
 /*
  * Security
@@ -3009,6 +3013,7 @@ struct hl_error_info {
  *                          same cause.
  * @skip_reset_on_timeout: Skip device reset if CS has timed out, wait for it to
  *                         complete instead.
+ * @watchdog_active: true if a device release watchdog work is scheduled.
  */
 struct hl_reset_info {
 	spinlock_t	lock;
@@ -3019,12 +3024,11 @@ struct hl_reset_info {
 	u8		in_compute_reset;
 	u8		needs_reset;
 	u8		hard_reset_pending;
-
 	u8		curr_reset_cause;
 	u8		prev_reset_trigger;
 	u8		reset_trigger_repeated;
-
 	u8		skip_reset_on_timeout;
+	u8		watchdog_active;
 };
 
 /**
@@ -3040,6 +3044,8 @@ struct hl_reset_info {
  * @dev_ctrl: related kernel device structure for the control device
  * @work_heartbeat: delayed work for CPU-CP is-alive check.
  * @device_reset_work: delayed work which performs hard reset
+ * @device_release_watchdog_work: watchdog work that performs hard reset if user doesn't release
+ *                                device upon certain error cases.
  * @asic_name: ASIC specific name.
  * @asic_type: ASIC specific type.
  * @completion_queue: array of hl_cq.
@@ -3149,6 +3155,7 @@ struct hl_reset_info {
  *                   indicates which decoder engines are binned-out
  * @edma_binning: contains mask of edma engines that is received from the f/w which
  *                   indicates which edma engines are binned-out
+ * @device_release_watchdog_timeout_sec: device release watchdog timeout value in seconds.
  * @id: device minor.
  * @id_control: minor of the control device.
  * @cdev_idx: char device index. Used for setting its name.
@@ -3218,6 +3225,7 @@ struct hl_device {
 	struct device			*dev_ctrl;
 	struct delayed_work		work_heartbeat;
 	struct hl_device_reset_work	device_reset_work;
+	struct hl_device_reset_work	device_release_watchdog_work;
 	char				asic_name[HL_STR_MAX];
 	char				status[HL_DEV_STS_MAX][HL_STR_MAX];
 	enum hl_asic_type		asic_type;
@@ -3312,6 +3320,7 @@ struct hl_device {
 	u32				high_pll;
 	u32				decoder_binning;
 	u32				edma_binning;
+	u32				device_release_watchdog_timeout_sec;
 	u16				id;
 	u16				id_control;
 	u16				cdev_idx;
@@ -3551,6 +3560,7 @@ void hl_device_fini(struct hl_device *hdev);
 int hl_device_suspend(struct hl_device *hdev);
 int hl_device_resume(struct hl_device *hdev);
 int hl_device_reset(struct hl_device *hdev, u32 flags);
+int hl_device_cond_reset(struct hl_device *hdev, u32 flags, u64 event_mask);
 void hl_hpriv_get(struct hl_fpriv *hpriv);
 int hl_hpriv_put(struct hl_fpriv *hpriv);
 int hl_device_utilization(struct hl_device *hdev, u32 *utilization);
-- 
GitLab


From 11669b58fa1cee8442ae31ad4ba71398729727b5 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 16:37:41 +0300
Subject: [PATCH 2080/4122] habanalabs: add an option to control watchdog
 timeout via debugfs

Add an option to control the timeout value for the driver's watchdog
of the reset process. The timeout represents the amount of the user
has to close his process once he gets a device reset notification from
the driver.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 Documentation/ABI/testing/debugfs-driver-habanalabs | 7 +++++++
 drivers/misc/habanalabs/common/debugfs.c            | 5 +++++
 2 files changed, 12 insertions(+)

diff --git a/Documentation/ABI/testing/debugfs-driver-habanalabs b/Documentation/ABI/testing/debugfs-driver-habanalabs
index c915bf17b2932..85f6d04f528b6 100644
--- a/Documentation/ABI/testing/debugfs-driver-habanalabs
+++ b/Documentation/ABI/testing/debugfs-driver-habanalabs
@@ -91,6 +91,13 @@ Description:    Enables the root user to set the device to specific state.
                 Valid values are "disable", "enable", "suspend", "resume".
                 User can read this property to see the valid values
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/device_release_watchdog_timeout
+Date:           Oct 2022
+KernelVersion:  6.2
+Contact:        ttayar@habana.ai
+Description:    The watchdog timeout value in seconds for a device relese upon
+                certain error cases, after which the device is reset.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/dma_size
 Date:           Apr 2021
 KernelVersion:  5.13
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 48d3ec8b5c825..945c0e6758caa 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -1769,6 +1769,11 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 				dev_entry,
 				&hl_timeout_locked_fops);
 
+	debugfs_create_u32("device_release_watchdog_timeout",
+				0644,
+				dev_entry->root,
+				&hdev->device_release_watchdog_timeout_sec);
+
 	for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
 		debugfs_create_file(hl_debugfs_list[i].name,
 					0444,
-- 
GitLab


From 5b8873b39c5d4ee93e382389b199d553b38b19f3 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 16:43:47 +0300
Subject: [PATCH 2081/4122] habanalabs/gaudi: use graceful hard reset for F/W
 events

Use graceful hard reset for F/W events on Gaudi device that require a
device reset.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 337123f735014..3dfb9ecf7db37 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7942,16 +7942,14 @@ reset_device:
 		reset_required = false;
 	}
 
-	/* despite reset doesn't execute. a notification on
-	 * occurred event needs to be sent here
-	 */
-	if (event_mask)
-		hl_notifier_event_send_all(hdev, event_mask);
-
-	if (reset_required)
-		hl_device_reset(hdev, flags);
-	else
+	if (reset_required) {
+		hl_device_cond_reset(hdev, flags, event_mask);
+	} else {
 		hl_fw_unmask_irq(hdev, event_type);
+		/* Notification on occurred event needs to be sent although reset is not executed */
+		if (event_mask)
+			hl_notifier_event_send_all(hdev, event_mask);
+	}
 }
 
 static void *gaudi_get_events_stat(struct hl_device *hdev, bool aggregate, u32 *size)
-- 
GitLab


From d1ce7e5ea140bb01d8c6faded09b9264bb83f722 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 16:57:54 +0300
Subject: [PATCH 2082/4122] habanalabs/gaudi2: use graceful hard reset for F/W
 events

Use graceful hard reset for F/W events on Gaudi2 device that require a
device reset.

While at it, do a small refactor of the checks and function calls,
to simplify it and to avoid code duplication.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 27 +++++++++----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 9208f69dd7f81..22f5445fe71c2 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8768,9 +8768,9 @@ static void hl_arc_event_handle(struct hl_device *hdev,
 
 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 {
-	u32 ctl, reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_DELAY;
-	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 	bool reset_required = false, skip_reset = false, is_critical = false;
+	struct gaudi2_device *gaudi2 = hdev->asic_specific;
+	u32 ctl, reset_flags = HL_DRV_RESET_HARD;
 	int index, sbte_index;
 	u64 event_mask = 0;
 	u16 event_type;
@@ -9158,7 +9158,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 						event_type);
 	}
 
-	if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset)
+	if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset &&
+			(hdev->hard_reset_on_fw_events ||
+			(hdev->asic_prop.fw_security_enabled && is_critical)))
 		goto reset_device;
 
 	/* Send unmask irq only for interrupts not classified as MSG */
@@ -9172,22 +9174,13 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 
 reset_device:
 	if (hdev->asic_prop.fw_security_enabled && is_critical) {
-		reset_flags = HL_DRV_RESET_HARD | HL_DRV_RESET_BYPASS_REQ_TO_FW;
-
-		/* notify on device unavailable while the reset triggered by fw */
-		event_mask |= (HL_NOTIFIER_EVENT_DEVICE_RESET |
-					HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE);
-		hl_device_reset(hdev, reset_flags);
-	} else if (hdev->hard_reset_on_fw_events) {
-		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
-		hl_device_reset(hdev, reset_flags);
+		reset_flags |= HL_DRV_RESET_BYPASS_REQ_TO_FW;
+		event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE;
 	} else {
-		if (!gaudi2_irq_map_table[event_type].msg)
-			hl_fw_unmask_irq(hdev, event_type);
+		reset_flags |= HL_DRV_RESET_DELAY;
 	}
-
-	if (event_mask)
-		hl_notifier_event_send_all(hdev, event_mask);
+	event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
+	hl_device_cond_reset(hdev, reset_flags, event_mask);
 }
 
 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
-- 
GitLab


From 1b363adc7fbe37c4b6c18864c1f7043d85b4af6e Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 30 Sep 2022 17:02:19 +0300
Subject: [PATCH 2083/4122] habanalabs: use graceful hard reset for CS timeouts

Use graceful hard reset when detecting a CS timeout that requires a
device reset.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../misc/habanalabs/common/command_submission.c  | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index fa05770865c65..f1c69c8ed74a6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -798,7 +798,7 @@ out:
 static void cs_timedout(struct work_struct *work)
 {
 	struct hl_device *hdev;
-	u64 event_mask;
+	u64 event_mask = 0x0;
 	int rc;
 	struct hl_cs *cs = container_of(work, struct hl_cs,
 						 work_tdr.work);
@@ -830,11 +830,7 @@ static void cs_timedout(struct work_struct *work)
 	if (rc) {
 		hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
 		hdev->captured_err_info.cs_timeout.seq = cs->sequence;
-
-		event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
-				HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
-
-		hl_notifier_event_send_all(hdev, event_mask);
+		event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT;
 	}
 
 	switch (cs->type) {
@@ -869,8 +865,12 @@ static void cs_timedout(struct work_struct *work)
 
 	cs_put(cs);
 
-	if (device_reset)
-		hl_device_reset(hdev, HL_DRV_RESET_TDR);
+	if (device_reset) {
+		event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET;
+		hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask);
+	} else if (event_mask) {
+		hl_notifier_event_send_all(hdev, event_mask);
+	}
 }
 
 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
-- 
GitLab


From 4a9c6e2cdf2b4128f5204b9cf14e3a788a8511df Mon Sep 17 00:00:00 2001
From: Tal Cohen <talcohen@habana.ai>
Date: Tue, 18 Oct 2022 17:35:06 +0300
Subject: [PATCH 2084/4122] habanalabs: no consecutive err when user context is
 enabled

Consecutive error protects a device reset loop from being triggered
due to h/w issues and enters the device into an unavailable state.
When user may cause the error, an unavailable state
will prevent the user from running its workloads.

The commit prevents entering consecutive state when a user context
is enabled.

Signed-off-by: Tal Cohen <talcohen@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index bcd9599249714..61ddcb1ce5089 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1320,6 +1320,10 @@ static void handle_reset_trigger(struct hl_device *hdev, u32 flags)
 {
 	u32 cur_reset_trigger = HL_RESET_TRIGGER_DEFAULT;
 
+	/* No consecutive mechanism when user context exists */
+	if (hdev->is_compute_ctx_active)
+		return;
+
 	/*
 	 * 'reset cause' is being updated here, because getting here
 	 * means that it's the 1st time and the last time we're here
-- 
GitLab


From 679e968908a4997d02c2a7df294e97b066f9149f Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Tue, 20 Sep 2022 11:48:40 +0300
Subject: [PATCH 2085/4122] habanalabs: zero ts registration buff when
 allocated

To avoid memory corruption in kernel memory while using timestamp
registration nodes, zero the kernel buff memory when its allocated.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 99b1d6ce26aef..541e1b6a2176d 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -2109,7 +2109,7 @@ static int hl_ts_alloc_buf(struct hl_mmap_mem_buf *buf, gfp_t gfp, void *args)
 
 	/* Allocate the internal kernel buffer */
 	size = num_elements * sizeof(struct hl_user_pending_interrupt);
-	p = vmalloc(size);
+	p = vzalloc(size);
 	if (!p)
 		goto free_user_buff;
 
-- 
GitLab


From fc69aa8640f8baf9c1246c17ca858bab9aea98b0 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 24 Oct 2022 01:14:18 +0300
Subject: [PATCH 2086/4122] habanalabs: fix PCIe access to SRAM via debugfs

hl_access_sram_dram_region() uses a region base which is set within the
hl_set_dram_bar() function. However, for SRAM access this function is
not called, and we end up with a wrong value of region base and with a
bad calculated address.
Fix it by initializing the region base value independently of whether
hl_set_dram_bar() is called or not.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 61ddcb1ce5089..cb8ecc17bba14 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -69,17 +69,17 @@ int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
 	enum debugfs_access_type acc_type, enum pci_region region_type, bool set_dram_bar)
 {
 	struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
-	u64 old_base = 0, rc, new_bar_region_base = 0;
+	u64 old_base = 0, rc, bar_region_base = region->region_base;
 	void __iomem *acc_addr;
 
 	if (set_dram_bar) {
-		old_base = hl_set_dram_bar(hdev, addr, region, &new_bar_region_base);
+		old_base = hl_set_dram_bar(hdev, addr, region, &bar_region_base);
 		if (old_base == U64_MAX)
 			return -EIO;
 	}
 
 	acc_addr = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
-			(addr - new_bar_region_base);
+			(addr - bar_region_base);
 
 	switch (acc_type) {
 	case DEBUGFS_READ8:
-- 
GitLab


From bdfef91e7c9c2bae083ce1965f53115d88329773 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Wed, 19 Oct 2022 14:05:18 +0300
Subject: [PATCH 2087/4122] habanalabs: add warning print upon a PCI error

In order to know if driver catches PCI errors correctly, we need to
print a warning per each error.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_drv.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index 714994725224f..e82af89897009 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -595,15 +595,16 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
 
 	switch (state) {
 	case pci_channel_io_normal:
+		dev_warn(hdev->dev, "PCI normal state error detected\n");
 		return PCI_ERS_RESULT_CAN_RECOVER;
 
 	case pci_channel_io_frozen:
-		dev_warn(hdev->dev, "frozen state error detected\n");
+		dev_warn(hdev->dev, "PCI frozen state error detected\n");
 		result = PCI_ERS_RESULT_NEED_RESET;
 		break;
 
 	case pci_channel_io_perm_failure:
-		dev_warn(hdev->dev, "failure state error detected\n");
+		dev_warn(hdev->dev, "PCI failure state error detected\n");
 		result = PCI_ERS_RESULT_DISCONNECT;
 		break;
 
@@ -639,6 +640,10 @@ static void hl_pci_err_resume(struct pci_dev *pdev)
  */
 static pci_ers_result_t hl_pci_err_slot_reset(struct pci_dev *pdev)
 {
+	struct hl_device *hdev = pci_get_drvdata(pdev);
+
+	dev_warn(hdev->dev, "PCI slot reset detected\n");
+
 	return PCI_ERS_RESULT_RECOVERED;
 }
 
-- 
GitLab


From 306206985a4bcfc12b45596d56c7bd8ba6f0f6b1 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Wed, 26 Oct 2022 18:20:49 +0300
Subject: [PATCH 2088/4122] habanalabs: remove redundant gaudi2_sec asic type

As Gaudi2 has a single PCI id, the secured asic type is redundant.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 3 ---
 drivers/misc/habanalabs/common/habanalabs.h | 2 --
 drivers/misc/habanalabs/common/mmu/mmu.c    | 1 -
 drivers/misc/habanalabs/common/sysfs.c      | 2 --
 drivers/misc/habanalabs/gaudi2/gaudi2.c     | 2 +-
 5 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index cb8ecc17bba14..3ea1ee1ec8ef3 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -748,9 +748,6 @@ static int device_early_init(struct hl_device *hdev)
 		gaudi2_set_asic_funcs(hdev);
 		strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
 		break;
-	case ASIC_GAUDI2_SEC:
-		gaudi2_set_asic_funcs(hdev);
-		strscpy(hdev->asic_name, "GAUDI2 SEC", sizeof(hdev->asic_name));
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index bfaaa9daa7501..7d191f388953f 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1192,7 +1192,6 @@ struct hl_dec {
  * @ASIC_GAUDI: Gaudi device (HL-2000).
  * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
  * @ASIC_GAUDI2: Gaudi2 device.
- * @ASIC_GAUDI2_SEC: Gaudi2 secured device.
  */
 enum hl_asic_type {
 	ASIC_INVALID,
@@ -1200,7 +1199,6 @@ enum hl_asic_type {
 	ASIC_GAUDI,
 	ASIC_GAUDI_SEC,
 	ASIC_GAUDI2,
-	ASIC_GAUDI2_SEC,
 };
 
 struct hl_cs_parser;
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 589179f8cd41b..67d3e70cf5714 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -635,7 +635,6 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 		hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
 		break;
 	case ASIC_GAUDI2:
-	case ASIC_GAUDI2_SEC:
 		/* MMUs in Gaudi2 are always host resident */
 		hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
 		break;
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 36e9814139d17..c924fc994bd96 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -248,8 +248,6 @@ static ssize_t device_type_show(struct device *dev,
 	case ASIC_GAUDI2:
 		str = "GAUDI2";
 		break;
-	case ASIC_GAUDI2_SEC:
-		str = "GAUDI2 SEC";
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 22f5445fe71c2..03f8cf9bb136d 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -3969,7 +3969,7 @@ static void gaudi2_init_firmware_loader(struct hl_device *hdev)
 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
 	fw_loader->dram_bar_id = DRAM_BAR_ID;
 
-	if (hdev->asic_type == ASIC_GAUDI2 || hdev->asic_type == ASIC_GAUDI2_SEC)
+	if (hdev->asic_type == ASIC_GAUDI2)
 		fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
 	else /* ASIC_GAUDI2_FPGA */
 		fw_loader->cpu_timeout = GAUDI2_FPGA_CPU_TIMEOUT;
-- 
GitLab


From 841cd2d7658d92e09354640c1887797f0da3d444 Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Wed, 26 Oct 2022 16:20:45 +0300
Subject: [PATCH 2089/4122] habanalabs/gaudi2: add PCI revision 2 support

Add support for Gaudi2 Device with PCI revision 2.
Functionality is exactly the same as revision 1, the only difference
is device name exposed to user.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c       |  4 +++
 drivers/misc/habanalabs/common/habanalabs.h   |  2 ++
 .../misc/habanalabs/common/habanalabs_drv.c   | 26 +++++++++++++------
 .../misc/habanalabs/common/habanalabs_ioctl.c |  6 +++--
 drivers/misc/habanalabs/common/mmu/mmu.c      |  1 +
 drivers/misc/habanalabs/common/sysfs.c        |  2 ++
 drivers/misc/habanalabs/gaudi2/gaudi2.c       |  6 +----
 drivers/misc/habanalabs/gaudi2/gaudi2P.h      |  2 --
 .../include/hw_ip/pci/pci_general.h           |  7 +++++
 include/uapi/misc/habanalabs.h                |  7 +++++
 10 files changed, 46 insertions(+), 17 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 3ea1ee1ec8ef3..35ed494fcfdf0 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -748,6 +748,10 @@ static int device_early_init(struct hl_device *hdev)
 		gaudi2_set_asic_funcs(hdev);
 		strscpy(hdev->asic_name, "GAUDI2", sizeof(hdev->asic_name));
 		break;
+	case ASIC_GAUDI2B:
+		gaudi2_set_asic_funcs(hdev);
+		strscpy(hdev->asic_name, "GAUDI2B", sizeof(hdev->asic_name));
+		break;
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 7d191f388953f..e391e7951fb75 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -1192,6 +1192,7 @@ struct hl_dec {
  * @ASIC_GAUDI: Gaudi device (HL-2000).
  * @ASIC_GAUDI_SEC: Gaudi secured device (HL-2000).
  * @ASIC_GAUDI2: Gaudi2 device.
+ * @ASIC_GAUDI2B: Gaudi2B device.
  */
 enum hl_asic_type {
 	ASIC_INVALID,
@@ -1199,6 +1200,7 @@ enum hl_asic_type {
 	ASIC_GAUDI,
 	ASIC_GAUDI_SEC,
 	ASIC_GAUDI2,
+	ASIC_GAUDI2B,
 };
 
 struct hl_cs_parser;
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index e82af89897009..7815c60df54e2 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -9,6 +9,7 @@
 #define pr_fmt(fmt)		"habanalabs: " fmt
 
 #include "habanalabs.h"
+#include "../include/hw_ip/pci/pci_general.h"
 
 #include <linux/pci.h>
 #include <linux/aer.h>
@@ -74,16 +75,17 @@ MODULE_DEVICE_TABLE(pci, ids);
 /*
  * get_asic_type - translate device id to asic type
  *
- * @device: id of the PCI device
+ * @hdev: pointer to habanalabs device structure.
  *
- * Translate device id to asic type.
+ * Translate device id and revision id to asic type.
  * In case of unidentified device, return -1
  */
-static enum hl_asic_type get_asic_type(u16 device)
+static enum hl_asic_type get_asic_type(struct hl_device *hdev)
 {
-	enum hl_asic_type asic_type;
+	struct pci_dev *pdev = hdev->pdev;
+	enum hl_asic_type asic_type = ASIC_INVALID;
 
-	switch (device) {
+	switch (pdev->device) {
 	case PCI_IDS_GOYA:
 		asic_type = ASIC_GOYA;
 		break;
@@ -94,10 +96,18 @@ static enum hl_asic_type get_asic_type(u16 device)
 		asic_type = ASIC_GAUDI_SEC;
 		break;
 	case PCI_IDS_GAUDI2:
-		asic_type = ASIC_GAUDI2;
+		switch (pdev->revision) {
+		case REV_ID_A:
+			asic_type = ASIC_GAUDI2;
+			break;
+		case REV_ID_B:
+			asic_type = ASIC_GAUDI2B;
+			break;
+		default:
+			break;
+		}
 		break;
 	default:
-		asic_type = ASIC_INVALID;
 		break;
 	}
 
@@ -416,7 +426,7 @@ static int create_hdev(struct hl_device **dev, struct pci_dev *pdev)
 	/* First, we must find out which ASIC are we handling. This is needed
 	 * to configure the behavior of the driver (kernel parameters)
 	 */
-	hdev->asic_type = get_asic_type(pdev->device);
+	hdev->asic_type = get_asic_type(hdev);
 	if (hdev->asic_type == ASIC_INVALID) {
 		dev_err(&pdev->dev, "Unsupported ASIC\n");
 		rc = -ENODEV;
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 5ce5c42e27313..ee43017eb5636 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -10,10 +10,11 @@
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
 
-#include <linux/kernel.h>
 #include <linux/fs.h>
-#include <linux/uaccess.h>
+#include <linux/kernel.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
+#include <linux/uaccess.h>
 #include <linux/vmalloc.h>
 
 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
@@ -105,6 +106,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 	hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
 	hw_ip.server_type = prop->server_type;
 	hw_ip.security_enabled = prop->fw_security_enabled;
+	hw_ip.revision_id = hdev->pdev->revision;
 
 	return copy_to_user(out, &hw_ip,
 		min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 67d3e70cf5714..2c1005f74cf44 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -635,6 +635,7 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 		hl_mmu_v1_set_funcs(hdev, &hdev->mmu_func[MMU_DR_PGT]);
 		break;
 	case ASIC_GAUDI2:
+	case ASIC_GAUDI2B:
 		/* MMUs in Gaudi2 are always host resident */
 		hl_mmu_v2_hr_set_funcs(hdev, &hdev->mmu_func[MMU_HR_PGT]);
 		break;
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index c924fc994bd96..735d8bed00660 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -248,6 +248,8 @@ static ssize_t device_type_show(struct device *dev,
 	case ASIC_GAUDI2:
 		str = "GAUDI2";
 		break;
+	case ASIC_GAUDI2B:
+		str = "GAUDI2B";
 		break;
 	default:
 		dev_err(hdev->dev, "Unrecognized ASIC type %d\n",
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 03f8cf9bb136d..f21b68be6d207 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -3968,11 +3968,7 @@ static void gaudi2_init_firmware_loader(struct hl_device *hdev)
 	fw_loader->skip_bmc = false;
 	fw_loader->sram_bar_id = SRAM_CFG_BAR_ID;
 	fw_loader->dram_bar_id = DRAM_BAR_ID;
-
-	if (hdev->asic_type == ASIC_GAUDI2)
-		fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
-	else /* ASIC_GAUDI2_FPGA */
-		fw_loader->cpu_timeout = GAUDI2_FPGA_CPU_TIMEOUT;
+	fw_loader->cpu_timeout = GAUDI2_CPU_TIMEOUT_USEC;
 
 	/* here we update initial values for few specific dynamic regs (as
 	 * before reading the first descriptor from FW those value has to be
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
index a99c348bbf390..b4383c199bbba 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h
@@ -23,8 +23,6 @@
 
 #define GAUDI2_CPU_TIMEOUT_USEC		30000000	/* 30s */
 
-#define GAUDI2_FPGA_CPU_TIMEOUT		100000000	/* 100s */
-
 #define NUMBER_OF_PDMA_QUEUES		2
 #define NUMBER_OF_EDMA_QUEUES		8
 #define NUMBER_OF_MME_QUEUES		4
diff --git a/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h
index d232081d4e0f7..f5d497dc9bdc1 100644
--- a/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h
+++ b/drivers/misc/habanalabs/include/hw_ip/pci/pci_general.h
@@ -20,4 +20,11 @@
 #define PCI_CONFIG_ELBI_STS_MASK	(PCI_CONFIG_ELBI_STS_ERR | \
 					PCI_CONFIG_ELBI_STS_DONE)
 
+enum hl_revision_id {
+	/* PCI revision ID 0 is not legal */
+	REV_ID_INVALID				= 0x00,
+	REV_ID_A				= 0x01,
+	REV_ID_B				= 0x02,
+};
+
 #endif /* INCLUDE_PCI_GENERAL_H_ */
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index a4ceee6818986..58343998bd638 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -868,6 +868,7 @@ enum hl_server_type {
  * @number_of_user_interrupts: The number of interrupts that are available to the userspace
  *                             application to use. Relevant for Gaudi2 and later.
  * @device_mem_alloc_default_page_size: default page size used in device memory allocation.
+ * @revision_id: PCI revision ID of the ASIC.
  */
 struct hl_info_hw_ip_info {
 	__u64 sram_base_address;
@@ -898,6 +899,12 @@ struct hl_info_hw_ip_info {
 	__u16 pad2;
 	__u64 reserved4;
 	__u64 device_mem_alloc_default_page_size;
+	__u64 reserved5;
+	__u64 reserved6;
+	__u32 reserved7;
+	__u8 reserved8;
+	__u8 revision_id;
+	__u8 pad[2];
 };
 
 struct hl_info_dram_usage {
-- 
GitLab


From cb5fb665f30388cf8cb9becae86dcb84ace0ca88 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Sun, 30 Oct 2022 13:08:37 +0200
Subject: [PATCH 2090/4122] habanalabs/gaudi: add razwi notify event

Each time razwi (read-only zero, write ignore) happens, besides
capturing its data, also notify the user about it.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     |  8 +++++
 drivers/misc/habanalabs/common/habanalabs.h |  2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c       | 37 +++++++++++----------
 include/uapi/misc/habanalabs.h              |  2 ++
 4 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 35ed494fcfdf0..d1a6095895584 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2409,6 +2409,14 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
 			num_of_engines * sizeof(u16));
 	hdev->captured_err_info.razwi.flags = flags;
 }
+
+void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
+			u8 flags, u64 *event_mask)
+{
+	hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags);
+	*event_mask |= HL_NOTIFIER_EVENT_RAZWI;
+}
+
 static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu)
 {
 	struct page_fault_info *pgf_info = &hdev->captured_err_info.pgf_info;
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index e391e7951fb75..d9335f3769b87 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3812,6 +3812,8 @@ hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
 __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...);
 void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
 			u8 flags);
+void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
+			u8 flags, u64 *event_mask);
 void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 3dfb9ecf7db37..035865cb097c4 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7301,7 +7301,7 @@ static void gaudi_handle_qman_err(struct hl_device *hdev, u16 event_type, u64 *e
 }
 
 static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
-					bool razwi)
+					bool razwi, u64 *event_mask)
 {
 	bool is_read = false, is_write = false;
 	u16 engine_id[2], num_of_razwi_eng = 0;
@@ -7337,7 +7337,8 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
 				num_of_razwi_eng = 1;
 		}
 
-		hl_capture_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags);
+		hl_handle_razwi(hdev, razwi_addr, engine_id, num_of_razwi_eng, razwi_flags,
+				event_mask);
 	}
 }
 
@@ -7675,7 +7676,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_HBM_0_DERR ... GAUDI_EVENT_HBM_3_DERR:
 	case GAUDI_EVENT_MMU_DERR:
 	case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR:
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
@@ -7685,7 +7686,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_AXI_ECC:
 	case GAUDI_EVENT_L2_RAM_ECC:
 	case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR;
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
@@ -7694,7 +7695,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_HBM1_SPI_0:
 	case GAUDI_EVENT_HBM2_SPI_0:
 	case GAUDI_EVENT_HBM3_SPI_0:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		gaudi_hbm_read_interrupts(hdev,
 				gaudi_hbm_event_to_dev(event_type),
 				&eq_entry->hbm_ecc_data);
@@ -7706,7 +7707,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_HBM1_SPI_1:
 	case GAUDI_EVENT_HBM2_SPI_1:
 	case GAUDI_EVENT_HBM3_SPI_1:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		gaudi_hbm_read_interrupts(hdev,
 				gaudi_hbm_event_to_dev(event_type),
 				&eq_entry->hbm_ecc_data);
@@ -7728,7 +7729,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		 * if the event is a TPC Assertion or a "real" TPC DEC.
 		 */
 		event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT;
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		reset_required = gaudi_tpc_read_interrupts(hdev,
 					tpc_dec_event_to_tpc_id(event_type),
 					"AXI_SLV_DEC_Error");
@@ -7753,7 +7754,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_TPC5_KRN_ERR:
 	case GAUDI_EVENT_TPC6_KRN_ERR:
 	case GAUDI_EVENT_TPC7_KRN_ERR:
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		reset_required = gaudi_tpc_read_interrupts(hdev,
 					tpc_krn_event_to_tpc_id(event_type),
 					"KRN_ERR");
@@ -7792,7 +7793,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_HBM_0_SERR ... GAUDI_EVENT_HBM_3_SERR:
 		fallthrough;
 	case GAUDI_EVENT_MMU_SERR:
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data);
 		hl_fw_unmask_irq(hdev, event_type);
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
@@ -7802,14 +7803,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_CPU_AXI_SPLITTER:
 	case GAUDI_EVENT_PSOC_AXI_DEC:
 	case GAUDI_EVENT_PSOC_PRSTN_FALL:
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		hl_fw_unmask_irq(hdev, event_type);
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
 
 	case GAUDI_EVENT_MMU_PAGE_FAULT:
 	case GAUDI_EVENT_MMU_WR_PERM:
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		hl_fw_unmask_irq(hdev, event_type);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
@@ -7838,14 +7839,14 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_NIC4_QM1:
 	case GAUDI_EVENT_DMA0_CORE ... GAUDI_EVENT_DMA7_CORE:
 	case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM:
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		gaudi_handle_qman_err(hdev, event_type, &event_mask);
 		hl_fw_unmask_irq(hdev, event_type);
 		event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET);
 		break;
 
 	case GAUDI_EVENT_RAZWI_OR_ADC_SW:
-		gaudi_print_irq_info(hdev, event_type, true);
+		gaudi_print_irq_info(hdev, event_type, true, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		goto reset_device;
 
@@ -7858,7 +7859,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 	case GAUDI_EVENT_TPC6_BMON_SPMU:
 	case GAUDI_EVENT_TPC7_BMON_SPMU:
 	case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		hl_fw_unmask_irq(hdev, event_type);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
@@ -7870,7 +7871,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		break;
 
 	case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		gaudi_print_sm_sei_info(hdev, event_type,
 					&eq_entry->sm_sei_data);
 		rc = hl_state_dump(hdev);
@@ -7899,18 +7900,18 @@ static void gaudi_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entr
 		break;
 
 	case GAUDI_EVENT_DEV_RESET_REQ:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err);
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
 
 	case GAUDI_EVENT_FW_ALIVE_S:
-		gaudi_print_irq_info(hdev, event_type, false);
+		gaudi_print_irq_info(hdev, event_type, false, &event_mask);
 		gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive);
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		goto reset_device;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 58343998bd638..7747e19e81fe0 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -721,6 +721,7 @@ enum hl_server_type {
  * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE	- Indicates device is unavailable
  * HL_NOTIFIER_EVENT_USER_ENGINE_ERR	- Indicates device engine in error state
  * HL_NOTIFIER_EVENT_GENERAL_HW_ERR     - Indicates device HW error
+ * HL_NOTIFIER_EVENT_RAZWI              - Indicates razwi happened
  */
 #define HL_NOTIFIER_EVENT_TPC_ASSERT		(1ULL << 0)
 #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE	(1ULL << 1)
@@ -729,6 +730,7 @@ enum hl_server_type {
 #define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE	(1ULL << 4)
 #define HL_NOTIFIER_EVENT_USER_ENGINE_ERR	(1ULL << 5)
 #define HL_NOTIFIER_EVENT_GENERAL_HW_ERR	(1ULL << 6)
+#define HL_NOTIFIER_EVENT_RAZWI			(1ULL << 7)
 
 /* Opcode for management ioctl
  *
-- 
GitLab


From cd21701cde33123fc53c6401192219ba14832da3 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Thu, 27 Oct 2022 20:38:26 +0300
Subject: [PATCH 2091/4122] habanalabs: use single threaded WQ for event
 handling

Creating event queue workqueue using alloc_workqueue made it run in
multi threaded mode, which caused parallel dumping of events as well as
parallel events notifying to user, causing logs with multiple
events to be out of order.

Fixed by creating event queue workqueue as single threaded work queue.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index d1a6095895584..65bb40f81901e 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -787,7 +787,7 @@ static int device_early_init(struct hl_device *hdev)
 		}
 	}
 
-	hdev->eq_wq = alloc_workqueue("hl-events", WQ_UNBOUND, 0);
+	hdev->eq_wq = create_singlethread_workqueue("hl-events");
 	if (hdev->eq_wq == NULL) {
 		dev_err(hdev->dev, "Failed to allocate EQ workqueue\n");
 		rc = -ENOMEM;
-- 
GitLab


From aff6354afd1f9eae1e10658c157c26e316806f56 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 31 Oct 2022 11:44:45 +0200
Subject: [PATCH 2092/4122] habanalabs/gaudi: add page fault notify event

Each time page fault happens, besides capturing its data, also notify
the user about it.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c     | 9 +++++++++
 drivers/misc/habanalabs/common/habanalabs.h | 2 ++
 drivers/misc/habanalabs/gaudi/gaudi.c       | 6 +++---
 include/uapi/misc/habanalabs.h              | 2 ++
 4 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 65bb40f81901e..31818121ef4de 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2490,3 +2490,12 @@ void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is
 	hdev->captured_err_info.pgf_info.pgf.engine_id = eng_id;
 	hl_capture_user_mappings(hdev, is_pmmu);
 }
+
+void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
+				u64 *event_mask)
+{
+	hl_capture_page_fault(hdev, addr, eng_id, is_pmmu);
+
+	if (event_mask)
+		*event_mask |=  HL_NOTIFIER_EVENT_PAGE_FAULT;
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index d9335f3769b87..0781b8698f744 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3815,6 +3815,8 @@ void hl_capture_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_
 void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_of_engines,
 			u8 flags, u64 *event_mask);
 void hl_capture_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu);
+void hl_handle_page_fault(struct hl_device *hdev, u64 addr, u16 eng_id, bool is_pmmu,
+				u64 *event_mask);
 
 #ifdef CONFIG_DEBUG_FS
 
diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 035865cb097c4..cbe1daf5a7939 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -6740,7 +6740,7 @@ static void gaudi_print_and_get_razwi_info(struct hl_device *hdev, u16 *engine_i
 	}
 }
 
-static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr)
+static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr, u64 *event_mask)
 {
 	struct gaudi_device *gaudi = hdev->asic_specific;
 	u32 val;
@@ -6755,7 +6755,7 @@ static void gaudi_print_and_get_mmu_error_info(struct hl_device *hdev, u64 *addr
 		*addr |= RREG32(mmMMU_UP_PAGE_ERROR_CAPTURE_VA);
 
 		dev_err_ratelimited(hdev->dev, "MMU page fault on va 0x%llx\n", *addr);
-		hl_capture_page_fault(hdev, *addr, 0, true);
+		hl_handle_page_fault(hdev, *addr, 0, true, event_mask);
 
 		WREG32(mmMMU_UP_PAGE_ERROR_CAPTURE, 0);
 	}
@@ -7323,7 +7323,7 @@ static void gaudi_print_irq_info(struct hl_device *hdev, u16 event_type,
 	if (razwi) {
 		gaudi_print_and_get_razwi_info(hdev, &engine_id[0], &engine_id[1], &is_read,
 						&is_write);
-		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr);
+		gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, event_mask);
 
 		if (is_read)
 			razwi_flags |= HL_RAZWI_READ;
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index 7747e19e81fe0..e50cb71df081c 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -722,6 +722,7 @@ enum hl_server_type {
  * HL_NOTIFIER_EVENT_USER_ENGINE_ERR	- Indicates device engine in error state
  * HL_NOTIFIER_EVENT_GENERAL_HW_ERR     - Indicates device HW error
  * HL_NOTIFIER_EVENT_RAZWI              - Indicates razwi happened
+ * HL_NOTIFIER_EVENT_PAGE_FAULT         - Indicates page fault happened
  */
 #define HL_NOTIFIER_EVENT_TPC_ASSERT		(1ULL << 0)
 #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE	(1ULL << 1)
@@ -731,6 +732,7 @@ enum hl_server_type {
 #define HL_NOTIFIER_EVENT_USER_ENGINE_ERR	(1ULL << 5)
 #define HL_NOTIFIER_EVENT_GENERAL_HW_ERR	(1ULL << 6)
 #define HL_NOTIFIER_EVENT_RAZWI			(1ULL << 7)
+#define HL_NOTIFIER_EVENT_PAGE_FAULT		(1ULL << 8)
 
 /* Opcode for management ioctl
  *
-- 
GitLab


From 91bd822448e57a55d12dc0461909b5c585485a6c Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Sun, 30 Oct 2022 15:10:13 +0200
Subject: [PATCH 2093/4122] habanalabs/gaudi2: implement fp32 not supported
 event

Due to binning, Gaudi2 does not always support fp32.
We add support for such an event in case fp32 is used by the user
in such a device.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c                      | 5 +++++
 drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h | 1 +
 .../include/gaudi2/gaudi2_async_ids_map_extended.h           | 4 +++-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index f21b68be6d207..77bdbab41e6c1 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -9148,6 +9148,11 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
+	case GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED:
+		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
+		is_critical = true;
+		break;
+
 	default:
 		if (gaudi2_irq_map_table[event_type].valid)
 			dev_err_ratelimited(hdev->dev, "Cannot find handler for event %d\n",
diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h
index 34406770a76a6..305b576222e6f 100644
--- a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h
+++ b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_events.h
@@ -957,6 +957,7 @@ enum gaudi2_async_event_id {
 	GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0 = 1317,
 	GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1 = 1318,
 	GAUDI2_EVENT_ARC_DCCM_FULL = 1319,
+	GAUDI2_EVENT_CPU_FP32_NOT_SUPPORTED = 1320,
 	GAUDI2_EVENT_SIZE,
 };
 
diff --git a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
index 5bd4383c9f2cf..d510cb10c8831 100644
--- a/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
+++ b/drivers/misc/habanalabs/include/gaudi2/gaudi2_async_ids_map_extended.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0
  *
- * Copyright 2018-2021 HabanaLabs, Ltd.
+ * Copyright 2018-2022 HabanaLabs, Ltd.
  * All Rights Reserved.
  *
  */
@@ -2663,6 +2663,8 @@ static struct gaudi2_async_events_ids_map gaudi2_irq_map_table[] = {
 		.msg = 1, .reset = 0, .name = "STATUS_NIC11_ENG1" },
 	{ .fc_id = 1319, .cpu_id = 625, .valid = 1,
 		.msg = 1, .reset = 0, .name = "ARC_DCCM_FULL" },
+	{ .fc_id = 1320, .cpu_id = 626, .valid = 1,
+		.msg = 1, .reset = 1, .name = "FP32_NOT_SUPPORTED" },
 };
 
 #endif /* __GAUDI2_ASYNC_IDS_MAP_EVENTS_EXT_H_ */
-- 
GitLab


From 413bdb176eaa7d02c979a3c738738aea91fe6ed7 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Sun, 30 Oct 2022 14:46:19 +0200
Subject: [PATCH 2094/4122] habanalabs/gaudi2: add razwi notify event

Each time razwi (read-only zero, write ignored) event happens, besides
capturing its data, also notify the user about it.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c |   4 +-
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 140 +++++++++++++-----------
 2 files changed, 82 insertions(+), 62 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 31818121ef4de..708db0f48ee0e 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2414,7 +2414,9 @@ void hl_handle_razwi(struct hl_device *hdev, u64 addr, u16 *engine_id, u16 num_o
 			u8 flags, u64 *event_mask)
 {
 	hl_capture_razwi(hdev, addr, engine_id, num_of_engines, flags);
-	*event_mask |= HL_NOTIFIER_EVENT_RAZWI;
+
+	if (event_mask)
+		*event_mask |= HL_NOTIFIER_EVENT_RAZWI;
 }
 
 static void hl_capture_user_mappings(struct hl_device *hdev, bool is_pmmu)
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 77bdbab41e6c1..59940c8df2d20 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -7063,7 +7063,7 @@ static void gaudi2_handle_qman_err_generic(struct hl_device *hdev, const char *q
 static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
-			enum gaudi2_engine_id id)
+			enum gaudi2_engine_id id, u64 *event_mask)
 {
 	u32 razwi_hi, razwi_lo, razwi_xy;
 	u16 eng_id = id;
@@ -7093,8 +7093,8 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 		rd_wr_flag = HL_RAZWI_READ;
 	}
 
-	hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
-				rd_wr_flag | HL_RAZWI_HBW);
+	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &eng_id, 1,
+				rd_wr_flag | HL_RAZWI_HBW, event_mask);
 
 	dev_err_ratelimited(hdev->dev,
 		"%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n",
@@ -7104,7 +7104,7 @@ static void gaudi2_razwi_rr_hbw_shared_printf_info(struct hl_device *hdev,
 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
 			u64 rtr_mstr_if_base_addr, bool is_write, char *name,
 			bool read_razwi_regs, struct hl_eq_razwi_info *razwi_info,
-			enum gaudi2_engine_id id)
+			enum gaudi2_engine_id id, u64 *event_mask)
 {
 	u32 razwi_addr, razwi_xy;
 	u16 eng_id = id;
@@ -7132,7 +7132,7 @@ static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev,
 		rd_wr_flag = HL_RAZWI_READ;
 	}
 
-	hl_capture_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW);
+	hl_handle_razwi(hdev, razwi_addr, &eng_id, 1, rd_wr_flag | HL_RAZWI_LBW, event_mask);
 	dev_err_ratelimited(hdev->dev,
 				"%s-RAZWI SHARED RR LBW %s error, mstr_if 0x%llx, captured address 0x%x Initiator coordinates 0x%x\n",
 				name, is_write ? "WR" : "RD", rtr_mstr_if_base_addr, razwi_addr,
@@ -7189,7 +7189,8 @@ static enum gaudi2_engine_id gaudi2_razwi_calc_engine_id(struct hl_device *hdev,
  */
 static void gaudi2_ack_module_razwi_event_handler(struct hl_device *hdev,
 				enum razwi_event_sources module, u8 module_idx,
-				u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info)
+				u8 module_sub_idx, struct hl_eq_razwi_info *razwi_info,
+				u64 *event_mask)
 {
 	bool via_sft = false, read_razwi_regs = false;
 	u32 rtr_id, dcore_id, dcore_rtr_id, sft_id, eng_id;
@@ -7330,7 +7331,7 @@ dump_info:
 	if (hbw_shrd_aw) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
 						initiator_name, read_razwi_regs, razwi_info,
-						eng_id);
+						eng_id, event_mask);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7340,7 +7341,7 @@ dump_info:
 	if (hbw_shrd_ar) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
 						initiator_name, read_razwi_regs, razwi_info,
-						eng_id);
+						eng_id, event_mask);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7350,7 +7351,7 @@ dump_info:
 	if (lbw_shrd_aw) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, true,
 						initiator_name, read_razwi_regs, razwi_info,
-						eng_id);
+						eng_id, event_mask);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7360,7 +7361,7 @@ dump_info:
 	if (lbw_shrd_ar) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, rtr_mstr_if_base_addr, false,
 						initiator_name, read_razwi_regs, razwi_info,
-						eng_id);
+						eng_id, event_mask);
 
 		/* Clear event indication */
 		if (read_razwi_regs)
@@ -7376,38 +7377,42 @@ static void gaudi2_check_if_razwi_happened(struct hl_device *hdev)
 	/* check all TPCs */
 	for (mod_idx = 0 ; mod_idx < (NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1) ; mod_idx++) {
 		if (prop->tpc_enabled_mask & BIT(mod_idx))
-			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL);
+			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, mod_idx, 0, NULL,
+								NULL);
 	}
 
 	/* check all MMEs */
 	for (mod_idx = 0 ; mod_idx < (NUM_OF_MME_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
 		for (sub_mod = MME_WAP0 ; sub_mod < MME_INITIATORS_MAX ; sub_mod++)
 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mod_idx,
-								sub_mod, NULL);
+									sub_mod, NULL, NULL);
 
 	/* check all EDMAs */
 	for (mod_idx = 0 ; mod_idx < (NUM_OF_EDMA_PER_DCORE * NUM_OF_DCORES) ; mod_idx++)
 		if (prop->edma_enabled_mask & BIT(mod_idx))
-			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL);
+			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_EDMA, mod_idx, 0, NULL,
+								NULL);
 
 	/* check all PDMAs */
 	for (mod_idx = 0 ; mod_idx < NUM_OF_PDMA ; mod_idx++)
-		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL);
+		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_PDMA, mod_idx, 0, NULL,
+							NULL);
 
 	/* check all NICs */
 	for (mod_idx = 0 ; mod_idx < NIC_NUMBER_OF_PORTS ; mod_idx++)
 		if (hdev->nic_ports_mask & BIT(mod_idx))
 			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_NIC, mod_idx >> 1, 0,
-								NULL);
+								NULL, NULL);
 
 	/* check all DECs */
 	for (mod_idx = 0 ; mod_idx < NUMBER_OF_DEC ; mod_idx++)
 		if (prop->decoder_enabled_mask & BIT(mod_idx))
-			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL);
+			gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, mod_idx, 0, NULL,
+								NULL);
 
 	/* check all ROTs */
 	for (mod_idx = 0 ; mod_idx < NUM_OF_ROT ; mod_idx++)
-		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL);
+		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL, NULL);
 }
 
 static const char *gaudi2_get_initiators_name(u32 rtr_id)
@@ -7625,7 +7630,8 @@ static u16 gaudi2_get_razwi_initiators(u32 rtr_id, u16 *engines)
 }
 
 static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id,
-							u64 rtr_ctrl_base_addr, bool is_write)
+							u64 rtr_ctrl_base_addr, bool is_write,
+							u64 *event_mask)
 {
 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
 	u32 razwi_hi, razwi_lo;
@@ -7649,8 +7655,8 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1);
 	}
 
-	hl_capture_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
-				rd_wr_flag | HL_RAZWI_HBW);
+	hl_handle_razwi(hdev, (u64)razwi_hi << 32 | razwi_lo, &engines[0], num_of_eng,
+				rd_wr_flag | HL_RAZWI_HBW, event_mask);
 	dev_err_ratelimited(hdev->dev,
 		"RAZWI PSOC unmapped HBW %s error, rtr id %u, address %#llx\n",
 		is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo);
@@ -7660,7 +7666,8 @@ static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u
 }
 
 static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id,
-							u64 rtr_ctrl_base_addr, bool is_write)
+							u64 rtr_ctrl_base_addr, bool is_write,
+							u64 *event_mask)
 {
 	u16 engines[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR], num_of_eng;
 	u32 razwi_addr;
@@ -7682,7 +7689,8 @@ static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u
 		WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1);
 	}
 
-	hl_capture_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW);
+	hl_handle_razwi(hdev, razwi_addr, &engines[0], num_of_eng, rd_wr_flag | HL_RAZWI_LBW,
+			event_mask);
 	dev_err_ratelimited(hdev->dev,
 		"RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n",
 		is_write ? "WR" : "RD", rtr_id, razwi_addr);
@@ -7692,7 +7700,7 @@ static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u
 }
 
 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */
-static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev)
+static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev, u64 *event_mask)
 {
 	u32 hbw_aw_set, hbw_ar_set, lbw_aw_set, lbw_ar_set, rtr_id, dcore_id, dcore_rtr_id, xy,
 								razwi_mask_info, razwi_intr = 0;
@@ -7746,19 +7754,19 @@ static void gaudi2_ack_psoc_razwi_event_handler(struct hl_device *hdev)
 
 	if (hbw_aw_set)
 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
-						rtr_ctrl_base_addr, true);
+						rtr_ctrl_base_addr, true, event_mask);
 
 	if (hbw_ar_set)
 		gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id,
-						rtr_ctrl_base_addr, false);
+						rtr_ctrl_base_addr, false, event_mask);
 
 	if (lbw_aw_set)
 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
-						rtr_ctrl_base_addr, true);
+						rtr_ctrl_base_addr, true, event_mask);
 
 	if (lbw_ar_set)
 		gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id,
-						rtr_ctrl_base_addr, false);
+						rtr_ctrl_base_addr, false, event_mask);
 
 clear:
 	/* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */
@@ -7784,7 +7792,7 @@ static void _gaudi2_handle_qm_sei_err(struct hl_device *hdev, u64 qman_base)
 }
 
 static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
-					struct hl_eq_razwi_info *razwi_info)
+					struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
 {
 	enum razwi_event_sources module;
 	u64 qman_base;
@@ -7837,7 +7845,7 @@ static void gaudi2_handle_qm_sei_err(struct hl_device *hdev, u16 event_type,
 
 	/* check if RAZWI happened */
 	if (razwi_info)
-		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info);
+		gaudi2_ack_module_razwi_event_handler(hdev, module, 0, 0, razwi_info, event_mask);
 }
 
 static void gaudi2_handle_qman_err(struct hl_device *hdev, u16 event_type)
@@ -8003,7 +8011,8 @@ static void gaudi2_handle_cpu_sei_err(struct hl_device *hdev)
 }
 
 static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index,
-					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause)
+					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
+					u64 *event_mask)
 {
 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
 	int i;
@@ -8015,11 +8024,12 @@ static void gaudi2_handle_rot_err(struct hl_device *hdev, u8 rot_index,
 
 	/* check if RAZWI happened */
 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, rot_index, 0,
-						&razwi_with_intr_cause->razwi_info);
+						&razwi_with_intr_cause->razwi_info, event_mask);
 }
 
 static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char *interrupt_name,
-					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause)
+					struct hl_eq_razwi_with_intr_cause *razwi_with_intr_cause,
+					u64 *event_mask)
 {
 	u64 intr_cause_data = le64_to_cpu(razwi_with_intr_cause->intr_cause.intr_cause_data);
 	int i;
@@ -8031,11 +8041,11 @@ static void gaudi2_tpc_ack_interrupts(struct hl_device *hdev, u8 tpc_index, char
 
 	/* check if RAZWI happened */
 	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_TPC, tpc_index, 0,
-						&razwi_with_intr_cause->razwi_info);
+						&razwi_with_intr_cause->razwi_info, event_mask);
 }
 
 static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const char *interrupt_name,
-				struct hl_eq_razwi_info *razwi_info)
+				struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
 {
 	u32 sts_addr, sts_val, sts_clr_val = 0;
 	int i;
@@ -8061,14 +8071,15 @@ static void gaudi2_handle_dec_err(struct hl_device *hdev, u8 dec_index, const ch
 	}
 
 	/* check if RAZWI happened */
-	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info);
+	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_DEC, dec_index, 0, razwi_info,
+						event_mask);
 
 	/* Write 1 clear errors */
 	WREG32(sts_addr, sts_clr_val);
 }
 
 static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const char *interrupt_name,
-				struct hl_eq_razwi_info *razwi_info)
+				struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
 {
 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
 	int i;
@@ -8088,7 +8099,8 @@ static void gaudi2_handle_mme_err(struct hl_device *hdev, u8 mme_index, const ch
 
 	/* check if RAZWI happened */
 	for (i = MME_WRITE ; i < MME_INITIATORS_MAX ; i++)
-		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info);
+		gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, i, razwi_info,
+							event_mask);
 
 	WREG32(sts_clr_addr, sts_clr_val);
 }
@@ -8105,7 +8117,7 @@ static void gaudi2_handle_mme_sbte_err(struct hl_device *hdev, u8 mme_index, u8
 }
 
 static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index,
-					struct hl_eq_razwi_info *razwi_info)
+					struct hl_eq_razwi_info *razwi_info, u64 *event_mask)
 {
 	u32 sts_addr, sts_val, sts_clr_addr, sts_clr_val = 0;
 	int i;
@@ -8125,8 +8137,10 @@ static void gaudi2_handle_mme_wap_err(struct hl_device *hdev, u8 mme_index,
 	}
 
 	/* check if RAZWI happened on WAP0/1 */
-	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info);
-	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info);
+	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP0, razwi_info,
+						event_mask);
+	gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_MME, mme_index, MME_WAP1, razwi_info,
+						event_mask);
 
 	WREG32(sts_clr_addr, sts_clr_val);
 }
@@ -8156,40 +8170,41 @@ static void gaudi2_handle_dma_core_event(struct hl_device *hdev, u64 intr_cause_
 						gaudi2_dma_core_interrupts_cause[i]);
 }
 
-static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev)
+static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev, u64 *event_mask)
 {
 	u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr;
 
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
-							NULL, GAUDI2_ENGINE_ID_PCIE);
+							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
-							NULL, GAUDI2_ENGINE_ID_PCIE);
+							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true,
-							NULL, GAUDI2_ENGINE_ID_PCIE);
+							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 
 	razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED;
 	if (RREG32(razwi_happened_addr)) {
 		gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true,
-							NULL, GAUDI2_ENGINE_ID_PCIE);
+							NULL, GAUDI2_ENGINE_ID_PCIE, event_mask);
 		WREG32(razwi_happened_addr, 0x1);
 	}
 }
 
-static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data)
+static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data,
+						u64 *event_mask)
 {
 	int i;
 
@@ -8204,7 +8219,7 @@ static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cau
 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK:
 			break;
 		case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK:
-			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev);
+			gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev, event_mask);
 			break;
 		}
 	}
@@ -8818,29 +8833,30 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP:
 	case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP:
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
-		gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info);
+		gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE:
 	case GAUDI2_EVENT_ROTATOR1_AXI_ERROR_RESPONSE:
 		index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE;
-		gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause);
-		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
+		gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause, &event_mask);
+		gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP:
 		index = event_type - GAUDI2_EVENT_TPC0_AXI_ERR_RSP;
 		gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP",
-						&eq_entry->razwi_with_intr_cause);
-		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
+						&eq_entry->razwi_with_intr_cause, &event_mask);
+		gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
 	case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE:
 		index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE;
-		gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info);
+		gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info,
+					&event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -8871,7 +8887,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_TPC24_KERNEL_ERR:
 		index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) /
 			(GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR);
-		gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause);
+		gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause,
+						&event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -8887,7 +8904,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_DEC9_SPI:
 		index = (event_type - GAUDI2_EVENT_DEC0_SPI) /
 				(GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI);
-		gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info);
+		gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -8899,8 +8916,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 				(GAUDI2_EVENT_MME1_CTRL_AXI_ERROR_RESPONSE -
 						GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE);
 		gaudi2_handle_mme_err(hdev, index,
-				"CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info);
-		gaudi2_handle_qm_sei_err(hdev, event_type, NULL);
+				"CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info, &event_mask);
+		gaudi2_handle_qm_sei_err(hdev, event_type, NULL, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -8911,7 +8928,8 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		index = (event_type - GAUDI2_EVENT_MME0_QMAN_SW_ERROR) /
 				(GAUDI2_EVENT_MME1_QMAN_SW_ERROR -
 					GAUDI2_EVENT_MME0_QMAN_SW_ERROR);
-		gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info);
+		gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info,
+					&event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -8922,7 +8940,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		index = (event_type - GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID) /
 				(GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID -
 					GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID);
-		gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info);
+		gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -8941,7 +8959,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 
 	case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR:
 		gaudi2_print_pcie_addr_dec_info(hdev,
-				le64_to_cpu(eq_entry->intr_cause.intr_cause_data));
+				le64_to_cpu(eq_entry->intr_cause.intr_cause_data), &event_mask);
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR;
 		break;
@@ -8970,7 +8988,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 		break;
 
 	case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT:
-		gaudi2_ack_psoc_razwi_event_handler(hdev);
+		gaudi2_ack_psoc_razwi_event_handler(hdev, &event_mask);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
-- 
GitLab


From 3daa64eea1fb219c8cfb3bb6948dc2993652e201 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Thu, 22 Sep 2022 14:24:35 +0300
Subject: [PATCH 2095/4122] habanalabs: fix firmware descriptor copy operation

This is needed to allow adding more data to the lkd_fw_comms_desc
structure.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 26 ++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index f18e53bbba6bb..01c4ffba6e973 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -12,6 +12,7 @@
 #include <linux/crc32.h>
 #include <linux/slab.h>
 #include <linux/ctype.h>
+#include <linux/vmalloc.h>
 
 #define FW_FILE_MAX_SIZE		0x1400000 /* maximum size of 20MB */
 
@@ -1988,10 +1989,11 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev,
 						struct fw_load_mgr *fw_loader)
 {
 	struct lkd_fw_comms_desc *fw_desc;
+	void __iomem *src, *temp_fw_desc;
 	struct pci_mem_region *region;
 	struct fw_response *response;
+	u16 fw_data_size;
 	enum pci_region region_id;
-	void __iomem *src;
 	int rc;
 
 	fw_desc = &fw_loader->dynamic_loader.comm_desc;
@@ -2018,9 +2020,29 @@ static int hl_fw_dynamic_read_and_validate_descriptor(struct hl_device *hdev,
 	fw_loader->dynamic_loader.fw_desc_valid = false;
 	src = hdev->pcie_bar[region->bar_id] + region->offset_in_bar +
 							response->ram_offset;
+
+	/*
+	 * We do the copy of the fw descriptor in 2 phases:
+	 * 1. copy the header + data info according to our lkd_fw_comms_desc definition.
+	 *    then we're able to read the actual data size provided by fw.
+	 *    this is needed for cases where data in descriptor was changed(add/remove)
+	 *    in embedded specs header file before updating lkd copy of the header file
+	 * 2. copy descriptor to temporary buffer with aligned size and send it to validation
+	 */
 	memcpy_fromio(fw_desc, src, sizeof(struct lkd_fw_comms_desc));
+	fw_data_size = le16_to_cpu(fw_desc->header.size);
+
+	temp_fw_desc = vzalloc(sizeof(struct comms_desc_header) + fw_data_size);
+	if (!temp_fw_desc)
+		return -ENOMEM;
+
+	memcpy_fromio(temp_fw_desc, src, sizeof(struct comms_desc_header) + fw_data_size);
 
-	return hl_fw_dynamic_validate_descriptor(hdev, fw_loader, fw_desc);
+	rc = hl_fw_dynamic_validate_descriptor(hdev, fw_loader,
+					(struct lkd_fw_comms_desc *) temp_fw_desc);
+	vfree(temp_fw_desc);
+
+	return rc;
 }
 
 /**
-- 
GitLab


From b829e01025f8936bb85bdc39cbd1faddcca290d0 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Sun, 6 Nov 2022 09:26:01 +0200
Subject: [PATCH 2096/4122] habanalabs: skip events info ioctl if not supported

Some ASICs haven't yet implemented this functionality and so the
ioctl call should fail and the user should be notified of the reason.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs_ioctl.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index ee43017eb5636..b6abfa7761a70 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -123,6 +123,10 @@ static int hw_events_info(struct hl_device *hdev, bool aggregate,
 		return -EINVAL;
 
 	arr = hdev->asic_funcs->get_events_stat(hdev, aggregate, &size);
+	if (!arr) {
+		dev_err(hdev->dev, "Events info not supported\n");
+		return -EOPNOTSUPP;
+	}
 
 	return copy_to_user(out, arr, min(max_size, size)) ? -EFAULT : 0;
 }
-- 
GitLab


From a63de89bee7ff01dc184fbe289eade5b5ab5f49a Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Sun, 6 Nov 2022 12:07:03 +0200
Subject: [PATCH 2097/4122] habanalabs/gaudi2: classify power/thermal events as
 info

As power and thermal envelope events are pure informative and not
indicating an error, we reduce the print level to info only.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 59940c8df2d20..61960fa059e0a 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -6828,6 +6828,7 @@ static inline bool is_info_event(u32 event)
 {
 	switch (event) {
 	case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE:
+	case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S ... GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E:
 		return true;
 	default:
 		return false;
-- 
GitLab


From d3027f4a625063c18becd6953b4a2a273033b071 Mon Sep 17 00:00:00 2001
From: Dani Liberman <dliberman@habana.ai>
Date: Mon, 31 Oct 2022 23:04:14 +0200
Subject: [PATCH 2098/4122] habanalabs/gaudi2: add page fault notify event

Each time page fault happens, besides capturing its data, also notify
the user about it.

Signed-off-by: Dani Liberman <dliberman@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 61960fa059e0a..65c9b535aa698 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8253,7 +8253,8 @@ static void gaudi2_handle_hif_fatal(struct hl_device *hdev, u16 event_type, u64
 	}
 }
 
-static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu)
+static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool is_pmmu,
+					u64 *event_mask)
 {
 	u32 valid, val;
 	u64 addr;
@@ -8270,7 +8271,7 @@ static void gaudi2_handle_page_error(struct hl_device *hdev, u64 mmu_base, bool
 
 	dev_err_ratelimited(hdev->dev, "%s page fault on va 0x%llx\n",
 				is_pmmu ? "PMMU" : "HMMU", addr);
-	hl_capture_page_fault(hdev, addr, 0, is_pmmu);
+	hl_handle_page_fault(hdev, addr, 0, is_pmmu, event_mask);
 
 	WREG32(mmu_base + MMU_OFFSET(mmDCORE0_HMMU0_MMU_PAGE_ERROR_CAPTURE), 0);
 }
@@ -8296,7 +8297,7 @@ static void gaudi2_handle_access_error(struct hl_device *hdev, u64 mmu_base, boo
 }
 
 static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char *mmu_name,
-						u64 mmu_base, bool is_pmmu)
+						u64 mmu_base, bool is_pmmu, u64 *event_mask)
 {
 	u32 spi_sei_cause, interrupt_clr = 0x0;
 	int i;
@@ -8309,7 +8310,7 @@ static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char
 						mmu_name, gaudi2_mmu_spi_sei[i].cause);
 
 			if (i == 0)
-				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu);
+				gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, event_mask);
 			else if (i == 1)
 				gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
 
@@ -8381,7 +8382,7 @@ static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
 	return reset;
 }
 
-static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type)
+static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
 {
 	bool is_pmmu = false;
 	char desc[32];
@@ -8439,7 +8440,7 @@ static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type
 		return;
 	}
 
-	gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu);
+	gaudi2_handle_mmu_spi_sei_generic(hdev, desc, mmu_base, is_pmmu, event_mask);
 }
 
 
@@ -8969,7 +8970,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 	case GAUDI2_EVENT_HMMU_0_AXI_ERR_RSP ... GAUDI2_EVENT_HMMU_12_AXI_ERR_RSP:
 	case GAUDI2_EVENT_PMMU0_PAGE_FAULT_WR_PERM ... GAUDI2_EVENT_PMMU0_SECURITY_ERROR:
 	case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0:
-		gaudi2_handle_mmu_spi_sei_err(hdev, event_type);
+		gaudi2_handle_mmu_spi_sei_err(hdev, event_type, &event_mask);
 		reset_flags |= HL_DRV_RESET_FW_FATAL_ERR;
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
@@ -10206,7 +10207,7 @@ static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id)
 	if (gaudi2_get_mmu_base(hdev, mmu_id, &mmu_base))
 		return;
 
-	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu);
+	gaudi2_handle_page_error(hdev, mmu_base, is_pmmu, NULL);
 	gaudi2_handle_access_error(hdev, mmu_base, is_pmmu);
 }
 
-- 
GitLab


From 5f8981d699ed33017ff2212ec17f6cde89212756 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 7 Nov 2022 16:20:03 +0200
Subject: [PATCH 2099/4122] habanalabs: fix print for out-of-sync and
 pkt-failure events

Add missing le32_to_cpu() conversions, and use %d for the value
returned from atomic_read().

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c   | 4 ++--
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 8 ++++----
 drivers/misc/habanalabs/goya/goya.c     | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index cbe1daf5a7939..7b93f0d26dd02 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7347,8 +7347,8 @@ static void gaudi_print_out_of_sync_info(struct hl_device *hdev,
 {
 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI_QUEUE_ID_CPU_PQ];
 
-	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
-			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
+		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
 }
 
 static void gaudi_print_fw_alive_info(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 65c9b535aa698..bdb5782afb7e5 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8684,8 +8684,8 @@ static void gaudi2_print_out_of_sync_info(struct hl_device *hdev,
 {
 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
 
-	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
-			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
+		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
 }
 
 static void gaudi2_handle_pcie_p2p_msix(struct hl_device *hdev)
@@ -8751,8 +8751,8 @@ static void gaudi2_print_cpu_pkt_failure_info(struct hl_device *hdev,
 	struct hl_hw_queue *q = &hdev->kernel_queues[GAUDI2_QUEUE_ID_CPU_PQ];
 
 	dev_warn(hdev->dev,
-		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
-		sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+		"FW reported sanity check failure, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
+		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
 }
 
 static void hl_arc_event_handle(struct hl_device *hdev,
diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c
index 5ef9e3ca97a69..0f083fcf81a6b 100644
--- a/drivers/misc/habanalabs/goya/goya.c
+++ b/drivers/misc/habanalabs/goya/goya.c
@@ -4475,8 +4475,8 @@ static void goya_print_out_of_sync_info(struct hl_device *hdev,
 {
 	struct hl_hw_queue *q = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
 
-	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%u\n",
-			sync_err->pi, sync_err->ci, q->pi, atomic_read(&q->ci));
+	dev_err(hdev->dev, "Out of sync with FW, FW: pi=%u, ci=%u, LKD: pi=%u, ci=%d\n",
+		le32_to_cpu(sync_err->pi), le32_to_cpu(sync_err->ci), q->pi, atomic_read(&q->ci));
 }
 
 static void goya_print_irq_info(struct hl_device *hdev, u16 event_type,
-- 
GitLab


From fe3e88c9470ceb2ea67651aa397f29e80453eed1 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 7 Nov 2022 16:34:32 +0200
Subject: [PATCH 2100/4122] habanalabs/gaudi: fix print for firmware-alive
 event

Add missing le{32,64}_to_cpu conversions.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi/gaudi.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c
index 7b93f0d26dd02..9f5e208701bad 100644
--- a/drivers/misc/habanalabs/gaudi/gaudi.c
+++ b/drivers/misc/habanalabs/gaudi/gaudi.c
@@ -7356,9 +7356,10 @@ static void gaudi_print_fw_alive_info(struct hl_device *hdev,
 {
 	dev_err(hdev->dev,
 		"FW alive report: severity=%s, process_id=%u, thread_id=%u, uptime=%llu seconds\n",
-		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ?
-		"Minor" : "Critical", fw_alive->process_id,
-		fw_alive->thread_id, fw_alive->uptime_seconds);
+		(fw_alive->severity == FW_ALIVE_SEVERITY_MINOR) ? "Minor" : "Critical",
+		le32_to_cpu(fw_alive->process_id),
+		le32_to_cpu(fw_alive->thread_id),
+		le64_to_cpu(fw_alive->uptime_seconds));
 }
 
 static void gaudi_print_nic_axi_irq_info(struct hl_device *hdev, u16 event_type,
-- 
GitLab


From 24c983c88f5e7865de972e3b395baf2c237485ca Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Tue, 8 Nov 2022 13:23:17 +0200
Subject: [PATCH 2101/4122] habanalabs/gaudi2: remove redundant firmware
 version check

Firmware 1.7 is the first official firmware, so no need to check
if we are running a version below it.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index bdb5782afb7e5..36f0ea1100bbc 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -10358,10 +10358,9 @@ int gaudi2_send_device_activity(struct hl_device *hdev, bool open)
 {
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
 
-	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37)
+	if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q))
 		return 0;
 
-	/* TODO: add check for FW version using minor ver once it's known */
 	return hl_fw_send_device_activity(hdev, open);
 }
 
-- 
GitLab


From 2c77ec14c2db228f76a74e9123aecbb5b8c994f5 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 20 Oct 2022 12:05:09 +0300
Subject: [PATCH 2102/4122] habanalabs/gaudi2: don't enable entries in the
 MSIX_GW table

User should use the virtual MSI-X doorbell to generate interrupts from
the device, so there is no need to enable entries in the MSIX_GW table.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 26 -------------------------
 1 file changed, 26 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index 36f0ea1100bbc..d5efec347bc1c 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -4695,30 +4695,6 @@ static void gaudi2_init_dec(struct hl_device *hdev)
 	}
 }
 
-static void gaudi2_init_msix_gw_table(struct hl_device *hdev)
-{
-	u32 first_reg_offset, last_reg_offset, msix_gw_table_base;
-	u8 first_bit, last_bit;
-	int i;
-
-	msix_gw_table_base = mmPCIE_WRAP_MSIX_GW_TABLE_0;
-	first_reg_offset = (GAUDI2_IRQ_NUM_USER_FIRST >> 5) << 2;
-	first_bit = GAUDI2_IRQ_NUM_USER_FIRST % 32;
-	last_reg_offset = (GAUDI2_IRQ_NUM_USER_LAST >> 5) << 2;
-	last_bit = GAUDI2_IRQ_NUM_USER_LAST % 32;
-
-	if (first_reg_offset == last_reg_offset) {
-		WREG32(msix_gw_table_base + first_reg_offset, GENMASK(last_bit, first_bit));
-		return;
-	}
-
-	WREG32(msix_gw_table_base + first_reg_offset, GENMASK(31, first_bit));
-	WREG32(msix_gw_table_base + last_reg_offset, GENMASK(last_bit, 0));
-
-	for (i = first_reg_offset + 4; i < last_reg_offset ; i += 4)
-		WREG32(msix_gw_table_base + i, 0xFFFFFFFF);
-}
-
 static int gaudi2_mmu_update_asid_hop0_addr(struct hl_device *hdev,
 					u32 stlb_base, u32 asid, u64 phys_addr)
 {
@@ -5232,8 +5208,6 @@ static int gaudi2_hw_init(struct hl_device *hdev)
 		return rc;
 	}
 
-	gaudi2_init_msix_gw_table(hdev);
-
 	gaudi2_init_scrambler_hbm(hdev);
 	gaudi2_init_kdma(hdev);
 
-- 
GitLab


From 9c604af0c9d4efe4f308761229186768b3f3a6a9 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 20 Oct 2022 14:40:16 +0300
Subject: [PATCH 2103/4122] habanalabs/gaudi2: return to reset upon SM SEI
 BRESP error

Due to a H/W issue in the LBW path to the PCIE_DBI MSI-X doorbell, there
were false sporadic error responses in SM when it was configured to
write to there, and hence no reset was done as part of handling the
relevant event.
Now that the virtual MSI-X doorbell is used, such errors in SM are not
expected and reset shouldn't be skipped.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index d5efec347bc1c..f0f2f77f56deb 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -8300,11 +8300,10 @@ static void gaudi2_handle_mmu_spi_sei_generic(struct hl_device *hdev, const char
 	WREG32(mmu_base + MMU_INTERRUPT_CLR_OFFSET, interrupt_clr);
 }
 
-static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
+static void gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
 {
 	u32 sei_cause_addr, sei_cause_val, sei_cause_cause, sei_cause_log;
 	u32 cq_intr_addr, cq_intr_val, cq_intr_queue_index;
-	bool reset = true;
 	int i;
 
 	sei_cause_addr = mmDCORE0_SYNC_MNGR_GLBL_SM_SEI_CAUSE + DCORE_OFFSET * sm_index;
@@ -8329,10 +8328,6 @@ static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
 					gaudi2_sm_sei_cause[i].cause_name,
 					gaudi2_sm_sei_cause[i].log_name,
 					sei_cause_log & gaudi2_sm_sei_cause[i].log_mask);
-
-			/* Due to a potential H/W issue, do not reset upon BRESP errors */
-			if (i == 2)
-				reset = false;
 			break;
 		}
 
@@ -8352,8 +8347,6 @@ static bool gaudi2_handle_sm_err(struct hl_device *hdev, u8 sm_index)
 		/* Clear CQ_INTR */
 		WREG32(cq_intr_addr, 0);
 	}
-
-	return reset;
 }
 
 static void gaudi2_handle_mmu_spi_sei_err(struct hl_device *hdev, u16 event_type, u64 *event_mask)
@@ -8755,8 +8748,8 @@ static void hl_arc_event_handle(struct hl_device *hdev,
 
 static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
 {
-	bool reset_required = false, skip_reset = false, is_critical = false;
 	struct gaudi2_device *gaudi2 = hdev->asic_specific;
+	bool reset_required = false, is_critical = false;
 	u32 ctl, reset_flags = HL_DRV_RESET_HARD;
 	int index, sbte_index;
 	u64 event_mask = 0;
@@ -9113,7 +9106,7 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 
 	case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE:
 		index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE;
-		skip_reset = !gaudi2_handle_sm_err(hdev, index);
+		gaudi2_handle_sm_err(hdev, index);
 		event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR;
 		break;
 
@@ -9153,9 +9146,9 @@ static void gaudi2_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_ent
 						event_type);
 	}
 
-	if ((gaudi2_irq_map_table[event_type].reset || reset_required) && !skip_reset &&
-			(hdev->hard_reset_on_fw_events ||
-			(hdev->asic_prop.fw_security_enabled && is_critical)))
+	if ((gaudi2_irq_map_table[event_type].reset || reset_required) &&
+				(hdev->hard_reset_on_fw_events ||
+				(hdev->asic_prop.fw_security_enabled && is_critical)))
 		goto reset_device;
 
 	/* Send unmask irq only for interrupts not classified as MSG */
-- 
GitLab


From bc8e4bae70237f4671e07f83bcfb726eb14d86ed Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 9 Nov 2022 18:08:38 +0200
Subject: [PATCH 2104/4122] habanalabs: reset device if still in use when
 released

If the device file is released while a context is still held, it won't
be possible to reopen it until the context is eventually released.
If that doesn't happen, only a device reset will revert it back to an
operational state, i.e. need to wait for a CS timeout or an error, or to
wait for an external intervention of injecting a reset via sysfs.

At this stage, after the device was released by user, context is held
either because of CS which were left running on the device and are not
relevant anymore, or due to missing cleanup steps from user side.

All of this is in any case handled in the device reset flow, so initiate
the reset at this point instead of waiting for it.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 708db0f48ee0e..49640c8ca9102 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -504,9 +504,10 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 
 	hdev->compute_ctx_in_release = 1;
 
-	if (!hl_hpriv_put(hpriv))
-		dev_notice(hdev->dev,
-			"User process closed FD but device still in use\n");
+	if (!hl_hpriv_put(hpriv)) {
+		dev_notice(hdev->dev, "User process closed FD but device still in use\n");
+		hl_device_reset(hdev, HL_DRV_RESET_HARD);
+	}
 
 	hdev->last_open_session_duration_jif =
 		jiffies - hdev->last_successful_open_jif;
-- 
GitLab


From f69c3e460a614cba8939f7c623f7b77f0bcb3584 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 10 Nov 2022 17:05:24 +0200
Subject: [PATCH 2105/4122] habanalabs: check schedule_hard_reset correctly

schedule_hard_reset can be true only if we didn't do hard-reset.
Therefore, no point of checking it in case hard_reset is true.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
---
 drivers/misc/habanalabs/common/device.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 49640c8ca9102..0650e511a0f5e 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -1737,18 +1737,19 @@ kill_processes:
 		 * the device will be operational although it shouldn't be
 		 */
 		hdev->asic_funcs->enable_events_from_fw(hdev);
-	} else if (!reset_upon_device_release) {
-		hdev->reset_info.compute_reset_cnt++;
-	}
-
-	if (schedule_hard_reset) {
-		dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
-		flags = hdev->reset_info.hard_reset_schedule_flags;
-		hdev->reset_info.hard_reset_schedule_flags = 0;
-		hdev->disabled = true;
-		hard_reset = true;
-		handle_reset_trigger(hdev, flags);
-		goto again;
+	} else {
+		if (!reset_upon_device_release)
+			hdev->reset_info.compute_reset_cnt++;
+
+		if (schedule_hard_reset) {
+			dev_info(hdev->dev, "Performing hard reset scheduled during compute reset\n");
+			flags = hdev->reset_info.hard_reset_schedule_flags;
+			hdev->reset_info.hard_reset_schedule_flags = 0;
+			hdev->disabled = true;
+			hard_reset = true;
+			handle_reset_trigger(hdev, flags);
+			goto again;
+		}
 	}
 
 	return 0;
-- 
GitLab


From b585daa89d572210a94c7f11a746bd5489017003 Mon Sep 17 00:00:00 2001
From: Oded Gabbay <ogabbay@kernel.org>
Date: Thu, 10 Nov 2022 17:24:02 +0200
Subject: [PATCH 2106/4122] habanalabs: extend process wait timeout in device
 fine

Processes that use our device are likely to use at the same time other
devices such as remote storage.

In case our device is removed and a user process is still using the
device, we need to kill the user process. However, if that process
has a thread waiting for i/o to complete on remote storage, for example,
the process won't terminate.

Let's give it enough time to terminate before giving up.

Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Reviewed-by: Tomer Tayar <ttayar@habana.ai>
---
 drivers/misc/habanalabs/common/device.c     |  6 ++++--
 drivers/misc/habanalabs/common/habanalabs.h | 11 ++++++++---
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 0650e511a0f5e..63d0cb7087e8d 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -2300,14 +2300,16 @@ void hl_device_fini(struct hl_device *hdev)
 	 */
 	dev_info(hdev->dev,
 		"Waiting for all processes to exit (timeout of %u seconds)",
-		HL_PENDING_RESET_LONG_SEC);
+		HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI);
 
-	rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC, false);
+	hdev->process_kill_trial_cnt = 0;
+	rc = device_kill_open_processes(hdev, HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI, false);
 	if (rc) {
 		dev_crit(hdev->dev, "Failed to kill all open processes\n");
 		device_disable_open_processes(hdev, false);
 	}
 
+	hdev->process_kill_trial_cnt = 0;
 	rc = device_kill_open_processes(hdev, 0, true);
 	if (rc) {
 		dev_crit(hdev->dev, "Failed to kill all control device open processes\n");
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 0781b8698f744..e7f89868428d6 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -50,9 +50,14 @@ struct hl_fpriv;
 #define HL_MMAP_OFFSET_VALUE_MASK	(0x1FFFFFFFFFFFull >> PAGE_SHIFT)
 #define HL_MMAP_OFFSET_VALUE_GET(off)	(off & HL_MMAP_OFFSET_VALUE_MASK)
 
-#define HL_PENDING_RESET_PER_SEC	10
-#define HL_PENDING_RESET_MAX_TRIALS	60 /* 10 minutes */
-#define HL_PENDING_RESET_LONG_SEC	60
+#define HL_PENDING_RESET_PER_SEC		10
+#define HL_PENDING_RESET_MAX_TRIALS		60 /* 10 minutes */
+#define HL_PENDING_RESET_LONG_SEC		60
+/*
+ * In device fini, wait 10 minutes for user processes to be terminated after we kill them.
+ * This is needed to prevent situation of clearing resources while user processes are still alive.
+ */
+#define HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI	600
 
 #define HL_HARD_RESET_MAX_TIMEOUT	120
 #define HL_PLDM_HARD_RESET_MAX_TIMEOUT	(HL_HARD_RESET_MAX_TIMEOUT * 3)
-- 
GitLab


From 18cd948204fffa61660d3f8454fc9d275c1f6c94 Mon Sep 17 00:00:00 2001
From: farah kassabri <fkassabri@habana.ai>
Date: Tue, 8 Nov 2022 15:24:33 +0200
Subject: [PATCH 2107/4122] habanalabs/gaudi2: change memory scrub mechanism

Currently the scrubbing mechanism used the EDMA engines by directly
setting the engine core registers to scrub a chunk of memory.
Due to a sporadic failure with this mechanism, it was decided to
initiate the engines via its QMAN using LIN-DMA packets.

Signed-off-by: farah kassabri <fkassabri@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 129 +++++++++++++++---------
 1 file changed, 83 insertions(+), 46 deletions(-)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index f0f2f77f56deb..c14e63164a848 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -9171,34 +9171,74 @@ reset_device:
 	hl_device_cond_reset(hdev, reset_flags, event_mask);
 }
 
+static int gaudi2_memset_memory_chunk_using_edma_qm(struct hl_device *hdev,
+			struct packet_lin_dma *lin_dma_pkt, dma_addr_t pkt_dma_addr,
+			u32 hw_queue_id, u32 size, u64 addr, u32 val)
+{
+	u32 ctl, pkt_size;
+	int rc = 0;
+
+	ctl = FIELD_PREP(GAUDI2_PKT_CTL_OPCODE_MASK, PACKET_LIN_DMA);
+	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_MEMSET_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI2_PKT_LIN_DMA_CTL_WRCOMP_MASK, 1);
+	ctl |= FIELD_PREP(GAUDI2_PKT_CTL_EB_MASK, 1);
+
+	lin_dma_pkt->ctl = cpu_to_le32(ctl);
+	lin_dma_pkt->src_addr = cpu_to_le64(val);
+	lin_dma_pkt->dst_addr = cpu_to_le64(addr);
+	lin_dma_pkt->tsize = cpu_to_le32(size);
+
+	pkt_size = sizeof(struct packet_lin_dma);
+
+	rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, pkt_dma_addr);
+	if (rc)
+		dev_err(hdev->dev, "Failed to send lin dma packet to H/W queue %d\n",
+				hw_queue_id);
+
+	return rc;
+}
+
 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val)
 {
-	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	u32 edma_queues_id[] = {GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0,
+					GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0,
+					GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0,
+					GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0};
+	u32 chunk_size, dcore, edma_idx, sob_offset, sob_addr, comp_val,
+		old_mmubp, mmubp, num_of_pkts, busy, pkt_size;
 	u64 comp_addr, cur_addr = addr, end_addr = addr + size;
-	u32 chunk_size, busy, dcore, edma_idx, sob_offset, sob_addr, comp_val, edma_commit;
-	u32 old_mmubp, mmubp;
-	int rc = 0;
+	struct asic_fixed_properties *prop = &hdev->asic_prop;
+	void *lin_dma_pkts_arr;
+	dma_addr_t pkt_dma_addr;
+	int rc = 0, dma_num = 0;
+
+	if (prop->edma_enabled_mask == 0) {
+		dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
+		return -EIO;
+	}
 
 	sob_offset = hdev->asic_prop.first_available_user_sob[0] * 4;
 	sob_addr = mmDCORE0_SYNC_MNGR_OBJS_SOB_OBJ_0 + sob_offset;
 	comp_addr = CFG_BASE + sob_addr;
 	comp_val = FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_INC_MASK, 1) |
 		FIELD_PREP(DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_VAL_MASK, 1);
-
-	edma_commit = FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_LIN_MASK, 1) |
-			FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_MEM_SET_MASK, 1) |
-			FIELD_PREP(ARC_FARM_KDMA_CTX_COMMIT_WR_COMP_EN_MASK, 1);
 	mmubp = FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_WR_MASK, 1) |
 		FIELD_PREP(ARC_FARM_KDMA_CTX_AXUSER_HB_MMU_BP_RD_MASK, 1);
 
-	if (prop->edma_enabled_mask == 0) {
-		dev_info(hdev->dev, "non of the EDMA engines is enabled - skip dram scrubbing\n");
-		return -EIO;
-	}
+	/* Calculate how many lin dma pkts we'll need */
+	num_of_pkts = div64_u64(round_up(size, SZ_2G), SZ_2G);
+	pkt_size = sizeof(struct packet_lin_dma);
+
+	lin_dma_pkts_arr = hl_asic_dma_alloc_coherent(hdev, pkt_size * num_of_pkts,
+					&pkt_dma_addr, GFP_KERNEL);
+	if (!lin_dma_pkts_arr)
+		return -ENOMEM;
 
 	/*
 	 * set mmu bypass for the scrubbing - all ddmas are configured the same so save
 	 * only the first one to restore later
+	 * also set the sob addr for all edma cores for completion.
+	 * set QM as trusted to allow it to access physical address with MMU bp.
 	 */
 	old_mmubp = RREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP);
 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
@@ -9211,17 +9251,22 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
 
 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP +
 					edma_offset, mmubp);
+			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
+					lower_32_bits(comp_addr));
+			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
+					upper_32_bits(comp_addr));
+			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
+					comp_val);
+			gaudi2_qman_set_test_mode(hdev,
+					edma_queues_id[dcore] + 4 * edma_idx, true);
 		}
 	}
 
-	while (cur_addr < end_addr) {
-		int dma_num = 0;
+	WREG32(sob_addr, 0);
 
-		WREG32(sob_addr, 0);
+	while (cur_addr < end_addr) {
 		for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
 			for (edma_idx = 0 ; edma_idx < NUM_OF_EDMA_PER_DCORE ; edma_idx++) {
-				u32 edma_offset = dcore * DCORE_OFFSET +
-					edma_idx * DCORE_EDMA_OFFSET;
 				u32 edma_bit = dcore * NUM_OF_EDMA_PER_DCORE + edma_idx;
 
 				if (!(prop->edma_enabled_mask & BIT(edma_bit)))
@@ -9229,41 +9274,26 @@ static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 siz
 
 				chunk_size = min_t(u64, SZ_2G, end_addr - cur_addr);
 
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_LO + edma_offset,
-						lower_32_bits(val));
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_SRC_BASE_HI + edma_offset,
-						upper_32_bits(val));
-
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_LO + edma_offset,
-						lower_32_bits(cur_addr));
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_BASE_HI + edma_offset,
-						upper_32_bits(cur_addr));
-
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset,
-						lower_32_bits(comp_addr));
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset,
-						upper_32_bits(comp_addr));
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset,
-						comp_val);
-
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_DST_TSIZE_0 + edma_offset,
-						chunk_size);
-				WREG32(mmDCORE0_EDMA0_CORE_CTX_COMMIT + edma_offset, edma_commit);
+				rc = gaudi2_memset_memory_chunk_using_edma_qm(hdev,
+					(struct packet_lin_dma *)lin_dma_pkts_arr + dma_num,
+					pkt_dma_addr + dma_num * pkt_size,
+					edma_queues_id[dcore] + edma_idx * 4,
+					chunk_size, cur_addr, val);
+				if (rc)
+					goto end;
 
 				dma_num++;
-
 				cur_addr += chunk_size;
-
 				if (cur_addr == end_addr)
-					goto poll;
+					break;
 			}
 		}
-poll:
-		rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
-		if (rc) {
-			dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
-			goto end;
-		}
+	}
+
+	rc = hl_poll_timeout(hdev, sob_addr, busy, (busy == dma_num), 1000, 1000000);
+	if (rc) {
+		dev_err(hdev->dev, "DMA Timeout during HBM scrubbing\n");
+		goto end;
 	}
 end:
 	for (dcore = 0 ; dcore < NUM_OF_DCORES ; dcore++) {
@@ -9275,10 +9305,17 @@ end:
 				continue;
 
 			WREG32(mmDCORE0_EDMA0_CORE_CTX_AXUSER_HB_MMU_BP + edma_offset, old_mmubp);
+			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_LO + edma_offset, 0);
+			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_ADDR_HI + edma_offset, 0);
+			WREG32(mmDCORE0_EDMA0_CORE_CTX_WR_COMP_WDATA + edma_offset, 0);
+			gaudi2_qman_set_test_mode(hdev,
+					edma_queues_id[dcore] + 4 * edma_idx, false);
 		}
 	}
 
 	WREG32(sob_addr, 0);
+	hl_asic_dma_free_coherent(hdev, pkt_size * num_of_pkts, lin_dma_pkts_arr, pkt_dma_addr);
+
 	return rc;
 }
 
-- 
GitLab


From 01907ba5252164ca6bf0de670660cd94d77c378c Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Sun, 23 Oct 2022 12:55:21 +0300
Subject: [PATCH 2108/4122] habanalabs: increase the size of busy engines mask

Increase the size of the busy engines mask in 'struct hl_info_hw_idle',
for future ASICs with more than 128 engines.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 9 +++++----
 include/uapi/misc/habanalabs.h          | 2 +-
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 63d0cb7087e8d..f5864893237cc 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -416,8 +416,9 @@ static void hpriv_release(struct kref *ref)
 		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
 							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
 	if (!device_is_idle) {
-		dev_err(hdev->dev, "device not idle after user context is closed (0x%llx_%llx)\n",
-			idle_mask[1], idle_mask[0]);
+		dev_err(hdev->dev,
+			"device not idle after user context is closed (0x%llx_%llx_%llx_%llx)\n",
+			idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
 		reset_device = true;
 	}
 
@@ -1661,8 +1662,8 @@ kill_processes:
 	/* If device is not idle fail the reset process */
 	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
 			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
-		dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx) after reset\n",
-			idle_mask[1], idle_mask[0]);
+		dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx_%llx_%llx) after reset\n",
+			idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
 		rc = -EIO;
 		goto out_err;
 	}
diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h
index e50cb71df081c..3b995e841eb85 100644
--- a/include/uapi/misc/habanalabs.h
+++ b/include/uapi/misc/habanalabs.h
@@ -916,7 +916,7 @@ struct hl_info_dram_usage {
 	__u64 ctx_dram_mem;
 };
 
-#define HL_BUSY_ENGINES_MASK_EXT_SIZE	2
+#define HL_BUSY_ENGINES_MASK_EXT_SIZE	4
 
 struct hl_info_hw_idle {
 	__u32 is_idle;
-- 
GitLab


From 5908560a7f14171d1100f4a357deda659dc26868 Mon Sep 17 00:00:00 2001
From: Marco Pagani <marpagan@redhat.com>
Date: Wed, 16 Nov 2022 14:41:25 +0100
Subject: [PATCH 2109/4122] habanalabs: added return value check for
 hl_fw_dynamic_send_clear_cmd()

The clang-analyzer reported a warning: "Value stored to 'rc' is never
read".

The return value check for the first hl_fw_dynamic_send_clear_cmd() call
in hl_fw_dynamic_send_protocol_cmd() appears to be missing.

Signed-off-by: Marco Pagani <marpagan@redhat.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 01c4ffba6e973..c0909d76d6eb2 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -1783,6 +1783,8 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
 
 	/* first send clear command to clean former commands */
 	rc = hl_fw_dynamic_send_clear_cmd(hdev, fw_loader);
+	if (rc)
+		return rc;
 
 	/* send the actual command */
 	hl_fw_dynamic_send_cmd(hdev, fw_loader, cmd, size);
-- 
GitLab


From 6825b5f81f273fcc1ec61e7e203b0ea40d9987fc Mon Sep 17 00:00:00 2001
From: Marco Pagani <marpagan@redhat.com>
Date: Wed, 23 Nov 2022 09:56:39 +0100
Subject: [PATCH 2110/4122] habanalabs/gaudi2: added memset for the cq_size
 register

The clang-analyzer reported a warning: "Value stored to
'cq_size_addr' is never read".

The cq_size register of dcore0 is not being zeroed using
gaudi2_memset_device_lbw(), along with the other cq_* registers,
even though the corresponding cq_size_addr variable is set.

Signed-off-by: Marco Pagani <marpagan@redhat.com>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/gaudi2/gaudi2.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index c14e63164a848..a33a9072fca4b 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -9386,6 +9386,7 @@ static void gaudi2_restore_user_sm_registers(struct hl_device *hdev)
 	gaudi2_memset_device_lbw(hdev, cq_lbw_data_addr, size, 0);
 	gaudi2_memset_device_lbw(hdev, cq_base_l_addr, size, 0);
 	gaudi2_memset_device_lbw(hdev, cq_base_h_addr, size, 0);
+	gaudi2_memset_device_lbw(hdev, cq_size_addr, size, 0);
 
 	cq_lbw_l_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_L_0 + DCORE_OFFSET;
 	cq_lbw_h_addr = mmDCORE0_SYNC_MNGR_GLBL_LBW_ADDR_H_0 + DCORE_OFFSET;
-- 
GitLab


From b3ad31f33982497dbc7a66a9d3013b1ac6985dfe Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 11 Nov 2022 09:31:28 +0800
Subject: [PATCH 2111/4122] soundwire: intel: start using hw_ops

Before introducing new hardware with completely different register
spaces and programming sequences, we need to abstract some of the
existing routines in hw_ops that will be platform-specific. For now we
only use the 'cnl' ops - after the first Intel platform with SoundWire
capabilities.

Rather than one big intrusive patch, hw_ops are introduced in this
patch so show the dependencies between drivers. Follow-up patches will
introduce callbacks for debugfs, power and bus management.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221111013135.38289-2-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c           | 38 +++++++++++++++++++++++------
 drivers/soundwire/intel.h           |  3 +++
 drivers/soundwire/intel_init.c      |  1 +
 include/linux/soundwire/sdw_intel.h | 15 ++++++++++++
 sound/soc/sof/intel/hda.c           |  2 ++
 5 files changed, 51 insertions(+), 8 deletions(-)

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index b9cb7e31ddb35..f88319f8ded49 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -745,10 +745,10 @@ static int intel_free_stream(struct sdw_intel *sdw,
  * bank switch routines
  */
 
-static int intel_pre_bank_switch(struct sdw_bus *bus)
+static int intel_pre_bank_switch(struct sdw_intel *sdw)
 {
-	struct sdw_cdns *cdns = bus_to_cdns(bus);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_cdns *cdns = &sdw->cdns;
+	struct sdw_bus *bus = &cdns->bus;
 
 	/* Write to register only for multi-link */
 	if (!bus->multi_link)
@@ -759,10 +759,10 @@ static int intel_pre_bank_switch(struct sdw_bus *bus)
 	return 0;
 }
 
-static int intel_post_bank_switch(struct sdw_bus *bus)
+static int intel_post_bank_switch(struct sdw_intel *sdw)
 {
-	struct sdw_cdns *cdns = bus_to_cdns(bus);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_cdns *cdns = &sdw->cdns;
+	struct sdw_bus *bus = &cdns->bus;
 	void __iomem *shim = sdw->link_res->shim;
 	int sync_reg, ret;
 
@@ -1422,6 +1422,28 @@ static int intel_stop_bus(struct sdw_intel *sdw, bool clock_stop)
 	return 0;
 }
 
+const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = {
+	.pre_bank_switch = intel_pre_bank_switch,
+	.post_bank_switch = intel_post_bank_switch,
+};
+EXPORT_SYMBOL_NS(sdw_intel_cnl_hw_ops, SOUNDWIRE_INTEL);
+
+static int generic_pre_bank_switch(struct sdw_bus *bus)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+
+	return sdw->link_res->hw_ops->pre_bank_switch(sdw);
+}
+
+static int generic_post_bank_switch(struct sdw_bus *bus)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+
+	return sdw->link_res->hw_ops->post_bank_switch(sdw);
+}
+
 static int sdw_master_read_intel_prop(struct sdw_bus *bus)
 {
 	struct sdw_master_prop *prop = &bus->prop;
@@ -1477,8 +1499,8 @@ static struct sdw_master_ops sdw_intel_ops = {
 	.xfer_msg_defer = cdns_xfer_msg_defer,
 	.reset_page_addr = cdns_reset_page_addr,
 	.set_bus_conf = cdns_bus_conf,
-	.pre_bank_switch = intel_pre_bank_switch,
-	.post_bank_switch = intel_post_bank_switch,
+	.pre_bank_switch = generic_pre_bank_switch,
+	.post_bank_switch = generic_post_bank_switch,
 	.read_ping_status = cdns_read_ping_status,
 };
 
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index cd93a44dba9a1..3170df76b4110 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -7,6 +7,7 @@
 /**
  * struct sdw_intel_link_res - Soundwire Intel link resource structure,
  * typically populated by the controller driver.
+ * @hw_ops: platform-specific ops
  * @mmio_base: mmio base of SoundWire registers
  * @registers: Link IO registers base
  * @shim: Audio shim pointer
@@ -22,6 +23,8 @@
  * @list: used to walk-through all masters exposed by the same controller
  */
 struct sdw_intel_link_res {
+	const struct sdw_intel_hw_ops *hw_ops;
+
 	void __iomem *mmio_base; /* not strictly needed, useful for debug */
 	void __iomem *registers;
 	void __iomem *shim;
diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c
index d091513919df5..1e6d74b3e7732 100644
--- a/drivers/soundwire/intel_init.c
+++ b/drivers/soundwire/intel_init.c
@@ -60,6 +60,7 @@ static struct sdw_intel_link_dev *intel_link_dev_register(struct sdw_intel_res *
 
 	/* Add link information used in the driver probe */
 	link = &ldev->link_res;
+	link->hw_ops = res->hw_ops;
 	link->mmio_base = res->mmio_base;
 	link->registers = res->mmio_base + SDW_LINK_BASE
 		+ (SDW_LINK_SIZE * link_id);
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 2e9fd91572d4c..2dbe34b41ef14 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -233,6 +233,7 @@ struct sdw_intel_ctx {
  * struct sdw_intel_res - Soundwire Intel global resource structure,
  * typically populated by the DSP driver
  *
+ * @hw_ops: abstraction for platform ops
  * @count: link count
  * @mmio_base: mmio base of SoundWire registers
  * @irq: interrupt number
@@ -249,6 +250,7 @@ struct sdw_intel_ctx {
  * @alh_base: sdw alh base.
  */
 struct sdw_intel_res {
+	const struct sdw_intel_hw_ops *hw_ops;
 	int count;
 	void __iomem *mmio_base;
 	int irq;
@@ -292,4 +294,17 @@ irqreturn_t sdw_intel_thread(int irq, void *dev_id);
 
 #define SDW_INTEL_QUIRK_MASK_BUS_DISABLE      BIT(1)
 
+struct sdw_intel;
+
+/* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms.
+ * @pre_bank_switch: helper for bus management
+ * @post_bank_switch: helper for bus management
+ */
+struct sdw_intel_hw_ops {
+	int (*pre_bank_switch)(struct sdw_intel *sdw);
+	int (*post_bank_switch)(struct sdw_intel *sdw);
+};
+
+extern const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops;
+
 #endif
diff --git a/sound/soc/sof/intel/hda.c b/sound/soc/sof/intel/hda.c
index 1188ec51816bd..3d6254489056a 100644
--- a/sound/soc/sof/intel/hda.c
+++ b/sound/soc/sof/intel/hda.c
@@ -188,6 +188,7 @@ static int hda_sdw_probe(struct snd_sof_dev *sdev)
 
 	memset(&res, 0, sizeof(res));
 
+	res.hw_ops = &sdw_intel_cnl_hw_ops;
 	res.mmio_base = sdev->bar[HDA_DSP_BAR];
 	res.shim_base = hdev->desc->sdw_shim_base;
 	res.alh_base = hdev->desc->sdw_alh_base;
@@ -1694,3 +1695,4 @@ MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
 MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
 MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT);
+MODULE_IMPORT_NS(SOUNDWIRE_INTEL);
-- 
GitLab


From fb2dc6a0a5f885233d632b1e92be9c0be977b0dc Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 11 Nov 2022 09:31:29 +0800
Subject: [PATCH 2112/4122] soundwire: intel: add debugfs callbacks in hw_ops

No functionality change, only add indirection for debugfs helpers.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221111013135.38289-3-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c           |  7 +++++--
 drivers/soundwire/intel.h           | 16 ++++++++++++++++
 include/linux/soundwire/sdw_intel.h |  5 +++++
 3 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index f88319f8ded49..914f2fb43721a 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1423,6 +1423,9 @@ static int intel_stop_bus(struct sdw_intel *sdw, bool clock_stop)
 }
 
 const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = {
+	.debugfs_init = intel_debugfs_init,
+	.debugfs_exit = intel_debugfs_exit,
+
 	.pre_bank_switch = intel_pre_bank_switch,
 	.post_bank_switch = intel_post_bank_switch,
 };
@@ -1614,7 +1617,7 @@ int intel_link_startup(struct auxiliary_device *auxdev)
 		goto err_power_up;
 	}
 
-	intel_debugfs_init(sdw);
+	sdw_intel_debugfs_init(sdw);
 
 	/* start bus */
 	ret = intel_start_bus(sdw);
@@ -1685,7 +1688,7 @@ static void intel_link_remove(struct auxiliary_device *auxdev)
 	 * SDW_INTEL_CLK_STOP_NOT_ALLOWED
 	 */
 	if (!bus->prop.hw_disabled) {
-		intel_debugfs_exit(sdw);
+		sdw_intel_debugfs_exit(sdw);
 		sdw_cdns_enable_interrupt(cdns, false);
 	}
 	sdw_bus_master_delete(bus);
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 3170df76b4110..5548b8451d014 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -61,4 +61,20 @@ struct sdw_intel_link_dev {
 #define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \
 	container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev)
 
+#define SDW_INTEL_CHECK_OPS(sdw, cb)	((sdw) && (sdw)->link_res && (sdw)->link_res->hw_ops && \
+					 (sdw)->link_res->hw_ops->cb)
+#define SDW_INTEL_OPS(sdw, cb)		((sdw)->link_res->hw_ops->cb)
+
+static inline void sdw_intel_debugfs_init(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, debugfs_init))
+		SDW_INTEL_OPS(sdw, debugfs_init)(sdw);
+}
+
+static inline void sdw_intel_debugfs_exit(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, debugfs_exit))
+		SDW_INTEL_OPS(sdw, debugfs_exit)(sdw);
+}
+
 #endif /* __SDW_INTEL_LOCAL_H */
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 2dbe34b41ef14..211924e4ebf21 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -297,10 +297,15 @@ irqreturn_t sdw_intel_thread(int irq, void *dev_id);
 struct sdw_intel;
 
 /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms.
+ * @debugfs_init: initialize all debugfs capabilities
+ * @debugfs_exit: close and cleanup debugfs capabilities
  * @pre_bank_switch: helper for bus management
  * @post_bank_switch: helper for bus management
  */
 struct sdw_intel_hw_ops {
+	void (*debugfs_init)(struct sdw_intel *sdw);
+	void (*debugfs_exit)(struct sdw_intel *sdw);
+
 	int (*pre_bank_switch)(struct sdw_intel *sdw);
 	int (*post_bank_switch)(struct sdw_intel *sdw);
 };
-- 
GitLab


From b6234bcc6589a0719ec91d810114c0b556a5b88b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 11 Nov 2022 09:31:30 +0800
Subject: [PATCH 2113/4122] soundwire: intel: add register_dai callback in
 hw_ops

No functionality change, only add indirection for DAI registration
helper.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221111013135.38289-4-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c           | 4 +++-
 drivers/soundwire/intel.h           | 7 +++++++
 include/linux/soundwire/sdw_intel.h | 3 +++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 914f2fb43721a..0496eb0d60847 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1426,6 +1426,8 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = {
 	.debugfs_init = intel_debugfs_init,
 	.debugfs_exit = intel_debugfs_exit,
 
+	.register_dai = intel_register_dai,
+
 	.pre_bank_switch = intel_pre_bank_switch,
 	.post_bank_switch = intel_post_bank_switch,
 };
@@ -1611,7 +1613,7 @@ int intel_link_startup(struct auxiliary_device *auxdev)
 		goto err_init;
 
 	/* Register DAIs */
-	ret = intel_register_dai(sdw);
+	ret = sdw_intel_register_dai(sdw);
 	if (ret) {
 		dev_err(dev, "DAI registration failed: %d\n", ret);
 		goto err_power_up;
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 5548b8451d014..0521cab311a33 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -77,4 +77,11 @@ static inline void sdw_intel_debugfs_exit(struct sdw_intel *sdw)
 		SDW_INTEL_OPS(sdw, debugfs_exit)(sdw);
 }
 
+static inline int sdw_intel_register_dai(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, register_dai))
+		return SDW_INTEL_OPS(sdw, register_dai)(sdw);
+	return -ENOTSUPP;
+}
+
 #endif /* __SDW_INTEL_LOCAL_H */
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 211924e4ebf21..5be63d4fe62ec 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -299,6 +299,7 @@ struct sdw_intel;
 /* struct intel_sdw_hw_ops - SoundWire ops for Intel platforms.
  * @debugfs_init: initialize all debugfs capabilities
  * @debugfs_exit: close and cleanup debugfs capabilities
+ * @register_dai: read all PDI information and register DAIs
  * @pre_bank_switch: helper for bus management
  * @post_bank_switch: helper for bus management
  */
@@ -306,6 +307,8 @@ struct sdw_intel_hw_ops {
 	void (*debugfs_init)(struct sdw_intel *sdw);
 	void (*debugfs_exit)(struct sdw_intel *sdw);
 
+	int (*register_dai)(struct sdw_intel *sdw);
+
 	int (*pre_bank_switch)(struct sdw_intel *sdw);
 	int (*post_bank_switch)(struct sdw_intel *sdw);
 };
-- 
GitLab


From 3db0c5a6a2832c7b4b40676299e4bbbe1a96bc8b Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 11 Nov 2022 09:31:31 +0800
Subject: [PATCH 2114/4122] soundwire: intel: add bus management callbacks in
 hw_ops

No functionality change, only add indirection for bus management
helpers.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221111013135.38289-5-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c           | 24 ++++++++++++--------
 drivers/soundwire/intel.h           | 34 +++++++++++++++++++++++++++++
 include/linux/soundwire/sdw_intel.h | 11 ++++++++++
 3 files changed, 60 insertions(+), 9 deletions(-)

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 0496eb0d60847..6d2fdf3a01fd8 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1428,6 +1428,12 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = {
 
 	.register_dai = intel_register_dai,
 
+	.check_clock_stop = intel_check_clock_stop,
+	.start_bus = intel_start_bus,
+	.start_bus_after_reset = intel_start_bus_after_reset,
+	.start_bus_after_clock_stop = intel_start_bus_after_clock_stop,
+	.stop_bus = intel_stop_bus,
+
 	.pre_bank_switch = intel_pre_bank_switch,
 	.post_bank_switch = intel_post_bank_switch,
 };
@@ -1622,7 +1628,7 @@ int intel_link_startup(struct auxiliary_device *auxdev)
 	sdw_intel_debugfs_init(sdw);
 
 	/* start bus */
-	ret = intel_start_bus(sdw);
+	ret = sdw_intel_start_bus(sdw);
 	if (ret) {
 		dev_err(dev, "bus start failed: %d\n", ret);
 		goto err_power_up;
@@ -1850,7 +1856,7 @@ static int __maybe_unused intel_suspend(struct device *dev)
 		return 0;
 	}
 
-	ret = intel_stop_bus(sdw, false);
+	ret = sdw_intel_stop_bus(sdw, false);
 	if (ret < 0) {
 		dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret);
 		return ret;
@@ -1876,14 +1882,14 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev)
 	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
 
 	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) {
-		ret = intel_stop_bus(sdw, false);
+		ret = sdw_intel_stop_bus(sdw, false);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot stop bus during teardown: %d\n",
 				__func__, ret);
 			return ret;
 		}
 	} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) {
-		ret = intel_stop_bus(sdw, true);
+		ret = sdw_intel_stop_bus(sdw, true);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n",
 				__func__, ret);
@@ -1941,7 +1947,7 @@ static int __maybe_unused intel_resume(struct device *dev)
 	 */
 	sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET);
 
-	ret = intel_start_bus(sdw);
+	ret = sdw_intel_start_bus(sdw);
 	if (ret < 0) {
 		dev_err(dev, "cannot start bus during resume\n");
 		intel_link_power_down(sdw);
@@ -1995,7 +2001,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 		 */
 		sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET);
 
-		ret = intel_start_bus(sdw);
+		ret = sdw_intel_start_bus(sdw);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret);
 			intel_link_power_down(sdw);
@@ -2010,7 +2016,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 			return ret;
 		}
 
-		ret = intel_start_bus_after_reset(sdw);
+		ret = sdw_intel_start_bus_after_reset(sdw);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret);
 			intel_link_power_down(sdw);
@@ -2018,7 +2024,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 		}
 	} else if (!clock_stop_quirks) {
 
-		intel_check_clock_stop(sdw);
+		sdw_intel_check_clock_stop(sdw);
 
 		ret = intel_link_power_up(sdw);
 		if (ret) {
@@ -2026,7 +2032,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 			return ret;
 		}
 
-		ret = intel_start_bus_after_clock_stop(sdw);
+		ret = sdw_intel_start_bus_after_clock_stop(sdw);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret);
 			intel_link_power_down(sdw);
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 0521cab311a33..99a2d875a3316 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -84,4 +84,38 @@ static inline int sdw_intel_register_dai(struct sdw_intel *sdw)
 	return -ENOTSUPP;
 }
 
+static inline void sdw_intel_check_clock_stop(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, check_clock_stop))
+		SDW_INTEL_OPS(sdw, check_clock_stop)(sdw);
+}
+
+static inline int sdw_intel_start_bus(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, start_bus))
+		return SDW_INTEL_OPS(sdw, start_bus)(sdw);
+	return -ENOTSUPP;
+}
+
+static inline int sdw_intel_start_bus_after_reset(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, start_bus_after_reset))
+		return SDW_INTEL_OPS(sdw, start_bus_after_reset)(sdw);
+	return -ENOTSUPP;
+}
+
+static inline int sdw_intel_start_bus_after_clock_stop(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, start_bus_after_clock_stop))
+		return SDW_INTEL_OPS(sdw, start_bus_after_clock_stop)(sdw);
+	return -ENOTSUPP;
+}
+
+static inline int sdw_intel_stop_bus(struct sdw_intel *sdw, bool clock_stop)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, stop_bus))
+		return SDW_INTEL_OPS(sdw, stop_bus)(sdw, clock_stop);
+	return -ENOTSUPP;
+}
+
 #endif /* __SDW_INTEL_LOCAL_H */
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 5be63d4fe62ec..cee61bc9af8ad 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -300,6 +300,11 @@ struct sdw_intel;
  * @debugfs_init: initialize all debugfs capabilities
  * @debugfs_exit: close and cleanup debugfs capabilities
  * @register_dai: read all PDI information and register DAIs
+ * @check_clock_stop: throw error message if clock is not stopped.
+ * @start_bus: normal start
+ * @start_bus_after_reset: start after reset
+ * @start_bus_after_clock_stop: start after mode0 clock stop
+ * @stop_bus: stop all bus
  * @pre_bank_switch: helper for bus management
  * @post_bank_switch: helper for bus management
  */
@@ -309,6 +314,12 @@ struct sdw_intel_hw_ops {
 
 	int (*register_dai)(struct sdw_intel *sdw);
 
+	void (*check_clock_stop)(struct sdw_intel *sdw);
+	int (*start_bus)(struct sdw_intel *sdw);
+	int (*start_bus_after_reset)(struct sdw_intel *sdw);
+	int (*start_bus_after_clock_stop)(struct sdw_intel *sdw);
+	int (*stop_bus)(struct sdw_intel *sdw, bool clock_stop);
+
 	int (*pre_bank_switch)(struct sdw_intel *sdw);
 	int (*post_bank_switch)(struct sdw_intel *sdw);
 };
-- 
GitLab


From 49c9ff45991a5a62e040c8b43c89a9ab38a0a91f Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 11 Nov 2022 09:31:32 +0800
Subject: [PATCH 2115/4122] soundwire: intel: add link power management
 callbacks in hw_ops

No functionality change, only add indirection for link power
management helpers.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221111013135.38289-6-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c           | 23 +++++++++++++----------
 drivers/soundwire/intel.h           | 14 ++++++++++++++
 include/linux/soundwire/sdw_intel.h |  5 +++++
 3 files changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 6d2fdf3a01fd8..2320f1b8a2d10 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1434,6 +1434,9 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = {
 	.start_bus_after_clock_stop = intel_start_bus_after_clock_stop,
 	.stop_bus = intel_stop_bus,
 
+	.link_power_up = intel_link_power_up,
+	.link_power_down = intel_link_power_down,
+
 	.pre_bank_switch = intel_pre_bank_switch,
 	.post_bank_switch = intel_post_bank_switch,
 };
@@ -1614,7 +1617,7 @@ int intel_link_startup(struct auxiliary_device *auxdev)
 	bus->multi_link = multi_link;
 
 	/* Initialize shim, controller */
-	ret = intel_link_power_up(sdw);
+	ret = sdw_intel_link_power_up(sdw);
 	if (ret)
 		goto err_init;
 
@@ -1679,7 +1682,7 @@ int intel_link_startup(struct auxiliary_device *auxdev)
 	return 0;
 
 err_power_up:
-	intel_link_power_down(sdw);
+	sdw_intel_link_power_down(sdw);
 err_init:
 	return ret;
 }
@@ -1935,7 +1938,7 @@ static int __maybe_unused intel_resume(struct device *dev)
 			pm_runtime_idle(dev);
 	}
 
-	ret = intel_link_power_up(sdw);
+	ret = sdw_intel_link_power_up(sdw);
 	if (ret) {
 		dev_err(dev, "%s failed: %d\n", __func__, ret);
 		return ret;
@@ -1950,7 +1953,7 @@ static int __maybe_unused intel_resume(struct device *dev)
 	ret = sdw_intel_start_bus(sdw);
 	if (ret < 0) {
 		dev_err(dev, "cannot start bus during resume\n");
-		intel_link_power_down(sdw);
+		sdw_intel_link_power_down(sdw);
 		return ret;
 	}
 
@@ -1989,7 +1992,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
 
 	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) {
-		ret = intel_link_power_up(sdw);
+		ret = sdw_intel_link_power_up(sdw);
 		if (ret) {
 			dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret);
 			return ret;
@@ -2004,13 +2007,13 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 		ret = sdw_intel_start_bus(sdw);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret);
-			intel_link_power_down(sdw);
+			sdw_intel_link_power_down(sdw);
 			return ret;
 		}
 
 
 	} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) {
-		ret = intel_link_power_up(sdw);
+		ret = sdw_intel_link_power_up(sdw);
 		if (ret) {
 			dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret);
 			return ret;
@@ -2019,14 +2022,14 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 		ret = sdw_intel_start_bus_after_reset(sdw);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret);
-			intel_link_power_down(sdw);
+			sdw_intel_link_power_down(sdw);
 			return ret;
 		}
 	} else if (!clock_stop_quirks) {
 
 		sdw_intel_check_clock_stop(sdw);
 
-		ret = intel_link_power_up(sdw);
+		ret = sdw_intel_link_power_up(sdw);
 		if (ret) {
 			dev_err(dev, "%s: power_up failed: %d\n", __func__, ret);
 			return ret;
@@ -2035,7 +2038,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 		ret = sdw_intel_start_bus_after_clock_stop(sdw);
 		if (ret < 0) {
 			dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret);
-			intel_link_power_down(sdw);
+			sdw_intel_link_power_down(sdw);
 			return ret;
 		}
 	} else {
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 99a2d875a3316..0f63e75841321 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -118,4 +118,18 @@ static inline int sdw_intel_stop_bus(struct sdw_intel *sdw, bool clock_stop)
 	return -ENOTSUPP;
 }
 
+static inline int sdw_intel_link_power_up(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, link_power_up))
+		return SDW_INTEL_OPS(sdw, link_power_up)(sdw);
+	return -ENOTSUPP;
+}
+
+static inline int sdw_intel_link_power_down(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, link_power_down))
+		return SDW_INTEL_OPS(sdw, link_power_down)(sdw);
+	return -ENOTSUPP;
+}
+
 #endif /* __SDW_INTEL_LOCAL_H */
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index cee61bc9af8ad..81430201b8b9d 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -305,6 +305,8 @@ struct sdw_intel;
  * @start_bus_after_reset: start after reset
  * @start_bus_after_clock_stop: start after mode0 clock stop
  * @stop_bus: stop all bus
+ * @link_power_up: power-up using chip-specific helpers
+ * @link_power_down: power-down with chip-specific helpers
  * @pre_bank_switch: helper for bus management
  * @post_bank_switch: helper for bus management
  */
@@ -320,6 +322,9 @@ struct sdw_intel_hw_ops {
 	int (*start_bus_after_clock_stop)(struct sdw_intel *sdw);
 	int (*stop_bus)(struct sdw_intel *sdw, bool clock_stop);
 
+	int (*link_power_up)(struct sdw_intel *sdw);
+	int (*link_power_down)(struct sdw_intel *sdw);
+
 	int (*pre_bank_switch)(struct sdw_intel *sdw);
 	int (*post_bank_switch)(struct sdw_intel *sdw);
 };
-- 
GitLab


From 36e3b385f35a33a10b792ec46350dd87d79e84dd Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 11 Nov 2022 09:31:33 +0800
Subject: [PATCH 2116/4122] soundwire: intel: add in-band wake callbacks in
 hw_ops

No functionality change, only add indirection for in-band wake
management helpers.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221111013135.38289-7-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/intel.c           | 11 +++++++----
 drivers/soundwire/intel.h           | 13 +++++++++++++
 include/linux/soundwire/sdw_intel.h |  5 +++++
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index 2320f1b8a2d10..ea6479b4010de 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -1437,6 +1437,9 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = {
 	.link_power_up = intel_link_power_up,
 	.link_power_down = intel_link_power_down,
 
+	.shim_check_wake = intel_shim_check_wake,
+	.shim_wake = intel_shim_wake,
+
 	.pre_bank_switch = intel_pre_bank_switch,
 	.post_bank_switch = intel_post_bank_switch,
 };
@@ -1720,11 +1723,11 @@ int intel_link_process_wakeen_event(struct auxiliary_device *auxdev)
 		return 0;
 	}
 
-	if (!intel_shim_check_wake(sdw))
+	if (!sdw_intel_shim_check_wake(sdw))
 		return 0;
 
 	/* disable WAKEEN interrupt ASAP to prevent interrupt flood */
-	intel_shim_wake(sdw, false);
+	sdw_intel_shim_wake(sdw, false);
 
 	/*
 	 * resume the Master, which will generate a bus reset and result in
@@ -1852,7 +1855,7 @@ static int __maybe_unused intel_suspend(struct device *dev)
 				 */
 				dev_err(dev, "%s: invalid config: parent is suspended\n", __func__);
 			} else {
-				intel_shim_wake(sdw, false);
+				sdw_intel_shim_wake(sdw, false);
 			}
 		}
 
@@ -1987,7 +1990,7 @@ static int __maybe_unused intel_resume_runtime(struct device *dev)
 	}
 
 	/* unconditionally disable WAKEEN interrupt */
-	intel_shim_wake(sdw, false);
+	sdw_intel_shim_wake(sdw, false);
 
 	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
 
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 0f63e75841321..9ac3397757a07 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -132,4 +132,17 @@ static inline int sdw_intel_link_power_down(struct sdw_intel *sdw)
 	return -ENOTSUPP;
 }
 
+static inline int sdw_intel_shim_check_wake(struct sdw_intel *sdw)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, shim_check_wake))
+		return SDW_INTEL_OPS(sdw, shim_check_wake)(sdw);
+	return -ENOTSUPP;
+}
+
+static inline void sdw_intel_shim_wake(struct sdw_intel *sdw, bool wake_enable)
+{
+	if (SDW_INTEL_CHECK_OPS(sdw, shim_wake))
+		SDW_INTEL_OPS(sdw, shim_wake)(sdw, wake_enable);
+}
+
 #endif /* __SDW_INTEL_LOCAL_H */
diff --git a/include/linux/soundwire/sdw_intel.h b/include/linux/soundwire/sdw_intel.h
index 81430201b8b9d..0942cd4640953 100644
--- a/include/linux/soundwire/sdw_intel.h
+++ b/include/linux/soundwire/sdw_intel.h
@@ -307,6 +307,8 @@ struct sdw_intel;
  * @stop_bus: stop all bus
  * @link_power_up: power-up using chip-specific helpers
  * @link_power_down: power-down with chip-specific helpers
+ * @shim_check_wake: check if a wake was received
+ * @shim_wake: enable/disable in-band wake management
  * @pre_bank_switch: helper for bus management
  * @post_bank_switch: helper for bus management
  */
@@ -325,6 +327,9 @@ struct sdw_intel_hw_ops {
 	int (*link_power_up)(struct sdw_intel *sdw);
 	int (*link_power_down)(struct sdw_intel *sdw);
 
+	int  (*shim_check_wake)(struct sdw_intel *sdw);
+	void (*shim_wake)(struct sdw_intel *sdw, bool wake_enable);
+
 	int (*pre_bank_switch)(struct sdw_intel *sdw);
 	int (*post_bank_switch)(struct sdw_intel *sdw);
 };
-- 
GitLab


From 7cbf00bd4142cd88ac7ecbc4ea7b917a220cb721 Mon Sep 17 00:00:00 2001
From: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Date: Fri, 11 Nov 2022 09:31:34 +0800
Subject: [PATCH 2117/4122] soundwire: intel: split auxdevice to different file

The auxdevice layer is completely generic, it should be split from
intel.c which is only geared to the 'cnl' hw_ops now.

Signed-off-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Reviewed-by: Rander Wang <rander.wang@intel.com>
Signed-off-by: Bard Liao <yung-chuan.liao@linux.intel.com>
Link: https://lore.kernel.org/r/20221111013135.38289-8-yung-chuan.liao@linux.intel.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/soundwire/Makefile          |   2 +-
 drivers/soundwire/intel.c           | 657 ---------------------------
 drivers/soundwire/intel.h           |  11 +-
 drivers/soundwire/intel_auxdevice.c | 678 ++++++++++++++++++++++++++++
 drivers/soundwire/intel_auxdevice.h |  18 +
 drivers/soundwire/intel_init.c      |   1 +
 6 files changed, 700 insertions(+), 667 deletions(-)
 create mode 100644 drivers/soundwire/intel_auxdevice.c
 create mode 100644 drivers/soundwire/intel_auxdevice.h

diff --git a/drivers/soundwire/Makefile b/drivers/soundwire/Makefile
index 986776787b9ea..ca97414ada705 100644
--- a/drivers/soundwire/Makefile
+++ b/drivers/soundwire/Makefile
@@ -20,7 +20,7 @@ soundwire-cadence-y := cadence_master.o
 obj-$(CONFIG_SOUNDWIRE_CADENCE) += soundwire-cadence.o
 
 #Intel driver
-soundwire-intel-y :=	intel.o intel_init.o dmi-quirks.o
+soundwire-intel-y :=	intel.o intel_auxdevice.o intel_init.o dmi-quirks.o
 obj-$(CONFIG_SOUNDWIRE_INTEL) += soundwire-intel.o
 
 #Qualcomm driver
diff --git a/drivers/soundwire/intel.c b/drivers/soundwire/intel.c
index ea6479b4010de..bc9c50bacc494 100644
--- a/drivers/soundwire/intel.c
+++ b/drivers/soundwire/intel.c
@@ -8,10 +8,7 @@
 #include <linux/acpi.h>
 #include <linux/debugfs.h>
 #include <linux/delay.h>
-#include <linux/module.h>
-#include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/auxiliary_bus.h>
 #include <sound/pcm_params.h>
 #include <linux/pm_runtime.h>
 #include <sound/soc.h>
@@ -22,27 +19,6 @@
 #include "bus.h"
 #include "intel.h"
 
-/* IDA min selected to avoid conflicts with HDaudio/iDISP SDI values */
-#define INTEL_DEV_NUM_IDA_MIN           4
-
-#define INTEL_MASTER_SUSPEND_DELAY_MS	3000
-#define INTEL_MASTER_RESET_ITERATIONS	10
-
-/*
- * debug/config flags for the Intel SoundWire Master.
- *
- * Since we may have multiple masters active, we can have up to 8
- * flags reused in each byte, with master0 using the ls-byte, etc.
- */
-
-#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME		BIT(0)
-#define SDW_INTEL_MASTER_DISABLE_CLOCK_STOP		BIT(1)
-#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE	BIT(2)
-#define SDW_INTEL_MASTER_DISABLE_MULTI_LINK		BIT(3)
-
-static int md_flags;
-module_param_named(sdw_md_flags, md_flags, int, 0444);
-MODULE_PARM_DESC(sdw_md_flags, "SoundWire Intel Master device flags (0x0 all off)");
 
 enum intel_pdi_type {
 	INTEL_PDI_IN = 0,
@@ -1445,636 +1421,3 @@ const struct sdw_intel_hw_ops sdw_intel_cnl_hw_ops = {
 };
 EXPORT_SYMBOL_NS(sdw_intel_cnl_hw_ops, SOUNDWIRE_INTEL);
 
-static int generic_pre_bank_switch(struct sdw_bus *bus)
-{
-	struct sdw_cdns *cdns = bus_to_cdns(bus);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-
-	return sdw->link_res->hw_ops->pre_bank_switch(sdw);
-}
-
-static int generic_post_bank_switch(struct sdw_bus *bus)
-{
-	struct sdw_cdns *cdns = bus_to_cdns(bus);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-
-	return sdw->link_res->hw_ops->post_bank_switch(sdw);
-}
-
-static int sdw_master_read_intel_prop(struct sdw_bus *bus)
-{
-	struct sdw_master_prop *prop = &bus->prop;
-	struct fwnode_handle *link;
-	char name[32];
-	u32 quirk_mask;
-
-	/* Find master handle */
-	snprintf(name, sizeof(name),
-		 "mipi-sdw-link-%d-subproperties", bus->link_id);
-
-	link = device_get_named_child_node(bus->dev, name);
-	if (!link) {
-		dev_err(bus->dev, "Master node %s not found\n", name);
-		return -EIO;
-	}
-
-	fwnode_property_read_u32(link,
-				 "intel-sdw-ip-clock",
-				 &prop->mclk_freq);
-
-	/* the values reported by BIOS are the 2x clock, not the bus clock */
-	prop->mclk_freq /= 2;
-
-	fwnode_property_read_u32(link,
-				 "intel-quirk-mask",
-				 &quirk_mask);
-
-	if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
-		prop->hw_disabled = true;
-
-	prop->quirks = SDW_MASTER_QUIRKS_CLEAR_INITIAL_CLASH |
-		SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY;
-
-	return 0;
-}
-
-static int intel_prop_read(struct sdw_bus *bus)
-{
-	/* Initialize with default handler to read all DisCo properties */
-	sdw_master_read_prop(bus);
-
-	/* read Intel-specific properties */
-	sdw_master_read_intel_prop(bus);
-
-	return 0;
-}
-
-static struct sdw_master_ops sdw_intel_ops = {
-	.read_prop = intel_prop_read,
-	.override_adr = sdw_dmi_override_adr,
-	.xfer_msg = cdns_xfer_msg,
-	.xfer_msg_defer = cdns_xfer_msg_defer,
-	.reset_page_addr = cdns_reset_page_addr,
-	.set_bus_conf = cdns_bus_conf,
-	.pre_bank_switch = generic_pre_bank_switch,
-	.post_bank_switch = generic_post_bank_switch,
-	.read_ping_status = cdns_read_ping_status,
-};
-
-/*
- * probe and init (aux_dev_id argument is required by function prototype but not used)
- */
-static int intel_link_probe(struct auxiliary_device *auxdev,
-			    const struct auxiliary_device_id *aux_dev_id)
-
-{
-	struct device *dev = &auxdev->dev;
-	struct sdw_intel_link_dev *ldev = auxiliary_dev_to_sdw_intel_link_dev(auxdev);
-	struct sdw_intel *sdw;
-	struct sdw_cdns *cdns;
-	struct sdw_bus *bus;
-	int ret;
-
-	sdw = devm_kzalloc(dev, sizeof(*sdw), GFP_KERNEL);
-	if (!sdw)
-		return -ENOMEM;
-
-	cdns = &sdw->cdns;
-	bus = &cdns->bus;
-
-	sdw->instance = auxdev->id;
-	sdw->link_res = &ldev->link_res;
-	cdns->dev = dev;
-	cdns->registers = sdw->link_res->registers;
-	cdns->instance = sdw->instance;
-	cdns->msg_count = 0;
-
-	bus->link_id = auxdev->id;
-	bus->dev_num_ida_min = INTEL_DEV_NUM_IDA_MIN;
-	bus->clk_stop_timeout = 1;
-
-	sdw_cdns_probe(cdns);
-
-	/* Set ops */
-	bus->ops = &sdw_intel_ops;
-
-	/* set driver data, accessed by snd_soc_dai_get_drvdata() */
-	auxiliary_set_drvdata(auxdev, cdns);
-
-	/* use generic bandwidth allocation algorithm */
-	sdw->cdns.bus.compute_params = sdw_compute_params;
-
-	/* avoid resuming from pm_runtime suspend if it's not required */
-	dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
-
-	ret = sdw_bus_master_add(bus, dev, dev->fwnode);
-	if (ret) {
-		dev_err(dev, "sdw_bus_master_add fail: %d\n", ret);
-		return ret;
-	}
-
-	if (bus->prop.hw_disabled)
-		dev_info(dev,
-			 "SoundWire master %d is disabled, will be ignored\n",
-			 bus->link_id);
-	/*
-	 * Ignore BIOS err_threshold, it's a really bad idea when dealing
-	 * with multiple hardware synchronized links
-	 */
-	bus->prop.err_threshold = 0;
-
-	return 0;
-}
-
-int intel_link_startup(struct auxiliary_device *auxdev)
-{
-	struct device *dev = &auxdev->dev;
-	struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_bus *bus = &cdns->bus;
-	int link_flags;
-	bool multi_link;
-	u32 clock_stop_quirks;
-	int ret;
-
-	if (bus->prop.hw_disabled) {
-		dev_info(dev,
-			 "SoundWire master %d is disabled, ignoring\n",
-			 sdw->instance);
-		return 0;
-	}
-
-	link_flags = md_flags >> (bus->link_id * 8);
-	multi_link = !(link_flags & SDW_INTEL_MASTER_DISABLE_MULTI_LINK);
-	if (!multi_link) {
-		dev_dbg(dev, "Multi-link is disabled\n");
-	} else {
-		/*
-		 * hardware-based synchronization is required regardless
-		 * of the number of segments used by a stream: SSP-based
-		 * synchronization is gated by gsync when the multi-master
-		 * mode is set.
-		 */
-		bus->hw_sync_min_links = 1;
-	}
-	bus->multi_link = multi_link;
-
-	/* Initialize shim, controller */
-	ret = sdw_intel_link_power_up(sdw);
-	if (ret)
-		goto err_init;
-
-	/* Register DAIs */
-	ret = sdw_intel_register_dai(sdw);
-	if (ret) {
-		dev_err(dev, "DAI registration failed: %d\n", ret);
-		goto err_power_up;
-	}
-
-	sdw_intel_debugfs_init(sdw);
-
-	/* start bus */
-	ret = sdw_intel_start_bus(sdw);
-	if (ret) {
-		dev_err(dev, "bus start failed: %d\n", ret);
-		goto err_power_up;
-	}
-
-	/* Enable runtime PM */
-	if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME)) {
-		pm_runtime_set_autosuspend_delay(dev,
-						 INTEL_MASTER_SUSPEND_DELAY_MS);
-		pm_runtime_use_autosuspend(dev);
-		pm_runtime_mark_last_busy(dev);
-
-		pm_runtime_set_active(dev);
-		pm_runtime_enable(dev);
-	}
-
-	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
-	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_NOT_ALLOWED) {
-		/*
-		 * To keep the clock running we need to prevent
-		 * pm_runtime suspend from happening by increasing the
-		 * reference count.
-		 * This quirk is specified by the parent PCI device in
-		 * case of specific latency requirements. It will have
-		 * no effect if pm_runtime is disabled by the user via
-		 * a module parameter for testing purposes.
-		 */
-		pm_runtime_get_noresume(dev);
-	}
-
-	/*
-	 * The runtime PM status of Slave devices is "Unsupported"
-	 * until they report as ATTACHED. If they don't, e.g. because
-	 * there are no Slave devices populated or if the power-on is
-	 * delayed or dependent on a power switch, the Master will
-	 * remain active and prevent its parent from suspending.
-	 *
-	 * Conditionally force the pm_runtime core to re-evaluate the
-	 * Master status in the absence of any Slave activity. A quirk
-	 * is provided to e.g. deal with Slaves that may be powered on
-	 * with a delay. A more complete solution would require the
-	 * definition of Master properties.
-	 */
-	if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE))
-		pm_runtime_idle(dev);
-
-	sdw->startup_done = true;
-	return 0;
-
-err_power_up:
-	sdw_intel_link_power_down(sdw);
-err_init:
-	return ret;
-}
-
-static void intel_link_remove(struct auxiliary_device *auxdev)
-{
-	struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_bus *bus = &cdns->bus;
-
-	/*
-	 * Since pm_runtime is already disabled, we don't decrease
-	 * the refcount when the clock_stop_quirk is
-	 * SDW_INTEL_CLK_STOP_NOT_ALLOWED
-	 */
-	if (!bus->prop.hw_disabled) {
-		sdw_intel_debugfs_exit(sdw);
-		sdw_cdns_enable_interrupt(cdns, false);
-	}
-	sdw_bus_master_delete(bus);
-}
-
-int intel_link_process_wakeen_event(struct auxiliary_device *auxdev)
-{
-	struct device *dev = &auxdev->dev;
-	struct sdw_intel *sdw;
-	struct sdw_bus *bus;
-
-	sdw = auxiliary_get_drvdata(auxdev);
-	bus = &sdw->cdns.bus;
-
-	if (bus->prop.hw_disabled || !sdw->startup_done) {
-		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
-			bus->link_id);
-		return 0;
-	}
-
-	if (!sdw_intel_shim_check_wake(sdw))
-		return 0;
-
-	/* disable WAKEEN interrupt ASAP to prevent interrupt flood */
-	sdw_intel_shim_wake(sdw, false);
-
-	/*
-	 * resume the Master, which will generate a bus reset and result in
-	 * Slaves re-attaching and be re-enumerated. The SoundWire physical
-	 * device which generated the wake will trigger an interrupt, which
-	 * will in turn cause the corresponding Linux Slave device to be
-	 * resumed and the Slave codec driver to check the status.
-	 */
-	pm_request_resume(dev);
-
-	return 0;
-}
-
-/*
- * PM calls
- */
-
-static int intel_resume_child_device(struct device *dev, void *data)
-{
-	int ret;
-	struct sdw_slave *slave = dev_to_sdw_dev(dev);
-
-	if (!slave->probed) {
-		dev_dbg(dev, "skipping device, no probed driver\n");
-		return 0;
-	}
-	if (!slave->dev_num_sticky) {
-		dev_dbg(dev, "skipping device, never detected on bus\n");
-		return 0;
-	}
-
-	ret = pm_request_resume(dev);
-	if (ret < 0)
-		dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret);
-
-	return ret;
-}
-
-static int __maybe_unused intel_pm_prepare(struct device *dev)
-{
-	struct sdw_cdns *cdns = dev_get_drvdata(dev);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_bus *bus = &cdns->bus;
-	u32 clock_stop_quirks;
-	int ret;
-
-	if (bus->prop.hw_disabled || !sdw->startup_done) {
-		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
-			bus->link_id);
-		return 0;
-	}
-
-	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
-
-	if (pm_runtime_suspended(dev) &&
-	    pm_runtime_suspended(dev->parent) &&
-	    ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) ||
-	     !clock_stop_quirks)) {
-		/*
-		 * if we've enabled clock stop, and the parent is suspended, the SHIM registers
-		 * are not accessible and the shim wake cannot be disabled.
-		 * The only solution is to resume the entire bus to full power
-		 */
-
-		/*
-		 * If any operation in this block fails, we keep going since we don't want
-		 * to prevent system suspend from happening and errors should be recoverable
-		 * on resume.
-		 */
-
-		/*
-		 * first resume the device for this link. This will also by construction
-		 * resume the PCI parent device.
-		 */
-		ret = pm_request_resume(dev);
-		if (ret < 0) {
-			dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret);
-			return 0;
-		}
-
-		/*
-		 * Continue resuming the entire bus (parent + child devices) to exit
-		 * the clock stop mode. If there are no devices connected on this link
-		 * this is a no-op.
-		 * The resume to full power could have been implemented with a .prepare
-		 * step in SoundWire codec drivers. This would however require a lot
-		 * of code to handle an Intel-specific corner case. It is simpler in
-		 * practice to add a loop at the link level.
-		 */
-		ret = device_for_each_child(bus->dev, NULL, intel_resume_child_device);
-
-		if (ret < 0)
-			dev_err(dev, "%s: intel_resume_child_device failed: %d\n", __func__, ret);
-	}
-
-	return 0;
-}
-
-static int __maybe_unused intel_suspend(struct device *dev)
-{
-	struct sdw_cdns *cdns = dev_get_drvdata(dev);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_bus *bus = &cdns->bus;
-	u32 clock_stop_quirks;
-	int ret;
-
-	if (bus->prop.hw_disabled || !sdw->startup_done) {
-		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
-			bus->link_id);
-		return 0;
-	}
-
-	if (pm_runtime_suspended(dev)) {
-		dev_dbg(dev, "pm_runtime status: suspended\n");
-
-		clock_stop_quirks = sdw->link_res->clock_stop_quirks;
-
-		if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) ||
-		    !clock_stop_quirks) {
-
-			if (pm_runtime_suspended(dev->parent)) {
-				/*
-				 * paranoia check: this should not happen with the .prepare
-				 * resume to full power
-				 */
-				dev_err(dev, "%s: invalid config: parent is suspended\n", __func__);
-			} else {
-				sdw_intel_shim_wake(sdw, false);
-			}
-		}
-
-		return 0;
-	}
-
-	ret = sdw_intel_stop_bus(sdw, false);
-	if (ret < 0) {
-		dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret);
-		return ret;
-	}
-
-	return 0;
-}
-
-static int __maybe_unused intel_suspend_runtime(struct device *dev)
-{
-	struct sdw_cdns *cdns = dev_get_drvdata(dev);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_bus *bus = &cdns->bus;
-	u32 clock_stop_quirks;
-	int ret;
-
-	if (bus->prop.hw_disabled || !sdw->startup_done) {
-		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
-			bus->link_id);
-		return 0;
-	}
-
-	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
-
-	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) {
-		ret = sdw_intel_stop_bus(sdw, false);
-		if (ret < 0) {
-			dev_err(dev, "%s: cannot stop bus during teardown: %d\n",
-				__func__, ret);
-			return ret;
-		}
-	} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) {
-		ret = sdw_intel_stop_bus(sdw, true);
-		if (ret < 0) {
-			dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n",
-				__func__, ret);
-			return ret;
-		}
-	} else {
-		dev_err(dev, "%s clock_stop_quirks %x unsupported\n",
-			__func__, clock_stop_quirks);
-		ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static int __maybe_unused intel_resume(struct device *dev)
-{
-	struct sdw_cdns *cdns = dev_get_drvdata(dev);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_bus *bus = &cdns->bus;
-	int link_flags;
-	int ret;
-
-	if (bus->prop.hw_disabled || !sdw->startup_done) {
-		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
-			bus->link_id);
-		return 0;
-	}
-
-	link_flags = md_flags >> (bus->link_id * 8);
-
-	if (pm_runtime_suspended(dev)) {
-		dev_dbg(dev, "pm_runtime status was suspended, forcing active\n");
-
-		/* follow required sequence from runtime_pm.rst */
-		pm_runtime_disable(dev);
-		pm_runtime_set_active(dev);
-		pm_runtime_mark_last_busy(dev);
-		pm_runtime_enable(dev);
-
-		link_flags = md_flags >> (bus->link_id * 8);
-
-		if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE))
-			pm_runtime_idle(dev);
-	}
-
-	ret = sdw_intel_link_power_up(sdw);
-	if (ret) {
-		dev_err(dev, "%s failed: %d\n", __func__, ret);
-		return ret;
-	}
-
-	/*
-	 * make sure all Slaves are tagged as UNATTACHED and provide
-	 * reason for reinitialization
-	 */
-	sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET);
-
-	ret = sdw_intel_start_bus(sdw);
-	if (ret < 0) {
-		dev_err(dev, "cannot start bus during resume\n");
-		sdw_intel_link_power_down(sdw);
-		return ret;
-	}
-
-	/*
-	 * after system resume, the pm_runtime suspend() may kick in
-	 * during the enumeration, before any children device force the
-	 * master device to remain active.  Using pm_runtime_get()
-	 * routines is not really possible, since it'd prevent the
-	 * master from suspending.
-	 * A reasonable compromise is to update the pm_runtime
-	 * counters and delay the pm_runtime suspend by several
-	 * seconds, by when all enumeration should be complete.
-	 */
-	pm_runtime_mark_last_busy(dev);
-
-	return 0;
-}
-
-static int __maybe_unused intel_resume_runtime(struct device *dev)
-{
-	struct sdw_cdns *cdns = dev_get_drvdata(dev);
-	struct sdw_intel *sdw = cdns_to_intel(cdns);
-	struct sdw_bus *bus = &cdns->bus;
-	u32 clock_stop_quirks;
-	int ret;
-
-	if (bus->prop.hw_disabled || !sdw->startup_done) {
-		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
-			bus->link_id);
-		return 0;
-	}
-
-	/* unconditionally disable WAKEEN interrupt */
-	sdw_intel_shim_wake(sdw, false);
-
-	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
-
-	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) {
-		ret = sdw_intel_link_power_up(sdw);
-		if (ret) {
-			dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret);
-			return ret;
-		}
-
-		/*
-		 * make sure all Slaves are tagged as UNATTACHED and provide
-		 * reason for reinitialization
-		 */
-		sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET);
-
-		ret = sdw_intel_start_bus(sdw);
-		if (ret < 0) {
-			dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret);
-			sdw_intel_link_power_down(sdw);
-			return ret;
-		}
-
-
-	} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) {
-		ret = sdw_intel_link_power_up(sdw);
-		if (ret) {
-			dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret);
-			return ret;
-		}
-
-		ret = sdw_intel_start_bus_after_reset(sdw);
-		if (ret < 0) {
-			dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret);
-			sdw_intel_link_power_down(sdw);
-			return ret;
-		}
-	} else if (!clock_stop_quirks) {
-
-		sdw_intel_check_clock_stop(sdw);
-
-		ret = sdw_intel_link_power_up(sdw);
-		if (ret) {
-			dev_err(dev, "%s: power_up failed: %d\n", __func__, ret);
-			return ret;
-		}
-
-		ret = sdw_intel_start_bus_after_clock_stop(sdw);
-		if (ret < 0) {
-			dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret);
-			sdw_intel_link_power_down(sdw);
-			return ret;
-		}
-	} else {
-		dev_err(dev, "%s: clock_stop_quirks %x unsupported\n",
-			__func__, clock_stop_quirks);
-		ret = -EINVAL;
-	}
-
-	return ret;
-}
-
-static const struct dev_pm_ops intel_pm = {
-	.prepare = intel_pm_prepare,
-	SET_SYSTEM_SLEEP_PM_OPS(intel_suspend, intel_resume)
-	SET_RUNTIME_PM_OPS(intel_suspend_runtime, intel_resume_runtime, NULL)
-};
-
-static const struct auxiliary_device_id intel_link_id_table[] = {
-	{ .name = "soundwire_intel.link" },
-	{},
-};
-MODULE_DEVICE_TABLE(auxiliary, intel_link_id_table);
-
-static struct auxiliary_driver sdw_intel_drv = {
-	.probe = intel_link_probe,
-	.remove = intel_link_remove,
-	.driver = {
-		/* auxiliary_driver_register() sets .name to be the modname */
-		.pm = &intel_pm,
-	},
-	.id_table = intel_link_id_table
-};
-module_auxiliary_driver(sdw_intel_drv);
-
-MODULE_LICENSE("Dual BSD/GPL");
-MODULE_DESCRIPTION("Intel Soundwire Link Driver");
diff --git a/drivers/soundwire/intel.h b/drivers/soundwire/intel.h
index 9ac3397757a07..de9883313c8f2 100644
--- a/drivers/soundwire/intel.h
+++ b/drivers/soundwire/intel.h
@@ -50,16 +50,9 @@ struct sdw_intel {
 #endif
 };
 
-int intel_link_startup(struct auxiliary_device *auxdev);
-int intel_link_process_wakeen_event(struct auxiliary_device *auxdev);
+#define cdns_to_intel(_cdns) container_of(_cdns, struct sdw_intel, cdns)
 
-struct sdw_intel_link_dev {
-	struct auxiliary_device auxdev;
-	struct sdw_intel_link_res link_res;
-};
-
-#define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \
-	container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev)
+#define INTEL_MASTER_RESET_ITERATIONS	10
 
 #define SDW_INTEL_CHECK_OPS(sdw, cb)	((sdw) && (sdw)->link_res && (sdw)->link_res->hw_ops && \
 					 (sdw)->link_res->hw_ops->cb)
diff --git a/drivers/soundwire/intel_auxdevice.c b/drivers/soundwire/intel_auxdevice.c
new file mode 100644
index 0000000000000..96c6b2112feb6
--- /dev/null
+++ b/drivers/soundwire/intel_auxdevice.c
@@ -0,0 +1,678 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2015-22 Intel Corporation.
+
+/*
+ * Soundwire Intel Manager Driver
+ */
+
+#include <linux/acpi.h>
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/auxiliary_bus.h>
+#include <sound/pcm_params.h>
+#include <linux/pm_runtime.h>
+#include <sound/soc.h>
+#include <linux/soundwire/sdw_registers.h>
+#include <linux/soundwire/sdw.h>
+#include <linux/soundwire/sdw_intel.h>
+#include "cadence_master.h"
+#include "bus.h"
+#include "intel.h"
+#include "intel_auxdevice.h"
+
+/* IDA min selected to avoid conflicts with HDaudio/iDISP SDI values */
+#define INTEL_DEV_NUM_IDA_MIN           4
+
+#define INTEL_MASTER_SUSPEND_DELAY_MS	3000
+
+/*
+ * debug/config flags for the Intel SoundWire Master.
+ *
+ * Since we may have multiple masters active, we can have up to 8
+ * flags reused in each byte, with master0 using the ls-byte, etc.
+ */
+
+#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME		BIT(0)
+#define SDW_INTEL_MASTER_DISABLE_CLOCK_STOP		BIT(1)
+#define SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE	BIT(2)
+#define SDW_INTEL_MASTER_DISABLE_MULTI_LINK		BIT(3)
+
+static int md_flags;
+module_param_named(sdw_md_flags, md_flags, int, 0444);
+MODULE_PARM_DESC(sdw_md_flags, "SoundWire Intel Master device flags (0x0 all off)");
+
+static int generic_pre_bank_switch(struct sdw_bus *bus)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+
+	return sdw->link_res->hw_ops->pre_bank_switch(sdw);
+}
+
+static int generic_post_bank_switch(struct sdw_bus *bus)
+{
+	struct sdw_cdns *cdns = bus_to_cdns(bus);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+
+	return sdw->link_res->hw_ops->post_bank_switch(sdw);
+}
+
+static int sdw_master_read_intel_prop(struct sdw_bus *bus)
+{
+	struct sdw_master_prop *prop = &bus->prop;
+	struct fwnode_handle *link;
+	char name[32];
+	u32 quirk_mask;
+
+	/* Find master handle */
+	snprintf(name, sizeof(name),
+		 "mipi-sdw-link-%d-subproperties", bus->link_id);
+
+	link = device_get_named_child_node(bus->dev, name);
+	if (!link) {
+		dev_err(bus->dev, "Master node %s not found\n", name);
+		return -EIO;
+	}
+
+	fwnode_property_read_u32(link,
+				 "intel-sdw-ip-clock",
+				 &prop->mclk_freq);
+
+	/* the values reported by BIOS are the 2x clock, not the bus clock */
+	prop->mclk_freq /= 2;
+
+	fwnode_property_read_u32(link,
+				 "intel-quirk-mask",
+				 &quirk_mask);
+
+	if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
+		prop->hw_disabled = true;
+
+	prop->quirks = SDW_MASTER_QUIRKS_CLEAR_INITIAL_CLASH |
+		SDW_MASTER_QUIRKS_CLEAR_INITIAL_PARITY;
+
+	return 0;
+}
+
+static int intel_prop_read(struct sdw_bus *bus)
+{
+	/* Initialize with default handler to read all DisCo properties */
+	sdw_master_read_prop(bus);
+
+	/* read Intel-specific properties */
+	sdw_master_read_intel_prop(bus);
+
+	return 0;
+}
+
+static struct sdw_master_ops sdw_intel_ops = {
+	.read_prop = intel_prop_read,
+	.override_adr = sdw_dmi_override_adr,
+	.xfer_msg = cdns_xfer_msg,
+	.xfer_msg_defer = cdns_xfer_msg_defer,
+	.reset_page_addr = cdns_reset_page_addr,
+	.set_bus_conf = cdns_bus_conf,
+	.pre_bank_switch = generic_pre_bank_switch,
+	.post_bank_switch = generic_post_bank_switch,
+	.read_ping_status = cdns_read_ping_status,
+};
+
+/*
+ * probe and init (aux_dev_id argument is required by function prototype but not used)
+ */
+static int intel_link_probe(struct auxiliary_device *auxdev,
+			    const struct auxiliary_device_id *aux_dev_id)
+
+{
+	struct device *dev = &auxdev->dev;
+	struct sdw_intel_link_dev *ldev = auxiliary_dev_to_sdw_intel_link_dev(auxdev);
+	struct sdw_intel *sdw;
+	struct sdw_cdns *cdns;
+	struct sdw_bus *bus;
+	int ret;
+
+	sdw = devm_kzalloc(dev, sizeof(*sdw), GFP_KERNEL);
+	if (!sdw)
+		return -ENOMEM;
+
+	cdns = &sdw->cdns;
+	bus = &cdns->bus;
+
+	sdw->instance = auxdev->id;
+	sdw->link_res = &ldev->link_res;
+	cdns->dev = dev;
+	cdns->registers = sdw->link_res->registers;
+	cdns->instance = sdw->instance;
+	cdns->msg_count = 0;
+
+	bus->link_id = auxdev->id;
+	bus->dev_num_ida_min = INTEL_DEV_NUM_IDA_MIN;
+	bus->clk_stop_timeout = 1;
+
+	sdw_cdns_probe(cdns);
+
+	/* Set ops */
+	bus->ops = &sdw_intel_ops;
+
+	/* set driver data, accessed by snd_soc_dai_get_drvdata() */
+	auxiliary_set_drvdata(auxdev, cdns);
+
+	/* use generic bandwidth allocation algorithm */
+	sdw->cdns.bus.compute_params = sdw_compute_params;
+
+	/* avoid resuming from pm_runtime suspend if it's not required */
+	dev_pm_set_driver_flags(dev, DPM_FLAG_SMART_SUSPEND);
+
+	ret = sdw_bus_master_add(bus, dev, dev->fwnode);
+	if (ret) {
+		dev_err(dev, "sdw_bus_master_add fail: %d\n", ret);
+		return ret;
+	}
+
+	if (bus->prop.hw_disabled)
+		dev_info(dev,
+			 "SoundWire master %d is disabled, will be ignored\n",
+			 bus->link_id);
+	/*
+	 * Ignore BIOS err_threshold, it's a really bad idea when dealing
+	 * with multiple hardware synchronized links
+	 */
+	bus->prop.err_threshold = 0;
+
+	return 0;
+}
+
+int intel_link_startup(struct auxiliary_device *auxdev)
+{
+	struct device *dev = &auxdev->dev;
+	struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_bus *bus = &cdns->bus;
+	int link_flags;
+	bool multi_link;
+	u32 clock_stop_quirks;
+	int ret;
+
+	if (bus->prop.hw_disabled) {
+		dev_info(dev,
+			 "SoundWire master %d is disabled, ignoring\n",
+			 sdw->instance);
+		return 0;
+	}
+
+	link_flags = md_flags >> (bus->link_id * 8);
+	multi_link = !(link_flags & SDW_INTEL_MASTER_DISABLE_MULTI_LINK);
+	if (!multi_link) {
+		dev_dbg(dev, "Multi-link is disabled\n");
+	} else {
+		/*
+		 * hardware-based synchronization is required regardless
+		 * of the number of segments used by a stream: SSP-based
+		 * synchronization is gated by gsync when the multi-master
+		 * mode is set.
+		 */
+		bus->hw_sync_min_links = 1;
+	}
+	bus->multi_link = multi_link;
+
+	/* Initialize shim, controller */
+	ret = sdw_intel_link_power_up(sdw);
+	if (ret)
+		goto err_init;
+
+	/* Register DAIs */
+	ret = sdw_intel_register_dai(sdw);
+	if (ret) {
+		dev_err(dev, "DAI registration failed: %d\n", ret);
+		goto err_power_up;
+	}
+
+	sdw_intel_debugfs_init(sdw);
+
+	/* start bus */
+	ret = sdw_intel_start_bus(sdw);
+	if (ret) {
+		dev_err(dev, "bus start failed: %d\n", ret);
+		goto err_power_up;
+	}
+
+	/* Enable runtime PM */
+	if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME)) {
+		pm_runtime_set_autosuspend_delay(dev,
+						 INTEL_MASTER_SUSPEND_DELAY_MS);
+		pm_runtime_use_autosuspend(dev);
+		pm_runtime_mark_last_busy(dev);
+
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+	}
+
+	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
+	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_NOT_ALLOWED) {
+		/*
+		 * To keep the clock running we need to prevent
+		 * pm_runtime suspend from happening by increasing the
+		 * reference count.
+		 * This quirk is specified by the parent PCI device in
+		 * case of specific latency requirements. It will have
+		 * no effect if pm_runtime is disabled by the user via
+		 * a module parameter for testing purposes.
+		 */
+		pm_runtime_get_noresume(dev);
+	}
+
+	/*
+	 * The runtime PM status of Slave devices is "Unsupported"
+	 * until they report as ATTACHED. If they don't, e.g. because
+	 * there are no Slave devices populated or if the power-on is
+	 * delayed or dependent on a power switch, the Master will
+	 * remain active and prevent its parent from suspending.
+	 *
+	 * Conditionally force the pm_runtime core to re-evaluate the
+	 * Master status in the absence of any Slave activity. A quirk
+	 * is provided to e.g. deal with Slaves that may be powered on
+	 * with a delay. A more complete solution would require the
+	 * definition of Master properties.
+	 */
+	if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE))
+		pm_runtime_idle(dev);
+
+	sdw->startup_done = true;
+	return 0;
+
+err_power_up:
+	sdw_intel_link_power_down(sdw);
+err_init:
+	return ret;
+}
+
+static void intel_link_remove(struct auxiliary_device *auxdev)
+{
+	struct sdw_cdns *cdns = auxiliary_get_drvdata(auxdev);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_bus *bus = &cdns->bus;
+
+	/*
+	 * Since pm_runtime is already disabled, we don't decrease
+	 * the refcount when the clock_stop_quirk is
+	 * SDW_INTEL_CLK_STOP_NOT_ALLOWED
+	 */
+	if (!bus->prop.hw_disabled) {
+		sdw_intel_debugfs_exit(sdw);
+		sdw_cdns_enable_interrupt(cdns, false);
+	}
+	sdw_bus_master_delete(bus);
+}
+
+int intel_link_process_wakeen_event(struct auxiliary_device *auxdev)
+{
+	struct device *dev = &auxdev->dev;
+	struct sdw_intel *sdw;
+	struct sdw_bus *bus;
+
+	sdw = auxiliary_get_drvdata(auxdev);
+	bus = &sdw->cdns.bus;
+
+	if (bus->prop.hw_disabled || !sdw->startup_done) {
+		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+			bus->link_id);
+		return 0;
+	}
+
+	if (!sdw_intel_shim_check_wake(sdw))
+		return 0;
+
+	/* disable WAKEEN interrupt ASAP to prevent interrupt flood */
+	sdw_intel_shim_wake(sdw, false);
+
+	/*
+	 * resume the Master, which will generate a bus reset and result in
+	 * Slaves re-attaching and be re-enumerated. The SoundWire physical
+	 * device which generated the wake will trigger an interrupt, which
+	 * will in turn cause the corresponding Linux Slave device to be
+	 * resumed and the Slave codec driver to check the status.
+	 */
+	pm_request_resume(dev);
+
+	return 0;
+}
+
+/*
+ * PM calls
+ */
+
+static int intel_resume_child_device(struct device *dev, void *data)
+{
+	int ret;
+	struct sdw_slave *slave = dev_to_sdw_dev(dev);
+
+	if (!slave->probed) {
+		dev_dbg(dev, "skipping device, no probed driver\n");
+		return 0;
+	}
+	if (!slave->dev_num_sticky) {
+		dev_dbg(dev, "skipping device, never detected on bus\n");
+		return 0;
+	}
+
+	ret = pm_request_resume(dev);
+	if (ret < 0)
+		dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret);
+
+	return ret;
+}
+
+static int __maybe_unused intel_pm_prepare(struct device *dev)
+{
+	struct sdw_cdns *cdns = dev_get_drvdata(dev);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_bus *bus = &cdns->bus;
+	u32 clock_stop_quirks;
+	int ret;
+
+	if (bus->prop.hw_disabled || !sdw->startup_done) {
+		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+			bus->link_id);
+		return 0;
+	}
+
+	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
+
+	if (pm_runtime_suspended(dev) &&
+	    pm_runtime_suspended(dev->parent) &&
+	    ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) ||
+	     !clock_stop_quirks)) {
+		/*
+		 * if we've enabled clock stop, and the parent is suspended, the SHIM registers
+		 * are not accessible and the shim wake cannot be disabled.
+		 * The only solution is to resume the entire bus to full power
+		 */
+
+		/*
+		 * If any operation in this block fails, we keep going since we don't want
+		 * to prevent system suspend from happening and errors should be recoverable
+		 * on resume.
+		 */
+
+		/*
+		 * first resume the device for this link. This will also by construction
+		 * resume the PCI parent device.
+		 */
+		ret = pm_request_resume(dev);
+		if (ret < 0) {
+			dev_err(dev, "%s: pm_request_resume failed: %d\n", __func__, ret);
+			return 0;
+		}
+
+		/*
+		 * Continue resuming the entire bus (parent + child devices) to exit
+		 * the clock stop mode. If there are no devices connected on this link
+		 * this is a no-op.
+		 * The resume to full power could have been implemented with a .prepare
+		 * step in SoundWire codec drivers. This would however require a lot
+		 * of code to handle an Intel-specific corner case. It is simpler in
+		 * practice to add a loop at the link level.
+		 */
+		ret = device_for_each_child(bus->dev, NULL, intel_resume_child_device);
+
+		if (ret < 0)
+			dev_err(dev, "%s: intel_resume_child_device failed: %d\n", __func__, ret);
+	}
+
+	return 0;
+}
+
+static int __maybe_unused intel_suspend(struct device *dev)
+{
+	struct sdw_cdns *cdns = dev_get_drvdata(dev);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_bus *bus = &cdns->bus;
+	u32 clock_stop_quirks;
+	int ret;
+
+	if (bus->prop.hw_disabled || !sdw->startup_done) {
+		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+			bus->link_id);
+		return 0;
+	}
+
+	if (pm_runtime_suspended(dev)) {
+		dev_dbg(dev, "pm_runtime status: suspended\n");
+
+		clock_stop_quirks = sdw->link_res->clock_stop_quirks;
+
+		if ((clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) ||
+		    !clock_stop_quirks) {
+
+			if (pm_runtime_suspended(dev->parent)) {
+				/*
+				 * paranoia check: this should not happen with the .prepare
+				 * resume to full power
+				 */
+				dev_err(dev, "%s: invalid config: parent is suspended\n", __func__);
+			} else {
+				sdw_intel_shim_wake(sdw, false);
+			}
+		}
+
+		return 0;
+	}
+
+	ret = sdw_intel_stop_bus(sdw, false);
+	if (ret < 0) {
+		dev_err(dev, "%s: cannot stop bus: %d\n", __func__, ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __maybe_unused intel_suspend_runtime(struct device *dev)
+{
+	struct sdw_cdns *cdns = dev_get_drvdata(dev);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_bus *bus = &cdns->bus;
+	u32 clock_stop_quirks;
+	int ret;
+
+	if (bus->prop.hw_disabled || !sdw->startup_done) {
+		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+			bus->link_id);
+		return 0;
+	}
+
+	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
+
+	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) {
+		ret = sdw_intel_stop_bus(sdw, false);
+		if (ret < 0) {
+			dev_err(dev, "%s: cannot stop bus during teardown: %d\n",
+				__func__, ret);
+			return ret;
+		}
+	} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET || !clock_stop_quirks) {
+		ret = sdw_intel_stop_bus(sdw, true);
+		if (ret < 0) {
+			dev_err(dev, "%s: cannot stop bus during clock_stop: %d\n",
+				__func__, ret);
+			return ret;
+		}
+	} else {
+		dev_err(dev, "%s clock_stop_quirks %x unsupported\n",
+			__func__, clock_stop_quirks);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static int __maybe_unused intel_resume(struct device *dev)
+{
+	struct sdw_cdns *cdns = dev_get_drvdata(dev);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_bus *bus = &cdns->bus;
+	int link_flags;
+	int ret;
+
+	if (bus->prop.hw_disabled || !sdw->startup_done) {
+		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+			bus->link_id);
+		return 0;
+	}
+
+	link_flags = md_flags >> (bus->link_id * 8);
+
+	if (pm_runtime_suspended(dev)) {
+		dev_dbg(dev, "pm_runtime status was suspended, forcing active\n");
+
+		/* follow required sequence from runtime_pm.rst */
+		pm_runtime_disable(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_mark_last_busy(dev);
+		pm_runtime_enable(dev);
+
+		link_flags = md_flags >> (bus->link_id * 8);
+
+		if (!(link_flags & SDW_INTEL_MASTER_DISABLE_PM_RUNTIME_IDLE))
+			pm_runtime_idle(dev);
+	}
+
+	ret = sdw_intel_link_power_up(sdw);
+	if (ret) {
+		dev_err(dev, "%s failed: %d\n", __func__, ret);
+		return ret;
+	}
+
+	/*
+	 * make sure all Slaves are tagged as UNATTACHED and provide
+	 * reason for reinitialization
+	 */
+	sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET);
+
+	ret = sdw_intel_start_bus(sdw);
+	if (ret < 0) {
+		dev_err(dev, "cannot start bus during resume\n");
+		sdw_intel_link_power_down(sdw);
+		return ret;
+	}
+
+	/*
+	 * after system resume, the pm_runtime suspend() may kick in
+	 * during the enumeration, before any children device force the
+	 * master device to remain active.  Using pm_runtime_get()
+	 * routines is not really possible, since it'd prevent the
+	 * master from suspending.
+	 * A reasonable compromise is to update the pm_runtime
+	 * counters and delay the pm_runtime suspend by several
+	 * seconds, by when all enumeration should be complete.
+	 */
+	pm_runtime_mark_last_busy(dev);
+
+	return 0;
+}
+
+static int __maybe_unused intel_resume_runtime(struct device *dev)
+{
+	struct sdw_cdns *cdns = dev_get_drvdata(dev);
+	struct sdw_intel *sdw = cdns_to_intel(cdns);
+	struct sdw_bus *bus = &cdns->bus;
+	u32 clock_stop_quirks;
+	int ret;
+
+	if (bus->prop.hw_disabled || !sdw->startup_done) {
+		dev_dbg(dev, "SoundWire master %d is disabled or not-started, ignoring\n",
+			bus->link_id);
+		return 0;
+	}
+
+	/* unconditionally disable WAKEEN interrupt */
+	sdw_intel_shim_wake(sdw, false);
+
+	clock_stop_quirks = sdw->link_res->clock_stop_quirks;
+
+	if (clock_stop_quirks & SDW_INTEL_CLK_STOP_TEARDOWN) {
+		ret = sdw_intel_link_power_up(sdw);
+		if (ret) {
+			dev_err(dev, "%s: power_up failed after teardown: %d\n", __func__, ret);
+			return ret;
+		}
+
+		/*
+		 * make sure all Slaves are tagged as UNATTACHED and provide
+		 * reason for reinitialization
+		 */
+		sdw_clear_slave_status(bus, SDW_UNATTACH_REQUEST_MASTER_RESET);
+
+		ret = sdw_intel_start_bus(sdw);
+		if (ret < 0) {
+			dev_err(dev, "%s: cannot start bus after teardown: %d\n", __func__, ret);
+			sdw_intel_link_power_down(sdw);
+			return ret;
+		}
+
+	} else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET) {
+		ret = sdw_intel_link_power_up(sdw);
+		if (ret) {
+			dev_err(dev, "%s: power_up failed after bus reset: %d\n", __func__, ret);
+			return ret;
+		}
+
+		ret = sdw_intel_start_bus_after_reset(sdw);
+		if (ret < 0) {
+			dev_err(dev, "%s: cannot start bus after reset: %d\n", __func__, ret);
+			sdw_intel_link_power_down(sdw);
+			return ret;
+		}
+	} else if (!clock_stop_quirks) {
+
+		sdw_intel_check_clock_stop(sdw);
+
+		ret = sdw_intel_link_power_up(sdw);
+		if (ret) {
+			dev_err(dev, "%s: power_up failed: %d\n", __func__, ret);
+			return ret;
+		}
+
+		ret = sdw_intel_start_bus_after_clock_stop(sdw);
+		if (ret < 0) {
+			dev_err(dev, "%s: cannot start bus after clock stop: %d\n", __func__, ret);
+			sdw_intel_link_power_down(sdw);
+			return ret;
+		}
+	} else {
+		dev_err(dev, "%s: clock_stop_quirks %x unsupported\n",
+			__func__, clock_stop_quirks);
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
+static const struct dev_pm_ops intel_pm = {
+	.prepare = intel_pm_prepare,
+	SET_SYSTEM_SLEEP_PM_OPS(intel_suspend, intel_resume)
+	SET_RUNTIME_PM_OPS(intel_suspend_runtime, intel_resume_runtime, NULL)
+};
+
+static const struct auxiliary_device_id intel_link_id_table[] = {
+	{ .name = "soundwire_intel.link" },
+	{},
+};
+MODULE_DEVICE_TABLE(auxiliary, intel_link_id_table);
+
+static struct auxiliary_driver sdw_intel_drv = {
+	.probe = intel_link_probe,
+	.remove = intel_link_remove,
+	.driver = {
+		/* auxiliary_driver_register() sets .name to be the modname */
+		.pm = &intel_pm,
+	},
+	.id_table = intel_link_id_table
+};
+module_auxiliary_driver(sdw_intel_drv);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Intel Soundwire Link Driver");
diff --git a/drivers/soundwire/intel_auxdevice.h b/drivers/soundwire/intel_auxdevice.h
new file mode 100644
index 0000000000000..a00ecde955633
--- /dev/null
+++ b/drivers/soundwire/intel_auxdevice.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/* Copyright(c) 2015-2022 Intel Corporation. */
+
+#ifndef __SDW_INTEL_AUXDEVICE_H
+#define __SDW_INTEL_AUXDEVICE_H
+
+int intel_link_startup(struct auxiliary_device *auxdev);
+int intel_link_process_wakeen_event(struct auxiliary_device *auxdev);
+
+struct sdw_intel_link_dev {
+	struct auxiliary_device auxdev;
+	struct sdw_intel_link_res link_res;
+};
+
+#define auxiliary_dev_to_sdw_intel_link_dev(auxiliary_dev) \
+	container_of(auxiliary_dev, struct sdw_intel_link_dev, auxdev)
+
+#endif /* __SDW_INTEL_AUXDEVICE_H */
diff --git a/drivers/soundwire/intel_init.c b/drivers/soundwire/intel_init.c
index 1e6d74b3e7732..b3a8db7c3b776 100644
--- a/drivers/soundwire/intel_init.c
+++ b/drivers/soundwire/intel_init.c
@@ -17,6 +17,7 @@
 #include <linux/soundwire/sdw_intel.h>
 #include "cadence_master.h"
 #include "intel.h"
+#include "intel_auxdevice.h"
 
 static void intel_link_dev_release(struct device *dev)
 {
-- 
GitLab


From 56fb517775f4d71dbca2b1fb3562276138361072 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Fri, 18 Nov 2022 15:08:33 +0200
Subject: [PATCH 2118/4122] habanalabs: fix rc when new CPUCP opcodes are not
 supported

When the new CPUCP opcodes are not supported and a CPUCP packet fails,
the return value is the F/W error resposone which is a positive value.
If this packet is sent from IOCTL and the positive value is used, the
ICOTL will not be considered as unsuccessful.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index c0909d76d6eb2..cf8147e438335 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -324,6 +324,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
 
 		if (!prop->supports_advanced_cpucp_rc) {
 			dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
+			rc = -EIO;
 			goto scrub_descriptor;
 		}
 
-- 
GitLab


From 0abcae8b48850e0f488d0eb7232323d93bdc4b13 Mon Sep 17 00:00:00 2001
From: Dafna Hirschfeld <dhirschfeld@habana.ai>
Date: Sun, 30 Oct 2022 16:49:42 +0200
Subject: [PATCH 2119/4122] habanalabs: add RMWREG32_SHIFTED to set a val
 within a mask

This is similar to RMWREG32, but the given 'val' is already shifted
according to the mask.
This allows several 'ORed' vals and masks to be set at once
The patch also fixes wrong usage of RMWREG32 by replacing
it with RMWREG32_SHIFTED

Signed-off-by: Dafna Hirschfeld <dhirschfeld@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/habanalabs.h | 10 +++-------
 drivers/misc/habanalabs/gaudi2/gaudi2.c     |  6 +++---
 2 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index e7f89868428d6..0329a0980bb72 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -2498,13 +2498,9 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
 #define WREG32_AND(reg, and) WREG32_P(reg, 0, and)
 #define WREG32_OR(reg, or) WREG32_P(reg, or, ~(or))
 
-#define RMWREG32(reg, val, mask)				\
-	do {							\
-		u32 tmp_ = RREG32(reg);				\
-		tmp_ &= ~(mask);				\
-		tmp_ |= ((val) << __ffs(mask));			\
-		WREG32(reg, tmp_);				\
-	} while (0)
+#define RMWREG32_SHIFTED(reg, val, mask) WREG32_P(reg, val, ~(mask))
+
+#define RMWREG32(reg, val, mask) RMWREG32_SHIFTED(reg, (val) << __ffs(mask), mask)
 
 #define RREG32_MASK(reg, mask) ((RREG32(reg) & mask) >> __ffs(mask))
 
diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2.c b/drivers/misc/habanalabs/gaudi2/gaudi2.c
index a33a9072fca4b..e793fb2bdcbe6 100644
--- a/drivers/misc/habanalabs/gaudi2/gaudi2.c
+++ b/drivers/misc/habanalabs/gaudi2/gaudi2.c
@@ -5052,7 +5052,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev)
 	mmu_base = mmPMMU_HBW_MMU_BASE;
 	stlb_base = mmPMMU_HBW_STLB_BASE;
 
-	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
+	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
 		(0 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_HOP_SHIFT) |
 		(5 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_SHIFT) |
 		(4 << PMMU_HBW_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_SHIFT) |
@@ -5068,7 +5068,7 @@ static int gaudi2_pci_mmu_init(struct hl_device *hdev)
 
 	if (PAGE_SIZE == SZ_64K) {
 		/* Set page sizes to 64K on hop5 and 16M on hop4 + enable 8 bit hops */
-		RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
+		RMWREG32_SHIFTED(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET,
 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP5_PAGE_SIZE_MASK, 4) |
 			FIELD_PREP(DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK, 3) |
 			FIELD_PREP(
@@ -5116,7 +5116,7 @@ static int gaudi2_dcore_hmmu_init(struct hl_device *hdev, int dcore_id,
 	RMWREG32(mmu_base + MMU_STATIC_MULTI_PAGE_SIZE_OFFSET, 5 /* 64MB */,
 			MMU_STATIC_MULTI_PAGE_SIZE_HOP4_PAGE_SIZE_MASK);
 
-	RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
+	RMWREG32_SHIFTED(stlb_base + STLB_HOP_CONFIGURATION_OFFSET,
 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_HOP_MASK, 0) |
 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_SMALL_P_MASK, 3) |
 		FIELD_PREP(DCORE0_HMMU0_STLB_HOP_CONFIGURATION_FIRST_LOOKUP_HOP_LARGE_P_MASK, 3) |
-- 
GitLab


From 408c46bd6eb7a4e2fb9fd686218e4a13b9de844c Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Mon, 14 Nov 2022 13:26:21 +0200
Subject: [PATCH 2120/4122] habanalabs: print context refcount value if hard
 reset fails

Failing to kill a user process during a hard reset can be due to a
reference to the user context which isn't released.
To make it easier to understand if this the reason for the failure and
not something else, add a print of the context refcount value.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index f5864893237cc..926f230def56b 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -696,10 +696,22 @@ static void device_hard_reset_pending(struct work_struct *work)
 	flags = device_reset_work->flags | HL_DRV_RESET_FROM_RESET_THR;
 
 	rc = hl_device_reset(hdev, flags);
+
 	if ((rc == -EBUSY) && !hdev->device_fini_pending) {
-		dev_info(hdev->dev,
-			"Could not reset device. will try again in %u seconds",
-			HL_PENDING_RESET_PER_SEC);
+		struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
+
+		if (ctx) {
+			/* The read refcount value should subtracted by one, because the read is
+			 * protected with hl_get_compute_ctx().
+			 */
+			dev_info(hdev->dev,
+				"Could not reset device (compute_ctx refcount %u). will try again in %u seconds",
+				kref_read(&ctx->refcount) - 1, HL_PENDING_RESET_PER_SEC);
+			hl_ctx_put(ctx);
+		} else {
+			dev_info(hdev->dev, "Could not reset device. will try again in %u seconds",
+				HL_PENDING_RESET_PER_SEC);
+		}
 
 		queue_delayed_work(hdev->reset_wq, &device_reset_work->reset_work,
 					msecs_to_jiffies(HL_PENDING_RESET_PER_SEC * 1000));
-- 
GitLab


From 1f615120fc9d24a8df7f14b0d1e79f3402330855 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Wed, 16 Nov 2022 13:14:02 +0200
Subject: [PATCH 2121/4122] habanalabs: don't put context in
 hl_encaps_handle_do_release_sob()

hl_encaps_handle_do_release_sob() can be called only when the last
reference to the context object is released and hl_ctx_do_release() is
initiated, and therefore it shouldn't call hl_ctx_put().

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/context.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index 2f4620b7990c7..ba6675960203a 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -39,7 +39,6 @@ static void hl_encaps_handle_do_release_sob(struct kref *ref)
 	idr_remove(&mgr->handles, handle->id);
 	spin_unlock(&mgr->lock);
 
-	hl_ctx_put(handle->ctx);
 	kfree(handle);
 }
 
-- 
GitLab


From 893afb248c7a1f24d17719a5e5f4fe4174ecb60c Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 17 Nov 2022 15:22:31 +0200
Subject: [PATCH 2122/4122] habanalabs: clear non-released encapsulated signals

Reserved encapsulated signals which were not released hold the context
refcount, leading to a failure when killing the user process on device
reset or device fini.
Add the release of these left signals in the CS roll-back process.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 .../habanalabs/common/command_submission.c    | 46 ++++++++++++----
 drivers/misc/habanalabs/common/context.c      | 53 +++++++++++--------
 drivers/misc/habanalabs/common/habanalabs.h   |  3 +-
 3 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index f1c69c8ed74a6..ea0e5101c10ed 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -742,13 +742,11 @@ static void cs_do_release(struct kref *ref)
 		 */
 		if (hl_cs_cmpl->encaps_signals)
 			kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
-						hl_encaps_handle_do_release);
+					hl_encaps_release_handle_and_put_ctx);
 	}
 
-	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
-			&& cs->encaps_signals)
-		kref_put(&cs->encaps_sig_hdl->refcount,
-					hl_encaps_handle_do_release);
+	if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals)
+		kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
 
 out:
 	/* Must be called before hl_ctx_put because inside we use ctx to get
@@ -1011,6 +1009,34 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 		hl_complete_job(hdev, job);
 }
 
+/*
+ * release_reserved_encaps_signals() - release reserved encapsulated signals.
+ * @hdev: pointer to habanalabs device structure
+ *
+ * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with
+ * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back.
+ * For these signals need also to put the refcount of the H/W SOB which was taken at the
+ * reservation.
+ */
+static void release_reserved_encaps_signals(struct hl_device *hdev)
+{
+	struct hl_ctx *ctx = hl_get_compute_ctx(hdev);
+	struct hl_cs_encaps_sig_handle *handle;
+	struct hl_encaps_signals_mgr *mgr;
+	u32 id;
+
+	if (!ctx)
+		return;
+
+	mgr = &ctx->sig_mgr;
+
+	idr_for_each_entry(&mgr->handles, handle, id)
+		if (handle->cs_seq == ULLONG_MAX)
+			kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx);
+
+	hl_ctx_put(ctx);
+}
+
 void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
 {
 	int i;
@@ -1039,6 +1065,8 @@ void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush)
 	}
 
 	force_complete_multi_cs(hdev);
+
+	release_reserved_encaps_signals(hdev);
 }
 
 static void
@@ -2001,6 +2029,8 @@ static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
 	 */
 	handle->pre_sob_val = prop->next_sob_val - handle->count;
 
+	handle->cs_seq = ULLONG_MAX;
+
 	*signals_count = prop->next_sob_val;
 	hdev->asic_funcs->hw_queues_unlock(hdev);
 
@@ -2350,10 +2380,8 @@ put_cs:
 	/* We finished with the CS in this function, so put the ref */
 	cs_put(cs);
 free_cs_chunk_array:
-	if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
-							is_wait_cs)
-		kref_put(&encaps_sig_hdl->refcount,
-				hl_encaps_handle_do_release);
+	if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs)
+		kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx);
 	kfree(cs_chunk_array);
 out:
 	return rc;
diff --git a/drivers/misc/habanalabs/common/context.c b/drivers/misc/habanalabs/common/context.c
index ba6675960203a..9c8b1b37b510c 100644
--- a/drivers/misc/habanalabs/common/context.c
+++ b/drivers/misc/habanalabs/common/context.c
@@ -9,37 +9,46 @@
 
 #include <linux/slab.h>
 
-void hl_encaps_handle_do_release(struct kref *ref)
+static void encaps_handle_do_release(struct hl_cs_encaps_sig_handle *handle, bool put_hw_sob,
+					bool put_ctx)
 {
-	struct hl_cs_encaps_sig_handle *handle =
-		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
 	struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr;
 
+	if (put_hw_sob)
+		hw_sob_put(handle->hw_sob);
+
 	spin_lock(&mgr->lock);
 	idr_remove(&mgr->handles, handle->id);
 	spin_unlock(&mgr->lock);
 
-	hl_ctx_put(handle->ctx);
+	if (put_ctx)
+		hl_ctx_put(handle->ctx);
+
 	kfree(handle);
 }
 
-static void hl_encaps_handle_do_release_sob(struct kref *ref)
+void hl_encaps_release_handle_and_put_ctx(struct kref *ref)
 {
 	struct hl_cs_encaps_sig_handle *handle =
-		container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
-	struct hl_encaps_signals_mgr *mgr = &handle->ctx->sig_mgr;
+			container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
 
-	/* if we're here, then there was a signals reservation but cs with
-	 * encaps signals wasn't submitted, so need to put refcount
-	 * to hw_sob taken at the reservation.
-	 */
-	hw_sob_put(handle->hw_sob);
+	encaps_handle_do_release(handle, false, true);
+}
 
-	spin_lock(&mgr->lock);
-	idr_remove(&mgr->handles, handle->id);
-	spin_unlock(&mgr->lock);
+static void hl_encaps_release_handle_and_put_sob(struct kref *ref)
+{
+	struct hl_cs_encaps_sig_handle *handle =
+			container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
 
-	kfree(handle);
+	encaps_handle_do_release(handle, true, false);
+}
+
+void hl_encaps_release_handle_and_put_sob_ctx(struct kref *ref)
+{
+	struct hl_cs_encaps_sig_handle *handle =
+			container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
+
+	encaps_handle_do_release(handle, true, true);
 }
 
 static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
@@ -48,8 +57,7 @@ static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
 	idr_init(&mgr->handles);
 }
 
-static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
-			struct hl_encaps_signals_mgr *mgr)
+static void hl_encaps_sig_mgr_fini(struct hl_device *hdev, struct hl_encaps_signals_mgr *mgr)
 {
 	struct hl_cs_encaps_sig_handle *handle;
 	struct idr *idp;
@@ -57,11 +65,14 @@ static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
 
 	idp = &mgr->handles;
 
+	/* The IDR is expected to be empty at this stage, because any left signal should have been
+	 * released as part of CS roll-back.
+	 */
 	if (!idr_is_empty(idp)) {
-		dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");
+		dev_warn(hdev->dev,
+			"device released while some encaps signals handles are still allocated\n");
 		idr_for_each_entry(idp, handle, id)
-			kref_put(&handle->refcount,
-					hl_encaps_handle_do_release_sob);
+			kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob);
 	}
 
 	idr_destroy(&mgr->handles);
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index 0329a0980bb72..e2527d976ee05 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -3775,7 +3775,8 @@ void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *d
 
 void hw_sob_get(struct hl_hw_sob *hw_sob);
 void hw_sob_put(struct hl_hw_sob *hw_sob);
-void hl_encaps_handle_do_release(struct kref *ref);
+void hl_encaps_release_handle_and_put_ctx(struct kref *ref);
+void hl_encaps_release_handle_and_put_sob_ctx(struct kref *ref);
 void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
 			struct hl_cs *cs, struct hl_cs_job *job,
 			struct hl_cs_compl *cs_cmpl);
-- 
GitLab


From 1b18cf33d6ce63a9f5fe3764d7b20c4738dd1245 Mon Sep 17 00:00:00 2001
From: Tomer Tayar <ttayar@habana.ai>
Date: Thu, 17 Nov 2022 18:57:49 +0200
Subject: [PATCH 2123/4122] habanalabs: make print of engines idle mask more
 readable

The engines idle mask was increased to be an array of 4 u64 entries.
To make the print of this mask more readable, remove the "0x" prefix,
and zero-pad each u64 to 16 bytes if either it isn't zero or if any of
the higher-order u64's is not zero.

Signed-off-by: Tomer Tayar <ttayar@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/device.c | 27 +++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index 926f230def56b..87ab329e65d49 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -386,6 +386,23 @@ bool hl_ctrl_device_operational(struct hl_device *hdev,
 	}
 }
 
+static void print_idle_status_mask(struct hl_device *hdev, const char *message,
+					u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE])
+{
+	u32 pad_width[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {};
+
+	BUILD_BUG_ON(HL_BUSY_ENGINES_MASK_EXT_SIZE != 4);
+
+	pad_width[3] = idle_mask[3] ? 16 : 0;
+	pad_width[2] = idle_mask[2] || pad_width[3] ? 16 : 0;
+	pad_width[1] = idle_mask[1] || pad_width[2] ? 16 : 0;
+	pad_width[0] = idle_mask[0] || pad_width[1] ? 16 : 0;
+
+	dev_err(hdev->dev, "%s (mask %0*llx_%0*llx_%0*llx_%0*llx)\n",
+		message, pad_width[3], idle_mask[3], pad_width[2], idle_mask[2],
+		pad_width[1], idle_mask[1], pad_width[0], idle_mask[0]);
+}
+
 static void hpriv_release(struct kref *ref)
 {
 	u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
@@ -416,9 +433,8 @@ static void hpriv_release(struct kref *ref)
 		device_is_idle = hdev->asic_funcs->is_device_idle(hdev, idle_mask,
 							HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
 	if (!device_is_idle) {
-		dev_err(hdev->dev,
-			"device not idle after user context is closed (0x%llx_%llx_%llx_%llx)\n",
-			idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
+		print_idle_status_mask(hdev, "device is not idle after user context is closed",
+					idle_mask);
 		reset_device = true;
 	}
 
@@ -1673,9 +1689,8 @@ kill_processes:
 
 	/* If device is not idle fail the reset process */
 	if (!hdev->asic_funcs->is_device_idle(hdev, idle_mask,
-			HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
-		dev_err(hdev->dev, "device is not idle (mask 0x%llx_%llx_%llx_%llx) after reset\n",
-			idle_mask[3], idle_mask[2], idle_mask[1], idle_mask[0]);
+						HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)) {
+		print_idle_status_mask(hdev, "device is not idle after reset", idle_mask);
 		rc = -EIO;
 		goto out_err;
 	}
-- 
GitLab


From 5354a2a0018345774ab2517fc2fe107a6cd894fa Mon Sep 17 00:00:00 2001
From: Ofir Bitton <obitton@habana.ai>
Date: Tue, 22 Nov 2022 09:59:27 +0200
Subject: [PATCH 2124/4122] habanalabs: fail driver load if EEPROM errors
 detected

In case EEPROM is not burned, firmware sets default EEPROM values.
As this is not valid in production, driver should fail load upon any
EEPROM error reported by firmware.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/firmware_if.c | 23 ++++++++++----------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index cf8147e438335..228b92278e480 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -617,16 +617,12 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
 	if (sts_val & CPU_BOOT_DEV_STS0_ENABLED)
 		dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
 
-	/* All warnings should go here in order not to reach the unknown error validation */
 	if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
-		dev_warn(hdev->dev,
-			"Device boot warning - EEPROM failure detected, default settings applied\n");
-		/* This is a warning so we don't want it to disable the
-		 * device
-		 */
-		err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL;
+		dev_err(hdev->dev, "Device boot error - EEPROM failure detected\n");
+		err_exists = true;
 	}
 
+	/* All warnings should go here in order not to reach the unknown error validation */
 	if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
 		dev_warn(hdev->dev,
 			"Device boot warning - Skipped DRAM initialization\n");
@@ -2532,7 +2528,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 					struct fw_load_mgr *fw_loader)
 {
 	struct cpu_dyn_regs *dyn_regs;
-	int rc;
+	int rc, fw_error_rc;
 
 	dev_info(hdev->dev,
 		"Loading %sfirmware to device, may take some time...\n",
@@ -2632,14 +2628,17 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
 
 	hl_fw_dynamic_update_linux_interrupt_if(hdev);
 
-	return 0;
-
 protocol_err:
-	if (fw_loader->dynamic_loader.fw_desc_valid)
-		fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
+	if (fw_loader->dynamic_loader.fw_desc_valid) {
+		fw_error_rc = fw_read_errors(hdev, le32_to_cpu(dyn_regs->cpu_boot_err0),
 				le32_to_cpu(dyn_regs->cpu_boot_err1),
 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts0),
 				le32_to_cpu(dyn_regs->cpu_boot_dev_sts1));
+
+		if (fw_error_rc)
+			return fw_error_rc;
+	}
+
 	return rc;
 }
 
-- 
GitLab


From 19a17a9fb486b2961dbd7f3fff0d79a144c9a3b6 Mon Sep 17 00:00:00 2001
From: Ohad Sharabi <osharabi@habana.ai>
Date: Sun, 20 Nov 2022 15:12:26 +0200
Subject: [PATCH 2125/4122] habanalabs: fix VA range calculation

Current implementation is fixing the page size to PAGE_SIZE whereas the
input page size may be different.

Signed-off-by: Ohad Sharabi <osharabi@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
---
 drivers/misc/habanalabs/common/memory.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 541e1b6a2176d..7c5c18be294a8 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -2508,24 +2508,20 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range **va_ranges,
 
 	/*
 	 * PAGE_SIZE alignment
-	 * it is the callers responsibility to align the addresses if the
+	 * it is the caller's responsibility to align the addresses if the
 	 * page size is not a power of 2
 	 */
 
 	if (is_power_of_2(page_size)) {
-		if (start & (PAGE_SIZE - 1)) {
-			start &= PAGE_MASK;
-			start += PAGE_SIZE;
-		}
+		start = round_up(start, page_size);
 
 		/*
 		 * The end of the range is inclusive, hence we need to align it
 		 * to the end of the last full page in the range. For example if
 		 * end = 0x3ff5 with page size 0x1000, we need to align it to
-		 * 0x2fff. The remainig 0xff5 bytes do not form a full page.
+		 * 0x2fff. The remaining 0xff5 bytes do not form a full page.
 		 */
-		if ((end + 1) & (PAGE_SIZE - 1))
-			end = ((end + 1) & PAGE_MASK) - 1;
+		end = round_down(end + 1, page_size) - 1;
 	}
 
 	if (start >= end) {
-- 
GitLab


From b8a83e600bdde93e7da41ea3204b2b3832a3c99b Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:42 +0300
Subject: [PATCH 2126/4122] dt-bindings: imx6q-pcie: Fix clock names for imx6sx
 and imx8mq

Originally as it was defined the legacy bindings the pcie_inbound_axi and
pcie_aux clock names were supposed to be used in the fsl,imx6sx-pcie and
fsl,imx8mq-pcie devices respectively. But the bindings conversion has been
incorrectly so now the fourth clock name is defined as "pcie_inbound_axi
for imx6sx-pcie, pcie_aux for imx8mq-pcie", which is completely wrong.
Let's fix that by conditionally apply the clock-names constraints based on
the compatible string content.

Link: https://lore.kernel.org/r/20221113191301.5526-2-Sergey.Semin@baikalelectronics.ru
Fixes: 751ca492f131 ("dt-bindings: PCI: imx6: convert the imx pcie controller to dtschema")
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Alexander Stein <alexander.stein@ew.tq-group.com>
---
 .../bindings/pci/fsl,imx6q-pcie.yaml          | 46 +++++++++++++++++--
 1 file changed, 42 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
index 376e739bcad40..49b4f7a32e71e 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
@@ -14,9 +14,6 @@ description: |+
   This PCIe host controller is based on the Synopsys DesignWare PCIe IP
   and thus inherits all the common properties defined in snps,dw-pcie.yaml.
 
-allOf:
-  - $ref: /schemas/pci/snps,dw-pcie.yaml#
-
 properties:
   compatible:
     enum:
@@ -61,7 +58,7 @@ properties:
       - const: pcie
       - const: pcie_bus
       - const: pcie_phy
-      - const: pcie_inbound_axi for imx6sx-pcie, pcie_aux for imx8mq-pcie
+      - enum: [ pcie_inbound_axi, pcie_aux ]
 
   num-lanes:
     const: 1
@@ -175,6 +172,47 @@ required:
   - clocks
   - clock-names
 
+allOf:
+  - $ref: /schemas/pci/snps,dw-pcie.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: fsl,imx6sx-pcie
+    then:
+      properties:
+        clock-names:
+          items:
+            - {}
+            - {}
+            - {}
+            - const: pcie_inbound_axi
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: fsl,imx8mq-pcie
+    then:
+      properties:
+        clock-names:
+          items:
+            - {}
+            - {}
+            - {}
+            - const: pcie_aux
+  - if:
+      properties:
+        compatible:
+          not:
+            contains:
+              enum:
+                - fsl,imx6sx-pcie
+                - fsl,imx8mq-pcie
+    then:
+      properties:
+        clock-names:
+          maxItems: 3
+
 unevaluatedProperties: false
 
 examples:
-- 
GitLab


From 4cf4b9b70ab2785461190c08a3542d2d74c28b46 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:43 +0300
Subject: [PATCH 2127/4122] dt-bindings: visconti-pcie: Fix interrupts array
 max constraints

In accordance with the way the device DT-node is actually defined in
arch/arm64/boot/dts/toshiba/tmpv7708.dtsi and the way the device is probed
by the DW PCIe driver there are two IRQs it actually has. It's MSI IRQ the
DT-bindings lack. Let's extend the interrupts property constraints then
and fix the schema example so one would be acceptable by the actual device
DT-bindings.

Link: https://lore.kernel.org/r/20221113191301.5526-3-Sergey.Semin@baikalelectronics.ru
Fixes: 17c1b16340f0 ("dt-bindings: pci: Add DT binding for Toshiba Visconti PCIe controller")
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Acked-by: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
---
 .../devicetree/bindings/pci/toshiba,visconti-pcie.yaml     | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
index 48ed227fc5b9e..53da2edd7c9ab 100644
--- a/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
@@ -36,7 +36,7 @@ properties:
       - const: mpu
 
   interrupts:
-    maxItems: 1
+    maxItems: 2
 
   clocks:
     items:
@@ -94,8 +94,9 @@ examples:
             #interrupt-cells = <1>;
             ranges = <0x81000000 0 0x40000000 0 0x40000000 0 0x00010000>,
                      <0x82000000 0 0x50000000 0 0x50000000 0 0x20000000>;
-            interrupts = <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>;
-            interrupt-names = "intr";
+            interrupts = <GIC_SPI 211 IRQ_TYPE_LEVEL_HIGH>,
+                         <GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH>;
+            interrupt-names = "msi", "intr";
             interrupt-map-mask = <0 0 0 7>;
             interrupt-map =
                 <0 0 0 1 &gic GIC_SPI 215 IRQ_TYPE_LEVEL_HIGH
-- 
GitLab


From 057646a5db2f8873efba90eeffd165c2525b413f Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:44 +0300
Subject: [PATCH 2128/4122] dt-bindings: PCI: dwc: Detach common RP/EP DT
 bindings

Currently both DW PCIe Root Port and End-point DT bindings are defined as
separate schemas. Carefully looking at them, at the hardware reference
manuals and seeing there is a generic part of the driver used by the both
RP and EP drivers we can greatly simplify the DW PCIe controller bindings
by moving some of the properties into the common DT schema. It concerns
the PERST GPIO control, number of lanes, number of iATU windows and CDM
check properties. They will be defined in the snps,dw-pcie-common.yaml
schema which will be referenced in the DW PCIe Root Port and End-point DT
bindings in order to evaluate the common for both of these controllers
properties. The rest of properties like reg{,-names}, clock{s,-names},
reset{s,-names}, etc will be consolidate there in one of the next commits.

Link: https://lore.kernel.org/r/20221113191301.5526-4-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/snps,dw-pcie-common.yaml     | 76 +++++++++++++++++++
 .../bindings/pci/snps,dw-pcie-ep.yaml         | 31 +-------
 .../devicetree/bindings/pci/snps,dw-pcie.yaml | 33 +-------
 3 files changed, 78 insertions(+), 62 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
new file mode 100644
index 0000000000000..554c2804c608a
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
@@ -0,0 +1,76 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/snps,dw-pcie-common.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DWC PCIe RP/EP controller
+
+maintainers:
+  - Jingoo Han <jingoohan1@gmail.com>
+  - Gustavo Pimentel <gustavo.pimentel@synopsys.com>
+
+description:
+  Generic Synopsys DesignWare PCIe Root Port and Endpoint controller
+  properties.
+
+select: false
+
+properties:
+  reset-gpio:
+    deprecated: true
+    description:
+      Reference to the GPIO-controlled PERST# signal. It is used to reset all
+      the peripheral devices available on the PCIe bus.
+    maxItems: 1
+
+  reset-gpios:
+    description:
+      Reference to the GPIO-controlled PERST# signal. It is used to reset all
+      the peripheral devices available on the PCIe bus.
+    maxItems: 1
+
+  num-lanes:
+    description:
+      Number of PCIe link lanes to use. Can be omitted if the already brought
+      up link is supposed to be preserved.
+    maximum: 16
+
+  num-ob-windows:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    deprecated: true
+    description:
+      Number of outbound address translation windows. This parameter can be
+      auto-detected based on the iATU memory writability. So there is no
+      point in having a dedicated DT-property for it.
+    maximum: 256
+
+  num-ib-windows:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    deprecated: true
+    description:
+      Number of inbound address translation windows. In the same way as
+      for the outbound AT windows, this parameter can be auto-detected based
+      on the iATU memory writability. There is no point having a dedicated
+      DT-property for it either.
+    maximum: 256
+
+  num-viewport:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    deprecated: true
+    description:
+      Number of outbound view ports configured in hardware. It's the same as
+      the number of outbound AT windows.
+    maximum: 256
+
+  snps,enable-cdm-check:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Enable automatic checking of CDM (Configuration Dependent Module)
+      registers for data corruption. CDM registers include standard PCIe
+      configuration space registers, Port Logic registers, DMA and iATU
+      registers. This feature has been available since DWC PCIe v4.80a.
+
+additionalProperties: true
+
+...
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index b78535040f04c..eae60901d60e2 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -15,6 +15,7 @@ description: |
 
 allOf:
   - $ref: /schemas/pci/pci-ep.yaml#
+  - $ref: /schemas/pci/snps,dw-pcie-common.yaml#
 
 properties:
   compatible:
@@ -36,36 +37,6 @@ properties:
     items:
       enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl]
 
-  reset-gpio:
-    description: GPIO pin number of PERST# signal
-    maxItems: 1
-    deprecated: true
-
-  reset-gpios:
-    description: GPIO controlled connection to PERST# signal
-    maxItems: 1
-
-  snps,enable-cdm-check:
-    type: boolean
-    description: |
-      This is a boolean property and if present enables
-      automatic checking of CDM (Configuration Dependent Module) registers
-      for data corruption. CDM registers include standard PCIe configuration
-      space registers, Port Logic registers, DMA and iATU (internal Address
-      Translation Unit) registers.
-
-  num-ib-windows:
-    $ref: /schemas/types.yaml#/definitions/uint32
-    maximum: 256
-    description: number of inbound address translation windows
-    deprecated: true
-
-  num-ob-windows:
-    $ref: /schemas/types.yaml#/definitions/uint32
-    maximum: 256
-    description: number of outbound address translation windows
-    deprecated: true
-
 required:
   - reg
   - reg-names
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index 7287d395e1b65..505b01e0a0341 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -15,6 +15,7 @@ description: |
 
 allOf:
   - $ref: /schemas/pci/pci-bus.yaml#
+  - $ref: /schemas/pci/snps,dw-pcie-common.yaml#
 
 properties:
   compatible:
@@ -37,44 +38,12 @@ properties:
       enum: [ dbi, dbi2, config, atu, atu_dma, app, appl, elbi, mgmt, ctrl,
               parf, cfg, link, ulreg, smu, mpu, apb, phy ]
 
-  num-lanes:
-    description: |
-      number of lanes to use (this property should be specified unless
-      the link is brought already up in firmware)
-    maximum: 16
-
-  reset-gpio:
-    description: GPIO pin number of PERST# signal
-    maxItems: 1
-    deprecated: true
-
-  reset-gpios:
-    description: GPIO controlled connection to PERST# signal
-    maxItems: 1
-
   interrupts: true
 
   interrupt-names: true
 
   clocks: true
 
-  snps,enable-cdm-check:
-    type: boolean
-    description: |
-      This is a boolean property and if present enables
-      automatic checking of CDM (Configuration Dependent Module) registers
-      for data corruption. CDM registers include standard PCIe configuration
-      space registers, Port Logic registers, DMA and iATU (internal Address
-      Translation Unit) registers.
-
-  num-viewport:
-    $ref: /schemas/types.yaml#/definitions/uint32
-    maximum: 256
-    description: |
-      number of view ports configured in hardware. If a platform
-      does not specify it, the driver autodetects it.
-    deprecated: true
-
 additionalProperties: true
 
 required:
-- 
GitLab


From b9fe9985aee2cb62814671b883b9cbfa1c941ab3 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:45 +0300
Subject: [PATCH 2129/4122] dt-bindings: PCI: dwc: Remove bus node from the
 examples

It's absolutely redundant seeing by default each node is embedded into its
own example-X node with address and size cells set to 1.

Link: https://lore.kernel.org/r/20221113191301.5526-5-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/snps,dw-pcie-ep.yaml         | 16 ++++-----
 .../devicetree/bindings/pci/snps,dw-pcie.yaml | 35 ++++++++++---------
 2 files changed, 24 insertions(+), 27 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index eae60901d60e2..7d05dcba419bf 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -46,14 +46,10 @@ additionalProperties: true
 
 examples:
   - |
-    bus {
-      #address-cells = <1>;
-      #size-cells = <1>;
-      pcie-ep@dfd00000 {
-        compatible = "snps,dw-pcie-ep";
-        reg = <0xdfc00000 0x0001000>, /* IP registers 1 */
-              <0xdfc01000 0x0001000>, /* IP registers 2 */
-              <0xd0000000 0x2000000>; /* Configuration space */
-        reg-names = "dbi", "dbi2", "addr_space";
-      };
+    pcie-ep@dfd00000 {
+      compatible = "snps,dw-pcie-ep";
+      reg = <0xdfc00000 0x0001000>, /* IP registers 1 */
+            <0xdfc01000 0x0001000>, /* IP registers 2 */
+            <0xd0000000 0x2000000>; /* Configuration space */
+      reg-names = "dbi", "dbi2", "addr_space";
     };
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index 505b01e0a0341..3fdc80453a850 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -53,21 +53,22 @@ required:
 
 examples:
   - |
-    bus {
-      #address-cells = <1>;
-      #size-cells = <1>;
-      pcie@dfc00000 {
-        device_type = "pci";
-        compatible = "snps,dw-pcie";
-        reg = <0xdfc00000 0x0001000>, /* IP registers */
-              <0xd0000000 0x0002000>; /* Configuration space */
-        reg-names = "dbi", "config";
-        #address-cells = <3>;
-        #size-cells = <2>;
-        ranges = <0x81000000 0 0x00000000 0xde000000 0 0x00010000>,
-                 <0x82000000 0 0xd0400000 0xd0400000 0 0x0d000000>;
-        interrupts = <25>, <24>;
-        #interrupt-cells = <1>;
-        num-lanes = <1>;
-      };
+    pcie@dfc00000 {
+      compatible = "snps,dw-pcie";
+      device_type = "pci";
+      reg = <0xdfc00000 0x0001000>, /* IP registers */
+            <0xd0000000 0x0002000>; /* Configuration space */
+      reg-names = "dbi", "config";
+      #address-cells = <3>;
+      #size-cells = <2>;
+      ranges = <0x81000000 0 0x00000000 0xde000000 0 0x00010000>,
+               <0x82000000 0 0xd0400000 0xd0400000 0 0x0d000000>;
+      bus-range = <0x0 0xff>;
+
+      interrupts = <25>, <24>;
+      #interrupt-cells = <1>;
+
+      reset-gpios = <&port0 0 1>;
+
+      num-lanes = <1>;
     };
-- 
GitLab


From 875596361910711f3e7ba6314075d867e4b74fd1 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:46 +0300
Subject: [PATCH 2130/4122] dt-bindings: PCI: dwc: Add phys/phy-names common
 properties

It's normal to have the DW PCIe RP/EP DT-nodes equipped with the explicit
PHY phandle references. There can be up to 16 PHYs attach in accordance
with the maximum number of supported PCIe lanes. Let's extend the common
DW PCIe controller schema with the 'phys' and 'phy-names' properties
definition. There two types PHY names are defined: preferred generic names
'^pcie[0-9]+$' and non-preferred vendor-specific names
'^pcie([0-9]+|-?phy[0-9]*)?$' so to match the names currently supported by
the DW PCIe platform drivers ("pcie": meson; "pciephy": qcom, imx6;
"pcie-phy": uniphier, rockchip, spear13xx; "pcie": intel-gw; "pcie-phy%d":
keystone, dra7xx; "pcie": histb, etc).

Link: https://lore.kernel.org/r/20221113191301.5526-6-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/snps,dw-pcie-common.yaml     | 24 +++++++++++++++++++
 .../bindings/pci/snps,dw-pcie-ep.yaml         |  3 +++
 .../devicetree/bindings/pci/snps,dw-pcie.yaml |  3 +++
 3 files changed, 30 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
index 554c2804c608a..91d24e400dfcc 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
@@ -17,6 +17,30 @@ description:
 select: false
 
 properties:
+  phys:
+    description:
+      There can be up to the number of possible lanes PHYs specified placed in
+      the phandle array in the line-based order. Obviously each the specified
+      PHYs are supposed to be able to work in the PCIe mode with a speed
+      implied by the DWC PCIe controller they are attached to.
+    minItems: 1
+    maxItems: 16
+
+  phy-names:
+    minItems: 1
+    maxItems: 16
+    oneOf:
+      - description: Generic PHY names
+        items:
+          pattern: '^pcie[0-9]+$'
+      - description:
+          Vendor-specific PHY names. Consider using the generic
+          names above for new bindings.
+        items:
+          oneOf:
+            - pattern: '^pcie(-?phy[0-9]*)?$'
+            - pattern: '^p2u-[0-7]$'
+
   reset-gpio:
     deprecated: true
     description:
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index 7d05dcba419bf..dcd521aed2130 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -52,4 +52,7 @@ examples:
             <0xdfc01000 0x0001000>, /* IP registers 2 */
             <0xd0000000 0x2000000>; /* Configuration space */
       reg-names = "dbi", "dbi2", "addr_space";
+
+      phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>;
+      phy-names = "pcie0", "pcie1", "pcie2", "pcie3";
     };
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index 3fdc80453a850..d9512f7f7124a 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -70,5 +70,8 @@ examples:
 
       reset-gpios = <&port0 0 1>;
 
+      phys = <&pcie_phy>;
+      phy-names = "pcie";
+
       num-lanes = <1>;
     };
-- 
GitLab


From eaa9d886528730bcd7213f0b22c8dd468460f495 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:47 +0300
Subject: [PATCH 2131/4122] dt-bindings: PCI: dwc: Add max-link-speed common
 property

In accordance with [1] DW PCIe controllers support up to Gen5 link speed.
Let's add the max-link-speed property upper bound to 5 then. The DT
bindings of the particular devices are expected to setup more strict
constraint on that parameter.

[1] Synopsys DesignWare Cores PCI Express Controller Databook, Version
5.40a, March 2019, p. 27

Link: https://lore.kernel.org/r/20221113191301.5526-7-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml | 3 +++
 Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml     | 2 ++
 Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml        | 1 +
 3 files changed, 6 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
index 91d24e400dfcc..e63c21783fc1f 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
@@ -54,6 +54,9 @@ properties:
       the peripheral devices available on the PCIe bus.
     maxItems: 1
 
+  max-link-speed:
+    maximum: 5
+
   num-lanes:
     description:
       Number of PCIe link lanes to use. Can be omitted if the already brought
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index dcd521aed2130..fc3b5d4ac2450 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -55,4 +55,6 @@ examples:
 
       phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>;
       phy-names = "pcie0", "pcie1", "pcie2", "pcie3";
+
+      max-link-speed = <3>;
     };
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index d9512f7f7124a..e787b97275897 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -74,4 +74,5 @@ examples:
       phy-names = "pcie";
 
       num-lanes = <1>;
+      max-link-speed = <3>;
     };
-- 
GitLab


From f133396e2d0063d589362122da659fe047643384 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:48 +0300
Subject: [PATCH 2132/4122] dt-bindings: PCI: dwc: Apply generic schema for
 generic device only

Having the generic compatible strings constraints with the 'any'+'generic
string' semantic implicitly encourages either to add new DW PCIe-based
DT-bindings with the generic compatible string attached or just forget
about adding new DT-bindings since the corresponding DT-node will be
evaluated anyway. Moreover having that semantic implemented in the
generic DT-schema causes the DT-validation tool to apply the schema twice:
first by implicit compatible-string-based selection and second by means of
the 'allOf: [ $ref ]' statement. Let's fix all of that by dropping the
compatible property constraints and selecting the generic DT-schema only
for the purely generic DW PCIe DT-nodes. The later is required since there
is a driver for such devices. (Though there are no such DT-nodes currently
defined in the kernel DT sources.)

Link: https://lore.kernel.org/r/20221113191301.5526-8-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/pci/snps,dw-pcie-ep.yaml | 16 ++++++++++------
 .../devicetree/bindings/pci/snps,dw-pcie.yaml    | 16 ++++++++++------
 2 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index fc3b5d4ac2450..d04001248b53d 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -13,16 +13,20 @@ maintainers:
 description: |
   Synopsys DesignWare PCIe host controller endpoint
 
+# Please create a separate DT-schema for your DWC PCIe Endpoint controller
+# and make sure it's assigned with the vendor-specific compatible string.
+select:
+  properties:
+    compatible:
+      const: snps,dw-pcie-ep
+  required:
+    - compatible
+
 allOf:
   - $ref: /schemas/pci/pci-ep.yaml#
   - $ref: /schemas/pci/snps,dw-pcie-common.yaml#
 
 properties:
-  compatible:
-    anyOf:
-      - {}
-      - const: snps,dw-pcie-ep
-
   reg:
     description: |
       It should contain Data Bus Interface (dbi) and config registers for all
@@ -38,9 +42,9 @@ properties:
       enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl]
 
 required:
+  - compatible
   - reg
   - reg-names
-  - compatible
 
 additionalProperties: true
 
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index e787b97275897..85861b71d9ff7 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -13,16 +13,20 @@ maintainers:
 description: |
   Synopsys DesignWare PCIe host controller
 
+# Please create a separate DT-schema for your DWC PCIe Root Port controller
+# and make sure it's assigned with the vendor-specific compatible string.
+select:
+  properties:
+    compatible:
+      const: snps,dw-pcie
+  required:
+    - compatible
+
 allOf:
   - $ref: /schemas/pci/pci-bus.yaml#
   - $ref: /schemas/pci/snps,dw-pcie-common.yaml#
 
 properties:
-  compatible:
-    anyOf:
-      - {}
-      - const: snps,dw-pcie
-
   reg:
     description: |
       It should contain Data Bus Interface (dbi) and config registers for all
@@ -47,9 +51,9 @@ properties:
 additionalProperties: true
 
 required:
+  - compatible
   - reg
   - reg-names
-  - compatible
 
 examples:
   - |
-- 
GitLab


From 12f7936c7a0e0c40069ff12ddfd091a29da6e77c Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:49 +0300
Subject: [PATCH 2133/4122] dt-bindings: PCI: dwc: Add max-functions EP
 property

In accordance with [1] the CX_NFUNC IP-core synthesize parameter is
responsible for the number of physical functions to support in the EP
mode. Its upper limit is 32. Let's use it to constrain the number of
PCIe functions the DW PCIe EP DT-nodes can advertise.

[1] Synopsys DesignWare Cores PCI Express Controller Databook - DWC PCIe
Endpoint, Version 5.40a, March 2019, p. 887.

Link: https://lore.kernel.org/r/20221113191301.5526-9-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index d04001248b53d..71dd19ae1060c 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -41,6 +41,9 @@ properties:
     items:
       enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl]
 
+  max-functions:
+    maximum: 32
+
 required:
   - compatible
   - reg
@@ -61,4 +64,5 @@ examples:
       phy-names = "pcie0", "pcie1", "pcie2", "pcie3";
 
       max-link-speed = <3>;
+      max-functions = /bits/ 8 <4>;
     };
-- 
GitLab


From 35486813c41b3a5229b4987857ff597704feda21 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:50 +0300
Subject: [PATCH 2134/4122] dt-bindings: PCI: dwc: Add
 interrupts/interrupt-names common properties

Currently the 'interrupts' and 'interrupt-names' properties are defined
being too generic to really describe any actual IRQ interface. Moreover
the DW PCIe End-point devices are left with no IRQ signals. All of that
can be fixed by adding the IRQ-related properties to the common DW PCIe
DT-schemas in accordance with the hardware reference manual. The DW PCIe
common DT-schema will contain the generic properties definitions with just
a number of entries per property, while the DW PCIe RP/EP-specific schemas
will have the particular number of items and the generic resource names
listed.

Note since there are DW PCI-based vendor-specific DT-bindings with the
custom names assigned to the same IRQ resources we have no much choice but
to add them to the generic DT-schemas in order to have the schemas being
applicable for such devices. These names are marked as vendor-specific and
should be avoided being used in new bindings in favor of the generic
names.

Link: https://lore.kernel.org/r/20221113191301.5526-10-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/snps,dw-pcie-common.yaml     | 19 ++++
 .../bindings/pci/snps,dw-pcie-ep.yaml         | 52 +++++++++++
 .../devicetree/bindings/pci/snps,dw-pcie.yaml | 90 ++++++++++++++++++-
 3 files changed, 158 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
index e63c21783fc1f..4646fb14e8176 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
@@ -17,6 +17,25 @@ description:
 select: false
 
 properties:
+  interrupts:
+    description:
+      There are two main sub-blocks which are normally capable of
+      generating interrupts. It's System Information Interface and MSI
+      interface. While the former one has some common for the Host and
+      Endpoint controllers IRQ-signals, the later interface is obviously
+      Root Complex specific since it's responsible for the incoming MSI
+      messages signalling. The System Information IRQ signals are mainly
+      responsible for reporting the generic PCIe hierarchy and Root
+      Complex events like VPD IO request, general AER, PME, Hot-plug, link
+      bandwidth change, link equalization request, INTx asserted/deasserted
+      Message detection, embedded DMA Tx/Rx/Error.
+    minItems: 1
+    maxItems: 26
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 26
+
   phys:
     description:
       There can be up to the number of possible lanes PHYs specified placed in
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index 71dd19ae1060c..7d3f8fc8b7b49 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -41,6 +41,55 @@ properties:
     items:
       enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl]
 
+  interrupts:
+    description:
+      There is no mandatory IRQ signals for the normal controller functioning,
+      but in addition to the native set the platforms may have a link- or
+      PM-related IRQs specified.
+    minItems: 1
+    maxItems: 20
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 20
+    items:
+      oneOf:
+        - description:
+            Controller request to read or write virtual product data
+            from/to the VPD capability registers.
+          const: vpd
+        - description:
+            Link Equalization Request flag is set in the Link Status 2
+            register (applicable if the corresponding IRQ is enabled in
+            the Link Control 3 register).
+          const: l_eq
+        - description:
+            Indicates that the eDMA Tx/Rx transfer is complete or that an
+            error has occurred on the corresponding channel. eDMA can have
+            eight Tx (Write) and Rx (Read) eDMA channels thus supporting up
+            to 16 IRQ signals all together. Write eDMA channels shall go
+            first in the ordered row as per default edma_int[*] bus setup.
+          pattern: '^dma([0-9]|1[0-5])?$'
+        - description:
+            PCIe protocol correctable error or a Data Path protection
+            correctable error is detected by the automotive/safety
+            feature.
+          const: sft_ce
+        - description:
+            Indicates that the internal safety mechanism has detected an
+            uncorrectable error.
+          const: sft_ue
+        - description:
+            Application-specific IRQ raised depending on the vendor-specific
+            events basis.
+          const: app
+        - description:
+            Vendor-specific IRQ names. Consider using the generic names above
+            for new bindings.
+          oneOf:
+            - description: See native "app" IRQ for details
+              enum: [ intr ]
+
   max-functions:
     maximum: 32
 
@@ -60,6 +109,9 @@ examples:
             <0xd0000000 0x2000000>; /* Configuration space */
       reg-names = "dbi", "dbi2", "addr_space";
 
+      interrupts = <23>, <24>;
+      interrupt-names = "dma0", "dma1";
+
       phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>;
       phy-names = "pcie0", "pcie1", "pcie2", "pcie3";
 
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index 85861b71d9ff7..fa1db57b2b979 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -42,9 +42,92 @@ properties:
       enum: [ dbi, dbi2, config, atu, atu_dma, app, appl, elbi, mgmt, ctrl,
               parf, cfg, link, ulreg, smu, mpu, apb, phy ]
 
-  interrupts: true
-
-  interrupt-names: true
+  interrupts:
+    description:
+      DWC PCIe Root Port/Complex specific IRQ signals. At least MSI interrupt
+      signal is supposed to be specified for the host controller.
+    minItems: 1
+    maxItems: 26
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 26
+    items:
+      oneOf:
+        - description:
+            Controller request to read or write virtual product data
+            from/to the VPD capability registers.
+          const: vpd
+        - description:
+            Link Equalization Request flag is set in the Link Status 2
+            register (applicable if the corresponding IRQ is enabled in
+            the Link Control 3 register).
+          const: l_eq
+        - description:
+            Indicates that the eDMA Tx/Rx transfer is complete or that an
+            error has occurred on the corresponding channel. eDMA can have
+            eight Tx (Write) and Rx (Read) eDMA channels thus supporting up
+            to 16 IRQ signals all together. Write eDMA channels shall go
+            first in the ordered row as per default edma_int[*] bus setup.
+          pattern: '^dma([0-9]|1[0-5])?$'
+        - description:
+            PCIe protocol correctable error or a Data Path protection
+            correctable error is detected by the automotive/safety
+            feature.
+          const: sft_ce
+        - description:
+            Indicates that the internal safety mechanism has detected an
+            uncorrectable error.
+          const: sft_ue
+        - description:
+            Application-specific IRQ raised depending on the vendor-specific
+            events basis.
+          const: app
+        - description:
+            DSP AXI MSI Interrupt detected. It gets de-asserted when there is
+            no more MSI interrupt pending. The interrupt is relevant to the
+            iMSI-RX - Integrated MSI Receiver (AXI bridge).
+          const: msi
+        - description:
+            Legacy A/B/C/D interrupt signal. Basically it's triggered by
+            receiving a Assert_INT{A,B,C,D}/Desassert_INT{A,B,C,D} message
+            from the downstream device.
+          pattern: "^int(a|b|c|d)$"
+        - description:
+            Error condition detected and a flag is set in the Root Error Status
+            register of the AER capability. It's asserted when the RC
+            internally generated an error or an error message is received by
+            the RC.
+          const: aer
+        - description:
+            PME message is received by the port. That means having the PME
+            status bit set in the Root Status register (the event is
+            supposed to be unmasked in the Root Control register).
+          const: pme
+        - description:
+            Hot-plug event is detected. That is a bit has been set in the
+            Slot Status register and the corresponding event is enabled in
+            the Slot Control register.
+          const: hp
+        - description:
+            Link Autonomous Bandwidth Status flag has been set in the Link
+            Status register (the event is supposed to be unmasked in the
+            Link Control register).
+          const: bw_au
+        - description:
+            Bandwidth Management Status flag has been set in the Link
+            Status register (the event is supposed to be unmasked in the
+            Link Control register).
+          const: bw_mg
+        - description:
+            Vendor-specific IRQ names. Consider using the generic names above
+            for new bindings.
+          oneOf:
+            - description: See native "app" IRQ for details
+              enum: [ intr ]
+    allOf:
+      - contains:
+          const: msi
 
   clocks: true
 
@@ -70,6 +153,7 @@ examples:
       bus-range = <0x0 0xff>;
 
       interrupts = <25>, <24>;
+      interrupt-names = "msi", "hp";
       #interrupt-cells = <1>;
 
       reset-gpios = <&port0 0 1>;
-- 
GitLab


From 4cc13eedb892c53f3d61fb5a1f6d57724541441a Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:51 +0300
Subject: [PATCH 2135/4122] dt-bindings: PCI: dwc: Add reg/reg-names common
 properties

Even though there is a more-or-less limited set of the CSR spaces can be
defined for each DW PCIe controller the generic DT-schema currently
doesn't specify much limitations on the reg-space names used for one or
another range. In order to prevent the vendor-specific controller schemas
further deviation from the generic interface let's fix that by introducing
the reg-names definition in the common DW PCIe DT-schemas and preserving
the generic "reg" and "reg-names" properties in there. New DW PCIe device
DT-bindings are encouraged to use the generic set of the CSR spaces
defined in the generic DW PCIe RP/EP DT-bindings, while the already
available vendor-specific DT-bindings can still apple the common
DT-schemas.

Note the number of reg/reg-names items need to be changed in the DW PCIe
EP DT-schema since aside with the "dbi" CSRs space these arrays can have
"dbi2", "addr_space", "atu", etc ranges.

Also note since there are DW PCIe-based vendor-specific DT-bindings with
the custom names assigned to the same CSR resources we have no much choice
but to add them to the generic DT-schemas in order to have the schemas
being applicable for such devices. These names are marked as
vendor-specific and should be avoided being used in new bindings in favor
of the generic names.

Link: https://lore.kernel.org/r/20221113191301.5526-11-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/snps,dw-pcie-common.yaml     | 22 +++++
 .../bindings/pci/snps,dw-pcie-ep.yaml         | 82 +++++++++++++++++--
 .../devicetree/bindings/pci/snps,dw-pcie.yaml | 78 ++++++++++++++++--
 3 files changed, 169 insertions(+), 13 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
index 4646fb14e8176..13c41cd50e54a 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
@@ -17,6 +17,28 @@ description:
 select: false
 
 properties:
+  reg:
+    description:
+      DWC PCIe CSR space is normally accessed over the dedicated Data Bus
+      Interface - DBI. In accordance with the reference manual the register
+      configuration space belongs to the Configuration-Dependent Module (CDM)
+      and is split up into several sub-parts Standard PCIe configuration
+      space, Port Logic Registers (PL), Shadow Config-space Registers,
+      iATU/eDMA registers. The particular sub-space is selected by the
+      CDM/ELBI (dbi_cs) and CS2 (dbi_cs2) signals (selector bits). Such
+      configuration provides a flexible interface for the system engineers to
+      either map the particular space at a desired MMIO address or just leave
+      them in a contiguous memory space if pure Native or AXI Bridge DBI access
+      is selected. Note the PCIe CFG-space, PL and Shadow registers are
+      specific for each activated function, while the rest of the sub-spaces
+      are common for all of them (if there are more than one).
+    minItems: 2
+    maxItems: 6
+
+  reg-names:
+    minItems: 2
+    maxItems: 6
+
   interrupts:
     description:
       There are two main sub-blocks which are normally capable of
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index 7d3f8fc8b7b49..f4d7eb2dec4db 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -28,18 +28,86 @@ allOf:
 
 properties:
   reg:
-    description: |
-      It should contain Data Bus Interface (dbi) and config registers for all
-      versions.
-      For designware core version >= 4.80, it may contain ATU address space.
+    description:
+      DBI, DBI2 reg-spaces and outbound memory window are required for the
+      normal controller functioning. iATU memory IO region is also required
+      if the space is unrolled (IP-core version >= 4.80a).
     minItems: 2
-    maxItems: 4
+    maxItems: 5
 
   reg-names:
     minItems: 2
-    maxItems: 4
+    maxItems: 5
     items:
-      enum: [dbi, dbi2, config, atu, addr_space, link, atu_dma, appl]
+      oneOf:
+        - description:
+            Basic DWC PCIe controller configuration-space accessible over
+            the DBI interface. This memory space is either activated with
+            CDM/ELBI = 0 and CS2 = 0 or is a contiguous memory region
+            with all spaces. Note iATU/eDMA CSRs are indirectly accessible
+            via the PL viewports on the DWC PCIe controllers older than
+            v4.80a.
+          const: dbi
+        - description:
+            Shadow DWC PCIe config-space registers. This space is selected
+            by setting CDM/ELBI = 0 and CS2 = 1. This is an intermix of
+            the PCI-SIG PCIe CFG-space with the shadow registers for some
+            PCI Header space, PCI Standard and Extended Structures. It's
+            mainly relevant for the end-point controller configuration,
+            but still there are some shadow registers available for the
+            Root Port mode too.
+          const: dbi2
+        - description:
+            External Local Bus registers. It's an application-dependent
+            registers normally defined by the platform engineers. The space
+            can be selected by setting CDM/ELBI = 1 and CS2 = 0 wires or can
+            be accessed over some platform-specific means (for instance
+            as a part of a system controller).
+          enum: [ elbi, app ]
+        - description:
+            iATU/eDMA registers common for all device functions. It's an
+            unrolled memory space with the internal Address Translation
+            Unit and Enhanced DMA, which is selected by setting CDM/ELBI = 1
+            and CS2 = 1. For IP-core releases prior v4.80a, these registers
+            have been programmed via an indirect addressing scheme using a
+            set of viewport CSRs mapped into the PL space. Note iATU is
+            normally mapped to the 0x0 address of this region, while eDMA
+            is available at 0x80000 base address.
+          const: atu
+        - description:
+            Platform-specific eDMA registers. Some platforms may have eDMA
+            CSRs mapped in a non-standard base address. The registers offset
+            can be changed or the MS/LS-bits of the address can be attached
+            in an additional RTL block before the MEM-IO transactions reach
+            the DW PCIe slave interface.
+          const: dma
+        - description:
+            PHY/PCS configuration registers. Some platforms can have the
+            PCS and PHY CSRs accessible over a dedicated memory mapped
+            region, but mainly these registers are indirectly accessible
+            either by means of the embedded PHY viewport schema or by some
+            platform-specific method.
+          const: phy
+        - description:
+            Outbound iATU-capable memory-region which will be used to
+            generate various application-specific traffic on the PCIe bus
+            hierarchy. It's usage scenario depends on the endpoint
+            functionality, for instance it can be used to create MSI(X)
+            messages.
+          const: addr_space
+        - description:
+            Vendor-specific CSR names. Consider using the generic names above
+            for new bindings.
+          oneOf:
+            - description: See native 'elbi/app' CSR region for details.
+              enum: [ link, appl ]
+            - description: See native 'atu' CSR region for details.
+              enum: [ atu_dma ]
+    allOf:
+      - contains:
+          const: dbi
+      - contains:
+          const: addr_space
 
   interrupts:
     description:
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index fa1db57b2b979..59d3bbb5883a7 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -28,10 +28,10 @@ allOf:
 
 properties:
   reg:
-    description: |
-      It should contain Data Bus Interface (dbi) and config registers for all
-      versions.
-      For designware core version >= 4.80, it may contain ATU address space.
+    description:
+      At least DBI reg-space and peripheral devices CFG-space outbound window
+      are required for the normal controller work. iATU memory IO region is
+      also required if the space is unrolled (IP-core version >= 4.80a).
     minItems: 2
     maxItems: 5
 
@@ -39,8 +39,74 @@ properties:
     minItems: 2
     maxItems: 5
     items:
-      enum: [ dbi, dbi2, config, atu, atu_dma, app, appl, elbi, mgmt, ctrl,
-              parf, cfg, link, ulreg, smu, mpu, apb, phy ]
+      oneOf:
+        - description:
+            Basic DWC PCIe controller configuration-space accessible over
+            the DBI interface. This memory space is either activated with
+            CDM/ELBI = 0 and CS2 = 0 or is a contiguous memory region
+            with all spaces. Note iATU/eDMA CSRs are indirectly accessible
+            via the PL viewports on the DWC PCIe controllers older than
+            v4.80a.
+          const: dbi
+        - description:
+            Shadow DWC PCIe config-space registers. This space is selected
+            by setting CDM/ELBI = 0 and CS2 = 1. This is an intermix of
+            the PCI-SIG PCIe CFG-space with the shadow registers for some
+            PCI Header space, PCI Standard and Extended Structures. It's
+            mainly relevant for the end-point controller configuration,
+            but still there are some shadow registers available for the
+            Root Port mode too.
+          const: dbi2
+        - description:
+            External Local Bus registers. It's an application-dependent
+            registers normally defined by the platform engineers. The space
+            can be selected by setting CDM/ELBI = 1 and CS2 = 0 wires or can
+            be accessed over some platform-specific means (for instance
+            as a part of a system controller).
+          enum: [ elbi, app ]
+        - description:
+            iATU/eDMA registers common for all device functions. It's an
+            unrolled memory space with the internal Address Translation
+            Unit and Enhanced DMA, which is selected by setting CDM/ELBI = 1
+            and CS2 = 1. For IP-core releases prior v4.80a, these registers
+            have been programmed via an indirect addressing scheme using a
+            set of viewport CSRs mapped into the PL space. Note iATU is
+            normally mapped to the 0x0 address of this region, while eDMA
+            is available at 0x80000 base address.
+          const: atu
+        - description:
+            Platform-specific eDMA registers. Some platforms may have eDMA
+            CSRs mapped in a non-standard base address. The registers offset
+            can be changed or the MS/LS-bits of the address can be attached
+            in an additional RTL block before the MEM-IO transactions reach
+            the DW PCIe slave interface.
+          const: dma
+        - description:
+            PHY/PCS configuration registers. Some platforms can have the
+            PCS and PHY CSRs accessible over a dedicated memory mapped
+            region, but mainly these registers are indirectly accessible
+            either by means of the embedded PHY viewport schema or by some
+            platform-specific method.
+          const: phy
+        - description:
+            Outbound iATU-capable memory-region which will be used to access
+            the peripheral PCIe devices configuration space.
+          const: config
+        - description:
+            Vendor-specific CSR names. Consider using the generic names above
+            for new bindings.
+          oneOf:
+            - description: See native 'elbi/app' CSR region for details.
+              enum: [ apb, mgmt, link, ulreg, appl ]
+            - description: See native 'atu' CSR region for details.
+              enum: [ atu_dma ]
+            - description: Syscon-related CSR regions.
+              enum: [ smu, mpu ]
+    allOf:
+      - contains:
+          const: dbi
+      - contains:
+          const: config
 
   interrupts:
     description:
-- 
GitLab


From bd9504af9169131156e753a6e47de34ad7a97b7d Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:52 +0300
Subject: [PATCH 2136/4122] dt-bindings: PCI: dwc: Add clocks/resets common
 properties

DW PCIe RP/EP reference manuals explicit define all the clocks and reset
requirements in [1] and [2]. Seeing the DW PCIe vendor-specific
DT-bindings have already started assigning random names to the same set of
the clocks and resets lines, let's define a generic names sets and add
them to the DW PCIe common DT-schema.

Note since there are DW PCI-based vendor-specific DT-bindings with the
custom names assigned to the same clocks and resets resources we have no
much choice but to add them to the generic DT-schemas in order to have the
schemas being applicable for such devices. These names are marked as
vendor-specific and should be avoided being used in new bindings in favor
of the generic names.

[1] Synopsys DesignWare Cores PCI Express Controller Databook - DWC PCIe
Root Port, Version 5.40a, March 2019, p.55 - 78.
[2] Synopsys DesignWare Cores PCI Express Controller Databook - DWC PCIe
Endpoint, Version 5.40a, March 2019, p.58 - 81.

Link: https://lore.kernel.org/r/20221113191301.5526-12-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/snps,dw-pcie-common.yaml     | 120 ++++++++++++++++++
 .../bindings/pci/snps,dw-pcie-ep.yaml         |   6 +
 .../devicetree/bindings/pci/snps,dw-pcie.yaml |   2 -
 3 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
index 13c41cd50e54a..4d9efcea38598 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
@@ -58,6 +58,126 @@ properties:
     minItems: 1
     maxItems: 26
 
+  clocks:
+    description:
+      DWC PCIe reference manual explicitly defines a set of the clocks required
+      to get the controller working correctly. In general all of them can
+      be divided into two groups':' application and core clocks. Note the
+      platforms may have some of the clock sources unspecified in case if the
+      corresponding domains are fed up from a common clock source.
+    minItems: 1
+    maxItems: 7
+
+  clock-names:
+    minItems: 1
+    maxItems: 7
+    items:
+      oneOf:
+        - description:
+            Data Bus Interface (DBI) clock. Clock signal for the AXI-bus
+            interface of the Configuration-Dependent Module, which is
+            basically the set of the controller CSRs.
+          const: dbi
+        - description:
+            Application AXI-bus Master interface clock. Basically this is
+            a clock for the controller DMA interface (PCI-to-CPU).
+          const: mstr
+        - description:
+            Application AXI-bus Slave interface clock. This is a clock for
+            the CPU-to-PCI memory IO interface.
+          const: slv
+        - description:
+            Controller Core-PCS PIPE interface clock. It's normally
+            supplied by an external PCS-PHY.
+          const: pipe
+        - description:
+            Controller Primary clock. It's assumed that all controller input
+            signals (except resets) are synchronous to this clock.
+          const: core
+        - description:
+            Auxiliary clock for the controller PMC domain. The controller
+            partitioning implies having some parts to operate with this
+            clock in some power management states.
+          const: aux
+        - description:
+            Generic reference clock. In case if there are several
+            interfaces fed up with a common clock source it's advisable to
+            define it with this name (for instance pipe, core and aux can
+            be connected to a single source of the periodic signal).
+          const: ref
+        - description:
+            Clock for the PHY registers interface. Originally this is
+            a PHY-viewport-based interface, but some platform may have
+            specifically designed one.
+          const: phy_reg
+        - description:
+            Vendor-specific clock names. Consider using the generic names
+            above for new bindings.
+          oneOf:
+            - description: See native 'dbi' clock for details
+              enum: [ pcie, pcie_apb_sys, aclk_dbi ]
+            - description: See native 'mstr/slv' clock for details
+              enum: [ pcie_bus, pcie_inbound_axi, pcie_aclk, aclk_mst, aclk_slv ]
+            - description: See native 'pipe' clock for details
+              enum: [ pcie_phy, pcie_phy_ref, link ]
+            - description: See native 'aux' clock for details
+              enum: [ pcie_aux ]
+            - description: See native 'ref' clock for details.
+              enum: [ gio ]
+            - description: See nativs 'phy_reg' clock for details
+              enum: [ pcie_apb_phy, pclk ]
+
+  resets:
+    description:
+      DWC PCIe reference manual explicitly defines a set of the reset
+      signals required to be de-asserted to properly activate the controller
+      sub-parts. All of these signals can be divided into two sub-groups':'
+      application and core resets with respect to the main sub-domains they
+      are supposed to reset. Note the platforms may have some of these signals
+      unspecified in case if they are automatically handled or aggregated into
+      a comprehensive control module.
+    minItems: 1
+    maxItems: 10
+
+  reset-names:
+    minItems: 1
+    maxItems: 10
+    items:
+      oneOf:
+        - description: Data Bus Interface (DBI) domain reset
+          const: dbi
+        - description: AXI-bus Master interface reset
+          const: mstr
+        - description: AXI-bus Slave interface reset
+          const: slv
+        - description: Application-dependent interface reset
+          const: app
+        - description: Controller Non-sticky CSR flags reset
+          const: non-sticky
+        - description: Controller sticky CSR flags reset
+          const: sticky
+        - description: PIPE-interface (Core-PCS) logic reset
+          const: pipe
+        - description:
+            Controller primary reset (resets everything except PMC module)
+          const: core
+        - description: PCS/PHY block reset
+          const: phy
+        - description: PMC hot reset signal
+          const: hot
+        - description: Cold reset signal
+          const: pwr
+        - description:
+            Vendor-specific reset names. Consider using the generic names
+            above for new bindings.
+          oneOf:
+            - description: See native 'app' reset for details
+              enum: [ apps, gio, apb ]
+            - description: See native 'phy' reset for details
+              enum: [ pciephy, link ]
+            - description: See native 'pwr' reset for details
+              enum: [ turnoff ]
+
   phys:
     description:
       There can be up to the number of possible lanes PHYs specified placed in
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
index f4d7eb2dec4db..8fc2151691a47 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
@@ -180,6 +180,12 @@ examples:
       interrupts = <23>, <24>;
       interrupt-names = "dma0", "dma1";
 
+      clocks = <&sys_clk 12>, <&sys_clk 24>;
+      clock-names = "dbi", "ref";
+
+      resets = <&sys_rst 12>, <&sys_rst 24>;
+      reset-names = "dbi", "phy";
+
       phys = <&pcie_phy0>, <&pcie_phy1>, <&pcie_phy2>, <&pcie_phy3>;
       phy-names = "pcie0", "pcie1", "pcie2", "pcie3";
 
diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
index 59d3bbb5883a7..c62c8fe517aef 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
@@ -195,8 +195,6 @@ properties:
       - contains:
           const: msi
 
-  clocks: true
-
 additionalProperties: true
 
 required:
-- 
GitLab


From 4a8972542a6d1eee81c7cc27699b0a47f6a6619e Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:53 +0300
Subject: [PATCH 2137/4122] dt-bindings: PCI: dwc: Add dma-coherent property

DW PCIe EP/RP AXI- and TRGT1-master interfaces are responsible for the
application memory access. They are used by the RP/EP PCIe buses (MWr/MWr
TLPs emitted by the peripheral PCIe devices) and the eDMA block. Since all
of them mainly involve the system memory and basically mean DMA we can
expect the corresponding platforms can be designed in a way to make sure
the transactions are cache-coherent. As such the DW PCIe DT-nodes can have
the 'dma-coherent' property specified. Let's permit it in the DT-bindings
then.

Link: https://lore.kernel.org/r/20221113191301.5526-13-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
index 4d9efcea38598..d87e13496834a 100644
--- a/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
+++ b/Documentation/devicetree/bindings/pci/snps,dw-pcie-common.yaml
@@ -259,6 +259,8 @@ properties:
       configuration space registers, Port Logic registers, DMA and iATU
       registers. This feature has been available since DWC PCIe v4.80a.
 
+  dma-coherent: true
+
 additionalProperties: true
 
 ...
-- 
GitLab


From 98b59129cb9f43a37bb92a577145f29ca54353a7 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:54 +0300
Subject: [PATCH 2138/4122] dt-bindings: PCI: dwc: Apply common schema to
 Rockchip DW PCIe nodes

As the DT-bindings description states the Rockchip PCIe controller is
based on the DW PCIe RP IP-core thus its DT-nodes are supposed to be
compatible with the common DW PCIe controller schema. Let's make sure they
are evaluated against it by referring to the snps,dw-pcie.yaml schema in
the allOf sub-schemas composition.

Link: https://lore.kernel.org/r/20221113191301.5526-14-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml b/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml
index bc0a9d1db750b..2be72ae1169f9 100644
--- a/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/rockchip-dw-pcie.yaml
@@ -14,10 +14,10 @@ maintainers:
 description: |+
   RK3568 SoC PCIe host controller is based on the Synopsys DesignWare
   PCIe IP and thus inherits all the common properties defined in
-  designware-pcie.txt.
+  snps,dw-pcie.yaml.
 
 allOf:
-  - $ref: /schemas/pci/pci-bus.yaml#
+  - $ref: /schemas/pci/snps,dw-pcie.yaml#
 
 properties:
   compatible:
-- 
GitLab


From ce27c4e61f2dcc41d13f54cbecbd3a4b15db86c8 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:55 +0300
Subject: [PATCH 2139/4122] dt-bindings: PCI: dwc: Add Baikal-T1 PCIe Root Port
 bindings

Baikal-T1 SoC is equipped with DWC PCIe v4.60a Root Port controller, which
link can be trained to work on up to Gen.3 speed over up to x4 lanes. The
controller is supposed to be fed up with four clock sources: DBI
peripheral clock, AXI application Tx/Rx clocks and external PHY/core
reference clock generating the 100MHz signal. In addition to that the
platform provide a way to reset each part of the controller:
sticky/non-sticky bits, host controller core, PIPE interface, PCS/PHY and
Hot/Power reset signal. The Root Port controller is equipped with multiple
IRQ lines like MSI, system AER, PME, HP, Bandwidth change, Link
equalization request and eDMA ones. The registers space is accessed over
the DBI interface. There can be no more than four inbound or outbound iATU
windows configured.

Link: https://lore.kernel.org/r/20221113191301.5526-15-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/baikal,bt1-pcie.yaml         | 168 ++++++++++++++++++
 1 file changed, 168 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml

diff --git a/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml b/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml
new file mode 100644
index 0000000000000..8eaa07ae97743
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/baikal,bt1-pcie.yaml
@@ -0,0 +1,168 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/baikal,bt1-pcie.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Baikal-T1 PCIe Root Port Controller
+
+maintainers:
+  - Serge Semin <fancer.lancer@gmail.com>
+
+description:
+  Embedded into Baikal-T1 SoC Root Complex controller with a single port
+  activated. It's based on the DWC RC PCIe v4.60a IP-core, which is configured
+  to have just a single Root Port function and is capable of establishing the
+  link up to Gen.3 speed on x4 lanes. It doesn't have embedded clock and reset
+  control module, so the proper interface initialization is supposed to be
+  performed by software. There four in- and four outbound iATU regions
+  which can be used to emit all required TLP types on the PCIe bus.
+
+allOf:
+  - $ref: /schemas/pci/snps,dw-pcie.yaml#
+
+properties:
+  compatible:
+    const: baikal,bt1-pcie
+
+  reg:
+    description:
+      DBI, DBI2 and at least 4KB outbound iATU-capable region for the
+      peripheral devices CFG-space access.
+    maxItems: 3
+
+  reg-names:
+    items:
+      - const: dbi
+      - const: dbi2
+      - const: config
+
+  interrupts:
+    description:
+      MSI, AER, PME, Hot-plug, Link Bandwidth Management, Link Equalization
+      request and eight Read/Write eDMA IRQ lines are available.
+    maxItems: 14
+
+  interrupt-names:
+    items:
+      - const: dma0
+      - const: dma1
+      - const: dma2
+      - const: dma3
+      - const: dma4
+      - const: dma5
+      - const: dma6
+      - const: dma7
+      - const: msi
+      - const: aer
+      - const: pme
+      - const: hp
+      - const: bw_mg
+      - const: l_eq
+
+  clocks:
+    description:
+      DBI (attached to the APB bus), AXI-bus master and slave interfaces
+      are fed up by the dedicated application clocks. A common reference
+      clock signal is supposed to be attached to the corresponding Ref-pad
+      of the SoC. It will be redistributed amongst the controller core
+      sub-modules (pipe, core, aux, etc).
+    maxItems: 4
+
+  clock-names:
+    items:
+      - const: dbi
+      - const: mstr
+      - const: slv
+      - const: ref
+
+  resets:
+    description:
+      A comprehensive controller reset logic is supposed to be implemented
+      by software, so almost all the possible application and core reset
+      signals are exposed via the system CCU module.
+    maxItems: 9
+
+  reset-names:
+    items:
+      - const: mstr
+      - const: slv
+      - const: pwr
+      - const: hot
+      - const: phy
+      - const: core
+      - const: pipe
+      - const: sticky
+      - const: non-sticky
+
+  baikal,bt1-syscon:
+    $ref: /schemas/types.yaml#/definitions/phandle
+    description:
+      Phandle to the Baikal-T1 System Controller DT node. It's required to
+      access some additional PM, Reset-related and LTSSM signals.
+
+  num-lanes:
+    maximum: 4
+
+  max-link-speed:
+    maximum: 3
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - interrupts
+  - interrupt-names
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/mips-gic.h>
+    #include <dt-bindings/gpio/gpio.h>
+
+    pcie@1f052000 {
+      compatible = "baikal,bt1-pcie";
+      device_type = "pci";
+      reg = <0x1f052000 0x1000>, <0x1f053000 0x1000>, <0x1bdbf000 0x1000>;
+      reg-names = "dbi", "dbi2", "config";
+      #address-cells = <3>;
+      #size-cells = <2>;
+      ranges = <0x81000000 0 0x00000000 0x1bdb0000 0 0x00008000>,
+               <0x82000000 0 0x20000000 0x08000000 0 0x13db0000>;
+      bus-range = <0x0 0xff>;
+
+      interrupts = <GIC_SHARED 80 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 81 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 82 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 83 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 84 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 85 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 86 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 87 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 88 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 89 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 90 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 91 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 92 IRQ_TYPE_LEVEL_HIGH>,
+                   <GIC_SHARED 93 IRQ_TYPE_LEVEL_HIGH>;
+      interrupt-names = "dma0", "dma1", "dma2", "dma3",
+                        "dma4", "dma5", "dma6", "dma7",
+                        "msi", "aer", "pme", "hp", "bw_mg",
+                        "l_eq";
+
+      clocks = <&ccu_sys 1>, <&ccu_axi 6>, <&ccu_axi 7>, <&clk_pcie>;
+      clock-names = "dbi", "mstr", "slv", "ref";
+
+      resets = <&ccu_axi 6>, <&ccu_axi 7>, <&ccu_sys 7>, <&ccu_sys 10>,
+               <&ccu_sys 4>, <&ccu_sys 6>, <&ccu_sys 5>, <&ccu_sys 8>,
+               <&ccu_sys 9>;
+      reset-names = "mstr", "slv", "pwr", "hot", "phy", "core", "pipe",
+                    "sticky", "non-sticky";
+
+      reset-gpios = <&port0 0 GPIO_ACTIVE_LOW>;
+
+      num-lanes = <4>;
+      max-link-speed = <3>;
+    };
+...
-- 
GitLab


From 8522e17d4cab47b35d43943ca13d677e76ab01b7 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:56 +0300
Subject: [PATCH 2140/4122] PCI: dwc: Introduce dma-ranges property support for
 RC-host

In accordance with the generic PCIe Root Port DT-bindings the "dma-ranges"
property has the same format as the "ranges" property. The only difference
is in their semantics. The "dma-ranges" property describes the PCIe-to-CPU
memory mapping in opposite to the CPU-to-PCIe mapping of the "ranges"
property. Even though the DW PCIe controllers are normally equipped with
the internal Address Translation Unit which inbound and outbound tables
can be used to implement both properties semantics, it was surprising for
me to discover that the host-related part of the DW PCIe driver currently
supports the "ranges" property only while the "dma-ranges" windows are
just ignored. Having the "dma-ranges" supported in the driver would be
very handy for the platforms, that don't tolerate the 1:1 CPU-PCIe memory
mapping and require a customized PCIe memory layout. So let's fix that by
introducing the "dma-ranges" property support.

First of all we suggest to rename the dw_pcie_prog_inbound_atu() method to
dw_pcie_prog_ep_inbound_atu() and create a new version of the
dw_pcie_prog_inbound_atu() function. Thus we'll have two methods for the
RC and EP controllers respectively in the same way as it has been
developed for the outbound ATU setup methods.

Secondly aside with the memory window index and type the new
dw_pcie_prog_inbound_atu() function will accept CPU address, PCIe address
and size as its arguments. These parameters define the PCIe and CPU memory
ranges which will be used to setup the respective inbound ATU mapping. The
passed parameters need to be verified against the ATU ranges constraints
in the same way as it is done for the outbound ranges.

Finally the DMA-ranges detected for the PCIe controller need to be
converted to the inbound ATU entries during the host controller
initialization procedure. It will be done in the framework of the
dw_pcie_iatu_setup() method. Note before setting the inbound ranges up we
need to disable all the inbound ATU entries in order to prevent unexpected
PCIe TLPs translations defined by some third party software like
bootloaders.

Link: https://lore.kernel.org/r/20221113191301.5526-16-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 .../pci/controller/dwc/pcie-designware-ep.c   |  4 +-
 .../pci/controller/dwc/pcie-designware-host.c | 32 ++++++++++-
 drivers/pci/controller/dwc/pcie-designware.c  | 56 ++++++++++++++++++-
 drivers/pci/controller/dwc/pcie-designware.h  |  6 +-
 4 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index efc6c6360e28f..40d0056b2f568 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -171,8 +171,8 @@ static int dw_pcie_ep_inbound_atu(struct dw_pcie_ep *ep, u8 func_no, int type,
 		return -EINVAL;
 	}
 
-	ret = dw_pcie_prog_inbound_atu(pci, func_no, free_win, type,
-				       cpu_addr, bar);
+	ret = dw_pcie_prog_ep_inbound_atu(pci, func_no, free_win, type,
+					  cpu_addr, bar);
 	if (ret < 0) {
 		dev_err(pci->dev, "Failed to program IB window\n");
 		return ret;
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index 39f3b37d4033c..ea923c25e12d2 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -643,12 +643,15 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
 	}
 
 	/*
-	 * Ensure all outbound windows are disabled before proceeding with
-	 * the MEM/IO ranges setups.
+	 * Ensure all out/inbound windows are disabled before proceeding with
+	 * the MEM/IO (dma-)ranges setups.
 	 */
 	for (i = 0; i < pci->num_ob_windows; i++)
 		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_OB, i);
 
+	for (i = 0; i < pci->num_ib_windows; i++)
+		dw_pcie_disable_atu(pci, PCIE_ATU_REGION_DIR_IB, i);
+
 	i = 0;
 	resource_list_for_each_entry(entry, &pp->bridge->windows) {
 		if (resource_type(entry->res) != IORESOURCE_MEM)
@@ -685,9 +688,32 @@ static int dw_pcie_iatu_setup(struct dw_pcie_rp *pp)
 	}
 
 	if (pci->num_ob_windows <= i)
-		dev_warn(pci->dev, "Resources exceed number of ATU entries (%d)\n",
+		dev_warn(pci->dev, "Ranges exceed outbound iATU size (%d)\n",
 			 pci->num_ob_windows);
 
+	i = 0;
+	resource_list_for_each_entry(entry, &pp->bridge->dma_ranges) {
+		if (resource_type(entry->res) != IORESOURCE_MEM)
+			continue;
+
+		if (pci->num_ib_windows <= i)
+			break;
+
+		ret = dw_pcie_prog_inbound_atu(pci, i++, PCIE_ATU_TYPE_MEM,
+					       entry->res->start,
+					       entry->res->start - entry->offset,
+					       resource_size(entry->res));
+		if (ret) {
+			dev_err(pci->dev, "Failed to set DMA range %pr\n",
+				entry->res);
+			return ret;
+		}
+	}
+
+	if (pci->num_ib_windows <= i)
+		dev_warn(pci->dev, "Dma-ranges exceed inbound iATU size (%u)\n",
+			 pci->num_ib_windows);
+
 	return 0;
 }
 
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 432aead68d1fd..7f1fb764897df 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -393,8 +393,60 @@ static inline void dw_pcie_writel_atu_ib(struct dw_pcie *pci, u32 index, u32 reg
 	dw_pcie_writel_atu(pci, PCIE_ATU_REGION_DIR_IB, index, reg, val);
 }
 
-int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
-			     int type, u64 cpu_addr, u8 bar)
+int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type,
+			     u64 cpu_addr, u64 pci_addr, u64 size)
+{
+	u64 limit_addr = pci_addr + size - 1;
+	u32 retries, val;
+
+	if ((limit_addr & ~pci->region_limit) != (pci_addr & ~pci->region_limit) ||
+	    !IS_ALIGNED(cpu_addr, pci->region_align) ||
+	    !IS_ALIGNED(pci_addr, pci->region_align) || !size) {
+		return -EINVAL;
+	}
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_BASE,
+			      lower_32_bits(pci_addr));
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_BASE,
+			      upper_32_bits(pci_addr));
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LIMIT,
+			      lower_32_bits(limit_addr));
+	if (dw_pcie_ver_is_ge(pci, 460A))
+		dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_LIMIT,
+				      upper_32_bits(limit_addr));
+
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_LOWER_TARGET,
+			      lower_32_bits(cpu_addr));
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_UPPER_TARGET,
+			      upper_32_bits(cpu_addr));
+
+	val = type;
+	if (upper_32_bits(limit_addr) > upper_32_bits(pci_addr) &&
+	    dw_pcie_ver_is_ge(pci, 460A))
+		val |= PCIE_ATU_INCREASE_REGION_SIZE;
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL1, val);
+	dw_pcie_writel_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2, PCIE_ATU_ENABLE);
+
+	/*
+	 * Make sure ATU enable takes effect before any subsequent config
+	 * and I/O accesses.
+	 */
+	for (retries = 0; retries < LINK_WAIT_MAX_IATU_RETRIES; retries++) {
+		val = dw_pcie_readl_atu_ib(pci, index, PCIE_ATU_REGION_CTRL2);
+		if (val & PCIE_ATU_ENABLE)
+			return 0;
+
+		mdelay(LINK_WAIT_IATU);
+	}
+
+	dev_err(pci->dev, "Inbound iATU is not being enabled\n");
+
+	return -ETIMEDOUT;
+}
+
+int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
+				int type, u64 cpu_addr, u8 bar)
 {
 	u32 retries, val;
 
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index a871ae7eb59ec..37801bbce8540 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -346,8 +346,10 @@ int dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type,
 			      u64 cpu_addr, u64 pci_addr, u64 size);
 int dw_pcie_prog_ep_outbound_atu(struct dw_pcie *pci, u8 func_no, int index,
 				 int type, u64 cpu_addr, u64 pci_addr, u64 size);
-int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
-			     int type, u64 cpu_addr, u8 bar);
+int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, int index, int type,
+			     u64 cpu_addr, u64 pci_addr, u64 size);
+int dw_pcie_prog_ep_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
+				int type, u64 cpu_addr, u8 bar);
 void dw_pcie_disable_atu(struct dw_pcie *pci, u32 dir, int index);
 void dw_pcie_setup(struct dw_pcie *pci);
 void dw_pcie_iatu_detect(struct dw_pcie *pci);
-- 
GitLab


From 7f9e982dc4fcf7b4bc7e9dc8a9f344395fc125b8 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:57 +0300
Subject: [PATCH 2141/4122] PCI: dwc: Introduce generic controller capabilities
 interface

Since in addition to the already available iATU unrolled mapping we are
about to add a few more DW PCIe platform-specific capabilities (CDM-check
and generic clocks/resets resources) let's add a generic interface to set
and get the flags indicating their availability. The new interface shall
improve maintainability of the platform-specific code.

Link: https://lore.kernel.org/r/20221113191301.5526-17-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/pci/controller/dwc/pcie-designware.c | 11 ++++++-----
 drivers/pci/controller/dwc/pcie-designware.h | 12 +++++++++++-
 2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 7f1fb764897df..b9cc4b00e5fe7 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -211,7 +211,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
 static inline void __iomem *dw_pcie_select_atu(struct dw_pcie *pci, u32 dir,
 					       u32 index)
 {
-	if (pci->iatu_unroll_enabled)
+	if (dw_pcie_cap_is(pci, IATU_UNROLL))
 		return pci->atu_base + PCIE_ATU_UNROLL_BASE(dir, index);
 
 	dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, dir | index);
@@ -591,7 +591,7 @@ static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci)
 	u32 val, min, dir;
 	u64 max;
 
-	if (pci->iatu_unroll_enabled) {
+	if (dw_pcie_cap_is(pci, IATU_UNROLL)) {
 		max_region = min((int)pci->atu_size / 512, 256);
 	} else {
 		dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, 0xFF);
@@ -641,8 +641,9 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci)
 {
 	struct platform_device *pdev = to_platform_device(pci->dev);
 
-	pci->iatu_unroll_enabled = dw_pcie_iatu_unroll_enabled(pci);
-	if (pci->iatu_unroll_enabled) {
+	if (dw_pcie_iatu_unroll_enabled(pci)) {
+		dw_pcie_cap_set(pci, IATU_UNROLL);
+
 		if (!pci->atu_base) {
 			struct resource *res =
 				platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu");
@@ -664,7 +665,7 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci)
 
 	dw_pcie_iatu_detect_regions(pci);
 
-	dev_info(pci->dev, "iATU unroll: %s\n", pci->iatu_unroll_enabled ?
+	dev_info(pci->dev, "iATU unroll: %s\n", dw_pcie_cap_is(pci, IATU_UNROLL) ?
 		"enabled" : "disabled");
 
 	dev_info(pci->dev, "iATU regions: %u ob, %u ib, align %uK, limit %lluG\n",
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 37801bbce8540..c6dddacee3b1c 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -12,6 +12,7 @@
 #define _PCIE_DESIGNWARE_H
 
 #include <linux/bitfield.h>
+#include <linux/bitops.h>
 #include <linux/dma-mapping.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
@@ -43,6 +44,15 @@
 	(__dw_pcie_ver_cmp(_pci, _ver, ==) && \
 	 __dw_pcie_ver_cmp(_pci, TYPE_ ## _type, >=))
 
+/* DWC PCIe controller capabilities */
+#define DW_PCIE_CAP_IATU_UNROLL		1
+
+#define dw_pcie_cap_is(_pci, _cap) \
+	test_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps)
+
+#define dw_pcie_cap_set(_pci, _cap) \
+	set_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps)
+
 /* Parameters for the waiting for link up routine */
 #define LINK_WAIT_MAX_RETRIES		10
 #define LINK_WAIT_USLEEP_MIN		90000
@@ -317,10 +327,10 @@ struct dw_pcie {
 	const struct dw_pcie_ops *ops;
 	u32			version;
 	u32			type;
+	unsigned long		caps;
 	int			num_lanes;
 	int			link_gen;
 	u8			n_fts[2];
-	bool			iatu_unroll_enabled: 1;
 };
 
 #define to_dw_pcie_from_pp(port) container_of((port), struct dw_pcie, pp)
-- 
GitLab


From ef8c58877fe77c7807777f61f59cffaee89881f7 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:58 +0300
Subject: [PATCH 2142/4122] PCI: dwc: Introduce generic resources getter

Currently the DW PCIe Root Port and Endpoint CSR spaces are retrieved in
the separate parts of the DW PCIe core driver. It doesn't really make
sense since the both controller types have identical set of the core CSR
regions: DBI, DBI CS2 and iATU/eDMA. Thus we can simplify the DW PCIe Host
and EP initialization methods by moving the platform-specific registers
space getting and mapping into a common method. It gets to be even more
justified seeing the CSRs base address pointers are preserved in the
common DW PCIe descriptor. Note all the OF-based common DW PCIe settings
initialization will be moved to the new method too in order to have a
single function for all the generic platform properties handling in single
place.

A nice side-effect of this change is that the pcie-designware-host.c and
pcie-designware-ep.c drivers are cleaned up from all the direct dw_pcie
storage modification, which makes the DW PCIe core, Root Port and Endpoint
modules more coherent.

Link: https://lore.kernel.org/r/20221113191301.5526-18-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
---
 .../pci/controller/dwc/pcie-designware-ep.c   | 25 +------
 .../pci/controller/dwc/pcie-designware-host.c | 15 +---
 drivers/pci/controller/dwc/pcie-designware.c  | 75 ++++++++++++++-----
 drivers/pci/controller/dwc/pcie-designware.h  |  3 +
 4 files changed, 65 insertions(+), 53 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware-ep.c b/drivers/pci/controller/dwc/pcie-designware-ep.c
index 40d0056b2f568..d06654895ebae 100644
--- a/drivers/pci/controller/dwc/pcie-designware-ep.c
+++ b/drivers/pci/controller/dwc/pcie-designware-ep.c
@@ -13,8 +13,6 @@
 #include <linux/pci-epc.h>
 #include <linux/pci-epf.h>
 
-#include "../../pci.h"
-
 void dw_pcie_ep_linkup(struct dw_pcie_ep *ep)
 {
 	struct pci_epc *epc = ep->epc;
@@ -711,23 +709,9 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
 
 	INIT_LIST_HEAD(&ep->func_list);
 
-	if (!pci->dbi_base) {
-		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
-		pci->dbi_base = devm_pci_remap_cfg_resource(dev, res);
-		if (IS_ERR(pci->dbi_base))
-			return PTR_ERR(pci->dbi_base);
-	}
-
-	if (!pci->dbi_base2) {
-		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi2");
-		if (!res) {
-			pci->dbi_base2 = pci->dbi_base + SZ_4K;
-		} else {
-			pci->dbi_base2 = devm_pci_remap_cfg_resource(dev, res);
-			if (IS_ERR(pci->dbi_base2))
-				return PTR_ERR(pci->dbi_base2);
-		}
-	}
+	ret = dw_pcie_get_resources(pci);
+	if (ret)
+		return ret;
 
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space");
 	if (!res)
@@ -756,9 +740,6 @@ int dw_pcie_ep_init(struct dw_pcie_ep *ep)
 		return -ENOMEM;
 	ep->outbound_addr = addr;
 
-	if (pci->link_gen < 1)
-		pci->link_gen = of_pci_get_max_link_speed(np);
-
 	epc = devm_pci_epc_create(dev, &epc_ops);
 	if (IS_ERR(epc)) {
 		dev_err(dev, "Failed to create epc device\n");
diff --git a/drivers/pci/controller/dwc/pcie-designware-host.c b/drivers/pci/controller/dwc/pcie-designware-host.c
index ea923c25e12d2..3ab6ae3712c42 100644
--- a/drivers/pci/controller/dwc/pcie-designware-host.c
+++ b/drivers/pci/controller/dwc/pcie-designware-host.c
@@ -16,7 +16,6 @@
 #include <linux/pci_regs.h>
 #include <linux/platform_device.h>
 
-#include "../../pci.h"
 #include "pcie-designware.h"
 
 static struct pci_ops dw_pcie_ops;
@@ -395,6 +394,10 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
 
 	raw_spin_lock_init(&pp->lock);
 
+	ret = dw_pcie_get_resources(pci);
+	if (ret)
+		return ret;
+
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
 	if (res) {
 		pp->cfg0_size = resource_size(res);
@@ -408,13 +411,6 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
 		return -ENODEV;
 	}
 
-	if (!pci->dbi_base) {
-		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
-		pci->dbi_base = devm_pci_remap_cfg_resource(dev, res);
-		if (IS_ERR(pci->dbi_base))
-			return PTR_ERR(pci->dbi_base);
-	}
-
 	bridge = devm_pci_alloc_host_bridge(dev, 0);
 	if (!bridge)
 		return -ENOMEM;
@@ -429,9 +425,6 @@ int dw_pcie_host_init(struct dw_pcie_rp *pp)
 		pp->io_base = pci_pio_to_address(win->res->start);
 	}
 
-	if (pci->link_gen < 1)
-		pci->link_gen = of_pci_get_max_link_speed(np);
-
 	/* Set default bus ops */
 	bridge->ops = &dw_pcie_ops;
 	bridge->child_ops = &dw_child_pcie_ops;
diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index b9cc4b00e5fe7..393e64ecccd35 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -11,6 +11,7 @@
 #include <linux/align.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
+#include <linux/ioport.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/sizes.h>
@@ -19,6 +20,59 @@
 #include "../../pci.h"
 #include "pcie-designware.h"
 
+int dw_pcie_get_resources(struct dw_pcie *pci)
+{
+	struct platform_device *pdev = to_platform_device(pci->dev);
+	struct device_node *np = dev_of_node(pci->dev);
+	struct resource *res;
+
+	if (!pci->dbi_base) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
+		pci->dbi_base = devm_pci_remap_cfg_resource(pci->dev, res);
+		if (IS_ERR(pci->dbi_base))
+			return PTR_ERR(pci->dbi_base);
+	}
+
+	/* DBI2 is mainly useful for the endpoint controller */
+	if (!pci->dbi_base2) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi2");
+		if (res) {
+			pci->dbi_base2 = devm_pci_remap_cfg_resource(pci->dev, res);
+			if (IS_ERR(pci->dbi_base2))
+				return PTR_ERR(pci->dbi_base2);
+		} else {
+			pci->dbi_base2 = pci->dbi_base + SZ_4K;
+		}
+	}
+
+	/* For non-unrolled iATU/eDMA platforms this range will be ignored */
+	if (!pci->atu_base) {
+		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu");
+		if (res) {
+			pci->atu_size = resource_size(res);
+			pci->atu_base = devm_ioremap_resource(pci->dev, res);
+			if (IS_ERR(pci->atu_base))
+				return PTR_ERR(pci->atu_base);
+		} else {
+			pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET;
+		}
+	}
+
+	/* Set a default value suitable for at most 8 in and 8 out windows */
+	if (!pci->atu_size)
+		pci->atu_size = SZ_4K;
+
+	if (pci->link_gen < 1)
+		pci->link_gen = of_pci_get_max_link_speed(np);
+
+	of_property_read_u32(np, "num-lanes", &pci->num_lanes);
+
+	if (of_property_read_bool(np, "snps,enable-cdm-check"))
+		dw_pcie_cap_set(pci, CDM_CHECK);
+
+	return 0;
+}
+
 void dw_pcie_version_detect(struct dw_pcie *pci)
 {
 	u32 ver;
@@ -639,25 +693,8 @@ static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci)
 
 void dw_pcie_iatu_detect(struct dw_pcie *pci)
 {
-	struct platform_device *pdev = to_platform_device(pci->dev);
-
 	if (dw_pcie_iatu_unroll_enabled(pci)) {
 		dw_pcie_cap_set(pci, IATU_UNROLL);
-
-		if (!pci->atu_base) {
-			struct resource *res =
-				platform_get_resource_byname(pdev, IORESOURCE_MEM, "atu");
-			if (res) {
-				pci->atu_size = resource_size(res);
-				pci->atu_base = devm_ioremap_resource(pci->dev, res);
-			}
-			if (!pci->atu_base || IS_ERR(pci->atu_base))
-				pci->atu_base = pci->dbi_base + DEFAULT_DBI_ATU_OFFSET;
-		}
-
-		if (!pci->atu_size)
-			/* Pick a minimal default, enough for 8 in and 8 out windows */
-			pci->atu_size = SZ_4K;
 	} else {
 		pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE;
 		pci->atu_size = PCIE_ATU_VIEWPORT_SIZE;
@@ -675,7 +712,6 @@ void dw_pcie_iatu_detect(struct dw_pcie *pci)
 
 void dw_pcie_setup(struct dw_pcie *pci)
 {
-	struct device_node *np = pci->dev->of_node;
 	u32 val;
 
 	if (pci->link_gen > 0)
@@ -703,14 +739,13 @@ void dw_pcie_setup(struct dw_pcie *pci)
 	val |= PORT_LINK_DLL_LINK_EN;
 	dw_pcie_writel_dbi(pci, PCIE_PORT_LINK_CONTROL, val);
 
-	if (of_property_read_bool(np, "snps,enable-cdm-check")) {
+	if (dw_pcie_cap_is(pci, CDM_CHECK)) {
 		val = dw_pcie_readl_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS);
 		val |= PCIE_PL_CHK_REG_CHK_REG_CONTINUOUS |
 		       PCIE_PL_CHK_REG_CHK_REG_START;
 		dw_pcie_writel_dbi(pci, PCIE_PL_CHK_REG_CONTROL_STATUS, val);
 	}
 
-	of_property_read_u32(np, "num-lanes", &pci->num_lanes);
 	if (!pci->num_lanes) {
 		dev_dbg(pci->dev, "Using h/w default number of lanes\n");
 		return;
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index c6dddacee3b1c..081f169e6021e 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -46,6 +46,7 @@
 
 /* DWC PCIe controller capabilities */
 #define DW_PCIE_CAP_IATU_UNROLL		1
+#define DW_PCIE_CAP_CDM_CHECK		2
 
 #define dw_pcie_cap_is(_pci, _cap) \
 	test_bit(DW_PCIE_CAP_ ## _cap, &(_pci)->caps)
@@ -338,6 +339,8 @@ struct dw_pcie {
 #define to_dw_pcie_from_ep(endpoint)   \
 		container_of((endpoint), struct dw_pcie, ep)
 
+int dw_pcie_get_resources(struct dw_pcie *pci);
+
 void dw_pcie_version_detect(struct dw_pcie *pci);
 
 u8 dw_pcie_find_capability(struct dw_pcie *pci, u8 cap);
-- 
GitLab


From 9f67ecdd9579228d656192a4b6e951c757085db8 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:12:59 +0300
Subject: [PATCH 2143/4122] PCI: dwc: Combine iATU detection procedures

Since the iATU CSR region is now retrieved in the DW PCIe resources getter
there is no much benefits in the iATU detection procedures splitting up.
Therefore let's join the iATU unroll/viewport detection procedure with the
rest of the iATU parameters detection code. The resultant method will be
as coherent as before, while the redundant functions will be eliminated
thus producing more readable code.

Link: https://lore.kernel.org/r/20221113191301.5526-19-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
---
 drivers/pci/controller/dwc/pcie-designware.c | 39 +++++---------------
 1 file changed, 10 insertions(+), 29 deletions(-)

diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index 393e64ecccd35..e979fb8f3cee9 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -628,26 +628,21 @@ static void dw_pcie_link_set_max_speed(struct dw_pcie *pci, u32 link_gen)
 
 }
 
-static bool dw_pcie_iatu_unroll_enabled(struct dw_pcie *pci)
-{
-	u32 val;
-
-	val = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT);
-	if (val == 0xffffffff)
-		return true;
-
-	return false;
-}
-
-static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci)
+void dw_pcie_iatu_detect(struct dw_pcie *pci)
 {
 	int max_region, ob, ib;
 	u32 val, min, dir;
 	u64 max;
 
-	if (dw_pcie_cap_is(pci, IATU_UNROLL)) {
+	val = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT);
+	if (val == 0xFFFFFFFF) {
+		dw_pcie_cap_set(pci, IATU_UNROLL);
+
 		max_region = min((int)pci->atu_size / 512, 256);
 	} else {
+		pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE;
+		pci->atu_size = PCIE_ATU_VIEWPORT_SIZE;
+
 		dw_pcie_writel_dbi(pci, PCIE_ATU_VIEWPORT, 0xFF);
 		max_region = dw_pcie_readl_dbi(pci, PCIE_ATU_VIEWPORT) + 1;
 	}
@@ -689,23 +684,9 @@ static void dw_pcie_iatu_detect_regions(struct dw_pcie *pci)
 	pci->num_ib_windows = ib;
 	pci->region_align = 1 << fls(min);
 	pci->region_limit = (max << 32) | (SZ_4G - 1);
-}
-
-void dw_pcie_iatu_detect(struct dw_pcie *pci)
-{
-	if (dw_pcie_iatu_unroll_enabled(pci)) {
-		dw_pcie_cap_set(pci, IATU_UNROLL);
-	} else {
-		pci->atu_base = pci->dbi_base + PCIE_ATU_VIEWPORT_BASE;
-		pci->atu_size = PCIE_ATU_VIEWPORT_SIZE;
-	}
-
-	dw_pcie_iatu_detect_regions(pci);
-
-	dev_info(pci->dev, "iATU unroll: %s\n", dw_pcie_cap_is(pci, IATU_UNROLL) ?
-		"enabled" : "disabled");
 
-	dev_info(pci->dev, "iATU regions: %u ob, %u ib, align %uK, limit %lluG\n",
+	dev_info(pci->dev, "iATU: unroll %s, %u ob, %u ib, align %uK, limit %lluG\n",
+		 dw_pcie_cap_is(pci, IATU_UNROLL) ? "T" : "F",
 		 pci->num_ob_windows, pci->num_ib_windows,
 		 pci->region_align / SZ_1K, (pci->region_limit + 1) / SZ_1G);
 }
-- 
GitLab


From ef69f852a9784017e646e50e3efc715dac7e3fc4 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:13:00 +0300
Subject: [PATCH 2144/4122] PCI: dwc: Introduce generic platform clocks and
 resets

Currently almost each platform driver uses its own resets and clocks
naming in order to get the corresponding descriptors. It makes the code
harder to maintain and comprehend especially seeing the DWC PCIe core main
resets and clocks signals set hasn't changed much for about at least one
major IP-core release. So in order to organize things around these signals
we suggest to create a generic interface for them in accordance with the
naming introduced in the DWC PCIe IP-core reference manual:

Application clocks:
- "dbi"  - data bus interface clock (on some DWC PCIe platforms it's
           referred as "pclk", "pcie", "sys", "ahb", "cfg", "iface",
           "gio", "reg", "pcie_apb_sys");
- "mstr" - AXI-bus master interface clock (some DWC PCIe glue drivers
           refer to this clock as "port", "bus", "pcie_bus",
           "bus_master/master_bus/axi_m", "pcie_aclk");
- "slv"  - AXI-bus slave interface clock (also called as "port", "bus",
           "pcie_bus", "bus_slave/slave_bus/axi_s", "pcie_aclk",
           "pcie_inbound_axi").

Core clocks:
- "pipe" - core-PCS PIPE interface clock coming from external PHY (it's
           normally named by the platform drivers as just "pipe");
- "core" - primary clock of the controller (none of the platform drivers
           declare such a clock but in accordance with the ref. manual
           the devices may have it separately specified);
- "aux"  - auxiliary PMC domain clock (it is named by some platforms as
           "pcie_aux" and just "aux");
- "ref"  - Generic reference clock (it is a generic clock source, which
           can be used as a signal source for multiple interfaces, some
           platforms call it as "ref", "general", "pcie_phy",
           "pcie_phy_ref").

Application resets:
- "dbi"  - Data-bus interface reset (it's CSR interface clock and is
           normally called as "apb" though technically it's not APB but
           DWC PCIe-specific interface);
- "mstr" - AXI-bus master reset (some platforms call it as "port", "apps",
           "bus", "axi_m");
- "slv"  - ABI-bus slave reset (some platforms call it as "port", "apps",
           "bus", "axi_s").

Core resets:
- "non-sticky" - non-sticky CSR flags reset;
- "sticky"     - sticky CSR flags reset;
- "pipe"       - PIPE-interface (Core-PCS) logic reset (some platforms
                 call it just "pipe");
- "core"       - controller primary reset (resets everything except PMC
                 module, some platforms refer to this signal as "soft",
                 "pci");
- "phy"        - PCS/PHY block reset (strictly speaking it is normally
                 connected to the input of an external block, but the
                 reference manual says it must be available for the PMC
                 working correctly, some existing platforms call it
                 "pciephy", "phy", "link");
- "hot"        - PMC hot reset signal (also called as "sleep");
- "pwr"        - cold reset signal (can be referred as "pwr", "turnoff").

Bus reset:
- "perst" - PCIe standard signal used to reset the PCIe peripheral
            devices.

As you can see each platform uses it's own naming for basically the same
set of the signals. In the framework of this commit we suggest to add a
set of the clocks and reset signals resources, corresponding names and
identifiers for each denoted entity. At current stage the platforms will
be able to use the provided infrastructure to automatically request all
these resources and manipulate with them in the Host/EP init callbacks.
Alas it isn't that easy to create a common cold/hot reset procedure due to
too many platform-specifics in the procedure, like the external flags
exposure and the delays requirement.

Link: https://lore.kernel.org/r/20221113191301.5526-20-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
---
 drivers/pci/controller/dwc/pcie-designware.c | 91 ++++++++++++++++++++
 drivers/pci/controller/dwc/pcie-designware.h | 42 +++++++++
 2 files changed, 133 insertions(+)

diff --git a/drivers/pci/controller/dwc/pcie-designware.c b/drivers/pci/controller/dwc/pcie-designware.c
index e979fb8f3cee9..6d5d619ab2e94 100644
--- a/drivers/pci/controller/dwc/pcie-designware.c
+++ b/drivers/pci/controller/dwc/pcie-designware.c
@@ -10,7 +10,9 @@
 
 #include <linux/align.h>
 #include <linux/bitops.h>
+#include <linux/clk.h>
 #include <linux/delay.h>
+#include <linux/gpio/consumer.h>
 #include <linux/ioport.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
@@ -20,11 +22,89 @@
 #include "../../pci.h"
 #include "pcie-designware.h"
 
+static const char * const dw_pcie_app_clks[DW_PCIE_NUM_APP_CLKS] = {
+	[DW_PCIE_DBI_CLK] = "dbi",
+	[DW_PCIE_MSTR_CLK] = "mstr",
+	[DW_PCIE_SLV_CLK] = "slv",
+};
+
+static const char * const dw_pcie_core_clks[DW_PCIE_NUM_CORE_CLKS] = {
+	[DW_PCIE_PIPE_CLK] = "pipe",
+	[DW_PCIE_CORE_CLK] = "core",
+	[DW_PCIE_AUX_CLK] = "aux",
+	[DW_PCIE_REF_CLK] = "ref",
+};
+
+static const char * const dw_pcie_app_rsts[DW_PCIE_NUM_APP_RSTS] = {
+	[DW_PCIE_DBI_RST] = "dbi",
+	[DW_PCIE_MSTR_RST] = "mstr",
+	[DW_PCIE_SLV_RST] = "slv",
+};
+
+static const char * const dw_pcie_core_rsts[DW_PCIE_NUM_CORE_RSTS] = {
+	[DW_PCIE_NON_STICKY_RST] = "non-sticky",
+	[DW_PCIE_STICKY_RST] = "sticky",
+	[DW_PCIE_CORE_RST] = "core",
+	[DW_PCIE_PIPE_RST] = "pipe",
+	[DW_PCIE_PHY_RST] = "phy",
+	[DW_PCIE_HOT_RST] = "hot",
+	[DW_PCIE_PWR_RST] = "pwr",
+};
+
+static int dw_pcie_get_clocks(struct dw_pcie *pci)
+{
+	int i, ret;
+
+	for (i = 0; i < DW_PCIE_NUM_APP_CLKS; i++)
+		pci->app_clks[i].id = dw_pcie_app_clks[i];
+
+	for (i = 0; i < DW_PCIE_NUM_CORE_CLKS; i++)
+		pci->core_clks[i].id = dw_pcie_core_clks[i];
+
+	ret = devm_clk_bulk_get_optional(pci->dev, DW_PCIE_NUM_APP_CLKS,
+					 pci->app_clks);
+	if (ret)
+		return ret;
+
+	return devm_clk_bulk_get_optional(pci->dev, DW_PCIE_NUM_CORE_CLKS,
+					  pci->core_clks);
+}
+
+static int dw_pcie_get_resets(struct dw_pcie *pci)
+{
+	int i, ret;
+
+	for (i = 0; i < DW_PCIE_NUM_APP_RSTS; i++)
+		pci->app_rsts[i].id = dw_pcie_app_rsts[i];
+
+	for (i = 0; i < DW_PCIE_NUM_CORE_RSTS; i++)
+		pci->core_rsts[i].id = dw_pcie_core_rsts[i];
+
+	ret = devm_reset_control_bulk_get_optional_shared(pci->dev,
+							  DW_PCIE_NUM_APP_RSTS,
+							  pci->app_rsts);
+	if (ret)
+		return ret;
+
+	ret = devm_reset_control_bulk_get_optional_exclusive(pci->dev,
+							     DW_PCIE_NUM_CORE_RSTS,
+							     pci->core_rsts);
+	if (ret)
+		return ret;
+
+	pci->pe_rst = devm_gpiod_get_optional(pci->dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(pci->pe_rst))
+		return PTR_ERR(pci->pe_rst);
+
+	return 0;
+}
+
 int dw_pcie_get_resources(struct dw_pcie *pci)
 {
 	struct platform_device *pdev = to_platform_device(pci->dev);
 	struct device_node *np = dev_of_node(pci->dev);
 	struct resource *res;
+	int ret;
 
 	if (!pci->dbi_base) {
 		res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "dbi");
@@ -62,6 +142,17 @@ int dw_pcie_get_resources(struct dw_pcie *pci)
 	if (!pci->atu_size)
 		pci->atu_size = SZ_4K;
 
+	/* LLDD is supposed to manually switch the clocks and resets state */
+	if (dw_pcie_cap_is(pci, REQ_RES)) {
+		ret = dw_pcie_get_clocks(pci);
+		if (ret)
+			return ret;
+
+		ret = dw_pcie_get_resets(pci);
+		if (ret)
+			return ret;
+	}
+
 	if (pci->link_gen < 1)
 		pci->link_gen = of_pci_get_max_link_speed(np);
 
diff --git a/drivers/pci/controller/dwc/pcie-designware.h b/drivers/pci/controller/dwc/pcie-designware.h
index 081f169e6021e..393dfb931df6c 100644
--- a/drivers/pci/controller/dwc/pcie-designware.h
+++ b/drivers/pci/controller/dwc/pcie-designware.h
@@ -13,10 +13,13 @@
 
 #include <linux/bitfield.h>
 #include <linux/bitops.h>
+#include <linux/clk.h>
 #include <linux/dma-mapping.h>
+#include <linux/gpio/consumer.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
 #include <linux/pci.h>
+#include <linux/reset.h>
 
 #include <linux/pci-epc.h>
 #include <linux/pci-epf.h>
@@ -45,6 +48,7 @@
 	 __dw_pcie_ver_cmp(_pci, TYPE_ ## _type, >=))
 
 /* DWC PCIe controller capabilities */
+#define DW_PCIE_CAP_REQ_RES		0
 #define DW_PCIE_CAP_IATU_UNROLL		1
 #define DW_PCIE_CAP_CDM_CHECK		2
 
@@ -233,6 +237,39 @@ enum dw_pcie_device_mode {
 	DW_PCIE_RC_TYPE,
 };
 
+enum dw_pcie_app_clk {
+	DW_PCIE_DBI_CLK,
+	DW_PCIE_MSTR_CLK,
+	DW_PCIE_SLV_CLK,
+	DW_PCIE_NUM_APP_CLKS
+};
+
+enum dw_pcie_core_clk {
+	DW_PCIE_PIPE_CLK,
+	DW_PCIE_CORE_CLK,
+	DW_PCIE_AUX_CLK,
+	DW_PCIE_REF_CLK,
+	DW_PCIE_NUM_CORE_CLKS
+};
+
+enum dw_pcie_app_rst {
+	DW_PCIE_DBI_RST,
+	DW_PCIE_MSTR_RST,
+	DW_PCIE_SLV_RST,
+	DW_PCIE_NUM_APP_RSTS
+};
+
+enum dw_pcie_core_rst {
+	DW_PCIE_NON_STICKY_RST,
+	DW_PCIE_STICKY_RST,
+	DW_PCIE_CORE_RST,
+	DW_PCIE_PIPE_RST,
+	DW_PCIE_PHY_RST,
+	DW_PCIE_HOT_RST,
+	DW_PCIE_PWR_RST,
+	DW_PCIE_NUM_CORE_RSTS
+};
+
 struct dw_pcie_host_ops {
 	int (*host_init)(struct dw_pcie_rp *pp);
 	void (*host_deinit)(struct dw_pcie_rp *pp);
@@ -332,6 +369,11 @@ struct dw_pcie {
 	int			num_lanes;
 	int			link_gen;
 	u8			n_fts[2];
+	struct clk_bulk_data	app_clks[DW_PCIE_NUM_APP_CLKS];
+	struct clk_bulk_data	core_clks[DW_PCIE_NUM_CORE_CLKS];
+	struct reset_control_bulk_data	app_rsts[DW_PCIE_NUM_APP_RSTS];
+	struct reset_control_bulk_data	core_rsts[DW_PCIE_NUM_CORE_RSTS];
+	struct gpio_desc		*pe_rst;
 };
 
 #define to_dw_pcie_from_pp(port) container_of((port), struct dw_pcie, pp)
-- 
GitLab


From ba6ed462dcf41a83b36eb9a74a8c4720040f9762 Mon Sep 17 00:00:00 2001
From: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Date: Sun, 13 Nov 2022 22:13:01 +0300
Subject: [PATCH 2145/4122] PCI: dwc: Add Baikal-T1 PCIe controller support

Baikal-T1 SoC is equipped with DWC PCIe v4.60a host controller. It can be
trained to work up to Gen.3 speed over up to x4 lanes. The host controller
is attached to the DW PCIe 3.0 PCS via the PIPE-4 interface, which in its
turn is connected to the DWC 10G PHY. The whole system is supposed to be
fed up with four clock sources: DBI peripheral clock, AXI application
clocks and external PHY/core reference clock generating the 100MHz signal.
In addition to that the platform provide a way to reset each part of the
controller: sticky/non-sticky bits, host controller core, PIPE interface,
PCS/PHY and Hot/Power reset signal. The driver also provides a way to
handle the GPIO-based PERST# signal.

Note due to the Baikal-T1 MMIO peculiarity we have to implement the DBI
interface accessors which make sure the IO operations are dword-aligned.

Link: https://lore.kernel.org/r/20221113191301.5526-21-Sergey.Semin@baikalelectronics.ru
Signed-off-by: Serge Semin <Sergey.Semin@baikalelectronics.ru>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
---
 drivers/pci/controller/dwc/Kconfig    |   9 +
 drivers/pci/controller/dwc/Makefile   |   1 +
 drivers/pci/controller/dwc/pcie-bt1.c | 643 ++++++++++++++++++++++++++
 3 files changed, 653 insertions(+)
 create mode 100644 drivers/pci/controller/dwc/pcie-bt1.c

diff --git a/drivers/pci/controller/dwc/Kconfig b/drivers/pci/controller/dwc/Kconfig
index 62ce3abf0f196..771b8b146623f 100644
--- a/drivers/pci/controller/dwc/Kconfig
+++ b/drivers/pci/controller/dwc/Kconfig
@@ -222,6 +222,15 @@ config PCIE_ARTPEC6_EP
 	  Enables support for the PCIe controller in the ARTPEC-6 SoC to work in
 	  endpoint mode. This uses the DesignWare core.
 
+config PCIE_BT1
+	tristate "Baikal-T1 PCIe controller"
+	depends on MIPS_BAIKAL_T1 || COMPILE_TEST
+	depends on PCI_MSI_IRQ_DOMAIN
+	select PCIE_DW_HOST
+	help
+	  Enables support for the PCIe controller in the Baikal-T1 SoC to work
+	  in host mode. It's based on the Synopsys DWC PCIe v4.60a IP-core.
+
 config PCIE_ROCKCHIP_DW_HOST
 	bool "Rockchip DesignWare PCIe controller"
 	select PCIE_DW
diff --git a/drivers/pci/controller/dwc/Makefile b/drivers/pci/controller/dwc/Makefile
index 8ba7b67f5e50a..bf5c311875a1e 100644
--- a/drivers/pci/controller/dwc/Makefile
+++ b/drivers/pci/controller/dwc/Makefile
@@ -3,6 +3,7 @@ obj-$(CONFIG_PCIE_DW) += pcie-designware.o
 obj-$(CONFIG_PCIE_DW_HOST) += pcie-designware-host.o
 obj-$(CONFIG_PCIE_DW_EP) += pcie-designware-ep.o
 obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
+obj-$(CONFIG_PCIE_BT1) += pcie-bt1.o
 obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
 obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
 obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
diff --git a/drivers/pci/controller/dwc/pcie-bt1.c b/drivers/pci/controller/dwc/pcie-bt1.c
new file mode 100644
index 0000000000000..3346770e66547
--- /dev/null
+++ b/drivers/pci/controller/dwc/pcie-bt1.c
@@ -0,0 +1,643 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 BAIKAL ELECTRONICS, JSC
+ *
+ * Authors:
+ *   Vadim Vlasov <Vadim.Vlasov@baikalelectronics.ru>
+ *   Serge Semin <Sergey.Semin@baikalelectronics.ru>
+ *
+ * Baikal-T1 PCIe controller driver
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+
+/* Baikal-T1 System CCU control registers */
+#define BT1_CCU_PCIE_CLKC			0x140
+#define BT1_CCU_PCIE_REQ_PCS_CLK		BIT(16)
+#define BT1_CCU_PCIE_REQ_MAC_CLK		BIT(17)
+#define BT1_CCU_PCIE_REQ_PIPE_CLK		BIT(18)
+
+#define BT1_CCU_PCIE_RSTC			0x144
+#define BT1_CCU_PCIE_REQ_LINK_RST		BIT(13)
+#define BT1_CCU_PCIE_REQ_SMLH_RST		BIT(14)
+#define BT1_CCU_PCIE_REQ_PHY_RST		BIT(16)
+#define BT1_CCU_PCIE_REQ_CORE_RST		BIT(24)
+#define BT1_CCU_PCIE_REQ_STICKY_RST		BIT(26)
+#define BT1_CCU_PCIE_REQ_NSTICKY_RST		BIT(27)
+
+#define BT1_CCU_PCIE_PMSC			0x148
+#define BT1_CCU_PCIE_LTSSM_STATE_MASK		GENMASK(5, 0)
+#define BT1_CCU_PCIE_LTSSM_DET_QUIET		0x00
+#define BT1_CCU_PCIE_LTSSM_DET_ACT		0x01
+#define BT1_CCU_PCIE_LTSSM_POLL_ACT		0x02
+#define BT1_CCU_PCIE_LTSSM_POLL_COMP		0x03
+#define BT1_CCU_PCIE_LTSSM_POLL_CONF		0x04
+#define BT1_CCU_PCIE_LTSSM_PRE_DET_QUIET	0x05
+#define BT1_CCU_PCIE_LTSSM_DET_WAIT		0x06
+#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_START	0x07
+#define BT1_CCU_PCIE_LTSSM_CFG_LNKWD_ACEPT	0x08
+#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_WAIT	0x09
+#define BT1_CCU_PCIE_LTSSM_CFG_LNNUM_ACEPT	0x0a
+#define BT1_CCU_PCIE_LTSSM_CFG_COMPLETE		0x0b
+#define BT1_CCU_PCIE_LTSSM_CFG_IDLE		0x0c
+#define BT1_CCU_PCIE_LTSSM_RCVR_LOCK		0x0d
+#define BT1_CCU_PCIE_LTSSM_RCVR_SPEED		0x0e
+#define BT1_CCU_PCIE_LTSSM_RCVR_RCVRCFG		0x0f
+#define BT1_CCU_PCIE_LTSSM_RCVR_IDLE		0x10
+#define BT1_CCU_PCIE_LTSSM_L0			0x11
+#define BT1_CCU_PCIE_LTSSM_L0S			0x12
+#define BT1_CCU_PCIE_LTSSM_L123_SEND_IDLE	0x13
+#define BT1_CCU_PCIE_LTSSM_L1_IDLE		0x14
+#define BT1_CCU_PCIE_LTSSM_L2_IDLE		0x15
+#define BT1_CCU_PCIE_LTSSM_L2_WAKE		0x16
+#define BT1_CCU_PCIE_LTSSM_DIS_ENTRY		0x17
+#define BT1_CCU_PCIE_LTSSM_DIS_IDLE		0x18
+#define BT1_CCU_PCIE_LTSSM_DISABLE		0x19
+#define BT1_CCU_PCIE_LTSSM_LPBK_ENTRY		0x1a
+#define BT1_CCU_PCIE_LTSSM_LPBK_ACTIVE		0x1b
+#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT		0x1c
+#define BT1_CCU_PCIE_LTSSM_LPBK_EXIT_TOUT	0x1d
+#define BT1_CCU_PCIE_LTSSM_HOT_RST_ENTRY	0x1e
+#define BT1_CCU_PCIE_LTSSM_HOT_RST		0x1f
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ0		0x20
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ1		0x21
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ2		0x22
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ3		0x23
+#define BT1_CCU_PCIE_SMLH_LINKUP		BIT(6)
+#define BT1_CCU_PCIE_RDLH_LINKUP		BIT(7)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L0S		BIT(8)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L1		BIT(9)
+#define BT1_CCU_PCIE_PM_LINKSTATE_L2		BIT(10)
+#define BT1_CCU_PCIE_L1_PENDING			BIT(12)
+#define BT1_CCU_PCIE_REQ_EXIT_L1		BIT(14)
+#define BT1_CCU_PCIE_LTSSM_RCVR_EQ		BIT(15)
+#define BT1_CCU_PCIE_PM_DSTAT_MASK		GENMASK(18, 16)
+#define BT1_CCU_PCIE_PM_PME_EN			BIT(20)
+#define BT1_CCU_PCIE_PM_PME_STATUS		BIT(21)
+#define BT1_CCU_PCIE_AUX_PM_EN			BIT(22)
+#define BT1_CCU_PCIE_AUX_PWR_DET		BIT(23)
+#define BT1_CCU_PCIE_WAKE_DET			BIT(24)
+#define BT1_CCU_PCIE_TURNOFF_REQ		BIT(30)
+#define BT1_CCU_PCIE_TURNOFF_ACK		BIT(31)
+
+#define BT1_CCU_PCIE_GENC			0x14c
+#define BT1_CCU_PCIE_LTSSM_EN			BIT(1)
+#define BT1_CCU_PCIE_DBI2_MODE			BIT(2)
+#define BT1_CCU_PCIE_MGMT_EN			BIT(3)
+#define BT1_CCU_PCIE_RXLANE_FLIP_EN		BIT(16)
+#define BT1_CCU_PCIE_TXLANE_FLIP_EN		BIT(17)
+#define BT1_CCU_PCIE_SLV_XFER_PEND		BIT(24)
+#define BT1_CCU_PCIE_RCV_XFER_PEND		BIT(25)
+#define BT1_CCU_PCIE_DBI_XFER_PEND		BIT(26)
+#define BT1_CCU_PCIE_DMA_XFER_PEND		BIT(27)
+
+#define BT1_CCU_PCIE_LTSSM_LINKUP(_pmsc) \
+({ \
+	int __state = FIELD_GET(BT1_CCU_PCIE_LTSSM_STATE_MASK, _pmsc); \
+	__state >= BT1_CCU_PCIE_LTSSM_L0 && __state <= BT1_CCU_PCIE_LTSSM_L2_WAKE; \
+})
+
+/* Baikal-T1 PCIe specific control registers */
+#define BT1_PCIE_AXI2MGM_LANENUM		0xd04
+#define BT1_PCIE_AXI2MGM_LANESEL_MASK		GENMASK(3, 0)
+
+#define BT1_PCIE_AXI2MGM_ADDRCTL		0xd08
+#define BT1_PCIE_AXI2MGM_PHYREG_ADDR_MASK	GENMASK(20, 0)
+#define BT1_PCIE_AXI2MGM_READ_FLAG		BIT(29)
+#define BT1_PCIE_AXI2MGM_DONE			BIT(30)
+#define BT1_PCIE_AXI2MGM_BUSY			BIT(31)
+
+#define BT1_PCIE_AXI2MGM_WRITEDATA		0xd0c
+#define BT1_PCIE_AXI2MGM_WDATA			GENMASK(15, 0)
+
+#define BT1_PCIE_AXI2MGM_READDATA		0xd10
+#define BT1_PCIE_AXI2MGM_RDATA			GENMASK(15, 0)
+
+/* Generic Baikal-T1 PCIe interface resources */
+#define BT1_PCIE_NUM_APP_CLKS			ARRAY_SIZE(bt1_pcie_app_clks)
+#define BT1_PCIE_NUM_CORE_CLKS			ARRAY_SIZE(bt1_pcie_core_clks)
+#define BT1_PCIE_NUM_APP_RSTS			ARRAY_SIZE(bt1_pcie_app_rsts)
+#define BT1_PCIE_NUM_CORE_RSTS			ARRAY_SIZE(bt1_pcie_core_rsts)
+
+/* PCIe bus setup delays and timeouts */
+#define BT1_PCIE_RST_DELAY_MS			100
+#define BT1_PCIE_RUN_DELAY_US			100
+#define BT1_PCIE_REQ_DELAY_US			1
+#define BT1_PCIE_REQ_TIMEOUT_US			1000
+#define BT1_PCIE_LNK_DELAY_US			1000
+#define BT1_PCIE_LNK_TIMEOUT_US			1000000
+
+static const enum dw_pcie_app_clk bt1_pcie_app_clks[] = {
+	DW_PCIE_DBI_CLK, DW_PCIE_MSTR_CLK, DW_PCIE_SLV_CLK,
+};
+
+static const enum dw_pcie_core_clk bt1_pcie_core_clks[] = {
+	DW_PCIE_REF_CLK,
+};
+
+static const enum dw_pcie_app_rst bt1_pcie_app_rsts[] = {
+	DW_PCIE_MSTR_RST, DW_PCIE_SLV_RST,
+};
+
+static const enum dw_pcie_core_rst bt1_pcie_core_rsts[] = {
+	DW_PCIE_NON_STICKY_RST, DW_PCIE_STICKY_RST, DW_PCIE_CORE_RST,
+	DW_PCIE_PIPE_RST, DW_PCIE_PHY_RST, DW_PCIE_HOT_RST, DW_PCIE_PWR_RST,
+};
+
+struct bt1_pcie {
+	struct dw_pcie dw;
+	struct platform_device *pdev;
+	struct regmap *sys_regs;
+};
+#define to_bt1_pcie(_dw) container_of(_dw, struct bt1_pcie, dw)
+
+/*
+ * Baikal-T1 MMIO space must be read/written by the dword-aligned
+ * instructions. Note the methods are optimized to have the dword operations
+ * performed with minimum overhead as the most frequently used ones.
+ */
+static int bt1_pcie_read_mmio(void __iomem *addr, int size, u32 *val)
+{
+	unsigned int ofs = (uintptr_t)addr & 0x3;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return -EINVAL;
+
+	*val = readl(addr - ofs) >> ofs * BITS_PER_BYTE;
+	if (size == 4) {
+		return 0;
+	} else if (size == 2) {
+		*val &= 0xffff;
+		return 0;
+	} else if (size == 1) {
+		*val &= 0xff;
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static int bt1_pcie_write_mmio(void __iomem *addr, int size, u32 val)
+{
+	unsigned int ofs = (uintptr_t)addr & 0x3;
+	u32 tmp, mask;
+
+	if (!IS_ALIGNED((uintptr_t)addr, size))
+		return -EINVAL;
+
+	if (size == 4) {
+		writel(val, addr);
+		return 0;
+	} else if (size == 2 || size == 1) {
+		mask = GENMASK(size * BITS_PER_BYTE - 1, 0);
+		tmp = readl(addr - ofs) & ~(mask << ofs * BITS_PER_BYTE);
+		tmp |= (val & mask) << ofs * BITS_PER_BYTE;
+		writel(tmp, addr - ofs);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static u32 bt1_pcie_read_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
+			     size_t size)
+{
+	int ret;
+	u32 val;
+
+	ret = bt1_pcie_read_mmio(base + reg, size, &val);
+	if (ret) {
+		dev_err(pci->dev, "Read DBI address failed\n");
+		return ~0U;
+	}
+
+	return val;
+}
+
+static void bt1_pcie_write_dbi(struct dw_pcie *pci, void __iomem *base, u32 reg,
+			       size_t size, u32 val)
+{
+	int ret;
+
+	ret = bt1_pcie_write_mmio(base + reg, size, val);
+	if (ret)
+		dev_err(pci->dev, "Write DBI address failed\n");
+}
+
+static void bt1_pcie_write_dbi2(struct dw_pcie *pci, void __iomem *base, u32 reg,
+				size_t size, u32 val)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	int ret;
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_DBI2_MODE, BT1_CCU_PCIE_DBI2_MODE);
+
+	ret = bt1_pcie_write_mmio(base + reg, size, val);
+	if (ret)
+		dev_err(pci->dev, "Write DBI2 address failed\n");
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_DBI2_MODE, 0);
+}
+
+static int bt1_pcie_start_link(struct dw_pcie *pci)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	u32 val;
+	int ret;
+
+	/*
+	 * Enable LTSSM and make sure it was able to establish both PHY and
+	 * data links. This procedure shall work fine to reach 2.5 GT/s speed.
+	 */
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, BT1_CCU_PCIE_LTSSM_EN);
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       (val & BT1_CCU_PCIE_SMLH_LINKUP),
+				       BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US);
+	if (ret) {
+		dev_err(pci->dev, "LTSSM failed to set PHY link up\n");
+		return ret;
+	}
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       (val & BT1_CCU_PCIE_RDLH_LINKUP),
+				       BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US);
+	if (ret) {
+		dev_err(pci->dev, "LTSSM failed to set data link up\n");
+		return ret;
+	}
+
+	/*
+	 * Activate direct speed change after the link is established in an
+	 * attempt to reach a higher bus performance (up to Gen.3 - 8.0 GT/s).
+	 * This is required at least to get 8.0 GT/s speed.
+	 */
+	val = dw_pcie_readl_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL);
+	val |= PORT_LOGIC_SPEED_CHANGE;
+	dw_pcie_writel_dbi(pci, PCIE_LINK_WIDTH_SPEED_CONTROL, val);
+
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_PMSC, val,
+				       BT1_CCU_PCIE_LTSSM_LINKUP(val),
+				       BT1_PCIE_LNK_DELAY_US, BT1_PCIE_LNK_TIMEOUT_US);
+	if (ret)
+		dev_err(pci->dev, "LTSSM failed to get into L0 state\n");
+
+	return ret;
+}
+
+static void bt1_pcie_stop_link(struct dw_pcie *pci)
+{
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, 0);
+}
+
+static const struct dw_pcie_ops bt1_pcie_ops = {
+	.read_dbi = bt1_pcie_read_dbi,
+	.write_dbi = bt1_pcie_write_dbi,
+	.write_dbi2 = bt1_pcie_write_dbi2,
+	.start_link = bt1_pcie_start_link,
+	.stop_link = bt1_pcie_stop_link,
+};
+
+static struct pci_ops bt1_pci_ops = {
+	.map_bus = dw_pcie_own_conf_map_bus,
+	.read = pci_generic_config_read32,
+	.write = pci_generic_config_write32,
+};
+
+static int bt1_pcie_get_resources(struct bt1_pcie *btpci)
+{
+	struct device *dev = btpci->dw.dev;
+	int i;
+
+	/* DBI access is supposed to be performed by the dword-aligned IOs */
+	btpci->dw.pp.bridge->ops = &bt1_pci_ops;
+
+	/* These CSRs are in MMIO so we won't check the regmap-methods status */
+	btpci->sys_regs =
+		syscon_regmap_lookup_by_phandle(dev->of_node, "baikal,bt1-syscon");
+	if (IS_ERR(btpci->sys_regs))
+		return dev_err_probe(dev, PTR_ERR(btpci->sys_regs),
+				     "Failed to get syscon\n");
+
+	/* Make sure all the required resources have been specified */
+	for (i = 0; i < BT1_PCIE_NUM_APP_CLKS; i++) {
+		if (!btpci->dw.app_clks[bt1_pcie_app_clks[i]].clk) {
+			dev_err(dev, "App clocks set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_CORE_CLKS; i++) {
+		if (!btpci->dw.core_clks[bt1_pcie_core_clks[i]].clk) {
+			dev_err(dev, "Core clocks set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_APP_RSTS; i++) {
+		if (!btpci->dw.app_rsts[bt1_pcie_app_rsts[i]].rstc) {
+			dev_err(dev, "App resets set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	for (i = 0; i < BT1_PCIE_NUM_CORE_RSTS; i++) {
+		if (!btpci->dw.core_rsts[bt1_pcie_core_rsts[i]].rstc) {
+			dev_err(dev, "Core resets set is incomplete\n");
+			return -ENOENT;
+		}
+	}
+
+	return 0;
+}
+
+static void bt1_pcie_full_stop_bus(struct bt1_pcie *btpci, bool init)
+{
+	struct device *dev = btpci->dw.dev;
+	struct dw_pcie *pci = &btpci->dw;
+	int ret;
+
+	/* Disable LTSSM for sure */
+	regmap_update_bits(btpci->sys_regs, BT1_CCU_PCIE_GENC,
+			   BT1_CCU_PCIE_LTSSM_EN, 0);
+
+	/*
+	 * Application reset controls are trigger-based so assert the core
+	 * resets only.
+	 */
+	ret = reset_control_bulk_assert(DW_PCIE_NUM_CORE_RSTS, pci->core_rsts);
+	if (ret)
+		dev_err(dev, "Failed to assert core resets\n");
+
+	/*
+	 * Clocks are disabled by default at least in accordance with the clk
+	 * enable counter value on init stage.
+	 */
+	if (!init) {
+		clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+
+		clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+	}
+
+	/* The peripheral devices are unavailable anyway so reset them too */
+	gpiod_set_value_cansleep(pci->pe_rst, 1);
+
+	/* Make sure all the resets are settled */
+	msleep(BT1_PCIE_RST_DELAY_MS);
+}
+
+/*
+ * Implements the cold reset procedure in accordance with the reference manual
+ * and available PM signals.
+ */
+static int bt1_pcie_cold_start_bus(struct bt1_pcie *btpci)
+{
+	struct device *dev = btpci->dw.dev;
+	struct dw_pcie *pci = &btpci->dw;
+	u32 val;
+	int ret;
+
+	/* First get out of the Power/Hot reset state */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PHY reset\n");
+		return ret;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert hot reset\n");
+		goto err_assert_pwr_rst;
+	}
+
+	/* Wait for the PM-core to stop requesting the PHY reset */
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
+				       !(val & BT1_CCU_PCIE_REQ_PHY_RST),
+				       BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US);
+	if (ret) {
+		dev_err(dev, "Timed out waiting for PM to stop PHY resetting\n");
+		goto err_assert_hot_rst;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PHY reset\n");
+		goto err_assert_hot_rst;
+	}
+
+	/* Clocks can be now enabled, but the ref one is crucial at this stage */
+	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+	if (ret) {
+		dev_err(dev, "Failed to enable app clocks\n");
+		goto err_assert_phy_rst;
+	}
+
+	ret = clk_bulk_prepare_enable(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+	if (ret) {
+		dev_err(dev, "Failed to enable ref clocks\n");
+		goto err_disable_app_clk;
+	}
+
+	/* Wait for the PM to stop requesting the controller core reset */
+	ret = regmap_read_poll_timeout(btpci->sys_regs, BT1_CCU_PCIE_RSTC, val,
+				       !(val & BT1_CCU_PCIE_REQ_CORE_RST),
+				       BT1_PCIE_REQ_DELAY_US, BT1_PCIE_REQ_TIMEOUT_US);
+	if (ret) {
+		dev_err(dev, "Timed out waiting for PM to stop core resetting\n");
+		goto err_disable_core_clk;
+	}
+
+	/* PCS-PIPE interface and controller core can be now activated */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert PIPE reset\n");
+		goto err_disable_core_clk;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert core reset\n");
+		goto err_assert_pipe_rst;
+	}
+
+	/* It's recommended to reset the core and application logic together */
+	ret = reset_control_bulk_reset(DW_PCIE_NUM_APP_RSTS, pci->app_rsts);
+	if (ret) {
+		dev_err(dev, "Failed to reset app domain\n");
+		goto err_assert_core_rst;
+	}
+
+	/* Sticky/Non-sticky CSR flags can be now unreset too */
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert sticky reset\n");
+		goto err_assert_core_rst;
+	}
+
+	ret = reset_control_deassert(pci->core_rsts[DW_PCIE_NON_STICKY_RST].rstc);
+	if (ret) {
+		dev_err(dev, "Failed to deassert non-sticky reset\n");
+		goto err_assert_sticky_rst;
+	}
+
+	/* Activate the PCIe bus peripheral devices */
+	gpiod_set_value_cansleep(pci->pe_rst, 0);
+
+	/* Make sure the state is settled (LTSSM is still disabled though) */
+	usleep_range(BT1_PCIE_RUN_DELAY_US, BT1_PCIE_RUN_DELAY_US + 100);
+
+	return 0;
+
+err_assert_sticky_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_STICKY_RST].rstc);
+
+err_assert_core_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_CORE_RST].rstc);
+
+err_assert_pipe_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PIPE_RST].rstc);
+
+err_disable_core_clk:
+	clk_bulk_disable_unprepare(DW_PCIE_NUM_CORE_CLKS, pci->core_clks);
+
+err_disable_app_clk:
+	clk_bulk_disable_unprepare(DW_PCIE_NUM_APP_CLKS, pci->app_clks);
+
+err_assert_phy_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PHY_RST].rstc);
+
+err_assert_hot_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_HOT_RST].rstc);
+
+err_assert_pwr_rst:
+	reset_control_assert(pci->core_rsts[DW_PCIE_PWR_RST].rstc);
+
+	return ret;
+}
+
+static int bt1_pcie_host_init(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+	int ret;
+
+	ret = bt1_pcie_get_resources(btpci);
+	if (ret)
+		return ret;
+
+	bt1_pcie_full_stop_bus(btpci, true);
+
+	return bt1_pcie_cold_start_bus(btpci);
+}
+
+static void bt1_pcie_host_deinit(struct dw_pcie_rp *pp)
+{
+	struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+	struct bt1_pcie *btpci = to_bt1_pcie(pci);
+
+	bt1_pcie_full_stop_bus(btpci, false);
+}
+
+static const struct dw_pcie_host_ops bt1_pcie_host_ops = {
+	.host_init = bt1_pcie_host_init,
+	.host_deinit = bt1_pcie_host_deinit,
+};
+
+static struct bt1_pcie *bt1_pcie_create_data(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci;
+
+	btpci = devm_kzalloc(&pdev->dev, sizeof(*btpci), GFP_KERNEL);
+	if (!btpci)
+		return ERR_PTR(-ENOMEM);
+
+	btpci->pdev = pdev;
+
+	platform_set_drvdata(pdev, btpci);
+
+	return btpci;
+}
+
+static int bt1_pcie_add_port(struct bt1_pcie *btpci)
+{
+	struct device *dev = &btpci->pdev->dev;
+	int ret;
+
+	btpci->dw.version = DW_PCIE_VER_460A;
+	btpci->dw.dev = dev;
+	btpci->dw.ops = &bt1_pcie_ops;
+
+	btpci->dw.pp.num_vectors = MAX_MSI_IRQS;
+	btpci->dw.pp.ops = &bt1_pcie_host_ops;
+
+	dw_pcie_cap_set(&btpci->dw, REQ_RES);
+
+	ret = dw_pcie_host_init(&btpci->dw.pp);
+
+	return dev_err_probe(dev, ret, "Failed to initialize DWC PCIe host\n");
+}
+
+static void bt1_pcie_del_port(struct bt1_pcie *btpci)
+{
+	dw_pcie_host_deinit(&btpci->dw.pp);
+}
+
+static int bt1_pcie_probe(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci;
+
+	btpci = bt1_pcie_create_data(pdev);
+	if (IS_ERR(btpci))
+		return PTR_ERR(btpci);
+
+	return bt1_pcie_add_port(btpci);
+}
+
+static int bt1_pcie_remove(struct platform_device *pdev)
+{
+	struct bt1_pcie *btpci = platform_get_drvdata(pdev);
+
+	bt1_pcie_del_port(btpci);
+
+	return 0;
+}
+
+static const struct of_device_id bt1_pcie_of_match[] = {
+	{ .compatible = "baikal,bt1-pcie" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, bt1_pcie_of_match);
+
+static struct platform_driver bt1_pcie_driver = {
+	.probe = bt1_pcie_probe,
+	.remove = bt1_pcie_remove,
+	.driver = {
+		.name	= "bt1-pcie",
+		.of_match_table = bt1_pcie_of_match,
+	},
+};
+module_platform_driver(bt1_pcie_driver);
+
+MODULE_AUTHOR("Serge Semin <Sergey.Semin@baikalelectronics.ru>");
+MODULE_DESCRIPTION("Baikal-T1 PCIe driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 9298804840457c29c7e115f3a87bec406c262c81 Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 2 Nov 2022 10:10:08 -0400
Subject: [PATCH 2146/4122] PCI: endpoint: pci-epf-vntb: Clean up kernel_doc
 warning

Cleanup warning found by scripts/kernel-doc.

Consolidate terms:

- host, host1 to HOST
- vhost, vHost, Vhost, VHOST2 to VHOST

Link: https://lore.kernel.org/r/20221102141014.1025893-2-Frank.Li@nxp.com
Signed-off-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 83 ++++++++++++-------
 1 file changed, 54 insertions(+), 29 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index 0ea85e1d292ec..c0115bcb3b5ec 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -11,7 +11,7 @@
  * Author: Kishon Vijay Abraham I <kishon@ti.com>
  */
 
-/**
+/*
  * +------------+         +---------------------------------------+
  * |            |         |                                       |
  * +------------+         |                        +--------------+
@@ -156,12 +156,14 @@ static struct pci_epf_header epf_ntb_header = {
 };
 
 /**
- * epf_ntb_link_up() - Raise link_up interrupt to Virtual Host
+ * epf_ntb_link_up() - Raise link_up interrupt to Virtual Host (VHOST)
  * @ntb: NTB device that facilitates communication between HOST and VHOST
  * @link_up: true or false indicating Link is UP or Down
  *
  * Once NTB function in HOST invoke ntb_link_enable(),
- * this NTB function driver will trigger a link event to vhost.
+ * this NTB function driver will trigger a link event to VHOST.
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
 {
@@ -175,9 +177,9 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
 }
 
 /**
- * epf_ntb_configure_mw() - Configure the Outbound Address Space for vhost
- *   to access the memory window of host
- * @ntb: NTB device that facilitates communication between host and vhost
+ * epf_ntb_configure_mw() - Configure the Outbound Address Space for VHOST
+ *   to access the memory window of HOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  * @mw: Index of the memory window (either 0, 1, 2 or 3)
  *
  *                          EP Outbound Window
@@ -194,7 +196,9 @@ static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
  * |        |              |           |
  * |        |              |           |
  * +--------+              +-----------+
- *  VHost                   PCI EP
+ *  VHOST                   PCI EP
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_configure_mw(struct epf_ntb *ntb, u32 mw)
 {
@@ -219,7 +223,7 @@ static int epf_ntb_configure_mw(struct epf_ntb *ntb, u32 mw)
 
 /**
  * epf_ntb_teardown_mw() - Teardown the configured OB ATU
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  * @mw: Index of the memory window (either 0, 1, 2 or 3)
  *
  * Teardown the configured OB ATU configured in epf_ntb_configure_mw() using
@@ -234,12 +238,12 @@ static void epf_ntb_teardown_mw(struct epf_ntb *ntb, u32 mw)
 }
 
 /**
- * epf_ntb_cmd_handler() - Handle commands provided by the NTB Host
+ * epf_ntb_cmd_handler() - Handle commands provided by the NTB HOST
  * @work: work_struct for the epf_ntb_epc
  *
  * Workqueue function that gets invoked for the two epf_ntb_epc
  * periodically (once every 5ms) to see if it has received any commands
- * from NTB host. The host can send commands to configure doorbell or
+ * from NTB HOST. The HOST can send commands to configure doorbell or
  * configure memory window or to update link status.
  */
 static void epf_ntb_cmd_handler(struct work_struct *work)
@@ -321,8 +325,8 @@ reset_handler:
 
 /**
  * epf_ntb_config_sspad_bar_clear() - Clear Config + Self scratchpad BAR
- * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
- *	     address.
+ * @ntb: EPC associated with one of the HOST which holds peer's outbound
+ *	 address.
  *
  * Clear BAR0 of EP CONTROLLER 1 which contains the HOST1's config and
  * self scratchpad region (removes inbound ATU configuration). While BAR0 is
@@ -331,8 +335,10 @@ reset_handler:
  * used for self scratchpad from epf_ntb_bar[BAR_CONFIG].
  *
  * Please note the self scratchpad region and config region is combined to
- * a single region and mapped using the same BAR. Also note HOST2's peer
- * scratchpad is HOST1's self scratchpad.
+ * a single region and mapped using the same BAR. Also note VHOST's peer
+ * scratchpad is HOST's self scratchpad.
+ *
+ * Returns: void
  */
 static void epf_ntb_config_sspad_bar_clear(struct epf_ntb *ntb)
 {
@@ -347,13 +353,15 @@ static void epf_ntb_config_sspad_bar_clear(struct epf_ntb *ntb)
 
 /**
  * epf_ntb_config_sspad_bar_set() - Set Config + Self scratchpad BAR
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  *
- * Map BAR0 of EP CONTROLLER 1 which contains the HOST1's config and
+ * Map BAR0 of EP CONTROLLER which contains the VHOST's config and
  * self scratchpad region.
  *
  * Please note the self scratchpad region and config region is combined to
  * a single region and mapped using the same BAR.
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_config_sspad_bar_set(struct epf_ntb *ntb)
 {
@@ -380,7 +388,7 @@ static int epf_ntb_config_sspad_bar_set(struct epf_ntb *ntb)
 /**
  * epf_ntb_config_spad_bar_free() - Free the physical memory associated with
  *   config + scratchpad region
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  */
 static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb)
 {
@@ -393,11 +401,13 @@ static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb)
 /**
  * epf_ntb_config_spad_bar_alloc() - Allocate memory for config + scratchpad
  *   region
- * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  *
  * Allocate the Local Memory mentioned in the above diagram. The size of
  * CONFIG REGION is sizeof(struct epf_ntb_ctrl) and size of SCRATCHPAD REGION
  * is obtained from "spad-count" configfs entry.
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb)
 {
@@ -465,11 +475,13 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb)
 }
 
 /**
- * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capaiblity
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capability
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  *
  * Configure MSI/MSI-X capability for each interface with number of
  * interrupts equal to "db_count" configfs entry.
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_configure_interrupt(struct epf_ntb *ntb)
 {
@@ -511,7 +523,9 @@ static int epf_ntb_configure_interrupt(struct epf_ntb *ntb)
 
 /**
  * epf_ntb_db_bar_init() - Configure Doorbell window BARs
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_db_bar_init(struct epf_ntb *ntb)
 {
@@ -566,7 +580,7 @@ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws);
 /**
  * epf_ntb_db_bar_clear() - Clear doorbell BAR and free memory
  *   allocated in peer's outbound address space
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  */
 static void epf_ntb_db_bar_clear(struct epf_ntb *ntb)
 {
@@ -582,8 +596,9 @@ static void epf_ntb_db_bar_clear(struct epf_ntb *ntb)
 
 /**
  * epf_ntb_mw_bar_init() - Configure Memory window BARs
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_mw_bar_init(struct epf_ntb *ntb)
 {
@@ -639,7 +654,7 @@ err_alloc_mem:
 
 /**
  * epf_ntb_mw_bar_clear() - Clear Memory window BARs
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  */
 static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws)
 {
@@ -662,7 +677,7 @@ static void epf_ntb_mw_bar_clear(struct epf_ntb *ntb, int num_mws)
 
 /**
  * epf_ntb_epc_destroy() - Cleanup NTB EPC interface
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  *
  * Wrapper for epf_ntb_epc_destroy_interface() to cleanup all the NTB interfaces
  */
@@ -675,7 +690,9 @@ static void epf_ntb_epc_destroy(struct epf_ntb *ntb)
 /**
  * epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB
  * constructs (scratchpad region, doorbell, memorywindow)
- * @ntb: NTB device that facilitates communication between HOST and vHOST
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_init_epc_bar(struct epf_ntb *ntb)
 {
@@ -716,11 +733,13 @@ static int epf_ntb_init_epc_bar(struct epf_ntb *ntb)
 
 /**
  * epf_ntb_epc_init() - Initialize NTB interface
- * @ntb: NTB device that facilitates communication between HOST and vHOST2
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  *
  * Wrapper to initialize a particular EPC interface and start the workqueue
- * to check for commands from host. This function will write to the
+ * to check for commands from HOST. This function will write to the
  * EP controller HW for configuring it.
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_epc_init(struct epf_ntb *ntb)
 {
@@ -787,7 +806,7 @@ err_config_interrupt:
 
 /**
  * epf_ntb_epc_cleanup() - Cleanup all NTB interfaces
- * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @ntb: NTB device that facilitates communication between HOST and VHOST
  *
  * Wrapper to cleanup all NTB interfaces.
  */
@@ -951,6 +970,8 @@ static const struct config_item_type ntb_group_type = {
  *
  * Add configfs directory specific to NTB. This directory will hold
  * NTB specific properties like db_count, spad_count, num_mws etc.,
+ *
+ * Returns: Pointer to config_group
  */
 static struct config_group *epf_ntb_add_cfs(struct pci_epf *epf,
 					    struct config_group *group)
@@ -1292,6 +1313,8 @@ static struct pci_driver vntb_pci_driver = {
  * Invoked when a primary interface or secondary interface is bound to EPC
  * device. This function will succeed only when EPC is bound to both the
  * interfaces.
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_bind(struct pci_epf *epf)
 {
@@ -1377,6 +1400,8 @@ static struct pci_epf_ops epf_ntb_ops = {
  *
  * Probe NTB function driver when endpoint function bus detects a NTB
  * endpoint function.
+ *
+ * Returns: Zero for success, or an error code in case of failure
  */
 static int epf_ntb_probe(struct pci_epf *epf)
 {
-- 
GitLab


From 1d118fed348f65bcc08e9bfb947085c276d05b52 Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 2 Nov 2022 10:10:09 -0400
Subject: [PATCH 2147/4122] PCI: endpoint: pci-epf-vntb: Fix struct
 epf_ntb_ctrl indentation

Align the indentation of struct epf_ntb_ctrl with other structs in
the driver.

Link: https://lore.kernel.org/r/20221102141014.1025893-3-Frank.Li@nxp.com
Signed-off-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 28 +++++++++----------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index c0115bcb3b5ec..1863006cc36c0 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -99,20 +99,20 @@ enum epf_ntb_bar {
  *       NTB Driver               NTB Driver
  */
 struct epf_ntb_ctrl {
-	u32     command;
-	u32     argument;
-	u16     command_status;
-	u16     link_status;
-	u32     topology;
-	u64     addr;
-	u64     size;
-	u32     num_mws;
-	u32	reserved;
-	u32     spad_offset;
-	u32     spad_count;
-	u32	db_entry_size;
-	u32     db_data[MAX_DB_COUNT];
-	u32     db_offset[MAX_DB_COUNT];
+	u32 command;
+	u32 argument;
+	u16 command_status;
+	u16 link_status;
+	u32 topology;
+	u64 addr;
+	u64 size;
+	u32 num_mws;
+	u32 reserved;
+	u32 spad_offset;
+	u32 spad_count;
+	u32 db_entry_size;
+	u32 db_data[MAX_DB_COUNT];
+	u32 db_offset[MAX_DB_COUNT];
 } __packed;
 
 struct epf_ntb {
-- 
GitLab


From 8818039f959b2efc0d6f2cb101f8061332f0c77e Mon Sep 17 00:00:00 2001
From: Ivan Vecera <ivecera@redhat.com>
Date: Mon, 21 Nov 2022 09:48:45 -0500
Subject: [PATCH 2148/4122] kbuild: add ability to make source rpm buildable
 using koji
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changes:
- added new target 'srcrpm-pkg' to generate source rpm
- added required build tools to spec file
- removed locally compiled host tools to force their re-compile

Signed-off-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Jonathan Toppins <jtoppins@redhat.com>
Acked-by: Íñigo Huguet <ihuguet@redhat.com>
Tested-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/Makefile.package | 10 ++++++++++
 scripts/package/mkspec   |  7 +++++++
 2 files changed, 17 insertions(+)

diff --git a/scripts/Makefile.package b/scripts/Makefile.package
index 8bbcced67c221..1290f1c631fb6 100644
--- a/scripts/Makefile.package
+++ b/scripts/Makefile.package
@@ -62,6 +62,16 @@ rpm-pkg:
 	+rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -ta $(KERNELPATH).tar.gz \
 	--define='_smp_mflags %{nil}'
 
+# srcrpm-pkg
+# ---------------------------------------------------------------------------
+PHONY += srcrpm-pkg
+srcrpm-pkg:
+	$(MAKE) clean
+	$(CONFIG_SHELL) $(MKSPEC) >$(objtree)/kernel.spec
+	$(call cmd,src_tar,$(KERNELPATH),kernel.spec)
+	+rpmbuild $(RPMOPTS) --target $(UTS_MACHINE)-linux -ts $(KERNELPATH).tar.gz \
+	--define='_smp_mflags %{nil}' --define='_srcrpmdir $(srctree)'
+
 # binrpm-pkg
 # ---------------------------------------------------------------------------
 PHONY += binrpm-pkg
diff --git a/scripts/package/mkspec b/scripts/package/mkspec
index 70392fd2fd29c..dda00a948a01d 100755
--- a/scripts/package/mkspec
+++ b/scripts/package/mkspec
@@ -33,6 +33,8 @@ EXCLUDES="$RCS_TAR_IGNORE --exclude=*vmlinux* --exclude=*.mod \
 --exclude=*.o --exclude=*.ko --exclude=*.cmd --exclude=Documentation \
 --exclude=.config.old --exclude=.missing-syscalls.d --exclude=*.s"
 
+test -n "$LOCALVERSION" && MAKE="$MAKE LOCALVERSION=$LOCALVERSION"
+
 # We can label the here-doc lines for conditional output to the spec file
 #
 # Labels:
@@ -49,6 +51,9 @@ sed -e '/^DEL/d' -e 's/^\t*//' <<EOF
 	URL: https://www.kernel.org
 $S	Source: kernel-$__KERNELRELEASE.tar.gz
 	Provides: $PROVIDES
+$S	BuildRequires: bc binutils bison dwarves elfutils-libelf-devel flex
+$S	BuildRequires: gcc make openssl openssl-devel perl python3 rsync
+
 	# $UTS_MACHINE as a fallback of _arch in case
 	# /usr/lib/rpm/platform/*/macros was not included.
 	%define _arch %{?_arch:$UTS_MACHINE}
@@ -80,6 +85,8 @@ $S$M	against the $__KERNELRELEASE kernel package.
 $S$M
 $S	%prep
 $S	%setup -q
+$S	rm -f scripts/basic/fixdep scripts/kconfig/conf
+$S	rm -f tools/objtool/{fixdep,objtool}
 $S
 $S	%build
 $S	$MAKE %{?_smp_mflags} KBUILD_BUILD_VERSION=%{release}
-- 
GitLab


From 0c031262d2ddfb938f9668d620d7ed674771646c Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 2 Nov 2022 10:10:10 -0400
Subject: [PATCH 2149/4122] PCI: endpoint: pci-epf-vntb: Fix call
 pci_epc_mem_free_addr() in error path

Replace pci_epc_mem_free_addr() with pci_epf_free_space() in the
error handle path to match pci_epf_alloc_space().

Link: https://lore.kernel.org/r/20221102141014.1025893-4-Frank.Li@nxp.com
Fixes: e35f56bb0330 ("PCI: endpoint: Support NTB transfer between RC and EP")
Signed-off-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index 1863006cc36c0..191924a834548 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -571,7 +571,7 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb)
 	return ret;
 
 err_alloc_peer_mem:
-	pci_epc_mem_free_addr(ntb->epf->epc, epf_bar->phys_addr, mw_addr, epf_bar->size);
+	pci_epf_free_space(ntb->epf, mw_addr, barno, 0);
 	return -1;
 }
 
-- 
GitLab


From 03d426ae5426caf46cb96534ca77fa50a018dd3a Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 2 Nov 2022 10:10:11 -0400
Subject: [PATCH 2150/4122] PCI: endpoint: pci-epf-vntb: Remove unused
 epf_db_phy struct member

epf_db_phy member in struct epf_ntb is not used, remove it.

Link: https://lore.kernel.org/r/20221102141014.1025893-5-Frank.Li@nxp.com
Signed-off-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index 191924a834548..ee66101cb5c41 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -136,7 +136,6 @@ struct epf_ntb {
 
 	struct epf_ntb_ctrl *reg;
 
-	phys_addr_t epf_db_phy;
 	void __iomem *epf_db;
 
 	phys_addr_t vpci_mw_phy[MAX_MW];
-- 
GitLab


From 2b35c886556a24f1531edf38a4dab53bbbea4db4 Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 2 Nov 2022 10:10:12 -0400
Subject: [PATCH 2151/4122] PCI: endpoint: pci-epf-vntb: Replace hardcoded 4
 with sizeof(u32)

NTB spad entry item size is sizeof(u32), replace hardcoded 4 with it.

Link: https://lore.kernel.org/r/20221102141014.1025893-6-Frank.Li@nxp.com
Signed-off-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 24 +++++++++----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index ee66101cb5c41..54616281da9e1 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -257,12 +257,12 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
 	ntb = container_of(work, struct epf_ntb, cmd_handler.work);
 
 	for (i = 1; i < ntb->db_count; i++) {
-		if (readl(ntb->epf_db + i * 4)) {
-			if (readl(ntb->epf_db + i * 4))
+		if (readl(ntb->epf_db + i * sizeof(u32))) {
+			if (readl(ntb->epf_db + i * sizeof(u32)))
 				ntb->db |= 1 << (i - 1);
 
 			ntb_db_event(&ntb->ntb, i);
-			writel(0, ntb->epf_db + i * 4);
+			writel(0, ntb->epf_db + i * sizeof(u32));
 		}
 	}
 
@@ -433,7 +433,7 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb)
 	spad_count = ntb->spad_count;
 
 	ctrl_size = sizeof(struct epf_ntb_ctrl);
-	spad_size = 2 * spad_count * 4;
+	spad_size = 2 * spad_count * sizeof(u32);
 
 	if (!align) {
 		ctrl_size = roundup_pow_of_two(ctrl_size);
@@ -463,7 +463,7 @@ static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb)
 	ctrl->num_mws = ntb->num_mws;
 	ntb->spad_size = spad_size;
 
-	ctrl->db_entry_size = 4;
+	ctrl->db_entry_size = sizeof(u32);
 
 	for (i = 0; i < ntb->db_count; i++) {
 		ntb->reg->db_data[i] = 1 + i;
@@ -535,7 +535,7 @@ static int epf_ntb_db_bar_init(struct epf_ntb *ntb)
 	struct pci_epf_bar *epf_bar;
 	void __iomem *mw_addr;
 	enum pci_barno barno;
-	size_t size = 4 * ntb->db_count;
+	size_t size = sizeof(u32) * ntb->db_count;
 
 	epc_features = pci_epc_get_features(ntb->epf->epc,
 					    ntb->epf->func_no,
@@ -1121,11 +1121,11 @@ static int vntb_epf_link_enable(struct ntb_dev *ntb,
 static u32 vntb_epf_spad_read(struct ntb_dev *ndev, int idx)
 {
 	struct epf_ntb *ntb = ntb_ndev(ndev);
-	int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * 4;
+	int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * sizeof(u32);
 	u32 val;
 	void __iomem *base = ntb->reg;
 
-	val = readl(base + off + ct + idx * 4);
+	val = readl(base + off + ct + idx * sizeof(u32));
 	return val;
 }
 
@@ -1133,10 +1133,10 @@ static int vntb_epf_spad_write(struct ntb_dev *ndev, int idx, u32 val)
 {
 	struct epf_ntb *ntb = ntb_ndev(ndev);
 	struct epf_ntb_ctrl *ctrl = ntb->reg;
-	int off = ctrl->spad_offset, ct = ctrl->spad_count * 4;
+	int off = ctrl->spad_offset, ct = ctrl->spad_count * sizeof(u32);
 	void __iomem *base = ntb->reg;
 
-	writel(val, base + off + ct + idx * 4);
+	writel(val, base + off + ct + idx * sizeof(u32));
 	return 0;
 }
 
@@ -1148,7 +1148,7 @@ static u32 vntb_epf_peer_spad_read(struct ntb_dev *ndev, int pidx, int idx)
 	void __iomem *base = ntb->reg;
 	u32 val;
 
-	val = readl(base + off + idx * 4);
+	val = readl(base + off + idx * sizeof(u32));
 	return val;
 }
 
@@ -1159,7 +1159,7 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32
 	int off = ctrl->spad_offset;
 	void __iomem *base = ntb->reg;
 
-	writel(val, base + off + idx * 4);
+	writel(val, base + off + idx * sizeof(u32));
 	return 0;
 }
 
-- 
GitLab


From 01dcec6d57ce62d535b2016fc4a617627fff506d Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 2 Nov 2022 10:10:13 -0400
Subject: [PATCH 2152/4122] PCI: endpoint: pci-epf-vntb: Fix sparse build
 warning for epf_db

Use epf_db[i] dereference instead of readl() because epf_db is
in memory allocated by dma_alloc_coherent(), not I/O.

Remove useless/duplicated readl() in the process.

Link: https://lore.kernel.org/r/20221102141014.1025893-7-Frank.Li@nxp.com
Signed-off-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index 54616281da9e1..f896846ed970a 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -136,7 +136,7 @@ struct epf_ntb {
 
 	struct epf_ntb_ctrl *reg;
 
-	void __iomem *epf_db;
+	u32 *epf_db;
 
 	phys_addr_t vpci_mw_phy[MAX_MW];
 	void __iomem *vpci_mw_addr[MAX_MW];
@@ -257,12 +257,10 @@ static void epf_ntb_cmd_handler(struct work_struct *work)
 	ntb = container_of(work, struct epf_ntb, cmd_handler.work);
 
 	for (i = 1; i < ntb->db_count; i++) {
-		if (readl(ntb->epf_db + i * sizeof(u32))) {
-			if (readl(ntb->epf_db + i * sizeof(u32)))
-				ntb->db |= 1 << (i - 1);
-
+		if (ntb->epf_db[i]) {
+			ntb->db |= 1 << (i - 1);
 			ntb_db_event(&ntb->ntb, i);
-			writel(0, ntb->epf_db + i * sizeof(u32));
+			ntb->epf_db[i] = 0;
 		}
 	}
 
-- 
GitLab


From 5f697b25009ccfebede5e42c6693c4b18de11b37 Mon Sep 17 00:00:00 2001
From: Frank Li <frank.li@nxp.com>
Date: Wed, 2 Nov 2022 10:10:14 -0400
Subject: [PATCH 2153/4122] PCI: endpoint: pci-epf-vntb: Fix sparse ntb->reg
 build warning

  pci-epf-vntb.c:1128:33: sparse:     expected void [noderef] __iomem *base
  pci-epf-vntb.c:1128:33: sparse:     got struct epf_ntb_ctrl *reg

Add __iomem type cast in vntb_epf_peer_spad_read() and
vntb_epf_peer_spad_write().

Link: https://lore.kernel.org/r/20221102141014.1025893-8-Frank.Li@nxp.com
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Frank Li <frank.li@nxp.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Manivannan Sadhasivam <mani@kernel.org>
---
 drivers/pci/endpoint/functions/pci-epf-vntb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/endpoint/functions/pci-epf-vntb.c b/drivers/pci/endpoint/functions/pci-epf-vntb.c
index f896846ed970a..04698e7995a54 100644
--- a/drivers/pci/endpoint/functions/pci-epf-vntb.c
+++ b/drivers/pci/endpoint/functions/pci-epf-vntb.c
@@ -1121,7 +1121,7 @@ static u32 vntb_epf_spad_read(struct ntb_dev *ndev, int idx)
 	struct epf_ntb *ntb = ntb_ndev(ndev);
 	int off = ntb->reg->spad_offset, ct = ntb->reg->spad_count * sizeof(u32);
 	u32 val;
-	void __iomem *base = ntb->reg;
+	void __iomem *base = (void __iomem *)ntb->reg;
 
 	val = readl(base + off + ct + idx * sizeof(u32));
 	return val;
@@ -1132,7 +1132,7 @@ static int vntb_epf_spad_write(struct ntb_dev *ndev, int idx, u32 val)
 	struct epf_ntb *ntb = ntb_ndev(ndev);
 	struct epf_ntb_ctrl *ctrl = ntb->reg;
 	int off = ctrl->spad_offset, ct = ctrl->spad_count * sizeof(u32);
-	void __iomem *base = ntb->reg;
+	void __iomem *base = (void __iomem *)ntb->reg;
 
 	writel(val, base + off + ct + idx * sizeof(u32));
 	return 0;
@@ -1143,7 +1143,7 @@ static u32 vntb_epf_peer_spad_read(struct ntb_dev *ndev, int pidx, int idx)
 	struct epf_ntb *ntb = ntb_ndev(ndev);
 	struct epf_ntb_ctrl *ctrl = ntb->reg;
 	int off = ctrl->spad_offset;
-	void __iomem *base = ntb->reg;
+	void __iomem *base = (void __iomem *)ntb->reg;
 	u32 val;
 
 	val = readl(base + off + idx * sizeof(u32));
@@ -1155,7 +1155,7 @@ static int vntb_epf_peer_spad_write(struct ntb_dev *ndev, int pidx, int idx, u32
 	struct epf_ntb *ntb = ntb_ndev(ndev);
 	struct epf_ntb_ctrl *ctrl = ntb->reg;
 	int off = ctrl->spad_offset;
-	void __iomem *base = ntb->reg;
+	void __iomem *base = (void __iomem *)ntb->reg;
 
 	writel(val, base + off + idx * sizeof(u32));
 	return 0;
-- 
GitLab


From 24b3e3dd9c9c742a4dd18e71b6963f9e7ab72911 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 11 Nov 2022 19:14:56 -0800
Subject: [PATCH 2154/4122] kernfs: fix all kernel-doc warnings and multiple
 typos

Fix kernel-doc warnings. Many of these are about a function's
return value, so use the kernel-doc Return: format to fix those

Use % prefix on numeric constant values.

dir.c: fix typos/spellos
file.c fix typo: s/taret/target/

Fix all of these kernel-doc warnings:

dir.c:305: warning: missing initial short description on line:
 *      kernfs_name_hash

dir.c:137: warning: No description found for return value of 'kernfs_path_from_node_locked'
dir.c:196: warning: No description found for return value of 'kernfs_name'
dir.c:224: warning: No description found for return value of 'kernfs_path_from_node'
dir.c:292: warning: No description found for return value of 'kernfs_get_parent'
dir.c:312: warning: No description found for return value of 'kernfs_name_hash'
dir.c:404: warning: No description found for return value of 'kernfs_unlink_sibling'
dir.c:588: warning: No description found for return value of 'kernfs_node_from_dentry'
dir.c:806: warning: No description found for return value of 'kernfs_find_ns'
dir.c:879: warning: No description found for return value of 'kernfs_find_and_get_ns'
dir.c:904: warning: No description found for return value of 'kernfs_walk_and_get_ns'
dir.c:927: warning: No description found for return value of 'kernfs_create_root'
dir.c:996: warning: No description found for return value of 'kernfs_root_to_node'
dir.c:1016: warning: No description found for return value of 'kernfs_create_dir_ns'
dir.c:1048: warning: No description found for return value of 'kernfs_create_empty_dir'
dir.c:1306: warning: No description found for return value of 'kernfs_next_descendant_post'
dir.c:1568: warning: No description found for return value of 'kernfs_remove_self'
dir.c:1630: warning: No description found for return value of 'kernfs_remove_by_name_ns'
dir.c:1667: warning: No description found for return value of 'kernfs_rename_ns'

file.c:66: warning: No description found for return value of 'of_on'
file.c:88: warning: No description found for return value of 'kernfs_deref_open_node_locked'
file.c:1036: warning: No description found for return value of '__kernfs_create_file'

inode.c:100: warning: No description found for return value of 'kernfs_setattr'

mount.c:160: warning: No description found for return value of 'kernfs_root_from_sb'
mount.c:198: warning: No description found for return value of 'kernfs_node_dentry'
mount.c:302: warning: No description found for return value of 'kernfs_super_ns'
mount.c:318: warning: No description found for return value of 'kernfs_get_tree'

symlink.c:28: warning: No description found for return value of 'kernfs_create_link'

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lore.kernel.org/r/20221112031456.22980-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/kernfs/dir.c             | 82 ++++++++++++++++++++++---------------
 fs/kernfs/file.c            | 18 ++++----
 fs/kernfs/inode.c           |  8 ++--
 fs/kernfs/kernfs-internal.h |  2 +-
 fs/kernfs/mount.c           | 10 +++--
 fs/kernfs/symlink.c         |  2 +-
 6 files changed, 74 insertions(+), 48 deletions(-)

diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 77d7a3a280571..935ef8cb02b26 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -125,9 +125,9 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
  * kn_to:   /n1/n2/n3         [depth=3]
  * result:  /../..
  *
- * [3] when @kn_to is NULL result will be "(null)"
+ * [3] when @kn_to is %NULL result will be "(null)"
  *
- * Returns the length of the full path.  If the full length is equal to or
+ * Return: the length of the full path.  If the full length is equal to or
  * greater than @buflen, @buf contains the truncated path with the trailing
  * '\0'.  On error, -errno is returned.
  */
@@ -185,10 +185,12 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
  * @buflen: size of @buf
  *
  * Copies the name of @kn into @buf of @buflen bytes.  The behavior is
- * similar to strlcpy().  It returns the length of @kn's name and if @buf
- * isn't long enough, it's filled upto @buflen-1 and nul terminated.
+ * similar to strlcpy().
  *
- * Fills buffer with "(null)" if @kn is NULL.
+ * Fills buffer with "(null)" if @kn is %NULL.
+ *
+ * Return: the length of @kn's name and if @buf isn't long enough,
+ * it's filled up to @buflen-1 and nul terminated.
  *
  * This function can be called from any context.
  */
@@ -215,7 +217,7 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
  * path (which includes '..'s) as needed to reach from @from to @to is
  * returned.
  *
- * Returns the length of the full path.  If the full length is equal to or
+ * Return: the length of the full path.  If the full length is equal to or
  * greater than @buflen, @buf contains the truncated path with the trailing
  * '\0'.  On error, -errno is returned.
  */
@@ -287,6 +289,8 @@ out:
  *
  * Determines @kn's parent, pins and returns it.  This function can be
  * called from any context.
+ *
+ * Return: parent node of @kn
  */
 struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
 {
@@ -302,11 +306,11 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
 }
 
 /**
- *	kernfs_name_hash
+ *	kernfs_name_hash - calculate hash of @ns + @name
  *	@name: Null terminated string to hash
  *	@ns:   Namespace tag to hash
  *
- *	Returns 31 bit hash of ns + name (so it fits in an off_t )
+ *	Return: 31-bit hash of ns + name (so it fits in an off_t)
  */
 static unsigned int kernfs_name_hash(const char *name, const void *ns)
 {
@@ -354,8 +358,8 @@ static int kernfs_sd_compare(const struct kernfs_node *left,
  *	Locking:
  *	kernfs_rwsem held exclusive
  *
- *	RETURNS:
- *	0 on susccess -EEXIST on failure.
+ *	Return:
+ *	%0 on success, -EEXIST on failure.
  */
 static int kernfs_link_sibling(struct kernfs_node *kn)
 {
@@ -394,8 +398,10 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
  *	@kn: kernfs_node of interest
  *
  *	Try to unlink @kn from its sibling rbtree which starts from
- *	kn->parent->dir.children.  Returns %true if @kn was actually
- *	removed, %false if @kn wasn't on the rbtree.
+ *	kn->parent->dir.children.
+ *
+ *	Return: %true if @kn was actually removed,
+ *	%false if @kn wasn't on the rbtree.
  *
  *	Locking:
  *	kernfs_rwsem held exclusive
@@ -419,10 +425,10 @@ static bool kernfs_unlink_sibling(struct kernfs_node *kn)
  *	@kn: kernfs_node to get an active reference to
  *
  *	Get an active reference of @kn.  This function is noop if @kn
- *	is NULL.
+ *	is %NULL.
  *
- *	RETURNS:
- *	Pointer to @kn on success, NULL on failure.
+ *	Return:
+ *	Pointer to @kn on success, %NULL on failure.
  */
 struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
 {
@@ -442,7 +448,7 @@ struct kernfs_node *kernfs_get_active(struct kernfs_node *kn)
  *	@kn: kernfs_node to put an active reference to
  *
  *	Put an active reference to @kn.  This function is noop if @kn
- *	is NULL.
+ *	is %NULL.
  */
 void kernfs_put_active(struct kernfs_node *kn)
 {
@@ -464,7 +470,7 @@ void kernfs_put_active(struct kernfs_node *kn)
  * kernfs_drain - drain kernfs_node
  * @kn: kernfs_node to drain
  *
- * Drain existing usages and nuke all existing mmaps of @kn.  Mutiple
+ * Drain existing usages and nuke all existing mmaps of @kn.  Multiple
  * removers may invoke this function concurrently on @kn and all will
  * return after draining is complete.
  */
@@ -577,7 +583,7 @@ EXPORT_SYMBOL_GPL(kernfs_put);
  * kernfs_node_from_dentry - determine kernfs_node associated with a dentry
  * @dentry: the dentry in question
  *
- * Return the kernfs_node associated with @dentry.  If @dentry is not a
+ * Return: the kernfs_node associated with @dentry.  If @dentry is not a
  * kernfs one, %NULL is returned.
  *
  * While the returned kernfs_node will stay accessible as long as @dentry
@@ -684,8 +690,8 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
  * @id's lower 32bits encode ino and upper gen.  If the gen portion is
  * zero, all generations are matched.
  *
- * RETURNS:
- * NULL on failure. Return a kernfs node with reference counter incremented
+ * Return: %NULL on failure,
+ * otherwise a kernfs node with reference counter incremented.
  */
 struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root,
 						   u64 id)
@@ -733,8 +739,8 @@ err_unlock:
  *	function increments nlink of the parent's inode if @kn is a
  *	directory and link into the children list of the parent.
  *
- *	RETURNS:
- *	0 on success, -EEXIST if entry with the given name already
+ *	Return:
+ *	%0 on success, -EEXIST if entry with the given name already
  *	exists.
  */
 int kernfs_add_one(struct kernfs_node *kn)
@@ -797,8 +803,9 @@ out_unlock:
  * @name: name to look for
  * @ns: the namespace tag to use
  *
- * Look for kernfs_node with name @name under @parent.  Returns pointer to
- * the found kernfs_node on success, %NULL on failure.
+ * Look for kernfs_node with name @name under @parent.
+ *
+ * Return: pointer to the found kernfs_node on success, %NULL on failure.
  */
 static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
 					  const unsigned char *name,
@@ -871,8 +878,9 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
  * @ns: the namespace tag to use
  *
  * Look for kernfs_node with name @name under @parent and get a reference
- * if found.  This function may sleep and returns pointer to the found
- * kernfs_node on success, %NULL on failure.
+ * if found.  This function may sleep.
+ *
+ * Return: pointer to the found kernfs_node on success, %NULL on failure.
  */
 struct kernfs_node *kernfs_find_and_get_ns(struct kernfs_node *parent,
 					   const char *name, const void *ns)
@@ -896,8 +904,9 @@ EXPORT_SYMBOL_GPL(kernfs_find_and_get_ns);
  * @ns: the namespace tag to use
  *
  * Look for kernfs_node with path @path under @parent and get a reference
- * if found.  This function may sleep and returns pointer to the found
- * kernfs_node on success, %NULL on failure.
+ * if found.  This function may sleep.
+ *
+ * Return: pointer to the found kernfs_node on success, %NULL on failure.
  */
 struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
 					   const char *path, const void *ns)
@@ -919,7 +928,7 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
  * @flags: KERNFS_ROOT_* flags
  * @priv: opaque data associated with the new directory
  *
- * Returns the root of the new hierarchy on success, ERR_PTR() value on
+ * Return: the root of the new hierarchy on success, ERR_PTR() value on
  * failure.
  */
 struct kernfs_root *kernfs_create_root(struct kernfs_syscall_ops *scops,
@@ -991,6 +1000,8 @@ void kernfs_destroy_root(struct kernfs_root *root)
 /**
  * kernfs_root_to_node - return the kernfs_node associated with a kernfs_root
  * @root: root to use to lookup
+ *
+ * Return: @root's kernfs_node
  */
 struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root)
 {
@@ -1007,7 +1018,7 @@ struct kernfs_node *kernfs_root_to_node(struct kernfs_root *root)
  * @priv: opaque data associated with the new directory
  * @ns: optional namespace tag of the directory
  *
- * Returns the created node on success, ERR_PTR() value on failure.
+ * Return: the created node on success, ERR_PTR() value on failure.
  */
 struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
 					 const char *name, umode_t mode,
@@ -1041,7 +1052,7 @@ struct kernfs_node *kernfs_create_dir_ns(struct kernfs_node *parent,
  * @parent: parent in which to create a new directory
  * @name: name of the new directory
  *
- * Returns the created node on success, ERR_PTR() value on failure.
+ * Return: the created node on success, ERR_PTR() value on failure.
  */
 struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
 					    const char *name)
@@ -1300,6 +1311,8 @@ static struct kernfs_node *kernfs_leftmost_descendant(struct kernfs_node *pos)
  * Find the next descendant to visit for post-order traversal of @root's
  * descendants.  @root is included in the iteration and the last node to be
  * visited.
+ *
+ * Return: the next descendant to visit or %NULL when done.
  */
 static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
 						       struct kernfs_node *root)
@@ -1563,6 +1576,8 @@ void kernfs_unbreak_active_protection(struct kernfs_node *kn)
  * the whole kernfs_ops which won the arbitration.  This can be used to
  * guarantee, for example, all concurrent writes to a "delete" file to
  * finish only after the whole operation is complete.
+ *
+ * Return: %true if @kn is removed by this call, otherwise %false.
  */
 bool kernfs_remove_self(struct kernfs_node *kn)
 {
@@ -1623,7 +1638,8 @@ bool kernfs_remove_self(struct kernfs_node *kn)
  * @ns: namespace tag of the kernfs_node to remove
  *
  * Look for the kernfs_node with @name and @ns under @parent and remove it.
- * Returns 0 on success, -ENOENT if such entry doesn't exist.
+ *
+ * Return: %0 on success, -ENOENT if such entry doesn't exist.
  */
 int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
 			     const void *ns)
@@ -1661,6 +1677,8 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name,
  * @new_parent: new parent to put @sd under
  * @new_name: new name
  * @new_ns: new namespace tag
+ *
+ * Return: %0 on success, -errno on failure.
  */
 int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
 		     const char *new_name, const void *new_ns)
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 9ab6c92e02dab..e4a50e4ff0d23 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -33,7 +33,7 @@ struct kernfs_open_node {
  * pending queue is implemented as a singly linked list of kernfs_nodes.
  * The list is terminated with the self pointer so that whether a
  * kernfs_node is on the list or not can be determined by testing the next
- * pointer for NULL.
+ * pointer for %NULL.
  */
 #define KERNFS_NOTIFY_EOL			((void *)&kernfs_notify_list)
 
@@ -59,8 +59,10 @@ static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
 }
 
 /**
- * of_on - Return the kernfs_open_node of the specified kernfs_open_file
- * @of: taret kernfs_open_file
+ * of_on - Get the kernfs_open_node of the specified kernfs_open_file
+ * @of: target kernfs_open_file
+ *
+ * Return: the kernfs_open_node of the kernfs_open_file
  */
 static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
 {
@@ -82,6 +84,8 @@ static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
  * outside RCU read-side critical section.
  *
  * The caller needs to make sure that kernfs_open_file_mutex is held.
+ *
+ * Return: @kn->attr.open when kernfs_open_file_mutex is held.
  */
 static struct kernfs_open_node *
 kernfs_deref_open_node_locked(struct kernfs_node *kn)
@@ -548,11 +552,11 @@ out_unlock:
  *	If @kn->attr.open exists, increment its reference count; otherwise,
  *	create one.  @of is chained to the files list.
  *
- *	LOCKING:
+ *	Locking:
  *	Kernel thread context (may sleep).
  *
- *	RETURNS:
- *	0 on success, -errno on failure.
+ *	Return:
+ *	%0 on success, -errno on failure.
  */
 static int kernfs_get_open_node(struct kernfs_node *kn,
 				struct kernfs_open_file *of)
@@ -1024,7 +1028,7 @@ const struct file_operations kernfs_file_fops = {
  * @ns: optional namespace tag of the file
  * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
  *
- * Returns the created node on success, ERR_PTR() value on error.
+ * Return: the created node on success, ERR_PTR() value on error.
  */
 struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
 					 const char *name,
diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c
index 74f3453f4639f..eac0f210299a0 100644
--- a/fs/kernfs/inode.c
+++ b/fs/kernfs/inode.c
@@ -94,7 +94,7 @@ int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
  * @kn: target node
  * @iattr: iattr to set
  *
- * Returns 0 on success, -errno on failure.
+ * Return: %0 on success, -errno on failure.
  */
 int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
 {
@@ -239,11 +239,11 @@ static void kernfs_init_inode(struct kernfs_node *kn, struct inode *inode)
  *	allocated and basics are initialized.  New inode is returned
  *	locked.
  *
- *	LOCKING:
+ *	Locking:
  *	Kernel thread context (may sleep).
  *
- *	RETURNS:
- *	Pointer to allocated inode on success, NULL on failure.
+ *	Return:
+ *	Pointer to allocated inode on success, %NULL on failure.
  */
 struct inode *kernfs_get_inode(struct super_block *sb, struct kernfs_node *kn)
 {
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index fc5821effd97d..9046d9f39e635 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -58,7 +58,7 @@ struct kernfs_root {
  * kernfs_root - find out the kernfs_root a kernfs_node belongs to
  * @kn: kernfs_node of interest
  *
- * Return the kernfs_root @kn belongs to.
+ * Return: the kernfs_root @kn belongs to.
  */
 static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
 {
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index d0859f72d2d64..e08e8d9998070 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -153,7 +153,7 @@ static const struct export_operations kernfs_export_ops = {
  * kernfs_root_from_sb - determine kernfs_root associated with a super_block
  * @sb: the super_block in question
  *
- * Return the kernfs_root associated with @sb.  If @sb is not a kernfs one,
+ * Return: the kernfs_root associated with @sb.  If @sb is not a kernfs one,
  * %NULL is returned.
  */
 struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
@@ -167,7 +167,7 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
  * find the next ancestor in the path down to @child, where @parent was the
  * ancestor whose descendant we want to find.
  *
- * Say the path is /a/b/c/d.  @child is d, @parent is NULL.  We return the root
+ * Say the path is /a/b/c/d.  @child is d, @parent is %NULL.  We return the root
  * node.  If @parent is b, then we return the node for c.
  * Passing in d as @parent is not ok.
  */
@@ -192,6 +192,8 @@ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child,
  * kernfs_node_dentry - get a dentry for the given kernfs_node
  * @kn: kernfs_node for which a dentry is needed
  * @sb: the kernfs super_block
+ *
+ * Return: the dentry pointer
  */
 struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
 				  struct super_block *sb)
@@ -296,7 +298,7 @@ static int kernfs_set_super(struct super_block *sb, struct fs_context *fc)
  * kernfs_super_ns - determine the namespace tag of a kernfs super_block
  * @sb: super_block of interest
  *
- * Return the namespace tag associated with kernfs super_block @sb.
+ * Return: the namespace tag associated with kernfs super_block @sb.
  */
 const void *kernfs_super_ns(struct super_block *sb)
 {
@@ -313,6 +315,8 @@ const void *kernfs_super_ns(struct super_block *sb)
  * implementation, which should set the specified ->@fs_type and ->@flags, and
  * specify the hierarchy and namespace tag to mount via ->@root and ->@ns,
  * respectively.
+ *
+ * Return: %0 on success, -errno on failure.
  */
 int kernfs_get_tree(struct fs_context *fc)
 {
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 0ab13824822f7..45371a70caa71 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -19,7 +19,7 @@
  * @name: name of the symlink
  * @target: target node for the symlink to point to
  *
- * Returns the created node on success, ERR_PTR() value on error.
+ * Return: the created node on success, ERR_PTR() value on error.
  * Ownership of the link matches ownership of the target.
  */
 struct kernfs_node *kernfs_create_link(struct kernfs_node *parent,
-- 
GitLab


From 40eb28dc17f87cfac69d7755447039e92ac5fbda Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 22 Nov 2022 15:35:57 +0200
Subject: [PATCH 2155/4122] device property: Get rid of
 __PROPERTY_ENTRY_ARRAY_EL*SIZE*()

First of all, _ELEMENT_SIZE() repeats existing sizeof_field() macro.
Second, usage of _ARRAY_ELSIZE_LEN() adds unnecessary indirection
to the data layout. It's more understandable when the data structure
is placed explicitly. That said, get rid of those macros by replacing
them with the existing helper and explicit data structure layout.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221122133600.49897-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/property.h | 34 ++++++++++++++--------------------
 1 file changed, 14 insertions(+), 20 deletions(-)

diff --git a/include/linux/property.h b/include/linux/property.h
index 83674f968a8fd..04a6b1433cd93 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -12,6 +12,7 @@
 
 #include <linux/bits.h>
 #include <linux/fwnode.h>
+#include <linux/stddef.h>
 #include <linux/types.h>
 
 struct device;
@@ -303,24 +304,14 @@ struct property_entry {
  * crafted to avoid gcc-4.4.4's problems with initialization of anon unions
  * and structs.
  */
-
-#define __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_)				\
-	sizeof(((struct property_entry *)NULL)->value._elem_[0])
-
-#define __PROPERTY_ENTRY_ARRAY_ELSIZE_LEN(_name_, _elsize_, _Type_,	\
-					  _val_, _len_)			\
-(struct property_entry) {						\
-	.name = _name_,							\
-	.length = (_len_) * (_elsize_),					\
-	.type = DEV_PROP_##_Type_,					\
-	{ .pointer = _val_ },						\
+#define __PROPERTY_ENTRY_ARRAY_LEN(_name_, _elem_, _Type_, _val_, _len_)		\
+(struct property_entry) {								\
+	.name = _name_,									\
+	.length = (_len_) * sizeof_field(struct property_entry, value._elem_[0]),	\
+	.type = DEV_PROP_##_Type_,							\
+	{ .pointer = _val_ },								\
 }
 
-#define __PROPERTY_ENTRY_ARRAY_LEN(_name_, _elem_, _Type_, _val_, _len_)\
-	__PROPERTY_ENTRY_ARRAY_ELSIZE_LEN(_name_,			\
-				__PROPERTY_ENTRY_ELEMENT_SIZE(_elem_),	\
-				_Type_, _val_, _len_)
-
 #define PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, _len_)		\
 	__PROPERTY_ENTRY_ARRAY_LEN(_name_, u8_data, U8, _val_, _len_)
 #define PROPERTY_ENTRY_U16_ARRAY_LEN(_name_, _val_, _len_)		\
@@ -332,9 +323,12 @@ struct property_entry {
 #define PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, _len_)		\
 	__PROPERTY_ENTRY_ARRAY_LEN(_name_, str, STRING, _val_, _len_)
 #define PROPERTY_ENTRY_REF_ARRAY_LEN(_name_, _val_, _len_)		\
-	__PROPERTY_ENTRY_ARRAY_ELSIZE_LEN(_name_,			\
-				sizeof(struct software_node_ref_args),	\
-				REF, _val_, _len_)
+(struct property_entry) {						\
+	.name = _name_,							\
+	.length = (_len_) * sizeof(struct software_node_ref_args),	\
+	.type = DEV_PROP_REF,						\
+	{ .pointer = _val_ },						\
+}
 
 #define PROPERTY_ENTRY_U8_ARRAY(_name_, _val_)				\
 	PROPERTY_ENTRY_U8_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_))
@@ -352,7 +346,7 @@ struct property_entry {
 #define __PROPERTY_ENTRY_ELEMENT(_name_, _elem_, _Type_, _val_)		\
 (struct property_entry) {						\
 	.name = _name_,							\
-	.length = __PROPERTY_ENTRY_ELEMENT_SIZE(_elem_),		\
+	.length = sizeof_field(struct property_entry, value._elem_[0]),	\
 	.is_inline = true,						\
 	.type = DEV_PROP_##_Type_,					\
 	{ .value = { ._elem_[0] = _val_ } },				\
-- 
GitLab


From c6c76563bd13871739539e20fd3116159e491f5b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 22 Nov 2022 15:35:58 +0200
Subject: [PATCH 2156/4122] device property: Move PROPERTY_ENTRY_BOOL() a bit
 down

Let's order ARRAY and non-ARRAY macros in the same way. The
PROPERTY_ENTRY_BOOL() is special, move it a bit down in the
code so it won't break ordering of the rest.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221122133600.49897-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/property.h | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/property.h b/include/linux/property.h
index 04a6b1433cd93..a1f846a151136 100644
--- a/include/linux/property.h
+++ b/include/linux/property.h
@@ -322,6 +322,7 @@ struct property_entry {
 	__PROPERTY_ENTRY_ARRAY_LEN(_name_, u64_data, U64, _val_, _len_)
 #define PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, _len_)		\
 	__PROPERTY_ENTRY_ARRAY_LEN(_name_, str, STRING, _val_, _len_)
+
 #define PROPERTY_ENTRY_REF_ARRAY_LEN(_name_, _val_, _len_)		\
 (struct property_entry) {						\
 	.name = _name_,							\
@@ -340,7 +341,7 @@ struct property_entry {
 	PROPERTY_ENTRY_U64_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_))
 #define PROPERTY_ENTRY_STRING_ARRAY(_name_, _val_)			\
 	PROPERTY_ENTRY_STRING_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_))
-#define PROPERTY_ENTRY_REF_ARRAY(_name_, _val_)			\
+#define PROPERTY_ENTRY_REF_ARRAY(_name_, _val_)				\
 	PROPERTY_ENTRY_REF_ARRAY_LEN(_name_, _val_, ARRAY_SIZE(_val_))
 
 #define __PROPERTY_ENTRY_ELEMENT(_name_, _elem_, _Type_, _val_)		\
@@ -363,12 +364,6 @@ struct property_entry {
 #define PROPERTY_ENTRY_STRING(_name_, _val_)				\
 	__PROPERTY_ENTRY_ELEMENT(_name_, str, STRING, _val_)
 
-#define PROPERTY_ENTRY_BOOL(_name_)		\
-(struct property_entry) {			\
-	.name = _name_,				\
-	.is_inline = true,			\
-}
-
 #define PROPERTY_ENTRY_REF(_name_, _ref_, ...)				\
 (struct property_entry) {						\
 	.name = _name_,							\
@@ -377,9 +372,14 @@ struct property_entry {
 	{ .pointer = &SOFTWARE_NODE_REFERENCE(_ref_, ##__VA_ARGS__), },	\
 }
 
+#define PROPERTY_ENTRY_BOOL(_name_)		\
+(struct property_entry) {			\
+	.name = _name_,				\
+	.is_inline = true,			\
+}
+
 struct property_entry *
 property_entries_dup(const struct property_entry *properties);
-
 void property_entries_free(const struct property_entry *properties);
 
 bool device_dma_supported(const struct device *dev);
-- 
GitLab


From 4d57b4f215e8ba86c36540eaccd3b17bc5ee39c0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 22 Nov 2022 15:35:59 +0200
Subject: [PATCH 2157/4122] device property: Rename goto label to be more
 precise

In the fwnode_property_match_string() the goto label out has
an additional task. Rename the label to be more precise on
what is going to happen if goto it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221122133600.49897-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/property.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index f7b5aa8fcf283..ed74083c179d8 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -482,12 +482,13 @@ int fwnode_property_match_string(const struct fwnode_handle *fwnode,
 
 	ret = fwnode_property_read_string_array(fwnode, propname, values, nval);
 	if (ret < 0)
-		goto out;
+		goto out_free;
 
 	ret = match_string(values, nval, string);
 	if (ret < 0)
 		ret = -ENODATA;
-out:
+
+out_free:
 	kfree(values);
 	return ret;
 }
-- 
GitLab


From 9dc5f12f95e00fb0e0500ea4a8da0c0f29e718cb Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 22 Nov 2022 15:36:00 +0200
Subject: [PATCH 2158/4122] device property: Add a blank line in Kconfig of
 tests

Seems the blank line to separate entries in Kconfig was missing.
Add it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221122133600.49897-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/test/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/base/test/Kconfig b/drivers/base/test/Kconfig
index 2f3fa31a948e2..610a1ba7a467e 100644
--- a/drivers/base/test/Kconfig
+++ b/drivers/base/test/Kconfig
@@ -8,6 +8,7 @@ config TEST_ASYNC_DRIVER_PROBE
 	  The module name will be test_async_driver_probe.ko
 
 	  If unsure say N.
+
 config DRIVER_PE_KUNIT_TEST
 	bool "KUnit Tests for property entry API" if !KUNIT_ALL_TESTS
 	depends on KUNIT=y
-- 
GitLab


From 6865788f5ad998f261e37f6b029d61d3bb7dc373 Mon Sep 17 00:00:00 2001
From: Andra Paraschiv <andraprs@amazon.com>
Date: Tue, 8 Nov 2022 20:59:12 +0200
Subject: [PATCH 2159/4122] MAINTAINERS: Update entries from the Nitro Enclaves
 section

Update the list of maintainers for the Nitro Enclaves project. Alex
(lexnv@) is not working at Amazon anymore and there will be the same
case for me starting with 2023.

Add a reference to the mailing list of the Nitro Enclaves development
team.

Signed-off-by: Andra Paraschiv <andraprs@amazon.com>
Acked-by: Alexandru Vasile <acvasile96@gmail.com>
Link: https://lore.kernel.org/r/20221108185912.15792-1-andraprs@amazon.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 MAINTAINERS | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 38bae79369a3c..33c76a218c0cc 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14569,10 +14569,9 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/dinguyen/linux.git
 F:	arch/nios2/
 
 NITRO ENCLAVES (NE)
-M:	Andra Paraschiv <andraprs@amazon.com>
-M:	Alexandru Vasile <lexnv@amazon.com>
 M:	Alexandru Ciobotaru <alcioa@amazon.com>
 L:	linux-kernel@vger.kernel.org
+L:	The AWS Nitro Enclaves Team <aws-nitro-enclaves-devel@amazon.com>
 S:	Supported
 W:	https://aws.amazon.com/ec2/nitro/nitro-enclaves/
 F:	Documentation/virt/ne_overview.rst
-- 
GitLab


From 83f47eea742c1152c237398fc040ceba04fc5d76 Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Wed, 16 Nov 2022 14:47:34 +0200
Subject: [PATCH 2160/4122] mei: add timeout to send

When driver wakes up the firmware from the low power state,
it is sending a memory ready message.
The send is done via synchronous/blocking function to ensure
that firmware is in ready state. However, in case of firmware
undergoing reset send might be block forever.
To address this issue a timeout is added to blocking
write command on the internal bus.

Introduce the __mei_cl_send_timeout function to use instead of
__mei_cl_send in cases where timeout is required.
The mei_cl_write has only two callers and there is no need to split
it into two functions.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Link: https://lore.kernel.org/r/20221116124735.2493847-2-alexander.usyskin@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus-fixup.c |  7 +++++--
 drivers/misc/mei/bus.c       | 22 +++++++++++++++++++++-
 drivers/misc/mei/client.c    | 20 ++++++++++++++++----
 drivers/misc/mei/client.h    |  2 +-
 drivers/misc/mei/main.c      |  2 +-
 drivers/misc/mei/mei_dev.h   |  2 ++
 6 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 71fbf0bc84532..90023c34666ea 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -188,17 +188,20 @@ static int mei_fwver(struct mei_cl_device *cldev)
 	return ret;
 }
 
+#define GFX_MEMORY_READY_TIMEOUT 200 /* timeout in milliseconds */
+
 static int mei_gfx_memory_ready(struct mei_cl_device *cldev)
 {
 	struct mkhi_gfx_mem_ready req = {0};
-	unsigned int mode = MEI_CL_IO_TX_INTERNAL;
+	unsigned int mode = MEI_CL_IO_TX_INTERNAL | MEI_CL_IO_TX_BLOCKING;
 
 	req.hdr.group_id = MKHI_GROUP_ID_GFX;
 	req.hdr.command = MKHI_GFX_MEMORY_READY_CMD_REQ;
 	req.flags = MKHI_GFX_MEM_READY_PXP_ALLOWED;
 
 	dev_dbg(&cldev->dev, "Sending memory ready command\n");
-	return __mei_cl_send(cldev->cl, (u8 *)&req, sizeof(req), 0, mode);
+	return __mei_cl_send_timeout(cldev->cl, (u8 *)&req, sizeof(req), 0,
+				     mode, GFX_MEMORY_READY_TIMEOUT);
 }
 
 static void mei_mkhi_fix(struct mei_cl_device *cldev)
diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c
index 46aa3554e97b0..fdb5f73316958 100644
--- a/drivers/misc/mei/bus.c
+++ b/drivers/misc/mei/bus.c
@@ -33,6 +33,26 @@
  */
 ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
 		      unsigned int mode)
+{
+	return __mei_cl_send_timeout(cl, buf, length, vtag, mode, MAX_SCHEDULE_TIMEOUT);
+}
+
+/**
+ * __mei_cl_send_timeout - internal client send (write)
+ *
+ * @cl: host client
+ * @buf: buffer to send
+ * @length: buffer length
+ * @vtag: virtual tag
+ * @mode: sending mode
+ * @timeout: send timeout in milliseconds.
+ *           effective only for blocking writes: the MEI_CL_IO_TX_BLOCKING mode bit is set.
+ *           set timeout to the MAX_SCHEDULE_TIMEOUT to maixum allowed wait.
+ *
+ * Return: written size bytes or < 0 on error
+ */
+ssize_t __mei_cl_send_timeout(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
+			      unsigned int mode, unsigned long timeout)
 {
 	struct mei_device *bus;
 	struct mei_cl_cb *cb;
@@ -101,7 +121,7 @@ ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
 	cb->blocking = !!(mode & MEI_CL_IO_TX_BLOCKING);
 	memcpy(cb->buf.data, buf, length);
 
-	rets = mei_cl_write(cl, cb);
+	rets = mei_cl_write(cl, cb, timeout);
 
 out:
 	mutex_unlock(&bus->device_lock);
diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
index 0b2fbe1335a77..b4c104907ce21 100644
--- a/drivers/misc/mei/client.c
+++ b/drivers/misc/mei/client.c
@@ -1926,10 +1926,13 @@ err:
  *
  * @cl: host client
  * @cb: write callback with filled data
+ * @timeout: send timeout in milliseconds.
+ *           effective only for blocking writes: the cb->blocking is set.
+ *           set timeout to the MAX_SCHEDULE_TIMEOUT to maixum allowed wait.
  *
  * Return: number of bytes sent on success, <0 on failure.
  */
-ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb)
+ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long timeout)
 {
 	struct mei_device *dev;
 	struct mei_msg_data *buf;
@@ -2056,11 +2059,20 @@ out:
 	if (blocking && cl->writing_state != MEI_WRITE_COMPLETE) {
 
 		mutex_unlock(&dev->device_lock);
-		rets = wait_event_interruptible(cl->tx_wait,
-				cl->writing_state == MEI_WRITE_COMPLETE ||
-				(!mei_cl_is_connected(cl)));
+		rets = wait_event_interruptible_timeout(cl->tx_wait,
+							cl->writing_state == MEI_WRITE_COMPLETE ||
+							(!mei_cl_is_connected(cl)),
+							msecs_to_jiffies(timeout));
 		mutex_lock(&dev->device_lock);
+		/* clean all queue on timeout as something fatal happened */
+		if (rets == 0) {
+			rets = -ETIME;
+			mei_io_tx_list_free_cl(&dev->write_list, cl, NULL);
+			mei_io_tx_list_free_cl(&dev->write_waiting_list, cl, NULL);
+		}
 		/* wait_event_interruptible returns -ERESTARTSYS */
+		if (rets > 0)
+			rets = 0;
 		if (rets) {
 			if (signal_pending(current))
 				rets = -EINTR;
diff --git a/drivers/misc/mei/client.h b/drivers/misc/mei/client.h
index 418056fb1489c..9052860bcfe03 100644
--- a/drivers/misc/mei/client.h
+++ b/drivers/misc/mei/client.h
@@ -246,7 +246,7 @@ int mei_cl_connect(struct mei_cl *cl, struct mei_me_client *me_cl,
 int mei_cl_irq_connect(struct mei_cl *cl, struct mei_cl_cb *cb,
 		       struct list_head *cmpl_list);
 int mei_cl_read_start(struct mei_cl *cl, size_t length, const struct file *fp);
-ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb);
+ssize_t mei_cl_write(struct mei_cl *cl, struct mei_cl_cb *cb, unsigned long timeout);
 int mei_cl_irq_write(struct mei_cl *cl, struct mei_cl_cb *cb,
 		     struct list_head *cmpl_list);
 
diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c
index 930887e7e38d6..632d4ae21e465 100644
--- a/drivers/misc/mei/main.c
+++ b/drivers/misc/mei/main.c
@@ -383,7 +383,7 @@ static ssize_t mei_write(struct file *file, const char __user *ubuf,
 		goto out;
 	}
 
-	rets = mei_cl_write(cl, cb);
+	rets = mei_cl_write(cl, cb, MAX_SCHEDULE_TIMEOUT);
 out:
 	mutex_unlock(&dev->device_lock);
 	return rets;
diff --git a/drivers/misc/mei/mei_dev.h b/drivers/misc/mei/mei_dev.h
index 6bb3e1ba9ded4..c1618a44c5a4c 100644
--- a/drivers/misc/mei/mei_dev.h
+++ b/drivers/misc/mei/mei_dev.h
@@ -373,6 +373,8 @@ void mei_cl_bus_rescan_work(struct work_struct *work);
 void mei_cl_bus_dev_fixup(struct mei_cl_device *dev);
 ssize_t __mei_cl_send(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
 		      unsigned int mode);
+ssize_t __mei_cl_send_timeout(struct mei_cl *cl, const u8 *buf, size_t length, u8 vtag,
+			      unsigned int mode, unsigned long timeout);
 ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, u8 *vtag,
 		      unsigned int mode, unsigned long timeout);
 bool mei_cl_bus_rx_event(struct mei_cl *cl);
-- 
GitLab


From 0ef77698b85603d21453daf32ae70f76ae62ccae Mon Sep 17 00:00:00 2001
From: Alexander Usyskin <alexander.usyskin@intel.com>
Date: Wed, 16 Nov 2022 14:47:35 +0200
Subject: [PATCH 2161/4122] mei: bus-fixup: change pxp mode only if message was
 sent

Move PXP mode state machine to SETUP mode only if
memory ready message sent successfully to the firmware.
Leave it in INIT mode otherwise to allow try to send message later.

Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
Link: https://lore.kernel.org/r/20221116124735.2493847-3-alexander.usyskin@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mei/bus-fixup.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/mei/bus-fixup.c b/drivers/misc/mei/bus-fixup.c
index 90023c34666ea..6df7679d97391 100644
--- a/drivers/misc/mei/bus-fixup.c
+++ b/drivers/misc/mei/bus-fixup.c
@@ -266,12 +266,13 @@ static void mei_gsc_mkhi_fix_ver(struct mei_cl_device *cldev)
 
 	if (cldev->bus->pxp_mode == MEI_DEV_PXP_INIT) {
 		ret = mei_gfx_memory_ready(cldev);
-		if (ret < 0)
+		if (ret < 0) {
 			dev_err(&cldev->dev, "memory ready command failed %d\n", ret);
-		else
+		} else {
 			dev_dbg(&cldev->dev, "memory ready command sent\n");
+			cldev->bus->pxp_mode = MEI_DEV_PXP_SETUP;
+		}
 		/* we go to reset after that */
-		cldev->bus->pxp_mode = MEI_DEV_PXP_SETUP;
 		goto out;
 	}
 
-- 
GitLab


From c002f04c0bc79ec00d4beb75fb631d5bf37419bd Mon Sep 17 00:00:00 2001
From: Eli Billauer <eli.billauer@gmail.com>
Date: Thu, 17 Nov 2022 09:18:25 +0200
Subject: [PATCH 2162/4122] char: xillybus: Fix trivial bug with mutex

@unit_mutex protects @unit from being freed, so obviously it should be
released after @unit is used, and not before.

This is a follow-up to commit 282a4b71816b ("char: xillybus: Prevent
use-after-free due to race condition") which ensures, among others, the
protection of @private_data after @unit_mutex has been released.

Reported-by: Hyunwoo Kim <imv4bel@gmail.com>
Signed-off-by: Eli Billauer <eli.billauer@gmail.com>
Link: https://lore.kernel.org/r/20221117071825.3942-1-eli.billauer@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/xillybus/xillybus_class.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/char/xillybus/xillybus_class.c b/drivers/char/xillybus/xillybus_class.c
index 0f238648dcfe2..e9a288e61c156 100644
--- a/drivers/char/xillybus/xillybus_class.c
+++ b/drivers/char/xillybus/xillybus_class.c
@@ -227,14 +227,15 @@ int xillybus_find_inode(struct inode *inode,
 			break;
 		}
 
-	mutex_unlock(&unit_mutex);
-
-	if (!unit)
+	if (!unit) {
+		mutex_unlock(&unit_mutex);
 		return -ENODEV;
+	}
 
 	*private_data = unit->private_data;
 	*index = minor - unit->lowest_minor;
 
+	mutex_unlock(&unit_mutex);
 	return 0;
 }
 EXPORT_SYMBOL(xillybus_find_inode);
-- 
GitLab


From adc40221bf676f3e722d135889a7b913b4162dc2 Mon Sep 17 00:00:00 2001
From: Yuma Ueda <cyan@0x00a1e9.dev>
Date: Fri, 18 Nov 2022 22:36:31 +0900
Subject: [PATCH 2163/4122] scripts/kallsyms.c Make the comment up-to-date with
 current implementation

The comment in scripts/kallsyms.c describing the usage of
scripts/kallsyms does not reflect the latest implementation.
Fix the comment to be equivalent to what the usage() function prints.

Signed-off-by: Yuma Ueda <cyan@0x00a1e9.dev>
Reviewed-by: Miguel Ojeda <ojeda@kernel.org>
Link: https://lore.kernel.org/r/20221118133631.4554-1-cyan@0x00a1e9.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 scripts/kallsyms.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
index 03fa07ad45d95..46d1afaaf4cd0 100644
--- a/scripts/kallsyms.c
+++ b/scripts/kallsyms.c
@@ -5,7 +5,8 @@
  * This software may be used and distributed according to the terms
  * of the GNU General Public License, incorporated herein by reference.
  *
- * Usage: nm -n vmlinux | scripts/kallsyms [--all-symbols] > symbols.S
+ * Usage: kallsyms [--all-symbols] [--absolute-percpu]
+ *                         [--base-relative] in.map > out.S
  *
  *      Table compression uses all the unused char codes on the symbols and
  *  maps these to the most used substrings (tokens). For instance, it might
-- 
GitLab


From e6278a5445780c71cc3dfc6ceda2875838eac8e5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?C=C3=A9dric=20Le=20Goater?= <clg@kaod.org>
Date: Tue, 22 Nov 2022 14:46:43 +0100
Subject: [PATCH 2164/4122] virtio_console: Introduce an ID allocator for
 virtual console numbers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When a virtio console port is initialized, it is registered as an hvc
console using a virtual console number. If a KVM guest is started with
multiple virtio console devices, the same vtermno (or virtual console
number) can be used to allocate different hvc consoles, which leads to
various communication problems later on.

This is also reported in debugfs :

  # grep vtermno /sys/kernel/debug/virtio-ports/*
  /sys/kernel/debug/virtio-ports/vport1p1:console_vtermno: 1
  /sys/kernel/debug/virtio-ports/vport2p1:console_vtermno: 1
  /sys/kernel/debug/virtio-ports/vport3p1:console_vtermno: 2
  /sys/kernel/debug/virtio-ports/vport4p1:console_vtermno: 3

Replace the next_vtermno global with an ID allocator and start the
allocation at 1 as it is today. Also recycle IDs when a console port
is removed.

Signed-off-by: Cédric Le Goater <clg@kaod.org>
Reviewed-by: Thomas Huth <thuth@redhat.com>
Link: https://lore.kernel.org/r/20221122134643.376184-1-clg@kaod.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/virtio_console.c | 26 +++++++++++---------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c
index 9fa3c76a267f5..6a821118d5530 100644
--- a/drivers/char/virtio_console.c
+++ b/drivers/char/virtio_console.c
@@ -13,6 +13,7 @@
 #include <linux/fs.h>
 #include <linux/splice.h>
 #include <linux/pagemap.h>
+#include <linux/idr.h>
 #include <linux/init.h>
 #include <linux/list.h>
 #include <linux/poll.h>
@@ -48,22 +49,11 @@ struct ports_driver_data {
 	/* List of all the devices we're handling */
 	struct list_head portdevs;
 
-	/*
-	 * This is used to keep track of the number of hvc consoles
-	 * spawned by this driver.  This number is given as the first
-	 * argument to hvc_alloc().  To correctly map an initial
-	 * console spawned via hvc_instantiate to the console being
-	 * hooked up via hvc_alloc, we need to pass the same vtermno.
-	 *
-	 * We also just assume the first console being initialised was
-	 * the first one that got used as the initial console.
-	 */
-	unsigned int next_vtermno;
-
 	/* All the console devices handled by this driver */
 	struct list_head consoles;
 };
-static struct ports_driver_data pdrvdata = { .next_vtermno = 1};
+
+static struct ports_driver_data pdrvdata;
 
 static DEFINE_SPINLOCK(pdrvdata_lock);
 static DECLARE_COMPLETION(early_console_added);
@@ -89,6 +79,8 @@ struct console {
 	u32 vtermno;
 };
 
+static DEFINE_IDA(vtermno_ida);
+
 struct port_buffer {
 	char *buf;
 
@@ -1244,18 +1236,21 @@ static int init_port_console(struct port *port)
 	 * pointers.  The final argument is the output buffer size: we
 	 * can do any size, so we put PAGE_SIZE here.
 	 */
-	port->cons.vtermno = pdrvdata.next_vtermno;
+	ret = ida_alloc_min(&vtermno_ida, 1, GFP_KERNEL);
+	if (ret < 0)
+		return ret;
 
+	port->cons.vtermno = ret;
 	port->cons.hvc = hvc_alloc(port->cons.vtermno, 0, &hv_ops, PAGE_SIZE);
 	if (IS_ERR(port->cons.hvc)) {
 		ret = PTR_ERR(port->cons.hvc);
 		dev_err(port->dev,
 			"error %d allocating hvc for port\n", ret);
 		port->cons.hvc = NULL;
+		ida_free(&vtermno_ida, port->cons.vtermno);
 		return ret;
 	}
 	spin_lock_irq(&pdrvdata_lock);
-	pdrvdata.next_vtermno++;
 	list_add_tail(&port->cons.list, &pdrvdata.consoles);
 	spin_unlock_irq(&pdrvdata_lock);
 	port->guest_connected = true;
@@ -1532,6 +1527,7 @@ static void unplug_port(struct port *port)
 		list_del(&port->cons.list);
 		spin_unlock_irq(&pdrvdata_lock);
 		hvc_remove(port->cons.hvc);
+		ida_free(&vtermno_ida, port->cons.vtermno);
 	}
 
 	remove_port_data(port);
-- 
GitLab


From fbfc4ca465a1f8d81bf2d67d95bf7fc67c3cf0c2 Mon Sep 17 00:00:00 2001
From: Patrick Delaunay <patrick.delaunay@foss.st.com>
Date: Fri, 18 Nov 2022 06:39:20 +0000
Subject: [PATCH 2165/4122] nvmem: stm32: move STM32MP15_BSEC_NUM_LOWER in
 config

Support STM32MP15_BSEC_NUM_LOWER in stm32 romem config to prepare
the next SoC in STM32MP family.

Signed-off-by: Patrick Delaunay <patrick.delaunay@foss.st.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-2-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/stm32-romem.c | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c
index 354be526897ff..d93baee01d7b6 100644
--- a/drivers/nvmem/stm32-romem.c
+++ b/drivers/nvmem/stm32-romem.c
@@ -22,16 +22,15 @@
 /* shadow registers offest */
 #define STM32MP15_BSEC_DATA0		0x200
 
-/* 32 (x 32-bits) lower shadow registers */
-#define STM32MP15_BSEC_NUM_LOWER	32
-
 struct stm32_romem_cfg {
 	int size;
+	u8 lower;
 };
 
 struct stm32_romem_priv {
 	void __iomem *base;
 	struct nvmem_config cfg;
+	u8 lower;
 };
 
 static int stm32_romem_read(void *context, unsigned int offset, void *buf,
@@ -85,7 +84,7 @@ static int stm32_bsec_read(void *context, unsigned int offset, void *buf,
 	for (i = roffset; (i < roffset + rbytes); i += 4) {
 		u32 otp = i >> 2;
 
-		if (otp < STM32MP15_BSEC_NUM_LOWER) {
+		if (otp < priv->lower) {
 			/* read lower data from shadow registers */
 			val = readl_relaxed(
 				priv->base + STM32MP15_BSEC_DATA0 + i);
@@ -159,6 +158,8 @@ static int stm32_romem_probe(struct platform_device *pdev)
 	priv->cfg.priv = priv;
 	priv->cfg.owner = THIS_MODULE;
 
+	priv->lower = 0;
+
 	cfg = (const struct stm32_romem_cfg *)
 		of_match_device(dev->driver->of_match_table, dev)->data;
 	if (!cfg) {
@@ -167,6 +168,7 @@ static int stm32_romem_probe(struct platform_device *pdev)
 		priv->cfg.reg_read = stm32_romem_read;
 	} else {
 		priv->cfg.size = cfg->size;
+		priv->lower = cfg->lower;
 		priv->cfg.reg_read = stm32_bsec_read;
 		priv->cfg.reg_write = stm32_bsec_write;
 	}
@@ -174,8 +176,17 @@ static int stm32_romem_probe(struct platform_device *pdev)
 	return PTR_ERR_OR_ZERO(devm_nvmem_register(dev, &priv->cfg));
 }
 
+/*
+ * STM32MP15 BSEC OTP regions: 4096 OTP bits (with 3072 effective bits)
+ * => 96 x 32-bits data words
+ * - Lower: 1K bits, 2:1 redundancy, incremental bit programming
+ *   => 32 (x 32-bits) lower shadow registers = words 0 to 31
+ * - Upper: 2K bits, ECC protection, word programming only
+ *   => 64 (x 32-bits) = words 32 to 95
+ */
 static const struct stm32_romem_cfg stm32mp15_bsec_cfg = {
-	.size = 384, /* 96 x 32-bits data words */
+	.size = 384,
+	.lower = 32,
 };
 
 static const struct of_device_id stm32_romem_of_match[] = {
-- 
GitLab


From d61784e6410f3df2028e6eb91b06ffed37a660e0 Mon Sep 17 00:00:00 2001
From: Patrick Delaunay <patrick.delaunay@foss.st.com>
Date: Fri, 18 Nov 2022 06:39:21 +0000
Subject: [PATCH 2166/4122] nvmem: stm32: add warning when upper OTPs are
 updated

As the upper OTPs are ECC protected, they support only one 32 bits word
programming.
For a second modification of this word, these ECC become invalid and
this OTP will be no more accessible, the shadowed value is invalid.

This patch adds a warning to indicate an upper OTP update, because this
operation is dangerous as OTP is not locked by the driver after the first
update to avoid a second update.

Signed-off-by: Patrick Delaunay <patrick.delaunay@foss.st.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-3-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/stm32-romem.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c
index d93baee01d7b6..bb8aa72ba2f94 100644
--- a/drivers/nvmem/stm32-romem.c
+++ b/drivers/nvmem/stm32-romem.c
@@ -132,6 +132,9 @@ static int stm32_bsec_write(void *context, unsigned int offset, void *buf,
 		}
 	}
 
+	if (offset + bytes >= priv->lower * 4)
+		dev_warn(dev, "Update of upper OTPs with ECC protection (word programming, only once)\n");
+
 	return 0;
 }
 
-- 
GitLab


From a3816a7d7c097c1da46aad5f5d1e229b607dce04 Mon Sep 17 00:00:00 2001
From: Patrick Delaunay <patrick.delaunay@foss.st.com>
Date: Fri, 18 Nov 2022 06:39:22 +0000
Subject: [PATCH 2167/4122] nvmem: stm32: add nvmem type attribute

Inform NVMEM framework of type attribute for stm32-romem as NVMEM_TYPE_OTP
so userspace is able to know how the data is stored in BSEC.

Signed-off-by: Patrick Delaunay <patrick.delaunay@foss.st.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-4-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/stm32-romem.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c
index bb8aa72ba2f94..6de565639d5f4 100644
--- a/drivers/nvmem/stm32-romem.c
+++ b/drivers/nvmem/stm32-romem.c
@@ -160,6 +160,7 @@ static int stm32_romem_probe(struct platform_device *pdev)
 	priv->cfg.dev = dev;
 	priv->cfg.priv = priv;
 	priv->cfg.owner = THIS_MODULE;
+	priv->cfg.type = NVMEM_TYPE_OTP;
 
 	priv->lower = 0;
 
-- 
GitLab


From 107548adf89d581d1271cb2c81858bc793930306 Mon Sep 17 00:00:00 2001
From: Patrick Delaunay <patrick.delaunay@foss.st.com>
Date: Fri, 18 Nov 2022 06:39:23 +0000
Subject: [PATCH 2168/4122] dt-bindings: nvmem: add new stm32mp13 compatible
 for stm32-romem

Add a new compatible for stm32mp13 support.

Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Patrick Delaunay <patrick.delaunay@foss.st.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-5-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
index 448a2678dc624..16f4cad2fa551 100644
--- a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
@@ -22,6 +22,7 @@ properties:
   compatible:
     enum:
       - st,stm32f4-otp
+      - st,stm32mp13-bsec
       - st,stm32mp15-bsec
 
   reg:
-- 
GitLab


From 06aac0e11960a7ddccc1888326b5906d017e0f24 Mon Sep 17 00:00:00 2001
From: Jiangshan Yi <yijiangshan@kylinos.cn>
Date: Fri, 18 Nov 2022 06:39:24 +0000
Subject: [PATCH 2169/4122] nvmem: stm32: fix spelling typo in comment

Fix spelling typo in comment.

Reported-by: k2ci <kernel-bot@kylinos.cn>
Signed-off-by: Jiangshan Yi <yijiangshan@kylinos.cn>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-6-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/stm32-romem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvmem/stm32-romem.c b/drivers/nvmem/stm32-romem.c
index 6de565639d5f4..d1d03c2ad0816 100644
--- a/drivers/nvmem/stm32-romem.c
+++ b/drivers/nvmem/stm32-romem.c
@@ -19,7 +19,7 @@
 #define STM32_SMC_WRITE_SHADOW		0x03
 #define STM32_SMC_READ_OTP		0x04
 
-/* shadow registers offest */
+/* shadow registers offset */
 #define STM32MP15_BSEC_DATA0		0x200
 
 struct stm32_romem_cfg {
-- 
GitLab


From 42b868b032901075f8e9bdcd8d700cb9c0d03ba5 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 18 Nov 2022 06:39:25 +0000
Subject: [PATCH 2170/4122] dt-bindings: nvmem: Fix example

Despite not being listed nor required within the top level nvmem yaml
file, the "compatible" property is mandatory and is actually enforced by
all the nvmem provider bindings.

Unfortunately, the lack of compatible in the nvmem.yaml to level
description file lead to the example not matching anything and thus not
being checked at all.

Let's pick a compatible almost randomly (one which is already used with
the qfprom label) to make the example at least valid on a semantic
point of view and getting it checked.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-7-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/nvmem/nvmem.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
index 1eb22dba364c7..0455506fc30fa 100644
--- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
@@ -67,6 +67,7 @@ examples:
       #include <dt-bindings/gpio/gpio.h>
 
       qfprom: eeprom@700000 {
+          compatible = "qcom,msm8974-qfprom", "qcom,qfprom";
           #address-cells = <1>;
           #size-cells = <1>;
           reg = <0x00700000 0x100000>;
-- 
GitLab


From fb817c4ef63e8cfb6e77ae4a2875ae854c80708f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 18 Nov 2022 06:39:26 +0000
Subject: [PATCH 2171/4122] nvmem: Kconfig: Fix spelling mistake "controlls" ->
 "controls"

There is a spelling mistake in a Kconfig description. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-8-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvmem/Kconfig b/drivers/nvmem/Kconfig
index ec8a49c040031..755f551426b55 100644
--- a/drivers/nvmem/Kconfig
+++ b/drivers/nvmem/Kconfig
@@ -164,7 +164,7 @@ config NVMEM_MICROCHIP_OTPC
 	depends on ARCH_AT91 || COMPILE_TEST
 	help
 	  This driver enable the OTP controller available on Microchip SAMA7G5
-	  SoCs. It controlls the access to the OTP memory connected to it.
+	  SoCs. It controls the access to the OTP memory connected to it.
 
 config NVMEM_MTK_EFUSE
 	tristate "Mediatek SoCs EFUSE support"
-- 
GitLab


From ada84d07af6097b2addd18262668ce6cb9e15206 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= <rafal@milecki.pl>
Date: Fri, 18 Nov 2022 06:39:27 +0000
Subject: [PATCH 2172/4122] nvmem: u-boot-env: add Broadcom format support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Broadcom uses U-Boot for a lot of their bcmbca familiy chipsets. They
decided to store U-Boot environment data inside U-Boot partition and to
use a custom header (with "uEnv" magic and env data length).

Add support for Broadcom's specific binding and their custom format.

Ref: 6b0584c19d87 ("dt-bindings: nvmem: u-boot,env: add Broadcom's variant binding")
Signed-off-by: Rafał Miłecki <rafal@milecki.pl>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-9-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/nvmem/u-boot-env.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/nvmem/u-boot-env.c b/drivers/nvmem/u-boot-env.c
index 4fdbdccebda16..29b1d87a3c515 100644
--- a/drivers/nvmem/u-boot-env.c
+++ b/drivers/nvmem/u-boot-env.c
@@ -16,6 +16,7 @@
 enum u_boot_env_format {
 	U_BOOT_FORMAT_SINGLE,
 	U_BOOT_FORMAT_REDUNDANT,
+	U_BOOT_FORMAT_BROADCOM,
 };
 
 struct u_boot_env {
@@ -40,6 +41,13 @@ struct u_boot_env_image_redundant {
 	uint8_t data[];
 } __packed;
 
+struct u_boot_env_image_broadcom {
+	__le32 magic;
+	__le32 len;
+	__le32 crc32;
+	uint8_t data[0];
+} __packed;
+
 static int u_boot_env_read(void *context, unsigned int offset, void *val,
 			   size_t bytes)
 {
@@ -138,6 +146,11 @@ static int u_boot_env_parse(struct u_boot_env *priv)
 		crc32_data_offset = offsetof(struct u_boot_env_image_redundant, data);
 		data_offset = offsetof(struct u_boot_env_image_redundant, data);
 		break;
+	case U_BOOT_FORMAT_BROADCOM:
+		crc32_offset = offsetof(struct u_boot_env_image_broadcom, crc32);
+		crc32_data_offset = offsetof(struct u_boot_env_image_broadcom, data);
+		data_offset = offsetof(struct u_boot_env_image_broadcom, data);
+		break;
 	}
 	crc32 = le32_to_cpu(*(__le32 *)(buf + crc32_offset));
 	crc32_data_len = priv->mtd->size - crc32_data_offset;
@@ -202,6 +215,7 @@ static const struct of_device_id u_boot_env_of_match_table[] = {
 	{ .compatible = "u-boot,env", .data = (void *)U_BOOT_FORMAT_SINGLE, },
 	{ .compatible = "u-boot,env-redundant-bool", .data = (void *)U_BOOT_FORMAT_REDUNDANT, },
 	{ .compatible = "u-boot,env-redundant-count", .data = (void *)U_BOOT_FORMAT_REDUNDANT, },
+	{ .compatible = "brcm,env", .data = (void *)U_BOOT_FORMAT_BROADCOM, },
 	{},
 };
 
-- 
GitLab


From 27dfc44e1ba30d2d49675e21918bf4b3b3b59fa6 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 18 Nov 2022 06:39:28 +0000
Subject: [PATCH 2173/4122] dt-bindings: nvmem: Introduce the nvmem-layout
 container

The nvmem devices description works like this:

* Most cases (EEPROM & co):

eeprom@x {
	 compatible = "<eeprom-compatible>";
	 ...
};

* MTD case:

flash@y {
	compatible = "<flash-compatible>";
	...
	otp {
		compatible = "user-otp"; /* or "factory-otp" */
		...
	};
};

In the former case, the nvmem device is "eeprom@x", while in the latter
case the nvmem device is "otp".

Nvmem devices can produce nvmem cells. The current way to describe nvmem
cells is to locate them by providing their static byte and bit offset
and length. These information are stored in subnodes of the nvmem
device.

It is now a fact that such description does not fit more advanced use
cases where the location or the size of the cells may vary. There are
currently three known situations which require being described
differently: Kontron's SL28 VPD, ONIE's TLV table and U-Boot's
environment variables.

Hence, we need a way to describe the parsers that must be used in order
to make the dynamic discovery of the nvmem cells. This new description
must fit both use cases (the generic situation and the MTD case).

Let's create in both cases a container node named nvmem-layout whose
content will depend on the parser. Right now nvmem-layout.yaml is
"empty", but references to additional layout parser bindings will be
inserted in the near future. The final goal being something that looks
like:

* Most cases (EEPROM & co):

eeprom@x {
	compatible = "<eeprom-compatible>";
	...
	nvmem-layout {
		compatible = "<parser-compatible>";
		...
	};
};

* MTD case:

flash@y {
	compatible = "<flash-compatible>";
	...
	otp {
		compatible = "user-otp"; /* or "factory-otp" */
		...
		nvmem-layout {
			compatible = "<parser-compatible>";
			...
		};
	};
};

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-10-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/nvmem/layouts/nvmem-layout.yaml  | 30 +++++++++++++++++++
 .../devicetree/bindings/nvmem/nvmem.yaml      |  7 +++++
 2 files changed, 37 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml

diff --git a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
new file mode 100644
index 0000000000000..ecc7c37cbc1fc
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
@@ -0,0 +1,30 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/layouts/nvmem-layout.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVMEM (Non Volatile Memory) layouts
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+  - Michael Walle <michael@walle.cc>
+  - Miquel Raynal <miquel.raynal@bootlin.com>
+
+description: |
+  Most NVMEM layouts are static and thus do not require additional description
+  besides the bytes/bits offset and length. Other layouts can be less statically
+  define and might require dynamic reading of the NVMEM device in order to
+  perform their parsing. The nvmem-layout container is here to describe these.
+
+properties:
+  compatible: true
+
+  '#address-cells': false
+
+  '#size-cells': false
+
+required:
+  - compatible
+
+unevaluatedProperties: false
diff --git a/Documentation/devicetree/bindings/nvmem/nvmem.yaml b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
index 0455506fc30fa..75bb93dda9df1 100644
--- a/Documentation/devicetree/bindings/nvmem/nvmem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/nvmem.yaml
@@ -39,6 +39,13 @@ properties:
       when it's driven low (logical '0') to allow writing.
     maxItems: 1
 
+  nvmem-layout:
+    $ref: /schemas/nvmem/layouts/nvmem-layout.yaml
+    description:
+      Alternative to the statically defined nvmem cells, this
+      container may reference more advanced (dynamic) layout
+      parsers.
+
 patternProperties:
   "@[0-9a-f]+(,[0-7])?$":
     type: object
-- 
GitLab


From ca104926184db10d486e91e64cec725e6c2bd8ae Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 18 Nov 2022 06:39:29 +0000
Subject: [PATCH 2174/4122] dt-bindings: eeprom: Inherit from nvmem.yaml

EEPROMs can be nvmem providers. Let's make all EEPROM bindings
reference nvmem.yaml as they should, so that nvmem cells and layout
parsers can be safely described within the EEPROM nodes.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-11-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/eeprom/at24.yaml           | 5 ++++-
 Documentation/devicetree/bindings/eeprom/at25.yaml           | 1 +
 .../devicetree/bindings/eeprom/microchip,93lc46b.yaml        | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/eeprom/at24.yaml b/Documentation/devicetree/bindings/eeprom/at24.yaml
index d14e0accbda81..84af0d5f52aaf 100644
--- a/Documentation/devicetree/bindings/eeprom/at24.yaml
+++ b/Documentation/devicetree/bindings/eeprom/at24.yaml
@@ -10,6 +10,9 @@ title: I2C EEPROMs compatible with Atmel's AT24
 maintainers:
   - Bartosz Golaszewski <bgolaszewski@baylibre.com>
 
+allOf:
+  - $ref: /schemas/nvmem/nvmem.yaml
+
 select:
   properties:
     compatible:
@@ -183,7 +186,7 @@ required:
   - compatible
   - reg
 
-additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/eeprom/at25.yaml b/Documentation/devicetree/bindings/eeprom/at25.yaml
index 8b1c997caac12..0f5a8ef996d33 100644
--- a/Documentation/devicetree/bindings/eeprom/at25.yaml
+++ b/Documentation/devicetree/bindings/eeprom/at25.yaml
@@ -104,6 +104,7 @@ required:
 
 allOf:
   - $ref: /schemas/spi/spi-peripheral-props.yaml#
+  - $ref: /schemas/nvmem/nvmem.yaml
   - if:
       properties:
         compatible:
diff --git a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml
index 0c2f5ddb79c5a..64cfd971c9c5e 100644
--- a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml
+++ b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml
@@ -47,6 +47,7 @@ required:
 
 allOf:
   - $ref: /schemas/spi/spi-peripheral-props.yaml#
+  - $ref: /schemas/nvmem/nvmem.yaml
 
 unevaluatedProperties: false
 
-- 
GitLab


From e33cfae0eff66dca02ed482cf3ef01bb61f84fb0 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 18 Nov 2022 06:39:30 +0000
Subject: [PATCH 2175/4122] dt-bindings: nvmem: add YAML schema for the sl28
 vpd layout

Add a schema for the NVMEM layout on Kontron's sl28 boards.

Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-12-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../nvmem/layouts/kontron,sl28-vpd.yaml       | 64 +++++++++++++++++++
 .../bindings/nvmem/layouts/nvmem-layout.yaml  |  3 +
 2 files changed, 67 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml

diff --git a/Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml b/Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml
new file mode 100644
index 0000000000000..c713e23819f11
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/layouts/kontron,sl28-vpd.yaml
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/layouts/kontron,sl28-vpd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVMEM layout of the Kontron SMARC-sAL28 vital product data
+
+maintainers:
+  - Michael Walle <michael@walle.cc>
+
+description:
+  The vital product data (VPD) of the sl28 boards contains a serial
+  number and a base MAC address. The actual MAC addresses for the
+  on-board ethernet devices are derived from this base MAC address by
+  adding an offset.
+
+select: false
+
+properties:
+  compatible:
+    const: kontron,sl28-vpd
+
+  serial-number:
+    type: object
+    description: The board's serial number
+
+    additionalProperties: false
+
+  base-mac-address:
+    type: object
+    description:
+      Base MAC address for all on-module network interfaces. The first
+      argument of the phandle will be treated as an offset.
+
+    properties:
+      "#nvmem-cell-cells":
+        const: 1
+
+    additionalProperties: false
+
+required:
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+      otp-1 {
+          compatible = "user-otp";
+
+          nvmem-layout {
+              compatible = "kontron,sl28-vpd";
+
+              serial_number: serial-number {
+              };
+
+              base_mac_address: base-mac-address {
+                  #nvmem-cell-cells = <1>;
+              };
+          };
+      };
+
+...
diff --git a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
index ecc7c37cbc1fc..f64ea2fa362d1 100644
--- a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
+++ b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
@@ -17,6 +17,9 @@ description: |
   define and might require dynamic reading of the NVMEM device in order to
   perform their parsing. The nvmem-layout container is here to describe these.
 
+oneOf:
+  - $ref: kontron,sl28-vpd.yaml
+
 properties:
   compatible: true
 
-- 
GitLab


From b6c88f10e8bb20f0ccaabe4c0a4ac3c6c1fb8768 Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 18 Nov 2022 06:39:31 +0000
Subject: [PATCH 2176/4122] dt-bindings: vendor-prefixes: Add ONIE
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As described on their website (see link below),

   "The Open Network Install Environment (ONIE) is an open source
    initiative that defines an open “install environment” for modern
    networking hardware."

It is not a proper corporation per-se but rather more a group which
tries to spread the use of open source standards in the networking
hardware world.

Link: https://opencomputeproject.github.io/onie/
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-13-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 6e323a380294a..65a74026cf2b7 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -927,6 +927,8 @@ patternProperties:
     description: One Laptop Per Child
   "^oneplus,.*":
     description: OnePlus Technology (Shenzhen) Co., Ltd.
+  "^onie,.*":
+    description: Open Network Install Environment group
   "^onion,.*":
     description: Onion Corporation
   "^onnn,.*":
-- 
GitLab


From ce9c0b06abc44d51e81ce36b83a81e960034f3ee Mon Sep 17 00:00:00 2001
From: Miquel Raynal <miquel.raynal@bootlin.com>
Date: Fri, 18 Nov 2022 06:39:32 +0000
Subject: [PATCH 2177/4122] dt-bindings: nvmem: add YAML schema for the ONIE
 tlv layout

Add a schema for the ONIE tlv NVMEM layout that can be found on any ONIE
compatible networking device.

Describe all the possible NVMEM cells that can be produced by this
layout parser.

Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118063932.6418-14-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/nvmem/layouts/nvmem-layout.yaml  |   1 +
 .../nvmem/layouts/onie,tlv-layout.yaml        | 147 ++++++++++++++++++
 2 files changed, 148 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml

diff --git a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
index f64ea2fa362d1..8512ee538c4c1 100644
--- a/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
+++ b/Documentation/devicetree/bindings/nvmem/layouts/nvmem-layout.yaml
@@ -19,6 +19,7 @@ description: |
 
 oneOf:
   - $ref: kontron,sl28-vpd.yaml
+  - $ref: onie,tlv-layout.yaml
 
 properties:
   compatible: true
diff --git a/Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml b/Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml
new file mode 100644
index 0000000000000..5a0e7671aa3f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/nvmem/layouts/onie,tlv-layout.yaml
@@ -0,0 +1,147 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/layouts/onie,tlv-layout.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVMEM layout of the ONIE tlv table
+
+maintainers:
+  - Miquel Raynal <miquel.raynal@bootlin.com>
+
+description:
+  Modern networking hardware implementing the Open Compute Project ONIE
+  infrastructure shall provide a non-volatile memory with a table whose the
+  content is well specified and gives many information about the manufacturer
+  (name, country of manufacture, etc) as well as device caracteristics (serial
+  number, hardware version, mac addresses, etc). The underlaying device type
+  (flash, EEPROM,...) is not specified. The exact location of each value is also
+  dynamic and should be discovered at run time because it depends on the
+  parameters the manufacturer decided to embed.
+
+select: false
+
+properties:
+  compatible:
+    const: onie,tlv-layout
+
+  product-name:
+    type: object
+    additionalProperties: false
+
+  part-number:
+    type: object
+    additionalProperties: false
+
+  serial-number:
+    type: object
+    additionalProperties: false
+
+  mac-address:
+    type: object
+    description:
+      Base MAC address for all on-module network interfaces. The first
+      argument of the phandle will be treated as an offset.
+
+    properties:
+      "#nvmem-cell-cells":
+        const: 1
+
+    additionalProperties: false
+
+  manufacture-date:
+    type: object
+    additionalProperties: false
+
+  device-version:
+    type: object
+    additionalProperties: false
+
+  label-revision:
+    type: object
+    additionalProperties: false
+
+  platforn-name:
+    type: object
+    additionalProperties: false
+
+  onie-version:
+    type: object
+    additionalProperties: false
+
+  num-macs:
+    type: object
+    additionalProperties: false
+
+  manufacturer:
+    type: object
+    additionalProperties: false
+
+  country-code:
+    type: object
+    additionalProperties: false
+
+  vendor:
+    type: object
+    additionalProperties: false
+
+  diag-version:
+    type: object
+    additionalProperties: false
+
+  service-tag:
+    type: object
+    additionalProperties: false
+
+  vendor-extension:
+    type: object
+    additionalProperties: false
+
+required:
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        eeprom@56 {
+            compatible = "atmel,24c64";
+            read-only;
+            reg = <0x56>;
+
+            nvmem-layout {
+                compatible = "onie,tlv-layout";
+
+                serial-number {
+                };
+            };
+        };
+    };
+
+  - |
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        flash@0 {
+            compatible = "m25p80", "jedec,spi-nor";
+            reg = <0>;
+
+            otp {
+                compatible = "user-otp";
+
+                nvmem-layout {
+                    compatible = "onie,tlv-layout";
+
+                    mac-address {
+                        #nvmem-cell-cells = <1>;
+                    };
+                };
+            };
+        };
+    };
+...
-- 
GitLab


From 97e1a5309190aca528c7e12697a898bda793a460 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:35 +0000
Subject: [PATCH 2178/4122] slimbus: qcom-ngd-ctrl: check for device runtime PM
 status during ISR

Slimbus core interrupt is getting fired after suspend. At this point
ADSP slimbus hardware is off with gated clocks which is leading to an
unclocked access when HLOS slimbus tried to read the interrupt
status register in the ISR.

Co-developed-by: Chandana Kishori Chiluveru <cchiluve@codeaurora.org>
Signed-off-by: Chandana Kishori Chiluveru <cchiluve@codeaurora.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-2-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ngd-ctrl.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index 76c5e446d2433..964adf77b51b9 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -763,7 +763,14 @@ static irqreturn_t qcom_slim_ngd_interrupt(int irq, void *d)
 {
 	struct qcom_slim_ngd_ctrl *ctrl = d;
 	void __iomem *base = ctrl->ngd->base;
-	u32 stat = readl(base + NGD_INT_STAT);
+	u32 stat;
+
+	if (pm_runtime_suspended(ctrl->ctrl.dev)) {
+		dev_warn_once(ctrl->dev, "Interrupt received while suspended\n");
+		return IRQ_NONE;
+	}
+
+	stat = readl(base + NGD_INT_STAT);
 
 	if ((stat & NGD_INT_MSG_BUF_CONTE) ||
 		(stat & NGD_INT_MSG_TX_INVAL) || (stat & NGD_INT_DEV_ERR) ||
-- 
GitLab


From 63c60a226c908f46589b57b5bcb220ca82a76cc5 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:36 +0000
Subject: [PATCH 2179/4122] slimbus: qcom-ngd-ctrl: drop PM runtime counter on
 transfer error paths

If transfer in qcom_slim_ngd_xfer_msg_sync() fails, we need to drop the
PM runtime usage counter to have it balanced.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-3-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ngd-ctrl.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index 964adf77b51b9..d48e58ca5d585 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -919,21 +919,29 @@ static int qcom_slim_ngd_xfer_msg_sync(struct slim_controller *ctrl,
 	DECLARE_COMPLETION_ONSTACK(done);
 	int ret, timeout;
 
-	pm_runtime_get_sync(ctrl->dev);
+	ret = pm_runtime_get_sync(ctrl->dev);
+	if (ret < 0)
+		goto pm_put;
 
 	txn->comp = &done;
 
 	ret = qcom_slim_ngd_xfer_msg(ctrl, txn);
 	if (ret)
-		return ret;
+		goto pm_put;
 
 	timeout = wait_for_completion_timeout(&done, HZ);
 	if (!timeout) {
 		dev_err(ctrl->dev, "TX timed out:MC:0x%x,mt:0x%x", txn->mc,
 				txn->mt);
-		return -ETIMEDOUT;
+		ret = -ETIMEDOUT;
+		goto pm_put;
 	}
 	return 0;
+
+pm_put:
+	pm_runtime_put(ctrl->dev);
+
+	return ret;
 }
 
 static int qcom_slim_ngd_enable_stream(struct slim_stream_runtime *rt)
-- 
GitLab


From 434d25728171aa72ed1b1c4d248527cbf6b6c99b Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:37 +0000
Subject: [PATCH 2180/4122] slimbus: stream: handle unsupported bitrates for
 presence rate

Handle errors of getting presence rate for unsupported stream bitrate,
instead of sending -EINVAL in change content message.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@somainline.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-4-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/stream.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c
index 73a2aa3629572..7e9c818e66c13 100644
--- a/drivers/slimbus/stream.c
+++ b/drivers/slimbus/stream.c
@@ -204,7 +204,7 @@ int slim_stream_prepare(struct slim_stream_runtime *rt,
 {
 	struct slim_controller *ctrl = rt->dev->ctrl;
 	struct slim_port *port;
-	int num_ports, i, port_id;
+	int num_ports, i, port_id, prrate;
 
 	if (rt->ports) {
 		dev_err(&rt->dev->dev, "Stream already Prepared\n");
@@ -221,6 +221,13 @@ int slim_stream_prepare(struct slim_stream_runtime *rt,
 	rt->bps = cfg->bps;
 	rt->direction = cfg->direction;
 
+	prrate = slim_get_prate_code(cfg->rate);
+	if (prrate < 0) {
+		dev_err(&rt->dev->dev, "Cannot get presence rate for rate %d Hz\n",
+			cfg->rate);
+		return prrate;
+	}
+
 	if (cfg->rate % ctrl->a_framer->superfreq) {
 		/*
 		 * data rate not exactly multiple of super frame,
@@ -241,7 +248,7 @@ int slim_stream_prepare(struct slim_stream_runtime *rt,
 		port = &rt->ports[i];
 		port->state = SLIM_PORT_DISCONNECTED;
 		port->id = port_id;
-		port->ch.prrate = slim_get_prate_code(cfg->rate);
+		port->ch.prrate = prrate;
 		port->ch.id = cfg->chs[i];
 		port->ch.data_fmt = SLIM_CH_DATA_FMT_NOT_DEFINED;
 		port->ch.aux_fmt = SLIM_CH_AUX_FMT_NOT_APPLICABLE;
-- 
GitLab


From 4594cb4b76c6cf9acf81d3044c6a6817ed4a3781 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:38 +0000
Subject: [PATCH 2181/4122] slimbus: qcom-ngd-ctrl: add support for 44.1 Khz
 frequency

Add support for 44.1Khz frequency by dynamically calculating the slimbus
parameters instead of statically defining them.

Co-developed-by: Prudhvi Yarlagadda <pyarlaga@codeaurora.org>
Signed-off-by: Prudhvi Yarlagadda <pyarlaga@codeaurora.org>
Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-5-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ngd-ctrl.c | 64 ++++++++++++++++++++++++++++++---
 1 file changed, 59 insertions(+), 5 deletions(-)

diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index d48e58ca5d585..22720ad4c22d7 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -944,6 +944,54 @@ pm_put:
 	return ret;
 }
 
+static int qcom_slim_calc_coef(struct slim_stream_runtime *rt, int *exp)
+{
+	struct slim_controller *ctrl = rt->dev->ctrl;
+	int coef;
+
+	if (rt->ratem * ctrl->a_framer->superfreq < rt->rate)
+		rt->ratem++;
+
+	coef = rt->ratem;
+	*exp = 0;
+
+	/*
+	 * CRM = Cx(2^E) is the formula we are using.
+	 * Here C is the coffecient and E is the exponent.
+	 * CRM is the Channel Rate Multiplier.
+	 * Coefficeint should be either 1 or 3 and exponenet
+	 * should be an integer between 0 to 9, inclusive.
+	 */
+	while (1) {
+		while ((coef & 0x1) != 0x1) {
+			coef >>= 1;
+			*exp = *exp + 1;
+		}
+
+		if (coef <= 3)
+			break;
+
+		coef++;
+	}
+
+	/*
+	 * we rely on the coef value (1 or 3) to set a bit
+	 * in the slimbus message packet. This bit is
+	 * BIT(5) which is the segment rate coefficient.
+	 */
+	if (coef == 1) {
+		if (*exp > 9)
+			return -EIO;
+		coef = 0;
+	} else {
+		if (*exp > 8)
+			return -EIO;
+		coef = 1;
+	}
+
+	return coef;
+}
+
 static int qcom_slim_ngd_enable_stream(struct slim_stream_runtime *rt)
 {
 	struct slim_device *sdev = rt->dev;
@@ -967,16 +1015,22 @@ static int qcom_slim_ngd_enable_stream(struct slim_stream_runtime *rt)
 		struct slim_port *port = &rt->ports[i];
 
 		if (txn.msg->num_bytes == 0) {
-			int seg_interval = SLIM_SLOTS_PER_SUPERFRAME/rt->ratem;
-			int exp;
+			int exp = 0, coef = 0;
 
 			wbuf[txn.msg->num_bytes++] = sdev->laddr;
 			wbuf[txn.msg->num_bytes] = rt->bps >> 2 |
 						   (port->ch.aux_fmt << 6);
 
-			/* Data channel segment interval not multiple of 3 */
-			exp = seg_interval % 3;
-			if (exp)
+			/* calculate coef dynamically */
+			coef = qcom_slim_calc_coef(rt, &exp);
+			if (coef < 0) {
+				dev_err(&sdev->dev,
+				"%s: error calculating coef %d\n", __func__,
+									coef);
+				return -EIO;
+			}
+
+			if (coef)
 				wbuf[txn.msg->num_bytes] |= BIT(5);
 
 			txn.msg->num_bytes++;
-- 
GitLab


From a82b1ec34e9bea94058f429560e311e5ca634356 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:39 +0000
Subject: [PATCH 2182/4122] slimbus: stream: add checks for invalid
 unprepare/disable usage

slim_disable_stream() and slim_stream_unprepare() are exported, so add
sanity checks preventing unmatched/invalid calls.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-6-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/stream.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/slimbus/stream.c b/drivers/slimbus/stream.c
index 7e9c818e66c13..1d6b386579173 100644
--- a/drivers/slimbus/stream.c
+++ b/drivers/slimbus/stream.c
@@ -414,6 +414,9 @@ int slim_stream_disable(struct slim_stream_runtime *stream)
 	struct slim_controller *ctrl = stream->dev->ctrl;
 	int ret, i;
 
+	if (!stream->ports || !stream->num_ports)
+		return -EINVAL;
+
 	if (ctrl->disable_stream)
 		ctrl->disable_stream(stream);
 
@@ -445,6 +448,9 @@ int slim_stream_unprepare(struct slim_stream_runtime *stream)
 {
 	int i;
 
+	if (!stream->ports || !stream->num_ports)
+		return -EINVAL;
+
 	for (i = 0; i < stream->num_ports; i++)
 		slim_disconnect_port(stream, &stream->ports[i]);
 
-- 
GitLab


From 8c8112d7c442579cca821836bbcc46b747ceca74 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:40 +0000
Subject: [PATCH 2183/4122] slimbus: qcom-ctrl: drop unneeded qcom,apq8064-slim
 compatible

Bindings require usage of fallback "qcom,slim" compatible, so
"qcom,apq8064-slim" is redundant.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-7-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ctrl.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c
index c0c4f895d76e2..bb106eab8ae27 100644
--- a/drivers/slimbus/qcom-ctrl.c
+++ b/drivers/slimbus/qcom-ctrl.c
@@ -718,7 +718,6 @@ static const struct dev_pm_ops qcom_slim_dev_pm_ops = {
 
 static const struct of_device_id qcom_slim_dt_match[] = {
 	{ .compatible = "qcom,slim", },
-	{ .compatible = "qcom,apq8064-slim", },
 	{}
 };
 
-- 
GitLab


From 3d58b933c9eb2da5745c485bc7008d29c0eaddac Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:41 +0000
Subject: [PATCH 2184/4122] slimbus: qcom-ctrl: use
 devm_platform_ioremap_resource_byname()

Simplify the code with devm_platform_ioremap_resource_byname().

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-8-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ctrl.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/slimbus/qcom-ctrl.c b/drivers/slimbus/qcom-ctrl.c
index bb106eab8ae27..400b7b385a443 100644
--- a/drivers/slimbus/qcom-ctrl.c
+++ b/drivers/slimbus/qcom-ctrl.c
@@ -488,7 +488,6 @@ static int qcom_slim_probe(struct platform_device *pdev)
 {
 	struct qcom_slim_ctrl *ctrl;
 	struct slim_controller *sctrl;
-	struct resource *slim_mem;
 	int ret, ver;
 
 	ctrl = devm_kzalloc(&pdev->dev, sizeof(*ctrl), GFP_KERNEL);
@@ -519,8 +518,7 @@ static int qcom_slim_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ctrl);
 	dev_set_drvdata(ctrl->dev, ctrl);
 
-	slim_mem = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ctrl");
-	ctrl->base = devm_ioremap_resource(ctrl->dev, slim_mem);
+	ctrl->base = devm_platform_ioremap_resource_byname(pdev, "ctrl");
 	if (IS_ERR(ctrl->base))
 		return PTR_ERR(ctrl->base);
 
-- 
GitLab


From 1d01bcb4659cfca87d92075b87cdadb0a9897d14 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:42 +0000
Subject: [PATCH 2185/4122] slimbus: qcom-ngd-ctrl: use
 devm_platform_get_and_ioremap_resource()

Simplify the code with devm_platform_get_and_ioremap_resource().

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-9-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ngd-ctrl.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index 22720ad4c22d7..a6f3b6860d8f0 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -1597,7 +1597,6 @@ static int qcom_slim_ngd_ctrl_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct qcom_slim_ngd_ctrl *ctrl;
-	struct resource *res;
 	int ret;
 	struct pdr_service *pds;
 
@@ -1607,8 +1606,7 @@ static int qcom_slim_ngd_ctrl_probe(struct platform_device *pdev)
 
 	dev_set_drvdata(dev, ctrl);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ctrl->base = devm_ioremap_resource(dev, res);
+	ctrl->base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(ctrl->base))
 		return PTR_ERR(ctrl->base);
 
-- 
GitLab


From 319a538d618fea33434387c4502361bb8f047e11 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:43 +0000
Subject: [PATCH 2186/4122] slimbus: qcom-ngd-ctrl: reinit the reconf
 completion flag

Reinitialize the reconf completion flag when ngd registers are
not retainied or when enumeration is lost for ngd.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-10-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/slimbus/qcom-ngd-ctrl.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/slimbus/qcom-ngd-ctrl.c b/drivers/slimbus/qcom-ngd-ctrl.c
index a6f3b6860d8f0..77aa6d26476cd 100644
--- a/drivers/slimbus/qcom-ngd-ctrl.c
+++ b/drivers/slimbus/qcom-ngd-ctrl.c
@@ -1205,6 +1205,12 @@ static int qcom_slim_ngd_power_up(struct qcom_slim_ngd_ctrl *ctrl)
 		return 0;
 	}
 
+	/*
+	 * Reinitialize only when registers are not retained or when enumeration
+	 * is lost for ngd.
+	 */
+	reinit_completion(&ctrl->reconf);
+
 	writel_relaxed(DEF_NGD_INT_MASK, ngd->base + NGD_INT_EN);
 	rx_msgq = readl_relaxed(ngd->base + NGD_RX_MSGQ_CFG);
 
-- 
GitLab


From c53627f83abc9fb4ac5cbd9dc1c898008bcc2ac8 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:44 +0000
Subject: [PATCH 2187/4122] dt-bindings: slimbus: convert bus description to DT
 schema

Convert the SLIMbus bus description bindings to DT Schema.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-11-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../devicetree/bindings/slimbus/bus.txt       | 60 ------------
 .../bindings/slimbus/slim-ngd-qcom-ctrl.txt   |  2 -
 .../bindings/slimbus/slim-qcom-ctrl.txt       |  3 -
 .../devicetree/bindings/slimbus/slimbus.yaml  | 95 +++++++++++++++++++
 4 files changed, 95 insertions(+), 65 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/slimbus/bus.txt
 create mode 100644 Documentation/devicetree/bindings/slimbus/slimbus.yaml

diff --git a/Documentation/devicetree/bindings/slimbus/bus.txt b/Documentation/devicetree/bindings/slimbus/bus.txt
deleted file mode 100644
index bbe871f82a8b9..0000000000000
--- a/Documentation/devicetree/bindings/slimbus/bus.txt
+++ /dev/null
@@ -1,60 +0,0 @@
-SLIM(Serial Low Power Interchip Media Bus) bus
-
-SLIMbus is a 2-wire bus, and is used to communicate with peripheral
-components like audio-codec.
-
-Required property for SLIMbus controller node:
-- compatible	- name of SLIMbus controller
-
-Child nodes:
-Every SLIMbus controller node can contain zero or more child nodes
-representing slave devices on the bus. Every SLIMbus slave device is
-uniquely determined by the enumeration address containing 4 fields:
-Manufacturer ID, Product code, Device index, and Instance value for
-the device.
-If child node is not present and it is instantiated after device
-discovery (slave device reporting itself present).
-
-In some cases it may be necessary to describe non-probeable device
-details such as non-standard ways of powering up a device. In
-such cases, child nodes for those devices will be present as
-slaves of the SLIMbus controller, as detailed below.
-
-Required property for SLIMbus child node if it is present:
-- reg		- Should be ('Device index', 'Instance ID') from SLIMbus
-		  Enumeration  Address.
-		  Device Index Uniquely identifies multiple Devices within
-		  a single Component.
-		  Instance ID Is for the cases where multiple Devices of the
-		  same type or Class are attached to the bus.
-
-- compatible	-"slimMID,PID". The textual representation of Manufacturer ID,
-	 	  Product Code, shall be in lower case hexadecimal with leading
-		  zeroes suppressed
-
-Optional property for SLIMbus child node if it is present:
-- slim-ifc-dev	- Should be phandle to SLIMBus Interface device.
-		  Required for devices which deal with streams.
-
-SLIMbus example for Qualcomm's slimbus manager component:
-
-	slim@28080000 {
-		compatible = "qcom,apq8064-slim", "qcom,slim";
-		reg = <0x28080000 0x2000>,
-		interrupts = <0 33 0>;
-		clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>;
-		clock-names = "iface", "core";
-		#address-cells = <2>;
-		#size-cell = <0>;
-
-		codec_ifd: ifd@0,0{
-			compatible = "slim217,60";
-			reg = <0 0>;
-		};
-
-		codec: wcd9310@1,0{
-			compatible = "slim217,60";
-			reg = <1 0>;
-			slim-ifc-dev  = <&codec_ifd>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt
index e94a2ad3a710d..7c3d9eb6af5d1 100644
--- a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt
+++ b/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt
@@ -5,8 +5,6 @@ with SLIMBus slaves directly over the bus using messaging interface and
 communicating with master component residing on ADSP for bandwidth and
 data-channel management
 
-Please refer to slimbus/bus.txt for details of the common SLIMBus bindings.
-
 - compatible:
 	Usage: required
 	Value type: <stringlist>
diff --git a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt
index 922dcb8ff24ab..6d955e129f908 100644
--- a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt
+++ b/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt
@@ -4,9 +4,6 @@ master component.
 
 Required properties:
 
- - #address-cells - refer to Documentation/devicetree/bindings/slimbus/bus.txt
- - #size-cells	- refer to Documentation/devicetree/bindings/slimbus/bus.txt
-
  - reg : Offset and length of the register region(s) for the device
  - reg-names : Register region name(s) referenced in reg above
 	 Required register resource entries are:
diff --git a/Documentation/devicetree/bindings/slimbus/slimbus.yaml b/Documentation/devicetree/bindings/slimbus/slimbus.yaml
new file mode 100644
index 0000000000000..22513fb7c59a8
--- /dev/null
+++ b/Documentation/devicetree/bindings/slimbus/slimbus.yaml
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/slimbus/slimbus.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SLIM (Serial Low Power Interchip Media) bus
+
+maintainers:
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description:
+  SLIMbus is a 2-wire bus, and is used to communicate with peripheral
+  components like audio-codec.
+
+properties:
+  $nodename:
+    pattern: "^slim(@.*|-[0-9a-f])*$"
+
+  "#address-cells":
+    const: 2
+
+  "#size-cells":
+    const: 0
+
+patternProperties:
+  "^.*@[0-9a-f]+,[0-9a-f]+$":
+    type: object
+    description: |
+      Every SLIMbus controller node can contain zero or more child nodes
+      representing slave devices on the bus. Every SLIMbus slave device is
+      uniquely determined by the enumeration address containing 4 fields::
+      Manufacturer ID, Product code, Device index, and Instance value for the
+      device.
+
+      If child node is not present and it is instantiated after device
+      discovery (slave device reporting itself present).
+
+      In some cases it may be necessary to describe non-probeable device
+      details such as non-standard ways of powering up a device. In such cases,
+      child nodes for those devices will be present as slaves of the SLIMbus
+      controller.
+
+    properties:
+      compatible:
+        pattern: "^slim[0-9a-f]+,[0-9a-f]+$"
+
+      reg:
+        maxItems: 1
+        description: |
+          Pair of (device index, instande ID), where::
+           - Device index, which uniquely identifies multiple devices within a
+             single component.
+           - Instance ID, can be used for the cases where multiple devices of
+             the same type or class are attached to the bus.
+
+    required:
+      - compatible
+      - reg
+
+    additionalProperties: true
+
+required:
+  - "#address-cells"
+  - "#size-cells"
+
+additionalProperties: true
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-msm8960.h>
+    #include <dt-bindings/clock/qcom,lcc-msm8960.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    soc {
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        slim@28080000 {
+            compatible = "qcom,apq8064-slim", "qcom,slim";
+            reg = <0x28080000 0x2000>, <0x80207c 4>;
+            reg-names = "ctrl", "slew";
+            interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>;
+            clock-names = "iface", "core";
+            #address-cells = <2>;
+            #size-cells = <0>;
+
+            audio-codec@1,0 {
+                compatible = "slim217,60";
+                reg = <1 0>;
+            };
+        };
+    };
-- 
GitLab


From 717bd3dfc4ced0a12bc177b267a0c6fd23eec620 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:45 +0000
Subject: [PATCH 2188/4122] dt-bindings: slimbus: qcom,slim: convert to DT
 schema

Convert the Qualcomm SoC SLIMbus controller bindings to DT Schema.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-12-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/slimbus/qcom,slim.yaml           | 86 +++++++++++++++++++
 .../bindings/slimbus/slim-qcom-ctrl.txt       | 36 --------
 2 files changed, 86 insertions(+), 36 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/slimbus/qcom,slim.yaml
 delete mode 100644 Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt

diff --git a/Documentation/devicetree/bindings/slimbus/qcom,slim.yaml b/Documentation/devicetree/bindings/slimbus/qcom,slim.yaml
new file mode 100644
index 0000000000000..883bda58ca977
--- /dev/null
+++ b/Documentation/devicetree/bindings/slimbus/qcom,slim.yaml
@@ -0,0 +1,86 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/slimbus/qcom,slim.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SoC SLIMbus controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description:
+  SLIMbus controller used when applications processor controls SLIMbus master
+  component.
+
+allOf:
+  - $ref: slimbus.yaml#
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - qcom,apq8064-slim
+      - const: qcom,slim
+
+  reg:
+    items:
+      - description: Physical address of controller register blocks
+      - description: SLEW RATE register
+
+  reg-names:
+    items:
+      - const: ctrl
+      - const: slew
+
+  clocks:
+    items:
+      - description: Interface clock for this controller
+      - description: Interrupt for controller core's BAM
+
+  clock-names:
+    items:
+      - const: iface
+      - const: core
+
+  interrupts:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - reg-names
+  - clocks
+  - clock-names
+  - interrupts
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-msm8960.h>
+    #include <dt-bindings/clock/qcom,lcc-msm8960.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    soc {
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        slim@28080000 {
+            compatible = "qcom,apq8064-slim", "qcom,slim";
+            reg = <0x28080000 0x2000>, <0x80207c 4>;
+            reg-names = "ctrl", "slew";
+            interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+            clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>;
+            clock-names = "iface", "core";
+            #address-cells = <2>;
+            #size-cells = <0>;
+
+            audio-codec@1,0 {
+                compatible = "slim217,60";
+                reg = <1 0>;
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt
deleted file mode 100644
index 6d955e129f908..0000000000000
--- a/Documentation/devicetree/bindings/slimbus/slim-qcom-ctrl.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Qualcomm SLIMbus controller
-This controller is used if applications processor driver controls SLIMbus
-master component.
-
-Required properties:
-
- - reg : Offset and length of the register region(s) for the device
- - reg-names : Register region name(s) referenced in reg above
-	 Required register resource entries are:
-	 "ctrl": Physical address of controller register blocks
- 	 "slew": required for "qcom,apq8064-slim" SOC.
- - compatible : should be "qcom,<SOC-NAME>-slim" for SOC specific compatible
- 		followed by "qcom,slim" for fallback.
- - interrupts : Interrupt number used by this controller
- - clocks : Interface and core clocks used by this SLIMbus controller
- - clock-names : Required clock-name entries are:
-	"iface" : Interface clock for this controller
-	"core" : Interrupt for controller core's BAM
-
-Example:
-
-	slim@28080000 {
-		compatible = "qcom,apq8064-slim", "qcom,slim";
-		reg = <0x28080000 0x2000>, <0x80207C 4>;
-		reg-names = "ctrl", "slew";
-		interrupts = <0 33 0>;
-		clocks = <&lcc SLIMBUS_SRC>, <&lcc AUDIO_SLIMBUS_CLK>;
-		clock-names = "iface", "core";
-		#address-cells = <2>;
-		#size-cell = <0>;
-
-		wcd9310: audio-codec@1,0{
-			compatible = "slim217,60";
-			reg = <1 0>;
-		};
-	};
-- 
GitLab


From 5f115bb92a631b01bee7ca2310c6c353a770656e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 18 Nov 2022 06:52:46 +0000
Subject: [PATCH 2189/4122] dt-bindings: slimbus: qcom,slim-ngd: convert to DT
 schema

Convert the Qualcomm SoC SLIMBus Non Generic Device (NGD) controller
bindings to DT Schema.

During conversion add iommus already present in DTS and extend the
example based on SDM845.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221118065246.6835-13-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/slimbus/qcom,slim-ngd.yaml       | 120 ++++++++++++++++++
 .../bindings/slimbus/slim-ngd-qcom-ctrl.txt   |  82 ------------
 2 files changed, 120 insertions(+), 82 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml
 delete mode 100644 Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt

diff --git a/Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml b/Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml
new file mode 100644
index 0000000000000..abf61c15246eb
--- /dev/null
+++ b/Documentation/devicetree/bindings/slimbus/qcom,slim-ngd.yaml
@@ -0,0 +1,120 @@
+# SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/slimbus/qcom,slim-ngd.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm SoC SLIMBus Non Generic Device (NGD) Controller
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+  - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description:
+  SLIMBus NGD controller is a light-weight driver responsible for communicating
+  with SLIMBus slaves directly over the bus using messaging interface and
+  communicating with master component residing on ADSP for bandwidth and
+  data-channel management
+
+properties:
+  compatible:
+    enum:
+      - qcom,slim-ngd-v1.5.0        # for MSM8996
+      - qcom,slim-ngd-v2.1.0        # for SDM845
+
+  reg:
+    maxItems: 1
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  dmas:
+    maxItems: 2
+
+  dma-names:
+    items:
+      - const: rx
+      - const: tx
+
+  interrupts:
+    maxItems: 1
+
+  iommus:
+    maxItems: 1
+
+patternProperties:
+  "^slim@[0-9a-f]+$":
+    type: object
+    $ref: slimbus.yaml#
+    description:
+      Each subnode represents an instance of NGD
+
+    properties:
+      reg:
+        maxItems: 1
+
+    unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - "#address-cells"
+  - "#size-cells"
+  - dmas
+  - dma-names
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    slim-ngd@171c0000 {
+        compatible = "qcom,slim-ngd-v2.1.0";
+        reg = <0x171c0000 0x2c000>;
+        interrupts = <GIC_SPI 163 IRQ_TYPE_LEVEL_HIGH>;
+
+        dmas = <&slimbam 3>, <&slimbam 4>;
+        dma-names = "rx", "tx";
+        iommus = <&apps_smmu 0x1806 0x0>;
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        slim@1 {
+            reg = <1>;
+            #address-cells = <2>;
+            #size-cells = <0>;
+
+            codec@1,0 {
+                compatible = "slim217,250";
+                reg = <1 0>;
+                slim-ifc-dev = <&wcd9340_ifd>;
+
+                #sound-dai-cells = <1>;
+
+                interrupts-extended = <&tlmm 54 IRQ_TYPE_LEVEL_HIGH>;
+                interrupt-controller;
+                #interrupt-cells = <1>;
+
+                #clock-cells = <0>;
+                clock-frequency = <9600000>;
+                clock-output-names = "mclk";
+                qcom,micbias1-microvolt = <1800000>;
+                qcom,micbias2-microvolt = <1800000>;
+                qcom,micbias3-microvolt = <1800000>;
+                qcom,micbias4-microvolt = <1800000>;
+
+                #address-cells = <1>;
+                #size-cells = <1>;
+
+                reset-gpios = <&tlmm 64 GPIO_ACTIVE_HIGH>;
+
+                /* Rest of the WCD9340 codec */
+            };
+        };
+    };
diff --git a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt b/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt
deleted file mode 100644
index 7c3d9eb6af5d1..0000000000000
--- a/Documentation/devicetree/bindings/slimbus/slim-ngd-qcom-ctrl.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-Qualcomm SLIMBus Non Generic Device (NGD) Controller binding
-
-SLIMBus NGD controller is a light-weight driver responsible for communicating
-with SLIMBus slaves directly over the bus using messaging interface and
-communicating with master component residing on ADSP for bandwidth and
-data-channel management
-
-- compatible:
-	Usage: required
-	Value type: <stringlist>
-	Definition: must be "qcom,slim-ngd-v<MAJOR>.<MINOR>.<STEP>"
-	must be one of the following.
-	"qcom,slim-ngd-v1.5.0" for MSM8996
-	"qcom,slim-ngd-v2.1.0" for SDM845
-
-- reg:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: must specify the base address and size of the controller
-		    register space.
-- dmas
-	Usage: required
-	Value type: <array of phandles>
-	Definition: List of rx and tx dma channels
-
-- dma-names
-	Usage: required
-	Value type: <stringlist>
-	Definition: must be "rx" and "tx".
-
-- interrupts:
-	Usage: required
-	Value type: <prop-encoded-array>
-	Definition: must list controller IRQ.
-
-#address-cells
-	Usage: required
-	Value type: <u32>
-	Definition: Should be 1, reflecting the instance id of ngd.
-
-#size-cells
-	Usage: required
-	Value type: <u32>
-	Definition: Should be 0
-
-= NGD Devices
-Each subnode represents an instance of NGD, must contain the following
-properties:
-
-- reg:
-	Usage: required
-	Value type: <u32>
-	Definition: Should be instance id of ngd.
-
-#address-cells
-	Usage: required
-	Refer to slimbus/bus.txt for details of the common SLIMBus bindings.
-
-#size-cells
-	Usage: required
-	Refer to slimbus/bus.txt for details of the common SLIMBus bindings.
-
-= EXAMPLE
-
-slim@91c0000 {
-	compatible = "qcom,slim-ngd-v1.5.0";
-	reg = <0x91c0000 0x2c000>;
-	interrupts = <0 163 0>;
-	dmas =	<&slimbam 3>, <&slimbam 4>;
-	dma-names = "rx", "tx";
-	#address-cells = <1>;
-	#size-cells = <0>;
-	ngd@1 {
-		reg = <1>;
-		#address-cells = <1>;
-		#size-cells = <1>;
-		codec@1 {
-			compatible = "slim217,1a0";
-			reg  = <1 0>;
-		};
-	};
-};
-- 
GitLab


From b9bf27386dddab16bc455124c54cbeea6cb9a1ca Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:50:54 +0500
Subject: [PATCH 2190/4122] Accessiblity: speakup_soft: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_soft module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-2-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_soft.c | 59 ++++++++++++++------
 1 file changed, 43 insertions(+), 16 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_soft.c b/drivers/accessibility/speakup/speakup_soft.c
index 28c8f60370cf8..6d446824677b4 100644
--- a/drivers/accessibility/speakup/speakup_soft.c
+++ b/drivers/accessibility/speakup/speakup_soft.c
@@ -33,21 +33,30 @@ static struct miscdevice synth_device, synthu_device;
 static int init_pos;
 static int misc_registered;
 
-static struct var_t vars[] = {
-	/* DIRECT is put first so that module_param_named can access it easily */
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
-
-	{ CAPS_START, .u.s = {"\x01+3p" } },
-	{ CAPS_STOP, .u.s = {"\x01-3p" } },
-	{ PAUSE, .u.n = {"\x01P" } },
-	{ RATE, .u.n = {"\x01%ds", 2, 0, 9, 0, 0, NULL } },
-	{ PITCH, .u.n = {"\x01%dp", 5, 0, 9, 0, 0, NULL } },
-	{ INFLECTION, .u.n = {"\x01%dr", 5, 0, 9, 0, 0, NULL } },
-	{ VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } },
-	{ TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } },
-	{ PUNCT, .u.n = {"\x01%db", 0, 0, 3, 0, 0, NULL } },
-	{ VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } },
-	{ FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } },
+
+enum default_vars_id {
+	DIRECT_ID = 0, CAPS_START_ID, CAPS_STOP_ID,
+	PAUSE_ID, RATE_ID, PITCH_ID, INFLECTION_ID,
+	VOL_ID, TONE_ID, PUNCT_ID, VOICE_ID,
+	FREQUENCY_ID, V_LAST_VAR_ID,
+	 NB_ID
+};
+
+
+static struct var_t vars[NB_ID] = {
+
+	[DIRECT_ID]  = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\x01+3p" } },
+	[CAPS_STOP_ID]  = { CAPS_STOP, .u.s = {"\x01-3p" } },
+	[PAUSE_ID]  = { PAUSE, .u.n = {"\x01P" } },
+	[RATE_ID]  = { RATE, .u.n = {"\x01%ds", 2, 0, 9, 0, 0, NULL } },
+	[PITCH_ID]  = { PITCH, .u.n = {"\x01%dp", 5, 0, 9, 0, 0, NULL } },
+	[INFLECTION_ID]  = { INFLECTION, .u.n = {"\x01%dr", 5, 0, 9, 0, 0, NULL } },
+	[VOL_ID]  = { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } },
+	[TONE_ID]  = { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } },
+	[PUNCT_ID]  = { PUNCT, .u.n = {"\x01%db", 0, 0, 3, 0, 0, NULL } },
+	[VOICE_ID]  = { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } },
+	[FREQUENCY_ID]  = { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -451,10 +460,28 @@ static int softsynth_adjust(struct spk_synth *synth, struct st_var_header *var)
 }
 
 module_param_named(start, synth_soft.startup, short, 0444);
-module_param_named(direct, vars[0].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444);
+module_param_named(frequency, vars[FREQUENCY_ID].u.n.default_val, int, 0444);
+
+
 
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
 MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+MODULE_PARM_DESC(rate, "Sets the rate of the synthesizer.");
+MODULE_PARM_DESC(pitch, "Sets the pitch of the synthesizer.");
+MODULE_PARM_DESC(inflection, "Sets the inflection of the synthesizer.");
+MODULE_PARM_DESC(vol, "Sets the volume of the speech synthesizer.");
+MODULE_PARM_DESC(tone, "Sets the tone of the speech synthesizer.");
+MODULE_PARM_DESC(punct, "Sets the amount of punctuation spoken by the synthesizer.");
+MODULE_PARM_DESC(voice, "Sets the voice used by the synthesizer.");
+MODULE_PARM_DESC(frequency, "Sets the frequency of speech synthesizer.");
 
 module_spk_synth(synth_soft);
 
-- 
GitLab


From 5e3e27f040ec3611db14a5efe23c9108831c38be Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:50:55 +0500
Subject: [PATCH 2191/4122] Accessiblity: speakup_apollo: specifying the
 default driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_apollo module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-3-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../accessibility/speakup/speakup_apollo.c    | 46 +++++++++++++++----
 1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_apollo.c b/drivers/accessibility/speakup/speakup_apollo.c
index c84a7e0864b7c..d2fbb3f572219 100644
--- a/drivers/accessibility/speakup/speakup_apollo.c
+++ b/drivers/accessibility/speakup/speakup_apollo.c
@@ -24,15 +24,28 @@
 
 static void do_catch_up(struct spk_synth *synth);
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"cap, " } },
-	{ CAPS_STOP, .u.s = {"" } },
-	{ RATE, .u.n = {"@W%d", 6, 1, 9, 0, 0, NULL } },
-	{ PITCH, .u.n = {"@F%x", 10, 0, 15, 0, 0, NULL } },
-	{ VOL, .u.n = {"@A%x", 10, 0, 15, 0, 0, NULL } },
-	{ VOICE, .u.n = {"@V%d", 1, 1, 6, 0, 0, NULL } },
-	{ LANG, .u.n = {"@=%d,", 1, 1, 4, 0, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, VOICE_ID, LANG_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"cap, " } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"" } },
+	[RATE_ID] = { RATE, .u.n = {"@W%d", 6, 1, 9, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"@F%x", 10, 0, 15, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"@A%x", 10, 0, 15, 0, 0, NULL } },
+	[VOICE_ID] = { VOICE, .u.n = {"@V%d", 1, 1, 6, 0, 0, NULL } },
+	[LANG_ID] = { LANG, .u.n = {"@=%d,", 1, 1, 4, 0, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -193,10 +206,25 @@ static void do_catch_up(struct spk_synth *synth)
 module_param_named(ser, synth_apollo.ser, int, 0444);
 module_param_named(dev, synth_apollo.dev_name, charp, 0444);
 module_param_named(start, synth_apollo.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444);
+module_param_named(lang, vars[LANG_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(voice, "Set the voice variable on load.");
+MODULE_PARM_DESC(lang, "Set the lang variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
+
 
 module_spk_synth(synth_apollo);
 
-- 
GitLab


From d5dab7ff97b8370d8bf406d9be2113b1df7d2f0c Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:50:56 +0500
Subject: [PATCH 2192/4122] Accessiblity: speakup_audptr: specifying the
 default driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding a default variables to the speakup_audptr module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-4-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../accessibility/speakup/speakup_audptr.c    | 42 +++++++++++++++----
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_audptr.c b/drivers/accessibility/speakup/speakup_audptr.c
index 4d16d60db9b23..55813f3e40ff5 100644
--- a/drivers/accessibility/speakup/speakup_audptr.c
+++ b/drivers/accessibility/speakup/speakup_audptr.c
@@ -19,15 +19,24 @@
 static int synth_probe(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\x05[f99]" } },
-	{ CAPS_STOP, .u.s = {"\x05[f80]" } },
-	{ RATE, .u.n = {"\x05[r%d]", 10, 0, 20, 100, -10, NULL } },
-	{ PITCH, .u.n = {"\x05[f%d]", 80, 39, 4500, 0, 0, NULL } },
-	{ VOL, .u.n = {"\x05[g%d]", 21, 0, 40, 0, 0, NULL } },
-	{ TONE, .u.n = {"\x05[s%d]", 9, 0, 63, 0, 0, NULL } },
-	{ PUNCT, .u.n = {"\x05[A%c]", 0, 0, 3, 0, 0, "nmsa" } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID, PUNCT_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\x05[f99]" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05[f80]" } },
+	[RATE_ID] = { RATE, .u.n = {"\x05[r%d]", 10, 0, 20, 100, -10, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"\x05[f%d]", 80, 39, 4500, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\x05[g%d]", 21, 0, 40, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\x05[s%d]", 9, 0, 63, 0, 0, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"\x05[A%c]", 0, 0, 3, 0, 0, "nmsa" } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -158,10 +167,25 @@ static int synth_probe(struct spk_synth *synth)
 module_param_named(ser, synth_audptr.ser, int, 0444);
 module_param_named(dev, synth_audptr.dev_name, charp, 0444);
 module_param_named(start, synth_audptr.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
 
 module_spk_synth(synth_audptr);
 
-- 
GitLab


From 95892c4e70f391955dcd69dd25bd6be8f888590f Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:50:57 +0500
Subject: [PATCH 2193/4122] Accessiblity: speakup_bns: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_bns module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-5-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_bns.c | 36 ++++++++++++++++-----
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_bns.c b/drivers/accessibility/speakup/speakup_bns.c
index b8103eb117b86..60507756499c2 100644
--- a/drivers/accessibility/speakup/speakup_bns.c
+++ b/drivers/accessibility/speakup/speakup_bns.c
@@ -16,14 +16,23 @@
 #define SYNTH_CLEAR 0x18
 #define PROCSPEECH '\r'
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\x05\x31\x32P" } },
-	{ CAPS_STOP, .u.s = {"\x05\x38P" } },
-	{ RATE, .u.n = {"\x05%dE", 8, 1, 16, 0, 0, NULL } },
-	{ PITCH, .u.n = {"\x05%dP", 8, 0, 16, 0, 0, NULL } },
-	{ VOL, .u.n = {"\x05%dV", 8, 0, 16, 0, 0, NULL } },
-	{ TONE, .u.n = {"\x05%dT", 8, 0, 16, 0, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\x05\x31\x32P" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05\x38P" } },
+	[RATE_ID] = { RATE, .u.n = {"\x05%dE", 8, 1, 16, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"\x05%dP", 8, 0, 16, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\x05%dV", 8, 0, 16, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\x05%dT", 8, 0, 16, 0, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -113,10 +122,21 @@ static struct spk_synth synth_bns = {
 module_param_named(ser, synth_bns.ser, int, 0444);
 module_param_named(dev, synth_bns.dev_name, charp, 0444);
 module_param_named(start, synth_bns.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
 
 module_spk_synth(synth_bns);
 
-- 
GitLab


From f613f00eb046970f20f4abe1ab8b26c1424a4f5a Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:50:58 +0500
Subject: [PATCH 2194/4122] Accessiblity: speakup_decext: specifying the
 default driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding a default variables to the speakup_decext module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-6-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../accessibility/speakup/speakup_decext.c    | 44 ++++++++++++++-----
 1 file changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_decext.c b/drivers/accessibility/speakup/speakup_decext.c
index eaebf62300a41..271bcf279bf9b 100644
--- a/drivers/accessibility/speakup/speakup_decext.c
+++ b/drivers/accessibility/speakup/speakup_decext.c
@@ -38,16 +38,25 @@ static void synth_flush(struct spk_synth *synth);
 
 static int in_escape;
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"[:dv ap 222]" } },
-	{ CAPS_STOP, .u.s = {"[:dv ap 100]" } },
-	{ RATE, .u.n = {"[:ra %d]", 7, 0, 9, 150, 25, NULL } },
-	{ PITCH, .u.n = {"[:dv ap %d]", 100, 0, 100, 0, 0, NULL } },
-	{ INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
-	{ VOL, .u.n = {"[:dv gv %d]", 13, 0, 16, 0, 5, NULL } },
-	{ PUNCT, .u.n = {"[:pu %c]", 0, 0, 2, 0, 0, "nsa" } },
-	{ VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID, INFLECTION_ID,
+	VOL_ID, PUNCT_ID, VOICE_ID,
+	DIRECT_ID, V_LAST_ID,
+	NB_ID,
+};
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"[:dv ap 222]" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[:dv ap 100]" } },
+	[RATE_ID] = { RATE, .u.n = {"[:ra %d]", 7, 0, 9, 150, 25, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"[:dv ap %d]", 100, 0, 100, 0, 0, NULL } },
+	[INFLECTION_ID] = { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"[:dv gv %d]", 13, 0, 16, 0, 5, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"[:pu %c]", 0, 0, 2, 0, 0, "nsa" } },
+	[VOICE_ID] = { VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -225,10 +234,25 @@ static void synth_flush(struct spk_synth *synth)
 module_param_named(ser, synth_decext.ser, int, 0444);
 module_param_named(dev, synth_decext.dev_name, charp, 0444);
 module_param_named(start, synth_decext.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(inflection, "Set the inflection variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(voice, "Set the voice variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
 
 module_spk_synth(synth_decext);
 
-- 
GitLab


From b75cfeb116de5894a2e7cc6f8bcf53c4910a7c6f Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:50:59 +0500
Subject: [PATCH 2195/4122] Accessiblity: speakup_decpc: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_decpc module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-7-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_decpc.c | 48 +++++++++++++++----
 1 file changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_decpc.c b/drivers/accessibility/speakup/speakup_decpc.c
index dec314dee214f..083ca92658057 100644
--- a/drivers/accessibility/speakup/speakup_decpc.c
+++ b/drivers/accessibility/speakup/speakup_decpc.c
@@ -134,16 +134,27 @@ static int synth_portlist[] = { 0x340, 0x350, 0x240, 0x250, 0 };
 static int in_escape, is_flushing;
 static int dt_stat, dma_state;
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"[:dv ap 200]" } },
-	{ CAPS_STOP, .u.s = {"[:dv ap 100]" } },
-	{ RATE, .u.n = {"[:ra %d]", 9, 0, 18, 150, 25, NULL } },
-	{ PITCH, .u.n = {"[:dv ap %d]", 80, 0, 100, 20, 0, NULL } },
-	{ INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
-	{ VOL, .u.n = {"[:vo se %d]", 5, 0, 9, 5, 10, NULL } },
-	{ PUNCT, .u.n = {"[:pu %c]", 0, 0, 2, 0, 0, "nsa" } },
-	{ VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID, INFLECTION_ID,
+	VOL_ID, PUNCT_ID, VOICE_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID,
+};
+
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"[:dv ap 200]" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[:dv ap 100]" } },
+	[RATE_ID] = { RATE, .u.n = {"[:ra %d]", 9, 0, 18, 150, 25, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"[:dv ap %d]", 80, 0, 100, 20, 0, NULL } },
+	[INFLECTION_ID] = { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"[:vo se %d]", 5, 0, 9, 5, 10, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"[:pu %c]", 0, 0, 2, 0, 0, "nsa" } },
+	[VOICE_ID] = { VOICE, .u.n = {"[:n%c]", 0, 0, 9, 0, 0, "phfdburwkv" } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -483,8 +494,25 @@ static void dtpc_release(struct spk_synth *synth)
 }
 
 module_param_named(start, synth_dec_pc.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
+
 
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(inflection, "Set the inflection variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(voice, "Set the voice variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
 
 module_spk_synth(synth_dec_pc);
 
-- 
GitLab


From 44d3e977dd361f4491bd5adc31f32ca13243703b Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:00 +0500
Subject: [PATCH 2196/4122] Accessiblity: speakup_dectlk: specifying the
 default driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding a default variables to the speakup_dectlk module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-8-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../accessibility/speakup/speakup_dectlk.c    | 45 ++++++++++++++-----
 1 file changed, 35 insertions(+), 10 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_dectlk.c b/drivers/accessibility/speakup/speakup_dectlk.c
index 2a7e8d727904a..56334405d865c 100644
--- a/drivers/accessibility/speakup/speakup_dectlk.c
+++ b/drivers/accessibility/speakup/speakup_dectlk.c
@@ -40,16 +40,24 @@ static int is_flushing;
 static DEFINE_SPINLOCK(flush_lock);
 static DECLARE_WAIT_QUEUE_HEAD(flush);
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"[:dv ap 160] " } },
-	{ CAPS_STOP, .u.s = {"[:dv ap 100 ] " } },
-	{ RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } },
-	{ PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } },
-	{ INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
-	{ VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } },
-	{ PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } },
-	{ VOICE, .u.n = {"[:n%c] ", 0, 0, 9, 0, 0, "phfdburwkv" } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID, INFLECTION_ID,
+	VOL_ID, PUNCT_ID, VOICE_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID,
+};
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"[:dv ap 160] " } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[:dv ap 100 ] " } },
+	[RATE_ID] = { RATE, .u.n = {"[:ra %d] ", 180, 75, 650, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"[:dv ap %d] ", 122, 50, 350, 0, 0, NULL } },
+	[INFLECTION_ID] = { INFLECTION, .u.n = {"[:dv pr %d] ", 100, 0, 10000, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"[:dv g5 %d] ", 86, 60, 86, 0, 0, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"[:pu %c] ", 0, 0, 2, 0, 0, "nsa" } },
+	[VOICE_ID] = { VOICE, .u.n = {"[:n%c] ", 0, 0, 9, 0, 0, "phfdburwkv" } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -306,10 +314,27 @@ static void synth_flush(struct spk_synth *synth)
 module_param_named(ser, synth_dectlk.ser, int, 0444);
 module_param_named(dev, synth_dectlk.dev_name, charp, 0444);
 module_param_named(start, synth_dectlk.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(inflection, "Set the inflection variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(voice, "Set the voice variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
 
 module_spk_synth(synth_dectlk);
 
-- 
GitLab


From 251ca7da3087361d25bfd05e23ebd711e82ccf20 Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:01 +0500
Subject: [PATCH 2197/4122] Accessiblity: speakup_dtlk: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_dtlk module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-9-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_dtlk.c | 50 +++++++++++++++-----
 1 file changed, 39 insertions(+), 11 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_dtlk.c b/drivers/accessibility/speakup/speakup_dtlk.c
index 6f01e010aaf43..fa826568937bf 100644
--- a/drivers/accessibility/speakup/speakup_dtlk.c
+++ b/drivers/accessibility/speakup/speakup_dtlk.c
@@ -37,17 +37,27 @@ static unsigned int synth_portlist[] = {
 
 static u_char synth_status;
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\x01+35p" } },
-	{ CAPS_STOP, .u.s = {"\x01-35p" } },
-	{ RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } },
-	{ PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } },
-	{ VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } },
-	{ TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } },
-	{ PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } },
-	{ VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } },
-	{ FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID, PUNCT_ID,
+	VOICE_ID, FREQUENCY_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID,
+};
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\x01+35p" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x01-35p" } },
+	[RATE_ID] = { RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } },
+	[VOICE_ID] = { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } },
+	[FREQUENCY_ID] = { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -376,9 +386,27 @@ static void dtlk_release(struct spk_synth *synth)
 
 module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_dtlk.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444);
+module_param_named(frequency, vars[FREQUENCY_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(voice, "Set the voice variable on load.");
+MODULE_PARM_DESC(frequency, "Set the frequency variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
 
 module_spk_synth(synth_dtlk);
 
-- 
GitLab


From 81188dd195788a1b314570c5d901a88de2179cf4 Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:02 +0500
Subject: [PATCH 2198/4122] Accessiblity: speakup_dummy: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_dummy module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-10-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_dummy.c | 53 +++++++++++++++----
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_dummy.c b/drivers/accessibility/speakup/speakup_dummy.c
index 56419dbb28d3c..52b2c5d44576d 100644
--- a/drivers/accessibility/speakup/speakup_dummy.c
+++ b/drivers/accessibility/speakup/speakup_dummy.c
@@ -18,17 +18,30 @@
 #define DRV_VERSION "2.11"
 #define SYNTH_CLEAR '!'
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"CAPS_START\n" } },
-	{ CAPS_STOP, .u.s = {"CAPS_STOP\n" } },
-	{ PAUSE, .u.s = {"PAUSE\n"} },
-	{ RATE, .u.n = {"RATE %d\n", 8, 1, 16, 0, 0, NULL } },
-	{ PITCH, .u.n = {"PITCH %d\n", 8, 0, 16, 0, 0, NULL } },
-	{ INFLECTION, .u.n = {"INFLECTION %d\n", 8, 0, 16, 0, 0, NULL } },
-	{ VOL, .u.n = {"VOL %d\n", 8, 0, 16, 0, 0, NULL } },
-	{ TONE, .u.n = {"TONE %d\n", 8, 0, 16, 0, 0, NULL } },
-	{ PUNCT, .u.n = {"PUNCT %d\n", 0, 0, 3, 0, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	PAUSE_ID,
+	RATE_ID, PITCH_ID, INFLECTION_ID,
+	VOL_ID, TONE_ID, PUNCT_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"CAPS_START\n" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"CAPS_STOP\n" } },
+	[PAUSE_ID] = { PAUSE, .u.s = {"PAUSE\n"} },
+	[RATE_ID] = { RATE, .u.n = {"RATE %d\n", 8, 1, 16, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"PITCH %d\n", 8, 0, 16, 0, 0, NULL } },
+	[INFLECTION_ID] = { INFLECTION, .u.n = {"INFLECTION %d\n", 8, 0, 16, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"VOL %d\n", 8, 0, 16, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"TONE %d\n", 8, 0, 16, 0, 0, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"PUNCT %d\n", 0, 0, 3, 0, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -129,10 +142,28 @@ static struct spk_synth synth_dummy = {
 module_param_named(ser, synth_dummy.ser, int, 0444);
 module_param_named(dev, synth_dummy.dev_name, charp, 0444);
 module_param_named(start, synth_dummy.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(inflection, vars[INFLECTION_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(inflection, "Set the inflection variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
 
 module_spk_synth(synth_dummy);
 
-- 
GitLab


From 031c122f8950f939b715781dadea0b7659f1ea3b Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:03 +0500
Subject: [PATCH 2199/4122] Accessiblity: speakup_keypc: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_keypc module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-11-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_keypc.c | 29 +++++++++++++++----
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_keypc.c b/drivers/accessibility/speakup/speakup_keypc.c
index f61b62f1ea4db..9356f63795603 100644
--- a/drivers/accessibility/speakup/speakup_keypc.c
+++ b/drivers/accessibility/speakup/speakup_keypc.c
@@ -33,12 +33,21 @@ static int synth_port;
 static int port_forced;
 static unsigned int synth_portlist[] = { 0x2a8, 0 };
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"[f130]" } },
-	{ CAPS_STOP, .u.s = {"[f90]" } },
-	{ RATE, .u.n = {"\04%c ", 8, 0, 10, 81, -8, NULL } },
-	{ PITCH, .u.n = {"[f%d]", 5, 0, 9, 40, 10, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"[f130]" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"[f90]" } },
+	[RATE_ID] = { RATE, .u.n = {"\04%c ", 8, 0, 10, 81, -8, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"[f%d]", 5, 0, 9, 40, 10, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -305,9 +314,17 @@ static void keynote_release(struct spk_synth *synth)
 
 module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_keypc.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
+
 
 module_spk_synth(synth_keypc);
 
-- 
GitLab


From 88dce45b3282647ac0b2916d4820956cd239a1e9 Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:04 +0500
Subject: [PATCH 2200/4122] Accessiblity: speakup_ltlk: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_ltlk module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-12-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_ltlk.c | 53 ++++++++++++++++----
 1 file changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_ltlk.c b/drivers/accessibility/speakup/speakup_ltlk.c
index f885cfaa27c80..1e279ae143bfd 100644
--- a/drivers/accessibility/speakup/speakup_ltlk.c
+++ b/drivers/accessibility/speakup/speakup_ltlk.c
@@ -18,17 +18,28 @@
 
 static int synth_probe(struct spk_synth *synth);
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\x01+35p" } },
-	{ CAPS_STOP, .u.s = {"\x01-35p" } },
-	{ RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } },
-	{ PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } },
-	{ VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } },
-	{ TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } },
-	{ PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } },
-	{ VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } },
-	{ FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID, PUNCT_ID,
+	VOICE_ID, FREQUENCY_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\x01+35p" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x01-35p" } },
+	[RATE_ID] = { RATE, .u.n = {"\x01%ds", 8, 0, 9, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"\x01%dp", 50, 0, 99, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\x01%dv", 5, 0, 9, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\x01%dx", 1, 0, 2, 0, 0, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"\x01%db", 7, 0, 15, 0, 0, NULL } },
+	[VOICE_ID] = { VOICE, .u.n = {"\x01%do", 0, 0, 7, 0, 0, NULL } },
+	[FREQUENCY_ID] = { FREQUENCY, .u.n = {"\x01%df", 5, 0, 9, 0, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -160,10 +171,30 @@ static int synth_probe(struct spk_synth *synth)
 module_param_named(ser, synth_ltlk.ser, int, 0444);
 module_param_named(dev, synth_ltlk.dev_name, charp, 0444);
 module_param_named(start, synth_ltlk.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(voice, vars[VOICE_ID].u.n.default_val, int, 0444);
+module_param_named(frequency, vars[FREQUENCY_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(voice, "Set the voice variable on load.");
+MODULE_PARM_DESC(frequency, "Set the frequency variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
 
 module_spk_synth(synth_ltlk);
 
-- 
GitLab


From 9744f41fe0879eb971a5cab950f7022ac49c0a92 Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:05 +0500
Subject: [PATCH 2201/4122] Accessiblity: speakup_spkout: specifying the
 default driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_spkout module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-13-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../accessibility/speakup/speakup_spkout.c    | 43 +++++++++++++++----
 1 file changed, 34 insertions(+), 9 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_spkout.c b/drivers/accessibility/speakup/speakup_spkout.c
index 5e3bb3aa98b62..d3f26095b0eef 100644
--- a/drivers/accessibility/speakup/speakup_spkout.c
+++ b/drivers/accessibility/speakup/speakup_spkout.c
@@ -18,15 +18,26 @@
 
 static void synth_flush(struct spk_synth *synth);
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\x05P+" } },
-	{ CAPS_STOP, .u.s = {"\x05P-" } },
-	{ RATE, .u.n = {"\x05R%d", 7, 0, 9, 0, 0, NULL } },
-	{ PITCH, .u.n = {"\x05P%d", 3, 0, 9, 0, 0, NULL } },
-	{ VOL, .u.n = {"\x05V%d", 9, 0, 9, 0, 0, NULL } },
-	{ TONE, .u.n = {"\x05T%c", 8, 0, 25, 65, 0, NULL } },
-	{ PUNCT, .u.n = {"\x05M%c", 0, 0, 3, 0, 0, "nsma" } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID, PUNCT_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\x05P+" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05P-" } },
+	[RATE_ID] = { RATE, .u.n = {"\x05R%d", 7, 0, 9, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"\x05P%d", 3, 0, 9, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\x05V%d", 9, 0, 9, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\x05T%c", 8, 0, 25, 65, 0, NULL } },
+	[PUNCT_ID] = { PUNCT, .u.n = {"\x05M%c", 0, 0, 3, 0, 0, "nsma" } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -124,10 +135,24 @@ static void synth_flush(struct spk_synth *synth)
 module_param_named(ser, synth_spkout.ser, int, 0444);
 module_param_named(dev, synth_spkout.dev_name, charp, 0444);
 module_param_named(start, synth_spkout.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(punct, vars[PUNCT_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(punct, "Set the punct variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
+
 
 module_spk_synth(synth_spkout);
 
-- 
GitLab


From dfa6c10c89f8e9d001d649201bd5a5d821c23c36 Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:06 +0500
Subject: [PATCH 2202/4122] Accessiblity: speakup_txprt: specifying the default
 driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding a default variables to the speakup_txprt module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Link: https://lore.kernel.org/r/20221109215108.7933-14-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/speakup_txprt.c | 45 +++++++++++++++----
 1 file changed, 37 insertions(+), 8 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_txprt.c b/drivers/accessibility/speakup/speakup_txprt.c
index 9e781347f7ebb..4d0a0d4c41f0b 100644
--- a/drivers/accessibility/speakup/speakup_txprt.c
+++ b/drivers/accessibility/speakup/speakup_txprt.c
@@ -16,14 +16,29 @@
 #define SYNTH_CLEAR 0x18
 #define PROCSPEECH '\r' /* process speech char */
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\x05P8" } },
-	{ CAPS_STOP, .u.s = {"\x05P5" } },
-	{ RATE, .u.n = {"\x05R%d", 5, 0, 9, 0, 0, NULL } },
-	{ PITCH, .u.n = {"\x05P%d", 5, 0, 9, 0, 0, NULL } },
-	{ VOL, .u.n = {"\x05V%d", 5, 0, 9, 0, 0, NULL } },
-	{ TONE, .u.n = {"\x05T%c", 12, 0, 25, 61, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\x05P8" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\x05P5" } },
+	[RATE_ID] = { RATE, .u.n = {"\x05R%d", 5, 0, 9, 0, 0, NULL } },
+	[PITCH_ID] = { PITCH, .u.n = {"\x05P%d", 5, 0, 9, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\x05V%d", 5, 0, 9, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\x05T%c", 12, 0, 25, 61, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 	 };
 
@@ -112,10 +127,24 @@ static struct spk_synth synth_txprt = {
 module_param_named(ser, synth_txprt.ser, int, 0444);
 module_param_named(dev, synth_txprt.dev_name, charp, 0444);
 module_param_named(start, synth_txprt.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
+
 
 module_spk_synth(synth_txprt);
 
-- 
GitLab


From cf0b4652d6acaed768b81b153872476a10b7db3b Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:07 +0500
Subject: [PATCH 2203/4122] Accessiblity: speakup_acntpc: specifying the
 default driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_acntpc module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-15-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../accessibility/speakup/speakup_acntpc.c    | 38 +++++++++++++++----
 1 file changed, 30 insertions(+), 8 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_acntpc.c b/drivers/accessibility/speakup/speakup_acntpc.c
index a55b60754eb10..a27e6bbf05da3 100644
--- a/drivers/accessibility/speakup/speakup_acntpc.c
+++ b/drivers/accessibility/speakup/speakup_acntpc.c
@@ -34,14 +34,23 @@ static int synth_port_control;
 static int port_forced;
 static unsigned int synth_portlist[] = { 0x2a8, 0 };
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\033P8" } },
-	{ CAPS_STOP, .u.s = {"\033P5" } },
-	{ RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } },
-	{ PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } },
-	{ VOL, .u.n = {"\033A%d", 5, 0, 9, 0, 0, NULL } },
-	{ TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\033P8" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\033P5" } },
+	[RATE_ID] = { RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } },
+	[PITCH_ID] = { PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\033A%d", 5, 0, 9, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -305,9 +314,22 @@ static void accent_release(struct spk_synth *synth)
 
 module_param_hw_named(port, port_forced, int, ioport, 0444);
 module_param_named(start, synth_acntpc.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
+
+
 
 MODULE_PARM_DESC(port, "Set the port for the synthesizer (override probing).");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
 
 module_spk_synth(synth_acntpc);
 
-- 
GitLab


From a606dd6253b8de8dc81a1112ab702edfe7a98d72 Mon Sep 17 00:00:00 2001
From: Osama Muhammad <osmtendev@gmail.com>
Date: Thu, 10 Nov 2022 02:51:08 +0500
Subject: [PATCH 2204/4122] Accessiblity: speakup_acntsa: specifying the
 default driver parameters among the module params

This is an enhancement which allows to specify the default driver
parameters among the module parameters.

Adding default variables to the speakup_acntsa module
allows to easily set that at boot, rather than
setting the sys variables after boot.
More details can be found here:
https://github.com/linux-speakup/speakup/issues/7

Signed-off-by: Osama Muhammad <osmtendev@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221109215108.7933-16-osmtendev@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../accessibility/speakup/speakup_acntsa.c    | 37 +++++++++++++++----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/drivers/accessibility/speakup/speakup_acntsa.c b/drivers/accessibility/speakup/speakup_acntsa.c
index 2697c51ed6b56..26bb9f9399d33 100644
--- a/drivers/accessibility/speakup/speakup_acntsa.c
+++ b/drivers/accessibility/speakup/speakup_acntsa.c
@@ -19,14 +19,24 @@
 
 static int synth_probe(struct spk_synth *synth);
 
-static struct var_t vars[] = {
-	{ CAPS_START, .u.s = {"\033P8" } },
-	{ CAPS_STOP, .u.s = {"\033P5" } },
-	{ RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } },
-	{ PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } },
-	{ VOL, .u.n = {"\033A%d", 9, 0, 9, 0, 0, NULL } },
-	{ TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } },
-	{ DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
+
+enum default_vars_id {
+	CAPS_START_ID = 0, CAPS_STOP_ID,
+	RATE_ID, PITCH_ID,
+	VOL_ID, TONE_ID,
+	DIRECT_ID, V_LAST_VAR_ID,
+	NB_ID
+};
+
+
+static struct var_t vars[NB_ID] = {
+	[CAPS_START_ID] = { CAPS_START, .u.s = {"\033P8" } },
+	[CAPS_STOP_ID] = { CAPS_STOP, .u.s = {"\033P5" } },
+	[RATE_ID] = { RATE, .u.n = {"\033R%c", 9, 0, 17, 0, 0, "0123456789abcdefgh" } },
+	[PITCH_ID] = { PITCH, .u.n = {"\033P%d", 5, 0, 9, 0, 0, NULL } },
+	[VOL_ID] = { VOL, .u.n = {"\033A%d", 9, 0, 9, 0, 0, NULL } },
+	[TONE_ID] = { TONE, .u.n = {"\033V%d", 5, 0, 9, 0, 0, NULL } },
+	[DIRECT_ID] = { DIRECT, .u.n = {NULL, 0, 0, 1, 0, 0, NULL } },
 	V_LAST_VAR
 };
 
@@ -129,10 +139,21 @@ static int synth_probe(struct spk_synth *synth)
 module_param_named(ser, synth_acntsa.ser, int, 0444);
 module_param_named(dev, synth_acntsa.dev_name, charp, 0444);
 module_param_named(start, synth_acntsa.startup, short, 0444);
+module_param_named(rate, vars[RATE_ID].u.n.default_val, int, 0444);
+module_param_named(pitch, vars[PITCH_ID].u.n.default_val, int, 0444);
+module_param_named(vol, vars[VOL_ID].u.n.default_val, int, 0444);
+module_param_named(tone, vars[TONE_ID].u.n.default_val, int, 0444);
+module_param_named(direct, vars[DIRECT_ID].u.n.default_val, int, 0444);
 
 MODULE_PARM_DESC(ser, "Set the serial port for the synthesizer (0-based).");
 MODULE_PARM_DESC(dev, "Set the device e.g. ttyUSB0, for the synthesizer.");
 MODULE_PARM_DESC(start, "Start the synthesizer once it is loaded.");
+MODULE_PARM_DESC(rate, "Set the rate variable on load.");
+MODULE_PARM_DESC(pitch, "Set the pitch variable on load.");
+MODULE_PARM_DESC(vol, "Set the vol variable on load.");
+MODULE_PARM_DESC(tone, "Set the tone variable on load.");
+MODULE_PARM_DESC(direct, "Set the direct variable on load.");
+
 
 module_spk_synth(synth_acntsa);
 
-- 
GitLab


From f43241aafedb1c7d72825cea1a521cef183ee61d Mon Sep 17 00:00:00 2001
From: Mushahid Hussain <mushi.shar@gmail.com>
Date: Tue, 15 Nov 2022 15:05:29 +0500
Subject: [PATCH 2205/4122] accessibility: speakup: Specify spk_vars among
 module parameters

This is an enhancement which allows setting default variables for
speakup module at the boot rather than setting the sys variables after
the boot.

Signed-off-by: Mushahid Hussain <mushi.shar@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221115100530.91174-2-mushi.shar@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/main.c | 60 ++++++++++++++++++++++------
 1 file changed, 47 insertions(+), 13 deletions(-)

diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c
index 73db0cb44fc7b..1df9a27a5214d 100644
--- a/drivers/accessibility/speakup/main.c
+++ b/drivers/accessibility/speakup/main.c
@@ -1268,20 +1268,28 @@ int spk_set_key_info(const u_char *key_info, u_char *k_buffer)
 	return 0;
 }
 
-static struct var_t spk_vars[] = {
+enum spk_vars_id {
+	BELL_POS_ID = 0, SPELL_DELAY_ID, ATTRIB_BLEEP_ID,
+	BLEEPS_ID, BLEEP_TIME_ID, PUNC_LEVEL_ID,
+	READING_PUNC_ID, CURSOR_TIME_ID, SAY_CONTROL_ID,
+	SAY_WORD_CTL_ID, NO_INTERRUPT_ID, KEY_ECHO_ID,
+	V_LAST_VAR_ID, NB_ID
+};
+
+static struct var_t spk_vars[NB_ID] = {
 	/* bell must be first to set high limit */
-	{BELL_POS, .u.n = {NULL, 0, 0, 0, 0, 0, NULL} },
-	{SPELL_DELAY, .u.n = {NULL, 0, 0, 4, 0, 0, NULL} },
-	{ATTRIB_BLEEP, .u.n = {NULL, 1, 0, 3, 0, 0, NULL} },
-	{BLEEPS, .u.n = {NULL, 3, 0, 3, 0, 0, NULL} },
-	{BLEEP_TIME, .u.n = {NULL, 30, 1, 200, 0, 0, NULL} },
-	{PUNC_LEVEL, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} },
-	{READING_PUNC, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} },
-	{CURSOR_TIME, .u.n = {NULL, 120, 50, 600, 0, 0, NULL} },
-	{SAY_CONTROL, TOGGLE_0},
-	{SAY_WORD_CTL, TOGGLE_0},
-	{NO_INTERRUPT, TOGGLE_0},
-	{KEY_ECHO, .u.n = {NULL, 1, 0, 2, 0, 0, NULL} },
+	[BELL_POS_ID] = { BELL_POS, .u.n = {NULL, 0, 0, 0, 0, 0, NULL} },
+	[SPELL_DELAY_ID] = { SPELL_DELAY, .u.n = {NULL, 0, 0, 4, 0, 0, NULL} },
+	[ATTRIB_BLEEP_ID] = { ATTRIB_BLEEP, .u.n = {NULL, 1, 0, 3, 0, 0, NULL} },
+	[BLEEPS_ID] = { BLEEPS, .u.n = {NULL, 3, 0, 3, 0, 0, NULL} },
+	[BLEEP_TIME_ID] = { BLEEP_TIME, .u.n = {NULL, 30, 1, 200, 0, 0, NULL} },
+	[PUNC_LEVEL_ID] = { PUNC_LEVEL, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} },
+	[READING_PUNC_ID] = { READING_PUNC, .u.n = {NULL, 1, 0, 4, 0, 0, NULL} },
+	[CURSOR_TIME_ID] = { CURSOR_TIME, .u.n = {NULL, 120, 50, 600, 0, 0, NULL} },
+	[SAY_CONTROL_ID] { SAY_CONTROL, TOGGLE_0},
+	[SAY_WORD_CTL_ID] = {SAY_WORD_CTL, TOGGLE_0},
+	[NO_INTERRUPT_ID] = { NO_INTERRUPT, TOGGLE_0},
+	[KEY_ECHO_ID] = { KEY_ECHO, .u.n = {NULL, 1, 0, 2, 0, 0, NULL} },
 	V_LAST_VAR
 };
 
@@ -2453,5 +2461,31 @@ out:
 	return err;
 }
 
+module_param_named(bell_pos, spk_vars[BELL_POS_ID].u.n.default_val, int, 0444);
+module_param_named(spell_delay, spk_vars[SPELL_DELAY_ID].u.n.default_val, int, 0444);
+module_param_named(attrib_bleep, spk_vars[ATTRIB_BLEEP_ID].u.n.default_val, int, 0444);
+module_param_named(bleeps, spk_vars[BLEEPS_ID].u.n.default_val, int, 0444);
+module_param_named(bleep_time, spk_vars[BLEEP_TIME_ID].u.n.default_val, int, 0444);
+module_param_named(punc_level, spk_vars[PUNC_LEVEL_ID].u.n.default_val, int, 0444);
+module_param_named(reading_punc, spk_vars[READING_PUNC_ID].u.n.default_val, int, 0444);
+module_param_named(cursor_time, spk_vars[CURSOR_TIME_ID].u.n.default_val, int, 0444);
+module_param_named(say_control, spk_vars[SAY_CONTROL_ID].u.n.default_val, int, 0444);
+module_param_named(say_word_ctl, spk_vars[SAY_WORD_CTL_ID].u.n.default_val, int, 0444);
+module_param_named(no_interrupt, spk_vars[NO_INTERRUPT_ID].u.n.default_val, int, 0444);
+module_param_named(key_echo, spk_vars[KEY_ECHO_ID].u.n.default_val, int, 0444);
+
+MODULE_PARM_DESC(bell_pos, "This works much like a typewriter bell. If for example 72 is echoed to bell_pos, it will beep the PC speaker when typing on a line past character 72.");
+MODULE_PARM_DESC(spell_delay, "This controls how fast a word is spelled when speakup's spell word review command is pressed.");
+MODULE_PARM_DESC(attrib_bleep, "Beeps the PC speaker when there is an attribute change such as background color when using speakup review commands. One = on, zero = off.");
+MODULE_PARM_DESC(bleeps, "This controls whether one hears beeps through the PC speaker when using speakup review commands.");
+MODULE_PARM_DESC(bleep_time, "This controls the duration of the PC speaker beeps speakup produces.");
+MODULE_PARM_DESC(punc_level, "Controls the level of punctuation spoken as the screen is displayed, not reviewed.");
+MODULE_PARM_DESC(reading_punc, "It controls the level of punctuation when reviewing the screen with speakup's screen review commands.");
+MODULE_PARM_DESC(cursor_time, "This controls cursor delay when using arrow keys.");
+MODULE_PARM_DESC(say_control, "This controls if speakup speaks shift, alt and control when those keys are pressed or not.");
+MODULE_PARM_DESC(say_word_ctl, "Sets thw say_word_ctl  on load.");
+MODULE_PARM_DESC(no_interrupt, "Controls if typing interrupts output from speakup.");
+MODULE_PARM_DESC(key_echo, "Controls if speakup speaks keys when they are typed. One = on zero = off or don't echo keys.");
+
 module_init(speakup_init);
 module_exit(speakup_exit);
-- 
GitLab


From 72b8ec15f97bbabbb2379c87cb5f7a9dc63f0d88 Mon Sep 17 00:00:00 2001
From: Mushahid Hussain <mushi.shar@gmail.com>
Date: Tue, 15 Nov 2022 15:05:30 +0500
Subject: [PATCH 2206/4122] accessibility: speakup: phonetic spelling while
 arrowing letter by letter

This patch includes an enhancement requested frequently on the mailing
list.[1][2] It adds a variable, cur_phonetic in the spk_vars, which can
be set as a module parameter, as well as in /sys/speakup/cur_phonetic.
This patch also documents cur_phonetic as a sysfs attribute in
sysfs-driver-speakup.

When cur_phonetic=1, it causes speakup to speak letters phonetically if
paused on the character while arrowing through a word.

When a user does not set cur_phonetic to any value, the default value
for it would be 0.

[1]: https://github.com/linux-speakup/speakup/issues/6
[2]: https://github.com/linux-speakup/speakup/issues/5

since V1:
	- removed unnecessary lines

Signed-off-by: Mushahid Hussain<mushi.shar@gmail.com>
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221115100530.91174-3-mushi.shar@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/stable/sysfs-driver-speakup |  9 +++++++++
 drivers/accessibility/speakup/kobjects.c      |  3 +++
 drivers/accessibility/speakup/main.c          | 14 +++++++++++---
 drivers/accessibility/speakup/speakup.h       |  1 +
 drivers/accessibility/speakup/spk_types.h     |  2 +-
 drivers/accessibility/speakup/varhandlers.c   |  1 +
 6 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/Documentation/ABI/stable/sysfs-driver-speakup b/Documentation/ABI/stable/sysfs-driver-speakup
index dc2a6ba1674b8..bcb6831aa114d 100644
--- a/Documentation/ABI/stable/sysfs-driver-speakup
+++ b/Documentation/ABI/stable/sysfs-driver-speakup
@@ -35,6 +35,15 @@ Description:	This controls cursor delay when using arrow keys. When a
 		characters. Set this to a higher value to adjust for the delay
 		and better synchronisation between cursor position and speech.
 
+What:		/sys/accessibility/speakup/cur_phonetic
+KernelVersion:	6.2
+Contact:	speakup@linux-speakup.org
+Description:	This allows speakup to speak letters phoneticaly when arrowing through
+		a word letter by letter. This doesn't affect the spelling when typing
+		the characters. When cur_phonetic=1, speakup will speak characters
+		phoneticaly when arrowing over a letter. When cur_phonetic=0, speakup
+		will speak letters as normally.
+
 What:		/sys/accessibility/speakup/delimiters
 KernelVersion:	2.6
 Contact:	speakup@linux-speakup.org
diff --git a/drivers/accessibility/speakup/kobjects.c b/drivers/accessibility/speakup/kobjects.c
index 41ae24ab5d086..a7522d4098025 100644
--- a/drivers/accessibility/speakup/kobjects.c
+++ b/drivers/accessibility/speakup/kobjects.c
@@ -914,6 +914,8 @@ static struct kobj_attribute say_word_ctl_attribute =
 	__ATTR(say_word_ctl, 0644, spk_var_show, spk_var_store);
 static struct kobj_attribute spell_delay_attribute =
 	__ATTR(spell_delay, 0644, spk_var_show, spk_var_store);
+static struct kobj_attribute cur_phonetic_attribute =
+	__ATTR(cur_phonetic, 0644, spk_var_show, spk_var_store);
 
 /*
  * These attributes are i18n related.
@@ -967,6 +969,7 @@ static struct attribute *main_attrs[] = {
 	&say_control_attribute.attr,
 	&say_word_ctl_attribute.attr,
 	&spell_delay_attribute.attr,
+	&cur_phonetic_attribute.attr,
 	NULL,
 };
 
diff --git a/drivers/accessibility/speakup/main.c b/drivers/accessibility/speakup/main.c
index 1df9a27a5214d..4733fd6334ab8 100644
--- a/drivers/accessibility/speakup/main.c
+++ b/drivers/accessibility/speakup/main.c
@@ -65,6 +65,7 @@ int spk_key_echo, spk_say_word_ctl;
 int spk_say_ctrl, spk_bell_pos;
 short spk_punc_mask;
 int spk_punc_level, spk_reading_punc;
+int spk_cur_phonetic;
 char spk_str_caps_start[MAXVARLEN + 1] = "\0";
 char spk_str_caps_stop[MAXVARLEN + 1] = "\0";
 char spk_str_pause[MAXVARLEN + 1] = "\0";
@@ -1273,7 +1274,7 @@ enum spk_vars_id {
 	BLEEPS_ID, BLEEP_TIME_ID, PUNC_LEVEL_ID,
 	READING_PUNC_ID, CURSOR_TIME_ID, SAY_CONTROL_ID,
 	SAY_WORD_CTL_ID, NO_INTERRUPT_ID, KEY_ECHO_ID,
-	V_LAST_VAR_ID, NB_ID
+	CUR_PHONETIC_ID, V_LAST_VAR_ID, NB_ID
 };
 
 static struct var_t spk_vars[NB_ID] = {
@@ -1290,6 +1291,7 @@ static struct var_t spk_vars[NB_ID] = {
 	[SAY_WORD_CTL_ID] = {SAY_WORD_CTL, TOGGLE_0},
 	[NO_INTERRUPT_ID] = { NO_INTERRUPT, TOGGLE_0},
 	[KEY_ECHO_ID] = { KEY_ECHO, .u.n = {NULL, 1, 0, 2, 0, 0, NULL} },
+	[CUR_PHONETIC_ID] = { CUR_PHONETIC, .u.n = {NULL, 0, 0, 1, 0, 0, NULL} },
 	V_LAST_VAR
 };
 
@@ -1720,8 +1722,12 @@ static void cursor_done(struct timer_list *unused)
 		speakup_win_say(vc);
 	else if (is_cursor == 1 || is_cursor == 4)
 		say_line_from_to(vc, 0, vc->vc_cols, 0);
-	else
-		say_char(vc);
+	else {
+		if (spk_cur_phonetic == 1)
+			say_phonetic_char(vc);
+		else
+			say_char(vc);
+	}
 	spk_keydown = 0;
 	is_cursor = 0;
 out:
@@ -2473,6 +2479,7 @@ module_param_named(say_control, spk_vars[SAY_CONTROL_ID].u.n.default_val, int, 0
 module_param_named(say_word_ctl, spk_vars[SAY_WORD_CTL_ID].u.n.default_val, int, 0444);
 module_param_named(no_interrupt, spk_vars[NO_INTERRUPT_ID].u.n.default_val, int, 0444);
 module_param_named(key_echo, spk_vars[KEY_ECHO_ID].u.n.default_val, int, 0444);
+module_param_named(cur_phonetic, spk_vars[CUR_PHONETIC_ID].u.n.default_val, int, 0444);
 
 MODULE_PARM_DESC(bell_pos, "This works much like a typewriter bell. If for example 72 is echoed to bell_pos, it will beep the PC speaker when typing on a line past character 72.");
 MODULE_PARM_DESC(spell_delay, "This controls how fast a word is spelled when speakup's spell word review command is pressed.");
@@ -2486,6 +2493,7 @@ MODULE_PARM_DESC(say_control, "This controls if speakup speaks shift, alt and co
 MODULE_PARM_DESC(say_word_ctl, "Sets thw say_word_ctl  on load.");
 MODULE_PARM_DESC(no_interrupt, "Controls if typing interrupts output from speakup.");
 MODULE_PARM_DESC(key_echo, "Controls if speakup speaks keys when they are typed. One = on zero = off or don't echo keys.");
+MODULE_PARM_DESC(cur_phonetic, "Controls if speakup speaks letters phonetically during navigation. One = on zero = off or don't speak phonetically.");
 
 module_init(speakup_init);
 module_exit(speakup_exit);
diff --git a/drivers/accessibility/speakup/speakup.h b/drivers/accessibility/speakup/speakup.h
index 33594f5a79837..364fde99749ed 100644
--- a/drivers/accessibility/speakup/speakup.h
+++ b/drivers/accessibility/speakup/speakup.h
@@ -105,6 +105,7 @@ extern int spk_no_intr, spk_say_ctrl, spk_say_word_ctl, spk_punc_level;
 extern int spk_reading_punc, spk_attrib_bleep, spk_bleeps;
 extern int spk_bleep_time, spk_bell_pos;
 extern int spk_spell_delay, spk_key_echo;
+extern int spk_cur_phonetic;
 extern short spk_punc_mask;
 extern short spk_pitch_shift, synth_flags;
 extern bool spk_quiet_boot;
diff --git a/drivers/accessibility/speakup/spk_types.h b/drivers/accessibility/speakup/spk_types.h
index 3a14d39bf896e..08011518a28ae 100644
--- a/drivers/accessibility/speakup/spk_types.h
+++ b/drivers/accessibility/speakup/spk_types.h
@@ -49,7 +49,7 @@ enum var_id_t {
 	RATE, PITCH, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG,
 	DIRECT, PAUSE,
 	CAPS_START, CAPS_STOP, CHARTAB, INFLECTION, FLUSH,
-	MAXVARS
+	CUR_PHONETIC, MAXVARS
 };
 
 typedef int (*special_func)(struct vc_data *vc, u_char type, u_char ch,
diff --git a/drivers/accessibility/speakup/varhandlers.c b/drivers/accessibility/speakup/varhandlers.c
index e1c9f42e39f0f..462f8d879053c 100644
--- a/drivers/accessibility/speakup/varhandlers.c
+++ b/drivers/accessibility/speakup/varhandlers.c
@@ -48,6 +48,7 @@ static struct st_var_header var_headers[] = {
 	{ "chartab", CHARTAB, VAR_PROC, NULL, NULL },
 	{ "direct", DIRECT, VAR_NUM, NULL, NULL },
 	{ "pause", PAUSE, VAR_STRING, spk_str_pause, NULL },
+	{ "cur_phonetic", CUR_PHONETIC, VAR_NUM, &spk_cur_phonetic, NULL },
 };
 
 static struct st_var_header *var_ptrs[MAXVARS] = { NULL, NULL, NULL };
-- 
GitLab


From 7610615e8cdb3f6f5bbd9d8e7a5d8a63e3cabf2e Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 19 Nov 2022 11:57:21 +0800
Subject: [PATCH 2207/4122] test_firmware: fix memory leak in
 test_firmware_init()

When misc_register() failed in test_firmware_init(), the memory pointed
by test_fw_config->name is not released. The memory leak information is
as follows:
unreferenced object 0xffff88810a34cb00 (size 32):
  comm "insmod", pid 7952, jiffies 4294948236 (age 49.060s)
  hex dump (first 32 bytes):
    74 65 73 74 2d 66 69 72 6d 77 61 72 65 2e 62 69  test-firmware.bi
    6e 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  n...............
  backtrace:
    [<ffffffff81b21fcb>] __kmalloc_node_track_caller+0x4b/0xc0
    [<ffffffff81affb96>] kstrndup+0x46/0xc0
    [<ffffffffa0403a49>] __test_firmware_config_init+0x29/0x380 [test_firmware]
    [<ffffffffa040f068>] 0xffffffffa040f068
    [<ffffffff81002c41>] do_one_initcall+0x141/0x780
    [<ffffffff816a72c3>] do_init_module+0x1c3/0x630
    [<ffffffff816adb9e>] load_module+0x623e/0x76a0
    [<ffffffff816af471>] __do_sys_finit_module+0x181/0x240
    [<ffffffff89978f99>] do_syscall_64+0x39/0xb0
    [<ffffffff89a0008b>] entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: c92316bf8e94 ("test_firmware: add batched firmware tests")
Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Link: https://lore.kernel.org/r/20221119035721.18268-1-shaozhengchao@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test_firmware.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 0c714cdd51eff..e207bc08820d0 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -1491,6 +1491,7 @@ static int __init test_firmware_init(void)
 
 	rc = misc_register(&test_fw_misc_device);
 	if (rc) {
+		__test_firmware_config_free();
 		kfree(test_fw_config);
 		pr_err("could not register misc device: %d\n", rc);
 		return rc;
-- 
GitLab


From a4cb1004aeed2ab893a058fad00a5b41a12c4691 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Nov 2022 22:59:29 +0800
Subject: [PATCH 2208/4122] misc: ocxl: fix possible name leak in
 ocxl_file_register_afu()

If device_register() returns error in ocxl_file_register_afu(),
the name allocated by dev_set_name() need be freed. As comment
of device_register() says, it should use put_device() to give
up the reference in the error path. So fix this by calling
put_device(), then the name can be freed in kobject_cleanup(),
and info is freed in info_release().

Fixes: 75ca758adbaf ("ocxl: Create a clear delineation between ocxl backend & frontend")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111145929.2429271-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ocxl/file.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index d46dba2df5a10..452d5777a0e4c 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -541,8 +541,11 @@ int ocxl_file_register_afu(struct ocxl_afu *afu)
 		goto err_put;
 
 	rc = device_register(&info->dev);
-	if (rc)
-		goto err_put;
+	if (rc) {
+		free_minor(info);
+		put_device(&info->dev);
+		return rc;
+	}
 
 	rc = ocxl_sysfs_register_afu(info);
 	if (rc)
-- 
GitLab


From 27158c72678b39ee01cc01de1aba6b51c71abe2f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 21 Nov 2022 23:43:39 +0800
Subject: [PATCH 2209/4122] ocxl: fix pci device refcount leak when calling
 get_function_0()

get_function_0() calls pci_get_domain_bus_and_slot(), as comment
says, it returns a pci device with refcount increment, so after
using it, pci_dev_put() needs be called.

Get the device reference when get_function_0() is not called, so
pci_dev_put() can be called in the error path and callers
unconditionally. And add comment above get_dvsec_vendor0() to tell
callers to call pci_dev_put().

Fixes: 87db7579ebd5 ("ocxl: control via sysfs whether the FPGA is reloaded on a link reset")
Suggested-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Link: https://lore.kernel.org/r/20221121154339.4088935-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ocxl/config.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index e401a51596b9c..92ab49705f645 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -193,6 +193,18 @@ static int read_dvsec_vendor(struct pci_dev *dev)
 	return 0;
 }
 
+/**
+ * get_dvsec_vendor0() - Find a related PCI device (function 0)
+ * @dev: PCI device to match
+ * @dev0: The PCI device (function 0) found
+ * @out_pos: The position of PCI device (function 0)
+ *
+ * Returns 0 on success, negative on failure.
+ *
+ * NOTE: If it's successful, the reference of dev0 is increased,
+ * so after using it, the callers must call pci_dev_put() to give
+ * up the reference.
+ */
 static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0,
 			     int *out_pos)
 {
@@ -202,10 +214,14 @@ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0,
 		dev = get_function_0(dev);
 		if (!dev)
 			return -1;
+	} else {
+		dev = pci_dev_get(dev);
 	}
 	pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
-	if (!pos)
+	if (!pos) {
+		pci_dev_put(dev);
 		return -1;
+	}
 	*dev0 = dev;
 	*out_pos = pos;
 	return 0;
@@ -222,6 +238,7 @@ int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val)
 
 	pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
 			      &reset_reload);
+	pci_dev_put(dev0);
 	*val = !!(reset_reload & BIT(0));
 	return 0;
 }
@@ -243,6 +260,7 @@ int ocxl_config_set_reset_reload(struct pci_dev *dev, int val)
 		reset_reload &= ~BIT(0);
 	pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
 			       reset_reload);
+	pci_dev_put(dev0);
 	return 0;
 }
 
-- 
GitLab


From e68a558fb2af06daa38f86dad25061ddd90ab131 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Fri, 18 Nov 2022 02:22:06 +0100
Subject: [PATCH 2210/4122] speakup: Fix building as extmod
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

spk_priv_keyinfo.h should be opened from the speakup directory. When
building as extmod we should thus open it from the module directory
rather than the main Linux source.

Signed-off-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
Link: https://lore.kernel.org/r/20221118012206.j6hq6b6nfx2jhqg6@begin
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/accessibility/speakup/Makefile      | 4 +++-
 drivers/accessibility/speakup/makemapdata.c | 7 +++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/accessibility/speakup/Makefile b/drivers/accessibility/speakup/Makefile
index ba69b0803d428..6f6a83565c0de 100644
--- a/drivers/accessibility/speakup/Makefile
+++ b/drivers/accessibility/speakup/Makefile
@@ -40,7 +40,9 @@ hostprogs += makemapdata
 makemapdata-objs := makemapdata.o
 
 quiet_cmd_mkmap = MKMAP   $@
-      cmd_mkmap = TOPDIR=$(srctree) $(obj)/makemapdata > $@
+      cmd_mkmap = TOPDIR=$(srctree) \
+		  SPKDIR=$(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD),$(srctree)/drivers/accessibility/speakup) \
+		  $(obj)/makemapdata > $@
 
 $(obj)/mapdata.h: $(obj)/makemapdata
 	$(call cmd,mkmap)
diff --git a/drivers/accessibility/speakup/makemapdata.c b/drivers/accessibility/speakup/makemapdata.c
index 81db9ebf1fff8..d7d41bb9b05fb 100644
--- a/drivers/accessibility/speakup/makemapdata.c
+++ b/drivers/accessibility/speakup/makemapdata.c
@@ -51,12 +51,15 @@ main(int argc, char *argv[])
 {
 	int value, i;
 	struct st_key *this;
-	const char *dir_name;
+	const char *dir_name, *spk_dir_name;
 	char *cp;
 
 	dir_name = getenv("TOPDIR");
 	if (!dir_name)
 		dir_name = ".";
+	spk_dir_name = getenv("SPKDIR");
+	if (!spk_dir_name)
+		spk_dir_name = "drivers/accessibility/speakup";
 	bzero(key_table, sizeof(key_table));
 	add_key("shift",	1, is_shift);
 	add_key("altgr",	2, is_shift);
@@ -83,7 +86,7 @@ main(int argc, char *argv[])
 			add_key(def_name, value, is_input);
 	}
 
-	open_input(dir_name, "drivers/accessibility/speakup/spk_priv_keyinfo.h");
+	open_input(spk_dir_name, "spk_priv_keyinfo.h");
 	while (get_define()) {
 		if (strlen(def_val) > 5) {
 			//if (def_val[0] == '(')
-- 
GitLab


From 1dbb4f0235a450f22e518124cbf9b922802ce38f Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Nov 2022 18:29:56 +0200
Subject: [PATCH 2211/4122] virt: acrn: Mark the uuid field as unused

After the commits for userspace (see Link tags below) the uuid field
is not being used in the ACRN code. Update kernel to reflect these
changes, i.e. do the following:
- adding a comment explaining that it's not used anymore
- replacing the specific type by a raw buffer
- updating the example code accordingly

The advertised field confused users and actually never been used.
So the wrong part here is that kernel puts something which userspace
never used and hence this may confuse a reader of this code.

Note, that there is only a single tool that had been prepared a year
ago for these forthcoming changes in the kernel.

Link: https://github.com/projectacrn/acrn-hypervisor/commit/da0d24326ed6
Link: https://github.com/projectacrn/acrn-hypervisor/commit/bb0327e70097
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221116162956.72658-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/acrn.h | 5 ++---
 samples/acrn/vm-sample.c  | 3 ---
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h
index ccf47ed925008..7b714c1902eb2 100644
--- a/include/uapi/linux/acrn.h
+++ b/include/uapi/linux/acrn.h
@@ -12,7 +12,6 @@
 #define _UAPI_ACRN_H
 
 #include <linux/types.h>
-#include <linux/uuid.h>
 
 #define ACRN_IO_REQUEST_MAX		16
 
@@ -186,7 +185,7 @@ struct acrn_ioreq_notify {
  * @reserved0:		Reserved and must be 0
  * @vcpu_num:		Number of vCPU in the VM. Return from hypervisor.
  * @reserved1:		Reserved and must be 0
- * @uuid:		UUID of the VM. Pass to hypervisor directly.
+ * @uuid:		Empty space never to be used again (used to be UUID of the VM)
  * @vm_flag:		Flag of the VM creating. Pass to hypervisor directly.
  * @ioreq_buf:		Service VM GPA of I/O request buffer. Pass to
  *			hypervisor directly.
@@ -198,7 +197,7 @@ struct acrn_vm_creation {
 	__u16	reserved0;
 	__u16	vcpu_num;
 	__u16	reserved1;
-	guid_t	uuid;
+	__u8	uuid[16];
 	__u64	vm_flag;
 	__u64	ioreq_buf;
 	__u64	cpu_affinity;
diff --git a/samples/acrn/vm-sample.c b/samples/acrn/vm-sample.c
index b2dad47a77a01..7abd68b20153b 100644
--- a/samples/acrn/vm-sample.c
+++ b/samples/acrn/vm-sample.c
@@ -29,8 +29,6 @@ static struct acrn_io_request *io_req_buf = (struct acrn_io_request *)io_request
 
 __u16 vcpu_num;
 __u16 vmid;
-/* POST_STANDARD_VM_UUID1, refer to https://github.com/projectacrn/acrn-hypervisor/blob/master/hypervisor/include/common/vm_uuids.h */
-guid_t vm_uuid = GUID_INIT(0x385479d2, 0xd625, 0xe811, 0x86, 0x4e, 0xcb, 0x7a, 0x18, 0xb3, 0x46, 0x43);
 
 int hsm_fd;
 int is_running = 1;
@@ -63,7 +61,6 @@ int main(int argc, char **argv)
 	}
 	hsm_fd = open("/dev/acrn_hsm", O_RDWR|O_CLOEXEC);
 
-	memcpy(&create_vm.uuid, &vm_uuid, 16);
 	create_vm.ioreq_buf = (__u64)io_req_buf;
 	ret = ioctl(hsm_fd, ACRN_IOCTL_CREATE_VM, &create_vm);
 	printf("Created VM! [%d]\n", ret);
-- 
GitLab


From fd2c930cf6a5b9176382c15f9acb1996e76e25ad Mon Sep 17 00:00:00 2001
From: ruanjinjie <ruanjinjie@huawei.com>
Date: Thu, 17 Nov 2022 14:47:25 +0800
Subject: [PATCH 2212/4122] misc: tifm: fix possible memory leak in
 tifm_7xx1_switch_media()

If device_register() returns error in tifm_7xx1_switch_media(),
name of kobject which is allocated in dev_set_name() called in device_add()
is leaked.

Never directly free @dev after calling device_register(), even
if it returned an error! Always use put_device() to give up the
reference initialized.

Fixes: 2428a8fe2261 ("tifm: move common device management tasks from tifm_7xx1 to tifm_core")
Signed-off-by: ruanjinjie <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/r/20221117064725.3478402-1-ruanjinjie@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/tifm_7xx1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/misc/tifm_7xx1.c b/drivers/misc/tifm_7xx1.c
index 017c2f7d62871..7dd86a9858aba 100644
--- a/drivers/misc/tifm_7xx1.c
+++ b/drivers/misc/tifm_7xx1.c
@@ -190,7 +190,7 @@ static void tifm_7xx1_switch_media(struct work_struct *work)
 				spin_unlock_irqrestore(&fm->lock, flags);
 			}
 			if (sock)
-				tifm_free_device(&sock->dev);
+				put_device(&sock->dev);
 		}
 		spin_lock_irqsave(&fm->lock, flags);
 	}
-- 
GitLab


From 643a16a0eb1d6ac23744bb6e90a00fc21148a9dc Mon Sep 17 00:00:00 2001
From: Zheng Wang <zyytlz.wz@163.com>
Date: Thu, 10 Nov 2022 11:50:33 +0800
Subject: [PATCH 2213/4122] misc: sgi-gru: fix use-after-free error in
 gru_set_context_option, gru_fault and gru_handle_user_call_os

In some bad situation, the gts may be freed gru_check_chiplet_assignment.
The call chain can be gru_unload_context->gru_free_gru_context->gts_drop
and kfree finally. However, the caller didn't know if the gts is freed
or not and use it afterwards. This will trigger a Use after Free bug.

Fix it by introducing a return value to see if it's in error path or not.
Free the gts in caller if gru_check_chiplet_assignment check failed.

Fixes: 55484c45dbec ("gru: allow users to specify gru chiplet 2")
Signed-off-by: Zheng Wang <zyytlz.wz@163.com>
Acked-by: Dimitri Sivanich <sivanich@hpe.com>
Link: https://lore.kernel.org/r/20221110035033.19498-1-zyytlz.wz@163.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/sgi-gru/grufault.c  | 13 +++++++++++--
 drivers/misc/sgi-gru/grumain.c   | 22 ++++++++++++++++++----
 drivers/misc/sgi-gru/grutables.h |  2 +-
 3 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/sgi-gru/grufault.c b/drivers/misc/sgi-gru/grufault.c
index d7ef61e602ede..b836936e97471 100644
--- a/drivers/misc/sgi-gru/grufault.c
+++ b/drivers/misc/sgi-gru/grufault.c
@@ -648,6 +648,7 @@ int gru_handle_user_call_os(unsigned long cb)
 	if ((cb & (GRU_HANDLE_STRIDE - 1)) || ucbnum >= GRU_NUM_CB)
 		return -EINVAL;
 
+again:
 	gts = gru_find_lock_gts(cb);
 	if (!gts)
 		return -EINVAL;
@@ -656,7 +657,11 @@ int gru_handle_user_call_os(unsigned long cb)
 	if (ucbnum >= gts->ts_cbr_au_count * GRU_CBR_AU_SIZE)
 		goto exit;
 
-	gru_check_context_placement(gts);
+	if (gru_check_context_placement(gts)) {
+		gru_unlock_gts(gts);
+		gru_unload_context(gts, 1);
+		goto again;
+	}
 
 	/*
 	 * CCH may contain stale data if ts_force_cch_reload is set.
@@ -874,7 +879,11 @@ int gru_set_context_option(unsigned long arg)
 		} else {
 			gts->ts_user_blade_id = req.val1;
 			gts->ts_user_chiplet_id = req.val0;
-			gru_check_context_placement(gts);
+			if (gru_check_context_placement(gts)) {
+				gru_unlock_gts(gts);
+				gru_unload_context(gts, 1);
+				return ret;
+			}
 		}
 		break;
 	case sco_gseg_owner:
diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c
index 6706ef3c59776..4eb4b94551390 100644
--- a/drivers/misc/sgi-gru/grumain.c
+++ b/drivers/misc/sgi-gru/grumain.c
@@ -716,9 +716,10 @@ static int gru_check_chiplet_assignment(struct gru_state *gru,
  * chiplet. Misassignment can occur if the process migrates to a different
  * blade or if the user changes the selected blade/chiplet.
  */
-void gru_check_context_placement(struct gru_thread_state *gts)
+int gru_check_context_placement(struct gru_thread_state *gts)
 {
 	struct gru_state *gru;
+	int ret = 0;
 
 	/*
 	 * If the current task is the context owner, verify that the
@@ -726,15 +727,23 @@ void gru_check_context_placement(struct gru_thread_state *gts)
 	 * references. Pthread apps use non-owner references to the CBRs.
 	 */
 	gru = gts->ts_gru;
+	/*
+	 * If gru or gts->ts_tgid_owner isn't initialized properly, return
+	 * success to indicate that the caller does not need to unload the
+	 * gru context.The caller is responsible for their inspection and
+	 * reinitialization if needed.
+	 */
 	if (!gru || gts->ts_tgid_owner != current->tgid)
-		return;
+		return ret;
 
 	if (!gru_check_chiplet_assignment(gru, gts)) {
 		STAT(check_context_unload);
-		gru_unload_context(gts, 1);
+		ret = -EINVAL;
 	} else if (gru_retarget_intr(gts)) {
 		STAT(check_context_retarget_intr);
 	}
+
+	return ret;
 }
 
 
@@ -934,7 +943,12 @@ again:
 	mutex_lock(&gts->ts_ctxlock);
 	preempt_disable();
 
-	gru_check_context_placement(gts);
+	if (gru_check_context_placement(gts)) {
+		preempt_enable();
+		mutex_unlock(&gts->ts_ctxlock);
+		gru_unload_context(gts, 1);
+		return VM_FAULT_NOPAGE;
+	}
 
 	if (!gts->ts_gru) {
 		STAT(load_user_context);
diff --git a/drivers/misc/sgi-gru/grutables.h b/drivers/misc/sgi-gru/grutables.h
index 8c52776db2341..640daf1994df7 100644
--- a/drivers/misc/sgi-gru/grutables.h
+++ b/drivers/misc/sgi-gru/grutables.h
@@ -632,7 +632,7 @@ extern int gru_user_flush_tlb(unsigned long arg);
 extern int gru_user_unload_context(unsigned long arg);
 extern int gru_get_exception_detail(unsigned long arg);
 extern int gru_set_context_option(unsigned long address);
-extern void gru_check_context_placement(struct gru_thread_state *gts);
+extern int gru_check_context_placement(struct gru_thread_state *gts);
 extern int gru_cpu_fault_map_id(void);
 extern struct vm_area_struct *gru_find_vma(unsigned long vaddr);
 extern void gru_flush_all_tlb(struct gru_state *gru);
-- 
GitLab


From 7198cf0f1ca90581f27452664216a662ad72aed5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:48 +0100
Subject: [PATCH 2214/4122] misc: lis3lv02d/lis3lv02d_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-495-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/lis3lv02d/lis3lv02d_i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
index d7daa01fe7ca9..7071412d6bf63 100644
--- a/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
+++ b/drivers/misc/lis3lv02d/lis3lv02d_i2c.c
@@ -100,8 +100,7 @@ static const struct of_device_id lis3lv02d_i2c_dt_ids[] = {
 MODULE_DEVICE_TABLE(of, lis3lv02d_i2c_dt_ids);
 #endif
 
-static int lis3lv02d_i2c_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+static int lis3lv02d_i2c_probe(struct i2c_client *client)
 {
 	int ret = 0;
 	struct lis3lv02d_platform_data *pdata = client->dev.platform_data;
@@ -263,7 +262,7 @@ static struct i2c_driver lis3lv02d_i2c_driver = {
 		.pm     = &lis3_pm_ops,
 		.of_match_table = of_match_ptr(lis3lv02d_i2c_dt_ids),
 	},
-	.probe	= lis3lv02d_i2c_probe,
+	.probe_new = lis3lv02d_i2c_probe,
 	.remove	= lis3lv02d_i2c_remove,
 	.id_table = lis3lv02d_id,
 };
-- 
GitLab


From 59ee8ca4eeda35d850e4c81ec3065dba10023842 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:41 +0100
Subject: [PATCH 2215/4122] misc: eeprom/eeprom: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-488-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/eeprom/eeprom.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index 8a841a75d8931..32611100d5cd3 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -141,8 +141,7 @@ static int eeprom_detect(struct i2c_client *client, struct i2c_board_info *info)
 	return 0;
 }
 
-static int eeprom_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int eeprom_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct eeprom_data *data;
@@ -197,7 +196,7 @@ static struct i2c_driver eeprom_driver = {
 	.driver = {
 		.name	= "eeprom",
 	},
-	.probe		= eeprom_probe,
+	.probe_new	= eeprom_probe,
 	.remove		= eeprom_remove,
 	.id_table	= eeprom_id,
 
-- 
GitLab


From 8427bd8bdee8f35797cade56fe173fbea990e38c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:49 +0100
Subject: [PATCH 2216/4122] misc: tsl2550: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-496-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/tsl2550.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/tsl2550.c b/drivers/misc/tsl2550.c
index 1652fb9b3856c..6c62b94e0acdb 100644
--- a/drivers/misc/tsl2550.c
+++ b/drivers/misc/tsl2550.c
@@ -331,8 +331,7 @@ static int tsl2550_init_client(struct i2c_client *client)
  */
 
 static struct i2c_driver tsl2550_driver;
-static int tsl2550_probe(struct i2c_client *client,
-				   const struct i2c_device_id *id)
+static int tsl2550_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct tsl2550_data *data;
@@ -438,7 +437,7 @@ static struct i2c_driver tsl2550_driver = {
 		.of_match_table = tsl2550_of_match,
 		.pm	= TSL2550_PM_OPS,
 	},
-	.probe	= tsl2550_probe,
+	.probe_new = tsl2550_probe,
 	.remove	= tsl2550_remove,
 	.id_table = tsl2550_id,
 };
-- 
GitLab


From 327e1ad186d91b12b6ece0b21178c07edef01806 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:47 +0100
Subject: [PATCH 2217/4122] misc: isl29020: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-494-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/isl29020.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/isl29020.c b/drivers/misc/isl29020.c
index c6f2a94f501ac..3be02093368cf 100644
--- a/drivers/misc/isl29020.c
+++ b/drivers/misc/isl29020.c
@@ -151,8 +151,7 @@ static int als_set_default_config(struct i2c_client *client)
 	return 0;
 }
 
-static int  isl29020_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+static int  isl29020_probe(struct i2c_client *client)
 {
 	int res;
 
@@ -215,7 +214,7 @@ static struct i2c_driver isl29020_driver = {
 		.name = "isl29020",
 		.pm = ISL29020_PM_OPS,
 	},
-	.probe = isl29020_probe,
+	.probe_new = isl29020_probe,
 	.remove = isl29020_remove,
 	.id_table = isl29020_id,
 };
-- 
GitLab


From 99b0cb3f5f8d67f4552c24d9b0aa6cda38f558aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:43 +0100
Subject: [PATCH 2218/4122] misc: eeprom/max6875: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-490-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/eeprom/max6875.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c
index 6bd4f4339af4a..79cf8afcef2e8 100644
--- a/drivers/misc/eeprom/max6875.c
+++ b/drivers/misc/eeprom/max6875.c
@@ -130,8 +130,7 @@ static const struct bin_attribute user_eeprom_attr = {
 	.read = max6875_read,
 };
 
-static int max6875_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int max6875_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct max6875_data *data;
@@ -193,7 +192,7 @@ static struct i2c_driver max6875_driver = {
 	.driver = {
 		.name	= "max6875",
 	},
-	.probe		= max6875_probe,
+	.probe_new	= max6875_probe,
 	.remove		= max6875_remove,
 	.id_table	= max6875_id,
 };
-- 
GitLab


From 654700c9fc2860d33d57b42fd39cae2310dbc2ba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:44 +0100
Subject: [PATCH 2219/4122] misc: hmc6352: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-491-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/hmc6352.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/hmc6352.c b/drivers/misc/hmc6352.c
index 42b9adef28a3c..8967940ecd1e8 100644
--- a/drivers/misc/hmc6352.c
+++ b/drivers/misc/hmc6352.c
@@ -101,8 +101,7 @@ static const struct attribute_group m_compass_gr = {
 	.attrs = mid_att_compass
 };
 
-static int hmc6352_probe(struct i2c_client *client,
-					const struct i2c_device_id *id)
+static int hmc6352_probe(struct i2c_client *client)
 {
 	int res;
 
@@ -132,7 +131,7 @@ static struct i2c_driver hmc6352_driver = {
 	.driver = {
 		.name = "hmc6352",
 	},
-	.probe = hmc6352_probe,
+	.probe_new = hmc6352_probe,
 	.remove = hmc6352_remove,
 	.id_table = hmc6352_id,
 };
-- 
GitLab


From 9c18dad44dc1de202a69c8ccef983e6070740acd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:45 +0100
Subject: [PATCH 2220/4122] misc: ics932s401: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-492-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ics932s401.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/misc/ics932s401.c b/drivers/misc/ics932s401.c
index 1cb71df966a4c..12108a7b9b40d 100644
--- a/drivers/misc/ics932s401.c
+++ b/drivers/misc/ics932s401.c
@@ -89,8 +89,7 @@ struct ics932s401_data {
 	u8			regs[NUM_REGS];
 };
 
-static int ics932s401_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id);
+static int ics932s401_probe(struct i2c_client *client);
 static int ics932s401_detect(struct i2c_client *client,
 			  struct i2c_board_info *info);
 static void ics932s401_remove(struct i2c_client *client);
@@ -106,7 +105,7 @@ static struct i2c_driver ics932s401_driver = {
 	.driver = {
 		.name	= "ics932s401",
 	},
-	.probe		= ics932s401_probe,
+	.probe_new	= ics932s401_probe,
 	.remove		= ics932s401_remove,
 	.id_table	= ics932s401_id,
 	.detect		= ics932s401_detect,
@@ -429,8 +428,7 @@ static int ics932s401_detect(struct i2c_client *client,
 	return 0;
 }
 
-static int ics932s401_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int ics932s401_probe(struct i2c_client *client)
 {
 	struct ics932s401_data *data;
 	int err;
-- 
GitLab


From db687ce71845aeb639be7452f4d8a272cf190cd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:46 +0100
Subject: [PATCH 2221/4122] misc: isl29003: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-493-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/isl29003.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/isl29003.c b/drivers/misc/isl29003.c
index 8ab61be79c762..aeda2fa89e61f 100644
--- a/drivers/misc/isl29003.c
+++ b/drivers/misc/isl29003.c
@@ -374,8 +374,7 @@ static int isl29003_init_client(struct i2c_client *client)
  * I2C layer
  */
 
-static int isl29003_probe(struct i2c_client *client,
-				    const struct i2c_device_id *id)
+static int isl29003_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	struct isl29003_data *data;
@@ -460,7 +459,7 @@ static struct i2c_driver isl29003_driver = {
 		.name	= ISL29003_DRV_NAME,
 		.pm	= ISL29003_PM_OPS,
 	},
-	.probe	= isl29003_probe,
+	.probe_new = isl29003_probe,
 	.remove	= isl29003_remove,
 	.id_table = isl29003_id,
 };
-- 
GitLab


From 244179dbe11e707a0ef596246a9b80327492fc35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:42 +0100
Subject: [PATCH 2222/4122] misc: eeprom/idt_89hpesx: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-489-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/eeprom/idt_89hpesx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/eeprom/idt_89hpesx.c b/drivers/misc/eeprom/idt_89hpesx.c
index bb3ed352b95f9..4e07ee9cb500e 100644
--- a/drivers/misc/eeprom/idt_89hpesx.c
+++ b/drivers/misc/eeprom/idt_89hpesx.c
@@ -1366,7 +1366,7 @@ static void idt_remove_dbgfs_files(struct idt_89hpesx_dev *pdev)
 /*
  * idt_probe() - IDT 89HPESx driver probe() callback method
  */
-static int idt_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int idt_probe(struct i2c_client *client)
 {
 	struct idt_89hpesx_dev *pdev;
 	int ret;
@@ -1556,7 +1556,7 @@ static struct i2c_driver idt_driver = {
 		.name = IDT_NAME,
 		.of_match_table = idt_of_match,
 	},
-	.probe = idt_probe,
+	.probe_new = idt_probe,
 	.remove = idt_remove,
 	.id_table = idt_ids,
 };
-- 
GitLab


From 6757c6480d7f34cb272d28339dfac096b94c8638 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:38 +0100
Subject: [PATCH 2223/4122] misc: apds990x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-485-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/apds990x.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/apds990x.c b/drivers/misc/apds990x.c
index e2100cc42ce86..0024503ea6dbb 100644
--- a/drivers/misc/apds990x.c
+++ b/drivers/misc/apds990x.c
@@ -1051,8 +1051,7 @@ static const struct attribute_group apds990x_attribute_group[] = {
 	{.attrs = sysfs_attrs_ctrl },
 };
 
-static int apds990x_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int apds990x_probe(struct i2c_client *client)
 {
 	struct apds990x_chip *chip;
 	int err;
@@ -1272,7 +1271,7 @@ static struct i2c_driver apds990x_driver = {
 		.name	= "apds990x",
 		.pm	= &apds990x_pm_ops,
 	},
-	.probe	  = apds990x_probe,
+	.probe_new = apds990x_probe,
 	.remove	  = apds990x_remove,
 	.id_table = apds990x_id,
 };
-- 
GitLab


From 9f28b675c160519c79daed9f73bc38ab3d6c9015 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:37 +0100
Subject: [PATCH 2224/4122] misc: apds9802als: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-484-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/apds9802als.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/apds9802als.c b/drivers/misc/apds9802als.c
index a32431f4b370a..0526c55d5cd5c 100644
--- a/drivers/misc/apds9802als.c
+++ b/drivers/misc/apds9802als.c
@@ -212,8 +212,7 @@ static int als_set_default_config(struct i2c_client *client)
 	return ret_val;
 }
 
-static int apds9802als_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int apds9802als_probe(struct i2c_client *client)
 {
 	int res;
 	struct als_data *data;
@@ -297,7 +296,7 @@ static struct i2c_driver apds9802als_driver = {
 		.name = DRIVER_NAME,
 		.pm = APDS9802ALS_PM_OPS,
 	},
-	.probe = apds9802als_probe,
+	.probe_new = apds9802als_probe,
 	.remove = apds9802als_remove,
 	.id_table = apds9802als_id,
 };
-- 
GitLab


From 781edb0530a1009f89e7888726ca87b255d2526b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:39 +0100
Subject: [PATCH 2225/4122] misc: bh1770glc: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-486-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/bh1770glc.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/bh1770glc.c b/drivers/misc/bh1770glc.c
index d0dfa674414c3..bedbe0efb330e 100644
--- a/drivers/misc/bh1770glc.c
+++ b/drivers/misc/bh1770glc.c
@@ -1162,8 +1162,7 @@ static const struct attribute_group bh1770_attribute_group = {
 	.attrs = sysfs_attrs
 };
 
-static int bh1770_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int bh1770_probe(struct i2c_client *client)
 {
 	struct bh1770_chip *chip;
 	int err;
@@ -1379,7 +1378,7 @@ static struct i2c_driver bh1770_driver = {
 		.name	= "bh1770glc",
 		.pm	= &bh1770_pm_ops,
 	},
-	.probe	  = bh1770_probe,
+	.probe_new = bh1770_probe,
 	.remove	  = bh1770_remove,
 	.id_table = bh1770_id,
 };
-- 
GitLab


From 3127a86a3702bd3a2ff43503d49919d666739ae8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:43:40 +0100
Subject: [PATCH 2226/4122] misc: ds1682: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-487-uwe@kleine-koenig.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/ds1682.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
index 0698ddc5f4d54..d517eed32971c 100644
--- a/drivers/misc/ds1682.c
+++ b/drivers/misc/ds1682.c
@@ -200,8 +200,7 @@ static const struct bin_attribute ds1682_eeprom_attr = {
 /*
  * Called when a ds1682 device is matched with this driver
  */
-static int ds1682_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ds1682_probe(struct i2c_client *client)
 {
 	int rc;
 
@@ -251,7 +250,7 @@ static struct i2c_driver ds1682_driver = {
 		.name = "ds1682",
 		.of_match_table = ds1682_of_match,
 	},
-	.probe = ds1682_probe,
+	.probe_new = ds1682_probe,
 	.remove = ds1682_remove,
 	.id_table = ds1682_id,
 };
-- 
GitLab


From 7b51161696e803fd5f9ad55b20a64c2df313f95c Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 17 Nov 2022 15:06:36 +0800
Subject: [PATCH 2227/4122] firmware: raspberrypi: fix possible memory leak in
 rpi_firmware_probe()

In rpi_firmware_probe(), if mbox_request_channel() fails, the 'fw' will
not be freed through rpi_firmware_delete(), fix this leak by calling
kfree() in the error path.

Fixes: 1e7c57355a3b ("firmware: raspberrypi: Keep count of all consumers")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221117070636.3849773-1-yangyingliang@huawei.com
Acked-by: Joel Savitz <jsavitz@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/raspberrypi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/firmware/raspberrypi.c b/drivers/firmware/raspberrypi.c
index 4b8978b254f9a..dba315f675bc7 100644
--- a/drivers/firmware/raspberrypi.c
+++ b/drivers/firmware/raspberrypi.c
@@ -272,6 +272,7 @@ static int rpi_firmware_probe(struct platform_device *pdev)
 		int ret = PTR_ERR(fw->chan);
 		if (ret != -EPROBE_DEFER)
 			dev_err(dev, "Failed to get mbox channel: %d\n", ret);
+		kfree(fw);
 		return ret;
 	}
 
-- 
GitLab


From ab760791c0cfbb1d7a668f46a135264f56c8f018 Mon Sep 17 00:00:00 2001
From: D Scott Phillips <scott@os.amperecomputing.com>
Date: Mon, 14 Nov 2022 13:22:12 -0800
Subject: [PATCH 2228/4122] char: misc: Increase the maximum number of dynamic
 misc devices to 1048448

On AmpereOne, 128 dynamic misc devices is not enough for the per-cpu
coresight_tmc devices.  Switch the dynamic minors allocator to an ida and
add logic to allocate in the ranges [0..127] and [256..1048575], leaving
[128..255] for static misc devices.  Dynamic allocations start from 127
growing downwards and then increasing from 256, so device numbering for the
first 128 devices remain the same as before.

Signed-off-by: D Scott Phillips <scott@os.amperecomputing.com>
Link: https://lore.kernel.org/r/20221114212212.9279-1-scott@os.amperecomputing.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/char/misc.c | 41 ++++++++++++++++++++++++++++-------------
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index cba19bfdc44dd..05727f0daa6b5 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -61,7 +61,29 @@ static DEFINE_MUTEX(misc_mtx);
  * Assigned numbers, used for dynamic minors
  */
 #define DYNAMIC_MINORS 128 /* like dynamic majors */
-static DECLARE_BITMAP(misc_minors, DYNAMIC_MINORS);
+static DEFINE_IDA(misc_minors_ida);
+
+static int misc_minor_alloc(void)
+{
+	int ret;
+
+	ret = ida_alloc_max(&misc_minors_ida, DYNAMIC_MINORS - 1, GFP_KERNEL);
+	if (ret >= 0) {
+		ret = DYNAMIC_MINORS - ret - 1;
+	} else {
+		ret = ida_alloc_range(&misc_minors_ida, MISC_DYNAMIC_MINOR + 1,
+				      MINORMASK, GFP_KERNEL);
+	}
+	return ret;
+}
+
+static void misc_minor_free(int minor)
+{
+	if (minor < DYNAMIC_MINORS)
+		ida_free(&misc_minors_ida, DYNAMIC_MINORS - minor - 1);
+	else if (minor > MISC_DYNAMIC_MINOR)
+		ida_free(&misc_minors_ida, minor);
+}
 
 #ifdef CONFIG_PROC_FS
 static void *misc_seq_start(struct seq_file *seq, loff_t *pos)
@@ -183,14 +205,13 @@ int misc_register(struct miscdevice *misc)
 	mutex_lock(&misc_mtx);
 
 	if (is_dynamic) {
-		int i = find_first_zero_bit(misc_minors, DYNAMIC_MINORS);
+		int i = misc_minor_alloc();
 
-		if (i >= DYNAMIC_MINORS) {
+		if (i < 0) {
 			err = -EBUSY;
 			goto out;
 		}
-		misc->minor = DYNAMIC_MINORS - i - 1;
-		set_bit(i, misc_minors);
+		misc->minor = i;
 	} else {
 		struct miscdevice *c;
 
@@ -209,10 +230,7 @@ int misc_register(struct miscdevice *misc)
 					  misc, misc->groups, "%s", misc->name);
 	if (IS_ERR(misc->this_device)) {
 		if (is_dynamic) {
-			int i = DYNAMIC_MINORS - misc->minor - 1;
-
-			if (i < DYNAMIC_MINORS && i >= 0)
-				clear_bit(i, misc_minors);
+			misc_minor_free(misc->minor);
 			misc->minor = MISC_DYNAMIC_MINOR;
 		}
 		err = PTR_ERR(misc->this_device);
@@ -240,16 +258,13 @@ EXPORT_SYMBOL(misc_register);
 
 void misc_deregister(struct miscdevice *misc)
 {
-	int i = DYNAMIC_MINORS - misc->minor - 1;
-
 	if (WARN_ON(list_empty(&misc->list)))
 		return;
 
 	mutex_lock(&misc_mtx);
 	list_del(&misc->list);
 	device_destroy(misc_class, MKDEV(MISC_MAJOR, misc->minor));
-	if (i < DYNAMIC_MINORS && i >= 0)
-		clear_bit(i, misc_minors);
+	misc_minor_free(misc->minor);
 	mutex_unlock(&misc_mtx);
 }
 EXPORT_SYMBOL(misc_deregister);
-- 
GitLab


From fa1ba41c17cd786925720bc1a9554d6c6624923d Mon Sep 17 00:00:00 2001
From: Peng Wu <wupeng58@huawei.com>
Date: Tue, 15 Nov 2022 09:11:38 +0000
Subject: [PATCH 2229/4122] firmware: google: fix a NULL vs IS_ERR() check in
 cbmem_entry_probe()

The devm_memremap() function returns error pointers on error,
it doesn't return NULL.

Fixes: 19d54020883c ("firmware: google: Implement cbmem in sysfs driver")
Signed-off-by: Peng Wu <wupeng58@huawei.com>
Reviewed-by: Guenter Roeck <groeck@chromium.org>
Reviewed-by: Jack Rosenthal <jrosenth@chromium.org>
Link: https://lore.kernel.org/r/20221115091138.51614-1-wupeng58@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/google/cbmem.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/firmware/google/cbmem.c b/drivers/firmware/google/cbmem.c
index 685f3070ce9d8..88e587ba1e0da 100644
--- a/drivers/firmware/google/cbmem.c
+++ b/drivers/firmware/google/cbmem.c
@@ -106,8 +106,8 @@ static int cbmem_entry_probe(struct coreboot_device *dev)
 	entry->mem_file_buf = devm_memremap(&dev->dev, dev->cbmem_entry.address,
 					    dev->cbmem_entry.entry_size,
 					    MEMREMAP_WB);
-	if (!entry->mem_file_buf)
-		return -ENOMEM;
+	if (IS_ERR(entry->mem_file_buf))
+		return PTR_ERR(entry->mem_file_buf);
 
 	entry->size = dev->cbmem_entry.entry_size;
 
-- 
GitLab


From 61c80d1c3833e196256fb060382db94f24d3d9a7 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Nov 2022 22:54:39 +0800
Subject: [PATCH 2230/4122] cxl: fix possible null-ptr-deref in
 cxl_guest_init_afu|adapter()

If device_register() fails in cxl_register_afu|adapter(), the device
is not added, device_unregister() can not be called in the error path,
otherwise it will cause a null-ptr-deref because of removing not added
device.

As comment of device_register() says, it should use put_device() to give
up the reference in the error path. So split device_unregister() into
device_del() and put_device(), then goes to put dev when register fails.

Fixes: 14baf4d9c739 ("cxl: Add guest-specific code")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111145440.2426970-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/cxl/guest.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 375f692ae9d68..fb95a2d5cef48 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -965,10 +965,10 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
 	 * if it returns an error!
 	 */
 	if ((rc = cxl_register_afu(afu)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_afu_add(afu)))
-		goto err_put1;
+		goto err_del_dev;
 
 	/*
 	 * pHyp doesn't expose the programming models supported by the
@@ -984,7 +984,7 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
 		afu->modes_supported = CXL_MODE_DIRECTED;
 
 	if ((rc = cxl_afu_select_best_mode(afu)))
-		goto err_put2;
+		goto err_remove_sysfs;
 
 	adapter->afu[afu->slice] = afu;
 
@@ -1004,10 +1004,12 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
 
 	return 0;
 
-err_put2:
+err_remove_sysfs:
 	cxl_sysfs_afu_remove(afu);
-err_put1:
-	device_unregister(&afu->dev);
+err_del_dev:
+	device_del(&afu->dev);
+err_put_dev:
+	put_device(&afu->dev);
 	free = false;
 	guest_release_serr_irq(afu);
 err2:
@@ -1141,18 +1143,20 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic
 	 * even if it returns an error!
 	 */
 	if ((rc = cxl_register_adapter(adapter)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_adapter_add(adapter)))
-		goto err_put1;
+		goto err_del_dev;
 
 	/* release the context lock as the adapter is configured */
 	cxl_adapter_context_unlock(adapter);
 
 	return adapter;
 
-err_put1:
-	device_unregister(&adapter->dev);
+err_del_dev:
+	device_del(&adapter->dev);
+err_put_dev:
+	put_device(&adapter->dev);
 	free = false;
 	cxl_guest_remove_chardev(adapter);
 err1:
-- 
GitLab


From 02cd3032b154fa02fdf90e7467abaeed889330b2 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Nov 2022 22:54:40 +0800
Subject: [PATCH 2231/4122] cxl: fix possible null-ptr-deref in
 cxl_pci_init_afu|adapter()

If device_register() fails in cxl_pci_afu|adapter(), the device
is not added, device_unregister() can not be called in the error
path, otherwise it will cause a null-ptr-deref because of removing
not added device.

As comment of device_register() says, it should use put_device() to give
up the reference in the error path. So split device_unregister() into
device_del() and put_device(), then goes to put dev when register fails.

Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Link: https://lore.kernel.org/r/20221111145440.2426970-2-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/cxl/pci.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 3de0aea62ade4..6d495d641c951 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1164,10 +1164,10 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 	 * if it returns an error!
 	 */
 	if ((rc = cxl_register_afu(afu)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_afu_add(afu)))
-		goto err_put1;
+		goto err_del_dev;
 
 	adapter->afu[afu->slice] = afu;
 
@@ -1176,10 +1176,12 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 
 	return 0;
 
-err_put1:
+err_del_dev:
+	device_del(&afu->dev);
+err_put_dev:
 	pci_deconfigure_afu(afu);
 	cxl_debugfs_afu_remove(afu);
-	device_unregister(&afu->dev);
+	put_device(&afu->dev);
 	return rc;
 
 err_free_native:
@@ -1667,23 +1669,25 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
 	 * even if it returns an error!
 	 */
 	if ((rc = cxl_register_adapter(adapter)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_adapter_add(adapter)))
-		goto err_put1;
+		goto err_del_dev;
 
 	/* Release the context lock as adapter is configured */
 	cxl_adapter_context_unlock(adapter);
 
 	return adapter;
 
-err_put1:
+err_del_dev:
+	device_del(&adapter->dev);
+err_put_dev:
 	/* This should mirror cxl_remove_adapter, except without the
 	 * sysfs parts
 	 */
 	cxl_debugfs_adapter_remove(adapter);
 	cxl_deconfigure_adapter(adapter);
-	device_unregister(&adapter->dev);
+	put_device(&adapter->dev);
 	return ERR_PTR(rc);
 
 err_release:
-- 
GitLab


From 2613cc29c5723881ca603b1a3b50f0107010d5d6 Mon Sep 17 00:00:00 2001
From: Pierre Gondois <pierre.gondois@arm.com>
Date: Wed, 16 Nov 2022 10:49:58 +0100
Subject: [PATCH 2232/4122] cacheinfo: Remove of_node_put() for fw_token

fw_token is used for DT/ACPI systems to identify CPUs sharing caches.
For DT based systems, fw_token is set to a pointer to a DT node.

commit 3da72e18371c ("cacheinfo: Decrement refcount in
cache_setup_of_node()")
doesn't increment the refcount of fw_token anymore in
cache_setup_of_node(). fw_token is indeed used as a token and not
as a (struct device_node*), so no reference to fw_token should be
kept.

However, [1] is triggered when hotplugging a CPU multiple times
since cache_shared_cpu_map_remove() decrements the refcount to
fw_token at each CPU unplugging, eventually reaching 0.

Remove of_node_put() for fw_token in cache_shared_cpu_map_remove().

[1]
------------[ cut here ]------------
refcount_t: saturated; leaking memory.
WARNING: CPU: 4 PID: 32 at lib/refcount.c:22 refcount_warn_saturate (lib/refcount.c:22 (discriminator 3))
Modules linked in:
CPU: 4 PID: 32 Comm: cpuhp/4 Tainted: G        W          6.1.0-rc1-14091-g9fdf2ca7b9c8 #76
Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Oct 31 2022
pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : refcount_warn_saturate (lib/refcount.c:22 (discriminator 3))
lr : refcount_warn_saturate (lib/refcount.c:22 (discriminator 3))
[...]
Call trace:
[...]
of_node_release (drivers/of/dynamic.c:335)
kobject_put (lib/kobject.c:677 lib/kobject.c:704 ./include/linux/kref.h:65 lib/kobject.c:721)
of_node_put (drivers/of/dynamic.c:49)
free_cache_attributes.part.0 (drivers/base/cacheinfo.c:712)
cacheinfo_cpu_pre_down (drivers/base/cacheinfo.c:718)
cpuhp_invoke_callback (kernel/cpu.c:247 (discriminator 4))
cpuhp_thread_fun (kernel/cpu.c:785)
smpboot_thread_fn (kernel/smpboot.c:164 (discriminator 3))
kthread (kernel/kthread.c:376)
ret_from_fork (arch/arm64/kernel/entry.S:861)
---[ end trace 0000000000000000 ]---

Fixes: 3da72e18371c ("cacheinfo: Decrement refcount in cache_setup_of_node()")
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Tested-by: Sudeep Holla <sudeep.holla@arm.com>
Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
Link: https://lore.kernel.org/r/20221116094958.2141072-1-pierre.gondois@arm.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/cacheinfo.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c
index 04317cde800cb..950b22cdb5f7c 100644
--- a/drivers/base/cacheinfo.c
+++ b/drivers/base/cacheinfo.c
@@ -317,8 +317,6 @@ static void cache_shared_cpu_map_remove(unsigned int cpu)
 			cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map);
 			cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map);
 		}
-		if (of_have_populated_dt())
-			of_node_put(this_leaf->fw_token);
 	}
 }
 
-- 
GitLab


From 20228a1d5a55e7db0c6720840f2c7d2b48c55f69 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 20 Sep 2022 13:28:07 +0200
Subject: [PATCH 2233/4122] iio: adc: ad_sigma_delta: do not use internal
 iio_dev lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop 'mlock' usage by making use of iio_device_claim_direct_mode().
This change actually makes sure we cannot do a single conversion while
buffering is enable. Note there was a potential race in the previous
code since we were only acquiring the lock after checking if the bus is
enabled.

Fixes: af3008485ea0 ("iio:adc: Add common code for ADI Sigma Delta devices")
Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Miquel Raynal <miquel.raynal@bootlin.com>
Cc: <Stable@vger.kernel.org> #No rush as race is very old.
Link: https://lore.kernel.org/r/20220920112821.975359-2-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad_sigma_delta.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/adc/ad_sigma_delta.c b/drivers/iio/adc/ad_sigma_delta.c
index 261a9a6b45e15..d8570f620785a 100644
--- a/drivers/iio/adc/ad_sigma_delta.c
+++ b/drivers/iio/adc/ad_sigma_delta.c
@@ -281,10 +281,10 @@ int ad_sigma_delta_single_conversion(struct iio_dev *indio_dev,
 	unsigned int data_reg;
 	int ret = 0;
 
-	if (iio_buffer_enabled(indio_dev))
-		return -EBUSY;
+	ret = iio_device_claim_direct_mode(indio_dev);
+	if (ret)
+		return ret;
 
-	mutex_lock(&indio_dev->mlock);
 	ad_sigma_delta_set_channel(sigma_delta, chan->address);
 
 	spi_bus_lock(sigma_delta->spi->master);
@@ -323,7 +323,7 @@ out:
 	ad_sigma_delta_set_mode(sigma_delta, AD_SD_MODE_IDLE);
 	sigma_delta->bus_locked = false;
 	spi_bus_unlock(sigma_delta->spi->master);
-	mutex_unlock(&indio_dev->mlock);
+	iio_device_release_direct_mode(indio_dev);
 
 	if (ret)
 		return ret;
-- 
GitLab


From 2a22b40aea42dd516e669257ab5faf10396c2fad Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Wed, 21 Sep 2022 02:39:15 +0000
Subject: [PATCH 2234/4122] iio: accel: bma400: Switch to use dev_err_probe()
 helper

In the probe path, dev_err() can be replace with dev_err_probe()
which will check if error code is -EPROBE_DEFER and prints the
error name.

Signed-off-by: Yuan Can <yuancan@huawei.com>
Link: https://lore.kernel.org/r/20220921023915.47300-1-yuancan@huawei.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/bma400_core.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/iio/accel/bma400_core.c b/drivers/iio/accel/bma400_core.c
index ad8fce3e08cd0..e8de88e6cfb97 100644
--- a/drivers/iio/accel/bma400_core.c
+++ b/drivers/iio/accel/bma400_core.c
@@ -886,14 +886,10 @@ static int bma400_init(struct bma400_data *data)
 	ret = devm_regulator_bulk_get(data->dev,
 				      ARRAY_SIZE(data->regulators),
 				      data->regulators);
-	if (ret) {
-		if (ret != -EPROBE_DEFER)
-			dev_err(data->dev,
-				"Failed to get regulators: %d\n",
-				ret);
+	if (ret)
+		return dev_err_probe(data->dev, ret, "Failed to get regulators: %d\n",
+				     ret);
 
-		return ret;
-	}
 	ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators),
 				    data->regulators);
 	if (ret) {
-- 
GitLab


From 2aebc223fc7ce613140ef2b64ca3c0f1a4f458cb Mon Sep 17 00:00:00 2001
From: Crt Mori <cmo@melexis.com>
Date: Thu, 22 Sep 2022 10:13:22 +0200
Subject: [PATCH 2235/4122] iio: temperature: mlx90632 Add runtime
 powermanagement modes

The sensor can operate in lower power modes and even make measurements when
in those lower powered modes. The decision was taken that if measurement
is not requested within 2 seconds the sensor will remain in SLEEP_STEP
power mode, where measurements are triggered on request with setting the
start of measurement bit (SOB). In this mode the measurements are taking
a bit longer because we need to start it and complete it. Currently, in
continuous mode we read ready data and this mode is activated if sensor
measurement is requested within 2 seconds. The suspend timeout is
increased to 6 seconds (instead of 3 before), because that enables more
measurements in lower power mode (SLEEP_STEP), with the lowest refresh
rate (2 seconds).

Signed-off-by: Crt Mori <cmo@melexis.com>
Link: https://lore.kernel.org/r/be405068f081f2d518843897b13cd0289c280b5d.1663834141.git.cmo@melexis.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/mlx90632.c | 377 +++++++++++++++++++++++------
 1 file changed, 309 insertions(+), 68 deletions(-)

diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index f6dec0e5f0973..71130d237a691 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -6,11 +6,14 @@
  *
  * Driver for the Melexis MLX90632 I2C 16-bit IR thermopile sensor
  */
+#include <linux/bitfield.h>
 #include <linux/delay.h>
+#include <linux/device.h>
 #include <linux/err.h>
 #include <linux/gpio/consumer.h>
 #include <linux/i2c.h>
 #include <linux/iopoll.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/limits.h>
 #include <linux/mod_devicetable.h>
@@ -55,6 +58,12 @@
 #define MLX90632_EE_Ha		0x2481 /* Ha customer calib value reg 16bit */
 #define MLX90632_EE_Hb		0x2482 /* Hb customer calib value reg 16bit */
 
+#define MLX90632_EE_MEDICAL_MEAS1      0x24E1 /* Medical measurement 1 16bit */
+#define MLX90632_EE_MEDICAL_MEAS2      0x24E2 /* Medical measurement 2 16bit */
+#define MLX90632_EE_EXTENDED_MEAS1     0x24F1 /* Extended measurement 1 16bit */
+#define MLX90632_EE_EXTENDED_MEAS2     0x24F2 /* Extended measurement 2 16bit */
+#define MLX90632_EE_EXTENDED_MEAS3     0x24F3 /* Extended measurement 3 16bit */
+
 /* Register addresses - volatile */
 #define MLX90632_REG_I2C_ADDR	0x3000 /* Chip I2C address register */
 
@@ -62,13 +71,16 @@
 #define MLX90632_REG_CONTROL	0x3001 /* Control Register address */
 #define   MLX90632_CFG_PWR_MASK		GENMASK(2, 1) /* PowerMode Mask */
 #define   MLX90632_CFG_MTYP_MASK		GENMASK(8, 4) /* Meas select Mask */
+#define   MLX90632_CFG_SOB_MASK BIT(11)
 
 /* PowerModes statuses */
 #define MLX90632_PWR_STATUS(ctrl_val) (ctrl_val << 1)
 #define MLX90632_PWR_STATUS_HALT MLX90632_PWR_STATUS(0) /* hold */
-#define MLX90632_PWR_STATUS_SLEEP_STEP MLX90632_PWR_STATUS(1) /* sleep step*/
+#define MLX90632_PWR_STATUS_SLEEP_STEP MLX90632_PWR_STATUS(1) /* sleep step */
 #define MLX90632_PWR_STATUS_STEP MLX90632_PWR_STATUS(2) /* step */
-#define MLX90632_PWR_STATUS_CONTINUOUS MLX90632_PWR_STATUS(3) /* continuous*/
+#define MLX90632_PWR_STATUS_CONTINUOUS MLX90632_PWR_STATUS(3) /* continuous */
+
+#define MLX90632_EE_RR GENMASK(10, 8) /* Only Refresh Rate bits */
 
 /* Measurement types */
 #define MLX90632_MTYP_MEDICAL 0
@@ -116,8 +128,9 @@
 #define MLX90632_REF_12 	12LL /* ResCtrlRef value of Ch 1 or Ch 2 */
 #define MLX90632_REF_3		12LL /* ResCtrlRef value of Channel 3 */
 #define MLX90632_MAX_MEAS_NUM	31 /* Maximum measurements in list */
-#define MLX90632_SLEEP_DELAY_MS 3000 /* Autosleep delay */
+#define MLX90632_SLEEP_DELAY_MS 6000 /* Autosleep delay */
 #define MLX90632_EXTENDED_LIMIT 27000 /* Extended mode raw value limit */
+#define MLX90632_MEAS_MAX_TIME 2000 /* Max measurement time in ms for the lowest refresh rate */
 
 /**
  * struct mlx90632_data - private data for the MLX90632 device
@@ -130,6 +143,9 @@
  * @object_ambient_temperature: Ambient temperature at object (might differ of
  *                              the ambient temperature of sensor.
  * @regulator: Regulator of the device
+ * @powerstatus: Current POWER status of the device
+ * @interaction_ts: Timestamp of the last temperature read that is used
+ *		    for power management in jiffies
  */
 struct mlx90632_data {
 	struct i2c_client *client;
@@ -139,6 +155,8 @@ struct mlx90632_data {
 	u8 mtyp;
 	u32 object_ambient_temperature;
 	struct regulator *regulator;
+	int powerstatus;
+	unsigned long interaction_ts;
 };
 
 static const struct regmap_range mlx90632_volatile_reg_range[] = {
@@ -158,6 +176,8 @@ static const struct regmap_range mlx90632_read_reg_range[] = {
 	regmap_reg_range(MLX90632_EE_VERSION, MLX90632_EE_Ka),
 	regmap_reg_range(MLX90632_EE_CTRL, MLX90632_EE_I2C_ADDR),
 	regmap_reg_range(MLX90632_EE_Ha, MLX90632_EE_Hb),
+	regmap_reg_range(MLX90632_EE_MEDICAL_MEAS1, MLX90632_EE_MEDICAL_MEAS2),
+	regmap_reg_range(MLX90632_EE_EXTENDED_MEAS1, MLX90632_EE_EXTENDED_MEAS3),
 	regmap_reg_range(MLX90632_REG_I2C_ADDR, MLX90632_REG_CONTROL),
 	regmap_reg_range(MLX90632_REG_I2C_CMD, MLX90632_REG_I2C_CMD),
 	regmap_reg_range(MLX90632_REG_STATUS, MLX90632_REG_STATUS),
@@ -198,16 +218,38 @@ static const struct regmap_config mlx90632_regmap = {
 
 static s32 mlx90632_pwr_set_sleep_step(struct regmap *regmap)
 {
-	return regmap_update_bits(regmap, MLX90632_REG_CONTROL,
-				  MLX90632_CFG_PWR_MASK,
-				  MLX90632_PWR_STATUS_SLEEP_STEP);
+	struct mlx90632_data *data =
+		iio_priv(dev_get_drvdata(regmap_get_device(regmap)));
+	s32 ret;
+
+	if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP)
+		return 0;
+
+	ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL, MLX90632_CFG_PWR_MASK,
+				MLX90632_PWR_STATUS_SLEEP_STEP);
+	if (ret < 0)
+		return ret;
+
+	data->powerstatus = MLX90632_PWR_STATUS_SLEEP_STEP;
+	return ret;
 }
 
 static s32 mlx90632_pwr_continuous(struct regmap *regmap)
 {
-	return regmap_update_bits(regmap, MLX90632_REG_CONTROL,
-				  MLX90632_CFG_PWR_MASK,
-				  MLX90632_PWR_STATUS_CONTINUOUS);
+	struct mlx90632_data *data =
+		iio_priv(dev_get_drvdata(regmap_get_device(regmap)));
+	s32 ret;
+
+	if (data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS)
+		return 0;
+
+	ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL, MLX90632_CFG_PWR_MASK,
+				MLX90632_PWR_STATUS_CONTINUOUS);
+	if (ret < 0)
+		return ret;
+
+	data->powerstatus = MLX90632_PWR_STATUS_CONTINUOUS;
+	return ret;
 }
 
 /**
@@ -219,6 +261,63 @@ static void mlx90632_reset_delay(void)
 	usleep_range(150, 200);
 }
 
+static int mlx90632_get_measurement_time(struct regmap *regmap, u16 meas)
+{
+	unsigned int reg;
+	int ret;
+
+	ret = regmap_read(regmap, meas, &reg);
+	if (ret < 0)
+		return ret;
+
+	return MLX90632_MEAS_MAX_TIME >> FIELD_GET(MLX90632_EE_RR, reg);
+}
+
+static int mlx90632_calculate_dataset_ready_time(struct mlx90632_data *data)
+{
+	unsigned int refresh_time;
+	int ret;
+
+	if (data->mtyp == MLX90632_MTYP_MEDICAL) {
+		ret = mlx90632_get_measurement_time(data->regmap,
+						    MLX90632_EE_MEDICAL_MEAS1);
+		if (ret < 0)
+			return ret;
+
+		refresh_time = ret;
+
+		ret = mlx90632_get_measurement_time(data->regmap,
+						    MLX90632_EE_MEDICAL_MEAS2);
+		if (ret < 0)
+			return ret;
+
+		refresh_time += ret;
+	} else {
+		ret = mlx90632_get_measurement_time(data->regmap,
+						    MLX90632_EE_EXTENDED_MEAS1);
+		if (ret < 0)
+			return ret;
+
+		refresh_time = ret;
+
+		ret = mlx90632_get_measurement_time(data->regmap,
+						    MLX90632_EE_EXTENDED_MEAS2);
+		if (ret < 0)
+			return ret;
+
+		refresh_time += ret;
+
+		ret = mlx90632_get_measurement_time(data->regmap,
+						    MLX90632_EE_EXTENDED_MEAS3);
+		if (ret < 0)
+			return ret;
+
+		refresh_time += ret;
+	}
+
+	return refresh_time;
+}
+
 /**
  * mlx90632_perform_measurement() - Trigger and retrieve current measurement cycle
  * @data: pointer to mlx90632_data object containing regmap information
@@ -249,26 +348,75 @@ static int mlx90632_perform_measurement(struct mlx90632_data *data)
 	return (reg_status & MLX90632_STAT_CYCLE_POS) >> 2;
 }
 
-static int mlx90632_set_meas_type(struct regmap *regmap, u8 type)
+/**
+ * mlx90632_perform_measurement_burst() - Trigger and retrieve current measurement
+ * cycle in step sleep mode
+ * @data: pointer to mlx90632_data object containing regmap information
+ *
+ * Perform a measurement and return 2 as measurement cycle position reported
+ * by sensor. This is a blocking function for amount dependent on the sensor
+ * refresh rate.
+ */
+static int mlx90632_perform_measurement_burst(struct mlx90632_data *data)
 {
+	unsigned int reg_status;
 	int ret;
 
-	if ((type != MLX90632_MTYP_MEDICAL) && (type != MLX90632_MTYP_EXTENDED))
-		return -EINVAL;
+	ret = regmap_write_bits(data->regmap, MLX90632_REG_CONTROL,
+				MLX90632_CFG_SOB_MASK, MLX90632_CFG_SOB_MASK);
+	if (ret < 0)
+		return ret;
+
+	ret = mlx90632_calculate_dataset_ready_time(data);
+	if (ret < 0)
+		return ret;
 
-	ret = regmap_write(regmap, MLX90632_REG_I2C_CMD, MLX90632_RESET_CMD);
+	msleep(ret); /* Wait minimum time for dataset to be ready */
+
+	ret = regmap_read_poll_timeout(data->regmap, MLX90632_REG_STATUS,
+				       reg_status,
+				       (reg_status & MLX90632_STAT_BUSY) == 0,
+				       10000, 100 * 10000);
+	if (ret < 0) {
+		dev_err(&data->client->dev, "data not ready");
+		return -ETIMEDOUT;
+	}
+
+	return 2;
+}
+
+static int mlx90632_set_meas_type(struct mlx90632_data *data, u8 type)
+{
+	int current_powerstatus;
+	int ret;
+
+	if (data->mtyp == type)
+		return 0;
+
+	current_powerstatus = data->powerstatus;
+	ret = mlx90632_pwr_continuous(data->regmap);
+	if (ret < 0)
+		return ret;
+
+	ret = regmap_write(data->regmap, MLX90632_REG_I2C_CMD, MLX90632_RESET_CMD);
 	if (ret < 0)
 		return ret;
 
 	mlx90632_reset_delay();
 
-	ret = regmap_write_bits(regmap, MLX90632_REG_CONTROL,
+	ret = regmap_update_bits(data->regmap, MLX90632_REG_CONTROL,
 				 (MLX90632_CFG_MTYP_MASK | MLX90632_CFG_PWR_MASK),
 				 (MLX90632_MTYP_STATUS(type) | MLX90632_PWR_STATUS_HALT));
 	if (ret < 0)
 		return ret;
 
-	return mlx90632_pwr_continuous(regmap);
+	data->mtyp = type;
+	data->powerstatus = MLX90632_PWR_STATUS_HALT;
+
+	if (current_powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP)
+		return mlx90632_pwr_set_sleep_step(data->regmap);
+
+	return mlx90632_pwr_continuous(data->regmap);
 }
 
 static int mlx90632_channel_new_select(int perform_ret, uint8_t *channel_new,
@@ -355,11 +503,30 @@ static int mlx90632_read_all_channel(struct mlx90632_data *data,
 	s32 ret, measurement;
 
 	mutex_lock(&data->lock);
-	measurement = mlx90632_perform_measurement(data);
-	if (measurement < 0) {
-		ret = measurement;
+	ret = mlx90632_set_meas_type(data, MLX90632_MTYP_MEDICAL);
+	if (ret < 0)
+		goto read_unlock;
+
+	switch (data->powerstatus) {
+	case MLX90632_PWR_STATUS_CONTINUOUS:
+		measurement = mlx90632_perform_measurement(data);
+		if (measurement < 0) {
+			ret = measurement;
+			goto read_unlock;
+		}
+		break;
+	case MLX90632_PWR_STATUS_SLEEP_STEP:
+		measurement = mlx90632_perform_measurement_burst(data);
+		if (measurement < 0) {
+			ret = measurement;
+			goto read_unlock;
+		}
+		break;
+	default:
+		ret = -EOPNOTSUPP;
 		goto read_unlock;
 	}
+
 	ret = mlx90632_read_ambient_raw(data->regmap, ambient_new_raw,
 					ambient_old_raw);
 	if (ret < 0)
@@ -441,14 +608,20 @@ static int mlx90632_read_all_channel_extended(struct mlx90632_data *data, s16 *o
 	s32 ret, meas;
 
 	mutex_lock(&data->lock);
-	ret = mlx90632_set_meas_type(data->regmap, MLX90632_MTYP_EXTENDED);
+	ret = mlx90632_set_meas_type(data, MLX90632_MTYP_EXTENDED);
 	if (ret < 0)
 		goto read_unlock;
 
-	ret = read_poll_timeout(mlx90632_perform_measurement, meas, meas == 19,
-				50000, 800000, false, data);
-	if (ret != 0)
-		goto read_unlock;
+	if (data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS) {
+		ret = read_poll_timeout(mlx90632_perform_measurement, meas, meas == 19,
+					50000, 800000, false, data);
+		if (ret)
+			goto read_unlock;
+	} else if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) {
+		ret = mlx90632_perform_measurement_burst(data);
+		if (ret < 0)
+			goto read_unlock;
+	}
 
 	ret = mlx90632_read_object_raw_extended(data->regmap, object_new_raw);
 	if (ret < 0)
@@ -457,8 +630,6 @@ static int mlx90632_read_all_channel_extended(struct mlx90632_data *data, s16 *o
 	ret = mlx90632_read_ambient_raw_extended(data->regmap, ambient_new_raw, ambient_old_raw);
 
 read_unlock:
-	(void) mlx90632_set_meas_type(data->regmap, MLX90632_MTYP_MEDICAL);
-
 	mutex_unlock(&data->lock);
 	return ret;
 }
@@ -743,12 +914,47 @@ static int mlx90632_calc_ambient_dsp105(struct mlx90632_data *data, int *val)
 	return ret;
 }
 
+/**
+ * mlx90632_pm_interraction_wakeup() - Measure time between user interactions to change powermode
+ * @data: pointer to mlx90632_data object containing interaction_ts information
+ *
+ * Switch to continuous mode when interaction is faster than MLX90632_MEAS_MAX_TIME. Update the
+ * interaction_ts for each function call with the jiffies to enable measurement between function
+ * calls. Initial value of the interaction_ts needs to be set before this function call.
+ */
+static int mlx90632_pm_interraction_wakeup(struct mlx90632_data *data)
+{
+	unsigned long now;
+	int ret;
+
+	now = jiffies;
+	if (time_in_range(now, data->interaction_ts,
+			  data->interaction_ts +
+			  msecs_to_jiffies(MLX90632_MEAS_MAX_TIME + 100))) {
+		if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) {
+			ret = mlx90632_pwr_continuous(data->regmap);
+			if (ret < 0)
+				return ret;
+		}
+	}
+
+	data->interaction_ts = now;
+
+	return 0;
+}
+
 static int mlx90632_read_raw(struct iio_dev *indio_dev,
 			     struct iio_chan_spec const *channel, int *val,
 			     int *val2, long mask)
 {
 	struct mlx90632_data *data = iio_priv(indio_dev);
 	int ret;
+	int cr;
+
+	pm_runtime_get_sync(&data->client->dev);
+	ret = mlx90632_pm_interraction_wakeup(data);
+	if (ret < 0)
+		goto mlx90632_read_raw_pm;
 
 	switch (mask) {
 	case IIO_CHAN_INFO_PROCESSED:
@@ -756,16 +962,22 @@ static int mlx90632_read_raw(struct iio_dev *indio_dev,
 		case IIO_MOD_TEMP_AMBIENT:
 			ret = mlx90632_calc_ambient_dsp105(data, val);
 			if (ret < 0)
-				return ret;
-			return IIO_VAL_INT;
+				goto mlx90632_read_raw_pm;
+
+			ret = IIO_VAL_INT;
+			break;
 		case IIO_MOD_TEMP_OBJECT:
 			ret = mlx90632_calc_object_dsp105(data, val);
 			if (ret < 0)
-				return ret;
-			return IIO_VAL_INT;
+				goto mlx90632_read_raw_pm;
+
+			ret = IIO_VAL_INT;
+			break;
 		default:
-			return -EINVAL;
+			ret = -EINVAL;
+			break;
 		}
+		break;
 	case IIO_CHAN_INFO_CALIBEMISSIVITY:
 		if (data->emissivity == 1000) {
 			*val = 1;
@@ -774,13 +986,21 @@ static int mlx90632_read_raw(struct iio_dev *indio_dev,
 			*val = 0;
 			*val2 = data->emissivity * 1000;
 		}
-		return IIO_VAL_INT_PLUS_MICRO;
+		ret = IIO_VAL_INT_PLUS_MICRO;
+		break;
 	case IIO_CHAN_INFO_CALIBAMBIENT:
 		*val = data->object_ambient_temperature;
-		return IIO_VAL_INT;
+		ret = IIO_VAL_INT;
+		break;
 	default:
-		return -EINVAL;
+		ret = -EINVAL;
+		break;
 	}
+
+mlx90632_read_raw_pm:
+	pm_runtime_mark_last_busy(&data->client->dev);
+	pm_runtime_put_autosuspend(&data->client->dev);
+	return ret;
 }
 
 static int mlx90632_write_raw(struct iio_dev *indio_dev,
@@ -826,11 +1046,18 @@ static const struct iio_info mlx90632_info = {
 	.write_raw = mlx90632_write_raw,
 };
 
-static int mlx90632_sleep(struct mlx90632_data *data)
+static void mlx90632_sleep(void *_data)
+{
+	struct mlx90632_data *data = _data;
+
+	mlx90632_pwr_set_sleep_step(data->regmap);
+}
+
+static int mlx90632_suspend(struct mlx90632_data *data)
 {
 	regcache_mark_dirty(data->regmap);
 
-	dev_dbg(&data->client->dev, "Requesting sleep");
+	dev_dbg(&data->client->dev, "Requesting suspend");
 	return mlx90632_pwr_set_sleep_step(data->regmap);
 }
 
@@ -902,6 +1129,7 @@ static int mlx90632_probe(struct i2c_client *client,
 	mlx90632->client = client;
 	mlx90632->regmap = regmap;
 	mlx90632->mtyp = MLX90632_MTYP_MEDICAL;
+	mlx90632->powerstatus = MLX90632_PWR_STATUS_HALT;
 
 	mutex_init(&mlx90632->lock);
 	indio_dev->name = id->name;
@@ -933,6 +1161,13 @@ static int mlx90632_probe(struct i2c_client *client,
 		return ret;
 	}
 
+	ret = devm_add_action_or_reset(&client->dev, mlx90632_sleep, mlx90632);
+	if (ret < 0) {
+		dev_err(&client->dev, "Failed to setup low power cleanup action %d\n",
+			ret);
+		return ret;
+	}
+
 	ret = regmap_read(mlx90632->regmap, MLX90632_EE_VERSION, &read);
 	if (ret < 0) {
 		dev_err(&client->dev, "read of version failed: %d\n", ret);
@@ -961,32 +1196,17 @@ static int mlx90632_probe(struct i2c_client *client,
 
 	mlx90632->emissivity = 1000;
 	mlx90632->object_ambient_temperature = 25000; /* 25 degrees milliCelsius */
+	mlx90632->interaction_ts = jiffies; /* Set initial value */
 
-	pm_runtime_disable(&client->dev);
-	ret = pm_runtime_set_active(&client->dev);
-	if (ret < 0) {
-		mlx90632_sleep(mlx90632);
-		return ret;
-	}
-	pm_runtime_enable(&client->dev);
+	pm_runtime_get_noresume(&client->dev);
+	pm_runtime_set_active(&client->dev);
+
+	devm_pm_runtime_enable(&client->dev);
 	pm_runtime_set_autosuspend_delay(&client->dev, MLX90632_SLEEP_DELAY_MS);
 	pm_runtime_use_autosuspend(&client->dev);
+	pm_runtime_put_autosuspend(&client->dev);
 
-	return iio_device_register(indio_dev);
-}
-
-static void mlx90632_remove(struct i2c_client *client)
-{
-	struct iio_dev *indio_dev = i2c_get_clientdata(client);
-	struct mlx90632_data *data = iio_priv(indio_dev);
-
-	iio_device_unregister(indio_dev);
-
-	pm_runtime_disable(&client->dev);
-	pm_runtime_set_suspended(&client->dev);
-	pm_runtime_put_noidle(&client->dev);
-
-	mlx90632_sleep(data);
+	return devm_iio_device_register(&client->dev, indio_dev);
 }
 
 static const struct i2c_device_id mlx90632_id[] = {
@@ -1001,33 +1221,54 @@ static const struct of_device_id mlx90632_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, mlx90632_of_match);
 
-static int __maybe_unused mlx90632_pm_suspend(struct device *dev)
+static int mlx90632_pm_suspend(struct device *dev)
 {
-	struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev));
-	struct mlx90632_data *data = iio_priv(indio_dev);
+	struct mlx90632_data *data = iio_priv(dev_get_drvdata(dev));
+	int ret;
+
+	ret = mlx90632_suspend(data);
+	if (ret < 0)
+		return ret;
 
-	return mlx90632_sleep(data);
+	ret = regulator_disable(data->regulator);
+	if (ret < 0)
+		dev_err(regmap_get_device(data->regmap),
+			"Failed to disable power regulator: %d\n", ret);
+
+	return ret;
 }
 
-static int __maybe_unused mlx90632_pm_resume(struct device *dev)
+static int mlx90632_pm_resume(struct device *dev)
 {
-	struct iio_dev *indio_dev = i2c_get_clientdata(to_i2c_client(dev));
-	struct mlx90632_data *data = iio_priv(indio_dev);
+	struct mlx90632_data *data = iio_priv(dev_get_drvdata(dev));
+	int ret;
+
+	ret = mlx90632_enable_regulator(data);
+	if (ret < 0)
+		return ret;
 
 	return mlx90632_wakeup(data);
 }
 
-static UNIVERSAL_DEV_PM_OPS(mlx90632_pm_ops, mlx90632_pm_suspend,
-			    mlx90632_pm_resume, NULL);
+static int mlx90632_pm_runtime_suspend(struct device *dev)
+{
+	struct mlx90632_data *data = iio_priv(dev_get_drvdata(dev));
+
+	return mlx90632_pwr_set_sleep_step(data->regmap);
+}
+
+const struct dev_pm_ops mlx90632_pm_ops = {
+	SYSTEM_SLEEP_PM_OPS(mlx90632_pm_suspend, mlx90632_pm_resume)
+	RUNTIME_PM_OPS(mlx90632_pm_runtime_suspend, NULL, NULL)
+};
 
 static struct i2c_driver mlx90632_driver = {
 	.driver = {
 		.name	= "mlx90632",
 		.of_match_table = mlx90632_of_match,
-		.pm	= &mlx90632_pm_ops,
+		.pm	= pm_ptr(&mlx90632_pm_ops),
 	},
 	.probe = mlx90632_probe,
-	.remove = mlx90632_remove,
 	.id_table = mlx90632_id,
 };
 module_i2c_driver(mlx90632_driver);
-- 
GitLab


From eff07b20700a55e5b39e76960934d6eeb56a12d9 Mon Sep 17 00:00:00 2001
From: Crt Mori <cmo@melexis.com>
Date: Thu, 22 Sep 2022 10:13:23 +0200
Subject: [PATCH 2236/4122] iio: temperature: mlx90632 Read sampling frequency

Allow users to read sensor sampling frequency to better plan the
application measurement requests.

Signed-off-by: Crt Mori <cmo@melexis.com>
Link: https://lore.kernel.org/r/0bd6d6d665b4bd39e4565f6f44cb1bdc03386e23.1663834141.git.cmo@melexis.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/mlx90632.c | 59 ++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index 71130d237a691..0818039402616 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -81,6 +81,9 @@
 #define MLX90632_PWR_STATUS_CONTINUOUS MLX90632_PWR_STATUS(3) /* continuous */
 
 #define MLX90632_EE_RR GENMASK(10, 8) /* Only Refresh Rate bits */
+#define MLX90632_REFRESH_RATE(ee_val) FIELD_GET(MLX90632_EE_RR, ee_val)
+					/* Extract Refresh Rate from ee register */
+#define MLX90632_REFRESH_RATE_STATUS(refresh_rate) (refresh_rate << 8)
 
 /* Measurement types */
 #define MLX90632_MTYP_MEDICAL 0
@@ -914,6 +917,32 @@ static int mlx90632_calc_ambient_dsp105(struct mlx90632_data *data, int *val)
 	return ret;
 }
 
+static int mlx90632_get_refresh_rate(struct mlx90632_data *data,
+				     int *refresh_rate)
+{
+	unsigned int meas1;
+	int ret;
+
+	ret = regmap_read(data->regmap, MLX90632_EE_MEDICAL_MEAS1, &meas1);
+	if (ret < 0)
+		return ret;
+
+	*refresh_rate = MLX90632_REFRESH_RATE(meas1);
+
+	return ret;
+}
+
+static const int mlx90632_freqs[][2] = {
+	{0, 500000},
+	{1, 0},
+	{2, 0},
+	{4, 0},
+	{8, 0},
+	{16, 0},
+	{32, 0},
+	{64, 0}
+};
+
 /**
  * mlx90632_pm_interraction_wakeup() - Measure time between user interactions to change powermode
  * @data: pointer to mlx90632_data object containing interaction_ts information
@@ -992,6 +1021,15 @@ static int mlx90632_read_raw(struct iio_dev *indio_dev,
 		*val = data->object_ambient_temperature;
 		ret = IIO_VAL_INT;
 		break;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		ret = mlx90632_get_refresh_rate(data, &cr);
+		if (ret < 0)
+			goto mlx90632_read_raw_pm;
+
+		*val = mlx90632_freqs[cr][0];
+		*val2 = mlx90632_freqs[cr][1];
+		ret = IIO_VAL_INT_PLUS_MICRO;
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -1025,12 +1063,30 @@ static int mlx90632_write_raw(struct iio_dev *indio_dev,
 	}
 }
 
+static int mlx90632_read_avail(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       const int **vals, int *type, int *length,
+			       long mask)
+{
+	switch (mask) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		*vals = (int *)mlx90632_freqs;
+		*type = IIO_VAL_INT_PLUS_MICRO;
+		*length = 2 * ARRAY_SIZE(mlx90632_freqs);
+		return IIO_AVAIL_LIST;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct iio_chan_spec mlx90632_channels[] = {
 	{
 		.type = IIO_TEMP,
 		.modified = 1,
 		.channel2 = IIO_MOD_TEMP_AMBIENT,
 		.info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED),
+		.info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
+		.info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ),
 	},
 	{
 		.type = IIO_TEMP,
@@ -1038,12 +1094,15 @@ static const struct iio_chan_spec mlx90632_channels[] = {
 		.channel2 = IIO_MOD_TEMP_OBJECT,
 		.info_mask_separate = BIT(IIO_CHAN_INFO_PROCESSED) |
 			BIT(IIO_CHAN_INFO_CALIBEMISSIVITY) | BIT(IIO_CHAN_INFO_CALIBAMBIENT),
+		.info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
+		.info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ),
 	},
 };
 
 static const struct iio_info mlx90632_info = {
 	.read_raw = mlx90632_read_raw,
 	.write_raw = mlx90632_write_raw,
+	.read_avail = mlx90632_read_avail,
 };
 
 static void mlx90632_sleep(void *_data)
-- 
GitLab


From 4e6151403631255828a5530d9d6233caedcd2976 Mon Sep 17 00:00:00 2001
From: Crt Mori <cmo@melexis.com>
Date: Thu, 22 Sep 2022 10:13:24 +0200
Subject: [PATCH 2237/4122] iio: temperature: mlx90632 Change return value of
 sensor measurement channel

The current EINVAL value is more applicable to embedded library, where
user can actually put the fixed value to the sensor. In case of the
driver if the value of the channel is invalid it is better in inform
userspace that Channel was out of range as that implies more to internal
driver error than invalid input. It also makes for easier debugging of
where the error comes from during the development.

Signed-off-by: Crt Mori <cmo@melexis.com>
Link: https://lore.kernel.org/r/565d4df2592d751dc0f40908f2569b7c9af8e56e.1663834141.git.cmo@melexis.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/mlx90632.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index 0818039402616..224db7513baa7 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -435,7 +435,7 @@ static int mlx90632_channel_new_select(int perform_ret, uint8_t *channel_new,
 		*channel_old = 1;
 		break;
 	default:
-		return -EINVAL;
+		return -ECHRNG;
 	}
 
 	return 0;
-- 
GitLab


From 8cf5f0329128efdfe18f12a8697752d39821fbdf Mon Sep 17 00:00:00 2001
From: Marcus Folkesson <marcus.folkesson@gmail.com>
Date: Thu, 22 Sep 2022 21:46:39 +0200
Subject: [PATCH 2238/4122] iio: adc: mcp3911: add support to set PGA

Add support for setting the Programmable Gain Amplifiers by adjust the
scale value.

Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20220922194639.1118971-1-marcus.folkesson@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/mcp3911.c | 104 +++++++++++++++++++++++++++++---------
 1 file changed, 80 insertions(+), 24 deletions(-)

diff --git a/drivers/iio/adc/mcp3911.c b/drivers/iio/adc/mcp3911.c
index 76b334f5ac616..974c5bd923a68 100644
--- a/drivers/iio/adc/mcp3911.c
+++ b/drivers/iio/adc/mcp3911.c
@@ -29,6 +29,8 @@
 #define MCP3911_REG_MOD			0x06
 #define MCP3911_REG_PHASE		0x07
 #define MCP3911_REG_GAIN		0x09
+#define MCP3911_GAIN_MASK(ch)		(GENMASK(2, 0) << 3 * ch)
+#define MCP3911_GAIN_VAL(ch, val)      ((val << 3 * ch) & MCP3911_GAIN_MASK(ch))
 
 #define MCP3911_REG_STATUSCOM		0x0a
 #define MCP3911_STATUSCOM_DRHIZ         BIT(12)
@@ -60,8 +62,10 @@
 #define MCP3911_REG_MASK		GENMASK(4, 1)
 
 #define MCP3911_NUM_CHANNELS		2
+#define MCP3911_NUM_SCALES		6
 
 static const int mcp3911_osr_table[] = { 32, 64, 128, 256, 512, 1024, 2048, 4096 };
+static u32 mcp3911_scale_table[MCP3911_NUM_SCALES][2];
 
 struct mcp3911 {
 	struct spi_device *spi;
@@ -70,6 +74,7 @@ struct mcp3911 {
 	struct clk *clki;
 	u32 dev_addr;
 	struct iio_trigger *trig;
+	u32 gain[MCP3911_NUM_CHANNELS];
 	struct {
 		u32 channels[MCP3911_NUM_CHANNELS];
 		s64 ts __aligned(8);
@@ -146,6 +151,11 @@ static int mcp3911_read_avail(struct iio_dev *indio_dev,
 		*vals = mcp3911_osr_table;
 		*length = ARRAY_SIZE(mcp3911_osr_table);
 		return IIO_AVAIL_LIST;
+	case IIO_CHAN_INFO_SCALE:
+		*type = IIO_VAL_INT_PLUS_NANO;
+		*vals = (int *)mcp3911_scale_table;
+		*length = ARRAY_SIZE(mcp3911_scale_table) * 2;
+		return IIO_AVAIL_LIST;
 	default:
 		return -EINVAL;
 	}
@@ -190,29 +200,9 @@ static int mcp3911_read_raw(struct iio_dev *indio_dev,
 		break;
 
 	case IIO_CHAN_INFO_SCALE:
-		if (adc->vref) {
-			ret = regulator_get_voltage(adc->vref);
-			if (ret < 0) {
-				dev_err(indio_dev->dev.parent,
-					"failed to get vref voltage: %d\n",
-				       ret);
-				goto out;
-			}
-
-			*val = ret / 1000;
-		} else {
-			*val = MCP3911_INT_VREF_MV;
-		}
-
-		/*
-		 * For 24bit Conversion
-		 * Raw = ((Voltage)/(Vref) * 2^23 * Gain * 1.5
-		 * Voltage = Raw * (Vref)/(2^23 * Gain * 1.5)
-		 */
-
-		/* val2 = (2^23 * 1.5) */
-		*val2 = 12582912;
-		ret = IIO_VAL_FRACTIONAL;
+		*val = mcp3911_scale_table[ilog2(adc->gain[channel->channel])][0];
+		*val2 = mcp3911_scale_table[ilog2(adc->gain[channel->channel])][1];
+		ret = IIO_VAL_INT_PLUS_NANO;
 		break;
 	}
 
@@ -230,6 +220,18 @@ static int mcp3911_write_raw(struct iio_dev *indio_dev,
 
 	mutex_lock(&adc->lock);
 	switch (mask) {
+	case IIO_CHAN_INFO_SCALE:
+		for (int i = 0; i < MCP3911_NUM_SCALES; i++) {
+			if (val == mcp3911_scale_table[i][0] &&
+				val2 == mcp3911_scale_table[i][1]) {
+
+				adc->gain[channel->channel] = BIT(i);
+				ret = mcp3911_update(adc, MCP3911_REG_GAIN,
+						MCP3911_GAIN_MASK(channel->channel),
+						MCP3911_GAIN_VAL(channel->channel, i), 1);
+			}
+		}
+		break;
 	case IIO_CHAN_INFO_OFFSET:
 		if (val2 != 0) {
 			ret = -EINVAL;
@@ -265,6 +267,44 @@ out:
 	return ret;
 }
 
+static int mcp3911_calc_scale_table(struct mcp3911 *adc)
+{
+	u32 ref = MCP3911_INT_VREF_MV;
+	u32 div;
+	int ret;
+	u64 tmp;
+
+	if (adc->vref) {
+		ret = regulator_get_voltage(adc->vref);
+		if (ret < 0) {
+			dev_err(&adc->spi->dev,
+				"failed to get vref voltage: %d\n",
+			       ret);
+			return ret;
+		}
+
+		ref = ret / 1000;
+	}
+
+	/*
+	 * For 24-bit Conversion
+	 * Raw = ((Voltage)/(Vref) * 2^23 * Gain * 1.5
+	 * Voltage = Raw * (Vref)/(2^23 * Gain * 1.5)
+	 *
+	 * ref = Reference voltage
+	 * div = (2^23 * 1.5 * gain) = 12582912 * gain
+	 */
+	for (int i = 0; i < MCP3911_NUM_SCALES; i++) {
+		div = 12582912 * BIT(i);
+		tmp = div_s64((s64)ref * 1000000000LL, div);
+
+		mcp3911_scale_table[i][0] = 0;
+		mcp3911_scale_table[i][1] = tmp;
+	}
+
+	return 0;
+}
+
 #define MCP3911_CHAN(idx) {					\
 		.type = IIO_VOLTAGE,				\
 		.indexed = 1,					\
@@ -274,8 +314,10 @@ out:
 		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |	\
 			BIT(IIO_CHAN_INFO_OFFSET) |		\
 			BIT(IIO_CHAN_INFO_SCALE),		\
-		.info_mask_shared_by_type_available =		\
+		.info_mask_shared_by_type_available =           \
 			BIT(IIO_CHAN_INFO_OVERSAMPLING_RATIO),	\
+		.info_mask_separate_available =			\
+			BIT(IIO_CHAN_INFO_SCALE),		\
 		.scan_type = {					\
 			.sign = 's',				\
 			.realbits = 24,				\
@@ -482,6 +524,20 @@ static int mcp3911_probe(struct spi_device *spi)
 	if (ret)
 		return ret;
 
+	ret = mcp3911_calc_scale_table(adc);
+	if (ret)
+		return ret;
+
+       /* Set gain to 1 for all channels */
+	for (int i = 0; i < MCP3911_NUM_CHANNELS; i++) {
+		adc->gain[i] = 1;
+		ret = mcp3911_update(adc, MCP3911_REG_GAIN,
+				MCP3911_GAIN_MASK(i),
+				MCP3911_GAIN_VAL(i, 0), 1);
+		if (ret)
+			return ret;
+	}
+
 	indio_dev->name = spi_get_device_id(spi)->name;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->info = &mcp3911_info;
-- 
GitLab


From 3f4033a811bcd1a1f077ce5297488a5c4dd30eb1 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Thu, 22 Sep 2022 11:58:48 +0000
Subject: [PATCH 2239/4122] iio: filter: admv8818: close potential
 out-of-bounds read in __admv8818_read_[h|l]pf_freq()

ADMV8818_SW_IN_WR0_MSK and ADMV8818_SW_OUT_WR0_MSK have 3 bits,
which means a length of 8, but freq_range_hpf and freq_range_lpf
array size is 4, may end up reading 4 elements beyond the end of
those arrays.

Check value first before access freq_range_hpf and freq_range_lpf
to harden against the hardware allowing out of range values.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Link: https://lore.kernel.org/r/20220922115848.1800021-1-weiyongjun@huaweicloud.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/filter/admv8818.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/filter/admv8818.c b/drivers/iio/filter/admv8818.c
index 68de45fe21b44..fe8d46cb7f1d1 100644
--- a/drivers/iio/filter/admv8818.c
+++ b/drivers/iio/filter/admv8818.c
@@ -265,7 +265,7 @@ static int __admv8818_read_hpf_freq(struct admv8818_state *st, u64 *hpf_freq)
 		return ret;
 
 	hpf_band = FIELD_GET(ADMV8818_SW_IN_WR0_MSK, data);
-	if (!hpf_band) {
+	if (!hpf_band || hpf_band > 4) {
 		*hpf_freq = 0;
 		return ret;
 	}
@@ -303,7 +303,7 @@ static int __admv8818_read_lpf_freq(struct admv8818_state *st, u64 *lpf_freq)
 		return ret;
 
 	lpf_band = FIELD_GET(ADMV8818_SW_OUT_WR0_MSK, data);
-	if (!lpf_band) {
+	if (!lpf_band || lpf_band > 4) {
 		*lpf_freq = 0;
 		return ret;
 	}
-- 
GitLab


From e21b5b1f26694a4498ca11a15e09ccc0a72abb81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5rten=20Lindahl?= <marten.lindahl@axis.com>
Date: Mon, 26 Sep 2022 11:18:59 +0200
Subject: [PATCH 2240/4122] iio: light: vcnl4000: Preserve conf bits when
 toggle power
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the vcnl4040 and vcnl4200 chip uses runtime power management for
turning the ambient light and proximity sensors on/off, it overwrites
the entire register each time. In ALS_CONF register bit fields ALS_IT,
ALS_PERS, ALS_INT_EN are overwritten. In PS_CONF1 register bit fields
PS_DUTY, PS_PERS, PS_IT, PS_HD, and PS_INT are overwritten.

Add functions for preserving the affected bit fields when changing power
state.

Signed-off-by: Mårten Lindahl <marten.lindahl@axis.com>
Link: https://lore.kernel.org/r/20220926091900.1724105-2-marten.lindahl@axis.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/vcnl4000.c | 54 ++++++++++++++++++++++++++++++++++--
 1 file changed, 51 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c
index f6c83ecaad8b5..bc8c974cd8b85 100644
--- a/drivers/iio/light/vcnl4000.c
+++ b/drivers/iio/light/vcnl4000.c
@@ -74,6 +74,9 @@
 #define VCNL4000_PROX_EN	BIT(1) /* start proximity measurement */
 #define VCNL4000_SELF_TIMED_EN	BIT(0) /* start self-timed measurement */
 
+#define VCNL4040_ALS_CONF_ALS_SHUTDOWN	BIT(0)
+#define VCNL4040_PS_CONF1_PS_SHUTDOWN	BIT(0)
+
 /* Bit masks for interrupt registers. */
 #define VCNL4010_INT_THR_SEL	BIT(0) /* Select threshold interrupt source */
 #define VCNL4010_INT_THR_EN	BIT(1) /* Threshold interrupt type */
@@ -188,16 +191,61 @@ static int vcnl4000_init(struct vcnl4000_data *data)
 	return data->chip_spec->set_power_state(data, true);
 };
 
+static ssize_t vcnl4000_write_als_enable(struct vcnl4000_data *data, bool en)
+{
+	int ret;
+
+	mutex_lock(&data->vcnl4000_lock);
+
+	ret = i2c_smbus_read_word_data(data->client, VCNL4200_AL_CONF);
+	if (ret < 0)
+		goto out;
+
+	if (en)
+		ret &= ~VCNL4040_ALS_CONF_ALS_SHUTDOWN;
+	else
+		ret |= VCNL4040_ALS_CONF_ALS_SHUTDOWN;
+
+	ret = i2c_smbus_write_word_data(data->client, VCNL4200_AL_CONF, ret);
+
+out:
+	mutex_unlock(&data->vcnl4000_lock);
+
+	return ret;
+}
+
+static ssize_t vcnl4000_write_ps_enable(struct vcnl4000_data *data, bool en)
+{
+	int ret;
+
+	mutex_lock(&data->vcnl4000_lock);
+
+	ret = i2c_smbus_read_word_data(data->client, VCNL4200_PS_CONF1);
+	if (ret < 0)
+		goto out;
+
+	if (en)
+		ret &= ~VCNL4040_PS_CONF1_PS_SHUTDOWN;
+	else
+		ret |= VCNL4040_PS_CONF1_PS_SHUTDOWN;
+
+	ret = i2c_smbus_write_word_data(data->client, VCNL4200_PS_CONF1, ret);
+
+out:
+	mutex_unlock(&data->vcnl4000_lock);
+
+	return ret;
+}
+
 static int vcnl4200_set_power_state(struct vcnl4000_data *data, bool on)
 {
-	u16 val = on ? 0 /* power on */ : 1 /* shut down */;
 	int ret;
 
-	ret = i2c_smbus_write_word_data(data->client, VCNL4200_AL_CONF, val);
+	ret = vcnl4000_write_als_enable(data, on);
 	if (ret < 0)
 		return ret;
 
-	ret = i2c_smbus_write_word_data(data->client, VCNL4200_PS_CONF1, val);
+	ret = vcnl4000_write_ps_enable(data, on);
 	if (ret < 0)
 		return ret;
 
-- 
GitLab


From 85e2c6a23f851f65b2b14c1d87685168be620f87 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?M=C3=A5rten=20Lindahl?= <marten.lindahl@axis.com>
Date: Mon, 26 Sep 2022 11:19:00 +0200
Subject: [PATCH 2241/4122] iio: light: vcnl4000: Add ps_it attributes for
 vcnl4040
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add read/write attribute for proximity integration time, and read
attribute for available proximity integration times for the vcnl4040
chip.

Signed-off-by: Mårten Lindahl <marten.lindahl@axis.com>
Link: https://lore.kernel.org/r/20220926091900.1724105-3-marten.lindahl@axis.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/vcnl4000.c | 131 ++++++++++++++++++++++++++++++++++-
 1 file changed, 128 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c
index bc8c974cd8b85..fdb3922ae4acd 100644
--- a/drivers/iio/light/vcnl4000.c
+++ b/drivers/iio/light/vcnl4000.c
@@ -17,6 +17,7 @@
  *   interrupts (VCNL4040, VCNL4200)
  */
 
+#include <linux/bitfield.h>
 #include <linux/module.h>
 #include <linux/i2c.h>
 #include <linux/err.h>
@@ -76,6 +77,7 @@
 
 #define VCNL4040_ALS_CONF_ALS_SHUTDOWN	BIT(0)
 #define VCNL4040_PS_CONF1_PS_SHUTDOWN	BIT(0)
+#define VCNL4040_PS_CONF2_PS_IT	GENMASK(3, 1) /* Proximity integration time */
 
 /* Bit masks for interrupt registers. */
 #define VCNL4010_INT_THR_SEL	BIT(0) /* Select threshold interrupt source */
@@ -104,6 +106,17 @@ static const int vcnl4010_prox_sampling_frequency[][2] = {
 	{250, 0},
 };
 
+static const int vcnl4040_ps_it_times[][2] = {
+	{0, 100},
+	{0, 150},
+	{0, 200},
+	{0, 250},
+	{0, 300},
+	{0, 350},
+	{0, 400},
+	{0, 800},
+};
+
 #define VCNL4000_SLEEP_DELAY_MS	2000 /* before we enter pm_runtime_suspend */
 
 enum vcnl4000_device_ids {
@@ -470,6 +483,57 @@ static int vcnl4000_set_pm_runtime_state(struct vcnl4000_data *data, bool on)
 	return ret;
 }
 
+static int vcnl4040_read_ps_it(struct vcnl4000_data *data, int *val, int *val2)
+{
+	int ret;
+
+	ret = i2c_smbus_read_word_data(data->client, VCNL4200_PS_CONF1);
+	if (ret < 0)
+		return ret;
+
+	ret = FIELD_GET(VCNL4040_PS_CONF2_PS_IT, ret);
+
+	if (ret >= ARRAY_SIZE(vcnl4040_ps_it_times))
+		return -EINVAL;
+
+	*val = vcnl4040_ps_it_times[ret][0];
+	*val2 = vcnl4040_ps_it_times[ret][1];
+
+	return 0;
+}
+
+static ssize_t vcnl4040_write_ps_it(struct vcnl4000_data *data, int val)
+{
+	unsigned int i;
+	int ret, index = -1;
+	u16 regval;
+
+	for (i = 0; i < ARRAY_SIZE(vcnl4040_ps_it_times); i++) {
+		if (val == vcnl4040_ps_it_times[i][1]) {
+			index = i;
+			break;
+		}
+	}
+
+	if (index < 0)
+		return -EINVAL;
+
+	mutex_lock(&data->vcnl4000_lock);
+
+	ret = i2c_smbus_read_word_data(data->client, VCNL4200_PS_CONF1);
+	if (ret < 0)
+		goto out;
+
+	regval = (ret & ~VCNL4040_PS_CONF2_PS_IT) |
+	    FIELD_PREP(VCNL4040_PS_CONF2_PS_IT, index);
+	ret = i2c_smbus_write_word_data(data->client, VCNL4200_PS_CONF1,
+					regval);
+
+out:
+	mutex_unlock(&data->vcnl4000_lock);
+	return ret;
+}
+
 static int vcnl4000_read_raw(struct iio_dev *indio_dev,
 				struct iio_chan_spec const *chan,
 				int *val, int *val2, long mask)
@@ -506,6 +570,47 @@ static int vcnl4000_read_raw(struct iio_dev *indio_dev,
 		*val = 0;
 		*val2 = data->al_scale;
 		return IIO_VAL_INT_PLUS_MICRO;
+	case IIO_CHAN_INFO_INT_TIME:
+		if (chan->type != IIO_PROXIMITY)
+			return -EINVAL;
+		ret = vcnl4040_read_ps_it(data, val, val2);
+		if (ret < 0)
+			return ret;
+		return IIO_VAL_INT_PLUS_MICRO;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int vcnl4040_write_raw(struct iio_dev *indio_dev,
+			      struct iio_chan_spec const *chan,
+			      int val, int val2, long mask)
+{
+	struct vcnl4000_data *data = iio_priv(indio_dev);
+
+	switch (mask) {
+	case IIO_CHAN_INFO_INT_TIME:
+		if (val != 0)
+			return -EINVAL;
+		if (chan->type != IIO_PROXIMITY)
+			return -EINVAL;
+		return vcnl4040_write_ps_it(data, val2);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int vcnl4040_read_avail(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       const int **vals, int *type, int *length,
+			       long mask)
+{
+	switch (mask) {
+	case IIO_CHAN_INFO_INT_TIME:
+		*vals = (int *)vcnl4040_ps_it_times;
+		*type = IIO_VAL_INT_PLUS_MICRO;
+		*length = 2 * ARRAY_SIZE(vcnl4040_ps_it_times);
+		return IIO_AVAIL_LIST;
 	default:
 		return -EINVAL;
 	}
@@ -844,6 +949,20 @@ static const struct iio_chan_spec vcnl4010_channels[] = {
 	IIO_CHAN_SOFT_TIMESTAMP(1),
 };
 
+static const struct iio_chan_spec vcnl4040_channels[] = {
+	{
+		.type = IIO_LIGHT,
+		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
+			BIT(IIO_CHAN_INFO_SCALE),
+	}, {
+		.type = IIO_PROXIMITY,
+		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
+			BIT(IIO_CHAN_INFO_INT_TIME),
+		.info_mask_separate_available = BIT(IIO_CHAN_INFO_INT_TIME),
+		.ext_info = vcnl4000_ext_info,
+	}
+};
+
 static const struct iio_info vcnl4000_info = {
 	.read_raw = vcnl4000_read_raw,
 };
@@ -858,6 +977,12 @@ static const struct iio_info vcnl4010_info = {
 	.write_event_config = vcnl4010_write_event_config,
 };
 
+static const struct iio_info vcnl4040_info = {
+	.read_raw = vcnl4000_read_raw,
+	.write_raw = vcnl4040_write_raw,
+	.read_avail = vcnl4040_read_avail,
+};
+
 static const struct vcnl4000_chip_spec vcnl4000_chip_spec_cfg[] = {
 	[VCNL4000] = {
 		.prod = "VCNL4000",
@@ -887,9 +1012,9 @@ static const struct vcnl4000_chip_spec vcnl4000_chip_spec_cfg[] = {
 		.measure_light = vcnl4200_measure_light,
 		.measure_proximity = vcnl4200_measure_proximity,
 		.set_power_state = vcnl4200_set_power_state,
-		.channels = vcnl4000_channels,
-		.num_channels = ARRAY_SIZE(vcnl4000_channels),
-		.info = &vcnl4000_info,
+		.channels = vcnl4040_channels,
+		.num_channels = ARRAY_SIZE(vcnl4040_channels),
+		.info = &vcnl4040_info,
 		.irq_support = false,
 	},
 	[VCNL4200] = {
-- 
GitLab


From 55e00b871ce2fc5689af7991638e5889dcd2ea7a Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 27 Sep 2022 14:48:41 +0800
Subject: [PATCH 2242/4122] iio: multiplexer: Switch to use dev_err_probe()
 helper

In the probe path, dev_err() can be replaced with dev_err_probe()
which will check if error code is -EPROBE_DEFER and prints the
error name. It also sets the defer probe reason which can be
checked later through debugfs. It's more simple in error path.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20220927064841.319291-1-yangyingliang@huawei.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/multiplexer/iio-mux.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/iio/multiplexer/iio-mux.c b/drivers/iio/multiplexer/iio-mux.c
index 93558fddfa9b4..edd8c69f6d2e1 100644
--- a/drivers/iio/multiplexer/iio-mux.c
+++ b/drivers/iio/multiplexer/iio-mux.c
@@ -416,11 +416,9 @@ static int mux_probe(struct platform_device *pdev)
 	}
 
 	mux->control = devm_mux_control_get(dev, NULL);
-	if (IS_ERR(mux->control)) {
-		if (PTR_ERR(mux->control) != -EPROBE_DEFER)
-			dev_err(dev, "failed to get control-mux\n");
-		return PTR_ERR(mux->control);
-	}
+	if (IS_ERR(mux->control))
+		return dev_err_probe(dev, PTR_ERR(mux->control),
+				     "failed to get control-mux\n");
 
 	i = 0;
 	for (state = 0; state < all_children; state++) {
-- 
GitLab


From 4eb61e1a3338d0f8c9f7a28b72f3289cc92133c6 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 21 Sep 2022 16:36:16 +0000
Subject: [PATCH 2243/4122] iio: adc: ti-ads131e08: Silence no spi_device_id
 warnings

SPI devices use the spi_device_id for module autoloading even on
systems using device tree, after commit 5fa6863ba692 ("spi: Check
we have a spi_device_id for each DT compatible"), kernel warns as
follows since the spi_device_id is missing:

SPI driver ads131e08 has no spi_device_id for ti,ads131e04
SPI driver ads131e08 has no spi_device_id for ti,ads131e06

Add spi_device_id entries to silence the warnings, and ensure driver
module autoloading works.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Link: https://lore.kernel.org/r/20220921163620.805879-2-weiyongjun@huaweicloud.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ti-ads131e08.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/iio/adc/ti-ads131e08.c b/drivers/iio/adc/ti-ads131e08.c
index 5235a93f28bc6..fcfc462543133 100644
--- a/drivers/iio/adc/ti-ads131e08.c
+++ b/drivers/iio/adc/ti-ads131e08.c
@@ -807,6 +807,8 @@ static int ads131e08_probe(struct spi_device *spi)
 	int ret;
 
 	info = device_get_match_data(&spi->dev);
+	if (!info)
+		info = (void *)spi_get_device_id(spi)->driver_data;
 	if (!info) {
 		dev_err(&spi->dev, "failed to get match data\n");
 		return -ENODEV;
@@ -926,12 +928,21 @@ static const struct of_device_id ads131e08_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ads131e08_of_match);
 
+static const struct spi_device_id ads131e08_ids[] = {
+	{ "ads131e04", (kernel_ulong_t)&ads131e08_info_tbl[ads131e04] },
+	{ "ads131e06", (kernel_ulong_t)&ads131e08_info_tbl[ads131e06] },
+	{ "ads131e08", (kernel_ulong_t)&ads131e08_info_tbl[ads131e08] },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ads131e08_ids);
+
 static struct spi_driver ads131e08_driver = {
 	.driver = {
 		.name = "ads131e08",
 		.of_match_table = ads131e08_of_match,
 	},
 	.probe = ads131e08_probe,
+	.id_table = ads131e08_ids,
 };
 module_spi_driver(ads131e08_driver);
 
-- 
GitLab


From 35dab731c4d2b1e71827a56826ef60f53e97fc32 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 21 Sep 2022 16:36:17 +0000
Subject: [PATCH 2244/4122] iio: accel: sca3300: Silence no spi_device_id
 warning

SPI devices use the spi_device_id for module autoloading even on
systems using device tree, after commit 5fa6863ba692 ("spi: Check
we have a spi_device_id for each DT compatible"), kernel warns as
follows since the spi_device_id is missing:

SPI driver sca3300 has no spi_device_id for murata,scl3300

Add spi_device_id entries to silence the warning, and ensure driver
module autoloading works.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Tomas Melin <tomas.melin@vaisala.com>
Link: https://lore.kernel.org/r/20220921163620.805879-3-weiyongjun@huaweicloud.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/sca3300.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/accel/sca3300.c b/drivers/iio/accel/sca3300.c
index eaa0c9cfda44c..306482b70fad7 100644
--- a/drivers/iio/accel/sca3300.c
+++ b/drivers/iio/accel/sca3300.c
@@ -679,12 +679,20 @@ static const struct of_device_id sca3300_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, sca3300_dt_ids);
 
+static const struct spi_device_id sca3300_ids[] = {
+	{ "sca3300" },
+	{ "scl3300" },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, sca3300_ids);
+
 static struct spi_driver sca3300_driver = {
-	.driver = {
+	.driver   = {
 		.name		= SCA3300_ALIAS,
 		.of_match_table = sca3300_dt_ids,
 	},
-	.probe	= sca3300_probe,
+	.probe	  = sca3300_probe,
+	.id_table = sca3300_ids,
 };
 module_spi_driver(sca3300_driver);
 
-- 
GitLab


From 283026528e0ee8ea89a60f9addaf0f2eda167c42 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 21 Sep 2022 16:36:18 +0000
Subject: [PATCH 2245/4122] iio: adc: ad9467: Silence no spi_device_id warnings

SPI devices use the spi_device_id for module autoloading even on
systems using device tree, after commit 5fa6863ba692 ("spi: Check
we have a spi_device_id for each DT compatible"), kernel warns as
follows since the spi_device_id is missing:

SPI driver ad9467 has no spi_device_id for adi,ad9265
SPI driver ad9467 has no spi_device_id for adi,ad9434

Add spi_device_id entries to silence the warnings, and ensure driver
module autoloading works.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Link: https://lore.kernel.org/r/20220921163620.805879-4-weiyongjun@huaweicloud.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad9467.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/iio/adc/ad9467.c b/drivers/iio/adc/ad9467.c
index 7534572f74757..0621cf59d6146 100644
--- a/drivers/iio/adc/ad9467.c
+++ b/drivers/iio/adc/ad9467.c
@@ -387,6 +387,8 @@ static int ad9467_probe(struct spi_device *spi)
 	int ret;
 
 	info = of_device_get_match_data(&spi->dev);
+	if (!info)
+		info = (void *)spi_get_device_id(spi)->driver_data;
 	if (!info)
 		return -ENODEV;
 
@@ -447,12 +449,21 @@ static const struct of_device_id ad9467_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ad9467_of_match);
 
+static const struct spi_device_id ad9467_ids[] = {
+	{ "ad9265", (kernel_ulong_t)&ad9467_chip_tbl[ID_AD9265] },
+	{ "ad9434", (kernel_ulong_t)&ad9467_chip_tbl[ID_AD9434] },
+	{ "ad9467", (kernel_ulong_t)&ad9467_chip_tbl[ID_AD9467] },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad9467_ids);
+
 static struct spi_driver ad9467_driver = {
 	.driver = {
 		.name = "ad9467",
 		.of_match_table = ad9467_of_match,
 	},
 	.probe = ad9467_probe,
+	.id_table = ad9467_ids,
 };
 module_spi_driver(ad9467_driver);
 
-- 
GitLab


From 935779eac00aaae02fa61e12c81775e165b11164 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 21 Sep 2022 16:36:19 +0000
Subject: [PATCH 2246/4122] iio: adc: ad7192: Silence no spi_device_id warnings

SPI devices use the spi_device_id for module autoloading even on
systems using device tree, after commit 5fa6863ba692 ("spi: Check
we have a spi_device_id for each DT compatible"), kernel warns as
follows since the spi_device_id is missing:

SPI driver ad7192 has no spi_device_id for adi,ad7190
SPI driver ad7192 has no spi_device_id for adi,ad7193
SPI driver ad7192 has no spi_device_id for adi,ad7195

Add spi_device_id entries to silence the warnings, and ensure driver
module autoloading works.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Link: https://lore.kernel.org/r/20220921163620.805879-5-weiyongjun@huaweicloud.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7192.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
index d71977be7d228..f5067173deb62 100644
--- a/drivers/iio/adc/ad7192.c
+++ b/drivers/iio/adc/ad7192.c
@@ -1037,6 +1037,8 @@ static int ad7192_probe(struct spi_device *spi)
 	st->int_vref_mv = ret / 1000;
 
 	st->chip_info = of_device_get_match_data(&spi->dev);
+	if (!st->chip_info)
+		st->chip_info = (void *)spi_get_device_id(spi)->driver_data;
 	indio_dev->name = st->chip_info->name;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
@@ -1098,12 +1100,22 @@ static const struct of_device_id ad7192_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ad7192_of_match);
 
+static const struct spi_device_id ad7192_ids[] = {
+	{ "ad7190", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7190] },
+	{ "ad7192", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7192] },
+	{ "ad7193", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7193] },
+	{ "ad7195", (kernel_ulong_t)&ad7192_chip_info_tbl[ID_AD7195] },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad7192_ids);
+
 static struct spi_driver ad7192_driver = {
 	.driver = {
 		.name	= "ad7192",
 		.of_match_table = ad7192_of_match,
 	},
 	.probe		= ad7192_probe,
+	.id_table	= ad7192_ids,
 };
 module_spi_driver(ad7192_driver);
 
-- 
GitLab


From 3a258747a01f1f21fd4c10a07499bde684f8ca2a Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 21 Sep 2022 16:36:20 +0000
Subject: [PATCH 2247/4122] iio: adc: ad7124: Silence no spi_device_id warnings

SPI devices use the spi_device_id for module autoloading even on
systems using device tree, after commit 5fa6863ba692 ("spi: Check
we have a spi_device_id for each DT compatible"), kernel warns as
follows since the spi_device_id is missing:

SPI driver ad7124 has no spi_device_id for adi,ad7124-4
SPI driver ad7124 has no spi_device_id for adi,ad7124-8

Add spi_device_id entries to silence the warnings, and ensure driver
module autoloading works.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Link: https://lore.kernel.org/r/20220921163620.805879-6-weiyongjun@huaweicloud.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7124.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/iio/adc/ad7124.c b/drivers/iio/adc/ad7124.c
index 4088786e1026d..050a2fbf5c493 100644
--- a/drivers/iio/adc/ad7124.c
+++ b/drivers/iio/adc/ad7124.c
@@ -944,6 +944,8 @@ static int ad7124_probe(struct spi_device *spi)
 	int i, ret;
 
 	info = of_device_get_match_data(&spi->dev);
+	if (!info)
+		info = (void *)spi_get_device_id(spi)->driver_data;
 	if (!info)
 		return -ENODEV;
 
@@ -1021,12 +1023,20 @@ static const struct of_device_id ad7124_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, ad7124_of_match);
 
+static const struct spi_device_id ad71124_ids[] = {
+	{ "ad7124-4", (kernel_ulong_t)&ad7124_chip_info_tbl[ID_AD7124_4] },
+	{ "ad7124-8", (kernel_ulong_t)&ad7124_chip_info_tbl[ID_AD7124_8] },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad71124_ids);
+
 static struct spi_driver ad71124_driver = {
 	.driver = {
 		.name = "ad7124",
 		.of_match_table = ad7124_of_match,
 	},
 	.probe = ad7124_probe,
+	.id_table = ad71124_ids,
 };
 module_spi_driver(ad71124_driver);
 
-- 
GitLab


From 8f347c565df4e8dd2c862a48a3056bfe59d315e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:48:54 +0200
Subject: [PATCH 2248/4122] iio: adc: ad799x: do not use internal iio_dev lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

'mlock' was being grabbed when setting the device frequency. In order to
not introduce any functional change a new lock is added. With that in
mind, the lock also needs to be grabbed in the places where 'mlock' is
since it was also being used to protect st->config against the current
device state.

On the other places the lock was being used, we can just drop
it since we are only doing one i2c bus read/write which is already
safe.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-2-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad799x.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c
index 6dbe9d5e08a2b..4730d8d0f4c3f 100644
--- a/drivers/iio/adc/ad799x.c
+++ b/drivers/iio/adc/ad799x.c
@@ -28,6 +28,7 @@
 #include <linux/types.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/bitops.h>
 
 #include <linux/iio/iio.h>
@@ -125,6 +126,8 @@ struct ad799x_state {
 	const struct ad799x_chip_config	*chip_config;
 	struct regulator		*reg;
 	struct regulator		*vref;
+	/* lock to protect against multiple access to the device */
+	struct mutex			lock;
 	unsigned			id;
 	u16				config;
 
@@ -290,7 +293,9 @@ static int ad799x_read_raw(struct iio_dev *indio_dev,
 		ret = iio_device_claim_direct_mode(indio_dev);
 		if (ret)
 			return ret;
+		mutex_lock(&st->lock);
 		ret = ad799x_scan_direct(st, chan->scan_index);
+		mutex_unlock(&st->lock);
 		iio_device_release_direct_mode(indio_dev);
 
 		if (ret < 0)
@@ -351,7 +356,8 @@ static ssize_t ad799x_write_frequency(struct device *dev,
 	if (ret)
 		return ret;
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&st->lock);
+
 	ret = i2c_smbus_read_byte_data(st->client, AD7998_CYCLE_TMR_REG);
 	if (ret < 0)
 		goto error_ret_mutex;
@@ -373,7 +379,7 @@ static ssize_t ad799x_write_frequency(struct device *dev,
 	ret = len;
 
 error_ret_mutex:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&st->lock);
 
 	return ret;
 }
@@ -407,6 +413,8 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev,
 	if (ret)
 		return ret;
 
+	mutex_lock(&st->lock);
+
 	if (state)
 		st->config |= BIT(chan->scan_index) << AD799X_CHANNEL_SHIFT;
 	else
@@ -418,6 +426,7 @@ static int ad799x_write_event_config(struct iio_dev *indio_dev,
 		st->config &= ~AD7998_ALERT_EN;
 
 	ret = ad799x_write_config(st, st->config);
+	mutex_unlock(&st->lock);
 	iio_device_release_direct_mode(indio_dev);
 	return ret;
 }
@@ -454,11 +463,9 @@ static int ad799x_write_event_value(struct iio_dev *indio_dev,
 	if (val < 0 || val > GENMASK(chan->scan_type.realbits - 1, 0))
 		return -EINVAL;
 
-	mutex_lock(&indio_dev->mlock);
 	ret = i2c_smbus_write_word_swapped(st->client,
 		ad799x_threshold_reg(chan, dir, info),
 		val << chan->scan_type.shift);
-	mutex_unlock(&indio_dev->mlock);
 
 	return ret;
 }
@@ -473,10 +480,8 @@ static int ad799x_read_event_value(struct iio_dev *indio_dev,
 	int ret;
 	struct ad799x_state *st = iio_priv(indio_dev);
 
-	mutex_lock(&indio_dev->mlock);
 	ret = i2c_smbus_read_word_swapped(st->client,
 		ad799x_threshold_reg(chan, dir, info));
-	mutex_unlock(&indio_dev->mlock);
 	if (ret < 0)
 		return ret;
 	*val = (ret >> chan->scan_type.shift) &
@@ -863,6 +868,9 @@ static int ad799x_probe(struct i2c_client *client,
 		if (ret)
 			goto error_cleanup_ring;
 	}
+
+	mutex_init(&st->lock);
+
 	ret = iio_device_register(indio_dev);
 	if (ret)
 		goto error_cleanup_ring;
-- 
GitLab


From ed3aa67167bed8825993e6483c6dbeae607c1ff9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:48:55 +0200
Subject: [PATCH 2249/4122] iio: adc: axp288_adc: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-3-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/axp288_adc.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/axp288_adc.c b/drivers/iio/adc/axp288_adc.c
index 580361bd98492..49fff1cabd0dc 100644
--- a/drivers/iio/adc/axp288_adc.c
+++ b/drivers/iio/adc/axp288_adc.c
@@ -9,6 +9,7 @@
 
 #include <linux/dmi.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/kernel.h>
 #include <linux/device.h>
 #include <linux/regmap.h>
@@ -50,6 +51,8 @@ enum axp288_adc_id {
 struct axp288_adc_info {
 	int irq;
 	struct regmap *regmap;
+	/* lock to protect against multiple access to the device */
+	struct mutex lock;
 	bool ts_enabled;
 };
 
@@ -161,7 +164,7 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
 	int ret;
 	struct axp288_adc_info *info = iio_priv(indio_dev);
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&info->lock);
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
 		if (axp288_adc_set_ts(info, AXP288_ADC_TS_CURRENT_ON_ONDEMAND,
@@ -178,7 +181,7 @@ static int axp288_adc_read_raw(struct iio_dev *indio_dev,
 	default:
 		ret = -EINVAL;
 	}
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&info->lock);
 
 	return ret;
 }
@@ -289,6 +292,8 @@ static int axp288_adc_probe(struct platform_device *pdev)
 	if (ret < 0)
 		return ret;
 
+	mutex_init(&info->lock);
+
 	return devm_iio_device_register(&pdev->dev, indio_dev);
 }
 
-- 
GitLab


From 7dde7ec2a84d598eb755883540d48a5ee73948ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:48:56 +0200
Subject: [PATCH 2250/4122] iio: adc: imx7d_adc: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-4-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/imx7d_adc.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/iio/adc/imx7d_adc.c b/drivers/iio/adc/imx7d_adc.c
index 86caff1d006b9..22da81bac97f1 100644
--- a/drivers/iio/adc/imx7d_adc.c
+++ b/drivers/iio/adc/imx7d_adc.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
@@ -108,7 +109,8 @@ struct imx7d_adc {
 	struct device *dev;
 	void __iomem *regs;
 	struct clk *clk;
-
+	/* lock to protect against multiple access to the device */
+	struct mutex lock;
 	u32 vref_uv;
 	u32 value;
 	u32 channel;
@@ -293,7 +295,7 @@ static int imx7d_adc_read_raw(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		mutex_lock(&indio_dev->mlock);
+		mutex_lock(&info->lock);
 		reinit_completion(&info->completion);
 
 		channel = chan->channel & 0x03;
@@ -303,16 +305,16 @@ static int imx7d_adc_read_raw(struct iio_dev *indio_dev,
 		ret = wait_for_completion_interruptible_timeout
 				(&info->completion, IMX7D_ADC_TIMEOUT);
 		if (ret == 0) {
-			mutex_unlock(&indio_dev->mlock);
+			mutex_unlock(&info->lock);
 			return -ETIMEDOUT;
 		}
 		if (ret < 0) {
-			mutex_unlock(&indio_dev->mlock);
+			mutex_unlock(&info->lock);
 			return ret;
 		}
 
 		*val = info->value;
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&info->lock);
 		return IIO_VAL_INT;
 
 	case IIO_CHAN_INFO_SCALE:
@@ -531,6 +533,8 @@ static int imx7d_adc_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	mutex_init(&info->lock);
+
 	ret = devm_iio_device_register(dev, indio_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Couldn't register the device.\n");
-- 
GitLab


From 98c4fb93d1d448db191eea795a40072dc61da07d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:48:57 +0200
Subject: [PATCH 2251/4122] iio: adc: lpc32xx_adc: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-5-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/lpc32xx_adc.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/lpc32xx_adc.c b/drivers/iio/adc/lpc32xx_adc.c
index b56ce15255cfb..732c924a976dd 100644
--- a/drivers/iio/adc/lpc32xx_adc.c
+++ b/drivers/iio/adc/lpc32xx_adc.c
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/mod_devicetable.h>
+#include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 
@@ -49,6 +50,8 @@ struct lpc32xx_adc_state {
 	struct clk *clk;
 	struct completion completion;
 	struct regulator *vref;
+	/* lock to protect against multiple access to the device */
+	struct mutex lock;
 
 	u32 value;
 };
@@ -64,10 +67,10 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		mutex_lock(&indio_dev->mlock);
+		mutex_lock(&st->lock);
 		ret = clk_prepare_enable(st->clk);
 		if (ret) {
-			mutex_unlock(&indio_dev->mlock);
+			mutex_unlock(&st->lock);
 			return ret;
 		}
 		/* Measurement setup */
@@ -80,7 +83,7 @@ static int lpc32xx_read_raw(struct iio_dev *indio_dev,
 		wait_for_completion(&st->completion); /* set by ISR */
 		clk_disable_unprepare(st->clk);
 		*val = st->value;
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&st->lock);
 
 		return IIO_VAL_INT;
 
@@ -201,6 +204,8 @@ static int lpc32xx_adc_probe(struct platform_device *pdev)
 	iodev->modes = INDIO_DIRECT_MODE;
 	iodev->num_channels = ARRAY_SIZE(lpc32xx_adc_iio_channels);
 
+	mutex_init(&st->lock);
+
 	retval = devm_iio_device_register(&pdev->dev, iodev);
 	if (retval)
 		return retval;
-- 
GitLab


From da8091f8acfa953ac55e2aa8d4218e49b18206a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:48:58 +0200
Subject: [PATCH 2252/4122] iio: adc: ltc2947-core: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-6-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ltc2497-core.c | 7 +++++--
 drivers/iio/adc/ltc2497.h      | 2 ++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/ltc2497-core.c b/drivers/iio/adc/ltc2497-core.c
index f52d37af4d1f1..996f6cbbed3ce 100644
--- a/drivers/iio/adc/ltc2497-core.c
+++ b/drivers/iio/adc/ltc2497-core.c
@@ -10,6 +10,7 @@
 #include <linux/iio/iio.h>
 #include <linux/iio/driver.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/regulator/consumer.h>
 
 #include "ltc2497.h"
@@ -81,9 +82,9 @@ static int ltc2497core_read_raw(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		mutex_lock(&indio_dev->mlock);
+		mutex_lock(&ddata->lock);
 		ret = ltc2497core_read(ddata, chan->address, val);
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&ddata->lock);
 		if (ret < 0)
 			return ret;
 
@@ -214,6 +215,8 @@ int ltc2497core_probe(struct device *dev, struct iio_dev *indio_dev)
 	ddata->addr_prev = LTC2497_CONFIG_DEFAULT;
 	ddata->time_prev = ktime_get();
 
+	mutex_init(&ddata->lock);
+
 	ret = iio_device_register(indio_dev);
 	if (ret < 0)
 		goto err_array_unregister;
diff --git a/drivers/iio/adc/ltc2497.h b/drivers/iio/adc/ltc2497.h
index e023de0d88c46..781519b524756 100644
--- a/drivers/iio/adc/ltc2497.h
+++ b/drivers/iio/adc/ltc2497.h
@@ -12,6 +12,8 @@ struct ltc2497_chip_info {
 struct ltc2497core_driverdata {
 	struct regulator *ref;
 	ktime_t	time_prev;
+	/* lock to protect against multiple access to the device */
+	struct mutex lock;
 	const struct ltc2497_chip_info	*chip_info;
 	u8 addr_prev;
 	int (*result_and_measure)(struct ltc2497core_driverdata *ddata,
-- 
GitLab


From d0c09264f1a64ba8435acfaa70380f61239fc0d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:48:59 +0200
Subject: [PATCH 2253/4122] iio: adc: meson_saradc: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-7-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/meson_saradc.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/meson_saradc.c b/drivers/iio/adc/meson_saradc.c
index 1a68b099d323c..85b6826cc10cf 100644
--- a/drivers/iio/adc/meson_saradc.c
+++ b/drivers/iio/adc/meson_saradc.c
@@ -12,6 +12,7 @@
 #include <linux/io.h>
 #include <linux/iio/iio.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/interrupt.h>
 #include <linux/of.h>
@@ -276,6 +277,8 @@ struct meson_sar_adc_priv {
 	struct clk				*adc_div_clk;
 	struct clk_divider			clk_div;
 	struct completion			done;
+	/* lock to protect against multiple access to the device */
+	struct mutex				lock;
 	int					calibbias;
 	int					calibscale;
 	struct regmap				*tsc_regmap;
@@ -486,7 +489,7 @@ static int meson_sar_adc_lock(struct iio_dev *indio_dev)
 	struct meson_sar_adc_priv *priv = iio_priv(indio_dev);
 	int val, ret;
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&priv->lock);
 
 	if (priv->param->has_bl30_integration) {
 		/* prevent BL30 from using the SAR ADC while we are using it */
@@ -504,7 +507,7 @@ static int meson_sar_adc_lock(struct iio_dev *indio_dev)
 						      !(val & MESON_SAR_ADC_DELAY_BL30_BUSY),
 						      1, 10000);
 		if (ret) {
-			mutex_unlock(&indio_dev->mlock);
+			mutex_unlock(&priv->lock);
 			return ret;
 		}
 	}
@@ -521,7 +524,7 @@ static void meson_sar_adc_unlock(struct iio_dev *indio_dev)
 		regmap_update_bits(priv->regmap, MESON_SAR_ADC_DELAY,
 				   MESON_SAR_ADC_DELAY_KERNEL_BUSY, 0);
 
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&priv->lock);
 }
 
 static void meson_sar_adc_clear_fifo(struct iio_dev *indio_dev)
@@ -1250,6 +1253,8 @@ static int meson_sar_adc_probe(struct platform_device *pdev)
 	if (ret)
 		goto err;
 
+	mutex_init(&priv->lock);
+
 	ret = meson_sar_adc_hw_enable(indio_dev);
 	if (ret)
 		goto err;
-- 
GitLab


From bb690935df8dd8f97612b422c669bd1a5fe87096 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:49:00 +0200
Subject: [PATCH 2254/4122] iio: adc: rockchip_saradc: do not use internal
 iio_dev lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Acked-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20221004134909.1692021-8-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/rockchip_saradc.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/iio/adc/rockchip_saradc.c b/drivers/iio/adc/rockchip_saradc.c
index b87ea7148b586..79448c5ffc2ae 100644
--- a/drivers/iio/adc/rockchip_saradc.c
+++ b/drivers/iio/adc/rockchip_saradc.c
@@ -5,6 +5,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
@@ -49,6 +50,8 @@ struct rockchip_saradc {
 	struct clk		*clk;
 	struct completion	completion;
 	struct regulator	*vref;
+	/* lock to protect against multiple access to the device */
+	struct mutex		lock;
 	int			uv_vref;
 	struct reset_control	*reset;
 	const struct rockchip_saradc_data *data;
@@ -94,17 +97,17 @@ static int rockchip_saradc_read_raw(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		mutex_lock(&indio_dev->mlock);
+		mutex_lock(&info->lock);
 
 		ret = rockchip_saradc_conversion(info, chan);
 		if (ret) {
 			rockchip_saradc_power_down(info);
-			mutex_unlock(&indio_dev->mlock);
+			mutex_unlock(&info->lock);
 			return ret;
 		}
 
 		*val = info->last_val;
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&info->lock);
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_SCALE:
 		*val = info->uv_vref / 1000;
@@ -270,7 +273,7 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p)
 	int ret;
 	int i, j = 0;
 
-	mutex_lock(&i_dev->mlock);
+	mutex_lock(&info->lock);
 
 	for_each_set_bit(i, i_dev->active_scan_mask, i_dev->masklength) {
 		const struct iio_chan_spec *chan = &i_dev->channels[i];
@@ -287,7 +290,7 @@ static irqreturn_t rockchip_saradc_trigger_handler(int irq, void *p)
 
 	iio_push_to_buffers_with_timestamp(i_dev, &data, iio_get_time_ns(i_dev));
 out:
-	mutex_unlock(&i_dev->mlock);
+	mutex_unlock(&info->lock);
 
 	iio_trigger_notify_done(i_dev->trig);
 
@@ -478,6 +481,8 @@ static int rockchip_saradc_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	mutex_init(&info->lock);
+
 	return devm_iio_device_register(&pdev->dev, indio_dev);
 }
 
-- 
GitLab


From 8433aa3591afff6f4dc641a2e274e6171f66a4b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:49:01 +0200
Subject: [PATCH 2255/4122] iio: adc: sc27xx_adc: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-9-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/sc27xx_adc.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/adc/sc27xx_adc.c b/drivers/iio/adc/sc27xx_adc.c
index f8421cbba8fa5..ff1fc329bb9b7 100644
--- a/drivers/iio/adc/sc27xx_adc.c
+++ b/drivers/iio/adc/sc27xx_adc.c
@@ -4,6 +4,7 @@
 #include <linux/hwspinlock.h>
 #include <linux/iio/iio.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/nvmem-consumer.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -83,6 +84,8 @@ struct sc27xx_adc_data {
 	struct device *dev;
 	struct regulator *volref;
 	struct regmap *regmap;
+	/* lock to protect against multiple access to the device */
+	struct mutex lock;
 	/*
 	 * One hardware spinlock to synchronize between the multiple
 	 * subsystems which will access the unique ADC controller.
@@ -664,9 +667,9 @@ static int sc27xx_adc_read_raw(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
-		mutex_lock(&indio_dev->mlock);
+		mutex_lock(&data->lock);
 		ret = sc27xx_adc_read(data, chan->channel, scale, &tmp);
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&data->lock);
 
 		if (ret)
 			return ret;
@@ -675,10 +678,10 @@ static int sc27xx_adc_read_raw(struct iio_dev *indio_dev,
 		return IIO_VAL_INT;
 
 	case IIO_CHAN_INFO_PROCESSED:
-		mutex_lock(&indio_dev->mlock);
+		mutex_lock(&data->lock);
 		ret = sc27xx_adc_read_processed(data, chan->channel, scale,
 						&tmp);
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&data->lock);
 
 		if (ret)
 			return ret;
@@ -934,6 +937,9 @@ static int sc27xx_adc_probe(struct platform_device *pdev)
 	indio_dev->info = &sc27xx_info;
 	indio_dev->channels = sc27xx_channels;
 	indio_dev->num_channels = ARRAY_SIZE(sc27xx_channels);
+
+	mutex_init(&sc27xx_data->lock);
+
 	ret = devm_iio_device_register(dev, indio_dev);
 	if (ret)
 		dev_err(dev, "could not register iio (ADC)");
-- 
GitLab


From f2bdea865e776b571557035eac2e8afde8ea7844 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:49:02 +0200
Subject: [PATCH 2256/4122] iio: adc: vf610_adc: add helper function to read
 samples
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This is a precursor change to make it simpler to remove the 'mlock'
usage. Having the code in it's own helper function, also makes it easier
to read the error paths.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-10-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/vf610_adc.c | 94 +++++++++++++++++++++----------------
 1 file changed, 54 insertions(+), 40 deletions(-)

diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c
index c6b16cf6e3675..a6f9182d7766a 100644
--- a/drivers/iio/adc/vf610_adc.c
+++ b/drivers/iio/adc/vf610_adc.c
@@ -622,6 +622,58 @@ static const struct attribute_group vf610_attribute_group = {
 	.attrs = vf610_attributes,
 };
 
+static int vf610_read_sample(struct iio_dev *indio_dev,
+			     struct iio_chan_spec const *chan, int *val)
+{
+	struct vf610_adc *info = iio_priv(indio_dev);
+	unsigned int hc_cfg;
+	int ret;
+
+	mutex_lock(&indio_dev->mlock);
+	if (iio_buffer_enabled(indio_dev)) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+
+	reinit_completion(&info->completion);
+	hc_cfg = VF610_ADC_ADCHC(chan->channel);
+	hc_cfg |= VF610_ADC_AIEN;
+	writel(hc_cfg, info->regs + VF610_REG_ADC_HC0);
+	ret = wait_for_completion_interruptible_timeout(&info->completion,
+							VF610_ADC_TIMEOUT);
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+		goto out_unlock;
+	}
+
+	if (ret < 0)
+		goto out_unlock;
+
+	switch (chan->type) {
+	case IIO_VOLTAGE:
+		*val = info->value;
+		break;
+	case IIO_TEMP:
+		/*
+		 * Calculate in degree Celsius times 1000
+		 * Using the typical sensor slope of 1.84 mV/°C
+		 * and VREFH_ADC at 3.3V, V at 25°C of 699 mV
+		 */
+		*val = 25000 - ((int)info->value - VF610_VTEMP25_3V3) *
+				1000000 / VF610_TEMP_SLOPE_COEFF;
+
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+out_unlock:
+	mutex_unlock(&indio_dev->mlock);
+
+	return ret;
+}
+
 static int vf610_read_raw(struct iio_dev *indio_dev,
 			struct iio_chan_spec const *chan,
 			int *val,
@@ -629,53 +681,15 @@ static int vf610_read_raw(struct iio_dev *indio_dev,
 			long mask)
 {
 	struct vf610_adc *info = iio_priv(indio_dev);
-	unsigned int hc_cfg;
 	long ret;
 
 	switch (mask) {
 	case IIO_CHAN_INFO_RAW:
 	case IIO_CHAN_INFO_PROCESSED:
-		mutex_lock(&indio_dev->mlock);
-		if (iio_buffer_enabled(indio_dev)) {
-			mutex_unlock(&indio_dev->mlock);
-			return -EBUSY;
-		}
-
-		reinit_completion(&info->completion);
-		hc_cfg = VF610_ADC_ADCHC(chan->channel);
-		hc_cfg |= VF610_ADC_AIEN;
-		writel(hc_cfg, info->regs + VF610_REG_ADC_HC0);
-		ret = wait_for_completion_interruptible_timeout
-				(&info->completion, VF610_ADC_TIMEOUT);
-		if (ret == 0) {
-			mutex_unlock(&indio_dev->mlock);
-			return -ETIMEDOUT;
-		}
-		if (ret < 0) {
-			mutex_unlock(&indio_dev->mlock);
+		ret = vf610_read_sample(indio_dev, chan, val);
+		if (ret < 0)
 			return ret;
-		}
-
-		switch (chan->type) {
-		case IIO_VOLTAGE:
-			*val = info->value;
-			break;
-		case IIO_TEMP:
-			/*
-			 * Calculate in degree Celsius times 1000
-			 * Using the typical sensor slope of 1.84 mV/°C
-			 * and VREFH_ADC at 3.3V, V at 25°C of 699 mV
-			 */
-			*val = 25000 - ((int)info->value - VF610_VTEMP25_3V3) *
-					1000000 / VF610_TEMP_SLOPE_COEFF;
-
-			break;
-		default:
-			mutex_unlock(&indio_dev->mlock);
-			return -EINVAL;
-		}
 
-		mutex_unlock(&indio_dev->mlock);
 		return IIO_VAL_INT;
 
 	case IIO_CHAN_INFO_SCALE:
-- 
GitLab


From 4e15cad8dbf9991d430c31166040575bf972b179 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:49:03 +0200
Subject: [PATCH 2257/4122] iio: adc: vf610_adc: vf610_adc: do not use internal
 iio_dev lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to drop the internal lock usage we needed two different things:

1) The first place where 'mlock' was being used was a typical case where
iio_device_claim_direct_mode() fits perfectly.
2) In the second case, it was being used to prevent concurrent accesses
to the device and shared data but nothing was being enforced with
regards to buffering (i.e, there was nothing preventing from changing
the conversion mode while buffering). Hence, in this case, a new lock
was introduced in the state structure.

Note that the goal is not to introduce any functional change and that is
the reason why a new lock was introduced to guarantee 2).

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-11-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/vf610_adc.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/iio/adc/vf610_adc.c b/drivers/iio/adc/vf610_adc.c
index a6f9182d7766a..ae31aafd26538 100644
--- a/drivers/iio/adc/vf610_adc.c
+++ b/drivers/iio/adc/vf610_adc.c
@@ -7,6 +7,7 @@
 
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/property.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
@@ -156,6 +157,9 @@ struct vf610_adc {
 	void __iomem *regs;
 	struct clk *clk;
 
+	/* lock to protect against multiple access to the device */
+	struct mutex lock;
+
 	u32 vref_uv;
 	u32 value;
 	struct regulator *vref;
@@ -467,11 +471,11 @@ static int vf610_set_conversion_mode(struct iio_dev *indio_dev,
 {
 	struct vf610_adc *info = iio_priv(indio_dev);
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&info->lock);
 	info->adc_feature.conv_mode = mode;
 	vf610_adc_calculate_rates(info);
 	vf610_adc_hw_init(info);
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&info->lock);
 
 	return 0;
 }
@@ -629,12 +633,11 @@ static int vf610_read_sample(struct iio_dev *indio_dev,
 	unsigned int hc_cfg;
 	int ret;
 
-	mutex_lock(&indio_dev->mlock);
-	if (iio_buffer_enabled(indio_dev)) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
+	ret = iio_device_claim_direct_mode(indio_dev);
+	if (ret)
+		return ret;
 
+	mutex_lock(&info->lock);
 	reinit_completion(&info->completion);
 	hc_cfg = VF610_ADC_ADCHC(chan->channel);
 	hc_cfg |= VF610_ADC_AIEN;
@@ -669,7 +672,8 @@ static int vf610_read_sample(struct iio_dev *indio_dev,
 	}
 
 out_unlock:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&info->lock);
+	iio_device_release_direct_mode(indio_dev);
 
 	return ret;
 }
@@ -892,6 +896,8 @@ static int vf610_adc_probe(struct platform_device *pdev)
 		goto error_iio_device_register;
 	}
 
+	mutex_init(&info->lock);
+
 	ret = iio_device_register(indio_dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Couldn't register the device.\n");
-- 
GitLab


From d711a5a7eff4855deb0e2c2663e679dc205e2d9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:49:04 +0200
Subject: [PATCH 2258/4122] iio: common: scmi_iio: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-12-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/common/scmi_sensors/scmi_iio.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/common/scmi_sensors/scmi_iio.c b/drivers/iio/common/scmi_sensors/scmi_iio.c
index 54ccf19ab2bb1..d92f7f651f7b3 100644
--- a/drivers/iio/common/scmi_sensors/scmi_iio.c
+++ b/drivers/iio/common/scmi_sensors/scmi_iio.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/scmi_protocol.h>
 #include <linux/time.h>
 #include <linux/types.h>
@@ -27,6 +28,8 @@ struct scmi_iio_priv {
 	struct scmi_protocol_handle *ph;
 	const struct scmi_sensor_info *sensor_info;
 	struct iio_dev *indio_dev;
+	/* lock to protect against multiple access to the device */
+	struct mutex lock;
 	/* adding one additional channel for timestamp */
 	s64 iio_buf[SCMI_IIO_NUM_OF_AXIS + 1];
 	struct notifier_block sensor_update_nb;
@@ -198,13 +201,14 @@ static int scmi_iio_write_raw(struct iio_dev *iio_dev,
 			      struct iio_chan_spec const *chan, int val,
 			      int val2, long mask)
 {
+	struct scmi_iio_priv *sensor = iio_priv(iio_dev);
 	int err;
 
 	switch (mask) {
 	case IIO_CHAN_INFO_SAMP_FREQ:
-		mutex_lock(&iio_dev->mlock);
+		mutex_lock(&sensor->lock);
 		err = scmi_iio_set_odr_val(iio_dev, val, val2);
-		mutex_unlock(&iio_dev->mlock);
+		mutex_unlock(&sensor->lock);
 		return err;
 	default:
 		return -EINVAL;
@@ -586,6 +590,7 @@ scmi_alloc_iiodev(struct scmi_device *sdev,
 	sensor->sensor_info = sensor_info;
 	sensor->sensor_update_nb.notifier_call = scmi_iio_sensor_update_cb;
 	sensor->indio_dev = iiodev;
+	mutex_init(&sensor->lock);
 
 	/* adding one additional channel for timestamp */
 	iiodev->num_channels = sensor_info->num_axis + 1;
-- 
GitLab


From 3cc36cabc669fffa7f04931e3dd25dc47314ec06 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Tue, 4 Oct 2022 15:49:05 +0200
Subject: [PATCH 2259/4122] iio: gyro: itg3200_core: do not use internal
 iio_dev lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The iio_device lock is only meant for internal use. Hence define a
device local lock to protect against concurrent accesses.

While at it, properly include "mutex.h" for mutex related APIs.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004134909.1692021-13-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/itg3200_core.c  | 10 +++++++---
 include/linux/iio/gyro/itg3200.h |  2 ++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/gyro/itg3200_core.c b/drivers/iio/gyro/itg3200_core.c
index 4215015845875..74ca224684968 100644
--- a/drivers/iio/gyro/itg3200_core.c
+++ b/drivers/iio/gyro/itg3200_core.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/module.h>
+#include <linux/mutex.h>
 #include <linux/delay.h>
 
 #include <linux/iio/iio.h>
@@ -131,6 +132,7 @@ static int itg3200_write_raw(struct iio_dev *indio_dev,
 			     int val2,
 			     long mask)
 {
+	struct itg3200 *st = iio_priv(indio_dev);
 	int ret;
 	u8 t;
 
@@ -139,11 +141,11 @@ static int itg3200_write_raw(struct iio_dev *indio_dev,
 		if (val == 0 || val2 != 0)
 			return -EINVAL;
 
-		mutex_lock(&indio_dev->mlock);
+		mutex_lock(&st->lock);
 
 		ret = itg3200_read_reg_8(indio_dev, ITG3200_REG_DLPF, &t);
 		if (ret) {
-			mutex_unlock(&indio_dev->mlock);
+			mutex_unlock(&st->lock);
 			return ret;
 		}
 		t = ((t & ITG3200_DLPF_CFG_MASK) ? 1000u : 8000u) / val - 1;
@@ -152,7 +154,7 @@ static int itg3200_write_raw(struct iio_dev *indio_dev,
 					  ITG3200_REG_SAMPLE_RATE_DIV,
 					  t);
 
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&st->lock);
 		return ret;
 
 	default:
@@ -336,6 +338,8 @@ static int itg3200_probe(struct i2c_client *client,
 	if (ret)
 		goto error_remove_trigger;
 
+	mutex_init(&st->lock);
+
 	ret = iio_device_register(indio_dev);
 	if (ret)
 		goto error_remove_trigger;
diff --git a/include/linux/iio/gyro/itg3200.h b/include/linux/iio/gyro/itg3200.h
index a602fe7b84fa4..74b6d1cadc868 100644
--- a/include/linux/iio/gyro/itg3200.h
+++ b/include/linux/iio/gyro/itg3200.h
@@ -102,6 +102,8 @@ struct itg3200 {
 	struct i2c_client	*i2c;
 	struct iio_trigger	*trig;
 	struct iio_mount_matrix orientation;
+	/* lock to protect against multiple access to the device */
+	struct mutex		lock;
 };
 
 enum ITG3200_SCAN_INDEX {
-- 
GitLab


From 4b0c44bdb72e99f17b600ba5ba9acc81cf67e335 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Wed, 5 Oct 2022 10:50:43 +0200
Subject: [PATCH 2260/4122] dt-bindings: iio: dac: change ad5766 maintainer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the maintainer email no longer exists, change it to myself.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221005085044.204701-1-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml
index 29bd16dab5464..3c8784a54d2ce 100644
--- a/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml
+++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5766.yaml
@@ -8,7 +8,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: Analog Devices AD5766 DAC device driver
 
 maintainers:
-  - Cristian Pop <cristian.pop@analog.com>
+  - Nuno Sá <nuno.sa@analog.com>
 
 description: |
   Bindings for the Analog Devices AD5766 current DAC device. Datasheet can be
-- 
GitLab


From 8add74e75ea2a2356d5b4579cacbaef7f4828e35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Wed, 5 Oct 2022 10:50:44 +0200
Subject: [PATCH 2261/4122] dt-bindings: iio: frequency: change admv4420
 maintainer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the maintainer email no longer exists, change it to myself.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221005085044.204701-2-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/frequency/adi,admv4420.yaml         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml
index da7fe85ec92ef..071cda64865d3 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: ADMV4420 K Band Downconverter
 
 maintainers:
-  - Cristian Pop <cristian.pop@analog.com>
+  - Nuno Sá <nuno.sa@analog.com>
 
 description:
   The ADMV4420 is a highly integrated, double balanced, active
-- 
GitLab


From d26b79732d27ff45c83109e2484b96dcac223355 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:35 +0200
Subject: [PATCH 2262/4122] dt-bindings: iio: addac: adi,ad74413r: use
 spi-peripheral-props.yaml

Reference the spi-peripheral-props.yaml schema to allow using all
properties typical for SPI-connected devices, even these which device
bindings author did not tried yet.

While changing additionalProperties->unevaluatedProperties, put it in
typical place, just before example DTS.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221004115642.63749-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/addac/adi,ad74413r.yaml        | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
index 03bb90a7f4f88..2e8dad9278f21 100644
--- a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
+++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
@@ -58,8 +58,6 @@ required:
   - spi-cpol
   - refin-supply
 
-additionalProperties: false
-
 patternProperties:
   "^channel@[0-3]$":
     type: object
@@ -103,6 +101,11 @@ patternProperties:
     required:
       - reg
 
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
+
 examples:
   - |
     #include <dt-bindings/gpio/gpio.h>
-- 
GitLab


From 1b96d663d97da5205526d7020dad5258b88baee2 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:36 +0200
Subject: [PATCH 2263/4122] dt-bindings: iio: addac: adi,ad74413r: improve
 example

Improve example by: dropping unrelated properties (status and cs-gpios)
and using generic node name.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221004115642.63749-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/addac/adi,ad74413r.yaml          | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
index 2e8dad9278f21..58b3ae14ccaa6 100644
--- a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
+++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
@@ -116,10 +116,7 @@ examples:
       #address-cells = <1>;
       #size-cells = <0>;
 
-      cs-gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
-      status = "okay";
-
-      ad74413r@0 {
+      addac@0 {
         compatible = "adi,ad74413r";
         reg = <0>;
         spi-max-frequency = <1000000>;
-- 
GitLab


From 00407a680e86db4d2fee43250849b7fa205507d3 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:37 +0200
Subject: [PATCH 2264/4122] dt-bindings: iio: frequency: use
 spi-peripheral-props.yaml

For devices connectable by SPI bus (e.g. already using
"spi-max-frequency" property), reference the "spi-peripheral-props.yaml"
schema to allow using all SPI device properties, even these which device
bindings author did not tried yet.

While changing additionalProperties->unevaluatedProperties, put it in
typical place, just before example DTS.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221004115642.63749-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/frequency/adi,adf4350.yaml     | 7 +++++--
 .../devicetree/bindings/iio/frequency/adi,admv1013.yaml    | 5 ++++-
 .../devicetree/bindings/iio/frequency/adi,admv1014.yaml    | 5 ++++-
 .../devicetree/bindings/iio/frequency/adi,admv4420.yaml    | 6 +++++-
 .../devicetree/bindings/iio/frequency/adi,adrf6780.yaml    | 5 ++++-
 5 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml
index d7f20b8518e06..43cbf27114c7b 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,adf4350.yaml
@@ -160,13 +160,16 @@ properties:
       2: +2dBm
       3: +5dBm
 
-additionalProperties: false
-
 required:
   - compatible
   - reg
   - clocks
 
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
+
 examples:
   - |
     spi {
diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml
index 23f1f3b55abb3..fc813bcb65320 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv1013.yaml
@@ -70,7 +70,10 @@ required:
   - clock-names
   - vcm-supply
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml
index 2716c1e8fe316..ab86daa2c56ec 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv1014.yaml
@@ -104,7 +104,10 @@ required:
   - clock-names
   - vcm-supply
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml
index 071cda64865d3..64f2352aac3d5 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,admv4420.yaml
@@ -37,7 +37,11 @@ required:
   - compatible
   - reg
 
-additionalProperties: false
+
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml
index 3a8ea93f4e0c1..f11391ab4b622 100644
--- a/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,adrf6780.yaml
@@ -113,7 +113,10 @@ required:
   - clocks
   - clock-names
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
-- 
GitLab


From d29c7f8ce90d549727d7a07b6096615f26b38e85 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:38 +0200
Subject: [PATCH 2265/4122] dt-bindings: iio: gyroscope: use
 spi-peripheral-props.yaml

For devices connectable by SPI bus (e.g. already using
"spi-max-frequency" property), reference the "spi-peripheral-props.yaml"
schema to allow using all SPI device properties, even these which device
bindings author did not tried yet.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221004115642.63749-4-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml      | 5 ++++-
 .../devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml    | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml b/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml
index 662ec59ca0af5..0ae2464b9bc42 100644
--- a/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml
+++ b/Documentation/devicetree/bindings/iio/gyroscope/adi,adxrs290.yaml
@@ -38,7 +38,10 @@ required:
   - spi-cpol
   - spi-cpha
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml b/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml
index 3f57a1b813e69..2c900e9dddc62 100644
--- a/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml
+++ b/Documentation/devicetree/bindings/iio/gyroscope/nxp,fxas21002c.yaml
@@ -56,7 +56,10 @@ required:
   - compatible
   - reg
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
-- 
GitLab


From acce6052cdf9f9fc03f2668f63a1d9e42dc8a424 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:39 +0200
Subject: [PATCH 2266/4122] dt-bindings: iio: imu: adi,adis16475: use
 spi-peripheral-props.yaml
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

For devices connectable by SPI bus (e.g. already using
"spi-max-frequency" property), reference the "spi-peripheral-props.yaml"
schema to allow using all SPI device properties, even these which device
bindings author did not tried yet.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221004115642.63749-5-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml b/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml
index a7574210175a9..5dbfae80bb281 100644
--- a/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml
+++ b/Documentation/devicetree/bindings/iio/imu/adi,adis16475.yaml
@@ -79,6 +79,7 @@ required:
   - spi-cpol
 
 allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
   - if:
       properties:
         compatible:
@@ -107,7 +108,7 @@ allOf:
       dependencies:
         adi,sync-mode: [ clocks ]
 
-additionalProperties: false
+unevaluatedProperties: false
 
 examples:
   - |
-- 
GitLab


From 87748b25c65a0fda5f605b7cf07cf9b79b257308 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:40 +0200
Subject: [PATCH 2267/4122] dt-bindings: iio: pressure: use
 spi-peripheral-props.yaml

For devices connectable by SPI bus (e.g. already using
"spi-max-frequency" property), reference the "spi-peripheral-props.yaml"
schema to allow using all SPI device properties, even these which device
bindings author did not tried yet.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221004115642.63749-6-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/pressure/meas,ms5611.yaml        | 5 ++++-
 .../devicetree/bindings/iio/pressure/murata,zpa2326.yaml     | 5 ++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml
index 4f06707450bf5..7fed750fa3ff7 100644
--- a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml
+++ b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml
@@ -30,7 +30,10 @@ required:
   - compatible
   - reg
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml b/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml
index d6103be03460b..c33640ddde586 100644
--- a/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml
+++ b/Documentation/devicetree/bindings/iio/pressure/murata,zpa2326.yaml
@@ -33,7 +33,10 @@ required:
   - compatible
   - reg
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
-- 
GitLab


From e91d40216890874791ee43ef40cc883ba412a84f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:41 +0200
Subject: [PATCH 2268/4122] dt-bindings: iio: proximity: ams,as3935: use
 spi-peripheral-props.yaml

For devices connectable by SPI bus (e.g. already using
"spi-max-frequency" property), reference the "spi-peripheral-props.yaml"
schema to allow using all SPI device properties, even these which device
bindings author did not tried yet.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221004115642.63749-7-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/proximity/ams,as3935.yaml        | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml b/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml
index 7fcba5d6d508e..710d3b9a86d90 100644
--- a/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml
+++ b/Documentation/devicetree/bindings/iio/proximity/ams,as3935.yaml
@@ -49,7 +49,10 @@ required:
   - spi-cpha
   - interrupts
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
-- 
GitLab


From 85250a2400ad43542d02b46b6e269343ea835331 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 4 Oct 2022 13:56:42 +0200
Subject: [PATCH 2269/4122] dt-bindings: iio: resolver: adi,ad2s90: use
 spi-peripheral-props.yaml

For devices connectable by SPI bus (e.g. already using
"spi-max-frequency" property), reference the "spi-peripheral-props.yaml"
schema to allow using all SPI device properties, even these which device
bindings author did not tried yet.

While changing additionalProperties->unevaluatedProperties, put it in
typical place, just before example DTS.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221004115642.63749-8-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/resolver/adi,ad2s90.yaml       | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml b/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml
index 81e4bdfc17c43..b24e5a202a48a 100644
--- a/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml
+++ b/Documentation/devicetree/bindings/iio/resolver/adi,ad2s90.yaml
@@ -33,8 +33,6 @@ properties:
 
   spi-cpha: true
 
-additionalProperties: false
-
 required:
   - compatible
   - reg
@@ -43,6 +41,11 @@ dependencies:
   spi-cpol: [ spi-cpha ]
   spi-cpha: [ spi-cpol ]
 
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
+
 examples:
   - |
     spi {
-- 
GitLab


From a44ef7c4609724e2f395d8a24d4a863cd860bbba Mon Sep 17 00:00:00 2001
From: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Date: Mon, 3 Oct 2022 13:59:01 +0300
Subject: [PATCH 2270/4122] iio: adc: add max11410 adc driver

Adding support for max11410 24-bit, 1.9ksps delta-sigma adc which
has 3 differential reference and 10 differential channel inputs.
Inputs and references can be buffered internally. Inputs can also
be amplified with internal PGA.

Device has four digital filter modes: FIR50/60, FIR50, FIR60 and SINC4.
FIR 50Hz and 60Hz rejections can be enabled/disabled separately.
Digital filter selection affects sampling frequency range so driver
has to consider the configured filter when configuring sampling frequency.

Signed-off-by: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Link: https://lore.kernel.org/r/20221003105903.229-2-Ibrahim.Tilki@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/Kconfig    |   13 +
 drivers/iio/adc/Makefile   |    1 +
 drivers/iio/adc/max11410.c | 1050 ++++++++++++++++++++++++++++++++++++
 3 files changed, 1064 insertions(+)
 create mode 100644 drivers/iio/adc/max11410.c

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 791612ca60120..544986fd456de 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -667,6 +667,19 @@ config MAX11205
 	  To compile this driver as a module, choose M here: the module will be
 	  called max11205.
 
+config MAX11410
+	tristate "Analog Devices MAX11410 ADC driver"
+	depends on SPI
+	select REGMAP_SPI
+	select IIO_BUFFER
+	select IIO_TRIGGER
+	select IIO_TRIGGERED_BUFFER
+	help
+	  Say yes here to build support for Analog Devices MAX11410 ADCs.
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called max11410.
+
 config MAX1241
 	tristate "Maxim max1241 ADC driver"
 	depends on SPI_MASTER
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 46caba7a010c9..4d803413425c4 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -62,6 +62,7 @@ obj-$(CONFIG_MAX1027) += max1027.o
 obj-$(CONFIG_MAX11100) += max11100.o
 obj-$(CONFIG_MAX1118) += max1118.o
 obj-$(CONFIG_MAX11205) += max11205.o
+obj-$(CONFIG_MAX11410) += max11410.o
 obj-$(CONFIG_MAX1241) += max1241.o
 obj-$(CONFIG_MAX1363) += max1363.o
 obj-$(CONFIG_MAX9611) += max9611.o
diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c
new file mode 100644
index 0000000000000..8cd5663671874
--- /dev/null
+++ b/drivers/iio/adc/max11410.c
@@ -0,0 +1,1050 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MAX11410 SPI ADC driver
+ *
+ * Copyright 2022 Analog Devices Inc.
+ */
+#include <asm-generic/unaligned.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+#include <linux/iio/buffer.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+
+#define MAX11410_REG_CONV_START	0x01
+#define		MAX11410_CONV_TYPE_SINGLE	0x00
+#define		MAX11410_CONV_TYPE_CONTINUOUS	0x01
+#define MAX11410_REG_CAL_START	0x03
+#define		MAX11410_CAL_START_SELF		0x00
+#define		MAX11410_CAL_START_PGA		0x01
+#define MAX11410_REG_GPIO_CTRL(ch)		((ch) ? 0x05 : 0x04)
+#define		MAX11410_GPIO_INTRB		0xC1
+#define MAX11410_REG_FILTER	0x08
+#define		MAX11410_FILTER_RATE_MASK	GENMASK(3, 0)
+#define		MAX11410_FILTER_RATE_MAX	0x0F
+#define		MAX11410_FILTER_LINEF_MASK	GENMASK(5, 4)
+#define		MAX11410_FILTER_50HZ		BIT(5)
+#define		MAX11410_FILTER_60HZ		BIT(4)
+#define MAX11410_REG_CTRL	0x09
+#define		MAX11410_CTRL_REFSEL_MASK	GENMASK(2, 0)
+#define		MAX11410_CTRL_VREFN_BUF_BIT	BIT(3)
+#define		MAX11410_CTRL_VREFP_BUF_BIT	BIT(4)
+#define		MAX11410_CTRL_FORMAT_BIT	BIT(5)
+#define		MAX11410_CTRL_UNIPOLAR_BIT	BIT(6)
+#define MAX11410_REG_MUX_CTRL0	0x0B
+#define MAX11410_REG_PGA	0x0E
+#define		MAX11410_PGA_GAIN_MASK		GENMASK(2, 0)
+#define		MAX11410_PGA_SIG_PATH_MASK	GENMASK(5, 4)
+#define		MAX11410_PGA_SIG_PATH_BUFFERED	0x00
+#define		MAX11410_PGA_SIG_PATH_BYPASS	0x01
+#define		MAX11410_PGA_SIG_PATH_PGA	0x02
+#define MAX11410_REG_DATA0	0x30
+#define MAX11410_REG_STATUS	0x38
+#define		MAX11410_STATUS_CONV_READY_BIT	BIT(0)
+#define		MAX11410_STATUS_CAL_READY_BIT	BIT(2)
+
+#define MAX11410_REFSEL_AVDD_AGND	0x03
+#define MAX11410_REFSEL_MAX		0x06
+#define MAX11410_SIG_PATH_MAX		0x02
+#define MAX11410_CHANNEL_INDEX_MAX	0x0A
+#define MAX11410_AINP_AVDD	0x0A
+#define MAX11410_AINN_GND	0x0A
+
+#define MAX11410_CONVERSION_TIMEOUT_MS	2000
+#define MAX11410_CALIB_TIMEOUT_MS	2000
+
+#define MAX11410_SCALE_AVAIL_SIZE	8
+
+enum max11410_filter {
+	MAX11410_FILTER_FIR5060,
+	MAX11410_FILTER_FIR50,
+	MAX11410_FILTER_FIR60,
+	MAX11410_FILTER_SINC4,
+};
+
+static const u8 max11410_sampling_len[] = {
+	[MAX11410_FILTER_FIR5060] = 5,
+	[MAX11410_FILTER_FIR50] = 6,
+	[MAX11410_FILTER_FIR60] = 6,
+	[MAX11410_FILTER_SINC4] = 10,
+};
+
+static const int max11410_sampling_rates[4][10][2] = {
+	[MAX11410_FILTER_FIR5060] = {
+		{ 1, 100000 },
+		{ 2, 100000 },
+		{ 4, 200000 },
+		{ 8, 400000 },
+		{ 16, 800000 }
+	},
+	[MAX11410_FILTER_FIR50] = {
+		{ 1, 300000 },
+		{ 2, 700000 },
+		{ 5, 300000 },
+		{ 10, 700000 },
+		{ 21, 300000 },
+		{ 40 }
+	},
+	[MAX11410_FILTER_FIR60] = {
+		{ 1, 300000 },
+		{ 2, 700000 },
+		{ 5, 300000 },
+		{ 10, 700000 },
+		{ 21, 300000 },
+		{ 40 }
+	},
+	[MAX11410_FILTER_SINC4] = {
+		{ 4 },
+		{ 10 },
+		{ 20 },
+		{ 40 },
+		{ 60 },
+		{ 120 },
+		{ 240 },
+		{ 480 },
+		{ 960 },
+		{ 1920 }
+	}
+};
+
+struct max11410_channel_config {
+	u32 settling_time_us;
+	u32 *scale_avail;
+	u8 refsel;
+	u8 sig_path;
+	u8 gain;
+	bool bipolar;
+	bool buffered_vrefp;
+	bool buffered_vrefn;
+};
+
+struct max11410_state {
+	struct spi_device *spi_dev;
+	struct iio_trigger *trig;
+	struct completion completion;
+	struct mutex lock; /* Prevent changing channel config during sampling */
+	struct regmap *regmap;
+	struct regulator *avdd;
+	struct regulator *vrefp[3];
+	struct regulator *vrefn[3];
+	struct max11410_channel_config *channels;
+	int irq;
+	struct {
+		u32 data __aligned(IIO_DMA_MINALIGN);
+		s64 ts __aligned(8);
+	} scan;
+};
+
+static const struct iio_chan_spec chanspec_template = {
+	.type = IIO_VOLTAGE,
+	.indexed = 1,
+	.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
+			      BIT(IIO_CHAN_INFO_SCALE) |
+			      BIT(IIO_CHAN_INFO_OFFSET),
+	.info_mask_shared_by_all = BIT(IIO_CHAN_INFO_SAMP_FREQ),
+	.info_mask_shared_by_all_available = BIT(IIO_CHAN_INFO_SAMP_FREQ),
+	.scan_type = {
+		.sign = 's',
+		.realbits = 24,
+		.storagebits = 32,
+		.endianness = IIO_LE,
+	},
+};
+
+static unsigned int max11410_reg_size(unsigned int reg)
+{
+	/* Registers from 0x00 to 0x10 are 1 byte, the rest are 3 bytes long. */
+	return reg <= 0x10 ? 1 : 3;
+}
+
+static int max11410_write_reg(struct max11410_state *st, unsigned int reg,
+			      unsigned int val)
+{
+	/* This driver only needs to write 8-bit registers */
+	if (max11410_reg_size(reg) != 1)
+		return -EINVAL;
+
+	return regmap_write(st->regmap, reg, val);
+}
+
+static int max11410_read_reg(struct max11410_state *st, unsigned int reg,
+			     int *val)
+{
+	int ret;
+
+	if (max11410_reg_size(reg) == 3) {
+		ret = regmap_bulk_read(st->regmap, reg, &st->scan.data, 3);
+		if (ret)
+			return ret;
+
+		*val = get_unaligned_be24(&st->scan.data);
+		return 0;
+	}
+
+	return regmap_read(st->regmap, reg, val);
+}
+
+static struct regulator *max11410_get_vrefp(struct max11410_state *st,
+					    u8 refsel)
+{
+	refsel = refsel % 4;
+	if (refsel == 3)
+		return st->avdd;
+
+	return st->vrefp[refsel];
+}
+
+static struct regulator *max11410_get_vrefn(struct max11410_state *st,
+					    u8 refsel)
+{
+	if (refsel > 2)
+		return NULL;
+
+	return st->vrefn[refsel];
+}
+
+static const struct regmap_config regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.max_register = 0x39,
+};
+
+static ssize_t max11410_notch_en_show(struct device *dev,
+				      struct device_attribute *devattr,
+				      char *buf)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct max11410_state *state = iio_priv(indio_dev);
+	struct iio_dev_attr *iio_attr = to_iio_dev_attr(devattr);
+	unsigned int val;
+	int ret;
+
+	ret = max11410_read_reg(state, MAX11410_REG_FILTER, &val);
+	if (ret)
+		return ret;
+
+	switch (iio_attr->address) {
+	case 0:
+		val = !FIELD_GET(MAX11410_FILTER_50HZ, val);
+		break;
+	case 1:
+		val = !FIELD_GET(MAX11410_FILTER_60HZ, val);
+		break;
+	case 2:
+		val = FIELD_GET(MAX11410_FILTER_LINEF_MASK, val) == 3;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return sysfs_emit(buf, "%d\n", val);
+}
+
+static ssize_t max11410_notch_en_store(struct device *dev,
+				       struct device_attribute *devattr,
+				       const char *buf, size_t count)
+{
+	struct iio_dev_attr *iio_attr = to_iio_dev_attr(devattr);
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct max11410_state *state = iio_priv(indio_dev);
+	unsigned int filter_bits;
+	bool enable;
+	int ret;
+
+	ret = kstrtobool(buf, &enable);
+	if (ret)
+		return ret;
+
+	switch (iio_attr->address) {
+	case 0:
+		filter_bits = MAX11410_FILTER_50HZ;
+		break;
+	case 1:
+		filter_bits = MAX11410_FILTER_60HZ;
+		break;
+	case 2:
+	default:
+		filter_bits = MAX11410_FILTER_50HZ | MAX11410_FILTER_60HZ;
+		enable = !enable;
+		break;
+	}
+
+	if (enable)
+		ret = regmap_clear_bits(state->regmap, MAX11410_REG_FILTER,
+					filter_bits);
+	else
+		ret = regmap_set_bits(state->regmap, MAX11410_REG_FILTER,
+				      filter_bits);
+
+	if (ret)
+		return ret;
+
+	return count;
+}
+
+static ssize_t in_voltage_filter2_notch_center_show(struct device *dev,
+						    struct device_attribute *devattr,
+						    char *buf)
+{
+	struct iio_dev *indio_dev = dev_get_drvdata(dev);
+	struct max11410_state *state = iio_priv(indio_dev);
+	int ret, reg, rate, filter;
+
+	ret = regmap_read(state->regmap, MAX11410_REG_FILTER, &reg);
+	if (ret)
+		return ret;
+
+	rate = FIELD_GET(MAX11410_FILTER_RATE_MASK, reg);
+	rate = clamp_val(rate, 0,
+			 max11410_sampling_len[MAX11410_FILTER_SINC4] - 1);
+	filter = max11410_sampling_rates[MAX11410_FILTER_SINC4][rate][0];
+
+	return sysfs_emit(buf, "%d\n", filter);
+}
+
+static IIO_CONST_ATTR(in_voltage_filter0_notch_center, "50");
+static IIO_CONST_ATTR(in_voltage_filter1_notch_center, "60");
+static IIO_DEVICE_ATTR_RO(in_voltage_filter2_notch_center, 2);
+
+static IIO_DEVICE_ATTR(in_voltage_filter0_notch_en, 0644,
+		       max11410_notch_en_show, max11410_notch_en_store, 0);
+static IIO_DEVICE_ATTR(in_voltage_filter1_notch_en, 0644,
+		       max11410_notch_en_show, max11410_notch_en_store, 1);
+static IIO_DEVICE_ATTR(in_voltage_filter2_notch_en, 0644,
+		       max11410_notch_en_show, max11410_notch_en_store, 2);
+
+static struct attribute *max11410_attributes[] = {
+	&iio_const_attr_in_voltage_filter0_notch_center.dev_attr.attr,
+	&iio_const_attr_in_voltage_filter1_notch_center.dev_attr.attr,
+	&iio_dev_attr_in_voltage_filter2_notch_center.dev_attr.attr,
+	&iio_dev_attr_in_voltage_filter0_notch_en.dev_attr.attr,
+	&iio_dev_attr_in_voltage_filter1_notch_en.dev_attr.attr,
+	&iio_dev_attr_in_voltage_filter2_notch_en.dev_attr.attr,
+	NULL
+};
+
+static const struct attribute_group max11410_attribute_group = {
+	.attrs = max11410_attributes,
+};
+
+static int max11410_set_input_mux(struct max11410_state *st, u8 ainp, u8 ainn)
+{
+	if (ainp > MAX11410_CHANNEL_INDEX_MAX ||
+	    ainn > MAX11410_CHANNEL_INDEX_MAX)
+		return -EINVAL;
+
+	return max11410_write_reg(st, MAX11410_REG_MUX_CTRL0,
+				  (ainp << 4) | ainn);
+}
+
+static int max11410_configure_channel(struct max11410_state *st,
+				      struct iio_chan_spec const *chan)
+{
+	struct max11410_channel_config cfg = st->channels[chan->address];
+	unsigned int regval;
+	int ret;
+
+	if (chan->differential)
+		ret = max11410_set_input_mux(st, chan->channel, chan->channel2);
+	else
+		ret = max11410_set_input_mux(st, chan->channel,
+					     MAX11410_AINN_GND);
+
+	if (ret)
+		return ret;
+
+	regval = FIELD_PREP(MAX11410_CTRL_VREFP_BUF_BIT, cfg.buffered_vrefp) |
+		 FIELD_PREP(MAX11410_CTRL_VREFN_BUF_BIT, cfg.buffered_vrefn) |
+		 FIELD_PREP(MAX11410_CTRL_REFSEL_MASK, cfg.refsel) |
+		 FIELD_PREP(MAX11410_CTRL_UNIPOLAR_BIT, cfg.bipolar ? 0 : 1);
+	ret = regmap_update_bits(st->regmap, MAX11410_REG_CTRL,
+				 MAX11410_CTRL_REFSEL_MASK |
+				 MAX11410_CTRL_VREFN_BUF_BIT |
+				 MAX11410_CTRL_VREFN_BUF_BIT |
+				 MAX11410_CTRL_UNIPOLAR_BIT, regval);
+	if (ret)
+		return ret;
+
+	regval = FIELD_PREP(MAX11410_PGA_SIG_PATH_MASK, cfg.sig_path) |
+		 FIELD_PREP(MAX11410_PGA_GAIN_MASK, cfg.gain);
+	ret = regmap_write(st->regmap, MAX11410_REG_PGA, regval);
+	if (ret)
+		return ret;
+
+	if (cfg.settling_time_us)
+		fsleep(cfg.settling_time_us);
+
+	return 0;
+}
+
+static int max11410_sample(struct max11410_state *st, int *sample_raw,
+			   struct iio_chan_spec const *chan)
+{
+	int val, ret;
+
+	ret = max11410_configure_channel(st, chan);
+	if (ret)
+		return ret;
+
+	if (st->irq > 0)
+		reinit_completion(&st->completion);
+
+	/* Start Conversion */
+	ret = max11410_write_reg(st, MAX11410_REG_CONV_START,
+				 MAX11410_CONV_TYPE_SINGLE);
+	if (ret)
+		return ret;
+
+	if (st->irq > 0) {
+		/* Wait for an interrupt. */
+		ret = wait_for_completion_timeout(&st->completion,
+						  msecs_to_jiffies(MAX11410_CONVERSION_TIMEOUT_MS));
+		if (!ret)
+			return -ETIMEDOUT;
+	} else {
+		/* Wait for status register Conversion Ready flag */
+		ret = read_poll_timeout(max11410_read_reg, ret,
+					ret || (val & MAX11410_STATUS_CONV_READY_BIT),
+					5000, MAX11410_CONVERSION_TIMEOUT_MS * 1000,
+					true, st, MAX11410_REG_STATUS, &val);
+		if (ret)
+			return ret;
+	}
+
+	/* Read ADC Data */
+	return max11410_read_reg(st, MAX11410_REG_DATA0, sample_raw);
+}
+
+static int max11410_get_scale(struct max11410_state *state,
+			      struct max11410_channel_config cfg)
+{
+	struct regulator *vrefp, *vrefn;
+	int scale;
+
+	vrefp = max11410_get_vrefp(state, cfg.refsel);
+
+	scale = regulator_get_voltage(vrefp) / 1000;
+	vrefn = max11410_get_vrefn(state, cfg.refsel);
+	if (vrefn)
+		scale -= regulator_get_voltage(vrefn) / 1000;
+
+	if (cfg.bipolar)
+		scale *= 2;
+
+	return scale >> cfg.gain;
+}
+
+static int max11410_read_raw(struct iio_dev *indio_dev,
+			     struct iio_chan_spec const *chan,
+			     int *val, int *val2, long info)
+{
+	struct max11410_state *state = iio_priv(indio_dev);
+	struct max11410_channel_config cfg = state->channels[chan->address];
+	int ret, reg_val, filter, rate;
+
+	switch (info) {
+	case IIO_CHAN_INFO_SCALE:
+		*val = max11410_get_scale(state, cfg);
+		*val2 = chan->scan_type.realbits;
+		return IIO_VAL_FRACTIONAL_LOG2;
+	case IIO_CHAN_INFO_OFFSET:
+		if (cfg.bipolar)
+			*val = -BIT(chan->scan_type.realbits - 1);
+		else
+			*val = 0;
+
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_RAW:
+		ret = iio_device_claim_direct_mode(indio_dev);
+		if (ret)
+			return ret;
+
+		mutex_lock(&state->lock);
+
+		ret = max11410_sample(state, &reg_val, chan);
+
+		mutex_unlock(&state->lock);
+
+		iio_device_release_direct_mode(indio_dev);
+
+		if (ret)
+			return ret;
+
+		*val = reg_val;
+
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		ret = regmap_read(state->regmap, MAX11410_REG_FILTER, &reg_val);
+		if (ret)
+			return ret;
+
+		filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val);
+		rate = reg_val & MAX11410_FILTER_RATE_MASK;
+		if (rate >= max11410_sampling_len[filter])
+			rate = max11410_sampling_len[filter] - 1;
+
+		*val = max11410_sampling_rates[filter][rate][0];
+		*val2 = max11410_sampling_rates[filter][rate][1];
+
+		return IIO_VAL_INT_PLUS_MICRO;
+	}
+	return -EINVAL;
+}
+
+static int max11410_write_raw(struct iio_dev *indio_dev,
+			      struct iio_chan_spec const *chan,
+			      int val, int val2, long mask)
+{
+	struct max11410_state *st = iio_priv(indio_dev);
+	int i, ret, reg_val, filter, gain;
+	u32 *scale_avail;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_SCALE:
+		scale_avail = st->channels[chan->address].scale_avail;
+		if (!scale_avail)
+			return -EOPNOTSUPP;
+
+		/* Accept values in range 0.000001 <= scale < 1.000000 */
+		if (val != 0 || val2 == 0)
+			return -EINVAL;
+
+		ret = iio_device_claim_direct_mode(indio_dev);
+		if (ret)
+			return ret;
+
+		/* Convert from INT_PLUS_MICRO to FRACTIONAL_LOG2 */
+		val2 = val2 * DIV_ROUND_CLOSEST(BIT(24), 1000000);
+		val2 = DIV_ROUND_CLOSEST(scale_avail[0], val2);
+		gain = order_base_2(val2);
+
+		st->channels[chan->address].gain = clamp_val(gain, 0, 7);
+
+		iio_device_release_direct_mode(indio_dev);
+
+		return 0;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		ret = iio_device_claim_direct_mode(indio_dev);
+		if (ret)
+			return ret;
+
+		mutex_lock(&st->lock);
+
+		ret = regmap_read(st->regmap, MAX11410_REG_FILTER, &reg_val);
+		if (ret)
+			goto out;
+
+		filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val);
+
+		for (i = 0; i < max11410_sampling_len[filter]; ++i) {
+			if (val == max11410_sampling_rates[filter][i][0] &&
+			    val2 == max11410_sampling_rates[filter][i][1])
+				break;
+		}
+		if (i == max11410_sampling_len[filter]) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		ret = regmap_write_bits(st->regmap, MAX11410_REG_FILTER,
+					MAX11410_FILTER_RATE_MASK, i);
+
+out:
+		mutex_unlock(&st->lock);
+		iio_device_release_direct_mode(indio_dev);
+
+		return ret;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int max11410_read_avail(struct iio_dev *indio_dev,
+			       struct iio_chan_spec const *chan,
+			       const int **vals, int *type, int *length,
+			       long info)
+{
+	struct max11410_state *st = iio_priv(indio_dev);
+	struct max11410_channel_config cfg;
+	int ret, reg_val, filter;
+
+	switch (info) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		ret = regmap_read(st->regmap, MAX11410_REG_FILTER, &reg_val);
+		if (ret)
+			return ret;
+
+		filter = FIELD_GET(MAX11410_FILTER_LINEF_MASK, reg_val);
+
+		*vals = (const int *)max11410_sampling_rates[filter];
+		*length = max11410_sampling_len[filter] * 2;
+		*type = IIO_VAL_INT_PLUS_MICRO;
+
+		return IIO_AVAIL_LIST;
+	case IIO_CHAN_INFO_SCALE:
+		cfg = st->channels[chan->address];
+
+		if (!cfg.scale_avail)
+			return -EINVAL;
+
+		*vals = cfg.scale_avail;
+		*length = MAX11410_SCALE_AVAIL_SIZE * 2;
+		*type = IIO_VAL_FRACTIONAL_LOG2;
+
+		return IIO_AVAIL_LIST;
+	}
+	return -EINVAL;
+}
+
+static const struct iio_info max11410_info = {
+	.read_raw = max11410_read_raw,
+	.write_raw = max11410_write_raw,
+	.read_avail = max11410_read_avail,
+	.attrs = &max11410_attribute_group,
+};
+
+static irqreturn_t max11410_trigger_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct max11410_state *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = max11410_read_reg(st, MAX11410_REG_DATA0, &st->scan.data);
+	if (ret) {
+		dev_err(&indio_dev->dev, "cannot read data\n");
+		goto out;
+	}
+
+	iio_push_to_buffers_with_timestamp(indio_dev, &st->scan,
+					   iio_get_time_ns(indio_dev));
+
+out:
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+}
+
+static int max11410_buffer_postenable(struct iio_dev *indio_dev)
+{
+	struct max11410_state *st = iio_priv(indio_dev);
+	int scan_ch, ret;
+
+	scan_ch = ffs(*indio_dev->active_scan_mask) - 1;
+
+	ret = max11410_configure_channel(st, &indio_dev->channels[scan_ch]);
+	if (ret)
+		return ret;
+
+	/* Start continuous conversion. */
+	return max11410_write_reg(st, MAX11410_REG_CONV_START,
+				  MAX11410_CONV_TYPE_CONTINUOUS);
+}
+
+static int max11410_buffer_predisable(struct iio_dev *indio_dev)
+{
+	struct max11410_state *st = iio_priv(indio_dev);
+
+	/* Stop continuous conversion. */
+	return max11410_write_reg(st, MAX11410_REG_CONV_START,
+				  MAX11410_CONV_TYPE_SINGLE);
+}
+
+static const struct iio_buffer_setup_ops max11410_buffer_ops = {
+	.postenable = &max11410_buffer_postenable,
+	.predisable = &max11410_buffer_predisable,
+	.validate_scan_mask = &iio_validate_scan_mask_onehot,
+};
+
+static const struct iio_trigger_ops max11410_trigger_ops = {
+	.validate_device = iio_trigger_validate_own_device,
+};
+
+static irqreturn_t max11410_interrupt(int irq, void *dev_id)
+{
+	struct iio_dev *indio_dev = dev_id;
+	struct max11410_state *st = iio_priv(indio_dev);
+
+	if (iio_buffer_enabled(indio_dev))
+		iio_trigger_poll_chained(st->trig);
+	else
+		complete(&st->completion);
+
+	return IRQ_HANDLED;
+};
+
+static int max11410_parse_channels(struct max11410_state *st,
+				   struct iio_dev *indio_dev)
+{
+	struct iio_chan_spec chanspec = chanspec_template;
+	struct device *dev = &st->spi_dev->dev;
+	struct max11410_channel_config *cfg;
+	struct iio_chan_spec *channels;
+	struct fwnode_handle *child;
+	u32 reference, sig_path;
+	const char *node_name;
+	u32 inputs[2], scale;
+	unsigned int num_ch;
+	int chan_idx = 0;
+	int ret, i;
+
+	num_ch = device_get_child_node_count(dev);
+	if (num_ch == 0)
+		return dev_err_probe(&indio_dev->dev, -ENODEV,
+				     "FW has no channels defined\n");
+
+	/* Reserve space for soft timestamp channel */
+	num_ch++;
+	channels = devm_kcalloc(dev, num_ch, sizeof(*channels), GFP_KERNEL);
+	if (!channels)
+		return -ENOMEM;
+
+	st->channels = devm_kcalloc(dev, num_ch, sizeof(*st->channels),
+				    GFP_KERNEL);
+	if (!st->channels)
+		return -ENOMEM;
+
+	device_for_each_child_node(dev, child) {
+		node_name = fwnode_get_name(child);
+		if (fwnode_property_present(child, "diff-channels")) {
+			ret = fwnode_property_read_u32_array(child,
+							     "diff-channels",
+							     inputs,
+							     ARRAY_SIZE(inputs));
+
+			chanspec.differential = 1;
+		} else {
+			ret = fwnode_property_read_u32(child, "reg", &inputs[0]);
+
+			inputs[1] = 0;
+			chanspec.differential = 0;
+		}
+		if (ret) {
+			fwnode_handle_put(child);
+			return ret;
+		}
+
+		if (inputs[0] > MAX11410_CHANNEL_INDEX_MAX ||
+		    inputs[1] > MAX11410_CHANNEL_INDEX_MAX) {
+			fwnode_handle_put(child);
+			return dev_err_probe(&indio_dev->dev, -EINVAL,
+					     "Invalid channel index for %s, should be less than %d\n",
+					     node_name,
+					     MAX11410_CHANNEL_INDEX_MAX + 1);
+		}
+
+		cfg = &st->channels[chan_idx];
+
+		reference = MAX11410_REFSEL_AVDD_AGND;
+		fwnode_property_read_u32(child, "adi,reference", &reference);
+		if (reference > MAX11410_REFSEL_MAX) {
+			fwnode_handle_put(child);
+			return dev_err_probe(&indio_dev->dev, -EINVAL,
+					     "Invalid adi,reference value for %s, should be less than %d.\n",
+					     node_name, MAX11410_REFSEL_MAX + 1);
+		}
+
+		if (!max11410_get_vrefp(st, reference) ||
+		    (!max11410_get_vrefn(st, reference) && reference <= 2)) {
+			fwnode_handle_put(child);
+			return dev_err_probe(&indio_dev->dev, -EINVAL,
+					     "Invalid VREF configuration for %s, either specify corresponding VREF regulators or change adi,reference property.\n",
+					     node_name);
+		}
+
+		sig_path = MAX11410_PGA_SIG_PATH_BUFFERED;
+		fwnode_property_read_u32(child, "adi,input-mode", &sig_path);
+		if (sig_path > MAX11410_SIG_PATH_MAX) {
+			fwnode_handle_put(child);
+			return dev_err_probe(&indio_dev->dev, -EINVAL,
+					     "Invalid adi,input-mode value for %s, should be less than %d.\n",
+					     node_name, MAX11410_SIG_PATH_MAX + 1);
+		}
+
+		fwnode_property_read_u32(child, "settling-time-us",
+					 &cfg->settling_time_us);
+		cfg->bipolar = fwnode_property_read_bool(child, "bipolar");
+		cfg->buffered_vrefp = fwnode_property_read_bool(child, "adi,buffered-vrefp");
+		cfg->buffered_vrefn = fwnode_property_read_bool(child, "adi,buffered-vrefn");
+		cfg->refsel = reference;
+		cfg->sig_path = sig_path;
+		cfg->gain = 0;
+
+		/* Enable scale_available property if input mode is PGA */
+		if (sig_path == MAX11410_PGA_SIG_PATH_PGA) {
+			__set_bit(IIO_CHAN_INFO_SCALE,
+				  &chanspec.info_mask_separate_available);
+			cfg->scale_avail = devm_kcalloc(dev, MAX11410_SCALE_AVAIL_SIZE * 2,
+							sizeof(*cfg->scale_avail),
+							GFP_KERNEL);
+			if (!cfg->scale_avail) {
+				fwnode_handle_put(child);
+				return -ENOMEM;
+			}
+
+			scale = max11410_get_scale(st, *cfg);
+			for (i = 0; i < MAX11410_SCALE_AVAIL_SIZE; i++) {
+				cfg->scale_avail[2 * i] = scale >> i;
+				cfg->scale_avail[2 * i + 1] = chanspec.scan_type.realbits;
+			}
+		} else {
+			__clear_bit(IIO_CHAN_INFO_SCALE,
+				    &chanspec.info_mask_separate_available);
+		}
+
+		chanspec.address = chan_idx;
+		chanspec.scan_index = chan_idx;
+		chanspec.channel = inputs[0];
+		chanspec.channel2 = inputs[1];
+
+		channels[chan_idx] = chanspec;
+		chan_idx++;
+	}
+
+	channels[chan_idx] = (struct iio_chan_spec)IIO_CHAN_SOFT_TIMESTAMP(chan_idx);
+
+	indio_dev->num_channels = chan_idx + 1;
+	indio_dev->channels = channels;
+
+	return 0;
+}
+
+static void max11410_disable_reg(void *reg)
+{
+	regulator_disable(reg);
+}
+
+static int max11410_init_vref(struct device *dev,
+			      struct regulator **vref,
+			      const char *id)
+{
+	struct regulator *reg;
+	int ret;
+
+	reg = devm_regulator_get_optional(dev, id);
+	if (PTR_ERR(reg) == -ENODEV) {
+		*vref = NULL;
+		return 0;
+	} else if (IS_ERR(reg)) {
+		return PTR_ERR(reg);
+	}
+	ret = regulator_enable(reg);
+	if (ret)
+		return dev_err_probe(dev, ret,
+				     "Failed to enable regulator %s\n", id);
+
+	*vref = reg;
+	return devm_add_action_or_reset(dev, max11410_disable_reg, reg);
+}
+
+static int max11410_calibrate(struct max11410_state *st, u32 cal_type)
+{
+	int ret, val;
+
+	ret = max11410_write_reg(st, MAX11410_REG_CAL_START, cal_type);
+	if (ret)
+		return ret;
+
+	/* Wait for status register Calibration Ready flag */
+	return read_poll_timeout(max11410_read_reg, ret,
+				 ret || (val & MAX11410_STATUS_CAL_READY_BIT),
+				 50000, MAX11410_CALIB_TIMEOUT_MS * 1000, true,
+				 st, MAX11410_REG_STATUS, &val);
+}
+
+static int max11410_self_calibrate(struct max11410_state *st)
+{
+	int ret, i;
+
+	ret = regmap_write_bits(st->regmap, MAX11410_REG_FILTER,
+				MAX11410_FILTER_RATE_MASK,
+				FIELD_PREP(MAX11410_FILTER_RATE_MASK,
+					   MAX11410_FILTER_RATE_MAX));
+	if (ret)
+		return ret;
+
+	ret = max11410_calibrate(st, MAX11410_CAL_START_SELF);
+	if (ret)
+		return ret;
+
+	ret = regmap_write_bits(st->regmap, MAX11410_REG_PGA,
+				MAX11410_PGA_SIG_PATH_MASK,
+				FIELD_PREP(MAX11410_PGA_SIG_PATH_MASK,
+					   MAX11410_PGA_SIG_PATH_PGA));
+	if (ret)
+		return ret;
+
+	/* PGA calibrations */
+	for (i = 1; i < 8; ++i) {
+		ret = regmap_write_bits(st->regmap, MAX11410_REG_PGA,
+					MAX11410_PGA_GAIN_MASK, i);
+		if (ret)
+			return ret;
+
+		ret = max11410_calibrate(st, MAX11410_CAL_START_PGA);
+		if (ret)
+			return ret;
+	}
+
+	/* Cleanup */
+	ret = regmap_write_bits(st->regmap, MAX11410_REG_PGA,
+				MAX11410_PGA_GAIN_MASK, 0);
+	if (ret)
+		return ret;
+
+	ret = regmap_write_bits(st->regmap, MAX11410_REG_FILTER,
+				MAX11410_FILTER_RATE_MASK, 0);
+	if (ret)
+		return ret;
+
+	return regmap_write_bits(st->regmap, MAX11410_REG_PGA,
+				 MAX11410_PGA_SIG_PATH_MASK,
+				 FIELD_PREP(MAX11410_PGA_SIG_PATH_MASK,
+					    MAX11410_PGA_SIG_PATH_BUFFERED));
+}
+
+static int max11410_probe(struct spi_device *spi)
+{
+	const char *vrefp_regs[] = { "vref0p", "vref1p", "vref2p" };
+	const char *vrefn_regs[] = { "vref0n", "vref1n", "vref2n" };
+	struct device *dev = &spi->dev;
+	struct max11410_state *st;
+	struct iio_dev *indio_dev;
+	int ret, irqs[2];
+	int i;
+
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*st));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	st = iio_priv(indio_dev);
+	st->spi_dev = spi;
+	init_completion(&st->completion);
+	mutex_init(&st->lock);
+
+	indio_dev->name = "max11410";
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->info = &max11410_info;
+
+	st->regmap = devm_regmap_init_spi(spi, &regmap_config);
+	if (IS_ERR(st->regmap))
+		return dev_err_probe(dev, PTR_ERR(st->regmap),
+				     "regmap initialization failed\n");
+
+	ret = max11410_init_vref(dev, &st->avdd, "avdd");
+	if (ret)
+		return ret;
+
+	for (i = 0; i < ARRAY_SIZE(vrefp_regs); i++) {
+		ret = max11410_init_vref(dev, &st->vrefp[i], vrefp_regs[i]);
+		if (ret)
+			return ret;
+
+		ret = max11410_init_vref(dev, &st->vrefn[i], vrefn_regs[i]);
+		if (ret)
+			return ret;
+	}
+
+	/*
+	 * Regulators must be configured before parsing channels for
+	 * validating "adi,reference" property of each channel.
+	 */
+	ret = max11410_parse_channels(st, indio_dev);
+	if (ret)
+		return ret;
+
+	irqs[0] = fwnode_irq_get_byname(dev_fwnode(dev), "gpio0");
+	irqs[1] = fwnode_irq_get_byname(dev_fwnode(dev), "gpio1");
+
+	if (irqs[0] > 0) {
+		st->irq = irqs[0];
+		ret = regmap_write(st->regmap, MAX11410_REG_GPIO_CTRL(0),
+				   MAX11410_GPIO_INTRB);
+	} else if (irqs[1] > 0) {
+		st->irq = irqs[1];
+		ret = regmap_write(st->regmap, MAX11410_REG_GPIO_CTRL(1),
+				   MAX11410_GPIO_INTRB);
+	} else if (spi->irq > 0) {
+		return dev_err_probe(dev, -ENODEV,
+				     "no interrupt name specified");
+	}
+
+	if (ret)
+		return ret;
+
+	ret = regmap_set_bits(st->regmap, MAX11410_REG_CTRL,
+			      MAX11410_CTRL_FORMAT_BIT);
+	if (ret)
+		return ret;
+
+	ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL,
+					      &max11410_trigger_handler,
+					      &max11410_buffer_ops);
+	if (ret)
+		return ret;
+
+	if (st->irq > 0) {
+		st->trig = devm_iio_trigger_alloc(dev, "%s-dev%d",
+						  indio_dev->name,
+						  iio_device_id(indio_dev));
+		if (!st->trig)
+			return -ENOMEM;
+
+		st->trig->ops = &max11410_trigger_ops;
+		ret = devm_iio_trigger_register(dev, st->trig);
+		if (ret)
+			return ret;
+
+		ret = devm_request_threaded_irq(dev, st->irq, NULL,
+						&max11410_interrupt,
+						IRQF_ONESHOT, "max11410",
+						indio_dev);
+		if (ret)
+			return ret;
+	}
+
+	ret = max11410_self_calibrate(st);
+	if (ret)
+		return dev_err_probe(dev, ret,
+				     "cannot perform device self calibration\n");
+
+	return devm_iio_device_register(dev, indio_dev);
+}
+
+static const struct of_device_id max11410_spi_of_id[] = {
+	{ .compatible = "adi,max11410" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, max11410_spi_of_id);
+
+static const struct spi_device_id max11410_id[] = {
+	{ "max11410" },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, max11410_id);
+
+static struct spi_driver max11410_driver = {
+	.driver = {
+		.name	= "max11410",
+		.of_match_table = max11410_spi_of_id,
+	},
+	.probe		= max11410_probe,
+	.id_table	= max11410_id,
+};
+module_spi_driver(max11410_driver);
+
+MODULE_AUTHOR("David Jung <David.Jung@analog.com>");
+MODULE_AUTHOR("Ibrahim Tilki <Ibrahim.Tilki@analog.com>");
+MODULE_DESCRIPTION("Analog Devices MAX11410 ADC");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 089ec09f4973898c60c6781c448384bf828e7f45 Mon Sep 17 00:00:00 2001
From: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Date: Mon, 3 Oct 2022 13:59:02 +0300
Subject: [PATCH 2271/4122] dt-bindings: iio: adc: add adi,max11410.yaml

Adding devicetree binding documentation for max11410 adc.

Signed-off-by: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221003105903.229-3-Ibrahim.Tilki@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/adc/adi,max11410.yaml        | 177 ++++++++++++++++++
 1 file changed, 177 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml b/Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml
new file mode 100644
index 0000000000000..53f9feff137ba
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/adi,max11410.yaml
@@ -0,0 +1,177 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2022 Analog Devices Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/adi,max11410.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices MAX11410 ADC device driver
+
+maintainers:
+  - Ibrahim Tilki <Ibrahim.Tilki@analog.com>
+
+description: |
+  Bindings for the Analog Devices MAX11410 ADC device. Datasheet can be
+  found here:
+    https://datasheets.maximintegrated.com/en/ds/MAX11410.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,max11410
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+
+  interrupt-names:
+    description: Name of the gpio pin of max11410 used for IRQ
+    minItems: 1
+    items:
+      - enum: [gpio0, gpio1]
+      - const: gpio1
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+  avdd-supply:
+    description: Optional avdd supply. Used as reference when no explicit reference supplied.
+
+  vref0p-supply:
+    description: vref0p supply can be used as reference for conversion.
+
+  vref1p-supply:
+    description: vref1p supply can be used as reference for conversion.
+
+  vref2p-supply:
+    description: vref2p supply can be used as reference for conversion.
+
+  vref0n-supply:
+    description: vref0n supply can be used as reference for conversion.
+
+  vref1n-supply:
+    description: vref1n supply can be used as reference for conversion.
+
+  vref2n-supply:
+    description: vref2n supply can be used as reference for conversion.
+
+  spi-max-frequency:
+    maximum: 8000000
+
+patternProperties:
+  "^channel(@[0-9])?$":
+    $ref: adc.yaml
+    type: object
+    description: Represents the external channels which are connected to the ADC.
+
+    properties:
+      reg:
+        description: The channel number in single-ended mode.
+        minimum: 0
+        maximum: 9
+
+      adi,reference:
+        description: |
+          Select the reference source to use when converting on
+          the specific channel. Valid values are:
+          0: VREF0P/VREF0N
+          1: VREF1P/VREF1N
+          2: VREF2P/VREF2N
+          3: AVDD/AGND
+          4: VREF0P/AGND
+          5: VREF1P/AGND
+          6: VREF2P/AGND
+          If this field is left empty, AVDD/AGND is selected.
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1, 2, 3, 4, 5, 6]
+        default: 3
+
+      adi,input-mode:
+        description: |
+          Select signal path of input channels. Valid values are:
+          0: Buffered, low-power, unity-gain path (default)
+          1: Bypass path
+          2: PGA path
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1, 2]
+        default: 0
+
+      diff-channels: true
+
+      bipolar: true
+
+      settling-time-us: true
+
+      adi,buffered-vrefp:
+        description: Enable buffered mode for positive reference.
+        type: boolean
+
+      adi,buffered-vrefn:
+        description: Enable buffered mode for negative reference.
+        type: boolean
+
+    required:
+      - reg
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        adc@0 {
+            reg = <0>;
+            compatible = "adi,max11410";
+            spi-max-frequency = <8000000>;
+
+            interrupt-parent = <&gpio>;
+            interrupts = <25 IRQ_TYPE_EDGE_FALLING>;
+            interrupt-names = "gpio1";
+
+            avdd-supply = <&adc_avdd>;
+
+            vref1p-supply = <&adc_vref1p>;
+            vref1n-supply = <&adc_vref1n>;
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            channel@0 {
+                reg = <0>;
+            };
+
+            channel@1 {
+                reg = <1>;
+                diff-channels = <2 3>;
+                adi,reference = <1>;
+                bipolar;
+                settling-time-us = <100000>;
+            };
+
+            channel@2 {
+                reg = <2>;
+                diff-channels = <7 9>;
+                adi,reference = <5>;
+                adi,input-mode = <2>;
+                settling-time-us = <50000>;
+            };
+        };
+    };
-- 
GitLab


From 8d2caf8b98f17f44f76a002ced7c7ce6425652bc Mon Sep 17 00:00:00 2001
From: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Date: Mon, 3 Oct 2022 13:59:03 +0300
Subject: [PATCH 2272/4122] Documentation: ABI: testing: add max11410 doc

Adding documentation for Analog Devices max11410 adc userspace sysfs.

Signed-off-by: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Link: https://lore.kernel.org/r/20221003105903.229-4-Ibrahim.Tilki@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../ABI/testing/sysfs-bus-iio-adc-max11410          | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-adc-max11410

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410 b/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410
new file mode 100644
index 0000000000000..2a53c6b373603
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-max11410
@@ -0,0 +1,13 @@
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_filterY_notch_en
+Date:		September 2022
+KernelVersion:  6.0
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Enable or disable a notch filter.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_filterY_notch_center
+Date:		September 2022
+KernelVersion:  6.0
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Center frequency of the notch filter in Hz.
-- 
GitLab


From c1404d1b659fe3d7d13bdbd59e5161ab508dc101 Mon Sep 17 00:00:00 2001
From: ChiaEn Wu <chiaen_wu@richtek.com>
Date: Tue, 11 Oct 2022 12:05:45 +0800
Subject: [PATCH 2273/4122] iio: adc: mt6370: Add MediaTek MT6370 support

MediaTek MT6370 is a SubPMIC consisting of a single cell battery charger
with ADC monitoring, RGB LEDs, dual channel flashlight, WLED backlight
driver, display bias voltage supply, one general purpose LDO, and the
USB Type-C & PD controller complies with the latest USB Type-C and PD
standards.

Add support for the MT6370 ADC driver for system monitoring, including
charger current, voltage, and temperature.

Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: ChiaEn Wu <chiaen_wu@richtek.com>
Link: https://lore.kernel.org/r/81ec58ae89030e48508d6810396de2679c40d26c.1665488982.git.chiaen_wu@richtek.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/Kconfig      |  12 ++
 drivers/iio/adc/Makefile     |   1 +
 drivers/iio/adc/mt6370-adc.c | 305 +++++++++++++++++++++++++++++++++++
 3 files changed, 318 insertions(+)
 create mode 100644 drivers/iio/adc/mt6370-adc.c

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 544986fd456de..5386d862def9c 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -765,6 +765,18 @@ config MEDIATEK_MT6360_ADC
 	  is used in smartphones and tablets and supports a 11 channel
 	  general purpose ADC.
 
+config MEDIATEK_MT6370_ADC
+	tristate "MediaTek MT6370 ADC driver"
+	depends on MFD_MT6370
+	help
+	  Say yes here to enable MediaTek MT6370 ADC support.
+
+	  This ADC driver provides 9 channels for system monitoring (charger
+	  current, voltage, and temperature).
+
+	  This driver can also be built as a module. If so, the module
+	  will be called "mt6370-adc".
+
 config MEDIATEK_MT6577_AUXADC
 	tristate "MediaTek AUXADC driver"
 	depends on ARCH_MEDIATEK || COMPILE_TEST
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 4d803413425c4..1571e891828eb 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_MCP320X) += mcp320x.o
 obj-$(CONFIG_MCP3422) += mcp3422.o
 obj-$(CONFIG_MCP3911) += mcp3911.o
 obj-$(CONFIG_MEDIATEK_MT6360_ADC) += mt6360-adc.o
+obj-$(CONFIG_MEDIATEK_MT6370_ADC) += mt6370-adc.o
 obj-$(CONFIG_MEDIATEK_MT6577_AUXADC) += mt6577_auxadc.o
 obj-$(CONFIG_MEN_Z188_ADC) += men_z188_adc.o
 obj-$(CONFIG_MESON_SARADC) += meson_saradc.o
diff --git a/drivers/iio/adc/mt6370-adc.c b/drivers/iio/adc/mt6370-adc.c
new file mode 100644
index 0000000000000..bc62e5a9d50d1
--- /dev/null
+++ b/drivers/iio/adc/mt6370-adc.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Richtek Technology Corp.
+ *
+ * Author: ChiaEn Wu <chiaen_wu@richtek.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/iio/iio.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/sysfs.h>
+#include <linux/units.h>
+
+#include <dt-bindings/iio/adc/mediatek,mt6370_adc.h>
+
+#define MT6370_REG_CHG_CTRL3		0x113
+#define MT6370_REG_CHG_CTRL7		0x117
+#define MT6370_REG_CHG_ADC		0x121
+#define MT6370_REG_ADC_DATA_H		0x14C
+
+#define MT6370_ADC_START_MASK		BIT(0)
+#define MT6370_ADC_IN_SEL_MASK		GENMASK(7, 4)
+#define MT6370_AICR_ICHG_MASK		GENMASK(7, 2)
+
+#define MT6370_AICR_100_mA		0x0
+#define MT6370_AICR_150_mA		0x1
+#define MT6370_AICR_200_mA		0x2
+#define MT6370_AICR_250_mA		0x3
+#define MT6370_AICR_300_mA		0x4
+#define MT6370_AICR_350_mA		0x5
+
+#define MT6370_ICHG_100_mA		0x0
+#define MT6370_ICHG_200_mA		0x1
+#define MT6370_ICHG_300_mA		0x2
+#define MT6370_ICHG_400_mA		0x3
+#define MT6370_ICHG_500_mA		0x4
+#define MT6370_ICHG_600_mA		0x5
+#define MT6370_ICHG_700_mA		0x6
+#define MT6370_ICHG_800_mA		0x7
+
+#define ADC_CONV_TIME_MS		35
+#define ADC_CONV_POLLING_TIME_US	1000
+
+struct mt6370_adc_data {
+	struct device *dev;
+	struct regmap *regmap;
+	/*
+	 * This mutex lock is for preventing the different ADC channels
+	 * from being read at the same time.
+	 */
+	struct mutex adc_lock;
+};
+
+static int mt6370_adc_read_channel(struct mt6370_adc_data *priv, int chan,
+				   unsigned long addr, int *val)
+{
+	unsigned int reg_val;
+	__be16 be_val;
+	int ret;
+
+	mutex_lock(&priv->adc_lock);
+
+	reg_val = MT6370_ADC_START_MASK |
+		  FIELD_PREP(MT6370_ADC_IN_SEL_MASK, addr);
+	ret = regmap_write(priv->regmap, MT6370_REG_CHG_ADC, reg_val);
+	if (ret)
+		goto adc_unlock;
+
+	msleep(ADC_CONV_TIME_MS);
+
+	ret = regmap_read_poll_timeout(priv->regmap,
+				       MT6370_REG_CHG_ADC, reg_val,
+				       !(reg_val & MT6370_ADC_START_MASK),
+				       ADC_CONV_POLLING_TIME_US,
+				       ADC_CONV_TIME_MS * MILLI * 3);
+	if (ret) {
+		dev_err(priv->dev, "Failed to read ADC register (%d)\n", ret);
+		goto adc_unlock;
+	}
+
+	ret = regmap_raw_read(priv->regmap, MT6370_REG_ADC_DATA_H,
+			      &be_val, sizeof(be_val));
+	if (ret)
+		goto adc_unlock;
+
+	*val = be16_to_cpu(be_val);
+	ret = IIO_VAL_INT;
+
+adc_unlock:
+	mutex_unlock(&priv->adc_lock);
+
+	return ret;
+}
+
+static int mt6370_adc_read_scale(struct mt6370_adc_data *priv,
+				 int chan, int *val1, int *val2)
+{
+	unsigned int reg_val;
+	int ret;
+
+	switch (chan) {
+	case MT6370_CHAN_VBAT:
+	case MT6370_CHAN_VSYS:
+	case MT6370_CHAN_CHG_VDDP:
+		*val1 = 5;
+		return IIO_VAL_INT;
+	case MT6370_CHAN_IBUS:
+		ret = regmap_read(priv->regmap, MT6370_REG_CHG_CTRL3, &reg_val);
+		if (ret)
+			return ret;
+
+		reg_val = FIELD_GET(MT6370_AICR_ICHG_MASK, reg_val);
+		switch (reg_val) {
+		case MT6370_AICR_100_mA:
+		case MT6370_AICR_150_mA:
+		case MT6370_AICR_200_mA:
+		case MT6370_AICR_250_mA:
+		case MT6370_AICR_300_mA:
+		case MT6370_AICR_350_mA:
+			*val1 = 3350;
+			break;
+		default:
+			*val1 = 5000;
+			break;
+		}
+
+		*val2 = 100;
+
+		return IIO_VAL_FRACTIONAL;
+	case MT6370_CHAN_IBAT:
+		ret = regmap_read(priv->regmap, MT6370_REG_CHG_CTRL7, &reg_val);
+		if (ret)
+			return ret;
+
+		reg_val = FIELD_GET(MT6370_AICR_ICHG_MASK, reg_val);
+		switch (reg_val) {
+		case MT6370_ICHG_100_mA:
+		case MT6370_ICHG_200_mA:
+		case MT6370_ICHG_300_mA:
+		case MT6370_ICHG_400_mA:
+			*val1 = 2375;
+			break;
+		case MT6370_ICHG_500_mA:
+		case MT6370_ICHG_600_mA:
+		case MT6370_ICHG_700_mA:
+		case MT6370_ICHG_800_mA:
+			*val1 = 2680;
+			break;
+		default:
+			*val1 = 5000;
+			break;
+		}
+
+		*val2 = 100;
+
+		return IIO_VAL_FRACTIONAL;
+	case MT6370_CHAN_VBUSDIV5:
+		*val1 = 25;
+		return IIO_VAL_INT;
+	case MT6370_CHAN_VBUSDIV2:
+		*val1 = 10;
+		return IIO_VAL_INT;
+	case MT6370_CHAN_TS_BAT:
+		*val1 = 25;
+		*val2 = 10000;
+		return IIO_VAL_FRACTIONAL;
+	case MT6370_CHAN_TEMP_JC:
+		*val1 = 2000;
+		return IIO_VAL_INT;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int mt6370_adc_read_offset(struct mt6370_adc_data *priv,
+				  int chan, int *val)
+{
+	*val = -20;
+
+	return IIO_VAL_INT;
+}
+
+static int mt6370_adc_read_raw(struct iio_dev *iio_dev,
+			       const struct iio_chan_spec *chan,
+			       int *val, int *val2, long mask)
+{
+	struct mt6370_adc_data *priv = iio_priv(iio_dev);
+
+	switch (mask) {
+	case IIO_CHAN_INFO_RAW:
+		return mt6370_adc_read_channel(priv, chan->channel,
+					       chan->address, val);
+	case IIO_CHAN_INFO_SCALE:
+		return mt6370_adc_read_scale(priv, chan->channel, val, val2);
+	case IIO_CHAN_INFO_OFFSET:
+		return mt6370_adc_read_offset(priv, chan->channel, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+static const char * const mt6370_channel_labels[MT6370_CHAN_MAX] = {
+	[MT6370_CHAN_VBUSDIV5] = "vbusdiv5",
+	[MT6370_CHAN_VBUSDIV2] = "vbusdiv2",
+	[MT6370_CHAN_VSYS] = "vsys",
+	[MT6370_CHAN_VBAT] = "vbat",
+	[MT6370_CHAN_TS_BAT] = "ts_bat",
+	[MT6370_CHAN_IBUS] = "ibus",
+	[MT6370_CHAN_IBAT] = "ibat",
+	[MT6370_CHAN_CHG_VDDP] = "chg_vddp",
+	[MT6370_CHAN_TEMP_JC] = "temp_jc",
+};
+
+static int mt6370_adc_read_label(struct iio_dev *iio_dev,
+				 struct iio_chan_spec const *chan, char *label)
+{
+	return sysfs_emit(label, "%s\n", mt6370_channel_labels[chan->channel]);
+}
+
+static const struct iio_info mt6370_adc_iio_info = {
+	.read_raw = mt6370_adc_read_raw,
+	.read_label = mt6370_adc_read_label,
+};
+
+#define MT6370_ADC_CHAN(_idx, _type, _addr, _extra_info) {	\
+	.type = _type,						\
+	.channel = MT6370_CHAN_##_idx,				\
+	.address = _addr,					\
+	.scan_index = MT6370_CHAN_##_idx,			\
+	.indexed = 1,						\
+	.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |		\
+			      BIT(IIO_CHAN_INFO_SCALE) |	\
+			      _extra_info,			\
+}
+
+static const struct iio_chan_spec mt6370_adc_channels[] = {
+	MT6370_ADC_CHAN(VBUSDIV5, IIO_VOLTAGE, 1, 0),
+	MT6370_ADC_CHAN(VBUSDIV2, IIO_VOLTAGE, 2, 0),
+	MT6370_ADC_CHAN(VSYS, IIO_VOLTAGE, 3, 0),
+	MT6370_ADC_CHAN(VBAT, IIO_VOLTAGE, 4, 0),
+	MT6370_ADC_CHAN(TS_BAT, IIO_VOLTAGE, 6, 0),
+	MT6370_ADC_CHAN(IBUS, IIO_CURRENT, 8, 0),
+	MT6370_ADC_CHAN(IBAT, IIO_CURRENT, 9, 0),
+	MT6370_ADC_CHAN(CHG_VDDP, IIO_VOLTAGE, 11, 0),
+	MT6370_ADC_CHAN(TEMP_JC, IIO_TEMP, 12, BIT(IIO_CHAN_INFO_OFFSET)),
+};
+
+static int mt6370_adc_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct mt6370_adc_data *priv;
+	struct iio_dev *indio_dev;
+	struct regmap *regmap;
+	int ret;
+
+	regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!regmap)
+		return dev_err_probe(dev, -ENODEV, "Failed to get regmap\n");
+
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*priv));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	priv = iio_priv(indio_dev);
+	priv->dev = dev;
+	priv->regmap = regmap;
+	mutex_init(&priv->adc_lock);
+
+	ret = regmap_write(priv->regmap, MT6370_REG_CHG_ADC, 0);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to reset ADC\n");
+
+	indio_dev->name = "mt6370-adc";
+	indio_dev->info = &mt6370_adc_iio_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->channels = mt6370_adc_channels;
+	indio_dev->num_channels = ARRAY_SIZE(mt6370_adc_channels);
+
+	return devm_iio_device_register(dev, indio_dev);
+}
+
+static const struct of_device_id mt6370_adc_of_id[] = {
+	{ .compatible = "mediatek,mt6370-adc", },
+	{}
+};
+MODULE_DEVICE_TABLE(of, mt6370_adc_of_id);
+
+static struct platform_driver mt6370_adc_driver = {
+	.driver = {
+		.name = "mt6370-adc",
+		.of_match_table = mt6370_adc_of_id,
+	},
+	.probe = mt6370_adc_probe,
+};
+module_platform_driver(mt6370_adc_driver);
+
+MODULE_AUTHOR("ChiaEn Wu <chiaen_wu@richtek.com>");
+MODULE_DESCRIPTION("MT6370 ADC Driver");
+MODULE_LICENSE("GPL v2");
-- 
GitLab


From cd83c5c10036a2a156d725725daf3409832c8a24 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 10 Oct 2022 13:07:26 +0200
Subject: [PATCH 2274/4122] iio: imu: st_lsm6dsx: introduce
 st_lsm6dsx_device_set_enable routine

Introduce st_lsm6dsx_device_set_enable utility routine and remove
duplicated code used to enable/disable sensors

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/e3fbe5d4a3bed41130908669f745f78c8505cf47.1665399959.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h        | 11 +++++++++++
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c | 14 +++-----------
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c   | 14 ++------------
 3 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index 6b57d47be69ea..d3a4e21bc1149 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -509,6 +509,17 @@ st_lsm6dsx_get_mount_matrix(const struct iio_dev *iio_dev,
 	return &hw->orientation;
 }
 
+static inline int
+st_lsm6dsx_device_set_enable(struct st_lsm6dsx_sensor *sensor, bool enable)
+{
+	if (sensor->id == ST_LSM6DSX_ID_EXT0 ||
+	    sensor->id == ST_LSM6DSX_ID_EXT1 ||
+	    sensor->id == ST_LSM6DSX_ID_EXT2)
+		return st_lsm6dsx_shub_set_enable(sensor, enable);
+
+	return st_lsm6dsx_sensor_set_enable(sensor, enable);
+}
+
 static const
 struct iio_chan_spec_ext_info __maybe_unused st_lsm6dsx_accel_ext_info[] = {
 	IIO_MOUNT_MATRIX(IIO_SHARED_BY_ALL, st_lsm6dsx_get_mount_matrix),
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
index e49f2d120ed38..48fe6a45671bd 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
@@ -673,17 +673,9 @@ int st_lsm6dsx_update_fifo(struct st_lsm6dsx_sensor *sensor, bool enable)
 			goto out;
 	}
 
-	if (sensor->id == ST_LSM6DSX_ID_EXT0 ||
-	    sensor->id == ST_LSM6DSX_ID_EXT1 ||
-	    sensor->id == ST_LSM6DSX_ID_EXT2) {
-		err = st_lsm6dsx_shub_set_enable(sensor, enable);
-		if (err < 0)
-			goto out;
-	} else {
-		err = st_lsm6dsx_sensor_set_enable(sensor, enable);
-		if (err < 0)
-			goto out;
-	}
+	err = st_lsm6dsx_device_set_enable(sensor, enable);
+	if (err < 0)
+		goto out;
 
 	err = st_lsm6dsx_set_fifo_odr(sensor, enable);
 	if (err < 0)
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index f8bbb005718ed..6af2e905c1612 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -2317,12 +2317,7 @@ static int st_lsm6dsx_suspend(struct device *dev)
 			continue;
 		}
 
-		if (sensor->id == ST_LSM6DSX_ID_EXT0 ||
-		    sensor->id == ST_LSM6DSX_ID_EXT1 ||
-		    sensor->id == ST_LSM6DSX_ID_EXT2)
-			err = st_lsm6dsx_shub_set_enable(sensor, false);
-		else
-			err = st_lsm6dsx_sensor_set_enable(sensor, false);
+		err = st_lsm6dsx_device_set_enable(sensor, false);
 		if (err < 0)
 			return err;
 
@@ -2353,12 +2348,7 @@ static int st_lsm6dsx_resume(struct device *dev)
 		if (!(hw->suspend_mask & BIT(sensor->id)))
 			continue;
 
-		if (sensor->id == ST_LSM6DSX_ID_EXT0 ||
-		    sensor->id == ST_LSM6DSX_ID_EXT1 ||
-		    sensor->id == ST_LSM6DSX_ID_EXT2)
-			err = st_lsm6dsx_shub_set_enable(sensor, true);
-		else
-			err = st_lsm6dsx_sensor_set_enable(sensor, true);
+		err = st_lsm6dsx_device_set_enable(sensor, true);
 		if (err < 0)
 			return err;
 
-- 
GitLab


From 6cafcdb19ee4a0a073e4a8b0828698f8816c800c Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan@foss.st.com>
Date: Wed, 12 Oct 2022 16:21:59 +0200
Subject: [PATCH 2275/4122] dt-bindings: iio: adc: stm32-adc: add stm32mp13
 compatibles

Add st,stm32mp13-adc-core and st,stm32mp13-adc compatibles
to support STM32MPU13 SoC.

On STM32MP13x, each ADC peripheral has a single ADC block.
These ADC peripherals, ADC1 and ADC2, are fully independent.

Main characteristics of STM32MP13x ADC:
- One interrupt line per ADC
- 6 to 12 bits resolution
- 19 channels

ADC2 instance supports two extra internal channels VDDCPU and VDDQ_DDR.
Add "vddcpu" and "vddq_ddr" internal channels names to the reserved
labels list.

Signed-off-by: Olivier Moysan <olivier.moysan@foss.st.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221012142205.13041-3-olivier.moysan@foss.st.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/adc/st,stm32-adc.yaml        | 68 ++++++++++++++++++-
 1 file changed, 65 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
index fa8da42cb1e6b..05265f381fde9 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
@@ -27,6 +27,7 @@ properties:
       - st,stm32f4-adc-core
       - st,stm32h7-adc-core
       - st,stm32mp1-adc-core
+      - st,stm32mp13-adc-core
 
   reg:
     maxItems: 1
@@ -37,6 +38,7 @@ properties:
         - stm32f4 and stm32h7 share a common ADC interrupt line.
         - stm32mp1 has two separate interrupt lines, one for each ADC within
           ADC block.
+        - stm32mp13 has an interrupt line per ADC block.
     minItems: 1
     maxItems: 2
 
@@ -180,6 +182,33 @@ allOf:
           maximum: 36000000
           default: 36000000
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: st,stm32mp13-adc-core
+
+    then:
+      properties:
+        clocks:
+          minItems: 1
+          maxItems: 2
+
+        clock-names:
+          items:
+            - const: bus
+            - const: adc
+          minItems: 1
+
+        interrupts:
+          items:
+            - description: ADC interrupt line
+
+        st,max-clk-rate-hz:
+          minimum: 150000
+          maximum: 75000000
+          default: 75000000
+
 additionalProperties: false
 
 required:
@@ -208,6 +237,7 @@ patternProperties:
           - st,stm32f4-adc
           - st,stm32h7-adc
           - st,stm32mp1-adc
+          - st,stm32mp13-adc
 
       reg:
         description: |
@@ -229,7 +259,7 @@ patternProperties:
       interrupts:
         description: |
           IRQ Line for the ADC instance. Valid values are:
-            - 0 for adc@0
+            - 0 for adc@0 (single adc for stm32mp13)
             - 1 for adc@100
             - 2 for adc@200 (stm32f4 only)
         maxItems: 1
@@ -250,13 +280,14 @@ patternProperties:
       assigned-resolution-bits:
         description: |
           Resolution (bits) to use for conversions:
-            - can be 6, 8, 10 or 12 on stm32f4
+            - can be 6, 8, 10 or 12 on stm32f4 and stm32mp13
             - can be 8, 10, 12, 14 or 16 on stm32h7 and stm32mp1
 
       st,adc-channels:
         description: |
           List of single-ended channels muxed for this ADC. It can have up to:
             - 16 channels, numbered from 0 to 15 (for in0..in15) on stm32f4
+            - 19 channels, numbered from 0 to 18 (for in0..in18) on stm32mp13.
             - 20 channels, numbered from 0 to 19 (for in0..in19) on stm32h7 and
               stm32mp1.
         $ref: /schemas/types.yaml#/definitions/uint32-array
@@ -322,7 +353,7 @@ patternProperties:
           label:
             description: |
               Unique name to identify which channel this is.
-              Reserved label names "vddcore", "vrefint" and "vbat"
+              Reserved label names "vddcore", "vddcpu", "vddq_ddr", "vrefint" and "vbat"
               are used to identify internal channels with matching names.
 
           diff-channels:
@@ -419,6 +450,37 @@ patternProperties:
               items:
                 minimum: 40
 
+
+      - if:
+          properties:
+            compatible:
+              contains:
+                const: st,stm32mp13-adc
+
+        then:
+          properties:
+            reg:
+              const: 0x0
+
+            interrupts:
+              const: 0
+
+            assigned-resolution-bits:
+              enum: [6, 8, 10, 12]
+              default: 12
+
+            st,adc-channels:
+              minItems: 1
+              maxItems: 19
+              items:
+                minimum: 0
+                maximum: 18
+
+            st,min-sample-time-nsecs:
+              minItems: 1
+              maxItems: 19
+              items:
+                minimum: 40
     additionalProperties: false
 
     required:
-- 
GitLab


From cf0fb80ae1675179620d964d629b43ca57219ced Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan@foss.st.com>
Date: Wed, 12 Oct 2022 16:22:00 +0200
Subject: [PATCH 2276/4122] iio: adc: stm32-adc: add stm32mp13 support

Add STM32 ADC support for STM32MP13x SOCs family.

On STM32MP13x, each ADC peripheral has a single ADC block.
These ADC peripherals, ADC1 and ADC2, are fully independent.
This introduces changes in common registers handling.

Some features such as boost mode, channel preselection and
linear calibration are not supported by the STM32MP13x ADC.
Add diversity management for these features.

The STM32MP13x ADC introduces registers and bitfield variants
on existing features such as calibration factors and internal
channels. Add register diversity management.

Add also support for new internal channels VDDCPU and VDDQ_DDR.

Signed-off-by: Olivier Moysan <olivier.moysan@foss.st.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20221012142205.13041-4-olivier.moysan@foss.st.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/stm32-adc-core.c |  30 ++++-
 drivers/iio/adc/stm32-adc-core.h |  30 +++++
 drivers/iio/adc/stm32-adc.c      | 210 +++++++++++++++++++++++++++----
 3 files changed, 239 insertions(+), 31 deletions(-)

diff --git a/drivers/iio/adc/stm32-adc-core.c b/drivers/iio/adc/stm32-adc-core.c
index 81d5db91c67bf..48f02dcc81c1b 100644
--- a/drivers/iio/adc/stm32-adc-core.c
+++ b/drivers/iio/adc/stm32-adc-core.c
@@ -22,6 +22,7 @@
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
+#include <linux/units.h>
 
 #include "stm32-adc-core.h"
 
@@ -306,8 +307,8 @@ out:
 static const struct stm32_adc_common_regs stm32f4_adc_common_regs = {
 	.csr = STM32F4_ADC_CSR,
 	.ccr = STM32F4_ADC_CCR,
-	.eoc_msk = { STM32F4_EOC1, STM32F4_EOC2, STM32F4_EOC3},
-	.ovr_msk = { STM32F4_OVR1, STM32F4_OVR2, STM32F4_OVR3},
+	.eoc_msk = { STM32F4_EOC1, STM32F4_EOC2, STM32F4_EOC3 },
+	.ovr_msk = { STM32F4_OVR1, STM32F4_OVR2, STM32F4_OVR3 },
 	.ier = STM32F4_ADC_CR1,
 	.eocie_msk = STM32F4_EOCIE,
 };
@@ -316,8 +317,18 @@ static const struct stm32_adc_common_regs stm32f4_adc_common_regs = {
 static const struct stm32_adc_common_regs stm32h7_adc_common_regs = {
 	.csr = STM32H7_ADC_CSR,
 	.ccr = STM32H7_ADC_CCR,
-	.eoc_msk = { STM32H7_EOC_MST, STM32H7_EOC_SLV},
-	.ovr_msk = { STM32H7_OVR_MST, STM32H7_OVR_SLV},
+	.eoc_msk = { STM32H7_EOC_MST, STM32H7_EOC_SLV },
+	.ovr_msk = { STM32H7_OVR_MST, STM32H7_OVR_SLV },
+	.ier = STM32H7_ADC_IER,
+	.eocie_msk = STM32H7_EOCIE,
+};
+
+/* STM32MP13 common registers definitions */
+static const struct stm32_adc_common_regs stm32mp13_adc_common_regs = {
+	.csr = STM32H7_ADC_CSR,
+	.ccr = STM32H7_ADC_CCR,
+	.eoc_msk = { STM32H7_EOC_MST },
+	.ovr_msk = { STM32H7_OVR_MST },
 	.ier = STM32H7_ADC_IER,
 	.eocie_msk = STM32H7_EOCIE,
 };
@@ -868,6 +879,14 @@ static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = {
 	.num_irqs = 2,
 };
 
+static const struct stm32_adc_priv_cfg stm32mp13_adc_priv_cfg = {
+	.regs = &stm32mp13_adc_common_regs,
+	.clk_sel = stm32h7_adc_clk_sel,
+	.max_clk_rate_hz = 75 * HZ_PER_MHZ,
+	.ipid = STM32MP13_IPIDR_NUMBER,
+	.num_irqs = 1,
+};
+
 static const struct of_device_id stm32_adc_of_match[] = {
 	{
 		.compatible = "st,stm32f4-adc-core",
@@ -878,6 +897,9 @@ static const struct of_device_id stm32_adc_of_match[] = {
 	}, {
 		.compatible = "st,stm32mp1-adc-core",
 		.data = (void *)&stm32mp1_adc_priv_cfg
+	}, {
+		.compatible = "st,stm32mp13-adc-core",
+		.data = (void *)&stm32mp13_adc_priv_cfg
 	}, {
 	},
 };
diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h
index 2118ef63843d2..9d6dfa1c03faf 100644
--- a/drivers/iio/adc/stm32-adc-core.h
+++ b/drivers/iio/adc/stm32-adc-core.h
@@ -112,6 +112,11 @@
 #define STM32MP1_ADC_IPDR		0x3F8
 #define STM32MP1_ADC_SIDR		0x3FC
 
+/* STM32MP13 - Registers for each ADC instance */
+#define STM32MP13_ADC_DIFSEL		0xB0
+#define STM32MP13_ADC_CALFACT		0xB4
+#define STM32MP13_ADC2_OR		0xC8
+
 /* STM32H7 - common registers for all ADC instances */
 #define STM32H7_ADC_CSR			(STM32_ADCX_COMN_OFFSET + 0x00)
 #define STM32H7_ADC_CCR			(STM32_ADCX_COMN_OFFSET + 0x08)
@@ -161,6 +166,9 @@ enum stm32h7_adc_dmngt {
 	STM32H7_DMNGT_DMA_CIRC,		/* DMA circular mode */
 };
 
+/* STM32H7_ADC_DIFSEL - bit fields */
+#define STM32H7_DIFSEL_MASK		GENMASK(19, 0)
+
 /* STM32H7_ADC_CALFACT - bit fields */
 #define STM32H7_CALFACT_D_SHIFT		16
 #define STM32H7_CALFACT_D_MASK		GENMASK(26, 16)
@@ -210,7 +218,29 @@ enum stm32h7_adc_dmngt {
 /* STM32MP1_ADC_SIDR - bit fields */
 #define STM32MP1_SIDR_MASK		GENMASK(31, 0)
 
+/* STM32MP13_ADC_CFGR specific bit fields */
+#define STM32MP13_DMAEN			BIT(0)
+#define STM32MP13_DMACFG		BIT(1)
+#define STM32MP13_DFSDMCFG		BIT(2)
+#define STM32MP13_RES_SHIFT		3
+#define STM32MP13_RES_MASK		GENMASK(4, 3)
+
+/* STM32MP13_ADC_DIFSEL - bit fields */
+#define STM32MP13_DIFSEL_MASK		GENMASK(18, 0)
+
+/* STM32MP13_ADC_CALFACT - bit fields */
+#define STM32MP13_CALFACT_D_SHIFT	16
+#define STM32MP13_CALFACT_D_MASK	GENMASK(22, 16)
+#define STM32MP13_CALFACT_S_SHIFT	0
+#define STM32MP13_CALFACT_S_MASK	GENMASK(6, 0)
+
+/* STM32MP13_ADC2_OR - bit fields */
+#define STM32MP13_OP2			BIT(2)
+#define STM32MP13_OP1			BIT(1)
+#define STM32MP13_OP0			BIT(0)
+
 #define STM32MP15_IPIDR_NUMBER		0x00110005
+#define STM32MP13_IPIDR_NUMBER		0x00110006
 
 /**
  * struct stm32_adc_common - stm32 ADC driver common data (for all instances)
diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 3cda529f081db..d36c024526f29 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -82,6 +82,8 @@ enum stm32_adc_extsel {
 enum stm32_adc_int_ch {
 	STM32_ADC_INT_CH_NONE = -1,
 	STM32_ADC_INT_CH_VDDCORE,
+	STM32_ADC_INT_CH_VDDCPU,
+	STM32_ADC_INT_CH_VDDQ_DDR,
 	STM32_ADC_INT_CH_VREFINT,
 	STM32_ADC_INT_CH_VBAT,
 	STM32_ADC_INT_CH_NB,
@@ -99,6 +101,8 @@ struct stm32_adc_ic {
 
 static const struct stm32_adc_ic stm32_adc_ic[STM32_ADC_INT_CH_NB] = {
 	{ "vddcore", STM32_ADC_INT_CH_VDDCORE },
+	{ "vddcpu", STM32_ADC_INT_CH_VDDCPU },
+	{ "vddq_ddr", STM32_ADC_INT_CH_VDDQ_DDR },
 	{ "vrefint", STM32_ADC_INT_CH_VREFINT },
 	{ "vbat", STM32_ADC_INT_CH_VBAT },
 };
@@ -160,9 +164,14 @@ struct stm32_adc_vrefint {
  * @exten:		trigger control register & bitfield
  * @extsel:		trigger selection register & bitfield
  * @res:		resolution selection register & bitfield
+ * @difsel:		differential mode selection register & bitfield
+ * @calfact_s:		single-ended calibration factors register & bitfield
+ * @calfact_d:		differential calibration factors register & bitfield
  * @smpr:		smpr1 & smpr2 registers offset array
  * @smp_bits:		smpr1 & smpr2 index and bitfields
- * @or_vdd:		option register & vddcore bitfield
+ * @or_vddcore:		option register & vddcore bitfield
+ * @or_vddcpu:		option register & vddcpu bitfield
+ * @or_vddq_ddr:	option register & vddq_ddr bitfield
  * @ccr_vbat:		common register & vbat bitfield
  * @ccr_vref:		common register & vrefint bitfield
  */
@@ -176,9 +185,14 @@ struct stm32_adc_regspec {
 	const struct stm32_adc_regs exten;
 	const struct stm32_adc_regs extsel;
 	const struct stm32_adc_regs res;
+	const struct stm32_adc_regs difsel;
+	const struct stm32_adc_regs calfact_s;
+	const struct stm32_adc_regs calfact_d;
 	const u32 smpr[2];
 	const struct stm32_adc_regs *smp_bits;
-	const struct stm32_adc_regs or_vdd;
+	const struct stm32_adc_regs or_vddcore;
+	const struct stm32_adc_regs or_vddcpu;
+	const struct stm32_adc_regs or_vddq_ddr;
 	const struct stm32_adc_regs ccr_vbat;
 	const struct stm32_adc_regs ccr_vref;
 };
@@ -192,6 +206,9 @@ struct stm32_adc;
  * @trigs:		external trigger sources
  * @clk_required:	clock is required
  * @has_vregready:	vregready status flag presence
+ * @has_boostmode:	boost mode support flag
+ * @has_linearcal:	linear calibration support flag
+ * @has_presel:		channel preselection support flag
  * @prepare:		optional prepare routine (power-up, enable)
  * @start_conv:		routine to start conversions
  * @stop_conv:		routine to stop conversions
@@ -206,6 +223,9 @@ struct stm32_adc_cfg {
 	struct stm32_adc_trig_info	*trigs;
 	bool clk_required;
 	bool has_vregready;
+	bool has_boostmode;
+	bool has_linearcal;
+	bool has_presel;
 	int (*prepare)(struct iio_dev *);
 	void (*start_conv)(struct iio_dev *, bool dma);
 	void (*stop_conv)(struct iio_dev *);
@@ -312,6 +332,13 @@ static const struct stm32_adc_info stm32h7_adc_info = {
 	.num_res = ARRAY_SIZE(stm32h7_adc_resolutions),
 };
 
+/* stm32mp13 can have up to 19 channels */
+static const struct stm32_adc_info stm32mp13_adc_info = {
+	.max_channels = 19,
+	.resolutions = stm32f4_adc_resolutions,
+	.num_res = ARRAY_SIZE(stm32f4_adc_resolutions),
+};
+
 /*
  * stm32f4_sq - describe regular sequence registers
  * - L: sequence len (register & bit field)
@@ -497,10 +524,45 @@ static const struct stm32_adc_regspec stm32h7_adc_regspec = {
 	.extsel = { STM32H7_ADC_CFGR, STM32H7_EXTSEL_MASK,
 		    STM32H7_EXTSEL_SHIFT },
 	.res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT },
+	.difsel = { STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK},
+	.calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK,
+		       STM32H7_CALFACT_S_SHIFT },
+	.calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK,
+		       STM32H7_CALFACT_D_SHIFT },
 	.smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 },
 	.smp_bits = stm32h7_smp_bits,
 };
 
+/* STM32MP13 programmable sampling time (ADC clock cycles, rounded down) */
+static const unsigned int stm32mp13_adc_smp_cycles[STM32_ADC_MAX_SMP + 1] = {
+	2, 6, 12, 24, 47, 92, 247, 640,
+};
+
+static const struct stm32_adc_regspec stm32mp13_adc_regspec = {
+	.dr = STM32H7_ADC_DR,
+	.ier_eoc = { STM32H7_ADC_IER, STM32H7_EOCIE },
+	.ier_ovr = { STM32H7_ADC_IER, STM32H7_OVRIE },
+	.isr_eoc = { STM32H7_ADC_ISR, STM32H7_EOC },
+	.isr_ovr = { STM32H7_ADC_ISR, STM32H7_OVR },
+	.sqr = stm32h7_sq,
+	.exten = { STM32H7_ADC_CFGR, STM32H7_EXTEN_MASK, STM32H7_EXTEN_SHIFT },
+	.extsel = { STM32H7_ADC_CFGR, STM32H7_EXTSEL_MASK,
+		    STM32H7_EXTSEL_SHIFT },
+	.res = { STM32H7_ADC_CFGR, STM32MP13_RES_MASK, STM32MP13_RES_SHIFT },
+	.difsel = { STM32MP13_ADC_DIFSEL, STM32MP13_DIFSEL_MASK},
+	.calfact_s = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_S_MASK,
+		       STM32MP13_CALFACT_S_SHIFT },
+	.calfact_d = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_D_MASK,
+		       STM32MP13_CALFACT_D_SHIFT },
+	.smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 },
+	.smp_bits = stm32h7_smp_bits,
+	.or_vddcore = { STM32MP13_ADC2_OR, STM32MP13_OP0 },
+	.or_vddcpu = { STM32MP13_ADC2_OR, STM32MP13_OP1 },
+	.or_vddq_ddr = { STM32MP13_ADC2_OR, STM32MP13_OP2 },
+	.ccr_vbat = { STM32H7_ADC_CCR, STM32H7_VBATEN },
+	.ccr_vref = { STM32H7_ADC_CCR, STM32H7_VREFEN },
+};
+
 static const struct stm32_adc_regspec stm32mp1_adc_regspec = {
 	.dr = STM32H7_ADC_DR,
 	.ier_eoc = { STM32H7_ADC_IER, STM32H7_EOCIE },
@@ -512,9 +574,14 @@ static const struct stm32_adc_regspec stm32mp1_adc_regspec = {
 	.extsel = { STM32H7_ADC_CFGR, STM32H7_EXTSEL_MASK,
 		    STM32H7_EXTSEL_SHIFT },
 	.res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT },
+	.difsel = { STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK},
+	.calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK,
+		       STM32H7_CALFACT_S_SHIFT },
+	.calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK,
+		       STM32H7_CALFACT_D_SHIFT },
 	.smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 },
 	.smp_bits = stm32h7_smp_bits,
-	.or_vdd = { STM32MP1_ADC2_OR, STM32MP1_VDDCOREEN },
+	.or_vddcore = { STM32MP1_ADC2_OR, STM32MP1_VDDCOREEN },
 	.ccr_vbat = { STM32H7_ADC_CCR, STM32H7_VBATEN },
 	.ccr_vref = { STM32H7_ADC_CCR, STM32H7_VREFEN },
 };
@@ -675,8 +742,18 @@ static void stm32_adc_int_ch_enable(struct iio_dev *indio_dev)
 		switch (i) {
 		case STM32_ADC_INT_CH_VDDCORE:
 			dev_dbg(&indio_dev->dev, "Enable VDDCore\n");
-			stm32_adc_set_bits(adc, adc->cfg->regs->or_vdd.reg,
-					   adc->cfg->regs->or_vdd.mask);
+			stm32_adc_set_bits(adc, adc->cfg->regs->or_vddcore.reg,
+					   adc->cfg->regs->or_vddcore.mask);
+			break;
+		case STM32_ADC_INT_CH_VDDCPU:
+			dev_dbg(&indio_dev->dev, "Enable VDDCPU\n");
+			stm32_adc_set_bits(adc, adc->cfg->regs->or_vddcpu.reg,
+					   adc->cfg->regs->or_vddcpu.mask);
+			break;
+		case STM32_ADC_INT_CH_VDDQ_DDR:
+			dev_dbg(&indio_dev->dev, "Enable VDDQ_DDR\n");
+			stm32_adc_set_bits(adc, adc->cfg->regs->or_vddq_ddr.reg,
+					   adc->cfg->regs->or_vddq_ddr.mask);
 			break;
 		case STM32_ADC_INT_CH_VREFINT:
 			dev_dbg(&indio_dev->dev, "Enable VREFInt\n");
@@ -702,8 +779,16 @@ static void stm32_adc_int_ch_disable(struct stm32_adc *adc)
 
 		switch (i) {
 		case STM32_ADC_INT_CH_VDDCORE:
-			stm32_adc_clr_bits(adc, adc->cfg->regs->or_vdd.reg,
-					   adc->cfg->regs->or_vdd.mask);
+			stm32_adc_clr_bits(adc, adc->cfg->regs->or_vddcore.reg,
+					   adc->cfg->regs->or_vddcore.mask);
+			break;
+		case STM32_ADC_INT_CH_VDDCPU:
+			stm32_adc_clr_bits(adc, adc->cfg->regs->or_vddcpu.reg,
+					   adc->cfg->regs->or_vddcpu.mask);
+			break;
+		case STM32_ADC_INT_CH_VDDQ_DDR:
+			stm32_adc_clr_bits(adc, adc->cfg->regs->or_vddq_ddr.reg,
+					   adc->cfg->regs->or_vddq_ddr.mask);
 			break;
 		case STM32_ADC_INT_CH_VREFINT:
 			stm32_adc_clr_bits_common(adc, adc->cfg->regs->ccr_vref.reg,
@@ -801,6 +886,7 @@ static void stm32h7_adc_stop_conv(struct iio_dev *indio_dev)
 	if (ret)
 		dev_warn(&indio_dev->dev, "stop failed\n");
 
+	/* STM32H7_DMNGT_MASK covers STM32MP13_DMAEN & STM32MP13_DMACFG */
 	stm32_adc_clr_bits(adc, STM32H7_ADC_CFGR, STM32H7_DMNGT_MASK);
 }
 
@@ -811,6 +897,17 @@ static void stm32h7_adc_irq_clear(struct iio_dev *indio_dev, u32 msk)
 	stm32_adc_set_bits(adc, adc->cfg->regs->isr_eoc.reg, msk);
 }
 
+static void stm32mp13_adc_start_conv(struct iio_dev *indio_dev, bool dma)
+{
+	struct stm32_adc *adc = iio_priv(indio_dev);
+
+	if (dma)
+		stm32_adc_set_bits(adc, STM32H7_ADC_CFGR,
+				   STM32MP13_DMAEN | STM32MP13_DMACFG);
+
+	stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADSTART);
+}
+
 static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev)
 {
 	struct stm32_adc *adc = iio_priv(indio_dev);
@@ -821,7 +918,8 @@ static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev)
 	stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_DEEPPWD);
 	stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADVREGEN);
 
-	if (adc->common->rate > STM32H7_BOOST_CLKRATE)
+	if (adc->cfg->has_boostmode &&
+	    adc->common->rate > STM32H7_BOOST_CLKRATE)
 		stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_BOOST);
 
 	/* Wait for startup time */
@@ -843,7 +941,8 @@ static int stm32h7_adc_exit_pwr_down(struct iio_dev *indio_dev)
 
 static void stm32h7_adc_enter_pwr_down(struct stm32_adc *adc)
 {
-	stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_BOOST);
+	if (adc->cfg->has_boostmode)
+		stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_BOOST);
 
 	/* Setting DEEPPWD disables ADC vreg and clears ADVREGEN */
 	stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_DEEPPWD);
@@ -901,6 +1000,9 @@ static int stm32h7_adc_read_selfcalib(struct iio_dev *indio_dev)
 	int i, ret;
 	u32 lincalrdyw_mask, val;
 
+	if (!adc->cfg->has_linearcal)
+		goto skip_linearcal;
+
 	/* Read linearity calibration */
 	lincalrdyw_mask = STM32H7_LINCALRDYW6;
 	for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) {
@@ -923,12 +1025,13 @@ static int stm32h7_adc_read_selfcalib(struct iio_dev *indio_dev)
 		lincalrdyw_mask >>= 1;
 	}
 
+skip_linearcal:
 	/* Read offset calibration */
-	val = stm32_adc_readl(adc, STM32H7_ADC_CALFACT);
-	adc->cal.calfact_s = (val & STM32H7_CALFACT_S_MASK);
-	adc->cal.calfact_s >>= STM32H7_CALFACT_S_SHIFT;
-	adc->cal.calfact_d = (val & STM32H7_CALFACT_D_MASK);
-	adc->cal.calfact_d >>= STM32H7_CALFACT_D_SHIFT;
+	val = stm32_adc_readl(adc, adc->cfg->regs->calfact_s.reg);
+	adc->cal.calfact_s = (val & adc->cfg->regs->calfact_s.mask);
+	adc->cal.calfact_s >>= adc->cfg->regs->calfact_s.shift;
+	adc->cal.calfact_d = (val & adc->cfg->regs->calfact_d.mask);
+	adc->cal.calfact_d >>= adc->cfg->regs->calfact_d.shift;
 	adc->cal.calibrated = true;
 
 	return 0;
@@ -945,9 +1048,12 @@ static int stm32h7_adc_restore_selfcalib(struct iio_dev *indio_dev)
 	int i, ret;
 	u32 lincalrdyw_mask, val;
 
-	val = (adc->cal.calfact_s << STM32H7_CALFACT_S_SHIFT) |
-		(adc->cal.calfact_d << STM32H7_CALFACT_D_SHIFT);
-	stm32_adc_writel(adc, STM32H7_ADC_CALFACT, val);
+	val = (adc->cal.calfact_s << adc->cfg->regs->calfact_s.shift) |
+		(adc->cal.calfact_d << adc->cfg->regs->calfact_d.shift);
+	stm32_adc_writel(adc, adc->cfg->regs->calfact_s.reg, val);
+
+	if (!adc->cfg->has_linearcal)
+		return 0;
 
 	lincalrdyw_mask = STM32H7_LINCALRDYW6;
 	for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) {
@@ -1016,11 +1122,14 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev)
 {
 	struct stm32_adc *adc = iio_priv(indio_dev);
 	int ret;
+	u32 msk = STM32H7_ADCALDIF;
 	u32 val;
 
 	if (adc->cal.calibrated)
 		return true;
 
+	if (adc->cfg->has_linearcal)
+		msk |= STM32H7_ADCALLIN;
 	/* ADC must be disabled for calibration */
 	stm32h7_adc_disable(indio_dev);
 
@@ -1029,8 +1138,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev)
 	 * - Offset calibration for single ended inputs
 	 * - No linearity calibration (do it later, before reading it)
 	 */
-	stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADCALDIF);
-	stm32_adc_clr_bits(adc, STM32H7_ADC_CR, STM32H7_ADCALLIN);
+	stm32_adc_clr_bits(adc, STM32H7_ADC_CR, msk);
 
 	/* Start calibration, then wait for completion */
 	stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADCAL);
@@ -1048,8 +1156,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev)
 	 * - Linearity calibration (needs to be done only once for single/diff)
 	 *   will run simultaneously with offset calibration.
 	 */
-	stm32_adc_set_bits(adc, STM32H7_ADC_CR,
-			   STM32H7_ADCALDIF | STM32H7_ADCALLIN);
+	stm32_adc_set_bits(adc, STM32H7_ADC_CR, msk);
 	stm32_adc_set_bits(adc, STM32H7_ADC_CR, STM32H7_ADCAL);
 	ret = stm32_adc_readl_poll_timeout(STM32H7_ADC_CR, val,
 					   !(val & STM32H7_ADCAL), 100,
@@ -1060,8 +1167,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev)
 	}
 
 out:
-	stm32_adc_clr_bits(adc, STM32H7_ADC_CR,
-			   STM32H7_ADCALDIF | STM32H7_ADCALLIN);
+	stm32_adc_clr_bits(adc, STM32H7_ADC_CR, msk);
 
 	return ret;
 }
@@ -1093,7 +1199,7 @@ static int stm32h7_adc_prepare(struct iio_dev *indio_dev)
 
 	stm32_adc_int_ch_enable(indio_dev);
 
-	stm32_adc_writel(adc, STM32H7_ADC_DIFSEL, adc->difsel);
+	stm32_adc_writel(adc, adc->cfg->regs->difsel.reg, adc->difsel);
 
 	ret = stm32h7_adc_enable(indio_dev);
 	if (ret)
@@ -1107,7 +1213,8 @@ static int stm32h7_adc_prepare(struct iio_dev *indio_dev)
 	if (ret)
 		goto disable;
 
-	stm32_adc_writel(adc, STM32H7_ADC_PCSEL, adc->pcsel);
+	if (adc->cfg->has_presel)
+		stm32_adc_writel(adc, STM32H7_ADC_PCSEL, adc->pcsel);
 
 	return 0;
 
@@ -1125,7 +1232,8 @@ static void stm32h7_adc_unprepare(struct iio_dev *indio_dev)
 {
 	struct stm32_adc *adc = iio_priv(indio_dev);
 
-	stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0);
+	if (adc->cfg->has_presel)
+		stm32_adc_writel(adc, STM32H7_ADC_PCSEL, 0);
 	stm32h7_adc_disable(indio_dev);
 	stm32_adc_int_ch_disable(adc);
 	stm32h7_adc_enter_pwr_down(adc);
@@ -1857,7 +1965,7 @@ static void stm32_adc_chan_init_one(struct iio_dev *indio_dev,
 	adc->pcsel |= BIT(chan->channel);
 	if (differential) {
 		/* pre-build diff channels mask */
-		adc->difsel |= BIT(chan->channel);
+		adc->difsel |= BIT(chan->channel) & adc->cfg->regs->difsel.mask;
 		/* Also add negative input to pre-selected channels */
 		adc->pcsel |= BIT(chan->channel2);
 	}
@@ -1998,6 +2106,35 @@ static int stm32_adc_populate_int_ch(struct iio_dev *indio_dev, const char *ch_n
 
 	for (i = 0; i < STM32_ADC_INT_CH_NB; i++) {
 		if (!strncmp(stm32_adc_ic[i].name, ch_name, STM32_ADC_CH_SZ)) {
+			/* Check internal channel availability */
+			switch (i) {
+			case STM32_ADC_INT_CH_VDDCORE:
+				if (!adc->cfg->regs->or_vddcore.reg)
+					dev_warn(&indio_dev->dev,
+						 "%s channel not available\n", ch_name);
+				break;
+			case STM32_ADC_INT_CH_VDDCPU:
+				if (!adc->cfg->regs->or_vddcpu.reg)
+					dev_warn(&indio_dev->dev,
+						 "%s channel not available\n", ch_name);
+				break;
+			case STM32_ADC_INT_CH_VDDQ_DDR:
+				if (!adc->cfg->regs->or_vddq_ddr.reg)
+					dev_warn(&indio_dev->dev,
+						 "%s channel not available\n", ch_name);
+				break;
+			case STM32_ADC_INT_CH_VREFINT:
+				if (!adc->cfg->regs->ccr_vref.reg)
+					dev_warn(&indio_dev->dev,
+						 "%s channel not available\n", ch_name);
+				break;
+			case STM32_ADC_INT_CH_VBAT:
+				if (!adc->cfg->regs->ccr_vbat.reg)
+					dev_warn(&indio_dev->dev,
+						 "%s channel not available\n", ch_name);
+				break;
+			}
+
 			if (stm32_adc_ic[i].idx != STM32_ADC_INT_CH_VREFINT) {
 				adc->int_ch[i] = chan;
 				break;
@@ -2435,6 +2572,9 @@ static const struct stm32_adc_cfg stm32h7_adc_cfg = {
 	.regs = &stm32h7_adc_regspec,
 	.adc_info = &stm32h7_adc_info,
 	.trigs = stm32h7_adc_trigs,
+	.has_boostmode = true,
+	.has_linearcal = true,
+	.has_presel = true,
 	.start_conv = stm32h7_adc_start_conv,
 	.stop_conv = stm32h7_adc_stop_conv,
 	.prepare = stm32h7_adc_prepare,
@@ -2448,6 +2588,9 @@ static const struct stm32_adc_cfg stm32mp1_adc_cfg = {
 	.adc_info = &stm32h7_adc_info,
 	.trigs = stm32h7_adc_trigs,
 	.has_vregready = true,
+	.has_boostmode = true,
+	.has_linearcal = true,
+	.has_presel = true,
 	.start_conv = stm32h7_adc_start_conv,
 	.stop_conv = stm32h7_adc_stop_conv,
 	.prepare = stm32h7_adc_prepare,
@@ -2457,10 +2600,23 @@ static const struct stm32_adc_cfg stm32mp1_adc_cfg = {
 	.ts_vrefint_ns = 4300,
 };
 
+static const struct stm32_adc_cfg stm32mp13_adc_cfg = {
+	.regs = &stm32mp13_adc_regspec,
+	.adc_info = &stm32mp13_adc_info,
+	.trigs = stm32h7_adc_trigs,
+	.start_conv = stm32mp13_adc_start_conv,
+	.stop_conv = stm32h7_adc_stop_conv,
+	.prepare = stm32h7_adc_prepare,
+	.unprepare = stm32h7_adc_unprepare,
+	.smp_cycles = stm32mp13_adc_smp_cycles,
+	.irq_clear = stm32h7_adc_irq_clear,
+};
+
 static const struct of_device_id stm32_adc_of_match[] = {
 	{ .compatible = "st,stm32f4-adc", .data = (void *)&stm32f4_adc_cfg },
 	{ .compatible = "st,stm32h7-adc", .data = (void *)&stm32h7_adc_cfg },
 	{ .compatible = "st,stm32mp1-adc", .data = (void *)&stm32mp1_adc_cfg },
+	{ .compatible = "st,stm32mp13-adc", .data = (void *)&stm32mp13_adc_cfg },
 	{},
 };
 MODULE_DEVICE_TABLE(of, stm32_adc_of_match);
-- 
GitLab


From 7cb2303dd02487cc18bc9ff0fc1338e8c78ae846 Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan@foss.st.com>
Date: Wed, 12 Oct 2022 16:22:01 +0200
Subject: [PATCH 2277/4122] iio: adc: stm32: manage min sampling time on all
 internal channels

Force minimum sampling time for all internal channels according
to datasheet requirement. This value can be increased through
DT st,min-sample-time-ns property.

Signed-off-by: Olivier Moysan <olivier.moysan@foss.st.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/20221012142205.13041-5-olivier.moysan@foss.st.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/stm32-adc.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index d36c024526f29..8d03d21a33d60 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -215,7 +215,7 @@ struct stm32_adc;
  * @unprepare:		optional unprepare routine (disable, power-down)
  * @irq_clear:		routine to clear irqs
  * @smp_cycles:		programmable sampling time (ADC clock cycles)
- * @ts_vrefint_ns:	vrefint minimum sampling time in ns
+ * @ts_int_ch:		pointer to array of internal channels minimum sampling time in ns
  */
 struct stm32_adc_cfg {
 	const struct stm32_adc_regspec	*regs;
@@ -232,7 +232,7 @@ struct stm32_adc_cfg {
 	void (*unprepare)(struct iio_dev *);
 	void (*irq_clear)(struct iio_dev *indio_dev, u32 msk);
 	const unsigned int *smp_cycles;
-	const unsigned int ts_vrefint_ns;
+	const unsigned int *ts_int_ch;
 };
 
 /**
@@ -1910,14 +1910,15 @@ static void stm32_adc_smpr_init(struct stm32_adc *adc, int channel, u32 smp_ns)
 {
 	const struct stm32_adc_regs *smpr = &adc->cfg->regs->smp_bits[channel];
 	u32 period_ns, shift = smpr->shift, mask = smpr->mask;
-	unsigned int smp, r = smpr->reg;
+	unsigned int i, smp, r = smpr->reg;
 
 	/*
-	 * For vrefint channel, ensure that the sampling time cannot
+	 * For internal channels, ensure that the sampling time cannot
 	 * be lower than the one specified in the datasheet
 	 */
-	if (channel == adc->int_ch[STM32_ADC_INT_CH_VREFINT])
-		smp_ns = max(smp_ns, adc->cfg->ts_vrefint_ns);
+	for (i = 0; i < STM32_ADC_INT_CH_NB; i++)
+		if (channel == adc->int_ch[i] && adc->int_ch[i] != STM32_ADC_INT_CH_NONE)
+			smp_ns = max(smp_ns, adc->cfg->ts_int_ch[i]);
 
 	/* Determine sampling time (ADC clock cycles) */
 	period_ns = NSEC_PER_SEC / adc->common->rate;
@@ -2568,6 +2569,9 @@ static const struct stm32_adc_cfg stm32f4_adc_cfg = {
 	.irq_clear = stm32f4_adc_irq_clear,
 };
 
+const unsigned int stm32_adc_min_ts_h7[] = { 0, 0, 0, 4300, 9000 };
+static_assert(ARRAY_SIZE(stm32_adc_min_ts_h7) == STM32_ADC_INT_CH_NB);
+
 static const struct stm32_adc_cfg stm32h7_adc_cfg = {
 	.regs = &stm32h7_adc_regspec,
 	.adc_info = &stm32h7_adc_info,
@@ -2581,8 +2585,12 @@ static const struct stm32_adc_cfg stm32h7_adc_cfg = {
 	.unprepare = stm32h7_adc_unprepare,
 	.smp_cycles = stm32h7_adc_smp_cycles,
 	.irq_clear = stm32h7_adc_irq_clear,
+	.ts_int_ch = stm32_adc_min_ts_h7,
 };
 
+const unsigned int stm32_adc_min_ts_mp1[] = { 100, 100, 100, 4300, 9800 };
+static_assert(ARRAY_SIZE(stm32_adc_min_ts_mp1) == STM32_ADC_INT_CH_NB);
+
 static const struct stm32_adc_cfg stm32mp1_adc_cfg = {
 	.regs = &stm32mp1_adc_regspec,
 	.adc_info = &stm32h7_adc_info,
@@ -2597,9 +2605,12 @@ static const struct stm32_adc_cfg stm32mp1_adc_cfg = {
 	.unprepare = stm32h7_adc_unprepare,
 	.smp_cycles = stm32h7_adc_smp_cycles,
 	.irq_clear = stm32h7_adc_irq_clear,
-	.ts_vrefint_ns = 4300,
+	.ts_int_ch = stm32_adc_min_ts_mp1,
 };
 
+const unsigned int stm32_adc_min_ts_mp13[] = { 100, 0, 0, 4300, 9800 };
+static_assert(ARRAY_SIZE(stm32_adc_min_ts_mp13) == STM32_ADC_INT_CH_NB);
+
 static const struct stm32_adc_cfg stm32mp13_adc_cfg = {
 	.regs = &stm32mp13_adc_regspec,
 	.adc_info = &stm32mp13_adc_info,
@@ -2610,6 +2621,7 @@ static const struct stm32_adc_cfg stm32mp13_adc_cfg = {
 	.unprepare = stm32h7_adc_unprepare,
 	.smp_cycles = stm32mp13_adc_smp_cycles,
 	.irq_clear = stm32h7_adc_irq_clear,
+	.ts_int_ch = stm32_adc_min_ts_mp13,
 };
 
 static const struct of_device_id stm32_adc_of_match[] = {
-- 
GitLab


From 0a8565425afd8ba0e1a0ea73e21da119ee6dacea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Wed, 12 Oct 2022 17:16:17 +0200
Subject: [PATCH 2278/4122] iio: core: introduce
 iio_device_{claim|release}_buffer_mode() APIs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These APIs are analogous to iio_device_claim_direct_mode() and
iio_device_release_direct_mode() but, as the name suggests, with the
logic flipped. While this looks odd enough, it will have at least two
users (in following changes) and it will be important to move the IIO
mlock to the private struct.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20221012151620.1725215-2-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/industrialio-core.c | 38 +++++++++++++++++++++++++++++++++
 include/linux/iio/iio.h         |  2 ++
 2 files changed, 40 insertions(+)

diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 151ff39933548..b2b7bd27adc77 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -2083,6 +2083,44 @@ void iio_device_release_direct_mode(struct iio_dev *indio_dev)
 }
 EXPORT_SYMBOL_GPL(iio_device_release_direct_mode);
 
+/**
+ * iio_device_claim_buffer_mode - Keep device in buffer mode
+ * @indio_dev:	the iio_dev associated with the device
+ *
+ * If the device is in buffer mode it is guaranteed to stay
+ * that way until iio_device_release_buffer_mode() is called.
+ *
+ * Use with iio_device_release_buffer_mode().
+ *
+ * Returns: 0 on success, -EBUSY on failure.
+ */
+int iio_device_claim_buffer_mode(struct iio_dev *indio_dev)
+{
+	mutex_lock(&indio_dev->mlock);
+
+	if (iio_buffer_enabled(indio_dev))
+		return 0;
+
+	mutex_unlock(&indio_dev->mlock);
+	return -EBUSY;
+}
+EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode);
+
+/**
+ * iio_device_release_buffer_mode - releases claim on buffer mode
+ * @indio_dev:	the iio_dev associated with the device
+ *
+ * Release the claim. Device is no longer guaranteed to stay
+ * in buffer mode.
+ *
+ * Use with iio_device_claim_buffer_mode().
+ */
+void iio_device_release_buffer_mode(struct iio_dev *indio_dev)
+{
+	mutex_unlock(&indio_dev->mlock);
+}
+EXPORT_SYMBOL_GPL(iio_device_release_buffer_mode);
+
 /**
  * iio_device_get_current_mode() - helper function providing read-only access to
  *				   the opaque @currentmode variable
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index f0ec8a5e5a7a9..9d3bd6379eb87 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -629,6 +629,8 @@ int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev,
 int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp);
 int iio_device_claim_direct_mode(struct iio_dev *indio_dev);
 void iio_device_release_direct_mode(struct iio_dev *indio_dev);
+int iio_device_claim_buffer_mode(struct iio_dev *indio_dev);
+void iio_device_release_buffer_mode(struct iio_dev *indio_dev);
 
 extern struct bus_type iio_bus_type;
 
-- 
GitLab


From 1555790c86286933dc674fffe9c1104250d093ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Wed, 12 Oct 2022 17:16:18 +0200
Subject: [PATCH 2279/4122] iio: health: max30100: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pattern used in this device does not quite fit in the
iio_device_claim_direct_mode() typical usage. In this case,
iio_buffer_enabled() was being used not to prevent the raw
access but to allow it. Hence, let's make use of the new
iio_device_claim_buffer_mode() API to make sure we stay in
buffered mode during the complete read.

Note that we are shadowing the error code returned by
iio_device_claim_buffer_mode() so that we keep the original one
(-EAGAIN). The reason is that some userspace stack might already be
relying on this particular code so that we are not taking chances and
leave it alone.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221012151620.1725215-3-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/health/max30100.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c
index 2cca5e0519f89..3aa5d037a1c35 100644
--- a/drivers/iio/health/max30100.c
+++ b/drivers/iio/health/max30100.c
@@ -387,18 +387,21 @@ static int max30100_read_raw(struct iio_dev *indio_dev,
 		 * Temperature reading can only be acquired while engine
 		 * is running
 		 */
-		mutex_lock(&indio_dev->mlock);
-
-		if (!iio_buffer_enabled(indio_dev))
+		if (iio_device_claim_buffer_mode(indio_dev)) {
+			/*
+			 * Replacing -EBUSY or other error code
+			 * returned by iio_device_claim_buffer_mode()
+			 * because user space may rely on the current
+			 * one.
+			 */
 			ret = -EAGAIN;
-		else {
+		} else {
 			ret = max30100_get_temp(data, val);
 			if (!ret)
 				ret = IIO_VAL_INT;
 
+			iio_device_release_buffer_mode(indio_dev);
 		}
-
-		mutex_unlock(&indio_dev->mlock);
 		break;
 	case IIO_CHAN_INFO_SCALE:
 		*val = 1;  /* 0.0625 */
-- 
GitLab


From 6b701cda3632c9cffaea6f79c5fe638800c8f7f1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Wed, 12 Oct 2022 17:16:19 +0200
Subject: [PATCH 2280/4122] iio: health: max30102: do not use internal iio_dev
 lock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The pattern used in this device does not quite fit in the
iio_device_claim_direct_mode() typical usage. In this case, we want to
know if we are in buffered mode or not to know if the device is powered
(buffer mode) or not. And depending on that max30102_get_temp() will
power on the device if needed. Hence, in order to keep the same
functionality, we try to:

1. Claim Buffered mode;
2: If 1) succeeds call max30102_get_temp() without powering on the
   device;
3: Release Buffered mode;
4: If 1) fails, Claim Direct mode;
5: If 4) succeeds call max30102_get_temp() with powering on the device;
6: Release Direct mode;
7: If 4) fails, goto to 1) and try again.

This dance between buffered and direct mode is not particularly pretty
(as well as the loop introduced by the goto statement) but it does allow
us to get rid of the mlock usage while keeping the same behavior.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20221012151620.1725215-4-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/health/max30102.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/health/max30102.c b/drivers/iio/health/max30102.c
index 437298a29f2d3..66df4aaa31a7b 100644
--- a/drivers/iio/health/max30102.c
+++ b/drivers/iio/health/max30102.c
@@ -477,12 +477,23 @@ static int max30102_read_raw(struct iio_dev *indio_dev,
 		 * Temperature reading can only be acquired when not in
 		 * shutdown; leave shutdown briefly when buffer not running
 		 */
-		mutex_lock(&indio_dev->mlock);
-		if (!iio_buffer_enabled(indio_dev))
+any_mode_retry:
+		if (iio_device_claim_buffer_mode(indio_dev)) {
+			/*
+			 * This one is a *bit* hacky. If we cannot claim buffer
+			 * mode, then try direct mode so that we make sure
+			 * things cannot concurrently change. And we just keep
+			 * trying until we get one of the modes...
+			 */
+			if (iio_device_claim_direct_mode(indio_dev))
+				goto any_mode_retry;
+
 			ret = max30102_get_temp(data, val, true);
-		else
+			iio_device_release_direct_mode(indio_dev);
+		} else {
 			ret = max30102_get_temp(data, val, false);
-		mutex_unlock(&indio_dev->mlock);
+			iio_device_release_buffer_mode(indio_dev);
+		}
 		if (ret)
 			return ret;
 
-- 
GitLab


From 16afe125b53f88b855d2713c8ba253d905dcf3cc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nuno=20S=C3=A1?= <nuno.sa@analog.com>
Date: Wed, 12 Oct 2022 17:16:20 +0200
Subject: [PATCH 2281/4122] iio: core: move 'mlock' to 'struct iio_dev_opaque'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Now that there are no more users accessing 'mlock' directly, we can move
it to the iio_dev private structure. Hence, it's now explicit that new
driver's should not directly use this lock.

Signed-off-by: Nuno Sá <nuno.sa@analog.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20221012151620.1725215-5-nuno.sa@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/TODO                   |  3 ---
 drivers/iio/industrialio-buffer.c  | 29 +++++++++++++++++------------
 drivers/iio/industrialio-core.c    | 26 +++++++++++++++-----------
 drivers/iio/industrialio-event.c   |  4 ++--
 drivers/iio/industrialio-trigger.c | 12 ++++++------
 include/linux/iio/iio-opaque.h     |  2 ++
 include/linux/iio/iio.h            |  3 ---
 7 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/drivers/iio/TODO b/drivers/iio/TODO
index 7d7326b7085a2..2ace27d1ac62a 100644
--- a/drivers/iio/TODO
+++ b/drivers/iio/TODO
@@ -7,9 +7,6 @@ tree
   - ABI Documentation
   - Audit driviers/iio/staging/Documentation
 
-- Replace iio_dev->mlock by either a local lock or use
-iio_claim_direct.(Requires analysis of the purpose of the lock.)
-
 - Converting drivers from device tree centric to more generic
 property handlers.
 
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 228598b82a2f3..9cd7db549fcbc 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -507,13 +507,14 @@ static ssize_t iio_scan_el_store(struct device *dev,
 	int ret;
 	bool state;
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
 	struct iio_dev_attr *this_attr = to_iio_dev_attr(attr);
 	struct iio_buffer *buffer = this_attr->buffer;
 
 	ret = kstrtobool(buf, &state);
 	if (ret < 0)
 		return ret;
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 	if (iio_buffer_is_active(buffer)) {
 		ret = -EBUSY;
 		goto error_ret;
@@ -532,7 +533,7 @@ static ssize_t iio_scan_el_store(struct device *dev,
 	}
 
 error_ret:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 
 	return ret < 0 ? ret : len;
 
@@ -554,6 +555,7 @@ static ssize_t iio_scan_el_ts_store(struct device *dev,
 {
 	int ret;
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
 	struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer;
 	bool state;
 
@@ -561,14 +563,14 @@ static ssize_t iio_scan_el_ts_store(struct device *dev,
 	if (ret < 0)
 		return ret;
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 	if (iio_buffer_is_active(buffer)) {
 		ret = -EBUSY;
 		goto error_ret;
 	}
 	buffer->scan_timestamp = state;
 error_ret:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 
 	return ret ? ret : len;
 }
@@ -642,6 +644,7 @@ static ssize_t length_store(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t len)
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
 	struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer;
 	unsigned int val;
 	int ret;
@@ -653,7 +656,7 @@ static ssize_t length_store(struct device *dev, struct device_attribute *attr,
 	if (val == buffer->length)
 		return len;
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 	if (iio_buffer_is_active(buffer)) {
 		ret = -EBUSY;
 	} else {
@@ -665,7 +668,7 @@ static ssize_t length_store(struct device *dev, struct device_attribute *attr,
 	if (buffer->length && buffer->length < buffer->watermark)
 		buffer->watermark = buffer->length;
 out:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 
 	return ret ? ret : len;
 }
@@ -1256,7 +1259,7 @@ int iio_update_buffers(struct iio_dev *indio_dev,
 		return -EINVAL;
 
 	mutex_lock(&iio_dev_opaque->info_exist_lock);
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 
 	if (insert_buffer && iio_buffer_is_active(insert_buffer))
 		insert_buffer = NULL;
@@ -1277,7 +1280,7 @@ int iio_update_buffers(struct iio_dev *indio_dev,
 	ret = __iio_update_buffers(indio_dev, insert_buffer, remove_buffer);
 
 out_unlock:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 	mutex_unlock(&iio_dev_opaque->info_exist_lock);
 
 	return ret;
@@ -1296,6 +1299,7 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
 	int ret;
 	bool requested_state;
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
 	struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer;
 	bool inlist;
 
@@ -1303,7 +1307,7 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
 	if (ret < 0)
 		return ret;
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 
 	/* Find out if it is in the list */
 	inlist = iio_buffer_is_active(buffer);
@@ -1317,7 +1321,7 @@ static ssize_t enable_store(struct device *dev, struct device_attribute *attr,
 		ret = __iio_update_buffers(indio_dev, NULL, buffer);
 
 done:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 	return (ret < 0) ? ret : len;
 }
 
@@ -1334,6 +1338,7 @@ static ssize_t watermark_store(struct device *dev,
 			       const char *buf, size_t len)
 {
 	struct iio_dev *indio_dev = dev_to_iio_dev(dev);
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
 	struct iio_buffer *buffer = to_iio_dev_attr(attr)->buffer;
 	unsigned int val;
 	int ret;
@@ -1344,7 +1349,7 @@ static ssize_t watermark_store(struct device *dev,
 	if (!val)
 		return -EINVAL;
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 
 	if (val > buffer->length) {
 		ret = -EINVAL;
@@ -1358,7 +1363,7 @@ static ssize_t watermark_store(struct device *dev,
 
 	buffer->watermark = val;
 out:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 
 	return ret ? ret : len;
 }
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index b2b7bd27adc77..52e690f031cb8 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -285,16 +285,16 @@ int iio_device_set_clock(struct iio_dev *indio_dev, clockid_t clock_id)
 	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
 	const struct iio_event_interface *ev_int = iio_dev_opaque->event_interface;
 
-	ret = mutex_lock_interruptible(&indio_dev->mlock);
+	ret = mutex_lock_interruptible(&iio_dev_opaque->mlock);
 	if (ret)
 		return ret;
 	if ((ev_int && iio_event_enabled(ev_int)) ||
 	    iio_buffer_enabled(indio_dev)) {
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&iio_dev_opaque->mlock);
 		return -EBUSY;
 	}
 	iio_dev_opaque->clock_id = clock_id;
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 
 	return 0;
 }
@@ -1674,7 +1674,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv)
 	indio_dev->dev.type = &iio_device_type;
 	indio_dev->dev.bus = &iio_bus_type;
 	device_initialize(&indio_dev->dev);
-	mutex_init(&indio_dev->mlock);
+	mutex_init(&iio_dev_opaque->mlock);
 	mutex_init(&iio_dev_opaque->info_exist_lock);
 	INIT_LIST_HEAD(&iio_dev_opaque->channel_attr_list);
 
@@ -1696,7 +1696,7 @@ struct iio_dev *iio_device_alloc(struct device *parent, int sizeof_priv)
 	INIT_LIST_HEAD(&iio_dev_opaque->ioctl_handlers);
 
 	lockdep_register_key(&iio_dev_opaque->mlock_key);
-	lockdep_set_class(&indio_dev->mlock, &iio_dev_opaque->mlock_key);
+	lockdep_set_class(&iio_dev_opaque->mlock, &iio_dev_opaque->mlock_key);
 
 	return indio_dev;
 }
@@ -2058,10 +2058,12 @@ EXPORT_SYMBOL_GPL(__devm_iio_device_register);
  */
 int iio_device_claim_direct_mode(struct iio_dev *indio_dev)
 {
-	mutex_lock(&indio_dev->mlock);
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
+
+	mutex_lock(&iio_dev_opaque->mlock);
 
 	if (iio_buffer_enabled(indio_dev)) {
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&iio_dev_opaque->mlock);
 		return -EBUSY;
 	}
 	return 0;
@@ -2079,7 +2081,7 @@ EXPORT_SYMBOL_GPL(iio_device_claim_direct_mode);
  */
 void iio_device_release_direct_mode(struct iio_dev *indio_dev)
 {
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&to_iio_dev_opaque(indio_dev)->mlock);
 }
 EXPORT_SYMBOL_GPL(iio_device_release_direct_mode);
 
@@ -2096,12 +2098,14 @@ EXPORT_SYMBOL_GPL(iio_device_release_direct_mode);
  */
 int iio_device_claim_buffer_mode(struct iio_dev *indio_dev)
 {
-	mutex_lock(&indio_dev->mlock);
+	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
+
+	mutex_lock(&iio_dev_opaque->mlock);
 
 	if (iio_buffer_enabled(indio_dev))
 		return 0;
 
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 	return -EBUSY;
 }
 EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode);
@@ -2117,7 +2121,7 @@ EXPORT_SYMBOL_GPL(iio_device_claim_buffer_mode);
  */
 void iio_device_release_buffer_mode(struct iio_dev *indio_dev)
 {
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&to_iio_dev_opaque(indio_dev)->mlock);
 }
 EXPORT_SYMBOL_GPL(iio_device_release_buffer_mode);
 
diff --git a/drivers/iio/industrialio-event.c b/drivers/iio/industrialio-event.c
index 3d78da2531a9a..1a26393a7c0cf 100644
--- a/drivers/iio/industrialio-event.c
+++ b/drivers/iio/industrialio-event.c
@@ -198,7 +198,7 @@ static int iio_event_getfd(struct iio_dev *indio_dev)
 	if (ev_int == NULL)
 		return -ENODEV;
 
-	fd = mutex_lock_interruptible(&indio_dev->mlock);
+	fd = mutex_lock_interruptible(&iio_dev_opaque->mlock);
 	if (fd)
 		return fd;
 
@@ -219,7 +219,7 @@ static int iio_event_getfd(struct iio_dev *indio_dev)
 	}
 
 unlock:
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 	return fd;
 }
 
diff --git a/drivers/iio/industrialio-trigger.c b/drivers/iio/industrialio-trigger.c
index 6885a186fe27a..a2f3cc2f65efb 100644
--- a/drivers/iio/industrialio-trigger.c
+++ b/drivers/iio/industrialio-trigger.c
@@ -120,12 +120,12 @@ int iio_trigger_set_immutable(struct iio_dev *indio_dev, struct iio_trigger *tri
 		return -EINVAL;
 
 	iio_dev_opaque = to_iio_dev_opaque(indio_dev);
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 	WARN_ON(iio_dev_opaque->trig_readonly);
 
 	indio_dev->trig = iio_trigger_get(trig);
 	iio_dev_opaque->trig_readonly = true;
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 
 	return 0;
 }
@@ -438,16 +438,16 @@ static ssize_t current_trigger_store(struct device *dev,
 	struct iio_trigger *trig;
 	int ret;
 
-	mutex_lock(&indio_dev->mlock);
+	mutex_lock(&iio_dev_opaque->mlock);
 	if (iio_dev_opaque->currentmode == INDIO_BUFFER_TRIGGERED) {
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&iio_dev_opaque->mlock);
 		return -EBUSY;
 	}
 	if (iio_dev_opaque->trig_readonly) {
-		mutex_unlock(&indio_dev->mlock);
+		mutex_unlock(&iio_dev_opaque->mlock);
 		return -EPERM;
 	}
-	mutex_unlock(&indio_dev->mlock);
+	mutex_unlock(&iio_dev_opaque->mlock);
 
 	trig = iio_trigger_acquire_by_name(buf);
 	if (oldtrig == trig) {
diff --git a/include/linux/iio/iio-opaque.h b/include/linux/iio/iio-opaque.h
index d1f8b30a7c8b7..5aec3945555bf 100644
--- a/include/linux/iio/iio-opaque.h
+++ b/include/linux/iio/iio-opaque.h
@@ -11,6 +11,7 @@
  *				checked by device drivers but should be considered
  *				read-only as this is a core internal bit
  * @driver_module:		used to make it harder to undercut users
+ * @mlock:			lock used to prevent simultaneous device state changes
  * @mlock_key:			lockdep class for iio_dev lock
  * @info_exist_lock:		lock to prevent use during removal
  * @trig_readonly:		mark the current trigger immutable
@@ -43,6 +44,7 @@ struct iio_dev_opaque {
 	int				currentmode;
 	int				id;
 	struct module			*driver_module;
+	struct mutex			mlock;
 	struct lock_class_key		mlock_key;
 	struct mutex			info_exist_lock;
 	bool				trig_readonly;
diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h
index 9d3bd6379eb87..8e0afaaa3f752 100644
--- a/include/linux/iio/iio.h
+++ b/include/linux/iio/iio.h
@@ -548,8 +548,6 @@ struct iio_buffer_setup_ops {
  *			and owner
  * @buffer:		[DRIVER] any buffer present
  * @scan_bytes:		[INTERN] num bytes captured to be fed to buffer demux
- * @mlock:		[INTERN] lock used to prevent simultaneous device state
- *			changes
  * @available_scan_masks: [DRIVER] optional array of allowed bitmasks
  * @masklength:		[INTERN] the length of the mask established from
  *			channels
@@ -574,7 +572,6 @@ struct iio_dev {
 
 	struct iio_buffer		*buffer;
 	int				scan_bytes;
-	struct mutex			mlock;
 
 	const unsigned long		*available_scan_masks;
 	unsigned			masklength;
-- 
GitLab


From 0c3a333524a3e5ba4b6c7b2638faef8420cfdb2a Mon Sep 17 00:00:00 2001
From: Rajat Khandelwal <rajat.khandelwal@linux.intel.com>
Date: Mon, 10 Oct 2022 23:07:20 +0530
Subject: [PATCH 2282/4122] iio: pressure: mpl115: Implementing low power mode
 by shutdown gpio

MPL115 supports shutdown gpio which can be used to set the state
to low power mode. Power from all internal circuits and
registers is removed. This is done by pulling the SHDN pin to low.
This patch enables runtime PM on MPL115 to increase power savings.

According to spec., a wakeup time period of ~5 ms exists between
waking up and actually communicating with the device. This is
implemented using sleep delay.

Signed-off-by: Rajat Khandelwal <rajat.khandelwal@linux.intel.com>
Link: https://lore.kernel.org/r/20221010173720.568916-1-rajat.khandelwal@linux.intel.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/mpl115.c     | 62 ++++++++++++++++++++++++++++++-
 drivers/iio/pressure/mpl115.h     |  5 +++
 drivers/iio/pressure/mpl115_i2c.c |  1 +
 drivers/iio/pressure/mpl115_spi.c |  1 +
 4 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/pressure/mpl115.c b/drivers/iio/pressure/mpl115.c
index 5bf5b9abe6f1a..02ea38c8a3e43 100644
--- a/drivers/iio/pressure/mpl115.c
+++ b/drivers/iio/pressure/mpl115.c
@@ -4,12 +4,13 @@
  *
  * Copyright (c) 2014 Peter Meerwald <pmeerw@pmeerw.net>
  *
- * TODO: shutdown pin
+ * TODO: synchronization with system suspend
  */
 
 #include <linux/module.h>
 #include <linux/iio/iio.h>
 #include <linux/delay.h>
+#include <linux/gpio/consumer.h>
 
 #include "mpl115.h"
 
@@ -27,6 +28,7 @@ struct mpl115_data {
 	s16 a0;
 	s16 b1, b2;
 	s16 c12;
+	struct gpio_desc *shutdown;
 	const struct mpl115_ops *ops;
 };
 
@@ -102,16 +104,24 @@ static int mpl115_read_raw(struct iio_dev *indio_dev,
 
 	switch (mask) {
 	case IIO_CHAN_INFO_PROCESSED:
+		pm_runtime_get_sync(data->dev);
 		ret = mpl115_comp_pressure(data, val, val2);
 		if (ret < 0)
 			return ret;
+		pm_runtime_mark_last_busy(data->dev);
+		pm_runtime_put_autosuspend(data->dev);
+
 		return IIO_VAL_INT_PLUS_MICRO;
 	case IIO_CHAN_INFO_RAW:
+		pm_runtime_get_sync(data->dev);
 		/* temperature -5.35 C / LSB, 472 LSB is 25 C */
 		ret = mpl115_read_temp(data);
 		if (ret < 0)
 			return ret;
+		pm_runtime_mark_last_busy(data->dev);
+		pm_runtime_put_autosuspend(data->dev);
 		*val = ret >> 6;
+
 		return IIO_VAL_INT;
 	case IIO_CHAN_INFO_OFFSET:
 		*val = -605;
@@ -168,6 +178,8 @@ int mpl115_probe(struct device *dev, const char *name,
 	if (ret)
 		return ret;
 
+	dev_set_drvdata(dev, indio_dev);
+
 	ret = data->ops->read(data->dev, MPL115_A0);
 	if (ret < 0)
 		return ret;
@@ -185,10 +197,58 @@ int mpl115_probe(struct device *dev, const char *name,
 		return ret;
 	data->c12 = ret;
 
+	data->shutdown = devm_gpiod_get_optional(dev, "shutdown",
+						 GPIOD_OUT_LOW);
+	if (IS_ERR(data->shutdown))
+		return dev_err_probe(dev, PTR_ERR(data->shutdown),
+				     "cannot get shutdown gpio\n");
+
+	if (data->shutdown) {
+		/* Enable runtime PM */
+		pm_runtime_get_noresume(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+
+		/*
+		 * As the device takes 3 ms to come up with a fresh
+		 * reading after power-on and 5 ms to actually power-on,
+		 * do not shut it down unnecessarily. Set autosuspend to
+		 * 2000 ms.
+		 */
+		pm_runtime_set_autosuspend_delay(dev, 2000);
+		pm_runtime_use_autosuspend(dev);
+		pm_runtime_put(dev);
+
+		dev_dbg(dev, "low-power mode enabled");
+	} else
+		dev_dbg(dev, "low-power mode disabled");
+
 	return devm_iio_device_register(dev, indio_dev);
 }
 EXPORT_SYMBOL_NS_GPL(mpl115_probe, IIO_MPL115);
 
+static int mpl115_runtime_suspend(struct device *dev)
+{
+	struct mpl115_data *data = iio_priv(dev_get_drvdata(dev));
+
+	gpiod_set_value(data->shutdown, 1);
+
+	return 0;
+}
+
+static int mpl115_runtime_resume(struct device *dev)
+{
+	struct mpl115_data *data = iio_priv(dev_get_drvdata(dev));
+
+	gpiod_set_value(data->shutdown, 0);
+	usleep_range(5000, 6000);
+
+	return 0;
+}
+
+EXPORT_NS_RUNTIME_DEV_PM_OPS(mpl115_dev_pm_ops, mpl115_runtime_suspend,
+			  mpl115_runtime_resume, NULL, IIO_MPL115);
+
 MODULE_AUTHOR("Peter Meerwald <pmeerw@pmeerw.net>");
 MODULE_DESCRIPTION("Freescale MPL115 pressure/temperature driver");
 MODULE_LICENSE("GPL");
diff --git a/drivers/iio/pressure/mpl115.h b/drivers/iio/pressure/mpl115.h
index 57d55eb8e6617..78a0068a17bbc 100644
--- a/drivers/iio/pressure/mpl115.h
+++ b/drivers/iio/pressure/mpl115.h
@@ -6,6 +6,8 @@
  * Copyright (c) 2016 Akinobu Mita <akinobu.mita@gmail.com>
  */
 
+#include <linux/pm_runtime.h>
+
 #ifndef _MPL115_H_
 #define _MPL115_H_
 
@@ -18,4 +20,7 @@ struct mpl115_ops {
 int mpl115_probe(struct device *dev, const char *name,
 			const struct mpl115_ops *ops);
 
+/*PM ops */
+extern const struct dev_pm_ops mpl115_dev_pm_ops;
+
 #endif
diff --git a/drivers/iio/pressure/mpl115_i2c.c b/drivers/iio/pressure/mpl115_i2c.c
index 099ab1c6832c5..555bda1146fb9 100644
--- a/drivers/iio/pressure/mpl115_i2c.c
+++ b/drivers/iio/pressure/mpl115_i2c.c
@@ -53,6 +53,7 @@ MODULE_DEVICE_TABLE(i2c, mpl115_i2c_id);
 static struct i2c_driver mpl115_i2c_driver = {
 	.driver = {
 		.name	= "mpl115",
+		.pm = pm_ptr(&mpl115_dev_pm_ops),
 	},
 	.probe = mpl115_i2c_probe,
 	.id_table = mpl115_i2c_id,
diff --git a/drivers/iio/pressure/mpl115_spi.c b/drivers/iio/pressure/mpl115_spi.c
index 7feec87e2704e..58d218fd90dcb 100644
--- a/drivers/iio/pressure/mpl115_spi.c
+++ b/drivers/iio/pressure/mpl115_spi.c
@@ -92,6 +92,7 @@ MODULE_DEVICE_TABLE(spi, mpl115_spi_ids);
 static struct spi_driver mpl115_spi_driver = {
 	.driver = {
 		.name   = "mpl115",
+		.pm = pm_ptr(&mpl115_dev_pm_ops),
 	},
 	.probe = mpl115_spi_probe,
 	.id_table = mpl115_spi_ids,
-- 
GitLab


From 687c8848c642c093c190bb182e4a6ac2ed86b4eb Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 25 Sep 2022 16:57:16 +0100
Subject: [PATCH 2283/4122] iio: accel: fxls8962af: Use new
 EXPORT_NS_GPL_DEV_PM_OPS()

Using this macro allows the compiler to remove unused
structures and callbacks if we are not building with
CONFIG_PM* without needing __maybe_unused markings.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Sean Nyekjaer <sean@geanix.com>

--
Switched to Paul Cercueil's more flexible implementation fo the macro.
Dropped Sean's Ack given new form of macro being used.

Acked-by: Sean Nyekjaer <sean@geanix.com>
Link: https://lore.kernel.org/r/20220925155719.3316280-3-jic23@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/fxls8962af-core.c | 16 +++++++---------
 drivers/iio/accel/fxls8962af-i2c.c  |  2 +-
 drivers/iio/accel/fxls8962af-spi.c  |  2 +-
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c
index 8874d6d617253..bf259db281f59 100644
--- a/drivers/iio/accel/fxls8962af-core.c
+++ b/drivers/iio/accel/fxls8962af-core.c
@@ -1241,7 +1241,7 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq)
 }
 EXPORT_SYMBOL_NS_GPL(fxls8962af_core_probe, IIO_FXLS8962AF);
 
-static int __maybe_unused fxls8962af_runtime_suspend(struct device *dev)
+static int fxls8962af_runtime_suspend(struct device *dev)
 {
 	struct fxls8962af_data *data = iio_priv(dev_get_drvdata(dev));
 	int ret;
@@ -1255,14 +1255,14 @@ static int __maybe_unused fxls8962af_runtime_suspend(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused fxls8962af_runtime_resume(struct device *dev)
+static int fxls8962af_runtime_resume(struct device *dev)
 {
 	struct fxls8962af_data *data = iio_priv(dev_get_drvdata(dev));
 
 	return fxls8962af_active(data);
 }
 
-static int __maybe_unused fxls8962af_suspend(struct device *dev)
+static int fxls8962af_suspend(struct device *dev)
 {
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct fxls8962af_data *data = iio_priv(indio_dev);
@@ -1283,7 +1283,7 @@ static int __maybe_unused fxls8962af_suspend(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused fxls8962af_resume(struct device *dev)
+static int fxls8962af_resume(struct device *dev)
 {
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct fxls8962af_data *data = iio_priv(indio_dev);
@@ -1300,12 +1300,10 @@ static int __maybe_unused fxls8962af_resume(struct device *dev)
 	return 0;
 }
 
-const struct dev_pm_ops fxls8962af_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(fxls8962af_suspend, fxls8962af_resume)
-	SET_RUNTIME_PM_OPS(fxls8962af_runtime_suspend,
-			   fxls8962af_runtime_resume, NULL)
+EXPORT_NS_GPL_DEV_PM_OPS(fxls8962af_pm_ops, IIO_FXLS8962AF) = {
+	SYSTEM_SLEEP_PM_OPS(fxls8962af_suspend, fxls8962af_resume)
+	RUNTIME_PM_OPS(fxls8962af_runtime_suspend, fxls8962af_runtime_resume, NULL)
 };
-EXPORT_SYMBOL_NS_GPL(fxls8962af_pm_ops, IIO_FXLS8962AF);
 
 MODULE_AUTHOR("Sean Nyekjaer <sean@geanix.com>");
 MODULE_DESCRIPTION("NXP FXLS8962AF/FXLS8964AF accelerometer driver");
diff --git a/drivers/iio/accel/fxls8962af-i2c.c b/drivers/iio/accel/fxls8962af-i2c.c
index 8fbadfea1620b..22640eaebac73 100644
--- a/drivers/iio/accel/fxls8962af-i2c.c
+++ b/drivers/iio/accel/fxls8962af-i2c.c
@@ -45,7 +45,7 @@ static struct i2c_driver fxls8962af_driver = {
 	.driver = {
 		   .name = "fxls8962af_i2c",
 		   .of_match_table = fxls8962af_of_match,
-		   .pm = &fxls8962af_pm_ops,
+		   .pm = pm_ptr(&fxls8962af_pm_ops),
 		   },
 	.probe_new = fxls8962af_probe,
 	.id_table = fxls8962af_id,
diff --git a/drivers/iio/accel/fxls8962af-spi.c b/drivers/iio/accel/fxls8962af-spi.c
index 885b3ab7fcb58..a0d192211839f 100644
--- a/drivers/iio/accel/fxls8962af-spi.c
+++ b/drivers/iio/accel/fxls8962af-spi.c
@@ -44,7 +44,7 @@ MODULE_DEVICE_TABLE(spi, fxls8962af_spi_id_table);
 static struct spi_driver fxls8962af_driver = {
 	.driver = {
 		   .name = "fxls8962af_spi",
-		   .pm = &fxls8962af_pm_ops,
+		   .pm = pm_ptr(&fxls8962af_pm_ops),
 		   .of_match_table = fxls8962af_spi_of_match,
 		   },
 	.probe = fxls8962af_probe,
-- 
GitLab


From 02e082c433c65f06f4cb359688993377c1d7b6d1 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 25 Sep 2022 16:57:17 +0100
Subject: [PATCH 2284/4122] iio: gyro: fxas210002c: Move exports to
 IIO_FXAS210002C namespace.

Includes using EXPORT_NS_GPL_DEV_PM_OPS() and the simplifications that
brings by allowing the compiler to remove unused struct dev_pm_ops
and callbacks without needing explicit __maybe_unused markings.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Rui Miguel Silva <rui.silva@linaro.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>

--
Dropped Rui's tag on basis this is rather different from v1 due to
the different macro implementation.

v2: Switch to Paul's more flexible approach to EXPORT_NS_GPL_DEV_PM_OPS()
Acked-by: Rui Miguel Silva <rui.silva@linaro.org>
Link: https://lore.kernel.org/r/20220925155719.3316280-4-jic23@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/fxas21002c_core.c | 21 ++++++++++-----------
 drivers/iio/gyro/fxas21002c_i2c.c  |  3 ++-
 drivers/iio/gyro/fxas21002c_spi.c  |  3 ++-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/iio/gyro/fxas21002c_core.c b/drivers/iio/gyro/fxas21002c_core.c
index a36d71d9e3ea9..3ea1d46130808 100644
--- a/drivers/iio/gyro/fxas21002c_core.c
+++ b/drivers/iio/gyro/fxas21002c_core.c
@@ -998,7 +998,7 @@ pm_disable:
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(fxas21002c_core_probe);
+EXPORT_SYMBOL_NS_GPL(fxas21002c_core_probe, IIO_FXAS21002C);
 
 void fxas21002c_core_remove(struct device *dev)
 {
@@ -1009,9 +1009,9 @@ void fxas21002c_core_remove(struct device *dev)
 	pm_runtime_disable(dev);
 	pm_runtime_set_suspended(dev);
 }
-EXPORT_SYMBOL_GPL(fxas21002c_core_remove);
+EXPORT_SYMBOL_NS_GPL(fxas21002c_core_remove, IIO_FXAS21002C);
 
-static int __maybe_unused fxas21002c_suspend(struct device *dev)
+static int fxas21002c_suspend(struct device *dev)
 {
 	struct fxas21002c_data *data = iio_priv(dev_get_drvdata(dev));
 
@@ -1021,7 +1021,7 @@ static int __maybe_unused fxas21002c_suspend(struct device *dev)
 	return 0;
 }
 
-static int __maybe_unused fxas21002c_resume(struct device *dev)
+static int fxas21002c_resume(struct device *dev)
 {
 	struct fxas21002c_data *data = iio_priv(dev_get_drvdata(dev));
 	int ret;
@@ -1033,26 +1033,25 @@ static int __maybe_unused fxas21002c_resume(struct device *dev)
 	return fxas21002c_mode_set(data, data->prev_mode);
 }
 
-static int __maybe_unused fxas21002c_runtime_suspend(struct device *dev)
+static int fxas21002c_runtime_suspend(struct device *dev)
 {
 	struct fxas21002c_data *data = iio_priv(dev_get_drvdata(dev));
 
 	return fxas21002c_mode_set(data, FXAS21002C_MODE_READY);
 }
 
-static int __maybe_unused fxas21002c_runtime_resume(struct device *dev)
+static int fxas21002c_runtime_resume(struct device *dev)
 {
 	struct fxas21002c_data *data = iio_priv(dev_get_drvdata(dev));
 
 	return fxas21002c_mode_set(data, FXAS21002C_MODE_ACTIVE);
 }
 
-const struct dev_pm_ops fxas21002c_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(fxas21002c_suspend, fxas21002c_resume)
-	SET_RUNTIME_PM_OPS(fxas21002c_runtime_suspend,
-			   fxas21002c_runtime_resume, NULL)
+EXPORT_NS_GPL_DEV_PM_OPS(fxas21002c_pm_ops, IIO_FXAS21002C) = {
+	SYSTEM_SLEEP_PM_OPS(fxas21002c_suspend, fxas21002c_resume)
+	RUNTIME_PM_OPS(fxas21002c_runtime_suspend, fxas21002c_runtime_resume,
+		       NULL)
 };
-EXPORT_SYMBOL_GPL(fxas21002c_pm_ops);
 
 MODULE_AUTHOR("Rui Miguel Silva <rui.silva@linaro.org>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/iio/gyro/fxas21002c_i2c.c b/drivers/iio/gyro/fxas21002c_i2c.c
index 13bb52c594d12..9e2d0f34a672e 100644
--- a/drivers/iio/gyro/fxas21002c_i2c.c
+++ b/drivers/iio/gyro/fxas21002c_i2c.c
@@ -53,7 +53,7 @@ MODULE_DEVICE_TABLE(of, fxas21002c_i2c_of_match);
 static struct i2c_driver fxas21002c_i2c_driver = {
 	.driver = {
 		.name = "fxas21002c_i2c",
-		.pm = &fxas21002c_pm_ops,
+		.pm = pm_ptr(&fxas21002c_pm_ops),
 		.of_match_table = fxas21002c_i2c_of_match,
 	},
 	.probe_new	= fxas21002c_i2c_probe,
@@ -65,3 +65,4 @@ module_i2c_driver(fxas21002c_i2c_driver);
 MODULE_AUTHOR("Rui Miguel Silva <rui.silva@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("FXAS21002C I2C Gyro driver");
+MODULE_IMPORT_NS(IIO_FXAS21002C);
diff --git a/drivers/iio/gyro/fxas21002c_spi.c b/drivers/iio/gyro/fxas21002c_spi.c
index c3ac169facf9b..4f633826547c2 100644
--- a/drivers/iio/gyro/fxas21002c_spi.c
+++ b/drivers/iio/gyro/fxas21002c_spi.c
@@ -54,7 +54,7 @@ MODULE_DEVICE_TABLE(of, fxas21002c_spi_of_match);
 static struct spi_driver fxas21002c_spi_driver = {
 	.driver = {
 		.name = "fxas21002c_spi",
-		.pm = &fxas21002c_pm_ops,
+		.pm = pm_ptr(&fxas21002c_pm_ops),
 		.of_match_table = fxas21002c_spi_of_match,
 	},
 	.probe		= fxas21002c_spi_probe,
@@ -66,3 +66,4 @@ module_spi_driver(fxas21002c_spi_driver);
 MODULE_AUTHOR("Rui Miguel Silva <rui.silva@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("FXAS21002C SPI Gyro driver");
+MODULE_IMPORT_NS(IIO_FXAS21002C);
-- 
GitLab


From ef5a5ef29c8f70ca640d785b7173101417c78d24 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 25 Sep 2022 16:57:18 +0100
Subject: [PATCH 2285/4122] iio: imu: inv_icm42600: Move exports to
 IIO_ICM42600 namespace

As these exports are only relevant to core module and users in the
bus specific modules, move them out of the main kernel namespace.

Includes using EXPORT_NS_GPL_DEV_PM_OPS() and the simplifications that
brings by allowing the compiler to remove unused struct dev_pm_ops
and callbacks without needing explicit __maybe_unused markings.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>

--
EXPORT_NS_GPL_DEV_PM_OPS()

v2: Switch to Paul's more flexible version of the
Acked-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Link: https://lore.kernel.org/r/20220925155719.3316280-5-jic23@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../iio/imu/inv_icm42600/inv_icm42600_core.c  | 21 +++++++++----------
 .../iio/imu/inv_icm42600/inv_icm42600_i2c.c   |  3 ++-
 .../iio/imu/inv_icm42600/inv_icm42600_spi.c   |  3 ++-
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
index ca85fccc98393..b63c5dab1a562 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
@@ -41,7 +41,7 @@ const struct regmap_config inv_icm42600_regmap_config = {
 	.ranges = inv_icm42600_regmap_ranges,
 	.num_ranges = ARRAY_SIZE(inv_icm42600_regmap_ranges),
 };
-EXPORT_SYMBOL_GPL(inv_icm42600_regmap_config);
+EXPORT_SYMBOL_NS_GPL(inv_icm42600_regmap_config, IIO_ICM42600);
 
 struct inv_icm42600_hw {
 	uint8_t whoami;
@@ -660,13 +660,13 @@ int inv_icm42600_core_probe(struct regmap *regmap, int chip, int irq,
 
 	return devm_add_action_or_reset(dev, inv_icm42600_disable_pm, dev);
 }
-EXPORT_SYMBOL_GPL(inv_icm42600_core_probe);
+EXPORT_SYMBOL_NS_GPL(inv_icm42600_core_probe, IIO_ICM42600);
 
 /*
  * Suspend saves sensors state and turns everything off.
  * Check first if runtime suspend has not already done the job.
  */
-static int __maybe_unused inv_icm42600_suspend(struct device *dev)
+static int inv_icm42600_suspend(struct device *dev)
 {
 	struct inv_icm42600_state *st = dev_get_drvdata(dev);
 	int ret;
@@ -706,7 +706,7 @@ out_unlock:
  * System resume gets the system back on and restores the sensors state.
  * Manually put runtime power management in system active state.
  */
-static int __maybe_unused inv_icm42600_resume(struct device *dev)
+static int inv_icm42600_resume(struct device *dev)
 {
 	struct inv_icm42600_state *st = dev_get_drvdata(dev);
 	int ret;
@@ -739,7 +739,7 @@ out_unlock:
 }
 
 /* Runtime suspend will turn off sensors that are enabled by iio devices. */
-static int __maybe_unused inv_icm42600_runtime_suspend(struct device *dev)
+static int inv_icm42600_runtime_suspend(struct device *dev)
 {
 	struct inv_icm42600_state *st = dev_get_drvdata(dev);
 	int ret;
@@ -761,7 +761,7 @@ error_unlock:
 }
 
 /* Sensors are enabled by iio devices, no need to turn them back on here. */
-static int __maybe_unused inv_icm42600_runtime_resume(struct device *dev)
+static int inv_icm42600_runtime_resume(struct device *dev)
 {
 	struct inv_icm42600_state *st = dev_get_drvdata(dev);
 	int ret;
@@ -774,12 +774,11 @@ static int __maybe_unused inv_icm42600_runtime_resume(struct device *dev)
 	return ret;
 }
 
-const struct dev_pm_ops inv_icm42600_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(inv_icm42600_suspend, inv_icm42600_resume)
-	SET_RUNTIME_PM_OPS(inv_icm42600_runtime_suspend,
-			   inv_icm42600_runtime_resume, NULL)
+EXPORT_NS_GPL_DEV_PM_OPS(inv_icm42600_pm_ops, IIO_ICM42600) = {
+	SYSTEM_SLEEP_PM_OPS(inv_icm42600_suspend, inv_icm42600_resume)
+	RUNTIME_PM_OPS(inv_icm42600_runtime_suspend,
+		       inv_icm42600_runtime_resume, NULL)
 };
-EXPORT_SYMBOL_GPL(inv_icm42600_pm_ops);
 
 MODULE_AUTHOR("InvenSense, Inc.");
 MODULE_DESCRIPTION("InvenSense ICM-426xx device driver");
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
index d4a692b838d0f..4f96989ddf4a1 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
@@ -93,7 +93,7 @@ static struct i2c_driver inv_icm42600_driver = {
 	.driver = {
 		.name = "inv-icm42600-i2c",
 		.of_match_table = inv_icm42600_of_matches,
-		.pm = &inv_icm42600_pm_ops,
+		.pm = pm_ptr(&inv_icm42600_pm_ops),
 	},
 	.probe_new = inv_icm42600_probe,
 };
@@ -102,3 +102,4 @@ module_i2c_driver(inv_icm42600_driver);
 MODULE_AUTHOR("InvenSense, Inc.");
 MODULE_DESCRIPTION("InvenSense ICM-426xx I2C driver");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(IIO_ICM42600);
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
index e6305e5fa9756..486b46e531131 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
@@ -89,7 +89,7 @@ static struct spi_driver inv_icm42600_driver = {
 	.driver = {
 		.name = "inv-icm42600-spi",
 		.of_match_table = inv_icm42600_of_matches,
-		.pm = &inv_icm42600_pm_ops,
+		.pm = pm_ptr(&inv_icm42600_pm_ops),
 	},
 	.probe = inv_icm42600_probe,
 };
@@ -98,3 +98,4 @@ module_spi_driver(inv_icm42600_driver);
 MODULE_AUTHOR("InvenSense, Inc.");
 MODULE_DESCRIPTION("InvenSense ICM-426xx SPI driver");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(IIO_ICM42600);
-- 
GitLab


From 62bfa12c87ac7468a69e81ea870eb68b0adabf37 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 25 Sep 2022 16:57:19 +0100
Subject: [PATCH 2286/4122] iio: imu: inv_mpu: Move exports to IIO_MPU6050
 namespace

As these exports are only relevant to core module and users in the
bus specific modules, move them out of the main kernel namespace.

Includes using EXPORT_NS_GPL_DEV_PM_OPS() and the simplifications that
brings by allowing the compiler to remove unused struct dev_pm_ops
and callbacks without needing explicit __maybe_unused markings.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Cc: Linus Walleij <linus.walleij@linaro.org>

--
Dropped Linus' tag as the new patch is significantly different.

v2: Switch to more flexible version of EXPORT* macro from Paul.
Acked-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Link: https://lore.kernel.org/r/20220925155719.3316280-6-jic23@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/inv_mpu6050/inv_mpu_core.c | 17 ++++++++---------
 drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c  |  3 ++-
 drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c  |  3 ++-
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
index 86fbbe9040503..8a129120b73dd 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_core.c
@@ -1653,9 +1653,9 @@ error_power_off:
 	inv_mpu6050_set_power_itg(st, false);
 	return result;
 }
-EXPORT_SYMBOL_GPL(inv_mpu_core_probe);
+EXPORT_SYMBOL_NS_GPL(inv_mpu_core_probe, IIO_MPU6050);
 
-static int __maybe_unused inv_mpu_resume(struct device *dev)
+static int inv_mpu_resume(struct device *dev)
 {
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct inv_mpu6050_state *st = iio_priv(indio_dev);
@@ -1687,7 +1687,7 @@ out_unlock:
 	return result;
 }
 
-static int __maybe_unused inv_mpu_suspend(struct device *dev)
+static int inv_mpu_suspend(struct device *dev)
 {
 	struct iio_dev *indio_dev = dev_get_drvdata(dev);
 	struct inv_mpu6050_state *st = iio_priv(indio_dev);
@@ -1730,7 +1730,7 @@ out_unlock:
 	return result;
 }
 
-static int __maybe_unused inv_mpu_runtime_suspend(struct device *dev)
+static int inv_mpu_runtime_suspend(struct device *dev)
 {
 	struct inv_mpu6050_state *st = iio_priv(dev_get_drvdata(dev));
 	unsigned int sensors;
@@ -1755,7 +1755,7 @@ out_unlock:
 	return ret;
 }
 
-static int __maybe_unused inv_mpu_runtime_resume(struct device *dev)
+static int inv_mpu_runtime_resume(struct device *dev)
 {
 	struct inv_mpu6050_state *st = iio_priv(dev_get_drvdata(dev));
 	int ret;
@@ -1767,11 +1767,10 @@ static int __maybe_unused inv_mpu_runtime_resume(struct device *dev)
 	return inv_mpu6050_set_power_itg(st, true);
 }
 
-const struct dev_pm_ops inv_mpu_pmops = {
-	SET_SYSTEM_SLEEP_PM_OPS(inv_mpu_suspend, inv_mpu_resume)
-	SET_RUNTIME_PM_OPS(inv_mpu_runtime_suspend, inv_mpu_runtime_resume, NULL)
+EXPORT_NS_GPL_DEV_PM_OPS(inv_mpu_pmops, IIO_MPU6050) = {
+	SYSTEM_SLEEP_PM_OPS(inv_mpu_suspend, inv_mpu_resume)
+	RUNTIME_PM_OPS(inv_mpu_runtime_suspend, inv_mpu_runtime_resume, NULL)
 };
-EXPORT_SYMBOL_GPL(inv_mpu_pmops);
 
 MODULE_AUTHOR("Invensense Corporation");
 MODULE_DESCRIPTION("Invensense device MPU6050 driver");
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
index 14255a918eb1d..7a8d60a5afa9a 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
@@ -267,7 +267,7 @@ static struct i2c_driver inv_mpu_driver = {
 		.of_match_table = inv_of_match,
 		.acpi_match_table = inv_acpi_match,
 		.name	=	"inv-mpu6050-i2c",
-		.pm     =       &inv_mpu_pmops,
+		.pm     =       pm_ptr(&inv_mpu_pmops),
 	},
 };
 
@@ -276,3 +276,4 @@ module_i2c_driver(inv_mpu_driver);
 MODULE_AUTHOR("Invensense Corporation");
 MODULE_DESCRIPTION("Invensense device MPU6050 driver");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(IIO_MPU6050);
diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c
index e6107b0cc38ff..89f46c2f213d8 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_spi.c
@@ -154,7 +154,7 @@ static struct spi_driver inv_mpu_driver = {
 		.of_match_table = inv_of_match,
 		.acpi_match_table = inv_acpi_match,
 		.name	=	"inv-mpu6000-spi",
-		.pm     =       &inv_mpu_pmops,
+		.pm     =       pm_ptr(&inv_mpu_pmops),
 	},
 };
 
@@ -163,3 +163,4 @@ module_spi_driver(inv_mpu_driver);
 MODULE_AUTHOR("Adriana Reus <adriana.reus@intel.com>");
 MODULE_DESCRIPTION("Invensense device MPU6000 driver");
 MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(IIO_MPU6050);
-- 
GitLab


From 1ccef2e6e9205e209ad958d2e591bcca60981007 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:19:01 +0300
Subject: [PATCH 2287/4122] iio: adc: ad7192: Simplify using
 devm_regulator_get_enable()

Use devm_regulator_get_enable() instead of open coded get, enable,
add-action-to-disable-at-detach - pattern. Also drop the seemingly unused
struct member 'dvdd'.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/9719c445c095d3d308e2fc9f4f93294f5806c41c.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7192.c | 15 ++-------------
 1 file changed, 2 insertions(+), 13 deletions(-)

diff --git a/drivers/iio/adc/ad7192.c b/drivers/iio/adc/ad7192.c
index f5067173deb62..55a6ab5910160 100644
--- a/drivers/iio/adc/ad7192.c
+++ b/drivers/iio/adc/ad7192.c
@@ -177,7 +177,6 @@ struct ad7192_chip_info {
 struct ad7192_state {
 	const struct ad7192_chip_info	*chip_info;
 	struct regulator		*avdd;
-	struct regulator		*dvdd;
 	struct clk			*mclk;
 	u16				int_vref_mv;
 	u32				fclk;
@@ -1015,19 +1014,9 @@ static int ad7192_probe(struct spi_device *spi)
 	if (ret)
 		return ret;
 
-	st->dvdd = devm_regulator_get(&spi->dev, "dvdd");
-	if (IS_ERR(st->dvdd))
-		return PTR_ERR(st->dvdd);
-
-	ret = regulator_enable(st->dvdd);
-	if (ret) {
-		dev_err(&spi->dev, "Failed to enable specified DVdd supply\n");
-		return ret;
-	}
-
-	ret = devm_add_action_or_reset(&spi->dev, ad7192_reg_disable, st->dvdd);
+	ret = devm_regulator_get_enable(&spi->dev, "dvdd");
 	if (ret)
-		return ret;
+		return dev_err_probe(&spi->dev, ret, "Failed to enable specified DVdd supply\n");
 
 	ret = regulator_get_voltage(st->avdd);
 	if (ret < 0) {
-- 
GitLab


From 08f75f180db06566d134b1cac1d8a4b8f266761e Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:19:17 +0300
Subject: [PATCH 2288/4122] iio: dac: ltc2688: Simplify using
 devm_regulator_*get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use devm_regulator_bulk_get_enable() instead of open coded bulk-get,
bulk-enable, add-action-to-disable-at-detach - pattern.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/a29493f594c84b3bd852e462bbd3e591a8575a27.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ltc2688.c | 23 +++--------------------
 1 file changed, 3 insertions(+), 20 deletions(-)

diff --git a/drivers/iio/dac/ltc2688.c b/drivers/iio/dac/ltc2688.c
index 28bdde2d30889..fc8eb53c65be3 100644
--- a/drivers/iio/dac/ltc2688.c
+++ b/drivers/iio/dac/ltc2688.c
@@ -84,7 +84,6 @@ struct ltc2688_chan {
 struct ltc2688_state {
 	struct spi_device *spi;
 	struct regmap *regmap;
-	struct regulator_bulk_data regulators[2];
 	struct ltc2688_chan channels[LTC2688_DAC_CHANNELS];
 	struct iio_chan_spec *iio_chan;
 	/* lock to protect against multiple access to the device and shared data */
@@ -902,13 +901,6 @@ static int ltc2688_setup(struct ltc2688_state *st, struct regulator *vref)
 			       LTC2688_CONFIG_EXT_REF);
 }
 
-static void ltc2688_disable_regulators(void *data)
-{
-	struct ltc2688_state *st = data;
-
-	regulator_bulk_disable(ARRAY_SIZE(st->regulators), st->regulators);
-}
-
 static void ltc2688_disable_regulator(void *regulator)
 {
 	regulator_disable(regulator);
@@ -965,6 +957,7 @@ static const struct iio_info ltc2688_info = {
 
 static int ltc2688_probe(struct spi_device *spi)
 {
+	static const char * const regulators[] = { "vcc", "iovcc" };
 	struct ltc2688_state *st;
 	struct iio_dev *indio_dev;
 	struct regulator *vref_reg;
@@ -988,21 +981,11 @@ static int ltc2688_probe(struct spi_device *spi)
 		return dev_err_probe(dev, PTR_ERR(st->regmap),
 				     "Failed to init regmap");
 
-	st->regulators[0].supply = "vcc";
-	st->regulators[1].supply = "iovcc";
-	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(st->regulators),
-				      st->regulators);
-	if (ret)
-		return dev_err_probe(dev, ret, "Failed to get regulators\n");
-
-	ret = regulator_bulk_enable(ARRAY_SIZE(st->regulators), st->regulators);
+	ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulators),
+					     regulators);
 	if (ret)
 		return dev_err_probe(dev, ret, "Failed to enable regulators\n");
 
-	ret = devm_add_action_or_reset(dev, ltc2688_disable_regulators, st);
-	if (ret)
-		return ret;
-
 	vref_reg = devm_regulator_get_optional(dev, "vref");
 	if (IS_ERR(vref_reg)) {
 		if (PTR_ERR(vref_reg) != -ENODEV)
-- 
GitLab


From 2c620883a46b6e2299cf9e4b4683e5b3cc67a61e Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:19:31 +0300
Subject: [PATCH 2289/4122] iio: gyro: bmg160_core: Simplify using
 devm_regulator_*get_enable()

Use devm_regulator_bulk_get_enable() instead of open coded bulk-get,
bulk-enable, add-action-to-disable-at-detach - pattern.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/3fd11489356b1c73a3d7b4bd9dec7e12c9fe8788.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/bmg160_core.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/drivers/iio/gyro/bmg160_core.c b/drivers/iio/gyro/bmg160_core.c
index cedd9f02ea211..0e2eb0e98235f 100644
--- a/drivers/iio/gyro/bmg160_core.c
+++ b/drivers/iio/gyro/bmg160_core.c
@@ -93,7 +93,6 @@
 
 struct bmg160_data {
 	struct regmap *regmap;
-	struct regulator_bulk_data regulators[2];
 	struct iio_trigger *dready_trig;
 	struct iio_trigger *motion_trig;
 	struct iio_mount_matrix orientation;
@@ -1067,16 +1066,10 @@ static const char *bmg160_match_acpi_device(struct device *dev)
 	return dev_name(dev);
 }
 
-static void bmg160_disable_regulators(void *d)
-{
-	struct bmg160_data *data = d;
-
-	regulator_bulk_disable(ARRAY_SIZE(data->regulators), data->regulators);
-}
-
 int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq,
 		      const char *name)
 {
+	static const char * const regulators[] = { "vdd", "vddio" };
 	struct bmg160_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -1090,22 +1083,11 @@ int bmg160_core_probe(struct device *dev, struct regmap *regmap, int irq,
 	data->irq = irq;
 	data->regmap = regmap;
 
-	data->regulators[0].supply = "vdd";
-	data->regulators[1].supply = "vddio";
-	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(data->regulators),
-				      data->regulators);
+	ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulators),
+					     regulators);
 	if (ret)
 		return dev_err_probe(dev, ret, "Failed to get regulators\n");
 
-	ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators),
-				    data->regulators);
-	if (ret)
-		return ret;
-
-	ret = devm_add_action_or_reset(dev, bmg160_disable_regulators, data);
-	if (ret)
-		return ret;
-
 	ret = iio_read_mount_matrix(dev, &data->orientation);
 	if (ret)
 		return ret;
-- 
GitLab


From 6900cdbfb99e8600572da340576509297a684a3a Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:19:46 +0300
Subject: [PATCH 2290/4122] iio: imu: st_lsm6dsx: Simplify using
 devm_regulator_*get_enable()

Use devm_regulator_bulk_get_enable() instead of open coded bulk-get,
bulk-enable, add-action-to-disable-at-detach - pattern.

A functional change (which seems like a bugfix) is that if
regulator_bulk_get fails, the enable is not attempted.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/876e58428cec056d51070e49eff559e2d7c23b12.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h      |  2 --
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 30 ++++----------------
 2 files changed, 5 insertions(+), 27 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index d3a4e21bc1149..07ad8027de738 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -374,7 +374,6 @@ struct st_lsm6dsx_sensor {
  * struct st_lsm6dsx_hw - ST IMU MEMS hw instance
  * @dev: Pointer to instance of struct device (I2C or SPI).
  * @regmap: Register map of the device.
- * @regulators: VDD/VDDIO voltage regulators.
  * @irq: Device interrupt line (I2C or SPI).
  * @fifo_lock: Mutex to prevent concurrent access to the hw FIFO.
  * @conf_lock: Mutex to prevent concurrent FIFO configuration update.
@@ -397,7 +396,6 @@ struct st_lsm6dsx_sensor {
 struct st_lsm6dsx_hw {
 	struct device *dev;
 	struct regmap *regmap;
-	struct regulator_bulk_data regulators[2];
 	int irq;
 
 	struct mutex fifo_lock;
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 6af2e905c1612..fe5fa08b68acb 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -2177,36 +2177,20 @@ static int st_lsm6dsx_irq_setup(struct st_lsm6dsx_hw *hw)
 
 static int st_lsm6dsx_init_regulators(struct device *dev)
 {
-	struct st_lsm6dsx_hw *hw = dev_get_drvdata(dev);
+	/* vdd-vddio power regulators */
+	static const char * const regulators[] = { "vdd", "vddio" };
 	int err;
 
-	/* vdd-vddio power regulators */
-	hw->regulators[0].supply = "vdd";
-	hw->regulators[1].supply = "vddio";
-	err = devm_regulator_bulk_get(dev, ARRAY_SIZE(hw->regulators),
-				      hw->regulators);
+	err = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulators),
+					     regulators);
 	if (err)
-		return dev_err_probe(dev, err, "failed to get regulators\n");
-
-	err = regulator_bulk_enable(ARRAY_SIZE(hw->regulators),
-				    hw->regulators);
-	if (err) {
-		dev_err(dev, "failed to enable regulators: %d\n", err);
-		return err;
-	}
+		return dev_err_probe(dev, err, "failed to enable regulators\n");
 
 	msleep(50);
 
 	return 0;
 }
 
-static void st_lsm6dsx_chip_uninit(void *data)
-{
-	struct st_lsm6dsx_hw *hw = data;
-
-	regulator_bulk_disable(ARRAY_SIZE(hw->regulators), hw->regulators);
-}
-
 int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id,
 		     struct regmap *regmap)
 {
@@ -2230,10 +2214,6 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id,
 	if (err)
 		return err;
 
-	err = devm_add_action_or_reset(dev, st_lsm6dsx_chip_uninit, hw);
-	if (err)
-		return err;
-
 	hw->buff = devm_kzalloc(dev, ST_LSM6DSX_BUFF_SIZE, GFP_KERNEL);
 	if (!hw->buff)
 		return -ENOMEM;
-- 
GitLab


From 7ff0ad35aa6f70fe32fee33bc6e0aa356af99f76 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:20:03 +0300
Subject: [PATCH 2291/4122] iio: adc: ad7476: simplify using
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(),
add_action_or_reset(regulator_disable)' and use the
devm_regulator_get_enable()

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/33070d66b9b976acac1cee5570facef9278b6b61.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7476.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/iio/adc/ad7476.c b/drivers/iio/adc/ad7476.c
index 94776f6962907..80aebed47d1fa 100644
--- a/drivers/iio/adc/ad7476.c
+++ b/drivers/iio/adc/ad7476.c
@@ -368,16 +368,7 @@ static int ad7476_probe(struct spi_device *spi)
 	}
 
 	if (st->chip_info->has_vdrive) {
-		reg = devm_regulator_get(&spi->dev, "vdrive");
-		if (IS_ERR(reg))
-			return PTR_ERR(reg);
-
-		ret = regulator_enable(reg);
-		if (ret)
-			return ret;
-
-		ret = devm_add_action_or_reset(&spi->dev, ad7476_reg_disable,
-					       reg);
+		ret = devm_regulator_get_enable(&spi->dev, "vdrive");
 		if (ret)
 			return ret;
 	}
-- 
GitLab


From fd5b6c48ec3345d5c243e283fbdb69618ada2be8 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:20:18 +0300
Subject: [PATCH 2292/4122] iio: adc: ad7606: simplify using
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(),
add_action_or_reset(regulator_disable)' and use the
devm_regulator_get_enable() and drop the pointer to the regulator.
This simplifies code and makes it less tempting to add manual control
for the regulator which is also controlled by devm.

Whilst here also switch to dev_err_probe() to provide more information
if a deferred probe occurs.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/521c52f5a9bdc2db04d5775b36df4b233ae338da.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7606.c | 22 +++-------------------
 drivers/iio/adc/ad7606.h |  2 --
 2 files changed, 3 insertions(+), 21 deletions(-)

diff --git a/drivers/iio/adc/ad7606.c b/drivers/iio/adc/ad7606.c
index ba24f99523e01..dd6b603f65ead 100644
--- a/drivers/iio/adc/ad7606.c
+++ b/drivers/iio/adc/ad7606.c
@@ -557,13 +557,6 @@ static const struct iio_trigger_ops ad7606_trigger_ops = {
 	.validate_device = iio_trigger_validate_own_device,
 };
 
-static void ad7606_regulator_disable(void *data)
-{
-	struct ad7606_state *st = data;
-
-	regulator_disable(st->reg);
-}
-
 int ad7606_probe(struct device *dev, int irq, void __iomem *base_address,
 		 const char *name, unsigned int id,
 		 const struct ad7606_bus_ops *bops)
@@ -589,19 +582,10 @@ int ad7606_probe(struct device *dev, int irq, void __iomem *base_address,
 	st->scale_avail = ad7606_scale_avail;
 	st->num_scales = ARRAY_SIZE(ad7606_scale_avail);
 
-	st->reg = devm_regulator_get(dev, "avcc");
-	if (IS_ERR(st->reg))
-		return PTR_ERR(st->reg);
-
-	ret = regulator_enable(st->reg);
-	if (ret) {
-		dev_err(dev, "Failed to enable specified AVcc supply\n");
-		return ret;
-	}
-
-	ret = devm_add_action_or_reset(dev, ad7606_regulator_disable, st);
+	ret = devm_regulator_get_enable(dev, "avcc");
 	if (ret)
-		return ret;
+		return dev_err_probe(dev, ret,
+				     "Failed to enable specified AVcc supply\n");
 
 	st->chip_info = &ad7606_chip_info_tbl[id];
 
diff --git a/drivers/iio/adc/ad7606.h b/drivers/iio/adc/ad7606.h
index 2dc4f599f9df9..0c6a88cc46958 100644
--- a/drivers/iio/adc/ad7606.h
+++ b/drivers/iio/adc/ad7606.h
@@ -62,7 +62,6 @@ struct ad7606_chip_info {
  * struct ad7606_state - driver instance specific data
  * @dev		pointer to kernel device
  * @chip_info		entry in the table of chips that describes this device
- * @reg		regulator info for the power supply of the device
  * @bops		bus operations (SPI or parallel)
  * @range		voltage range selection, selects which scale to apply
  * @oversampling	oversampling selection
@@ -92,7 +91,6 @@ struct ad7606_chip_info {
 struct ad7606_state {
 	struct device			*dev;
 	const struct ad7606_chip_info	*chip_info;
-	struct regulator		*reg;
 	const struct ad7606_bus_ops	*bops;
 	unsigned int			range[16];
 	unsigned int			oversampling;
-- 
GitLab


From f55c8e6696425ef22b7d08fc05b920d3a1a5dbf6 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:20:32 +0300
Subject: [PATCH 2293/4122] iio: adc: max1241: simplify using
 devm_regulator_get_enable()

Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(),
add_action_or_reset(regulator_disable)' and use the
devm_regulator_get_enable() and drop the pointer to the regulator.
This simplifies code and makes it less tempting to add manual control
for the regulator which is also controlled by devm.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Alexandru Lazar <alazar@startmail.com>
Link: https://lore.kernel.org/r/7c759bf6c06e72ae70bffeebc1939d9903427278.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/max1241.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/iio/adc/max1241.c b/drivers/iio/adc/max1241.c
index a815ad1f6913b..500bb09ab19bc 100644
--- a/drivers/iio/adc/max1241.c
+++ b/drivers/iio/adc/max1241.c
@@ -22,7 +22,6 @@ enum max1241_id {
 struct max1241 {
 	struct spi_device *spi;
 	struct mutex lock;
-	struct regulator *vdd;
 	struct regulator *vref;
 	struct gpio_desc *shutdown;
 
@@ -110,17 +109,6 @@ static const struct iio_info max1241_info = {
 	.read_raw = max1241_read_raw,
 };
 
-static void max1241_disable_vdd_action(void *data)
-{
-	struct max1241 *adc = data;
-	struct device *dev = &adc->spi->dev;
-	int err;
-
-	err = regulator_disable(adc->vdd);
-	if (err)
-		dev_err(dev, "could not disable vdd regulator.\n");
-}
-
 static void max1241_disable_vref_action(void *data)
 {
 	struct max1241 *adc = data;
@@ -147,20 +135,10 @@ static int max1241_probe(struct spi_device *spi)
 	adc->spi = spi;
 	mutex_init(&adc->lock);
 
-	adc->vdd = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(adc->vdd))
-		return dev_err_probe(dev, PTR_ERR(adc->vdd),
-				     "failed to get vdd regulator\n");
-
-	ret = regulator_enable(adc->vdd);
+	ret = devm_regulator_get_enable(dev, "vdd");
 	if (ret)
-		return ret;
-
-	ret = devm_add_action_or_reset(dev, max1241_disable_vdd_action, adc);
-	if (ret) {
-		dev_err(dev, "could not set up vdd regulator cleanup action\n");
-		return ret;
-	}
+		return dev_err_probe(dev, ret,
+				     "failed to get/enable vdd regulator\n");
 
 	adc->vref = devm_regulator_get(dev, "vref");
 	if (IS_ERR(adc->vref))
-- 
GitLab


From 3dfa1d4f63baf41774bbc77467729de39e0ac8a6 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:20:47 +0300
Subject: [PATCH 2294/4122] iio: adc: max1363: simplify using
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(),
add_action_or_reset(regulator_disable)' and use the
devm_regulator_get_enable() and drop the pointer to the regulator.
This simplifies code and makes it less tempting to add manual control
for the regulator which is also controlled by devm.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/1286ea127d190e5708a0aaff271819b2d3f8802d.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/max1363.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c
index a28cf86cdce80..42d3479cefb73 100644
--- a/drivers/iio/adc/max1363.c
+++ b/drivers/iio/adc/max1363.c
@@ -148,7 +148,6 @@ struct max1363_chip_info {
  * @chip_info:		chip model specific constants, available modes, etc.
  * @current_mode:	the scan mode of this chip
  * @requestedmask:	a valid requested set of channels
- * @reg:		supply regulator
  * @lock:		lock to ensure state is consistent
  * @monitor_on:		whether monitor mode is enabled
  * @monitor_speed:	parameter corresponding to device monitor speed setting
@@ -168,7 +167,6 @@ struct max1363_state {
 	const struct max1363_chip_info	*chip_info;
 	const struct max1363_mode	*current_mode;
 	u32				requestedmask;
-	struct regulator		*reg;
 	struct mutex			lock;
 
 	/* Using monitor modes and buffer at the same time is
@@ -1597,15 +1595,7 @@ static int max1363_probe(struct i2c_client *client,
 	st = iio_priv(indio_dev);
 
 	mutex_init(&st->lock);
-	st->reg = devm_regulator_get(&client->dev, "vcc");
-	if (IS_ERR(st->reg))
-		return PTR_ERR(st->reg);
-
-	ret = regulator_enable(st->reg);
-	if (ret)
-		return ret;
-
-	ret = devm_add_action_or_reset(&client->dev, max1363_reg_disable, st->reg);
+	ret = devm_regulator_get_enable(&client->dev, "vcc");
 	if (ret)
 		return ret;
 
-- 
GitLab


From 563746c26db6c203d30bed2daa301416b27b77fb Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Fri, 19 Aug 2022 22:21:02 +0300
Subject: [PATCH 2295/4122] iio: amplifier: hmc425a: simplify using
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop open-coded pattern: 'devm_regulator_get(), regulator_enable(),
add_action_or_reset(regulator_disable)' and use the
devm_regulator_get_enable() and drop the pointer to the regulator.
This simplifies code and makes it less tempting to add manual control
for the regulator which is also controlled by devm.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Acked-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/8b1193fdefb231a6d721e2bded52c48e56039c20.1660934107.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/amplifiers/hmc425a.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/iio/amplifiers/hmc425a.c b/drivers/iio/amplifiers/hmc425a.c
index ce80e0c916f4a..108f0f1685ef1 100644
--- a/drivers/iio/amplifiers/hmc425a.c
+++ b/drivers/iio/amplifiers/hmc425a.c
@@ -34,7 +34,6 @@ struct hmc425a_chip_info {
 };
 
 struct hmc425a_state {
-	struct	regulator *reg;
 	struct	mutex lock; /* protect sensor state */
 	struct	hmc425a_chip_info *chip_info;
 	struct	gpio_descs *gpios;
@@ -162,13 +161,6 @@ static const struct of_device_id hmc425a_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, hmc425a_of_match);
 
-static void hmc425a_reg_disable(void *data)
-{
-	struct hmc425a_state *st = data;
-
-	regulator_disable(st->reg);
-}
-
 static struct hmc425a_chip_info hmc425a_chip_info_tbl[] = {
 	[ID_HMC425A] = {
 		.name = "hmc425a",
@@ -211,14 +203,7 @@ static int hmc425a_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	st->reg = devm_regulator_get(&pdev->dev, "vcc-supply");
-	if (IS_ERR(st->reg))
-		return PTR_ERR(st->reg);
-
-	ret = regulator_enable(st->reg);
-	if (ret)
-		return ret;
-	ret = devm_add_action_or_reset(&pdev->dev, hmc425a_reg_disable, st);
+	ret = devm_regulator_get_enable(&pdev->dev, "vcc-supply");
 	if (ret)
 		return ret;
 
-- 
GitLab


From fd4fc88d0b6fc452571152d4d3b2fc8078be3825 Mon Sep 17 00:00:00 2001
From: Mitja Spes <mitja@lxnav.com>
Date: Fri, 21 Oct 2022 15:58:22 +0200
Subject: [PATCH 2296/4122] dt-bindings: iio: pressure: meas,ms5611: add max
 SPI frequency to the example

Added max SPI frequency setting to the example.

Signed-off-by: Mitja Spes <mitja@lxnav.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221021135827.1444793-4-mitja@lxnav.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml
index 7fed750fa3ff7..21e6ddb7f41e3 100644
--- a/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml
+++ b/Documentation/devicetree/bindings/iio/pressure/meas,ms5611.yaml
@@ -55,6 +55,7 @@ examples:
             compatible = "meas,ms5611";
             reg = <0>;
             vdd-supply = <&ldo_3v3_gnss>;
+            spi-max-frequency = <20000000>;
         };
     };
 ...
-- 
GitLab


From 36a4df5003c91663bd2d68a3cf452c0495ce583c Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Fri, 21 Oct 2022 13:41:14 +0300
Subject: [PATCH 2297/4122] dt-bindings: iio: adc: add AD4130

AD4130-8 is an ultra-low power, high precision, measurement solution for
low bandwidth battery operated applications.

The fully integrated AFE (Analog Front-End) includes a multiplexer for up
to 16 single-ended or 8 differential inputs, PGA (Programmable Gain
Amplifier), 24-bit Sigma-Delta ADC, on-chip reference and oscillator,
selectable filter options, smart sequencer, sensor biasing and excitation
options, diagnostics, and a FIFO buffer.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221021104115.1812486-2-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/adc/adi,ad4130.yaml          | 259 ++++++++++++++++++
 MAINTAINERS                                   |   7 +
 2 files changed, 266 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml
new file mode 100644
index 0000000000000..28ebd38b9db44
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml
@@ -0,0 +1,259 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2022 Analog Devices Inc.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/adc/adi,ad4130.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD4130 ADC device driver
+
+maintainers:
+  - Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+description: |
+  Bindings for the Analog Devices AD4130 ADC. Datasheet can be found here:
+    https://www.analog.com/media/en/technical-documentation/data-sheets/AD4130-8.pdf
+
+properties:
+  compatible:
+    enum:
+      - adi,ad4130
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+    description: phandle to the master clock (mclk)
+
+  clock-names:
+    items:
+      - const: mclk
+
+  interrupts:
+    maxItems: 1
+
+  interrupt-names:
+    description: |
+      Specify which interrupt pin should be configured as Data Ready / FIFO
+      interrupt.
+      Default if not supplied is int.
+    enum:
+      - int
+      - clk
+      - p2
+      - dout
+
+  '#address-cells':
+    const: 1
+
+  '#size-cells':
+    const: 0
+
+  '#clock-cells':
+    const: 0
+
+  clock-output-names:
+    maxItems: 1
+
+  refin1-supply:
+    description: refin1 supply. Can be used as reference for conversion.
+
+  refin2-supply:
+    description: refin2 supply. Can be used as reference for conversion.
+
+  avdd-supply:
+    description: AVDD voltage supply. Can be used as reference for conversion.
+
+  iovdd-supply:
+    description: IOVDD voltage supply. Used for the chip interface.
+
+  spi-max-frequency:
+    maximum: 5000000
+
+  adi,ext-clk-freq-hz:
+    description: Specify the frequency of the external clock.
+    enum: [76800, 153600]
+    default: 76800
+
+  adi,bipolar:
+    description: Specify if the device should be used in bipolar mode.
+    type: boolean
+
+  adi,vbias-pins:
+    description: Analog inputs to apply a voltage bias of (AVDD − AVSS) / 2 to.
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    maxItems: 16
+    items:
+      minimum: 0
+      maximum: 15
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+patternProperties:
+  "^channel@([0-9a-f])$":
+    type: object
+    $ref: adc.yaml
+    unevaluatedProperties: false
+
+    properties:
+      reg:
+        description: The channel number.
+        minimum: 0
+        maximum: 15
+
+      diff-channels:
+        description: |
+          Besides the analog inputs available, internal inputs can be used.
+          16: Internal temperature sensor.
+          17: AVSS
+          18: Internal reference
+          19: DGND
+          20: (AVDD − AVSS)/6+
+          21: (AVDD − AVSS)/6-
+          22: (IOVDD − DGND)/6+
+          23: (IOVDD − DGND)/6-
+          24: (ALDO − AVSS)/6+
+          25: (ALDO − AVSS)/6-
+          26: (DLDO − DGND)/6+
+          27: (DLDO − DGND)/6-
+          28: V_MV_P
+          29: V_MV_M
+        items:
+          minimum: 0
+          maximum: 29
+
+      adi,reference-select:
+        description: |
+          Select the reference source to use when converting on the
+          specific channel. Valid values are:
+          0: REFIN1(+)/REFIN1(−)
+          1: REFIN2(+)/REFIN2(−)
+          2: REFOUT/AVSS (Internal reference)
+          3: AVDD/AVSS
+          If not specified, REFIN1 is used.
+        $ref: /schemas/types.yaml#/definitions/uint32
+        enum: [0, 1, 2, 3]
+        default: 0
+
+      adi,excitation-pin-0:
+        description: |
+          Analog input to apply excitation current to while the channel
+          is active.
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        default: 0
+
+      adi,excitation-pin-1:
+        description: |
+          Analog input to apply excitation current to while this channel
+          is active.
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 0
+        maximum: 15
+        default: 0
+
+      adi,excitation-current-0-nanoamp:
+        description: |
+          Excitation current in nanoamps to be applied to pin specified in
+          adi,excitation-pin-0 while this channel is active.
+        enum: [0, 100, 10000, 20000, 50000, 100000, 150000, 200000]
+        default: 0
+
+      adi,excitation-current-1-nanoamp:
+        description: |
+          Excitation current in nanoamps to be applied to pin specified in
+          adi,excitation-pin-1 while this channel is active.
+        enum: [0, 100, 10000, 20000, 50000, 100000, 150000, 200000]
+        default: 0
+
+      adi,burnout-current-nanoamp:
+        description: |
+          Burnout current in nanoamps to be applied for this channel.
+        enum: [0, 500, 2000, 4000]
+        default: 0
+
+      adi,buffered-positive:
+        description: Enable buffered mode for positive input.
+        type: boolean
+
+      adi,buffered-negative:
+        description: Enable buffered mode for negative input.
+        type: boolean
+
+    required:
+      - reg
+      - diff-channels
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    spi {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      adc@0 {
+        compatible = "adi,ad4130";
+        reg = <0>;
+
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        spi-max-frequency = <5000000>;
+        interrupts = <27 IRQ_TYPE_EDGE_FALLING>;
+        interrupt-parent = <&gpio>;
+
+        channel@0 {
+          reg = <0>;
+
+          adi,reference-select = <2>;
+
+          /* AIN8, AIN9 */
+          diff-channels = <8 9>;
+        };
+
+        channel@1 {
+          reg = <1>;
+
+          adi,reference-select = <2>;
+
+          /* AIN10, AIN11 */
+          diff-channels = <10 11>;
+        };
+
+        channel@2 {
+          reg = <2>;
+
+          adi,reference-select = <2>;
+
+          /* Temperature Sensor, DGND */
+          diff-channels = <16 19>;
+        };
+
+        channel@3 {
+          reg = <3>;
+
+          adi,reference-select = <2>;
+
+          /* Internal reference, DGND */
+          diff-channels = <18 19>;
+        };
+
+        channel@4 {
+          reg = <4>;
+
+          adi,reference-select = <2>;
+
+          /* DGND, DGND */
+          diff-channels = <19 19>;
+        };
+      };
+    };
diff --git a/MAINTAINERS b/MAINTAINERS
index 379945f82a643..5cb67803af96f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1117,6 +1117,13 @@ T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
 F:	drivers/net/amt.c
 
+ANALOG DEVICES INC AD4130 DRIVER
+M:	Cosmin Tanislav <cosmin.tanislav@analog.com>
+L:	linux-iio@vger.kernel.org
+S:	Supported
+W:	http://ez.analog.com/community/linux-device-drivers
+F:	Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml
+
 ANALOG DEVICES INC AD7192 DRIVER
 M:	Alexandru Tachici <alexandru.tachici@analog.com>
 L:	linux-iio@vger.kernel.org
-- 
GitLab


From 62094060cf3acaf52e277457d807ea753269b89e Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Fri, 21 Oct 2022 13:41:15 +0300
Subject: [PATCH 2298/4122] iio: adc: ad4130: add AD4130 driver

AD4130-8 is an ultra-low power, high precision, measurement solution for
low bandwidth battery operated applications.

The fully integrated AFE (Analog Front-End) includes a multiplexer for up
to 16 single-ended or 8 differential inputs, PGA (Programmable Gain
Amplifier), 24-bit Sigma-Delta ADC, on-chip reference and oscillator,
selectable filter options, smart sequencer, sensor biasing and excitation
options, diagnostics, and a FIFO buffer.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Link: https://lore.kernel.org/r/20221021104115.1812486-3-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../ABI/testing/sysfs-bus-iio-adc-ad4130      |   36 +
 MAINTAINERS                                   |    2 +
 drivers/iio/adc/Kconfig                       |   14 +
 drivers/iio/adc/Makefile                      |    1 +
 drivers/iio/adc/ad4130.c                      | 2100 +++++++++++++++++
 5 files changed, 2153 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
 create mode 100644 drivers/iio/adc/ad4130.c

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
new file mode 100644
index 0000000000000..d9555751d21ce
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
@@ -0,0 +1,36 @@
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_filter_mode_available
+KernelVersion:  6.2
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Reading returns a list with the possible filter modes.
+		"sinc4"       - Sinc 4. Excellent noise performance. Long 1st
+				conversion time. No natural 50/60Hz rejection.
+		"sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion time.
+		"sinc3"	      - Sinc3. Moderate 1st conversion time. Good noise
+				performance.
+		"sinc3+rej60" - Sinc3 + 60Hz rejection. At a sampling frequency
+				of 50Hz, achieves simultaneous 50Hz and 60Hz
+				rejection.
+		"sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion time.
+				Best used with a sampling frequency of at least
+				216.19Hz.
+		"sinc3+pf1"   - Sinc3 + Post Filter 1.
+				53dB rejection @ 50Hz, 58dB rejection @ 60Hz.
+		"sinc3+pf2"   - Sinc3 + Post Filter 2.
+				70dB rejection @ 50Hz, 70dB rejection @ 60Hz.
+		"sinc3+pf3"   - Sinc3 + Post Filter 3.
+				99dB rejection @ 50Hz, 103dB rejection @ 60Hz.
+		"sinc3+pf4"   - Sinc3 + Post Filter 4.
+				103dB rejection @ 50Hz, 109dB rejection @ 60Hz.
+
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode
+KernelVersion:  6.2
+Contact:	linux-iio@vger.kernel.org
+Description:
+		Set the filter mode of the differential channel. When the filter
+		mode changes, the in_voltageY-voltageZ_sampling_frequency and
+		in_voltageY-voltageZ_sampling_frequency_available attributes
+		might also change to accommodate the new filter mode.
+		If the current sampling frequency is out of range for the new
+		filter mode, the sampling frequency will be changed to the
+		closest valid one.
diff --git a/MAINTAINERS b/MAINTAINERS
index 5cb67803af96f..ecec0bb6b09e3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1122,7 +1122,9 @@ M:	Cosmin Tanislav <cosmin.tanislav@analog.com>
 L:	linux-iio@vger.kernel.org
 S:	Supported
 W:	http://ez.analog.com/community/linux-device-drivers
+F:	Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
 F:	Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml
+F:	drivers/iio/adc/ad4130.c
 
 ANALOG DEVICES INC AD7192 DRIVER
 M:	Alexandru Tachici <alexandru.tachici@analog.com>
diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 5386d862def9c..8d719fbb6acc0 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -21,6 +21,20 @@ config AD_SIGMA_DELTA
 	select IIO_BUFFER
 	select IIO_TRIGGERED_BUFFER
 
+config AD4130
+	tristate "Analog Device AD4130 ADC Driver"
+	depends on SPI
+	select IIO_BUFFER
+	select IIO_KFIFO_BUF
+	select REGMAP_SPI
+	depends on COMMON_CLK
+	help
+	  Say yes here to build support for Analog Devices AD4130-8 SPI analog
+	  to digital converters (ADC).
+
+	  To compile this driver as a module, choose M here: the module will be
+	  called ad4130.
+
 config AD7091R5
 	tristate "Analog Devices AD7091R5 ADC Driver"
 	depends on I2C
diff --git a/drivers/iio/adc/Makefile b/drivers/iio/adc/Makefile
index 1571e891828eb..4ef41a7dfac6e 100644
--- a/drivers/iio/adc/Makefile
+++ b/drivers/iio/adc/Makefile
@@ -6,6 +6,7 @@
 # When adding new entries keep the list in alphabetical order
 obj-$(CONFIG_AB8500_GPADC) += ab8500-gpadc.o
 obj-$(CONFIG_AD_SIGMA_DELTA) += ad_sigma_delta.o
+obj-$(CONFIG_AD4130) += ad4130.o
 obj-$(CONFIG_AD7091R5) += ad7091r5.o ad7091r-base.o
 obj-$(CONFIG_AD7124) += ad7124.o
 obj-$(CONFIG_AD7192) += ad7192.o
diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c
new file mode 100644
index 0000000000000..9a4d0043d797e
--- /dev/null
+++ b/drivers/iio/adc/ad4130.c
@@ -0,0 +1,2100 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2022 Analog Devices, Inc.
+ * Author: Cosmin Tanislav <cosmin.tanislav@analog.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/gpio/driver.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+#include <linux/units.h>
+
+#include <asm/div64.h>
+#include <asm/unaligned.h>
+
+#include <linux/iio/buffer.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/kfifo_buf.h>
+#include <linux/iio/sysfs.h>
+
+#define AD4130_NAME				"ad4130"
+
+#define AD4130_COMMS_READ_MASK			BIT(6)
+
+#define AD4130_STATUS_REG			0x00
+
+#define AD4130_ADC_CONTROL_REG			0x01
+#define AD4130_ADC_CONTROL_BIPOLAR_MASK		BIT(14)
+#define AD4130_ADC_CONTROL_INT_REF_VAL_MASK	BIT(13)
+#define AD4130_INT_REF_2_5V			2500000
+#define AD4130_INT_REF_1_25V			1250000
+#define AD4130_ADC_CONTROL_CSB_EN_MASK		BIT(9)
+#define AD4130_ADC_CONTROL_INT_REF_EN_MASK	BIT(8)
+#define AD4130_ADC_CONTROL_MODE_MASK		GENMASK(5, 2)
+#define AD4130_ADC_CONTROL_MCLK_SEL_MASK	GENMASK(1, 0)
+#define AD4130_MCLK_FREQ_76_8KHZ		76800
+#define AD4130_MCLK_FREQ_153_6KHZ		153600
+
+#define AD4130_DATA_REG				0x02
+
+#define AD4130_IO_CONTROL_REG			0x03
+#define AD4130_IO_CONTROL_INT_PIN_SEL_MASK	GENMASK(9, 8)
+#define AD4130_IO_CONTROL_GPIO_DATA_MASK	GENMASK(7, 4)
+#define AD4130_IO_CONTROL_GPIO_CTRL_MASK	GENMASK(3, 0)
+
+#define AD4130_VBIAS_REG			0x04
+
+#define AD4130_ID_REG				0x05
+
+#define AD4130_ERROR_REG			0x06
+
+#define AD4130_ERROR_EN_REG			0x07
+
+#define AD4130_MCLK_COUNT_REG			0x08
+
+#define AD4130_CHANNEL_X_REG(x)			(0x09 + (x))
+#define AD4130_CHANNEL_EN_MASK			BIT(23)
+#define AD4130_CHANNEL_SETUP_MASK		GENMASK(22, 20)
+#define AD4130_CHANNEL_AINP_MASK		GENMASK(17, 13)
+#define AD4130_CHANNEL_AINM_MASK		GENMASK(12, 8)
+#define AD4130_CHANNEL_IOUT1_MASK		GENMASK(7, 4)
+#define AD4130_CHANNEL_IOUT2_MASK		GENMASK(3, 0)
+
+#define AD4130_CONFIG_X_REG(x)			(0x19 + (x))
+#define AD4130_CONFIG_IOUT1_VAL_MASK		GENMASK(15, 13)
+#define AD4130_CONFIG_IOUT2_VAL_MASK		GENMASK(12, 10)
+#define AD4130_CONFIG_BURNOUT_MASK		GENMASK(9, 8)
+#define AD4130_CONFIG_REF_BUFP_MASK		BIT(7)
+#define AD4130_CONFIG_REF_BUFM_MASK		BIT(6)
+#define AD4130_CONFIG_REF_SEL_MASK		GENMASK(5, 4)
+#define AD4130_CONFIG_PGA_MASK			GENMASK(3, 1)
+
+#define AD4130_FILTER_X_REG(x)			(0x21 + (x))
+#define AD4130_FILTER_MODE_MASK			GENMASK(15, 12)
+#define AD4130_FILTER_SELECT_MASK		GENMASK(10, 0)
+#define AD4130_FILTER_SELECT_MIN		1
+
+#define AD4130_OFFSET_X_REG(x)			(0x29 + (x))
+
+#define AD4130_GAIN_X_REG(x)			(0x31 + (x))
+
+#define AD4130_MISC_REG				0x39
+
+#define AD4130_FIFO_CONTROL_REG			0x3a
+#define AD4130_FIFO_CONTROL_HEADER_MASK		BIT(18)
+#define AD4130_FIFO_CONTROL_MODE_MASK		GENMASK(17, 16)
+#define AD4130_FIFO_CONTROL_WM_INT_EN_MASK	BIT(9)
+#define AD4130_FIFO_CONTROL_WM_MASK		GENMASK(7, 0)
+#define AD4130_WATERMARK_256			0
+
+#define AD4130_FIFO_STATUS_REG			0x3b
+
+#define AD4130_FIFO_THRESHOLD_REG		0x3c
+
+#define AD4130_FIFO_DATA_REG			0x3d
+#define AD4130_FIFO_SIZE			256
+#define AD4130_FIFO_MAX_SAMPLE_SIZE		3
+
+#define AD4130_MAX_ANALOG_PINS			16
+#define AD4130_MAX_CHANNELS			16
+#define AD4130_MAX_DIFF_INPUTS			30
+#define AD4130_MAX_GPIOS			4
+#define AD4130_MAX_ODR				2400
+#define AD4130_MAX_PGA				8
+#define AD4130_MAX_SETUPS			8
+
+#define AD4130_AIN2_P1				0x2
+#define AD4130_AIN3_P2				0x3
+
+#define AD4130_RESET_BUF_SIZE			8
+#define AD4130_RESET_SLEEP_US			(160 * MICRO / AD4130_MCLK_FREQ_76_8KHZ)
+
+#define AD4130_INVALID_SLOT			-1
+
+static const unsigned int ad4130_reg_size[] = {
+	[AD4130_STATUS_REG] = 1,
+	[AD4130_ADC_CONTROL_REG] = 2,
+	[AD4130_DATA_REG] = 3,
+	[AD4130_IO_CONTROL_REG] = 2,
+	[AD4130_VBIAS_REG] = 2,
+	[AD4130_ID_REG] = 1,
+	[AD4130_ERROR_REG] = 2,
+	[AD4130_ERROR_EN_REG] = 2,
+	[AD4130_MCLK_COUNT_REG] = 1,
+	[AD4130_CHANNEL_X_REG(0) ... AD4130_CHANNEL_X_REG(AD4130_MAX_CHANNELS - 1)] = 3,
+	[AD4130_CONFIG_X_REG(0) ... AD4130_CONFIG_X_REG(AD4130_MAX_SETUPS - 1)] = 2,
+	[AD4130_FILTER_X_REG(0) ... AD4130_FILTER_X_REG(AD4130_MAX_SETUPS - 1)] = 3,
+	[AD4130_OFFSET_X_REG(0) ... AD4130_OFFSET_X_REG(AD4130_MAX_SETUPS - 1)] = 3,
+	[AD4130_GAIN_X_REG(0) ... AD4130_GAIN_X_REG(AD4130_MAX_SETUPS - 1)] = 3,
+	[AD4130_MISC_REG] = 2,
+	[AD4130_FIFO_CONTROL_REG] = 3,
+	[AD4130_FIFO_STATUS_REG] = 1,
+	[AD4130_FIFO_THRESHOLD_REG] = 3,
+	[AD4130_FIFO_DATA_REG] = 3,
+};
+
+enum ad4130_int_ref_val {
+	AD4130_INT_REF_VAL_2_5V,
+	AD4130_INT_REF_VAL_1_25V,
+};
+
+enum ad4130_mclk_sel {
+	AD4130_MCLK_76_8KHZ,
+	AD4130_MCLK_76_8KHZ_OUT,
+	AD4130_MCLK_76_8KHZ_EXT,
+	AD4130_MCLK_153_6KHZ_EXT,
+};
+
+enum ad4130_int_pin_sel {
+	AD4130_INT_PIN_INT,
+	AD4130_INT_PIN_CLK,
+	AD4130_INT_PIN_P2,
+	AD4130_INT_PIN_DOUT,
+};
+
+enum ad4130_iout {
+	AD4130_IOUT_OFF,
+	AD4130_IOUT_10000NA,
+	AD4130_IOUT_20000NA,
+	AD4130_IOUT_50000NA,
+	AD4130_IOUT_100000NA,
+	AD4130_IOUT_150000NA,
+	AD4130_IOUT_200000NA,
+	AD4130_IOUT_100NA,
+	AD4130_IOUT_MAX
+};
+
+enum ad4130_burnout {
+	AD4130_BURNOUT_OFF,
+	AD4130_BURNOUT_500NA,
+	AD4130_BURNOUT_2000NA,
+	AD4130_BURNOUT_4000NA,
+	AD4130_BURNOUT_MAX
+};
+
+enum ad4130_ref_sel {
+	AD4130_REF_REFIN1,
+	AD4130_REF_REFIN2,
+	AD4130_REF_REFOUT_AVSS,
+	AD4130_REF_AVDD_AVSS,
+	AD4130_REF_SEL_MAX
+};
+
+enum ad4130_fifo_mode {
+	AD4130_FIFO_MODE_DISABLED = 0b00,
+	AD4130_FIFO_MODE_WM = 0b01,
+};
+
+enum ad4130_mode {
+	AD4130_MODE_CONTINUOUS = 0b0000,
+	AD4130_MODE_IDLE = 0b0100,
+};
+
+enum ad4130_filter_mode {
+	AD4130_FILTER_SINC4,
+	AD4130_FILTER_SINC4_SINC1,
+	AD4130_FILTER_SINC3,
+	AD4130_FILTER_SINC3_REJ60,
+	AD4130_FILTER_SINC3_SINC1,
+	AD4130_FILTER_SINC3_PF1,
+	AD4130_FILTER_SINC3_PF2,
+	AD4130_FILTER_SINC3_PF3,
+	AD4130_FILTER_SINC3_PF4,
+};
+
+enum ad4130_pin_function {
+	AD4130_PIN_FN_NONE,
+	AD4130_PIN_FN_SPECIAL = BIT(0),
+	AD4130_PIN_FN_DIFF = BIT(1),
+	AD4130_PIN_FN_EXCITATION = BIT(2),
+	AD4130_PIN_FN_VBIAS = BIT(3),
+};
+
+struct ad4130_setup_info {
+	unsigned int			iout0_val;
+	unsigned int			iout1_val;
+	unsigned int			burnout;
+	unsigned int			pga;
+	unsigned int			fs;
+	u32				ref_sel;
+	enum ad4130_filter_mode		filter_mode;
+	bool				ref_bufp;
+	bool				ref_bufm;
+};
+
+struct ad4130_slot_info {
+	struct ad4130_setup_info	setup;
+	unsigned int			enabled_channels;
+	unsigned int			channels;
+};
+
+struct ad4130_chan_info {
+	struct ad4130_setup_info	setup;
+	u32				iout0;
+	u32				iout1;
+	int				slot;
+	bool				enabled;
+	bool				initialized;
+};
+
+struct ad4130_filter_config {
+	enum ad4130_filter_mode		filter_mode;
+	unsigned int			odr_div;
+	unsigned int			fs_max;
+	enum iio_available_type		samp_freq_avail_type;
+	int				samp_freq_avail_len;
+	int				samp_freq_avail[3][2];
+};
+
+struct ad4130_state {
+	struct regmap			*regmap;
+	struct spi_device		*spi;
+	struct clk			*mclk;
+	struct regulator_bulk_data	regulators[4];
+	u32				irq_trigger;
+	u32				inv_irq_trigger;
+
+	/*
+	 * Synchronize access to members the of driver state, and ensure
+	 * atomicity of consecutive regmap operations.
+	 */
+	struct mutex			lock;
+	struct completion		completion;
+
+	struct iio_chan_spec		chans[AD4130_MAX_CHANNELS];
+	struct ad4130_chan_info		chans_info[AD4130_MAX_CHANNELS];
+	struct ad4130_slot_info		slots_info[AD4130_MAX_SETUPS];
+	enum ad4130_pin_function	pins_fn[AD4130_MAX_ANALOG_PINS];
+	u32				vbias_pins[AD4130_MAX_ANALOG_PINS];
+	u32				num_vbias_pins;
+	int				scale_tbls[AD4130_REF_SEL_MAX][AD4130_MAX_PGA][2];
+	struct gpio_chip		gc;
+	struct clk_hw			int_clk_hw;
+
+	u32			int_pin_sel;
+	u32			int_ref_uv;
+	u32			mclk_sel;
+	bool			int_ref_en;
+	bool			bipolar;
+
+	unsigned int		num_enabled_channels;
+	unsigned int		effective_watermark;
+	unsigned int		watermark;
+
+	struct spi_message	fifo_msg;
+	struct spi_transfer	fifo_xfer[2];
+
+	/*
+	 * DMA (thus cache coherency maintenance) requires any transfer
+	 * buffers to live in their own cache lines. As the use of these
+	 * buffers is synchronous, all of the buffers used for DMA in this
+	 * driver may share a cache line.
+	 */
+	u8			reset_buf[AD4130_RESET_BUF_SIZE] __aligned(IIO_DMA_MINALIGN);
+	u8			reg_write_tx_buf[4];
+	u8			reg_read_tx_buf[1];
+	u8			reg_read_rx_buf[3];
+	u8			fifo_tx_buf[2];
+	u8			fifo_rx_buf[AD4130_FIFO_SIZE *
+					    AD4130_FIFO_MAX_SAMPLE_SIZE];
+};
+
+static const char * const ad4130_int_pin_names[] = {
+	[AD4130_INT_PIN_INT] = "int",
+	[AD4130_INT_PIN_CLK] = "clk",
+	[AD4130_INT_PIN_P2] = "p2",
+	[AD4130_INT_PIN_DOUT] = "dout",
+};
+
+static const unsigned int ad4130_iout_current_na_tbl[AD4130_IOUT_MAX] = {
+	[AD4130_IOUT_OFF] = 0,
+	[AD4130_IOUT_100NA] = 100,
+	[AD4130_IOUT_10000NA] = 10000,
+	[AD4130_IOUT_20000NA] = 20000,
+	[AD4130_IOUT_50000NA] = 50000,
+	[AD4130_IOUT_100000NA] = 100000,
+	[AD4130_IOUT_150000NA] = 150000,
+	[AD4130_IOUT_200000NA] = 200000,
+};
+
+static const unsigned int ad4130_burnout_current_na_tbl[AD4130_BURNOUT_MAX] = {
+	[AD4130_BURNOUT_OFF] = 0,
+	[AD4130_BURNOUT_500NA] = 500,
+	[AD4130_BURNOUT_2000NA] = 2000,
+	[AD4130_BURNOUT_4000NA] = 4000,
+};
+
+#define AD4130_VARIABLE_ODR_CONFIG(_filter_mode, _odr_div, _fs_max)	\
+{									\
+		.filter_mode = (_filter_mode),				\
+		.odr_div = (_odr_div),					\
+		.fs_max = (_fs_max),					\
+		.samp_freq_avail_type = IIO_AVAIL_RANGE,		\
+		.samp_freq_avail = {					\
+			{ AD4130_MAX_ODR, (_odr_div) * (_fs_max) },	\
+			{ AD4130_MAX_ODR, (_odr_div) * (_fs_max) },	\
+			{ AD4130_MAX_ODR, (_odr_div) },			\
+		},							\
+}
+
+#define AD4130_FIXED_ODR_CONFIG(_filter_mode, _odr_div)			\
+{									\
+		.filter_mode = (_filter_mode),				\
+		.odr_div = (_odr_div),					\
+		.fs_max = AD4130_FILTER_SELECT_MIN,			\
+		.samp_freq_avail_type = IIO_AVAIL_LIST,			\
+		.samp_freq_avail_len = 1,				\
+		.samp_freq_avail = {					\
+			{ AD4130_MAX_ODR, (_odr_div) },			\
+		},							\
+}
+
+static const struct ad4130_filter_config ad4130_filter_configs[] = {
+	AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC4,       1,  10),
+	AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC4_SINC1, 11, 10),
+	AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC3,       1,  2047),
+	AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC3_REJ60, 1,  2047),
+	AD4130_VARIABLE_ODR_CONFIG(AD4130_FILTER_SINC3_SINC1, 10, 2047),
+	AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF1,      92),
+	AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF2,      100),
+	AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF3,      124),
+	AD4130_FIXED_ODR_CONFIG(AD4130_FILTER_SINC3_PF4,      148),
+};
+
+static const char * const ad4130_filter_modes_str[] = {
+	[AD4130_FILTER_SINC4] = "sinc4",
+	[AD4130_FILTER_SINC4_SINC1] = "sinc4+sinc1",
+	[AD4130_FILTER_SINC3] = "sinc3",
+	[AD4130_FILTER_SINC3_REJ60] = "sinc3+rej60",
+	[AD4130_FILTER_SINC3_SINC1] = "sinc3+sinc1",
+	[AD4130_FILTER_SINC3_PF1] = "sinc3+pf1",
+	[AD4130_FILTER_SINC3_PF2] = "sinc3+pf2",
+	[AD4130_FILTER_SINC3_PF3] = "sinc3+pf3",
+	[AD4130_FILTER_SINC3_PF4] = "sinc3+pf4",
+};
+
+static int ad4130_get_reg_size(struct ad4130_state *st, unsigned int reg,
+			       unsigned int *size)
+{
+	if (reg >= ARRAY_SIZE(ad4130_reg_size))
+		return -EINVAL;
+
+	*size = ad4130_reg_size[reg];
+
+	return 0;
+}
+
+static unsigned int ad4130_data_reg_size(struct ad4130_state *st)
+{
+	unsigned int data_reg_size;
+	int ret;
+
+	ret = ad4130_get_reg_size(st, AD4130_DATA_REG, &data_reg_size);
+	if (ret)
+		return 0;
+
+	return data_reg_size;
+}
+
+static unsigned int ad4130_resolution(struct ad4130_state *st)
+{
+	return ad4130_data_reg_size(st) * BITS_PER_BYTE;
+}
+
+static int ad4130_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct ad4130_state *st = context;
+	unsigned int size;
+	int ret;
+
+	ret = ad4130_get_reg_size(st, reg, &size);
+	if (ret)
+		return ret;
+
+	st->reg_write_tx_buf[0] = reg;
+
+	switch (size) {
+	case 3:
+		put_unaligned_be24(val, &st->reg_write_tx_buf[1]);
+		break;
+	case 2:
+		put_unaligned_be16(val, &st->reg_write_tx_buf[1]);
+		break;
+	case 1:
+		st->reg_write_tx_buf[1] = val;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return spi_write(st->spi, st->reg_write_tx_buf, size + 1);
+}
+
+static int ad4130_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct ad4130_state *st = context;
+	struct spi_transfer t[] = {
+		{
+			.tx_buf = st->reg_read_tx_buf,
+			.len = sizeof(st->reg_read_tx_buf),
+		},
+		{
+			.rx_buf = st->reg_read_rx_buf,
+		},
+	};
+	unsigned int size;
+	int ret;
+
+	ret = ad4130_get_reg_size(st, reg, &size);
+	if (ret)
+		return ret;
+
+	st->reg_read_tx_buf[0] = AD4130_COMMS_READ_MASK | reg;
+	t[1].len = size;
+
+	ret = spi_sync_transfer(st->spi, t, ARRAY_SIZE(t));
+	if (ret)
+		return ret;
+
+	switch (size) {
+	case 3:
+		*val = get_unaligned_be24(st->reg_read_rx_buf);
+		break;
+	case 2:
+		*val = get_unaligned_be16(st->reg_read_rx_buf);
+		break;
+	case 1:
+		*val = st->reg_read_rx_buf[0];
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct regmap_config ad4130_regmap_config = {
+	.reg_read = ad4130_reg_read,
+	.reg_write = ad4130_reg_write,
+};
+
+static int ad4130_gpio_init_valid_mask(struct gpio_chip *gc,
+				       unsigned long *valid_mask,
+				       unsigned int ngpios)
+{
+	struct ad4130_state *st = gpiochip_get_data(gc);
+	unsigned int i;
+
+	/*
+	 * Output-only GPIO functionality is available on pins AIN2 through
+	 * AIN5. If these pins are used for anything else, do not expose them.
+	 */
+	for (i = 0; i < ngpios; i++) {
+		unsigned int pin = i + AD4130_AIN2_P1;
+		bool valid = st->pins_fn[pin] == AD4130_PIN_FN_NONE;
+
+		__assign_bit(i, valid_mask, valid);
+	}
+
+	return 0;
+}
+
+static int ad4130_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
+{
+	return GPIO_LINE_DIRECTION_OUT;
+}
+
+static void ad4130_gpio_set(struct gpio_chip *gc, unsigned int offset,
+			    int value)
+{
+	struct ad4130_state *st = gpiochip_get_data(gc);
+	unsigned int mask = FIELD_PREP(AD4130_IO_CONTROL_GPIO_DATA_MASK,
+				       BIT(offset));
+
+	regmap_update_bits(st->regmap, AD4130_IO_CONTROL_REG, mask,
+			   value ? mask : 0);
+}
+
+static int ad4130_set_mode(struct ad4130_state *st, enum ad4130_mode mode)
+{
+	return regmap_update_bits(st->regmap, AD4130_ADC_CONTROL_REG,
+				  AD4130_ADC_CONTROL_MODE_MASK,
+				  FIELD_PREP(AD4130_ADC_CONTROL_MODE_MASK, mode));
+}
+
+static int ad4130_set_watermark_interrupt_en(struct ad4130_state *st, bool en)
+{
+	return regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG,
+				  AD4130_FIFO_CONTROL_WM_INT_EN_MASK,
+				  FIELD_PREP(AD4130_FIFO_CONTROL_WM_INT_EN_MASK, en));
+}
+
+static unsigned int ad4130_watermark_reg_val(unsigned int val)
+{
+	if (val == AD4130_FIFO_SIZE)
+		val = AD4130_WATERMARK_256;
+
+	return val;
+}
+
+static int ad4130_set_fifo_mode(struct ad4130_state *st,
+				enum ad4130_fifo_mode mode)
+{
+	return regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG,
+				  AD4130_FIFO_CONTROL_MODE_MASK,
+				  FIELD_PREP(AD4130_FIFO_CONTROL_MODE_MASK, mode));
+}
+
+static void ad4130_push_fifo_data(struct iio_dev *indio_dev)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int data_reg_size = ad4130_data_reg_size(st);
+	unsigned int transfer_len = st->effective_watermark * data_reg_size;
+	unsigned int set_size = st->num_enabled_channels * data_reg_size;
+	unsigned int i;
+	int ret;
+
+	st->fifo_tx_buf[1] = ad4130_watermark_reg_val(st->effective_watermark);
+	st->fifo_xfer[1].len = transfer_len;
+
+	ret = spi_sync(st->spi, &st->fifo_msg);
+	if (ret)
+		return;
+
+	for (i = 0; i < transfer_len; i += set_size)
+		iio_push_to_buffers(indio_dev, &st->fifo_rx_buf[i]);
+}
+
+static irqreturn_t ad4130_irq_handler(int irq, void *private)
+{
+	struct iio_dev *indio_dev = private;
+	struct ad4130_state *st = iio_priv(indio_dev);
+
+	if (iio_buffer_enabled(indio_dev))
+		ad4130_push_fifo_data(indio_dev);
+	else
+		complete(&st->completion);
+
+	return IRQ_HANDLED;
+}
+
+static int ad4130_find_slot(struct ad4130_state *st,
+			    struct ad4130_setup_info *target_setup_info,
+			    unsigned int *slot, bool *overwrite)
+{
+	unsigned int i;
+
+	*slot = AD4130_INVALID_SLOT;
+	*overwrite = false;
+
+	for (i = 0; i < AD4130_MAX_SETUPS; i++) {
+		struct ad4130_slot_info *slot_info = &st->slots_info[i];
+
+		/* Immediately accept a matching setup info. */
+		if (!memcmp(target_setup_info, &slot_info->setup,
+			    sizeof(*target_setup_info))) {
+			*slot = i;
+			return 0;
+		}
+
+		/* Ignore all setups which are used by enabled channels. */
+		if (slot_info->enabled_channels)
+			continue;
+
+		/* Find the least used slot. */
+		if (*slot == AD4130_INVALID_SLOT ||
+		    slot_info->channels < st->slots_info[*slot].channels)
+			*slot = i;
+	}
+
+	if (*slot == AD4130_INVALID_SLOT)
+		return -EINVAL;
+
+	*overwrite = true;
+
+	return 0;
+}
+
+static void ad4130_unlink_channel(struct ad4130_state *st, unsigned int channel)
+{
+	struct ad4130_chan_info *chan_info = &st->chans_info[channel];
+	struct ad4130_slot_info *slot_info = &st->slots_info[chan_info->slot];
+
+	chan_info->slot = AD4130_INVALID_SLOT;
+	slot_info->channels--;
+}
+
+static int ad4130_unlink_slot(struct ad4130_state *st, unsigned int slot)
+{
+	unsigned int i;
+
+	for (i = 0; i < AD4130_MAX_CHANNELS; i++) {
+		struct ad4130_chan_info *chan_info = &st->chans_info[i];
+
+		if (!chan_info->initialized || chan_info->slot != slot)
+			continue;
+
+		ad4130_unlink_channel(st, i);
+	}
+
+	return 0;
+}
+
+static int ad4130_link_channel_slot(struct ad4130_state *st,
+				    unsigned int channel, unsigned int slot)
+{
+	struct ad4130_slot_info *slot_info = &st->slots_info[slot];
+	struct ad4130_chan_info *chan_info = &st->chans_info[channel];
+	int ret;
+
+	ret = regmap_update_bits(st->regmap, AD4130_CHANNEL_X_REG(channel),
+				 AD4130_CHANNEL_SETUP_MASK,
+				 FIELD_PREP(AD4130_CHANNEL_SETUP_MASK, slot));
+	if (ret)
+		return ret;
+
+	chan_info->slot = slot;
+	slot_info->channels++;
+
+	return 0;
+}
+
+static int ad4130_write_slot_setup(struct ad4130_state *st,
+				   unsigned int slot,
+				   struct ad4130_setup_info *setup_info)
+{
+	unsigned int val;
+	int ret;
+
+	val = FIELD_PREP(AD4130_CONFIG_IOUT1_VAL_MASK, setup_info->iout0_val) |
+	      FIELD_PREP(AD4130_CONFIG_IOUT1_VAL_MASK, setup_info->iout1_val) |
+	      FIELD_PREP(AD4130_CONFIG_BURNOUT_MASK, setup_info->burnout) |
+	      FIELD_PREP(AD4130_CONFIG_REF_BUFP_MASK, setup_info->ref_bufp) |
+	      FIELD_PREP(AD4130_CONFIG_REF_BUFM_MASK, setup_info->ref_bufm) |
+	      FIELD_PREP(AD4130_CONFIG_REF_SEL_MASK, setup_info->ref_sel) |
+	      FIELD_PREP(AD4130_CONFIG_PGA_MASK, setup_info->pga);
+
+	ret = regmap_write(st->regmap, AD4130_CONFIG_X_REG(slot), val);
+	if (ret)
+		return ret;
+
+	val = FIELD_PREP(AD4130_FILTER_MODE_MASK, setup_info->filter_mode) |
+	      FIELD_PREP(AD4130_FILTER_SELECT_MASK, setup_info->fs);
+
+	ret = regmap_write(st->regmap, AD4130_FILTER_X_REG(slot), val);
+	if (ret)
+		return ret;
+
+	memcpy(&st->slots_info[slot].setup, setup_info, sizeof(*setup_info));
+
+	return 0;
+}
+
+static int ad4130_write_channel_setup(struct ad4130_state *st,
+				      unsigned int channel, bool on_enable)
+{
+	struct ad4130_chan_info *chan_info = &st->chans_info[channel];
+	struct ad4130_setup_info *setup_info = &chan_info->setup;
+	bool overwrite;
+	int slot;
+	int ret;
+
+	/*
+	 * The following cases need to be handled.
+	 *
+	 * 1. Enabled and linked channel with setup changes:
+	 *    - Find a slot. If not possible, return error.
+	 *    - Unlink channel from current slot.
+	 *    - If the slot has channels linked to it, unlink all channels, and
+	 *      write the new setup to it.
+	 *    - Link channel to new slot.
+	 *
+	 * 2. Soon to be enabled and unlinked channel:
+	 *    - Find a slot. If not possible, return error.
+	 *    - If the slot has channels linked to it, unlink all channels, and
+	 *      write the new setup to it.
+	 *    - Link channel to the slot.
+	 *
+	 * 3. Disabled and linked channel with setup changes:
+	 *    - Unlink channel from current slot.
+	 *
+	 * 4. Soon to be enabled and linked channel:
+	 * 5. Disabled and unlinked channel with setup changes:
+	 *    - Do nothing.
+	 */
+
+	/* Case 4 */
+	if (on_enable && chan_info->slot != AD4130_INVALID_SLOT)
+		return 0;
+
+	if (!on_enable && !chan_info->enabled) {
+		if (chan_info->slot != AD4130_INVALID_SLOT)
+			/* Case 3 */
+			ad4130_unlink_channel(st, channel);
+
+		/* Cases 3 & 5 */
+		return 0;
+	}
+
+	/* Cases 1 & 2 */
+	ret = ad4130_find_slot(st, setup_info, &slot, &overwrite);
+	if (ret)
+		return ret;
+
+	if (chan_info->slot != AD4130_INVALID_SLOT)
+		/* Case 1 */
+		ad4130_unlink_channel(st, channel);
+
+	if (overwrite) {
+		ret = ad4130_unlink_slot(st, slot);
+		if (ret)
+			return ret;
+
+		ret = ad4130_write_slot_setup(st, slot, setup_info);
+		if (ret)
+			return ret;
+	}
+
+	return ad4130_link_channel_slot(st, channel, slot);
+}
+
+static int ad4130_set_channel_enable(struct ad4130_state *st,
+				     unsigned int channel, bool status)
+{
+	struct ad4130_chan_info *chan_info = &st->chans_info[channel];
+	struct ad4130_slot_info *slot_info;
+	int ret;
+
+	if (chan_info->enabled == status)
+		return 0;
+
+	if (status) {
+		ret = ad4130_write_channel_setup(st, channel, true);
+		if (ret)
+			return ret;
+	}
+
+	slot_info = &st->slots_info[chan_info->slot];
+
+	ret = regmap_update_bits(st->regmap, AD4130_CHANNEL_X_REG(channel),
+				 AD4130_CHANNEL_EN_MASK,
+				 FIELD_PREP(AD4130_CHANNEL_EN_MASK, status));
+	if (ret)
+		return ret;
+
+	slot_info->enabled_channels += status ? 1 : -1;
+	chan_info->enabled = status;
+
+	return 0;
+}
+
+/*
+ * Table 58. FILTER_MODE_n bits and Filter Types of the datasheet describes
+ * the relation between filter mode, ODR and FS.
+ *
+ * Notice that the max ODR of each filter mode is not necessarily the
+ * absolute max ODR supported by the chip.
+ *
+ * The ODR divider is not explicitly specified, but it can be deduced based
+ * on the ODR range of each filter mode.
+ *
+ * For example, for Sinc4+Sinc1, max ODR is 218.18. That means that the
+ * absolute max ODR is divided by 11 to achieve the max ODR of this filter
+ * mode.
+ *
+ * The formulas for converting between ODR and FS for a specific filter
+ * mode can be deduced from the same table.
+ *
+ * Notice that FS = 1 actually means max ODR, and that ODR decreases by
+ * (maximum ODR / maximum FS) for each increment of FS.
+ *
+ * odr = MAX_ODR / odr_div * (1 - (fs - 1) / fs_max) <=>
+ * odr = MAX_ODR * (1 - (fs - 1) / fs_max) / odr_div <=>
+ * odr = MAX_ODR * (1 - (fs - 1) / fs_max) / odr_div <=>
+ * odr = MAX_ODR * (fs_max - fs + 1) / (fs_max * odr_div)
+ * (used in ad4130_fs_to_freq)
+ *
+ * For the opposite formula, FS can be extracted from the last one.
+ *
+ * MAX_ODR * (fs_max - fs + 1) = fs_max * odr_div * odr <=>
+ * fs_max - fs + 1 = fs_max * odr_div * odr / MAX_ODR <=>
+ * fs = 1 + fs_max - fs_max * odr_div * odr / MAX_ODR
+ * (used in ad4130_fs_to_freq)
+ */
+
+static void ad4130_freq_to_fs(enum ad4130_filter_mode filter_mode,
+			      int val, int val2, unsigned int *fs)
+{
+	const struct ad4130_filter_config *filter_config =
+		&ad4130_filter_configs[filter_mode];
+	u64 dividend, divisor;
+	int temp;
+
+	dividend = filter_config->fs_max * filter_config->odr_div *
+		   ((u64)val * NANO + val2);
+	divisor = (u64)AD4130_MAX_ODR * NANO;
+
+	temp = AD4130_FILTER_SELECT_MIN + filter_config->fs_max -
+	       DIV64_U64_ROUND_CLOSEST(dividend, divisor);
+
+	if (temp < AD4130_FILTER_SELECT_MIN)
+		temp = AD4130_FILTER_SELECT_MIN;
+	else if (temp > filter_config->fs_max)
+		temp = filter_config->fs_max;
+
+	*fs = temp;
+}
+
+static void ad4130_fs_to_freq(enum ad4130_filter_mode filter_mode,
+			      unsigned int fs, int *val, int *val2)
+{
+	const struct ad4130_filter_config *filter_config =
+		&ad4130_filter_configs[filter_mode];
+	unsigned int dividend, divisor;
+	u64 temp;
+
+	dividend = (filter_config->fs_max - fs + AD4130_FILTER_SELECT_MIN) *
+		   AD4130_MAX_ODR;
+	divisor = filter_config->fs_max * filter_config->odr_div;
+
+	temp = div_u64((u64)dividend * NANO, divisor);
+	*val = div_u64_rem(temp, NANO, val2);
+}
+
+static int ad4130_set_filter_mode(struct iio_dev *indio_dev,
+				  const struct iio_chan_spec *chan,
+				  unsigned int val)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int channel = chan->scan_index;
+	struct ad4130_chan_info *chan_info = &st->chans_info[channel];
+	struct ad4130_setup_info *setup_info = &chan_info->setup;
+	enum ad4130_filter_mode old_filter_mode;
+	int freq_val, freq_val2;
+	unsigned int old_fs;
+	int ret = 0;
+
+	mutex_lock(&st->lock);
+	if (setup_info->filter_mode == val)
+		goto out;
+
+	old_fs = setup_info->fs;
+	old_filter_mode = setup_info->filter_mode;
+
+	/*
+	 * When switching between filter modes, try to match the ODR as
+	 * close as possible. To do this, convert the current FS into ODR
+	 * using the old filter mode, then convert it back into FS using
+	 * the new filter mode.
+	 */
+	ad4130_fs_to_freq(setup_info->filter_mode, setup_info->fs,
+			  &freq_val, &freq_val2);
+
+	ad4130_freq_to_fs(val, freq_val, freq_val2, &setup_info->fs);
+
+	setup_info->filter_mode = val;
+
+	ret = ad4130_write_channel_setup(st, channel, false);
+	if (ret) {
+		setup_info->fs = old_fs;
+		setup_info->filter_mode = old_filter_mode;
+	}
+
+ out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static int ad4130_get_filter_mode(struct iio_dev *indio_dev,
+				  const struct iio_chan_spec *chan)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int channel = chan->scan_index;
+	struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup;
+	enum ad4130_filter_mode filter_mode;
+
+	mutex_lock(&st->lock);
+	filter_mode = setup_info->filter_mode;
+	mutex_unlock(&st->lock);
+
+	return filter_mode;
+}
+
+static const struct iio_enum ad4130_filter_mode_enum = {
+	.items = ad4130_filter_modes_str,
+	.num_items = ARRAY_SIZE(ad4130_filter_modes_str),
+	.set = ad4130_set_filter_mode,
+	.get = ad4130_get_filter_mode,
+};
+
+static const struct iio_chan_spec_ext_info ad4130_filter_mode_ext_info[] = {
+	IIO_ENUM("filter_mode", IIO_SEPARATE, &ad4130_filter_mode_enum),
+	IIO_ENUM_AVAILABLE("filter_mode", IIO_SHARED_BY_TYPE,
+			   &ad4130_filter_mode_enum),
+	{ }
+};
+
+static const struct iio_chan_spec ad4130_channel_template = {
+	.type = IIO_VOLTAGE,
+	.indexed = 1,
+	.differential = 1,
+	.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) |
+			      BIT(IIO_CHAN_INFO_SCALE) |
+			      BIT(IIO_CHAN_INFO_OFFSET) |
+			      BIT(IIO_CHAN_INFO_SAMP_FREQ),
+	.info_mask_separate_available = BIT(IIO_CHAN_INFO_SCALE) |
+					BIT(IIO_CHAN_INFO_SAMP_FREQ),
+	.ext_info = ad4130_filter_mode_ext_info,
+	.scan_type = {
+		.sign = 'u',
+		.endianness = IIO_BE,
+	},
+};
+
+static int ad4130_set_channel_pga(struct ad4130_state *st, unsigned int channel,
+				  int val, int val2)
+{
+	struct ad4130_chan_info *chan_info = &st->chans_info[channel];
+	struct ad4130_setup_info *setup_info = &chan_info->setup;
+	unsigned int pga, old_pga;
+	int ret = 0;
+
+	for (pga = 0; pga < AD4130_MAX_PGA; pga++)
+		if (val == st->scale_tbls[setup_info->ref_sel][pga][0] &&
+		    val2 == st->scale_tbls[setup_info->ref_sel][pga][1])
+			break;
+
+	if (pga == AD4130_MAX_PGA)
+		return -EINVAL;
+
+	mutex_lock(&st->lock);
+	if (pga == setup_info->pga)
+		goto out;
+
+	old_pga = setup_info->pga;
+	setup_info->pga = pga;
+
+	ret = ad4130_write_channel_setup(st, channel, false);
+	if (ret)
+		setup_info->pga = old_pga;
+
+out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static int ad4130_set_channel_freq(struct ad4130_state *st,
+				   unsigned int channel, int val, int val2)
+{
+	struct ad4130_chan_info *chan_info = &st->chans_info[channel];
+	struct ad4130_setup_info *setup_info = &chan_info->setup;
+	unsigned int fs, old_fs;
+	int ret = 0;
+
+	mutex_lock(&st->lock);
+	old_fs = setup_info->fs;
+
+	ad4130_freq_to_fs(setup_info->filter_mode, val, val2, &fs);
+
+	if (fs == setup_info->fs)
+		goto out;
+
+	setup_info->fs = fs;
+
+	ret = ad4130_write_channel_setup(st, channel, false);
+	if (ret)
+		setup_info->fs = old_fs;
+
+out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static int _ad4130_read_sample(struct iio_dev *indio_dev, unsigned int channel,
+			       int *val)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = ad4130_set_channel_enable(st, channel, true);
+	if (ret)
+		return ret;
+
+	reinit_completion(&st->completion);
+
+	ret = ad4130_set_mode(st, AD4130_MODE_CONTINUOUS);
+	if (ret)
+		return ret;
+
+	ret = wait_for_completion_timeout(&st->completion,
+					  msecs_to_jiffies(1000));
+	if (!ret)
+		return -ETIMEDOUT;
+
+	ret = ad4130_set_mode(st, AD4130_MODE_IDLE);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(st->regmap, AD4130_DATA_REG, val);
+	if (ret)
+		return ret;
+
+	ret = ad4130_set_channel_enable(st, channel, false);
+	if (ret)
+		return ret;
+
+	return IIO_VAL_INT;
+}
+
+static int ad4130_read_sample(struct iio_dev *indio_dev, unsigned int channel,
+			      int *val)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = iio_device_claim_direct_mode(indio_dev);
+	if (ret)
+		return ret;
+
+	mutex_lock(&st->lock);
+	ret = _ad4130_read_sample(indio_dev, channel, val);
+	mutex_unlock(&st->lock);
+
+	iio_device_release_direct_mode(indio_dev);
+
+	return ret;
+}
+
+static int ad4130_read_raw(struct iio_dev *indio_dev,
+			   struct iio_chan_spec const *chan,
+			   int *val, int *val2, long info)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int channel = chan->scan_index;
+	struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		return ad4130_read_sample(indio_dev, channel, val);
+	case IIO_CHAN_INFO_SCALE:
+		mutex_lock(&st->lock);
+		*val = st->scale_tbls[setup_info->ref_sel][setup_info->pga][0];
+		*val2 = st->scale_tbls[setup_info->ref_sel][setup_info->pga][1];
+		mutex_unlock(&st->lock);
+
+		return IIO_VAL_INT_PLUS_NANO;
+	case IIO_CHAN_INFO_OFFSET:
+		*val = st->bipolar ? -BIT(chan->scan_type.realbits - 1) : 0;
+
+		return IIO_VAL_INT;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		mutex_lock(&st->lock);
+		ad4130_fs_to_freq(setup_info->filter_mode, setup_info->fs,
+				  val, val2);
+		mutex_unlock(&st->lock);
+
+		return IIO_VAL_INT_PLUS_NANO;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad4130_read_avail(struct iio_dev *indio_dev,
+			     struct iio_chan_spec const *chan,
+			     const int **vals, int *type, int *length,
+			     long info)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int channel = chan->scan_index;
+	struct ad4130_setup_info *setup_info = &st->chans_info[channel].setup;
+	const struct ad4130_filter_config *filter_config;
+
+	switch (info) {
+	case IIO_CHAN_INFO_SCALE:
+		*vals = (int *)st->scale_tbls[setup_info->ref_sel];
+		*length = ARRAY_SIZE(st->scale_tbls[setup_info->ref_sel]) * 2;
+
+		*type = IIO_VAL_INT_PLUS_NANO;
+
+		return IIO_AVAIL_LIST;
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		mutex_lock(&st->lock);
+		filter_config = &ad4130_filter_configs[setup_info->filter_mode];
+		mutex_unlock(&st->lock);
+
+		*vals = (int *)filter_config->samp_freq_avail;
+		*length = filter_config->samp_freq_avail_len * 2;
+		*type = IIO_VAL_FRACTIONAL;
+
+		return filter_config->samp_freq_avail_type;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad4130_write_raw_get_fmt(struct iio_dev *indio_dev,
+				    struct iio_chan_spec const *chan,
+				    long info)
+{
+	switch (info) {
+	case IIO_CHAN_INFO_SCALE:
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		return IIO_VAL_INT_PLUS_NANO;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad4130_write_raw(struct iio_dev *indio_dev,
+			    struct iio_chan_spec const *chan,
+			    int val, int val2, long info)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int channel = chan->scan_index;
+
+	switch (info) {
+	case IIO_CHAN_INFO_SCALE:
+		return ad4130_set_channel_pga(st, channel, val, val2);
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		return ad4130_set_channel_freq(st, channel, val, val2);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad4130_reg_access(struct iio_dev *indio_dev, unsigned int reg,
+			     unsigned int writeval, unsigned int *readval)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+
+	if (readval)
+		return regmap_read(st->regmap, reg, readval);
+
+	return regmap_write(st->regmap, reg, writeval);
+}
+
+static int ad4130_update_scan_mode(struct iio_dev *indio_dev,
+				   const unsigned long *scan_mask)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int channel;
+	unsigned int val = 0;
+	int ret;
+
+	mutex_lock(&st->lock);
+
+	for_each_set_bit(channel, scan_mask, indio_dev->num_channels) {
+		ret = ad4130_set_channel_enable(st, channel, true);
+		if (ret)
+			goto out;
+
+		val++;
+	}
+
+	st->num_enabled_channels = val;
+
+out:
+	mutex_unlock(&st->lock);
+
+	return 0;
+}
+
+static int ad4130_set_fifo_watermark(struct iio_dev *indio_dev, unsigned int val)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int eff;
+	int ret;
+
+	if (val > AD4130_FIFO_SIZE)
+		return -EINVAL;
+
+	eff = val * st->num_enabled_channels;
+	if (eff > AD4130_FIFO_SIZE)
+		/*
+		 * Always set watermark to a multiple of the number of
+		 * enabled channels to avoid making the FIFO unaligned.
+		 */
+		eff = rounddown(AD4130_FIFO_SIZE, st->num_enabled_channels);
+
+	mutex_lock(&st->lock);
+
+	ret = regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG,
+				 AD4130_FIFO_CONTROL_WM_MASK,
+				 FIELD_PREP(AD4130_FIFO_CONTROL_WM_MASK,
+					    ad4130_watermark_reg_val(eff)));
+	if (ret)
+		goto out;
+
+	st->effective_watermark = eff;
+	st->watermark = val;
+
+out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static const struct iio_info ad4130_info = {
+	.read_raw = ad4130_read_raw,
+	.read_avail = ad4130_read_avail,
+	.write_raw_get_fmt = ad4130_write_raw_get_fmt,
+	.write_raw = ad4130_write_raw,
+	.update_scan_mode = ad4130_update_scan_mode,
+	.hwfifo_set_watermark = ad4130_set_fifo_watermark,
+	.debugfs_reg_access = ad4130_reg_access,
+};
+
+static int ad4130_buffer_postenable(struct iio_dev *indio_dev)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	int ret;
+
+	mutex_lock(&st->lock);
+
+	ret = ad4130_set_watermark_interrupt_en(st, true);
+	if (ret)
+		goto out;
+
+	ret = irq_set_irq_type(st->spi->irq, st->inv_irq_trigger);
+	if (ret)
+		goto out;
+
+	ret = ad4130_set_fifo_mode(st, AD4130_FIFO_MODE_WM);
+	if (ret)
+		goto out;
+
+	ret = ad4130_set_mode(st, AD4130_MODE_CONTINUOUS);
+
+out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static int ad4130_buffer_predisable(struct iio_dev *indio_dev)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int i;
+	int ret;
+
+	mutex_lock(&st->lock);
+
+	ret = ad4130_set_mode(st, AD4130_MODE_IDLE);
+	if (ret)
+		goto out;
+
+	ret = irq_set_irq_type(st->spi->irq, st->irq_trigger);
+	if (ret)
+		goto out;
+
+	ret = ad4130_set_fifo_mode(st, AD4130_FIFO_MODE_DISABLED);
+	if (ret)
+		goto out;
+
+	ret = ad4130_set_watermark_interrupt_en(st, false);
+	if (ret)
+		goto out;
+
+	/*
+	 * update_scan_mode() is not called in the disable path, disable all
+	 * channels here.
+	 */
+	for (i = 0; i < indio_dev->num_channels; i++) {
+		ret = ad4130_set_channel_enable(st, i, false);
+		if (ret)
+			goto out;
+	}
+
+out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static const struct iio_buffer_setup_ops ad4130_buffer_ops = {
+	.postenable = ad4130_buffer_postenable,
+	.predisable = ad4130_buffer_predisable,
+};
+
+static ssize_t hwfifo_watermark_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct ad4130_state *st = iio_priv(dev_to_iio_dev(dev));
+	unsigned int val;
+
+	mutex_lock(&st->lock);
+	val = st->watermark;
+	mutex_unlock(&st->lock);
+
+	return sysfs_emit(buf, "%d\n", val);
+}
+
+static ssize_t hwfifo_enabled_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct ad4130_state *st = iio_priv(dev_to_iio_dev(dev));
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(st->regmap, AD4130_FIFO_CONTROL_REG, &val);
+	if (ret)
+		return ret;
+
+	val = FIELD_GET(AD4130_FIFO_CONTROL_MODE_MASK, val);
+
+	return sysfs_emit(buf, "%d\n", val != AD4130_FIFO_MODE_DISABLED);
+}
+
+static ssize_t hwfifo_watermark_min_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return sysfs_emit(buf, "%s\n", "1");
+}
+
+static ssize_t hwfifo_watermark_max_show(struct device *dev,
+					 struct device_attribute *attr,
+					 char *buf)
+{
+	return sysfs_emit(buf, "%s\n", __stringify(AD4130_FIFO_SIZE));
+}
+
+static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0);
+static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
+static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0);
+static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0);
+
+static const struct attribute *ad4130_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
+	&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
+	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
+	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+	NULL
+};
+
+static int _ad4130_find_table_index(const unsigned int *tbl, size_t len,
+				    unsigned int val)
+{
+	unsigned int i;
+
+	for (i = 0; i < len; i++)
+		if (tbl[i] == val)
+			return i;
+
+	return -EINVAL;
+}
+
+#define ad4130_find_table_index(table, val) \
+	_ad4130_find_table_index(table, ARRAY_SIZE(table), val)
+
+static int ad4130_get_ref_voltage(struct ad4130_state *st,
+				  enum ad4130_ref_sel ref_sel)
+{
+	switch (ref_sel) {
+	case AD4130_REF_REFIN1:
+		return regulator_get_voltage(st->regulators[2].consumer);
+	case AD4130_REF_REFIN2:
+		return regulator_get_voltage(st->regulators[3].consumer);
+	case AD4130_REF_AVDD_AVSS:
+		return regulator_get_voltage(st->regulators[0].consumer);
+	case AD4130_REF_REFOUT_AVSS:
+		return st->int_ref_uv;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad4130_parse_fw_setup(struct ad4130_state *st,
+				 struct fwnode_handle *child,
+				 struct ad4130_setup_info *setup_info)
+{
+	struct device *dev = &st->spi->dev;
+	u32 tmp;
+	int ret;
+
+	tmp = 0;
+	fwnode_property_read_u32(child, "adi,excitation-current-0-nanoamp", &tmp);
+	ret = ad4130_find_table_index(ad4130_iout_current_na_tbl, tmp);
+	if (ret < 0)
+		return dev_err_probe(dev, ret,
+				     "Invalid excitation current %unA\n", tmp);
+	setup_info->iout0_val = ret;
+
+	tmp = 0;
+	fwnode_property_read_u32(child, "adi,excitation-current-1-nanoamp", &tmp);
+	ret = ad4130_find_table_index(ad4130_iout_current_na_tbl, tmp);
+	if (ret < 0)
+		return dev_err_probe(dev, ret,
+				     "Invalid excitation current %unA\n", tmp);
+	setup_info->iout1_val = ret;
+
+	tmp = 0;
+	fwnode_property_read_u32(child, "adi,burnout-current-nanoamp", &tmp);
+	ret = ad4130_find_table_index(ad4130_burnout_current_na_tbl, tmp);
+	if (ret < 0)
+		return dev_err_probe(dev, ret,
+				     "Invalid burnout current %unA\n", tmp);
+	setup_info->burnout = ret;
+
+	setup_info->ref_bufp = fwnode_property_read_bool(child, "adi,buffered-positive");
+	setup_info->ref_bufm = fwnode_property_read_bool(child, "adi,buffered-negative");
+
+	setup_info->ref_sel = AD4130_REF_REFIN1;
+	fwnode_property_read_u32(child, "adi,reference-select",
+				 &setup_info->ref_sel);
+	if (setup_info->ref_sel >= AD4130_REF_SEL_MAX)
+		return dev_err_probe(dev, -EINVAL,
+				     "Invalid reference selected %u\n",
+				     setup_info->ref_sel);
+
+	if (setup_info->ref_sel == AD4130_REF_REFOUT_AVSS)
+		st->int_ref_en = true;
+
+	ret = ad4130_get_ref_voltage(st, setup_info->ref_sel);
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "Cannot use reference %u\n",
+				     setup_info->ref_sel);
+
+	return 0;
+}
+
+static int ad4130_validate_diff_channel(struct ad4130_state *st, u32 pin)
+{
+	struct device *dev = &st->spi->dev;
+
+	if (pin >= AD4130_MAX_DIFF_INPUTS)
+		return dev_err_probe(dev, -EINVAL,
+				     "Invalid diffreential channel %u\n", pin);
+
+	if (pin >= AD4130_MAX_ANALOG_PINS)
+		return 0;
+
+	if (st->pins_fn[pin] == AD4130_PIN_FN_SPECIAL)
+		return dev_err_probe(dev, -EINVAL,
+				     "Pin %u already used with fn %u\n", pin,
+				     st->pins_fn[pin]);
+
+	st->pins_fn[pin] |= AD4130_PIN_FN_DIFF;
+
+	return 0;
+}
+
+static int ad4130_validate_diff_channels(struct ad4130_state *st,
+					 u32 *pins, unsigned int len)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < len; i++) {
+		ret = ad4130_validate_diff_channel(st, pins[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ad4130_validate_excitation_pin(struct ad4130_state *st, u32 pin)
+{
+	struct device *dev = &st->spi->dev;
+
+	if (pin >= AD4130_MAX_ANALOG_PINS)
+		return dev_err_probe(dev, -EINVAL,
+				     "Invalid excitation pin %u\n", pin);
+
+	if (st->pins_fn[pin] == AD4130_PIN_FN_SPECIAL)
+		return dev_err_probe(dev, -EINVAL,
+				     "Pin %u already used with fn %u\n", pin,
+				     st->pins_fn[pin]);
+
+	st->pins_fn[pin] |= AD4130_PIN_FN_EXCITATION;
+
+	return 0;
+}
+
+static int ad4130_validate_vbias_pin(struct ad4130_state *st, u32 pin)
+{
+	struct device *dev = &st->spi->dev;
+
+	if (pin >= AD4130_MAX_ANALOG_PINS)
+		return dev_err_probe(dev, -EINVAL, "Invalid vbias pin %u\n",
+				     pin);
+
+	if (st->pins_fn[pin] == AD4130_PIN_FN_SPECIAL)
+		return dev_err_probe(dev, -EINVAL,
+				     "Pin %u already used with fn %u\n", pin,
+				     st->pins_fn[pin]);
+
+	st->pins_fn[pin] |= AD4130_PIN_FN_VBIAS;
+
+	return 0;
+}
+
+static int ad4130_validate_vbias_pins(struct ad4130_state *st,
+				      u32 *pins, unsigned int len)
+{
+	unsigned int i;
+	int ret;
+
+	for (i = 0; i < st->num_vbias_pins; i++) {
+		ret = ad4130_validate_vbias_pin(st, pins[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ad4130_parse_fw_channel(struct iio_dev *indio_dev,
+				   struct fwnode_handle *child)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	unsigned int resolution = ad4130_resolution(st);
+	unsigned int index = indio_dev->num_channels++;
+	struct device *dev = &st->spi->dev;
+	struct ad4130_chan_info *chan_info;
+	struct iio_chan_spec *chan;
+	u32 pins[2];
+	int ret;
+
+	if (index >= AD4130_MAX_CHANNELS)
+		return dev_err_probe(dev, -EINVAL, "Too many channels\n");
+
+	chan = &st->chans[index];
+	chan_info = &st->chans_info[index];
+
+	*chan = ad4130_channel_template;
+	chan->scan_type.realbits = resolution;
+	chan->scan_type.storagebits = resolution;
+	chan->scan_index = index;
+
+	chan_info->slot = AD4130_INVALID_SLOT;
+	chan_info->setup.fs = AD4130_FILTER_SELECT_MIN;
+	chan_info->initialized = true;
+
+	ret = fwnode_property_read_u32_array(child, "diff-channels", pins,
+					     ARRAY_SIZE(pins));
+	if (ret)
+		return ret;
+
+	ret = ad4130_validate_diff_channels(st, pins, ARRAY_SIZE(pins));
+	if (ret)
+		return ret;
+
+	chan->channel = pins[0];
+	chan->channel2 = pins[1];
+
+	ret = ad4130_parse_fw_setup(st, child, &chan_info->setup);
+	if (ret)
+		return ret;
+
+	fwnode_property_read_u32(child, "adi,excitation-pin-0",
+				 &chan_info->iout0);
+	if (chan_info->setup.iout0_val != AD4130_IOUT_OFF) {
+		ret = ad4130_validate_excitation_pin(st, chan_info->iout0);
+		if (ret)
+			return ret;
+	}
+
+	fwnode_property_read_u32(child, "adi,excitation-pin-1",
+				 &chan_info->iout1);
+	if (chan_info->setup.iout1_val != AD4130_IOUT_OFF) {
+		ret = ad4130_validate_excitation_pin(st, chan_info->iout1);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ad4130_parse_fw_children(struct iio_dev *indio_dev)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	struct device *dev = &st->spi->dev;
+	struct fwnode_handle *child;
+	int ret;
+
+	indio_dev->channels = st->chans;
+
+	device_for_each_child_node(dev, child) {
+		ret = ad4130_parse_fw_channel(indio_dev, child);
+		if (ret) {
+			fwnode_handle_put(child);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int ad4310_parse_fw(struct iio_dev *indio_dev)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	struct device *dev = &st->spi->dev;
+	u32 ext_clk_freq = AD4130_MCLK_FREQ_76_8KHZ;
+	unsigned int i;
+	int avdd_uv;
+	int irq;
+	int ret;
+
+	st->mclk = devm_clk_get_optional(dev, "mclk");
+	if (IS_ERR(st->mclk))
+		return dev_err_probe(dev, PTR_ERR(st->mclk),
+				     "Failed to get mclk\n");
+
+	st->int_pin_sel = AD4130_INT_PIN_INT;
+
+	for (i = 0; i < ARRAY_SIZE(ad4130_int_pin_names); i++) {
+		irq = fwnode_irq_get_byname(dev_fwnode(dev),
+					    ad4130_int_pin_names[i]);
+		if (irq > 0) {
+			st->int_pin_sel = i;
+			break;
+		}
+	}
+
+	if (st->int_pin_sel == AD4130_INT_PIN_DOUT)
+		return dev_err_probe(dev, -EINVAL,
+				     "Cannot use DOUT as interrupt pin\n");
+
+	if (st->int_pin_sel == AD4130_INT_PIN_P2)
+		st->pins_fn[AD4130_AIN3_P2] = AD4130_PIN_FN_SPECIAL;
+
+	device_property_read_u32(dev, "adi,ext-clk-freq-hz", &ext_clk_freq);
+	if (ext_clk_freq != AD4130_MCLK_FREQ_153_6KHZ &&
+	    ext_clk_freq != AD4130_MCLK_FREQ_76_8KHZ)
+		return dev_err_probe(dev, -EINVAL,
+				     "Invalid external clock frequency %u\n",
+				     ext_clk_freq);
+
+	if (st->mclk && ext_clk_freq == AD4130_MCLK_FREQ_153_6KHZ)
+		st->mclk_sel = AD4130_MCLK_153_6KHZ_EXT;
+	else if (st->mclk)
+		st->mclk_sel = AD4130_MCLK_76_8KHZ_EXT;
+	else
+		st->mclk_sel = AD4130_MCLK_76_8KHZ;
+
+	if (st->int_pin_sel == AD4130_INT_PIN_CLK &&
+	    st->mclk_sel != AD4130_MCLK_76_8KHZ)
+		return dev_err_probe(dev, -EINVAL,
+				     "Invalid clock %u for interrupt pin %u\n",
+				     st->mclk_sel, st->int_pin_sel);
+
+	st->int_ref_uv = AD4130_INT_REF_2_5V;
+
+	/*
+	 * When the AVDD supply is set to below 2.5V the internal reference of
+	 * 1.25V should be selected.
+	 * See datasheet page 37, section ADC REFERENCE.
+	 */
+	avdd_uv = regulator_get_voltage(st->regulators[0].consumer);
+	if (avdd_uv > 0 && avdd_uv < AD4130_INT_REF_2_5V)
+		st->int_ref_uv = AD4130_INT_REF_1_25V;
+
+	st->bipolar = device_property_read_bool(dev, "adi,bipolar");
+
+	ret = device_property_count_u32(dev, "adi,vbias-pins");
+	if (ret > 0) {
+		if (ret > AD4130_MAX_ANALOG_PINS)
+			return dev_err_probe(dev, -EINVAL,
+					     "Too many vbias pins %u\n", ret);
+
+		st->num_vbias_pins = ret;
+
+		ret = device_property_read_u32_array(dev, "adi,vbias-pins",
+						     st->vbias_pins,
+						     st->num_vbias_pins);
+		if (ret)
+			return dev_err_probe(dev, ret,
+					     "Failed to read vbias pins\n");
+
+		ret = ad4130_validate_vbias_pins(st, st->vbias_pins,
+						 st->num_vbias_pins);
+		if (ret)
+			return ret;
+	}
+
+	ret = ad4130_parse_fw_children(indio_dev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static void ad4130_fill_scale_tbls(struct ad4130_state *st)
+{
+	unsigned int pow = ad4130_resolution(st) - st->bipolar;
+	unsigned int i, j;
+
+	for (i = 0; i < AD4130_REF_SEL_MAX; i++) {
+		int ret;
+		u64 nv;
+
+		ret = ad4130_get_ref_voltage(st, i);
+		if (ret < 0)
+			continue;
+
+		nv = (u64)ret * NANO;
+
+		for (j = 0; j < AD4130_MAX_PGA; j++)
+			st->scale_tbls[i][j][1] = div_u64(nv >> (pow + j), MILLI);
+	}
+}
+
+static void ad4130_clk_disable_unprepare(void *clk)
+{
+	clk_disable_unprepare(clk);
+}
+
+static int ad4130_set_mclk_sel(struct ad4130_state *st,
+			       enum ad4130_mclk_sel mclk_sel)
+{
+	return regmap_update_bits(st->regmap, AD4130_ADC_CONTROL_REG,
+				 AD4130_ADC_CONTROL_MCLK_SEL_MASK,
+				 FIELD_PREP(AD4130_ADC_CONTROL_MCLK_SEL_MASK,
+					    mclk_sel));
+}
+
+static unsigned long ad4130_int_clk_recalc_rate(struct clk_hw *hw,
+						unsigned long parent_rate)
+{
+	return AD4130_MCLK_FREQ_76_8KHZ;
+}
+
+static int ad4130_int_clk_is_enabled(struct clk_hw *hw)
+{
+	struct ad4130_state *st = container_of(hw, struct ad4130_state, int_clk_hw);
+
+	return st->mclk_sel == AD4130_MCLK_76_8KHZ_OUT;
+}
+
+static int ad4130_int_clk_prepare(struct clk_hw *hw)
+{
+	struct ad4130_state *st = container_of(hw, struct ad4130_state, int_clk_hw);
+	int ret;
+
+	ret = ad4130_set_mclk_sel(st, AD4130_MCLK_76_8KHZ_OUT);
+	if (ret)
+		return ret;
+
+	st->mclk_sel = AD4130_MCLK_76_8KHZ_OUT;
+
+	return 0;
+}
+
+static void ad4130_int_clk_unprepare(struct clk_hw *hw)
+{
+	struct ad4130_state *st = container_of(hw, struct ad4130_state, int_clk_hw);
+	int ret;
+
+	ret = ad4130_set_mclk_sel(st, AD4130_MCLK_76_8KHZ);
+	if (ret)
+		return;
+
+	st->mclk_sel = AD4130_MCLK_76_8KHZ;
+}
+
+static const struct clk_ops ad4130_int_clk_ops = {
+	.recalc_rate = ad4130_int_clk_recalc_rate,
+	.is_enabled = ad4130_int_clk_is_enabled,
+	.prepare = ad4130_int_clk_prepare,
+	.unprepare = ad4130_int_clk_unprepare,
+};
+
+static int ad4130_setup_int_clk(struct ad4130_state *st)
+{
+	struct device *dev = &st->spi->dev;
+	struct device_node *of_node = dev_of_node(dev);
+	struct clk_init_data init;
+	const char *clk_name;
+	struct clk *clk;
+
+	if (st->int_pin_sel == AD4130_INT_PIN_CLK ||
+	    st->mclk_sel != AD4130_MCLK_76_8KHZ)
+		return 0;
+
+	if (!of_node)
+		return 0;
+
+	clk_name = of_node->name;
+	of_property_read_string(of_node, "clock-output-names", &clk_name);
+
+	init.name = clk_name;
+	init.ops = &ad4130_int_clk_ops;
+
+	st->int_clk_hw.init = &init;
+	clk = devm_clk_register(dev, &st->int_clk_hw);
+	if (IS_ERR(clk))
+		return PTR_ERR(clk);
+
+	return of_clk_add_provider(of_node, of_clk_src_simple_get, clk);
+}
+
+static int ad4130_setup(struct iio_dev *indio_dev)
+{
+	struct ad4130_state *st = iio_priv(indio_dev);
+	struct device *dev = &st->spi->dev;
+	unsigned int int_ref_val;
+	unsigned long rate = AD4130_MCLK_FREQ_76_8KHZ;
+	unsigned int val;
+	unsigned int i;
+	int ret;
+
+	if (st->mclk_sel == AD4130_MCLK_153_6KHZ_EXT)
+		rate = AD4130_MCLK_FREQ_153_6KHZ;
+
+	ret = clk_set_rate(st->mclk, rate);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(st->mclk);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(dev, ad4130_clk_disable_unprepare,
+				       st->mclk);
+	if (ret)
+		return ret;
+
+	if (st->int_ref_uv == AD4130_INT_REF_2_5V)
+		int_ref_val = AD4130_INT_REF_VAL_2_5V;
+	else
+		int_ref_val = AD4130_INT_REF_VAL_1_25V;
+
+	/* Switch to SPI 4-wire mode. */
+	val =  FIELD_PREP(AD4130_ADC_CONTROL_CSB_EN_MASK, 1);
+	val |= FIELD_PREP(AD4130_ADC_CONTROL_BIPOLAR_MASK, st->bipolar);
+	val |= FIELD_PREP(AD4130_ADC_CONTROL_INT_REF_EN_MASK, st->int_ref_en);
+	val |= FIELD_PREP(AD4130_ADC_CONTROL_MODE_MASK, AD4130_MODE_IDLE);
+	val |= FIELD_PREP(AD4130_ADC_CONTROL_MCLK_SEL_MASK, st->mclk_sel);
+	val |= FIELD_PREP(AD4130_ADC_CONTROL_INT_REF_VAL_MASK, int_ref_val);
+
+	ret = regmap_write(st->regmap, AD4130_ADC_CONTROL_REG, val);
+	if (ret)
+		return ret;
+
+	/*
+	 * Configure all GPIOs for output. If configured, the interrupt function
+	 * of P2 takes priority over the GPIO out function.
+	 */
+	val =  AD4130_IO_CONTROL_GPIO_CTRL_MASK;
+	val |= FIELD_PREP(AD4130_IO_CONTROL_INT_PIN_SEL_MASK, st->int_pin_sel);
+
+	ret = regmap_write(st->regmap, AD4130_IO_CONTROL_REG, val);
+	if (ret)
+		return ret;
+
+	val = 0;
+	for (i = 0; i < st->num_vbias_pins; i++)
+		val |= BIT(st->vbias_pins[i]);
+
+	ret = regmap_write(st->regmap, AD4130_VBIAS_REG, val);
+	if (ret)
+		return ret;
+
+	ret = regmap_update_bits(st->regmap, AD4130_FIFO_CONTROL_REG,
+				 AD4130_FIFO_CONTROL_HEADER_MASK, 0);
+	if (ret)
+		return ret;
+
+	/* FIFO watermark interrupt starts out as enabled, disable it. */
+	ret = ad4130_set_watermark_interrupt_en(st, false);
+	if (ret)
+		return ret;
+
+	/* Setup channels. */
+	for (i = 0; i < indio_dev->num_channels; i++) {
+		struct ad4130_chan_info *chan_info = &st->chans_info[i];
+		struct iio_chan_spec *chan = &st->chans[i];
+		unsigned int val;
+
+		val = FIELD_PREP(AD4130_CHANNEL_AINP_MASK, chan->channel) |
+		      FIELD_PREP(AD4130_CHANNEL_AINM_MASK, chan->channel2) |
+		      FIELD_PREP(AD4130_CHANNEL_IOUT1_MASK, chan_info->iout0) |
+		      FIELD_PREP(AD4130_CHANNEL_IOUT2_MASK, chan_info->iout1);
+
+		ret = regmap_write(st->regmap, AD4130_CHANNEL_X_REG(i), val);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int ad4130_soft_reset(struct ad4130_state *st)
+{
+	int ret;
+
+	ret = spi_write(st->spi, st->reset_buf, sizeof(st->reset_buf));
+	if (ret)
+		return ret;
+
+	fsleep(AD4130_RESET_SLEEP_US);
+
+	return 0;
+}
+
+static void ad4130_disable_regulators(void *data)
+{
+	struct ad4130_state *st = data;
+
+	regulator_bulk_disable(ARRAY_SIZE(st->regulators), st->regulators);
+}
+
+static int ad4130_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct iio_dev *indio_dev;
+	struct ad4130_state *st;
+	int ret;
+
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*st));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	st = iio_priv(indio_dev);
+
+	memset(st->reset_buf, 0xff, sizeof(st->reset_buf));
+	init_completion(&st->completion);
+	mutex_init(&st->lock);
+	st->spi = spi;
+
+	/*
+	 * Xfer:   [ XFR1 ] [         XFR2         ]
+	 * Master:  0x7D N   ......................
+	 * Slave:   ......   DATA1 DATA2 ... DATAN
+	 */
+	st->fifo_tx_buf[0] = AD4130_COMMS_READ_MASK | AD4130_FIFO_DATA_REG;
+	st->fifo_xfer[0].tx_buf = st->fifo_tx_buf;
+	st->fifo_xfer[0].len = sizeof(st->fifo_tx_buf);
+	st->fifo_xfer[1].rx_buf = st->fifo_rx_buf;
+	spi_message_init_with_transfers(&st->fifo_msg, st->fifo_xfer,
+					ARRAY_SIZE(st->fifo_xfer));
+
+	indio_dev->name = AD4130_NAME;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->info = &ad4130_info;
+
+	st->regmap = devm_regmap_init(dev, NULL, st, &ad4130_regmap_config);
+	if (IS_ERR(st->regmap))
+		return PTR_ERR(st->regmap);
+
+	st->regulators[0].supply = "avdd";
+	st->regulators[1].supply = "iovdd";
+	st->regulators[2].supply = "refin1";
+	st->regulators[3].supply = "refin2";
+
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(st->regulators),
+				      st->regulators);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to get regulators\n");
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(st->regulators), st->regulators);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to enable regulators\n");
+
+	ret = devm_add_action_or_reset(dev, ad4130_disable_regulators, st);
+	if (ret)
+		return dev_err_probe(dev, ret,
+				     "Failed to add regulators disable action\n");
+
+	ret = ad4130_soft_reset(st);
+	if (ret)
+		return ret;
+
+	ret = ad4310_parse_fw(indio_dev);
+	if (ret)
+		return ret;
+
+	ret = ad4130_setup(indio_dev);
+	if (ret)
+		return ret;
+
+	ret = ad4130_setup_int_clk(st);
+	if (ret)
+		return ret;
+
+	ad4130_fill_scale_tbls(st);
+
+	st->gc.owner = THIS_MODULE;
+	st->gc.label = AD4130_NAME;
+	st->gc.base = -1;
+	st->gc.ngpio = AD4130_MAX_GPIOS;
+	st->gc.parent = dev;
+	st->gc.can_sleep = true;
+	st->gc.init_valid_mask = ad4130_gpio_init_valid_mask;
+	st->gc.get_direction = ad4130_gpio_get_direction;
+	st->gc.set = ad4130_gpio_set;
+
+	ret = devm_gpiochip_add_data(dev, &st->gc, st);
+	if (ret)
+		return ret;
+
+	ret = devm_iio_kfifo_buffer_setup_ext(dev, indio_dev,
+					      &ad4130_buffer_ops,
+					      ad4130_fifo_attributes);
+	if (ret)
+		return ret;
+
+	ret = devm_request_threaded_irq(dev, spi->irq, NULL,
+					ad4130_irq_handler, IRQF_ONESHOT,
+					indio_dev->name, indio_dev);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to request irq\n");
+
+	/*
+	 * When the chip enters FIFO mode, IRQ polarity is inverted.
+	 * When the chip exits FIFO mode, IRQ polarity returns to normal.
+	 * See datasheet pages: 65, FIFO Watermark Interrupt section,
+	 * and 71, Bit Descriptions for STATUS Register, RDYB.
+	 * Cache the normal and inverted IRQ triggers to set them when
+	 * entering and exiting FIFO mode.
+	 */
+	st->irq_trigger = irq_get_trigger_type(spi->irq);
+	if (st->irq_trigger & IRQF_TRIGGER_RISING)
+		st->inv_irq_trigger = IRQF_TRIGGER_FALLING;
+	else if (st->irq_trigger & IRQF_TRIGGER_FALLING)
+		st->inv_irq_trigger = IRQF_TRIGGER_RISING;
+	else
+		return dev_err_probe(dev, -EINVAL, "Invalid irq flags: %u\n",
+				     st->irq_trigger);
+
+	return devm_iio_device_register(dev, indio_dev);
+}
+
+static const struct of_device_id ad4130_of_match[] = {
+	{
+		.compatible = "adi,ad4130",
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ad4130_of_match);
+
+static struct spi_driver ad4130_driver = {
+	.driver = {
+		.name = AD4130_NAME,
+		.of_match_table = ad4130_of_match,
+	},
+	.probe = ad4130_probe,
+};
+module_spi_driver(ad4130_driver);
+
+MODULE_AUTHOR("Cosmin Tanislav <cosmin.tanislav@analog.com>");
+MODULE_DESCRIPTION("Analog Devices AD4130 SPI driver");
+MODULE_LICENSE("GPL");
-- 
GitLab


From e54ec96c7201507db1b2c053068cda32f3c42cc7 Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Mon, 17 Oct 2022 12:02:30 +0530
Subject: [PATCH 2299/4122] staging: iio: frequency: ad9834: merge unnecessary
 split lines

Improve code readability by merging unnecessary split lines that are
well within the code-style guidelines post merge.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Acked-by: Julia Lawall <julia.lawall@inria.fr>
Link: https://lore.kernel.org/r/Y0z2/qFe3kW96MTs@debian-BULLSEYE-live-builder-AMD64
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/frequency/ad9834.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/iio/frequency/ad9834.c b/drivers/staging/iio/frequency/ad9834.c
index 2b4267a87e65e..285df0e489a62 100644
--- a/drivers/staging/iio/frequency/ad9834.c
+++ b/drivers/staging/iio/frequency/ad9834.c
@@ -331,11 +331,9 @@ static IIO_DEV_ATTR_PHASE(0, 1, 0200, NULL, ad9834_write, AD9834_REG_PHASE1);
 static IIO_DEV_ATTR_PHASESYMBOL(0, 0200, NULL, ad9834_write, AD9834_PSEL);
 static IIO_CONST_ATTR_PHASE_SCALE(0, "0.0015339808"); /* 2PI/2^12 rad*/
 
-static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL,
-	ad9834_write, AD9834_PIN_SW);
+static IIO_DEV_ATTR_PINCONTROL_EN(0, 0200, NULL, ad9834_write, AD9834_PIN_SW);
 static IIO_DEV_ATTR_OUT_ENABLE(0, 0200, NULL, ad9834_write, AD9834_RESET);
-static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL,
-	ad9834_write, AD9834_OPBITEN);
+static IIO_DEV_ATTR_OUTY_ENABLE(0, 1, 0200, NULL, ad9834_write, AD9834_OPBITEN);
 static IIO_DEV_ATTR_OUT_WAVETYPE(0, 0, ad9834_store_wavetype, 0);
 static IIO_DEV_ATTR_OUT_WAVETYPE(0, 1, ad9834_store_wavetype, 1);
 
-- 
GitLab


From e9b96e18cf47133e51f4cb67d9dd9d163abc5f6d Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:33:56 +0100
Subject: [PATCH 2300/4122] iio: accel: adxl367: Use
 devm_regulator_bulk_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new
devm_regulator_bulk_get_enable() replaces this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-2-jic23@kernel.org
---
 drivers/iio/accel/adxl367.c | 28 ++++------------------------
 1 file changed, 4 insertions(+), 24 deletions(-)

diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c
index 7c7d780407937..d7af3a006a444 100644
--- a/drivers/iio/accel/adxl367.c
+++ b/drivers/iio/accel/adxl367.c
@@ -160,8 +160,6 @@ struct adxl367_state {
 	struct device			*dev;
 	struct regmap			*regmap;
 
-	struct regulator_bulk_data	regulators[2];
-
 	/*
 	 * Synchronize access to members of driver state, and ensure atomicity
 	 * of consecutive regmap operations.
@@ -1487,16 +1485,10 @@ static int adxl367_setup(struct adxl367_state *st)
 	return adxl367_set_measure_en(st, true);
 }
 
-static void adxl367_disable_regulators(void *data)
-{
-	struct adxl367_state *st = data;
-
-	regulator_bulk_disable(ARRAY_SIZE(st->regulators), st->regulators);
-}
-
 int adxl367_probe(struct device *dev, const struct adxl367_ops *ops,
 		  void *context, struct regmap *regmap, int irq)
 {
+	static const char * const regulator_names[] = { "vdd", "vddio" };
 	struct iio_dev *indio_dev;
 	struct adxl367_state *st;
 	int ret;
@@ -1520,25 +1512,13 @@ int adxl367_probe(struct device *dev, const struct adxl367_ops *ops,
 	indio_dev->info = &adxl367_info;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
-	st->regulators[0].supply = "vdd";
-	st->regulators[1].supply = "vddio";
-
-	ret = devm_regulator_bulk_get(st->dev, ARRAY_SIZE(st->regulators),
-				      st->regulators);
+	ret = devm_regulator_bulk_get_enable(st->dev,
+					     ARRAY_SIZE(regulator_names),
+					     regulator_names);
 	if (ret)
 		return dev_err_probe(st->dev, ret,
 				     "Failed to get regulators\n");
 
-	ret = regulator_bulk_enable(ARRAY_SIZE(st->regulators), st->regulators);
-	if (ret)
-		return dev_err_probe(st->dev, ret,
-				     "Failed to enable regulators\n");
-
-	ret = devm_add_action_or_reset(st->dev, adxl367_disable_regulators, st);
-	if (ret)
-		return dev_err_probe(st->dev, ret,
-				     "Failed to add regulators disable action\n");
-
 	ret = regmap_write(st->regmap, ADXL367_REG_RESET, ADXL367_RESET_CODE);
 	if (ret)
 		return ret;
-- 
GitLab


From 5b30e739ceaf37aa5817195a20cd1fa7a4607623 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:33:58 +0100
Subject: [PATCH 2301/4122] iio: accel: fxls8962af: Use
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new devm_regulator_get_enable()
replaces this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Sean Nyekjaer <sean@geanix.com>
Reviewed-by: Sean Nyekjaer <sean@geanix.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-4-jic23@kernel.org
---
 drivers/iio/accel/fxls8962af-core.c | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/drivers/iio/accel/fxls8962af-core.c b/drivers/iio/accel/fxls8962af-core.c
index bf259db281f59..0d672b1469e8d 100644
--- a/drivers/iio/accel/fxls8962af-core.c
+++ b/drivers/iio/accel/fxls8962af-core.c
@@ -159,7 +159,6 @@ struct fxls8962af_chip_info {
 struct fxls8962af_data {
 	struct regmap *regmap;
 	const struct fxls8962af_chip_info *chip_info;
-	struct regulator *vdd_reg;
 	struct {
 		__le16 channels[3];
 		s64 ts __aligned(8);
@@ -1051,13 +1050,6 @@ static irqreturn_t fxls8962af_interrupt(int irq, void *p)
 	return IRQ_NONE;
 }
 
-static void fxls8962af_regulator_disable(void *data_ptr)
-{
-	struct fxls8962af_data *data = data_ptr;
-
-	regulator_disable(data->vdd_reg);
-}
-
 static void fxls8962af_pm_disable(void *dev_ptr)
 {
 	struct device *dev = dev_ptr;
@@ -1171,20 +1163,10 @@ int fxls8962af_core_probe(struct device *dev, struct regmap *regmap, int irq)
 	if (ret)
 		return ret;
 
-	data->vdd_reg = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(data->vdd_reg))
-		return dev_err_probe(dev, PTR_ERR(data->vdd_reg),
-				     "Failed to get vdd regulator\n");
-
-	ret = regulator_enable(data->vdd_reg);
-	if (ret) {
-		dev_err(dev, "Failed to enable vdd regulator: %d\n", ret);
-		return ret;
-	}
-
-	ret = devm_add_action_or_reset(dev, fxls8962af_regulator_disable, data);
+	ret = devm_regulator_get_enable(dev, "vdd");
 	if (ret)
-		return ret;
+		return dev_err_probe(dev, ret,
+				     "Failed to get vdd regulator\n");
 
 	ret = regmap_read(data->regmap, FXLS8962AF_WHO_AM_I, &reg);
 	if (ret)
-- 
GitLab


From 1fa4ff1f249983b38394ee79379e028c1c501ae5 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:33:59 +0100
Subject: [PATCH 2302/4122] iio: accel: kxcjk-1013: Use
 devm_regulator_bulk_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new
devm_regulator_bulk_get_enable() replaces this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-5-jic23@kernel.org
---
 drivers/iio/accel/kxcjk-1013.c | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c
index adc66b3615c0b..e626b6fa8a36a 100644
--- a/drivers/iio/accel/kxcjk-1013.c
+++ b/drivers/iio/accel/kxcjk-1013.c
@@ -241,7 +241,6 @@ enum kxcjk1013_axis {
 };
 
 struct kxcjk1013_data {
-	struct regulator_bulk_data regulators[2];
 	struct i2c_client *client;
 	struct iio_trigger *dready_trig;
 	struct iio_trigger *motion_trig;
@@ -1425,16 +1424,10 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev,
 	return dev_name(dev);
 }
 
-static void kxcjk1013_disable_regulators(void *d)
-{
-	struct kxcjk1013_data *data = d;
-
-	regulator_bulk_disable(ARRAY_SIZE(data->regulators), data->regulators);
-}
-
 static int kxcjk1013_probe(struct i2c_client *client,
 			   const struct i2c_device_id *id)
 {
+	static const char * const regulator_names[] = { "vdd", "vddio" };
 	struct kxcjk1013_data *data;
 	struct iio_dev *indio_dev;
 	struct kxcjk_1013_platform_data *pdata;
@@ -1461,22 +1454,12 @@ static int kxcjk1013_probe(struct i2c_client *client,
 			return ret;
 	}
 
-	data->regulators[0].supply = "vdd";
-	data->regulators[1].supply = "vddio";
-	ret = devm_regulator_bulk_get(&client->dev, ARRAY_SIZE(data->regulators),
-				      data->regulators);
+	ret = devm_regulator_bulk_get_enable(&client->dev,
+					     ARRAY_SIZE(regulator_names),
+					     regulator_names);
 	if (ret)
 		return dev_err_probe(&client->dev, ret, "Failed to get regulators\n");
 
-	ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators),
-				    data->regulators);
-	if (ret)
-		return ret;
-
-	ret = devm_add_action_or_reset(&client->dev, kxcjk1013_disable_regulators, data);
-	if (ret)
-		return ret;
-
 	/*
 	 * A typical delay of 10ms is required for powering up
 	 * according to the data sheets of supported chips.
-- 
GitLab


From eefa008b1677244d73ba47920172be36570c87c1 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:00 +0100
Subject: [PATCH 2303/4122] iio: accel: msa311: Use devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new devm_regulator_get_enable()
replaces this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Dmitry Rokosov <DDRokosov@sberdevices.ru>
Reviewed-by: Dmitry Rokosov <ddrokosov@sberdevices.ru>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-6-jic23@kernel.org
---
 drivers/iio/accel/msa311.c | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/drivers/iio/accel/msa311.c b/drivers/iio/accel/msa311.c
index 2fded37591717..af94d3adf6d81 100644
--- a/drivers/iio/accel/msa311.c
+++ b/drivers/iio/accel/msa311.c
@@ -351,7 +351,6 @@ static const struct regmap_config msa311_regmap_config = {
  * @chip_name: Chip name in the format "msa311-%02x" % partid
  * @new_data_trig: Optional NEW_DATA interrupt driven trigger used
  *                 to notify external consumers a new sample is ready
- * @vdd: Optional external voltage regulator for the device power supply
  */
 struct msa311_priv {
 	struct regmap *regs;
@@ -362,7 +361,6 @@ struct msa311_priv {
 	char *chip_name;
 
 	struct iio_trigger *new_data_trig;
-	struct regulator *vdd;
 };
 
 enum msa311_si {
@@ -1146,11 +1144,6 @@ static void msa311_powerdown(void *msa311)
 	msa311_set_pwr_mode(msa311, MSA311_PWR_MODE_SUSPEND);
 }
 
-static void msa311_vdd_disable(void *vdd)
-{
-	regulator_disable(vdd);
-}
-
 static int msa311_probe(struct i2c_client *i2c)
 {
 	struct device *dev = &i2c->dev;
@@ -1173,19 +1166,9 @@ static int msa311_probe(struct i2c_client *i2c)
 
 	mutex_init(&msa311->lock);
 
-	msa311->vdd = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(msa311->vdd))
-		return dev_err_probe(dev, PTR_ERR(msa311->vdd),
-				     "can't get vdd supply\n");
-
-	err = regulator_enable(msa311->vdd);
+	err = devm_regulator_get_enable(dev, "vdd");
 	if (err)
-		return dev_err_probe(dev, err, "can't enable vdd supply\n");
-
-	err = devm_add_action_or_reset(dev, msa311_vdd_disable, msa311->vdd);
-	if (err)
-		return dev_err_probe(dev, err,
-				     "can't add vdd disable action\n");
+		return dev_err_probe(dev, err, "can't get vdd supply\n");
 
 	err = msa311_check_partid(msa311);
 	if (err)
-- 
GitLab


From 72ce527c7bb599ac1d64ec5393f66a673b75de6b Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:01 +0100
Subject: [PATCH 2304/4122] iio: cdc: ad7150: Use devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new devm_regulator_get_enable()
replaces this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-7-jic23@kernel.org
---
 drivers/iio/cdc/ad7150.c | 18 +-----------------
 1 file changed, 1 insertion(+), 17 deletions(-)

diff --git a/drivers/iio/cdc/ad7150.c b/drivers/iio/cdc/ad7150.c
index ebe112b4618b0..1113745890cae 100644
--- a/drivers/iio/cdc/ad7150.c
+++ b/drivers/iio/cdc/ad7150.c
@@ -536,19 +536,11 @@ static const struct iio_info ad7150_info_no_irq = {
 	.read_raw = &ad7150_read_raw,
 };
 
-static void ad7150_reg_disable(void *data)
-{
-	struct regulator *reg = data;
-
-	regulator_disable(reg);
-}
-
 static int ad7150_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
 	struct ad7150_chip_info *chip;
 	struct iio_dev *indio_dev;
-	struct regulator *reg;
 	int ret;
 
 	indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*chip));
@@ -563,15 +555,7 @@ static int ad7150_probe(struct i2c_client *client,
 
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
-	reg = devm_regulator_get(&client->dev, "vdd");
-	if (IS_ERR(reg))
-		return PTR_ERR(reg);
-
-	ret = regulator_enable(reg);
-	if (ret)
-		return ret;
-
-	ret = devm_add_action_or_reset(&client->dev, ad7150_reg_disable, reg);
+	ret = devm_regulator_get_enable(&client->dev, "vdd");
 	if (ret)
 		return ret;
 
-- 
GitLab


From 9e855d77b1ec57704d23e25761a97e6e64abed66 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:02 +0100
Subject: [PATCH 2305/4122] iio: st_sensors: core and lsm9ds0 switch to
 devm_regulator_bulk_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

These drivers only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The two regulators were handled
separately so also switch to bulk registration.
The new devm_regulator_bulk_get_enable() replaces all this boilerplate
code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Linus Walleij <linus.walleij@linaro.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-8-jic23@kernel.org
---
 .../iio/common/st_sensors/st_sensors_core.c   | 39 ++---------
 drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c  | 65 ++-----------------
 include/linux/iio/common/st_sensors.h         |  4 --
 3 files changed, 14 insertions(+), 94 deletions(-)

diff --git a/drivers/iio/common/st_sensors/st_sensors_core.c b/drivers/iio/common/st_sensors/st_sensors_core.c
index 35720c64fea8f..c77d7bdcc1216 100644
--- a/drivers/iio/common/st_sensors/st_sensors_core.c
+++ b/drivers/iio/common/st_sensors/st_sensors_core.c
@@ -219,47 +219,22 @@ int st_sensors_set_axis_enable(struct iio_dev *indio_dev, u8 axis_enable)
 }
 EXPORT_SYMBOL_NS(st_sensors_set_axis_enable, IIO_ST_SENSORS);
 
-static void st_reg_disable(void *reg)
-{
-	regulator_disable(reg);
-}
 
 int st_sensors_power_enable(struct iio_dev *indio_dev)
 {
-	struct st_sensor_data *pdata = iio_priv(indio_dev);
+	static const char * const regulator_names[] = { "vdd", "vddio" };
 	struct device *parent = indio_dev->dev.parent;
 	int err;
 
 	/* Regulators not mandatory, but if requested we should enable them. */
-	pdata->vdd = devm_regulator_get(parent, "vdd");
-	if (IS_ERR(pdata->vdd))
-		return dev_err_probe(&indio_dev->dev, PTR_ERR(pdata->vdd),
-				     "unable to get Vdd supply\n");
-
-	err = regulator_enable(pdata->vdd);
-	if (err != 0) {
-		dev_warn(&indio_dev->dev,
-			 "Failed to enable specified Vdd supply\n");
-		return err;
-	}
-
-	err = devm_add_action_or_reset(parent, st_reg_disable, pdata->vdd);
+	err = devm_regulator_bulk_get_enable(parent,
+					     ARRAY_SIZE(regulator_names),
+					     regulator_names);
 	if (err)
-		return err;
+		return dev_err_probe(&indio_dev->dev, err,
+				     "unable to enable supplies\n");
 
-	pdata->vdd_io = devm_regulator_get(parent, "vddio");
-	if (IS_ERR(pdata->vdd_io))
-		return dev_err_probe(&indio_dev->dev, PTR_ERR(pdata->vdd_io),
-				     "unable to get Vdd_IO supply\n");
-
-	err = regulator_enable(pdata->vdd_io);
-	if (err != 0) {
-		dev_warn(&indio_dev->dev,
-			 "Failed to enable specified Vdd_IO supply\n");
-		return err;
-	}
-
-	return devm_add_action_or_reset(parent, st_reg_disable, pdata->vdd_io);
+	return 0;
 }
 EXPORT_SYMBOL_NS(st_sensors_power_enable, IIO_ST_SENSORS);
 
diff --git a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
index ae7bc815382fb..e887b45cdbcd7 100644
--- a/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
+++ b/drivers/iio/imu/st_lsm9ds0/st_lsm9ds0_core.c
@@ -18,58 +18,6 @@
 
 #include "st_lsm9ds0.h"
 
-static int st_lsm9ds0_power_enable(struct device *dev, struct st_lsm9ds0 *lsm9ds0)
-{
-	int ret;
-
-	/* Regulators not mandatory, but if requested we should enable them. */
-	lsm9ds0->vdd = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(lsm9ds0->vdd))
-		return dev_err_probe(dev, PTR_ERR(lsm9ds0->vdd),
-				     "unable to get Vdd supply\n");
-
-	ret = regulator_enable(lsm9ds0->vdd);
-	if (ret) {
-		dev_warn(dev, "Failed to enable specified Vdd supply\n");
-		return ret;
-	}
-
-	lsm9ds0->vdd_io = devm_regulator_get(dev, "vddio");
-	if (IS_ERR(lsm9ds0->vdd_io)) {
-		regulator_disable(lsm9ds0->vdd);
-		return dev_err_probe(dev, PTR_ERR(lsm9ds0->vdd_io),
-				     "unable to get Vdd_IO supply\n");
-	}
-	ret = regulator_enable(lsm9ds0->vdd_io);
-	if (ret) {
-		dev_warn(dev, "Failed to enable specified Vdd_IO supply\n");
-		regulator_disable(lsm9ds0->vdd);
-		return ret;
-	}
-
-	return 0;
-}
-
-static void st_lsm9ds0_power_disable(void *data)
-{
-	struct st_lsm9ds0 *lsm9ds0 = data;
-
-	regulator_disable(lsm9ds0->vdd_io);
-	regulator_disable(lsm9ds0->vdd);
-}
-
-static int devm_st_lsm9ds0_power_enable(struct st_lsm9ds0 *lsm9ds0)
-{
-	struct device *dev = lsm9ds0->dev;
-	int ret;
-
-	ret = st_lsm9ds0_power_enable(dev, lsm9ds0);
-	if (ret)
-		return ret;
-
-	return devm_add_action_or_reset(dev, st_lsm9ds0_power_disable, lsm9ds0);
-}
-
 static int st_lsm9ds0_probe_accel(struct st_lsm9ds0 *lsm9ds0, struct regmap *regmap)
 {
 	const struct st_sensor_settings *settings;
@@ -92,8 +40,6 @@ static int st_lsm9ds0_probe_accel(struct st_lsm9ds0 *lsm9ds0, struct regmap *reg
 	data->sensor_settings = (struct st_sensor_settings *)settings;
 	data->irq = lsm9ds0->irq;
 	data->regmap = regmap;
-	data->vdd = lsm9ds0->vdd;
-	data->vdd_io = lsm9ds0->vdd_io;
 
 	return st_accel_common_probe(lsm9ds0->accel);
 }
@@ -120,19 +66,22 @@ static int st_lsm9ds0_probe_magn(struct st_lsm9ds0 *lsm9ds0, struct regmap *regm
 	data->sensor_settings = (struct st_sensor_settings *)settings;
 	data->irq = lsm9ds0->irq;
 	data->regmap = regmap;
-	data->vdd = lsm9ds0->vdd;
-	data->vdd_io = lsm9ds0->vdd_io;
 
 	return st_magn_common_probe(lsm9ds0->magn);
 }
 
 int st_lsm9ds0_probe(struct st_lsm9ds0 *lsm9ds0, struct regmap *regmap)
 {
+	struct device *dev = lsm9ds0->dev;
+	static const char * const regulator_names[] = { "vdd", "vddio" };
 	int ret;
 
-	ret = devm_st_lsm9ds0_power_enable(lsm9ds0);
+	/* Regulators not mandatory, but if requested we should enable them. */
+	ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names),
+					     regulator_names);
 	if (ret)
-		return ret;
+		return dev_err_probe(dev, ret,
+				     "unable to enable Vdd supply\n");
 
 	/* Setup accelerometer device */
 	ret = st_lsm9ds0_probe_accel(lsm9ds0, regmap);
diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h
index db4a1b260348c..f5f3ee57bc70a 100644
--- a/include/linux/iio/common/st_sensors.h
+++ b/include/linux/iio/common/st_sensors.h
@@ -224,8 +224,6 @@ struct st_sensor_settings {
  * @mount_matrix: The mounting matrix of the sensor.
  * @sensor_settings: Pointer to the specific sensor settings in use.
  * @current_fullscale: Maximum range of measure by the sensor.
- * @vdd: Pointer to sensor's Vdd power supply
- * @vdd_io: Pointer to sensor's Vdd-IO power supply
  * @regmap: Pointer to specific sensor regmap configuration.
  * @enabled: Status of the sensor (false->off, true->on).
  * @odr: Output data rate of the sensor [Hz].
@@ -244,8 +242,6 @@ struct st_sensor_data {
 	struct iio_mount_matrix mount_matrix;
 	struct st_sensor_settings *sensor_settings;
 	struct st_sensor_fullscale_avl *current_fullscale;
-	struct regulator *vdd;
-	struct regulator *vdd_io;
 	struct regmap *regmap;
 
 	bool enabled;
-- 
GitLab


From d86186a6e0247394c70239713994df5e2c66220c Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:03 +0100
Subject: [PATCH 2306/4122] iio: frequency: ad9523: Use
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new devm_regulator_get_enable()
replaces this boilerplate code.

Note that in event of an error on the devm_regulator_get() the driver
would have continued without enabling the regulator which is probably
not a good idea.  So here we handle any error as a reason to fail the
probe(). In theory this may expose breakage on a platform that was
previously papered over but it seems low risk.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Michael Hennerich <michael.hennerich@analog.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-9-jic23@kernel.org
---
 drivers/iio/frequency/ad9523.c | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/drivers/iio/frequency/ad9523.c b/drivers/iio/frequency/ad9523.c
index 97662ca1ca966..b391c6e27ab0d 100644
--- a/drivers/iio/frequency/ad9523.c
+++ b/drivers/iio/frequency/ad9523.c
@@ -265,7 +265,6 @@ enum {
 
 struct ad9523_state {
 	struct spi_device		*spi;
-	struct regulator		*reg;
 	struct ad9523_platform_data	*pdata;
 	struct iio_chan_spec		ad9523_channels[AD9523_NUM_CHAN];
 	struct gpio_desc		*pwrdown_gpio;
@@ -969,13 +968,6 @@ static int ad9523_setup(struct iio_dev *indio_dev)
 	return 0;
 }
 
-static void ad9523_reg_disable(void *data)
-{
-	struct regulator *reg = data;
-
-	regulator_disable(reg);
-}
-
 static int ad9523_probe(struct spi_device *spi)
 {
 	struct ad9523_platform_data *pdata = spi->dev.platform_data;
@@ -996,17 +988,9 @@ static int ad9523_probe(struct spi_device *spi)
 
 	mutex_init(&st->lock);
 
-	st->reg = devm_regulator_get(&spi->dev, "vcc");
-	if (!IS_ERR(st->reg)) {
-		ret = regulator_enable(st->reg);
-		if (ret)
-			return ret;
-
-		ret = devm_add_action_or_reset(&spi->dev, ad9523_reg_disable,
-					       st->reg);
-		if (ret)
-			return ret;
-	}
+	ret = devm_regulator_get_enable(&spi->dev, "vcc");
+	if (ret)
+		return ret;
 
 	st->pwrdown_gpio = devm_gpiod_get_optional(&spi->dev, "powerdown",
 		GPIOD_OUT_HIGH);
-- 
GitLab


From 2c97f7b404b8610ebca645d60a6ed1e68d08fbe3 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:04 +0100
Subject: [PATCH 2307/4122] iio: humidity: hts211: Use
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new devm_regulator_get_enable()
replaces this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-10-jic23@kernel.org
---
 drivers/iio/humidity/hts221.h      |  2 --
 drivers/iio/humidity/hts221_core.c | 27 ++++-----------------------
 2 files changed, 4 insertions(+), 25 deletions(-)

diff --git a/drivers/iio/humidity/hts221.h b/drivers/iio/humidity/hts221.h
index cf3d8d2dccd6b..721359e226cb9 100644
--- a/drivers/iio/humidity/hts221.h
+++ b/drivers/iio/humidity/hts221.h
@@ -13,7 +13,6 @@
 #define HTS221_DEV_NAME		"hts221"
 
 #include <linux/iio/iio.h>
-#include <linux/regulator/consumer.h>
 
 enum hts221_sensor_type {
 	HTS221_SENSOR_H,
@@ -30,7 +29,6 @@ struct hts221_hw {
 	const char *name;
 	struct device *dev;
 	struct regmap *regmap;
-	struct regulator *vdd;
 
 	struct iio_trigger *trig;
 	int irq;
diff --git a/drivers/iio/humidity/hts221_core.c b/drivers/iio/humidity/hts221_core.c
index 517158307d8c2..2a413da87b760 100644
--- a/drivers/iio/humidity/hts221_core.c
+++ b/drivers/iio/humidity/hts221_core.c
@@ -14,6 +14,7 @@
 #include <linux/delay.h>
 #include <linux/pm.h>
 #include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
 #include <linux/bitfield.h>
 
 #include "hts221.h"
@@ -549,33 +550,17 @@ static const unsigned long hts221_scan_masks[] = {0x3, 0x0};
 
 static int hts221_init_regulators(struct device *dev)
 {
-	struct iio_dev *iio_dev = dev_get_drvdata(dev);
-	struct hts221_hw *hw = iio_priv(iio_dev);
 	int err;
 
-	hw->vdd = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(hw->vdd))
-		return dev_err_probe(dev, PTR_ERR(hw->vdd),
-				     "failed to get vdd regulator\n");
-
-	err = regulator_enable(hw->vdd);
-	if (err) {
-		dev_err(dev, "failed to enable vdd regulator: %d\n", err);
-		return err;
-	}
+	err = devm_regulator_get_enable(dev, "vdd");
+	if (err)
+		return dev_err_probe(dev, err, "failed to get vdd regulator\n");
 
 	msleep(50);
 
 	return 0;
 }
 
-static void hts221_chip_uninit(void *data)
-{
-	struct hts221_hw *hw = data;
-
-	regulator_disable(hw->vdd);
-}
-
 int hts221_probe(struct device *dev, int irq, const char *name,
 		 struct regmap *regmap)
 {
@@ -600,10 +585,6 @@ int hts221_probe(struct device *dev, int irq, const char *name,
 	if (err)
 		return err;
 
-	err = devm_add_action_or_reset(dev, hts221_chip_uninit, hw);
-	if (err)
-		return err;
-
 	err = hts221_check_whoami(hw);
 	if (err < 0)
 		return err;
-- 
GitLab


From c437c977c1d2116e0f3667de7222544f348032e4 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:05 +0100
Subject: [PATCH 2308/4122] iio: light: ltr501: Use
 devm_regulator_bulk_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power for some regulators on at probe and off
via a custom devm_add_action_or_reset() callback. The new
devm_regulator_bulk_get_enable() replaces all this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-11-jic23@kernel.org
---
 drivers/iio/light/ltr501.c | 27 ++++-----------------------
 1 file changed, 4 insertions(+), 23 deletions(-)

diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c
index 74a1ccda8b9c4..453b845ef265b 100644
--- a/drivers/iio/light/ltr501.c
+++ b/drivers/iio/light/ltr501.c
@@ -153,7 +153,6 @@ struct ltr501_chip_info {
 
 struct ltr501_data {
 	struct i2c_client *client;
-	struct regulator_bulk_data regulators[2];
 	struct mutex lock_als, lock_ps;
 	const struct ltr501_chip_info *chip_info;
 	u8 als_contr, ps_contr;
@@ -1415,13 +1414,6 @@ static const struct regmap_config ltr501_regmap_config = {
 	.volatile_reg = ltr501_is_volatile_reg,
 };
 
-static void ltr501_disable_regulators(void *d)
-{
-	struct ltr501_data *data = d;
-
-	regulator_bulk_disable(ARRAY_SIZE(data->regulators), data->regulators);
-}
-
 static int ltr501_powerdown(struct ltr501_data *data)
 {
 	return ltr501_write_contr(data, data->als_contr &
@@ -1443,6 +1435,7 @@ static const char *ltr501_match_acpi_device(struct device *dev, int *chip_idx)
 static int ltr501_probe(struct i2c_client *client,
 			const struct i2c_device_id *id)
 {
+	static const char * const regulator_names[] = { "vdd", "vddio" };
 	struct ltr501_data *data;
 	struct iio_dev *indio_dev;
 	struct regmap *regmap;
@@ -1466,25 +1459,13 @@ static int ltr501_probe(struct i2c_client *client,
 	mutex_init(&data->lock_als);
 	mutex_init(&data->lock_ps);
 
-	data->regulators[0].supply = "vdd";
-	data->regulators[1].supply = "vddio";
-	ret = devm_regulator_bulk_get(&client->dev,
-				      ARRAY_SIZE(data->regulators),
-				      data->regulators);
+	ret = devm_regulator_bulk_get_enable(&client->dev,
+					     ARRAY_SIZE(regulator_names),
+					     regulator_names);
 	if (ret)
 		return dev_err_probe(&client->dev, ret,
 				     "Failed to get regulators\n");
 
-	ret = regulator_bulk_enable(ARRAY_SIZE(data->regulators),
-				    data->regulators);
-	if (ret)
-		return ret;
-
-	ret = devm_add_action_or_reset(&client->dev,
-				       ltr501_disable_regulators, data);
-	if (ret)
-		return ret;
-
 	data->reg_it = devm_regmap_field_alloc(&client->dev, regmap,
 					       reg_field_it);
 	if (IS_ERR(data->reg_it)) {
-- 
GitLab


From b620be5f32621b953313056c396894818d5a2ed9 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:06 +0100
Subject: [PATCH 2309/4122] iio: light: noa1305: Use
 devm_regulator_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power on at probe and off via a custom
devm_add_action_or_reset() callback. The new devm_regulator_get_enable()
replaces this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Martyn Welch <martyn.welch@collabora.com>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-12-jic23@kernel.org
---
 drivers/iio/light/noa1305.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/drivers/iio/light/noa1305.c b/drivers/iio/light/noa1305.c
index ee81fe083e4cc..be3536b390fcb 100644
--- a/drivers/iio/light/noa1305.c
+++ b/drivers/iio/light/noa1305.c
@@ -46,7 +46,6 @@
 struct noa1305_priv {
 	struct i2c_client *client;
 	struct regmap *regmap;
-	struct regulator *vin_reg;
 };
 
 static int noa1305_measure(struct noa1305_priv *priv)
@@ -187,13 +186,6 @@ static const struct regmap_config noa1305_regmap_config = {
 	.writeable_reg = noa1305_writable_reg,
 };
 
-static void noa1305_reg_remove(void *data)
-{
-	struct noa1305_priv *priv = data;
-
-	regulator_disable(priv->vin_reg);
-}
-
 static int noa1305_probe(struct i2c_client *client,
 			 const struct i2c_device_id *id)
 {
@@ -216,23 +208,11 @@ static int noa1305_probe(struct i2c_client *client,
 
 	priv = iio_priv(indio_dev);
 
-	priv->vin_reg = devm_regulator_get(&client->dev, "vin");
-	if (IS_ERR(priv->vin_reg))
-		return dev_err_probe(&client->dev, PTR_ERR(priv->vin_reg),
+	ret = devm_regulator_get_enable(&client->dev, "vin");
+	if (ret)
+		return dev_err_probe(&client->dev, ret,
 				     "get regulator vin failed\n");
 
-	ret = regulator_enable(priv->vin_reg);
-	if (ret) {
-		dev_err(&client->dev, "enable regulator vin failed\n");
-		return ret;
-	}
-
-	ret = devm_add_action_or_reset(&client->dev, noa1305_reg_remove, priv);
-	if (ret) {
-		dev_err(&client->dev, "addition of devm action failed\n");
-		return ret;
-	}
-
 	i2c_set_clientdata(client, indio_dev);
 	priv->client = client;
 	priv->regmap = regmap;
-- 
GitLab


From 1db96143df6b81a402e9b88d08f04374f1353ed0 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 17:34:07 +0100
Subject: [PATCH 2310/4122] iio: proximity: sx_common: Use
 devm_regulator_bulk_get_enable()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This driver only turns the power for some regulators on at probe and off
via a custom devm_add_action_or_reset() callback. The new
devm_regulator_bulk_get_enable() replaces all this boilerplate code.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: Gwendal Grignou <gwendal@chromium.org>
Reviewed-by: Stephen Boyd <swboyd@chromium.org>
Reviewed-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016163409.320197-13-jic23@kernel.org
---
 drivers/iio/proximity/sx_common.c | 23 +++--------------------
 drivers/iio/proximity/sx_common.h |  2 --
 2 files changed, 3 insertions(+), 22 deletions(-)

diff --git a/drivers/iio/proximity/sx_common.c b/drivers/iio/proximity/sx_common.c
index d70a6b4f0bf86..eba9256730ec1 100644
--- a/drivers/iio/proximity/sx_common.c
+++ b/drivers/iio/proximity/sx_common.c
@@ -424,13 +424,6 @@ static const struct iio_buffer_setup_ops sx_common_buffer_setup_ops = {
 	.postdisable = sx_common_buffer_postdisable,
 };
 
-static void sx_common_regulator_disable(void *_data)
-{
-	struct sx_common_data *data = _data;
-
-	regulator_bulk_disable(ARRAY_SIZE(data->supplies), data->supplies);
-}
-
 #define SX_COMMON_SOFT_RESET				0xde
 
 static int sx_common_init_device(struct device *dev, struct iio_dev *indio_dev)
@@ -474,6 +467,7 @@ int sx_common_probe(struct i2c_client *client,
 		    const struct sx_common_chip_info *chip_info,
 		    const struct regmap_config *regmap_config)
 {
+	static const char * const regulator_names[] = { "vdd", "svdd" };
 	struct device *dev = &client->dev;
 	struct iio_dev *indio_dev;
 	struct sx_common_data *data;
@@ -487,8 +481,6 @@ int sx_common_probe(struct i2c_client *client,
 
 	data->chip_info = chip_info;
 	data->client = client;
-	data->supplies[0].supply = "vdd";
-	data->supplies[1].supply = "svdd";
 	mutex_init(&data->mutex);
 	init_completion(&data->completion);
 
@@ -497,23 +489,14 @@ int sx_common_probe(struct i2c_client *client,
 		return dev_err_probe(dev, PTR_ERR(data->regmap),
 				     "Could init register map\n");
 
-	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(data->supplies),
-				      data->supplies);
+	ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names),
+					     regulator_names);
 	if (ret)
 		return dev_err_probe(dev, ret, "Unable to get regulators\n");
 
-	ret = regulator_bulk_enable(ARRAY_SIZE(data->supplies), data->supplies);
-	if (ret)
-		return dev_err_probe(dev, ret, "Unable to enable regulators\n");
-
 	/* Must wait for Tpor time after initial power up */
 	usleep_range(1000, 1100);
 
-	ret = devm_add_action_or_reset(dev, sx_common_regulator_disable, data);
-	if (ret)
-		return dev_err_probe(dev, ret,
-				     "Unable to register regulators deleter\n");
-
 	ret = data->chip_info->ops.check_whoami(dev, indio_dev);
 	if (ret)
 		return dev_err_probe(dev, ret, "error reading WHOAMI\n");
diff --git a/drivers/iio/proximity/sx_common.h b/drivers/iio/proximity/sx_common.h
index 5d3edeb75f4e0..49d4517103b0f 100644
--- a/drivers/iio/proximity/sx_common.h
+++ b/drivers/iio/proximity/sx_common.h
@@ -102,7 +102,6 @@ struct sx_common_chip_info {
  * @trig:		IIO trigger object.
  * @regmap:		Register map.
  * @num_default_regs:	Number of default registers to set at init.
- * @supplies:		Power supplies object.
  * @chan_prox_stat:	Last reading of the proximity status for each channel.
  *			We only send an event to user space when this changes.
  * @trigger_enabled:	True when the device trigger is enabled.
@@ -120,7 +119,6 @@ struct sx_common_data {
 	struct iio_trigger *trig;
 	struct regmap *regmap;
 
-	struct regulator_bulk_data supplies[2];
 	unsigned long chan_prox_stat;
 	bool trigger_enabled;
 
-- 
GitLab


From ecff5cb471b5dc20b996378c0533bd5f34e85c89 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:40 +0200
Subject: [PATCH 2311/4122] iio: accel: adxl367: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-2-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl367_i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/adxl367_i2c.c b/drivers/iio/accel/adxl367_i2c.c
index 3606efa25835e..070aad724abd4 100644
--- a/drivers/iio/accel/adxl367_i2c.c
+++ b/drivers/iio/accel/adxl367_i2c.c
@@ -41,8 +41,7 @@ static const struct adxl367_ops adxl367_i2c_ops = {
 	.read_fifo = adxl367_i2c_read_fifo,
 };
 
-static int adxl367_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int adxl367_i2c_probe(struct i2c_client *client)
 {
 	struct adxl367_i2c_state *st;
 	struct regmap *regmap;
@@ -78,7 +77,7 @@ static struct i2c_driver adxl367_i2c_driver = {
 		.name = "adxl367_i2c",
 		.of_match_table = adxl367_of_match,
 	},
-	.probe = adxl367_i2c_probe,
+	.probe_new = adxl367_i2c_probe,
 	.id_table = adxl367_i2c_id,
 };
 
-- 
GitLab


From a9e38f1ecc461c43aa26849394c552e38593ad38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:45 +0200
Subject: [PATCH 2312/4122] iio: accel: da311: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-7-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/da311.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/da311.c b/drivers/iio/accel/da311.c
index ec4e29d260f7e..080335fa2ad65 100644
--- a/drivers/iio/accel/da311.c
+++ b/drivers/iio/accel/da311.c
@@ -217,8 +217,7 @@ static void da311_disable(void *client)
 	da311_enable(client, false);
 }
 
-static int da311_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int da311_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -279,7 +278,7 @@ static struct i2c_driver da311_driver = {
 		.name = "da311",
 		.pm = pm_sleep_ptr(&da311_pm_ops),
 	},
-	.probe		= da311_probe,
+	.probe_new	= da311_probe,
 	.id_table	= da311_i2c_id,
 };
 
-- 
GitLab


From 72907238b193be0fc18f2859b7f4c6741b73aedc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:46 +0200
Subject: [PATCH 2313/4122] iio: accel: dmard06: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-8-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/dmard06.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/dmard06.c b/drivers/iio/accel/dmard06.c
index 4b69c8530f5e6..7390509aaac05 100644
--- a/drivers/iio/accel/dmard06.c
+++ b/drivers/iio/accel/dmard06.c
@@ -125,8 +125,7 @@ static const struct iio_info dmard06_info = {
 	.read_raw	= dmard06_read_raw,
 };
 
-static int dmard06_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int dmard06_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -218,7 +217,7 @@ static const struct of_device_id dmard06_of_match[] = {
 MODULE_DEVICE_TABLE(of, dmard06_of_match);
 
 static struct i2c_driver dmard06_driver = {
-	.probe = dmard06_probe,
+	.probe_new = dmard06_probe,
 	.id_table = dmard06_id,
 	.driver = {
 		.name = DMARD06_DRV_NAME,
-- 
GitLab


From d61f79d383f548df8b3b9da58ab687c7db1a93c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:47 +0200
Subject: [PATCH 2314/4122] iio: accel: dmard09: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-9-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/dmard09.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/dmard09.c b/drivers/iio/accel/dmard09.c
index cb0246ca72f32..4b7a537f617d5 100644
--- a/drivers/iio/accel/dmard09.c
+++ b/drivers/iio/accel/dmard09.c
@@ -88,8 +88,7 @@ static const struct iio_info dmard09_info = {
 	.read_raw	= dmard09_read_raw,
 };
 
-static int dmard09_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int dmard09_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -136,7 +135,7 @@ static struct i2c_driver dmard09_driver = {
 	.driver = {
 		.name = DMARD09_DRV_NAME
 	},
-	.probe = dmard09_probe,
+	.probe_new = dmard09_probe,
 	.id_table = dmard09_id,
 };
 
-- 
GitLab


From 5019025fc3a9869c438ad33ab18a4fdad46a996d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:48 +0200
Subject: [PATCH 2315/4122] iio: accel: dmard10: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-10-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/dmard10.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/dmard10.c b/drivers/iio/accel/dmard10.c
index 8ac62ec0a04a4..07e68aed8a13c 100644
--- a/drivers/iio/accel/dmard10.c
+++ b/drivers/iio/accel/dmard10.c
@@ -175,8 +175,7 @@ static void dmard10_shutdown_cleanup(void *client)
 	dmard10_shutdown(client);
 }
 
-static int dmard10_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int dmard10_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -242,7 +241,7 @@ static struct i2c_driver dmard10_driver = {
 		.name = "dmard10",
 		.pm = pm_sleep_ptr(&dmard10_pm_ops),
 	},
-	.probe		= dmard10_probe,
+	.probe_new	= dmard10_probe,
 	.id_table	= dmard10_i2c_id,
 };
 
-- 
GitLab


From 2ba423fefb1c87d080e3a7964ccfa7e929ed6f07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:50 +0200
Subject: [PATCH 2316/4122] iio: accel: kxsd9: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-12-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/kxsd9-i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/kxsd9-i2c.c b/drivers/iio/accel/kxsd9-i2c.c
index 61346ea8ef194..6b3683ddce36d 100644
--- a/drivers/iio/accel/kxsd9-i2c.c
+++ b/drivers/iio/accel/kxsd9-i2c.c
@@ -10,8 +10,7 @@
 
 #include "kxsd9.h"
 
-static int kxsd9_i2c_probe(struct i2c_client *i2c,
-			   const struct i2c_device_id *id)
+static int kxsd9_i2c_probe(struct i2c_client *i2c)
 {
 	static const struct regmap_config config = {
 		.reg_bits = 8,
@@ -55,7 +54,7 @@ static struct i2c_driver kxsd9_i2c_driver = {
 		.of_match_table = kxsd9_of_match,
 		.pm = pm_ptr(&kxsd9_dev_pm_ops),
 	},
-	.probe		= kxsd9_i2c_probe,
+	.probe_new	= kxsd9_i2c_probe,
 	.remove		= kxsd9_i2c_remove,
 	.id_table	= kxsd9_i2c_id,
 };
-- 
GitLab


From ded7a4f8f90286e63eefcd32443ac911d926e118 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:51 +0200
Subject: [PATCH 2317/4122] iio: accel: mc3230: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-13-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mc3230.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mc3230.c b/drivers/iio/accel/mc3230.c
index 2462000e05198..efc21871de42b 100644
--- a/drivers/iio/accel/mc3230.c
+++ b/drivers/iio/accel/mc3230.c
@@ -106,8 +106,7 @@ static const struct iio_info mc3230_info = {
 	.read_raw	= mc3230_read_raw,
 };
 
-static int mc3230_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int mc3230_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -191,7 +190,7 @@ static struct i2c_driver mc3230_driver = {
 		.name = "mc3230",
 		.pm = pm_sleep_ptr(&mc3230_pm_ops),
 	},
-	.probe		= mc3230_probe,
+	.probe_new	= mc3230_probe,
 	.remove		= mc3230_remove,
 	.id_table	= mc3230_i2c_id,
 };
-- 
GitLab


From 76403ea698adffb6661e083545f21e50fef5b3ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:53 +0200
Subject: [PATCH 2318/4122] iio: accel: mma7660: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-15-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mma7660.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mma7660.c b/drivers/iio/accel/mma7660.c
index 85829990bbada..b279ca4dcdc05 100644
--- a/drivers/iio/accel/mma7660.c
+++ b/drivers/iio/accel/mma7660.c
@@ -169,8 +169,7 @@ static const struct iio_info mma7660_info = {
 	.attrs			= &mma7660_attribute_group,
 };
 
-static int mma7660_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int mma7660_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -267,7 +266,7 @@ static struct i2c_driver mma7660_driver = {
 		.of_match_table = mma7660_of_match,
 		.acpi_match_table = mma7660_acpi_id,
 	},
-	.probe		= mma7660_probe,
+	.probe_new	= mma7660_probe,
 	.remove		= mma7660_remove,
 	.id_table	= mma7660_i2c_id,
 };
-- 
GitLab


From a97d9d95a188d21f5d1cbedbbc7c990b00bf5744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:57 +0200
Subject: [PATCH 2319/4122] iio: accel: mxc4005: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-19-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mxc4005.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mxc4005.c b/drivers/iio/accel/mxc4005.c
index df600d2917c0a..b146fc82738f6 100644
--- a/drivers/iio/accel/mxc4005.c
+++ b/drivers/iio/accel/mxc4005.c
@@ -385,8 +385,7 @@ static int mxc4005_chip_init(struct mxc4005_data *data)
 	return 0;
 }
 
-static int mxc4005_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mxc4005_probe(struct i2c_client *client)
 {
 	struct mxc4005_data *data;
 	struct iio_dev *indio_dev;
@@ -489,7 +488,7 @@ static struct i2c_driver mxc4005_driver = {
 		.name = MXC4005_DRV_NAME,
 		.acpi_match_table = ACPI_PTR(mxc4005_acpi_match),
 	},
-	.probe		= mxc4005_probe,
+	.probe_new	= mxc4005_probe,
 	.id_table	= mxc4005_id,
 };
 
-- 
GitLab


From 76e4a118c85a7e93ff5264ff8d92f76f675be352 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:58 +0200
Subject: [PATCH 2320/4122] iio: accel: mxc6255: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-20-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mxc6255.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mxc6255.c b/drivers/iio/accel/mxc6255.c
index 9aeeadc420d31..aa2e660545f84 100644
--- a/drivers/iio/accel/mxc6255.c
+++ b/drivers/iio/accel/mxc6255.c
@@ -113,8 +113,7 @@ static const struct regmap_config mxc6255_regmap_config = {
 	.readable_reg = mxc6255_is_readable_reg,
 };
 
-static int mxc6255_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mxc6255_probe(struct i2c_client *client)
 {
 	struct mxc6255_data *data;
 	struct iio_dev *indio_dev;
@@ -184,7 +183,7 @@ static struct i2c_driver mxc6255_driver = {
 		.name = MXC6255_DRV_NAME,
 		.acpi_match_table = ACPI_PTR(mxc6255_acpi_match),
 	},
-	.probe		= mxc6255_probe,
+	.probe_new	= mxc6255_probe,
 	.id_table	= mxc6255_id,
 };
 
-- 
GitLab


From 3d8a49ebd6576b7803181e0a9fa4e7930f011f1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:22:59 +0200
Subject: [PATCH 2321/4122] iio: accel: stk8312: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-21-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/stk8312.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/stk8312.c b/drivers/iio/accel/stk8312.c
index 7b1d6fb692b37..68f680db75051 100644
--- a/drivers/iio/accel/stk8312.c
+++ b/drivers/iio/accel/stk8312.c
@@ -498,8 +498,7 @@ static const struct iio_buffer_setup_ops stk8312_buffer_setup_ops = {
 	.postdisable = stk8312_buffer_postdisable,
 };
 
-static int stk8312_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int stk8312_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -645,7 +644,7 @@ static struct i2c_driver stk8312_driver = {
 		.name = STK8312_DRIVER_NAME,
 		.pm = pm_sleep_ptr(&stk8312_pm_ops),
 	},
-	.probe =            stk8312_probe,
+	.probe_new =        stk8312_probe,
 	.remove =           stk8312_remove,
 	.id_table =         stk8312_i2c_id,
 };
-- 
GitLab


From d18e70608e01a5845fd75e7ec1956ce422b07184 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:23:00 +0200
Subject: [PATCH 2322/4122] iio: accel: stk8ba50: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-22-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/stk8ba50.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/stk8ba50.c b/drivers/iio/accel/stk8ba50.c
index 2f5e4ab2a6e7d..44f6e0fbdfcc6 100644
--- a/drivers/iio/accel/stk8ba50.c
+++ b/drivers/iio/accel/stk8ba50.c
@@ -379,8 +379,7 @@ static const struct iio_buffer_setup_ops stk8ba50_buffer_setup_ops = {
 	.postdisable = stk8ba50_buffer_postdisable,
 };
 
-static int stk8ba50_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int stk8ba50_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -544,7 +543,7 @@ static struct i2c_driver stk8ba50_driver = {
 		.pm = pm_sleep_ptr(&stk8ba50_pm_ops),
 		.acpi_match_table = ACPI_PTR(stk8ba50_acpi_id),
 	},
-	.probe =            stk8ba50_probe,
+	.probe_new =        stk8ba50_probe,
 	.remove =           stk8ba50_remove,
 	.id_table =         stk8ba50_i2c_id,
 };
-- 
GitLab


From 9492c00d83ee8b2b141e29acbcd3e56dd11aa436 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:23:01 +0200
Subject: [PATCH 2323/4122] iio: accel: st_magn: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-23-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/st_magn_i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/st_magn_i2c.c b/drivers/iio/magnetometer/st_magn_i2c.c
index c5d8c303db4ef..b4098d3b38137 100644
--- a/drivers/iio/magnetometer/st_magn_i2c.c
+++ b/drivers/iio/magnetometer/st_magn_i2c.c
@@ -54,8 +54,7 @@ static const struct of_device_id st_magn_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, st_magn_of_match);
 
-static int st_magn_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int st_magn_i2c_probe(struct i2c_client *client)
 {
 	const struct st_sensor_settings *settings;
 	struct st_sensor_data *mdata;
@@ -107,7 +106,7 @@ static struct i2c_driver st_magn_driver = {
 		.name = "st-magn-i2c",
 		.of_match_table = st_magn_of_match,
 	},
-	.probe = st_magn_i2c_probe,
+	.probe_new = st_magn_i2c_probe,
 	.id_table = st_magn_id_table,
 };
 module_i2c_driver(st_magn_driver);
-- 
GitLab


From 2ca0b16f6ce23cf586c27a9139cf426b3f64d242 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Sun, 23 Oct 2022 15:23:02 +0200
Subject: [PATCH 2324/4122] iio: accel: vl6180: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221023132302.911644-24-u.kleine-koenig@pengutronix.de
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/vl6180.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/vl6180.c b/drivers/iio/light/vl6180.c
index d47a4f6f4e870..8b56df26c59e8 100644
--- a/drivers/iio/light/vl6180.c
+++ b/drivers/iio/light/vl6180.c
@@ -493,8 +493,7 @@ static int vl6180_init(struct vl6180_data *data)
 	return vl6180_hold(data, false);
 }
 
-static int vl6180_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int vl6180_probe(struct i2c_client *client)
 {
 	struct vl6180_data *data;
 	struct iio_dev *indio_dev;
@@ -539,7 +538,7 @@ static struct i2c_driver vl6180_driver = {
 		.name   = VL6180_DRV_NAME,
 		.of_match_table = vl6180_of_match,
 	},
-	.probe  = vl6180_probe,
+	.probe_new = vl6180_probe,
 	.id_table = vl6180_id,
 };
 
-- 
GitLab


From a95ccebde759f1f6a6070819cd43dce1ad06750d Mon Sep 17 00:00:00 2001
From: Crt Mori <cmo@melexis.com>
Date: Mon, 24 Oct 2022 12:22:05 +0200
Subject: [PATCH 2325/4122] iio: temperature: mlx90632 Style alignment for the
 driver

Changing and aligning the overall style of the driver with the recent
reviews. There is no functional change, only type generalization and
moving to the reverse Christmas tree for variable declarations.

Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Crt Mori <cmo@melexis.com>
Link: https://lore.kernel.org/r/d59aad00891c1a64e044a0f5bc7d40e42d47e9c7.1666606912.git.cmo@melexis.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/mlx90632.c | 65 +++++++++++++++++-------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index 224db7513baa7..a17fe5f4967a6 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -219,11 +219,11 @@ static const struct regmap_config mlx90632_regmap = {
 	.cache_type = REGCACHE_RBTREE,
 };
 
-static s32 mlx90632_pwr_set_sleep_step(struct regmap *regmap)
+static int mlx90632_pwr_set_sleep_step(struct regmap *regmap)
 {
 	struct mlx90632_data *data =
 		iio_priv(dev_get_drvdata(regmap_get_device(regmap)));
-	s32 ret;
+	int ret;
 
 	if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP)
 		return 0;
@@ -234,14 +234,14 @@ static s32 mlx90632_pwr_set_sleep_step(struct regmap *regmap)
 		return ret;
 
 	data->powerstatus = MLX90632_PWR_STATUS_SLEEP_STEP;
-	return ret;
+	return 0;
 }
 
-static s32 mlx90632_pwr_continuous(struct regmap *regmap)
+static int mlx90632_pwr_continuous(struct regmap *regmap)
 {
 	struct mlx90632_data *data =
 		iio_priv(dev_get_drvdata(regmap_get_device(regmap)));
-	s32 ret;
+	int ret;
 
 	if (data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS)
 		return 0;
@@ -252,7 +252,7 @@ static s32 mlx90632_pwr_continuous(struct regmap *regmap)
 		return ret;
 
 	data->powerstatus = MLX90632_PWR_STATUS_CONTINUOUS;
-	return ret;
+	return 0;
 }
 
 /**
@@ -444,8 +444,8 @@ static int mlx90632_channel_new_select(int perform_ret, uint8_t *channel_new,
 static int mlx90632_read_ambient_raw(struct regmap *regmap,
 				     s16 *ambient_new_raw, s16 *ambient_old_raw)
 {
-	int ret;
 	unsigned int read_tmp;
+	int ret;
 
 	ret = regmap_read(regmap, MLX90632_RAM_3(1), &read_tmp);
 	if (ret < 0)
@@ -464,11 +464,11 @@ static int mlx90632_read_object_raw(struct regmap *regmap,
 				    int perform_measurement_ret,
 				    s16 *object_new_raw, s16 *object_old_raw)
 {
-	int ret;
 	unsigned int read_tmp;
-	s16 read;
-	u8 channel = 0;
 	u8 channel_old = 0;
+	u8 channel = 0;
+	s16 read;
+	int ret;
 
 	ret = mlx90632_channel_new_select(perform_measurement_ret, &channel,
 					  &channel_old);
@@ -503,7 +503,8 @@ static int mlx90632_read_all_channel(struct mlx90632_data *data,
 				     s16 *ambient_new_raw, s16 *ambient_old_raw,
 				     s16 *object_new_raw, s16 *object_old_raw)
 {
-	s32 ret, measurement;
+	s32 measurement;
+	int ret;
 
 	mutex_lock(&data->lock);
 	ret = mlx90632_set_meas_type(data, MLX90632_MTYP_MEDICAL);
@@ -512,24 +513,24 @@ static int mlx90632_read_all_channel(struct mlx90632_data *data,
 
 	switch (data->powerstatus) {
 	case MLX90632_PWR_STATUS_CONTINUOUS:
-		measurement = mlx90632_perform_measurement(data);
-		if (measurement < 0) {
-			ret = measurement;
+		ret = mlx90632_perform_measurement(data);
+		if (ret < 0)
 			goto read_unlock;
-		}
+
 		break;
 	case MLX90632_PWR_STATUS_SLEEP_STEP:
-		measurement = mlx90632_perform_measurement_burst(data);
-		if (measurement < 0) {
-			ret = measurement;
+		ret = mlx90632_perform_measurement_burst(data);
+		if (ret < 0)
 			goto read_unlock;
-		}
+
 		break;
 	default:
 		ret = -EOPNOTSUPP;
 		goto read_unlock;
 	}
 
+	measurement = ret; /* If we came here ret holds the measurement position */
+
 	ret = mlx90632_read_ambient_raw(data->regmap, ambient_new_raw,
 					ambient_old_raw);
 	if (ret < 0)
@@ -615,15 +616,21 @@ static int mlx90632_read_all_channel_extended(struct mlx90632_data *data, s16 *o
 	if (ret < 0)
 		goto read_unlock;
 
-	if (data->powerstatus == MLX90632_PWR_STATUS_CONTINUOUS) {
+	switch (data->powerstatus) {
+	case MLX90632_PWR_STATUS_CONTINUOUS:
 		ret = read_poll_timeout(mlx90632_perform_measurement, meas, meas == 19,
 					50000, 800000, false, data);
 		if (ret)
 			goto read_unlock;
-	} else if (data->powerstatus == MLX90632_PWR_STATUS_SLEEP_STEP) {
+		break;
+	case MLX90632_PWR_STATUS_SLEEP_STEP:
 		ret = mlx90632_perform_measurement_burst(data);
 		if (ret < 0)
 			goto read_unlock;
+		break;
+	default:
+		ret = -EOPNOTSUPP;
+		goto read_unlock;
 	}
 
 	ret = mlx90632_read_object_raw_extended(data->regmap, object_new_raw);
@@ -640,9 +647,9 @@ read_unlock:
 static int mlx90632_read_ee_register(struct regmap *regmap, u16 reg_lsb,
 				     s32 *reg_value)
 {
-	s32 ret;
 	unsigned int read;
 	u32 value;
+	int ret;
 
 	ret = regmap_read(regmap, reg_lsb, &read);
 	if (ret < 0)
@@ -806,12 +813,12 @@ static s32 mlx90632_calc_temp_object_extended(s64 object, s64 ambient, s64 refle
 
 static int mlx90632_calc_object_dsp105(struct mlx90632_data *data, int *val)
 {
-	s32 ret;
+	s16 ambient_new_raw, ambient_old_raw, object_new_raw, object_old_raw;
 	s32 Ea, Eb, Fa, Fb, Ga;
 	unsigned int read_tmp;
-	s16 Ha, Hb, Gb, Ka;
-	s16 ambient_new_raw, ambient_old_raw, object_new_raw, object_old_raw;
 	s64 object, ambient;
+	s16 Ha, Hb, Gb, Ka;
+	int ret;
 
 	ret = mlx90632_read_ee_register(data->regmap, MLX90632_EE_Ea, &Ea);
 	if (ret < 0)
@@ -885,11 +892,11 @@ static int mlx90632_calc_object_dsp105(struct mlx90632_data *data, int *val)
 
 static int mlx90632_calc_ambient_dsp105(struct mlx90632_data *data, int *val)
 {
-	s32 ret;
+	s16 ambient_new_raw, ambient_old_raw;
 	unsigned int read_tmp;
 	s32 PT, PR, PG, PO;
+	int ret;
 	s16 Gb;
-	s16 ambient_new_raw, ambient_old_raw;
 
 	ret = mlx90632_read_ee_register(data->regmap, MLX90632_EE_P_R, &PR);
 	if (ret < 0)
@@ -1164,11 +1171,11 @@ static int mlx90632_enable_regulator(struct mlx90632_data *data)
 static int mlx90632_probe(struct i2c_client *client,
 			  const struct i2c_device_id *id)
 {
-	struct iio_dev *indio_dev;
 	struct mlx90632_data *mlx90632;
+	struct iio_dev *indio_dev;
 	struct regmap *regmap;
-	int ret;
 	unsigned int read;
+	int ret;
 
 	indio_dev = devm_iio_device_alloc(&client->dev, sizeof(*mlx90632));
 	if (!indio_dev) {
-- 
GitLab


From f7626504432a6d30178bc6946d50a9d4a1bb4e31 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 24 Oct 2022 18:52:50 +0800
Subject: [PATCH 2326/4122] iio: trigger: sysfs: rename error label in
 iio_sysfs_trigger_probe()

Rename error label in iio_sysfs_trigger_probe() to make more readable.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221024105250.873394-1-yangyingliang@huawei.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/trigger/iio-trig-sysfs.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/iio/trigger/iio-trig-sysfs.c b/drivers/iio/trigger/iio-trig-sysfs.c
index d6c5e96447383..63ce01ac20368 100644
--- a/drivers/iio/trigger/iio-trig-sysfs.c
+++ b/drivers/iio/trigger/iio-trig-sysfs.c
@@ -138,18 +138,18 @@ static int iio_sysfs_trigger_probe(int id)
 		}
 	if (foundit) {
 		ret = -EINVAL;
-		goto out1;
+		goto err_unlock;
 	}
 	t = kmalloc(sizeof(*t), GFP_KERNEL);
 	if (t == NULL) {
 		ret = -ENOMEM;
-		goto out1;
+		goto err_unlock;
 	}
 	t->id = id;
 	t->trig = iio_trigger_alloc(&iio_sysfs_trig_dev, "sysfstrig%d", id);
 	if (!t->trig) {
 		ret = -ENOMEM;
-		goto free_t;
+		goto err_free_sys_trig;
 	}
 
 	t->trig->dev.groups = iio_sysfs_trigger_attr_groups;
@@ -159,17 +159,17 @@ static int iio_sysfs_trigger_probe(int id)
 
 	ret = iio_trigger_register(t->trig);
 	if (ret)
-		goto out2;
+		goto err_free_trig;
 	list_add(&t->l, &iio_sysfs_trig_list);
 	__module_get(THIS_MODULE);
 	mutex_unlock(&iio_sysfs_trig_list_mut);
 	return 0;
 
-out2:
+err_free_trig:
 	iio_trigger_free(t->trig);
-free_t:
+err_free_sys_trig:
 	kfree(t);
-out1:
+err_unlock:
 	mutex_unlock(&iio_sysfs_trig_list_mut);
 	return ret;
 }
-- 
GitLab


From b52e2f19f80240365d7eaa3fdd320afcf14cf4c0 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 24 Oct 2022 15:40:11 +0300
Subject: [PATCH 2327/4122] dt-bindings: iio: Add KX022A accelerometer

KX022A is a 3-axis Accelerometer from ROHM/Kionix. The sensor features
include variable ODRs, I2C and SPI control, FIFO/LIFO with watermark IRQ,
tap/motion detection, wake-up & back-to-sleep events, four acceleration
ranges (2, 4, 8 and 16g) and probably some other cool features.

Add the basic device tree description for the accelerometer. Only basic
accelerometer features are considered as of now - new properties may or
may not be needed in the future when rest of the features are supported.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/06f8e1ab29d02ed216db10091a269df4b6abad9a.1666614295.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/accel/kionix,kx022a.yaml     | 65 +++++++++++++++++++
 1 file changed, 65 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml

diff --git a/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml b/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml
new file mode 100644
index 0000000000000..986df1a6ff0ab
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/accel/kionix,kx022a.yaml
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/accel/kionix,kx022a.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ROHM/Kionix KX022A Accelerometer
+
+maintainers:
+  - Matti Vaittinen <mazziesaccount@gmail.com>
+
+description: |
+  KX022A is a 3-axis accelerometer supporting +/- 2G, 4G, 8G and 16G ranges,
+  output data-rates from 0.78Hz to 1600Hz and a hardware-fifo buffering.
+  KX022A can be accessed either via I2C or SPI.
+
+properties:
+  compatible:
+    const: kionix,kx022a
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+
+  interrupt-names:
+    minItems: 1
+    items:
+      - enum: [INT1, INT2]
+      - const: INT2
+
+  vdd-supply: true
+  io-vdd-supply: true
+
+  mount-matrix:
+    description: |
+      an optional 3x3 mounting rotation matrix.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        accel@1f {
+            compatible = "kionix,kx022a";
+            reg = <0x1f>;
+
+            interrupt-parent = <&gpio1>;
+            interrupts = <29 IRQ_TYPE_LEVEL_LOW>;
+            interrupt-names = "INT1";
+
+            io-vdd-supply = <&iovdd>;
+            vdd-supply = <&vdd>;
+        };
+    };
-- 
GitLab


From 7c1d1677b3227c6b18ac999f2b84778baa280b8f Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 24 Oct 2022 15:40:29 +0300
Subject: [PATCH 2328/4122] iio: accel: Support Kionix/ROHM KX022A
 accelerometer

KX022A is a 3-axis accelerometer from ROHM/Kionix. The sensor features
include variable ODRs, I2C and SPI control, FIFO/LIFO with watermark IRQ,
tap/motion detection, wake-up & back-to-sleep events, four acceleration
ranges (2, 4, 8 and 16g), and probably some other cool features.

Add support for the basic accelerometer features such as getting the
acceleration data via IIO. (raw reads, triggered buffer [data-ready] or
using the WMI IRQ).

Important things to be added include the double-tap, motion
detection and wake-up as well as the runtime power management.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/758b00d6aea0a6431a5a3a78d557d449c113b21e.1666614295.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/Kconfig             |   21 +
 drivers/iio/accel/Makefile            |    3 +
 drivers/iio/accel/kionix-kx022a-i2c.c |   51 ++
 drivers/iio/accel/kionix-kx022a-spi.c |   58 ++
 drivers/iio/accel/kionix-kx022a.c     | 1142 +++++++++++++++++++++++++
 drivers/iio/accel/kionix-kx022a.h     |   82 ++
 6 files changed, 1357 insertions(+)
 create mode 100644 drivers/iio/accel/kionix-kx022a-i2c.c
 create mode 100644 drivers/iio/accel/kionix-kx022a-spi.c
 create mode 100644 drivers/iio/accel/kionix-kx022a.c
 create mode 100644 drivers/iio/accel/kionix-kx022a.h

diff --git a/drivers/iio/accel/Kconfig b/drivers/iio/accel/Kconfig
index ffac66db7ac92..03ac410c162e1 100644
--- a/drivers/iio/accel/Kconfig
+++ b/drivers/iio/accel/Kconfig
@@ -409,6 +409,27 @@ config IIO_ST_ACCEL_SPI_3AXIS
 	  To compile this driver as a module, choose M here. The module
 	  will be called st_accel_spi.
 
+config IIO_KX022A
+	tristate
+
+config IIO_KX022A_SPI
+	tristate "Kionix KX022A tri-axis digital accelerometer SPI interface"
+	depends on SPI
+	select IIO_KX022A
+	select REGMAP_SPI
+	help
+	  Enable support for the Kionix KX022A digital tri-axis
+	  accelerometer connected to I2C interface.
+
+config IIO_KX022A_I2C
+	tristate "Kionix KX022A tri-axis digital accelerometer I2C interface"
+	depends on I2C
+	select IIO_KX022A
+	select REGMAP_I2C
+	help
+	  Enable support for the Kionix KX022A digital tri-axis
+	  accelerometer connected to I2C interface.
+
 config KXSD9
 	tristate "Kionix KXSD9 Accelerometer Driver"
 	select IIO_BUFFER
diff --git a/drivers/iio/accel/Makefile b/drivers/iio/accel/Makefile
index 5e45b5fa5ab50..311ead9c3ef18 100644
--- a/drivers/iio/accel/Makefile
+++ b/drivers/iio/accel/Makefile
@@ -40,6 +40,9 @@ obj-$(CONFIG_FXLS8962AF)	+= fxls8962af-core.o
 obj-$(CONFIG_FXLS8962AF_I2C)	+= fxls8962af-i2c.o
 obj-$(CONFIG_FXLS8962AF_SPI)	+= fxls8962af-spi.o
 obj-$(CONFIG_HID_SENSOR_ACCEL_3D) += hid-sensor-accel-3d.o
+obj-$(CONFIG_IIO_KX022A)	+= kionix-kx022a.o
+obj-$(CONFIG_IIO_KX022A_I2C)	+= kionix-kx022a-i2c.o
+obj-$(CONFIG_IIO_KX022A_SPI)	+= kionix-kx022a-spi.o
 obj-$(CONFIG_KXCJK1013) += kxcjk-1013.o
 obj-$(CONFIG_KXSD9)	+= kxsd9.o
 obj-$(CONFIG_KXSD9_SPI)	+= kxsd9-spi.o
diff --git a/drivers/iio/accel/kionix-kx022a-i2c.c b/drivers/iio/accel/kionix-kx022a-i2c.c
new file mode 100644
index 0000000000000..e6fd02d931b60
--- /dev/null
+++ b/drivers/iio/accel/kionix-kx022a-i2c.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ROHM Semiconductors
+ *
+ * ROHM/KIONIX KX022A accelerometer driver
+ */
+
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+
+#include "kionix-kx022a.h"
+
+static int kx022a_i2c_probe(struct i2c_client *i2c)
+{
+	struct device *dev = &i2c->dev;
+	struct regmap *regmap;
+
+	if (!i2c->irq) {
+		dev_err(dev, "No IRQ configured\n");
+		return -EINVAL;
+	}
+
+	regmap = devm_regmap_init_i2c(i2c, &kx022a_regmap);
+	if (IS_ERR(regmap))
+		return dev_err_probe(dev, PTR_ERR(regmap),
+				     "Failed to initialize Regmap\n");
+
+	return kx022a_probe_internal(dev);
+}
+
+static const struct of_device_id kx022a_of_match[] = {
+	{ .compatible = "kionix,kx022a", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, kx022a_of_match);
+
+static struct i2c_driver kx022a_i2c_driver = {
+	.driver = {
+		.name  = "kx022a-i2c",
+		.of_match_table = kx022a_of_match,
+	  },
+	.probe_new    = kx022a_i2c_probe,
+};
+module_i2c_driver(kx022a_i2c_driver);
+
+MODULE_DESCRIPTION("ROHM/Kionix KX022A accelerometer driver");
+MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(IIO_KX022A);
diff --git a/drivers/iio/accel/kionix-kx022a-spi.c b/drivers/iio/accel/kionix-kx022a-spi.c
new file mode 100644
index 0000000000000..9cd047f7b3467
--- /dev/null
+++ b/drivers/iio/accel/kionix-kx022a-spi.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ROHM Semiconductors
+ *
+ * ROHM/KIONIX KX022A accelerometer driver
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/spi/spi.h>
+
+#include "kionix-kx022a.h"
+
+static int kx022a_spi_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct regmap *regmap;
+
+	if (!spi->irq) {
+		dev_err(dev, "No IRQ configured\n");
+		return -EINVAL;
+	}
+
+	regmap = devm_regmap_init_spi(spi, &kx022a_regmap);
+	if (IS_ERR(regmap))
+		return dev_err_probe(dev, PTR_ERR(regmap),
+				     "Failed to initialize Regmap\n");
+
+	return kx022a_probe_internal(dev);
+}
+
+static const struct spi_device_id kx022a_id[] = {
+	{ "kx022a" },
+	{ }
+};
+MODULE_DEVICE_TABLE(spi, kx022a_id);
+
+static const struct of_device_id kx022a_of_match[] = {
+	{ .compatible = "kionix,kx022a", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, kx022a_of_match);
+
+static struct spi_driver kx022a_spi_driver = {
+	.driver = {
+		.name   = "kx022a-spi",
+		.of_match_table = kx022a_of_match,
+	},
+	.probe = kx022a_spi_probe,
+	.id_table = kx022a_id,
+};
+module_spi_driver(kx022a_spi_driver);
+
+MODULE_DESCRIPTION("ROHM/Kionix kx022A accelerometer driver");
+MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
+MODULE_LICENSE("GPL");
+MODULE_IMPORT_NS(IIO_KX022A);
diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c
new file mode 100644
index 0000000000000..50de7f9a1cc7f
--- /dev/null
+++ b/drivers/iio/accel/kionix-kx022a.c
@@ -0,0 +1,1142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ROHM Semiconductors
+ *
+ * ROHM/KIONIX KX022A accelerometer driver
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/mutex.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+#include <linux/string_helpers.h>
+#include <linux/units.h>
+
+#include <linux/iio/iio.h>
+#include <linux/iio/sysfs.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+
+#include "kionix-kx022a.h"
+
+/*
+ * The KX022A has FIFO which can store 43 samples of HiRes data from 2
+ * channels. This equals to 43 (samples) * 3 (channels) * 2 (bytes/sample) to
+ * 258 bytes of sample data. The quirk to know is that the amount of bytes in
+ * the FIFO is advertised via 8 bit register (max value 255). The thing to note
+ * is that full 258 bytes of data is indicated using the max value 255.
+ */
+#define KX022A_FIFO_LENGTH			43
+#define KX022A_FIFO_FULL_VALUE			255
+#define KX022A_SOFT_RESET_WAIT_TIME_US		(5 * USEC_PER_MSEC)
+#define KX022A_SOFT_RESET_TOTAL_WAIT_TIME_US	(500 * USEC_PER_MSEC)
+
+/* 3 axis, 2 bytes of data for each of the axis */
+#define KX022A_FIFO_SAMPLES_SIZE_BYTES		6
+#define KX022A_FIFO_MAX_BYTES					\
+	(KX022A_FIFO_LENGTH * KX022A_FIFO_SAMPLES_SIZE_BYTES)
+
+enum {
+	KX022A_STATE_SAMPLE,
+	KX022A_STATE_FIFO,
+};
+
+/* Regmap configs */
+static const struct regmap_range kx022a_volatile_ranges[] = {
+	{
+		.range_min = KX022A_REG_XHP_L,
+		.range_max = KX022A_REG_COTR,
+	}, {
+		.range_min = KX022A_REG_TSCP,
+		.range_max = KX022A_REG_INT_REL,
+	}, {
+		/* The reset bit will be cleared by sensor */
+		.range_min = KX022A_REG_CNTL2,
+		.range_max = KX022A_REG_CNTL2,
+	}, {
+		.range_min = KX022A_REG_BUF_STATUS_1,
+		.range_max = KX022A_REG_BUF_READ,
+	},
+};
+
+static const struct regmap_access_table kx022a_volatile_regs = {
+	.yes_ranges = &kx022a_volatile_ranges[0],
+	.n_yes_ranges = ARRAY_SIZE(kx022a_volatile_ranges),
+};
+
+static const struct regmap_range kx022a_precious_ranges[] = {
+	{
+		.range_min = KX022A_REG_INT_REL,
+		.range_max = KX022A_REG_INT_REL,
+	},
+};
+
+static const struct regmap_access_table kx022a_precious_regs = {
+	.yes_ranges = &kx022a_precious_ranges[0],
+	.n_yes_ranges = ARRAY_SIZE(kx022a_precious_ranges),
+};
+
+/*
+ * The HW does not set WHO_AM_I reg as read-only but we don't want to write it
+ * so we still include it in the read-only ranges.
+ */
+static const struct regmap_range kx022a_read_only_ranges[] = {
+	{
+		.range_min = KX022A_REG_XHP_L,
+		.range_max = KX022A_REG_INT_REL,
+	}, {
+		.range_min = KX022A_REG_BUF_STATUS_1,
+		.range_max = KX022A_REG_BUF_STATUS_2,
+	}, {
+		.range_min = KX022A_REG_BUF_READ,
+		.range_max = KX022A_REG_BUF_READ,
+	},
+};
+
+static const struct regmap_access_table kx022a_ro_regs = {
+	.no_ranges = &kx022a_read_only_ranges[0],
+	.n_no_ranges = ARRAY_SIZE(kx022a_read_only_ranges),
+};
+
+static const struct regmap_range kx022a_write_only_ranges[] = {
+	{
+		.range_min = KX022A_REG_BTS_WUF_TH,
+		.range_max = KX022A_REG_BTS_WUF_TH,
+	}, {
+		.range_min = KX022A_REG_MAN_WAKE,
+		.range_max = KX022A_REG_MAN_WAKE,
+	}, {
+		.range_min = KX022A_REG_SELF_TEST,
+		.range_max = KX022A_REG_SELF_TEST,
+	}, {
+		.range_min = KX022A_REG_BUF_CLEAR,
+		.range_max = KX022A_REG_BUF_CLEAR,
+	},
+};
+
+static const struct regmap_access_table kx022a_wo_regs = {
+	.no_ranges = &kx022a_write_only_ranges[0],
+	.n_no_ranges = ARRAY_SIZE(kx022a_write_only_ranges),
+};
+
+static const struct regmap_range kx022a_noinc_read_ranges[] = {
+	{
+		.range_min = KX022A_REG_BUF_READ,
+		.range_max = KX022A_REG_BUF_READ,
+	},
+};
+
+static const struct regmap_access_table kx022a_nir_regs = {
+	.yes_ranges = &kx022a_noinc_read_ranges[0],
+	.n_yes_ranges = ARRAY_SIZE(kx022a_noinc_read_ranges),
+};
+
+const struct regmap_config kx022a_regmap = {
+	.reg_bits = 8,
+	.val_bits = 8,
+	.volatile_table = &kx022a_volatile_regs,
+	.rd_table = &kx022a_wo_regs,
+	.wr_table = &kx022a_ro_regs,
+	.rd_noinc_table = &kx022a_nir_regs,
+	.precious_table = &kx022a_precious_regs,
+	.max_register = KX022A_MAX_REGISTER,
+	.cache_type = REGCACHE_RBTREE,
+};
+EXPORT_SYMBOL_NS_GPL(kx022a_regmap, IIO_KX022A);
+
+struct kx022a_data {
+	struct regmap *regmap;
+	struct iio_trigger *trig;
+	struct device *dev;
+	struct iio_mount_matrix orientation;
+	int64_t timestamp, old_timestamp;
+
+	int irq;
+	int inc_reg;
+	int ien_reg;
+
+	unsigned int g_range;
+	unsigned int state;
+	unsigned int odr_ns;
+
+	bool trigger_enabled;
+	/*
+	 * Prevent toggling the sensor stby/active state (PC1 bit) in the
+	 * middle of a configuration, or when the fifo is enabled. Also,
+	 * protect the data stored/retrieved from this structure from
+	 * concurrent accesses.
+	 */
+	struct mutex mutex;
+	u8 watermark;
+
+	/* 3 x 16bit accel data + timestamp */
+	__le16 buffer[8] __aligned(IIO_DMA_MINALIGN);
+	struct {
+		__le16 channels[3];
+		s64 ts __aligned(8);
+	} scan;
+};
+
+static const struct iio_mount_matrix *
+kx022a_get_mount_matrix(const struct iio_dev *idev,
+			const struct iio_chan_spec *chan)
+{
+	struct kx022a_data *data = iio_priv(idev);
+
+	return &data->orientation;
+}
+
+enum {
+	AXIS_X,
+	AXIS_Y,
+	AXIS_Z,
+	AXIS_MAX
+};
+
+static const unsigned long kx022a_scan_masks[] = {
+	BIT(AXIS_X) | BIT(AXIS_Y) | BIT(AXIS_Z), 0
+};
+
+static const struct iio_chan_spec_ext_info kx022a_ext_info[] = {
+	IIO_MOUNT_MATRIX(IIO_SHARED_BY_TYPE, kx022a_get_mount_matrix),
+	{ }
+};
+
+#define KX022A_ACCEL_CHAN(axis, index)				\
+{								\
+	.type = IIO_ACCEL,					\
+	.modified = 1,						\
+	.channel2 = IIO_MOD_##axis,				\
+	.info_mask_separate = BIT(IIO_CHAN_INFO_RAW),		\
+	.info_mask_shared_by_type = BIT(IIO_CHAN_INFO_SCALE) |	\
+				BIT(IIO_CHAN_INFO_SAMP_FREQ),	\
+	.info_mask_shared_by_type_available =			\
+				BIT(IIO_CHAN_INFO_SCALE) |	\
+				BIT(IIO_CHAN_INFO_SAMP_FREQ),	\
+	.ext_info = kx022a_ext_info,				\
+	.address = KX022A_REG_##axis##OUT_L,			\
+	.scan_index = index,					\
+	.scan_type = {                                          \
+		.sign = 's',					\
+		.realbits = 16,					\
+		.storagebits = 16,				\
+		.endianness = IIO_LE,				\
+	},							\
+}
+
+static const struct iio_chan_spec kx022a_channels[] = {
+	KX022A_ACCEL_CHAN(X, 0),
+	KX022A_ACCEL_CHAN(Y, 1),
+	KX022A_ACCEL_CHAN(Z, 2),
+	IIO_CHAN_SOFT_TIMESTAMP(3),
+};
+
+/*
+ * The sensor HW can support ODR up to 1600 Hz, which is beyond what most of the
+ * Linux CPUs can handle without dropping samples. Also, the low power mode is
+ * not available for higher sample rates. Thus, the driver only supports 200 Hz
+ * and slower ODRs. The slowest is 0.78 Hz.
+ */
+static const int kx022a_accel_samp_freq_table[][2] = {
+	{ 0, 780000 },
+	{ 1, 563000 },
+	{ 3, 125000 },
+	{ 6, 250000 },
+	{ 12, 500000 },
+	{ 25, 0 },
+	{ 50, 0 },
+	{ 100, 0 },
+	{ 200, 0 },
+};
+
+static const unsigned int kx022a_odrs[] = {
+	1282051282,
+	639795266,
+	320 * MEGA,
+	160 * MEGA,
+	80 * MEGA,
+	40 * MEGA,
+	20 * MEGA,
+	10 * MEGA,
+	5 * MEGA,
+};
+
+/*
+ * range is typically +-2G/4G/8G/16G, distributed over the amount of bits.
+ * The scale table can be calculated using
+ *	(range / 2^bits) * g = (range / 2^bits) * 9.80665 m/s^2
+ *	=> KX022A uses 16 bit (HiRes mode - assume the low 8 bits are zeroed
+ *	in low-power mode(?) )
+ *	=> +/-2G  => 4 / 2^16 * 9,80665 * 10^6 (to scale to micro)
+ *	=> +/-2G  - 598.550415
+ *	   +/-4G  - 1197.10083
+ *	   +/-8G  - 2394.20166
+ *	   +/-16G - 4788.40332
+ */
+static const int kx022a_scale_table[][2] = {
+	{ 598, 550415 },
+	{ 1197, 100830 },
+	{ 2394, 201660 },
+	{ 4788, 403320 },
+};
+
+static int kx022a_read_avail(struct iio_dev *indio_dev,
+			     struct iio_chan_spec const *chan,
+			     const int **vals, int *type, int *length,
+			     long mask)
+{
+	switch (mask) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		*vals = (const int *)kx022a_accel_samp_freq_table;
+		*length = ARRAY_SIZE(kx022a_accel_samp_freq_table) *
+			  ARRAY_SIZE(kx022a_accel_samp_freq_table[0]);
+		*type = IIO_VAL_INT_PLUS_MICRO;
+		return IIO_AVAIL_LIST;
+	case IIO_CHAN_INFO_SCALE:
+		*vals = (const int *)kx022a_scale_table;
+		*length = ARRAY_SIZE(kx022a_scale_table) *
+			  ARRAY_SIZE(kx022a_scale_table[0]);
+		*type = IIO_VAL_INT_PLUS_MICRO;
+		return IIO_AVAIL_LIST;
+	default:
+		return -EINVAL;
+	}
+}
+
+#define KX022A_DEFAULT_PERIOD_NS (20 * NSEC_PER_MSEC)
+
+static void kx022a_reg2freq(unsigned int val,  int *val1, int *val2)
+{
+	*val1 = kx022a_accel_samp_freq_table[val & KX022A_MASK_ODR][0];
+	*val2 = kx022a_accel_samp_freq_table[val & KX022A_MASK_ODR][1];
+}
+
+static void kx022a_reg2scale(unsigned int val, unsigned int *val1,
+			     unsigned int *val2)
+{
+	val &= KX022A_MASK_GSEL;
+	val >>= KX022A_GSEL_SHIFT;
+
+	*val1 = kx022a_scale_table[val][0];
+	*val2 = kx022a_scale_table[val][1];
+}
+
+static int kx022a_turn_on_off_unlocked(struct kx022a_data *data, bool on)
+{
+	int ret;
+
+	if (on)
+		ret = regmap_set_bits(data->regmap, KX022A_REG_CNTL,
+				      KX022A_MASK_PC1);
+	else
+		ret = regmap_clear_bits(data->regmap, KX022A_REG_CNTL,
+					KX022A_MASK_PC1);
+	if (ret)
+		dev_err(data->dev, "Turn %s fail %d\n", str_on_off(on), ret);
+
+	return ret;
+
+}
+
+static int kx022a_turn_off_lock(struct kx022a_data *data)
+{
+	int ret;
+
+	mutex_lock(&data->mutex);
+	ret = kx022a_turn_on_off_unlocked(data, false);
+	if (ret)
+		mutex_unlock(&data->mutex);
+
+	return ret;
+}
+
+static int kx022a_turn_on_unlock(struct kx022a_data *data)
+{
+	int ret;
+
+	ret = kx022a_turn_on_off_unlocked(data, true);
+	mutex_unlock(&data->mutex);
+
+	return ret;
+}
+
+static int kx022a_write_raw(struct iio_dev *idev,
+			    struct iio_chan_spec const *chan,
+			    int val, int val2, long mask)
+{
+	struct kx022a_data *data = iio_priv(idev);
+	int ret, n;
+
+	/*
+	 * We should not allow changing scale or frequency when FIFO is running
+	 * as it will mess the timestamp/scale for samples existing in the
+	 * buffer. If this turns out to be an issue we can later change logic
+	 * to internally flush the fifo before reconfiguring so the samples in
+	 * fifo keep matching the freq/scale settings. (Such setup could cause
+	 * issues if users trust the watermark to be reached within known
+	 * time-limit).
+	 */
+	ret = iio_device_claim_direct_mode(idev);
+	if (ret)
+		return ret;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		n = ARRAY_SIZE(kx022a_accel_samp_freq_table);
+
+		while (n--)
+			if (val == kx022a_accel_samp_freq_table[n][0] &&
+			    val2 == kx022a_accel_samp_freq_table[n][1])
+				break;
+		if (n < 0) {
+			ret = -EINVAL;
+			goto unlock_out;
+		}
+		ret = kx022a_turn_off_lock(data);
+		if (ret)
+			break;
+
+		ret = regmap_update_bits(data->regmap,
+					 KX022A_REG_ODCNTL,
+					 KX022A_MASK_ODR, n);
+		data->odr_ns = kx022a_odrs[n];
+		kx022a_turn_on_unlock(data);
+		break;
+	case IIO_CHAN_INFO_SCALE:
+		n = ARRAY_SIZE(kx022a_scale_table);
+
+		while (n-- > 0)
+			if (val == kx022a_scale_table[n][0] &&
+			    val2 == kx022a_scale_table[n][1])
+				break;
+		if (n < 0) {
+			ret = -EINVAL;
+			goto unlock_out;
+		}
+
+		ret = kx022a_turn_off_lock(data);
+		if (ret)
+			break;
+
+		ret = regmap_update_bits(data->regmap, KX022A_REG_CNTL,
+					 KX022A_MASK_GSEL,
+					 n << KX022A_GSEL_SHIFT);
+		kx022a_turn_on_unlock(data);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+unlock_out:
+	iio_device_release_direct_mode(idev);
+
+	return ret;
+}
+
+static int kx022a_fifo_set_wmi(struct kx022a_data *data)
+{
+	u8 threshold;
+
+	threshold = data->watermark;
+
+	return regmap_update_bits(data->regmap, KX022A_REG_BUF_CNTL1,
+				  KX022A_MASK_WM_TH, threshold);
+}
+
+static int kx022a_get_axis(struct kx022a_data *data,
+			   struct iio_chan_spec const *chan,
+			   int *val)
+{
+	int ret;
+
+	ret = regmap_bulk_read(data->regmap, chan->address, &data->buffer[0],
+			       sizeof(__le16));
+	if (ret)
+		return ret;
+
+	*val = le16_to_cpu(data->buffer[0]);
+
+	return IIO_VAL_INT;
+}
+
+static int kx022a_read_raw(struct iio_dev *idev,
+			   struct iio_chan_spec const *chan,
+			   int *val, int *val2, long mask)
+{
+	struct kx022a_data *data = iio_priv(idev);
+	unsigned int regval;
+	int ret;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_RAW:
+		ret = iio_device_claim_direct_mode(idev);
+		if (ret)
+			return ret;
+
+		mutex_lock(&data->mutex);
+		ret = kx022a_get_axis(data, chan, val);
+		mutex_unlock(&data->mutex);
+
+		iio_device_release_direct_mode(idev);
+
+		return ret;
+
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		ret = regmap_read(data->regmap, KX022A_REG_ODCNTL, &regval);
+		if (ret)
+			return ret;
+
+		if ((regval & KX022A_MASK_ODR) >
+		    ARRAY_SIZE(kx022a_accel_samp_freq_table)) {
+			dev_err(data->dev, "Invalid ODR\n");
+			return -EINVAL;
+		}
+
+		kx022a_reg2freq(regval, val, val2);
+
+		return IIO_VAL_INT_PLUS_MICRO;
+
+	case IIO_CHAN_INFO_SCALE:
+		ret = regmap_read(data->regmap, KX022A_REG_CNTL, &regval);
+		if (ret < 0)
+			return ret;
+
+		kx022a_reg2scale(regval, val, val2);
+
+		return IIO_VAL_INT_PLUS_MICRO;
+	}
+
+	return -EINVAL;
+};
+
+static int kx022a_validate_trigger(struct iio_dev *idev,
+				   struct iio_trigger *trig)
+{
+	struct kx022a_data *data = iio_priv(idev);
+
+	if (data->trig != trig)
+		return -EINVAL;
+
+	return 0;
+}
+
+static int kx022a_set_watermark(struct iio_dev *idev, unsigned int val)
+{
+	struct kx022a_data *data = iio_priv(idev);
+
+	if (val > KX022A_FIFO_LENGTH)
+		val = KX022A_FIFO_LENGTH;
+
+	mutex_lock(&data->mutex);
+	data->watermark = val;
+	mutex_unlock(&data->mutex);
+
+	return 0;
+}
+
+static ssize_t hwfifo_enabled_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct iio_dev *idev = dev_to_iio_dev(dev);
+	struct kx022a_data *data = iio_priv(idev);
+	bool state;
+
+	mutex_lock(&data->mutex);
+	state = data->state;
+	mutex_unlock(&data->mutex);
+
+	return sysfs_emit(buf, "%d\n", state);
+}
+
+static ssize_t hwfifo_watermark_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct iio_dev *idev = dev_to_iio_dev(dev);
+	struct kx022a_data *data = iio_priv(idev);
+	int wm;
+
+	mutex_lock(&data->mutex);
+	wm = data->watermark;
+	mutex_unlock(&data->mutex);
+
+	return sysfs_emit(buf, "%d\n", wm);
+}
+
+static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0);
+static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0);
+
+static const struct attribute *kx022a_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
+	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+	NULL
+};
+
+static int kx022a_drop_fifo_contents(struct kx022a_data *data)
+{
+	/*
+	 * We must clear the old time-stamp to avoid computing the timestamps
+	 * based on samples acquired when buffer was last enabled.
+	 *
+	 * We don't need to protect the timestamp as long as we are only
+	 * called from fifo-disable where we can guarantee the sensor is not
+	 * triggering interrupts and where the mutex is locked to prevent the
+	 * user-space access.
+	 */
+	data->timestamp = 0;
+
+	return regmap_write(data->regmap, KX022A_REG_BUF_CLEAR, 0x0);
+}
+
+static int __kx022a_fifo_flush(struct iio_dev *idev, unsigned int samples,
+			       bool irq)
+{
+	struct kx022a_data *data = iio_priv(idev);
+	struct device *dev = regmap_get_device(data->regmap);
+	__le16 buffer[KX022A_FIFO_LENGTH * 3];
+	uint64_t sample_period;
+	int count, fifo_bytes;
+	bool renable = false;
+	int64_t tstamp;
+	int ret, i;
+
+	ret = regmap_read(data->regmap, KX022A_REG_BUF_STATUS_1, &fifo_bytes);
+	if (ret) {
+		dev_err(dev, "Error reading buffer status\n");
+		return ret;
+	}
+
+	/* Let's not overflow if we for some reason get bogus value from i2c */
+	if (fifo_bytes == KX022A_FIFO_FULL_VALUE)
+		fifo_bytes = KX022A_FIFO_MAX_BYTES;
+
+	if (fifo_bytes % KX022A_FIFO_SAMPLES_SIZE_BYTES)
+		dev_warn(data->dev, "Bad FIFO alignment. Data may be corrupt\n");
+
+	count = fifo_bytes / KX022A_FIFO_SAMPLES_SIZE_BYTES;
+	if (!count)
+		return 0;
+
+	/*
+	 * If we are being called from IRQ handler we know the stored timestamp
+	 * is fairly accurate for the last stored sample. Otherwise, if we are
+	 * called as a result of a read operation from userspace and hence
+	 * before the watermark interrupt was triggered, take a timestamp
+	 * now. We can fall anywhere in between two samples so the error in this
+	 * case is at most one sample period.
+	 */
+	if (!irq) {
+		/*
+		 * We need to have the IRQ disabled or we risk of messing-up
+		 * the timestamps. If we are ran from IRQ, then the
+		 * IRQF_ONESHOT has us covered - but if we are ran by the
+		 * user-space read we need to disable the IRQ to be on a safe
+		 * side. We do this usng synchronous disable so that if the
+		 * IRQ thread is being ran on other CPU we wait for it to be
+		 * finished.
+		 */
+		disable_irq(data->irq);
+		renable = true;
+
+		data->old_timestamp = data->timestamp;
+		data->timestamp = iio_get_time_ns(idev);
+	}
+
+	/*
+	 * Approximate timestamps for each of the sample based on the sampling
+	 * frequency, timestamp for last sample and number of samples.
+	 *
+	 * We'd better not use the current bandwidth settings to compute the
+	 * sample period. The real sample rate varies with the device and
+	 * small variation adds when we store a large number of samples.
+	 *
+	 * To avoid this issue we compute the actual sample period ourselves
+	 * based on the timestamp delta between the last two flush operations.
+	 */
+	if (data->old_timestamp) {
+		sample_period = data->timestamp - data->old_timestamp;
+		do_div(sample_period, count);
+	} else {
+		sample_period = data->odr_ns;
+	}
+	tstamp = data->timestamp - (count - 1) * sample_period;
+
+	if (samples && count > samples) {
+		/*
+		 * Here we leave some old samples to the buffer. We need to
+		 * adjust the timestamp to match the first sample in the buffer
+		 * or we will miscalculate the sample_period at next round.
+		 */
+		data->timestamp -= (count - samples) * sample_period;
+		count = samples;
+	}
+
+	fifo_bytes = count * KX022A_FIFO_SAMPLES_SIZE_BYTES;
+	ret = regmap_noinc_read(data->regmap, KX022A_REG_BUF_READ,
+				&buffer[0], fifo_bytes);
+	if (ret)
+		goto renable_out;
+
+	for (i = 0; i < count; i++) {
+		__le16 *sam = &buffer[i * 3];
+		__le16 *chs;
+		int bit;
+
+		chs = &data->scan.channels[0];
+		for_each_set_bit(bit, idev->active_scan_mask, AXIS_MAX)
+			chs[bit] = sam[bit];
+
+		iio_push_to_buffers_with_timestamp(idev, &data->scan, tstamp);
+
+		tstamp += sample_period;
+	}
+
+	ret = count;
+
+renable_out:
+	if (renable)
+		enable_irq(data->irq);
+
+	return ret;
+}
+
+static int kx022a_fifo_flush(struct iio_dev *idev, unsigned int samples)
+{
+	struct kx022a_data *data = iio_priv(idev);
+	int ret;
+
+	mutex_lock(&data->mutex);
+	ret = __kx022a_fifo_flush(idev, samples, false);
+	mutex_unlock(&data->mutex);
+
+	return ret;
+}
+
+static const struct iio_info kx022a_info = {
+	.read_raw = &kx022a_read_raw,
+	.write_raw = &kx022a_write_raw,
+	.read_avail = &kx022a_read_avail,
+
+	.validate_trigger	= kx022a_validate_trigger,
+	.hwfifo_set_watermark	= kx022a_set_watermark,
+	.hwfifo_flush_to_buffer	= kx022a_fifo_flush,
+};
+
+static int kx022a_set_drdy_irq(struct kx022a_data *data, bool en)
+{
+	if (en)
+		return regmap_set_bits(data->regmap, KX022A_REG_CNTL,
+				       KX022A_MASK_DRDY);
+
+	return regmap_clear_bits(data->regmap, KX022A_REG_CNTL,
+				 KX022A_MASK_DRDY);
+}
+
+static int kx022a_prepare_irq_pin(struct kx022a_data *data)
+{
+	/* Enable IRQ1 pin. Set polarity to active low */
+	int mask = KX022A_MASK_IEN | KX022A_MASK_IPOL |
+		   KX022A_MASK_ITYP;
+	int val = KX022A_MASK_IEN | KX022A_IPOL_LOW |
+		  KX022A_ITYP_LEVEL;
+	int ret;
+
+	ret = regmap_update_bits(data->regmap, data->inc_reg, mask, val);
+	if (ret)
+		return ret;
+
+	/* We enable WMI to IRQ pin only at buffer_enable */
+	mask = KX022A_MASK_INS2_DRDY;
+
+	return regmap_set_bits(data->regmap, data->ien_reg, mask);
+}
+
+static int kx022a_fifo_disable(struct kx022a_data *data)
+{
+	int ret = 0;
+
+	ret = kx022a_turn_off_lock(data);
+	if (ret)
+		return ret;
+
+	ret = regmap_clear_bits(data->regmap, data->ien_reg, KX022A_MASK_WMI);
+	if (ret)
+		goto unlock_out;
+
+	ret = regmap_clear_bits(data->regmap, KX022A_REG_BUF_CNTL2,
+				KX022A_MASK_BUF_EN);
+	if (ret)
+		goto unlock_out;
+
+	data->state &= ~KX022A_STATE_FIFO;
+
+	kx022a_drop_fifo_contents(data);
+
+	return kx022a_turn_on_unlock(data);
+
+unlock_out:
+	mutex_unlock(&data->mutex);
+
+	return ret;
+}
+
+static int kx022a_buffer_predisable(struct iio_dev *idev)
+{
+	struct kx022a_data *data = iio_priv(idev);
+
+	if (iio_device_get_current_mode(idev) == INDIO_BUFFER_TRIGGERED)
+		return 0;
+
+	return kx022a_fifo_disable(data);
+}
+
+static int kx022a_fifo_enable(struct kx022a_data *data)
+{
+	int ret;
+
+	ret = kx022a_turn_off_lock(data);
+	if (ret)
+		return ret;
+
+	/* Update watermark to HW */
+	ret = kx022a_fifo_set_wmi(data);
+	if (ret)
+		goto unlock_out;
+
+	/* Enable buffer */
+	ret = regmap_set_bits(data->regmap, KX022A_REG_BUF_CNTL2,
+			      KX022A_MASK_BUF_EN);
+	if (ret)
+		goto unlock_out;
+
+	data->state |= KX022A_STATE_FIFO;
+	ret = regmap_set_bits(data->regmap, data->ien_reg,
+			      KX022A_MASK_WMI);
+	if (ret)
+		goto unlock_out;
+
+	return kx022a_turn_on_unlock(data);
+
+unlock_out:
+	mutex_unlock(&data->mutex);
+
+	return ret;
+}
+
+static int kx022a_buffer_postenable(struct iio_dev *idev)
+{
+	struct kx022a_data *data = iio_priv(idev);
+
+	/*
+	 * If we use data-ready trigger, then the IRQ masks should be handled by
+	 * trigger enable and the hardware buffer is not used but we just update
+	 * results to the IIO fifo when data-ready triggers.
+	 */
+	if (iio_device_get_current_mode(idev) == INDIO_BUFFER_TRIGGERED)
+		return 0;
+
+	return kx022a_fifo_enable(data);
+}
+
+static const struct iio_buffer_setup_ops kx022a_buffer_ops = {
+	.postenable = kx022a_buffer_postenable,
+	.predisable = kx022a_buffer_predisable,
+};
+
+static irqreturn_t kx022a_trigger_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *idev = pf->indio_dev;
+	struct kx022a_data *data = iio_priv(idev);
+	int ret;
+
+	ret = regmap_bulk_read(data->regmap, KX022A_REG_XOUT_L, data->buffer,
+			       KX022A_FIFO_SAMPLES_SIZE_BYTES);
+	if (ret < 0)
+		goto err_read;
+
+	iio_push_to_buffers_with_timestamp(idev, data->buffer, pf->timestamp);
+err_read:
+	iio_trigger_notify_done(idev->trig);
+
+	return IRQ_HANDLED;
+}
+
+/* Get timestamps and wake the thread if we need to read data */
+static irqreturn_t kx022a_irq_handler(int irq, void *private)
+{
+	struct iio_dev *idev = private;
+	struct kx022a_data *data = iio_priv(idev);
+
+	data->old_timestamp = data->timestamp;
+	data->timestamp = iio_get_time_ns(idev);
+
+	if (data->state & KX022A_STATE_FIFO || data->trigger_enabled)
+		return IRQ_WAKE_THREAD;
+
+	return IRQ_NONE;
+}
+
+/*
+ * WMI and data-ready IRQs are acked when results are read. If we add
+ * TILT/WAKE or other IRQs - then we may need to implement the acking
+ * (which is racy).
+ */
+static irqreturn_t kx022a_irq_thread_handler(int irq, void *private)
+{
+	struct iio_dev *idev = private;
+	struct kx022a_data *data = iio_priv(idev);
+	irqreturn_t ret = IRQ_NONE;
+
+	mutex_lock(&data->mutex);
+
+	if (data->trigger_enabled) {
+		iio_trigger_poll_chained(data->trig);
+		ret = IRQ_HANDLED;
+	}
+
+	if (data->state & KX022A_STATE_FIFO) {
+		int ok;
+
+		ok = __kx022a_fifo_flush(idev, KX022A_FIFO_LENGTH, true);
+		if (ok > 0)
+			ret = IRQ_HANDLED;
+	}
+
+	mutex_unlock(&data->mutex);
+
+	return ret;
+}
+
+static int kx022a_trigger_set_state(struct iio_trigger *trig,
+				    bool state)
+{
+	struct kx022a_data *data = iio_trigger_get_drvdata(trig);
+	int ret = 0;
+
+	mutex_lock(&data->mutex);
+
+	if (data->trigger_enabled == state)
+		goto unlock_out;
+
+	if (data->state & KX022A_STATE_FIFO) {
+		dev_warn(data->dev, "Can't set trigger when FIFO enabled\n");
+		ret = -EBUSY;
+		goto unlock_out;
+	}
+
+	ret = kx022a_turn_on_off_unlocked(data, false);
+	if (ret)
+		goto unlock_out;
+
+	data->trigger_enabled = state;
+	ret = kx022a_set_drdy_irq(data, state);
+	if (ret)
+		goto unlock_out;
+
+	ret = kx022a_turn_on_off_unlocked(data, true);
+
+unlock_out:
+	mutex_unlock(&data->mutex);
+
+	return ret;
+}
+
+static const struct iio_trigger_ops kx022a_trigger_ops = {
+	.set_trigger_state = kx022a_trigger_set_state,
+};
+
+static int kx022a_chip_init(struct kx022a_data *data)
+{
+	int ret, val;
+
+	/* Reset the senor */
+	ret = regmap_write(data->regmap, KX022A_REG_CNTL2, KX022A_MASK_SRST);
+	if (ret)
+		return ret;
+
+	/*
+	 * I've seen I2C read failures if we poll too fast after the sensor
+	 * reset. Slight delay gives I2C block the time to recover.
+	 */
+	msleep(1);
+
+	ret = regmap_read_poll_timeout(data->regmap, KX022A_REG_CNTL2, val,
+				       !(val & KX022A_MASK_SRST),
+				       KX022A_SOFT_RESET_WAIT_TIME_US,
+				       KX022A_SOFT_RESET_TOTAL_WAIT_TIME_US);
+	if (ret) {
+		dev_err(data->dev, "Sensor reset %s\n",
+			val & KX022A_MASK_SRST ? "timeout" : "fail#");
+		return ret;
+	}
+
+	ret = regmap_reinit_cache(data->regmap, &kx022a_regmap);
+	if (ret) {
+		dev_err(data->dev, "Failed to reinit reg cache\n");
+		return ret;
+	}
+
+	/* set data res 16bit */
+	ret = regmap_set_bits(data->regmap, KX022A_REG_BUF_CNTL2,
+			      KX022A_MASK_BRES16);
+	if (ret) {
+		dev_err(data->dev, "Failed to set data resolution\n");
+		return ret;
+	}
+
+	return kx022a_prepare_irq_pin(data);
+}
+
+int kx022a_probe_internal(struct device *dev)
+{
+	static const char * const regulator_names[] = {"io-vdd", "vdd"};
+	struct iio_trigger *indio_trig;
+	struct fwnode_handle *fwnode;
+	struct kx022a_data *data;
+	struct regmap *regmap;
+	unsigned int chip_id;
+	struct iio_dev *idev;
+	int ret, irq;
+	char *name;
+
+	regmap = dev_get_regmap(dev, NULL);
+	if (!regmap) {
+		dev_err(dev, "no regmap\n");
+		return -EINVAL;
+	}
+
+	fwnode = dev_fwnode(dev);
+	if (!fwnode)
+		return -ENODEV;
+
+	idev = devm_iio_device_alloc(dev, sizeof(*data));
+	if (!idev)
+		return -ENOMEM;
+
+	data = iio_priv(idev);
+
+	/*
+	 * VDD is the analog and digital domain voltage supply and
+	 * IO_VDD is the digital I/O voltage supply.
+	 */
+	ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names),
+					     regulator_names);
+	if (ret && ret != -ENODEV)
+		return dev_err_probe(dev, ret, "failed to enable regulator\n");
+
+	ret = regmap_read(regmap, KX022A_REG_WHO, &chip_id);
+	if (ret)
+		return dev_err_probe(dev, ret, "Failed to access sensor\n");
+
+	if (chip_id != KX022A_ID) {
+		dev_err(dev, "unsupported device 0x%x\n", chip_id);
+		return -EINVAL;
+	}
+
+	irq = fwnode_irq_get_byname(fwnode, "INT1");
+	if (irq > 0) {
+		data->inc_reg = KX022A_REG_INC1;
+		data->ien_reg = KX022A_REG_INC4;
+	} else {
+		irq = fwnode_irq_get_byname(fwnode, "INT2");
+		if (irq <= 0)
+			return dev_err_probe(dev, irq, "No suitable IRQ\n");
+
+		data->inc_reg = KX022A_REG_INC5;
+		data->ien_reg = KX022A_REG_INC6;
+	}
+
+	data->regmap = regmap;
+	data->dev = dev;
+	data->irq = irq;
+	data->odr_ns = KX022A_DEFAULT_PERIOD_NS;
+	mutex_init(&data->mutex);
+
+	idev->channels = kx022a_channels;
+	idev->num_channels = ARRAY_SIZE(kx022a_channels);
+	idev->name = "kx022-accel";
+	idev->info = &kx022a_info;
+	idev->modes = INDIO_DIRECT_MODE | INDIO_BUFFER_SOFTWARE;
+	idev->available_scan_masks = kx022a_scan_masks;
+
+	/* Read the mounting matrix, if present */
+	ret = iio_read_mount_matrix(dev, &data->orientation);
+	if (ret)
+		return ret;
+
+	/* The sensor must be turned off for configuration */
+	ret = kx022a_turn_off_lock(data);
+	if (ret)
+		return ret;
+
+	ret = kx022a_chip_init(data);
+	if (ret) {
+		mutex_unlock(&data->mutex);
+		return ret;
+	}
+
+	ret = kx022a_turn_on_unlock(data);
+	if (ret)
+		return ret;
+
+	ret = devm_iio_triggered_buffer_setup_ext(dev, idev,
+						  &iio_pollfunc_store_time,
+						  kx022a_trigger_handler,
+						  IIO_BUFFER_DIRECTION_IN,
+						  &kx022a_buffer_ops,
+						  kx022a_fifo_attributes);
+
+	if (ret)
+		return dev_err_probe(data->dev, ret,
+				     "iio_triggered_buffer_setup_ext FAIL\n");
+	indio_trig = devm_iio_trigger_alloc(dev, "%sdata-rdy-dev%d", idev->name,
+					    iio_device_id(idev));
+	if (!indio_trig)
+		return -ENOMEM;
+
+	data->trig = indio_trig;
+
+	indio_trig->ops = &kx022a_trigger_ops;
+	iio_trigger_set_drvdata(indio_trig, data);
+
+	/*
+	 * No need to check for NULL. request_threaded_irq() defaults to
+	 * dev_name() should the alloc fail.
+	 */
+	name = devm_kasprintf(data->dev, GFP_KERNEL, "%s-kx022a",
+			      dev_name(data->dev));
+
+	ret = devm_request_threaded_irq(data->dev, irq, kx022a_irq_handler,
+					&kx022a_irq_thread_handler,
+					IRQF_ONESHOT, name, idev);
+	if (ret)
+		return dev_err_probe(data->dev, ret, "Could not request IRQ\n");
+
+
+	ret = devm_iio_trigger_register(dev, indio_trig);
+	if (ret)
+		return dev_err_probe(data->dev, ret,
+				     "Trigger registration failed\n");
+
+	ret = devm_iio_device_register(data->dev, idev);
+	if (ret < 0)
+		return dev_err_probe(dev, ret,
+				     "Unable to register iio device\n");
+
+	return ret;
+}
+EXPORT_SYMBOL_NS_GPL(kx022a_probe_internal, IIO_KX022A);
+
+MODULE_DESCRIPTION("ROHM/Kionix KX022A accelerometer driver");
+MODULE_AUTHOR("Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/iio/accel/kionix-kx022a.h b/drivers/iio/accel/kionix-kx022a.h
new file mode 100644
index 0000000000000..12424649d438b
--- /dev/null
+++ b/drivers/iio/accel/kionix-kx022a.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2022 ROHM Semiconductors
+ *
+ * ROHM/KIONIX KX022A accelerometer driver
+ */
+
+#ifndef _KX022A_H_
+#define _KX022A_H_
+
+#include <linux/bits.h>
+#include <linux/regmap.h>
+
+#define KX022A_REG_WHO		0x0f
+#define KX022A_ID		0xc8
+
+#define KX022A_REG_CNTL2	0x19
+#define KX022A_MASK_SRST	BIT(7)
+#define KX022A_REG_CNTL		0x18
+#define KX022A_MASK_PC1		BIT(7)
+#define KX022A_MASK_RES		BIT(6)
+#define KX022A_MASK_DRDY	BIT(5)
+#define KX022A_MASK_GSEL	GENMASK(4, 3)
+#define KX022A_GSEL_SHIFT	3
+#define KX022A_GSEL_2		0x0
+#define KX022A_GSEL_4		BIT(3)
+#define KX022A_GSEL_8		BIT(4)
+#define KX022A_GSEL_16		GENMASK(4, 3)
+
+#define KX022A_REG_INS2		0x13
+#define KX022A_MASK_INS2_DRDY	BIT(4)
+#define KX122_MASK_INS2_WMI	BIT(5)
+
+#define KX022A_REG_XHP_L	0x0
+#define KX022A_REG_XOUT_L	0x06
+#define KX022A_REG_YOUT_L	0x08
+#define KX022A_REG_ZOUT_L	0x0a
+#define KX022A_REG_COTR		0x0c
+#define KX022A_REG_TSCP		0x10
+#define KX022A_REG_INT_REL	0x17
+
+#define KX022A_REG_ODCNTL	0x1b
+
+#define KX022A_REG_BTS_WUF_TH	0x31
+#define KX022A_REG_MAN_WAKE	0x2c
+
+#define KX022A_REG_BUF_CNTL1	0x3a
+#define KX022A_MASK_WM_TH	GENMASK(6, 0)
+#define KX022A_REG_BUF_CNTL2	0x3b
+#define KX022A_MASK_BUF_EN	BIT(7)
+#define KX022A_MASK_BRES16	BIT(6)
+#define KX022A_REG_BUF_STATUS_1	0x3c
+#define KX022A_REG_BUF_STATUS_2	0x3d
+#define KX022A_REG_BUF_CLEAR	0x3e
+#define KX022A_REG_BUF_READ	0x3f
+#define KX022A_MASK_ODR		GENMASK(3, 0)
+#define KX022A_ODR_SHIFT	3
+#define KX022A_FIFO_MAX_WMI_TH	41
+
+#define KX022A_REG_INC1		0x1c
+#define KX022A_REG_INC5		0x20
+#define KX022A_REG_INC6		0x21
+#define KX022A_MASK_IEN		BIT(5)
+#define KX022A_MASK_IPOL	BIT(4)
+#define KX022A_IPOL_LOW		0
+#define KX022A_IPOL_HIGH	KX022A_MASK_IPOL1
+#define KX022A_MASK_ITYP	BIT(3)
+#define KX022A_ITYP_PULSE	KX022A_MASK_ITYP
+#define KX022A_ITYP_LEVEL	0
+
+#define KX022A_REG_INC4		0x1f
+#define KX022A_MASK_WMI		BIT(5)
+
+#define KX022A_REG_SELF_TEST	0x60
+#define KX022A_MAX_REGISTER	0x60
+
+struct device;
+
+int kx022a_probe_internal(struct device *dev);
+extern const struct regmap_config kx022a_regmap;
+
+#endif
-- 
GitLab


From d269e0d60761e9d2a6ca2adb9b3b44323a2f55cb Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 24 Oct 2022 15:40:58 +0300
Subject: [PATCH 2329/4122] MAINTAINERS: Add KX022A maintainer entry

Add maintainer entry for ROHM/Kionix KX022A accelerometer sensor driver.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/7895435f7fd31a3b576fc6a59b01eb3202c85d36.1666614295.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 MAINTAINERS | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index ecec0bb6b09e3..be373dfae4dd4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11450,6 +11450,12 @@ F:	drivers/mfd/khadas-mcu.c
 F:	include/linux/mfd/khadas-mcu.h
 F:	drivers/thermal/khadas_mcu_fan.c
 
+KIONIX/ROHM KX022A ACCELEROMETER
+M:	Matti Vaittinen <mazziesaccount@gmail.com>
+L:	linux-iio@vger.kernel.org
+S:	Supported
+F:	drivers/iio/accel/kionix-kx022a*
+
 KMEMLEAK
 M:	Catalin Marinas <catalin.marinas@arm.com>
 S:	Maintained
-- 
GitLab


From 6abcb19e0a6a371b5dafe4473ea886a6891a5cd5 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 4 Nov 2022 09:31:48 +0000
Subject: [PATCH 2330/4122] iio: adc: ad4130: Fix spelling mistake
 "diffreential" -> "differential"

There is a spelling mistake in an error message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221104093148.167765-1-colin.i.king@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad4130.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c
index 9a4d0043d797e..ae1a4dd5c40ad 100644
--- a/drivers/iio/adc/ad4130.c
+++ b/drivers/iio/adc/ad4130.c
@@ -1480,7 +1480,7 @@ static int ad4130_validate_diff_channel(struct ad4130_state *st, u32 pin)
 
 	if (pin >= AD4130_MAX_DIFF_INPUTS)
 		return dev_err_probe(dev, -EINVAL,
-				     "Invalid diffreential channel %u\n", pin);
+				     "Invalid differential channel %u\n", pin);
 
 	if (pin >= AD4130_MAX_ANALOG_PINS)
 		return 0;
-- 
GitLab


From 2cc64a23c4e26107887af23a62f8ba3c79ff7ab5 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 3 Oct 2022 11:12:04 +0300
Subject: [PATCH 2331/4122] iio: Add IIO_STATIC_CONST_DEVICE_ATTR

Add IIO_STATIC_CONST_DEVICE_ATTR macro for creating an read-only
iio_dev_attr which returns constant value. This macro is intended to be
used when replacing the IIO_CONST_ATTR - attributes for triggered
buffers because the triggered buffer attributes must be of type
iio_dev_attr.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/8dd853dd0ef8eb40cb980cc6f6e7a43166de3afb.1664782676.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/sysfs.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/include/linux/iio/sysfs.h b/include/linux/iio/sysfs.h
index e51fba66de4b2..de5bb125815c5 100644
--- a/include/linux/iio/sysfs.h
+++ b/include/linux/iio/sysfs.h
@@ -97,6 +97,17 @@ struct iio_const_attr {
 	= { .string = _string,						\
 	    .dev_attr = __ATTR(_name, S_IRUGO, iio_read_const_attr, NULL)}
 
+#define IIO_STATIC_CONST_DEVICE_ATTR(_name, _string)				\
+	static ssize_t iio_const_dev_attr_show_##_name(			\
+					struct device *dev,		\
+					struct device_attribute *attr,	\
+					char *buf)			\
+	{								\
+		return sysfs_emit(buf, "%s\n", _string);		\
+	}								\
+	static IIO_DEVICE_ATTR(_name, 0444,				\
+			       iio_const_dev_attr_show_##_name, NULL, 0)
+
 /* Generic attributes of onetype or another */
 
 /**
-- 
GitLab


From f0ab171b80d49a2a47b88f79de38ca64c7d641d4 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 3 Oct 2022 11:12:25 +0300
Subject: [PATCH 2332/4122] iio: adxl367: Use IIO_STATIC_CONST_DEVICE_ATTR()

Slightly simplify by dropping open-coded constant data iio_dev_attr
functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/c96c55363b8f36017ef7f18fdfe810cd3990e2a4.1664782676.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl367.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c
index d7af3a006a444..d8e98f65f25df 100644
--- a/drivers/iio/accel/adxl367.c
+++ b/drivers/iio/accel/adxl367.c
@@ -1183,22 +1183,9 @@ static ssize_t adxl367_get_fifo_watermark(struct device *dev,
 	return sysfs_emit(buf, "%d\n", fifo_watermark);
 }
 
-static ssize_t hwfifo_watermark_min_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", "1");
-}
-
-static ssize_t hwfifo_watermark_max_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", __stringify(ADXL367_FIFO_MAX_WATERMARK));
-}
-
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0);
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "1");
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max,
+			     __stringify(ADXL367_FIFO_MAX_WATERMARK));
 static IIO_DEVICE_ATTR(hwfifo_watermark, 0444,
 		       adxl367_get_fifo_watermark, NULL, 0);
 static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
-- 
GitLab


From f7e6804509eeb27accc6625150db3f02d0b93ad7 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 3 Oct 2022 11:12:48 +0300
Subject: [PATCH 2333/4122] iio: adxl372: Use IIO_STATIC_CONST_DEVICE_ATTR()

Slightly simplify by dropping open-coded constant data iio_dev_attr
functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/3632af8849ef101c54ec1f739596f22c773b067c.1664782676.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl372.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c
index bc53af809d5de..90e1d726b9c59 100644
--- a/drivers/iio/accel/adxl372.c
+++ b/drivers/iio/accel/adxl372.c
@@ -998,22 +998,9 @@ static ssize_t adxl372_get_fifo_watermark(struct device *dev,
 	return sprintf(buf, "%d\n", st->watermark);
 }
 
-static ssize_t hwfifo_watermark_min_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", "1");
-}
-
-static ssize_t hwfifo_watermark_max_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", __stringify(ADXL372_FIFO_SIZE));
-}
-
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0);
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "1");
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max,
+			     __stringify(ADXL372_FIFO_SIZE));
 static IIO_DEVICE_ATTR(hwfifo_watermark, 0444,
 		       adxl372_get_fifo_watermark, NULL, 0);
 static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
-- 
GitLab


From c02b2a5166a58823e8a335c486ee71ce4f22f2b7 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 3 Oct 2022 11:13:10 +0300
Subject: [PATCH 2334/4122] iio: bmc150-accel-core: Use
 IIO_STATIC_CONST_DEVICE_ATTR()

Slightly simplify by dropping open-coded constant data iio_dev_attr
functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Link: https://lore.kernel.org/r/b1b0d662705b43d68594639fb7719bc431101f79.1664782676.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/bmc150-accel-core.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index 92f8b139acce9..b4a0779448967 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -925,22 +925,9 @@ static const struct iio_chan_spec_ext_info bmc150_accel_ext_info[] = {
 	{ }
 };
 
-static ssize_t hwfifo_watermark_min_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", "1");
-}
-
-static ssize_t hwfifo_watermark_max_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", __stringify(BMC150_ACCEL_FIFO_LENGTH));
-}
-
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0);
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "1");
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max,
+			     __stringify(BMC150_ACCEL_FIFO_LENGTH));
 static IIO_DEVICE_ATTR(hwfifo_enabled, S_IRUGO,
 		       bmc150_accel_get_fifo_state, NULL, 0);
 static IIO_DEVICE_ATTR(hwfifo_watermark, S_IRUGO,
-- 
GitLab


From c1531e3ad43f5e66cd04ae19644343b9222204d2 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 3 Oct 2022 11:13:31 +0300
Subject: [PATCH 2335/4122] iio: at91-sama5d2_adc: Use
 IIO_STATIC_CONST_DEVICE_ATTR()

Slightly simplify by dropping open-coded constant data iio_dev_attr
functions and using the IIO_STATIC_CONST_DEVICE_ATTR() instead.

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Tested-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Link: https://lore.kernel.org/r/4476a4ce852febb3eb863878e66751c787195b18.1664782676.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/at91-sama5d2_adc.c | 19 +++----------------
 1 file changed, 3 insertions(+), 16 deletions(-)

diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
index 33e2515522149..aa2e1640c4a3c 100644
--- a/drivers/iio/adc/at91-sama5d2_adc.c
+++ b/drivers/iio/adc/at91-sama5d2_adc.c
@@ -2193,26 +2193,13 @@ static ssize_t at91_adc_get_watermark(struct device *dev,
 	return scnprintf(buf, PAGE_SIZE, "%d\n", st->dma_st.watermark);
 }
 
-static ssize_t hwfifo_watermark_min_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", "2");
-}
-
-static ssize_t hwfifo_watermark_max_show(struct device *dev,
-					 struct device_attribute *attr,
-					 char *buf)
-{
-	return sysfs_emit(buf, "%s\n", AT91_HWFIFO_MAX_SIZE_STR);
-}
-
 static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
 		       at91_adc_get_fifo_state, NULL, 0);
 static IIO_DEVICE_ATTR(hwfifo_watermark, 0444,
 		       at91_adc_get_watermark, NULL, 0);
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_min, 0);
-static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
+
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "2");
+IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max, AT91_HWFIFO_MAX_SIZE_STR);
 
 static const struct attribute *at91_adc_fifo_attributes[] = {
 	&iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
-- 
GitLab


From 0a33755c4b01ed62a6d025cb585928304f9653d7 Mon Sep 17 00:00:00 2001
From: Matti Vaittinen <mazziesaccount@gmail.com>
Date: Mon, 3 Oct 2022 11:13:53 +0300
Subject: [PATCH 2336/4122] iio: Don't silently expect attribute types

The iio_triggered_buffer_setup_ext() and the
devm_iio_kfifo_buffer_setup_ext() were changed by
commit 15097c7a1adc ("iio: buffer: wrap all buffer attributes into iio_dev_attr")
to silently expect that all attributes given in buffer_attrs array are
device-attributes. This expectation was not forced by the API - and some
drivers did register attributes created by IIO_CONST_ATTR().

When using IIO_CONST_ATTRs the added attribute "wrapping" does not copy
the pointer to stored string constant and when the sysfs file is read the
kernel will access to invalid location.

Change the function signatures to expect an array of iio_dev_attrs to
avoid similar errors in the future.

Merge conflict resolved whilst applying due to patch crossing with
two new drivers (kx022a accelerometer and ad4130 ADC).

Signed-off-by: Matti Vaittinen <mazziesaccount@gmail.com>
Tested-by: Claudiu Beznea <claudiu.beznea@microchip.com>
Link: https://lore.kernel.org/r/63f54787a684eb1232f1c5d275a09c786987fe4a.1664782676.git.mazziesaccount@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl367.c                          | 10 +++++-----
 drivers/iio/accel/adxl372.c                          | 10 +++++-----
 drivers/iio/accel/bmc150-accel-core.c                | 12 ++++++------
 drivers/iio/accel/kionix-kx022a.c                    |  6 +++---
 drivers/iio/adc/ad4130.c                             | 10 +++++-----
 drivers/iio/adc/at91-sama5d2_adc.c                   | 12 ++++++------
 drivers/iio/buffer/industrialio-buffer-dmaengine.c   |  4 ++--
 drivers/iio/buffer/industrialio-triggered-buffer.c   |  4 ++--
 drivers/iio/buffer/kfifo_buf.c                       |  2 +-
 .../common/cros_ec_sensors/cros_ec_sensors_core.c    |  6 +++---
 drivers/iio/common/hid-sensors/hid-sensor-trigger.c  |  8 ++++----
 drivers/iio/industrialio-buffer.c                    | 11 +++++++----
 include/linux/iio/buffer_impl.h                      |  2 +-
 include/linux/iio/kfifo_buf.h                        |  3 ++-
 include/linux/iio/triggered_buffer.h                 |  6 +++---
 15 files changed, 55 insertions(+), 51 deletions(-)

diff --git a/drivers/iio/accel/adxl367.c b/drivers/iio/accel/adxl367.c
index d8e98f65f25df..90b7ae6d42b77 100644
--- a/drivers/iio/accel/adxl367.c
+++ b/drivers/iio/accel/adxl367.c
@@ -1191,11 +1191,11 @@ static IIO_DEVICE_ATTR(hwfifo_watermark, 0444,
 static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
 		       adxl367_get_fifo_enabled, NULL, 0);
 
-static const struct attribute *adxl367_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
-	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+static const struct iio_dev_attr *adxl367_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark_min,
+	&iio_dev_attr_hwfifo_watermark_max,
+	&iio_dev_attr_hwfifo_watermark,
+	&iio_dev_attr_hwfifo_enabled,
 	NULL,
 };
 
diff --git a/drivers/iio/accel/adxl372.c b/drivers/iio/accel/adxl372.c
index 90e1d726b9c59..c4193286eb053 100644
--- a/drivers/iio/accel/adxl372.c
+++ b/drivers/iio/accel/adxl372.c
@@ -1006,11 +1006,11 @@ static IIO_DEVICE_ATTR(hwfifo_watermark, 0444,
 static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
 		       adxl372_get_fifo_enabled, NULL, 0);
 
-static const struct attribute *adxl372_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
-	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+static const struct iio_dev_attr *adxl372_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark_min,
+	&iio_dev_attr_hwfifo_watermark_max,
+	&iio_dev_attr_hwfifo_watermark,
+	&iio_dev_attr_hwfifo_enabled,
 	NULL,
 };
 
diff --git a/drivers/iio/accel/bmc150-accel-core.c b/drivers/iio/accel/bmc150-accel-core.c
index b4a0779448967..110591804b4c4 100644
--- a/drivers/iio/accel/bmc150-accel-core.c
+++ b/drivers/iio/accel/bmc150-accel-core.c
@@ -933,11 +933,11 @@ static IIO_DEVICE_ATTR(hwfifo_enabled, S_IRUGO,
 static IIO_DEVICE_ATTR(hwfifo_watermark, S_IRUGO,
 		       bmc150_accel_get_fifo_watermark, NULL, 0);
 
-static const struct attribute *bmc150_accel_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
-	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+static const struct iio_dev_attr *bmc150_accel_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark_min,
+	&iio_dev_attr_hwfifo_watermark_max,
+	&iio_dev_attr_hwfifo_watermark,
+	&iio_dev_attr_hwfifo_enabled,
 	NULL,
 };
 
@@ -1665,7 +1665,7 @@ int bmc150_accel_core_probe(struct device *dev, struct regmap *regmap, int irq,
 			    enum bmc150_type type, const char *name,
 			    bool block_supported)
 {
-	const struct attribute **fifo_attrs;
+	const struct iio_dev_attr **fifo_attrs;
 	struct bmc150_accel_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
diff --git a/drivers/iio/accel/kionix-kx022a.c b/drivers/iio/accel/kionix-kx022a.c
index 50de7f9a1cc7f..f866859855cdd 100644
--- a/drivers/iio/accel/kionix-kx022a.c
+++ b/drivers/iio/accel/kionix-kx022a.c
@@ -575,9 +575,9 @@ static ssize_t hwfifo_watermark_show(struct device *dev,
 static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0);
 static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0);
 
-static const struct attribute *kx022a_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
-	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+static const struct iio_dev_attr *kx022a_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark,
+	&iio_dev_attr_hwfifo_enabled,
 	NULL
 };
 
diff --git a/drivers/iio/adc/ad4130.c b/drivers/iio/adc/ad4130.c
index ae1a4dd5c40ad..38394341fd6e7 100644
--- a/drivers/iio/adc/ad4130.c
+++ b/drivers/iio/adc/ad4130.c
@@ -1380,11 +1380,11 @@ static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
 static IIO_DEVICE_ATTR_RO(hwfifo_watermark, 0);
 static IIO_DEVICE_ATTR_RO(hwfifo_enabled, 0);
 
-static const struct attribute *ad4130_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
-	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+static const struct iio_dev_attr *ad4130_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark_min,
+	&iio_dev_attr_hwfifo_watermark_max,
+	&iio_dev_attr_hwfifo_watermark,
+	&iio_dev_attr_hwfifo_enabled,
 	NULL
 };
 
diff --git a/drivers/iio/adc/at91-sama5d2_adc.c b/drivers/iio/adc/at91-sama5d2_adc.c
index aa2e1640c4a3c..a00b8316a12f2 100644
--- a/drivers/iio/adc/at91-sama5d2_adc.c
+++ b/drivers/iio/adc/at91-sama5d2_adc.c
@@ -2201,11 +2201,11 @@ static IIO_DEVICE_ATTR(hwfifo_watermark, 0444,
 IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_min, "2");
 IIO_STATIC_CONST_DEVICE_ATTR(hwfifo_watermark_max, AT91_HWFIFO_MAX_SIZE_STR);
 
-static const struct attribute *at91_adc_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_watermark_min.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark.dev_attr.attr,
-	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+static const struct iio_dev_attr *at91_adc_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_watermark_min,
+	&iio_dev_attr_hwfifo_watermark_max,
+	&iio_dev_attr_hwfifo_watermark,
+	&iio_dev_attr_hwfifo_enabled,
 	NULL,
 };
 
@@ -2222,7 +2222,7 @@ static int at91_adc_buffer_and_trigger_init(struct device *dev,
 					    struct iio_dev *indio)
 {
 	struct at91_adc_state *st = iio_priv(indio);
-	const struct attribute **fifo_attrs;
+	const struct iio_dev_attr **fifo_attrs;
 	int ret;
 
 	if (st->selected_trig->hw_trig)
diff --git a/drivers/iio/buffer/industrialio-buffer-dmaengine.c b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
index f744b62a636ad..5f85ba38e6f6e 100644
--- a/drivers/iio/buffer/industrialio-buffer-dmaengine.c
+++ b/drivers/iio/buffer/industrialio-buffer-dmaengine.c
@@ -142,8 +142,8 @@ static ssize_t iio_dmaengine_buffer_get_length_align(struct device *dev,
 static IIO_DEVICE_ATTR(length_align_bytes, 0444,
 		       iio_dmaengine_buffer_get_length_align, NULL, 0);
 
-static const struct attribute *iio_dmaengine_buffer_attrs[] = {
-	&iio_dev_attr_length_align_bytes.dev_attr.attr,
+static const struct iio_dev_attr *iio_dmaengine_buffer_attrs[] = {
+	&iio_dev_attr_length_align_bytes,
 	NULL,
 };
 
diff --git a/drivers/iio/buffer/industrialio-triggered-buffer.c b/drivers/iio/buffer/industrialio-triggered-buffer.c
index 8d4fc97d10059..c7671b1f5eada 100644
--- a/drivers/iio/buffer/industrialio-triggered-buffer.c
+++ b/drivers/iio/buffer/industrialio-triggered-buffer.c
@@ -41,7 +41,7 @@ int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev,
 	irqreturn_t (*thread)(int irq, void *p),
 	enum iio_buffer_direction direction,
 	const struct iio_buffer_setup_ops *setup_ops,
-	const struct attribute **buffer_attrs)
+	const struct iio_dev_attr **buffer_attrs)
 {
 	struct iio_buffer *buffer;
 	int ret;
@@ -110,7 +110,7 @@ int devm_iio_triggered_buffer_setup_ext(struct device *dev,
 					irqreturn_t (*thread)(int irq, void *p),
 					enum iio_buffer_direction direction,
 					const struct iio_buffer_setup_ops *ops,
-					const struct attribute **buffer_attrs)
+					const struct iio_dev_attr **buffer_attrs)
 {
 	int ret;
 
diff --git a/drivers/iio/buffer/kfifo_buf.c b/drivers/iio/buffer/kfifo_buf.c
index 35d8b4077376c..05b285f0eb225 100644
--- a/drivers/iio/buffer/kfifo_buf.c
+++ b/drivers/iio/buffer/kfifo_buf.c
@@ -270,7 +270,7 @@ static struct iio_buffer *devm_iio_kfifo_allocate(struct device *dev)
 int devm_iio_kfifo_buffer_setup_ext(struct device *dev,
 				    struct iio_dev *indio_dev,
 				    const struct iio_buffer_setup_ops *setup_ops,
-				    const struct attribute **buffer_attrs)
+				    const struct iio_dev_attr **buffer_attrs)
 {
 	struct iio_buffer *buffer;
 
diff --git a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
index 05a28d353e343..943e9e14d1e99 100644
--- a/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
+++ b/drivers/iio/common/cros_ec_sensors/cros_ec_sensors_core.c
@@ -172,9 +172,9 @@ static ssize_t hwfifo_watermark_max_show(struct device *dev,
 
 static IIO_DEVICE_ATTR_RO(hwfifo_watermark_max, 0);
 
-static const struct attribute *cros_ec_sensor_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_timeout.dev_attr.attr,
-	&iio_dev_attr_hwfifo_watermark_max.dev_attr.attr,
+static const struct iio_dev_attr *cros_ec_sensor_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_timeout,
+	&iio_dev_attr_hwfifo_watermark_max,
 	NULL,
 };
 
diff --git a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
index 1151434038d40..ad8910e6ad59d 100644
--- a/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
+++ b/drivers/iio/common/hid-sensors/hid-sensor-trigger.c
@@ -75,9 +75,9 @@ static IIO_DEVICE_ATTR(hwfifo_timeout, 0644,
 static IIO_DEVICE_ATTR(hwfifo_enabled, 0444,
 		       _hid_sensor_get_fifo_state, NULL, 0);
 
-static const struct attribute *hid_sensor_fifo_attributes[] = {
-	&iio_dev_attr_hwfifo_timeout.dev_attr.attr,
-	&iio_dev_attr_hwfifo_enabled.dev_attr.attr,
+static const struct iio_dev_attr *hid_sensor_fifo_attributes[] = {
+	&iio_dev_attr_hwfifo_timeout,
+	&iio_dev_attr_hwfifo_enabled,
 	NULL,
 };
 
@@ -231,7 +231,7 @@ static const struct iio_trigger_ops hid_sensor_trigger_ops = {
 int hid_sensor_setup_trigger(struct iio_dev *indio_dev, const char *name,
 				struct hid_sensor_common *attrb)
 {
-	const struct attribute **fifo_attrs;
+	const struct iio_dev_attr **fifo_attrs;
 	int ret;
 	struct iio_trigger *trig;
 
diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c
index 9cd7db549fcbc..80c78bd6bbef4 100644
--- a/drivers/iio/industrialio-buffer.c
+++ b/drivers/iio/industrialio-buffer.c
@@ -1605,6 +1605,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer,
 {
 	struct iio_dev_opaque *iio_dev_opaque = to_iio_dev_opaque(indio_dev);
 	struct iio_dev_attr *p;
+	const struct iio_dev_attr *id_attr;
 	struct attribute **attr;
 	int ret, i, attrn, scan_el_attrcount, buffer_attrcount;
 	const struct iio_chan_spec *channels;
@@ -1614,6 +1615,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer,
 		while (buffer->attrs[buffer_attrcount] != NULL)
 			buffer_attrcount++;
 	}
+	buffer_attrcount += ARRAY_SIZE(iio_buffer_attrs);
 
 	scan_el_attrcount = 0;
 	INIT_LIST_HEAD(&buffer->buffer_attr_list);
@@ -1656,7 +1658,7 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer,
 		}
 	}
 
-	attrn = buffer_attrcount + scan_el_attrcount + ARRAY_SIZE(iio_buffer_attrs);
+	attrn = buffer_attrcount + scan_el_attrcount;
 	attr = kcalloc(attrn + 1, sizeof(*attr), GFP_KERNEL);
 	if (!attr) {
 		ret = -ENOMEM;
@@ -1671,10 +1673,11 @@ static int __iio_buffer_alloc_sysfs_and_mask(struct iio_buffer *buffer,
 		attr[2] = &dev_attr_watermark_ro.attr;
 
 	if (buffer->attrs)
-		memcpy(&attr[ARRAY_SIZE(iio_buffer_attrs)], buffer->attrs,
-		       sizeof(struct attribute *) * buffer_attrcount);
+		for (i = 0, id_attr = buffer->attrs[i];
+		     (id_attr = buffer->attrs[i]); i++)
+			attr[ARRAY_SIZE(iio_buffer_attrs) + i] =
+				(struct attribute *)&id_attr->dev_attr.attr;
 
-	buffer_attrcount += ARRAY_SIZE(iio_buffer_attrs);
 	buffer->buffer_group.attrs = attr;
 
 	for (i = 0; i < buffer_attrcount; i++) {
diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h
index e2ca8ea23e191..89c3fd7c29caf 100644
--- a/include/linux/iio/buffer_impl.h
+++ b/include/linux/iio/buffer_impl.h
@@ -123,7 +123,7 @@ struct iio_buffer {
 	struct attribute_group buffer_group;
 
 	/* @attrs: Standard attributes of the buffer. */
-	const struct attribute **attrs;
+	const struct iio_dev_attr **attrs;
 
 	/* @demux_bounce: Buffer for doing gather from incoming scan. */
 	void *demux_bounce;
diff --git a/include/linux/iio/kfifo_buf.h b/include/linux/iio/kfifo_buf.h
index 8a83fb58232d5..22874da0c8be9 100644
--- a/include/linux/iio/kfifo_buf.h
+++ b/include/linux/iio/kfifo_buf.h
@@ -5,6 +5,7 @@
 struct iio_buffer;
 struct iio_buffer_setup_ops;
 struct iio_dev;
+struct iio_dev_attr;
 struct device;
 
 struct iio_buffer *iio_kfifo_allocate(void);
@@ -13,7 +14,7 @@ void iio_kfifo_free(struct iio_buffer *r);
 int devm_iio_kfifo_buffer_setup_ext(struct device *dev,
 				    struct iio_dev *indio_dev,
 				    const struct iio_buffer_setup_ops *setup_ops,
-				    const struct attribute **buffer_attrs);
+				    const struct iio_dev_attr **buffer_attrs);
 
 #define devm_iio_kfifo_buffer_setup(dev, indio_dev, setup_ops)	\
 	devm_iio_kfifo_buffer_setup_ext((dev), (indio_dev), (setup_ops), NULL)
diff --git a/include/linux/iio/triggered_buffer.h b/include/linux/iio/triggered_buffer.h
index 7490b05fc5b2f..29e1fe146879a 100644
--- a/include/linux/iio/triggered_buffer.h
+++ b/include/linux/iio/triggered_buffer.h
@@ -5,8 +5,8 @@
 #include <linux/iio/buffer.h>
 #include <linux/interrupt.h>
 
-struct attribute;
 struct iio_dev;
+struct iio_dev_attr;
 struct iio_buffer_setup_ops;
 
 int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev,
@@ -14,7 +14,7 @@ int iio_triggered_buffer_setup_ext(struct iio_dev *indio_dev,
 	irqreturn_t (*thread)(int irq, void *p),
 	enum iio_buffer_direction direction,
 	const struct iio_buffer_setup_ops *setup_ops,
-	const struct attribute **buffer_attrs);
+	const struct iio_dev_attr **buffer_attrs);
 void iio_triggered_buffer_cleanup(struct iio_dev *indio_dev);
 
 #define iio_triggered_buffer_setup(indio_dev, h, thread, setup_ops)		\
@@ -28,7 +28,7 @@ int devm_iio_triggered_buffer_setup_ext(struct device *dev,
 					irqreturn_t (*thread)(int irq, void *p),
 					enum iio_buffer_direction direction,
 					const struct iio_buffer_setup_ops *ops,
-					const struct attribute **buffer_attrs);
+					const struct iio_dev_attr **buffer_attrs);
 
 #define devm_iio_triggered_buffer_setup(dev, indio_dev, h, thread, setup_ops)	\
 	devm_iio_triggered_buffer_setup_ext((dev), (indio_dev), (h), (thread),	\
-- 
GitLab


From 26bfb581931a020eaf560612f01009b3e179db0a Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 18:09:46 +0100
Subject: [PATCH 2337/4122] iio: adc: cc10001: Add local struct device *dev
 variable to avoid repitition
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are lots of uses of this in probe() and we are about to introduce
some more, so add a local variable to simplify this.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016170950.387751-2-jic23@kernel.org
---
 drivers/iio/adc/cc10001_adc.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c
index e16ac935693b6..eeaea1362ed12 100644
--- a/drivers/iio/adc/cc10001_adc.c
+++ b/drivers/iio/adc/cc10001_adc.c
@@ -307,14 +307,15 @@ static int cc10001_adc_channel_init(struct iio_dev *indio_dev,
 
 static int cc10001_adc_probe(struct platform_device *pdev)
 {
-	struct device_node *node = pdev->dev.of_node;
+	struct device *dev = &pdev->dev;
+	struct device_node *node = dev->of_node;
 	struct cc10001_adc_device *adc_dev;
 	unsigned long adc_clk_rate;
 	struct iio_dev *indio_dev;
 	unsigned long channel_map;
 	int ret;
 
-	indio_dev = devm_iio_device_alloc(&pdev->dev, sizeof(*adc_dev));
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*adc_dev));
 	if (indio_dev == NULL)
 		return -ENOMEM;
 
@@ -326,7 +327,7 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 		channel_map &= ~ret;
 	}
 
-	adc_dev->reg = devm_regulator_get(&pdev->dev, "vref");
+	adc_dev->reg = devm_regulator_get(dev, "vref");
 	if (IS_ERR(adc_dev->reg))
 		return PTR_ERR(adc_dev->reg);
 
@@ -334,7 +335,7 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	indio_dev->name = dev_name(&pdev->dev);
+	indio_dev->name = dev_name(dev);
 	indio_dev->info = &cc10001_adc_info;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
@@ -344,23 +345,23 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 		goto err_disable_reg;
 	}
 
-	adc_dev->adc_clk = devm_clk_get(&pdev->dev, "adc");
+	adc_dev->adc_clk = devm_clk_get(dev, "adc");
 	if (IS_ERR(adc_dev->adc_clk)) {
-		dev_err(&pdev->dev, "failed to get the clock\n");
+		dev_err(dev, "failed to get the clock\n");
 		ret = PTR_ERR(adc_dev->adc_clk);
 		goto err_disable_reg;
 	}
 
 	ret = clk_prepare_enable(adc_dev->adc_clk);
 	if (ret) {
-		dev_err(&pdev->dev, "failed to enable the clock\n");
+		dev_err(dev, "failed to enable the clock\n");
 		goto err_disable_reg;
 	}
 
 	adc_clk_rate = clk_get_rate(adc_dev->adc_clk);
 	if (!adc_clk_rate) {
 		ret = -EINVAL;
-		dev_err(&pdev->dev, "null clock rate!\n");
+		dev_err(dev, "null clock rate!\n");
 		goto err_disable_clk;
 	}
 
-- 
GitLab


From dc0ba516d103532b7f289b20119374fe3797f81b Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 18:09:47 +0100
Subject: [PATCH 2338/4122] iio: adc: cc10001: Add devm_add_action_or_reset()
 to disable regulator.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As the voltage of this regulator is queried, we cannot use the
devm_regulator_get_enable() call and have to role our own disable.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016170950.387751-3-jic23@kernel.org
---
 drivers/iio/adc/cc10001_adc.c | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c
index eeaea1362ed12..4f42ceb40ded4 100644
--- a/drivers/iio/adc/cc10001_adc.c
+++ b/drivers/iio/adc/cc10001_adc.c
@@ -305,6 +305,11 @@ static int cc10001_adc_channel_init(struct iio_dev *indio_dev,
 	return 0;
 }
 
+static void cc10001_reg_disable(void *priv)
+{
+	regulator_disable(priv);
+}
+
 static int cc10001_adc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -335,27 +340,28 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	ret = devm_add_action_or_reset(dev, cc10001_reg_disable, adc_dev->reg);
+	if (ret)
+		return ret;
+
 	indio_dev->name = dev_name(dev);
 	indio_dev->info = &cc10001_adc_info;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 
 	adc_dev->reg_base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(adc_dev->reg_base)) {
-		ret = PTR_ERR(adc_dev->reg_base);
-		goto err_disable_reg;
-	}
+	if (IS_ERR(adc_dev->reg_base))
+		return PTR_ERR(adc_dev->reg_base);
 
 	adc_dev->adc_clk = devm_clk_get(dev, "adc");
 	if (IS_ERR(adc_dev->adc_clk)) {
 		dev_err(dev, "failed to get the clock\n");
-		ret = PTR_ERR(adc_dev->adc_clk);
-		goto err_disable_reg;
+		return PTR_ERR(adc_dev->adc_clk);
 	}
 
 	ret = clk_prepare_enable(adc_dev->adc_clk);
 	if (ret) {
 		dev_err(dev, "failed to enable the clock\n");
-		goto err_disable_reg;
+		return ret;
 	}
 
 	adc_clk_rate = clk_get_rate(adc_dev->adc_clk);
@@ -400,8 +406,6 @@ err_cleanup_buffer:
 	iio_triggered_buffer_cleanup(indio_dev);
 err_disable_clk:
 	clk_disable_unprepare(adc_dev->adc_clk);
-err_disable_reg:
-	regulator_disable(adc_dev->reg);
 	return ret;
 }
 
@@ -414,7 +418,6 @@ static int cc10001_adc_remove(struct platform_device *pdev)
 	iio_device_unregister(indio_dev);
 	iio_triggered_buffer_cleanup(indio_dev);
 	clk_disable_unprepare(adc_dev->adc_clk);
-	regulator_disable(adc_dev->reg);
 
 	return 0;
 }
-- 
GitLab


From c247e0d8c0d50793f459d2a7997d2f8f2105c973 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 18:09:48 +0100
Subject: [PATCH 2339/4122] iio: adc: cc10001: Use devm_clk_get_enabled() to
 avoid boilerplate.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As this driver just enables clock in probe() and disables in remove()
we can use this new function to replace boilerplate and simplify
error paths.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016170950.387751-4-jic23@kernel.org
---
 drivers/iio/adc/cc10001_adc.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c
index 4f42ceb40ded4..1a3a5e0a52f79 100644
--- a/drivers/iio/adc/cc10001_adc.c
+++ b/drivers/iio/adc/cc10001_adc.c
@@ -352,23 +352,16 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 	if (IS_ERR(adc_dev->reg_base))
 		return PTR_ERR(adc_dev->reg_base);
 
-	adc_dev->adc_clk = devm_clk_get(dev, "adc");
+	adc_dev->adc_clk = devm_clk_get_enabled(dev, "adc");
 	if (IS_ERR(adc_dev->adc_clk)) {
-		dev_err(dev, "failed to get the clock\n");
+		dev_err(dev, "failed to get/enable the clock\n");
 		return PTR_ERR(adc_dev->adc_clk);
 	}
 
-	ret = clk_prepare_enable(adc_dev->adc_clk);
-	if (ret) {
-		dev_err(dev, "failed to enable the clock\n");
-		return ret;
-	}
-
 	adc_clk_rate = clk_get_rate(adc_dev->adc_clk);
 	if (!adc_clk_rate) {
-		ret = -EINVAL;
 		dev_err(dev, "null clock rate!\n");
-		goto err_disable_clk;
+		return -EINVAL;
 	}
 
 	adc_dev->eoc_delay_ns = NSEC_PER_SEC / adc_clk_rate;
@@ -385,14 +378,14 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 	/* Setup the ADC channels available on the device */
 	ret = cc10001_adc_channel_init(indio_dev, channel_map);
 	if (ret < 0)
-		goto err_disable_clk;
+		return ret;
 
 	mutex_init(&adc_dev->lock);
 
 	ret = iio_triggered_buffer_setup(indio_dev, NULL,
 					 &cc10001_adc_trigger_h, NULL);
 	if (ret < 0)
-		goto err_disable_clk;
+		return ret;
 
 	ret = iio_device_register(indio_dev);
 	if (ret < 0)
@@ -404,8 +397,6 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 
 err_cleanup_buffer:
 	iio_triggered_buffer_cleanup(indio_dev);
-err_disable_clk:
-	clk_disable_unprepare(adc_dev->adc_clk);
 	return ret;
 }
 
@@ -417,7 +408,6 @@ static int cc10001_adc_remove(struct platform_device *pdev)
 	cc10001_adc_power_down(adc_dev);
 	iio_device_unregister(indio_dev);
 	iio_triggered_buffer_cleanup(indio_dev);
-	clk_disable_unprepare(adc_dev->adc_clk);
 
 	return 0;
 }
-- 
GitLab


From a43d5155b9455d38c4b3e4656131d79af61a2978 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 18:09:49 +0100
Subject: [PATCH 2340/4122] iio: adc: cc10001: Use devm_ to call device power
 down.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is presumably safe to call the powerdown whether or not we are
in the commented shared state (the driver always did this).

The power down was previously out of order wrt to the probe() function
so move using devm_ will ensure it occurs after the userspace interfaces
are removed.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016170950.387751-5-jic23@kernel.org
---
 drivers/iio/adc/cc10001_adc.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c
index 1a3a5e0a52f79..aecad89c00ff9 100644
--- a/drivers/iio/adc/cc10001_adc.c
+++ b/drivers/iio/adc/cc10001_adc.c
@@ -310,6 +310,11 @@ static void cc10001_reg_disable(void *priv)
 	regulator_disable(priv);
 }
 
+static void cc10001_pd_cb(void *priv)
+{
+	cc10001_adc_power_down(priv);
+}
+
 static int cc10001_adc_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -375,6 +380,9 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 	if (adc_dev->shared)
 		cc10001_adc_power_up(adc_dev);
 
+	ret = devm_add_action_or_reset(dev, cc10001_pd_cb, adc_dev);
+	if (ret)
+		return ret;
 	/* Setup the ADC channels available on the device */
 	ret = cc10001_adc_channel_init(indio_dev, channel_map);
 	if (ret < 0)
@@ -405,7 +413,6 @@ static int cc10001_adc_remove(struct platform_device *pdev)
 	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
 	struct cc10001_adc_device *adc_dev = iio_priv(indio_dev);
 
-	cc10001_adc_power_down(adc_dev);
 	iio_device_unregister(indio_dev);
 	iio_triggered_buffer_cleanup(indio_dev);
 
-- 
GitLab


From c5269fe908635c67c1eb4876df625efcfb156827 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sun, 16 Oct 2022 18:09:50 +0100
Subject: [PATCH 2341/4122] iio: adc: cc10001: Switch remaining IIO calls in
 probe to devm_ forms.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As everything else is now handled by devm managed releases the
triggered buffer setup and IIO device registration can also be
moved over to their devm forms allowing dropping of remove().

Only user of drvdata associated with the struct device was the
remove function, so also drop the platform_set_drvdata() call.

Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221016170950.387751-6-jic23@kernel.org
---
 drivers/iio/adc/cc10001_adc.c | 28 +++-------------------------
 1 file changed, 3 insertions(+), 25 deletions(-)

diff --git a/drivers/iio/adc/cc10001_adc.c b/drivers/iio/adc/cc10001_adc.c
index aecad89c00ff9..2cde4b44fc6e4 100644
--- a/drivers/iio/adc/cc10001_adc.c
+++ b/drivers/iio/adc/cc10001_adc.c
@@ -390,33 +390,12 @@ static int cc10001_adc_probe(struct platform_device *pdev)
 
 	mutex_init(&adc_dev->lock);
 
-	ret = iio_triggered_buffer_setup(indio_dev, NULL,
-					 &cc10001_adc_trigger_h, NULL);
+	ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL,
+					      &cc10001_adc_trigger_h, NULL);
 	if (ret < 0)
 		return ret;
 
-	ret = iio_device_register(indio_dev);
-	if (ret < 0)
-		goto err_cleanup_buffer;
-
-	platform_set_drvdata(pdev, indio_dev);
-
-	return 0;
-
-err_cleanup_buffer:
-	iio_triggered_buffer_cleanup(indio_dev);
-	return ret;
-}
-
-static int cc10001_adc_remove(struct platform_device *pdev)
-{
-	struct iio_dev *indio_dev = platform_get_drvdata(pdev);
-	struct cc10001_adc_device *adc_dev = iio_priv(indio_dev);
-
-	iio_device_unregister(indio_dev);
-	iio_triggered_buffer_cleanup(indio_dev);
-
-	return 0;
+	return devm_iio_device_register(dev, indio_dev);
 }
 
 static const struct of_device_id cc10001_adc_dt_ids[] = {
@@ -431,7 +410,6 @@ static struct platform_driver cc10001_adc_driver = {
 		.of_match_table = cc10001_adc_dt_ids,
 	},
 	.probe	= cc10001_adc_probe,
-	.remove	= cc10001_adc_remove,
 };
 module_platform_driver(cc10001_adc_driver);
 
-- 
GitLab


From 306935570f23e25bff22a3273bc6f92c8f13c910 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 27 Oct 2022 10:34:10 -0400
Subject: [PATCH 2342/4122] dt-bindings: iio: adc: qcom,spmi-vadc: simplify
 compatible enum

The second compatible item in oneOf is just an enum, not a list.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221027143411.277980-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml  | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml
index 8bac0c4120dd0..a848df37db068 100644
--- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml
@@ -22,13 +22,11 @@ properties:
       - items:
           - const: qcom,pms405-adc
           - const: qcom,spmi-adc-rev2
-
-      - items:
-          - enum:
-              - qcom,spmi-vadc
-              - qcom,spmi-adc5
-              - qcom,spmi-adc-rev2
-              - qcom,spmi-adc7
+      - enum:
+          - qcom,spmi-vadc
+          - qcom,spmi-adc5
+          - qcom,spmi-adc-rev2
+          - qcom,spmi-adc7
 
   reg:
     description: VADC base address in the SPMI PMIC register map
-- 
GitLab


From ea4b79e98ae0aedc0ebbe4adbb8f73b6c34f21d7 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 27 Oct 2022 10:34:11 -0400
Subject: [PATCH 2343/4122] dt-bindings: iio: adc: qcom,spmi-vadc: extend
 example

Cleanup existing example (generic node name for spmi, use 4-space
indentation) and add example for ADCv7 copied from
Documentation/devicetree/bindings/thermal/qcom-spmi-adc-tm5.yaml.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221027143411.277980-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/adc/qcom,spmi-vadc.yaml      | 94 ++++++++++++-------
 1 file changed, 62 insertions(+), 32 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml
index a848df37db068..f1522196042d8 100644
--- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-vadc.yaml
@@ -236,42 +236,72 @@ additionalProperties: false
 
 examples:
   - |
-    spmi_bus {
-      #address-cells = <1>;
-      #size-cells = <0>;
-      /* VADC node */
-      pmic_vadc: adc@3100 {
-        compatible = "qcom,spmi-vadc";
-        reg = <0x3100>;
-        interrupts = <0x0 0x31 0x0 0x1>;
+    spmi {
         #address-cells = <1>;
         #size-cells = <0>;
-        #io-channel-cells = <1>;
-
-        /* Channel node */
-        adc-chan@39 {
-          reg = <0x39>;
-          qcom,decimation = <512>;
-          qcom,ratiometric;
-          qcom,hw-settle-time = <200>;
-          qcom,avg-samples = <1>;
-          qcom,pre-scaling = <1 3>;
-        };
-
-        adc-chan@9 {
-          reg = <0x9>;
-        };
-
-        adc-chan@a {
-          reg = <0xa>;
+        /* VADC node */
+        pmic_vadc: adc@3100 {
+            compatible = "qcom,spmi-vadc";
+            reg = <0x3100>;
+            interrupts = <0x0 0x31 0x0 0x1>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+            #io-channel-cells = <1>;
+
+            /* Channel node */
+            adc-chan@39 {
+                reg = <0x39>;
+                qcom,decimation = <512>;
+                qcom,ratiometric;
+                qcom,hw-settle-time = <200>;
+                qcom,avg-samples = <1>;
+                qcom,pre-scaling = <1 3>;
+            };
+
+            adc-chan@9 {
+                reg = <0x9>;
+            };
+
+            adc-chan@a {
+                reg = <0xa>;
+            };
+
+            adc-chan@e {
+                reg = <0xe>;
+            };
+
+            adc-chan@f {
+                reg = <0xf>;
+            };
         };
+    };
 
-        adc-chan@e {
-          reg = <0xe>;
-        };
+  - |
+    #include <dt-bindings/iio/qcom,spmi-adc7-pmk8350.h>
+    #include <dt-bindings/iio/qcom,spmi-adc7-pm8350.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
 
-        adc-chan@f {
-          reg = <0xf>;
+    spmi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        adc@3100 {
+            reg = <0x3100>;
+            compatible = "qcom,spmi-adc7";
+            #address-cells = <1>;
+            #size-cells = <0>;
+            #io-channel-cells = <1>;
+
+            /* Other properties are omitted */
+            xo-therm@44 {
+                reg = <PMK8350_ADC7_AMUX_THM1_100K_PU>;
+                qcom,ratiometric;
+                qcom,hw-settle-time = <200>;
+            };
+
+            conn-therm@47 {
+                reg = <PM8350_ADC7_AMUX_THM4_100K_PU>;
+                qcom,ratiometric;
+                qcom,hw-settle-time = <200>;
+            };
         };
-      };
     };
-- 
GitLab


From 2cfb2180c3e8002719234c43b88b040e4f89396f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 28 Oct 2022 13:23:42 +0200
Subject: [PATCH 2344/4122] iio: imu: st_lsm6dsx: introduce sw trigger support

There are some hw configuration where irq0 and/or irq1 pins are not
connected to the SPI or I2C/I3C controller. In order to avoid polling
the output register introduce iio-sw trigger support when irq line is
not available (or hw FIFO is not supported).

Suggested-by: Mario Tesi <mario.tesi@st.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/93ae6ff1150b531a9d7a4d3d1b1adb8383613717.1666955685.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h      |  3 +-
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 78 ++++++++++++++++++++
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c |  4 +-
 3 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index 07ad8027de738..6399b0bb6f674 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -424,7 +424,7 @@ struct st_lsm6dsx_hw {
 	struct {
 		__le16 channels[3];
 		s64 ts __aligned(8);
-	} scan[3];
+	} scan[ST_LSM6DSX_ID_MAX];
 };
 
 static __maybe_unused const struct iio_event_spec st_lsm6dsx_event = {
@@ -456,6 +456,7 @@ int st_lsm6dsx_read_tagged_fifo(struct st_lsm6dsx_hw *hw);
 int st_lsm6dsx_check_odr(struct st_lsm6dsx_sensor *sensor, u32 odr, u8 *val);
 int st_lsm6dsx_shub_probe(struct st_lsm6dsx_hw *hw, const char *name);
 int st_lsm6dsx_shub_set_enable(struct st_lsm6dsx_sensor *sensor, bool enable);
+int st_lsm6dsx_shub_read_output(struct st_lsm6dsx_hw *hw, u8 *data, int len);
 int st_lsm6dsx_set_page(struct st_lsm6dsx_hw *hw, bool enable);
 
 static inline int
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index fe5fa08b68acb..73fd5f038375d 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -53,6 +53,8 @@
 #include <linux/iio/events.h>
 #include <linux/iio/iio.h>
 #include <linux/iio/sysfs.h>
+#include <linux/iio/triggered_buffer.h>
+#include <linux/iio/trigger_consumer.h>
 #include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/minmax.h>
@@ -2117,6 +2119,32 @@ static irqreturn_t st_lsm6dsx_handler_thread(int irq, void *private)
 	return fifo_len || event ? IRQ_HANDLED : IRQ_NONE;
 }
 
+static irqreturn_t st_lsm6dsx_sw_trigger_handler_thread(int irq,
+							void *private)
+{
+	struct iio_poll_func *pf = private;
+	struct iio_dev *iio_dev = pf->indio_dev;
+	struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev);
+	struct st_lsm6dsx_hw *hw = sensor->hw;
+
+	if (sensor->id == ST_LSM6DSX_ID_EXT0 ||
+	    sensor->id == ST_LSM6DSX_ID_EXT1 ||
+	    sensor->id == ST_LSM6DSX_ID_EXT2)
+		st_lsm6dsx_shub_read_output(hw,
+					    (u8 *)hw->scan[sensor->id].channels,
+					    sizeof(hw->scan[sensor->id].channels));
+	else
+		st_lsm6dsx_read_locked(hw, iio_dev->channels[0].address,
+				       hw->scan[sensor->id].channels,
+				       sizeof(hw->scan[sensor->id].channels));
+
+	iio_push_to_buffers_with_timestamp(iio_dev, &hw->scan[sensor->id],
+					   iio_get_time_ns(iio_dev));
+	iio_trigger_notify_done(iio_dev->trig);
+
+	return IRQ_HANDLED;
+}
+
 static int st_lsm6dsx_irq_setup(struct st_lsm6dsx_hw *hw)
 {
 	struct st_sensors_platform_data *pdata;
@@ -2175,6 +2203,46 @@ static int st_lsm6dsx_irq_setup(struct st_lsm6dsx_hw *hw)
 	return 0;
 }
 
+static int st_lsm6dsx_sw_buffer_preenable(struct iio_dev *iio_dev)
+{
+	struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev);
+
+	return st_lsm6dsx_device_set_enable(sensor, true);
+}
+
+static int st_lsm6dsx_sw_buffer_postdisable(struct iio_dev *iio_dev)
+{
+	struct st_lsm6dsx_sensor *sensor = iio_priv(iio_dev);
+
+	return st_lsm6dsx_device_set_enable(sensor, false);
+}
+
+static const struct iio_buffer_setup_ops st_lsm6dsx_sw_buffer_ops = {
+	.preenable = st_lsm6dsx_sw_buffer_preenable,
+	.postdisable = st_lsm6dsx_sw_buffer_postdisable,
+};
+
+static int st_lsm6dsx_sw_buffers_setup(struct st_lsm6dsx_hw *hw)
+{
+	int i;
+
+	for (i = 0; i < ST_LSM6DSX_ID_MAX; i++) {
+		int err;
+
+		if (!hw->iio_devs[i])
+			continue;
+
+		err = devm_iio_triggered_buffer_setup(hw->dev,
+					hw->iio_devs[i], NULL,
+					st_lsm6dsx_sw_trigger_handler_thread,
+					&st_lsm6dsx_sw_buffer_ops);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int st_lsm6dsx_init_regulators(struct device *dev)
 {
 	/* vdd-vddio power regulators */
@@ -2255,6 +2323,16 @@ int st_lsm6dsx_probe(struct device *dev, int irq, int hw_id,
 			return err;
 	}
 
+	if (!hw->irq || !hw->settings->fifo_ops.read_fifo) {
+		/*
+		 * Rely on sw triggers (e.g. hr-timers) if irq pin is not
+		 * connected of if the device does not support HW FIFO
+		 */
+		err = st_lsm6dsx_sw_buffers_setup(hw);
+		if (err)
+			return err;
+	}
+
 	err = iio_read_mount_matrix(hw->dev, &hw->orientation);
 	if (err)
 		return err;
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
index 99562ba85ee43..f2b64b4956a33 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_shub.c
@@ -170,9 +170,7 @@ static void st_lsm6dsx_shub_wait_complete(struct st_lsm6dsx_hw *hw)
  *
  * Read st_lsm6dsx i2c controller register
  */
-static int
-st_lsm6dsx_shub_read_output(struct st_lsm6dsx_hw *hw, u8 *data,
-			    int len)
+int st_lsm6dsx_shub_read_output(struct st_lsm6dsx_hw *hw, u8 *data, int len)
 {
 	const struct st_lsm6dsx_shub_settings *hub_settings;
 	int err;
-- 
GitLab


From a1c6d631ff12cd41201c3ab824f3c4db66621c13 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 31 Oct 2022 12:41:25 +0100
Subject: [PATCH 2345/4122] iio: imu: st_lsm6dsx: add support to LSM6DSV

Add support to STM LSM6DSV (accelerometer and gyroscope)
Mems sensor.

Datasheet: https://www.st.com/resource/en/datasheet/lsm6dsv.pdf
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/aad879e7af0fe583bbf043a2b93e32fade79fa19.1667216004.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/Kconfig            |   2 +-
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h       |   2 +
 .../iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c    |   2 +-
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c  | 202 ++++++++++++++++++
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c   |   5 +
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c   |   5 +
 6 files changed, 216 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig
index 2ed2b3f40c0b1..1c68bac94bcee 100644
--- a/drivers/iio/imu/st_lsm6dsx/Kconfig
+++ b/drivers/iio/imu/st_lsm6dsx/Kconfig
@@ -13,7 +13,7 @@ config IIO_ST_LSM6DSX
 	  sensor. Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm,
 	  ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr,
 	  lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx,
-	  the accelerometer/gyroscope of lsm9ds1 and lsm6dst.
+	  lsm6dsv, the accelerometer/gyroscope of lsm9ds1 and lsm6dst.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called st_lsm6dsx.
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index 6399b0bb6f674..ab61895cf0720 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -33,6 +33,7 @@
 #define ST_LSM6DSOP_DEV_NAME	"lsm6dsop"
 #define ST_ASM330LHHX_DEV_NAME	"asm330lhhx"
 #define ST_LSM6DSTX_DEV_NAME	"lsm6dstx"
+#define ST_LSM6DSV_DEV_NAME	"lsm6dsv"
 
 enum st_lsm6dsx_hw_id {
 	ST_LSM6DS3_ID,
@@ -53,6 +54,7 @@ enum st_lsm6dsx_hw_id {
 	ST_LSM6DSOP_ID,
 	ST_ASM330LHHX_ID,
 	ST_LSM6DSTX_ID,
+	ST_LSM6DSV_ID,
 	ST_LSM6DSX_MAX_ID,
 };
 
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
index 48fe6a45671bd..7dd5205aea5b4 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_buffer.c
@@ -15,7 +15,7 @@
  * value of the decimation factor and ODR set for each FIFO data set.
  *
  * LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/LSM6DSRX/ISM330DHCX/
- * LSM6DST/LSM6DSOP/LSM6DSTX:
+ * LSM6DST/LSM6DSOP/LSM6DSTX/LSM6DSV:
  * The FIFO buffer can be configured to store data from gyroscope and
  * accelerometer. Each sample is queued with a tag (1B) indicating data
  * source (gyroscope, accelerometer, hw timer).
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 73fd5f038375d..5e716a5071fd4 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -1162,6 +1162,208 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
 			.wakeup_src_x_mask = BIT(2),
 		},
 	},
+	{
+		.reset = {
+			.addr = 0x12,
+			.mask = BIT(0),
+		},
+		.boot = {
+			.addr = 0x12,
+			.mask = BIT(7),
+		},
+		.bdu = {
+			.addr = 0x12,
+			.mask = BIT(6),
+		},
+		.id = {
+			{
+				.hw_id = ST_LSM6DSV_ID,
+				.name = ST_LSM6DSV_DEV_NAME,
+				.wai = 0x70,
+			},
+		},
+		.channels = {
+			[ST_LSM6DSX_ID_ACC] = {
+				.chan = st_lsm6dsx_acc_channels,
+				.len = ARRAY_SIZE(st_lsm6dsx_acc_channels),
+			},
+			[ST_LSM6DSX_ID_GYRO] = {
+				.chan = st_lsm6dsx_gyro_channels,
+				.len = ARRAY_SIZE(st_lsm6dsx_gyro_channels),
+			},
+		},
+		.drdy_mask = {
+			.addr = 0x13,
+			.mask = BIT(3),
+		},
+		.odr_table = {
+			[ST_LSM6DSX_ID_ACC] = {
+				.reg = {
+					.addr = 0x10,
+					.mask = GENMASK(3, 0),
+				},
+				.odr_avl[0] = {   7500, 0x02 },
+				.odr_avl[1] = {  15000, 0x03 },
+				.odr_avl[2] = {  30000, 0x04 },
+				.odr_avl[3] = {  60000, 0x05 },
+				.odr_avl[4] = { 120000, 0x06 },
+				.odr_avl[5] = { 240000, 0x07 },
+				.odr_avl[6] = { 480000, 0x08 },
+				.odr_avl[7] = { 960000, 0x09 },
+				.odr_len = 8,
+			},
+			[ST_LSM6DSX_ID_GYRO] = {
+				.reg = {
+					.addr = 0x11,
+					.mask = GENMASK(3, 0),
+				},
+				.odr_avl[0] = {   7500, 0x02 },
+				.odr_avl[1] = {  15000, 0x03 },
+				.odr_avl[2] = {  30000, 0x04 },
+				.odr_avl[3] = {  60000, 0x05 },
+				.odr_avl[4] = { 120000, 0x06 },
+				.odr_avl[5] = { 240000, 0x07 },
+				.odr_avl[6] = { 480000, 0x08 },
+				.odr_avl[7] = { 960000, 0x09 },
+				.odr_len = 8,
+			},
+		},
+		.fs_table = {
+			[ST_LSM6DSX_ID_ACC] = {
+				.reg = {
+					.addr = 0x17,
+					.mask = GENMASK(1, 0),
+				},
+				.fs_avl[0] = {  IIO_G_TO_M_S_2(61000), 0x0 },
+				.fs_avl[1] = { IIO_G_TO_M_S_2(122000), 0x1 },
+				.fs_avl[2] = { IIO_G_TO_M_S_2(244000), 0x2 },
+				.fs_avl[3] = { IIO_G_TO_M_S_2(488000), 0x3 },
+				.fs_len = 4,
+			},
+			[ST_LSM6DSX_ID_GYRO] = {
+				.reg = {
+					.addr = 0x15,
+					.mask = GENMASK(3, 0),
+				},
+				.fs_avl[0] = {  IIO_DEGREE_TO_RAD(8750000), 0x1 },
+				.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500000), 0x2 },
+				.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000000), 0x3 },
+				.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000000), 0x4 },
+				.fs_len = 4,
+			},
+		},
+		.irq_config = {
+			.irq1 = {
+				.addr = 0x0d,
+				.mask = BIT(3),
+			},
+			.irq2 = {
+				.addr = 0x0e,
+				.mask = BIT(3),
+			},
+			.lir = {
+				.addr = 0x56,
+				.mask = BIT(0),
+			},
+			.irq1_func = {
+				.addr = 0x5e,
+				.mask = BIT(5),
+			},
+			.irq2_func = {
+				.addr = 0x5f,
+				.mask = BIT(5),
+			},
+			.hla = {
+				.addr = 0x03,
+				.mask = BIT(4),
+			},
+			.od = {
+				.addr = 0x03,
+				.mask = BIT(3),
+			},
+		},
+		.batch = {
+			[ST_LSM6DSX_ID_ACC] = {
+				.addr = 0x09,
+				.mask = GENMASK(3, 0),
+			},
+			[ST_LSM6DSX_ID_GYRO] = {
+				.addr = 0x09,
+				.mask = GENMASK(7, 4),
+			},
+		},
+		.fifo_ops = {
+			.update_fifo = st_lsm6dsx_update_fifo,
+			.read_fifo = st_lsm6dsx_read_tagged_fifo,
+			.fifo_th = {
+				.addr = 0x07,
+				.mask = GENMASK(7, 0),
+			},
+			.fifo_diff = {
+				.addr = 0x1b,
+				.mask = GENMASK(8, 0),
+			},
+			.max_size = 512,
+			.th_wl = 1,
+		},
+		.ts_settings = {
+			.timer_en = {
+				.addr = 0x50,
+				.mask = BIT(6),
+			},
+			.decimator = {
+				.addr = 0x0a,
+				.mask = GENMASK(7, 6),
+			},
+			.freq_fine = 0x4f,
+		},
+		.shub_settings = {
+			.page_mux = {
+				.addr = 0x01,
+				.mask = BIT(6),
+			},
+			.master_en = {
+				.sec_page = true,
+				.addr = 0x14,
+				.mask = BIT(2),
+			},
+			.pullup_en = {
+				.addr = 0x03,
+				.mask = BIT(6),
+			},
+			.aux_sens = {
+				.addr = 0x14,
+				.mask = GENMASK(1, 0),
+			},
+			.wr_once = {
+				.addr = 0x14,
+				.mask = BIT(6),
+			},
+			.num_ext_dev = 3,
+			.shub_out = {
+				.sec_page = true,
+				.addr = 0x02,
+			},
+			.slv0_addr = 0x15,
+			.dw_slv0_addr = 0x21,
+			.batch_en = BIT(3),
+		},
+		.event_settings = {
+			.enable_reg = {
+				.addr = 0x50,
+				.mask = BIT(7),
+			},
+			.wakeup_reg = {
+				.addr = 0x5b,
+				.mask = GENMASK(5, 0),
+			},
+			.wakeup_src_reg = 0x45,
+			.wakeup_src_status_mask = BIT(3),
+			.wakeup_src_z_mask = BIT(0),
+			.wakeup_src_y_mask = BIT(1),
+			.wakeup_src_x_mask = BIT(2),
+		},
+	},
 };
 
 int st_lsm6dsx_set_page(struct st_lsm6dsx_hw *hw, bool enable)
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
index 307c8c436862a..239c8920a31f8 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
@@ -109,6 +109,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = {
 		.compatible = "st,lsm6dstx",
 		.data = (void *)ST_LSM6DSTX_ID,
 	},
+	{
+		.compatible = "st,lsm6dsv",
+		.data = (void *)ST_LSM6DSV_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match);
@@ -132,6 +136,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = {
 	{ ST_LSM6DSOP_DEV_NAME, ST_LSM6DSOP_ID },
 	{ ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID },
 	{ ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID },
+	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table);
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
index 6a4eecf4bb050..66705ef16ed04 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
@@ -109,6 +109,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = {
 		.compatible = "st,lsm6dstx",
 		.data = (void *)ST_LSM6DSTX_ID,
 	},
+	{
+		.compatible = "st,lsm6dsv",
+		.data = (void *)ST_LSM6DSV_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match);
@@ -132,6 +136,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = {
 	{ ST_LSM6DSOP_DEV_NAME, ST_LSM6DSOP_ID },
 	{ ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID },
 	{ ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID },
+	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table);
-- 
GitLab


From 9b4901528f57fe88c7e0a0bfdf69d8edcaad46fb Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 31 Oct 2022 12:41:26 +0100
Subject: [PATCH 2346/4122] dt-bindings: iio: imu: st_lsm6dsx: add lsm6dsv
 device bindings

Introduce device bindings for LSM6DSV IMU sensor.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/c34cea4468dc26050dff812f47e1a21f8e544758.1667216004.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
index fe1e02e5d7b39..e7349a3275dda 100644
--- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
+++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
@@ -32,6 +32,7 @@ properties:
           - st,lsm6dsrx
           - st,lsm6dst
           - st,lsm6dsop
+          - st,lsm6dsv
       - items:
           - const: st,asm330lhhx
           - const: st,lsm6dsr
-- 
GitLab


From 75347e30f142521c140ba1f5011d4fb175c1406b Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Mon, 31 Oct 2022 12:51:27 +0200
Subject: [PATCH 2347/4122] drivers: iio: accel: Use warning if invalid device
 id is detected

Use warning instead of failing driver probe if invalid device id is
detected for ADXL355 device.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Link: https://lore.kernel.org/r/20221031105129.47740-2-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl355_core.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c
index 4bc648eac8b29..dd08253d66d06 100644
--- a/drivers/iio/accel/adxl355_core.c
+++ b/drivers/iio/accel/adxl355_core.c
@@ -262,10 +262,8 @@ static int adxl355_setup(struct adxl355_data *data)
 	if (ret)
 		return ret;
 
-	if (regval != ADXL355_PARTID_VAL) {
-		dev_err(data->dev, "Invalid DEV ID 0x%02x\n", regval);
-		return -ENODEV;
-	}
+	if (regval != ADXL355_PARTID_VAL)
+		dev_warn(data->dev, "Invalid DEV ID 0x%02x\n", regval);
 
 	/*
 	 * Perform a software reset to make sure the device is in a consistent
-- 
GitLab


From b311d2e170761fbf73184d236f53187e3db8bd14 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Mon, 31 Oct 2022 12:51:28 +0200
Subject: [PATCH 2348/4122] dt-bindings: iio: accel: Add docs for ADXL359

Update ADXL355 existing documentation with documentation
for ADXL359 device.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221031105129.47740-3-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/accel/adi,adxl355.yaml        | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml b/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
index 14b487088ab49..6b03c4efbb083 100644
--- a/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
+++ b/Documentation/devicetree/bindings/iio/accel/adi,adxl355.yaml
@@ -4,20 +4,22 @@
 $id: http://devicetree.org/schemas/iio/accel/adi,adxl355.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Analog Devices ADXL355 3-Axis, Low noise MEMS Accelerometer
+title: Analog Devices ADXL355 and ADXL359 3-Axis, Low noise MEMS Accelerometers
 
 maintainers:
   - Puranjay Mohan <puranjay12@gmail.com>
 
 description: |
-  Analog Devices ADXL355 3-Axis, Low noise MEMS Accelerometer that supports
-  both I2C & SPI interfaces
+  Analog Devices ADXL355 and ADXL359 3-Axis, Low noise MEMS Accelerometers that
+  support both I2C & SPI interfaces
     https://www.analog.com/en/products/adxl355.html
+    https://www.analog.com/en/products/adxl359.html
 
 properties:
   compatible:
     enum:
       - adi,adxl355
+      - adi,adxl359
 
   reg:
     maxItems: 1
-- 
GitLab


From d3532d69757f9c6a73155d73bd9b94b64b12ef72 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Mon, 31 Oct 2022 12:51:29 +0200
Subject: [PATCH 2349/4122] drivers: iio: accel: Add support for ADXL359 device

Add support for ADXL359 device in already existing ADXL355 driver.

Datasheet: https://www.analog.com/media/en/technical-documentation/data-sheets/adxl359.pdf
Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Link: https://lore.kernel.org/r/20221031105129.47740-4-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl355.h      | 20 +++++++-
 drivers/iio/accel/adxl355_core.c | 87 +++++++++++++++++++++++++-------
 drivers/iio/accel/adxl355_i2c.c  | 22 ++++++--
 drivers/iio/accel/adxl355_spi.c  | 19 +++++--
 4 files changed, 121 insertions(+), 27 deletions(-)

diff --git a/drivers/iio/accel/adxl355.h b/drivers/iio/accel/adxl355.h
index 6dd49b13e4fd0..061e66dc7057b 100644
--- a/drivers/iio/accel/adxl355.h
+++ b/drivers/iio/accel/adxl355.h
@@ -10,12 +10,30 @@
 
 #include <linux/regmap.h>
 
+enum adxl355_device_type {
+	ADXL355,
+	ADXL359,
+};
+
+struct adxl355_fractional_type {
+	int integer;
+	int decimal;
+};
+
 struct device;
 
+struct adxl355_chip_info {
+	const char			*name;
+	u8				part_id;
+	struct adxl355_fractional_type	accel_scale;
+	struct adxl355_fractional_type	temp_offset;
+};
+
 extern const struct regmap_access_table adxl355_readable_regs_tbl;
 extern const struct regmap_access_table adxl355_writeable_regs_tbl;
+extern const struct adxl355_chip_info adxl35x_chip_info[];
 
 int adxl355_core_probe(struct device *dev, struct regmap *regmap,
-		       const char *name);
+		       const struct adxl355_chip_info *chip_info);
 
 #endif /* _ADXL355_H_ */
diff --git a/drivers/iio/accel/adxl355_core.c b/drivers/iio/accel/adxl355_core.c
index dd08253d66d06..0c9225d18fb29 100644
--- a/drivers/iio/accel/adxl355_core.c
+++ b/drivers/iio/accel/adxl355_core.c
@@ -60,6 +60,7 @@
 #define ADXL355_DEVID_AD_VAL		0xAD
 #define ADXL355_DEVID_MST_VAL		0x1D
 #define ADXL355_PARTID_VAL		0xED
+#define ADXL359_PARTID_VAL		0xE9
 #define ADXL355_RESET_CODE		0x52
 
 static const struct regmap_range adxl355_read_reg_range[] = {
@@ -83,6 +84,60 @@ const struct regmap_access_table adxl355_writeable_regs_tbl = {
 };
 EXPORT_SYMBOL_NS_GPL(adxl355_writeable_regs_tbl, IIO_ADXL355);
 
+const struct adxl355_chip_info adxl35x_chip_info[] = {
+	[ADXL355] = {
+		.name = "adxl355",
+		.part_id = ADXL355_PARTID_VAL,
+		/*
+		 * At +/- 2g with 20-bit resolution, scale is given in datasheet
+		 * as 3.9ug/LSB = 0.0000039 * 9.80665 = 0.00003824593 m/s^2.
+		 */
+		.accel_scale = {
+			.integer = 0,
+			.decimal = 38245,
+		},
+		/*
+		 * The datasheet defines an intercept of 1885 LSB at 25 degC
+		 * and a slope of -9.05 LSB/C. The following formula can be used
+		 * to find the temperature:
+		 * Temp = ((RAW - 1885)/(-9.05)) + 25 but this doesn't follow
+		 * the format of the IIO which is Temp = (RAW + OFFSET) * SCALE.
+		 * Hence using some rearranging we get the scale as -110.497238
+		 * and offset as -2111.25.
+		 */
+		.temp_offset = {
+			.integer =  -2111,
+			.decimal = 250000,
+		},
+	},
+	[ADXL359] = {
+		.name = "adxl359",
+		.part_id = ADXL359_PARTID_VAL,
+		/*
+		 * At +/- 10g with 20-bit resolution, scale is given in datasheet
+		 * as 19.5ug/LSB = 0.0000195 * 9.80665 = 0.0.00019122967 m/s^2.
+		 */
+		.accel_scale = {
+			.integer = 0,
+			.decimal = 191229,
+		},
+		/*
+		 * The datasheet defines an intercept of 1852 LSB at 25 degC
+		 * and a slope of -9.05 LSB/C. The following formula can be used
+		 * to find the temperature:
+		 * Temp = ((RAW - 1852)/(-9.05)) + 25 but this doesn't follow
+		 * the format of the IIO which is Temp = (RAW + OFFSET) * SCALE.
+		 * Hence using some rearranging we get the scale as -110.497238
+		 * and offset as -2079.25.
+		 */
+		.temp_offset = {
+			.integer = -2079,
+			.decimal = 250000,
+		},
+	},
+};
+EXPORT_SYMBOL_NS_GPL(adxl35x_chip_info, IIO_ADXL355);
+
 enum adxl355_op_mode {
 	ADXL355_MEASUREMENT,
 	ADXL355_STANDBY,
@@ -162,6 +217,7 @@ static const struct adxl355_chan_info adxl355_chans[] = {
 };
 
 struct adxl355_data {
+	const struct adxl355_chip_info *chip_info;
 	struct regmap *regmap;
 	struct device *dev;
 	struct mutex lock; /* lock to protect op_mode */
@@ -456,33 +512,25 @@ static int adxl355_read_raw(struct iio_dev *indio_dev,
 
 	case IIO_CHAN_INFO_SCALE:
 		switch (chan->type) {
-		/*
-		 * The datasheet defines an intercept of 1885 LSB at 25 degC
-		 * and a slope of -9.05 LSB/C. The following formula can be used
-		 * to find the temperature:
-		 * Temp = ((RAW - 1885)/(-9.05)) + 25 but this doesn't follow
-		 * the format of the IIO which is Temp = (RAW + OFFSET) * SCALE.
-		 * Hence using some rearranging we get the scale as -110.497238
-		 * and offset as -2111.25.
-		 */
 		case IIO_TEMP:
+			/*
+			 * Temperature scale is -110.497238.
+			 * See the detailed explanation in adxl35x_chip_info
+			 * definition above.
+			 */
 			*val = -110;
 			*val2 = 497238;
 			return IIO_VAL_INT_PLUS_MICRO;
-		/*
-		 * At +/- 2g with 20-bit resolution, scale is given in datasheet
-		 * as 3.9ug/LSB = 0.0000039 * 9.80665 = 0.00003824593 m/s^2.
-		 */
 		case IIO_ACCEL:
-			*val = 0;
-			*val2 = 38245;
+			*val = data->chip_info->accel_scale.integer;
+			*val2 = data->chip_info->accel_scale.decimal;
 			return IIO_VAL_INT_PLUS_NANO;
 		default:
 			return -EINVAL;
 		}
 	case IIO_CHAN_INFO_OFFSET:
-		*val = -2111;
-		*val2 = 250000;
+		*val = data->chip_info->temp_offset.integer;
+		*val2 = data->chip_info->temp_offset.decimal;
 		return IIO_VAL_INT_PLUS_MICRO;
 	case IIO_CHAN_INFO_CALIBBIAS:
 		*val = sign_extend32(data->calibbias[chan->address], 15);
@@ -705,7 +753,7 @@ static int adxl355_probe_trigger(struct iio_dev *indio_dev, int irq)
 }
 
 int adxl355_core_probe(struct device *dev, struct regmap *regmap,
-		       const char *name)
+		       const struct adxl355_chip_info *chip_info)
 {
 	struct adxl355_data *data;
 	struct iio_dev *indio_dev;
@@ -720,9 +768,10 @@ int adxl355_core_probe(struct device *dev, struct regmap *regmap,
 	data->regmap = regmap;
 	data->dev = dev;
 	data->op_mode = ADXL355_STANDBY;
+	data->chip_info = chip_info;
 	mutex_init(&data->lock);
 
-	indio_dev->name = name;
+	indio_dev->name = chip_info->name;
 	indio_dev->info = &adxl355_info;
 	indio_dev->modes = INDIO_DIRECT_MODE;
 	indio_dev->channels = adxl355_channels;
diff --git a/drivers/iio/accel/adxl355_i2c.c b/drivers/iio/accel/adxl355_i2c.c
index f67d57921c81b..6cde5ccac06b8 100644
--- a/drivers/iio/accel/adxl355_i2c.c
+++ b/drivers/iio/accel/adxl355_i2c.c
@@ -23,6 +23,20 @@ static const struct regmap_config adxl355_i2c_regmap_config = {
 static int adxl355_i2c_probe(struct i2c_client *client)
 {
 	struct regmap *regmap;
+	const struct adxl355_chip_info *chip_data;
+	const struct i2c_device_id *adxl355;
+
+	chip_data = device_get_match_data(&client->dev);
+	if (!chip_data) {
+		adxl355 = to_i2c_driver(client->dev.driver)->id_table;
+		if (!adxl355)
+			return -EINVAL;
+
+		chip_data = (void *)i2c_match_id(adxl355, client)->driver_data;
+
+		if (!chip_data)
+			return -EINVAL;
+	}
 
 	regmap = devm_regmap_init_i2c(client, &adxl355_i2c_regmap_config);
 	if (IS_ERR(regmap)) {
@@ -32,17 +46,19 @@ static int adxl355_i2c_probe(struct i2c_client *client)
 		return PTR_ERR(regmap);
 	}
 
-	return adxl355_core_probe(&client->dev, regmap, client->name);
+	return adxl355_core_probe(&client->dev, regmap, chip_data);
 }
 
 static const struct i2c_device_id adxl355_i2c_id[] = {
-	{ "adxl355", 0 },
+	{ "adxl355", (kernel_ulong_t)&adxl35x_chip_info[ADXL355] },
+	{ "adxl359", (kernel_ulong_t)&adxl35x_chip_info[ADXL359] },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, adxl355_i2c_id);
 
 static const struct of_device_id adxl355_of_match[] = {
-	{ .compatible = "adi,adxl355" },
+	{ .compatible = "adi,adxl355", .data = &adxl35x_chip_info[ADXL355] },
+	{ .compatible = "adi,adxl359", .data = &adxl35x_chip_info[ADXL359] },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, adxl355_of_match);
diff --git a/drivers/iio/accel/adxl355_spi.c b/drivers/iio/accel/adxl355_spi.c
index 5fe986ae03f63..fc99534d91ffc 100644
--- a/drivers/iio/accel/adxl355_spi.c
+++ b/drivers/iio/accel/adxl355_spi.c
@@ -9,6 +9,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/regmap.h>
 #include <linux/spi/spi.h>
+#include <linux/property.h>
 
 #include "adxl355.h"
 
@@ -24,9 +25,17 @@ static const struct regmap_config adxl355_spi_regmap_config = {
 
 static int adxl355_spi_probe(struct spi_device *spi)
 {
-	const struct spi_device_id *id = spi_get_device_id(spi);
+	const struct adxl355_chip_info *chip_data;
 	struct regmap *regmap;
 
+	chip_data = device_get_match_data(&spi->dev);
+	if (!chip_data) {
+		chip_data = (void *)spi_get_device_id(spi)->driver_data;
+
+		if (!chip_data)
+			return -EINVAL;
+	}
+
 	regmap = devm_regmap_init_spi(spi, &adxl355_spi_regmap_config);
 	if (IS_ERR(regmap)) {
 		dev_err(&spi->dev, "Error initializing spi regmap: %ld\n",
@@ -35,17 +44,19 @@ static int adxl355_spi_probe(struct spi_device *spi)
 		return PTR_ERR(regmap);
 	}
 
-	return adxl355_core_probe(&spi->dev, regmap, id->name);
+	return adxl355_core_probe(&spi->dev, regmap, chip_data);
 }
 
 static const struct spi_device_id adxl355_spi_id[] = {
-	{ "adxl355", 0 },
+	{ "adxl355", (kernel_ulong_t)&adxl35x_chip_info[ADXL355] },
+	{ "adxl359", (kernel_ulong_t)&adxl35x_chip_info[ADXL359] },
 	{ }
 };
 MODULE_DEVICE_TABLE(spi, adxl355_spi_id);
 
 static const struct of_device_id adxl355_of_match[] = {
-	{ .compatible = "adi,adxl355" },
+	{ .compatible = "adi,adxl355", .data = &adxl35x_chip_info[ADXL355] },
+	{ .compatible = "adi,adxl359", .data = &adxl35x_chip_info[ADXL359] },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, adxl355_of_match);
-- 
GitLab


From ed81d3de8eacf70c96f7012e4e16257efc2e12a9 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Sun, 6 Nov 2022 21:02:33 +0700
Subject: [PATCH 2350/4122] Documentation: ad4130: format list of
 in_voltage-voltage_filter_mode_available modes

Sphinx reports two warnings on sysfs documentation for AD4130 driver:

Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130:2: WARNING: Unexpected indentation.
Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130:2: WARNING: Block quote ends without a blank line; unexpected unindent.

These are due to misformatting of sinc* modes list. Format it with bullet
list. Since each entry spans multiple lines, separate each with a blank
line.

Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver")
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Link: https://lore.kernel.org/r/20221106140233.74112-1-bagasdotme@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../ABI/testing/sysfs-bus-iio-adc-ad4130      | 48 +++++++++++--------
 1 file changed, 29 insertions(+), 19 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130 b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
index d9555751d21ce..f24ed6687e900 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
+++ b/Documentation/ABI/testing/sysfs-bus-iio-adc-ad4130
@@ -3,25 +3,35 @@ KernelVersion:  6.2
 Contact:	linux-iio@vger.kernel.org
 Description:
 		Reading returns a list with the possible filter modes.
-		"sinc4"       - Sinc 4. Excellent noise performance. Long 1st
-				conversion time. No natural 50/60Hz rejection.
-		"sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion time.
-		"sinc3"	      - Sinc3. Moderate 1st conversion time. Good noise
-				performance.
-		"sinc3+rej60" - Sinc3 + 60Hz rejection. At a sampling frequency
-				of 50Hz, achieves simultaneous 50Hz and 60Hz
-				rejection.
-		"sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion time.
-				Best used with a sampling frequency of at least
-				216.19Hz.
-		"sinc3+pf1"   - Sinc3 + Post Filter 1.
-				53dB rejection @ 50Hz, 58dB rejection @ 60Hz.
-		"sinc3+pf2"   - Sinc3 + Post Filter 2.
-				70dB rejection @ 50Hz, 70dB rejection @ 60Hz.
-		"sinc3+pf3"   - Sinc3 + Post Filter 3.
-				99dB rejection @ 50Hz, 103dB rejection @ 60Hz.
-		"sinc3+pf4"   - Sinc3 + Post Filter 4.
-				103dB rejection @ 50Hz, 109dB rejection @ 60Hz.
+
+		  * "sinc4"       - Sinc 4. Excellent noise performance. Long
+                    1st conversion time. No natural 50/60Hz rejection.
+
+		  * "sinc4+sinc1" - Sinc4 + averaging by 8. Low 1st conversion
+		    time.
+
+		  * "sinc3"	      - Sinc3. Moderate 1st conversion time.
+		    Good noise performance.
+
+		  * "sinc3+rej60" - Sinc3 + 60Hz rejection. At a sampling
+		    frequency of 50Hz, achieves simultaneous 50Hz and 60Hz
+		    rejection.
+
+		  * "sinc3+sinc1" - Sinc3 + averaging by 8. Low 1st conversion
+		    time. Best used with a sampling frequency of at least
+		    216.19Hz.
+
+		  * "sinc3+pf1"   - Sinc3 + Post Filter 1. 53dB rejection @
+		    50Hz, 58dB rejection @ 60Hz.
+
+		  * "sinc3+pf2"   - Sinc3 + Post Filter 2. 70dB rejection @
+		    50Hz, 70dB rejection @ 60Hz.
+
+		  * "sinc3+pf3"   - Sinc3 + Post Filter 3. 99dB rejection @
+		    50Hz, 103dB rejection @ 60Hz.
+
+		  * "sinc3+pf4"   - Sinc3 + Post Filter 4. 103dB rejection @
+		    50Hz, 109dB rejection @ 60Hz.
 
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_filter_mode
 KernelVersion:  6.2
-- 
GitLab


From 5e0176213949724fbe9a8e4a39817edce337b8a0 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:29 +0200
Subject: [PATCH 2351/4122] iio: temperature: ltc2983: make bulk write buffer
 DMA-safe

regmap_bulk_write() does not guarantee implicit DMA-safety,
even though the current implementation duplicates the given
buffer. Do not rely on it.

Fixes: f110f3188e56 ("iio: temperature: Add support for LTC2983")
Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20221103130041.2153295-2-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/ltc2983.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c
index a60ccf1836872..1117991ca2ab6 100644
--- a/drivers/iio/temperature/ltc2983.c
+++ b/drivers/iio/temperature/ltc2983.c
@@ -209,6 +209,7 @@ struct ltc2983_data {
 	 * Holds the converted temperature
 	 */
 	__be32 temp __aligned(IIO_DMA_MINALIGN);
+	__be32 chan_val;
 };
 
 struct ltc2983_sensor {
@@ -313,19 +314,18 @@ static int __ltc2983_fault_handler(const struct ltc2983_data *st,
 	return 0;
 }
 
-static int __ltc2983_chan_assign_common(const struct ltc2983_data *st,
+static int __ltc2983_chan_assign_common(struct ltc2983_data *st,
 					const struct ltc2983_sensor *sensor,
 					u32 chan_val)
 {
 	u32 reg = LTC2983_CHAN_START_ADDR(sensor->chan);
-	__be32 __chan_val;
 
 	chan_val |= LTC2983_CHAN_TYPE(sensor->type);
 	dev_dbg(&st->spi->dev, "Assign reg:0x%04X, val:0x%08X\n", reg,
 		chan_val);
-	__chan_val = cpu_to_be32(chan_val);
-	return regmap_bulk_write(st->regmap, reg, &__chan_val,
-				 sizeof(__chan_val));
+	st->chan_val = cpu_to_be32(chan_val);
+	return regmap_bulk_write(st->regmap, reg, &st->chan_val,
+				 sizeof(st->chan_val));
 }
 
 static int __ltc2983_chan_custom_sensor_assign(struct ltc2983_data *st,
-- 
GitLab


From 4ecee36077698bc5c759571106cd29321a886735 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:30 +0200
Subject: [PATCH 2352/4122] dt-bindings: iio: temperature: ltc2983: add default
 values

Binding properties should have default values to let the reader
know if they should change it. Add them based on driver logic.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-3-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 722781aa4697e..82667adc85b10 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -34,6 +34,7 @@ properties:
       cases. An extra delay can be configured using this property. The value is
       rounded to nearest 100us.
     maximum: 255
+    default: 0
 
   adi,filter-notch-freq:
     description:
@@ -45,6 +46,7 @@ properties:
     $ref: /schemas/types.yaml#/definitions/uint32
     minimum: 0
     maximum: 2
+    default: 0
 
   '#address-cells':
     const: 1
@@ -104,6 +106,7 @@ patternProperties:
           This property set's the pulsed current value applied during
           open-circuit detect.
         enum: [10, 100, 500, 1000]
+        default: 10
 
       adi,cold-junction-handle:
         description:
@@ -163,6 +166,7 @@ patternProperties:
           cycles, this property will assume different predefined values on
           each cycle. Just set the value of the first cycle (1l).
         enum: [10, 20, 40, 80]
+        default: 10
 
       adi,ideal-factor-value:
         description:
@@ -170,6 +174,7 @@ patternProperties:
           be multiplied by 1000000 to remove the fractional part. For more
           information look at table 20 of the datasheet.
         $ref: /schemas/types.yaml#/definitions/uint32
+        default: 0
 
   "^rtd@":
     type: object
@@ -207,6 +212,7 @@ patternProperties:
           property to 5 means 4 wires with Kelvin Rsense.
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [2, 3, 4, 5]
+        default: 2
 
       adi,rsense-share:
         description:
@@ -226,6 +232,7 @@ patternProperties:
           This property controls the magnitude of the excitation current
           applied to the RTD.
         enum: [5, 10, 25, 50, 100, 250, 500, 1000]
+        default: 5
 
       adi,rtd-curve:
         description:
@@ -234,6 +241,7 @@ patternProperties:
         $ref: /schemas/types.yaml#/definitions/uint32
         minimum: 0
         maximum: 3
+        default: 0
 
       adi,custom-rtd:
         description:
@@ -308,6 +316,7 @@ patternProperties:
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000, 250000,
                500000, 1000000]
+        default: 0
 
       adi,custom-thermistor:
         description:
-- 
GitLab


From d449fb5146b9ba93cb72a0676c3da3dd7a1743a9 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:31 +0200
Subject: [PATCH 2353/4122] dt-bindings: iio: temperature: ltc2983: use hex for
 sensor address

Addresses should be in hex, fix it.

Although the driver initially specified 1-20, it can be made free-range
since the address is supposed to match reg, onto which we can impose
restrictions based on the compatible property value.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-4-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 82667adc85b10..29f6fa5e25291 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -55,7 +55,7 @@ properties:
     const: 0
 
 patternProperties:
-  "@([1-9]|1[0-9]|20)$":
+  "@([0-9a-f]+)$":
     type: object
 
     properties:
-- 
GitLab


From 828a6c2252babdb79c7870080e872729ea90df17 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:32 +0200
Subject: [PATCH 2354/4122] dt-bindings: iio: temperature: ltc2983: remove
 qutations from phandle ref

Not needed, so why have them?

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-5-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml    | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 29f6fa5e25291..6b3a20448f78f 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -112,7 +112,7 @@ patternProperties:
         description:
           Phandle which points to a sensor object responsible for measuring
           the thermocouple cold junction temperature.
-        $ref: "/schemas/types.yaml#/definitions/phandle"
+        $ref: /schemas/types.yaml#/definitions/phandle
 
       adi,custom-thermocouple:
         description:
@@ -204,7 +204,7 @@ patternProperties:
       adi,rsense-handle:
         description:
           Phandle pointing to a rsense object associated with this RTD.
-        $ref: "/schemas/types.yaml#/definitions/phandle"
+        $ref: /schemas/types.yaml#/definitions/phandle
 
       adi,number-of-wires:
         description:
@@ -288,7 +288,7 @@ patternProperties:
         description:
           Phandle pointing to a rsense object associated with this
           thermistor.
-        $ref: "/schemas/types.yaml#/definitions/phandle"
+        $ref: /schemas/types.yaml#/definitions/phandle
 
       adi,single-ended:
         description:
-- 
GitLab


From 6cf75e90c5cc268d47ea67bfdf05961c1439dee4 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:33 +0200
Subject: [PATCH 2355/4122] dt-bindings: iio: temperature: ltc2983: describe
 matrix items

Give a little bit of information on what each item in the matrix is
supposed to be.

Also, some matrices put the 'minItems' and 'maxItems' keywords in the
wrong level. They should be on the same level as the 'items' keyword.
Fix it.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-6-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/temperature/adi,ltc2983.yaml | 23 ++++++++++---------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 6b3a20448f78f..4f26b337c9574 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -126,8 +126,9 @@ patternProperties:
         minItems: 3
         maxItems: 64
         items:
-          minItems: 2
-          maxItems: 2
+          items:
+            - description: Voltage point in nV, signed.
+            - description: Temperature point in uK.
 
   "^diode@":
     type: object
@@ -249,12 +250,12 @@ patternProperties:
           resistance(ohm)-temperature(K). The entries added here are in uohm
           and uK. For more details values look at table 74 and 75.
         $ref: /schemas/types.yaml#/definitions/uint64-matrix
+        minItems: 3
+        maxItems: 64
         items:
-          minItems: 3
-          maxItems: 64
           items:
-            minItems: 2
-            maxItems: 2
+            - description: Resistance point in uOhms.
+            - description: Temperature point in uK.
 
     required:
       - adi,rsense-handle
@@ -328,8 +329,9 @@ patternProperties:
         minItems: 3
         maxItems: 64
         items:
-          minItems: 2
-          maxItems: 2
+          items:
+            - description: Resistance point in uOhms.
+            - description: Temperature point in uK.
 
       adi,custom-steinhart:
         description:
@@ -338,9 +340,8 @@ patternProperties:
           Steinhart sensors the coefficients are given in the raw
           format. Look at table 82 for more information.
         $ref: /schemas/types.yaml#/definitions/uint32-array
-        items:
-          minItems: 6
-          maxItems: 6
+        minItems: 6
+        maxItems: 6
 
     required:
       - adi,rsense-handle
-- 
GitLab


From a2ebc8d281de63797fea5188f2aaa402cdb63cf1 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:34 +0200
Subject: [PATCH 2356/4122] dt-bindings: iio: temperature: ltc2983: require
 custom sensor tables

The driver will error out when a custom sensor type is used but a
custom sensor table is not provided. Require it in the binding too.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-7-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/temperature/adi,ltc2983.yaml | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 4f26b337c9574..bbac5f5cfbb3a 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -130,6 +130,15 @@ patternProperties:
             - description: Voltage point in nV, signed.
             - description: Temperature point in uK.
 
+    allOf:
+      - if:
+          properties:
+            adi,sensor-type:
+              const: 9
+        then:
+          required:
+            - adi,custom-thermocouple
+
   "^diode@":
     type: object
     description:
@@ -263,6 +272,15 @@ patternProperties:
     dependencies:
       adi,current-rotate: [ "adi,rsense-share" ]
 
+    allOf:
+      - if:
+          properties:
+            adi,sensor-type:
+              const: 18
+        then:
+          required:
+            - adi,custom-rtd
+
   "^thermistor@":
     type: object
     description:
@@ -349,6 +367,22 @@ patternProperties:
     dependencies:
       adi,current-rotate: [ "adi,rsense-share" ]
 
+    allOf:
+      - if:
+          properties:
+            adi,sensor-type:
+              const: 26
+        then:
+          required:
+            - adi,custom-steinhart
+      - if:
+          properties:
+            adi,sensor-type:
+              const: 27
+        then:
+          required:
+            - adi,custom-thermistor
+
   "^adc@":
     type: object
     description: Represents a channel which is being used as a direct adc.
-- 
GitLab


From b3805fc3dcf35512266bddf8c785eb82a24325bc Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:35 +0200
Subject: [PATCH 2357/4122] dt-bindings: iio: temperature: ltc2983: require 4
 wire rtd for current rotate

The driver will error out when current rotation is enabled but the RTD
is not 4-wire. Require it in the binding too.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-8-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/temperature/adi,ltc2983.yaml | 25 +++++++++++--------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index bbac5f5cfbb3a..b603219fb0c98 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -230,13 +230,6 @@ patternProperties:
           resistor is used for multiple 2-, 3-, and/or 4-wire RTDs.
         type: boolean
 
-      adi,current-rotate:
-        description:
-          Boolean property which enables excitation current rotation to
-          automatically remove parasitic thermocouple effects. Note that
-          this property is not allowed for 2- and 3-wire RTDs.
-        type: boolean
-
       adi,excitation-current-microamp:
         description:
           This property controls the magnitude of the excitation current
@@ -269,10 +262,22 @@ patternProperties:
     required:
       - adi,rsense-handle
 
-    dependencies:
-      adi,current-rotate: [ "adi,rsense-share" ]
-
     allOf:
+      - if:
+          properties:
+            adi,number-of-wires:
+              const: 4
+        then:
+          properties:
+            adi,current-rotate:
+              description:
+                Whether to enable excitation current rotation to automatically
+                remove parasitic thermocouple effects.
+              type: boolean
+
+          dependencies:
+            adi,current-rotate: [ "adi,rsense-share" ]
+
       - if:
           properties:
             adi,sensor-type:
-- 
GitLab


From ee7e336c48d3f8f5bef748b202792290d0fa0c1e Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:36 +0200
Subject: [PATCH 2358/4122] dt-bindings: iio: temperature: ltc2983: change
 default excitation for custom thermistors

Excitation cannot be set to auto-range when using a custom thermistor
or Steinhart sensor type. Default it to 1000nA to match the driver
and remove the auto-range value from the enum.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-9-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/temperature/adi,ltc2983.yaml          | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index b603219fb0c98..1b6a11f2aa9da 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -378,6 +378,11 @@ patternProperties:
             adi,sensor-type:
               const: 26
         then:
+          properties:
+            adi,excitation-current-nanoamp:
+              enum: [250, 500, 1000, 5000, 10000, 25000, 50000, 100000,
+                     250000, 500000, 1000000]
+              default: 1000
           required:
             - adi,custom-steinhart
       - if:
@@ -385,6 +390,11 @@ patternProperties:
             adi,sensor-type:
               const: 27
         then:
+          properties:
+            adi,excitation-current-nanoamp:
+              enum: [250, 500, 1000, 5000, 10000, 25000, 50000, 100000,
+                     250000, 500000, 1000000]
+              default: 1000
           required:
             - adi,custom-thermistor
 
-- 
GitLab


From 95027f5b3999b1b47a407ae4aa7bee18591164d1 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:37 +0200
Subject: [PATCH 2359/4122] dt-bindings: iio: temperature: ltc2983: refine
 descriptions

Some descriptions are too verbose, while others are too succint.
Rewrite them all.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-10-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/temperature/adi,ltc2983.yaml | 163 +++++++-----------
 1 file changed, 65 insertions(+), 98 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 1b6a11f2aa9da..676801b036cf1 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -25,21 +25,16 @@ properties:
     maxItems: 1
 
   adi,mux-delay-config-us:
-    description:
-      The LTC2983 performs 2 or 3 internal conversion cycles per temperature
-      result. Each conversion cycle is performed with different excitation and
-      input multiplexer configurations. Prior to each conversion, these
-      excitation circuits and input switch configurations are changed and an
-      internal 1ms delay ensures settling prior to the conversion cycle in most
-      cases. An extra delay can be configured using this property. The value is
-      rounded to nearest 100us.
+    description: |
+      Extra delay prior to each conversion, in addition to the internal 1ms
+      delay, for the multiplexer to switch input configurations and
+      excitation values.
     maximum: 255
     default: 0
 
   adi,filter-notch-freq:
     description:
-      Set's the default setting of the digital filter. The default is
-      simultaneous 50/60Hz rejection.
+      Notch frequency of the digital filter.
       0 - 50/60Hz rejection
       1 - 60Hz rejection
       2 - 50Hz rejection
@@ -57,17 +52,18 @@ properties:
 patternProperties:
   "@([0-9a-f]+)$":
     type: object
+    description: Sensor.
 
     properties:
       reg:
         description:
-          The channel number. It can be connected to one of the 20 channels of
-          the device.
+          Channel number. Connects the sensor to the channel with this number
+          of the device.
         minimum: 1
         maximum: 20
 
       adi,sensor-type:
-        description: Identifies the type of sensor connected to the device.
+        description: Type of sensor connected to the device.
         $ref: /schemas/types.yaml#/definitions/uint32
 
     required:
@@ -76,9 +72,7 @@ patternProperties:
 
   "^thermocouple@":
     type: object
-    description:
-      Represents a thermocouple sensor which is connected to one of the device
-      channels.
+    description: Thermocouple sensor.
 
     properties:
       adi,sensor-type:
@@ -97,31 +91,24 @@ patternProperties:
         maximum: 9
 
       adi,single-ended:
-        description:
-          Boolean property which set's the thermocouple as single-ended.
+        description: Whether the sensor is single-ended.
         type: boolean
 
       adi,sensor-oc-current-microamp:
-        description:
-          This property set's the pulsed current value applied during
-          open-circuit detect.
+        description: Pulsed current value applied during open-circuit detect.
         enum: [10, 100, 500, 1000]
         default: 10
 
       adi,cold-junction-handle:
         description:
-          Phandle which points to a sensor object responsible for measuring
-          the thermocouple cold junction temperature.
+          Sensor responsible for measuring the thermocouple cold junction
+          temperature.
         $ref: /schemas/types.yaml#/definitions/phandle
 
       adi,custom-thermocouple:
         description:
-          This is a table, where each entry should be a pair of
-          voltage(mv)-temperature(K). The entries must be given in nv and uK
-          so that, the original values must be multiplied by 1000000. For
-          more details look at table 69 and 70.
-          Note should be signed, but dtc doesn't currently maintain the
-          sign.
+          Used for digitizing custom thermocouples.
+          See Page 59 of the datasheet.
         $ref: /schemas/types.yaml#/definitions/uint64-matrix
         minItems: 3
         maxItems: 64
@@ -141,55 +128,50 @@ patternProperties:
 
   "^diode@":
     type: object
-    description:
-      Represents a diode sensor which is connected to one of the device
-      channels.
+    description: Diode sensor.
 
     properties:
       adi,sensor-type:
-        description: Identifies the sensor as a diode.
+        description: Sensor type for diodes.
         $ref: /schemas/types.yaml#/definitions/uint32
         const: 28
 
       adi,single-ended:
-        description: Boolean property which set's the diode as single-ended.
+        description: Whether the sensor is single-ended.
         type: boolean
 
       adi,three-conversion-cycles:
         description:
-          Boolean property which set's three conversion cycles removing
-          parasitic resistance effects between the LTC2983 and the diode.
+          Whether to use three conversion cycles to remove parasitic
+          resistance between the device and the diode.
         type: boolean
 
       adi,average-on:
         description:
-          Boolean property which enables a running average of the diode
-          temperature reading. This reduces the noise when the diode is used
-          as a cold junction temperature element on an isothermal block
-          where temperatures change slowly.
+          Whether to use a running average of the diode temperature
+          reading to reduce the noise when the diode is used as a cold
+          junction temperature element on an isothermal block where
+          temperatures change slowly.
         type: boolean
 
       adi,excitation-current-microamp:
         description:
-          This property controls the magnitude of the excitation current
-          applied to the diode. Depending on the number of conversions
-          cycles, this property will assume different predefined values on
-          each cycle. Just set the value of the first cycle (1l).
+          Magnitude of the 1l excitation current applied to the diode.
+          4l excitation current will be 4 times this value, and 8l
+          excitation current will be 8 times value.
         enum: [10, 20, 40, 80]
         default: 10
 
       adi,ideal-factor-value:
         description:
-          This property sets the diode ideality factor. The real value must
-          be multiplied by 1000000 to remove the fractional part. For more
-          information look at table 20 of the datasheet.
+          Diode ideality factor.
+          Set this property to 1000000 times the real value.
         $ref: /schemas/types.yaml#/definitions/uint32
         default: 0
 
   "^rtd@":
     type: object
-    description:
-      Represents a rtd sensor which is connected to one of the device channels.
+    description: RTD sensor.
 
     properties:
       reg:
@@ -212,35 +194,35 @@ patternProperties:
         maximum: 18
 
       adi,rsense-handle:
-        description:
-          Phandle pointing to a rsense object associated with this RTD.
+        description: Associated sense resistor sensor.
         $ref: /schemas/types.yaml#/definitions/phandle
 
       adi,number-of-wires:
         description:
-          Identifies the number of wires used by the RTD. Setting this
-          property to 5 means 4 wires with Kelvin Rsense.
+          Number of wires used by the RTD.
+          5 means 4 wires with Kelvin sense resistor.
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [2, 3, 4, 5]
         default: 2
 
       adi,rsense-share:
         description:
-          Boolean property which enables Rsense sharing, where one sense
-          resistor is used for multiple 2-, 3-, and/or 4-wire RTDs.
+          Whether to enable sense resistor sharing, where one sense
+          resistor is used by multiple sensors.
         type: boolean
 
       adi,excitation-current-microamp:
-        description:
-          This property controls the magnitude of the excitation current
-          applied to the RTD.
+        description: Excitation current applied to the RTD.
         enum: [5, 10, 25, 50, 100, 250, 500, 1000]
         default: 5
 
       adi,rtd-curve:
-        description:
-          This property set the RTD curve used and the corresponding
-          Callendar-VanDusen constants. Look at table 30 of the datasheet.
+        description: |
+          RTD curve and the corresponding Callendar-VanDusen constants.
+          0 - European
+          1 - American
+          2 - Japanese
+          3 - ITS-90
         $ref: /schemas/types.yaml#/definitions/uint32
         minimum: 0
         maximum: 3
@@ -248,9 +230,8 @@ patternProperties:
 
       adi,custom-rtd:
         description:
-          This is a table, where each entry should be a pair of
-          resistance(ohm)-temperature(K). The entries added here are in uohm
-          and uK. For more details values look at table 74 and 75.
+          Used for digitizing custom RTDs.
+          See Page 62 of the datasheet.
         $ref: /schemas/types.yaml#/definitions/uint64-matrix
         minItems: 3
         maxItems: 64
@@ -288,9 +269,7 @@ patternProperties:
 
   "^thermistor@":
     type: object
-    description:
-      Represents a thermistor sensor which is connected to one of the device
-      channels.
+    description: Thermistor sensor.
 
     properties:
       adi,sensor-type:
@@ -309,34 +288,29 @@ patternProperties:
         maximum: 27
 
       adi,rsense-handle:
-        description:
-          Phandle pointing to a rsense object associated with this
-          thermistor.
+        description: Associated sense resistor sensor.
         $ref: /schemas/types.yaml#/definitions/phandle
 
       adi,single-ended:
-        description:
-          Boolean property which set's the thermistor as single-ended.
+        description: Whether the sensor is single-ended.
         type: boolean
 
       adi,rsense-share:
         description:
-          Boolean property which enables Rsense sharing, where one sense
-          resistor is used for multiple thermistors. Note that this property
-          is ignored if adi,single-ended is set.
+          Whether to enable sense resistor sharing, where one sense
+          resistor is used by multiple sensors.
         type: boolean
 
       adi,current-rotate:
         description:
-          Boolean property which enables excitation current rotation to
-          automatically remove parasitic thermocouple effects.
+          Whether to enable excitation current rotation to automatically
+          remove parasitic thermocouple effects.
         type: boolean
 
       adi,excitation-current-nanoamp:
         description:
-          This property controls the magnitude of the excitation current
-          applied to the thermistor. Value 0 set's the sensor in auto-range
-          mode.
+          Excitation current applied to the thermistor.
+          0 sets the sensor in auto-range mode.
         $ref: /schemas/types.yaml#/definitions/uint32
         enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000, 250000,
                500000, 1000000]
@@ -344,10 +318,8 @@ patternProperties:
 
       adi,custom-thermistor:
         description:
-          This is a table, where each entry should be a pair of
-          resistance(ohm)-temperature(K). The entries added here are in uohm
-          and uK only for custom thermistors. For more details look at table
-          78 and 79.
+          Used for digitizing custom thermistors.
+          See Page 65 of the datasheet.
         $ref: /schemas/types.yaml#/definitions/uint64-matrix
         minItems: 3
         maxItems: 64
@@ -358,10 +330,9 @@ patternProperties:
 
       adi,custom-steinhart:
         description:
-          Steinhart-Hart coefficients are also supported and can
-          be programmed into the device memory using this property. For
-          Steinhart sensors the coefficients are given in the raw
-          format. Look at table 82 for more information.
+          Steinhart-Hart coefficients in raw format, used for digitizing
+          custom thermistors.
+          See Page 68 of the datasheet.
         $ref: /schemas/types.yaml#/definitions/uint32-array
         minItems: 6
         maxItems: 6
@@ -400,23 +371,21 @@ patternProperties:
 
   "^adc@":
     type: object
-    description: Represents a channel which is being used as a direct adc.
+    description: Direct ADC sensor.
 
     properties:
       adi,sensor-type:
-        description: Identifies the sensor as a direct adc.
+        description: Sensor type for direct ADC sensors.
         $ref: /schemas/types.yaml#/definitions/uint32
         const: 30
 
       adi,single-ended:
-        description: Boolean property which set's the adc as single-ended.
+        description: Whether the sensor is single-ended.
         type: boolean
 
   "^rsense@":
     type: object
-    description:
-      Represents a rsense which is connected to one of the device channels.
-      Rsense are used by thermistors and RTD's.
+    description: Sense resistor sensor.
 
     properties:
       reg:
@@ -424,14 +393,12 @@ patternProperties:
         maximum: 20
 
       adi,sensor-type:
-        description: Identifies the sensor as a rsense.
+        description: Sensor type sense resistor sensors.
         $ref: /schemas/types.yaml#/definitions/uint32
         const: 29
 
       adi,rsense-val-milli-ohms:
-        description:
-          Sets the value of the sense resistor. Look at table 20 of the
-          datasheet for information.
+        description: Value of the sense resistor.
 
     required:
       - adi,rsense-val-milli-ohms
-- 
GitLab


From 93144097f51bf01591da5b8f8e37b6c616a0b23a Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:38 +0200
Subject: [PATCH 2360/4122] dt-bindings: iio: temperature: ltc2983: describe
 broken mux delay property

The 'adi,mux-delay-config-us' property is broken. It was supposed to
be in us, but the value is actually written directly to the register.
Describe the fact that it is broken and how it actually works.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-11-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml       | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 676801b036cf1..467e165e9b0b9 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -29,6 +29,9 @@ properties:
       Extra delay prior to each conversion, in addition to the internal 1ms
       delay, for the multiplexer to switch input configurations and
       excitation values.
+
+      This property is supposed to be in microseconds, but to maintain
+      compatibility, this value will be multiplied by 100 before usage.
     maximum: 255
     default: 0
 
-- 
GitLab


From cf738c544da333f6a8b7927f848ea388d7e58421 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:39 +0200
Subject: [PATCH 2361/4122] dt-bindings: iio: temperature: ltc2983: use generic
 node name in example

Examples should use the generic IIO node name of temperature-sensor.
Fix it.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20221103130041.2153295-12-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml        | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 467e165e9b0b9..0e8333260a445 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -420,7 +420,7 @@ examples:
         #address-cells = <1>;
         #size-cells = <0>;
 
-        sensor_ltc2983: ltc2983@0 {
+        temperature-sensor@0 {
                 compatible = "adi,ltc2983";
                 reg = <0>;
 
-- 
GitLab


From d24052695057eb5254d9fedcb5494428f23d3ecb Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:40 +0200
Subject: [PATCH 2362/4122] dt-bindings: iio: temperature: ltc2983: support
 more parts

Add support for the following parts:
 * LTC2984
 * LTC2986
 * LTM2985

The LTC2984 is a variant of the LTC2983 with EEPROM.
The LTC2986 is a variant of the LTC2983 with only 10 channels,
EEPROM and support for active analog temperature sensors.
The LTM2985 is software-compatible with the LTC2986.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221103130041.2153295-13-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/temperature/adi,ltc2983.yaml | 60 +++++++++++++++++--
 1 file changed, 56 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 0e8333260a445..44f8b0672f533 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -4,19 +4,30 @@
 $id: http://devicetree.org/schemas/iio/temperature/adi,ltc2983.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Analog Devices LTC2983 Multi-sensor Temperature system
+title: Analog Devices LTC2983, LTC2986, LTM2985 Multi-sensor Temperature system
 
 maintainers:
   - Nuno Sá <nuno.sa@analog.com>
 
 description: |
-  Analog Devices LTC2983 Multi-Sensor Digital Temperature Measurement System
+  Analog Devices LTC2983, LTC2984, LTC2986, LTM2985 Multi-Sensor Digital
+  Temperature Measurement Systems
+
   https://www.analog.com/media/en/technical-documentation/data-sheets/2983fc.pdf
+  https://www.analog.com/media/en/technical-documentation/data-sheets/2984fb.pdf
+  https://www.analog.com/media/en/technical-documentation/data-sheets/29861fa.pdf
+  https://www.analog.com/media/en/technical-documentation/data-sheets/ltm2985.pdf
 
 properties:
   compatible:
-    enum:
-      - adi,ltc2983
+    oneOf:
+      - enum:
+          - adi,ltc2983
+          - adi,ltc2986
+          - adi,ltm2985
+      - items:
+          - const: adi,ltc2984
+          - const: adi,ltc2983
 
   reg:
     maxItems: 1
@@ -386,6 +397,35 @@ patternProperties:
         description: Whether the sensor is single-ended.
         type: boolean
 
+  "^temp@":
+    type: object
+    description: Active analog temperature sensor.
+
+    properties:
+      adi,sensor-type:
+        description: Sensor type for active analog temperature sensors.
+        $ref: /schemas/types.yaml#/definitions/uint32
+        const: 31
+
+      adi,single-ended:
+        description: Whether the sensor is single-ended.
+        type: boolean
+
+      adi,custom-temp:
+        description:
+          Used for digitizing active analog temperature sensors.
+          See Page 67 of the LTM2985 datasheet.
+        $ref: /schemas/types.yaml#/definitions/uint64-matrix
+        minItems: 3
+        maxItems: 64
+        items:
+          items:
+            - description: Voltage point in nV, signed.
+            - description: Temperature point in uK.
+
+    required:
+      - adi,custom-temp
+
   "^rsense@":
     type: object
     description: Sense resistor sensor.
@@ -413,6 +453,18 @@ required:
 
 additionalProperties: false
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - adi,ltc2983
+              - adi,ltc2984
+    then:
+      patternProperties:
+        "^temp@": false
+
 examples:
   - |
     #include <dt-bindings/interrupt-controller/irq.h>
-- 
GitLab


From 6f7cadcf664b04df3b2e9b9c6d65bf626aa1b411 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <cosmin.tanislav@analog.com>
Date: Thu, 3 Nov 2022 15:00:41 +0200
Subject: [PATCH 2363/4122] iio: temperature: ltc2983: support more parts

Add support for the following parts:
 * LTC2984
 * LTC2986
 * LTM2985

The LTC2984 is a variant of the LTC2983 with EEPROM.
The LTC2986 is a variant of the LTC2983 with only 10 channels,
EEPROM and support for active analog temperature sensors.
The LTM2985 is software-compatible with the LTC2986.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20221103130041.2153295-14-demonsingur@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/ltc2983.c | 183 ++++++++++++++++++++++++++++--
 1 file changed, 176 insertions(+), 7 deletions(-)

diff --git a/drivers/iio/temperature/ltc2983.c b/drivers/iio/temperature/ltc2983.c
index 1117991ca2ab6..fcb96c44d9540 100644
--- a/drivers/iio/temperature/ltc2983.c
+++ b/drivers/iio/temperature/ltc2983.c
@@ -25,9 +25,12 @@
 #define LTC2983_STATUS_REG			0x0000
 #define LTC2983_TEMP_RES_START_REG		0x0010
 #define LTC2983_TEMP_RES_END_REG		0x005F
+#define LTC2983_EEPROM_KEY_REG			0x00B0
+#define LTC2983_EEPROM_READ_STATUS_REG		0x00D0
 #define LTC2983_GLOBAL_CONFIG_REG		0x00F0
 #define LTC2983_MULT_CHANNEL_START_REG		0x00F4
 #define LTC2983_MULT_CHANNEL_END_REG		0x00F7
+#define LTC2986_EEPROM_STATUS_REG		0x00F9
 #define LTC2983_MUX_CONFIG_REG			0x00FF
 #define LTC2983_CHAN_ASSIGN_START_REG		0x0200
 #define LTC2983_CHAN_ASSIGN_END_REG		0x024F
@@ -35,13 +38,21 @@
 #define LTC2983_CUST_SENS_TBL_END_REG		0x03CF
 
 #define LTC2983_DIFFERENTIAL_CHAN_MIN		2
-#define LTC2983_MAX_CHANNELS_NR			20
 #define LTC2983_MIN_CHANNELS_NR			1
 #define LTC2983_SLEEP				0x97
 #define LTC2983_CUSTOM_STEINHART_SIZE		24
 #define LTC2983_CUSTOM_SENSOR_ENTRY_SZ		6
 #define LTC2983_CUSTOM_STEINHART_ENTRY_SZ	4
 
+#define LTC2983_EEPROM_KEY			0xA53C0F5A
+#define LTC2983_EEPROM_WRITE_CMD		0x15
+#define LTC2983_EEPROM_READ_CMD			0x16
+#define LTC2983_EEPROM_STATUS_FAILURE_MASK	GENMASK(3, 1)
+#define LTC2983_EEPROM_READ_FAILURE_MASK	GENMASK(7, 0)
+
+#define LTC2983_EEPROM_WRITE_TIME_MS		2600
+#define LTC2983_EEPROM_READ_TIME_MS		20
+
 #define LTC2983_CHAN_START_ADDR(chan) \
 			(((chan - 1) * 4) + LTC2983_CHAN_ASSIGN_START_REG)
 #define LTC2983_CHAN_RES_ADDR(chan) \
@@ -171,6 +182,7 @@ enum {
 	LTC2983_SENSOR_DIODE = 28,
 	LTC2983_SENSOR_SENSE_RESISTOR = 29,
 	LTC2983_SENSOR_DIRECT_ADC = 30,
+	LTC2983_SENSOR_ACTIVE_TEMP = 31,
 };
 
 #define to_thermocouple(_sensor) \
@@ -191,7 +203,17 @@ enum {
 #define to_adc(_sensor) \
 		container_of(_sensor, struct ltc2983_adc, sensor)
 
+#define to_temp(_sensor) \
+		container_of(_sensor, struct ltc2983_temp, sensor)
+
+struct ltc2983_chip_info {
+	unsigned int max_channels_nr;
+	bool has_temp;
+	bool has_eeprom;
+};
+
 struct ltc2983_data {
+	const struct ltc2983_chip_info *info;
 	struct regmap *regmap;
 	struct spi_device *spi;
 	struct mutex lock;
@@ -210,6 +232,7 @@ struct ltc2983_data {
 	 */
 	__be32 temp __aligned(IIO_DMA_MINALIGN);
 	__be32 chan_val;
+	__be32 eeprom_key;
 };
 
 struct ltc2983_sensor {
@@ -272,6 +295,12 @@ struct ltc2983_adc {
 	bool single_ended;
 };
 
+struct ltc2983_temp {
+	struct ltc2983_sensor sensor;
+	struct ltc2983_custom_sensor *custom;
+	bool single_ended;
+};
+
 /*
  * Convert to Q format numbers. These number's are integers where
  * the number of integer and fractional bits are specified. The resolution
@@ -606,6 +635,22 @@ static int ltc2983_adc_assign_chan(struct ltc2983_data *st,
 	return __ltc2983_chan_assign_common(st, sensor, chan_val);
 }
 
+static int ltc2983_temp_assign_chan(struct ltc2983_data *st,
+				    const struct ltc2983_sensor *sensor)
+{
+	struct ltc2983_temp *temp = to_temp(sensor);
+	u32 chan_val;
+	int ret;
+
+	chan_val = LTC2983_ADC_SINGLE_ENDED(temp->single_ended);
+
+	ret = __ltc2983_chan_custom_sensor_assign(st, temp->custom, &chan_val);
+	if (ret)
+		return ret;
+
+	return __ltc2983_chan_assign_common(st, sensor, chan_val);
+}
+
 static struct ltc2983_sensor *
 ltc2983_thermocouple_new(const struct fwnode_handle *child, struct ltc2983_data *st,
 			 const struct ltc2983_sensor *sensor)
@@ -771,10 +816,10 @@ ltc2983_rtd_new(const struct fwnode_handle *child, struct ltc2983_data *st,
 	if (rtd->sensor_config & LTC2983_RTD_4_WIRE_MASK) {
 		/* 4-wire */
 		u8 min = LTC2983_DIFFERENTIAL_CHAN_MIN,
-			max = LTC2983_MAX_CHANNELS_NR;
+			max = st->info->max_channels_nr;
 
 		if (rtd->sensor_config & LTC2983_RTD_ROTATION_MASK)
-			max = LTC2983_MAX_CHANNELS_NR - 1;
+			max = st->info->max_channels_nr - 1;
 
 		if (((rtd->sensor_config & LTC2983_RTD_KELVIN_R_SENSE_MASK)
 		     == LTC2983_RTD_KELVIN_R_SENSE_MASK) &&
@@ -1143,6 +1188,38 @@ static struct ltc2983_sensor *ltc2983_adc_new(struct fwnode_handle *child,
 	return &adc->sensor;
 }
 
+static struct ltc2983_sensor *ltc2983_temp_new(struct fwnode_handle *child,
+					       struct ltc2983_data *st,
+					       const struct ltc2983_sensor *sensor)
+{
+	struct ltc2983_temp *temp;
+
+	temp = devm_kzalloc(&st->spi->dev, sizeof(*temp), GFP_KERNEL);
+	if (!temp)
+		return ERR_PTR(-ENOMEM);
+
+	if (fwnode_property_read_bool(child, "adi,single-ended"))
+		temp->single_ended = true;
+
+	if (!temp->single_ended &&
+	    sensor->chan < LTC2983_DIFFERENTIAL_CHAN_MIN) {
+		dev_err(&st->spi->dev, "Invalid chan:%d for differential temp\n",
+			sensor->chan);
+		return ERR_PTR(-EINVAL);
+	}
+
+	temp->custom = __ltc2983_custom_sensor_new(st, child, "adi,custom-temp",
+						   false, 4096, true);
+	if (IS_ERR(temp->custom))
+		return ERR_CAST(temp->custom);
+
+	/* set common parameters */
+	temp->sensor.assign_chan = ltc2983_temp_assign_chan;
+	temp->sensor.fault_handler = ltc2983_common_fault_handler;
+
+	return &temp->sensor;
+}
+
 static int ltc2983_chan_read(struct ltc2983_data *st,
 			const struct ltc2983_sensor *sensor, int *val)
 {
@@ -1302,10 +1379,10 @@ static int ltc2983_parse_dt(struct ltc2983_data *st)
 
 		/* check if we have a valid channel */
 		if (sensor.chan < LTC2983_MIN_CHANNELS_NR ||
-		    sensor.chan > LTC2983_MAX_CHANNELS_NR) {
+		    sensor.chan > st->info->max_channels_nr) {
 			ret = -EINVAL;
 			dev_err(dev, "chan:%d must be from %u to %u\n", sensor.chan,
-				LTC2983_MIN_CHANNELS_NR, LTC2983_MAX_CHANNELS_NR);
+				LTC2983_MIN_CHANNELS_NR, st->info->max_channels_nr);
 			goto put_child;
 		} else if (channel_avail_mask & BIT(sensor.chan)) {
 			ret = -EINVAL;
@@ -1345,6 +1422,9 @@ static int ltc2983_parse_dt(struct ltc2983_data *st)
 			st->iio_channels--;
 		} else if (sensor.type == LTC2983_SENSOR_DIRECT_ADC) {
 			st->sensors[chan] = ltc2983_adc_new(child, st, &sensor);
+		} else if (st->info->has_temp &&
+			   sensor.type == LTC2983_SENSOR_ACTIVE_TEMP) {
+			st->sensors[chan] = ltc2983_temp_new(child, st, &sensor);
 		} else {
 			dev_err(dev, "Unknown sensor type %d\n", sensor.type);
 			ret = -EINVAL;
@@ -1371,6 +1451,45 @@ put_child:
 	return ret;
 }
 
+static int ltc2983_eeprom_cmd(struct ltc2983_data *st, unsigned int cmd,
+			      unsigned int wait_time, unsigned int status_reg,
+			      unsigned long status_fail_mask)
+{
+	unsigned long time;
+	unsigned int val;
+	int ret;
+
+	ret = regmap_bulk_write(st->regmap, LTC2983_EEPROM_KEY_REG,
+				&st->eeprom_key, sizeof(st->eeprom_key));
+	if (ret)
+		return ret;
+
+	reinit_completion(&st->completion);
+
+	ret = regmap_write(st->regmap, LTC2983_STATUS_REG,
+			   LTC2983_STATUS_START(true) | cmd);
+	if (ret)
+		return ret;
+
+	time = wait_for_completion_timeout(&st->completion,
+					   msecs_to_jiffies(wait_time));
+	if (!time) {
+		dev_err(&st->spi->dev, "EEPROM command timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	ret = regmap_read(st->regmap, status_reg, &val);
+	if (ret)
+		return ret;
+
+	if (val & status_fail_mask) {
+		dev_err(&st->spi->dev, "EEPROM command failed: 0x%02X\n", val);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio)
 {
 	u32 iio_chan_t = 0, iio_chan_v = 0, chan, iio_idx = 0, status;
@@ -1396,6 +1515,15 @@ static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio)
 	if (ret)
 		return ret;
 
+	if (st->info->has_eeprom && !assign_iio) {
+		ret = ltc2983_eeprom_cmd(st, LTC2983_EEPROM_READ_CMD,
+					 LTC2983_EEPROM_READ_TIME_MS,
+					 LTC2983_EEPROM_READ_STATUS_REG,
+					 LTC2983_EEPROM_READ_FAILURE_MASK);
+		if (!ret)
+			return 0;
+	}
+
 	for (chan = 0; chan < st->num_channels; chan++) {
 		u32 chan_type = 0, *iio_chan;
 
@@ -1435,9 +1563,13 @@ static int ltc2983_setup(struct ltc2983_data *st, bool assign_iio)
 static const struct regmap_range ltc2983_reg_ranges[] = {
 	regmap_reg_range(LTC2983_STATUS_REG, LTC2983_STATUS_REG),
 	regmap_reg_range(LTC2983_TEMP_RES_START_REG, LTC2983_TEMP_RES_END_REG),
+	regmap_reg_range(LTC2983_EEPROM_KEY_REG, LTC2983_EEPROM_KEY_REG),
+	regmap_reg_range(LTC2983_EEPROM_READ_STATUS_REG,
+			 LTC2983_EEPROM_READ_STATUS_REG),
 	regmap_reg_range(LTC2983_GLOBAL_CONFIG_REG, LTC2983_GLOBAL_CONFIG_REG),
 	regmap_reg_range(LTC2983_MULT_CHANNEL_START_REG,
 			 LTC2983_MULT_CHANNEL_END_REG),
+	regmap_reg_range(LTC2986_EEPROM_STATUS_REG, LTC2986_EEPROM_STATUS_REG),
 	regmap_reg_range(LTC2983_MUX_CONFIG_REG, LTC2983_MUX_CONFIG_REG),
 	regmap_reg_range(LTC2983_CHAN_ASSIGN_START_REG,
 			 LTC2983_CHAN_ASSIGN_END_REG),
@@ -1482,6 +1614,12 @@ static int ltc2983_probe(struct spi_device *spi)
 
 	st = iio_priv(indio_dev);
 
+	st->info = device_get_match_data(&spi->dev);
+	if (!st->info)
+		st->info = (void *)spi_get_device_id(spi)->driver_data;
+	if (!st->info)
+		return -ENODEV;
+
 	st->regmap = devm_regmap_init_spi(spi, &ltc2983_regmap_config);
 	if (IS_ERR(st->regmap)) {
 		dev_err(&spi->dev, "Failed to initialize regmap\n");
@@ -1491,6 +1629,7 @@ static int ltc2983_probe(struct spi_device *spi)
 	mutex_init(&st->lock);
 	init_completion(&st->completion);
 	st->spi = spi;
+	st->eeprom_key = cpu_to_be32(LTC2983_EEPROM_KEY);
 	spi_set_drvdata(spi, st);
 
 	ret = ltc2983_parse_dt(st);
@@ -1524,6 +1663,15 @@ static int ltc2983_probe(struct spi_device *spi)
 		return ret;
 	}
 
+	if (st->info->has_eeprom) {
+		ret = ltc2983_eeprom_cmd(st, LTC2983_EEPROM_WRITE_CMD,
+					 LTC2983_EEPROM_WRITE_TIME_MS,
+					 LTC2986_EEPROM_STATUS_REG,
+					 LTC2983_EEPROM_STATUS_FAILURE_MASK);
+		if (ret)
+			return ret;
+	}
+
 	indio_dev->name = name;
 	indio_dev->num_channels = st->iio_channels;
 	indio_dev->channels = st->iio_chan;
@@ -1554,14 +1702,35 @@ static int ltc2983_suspend(struct device *dev)
 static DEFINE_SIMPLE_DEV_PM_OPS(ltc2983_pm_ops, ltc2983_suspend,
 				ltc2983_resume);
 
+static const struct ltc2983_chip_info ltc2983_chip_info_data = {
+	.max_channels_nr = 20,
+};
+
+static const struct ltc2983_chip_info ltc2984_chip_info_data = {
+	.max_channels_nr = 20,
+	.has_eeprom = true,
+};
+
+static const struct ltc2983_chip_info ltc2986_chip_info_data = {
+	.max_channels_nr = 10,
+	.has_temp = true,
+	.has_eeprom = true,
+};
+
 static const struct spi_device_id ltc2983_id_table[] = {
-	{ "ltc2983" },
+	{ "ltc2983", (kernel_ulong_t)&ltc2983_chip_info_data },
+	{ "ltc2984", (kernel_ulong_t)&ltc2984_chip_info_data },
+	{ "ltc2986", (kernel_ulong_t)&ltc2986_chip_info_data },
+	{ "ltm2985", (kernel_ulong_t)&ltc2986_chip_info_data },
 	{},
 };
 MODULE_DEVICE_TABLE(spi, ltc2983_id_table);
 
 static const struct of_device_id ltc2983_of_match[] = {
-	{ .compatible = "adi,ltc2983" },
+	{ .compatible = "adi,ltc2983", .data = &ltc2983_chip_info_data },
+	{ .compatible = "adi,ltc2984", .data = &ltc2984_chip_info_data },
+	{ .compatible = "adi,ltc2986", .data = &ltc2986_chip_info_data },
+	{ .compatible = "adi,ltm2985", .data = &ltc2986_chip_info_data },
 	{},
 };
 MODULE_DEVICE_TABLE(of, ltc2983_of_match);
-- 
GitLab


From bcf22afd2ce0b8bd4fad50bfda92e8cbbc483c72 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sat, 5 Nov 2022 12:51:07 +0000
Subject: [PATCH 2364/4122] iio: temperature: mlx90632: Add error handling for
 devm_pm_runtime_enable()

This call can fail so handling is necessary even if it is very unlikely.

Reported-by: coverity-bot <keescook+coverity-bot@chromium.org>
Addresses-Coverity-ID: 1527134 ("Error handling issues")
Fixes: 2aebc223fc7c ("iio: temperature: mlx90632 Add runtime powermanagement modes")
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-off-by: Crt Mori <cmo@melexis.com>
Link: https://lore.kernel.org/r/20221105125108.383193-2-jic23@kernel.org
---
 drivers/iio/temperature/mlx90632.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index a17fe5f4967a6..7572ae3f84322 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -1267,7 +1267,10 @@ static int mlx90632_probe(struct i2c_client *client,
 	pm_runtime_get_noresume(&client->dev);
 	pm_runtime_set_active(&client->dev);
 
-	devm_pm_runtime_enable(&client->dev);
+	ret = devm_pm_runtime_enable(&client->dev);
+	if (ret)
+		return ret;
+
 	pm_runtime_set_autosuspend_delay(&client->dev, MLX90632_SLEEP_DELAY_MS);
 	pm_runtime_use_autosuspend(&client->dev);
 	pm_runtime_put_autosuspend(&client->dev);
-- 
GitLab


From 99043ba702243f69d1853bd8aeca01e22836ede3 Mon Sep 17 00:00:00 2001
From: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Date: Sat, 5 Nov 2022 12:51:08 +0000
Subject: [PATCH 2365/4122] iio: temperature: mlx90632: Add missing static
 marking on devm_pm_ops

Only used within this file, so should be marked static.

Fixes: 2aebc223fc7c ("iio: temperature: mlx90632 Add runtime powermanagement modes")
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-off-by: Crt Mori <cmo@melexis.com>
Link: https://lore.kernel.org/r/20221105125108.383193-3-jic23@kernel.org
---
 drivers/iio/temperature/mlx90632.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index 7572ae3f84322..f1f5ebc145b14 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -1326,7 +1326,7 @@ static int mlx90632_pm_runtime_suspend(struct device *dev)
 	return mlx90632_pwr_set_sleep_step(data->regmap);
 }
 
-const struct dev_pm_ops mlx90632_pm_ops = {
+static const struct dev_pm_ops mlx90632_pm_ops = {
 	SYSTEM_SLEEP_PM_OPS(mlx90632_pm_suspend, mlx90632_pm_resume)
 	RUNTIME_PM_OPS(mlx90632_pm_runtime_suspend, NULL, NULL)
 };
-- 
GitLab


From c486b7019398a7a451631d5d99b67480768bfd25 Mon Sep 17 00:00:00 2001
From: Gwendal Grignou <gwendal@chromium.org>
Date: Sat, 5 Nov 2022 15:51:57 -0700
Subject: [PATCH 2366/4122] iio: proximity: sx9360: Add a new ACPI hardware ID

From
https://treexy.com/products/driver-fusion/database/sensors/semtech/sx9360-proximity/

sx9360 SAR sensor can be presented with ACPI ID SAMM0208.

Whilst this does not appear to be an official ACPI ID, it is in the wild
(perhaps due to a typo on the SAM PnP ID).

Reported-by: Jordi Torres <majosamaso@gmail.com>
Signed-off-by: Gwendal Grignou <gwendal@chromium.org>
Link: https://lore.kernel.org/r/20221105225157.10081-1-gwendal@chromium.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/sx9360.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/proximity/sx9360.c b/drivers/iio/proximity/sx9360.c
index 7fa2213d23baf..6e19d22e6a015 100644
--- a/drivers/iio/proximity/sx9360.c
+++ b/drivers/iio/proximity/sx9360.c
@@ -865,6 +865,7 @@ static DEFINE_SIMPLE_DEV_PM_OPS(sx9360_pm_ops, sx9360_suspend, sx9360_resume);
 
 static const struct acpi_device_id sx9360_acpi_match[] = {
 	{ "STH9360", SX9360_WHOAMI_VALUE },
+	{ "SAMM0208", SX9360_WHOAMI_VALUE },
 	{ }
 };
 MODULE_DEVICE_TABLE(acpi, sx9360_acpi_match);
-- 
GitLab


From 46975081f75105de5a6bda05af90323a4894d2be Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 6 Nov 2022 15:36:53 +0100
Subject: [PATCH 2367/4122] iio: imu: st_lsm6dsx: add support to LSM6DSV16X

Add support to STM LSM6DSV16X (accelerometer and gyroscope) Mems sensor.
The LSM6DSV16X sensor can use LSM6DSV as fallback device since it
implements all the LSM6DSV16X features currently implemented in
st_lsm6dsx.

Datasheet: https://www.st.com/resource/en/datasheet/lsm6dsv16x.pdf
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/284b251f861dff30c399e5736a843c8e3a497249.1667745215.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/Kconfig           | 3 ++-
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h      | 2 ++
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 6 +++++-
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c  | 5 +++++
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c  | 5 +++++
 5 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig
index 1c68bac94bcee..37d02e0fc227a 100644
--- a/drivers/iio/imu/st_lsm6dsx/Kconfig
+++ b/drivers/iio/imu/st_lsm6dsx/Kconfig
@@ -13,7 +13,8 @@ config IIO_ST_LSM6DSX
 	  sensor. Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm,
 	  ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr,
 	  lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx,
-	  lsm6dsv, the accelerometer/gyroscope of lsm9ds1 and lsm6dst.
+	  lsm6dsv, lsm6dsv16x, the accelerometer/gyroscope of lsm9ds1
+	  and lsm6dst.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called st_lsm6dsx.
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index ab61895cf0720..683cfadcf62ef 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -34,6 +34,7 @@
 #define ST_ASM330LHHX_DEV_NAME	"asm330lhhx"
 #define ST_LSM6DSTX_DEV_NAME	"lsm6dstx"
 #define ST_LSM6DSV_DEV_NAME	"lsm6dsv"
+#define ST_LSM6DSV16X_DEV_NAME	"lsm6dsv16x"
 
 enum st_lsm6dsx_hw_id {
 	ST_LSM6DS3_ID,
@@ -55,6 +56,7 @@ enum st_lsm6dsx_hw_id {
 	ST_ASM330LHHX_ID,
 	ST_LSM6DSTX_ID,
 	ST_LSM6DSV_ID,
+	ST_LSM6DSV16X_ID,
 	ST_LSM6DSX_MAX_ID,
 };
 
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 5e716a5071fd4..acb6101aec5d9 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -27,7 +27,7 @@
  *   - FIFO size: 4KB
  *
  * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/
- *   LSM6DSTX:
+ *   LSM6DSTX/LSM6DSV/LSM6DSV16X:
  *   - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416,
  *     833
  *   - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16
@@ -1180,6 +1180,10 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
 				.hw_id = ST_LSM6DSV_ID,
 				.name = ST_LSM6DSV_DEV_NAME,
 				.wai = 0x70,
+			}, {
+				.hw_id = ST_LSM6DSV16X_ID,
+				.name = ST_LSM6DSV16X_DEV_NAME,
+				.wai = 0x70,
 			},
 		},
 		.channels = {
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
index 239c8920a31f8..0faf1b4c11afe 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
@@ -113,6 +113,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = {
 		.compatible = "st,lsm6dsv",
 		.data = (void *)ST_LSM6DSV_ID,
 	},
+	{
+		.compatible = "st,lsm6dsv16x",
+		.data = (void *)ST_LSM6DSV16X_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match);
@@ -137,6 +141,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = {
 	{ ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID },
 	{ ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID },
 	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
+	{ ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table);
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
index 66705ef16ed04..57597aaa2a92a 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
@@ -113,6 +113,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = {
 		.compatible = "st,lsm6dsv",
 		.data = (void *)ST_LSM6DSV_ID,
 	},
+	{
+		.compatible = "st,lsm6dsv16x",
+		.data = (void *)ST_LSM6DSV16X_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match);
@@ -137,6 +141,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = {
 	{ ST_ASM330LHHX_DEV_NAME, ST_ASM330LHHX_ID },
 	{ ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID },
 	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
+	{ ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table);
-- 
GitLab


From 03e8373c070daf14eed069783e3cf343470ca5cd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 6 Nov 2022 15:36:54 +0100
Subject: [PATCH 2368/4122] dt-bindings: iio: imu: st_lsm6dsx: add lsm6dsv16x

Add device bindings for lsm6dsv16x IMU sensor.
Use lsm6dsv as fallback device for lsm6dsv16x since it implements all the
features currently supported by lsm6dsv16x.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/8d10a63ec6abd22863ab25addd8c2f578dbc9cd9.1667745215.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
index e7349a3275dda..5933270799f55 100644
--- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
+++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
@@ -39,6 +39,9 @@ properties:
       - items:
           - const: st,lsm6dstx
           - const: st,lsm6dst
+      - items:
+          - const: st,lsm6dsv16x
+          - const: st,lsm6dsv
 
   reg:
     maxItems: 1
-- 
GitLab


From 84aea36299a411f46d4372dc2d51c037ae70341a Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Fri, 11 Nov 2022 15:28:46 -0600
Subject: [PATCH 2369/4122] dt-bindings: iio: dac: adi,ad5758: Drop 'contains'
 from 'adi,dc-dc-mode'

'contains' applies to arrays, but 'adi,dc-dc-mode' is a scalar. So drop
'contains' from the 'if' schema.

Signed-off-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221111212846.4104059-1-robh@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml
index e49e7556175df..4e508bfcc9d87 100644
--- a/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml
+++ b/Documentation/devicetree/bindings/iio/dac/adi,ad5758.yaml
@@ -102,8 +102,7 @@ allOf:
   - if:
       properties:
         adi,dc-dc-mode:
-          contains:
-            enum: [1, 3]
+          enum: [1, 3]
     then:
       properties:
         adi,range-microvolt: false
-- 
GitLab


From 6809ec97686f00b667468f7e8e582a6e3d50169b Mon Sep 17 00:00:00 2001
From: Jagan Teki <jagan@edgeble.ai>
Date: Tue, 8 Nov 2022 09:43:53 +0530
Subject: [PATCH 2370/4122] dt-bindings: iio: adc: rockchip-saradc: Add saradc
 for rv1126

Add saradc compatible string for rockchip rv1126.

Cc: linux-iio@vger.kernel.org
Cc: Jonathan Cameron <jic23@kernel.org>
Signed-off-by: Jagan Teki <jagan@edgeble.ai>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221108041400.157052-4-jagan@edgeble.ai
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml
index e512a14e41b43..da50b529c1576 100644
--- a/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/rockchip-saradc.yaml
@@ -22,6 +22,7 @@ properties:
               - rockchip,rk3328-saradc
               - rockchip,rk3568-saradc
               - rockchip,rv1108-saradc
+              - rockchip,rv1126-saradc
           - const: rockchip,rk3399-saradc
 
   reg:
-- 
GitLab


From c896b9f0920fdcb9a815014ca1917522ba2c9b9a Mon Sep 17 00:00:00 2001
From: Jay Greco <grecojay@amazon.com>
Date: Thu, 10 Nov 2022 19:29:32 +0000
Subject: [PATCH 2371/4122] iio: imu: inv_icm42600: Add support for icm42631

Add the required WHOAMI and device_id definitions to support the
icm42631.

Signed-off-by: Jay Greco <grecojay@amazon.com>
Acked-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Link: https://lore.kernel.org/r/20221110192933.13616-2-grecojay@amazon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/inv_icm42600/inv_icm42600.h      | 2 ++
 drivers/iio/imu/inv_icm42600/inv_icm42600_core.c | 5 +++++
 drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c  | 3 +++
 drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c  | 3 +++
 4 files changed, 13 insertions(+)

diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600.h b/drivers/iio/imu/inv_icm42600/inv_icm42600.h
index 3d91469beccbb..0e290c807b0f9 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600.h
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600.h
@@ -22,6 +22,7 @@ enum inv_icm42600_chip {
 	INV_CHIP_ICM42602,
 	INV_CHIP_ICM42605,
 	INV_CHIP_ICM42622,
+	INV_CHIP_ICM42631,
 	INV_CHIP_NB,
 };
 
@@ -303,6 +304,7 @@ struct inv_icm42600_state {
 #define INV_ICM42600_WHOAMI_ICM42602			0x41
 #define INV_ICM42600_WHOAMI_ICM42605			0x42
 #define INV_ICM42600_WHOAMI_ICM42622			0x46
+#define INV_ICM42600_WHOAMI_ICM42631			0x5C
 
 /* User bank 1 (MSB 0x10) */
 #define INV_ICM42600_REG_SENSOR_CONFIG0			0x1003
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
index b63c5dab1a562..7b3a2a0dc2cb0 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_core.c
@@ -87,6 +87,11 @@ static const struct inv_icm42600_hw inv_icm42600_hw[INV_CHIP_NB] = {
 		.name = "icm42622",
 		.conf = &inv_icm42600_default_conf,
 	},
+	[INV_CHIP_ICM42631] = {
+		.whoami = INV_ICM42600_WHOAMI_ICM42631,
+		.name = "icm42631",
+		.conf = &inv_icm42600_default_conf,
+	},
 };
 
 const struct iio_mount_matrix *
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
index 4f96989ddf4a1..eb2681ad375f2 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_i2c.c
@@ -84,6 +84,9 @@ static const struct of_device_id inv_icm42600_of_matches[] = {
 	}, {
 		.compatible = "invensense,icm42622",
 		.data = (void *)INV_CHIP_ICM42622,
+	}, {
+		.compatible = "invensense,icm42631",
+		.data = (void *)INV_CHIP_ICM42631,
 	},
 	{}
 };
diff --git a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
index 486b46e531131..6be4ac7949379 100644
--- a/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
+++ b/drivers/iio/imu/inv_icm42600/inv_icm42600_spi.c
@@ -80,6 +80,9 @@ static const struct of_device_id inv_icm42600_of_matches[] = {
 	}, {
 		.compatible = "invensense,icm42622",
 		.data = (void *)INV_CHIP_ICM42622,
+	}, {
+		.compatible = "invensense,icm42631",
+		.data = (void *)INV_CHIP_ICM42631,
 	},
 	{}
 };
-- 
GitLab


From 7b4452381a966de33707718aa8d79672cc8caa2f Mon Sep 17 00:00:00 2001
From: Jay Greco <grecojay@amazon.com>
Date: Thu, 10 Nov 2022 19:29:33 +0000
Subject: [PATCH 2372/4122] dt-bindings: iio: imu: Add inv_icm42600
 documentation

Update the required documentation for the icm42631.

Signed-off-by: Jay Greco <grecojay@amazon.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Link: https://lore.kernel.org/r/20221110192933.13616-3-grecojay@amazon.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/imu/invensense,icm42600.yaml         | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml b/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml
index 488349755c99d..13c9abdd31314 100644
--- a/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml
+++ b/Documentation/devicetree/bindings/iio/imu/invensense,icm42600.yaml
@@ -31,6 +31,7 @@ properties:
       - invensense,icm42602
       - invensense,icm42605
       - invensense,icm42622
+      - invensense,icm42631
 
   reg:
     maxItems: 1
-- 
GitLab


From 5ae34494cf37ae6383f4bd674f343ef20833024d Mon Sep 17 00:00:00 2001
From: Subhajit Ghosh <subhajit.ghosh@vixtechnology.com>
Date: Thu, 10 Nov 2022 18:12:41 +0800
Subject: [PATCH 2373/4122] iio: light: apds9960: Fix iio_event_spec structures

There is only one interrupt enable option for both ALS low and high
thresholds, and one for both Proximity low and high thresholds.

Signed-off-by: Subhajit Ghosh <subhajit.ghosh@vixtechnology.com>
Reviewed-by: Matt Ranostay <matt.ranostay@konsulko.com>
Link: https://lore.kernel.org/r/20221110101241.10576-1-subhajit.ghosh@vixtechnology.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/apds9960.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index b62c139baf414..997aa01ecc115 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -223,14 +223,16 @@ static const struct iio_event_spec apds9960_pxs_event_spec[] = {
 	{
 		.type = IIO_EV_TYPE_THRESH,
 		.dir = IIO_EV_DIR_RISING,
-		.mask_separate = BIT(IIO_EV_INFO_VALUE) |
-			BIT(IIO_EV_INFO_ENABLE),
+		.mask_separate = BIT(IIO_EV_INFO_VALUE),
 	},
 	{
 		.type = IIO_EV_TYPE_THRESH,
 		.dir = IIO_EV_DIR_FALLING,
-		.mask_separate = BIT(IIO_EV_INFO_VALUE) |
-			BIT(IIO_EV_INFO_ENABLE),
+		.mask_separate = BIT(IIO_EV_INFO_VALUE),
+	},
+	{
+		.type = IIO_EV_TYPE_THRESH,
+		.mask_separate = BIT(IIO_EV_INFO_ENABLE),
 	},
 };
 
@@ -238,14 +240,16 @@ static const struct iio_event_spec apds9960_als_event_spec[] = {
 	{
 		.type = IIO_EV_TYPE_THRESH,
 		.dir = IIO_EV_DIR_RISING,
-		.mask_separate = BIT(IIO_EV_INFO_VALUE) |
-			BIT(IIO_EV_INFO_ENABLE),
+		.mask_separate = BIT(IIO_EV_INFO_VALUE),
 	},
 	{
 		.type = IIO_EV_TYPE_THRESH,
 		.dir = IIO_EV_DIR_FALLING,
-		.mask_separate = BIT(IIO_EV_INFO_VALUE) |
-			BIT(IIO_EV_INFO_ENABLE),
+		.mask_separate = BIT(IIO_EV_INFO_VALUE),
+	},
+	{
+		.type = IIO_EV_TYPE_THRESH,
+		.mask_separate = BIT(IIO_EV_INFO_ENABLE),
 	},
 };
 
-- 
GitLab


From 572cc583c92e7b113a0d0f650b02d80505920eef Mon Sep 17 00:00:00 2001
From: Deepak R Varma <drv@mailo.com>
Date: Sun, 13 Nov 2022 10:58:20 +0530
Subject: [PATCH 2374/4122] staging: iio: meter: replace ternary operator by if
 condition

Replace ternary operator by simple if based evaluation of the return
value. Issue identified using coccicheck.

Signed-off-by: Deepak R Varma <drv@mailo.com>
Link: https://lore.kernel.org/r/Y3CAdCa17WdWDYUa@qemulion
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/meter/ade7854-i2c.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/iio/meter/ade7854-i2c.c b/drivers/staging/iio/meter/ade7854-i2c.c
index a9a06e8dda51f..71b67dd3c8e98 100644
--- a/drivers/staging/iio/meter/ade7854-i2c.c
+++ b/drivers/staging/iio/meter/ade7854-i2c.c
@@ -61,7 +61,10 @@ static int ade7854_i2c_write_reg(struct device *dev,
 unlock:
 	mutex_unlock(&st->buf_lock);
 
-	return ret < 0 ? ret : 0;
+	if (ret < 0)
+		return ret;
+
+	return 0;
 }
 
 static int ade7854_i2c_read_reg(struct device *dev,
-- 
GitLab


From 10c4539d1d5581646997d7f9b430319bcb9245b1 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Mon, 14 Nov 2022 15:36:49 +0200
Subject: [PATCH 2375/4122] iio: adc: ad4130: depend on GPIOLIB

Fixes undefined references to 'gpiochip_get_data' and
'devm_gpiochip_add_data_with_key'.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reported-by: kernel test robot <lkp@intel.com>
Fixes: 62094060cf3a ("iio: adc: ad4130: add AD4130 driver")
Link: https://lore.kernel.org/r/20221114133649.1737027-1-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/adc/Kconfig b/drivers/iio/adc/Kconfig
index 8d719fbb6acc0..63f80d747cbd2 100644
--- a/drivers/iio/adc/Kconfig
+++ b/drivers/iio/adc/Kconfig
@@ -24,6 +24,7 @@ config AD_SIGMA_DELTA
 config AD4130
 	tristate "Analog Device AD4130 ADC Driver"
 	depends on SPI
+	depends on GPIOLIB
 	select IIO_BUFFER
 	select IIO_KFIFO_BUF
 	select REGMAP_SPI
-- 
GitLab


From d94fbd9231a2b4062cf5972629999402ea7dd6fa Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 14 Nov 2022 10:25:34 +0100
Subject: [PATCH 2376/4122] iio: imu: st_lsm6dsx: fix LSM6DSV sensor
 description

Fix sensor ODR description for LSM6DSV/LSM6DSVX

Fixes: a1c6d631ff12 ("iio: imu: st_lsm6dsx: add support to LSM6DSV")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/653e14bf79bb88d8581d2bc42da2f784caaf3776.1668417770.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index acb6101aec5d9..b680682f98330 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -27,13 +27,20 @@
  *   - FIFO size: 4KB
  *
  * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/
- *   LSM6DSTX/LSM6DSV/LSM6DSV16X:
+ *   LSM6DSTX:
  *   - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416,
  *     833
  *   - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16
  *   - Gyroscope supported full-scale [dps]: +-125/+-245/+-500/+-1000/+-2000
  *   - FIFO size: 3KB
  *
+ * - LSM6DSV/LSM6DSV16X:
+ *   - Accelerometer/Gyroscope supported ODR [Hz]: 7.5, 15, 30, 60, 120, 240,
+ *     480, 960
+ *   - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16
+ *   - Gyroscope supported full-scale [dps]: +-125/+-250/+-500/+-1000/+-2000
+ *   - FIFO size: 3KB
+ *
  * - LSM9DS1/LSM6DS0:
  *   - Accelerometer supported ODR [Hz]: 10, 50, 119, 238, 476, 952
  *   - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16
-- 
GitLab


From 2c5c45e236c4706a261042f67766b0486c72d603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:23 +0100
Subject: [PATCH 2377/4122] iio: accel: adxl372_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-50-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adxl372_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/adxl372_i2c.c b/drivers/iio/accel/adxl372_i2c.c
index 4efb70a5fe405..e5f310ea65fff 100644
--- a/drivers/iio/accel/adxl372_i2c.c
+++ b/drivers/iio/accel/adxl372_i2c.c
@@ -18,9 +18,9 @@ static const struct regmap_config adxl372_regmap_config = {
 	.readable_noinc_reg = adxl372_readable_noinc_reg,
 };
 
-static int adxl372_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int adxl372_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	unsigned int regval;
 	int ret;
@@ -58,7 +58,7 @@ static struct i2c_driver adxl372_i2c_driver = {
 		.name = "adxl372_i2c",
 		.of_match_table = adxl372_of_match,
 	},
-	.probe = adxl372_i2c_probe,
+	.probe_new = adxl372_i2c_probe,
 	.id_table = adxl372_i2c_id,
 };
 
-- 
GitLab


From c476246bd5abb1f6d40054d83fea3f1a407ccb11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:24 +0100
Subject: [PATCH 2378/4122] iio: accel: bma180: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-51-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/bma180.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/bma180.c b/drivers/iio/accel/bma180.c
index d03fc3400f94e..eb697eeb4301b 100644
--- a/drivers/iio/accel/bma180.c
+++ b/drivers/iio/accel/bma180.c
@@ -921,9 +921,9 @@ static const struct iio_trigger_ops bma180_trigger_ops = {
 	.reenable = bma180_trig_reen,
 };
 
-static int bma180_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int bma180_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct bma180_data *data;
 	struct iio_dev *indio_dev;
@@ -1134,7 +1134,7 @@ static struct i2c_driver bma180_driver = {
 		.pm	= pm_sleep_ptr(&bma180_pm_ops),
 		.of_match_table = bma180_of_match,
 	},
-	.probe		= bma180_probe,
+	.probe_new	= bma180_probe,
 	.remove		= bma180_remove,
 	.id_table	= bma180_ids,
 };
-- 
GitLab


From e599500f96fb51417320b8b51ae4c36868672b13 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:25 +0100
Subject: [PATCH 2379/4122] iio: accel: bma400: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-52-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/bma400_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/bma400_i2c.c b/drivers/iio/accel/bma400_i2c.c
index 1ba2a982ea736..688b06dae669b 100644
--- a/drivers/iio/accel/bma400_i2c.c
+++ b/drivers/iio/accel/bma400_i2c.c
@@ -13,9 +13,9 @@
 
 #include "bma400.h"
 
-static int bma400_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int bma400_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 
 	regmap = devm_regmap_init_i2c(client, &bma400_regmap_config);
@@ -44,7 +44,7 @@ static struct i2c_driver bma400_i2c_driver = {
 		.name = "bma400",
 		.of_match_table = bma400_of_i2c_match,
 	},
-	.probe    = bma400_i2c_probe,
+	.probe_new = bma400_i2c_probe,
 	.id_table = bma400_i2c_ids,
 };
 
-- 
GitLab


From a141d225f6d62809d2b6a6c916929a8bbaaaaad5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:26 +0100
Subject: [PATCH 2380/4122] iio: accel: bmc150: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-53-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/bmc150-accel-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/bmc150-accel-i2c.c b/drivers/iio/accel/bmc150-accel-i2c.c
index be8cc598b88e3..509cab2bd6947 100644
--- a/drivers/iio/accel/bmc150-accel-i2c.c
+++ b/drivers/iio/accel/bmc150-accel-i2c.c
@@ -171,9 +171,9 @@ static void bmc150_acpi_dual_accel_probe(struct i2c_client *client) {}
 static void bmc150_acpi_dual_accel_remove(struct i2c_client *client) {}
 #endif
 
-static int bmc150_accel_probe(struct i2c_client *client,
-			      const struct i2c_device_id *id)
+static int bmc150_accel_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	const char *name = NULL;
 	enum bmc150_type type = BOSCH_UNKNOWN;
@@ -269,7 +269,7 @@ static struct i2c_driver bmc150_accel_driver = {
 		.acpi_match_table = ACPI_PTR(bmc150_accel_acpi_match),
 		.pm	= &bmc150_accel_pm_ops,
 	},
-	.probe		= bmc150_accel_probe,
+	.probe_new	= bmc150_accel_probe,
 	.remove		= bmc150_accel_remove,
 	.id_table	= bmc150_accel_id,
 };
-- 
GitLab


From a8fab44897dd959c01a88a806fd4252a29c62055 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:27 +0100
Subject: [PATCH 2381/4122] iio: accel: da280: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-54-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/da280.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/da280.c b/drivers/iio/accel/da280.c
index 04e9c56789647..38a7d811610e7 100644
--- a/drivers/iio/accel/da280.c
+++ b/drivers/iio/accel/da280.c
@@ -105,9 +105,9 @@ static void da280_disable(void *client)
 	da280_enable(client, false);
 }
 
-static int da280_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int da280_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int ret;
 	struct iio_dev *indio_dev;
 	struct da280_data *data;
@@ -184,7 +184,7 @@ static struct i2c_driver da280_driver = {
 		.acpi_match_table = ACPI_PTR(da280_acpi_match),
 		.pm = pm_sleep_ptr(&da280_pm_ops),
 	},
-	.probe		= da280_probe,
+	.probe_new	= da280_probe,
 	.id_table	= da280_i2c_id,
 };
 
-- 
GitLab


From 09cec0835243b17316995bce30532f5ad51db355 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:28 +0100
Subject: [PATCH 2382/4122] iio: accel: kxcjk-1013: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-55-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/kxcjk-1013.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/kxcjk-1013.c b/drivers/iio/accel/kxcjk-1013.c
index e626b6fa8a36a..98da4bda22dfe 100644
--- a/drivers/iio/accel/kxcjk-1013.c
+++ b/drivers/iio/accel/kxcjk-1013.c
@@ -1424,9 +1424,9 @@ static const char *kxcjk1013_match_acpi_device(struct device *dev,
 	return dev_name(dev);
 }
 
-static int kxcjk1013_probe(struct i2c_client *client,
-			   const struct i2c_device_id *id)
+static int kxcjk1013_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	static const char * const regulator_names[] = { "vdd", "vddio" };
 	struct kxcjk1013_data *data;
 	struct iio_dev *indio_dev;
@@ -1732,7 +1732,7 @@ static struct i2c_driver kxcjk1013_driver = {
 		.of_match_table = kxcjk1013_of_match,
 		.pm	= &kxcjk1013_pm_ops,
 	},
-	.probe		= kxcjk1013_probe,
+	.probe_new	= kxcjk1013_probe,
 	.remove		= kxcjk1013_remove,
 	.id_table	= kxcjk1013_id,
 };
-- 
GitLab


From 17b9a72f4a11c64a12d7cd4a7951ba44c5f2f374 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:29 +0100
Subject: [PATCH 2383/4122] iio: accel: mma7455_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-56-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mma7455_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mma7455_i2c.c b/drivers/iio/accel/mma7455_i2c.c
index c63b321b01cda..a3864dbe27613 100644
--- a/drivers/iio/accel/mma7455_i2c.c
+++ b/drivers/iio/accel/mma7455_i2c.c
@@ -10,9 +10,9 @@
 
 #include "mma7455.h"
 
-static int mma7455_i2c_probe(struct i2c_client *i2c,
-			     const struct i2c_device_id *id)
+static int mma7455_i2c_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	struct regmap *regmap;
 	const char *name = NULL;
 
@@ -46,7 +46,7 @@ static const struct of_device_id mma7455_of_match[] = {
 MODULE_DEVICE_TABLE(of, mma7455_of_match);
 
 static struct i2c_driver mma7455_i2c_driver = {
-	.probe = mma7455_i2c_probe,
+	.probe_new = mma7455_i2c_probe,
 	.remove = mma7455_i2c_remove,
 	.id_table = mma7455_i2c_ids,
 	.driver = {
-- 
GitLab


From 12491d35551df69709777bd7769e1e33641943cc Mon Sep 17 00:00:00 2001
From: Angel Iglesias <ang.iglesiasg@gmail.com>
Date: Sun, 13 Nov 2022 18:54:46 +0100
Subject: [PATCH 2384/4122] iio: pressure: bmp280: convert to i2c's
 .probe_new()

Use i2c_client_get_device_id() to get the i2c_device_id* parameter in the
.new_probe() callback.

Signed-off-by: Angel Iglesias <ang.iglesiasg@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/5dcaa389ea2ffe7050091b07a3bc4b0c1c9d586b.1668361368.git.ang.iglesiasg@gmail.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/bmp280-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/bmp280-i2c.c b/drivers/iio/pressure/bmp280-i2c.c
index 0c27211f3ea07..14eab086d24a6 100644
--- a/drivers/iio/pressure/bmp280-i2c.c
+++ b/drivers/iio/pressure/bmp280-i2c.c
@@ -5,11 +5,11 @@
 
 #include "bmp280.h"
 
-static int bmp280_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int bmp280_i2c_probe(struct i2c_client *client)
 {
 	struct regmap *regmap;
 	const struct regmap_config *regmap_config;
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 
 	switch (id->driver_data) {
 	case BMP180_CHIP_ID:
@@ -65,7 +65,7 @@ static struct i2c_driver bmp280_i2c_driver = {
 		.of_match_table = bmp280_of_i2c_match,
 		.pm = pm_ptr(&bmp280_dev_pm_ops),
 	},
-	.probe		= bmp280_i2c_probe,
+	.probe_new	= bmp280_i2c_probe,
 	.id_table	= bmp280_i2c_id,
 };
 module_i2c_driver(bmp280_i2c_driver);
-- 
GitLab


From 226d6dbac88336f37b1fae76aabe6fa610e9b872 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:30 +0100
Subject: [PATCH 2385/4122] iio: accel: mma8452: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-57-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mma8452.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mma8452.c b/drivers/iio/accel/mma8452.c
index 3ba28c2ff68a3..f97fb68e3a711 100644
--- a/drivers/iio/accel/mma8452.c
+++ b/drivers/iio/accel/mma8452.c
@@ -1545,9 +1545,9 @@ static const struct of_device_id mma8452_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, mma8452_dt_ids);
 
-static int mma8452_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mma8452_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct mma8452_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -1846,7 +1846,7 @@ static struct i2c_driver mma8452_driver = {
 		.of_match_table = mma8452_dt_ids,
 		.pm	= &mma8452_pm_ops,
 	},
-	.probe = mma8452_probe,
+	.probe_new = mma8452_probe,
 	.remove = mma8452_remove,
 	.id_table = mma8452_id,
 };
-- 
GitLab


From 6dae5d11e29d2935e7995d05468d72660dde5f7b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:31 +0100
Subject: [PATCH 2386/4122] iio: accel: mma9551: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-58-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mma9551.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mma9551.c b/drivers/iio/accel/mma9551.c
index f7a793f4a8e34..aa4f5842859ef 100644
--- a/drivers/iio/accel/mma9551.c
+++ b/drivers/iio/accel/mma9551.c
@@ -446,9 +446,9 @@ static const char *mma9551_match_acpi_device(struct device *dev)
 	return dev_name(dev);
 }
 
-static int mma9551_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mma9551_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct mma9551_data *data;
 	struct iio_dev *indio_dev;
 	const char *name = NULL;
@@ -607,7 +607,7 @@ static struct i2c_driver mma9551_driver = {
 		   .acpi_match_table = ACPI_PTR(mma9551_acpi_match),
 		   .pm = pm_ptr(&mma9551_pm_ops),
 		   },
-	.probe = mma9551_probe,
+	.probe_new = mma9551_probe,
 	.remove = mma9551_remove,
 	.id_table = mma9551_id,
 };
-- 
GitLab


From 50434e4605d66a55e7422cb24804c88b57cfb2bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:32 +0100
Subject: [PATCH 2387/4122] iio: accel: mma9553: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-59-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/mma9553.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/accel/mma9553.c b/drivers/iio/accel/mma9553.c
index 2da0e005b13eb..0af578ef9d3d0 100644
--- a/drivers/iio/accel/mma9553.c
+++ b/drivers/iio/accel/mma9553.c
@@ -1073,9 +1073,9 @@ static const char *mma9553_match_acpi_device(struct device *dev)
 	return dev_name(dev);
 }
 
-static int mma9553_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mma9553_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct mma9553_data *data;
 	struct iio_dev *indio_dev;
 	const char *name = NULL;
@@ -1246,7 +1246,7 @@ static struct i2c_driver mma9553_driver = {
 		   .acpi_match_table = ACPI_PTR(mma9553_acpi_match),
 		   .pm = pm_ptr(&mma9553_pm_ops),
 		   },
-	.probe = mma9553_probe,
+	.probe_new = mma9553_probe,
 	.remove = mma9553_remove,
 	.id_table = mma9553_id,
 };
-- 
GitLab


From fe44f0738dd142a173e1b62e13b59d134abca003 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:33 +0100
Subject: [PATCH 2388/4122] iio: adc: ad7091r5: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-60-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7091r5.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ad7091r5.c b/drivers/iio/adc/ad7091r5.c
index 47f5763023a40..7d6709da1005f 100644
--- a/drivers/iio/adc/ad7091r5.c
+++ b/drivers/iio/adc/ad7091r5.c
@@ -69,9 +69,9 @@ static const struct ad7091r_chip_info ad7091r5_chip_info_noirq = {
 	.vref_mV = 2500,
 };
 
-static int ad7091r5_i2c_probe(struct i2c_client *i2c,
-		const struct i2c_device_id *id)
+static int ad7091r5_i2c_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	const struct ad7091r_chip_info *chip_info;
 	struct regmap *map = devm_regmap_init_i2c(i2c, &ad7091r_regmap_config);
 
@@ -103,7 +103,7 @@ static struct i2c_driver ad7091r5_driver = {
 		.name = "ad7091r5",
 		.of_match_table = ad7091r5_dt_ids,
 	},
-	.probe = ad7091r5_i2c_probe,
+	.probe_new = ad7091r5_i2c_probe,
 	.id_table = ad7091r5_i2c_ids,
 };
 module_i2c_driver(ad7091r5_driver);
-- 
GitLab


From 3ff5dd78f6b7e5aa73ddbc8f609f5ba53d1d49bc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:34 +0100
Subject: [PATCH 2389/4122] iio: adc: ad7291: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-61-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7291.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ad7291.c b/drivers/iio/adc/ad7291.c
index e9129dac762fe..3dd0105f63d71 100644
--- a/drivers/iio/adc/ad7291.c
+++ b/drivers/iio/adc/ad7291.c
@@ -465,9 +465,9 @@ static void ad7291_reg_disable(void *reg)
 	regulator_disable(reg);
 }
 
-static int ad7291_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ad7291_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ad7291_chip_info *chip;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -553,7 +553,7 @@ static struct i2c_driver ad7291_driver = {
 		.name = KBUILD_MODNAME,
 		.of_match_table = ad7291_of_match,
 	},
-	.probe = ad7291_probe,
+	.probe_new = ad7291_probe,
 	.id_table = ad7291_id,
 };
 module_i2c_driver(ad7291_driver);
-- 
GitLab


From 28ae41885ab2bec935a61a9c811713df6e70043e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:35 +0100
Subject: [PATCH 2390/4122] iio: adc: ad799x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-62-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad799x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ad799x.c b/drivers/iio/adc/ad799x.c
index 4730d8d0f4c3f..8f0a3a35e727e 100644
--- a/drivers/iio/adc/ad799x.c
+++ b/drivers/iio/adc/ad799x.c
@@ -775,9 +775,9 @@ static const struct ad799x_chip_info ad799x_chip_info_tbl[] = {
 	},
 };
 
-static int ad799x_probe(struct i2c_client *client,
-				   const struct i2c_device_id *id)
+static int ad799x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int ret;
 	int extra_config = 0;
 	struct ad799x_state *st;
@@ -968,7 +968,7 @@ static struct i2c_driver ad799x_driver = {
 		.name = "ad799x",
 		.pm = pm_sleep_ptr(&ad799x_pm_ops),
 	},
-	.probe = ad799x_probe,
+	.probe_new = ad799x_probe,
 	.remove = ad799x_remove,
 	.id_table = ad799x_id,
 };
-- 
GitLab


From 203a5e83dd3f7ea4d0d827563a317bd656d2f579 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:36 +0100
Subject: [PATCH 2391/4122] iio: adc: ina2xx-adc: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-63-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ina2xx-adc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ina2xx-adc.c b/drivers/iio/adc/ina2xx-adc.c
index 910e7e965fc48..38d9d7b2313ea 100644
--- a/drivers/iio/adc/ina2xx-adc.c
+++ b/drivers/iio/adc/ina2xx-adc.c
@@ -946,9 +946,9 @@ static int ina2xx_init(struct ina2xx_chip_info *chip, unsigned int config)
 	return ina2xx_set_calibration(chip);
 }
 
-static int ina2xx_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ina2xx_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ina2xx_chip_info *chip;
 	struct iio_dev *indio_dev;
 	unsigned int val;
@@ -1090,7 +1090,7 @@ static struct i2c_driver ina2xx_driver = {
 		   .name = KBUILD_MODNAME,
 		   .of_match_table = ina2xx_of_match,
 	},
-	.probe = ina2xx_probe,
+	.probe_new = ina2xx_probe,
 	.remove = ina2xx_remove,
 	.id_table = ina2xx_id,
 };
-- 
GitLab


From 7b8c4fa43583ea1a7e60658ad23302608fe19ce9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:37 +0100
Subject: [PATCH 2392/4122] iio: adc: ltc2471: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-64-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ltc2471.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ltc2471.c b/drivers/iio/adc/ltc2471.c
index 0e0fe881a8e69..eeb2945829eb2 100644
--- a/drivers/iio/adc/ltc2471.c
+++ b/drivers/iio/adc/ltc2471.c
@@ -99,9 +99,9 @@ static const struct iio_info ltc2471_info = {
 	.read_raw = ltc2471_read_raw,
 };
 
-static int ltc2471_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int ltc2471_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct ltc2471_data *data;
 	int ret;
@@ -146,7 +146,7 @@ static struct i2c_driver ltc2471_i2c_driver = {
 	.driver = {
 		.name = "ltc2471",
 	},
-	.probe    = ltc2471_i2c_probe,
+	.probe_new = ltc2471_i2c_probe,
 	.id_table = ltc2471_i2c_id,
 };
 
-- 
GitLab


From 11e67cc9675f1289556e1524fa7717cc95282487 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:38 +0100
Subject: [PATCH 2393/4122] iio: adc: ltc2485: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-65-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ltc2485.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ltc2485.c b/drivers/iio/adc/ltc2485.c
index 37c762f8218c9..6a23427344ecd 100644
--- a/drivers/iio/adc/ltc2485.c
+++ b/drivers/iio/adc/ltc2485.c
@@ -89,9 +89,9 @@ static const struct iio_info ltc2485_info = {
 	.read_raw = ltc2485_read_raw,
 };
 
-static int ltc2485_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int ltc2485_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct ltc2485_data *data;
 	int ret;
@@ -133,7 +133,7 @@ static struct i2c_driver ltc2485_driver = {
 	.driver = {
 		.name = "ltc2485",
 	},
-	.probe = ltc2485_probe,
+	.probe_new = ltc2485_probe,
 	.id_table = ltc2485_id,
 };
 module_i2c_driver(ltc2485_driver);
-- 
GitLab


From 3a79844428c39fa54a30ec11eaad5a8e577ef706 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:39 +0100
Subject: [PATCH 2394/4122] iio: adc: ltc2497: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-66-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ltc2497.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ltc2497.c b/drivers/iio/adc/ltc2497.c
index 556f10dfb5023..17370c5eb6fe3 100644
--- a/drivers/iio/adc/ltc2497.c
+++ b/drivers/iio/adc/ltc2497.c
@@ -94,9 +94,9 @@ static int ltc2497_result_and_measure(struct ltc2497core_driverdata *ddata,
 	return ret;
 }
 
-static int ltc2497_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int ltc2497_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	const struct ltc2497_chip_info *chip_info;
 	struct iio_dev *indio_dev;
 	struct ltc2497_driverdata *st;
@@ -165,7 +165,7 @@ static struct i2c_driver ltc2497_driver = {
 		.name = "ltc2497",
 		.of_match_table = ltc2497_of_match,
 	},
-	.probe = ltc2497_probe,
+	.probe_new = ltc2497_probe,
 	.remove = ltc2497_remove,
 	.id_table = ltc2497_id,
 };
-- 
GitLab


From a69e45a411ea3ae0bbec474e4303c4281f3f8376 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:40 +0100
Subject: [PATCH 2395/4122] iio: adc: max1363: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-67-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/max1363.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/max1363.c b/drivers/iio/adc/max1363.c
index 42d3479cefb73..73b783b430d72 100644
--- a/drivers/iio/adc/max1363.c
+++ b/drivers/iio/adc/max1363.c
@@ -1579,9 +1579,9 @@ static void max1363_reg_disable(void *reg)
 	regulator_disable(reg);
 }
 
-static int max1363_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int max1363_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int ret;
 	struct max1363_state *st;
 	struct iio_dev *indio_dev;
@@ -1718,7 +1718,7 @@ static struct i2c_driver max1363_driver = {
 		.name = "max1363",
 		.of_match_table = max1363_of_match,
 	},
-	.probe = max1363_probe,
+	.probe_new = max1363_probe,
 	.id_table = max1363_id,
 };
 module_i2c_driver(max1363_driver);
-- 
GitLab


From d59ecbc48a1c74cfe6bc2d4ff503fb3e7455d19f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:41 +0100
Subject: [PATCH 2396/4122] iio: adc: max9611: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Jacopo Mondi <jacopo@jmondi.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-68-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/max9611.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/max9611.c b/drivers/iio/adc/max9611.c
index f982f00303dc0..cb7f4785423a2 100644
--- a/drivers/iio/adc/max9611.c
+++ b/drivers/iio/adc/max9611.c
@@ -510,8 +510,7 @@ static const struct of_device_id max9611_of_table[] = {
 };
 
 MODULE_DEVICE_TABLE(of, max9611_of_table);
-static int max9611_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int max9611_probe(struct i2c_client *client)
 {
 	const char * const shunt_res_prop = "shunt-resistor-micro-ohms";
 	struct max9611_dev *max9611;
@@ -557,7 +556,7 @@ static struct i2c_driver max9611_driver = {
 		   .name = DRIVER_NAME,
 		   .of_match_table = max9611_of_table,
 	},
-	.probe = max9611_probe,
+	.probe_new = max9611_probe,
 };
 module_i2c_driver(max9611_driver);
 
-- 
GitLab


From 6168215dbfea1c1d42a7aff145efbbcbdb5d6051 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:42 +0100
Subject: [PATCH 2397/4122] iio: adc: mcp3422: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-69-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/mcp3422.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/mcp3422.c b/drivers/iio/adc/mcp3422.c
index da353dcb1e9d4..ada844c3f7ecc 100644
--- a/drivers/iio/adc/mcp3422.c
+++ b/drivers/iio/adc/mcp3422.c
@@ -330,9 +330,9 @@ static const struct iio_info mcp3422_info = {
 	.attrs = &mcp3422_attribute_group,
 };
 
-static int mcp3422_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mcp3422_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct mcp3422 *adc;
 	int err;
@@ -417,7 +417,7 @@ static struct i2c_driver mcp3422_driver = {
 		.name = "mcp3422",
 		.of_match_table = mcp3422_of_match,
 	},
-	.probe = mcp3422_probe,
+	.probe_new = mcp3422_probe,
 	.id_table = mcp3422_id,
 };
 module_i2c_driver(mcp3422_driver);
-- 
GitLab


From 4b50867f6a328c13d40223937c34b4146eafd883 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:43 +0100
Subject: [PATCH 2398/4122] iio: adc: ti-adc081c: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-70-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ti-adc081c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ti-adc081c.c b/drivers/iio/adc/ti-adc081c.c
index bd48b073e7200..c663dc59d4590 100644
--- a/drivers/iio/adc/ti-adc081c.c
+++ b/drivers/iio/adc/ti-adc081c.c
@@ -152,9 +152,9 @@ static void adc081c_reg_disable(void *reg)
 	regulator_disable(reg);
 }
 
-static int adc081c_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int adc081c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *iio;
 	struct adc081c *adc;
 	const struct adcxx1c_model *model;
@@ -235,7 +235,7 @@ static struct i2c_driver adc081c_driver = {
 		.of_match_table = adc081c_of_match,
 		.acpi_match_table = adc081c_acpi_match,
 	},
-	.probe = adc081c_probe,
+	.probe_new = adc081c_probe,
 	.id_table = adc081c_id,
 };
 module_i2c_driver(adc081c_driver);
-- 
GitLab


From 0143ce1039f70e2656865d75c163ed4dbf40db30 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:44 +0100
Subject: [PATCH 2399/4122] iio: adc: ti-ads1015: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-71-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ti-ads1015.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/adc/ti-ads1015.c b/drivers/iio/adc/ti-ads1015.c
index 8bceba6940260..56af5e1488022 100644
--- a/drivers/iio/adc/ti-ads1015.c
+++ b/drivers/iio/adc/ti-ads1015.c
@@ -974,9 +974,9 @@ static int ads1015_set_conv_mode(struct ads1015_data *data, int mode)
 				  mode << ADS1015_CFG_MOD_SHIFT);
 }
 
-static int ads1015_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int ads1015_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	const struct ads1015_chip_data *chip;
 	struct iio_dev *indio_dev;
 	struct ads1015_data *data;
@@ -1195,7 +1195,7 @@ static struct i2c_driver ads1015_driver = {
 		.of_match_table = ads1015_of_match,
 		.pm = &ads1015_pm_ops,
 	},
-	.probe		= ads1015_probe,
+	.probe_new	= ads1015_probe,
 	.remove		= ads1015_remove,
 	.id_table	= ads1015_id,
 };
-- 
GitLab


From 7558eaa9727dbf2efd41d29101d1639975cf778e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:45 +0100
Subject: [PATCH 2400/4122] iio: cdc: ad7150: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-72-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/cdc/ad7150.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/cdc/ad7150.c b/drivers/iio/cdc/ad7150.c
index 1113745890cae..79aeb0aaea67d 100644
--- a/drivers/iio/cdc/ad7150.c
+++ b/drivers/iio/cdc/ad7150.c
@@ -536,9 +536,9 @@ static const struct iio_info ad7150_info_no_irq = {
 	.read_raw = &ad7150_read_raw,
 };
 
-static int ad7150_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ad7150_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ad7150_chip_info *chip;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -647,7 +647,7 @@ static struct i2c_driver ad7150_driver = {
 		.name = "ad7150",
 		.of_match_table = ad7150_of_match,
 	},
-	.probe = ad7150_probe,
+	.probe_new = ad7150_probe,
 	.id_table = ad7150_id,
 };
 module_i2c_driver(ad7150_driver);
-- 
GitLab


From 9b1cd21eafc34beb50194a060e1cd3902b763a82 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:46 +0100
Subject: [PATCH 2401/4122] iio: cdc: ad7746: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-73-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/cdc/ad7746.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/cdc/ad7746.c b/drivers/iio/cdc/ad7746.c
index b266f53281409..6f68651ce1d5a 100644
--- a/drivers/iio/cdc/ad7746.c
+++ b/drivers/iio/cdc/ad7746.c
@@ -717,9 +717,9 @@ static const struct iio_info ad7746_info = {
 	.write_raw = ad7746_write_raw,
 };
 
-static int ad7746_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ad7746_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct ad7746_chip_info *chip;
 	struct iio_dev *indio_dev;
@@ -810,7 +810,7 @@ static struct i2c_driver ad7746_driver = {
 		.name = KBUILD_MODNAME,
 		.of_match_table = ad7746_of_match,
 	},
-	.probe = ad7746_probe,
+	.probe_new = ad7746_probe,
 	.id_table = ad7746_id,
 };
 module_i2c_driver(ad7746_driver);
-- 
GitLab


From 89d63224e20c8409afb553d334d38bb2675757ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:47 +0100
Subject: [PATCH 2402/4122] iio: chemical: ams-iaq-core: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-74-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/ams-iaq-core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/ams-iaq-core.c b/drivers/iio/chemical/ams-iaq-core.c
index 97be3669c5543..0a0fbcdd44692 100644
--- a/drivers/iio/chemical/ams-iaq-core.c
+++ b/drivers/iio/chemical/ams-iaq-core.c
@@ -135,8 +135,7 @@ static const struct iio_info ams_iaqcore_info = {
 	.read_raw	= ams_iaqcore_read_raw,
 };
 
-static int ams_iaqcore_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int ams_iaqcore_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct ams_iaqcore_data *data;
@@ -180,7 +179,7 @@ static struct i2c_driver ams_iaqcore_driver = {
 		.name	= "ams-iaq-core",
 		.of_match_table = ams_iaqcore_dt_ids,
 	},
-	.probe = ams_iaqcore_probe,
+	.probe_new = ams_iaqcore_probe,
 	.id_table = ams_iaqcore_id,
 };
 module_i2c_driver(ams_iaqcore_driver);
-- 
GitLab


From e9c812ca569f1314f0a9c0420cb17c9db0a55457 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:48 +0100
Subject: [PATCH 2403/4122] iio: chemical: atlas-ezo-sensor: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-75-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/atlas-ezo-sensor.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/atlas-ezo-sensor.c b/drivers/iio/chemical/atlas-ezo-sensor.c
index bbcf5a59c1f4e..307c3488f4bdd 100644
--- a/drivers/iio/chemical/atlas-ezo-sensor.c
+++ b/drivers/iio/chemical/atlas-ezo-sensor.c
@@ -201,9 +201,9 @@ static const struct of_device_id atlas_ezo_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, atlas_ezo_dt_ids);
 
-static int atlas_ezo_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int atlas_ezo_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	const struct atlas_ezo_device *chip;
 	struct atlas_ezo_data *data;
 	struct iio_dev *indio_dev;
@@ -238,7 +238,7 @@ static struct i2c_driver atlas_ezo_driver = {
 		.name	= ATLAS_EZO_DRV_NAME,
 		.of_match_table	= atlas_ezo_dt_ids,
 	},
-	.probe		= atlas_ezo_probe,
+	.probe_new	= atlas_ezo_probe,
 	.id_table	= atlas_ezo_id,
 };
 module_i2c_driver(atlas_ezo_driver);
-- 
GitLab


From 50c359245069f7e64880fe01b06d40954df71d9f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:49 +0100
Subject: [PATCH 2404/4122] iio: chemical: atlas-sensor: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-76-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/atlas-sensor.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/atlas-sensor.c b/drivers/iio/chemical/atlas-sensor.c
index 7cac77a931c70..024657bc59e17 100644
--- a/drivers/iio/chemical/atlas-sensor.c
+++ b/drivers/iio/chemical/atlas-sensor.c
@@ -608,9 +608,9 @@ static const struct of_device_id atlas_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, atlas_dt_ids);
 
-static int atlas_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int atlas_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct atlas_data *data;
 	struct atlas_device *chip;
 	struct iio_trigger *trig;
@@ -767,7 +767,7 @@ static struct i2c_driver atlas_driver = {
 		.of_match_table	= atlas_dt_ids,
 		.pm	= pm_ptr(&atlas_pm_ops),
 	},
-	.probe		= atlas_probe,
+	.probe_new	= atlas_probe,
 	.remove		= atlas_remove,
 	.id_table	= atlas_id,
 };
-- 
GitLab


From 5aa377658458cb7c4ea20dab7405b795a5cc8a11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:50 +0100
Subject: [PATCH 2405/4122] iio: chemical: bme680_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-77-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/bme680_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/bme680_i2c.c b/drivers/iio/chemical/bme680_i2c.c
index 20f2c20b6b022..61b12079858d2 100644
--- a/drivers/iio/chemical/bme680_i2c.c
+++ b/drivers/iio/chemical/bme680_i2c.c
@@ -17,9 +17,9 @@
 
 #include "bme680.h"
 
-static int bme680_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int bme680_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	const char *name = NULL;
 
@@ -52,7 +52,7 @@ static struct i2c_driver bme680_i2c_driver = {
 		.name			= "bme680_i2c",
 		.of_match_table		= bme680_of_i2c_match,
 	},
-	.probe = bme680_i2c_probe,
+	.probe_new = bme680_i2c_probe,
 	.id_table = bme680_i2c_id,
 };
 module_i2c_driver(bme680_i2c_driver);
-- 
GitLab


From 684e57366e68e4ba2abc5ab56041b4d6534f28ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:51 +0100
Subject: [PATCH 2406/4122] iio: chemical: ccs811: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-78-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/ccs811.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/ccs811.c b/drivers/iio/chemical/ccs811.c
index ba4045e203035..6ead80c089240 100644
--- a/drivers/iio/chemical/ccs811.c
+++ b/drivers/iio/chemical/ccs811.c
@@ -401,9 +401,9 @@ static int ccs811_reset(struct i2c_client *client)
 	return 0;
 }
 
-static int ccs811_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ccs811_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct ccs811_data *data;
 	int ret;
@@ -567,7 +567,7 @@ static struct i2c_driver ccs811_driver = {
 		.name = "ccs811",
 		.of_match_table = ccs811_dt_ids,
 	},
-	.probe = ccs811_probe,
+	.probe_new = ccs811_probe,
 	.remove = ccs811_remove,
 	.id_table = ccs811_id,
 };
-- 
GitLab


From 2f2adc666335cad150b720a6b19cec33464e2680 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:52 +0100
Subject: [PATCH 2407/4122] iio: chemical: scd4x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-79-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/scd4x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/chemical/scd4x.c b/drivers/iio/chemical/scd4x.c
index 54066532ea458..f7ed9455b3c8f 100644
--- a/drivers/iio/chemical/scd4x.c
+++ b/drivers/iio/chemical/scd4x.c
@@ -615,7 +615,7 @@ out:
 	return IRQ_HANDLED;
 }
 
-static int scd4x_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int scd4x_probe(struct i2c_client *client)
 {
 	static const unsigned long scd4x_scan_masks[] = { 0x07, 0x00 };
 	struct device *dev = &client->dev;
@@ -690,7 +690,7 @@ static struct i2c_driver scd4x_i2c_driver = {
 		.of_match_table = scd4x_dt_ids,
 		.pm = pm_sleep_ptr(&scd4x_pm_ops),
 	},
-	.probe = scd4x_probe,
+	.probe_new = scd4x_probe,
 };
 module_i2c_driver(scd4x_i2c_driver);
 
-- 
GitLab


From 67eba68bfccbdf6598200d70f80971e454d1ee2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:53 +0100
Subject: [PATCH 2408/4122] iio: chemical: sgp30: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-80-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/sgp30.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/sgp30.c b/drivers/iio/chemical/sgp30.c
index e2c13c78c7e05..9d0c68485b63f 100644
--- a/drivers/iio/chemical/sgp30.c
+++ b/drivers/iio/chemical/sgp30.c
@@ -496,9 +496,9 @@ static const struct of_device_id sgp_dt_ids[] = {
 	{ }
 };
 
-static int sgp_probe(struct i2c_client *client,
-		     const struct i2c_device_id *id)
+static int sgp_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct iio_dev *indio_dev;
 	struct sgp_data *data;
@@ -575,7 +575,7 @@ static struct i2c_driver sgp_driver = {
 		.name = "sgp30",
 		.of_match_table = sgp_dt_ids,
 	},
-	.probe = sgp_probe,
+	.probe_new = sgp_probe,
 	.remove = sgp_remove,
 	.id_table = sgp_id,
 };
-- 
GitLab


From 07eda54d92f7ae7439a77903b9a43bfe563ebf21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:54 +0100
Subject: [PATCH 2409/4122] iio: chemical: sgp40: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-81-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/sgp40.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/sgp40.c b/drivers/iio/chemical/sgp40.c
index 8a56394cea4eb..c0ea013009081 100644
--- a/drivers/iio/chemical/sgp40.c
+++ b/drivers/iio/chemical/sgp40.c
@@ -311,9 +311,9 @@ static const struct iio_info sgp40_info = {
 	.write_raw	= sgp40_write_raw,
 };
 
-static int sgp40_probe(struct i2c_client *client,
-		     const struct i2c_device_id *id)
+static int sgp40_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct iio_dev *indio_dev;
 	struct sgp40_data *data;
@@ -368,7 +368,7 @@ static struct i2c_driver sgp40_driver = {
 		.name = "sgp40",
 		.of_match_table = sgp40_dt_ids,
 	},
-	.probe = sgp40_probe,
+	.probe_new = sgp40_probe,
 	.id_table = sgp40_id,
 };
 module_i2c_driver(sgp40_driver);
-- 
GitLab


From 0d8535ee52f0392f6293e564e3ff4522d6c37cfd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:55 +0100
Subject: [PATCH 2410/4122] iio: chemical: vz89x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-82-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/chemical/vz89x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/chemical/vz89x.c b/drivers/iio/chemical/vz89x.c
index e7e1c74a351ef..d4604f7ccd1e0 100644
--- a/drivers/iio/chemical/vz89x.c
+++ b/drivers/iio/chemical/vz89x.c
@@ -348,9 +348,9 @@ static const struct of_device_id vz89x_dt_ids[] = {
 };
 MODULE_DEVICE_TABLE(of, vz89x_dt_ids);
 
-static int vz89x_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int vz89x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct iio_dev *indio_dev;
 	struct vz89x_data *data;
@@ -402,7 +402,7 @@ static struct i2c_driver vz89x_driver = {
 		.name	= "vz89x",
 		.of_match_table = vz89x_dt_ids,
 	},
-	.probe = vz89x_probe,
+	.probe_new = vz89x_probe,
 	.id_table = vz89x_id,
 };
 module_i2c_driver(vz89x_driver);
-- 
GitLab


From 16fb97c4aacf5db311f4fb54745f852d1183e662 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:56 +0100
Subject: [PATCH 2411/4122] iio: dac: ad5064: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-83-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ad5064.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/ad5064.c b/drivers/iio/dac/ad5064.c
index 4447b88118270..f01249c1ba93c 100644
--- a/drivers/iio/dac/ad5064.c
+++ b/drivers/iio/dac/ad5064.c
@@ -993,9 +993,9 @@ static int ad5064_i2c_write(struct ad5064_state *st, unsigned int cmd,
 	return 0;
 }
 
-static int ad5064_i2c_probe(struct i2c_client *i2c,
-	const struct i2c_device_id *id)
+static int ad5064_i2c_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	return ad5064_probe(&i2c->dev, id->driver_data, id->name,
 						ad5064_i2c_write);
 }
@@ -1056,7 +1056,7 @@ static struct i2c_driver ad5064_i2c_driver = {
 	.driver = {
 		   .name = "ad5064",
 	},
-	.probe = ad5064_i2c_probe,
+	.probe_new = ad5064_i2c_probe,
 	.id_table = ad5064_i2c_ids,
 };
 
-- 
GitLab


From 4b2b4370a8459982d78d5716361a7a51089fecbb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:57 +0100
Subject: [PATCH 2412/4122] iio: dac: ad5380: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-84-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ad5380.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/ad5380.c b/drivers/iio/dac/ad5380.c
index a81bfa47a221a..64b4519f8f5e6 100644
--- a/drivers/iio/dac/ad5380.c
+++ b/drivers/iio/dac/ad5380.c
@@ -546,9 +546,9 @@ static inline void ad5380_spi_unregister_driver(void)
 
 #if IS_ENABLED(CONFIG_I2C)
 
-static int ad5380_i2c_probe(struct i2c_client *i2c,
-			    const struct i2c_device_id *id)
+static int ad5380_i2c_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	struct regmap *regmap;
 
 	regmap = devm_regmap_init_i2c(i2c, &ad5380_regmap_config);
@@ -589,7 +589,7 @@ static struct i2c_driver ad5380_i2c_driver = {
 	.driver = {
 		   .name = "ad5380",
 	},
-	.probe = ad5380_i2c_probe,
+	.probe_new = ad5380_i2c_probe,
 	.remove = ad5380_i2c_remove,
 	.id_table = ad5380_i2c_ids,
 };
-- 
GitLab


From 94e5ddbaac1ea9b5bda64af53d8a57b19be890ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:58 +0100
Subject: [PATCH 2413/4122] iio: dac: ad5446: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-85-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ad5446.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/ad5446.c b/drivers/iio/dac/ad5446.c
index 7324065d37821..aa3130b334563 100644
--- a/drivers/iio/dac/ad5446.c
+++ b/drivers/iio/dac/ad5446.c
@@ -568,9 +568,9 @@ static const struct ad5446_chip_info ad5446_i2c_chip_info[] = {
 	},
 };
 
-static int ad5446_i2c_probe(struct i2c_client *i2c,
-			    const struct i2c_device_id *id)
+static int ad5446_i2c_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	return ad5446_probe(&i2c->dev, id->name,
 		&ad5446_i2c_chip_info[id->driver_data]);
 }
@@ -595,7 +595,7 @@ static struct i2c_driver ad5446_i2c_driver = {
 	.driver = {
 		   .name = "ad5446",
 	},
-	.probe = ad5446_i2c_probe,
+	.probe_new = ad5446_i2c_probe,
 	.remove = ad5446_i2c_remove,
 	.id_table = ad5446_i2c_ids,
 };
-- 
GitLab


From a17c748115cb1f95675b322e955763cd9bb628ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:59 +0100
Subject: [PATCH 2414/4122] iio: dac: ad5593r: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-86-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ad5593r.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/ad5593r.c b/drivers/iio/dac/ad5593r.c
index 8e5e014e0c28b..d311567ab324a 100644
--- a/drivers/iio/dac/ad5593r.c
+++ b/drivers/iio/dac/ad5593r.c
@@ -99,9 +99,9 @@ static const struct ad5592r_rw_ops ad5593r_rw_ops = {
 	.gpio_read = ad5593r_gpio_read,
 };
 
-static int ad5593r_i2c_probe(struct i2c_client *i2c,
-		const struct i2c_device_id *id)
+static int ad5593r_i2c_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	if (!i2c_check_functionality(i2c->adapter,
 				     I2C_FUNC_SMBUS_BYTE | I2C_FUNC_I2C))
 		return -EOPNOTSUPP;
@@ -138,7 +138,7 @@ static struct i2c_driver ad5593r_driver = {
 		.of_match_table = ad5593r_of_match,
 		.acpi_match_table = ad5593r_acpi_match,
 	},
-	.probe = ad5593r_i2c_probe,
+	.probe_new = ad5593r_i2c_probe,
 	.remove = ad5593r_i2c_remove,
 	.id_table = ad5593r_i2c_ids,
 };
-- 
GitLab


From 92cd05a0968d60b3d76dc8d3a5880b25b1af96b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:00 +0100
Subject: [PATCH 2415/4122] iio: dac: ad5696-i2c: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-87-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ad5696-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/ad5696-i2c.c b/drivers/iio/dac/ad5696-i2c.c
index aa36cbf0137c1..160e80cf9135f 100644
--- a/drivers/iio/dac/ad5696-i2c.c
+++ b/drivers/iio/dac/ad5696-i2c.c
@@ -58,9 +58,9 @@ static int ad5686_i2c_write(struct ad5686_state *st,
 	return (ret != 3) ? -EIO : 0;
 }
 
-static int ad5686_i2c_probe(struct i2c_client *i2c,
-			    const struct i2c_device_id *id)
+static int ad5686_i2c_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	return ad5686_probe(&i2c->dev, id->driver_data, id->name,
 			    ad5686_i2c_write, ad5686_i2c_read);
 }
@@ -113,7 +113,7 @@ static struct i2c_driver ad5686_i2c_driver = {
 		.name = "ad5696",
 		.of_match_table = ad5686_of_match,
 	},
-	.probe = ad5686_i2c_probe,
+	.probe_new = ad5686_i2c_probe,
 	.remove = ad5686_i2c_remove,
 	.id_table = ad5686_i2c_id,
 };
-- 
GitLab


From 53f46605c3656c181e49df0d5fdba014cffa498e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:01 +0100
Subject: [PATCH 2416/4122] iio: dac: ds4424: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-88-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ds4424.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/ds4424.c b/drivers/iio/dac/ds4424.c
index 3e17a681174e8..a16a6a934d9d9 100644
--- a/drivers/iio/dac/ds4424.c
+++ b/drivers/iio/dac/ds4424.c
@@ -213,9 +213,9 @@ static const struct iio_info ds4424_info = {
 	.write_raw = ds4424_write_raw,
 };
 
-static int ds4424_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ds4424_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ds4424_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -312,7 +312,7 @@ static struct i2c_driver ds4424_driver = {
 		.of_match_table = ds4424_of_match,
 		.pm     = pm_sleep_ptr(&ds4424_pm_ops),
 	},
-	.probe		= ds4424_probe,
+	.probe_new	= ds4424_probe,
 	.remove		= ds4424_remove,
 	.id_table	= ds4424_id,
 };
-- 
GitLab


From 44d7a03c98f466c1567a8cad84908ce6a9d1f2c3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:02 +0100
Subject: [PATCH 2417/4122] iio: dac: m62332: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-89-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/m62332.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/m62332.c b/drivers/iio/dac/m62332.c
index 5a812f87970c0..b692459b0f231 100644
--- a/drivers/iio/dac/m62332.c
+++ b/drivers/iio/dac/m62332.c
@@ -176,8 +176,7 @@ static const struct iio_chan_spec m62332_channels[M62332_CHANNELS] = {
 	M62332_CHANNEL(1)
 };
 
-static int m62332_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int m62332_probe(struct i2c_client *client)
 {
 	struct m62332_data *data;
 	struct iio_dev *indio_dev;
@@ -239,7 +238,7 @@ static struct i2c_driver m62332_driver = {
 		.name	= "m62332",
 		.pm	= pm_sleep_ptr(&m62332_pm_ops),
 	},
-	.probe		= m62332_probe,
+	.probe_new	= m62332_probe,
 	.remove		= m62332_remove,
 	.id_table	= m62332_id,
 };
-- 
GitLab


From 090515edf46dc1f2ff7dbc78a60ba7692ff8420f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:03 +0100
Subject: [PATCH 2418/4122] iio: dac: max517: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-90-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/max517.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/max517.c b/drivers/iio/dac/max517.c
index 373ce6ff83b78..25967c39365d2 100644
--- a/drivers/iio/dac/max517.c
+++ b/drivers/iio/dac/max517.c
@@ -141,9 +141,9 @@ static const struct iio_chan_spec max517_channels[] = {
 	MAX517_CHANNEL(7),
 };
 
-static int max517_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int max517_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct max517_data *data;
 	struct iio_dev *indio_dev;
 	struct max517_platform_data *platform_data = client->dev.platform_data;
@@ -203,7 +203,7 @@ static struct i2c_driver max517_driver = {
 		.name	= MAX517_DRV_NAME,
 		.pm	= pm_sleep_ptr(&max517_pm_ops),
 	},
-	.probe		= max517_probe,
+	.probe_new	= max517_probe,
 	.id_table	= max517_id,
 };
 module_i2c_driver(max517_driver);
-- 
GitLab


From 62b001dad803e40274db2c712dd4dc0d3a4a37d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:04 +0100
Subject: [PATCH 2419/4122] iio: dac: max5821: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-91-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/max5821.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/max5821.c b/drivers/iio/dac/max5821.c
index e001b594d5b19..23da345b9250c 100644
--- a/drivers/iio/dac/max5821.c
+++ b/drivers/iio/dac/max5821.c
@@ -300,9 +300,9 @@ static void max5821_regulator_disable(void *reg)
 	regulator_disable(reg);
 }
 
-static int max5821_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int max5821_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct max5821_data *data;
 	struct iio_dev *indio_dev;
 	u32 tmp;
@@ -377,7 +377,7 @@ static struct i2c_driver max5821_driver = {
 		.of_match_table = max5821_of_match,
 		.pm     = pm_sleep_ptr(&max5821_pm_ops),
 	},
-	.probe		= max5821_probe,
+	.probe_new	= max5821_probe,
 	.id_table	= max5821_id,
 };
 module_i2c_driver(max5821_driver);
-- 
GitLab


From 818fe4546521f5e836ab02eeb6533bfd6aee6829 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:05 +0100
Subject: [PATCH 2420/4122] iio: dac: mcp4725: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-92-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/mcp4725.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/mcp4725.c b/drivers/iio/dac/mcp4725.c
index 446d1a8fe4bef..46bf758760f85 100644
--- a/drivers/iio/dac/mcp4725.c
+++ b/drivers/iio/dac/mcp4725.c
@@ -369,9 +369,9 @@ static int mcp4725_probe_dt(struct device *dev,
 	return 0;
 }
 
-static int mcp4725_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mcp4725_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct mcp4725_data *data;
 	struct iio_dev *indio_dev;
 	struct mcp4725_platform_data *pdata, pdata_dt;
@@ -524,7 +524,7 @@ static struct i2c_driver mcp4725_driver = {
 		.of_match_table = mcp4725_of_match,
 		.pm	= pm_sleep_ptr(&mcp4725_pm_ops),
 	},
-	.probe		= mcp4725_probe,
+	.probe_new	= mcp4725_probe,
 	.remove		= mcp4725_remove,
 	.id_table	= mcp4725_id,
 };
-- 
GitLab


From 3de8dd69fad2c2ee7ca623d78fb2806d7b8b5a80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:06 +0100
Subject: [PATCH 2421/4122] iio: dac: ti-dac5571: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Link: https://lore.kernel.org/r/20221118224540.619276-93-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/dac/ti-dac5571.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/dac/ti-dac5571.c b/drivers/iio/dac/ti-dac5571.c
index 3210e3098f9ae..40191947fea32 100644
--- a/drivers/iio/dac/ti-dac5571.c
+++ b/drivers/iio/dac/ti-dac5571.c
@@ -306,9 +306,9 @@ static const struct iio_info dac5571_info = {
 	.write_raw_get_fmt = dac5571_write_raw_get_fmt,
 };
 
-static int dac5571_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int dac5571_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	const struct dac5571_spec *spec;
 	struct dac5571_data *data;
@@ -426,7 +426,7 @@ static struct i2c_driver dac5571_driver = {
 		   .name = "ti-dac5571",
 		   .of_match_table = dac5571_of_id,
 	},
-	.probe	  = dac5571_probe,
+	.probe_new = dac5571_probe,
 	.remove   = dac5571_remove,
 	.id_table = dac5571_id,
 };
-- 
GitLab


From 840ef016c202fc196c91f020750fab08c9a0779f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:07 +0100
Subject: [PATCH 2422/4122] iio: gyro: bmg160_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-94-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/bmg160_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/gyro/bmg160_i2c.c b/drivers/iio/gyro/bmg160_i2c.c
index 908ccc385254a..2b019ee5b2eb5 100644
--- a/drivers/iio/gyro/bmg160_i2c.c
+++ b/drivers/iio/gyro/bmg160_i2c.c
@@ -13,9 +13,9 @@ static const struct regmap_config bmg160_regmap_i2c_conf = {
 	.max_register = 0x3f
 };
 
-static int bmg160_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int bmg160_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	const char *name = NULL;
 
@@ -70,7 +70,7 @@ static struct i2c_driver bmg160_i2c_driver = {
 		.of_match_table = bmg160_of_match,
 		.pm	= &bmg160_pm_ops,
 	},
-	.probe		= bmg160_i2c_probe,
+	.probe_new	= bmg160_i2c_probe,
 	.remove		= bmg160_i2c_remove,
 	.id_table	= bmg160_i2c_id,
 };
-- 
GitLab


From 8312841060b8bc4b463dcf6a2cb8cc2452c6ac8f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:08 +0100
Subject: [PATCH 2423/4122] iio: gyro: itg3200_core: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-95-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/itg3200_core.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/gyro/itg3200_core.c b/drivers/iio/gyro/itg3200_core.c
index 74ca224684968..ceacd863d3eaa 100644
--- a/drivers/iio/gyro/itg3200_core.c
+++ b/drivers/iio/gyro/itg3200_core.c
@@ -295,8 +295,7 @@ static const struct iio_info itg3200_info = {
 
 static const unsigned long itg3200_available_scan_masks[] = { 0xffffffff, 0x0 };
 
-static int itg3200_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int itg3200_probe(struct i2c_client *client)
 {
 	int ret;
 	struct itg3200 *st;
@@ -406,7 +405,7 @@ static struct i2c_driver itg3200_driver = {
 		.pm	= pm_sleep_ptr(&itg3200_pm_ops),
 	},
 	.id_table	= itg3200_id,
-	.probe		= itg3200_probe,
+	.probe_new	= itg3200_probe,
 	.remove		= itg3200_remove,
 };
 
-- 
GitLab


From b97db5284e9ae169de79f78c3f09514d661407a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:09 +0100
Subject: [PATCH 2424/4122] iio: gyro: mpu3050-i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-96-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/mpu3050-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/gyro/mpu3050-i2c.c b/drivers/iio/gyro/mpu3050-i2c.c
index 12e3afa9dd11d..2116798226bf2 100644
--- a/drivers/iio/gyro/mpu3050-i2c.c
+++ b/drivers/iio/gyro/mpu3050-i2c.c
@@ -32,9 +32,9 @@ static int mpu3050_i2c_bypass_deselect(struct i2c_mux_core *mux, u32 chan_id)
 	return 0;
 }
 
-static int mpu3050_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int mpu3050_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	const char *name;
 	struct mpu3050 *mpu3050;
@@ -108,7 +108,7 @@ static const struct of_device_id mpu3050_i2c_of_match[] = {
 MODULE_DEVICE_TABLE(of, mpu3050_i2c_of_match);
 
 static struct i2c_driver mpu3050_i2c_driver = {
-	.probe = mpu3050_i2c_probe,
+	.probe_new = mpu3050_i2c_probe,
 	.remove = mpu3050_i2c_remove,
 	.id_table = mpu3050_i2c_id,
 	.driver = {
-- 
GitLab


From 1522b453c153cd93fe295a6dbd589f0f62fb168b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:10 +0100
Subject: [PATCH 2425/4122] iio: gyro: st_gyro_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-97-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/st_gyro_i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/gyro/st_gyro_i2c.c b/drivers/iio/gyro/st_gyro_i2c.c
index 8c7af42b65587..797a1c6a04020 100644
--- a/drivers/iio/gyro/st_gyro_i2c.c
+++ b/drivers/iio/gyro/st_gyro_i2c.c
@@ -58,8 +58,7 @@ static const struct of_device_id st_gyro_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, st_gyro_of_match);
 
-static int st_gyro_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int st_gyro_i2c_probe(struct i2c_client *client)
 {
 	const struct st_sensor_settings *settings;
 	struct st_sensor_data *gdata;
@@ -112,7 +111,7 @@ static struct i2c_driver st_gyro_driver = {
 		.name = "st-gyro-i2c",
 		.of_match_table = st_gyro_of_match,
 	},
-	.probe = st_gyro_i2c_probe,
+	.probe_new = st_gyro_i2c_probe,
 	.id_table = st_gyro_id_table,
 };
 module_i2c_driver(st_gyro_driver);
-- 
GitLab


From d4764a4045b14a938594a082662b780c2c8eec78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:11 +0100
Subject: [PATCH 2426/4122] iio: health: afe4404: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-98-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/health/afe4404.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/health/afe4404.c b/drivers/iio/health/afe4404.c
index 8fca787b25245..658dfc1a346ca 100644
--- a/drivers/iio/health/afe4404.c
+++ b/drivers/iio/health/afe4404.c
@@ -459,8 +459,7 @@ static int afe4404_resume(struct device *dev)
 static DEFINE_SIMPLE_DEV_PM_OPS(afe4404_pm_ops, afe4404_suspend,
 				afe4404_resume);
 
-static int afe4404_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int afe4404_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct afe4404_data *afe;
@@ -608,7 +607,7 @@ static struct i2c_driver afe4404_i2c_driver = {
 		.of_match_table = afe4404_of_match,
 		.pm = pm_sleep_ptr(&afe4404_pm_ops),
 	},
-	.probe = afe4404_probe,
+	.probe_new = afe4404_probe,
 	.remove = afe4404_remove,
 	.id_table = afe4404_ids,
 };
-- 
GitLab


From 3ef7e6e019c66beb6e4c8749a70d09d26326be8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:12 +0100
Subject: [PATCH 2427/4122] iio: health: max30100: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-99-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/health/max30100.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/health/max30100.c b/drivers/iio/health/max30100.c
index 3aa5d037a1c35..a80fa9852c220 100644
--- a/drivers/iio/health/max30100.c
+++ b/drivers/iio/health/max30100.c
@@ -417,8 +417,7 @@ static const struct iio_info max30100_info = {
 	.read_raw = max30100_read_raw,
 };
 
-static int max30100_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int max30100_probe(struct i2c_client *client)
 {
 	struct max30100_data *data;
 	struct iio_dev *indio_dev;
@@ -500,7 +499,7 @@ static struct i2c_driver max30100_driver = {
 		.name	= MAX30100_DRV_NAME,
 		.of_match_table	= max30100_dt_ids,
 	},
-	.probe		= max30100_probe,
+	.probe_new	= max30100_probe,
 	.remove		= max30100_remove,
 	.id_table	= max30100_id,
 };
-- 
GitLab


From 15818f0890133372fb7eb875841dbcdb8c8ed592 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:13 +0100
Subject: [PATCH 2428/4122] iio: health: max30102: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-100-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/health/max30102.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/health/max30102.c b/drivers/iio/health/max30102.c
index 66df4aaa31a7b..7edcf9e05687b 100644
--- a/drivers/iio/health/max30102.c
+++ b/drivers/iio/health/max30102.c
@@ -513,9 +513,9 @@ static const struct iio_info max30102_info = {
 	.read_raw = max30102_read_raw,
 };
 
-static int max30102_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int max30102_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct max30102_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -631,7 +631,7 @@ static struct i2c_driver max30102_driver = {
 		.name	= MAX30102_DRV_NAME,
 		.of_match_table	= max30102_dt_ids,
 	},
-	.probe		= max30102_probe,
+	.probe_new	= max30102_probe,
 	.remove		= max30102_remove,
 	.id_table	= max30102_id,
 };
-- 
GitLab


From 1b1a60e70b0cd7ec46df424f6e7d342585c446b3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:14 +0100
Subject: [PATCH 2429/4122] iio: humidity: am2315: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-101-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/humidity/am2315.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/humidity/am2315.c b/drivers/iio/humidity/am2315.c
index 4a39f10193471..f246516bd45ea 100644
--- a/drivers/iio/humidity/am2315.c
+++ b/drivers/iio/humidity/am2315.c
@@ -218,8 +218,7 @@ static const struct iio_info am2315_info = {
 	.read_raw		= am2315_read_raw,
 };
 
-static int am2315_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int am2315_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -263,7 +262,7 @@ static struct i2c_driver am2315_driver = {
 	.driver = {
 		.name = "am2315",
 	},
-	.probe =            am2315_probe,
+	.probe_new =        am2315_probe,
 	.id_table =         am2315_i2c_id,
 };
 
-- 
GitLab


From c5f1c4fdbfa2ac6ca247bf5f32fb4c35ba0b1ec5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:15 +0100
Subject: [PATCH 2430/4122] iio: humidity: hdc100x: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-102-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/humidity/hdc100x.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/humidity/hdc100x.c b/drivers/iio/humidity/hdc100x.c
index 47f8e8ef56d68..49a950d739e44 100644
--- a/drivers/iio/humidity/hdc100x.c
+++ b/drivers/iio/humidity/hdc100x.c
@@ -351,8 +351,7 @@ static const struct iio_info hdc100x_info = {
 	.attrs = &hdc100x_attribute_group,
 };
 
-static int hdc100x_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int hdc100x_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct hdc100x_data *data;
@@ -429,7 +428,7 @@ static struct i2c_driver hdc100x_driver = {
 		.of_match_table = hdc100x_dt_ids,
 		.acpi_match_table = hdc100x_acpi_match,
 	},
-	.probe = hdc100x_probe,
+	.probe_new = hdc100x_probe,
 	.id_table = hdc100x_id,
 };
 module_i2c_driver(hdc100x_driver);
-- 
GitLab


From 0a78deeb639a464c9160f6bba912c97567f18b17 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:16 +0100
Subject: [PATCH 2431/4122] iio: humidity: hdc2010: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-103-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/humidity/hdc2010.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/humidity/hdc2010.c b/drivers/iio/humidity/hdc2010.c
index d6858ccb056e4..c8fddd612e060 100644
--- a/drivers/iio/humidity/hdc2010.c
+++ b/drivers/iio/humidity/hdc2010.c
@@ -251,8 +251,7 @@ static const struct iio_info hdc2010_info = {
 	.attrs = &hdc2010_attribute_group,
 };
 
-static int hdc2010_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int hdc2010_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct hdc2010_data *data;
@@ -339,7 +338,7 @@ static struct i2c_driver hdc2010_driver = {
 		.name	= "hdc2010",
 		.of_match_table = hdc2010_dt_ids,
 	},
-	.probe = hdc2010_probe,
+	.probe_new = hdc2010_probe,
 	.remove = hdc2010_remove,
 	.id_table = hdc2010_id,
 };
-- 
GitLab


From 7b64a83ce6f94bbb91d91a66555f17bf3e9bc085 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:17 +0100
Subject: [PATCH 2432/4122] iio: humidity: hts221_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-104-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/humidity/hts221_i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/humidity/hts221_i2c.c b/drivers/iio/humidity/hts221_i2c.c
index afbc611f7712f..d81869423cf03 100644
--- a/drivers/iio/humidity/hts221_i2c.c
+++ b/drivers/iio/humidity/hts221_i2c.c
@@ -25,8 +25,7 @@ static const struct regmap_config hts221_i2c_regmap_config = {
 	.read_flag_mask = HTS221_I2C_AUTO_INCREMENT,
 };
 
-static int hts221_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int hts221_i2c_probe(struct i2c_client *client)
 {
 	struct regmap *regmap;
 
@@ -66,7 +65,7 @@ static struct i2c_driver hts221_driver = {
 		.of_match_table = hts221_i2c_of_match,
 		.acpi_match_table = ACPI_PTR(hts221_acpi_match),
 	},
-	.probe = hts221_i2c_probe,
+	.probe_new = hts221_i2c_probe,
 	.id_table = hts221_i2c_id_table,
 };
 module_i2c_driver(hts221_driver);
-- 
GitLab


From 1a144b6320f32feec3e54dbbda8b4c58b69f86c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:18 +0100
Subject: [PATCH 2433/4122] iio: humidity: htu21: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-105-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/humidity/htu21.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/humidity/htu21.c b/drivers/iio/humidity/htu21.c
index fd9e2565f8a2d..8411a9f3e828a 100644
--- a/drivers/iio/humidity/htu21.c
+++ b/drivers/iio/humidity/htu21.c
@@ -177,9 +177,9 @@ static const struct iio_info htu21_info = {
 	.attrs = &htu21_attribute_group,
 };
 
-static int htu21_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int htu21_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ms_ht_dev *dev_data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -244,7 +244,7 @@ static const struct of_device_id htu21_of_match[] = {
 MODULE_DEVICE_TABLE(of, htu21_of_match);
 
 static struct i2c_driver htu21_driver = {
-	.probe = htu21_probe,
+	.probe_new = htu21_probe,
 	.id_table = htu21_id,
 	.driver = {
 		   .name = "htu21",
-- 
GitLab


From e18594f6d87f8f7f2623263eb230139382e1a080 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:19 +0100
Subject: [PATCH 2434/4122] iio: humidity: si7005: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-106-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/humidity/si7005.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/humidity/si7005.c b/drivers/iio/humidity/si7005.c
index 160b3d92df619..fa1faf168c8d9 100644
--- a/drivers/iio/humidity/si7005.c
+++ b/drivers/iio/humidity/si7005.c
@@ -123,8 +123,7 @@ static const struct iio_info si7005_info = {
 	.read_raw = si7005_read_raw,
 };
 
-static int si7005_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int si7005_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct si7005_data *data;
@@ -174,7 +173,7 @@ static struct i2c_driver si7005_driver = {
 	.driver = {
 		.name	= "si7005",
 	},
-	.probe = si7005_probe,
+	.probe_new = si7005_probe,
 	.id_table = si7005_id,
 };
 module_i2c_driver(si7005_driver);
-- 
GitLab


From e6b610550e2c17b9eb83b10109462fc88f9a6367 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:20 +0100
Subject: [PATCH 2435/4122] iio: humidity: si7020: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-107-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/humidity/si7020.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/humidity/si7020.c b/drivers/iio/humidity/si7020.c
index ab6537f136ba1..3e50592e8e68b 100644
--- a/drivers/iio/humidity/si7020.c
+++ b/drivers/iio/humidity/si7020.c
@@ -103,8 +103,7 @@ static const struct iio_info si7020_info = {
 	.read_raw = si7020_read_raw,
 };
 
-static int si7020_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int si7020_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct i2c_client **data;
@@ -156,7 +155,7 @@ static struct i2c_driver si7020_driver = {
 		.name = "si7020",
 		.of_match_table = si7020_dt_ids,
 	},
-	.probe		= si7020_probe,
+	.probe_new	= si7020_probe,
 	.id_table	= si7020_id,
 };
 
-- 
GitLab


From 97202c55041806ba3fcddd6ca1b467154b5812cd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:21 +0100
Subject: [PATCH 2436/4122] iio: imu: bmi160/bmi160_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-108-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/bmi160/bmi160_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/imu/bmi160/bmi160_i2c.c b/drivers/iio/imu/bmi160/bmi160_i2c.c
index d93f4fa2ad554..2ca907d396a01 100644
--- a/drivers/iio/imu/bmi160/bmi160_i2c.c
+++ b/drivers/iio/imu/bmi160/bmi160_i2c.c
@@ -15,9 +15,9 @@
 
 #include "bmi160.h"
 
-static int bmi160_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int bmi160_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	const char *name;
 
@@ -60,7 +60,7 @@ static struct i2c_driver bmi160_i2c_driver = {
 		.acpi_match_table	= bmi160_acpi_match,
 		.of_match_table		= bmi160_of_match,
 	},
-	.probe		= bmi160_i2c_probe,
+	.probe_new	= bmi160_i2c_probe,
 	.id_table	= bmi160_i2c_id,
 };
 module_i2c_driver(bmi160_i2c_driver);
-- 
GitLab


From 4fee985aaf3afaa4ce908342a854439fd8480196 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:22 +0100
Subject: [PATCH 2437/4122] iio: imu: fxos8700_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-109-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/fxos8700_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/imu/fxos8700_i2c.c b/drivers/iio/imu/fxos8700_i2c.c
index 40a570325b0ac..a74a15fda8cbb 100644
--- a/drivers/iio/imu/fxos8700_i2c.c
+++ b/drivers/iio/imu/fxos8700_i2c.c
@@ -18,9 +18,9 @@
 
 #include "fxos8700.h"
 
-static int fxos8700_i2c_probe(struct i2c_client *client,
-			      const struct i2c_device_id *id)
+static int fxos8700_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	const char *name = NULL;
 
@@ -60,7 +60,7 @@ static struct i2c_driver fxos8700_i2c_driver = {
 		.acpi_match_table       = ACPI_PTR(fxos8700_acpi_match),
 		.of_match_table         = fxos8700_of_match,
 	},
-	.probe          = fxos8700_i2c_probe,
+	.probe_new      = fxos8700_i2c_probe,
 	.id_table       = fxos8700_i2c_id,
 };
 module_i2c_driver(fxos8700_i2c_driver);
-- 
GitLab


From 4f218ae01c5c365f78f97e5d5b2c8cb15b503fe3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:23 +0100
Subject: [PATCH 2438/4122] iio: imu: inv_mpu6050: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-110-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
index 7a8d60a5afa9a..2f2da4cb73215 100644
--- a/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
+++ b/drivers/iio/imu/inv_mpu6050/inv_mpu_i2c.c
@@ -91,13 +91,12 @@ static int inv_mpu_i2c_aux_setup(struct iio_dev *indio_dev)
 /**
  *  inv_mpu_probe() - probe function.
  *  @client:          i2c client.
- *  @id:              i2c device id.
  *
  *  Returns 0 on success, a negative error code otherwise.
  */
-static int inv_mpu_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int inv_mpu_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	const void *match;
 	struct inv_mpu6050_state *st;
 	int result;
@@ -260,7 +259,7 @@ static const struct acpi_device_id inv_acpi_match[] = {
 MODULE_DEVICE_TABLE(acpi, inv_acpi_match);
 
 static struct i2c_driver inv_mpu_driver = {
-	.probe		=	inv_mpu_probe,
+	.probe_new	=	inv_mpu_probe,
 	.remove		=	inv_mpu_remove,
 	.id_table	=	inv_mpu_id,
 	.driver = {
-- 
GitLab


From 4bf718bc3b7f07f82895d4e56a2af93b9a89f43a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:24 +0100
Subject: [PATCH 2439/4122] iio: imu: kmx61: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-111-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/kmx61.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/imu/kmx61.c b/drivers/iio/imu/kmx61.c
index b10c0dcac0bbe..e692dfeeda44c 100644
--- a/drivers/iio/imu/kmx61.c
+++ b/drivers/iio/imu/kmx61.c
@@ -1276,9 +1276,9 @@ static struct iio_trigger *kmx61_trigger_setup(struct kmx61_data *data,
 	return trig;
 }
 
-static int kmx61_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int kmx61_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int ret;
 	struct kmx61_data *data;
 	const char *name = NULL;
@@ -1517,7 +1517,7 @@ static struct i2c_driver kmx61_driver = {
 		.acpi_match_table = ACPI_PTR(kmx61_acpi_match),
 		.pm = pm_ptr(&kmx61_pm_ops),
 	},
-	.probe		= kmx61_probe,
+	.probe_new	= kmx61_probe,
 	.remove		= kmx61_remove,
 	.id_table	= kmx61_id,
 };
-- 
GitLab


From b7dbc0aecb2f4ca38475f0ef9d1b6ab6137be599 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:25 +0100
Subject: [PATCH 2440/4122] iio: imu: st_lsm6dsx: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-112-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
index 0faf1b4c11afe..3570fac1b6125 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
@@ -21,9 +21,9 @@ static const struct regmap_config st_lsm6dsx_i2c_regmap_config = {
 	.val_bits = 8,
 };
 
-static int st_lsm6dsx_i2c_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int st_lsm6dsx_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int hw_id = id->driver_data;
 	struct regmap *regmap;
 
@@ -152,7 +152,7 @@ static struct i2c_driver st_lsm6dsx_driver = {
 		.pm = pm_sleep_ptr(&st_lsm6dsx_pm_ops),
 		.of_match_table = st_lsm6dsx_i2c_of_match,
 	},
-	.probe = st_lsm6dsx_i2c_probe,
+	.probe_new = st_lsm6dsx_i2c_probe,
 	.id_table = st_lsm6dsx_i2c_id_table,
 };
 module_i2c_driver(st_lsm6dsx_driver);
-- 
GitLab


From 39c7d963d54a4dee2c6d95a4c5461600b26ca295 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:26 +0100
Subject: [PATCH 2441/4122] iio: light: adjd_s311: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-113-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/adjd_s311.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/adjd_s311.c b/drivers/iio/light/adjd_s311.c
index 6b33975c8d738..210a90f44c532 100644
--- a/drivers/iio/light/adjd_s311.c
+++ b/drivers/iio/light/adjd_s311.c
@@ -233,8 +233,7 @@ static const struct iio_info adjd_s311_info = {
 	.write_raw = adjd_s311_write_raw,
 };
 
-static int adjd_s311_probe(struct i2c_client *client,
-			   const struct i2c_device_id *id)
+static int adjd_s311_probe(struct i2c_client *client)
 {
 	struct adjd_s311_data *data;
 	struct iio_dev *indio_dev;
@@ -271,7 +270,7 @@ static struct i2c_driver adjd_s311_driver = {
 	.driver = {
 		.name	= ADJD_S311_DRV_NAME,
 	},
-	.probe		= adjd_s311_probe,
+	.probe_new	= adjd_s311_probe,
 	.id_table	= adjd_s311_id,
 };
 module_i2c_driver(adjd_s311_driver);
-- 
GitLab


From fd63b0a46acf4b00ff69e6c60ecdd3c78a68fc71 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:27 +0100
Subject: [PATCH 2442/4122] iio: light: adux1020: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-114-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/adux1020.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/adux1020.c b/drivers/iio/light/adux1020.c
index 9aa28695e6f19..606075350d01c 100644
--- a/drivers/iio/light/adux1020.c
+++ b/drivers/iio/light/adux1020.c
@@ -774,8 +774,7 @@ static int adux1020_chip_init(struct adux1020_data *data)
 			   ADUX1020_MODE_INT_MASK, ADUX1020_MODE_INT_DISABLE);
 }
 
-static int adux1020_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int adux1020_probe(struct i2c_client *client)
 {
 	struct adux1020_data *data;
 	struct iio_dev *indio_dev;
@@ -838,7 +837,7 @@ static struct i2c_driver adux1020_driver = {
 		.name	= ADUX1020_DRV_NAME,
 		.of_match_table = adux1020_of_match,
 	},
-	.probe		= adux1020_probe,
+	.probe_new	= adux1020_probe,
 	.id_table	= adux1020_id,
 };
 module_i2c_driver(adux1020_driver);
-- 
GitLab


From ad428de325d27964f1c471ae496232df89b22885 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:28 +0100
Subject: [PATCH 2443/4122] iio: light: al3010: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: David Heidelberg <david@ixit.cz>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-115-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/al3010.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/al3010.c b/drivers/iio/light/al3010.c
index ce5363845b225..69cc723e2ac4a 100644
--- a/drivers/iio/light/al3010.c
+++ b/drivers/iio/light/al3010.c
@@ -164,8 +164,7 @@ static const struct iio_info al3010_info = {
 	.attrs		= &al3010_attribute_group,
 };
 
-static int al3010_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int al3010_probe(struct i2c_client *client)
 {
 	struct al3010_data *data;
 	struct iio_dev *indio_dev;
@@ -230,7 +229,7 @@ static struct i2c_driver al3010_driver = {
 		.of_match_table = al3010_of_match,
 		.pm = pm_sleep_ptr(&al3010_pm_ops),
 	},
-	.probe		= al3010_probe,
+	.probe_new	= al3010_probe,
 	.id_table	= al3010_id,
 };
 module_i2c_driver(al3010_driver);
-- 
GitLab


From a76c90301f86107240eebb404d9881f16344f779 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:29 +0100
Subject: [PATCH 2444/4122] iio: light: al3320a: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-116-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/al3320a.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/al3320a.c b/drivers/iio/light/al3320a.c
index bc99179728edc..9ff28bbf34bbe 100644
--- a/drivers/iio/light/al3320a.c
+++ b/drivers/iio/light/al3320a.c
@@ -187,8 +187,7 @@ static const struct iio_info al3320a_info = {
 	.attrs		= &al3320a_attribute_group,
 };
 
-static int al3320a_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int al3320a_probe(struct i2c_client *client)
 {
 	struct al3320a_data *data;
 	struct iio_dev *indio_dev;
@@ -254,7 +253,7 @@ static struct i2c_driver al3320a_driver = {
 		.of_match_table = al3320a_of_match,
 		.pm = pm_sleep_ptr(&al3320a_pm_ops),
 	},
-	.probe		= al3320a_probe,
+	.probe_new	= al3320a_probe,
 	.id_table	= al3320a_id,
 };
 
-- 
GitLab


From debe8c7568242ae7e49803c6bffc56362e43bfd0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:30 +0100
Subject: [PATCH 2445/4122] iio: light: apds9300: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-117-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/apds9300.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/apds9300.c b/drivers/iio/light/apds9300.c
index b70f2681bcb33..15dfb753734fa 100644
--- a/drivers/iio/light/apds9300.c
+++ b/drivers/iio/light/apds9300.c
@@ -398,8 +398,7 @@ static irqreturn_t apds9300_interrupt_handler(int irq, void *private)
 	return IRQ_HANDLED;
 }
 
-static int apds9300_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int apds9300_probe(struct i2c_client *client)
 {
 	struct apds9300_data *data;
 	struct iio_dev *indio_dev;
@@ -505,7 +504,7 @@ static struct i2c_driver apds9300_driver = {
 		.name	= APDS9300_DRV_NAME,
 		.pm	= pm_sleep_ptr(&apds9300_pm_ops),
 	},
-	.probe		= apds9300_probe,
+	.probe_new	= apds9300_probe,
 	.remove		= apds9300_remove,
 	.id_table	= apds9300_id,
 };
-- 
GitLab


From 783964ae9752541356e40f47b677069151543c80 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:31 +0100
Subject: [PATCH 2446/4122] iio: light: apds9960: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Matt Ranostay <matt.ranostay@konsulko.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-118-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/apds9960.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/apds9960.c b/drivers/iio/light/apds9960.c
index 997aa01ecc115..ee6acc6a36eef 100644
--- a/drivers/iio/light/apds9960.c
+++ b/drivers/iio/light/apds9960.c
@@ -988,8 +988,7 @@ static int apds9960_chip_init(struct apds9960_data *data)
 	return apds9960_set_powermode(data, 1);
 }
 
-static int apds9960_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int apds9960_probe(struct i2c_client *client)
 {
 	struct apds9960_data *data;
 	struct iio_dev *indio_dev;
@@ -1132,7 +1131,7 @@ static struct i2c_driver apds9960_driver = {
 		.pm	= &apds9960_pm_ops,
 		.acpi_match_table = apds9960_acpi_match,
 	},
-	.probe		= apds9960_probe,
+	.probe_new	= apds9960_probe,
 	.remove		= apds9960_remove,
 	.id_table	= apds9960_id,
 };
-- 
GitLab


From 058d6333ea4fe069cce36adb12d6a6341ddcb540 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:32 +0100
Subject: [PATCH 2447/4122] iio: light: bh1750: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-119-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/bh1750.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/bh1750.c b/drivers/iio/light/bh1750.c
index 3e92820bc8200..390c5b3ad4f67 100644
--- a/drivers/iio/light/bh1750.c
+++ b/drivers/iio/light/bh1750.c
@@ -228,9 +228,9 @@ static const struct iio_chan_spec bh1750_channels[] = {
 	}
 };
 
-static int bh1750_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int bh1750_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int ret, usec;
 	struct bh1750_data *data;
 	struct iio_dev *indio_dev;
@@ -320,7 +320,7 @@ static struct i2c_driver bh1750_driver = {
 		.of_match_table = bh1750_of_match,
 		.pm = pm_sleep_ptr(&bh1750_pm_ops),
 	},
-	.probe = bh1750_probe,
+	.probe_new = bh1750_probe,
 	.remove = bh1750_remove,
 	.id_table = bh1750_id,
 
-- 
GitLab


From ed5c6b1ce4e1d412f9a97973348c7b8f3e12025a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:33 +0100
Subject: [PATCH 2448/4122] iio: light: bh1780: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-120-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/bh1780.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/bh1780.c b/drivers/iio/light/bh1780.c
index 90bca392b2620..da9039e5a8391 100644
--- a/drivers/iio/light/bh1780.c
+++ b/drivers/iio/light/bh1780.c
@@ -141,8 +141,7 @@ static const struct iio_chan_spec bh1780_channels[] = {
 	}
 };
 
-static int bh1780_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int bh1780_probe(struct i2c_client *client)
 {
 	int ret;
 	struct bh1780_data *bh1780;
@@ -270,7 +269,7 @@ static const struct of_device_id of_bh1780_match[] = {
 MODULE_DEVICE_TABLE(of, of_bh1780_match);
 
 static struct i2c_driver bh1780_driver = {
-	.probe		= bh1780_probe,
+	.probe_new	= bh1780_probe,
 	.remove		= bh1780_remove,
 	.id_table	= bh1780_id,
 	.driver = {
-- 
GitLab


From f8232aad7609a946d4bbb76b747cb9d73b8180f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:34 +0100
Subject: [PATCH 2449/4122] iio: light: cm3232: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-121-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/cm3232.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/cm3232.c b/drivers/iio/light/cm3232.c
index 5214cd014cf8b..43e492f5051d1 100644
--- a/drivers/iio/light/cm3232.c
+++ b/drivers/iio/light/cm3232.c
@@ -325,9 +325,9 @@ static const struct iio_info cm3232_info = {
 	.attrs			= &cm3232_attribute_group,
 };
 
-static int cm3232_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int cm3232_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct cm3232_chip *chip;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -417,7 +417,7 @@ static struct i2c_driver cm3232_driver = {
 		.pm	= pm_sleep_ptr(&cm3232_pm_ops),
 	},
 	.id_table	= cm3232_id,
-	.probe		= cm3232_probe,
+	.probe_new	= cm3232_probe,
 	.remove		= cm3232_remove,
 };
 
-- 
GitLab


From ebbcdb1a9ddec05b37347d3fbf537e1741876901 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:35 +0100
Subject: [PATCH 2450/4122] iio: light: cm3323: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-122-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/cm3323.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/cm3323.c b/drivers/iio/light/cm3323.c
index fd9a8c27de2e2..e5ce7d0fd272e 100644
--- a/drivers/iio/light/cm3323.c
+++ b/drivers/iio/light/cm3323.c
@@ -214,8 +214,7 @@ static const struct iio_info cm3323_info = {
 	.attrs		= &cm3323_attribute_group,
 };
 
-static int cm3323_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int cm3323_probe(struct i2c_client *client)
 {
 	struct cm3323_data *data;
 	struct iio_dev *indio_dev;
@@ -267,7 +266,7 @@ static struct i2c_driver cm3323_driver = {
 		.name = CM3323_DRV_NAME,
 		.of_match_table = cm3323_of_match,
 	},
-	.probe		= cm3323_probe,
+	.probe_new	= cm3323_probe,
 	.id_table	= cm3323_id,
 };
 
-- 
GitLab


From 31ceb2f5b4fbd7dd46d7d679b990e9195f1b3db6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:36 +0100
Subject: [PATCH 2451/4122] iio: light: cm36651: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-123-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/cm36651.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/cm36651.c b/drivers/iio/light/cm36651.c
index 6615c98b601c3..1707dbf2ce267 100644
--- a/drivers/iio/light/cm36651.c
+++ b/drivers/iio/light/cm36651.c
@@ -618,9 +618,9 @@ static const struct iio_info cm36651_info = {
 	.attrs			= &cm36651_attribute_group,
 };
 
-static int cm36651_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int cm36651_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct cm36651_data *cm36651;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -730,7 +730,7 @@ static struct i2c_driver cm36651_driver = {
 		.name	= "cm36651",
 		.of_match_table = cm36651_of_match,
 	},
-	.probe		= cm36651_probe,
+	.probe_new	= cm36651_probe,
 	.remove		= cm36651_remove,
 	.id_table	= cm36651_id,
 };
-- 
GitLab


From b3f8e22e737e7991a3754675c7418215cdb461b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:37 +0100
Subject: [PATCH 2452/4122] iio: light: gp2ap002: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-124-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/gp2ap002.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/gp2ap002.c b/drivers/iio/light/gp2ap002.c
index 8000fa347344f..c0430db0038af 100644
--- a/drivers/iio/light/gp2ap002.c
+++ b/drivers/iio/light/gp2ap002.c
@@ -425,8 +425,7 @@ static struct regmap_bus gp2ap002_regmap_bus = {
 	.reg_write = gp2ap002_regmap_i2c_write,
 };
 
-static int gp2ap002_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int gp2ap002_probe(struct i2c_client *client)
 {
 	struct gp2ap002 *gp2ap002;
 	struct iio_dev *indio_dev;
@@ -711,7 +710,7 @@ static struct i2c_driver gp2ap002_driver = {
 		.of_match_table = gp2ap002_of_match,
 		.pm = pm_ptr(&gp2ap002_dev_pm_ops),
 	},
-	.probe = gp2ap002_probe,
+	.probe_new = gp2ap002_probe,
 	.remove = gp2ap002_remove,
 	.id_table = gp2ap002_id_table,
 };
-- 
GitLab


From a969195001bcf0d8baa5363fc8a1b4d3fc5aff0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:38 +0100
Subject: [PATCH 2453/4122] iio: light: gp2ap020a00f: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-125-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/gp2ap020a00f.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/gp2ap020a00f.c b/drivers/iio/light/gp2ap020a00f.c
index 826439299e8b0..a5bf9da0d2f38 100644
--- a/drivers/iio/light/gp2ap020a00f.c
+++ b/drivers/iio/light/gp2ap020a00f.c
@@ -1467,9 +1467,9 @@ static const struct iio_buffer_setup_ops gp2ap020a00f_buffer_setup_ops = {
 	.predisable = &gp2ap020a00f_buffer_predisable,
 };
 
-static int gp2ap020a00f_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int gp2ap020a00f_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct gp2ap020a00f_data *data;
 	struct iio_dev *indio_dev;
 	struct regmap *regmap;
@@ -1609,7 +1609,7 @@ static struct i2c_driver gp2ap020a00f_driver = {
 		.name	= GP2A_I2C_NAME,
 		.of_match_table = gp2ap020a00f_of_match,
 	},
-	.probe		= gp2ap020a00f_probe,
+	.probe_new	= gp2ap020a00f_probe,
 	.remove		= gp2ap020a00f_remove,
 	.id_table	= gp2ap020a00f_id,
 };
-- 
GitLab


From ee6e0241f854b23f2177b05a6cdd538bcfe8d9cb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:39 +0100
Subject: [PATCH 2454/4122] iio: light: isl29018: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-126-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/isl29018.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/isl29018.c b/drivers/iio/light/isl29018.c
index b36f8b7ca68e7..141845fb47f96 100644
--- a/drivers/iio/light/isl29018.c
+++ b/drivers/iio/light/isl29018.c
@@ -711,9 +711,9 @@ static void isl29018_disable_regulator_action(void *_data)
 		pr_err("failed to disable isl29018's VCC regulator!\n");
 }
 
-static int isl29018_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int isl29018_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct isl29018_chip *chip;
 	struct iio_dev *indio_dev;
 	int err;
@@ -865,7 +865,7 @@ static struct i2c_driver isl29018_driver = {
 			.pm = pm_sleep_ptr(&isl29018_pm_ops),
 			.of_match_table = isl29018_of_match,
 		    },
-	.probe	 = isl29018_probe,
+	.probe_new = isl29018_probe,
 	.id_table = isl29018_id,
 };
 module_i2c_driver(isl29018_driver);
-- 
GitLab


From 2086bafffdf7200238ba887a4ac9bdba0c4503e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:40 +0100
Subject: [PATCH 2455/4122] iio: light: isl29028: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-127-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/isl29028.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/isl29028.c b/drivers/iio/light/isl29028.c
index 32d58e18f26da..bcf3a556e41ad 100644
--- a/drivers/iio/light/isl29028.c
+++ b/drivers/iio/light/isl29028.c
@@ -565,9 +565,9 @@ static const struct regmap_config isl29028_regmap_config = {
 	.cache_type = REGCACHE_RBTREE,
 };
 
-static int isl29028_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int isl29028_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct isl29028_chip *chip;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -698,7 +698,7 @@ static struct i2c_driver isl29028_driver = {
 		.pm = pm_ptr(&isl29028_pm_ops),
 		.of_match_table = isl29028_of_match,
 	},
-	.probe	 = isl29028_probe,
+	.probe_new = isl29028_probe,
 	.remove  = isl29028_remove,
 	.id_table = isl29028_id,
 };
-- 
GitLab


From 3059126a449c8c62c6b766da7744f853cce6ad36 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:41 +0100
Subject: [PATCH 2456/4122] iio: light: isl29125: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-128-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/isl29125.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/isl29125.c b/drivers/iio/light/isl29125.c
index c199e63cce828..b4bd656ca1698 100644
--- a/drivers/iio/light/isl29125.c
+++ b/drivers/iio/light/isl29125.c
@@ -241,8 +241,7 @@ static const struct iio_buffer_setup_ops isl29125_buffer_setup_ops = {
 	.predisable = isl29125_buffer_predisable,
 };
 
-static int isl29125_probe(struct i2c_client *client,
-			   const struct i2c_device_id *id)
+static int isl29125_probe(struct i2c_client *client)
 {
 	struct isl29125_data *data;
 	struct iio_dev *indio_dev;
@@ -338,7 +337,7 @@ static struct i2c_driver isl29125_driver = {
 		.name	= ISL29125_DRV_NAME,
 		.pm	= pm_sleep_ptr(&isl29125_pm_ops),
 	},
-	.probe		= isl29125_probe,
+	.probe_new	= isl29125_probe,
 	.remove		= isl29125_remove,
 	.id_table	= isl29125_id,
 };
-- 
GitLab


From dd97aab4f02c72d053335424752ee648a572ff81 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:42 +0100
Subject: [PATCH 2457/4122] iio: light: jsa1212: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-129-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/jsa1212.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/jsa1212.c b/drivers/iio/light/jsa1212.c
index 57ce6d75966c6..d3834d0a0635a 100644
--- a/drivers/iio/light/jsa1212.c
+++ b/drivers/iio/light/jsa1212.c
@@ -308,8 +308,7 @@ static const struct regmap_config jsa1212_regmap_config = {
 	.volatile_reg = jsa1212_is_volatile_reg,
 };
 
-static int jsa1212_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int jsa1212_probe(struct i2c_client *client)
 {
 	struct jsa1212_data *data;
 	struct iio_dev *indio_dev;
@@ -441,7 +440,7 @@ static struct i2c_driver jsa1212_driver = {
 		.pm	= pm_sleep_ptr(&jsa1212_pm_ops),
 		.acpi_match_table = ACPI_PTR(jsa1212_acpi_match),
 	},
-	.probe		= jsa1212_probe,
+	.probe_new	= jsa1212_probe,
 	.remove		= jsa1212_remove,
 	.id_table	= jsa1212_id,
 };
-- 
GitLab


From 958f5a0de0f67289e6bc0fa7850056b2af0c14f2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:43 +0100
Subject: [PATCH 2458/4122] iio: light: ltr501: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-130-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/ltr501.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/ltr501.c b/drivers/iio/light/ltr501.c
index 453b845ef265b..bdbd918213e45 100644
--- a/drivers/iio/light/ltr501.c
+++ b/drivers/iio/light/ltr501.c
@@ -1432,9 +1432,9 @@ static const char *ltr501_match_acpi_device(struct device *dev, int *chip_idx)
 	return dev_name(dev);
 }
 
-static int ltr501_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ltr501_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	static const char * const regulator_names[] = { "vdd", "vddio" };
 	struct ltr501_data *data;
 	struct iio_dev *indio_dev;
@@ -1641,7 +1641,7 @@ static struct i2c_driver ltr501_driver = {
 		.pm	= pm_sleep_ptr(&ltr501_pm_ops),
 		.acpi_match_table = ACPI_PTR(ltr_acpi_match),
 	},
-	.probe  = ltr501_probe,
+	.probe_new = ltr501_probe,
 	.remove	= ltr501_remove,
 	.id_table = ltr501_id,
 };
-- 
GitLab


From f0a6f7674ef29e01e8db2986f534476f60aac3cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:44 +0100
Subject: [PATCH 2459/4122] iio: light: lv0104cs: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-131-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/lv0104cs.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/lv0104cs.c b/drivers/iio/light/lv0104cs.c
index c2aef88f4e634..c041fa0faa5df 100644
--- a/drivers/iio/light/lv0104cs.c
+++ b/drivers/iio/light/lv0104cs.c
@@ -474,8 +474,7 @@ static const struct iio_chan_spec lv0104cs_channels[] = {
 	},
 };
 
-static int lv0104cs_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int lv0104cs_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct lv0104cs_private *lv0104cs;
@@ -521,7 +520,7 @@ static struct i2c_driver lv0104cs_i2c_driver = {
 		.name	= "lv0104cs",
 	},
 	.id_table	= lv0104cs_id,
-	.probe		= lv0104cs_probe,
+	.probe_new	= lv0104cs_probe,
 };
 module_i2c_driver(lv0104cs_i2c_driver);
 
-- 
GitLab


From 0978ef36b24cb10937417e94d49d5a3f1092e3d4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:45 +0100
Subject: [PATCH 2460/4122] iio: light: max44000: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-132-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/max44000.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/max44000.c b/drivers/iio/light/max44000.c
index 85689dffbcbf7..5dcabc43a30e6 100644
--- a/drivers/iio/light/max44000.c
+++ b/drivers/iio/light/max44000.c
@@ -523,8 +523,7 @@ out_unlock:
 	return IRQ_HANDLED;
 }
 
-static int max44000_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int max44000_probe(struct i2c_client *client)
 {
 	struct max44000_data *data;
 	struct iio_dev *indio_dev;
@@ -617,7 +616,7 @@ static struct i2c_driver max44000_driver = {
 		.name	= MAX44000_DRV_NAME,
 		.acpi_match_table = ACPI_PTR(max44000_acpi_match),
 	},
-	.probe		= max44000_probe,
+	.probe_new	= max44000_probe,
 	.id_table	= max44000_id,
 };
 
-- 
GitLab


From c49135d99f5285a3e781a50ee7111f68150d5cd2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:47 +0100
Subject: [PATCH 2461/4122] iio: light: noa1305: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-134-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/noa1305.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/noa1305.c b/drivers/iio/light/noa1305.c
index be3536b390fcb..eaf548d4649ea 100644
--- a/drivers/iio/light/noa1305.c
+++ b/drivers/iio/light/noa1305.c
@@ -186,8 +186,7 @@ static const struct regmap_config noa1305_regmap_config = {
 	.writeable_reg = noa1305_writable_reg,
 };
 
-static int noa1305_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int noa1305_probe(struct i2c_client *client)
 {
 	struct noa1305_priv *priv;
 	struct iio_dev *indio_dev;
@@ -279,7 +278,7 @@ static struct i2c_driver noa1305_driver = {
 		.name		= NOA1305_DRIVER_NAME,
 		.of_match_table	= noa1305_of_match,
 	},
-	.probe		= noa1305_probe,
+	.probe_new	= noa1305_probe,
 	.id_table	= noa1305_ids,
 };
 
-- 
GitLab


From 06d10073283e5b301bd850a9ded6f592fb74a803 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:48 +0100
Subject: [PATCH 2462/4122] iio: light: opt3001: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-135-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/opt3001.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/opt3001.c b/drivers/iio/light/opt3001.c
index a26d1c3f95437..ec4f5c2369c42 100644
--- a/drivers/iio/light/opt3001.c
+++ b/drivers/iio/light/opt3001.c
@@ -735,8 +735,7 @@ out:
 	return IRQ_HANDLED;
 }
 
-static int opt3001_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int opt3001_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 
@@ -835,7 +834,7 @@ static const struct of_device_id opt3001_of_match[] = {
 MODULE_DEVICE_TABLE(of, opt3001_of_match);
 
 static struct i2c_driver opt3001_driver = {
-	.probe = opt3001_probe,
+	.probe_new = opt3001_probe,
 	.remove = opt3001_remove,
 	.id_table = opt3001_id,
 
-- 
GitLab


From e16302076d8a42b760878fe603593e2c59106948 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:49 +0100
Subject: [PATCH 2463/4122] iio: light: pa12203001: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-136-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/pa12203001.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/pa12203001.c b/drivers/iio/light/pa12203001.c
index 3cb2de51f4aa8..15a666f15c275 100644
--- a/drivers/iio/light/pa12203001.c
+++ b/drivers/iio/light/pa12203001.c
@@ -338,8 +338,7 @@ out:
 	return ret;
 }
 
-static int pa12203001_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int pa12203001_probe(struct i2c_client *client)
 {
 	struct pa12203001_data *data;
 	struct iio_dev *indio_dev;
@@ -475,7 +474,7 @@ static struct i2c_driver pa12203001_driver = {
 		.pm = &pa12203001_pm_ops,
 		.acpi_match_table = ACPI_PTR(pa12203001_acpi_match),
 	},
-	.probe = pa12203001_probe,
+	.probe_new = pa12203001_probe,
 	.remove = pa12203001_remove,
 	.id_table = pa12203001_id,
 
-- 
GitLab


From 314ba3b4208b8fdc66ca6a94dac778ab673444e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:50 +0100
Subject: [PATCH 2464/4122] iio: light: rpr0521: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-137-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/rpr0521.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/rpr0521.c b/drivers/iio/light/rpr0521.c
index d1c16dd760583..668e444f60498 100644
--- a/drivers/iio/light/rpr0521.c
+++ b/drivers/iio/light/rpr0521.c
@@ -927,8 +927,7 @@ static const struct regmap_config rpr0521_regmap_config = {
 	.volatile_reg	= rpr0521_is_volatile_reg,
 };
 
-static int rpr0521_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int rpr0521_probe(struct i2c_client *client)
 {
 	struct rpr0521_data *data;
 	struct iio_dev *indio_dev;
@@ -1122,7 +1121,7 @@ static struct i2c_driver rpr0521_driver = {
 		.pm	= pm_ptr(&rpr0521_pm_ops),
 		.acpi_match_table = ACPI_PTR(rpr0521_acpi_match),
 	},
-	.probe		= rpr0521_probe,
+	.probe_new	= rpr0521_probe,
 	.remove		= rpr0521_remove,
 	.id_table	= rpr0521_id,
 };
-- 
GitLab


From 122b0c0ba505a2faebb883bbfc178449d47c4fba Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:51 +0100
Subject: [PATCH 2465/4122] iio: light: si1133: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-138-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/si1133.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/si1133.c b/drivers/iio/light/si1133.c
index f8c9b2cc322e8..a08fbc8f5adb1 100644
--- a/drivers/iio/light/si1133.c
+++ b/drivers/iio/light/si1133.c
@@ -990,9 +990,9 @@ static int si1133_validate_ids(struct iio_dev *iio_dev)
 	return 0;
 }
 
-static int si1133_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int si1133_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct si1133_data *data;
 	struct iio_dev *iio_dev;
 	int err;
@@ -1064,7 +1064,7 @@ static struct i2c_driver si1133_driver = {
 	.driver = {
 	    .name   = "si1133",
 	},
-	.probe  = si1133_probe,
+	.probe_new = si1133_probe,
 	.id_table = si1133_ids,
 };
 
-- 
GitLab


From fb006652dc597b2807bab56d25fe3b3404f87e53 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:52 +0100
Subject: [PATCH 2466/4122] iio: light: si1145: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-139-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/si1145.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/si1145.c b/drivers/iio/light/si1145.c
index e8f6cdf26f22a..f7126235f94c9 100644
--- a/drivers/iio/light/si1145.c
+++ b/drivers/iio/light/si1145.c
@@ -1269,9 +1269,9 @@ static int si1145_probe_trigger(struct iio_dev *indio_dev)
 	return 0;
 }
 
-static int si1145_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int si1145_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct si1145_data *data;
 	struct iio_dev *indio_dev;
 	u8 part_id, rev_id, seq_id;
@@ -1352,7 +1352,7 @@ static struct i2c_driver si1145_driver = {
 	.driver = {
 		.name   = "si1145",
 	},
-	.probe  = si1145_probe,
+	.probe_new = si1145_probe,
 	.id_table = si1145_ids,
 };
 
-- 
GitLab


From c3ff326a9d3c809b493775bd65c07dccb73258cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:53 +0100
Subject: [PATCH 2467/4122] iio: light: st_uvis25_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-140-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/st_uvis25_i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/st_uvis25_i2c.c b/drivers/iio/light/st_uvis25_i2c.c
index c982b0b255cf8..2160e87bb498e 100644
--- a/drivers/iio/light/st_uvis25_i2c.c
+++ b/drivers/iio/light/st_uvis25_i2c.c
@@ -25,8 +25,7 @@ static const struct regmap_config st_uvis25_i2c_regmap_config = {
 	.read_flag_mask = UVIS25_I2C_AUTO_INCREMENT,
 };
 
-static int st_uvis25_i2c_probe(struct i2c_client *client,
-			       const struct i2c_device_id *id)
+static int st_uvis25_i2c_probe(struct i2c_client *client)
 {
 	struct regmap *regmap;
 
@@ -58,7 +57,7 @@ static struct i2c_driver st_uvis25_driver = {
 		.pm = pm_sleep_ptr(&st_uvis25_pm_ops),
 		.of_match_table = st_uvis25_i2c_of_match,
 	},
-	.probe = st_uvis25_i2c_probe,
+	.probe_new = st_uvis25_i2c_probe,
 	.id_table = st_uvis25_i2c_id_table,
 };
 module_i2c_driver(st_uvis25_driver);
-- 
GitLab


From 9046d80dce04c65c92ea5550f220a2de236e3ff0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:54 +0100
Subject: [PATCH 2468/4122] iio: light: stk3310: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-141-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/stk3310.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/stk3310.c b/drivers/iio/light/stk3310.c
index 7b8e0da6aabc1..48ae6ff0015e1 100644
--- a/drivers/iio/light/stk3310.c
+++ b/drivers/iio/light/stk3310.c
@@ -586,8 +586,7 @@ out:
 	return IRQ_HANDLED;
 }
 
-static int stk3310_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int stk3310_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -715,7 +714,7 @@ static struct i2c_driver stk3310_driver = {
 		.pm = pm_sleep_ptr(&stk3310_pm_ops),
 		.acpi_match_table = ACPI_PTR(stk3310_acpi_id),
 	},
-	.probe =            stk3310_probe,
+	.probe_new =        stk3310_probe,
 	.remove =           stk3310_remove,
 	.id_table =         stk3310_i2c_id,
 };
-- 
GitLab


From b30cfdeb9f7fe03103472c4c79e65e382935082a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:55 +0100
Subject: [PATCH 2469/4122] iio: light: tcs3414: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-142-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tcs3414.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/tcs3414.c b/drivers/iio/light/tcs3414.c
index 3951536022b3d..5100732fbaf0a 100644
--- a/drivers/iio/light/tcs3414.c
+++ b/drivers/iio/light/tcs3414.c
@@ -279,8 +279,7 @@ static void tcs3414_powerdown_cleanup(void *data)
 	tcs3414_powerdown(data);
 }
 
-static int tcs3414_probe(struct i2c_client *client,
-			   const struct i2c_device_id *id)
+static int tcs3414_probe(struct i2c_client *client)
 {
 	struct tcs3414_data *data;
 	struct iio_dev *indio_dev;
@@ -374,7 +373,7 @@ static struct i2c_driver tcs3414_driver = {
 		.name	= TCS3414_DRV_NAME,
 		.pm	= pm_sleep_ptr(&tcs3414_pm_ops),
 	},
-	.probe		= tcs3414_probe,
+	.probe_new	= tcs3414_probe,
 	.id_table	= tcs3414_id,
 };
 module_i2c_driver(tcs3414_driver);
-- 
GitLab


From e498cc544eae9b6264c9a535f8784aafe5788965 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:56 +0100
Subject: [PATCH 2470/4122] iio: light: tcs3472: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-143-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tcs3472.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/tcs3472.c b/drivers/iio/light/tcs3472.c
index db17fec634be1..6187c54879162 100644
--- a/drivers/iio/light/tcs3472.c
+++ b/drivers/iio/light/tcs3472.c
@@ -442,8 +442,7 @@ static const struct iio_info tcs3472_info = {
 	.attrs = &tcs3472_attribute_group,
 };
 
-static int tcs3472_probe(struct i2c_client *client,
-			   const struct i2c_device_id *id)
+static int tcs3472_probe(struct i2c_client *client)
 {
 	struct tcs3472_data *data;
 	struct iio_dev *indio_dev;
@@ -610,7 +609,7 @@ static struct i2c_driver tcs3472_driver = {
 		.name	= TCS3472_DRV_NAME,
 		.pm	= pm_sleep_ptr(&tcs3472_pm_ops),
 	},
-	.probe		= tcs3472_probe,
+	.probe_new	= tcs3472_probe,
 	.remove		= tcs3472_remove,
 	.id_table	= tcs3472_id,
 };
-- 
GitLab


From 74cd01b3f9041ef3e44cc043b8bb10f79ec52c43 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:57 +0100
Subject: [PATCH 2471/4122] iio: light: tsl2563: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-144-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tsl2563.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/tsl2563.c b/drivers/iio/light/tsl2563.c
index 951f35ef3f41d..d0e42b73203a6 100644
--- a/drivers/iio/light/tsl2563.c
+++ b/drivers/iio/light/tsl2563.c
@@ -699,8 +699,7 @@ static const struct iio_info tsl2563_info = {
 	.write_event_config = &tsl2563_write_interrupt_config,
 };
 
-static int tsl2563_probe(struct i2c_client *client,
-				const struct i2c_device_id *device_id)
+static int tsl2563_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct tsl2563_chip *chip;
@@ -880,7 +879,7 @@ static struct i2c_driver tsl2563_i2c_driver = {
 		.of_match_table = tsl2563_of_match,
 		.pm	= pm_sleep_ptr(&tsl2563_pm_ops),
 	},
-	.probe		= tsl2563_probe,
+	.probe_new	= tsl2563_probe,
 	.remove		= tsl2563_remove,
 	.id_table	= tsl2563_id,
 };
-- 
GitLab


From dcc484a21108441f7970c4813b1223154dd23ea6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:58 +0100
Subject: [PATCH 2472/4122] iio: light: tsl2583: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-145-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tsl2583.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/tsl2583.c b/drivers/iio/light/tsl2583.c
index 7bcb5c7189224..a05f1c0453d16 100644
--- a/drivers/iio/light/tsl2583.c
+++ b/drivers/iio/light/tsl2583.c
@@ -809,8 +809,7 @@ static const struct iio_info tsl2583_info = {
 	.write_raw = tsl2583_write_raw,
 };
 
-static int tsl2583_probe(struct i2c_client *clientp,
-			 const struct i2c_device_id *idp)
+static int tsl2583_probe(struct i2c_client *clientp)
 {
 	int ret;
 	struct tsl2583_chip *chip;
@@ -943,7 +942,7 @@ static struct i2c_driver tsl2583_driver = {
 		.of_match_table = tsl2583_of_match,
 	},
 	.id_table = tsl2583_idtable,
-	.probe = tsl2583_probe,
+	.probe_new = tsl2583_probe,
 	.remove = tsl2583_remove,
 };
 module_i2c_driver(tsl2583_driver);
-- 
GitLab


From e723b95135b1510873c832eaf8361e6b8d76bfb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:37:59 +0100
Subject: [PATCH 2473/4122] iio: light: tsl2772: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-146-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tsl2772.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/tsl2772.c b/drivers/iio/light/tsl2772.c
index dd9051f1cc1a0..ad50baa0202cc 100644
--- a/drivers/iio/light/tsl2772.c
+++ b/drivers/iio/light/tsl2772.c
@@ -1750,9 +1750,9 @@ static const struct tsl2772_chip_info tsl2772_chip_info_tbl[] = {
 	},
 };
 
-static int tsl2772_probe(struct i2c_client *clientp,
-			 const struct i2c_device_id *id)
+static int tsl2772_probe(struct i2c_client *clientp)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(clientp);
 	struct iio_dev *indio_dev;
 	struct tsl2772_chip *chip;
 	int ret;
@@ -1931,7 +1931,7 @@ static struct i2c_driver tsl2772_driver = {
 		.pm = &tsl2772_pm_ops,
 	},
 	.id_table = tsl2772_idtable,
-	.probe = tsl2772_probe,
+	.probe_new = tsl2772_probe,
 };
 
 module_i2c_driver(tsl2772_driver);
-- 
GitLab


From 081f6b9dc055f9ccb19a86346b80df691972120b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:00 +0100
Subject: [PATCH 2474/4122] iio: light: tsl4531: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-147-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/tsl4531.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/tsl4531.c b/drivers/iio/light/tsl4531.c
index 090038fed8891..d95397eb15263 100644
--- a/drivers/iio/light/tsl4531.c
+++ b/drivers/iio/light/tsl4531.c
@@ -160,8 +160,7 @@ static int tsl4531_check_id(struct i2c_client *client)
 	}
 }
 
-static int tsl4531_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int tsl4531_probe(struct i2c_client *client)
 {
 	struct tsl4531_data *data;
 	struct iio_dev *indio_dev;
@@ -238,7 +237,7 @@ static struct i2c_driver tsl4531_driver = {
 		.name   = TSL4531_DRV_NAME,
 		.pm	= pm_sleep_ptr(&tsl4531_pm_ops),
 	},
-	.probe  = tsl4531_probe,
+	.probe_new = tsl4531_probe,
 	.remove = tsl4531_remove,
 	.id_table = tsl4531_id,
 };
-- 
GitLab


From 3ed2b14de4d36a9335e93798bf966c521df42a95 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:01 +0100
Subject: [PATCH 2475/4122] iio: light: us5182d: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-148-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/us5182d.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/us5182d.c b/drivers/iio/light/us5182d.c
index 3e652d7f3b0eb..8b2a0c99c8e6f 100644
--- a/drivers/iio/light/us5182d.c
+++ b/drivers/iio/light/us5182d.c
@@ -832,8 +832,7 @@ static irqreturn_t us5182d_irq_thread_handler(int irq, void *private)
 	return IRQ_HANDLED;
 }
 
-static int us5182d_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int us5182d_probe(struct i2c_client *client)
 {
 	struct us5182d_data *data;
 	struct iio_dev *indio_dev;
@@ -975,7 +974,7 @@ static struct i2c_driver us5182d_driver = {
 		.of_match_table = us5182d_of_match,
 		.acpi_match_table = ACPI_PTR(us5182d_acpi_match),
 	},
-	.probe = us5182d_probe,
+	.probe_new = us5182d_probe,
 	.remove = us5182d_remove,
 	.id_table = us5182d_id,
 
-- 
GitLab


From e61295e0d7cf6de3d7cc25e7f387ffe021f0b0a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:02 +0100
Subject: [PATCH 2476/4122] iio: light: vcnl4000: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-149-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/vcnl4000.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/vcnl4000.c b/drivers/iio/light/vcnl4000.c
index fdb3922ae4acd..cc1a2062e76d6 100644
--- a/drivers/iio/light/vcnl4000.c
+++ b/drivers/iio/light/vcnl4000.c
@@ -1180,9 +1180,9 @@ static int vcnl4010_probe_trigger(struct iio_dev *indio_dev)
 	return devm_iio_trigger_register(&client->dev, trigger);
 }
 
-static int vcnl4000_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int vcnl4000_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct vcnl4000_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -1326,7 +1326,7 @@ static struct i2c_driver vcnl4000_driver = {
 		.pm	= pm_ptr(&vcnl4000_pm_ops),
 		.of_match_table = vcnl_4000_of_match,
 	},
-	.probe  = vcnl4000_probe,
+	.probe_new = vcnl4000_probe,
 	.id_table = vcnl4000_id,
 	.remove	= vcnl4000_remove,
 };
-- 
GitLab


From 9da43dcc54a19fe40e3aa7dc03fbca3f8c0395ff Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:03 +0100
Subject: [PATCH 2477/4122] iio: light: vcnl4035: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-150-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/vcnl4035.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/vcnl4035.c b/drivers/iio/light/vcnl4035.c
index 3ed37f6057fbf..84148b9440006 100644
--- a/drivers/iio/light/vcnl4035.c
+++ b/drivers/iio/light/vcnl4035.c
@@ -539,8 +539,7 @@ static int vcnl4035_probe_trigger(struct iio_dev *indio_dev)
 	return ret;
 }
 
-static int vcnl4035_probe(struct i2c_client *client,
-				const struct i2c_device_id *id)
+static int vcnl4035_probe(struct i2c_client *client)
 {
 	struct vcnl4035_data *data;
 	struct iio_dev *indio_dev;
@@ -668,7 +667,7 @@ static struct i2c_driver vcnl4035_driver = {
 		.pm	= pm_ptr(&vcnl4035_pm_ops),
 		.of_match_table = vcnl4035_of_match,
 	},
-	.probe  = vcnl4035_probe,
+	.probe_new = vcnl4035_probe,
 	.remove	= vcnl4035_remove,
 	.id_table = vcnl4035_id,
 };
-- 
GitLab


From e465524d64d11fbd25a9eb65ce4e77b965979291 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:04 +0100
Subject: [PATCH 2478/4122] iio: light: veml6030: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-151-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/veml6030.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/veml6030.c b/drivers/iio/light/veml6030.c
index 9a7800cdfee26..e7d2d5d177d43 100644
--- a/drivers/iio/light/veml6030.c
+++ b/drivers/iio/light/veml6030.c
@@ -786,8 +786,7 @@ static int veml6030_hw_init(struct iio_dev *indio_dev)
 	return ret;
 }
 
-static int veml6030_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int veml6030_probe(struct i2c_client *client)
 {
 	int ret;
 	struct veml6030_data *data;
@@ -893,7 +892,7 @@ static struct i2c_driver veml6030_driver = {
 		.of_match_table = veml6030_of_match,
 		.pm = pm_ptr(&veml6030_pm_ops),
 	},
-	.probe = veml6030_probe,
+	.probe_new = veml6030_probe,
 	.id_table = veml6030_id,
 };
 module_i2c_driver(veml6030_driver);
-- 
GitLab


From f90b8694f355566c2ff70564942d4932d76e28e0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:05 +0100
Subject: [PATCH 2479/4122] iio: light: veml6070: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-152-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/veml6070.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/veml6070.c b/drivers/iio/light/veml6070.c
index cfa4e9e7c8038..ee76a68deb24a 100644
--- a/drivers/iio/light/veml6070.c
+++ b/drivers/iio/light/veml6070.c
@@ -135,8 +135,7 @@ static const struct iio_info veml6070_info = {
 	.read_raw = veml6070_read_raw,
 };
 
-static int veml6070_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int veml6070_probe(struct i2c_client *client)
 {
 	struct veml6070_data *data;
 	struct iio_dev *indio_dev;
@@ -199,7 +198,7 @@ static struct i2c_driver veml6070_driver = {
 	.driver = {
 		.name   = VEML6070_DRV_NAME,
 	},
-	.probe  = veml6070_probe,
+	.probe_new = veml6070_probe,
 	.remove  = veml6070_remove,
 	.id_table = veml6070_id,
 };
-- 
GitLab


From 6811c7a52f025ab50f53b3107e6071132a8fe1f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:06 +0100
Subject: [PATCH 2480/4122] iio: light: zopt2201: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-153-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/light/zopt2201.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/light/zopt2201.c b/drivers/iio/light/zopt2201.c
index e0bc9df9c88b0..e3bac8b563807 100644
--- a/drivers/iio/light/zopt2201.c
+++ b/drivers/iio/light/zopt2201.c
@@ -501,8 +501,7 @@ static const struct iio_info zopt2201_info = {
 	.attrs = &zopt2201_attribute_group,
 };
 
-static int zopt2201_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int zopt2201_probe(struct i2c_client *client)
 {
 	struct zopt2201_data *data;
 	struct iio_dev *indio_dev;
@@ -555,7 +554,7 @@ static struct i2c_driver zopt2201_driver = {
 	.driver = {
 		.name   = ZOPT2201_DRV_NAME,
 	},
-	.probe  = zopt2201_probe,
+	.probe_new = zopt2201_probe,
 	.id_table = zopt2201_id,
 };
 
-- 
GitLab


From 149a5043b6f9b100471a1fdbdc22a395602fadd1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:07 +0100
Subject: [PATCH 2481/4122] iio: magnetometer: ak8974: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-154-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/ak8974.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/ak8974.c b/drivers/iio/magnetometer/ak8974.c
index 7ec9ab3beb45a..45abdcce6bc0a 100644
--- a/drivers/iio/magnetometer/ak8974.c
+++ b/drivers/iio/magnetometer/ak8974.c
@@ -814,8 +814,7 @@ static const struct regmap_config ak8974_regmap_config = {
 	.precious_reg = ak8974_precious_reg,
 };
 
-static int ak8974_probe(struct i2c_client *i2c,
-			const struct i2c_device_id *id)
+static int ak8974_probe(struct i2c_client *i2c)
 {
 	struct iio_dev *indio_dev;
 	struct ak8974 *ak8974;
@@ -1047,7 +1046,7 @@ static struct i2c_driver ak8974_driver = {
 		.pm = pm_ptr(&ak8974_dev_pm_ops),
 		.of_match_table = ak8974_of_match,
 	},
-	.probe	  = ak8974_probe,
+	.probe_new = ak8974_probe,
 	.remove	  = ak8974_remove,
 	.id_table = ak8974_id,
 };
-- 
GitLab


From f64eecea19ba974f03e3614ebcc3749d9a24da77 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:08 +0100
Subject: [PATCH 2482/4122] iio: magnetometer: ak8975: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-155-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/ak8975.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/ak8975.c b/drivers/iio/magnetometer/ak8975.c
index caf03a2a98a5d..924b481a3034a 100644
--- a/drivers/iio/magnetometer/ak8975.c
+++ b/drivers/iio/magnetometer/ak8975.c
@@ -876,9 +876,9 @@ static irqreturn_t ak8975_handle_trigger(int irq, void *p)
 	return IRQ_HANDLED;
 }
 
-static int ak8975_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ak8975_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ak8975_data *data;
 	struct iio_dev *indio_dev;
 	struct gpio_desc *eoc_gpiod;
@@ -1110,7 +1110,7 @@ static struct i2c_driver ak8975_driver = {
 		.of_match_table = ak8975_of_match,
 		.acpi_match_table = ak_acpi_match,
 	},
-	.probe		= ak8975_probe,
+	.probe_new	= ak8975_probe,
 	.remove		= ak8975_remove,
 	.id_table	= ak8975_id,
 };
-- 
GitLab


From f23215c1e5a1255ed2097afe3e564dec96f73911 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:09 +0100
Subject: [PATCH 2483/4122] iio: magnetometer: bmc150_magn_i2c: Convert to
 i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-156-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/bmc150_magn_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/bmc150_magn_i2c.c b/drivers/iio/magnetometer/bmc150_magn_i2c.c
index 570deaa87836b..44b8960eea175 100644
--- a/drivers/iio/magnetometer/bmc150_magn_i2c.c
+++ b/drivers/iio/magnetometer/bmc150_magn_i2c.c
@@ -16,9 +16,9 @@
 
 #include "bmc150_magn.h"
 
-static int bmc150_magn_i2c_probe(struct i2c_client *client,
-				 const struct i2c_device_id *id)
+static int bmc150_magn_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 	const char *name = NULL;
 
@@ -71,7 +71,7 @@ static struct i2c_driver bmc150_magn_driver = {
 		.acpi_match_table = ACPI_PTR(bmc150_magn_acpi_match),
 		.pm	= &bmc150_magn_pm_ops,
 	},
-	.probe		= bmc150_magn_i2c_probe,
+	.probe_new	= bmc150_magn_i2c_probe,
 	.remove		= bmc150_magn_i2c_remove,
 	.id_table	= bmc150_magn_i2c_id,
 };
-- 
GitLab


From bcf73c37f7c7e7c1d0cac6ebea36cfd7bf626727 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:10 +0100
Subject: [PATCH 2484/4122] iio: magnetometer: hmc5843: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-157-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/hmc5843_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/hmc5843_i2c.c b/drivers/iio/magnetometer/hmc5843_i2c.c
index 18a13dd512961..7ef2b1d562898 100644
--- a/drivers/iio/magnetometer/hmc5843_i2c.c
+++ b/drivers/iio/magnetometer/hmc5843_i2c.c
@@ -52,9 +52,9 @@ static const struct regmap_config hmc5843_i2c_regmap_config = {
 	.cache_type = REGCACHE_RBTREE,
 };
 
-static int hmc5843_i2c_probe(struct i2c_client *cli,
-			     const struct i2c_device_id *id)
+static int hmc5843_i2c_probe(struct i2c_client *cli)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(cli);
 	struct regmap *regmap = devm_regmap_init_i2c(cli,
 			&hmc5843_i2c_regmap_config);
 	if (IS_ERR(regmap))
@@ -95,7 +95,7 @@ static struct i2c_driver hmc5843_driver = {
 		.of_match_table = hmc5843_of_match,
 	},
 	.id_table	= hmc5843_id,
-	.probe		= hmc5843_i2c_probe,
+	.probe_new	= hmc5843_i2c_probe,
 	.remove		= hmc5843_i2c_remove,
 };
 module_i2c_driver(hmc5843_driver);
-- 
GitLab


From cee51403c696aac364fd991bc3a95a90722678b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:11 +0100
Subject: [PATCH 2485/4122] iio: magnetometer: mag3110: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-158-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/mag3110.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/mag3110.c b/drivers/iio/magnetometer/mag3110.c
index b870ad8038626..661176a885ad7 100644
--- a/drivers/iio/magnetometer/mag3110.c
+++ b/drivers/iio/magnetometer/mag3110.c
@@ -469,9 +469,9 @@ static const struct iio_info mag3110_info = {
 
 static const unsigned long mag3110_scan_masks[] = {0x7, 0xf, 0};
 
-static int mag3110_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mag3110_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct mag3110_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -641,7 +641,7 @@ static struct i2c_driver mag3110_driver = {
 		.of_match_table = mag3110_of_match,
 		.pm	= pm_sleep_ptr(&mag3110_pm_ops),
 	},
-	.probe = mag3110_probe,
+	.probe_new = mag3110_probe,
 	.remove = mag3110_remove,
 	.id_table = mag3110_id,
 };
-- 
GitLab


From e7a45a76351b6b5163af4a581ab46fdad137d62a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:12 +0100
Subject: [PATCH 2486/4122] iio: magnetometer: mmc35240: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-159-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/mmc35240.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/mmc35240.c b/drivers/iio/magnetometer/mmc35240.c
index 186edfcda0b70..756dadbad106f 100644
--- a/drivers/iio/magnetometer/mmc35240.c
+++ b/drivers/iio/magnetometer/mmc35240.c
@@ -481,8 +481,7 @@ static const struct regmap_config mmc35240_regmap_config = {
 	.num_reg_defaults = ARRAY_SIZE(mmc35240_reg_defaults),
 };
 
-static int mmc35240_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int mmc35240_probe(struct i2c_client *client)
 {
 	struct mmc35240_data *data;
 	struct iio_dev *indio_dev;
@@ -576,7 +575,7 @@ static struct i2c_driver mmc35240_driver = {
 		.pm = pm_sleep_ptr(&mmc35240_pm_ops),
 		.acpi_match_table = ACPI_PTR(mmc35240_acpi_match),
 	},
-	.probe		= mmc35240_probe,
+	.probe_new	= mmc35240_probe,
 	.id_table	= mmc35240_id,
 };
 
-- 
GitLab


From fcd969675290cd3cbe911b350502332be58f100b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:13 +0100
Subject: [PATCH 2487/4122] iio: magnetometer: yamaha-yas530: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-160-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/magnetometer/yamaha-yas530.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/magnetometer/yamaha-yas530.c b/drivers/iio/magnetometer/yamaha-yas530.c
index 801c760feb4d1..753717158b073 100644
--- a/drivers/iio/magnetometer/yamaha-yas530.c
+++ b/drivers/iio/magnetometer/yamaha-yas530.c
@@ -1384,9 +1384,9 @@ static const struct yas5xx_chip_info yas5xx_chip_info_tbl[] = {
 	},
 };
 
-static int yas5xx_probe(struct i2c_client *i2c,
-			const struct i2c_device_id *id)
+static int yas5xx_probe(struct i2c_client *i2c)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(i2c);
 	struct iio_dev *indio_dev;
 	struct device *dev = &i2c->dev;
 	struct yas5xx *yas5xx;
@@ -1605,7 +1605,7 @@ static struct i2c_driver yas5xx_driver = {
 		.of_match_table = yas5xx_of_match,
 		.pm = pm_ptr(&yas5xx_dev_pm_ops),
 	},
-	.probe	  = yas5xx_probe,
+	.probe_new = yas5xx_probe,
 	.remove	  = yas5xx_remove,
 	.id_table = yas5xx_id,
 };
-- 
GitLab


From b11df8374c0d0581474e0b8f74a7ba8f4a10be7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:14 +0100
Subject: [PATCH 2488/4122] iio: potentiometer: ad5272: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-161-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/potentiometer/ad5272.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/potentiometer/ad5272.c b/drivers/iio/potentiometer/ad5272.c
index ed5fc0b50fe97..aa140d6321016 100644
--- a/drivers/iio/potentiometer/ad5272.c
+++ b/drivers/iio/potentiometer/ad5272.c
@@ -158,9 +158,9 @@ static int ad5272_reset(struct ad5272_data *data)
 	return 0;
 }
 
-static int ad5272_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ad5272_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct iio_dev *indio_dev;
 	struct ad5272_data *data;
@@ -218,7 +218,7 @@ static struct i2c_driver ad5272_driver = {
 		.name	= "ad5272",
 		.of_match_table = ad5272_dt_ids,
 	},
-	.probe		= ad5272_probe,
+	.probe_new	= ad5272_probe,
 	.id_table	= ad5272_id,
 };
 
-- 
GitLab


From 705f1ce22b967b8fd7fc3cb0de31830ec9eda3c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:15 +0100
Subject: [PATCH 2489/4122] iio: potentiometer: ds1803: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-162-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/potentiometer/ds1803.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/potentiometer/ds1803.c b/drivers/iio/potentiometer/ds1803.c
index 5c212ed7a9313..0b5e475807cbc 100644
--- a/drivers/iio/potentiometer/ds1803.c
+++ b/drivers/iio/potentiometer/ds1803.c
@@ -202,8 +202,9 @@ static const struct iio_info ds1803_info = {
 	.read_avail = ds1803_read_avail,
 };
 
-static int ds1803_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int ds1803_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct ds1803_data *data;
 	struct iio_dev *indio_dev;
@@ -251,7 +252,7 @@ static struct i2c_driver ds1803_driver = {
 		.name	= "ds1803",
 		.of_match_table = ds1803_dt_ids,
 	},
-	.probe		= ds1803_probe,
+	.probe_new	= ds1803_probe,
 	.id_table	= ds1803_id,
 };
 
-- 
GitLab


From e005024112dd3800e78d056facf958fc6451ba10 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:16 +0100
Subject: [PATCH 2490/4122] iio: potentiometer: max5432: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-163-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/potentiometer/max5432.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/potentiometer/max5432.c b/drivers/iio/potentiometer/max5432.c
index aed3b6ab82a21..94ef27ef3fb57 100644
--- a/drivers/iio/potentiometer/max5432.c
+++ b/drivers/iio/potentiometer/max5432.c
@@ -85,8 +85,7 @@ static const struct iio_info max5432_info = {
 	.write_raw = max5432_write_raw,
 };
 
-static int max5432_probe(struct i2c_client *client,
-		const struct i2c_device_id *id)
+static int max5432_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 	struct iio_dev *indio_dev;
@@ -124,7 +123,7 @@ static struct i2c_driver max5432_driver = {
 		.name = "max5432",
 		.of_match_table = max5432_dt_ids,
 	},
-	.probe = max5432_probe,
+	.probe_new = max5432_probe,
 };
 
 module_i2c_driver(max5432_driver);
-- 
GitLab


From 4c1142cc436842eefffcd800ca41f8327bf61f88 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:17 +0100
Subject: [PATCH 2491/4122] iio: potentiometer: tpl0102: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-164-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/potentiometer/tpl0102.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/potentiometer/tpl0102.c b/drivers/iio/potentiometer/tpl0102.c
index d996dc367fb77..a3465b413b0ca 100644
--- a/drivers/iio/potentiometer/tpl0102.c
+++ b/drivers/iio/potentiometer/tpl0102.c
@@ -120,9 +120,9 @@ static const struct iio_info tpl0102_info = {
 	.write_raw = tpl0102_write_raw,
 };
 
-static int tpl0102_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int tpl0102_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct tpl0102_data *data;
 	struct iio_dev *indio_dev;
@@ -161,7 +161,7 @@ static struct i2c_driver tpl0102_driver = {
 	.driver = {
 		.name = "tpl0102",
 	},
-	.probe = tpl0102_probe,
+	.probe_new = tpl0102_probe,
 	.id_table = tpl0102_id,
 };
 
-- 
GitLab


From 7d8b3e100abeeff40f54853ce82bc1d900cb2bf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:18 +0100
Subject: [PATCH 2492/4122] iio: potentiostat: lmp91000: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-165-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/potentiostat/lmp91000.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/potentiostat/lmp91000.c b/drivers/iio/potentiostat/lmp91000.c
index 5ec7060d31d90..b82f093f1e6a4 100644
--- a/drivers/iio/potentiostat/lmp91000.c
+++ b/drivers/iio/potentiostat/lmp91000.c
@@ -292,8 +292,7 @@ static const struct iio_buffer_setup_ops lmp91000_buffer_setup_ops = {
 	.predisable = lmp91000_buffer_predisable,
 };
 
-static int lmp91000_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int lmp91000_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
 	struct lmp91000_data *data;
@@ -417,7 +416,7 @@ static struct i2c_driver lmp91000_driver = {
 		.name = LMP91000_DRV_NAME,
 		.of_match_table = lmp91000_of_match,
 	},
-	.probe = lmp91000_probe,
+	.probe_new = lmp91000_probe,
 	.remove = lmp91000_remove,
 	.id_table = lmp91000_id,
 };
-- 
GitLab


From 576306ab4ceb16a032b5f67aeb635f385390193c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:19 +0100
Subject: [PATCH 2493/4122] iio: pressure: abp060mg: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-166-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/abp060mg.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/abp060mg.c b/drivers/iio/pressure/abp060mg.c
index e1c3bdb371ee6..c0140779366af 100644
--- a/drivers/iio/pressure/abp060mg.c
+++ b/drivers/iio/pressure/abp060mg.c
@@ -174,9 +174,9 @@ static void abp060mg_init_device(struct iio_dev *indio_dev, unsigned long id)
 		state->offset -= ABP060MG_NUM_COUNTS >> 1;
 }
 
-static int abp060mg_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int abp060mg_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct abp_state *state;
 	unsigned long cfg_id = id->driver_data;
@@ -255,7 +255,7 @@ static struct i2c_driver abp060mg_driver = {
 	.driver = {
 		.name = "abp060mg",
 	},
-	.probe = abp060mg_probe,
+	.probe_new = abp060mg_probe,
 	.id_table = abp060mg_id_table,
 };
 module_i2c_driver(abp060mg_driver);
-- 
GitLab


From 046ae105f266aae218f76dc6321eebebeead6b94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:21 +0100
Subject: [PATCH 2494/4122] iio: pressure: dlhl60d: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-168-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/dlhl60d.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/dlhl60d.c b/drivers/iio/pressure/dlhl60d.c
index f0b0d198c6d47..43650b048d629 100644
--- a/drivers/iio/pressure/dlhl60d.c
+++ b/drivers/iio/pressure/dlhl60d.c
@@ -282,9 +282,9 @@ static irqreturn_t dlh_interrupt(int irq, void *private)
 	return IRQ_HANDLED;
 };
 
-static int dlh_probe(struct i2c_client *client,
-	const struct i2c_device_id *id)
+static int dlh_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct dlh_state *st;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -362,7 +362,7 @@ static struct i2c_driver dlh_driver = {
 		.name = "dlhl60d",
 		.of_match_table = dlh_of_match,
 	},
-	.probe = dlh_probe,
+	.probe_new = dlh_probe,
 	.id_table = dlh_id,
 };
 module_i2c_driver(dlh_driver);
-- 
GitLab


From 92a54a29cf39f8f18632f9af43aa5baf4ff805bb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:22 +0100
Subject: [PATCH 2495/4122] iio: pressure: dps310: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-169-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/dps310.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/dps310.c b/drivers/iio/pressure/dps310.c
index 984a3f511a1ae..2af275a24ff9e 100644
--- a/drivers/iio/pressure/dps310.c
+++ b/drivers/iio/pressure/dps310.c
@@ -827,9 +827,9 @@ static const struct iio_info dps310_info = {
 	.write_raw = dps310_write_raw,
 };
 
-static int dps310_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int dps310_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct dps310_data *data;
 	struct iio_dev *iio;
 	int rc;
@@ -887,7 +887,7 @@ static struct i2c_driver dps310_driver = {
 		.name = DPS310_DEV_NAME,
 		.acpi_match_table = dps310_acpi_match,
 	},
-	.probe = dps310_probe,
+	.probe_new = dps310_probe,
 	.id_table = dps310_id,
 };
 module_i2c_driver(dps310_driver);
-- 
GitLab


From 3d5f5d599f61a2ab99768471cf56c61b1751f98f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:23 +0100
Subject: [PATCH 2496/4122] iio: pressure: hp03: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-170-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/hp03.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/hp03.c b/drivers/iio/pressure/hp03.c
index 9538118c96489..bd1f71a99cfa3 100644
--- a/drivers/iio/pressure/hp03.c
+++ b/drivers/iio/pressure/hp03.c
@@ -208,9 +208,9 @@ static const struct iio_info hp03_info = {
 	.read_raw	= &hp03_read_raw,
 };
 
-static int hp03_probe(struct i2c_client *client,
-		      const struct i2c_device_id *id)
+static int hp03_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device *dev = &client->dev;
 	struct iio_dev *indio_dev;
 	struct hp03_priv *priv;
@@ -282,7 +282,7 @@ static struct i2c_driver hp03_driver = {
 		.name	= "hp03",
 		.of_match_table = hp03_of_match,
 	},
-	.probe		= hp03_probe,
+	.probe_new	= hp03_probe,
 	.id_table	= hp03_id,
 };
 module_i2c_driver(hp03_driver);
-- 
GitLab


From 240c69e09d3f77673653f56867aba2c69a66264d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:24 +0100
Subject: [PATCH 2497/4122] iio: pressure: hp206c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-171-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/hp206c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/hp206c.c b/drivers/iio/pressure/hp206c.c
index 986b7a59712e0..b6d2ff4643419 100644
--- a/drivers/iio/pressure/hp206c.c
+++ b/drivers/iio/pressure/hp206c.c
@@ -352,9 +352,9 @@ static const struct iio_info hp206c_info = {
 	.write_raw = hp206c_write_raw,
 };
 
-static int hp206c_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int hp206c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct hp206c_data *data;
 	int ret;
@@ -409,7 +409,7 @@ MODULE_DEVICE_TABLE(acpi, hp206c_acpi_match);
 #endif
 
 static struct i2c_driver hp206c_driver = {
-	.probe = hp206c_probe,
+	.probe_new = hp206c_probe,
 	.id_table = hp206c_id,
 	.driver = {
 		.name = "hp206c",
-- 
GitLab


From 7a5da8b28618426a81f53f496de3a83d47df2634 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:25 +0100
Subject: [PATCH 2498/4122] iio: pressure: icp10100: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
Link: https://lore.kernel.org/r/20221118224540.619276-172-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/icp10100.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/icp10100.c b/drivers/iio/pressure/icp10100.c
index b62f28585db5b..407cf25ea0e33 100644
--- a/drivers/iio/pressure/icp10100.c
+++ b/drivers/iio/pressure/icp10100.c
@@ -530,8 +530,7 @@ static void icp10100_pm_disable(void *data)
 	pm_runtime_disable(dev);
 }
 
-static int icp10100_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int icp10100_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct icp10100_state *st;
@@ -649,7 +648,7 @@ static struct i2c_driver icp10100_driver = {
 		.pm = pm_ptr(&icp10100_pm),
 		.of_match_table = icp10100_of_match,
 	},
-	.probe = icp10100_probe,
+	.probe_new = icp10100_probe,
 	.id_table = icp10100_id,
 };
 module_i2c_driver(icp10100_driver);
-- 
GitLab


From 8afce858762fc767dbd52d313adf058098332aaf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:26 +0100
Subject: [PATCH 2499/4122] iio: pressure: mpl115_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-173-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/mpl115_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/mpl115_i2c.c b/drivers/iio/pressure/mpl115_i2c.c
index 555bda1146fb9..ade4dd854ddf1 100644
--- a/drivers/iio/pressure/mpl115_i2c.c
+++ b/drivers/iio/pressure/mpl115_i2c.c
@@ -35,9 +35,9 @@ static const struct mpl115_ops mpl115_i2c_ops = {
 	.write = mpl115_i2c_write,
 };
 
-static int mpl115_i2c_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mpl115_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_WORD_DATA))
 		return -EOPNOTSUPP;
 
@@ -55,7 +55,7 @@ static struct i2c_driver mpl115_i2c_driver = {
 		.name	= "mpl115",
 		.pm = pm_ptr(&mpl115_dev_pm_ops),
 	},
-	.probe = mpl115_i2c_probe,
+	.probe_new = mpl115_i2c_probe,
 	.id_table = mpl115_i2c_id,
 };
 module_i2c_driver(mpl115_i2c_driver);
-- 
GitLab


From 0586ce78a70c2792c4dfd415d1a427ea447bebd4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:27 +0100
Subject: [PATCH 2500/4122] iio: pressure: mpl3115: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-174-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/mpl3115.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/mpl3115.c b/drivers/iio/pressure/mpl3115.c
index 2f22aba61e4dd..72e811a5c96e6 100644
--- a/drivers/iio/pressure/mpl3115.c
+++ b/drivers/iio/pressure/mpl3115.c
@@ -230,9 +230,9 @@ static const struct iio_info mpl3115_info = {
 	.read_raw = &mpl3115_read_raw,
 };
 
-static int mpl3115_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mpl3115_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct mpl3115_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -335,7 +335,7 @@ static struct i2c_driver mpl3115_driver = {
 		.of_match_table = mpl3115_of_match,
 		.pm	= pm_sleep_ptr(&mpl3115_pm_ops),
 	},
-	.probe = mpl3115_probe,
+	.probe_new = mpl3115_probe,
 	.remove = mpl3115_remove,
 	.id_table = mpl3115_id,
 };
-- 
GitLab


From eba7dcb65337dc2b081508317e10d0f1187121a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:28 +0100
Subject: [PATCH 2501/4122] iio: pressure: ms5611_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-175-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/ms5611_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/ms5611_i2c.c b/drivers/iio/pressure/ms5611_i2c.c
index b681a41839098..caf882497656d 100644
--- a/drivers/iio/pressure/ms5611_i2c.c
+++ b/drivers/iio/pressure/ms5611_i2c.c
@@ -79,9 +79,9 @@ static int ms5611_i2c_read_adc_temp_and_pressure(struct ms5611_state *st,
 	return ms5611_i2c_read_adc(st, pressure);
 }
 
-static int ms5611_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int ms5611_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ms5611_state *st;
 	struct iio_dev *indio_dev;
 
@@ -130,7 +130,7 @@ static struct i2c_driver ms5611_driver = {
 		.of_match_table = ms5611_i2c_matches,
 	},
 	.id_table = ms5611_id,
-	.probe = ms5611_i2c_probe,
+	.probe_new = ms5611_i2c_probe,
 	.remove = ms5611_i2c_remove,
 };
 module_i2c_driver(ms5611_driver);
-- 
GitLab


From f80ccc71c3f38d55662a6cfdec5a9e0907f6dfb6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:29 +0100
Subject: [PATCH 2502/4122] iio: pressure: ms5637: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-176-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/ms5637.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/ms5637.c b/drivers/iio/pressure/ms5637.c
index 70c70019142ae..c4981b29dccb1 100644
--- a/drivers/iio/pressure/ms5637.c
+++ b/drivers/iio/pressure/ms5637.c
@@ -142,9 +142,9 @@ static const struct iio_info ms5637_info = {
 	.attrs = &ms5637_attribute_group,
 };
 
-static int ms5637_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ms5637_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	const struct ms_tp_data *data;
 	struct ms_tp_dev *dev_data;
 	struct iio_dev *indio_dev;
@@ -238,7 +238,7 @@ static const struct of_device_id ms5637_of_match[] = {
 MODULE_DEVICE_TABLE(of, ms5637_of_match);
 
 static struct i2c_driver ms5637_driver = {
-	.probe = ms5637_probe,
+	.probe_new = ms5637_probe,
 	.id_table = ms5637_id,
 	.driver = {
 		   .name = "ms5637",
-- 
GitLab


From a52833a526b5e38be1aaf3e8f2e74a21bc679cb3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:30 +0100
Subject: [PATCH 2503/4122] iio: pressure: st_pressure_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-177-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/st_pressure_i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/st_pressure_i2c.c b/drivers/iio/pressure/st_pressure_i2c.c
index 58fede8618911..f2c3bb568d16b 100644
--- a/drivers/iio/pressure/st_pressure_i2c.c
+++ b/drivers/iio/pressure/st_pressure_i2c.c
@@ -76,8 +76,7 @@ static const struct i2c_device_id st_press_id_table[] = {
 };
 MODULE_DEVICE_TABLE(i2c, st_press_id_table);
 
-static int st_press_i2c_probe(struct i2c_client *client,
-			      const struct i2c_device_id *id)
+static int st_press_i2c_probe(struct i2c_client *client)
 {
 	const struct st_sensor_settings *settings;
 	struct st_sensor_data *press_data;
@@ -117,7 +116,7 @@ static struct i2c_driver st_press_driver = {
 		.of_match_table = st_press_of_match,
 		.acpi_match_table = ACPI_PTR(st_press_acpi_match),
 	},
-	.probe = st_press_i2c_probe,
+	.probe_new = st_press_i2c_probe,
 	.id_table = st_press_id_table,
 };
 module_i2c_driver(st_press_driver);
-- 
GitLab


From aa9b3321735d42d3a2a2e58ce25be5586396c8a8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:31 +0100
Subject: [PATCH 2504/4122] iio: pressure: t5403: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-178-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/t5403.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/t5403.c b/drivers/iio/pressure/t5403.c
index 685fcf65334fc..2fbf14aff0339 100644
--- a/drivers/iio/pressure/t5403.c
+++ b/drivers/iio/pressure/t5403.c
@@ -208,9 +208,9 @@ static const struct iio_info t5403_info = {
 	.attrs = &t5403_attribute_group,
 };
 
-static int t5403_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int t5403_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct t5403_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -260,7 +260,7 @@ static struct i2c_driver t5403_driver = {
 	.driver = {
 		.name	= "t5403",
 	},
-	.probe = t5403_probe,
+	.probe_new = t5403_probe,
 	.id_table = t5403_id,
 };
 module_i2c_driver(t5403_driver);
-- 
GitLab


From 1bee48f48a69013cdd5ed5fdbff1e0410a18a381 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:32 +0100
Subject: [PATCH 2505/4122] iio: pressure: zpa2326_i2c: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-179-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/pressure/zpa2326_i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/pressure/zpa2326_i2c.c b/drivers/iio/pressure/zpa2326_i2c.c
index f26dd8cbb387c..ade465014be1e 100644
--- a/drivers/iio/pressure/zpa2326_i2c.c
+++ b/drivers/iio/pressure/zpa2326_i2c.c
@@ -38,9 +38,9 @@ static unsigned int zpa2326_i2c_hwid(const struct i2c_client *client)
 		(ZPA2326_SA0(client->addr) << ZPA2326_DEVICE_ID_SA0_SHIFT));
 }
 
-static int zpa2326_probe_i2c(struct i2c_client          *client,
-			     const struct i2c_device_id *i2c_id)
+static int zpa2326_probe_i2c(struct i2c_client          *client)
 {
+	const struct i2c_device_id *i2c_id = i2c_client_get_device_id(client);
 	struct regmap *regmap;
 
 	regmap = devm_regmap_init_i2c(client, &zpa2326_regmap_i2c_config);
@@ -76,7 +76,7 @@ static struct i2c_driver zpa2326_i2c_driver = {
 		.of_match_table = zpa2326_i2c_matches,
 		.pm             = ZPA2326_PM_OPS,
 	},
-	.probe    = zpa2326_probe_i2c,
+	.probe_new = zpa2326_probe_i2c,
 	.remove   = zpa2326_remove_i2c,
 	.id_table = zpa2326_i2c_ids,
 };
-- 
GitLab


From 9d6f774dd4ff150ce7a5bb2bf64918b3b72cda11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:33 +0100
Subject: [PATCH 2506/4122] iio: proximity: isl29501: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-180-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/isl29501.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/proximity/isl29501.c b/drivers/iio/proximity/isl29501.c
index 5b6ea783795d9..7b8f40b7ccf33 100644
--- a/drivers/iio/proximity/isl29501.c
+++ b/drivers/iio/proximity/isl29501.c
@@ -949,8 +949,7 @@ static irqreturn_t isl29501_trigger_handler(int irq, void *p)
 	return IRQ_HANDLED;
 }
 
-static int isl29501_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int isl29501_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct isl29501_private *isl29501;
@@ -1009,7 +1008,7 @@ static struct i2c_driver isl29501_driver = {
 		.name	= "isl29501",
 	},
 	.id_table	= isl29501_id,
-	.probe		= isl29501_probe,
+	.probe_new	= isl29501_probe,
 };
 module_i2c_driver(isl29501_driver);
 
-- 
GitLab


From 5c5c482e02bcd09bf113853b7520ad121dba081b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:34 +0100
Subject: [PATCH 2507/4122] iio: proximity: mb1232: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-181-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/mb1232.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/proximity/mb1232.c b/drivers/iio/proximity/mb1232.c
index 0bca5f74de68a..e70cac8240afc 100644
--- a/drivers/iio/proximity/mb1232.c
+++ b/drivers/iio/proximity/mb1232.c
@@ -180,9 +180,9 @@ static const struct iio_info mb1232_info = {
 	.read_raw = mb1232_read_raw,
 };
 
-static int mb1232_probe(struct i2c_client *client,
-					 const struct i2c_device_id *id)
+static int mb1232_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct mb1232_data *data;
 	int ret;
@@ -264,7 +264,7 @@ static struct i2c_driver mb1232_driver = {
 		.name	= "maxbotix-mb1232",
 		.of_match_table	= of_mb1232_match,
 	},
-	.probe = mb1232_probe,
+	.probe_new = mb1232_probe,
 	.id_table = mb1232_id,
 };
 module_i2c_driver(mb1232_driver);
-- 
GitLab


From 94c80ea7677c9d1886673a4f6764e19c5309b11c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:35 +0100
Subject: [PATCH 2508/4122] iio: proximity: pulsedlight-lidar-lite-v2: Convert
 to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-182-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/pulsedlight-lidar-lite-v2.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
index 791a33d5286c6..c9eead01a0314 100644
--- a/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
+++ b/drivers/iio/proximity/pulsedlight-lidar-lite-v2.c
@@ -253,8 +253,7 @@ static const struct iio_info lidar_info = {
 	.read_raw = lidar_read_raw,
 };
 
-static int lidar_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int lidar_probe(struct i2c_client *client)
 {
 	struct lidar_data *data;
 	struct iio_dev *indio_dev;
@@ -366,7 +365,7 @@ static struct i2c_driver lidar_driver = {
 		.of_match_table	= lidar_dt_ids,
 		.pm	= pm_ptr(&lidar_pm_ops),
 	},
-	.probe		= lidar_probe,
+	.probe_new	= lidar_probe,
 	.remove		= lidar_remove,
 	.id_table	= lidar_id,
 };
-- 
GitLab


From 576eb2b091084425eea6edd36d4407d139387d6d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:36 +0100
Subject: [PATCH 2509/4122] iio: proximity: rfd77402: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-183-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/rfd77402.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/proximity/rfd77402.c b/drivers/iio/proximity/rfd77402.c
index cb80b3c9d0735..44f72b78bd506 100644
--- a/drivers/iio/proximity/rfd77402.c
+++ b/drivers/iio/proximity/rfd77402.c
@@ -257,8 +257,7 @@ static void rfd77402_disable(void *client)
 	rfd77402_powerdown(client);
 }
 
-static int rfd77402_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int rfd77402_probe(struct i2c_client *client)
 {
 	struct rfd77402_data *data;
 	struct iio_dev *indio_dev;
@@ -319,7 +318,7 @@ static struct i2c_driver rfd77402_driver = {
 		.name   = RFD77402_DRV_NAME,
 		.pm     = pm_sleep_ptr(&rfd77402_pm_ops),
 	},
-	.probe  = rfd77402_probe,
+	.probe_new = rfd77402_probe,
 	.id_table = rfd77402_id,
 };
 
-- 
GitLab


From 038b0fac0abe2f5d9beee2511434f231a58f41fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:37 +0100
Subject: [PATCH 2510/4122] iio: proximity: srf08: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-184-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/srf08.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/proximity/srf08.c b/drivers/iio/proximity/srf08.c
index 7ed11339c31e1..61866d0440f76 100644
--- a/drivers/iio/proximity/srf08.c
+++ b/drivers/iio/proximity/srf08.c
@@ -443,9 +443,9 @@ static const struct iio_info srf02_info = {
 	.read_raw = srf08_read_raw,
 };
 
-static int srf08_probe(struct i2c_client *client,
-					 const struct i2c_device_id *id)
+static int srf08_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct srf08_data *data;
 	int ret;
@@ -549,7 +549,7 @@ static struct i2c_driver srf08_driver = {
 		.name	= "srf08",
 		.of_match_table	= of_srf08_match,
 	},
-	.probe = srf08_probe,
+	.probe_new = srf08_probe,
 	.id_table = srf08_id,
 };
 module_i2c_driver(srf08_driver);
-- 
GitLab


From 7660d32e6f9608b5bea7d066c74d63fc7666c510 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:38 +0100
Subject: [PATCH 2511/4122] iio: proximity: sx9500: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-185-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/proximity/sx9500.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/proximity/sx9500.c b/drivers/iio/proximity/sx9500.c
index d4670864ddc7e..8794e75e5bf9c 100644
--- a/drivers/iio/proximity/sx9500.c
+++ b/drivers/iio/proximity/sx9500.c
@@ -901,8 +901,7 @@ static void sx9500_gpio_probe(struct i2c_client *client,
 	}
 }
 
-static int sx9500_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int sx9500_probe(struct i2c_client *client)
 {
 	int ret;
 	struct iio_dev *indio_dev;
@@ -1056,7 +1055,7 @@ static struct i2c_driver sx9500_driver = {
 		.of_match_table = of_match_ptr(sx9500_of_match),
 		.pm = pm_sleep_ptr(&sx9500_pm_ops),
 	},
-	.probe		= sx9500_probe,
+	.probe_new	= sx9500_probe,
 	.remove		= sx9500_remove,
 	.id_table	= sx9500_id,
 };
-- 
GitLab


From 160c7140bb25a1921c4f3845bb15423721bcc0fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:39 +0100
Subject: [PATCH 2512/4122] iio: temperature: mlx90614: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Crt Mori<cmo@melexis.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-186-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/mlx90614.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/temperature/mlx90614.c b/drivers/iio/temperature/mlx90614.c
index 8eb0f962ed25c..909fadb623491 100644
--- a/drivers/iio/temperature/mlx90614.c
+++ b/drivers/iio/temperature/mlx90614.c
@@ -537,9 +537,9 @@ static int mlx90614_probe_num_ir_sensors(struct i2c_client *client)
 	return (ret & MLX90614_CONFIG_DUAL_MASK) ? 1 : 0;
 }
 
-static int mlx90614_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int mlx90614_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct iio_dev *indio_dev;
 	struct mlx90614_data *data;
 	int ret;
@@ -675,7 +675,7 @@ static struct i2c_driver mlx90614_driver = {
 		.of_match_table = mlx90614_of_match,
 		.pm	= pm_ptr(&mlx90614_pm_ops),
 	},
-	.probe = mlx90614_probe,
+	.probe_new = mlx90614_probe,
 	.remove = mlx90614_remove,
 	.id_table = mlx90614_id,
 };
-- 
GitLab


From 40fbb59b9c132e401b7487ab2792e6dcd3f14809 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:40 +0100
Subject: [PATCH 2513/4122] iio: temperature: mlx90632: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Crt Mori <cmo@melexis.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-187-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/mlx90632.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/temperature/mlx90632.c b/drivers/iio/temperature/mlx90632.c
index f1f5ebc145b14..753b7a4ccfddc 100644
--- a/drivers/iio/temperature/mlx90632.c
+++ b/drivers/iio/temperature/mlx90632.c
@@ -1168,9 +1168,9 @@ static int mlx90632_enable_regulator(struct mlx90632_data *data)
 	return ret;
 }
 
-static int mlx90632_probe(struct i2c_client *client,
-			  const struct i2c_device_id *id)
+static int mlx90632_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct mlx90632_data *mlx90632;
 	struct iio_dev *indio_dev;
 	struct regmap *regmap;
@@ -1337,7 +1337,7 @@ static struct i2c_driver mlx90632_driver = {
 		.of_match_table = mlx90632_of_match,
 		.pm	= pm_ptr(&mlx90632_pm_ops),
 	},
-	.probe = mlx90632_probe,
+	.probe_new = mlx90632_probe,
 	.id_table = mlx90632_id,
 };
 module_i2c_driver(mlx90632_driver);
-- 
GitLab


From 89824f4cb65e7a7174cb24e4c0835e4c37f74a21 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:41 +0100
Subject: [PATCH 2514/4122] iio: temperature: tmp006: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-188-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/tmp006.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/temperature/tmp006.c b/drivers/iio/temperature/tmp006.c
index 706a760f30b47..cdf08477e63f7 100644
--- a/drivers/iio/temperature/tmp006.c
+++ b/drivers/iio/temperature/tmp006.c
@@ -212,8 +212,7 @@ static void tmp006_powerdown_cleanup(void *dev)
 	tmp006_power(dev, false);
 }
 
-static int tmp006_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int tmp006_probe(struct i2c_client *client)
 {
 	struct iio_dev *indio_dev;
 	struct tmp006_data *data;
@@ -284,7 +283,7 @@ static struct i2c_driver tmp006_driver = {
 		.name	= "tmp006",
 		.pm	= pm_sleep_ptr(&tmp006_pm_ops),
 	},
-	.probe = tmp006_probe,
+	.probe_new = tmp006_probe,
 	.id_table = tmp006_id,
 };
 module_i2c_driver(tmp006_driver);
-- 
GitLab


From f1e7d53ddfc309149521a2812c3ffcd54eb67b04 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:42 +0100
Subject: [PATCH 2515/4122] iio: temperature: tmp007: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-189-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/tmp007.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/temperature/tmp007.c b/drivers/iio/temperature/tmp007.c
index f3420d8a0e359..8d27aa3bdd6da 100644
--- a/drivers/iio/temperature/tmp007.c
+++ b/drivers/iio/temperature/tmp007.c
@@ -446,9 +446,9 @@ static void tmp007_powerdown_action_cb(void *priv)
 	tmp007_powerdown(data);
 }
 
-static int tmp007_probe(struct i2c_client *client,
-			const struct i2c_device_id *tmp007_id)
+static int tmp007_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *tmp007_id = i2c_client_get_device_id(client);
 	struct tmp007_data *data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -574,7 +574,7 @@ static struct i2c_driver tmp007_driver = {
 		.of_match_table = tmp007_of_match,
 		.pm	= pm_sleep_ptr(&tmp007_pm_ops),
 	},
-	.probe		= tmp007_probe,
+	.probe_new	= tmp007_probe,
 	.id_table	= tmp007_id,
 };
 module_i2c_driver(tmp007_driver);
-- 
GitLab


From d7c9422850aad0e57316f74c2f9cdfc18be39459 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:43 +0100
Subject: [PATCH 2516/4122] iio: temperature: tsys01: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-190-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/tsys01.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/temperature/tsys01.c b/drivers/iio/temperature/tsys01.c
index 60d58ec5b0636..30b268ba82ccf 100644
--- a/drivers/iio/temperature/tsys01.c
+++ b/drivers/iio/temperature/tsys01.c
@@ -176,8 +176,7 @@ static int tsys01_probe(struct iio_dev *indio_dev, struct device *dev)
 	return devm_iio_device_register(dev, indio_dev);
 }
 
-static int tsys01_i2c_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int tsys01_i2c_probe(struct i2c_client *client)
 {
 	struct tsys01_dev *dev_data;
 	struct iio_dev *indio_dev;
@@ -219,7 +218,7 @@ static const struct of_device_id tsys01_of_match[] = {
 MODULE_DEVICE_TABLE(of, tsys01_of_match);
 
 static struct i2c_driver tsys01_driver = {
-	.probe = tsys01_i2c_probe,
+	.probe_new = tsys01_i2c_probe,
 	.id_table = tsys01_id,
 	.driver = {
 		   .name = "tsys01",
-- 
GitLab


From 6a9113195fff8e48d8898cdab82c96bb77780228 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:38:44 +0100
Subject: [PATCH 2517/4122] iio: temperature: tsys02d: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221118224540.619276-191-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/temperature/tsys02d.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/temperature/tsys02d.c b/drivers/iio/temperature/tsys02d.c
index 49c275e4f5106..cdefe046ab17d 100644
--- a/drivers/iio/temperature/tsys02d.c
+++ b/drivers/iio/temperature/tsys02d.c
@@ -121,9 +121,9 @@ static const struct iio_info tsys02d_info = {
 	.attrs = &tsys02d_attribute_group,
 };
 
-static int tsys02d_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int tsys02d_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct ms_ht_dev *dev_data;
 	struct iio_dev *indio_dev;
 	int ret;
@@ -174,7 +174,7 @@ static const struct i2c_device_id tsys02d_id[] = {
 MODULE_DEVICE_TABLE(i2c, tsys02d_id);
 
 static struct i2c_driver tsys02d_driver = {
-	.probe = tsys02d_probe,
+	.probe_new = tsys02d_probe,
 	.id_table = tsys02d_id,
 	.driver = {
 		   .name = "tsys02d",
-- 
GitLab


From 8282ef72a6e5e075f7f2f2f905b2a1adcc0bdcc9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:00 +0100
Subject: [PATCH 2518/4122] staging: iio: adt7316: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-567-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/addac/adt7316-i2c.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/iio/addac/adt7316-i2c.c b/drivers/staging/iio/addac/adt7316-i2c.c
index 5543cc9097070..7e3d1a6f30ba9 100644
--- a/drivers/staging/iio/addac/adt7316-i2c.c
+++ b/drivers/staging/iio/addac/adt7316-i2c.c
@@ -93,9 +93,9 @@ static int adt7316_i2c_multi_write(void *client, u8 reg, u8 count, u8 *data)
  * device probe and remove
  */
 
-static int adt7316_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int adt7316_i2c_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct adt7316_bus bus = {
 		.client = client,
 		.irq = client->irq,
@@ -138,7 +138,7 @@ static struct i2c_driver adt7316_driver = {
 		.of_match_table = adt7316_of_match,
 		.pm = ADT7316_PM_OPS,
 	},
-	.probe = adt7316_i2c_probe,
+	.probe_new = adt7316_i2c_probe,
 	.id_table = adt7316_i2c_id,
 };
 module_i2c_driver(adt7316_driver);
-- 
GitLab


From 67ab4155b2391b7901c83c69a6dec97a4e0f709d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:01 +0100
Subject: [PATCH 2519/4122] staging: iio: ad5933: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-568-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/impedance-analyzer/ad5933.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/iio/impedance-analyzer/ad5933.c b/drivers/staging/iio/impedance-analyzer/ad5933.c
index f177b20f0f2d9..b3152f7153fbb 100644
--- a/drivers/staging/iio/impedance-analyzer/ad5933.c
+++ b/drivers/staging/iio/impedance-analyzer/ad5933.c
@@ -674,9 +674,9 @@ static void ad5933_clk_disable(void *data)
 	clk_disable_unprepare(st->mclk);
 }
 
-static int ad5933_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int ad5933_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	int ret;
 	struct ad5933_state *st;
 	struct iio_dev *indio_dev;
@@ -781,7 +781,7 @@ static struct i2c_driver ad5933_driver = {
 		.name = "ad5933",
 		.of_match_table = ad5933_of_match,
 	},
-	.probe = ad5933_probe,
+	.probe_new = ad5933_probe,
 	.id_table = ad5933_id,
 };
 module_i2c_driver(ad5933_driver);
-- 
GitLab


From 9fe1614f96af91ffd876da529cf9d667992e73a6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:02 +0100
Subject: [PATCH 2520/4122] staging: iio: ade7854: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221118224540.619276-569-uwe@kleine-koenig.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/meter/ade7854-i2c.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/iio/meter/ade7854-i2c.c b/drivers/staging/iio/meter/ade7854-i2c.c
index 71b67dd3c8e98..572d714eb0ddb 100644
--- a/drivers/staging/iio/meter/ade7854-i2c.c
+++ b/drivers/staging/iio/meter/ade7854-i2c.c
@@ -112,8 +112,7 @@ unlock:
 	return ret;
 }
 
-static int ade7854_i2c_probe(struct i2c_client *client,
-			     const struct i2c_device_id *id)
+static int ade7854_i2c_probe(struct i2c_client *client)
 {
 	struct ade7854_state *st;
 	struct iio_dev *indio_dev;
@@ -144,7 +143,7 @@ static struct i2c_driver ade7854_i2c_driver = {
 	.driver = {
 		.name = "ade7854",
 	},
-	.probe    = ade7854_i2c_probe,
+	.probe_new = ade7854_i2c_probe,
 	.id_table = ade7854_id,
 };
 module_i2c_driver(ade7854_i2c_driver);
-- 
GitLab


From 9ee95ae4cffd04e0773f16118b343104bab80634 Mon Sep 17 00:00:00 2001
From: Rajat Khandelwal <rajat.khandelwal@linux.intel.com>
Date: Tue, 22 Nov 2022 23:23:00 +0530
Subject: [PATCH 2521/4122] iio: temperature: Add driver support for Maxim
 MAX30208
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Maxim MAX30208 is a digital temperature sensor with 0.1°C accuracy.

Add support for max30208 driver in iio subsystem.

Datasheet: https://datasheets.maximintegrated.com/en/ds/MAX30208.pdf
Signed-off-by: Rajat Khandelwal <rajat.khandelwal@linux.intel.com>
Link: https://lore.kernel.org/r/20221122175300.800956-1-rajat.khandelwal@linux.intel.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 MAINTAINERS                        |   6 +
 drivers/iio/temperature/Kconfig    |  10 ++
 drivers/iio/temperature/Makefile   |   1 +
 drivers/iio/temperature/max30208.c | 252 +++++++++++++++++++++++++++++
 4 files changed, 269 insertions(+)
 create mode 100644 drivers/iio/temperature/max30208.c

diff --git a/MAINTAINERS b/MAINTAINERS
index be373dfae4dd4..f1e1ac23a1c66 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12518,6 +12518,12 @@ S:	Maintained
 F:	Documentation/devicetree/bindings/regulator/maxim,max20086.yaml
 F:	drivers/regulator/max20086-regulator.c
 
+MAXIM MAX30208 TEMPERATURE SENSOR DRIVER
+M:	Rajat Khandelwal <rajat.khandelwal@linux.intel.com>
+L:	linux-iio@vger.kernel.org
+S:	Maintained
+F:	drivers/iio/temperature/max30208.c
+
 MAXIM MAX77650 PMIC MFD DRIVER
 M:	Bartosz Golaszewski <brgl@bgdev.pl>
 L:	linux-kernel@vger.kernel.org
diff --git a/drivers/iio/temperature/Kconfig b/drivers/iio/temperature/Kconfig
index e8ed849e3b766..ed384f33e0c76 100644
--- a/drivers/iio/temperature/Kconfig
+++ b/drivers/iio/temperature/Kconfig
@@ -128,6 +128,16 @@ config TSYS02D
 	  This driver can also be built as a module. If so, the module will
 	  be called tsys02d.
 
+config MAX30208
+	tristate "Maxim MAX30208 digital temperature sensor"
+	depends on I2C
+	help
+	  If you say yes here you get support for Maxim MAX30208
+	  digital temperature sensor connected via I2C.
+
+	  This driver can also be built as a module. If so, the module
+	  will be called max30208.
+
 config MAX31856
 	tristate "MAX31856 thermocouple sensor"
 	depends on SPI
diff --git a/drivers/iio/temperature/Makefile b/drivers/iio/temperature/Makefile
index dd08e562ffe0e..dfec8c6d30193 100644
--- a/drivers/iio/temperature/Makefile
+++ b/drivers/iio/temperature/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_IQS620AT_TEMP) += iqs620at-temp.o
 obj-$(CONFIG_LTC2983) += ltc2983.o
 obj-$(CONFIG_HID_SENSOR_TEMP) += hid-sensor-temperature.o
 obj-$(CONFIG_MAXIM_THERMOCOUPLE) += maxim_thermocouple.o
+obj-$(CONFIG_MAX30208) += max30208.o
 obj-$(CONFIG_MAX31856) += max31856.o
 obj-$(CONFIG_MAX31865) += max31865.o
 obj-$(CONFIG_MLX90614) += mlx90614.o
diff --git a/drivers/iio/temperature/max30208.c b/drivers/iio/temperature/max30208.c
new file mode 100644
index 0000000000000..c85c21474711f
--- /dev/null
+++ b/drivers/iio/temperature/max30208.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (c) Rajat Khandelwal <rajat.khandelwal@linux.intel.com>
+ *
+ * Maxim MAX30208 digital temperature sensor with 0.1°C accuracy
+ * (7-bit I2C slave address (0x50 - 0x53))
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/iio/iio.h>
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/types.h>
+
+#define MAX30208_STATUS			0x00
+#define MAX30208_STATUS_TEMP_RDY	BIT(0)
+#define MAX30208_INT_ENABLE		0x01
+#define MAX30208_INT_ENABLE_TEMP_RDY	BIT(0)
+
+#define MAX30208_FIFO_OVF_CNTR		0x06
+#define MAX30208_FIFO_DATA_CNTR		0x07
+#define MAX30208_FIFO_DATA		0x08
+
+#define MAX30208_FIFO_CONFIG		0x0a
+#define MAX30208_FIFO_CONFIG_RO		BIT(1)
+
+#define MAX30208_SYSTEM_CTRL		0x0c
+#define MAX30208_SYSTEM_CTRL_RESET	0x01
+
+#define MAX30208_TEMP_SENSOR_SETUP	0x14
+#define MAX30208_TEMP_SENSOR_SETUP_CONV	BIT(0)
+
+struct max30208_data {
+	struct i2c_client *client;
+	struct iio_dev *indio_dev;
+	struct mutex lock; /* Lock to prevent concurrent reads of temperature readings */
+};
+
+static const struct iio_chan_spec max30208_channels[] = {
+	{
+		.type = IIO_TEMP,
+		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW) | BIT(IIO_CHAN_INFO_SCALE),
+	},
+};
+
+/**
+ * max30208_request() - Request a reading
+ * @data: Struct comprising member elements of the device
+ *
+ * Requests a reading from the device and waits until the conversion is ready.
+ */
+static int max30208_request(struct max30208_data *data)
+{
+	/*
+	 * Sensor can take up to 500 ms to respond so execute a total of
+	 * 10 retries to give the device sufficient time.
+	 */
+	int retries = 10;
+	u8 regval;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(data->client, MAX30208_TEMP_SENSOR_SETUP);
+	if (ret < 0)
+		return ret;
+
+	regval = ret | MAX30208_TEMP_SENSOR_SETUP_CONV;
+
+	ret = i2c_smbus_write_byte_data(data->client, MAX30208_TEMP_SENSOR_SETUP, regval);
+	if (ret)
+		return ret;
+
+	while (retries--) {
+		ret = i2c_smbus_read_byte_data(data->client, MAX30208_STATUS);
+		if (ret < 0)
+			return ret;
+
+		if (ret & MAX30208_STATUS_TEMP_RDY)
+			return 0;
+
+		msleep(50);
+	}
+	dev_err(&data->client->dev, "Temperature conversion failed\n");
+
+	return -ETIMEDOUT;
+}
+
+static int max30208_update_temp(struct max30208_data *data)
+{
+	u8 data_count;
+	int ret;
+
+	mutex_lock(&data->lock);
+
+	ret = max30208_request(data);
+	if (ret)
+		goto unlock;
+
+	ret = i2c_smbus_read_byte_data(data->client, MAX30208_FIFO_OVF_CNTR);
+	if (ret < 0)
+		goto unlock;
+	else if (!ret) {
+		ret = i2c_smbus_read_byte_data(data->client, MAX30208_FIFO_DATA_CNTR);
+		if (ret < 0)
+			goto unlock;
+
+		data_count = ret;
+	} else
+		data_count = 1;
+
+	while (data_count) {
+		ret = i2c_smbus_read_word_swapped(data->client, MAX30208_FIFO_DATA);
+		if (ret < 0)
+			goto unlock;
+
+		data_count--;
+	}
+
+unlock:
+	mutex_unlock(&data->lock);
+	return ret;
+}
+
+/**
+ * max30208_config_setup() - Set up FIFO configuration register
+ * @data: Struct comprising member elements of the device
+ *
+ * Sets the rollover bit to '1' to enable overwriting FIFO during overflow.
+ */
+static int max30208_config_setup(struct max30208_data *data)
+{
+	u8 regval;
+	int ret;
+
+	ret = i2c_smbus_read_byte_data(data->client, MAX30208_FIFO_CONFIG);
+	if (ret < 0)
+		return ret;
+
+	regval = ret | MAX30208_FIFO_CONFIG_RO;
+
+	ret = i2c_smbus_write_byte_data(data->client, MAX30208_FIFO_CONFIG, regval);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static int max30208_read(struct iio_dev *indio_dev,
+			 struct iio_chan_spec const *chan,
+			 int *val, int *val2, long mask)
+{
+	struct max30208_data *data = iio_priv(indio_dev);
+	int ret;
+
+	switch (mask) {
+	case IIO_CHAN_INFO_RAW:
+		ret = max30208_update_temp(data);
+		if (ret < 0)
+			return ret;
+
+		*val = sign_extend32(ret, 15);
+		return IIO_VAL_INT;
+
+	case IIO_CHAN_INFO_SCALE:
+		*val = 5;
+		return IIO_VAL_INT;
+
+	default:
+		return -EINVAL;
+	}
+}
+
+static const struct iio_info max30208_info = {
+	.read_raw = max30208_read,
+};
+
+static int max30208_probe(struct i2c_client *i2c)
+{
+	struct device *dev = &i2c->dev;
+	struct max30208_data *data;
+	struct iio_dev *indio_dev;
+	int ret;
+
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*data));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	data = iio_priv(indio_dev);
+	data->client = i2c;
+	mutex_init(&data->lock);
+
+	indio_dev->name = "max30208";
+	indio_dev->channels = max30208_channels;
+	indio_dev->num_channels = ARRAY_SIZE(max30208_channels);
+	indio_dev->info = &max30208_info;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+
+	ret = i2c_smbus_write_byte_data(data->client, MAX30208_SYSTEM_CTRL,
+					MAX30208_SYSTEM_CTRL_RESET);
+	if (ret) {
+		dev_err(dev, "Failure in performing reset\n");
+		return ret;
+	}
+
+	msleep(50);
+
+	ret = max30208_config_setup(data);
+	if (ret)
+		return ret;
+
+	ret = devm_iio_device_register(dev, indio_dev);
+	if (ret) {
+		dev_err(dev, "Failed to register IIO device\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static const struct i2c_device_id max30208_id_table[] = {
+	{ "max30208" },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, max30208_id_table);
+
+static const struct acpi_device_id max30208_acpi_match[] = {
+	{ "MAX30208" },
+	{ }
+};
+MODULE_DEVICE_TABLE(acpi, max30208_acpi_match);
+
+static const struct of_device_id max30208_of_match[] = {
+	{ .compatible = "maxim,max30208" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, max30208_of_match);
+
+static struct i2c_driver max30208_driver = {
+	.driver = {
+		.name = "max30208",
+		.of_match_table = max30208_of_match,
+		.acpi_match_table = max30208_acpi_match,
+	},
+	.probe_new = max30208_probe,
+	.id_table = max30208_id_table,
+};
+module_i2c_driver(max30208_driver);
+
+MODULE_AUTHOR("Rajat Khandelwal <rajat.khandelwal@linux.intel.com>");
+MODULE_DESCRIPTION("Maxim MAX30208 digital temperature sensor");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 1140f96cd4af9bcf0065b0295c4486e1dba8426e Mon Sep 17 00:00:00 2001
From: Edmund Berenson <edmund.berenson@emlix.com>
Date: Tue, 22 Nov 2022 17:14:37 +0100
Subject: [PATCH 2522/4122] dt-bindings: iio: adc: ad7923: adjust documentation

- The ad7927 is fully compatible with ad7928 driver, add
documentation for device.
- ad7923 and ad7924 are treated the same in the driver, show
the relationship in the documentation.

Suggested-by: Lukasz Zemla <Lukasz.Zemla@woodward.com>
Signed-off-by: Edmund Berenson <edmund.berenson@emlix.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221122161437.18937-1-edmund.berenson@emlix.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/adc/adi,ad7923.yaml          | 26 ++++++++++++-------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml
index 07f9d1c09c7da..85148338c597c 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad7923.yaml
@@ -11,7 +11,7 @@ maintainers:
 
 description: |
   Analog Devices AD7904, AD7914, AD7923, AD7924 4 Channel ADCs, and AD7908,
-   AD7918, AD7928 8 Channels ADCs.
+   AD7918, AD7927, AD7928 8 Channels ADCs.
 
   Specifications about the part can be found at:
     https://www.analog.com/media/en/technical-documentation/data-sheets/AD7923.pdf
@@ -20,14 +20,22 @@ description: |
 
 properties:
   compatible:
-    enum:
-      - adi,ad7904
-      - adi,ad7914
-      - adi,ad7923
-      - adi,ad7924
-      - adi,ad7908
-      - adi,ad7918
-      - adi,ad7928
+    oneOf:
+      - enum:
+          - adi,ad7904
+          - adi,ad7908
+          - adi,ad7914
+          - adi,ad7918
+          - adi,ad7923
+          - adi,ad7928
+      - const: adi,ad7924
+        deprecated: true
+      - items:
+          - const: adi,ad7924
+          - const: adi,ad7923
+      - items:
+          - const: adi,ad7927
+          - const: adi,ad7928
 
   reg:
     maxItems: 1
-- 
GitLab


From 99b43a15915543484d7538cb32f49901e802628a Mon Sep 17 00:00:00 2001
From: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Date: Tue, 22 Nov 2022 14:47:18 +0300
Subject: [PATCH 2523/4122] iio: adc: max11410: fix incomplete vref buffer mask

VREFP bit was missing from channel configuration mask and VREFN bit was
included twice instead which fails to enable positive reference buffer when
requested by a channel. Channels that don't enable vrefp buffer were not
affected.

Fixes: a44ef7c46097 ("iio: adc: add max11410 adc driver")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Ibrahim Tilki <Ibrahim.Tilki@analog.com>
Link: https://lore.kernel.org/r/20221122114718.17557-1-Ibrahim.Tilki@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/max11410.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/adc/max11410.c b/drivers/iio/adc/max11410.c
index 8cd5663671874..fdc9f03135b54 100644
--- a/drivers/iio/adc/max11410.c
+++ b/drivers/iio/adc/max11410.c
@@ -370,7 +370,7 @@ static int max11410_configure_channel(struct max11410_state *st,
 		 FIELD_PREP(MAX11410_CTRL_UNIPOLAR_BIT, cfg.bipolar ? 0 : 1);
 	ret = regmap_update_bits(st->regmap, MAX11410_REG_CTRL,
 				 MAX11410_CTRL_REFSEL_MASK |
-				 MAX11410_CTRL_VREFN_BUF_BIT |
+				 MAX11410_CTRL_VREFP_BUF_BIT |
 				 MAX11410_CTRL_VREFN_BUF_BIT |
 				 MAX11410_CTRL_UNIPOLAR_BIT, regval);
 	if (ret)
-- 
GitLab


From 99c05e4283a19a02a256f14100ca4ec3b2da3f62 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:49 +0200
Subject: [PATCH 2524/4122] iio: adis: add '__adis_enable_irq()' implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add '__adis_enable_irq()' implementation which is the unlocked
version of 'adis_enable_irq()'.
Call '__adis_enable_irq()' instead of 'adis_enable_irq()' from
'__adis_intial_startup()' to keep the expected unlocked functionality.

This fix is needed to remove a deadlock for all devices which are
using 'adis_initial_startup()'. The deadlock occurs because the
same mutex is acquired twice, without releasing it.
The mutex is acquired once inside 'adis_initial_startup()', before
calling '__adis_initial_startup()', and once inside
'adis_enable_irq()', which is called by '__adis_initial_startup()'.
The deadlock is removed by calling '__adis_enable_irq()', instead of
'adis_enable_irq()' from within '__adis_initial_startup()'.

Fixes: b600bd7eb3335 ("iio: adis: do not disabe IRQs in 'adis_init()'")
Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-2-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/adis.c       | 28 ++++++++++------------------
 include/linux/iio/imu/adis.h | 13 ++++++++++++-
 2 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/drivers/iio/imu/adis.c b/drivers/iio/imu/adis.c
index f7fcfd04f659d..bc40240b29e26 100644
--- a/drivers/iio/imu/adis.c
+++ b/drivers/iio/imu/adis.c
@@ -270,23 +270,19 @@ EXPORT_SYMBOL_NS(adis_debugfs_reg_access, IIO_ADISLIB);
 #endif
 
 /**
- * adis_enable_irq() - Enable or disable data ready IRQ
+ * __adis_enable_irq() - Enable or disable data ready IRQ (unlocked)
  * @adis: The adis device
  * @enable: Whether to enable the IRQ
  *
  * Returns 0 on success, negative error code otherwise
  */
-int adis_enable_irq(struct adis *adis, bool enable)
+int __adis_enable_irq(struct adis *adis, bool enable)
 {
-	int ret = 0;
+	int ret;
 	u16 msc;
 
-	mutex_lock(&adis->state_lock);
-
-	if (adis->data->enable_irq) {
-		ret = adis->data->enable_irq(adis, enable);
-		goto out_unlock;
-	}
+	if (adis->data->enable_irq)
+		return adis->data->enable_irq(adis, enable);
 
 	if (adis->data->unmasked_drdy) {
 		if (enable)
@@ -294,12 +290,12 @@ int adis_enable_irq(struct adis *adis, bool enable)
 		else
 			disable_irq(adis->spi->irq);
 
-		goto out_unlock;
+		return 0;
 	}
 
 	ret = __adis_read_reg_16(adis, adis->data->msc_ctrl_reg, &msc);
 	if (ret)
-		goto out_unlock;
+		return ret;
 
 	msc |= ADIS_MSC_CTRL_DATA_RDY_POL_HIGH;
 	msc &= ~ADIS_MSC_CTRL_DATA_RDY_DIO2;
@@ -308,13 +304,9 @@ int adis_enable_irq(struct adis *adis, bool enable)
 	else
 		msc &= ~ADIS_MSC_CTRL_DATA_RDY_EN;
 
-	ret = __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc);
-
-out_unlock:
-	mutex_unlock(&adis->state_lock);
-	return ret;
+	return __adis_write_reg_16(adis, adis->data->msc_ctrl_reg, msc);
 }
-EXPORT_SYMBOL_NS(adis_enable_irq, IIO_ADISLIB);
+EXPORT_SYMBOL_NS(__adis_enable_irq, IIO_ADISLIB);
 
 /**
  * __adis_check_status() - Check the device for error conditions (unlocked)
@@ -445,7 +437,7 @@ int __adis_initial_startup(struct adis *adis)
 	 * with 'IRQF_NO_AUTOEN' anyways.
 	 */
 	if (!adis->data->unmasked_drdy)
-		adis_enable_irq(adis, false);
+		__adis_enable_irq(adis, false);
 
 	if (!adis->data->prod_id_reg)
 		return 0;
diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index 515ca09764fe4..bcbefb7574751 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -402,9 +402,20 @@ static inline int adis_update_bits_base(struct adis *adis, unsigned int reg,
 	__adis_update_bits_base(adis, reg, mask, val, sizeof(val));	\
 })
 
-int adis_enable_irq(struct adis *adis, bool enable);
 int __adis_check_status(struct adis *adis);
 int __adis_initial_startup(struct adis *adis);
+int __adis_enable_irq(struct adis *adis, bool enable);
+
+static inline int adis_enable_irq(struct adis *adis, bool enable)
+{
+	int ret;
+
+	mutex_lock(&adis->state_lock);
+	ret = __adis_enable_irq(adis, enable);
+	mutex_unlock(&adis->state_lock);
+
+	return ret;
+}
 
 static inline int adis_check_status(struct adis *adis)
 {
-- 
GitLab


From 594ff4c49e9dd7aba84d82052643c1e4b09b8788 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:50 +0200
Subject: [PATCH 2525/4122] iio: accel: adis16201: Call
 '__adis_initial_startup()' in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Call '__adis_initial_startup()' instead of its locked variant in
'adis16201_probe()'.
The locks are not needed at this point.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-3-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adis16201.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/accel/adis16201.c b/drivers/iio/accel/adis16201.c
index dfb8e2e5bdf58..d054721859b3b 100644
--- a/drivers/iio/accel/adis16201.c
+++ b/drivers/iio/accel/adis16201.c
@@ -281,7 +281,7 @@ static int adis16201_probe(struct spi_device *spi)
 	if (ret)
 		return ret;
 
-	ret = adis_initial_startup(st);
+	ret = __adis_initial_startup(st);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 09f8360f3a8843733d9e54960405c902982062f4 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:51 +0200
Subject: [PATCH 2526/4122] iio: accel: adis16209: Call
 '__adis_initial_startup()' in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Call '__adis_initial_startup()' instead of its locked variant in
'adis16209_probe()'.
The locks are not needed at this point.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-4-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/accel/adis16209.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/accel/adis16209.c b/drivers/iio/accel/adis16209.c
index 5a9c6e2296f1d..0035e4f4db63f 100644
--- a/drivers/iio/accel/adis16209.c
+++ b/drivers/iio/accel/adis16209.c
@@ -291,7 +291,7 @@ static int adis16209_probe(struct spi_device *spi)
 	if (ret)
 		return ret;
 
-	ret = adis_initial_startup(st);
+	ret = __adis_initial_startup(st);
 	if (ret)
 		return ret;
 
-- 
GitLab


From c5de7d4c026f4c4310e6cb1e06e1ae0bd57cd838 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:52 +0200
Subject: [PATCH 2527/4122] iio: gyro: adis16136: Call
 '__adis_initial_startup()' in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Call '__adis_initial_startup()' instead of its locked variant in
'adis16136_probe()'.
The locks are not needed at this point.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-5-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/adis16136.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/gyro/adis16136.c b/drivers/iio/gyro/adis16136.c
index 71295709f2b96..c95cf41be34b6 100644
--- a/drivers/iio/gyro/adis16136.c
+++ b/drivers/iio/gyro/adis16136.c
@@ -429,7 +429,7 @@ static int adis16136_initial_setup(struct iio_dev *indio_dev)
 	uint16_t prod_id;
 	int ret;
 
-	ret = adis_initial_startup(&adis16136->adis);
+	ret = __adis_initial_startup(&adis16136->adis);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 2647f0e4d397fa5f80c78370d514b05a2110f987 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:53 +0200
Subject: [PATCH 2528/4122] iio: gyro: adis16260: Call
 '__adis_initial_startup()' in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Call '__adis_initial_startup()' instead of its locked variant in
'adis16260_probe()'.
The locks are not needed at this point.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-6-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/gyro/adis16260.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/gyro/adis16260.c b/drivers/iio/gyro/adis16260.c
index eaf57bd339edd..112d635b7dfd3 100644
--- a/drivers/iio/gyro/adis16260.c
+++ b/drivers/iio/gyro/adis16260.c
@@ -395,7 +395,7 @@ static int adis16260_probe(struct spi_device *spi)
 		return ret;
 
 	/* Get the device into a sane initial state */
-	ret = adis_initial_startup(&adis16260->adis);
+	ret = __adis_initial_startup(&adis16260->adis);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 40fd61b0698c1ecf73f8da61b461937b7d91ee26 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:54 +0200
Subject: [PATCH 2529/4122] iio: imu: adis16400: Call
 '__adis_initial_startup()' in probe
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Call '__adis_initial_startup()' instead of its locked variant in
'adis16400_probe()'.
The locks are not needed at this point.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-7-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/adis16400.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/imu/adis16400.c b/drivers/iio/imu/adis16400.c
index 17bb0c40a1495..c02fc35dceb40 100644
--- a/drivers/iio/imu/adis16400.c
+++ b/drivers/iio/imu/adis16400.c
@@ -445,7 +445,7 @@ static int adis16400_initial_setup(struct iio_dev *indio_dev)
 	st->adis.spi->mode = SPI_MODE_3;
 	spi_setup(st->adis.spi);
 
-	ret = adis_initial_startup(&st->adis);
+	ret = __adis_initial_startup(&st->adis);
 	if (ret)
 		return ret;
 
-- 
GitLab


From f3b0ab42a875d3c4b05311bd1ebcc0322a55c30c Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:55 +0200
Subject: [PATCH 2530/4122] staging: iio: accel: adis16203: Call
 '__adis_initial_startup()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Call '__adis_initial_startup()' instead of its locked variant in
'adis16203_probe()'.
The locks are not needed at this point.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-8-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/accel/adis16203.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/iio/accel/adis16203.c b/drivers/staging/iio/accel/adis16203.c
index 62d5397ff1f98..c0e4c9266b5f4 100644
--- a/drivers/staging/iio/accel/adis16203.c
+++ b/drivers/staging/iio/accel/adis16203.c
@@ -285,7 +285,7 @@ static int adis16203_probe(struct spi_device *spi)
 		return ret;
 
 	/* Get the device into a sane initial state */
-	ret = adis_initial_startup(st);
+	ret = __adis_initial_startup(st);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 60105b59cc12813267b922fa5969e38f9e8f9b50 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:56 +0200
Subject: [PATCH 2531/4122] staging: iio: accel: adis16240: Call
 '__adis_initial_startup()'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Call '__adis_initial_startup()' instead of its locked variant in
'adis16240_probe()'.
The locks are not needed at this point.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-9-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/staging/iio/accel/adis16240.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/iio/accel/adis16240.c b/drivers/staging/iio/accel/adis16240.c
index bca857eef92e2..337492785f04c 100644
--- a/drivers/staging/iio/accel/adis16240.c
+++ b/drivers/staging/iio/accel/adis16240.c
@@ -414,7 +414,7 @@ static int adis16240_probe(struct spi_device *spi)
 		return ret;
 
 	/* Get the device into a sane initial state */
-	ret = adis_initial_startup(st);
+	ret = __adis_initial_startup(st);
 	if (ret)
 		return ret;
 
-- 
GitLab


From c613afc1f257e1e3229b8dcade43a104a26541c8 Mon Sep 17 00:00:00 2001
From: Ramona Bolboaca <ramona.bolboaca@analog.com>
Date: Tue, 22 Nov 2022 10:27:57 +0200
Subject: [PATCH 2532/4122] iio: imu: adis: Remove adis_initial_startup
 function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove adis_initial_startup function since it is not used
anymore.

Signed-off-by: Ramona Bolboaca <ramona.bolboaca@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221122082757.449452-10-ramona.bolboaca@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 include/linux/iio/imu/adis.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/include/linux/iio/imu/adis.h b/include/linux/iio/imu/adis.h
index bcbefb7574751..dc9ea299e0885 100644
--- a/include/linux/iio/imu/adis.h
+++ b/include/linux/iio/imu/adis.h
@@ -428,18 +428,6 @@ static inline int adis_check_status(struct adis *adis)
 	return ret;
 }
 
-/* locked version of __adis_initial_startup() */
-static inline int adis_initial_startup(struct adis *adis)
-{
-	int ret;
-
-	mutex_lock(&adis->state_lock);
-	ret = __adis_initial_startup(adis);
-	mutex_unlock(&adis->state_lock);
-
-	return ret;
-}
-
 static inline void adis_dev_lock(struct adis *adis)
 {
 	mutex_lock(&adis->state_lock);
-- 
GitLab


From 8aa2e715ca65757e0eb625862491f95e8baf0328 Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Tue, 22 Nov 2022 09:35:33 +0800
Subject: [PATCH 2533/4122] iio: use devm_platform_get_and_ioremap_resource()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/202211220935338446115@zte.com.cn
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ad7606_par.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iio/adc/ad7606_par.c b/drivers/iio/adc/ad7606_par.c
index b912b4df9b563..d8408052262e4 100644
--- a/drivers/iio/adc/ad7606_par.c
+++ b/drivers/iio/adc/ad7606_par.c
@@ -57,8 +57,7 @@ static int ad7606_par_probe(struct platform_device *pdev)
 	if (irq < 0)
 		return irq;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	addr = devm_ioremap_resource(&pdev->dev, res);
+	addr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(addr))
 		return PTR_ERR(addr);
 
-- 
GitLab


From 980389d06d08442fad0139874bff455c76125e47 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Fri, 18 Nov 2022 13:32:08 +0100
Subject: [PATCH 2534/4122] iio: addac: ad74413r: fix integer promotion bug in
 ad74413_get_input_current_offset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The constant AD74413R_ADC_RESULT_MAX is defined via GENMASK, so its
type is "unsigned long".

Hence in the expression voltage_offset * AD74413R_ADC_RESULT_MAX,
voltage_offset is first promoted to unsigned long, and since it may be
negative, that results in a garbage value. For example, when range is
AD74413R_ADC_RANGE_5V_BI_DIR, voltage_offset is -2500 and
voltage_range is 5000, so the RHS of this assignment is, depending on
sizeof(long), either 826225UL or 3689348814709142UL, which after
truncation to int then results in either 826225 or 1972216214 being
the output from in_currentX_offset.

Casting to int avoids that promotion and results in the correct -32767
output.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Fixes: fea251b6a5db (iio: addac: add AD74413R driver)
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221118123209.1658420-1-linux@rasmusvillemoes.dk
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/addac/ad74413r.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c
index 899bcd83f40bc..e0e130ba9d3ec 100644
--- a/drivers/iio/addac/ad74413r.c
+++ b/drivers/iio/addac/ad74413r.c
@@ -691,7 +691,7 @@ static int ad74413_get_input_current_offset(struct ad74413r_state *st,
 	if (ret)
 		return ret;
 
-	*val = voltage_offset * AD74413R_ADC_RESULT_MAX / voltage_range;
+	*val = voltage_offset * (int)AD74413R_ADC_RESULT_MAX / voltage_range;
 
 	return IIO_VAL_INT;
 }
-- 
GitLab


From 0e69ba0dd56700b173100984f8a89fe4605591a5 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Thu, 17 Nov 2022 10:09:15 +0200
Subject: [PATCH 2535/4122] dt-bindings: iio: addac: add AD74115

The AD74115H is a single-channel, software-configurable, input and
output device for industrial control applications. The AD74115H
provides a wide range of use cases, integrated on a single chip.

These use cases include analog output, analog input, digital output,
digital input, resistance temperature detector (RTD), and thermocouple
measurement capability. The AD74115H also has an integrated HART modem.

A serial peripheral interface (SPI) is used to handle all communications
to the device, including communications with the HART modem. The digital
input and digital outputs can be accessed via the SPI or the
general-purpose input and output (GPIO) pins to support higher
speed data rates.

The device features a 16-bit, sigma-delta analog-to-digital converter
(ADC) and a 14-bit digital-to-analog converter (DAC).
The AD74115H contains a high accuracy 2.5 V on-chip reference that can
be used as the DAC and ADC reference.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221117080916.411766-2-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/addac/adi,ad74115.yaml       | 373 ++++++++++++++++++
 MAINTAINERS                                   |   7 +
 2 files changed, 380 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml

diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
new file mode 100644
index 0000000000000..72d2e910f2065
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
@@ -0,0 +1,373 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/addac/adi,ad74115.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Analog Devices AD74115H device
+
+maintainers:
+  - Cosmin Tanislav <cosmin.tanislav@analog.com>
+
+description: |
+  The AD74115H is a single-channel software configurable input/output
+  device for industrial control applications. It contains functionality for
+  analog output, analog input, digital output, digital input, resistance
+  temperature detector, and thermocouple measurements integrated into a single
+  chip solution with an SPI interface. The device features a 16-bit ADC and a
+  14-bit DAC.
+
+    https://www.analog.com/en/products/ad74115h.html
+
+properties:
+  compatible:
+    enum:
+      - adi,ad74115h
+
+  reg:
+    maxItems: 1
+
+  spi-max-frequency:
+    maximum: 24000000
+
+  spi-cpol: true
+
+  reset-gpios: true
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      enum:
+        - adc_rdy
+        - alert
+
+  avdd-supply: true
+  avcc-supply: true
+  dvcc-supply: true
+  dovdd-supply: true
+  refin-supply: true
+
+  adi,ch-func:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Channel function.
+      0 - High impedance
+      1 - Voltage output
+      2 - Current output
+      3 - Voltage input
+      4 - Current input, externally-powered
+      5 - Current input, loop-powered
+      6 - Resistance input
+      7 - RTD measure
+      8 - Digital input logic
+      9 - Digital input, loop-powered
+      10 - Current output with HART
+      11 - Current input, externally-powered, with HART
+      12 - Current input, loop-powered, with HART
+    minimum: 0
+    maximum: 12
+    default: 0
+
+  adi,conv2-mux:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Input node for ADC conversion 2.
+      0 - SENSE_EXT1 to AGND_SENSE
+      1 - SENSE_EXT2 to AGND_SENSE
+      2 - SENSE_EXT2 to SENSE_EXT1
+      3 - AGND to AGND
+    minimum: 0
+    maximum: 3
+    default: 0
+
+  adi,conv2-range-microvolt:
+    description: Conversion range for ADC conversion 2.
+    oneOf:
+      - items:
+          - enum: [-2500000, 0]
+          - const: 2500000
+      - items:
+          - enum: [-12000000, 0]
+          - const: 12000000
+      - items:
+          - const: -2500000
+          - const: 0
+      - items:
+          - const: -104000
+          - const: 104000
+      - items:
+          - const: 0
+          - const: 625000
+
+  adi,sense-agnd-buffer-low-power:
+    type: boolean
+    description:
+      Whether to enable low-power buffered mode for the AGND sense pin.
+
+  adi,lf-buffer-low-power:
+    type: boolean
+    description:
+      Whether to enable low-power buffered mode for the low-side filtered
+      sense pin.
+
+  adi,hf-buffer-low-power:
+    type: boolean
+    description:
+      Whether to enable low-power buffered mode for the high-side filtered
+      sense pin.
+
+  adi,ext2-buffer-low-power:
+    type: boolean
+    description: Whether to enable low-power buffered mode for the EXT2 pin.
+
+  adi,ext1-buffer-low-power:
+    type: boolean
+    description: Whether to enable low-power buffered mode for the EXT1 pin.
+
+  adi,comparator-invert:
+    type: boolean
+    description: Whether to invert the comparator output.
+
+  adi,digital-input-sink-range-high:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      When not present, the digital input range is from 0 to 3700uA in steps
+      of 120uA, with a ~2k series resistance.
+      When present, the digital input range is from 0 to 7400uA in steps
+      of 240uA, with a ~1k series resistance.
+
+  adi,digital-input-sink-microamp:
+    description: Sink current in digital input mode.
+    minimum: 0
+    maximum: 3700
+    default: 0
+
+  adi,digital-input-debounce-mode-counter-reset:
+    type: boolean
+    description: |
+      When not present, a counter increments when the signal is asserted
+      and decrements when the signal is de-asserted.
+      When present, a counter increments while the signal is asserted and
+      resets when the signal de-asserts
+
+  adi,digital-input-unbuffered:
+    type: boolean
+    description: Whether to buffer digital input signals.
+
+  adi,digital-input-short-circuit-detection:
+    type: boolean
+    description: Whether to detect digital input short circuits.
+
+  adi,digital-input-open-circuit-detection:
+    type: boolean
+    description: Whether to detect digital input open circuits.
+
+  adi,digital-input-threshold-mode-fixed:
+    type: boolean
+    description: |
+      When not present, the digital input threshold range is -0.96 * AVDD
+      to AVDD.
+      When present, the threshold range is fixed from -19V to 30V.
+
+  adi,dac-bipolar:
+    type: boolean
+    description: |
+      When not present, the DAC operates in the 0V to 12V range.
+      When present, the DAC operates in the -12V to 12V range.
+
+  adi,charge-pump:
+    type: boolean
+    description: Whether to enable the internal charge pump.
+
+  adi,dac-hart-slew:
+    type: boolean
+    description: Whether to use a HART-compatible slew rate.
+
+  adi,dac-current-limit-low:
+    type: boolean
+    description: |
+      When not present, the DAC short-circuit current limit is 32mA in
+      either source or sink for VOUT and 4mA sink for IOUT.
+      When present, the limit is 16mA in either source or sink for VOUT,
+      1mA sink for IOUT.
+
+  adi,4-wire-rtd:
+    type: boolean
+    description: |
+      When not present, the ADC should be used for measuring 3-wire RTDs.
+      When present, the ADC should be used for measuring 4-wire RTDs.
+
+  adi,3-wire-rtd-excitation-swap:
+    type: boolean
+    description: Whether to swap the excitation for 3-wire RTD.
+
+  adi,rtd-excitation-current-microamp:
+    description: Excitation current to apply to RTD.
+    enum: [250, 500, 750, 1000]
+    default: 250
+
+  adi,ext1-burnout:
+    type: boolean
+    description: Whether to enable burnout current for EXT1.
+
+  adi,ext1-burnout-current-nanoamp:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      Burnout current in nanoamps to be applied to EXT1.
+    enum: [0, 50, 500, 1000, 10000]
+    default: 0
+
+  adi,ext1-burnout-current-polarity-sourcing:
+    type: boolean
+    description: |
+      When not present, the burnout current polarity for EXT1 is sinking.
+      When present, the burnout current polarity for EXT1 is sourcing.
+
+  adi,ext2-burnout:
+    type: boolean
+    description: Whether to enable burnout current for EXT2.
+
+  adi,ext2-burnout-current-nanoamp:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Burnout current in nanoamps to be applied to EXT2.
+    enum: [0, 50, 500, 1000, 10000]
+    default: 0
+
+  adi,ext2-burnout-current-polarity-sourcing:
+    type: boolean
+    description: |
+      When not present, the burnout current polarity for EXT2 is sinking.
+      When present, the burnout current polarity for EXT2 is sourcing.
+
+  adi,viout-burnout:
+    type: boolean
+    description: Whether to enable burnout current for VIOUT.
+
+  adi,viout-burnout-current-nanoamp:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Burnout current in nanoamps to be applied to VIOUT.
+    enum: [0, 1000, 10000]
+    default: 0
+
+  adi,viout-burnout-current-polarity-sourcing:
+    type: boolean
+    description: |
+      When not present, the burnout current polarity for VIOUT is sinking.
+      When present, the burnout current polarity for VIOUT is sourcing.
+
+  adi,gpio0-mode:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      GPIO functions.
+      0 - Disabled
+      1 - Logic I/O
+      2 - Comparator output
+      3 - Control HART CD
+      4 - Monitor HART CD
+      5 - Monitor HART EOM status
+    minimum: 0
+    maximum: 5
+    default: 0
+
+  adi,gpio1-mode:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      GPIO functions.
+      0 - Disabled
+      1 - Logic I/O
+      2 - Drive external digital output FET
+      3 - Control HART RXD
+      4 - Monitor HART RXD
+      5 - Monitor HART SOM status
+    minimum: 0
+    maximum: 5
+    default: 0
+
+  adi,gpio2-mode:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      GPIO functions.
+      0 - Disabled
+      1 - Logic I/O
+      2 - Drive internal digital output FET
+      3 - Control HART TXD
+      4 - Monitor HART TXD
+      5 - Monitor HART TX complete status
+    minimum: 0
+    maximum: 5
+    default: 0
+
+  adi,gpio3-mode:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      GPIO functions.
+      0 - Disabled
+      1 - Logic I/O
+      2 - High impedance
+      3 - Control HART RTS
+      4 - Monitor HART RTS
+      5 - Monitor HART CD complete status
+    minimum: 0
+    maximum: 5
+    default: 0
+
+required:
+  - compatible
+  - reg
+  - spi-cpol
+  - avdd-supply
+
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+  - if:
+      required:
+        - adi,digital-input-sink-range-high
+    then:
+      properties:
+        adi,digital-input-sink-microamp:
+          maximum: 7400
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    spi {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      addac@0 {
+        compatible = "adi,ad74115h";
+        reg = <0>;
+
+        spi-max-frequency = <12000000>;
+        spi-cpol;
+
+        reset-gpios = <&gpio 27 GPIO_ACTIVE_LOW>;
+
+        interrupt-parent = <&gpio>;
+        interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
+        interrupt-names = "adc_rdy";
+
+        avdd-supply = <&ad74115_avdd>;
+
+        adi,ch-func = <1>;
+        adi,conv2-mux = <2>;
+        adi,conv2-range-microvolt = <(-12000000) 12000000>;
+
+        adi,gpio0-mode = <1>;
+        adi,gpio1-mode = <1>;
+        adi,gpio2-mode = <1>;
+        adi,gpio3-mode = <1>;
+
+        adi,dac-bipolar;
+      };
+    };
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index f1e1ac23a1c66..d1ba33eb0ceca 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1175,6 +1175,13 @@ W:	https://ez.analog.com/linux-software-drivers
 F:	Documentation/devicetree/bindings/iio/adc/adi,ad7780.yaml
 F:	drivers/iio/adc/ad7780.c
 
+ANALOG DEVICES INC AD74115 DRIVER
+M:	Cosmin Tanislav <cosmin.tanislav@analog.com>
+L:	linux-iio@vger.kernel.org
+S:	Supported
+W:	http://ez.analog.com/community/linux-device-drivers
+F:	Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
+
 ANALOG DEVICES INC AD74413R DRIVER
 M:	Cosmin Tanislav <cosmin.tanislav@analog.com>
 L:	linux-iio@vger.kernel.org
-- 
GitLab


From 48ea75598db2b38e4a4b5738d65f226be717bd59 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Thu, 17 Nov 2022 10:09:16 +0200
Subject: [PATCH 2536/4122] iio: addac: add AD74115 driver

The AD74115H is a single-channel, software-configurable, input and
output device for industrial control applications. The AD74115H
provides a wide range of use cases, integrated on a single chip.

These use cases include analog output, analog input, digital output,
digital input, resistance temperature detector (RTD), and thermocouple
measurement capability. The AD74115H also has an integrated HART modem.

A serial peripheral interface (SPI) is used to handle all communications
to the device, including communications with the HART modem. The digital
input and digital outputs can be accessed via the SPI or the
general-purpose input and output (GPIO) pins to support higher
speed data rates.

The device features a 16-bit, sigma-delta analog-to-digital converter
(ADC) and a 14-bit digital-to-analog converter (DAC).
The AD74115H contains a high accuracy 2.5 V on-chip reference that can
be used as the DAC and ADC reference.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221117080916.411766-3-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 MAINTAINERS                 |    1 +
 drivers/iio/addac/Kconfig   |   14 +
 drivers/iio/addac/Makefile  |    1 +
 drivers/iio/addac/ad74115.c | 1947 +++++++++++++++++++++++++++++++++++
 4 files changed, 1963 insertions(+)
 create mode 100644 drivers/iio/addac/ad74115.c

diff --git a/MAINTAINERS b/MAINTAINERS
index d1ba33eb0ceca..860075c493dd2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1181,6 +1181,7 @@ L:	linux-iio@vger.kernel.org
 S:	Supported
 W:	http://ez.analog.com/community/linux-device-drivers
 F:	Documentation/devicetree/bindings/iio/addac/adi,ad74115.yaml
+F:	drivers/iio/addac/ad74115.c
 
 ANALOG DEVICES INC AD74413R DRIVER
 M:	Cosmin Tanislav <cosmin.tanislav@analog.com>
diff --git a/drivers/iio/addac/Kconfig b/drivers/iio/addac/Kconfig
index fcf6d2269bfc2..2843fcb70e24b 100644
--- a/drivers/iio/addac/Kconfig
+++ b/drivers/iio/addac/Kconfig
@@ -5,6 +5,20 @@
 
 menu "Analog to digital and digital to analog converters"
 
+config AD74115
+	tristate "Analog Devices AD74115H driver"
+	depends on GPIOLIB && SPI
+	select CRC8
+	select IIO_BUFFER
+	select IIO_TRIGGERED_BUFFER
+	select REGMAP_SPI
+	help
+	  Say yes here to build support for Analog Devices AD74115H
+	  single-channel software configurable input/output solution.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad74115.
+
 config AD74413R
 	tristate "Analog Devices AD74412R/AD74413R driver"
 	depends on GPIOLIB && SPI
diff --git a/drivers/iio/addac/Makefile b/drivers/iio/addac/Makefile
index 17de20ef0d8e9..577777276e439 100644
--- a/drivers/iio/addac/Makefile
+++ b/drivers/iio/addac/Makefile
@@ -4,5 +4,6 @@
 #
 
 # When adding new entries keep the list in alphabetical order
+obj-$(CONFIG_AD74115) += ad74115.o
 obj-$(CONFIG_AD74413R) += ad74413r.o
 obj-$(CONFIG_STX104) += stx104.o
diff --git a/drivers/iio/addac/ad74115.c b/drivers/iio/addac/ad74115.c
new file mode 100644
index 0000000000000..383b92e7b6826
--- /dev/null
+++ b/drivers/iio/addac/ad74115.c
@@ -0,0 +1,1947 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Analog Devices, Inc.
+ * Author: Cosmin Tanislav <cosmin.tanislav@analog.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/crc8.h>
+#include <linux/device.h>
+#include <linux/gpio/driver.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+#include <linux/units.h>
+
+#include <asm/unaligned.h>
+
+#include <linux/iio/buffer.h>
+#include <linux/iio/iio.h>
+#include <linux/iio/trigger.h>
+#include <linux/iio/trigger_consumer.h>
+#include <linux/iio/triggered_buffer.h>
+
+#define AD74115_NAME				"ad74115"
+
+#define AD74115_CH_FUNC_SETUP_REG		0x01
+
+#define AD74115_ADC_CONFIG_REG			0x02
+#define AD74115_ADC_CONFIG_CONV2_RATE_MASK	GENMASK(15, 13)
+#define AD74115_ADC_CONFIG_CONV1_RATE_MASK	GENMASK(12, 10)
+#define AD74115_ADC_CONFIG_CONV2_RANGE_MASK	GENMASK(9, 7)
+#define AD74115_ADC_CONFIG_CONV1_RANGE_MASK	GENMASK(6, 4)
+
+#define AD74115_PWR_OPTIM_CONFIG_REG		0x03
+
+#define AD74115_DIN_CONFIG1_REG			0x04
+#define AD74115_DIN_COMPARATOR_EN_MASK		BIT(13)
+#define AD74115_DIN_SINK_MASK			GENMASK(11, 7)
+#define AD74115_DIN_DEBOUNCE_MASK		GENMASK(4, 0)
+
+#define AD74115_DIN_CONFIG2_REG			0x05
+#define AD74115_COMP_THRESH_MASK		GENMASK(6, 0)
+
+#define AD74115_OUTPUT_CONFIG_REG		0x06
+#define AD74115_OUTPUT_SLEW_EN_MASK		GENMASK(6, 5)
+#define AD74115_OUTPUT_SLEW_LIN_STEP_MASK	GENMASK(4, 3)
+#define AD74115_OUTPUT_SLEW_LIN_RATE_MASK	GENMASK(2, 1)
+
+#define AD74115_RTD3W4W_CONFIG_REG		0x07
+
+#define AD74115_BURNOUT_CONFIG_REG		0x0a
+#define AD74115_BURNOUT_EXT2_EN_MASK		BIT(10)
+#define AD74115_BURNOUT_EXT1_EN_MASK		BIT(5)
+#define AD74115_BURNOUT_VIOUT_EN_MASK		BIT(0)
+
+#define AD74115_DAC_CODE_REG			0x0b
+
+#define AD74115_DAC_ACTIVE_REG			0x0d
+
+#define AD74115_GPIO_CONFIG_X_REG(x)		(0x35 + (x))
+#define AD74115_GPIO_CONFIG_GPI_DATA		BIT(5)
+#define AD74115_GPIO_CONFIG_GPO_DATA		BIT(4)
+#define AD74115_GPIO_CONFIG_SELECT_MASK		GENMASK(2, 0)
+
+#define AD74115_CHARGE_PUMP_REG			0x3a
+
+#define AD74115_ADC_CONV_CTRL_REG		0x3b
+#define AD74115_ADC_CONV_SEQ_MASK		GENMASK(13, 12)
+
+#define AD74115_DIN_COMP_OUT_REG		0x40
+
+#define AD74115_LIVE_STATUS_REG			0x42
+#define AD74115_ADC_DATA_RDY_MASK		BIT(3)
+
+#define AD74115_READ_SELECT_REG			0x64
+
+#define AD74115_CMD_KEY_REG			0x78
+#define AD74115_CMD_KEY_RESET1			0x15fa
+#define AD74115_CMD_KEY_RESET2			0xaf51
+
+#define AD74115_CRC_POLYNOMIAL			0x7
+DECLARE_CRC8_TABLE(ad74115_crc8_table);
+
+#define AD74115_ADC_CODE_MAX			((int)GENMASK(15, 0))
+#define AD74115_ADC_CODE_HALF			(AD74115_ADC_CODE_MAX / 2)
+
+#define AD74115_DAC_VOLTAGE_MAX			12000
+#define AD74115_DAC_CURRENT_MAX			25
+#define AD74115_DAC_CODE_MAX			((int)GENMASK(13, 0))
+#define AD74115_DAC_CODE_HALF			(AD74115_DAC_CODE_MAX / 2)
+
+#define AD74115_COMP_THRESH_MAX			98
+
+#define AD74115_SENSE_RESISTOR_OHMS		100
+#define AD74115_REF_RESISTOR_OHMS		2100
+
+#define AD74115_DIN_SINK_LOW_STEP		120
+#define AD74115_DIN_SINK_HIGH_STEP		240
+#define AD74115_DIN_SINK_MAX			31
+
+#define AD74115_FRAME_SIZE			4
+#define AD74115_GPIO_NUM			4
+
+#define AD74115_CONV_TIME_US			1000000
+
+enum ad74115_dac_ch {
+	AD74115_DAC_CH_MAIN,
+	AD74115_DAC_CH_COMPARATOR,
+};
+
+enum ad74115_adc_ch {
+	AD74115_ADC_CH_CONV1,
+	AD74115_ADC_CH_CONV2,
+	AD74115_ADC_CH_NUM
+};
+
+enum ad74115_ch_func {
+	AD74115_CH_FUNC_HIGH_IMPEDANCE,
+	AD74115_CH_FUNC_VOLTAGE_OUTPUT,
+	AD74115_CH_FUNC_CURRENT_OUTPUT,
+	AD74115_CH_FUNC_VOLTAGE_INPUT,
+	AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER,
+	AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER,
+	AD74115_CH_FUNC_2_WIRE_RESISTANCE_INPUT,
+	AD74115_CH_FUNC_3_4_WIRE_RESISTANCE_INPUT,
+	AD74115_CH_FUNC_DIGITAL_INPUT_LOGIC,
+	AD74115_CH_FUNC_DIGITAL_INPUT_LOOP_POWER,
+	AD74115_CH_FUNC_CURRENT_OUTPUT_HART,
+	AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER_HART,
+	AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART,
+	AD74115_CH_FUNC_MAX = AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART,
+	AD74115_CH_FUNC_NUM
+};
+
+enum ad74115_adc_range {
+	AD74115_ADC_RANGE_12V,
+	AD74115_ADC_RANGE_12V_BIPOLAR,
+	AD74115_ADC_RANGE_2_5V_BIPOLAR,
+	AD74115_ADC_RANGE_2_5V_NEG,
+	AD74115_ADC_RANGE_2_5V,
+	AD74115_ADC_RANGE_0_625V,
+	AD74115_ADC_RANGE_104MV_BIPOLAR,
+	AD74115_ADC_RANGE_12V_OTHER,
+	AD74115_ADC_RANGE_MAX = AD74115_ADC_RANGE_12V_OTHER,
+	AD74115_ADC_RANGE_NUM
+};
+
+enum ad74115_adc_conv_seq {
+	AD74115_ADC_CONV_SEQ_STANDBY = 0b00,
+	AD74115_ADC_CONV_SEQ_SINGLE = 0b01,
+	AD74115_ADC_CONV_SEQ_CONTINUOUS = 0b10,
+};
+
+enum ad74115_din_threshold_mode {
+	AD74115_DIN_THRESHOLD_MODE_AVDD,
+	AD74115_DIN_THRESHOLD_MODE_FIXED,
+	AD74115_DIN_THRESHOLD_MODE_MAX = AD74115_DIN_THRESHOLD_MODE_FIXED,
+};
+
+enum ad74115_slew_mode {
+	AD74115_SLEW_MODE_DISABLED,
+	AD74115_SLEW_MODE_LINEAR,
+	AD74115_SLEW_MODE_HART,
+};
+
+enum ad74115_slew_step {
+	AD74115_SLEW_STEP_0_8_PERCENT,
+	AD74115_SLEW_STEP_1_5_PERCENT,
+	AD74115_SLEW_STEP_6_1_PERCENT,
+	AD74115_SLEW_STEP_22_2_PERCENT,
+};
+
+enum ad74115_slew_rate {
+	AD74115_SLEW_RATE_4KHZ,
+	AD74115_SLEW_RATE_64KHZ,
+	AD74115_SLEW_RATE_150KHZ,
+	AD74115_SLEW_RATE_240KHZ,
+};
+
+enum ad74115_gpio_config {
+	AD74115_GPIO_CONFIG_OUTPUT_BUFFERED = 0b010,
+	AD74115_GPIO_CONFIG_INPUT = 0b011,
+};
+
+enum ad74115_gpio_mode {
+	AD74115_GPIO_MODE_LOGIC = 1,
+	AD74115_GPIO_MODE_SPECIAL = 2,
+};
+
+struct ad74115_channels {
+	struct iio_chan_spec		*channels;
+	unsigned int			num_channels;
+};
+
+struct ad74115_state {
+	struct spi_device		*spi;
+	struct regmap			*regmap;
+	struct iio_trigger		*trig;
+	struct regulator		*avdd;
+
+	/*
+	 * Synchronize consecutive operations when doing a one-shot
+	 * conversion and when updating the ADC samples SPI message.
+	 */
+	struct mutex			lock;
+	struct gpio_chip		gc;
+	struct gpio_chip		comp_gc;
+	int				irq;
+
+	unsigned int			avdd_mv;
+	unsigned long			gpio_valid_mask;
+	bool				dac_bipolar;
+	bool				dac_hart_slew;
+	bool				rtd_mode_4_wire;
+	enum ad74115_ch_func		ch_func;
+	enum ad74115_din_threshold_mode	din_threshold_mode;
+
+	struct completion		adc_data_completion;
+	struct spi_message		adc_samples_msg;
+	struct spi_transfer		adc_samples_xfer[AD74115_ADC_CH_NUM + 1];
+
+	/*
+	 * DMA (thus cache coherency maintenance) requires the
+	 * transfer buffers to live in their own cache lines.
+	 */
+	u8				reg_tx_buf[AD74115_FRAME_SIZE] __aligned(IIO_DMA_MINALIGN);
+	u8				reg_rx_buf[AD74115_FRAME_SIZE];
+	u8				adc_samples_tx_buf[AD74115_FRAME_SIZE * AD74115_ADC_CH_NUM];
+	u8				adc_samples_rx_buf[AD74115_FRAME_SIZE * AD74115_ADC_CH_NUM];
+};
+
+struct ad74115_fw_prop {
+	const char			*name;
+	bool				is_boolean;
+	bool				negate;
+	unsigned int			max;
+	unsigned int			reg;
+	unsigned int			mask;
+	const unsigned int		*lookup_tbl;
+	unsigned int			lookup_tbl_len;
+};
+
+#define AD74115_FW_PROP(_name, _max, _reg, _mask)		\
+{								\
+	.name = (_name),					\
+	.max = (_max),						\
+	.reg = (_reg),						\
+	.mask = (_mask),					\
+}
+
+#define AD74115_FW_PROP_TBL(_name, _tbl, _reg, _mask)		\
+{								\
+	.name = (_name),					\
+	.reg = (_reg),						\
+	.mask = (_mask),					\
+	.lookup_tbl = (_tbl),					\
+	.lookup_tbl_len = ARRAY_SIZE(_tbl),			\
+}
+
+#define AD74115_FW_PROP_BOOL(_name, _reg, _mask)		\
+{								\
+	.name = (_name),					\
+	.is_boolean = true,					\
+	.reg = (_reg),						\
+	.mask = (_mask),					\
+}
+
+#define AD74115_FW_PROP_BOOL_NEG(_name, _reg, _mask)		\
+{								\
+	.name = (_name),					\
+	.is_boolean = true,					\
+	.negate = true,						\
+	.reg = (_reg),						\
+	.mask = (_mask),					\
+}
+
+static const int ad74115_dac_rate_tbl[] = {
+	0,
+	4 * 8,
+	4 * 15,
+	4 * 61,
+	4 * 222,
+	64 * 8,
+	64 * 15,
+	64 * 61,
+	64 * 222,
+	150 * 8,
+	150 * 15,
+	150 * 61,
+	150 * 222,
+	240 * 8,
+	240 * 15,
+	240 * 61,
+	240 * 222,
+};
+
+static const unsigned int ad74115_dac_rate_step_tbl[][3] = {
+	{ AD74115_SLEW_MODE_DISABLED },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_4KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_4KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_4KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_4KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_64KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_64KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_64KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_64KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_150KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_150KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_150KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_150KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_0_8_PERCENT, AD74115_SLEW_RATE_240KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_1_5_PERCENT, AD74115_SLEW_RATE_240KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_6_1_PERCENT, AD74115_SLEW_RATE_240KHZ },
+	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_240KHZ },
+};
+
+static const unsigned int ad74115_dac_slew_rate_hz_tbl[] = {
+	4000, 64000, 150000, 240000
+};
+
+static const unsigned int ad74115_rtd_excitation_current_ua_tbl[] = {
+	250, 500, 750, 1000
+};
+
+static const unsigned int ad74115_burnout_current_na_tbl[] = {
+	0, 50, 0, 500, 1000, 0, 10000, 0
+};
+
+static const unsigned int ad74115_viout_burnout_current_na_tbl[] = {
+	0, 0, 0, 0, 1000, 0, 10000, 0
+};
+
+static const unsigned int ad74115_gpio_mode_tbl[] = {
+	0, 0, 0, 1, 2, 3, 4, 5
+};
+
+static const unsigned int ad74115_adc_conv_rate_tbl[] = {
+	10, 20, 1200, 4800, 9600
+};
+
+static const unsigned int ad74115_debounce_tbl[] = {
+	0,     13,    18,    24,    32,    42,    56,    75,
+	100,   130,   180,   240,   320,   420,   560,   750,
+	1000,  1300,  1800,  2400,  3200,  4200,  5600,  7500,
+	10000, 13000, 18000, 24000, 32000, 42000, 56000, 75000,
+};
+
+static const unsigned int ad74115_adc_ch_data_regs_tbl[] = {
+	[AD74115_ADC_CH_CONV1] = 0x44,
+	[AD74115_ADC_CH_CONV2] = 0x46,
+};
+
+static const unsigned int ad74115_adc_ch_en_bit_tbl[] = {
+	[AD74115_ADC_CH_CONV1] = BIT(0),
+	[AD74115_ADC_CH_CONV2] = BIT(1),
+};
+
+static const bool ad74115_adc_bipolar_tbl[AD74115_ADC_RANGE_NUM] = {
+	[AD74115_ADC_RANGE_12V_BIPOLAR]		= true,
+	[AD74115_ADC_RANGE_2_5V_BIPOLAR]	= true,
+	[AD74115_ADC_RANGE_104MV_BIPOLAR]	= true,
+};
+
+static const unsigned int ad74115_adc_conv_mul_tbl[AD74115_ADC_RANGE_NUM] = {
+	[AD74115_ADC_RANGE_12V]			= 12000,
+	[AD74115_ADC_RANGE_12V_BIPOLAR]		= 24000,
+	[AD74115_ADC_RANGE_2_5V_BIPOLAR]	= 5000,
+	[AD74115_ADC_RANGE_2_5V_NEG]		= 2500,
+	[AD74115_ADC_RANGE_2_5V]		= 2500,
+	[AD74115_ADC_RANGE_0_625V]		= 625,
+	[AD74115_ADC_RANGE_104MV_BIPOLAR]	= 208,
+	[AD74115_ADC_RANGE_12V_OTHER]		= 12000,
+};
+
+static const unsigned int ad74115_adc_gain_tbl[AD74115_ADC_RANGE_NUM][2] = {
+	[AD74115_ADC_RANGE_12V]			= { 5, 24 },
+	[AD74115_ADC_RANGE_12V_BIPOLAR]		= { 5, 24 },
+	[AD74115_ADC_RANGE_2_5V_BIPOLAR]	= { 1, 1 },
+	[AD74115_ADC_RANGE_2_5V_NEG]		= { 1, 1 },
+	[AD74115_ADC_RANGE_2_5V]		= { 1, 1 },
+	[AD74115_ADC_RANGE_0_625V]		= { 4, 1 },
+	[AD74115_ADC_RANGE_104MV_BIPOLAR]	= { 24, 1 },
+	[AD74115_ADC_RANGE_12V_OTHER]		= { 5, 24 },
+};
+
+static const int ad74115_adc_range_tbl[AD74115_ADC_RANGE_NUM][2] = {
+	[AD74115_ADC_RANGE_12V]			= { 0,         12000000 },
+	[AD74115_ADC_RANGE_12V_BIPOLAR]		= { -12000000, 12000000 },
+	[AD74115_ADC_RANGE_2_5V_BIPOLAR]	= { -2500000,  2500000 },
+	[AD74115_ADC_RANGE_2_5V_NEG]		= { -2500000,  0 },
+	[AD74115_ADC_RANGE_2_5V]		= { 0,         2500000 },
+	[AD74115_ADC_RANGE_0_625V]		= { 0,         625000 },
+	[AD74115_ADC_RANGE_104MV_BIPOLAR]	= { -104000,   104000 },
+	[AD74115_ADC_RANGE_12V_OTHER]		= { 0,         12000000 },
+};
+
+static int _ad74115_find_tbl_index(const unsigned int *tbl, unsigned int tbl_len,
+				   unsigned int val, unsigned int *index)
+{
+	unsigned int i;
+
+	for (i = 0; i < tbl_len; i++)
+		if (val == tbl[i]) {
+			*index = i;
+			return 0;
+		}
+
+	return -EINVAL;
+}
+
+#define ad74115_find_tbl_index(tbl, val, index)	\
+	_ad74115_find_tbl_index(tbl, ARRAY_SIZE(tbl), val, index)
+
+static int ad74115_crc(u8 *buf)
+{
+	return crc8(ad74115_crc8_table, buf, 3, 0);
+}
+
+static void ad74115_format_reg_write(u8 reg, u16 val, u8 *buf)
+{
+	buf[0] = reg;
+	put_unaligned_be16(val, &buf[1]);
+	buf[3] = ad74115_crc(buf);
+}
+
+static int ad74115_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+	struct ad74115_state *st = context;
+
+	ad74115_format_reg_write(reg, val, st->reg_tx_buf);
+
+	return spi_write(st->spi, st->reg_tx_buf, AD74115_FRAME_SIZE);
+}
+
+static int ad74115_crc_check(struct ad74115_state *st, u8 *buf)
+{
+	struct device *dev = &st->spi->dev;
+	u8 expected_crc = ad74115_crc(buf);
+
+	if (buf[3] != expected_crc) {
+		dev_err(dev, "Bad CRC %02x for %02x%02x%02x, expected %02x\n",
+			buf[3], buf[0], buf[1], buf[2], expected_crc);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ad74115_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+	struct ad74115_state *st = context;
+	struct spi_transfer reg_read_xfer[] = {
+		{
+			.tx_buf = st->reg_tx_buf,
+			.len = sizeof(st->reg_tx_buf),
+			.cs_change = 1,
+		},
+		{
+			.rx_buf = st->reg_rx_buf,
+			.len = sizeof(st->reg_rx_buf),
+		},
+	};
+	int ret;
+
+	ad74115_format_reg_write(AD74115_READ_SELECT_REG, reg, st->reg_tx_buf);
+
+	ret = spi_sync_transfer(st->spi, reg_read_xfer, ARRAY_SIZE(reg_read_xfer));
+	if (ret)
+		return ret;
+
+	ret = ad74115_crc_check(st, st->reg_rx_buf);
+	if (ret)
+		return ret;
+
+	*val = get_unaligned_be16(&st->reg_rx_buf[1]);
+
+	return 0;
+}
+
+static const struct regmap_config ad74115_regmap_config = {
+	.reg_bits = 8,
+	.val_bits = 16,
+	.reg_read = ad74115_reg_read,
+	.reg_write = ad74115_reg_write,
+};
+
+static int ad74115_gpio_config_set(struct ad74115_state *st, unsigned int offset,
+				   enum ad74115_gpio_config cfg)
+{
+	return regmap_update_bits(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset),
+				  AD74115_GPIO_CONFIG_SELECT_MASK,
+				  FIELD_PREP(AD74115_GPIO_CONFIG_SELECT_MASK, cfg));
+}
+
+static int ad74115_gpio_init_valid_mask(struct gpio_chip *gc,
+					unsigned long *valid_mask,
+					unsigned int ngpios)
+{
+	struct ad74115_state *st = gpiochip_get_data(gc);
+
+	*valid_mask = st->gpio_valid_mask;
+
+	return 0;
+}
+
+static int ad74115_gpio_get_direction(struct gpio_chip *gc, unsigned int offset)
+{
+	struct ad74115_state *st = gpiochip_get_data(gc);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset), &val);
+	if (ret)
+		return ret;
+
+	return FIELD_GET(AD74115_GPIO_CONFIG_SELECT_MASK, val) == AD74115_GPIO_CONFIG_INPUT;
+}
+
+static int ad74115_gpio_direction_input(struct gpio_chip *gc, unsigned int offset)
+{
+	struct ad74115_state *st = gpiochip_get_data(gc);
+
+	return ad74115_gpio_config_set(st, offset, AD74115_GPIO_CONFIG_INPUT);
+}
+
+static int ad74115_gpio_direction_output(struct gpio_chip *gc, unsigned int offset,
+					 int value)
+{
+	struct ad74115_state *st = gpiochip_get_data(gc);
+
+	return ad74115_gpio_config_set(st, offset, AD74115_GPIO_CONFIG_OUTPUT_BUFFERED);
+}
+
+static int ad74115_gpio_get(struct gpio_chip *gc, unsigned int offset)
+{
+	struct ad74115_state *st = gpiochip_get_data(gc);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset), &val);
+	if (ret)
+		return ret;
+
+	return FIELD_GET(AD74115_GPIO_CONFIG_GPI_DATA, val);
+}
+
+static void ad74115_gpio_set(struct gpio_chip *gc, unsigned int offset, int value)
+{
+	struct ad74115_state *st = gpiochip_get_data(gc);
+	struct device *dev = &st->spi->dev;
+	int ret;
+
+	ret = regmap_update_bits(st->regmap, AD74115_GPIO_CONFIG_X_REG(offset),
+				 AD74115_GPIO_CONFIG_GPO_DATA,
+				 FIELD_PREP(AD74115_GPIO_CONFIG_GPO_DATA, value));
+	if (ret)
+		dev_err(dev, "Failed to set GPIO %u output value, err: %d\n",
+			offset, ret);
+}
+
+static int ad74115_set_comp_debounce(struct ad74115_state *st, unsigned int val)
+{
+	unsigned int len = ARRAY_SIZE(ad74115_debounce_tbl);
+	unsigned int i;
+
+	for (i = 0; i < len; i++)
+		if (val <= ad74115_debounce_tbl[i])
+			break;
+
+	if (i == len)
+		i = len - 1;
+
+	return regmap_update_bits(st->regmap, AD74115_DIN_CONFIG1_REG,
+				  AD74115_DIN_DEBOUNCE_MASK,
+				  FIELD_PREP(AD74115_DIN_DEBOUNCE_MASK, val));
+}
+
+static int ad74115_comp_gpio_get_direction(struct gpio_chip *chip,
+					   unsigned int offset)
+{
+	return GPIO_LINE_DIRECTION_IN;
+}
+
+static int ad74115_comp_gpio_set_config(struct gpio_chip *chip,
+					unsigned int offset,
+					unsigned long config)
+{
+	struct ad74115_state *st = gpiochip_get_data(chip);
+	u32 param = pinconf_to_config_param(config);
+	u32 arg = pinconf_to_config_argument(config);
+
+	switch (param) {
+	case PIN_CONFIG_INPUT_DEBOUNCE:
+		return ad74115_set_comp_debounce(st, arg);
+	default:
+		return -ENOTSUPP;
+	}
+}
+
+static int ad74115_comp_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+	struct ad74115_state *st = gpiochip_get_data(chip);
+	unsigned int val;
+	int ret;
+
+	ret = regmap_read(st->regmap, AD74115_DIN_COMP_OUT_REG, &val);
+	if (ret)
+		return ret;
+
+	return !!val;
+}
+
+static irqreturn_t ad74115_trigger_handler(int irq, void *p)
+{
+	struct iio_poll_func *pf = p;
+	struct iio_dev *indio_dev = pf->indio_dev;
+	struct ad74115_state *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = spi_sync(st->spi, &st->adc_samples_msg);
+	if (ret)
+		goto out;
+
+	iio_push_to_buffers(indio_dev, st->adc_samples_rx_buf);
+
+out:
+	iio_trigger_notify_done(indio_dev->trig);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ad74115_adc_data_interrupt(int irq, void *data)
+{
+	struct iio_dev *indio_dev = data;
+	struct ad74115_state *st = iio_priv(indio_dev);
+
+	if (iio_buffer_enabled(indio_dev))
+		iio_trigger_poll(st->trig);
+	else
+		complete(&st->adc_data_completion);
+
+	return IRQ_HANDLED;
+}
+
+static int ad74115_set_adc_ch_en(struct ad74115_state *st,
+				 enum ad74115_adc_ch channel, bool status)
+{
+	unsigned int mask = ad74115_adc_ch_en_bit_tbl[channel];
+
+	return regmap_update_bits(st->regmap, AD74115_ADC_CONV_CTRL_REG, mask,
+				  status ? mask : 0);
+}
+
+static int ad74115_set_adc_conv_seq(struct ad74115_state *st,
+				    enum ad74115_adc_conv_seq conv_seq)
+{
+	return regmap_update_bits(st->regmap, AD74115_ADC_CONV_CTRL_REG,
+				  AD74115_ADC_CONV_SEQ_MASK,
+				  FIELD_PREP(AD74115_ADC_CONV_SEQ_MASK, conv_seq));
+}
+
+static int ad74115_update_scan_mode(struct iio_dev *indio_dev,
+				    const unsigned long *active_scan_mask)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+	struct spi_transfer *xfer = st->adc_samples_xfer;
+	u8 *rx_buf = st->adc_samples_rx_buf;
+	u8 *tx_buf = st->adc_samples_tx_buf;
+	unsigned int i;
+	int ret = 0;
+
+	mutex_lock(&st->lock);
+
+	spi_message_init(&st->adc_samples_msg);
+
+	for_each_clear_bit(i, active_scan_mask, AD74115_ADC_CH_NUM) {
+		ret = ad74115_set_adc_ch_en(st, i, false);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * The read select register is used to select which register's value
+	 * will be sent by the slave on the next SPI frame.
+	 *
+	 * Create an SPI message that, on each step, writes to the read select
+	 * register to select the ADC result of the next enabled channel, and
+	 * reads the ADC result of the previous enabled channel.
+	 *
+	 * Example:
+	 * W: [WCH1] [WCH2] [WCH2] [WCH3] [    ]
+	 * R: [    ] [RCH1] [RCH2] [RCH3] [RCH4]
+	 */
+	for_each_set_bit(i, active_scan_mask, AD74115_ADC_CH_NUM) {
+		ret = ad74115_set_adc_ch_en(st, i, true);
+		if (ret)
+			goto out;
+
+		if (xfer == st->adc_samples_xfer)
+			xfer->rx_buf = NULL;
+		else
+			xfer->rx_buf = rx_buf;
+
+		xfer->tx_buf = tx_buf;
+		xfer->len = AD74115_FRAME_SIZE;
+		xfer->cs_change = 1;
+
+		ad74115_format_reg_write(AD74115_READ_SELECT_REG,
+					 ad74115_adc_ch_data_regs_tbl[i], tx_buf);
+
+		spi_message_add_tail(xfer, &st->adc_samples_msg);
+
+		tx_buf += AD74115_FRAME_SIZE;
+		if (xfer != st->adc_samples_xfer)
+			rx_buf += AD74115_FRAME_SIZE;
+		xfer++;
+	}
+
+	xfer->rx_buf = rx_buf;
+	xfer->tx_buf = NULL;
+	xfer->len = AD74115_FRAME_SIZE;
+	xfer->cs_change = 0;
+
+	spi_message_add_tail(xfer, &st->adc_samples_msg);
+
+out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static int ad74115_buffer_postenable(struct iio_dev *indio_dev)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+
+	return ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_CONTINUOUS);
+}
+
+static int ad74115_buffer_predisable(struct iio_dev *indio_dev)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+	unsigned int i;
+	int ret;
+
+	mutex_lock(&st->lock);
+
+	ret = ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_STANDBY);
+	if (ret)
+		goto out;
+
+	/*
+	 * update_scan_mode() is not called in the disable path, disable all
+	 * channels here.
+	 */
+	for (i = 0; i < AD74115_ADC_CH_NUM; i++) {
+		ret = ad74115_set_adc_ch_en(st, i, false);
+		if (ret)
+			goto out;
+	}
+
+out:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static const struct iio_buffer_setup_ops ad74115_buffer_ops = {
+	.postenable = &ad74115_buffer_postenable,
+	.predisable = &ad74115_buffer_predisable,
+};
+
+static const struct iio_trigger_ops ad74115_trigger_ops = {
+	.validate_device = iio_trigger_validate_own_device,
+};
+
+static int ad74115_get_adc_rate(struct ad74115_state *st,
+				enum ad74115_adc_ch channel, int *val)
+{
+	unsigned int i;
+	int ret;
+
+	ret = regmap_read(st->regmap, AD74115_ADC_CONFIG_REG, &i);
+	if (ret)
+		return ret;
+
+	if (channel == AD74115_ADC_CH_CONV1)
+		i = FIELD_GET(AD74115_ADC_CONFIG_CONV1_RATE_MASK, i);
+	else
+		i = FIELD_GET(AD74115_ADC_CONFIG_CONV2_RATE_MASK, i);
+
+	*val = ad74115_adc_conv_rate_tbl[i];
+
+	return IIO_VAL_INT;
+}
+
+static int _ad74115_get_adc_code(struct ad74115_state *st,
+				 enum ad74115_adc_ch channel, int *val)
+{
+	unsigned int uval;
+	int ret;
+
+	reinit_completion(&st->adc_data_completion);
+
+	ret = ad74115_set_adc_ch_en(st, channel, true);
+	if (ret)
+		return ret;
+
+	ret = ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_SINGLE);
+	if (ret)
+		return ret;
+
+	if (st->irq) {
+		ret = wait_for_completion_timeout(&st->adc_data_completion,
+						  msecs_to_jiffies(1000));
+		if (!ret)
+			return -ETIMEDOUT;
+	} else {
+		unsigned int regval, wait_time;
+		int rate;
+
+		ret = ad74115_get_adc_rate(st, channel, &rate);
+		if (ret < 0)
+			return ret;
+
+		wait_time = DIV_ROUND_CLOSEST(AD74115_CONV_TIME_US, rate);
+
+		ret = regmap_read_poll_timeout(st->regmap, AD74115_LIVE_STATUS_REG,
+					       regval, regval & AD74115_ADC_DATA_RDY_MASK,
+					       wait_time, 5 * wait_time);
+		if (ret)
+			return ret;
+
+		/*
+		 * The ADC_DATA_RDY bit is W1C.
+		 * See datasheet page 98, Table 62. Bit Descriptions for
+		 * LIVE_STATUS.
+		 * Although the datasheet mentions that the bit will auto-clear
+		 * when writing to the ADC_CONV_CTRL register, this does not
+		 * seem to happen.
+		 */
+		ret = regmap_write_bits(st->regmap, AD74115_LIVE_STATUS_REG,
+					AD74115_ADC_DATA_RDY_MASK,
+					FIELD_PREP(AD74115_ADC_DATA_RDY_MASK, 1));
+		if (ret)
+			return ret;
+	}
+
+	ret = regmap_read(st->regmap, ad74115_adc_ch_data_regs_tbl[channel], &uval);
+	if (ret)
+		return ret;
+
+	ret = ad74115_set_adc_conv_seq(st, AD74115_ADC_CONV_SEQ_STANDBY);
+	if (ret)
+		return ret;
+
+	ret = ad74115_set_adc_ch_en(st, channel, false);
+	if (ret)
+		return ret;
+
+	*val = uval;
+
+	return IIO_VAL_INT;
+}
+
+static int ad74115_get_adc_code(struct iio_dev *indio_dev,
+				enum ad74115_adc_ch channel, int *val)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+	int ret;
+
+	ret = iio_device_claim_direct_mode(indio_dev);
+	if (ret)
+		return ret;
+
+	mutex_lock(&st->lock);
+	ret = _ad74115_get_adc_code(st, channel, val);
+	mutex_unlock(&st->lock);
+
+	iio_device_release_direct_mode(indio_dev);
+
+	return ret;
+}
+
+static int ad74115_adc_code_to_resistance(int code, int *val, int *val2)
+{
+	if (code == AD74115_ADC_CODE_MAX)
+		code--;
+
+	*val = code * AD74115_REF_RESISTOR_OHMS;
+	*val2 = AD74115_ADC_CODE_MAX - code;
+
+	return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74115_set_dac_code(struct ad74115_state *st,
+				enum ad74115_dac_ch channel, int val)
+{
+	if (val < 0)
+		return -EINVAL;
+
+	if (channel == AD74115_DAC_CH_COMPARATOR) {
+		if (val > AD74115_COMP_THRESH_MAX)
+			return -EINVAL;
+
+		return regmap_update_bits(st->regmap, AD74115_DIN_CONFIG2_REG,
+					  AD74115_COMP_THRESH_MASK,
+					  FIELD_PREP(AD74115_COMP_THRESH_MASK, val));
+	}
+
+	if (val > AD74115_DAC_CODE_MAX)
+		return -EINVAL;
+
+	return regmap_write(st->regmap, AD74115_DAC_CODE_REG, val);
+}
+
+static int ad74115_get_dac_code(struct ad74115_state *st,
+				enum ad74115_dac_ch channel, int *val)
+{
+	unsigned int uval;
+	int ret;
+
+	if (channel == AD74115_DAC_CH_COMPARATOR)
+		return -EINVAL;
+
+	ret = regmap_read(st->regmap, AD74115_DAC_ACTIVE_REG, &uval);
+	if (ret)
+		return ret;
+
+	*val = uval;
+
+	return IIO_VAL_INT;
+}
+
+static int ad74115_set_adc_rate(struct ad74115_state *st,
+				enum ad74115_adc_ch channel, int val)
+{
+	unsigned int i;
+	int ret;
+
+	ret = ad74115_find_tbl_index(ad74115_adc_conv_rate_tbl, val, &i);
+	if (ret)
+		return ret;
+
+	if (channel == AD74115_ADC_CH_CONV1)
+		return regmap_update_bits(st->regmap, AD74115_ADC_CONFIG_REG,
+					  AD74115_ADC_CONFIG_CONV1_RATE_MASK,
+					  FIELD_PREP(AD74115_ADC_CONFIG_CONV1_RATE_MASK, i));
+
+	return regmap_update_bits(st->regmap, AD74115_ADC_CONFIG_REG,
+				  AD74115_ADC_CONFIG_CONV2_RATE_MASK,
+				  FIELD_PREP(AD74115_ADC_CONFIG_CONV2_RATE_MASK, i));
+}
+
+static int ad74115_get_dac_rate(struct ad74115_state *st, int *val)
+{
+	unsigned int i, en_val, step_val, rate_val, tmp;
+	int ret;
+
+	ret = regmap_read(st->regmap, AD74115_OUTPUT_CONFIG_REG, &tmp);
+	if (ret)
+		return ret;
+
+	en_val = FIELD_GET(AD74115_OUTPUT_SLEW_EN_MASK, tmp);
+	step_val = FIELD_GET(AD74115_OUTPUT_SLEW_LIN_STEP_MASK, tmp);
+	rate_val = FIELD_GET(AD74115_OUTPUT_SLEW_LIN_RATE_MASK, tmp);
+
+	for (i = 0; i < ARRAY_SIZE(ad74115_dac_rate_step_tbl); i++)
+		if (en_val == ad74115_dac_rate_step_tbl[i][0] &&
+		    step_val == ad74115_dac_rate_step_tbl[i][1] &&
+		    rate_val == ad74115_dac_rate_step_tbl[i][2])
+			break;
+
+	if (i == ARRAY_SIZE(ad74115_dac_rate_step_tbl))
+		return -EINVAL;
+
+	*val = ad74115_dac_rate_tbl[i];
+
+	return IIO_VAL_INT;
+}
+
+static int ad74115_set_dac_rate(struct ad74115_state *st, int val)
+{
+	unsigned int i, en_val, step_val, rate_val, mask, tmp;
+	int ret;
+
+	ret = ad74115_find_tbl_index(ad74115_dac_rate_tbl, val, &i);
+	if (ret)
+		return ret;
+
+	en_val = ad74115_dac_rate_step_tbl[i][0];
+	step_val = ad74115_dac_rate_step_tbl[i][1];
+	rate_val = ad74115_dac_rate_step_tbl[i][2];
+
+	mask = AD74115_OUTPUT_SLEW_EN_MASK;
+	mask |= AD74115_OUTPUT_SLEW_LIN_STEP_MASK;
+	mask |= AD74115_OUTPUT_SLEW_LIN_RATE_MASK;
+
+	tmp = FIELD_PREP(AD74115_OUTPUT_SLEW_EN_MASK, en_val);
+	tmp |= FIELD_PREP(AD74115_OUTPUT_SLEW_LIN_STEP_MASK, step_val);
+	tmp |= FIELD_PREP(AD74115_OUTPUT_SLEW_LIN_RATE_MASK, rate_val);
+
+	return regmap_update_bits(st->regmap, AD74115_OUTPUT_CONFIG_REG, mask, tmp);
+}
+
+static int ad74115_get_dac_scale(struct ad74115_state *st,
+				 struct iio_chan_spec const *chan,
+				 int *val, int *val2)
+{
+	if (chan->channel == AD74115_DAC_CH_MAIN) {
+		if (chan->type == IIO_VOLTAGE) {
+			*val = AD74115_DAC_VOLTAGE_MAX;
+
+			if (st->dac_bipolar)
+				*val *= 2;
+
+		} else {
+			*val = AD74115_DAC_CURRENT_MAX;
+		}
+
+		*val2 = AD74115_DAC_CODE_MAX;
+	} else {
+		if (st->din_threshold_mode == AD74115_DIN_THRESHOLD_MODE_AVDD) {
+			*val = 196 * st->avdd_mv;
+			*val2 = 10 * AD74115_COMP_THRESH_MAX;
+		} else {
+			*val = 49000;
+			*val2 = AD74115_COMP_THRESH_MAX;
+		}
+	}
+
+	return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74115_get_dac_offset(struct ad74115_state *st,
+				  struct iio_chan_spec const *chan, int *val)
+{
+	if (chan->channel == AD74115_DAC_CH_MAIN) {
+		if (chan->type == IIO_VOLTAGE && st->dac_bipolar)
+			*val = -AD74115_DAC_CODE_HALF;
+		else
+			*val = 0;
+	} else {
+		if (st->din_threshold_mode == AD74115_DIN_THRESHOLD_MODE_AVDD)
+			*val = -48;
+		else
+			*val = -38;
+	}
+
+	return IIO_VAL_INT;
+}
+
+static int ad74115_get_adc_range(struct ad74115_state *st,
+				 enum ad74115_adc_ch channel, unsigned int *val)
+{
+	int ret;
+
+	ret = regmap_read(st->regmap, AD74115_ADC_CONFIG_REG, val);
+	if (ret)
+		return ret;
+
+	if (channel == AD74115_ADC_CH_CONV1)
+		*val = FIELD_GET(AD74115_ADC_CONFIG_CONV1_RANGE_MASK, *val);
+	else
+		*val = FIELD_GET(AD74115_ADC_CONFIG_CONV2_RANGE_MASK, *val);
+
+	return 0;
+}
+
+static int ad74115_get_adc_resistance_scale(struct ad74115_state *st,
+					    unsigned int range,
+					    int *val, int *val2)
+{
+	*val = ad74115_adc_gain_tbl[range][1] * AD74115_REF_RESISTOR_OHMS;
+	*val2 = ad74115_adc_gain_tbl[range][0];
+
+	if (ad74115_adc_bipolar_tbl[range])
+		*val2 *= AD74115_ADC_CODE_HALF;
+	else
+		*val2 *= AD74115_ADC_CODE_MAX;
+
+	return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74115_get_adc_scale(struct ad74115_state *st,
+				 struct iio_chan_spec const *chan,
+				 int *val, int *val2)
+{
+	unsigned int range;
+	int ret;
+
+	ret = ad74115_get_adc_range(st, chan->channel, &range);
+	if (ret)
+		return ret;
+
+	if (chan->type == IIO_RESISTANCE)
+		return ad74115_get_adc_resistance_scale(st, range, val, val2);
+
+	*val = ad74115_adc_conv_mul_tbl[range];
+	*val2 = AD74115_ADC_CODE_MAX;
+
+	if (chan->type == IIO_CURRENT)
+		*val2 *= AD74115_SENSE_RESISTOR_OHMS;
+
+	return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74115_get_adc_resistance_offset(struct ad74115_state *st,
+					     unsigned int range,
+					     int *val, int *val2)
+{
+	unsigned int d = 10 * AD74115_REF_RESISTOR_OHMS
+			 * ad74115_adc_gain_tbl[range][1];
+
+	*val = 5;
+
+	if (ad74115_adc_bipolar_tbl[range])
+		*val -= AD74115_ADC_CODE_HALF;
+
+	*val *= d;
+
+	if (!st->rtd_mode_4_wire) {
+		/* Add 0.2 Ohm to the final result for 3-wire RTD. */
+		unsigned int v = 2 * ad74115_adc_gain_tbl[range][0];
+
+		if (ad74115_adc_bipolar_tbl[range])
+			v *= AD74115_ADC_CODE_HALF;
+		else
+			v *= AD74115_ADC_CODE_MAX;
+
+		*val += v;
+	}
+
+	*val2 = d;
+
+	return IIO_VAL_FRACTIONAL;
+}
+
+static int ad74115_get_adc_offset(struct ad74115_state *st,
+				  struct iio_chan_spec const *chan,
+				  int *val, int *val2)
+{
+	unsigned int range;
+	int ret;
+
+	ret = ad74115_get_adc_range(st, chan->channel, &range);
+	if (ret)
+		return ret;
+
+	if (chan->type == IIO_RESISTANCE)
+		return ad74115_get_adc_resistance_offset(st, range, val, val2);
+
+	if (ad74115_adc_bipolar_tbl[range])
+		*val = -AD74115_ADC_CODE_HALF;
+	else if (range == AD74115_ADC_RANGE_2_5V_NEG)
+		*val = -AD74115_ADC_CODE_MAX;
+	else
+		*val = 0;
+
+	return IIO_VAL_INT;
+}
+
+static int ad74115_read_raw(struct iio_dev *indio_dev,
+			    struct iio_chan_spec const *chan,
+			    int *val, int *val2, long info)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+	int ret;
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		if (chan->output)
+			return ad74115_get_dac_code(st, chan->channel, val);
+
+		return ad74115_get_adc_code(indio_dev, chan->channel, val);
+	case IIO_CHAN_INFO_PROCESSED:
+		ret = ad74115_get_adc_code(indio_dev, chan->channel, val);
+		if (ret)
+			return ret;
+
+		return ad74115_adc_code_to_resistance(*val, val, val2);
+	case IIO_CHAN_INFO_SCALE:
+		if (chan->output)
+			return ad74115_get_dac_scale(st, chan, val, val2);
+
+		return ad74115_get_adc_scale(st, chan, val, val2);
+	case IIO_CHAN_INFO_OFFSET:
+		if (chan->output)
+			return ad74115_get_dac_offset(st, chan, val);
+
+		return ad74115_get_adc_offset(st, chan, val, val2);
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		if (chan->output)
+			return ad74115_get_dac_rate(st, val);
+
+		return ad74115_get_adc_rate(st, chan->channel, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad74115_write_raw(struct iio_dev *indio_dev,
+			     struct iio_chan_spec const *chan, int val, int val2,
+			     long info)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+
+	switch (info) {
+	case IIO_CHAN_INFO_RAW:
+		if (!chan->output)
+			return -EINVAL;
+
+		return ad74115_set_dac_code(st, chan->channel, val);
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		if (chan->output)
+			return ad74115_set_dac_rate(st, val);
+
+		return ad74115_set_adc_rate(st, chan->channel, val);
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad74115_read_avail(struct iio_dev *indio_dev,
+			      struct iio_chan_spec const *chan,
+			      const int **vals, int *type, int *length, long info)
+{
+	switch (info) {
+	case IIO_CHAN_INFO_SAMP_FREQ:
+		if (chan->output) {
+			*vals = ad74115_dac_rate_tbl;
+			*length = ARRAY_SIZE(ad74115_dac_rate_tbl);
+		} else {
+			*vals = ad74115_adc_conv_rate_tbl;
+			*length = ARRAY_SIZE(ad74115_adc_conv_rate_tbl);
+		}
+
+		*type = IIO_VAL_INT;
+
+		return IIO_AVAIL_LIST;
+	default:
+		return -EINVAL;
+	}
+}
+
+static int ad74115_reg_access(struct iio_dev *indio_dev, unsigned int reg,
+			      unsigned int writeval, unsigned int *readval)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+
+	if (readval)
+		return regmap_read(st->regmap, reg, readval);
+
+	return regmap_write(st->regmap, reg, writeval);
+}
+
+static const struct iio_info ad74115_info = {
+	.read_raw = ad74115_read_raw,
+	.write_raw = ad74115_write_raw,
+	.read_avail = ad74115_read_avail,
+	.update_scan_mode = ad74115_update_scan_mode,
+	.debugfs_reg_access = ad74115_reg_access,
+};
+
+#define AD74115_DAC_CHANNEL(_type, index)				\
+	{								\
+		.type = (_type),					\
+		.channel = (index),					\
+		.indexed = 1,						\
+		.output = 1,						\
+		.scan_index = -1,					\
+		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW)		\
+				      | BIT(IIO_CHAN_INFO_SCALE)	\
+				      | BIT(IIO_CHAN_INFO_OFFSET),	\
+	}
+
+#define _AD74115_ADC_CHANNEL(_type, index, extra_mask_separate)		\
+	{								\
+		.type = (_type),					\
+		.channel = (index),					\
+		.indexed = 1,						\
+		.info_mask_separate = BIT(IIO_CHAN_INFO_RAW)		\
+				      | BIT(IIO_CHAN_INFO_SAMP_FREQ)	\
+				      | (extra_mask_separate),		\
+		.info_mask_separate_available =				\
+					BIT(IIO_CHAN_INFO_SAMP_FREQ),	\
+		.scan_index = index,					\
+		.scan_type = {						\
+			.sign = 'u',					\
+			.realbits = 16,					\
+			.storagebits = 32,				\
+			.shift = 8,					\
+			.endianness = IIO_BE,				\
+		},							\
+	}
+
+#define AD74115_ADC_CHANNEL(_type, index)				\
+	_AD74115_ADC_CHANNEL(_type, index, BIT(IIO_CHAN_INFO_SCALE)	\
+					   | BIT(IIO_CHAN_INFO_OFFSET))
+
+static struct iio_chan_spec ad74115_voltage_input_channels[] = {
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+static struct iio_chan_spec ad74115_voltage_output_channels[] = {
+	AD74115_DAC_CHANNEL(IIO_VOLTAGE, AD74115_DAC_CH_MAIN),
+	AD74115_ADC_CHANNEL(IIO_CURRENT, AD74115_ADC_CH_CONV1),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+static struct iio_chan_spec ad74115_current_input_channels[] = {
+	AD74115_ADC_CHANNEL(IIO_CURRENT, AD74115_ADC_CH_CONV1),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+static struct iio_chan_spec ad74115_current_output_channels[] = {
+	AD74115_DAC_CHANNEL(IIO_CURRENT, AD74115_DAC_CH_MAIN),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+static struct iio_chan_spec ad74115_2_wire_resistance_input_channels[] = {
+	_AD74115_ADC_CHANNEL(IIO_RESISTANCE, AD74115_ADC_CH_CONV1,
+			     BIT(IIO_CHAN_INFO_PROCESSED)),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+static struct iio_chan_spec ad74115_3_4_wire_resistance_input_channels[] = {
+	AD74115_ADC_CHANNEL(IIO_RESISTANCE, AD74115_ADC_CH_CONV1),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+static struct iio_chan_spec ad74115_digital_input_logic_channels[] = {
+	AD74115_DAC_CHANNEL(IIO_VOLTAGE, AD74115_DAC_CH_COMPARATOR),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+static struct iio_chan_spec ad74115_digital_input_loop_channels[] = {
+	AD74115_DAC_CHANNEL(IIO_CURRENT, AD74115_DAC_CH_MAIN),
+	AD74115_DAC_CHANNEL(IIO_VOLTAGE, AD74115_DAC_CH_COMPARATOR),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV1),
+	AD74115_ADC_CHANNEL(IIO_VOLTAGE, AD74115_ADC_CH_CONV2),
+};
+
+#define _AD74115_CHANNELS(_channels)			\
+	{						\
+		.channels = _channels,			\
+		.num_channels = ARRAY_SIZE(_channels),	\
+	}
+
+#define AD74115_CHANNELS(name) \
+	_AD74115_CHANNELS(ad74115_ ## name ## _channels)
+
+static const struct ad74115_channels ad74115_channels_map[AD74115_CH_FUNC_NUM] = {
+	[AD74115_CH_FUNC_HIGH_IMPEDANCE] = AD74115_CHANNELS(voltage_input),
+	[AD74115_CH_FUNC_VOLTAGE_INPUT] = AD74115_CHANNELS(voltage_input),
+
+	[AD74115_CH_FUNC_VOLTAGE_OUTPUT] = AD74115_CHANNELS(voltage_output),
+
+	[AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER] = AD74115_CHANNELS(current_input),
+	[AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER] = AD74115_CHANNELS(current_input),
+	[AD74115_CH_FUNC_CURRENT_INPUT_EXT_POWER_HART] = AD74115_CHANNELS(current_input),
+	[AD74115_CH_FUNC_CURRENT_INPUT_LOOP_POWER_HART] = AD74115_CHANNELS(current_input),
+
+	[AD74115_CH_FUNC_CURRENT_OUTPUT] = AD74115_CHANNELS(current_output),
+	[AD74115_CH_FUNC_CURRENT_OUTPUT_HART] = AD74115_CHANNELS(current_output),
+
+	[AD74115_CH_FUNC_2_WIRE_RESISTANCE_INPUT] = AD74115_CHANNELS(2_wire_resistance_input),
+	[AD74115_CH_FUNC_3_4_WIRE_RESISTANCE_INPUT] = AD74115_CHANNELS(3_4_wire_resistance_input),
+
+	[AD74115_CH_FUNC_DIGITAL_INPUT_LOGIC] = AD74115_CHANNELS(digital_input_logic),
+
+	[AD74115_CH_FUNC_DIGITAL_INPUT_LOOP_POWER] = AD74115_CHANNELS(digital_input_loop),
+};
+
+#define AD74115_GPIO_MODE_FW_PROP(i)					\
+{									\
+	.name = "adi,gpio" __stringify(i) "-mode",			\
+	.reg = AD74115_GPIO_CONFIG_X_REG(i),				\
+	.mask = AD74115_GPIO_CONFIG_SELECT_MASK,			\
+	.lookup_tbl = ad74115_gpio_mode_tbl,				\
+	.lookup_tbl_len = ARRAY_SIZE(ad74115_gpio_mode_tbl),		\
+}
+
+static const struct ad74115_fw_prop ad74115_gpio_mode_fw_props[] = {
+	AD74115_GPIO_MODE_FW_PROP(0),
+	AD74115_GPIO_MODE_FW_PROP(1),
+	AD74115_GPIO_MODE_FW_PROP(2),
+	AD74115_GPIO_MODE_FW_PROP(3),
+};
+
+static const struct ad74115_fw_prop ad74115_din_threshold_mode_fw_prop =
+	AD74115_FW_PROP_BOOL("adi,digital-input-threshold-mode-fixed",
+			     AD74115_DIN_CONFIG2_REG, BIT(7));
+
+static const struct ad74115_fw_prop ad74115_dac_bipolar_fw_prop =
+	AD74115_FW_PROP_BOOL("adi,dac-bipolar", AD74115_OUTPUT_CONFIG_REG, BIT(7));
+
+static const struct ad74115_fw_prop ad74115_ch_func_fw_prop =
+	AD74115_FW_PROP("adi,ch-func", AD74115_CH_FUNC_MAX,
+			AD74115_CH_FUNC_SETUP_REG, GENMASK(3, 0));
+
+static const struct ad74115_fw_prop ad74115_rtd_mode_fw_prop =
+	AD74115_FW_PROP_BOOL("adi,4-wire-rtd", AD74115_RTD3W4W_CONFIG_REG, BIT(3));
+
+static const struct ad74115_fw_prop ad74115_din_range_fw_prop =
+	AD74115_FW_PROP_BOOL("adi,digital-input-sink-range-high",
+			     AD74115_DIN_CONFIG1_REG, BIT(12));
+
+static const struct ad74115_fw_prop ad74115_ext2_burnout_current_fw_prop =
+	AD74115_FW_PROP_TBL("adi,ext2-burnout-current-nanoamp",
+			    ad74115_burnout_current_na_tbl,
+			    AD74115_BURNOUT_CONFIG_REG, GENMASK(14, 12));
+
+static const struct ad74115_fw_prop ad74115_ext1_burnout_current_fw_prop =
+	AD74115_FW_PROP_TBL("adi,ext1-burnout-current-nanoamp",
+			    ad74115_burnout_current_na_tbl,
+			    AD74115_BURNOUT_CONFIG_REG, GENMASK(9, 7));
+
+static const struct ad74115_fw_prop ad74115_viout_burnout_current_fw_prop =
+	AD74115_FW_PROP_TBL("adi,viout-burnout-current-nanoamp",
+			    ad74115_viout_burnout_current_na_tbl,
+			    AD74115_BURNOUT_CONFIG_REG, GENMASK(4, 2));
+
+static const struct ad74115_fw_prop ad74115_fw_props[] = {
+	AD74115_FW_PROP("adi,conv2-mux", 3,
+			AD74115_ADC_CONFIG_REG, GENMASK(3, 2)),
+
+	AD74115_FW_PROP_BOOL_NEG("adi,sense-agnd-buffer-low-power",
+				 AD74115_PWR_OPTIM_CONFIG_REG, BIT(4)),
+	AD74115_FW_PROP_BOOL_NEG("adi,lf-buffer-low-power",
+				 AD74115_PWR_OPTIM_CONFIG_REG, BIT(3)),
+	AD74115_FW_PROP_BOOL_NEG("adi,hf-buffer-low-power",
+				 AD74115_PWR_OPTIM_CONFIG_REG, BIT(2)),
+	AD74115_FW_PROP_BOOL_NEG("adi,ext2-buffer-low-power",
+				 AD74115_PWR_OPTIM_CONFIG_REG, BIT(1)),
+	AD74115_FW_PROP_BOOL_NEG("adi,ext1-buffer-low-power",
+				 AD74115_PWR_OPTIM_CONFIG_REG, BIT(0)),
+
+	AD74115_FW_PROP_BOOL("adi,comparator-invert",
+			     AD74115_DIN_CONFIG1_REG, BIT(14)),
+	AD74115_FW_PROP_BOOL("adi,digital-input-debounce-mode-counter-reset",
+			     AD74115_DIN_CONFIG1_REG, BIT(6)),
+
+	AD74115_FW_PROP_BOOL("adi,digital-input-unbuffered",
+			     AD74115_DIN_CONFIG2_REG, BIT(10)),
+	AD74115_FW_PROP_BOOL("adi,digital-input-short-circuit-detection",
+			     AD74115_DIN_CONFIG2_REG, BIT(9)),
+	AD74115_FW_PROP_BOOL("adi,digital-input-open-circuit-detection",
+			     AD74115_DIN_CONFIG2_REG, BIT(8)),
+
+	AD74115_FW_PROP_BOOL("adi,dac-current-limit-low",
+			     AD74115_OUTPUT_CONFIG_REG, BIT(0)),
+
+	AD74115_FW_PROP_BOOL("adi,3-wire-rtd-excitation-swap",
+			     AD74115_RTD3W4W_CONFIG_REG, BIT(2)),
+	AD74115_FW_PROP_TBL("adi,rtd-excitation-current-microamp",
+			    ad74115_rtd_excitation_current_ua_tbl,
+			    AD74115_RTD3W4W_CONFIG_REG, GENMASK(1, 0)),
+
+	AD74115_FW_PROP_BOOL("adi,ext2-burnout-current-polarity-sourcing",
+			     AD74115_BURNOUT_CONFIG_REG, BIT(11)),
+	AD74115_FW_PROP_BOOL("adi,ext1-burnout-current-polarity-sourcing",
+			     AD74115_BURNOUT_CONFIG_REG, BIT(6)),
+	AD74115_FW_PROP_BOOL("adi,viout-burnout-current-polarity-sourcing",
+			     AD74115_BURNOUT_CONFIG_REG, BIT(1)),
+
+	AD74115_FW_PROP_BOOL("adi,charge-pump",
+			     AD74115_CHARGE_PUMP_REG, BIT(0)),
+};
+
+static int ad74115_apply_fw_prop(struct ad74115_state *st,
+				 const struct ad74115_fw_prop *prop, u32 *retval)
+{
+	struct device *dev = &st->spi->dev;
+	u32 val = 0;
+	int ret;
+
+	if (prop->is_boolean) {
+		val = device_property_read_bool(dev, prop->name);
+	} else {
+		ret = device_property_read_u32(dev, prop->name, &val);
+		if (ret && prop->lookup_tbl)
+			val = prop->lookup_tbl[0];
+	}
+
+	*retval = val;
+
+	if (prop->negate)
+		val = !val;
+
+	if (prop->lookup_tbl)
+		ret = _ad74115_find_tbl_index(prop->lookup_tbl,
+					      prop->lookup_tbl_len, val, &val);
+	else if (prop->max && val > prop->max)
+		ret = -EINVAL;
+	else
+		ret = 0;
+
+	if (ret)
+		return dev_err_probe(dev, -EINVAL,
+				     "Invalid value %u for prop %s\n",
+				     val, prop->name);
+
+	WARN(!prop->mask, "Prop %s mask is empty\n", prop->name);
+
+	val = (val << __ffs(prop->mask)) & prop->mask;
+
+	return regmap_update_bits(st->regmap, prop->reg, prop->mask, val);
+}
+
+static int ad74115_setup_adc_conv2_range(struct ad74115_state *st)
+{
+	unsigned int tbl_len = ARRAY_SIZE(ad74115_adc_range_tbl);
+	const char *prop_name = "adi,conv2-range-microvolt";
+	s32 vals[2] = {
+		ad74115_adc_range_tbl[0][0],
+		ad74115_adc_range_tbl[0][1],
+	};
+	struct device *dev = &st->spi->dev;
+	unsigned int i;
+
+	device_property_read_u32_array(dev, prop_name, vals, 2);
+
+	for (i = 0; i < tbl_len; i++)
+		if (vals[0] == ad74115_adc_range_tbl[i][0] &&
+		    vals[1] == ad74115_adc_range_tbl[i][1])
+			break;
+
+	if (i == tbl_len)
+		return dev_err_probe(dev, -EINVAL,
+				     "Invalid value %d, %d for prop %s\n",
+				     vals[0], vals[1], prop_name);
+
+	return regmap_update_bits(st->regmap, AD74115_ADC_CONFIG_REG,
+				  AD74115_ADC_CONFIG_CONV2_RANGE_MASK,
+				  FIELD_PREP(AD74115_ADC_CONFIG_CONV2_RANGE_MASK, i));
+}
+
+static int ad74115_setup_iio_channels(struct iio_dev *indio_dev)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+	struct device *dev = &st->spi->dev;
+	struct iio_chan_spec *channels;
+
+	channels = devm_kcalloc(dev, sizeof(*channels),
+				indio_dev->num_channels, GFP_KERNEL);
+	if (!channels)
+		return -ENOMEM;
+
+	indio_dev->channels = channels;
+
+	memcpy(channels, ad74115_channels_map[st->ch_func].channels,
+	       sizeof(*channels) * ad74115_channels_map[st->ch_func].num_channels);
+
+	if (channels[0].output && channels[0].channel == AD74115_DAC_CH_MAIN &&
+	    channels[0].type == IIO_VOLTAGE && !st->dac_hart_slew) {
+		channels[0].info_mask_separate |= BIT(IIO_CHAN_INFO_SAMP_FREQ);
+		channels[0].info_mask_separate_available |= BIT(IIO_CHAN_INFO_SAMP_FREQ);
+	}
+
+	return 0;
+}
+
+static int ad74115_setup_gpio_chip(struct ad74115_state *st)
+{
+	struct device *dev = &st->spi->dev;
+
+	if (!st->gpio_valid_mask)
+		return 0;
+
+	st->gc = (struct gpio_chip) {
+		.owner = THIS_MODULE,
+		.label = AD74115_NAME,
+		.base = -1,
+		.ngpio = AD74115_GPIO_NUM,
+		.parent = dev,
+		.can_sleep = true,
+		.init_valid_mask = ad74115_gpio_init_valid_mask,
+		.get_direction = ad74115_gpio_get_direction,
+		.direction_input = ad74115_gpio_direction_input,
+		.direction_output = ad74115_gpio_direction_output,
+		.get = ad74115_gpio_get,
+		.set = ad74115_gpio_set,
+	};
+
+	return devm_gpiochip_add_data(dev, &st->gc, st);
+}
+
+static int ad74115_setup_comp_gpio_chip(struct ad74115_state *st)
+{
+	struct device *dev = &st->spi->dev;
+	u32 val;
+	int ret;
+
+	ret = regmap_read(st->regmap, AD74115_DIN_CONFIG1_REG, &val);
+	if (ret)
+		return ret;
+
+	if (!(val & AD74115_DIN_COMPARATOR_EN_MASK))
+		return 0;
+
+	st->comp_gc = (struct gpio_chip) {
+		.owner = THIS_MODULE,
+		.label = AD74115_NAME,
+		.base = -1,
+		.ngpio = 1,
+		.parent = dev,
+		.can_sleep = true,
+		.get_direction = ad74115_comp_gpio_get_direction,
+		.get = ad74115_comp_gpio_get,
+		.set_config = ad74115_comp_gpio_set_config,
+	};
+
+	return devm_gpiochip_add_data(dev, &st->comp_gc, st);
+}
+
+static int ad74115_setup(struct iio_dev *indio_dev)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+	struct device *dev = &st->spi->dev;
+	u32 val, din_range_high;
+	unsigned int i;
+	int ret;
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_ch_func_fw_prop, &val);
+	if (ret)
+		return ret;
+
+	indio_dev->num_channels += ad74115_channels_map[val].num_channels;
+	st->ch_func = val;
+
+	ret = ad74115_setup_adc_conv2_range(st);
+	if (ret)
+		return ret;
+
+	val = device_property_read_bool(dev, "adi,dac-hart-slew");
+	if (val) {
+		st->dac_hart_slew = val;
+
+		ret = regmap_update_bits(st->regmap, AD74115_OUTPUT_CONFIG_REG,
+					 AD74115_OUTPUT_SLEW_EN_MASK,
+					 FIELD_PREP(AD74115_OUTPUT_SLEW_EN_MASK,
+						    AD74115_SLEW_MODE_HART));
+		if (ret)
+			return ret;
+	}
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_din_range_fw_prop,
+				    &din_range_high);
+	if (ret)
+		return ret;
+
+	ret = device_property_read_u32(dev, "adi,digital-input-sink-microamp", &val);
+	if (!ret) {
+		if (din_range_high)
+			val = DIV_ROUND_CLOSEST(val, AD74115_DIN_SINK_LOW_STEP);
+		else
+			val = DIV_ROUND_CLOSEST(val, AD74115_DIN_SINK_HIGH_STEP);
+
+		if (val > AD74115_DIN_SINK_MAX)
+			val = AD74115_DIN_SINK_MAX;
+
+		ret = regmap_update_bits(st->regmap, AD74115_DIN_CONFIG1_REG,
+					 AD74115_DIN_SINK_MASK,
+					 FIELD_PREP(AD74115_DIN_SINK_MASK, val));
+		if (ret)
+			return ret;
+	}
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_din_threshold_mode_fw_prop, &val);
+	if (ret)
+		return ret;
+
+	if (val == AD74115_DIN_THRESHOLD_MODE_AVDD) {
+		ret = regulator_get_voltage(st->avdd);
+		if (ret < 0)
+			return ret;
+
+		st->avdd_mv = ret / 1000;
+	}
+
+	st->din_threshold_mode = val;
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_dac_bipolar_fw_prop, &val);
+	if (ret)
+		return ret;
+
+	st->dac_bipolar = val;
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_rtd_mode_fw_prop, &val);
+	if (ret)
+		return ret;
+
+	st->rtd_mode_4_wire = val;
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_ext2_burnout_current_fw_prop, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		ret = regmap_update_bits(st->regmap, AD74115_BURNOUT_CONFIG_REG,
+					 AD74115_BURNOUT_EXT2_EN_MASK,
+					 FIELD_PREP(AD74115_BURNOUT_EXT2_EN_MASK, 1));
+		if (ret)
+			return ret;
+	}
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_ext1_burnout_current_fw_prop, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		ret = regmap_update_bits(st->regmap, AD74115_BURNOUT_CONFIG_REG,
+					 AD74115_BURNOUT_EXT1_EN_MASK,
+					 FIELD_PREP(AD74115_BURNOUT_EXT1_EN_MASK, 1));
+		if (ret)
+			return ret;
+	}
+
+	ret = ad74115_apply_fw_prop(st, &ad74115_viout_burnout_current_fw_prop, &val);
+	if (ret)
+		return ret;
+
+	if (val) {
+		ret = regmap_update_bits(st->regmap, AD74115_BURNOUT_CONFIG_REG,
+					 AD74115_BURNOUT_VIOUT_EN_MASK,
+					 FIELD_PREP(AD74115_BURNOUT_VIOUT_EN_MASK, 1));
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < AD74115_GPIO_NUM; i++) {
+		ret = ad74115_apply_fw_prop(st, &ad74115_gpio_mode_fw_props[i], &val);
+		if (ret)
+			return ret;
+
+		if (val == AD74115_GPIO_MODE_LOGIC)
+			st->gpio_valid_mask |= BIT(i);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(ad74115_fw_props); i++) {
+		ret = ad74115_apply_fw_prop(st, &ad74115_fw_props[i], &val);
+		if (ret)
+			return ret;
+	}
+
+	ret = ad74115_setup_gpio_chip(st);
+	if (ret)
+		return ret;
+
+	ret = ad74115_setup_comp_gpio_chip(st);
+	if (ret)
+		return ret;
+
+	return ad74115_setup_iio_channels(indio_dev);
+}
+
+static int ad74115_reset(struct ad74115_state *st)
+{
+	struct device *dev = &st->spi->dev;
+	struct gpio_desc *reset_gpio;
+	int ret;
+
+	reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(reset_gpio))
+		return dev_err_probe(dev, PTR_ERR(reset_gpio),
+				     "Failed to find reset GPIO\n");
+
+	if (reset_gpio) {
+		fsleep(100);
+
+		gpiod_set_value_cansleep(reset_gpio, 0);
+	} else {
+		ret = regmap_write(st->regmap, AD74115_CMD_KEY_REG,
+				   AD74115_CMD_KEY_RESET1);
+		if (ret)
+			return ret;
+
+		ret = regmap_write(st->regmap, AD74115_CMD_KEY_REG,
+				   AD74115_CMD_KEY_RESET2);
+		if (ret)
+			return ret;
+	}
+
+	fsleep(1000);
+
+	return 0;
+}
+
+static void ad74115_regulator_disable(void *data)
+{
+	regulator_disable(data);
+}
+
+static int ad74115_setup_trigger(struct iio_dev *indio_dev)
+{
+	struct ad74115_state *st = iio_priv(indio_dev);
+	struct device *dev = &st->spi->dev;
+	int ret;
+
+	st->irq = fwnode_irq_get_byname(dev_fwnode(dev), "adc_rdy");
+
+	if (st->irq == -EPROBE_DEFER)
+		return -EPROBE_DEFER;
+
+	if (st->irq < 0) {
+		st->irq = 0;
+		return 0;
+	}
+
+	ret = devm_request_irq(dev, st->irq, ad74115_adc_data_interrupt,
+			       0, AD74115_NAME, indio_dev);
+	if (ret)
+		return ret;
+
+	st->trig = devm_iio_trigger_alloc(dev, "%s-dev%d", AD74115_NAME,
+					  iio_device_id(indio_dev));
+	if (!st->trig)
+		return -ENOMEM;
+
+	st->trig->ops = &ad74115_trigger_ops;
+	iio_trigger_set_drvdata(st->trig, st);
+
+	ret = devm_iio_trigger_register(dev, st->trig);
+	if (ret)
+		return ret;
+
+	indio_dev->trig = iio_trigger_get(st->trig);
+
+	return 0;
+}
+
+static int ad74115_probe(struct spi_device *spi)
+{
+	static const char * const regulator_names[] = {
+		"avcc", "dvcc", "dovdd", "refin",
+	};
+	struct device *dev = &spi->dev;
+	struct ad74115_state *st;
+	struct iio_dev *indio_dev;
+	int ret;
+
+	indio_dev = devm_iio_device_alloc(dev, sizeof(*st));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	st = iio_priv(indio_dev);
+
+	st->spi = spi;
+	mutex_init(&st->lock);
+	init_completion(&st->adc_data_completion);
+
+	indio_dev->name = AD74115_NAME;
+	indio_dev->modes = INDIO_DIRECT_MODE;
+	indio_dev->info = &ad74115_info;
+
+	st->avdd = devm_regulator_get(dev, "avdd");
+	if (IS_ERR(st->avdd))
+		return PTR_ERR(st->avdd);
+
+	ret = regulator_enable(st->avdd);
+	if (ret) {
+		dev_err(dev, "Failed to enable avdd regulator\n");
+		return ret;
+	}
+
+	ret = devm_add_action_or_reset(dev, ad74115_regulator_disable, st->avdd);
+	if (ret)
+		return ret;
+
+	ret = devm_regulator_bulk_get_enable(dev, ARRAY_SIZE(regulator_names),
+					     regulator_names);
+	if (ret)
+		return ret;
+
+	st->regmap = devm_regmap_init(dev, NULL, st, &ad74115_regmap_config);
+	if (IS_ERR(st->regmap))
+		return PTR_ERR(st->regmap);
+
+	ret = ad74115_reset(st);
+	if (ret)
+		return ret;
+
+	ret = ad74115_setup(indio_dev);
+	if (ret)
+		return ret;
+
+	ret = ad74115_setup_trigger(indio_dev);
+	if (ret)
+		return ret;
+
+	ret = devm_iio_triggered_buffer_setup(dev, indio_dev, NULL,
+					      ad74115_trigger_handler,
+					      &ad74115_buffer_ops);
+	if (ret)
+		return ret;
+
+	return devm_iio_device_register(dev, indio_dev);
+}
+
+static int ad74115_unregister_driver(struct spi_driver *spi)
+{
+	spi_unregister_driver(spi);
+
+	return 0;
+}
+
+static int __init ad74115_register_driver(struct spi_driver *spi)
+{
+	crc8_populate_msb(ad74115_crc8_table, AD74115_CRC_POLYNOMIAL);
+
+	return spi_register_driver(spi);
+}
+
+static const struct spi_device_id ad74115_spi_id[] = {
+	{ "ad74115h" },
+	{ }
+};
+
+MODULE_DEVICE_TABLE(spi, ad74115_spi_id);
+
+static const struct of_device_id ad74115_dt_id[] = {
+	{ .compatible = "adi,ad74115h" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ad74115_dt_id);
+
+static struct spi_driver ad74115_driver = {
+	.driver = {
+		   .name = "ad74115",
+		   .of_match_table = ad74115_dt_id,
+	},
+	.probe = ad74115_probe,
+	.id_table = ad74115_spi_id,
+};
+
+module_driver(ad74115_driver,
+	      ad74115_register_driver, ad74115_unregister_driver);
+
+MODULE_AUTHOR("Cosmin Tanislav <cosmin.tanislav@analog.com>");
+MODULE_DESCRIPTION("Analog Devices AD74115 ADDAC");
+MODULE_LICENSE("GPL");
-- 
GitLab


From f35e1ee9cb5d617efeb0f3695ff65169eb2b9cdd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 16 Nov 2022 14:40:03 +0100
Subject: [PATCH 2537/4122] iio: imu: st_lsm6dsx: add support to LSM6DSO16IS

Add support to STM LSM6DSO16IS (accelerometer and gyroscope) Mems sensor.

Datasheet: https://www.st.com/resource/en/datasheet/lsm6dso16is.pdf
Tested-by: Mario Tesi <mario.tesi@st.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/df6a9d4653cd69f7204190f8b6a9b618fd48bd23.1668605631.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/Kconfig           |   4 +-
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h      |   2 +
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 128 ++++++++++++++++++-
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c  |   5 +
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c  |   5 +
 5 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig
index 37d02e0fc227a..0096035728cd4 100644
--- a/drivers/iio/imu/st_lsm6dsx/Kconfig
+++ b/drivers/iio/imu/st_lsm6dsx/Kconfig
@@ -13,8 +13,8 @@ config IIO_ST_LSM6DSX
 	  sensor. Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm,
 	  ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr,
 	  lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx,
-	  lsm6dsv, lsm6dsv16x, the accelerometer/gyroscope of lsm9ds1
-	  and lsm6dst.
+	  lsm6dsv, lsm6dsv16x, lsm6dso16is, the accelerometer/gyroscope
+	  of lsm9ds1 and lsm6dst.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called st_lsm6dsx.
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index 683cfadcf62ef..abf14a2ce0e95 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -35,6 +35,7 @@
 #define ST_LSM6DSTX_DEV_NAME	"lsm6dstx"
 #define ST_LSM6DSV_DEV_NAME	"lsm6dsv"
 #define ST_LSM6DSV16X_DEV_NAME	"lsm6dsv16x"
+#define ST_LSM6DSO16IS_DEV_NAME	"lsm6dso16is"
 
 enum st_lsm6dsx_hw_id {
 	ST_LSM6DS3_ID,
@@ -57,6 +58,7 @@ enum st_lsm6dsx_hw_id {
 	ST_LSM6DSTX_ID,
 	ST_LSM6DSV_ID,
 	ST_LSM6DSV16X_ID,
+	ST_LSM6DSO16IS_ID,
 	ST_LSM6DSX_MAX_ID,
 };
 
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index b680682f98330..57a79bf35bbaa 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -27,7 +27,7 @@
  *   - FIFO size: 4KB
  *
  * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/
- *   LSM6DSTX:
+ *   LSM6DSTX/LSM6DSO16IS:
  *   - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416,
  *     833
  *   - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16
@@ -1375,6 +1375,132 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
 			.wakeup_src_x_mask = BIT(2),
 		},
 	},
+	{
+		.reset = {
+			.addr = 0x12,
+			.mask = BIT(0),
+		},
+		.boot = {
+			.addr = 0x12,
+			.mask = BIT(7),
+		},
+		.bdu = {
+			.addr = 0x12,
+			.mask = BIT(6),
+		},
+		.id = {
+			{
+				.hw_id = ST_LSM6DSO16IS_ID,
+				.name = ST_LSM6DSO16IS_DEV_NAME,
+				.wai = 0x22,
+			},
+		},
+		.channels = {
+			[ST_LSM6DSX_ID_ACC] = {
+				.chan = st_lsm6dsx_acc_channels,
+				.len = ARRAY_SIZE(st_lsm6dsx_acc_channels),
+			},
+			[ST_LSM6DSX_ID_GYRO] = {
+				.chan = st_lsm6dsx_gyro_channels,
+				.len = ARRAY_SIZE(st_lsm6dsx_gyro_channels),
+			},
+		},
+		.odr_table = {
+			[ST_LSM6DSX_ID_ACC] = {
+				.reg = {
+					.addr = 0x10,
+					.mask = GENMASK(7, 4),
+				},
+				.odr_avl[0] = {  12500, 0x01 },
+				.odr_avl[1] = {  26000, 0x02 },
+				.odr_avl[2] = {  52000, 0x03 },
+				.odr_avl[3] = { 104000, 0x04 },
+				.odr_avl[4] = { 208000, 0x05 },
+				.odr_avl[5] = { 416000, 0x06 },
+				.odr_avl[6] = { 833000, 0x07 },
+				.odr_len = 7,
+			},
+			[ST_LSM6DSX_ID_GYRO] = {
+				.reg = {
+					.addr = 0x11,
+					.mask = GENMASK(7, 4),
+				},
+				.odr_avl[0] = {  12500, 0x01 },
+				.odr_avl[1] = {  26000, 0x02 },
+				.odr_avl[2] = {  52000, 0x03 },
+				.odr_avl[3] = { 104000, 0x04 },
+				.odr_avl[4] = { 208000, 0x05 },
+				.odr_avl[5] = { 416000, 0x06 },
+				.odr_avl[6] = { 833000, 0x07 },
+				.odr_len = 7,
+			},
+		},
+		.fs_table = {
+			[ST_LSM6DSX_ID_ACC] = {
+				.reg = {
+					.addr = 0x10,
+					.mask = GENMASK(3, 2),
+				},
+				.fs_avl[0] = {  IIO_G_TO_M_S_2(61000), 0x0 },
+				.fs_avl[1] = { IIO_G_TO_M_S_2(122000), 0x2 },
+				.fs_avl[2] = { IIO_G_TO_M_S_2(244000), 0x3 },
+				.fs_avl[3] = { IIO_G_TO_M_S_2(488000), 0x1 },
+				.fs_len = 4,
+			},
+			[ST_LSM6DSX_ID_GYRO] = {
+				.reg = {
+					.addr = 0x11,
+					.mask = GENMASK(3, 2),
+				},
+				.fs_avl[0] = {  IIO_DEGREE_TO_RAD(8750000), 0x0 },
+				.fs_avl[1] = { IIO_DEGREE_TO_RAD(17500000), 0x1 },
+				.fs_avl[2] = { IIO_DEGREE_TO_RAD(35000000), 0x2 },
+				.fs_avl[3] = { IIO_DEGREE_TO_RAD(70000000), 0x3 },
+				.fs_len = 4,
+			},
+		},
+		.irq_config = {
+			.hla = {
+				.addr = 0x12,
+				.mask = BIT(5),
+			},
+			.od = {
+				.addr = 0x12,
+				.mask = BIT(4),
+			},
+		},
+		.shub_settings = {
+			.page_mux = {
+				.addr = 0x01,
+				.mask = BIT(6),
+			},
+			.master_en = {
+				.sec_page = true,
+				.addr = 0x14,
+				.mask = BIT(2),
+			},
+			.pullup_en = {
+				.sec_page = true,
+				.addr = 0x14,
+				.mask = BIT(3),
+			},
+			.aux_sens = {
+				.addr = 0x14,
+				.mask = GENMASK(1, 0),
+			},
+			.wr_once = {
+				.addr = 0x14,
+				.mask = BIT(6),
+			},
+			.num_ext_dev = 3,
+			.shub_out = {
+				.sec_page = true,
+				.addr = 0x02,
+			},
+			.slv0_addr = 0x15,
+			.dw_slv0_addr = 0x21,
+		},
+	},
 };
 
 int st_lsm6dsx_set_page(struct st_lsm6dsx_hw *hw, bool enable)
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
index 3570fac1b6125..c34ccc85e4c71 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
@@ -117,6 +117,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = {
 		.compatible = "st,lsm6dsv16x",
 		.data = (void *)ST_LSM6DSV16X_ID,
 	},
+	{
+		.compatible = "st,lsm6dso16is",
+		.data = (void *)ST_LSM6DSO16IS_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match);
@@ -142,6 +146,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = {
 	{ ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID },
 	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
 	{ ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID },
+	{ ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table);
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
index 57597aaa2a92a..24d5e51a8662e 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
@@ -117,6 +117,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = {
 		.compatible = "st,lsm6dsv16x",
 		.data = (void *)ST_LSM6DSV16X_ID,
 	},
+	{
+		.compatible = "st,lsm6dso16is",
+		.data = (void *)ST_LSM6DSO16IS_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match);
@@ -142,6 +146,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = {
 	{ ST_LSM6DSTX_DEV_NAME, ST_LSM6DSTX_ID },
 	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
 	{ ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID },
+	{ ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table);
-- 
GitLab


From 1842fff0f7b603e587e0fe45804d9ebfba329e38 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 16 Nov 2022 14:40:04 +0100
Subject: [PATCH 2538/4122] dt-bindings: iio: imu: st_lsm6dsx: add lsm6dso16is

Add device bindings for lsm6dso16is IMU sensor.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/55b15b3e8453a12edcf8195ef9c9243a76f87096.1668605631.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
index 5933270799f55..07d5aee7e4421 100644
--- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
+++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
@@ -33,6 +33,7 @@ properties:
           - st,lsm6dst
           - st,lsm6dsop
           - st,lsm6dsv
+          - st,lsm6dso16is
       - items:
           - const: st,asm330lhhx
           - const: st,lsm6dsr
-- 
GitLab


From 58ae95f9671ddf98082e021ee42643d126912e71 Mon Sep 17 00:00:00 2001
From: Luca Weiss <luca@z3ntu.xyz>
Date: Mon, 31 Oct 2022 19:24:54 +0100
Subject: [PATCH 2539/4122] dt-bindings: iio/adc: qcom,spmi-iadc: use double
 compatibles

As in other bindings, let's use specific compatibles together with the
fallback compatible. Adjust the bindings for it.

Signed-off-by: Luca Weiss <luca@z3ntu.xyz>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221031182456.952648-1-luca@z3ntu.xyz
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml        | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml
index 2a94db6888308..fa855baa368c3 100644
--- a/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/qcom,spmi-iadc.yaml
@@ -18,7 +18,10 @@ description: |
 
 properties:
   compatible:
-    const: qcom,spmi-iadc
+    items:
+      - enum:
+          - qcom,pm8941-iadc
+      - const: qcom,spmi-iadc
 
   reg:
     description: IADC base address and length in the SPMI PMIC register map
@@ -50,7 +53,7 @@ examples:
         #address-cells = <1>;
         #size-cells = <0>;
         pmic_iadc: adc@3600 {
-            compatible = "qcom,spmi-iadc";
+            compatible = "qcom,pm8941-iadc", "qcom,spmi-iadc";
             reg = <0x3600>;
             interrupts = <0x0 0x36 0x0 IRQ_TYPE_EDGE_RISING>;
             qcom,external-resistor-micro-ohms = <10000>;
-- 
GitLab


From 7b2366008125b0849dcbd18afbcb33cbc30c3477 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 15 Nov 2022 10:55:15 +0100
Subject: [PATCH 2540/4122] iio: addac: ad74413r: add spi_device_id table

Silence the run-time warning

  SPI driver ad74413r has no spi_device_id for adi,ad74412r

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Link: https://lore.kernel.org/r/20221115095517.1008632-2-linux@rasmusvillemoes.dk
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/addac/ad74413r.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c
index e0e130ba9d3ec..29b3a5775f236 100644
--- a/drivers/iio/addac/ad74413r.c
+++ b/drivers/iio/addac/ad74413r.c
@@ -1305,6 +1305,15 @@ static int ad74413r_probe(struct spi_device *spi)
 	st->spi = spi;
 	st->dev = &spi->dev;
 	st->chip_info = device_get_match_data(&spi->dev);
+	if (!st->chip_info) {
+		const struct spi_device_id *id = spi_get_device_id(spi);
+		if (id)
+			st->chip_info =
+				(struct ad74413r_chip_info *)id->driver_data;
+		if (!st->chip_info)
+			return -EINVAL;
+	}
+
 	mutex_init(&st->lock);
 	init_completion(&st->adc_data_completion);
 
@@ -1457,12 +1466,20 @@ static const struct of_device_id ad74413r_dt_id[] = {
 };
 MODULE_DEVICE_TABLE(of, ad74413r_dt_id);
 
+static const struct spi_device_id ad74413r_spi_id[] = {
+	{ .name = "ad74412r", .driver_data = (kernel_ulong_t)&ad74412r_chip_info_data },
+	{ .name = "ad74413r", .driver_data = (kernel_ulong_t)&ad74413r_chip_info_data },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, ad74413r_spi_id);
+
 static struct spi_driver ad74413r_driver = {
 	.driver = {
 		   .name = "ad74413r",
 		   .of_match_table = ad74413r_dt_id,
 	},
 	.probe = ad74413r_probe,
+	.id_table = ad74413r_spi_id,
 };
 
 module_driver(ad74413r_driver,
-- 
GitLab


From 6aaf7045697aa93589bea1b33e751814b0776991 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 15 Nov 2022 10:55:16 +0100
Subject: [PATCH 2541/4122] dt-bindings: iio: ad74413r: add optional
 reset-gpios

The ad74412 and ad74413 devices have an active-low reset pin. Add a
binding allowing one to specify a gpio tied to that.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221115095517.1008632-3-linux@rasmusvillemoes.dk
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
index 58b3ae14ccaa6..9eb3ecc8bbc87 100644
--- a/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
+++ b/Documentation/devicetree/bindings/iio/addac/adi,ad74413r.yaml
@@ -51,6 +51,9 @@ properties:
       Shunt (sense) resistor value in micro-Ohms.
     default: 100000000
 
+  reset-gpios:
+    maxItems: 1
+
 required:
   - compatible
   - reg
@@ -129,6 +132,7 @@ examples:
         interrupts = <26 IRQ_TYPE_EDGE_FALLING>;
 
         refin-supply = <&ad74413r_refin>;
+        reset-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>;
 
         channel@0 {
           reg = <0>;
-- 
GitLab


From f237cf1914e24ebba88670ecc36e8209c888c9a2 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 15 Nov 2022 10:55:17 +0100
Subject: [PATCH 2542/4122] iio: addac: ad74413r: add support for reset-gpio

We have a board where the reset pin of the ad74412 is connected to a
gpio, but also pulled low (i.e., asserted) by default. Hence to get
the chip out of reset, the driver needs to know about that gpio and
deassert the reset signal before attempting to communicate with the
chip.

When a reset-gpio is given in device tree, use that instead of the
software reset. According to the data sheet, the two methods are
functionally equivalent.

Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Link: https://lore.kernel.org/r/20221115095517.1008632-4-linux@rasmusvillemoes.dk
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/addac/ad74413r.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c
index 29b3a5775f236..61030053cbeaa 100644
--- a/drivers/iio/addac/ad74413r.c
+++ b/drivers/iio/addac/ad74413r.c
@@ -71,6 +71,7 @@ struct ad74413r_state {
 	struct regmap			*regmap;
 	struct device			*dev;
 	struct iio_trigger		*trig;
+	struct gpio_desc		*reset_gpio;
 
 	size_t			adc_active_channels;
 	struct spi_message	adc_samples_msg;
@@ -393,6 +394,13 @@ static int ad74413r_reset(struct ad74413r_state *st)
 {
 	int ret;
 
+	if (st->reset_gpio) {
+		gpiod_set_value_cansleep(st->reset_gpio, 1);
+		fsleep(50);
+		gpiod_set_value_cansleep(st->reset_gpio, 0);
+		return 0;
+	}
+
 	ret = regmap_write(st->regmap, AD74413R_REG_CMD_KEY,
 			   AD74413R_CMD_KEY_RESET1);
 	if (ret)
@@ -1322,6 +1330,10 @@ static int ad74413r_probe(struct spi_device *spi)
 	if (IS_ERR(st->regmap))
 		return PTR_ERR(st->regmap);
 
+	st->reset_gpio = devm_gpiod_get_optional(st->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(st->reset_gpio))
+		return PTR_ERR(st->reset_gpio);
+
 	st->refin_reg = devm_regulator_get(st->dev, "refin");
 	if (IS_ERR(st->refin_reg))
 		return dev_err_probe(st->dev, PTR_ERR(st->refin_reg),
-- 
GitLab


From 9d901e356c8d3640940cb72ff7969f03e85c2e95 Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan@foss.st.com>
Date: Tue, 15 Nov 2022 11:31:21 +0100
Subject: [PATCH 2543/4122] iio: adc: stm32-adc: smart calibration support

Add smart calibration support for STM32MP1.
- STM32MP15x: both linear & offset calibration are supported
- STM32MP13x: Only offset calibration is supported

Linear calibration:
Linear calibration is SoC dependent and does not change over time
so it can be done only once.
Linear calibration may have already been done in u-boot.
Skip calibration execution if calibration data are already available.
Save calibration factors in private data and restore them from private
data on next ADC start.

Offset calibration:
This calibration may vary over time, depending on temperature or voltage.
Run offset single-ended and differential calibration on each ADC start,
as it is not time consuming. This calibration do not need to be saved.
So, remove calfact_s and calfact_d value and bitfields that are no
longer used.

Signed-off-by: Olivier Moysan <olivier.moysan@foss.st.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/20221115103124.70074-2-olivier.moysan@foss.st.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/stm32-adc-core.h |   1 +
 drivers/iio/adc/stm32-adc.c      | 108 +++++++++++++++----------------
 2 files changed, 53 insertions(+), 56 deletions(-)

diff --git a/drivers/iio/adc/stm32-adc-core.h b/drivers/iio/adc/stm32-adc-core.h
index 9d6dfa1c03faf..73b2c2e91c088 100644
--- a/drivers/iio/adc/stm32-adc-core.h
+++ b/drivers/iio/adc/stm32-adc-core.h
@@ -142,6 +142,7 @@
 #define STM32H7_LINCALRDYW3		BIT(24)
 #define STM32H7_LINCALRDYW2		BIT(23)
 #define STM32H7_LINCALRDYW1		BIT(22)
+#define STM32H7_LINCALRDYW_MASK		GENMASK(27, 22)
 #define STM32H7_ADCALLIN		BIT(16)
 #define STM32H7_BOOST			BIT(8)
 #define STM32H7_ADSTP			BIT(4)
diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 8d03d21a33d60..2b2b55eb130ed 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -119,16 +119,12 @@ struct stm32_adc_trig_info {
 
 /**
  * struct stm32_adc_calib - optional adc calibration data
- * @calfact_s: Calibration offset for single ended channels
- * @calfact_d: Calibration offset in differential
  * @lincalfact: Linearity calibration factor
- * @calibrated: Indicates calibration status
+ * @lincal_saved: Indicates that linear calibration factors are saved
  */
 struct stm32_adc_calib {
-	u32			calfact_s;
-	u32			calfact_d;
 	u32			lincalfact[STM32H7_LINCALFACT_NUM];
-	bool			calibrated;
+	bool			lincal_saved;
 };
 
 /**
@@ -165,8 +161,6 @@ struct stm32_adc_vrefint {
  * @extsel:		trigger selection register & bitfield
  * @res:		resolution selection register & bitfield
  * @difsel:		differential mode selection register & bitfield
- * @calfact_s:		single-ended calibration factors register & bitfield
- * @calfact_d:		differential calibration factors register & bitfield
  * @smpr:		smpr1 & smpr2 registers offset array
  * @smp_bits:		smpr1 & smpr2 index and bitfields
  * @or_vddcore:		option register & vddcore bitfield
@@ -186,8 +180,6 @@ struct stm32_adc_regspec {
 	const struct stm32_adc_regs extsel;
 	const struct stm32_adc_regs res;
 	const struct stm32_adc_regs difsel;
-	const struct stm32_adc_regs calfact_s;
-	const struct stm32_adc_regs calfact_d;
 	const u32 smpr[2];
 	const struct stm32_adc_regs *smp_bits;
 	const struct stm32_adc_regs or_vddcore;
@@ -525,10 +517,6 @@ static const struct stm32_adc_regspec stm32h7_adc_regspec = {
 		    STM32H7_EXTSEL_SHIFT },
 	.res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT },
 	.difsel = { STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK},
-	.calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK,
-		       STM32H7_CALFACT_S_SHIFT },
-	.calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK,
-		       STM32H7_CALFACT_D_SHIFT },
 	.smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 },
 	.smp_bits = stm32h7_smp_bits,
 };
@@ -550,10 +538,6 @@ static const struct stm32_adc_regspec stm32mp13_adc_regspec = {
 		    STM32H7_EXTSEL_SHIFT },
 	.res = { STM32H7_ADC_CFGR, STM32MP13_RES_MASK, STM32MP13_RES_SHIFT },
 	.difsel = { STM32MP13_ADC_DIFSEL, STM32MP13_DIFSEL_MASK},
-	.calfact_s = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_S_MASK,
-		       STM32MP13_CALFACT_S_SHIFT },
-	.calfact_d = { STM32MP13_ADC_CALFACT, STM32MP13_CALFACT_D_MASK,
-		       STM32MP13_CALFACT_D_SHIFT },
 	.smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 },
 	.smp_bits = stm32h7_smp_bits,
 	.or_vddcore = { STM32MP13_ADC2_OR, STM32MP13_OP0 },
@@ -575,10 +559,6 @@ static const struct stm32_adc_regspec stm32mp1_adc_regspec = {
 		    STM32H7_EXTSEL_SHIFT },
 	.res = { STM32H7_ADC_CFGR, STM32H7_RES_MASK, STM32H7_RES_SHIFT },
 	.difsel = { STM32H7_ADC_DIFSEL, STM32H7_DIFSEL_MASK},
-	.calfact_s = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_S_MASK,
-		       STM32H7_CALFACT_S_SHIFT },
-	.calfact_d = { STM32H7_ADC_CALFACT, STM32H7_CALFACT_D_MASK,
-		       STM32H7_CALFACT_D_SHIFT },
 	.smpr = { STM32H7_ADC_SMPR1, STM32H7_ADC_SMPR2 },
 	.smp_bits = stm32h7_smp_bits,
 	.or_vddcore = { STM32MP1_ADC2_OR, STM32MP1_VDDCOREEN },
@@ -1000,9 +980,6 @@ static int stm32h7_adc_read_selfcalib(struct iio_dev *indio_dev)
 	int i, ret;
 	u32 lincalrdyw_mask, val;
 
-	if (!adc->cfg->has_linearcal)
-		goto skip_linearcal;
-
 	/* Read linearity calibration */
 	lincalrdyw_mask = STM32H7_LINCALRDYW6;
 	for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) {
@@ -1024,15 +1001,7 @@ static int stm32h7_adc_read_selfcalib(struct iio_dev *indio_dev)
 
 		lincalrdyw_mask >>= 1;
 	}
-
-skip_linearcal:
-	/* Read offset calibration */
-	val = stm32_adc_readl(adc, adc->cfg->regs->calfact_s.reg);
-	adc->cal.calfact_s = (val & adc->cfg->regs->calfact_s.mask);
-	adc->cal.calfact_s >>= adc->cfg->regs->calfact_s.shift;
-	adc->cal.calfact_d = (val & adc->cfg->regs->calfact_d.mask);
-	adc->cal.calfact_d >>= adc->cfg->regs->calfact_d.shift;
-	adc->cal.calibrated = true;
+	adc->cal.lincal_saved = true;
 
 	return 0;
 }
@@ -1048,13 +1017,6 @@ static int stm32h7_adc_restore_selfcalib(struct iio_dev *indio_dev)
 	int i, ret;
 	u32 lincalrdyw_mask, val;
 
-	val = (adc->cal.calfact_s << adc->cfg->regs->calfact_s.shift) |
-		(adc->cal.calfact_d << adc->cfg->regs->calfact_d.shift);
-	stm32_adc_writel(adc, adc->cfg->regs->calfact_s.reg, val);
-
-	if (!adc->cfg->has_linearcal)
-		return 0;
-
 	lincalrdyw_mask = STM32H7_LINCALRDYW6;
 	for (i = STM32H7_LINCALFACT_NUM - 1; i >= 0; i--) {
 		/*
@@ -1116,19 +1078,20 @@ static int stm32h7_adc_restore_selfcalib(struct iio_dev *indio_dev)
 /**
  * stm32h7_adc_selfcalib() - Procedure to calibrate ADC
  * @indio_dev: IIO device instance
+ * @do_lincal: linear calibration request flag
  * Note: Must be called once ADC is out of power down.
+ *
+ * Run offset calibration unconditionally.
+ * Run linear calibration if requested & supported.
  */
-static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev)
+static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev, int do_lincal)
 {
 	struct stm32_adc *adc = iio_priv(indio_dev);
 	int ret;
 	u32 msk = STM32H7_ADCALDIF;
 	u32 val;
 
-	if (adc->cal.calibrated)
-		return true;
-
-	if (adc->cfg->has_linearcal)
+	if (adc->cfg->has_linearcal && do_lincal)
 		msk |= STM32H7_ADCALLIN;
 	/* ADC must be disabled for calibration */
 	stm32h7_adc_disable(indio_dev);
@@ -1172,6 +1135,33 @@ out:
 	return ret;
 }
 
+/**
+ * stm32h7_adc_check_selfcalib() - Check linear calibration status
+ * @indio_dev: IIO device instance
+ *
+ * Used to check if linear calibration has been done.
+ * Return true if linear calibration factors are already saved in private data
+ * or if a linear calibration has been done at boot stage.
+ */
+static int stm32h7_adc_check_selfcalib(struct iio_dev *indio_dev)
+{
+	struct stm32_adc *adc = iio_priv(indio_dev);
+	u32 val;
+
+	if (adc->cal.lincal_saved)
+		return true;
+
+	/*
+	 * Check if linear calibration factors are available in ADC registers,
+	 * by checking that all LINCALRDYWx bits are set.
+	 */
+	val = stm32_adc_readl(adc, STM32H7_ADC_CR) & STM32H7_LINCALRDYW_MASK;
+	if (val == STM32H7_LINCALRDYW_MASK)
+		return true;
+
+	return false;
+}
+
 /**
  * stm32h7_adc_prepare() - Leave power down mode to enable ADC.
  * @indio_dev: IIO device instance
@@ -1186,16 +1176,20 @@ out:
 static int stm32h7_adc_prepare(struct iio_dev *indio_dev)
 {
 	struct stm32_adc *adc = iio_priv(indio_dev);
-	int calib, ret;
+	int lincal_done = false;
+	int ret;
 
 	ret = stm32h7_adc_exit_pwr_down(indio_dev);
 	if (ret)
 		return ret;
 
-	ret = stm32h7_adc_selfcalib(indio_dev);
+	if (adc->cfg->has_linearcal)
+		lincal_done = stm32h7_adc_check_selfcalib(indio_dev);
+
+	/* Always run offset calibration. Run linear calibration only once */
+	ret = stm32h7_adc_selfcalib(indio_dev, !lincal_done);
 	if (ret < 0)
 		goto pwr_dwn;
-	calib = ret;
 
 	stm32_adc_int_ch_enable(indio_dev);
 
@@ -1205,13 +1199,15 @@ static int stm32h7_adc_prepare(struct iio_dev *indio_dev)
 	if (ret)
 		goto ch_disable;
 
-	/* Either restore or read calibration result for future reference */
-	if (calib)
-		ret = stm32h7_adc_restore_selfcalib(indio_dev);
-	else
-		ret = stm32h7_adc_read_selfcalib(indio_dev);
-	if (ret)
-		goto disable;
+	if (adc->cfg->has_linearcal) {
+		if (!adc->cal.lincal_saved)
+			ret = stm32h7_adc_read_selfcalib(indio_dev);
+		else
+			ret = stm32h7_adc_restore_selfcalib(indio_dev);
+
+		if (ret)
+			goto disable;
+	}
 
 	if (adc->cfg->has_presel)
 		stm32_adc_writel(adc, STM32H7_ADC_PCSEL, adc->pcsel);
-- 
GitLab


From 2206732b9ad1be8c59f714f2912539bf95cf2961 Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan@foss.st.com>
Date: Tue, 15 Nov 2022 11:31:22 +0100
Subject: [PATCH 2544/4122] iio: adc: stm32-adc: improve calibration error log

Add more information in calibration error log to differentiate
single-ended and differential calibration.

Signed-off-by: Olivier Moysan <olivier.moysan@foss.st.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/20221115103124.70074-3-olivier.moysan@foss.st.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/stm32-adc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 2b2b55eb130ed..65dd455375050 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -1109,7 +1109,7 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev, int do_lincal)
 					   !(val & STM32H7_ADCAL), 100,
 					   STM32H7_ADC_CALIB_TIMEOUT_US);
 	if (ret) {
-		dev_err(&indio_dev->dev, "calibration failed\n");
+		dev_err(&indio_dev->dev, "calibration (single-ended) error %d\n", ret);
 		goto out;
 	}
 
@@ -1125,7 +1125,8 @@ static int stm32h7_adc_selfcalib(struct iio_dev *indio_dev, int do_lincal)
 					   !(val & STM32H7_ADCAL), 100,
 					   STM32H7_ADC_CALIB_TIMEOUT_US);
 	if (ret) {
-		dev_err(&indio_dev->dev, "calibration failed\n");
+		dev_err(&indio_dev->dev, "calibration (diff%s) error %d\n",
+			(msk & STM32H7_ADCALLIN) ? "+linear" : "", ret);
 		goto out;
 	}
 
-- 
GitLab


From 51bcacc6fce8db7085c8652069b68123c6c1897c Mon Sep 17 00:00:00 2001
From: Olivier Moysan <olivier.moysan@foss.st.com>
Date: Tue, 15 Nov 2022 11:31:23 +0100
Subject: [PATCH 2545/4122] iio: adc: stm32-adc: add debugfs to read raw
 calibration result

Add debugfs to read linear ADC STM32 self calibration results.

Signed-off-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Signed-off-by: Olivier Moysan <olivier.moysan@foss.st.com>
Reviewed-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/20221115103124.70074-4-olivier.moysan@foss.st.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/stm32-adc.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/drivers/iio/adc/stm32-adc.c b/drivers/iio/adc/stm32-adc.c
index 65dd455375050..45d4e79f8e55e 100644
--- a/drivers/iio/adc/stm32-adc.c
+++ b/drivers/iio/adc/stm32-adc.c
@@ -7,6 +7,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
@@ -1879,6 +1880,23 @@ static const struct iio_chan_spec_ext_info stm32_adc_ext_info[] = {
 	{},
 };
 
+static void stm32_adc_debugfs_init(struct iio_dev *indio_dev)
+{
+	struct stm32_adc *adc = iio_priv(indio_dev);
+	struct dentry *d = iio_get_debugfs_dentry(indio_dev);
+	struct stm32_adc_calib *cal = &adc->cal;
+	char buf[16];
+	unsigned int i;
+
+	if (!adc->cfg->has_linearcal)
+		return;
+
+	for (i = 0; i < STM32H7_LINCALFACT_NUM; i++) {
+		snprintf(buf, sizeof(buf), "lincalfact%d", i + 1);
+		debugfs_create_u32(buf, 0444, d, &cal->lincalfact[i]);
+	}
+}
+
 static int stm32_adc_fw_get_resolution(struct iio_dev *indio_dev)
 {
 	struct device *dev = &indio_dev->dev;
@@ -2465,6 +2483,9 @@ static int stm32_adc_probe(struct platform_device *pdev)
 	pm_runtime_mark_last_busy(dev);
 	pm_runtime_put_autosuspend(dev);
 
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		stm32_adc_debugfs_init(indio_dev);
+
 	return 0;
 
 err_hw_stop:
@@ -2493,6 +2514,7 @@ static int stm32_adc_remove(struct platform_device *pdev)
 	struct stm32_adc *adc = iio_priv(indio_dev);
 
 	pm_runtime_get_sync(&pdev->dev);
+	/* iio_device_unregister() also removes debugfs entries */
 	iio_device_unregister(indio_dev);
 	stm32_adc_hw_stop(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
-- 
GitLab


From e2af60f5900c6ade53477b494ffb54690eee11f5 Mon Sep 17 00:00:00 2001
From: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Date: Tue, 15 Nov 2022 14:23:23 +0100
Subject: [PATCH 2546/4122] iio: adc128s052: add proper .data members in
 adc128_of_match table

Prior to commit bd5d54e4d49d ("iio: adc128s052: add ACPI _HID
AANT1280"), the driver unconditionally used spi_get_device_id() to get
the index into the adc128_config array.

However, with that commit, OF-based boards now incorrectly treat all
supported sensors as if they are an adc128s052, because all the .data
members of the adc128_of_match table are implicitly 0. Our board,
which has an adc122s021, thus exposes 8 channels whereas it really
only has two.

Fixes: bd5d54e4d49d ("iio: adc128s052: add ACPI _HID AANT1280")
Signed-off-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221115132324.1078169-1-linux@rasmusvillemoes.dk
Cc: <Stable@vger.kernel.org>
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/adc/ti-adc128s052.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/iio/adc/ti-adc128s052.c b/drivers/iio/adc/ti-adc128s052.c
index 622fd384983c7..b3d5b9b7255bc 100644
--- a/drivers/iio/adc/ti-adc128s052.c
+++ b/drivers/iio/adc/ti-adc128s052.c
@@ -181,13 +181,13 @@ static int adc128_probe(struct spi_device *spi)
 }
 
 static const struct of_device_id adc128_of_match[] = {
-	{ .compatible = "ti,adc128s052", },
-	{ .compatible = "ti,adc122s021", },
-	{ .compatible = "ti,adc122s051", },
-	{ .compatible = "ti,adc122s101", },
-	{ .compatible = "ti,adc124s021", },
-	{ .compatible = "ti,adc124s051", },
-	{ .compatible = "ti,adc124s101", },
+	{ .compatible = "ti,adc128s052", .data = (void*)0L, },
+	{ .compatible = "ti,adc122s021", .data = (void*)1L, },
+	{ .compatible = "ti,adc122s051", .data = (void*)1L, },
+	{ .compatible = "ti,adc122s101", .data = (void*)1L, },
+	{ .compatible = "ti,adc124s021", .data = (void*)2L, },
+	{ .compatible = "ti,adc124s051", .data = (void*)2L, },
+	{ .compatible = "ti,adc124s101", .data = (void*)2L, },
 	{ /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, adc128_of_match);
-- 
GitLab


From 32abe97b48fb1532c752495dab3f8255db8f5c2a Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Tue, 15 Nov 2022 12:41:28 +0000
Subject: [PATCH 2547/4122] dt-bindings: iio: adc: renesas,rzg2l-adc: Document
 RZ/Five SoC

The ADC block on the RZ/Five SoC is identical to one found on the RZ/G2UL
SoC. "renesas,r9a07g043-adc" compatible string will be used on the RZ/Five
SoC so to make this clear, update the comment to include RZ/Five SoC.

No driver changes are required as generic compatible string
"renesas,rzg2l-adc" will be used as a fallback on RZ/Five SoC.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20221115124128.1183144-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml          | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml b/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml
index 61c6157cf5a96..8b743742a5f9f 100644
--- a/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml
@@ -19,7 +19,7 @@ properties:
   compatible:
     items:
       - enum:
-          - renesas,r9a07g043-adc   # RZ/G2UL
+          - renesas,r9a07g043-adc   # RZ/G2UL and RZ/Five
           - renesas,r9a07g044-adc   # RZ/G2L
           - renesas,r9a07g054-adc   # RZ/V2L
       - const: renesas,rzg2l-adc
-- 
GitLab


From a0666f7f0f9470bfe64ab60324bc0a7830ce9035 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Tue, 15 Nov 2022 17:16:53 +0200
Subject: [PATCH 2548/4122] dt-bindings: iio: temperature: ltc2983: drop $ref
 for -nanoamp properties

Currently there are -nanoamp properties both with and without a $ref.
dt-schema has been modified to handle it as a standard unit, but the
change has been reverted since there were still occurrences of
-nanoamp properties with a $ref.
Remove this since it's the only occurrence left.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221115151653.393559-1-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../devicetree/bindings/iio/temperature/adi,ltc2983.yaml         | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
index 44f8b0672f533..b69813f281dad 100644
--- a/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
+++ b/Documentation/devicetree/bindings/iio/temperature/adi,ltc2983.yaml
@@ -325,7 +325,6 @@ patternProperties:
         description:
           Excitation current applied to the thermistor.
           0 sets the sensor in auto-range mode.
-        $ref: /schemas/types.yaml#/definitions/uint32
         enum: [0, 250, 500, 1000, 5000, 10000, 25000, 50000, 100000, 250000,
                500000, 1000000]
         default: 0
-- 
GitLab


From e0736b1bb71b21faeebf85efc725d01c7ecf2ebe Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Tue, 15 Nov 2022 17:19:55 +0200
Subject: [PATCH 2549/4122] dt-bindings: iio: adc: ad4130: use
 spi-peripheral-props.yaml

Reference the "spi-peripheral-props.yaml" schema to allow using
all SPI device properties.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Acked-by: Rob Herring <robh@kernel.org>
Link: https://lore.kernel.org/r/20221115151955.394030-1-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml
index 28ebd38b9db44..d00690a8d3fbd 100644
--- a/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adi,ad4130.yaml
@@ -190,7 +190,10 @@ patternProperties:
       - reg
       - diff-channels
 
-additionalProperties: false
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
 
 examples:
   - |
-- 
GitLab


From 1407438a7ad513a9dd5c70bc996200f97960584c Mon Sep 17 00:00:00 2001
From: Antoniu Miclaus <antoniu.miclaus@analog.com>
Date: Tue, 15 Nov 2022 13:00:40 +0200
Subject: [PATCH 2550/4122] dt-bindings: iio: frequency: add adf4377 doc

Add device tree bindings for the ADF4377 driver.

Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221115110041.71495-1-antoniu.miclaus@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 .../bindings/iio/frequency/adi,adf4377.yaml   | 92 +++++++++++++++++++
 MAINTAINERS                                   |  8 ++
 2 files changed, 100 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml

diff --git a/Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml b/Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml
new file mode 100644
index 0000000000000..aa6a3193b4e03
--- /dev/null
+++ b/Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iio/frequency/adi,adf4377.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ADF4377 Microwave Wideband Synthesizer with Integrated VCO
+
+maintainers:
+  - Antoniu Miclaus <antoniu.miclaus@analog.com>
+  - Dragos Bogdan <dragos.bogdan@analog.com>
+
+description: |
+   The ADF4377 is a high performance, ultralow jitter, dual output integer-N
+   phased locked loop (PLL) with integrated voltage controlled oscillator (VCO)
+   ideally suited for data converter and mixed signal front end (MxFE) clock
+   applications.
+
+   https://www.analog.com/en/products/adf4377.html
+
+properties:
+  compatible:
+    enum:
+      - adi,adf4377
+      - adi,adf4378
+
+  reg:
+    maxItems: 1
+
+  spi-max-frequency:
+    maximum: 10000000
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    description:
+      External clock that provides reference input frequency.
+    items:
+      - const: ref_in
+
+  chip-enable-gpios:
+    description:
+      GPIO that controls the Chip Enable Pin.
+    maxItems: 1
+
+  clk1-enable-gpios:
+    description:
+      GPIO that controls the Enable Clock 1 Output Buffer Pin.
+    maxItems: 1
+
+  clk2-enable-gpios:
+    description:
+      GPIO that controls the Enable Clock 2 Output Buffer Pin.
+    maxItems: 1
+
+  adi,muxout-select:
+    description:
+      On chip multiplexer output selection.
+      high_z - MUXOUT Pin set to high-Z.
+      lock_detect - MUXOUT Pin set to lock detector output.
+      muxout_low - MUXOUT Pin set to low.
+      f_div_rclk_2 - MUXOUT Pin set to fDIV_RCLK/2.
+      f_div_nclk_2 - MUXOUT Pin set to fDIV_NCLK/2.
+      muxout_high - MUXOUT Pin set to high.
+    enum: [high_z, lock_detect, muxout_low, f_div_rclk_2, f_div_nclk_2, muxout_high]
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+
+allOf:
+  - $ref: /schemas/spi/spi-peripheral-props.yaml#
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        frequency@0 {
+            compatible = "adi,adf4377";
+            reg = <0>;
+            spi-max-frequency = <10000000>;
+            clocks = <&adf4377_ref_in>;
+            clock-names = "ref_in";
+        };
+    };
+...
diff --git a/MAINTAINERS b/MAINTAINERS
index 860075c493dd2..82e23ae055e11 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1206,6 +1206,14 @@ W:	https://ez.analog.com/linux-software-drivers
 F:	Documentation/devicetree/bindings/iio/amplifiers/adi,ada4250.yaml
 F:	drivers/iio/amplifiers/ada4250.c
 
+ANALOG DEVICES INC ADF4377 DRIVER
+M:	Antoniu Miclaus <antoniu.miclaus@analog.com>
+L:	linux-iio@vger.kernel.org
+S:	Supported
+W:	https://ez.analog.com/linux-software-drivers
+F:	Documentation/devicetree/bindings/iio/frequency/adi,adf4377.yaml
+F:	drivers/iio/frequency/adf4377.c
+
 ANALOG DEVICES INC ADGS1408 DRIVER
 M:	Mircea Caprioru <mircea.caprioru@analog.com>
 S:	Supported
-- 
GitLab


From eda549e2e52496d0d374ce457f514a4f14172aa5 Mon Sep 17 00:00:00 2001
From: Antoniu Miclaus <antoniu.miclaus@analog.com>
Date: Tue, 15 Nov 2022 13:00:41 +0200
Subject: [PATCH 2551/4122] iio: frequency: adf4377: add support for ADF4377
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The ADF4377 is a high performance, ultralow jitter, dual output integer-N
phased locked loop (PLL) with integrated voltage controlled oscillator
(VCO) ideally suited for data converter and mixed signal front end (MxFE)
clock applications.

Datasheet: https://www.analog.com/media/en/technical-documentation/data-sheets/adf4377.pdf
Signed-off-by: Antoniu Miclaus <antoniu.miclaus@analog.com>
Reviewed-by: Nuno Sá <nuno.sa@analog.com>
Link: https://lore.kernel.org/r/20221115110041.71495-2-antoniu.miclaus@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/frequency/Kconfig   |  10 +
 drivers/iio/frequency/Makefile  |   1 +
 drivers/iio/frequency/adf4377.c | 994 ++++++++++++++++++++++++++++++++
 3 files changed, 1005 insertions(+)
 create mode 100644 drivers/iio/frequency/adf4377.c

diff --git a/drivers/iio/frequency/Kconfig b/drivers/iio/frequency/Kconfig
index f3702f36436cb..9e85dfa585081 100644
--- a/drivers/iio/frequency/Kconfig
+++ b/drivers/iio/frequency/Kconfig
@@ -50,6 +50,16 @@ config ADF4371
 	  To compile this driver as a module, choose M here: the
 	  module will be called adf4371.
 
+config ADF4377
+	tristate "Analog Devices ADF4377 Microwave Wideband Synthesizer"
+	depends on SPI && COMMON_CLK
+	help
+	  Say yes here to build support for Analog Devices ADF4377 Microwave
+	  Wideband Synthesizer.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called adf4377.
+
 config ADMV1013
 	tristate "Analog Devices ADMV1013 Microwave Upconverter"
 	depends on SPI && COMMON_CLK
diff --git a/drivers/iio/frequency/Makefile b/drivers/iio/frequency/Makefile
index 48add732f1d36..b616c29b4a087 100644
--- a/drivers/iio/frequency/Makefile
+++ b/drivers/iio/frequency/Makefile
@@ -7,6 +7,7 @@
 obj-$(CONFIG_AD9523) += ad9523.o
 obj-$(CONFIG_ADF4350) += adf4350.o
 obj-$(CONFIG_ADF4371) += adf4371.o
+obj-$(CONFIG_ADF4377) += adf4377.o
 obj-$(CONFIG_ADMV1013) += admv1013.o
 obj-$(CONFIG_ADMV1014) += admv1014.o
 obj-$(CONFIG_ADMV4420) += admv4420.o
diff --git a/drivers/iio/frequency/adf4377.c b/drivers/iio/frequency/adf4377.c
new file mode 100644
index 0000000000000..26abecbd51e06
--- /dev/null
+++ b/drivers/iio/frequency/adf4377.c
@@ -0,0 +1,994 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ADF4377 driver
+ *
+ * Copyright 2022 Analog Devices Inc.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/clkdev.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/notifier.h>
+#include <linux/property.h>
+#include <linux/spi/spi.h>
+#include <linux/iio/iio.h>
+#include <linux/regmap.h>
+#include <linux/units.h>
+
+#include <asm/unaligned.h>
+
+/* ADF4377 REG0000 Map */
+#define ADF4377_0000_SOFT_RESET_R_MSK		BIT(7)
+#define ADF4377_0000_LSB_FIRST_R_MSK		BIT(6)
+#define ADF4377_0000_ADDRESS_ASC_R_MSK		BIT(5)
+#define ADF4377_0000_SDO_ACTIVE_R_MSK		BIT(4)
+#define ADF4377_0000_SDO_ACTIVE_MSK		BIT(3)
+#define ADF4377_0000_ADDRESS_ASC_MSK		BIT(2)
+#define ADF4377_0000_LSB_FIRST_MSK		BIT(1)
+#define ADF4377_0000_SOFT_RESET_MSK		BIT(0)
+
+/* ADF4377 REG0000 Bit Definition */
+#define ADF4377_0000_SDO_ACTIVE_SPI_3W		0x0
+#define ADF4377_0000_SDO_ACTIVE_SPI_4W		0x1
+
+#define ADF4377_0000_ADDR_ASC_AUTO_DECR		0x0
+#define ADF4377_0000_ADDR_ASC_AUTO_INCR		0x1
+
+#define ADF4377_0000_LSB_FIRST_MSB		0x0
+#define ADF4377_0000_LSB_FIRST_LSB		0x1
+
+#define ADF4377_0000_SOFT_RESET_N_OP		0x0
+#define ADF4377_0000_SOFT_RESET_EN		0x1
+
+/* ADF4377 REG0001 Map */
+#define ADF4377_0001_SINGLE_INSTR_MSK		BIT(7)
+#define ADF4377_0001_MASTER_RB_CTRL_MSK		BIT(5)
+
+/* ADF4377 REG0003 Bit Definition */
+#define ADF4377_0003_CHIP_TYPE			0x06
+
+/* ADF4377 REG0004 Bit Definition */
+#define ADF4377_0004_PRODUCT_ID_LSB		0x0005
+
+/* ADF4377 REG0005 Bit Definition */
+#define ADF4377_0005_PRODUCT_ID_MSB		0x0005
+
+/* ADF4377 REG000A Map */
+#define ADF4377_000A_SCRATCHPAD_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG000C Bit Definition */
+#define ADF4377_000C_VENDOR_ID_LSB		0x56
+
+/* ADF4377 REG000D Bit Definition */
+#define ADF4377_000D_VENDOR_ID_MSB		0x04
+
+/* ADF4377 REG000F Bit Definition */
+#define ADF4377_000F_R00F_RSV1_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG0010 Map*/
+#define ADF4377_0010_N_INT_LSB_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG0011 Map*/
+#define ADF4377_0011_EN_AUTOCAL_MSK		BIT(7)
+#define ADF4377_0011_EN_RDBLR_MSK		BIT(6)
+#define ADF4377_0011_DCLK_DIV2_MSK		GENMASK(5, 4)
+#define ADF4377_0011_N_INT_MSB_MSK		GENMASK(3, 0)
+
+/* ADF4377 REG0011 Bit Definition */
+#define ADF4377_0011_DCLK_DIV2_1		0x0
+#define ADF4377_0011_DCLK_DIV2_2		0x1
+#define ADF4377_0011_DCLK_DIV2_4		0x2
+#define ADF4377_0011_DCLK_DIV2_8		0x3
+
+/* ADF4377 REG0012 Map*/
+#define ADF4377_0012_CLKOUT_DIV_MSK		GENMASK(7, 6)
+#define ADF4377_0012_R_DIV_MSK			GENMASK(5, 0)
+
+/* ADF4377 REG0012 Bit Definition */
+#define ADF4377_0012_CLKOUT_DIV_1		0x0
+#define ADF4377_0012_CLKOUT_DIV_2		0x1
+#define ADF4377_0012_CLKOUT_DIV_4		0x2
+#define ADF4377_0012_CLKOUT_DIV_8		0x3
+
+/* ADF4377 REG0013 Map */
+#define ADF4377_0013_M_VCO_CORE_MSK		GENMASK(5, 4)
+#define ADF4377_0013_VCO_BIAS_MSK		GENMASK(3, 0)
+
+/* ADF4377 REG0013 Bit Definition */
+#define ADF4377_0013_M_VCO_0			0x0
+#define ADF4377_0013_M_VCO_1			0x1
+#define ADF4377_0013_M_VCO_2			0x2
+#define ADF4377_0013_M_VCO_3			0x3
+
+/* ADF4377 REG0014 Map */
+#define ADF4377_0014_M_VCO_BAND_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG0015 Map */
+#define ADF4377_0015_BLEED_I_LSB_MSK		GENMASK(7, 6)
+#define ADF4377_0015_BLEED_POL_MSK		BIT(5)
+#define ADF4377_0015_EN_BLEED_MSK		BIT(4)
+#define ADF4377_0015_CP_I_MSK			GENMASK(3, 0)
+
+/* ADF4377 REG0015 Bit Definition */
+#define ADF4377_CURRENT_SINK			0x0
+#define ADF4377_CURRENT_SOURCE			0x1
+
+#define ADF4377_0015_CP_0MA7			0x0
+#define ADF4377_0015_CP_0MA9			0x1
+#define ADF4377_0015_CP_1MA1			0x2
+#define ADF4377_0015_CP_1MA3			0x3
+#define ADF4377_0015_CP_1MA4			0x4
+#define ADF4377_0015_CP_1MA8			0x5
+#define ADF4377_0015_CP_2MA2			0x6
+#define ADF4377_0015_CP_2MA5			0x7
+#define ADF4377_0015_CP_2MA9			0x8
+#define ADF4377_0015_CP_3MA6			0x9
+#define ADF4377_0015_CP_4MA3			0xA
+#define ADF4377_0015_CP_5MA0			0xB
+#define ADF4377_0015_CP_5MA7			0xC
+#define ADF4377_0015_CP_7MA2			0xD
+#define ADF4377_0015_CP_8MA6			0xE
+#define ADF4377_0015_CP_10MA1			0xF
+
+/* ADF4377 REG0016 Map */
+#define ADF4377_0016_BLEED_I_MSB_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG0017 Map */
+#define ADF4377_0016_INV_CLKOUT_MSK		BIT(7)
+#define ADF4377_0016_N_DEL_MSK			GENMASK(6, 0)
+
+/* ADF4377 REG0018 Map */
+#define ADF4377_0018_CMOS_OV_MSK		BIT(7)
+#define ADF4377_0018_R_DEL_MSK			GENMASK(6, 0)
+
+/* ADF4377 REG0018 Bit Definition */
+#define ADF4377_0018_1V8_LOGIC			0x0
+#define ADF4377_0018_3V3_LOGIC			0x1
+
+/* ADF4377 REG0019 Map */
+#define ADF4377_0019_CLKOUT2_OP_MSK		GENMASK(7, 6)
+#define ADF4377_0019_CLKOUT1_OP_MSK		GENMASK(5, 4)
+#define ADF4377_0019_PD_CLK_MSK			BIT(3)
+#define ADF4377_0019_PD_RDET_MSK		BIT(2)
+#define ADF4377_0019_PD_ADC_MSK			BIT(1)
+#define ADF4377_0019_PD_CALADC_MSK		BIT(0)
+
+/* ADF4377 REG0019 Bit Definition */
+#define ADF4377_0019_CLKOUT_320MV		0x0
+#define ADF4377_0019_CLKOUT_420MV		0x1
+#define ADF4377_0019_CLKOUT_530MV		0x2
+#define ADF4377_0019_CLKOUT_640MV		0x3
+
+/* ADF4377 REG001A Map */
+#define ADF4377_001A_PD_ALL_MSK			BIT(7)
+#define ADF4377_001A_PD_RDIV_MSK		BIT(6)
+#define ADF4377_001A_PD_NDIV_MSK		BIT(5)
+#define ADF4377_001A_PD_VCO_MSK			BIT(4)
+#define ADF4377_001A_PD_LD_MSK			BIT(3)
+#define ADF4377_001A_PD_PFDCP_MSK		BIT(2)
+#define ADF4377_001A_PD_CLKOUT1_MSK		BIT(1)
+#define ADF4377_001A_PD_CLKOUT2_MSK		BIT(0)
+
+/* ADF4377 REG001B Map */
+#define ADF4377_001B_EN_LOL_MSK			BIT(7)
+#define ADF4377_001B_LDWIN_PW_MSK		BIT(6)
+#define ADF4377_001B_EN_LDWIN_MSK		BIT(5)
+#define ADF4377_001B_LD_COUNT_MSK		GENMASK(4, 0)
+
+/* ADF4377 REG001B Bit Definition */
+#define ADF4377_001B_LDWIN_PW_NARROW		0x0
+#define ADF4377_001B_LDWIN_PW_WIDE		0x1
+
+/* ADF4377 REG001C Map */
+#define ADF4377_001C_EN_DNCLK_MSK		BIT(7)
+#define ADF4377_001C_EN_DRCLK_MSK		BIT(6)
+#define ADF4377_001C_RST_LD_MSK			BIT(2)
+#define ADF4377_001C_R01C_RSV1_MSK		BIT(0)
+
+/* ADF4377 REG001C Bit Definition */
+#define ADF4377_001C_RST_LD_INACTIVE		0x0
+#define ADF4377_001C_RST_LD_ACTIVE		0x1
+
+#define ADF4377_001C_R01C_RSV1			0x1
+
+/* ADF4377 REG001D Map */
+#define ADF4377_001D_MUXOUT_MSK			GENMASK(7, 4)
+#define ADF4377_001D_EN_CPTEST_MSK		BIT(2)
+#define ADF4377_001D_CP_DOWN_MSK		BIT(1)
+#define ADF4377_001D_CP_UP_MSK			BIT(0)
+
+#define ADF4377_001D_EN_CPTEST_OFF		0x0
+#define ADF4377_001D_EN_CPTEST_ON		0x1
+
+#define ADF4377_001D_CP_DOWN_OFF		0x0
+#define ADF4377_001D_CP_DOWN_ON			0x1
+
+#define ADF4377_001D_CP_UP_OFF			0x0
+#define ADF4377_001D_CP_UP_ON			0x1
+
+/* ADF4377 REG001F Map */
+#define ADF4377_001F_BST_REF_MSK		BIT(7)
+#define ADF4377_001F_FILT_REF_MSK		BIT(6)
+#define ADF4377_001F_REF_SEL_MSK		BIT(5)
+#define ADF4377_001F_R01F_RSV1_MSK		GENMASK(4, 0)
+
+/* ADF4377 REG001F Bit Definition */
+#define ADF4377_001F_BST_LARGE_REF_IN		0x0
+#define ADF4377_001F_BST_SMALL_REF_IN		0x1
+
+#define ADF4377_001F_FILT_REF_OFF		0x0
+#define ADF4377_001F_FILT_REF_ON		0x1
+
+#define ADF4377_001F_REF_SEL_DMA		0x0
+#define ADF4377_001F_REF_SEL_LNA		0x1
+
+#define ADF4377_001F_R01F_RSV1			0x7
+
+/* ADF4377 REG0020 Map */
+#define ADF4377_0020_RST_SYS_MSK		BIT(4)
+#define ADF4377_0020_EN_ADC_CLK_MSK		BIT(3)
+#define ADF4377_0020_R020_RSV1_MSK		BIT(0)
+
+/* ADF4377 REG0021 Bit Definition */
+#define ADF4377_0021_R021_RSV1			0xD3
+
+/* ADF4377 REG0022 Bit Definition */
+#define ADF4377_0022_R022_RSV1			0x32
+
+/* ADF4377 REG0023 Map */
+#define ADF4377_0023_CAT_CT_SEL			BIT(7)
+#define ADF4377_0023_R023_RSV1_MSK		GENMASK(6, 0)
+
+/* ADF4377 REG0023 Bit Definition */
+#define ADF4377_0023_R023_RSV1			0x18
+
+/* ADF4377 REG0024 Map */
+#define ADF4377_0024_DCLK_MODE_MSK		BIT(2)
+
+/* ADF4377 REG0025 Map */
+#define ADF4377_0025_CLKODIV_DB_MSK		BIT(7)
+#define ADF4377_0025_DCLK_DB_MSK		BIT(6)
+#define ADF4377_0025_R025_RSV1_MSK		GENMASK(5, 0)
+
+/* ADF4377 REG0025 Bit Definition */
+#define ADF4377_0025_R025_RSV1			0x16
+
+/* ADF4377 REG0026 Map */
+#define ADF4377_0026_VCO_BAND_DIV_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG0027 Map */
+#define ADF4377_0027_SYNTH_LOCK_TO_LSB_MSK	GENMASK(7, 0)
+
+/* ADF4377 REG0028 Map */
+#define ADF4377_0028_O_VCO_DB_MSK		BIT(7)
+#define ADF4377_0028_SYNTH_LOCK_TO_MSB_MSK	GENMASK(6, 0)
+
+/* ADF4377 REG0029 Map */
+#define ADF4377_0029_VCO_ALC_TO_LSB_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG002A Map */
+#define ADF4377_002A_DEL_CTRL_DB_MSK		BIT(7)
+#define ADF4377_002A_VCO_ALC_TO_MSB_MSK		GENMASK(6, 0)
+
+/* ADF4377 REG002C Map */
+#define ADF4377_002C_R02C_RSV1			0xC0
+
+/* ADF4377 REG002D Map */
+#define ADF4377_002D_ADC_CLK_DIV_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG002E Map */
+#define ADF4377_002E_EN_ADC_CNV_MSK		BIT(7)
+#define ADF4377_002E_EN_ADC_MSK			BIT(1)
+#define ADF4377_002E_ADC_A_CONV_MSK		BIT(0)
+
+/* ADF4377 REG002E Bit Definition */
+#define ADF4377_002E_ADC_A_CONV_ADC_ST_CNV	0x0
+#define ADF4377_002E_ADC_A_CONV_VCO_CALIB	0x1
+
+/* ADF4377 REG002F Map */
+#define ADF4377_002F_DCLK_DIV1_MSK		GENMASK(1, 0)
+
+/* ADF4377 REG002F Bit Definition */
+#define ADF4377_002F_DCLK_DIV1_1		0x0
+#define ADF4377_002F_DCLK_DIV1_2		0x1
+#define ADF4377_002F_DCLK_DIV1_8		0x2
+#define ADF4377_002F_DCLK_DIV1_32		0x3
+
+/* ADF4377 REG0031 Bit Definition */
+#define ADF4377_0031_R031_RSV1			0x09
+
+/* ADF4377 REG0032 Map */
+#define ADF4377_0032_ADC_CLK_SEL_MSK		BIT(6)
+#define ADF4377_0032_R032_RSV1_MSK		GENMASK(5, 0)
+
+/* ADF4377 REG0032 Bit Definition */
+#define ADF4377_0032_ADC_CLK_SEL_N_OP		0x0
+#define ADF4377_0032_ADC_CLK_SEL_SPI_CLK	0x1
+
+#define ADF4377_0032_R032_RSV1			0x9
+
+/* ADF4377 REG0033 Bit Definition */
+#define ADF4377_0033_R033_RSV1			0x18
+
+/* ADF4377 REG0034 Bit Definition */
+#define ADF4377_0034_R034_RSV1			0x08
+
+/* ADF4377 REG003A Bit Definition */
+#define ADF4377_003A_R03A_RSV1			0x5D
+
+/* ADF4377 REG003B Bit Definition */
+#define ADF4377_003B_R03B_RSV1			0x2B
+
+/* ADF4377 REG003D Map */
+#define ADF4377_003D_O_VCO_BAND_MSK		BIT(3)
+#define ADF4377_003D_O_VCO_CORE_MSK		BIT(2)
+#define ADF4377_003D_O_VCO_BIAS_MSK		BIT(1)
+
+/* ADF4377 REG003D Bit Definition */
+#define ADF4377_003D_O_VCO_BAND_VCO_CALIB	0x0
+#define ADF4377_003D_O_VCO_BAND_M_VCO		0x1
+
+#define ADF4377_003D_O_VCO_CORE_VCO_CALIB	0x0
+#define ADF4377_003D_O_VCO_CORE_M_VCO		0x1
+
+#define ADF4377_003D_O_VCO_BIAS_VCO_CALIB	0x0
+#define ADF4377_003D_O_VCO_BIAS_M_VCO		0x1
+
+/* ADF4377 REG0042 Map */
+#define ADF4377_0042_R042_RSV1			0x05
+
+/* ADF4377 REG0045 Map */
+#define ADF4377_0045_ADC_ST_CNV_MSK		BIT(0)
+
+/* ADF4377 REG0049 Map */
+#define ADF4377_0049_EN_CLK2_MSK		BIT(7)
+#define ADF4377_0049_EN_CLK1_MSK		BIT(6)
+#define ADF4377_0049_REF_OK_MSK			BIT(3)
+#define ADF4377_0049_ADC_BUSY_MSK		BIT(2)
+#define ADF4377_0049_FSM_BUSY_MSK		BIT(1)
+#define ADF4377_0049_LOCKED_MSK			BIT(0)
+
+/* ADF4377 REG004B Map */
+#define ADF4377_004B_VCO_CORE_MSK		GENMASK(1, 0)
+
+/* ADF4377 REG004C Map */
+#define ADF4377_004C_CHIP_TEMP_LSB_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG004D Map */
+#define ADF4377_004D_CHIP_TEMP_MSB_MSK		BIT(0)
+
+/* ADF4377 REG004F Map */
+#define ADF4377_004F_VCO_BAND_MSK		GENMASK(7, 0)
+
+/* ADF4377 REG0051 Map */
+#define ADF4377_0051_VCO_BIAS_MSK		GENMASK(3, 0)
+
+/* ADF4377 REG0054 Map */
+#define ADF4377_0054_CHIP_VERSION_MSK		GENMASK(7, 0)
+
+/* Specifications */
+#define ADF4377_SPI_READ_CMD			BIT(7)
+#define ADF4377_MAX_VCO_FREQ			(12800ULL * HZ_PER_MHZ)
+#define ADF4377_MIN_VCO_FREQ			(6400ULL * HZ_PER_MHZ)
+#define ADF4377_MAX_REFIN_FREQ			(1000 * HZ_PER_MHZ)
+#define ADF4377_MIN_REFIN_FREQ			(10 * HZ_PER_MHZ)
+#define ADF4377_MAX_FREQ_PFD			(500 * HZ_PER_MHZ)
+#define ADF4377_MIN_FREQ_PFD			(3 * HZ_PER_MHZ)
+#define ADF4377_MAX_CLKPN_FREQ			ADF4377_MAX_VCO_FREQ
+#define ADF4377_MIN_CLKPN_FREQ			(ADF4377_MIN_VCO_FREQ / 8)
+#define ADF4377_FREQ_PFD_80MHZ			(80 * HZ_PER_MHZ)
+#define ADF4377_FREQ_PFD_125MHZ			(125 * HZ_PER_MHZ)
+#define ADF4377_FREQ_PFD_160MHZ			(160 * HZ_PER_MHZ)
+#define ADF4377_FREQ_PFD_250MHZ			(250 * HZ_PER_MHZ)
+#define ADF4377_FREQ_PFD_320MHZ			(320 * HZ_PER_MHZ)
+
+enum {
+	ADF4377_FREQ,
+};
+
+enum muxout_select_mode {
+	ADF4377_MUXOUT_HIGH_Z = 0x0,
+	ADF4377_MUXOUT_LKDET = 0x1,
+	ADF4377_MUXOUT_LOW = 0x2,
+	ADF4377_MUXOUT_DIV_RCLK_2 = 0x4,
+	ADF4377_MUXOUT_DIV_NCLK_2 = 0x5,
+	ADF4377_MUXOUT_HIGH = 0x8,
+};
+
+struct adf4377_state {
+	struct spi_device	*spi;
+	struct regmap		*regmap;
+	struct clk		*clkin;
+	/* Protect against concurrent accesses to the device and data content */
+	struct mutex		lock;
+	struct notifier_block	nb;
+	/* Reference Divider */
+	unsigned int		ref_div_factor;
+	/* PFD Frequency */
+	unsigned int		f_pfd;
+	/* Input Reference Clock */
+	unsigned int		clkin_freq;
+	/* CLKOUT Divider */
+	u8			clkout_div_sel;
+	/* Feedback Divider (N) */
+	u16			n_int;
+	u16			synth_lock_timeout;
+	u16			vco_alc_timeout;
+	u16			adc_clk_div;
+	u16			vco_band_div;
+	u8			dclk_div1;
+	u8			dclk_div2;
+	u8			dclk_mode;
+	unsigned int		f_div_rclk;
+	enum muxout_select_mode	muxout_select;
+	struct gpio_desc	*gpio_ce;
+	struct gpio_desc	*gpio_enclk1;
+	struct gpio_desc	*gpio_enclk2;
+	u8			buf[2] __aligned(IIO_DMA_MINALIGN);
+};
+
+static const char * const adf4377_muxout_modes[] = {
+	[ADF4377_MUXOUT_HIGH_Z] = "high_z",
+	[ADF4377_MUXOUT_LKDET] = "lock_detect",
+	[ADF4377_MUXOUT_LOW] = "muxout_low",
+	[ADF4377_MUXOUT_DIV_RCLK_2] = "f_div_rclk_2",
+	[ADF4377_MUXOUT_DIV_NCLK_2] = "f_div_nclk_2",
+	[ADF4377_MUXOUT_HIGH] = "muxout_high",
+};
+
+static const struct reg_sequence adf4377_reg_defaults[] = {
+	{ 0x42,  ADF4377_0042_R042_RSV1 },
+	{ 0x3B,  ADF4377_003B_R03B_RSV1 },
+	{ 0x3A,  ADF4377_003A_R03A_RSV1 },
+	{ 0x34,  ADF4377_0034_R034_RSV1 },
+	{ 0x33,  ADF4377_0033_R033_RSV1 },
+	{ 0x32,  ADF4377_0032_R032_RSV1 },
+	{ 0x31,  ADF4377_0031_R031_RSV1 },
+	{ 0x2C,  ADF4377_002C_R02C_RSV1 },
+	{ 0x25,  ADF4377_0025_R025_RSV1 },
+	{ 0x23,  ADF4377_0023_R023_RSV1 },
+	{ 0x22,  ADF4377_0022_R022_RSV1 },
+	{ 0x21,  ADF4377_0021_R021_RSV1 },
+	{ 0x1f,  ADF4377_001F_R01F_RSV1 },
+	{ 0x1c,  ADF4377_001C_R01C_RSV1 },
+};
+
+static const struct regmap_config adf4377_regmap_config = {
+	.reg_bits = 16,
+	.val_bits = 8,
+	.read_flag_mask = BIT(7),
+	.max_register = 0x54,
+};
+
+static int adf4377_reg_access(struct iio_dev *indio_dev,
+			      unsigned int reg,
+			      unsigned int write_val,
+			      unsigned int *read_val)
+{
+	struct adf4377_state *st = iio_priv(indio_dev);
+
+	if (read_val)
+		return regmap_read(st->regmap, reg, read_val);
+
+	return regmap_write(st->regmap, reg, write_val);
+}
+
+static const struct iio_info adf4377_info = {
+	.debugfs_reg_access = &adf4377_reg_access,
+};
+
+static int adf4377_soft_reset(struct adf4377_state *st)
+{
+	unsigned int read_val;
+	int ret;
+
+	ret = regmap_update_bits(st->regmap, 0x0, ADF4377_0000_SOFT_RESET_MSK |
+				 ADF4377_0000_SOFT_RESET_R_MSK,
+				 FIELD_PREP(ADF4377_0000_SOFT_RESET_MSK, 1) |
+				 FIELD_PREP(ADF4377_0000_SOFT_RESET_R_MSK, 1));
+	if (ret)
+		return ret;
+
+	return regmap_read_poll_timeout(st->regmap, 0x0, read_val,
+					!(read_val & (ADF4377_0000_SOFT_RESET_R_MSK |
+					ADF4377_0000_SOFT_RESET_R_MSK)), 200, 200 * 100);
+}
+
+static int adf4377_get_freq(struct adf4377_state *st, u64 *freq)
+{
+	unsigned int ref_div_factor, n_int;
+	u64 clkin_freq;
+	int ret;
+
+	mutex_lock(&st->lock);
+	ret = regmap_read(st->regmap, 0x12, &ref_div_factor);
+	if (ret)
+		goto exit;
+
+	ret = regmap_bulk_read(st->regmap, 0x10, st->buf, sizeof(st->buf));
+	if (ret)
+		goto exit;
+
+	clkin_freq = clk_get_rate(st->clkin);
+	ref_div_factor = FIELD_GET(ADF4377_0012_R_DIV_MSK, ref_div_factor);
+	n_int = FIELD_GET(ADF4377_0010_N_INT_LSB_MSK | ADF4377_0011_N_INT_MSB_MSK,
+			  get_unaligned_le16(&st->buf));
+
+	*freq = div_u64(clkin_freq, ref_div_factor) * n_int;
+exit:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static int adf4377_set_freq(struct adf4377_state *st, u64 freq)
+{
+	unsigned int read_val;
+	u64 f_vco;
+	int ret;
+
+	mutex_lock(&st->lock);
+
+	if (freq > ADF4377_MAX_CLKPN_FREQ || freq < ADF4377_MIN_CLKPN_FREQ) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	ret = regmap_update_bits(st->regmap, 0x1C, ADF4377_001C_EN_DNCLK_MSK |
+				 ADF4377_001C_EN_DRCLK_MSK,
+				 FIELD_PREP(ADF4377_001C_EN_DNCLK_MSK, 1) |
+				 FIELD_PREP(ADF4377_001C_EN_DRCLK_MSK, 1));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x11, ADF4377_0011_EN_AUTOCAL_MSK |
+				 ADF4377_0011_DCLK_DIV2_MSK,
+				 FIELD_PREP(ADF4377_0011_EN_AUTOCAL_MSK, 1) |
+				 FIELD_PREP(ADF4377_0011_DCLK_DIV2_MSK, st->dclk_div2));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x2E, ADF4377_002E_EN_ADC_CNV_MSK |
+				 ADF4377_002E_EN_ADC_MSK |
+				 ADF4377_002E_ADC_A_CONV_MSK,
+				 FIELD_PREP(ADF4377_002E_EN_ADC_CNV_MSK, 1) |
+				 FIELD_PREP(ADF4377_002E_EN_ADC_MSK, 1) |
+				 FIELD_PREP(ADF4377_002E_ADC_A_CONV_MSK,
+					    ADF4377_002E_ADC_A_CONV_VCO_CALIB));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x20, ADF4377_0020_EN_ADC_CLK_MSK,
+				 FIELD_PREP(ADF4377_0020_EN_ADC_CLK_MSK, 1));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x2F, ADF4377_002F_DCLK_DIV1_MSK,
+				 FIELD_PREP(ADF4377_002F_DCLK_DIV1_MSK, st->dclk_div1));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x24, ADF4377_0024_DCLK_MODE_MSK,
+				 FIELD_PREP(ADF4377_0024_DCLK_MODE_MSK, st->dclk_mode));
+	if (ret)
+		goto exit;
+
+	ret = regmap_write(st->regmap, 0x27,
+			   FIELD_PREP(ADF4377_0027_SYNTH_LOCK_TO_LSB_MSK,
+				      st->synth_lock_timeout));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x28, ADF4377_0028_SYNTH_LOCK_TO_MSB_MSK,
+				 FIELD_PREP(ADF4377_0028_SYNTH_LOCK_TO_MSB_MSK,
+					    st->synth_lock_timeout >> 8));
+	if (ret)
+		goto exit;
+
+	ret = regmap_write(st->regmap, 0x29,
+			   FIELD_PREP(ADF4377_0029_VCO_ALC_TO_LSB_MSK,
+				      st->vco_alc_timeout));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x2A, ADF4377_002A_VCO_ALC_TO_MSB_MSK,
+				 FIELD_PREP(ADF4377_002A_VCO_ALC_TO_MSB_MSK,
+					    st->vco_alc_timeout >> 8));
+	if (ret)
+		goto exit;
+
+	ret = regmap_write(st->regmap, 0x26,
+			   FIELD_PREP(ADF4377_0026_VCO_BAND_DIV_MSK, st->vco_band_div));
+	if (ret)
+		goto exit;
+
+	ret = regmap_write(st->regmap, 0x2D,
+			   FIELD_PREP(ADF4377_002D_ADC_CLK_DIV_MSK, st->adc_clk_div));
+	if (ret)
+		goto exit;
+
+	st->clkout_div_sel = 0;
+
+	f_vco = freq;
+
+	while (f_vco < ADF4377_MIN_VCO_FREQ) {
+		f_vco <<= 1;
+		st->clkout_div_sel++;
+	}
+
+	st->n_int = div_u64(freq, st->f_pfd);
+
+	ret = regmap_update_bits(st->regmap, 0x11, ADF4377_0011_EN_RDBLR_MSK |
+				 ADF4377_0011_N_INT_MSB_MSK,
+				 FIELD_PREP(ADF4377_0011_EN_RDBLR_MSK, 0) |
+				 FIELD_PREP(ADF4377_0011_N_INT_MSB_MSK, st->n_int >> 8));
+	if (ret)
+		goto exit;
+
+	ret = regmap_update_bits(st->regmap, 0x12, ADF4377_0012_R_DIV_MSK |
+				 ADF4377_0012_CLKOUT_DIV_MSK,
+				 FIELD_PREP(ADF4377_0012_CLKOUT_DIV_MSK, st->clkout_div_sel) |
+				 FIELD_PREP(ADF4377_0012_R_DIV_MSK, st->ref_div_factor));
+	if (ret)
+		goto exit;
+
+	ret = regmap_write(st->regmap, 0x10,
+			   FIELD_PREP(ADF4377_0010_N_INT_LSB_MSK, st->n_int));
+	if (ret)
+		goto exit;
+
+	ret = regmap_read_poll_timeout(st->regmap, 0x49, read_val,
+				       !(read_val & (ADF4377_0049_FSM_BUSY_MSK)), 200, 200 * 100);
+	if (ret)
+		goto exit;
+
+	/* Disable EN_DNCLK, EN_DRCLK */
+	ret = regmap_update_bits(st->regmap, 0x1C, ADF4377_001C_EN_DNCLK_MSK |
+				 ADF4377_001C_EN_DRCLK_MSK,
+				 FIELD_PREP(ADF4377_001C_EN_DNCLK_MSK, 0) |
+				 FIELD_PREP(ADF4377_001C_EN_DRCLK_MSK, 0));
+	if (ret)
+		goto exit;
+
+	/* Disable EN_ADC_CLK */
+	ret = regmap_update_bits(st->regmap, 0x20, ADF4377_0020_EN_ADC_CLK_MSK,
+				 FIELD_PREP(ADF4377_0020_EN_ADC_CLK_MSK, 0));
+	if (ret)
+		goto exit;
+
+	/* Set output Amplitude */
+	ret = regmap_update_bits(st->regmap, 0x19, ADF4377_0019_CLKOUT2_OP_MSK |
+				 ADF4377_0019_CLKOUT1_OP_MSK,
+				 FIELD_PREP(ADF4377_0019_CLKOUT1_OP_MSK,
+					    ADF4377_0019_CLKOUT_420MV) |
+				 FIELD_PREP(ADF4377_0019_CLKOUT2_OP_MSK,
+					    ADF4377_0019_CLKOUT_420MV));
+
+exit:
+	mutex_unlock(&st->lock);
+
+	return ret;
+}
+
+static void adf4377_gpio_init(struct adf4377_state *st)
+{
+	if (st->gpio_ce) {
+		gpiod_set_value(st->gpio_ce, 1);
+
+		/* Delay for SPI register bits to settle to their power-on reset state */
+		fsleep(200);
+	}
+
+	if (st->gpio_enclk1)
+		gpiod_set_value(st->gpio_enclk1, 1);
+
+	if (st->gpio_enclk2)
+		gpiod_set_value(st->gpio_enclk2, 1);
+}
+
+static int adf4377_init(struct adf4377_state *st)
+{
+	struct spi_device *spi = st->spi;
+	int ret;
+
+	adf4377_gpio_init(st);
+
+	ret = adf4377_soft_reset(st);
+	if (ret) {
+		dev_err(&spi->dev, "Failed to soft reset.\n");
+		return ret;
+	}
+
+	ret = regmap_multi_reg_write(st->regmap, adf4377_reg_defaults,
+				     ARRAY_SIZE(adf4377_reg_defaults));
+	if (ret) {
+		dev_err(&spi->dev, "Failed to set default registers.\n");
+		return ret;
+	}
+
+	ret = regmap_update_bits(st->regmap, 0x00,
+				 ADF4377_0000_SDO_ACTIVE_MSK | ADF4377_0000_SDO_ACTIVE_R_MSK,
+				 FIELD_PREP(ADF4377_0000_SDO_ACTIVE_MSK,
+					    ADF4377_0000_SDO_ACTIVE_SPI_4W) |
+				 FIELD_PREP(ADF4377_0000_SDO_ACTIVE_R_MSK,
+					    ADF4377_0000_SDO_ACTIVE_SPI_4W));
+	if (ret) {
+		dev_err(&spi->dev, "Failed to set 4-Wire Operation.\n");
+		return ret;
+	}
+
+	st->clkin_freq = clk_get_rate(st->clkin);
+
+	/* Power Up */
+	ret = regmap_write(st->regmap, 0x1a,
+			   FIELD_PREP(ADF4377_001A_PD_ALL_MSK, 0) |
+			   FIELD_PREP(ADF4377_001A_PD_RDIV_MSK, 0) |
+			   FIELD_PREP(ADF4377_001A_PD_NDIV_MSK, 0) |
+			   FIELD_PREP(ADF4377_001A_PD_VCO_MSK, 0) |
+			   FIELD_PREP(ADF4377_001A_PD_LD_MSK, 0) |
+			   FIELD_PREP(ADF4377_001A_PD_PFDCP_MSK, 0) |
+			   FIELD_PREP(ADF4377_001A_PD_CLKOUT1_MSK, 0) |
+			   FIELD_PREP(ADF4377_001A_PD_CLKOUT2_MSK, 0));
+	if (ret) {
+		dev_err(&spi->dev, "Failed to set power down registers.\n");
+		return ret;
+	}
+
+	/* Set Mux Output */
+	ret = regmap_update_bits(st->regmap, 0x1D,
+				 ADF4377_001D_MUXOUT_MSK,
+				 FIELD_PREP(ADF4377_001D_MUXOUT_MSK, st->muxout_select));
+	if (ret)
+		return ret;
+
+	/* Compute PFD */
+	st->ref_div_factor = 0;
+	do {
+		st->ref_div_factor++;
+		st->f_pfd = st->clkin_freq / st->ref_div_factor;
+	} while (st->f_pfd > ADF4377_MAX_FREQ_PFD);
+
+	if (st->f_pfd > ADF4377_MAX_FREQ_PFD || st->f_pfd < ADF4377_MIN_FREQ_PFD)
+		return -EINVAL;
+
+	st->f_div_rclk = st->f_pfd;
+
+	if (st->f_pfd <= ADF4377_FREQ_PFD_80MHZ) {
+		st->dclk_div1 = ADF4377_002F_DCLK_DIV1_1;
+		st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1;
+		st->dclk_mode = 0;
+	} else if (st->f_pfd <= ADF4377_FREQ_PFD_125MHZ) {
+		st->dclk_div1 = ADF4377_002F_DCLK_DIV1_1;
+		st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1;
+		st->dclk_mode = 1;
+	} else if (st->f_pfd <= ADF4377_FREQ_PFD_160MHZ) {
+		st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2;
+		st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1;
+		st->dclk_mode = 0;
+		st->f_div_rclk /= 2;
+	} else if (st->f_pfd <= ADF4377_FREQ_PFD_250MHZ) {
+		st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2;
+		st->dclk_div2 = ADF4377_0011_DCLK_DIV2_1;
+		st->dclk_mode = 1;
+		st->f_div_rclk /= 2;
+	} else if (st->f_pfd <= ADF4377_FREQ_PFD_320MHZ) {
+		st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2;
+		st->dclk_div2 = ADF4377_0011_DCLK_DIV2_2;
+		st->dclk_mode = 0;
+		st->f_div_rclk /= 4;
+	} else {
+		st->dclk_div1 = ADF4377_002F_DCLK_DIV1_2;
+		st->dclk_div2 = ADF4377_0011_DCLK_DIV2_2;
+		st->dclk_mode = 1;
+		st->f_div_rclk /= 4;
+	}
+
+	st->synth_lock_timeout = DIV_ROUND_UP(st->f_div_rclk, 50000);
+	st->vco_alc_timeout = DIV_ROUND_UP(st->f_div_rclk, 20000);
+	st->vco_band_div = DIV_ROUND_UP(st->f_div_rclk, 150000 * 16 * (1 << st->dclk_mode));
+	st->adc_clk_div = DIV_ROUND_UP((st->f_div_rclk / 400000 - 2), 4);
+
+	return 0;
+}
+
+static ssize_t adf4377_read(struct iio_dev *indio_dev, uintptr_t private,
+			    const struct iio_chan_spec *chan, char *buf)
+{
+	struct adf4377_state *st = iio_priv(indio_dev);
+	u64 val = 0;
+	int ret;
+
+	switch ((u32)private) {
+	case ADF4377_FREQ:
+		ret = adf4377_get_freq(st, &val);
+		if (ret)
+			return ret;
+
+		return sysfs_emit(buf, "%llu\n", val);
+	default:
+		return -EINVAL;
+	}
+}
+
+static ssize_t adf4377_write(struct iio_dev *indio_dev, uintptr_t private,
+			     const struct iio_chan_spec *chan, const char *buf,
+			     size_t len)
+{
+	struct adf4377_state *st = iio_priv(indio_dev);
+	unsigned long long freq;
+	int ret;
+
+	switch ((u32)private) {
+	case ADF4377_FREQ:
+		ret = kstrtoull(buf, 10, &freq);
+		if (ret)
+			return ret;
+
+		ret = adf4377_set_freq(st, freq);
+		if (ret)
+			return ret;
+
+		return len;
+	default:
+		return -EINVAL;
+	}
+}
+
+#define _ADF4377_EXT_INFO(_name, _shared, _ident) { \
+		.name = _name, \
+		.read = adf4377_read, \
+		.write = adf4377_write, \
+		.private = _ident, \
+		.shared = _shared, \
+	}
+
+static const struct iio_chan_spec_ext_info adf4377_ext_info[] = {
+	/*
+	 * Usually we use IIO_CHAN_INFO_FREQUENCY, but there are
+	 * values > 2^32 in order to support the entire frequency range
+	 * in Hz.
+	 */
+	_ADF4377_EXT_INFO("frequency", IIO_SEPARATE, ADF4377_FREQ),
+	{ }
+};
+
+static const struct iio_chan_spec adf4377_channels[] = {
+	{
+		.type = IIO_ALTVOLTAGE,
+		.indexed = 1,
+		.output = 1,
+		.channel = 0,
+		.ext_info = adf4377_ext_info,
+	},
+};
+
+static int adf4377_properties_parse(struct adf4377_state *st)
+{
+	struct spi_device *spi = st->spi;
+	const char *str;
+	int ret;
+
+	st->clkin = devm_clk_get_enabled(&spi->dev, "ref_in");
+	if (IS_ERR(st->clkin))
+		return dev_err_probe(&spi->dev, PTR_ERR(st->clkin),
+				     "failed to get the reference input clock\n");
+
+	st->gpio_ce = devm_gpiod_get_optional(&st->spi->dev, "chip-enable",
+					      GPIOD_OUT_LOW);
+	if (IS_ERR(st->gpio_ce))
+		return dev_err_probe(&spi->dev, PTR_ERR(st->gpio_ce),
+				     "failed to get the CE GPIO\n");
+
+	st->gpio_enclk1 = devm_gpiod_get_optional(&st->spi->dev, "clk1-enable",
+						  GPIOD_OUT_LOW);
+	if (IS_ERR(st->gpio_enclk1))
+		return dev_err_probe(&spi->dev, PTR_ERR(st->gpio_enclk1),
+				     "failed to get the CE GPIO\n");
+
+	st->gpio_enclk2 = devm_gpiod_get_optional(&st->spi->dev, "clk2-enable",
+						  GPIOD_OUT_LOW);
+	if (IS_ERR(st->gpio_enclk2))
+		return dev_err_probe(&spi->dev, PTR_ERR(st->gpio_enclk2),
+				     "failed to get the CE GPIO\n");
+
+	ret = device_property_read_string(&spi->dev, "adi,muxout-select", &str);
+	if (ret) {
+		st->muxout_select = ADF4377_MUXOUT_HIGH_Z;
+	} else {
+		ret = match_string(adf4377_muxout_modes, ARRAY_SIZE(adf4377_muxout_modes), str);
+		if (ret < 0)
+			return ret;
+
+		st->muxout_select = ret;
+	}
+
+	return 0;
+}
+
+static int adf4377_freq_change(struct notifier_block *nb, unsigned long action, void *data)
+{
+	struct adf4377_state *st = container_of(nb, struct adf4377_state, nb);
+	int ret;
+
+	if (action == POST_RATE_CHANGE) {
+		mutex_lock(&st->lock);
+		ret = notifier_from_errno(adf4377_init(st));
+		mutex_unlock(&st->lock);
+		return ret;
+	}
+
+	return NOTIFY_OK;
+}
+
+static int adf4377_probe(struct spi_device *spi)
+{
+	struct iio_dev *indio_dev;
+	struct regmap *regmap;
+	struct adf4377_state *st;
+	int ret;
+
+	indio_dev = devm_iio_device_alloc(&spi->dev, sizeof(*st));
+	if (!indio_dev)
+		return -ENOMEM;
+
+	regmap = devm_regmap_init_spi(spi, &adf4377_regmap_config);
+	if (IS_ERR(regmap))
+		return PTR_ERR(regmap);
+
+	st = iio_priv(indio_dev);
+
+	indio_dev->info = &adf4377_info;
+	indio_dev->name = "adf4377";
+	indio_dev->channels = adf4377_channels;
+	indio_dev->num_channels = ARRAY_SIZE(adf4377_channels);
+
+	st->regmap = regmap;
+	st->spi = spi;
+	mutex_init(&st->lock);
+
+	ret = adf4377_properties_parse(st);
+	if (ret)
+		return ret;
+
+	st->nb.notifier_call = adf4377_freq_change;
+	ret = devm_clk_notifier_register(&spi->dev, st->clkin, &st->nb);
+	if (ret)
+		return ret;
+
+	ret = adf4377_init(st);
+	if (ret)
+		return ret;
+
+	return devm_iio_device_register(&spi->dev, indio_dev);
+}
+
+static const struct spi_device_id adf4377_id[] = {
+	{ "adf4377", 0 },
+	{}
+};
+MODULE_DEVICE_TABLE(spi, adf4377_id);
+
+static const struct of_device_id adf4377_of_match[] = {
+	{ .compatible = "adi,adf4377" },
+	{}
+};
+MODULE_DEVICE_TABLE(of, adf4377_of_match);
+
+static struct spi_driver adf4377_driver = {
+	.driver = {
+		.name = "adf4377",
+		.of_match_table = adf4377_of_match,
+	},
+	.probe = adf4377_probe,
+	.id_table = adf4377_id,
+};
+module_spi_driver(adf4377_driver);
+
+MODULE_AUTHOR("Antoniu Miclaus <antoniu.miclaus@analog.com>");
+MODULE_DESCRIPTION("Analog Devices ADF4377");
+MODULE_LICENSE("GPL");
-- 
GitLab


From 78a01feb4024ffb6c6321e45dc2bfcafb2d1d1e5 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Tue, 25 Oct 2022 15:39:23 +0000
Subject: [PATCH 2552/4122] ftrace: Clean comments related to
 FTRACE_OPS_FL_PER_CPU

Commit b3a88803ac5b ("ftrace: Kill FTRACE_OPS_FL_PER_CPU") didn't
completely remove the comments related to FTRACE_OPS_FL_PER_CPU.

Link: https://lkml.kernel.org/r/20221025153923.1995973-1-zhengyejian1@huawei.com

Fixes: b3a88803ac5b ("ftrace: Kill FTRACE_OPS_FL_PER_CPU")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 33236241f2364..65a5d36463e05 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -163,7 +163,7 @@ static void ftrace_sync_ipi(void *data)
 static ftrace_func_t ftrace_ops_get_list_func(struct ftrace_ops *ops)
 {
 	/*
-	 * If this is a dynamic, RCU, or per CPU ops, or we force list func,
+	 * If this is a dynamic or RCU ops, or we force list func,
 	 * then it needs to call the list anyway.
 	 */
 	if (ops->flags & (FTRACE_OPS_FL_DYNAMIC | FTRACE_OPS_FL_RCU) ||
@@ -3071,8 +3071,6 @@ out:
 	/*
 	 * Dynamic ops may be freed, we must make sure that all
 	 * callers are done before leaving this function.
-	 * The same goes for freeing the per_cpu data of the per_cpu
-	 * ops.
 	 */
 	if (ops->flags & FTRACE_OPS_FL_DYNAMIC) {
 		/*
@@ -7519,8 +7517,6 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip,
 		/*
 		 * Check the following for each ops before calling their func:
 		 *  if RCU flag is set, then rcu_is_watching() must be true
-		 *  if PER_CPU is set, then ftrace_function_local_disable()
-		 *                          must be false
 		 *  Otherwise test if the ip matches the ops filter
 		 *
 		 * If any of the above fails then the op->func() is not executed.
@@ -7570,8 +7566,8 @@ NOKPROBE_SYMBOL(arch_ftrace_ops_list_func);
 
 /*
  * If there's only one function registered but it does not support
- * recursion, needs RCU protection and/or requires per cpu handling, then
- * this function will be called by the mcount trampoline.
+ * recursion, needs RCU protection, then this function will be called
+ * by the mcount trampoline.
  */
 static void ftrace_ops_assist_func(unsigned long ip, unsigned long parent_ip,
 				   struct ftrace_ops *op, struct ftrace_regs *fregs)
-- 
GitLab


From 8230f27b1ccc4b8976c137e3d6d690f9d4ffca8d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Fri, 14 Oct 2022 08:04:56 -0400
Subject: [PATCH 2553/4122] tracing: Add __cpumask to denote a trace event
 field that is a cpumask_t

The trace events have a __bitmask field that can be used for anything
that requires bitmasks. Although currently it is only used for CPU
masks, it could be used in the future for any type of bitmasks.

There is some user space tooling that wants to know if a field is a CPU
mask and not just some random unsigned long bitmask. Introduce
"__cpumask()" helper functions that work the same as the current
__bitmask() helpers but displays in the format file:

  field:__data_loc cpumask_t *[] mask;    offset:36;      size:4; signed:0;

Instead of:

  field:__data_loc unsigned long[] mask;  offset:32;      size:4; signed:0;

The main difference is the type. Instead of "unsigned long" it is
"cpumask_t *". Note, this type field needs to be a real type in the
__dynamic_array() logic that both __cpumask and__bitmask use, but the
comparison field requires it to be a scalar type whereas cpumask_t is a
structure (non-scalar). But everything works when making it a pointer.

Valentin added changes to remove the need of passing in "nr_bits" and the
__cpumask will always use nr_cpumask_bits as its size.

Link: https://lkml.kernel.org/r/20221014080456.1d32b989@rorschach.local.home

Requested-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/bpf_probe.h                    |  6 ++++
 include/trace/perf.h                         |  6 ++++
 include/trace/stages/stage1_struct_define.h  |  6 ++++
 include/trace/stages/stage2_data_offsets.h   |  6 ++++
 include/trace/stages/stage3_trace_output.h   |  6 ++++
 include/trace/stages/stage4_event_fields.h   |  6 ++++
 include/trace/stages/stage5_get_offsets.h    |  6 ++++
 include/trace/stages/stage6_event_callback.h | 20 ++++++++++++
 include/trace/stages/stage7_class_define.h   |  2 ++
 samples/trace_events/trace-events-sample.c   |  2 +-
 samples/trace_events/trace-events-sample.h   | 34 +++++++++++++++-----
 11 files changed, 91 insertions(+), 9 deletions(-)

diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 6a13220d2d27b..155c495b89ead 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -21,6 +21,9 @@
 #undef __get_bitmask
 #define __get_bitmask(field) (char *)__get_dynamic_array(field)
 
+#undef __get_cpumask
+#define __get_cpumask(field) (char *)__get_dynamic_array(field)
+
 #undef __get_sockaddr
 #define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
 
@@ -40,6 +43,9 @@
 #undef __get_rel_bitmask
 #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
 
+#undef __get_rel_cpumask
+#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field)
+
 #undef __get_rel_sockaddr
 #define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
 
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 5800d13146c3d..8f3bf1e177070 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -21,6 +21,9 @@
 #undef __get_bitmask
 #define __get_bitmask(field) (char *)__get_dynamic_array(field)
 
+#undef __get_cpumask
+#define __get_cpumask(field) (char *)__get_dynamic_array(field)
+
 #undef __get_sockaddr
 #define __get_sockaddr(field) ((struct sockaddr *)__get_dynamic_array(field))
 
@@ -41,6 +44,9 @@
 #undef __get_rel_bitmask
 #define __get_rel_bitmask(field) (char *)__get_rel_dynamic_array(field)
 
+#undef __get_rel_cpumask
+#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field)
+
 #undef __get_rel_sockaddr
 #define __get_rel_sockaddr(field) ((struct sockaddr *)__get_rel_dynamic_array(field))
 
diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h
index 1b7bab60434c1..69e0dae453bfa 100644
--- a/include/trace/stages/stage1_struct_define.h
+++ b/include/trace/stages/stage1_struct_define.h
@@ -32,6 +32,9 @@
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
 
+#undef __cpumask
+#define __cpumask(item) __dynamic_array(char, item, -1)
+
 #undef __sockaddr
 #define __sockaddr(field, len) __dynamic_array(u8, field, len)
 
@@ -47,6 +50,9 @@
 #undef __rel_bitmask
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(char, item, -1)
 
+#undef __rel_cpumask
+#define __rel_cpumask(item) __rel_dynamic_array(char, item, -1)
+
 #undef __rel_sockaddr
 #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
 
diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h
index 1b7a8f764fddd..469b6a64293de 100644
--- a/include/trace/stages/stage2_data_offsets.h
+++ b/include/trace/stages/stage2_data_offsets.h
@@ -38,6 +38,9 @@
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
 
+#undef __cpumask
+#define __cpumask(item) __dynamic_array(unsigned long, item, -1)
+
 #undef __sockaddr
 #define __sockaddr(field, len) __dynamic_array(u8, field, len)
 
@@ -53,5 +56,8 @@
 #undef __rel_bitmask
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
 
+#undef __rel_cpumask
+#define __rel_cpumask(item) __rel_dynamic_array(unsigned long, item, -1)
+
 #undef __rel_sockaddr
 #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
diff --git a/include/trace/stages/stage3_trace_output.h b/include/trace/stages/stage3_trace_output.h
index e3b183e9d18ea..66374df61ed30 100644
--- a/include/trace/stages/stage3_trace_output.h
+++ b/include/trace/stages/stage3_trace_output.h
@@ -42,6 +42,9 @@
 		trace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
 	})
 
+#undef __get_cpumask
+#define __get_cpumask(field) __get_bitmask(field)
+
 #undef __get_rel_bitmask
 #define __get_rel_bitmask(field)						\
 	({								\
@@ -51,6 +54,9 @@
 		trace_print_bitmask_seq(p, __bitmask, __bitmask_size);	\
 	})
 
+#undef __get_rel_cpumask
+#define __get_rel_cpumask(field) __get_rel_bitmask(field)
+
 #undef __get_sockaddr
 #define __get_sockaddr(field)	((struct sockaddr *)__get_dynamic_array(field))
 
diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
index a8fb25f39a99d..f2990d22313cf 100644
--- a/include/trace/stages/stage4_event_fields.h
+++ b/include/trace/stages/stage4_event_fields.h
@@ -46,6 +46,9 @@
 #undef __bitmask
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
 
+#undef __cpumask
+#define __cpumask(item) __dynamic_array(cpumask_t *, item, -1)
+
 #undef __sockaddr
 #define __sockaddr(field, len) __dynamic_array(u8, field, len)
 
@@ -64,5 +67,8 @@
 #undef __rel_bitmask
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
 
+#undef __rel_cpumask
+#define __rel_cpumask(item) __rel_dynamic_array(cpumask_t *, item, -1)
+
 #undef __rel_sockaddr
 #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h
index fba4c24ed9e60..ac5c24d3beeb2 100644
--- a/include/trace/stages/stage5_get_offsets.h
+++ b/include/trace/stages/stage5_get_offsets.h
@@ -82,10 +82,16 @@
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item,	\
 					 __bitmask_size_in_longs(nr_bits))
 
+#undef __cpumask
+#define __cpumask(item) __bitmask(item, nr_cpumask_bits)
+
 #undef __rel_bitmask
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item,	\
 					 __bitmask_size_in_longs(nr_bits))
 
+#undef __rel_cpumask
+#define __rel_cpumask(item) __rel_bitmask(item, nr_cpumask_bits)
+
 #undef __sockaddr
 #define __sockaddr(field, len) __dynamic_array(u8, field, len)
 
diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h
index 3c554a5853204..49c32394b53fb 100644
--- a/include/trace/stages/stage6_event_callback.h
+++ b/include/trace/stages/stage6_event_callback.h
@@ -57,6 +57,16 @@
 #define __assign_bitmask(dst, src, nr_bits)					\
 	memcpy(__get_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
 
+#undef __cpumask
+#define __cpumask(item) __dynamic_array(unsigned long, item, -1)
+
+#undef __get_cpumask
+#define __get_cpumask(field) (char *)__get_dynamic_array(field)
+
+#undef __assign_cpumask
+#define __assign_cpumask(dst, src)					\
+	memcpy(__get_cpumask(dst), (src), __bitmask_size_in_bytes(nr_cpumask_bits))
+
 #undef __sockaddr
 #define __sockaddr(field, len) __dynamic_array(u8, field, len)
 
@@ -98,6 +108,16 @@
 #define __assign_rel_bitmask(dst, src, nr_bits)					\
 	memcpy(__get_rel_bitmask(dst), (src), __bitmask_size_in_bytes(nr_bits))
 
+#undef __rel_cpumask
+#define __rel_cpumask(item) __rel_dynamic_array(unsigned long, item, -1)
+
+#undef __get_rel_cpumask
+#define __get_rel_cpumask(field) (char *)__get_rel_dynamic_array(field)
+
+#undef __assign_rel_cpumask
+#define __assign_rel_cpumask(dst, src)					\
+	memcpy(__get_rel_cpumask(dst), (src), __bitmask_size_in_bytes(nr_cpumask_bits))
+
 #undef __rel_sockaddr
 #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
 
diff --git a/include/trace/stages/stage7_class_define.h b/include/trace/stages/stage7_class_define.h
index 8a7ec24c246dd..8795429f388b0 100644
--- a/include/trace/stages/stage7_class_define.h
+++ b/include/trace/stages/stage7_class_define.h
@@ -13,11 +13,13 @@
 #undef __get_dynamic_array_len
 #undef __get_str
 #undef __get_bitmask
+#undef __get_cpumask
 #undef __get_sockaddr
 #undef __get_rel_dynamic_array
 #undef __get_rel_dynamic_array_len
 #undef __get_rel_str
 #undef __get_rel_bitmask
+#undef __get_rel_cpumask
 #undef __get_rel_sockaddr
 #undef __print_array
 #undef __print_hex_dump
diff --git a/samples/trace_events/trace-events-sample.c b/samples/trace_events/trace-events-sample.c
index 608c4ae3b08a3..ecc7db237f2ef 100644
--- a/samples/trace_events/trace-events-sample.c
+++ b/samples/trace_events/trace-events-sample.c
@@ -50,7 +50,7 @@ static void do_simple_thread_func(int cnt, const char *fmt, ...)
 
 	trace_foo_with_template_print("I have to be different", cnt);
 
-	trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask);
+	trace_foo_rel_loc("Hello __rel_loc", cnt, bitmask, current->cpus_ptr);
 }
 
 static void simple_thread_func(int cnt)
diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index 1a92226202fc5..fb4548a44153c 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -200,6 +200,16 @@
  *
  *         __assign_bitmask(target_cpus, cpumask_bits(bar), nr_cpumask_bits);
  *
+ *   __cpumask: This is pretty much the same as __bitmask but is specific for
+ *         CPU masks. The type displayed to the user via the format files will
+ *         be "cpumaks_t" such that user space may deal with them differently
+ *         if they choose to do so, and the bits is always set to nr_cpumask_bits.
+ *
+ *         __cpumask(target_cpu)
+ *
+ *         To assign a cpumask, use the __assign_cpumask() helper macro.
+ *
+ *         __assign_cpumask(target_cpus, cpumask_bits(bar));
  *
  * fast_assign: This is a C like function that is used to store the items
  *    into the ring buffer. A special variable called "__entry" will be the
@@ -212,8 +222,8 @@
  *    This is also used to print out the data from the trace files.
  *    Again, the __entry macro is used to access the data from the ring buffer.
  *
- *    Note, __dynamic_array, __string, and __bitmask require special helpers
- *       to access the data.
+ *    Note, __dynamic_array, __string, __bitmask and __cpumask require special
+ *       helpers to access the data.
  *
  *      For __dynamic_array(int, foo, bar) use __get_dynamic_array(foo)
  *            Use __get_dynamic_array_len(foo) to get the length of the array
@@ -226,6 +236,8 @@
  *
  *      For __bitmask(target_cpus, nr_cpumask_bits) use __get_bitmask(target_cpus)
  *
+ *      For __cpumask(target_cpus) use __get_cpumask(target_cpus)
+ *
  *
  * Note, that for both the assign and the printk, __entry is the handler
  * to the data structure in the ring buffer, and is defined by the
@@ -288,6 +300,7 @@ TRACE_EVENT(foo_bar,
 		__dynamic_array(int,	list,   __length_of(lst))
 		__string(	str,	string			)
 		__bitmask(	cpus,	num_possible_cpus()	)
+		__cpumask(	cpum				)
 		__vstring(	vstr,	fmt,	va		)
 	),
 
@@ -299,9 +312,10 @@ TRACE_EVENT(foo_bar,
 		__assign_str(str, string);
 		__assign_vstr(vstr, fmt, va);
 		__assign_bitmask(cpus, cpumask_bits(mask), num_possible_cpus());
+		__assign_cpumask(cpum, cpumask_bits(mask));
 	),
 
-	TP_printk("foo %s %d %s %s %s %s (%s) %s", __entry->foo, __entry->bar,
+	TP_printk("foo %s %d %s %s %s %s (%s) (%s) %s", __entry->foo, __entry->bar,
 
 /*
  * Notice here the use of some helper functions. This includes:
@@ -345,7 +359,8 @@ TRACE_EVENT(foo_bar,
 		  __print_array(__get_dynamic_array(list),
 				__get_dynamic_array_len(list) / sizeof(int),
 				sizeof(int)),
-		  __get_str(str), __get_bitmask(cpus), __get_str(vstr))
+		  __get_str(str), __get_bitmask(cpus), __get_cpumask(cpus),
+		  __get_str(vstr))
 );
 
 /*
@@ -542,15 +557,16 @@ DEFINE_EVENT_PRINT(foo_template, foo_with_template_print,
 
 TRACE_EVENT(foo_rel_loc,
 
-	TP_PROTO(const char *foo, int bar, unsigned long *mask),
+	TP_PROTO(const char *foo, int bar, unsigned long *mask, const cpumask_t *cpus),
 
-	TP_ARGS(foo, bar, mask),
+	TP_ARGS(foo, bar, mask, cpus),
 
 	TP_STRUCT__entry(
 		__rel_string(	foo,	foo	)
 		__field(	int,	bar	)
 		__rel_bitmask(	bitmask,
 			BITS_PER_BYTE * sizeof(unsigned long)	)
+		__rel_cpumask(	cpumask )
 	),
 
 	TP_fast_assign(
@@ -558,10 +574,12 @@ TRACE_EVENT(foo_rel_loc,
 		__entry->bar = bar;
 		__assign_rel_bitmask(bitmask, mask,
 			BITS_PER_BYTE * sizeof(unsigned long));
+		__assign_rel_cpumask(cpumask, cpus);
 	),
 
-	TP_printk("foo_rel_loc %s, %d, %s", __get_rel_str(foo), __entry->bar,
-		  __get_rel_bitmask(bitmask))
+	TP_printk("foo_rel_loc %s, %d, %s, %s", __get_rel_str(foo), __entry->bar,
+		  __get_rel_bitmask(bitmask),
+		  __get_rel_cpumask(cpumask))
 );
 #endif
 
-- 
GitLab


From a01fdc897fa56ffb596d0a0cd7ea2ab3bd8398c5 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Thu, 20 Oct 2022 21:00:56 -0400
Subject: [PATCH 2554/4122] tracing: Add trace_trigger kernel command line
 option

Allow triggers to be enabled at kernel boot up. For example:

  trace_trigger="sched_switch.stacktrace if prev_state == 2"

The above will enable the stacktrace trigger on top of the sched_switch
event and only trigger if its prev_state is 2 (TASK_UNINTERRUPTIBLE). Then
at boot up, a stacktrace will trigger and be recorded in the tracing ring
buffer every time the sched_switch happens where the previous state is
TASK_INTERRUPTIBLE.

Another useful trigger would be "traceoff" which can stop tracing on an
event if a field of the event matches a certain value defined by the
filter ("if" statement).

Link: https://lore.kernel.org/linux-trace-kernel/20221020210056.0d8d0a5b@gandalf.local.home

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 .../admin-guide/kernel-parameters.txt         | 19 +++++
 kernel/trace/trace_events.c                   | 72 ++++++++++++++++++-
 2 files changed, 89 insertions(+), 2 deletions(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a465d5242774a..ccf91a4bf113f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -6257,6 +6257,25 @@
 			See also Documentation/trace/ftrace.rst "trace options"
 			section.
 
+	trace_trigger=[trigger-list]
+			[FTRACE] Add a event trigger on specific events.
+			Set a trigger on top of a specific event, with an optional
+			filter.
+
+			The format is is "trace_trigger=<event>.<trigger>[ if <filter>],..."
+			Where more than one trigger may be specified that are comma deliminated.
+
+			For example:
+
+			  trace_trigger="sched_switch.stacktrace if prev_state == 2"
+
+			The above will enable the "stacktrace" trigger on the "sched_switch"
+			event but only trigger it if the "prev_state" of the "sched_switch"
+			event is "2" (TASK_UNINTERUPTIBLE).
+
+			See also "Event triggers" in Documentation/trace/events.rst
+
+
 	traceoff_on_warning
 			[FTRACE] enable this option to disable tracing when a
 			warning is hit. This turns off "tracing_on". Tracing can
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index f71ea6e79b3c8..3bfaf560ecc46 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2796,6 +2796,44 @@ trace_create_new_event(struct trace_event_call *call,
 	return file;
 }
 
+#ifdef CONFIG_HIST_TRIGGERS
+#define MAX_BOOT_TRIGGERS 32
+
+static struct boot_triggers {
+	const char		*event;
+	char			*trigger;
+} bootup_triggers[MAX_BOOT_TRIGGERS];
+
+static char bootup_trigger_buf[COMMAND_LINE_SIZE];
+static int nr_boot_triggers;
+
+static __init int setup_trace_triggers(char *str)
+{
+	char *trigger;
+	char *buf;
+	int i;
+
+	strlcpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
+	ring_buffer_expanded = true;
+	disable_tracing_selftest("running event triggers");
+
+	buf = bootup_trigger_buf;
+	for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
+		trigger = strsep(&buf, ",");
+		if (!trigger)
+			break;
+		bootup_triggers[i].event = strsep(&trigger, ".");
+		bootup_triggers[i].trigger = strsep(&trigger, ".");
+		if (!bootup_triggers[i].trigger)
+			break;
+	}
+
+	nr_boot_triggers = i;
+	return 1;
+}
+__setup("trace_trigger=", setup_trace_triggers);
+#endif
+
 /* Add an event to a trace directory */
 static int
 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
@@ -2812,6 +2850,28 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
 		return event_define_fields(call);
 }
 
+#ifdef CONFIG_HIST_TRIGGERS
+static void trace_early_triggers(struct trace_event_file *file, const char *name)
+{
+	int ret;
+	int i;
+
+	for (i = 0; i < nr_boot_triggers; i++) {
+		if (strcmp(name, bootup_triggers[i].event))
+			continue;
+		mutex_lock(&event_mutex);
+		ret = trigger_process_regex(file, bootup_triggers[i].trigger);
+		mutex_unlock(&event_mutex);
+		if (ret)
+			pr_err("Failed to register trigger '%s' on event %s\n",
+			       bootup_triggers[i].trigger,
+			       bootup_triggers[i].event);
+	}
+}
+#else
+static inline void trace_early_triggers(struct trace_event_file *file, const char *name) { }
+#endif
+
 /*
  * Just create a descriptor for early init. A descriptor is required
  * for enabling events at boot. We want to enable events before
@@ -2822,12 +2882,19 @@ __trace_early_add_new_event(struct trace_event_call *call,
 			    struct trace_array *tr)
 {
 	struct trace_event_file *file;
+	int ret;
 
 	file = trace_create_new_event(call, tr);
 	if (!file)
 		return -ENOMEM;
 
-	return event_define_fields(call);
+	ret = event_define_fields(call);
+	if (ret)
+		return ret;
+
+	trace_early_triggers(file, trace_event_name(call));
+
+	return 0;
 }
 
 struct ftrace_module_file_ops;
@@ -3735,6 +3802,8 @@ static __init int event_trace_enable(void)
 			list_add(&call->list, &ftrace_events);
 	}
 
+	register_trigger_cmds();
+
 	/*
 	 * We need the top trace array to have a working set of trace
 	 * points at early init, before the debug files and directories
@@ -3749,7 +3818,6 @@ static __init int event_trace_enable(void)
 
 	register_event_cmds();
 
-	register_trigger_cmds();
 
 	return 0;
 }
-- 
GitLab


From 04aabc32fb677f91d676fd306bca1043805e78d5 Mon Sep 17 00:00:00 2001
From: Song Chen <chensong_2000@189.cn>
Date: Thu, 20 Oct 2022 22:06:51 +0800
Subject: [PATCH 2555/4122] ring_buffer: Remove unused "event" parameter

After commit a389d86f7fd0 ("ring-buffer: Have nested events still record
running time stamp"), the "event" parameter is no longer used in either
ring_buffer_unlock_commit() or rb_commit(). Best to remove it.

Link: https://lkml.kernel.org/r/1666274811-24138-1-git-send-email-chensong_2000@189.cn

Signed-off-by: Song Chen <chensong_2000@189.cn>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h          |  3 +--
 kernel/trace/ring_buffer.c           | 12 +++++-------
 kernel/trace/ring_buffer_benchmark.c |  2 +-
 kernel/trace/trace.c                 |  2 +-
 4 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 3c7d295746f67..782e14f62201f 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -113,8 +113,7 @@ void ring_buffer_change_overwrite(struct trace_buffer *buffer, int val);
 
 struct ring_buffer_event *ring_buffer_lock_reserve(struct trace_buffer *buffer,
 						   unsigned long length);
-int ring_buffer_unlock_commit(struct trace_buffer *buffer,
-			      struct ring_buffer_event *event);
+int ring_buffer_unlock_commit(struct trace_buffer *buffer);
 int ring_buffer_write(struct trace_buffer *buffer,
 		      unsigned long length, void *data);
 
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index b21bf14bae9bd..843818ee4814e 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3180,8 +3180,7 @@ static inline void rb_event_discard(struct ring_buffer_event *event)
 		event->time_delta = 1;
 }
 
-static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer,
-		      struct ring_buffer_event *event)
+static void rb_commit(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	local_inc(&cpu_buffer->entries);
 	rb_end_commit(cpu_buffer);
@@ -3383,15 +3382,14 @@ void ring_buffer_nest_end(struct trace_buffer *buffer)
  *
  * Must be paired with ring_buffer_lock_reserve.
  */
-int ring_buffer_unlock_commit(struct trace_buffer *buffer,
-			      struct ring_buffer_event *event)
+int ring_buffer_unlock_commit(struct trace_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	int cpu = raw_smp_processor_id();
 
 	cpu_buffer = buffer->buffers[cpu];
 
-	rb_commit(cpu_buffer, event);
+	rb_commit(cpu_buffer);
 
 	rb_wakeups(buffer, cpu_buffer);
 
@@ -3977,7 +3975,7 @@ int ring_buffer_write(struct trace_buffer *buffer,
 
 	memcpy(body, data, length);
 
-	rb_commit(cpu_buffer, event);
+	rb_commit(cpu_buffer);
 
 	rb_wakeups(buffer, cpu_buffer);
 
@@ -5998,7 +5996,7 @@ static __init int rb_write_something(struct rb_test_data *data, bool nested)
 	}
 
  out:
-	ring_buffer_unlock_commit(data->buffer, event);
+	ring_buffer_unlock_commit(data->buffer);
 
 	return 0;
 }
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index 78e576575b79e..aef34673d79d0 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -258,7 +258,7 @@ static void ring_buffer_producer(void)
 				hit++;
 				entry = ring_buffer_event_data(event);
 				*entry = smp_processor_id();
-				ring_buffer_unlock_commit(buffer, event);
+				ring_buffer_unlock_commit(buffer);
 			}
 		}
 		end_time = ktime_get();
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5cfc95a52bc37..5c97dbef741b4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -999,7 +999,7 @@ __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *ev
 		/* ring_buffer_unlock_commit() enables preemption */
 		preempt_enable_notrace();
 	} else
-		ring_buffer_unlock_commit(buffer, event);
+		ring_buffer_unlock_commit(buffer);
 }
 
 /**
-- 
GitLab


From b179d48b6aab21f3999f5006685ea4254c0618a9 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Thu, 17 Nov 2022 14:46:18 +0100
Subject: [PATCH 2556/4122] tracing/osnoise: Add osnoise/options file

Add the tracing/osnoise/options file to control
osnoise/timerlat tracer features. It is a single
file to contain multiple features, similar to
the sched/features file.

Reading the file displays a list of options. Writing
the OPTION_NAME enables it, writing NO_OPTION_NAME disables
it.

The DEAFULTS is a particular option that resets the options
to the default ones.

It uses a bitmask to keep track of the status of the option. When
needed, we can add a list of static keys, but for now
it does not justify the memory increase.

Link: https://lkml.kernel.org/r/f8d34aefdb225d2603fcb4c02a120832a0cd3339.1668692096.git.bristot@kernel.org

Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 170 +++++++++++++++++++++++++++++++++++
 1 file changed, 170 insertions(+)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 4300c5dc4e5db..17b77fe3950b8 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -48,6 +48,19 @@
 #define DEFAULT_TIMERLAT_PERIOD	1000			/* 1ms */
 #define DEFAULT_TIMERLAT_PRIO	95			/* FIFO 95 */
 
+/*
+ * osnoise/options entries.
+ */
+enum osnoise_options_index {
+	OSN_DEFAULTS = 0,
+	OSN_MAX
+};
+
+static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS" };
+
+#define OSN_DEFAULT_OPTIONS	0
+unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
+
 /*
  * trace_array of the enabled osnoise/timerlat instances.
  */
@@ -1860,6 +1873,150 @@ static void osnoise_init_hotplug_support(void)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
+/*
+ * seq file functions for the osnoise/options file.
+ */
+static void *s_options_start(struct seq_file *s, loff_t *pos)
+{
+	int option = *pos;
+
+	mutex_lock(&interface_lock);
+
+	if (option >= OSN_MAX)
+		return NULL;
+
+	return pos;
+}
+
+static void *s_options_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	int option = ++(*pos);
+
+	if (option >= OSN_MAX)
+		return NULL;
+
+	return pos;
+}
+
+static int s_options_show(struct seq_file *s, void *v)
+{
+	loff_t *pos = v;
+	int option = *pos;
+
+	if (option == OSN_DEFAULTS) {
+		if (osnoise_options == OSN_DEFAULT_OPTIONS)
+			seq_printf(s, "%s", osnoise_options_str[option]);
+		else
+			seq_printf(s, "NO_%s", osnoise_options_str[option]);
+		goto out;
+	}
+
+	if (test_bit(option, &osnoise_options))
+		seq_printf(s, "%s", osnoise_options_str[option]);
+	else
+		seq_printf(s, "NO_%s", osnoise_options_str[option]);
+
+out:
+	if (option != OSN_MAX)
+		seq_puts(s, " ");
+
+	return 0;
+}
+
+static void s_options_stop(struct seq_file *s, void *v)
+{
+	seq_puts(s, "\n");
+	mutex_unlock(&interface_lock);
+}
+
+static const struct seq_operations osnoise_options_seq_ops = {
+	.start		= s_options_start,
+	.next		= s_options_next,
+	.show		= s_options_show,
+	.stop		= s_options_stop
+};
+
+static int osnoise_options_open(struct inode *inode, struct file *file)
+{
+	return seq_open(file, &osnoise_options_seq_ops);
+};
+
+/**
+ * osnoise_options_write - Write function for "options" entry
+ * @filp: The active open file structure
+ * @ubuf: The user buffer that contains the value to write
+ * @cnt: The maximum number of bytes to write to "file"
+ * @ppos: The current position in @file
+ *
+ * Writing the option name sets the option, writing the "NO_"
+ * prefix in front of the option name disables it.
+ *
+ * Writing "DEFAULTS" resets the option values to the default ones.
+ */
+static ssize_t osnoise_options_write(struct file *filp, const char __user *ubuf,
+				     size_t cnt, loff_t *ppos)
+{
+	int running, option, enable, retval;
+	char buf[256], *option_str;
+
+	if (cnt >= 256)
+		return -EINVAL;
+
+	if (copy_from_user(buf, ubuf, cnt))
+		return -EFAULT;
+
+	buf[cnt] = 0;
+
+	if (strncmp(buf, "NO_", 3)) {
+		option_str = strstrip(buf);
+		enable = true;
+	} else {
+		option_str = strstrip(&buf[3]);
+		enable = false;
+	}
+
+	option = match_string(osnoise_options_str, OSN_MAX, option_str);
+	if (option < 0)
+		return -EINVAL;
+
+	/*
+	 * trace_types_lock is taken to avoid concurrency on start/stop.
+	 */
+	mutex_lock(&trace_types_lock);
+	running = osnoise_has_registered_instances();
+	if (running)
+		stop_per_cpu_kthreads();
+
+	mutex_lock(&interface_lock);
+	/*
+	 * avoid CPU hotplug operations that might read options.
+	 */
+	cpus_read_lock();
+
+	retval = cnt;
+
+	if (enable) {
+		if (option == OSN_DEFAULTS)
+			osnoise_options = OSN_DEFAULT_OPTIONS;
+		else
+			set_bit(option, &osnoise_options);
+	} else {
+		if (option == OSN_DEFAULTS)
+			retval = -EINVAL;
+		else
+			clear_bit(option, &osnoise_options);
+	}
+
+	cpus_read_unlock();
+	mutex_unlock(&interface_lock);
+
+	if (running)
+		start_per_cpu_kthreads();
+	mutex_unlock(&trace_types_lock);
+
+	return retval;
+}
+
 /*
  * osnoise_cpus_read - Read function for reading the "cpus" file
  * @filp: The active open file structure
@@ -2042,6 +2199,14 @@ static const struct file_operations cpus_fops = {
 	.llseek		= generic_file_llseek,
 };
 
+static const struct file_operations osnoise_options_fops = {
+	.open		= osnoise_options_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release,
+	.write		= osnoise_options_write
+};
+
 #ifdef CONFIG_TIMERLAT_TRACER
 #ifdef CONFIG_STACKTRACE
 static int init_timerlat_stack_tracefs(struct dentry *top_dir)
@@ -2128,6 +2293,11 @@ static int init_tracefs(void)
 	if (!tmp)
 		goto err;
 
+	tmp = trace_create_file("options", TRACE_MODE_WRITE, top_dir, NULL,
+				&osnoise_options_fops);
+	if (!tmp)
+		goto err;
+
 	ret = init_timerlat_tracefs(top_dir);
 	if (ret)
 		goto err;
-- 
GitLab


From 30838fcd81078d078b10209bc18a6357ba4dd5fa Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Thu, 17 Nov 2022 14:46:19 +0100
Subject: [PATCH 2557/4122] tracing/osnoise: Add OSNOISE_WORKLOAD option

The osnoise tracer is not only a tracer, and a set of tracepoints,
but also a workload dispatcher.

In preparation for having other workloads, e.g., in user-space,
add an option to avoid dispatching the workload.

By not dispatching the workload, the osnoise: tracepoints become
generic events to measure the execution time of *any* task on Linux.

For example:

  # cd /sys/kernel/tracing/
  # cat osnoise/options
  DEFAULTS OSNOISE_WORKLOAD
  # echo NO_OSNOISE_WORKLOAD > osnoise/options
  # cat osnoise/options
  NO_DEFAULTS NO_OSNOISE_WORKLOAD
  # echo osnoise > set_event
  # echo osnoise > current_tracer
  # tail -8 trace
      make-94722   [002] d..3.  1371.794507: thread_noise:     make:94722 start 1371.794302286 duration 200897 ns
        sh-121042  [020] d..3.  1371.794534: thread_noise:       sh:121042 start 1371.781610976 duration 8943683 ns
      make-121097  [005] d..3.  1371.794542: thread_noise:     make:121097 start 1371.794481522 duration 60444 ns
     <...>-40      [005] d..3.  1371.794550: thread_noise: migration/5:40 start 1371.794542256 duration 7154 ns
    <idle>-0       [018] dNh2.  1371.794554: irq_noise: reschedule:253 start 1371.794553547 duration 40 ns
    <idle>-0       [018] dNh2.  1371.794561: irq_noise: local_timer:236 start 1371.794556222 duration 4890 ns
    <idle>-0       [018] .Ns2.  1371.794563: softirq_noise:    SCHED:7 start 1371.794561803 duration 992 ns
    <idle>-0       [018] d..3.  1371.794566: thread_noise: swapper/18:0 start 1371.781368110 duration 13191798 ns

In preparation for the rtla exec_time tracer/tool and
rtla osnoise --user option.

Link: https://lkml.kernel.org/r/f5cfbd37aefd419eefe9243b4d2fc38ed5753fe4.1668692096.git.bristot@kernel.org

Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 17b77fe3950b8..3f10dd1f2f1ce 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -53,12 +53,13 @@
  */
 enum osnoise_options_index {
 	OSN_DEFAULTS = 0,
+	OSN_WORKLOAD,
 	OSN_MAX
 };
 
-static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS" };
+static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD" };
 
-#define OSN_DEFAULT_OPTIONS	0
+#define OSN_DEFAULT_OPTIONS	0x2
 unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
 
 /*
@@ -1186,11 +1187,12 @@ trace_sched_switch_callback(void *data, bool preempt,
 			    unsigned int prev_state)
 {
 	struct osnoise_variables *osn_var = this_cpu_osn_var();
+	int workload = test_bit(OSN_WORKLOAD, &osnoise_options);
 
-	if (p->pid != osn_var->pid)
+	if ((p->pid != osn_var->pid) || !workload)
 		thread_exit(osn_var, p);
 
-	if (n->pid != osn_var->pid)
+	if ((n->pid != osn_var->pid) || !workload)
 		thread_entry(osn_var, n);
 }
 
@@ -1723,9 +1725,16 @@ static void stop_kthread(unsigned int cpu)
 	struct task_struct *kthread;
 
 	kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
-	if (kthread)
+	if (kthread) {
 		kthread_stop(kthread);
-	per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
+		per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
+	} else {
+		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
+			per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
+			barrier();
+			return;
+		}
+	}
 }
 
 /*
@@ -1759,6 +1768,13 @@ static int start_kthread(unsigned int cpu)
 		snprintf(comm, 24, "timerlat/%d", cpu);
 		main = timerlat_main;
 	} else {
+		/* if no workload, just return */
+		if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
+			per_cpu(per_cpu_osnoise_var, cpu).sampling = true;
+			barrier();
+			return 0;
+		}
+
 		snprintf(comm, 24, "osnoise/%d", cpu);
 	}
 
-- 
GitLab


From 67543cd6b8eee53959e624b9ce420ca4d47be0c8 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Thu, 17 Nov 2022 14:46:20 +0100
Subject: [PATCH 2558/4122] Documentation/osnoise: Add osnoise/options
 documentation

Add the documentation about the osnoise/options file, along
with an explanation about the OSNOISE_WORKLOAD option.

Link: https://lkml.kernel.org/r/777af8f3d87beedd304805f98eff6c8291d64226.1668692096.git.bristot@kernel.org

Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 Documentation/trace/osnoise-tracer.rst | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst
index 963def9f97c6e..3c675ed82b27a 100644
--- a/Documentation/trace/osnoise-tracer.rst
+++ b/Documentation/trace/osnoise-tracer.rst
@@ -109,6 +109,11 @@ The tracer has a set of options inside the osnoise directory, they are:
  - tracing_threshold: the minimum delta between two time() reads to be
    considered as noise, in us. When set to 0, the default value will
    be used, which is currently 5 us.
+ - osnoise/options: a set of on/off options that can be enabled by
+   writing the option name to the file or disabled by writing the option
+   name preceded with the 'NO_' prefix. For example, writing
+   NO_OSNOISE_WORKLOAD disables the OSNOISE_WORKLOAD option. The
+   special DEAFAULTS option resets all options to the default value.
 
 Additional Tracing
 ------------------
@@ -150,3 +155,10 @@ tracepoints is smaller than eight us reported in the sample_threshold.
 The reason roots in the overhead of the entry and exit code that happens
 before and after any interference execution. This justifies the dual
 approach: measuring thread and tracing.
+
+Running osnoise tracer without workload
+---------------------------------------
+
+By enabling the osnoise tracer with the NO_OSNOISE_WORKLOAD option set,
+the osnoise: tracepoints serve to measure the execution time of
+any type of Linux task, free from the interference of other tasks.
-- 
GitLab


From 9430cd62b6ccdf1f06915cc06561f0e364809604 Mon Sep 17 00:00:00 2001
From: Chuang Wang <nashuiliang@gmail.com>
Date: Mon, 21 Nov 2022 16:08:31 +0800
Subject: [PATCH 2559/4122] tracing/perf: Use strndup_user instead of
 kzalloc/strncpy_from_user

This patch uses strndup_user instead of kzalloc + strncpy_from_user,
which makes the code more concise.

Link: https://lkml.kernel.org/r/20221121080831.707409-1-nashuiliang@gmail.com

Signed-off-by: Chuang Wang <nashuiliang@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_event_perf.c | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 61e3a2620fa3c..05e7912418126 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -251,16 +251,12 @@ int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
 	struct trace_event_call *tp_event;
 
 	if (p_event->attr.kprobe_func) {
-		func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
-		if (!func)
-			return -ENOMEM;
-		ret = strncpy_from_user(
-			func, u64_to_user_ptr(p_event->attr.kprobe_func),
-			KSYM_NAME_LEN);
-		if (ret == KSYM_NAME_LEN)
-			ret = -E2BIG;
-		if (ret < 0)
-			goto out;
+		func = strndup_user(u64_to_user_ptr(p_event->attr.kprobe_func),
+				    KSYM_NAME_LEN);
+		if (IS_ERR(func)) {
+			ret = PTR_ERR(func);
+			return (ret == -EINVAL) ? -E2BIG : ret;
+		}
 
 		if (func[0] == '\0') {
 			kfree(func);
-- 
GitLab


From a76d4648a0bbd624a1322c15073b5032a5135f01 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Tue, 22 Nov 2022 17:14:56 +0800
Subject: [PATCH 2560/4122] tracing: Make tracepoint_print_iter static

After change in commit 4239174570da ("tracing: Make tracepoint_printk a
static_key"), this symbol is not used outside of the file, so mark it
static.

Link: https://lkml.kernel.org/r/20221122091456.72055-1-xiujianfeng@huawei.com

Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 2 +-
 kernel/trace/trace.h | 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 5c97dbef741b4..93a75a97118f1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -85,7 +85,7 @@ void __init disable_tracing_selftest(const char *reason)
 #endif
 
 /* Pipe tracepoints to printk */
-struct trace_iterator *tracepoint_print_iter;
+static struct trace_iterator *tracepoint_print_iter;
 int tracepoint_printk;
 static bool tracepoint_printk_stop_on_boot __initdata;
 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index d42e245071525..48643f07bc018 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1942,8 +1942,6 @@ static inline void tracer_hardirqs_on(unsigned long a0, unsigned long a1) { }
 static inline void tracer_hardirqs_off(unsigned long a0, unsigned long a1) { }
 #endif
 
-extern struct trace_iterator *tracepoint_print_iter;
-
 /*
  * Reset the state of the trace_iterator so that it can read consumed data.
  * Normally, the trace_iterator is used for reading the data when it is not
-- 
GitLab


From 96e6122cb79616c622ae0d025eb9f981120b568d Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Thu, 10 Nov 2022 10:03:19 +0800
Subject: [PATCH 2561/4122] tracing: Optimize event type allocation with IDA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After commit 060fa5c83e67 ("tracing/events: reuse trace event ids after
 overflow"), trace events with dynamic type are linked up in list
'ftrace_event_list' through field 'trace_event.list'. Then when max
event type number used up, it's possible to reuse type number of some
freed one by traversing 'ftrace_event_list'.

As instead, using IDA to manage available type numbers can make codes
simpler and then the field 'trace_event.list' can be dropped.

Since 'struct trace_event' is used in static tracepoints, drop
'trace_event.list' can make vmlinux smaller. Local test with about 2000
tracepoints, vmlinux reduced about 64KB:
  before：-rwxrwxr-x 1 root root 76669448 Nov  8 17:14 vmlinux
  after： -rwxrwxr-x 1 root root 76604176 Nov  8 17:15 vmlinux

Link: https://lkml.kernel.org/r/20221110020319.1259291-1-zhengyejian1@huawei.com

Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_events.h |  1 -
 kernel/trace/trace_output.c  | 66 +++++++++---------------------------
 2 files changed, 16 insertions(+), 51 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 20749bd9db718..bb2053246d6a2 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -136,7 +136,6 @@ struct trace_event_functions {
 
 struct trace_event {
 	struct hlist_node		node;
-	struct list_head		list;
 	int				type;
 	struct trace_event_functions	*funcs;
 };
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 67f47ea27921d..f0ba97121345b 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -11,6 +11,7 @@
 #include <linux/kprobes.h>
 #include <linux/sched/clock.h>
 #include <linux/sched/mm.h>
+#include <linux/idr.h>
 
 #include "trace_output.h"
 
@@ -21,8 +22,6 @@ DECLARE_RWSEM(trace_event_sem);
 
 static struct hlist_head event_hash[EVENT_HASHSIZE] __read_mostly;
 
-static int next_event_type = __TRACE_LAST_TYPE;
-
 enum print_line_t trace_print_bputs_msg_only(struct trace_iterator *iter)
 {
 	struct trace_seq *s = &iter->seq;
@@ -688,38 +687,23 @@ struct trace_event *ftrace_find_event(int type)
 	return NULL;
 }
 
-static LIST_HEAD(ftrace_event_list);
+static DEFINE_IDA(trace_event_ida);
 
-static int trace_search_list(struct list_head **list)
+static void free_trace_event_type(int type)
 {
-	struct trace_event *e = NULL, *iter;
-	int next = __TRACE_LAST_TYPE;
-
-	if (list_empty(&ftrace_event_list)) {
-		*list = &ftrace_event_list;
-		return next;
-	}
+	if (type >= __TRACE_LAST_TYPE)
+		ida_free(&trace_event_ida, type);
+}
 
-	/*
-	 * We used up all possible max events,
-	 * lets see if somebody freed one.
-	 */
-	list_for_each_entry(iter, &ftrace_event_list, list) {
-		if (iter->type != next) {
-			e = iter;
-			break;
-		}
-		next++;
-	}
+static int alloc_trace_event_type(void)
+{
+	int next;
 
-	/* Did we used up all 65 thousand events??? */
-	if (next > TRACE_EVENT_TYPE_MAX)
+	/* Skip static defined type numbers */
+	next = ida_alloc_range(&trace_event_ida, __TRACE_LAST_TYPE,
+			       TRACE_EVENT_TYPE_MAX, GFP_KERNEL);
+	if (next < 0)
 		return 0;
-
-	if (e)
-		*list = &e->list;
-	else
-		*list = &ftrace_event_list;
 	return next;
 }
 
@@ -761,28 +745,10 @@ int register_trace_event(struct trace_event *event)
 	if (WARN_ON(!event->funcs))
 		goto out;
 
-	INIT_LIST_HEAD(&event->list);
-
 	if (!event->type) {
-		struct list_head *list = NULL;
-
-		if (next_event_type > TRACE_EVENT_TYPE_MAX) {
-
-			event->type = trace_search_list(&list);
-			if (!event->type)
-				goto out;
-
-		} else {
-
-			event->type = next_event_type++;
-			list = &ftrace_event_list;
-		}
-
-		if (WARN_ON(ftrace_find_event(event->type)))
+		event->type = alloc_trace_event_type();
+		if (!event->type)
 			goto out;
-
-		list_add_tail(&event->list, list);
-
 	} else if (WARN(event->type > __TRACE_LAST_TYPE,
 			"Need to add type to trace.h")) {
 		goto out;
@@ -819,7 +785,7 @@ EXPORT_SYMBOL_GPL(register_trace_event);
 int __unregister_trace_event(struct trace_event *event)
 {
 	hlist_del(&event->node);
-	list_del(&event->list);
+	free_trace_event_type(event->type);
 	return 0;
 }
 
-- 
GitLab


From bd604f3db49c5b21171abea0414a2020dcbf2646 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 22 Nov 2022 18:09:05 -0500
Subject: [PATCH 2562/4122] ftrace: Avoid needless updates of the ftrace
 function call

Song Shuai reported:

    The list func (ftrace_ops_list_func) will be patched first
    before the transition between old and new calls are set,
    which fixed the race described in this commit `59338f75`.

    While ftrace_trace_function changes from the list func to a
    ftrace_ops func, like unregistering the klp_ops to leave the only
    global_ops in ftrace_ops_list, the ftrace_[regs]_call will be
    replaced with the list func although it already exists. So there
    should be a condition to avoid this.

And suggested using another variable to keep track of what the ftrace
function is set to. But this could be simplified by using a helper
function that does the same with a static variable.

Link: https://lore.kernel.org/lkml/20221026132039.2236233-1-suagrfillet@gmail.com/
Link: https://lore.kernel.org/linux-trace-kernel/20221122180905.737b6f52@gandalf.local.home

Reported-by: Song Shuai <suagrfillet@gmail.com>
Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 65a5d36463e05..d04552c0c275d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -2763,6 +2763,19 @@ void __weak ftrace_arch_code_modify_post_process(void)
 {
 }
 
+static int update_ftrace_func(ftrace_func_t func)
+{
+	static ftrace_func_t save_func;
+
+	/* Avoid updating if it hasn't changed */
+	if (func == save_func)
+		return 0;
+
+	save_func = func;
+
+	return ftrace_update_ftrace_func(func);
+}
+
 void ftrace_modify_all_code(int command)
 {
 	int update = command & FTRACE_UPDATE_TRACE_FUNC;
@@ -2783,7 +2796,7 @@ void ftrace_modify_all_code(int command)
 	 * traced.
 	 */
 	if (update) {
-		err = ftrace_update_ftrace_func(ftrace_ops_list_func);
+		err = update_ftrace_func(ftrace_ops_list_func);
 		if (FTRACE_WARN_ON(err))
 			return;
 	}
@@ -2799,7 +2812,7 @@ void ftrace_modify_all_code(int command)
 		/* If irqs are disabled, we are in stop machine */
 		if (!irqs_disabled())
 			smp_call_function(ftrace_sync_ipi, NULL, 1);
-		err = ftrace_update_ftrace_func(ftrace_trace_function);
+		err = update_ftrace_func(ftrace_trace_function);
 		if (FTRACE_WARN_ON(err))
 			return;
 	}
-- 
GitLab


From d57d98fef46fead01d954afa1b585405b617a4e4 Mon Sep 17 00:00:00 2001
From: Justin Tee <justin.tee@broadcom.com>
Date: Tue, 15 Nov 2022 17:19:21 -0800
Subject: [PATCH 2563/4122] scsi: lpfc: Update lpfc version to 14.2.0.9

Update lpfc version to 14.2.0.9.

Signed-off-by: Justin Tee <justin.tee@broadcom.com>
Link: https://lore.kernel.org/r/20221116011921.105995-7-justintee8345@gmail.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_version.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_version.h b/drivers/scsi/lpfc/lpfc_version.h
index 378eba7b09d9f..41a1128f86514 100644
--- a/drivers/scsi/lpfc/lpfc_version.h
+++ b/drivers/scsi/lpfc/lpfc_version.h
@@ -20,7 +20,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "14.2.0.8"
+#define LPFC_DRIVER_VERSION "14.2.0.9"
 #define LPFC_DRIVER_NAME		"lpfc"
 
 /* Used for SLI 2/3 */
-- 
GitLab


From cdd9344e00b4fe3a4683a0ee58826c7a5ce778e0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Sun, 13 Nov 2022 21:33:59 +0100
Subject: [PATCH 2564/4122] scsi: lpfc: Remove linux/msi.h include

Nothing in this file needs anything from linux/msi.h

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20221113202428.436270297@linutronix.de
Cc: James Smart <james.smart@broadcom.com>
Cc: Dick Kennedy <dick.kennedy@broadcom.com>
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reviewed-by: James Smart <jsmart2021@gmail.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/lpfc/lpfc_init.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index a119c06742b82..9d595d37d6ca7 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -36,7 +36,6 @@
 #include <linux/firmware.h>
 #include <linux/miscdevice.h>
 #include <linux/percpu.h>
-#include <linux/msi.h>
 #include <linux/irq.h>
 #include <linux/bitops.h>
 #include <linux/crash_dump.h>
-- 
GitLab


From 7870d24817890bccee98db0718acececd6399d04 Mon Sep 17 00:00:00 2001
From: Anastasia Kovaleva <a.kovaleva@yadro.com>
Date: Mon, 14 Nov 2022 13:24:58 +0300
Subject: [PATCH 2565/4122] scsi: target: core: Send max transfer length in
 blocks

A MAXIMUM TRANSFER LENGTH value indicates the maximum transfer length in
logical blocks that the device server accepts for a single command. Fix
function sending the length in sectors instead of blocks.

This patch also removes the special casing for fileio in block_size_store
since this logic in now unified in spc_emulate_evpd_b0() for all backends.

Reviewed-by: Konstantin Shelekhin <k.shelekhin@yadro.com>
Reviewed-by: Dmitriy Bogdanov <d.bogdanov@yadro.com>
Signed-off-by: Anastasia Kovaleva <a.kovaleva@yadro.com>
Link: https://lore.kernel.org/r/20221114102500.88892-2-a.kovaleva@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_configfs.c | 2 --
 drivers/target/target_core_file.c     | 1 -
 drivers/target/target_core_spc.c      | 6 +++++-
 include/target/target_core_base.h     | 1 -
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c
index b8a5c8d6cfde0..611b0424e305a 100644
--- a/drivers/target/target_core_configfs.c
+++ b/drivers/target/target_core_configfs.c
@@ -1102,8 +1102,6 @@ static ssize_t block_size_store(struct config_item *item,
 	}
 
 	da->block_size = val;
-	if (da->max_bytes_per_io)
-		da->hw_max_sectors = da->max_bytes_per_io / val;
 
 	pr_debug("dev[%p]: SE Device block_size changed to %u\n",
 			da->da_dev, val);
diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c
index 28aa643be5d5e..f9aed9fa8ced2 100644
--- a/drivers/target/target_core_file.c
+++ b/drivers/target/target_core_file.c
@@ -193,7 +193,6 @@ static int fd_configure_device(struct se_device *dev)
 	}
 
 	dev->dev_attrib.hw_block_size = fd_dev->fd_block_size;
-	dev->dev_attrib.max_bytes_per_io = FD_MAX_BYTES;
 	dev->dev_attrib.hw_max_sectors = FD_MAX_BYTES / fd_dev->fd_block_size;
 	dev->dev_attrib.hw_queue_depth = FD_MAX_DEVICE_QUEUE_DEPTH;
 
diff --git a/drivers/target/target_core_spc.c b/drivers/target/target_core_spc.c
index 621a460ba234e..fcc7b10a7ae35 100644
--- a/drivers/target/target_core_spc.c
+++ b/drivers/target/target_core_spc.c
@@ -515,6 +515,7 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf)
 	struct se_device *dev = cmd->se_dev;
 	u32 mtl = 0;
 	int have_tp = 0, opt, min;
+	u32 io_max_blocks;
 
 	/*
 	 * Following spc3r22 section 6.5.3 Block Limits VPD page, when
@@ -553,7 +554,10 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf)
 		mtl = (cmd->se_tfo->max_data_sg_nents * PAGE_SIZE) /
 		       dev->dev_attrib.block_size;
 	}
-	put_unaligned_be32(min_not_zero(mtl, dev->dev_attrib.hw_max_sectors), &buf[8]);
+	io_max_blocks = mult_frac(dev->dev_attrib.hw_max_sectors,
+			dev->dev_attrib.hw_block_size,
+			dev->dev_attrib.block_size);
+	put_unaligned_be32(min_not_zero(mtl, io_max_blocks), &buf[8]);
 
 	/*
 	 * Set OPTIMAL TRANSFER LENGTH
diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h
index 0c1e439809852..12c9ba16217ef 100644
--- a/include/target/target_core_base.h
+++ b/include/target/target_core_base.h
@@ -712,7 +712,6 @@ struct se_dev_attrib {
 	u32		unmap_granularity;
 	u32		unmap_granularity_alignment;
 	u32		max_write_same_len;
-	u32		max_bytes_per_io;
 	struct se_device *da_dev;
 	struct config_group da_group;
 };
-- 
GitLab


From 9375031ee40b66c8fd2fc24d5fbea47b69f53de6 Mon Sep 17 00:00:00 2001
From: Anastasia Kovaleva <a.kovaleva@yadro.com>
Date: Mon, 14 Nov 2022 13:24:59 +0300
Subject: [PATCH 2566/4122] scsi: target: core: Make hw_max_sectors store the
 sectors amount in blocks

By default, hw_max_sectors stores its value in 512 blocks in iblock,
despite the fact that the block size can be 4096 bytes. Change
hw_max_sectors to store the number of sectors in hw_block_size blocks.

Reviewed-by: Konstantin Shelekhin <k.shelekhin@yadro.com>
Reviewed-by: Dmitriy Bogdanov <d.bogdanov@yadro.com>
Signed-off-by: Anastasia Kovaleva <a.kovaleva@yadro.com>
Link: https://lore.kernel.org/r/20221114102500.88892-3-a.kovaleva@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_iblock.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c
index 8351c974cee32..2a704926edb96 100644
--- a/drivers/target/target_core_iblock.c
+++ b/drivers/target/target_core_iblock.c
@@ -124,7 +124,9 @@ static int iblock_configure_device(struct se_device *dev)
 	q = bdev_get_queue(bd);
 
 	dev->dev_attrib.hw_block_size = bdev_logical_block_size(bd);
-	dev->dev_attrib.hw_max_sectors = queue_max_hw_sectors(q);
+	dev->dev_attrib.hw_max_sectors = mult_frac(queue_max_hw_sectors(q),
+			SECTOR_SIZE,
+			dev->dev_attrib.hw_block_size);
 	dev->dev_attrib.hw_queue_depth = q->nr_requests;
 
 	/*
-- 
GitLab


From 689d94ec208cfdf95101d99319cb4bdc5f55774d Mon Sep 17 00:00:00 2001
From: Anastasia Kovaleva <a.kovaleva@yadro.com>
Date: Mon, 14 Nov 2022 13:25:00 +0300
Subject: [PATCH 2567/4122] scsi: target: core: Change the way
 target_xcopy_do_work() sets restiction on max I/O

To determine how many blocks sends in one command, the minimum value is
selected from the hw_max_sectors of both devices. In target_xcopy_do_work,
hw_max_sectors are used as blocks, not sectors; it also ignores the fact
that sectors can be of different sizes, for example 512 and 4096
bytes. Because of this, a number of blocks can be transmitted that the
device will not be able to accept.

Change the selection of max transmission size into bytes.

Reviewed-by: Konstantin Shelekhin <k.shelekhin@yadro.com>
Reviewed-by: Dmitriy Bogdanov <d.bogdanov@yadro.com>
Signed-off-by: Anastasia Kovaleva <a.kovaleva@yadro.com>
Link: https://lore.kernel.org/r/20221114102500.88892-4-a.kovaleva@yadro.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_xcopy.c | 97 ++++++++++++++++--------------
 drivers/target/target_core_xcopy.h |  2 +-
 2 files changed, 54 insertions(+), 45 deletions(-)

diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
index edf5222082855..49eaee022ef1d 100644
--- a/drivers/target/target_core_xcopy.c
+++ b/drivers/target/target_core_xcopy.c
@@ -582,11 +582,11 @@ static int target_xcopy_read_source(
 	struct xcopy_op *xop,
 	struct se_device *src_dev,
 	sector_t src_lba,
-	u32 src_sectors)
+	u32 src_bytes)
 {
 	struct xcopy_pt_cmd xpt_cmd;
 	struct se_cmd *se_cmd = &xpt_cmd.se_cmd;
-	u32 length = (src_sectors * src_dev->dev_attrib.block_size);
+	u32 transfer_length_block = src_bytes / src_dev->dev_attrib.block_size;
 	int rc;
 	unsigned char cdb[16];
 	bool remote_port = (xop->op_origin == XCOL_DEST_RECV_OP);
@@ -597,11 +597,11 @@ static int target_xcopy_read_source(
 	memset(&cdb[0], 0, 16);
 	cdb[0] = READ_16;
 	put_unaligned_be64(src_lba, &cdb[2]);
-	put_unaligned_be32(src_sectors, &cdb[10]);
-	pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n",
-		(unsigned long long)src_lba, src_sectors, length);
+	put_unaligned_be32(transfer_length_block, &cdb[10]);
+	pr_debug("XCOPY: Built READ_16: LBA: %llu Blocks: %u Length: %u\n",
+		(unsigned long long)src_lba, transfer_length_block, src_bytes);
 
-	__target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length,
+	__target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, src_bytes,
 			  DMA_FROM_DEVICE, 0, &xpt_cmd.sense_buffer[0], 0);
 
 	rc = target_xcopy_setup_pt_cmd(&xpt_cmd, xop, src_dev, &cdb[0],
@@ -627,11 +627,11 @@ static int target_xcopy_write_destination(
 	struct xcopy_op *xop,
 	struct se_device *dst_dev,
 	sector_t dst_lba,
-	u32 dst_sectors)
+	u32 dst_bytes)
 {
 	struct xcopy_pt_cmd xpt_cmd;
 	struct se_cmd *se_cmd = &xpt_cmd.se_cmd;
-	u32 length = (dst_sectors * dst_dev->dev_attrib.block_size);
+	u32 transfer_length_block = dst_bytes / dst_dev->dev_attrib.block_size;
 	int rc;
 	unsigned char cdb[16];
 	bool remote_port = (xop->op_origin == XCOL_SOURCE_RECV_OP);
@@ -642,11 +642,11 @@ static int target_xcopy_write_destination(
 	memset(&cdb[0], 0, 16);
 	cdb[0] = WRITE_16;
 	put_unaligned_be64(dst_lba, &cdb[2]);
-	put_unaligned_be32(dst_sectors, &cdb[10]);
-	pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n",
-		(unsigned long long)dst_lba, dst_sectors, length);
+	put_unaligned_be32(transfer_length_block, &cdb[10]);
+	pr_debug("XCOPY: Built WRITE_16: LBA: %llu Blocks: %u Length: %u\n",
+		(unsigned long long)dst_lba, transfer_length_block, dst_bytes);
 
-	__target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, length,
+	__target_init_cmd(se_cmd, &xcopy_pt_tfo, &xcopy_pt_sess, dst_bytes,
 			  DMA_TO_DEVICE, 0, &xpt_cmd.sense_buffer[0], 0);
 
 	rc = target_xcopy_setup_pt_cmd(&xpt_cmd, xop, dst_dev, &cdb[0],
@@ -670,9 +670,10 @@ static void target_xcopy_do_work(struct work_struct *work)
 	struct se_cmd *ec_cmd = xop->xop_se_cmd;
 	struct se_device *src_dev, *dst_dev;
 	sector_t src_lba, dst_lba, end_lba;
-	unsigned int max_sectors;
+	unsigned long long max_bytes, max_bytes_src, max_bytes_dst, max_blocks;
 	int rc = 0;
-	unsigned short nolb, max_nolb, copied_nolb = 0;
+	unsigned short nolb;
+	unsigned int copied_bytes = 0;
 	sense_reason_t sense_rc;
 
 	sense_rc = target_parse_xcopy_cmd(xop);
@@ -691,23 +692,31 @@ static void target_xcopy_do_work(struct work_struct *work)
 	nolb = xop->nolb;
 	end_lba = src_lba + nolb;
 	/*
-	 * Break up XCOPY I/O into hw_max_sectors sized I/O based on the
-	 * smallest max_sectors between src_dev + dev_dev, or
+	 * Break up XCOPY I/O into hw_max_sectors * hw_block_size sized
+	 * I/O based on the smallest max_bytes between src_dev + dst_dev
 	 */
-	max_sectors = min(src_dev->dev_attrib.hw_max_sectors,
-			  dst_dev->dev_attrib.hw_max_sectors);
-	max_sectors = min_t(u32, max_sectors, XCOPY_MAX_SECTORS);
+	max_bytes_src = (unsigned long long) src_dev->dev_attrib.hw_max_sectors *
+			src_dev->dev_attrib.hw_block_size;
+	max_bytes_dst = (unsigned long long) dst_dev->dev_attrib.hw_max_sectors *
+			dst_dev->dev_attrib.hw_block_size;
 
-	max_nolb = min_t(u16, max_sectors, ((u16)(~0U)));
+	max_bytes = min_t(u64, max_bytes_src, max_bytes_dst);
+	max_bytes = min_t(u64, max_bytes, XCOPY_MAX_BYTES);
 
-	pr_debug("target_xcopy_do_work: nolb: %hu, max_nolb: %hu end_lba: %llu\n",
-			nolb, max_nolb, (unsigned long long)end_lba);
-	pr_debug("target_xcopy_do_work: Starting src_lba: %llu, dst_lba: %llu\n",
+	/*
+	 * Using shift instead of the division because otherwise GCC
+	 * generates __udivdi3 that is missing on i386
+	 */
+	max_blocks = max_bytes >> ilog2(src_dev->dev_attrib.block_size);
+
+	pr_debug("%s: nolb: %u, max_blocks: %llu end_lba: %llu\n", __func__,
+			nolb, max_blocks, (unsigned long long)end_lba);
+	pr_debug("%s: Starting src_lba: %llu, dst_lba: %llu\n", __func__,
 			(unsigned long long)src_lba, (unsigned long long)dst_lba);
 
-	while (src_lba < end_lba) {
-		unsigned short cur_nolb = min(nolb, max_nolb);
-		u32 cur_bytes = cur_nolb * src_dev->dev_attrib.block_size;
+	while (nolb) {
+		u32 cur_bytes = min_t(u64, max_bytes, nolb * src_dev->dev_attrib.block_size);
+		unsigned short cur_nolb = cur_bytes / src_dev->dev_attrib.block_size;
 
 		if (cur_bytes != xop->xop_data_bytes) {
 			/*
@@ -724,43 +733,43 @@ static void target_xcopy_do_work(struct work_struct *work)
 			xop->xop_data_bytes = cur_bytes;
 		}
 
-		pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu,"
-			" cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb);
+		pr_debug("%s: Calling read src_dev: %p src_lba: %llu, cur_nolb: %hu\n",
+				__func__, src_dev, (unsigned long long)src_lba, cur_nolb);
 
-		rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_nolb);
+		rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_bytes);
 		if (rc < 0)
 			goto out;
 
-		src_lba += cur_nolb;
-		pr_debug("target_xcopy_do_work: Incremented READ src_lba to %llu\n",
+		src_lba += cur_bytes / src_dev->dev_attrib.block_size;
+		pr_debug("%s: Incremented READ src_lba to %llu\n", __func__,
 				(unsigned long long)src_lba);
 
-		pr_debug("target_xcopy_do_work: Calling write dst_dev: %p dst_lba: %llu,"
-			" cur_nolb: %hu\n", dst_dev, (unsigned long long)dst_lba, cur_nolb);
+		pr_debug("%s: Calling write dst_dev: %p dst_lba: %llu, cur_nolb: %u\n",
+				__func__, dst_dev, (unsigned long long)dst_lba, cur_nolb);
 
 		rc = target_xcopy_write_destination(ec_cmd, xop, dst_dev,
-						dst_lba, cur_nolb);
+						dst_lba, cur_bytes);
 		if (rc < 0)
 			goto out;
 
-		dst_lba += cur_nolb;
-		pr_debug("target_xcopy_do_work: Incremented WRITE dst_lba to %llu\n",
+		dst_lba += cur_bytes / dst_dev->dev_attrib.block_size;
+		pr_debug("%s: Incremented WRITE dst_lba to %llu\n", __func__,
 				(unsigned long long)dst_lba);
 
-		copied_nolb += cur_nolb;
-		nolb -= cur_nolb;
+		copied_bytes += cur_bytes;
+		nolb -= cur_bytes / src_dev->dev_attrib.block_size;
 	}
 
 	xcopy_pt_undepend_remotedev(xop);
 	target_free_sgl(xop->xop_data_sg, xop->xop_data_nents);
 	kfree(xop);
 
-	pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n",
+	pr_debug("%s: Final src_lba: %llu, dst_lba: %llu\n", __func__,
 		(unsigned long long)src_lba, (unsigned long long)dst_lba);
-	pr_debug("target_xcopy_do_work: Blocks copied: %hu, Bytes Copied: %u\n",
-		copied_nolb, copied_nolb * dst_dev->dev_attrib.block_size);
+	pr_debug("%s: Blocks copied: %u, Bytes Copied: %u\n", __func__,
+		copied_bytes / dst_dev->dev_attrib.block_size, copied_bytes);
 
-	pr_debug("target_xcopy_do_work: Setting X-COPY GOOD status -> sending response\n");
+	pr_debug("%s: Setting X-COPY GOOD status -> sending response\n", __func__);
 	target_complete_cmd(ec_cmd, SAM_STAT_GOOD);
 	return;
 
@@ -776,8 +785,8 @@ out:
 
 err_free:
 	kfree(xop);
-	pr_warn_ratelimited("target_xcopy_do_work: rc: %d, sense: %u, XCOPY operation failed\n",
-			   rc, sense_rc);
+	pr_warn_ratelimited("%s: rc: %d, sense: %u, XCOPY operation failed\n",
+			   __func__, rc, sense_rc);
 	target_complete_cmd_with_sense(ec_cmd, SAM_STAT_CHECK_CONDITION, sense_rc);
 }
 
diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h
index e5f20005179a8..0aad7dc658955 100644
--- a/drivers/target/target_core_xcopy.h
+++ b/drivers/target/target_core_xcopy.h
@@ -5,7 +5,7 @@
 #define XCOPY_TARGET_DESC_LEN		32
 #define XCOPY_SEGMENT_DESC_LEN		28
 #define XCOPY_NAA_IEEE_REGEX_LEN	16
-#define XCOPY_MAX_SECTORS		4096
+#define XCOPY_MAX_BYTES			16777216 /* 16 MB */
 
 /*
  * SPC4r37 6.4.6.1
-- 
GitLab


From fec1b2fa62c162d03f5dcd7b03e3c89d3116d49f Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Tue, 15 Nov 2022 13:56:38 +0100
Subject: [PATCH 2568/4122] scsi: target: iscsi: Fix a race condition between
 login_work and the login thread

In case a malicious initiator sends some random data immediately after a
login PDU; the iscsi_target_sk_data_ready() callback will schedule the
login_work and, at the same time, the negotiation may end without clearing
the LOGIN_FLAGS_INITIAL_PDU flag (because no additional PDU exchanges are
required to complete the login).

The login has been completed but the login_work function will find the
LOGIN_FLAGS_INITIAL_PDU flag set and will never stop from rescheduling
itself; at this point, if the initiator drops the connection, the
iscsit_conn structure will be freed, login_work will dereference a released
socket structure and the kernel crashes.

BUG: kernel NULL pointer dereference, address: 0000000000000230
PF: supervisor write access in kernel mode
PF: error_code(0x0002) - not-present page
Workqueue: events iscsi_target_do_login_rx [iscsi_target_mod]
RIP: 0010:_raw_read_lock_bh+0x15/0x30
Call trace:
 iscsi_target_do_login_rx+0x75/0x3f0 [iscsi_target_mod]
 process_one_work+0x1e8/0x3c0

Fix this bug by forcing login_work to stop after the login has been
completed and the socket callbacks have been restored.

Add a comment to clearify the return values of iscsi_target_do_login()

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Link: https://lore.kernel.org/r/20221115125638.102517-1-mlombard@redhat.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/iscsi/iscsi_target_nego.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
index f2919319ad383..ff49c8f3fe241 100644
--- a/drivers/target/iscsi/iscsi_target_nego.c
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -1018,6 +1018,13 @@ static int iscsi_target_handle_csg_one(struct iscsit_conn *conn, struct iscsi_lo
 	return 0;
 }
 
+/*
+ * RETURN VALUE:
+ *
+ *  1 = Login successful
+ * -1 = Login failed
+ *  0 = More PDU exchanges required
+ */
 static int iscsi_target_do_login(struct iscsit_conn *conn, struct iscsi_login *login)
 {
 	int pdu_count = 0;
@@ -1363,12 +1370,13 @@ int iscsi_target_start_negotiation(
 		ret = -1;
 
 	if (ret < 0) {
-		cancel_delayed_work_sync(&conn->login_work);
 		iscsi_target_restore_sock_callbacks(conn);
 		iscsi_remove_failed_auth_entry(conn);
 	}
-	if (ret != 0)
+	if (ret != 0) {
+		cancel_delayed_work_sync(&conn->login_work);
 		iscsi_target_nego_release(conn);
+	}
 
 	return ret;
 }
-- 
GitLab


From a72629b5cdbc43e28a4a19b0fce8d17c582c4db4 Mon Sep 17 00:00:00 2001
From: Maurizio Lombardi <mlombard@redhat.com>
Date: Mon, 21 Nov 2022 10:27:03 +0100
Subject: [PATCH 2569/4122] scsi: target: core: Fix hard lockup when executing
 a compare-and-write command

While handling an I/O completion for the compare portion of a
COMPARE_AND_WRITE command, it may happen that the
compare_and_write_callback function submits new bio structs while still in
softirq context.

Low level drivers like md raid5 do not expect their make_request call to be
used in softirq context, they call into schedule() and create a deadlocked
system.

 __schedule at ffffffff873a0807
 schedule at ffffffff873a0cc5
 raid5_get_active_stripe at ffffffffc0875744 [raid456]
 raid5_make_request at ffffffffc0875a50 [raid456]
 md_handle_request at ffffffff8713b9f9
 md_make_request at ffffffff8713bacb
 generic_make_request at ffffffff86e6f14b
 submit_bio at ffffffff86e6f27c
 iblock_submit_bios at ffffffffc0b4e4dc [target_core_iblock]
 iblock_execute_rw at ffffffffc0b4f3ce [target_core_iblock]
 __target_execute_cmd at ffffffffc1090079 [target_core_mod]
 compare_and_write_callback at ffffffffc1093602 [target_core_mod]
 target_cmd_interrupted at ffffffffc108d1ec [target_core_mod]
 target_complete_cmd_with_sense at ffffffffc108d27c [target_core_mod]
 iblock_complete_cmd at ffffffffc0b4e23a [target_core_iblock]
 dm_io_dec_pending at ffffffffc00db29e [dm_mod]
 clone_endio at ffffffffc00dbf07 [dm_mod]
 raid5_align_endio at ffffffffc086d6c2 [raid456]
 blk_update_request at ffffffff86e6d950
 scsi_end_request at ffffffff87063d48
 scsi_io_completion at ffffffff87063ee8
 blk_complete_reqs at ffffffff86e77b05
 __softirqentry_text_start at ffffffff876000d7

This problem appears to be an issue between target_cmd_interrupted() and
compare_and_write_callback(). target_cmd_interrupted() calls the se_cmd's
transport_complete_callback function pointer if the se_cmd is being stopped
or aborted, and CMD_T_ABORTED was set on the se_cmd.

When calling compare_and_write_callback(), the success parameter was set to
false. target_cmd_interrupted() seems to expect this means the callback
will do cleanup that does not require a process context. But
compare_and_write_callback() ignores the parameter if there was I/O done
for the compare part of COMPARE_AND_WRITE.

Since there was data, the function continued on, passed the compare, and
issued a write while ignoring the value of the success parameter.  The
submit of a bio for the write portion of the COMPARE_AND_WRITE then causes
schedule to be unsafely called from the softirq context.

Fix the bug in compare_and_write_callback by jumping to the out label if
success == "false", after checking if we have been called by
transport_generic_request_failure(); The command is being aborted or
stopped so there is no need to submit the write bio for the write part of
the COMPARE_AND_WRITE command.

Signed-off-by: Maurizio Lombardi <mlombard@redhat.com>
Link: https://lore.kernel.org/r/20221121092703.316489-1-mlombard@redhat.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/target/target_core_sbc.c | 22 ++++++++++++++++------
 1 file changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c
index 1cd41e3834bb0..7536ca7976068 100644
--- a/drivers/target/target_core_sbc.c
+++ b/drivers/target/target_core_sbc.c
@@ -446,12 +446,22 @@ static sense_reason_t compare_and_write_callback(struct se_cmd *cmd, bool succes
 	sense_reason_t ret = TCM_NO_SENSE;
 	int i;
 
-	/*
-	 * Handle early failure in transport_generic_request_failure(),
-	 * which will not have taken ->caw_sem yet..
-	 */
-	if (!success && (!cmd->t_data_sg || !cmd->t_bidi_data_sg))
-		return TCM_NO_SENSE;
+	if (!success) {
+		/*
+		 * Handle early failure in transport_generic_request_failure(),
+		 * which will not have taken ->caw_sem yet..
+		 */
+		if (!cmd->t_data_sg || !cmd->t_bidi_data_sg)
+			return TCM_NO_SENSE;
+
+		/*
+		 * The command has been stopped or aborted so
+		 * we don't have to perform the write operation.
+		 */
+		WARN_ON(!(cmd->transport_state &
+			(CMD_T_ABORTED | CMD_T_STOP)));
+		goto out;
+	}
 	/*
 	 * Handle special case for zero-length COMPARE_AND_WRITE
 	 */
-- 
GitLab


From 0c26a2d7c98039e913e63f9250fde738a3f88a60 Mon Sep 17 00:00:00 2001
From: Wenchao Hao <haowenchao@huawei.com>
Date: Tue, 22 Nov 2022 18:11:05 +0000
Subject: [PATCH 2570/4122] scsi: iscsi: Rename iscsi_set_param() to
 iscsi_if_set_param()

There are two iscsi_set_param() functions defined in libiscsi.c and
scsi_transport_iscsi.c respectively which is confusing.

Rename the one in scsi_transport_iscsi.c to iscsi_if_set_param().

Signed-off-by: Wenchao Hao <haowenchao@huawei.com>
Link: https://lore.kernel.org/r/20221122181105.4123935-1-haowenchao@huawei.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Reviewed-by: Lee Duncan <lduncan@suse.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_transport_iscsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c
index cd3db9684e52d..c3fe5ecfee59e 100644
--- a/drivers/scsi/scsi_transport_iscsi.c
+++ b/drivers/scsi/scsi_transport_iscsi.c
@@ -2988,7 +2988,7 @@ iscsi_if_destroy_conn(struct iscsi_transport *transport, struct iscsi_uevent *ev
 }
 
 static int
-iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
+iscsi_if_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
 {
 	char *data = (char*)ev + sizeof(*ev);
 	struct iscsi_cls_conn *conn;
@@ -3941,7 +3941,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 			err = -EINVAL;
 		break;
 	case ISCSI_UEVENT_SET_PARAM:
-		err = iscsi_set_param(transport, ev);
+		err = iscsi_if_set_param(transport, ev);
 		break;
 	case ISCSI_UEVENT_CREATE_CONN:
 	case ISCSI_UEVENT_DESTROY_CONN:
-- 
GitLab


From ec9780e48c77f469c339b53940ef0c5eacc8b9d2 Mon Sep 17 00:00:00 2001
From: Wenchao Hao <haowenchao@huawei.com>
Date: Wed, 23 Nov 2022 20:21:36 +0800
Subject: [PATCH 2571/4122] scsi: core: Increase scsi_device's iodone_cnt in
 scsi_timeout()

If a SCSI command times out and is going to be aborted, we should increase
the iodone_cnt of the related scsi_device. Otherwise the iodone_cnt would
be smaller than iorequest_cnt.

Increasing iodone_cnt in scsi_timeout() would not cause a double accounting
issue. Brief analysis follows:

 - We add the iodone_cnt when BLK_EH_DONE is returned in
   scsi_timeout(). The related command's timeout event would not happen.

 - If the abort succeeds and the command is not retried, the command would
   be completed with scsi_finish_command() which would not increase
   iodone_cnt.

 - If the abort succeeds and the command is retried, it would be requeue. A
   scsi_dispatch_cmd() would be called and iorequest_cnt would be increased
   again.

 - If the abort fails, the error handler successfully recovers the device,
   and the command is not retried, the command would be completed with
   scsi_finish_command() which would not increase iodone_cnt.

 - If the abort fails, the error handler successfully recovers the device,
   and the command is retried, the iorequest_cnt would be increased again.

Signed-off-by: Wenchao Hao <haowenchao@huawei.com>
Link: https://lore.kernel.org/r/20221123122137.150776-2-haowenchao@huawei.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_error.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index be2a70c5ac6dc..613d5aeb1e3ce 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -354,6 +354,7 @@ enum blk_eh_timer_return scsi_timeout(struct request *req)
 	 */
 	if (test_and_set_bit(SCMD_STATE_COMPLETE, &scmd->state))
 		return BLK_EH_DONE;
+	atomic_inc(&scmd->device->iodone_cnt);
 	if (scsi_abort_command(scmd) != SUCCESS) {
 		set_host_byte(scmd, DID_TIME_OUT);
 		scsi_eh_scmd_add(scmd);
-- 
GitLab


From cfee29ffb45b1c9798011b19d454637d1b0fe87d Mon Sep 17 00:00:00 2001
From: Wenchao Hao <haowenchao@huawei.com>
Date: Wed, 23 Nov 2022 20:21:37 +0800
Subject: [PATCH 2572/4122] scsi: core: Do not increase scsi_device's
 iorequest_cnt if dispatch failed

If scsi_dispatch_cmd() failed, the SCSI command was not sent to the target.
scsi_queue_rq() would return BLK_STS_RESOURCE if scsi_dispatch_cmd()
failed, and the related request would be requeued. The timeout of this
request would not fire, so noone would increase iodone_cnt.

Signed-off-by: Wenchao Hao <haowenchao@huawei.com>
Link: https://lore.kernel.org/r/20221123122137.150776-3-haowenchao@huawei.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_lib.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ec890865abae0..a29d87e574308 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1464,8 +1464,6 @@ static int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
 	struct Scsi_Host *host = cmd->device->host;
 	int rtn = 0;
 
-	atomic_inc(&cmd->device->iorequest_cnt);
-
 	/* check if the device is still usable */
 	if (unlikely(cmd->device->sdev_state == SDEV_DEL)) {
 		/* in SDEV_DEL we error all commands. DID_NO_CONNECT
@@ -1764,6 +1762,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx,
 		goto out_dec_host_busy;
 	}
 
+	atomic_inc(&cmd->device->iorequest_cnt);
 	return BLK_STS_OK;
 
 out_dec_host_busy:
-- 
GitLab


From 9c9ff300e0de07475796495d86f449340d454a0c Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Tue, 22 Nov 2022 01:57:51 +0000
Subject: [PATCH 2573/4122] scsi: hpsa: Fix possible memory leak in
 hpsa_init_one()

The hpda_alloc_ctlr_info() allocates h and its field reply_map. However, in
hpsa_init_one(), if alloc_percpu() failed, the hpsa_init_one() jumps to
clean1 directly, which frees h and leaks the h->reply_map.

Fix by calling hpda_free_ctlr_info() to release h->replay_map and h instead
free h directly.

Fixes: 8b834bff1b73 ("scsi: hpsa: fix selection of reply queue")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Link: https://lore.kernel.org/r/20221122015751.87284-1-yuancan@huawei.com
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hpsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index f8e832b1bc46a..e5cbc97a5ea46 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -8925,7 +8925,7 @@ clean1:	/* wq/aer/h */
 		destroy_workqueue(h->monitor_ctlr_wq);
 		h->monitor_ctlr_wq = NULL;
 	}
-	kfree(h);
+	hpda_free_ctlr_info(h);
 	return rc;
 }
 
-- 
GitLab


From 8ac813f7e663bcf03e09291517359111d0cf9785 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:45 +0100
Subject: [PATCH 2574/4122] gpio: max732x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-max732x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-max732x.c b/drivers/gpio/gpio-max732x.c
index da69721170308..68e982cdee734 100644
--- a/drivers/gpio/gpio-max732x.c
+++ b/drivers/gpio/gpio-max732x.c
@@ -608,9 +608,9 @@ static struct max732x_platform_data *of_gpio_max732x(struct device *dev)
 	return pdata;
 }
 
-static int max732x_probe(struct i2c_client *client,
-				   const struct i2c_device_id *id)
+static int max732x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct max732x_platform_data *pdata;
 	struct device_node *node;
 	struct max732x_chip *chip;
@@ -707,7 +707,7 @@ static struct i2c_driver max732x_driver = {
 		.name		= "max732x",
 		.of_match_table	= of_match_ptr(max732x_of_table),
 	},
-	.probe		= max732x_probe,
+	.probe_new	= max732x_probe,
 	.id_table	= max732x_id,
 };
 
-- 
GitLab


From 1287341c1980e0cf9eb19bdd370405d755392f2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:46 +0100
Subject: [PATCH 2575/4122] gpio: pca953x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pca953x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-pca953x.c b/drivers/gpio/gpio-pca953x.c
index ebe1943b85dd9..342800527c146 100644
--- a/drivers/gpio/gpio-pca953x.c
+++ b/drivers/gpio/gpio-pca953x.c
@@ -1049,9 +1049,9 @@ out:
 	return ret;
 }
 
-static int pca953x_probe(struct i2c_client *client,
-			 const struct i2c_device_id *i2c_id)
+static int pca953x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *i2c_id = i2c_client_get_device_id(client);
 	struct pca953x_platform_data *pdata;
 	struct pca953x_chip *chip;
 	int irq_base = 0;
@@ -1375,7 +1375,7 @@ static struct i2c_driver pca953x_driver = {
 		.of_match_table = pca953x_dt_ids,
 		.acpi_match_table = pca953x_acpi_ids,
 	},
-	.probe		= pca953x_probe,
+	.probe_new	= pca953x_probe,
 	.remove		= pca953x_remove,
 	.id_table	= pca953x_id,
 };
-- 
GitLab


From 7963ba02b2d1de681ba1ee33060db42eb4cf4c07 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:47 +0100
Subject: [PATCH 2576/4122] gpio: pcf857x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-pcf857x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpio-pcf857x.c b/drivers/gpio/gpio-pcf857x.c
index e98ea47d72379..cec2f2c78255f 100644
--- a/drivers/gpio/gpio-pcf857x.c
+++ b/drivers/gpio/gpio-pcf857x.c
@@ -247,9 +247,9 @@ static const struct irq_chip pcf857x_irq_chip = {
 
 /*-------------------------------------------------------------------------*/
 
-static int pcf857x_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int pcf857x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct pcf857x_platform_data	*pdata = dev_get_platdata(&client->dev);
 	struct device_node		*np = client->dev.of_node;
 	struct pcf857x			*gpio;
@@ -422,7 +422,7 @@ static struct i2c_driver pcf857x_driver = {
 		.name	= "pcf857x",
 		.of_match_table = of_match_ptr(pcf857x_of_table),
 	},
-	.probe	= pcf857x_probe,
+	.probe_new = pcf857x_probe,
 	.remove	= pcf857x_remove,
 	.shutdown = pcf857x_shutdown,
 	.id_table = pcf857x_id,
-- 
GitLab


From 61119786de40f61b8843aa57217b678361763d67 Mon Sep 17 00:00:00 2001
From: XueBing Chen <chenxuebing@jari.cn>
Date: Fri, 17 Jun 2022 23:50:19 +0800
Subject: [PATCH 2577/4122] KVM: PPC: Use __func__ to get function's name

Prefer using '"%s...", __func__' to get current function's name in
output messages.

Signed-off-by: XueBing Chen <chenxuebing@jari.cn>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/13b2c857.beb.181725bad35.Coremail.chenxuebing@jari.cn
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index e9744b41a226c..351ff0f89b007 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -1202,7 +1202,7 @@ static int resize_hpt_allocate(struct kvm_resize_hpt *resize)
 	if (rc < 0)
 		return rc;
 
-	resize_hpt_debug(resize, "resize_hpt_allocate(): HPT @ 0x%lx\n",
+	resize_hpt_debug(resize, "%s(): HPT @ 0x%lx\n", __func__,
 			 resize->hpt.virt);
 
 	return 0;
@@ -1443,7 +1443,7 @@ static void resize_hpt_prepare_work(struct work_struct *work)
 		 */
 		mutex_unlock(&kvm->arch.mmu_setup_lock);
 
-		resize_hpt_debug(resize, "resize_hpt_prepare_work(): order = %d\n",
+		resize_hpt_debug(resize, "%s(): order = %d\n", __func__,
 				 resize->order);
 
 		err = resize_hpt_allocate(resize);
@@ -1887,8 +1887,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
 			ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r,
 							 tmp);
 			if (ret != H_SUCCESS) {
-				pr_err("kvm_htab_write ret %ld i=%ld v=%lx "
-				       "r=%lx\n", ret, i, v, r);
+				pr_err("%s ret %ld i=%ld v=%lx r=%lx\n", __func__, ret, i, v, r);
 				goto out;
 			}
 			if (!mmu_ready && is_vrma_hpte(v)) {
-- 
GitLab


From 392a58f1eaab0c90b80d7ba4a03dbf6eaaeabe60 Mon Sep 17 00:00:00 2001
From: Zhang Jiaming <jiaming@nfschina.com>
Date: Thu, 23 Jun 2022 18:20:31 +0800
Subject: [PATCH 2578/4122] KVM: PPC: Book3S HV: XIVE: Fix spelling mistakes

Change 'subsquent' to 'subsequent'.
Change 'accross' to 'across'.

Signed-off-by: Zhang Jiaming <jiaming@nfschina.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220623102031.15359-1-jiaming@nfschina.com
---
 arch/powerpc/kvm/book3s_xive.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 4ca23644f7525..b4b680f2d8534 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -539,7 +539,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 	if (irq == XICS_IPI || irq == 0) {
 		/*
 		 * This barrier orders the setting of xc->cppr vs.
-		 * subsquent test of xc->mfrr done inside
+		 * subsequent test of xc->mfrr done inside
 		 * scan_interrupts and push_pending_to_hw
 		 */
 		smp_mb();
@@ -563,7 +563,7 @@ static int xive_vm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 	/*
 	 * This barrier orders both setting of in_eoi above vs,
 	 * subsequent test of guest_priority, and the setting
-	 * of xc->cppr vs. subsquent test of xc->mfrr done inside
+	 * of xc->cppr vs. subsequent test of xc->mfrr done inside
 	 * scan_interrupts and push_pending_to_hw
 	 */
 	smp_mb();
@@ -2392,7 +2392,7 @@ static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
 	/*
 	 * Now, we select a target if we have one. If we don't we
 	 * leave the interrupt untargetted. It means that an interrupt
-	 * can become "untargetted" accross migration if it was masked
+	 * can become "untargetted" across migration if it was masked
 	 * by set_xive() but there is little we can do about it.
 	 */
 
-- 
GitLab


From 6fa1efeaa6671fb7339a6c62ceeec19e8e787963 Mon Sep 17 00:00:00 2001
From: Deming Wang <wangdeming@inspur.com>
Date: Sun, 3 Jul 2022 13:29:32 -0400
Subject: [PATCH 2579/4122] KVM: PPC: Book3s: Use arg->size directly in
 kvm_vm_ioctl_create_spapr_tce()

The size variable is just a copy of args->size, neither size nor args
are modifed, so just use args->size directly.

Signed-off-by: Deming Wang <wangdeming@inspur.com>
[mpe: Reword change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220703172932.11329-1-wangdeming@inspur.com
---
 arch/powerpc/kvm/book3s_64_vio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 40864373ef876..95e738ef9062b 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -294,14 +294,14 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	struct kvmppc_spapr_tce_table *stt = NULL;
 	struct kvmppc_spapr_tce_table *siter;
 	struct mm_struct *mm = kvm->mm;
-	unsigned long npages, size = args->size;
+	unsigned long npages;
 	int ret;
 
 	if (!args->size || args->page_shift < 12 || args->page_shift > 34 ||
 		(args->offset + args->size > (ULLONG_MAX >> args->page_shift)))
 		return -EINVAL;
 
-	npages = kvmppc_tce_pages(size);
+	npages = kvmppc_tce_pages(args->size);
 	ret = account_locked_vm(mm, kvmppc_stt_pages(npages), true);
 	if (ret)
 		return ret;
@@ -314,7 +314,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
 	stt->liobn = args->liobn;
 	stt->page_shift = args->page_shift;
 	stt->offset = args->offset;
-	stt->size = size;
+	stt->size = args->size;
 	stt->kvm = kvm;
 	mutex_init(&stt->alloc_lock);
 	INIT_LIST_HEAD_RCU(&stt->iommu_tables);
-- 
GitLab


From a96b20758b23be7e9f693218908228d6100c3c26 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 9 Jul 2022 17:56:43 +0200
Subject: [PATCH 2580/4122] KVM: PPC: Book3S HV: Use the bitmap API to allocate
 bitmaps

Use bitmap_zalloc()/bitmap_free() instead of hand-writing them.

It is less verbose and it improves the semantic.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/52e843a460bc374973149b8da0bd04f9761b80b7.1657382184.git.christophe.jaillet@wanadoo.fr
---
 arch/powerpc/kvm/book3s_hv_uvmem.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index e2f11f9c3f2aa..1d67baa5557a2 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -1190,8 +1190,7 @@ int kvmppc_uvmem_init(void)
 
 	pfn_first = res->start >> PAGE_SHIFT;
 	pfn_last = pfn_first + (resource_size(res) >> PAGE_SHIFT);
-	kvmppc_uvmem_bitmap = kcalloc(BITS_TO_LONGS(pfn_last - pfn_first),
-				      sizeof(unsigned long), GFP_KERNEL);
+	kvmppc_uvmem_bitmap = bitmap_zalloc(pfn_last - pfn_first, GFP_KERNEL);
 	if (!kvmppc_uvmem_bitmap) {
 		ret = -ENOMEM;
 		goto out_unmap;
@@ -1215,5 +1214,5 @@ void kvmppc_uvmem_free(void)
 	memunmap_pages(&kvmppc_uvmem_pgmap);
 	release_mem_region(kvmppc_uvmem_pgmap.range.start,
 			   range_len(&kvmppc_uvmem_pgmap.range));
-	kfree(kvmppc_uvmem_bitmap);
+	bitmap_free(kvmppc_uvmem_bitmap);
 }
-- 
GitLab


From 8daa9c1dc9b4a3422801017ca46d935073dc14c0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:23 +0100
Subject: [PATCH 2581/4122] macintosh/ams-i2c: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-290-uwe@kleine-koenig.org
---
 drivers/macintosh/ams/ams-i2c.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/macintosh/ams/ams-i2c.c b/drivers/macintosh/ams/ams-i2c.c
index 3ded340699fb5..a4a1035eb4123 100644
--- a/drivers/macintosh/ams/ams-i2c.c
+++ b/drivers/macintosh/ams/ams-i2c.c
@@ -56,8 +56,7 @@ enum ams_i2c_cmd {
 	AMS_CMD_START,
 };
 
-static int ams_i2c_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id);
+static int ams_i2c_probe(struct i2c_client *client);
 static void ams_i2c_remove(struct i2c_client *client);
 
 static const struct i2c_device_id ams_id[] = {
@@ -70,7 +69,7 @@ static struct i2c_driver ams_i2c_driver = {
 	.driver = {
 		.name   = "ams",
 	},
-	.probe          = ams_i2c_probe,
+	.probe_new      = ams_i2c_probe,
 	.remove         = ams_i2c_remove,
 	.id_table       = ams_id,
 };
@@ -155,8 +154,7 @@ static void ams_i2c_get_xyz(s8 *x, s8 *y, s8 *z)
 	*z = ams_i2c_read(AMS_DATAZ);
 }
 
-static int ams_i2c_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int ams_i2c_probe(struct i2c_client *client)
 {
 	int vmaj, vmin;
 	int result;
-- 
GitLab


From 0424113fed923a2fcb699b5f3aa335d16e092f3d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:24 +0100
Subject: [PATCH 2582/4122] macintosh/therm_adt746x: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-291-uwe@kleine-koenig.org
---
 drivers/macintosh/therm_adt746x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index b004ea2a11025..8f5db9093c9af 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -464,9 +464,9 @@ static void thermostat_remove_files(struct thermostat *th)
 
 }
 
-static int probe_thermostat(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int probe_thermostat(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct device_node *np = client->dev.of_node;
 	struct thermostat* th;
 	const __be32 *prop;
@@ -598,7 +598,7 @@ static struct i2c_driver thermostat_driver = {
 	.driver = {
 		.name	= "therm_adt746x",
 	},
-	.probe = probe_thermostat,
+	.probe_new = probe_thermostat,
 	.remove = remove_thermostat,
 	.id_table = therm_adt746x_id,
 };
-- 
GitLab


From dc9be0735c3e245fe60775307cf7842b1f9b45a2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:25 +0100
Subject: [PATCH 2583/4122] macintosh/therm_windtunnel: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-292-uwe@kleine-koenig.org
---
 drivers/macintosh/therm_windtunnel.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index b8228ca404544..22b15efcc0258 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -411,8 +411,9 @@ static const struct i2c_device_id therm_windtunnel_id[] = {
 MODULE_DEVICE_TABLE(i2c, therm_windtunnel_id);
 
 static int
-do_probe(struct i2c_client *cl, const struct i2c_device_id *id)
+do_probe(struct i2c_client *cl)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(cl);
 	struct i2c_adapter *adapter = cl->adapter;
 	int ret = 0;
 
@@ -441,7 +442,7 @@ static struct i2c_driver g4fan_driver = {
 	.driver = {
 		.name	= "therm_windtunnel",
 	},
-	.probe		= do_probe,
+	.probe_new	= do_probe,
 	.remove		= do_remove,
 	.id_table	= therm_windtunnel_id,
 };
-- 
GitLab


From 9d533bdf4a582f037327f1a38ed8cf689d67cab4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:26 +0100
Subject: [PATCH 2584/4122] macintosh/windfarm_ad7417_sensor: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-293-uwe@kleine-koenig.org
---
 drivers/macintosh/windfarm_ad7417_sensor.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/windfarm_ad7417_sensor.c b/drivers/macintosh/windfarm_ad7417_sensor.c
index c5c54a4ce91f2..33b4723d235e4 100644
--- a/drivers/macintosh/windfarm_ad7417_sensor.c
+++ b/drivers/macintosh/windfarm_ad7417_sensor.c
@@ -229,8 +229,7 @@ static void wf_ad7417_init_chip(struct wf_ad7417_priv *pv)
 	pv->config = config;
 }
 
-static int wf_ad7417_probe(struct i2c_client *client,
-			   const struct i2c_device_id *id)
+static int wf_ad7417_probe(struct i2c_client *client)
 {
 	struct wf_ad7417_priv *pv;
 	const struct mpu_data *mpu;
@@ -321,7 +320,7 @@ static struct i2c_driver wf_ad7417_driver = {
 		.name	= "wf_ad7417",
 		.of_match_table = wf_ad7417_of_id,
 	},
-	.probe		= wf_ad7417_probe,
+	.probe_new	= wf_ad7417_probe,
 	.remove		= wf_ad7417_remove,
 	.id_table	= wf_ad7417_id,
 };
-- 
GitLab


From 472e4c61d2bb4977ade8e2491953954bf9723563 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:27 +0100
Subject: [PATCH 2585/4122] macintosh/windfarm_fcu_controls: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-294-uwe@kleine-koenig.org
---
 drivers/macintosh/windfarm_fcu_controls.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/windfarm_fcu_controls.c b/drivers/macintosh/windfarm_fcu_controls.c
index c5b1ca5bcd732..e027d889d7e80 100644
--- a/drivers/macintosh/windfarm_fcu_controls.c
+++ b/drivers/macintosh/windfarm_fcu_controls.c
@@ -514,8 +514,7 @@ static int wf_fcu_init_chip(struct wf_fcu_priv *pv)
 	return 0;
 }
 
-static int wf_fcu_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int wf_fcu_probe(struct i2c_client *client)
 {
 	struct wf_fcu_priv *pv;
 
@@ -590,7 +589,7 @@ static struct i2c_driver wf_fcu_driver = {
 		.name	= "wf_fcu",
 		.of_match_table = wf_fcu_of_id,
 	},
-	.probe		= wf_fcu_probe,
+	.probe_new	= wf_fcu_probe,
 	.remove		= wf_fcu_remove,
 	.id_table	= wf_fcu_id,
 };
-- 
GitLab


From 51a9e1755cdd8b127191030d15b74b97f7d3ce75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:28 +0100
Subject: [PATCH 2586/4122] macintosh/windfarm_lm75_sensor: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-295-uwe@kleine-koenig.org
---
 drivers/macintosh/windfarm_lm75_sensor.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index 204661c8e918f..24f0a444d3122 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -87,9 +87,9 @@ static const struct wf_sensor_ops wf_lm75_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int wf_lm75_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
-{	
+static int wf_lm75_probe(struct i2c_client *client)
+{
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct wf_lm75_sensor *lm;
 	int rc, ds1775;
 	const char *name, *loc;
@@ -177,7 +177,7 @@ static struct i2c_driver wf_lm75_driver = {
 		.name	= "wf_lm75",
 		.of_match_table = wf_lm75_of_id,
 	},
-	.probe		= wf_lm75_probe,
+	.probe_new	= wf_lm75_probe,
 	.remove		= wf_lm75_remove,
 	.id_table	= wf_lm75_id,
 };
-- 
GitLab


From 0e2211b3373ea718d2161bcc360cd4d9a3bcebc6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:29 +0100
Subject: [PATCH 2587/4122] macintosh/windfarm_lm87_sensor: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-296-uwe@kleine-koenig.org
---
 drivers/macintosh/windfarm_lm87_sensor.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/windfarm_lm87_sensor.c b/drivers/macintosh/windfarm_lm87_sensor.c
index 40d25463346ed..f37a32c2070ca 100644
--- a/drivers/macintosh/windfarm_lm87_sensor.c
+++ b/drivers/macintosh/windfarm_lm87_sensor.c
@@ -95,8 +95,7 @@ static const struct wf_sensor_ops wf_lm87_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int wf_lm87_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int wf_lm87_probe(struct i2c_client *client)
 {	
 	struct wf_lm87_sensor *lm;
 	const char *name = NULL, *loc;
@@ -173,7 +172,7 @@ static struct i2c_driver wf_lm87_driver = {
 		.name	= "wf_lm87",
 		.of_match_table = wf_lm87_of_id,
 	},
-	.probe		= wf_lm87_probe,
+	.probe_new	= wf_lm87_probe,
 	.remove		= wf_lm87_remove,
 	.id_table	= wf_lm87_id,
 };
-- 
GitLab


From 2d7a9d780444c8f31ee6af522a92a99492d9eeb2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:30 +0100
Subject: [PATCH 2588/4122] macintosh/windfarm_max6690_sensor: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-297-uwe@kleine-koenig.org
---
 drivers/macintosh/windfarm_max6690_sensor.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/windfarm_max6690_sensor.c b/drivers/macintosh/windfarm_max6690_sensor.c
index c0d404ebc7922..6c5ab657b6b39 100644
--- a/drivers/macintosh/windfarm_max6690_sensor.c
+++ b/drivers/macintosh/windfarm_max6690_sensor.c
@@ -60,8 +60,7 @@ static const struct wf_sensor_ops wf_max6690_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int wf_max6690_probe(struct i2c_client *client,
-			    const struct i2c_device_id *id)
+static int wf_max6690_probe(struct i2c_client *client)
 {
 	const char *name, *loc;
 	struct wf_6690_sensor *max;
@@ -129,7 +128,7 @@ static struct i2c_driver wf_max6690_driver = {
 		.name		= "wf_max6690",
 		.of_match_table = wf_max6690_of_id,
 	},
-	.probe		= wf_max6690_probe,
+	.probe_new	= wf_max6690_probe,
 	.remove		= wf_max6690_remove,
 	.id_table	= wf_max6690_id,
 };
-- 
GitLab


From d05921a09a5a72805a1d669dce0fcbd66df86237 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:40:31 +0100
Subject: [PATCH 2589/4122] macintosh/windfarm_smu_sat: Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118224540.619276-298-uwe@kleine-koenig.org
---
 drivers/macintosh/windfarm_smu_sat.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
index be5d4593db93e..ebc4256a9e4a0 100644
--- a/drivers/macintosh/windfarm_smu_sat.c
+++ b/drivers/macintosh/windfarm_smu_sat.c
@@ -189,8 +189,7 @@ static const struct wf_sensor_ops wf_sat_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static int wf_sat_probe(struct i2c_client *client,
-			const struct i2c_device_id *id)
+static int wf_sat_probe(struct i2c_client *client)
 {
 	struct device_node *dev = client->dev.of_node;
 	struct wf_sat *sat;
@@ -349,7 +348,7 @@ static struct i2c_driver wf_sat_driver = {
 		.name		= "wf_smu_sat",
 		.of_match_table = wf_sat_of_id,
 	},
-	.probe		= wf_sat_probe,
+	.probe_new	= wf_sat_probe,
 	.remove		= wf_sat_remove,
 	.id_table	= wf_sat_id,
 };
-- 
GitLab


From e0acfdd13474815696595206e11169736b4bca9d Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Thu, 17 Mar 2022 10:32:39 +0800
Subject: [PATCH 2590/4122] macintosh/windfarm_pm81: Fix warning comparing
 pointer to 0

Avoid pointer type value compared with 0 to make code clear.

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1647484359-12402-1-git-send-email-baihaowen@meizu.com
---
 drivers/macintosh/windfarm_pm81.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/macintosh/windfarm_pm81.c b/drivers/macintosh/windfarm_pm81.c
index e0f4743f21cc6..257fb2c695c53 100644
--- a/drivers/macintosh/windfarm_pm81.c
+++ b/drivers/macintosh/windfarm_pm81.c
@@ -401,7 +401,7 @@ static void wf_smu_create_cpu_fans(void)
 
 	/* First, locate the PID params in SMU SBD */
 	hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL);
-	if (hdr == 0) {
+	if (!hdr) {
 		printk(KERN_WARNING "windfarm: CPU PID fan config not found "
 		       "max fan speed\n");
 		goto fail;
@@ -705,7 +705,7 @@ static int wf_init_pm(void)
 	const struct smu_sdbp_header *hdr;
 
 	hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL);
-	if (hdr != 0) {
+	if (hdr) {
 		struct smu_sdbp_sensortree *st =
 			(struct smu_sdbp_sensortree *)&hdr[1];
 		wf_smu_mach_model = st->model_id;
-- 
GitLab


From 2f59562c140d3119328f869126e8e593a99a392f Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Thu, 17 Mar 2022 10:35:54 +0800
Subject: [PATCH 2591/4122] macintosh/adb: Fix warning comparing pointer to 0

Avoid pointer type value compared with 0 to make code clear.

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1647484554-13258-1-git-send-email-baihaowen@meizu.com
---
 drivers/macintosh/adb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index 1bbb9ca08d40f..23bd0c77ac1af 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -478,7 +478,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids,
 		if ((adb_handler[i].original_address == default_id) &&
 		    (!handler_id || (handler_id == adb_handler[i].handler_id) || 
 		    try_handler_change(i, handler_id))) {
-			if (adb_handler[i].handler != 0) {
+			if (adb_handler[i].handler) {
 				pr_err("Two handlers for ADB device %d\n",
 				       default_id);
 				continue;
@@ -673,7 +673,7 @@ static int adb_open(struct inode *inode, struct file *file)
 		goto out;
 	}
 	state = kmalloc(sizeof(struct adbdev_state), GFP_KERNEL);
-	if (state == 0) {
+	if (!state) {
 		ret = -ENOMEM;
 		goto out;
 	}
-- 
GitLab


From 88316944c3b3aa3ce3249c51689ef1621049df9d Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Thu, 17 Mar 2022 10:42:33 +0800
Subject: [PATCH 2592/4122] macintosh/windfarm_pm91: Fix warning comparing
 pointer to 0

Avoid pointer type value compared with 0 to make code clear.

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1647484953-15249-1-git-send-email-baihaowen@meizu.com
---
 drivers/macintosh/windfarm_pm91.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/macintosh/windfarm_pm91.c b/drivers/macintosh/windfarm_pm91.c
index c8535855360d1..120a9cfba0c54 100644
--- a/drivers/macintosh/windfarm_pm91.c
+++ b/drivers/macintosh/windfarm_pm91.c
@@ -150,7 +150,7 @@ static void wf_smu_create_cpu_fans(void)
 
 	/* First, locate the PID params in SMU SBD */
 	hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL);
-	if (hdr == 0) {
+	if (!hdr) {
 		printk(KERN_WARNING "windfarm: CPU PID fan config not found "
 		       "max fan speed\n");
 		goto fail;
-- 
GitLab


From a823307bf0a3b79b27eea916bf6499ba4377cdf9 Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Thu, 17 Mar 2022 10:57:02 +0800
Subject: [PATCH 2593/4122] macintosh/windfarm_pm121: Fix warning comparing
 pointer to 0

Avoid pointer type value compared with 0 to make code clear.

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1647485822-16717-1-git-send-email-baihaowen@meizu.com
---
 drivers/macintosh/windfarm_pm121.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/macintosh/windfarm_pm121.c b/drivers/macintosh/windfarm_pm121.c
index 36312f163aac8..82500417ebeec 100644
--- a/drivers/macintosh/windfarm_pm121.c
+++ b/drivers/macintosh/windfarm_pm121.c
@@ -651,7 +651,7 @@ static void pm121_create_cpu_fans(void)
 
 	/* First, locate the PID params in SMU SBD */
 	hdr = smu_get_sdb_partition(SMU_SDB_CPUPIDDATA_ID, NULL);
-	if (hdr == 0) {
+	if (!hdr) {
 		printk(KERN_WARNING "pm121: CPU PID fan config not found.\n");
 		goto fail;
 	}
@@ -970,7 +970,7 @@ static int pm121_init_pm(void)
 	const struct smu_sdbp_header *hdr;
 
 	hdr = smu_get_sdb_partition(SMU_SDB_SENSORTREE_ID, NULL);
-	if (hdr != 0) {
+	if (hdr) {
 		struct smu_sdbp_sensortree *st =
 			(struct smu_sdbp_sensortree *)&hdr[1];
 		pm121_mach_model = st->model_id;
-- 
GitLab


From fc21ed8f26d980428f9b4e08e0fb72c7f7ffc9b8 Mon Sep 17 00:00:00 2001
From: Haowen Bai <baihaowen@meizu.com>
Date: Thu, 17 Mar 2022 17:24:49 +0800
Subject: [PATCH 2594/4122] macintosh/macio-adb: Fix warning comparing pointer
 to 0

Avoid pointer type value compared with 0 to make code clear.

Signed-off-by: Haowen Bai <baihaowen@meizu.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1647509089-4280-1-git-send-email-baihaowen@meizu.com
---
 drivers/macintosh/macio-adb.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index 9b63bd2551c63..3721402582b4f 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -100,7 +100,7 @@ int macio_init(void)
 	unsigned int irq;
 
 	adbs = of_find_compatible_node(NULL, "adb", "chrp,adb0");
-	if (adbs == 0)
+	if (!adbs)
 		return -ENXIO;
 
 	if (of_address_to_resource(adbs, 0, &r)) {
@@ -183,7 +183,7 @@ static int macio_send_request(struct adb_request *req, int sync)
 	req->reply_len = 0;
 
 	spin_lock_irqsave(&macio_lock, flags);
-	if (current_req != 0) {
+	if (current_req) {
 		last_req->next = req;
 		last_req = req;
 	} else {
@@ -213,7 +213,8 @@ static irqreturn_t macio_adb_interrupt(int irq, void *arg)
 	spin_lock(&macio_lock);
 	if (in_8(&adb->intr.r) & TAG) {
 		handled = 1;
-		if ((req = current_req) != 0) {
+		req = current_req;
+		if (req) {
 			/* put the current request in */
 			for (i = 0; i < req->nbytes; ++i)
 				out_8(&adb->data[i].r, req->data[i]);
-- 
GitLab


From 27f9690a81d7acf185b78be8d03d4b3a243116b1 Mon Sep 17 00:00:00 2001
From: Finn Thain <fthain@linux-m68k.org>
Date: Mon, 21 Mar 2022 20:28:00 +1100
Subject: [PATCH 2595/4122] macintosh/via-pmu: Avoid compiler warnings when
 CONFIG_PROC_FS is disabled

drivers/macintosh/via-pmu.c:897:12: warning: 'pmu_battery_proc_show' defined but not used [-Wunused-function]
 static int pmu_battery_proc_show(struct seq_file *m, void *v)
            ^~~~~~~~~~~~~~~~~~~~~
drivers/macintosh/via-pmu.c:871:12: warning: 'pmu_irqstats_proc_show' defined but not used [-Wunused-function]
 static int pmu_irqstats_proc_show(struct seq_file *m, void *v)
            ^~~~~~~~~~~~~~~~~~~~~~
drivers/macintosh/via-pmu.c:860:12: warning: 'pmu_info_proc_show' defined but not used [-Wunused-function]
 static int pmu_info_proc_show(struct seq_file *m, void *v)
            ^~~~~~~~~~~~~~~~~~

Add some #ifdefs to avoid unused code warnings when CONFIG_PROC_FS is
disabled.

Reported-by: Randy Dunlap <rdunlap@infradead.org>
Suggested-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Finn Thain <fthain@linux-m68k.org>
Tested-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/0c11c0770fc4ec7e80a4b2e0ffce1055b792cfdb.1647854880.git.fthain@linux-m68k.org
---
 drivers/macintosh/via-pmu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c
index 49657962d8920..e0cb8daf4f082 100644
--- a/drivers/macintosh/via-pmu.c
+++ b/drivers/macintosh/via-pmu.c
@@ -203,9 +203,11 @@ static int init_pmu(void);
 static void pmu_start(void);
 static irqreturn_t via_pmu_interrupt(int irq, void *arg);
 static irqreturn_t gpio1_interrupt(int irq, void *arg);
+#ifdef CONFIG_PROC_FS
 static int pmu_info_proc_show(struct seq_file *m, void *v);
 static int pmu_irqstats_proc_show(struct seq_file *m, void *v);
 static int pmu_battery_proc_show(struct seq_file *m, void *v);
+#endif
 static void pmu_pass_intr(unsigned char *data, int len);
 static const struct proc_ops pmu_options_proc_ops;
 
@@ -852,6 +854,7 @@ query_battery_state(void)
 			2, PMU_SMART_BATTERY_STATE, pmu_cur_battery+1);
 }
 
+#ifdef CONFIG_PROC_FS
 static int pmu_info_proc_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "PMU driver version     : %d\n", PMU_DRIVER_VERSION);
@@ -972,6 +975,7 @@ static const struct proc_ops pmu_options_proc_ops = {
 	.proc_release	= single_release,
 	.proc_write	= pmu_options_proc_write,
 };
+#endif
 
 #ifdef CONFIG_ADB
 /* Send an ADB command */
-- 
GitLab


From a0542d2c45a64162e63ad2d80684e57de0566271 Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Thu, 16 Jun 2022 19:04:24 +0200
Subject: [PATCH 2596/4122] macintosh/via-pmu-backlight: Use backlight helper

backlight_properties.fb_blank is deprecated. The states it represents
are handled by other properties; but instead of accessing those
properties directly, drivers should use the helpers provided by
backlight.h.

Instead of retrieving the backlight brightness in struct
backlight_properties manually, and then checking whether the backlight
should be on at all, use backlight_get_brightness() which does all
this and insulates this from future changes.

Signed-off-by: Stephen Kitt <steve@sk2.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220616170425.1346081-1-steve@sk2.org
---
 drivers/macintosh/via-pmu-backlight.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/macintosh/via-pmu-backlight.c b/drivers/macintosh/via-pmu-backlight.c
index 2194016122d2f..c2d87e7fa85be 100644
--- a/drivers/macintosh/via-pmu-backlight.c
+++ b/drivers/macintosh/via-pmu-backlight.c
@@ -71,12 +71,7 @@ static int pmu_backlight_get_level_brightness(int level)
 static int __pmu_backlight_update_status(struct backlight_device *bd)
 {
 	struct adb_request req;
-	int level = bd->props.brightness;
-
-
-	if (bd->props.power != FB_BLANK_UNBLANK ||
-	    bd->props.fb_blank != FB_BLANK_UNBLANK)
-		level = 0;
+	int level = backlight_get_brightness(bd);
 
 	if (level > 0) {
 		int pmulevel = pmu_backlight_get_level_brightness(level);
-- 
GitLab


From 2dfcace75e1e1dfbd89af63fce1bfe8aebe38427 Mon Sep 17 00:00:00 2001
From: Li zeming <zeming@nfschina.com>
Date: Thu, 7 Jul 2022 09:53:52 +0800
Subject: [PATCH 2597/4122] macintosh/ams/ams: Add header file macro definition

Add header file macro definition.

Signed-off-by: Li zeming <zeming@nfschina.com>
[mpe: Add endif comment]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220707015352.3391-1-zeming@nfschina.com
---
 drivers/macintosh/ams/ams.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/macintosh/ams/ams.h b/drivers/macintosh/ams/ams.h
index 935bdd9cd9a63..2c159c8844c19 100644
--- a/drivers/macintosh/ams/ams.h
+++ b/drivers/macintosh/ams/ams.h
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _AMS_H
+#define _AMS_H
+
 #include <linux/i2c.h>
 #include <linux/input.h>
 #include <linux/kthread.h>
@@ -69,3 +72,5 @@ extern int ams_i2c_init(struct device_node *np);
 
 extern int ams_input_init(void);
 extern void ams_input_exit(void);
+
+#endif /* _AMS_H */
-- 
GitLab


From e3e528d29d13c01289f382a0d3ddb5312ac3dae3 Mon Sep 17 00:00:00 2001
From: Li zeming <zeming@nfschina.com>
Date: Thu, 7 Jul 2022 09:59:49 +0800
Subject: [PATCH 2598/4122] macintosh/windfarm_pid: Add header file macro
 definition

I think the header file could avoid redefinition errors at compile time
by adding macro definitions.

Signed-off-by: Li zeming <zeming@nfschina.com>
[mpe: Add endif comment]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220707015949.3733-1-zeming@nfschina.com
---
 drivers/macintosh/windfarm_pid.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/macintosh/windfarm_pid.h b/drivers/macintosh/windfarm_pid.h
index 83f747dbeafcc..335613a200fb0 100644
--- a/drivers/macintosh/windfarm_pid.h
+++ b/drivers/macintosh/windfarm_pid.h
@@ -1,4 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _WINDFARM_PID_H
+#define _WINDFARM_PID_H
+
 /*
  * Windfarm PowerMac thermal control. Generic PID helpers
  *
@@ -82,3 +85,5 @@ struct wf_cpu_pid_state {
 extern void wf_cpu_pid_init(struct wf_cpu_pid_state *st,
 			    struct wf_cpu_pid_param *param);
 extern s32 wf_cpu_pid_run(struct wf_cpu_pid_state *st, s32 power, s32 temp);
+
+#endif /* _WINDFARM_PID_H */
-- 
GitLab


From 3aa16303dc98b7b8baa2adbc3210fd513ec0e810 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 16 Sep 2022 22:16:38 +0800
Subject: [PATCH 2599/4122] macintosh: Switch to use for_each_child_of_node()
 macro

Use for_each_child_of_node() macro instead of open coding it.
No functional change.

Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220916141638.685575-1-yangyingliang@huawei.com
---
 drivers/macintosh/windfarm_smu_controls.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/macintosh/windfarm_smu_controls.c b/drivers/macintosh/windfarm_smu_controls.c
index e9957ad49a2af..bdd92b27da2ae 100644
--- a/drivers/macintosh/windfarm_smu_controls.c
+++ b/drivers/macintosh/windfarm_smu_controls.c
@@ -266,12 +266,11 @@ static int __init smu_controls_init(void)
 		return -ENODEV;
 
 	/* Look for RPM fans */
-	for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;)
+	for_each_child_of_node(smu, fans)
 		if (of_node_name_eq(fans, "rpm-fans") ||
 		    of_device_is_compatible(fans, "smu-rpm-fans"))
 			break;
-	for (fan = NULL;
-	     fans && (fan = of_get_next_child(fans, fan)) != NULL;) {
+	for_each_child_of_node(fans, fan) {
 		struct smu_fan_control *fct;
 
 		fct = smu_fan_create(fan, 0);
@@ -286,11 +285,10 @@ static int __init smu_controls_init(void)
 
 
 	/* Look for PWM fans */
-	for (fans = NULL; (fans = of_get_next_child(smu, fans)) != NULL;)
+	for_each_child_of_node(smu, fans)
 		if (of_node_name_eq(fans, "pwm-fans"))
 			break;
-	for (fan = NULL;
-	     fans && (fan = of_get_next_child(fans, fan)) != NULL;) {
+	for_each_child_of_node(fans, fan) {
 		struct smu_fan_control *fct;
 
 		fct = smu_fan_create(fan, 1);
-- 
GitLab


From 5ca86eae55a2f006e6c1edd2029b2cacb6979515 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 4 Nov 2022 11:25:51 +0800
Subject: [PATCH 2600/4122] macintosh: fix possible memory leak in
 macio_add_one_device()

Afer commit 1fa5ae857bb1 ("driver core: get rid of struct device's
bus_id string array"), the name of device is allocated dynamically. It
needs to be freed when of_device_register() fails. Call put_device() to
give up the reference that's taken in device_initialize(), so that it
can be freed in kobject_cleanup() when the refcount hits 0.

macio device is freed in macio_release_dev(), so the kfree() can be
removed.

Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221104032551.1075335-1-yangyingliang@huawei.com
---
 drivers/macintosh/macio_asic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index 1ec1e5984563f..3bc1f374e6577 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -424,7 +424,7 @@ static struct macio_dev * macio_add_one_device(struct macio_chip *chip,
 	if (of_device_register(&dev->ofdev) != 0) {
 		printk(KERN_DEBUG"macio: device registration error for %s!\n",
 		       dev_name(&dev->ofdev.dev));
-		kfree(dev);
+		put_device(&dev->ofdev.dev);
 		return NULL;
 	}
 
-- 
GitLab


From dbaa3105736d4d73063ea0a3b01cd7fafce924e6 Mon Sep 17 00:00:00 2001
From: Xie Shaowen <studentxswpy@163.com>
Date: Tue, 2 Aug 2022 15:41:48 +0800
Subject: [PATCH 2601/4122] macintosh/macio-adb: check the return value of
 ioremap()

The function ioremap() in macio_init() can fail, so its return value
should be checked.

Fixes: 36874579dbf4c ("[PATCH] powerpc: macio-adb build fix")
Reported-by: Hacash Robot <hacashRobot@santino.com>
Signed-off-by: Xie Shaowen <studentxswpy@163.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220802074148.3213659-1-studentxswpy@163.com
---
 drivers/macintosh/macio-adb.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/macintosh/macio-adb.c b/drivers/macintosh/macio-adb.c
index 3721402582b4f..55a9f8c3a150e 100644
--- a/drivers/macintosh/macio-adb.c
+++ b/drivers/macintosh/macio-adb.c
@@ -108,6 +108,10 @@ int macio_init(void)
 		return -ENXIO;
 	}
 	adb = ioremap(r.start, sizeof(struct adb_regs));
+	if (!adb) {
+		of_node_put(adbs);
+		return -ENOMEM;
+	}
 
 	out_8(&adb->ctrl.r, 0);
 	out_8(&adb->intr.r, 0);
-- 
GitLab


From 5836947613ef33d311b4eff6a32d019580a214f5 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 29 Jan 2022 08:16:04 +0100
Subject: [PATCH 2602/4122] powerpc/52xx: Fix a resource leak in an error
 handling path

The error handling path of mpc52xx_lpbfifo_probe() has a request_irq()
that is not balanced by a corresponding free_irq().

Add the missing call, as already done in the remove function.

Fixes: 3c9059d79f5e ("powerpc/5200: add LocalPlus bus FIFO device driver")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/dec1496d46ccd5311d0f6e9f9ca4238be11bf6a6.1643440531.git.christophe.jaillet@wanadoo.fr
---
 arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 48038aaedbd36..2875c206ac0f8 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -531,6 +531,7 @@ static int mpc52xx_lpbfifo_probe(struct platform_device *op)
  err_bcom_rx_irq:
 	bcom_gen_bd_rx_release(lpbfifo.bcom_rx_task);
  err_bcom_rx:
+	free_irq(lpbfifo.irq, &lpbfifo);
  err_irq:
 	iounmap(lpbfifo.regs);
 	lpbfifo.regs = NULL;
-- 
GitLab


From e75d07bd8303588c33e6f1f180a9081fb58c872e Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Wed, 9 Mar 2022 10:29:50 +0100
Subject: [PATCH 2603/4122] powerpc: Remove find_current_mm_pte()

Last usage of find_current_mm_pte() was removed by
commit 15759cb054ef ("powerpc/perf/callchain: Use
__get_user_pages_fast in read_user_stack_slow")

Remove it.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/ec79f462a3bfa8365b7df505e574d5d85246bc68.1646818177.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/pte-walk.h | 25 -------------------------
 arch/powerpc/mm/book3s64/pgtable.c  |  4 ++--
 2 files changed, 2 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/include/asm/pte-walk.h b/arch/powerpc/include/asm/pte-walk.h
index 714a35f0d425b..73c22c579a791 100644
--- a/arch/powerpc/include/asm/pte-walk.h
+++ b/arch/powerpc/include/asm/pte-walk.h
@@ -60,29 +60,4 @@ static inline phys_addr_t ppc_find_vmap_phys(unsigned long addr)
 	return pa;
 }
 
-/*
- * This is what we should always use. Any other lockless page table lookup needs
- * careful audit against THP split.
- */
-static inline pte_t *find_current_mm_pte(pgd_t *pgdir, unsigned long ea,
-					 bool *is_thp, unsigned *hshift)
-{
-	pte_t *pte;
-
-	VM_WARN(!arch_irqs_disabled(), "%s called with irq enabled\n", __func__);
-	VM_WARN(pgdir != current->mm->pgd,
-		"%s lock less page table lookup called on wrong mm\n", __func__);
-	pte = __find_linux_pte(pgdir, ea, is_thp, hshift);
-
-#if defined(CONFIG_DEBUG_VM) &&						\
-	!(defined(CONFIG_HUGETLB_PAGE) || defined(CONFIG_TRANSPARENT_HUGEPAGE))
-	/*
-	 * We should not find huge page if these configs are not enabled.
-	 */
-	if (hshift)
-		WARN_ON(*hshift);
-#endif
-	return pte;
-}
-
 #endif /* _ASM_POWERPC_PTE_WALK_H */
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index f6151a5892982..85c84e89e3eaf 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -100,14 +100,14 @@ static void do_serialize(void *arg)
 }
 
 /*
- * Serialize against find_current_mm_pte which does lock-less
+ * Serialize against __find_linux_pte() which does lock-less
  * lookup in page tables with local interrupts disabled. For huge pages
  * it casts pmd_t to pte_t. Since format of pte_t is different from
  * pmd_t we want to prevent transit from pmd pointing to page table
  * to pmd pointing to huge page (and back) while interrupts are disabled.
  * We clear pmd to possibly replace it with page table pointer in
  * different code paths. So make sure we wait for the parallel
- * find_current_mm_pte to finish.
+ * __find_linux_pte() to finish.
  */
 void serialize_against_pte_lookup(struct mm_struct *mm)
 {
-- 
GitLab


From 4562bffb83b88e61ea9c9912e50efbd5a941f0b3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 23 Nov 2022 22:19:18 -0800
Subject: [PATCH 2604/4122] powerpc/mpc52xx_lpbfifo: fix all kernel-doc
 warnings

Fix multiple kernel-doc warnings in mpc52xx_lpbfifo.c:

arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c:377: warning: expecting prototype for mpc52xx_lpbfifo_bcom_poll(). Prototype was for mpc52xx_lpbfifo_poll() instead

mpc52xx_lpbfifo.c:221: warning: No description found for return value of 'mpc52xx_lpbfifo_irq'
mpc52xx_lpbfifo.c:327: warning: No description found for return value of 'mpc52xx_lpbfifo_bcom_irq'
mpc52xx_lpbfifo.c:398: warning: No description found for return value of 'mpc52xx_lpbfifo_submit'

mpc52xx_lpbfifo.c:64: warning: Function parameter or member 'req' not described in 'mpc52xx_lpbfifo_kick'
mpc52xx_lpbfifo.c:220: warning: contents before sections
mpc52xx_lpbfifo.c:223: warning: Function parameter or member 'irq' not described in 'mpc52xx_lpbfifo_irq'
mpc52xx_lpbfifo.c:223: warning: Function parameter or member 'dev_id' not described in 'mpc52xx_lpbfifo_irq'
mpc52xx_lpbfifo.c:328: warning: contents before sections
mpc52xx_lpbfifo.c:331: warning: Function parameter or member 'irq' not described in 'mpc52xx_lpbfifo_bcom_irq'
mpc52xx_lpbfifo.c:331: warning: Function parameter or member 'dev_id' not described in 'mpc52xx_lpbfifo_bcom_irq'

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221124061918.1967-1-rdunlap@infradead.org
---
 arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
index 2875c206ac0f8..6d1dd6e874786 100644
--- a/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
+++ b/arch/powerpc/platforms/52xx/mpc52xx_lpbfifo.c
@@ -59,6 +59,8 @@ static struct mpc52xx_lpbfifo lpbfifo;
 
 /**
  * mpc52xx_lpbfifo_kick - Trigger the next block of data to be transferred
+ *
+ * @req: Pointer to request structure
  */
 static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
 {
@@ -178,6 +180,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
 
 /**
  * mpc52xx_lpbfifo_irq - IRQ handler for LPB FIFO
+ * @irq: IRQ number to be handled
+ * @dev_id: device ID cookie
  *
  * On transmit, the dma completion irq triggers before the fifo completion
  * triggers.  Handle the dma completion here instead of the LPB FIFO Bestcomm
@@ -216,6 +220,8 @@ static void mpc52xx_lpbfifo_kick(struct mpc52xx_lpbfifo_request *req)
  * or nested spinlock condition.  The out path is non-trivial, so
  * extra fiddling is done to make sure all paths lead to the same
  * outbound code.
+ *
+ * Return: irqreturn code (%IRQ_HANDLED)
  */
 static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
 {
@@ -320,8 +326,12 @@ static irqreturn_t mpc52xx_lpbfifo_irq(int irq, void *dev_id)
 
 /**
  * mpc52xx_lpbfifo_bcom_irq - IRQ handler for LPB FIFO Bestcomm task
+ * @irq: IRQ number to be handled
+ * @dev_id: device ID cookie
  *
  * Only used when receiving data.
+ *
+ * Return: irqreturn code (%IRQ_HANDLED)
  */
 static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
 {
@@ -372,7 +382,7 @@ static irqreturn_t mpc52xx_lpbfifo_bcom_irq(int irq, void *dev_id)
 }
 
 /**
- * mpc52xx_lpbfifo_bcom_poll - Poll for DMA completion
+ * mpc52xx_lpbfifo_poll - Poll for DMA completion
  */
 void mpc52xx_lpbfifo_poll(void)
 {
@@ -393,6 +403,8 @@ EXPORT_SYMBOL(mpc52xx_lpbfifo_poll);
 /**
  * mpc52xx_lpbfifo_submit - Submit an LPB FIFO transfer request.
  * @req: Pointer to request structure
+ *
+ * Return: %0 on success, -errno code on error
  */
 int mpc52xx_lpbfifo_submit(struct mpc52xx_lpbfifo_request *req)
 {
-- 
GitLab


From 932c6dea4f32f7d71488137c475b60a77e56bb2a Mon Sep 17 00:00:00 2001
From: Deming Wang <wangdeming@inspur.com>
Date: Wed, 13 Apr 2022 06:55:07 -0400
Subject: [PATCH 2605/4122] powerpc/xive: remove unused parameter

The parameter xc to xive_cleanup_single_escalation() is unused, so we
can remove it.

Signed-off-by: Deming Wang <wangdeming@inspur.com>
[mpe: Reword change log, unwrap lines < 90 columns]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220413105507.1729-1-wangdeming@inspur.com
---
 arch/powerpc/kvm/book3s_xive.c        | 6 ++----
 arch/powerpc/kvm/book3s_xive.h        | 3 +--
 arch/powerpc/kvm/book3s_xive_native.c | 3 +--
 3 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 4ca23644f7525..d64b2dcc0e7f9 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1785,8 +1785,7 @@ void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
  * stale_p (because it has no easy way to address it).  Hence we have
  * to adjust stale_p before shutting down the interrupt.
  */
-void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
-				    struct kvmppc_xive_vcpu *xc, int irq)
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq)
 {
 	struct irq_data *d = irq_get_irq_data(irq);
 	struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
@@ -1827,8 +1826,7 @@ void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
 	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
 		if (xc->esc_virq[i]) {
 			if (kvmppc_xive_has_single_escalation(xc->xive))
-				xive_cleanup_single_escalation(vcpu, xc,
-							xc->esc_virq[i]);
+				xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
 			irq_dispose_mapping(xc->esc_virq[i]);
 			kfree(xc->esc_virq_names[i]);
diff --git a/arch/powerpc/kvm/book3s_xive.h b/arch/powerpc/kvm/book3s_xive.h
index 1e48f72e8aa5d..62bf39f537839 100644
--- a/arch/powerpc/kvm/book3s_xive.h
+++ b/arch/powerpc/kvm/book3s_xive.h
@@ -299,8 +299,7 @@ int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio);
 int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
 				  bool single_escalation);
 struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
-void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
-				    struct kvmppc_xive_vcpu *xc, int irq);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu, int irq);
 int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp);
 int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr);
 bool kvmppc_xive_check_save_restore(struct kvm_vcpu *vcpu);
diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c
index 5271c33fe79e4..4f566bea5e10f 100644
--- a/arch/powerpc/kvm/book3s_xive_native.c
+++ b/arch/powerpc/kvm/book3s_xive_native.c
@@ -93,8 +93,7 @@ void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
 		/* Free the escalation irq */
 		if (xc->esc_virq[i]) {
 			if (kvmppc_xive_has_single_escalation(xc->xive))
-				xive_cleanup_single_escalation(vcpu, xc,
-							xc->esc_virq[i]);
+				xive_cleanup_single_escalation(vcpu, xc->esc_virq[i]);
 			free_irq(xc->esc_virq[i], vcpu);
 			irq_dispose_mapping(xc->esc_virq[i]);
 			kfree(xc->esc_virq_names[i]);
-- 
GitLab


From 37195edebf479b94f1e20c2a83a29e4beebe7ff5 Mon Sep 17 00:00:00 2001
From: Julia Lawall <Julia.Lawall@inria.fr>
Date: Sat, 21 May 2022 13:11:32 +0200
Subject: [PATCH 2606/4122] cxl: fix typo in comment

Spelling mistake (triple letters) in comment.
Detected with the help of Coccinelle.

Signed-off-by: Julia Lawall <Julia.Lawall@inria.fr>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220521111145.81697-82-Julia.Lawall@inria.fr
---
 include/misc/cxl.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/misc/cxl.h b/include/misc/cxl.h
index 0410412de16b0..d8044299d654d 100644
--- a/include/misc/cxl.h
+++ b/include/misc/cxl.h
@@ -30,7 +30,7 @@ unsigned int cxl_pci_to_cfg_record(struct pci_dev *dev);
 /*
  * Context lifetime overview:
  *
- * An AFU context may be inited and then started and stoppped multiple times
+ * An AFU context may be inited and then started and stopped multiple times
  * before it's released. ie.
  *    - cxl_dev_context_init()
  *      - cxl_start_context()
-- 
GitLab


From 1d09697ff22908ae487fc8c4fbde1811732be523 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Sun, 5 Jun 2022 10:00:38 +0400
Subject: [PATCH 2607/4122] cxl: Fix refcount leak in cxl_calc_capp_routing

of_get_next_parent() returns a node pointer with refcount incremented,
we should use of_node_put() on it when not need anymore.
This function only calls of_node_put() in normal path,
missing it in the error path.
Add missing of_node_put() to avoid refcount leak.

Fixes: f24be42aab37 ("cxl: Add psl9 specific code")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220605060038.62217-1-linmq006@gmail.com
---
 drivers/misc/cxl/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 3de0aea62ade4..62385a529d869 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -387,6 +387,7 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
 	rc = get_phb_index(np, phb_index);
 	if (rc) {
 		pr_err("cxl: invalid phb index\n");
+		of_node_put(np);
 		return rc;
 	}
 
-- 
GitLab


From f949ccee1dde970bc77dc871b4f0b5e651577344 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Nov 2022 22:54:39 +0800
Subject: [PATCH 2608/4122] cxl: fix possible null-ptr-deref in
 cxl_guest_init_afu|adapter()

If device_register() fails in cxl_register_afu|adapter(), the device
is not added, device_unregister() can not be called in the error path,
otherwise it will cause a null-ptr-deref because of removing not added
device.

As comment of device_register() says, it should use put_device() to give
up the reference in the error path. So split device_unregister() into
device_del() and put_device(), then goes to put dev when register fails.

Fixes: 14baf4d9c739 ("cxl: Add guest-specific code")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221111145440.2426970-1-yangyingliang@huawei.com
---
 drivers/misc/cxl/guest.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/misc/cxl/guest.c b/drivers/misc/cxl/guest.c
index 375f692ae9d68..fb95a2d5cef48 100644
--- a/drivers/misc/cxl/guest.c
+++ b/drivers/misc/cxl/guest.c
@@ -965,10 +965,10 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
 	 * if it returns an error!
 	 */
 	if ((rc = cxl_register_afu(afu)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_afu_add(afu)))
-		goto err_put1;
+		goto err_del_dev;
 
 	/*
 	 * pHyp doesn't expose the programming models supported by the
@@ -984,7 +984,7 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
 		afu->modes_supported = CXL_MODE_DIRECTED;
 
 	if ((rc = cxl_afu_select_best_mode(afu)))
-		goto err_put2;
+		goto err_remove_sysfs;
 
 	adapter->afu[afu->slice] = afu;
 
@@ -1004,10 +1004,12 @@ int cxl_guest_init_afu(struct cxl *adapter, int slice, struct device_node *afu_n
 
 	return 0;
 
-err_put2:
+err_remove_sysfs:
 	cxl_sysfs_afu_remove(afu);
-err_put1:
-	device_unregister(&afu->dev);
+err_del_dev:
+	device_del(&afu->dev);
+err_put_dev:
+	put_device(&afu->dev);
 	free = false;
 	guest_release_serr_irq(afu);
 err2:
@@ -1141,18 +1143,20 @@ struct cxl *cxl_guest_init_adapter(struct device_node *np, struct platform_devic
 	 * even if it returns an error!
 	 */
 	if ((rc = cxl_register_adapter(adapter)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_adapter_add(adapter)))
-		goto err_put1;
+		goto err_del_dev;
 
 	/* release the context lock as the adapter is configured */
 	cxl_adapter_context_unlock(adapter);
 
 	return adapter;
 
-err_put1:
-	device_unregister(&adapter->dev);
+err_del_dev:
+	device_del(&adapter->dev);
+err_put_dev:
+	put_device(&adapter->dev);
 	free = false;
 	cxl_guest_remove_chardev(adapter);
 err1:
-- 
GitLab


From 8bf03f557d6c6e108cf47bea32f4a68e276e1157 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Nov 2022 22:54:40 +0800
Subject: [PATCH 2609/4122] cxl: fix possible null-ptr-deref in
 cxl_pci_init_afu|adapter()

If device_register() fails in cxl_pci_afu|adapter(), the device
is not added, device_unregister() can not be called in the error
path, otherwise it will cause a null-ptr-deref because of removing
not added device.

As comment of device_register() says, it should use put_device() to give
up the reference in the error path. So split device_unregister() into
device_del() and put_device(), then goes to put dev when register fails.

Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for userspace access")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221111145440.2426970-2-yangyingliang@huawei.com
---
 drivers/misc/cxl/pci.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 62385a529d869..0ff944860dda9 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -1165,10 +1165,10 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 	 * if it returns an error!
 	 */
 	if ((rc = cxl_register_afu(afu)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_afu_add(afu)))
-		goto err_put1;
+		goto err_del_dev;
 
 	adapter->afu[afu->slice] = afu;
 
@@ -1177,10 +1177,12 @@ static int pci_init_afu(struct cxl *adapter, int slice, struct pci_dev *dev)
 
 	return 0;
 
-err_put1:
+err_del_dev:
+	device_del(&afu->dev);
+err_put_dev:
 	pci_deconfigure_afu(afu);
 	cxl_debugfs_afu_remove(afu);
-	device_unregister(&afu->dev);
+	put_device(&afu->dev);
 	return rc;
 
 err_free_native:
@@ -1668,23 +1670,25 @@ static struct cxl *cxl_pci_init_adapter(struct pci_dev *dev)
 	 * even if it returns an error!
 	 */
 	if ((rc = cxl_register_adapter(adapter)))
-		goto err_put1;
+		goto err_put_dev;
 
 	if ((rc = cxl_sysfs_adapter_add(adapter)))
-		goto err_put1;
+		goto err_del_dev;
 
 	/* Release the context lock as adapter is configured */
 	cxl_adapter_context_unlock(adapter);
 
 	return adapter;
 
-err_put1:
+err_del_dev:
+	device_del(&adapter->dev);
+err_put_dev:
 	/* This should mirror cxl_remove_adapter, except without the
 	 * sysfs parts
 	 */
 	cxl_debugfs_adapter_remove(adapter);
 	cxl_deconfigure_adapter(adapter);
-	device_unregister(&adapter->dev);
+	put_device(&adapter->dev);
 	return ERR_PTR(rc);
 
 err_release:
-- 
GitLab


From 295faa17722a11cac8dbf51e4c9f9405a5e07ef1 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Nov 2022 22:59:29 +0800
Subject: [PATCH 2610/4122] ocxl: fix possible name leak in
 ocxl_file_register_afu()

If device_register() returns error in ocxl_file_register_afu(),
the name allocated by dev_set_name() need be freed. As comment
of device_register() says, it should use put_device() to give
up the reference in the error path. So fix this by calling
put_device(), then the name can be freed in kobject_cleanup(),
and info is freed in info_release().

Fixes: 75ca758adbaf ("ocxl: Create a clear delineation between ocxl backend & frontend")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221111145929.2429271-1-yangyingliang@huawei.com
---
 drivers/misc/ocxl/file.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index d46dba2df5a10..452d5777a0e4c 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -541,8 +541,11 @@ int ocxl_file_register_afu(struct ocxl_afu *afu)
 		goto err_put;
 
 	rc = device_register(&info->dev);
-	if (rc)
-		goto err_put;
+	if (rc) {
+		free_minor(info);
+		put_device(&info->dev);
+		return rc;
+	}
 
 	rc = ocxl_sysfs_register_afu(info);
 	if (rc)
-- 
GitLab


From 5f58cad1e4c65bebee34292696c6d2105eeb2027 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 21 Nov 2022 23:43:39 +0800
Subject: [PATCH 2611/4122] ocxl: fix pci device refcount leak when calling
 get_function_0()

get_function_0() calls pci_get_domain_bus_and_slot(), as comment
says, it returns a pci device with refcount increment, so after
using it, pci_dev_put() needs be called.

Get the device reference when get_function_0() is not called, so
pci_dev_put() can be called in the error path and callers
unconditionally. And add comment above get_dvsec_vendor0() to tell
callers to call pci_dev_put().

Fixes: 87db7579ebd5 ("ocxl: control via sysfs whether the FPGA is reloaded on a link reset")
Suggested-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Acked-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221121154339.4088935-1-yangyingliang@huawei.com
---
 drivers/misc/ocxl/config.c | 20 +++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index e401a51596b9c..92ab49705f645 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -193,6 +193,18 @@ static int read_dvsec_vendor(struct pci_dev *dev)
 	return 0;
 }
 
+/**
+ * get_dvsec_vendor0() - Find a related PCI device (function 0)
+ * @dev: PCI device to match
+ * @dev0: The PCI device (function 0) found
+ * @out_pos: The position of PCI device (function 0)
+ *
+ * Returns 0 on success, negative on failure.
+ *
+ * NOTE: If it's successful, the reference of dev0 is increased,
+ * so after using it, the callers must call pci_dev_put() to give
+ * up the reference.
+ */
 static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0,
 			     int *out_pos)
 {
@@ -202,10 +214,14 @@ static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0,
 		dev = get_function_0(dev);
 		if (!dev)
 			return -1;
+	} else {
+		dev = pci_dev_get(dev);
 	}
 	pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
-	if (!pos)
+	if (!pos) {
+		pci_dev_put(dev);
 		return -1;
+	}
 	*dev0 = dev;
 	*out_pos = pos;
 	return 0;
@@ -222,6 +238,7 @@ int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val)
 
 	pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
 			      &reset_reload);
+	pci_dev_put(dev0);
 	*val = !!(reset_reload & BIT(0));
 	return 0;
 }
@@ -243,6 +260,7 @@ int ocxl_config_set_reset_reload(struct pci_dev *dev, int val)
 		reset_reload &= ~BIT(0);
 	pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
 			       reset_reload);
+	pci_dev_put(dev0);
 	return 0;
 }
 
-- 
GitLab


From 14b5d59a261b1947db287b3b52f4bb1dc496dede Mon Sep 17 00:00:00 2001
From: Deming Wang <wangdeming@inspur.com>
Date: Fri, 1 Jul 2022 05:45:53 -0400
Subject: [PATCH 2612/4122] powerpc/pseries: Fix formatting to make code look
 more beautiful

Operators should be separated by spaces in tce_buildmulti_pSeriesLP()

Signed-off-by: Deming Wang <wangdeming@inspur.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220701094553.1722-1-wangdeming@inspur.com
---
 arch/powerpc/platforms/pseries/iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 561adac690229..c74b71d4733d4 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -248,7 +248,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
 		 * Set up the page with TCE data, looping through and setting
 		 * the values.
 		 */
-		limit = min_t(long, npages, 4096/TCE_ENTRY_SIZE);
+		limit = min_t(long, npages, 4096 / TCE_ENTRY_SIZE);
 
 		for (l = 0; l < limit; l++) {
 			tcep[l] = cpu_to_be64(proto_tce | rpn << tceshift);
-- 
GitLab


From 7af82ff90a2b0690c2c45818fcce4c4ac3b187f3 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 9 Aug 2022 16:24:25 +0530
Subject: [PATCH 2613/4122] powerpc/ftrace: Ignore weak functions

Extend commit b39181f7c6907d ("ftrace: Add FTRACE_MCOUNT_MAX_OFFSET to
avoid adding weak function") to ppc32 and ppc64 -mprofile-kernel by
defining FTRACE_MCOUNT_MAX_OFFSET.

For ppc64 -mprofile-kernel ABI, we can have two instructions at function
entry for TOC setup followed by 'mflr r0' and 'bl _mcount'. So, the
mcount location is at most the 4th instruction in a function. For ppc32,
mcount location is always the 3rd instruction in a function, preceded by
'mflr r0' and 'stw r0,4(r1)'.

With this patch, and with ppc64le_guest_defconfig and some ftrace/bpf
config items enabled:
  # grep __ftrace_invalid_address available_filter_functions | wc -l
  79

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220809105425.424045-1-naveen.n.rao@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/ftrace.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index 3cee7115441b4..ade406dc6504e 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -10,6 +10,13 @@
 
 #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
+/* Ignore unused weak functions which will have larger offsets */
+#ifdef CONFIG_MPROFILE_KERNEL
+#define FTRACE_MCOUNT_MAX_OFFSET	12
+#elif defined(CONFIG_PPC32)
+#define FTRACE_MCOUNT_MAX_OFFSET	8
+#endif
+
 #ifndef __ASSEMBLY__
 extern void _mcount(void);
 
-- 
GitLab


From addebe1cfa71eb29caa9d5c6bc719171e4e76414 Mon Sep 17 00:00:00 2001
From: Nicholas Miehlbradt <nicholas@linux.ibm.com>
Date: Wed, 10 Aug 2022 04:03:21 +0000
Subject: [PATCH 2614/4122] docs: powerpc: add POWER9 and POWER10 to CPU
 families

Add POWER9 and POWER10 to CPU families and list Radix MMU.

Signed-off-by: Nicholas Miehlbradt <nicholas@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220810040321.375396-1-nicholas@linux.ibm.com
---
 Documentation/powerpc/cpu_families.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/powerpc/cpu_families.rst b/Documentation/powerpc/cpu_families.rst
index 9b84e045e7134..eb7e60649b433 100644
--- a/Documentation/powerpc/cpu_families.rst
+++ b/Documentation/powerpc/cpu_families.rst
@@ -10,6 +10,7 @@ Book3S (aka sPAPR)
 ------------------
 
 - Hash MMU (except 603 and e300)
+- Radix MMU (POWER9 and later)
 - Software loaded TLB (603 and e300)
 - Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600)
 - Mix of 32 & 64 bit::
@@ -100,6 +101,18 @@ Book3S (aka sPAPR)
           v
    +--------------+
    |    POWER8    |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |    POWER9    |
+   +--------------+
+          |
+          |
+          v
+   +--------------+
+   |   POWER10    |
    +--------------+
 
 
-- 
GitLab


From ff8fae94e26f5cd2779ceda0ee6d714a10501abd Mon Sep 17 00:00:00 2001
From: Shaomin Deng <dengshaomin@cdjrlc.com>
Date: Sun, 4 Sep 2022 11:51:02 -0400
Subject: [PATCH 2615/4122] drivers/ps3: Fix double word in comments

Drop the repeated word "when" in comments.

Signed-off-by: Shaomin Deng <dengshaomin@cdjrlc.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220904155102.26957-1-dengshaomin@cdjrlc.com
---
 drivers/ps3/ps3-lpm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c
index 65512b6cc6fdc..200ad8751860a 100644
--- a/drivers/ps3/ps3-lpm.c
+++ b/drivers/ps3/ps3-lpm.c
@@ -1066,7 +1066,7 @@ EXPORT_SYMBOL_GPL(ps3_disable_pm_interrupts);
  *  instance, specified by one of enum ps3_lpm_tb_type.
  * @tb_cache: Optional user supplied buffer to use as the trace buffer cache.
  *  If NULL, the driver will allocate and manage an internal buffer.
- *  Unused when when @tb_type is PS3_LPM_TB_TYPE_NONE.
+ *  Unused when @tb_type is PS3_LPM_TB_TYPE_NONE.
  * @tb_cache_size: The size in bytes of the user supplied @tb_cache buffer.
  *  Unused when @tb_cache is NULL or @tb_type is PS3_LPM_TB_TYPE_NONE.
  */
-- 
GitLab


From b86cf14f240e002e001fd4f2bf49114c7836fd5c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Fri, 9 Sep 2022 15:23:12 +1000
Subject: [PATCH 2616/4122] powerpc: add compile-time support for lbarx, lharx

ISA v2.06 (POWER7 and up) as well as e6500 support lbarx and lharx.
Add a compile option that allows code to use it, and add support in
cmpxchg and xchg 8 and 16 bit values without shifting and masking.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220909052312.63916-1-npiggin@gmail.com
---
 arch/powerpc/Kconfig                   |   3 +
 arch/powerpc/include/asm/cmpxchg.h     | 231 ++++++++++++++++++++++++-
 arch/powerpc/lib/sstep.c               |  21 +--
 arch/powerpc/platforms/Kconfig.cputype |   5 +
 4 files changed, 249 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 699df27b0e2fc..4fd4924f6d50e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -293,6 +293,9 @@ config PPC_BARRIER_NOSPEC
 	default y
 	depends on PPC_BOOK3S_64 || PPC_E500
 
+config PPC_HAS_LBARX_LHARX
+	bool
+
 config EARLY_PRINTK
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/cmpxchg.h b/arch/powerpc/include/asm/cmpxchg.h
index 05f246c0e36eb..d0ea0571e79ab 100644
--- a/arch/powerpc/include/asm/cmpxchg.h
+++ b/arch/powerpc/include/asm/cmpxchg.h
@@ -77,10 +77,76 @@ u32 __cmpxchg_##type##sfx(volatile void *p, u32 old, u32 new)	\
  * the previous value stored there.
  */
 
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
 XCHG_GEN(u8, _local, "memory");
 XCHG_GEN(u8, _relaxed, "cc");
 XCHG_GEN(u16, _local, "memory");
 XCHG_GEN(u16, _relaxed, "cc");
+#else
+static __always_inline unsigned long
+__xchg_u8_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2		# __xchg_u8_local\n"
+"	stbcx.	%3,0,%2 \n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned char *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u8_relaxed(u8 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lbarx	%0,0,%2		# __xchg_u8_relaxed\n"
+"	stbcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_local(volatile void *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2		# __xchg_u16_local\n"
+"	sthcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*(volatile unsigned short *)p)
+	: "r" (p), "r" (val)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__xchg_u16_relaxed(u16 *p, unsigned long val)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__(
+"1:	lharx	%0,0,%2		# __xchg_u16_relaxed\n"
+"	sthcx.	%3,0,%2\n"
+"	bne-	1b"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (val)
+	: "cc");
+
+	return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __xchg_u32_local(volatile void *p, unsigned long val)
@@ -198,11 +264,12 @@ __xchg_relaxed(void *ptr, unsigned long x, unsigned int size)
 	(__typeof__(*(ptr))) __xchg_relaxed((ptr),			\
 			(unsigned long)_x_, sizeof(*(ptr)));		\
 })
+
 /*
  * Compare and exchange - if *p == old, set it to new,
  * and return the old value of *p.
  */
-
+#ifndef CONFIG_PPC_HAS_LBARX_LHARX
 CMPXCHG_GEN(u8, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u8, _local, , , "memory");
 CMPXCHG_GEN(u8, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
@@ -211,6 +278,168 @@ CMPXCHG_GEN(u16, , PPC_ATOMIC_ENTRY_BARRIER, PPC_ATOMIC_EXIT_BARRIER, "memory");
 CMPXCHG_GEN(u16, _local, , , "memory");
 CMPXCHG_GEN(u16, _acquire, , PPC_ACQUIRE_BARRIER, "memory");
 CMPXCHG_GEN(u16, _relaxed, , , "cc");
+#else
+static __always_inline unsigned long
+__cmpxchg_u8(volatile unsigned char *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b"
+	PPC_ATOMIC_EXIT_BARRIER
+	"\n\
+2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_local(volatile unsigned char *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_local\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_relaxed(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u8_acquire(u8 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lbarx	%0,0,%2		# __cmpxchg_u8_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	stbcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16(volatile unsigned short *p, unsigned long old, unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+	PPC_ATOMIC_ENTRY_BARRIER
+"1:	lharx	%0,0,%2		# __cmpxchg_u16\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ATOMIC_EXIT_BARRIER
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_local(volatile unsigned short *p, unsigned long old,
+			unsigned long new)
+{
+	unsigned int prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_local\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_relaxed(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_relaxed\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc");
+
+	return prev;
+}
+
+static __always_inline unsigned long
+__cmpxchg_u16_acquire(u16 *p, unsigned long old, unsigned long new)
+{
+	unsigned long prev;
+
+	__asm__ __volatile__ (
+"1:	lharx	%0,0,%2		# __cmpxchg_u16_acquire\n"
+"	cmpw	0,%0,%3\n"
+"	bne-	2f\n"
+"	sthcx.	%4,0,%2\n"
+"	bne-	1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (prev), "+m" (*p)
+	: "r" (p), "r" (old), "r" (new)
+	: "cc", "memory");
+
+	return prev;
+}
+#endif
 
 static __always_inline unsigned long
 __cmpxchg_u32(volatile unsigned int *p, unsigned long old, unsigned long new)
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 398b5694aeb70..38158b77a8019 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -2284,15 +2284,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 			op->type = MKOP(STCX, 0, 4);
 			break;
 
-#ifdef __powerpc64__
-		case 84:	/* ldarx */
-			op->type = MKOP(LARX, 0, 8);
-			break;
-
-		case 214:	/* stdcx. */
-			op->type = MKOP(STCX, 0, 8);
-			break;
-
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
 		case 52:	/* lbarx */
 			op->type = MKOP(LARX, 0, 1);
 			break;
@@ -2308,6 +2300,15 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs,
 		case 726:	/* sthcx. */
 			op->type = MKOP(STCX, 0, 2);
 			break;
+#endif
+#ifdef __powerpc64__
+		case 84:	/* ldarx */
+			op->type = MKOP(LARX, 0, 8);
+			break;
+
+		case 214:	/* stdcx. */
+			op->type = MKOP(STCX, 0, 8);
+			break;
 
 		case 276:	/* lqarx */
 			if (!((rd & 1) || rd == ra || rd == rb))
@@ -3334,7 +3335,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op)
 		err = 0;
 		val = 0;
 		switch (size) {
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC_HAS_LBARX_LHARX
 		case 1:
 			__get_user_asmx(val, ea, err, "lbarx");
 			break;
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 0c4eed9aea806..7bac213b4125d 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -135,6 +135,7 @@ config GENERIC_CPU
 	depends on PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
 	select ARCH_HAS_FAST_MULTIPLIER
 	select PPC_64S_HASH_MMU
+	select PPC_HAS_LBARX_LHARX
 
 config POWERPC_CPU
 	bool "Generic 32 bits powerpc"
@@ -160,17 +161,20 @@ config POWER7_CPU
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
 	select PPC_64S_HASH_MMU
+	select PPC_HAS_LBARX_LHARX
 
 config POWER8_CPU
 	bool "POWER8"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
 	select PPC_64S_HASH_MMU
+	select PPC_HAS_LBARX_LHARX
 
 config POWER9_CPU
 	bool "POWER9"
 	depends on PPC_BOOK3S_64
 	select ARCH_HAS_FAST_MULTIPLIER
+	select PPC_HAS_LBARX_LHARX
 
 config POWER10_CPU
 	bool "POWER10"
@@ -184,6 +188,7 @@ config E5500_CPU
 config E6500_CPU
 	bool "Freescale e6500"
 	depends on PPC64 && PPC_E500
+	select PPC_HAS_LBARX_LHARX
 
 config 405_CPU
 	bool "40x family"
-- 
GitLab


From d87a233717da400792fa601b29fa74a7d28e03c2 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Sun, 11 Sep 2022 16:43:44 +0800
Subject: [PATCH 2617/4122] powerpc/pasemi: Add __init/__exit annotations to
 module init/exit funcs

Add missing __init/__exit annotations to module init/exit funcs.

Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220911084344.196353-1-xiujianfeng@huawei.com
---
 arch/powerpc/platforms/pasemi/gpio_mdio.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pasemi/gpio_mdio.c b/arch/powerpc/platforms/pasemi/gpio_mdio.c
index bf300167ad6bf..913b77b92cea1 100644
--- a/arch/powerpc/platforms/pasemi/gpio_mdio.c
+++ b/arch/powerpc/platforms/pasemi/gpio_mdio.c
@@ -294,7 +294,7 @@ static struct platform_driver gpio_mdio_driver =
 	},
 };
 
-static int gpio_mdio_init(void)
+static int __init gpio_mdio_init(void)
 {
 	struct device_node *np;
 
@@ -314,7 +314,7 @@ static int gpio_mdio_init(void)
 }
 module_init(gpio_mdio_init);
 
-static void gpio_mdio_exit(void)
+static void __exit gpio_mdio_exit(void)
 {
 	platform_driver_unregister(&gpio_mdio_driver);
 	if (gpio_regs)
-- 
GitLab


From 2223552256dfc48435e0699dbe1e9b8d2cd56b06 Mon Sep 17 00:00:00 2001
From: Disha Goel <disgoel@linux.vnet.ibm.com>
Date: Fri, 16 Sep 2022 16:27:35 +0530
Subject: [PATCH 2618/4122] powerpc/kvm: Remove unused macros from asm-offset

The kvm code was refactored to convert some of kvm assembly routines to C.
This includes commits which moved code path for the kvm guest entry/exit
for p7/8 from aseembly to C. As part of the code changes, usage of some of
the macros were removed. But definitions still exist in the assembly files.
Commits are listed below:

Commit 2e1ae9cd56f8 ("KVM: PPC: Book3S HV: Implement radix prefetch workaround by disabling MMU")
Commit 9769a7fd79b6 ("KVM: PPC: Book3S HV: Remove radix guest support from P7/8 path")
Commit fae5c9f3664b ("KVM: PPC: Book3S HV: remove ISA v3.0 and v3.1 support from P7/8 path")
Commit 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C")

Many of the asm-offset macro definitions were missed to remove. Patch
fixes by removing the unused macros.

Signed-off-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220916105736.268153-2-disgoel@linux.vnet.ibm.com
---
 arch/powerpc/kernel/asm-offsets.c | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 4ce2a4aa39854..b4b661f631f5f 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -418,21 +418,18 @@ int main(void)
 
 	/* book3s */
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	OFFSET(KVM_TLB_SETS, kvm, arch.tlb_sets);
 	OFFSET(KVM_SDR1, kvm, arch.sdr1);
 	OFFSET(KVM_HOST_LPID, kvm, arch.host_lpid);
 	OFFSET(KVM_HOST_LPCR, kvm, arch.host_lpcr);
 	OFFSET(KVM_HOST_SDR1, kvm, arch.host_sdr1);
 	OFFSET(KVM_ENABLED_HCALLS, kvm, arch.enabled_hcalls);
 	OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
-	OFFSET(KVM_RADIX, kvm, arch.radix);
 	OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
 	OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
 	OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
 	OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
 	OFFSET(VCPU_VPA_DIRTY, kvm_vcpu, arch.vpa.dirty);
 	OFFSET(VCPU_HEIR, kvm_vcpu, arch.emul_inst);
-	OFFSET(VCPU_NESTED, kvm_vcpu, arch.nested);
 	OFFSET(VCPU_CPU, kvm_vcpu, cpu);
 	OFFSET(VCPU_THREAD_CPU, kvm_vcpu, arch.thread_cpu);
 #endif
@@ -449,16 +446,12 @@ int main(void)
 	OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
 	OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
 	OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
-	OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
-	OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
 	OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
 	OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
 	OFFSET(VCPU_DEC_EXPIRES, kvm_vcpu, arch.dec_expires);
 	OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
 	OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
 	OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
-	OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
-	OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
 	OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
 	OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
 	OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
@@ -486,8 +479,6 @@ int main(void)
 	OFFSET(VCPU_TCSCR, kvm_vcpu, arch.tcscr);
 	OFFSET(VCPU_ACOP, kvm_vcpu, arch.acop);
 	OFFSET(VCPU_WORT, kvm_vcpu, arch.wort);
-	OFFSET(VCPU_TID, kvm_vcpu, arch.tid);
-	OFFSET(VCPU_PSSCR, kvm_vcpu, arch.psscr);
 	OFFSET(VCPU_HFSCR, kvm_vcpu, arch.hfscr);
 	OFFSET(VCORE_ENTRY_EXIT, kvmppc_vcore, entry_exit_map);
 	OFFSET(VCORE_IN_GUEST, kvmppc_vcore, in_guest);
@@ -582,8 +573,6 @@ int main(void)
 	HSTATE_FIELD(HSTATE_HWTHREAD_STATE, hwthread_state);
 	HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu);
 	HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore);
-	HSTATE_FIELD(HSTATE_XIVE_TIMA_PHYS, xive_tima_phys);
-	HSTATE_FIELD(HSTATE_XIVE_TIMA_VIRT, xive_tima_virt);
 	HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi);
 	HSTATE_FIELD(HSTATE_PTID, ptid);
 	HSTATE_FIELD(HSTATE_FAKE_SUSPEND, fake_suspend);
@@ -594,9 +583,6 @@ int main(void)
 	HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]);
 	HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]);
 	HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]);
-	HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]);
-	HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]);
-	HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]);
 	HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]);
 	HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]);
 	HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]);
@@ -672,17 +658,6 @@ int main(void)
 	OFFSET(VCPU_HOST_MAS6, kvm_vcpu, arch.host_mas6);
 #endif
 
-#ifdef CONFIG_KVM_XICS
-	DEFINE(VCPU_XIVE_SAVED_STATE, offsetof(struct kvm_vcpu,
-					       arch.xive_saved_state));
-	DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
-					    arch.xive_cam_word));
-	DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
-	DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
-	DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, arch.xive_esc_raddr));
-	DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, arch.xive_esc_vaddr));
-#endif
-
 #ifdef CONFIG_KVM_EXIT_TIMING
 	OFFSET(VCPU_TIMING_EXIT_TBU, kvm_vcpu, arch.timing_exit.tv32.tbu);
 	OFFSET(VCPU_TIMING_EXIT_TBL, kvm_vcpu, arch.timing_exit.tv32.tbl);
-- 
GitLab


From 4ac9d3187cc7ccba25f76a3faef3e08a366f77b9 Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Fri, 16 Sep 2022 16:27:36 +0530
Subject: [PATCH 2619/4122] powerpc/kvm: Remove unused references for
 MMCR3/SIER2/SIER3 registers

Commit 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore
in C") removed the PMU save/restore functions from assembly code and
implemented these functions in C, for power9 and later platforms.

After the code refactoring, Performance Monitoring Unit (PMU) registers
became part of "p9_host_os_sprs" structure and now this structure is
used to save/restore pmu host registers, for power9 and later platfroms.
But we still have old unused registers references. Patch removes unused
host_mmcr references for Monitor Mode Control Register 3 (MMCR3)/
Sampled Instruction Event Register 2 (SIER2)/ SIER3 registers from
"struct kvmppc_host_state".

Fixes: 57dc0eed73ca ("KVM: PPC: Book3S HV P9: Implement PMU save/restore in C")
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220916105736.268153-3-disgoel@linux.vnet.ibm.com
---
 arch/powerpc/include/asm/kvm_book3s_asm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index c8882d9b86c27..a36797938620f 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -105,7 +105,7 @@ struct kvmppc_host_state {
 	void __iomem *xive_tima_virt;
 	u32 saved_xirr;
 	u64 dabr;
-	u64 host_mmcr[10];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */
+	u64 host_mmcr[7];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
 	u32 host_pmc[8];
 	u64 host_purr;
 	u64 host_spurr;
-- 
GitLab


From 1c4a4a4c8410be4a231a58b23e7a30923ff954ac Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 16 Sep 2022 15:15:04 +0100
Subject: [PATCH 2620/4122] powerpc/xmon: Fix -Wswitch-unreachable warning in
 bpt_cmds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When building with automatic stack variable initialization, GCC 12
complains about variables defined outside of switch case statements.
Move the variable into the case that uses it, which silences the warning:

arch/powerpc/xmon/xmon.c: In function ‘bpt_cmds’:
arch/powerpc/xmon/xmon.c:1529:13: warning: statement will never be executed [-Wswitch-unreachable]
 1529 |         int mode;
      |             ^~~~

Fixes: 09b6c1129f89 ("powerpc/xmon: Fix compile error with PPC_8xx=y")
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/YySE6FHiOcbWWR+9@work
---
 arch/powerpc/xmon/xmon.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index f51c882bf9023..e34d7809f6c9f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1525,9 +1525,9 @@ bpt_cmds(void)
 	cmd = inchar();
 
 	switch (cmd) {
-	static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
-	int mode;
-	case 'd':	/* bd - hardware data breakpoint */
+	case 'd': {	/* bd - hardware data breakpoint */
+		static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
+		int mode;
 		if (xmon_is_ro) {
 			printf(xmon_ro_msg);
 			break;
@@ -1560,6 +1560,7 @@ bpt_cmds(void)
 
 		force_enable_xmon();
 		break;
+	}
 
 	case 'i':	/* bi - hardware instr breakpoint */
 		if (xmon_is_ro) {
-- 
GitLab


From 1892e87a3e9170146549779622cb844582f1e2bb Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 26 Sep 2022 23:03:25 -0700
Subject: [PATCH 2621/4122] powerpc/warp: switch to using gpiod API

This switches PIKA Warp away from legacy gpio API and to newer gpiod
API, so that we can eventually deprecate the former.

Because LEDs are normally driven by leds-gpio driver, but the
platform code also wants to access the LEDs during thermal shutdown,
and gpiod API does not allow locating GPIO without requesting it,
the platform code is now responsible for locating GPIOs through device
tree and requesting them. It then constructs platform data for
leds-gpio platform device and registers it. This allows platform
code to retain access to LED GPIO descriptors and use them when needed.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/YzKSLcrYmV5kjyeX@google.com
---
 arch/powerpc/boot/dts/warp.dts    |   4 +-
 arch/powerpc/platforms/44x/warp.c | 105 ++++++++++++++++++++++++++----
 2 files changed, 94 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/boot/dts/warp.dts b/arch/powerpc/boot/dts/warp.dts
index b4f32740870e0..aa62d08e97c2f 100644
--- a/arch/powerpc/boot/dts/warp.dts
+++ b/arch/powerpc/boot/dts/warp.dts
@@ -258,14 +258,12 @@
 			};
 
 			power-leds {
-				compatible = "gpio-leds";
+				compatible = "warp-power-leds";
 				green {
 					gpios = <&GPIO1 0 0>;
-					default-state = "keep";
 				};
 				red {
 					gpios = <&GPIO1 1 0>;
-					default-state = "keep";
 				};
 			};
 
diff --git a/arch/powerpc/platforms/44x/warp.c b/arch/powerpc/platforms/44x/warp.c
index f03432ef010bd..cefa313c09f0b 100644
--- a/arch/powerpc/platforms/44x/warp.c
+++ b/arch/powerpc/platforms/44x/warp.c
@@ -5,15 +5,17 @@
  * Copyright (c) 2008-2009 PIKA Technologies
  *   Sean MacLennan <smaclennan@pikatech.com>
  */
+#include <linux/err.h>
 #include <linux/init.h>
 #include <linux/of_platform.h>
 #include <linux/kthread.h>
+#include <linux/leds.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 #include <linux/export.h>
 
@@ -92,8 +94,6 @@ static int __init warp_post_info(void)
 
 static LIST_HEAD(dtm_shutdown_list);
 static void __iomem *dtm_fpga;
-static unsigned green_led, red_led;
-
 
 struct dtm_shutdown {
 	struct list_head list;
@@ -101,7 +101,6 @@ struct dtm_shutdown {
 	void *arg;
 };
 
-
 int pika_dtm_register_shutdown(void (*func)(void *arg), void *arg)
 {
 	struct dtm_shutdown *shutdown;
@@ -132,6 +131,35 @@ int pika_dtm_unregister_shutdown(void (*func)(void *arg), void *arg)
 	return -EINVAL;
 }
 
+#define WARP_GREEN_LED	0
+#define WARP_RED_LED	1
+
+static struct gpio_led warp_gpio_led_pins[] = {
+	[WARP_GREEN_LED] = {
+		.name		= "green",
+		.default_state	= LEDS_DEFSTATE_KEEP,
+		.gpiod		= NULL, /* to be filled by pika_setup_leds() */
+	},
+	[WARP_RED_LED] = {
+		.name		= "red",
+		.default_state	= LEDS_DEFSTATE_KEEP,
+		.gpiod		= NULL, /* to be filled by pika_setup_leds() */
+	},
+};
+
+static struct gpio_led_platform_data warp_gpio_led_data = {
+	.leds		= warp_gpio_led_pins,
+	.num_leds	= ARRAY_SIZE(warp_gpio_led_pins),
+};
+
+static struct platform_device warp_gpio_leds = {
+	.name	= "leds-gpio",
+	.id	= -1,
+	.dev	= {
+		.platform_data = &warp_gpio_led_data,
+	},
+};
+
 static irqreturn_t temp_isr(int irq, void *context)
 {
 	struct dtm_shutdown *shutdown;
@@ -139,7 +167,7 @@ static irqreturn_t temp_isr(int irq, void *context)
 
 	local_irq_disable();
 
-	gpio_set_value(green_led, 0);
+	gpiod_set_value(warp_gpio_led_pins[WARP_GREEN_LED].gpiod, 0);
 
 	/* Run through the shutdown list. */
 	list_for_each_entry(shutdown, &dtm_shutdown_list, list)
@@ -153,7 +181,7 @@ static irqreturn_t temp_isr(int irq, void *context)
 			out_be32(dtm_fpga + 0x14, reset);
 		}
 
-		gpio_set_value(red_led, value);
+		gpiod_set_value(warp_gpio_led_pins[WARP_RED_LED].gpiod, value);
 		value ^= 1;
 		mdelay(500);
 	}
@@ -162,25 +190,78 @@ static irqreturn_t temp_isr(int irq, void *context)
 	return IRQ_HANDLED;
 }
 
+/*
+ * Because green and red power LEDs are normally driven by leds-gpio driver,
+ * but in case of critical temperature shutdown we want to drive them
+ * ourselves, we acquire both and then create leds-gpio platform device
+ * ourselves, instead of doing it through device tree. This way we can still
+ * keep access to the gpios and use them when needed.
+ */
 static int pika_setup_leds(void)
 {
 	struct device_node *np, *child;
+	struct gpio_desc *gpio;
+	struct gpio_led *led;
+	int led_count = 0;
+	int error;
+	int i;
 
-	np = of_find_compatible_node(NULL, NULL, "gpio-leds");
+	np = of_find_compatible_node(NULL, NULL, "warp-power-leds");
 	if (!np) {
 		printk(KERN_ERR __FILE__ ": Unable to find leds\n");
 		return -ENOENT;
 	}
 
-	for_each_child_of_node(np, child)
-		if (of_node_name_eq(child, "green"))
-			green_led = of_get_gpio(child, 0);
-		else if (of_node_name_eq(child, "red"))
-			red_led = of_get_gpio(child, 0);
+	for_each_child_of_node(np, child) {
+		for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+			led = &warp_gpio_led_pins[i];
+
+			if (!of_node_name_eq(child, led->name))
+				continue;
+
+			if (led->gpiod) {
+				printk(KERN_ERR __FILE__ ": %s led has already been defined\n",
+				       led->name);
+				continue;
+			}
+
+			gpio = fwnode_gpiod_get_index(of_fwnode_handle(child),
+						      NULL, 0, GPIOD_ASIS,
+						      led->name);
+			error = PTR_ERR_OR_ZERO(gpio);
+			if (error) {
+				printk(KERN_ERR __FILE__ ": Failed to get %s led gpio: %d\n",
+				       led->name, error);
+				of_node_put(child);
+				goto err_cleanup_pins;
+			}
+
+			led->gpiod = gpio;
+			led_count++;
+		}
+	}
 
 	of_node_put(np);
 
+	/* Skip device registration if no leds have been defined */
+	if (led_count) {
+		error = platform_device_register(&warp_gpio_leds);
+		if (error) {
+			printk(KERN_ERR __FILE__ ": Unable to add leds-gpio: %d\n",
+			       error);
+			goto err_cleanup_pins;
+		}
+	}
+
 	return 0;
+
+err_cleanup_pins:
+	for (i = 0; i < ARRAY_SIZE(warp_gpio_led_pins); i++) {
+		led = &warp_gpio_led_pins[i];
+		gpiod_put(led->gpiod);
+		led->gpiod = NULL;
+	}
+	return error;
 }
 
 static void pika_setup_critical_temp(struct device_node *np,
-- 
GitLab


From 4e87bd14e501030619d1bad29b3ec1f947f84fc4 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Tue, 27 Sep 2022 12:23:58 -0700
Subject: [PATCH 2622/4122] powerpc/sgy_cts1000: convert to using gpiod API and
 facelift

This patch converts the driver to newer gpiod API, and away from
OF-specific legacy gpio API that we want to stop using.

While at it, let's address a few more issues:

- switch to using dev_info()/pr_info() and friends
- cancel work when unbinding the driver

Note that the original code handled halt GPIO polarity incorrectly:
in halt callback, when line polarity is "low" it would set trigger to
"1" and drive halt line high, which is counter to the annotation.
gpiod API will drive such line low. However I do not see any DTSes
in mainline that have a DT node with "sgy,gpio-halt" compatible.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/YzNNznewTyCJiGFz@google.com
---
 arch/powerpc/platforms/85xx/sgy_cts1000.c | 132 +++++++++-------------
 1 file changed, 53 insertions(+), 79 deletions(-)

diff --git a/arch/powerpc/platforms/85xx/sgy_cts1000.c b/arch/powerpc/platforms/85xx/sgy_cts1000.c
index e14d1b74d4e4c..751395cbf0222 100644
--- a/arch/powerpc/platforms/85xx/sgy_cts1000.c
+++ b/arch/powerpc/platforms/85xx/sgy_cts1000.c
@@ -7,10 +7,13 @@
  * Copyright 2012 by Servergy, Inc.
  */
 
+#define pr_fmt(fmt) "gpio-halt: " fmt
+
+#include <linux/err.h>
 #include <linux/platform_device.h>
 #include <linux/device.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
-#include <linux/of_gpio.h>
 #include <linux/of_irq.h>
 #include <linux/workqueue.h>
 #include <linux/reboot.h>
@@ -18,7 +21,8 @@
 
 #include <asm/machdep.h>
 
-static struct device_node *halt_node;
+static struct gpio_desc *halt_gpio;
+static int halt_irq;
 
 static const struct of_device_id child_match[] = {
 	{
@@ -36,23 +40,10 @@ static DECLARE_WORK(gpio_halt_wq, gpio_halt_wfn);
 
 static void __noreturn gpio_halt_cb(void)
 {
-	enum of_gpio_flags flags;
-	int trigger, gpio;
-
-	if (!halt_node)
-		panic("No reset GPIO information was provided in DT\n");
-
-	gpio = of_get_gpio_flags(halt_node, 0, &flags);
-
-	if (!gpio_is_valid(gpio))
-		panic("Provided GPIO is invalid\n");
-
-	trigger = (flags == OF_GPIO_ACTIVE_LOW);
-
-	printk(KERN_INFO "gpio-halt: triggering GPIO.\n");
+	pr_info("triggering GPIO.\n");
 
 	/* Probably wont return */
-	gpio_set_value(gpio, trigger);
+	gpiod_set_value(halt_gpio, 1);
 
 	panic("Halt failed\n");
 }
@@ -61,95 +52,78 @@ static void __noreturn gpio_halt_cb(void)
  * to handle the shutdown/poweroff. */
 static irqreturn_t gpio_halt_irq(int irq, void *__data)
 {
-	printk(KERN_INFO "gpio-halt: shutdown due to power button IRQ.\n");
+	struct platform_device *pdev = __data;
+
+	dev_info(&pdev->dev, "scheduling shutdown due to power button IRQ\n");
 	schedule_work(&gpio_halt_wq);
 
         return IRQ_HANDLED;
 };
 
-static int gpio_halt_probe(struct platform_device *pdev)
+static int __gpio_halt_probe(struct platform_device *pdev,
+			     struct device_node *halt_node)
 {
-	enum of_gpio_flags flags;
-	struct device_node *node = pdev->dev.of_node;
-	struct device_node *child_node;
-	int gpio, err, irq;
-	int trigger;
-
-	if (!node)
-		return -ENODEV;
-
-	/* If there's no matching child, this isn't really an error */
-	child_node = of_find_matching_node(node, child_match);
-	if (!child_node)
-		return 0;
-
-	/* Technically we could just read the first one, but punish
-	 * DT writers for invalid form. */
-	if (of_gpio_count(child_node) != 1) {
-		err = -EINVAL;
-		goto err_put;
-	}
-
-	/* Get the gpio number relative to the dynamic base. */
-	gpio = of_get_gpio_flags(child_node, 0, &flags);
-	if (!gpio_is_valid(gpio)) {
-		err = -EINVAL;
-		goto err_put;
-	}
+	int err;
 
-	err = gpio_request(gpio, "gpio-halt");
+	halt_gpio = fwnode_gpiod_get_index(of_fwnode_handle(halt_node),
+					   NULL, 0, GPIOD_OUT_LOW, "gpio-halt");
+	err = PTR_ERR_OR_ZERO(halt_gpio);
 	if (err) {
-		printk(KERN_ERR "gpio-halt: error requesting GPIO %d.\n",
-		       gpio);
-		goto err_put;
+		dev_err(&pdev->dev, "failed to request halt GPIO: %d\n", err);
+		return err;
 	}
 
-	trigger = (flags == OF_GPIO_ACTIVE_LOW);
-
-	gpio_direction_output(gpio, !trigger);
-
 	/* Now get the IRQ which tells us when the power button is hit */
-	irq = irq_of_parse_and_map(child_node, 0);
-	err = request_irq(irq, gpio_halt_irq, IRQF_TRIGGER_RISING |
-			  IRQF_TRIGGER_FALLING, "gpio-halt", child_node);
+	halt_irq = irq_of_parse_and_map(halt_node, 0);
+	err = request_irq(halt_irq, gpio_halt_irq,
+			  IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING,
+			  "gpio-halt", pdev);
 	if (err) {
-		printk(KERN_ERR "gpio-halt: error requesting IRQ %d for "
-		       "GPIO %d.\n", irq, gpio);
-		gpio_free(gpio);
-		goto err_put;
+		dev_err(&pdev->dev, "failed to request IRQ %d: %d\n",
+			halt_irq, err);
+		gpiod_put(halt_gpio);
+		halt_gpio = NULL;
+		return err;
 	}
 
 	/* Register our halt function */
 	ppc_md.halt = gpio_halt_cb;
 	pm_power_off = gpio_halt_cb;
 
-	printk(KERN_INFO "gpio-halt: registered GPIO %d (%d trigger, %d"
-	       " irq).\n", gpio, trigger, irq);
+	dev_info(&pdev->dev, "registered halt GPIO, irq: %d\n", halt_irq);
 
-	halt_node = child_node;
 	return 0;
-
-err_put:
-	of_node_put(child_node);
-	return err;
 }
 
-static int gpio_halt_remove(struct platform_device *pdev)
+static int gpio_halt_probe(struct platform_device *pdev)
 {
-	if (halt_node) {
-		int gpio = of_get_gpio(halt_node, 0);
-		int irq = irq_of_parse_and_map(halt_node, 0);
+	struct device_node *halt_node;
+	int ret;
+
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+
+	/* If there's no matching child, this isn't really an error */
+	halt_node = of_find_matching_node(pdev->dev.of_node, child_match);
+	if (!halt_node)
+		return -ENODEV;
+
+	ret = __gpio_halt_probe(pdev, halt_node);
+	of_node_put(halt_node);
 
-		free_irq(irq, halt_node);
+	return ret;
+}
 
-		ppc_md.halt = NULL;
-		pm_power_off = NULL;
+static int gpio_halt_remove(struct platform_device *pdev)
+{
+	free_irq(halt_irq, pdev);
+	cancel_work_sync(&gpio_halt_wq);
 
-		gpio_free(gpio);
+	ppc_md.halt = NULL;
+	pm_power_off = NULL;
 
-		of_node_put(halt_node);
-		halt_node = NULL;
-	}
+	gpiod_put(halt_gpio);
+	halt_gpio = NULL;
 
 	return 0;
 }
-- 
GitLab


From f2c45962cc618c12f69fd46e6ebc20b9cd7f15ac Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Wed, 28 Sep 2022 08:29:00 +0200
Subject: [PATCH 2623/4122] powerpc/8xx: Simplify pte_update() with 16k pages

While looking at code generated for code patching, I saw that
pte_clear generated:

 2d8:	38 a0 00 00 	li      r5,0
 2dc:	38 e0 10 00 	li      r7,4096
 2e0:	39 00 20 00 	li      r8,8192
 2e4:	39 40 30 00 	li      r10,12288
 2e8:	90 a9 00 00 	stw     r5,0(r9)
 2ec:	90 e9 00 04 	stw     r7,4(r9)
 2f0:	91 09 00 08 	stw     r8,8(r9)
 2f4:	91 49 00 0c 	stw     r10,12(r9)

With 16k pages, only the first entry is used by the kernel, so no need
to adapt the address of other entries. Only duplicate the first entry
for hardware.

Now it is:

 2cc:	39 40 00 00 	li      r10,0
 2d0:	91 49 00 00 	stw     r10,0(r9)
 2d4:	91 49 00 04 	stw     r10,4(r9)
 2d8:	91 49 00 08 	stw     r10,8(r9)
 2dc:	91 49 00 0c 	stw     r10,12(r9)

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/65f76300de07091a59a042a3db2d0ce9b939a05c.1664346532.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0d40b33184ebe..0e861e59b7695 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -256,8 +256,14 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
 
 	num = number_of_cells_per_pte(pmd, new, huge);
 
-	for (i = 0; i < num; i++, entry++, new += SZ_4K)
-		*entry = new;
+	for (i = 0; i < num; i += PAGE_SIZE / SZ_4K, new += PAGE_SIZE) {
+		*entry++ = new;
+		if (IS_ENABLED(CONFIG_PPC_16K_PAGES) && num != 1) {
+			*entry++ = new;
+			*entry++ = new;
+			*entry++ = new;
+		}
+	}
 
 	return old;
 }
-- 
GitLab


From 0b4721815c5328e08c3acdee4a53890e012d830b Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Wed, 28 Sep 2022 08:29:22 +0200
Subject: [PATCH 2624/4122] powerpc/8xx: Reverse order entries are written by
 __set_pte_at()

At the time being, with 16k pages __set_pte_at() writes table entries
in reverse order:

 294:	91 49 00 0c 	stw     r10,12(r9)
 298:	91 49 00 08 	stw     r10,8(r9)
 29c:	91 49 00 04 	stw     r10,4(r9)
 2a0:	91 49 00 00 	stw     r10,0(r9)

Allthough there should be no impact at all as it stays in a single
cacheline, reverse the writing in a more natural order.

 288:	91 49 00 0c 	stw     r10,0(r9)
 28c:	91 49 00 08 	stw     r10,4(r9)
 290:	91 49 00 04 	stw     r10,8(r9)
 294:	91 49 00 00 	stw     r10,12(r9)

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/67c3b5d44edfec054234ea9b4d05fc4b4f7f8a0e.1664346554.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/nohash/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index d9067dfc531cc..69c3a050a3d85 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -183,7 +183,7 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
 	 * cases, and 32-bit non-hash with 32-bit PTEs.
 	 */
 #if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
-	ptep->pte = ptep->pte1 = ptep->pte2 = ptep->pte3 = pte_val(pte);
+	ptep->pte3 = ptep->pte2 = ptep->pte1 = ptep->pte = pte_val(pte);
 #else
 	*ptep = pte;
 #endif
-- 
GitLab


From 5825603f67bc5ff445a1847302884154f0afa627 Mon Sep 17 00:00:00 2001
From: Joel Stanley <joel@jms.id.au>
Date: Fri, 30 Sep 2022 16:20:12 +0930
Subject: [PATCH 2625/4122] powerpc/microwatt: Add litesd

This is the register layout of the litesd peripheral for the fusesoc
based Microwatt SoC.

It requires a description of the system clock, which is hardcoded to
100MHz.

Signed-off-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220930065012.2860577-1-joel@jms.id.au
---
 arch/powerpc/boot/dts/microwatt.dts | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/powerpc/boot/dts/microwatt.dts b/arch/powerpc/boot/dts/microwatt.dts
index b69db1d275cd6..269e930b3b0b1 100644
--- a/arch/powerpc/boot/dts/microwatt.dts
+++ b/arch/powerpc/boot/dts/microwatt.dts
@@ -21,6 +21,14 @@
 		reg = <0x00000000 0x00000000 0x00000000 0x10000000>;
 	};
 
+	clocks {
+		sys_clk: litex_sys_clk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <100000000>;
+		};
+	};
+
 	cpus {
 		#size-cells = <0x00>;
 		#address-cells = <0x01>;
@@ -141,6 +149,20 @@
 			litex,slot-size = <0x800>;
 			interrupts = <0x11 0x1>;
 		};
+
+		mmc@8040000 {
+			compatible = "litex,mmc";
+			reg = <0x8042800 0x800
+				0x8041000 0x800
+				0x8040800 0x800
+				0x8042000 0x800
+				0x8041800 0x800>;
+			reg-names = "phy", "core", "reader", "writer", "irq";
+			bus-width = <4>;
+			interrupts = <0x13 1>;
+			cap-sd-highspeed;
+			clocks = <&sys_clk>;
+		};
 	};
 
 	chosen {
-- 
GitLab


From 3e65412709293d5fb65249408e8e801b23b72635 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 Oct 2022 14:20:18 +1100
Subject: [PATCH 2626/4122] powerpc: Make instruction dump work with
 scripts/decodecode

Matt reported that scripts/decodecode doesn't work for the instruction
dump in the powerpc oops output. Although there are scripts around that
can decode it, it would be preferable if the standard in-tree script
worked.

All other arches prefix the instruction dump with "Code:", and that's
what the script looks for, so use that.

The script then works as expected:

  $ CROSS_COMPILE=powerpc64le-linux-gnu- ./scripts/decodecode
  Code:
  fbc1fff0 f821ffc1 7c7d1b78 7c9c2378 ebc30028 7fdff378 48000018 60000000
  60000000 ebff0008 7c3ef840 41820048 <815f0060> e93f0000 5529077c 7d295378
  ^D

  All code
  ========
     0:   f0 ff c1 fb     std     r30,-16(r1)
     4:   c1 ff 21 f8     stdu    r1,-64(r1)
     8:   78 1b 7d 7c     mr      r29,r3
     ...

Note that the script doesn't cope well with printk timestamps or printk
caller info.

Reported-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221006032019.1128624-1-mpe@ellerman.id.au
---
 arch/powerpc/kernel/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 67da147fe34dc..3372b5c21168e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1359,7 +1359,7 @@ static void show_instructions(struct pt_regs *regs)
 	unsigned long nip = regs->nip;
 	unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
 
-	printk("Instruction dump:");
+	printk("Code:");
 
 	/*
 	 * If we were executing with the MMU off for instructions, adjust pc
-- 
GitLab


From d90bb7b4fdaff3f2fa68c7af85de2ce9e70189b1 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 6 Oct 2022 14:20:19 +1100
Subject: [PATCH 2627/4122] powerpc: Print instruction dump on a single line

Although the previous commit made the powerpc instruction dump usable
with scripts/decodecode, there are still some problems.

Because the dump is split across multiple lines, the script doesn't cope
with printk timestamps or caller info.

That can be fixed by printing the entire dump on one line, eg:

  [   12.016307][  T112] --- interrupt: c00
  [   12.016605][  T112] Code: 4b7aae15 60000000 3d22016e 3c62ffec 39291160 38639bc0 e8890000 4b7aadf9 60000000 4bfffee8 7c0802a6 60000000 <0fe00000> 60420000 3c4c008f 384268a0
  [   12.017655][  T112] ---[ end trace 0000000000000000 ]---

That output can then be piped directly into scripts/decodecode and
interpreted correctly.

Printing the dump on a single line does produce a very long line, about
173 characters. That is still shorter than x86, which prints nearly 200
characters even without timestamps etc.

All consoles I'm aware of will wrap the line if it's too long, so the
length should not be a functional problem. If anything it should help on
consoles like VGA by using less vertical space.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221006032019.1128624-2-mpe@ellerman.id.au
---
 arch/powerpc/kernel/process.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 3372b5c21168e..e3e1feaa536a9 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1359,7 +1359,7 @@ static void show_instructions(struct pt_regs *regs)
 	unsigned long nip = regs->nip;
 	unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
 
-	printk("Code:");
+	printk("Code: ");
 
 	/*
 	 * If we were executing with the MMU off for instructions, adjust pc
@@ -1373,9 +1373,6 @@ static void show_instructions(struct pt_regs *regs)
 	for (i = 0; i < NR_INSN_TO_PRINT; i++) {
 		int instr;
 
-		if (!(i % 8))
-			pr_cont("\n");
-
 		if (!__kernel_text_address(pc) ||
 		    get_kernel_nofault(instr, (const void *)pc)) {
 			pr_cont("XXXXXXXX ");
-- 
GitLab


From f985adaf2ff934ec869b32ca1f7f97e2825e3a49 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 6 Oct 2022 20:56:53 +1000
Subject: [PATCH 2628/4122] powerpc: remove the last remnants of cputime_t

cputime_t was a core kernel type, removed by commits
ed5c8c854f2b..b672592f0221. As explained in commit b672592f0221
("sched/cputime: Remove generic asm headers"), the final cleanup
is for the arch to provide cputime_to_nsec[s](). Commit ade7667a981b
("powerpc: Add cputime_to_nsecs()") did that, but justdidn't remove
the then-unused cputime_to_usecs(), cputime_t type, and associated
remnants.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221006105653.115829-1-npiggin@gmail.com
---
 arch/powerpc/include/asm/cputime.h | 17 +----------------
 arch/powerpc/kernel/time.c         | 23 ++---------------------
 2 files changed, 3 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 431ae23430222..4961fb38e4385 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -21,23 +21,8 @@
 #include <asm/param.h>
 #include <asm/firmware.h>
 
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
-
 #ifdef __KERNEL__
-/*
- * Convert cputime <-> microseconds
- */
-extern u64 __cputime_usec_factor;
-
-static inline unsigned long cputime_to_usecs(const cputime_t ct)
-{
-	return mulhdu((__force u64) ct, __cputime_usec_factor);
-}
-
-#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime)
+#define cputime_to_nsecs(cputime) tb_to_ns(cputime)
 
 /*
  * PPC64 uses PACA which is task independent for storing accounting data while
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a2ab397065c66..d68de3618741e 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -130,7 +130,7 @@ unsigned long tb_ticks_per_jiffy;
 unsigned long tb_ticks_per_usec = 100; /* sane default */
 EXPORT_SYMBOL(tb_ticks_per_usec);
 unsigned long tb_ticks_per_sec;
-EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime_t conversions */
+EXPORT_SYMBOL(tb_ticks_per_sec);	/* for cputime conversions */
 
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL_GPL(rtc_lock);
@@ -150,21 +150,6 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq);
 bool tb_invalid;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-/*
- * Factor for converting from cputime_t (timebase ticks) to
- * microseconds. This is stored as 0.64 fixed-point binary fraction.
- */
-u64 __cputime_usec_factor;
-EXPORT_SYMBOL(__cputime_usec_factor);
-
-static void calc_cputime_factors(void)
-{
-	struct div_result res;
-
-	div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
-	__cputime_usec_factor = res.result_low;
-}
-
 /*
  * Read the SPURR on systems that have it, otherwise the PURR,
  * or if that doesn't exist return the timebase value passed in.
@@ -369,10 +354,7 @@ void vtime_flush(struct task_struct *tsk)
 	acct->hardirq_time = 0;
 	acct->softirq_time = 0;
 }
-
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-#define calc_cputime_factors()
-#endif
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 void __delay(unsigned long loops)
 {
@@ -914,7 +896,6 @@ void __init time_init(void)
 	tb_ticks_per_jiffy = ppc_tb_freq / HZ;
 	tb_ticks_per_sec = ppc_tb_freq;
 	tb_ticks_per_usec = ppc_tb_freq / 1000000;
-	calc_cputime_factors();
 
 	/*
 	 * Compute scale factor for sched_clock.
-- 
GitLab


From 2cb1dfac6f792f9e4a092793215f0d26e9f8d5b2 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 9 Oct 2022 12:49:50 +0200
Subject: [PATCH 2629/4122] powerpc/sysdev: Remove some duplicate prefix in
 some messages

At the beginning of the file, we have:
   #define pr_fmt(fmt) "xive: " fmt

So, there is no need to duplicate "XIVE:" in debug and error messages.

For the records, these useless prefix have been added in commit
5af50993850a ("KVM: PPC: Book3S HV: Native usage of the XIVE interrupt
controller")

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/7b8b5915a2c7c1616b33e8433ebe0a0bf07070a2.1665312579.git.christophe.jaillet@wanadoo.fr
---
 arch/powerpc/sysdev/xive/native.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 3925825954bcc..19d880ebc5e61 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -535,13 +535,13 @@ static bool __init xive_parse_provisioning(struct device_node *np)
 static void __init xive_native_setup_pools(void)
 {
 	/* Allocate a pool big enough */
-	pr_debug("XIVE: Allocating VP block for pool size %u\n", nr_cpu_ids);
+	pr_debug("Allocating VP block for pool size %u\n", nr_cpu_ids);
 
 	xive_pool_vps = xive_native_alloc_vp_block(nr_cpu_ids);
 	if (WARN_ON(xive_pool_vps == XIVE_INVALID_VP))
-		pr_err("XIVE: Failed to allocate pool VP, KVM might not function\n");
+		pr_err("Failed to allocate pool VP, KVM might not function\n");
 
-	pr_debug("XIVE: Pool VPs allocated at 0x%x for %u max CPUs\n",
+	pr_debug("Pool VPs allocated at 0x%x for %u max CPUs\n",
 		 xive_pool_vps, nr_cpu_ids);
 }
 
-- 
GitLab


From 579aee9fc594af94c242068c011b0233563d4bbf Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 10 Oct 2022 16:57:21 +1100
Subject: [PATCH 2630/4122] powerpc: suppress some linker warnings in recent
 linker versions

This is a follow on from commit

  0d362be5b142 ("Makefile: link with -z noexecstack --no-warn-rwx-segments")

for arch/powerpc/boot to address wanrings like:

  ld: warning: opal-calls.o: missing .note.GNU-stack section implies executable stack
  ld: NOTE: This behaviour is deprecated and will be removed in a future version of the linker
  ld: warning: arch/powerpc/boot/zImage.epapr has a LOAD segment with RWX permissions

This fixes issue https://github.com/linuxppc/issues/issues/417

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221010165721.106267e6@canb.auug.org.au
---
 arch/powerpc/boot/wrapper | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 5bdd4dd20bbb1..a8a87d7667f4e 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -215,6 +215,11 @@ ld_version()
     }'
 }
 
+ld_is_lld()
+{
+	${CROSS}ld -V 2>&1 | grep -q LLD
+}
+
 # Do not include PT_INTERP segment when linking pie. Non-pie linking
 # just ignores this option.
 LD_VERSION=$(${CROSS}ld --version | ld_version)
@@ -223,6 +228,14 @@ if [ "$LD_VERSION" -ge "$LD_NO_DL_MIN_VERSION" ] ; then
 	nodl="--no-dynamic-linker"
 fi
 
+# suppress some warnings in recent ld versions
+nowarn="-z noexecstack"
+if ! ld_is_lld; then
+	if [ "$LD_VERSION" -ge "$(echo 2.39 | ld_version)" ]; then
+		nowarn="$nowarn --no-warn-rwx-segments"
+	fi
+fi
+
 platformo=$object/"$platform".o
 lds=$object/zImage.lds
 ext=strip
@@ -504,7 +517,7 @@ if [ "$platform" != "miboot" ]; then
         text_start="-Ttext $link_address"
     fi
 #link everything
-    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $rodynamic $notext -o "$ofile" $map \
+    ${CROSS}ld -m $format -T $lds $text_start $pie $nodl $nowarn $rodynamic $notext -o "$ofile" $map \
 	$platformo $tmp $object/wrapper.a
     rm $tmp
 fi
-- 
GitLab


From 8b49670f3bb3f10cd4d5a6dca17f5a31b173ecdc Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 17 Oct 2022 11:23:33 +0800
Subject: [PATCH 2631/4122] powerpc/xive: add missing iounmap() in error path
 in xive_spapr_populate_irq_data()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If remapping 'data->trig_page' fails, the 'data->eoi_mmio' need be unmapped
before returning from xive_spapr_populate_irq_data().

Fixes: eac1e731b59e ("powerpc/xive: guest exploitation of the XIVE interrupt controller")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Cédric Le Goater <clg@kaod.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221017032333.1852406-1-yangyingliang@huawei.com
---
 arch/powerpc/sysdev/xive/spapr.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index e2c8f93b535ba..e454192643910 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -439,6 +439,7 @@ static int xive_spapr_populate_irq_data(u32 hw_irq, struct xive_irq_data *data)
 
 	data->trig_mmio = ioremap(data->trig_page, 1u << data->esb_shift);
 	if (!data->trig_mmio) {
+		iounmap(data->eoi_mmio);
 		pr_err("Failed to map trigger page for irq 0x%x\n", hw_irq);
 		return -ENOMEM;
 	}
-- 
GitLab


From 16a3f41ff3322830683d3ccc14d77736829c61bf Mon Sep 17 00:00:00 2001
From: ruanjinjie <ruanjinjie@huawei.com>
Date: Wed, 19 Oct 2022 14:34:14 +0800
Subject: [PATCH 2632/4122] powerpc/mpic_msgr: fix cast removes address space
 of expression warnings

When build Linux kernel, encounter the following warnings:

./arch/powerpc/sysdev/mpic_msgr.c:230:38: warning: cast removes address space '__iomem' of expression
./arch/powerpc/sysdev/mpic_msgr.c:230:27: warning: incorrect type in assignment (different address spaces)

The data type of msgr->mer and msgr->base are 'u32 __iomem *', but
converted to 'u32 *' and 'u8 *' directly and cause above warnings, now
instead of using a type cast, change the size of the pointer offset to fix
these warnings.

Signed-off-by: ruanjinjie <ruanjinjie@huawei.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221019063414.3758087-1-ruanjinjie@huawei.com
---
 arch/powerpc/sysdev/mpic_msgr.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/sysdev/mpic_msgr.c b/arch/powerpc/sysdev/mpic_msgr.c
index a439e33eae061..d75064fb7d12f 100644
--- a/arch/powerpc/sysdev/mpic_msgr.c
+++ b/arch/powerpc/sysdev/mpic_msgr.c
@@ -20,7 +20,7 @@
 
 #define MPIC_MSGR_REGISTERS_PER_BLOCK	4
 #define MPIC_MSGR_STRIDE		0x10
-#define MPIC_MSGR_MER_OFFSET		0x100
+#define MPIC_MSGR_MER_OFFSET		(0x100 / sizeof(u32))
 #define MSGR_INUSE			0
 #define MSGR_FREE			1
 
@@ -234,7 +234,7 @@ static int mpic_msgr_probe(struct platform_device *dev)
 
 		reg_number = block_number * MPIC_MSGR_REGISTERS_PER_BLOCK + i;
 		msgr->base = msgr_block_addr + i * MPIC_MSGR_STRIDE;
-		msgr->mer = (u32 *)((u8 *)msgr->base + MPIC_MSGR_MER_OFFSET);
+		msgr->mer = msgr->base + MPIC_MSGR_MER_OFFSET;
 		msgr->in_use = MSGR_FREE;
 		msgr->num = i;
 		raw_spin_lock_init(&msgr->lock);
-- 
GitLab


From 2fa9482334b0593b7edc371a13c0cca81daaa89e Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 20 Oct 2022 22:58:57 +0530
Subject: [PATCH 2633/4122] powerpc/kprobes: Remove preempt disable around call
 to get_kprobe() in arch_prepare_kprobe()

arch_prepare_kprobe() is called from register_kprobe() via
prepare_kprobe(), or through register_aggr_kprobe(), both with the
kprobe_mutex held. Per the comment for get_kprobe():
  /*
   * This routine is called either:
   *	- under the 'kprobe_mutex' - during kprobe_[un]register().
   *				OR
   *	- with preemption disabled - from architecture specific code.
   */

As such, there is no need to disable preemption around the call to
get_kprobe(). Drop the same.

Reported-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1043d06a0affed83a4a46dd29466e72820ee215d.1666262278.git.naveen.n.rao@linux.vnet.ibm.com
---
 arch/powerpc/kernel/kprobes.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bd7b1a0354594..88f42de681e1f 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -158,9 +158,7 @@ int arch_prepare_kprobe(struct kprobe *p)
 		printk("Cannot register a kprobe on the second word of prefixed instruction\n");
 		ret = -EINVAL;
 	}
-	preempt_disable();
 	prev = get_kprobe(p->addr - 1);
-	preempt_enable_no_resched();
 
 	/*
 	 * When prev is a ftrace-based kprobe, we don't have an insn, and it
-- 
GitLab


From 04ec5d5782fb346c291a05a2efe59483d8ada4c4 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 20 Oct 2022 22:58:58 +0530
Subject: [PATCH 2634/4122] powerpc/kprobes: Have optimized_callback() use
 preempt_enable()

Similar to x86 commit 2e62024c265aa6 ("kprobes/x86: Use preempt_enable()
in optimized_callback()"), change powerpc optprobes to use
preempt_enable() rather than preempt_enable_no_resched() since powerpc
also removed irq disabling for optprobes in commit f72180cc93a2c6
("powerpc/kprobes: Do not disable interrupts for optprobes and
kprobes_on_ftrace").

Reported-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1885bab182626c33d9bf6421f430abf924c521a5.1666262278.git.naveen.n.rao@linux.vnet.ibm.com
---
 arch/powerpc/kernel/optprobes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 3b1c2236cbee5..004fae2044a3e 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -112,7 +112,7 @@ static void optimized_callback(struct optimized_kprobe *op,
 		__this_cpu_write(current_kprobe, NULL);
 	}
 
-	preempt_enable_no_resched();
+	preempt_enable();
 }
 NOKPROBE_SYMBOL(optimized_callback);
 
-- 
GitLab


From 266b1991a433cd55bb86a933216b3f6762737d47 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 20 Oct 2022 22:58:59 +0530
Subject: [PATCH 2635/4122] powerpc/kprobes: Use preempt_enable() rather than
 the no_resched variant

preempt_enable_no_resched() is just the same as preempt_enable() when we
are in a irqs disabled context. kprobe_handler() and the post/fault
handlers are all called with irqs disabled. As such, convert those to
just use preempt_enable().

Reported-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/72639f75fe66f931ec8c2165276ffbfb0fe1006f.1666262278.git.naveen.n.rao@linux.vnet.ibm.com
---
 arch/powerpc/kernel/kprobes.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 88f42de681e1f..86ca5a61ea9af 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -369,7 +369,7 @@ int kprobe_handler(struct pt_regs *regs)
 
 			if (ret > 0) {
 				restore_previous_kprobe(kcb);
-				preempt_enable_no_resched();
+				preempt_enable();
 				return 1;
 			}
 		}
@@ -382,7 +382,7 @@ int kprobe_handler(struct pt_regs *regs)
 	if (p->pre_handler && p->pre_handler(p, regs)) {
 		/* handler changed execution path, so skip ss setup */
 		reset_current_kprobe();
-		preempt_enable_no_resched();
+		preempt_enable();
 		return 1;
 	}
 
@@ -395,7 +395,7 @@ int kprobe_handler(struct pt_regs *regs)
 
 			kcb->kprobe_status = KPROBE_HIT_SSDONE;
 			reset_current_kprobe();
-			preempt_enable_no_resched();
+			preempt_enable();
 			return 1;
 		}
 	}
@@ -404,7 +404,7 @@ int kprobe_handler(struct pt_regs *regs)
 	return 1;
 
 no_kprobe:
-	preempt_enable_no_resched();
+	preempt_enable();
 	return ret;
 }
 NOKPROBE_SYMBOL(kprobe_handler);
@@ -490,7 +490,7 @@ int kprobe_post_handler(struct pt_regs *regs)
 	}
 	reset_current_kprobe();
 out:
-	preempt_enable_no_resched();
+	preempt_enable();
 
 	/*
 	 * if somebody else is singlestepping across a probe point, msr
@@ -529,7 +529,7 @@ int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
 			restore_previous_kprobe(kcb);
 		else
 			reset_current_kprobe();
-		preempt_enable_no_resched();
+		preempt_enable();
 		break;
 	case KPROBE_HIT_ACTIVE:
 	case KPROBE_HIT_SSDONE:
-- 
GitLab


From 04757c5e21ea17615b66f45e38f1cab32a7a0654 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Fri, 21 Oct 2022 09:45:45 +0100
Subject: [PATCH 2636/4122] selftests/powerpc: Fix spelling mistake "mmaping"
 -> "mmapping"

There is a spelling mistake in a perror message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221021084545.65973-1-colin.i.king@gmail.com
---
 tools/testing/selftests/powerpc/ptrace/core-pkey.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index bbc05ffc5860a..1a70a96f0bfe7 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -329,7 +329,7 @@ static int parent(struct shared_info *info, pid_t pid)
 
 	core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
 	if (core == (void *) -1) {
-		perror("Error mmaping core file");
+		perror("Error mmapping core file");
 		ret = TEST_FAIL;
 		goto out;
 	}
-- 
GitLab


From ad8284ead833379fc57d90e50dbae1352b116c2b Mon Sep 17 00:00:00 2001
From: Shaomin Deng <dengshaomin@cdjrlc.com>
Date: Sat, 29 Oct 2022 05:46:43 -0400
Subject: [PATCH 2637/4122] selftests/powerpc: Remove repeated word in comments

Remove the repeated word "not" in comments.

Signed-off-by: Shaomin Deng <dengshaomin@cdjrlc.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221029094643.5595-1-dengshaomin@cdjrlc.com
---
 tools/testing/selftests/powerpc/include/pkeys.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
index 3312cb1b058d7..51729d9a71110 100644
--- a/tools/testing/selftests/powerpc/include/pkeys.h
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -24,7 +24,7 @@
 #undef PKEY_DISABLE_EXECUTE
 #define PKEY_DISABLE_EXECUTE	0x4
 
-/* Older versions of libc do not not define this */
+/* Older versions of libc do not define this */
 #ifndef SEGV_PKUERR
 #define SEGV_PKUERR	4
 #endif
-- 
GitLab


From f668027521561d1071ccf54500c82a58a1918b2b Mon Sep 17 00:00:00 2001
From: Russell Currey <ruscur@russell.cc>
Date: Mon, 24 Oct 2022 15:13:46 +1100
Subject: [PATCH 2638/4122] powerpc/8xx: Fix warning in hw_breakpoint_handler()

In hw_breakpoint_handler(), ea is set by wp_get_instr_detail() except
for 8xx, leading the variable to be passed uninitialised to
wp_check_constraints().  This is safe as wp_check_constraints() returns
early without using ea, so just set it to make the compiler happy.

Signed-off-by: Russell Currey <ruscur@russell.cc>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221024041346.103608-1-ruscur@russell.cc
---
 arch/powerpc/kernel/hw_breakpoint.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 8db1a15d7acbe..e1b4e70c8fd0f 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -646,7 +646,7 @@ int hw_breakpoint_handler(struct die_args *args)
 	ppc_inst_t instr = ppc_inst(0);
 	int type = 0;
 	int size = 0;
-	unsigned long ea;
+	unsigned long ea = 0;
 
 	/* Disable breakpoints during exception handling */
 	hw_breakpoint_disable();
-- 
GitLab


From afa1cda4097077e37639ca7098c2147e1885b2df Mon Sep 17 00:00:00 2001
From: Bo Liu <liubo03@inspur.com>
Date: Mon, 31 Oct 2022 02:37:06 -0400
Subject: [PATCH 2639/4122] powerpc/pseries/eeh: Fix some kernel-doc warnings

Fixes the following W=1 kernel build warning(s):
  arch/powerpc/platforms/pseries/eeh_pseries.c:163: warning: Function parameter or member 'config_addr' not described in 'pseries_eeh_phb_reset'
  arch/powerpc/platforms/pseries/eeh_pseries.c:163: warning: Excess function parameter 'config_adddr' description in 'pseries_eeh_phb_reset'
  arch/powerpc/platforms/pseries/eeh_pseries.c:198: warning: Function parameter or member 'config_addr' not described in 'pseries_eeh_phb_configure_bridge'
  arch/powerpc/platforms/pseries/eeh_pseries.c:198: warning: Excess function parameter 'config_adddr' description in 'pseries_eeh_phb_configure_bridge'

Signed-off-by: Bo Liu <liubo03@inspur.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221031063706.2770-1-liubo03@inspur.com
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index 8e40ccac0f44e..ea890037843c4 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -154,7 +154,7 @@ static int pseries_eeh_get_pe_config_addr(struct pci_dn *pdn)
 /**
  * pseries_eeh_phb_reset - Reset the specified PHB
  * @phb: PCI controller
- * @config_adddr: the associated config address
+ * @config_addr: the associated config address
  * @option: reset option
  *
  * Reset the specified PHB/PE
@@ -188,7 +188,7 @@ static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, in
 /**
  * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE
  * @phb: PCI controller
- * @config_adddr: the associated config address
+ * @config_addr: the associated config address
  *
  * The function will be called to reconfigure the bridges included
  * in the specified PE so that the mulfunctional PE would be recovered
-- 
GitLab


From 59dc2d94bc12dac53a5d2368ad97ca24e7cc5682 Mon Sep 17 00:00:00 2001
From: Chen Lifu <chenlifu@huawei.com>
Date: Thu, 3 Nov 2022 15:01:22 +0800
Subject: [PATCH 2640/4122] powerpc/powermac: Fix symbol not declared warnings

1. ppc_override_l2cr and ppc_override_l2cr_value are only used in
   l2cr_init() function, remove them and used *l2cr directly.
2. has_l2cache is not used outside of the file, so mark it static and
   do not initialise statics to 0.

Fixes the following warnings:

  arch/powerpc/platforms/powermac/setup.c:73:5: warning: symbol
  'ppc_override_l2cr' was not declared. Should it be static?
  arch/powerpc/platforms/powermac/setup.c:74:5: warning: symbol
  'ppc_override_l2cr_value' was not declared. Should it be static?
  arch/powerpc/platforms/powermac/setup.c:75:5: warning: symbol
  'has_l2cache' was not declared. Should it be static?

Signed-off-by: Chen Lifu <chenlifu@huawei.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[mpe: Unwrap printk string]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221103070122.340773-1-chenlifu@huawei.com
---
 arch/powerpc/platforms/powermac/setup.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/platforms/powermac/setup.c b/arch/powerpc/platforms/powermac/setup.c
index 04daa7f0a03c6..4f7ee885a78ff 100644
--- a/arch/powerpc/platforms/powermac/setup.c
+++ b/arch/powerpc/platforms/powermac/setup.c
@@ -70,9 +70,7 @@
 
 #undef SHOW_GATWICK_IRQS
 
-int ppc_override_l2cr = 0;
-int ppc_override_l2cr_value;
-int has_l2cache = 0;
+static int has_l2cache;
 
 int pmac_newworld;
 
@@ -236,22 +234,16 @@ static void __init l2cr_init(void)
 			const unsigned int *l2cr =
 				of_get_property(np, "l2cr-value", NULL);
 			if (l2cr) {
-				ppc_override_l2cr = 1;
-				ppc_override_l2cr_value = *l2cr;
 				_set_L2CR(0);
-				_set_L2CR(ppc_override_l2cr_value);
+				_set_L2CR(*l2cr);
+				pr_info("L2CR overridden (0x%x), backside cache is %s\n",
+					*l2cr, ((*l2cr) & 0x80000000) ?
+					"enabled" : "disabled");
 			}
 			of_node_put(np);
 			break;
 		}
 	}
-
-	if (ppc_override_l2cr)
-		printk(KERN_INFO "L2CR overridden (0x%x), "
-		       "backside cache is %s\n",
-		       ppc_override_l2cr_value,
-		       (ppc_override_l2cr_value & 0x80000000)
-				? "enabled" : "disabled");
 }
 #endif
 
-- 
GitLab


From 2330757e0be0acad88852e211dcd6106390a729b Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Sun, 6 Nov 2022 15:58:34 -0500
Subject: [PATCH 2641/4122] powerpc/pseries: fix the object owners enum value
 in plpks driver

OS_VAR_LINUX enum in PLPKS driver should be 0x02 instead of 0x01.

Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore")
Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221106205839.600442-2-nayna@linux.ibm.com
---
 arch/powerpc/platforms/pseries/plpks.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/plpks.h b/arch/powerpc/platforms/pseries/plpks.h
index c6a291367bb13..275ccd86bfb5e 100644
--- a/arch/powerpc/platforms/pseries/plpks.h
+++ b/arch/powerpc/platforms/pseries/plpks.h
@@ -17,7 +17,7 @@
 #define WORLDREADABLE 0x08000000
 #define SIGNEDUPDATE 0x01000000
 
-#define PLPKS_VAR_LINUX	0x01
+#define PLPKS_VAR_LINUX	0x02
 #define PLPKS_VAR_COMMON	0x04
 
 struct plpks_var {
-- 
GitLab


From af223e1728c448073d1e12fe464bf344310edeba Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Sun, 6 Nov 2022 15:58:35 -0500
Subject: [PATCH 2642/4122] powerpc/pseries: Fix the H_CALL error code in PLPKS
 driver

PAPR Spec defines H_P1 actually as H_PARAMETER and maps H_ABORTED to
a different numerical value.

Fix the error codes as per PAPR Specification.

Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore")
Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221106205839.600442-3-nayna@linux.ibm.com
---
 arch/powerpc/include/asm/hvcall.h      | 3 +--
 arch/powerpc/platforms/pseries/plpks.c | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 8abae463f6c12..95fd7f9485d55 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -79,7 +79,7 @@
 #define H_NOT_ENOUGH_RESOURCES -44
 #define H_R_STATE       -45
 #define H_RESCINDED     -46
-#define H_P1		-54
+#define H_ABORTED	-54
 #define H_P2		-55
 #define H_P3		-56
 #define H_P4		-57
@@ -100,7 +100,6 @@
 #define H_COP_HW	-74
 #define H_STATE		-75
 #define H_IN_USE	-77
-#define H_ABORTED	-78
 #define H_UNSUPPORTED_FLAG_START	-256
 #define H_UNSUPPORTED_FLAG_END		-511
 #define H_MULTI_THREADS_ACTIVE	-9005
diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index f4b5b5a64db3d..32ce4d780d8f5 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -75,7 +75,7 @@ static int pseries_status_to_err(int rc)
 	case H_FUNCTION:
 		err = -ENXIO;
 		break;
-	case H_P1:
+	case H_PARAMETER:
 	case H_P2:
 	case H_P3:
 	case H_P4:
-- 
GitLab


From bb8e4c7cb759b90a04f2e94056b50288ff46a0ed Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Sun, 6 Nov 2022 15:58:36 -0500
Subject: [PATCH 2643/4122] powerpc/pseries: Return -EIO instead of -EINTR for
 H_ABORTED error

Some commands for eg. "cat" might continue to retry on encountering
EINTR. This is not expected for original error code H_ABORTED.

Map H_ABORTED to more relevant Linux error code EIO.

Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore")
Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221106205839.600442-4-nayna@linux.ibm.com
---
 arch/powerpc/platforms/pseries/plpks.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index 32ce4d780d8f5..cbea447122ca0 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -111,7 +111,7 @@ static int pseries_status_to_err(int rc)
 		err = -EEXIST;
 		break;
 	case H_ABORTED:
-		err = -EINTR;
+		err = -EIO;
 		break;
 	default:
 		err = -EINVAL;
-- 
GitLab


From 8888ea772972323362660e9a1339175294664a6c Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Sun, 6 Nov 2022 15:58:37 -0500
Subject: [PATCH 2644/4122] powerpc/pseries: cleanup error logs in plpks driver

Logging H_CALL return codes in PLPKS driver are easy to confuse with
Linux error codes.

Let the caller of the function log the converted linux error code.

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221106205839.600442-5-nayna@linux.ibm.com
---
 arch/powerpc/platforms/pseries/plpks.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index cbea447122ca0..72d9debf18c0b 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -312,10 +312,6 @@ int plpks_write_var(struct plpks_var var)
 	if (!rc)
 		rc = plpks_confirm_object_flushed(label, auth);
 
-	if (rc)
-		pr_err("Failed to write variable %s for component %s with error %d\n",
-		       var.name, var.component, rc);
-
 	rc = pseries_status_to_err(rc);
 	kfree(label);
 out:
@@ -350,10 +346,6 @@ int plpks_remove_var(char *component, u8 varos, struct plpks_var_name vname)
 	if (!rc)
 		rc = plpks_confirm_object_flushed(label, auth);
 
-	if (rc)
-		pr_err("Failed to remove variable %s for component %s with error %d\n",
-		       vname.name, component, rc);
-
 	rc = pseries_status_to_err(rc);
 	kfree(label);
 out:
@@ -395,8 +387,6 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var)
 			 maxobjsize);
 
 	if (rc != H_SUCCESS) {
-		pr_err("Failed to read variable %s for component %s with error %d\n",
-		       var->name, var->component, rc);
 		rc = pseries_status_to_err(rc);
 		goto out_free_output;
 	}
-- 
GitLab


From 212dd5cfbee7815f3c665a51c501701edb881599 Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Sun, 6 Nov 2022 15:58:38 -0500
Subject: [PATCH 2645/4122] powerpc/pseries: replace kmalloc with kzalloc in
 PLPKS driver

Replace kmalloc with kzalloc in construct_auth() function to default
initialize structure with zeroes.

Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221106205839.600442-6-nayna@linux.ibm.com
---
 arch/powerpc/platforms/pseries/plpks.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index 72d9debf18c0b..e8c02735b7021 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -162,19 +162,15 @@ static struct plpks_auth *construct_auth(u8 consumer)
 	if (consumer > PKS_OS_OWNER)
 		return ERR_PTR(-EINVAL);
 
-	auth = kmalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL);
+	auth = kzalloc(struct_size(auth, password, maxpwsize), GFP_KERNEL);
 	if (!auth)
 		return ERR_PTR(-ENOMEM);
 
 	auth->version = 1;
 	auth->consumer = consumer;
-	auth->rsvd0 = 0;
-	auth->rsvd1 = 0;
 
-	if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER) {
-		auth->passwordlength = 0;
+	if (consumer == PKS_FW_OWNER || consumer == PKS_BOOTLOADER_OWNER)
 		return auth;
-	}
 
 	memcpy(auth->password, ospassword, ospasswordlength);
 
-- 
GitLab


From 1f622f3f80cbf8999ff5955a2fcfbd801a1f32e0 Mon Sep 17 00:00:00 2001
From: Nayna Jain <nayna@linux.ibm.com>
Date: Sun, 6 Nov 2022 15:58:39 -0500
Subject: [PATCH 2646/4122] powerpc/pseries: fix plpks_read_var() code for
 different consumers

Even though plpks_read_var() is currently called to read variables
owned by different consumers, it internally supports only OS consumer.

Fix plpks_read_var() to handle different consumers correctly.

Fixes: 2454a7af0f2a ("powerpc/pseries: define driver for Platform KeyStore")
Signed-off-by: Nayna Jain <nayna@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221106205839.600442-7-nayna@linux.ibm.com
---
 arch/powerpc/platforms/pseries/plpks.c | 28 +++++++++++++++++---------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/plpks.c b/arch/powerpc/platforms/pseries/plpks.c
index e8c02735b7021..4edd1585e2457 100644
--- a/arch/powerpc/platforms/pseries/plpks.c
+++ b/arch/powerpc/platforms/pseries/plpks.c
@@ -354,22 +354,24 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var)
 {
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE] = { 0 };
 	struct plpks_auth *auth;
-	struct label *label;
+	struct label *label = NULL;
 	u8 *output;
 	int rc;
 
 	if (var->namelen > MAX_NAME_SIZE)
 		return -EINVAL;
 
-	auth = construct_auth(PKS_OS_OWNER);
+	auth = construct_auth(consumer);
 	if (IS_ERR(auth))
 		return PTR_ERR(auth);
 
-	label = construct_label(var->component, var->os, var->name,
-				var->namelen);
-	if (IS_ERR(label)) {
-		rc = PTR_ERR(label);
-		goto out_free_auth;
+	if (consumer == PKS_OS_OWNER) {
+		label = construct_label(var->component, var->os, var->name,
+					var->namelen);
+		if (IS_ERR(label)) {
+			rc = PTR_ERR(label);
+			goto out_free_auth;
+		}
 	}
 
 	output = kzalloc(maxobjsize, GFP_KERNEL);
@@ -378,9 +380,15 @@ static int plpks_read_var(u8 consumer, struct plpks_var *var)
 		goto out_free_label;
 	}
 
-	rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
-			 virt_to_phys(label), label->size, virt_to_phys(output),
-			 maxobjsize);
+	if (consumer == PKS_OS_OWNER)
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(label), label->size, virt_to_phys(output),
+				 maxobjsize);
+	else
+		rc = plpar_hcall(H_PKS_READ_OBJECT, retbuf, virt_to_phys(auth),
+				 virt_to_phys(var->name), var->namelen, virt_to_phys(output),
+				 maxobjsize);
+
 
 	if (rc != H_SUCCESS) {
 		rc = pseries_status_to_err(rc);
-- 
GitLab


From a9ffb8ee7b65a468474d6a2be7e9cca4b8f8ea5f Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 18 Nov 2022 17:40:29 +0800
Subject: [PATCH 2647/4122] powerpc: Use "grep -E" instead of "egrep"

The latest version of grep claims the egrep is now obsolete so the build
now contains warnings that look like:
	egrep: warning: egrep is obsolescent; using grep -E
fix this up by moving the related file to use "grep -E" instead.

  sed -i "s/egrep/grep -E/g" `grep egrep -rwl arch/powerpc`

Here are the steps to install the latest grep:

  wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz
  tar xf grep-3.8.tar.gz
  cd grep-3.8 && ./configure && make
  sudo make install
  export PATH=/usr/local/bin:$PATH

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1668764429-11540-1-git-send-email-yangtiezhu@loongson.cn
---
 arch/powerpc/boot/wrapper | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index a8a87d7667f4e..af04cea82b941 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -594,7 +594,7 @@ ps3)
     # reached, then enter the system reset vector of the partially decompressed
     # image.  No warning is issued.
     rm -f "$odir"/{otheros,otheros-too-big}.bld
-    size=$(${CROSS}nm --no-sort --radix=d "$ofile" | egrep ' _end$' | cut -d' ' -f1)
+    size=$(${CROSS}nm --no-sort --radix=d "$ofile" | grep -E ' _end$' | cut -d' ' -f1)
     bld="otheros.bld"
     if [ $size -gt $((0x1000000)) ]; then
         bld="otheros-too-big.bld"
-- 
GitLab


From 15792642db6946890416a6d1616b03ab25c26fa7 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:01:54 -0800
Subject: [PATCH 2648/4122] perf stat: Fix cgroup display in JSON output

It missed the 'else' keyword after checking json output mode.

Fixes: 41cb875242e71bf1 ("perf stat: Split print_cgroup() function")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index f5501760ff2e1..46e90f0bb423f 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -175,7 +175,7 @@ static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp)
 
 		if (config->json_output)
 			print_cgroup_json(config, cgrp_name);
-		if (config->csv_output)
+		else if (config->csv_output)
 			print_cgroup_csv(config, cgrp_name);
 		else
 			print_cgroup_std(config, cgrp_name);
-- 
GitLab


From 6d74ed369d4342bb7d4fecbc1cde6061b5bb5604 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:01:55 -0800
Subject: [PATCH 2649/4122] perf stat: Move summary prefix printing logic in
 CSV output

It matches to the prefix (interval timestamp), so better to have them together.
No functional change intended.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 46e90f0bb423f..d86f2f8e020d4 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -713,11 +713,6 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 		nl = config->metric_only ? new_line_metric : new_line_std;
 	}
 
-	if (!config->no_csv_summary && config->csv_output &&
-	    config->summary && !config->interval && !config->metric_only) {
-		fprintf(config->output, "%16s%s", "summary", config->csv_sep);
-	}
-
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
 		if (config->metric_only) {
 			pm(config, &os, NULL, "", "", 0);
@@ -828,8 +823,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	ena = aggr->counts.ena;
 	run = aggr->counts.run;
 
-	if (prefix && !metric_only)
-		fprintf(output, "%s", prefix);
+	if (!metric_only) {
+		if (prefix)
+			fprintf(output, "%s", prefix);
+		else if (config->summary && config->csv_output &&
+			 !config->no_csv_summary && !config->interval)
+			fprintf(output, "%16s%s", "summary", config->csv_sep);
+	}
 
 	uval = val * counter->scale;
 
-- 
GitLab


From 8e55ae24c08fd5bf39b632df72122a7c2591c03c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:01:56 -0800
Subject: [PATCH 2650/4122] perf stat: Do not align time prefix in CSV output

We don't care about the alignment in the CSV output as it's intended for machine
processing.  Let's get rid of it to make the output more compact.

Before:

  # perf stat -a --summary -I 1 -x, true
       0.001149309,219.20,msec,cpu-clock,219322251,100.00,219.200,CPUs utilized
       0.001149309,144,,context-switches,219241902,100.00,656.935,/sec
       0.001149309,38,,cpu-migrations,219173705,100.00,173.358,/sec
       0.001149309,61,,page-faults,219093635,100.00,278.285,/sec
       0.001149309,10679310,,cycles,218746228,100.00,0.049,GHz
       0.001149309,6288296,,instructions,218589869,100.00,0.59,insn per cycle
       0.001149309,1386904,,branches,218428851,100.00,6.327,M/sec
       0.001149309,56863,,branch-misses,218219951,100.00,4.10,of all branches
           summary,219.20,msec,cpu-clock,219322251,100.00,20.025,CPUs utilized
           summary,144,,context-switches,219241902,100.00,656.935,/sec
           summary,38,,cpu-migrations,219173705,100.00,173.358,/sec
           summary,61,,page-faults,219093635,100.00,278.285,/sec
           summary,10679310,,cycles,218746228,100.00,0.049,GHz
           summary,6288296,,instructions,218589869,100.00,0.59,insn per cycle
           summary,1386904,,branches,218428851,100.00,6.327,M/sec
           summary,56863,,branch-misses,218219951,100.00,4.10,of all branches

After:

  0.001148449,224.75,msec,cpu-clock,224870589,100.00,224.747,CPUs utilized
  0.001148449,176,,context-switches,224775564,100.00,783.103,/sec
  0.001148449,38,,cpu-migrations,224707428,100.00,169.079,/sec
  0.001148449,61,,page-faults,224629326,100.00,271.416,/sec
  0.001148449,12172071,,cycles,224266368,100.00,0.054,GHz
  0.001148449,6901907,,instructions,224108764,100.00,0.57,insn per cycle
  0.001148449,1515655,,branches,223946693,100.00,6.744,M/sec
  0.001148449,70027,,branch-misses,223735385,100.00,4.62,of all branches
  summary,224.75,msec,cpu-clock,224870589,100.00,21.066,CPUs utilized
  summary,176,,context-switches,224775564,100.00,783.103,/sec
  summary,38,,cpu-migrations,224707428,100.00,169.079,/sec
  summary,61,,page-faults,224629326,100.00,271.416,/sec
  summary,12172071,,cycles,224266368,100.00,0.054,GHz
  summary,6901907,,instructions,224108764,100.00,0.57,insn per cycle
  summary,1515655,,branches,223946693,100.00,6.744,M/sec
  summary,70027,,branch-misses,223735385,100.00,4.62,of all branches

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index d86f2f8e020d4..15c88b9b5aa34 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -828,7 +828,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 			fprintf(output, "%s", prefix);
 		else if (config->summary && config->csv_output &&
 			 !config->no_csv_summary && !config->interval)
-			fprintf(output, "%16s%s", "summary", config->csv_sep);
+			fprintf(output, "%s%s", "summary", config->csv_sep);
 	}
 
 	uval = val * counter->scale;
@@ -1078,9 +1078,12 @@ static void prepare_interval(struct perf_stat_config *config,
 	if (config->iostat_run)
 		return;
 
-	if (!config->json_output)
-		sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec,
+	if (config->csv_output)
+		sprintf(prefix, "%lu.%09lu%s", (unsigned long) ts->tv_sec,
 				 ts->tv_nsec, config->csv_sep);
+	else if (!config->json_output)
+		sprintf(prefix, "%6lu.%09lu ", (unsigned long) ts->tv_sec,
+				 ts->tv_nsec);
 	else if (!config->metric_only)
 		sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long)
 				 ts->tv_sec, ts->tv_nsec);
-- 
GitLab


From a7ec1dd2d744208ca814ebde8c97f94e041625ef Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:01:57 -0800
Subject: [PATCH 2651/4122] perf stat: Use scnprintf() in prepare_interval()

It should not use sprintf() anymore.  Let's pass the buffer size and use the
safer scnprintf() instead.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 15c88b9b5aa34..744b7a40f59ad 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1073,23 +1073,23 @@ static void print_metric_headers(struct perf_stat_config *config,
 }
 
 static void prepare_interval(struct perf_stat_config *config,
-			     char *prefix, struct timespec *ts)
+			     char *prefix, size_t len, struct timespec *ts)
 {
 	if (config->iostat_run)
 		return;
 
 	if (config->csv_output)
-		sprintf(prefix, "%lu.%09lu%s", (unsigned long) ts->tv_sec,
-				 ts->tv_nsec, config->csv_sep);
+		scnprintf(prefix, len, "%lu.%09lu%s",
+			  (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
 	else if (!config->json_output)
-		sprintf(prefix, "%6lu.%09lu ", (unsigned long) ts->tv_sec,
-				 ts->tv_nsec);
+		scnprintf(prefix, len, "%6lu.%09lu ",
+			  (unsigned long) ts->tv_sec, ts->tv_nsec);
 	else if (!config->metric_only)
-		sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long)
-				 ts->tv_sec, ts->tv_nsec);
+		scnprintf(prefix, len, "{\"interval\" : %lu.%09lu, ",
+			  (unsigned long) ts->tv_sec, ts->tv_nsec);
 	else
-		sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long)
-				 ts->tv_sec, ts->tv_nsec);
+		scnprintf(prefix, len, "{\"interval\" : %lu.%09lu}",
+			  (unsigned long) ts->tv_sec, ts->tv_nsec);
 }
 
 static void print_header_interval_std(struct perf_stat_config *config,
@@ -1390,7 +1390,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 
 	if (interval) {
 		prefix = buf;
-		prepare_interval(config, prefix, ts);
+		prepare_interval(config, buf, sizeof(buf), ts);
 	}
 
 	print_header(config, _target, evlist, argc, argv);
-- 
GitLab


From f123b2d84ecec9a3c551e745f43d36bba1d69e1c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:01:58 -0800
Subject: [PATCH 2652/4122] perf stat: Remove prefix argument in
 print_metric_headers()

It always passes a whitespace to the function, thus we can just add it to the
function body.  Furthermore, it's only used in the normal output mode.

Well, actually CSV used it but it doesn't need to since we don't care about the
indentation or alignment in the CSV output.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-6-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 744b7a40f59ad..deed6ccf072f5 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -996,10 +996,9 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 }
 
 static void print_metric_headers_std(struct perf_stat_config *config,
-				     const char *prefix, bool no_indent)
+				     bool no_indent)
 {
-	if (prefix)
-		fprintf(config->output, "%s", prefix);
+	fputc(' ', config->output);
 
 	if (!no_indent) {
 		int len = aggr_header_lens[config->aggr_mode];
@@ -1012,11 +1011,8 @@ static void print_metric_headers_std(struct perf_stat_config *config,
 }
 
 static void print_metric_headers_csv(struct perf_stat_config *config,
-				     const char *prefix,
 				     bool no_indent __maybe_unused)
 {
-	if (prefix)
-		fprintf(config->output, "%s", prefix);
 	if (config->interval)
 		fputs("time,", config->output);
 	if (!config->iostat_run)
@@ -1024,7 +1020,6 @@ static void print_metric_headers_csv(struct perf_stat_config *config,
 }
 
 static void print_metric_headers_json(struct perf_stat_config *config,
-				      const char *prefix __maybe_unused,
 				      bool no_indent __maybe_unused)
 {
 	if (config->interval)
@@ -1032,8 +1027,7 @@ static void print_metric_headers_json(struct perf_stat_config *config,
 }
 
 static void print_metric_headers(struct perf_stat_config *config,
-				 struct evlist *evlist,
-				 const char *prefix, bool no_indent)
+				 struct evlist *evlist, bool no_indent)
 {
 	struct evsel *counter;
 	struct outstate os = {
@@ -1047,11 +1041,11 @@ static void print_metric_headers(struct perf_stat_config *config,
 	};
 
 	if (config->json_output)
-		print_metric_headers_json(config, prefix, no_indent);
+		print_metric_headers_json(config, no_indent);
 	else if (config->csv_output)
-		print_metric_headers_csv(config, prefix, no_indent);
+		print_metric_headers_csv(config, no_indent);
 	else
-		print_metric_headers_std(config, prefix, no_indent);
+		print_metric_headers_std(config, no_indent);
 
 	if (config->iostat_run)
 		iostat_print_header_prefix(config);
@@ -1132,7 +1126,7 @@ static void print_header_interval_std(struct perf_stat_config *config,
 	}
 
 	if (config->metric_only)
-		print_metric_headers(config, evlist, " ", true);
+		print_metric_headers(config, evlist, true);
 	else
 		fprintf(output, " %*s %*s events\n",
 			COUNTS_LEN, "counts", config->unit_width, "unit");
@@ -1168,7 +1162,7 @@ static void print_header_std(struct perf_stat_config *config,
 	fprintf(output, ":\n\n");
 
 	if (config->metric_only)
-		print_metric_headers(config, evlist, " ", false);
+		print_metric_headers(config, evlist, false);
 }
 
 static void print_header_csv(struct perf_stat_config *config,
@@ -1178,7 +1172,7 @@ static void print_header_csv(struct perf_stat_config *config,
 			     const char **argv __maybe_unused)
 {
 	if (config->metric_only)
-		print_metric_headers(config, evlist, " ", true);
+		print_metric_headers(config, evlist, true);
 }
 static void print_header_json(struct perf_stat_config *config,
 			      struct target *_target __maybe_unused,
@@ -1187,7 +1181,7 @@ static void print_header_json(struct perf_stat_config *config,
 			      const char **argv __maybe_unused)
 {
 	if (config->metric_only)
-		print_metric_headers(config, evlist, " ", true);
+		print_metric_headers(config, evlist, true);
 }
 
 static void print_header(struct perf_stat_config *config,
-- 
GitLab


From ce551ec923445b821893bddfb13c116d7e8fe454 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:01:59 -0800
Subject: [PATCH 2653/4122] perf stat: Remove metric_only argument in
 print_counter_aggrdata()

It already passes the stat_config argument, then it can find the value in the
config.  No need to pass it separately.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-7-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index deed6ccf072f5..b8432c0a0ec33 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -804,7 +804,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun
 
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
-				   char *prefix, bool metric_only)
+				   char *prefix)
 {
 	FILE *output = config->output;
 	u64 ena, run, val;
@@ -813,6 +813,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	struct perf_stat_aggr *aggr = &ps->aggr[s];
 	struct aggr_cpu_id id = config->aggr_map->map[s];
 	double avg = aggr->counts.val;
+	bool metric_only = config->metric_only;
 
 	if (counter->supported && aggr->nr == 0)
 		return;
@@ -875,7 +876,6 @@ static void print_aggr(struct perf_stat_config *config,
 		       struct evlist *evlist,
 		       char *prefix)
 {
-	bool metric_only = config->metric_only;
 	struct evsel *counter;
 	int s;
 
@@ -893,8 +893,7 @@ static void print_aggr(struct perf_stat_config *config,
 			if (counter->merged_stat)
 				continue;
 
-			print_counter_aggrdata(config, counter, s, prefix,
-					       metric_only);
+			print_counter_aggrdata(config, counter, s, prefix);
 		}
 		print_metric_end(config);
 	}
@@ -904,7 +903,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 			      struct evlist *evlist,
 			      char *prefix)
 {
-	bool metric_only = config->metric_only;
 	struct evsel *counter, *evsel;
 	struct cgroup *cgrp = NULL;
 	int s;
@@ -928,8 +926,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 				if (counter->cgrp != cgrp)
 					continue;
 
-				print_counter_aggrdata(config, counter, s, prefix,
-						       metric_only);
+				print_counter_aggrdata(config, counter, s, prefix);
 			}
 			print_metric_end(config);
 		}
@@ -939,7 +936,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 static void print_counter(struct perf_stat_config *config,
 			  struct evsel *counter, char *prefix)
 {
-	bool metric_only = config->metric_only;
 	int s;
 
 	/* AGGR_THREAD doesn't have config->aggr_get_id */
@@ -950,8 +946,7 @@ static void print_counter(struct perf_stat_config *config,
 		return;
 
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		print_counter_aggrdata(config, counter, s, prefix,
-				       metric_only);
+		print_counter_aggrdata(config, counter, s, prefix);
 	}
 }
 
@@ -1339,7 +1334,7 @@ static void print_percore(struct perf_stat_config *config,
 		if (found)
 			continue;
 
-		print_counter_aggrdata(config, counter, s, prefix, metric_only);
+		print_counter_aggrdata(config, counter, s, prefix);
 
 		core_map->map[c++] = core_id;
 	}
-- 
GitLab


From 991991ab99635d9e368f9671fa8c30ec1113042c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:00 -0800
Subject: [PATCH 2654/4122] perf stat: Pass const char *prefix to display
 routines

This is a minor cleanup and preparation for the later change.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-8-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/iostat.h       |  2 +-
 tools/perf/util/stat-display.c | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h
index 23c1c46a331ad..c22688f87cb2d 100644
--- a/tools/perf/util/iostat.h
+++ b/tools/perf/util/iostat.h
@@ -28,7 +28,7 @@ enum iostat_mode_t {
 
 extern enum iostat_mode_t iostat_mode;
 
-typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, char *);
+typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, const char *);
 
 int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config);
 int iostat_parse(const struct option *opt, const char *str,
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index b8432c0a0ec33..d2894a519d61b 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -675,7 +675,7 @@ static bool is_mixed_hw_group(struct evsel *counter)
 
 static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
 		     struct evsel *counter, double uval,
-		     char *prefix, u64 run, u64 ena, double noise,
+		     const char *prefix, u64 run, u64 ena, double noise,
 		     struct runtime_stat *st, int map_idx)
 {
 	struct perf_stat_output_ctx out;
@@ -804,7 +804,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun
 
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
-				   char *prefix)
+				   const char *prefix)
 {
 	FILE *output = config->output;
 	u64 ena, run, val;
@@ -843,7 +843,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 
 static void print_metric_begin(struct perf_stat_config *config,
 			       struct evlist *evlist,
-			       char *prefix, int aggr_idx,
+			       const char *prefix, int aggr_idx,
 			       struct cgroup *cgrp)
 {
 	struct perf_stat_aggr *aggr;
@@ -874,7 +874,7 @@ static void print_metric_end(struct perf_stat_config *config)
 
 static void print_aggr(struct perf_stat_config *config,
 		       struct evlist *evlist,
-		       char *prefix)
+		       const char *prefix)
 {
 	struct evsel *counter;
 	int s;
@@ -901,7 +901,7 @@ static void print_aggr(struct perf_stat_config *config,
 
 static void print_aggr_cgroup(struct perf_stat_config *config,
 			      struct evlist *evlist,
-			      char *prefix)
+			      const char *prefix)
 {
 	struct evsel *counter, *evsel;
 	struct cgroup *cgrp = NULL;
@@ -934,7 +934,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 }
 
 static void print_counter(struct perf_stat_config *config,
-			  struct evsel *counter, char *prefix)
+			  struct evsel *counter, const char *prefix)
 {
 	int s;
 
@@ -952,7 +952,7 @@ static void print_counter(struct perf_stat_config *config,
 
 static void print_no_aggr_metric(struct perf_stat_config *config,
 				 struct evlist *evlist,
-				 char *prefix)
+				 const char *prefix)
 {
 	int all_idx;
 	struct perf_cpu cpu;
@@ -1301,7 +1301,7 @@ static void print_footer(struct perf_stat_config *config)
 }
 
 static void print_percore(struct perf_stat_config *config,
-			  struct evsel *counter, char *prefix)
+			  struct evsel *counter, const char *prefix)
 {
 	bool metric_only = config->metric_only;
 	FILE *output = config->output;
@@ -1345,7 +1345,7 @@ static void print_percore(struct perf_stat_config *config,
 }
 
 static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist,
-				 char *prefix)
+				 const char *prefix)
 {
 	struct cgroup *cgrp = NULL;
 	struct evsel *counter;
-- 
GitLab


From 92ccf7f11d68fa55dc82cb7ae01043b3691918cf Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:01 -0800
Subject: [PATCH 2655/4122] perf stat: Use 'struct outstate' in
 evlist__print_counters()

This is a preparation for the later cleanup.  No functional changes
intended.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-9-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index d2894a519d61b..70aebf359e163 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -1372,13 +1372,16 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	bool metric_only = config->metric_only;
 	int interval = config->interval;
 	struct evsel *counter;
-	char buf[64], *prefix = NULL;
+	char buf[64];
+	struct outstate os = {
+		.fh = config->output,
+	};
 
 	if (config->iostat_run)
 		evlist->selected = evlist__first(evlist);
 
 	if (interval) {
-		prefix = buf;
+		os.prefix = buf;
 		prepare_interval(config, buf, sizeof(buf), ts);
 	}
 
@@ -1390,35 +1393,35 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	case AGGR_SOCKET:
 	case AGGR_NODE:
 		if (config->cgroup_list)
-			print_aggr_cgroup(config, evlist, prefix);
+			print_aggr_cgroup(config, evlist, os.prefix);
 		else
-			print_aggr(config, evlist, prefix);
+			print_aggr(config, evlist, os.prefix);
 		break;
 	case AGGR_THREAD:
 	case AGGR_GLOBAL:
 		if (config->iostat_run) {
-			iostat_print_counters(evlist, config, ts, prefix = buf,
+			iostat_print_counters(evlist, config, ts, buf,
 					      print_counter);
 		} else if (config->cgroup_list) {
-			print_cgroup_counter(config, evlist, prefix);
+			print_cgroup_counter(config, evlist, os.prefix);
 		} else {
-			print_metric_begin(config, evlist, prefix,
+			print_metric_begin(config, evlist, os.prefix,
 					   /*aggr_idx=*/0, /*cgrp=*/NULL);
 			evlist__for_each_entry(evlist, counter) {
-				print_counter(config, counter, prefix);
+				print_counter(config, counter, os.prefix);
 			}
 			print_metric_end(config);
 		}
 		break;
 	case AGGR_NONE:
 		if (metric_only)
-			print_no_aggr_metric(config, evlist, prefix);
+			print_no_aggr_metric(config, evlist, os.prefix);
 		else {
 			evlist__for_each_entry(evlist, counter) {
 				if (counter->percore)
-					print_percore(config, counter, prefix);
+					print_percore(config, counter, os.prefix);
 				else
-					print_counter(config, counter, prefix);
+					print_counter(config, counter, os.prefix);
 			}
 		}
 		break;
-- 
GitLab


From 922ae948c429a9b396761f2071eebf6a19688dd2 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:02 -0800
Subject: [PATCH 2656/4122] perf stat: Pass 'struct outstate' to
 print_metric_begin()

It passes prefix and cgroup pointers but the outstate already has them.
Let's pass the outstate pointer instead.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-10-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 50 +++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 70aebf359e163..3ed63061d6f82 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -843,8 +843,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 
 static void print_metric_begin(struct perf_stat_config *config,
 			       struct evlist *evlist,
-			       const char *prefix, int aggr_idx,
-			       struct cgroup *cgrp)
+			       struct outstate *os, int aggr_idx)
 {
 	struct perf_stat_aggr *aggr;
 	struct aggr_cpu_id id;
@@ -853,15 +852,15 @@ static void print_metric_begin(struct perf_stat_config *config,
 	if (!config->metric_only)
 		return;
 
-	if (prefix)
-		fprintf(config->output, "%s", prefix);
+	if (os->prefix)
+		fprintf(config->output, "%s", os->prefix);
 
 	evsel = evlist__first(evlist);
 	id = config->aggr_map->map[aggr_idx];
 	aggr = &evsel->stats->aggr[aggr_idx];
 	aggr_printout(config, evsel, id, aggr->nr);
 
-	print_cgroup(config, cgrp);
+	print_cgroup(config, os->cgrp);
 }
 
 static void print_metric_end(struct perf_stat_config *config)
@@ -877,6 +876,9 @@ static void print_aggr(struct perf_stat_config *config,
 		       const char *prefix)
 {
 	struct evsel *counter;
+	struct outstate os = {
+		.prefix = prefix,
+	};
 	int s;
 
 	if (!config->aggr_map || !config->aggr_get_id)
@@ -887,7 +889,7 @@ static void print_aggr(struct perf_stat_config *config,
 	 * Without each counter has its own line.
 	 */
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		print_metric_begin(config, evlist, prefix, s, /*cgrp=*/NULL);
+		print_metric_begin(config, evlist, &os, s);
 
 		evlist__for_each_entry(evlist, counter) {
 			if (counter->merged_stat)
@@ -904,26 +906,28 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 			      const char *prefix)
 {
 	struct evsel *counter, *evsel;
-	struct cgroup *cgrp = NULL;
+	struct outstate os = {
+		.prefix = prefix,
+	};
 	int s;
 
 	if (!config->aggr_map || !config->aggr_get_id)
 		return;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (cgrp == evsel->cgrp)
+		if (os.cgrp == evsel->cgrp)
 			continue;
 
-		cgrp = evsel->cgrp;
+		os.cgrp = evsel->cgrp;
 
 		for (s = 0; s < config->aggr_map->nr; s++) {
-			print_metric_begin(config, evlist, prefix, s, cgrp);
+			print_metric_begin(config, evlist, &os, s);
 
 			evlist__for_each_entry(evlist, counter) {
 				if (counter->merged_stat)
 					continue;
 
-				if (counter->cgrp != cgrp)
+				if (counter->cgrp != os.cgrp)
 					continue;
 
 				print_counter_aggrdata(config, counter, s, prefix);
@@ -956,6 +960,9 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 {
 	int all_idx;
 	struct perf_cpu cpu;
+	struct outstate os = {
+		.prefix = prefix,
+	};
 
 	perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) {
 		struct evsel *counter;
@@ -973,8 +980,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 
 			id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
 			if (first) {
-				print_metric_begin(config, evlist, prefix,
-						   counter_idx, /*cgrp=*/NULL);
+				print_metric_begin(config, evlist, &os, counter_idx);
 				first = false;
 			}
 			val = ps->aggr[counter_idx].counts.val;
@@ -1347,22 +1353,23 @@ static void print_percore(struct perf_stat_config *config,
 static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist,
 				 const char *prefix)
 {
-	struct cgroup *cgrp = NULL;
 	struct evsel *counter;
+	struct outstate os = {
+		.prefix = prefix,
+	};
 
 	evlist__for_each_entry(evlist, counter) {
-		if (cgrp != counter->cgrp) {
-			if (cgrp != NULL)
+		if (os.cgrp != counter->cgrp) {
+			if (os.cgrp != NULL)
 				print_metric_end(config);
 
-			cgrp = counter->cgrp;
-			print_metric_begin(config, evlist, prefix,
-					   /*aggr_idx=*/0, cgrp);
+			os.cgrp = counter->cgrp;
+			print_metric_begin(config, evlist, &os, /*aggr_idx=*/0);
 		}
 
 		print_counter(config, counter, prefix);
 	}
-	if (cgrp)
+	if (os.cgrp)
 		print_metric_end(config);
 }
 
@@ -1405,8 +1412,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 		} else if (config->cgroup_list) {
 			print_cgroup_counter(config, evlist, os.prefix);
 		} else {
-			print_metric_begin(config, evlist, os.prefix,
-					   /*aggr_idx=*/0, /*cgrp=*/NULL);
+			print_metric_begin(config, evlist, &os, /*aggr_idx=*/0);
 			evlist__for_each_entry(evlist, counter) {
 				print_counter(config, counter, os.prefix);
 			}
-- 
GitLab


From e7f4da312259e61877ae8e26d216993c4128bddc Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:03 -0800
Subject: [PATCH 2657/4122] perf stat: Pass struct outstate to printout()

The printout() takes a lot of arguments and sets an outstate with the
value.  Instead, we can fill the outstate first and then pass it to
reduce the number of arguments.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-11-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 38 ++++++++++++++++------------------
 1 file changed, 18 insertions(+), 20 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 3ed63061d6f82..dd190f71e9333 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -673,22 +673,15 @@ static bool is_mixed_hw_group(struct evsel *counter)
 	return false;
 }
 
-static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr,
-		     struct evsel *counter, double uval,
-		     const char *prefix, u64 run, u64 ena, double noise,
+static void printout(struct perf_stat_config *config, struct outstate *os,
+		     double uval, u64 run, u64 ena, double noise,
 		     struct runtime_stat *st, int map_idx)
 {
 	struct perf_stat_output_ctx out;
-	struct outstate os = {
-		.fh = config->output,
-		.prefix = prefix ? prefix : "",
-		.id = id,
-		.nr = nr,
-		.evsel = counter,
-	};
 	print_metric_t pm;
 	new_line_t nl;
 	bool ok = true;
+	struct evsel *counter = os->evsel;
 
 	if (config->csv_output) {
 		static const int aggr_fields[AGGR_MAX] = {
@@ -704,7 +697,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 
 		pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
 		nl = config->metric_only ? new_line_metric : new_line_csv;
-		os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0);
+		os->nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0);
 	} else if (config->json_output) {
 		pm = config->metric_only ? print_metric_only_json : print_metric_json;
 		nl = config->metric_only ? new_line_metric : new_line_json;
@@ -715,7 +708,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 
 	if (run == 0 || ena == 0 || counter->counts->scaled == -1) {
 		if (config->metric_only) {
-			pm(config, &os, NULL, "", "", 0);
+			pm(config, os, NULL, "", "", 0);
 			return;
 		}
 
@@ -732,11 +725,11 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int
 
 	out.print_metric = pm;
 	out.new_line = nl;
-	out.ctx = &os;
+	out.ctx = os;
 	out.force_header = false;
 
 	if (!config->metric_only) {
-		abs_printout(config, id, nr, counter, uval, ok);
+		abs_printout(config, os->id, os->nr, counter, uval, ok);
 
 		print_noise(config, counter, noise, /*before_metric=*/true);
 		print_running(config, run, ena, /*before_metric=*/true);
@@ -814,6 +807,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	struct aggr_cpu_id id = config->aggr_map->map[s];
 	double avg = aggr->counts.val;
 	bool metric_only = config->metric_only;
+	struct outstate os = {
+		.fh = config->output,
+		.prefix = prefix ? prefix : "",
+		.id = id,
+		.nr = aggr->nr,
+		.evsel = counter,
+	};
 
 	if (counter->supported && aggr->nr == 0)
 		return;
@@ -834,8 +834,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 
 	uval = val * counter->scale;
 
-	printout(config, id, aggr->nr, counter, uval,
-		 prefix, run, ena, avg, &rt_stat, s);
+	printout(config, &os, uval, run, ena, avg, &rt_stat, s);
 
 	if (!metric_only)
 		fputc('\n', output);
@@ -971,14 +970,14 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 		evlist__for_each_entry(evlist, counter) {
 			u64 ena, run, val;
 			double uval;
-			struct aggr_cpu_id id;
 			struct perf_stat_evsel *ps = counter->stats;
 			int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu);
 
 			if (counter_idx < 0)
 				continue;
 
-			id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+			os.evsel = counter;
+			os.id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
 			if (first) {
 				print_metric_begin(config, evlist, &os, counter_idx);
 				first = false;
@@ -988,8 +987,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			run = ps->aggr[counter_idx].counts.run;
 
 			uval = val * counter->scale;
-			printout(config, id, 0, counter, uval, prefix,
-				 run, ena, 1.0, &rt_stat, counter_idx);
+			printout(config, &os, uval, run, ena, 1.0, &rt_stat, counter_idx);
 		}
 		if (!first)
 			print_metric_end(config);
-- 
GitLab


From 01577597493dc8bded8a5880fbf84a6d5bf13f1b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:04 -0800
Subject: [PATCH 2658/4122] perf stat: Do not pass runtime_stat to printout()

It always passes a pointer to rt_stat as it's the only one.  Let's not
pass it and directly refer it in the printout().

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-12-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index dd190f71e9333..cdf4ca7f6e3a4 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -674,8 +674,7 @@ static bool is_mixed_hw_group(struct evsel *counter)
 }
 
 static void printout(struct perf_stat_config *config, struct outstate *os,
-		     double uval, u64 run, u64 ena, double noise,
-		     struct runtime_stat *st, int map_idx)
+		     double uval, u64 run, u64 ena, double noise, int map_idx)
 {
 	struct perf_stat_output_ctx out;
 	print_metric_t pm;
@@ -737,7 +736,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
 
 	if (ok) {
 		perf_stat__print_shadow_stats(config, counter, uval, map_idx,
-					      &out, &config->metric_events, st);
+					      &out, &config->metric_events, &rt_stat);
 	} else {
 		pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
 	}
@@ -834,7 +833,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 
 	uval = val * counter->scale;
 
-	printout(config, &os, uval, run, ena, avg, &rt_stat, s);
+	printout(config, &os, uval, run, ena, avg, s);
 
 	if (!metric_only)
 		fputc('\n', output);
@@ -987,7 +986,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			run = ps->aggr[counter_idx].counts.run;
 
 			uval = val * counter->scale;
-			printout(config, &os, uval, run, ena, 1.0, &rt_stat, counter_idx);
+			printout(config, &os, uval, run, ena, 1.0, counter_idx);
 		}
 		if (!first)
 			print_metric_end(config);
-- 
GitLab


From 5f334d88c25e0dbdbc199ad38becc5cc5aa33081 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:05 -0800
Subject: [PATCH 2659/4122] perf stat: Pass through 'struct outstate'

Now most of the print functions take a pointer to the struct outstate.
We have one in the evlist__print_counters() and pass it through the
child functions.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-13-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/x86/util/iostat.c |   4 +-
 tools/perf/util/iostat.c          |   3 +-
 tools/perf/util/iostat.h          |   4 +-
 tools/perf/util/stat-display.c    | 102 +++++++++++++-----------------
 4 files changed, 50 insertions(+), 63 deletions(-)

diff --git a/tools/perf/arch/x86/util/iostat.c b/tools/perf/arch/x86/util/iostat.c
index 404de795ec0bf..7eb0a7b00b95a 100644
--- a/tools/perf/arch/x86/util/iostat.c
+++ b/tools/perf/arch/x86/util/iostat.c
@@ -449,7 +449,7 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
 
 void iostat_print_counters(struct evlist *evlist,
 			   struct perf_stat_config *config, struct timespec *ts,
-			   char *prefix, iostat_print_counter_t print_cnt_cb)
+			   char *prefix, iostat_print_counter_t print_cnt_cb, void *arg)
 {
 	void *perf_device = NULL;
 	struct evsel *counter = evlist__first(evlist);
@@ -464,7 +464,7 @@ void iostat_print_counters(struct evlist *evlist,
 			iostat_prefix(evlist, config, prefix, ts);
 			fprintf(config->output, "\n%s", prefix);
 		}
-		print_cnt_cb(config, counter, prefix);
+		print_cnt_cb(config, counter, arg);
 	}
 	fputc('\n', config->output);
 }
diff --git a/tools/perf/util/iostat.c b/tools/perf/util/iostat.c
index 57dd49da28fe4..b770bd473af71 100644
--- a/tools/perf/util/iostat.c
+++ b/tools/perf/util/iostat.c
@@ -48,6 +48,7 @@ __weak void iostat_print_counters(struct evlist *evlist __maybe_unused,
 				  struct perf_stat_config *config __maybe_unused,
 				  struct timespec *ts __maybe_unused,
 				  char *prefix __maybe_unused,
-				  iostat_print_counter_t print_cnt_cb __maybe_unused)
+				  iostat_print_counter_t print_cnt_cb __maybe_unused,
+				  void *arg __maybe_unused)
 {
 }
diff --git a/tools/perf/util/iostat.h b/tools/perf/util/iostat.h
index c22688f87cb2d..a4e7299c5c2fb 100644
--- a/tools/perf/util/iostat.h
+++ b/tools/perf/util/iostat.h
@@ -28,7 +28,7 @@ enum iostat_mode_t {
 
 extern enum iostat_mode_t iostat_mode;
 
-typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, const char *);
+typedef void (*iostat_print_counter_t)(struct perf_stat_config *, struct evsel *, void *);
 
 int iostat_prepare(struct evlist *evlist, struct perf_stat_config *config);
 int iostat_parse(const struct option *opt, const char *str,
@@ -42,6 +42,6 @@ void iostat_print_metric(struct perf_stat_config *config, struct evsel *evsel,
 			 struct perf_stat_output_ctx *out);
 void iostat_print_counters(struct evlist *evlist,
 			   struct perf_stat_config *config, struct timespec *ts,
-			   char *prefix, iostat_print_counter_t print_cnt_cb);
+			   char *prefix, iostat_print_counter_t print_cnt_cb, void *arg);
 
 #endif /* _IOSTAT_H */
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index cdf4ca7f6e3a4..335627e8542dc 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -796,7 +796,7 @@ static void uniquify_counter(struct perf_stat_config *config, struct evsel *coun
 
 static void print_counter_aggrdata(struct perf_stat_config *config,
 				   struct evsel *counter, int s,
-				   const char *prefix)
+				   struct outstate *os)
 {
 	FILE *output = config->output;
 	u64 ena, run, val;
@@ -806,13 +806,10 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	struct aggr_cpu_id id = config->aggr_map->map[s];
 	double avg = aggr->counts.val;
 	bool metric_only = config->metric_only;
-	struct outstate os = {
-		.fh = config->output,
-		.prefix = prefix ? prefix : "",
-		.id = id,
-		.nr = aggr->nr,
-		.evsel = counter,
-	};
+
+	os->id = id;
+	os->nr = aggr->nr;
+	os->evsel = counter;
 
 	if (counter->supported && aggr->nr == 0)
 		return;
@@ -824,8 +821,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	run = aggr->counts.run;
 
 	if (!metric_only) {
-		if (prefix)
-			fprintf(output, "%s", prefix);
+		if (os->prefix)
+			fprintf(output, "%s", os->prefix);
 		else if (config->summary && config->csv_output &&
 			 !config->no_csv_summary && !config->interval)
 			fprintf(output, "%s%s", "summary", config->csv_sep);
@@ -833,7 +830,7 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 
 	uval = val * counter->scale;
 
-	printout(config, &os, uval, run, ena, avg, s);
+	printout(config, os, uval, run, ena, avg, s);
 
 	if (!metric_only)
 		fputc('\n', output);
@@ -871,12 +868,9 @@ static void print_metric_end(struct perf_stat_config *config)
 
 static void print_aggr(struct perf_stat_config *config,
 		       struct evlist *evlist,
-		       const char *prefix)
+		       struct outstate *os)
 {
 	struct evsel *counter;
-	struct outstate os = {
-		.prefix = prefix,
-	};
 	int s;
 
 	if (!config->aggr_map || !config->aggr_get_id)
@@ -887,13 +881,13 @@ static void print_aggr(struct perf_stat_config *config,
 	 * Without each counter has its own line.
 	 */
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		print_metric_begin(config, evlist, &os, s);
+		print_metric_begin(config, evlist, os, s);
 
 		evlist__for_each_entry(evlist, counter) {
 			if (counter->merged_stat)
 				continue;
 
-			print_counter_aggrdata(config, counter, s, prefix);
+			print_counter_aggrdata(config, counter, s, os);
 		}
 		print_metric_end(config);
 	}
@@ -901,34 +895,31 @@ static void print_aggr(struct perf_stat_config *config,
 
 static void print_aggr_cgroup(struct perf_stat_config *config,
 			      struct evlist *evlist,
-			      const char *prefix)
+			      struct outstate *os)
 {
 	struct evsel *counter, *evsel;
-	struct outstate os = {
-		.prefix = prefix,
-	};
 	int s;
 
 	if (!config->aggr_map || !config->aggr_get_id)
 		return;
 
 	evlist__for_each_entry(evlist, evsel) {
-		if (os.cgrp == evsel->cgrp)
+		if (os->cgrp == evsel->cgrp)
 			continue;
 
-		os.cgrp = evsel->cgrp;
+		os->cgrp = evsel->cgrp;
 
 		for (s = 0; s < config->aggr_map->nr; s++) {
-			print_metric_begin(config, evlist, &os, s);
+			print_metric_begin(config, evlist, os, s);
 
 			evlist__for_each_entry(evlist, counter) {
 				if (counter->merged_stat)
 					continue;
 
-				if (counter->cgrp != os.cgrp)
+				if (counter->cgrp != os->cgrp)
 					continue;
 
-				print_counter_aggrdata(config, counter, s, prefix);
+				print_counter_aggrdata(config, counter, s, os);
 			}
 			print_metric_end(config);
 		}
@@ -936,7 +927,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 }
 
 static void print_counter(struct perf_stat_config *config,
-			  struct evsel *counter, const char *prefix)
+			  struct evsel *counter, struct outstate *os)
 {
 	int s;
 
@@ -948,19 +939,16 @@ static void print_counter(struct perf_stat_config *config,
 		return;
 
 	for (s = 0; s < config->aggr_map->nr; s++) {
-		print_counter_aggrdata(config, counter, s, prefix);
+		print_counter_aggrdata(config, counter, s, os);
 	}
 }
 
 static void print_no_aggr_metric(struct perf_stat_config *config,
 				 struct evlist *evlist,
-				 const char *prefix)
+				 struct outstate *os)
 {
 	int all_idx;
 	struct perf_cpu cpu;
-	struct outstate os = {
-		.prefix = prefix,
-	};
 
 	perf_cpu_map__for_each_cpu(cpu, all_idx, evlist->core.user_requested_cpus) {
 		struct evsel *counter;
@@ -975,10 +963,10 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			if (counter_idx < 0)
 				continue;
 
-			os.evsel = counter;
-			os.id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
+			os->evsel = counter;
+			os->id = aggr_cpu_id__cpu(cpu, /*data=*/NULL);
 			if (first) {
-				print_metric_begin(config, evlist, &os, counter_idx);
+				print_metric_begin(config, evlist, os, counter_idx);
 				first = false;
 			}
 			val = ps->aggr[counter_idx].counts.val;
@@ -986,7 +974,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			run = ps->aggr[counter_idx].counts.run;
 
 			uval = val * counter->scale;
-			printout(config, &os, uval, run, ena, 1.0, counter_idx);
+			printout(config, os, uval, run, ena, 1.0, counter_idx);
 		}
 		if (!first)
 			print_metric_end(config);
@@ -1304,7 +1292,7 @@ static void print_footer(struct perf_stat_config *config)
 }
 
 static void print_percore(struct perf_stat_config *config,
-			  struct evsel *counter, const char *prefix)
+			  struct evsel *counter, struct outstate *os)
 {
 	bool metric_only = config->metric_only;
 	FILE *output = config->output;
@@ -1315,7 +1303,7 @@ static void print_percore(struct perf_stat_config *config,
 		return;
 
 	if (config->percore_show_thread)
-		return print_counter(config, counter, prefix);
+		return print_counter(config, counter, os);
 
 	core_map = cpu_aggr_map__empty_new(config->aggr_map->nr);
 	if (core_map == NULL) {
@@ -1337,7 +1325,7 @@ static void print_percore(struct perf_stat_config *config,
 		if (found)
 			continue;
 
-		print_counter_aggrdata(config, counter, s, prefix);
+		print_counter_aggrdata(config, counter, s, os);
 
 		core_map->map[c++] = core_id;
 	}
@@ -1348,30 +1336,28 @@ static void print_percore(struct perf_stat_config *config,
 }
 
 static void print_cgroup_counter(struct perf_stat_config *config, struct evlist *evlist,
-				 const char *prefix)
+				 struct outstate *os)
 {
 	struct evsel *counter;
-	struct outstate os = {
-		.prefix = prefix,
-	};
 
 	evlist__for_each_entry(evlist, counter) {
-		if (os.cgrp != counter->cgrp) {
-			if (os.cgrp != NULL)
+		if (os->cgrp != counter->cgrp) {
+			if (os->cgrp != NULL)
 				print_metric_end(config);
 
-			os.cgrp = counter->cgrp;
-			print_metric_begin(config, evlist, &os, /*aggr_idx=*/0);
+			os->cgrp = counter->cgrp;
+			print_metric_begin(config, evlist, os, /*aggr_idx=*/0);
 		}
 
-		print_counter(config, counter, prefix);
+		print_counter(config, counter, os);
 	}
-	if (os.cgrp)
+	if (os->cgrp)
 		print_metric_end(config);
 }
 
 void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
-			    struct target *_target, struct timespec *ts, int argc, const char **argv)
+			    struct target *_target, struct timespec *ts,
+			    int argc, const char **argv)
 {
 	bool metric_only = config->metric_only;
 	int interval = config->interval;
@@ -1397,34 +1383,34 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	case AGGR_SOCKET:
 	case AGGR_NODE:
 		if (config->cgroup_list)
-			print_aggr_cgroup(config, evlist, os.prefix);
+			print_aggr_cgroup(config, evlist, &os);
 		else
-			print_aggr(config, evlist, os.prefix);
+			print_aggr(config, evlist, &os);
 		break;
 	case AGGR_THREAD:
 	case AGGR_GLOBAL:
 		if (config->iostat_run) {
 			iostat_print_counters(evlist, config, ts, buf,
-					      print_counter);
+					      (iostat_print_counter_t)print_counter, &os);
 		} else if (config->cgroup_list) {
-			print_cgroup_counter(config, evlist, os.prefix);
+			print_cgroup_counter(config, evlist, &os);
 		} else {
 			print_metric_begin(config, evlist, &os, /*aggr_idx=*/0);
 			evlist__for_each_entry(evlist, counter) {
-				print_counter(config, counter, os.prefix);
+				print_counter(config, counter, &os);
 			}
 			print_metric_end(config);
 		}
 		break;
 	case AGGR_NONE:
 		if (metric_only)
-			print_no_aggr_metric(config, evlist, os.prefix);
+			print_no_aggr_metric(config, evlist, &os);
 		else {
 			evlist__for_each_entry(evlist, counter) {
 				if (counter->percore)
-					print_percore(config, counter, os.prefix);
+					print_percore(config, counter, &os);
 				else
-					print_counter(config, counter, os.prefix);
+					print_counter(config, counter, &os);
 			}
 		}
 		break;
-- 
GitLab


From ab6baaae273572909f1e957fc27a9459fc95dd8c Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:06 -0800
Subject: [PATCH 2660/4122] perf stat: Fix JSON output in metric-only mode

It generated a broken JSON output when aggregation mode or cgroup is
used with --metric-only option.  Also get rid of the header line and
make the output single line for each entry.

It needs to know whether the current metric is the first one or not.
So add 'first' field in the outstate and mark it false after printing.

Before:

  # perf stat -a -j --metric-only true
  {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"}
  {{"metric-value" : "0.797"}{"metric-value" : "1.65"}{"metric-value" : "0.89"}
   ^

  # perf stat -a -j --metric-only --per-socket true
  {"unit" : "GHz"}{"unit" : "insn per cycle"}{"unit" : "branch-misses of all branches"}
  {"socket" : "S0", "aggregate-number" : 8, {"metric-value" : "0.295"}{"metric-value" : "1.88"}{"metric-value" : "0.64"}
                                           ^

After:

  # perf stat -a -j --metric-only true
  {"GHz" : "0.990", "insn per cycle" : "2.06", "branch-misses of all branches" : "0.59"}

  # perf stat -a -j --metric-only --per-socket true
  {"socket" : "S0", "aggregate-number" : 8, "GHz" : "0.439", "insn per cycle" : "2.14", "branch-misses of all branches" : "0.51"}

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-14-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 42 +++++++++++++++++++---------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 335627e8542dc..43640115454cf 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -279,9 +279,6 @@ static void print_aggr_id_json(struct perf_stat_config *config,
 {
 	FILE *output = config->output;
 
-	if (!config->interval)
-		fputc('{', output);
-
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
 		fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
@@ -335,6 +332,7 @@ static void aggr_printout(struct perf_stat_config *config,
 struct outstate {
 	FILE *fh;
 	bool newline;
+	bool first;
 	const char *prefix;
 	int  nfields;
 	int  nr;
@@ -491,6 +489,7 @@ static void print_metric_only(struct perf_stat_config *config,
 
 	color_snprintf(str, sizeof(str), color ?: "", fmt, val);
 	fprintf(out, "%*s ", mlen, str);
+	os->first = false;
 }
 
 static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused,
@@ -512,6 +511,7 @@ static void print_metric_only_csv(struct perf_stat_config *config __maybe_unused
 		ends++;
 	*ends = 0;
 	fprintf(out, "%s%s", vals, config->csv_sep);
+	os->first = false;
 }
 
 static void print_metric_only_json(struct perf_stat_config *config __maybe_unused,
@@ -532,7 +532,8 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
 	while (isdigit(*ends) || *ends == '.')
 		ends++;
 	*ends = 0;
-	fprintf(out, "{\"metric-value\" : \"%s\"}", vals);
+	fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals);
+	os->first = false;
 }
 
 static void new_line_metric(struct perf_stat_config *config __maybe_unused,
@@ -561,7 +562,7 @@ static void print_metric_header(struct perf_stat_config *config,
 	unit = fixunit(tbuf, os->evsel, unit);
 
 	if (config->json_output)
-		fprintf(os->fh, "{\"unit\" : \"%s\"}", unit);
+		return;
 	else if (config->csv_output)
 		fprintf(os->fh, "%s%s", unit, config->csv_sep);
 	else
@@ -821,6 +822,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	run = aggr->counts.run;
 
 	if (!metric_only) {
+		if (config->json_output)
+			fputc('{', output);
 		if (os->prefix)
 			fprintf(output, "%s", os->prefix);
 		else if (config->summary && config->csv_output &&
@@ -844,9 +847,12 @@ static void print_metric_begin(struct perf_stat_config *config,
 	struct aggr_cpu_id id;
 	struct evsel *evsel;
 
+	os->first = true;
 	if (!config->metric_only)
 		return;
 
+	if (config->json_output)
+		fputc('{', config->output);
 	if (os->prefix)
 		fprintf(config->output, "%s", os->prefix);
 
@@ -855,7 +861,7 @@ static void print_metric_begin(struct perf_stat_config *config,
 	aggr = &evsel->stats->aggr[aggr_idx];
 	aggr_printout(config, evsel, id, aggr->nr);
 
-	print_cgroup(config, os->cgrp);
+	print_cgroup(config, os->cgrp ? : evsel->cgrp);
 }
 
 static void print_metric_end(struct perf_stat_config *config)
@@ -863,6 +869,8 @@ static void print_metric_end(struct perf_stat_config *config)
 	if (!config->metric_only)
 		return;
 
+	if (config->json_output)
+		fputc('}', config->output);
 	fputc('\n', config->output);
 }
 
@@ -1005,11 +1013,9 @@ static void print_metric_headers_csv(struct perf_stat_config *config,
 		fputs(aggr_header_csv[config->aggr_mode], config->output);
 }
 
-static void print_metric_headers_json(struct perf_stat_config *config,
+static void print_metric_headers_json(struct perf_stat_config *config __maybe_unused,
 				      bool no_indent __maybe_unused)
 {
-	if (config->interval)
-		fputs("{\"unit\" : \"sec\"}", config->output);
 }
 
 static void print_metric_headers(struct perf_stat_config *config,
@@ -1049,7 +1055,9 @@ static void print_metric_headers(struct perf_stat_config *config,
 					      &config->metric_events,
 					      &rt_stat);
 	}
-	fputc('\n', config->output);
+
+	if (!config->json_output)
+		fputc('\n', config->output);
 }
 
 static void prepare_interval(struct perf_stat_config *config,
@@ -1058,17 +1066,14 @@ static void prepare_interval(struct perf_stat_config *config,
 	if (config->iostat_run)
 		return;
 
-	if (config->csv_output)
+	if (config->json_output)
+		scnprintf(prefix, len, "\"interval\" : %lu.%09lu, ",
+			  (unsigned long) ts->tv_sec, ts->tv_nsec);
+	else if (config->csv_output)
 		scnprintf(prefix, len, "%lu.%09lu%s",
 			  (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
-	else if (!config->json_output)
-		scnprintf(prefix, len, "%6lu.%09lu ",
-			  (unsigned long) ts->tv_sec, ts->tv_nsec);
-	else if (!config->metric_only)
-		scnprintf(prefix, len, "{\"interval\" : %lu.%09lu, ",
-			  (unsigned long) ts->tv_sec, ts->tv_nsec);
 	else
-		scnprintf(prefix, len, "{\"interval\" : %lu.%09lu}",
+		scnprintf(prefix, len, "%6lu.%09lu ",
 			  (unsigned long) ts->tv_sec, ts->tv_nsec);
 }
 
@@ -1365,6 +1370,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 	char buf[64];
 	struct outstate os = {
 		.fh = config->output,
+		.first = true,
 	};
 
 	if (config->iostat_run)
-- 
GitLab


From c4b41b83c25073c09bfcc4e5ec496c9dd316656b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:07 -0800
Subject: [PATCH 2661/4122] perf stat: Rename "aggregate-number" to "cpu-count"
 in JSON

As the JSON output has been broken for a little while, I guess there are
not many users.  Let's rename the field to more intuitive one. :)

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-15-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 43640115454cf..7a39a1a7261d2 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -281,19 +281,19 @@ static void print_aggr_id_json(struct perf_stat_config *config,
 
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
-		fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
+		fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"cpu-count\" : %d, ",
 			id.socket, id.die, id.core, nr);
 		break;
 	case AGGR_DIE:
-		fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
+		fprintf(output, "\"die\" : \"S%d-D%d\", \"cpu-count\" : %d, ",
 			id.socket, id.die, nr);
 		break;
 	case AGGR_SOCKET:
-		fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
+		fprintf(output, "\"socket\" : \"S%d\", \"cpu-count\" : %d, ",
 			id.socket, nr);
 		break;
 	case AGGR_NODE:
-		fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
+		fprintf(output, "\"node\" : \"N%d\", \"cpu-count\" : %d, ",
 			id.node, nr);
 		break;
 	case AGGR_NONE:
-- 
GitLab


From 765d4e497fc51c64b50e5947d0b63bc3ccbd83d3 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 23 Nov 2022 10:02:08 -0800
Subject: [PATCH 2662/4122] perf stat: Tidy up JSON metric-only output when no
 metrics

It printed empty strings for each metric.  I guess it's needed for CSV
output to match the column number.  We could just ignore the empty
metrics in JSON but it ended up with a broken JSON object with a
trailing comma.

So I added a dummy '"metric-value" : "none"' part.  To do that, it
needs to pass struct outstate to print_metric_end() to check if any
metric value is printed or not.

Before:

  # perf stat -aj --metric-only --per-socket --for-each-cgroup system.slice true
  {"socket" : "S0", "cpu-count" : 8, "cgroup" : "system.slice", "" : "", "" : "", "" : "", "" : "", "" : "", "" : "", "" : "", "" : ""}

After:

  # perf stat -aj --metric-only --per-socket --for-each-cgroup system.slice true
  {"socket" : "S0", "cpu-count" : 8, "cgroup" : "system.slice", "metric-value" : "none"}

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221123180208.2068936-16-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 7a39a1a7261d2..847acdb5dc407 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -532,6 +532,8 @@ static void print_metric_only_json(struct perf_stat_config *config __maybe_unuse
 	while (isdigit(*ends) || *ends == '.')
 		ends++;
 	*ends = 0;
+	if (!unit[0] || !vals[0])
+		return;
 	fprintf(out, "%s\"%s\" : \"%s\"", os->first ? "" : ", ", unit, vals);
 	os->first = false;
 }
@@ -864,14 +866,19 @@ static void print_metric_begin(struct perf_stat_config *config,
 	print_cgroup(config, os->cgrp ? : evsel->cgrp);
 }
 
-static void print_metric_end(struct perf_stat_config *config)
+static void print_metric_end(struct perf_stat_config *config, struct outstate *os)
 {
+	FILE *output = config->output;
+
 	if (!config->metric_only)
 		return;
 
-	if (config->json_output)
-		fputc('}', config->output);
-	fputc('\n', config->output);
+	if (config->json_output) {
+		if (os->first)
+			fputs("\"metric-value\" : \"none\"", output);
+		fputc('}', output);
+	}
+	fputc('\n', output);
 }
 
 static void print_aggr(struct perf_stat_config *config,
@@ -897,7 +904,7 @@ static void print_aggr(struct perf_stat_config *config,
 
 			print_counter_aggrdata(config, counter, s, os);
 		}
-		print_metric_end(config);
+		print_metric_end(config, os);
 	}
 }
 
@@ -929,7 +936,7 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 
 				print_counter_aggrdata(config, counter, s, os);
 			}
-			print_metric_end(config);
+			print_metric_end(config, os);
 		}
 	}
 }
@@ -985,7 +992,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config,
 			printout(config, os, uval, run, ena, 1.0, counter_idx);
 		}
 		if (!first)
-			print_metric_end(config);
+			print_metric_end(config, os);
 	}
 }
 
@@ -1348,7 +1355,7 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
 	evlist__for_each_entry(evlist, counter) {
 		if (os->cgrp != counter->cgrp) {
 			if (os->cgrp != NULL)
-				print_metric_end(config);
+				print_metric_end(config, os);
 
 			os->cgrp = counter->cgrp;
 			print_metric_begin(config, evlist, os, /*aggr_idx=*/0);
@@ -1357,7 +1364,7 @@ static void print_cgroup_counter(struct perf_stat_config *config, struct evlist
 		print_counter(config, counter, os);
 	}
 	if (os->cgrp)
-		print_metric_end(config);
+		print_metric_end(config, os);
 }
 
 void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config,
@@ -1405,7 +1412,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf
 			evlist__for_each_entry(evlist, counter) {
 				print_counter(config, counter, &os);
 			}
-			print_metric_end(config);
+			print_metric_end(config, &os);
 		}
 		break;
 	case AGGR_NONE:
-- 
GitLab


From eafcbb6838b67ae67bcab3f794dff593250a71bb Mon Sep 17 00:00:00 2001
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Date: Thu, 24 Nov 2022 11:14:38 +0800
Subject: [PATCH 2663/4122] perf vendor events intel: Add core event list for
 Alderlake-N

Alderlake-N only has E-core, it has been moved to non-hybrid code path on
the kernel side, so add the cpuid for Alderlake-N separately.

Add core event list for Alderlake-N, it is based on the
ADL gracemont v1.16 JSON file.

https://github.com/intel/perfmon/tree/main/ADL/events/

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: Alexander Shishkin <alexander.shishkin@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221124031441.110134-1-zhengjun.xing@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../pmu-events/arch/x86/alderlaken/cache.json | 330 +++++++++++
 .../arch/x86/alderlaken/floating-point.json   |  18 +
 .../arch/x86/alderlaken/frontend.json         |  26 +
 .../arch/x86/alderlaken/memory.json           |  81 +++
 .../pmu-events/arch/x86/alderlaken/other.json |  38 ++
 .../arch/x86/alderlaken/pipeline.json         | 533 ++++++++++++++++++
 .../arch/x86/alderlaken/virtual-memory.json   |  47 ++
 tools/perf/pmu-events/arch/x86/mapfile.csv    |   3 +-
 8 files changed, 1075 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/cache.json
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/frontend.json
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/memory.json
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/other.json
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json

diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/cache.json b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
new file mode 100644
index 0000000000000..043445ae14a8c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/cache.json
@@ -0,0 +1,330 @@
+[
+    {
+        "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4f"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+        "SampleAfterValue": "200003",
+        "UMask": "0x38"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).",
+        "SampleAfterValue": "200003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in DRAM.",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons:  load buffer, store buffer or RSV full.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+        "SampleAfterValue": "20003",
+        "UMask": "0x7"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of load uops retired.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x81"
+    },
+    {
+        "BriefDescription": "Counts the number of store uops retired.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of store uops retired.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x82"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x80",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x10",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x100",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x20",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x4",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x200",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x40",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x8",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5"
+    },
+    {
+        "BriefDescription": "Counts the number of retired split load uops.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x41"
+    },
+    {
+        "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x4003C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x8003C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json
new file mode 100644
index 0000000000000..30e8ca3c14854
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/floating-point.json
@@ -0,0 +1,18 @@
+[
+    {
+        "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.FPDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json
new file mode 100644
index 0000000000000..36898bab2bba6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/frontend.json
@@ -0,0 +1,26 @@
+[
+    {
+        "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "EventCode": "0xe6",
+        "EventName": "BACLEARS.ANY",
+        "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend.  Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.",
+        "EventCode": "0x80",
+        "EventName": "ICACHE.ACCESSES",
+        "PublicDescription": "Counts the total number of requests to the instruction cache.  The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one.  Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts the number of instruction cache misses.",
+        "EventCode": "0x80",
+        "EventName": "ICACHE.MISSES",
+        "PublicDescription": "Counts the number of missed requests to the instruction cache.  The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one.  Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json
new file mode 100644
index 0000000000000..f84bf8c43495c
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/memory.json
@@ -0,0 +1,81 @@
+[
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.ANY_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xff"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.L1_BOUND_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xf4"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.OTHER_AT_RET",
+        "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xc0"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.PGWALK_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0xa0"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.ST_ADDR_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x84"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/other.json b/tools/perf/pmu-events/arch/x86/alderlaken/other.json
new file mode 100644
index 0000000000000..6336de61f628f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/other.json
@@ -0,0 +1,38 @@
+[
+    {
+        "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.COREWB_M.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10008",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that have any type of response.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts streaming stores that have any type of response.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10800",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
new file mode 100644
index 0000000000000..fa53ff11a5096
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/pipeline.json
@@ -0,0 +1,533 @@
+[
+    {
+        "BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires.  All branch type instructions are accounted for.",
+        "SampleAfterValue": "200003"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf9"
+    },
+    {
+        "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xbf"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf9"
+    },
+    {
+        "BriefDescription": "Counts the number of near RET branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of near relative CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.REL_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfd"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of mispredicted branch instructions retired.  All branch type instructions are accounted for.  Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP.    A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
+        "SampleAfterValue": "200003"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+        "EventName": "CPU_CLK_UNHALTED.CORE",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles.",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.CORE_P",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time.  This event uses fixed counter 1.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles.",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
+        "EventName": "INST_RETIRED.ANY",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the total number of instructions retired.",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS",
+        "Deprecated": "1",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.4K_ALIAS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+        "SampleAfterValue": "20003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of machines clears due to memory renaming.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MRN_NUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to a page fault.  Counts both I-Side and D-Side (Loads/Stores) page faults.  A page fault occurs when either the page is not present, or an access violation occurs.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+        "SampleAfterValue": "20003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SLOW",
+        "SampleAfterValue": "20003",
+        "UMask": "0x6f"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
+        "EventCode": "0x75",
+        "EventName": "SERIALIZATION.NON_C01_MS_SCB",
+        "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
+        "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALL",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ALL",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.CISC",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.DECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8d"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x72"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ITLB",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.OTHER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4"
+    },
+    {
+        "BriefDescription": "Counts the total number of consumed retirement slots.",
+        "EventCode": "0xc2",
+        "EventName": "TOPDOWN_RETIRING.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003"
+    },
+    {
+        "BriefDescription": "Counts the total number of uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003"
+    },
+    {
+        "BriefDescription": "Counts the number of integer divide uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.IDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10"
+    },
+    {
+        "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.X87",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json
new file mode 100644
index 0000000000000..67fd640f790e6
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/virtual-memory.json
@@ -0,0 +1,47 @@
+[
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size.  Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x90"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 5e609b8767903..78af105ca2367 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -1,5 +1,6 @@
 Family-model,Version,Filename,EventType
-GenuineIntel-6-(97|9A|B7|BA|BE|BF),v1.15,alderlake,core
+GenuineIntel-6-(97|9A|B7|BA|BF),v1.15,alderlake,core
+GenuineIntel-6-BE,v1.16,alderlaken,core
 GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core
 GenuineIntel-6-(3D|47),v26,broadwell,core
 GenuineIntel-6-56,v23,broadwellde,core
-- 
GitLab


From a6a29bcf596141f95fc0f9756ba68de31ba1f46c Mon Sep 17 00:00:00 2001
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Date: Thu, 24 Nov 2022 11:14:39 +0800
Subject: [PATCH 2664/4122] perf vendor events intel: Add uncore event list for
 Alderlake-N

Add JSON uncore events for Alderlake-N

Based on JSON list v1.16:

https://github.com/intel/perfmon/tree/main/ADL/events/

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221124031441.110134-2-zhengjun.xing@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/x86/alderlaken/uncore-memory.json    | 175 ++++++++++++++++++
 .../arch/x86/alderlaken/uncore-other.json     |  33 ++++
 2 files changed, 208 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json

diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json
new file mode 100644
index 0000000000000..2ccd9cf96957f
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-memory.json
@@ -0,0 +1,175 @@
+[
+    {
+        "BriefDescription": "Counts every 64B read  request entering the Memory Controller 0 to DRAM (sum of all channels).",
+        "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN",
+        "PerPkg": "1",
+        "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+        "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).",
+        "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN",
+        "PerPkg": "1",
+        "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+        "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "ACT command for a read request sent to DRAM",
+        "EventCode": "0x24",
+        "EventName": "UNC_M_ACT_COUNT_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "ACT command sent to DRAM",
+        "EventCode": "0x26",
+        "EventName": "UNC_M_ACT_COUNT_TOTAL",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "ACT command for a write request sent to DRAM",
+        "EventCode": "0x25",
+        "EventName": "UNC_M_ACT_COUNT_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Read CAS command sent to DRAM",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_CAS_COUNT_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Write CAS command sent to DRAM",
+        "EventCode": "0x23",
+        "EventName": "UNC_M_CAS_COUNT_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Number of clocks",
+        "EventCode": "0x01",
+        "EventName": "UNC_M_CLOCKTICKS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "incoming read request page status is Page Empty",
+        "EventCode": "0x1D",
+        "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "incoming write request page status is Page Empty",
+        "EventCode": "0x20",
+        "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "incoming read request page status is Page Hit",
+        "EventCode": "0x1C",
+        "EventName": "UNC_M_DRAM_PAGE_HIT_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "incoming write request page status is Page Hit",
+        "EventCode": "0x1F",
+        "EventName": "UNC_M_DRAM_PAGE_HIT_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "incoming read request page status is Page Miss",
+        "EventCode": "0x1E",
+        "EventName": "UNC_M_DRAM_PAGE_MISS_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "incoming write request page status is Page Miss",
+        "EventCode": "0x21",
+        "EventName": "UNC_M_DRAM_PAGE_MISS_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Any Rank at Hot state",
+        "EventCode": "0x19",
+        "EventName": "UNC_M_DRAM_THERMAL_HOT",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Any Rank at Warm state",
+        "EventCode": "0x1A",
+        "EventName": "UNC_M_DRAM_THERMAL_WARM",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Incoming read prefetch request from IA.",
+        "EventCode": "0x0A",
+        "EventName": "UNC_M_PREFETCH_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration",
+        "EventCode": "0x28",
+        "EventName": "UNC_M_PRE_COUNT_IDLE",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "PRE command sent to DRAM for a read/write request",
+        "EventCode": "0x27",
+        "EventName": "UNC_M_PRE_COUNT_PAGE_MISS",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Incoming VC0 read request",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_VC0_REQUESTS_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Incoming VC0 write request",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_VC0_REQUESTS_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Incoming VC1 read request",
+        "EventCode": "0x04",
+        "EventName": "UNC_M_VC1_REQUESTS_RD",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Incoming VC1 write request",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_VC1_REQUESTS_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    }
+]
diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json
new file mode 100644
index 0000000000000..f9e7777cd2be9
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/uncore-other.json
@@ -0,0 +1,33 @@
+[
+    {
+        "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+        "EventCode": "0x84",
+        "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.",
+        "EventCode": "0x80",
+        "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+        "EventCode": "0x81",
+        "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+        "EventCode": "0xff",
+        "EventName": "UNC_CLOCK.SOCKET",
+        "PerPkg": "1",
+        "Unit": "CLOCK"
+    }
+]
-- 
GitLab


From 2bb3fbad4c3b3b5b6d5ac537b6ab404443fc5224 Mon Sep 17 00:00:00 2001
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Date: Thu, 24 Nov 2022 11:14:40 +0800
Subject: [PATCH 2665/4122] perf vendor events intel: Add metrics for
 Alderlake-N

Add JSON metrics for Alderlake-N to perf.

It only included E-core metrics.

E-core metrics based on E-core TMA v2.2 (E-core_TMA_Metrics.csv)

It is downloaded from:
  https://github.com/intel/perfmon/

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221124031441.110134-3-zhengjun.xing@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/x86/alderlaken/adln-metrics.json     | 583 ++++++++++++++++++
 1 file changed, 583 insertions(+)
 create mode 100644 tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json

diff --git a/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
new file mode 100644
index 0000000000000..c57e9f325fb05
--- /dev/null
+++ b/tools/perf/pmu-events/arch/x86/alderlaken/adln-metrics.json
@@ -0,0 +1,583 @@
+[
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to frontend stalls.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "tma_frontend_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY / SLOTS",
+        "MetricGroup": "TopdownL2;tma_frontend_bound_group",
+        "MetricName": "tma_frontend_latency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to instruction cache misses.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.ICACHE / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+        "MetricName": "tma_icache",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.ITLB / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+        "MetricName": "tma_itlb",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend",
+        "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_DETECT / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+        "MetricName": "tma_branch_detect",
+        "PublicDescription": "Counts the number of issue slots  that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.BRANCH_RESTEER / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_latency_group",
+        "MetricName": "tma_branch_resteer",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH / SLOTS",
+        "MetricGroup": "TopdownL2;tma_frontend_bound_group",
+        "MetricName": "tma_frontend_bandwidth",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to the microcode sequencer (MS).",
+        "MetricExpr": "TOPDOWN_FE_BOUND.CISC / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+        "MetricName": "tma_cisc",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to decode stalls.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.DECODE / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+        "MetricName": "tma_decode",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to wrong predecodes.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.PREDECODE / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+        "MetricName": "tma_predecode",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not delivered by the frontend due to other common frontend stalls not categorized.",
+        "MetricExpr": "TOPDOWN_FE_BOUND.OTHER / SLOTS",
+        "MetricGroup": "TopdownL3;tma_frontend_bandwidth_group",
+        "MetricName": "tma_other_fb",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear",
+        "MetricExpr": "(SLOTS - (TOPDOWN_FE_BOUND.ALL + TOPDOWN_BE_BOUND.ALL + TOPDOWN_RETIRING.ALL)) / SLOTS",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "tma_bad_speculation",
+        "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ). Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to branch mispredicts.",
+        "MetricExpr": "TOPDOWN_BAD_SPECULATION.MISPREDICT / SLOTS",
+        "MetricGroup": "TopdownL2;tma_bad_speculation_group",
+        "MetricName": "tma_branch_mispredicts",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+        "MetricExpr": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS / SLOTS",
+        "MetricGroup": "TopdownL2;tma_bad_speculation_group",
+        "MetricName": "tma_machine_clears",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to a machine clear (slow nuke).",
+        "MetricExpr": "TOPDOWN_BAD_SPECULATION.NUKE / SLOTS",
+        "MetricGroup": "TopdownL3;tma_machine_clears_group",
+        "MetricName": "tma_nuke",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to SMC. ",
+        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.SMC / MACHINE_CLEARS.SLOW)",
+        "MetricGroup": "TopdownL4;tma_nuke_group",
+        "MetricName": "tma_smc",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory ordering. ",
+        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.MEMORY_ORDERING / MACHINE_CLEARS.SLOW)",
+        "MetricGroup": "TopdownL4;tma_nuke_group",
+        "MetricName": "tma_memory_ordering",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to FP assists. ",
+        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.FP_ASSIST / MACHINE_CLEARS.SLOW)",
+        "MetricGroup": "TopdownL4;tma_nuke_group",
+        "MetricName": "tma_fp_assist",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to memory disambiguation. ",
+        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.DISAMBIGUATION / MACHINE_CLEARS.SLOW)",
+        "MetricGroup": "TopdownL4;tma_nuke_group",
+        "MetricName": "tma_disambiguation",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears relative to the number of nuke slots due to page faults. ",
+        "MetricExpr": "tma_nuke * (MACHINE_CLEARS.PAGE_FAULT / MACHINE_CLEARS.SLOW)",
+        "MetricGroup": "TopdownL4;tma_nuke_group",
+        "MetricName": "tma_page_fault",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to a machine clear classified as a fast nuke due to memory ordering, memory disambiguation and memory renaming.",
+        "MetricExpr": "TOPDOWN_BAD_SPECULATION.FASTNUKE / SLOTS",
+        "MetricGroup": "TopdownL3;tma_machine_clears_group",
+        "MetricName": "tma_fast_nuke",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots  that were not consumed by the backend due to backend stalls",
+        "MetricExpr": "TOPDOWN_BE_BOUND.ALL / SLOTS",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "tma_backend_bound",
+        "PublicDescription": "Counts the total number of issue slots  that were not consumed by the backend due to backend stalls.  Note that uops must be available for consumption in order for this event to count.  If a uop is not available (IQ is empty), this event will not count.   The rest of these subevents count backend stalls, in cycles, due to an outstanding request which is memory bound vs core bound.   The subevents are not slot based events and therefore can not be precisely added or subtracted from the Backend_Bound_Aux subevents which are slot based.",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles due to backend bound stalls that are core execution bound and not attributed to outstanding demand load or store stalls. ",
+        "MetricExpr": "max(0, tma_backend_bound - tma_load_store_bound)",
+        "MetricGroup": "TopdownL2;tma_backend_bound_group",
+        "MetricName": "tma_core_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to stores or loads. ",
+        "MetricExpr": "min((TOPDOWN_BE_BOUND.ALL / SLOTS), (LD_HEAD.ANY_AT_RET / CLKS) + tma_store_bound)",
+        "MetricGroup": "TopdownL2;tma_backend_bound_group",
+        "MetricName": "tma_load_store_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to store buffer full.",
+        "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)",
+        "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+        "MetricName": "tma_store_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a load block.",
+        "MetricExpr": "LD_HEAD.L1_BOUND_AT_RET / CLKS",
+        "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+        "MetricName": "tma_l1_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a store forward block.",
+        "MetricExpr": "LD_HEAD.ST_ADDR_AT_RET / CLKS",
+        "MetricGroup": "TopdownL4;tma_l1_bound_group",
+        "MetricName": "tma_store_fwd",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a first level TLB miss.",
+        "MetricExpr": "LD_HEAD.DTLB_MISS_AT_RET / CLKS",
+        "MetricGroup": "TopdownL4;tma_l1_bound_group",
+        "MetricName": "tma_stlb_hit",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a second level TLB miss requiring a page walk.",
+        "MetricExpr": "LD_HEAD.PGWALK_AT_RET / CLKS",
+        "MetricGroup": "TopdownL4;tma_l1_bound_group",
+        "MetricName": "tma_stlb_miss",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the oldest load of the load buffer is stalled at retirement due to a number of other load blocks.",
+        "MetricExpr": "LD_HEAD.OTHER_AT_RET / CLKS",
+        "MetricGroup": "TopdownL4;tma_l1_bound_group",
+        "MetricName": "tma_other_l1",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the L2 Cache.",
+        "MetricExpr": "(MEM_BOUND_STALLS.LOAD_L2_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_BOUND_STALLS.LOAD)",
+        "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+        "MetricName": "tma_l2_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles a core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "MetricExpr": "(MEM_BOUND_STALLS.LOAD_LLC_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_BOUND_STALLS.LOAD)",
+        "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+        "MetricName": "tma_l3_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
+        "MetricExpr": "(MEM_BOUND_STALLS.LOAD_DRAM_HIT / CLKS) - (MEM_BOUND_STALLS_AT_RET_CORRECTION * MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_BOUND_STALLS.LOAD)",
+        "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+        "MetricName": "tma_dram_bound",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hits in the L2, LLC, DRAM or MMIO (Non-DRAM) but could not be correctly attributed or cycles in which the load miss is waiting on a request buffer.",
+        "MetricExpr": "max(0, tma_load_store_bound - (tma_store_bound + tma_l1_bound + tma_l2_bound + tma_l3_bound + tma_dram_bound))",
+        "MetricGroup": "TopdownL3;tma_load_store_bound_group",
+        "MetricName": "tma_other_load_store",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots  that were not consumed by the backend due to backend stalls",
+        "MetricExpr": "tma_backend_bound",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "tma_backend_bound_aux",
+        "PublicDescription": "Counts the total number of issue slots  that were not consumed by the backend due to backend stalls.  Note that UOPS must be available for consumption in order for this event to count.  If a uop is not available (IQ is empty), this event will not count.  All of these subevents count backend stalls, in slots, due to a resource limitation.   These are not cycle based events and therefore can not be precisely added or subtracted from the Backend_Bound subevents which are cycle based.  These subevents are supplementary to Backend_Bound and can be used to analyze results from a resource perspective at allocation.  ",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots  that were not consumed by the backend due to backend stalls",
+        "MetricExpr": "tma_backend_bound",
+        "MetricGroup": "TopdownL2;tma_backend_bound_aux_group",
+        "MetricName": "tma_resource_bound",
+        "PublicDescription": "Counts the total number of issue slots  that were not consumed by the backend due to backend stalls.  Note that uops must be available for consumption in order for this event to count.  If a uop is not available (IQ is empty), this event will not count. ",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
+        "MetricExpr": "TOPDOWN_BE_BOUND.MEM_SCHEDULER / SLOTS",
+        "MetricGroup": "TopdownL3;tma_resource_bound_group",
+        "MetricName": "tma_mem_scheduler",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to store buffer full",
+        "MetricExpr": "tma_mem_scheduler * (MEM_SCHEDULER_BLOCK.ST_BUF / MEM_SCHEDULER_BLOCK.ALL)",
+        "MetricGroup": "TopdownL4;tma_mem_scheduler_group",
+        "MetricName": "tma_st_buffer",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to load buffer full",
+        "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.LD_BUF / MEM_SCHEDULER_BLOCK.ALL",
+        "MetricGroup": "TopdownL4;tma_mem_scheduler_group",
+        "MetricName": "tma_ld_buffer",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles, relative to the number of mem_scheduler slots, in which uops are blocked due to RSV full relative ",
+        "MetricExpr": "tma_mem_scheduler * MEM_SCHEDULER_BLOCK.RSV / MEM_SCHEDULER_BLOCK.ALL",
+        "MetricGroup": "TopdownL4;tma_mem_scheduler_group",
+        "MetricName": "tma_rsv",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
+        "MetricExpr": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER / SLOTS",
+        "MetricGroup": "TopdownL3;tma_resource_bound_group",
+        "MetricName": "tma_non_mem_scheduler",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
+        "MetricExpr": "TOPDOWN_BE_BOUND.REGISTER / SLOTS",
+        "MetricGroup": "TopdownL3;tma_resource_bound_group",
+        "MetricName": "tma_register",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
+        "MetricExpr": "TOPDOWN_BE_BOUND.REORDER_BUFFER / SLOTS",
+        "MetricGroup": "TopdownL3;tma_resource_bound_group",
+        "MetricName": "tma_reorder_buffer",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to certain allocation restrictions.",
+        "MetricExpr": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS / SLOTS",
+        "MetricGroup": "TopdownL3;tma_resource_bound_group",
+        "MetricName": "tma_alloc_restriction",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
+        "MetricExpr": "TOPDOWN_BE_BOUND.SERIALIZATION / SLOTS",
+        "MetricGroup": "TopdownL3;tma_resource_bound_group",
+        "MetricName": "tma_serialization",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the numer of issue slots  that result in retirement slots. ",
+        "MetricExpr": "TOPDOWN_RETIRING.ALL / SLOTS",
+        "MetricGroup": "TopdownL1",
+        "MetricName": "tma_retiring",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of uops that are not from the microsequencer. ",
+        "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS) / SLOTS",
+        "MetricGroup": "TopdownL2;tma_retiring_group",
+        "MetricName": "tma_base",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations per uop with all default weighting.",
+        "MetricExpr": "UOPS_RETIRED.FPDIV / SLOTS",
+        "MetricGroup": "TopdownL3;tma_base_group",
+        "MetricName": "tma_fp_uops",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of uops retired excluding ms and fp div uops.",
+        "MetricExpr": "(TOPDOWN_RETIRING.ALL - UOPS_RETIRED.MS - UOPS_RETIRED.FPDIV) / SLOTS",
+        "MetricGroup": "TopdownL3;tma_base_group",
+        "MetricName": "tma_other_ret",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS)",
+        "MetricExpr": "UOPS_RETIRED.MS / SLOTS",
+        "MetricGroup": "TopdownL2;tma_retiring_group",
+        "MetricName": "tma_ms_uops",
+        "PublicDescription": "Counts the number of uops that are from the complex flows issued by the micro-sequencer (MS).  This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "CPU_CLK_UNHALTED.CORE",
+        "MetricName": "CLKS"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "CPU_CLK_UNHALTED.CORE_P",
+        "MetricName": "CLKS_P"
+    },
+    {
+        "BriefDescription": "",
+        "MetricExpr": "5 * CLKS",
+        "MetricName": "SLOTS"
+    },
+    {
+        "BriefDescription": "Instructions Per Cycle",
+        "MetricExpr": "INST_RETIRED.ANY / CLKS",
+        "MetricName": "IPC"
+    },
+    {
+        "BriefDescription": "Cycles Per Instruction",
+        "MetricExpr": "CLKS / INST_RETIRED.ANY",
+        "MetricName": "CPI"
+    },
+    {
+        "BriefDescription": "Uops Per Instruction",
+        "MetricExpr": "UOPS_RETIRED.ALL / INST_RETIRED.ANY",
+        "MetricName": "UPI"
+    },
+    {
+        "BriefDescription": "Percentage of total non-speculative loads with a store forward or unknown store address block",
+        "MetricExpr": "100 * LD_BLOCKS.DATA_UNKNOWN / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "Store_Fwd_Blocks"
+    },
+    {
+        "BriefDescription": "Percentage of total non-speculative loads with a address aliasing block",
+        "MetricExpr": "100 * LD_BLOCKS.4K_ALIAS / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "Address_Alias_Blocks"
+    },
+    {
+        "BriefDescription": "Percentage of total non-speculative loads that are splits",
+        "MetricExpr": "100 * MEM_UOPS_RETIRED.SPLIT_LOADS / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "Load_Splits"
+    },
+    {
+        "BriefDescription": "Instructions per Branch (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricName": "IpBranch"
+    },
+    {
+        "BriefDescription": "Instruction per (near) call (lower number means higher occurrence rate)",
+        "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.CALL",
+        "MetricName": "IpCall"
+    },
+    {
+        "BriefDescription": "Instructions per Load",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_LOADS",
+        "MetricName": "IpLoad"
+    },
+    {
+        "BriefDescription": "Instructions per Store",
+        "MetricExpr": "INST_RETIRED.ANY / MEM_UOPS_RETIRED.ALL_STORES",
+        "MetricName": "IpStore"
+    },
+    {
+        "BriefDescription": "Number of Instructions per non-speculative Branch Misprediction",
+        "MetricExpr": "INST_RETIRED.ANY / BR_MISP_RETIRED.ALL_BRANCHES",
+        "MetricName": "IpMispredict"
+    },
+    {
+        "BriefDescription": "Instructions per Far Branch",
+        "MetricExpr": "INST_RETIRED.ANY / (BR_INST_RETIRED.FAR_BRANCH / 2)",
+        "MetricName": "IpFarBranch"
+    },
+    {
+        "BriefDescription": "Ratio of all branches which mispredict",
+        "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BR_INST_RETIRED.ALL_BRANCHES",
+        "MetricName": "Branch_Mispredict_Ratio"
+    },
+    {
+        "BriefDescription": "Ratio between Mispredicted branches and unknown branches",
+        "MetricExpr": "BR_MISP_RETIRED.ALL_BRANCHES / BACLEARS.ANY",
+        "MetricName": "Branch_Mispredict_to_Unknown_Branch_Ratio"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are ucode ops",
+        "MetricExpr": "100 * UOPS_RETIRED.MS / UOPS_RETIRED.ALL",
+        "MetricName": "Microcode_Uop_Ratio"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are FPDiv uops",
+        "MetricExpr": "100 * UOPS_RETIRED.FPDIV / UOPS_RETIRED.ALL",
+        "MetricName": "FPDiv_Uop_Ratio"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are IDiv uops",
+        "MetricExpr": "100 * UOPS_RETIRED.IDIV / UOPS_RETIRED.ALL",
+        "MetricName": "IDiv_Uop_Ratio"
+    },
+    {
+        "BriefDescription": "Percentage of all uops which are x87 uops",
+        "MetricExpr": "100 * UOPS_RETIRED.X87 / UOPS_RETIRED.ALL",
+        "MetricName": "X87_Uop_Ratio"
+    },
+    {
+        "BriefDescription": "Average Frequency Utilization relative nominal frequency",
+        "MetricExpr": "CLKS / CPU_CLK_UNHALTED.REF_TSC",
+        "MetricName": "Turbo_Utilization"
+    },
+    {
+        "BriefDescription": "Fraction of cycles spent in Kernel mode",
+        "MetricExpr": "cpu@CPU_CLK_UNHALTED.CORE@k / CPU_CLK_UNHALTED.CORE",
+        "MetricName": "Kernel_Utilization"
+    },
+    {
+        "BriefDescription": "Average CPU Utilization",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
+        "MetricName": "CPU_Utilization"
+    },
+    {
+        "BriefDescription": "Cycle cost per L2 hit",
+        "MetricExpr": "MEM_BOUND_STALLS.LOAD_L2_HIT / MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "MetricName": "Cycles_per_Demand_Load_L2_Hit"
+    },
+    {
+        "BriefDescription": "Cycle cost per LLC hit",
+        "MetricExpr": "MEM_BOUND_STALLS.LOAD_LLC_HIT / MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "MetricName": "Cycles_per_Demand_Load_L3_Hit"
+    },
+    {
+        "BriefDescription": "Cycle cost per DRAM hit",
+        "MetricExpr": "MEM_BOUND_STALLS.LOAD_DRAM_HIT / MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "MetricName": "Cycles_per_Demand_Load_DRAM_Hit"
+    },
+    {
+        "BriefDescription": "Percent of instruction miss cost that hit in the L2",
+        "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_L2_HIT / (MEM_BOUND_STALLS.IFETCH)",
+        "MetricName": "Inst_Miss_Cost_L2Hit_Percent"
+    },
+    {
+        "BriefDescription": "Percent of instruction miss cost that hit in the L3",
+        "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_LLC_HIT / (MEM_BOUND_STALLS.IFETCH)",
+        "MetricName": "Inst_Miss_Cost_L3Hit_Percent"
+    },
+    {
+        "BriefDescription": "Percent of instruction miss cost that hit in DRAM",
+        "MetricExpr": "100 * MEM_BOUND_STALLS.IFETCH_DRAM_HIT / (MEM_BOUND_STALLS.IFETCH)",
+        "MetricName": "Inst_Miss_Cost_DRAMHit_Percent"
+    },
+    {
+        "BriefDescription": "load ops retired per 1000 instruction",
+        "MetricExpr": "1000 * MEM_UOPS_RETIRED.ALL_LOADS / INST_RETIRED.ANY",
+        "MetricName": "MemLoadPKI"
+    },
+    {
+        "BriefDescription": "C1 residency percent per core",
+        "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C1_Core_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C6 residency percent per core",
+        "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Core_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C7 residency percent per core",
+        "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Core_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C2 residency percent per package",
+        "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C2_Pkg_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C3 residency percent per package",
+        "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C3_Pkg_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C6 residency percent per package",
+        "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C6_Pkg_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C7 residency percent per package",
+        "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C7_Pkg_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C8 residency percent per package",
+        "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C8_Pkg_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C9 residency percent per package",
+        "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C9_Pkg_Residency",
+        "ScaleUnit": "100%"
+    },
+    {
+        "BriefDescription": "C10 residency percent per package",
+        "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
+        "MetricGroup": "Power",
+        "MetricName": "C10_Pkg_Residency",
+        "ScaleUnit": "100%"
+    }
+]
-- 
GitLab


From 4c12f41a14d6c4dd2b4e387eaea249cee68bc01a Mon Sep 17 00:00:00 2001
From: Zhengjun Xing <zhengjun.xing@linux.intel.com>
Date: Thu, 24 Nov 2022 11:14:41 +0800
Subject: [PATCH 2666/4122] perf vendor events intel: Update events and metrics
 for alderlake

Update JSON events and metrics for alderlake to perf.

Based on ADL JSON event list v1.16:

https://github.com/intel/perfmon/tree/main/ADL/events

Generate the event list and metrics with the converter scripts:

https://github.com/intel/perfmon/pull/32

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221124031441.110134-4-zhengjun.xing@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../arch/x86/alderlake/adl-metrics.json       |   73 +-
 .../pmu-events/arch/x86/alderlake/cache.json  | 1391 +++++-------
 .../arch/x86/alderlake/floating-point.json    |   91 +-
 .../arch/x86/alderlake/frontend.json          |  224 +-
 .../pmu-events/arch/x86/alderlake/memory.json |  214 +-
 .../pmu-events/arch/x86/alderlake/other.json  |  132 +-
 .../arch/x86/alderlake/pipeline.json          | 1932 ++++++-----------
 .../arch/x86/alderlake/uncore-memory.json     |  185 +-
 .../arch/x86/alderlake/uncore-other.json      |   73 +-
 .../arch/x86/alderlake/virtual-memory.json    |  223 +-
 tools/perf/pmu-events/arch/x86/mapfile.csv    |    2 +-
 11 files changed, 1687 insertions(+), 2853 deletions(-)

diff --git a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
index e06d26ad51385..edf440e9359af 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/adl-metrics.json
@@ -1287,14 +1287,14 @@
     },
     {
         "BriefDescription": "Average CPU Utilization",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricGroup": "HPC;Summary",
         "MetricName": "CPU_Utilization",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Measured Average Frequency for unhalted processors [GHz]",
-        "MetricExpr": "Turbo_Utilization * msr@tsc@ / 1000000000 / duration_time",
+        "MetricExpr": "Turbo_Utilization * TSC / 1000000000 / duration_time",
         "MetricGroup": "Power;Summary",
         "MetricName": "Average_Frequency",
         "Unit": "cpu_core"
@@ -1337,18 +1337,25 @@
     },
     {
         "BriefDescription": "Average external Memory Bandwidth Use for reads and writes [GB / sec]",
-        "MetricExpr": "64 * (arb@event\\=0x81\\,umask\\=0x1@ + arb@event\\=0x84\\,umask\\=0x1@) / 1000000 / duration_time / 1000",
+        "MetricExpr": "64 * (UNC_ARB_TRK_REQUESTS.ALL + UNC_ARB_COH_TRK_REQUESTS.ALL) / 1000000 / duration_time / 1000",
         "MetricGroup": "HPC;Mem;MemoryBW;SoC",
         "MetricName": "DRAM_BW_Use",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Average number of parallel requests to external memory. Accounts for all requests",
-        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / arb@event\\=0x81\\,umask\\=0x1@",
+        "MetricExpr": "UNC_ARB_TRK_OCCUPANCY.ALL / UNC_ARB_TRK_REQUESTS.ALL",
         "MetricGroup": "Mem;SoC",
         "MetricName": "MEM_Parallel_Requests",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Socket actual clocks when any core is active on that socket",
+        "MetricExpr": "UNC_CLOCK.SOCKET",
+        "MetricGroup": "SoC",
+        "MetricName": "Socket_CLKS",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Instructions per Far Branch ( Far Branches apply upon transition from application to operating system, handling interrupts, exceptions) [lower number means higher occurrence rate]",
         "MetricExpr": "INST_RETIRED.ANY / BR_INST_RETIRED.FAR_BRANCH:u",
@@ -1356,6 +1363,12 @@
         "MetricName": "IpFarBranch",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Uncore frequency per die [GHZ]",
+        "MetricExpr": "Socket_CLKS / #num_dies / duration_time / 1000000000",
+        "MetricGroup": "SoC",
+        "MetricName": "UNCORE_FREQ"
+    },
     {
         "BriefDescription": "Counts the number of issue slots  that were not consumed by the backend due to frontend stalls.",
         "MetricExpr": "TOPDOWN_FE_BOUND.ALL / SLOTS",
@@ -1902,7 +1915,7 @@
     },
     {
         "BriefDescription": "Average CPU Utilization",
-        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / msr@tsc@",
+        "MetricExpr": "CPU_CLK_UNHALTED.REF_TSC / TSC",
         "MetricName": "CPU_Utilization",
         "Unit": "cpu_atom"
     },
@@ -1950,62 +1963,72 @@
     },
     {
         "BriefDescription": "C1 residency percent per core",
-        "MetricExpr": "(cstate_core@c1\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_core@c1\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C1_Core_Residency"
+        "MetricName": "C1_Core_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C6 residency percent per core",
-        "MetricExpr": "(cstate_core@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_core@c6\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C6_Core_Residency"
+        "MetricName": "C6_Core_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C7 residency percent per core",
-        "MetricExpr": "(cstate_core@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_core@c7\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C7_Core_Residency"
+        "MetricName": "C7_Core_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C2 residency percent per package",
-        "MetricExpr": "(cstate_pkg@c2\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_pkg@c2\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C2_Pkg_Residency"
+        "MetricName": "C2_Pkg_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C3 residency percent per package",
-        "MetricExpr": "(cstate_pkg@c3\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_pkg@c3\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C3_Pkg_Residency"
+        "MetricName": "C3_Pkg_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C6 residency percent per package",
-        "MetricExpr": "(cstate_pkg@c6\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_pkg@c6\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C6_Pkg_Residency"
+        "MetricName": "C6_Pkg_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C7 residency percent per package",
-        "MetricExpr": "(cstate_pkg@c7\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_pkg@c7\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C7_Pkg_Residency"
+        "MetricName": "C7_Pkg_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C8 residency percent per package",
-        "MetricExpr": "(cstate_pkg@c8\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_pkg@c8\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C8_Pkg_Residency"
+        "MetricName": "C8_Pkg_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C9 residency percent per package",
-        "MetricExpr": "(cstate_pkg@c9\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_pkg@c9\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C9_Pkg_Residency"
+        "MetricName": "C9_Pkg_Residency",
+        "ScaleUnit": "100%"
     },
     {
         "BriefDescription": "C10 residency percent per package",
-        "MetricExpr": "(cstate_pkg@c10\\-residency@ / msr@tsc@) * 100",
+        "MetricExpr": "cstate_pkg@c10\\-residency@ / TSC",
         "MetricGroup": "Power",
-        "MetricName": "C10_Pkg_Residency"
+        "MetricName": "C10_Pkg_Residency",
+        "ScaleUnit": "100%"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/cache.json b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
index 2cc62d2779d20..adc9887b8ae0f 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/cache.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/cache.json
@@ -1,1178 +1,871 @@
 [
-    {
-        "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x2e",
-        "EventName": "LONGEST_LAT_CACHE.MISS",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x41",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x2e",
-        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x4f",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.IFETCH",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x38",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x20",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x8",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x10",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.LOAD",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x7",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x34",
-        "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of load uops retired that hit in DRAM.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x80",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons:  load buffer, store buffer or RSV full.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x04",
-        "EventName": "MEM_SCHEDULER_BLOCK.ALL",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x7",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x04",
-        "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x04",
-        "EventName": "MEM_SCHEDULER_BLOCK.RSV",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x04",
-        "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of load uops retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x81",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of store uops retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x82",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x80",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x10",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x100",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x20",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x4",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x200",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x40",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
-        "L1_Hit_Indication": "1",
-        "MSRIndex": "0x3F6",
-        "MSRValue": "0x8",
-        "PEBS": "2",
-        "PEBScounters": "0,1",
-        "SampleAfterValue": "1000003",
-        "TakenAlone": "1",
-        "UMask": "0x5",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of retired split load uops.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x41",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "Data_LA": "1",
-        "EventCode": "0xd0",
-        "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
-        "L1_Hit_Indication": "1",
-        "PEBS": "2",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "UMask": "0x6",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0001",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0001",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x4003C0001",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x8003C0001",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F803C0002",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10003C0002",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.ICACHE",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x20",
-        "Unit": "cpu_atom"
-    },
     {
         "BriefDescription": "L1D.HWPF_MISS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x51",
         "EventName": "L1D.HWPF_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts the number of cache lines replaced in L1 data cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x51",
         "EventName": "L1D.REPLACEMENT",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.FB_FULL",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of cycles a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailablability.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability.",
         "CounterMask": "1",
         "EdgeDetect": "1",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.FB_FULL_PERIODS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of phases a demand request has waited due to L1D Fill Buffer (FB) unavailability. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event L1D_PEND_MISS.L2_STALLS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "Deprecated": "1",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.L2_STALL",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of cycles a demand request has waited due to L1D due to lack of L2 resources.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.L2_STALLS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of cycles a demand request has waited due to L1D due to lack of L2 resources. Demand requests include cacheable/uncacheable demand load, store, lock or SW prefetch accesses.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of L1D misses that are outstanding",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.PENDING",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts number of L1D misses that are outstanding in each cycle, that is each cycle the number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch. Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles with L1D load Misses outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x48",
         "EventName": "L1D_PEND_MISS.PENDING_CYCLES",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts duration of L1D miss outstanding in cycles.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 cache lines filling L2",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x25",
         "EventName": "L2_LINES_IN.ALL",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x1f",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cache lines that have been L2 hardware prefetched but not used by demand accesses",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x26",
         "EventName": "L2_LINES_OUT.USELESS_HWPF",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cache lines that have been prefetched by the L2 hardware prefetcher but not used by demand access when evicted from the L2 cache",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "All accesses to L2 cache[This event is alias to L2_RQSTS.REFERENCES]",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_REQUEST.ALL",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.REFERENCES]",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0xff",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_RQSTS.MISS]",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_REQUEST.MISS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_RQSTS.MISS]",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x3f",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 code requests",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.ALL_CODE_RD",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of L2 code requests.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0xe4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Demand Data Read access L2 cache",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts Demand Data Read requests accessing the L2 cache. These requests may hit or miss L2 cache. True-miss exclude misses that were merged with ongoing L2 misses. An access is counted once.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0xe1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Demand requests that miss L2 cache",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.ALL_DEMAND_MISS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts demand requests that miss L2 cache.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x27",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2_RQSTS.ALL_HWPF",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.ALL_HWPF",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0xf0",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "RFO requests to L2 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.ALL_RFO",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0xe2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 cache hits when fetching instructions, code reads.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.CODE_RD_HIT",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0xc4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2 cache misses when fetching instructions",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.CODE_RD_MISS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts L2 cache misses when fetching instructions.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x24",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Demand Data Read requests that hit L2 cache",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of demand Data Read requests initiated by load instructions that hit L2 cache.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0xc1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Demand Data Read miss L2 cache",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts demand Data Read requests with true-miss in the L2 cache. True-miss excludes misses that were merged with ongoing L2 misses. An access is counted once.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x21",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "L2_RQSTS.HWPF_MISS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.HWPF_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x30",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Read requests with true-miss in L2 cache.[This event is alias to L2_REQUEST.MISS]",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x24",
         "EventName": "L2_RQSTS.MISS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts read requests of any type with true-miss in the L2 cache. True-miss excludes L2 misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.MISS]",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x3f",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.REFERENCES",
-        "PEBScounters": "0,1,2,3",
+        "BriefDescription": "All accesses to L2 cache[This event is alias to L2_REQUEST.ALL]",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.REFERENCES",
+        "PublicDescription": "Counts all requests that were hit or true misses in L2 cache. True-miss excludes misses that were merged with ongoing L2 misses.[This event is alias to L2_REQUEST.ALL]",
+        "SampleAfterValue": "200003",
+        "UMask": "0xff",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "RFO requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_HIT",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "RFO requests that miss L2 cache",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.RFO_MISS",
+        "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x22",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "SW prefetch requests that hit L2 cache.",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.SWPF_HIT",
+        "PublicDescription": "Counts Software prefetch requests that hit the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xc8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "SW prefetch requests that miss L2 cache.",
+        "EventCode": "0x24",
+        "EventName": "L2_RQSTS.SWPF_MISS",
+        "PublicDescription": "Counts Software prefetch requests that miss the L2 cache. Accounts for PREFETCHNTA and PREFETCHT0/1/2 instructions when FB is not full.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x28",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of cacheable memory requests that miss in the LLC. Counts on a per core basis.",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "Counts the number of cacheable memory requests that miss in the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x41",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Core-originated cacheable requests that missed L3  (Except hardware prefetches to the L3)",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.MISS",
+        "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2.  It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x41",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of cacheable memory requests that access the LLC. Counts on a per core basis.",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts the number of cacheable memory requests that access the Last Level Cache (LLC). Requests include demand loads, reads for ownership (RFO), instruction fetches and L1 HW prefetches. If the platform has an L3 cache, the LLC is the L3 cache, otherwise it is the L2 cache. Counts on a per core basis.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4f",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)",
+        "EventCode": "0x2e",
+        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
+        "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches to the L1 and L2.  It does not include hardware prefetches to the L3, and may not count other types of requests to the L3.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x4f",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+        "SampleAfterValue": "200003",
+        "UMask": "0x38",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_DRAM_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or translation lookaside buffer (TLB) miss which hit in DRAM or MMIO (non-DRAM).",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0xff",
-        "Unit": "cpu_core"
+        "UMask": "0x20",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "RFO requests that hit L2 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.RFO_HIT",
-        "PEBScounters": "0,1,2,3",
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the L2 cache.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_L2_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the L2 cache.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0xc2",
-        "Unit": "cpu_core"
+        "UMask": "0x8",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "RFO requests that miss L2 cache",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.RFO_MISS",
-        "PEBScounters": "0,1,2,3",
+        "BriefDescription": "Counts the number of cycles the core is stalled due to an instruction cache or TLB miss which hit in the LLC or other core with HITE/F/M.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.IFETCH_LLC_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to an instruction cache or Translation Lookaside Buffer (TLB) miss which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x22",
-        "Unit": "cpu_core"
+        "UMask": "0x10",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "SW prefetch requests that hit L2 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.SWPF_HIT",
-        "PEBScounters": "0,1,2,3",
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in the L2, LLC, DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0xc8",
-        "Unit": "cpu_core"
+        "UMask": "0x7",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "SW prefetch requests that miss L2 cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x24",
-        "EventName": "L2_RQSTS.SWPF_MISS",
-        "PEBScounters": "0,1,2,3",
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load miss which hit in DRAM or MMIO (Non-DRAM).",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_DRAM_HIT",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x28",
-        "Unit": "cpu_core"
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Core-originated cacheable requests that missed L3  (Except hardware prefetches to the L3)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x2e",
-        "EventName": "LONGEST_LAT_CACHE.MISS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
-        "SampleAfterValue": "100003",
-        "Speculative": "1",
-        "UMask": "0x41",
-        "Unit": "cpu_core"
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the L2 cache.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_L2_HIT",
+        "SampleAfterValue": "200003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Core-originated cacheable requests that refer to L3 (Except hardware prefetches to the L3)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
-        "EventCode": "0x2e",
-        "EventName": "LONGEST_LAT_CACHE.REFERENCE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
-        "SampleAfterValue": "100003",
-        "Speculative": "1",
-        "UMask": "0x4f",
-        "Unit": "cpu_core"
+        "BriefDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the LLC or other core with HITE/F/M.",
+        "EventCode": "0x34",
+        "EventName": "MEM_BOUND_STALLS.LOAD_LLC_HIT",
+        "PublicDescription": "Counts the number of cycles the core is stalled due to a demand load which hit in the Last Level Cache (LLC) or other core with HITE/F/M.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Retired load instructions.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.ALL_LOADS",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all retired load instructions. This event accounts for SW prefetch instructions of PREFETCHNTA or PREFETCHT0/1/2 or PREFETCHW.",
         "SampleAfterValue": "1000003",
         "UMask": "0x81",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired store instructions.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.ALL_STORES",
-        "L1_Hit_Indication": "1",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all retired store instructions.",
         "SampleAfterValue": "1000003",
         "UMask": "0x82",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "All retired memory instructions.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.ANY",
-        "L1_Hit_Indication": "1",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all retired memory instructions - loads and stores.",
         "SampleAfterValue": "1000003",
         "UMask": "0x83",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired load instructions with locked access.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.LOCK_LOADS",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions with locked access.",
         "SampleAfterValue": "100007",
         "UMask": "0x21",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired load instructions that split across a cacheline boundary.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.SPLIT_LOADS",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
         "UMask": "0x41",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired store instructions that split across a cacheline boundary.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.SPLIT_STORES",
-        "L1_Hit_Indication": "1",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.",
         "SampleAfterValue": "100003",
         "UMask": "0x42",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired load instructions that miss the STLB.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Number of retired load instructions that (start a) miss in the 2nd-level TLB (STLB).",
         "SampleAfterValue": "100003",
         "UMask": "0x11",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired store instructions that miss the STLB.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd0",
         "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES",
-        "L1_Hit_Indication": "1",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Number of retired store instructions that (start a) miss in the 2nd-level TLB (STLB).",
         "SampleAfterValue": "100003",
         "UMask": "0x12",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Completed demand load uops that miss the L1 d-cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x43",
         "EventName": "MEM_LOAD_COMPLETED.L1_MISS_ANY",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Number of completed demand load requests that missed the L1 data cache including shadow misses (FB hits, merge to an ongoing L1D miss)",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0xfd",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd2",
         "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_FWD",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
         "SampleAfterValue": "20011",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd2",
         "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
         "SampleAfterValue": "20011",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired load instructions whose data sources were HitM responses from shared L3",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Data_LA": "1",
         "EventCode": "0xd2",
         "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts retired load instructions whose data sources were HitM responses from shared L3.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
+        "SampleAfterValue": "20011",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions whose data sources were hits in L3 without snoops required.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
+        "Data_LA": "1",
+        "EventCode": "0xd2",
+        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache.",
         "SampleAfterValue": "20011",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
+        "Data_LA": "1",
+        "EventCode": "0xd3",
+        "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
+        "PEBS": "1",
+        "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
+        "Data_LA": "1",
+        "EventCode": "0xd4",
+        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
+        "PEBS": "1",
+        "PublicDescription": "Retired instructions with at least one load to uncacheable memory-type, or at least one cache-line split locked access (Bus Lock).",
+        "SampleAfterValue": "100007",
+        "UMask": "0x4",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready.",
+        "SampleAfterValue": "100007",
+        "UMask": "0x40",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x8",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with L2 cache hits as data sources.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions missed L2 cache as data sources.",
+        "SampleAfterValue": "100021",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache.",
+        "SampleAfterValue": "100021",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Retired load instructions whose data sources were L3 hit and cross-core snoop missed in on-pkg core cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "Data_LA": "1",
-        "EventCode": "0xd2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "20011",
+        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "PEBS": "1",
+        "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache.",
+        "SampleAfterValue": "50021",
+        "UMask": "0x20",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in DRAM.",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.DRAM_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x80",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in the L2 cache.",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of load uops retired that hit in the L3 cache.",
+        "Data_LA": "1",
+        "EventCode": "0xd1",
+        "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked for any of the following reasons:  load buffer, store buffer or RSV full.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ALL",
+        "SampleAfterValue": "20003",
+        "UMask": "0x7",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a load buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.LD_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to an RSV full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.RSV",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that uops are blocked due to a store buffer full condition.",
+        "EventCode": "0x04",
+        "EventName": "MEM_SCHEDULER_BLOCK.ST_BUF",
+        "SampleAfterValue": "20003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
+        "EventCode": "0x44",
+        "EventName": "MEM_STORE_RETIRED.L2_HIT",
+        "SampleAfterValue": "200003",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Retired load instructions whose data sources were hits in L3 without snoops required",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of load uops retired.",
         "Data_LA": "1",
-        "EventCode": "0xd2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.ALL_LOADS",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "100003",
-        "UMask": "0x8",
-        "Unit": "cpu_core"
+        "PublicDescription": "Counts the total number of load uops retired.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x81",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions whose data sources were L3 and cross-core snoop hits in on-pkg core cache",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of store uops retired.",
         "Data_LA": "1",
-        "EventCode": "0xd2",
-        "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NO_FWD",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.ALL_STORES",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "20011",
-        "UMask": "0x2",
-        "Unit": "cpu_core"
+        "PublicDescription": "Counts the total number of store uops retired.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x82",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions which data sources missed L3 but serviced from local dram",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd3",
-        "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "100007",
-        "UMask": "0x1",
-        "Unit": "cpu_core"
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_128",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x80",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 128 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd4",
-        "EventName": "MEM_LOAD_MISC_RETIRED.UC",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "100007",
-        "UMask": "0x4",
-        "Unit": "cpu_core"
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_16",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x10",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 16 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Number of completed demand load requests that missed the L1, but hit the FB(fill buffer), because a preceding miss to the same cacheline initiated the line to be brought into L1, but data is not yet ready in L1.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_RETIRED.FB_HIT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "100007",
-        "UMask": "0x40",
-        "Unit": "cpu_core"
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_256",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x100",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 256 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions with L1 cache hits as data sources",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_RETIRED.L1_HIT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_32",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x20",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 32 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
         "SampleAfterValue": "1000003",
-        "UMask": "0x1",
-        "Unit": "cpu_core"
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions missed L1 cache as data sources",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_RETIRED.L1_MISS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "200003",
-        "UMask": "0x8",
-        "Unit": "cpu_core"
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_4",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x4",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 4 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions with L2 cache hits as data sources",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_RETIRED.L2_HIT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "200003",
-        "UMask": "0x2",
-        "Unit": "cpu_core"
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_512",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x200",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 512 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions missed L2 cache as data sources",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_RETIRED.L2_MISS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "100021",
-        "UMask": "0x10",
-        "Unit": "cpu_core"
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_64",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x40",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 64 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions with L3 cache hits as data sources",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled.",
         "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_RETIRED.L3_HIT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "100021",
-        "UMask": "0x4",
-        "Unit": "cpu_core"
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.LOAD_LATENCY_GT_8",
+        "MSRIndex": "0x3F6",
+        "MSRValue": "0x8",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of tagged loads with an instruction latency that exceeds or equals the threshold of 8 cycles as defined in MEC_CR_PEBS_LD_LAT_THRESHOLD (3F6H). Only counts with PEBS enabled. If a PEBS record is generated, will populate the PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x5",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Retired load instructions missed L3 cache as data sources",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "BriefDescription": "Counts the number of retired split load uops.",
         "Data_LA": "1",
-        "EventCode": "0xd1",
-        "EventName": "MEM_LOAD_RETIRED.L3_MISS",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "50021",
-        "UMask": "0x20",
-        "Unit": "cpu_core"
+        "SampleAfterValue": "200003",
+        "UMask": "0x41",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "MEM_STORE_RETIRED.L2_HIT",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "EventCode": "0x44",
-        "EventName": "MEM_STORE_RETIRED.L2_HIT",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "200003",
-        "UMask": "0x1",
-        "Unit": "cpu_core"
+        "BriefDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled.",
+        "Data_LA": "1",
+        "EventCode": "0xd0",
+        "EventName": "MEM_UOPS_RETIRED.STORE_LATENCY",
+        "PEBS": "2",
+        "PublicDescription": "Counts the number of stores uops retired. Counts with or without PEBS enabled. If PEBS is enabled and a PEBS record is generated, will populate PEBS Latency and PEBS Data Source fields accordingly.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6",
+        "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Retired memory uops for any access",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe5",
         "EventName": "MEM_UOP_RETIRED.ANY",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of retired micro-operations (uops) for load or store memory accesses",
         "SampleAfterValue": "1000003",
         "UMask": "0x3",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts demand data reads that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM",
         "MSRIndex": "0x1a6,0x1a7",
@@ -1181,9 +874,28 @@
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, but no data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x4003C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were supplied by the L3 cache where a snoop was sent, the snoop hit, and non-modified data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x8003C0001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD",
         "MSRIndex": "0x1a6,0x1a7",
@@ -1192,9 +904,28 @@
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F803C0002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were supplied by the L3 cache where a snoop was sent, the snoop hit, and modified data was forwarded.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10003C0002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that resulted in a snoop hit in another cores caches, data forwarding is required as the data is modified.",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_RFO.L3_HIT.SNOOP_HITM",
         "MSRIndex": "0x1a6,0x1a7",
@@ -1205,139 +936,111 @@
     },
     {
         "BriefDescription": "OFFCORE_REQUESTS.ALL_REQUESTS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x21",
         "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x80",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Demand and prefetch data reads",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x21",
         "EventName": "OFFCORE_REQUESTS.DATA_RD",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Demand Data Read requests sent to uncore",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x21",
         "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
+        "Deprecated": "1",
         "Errata": "ADL038",
         "EventCode": "0x20",
         "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "Errata": "ADL038",
         "EventCode": "0x20",
         "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "For every cycle where the core is waiting on at least 1 outstanding Demand RFO request, increments by 1.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x20",
         "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "Errata": "ADL038",
         "EventCode": "0x20",
         "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DATA_RD",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of PREFETCHNTA instructions executed.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x40",
         "EventName": "SW_PREFETCH_ACCESS.NTA",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHNTA instructions executed.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of PREFETCHW instructions executed.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x40",
         "EventName": "SW_PREFETCH_ACCESS.PREFETCHW",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHW instructions executed.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of PREFETCHT0 instructions executed.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x40",
         "EventName": "SW_PREFETCH_ACCESS.T0",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHT0 instructions executed.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x40",
         "EventName": "SW_PREFETCH_ACCESS.T1_T2",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of PREFETCHT1 or PREFETCHT2 instructions executed.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to instruction cache misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ICACHE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20",
+        "Unit": "cpu_atom"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
index 48a4605fc057d..3eb7cab9b431a 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/floating-point.json
@@ -1,165 +1,124 @@
 [
-    {
-        "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc3",
-        "EventName": "MACHINE_CLEARS.FP_ASSIST",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc2",
-        "EventName": "UOPS_RETIRED.FPDIV",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x8",
-        "Unit": "cpu_atom"
-    },
     {
         "BriefDescription": "ARITH.FPDIV_ACTIVE",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xb0",
         "EventName": "ARITH.FPDIV_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts all microcode FP assists.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc1",
         "EventName": "ASSISTS.FP",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all microcode Floating Point assists.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "ASSISTS.SSE_AVX_MIX",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc1",
         "EventName": "ASSISTS.SSE_AVX_MIX",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "FP_ARITH_DISPATCHED.PORT_0",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_0",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "FP_ARITH_DISPATCHED.PORT_1",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "FP_ARITH_DISPATCHED.PORT_5",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb3",
         "EventName": "FP_ARITH_DISPATCHED.PORT_5",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 2 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
         "SampleAfterValue": "100003",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP14 RSQRT14 SQRT DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
         "SampleAfterValue": "100003",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 4 computation operations, one for each element.  Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
         "SampleAfterValue": "100003",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 8 computation operations, one for each element.  Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB HADD HSUB SUBADD MUL DIV MIN MAX SQRT RSQRT RCP DPP FM(N)ADD/SUB.  DPP and FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
         "SampleAfterValue": "100003",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
         "SampleAfterValue": "100003",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc7",
         "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired; some instructions will count twice as noted below.  Each count represents 1 computational operation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT RSQRT RCP FM(N)ADD/SUB.  FM(N)ADD/SUB instructions count twice as they perform 2 calculations per element. The DAZ and FTZ flags in the MXCSR register need to be set when using these events.",
         "SampleAfterValue": "100003",
         "UMask": "0x2",
         "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point operations retired that required microcode assist.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.FP_ASSIST",
+        "PublicDescription": "Counts the number of floating point operations retired that required microcode assist, which is not a reflection of the number of FP operations, instructions or uops.",
+        "SampleAfterValue": "20003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of floating point divide uops retired (x87 and SSE, including x87 sqrt).",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.FPDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x8",
+        "Unit": "cpu_atom"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
index da1a7ba0e5681..250cd128b6749 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/frontend.json
@@ -1,536 +1,416 @@
 [
     {
         "BriefDescription": "Counts the total number of BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
         "EventCode": "0xe6",
         "EventName": "BACLEARS.ANY",
-        "PEBScounters": "0,1,2,3,4,5",
+        "PublicDescription": "Counts the total number of BACLEARS, which occur when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend.  Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_atom"
     },
-    {
-        "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x80",
-        "EventName": "ICACHE.ACCESSES",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x3",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of instruction cache misses.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x80",
-        "EventName": "ICACHE.MISSES",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
     {
         "BriefDescription": "Stalls caused by changing prefix length of the instruction.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x87",
         "EventName": "DECODE.LCP",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.",
         "SampleAfterValue": "500009",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles the Microcode Sequencer is busy.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x87",
         "EventName": "DECODE.MS_BUSY",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "500009",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "DSB-to-MITE switch true penalty cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x61",
         "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Decode Stream Buffer (DSB) is a Uop-cache that holds translations of previously fetched instructions that were decoded by the legacy x86 decode pipeline (MITE). This event counts fetch penalty cycles when a transition occurs from DSB to MITE.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired Instructions who experienced DSB miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.ANY_DSB_MISS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x1",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired Instructions who experienced a critical DSB miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.DSB_MISS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x11",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of retired Instructions that experienced a critical DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. Critical means stalls were exposed to the back-end as a result of the DSB miss.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired Instructions who experienced iTLB true miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.ITLB_MISS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x14",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.L1I_MISS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x12",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions who experienced Instruction L1 Cache true miss.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.L2_MISS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x13",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions who experienced Instruction L2 Cache true miss.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions after front-end starvation of at least 1 cycle",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_1",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x600106",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 1 cycle which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_128",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x608006",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_16",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x601006",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions after front-end starvation of at least 2 cycles",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_2",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x600206",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of at least 2 cycles which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_256",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x610006",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x100206",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_32",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x602006",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_4",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x600406",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_512",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x620006",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_64",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x604006",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 8 cycles which was not interrupted by a back-end stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.LATENCY_GE_8",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x600806",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "FRONTEND_RETIRED.MS_FLOWS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.MS_FLOWS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x8",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.STLB_MISS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x15",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc6",
         "EventName": "FRONTEND_RETIRED.UNKNOWN_BRANCH",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x17",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of requests to the instruction cache for one or more bytes of a cache line.",
+        "EventCode": "0x80",
+        "EventName": "ICACHE.ACCESSES",
+        "PublicDescription": "Counts the total number of requests to the instruction cache.  The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line or byte chunk count as one.  Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x3",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of instruction cache misses.",
+        "EventCode": "0x80",
+        "EventName": "ICACHE.MISSES",
+        "PublicDescription": "Counts the number of missed requests to the instruction cache.  The event only counts new cache line accesses, so that multiple back to back fetches to the exact same cache line and byte chunk count as one.  Specifically, the event counts when accesses from sequential code crosses the cache line boundary, or when a branch target is moved to a new line or to a non-sequential byte chunk of the same line.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x80",
         "EventName": "ICACHE_DATA.STALLS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles where a code line fetch is stalled due to an L1 instruction cache miss. The decode pipeline works at a 32 Byte granularity.",
         "SampleAfterValue": "500009",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x83",
         "EventName": "ICACHE_TAG.STALLS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles where a code fetch is stalled due to L1 instruction cache tag miss.",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x79",
         "EventName": "IDQ.DSB_CYCLES_ANY",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles DSB is delivering optimal number of Uops",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "6",
         "EventCode": "0x79",
         "EventName": "IDQ.DSB_CYCLES_OK",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x79",
         "EventName": "IDQ.DSB_UOPS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles MITE is delivering any Uop",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x79",
         "EventName": "IDQ.MITE_CYCLES_ANY",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles MITE is delivering optimal number of Uops",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "6",
         "EventCode": "0x79",
         "EventName": "IDQ.MITE_CYCLES_OK",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of cycles where optimal number of uops was delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x79",
         "EventName": "IDQ.MITE_UOPS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles when uops are being delivered to IDQ while MS is busy",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x79",
         "EventName": "IDQ.MS_CYCLES_ANY",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of switches from DSB or MITE to the MS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EdgeDetect": "1",
         "EventCode": "0x79",
         "EventName": "IDQ.MS_SWITCHES",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops delivered to IDQ while MS is busy",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x79",
         "EventName": "IDQ.MS_UOPS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS).",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops not delivered by IDQ when backend of the machine is not stalled",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x9c",
         "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of uops not delivered to by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles when no uops are not delivered by the IDQ when backend of the machine is not stalled",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "6",
         "EventCode": "0x9c",
         "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of cycles when no uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles when optimal number of uops was delivered to the back-end when the back-end is not stalled",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0x9c",
         "EventName": "IDQ_UOPS_NOT_DELIVERED.CYCLES_FE_WAS_OK",
         "Invert": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of cycles when the optimal number of uops were delivered by the Instruction Decode Queue (IDQ) to the back-end of the pipeline when there was no back-end stalls. This event counts for one SMT thread in a given cycle.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     }
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/memory.json b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
index f894e4a0212b2..7595eb4ab46f0 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/memory.json
@@ -1,339 +1,234 @@
 [
+    {
+        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
+        "CounterMask": "6",
+        "EventCode": "0xa3",
+        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x6",
+        "Unit": "cpu_core"
+    },
     {
         "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to any number of reasons, including an L1 miss, WCB full, pagewalk, store address block or store data block, on a load that retires.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
         "EventCode": "0x05",
         "EventName": "LD_HEAD.ANY_AT_RET",
-        "PEBScounters": "0,1,2,3,4,5",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0xff",
         "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer is stalled due to a core bound stall including a store address match, a DTLB miss or a page walk that detains the load from retiring.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
         "EventCode": "0x05",
         "EventName": "LD_HEAD.L1_BOUND_AT_RET",
-        "PEBScounters": "0,1,2,3,4,5",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0xf4",
         "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
         "EventCode": "0x05",
         "EventName": "LD_HEAD.OTHER_AT_RET",
-        "PEBScounters": "0,1,2,3,4,5",
+        "PublicDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to other block cases such as pipeline conflicts, fences, etc.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0xc0",
         "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a pagewalk.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
         "EventCode": "0x05",
         "EventName": "LD_HEAD.PGWALK_AT_RET",
-        "PEBScounters": "0,1,2,3,4,5",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0xa0",
         "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a store address match.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
         "EventCode": "0x05",
         "EventName": "LD_HEAD.ST_ADDR_AT_RET",
-        "PEBScounters": "0,1,2,3,4,5",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x84",
         "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Counts the number of machine clears due to memory ordering caused by a snoop from an external agent. Does not count internally generated machine clears such as those due to memory disambiguation.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
         "EventCode": "0xc3",
         "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
-        "PEBScounters": "0,1,2,3,4,5",
         "SampleAfterValue": "20003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_atom"
     },
-    {
-        "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84400001",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84400001",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84400002",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x3F84400002",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
-        "CounterMask": "6",
-        "EventCode": "0xa3",
-        "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS",
-        "PEBScounters": "0,1,2,3",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x6",
-        "Unit": "cpu_core"
-    },
     {
         "BriefDescription": "Number of machine clears due to memory ordering conflicts.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc3",
         "EventName": "MACHINE_CLEARS.MEMORY_ORDERING",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of Machine Clears detected dye to memory ordering. Memory Ordering Machine Clears may apply when a memory read may not conform to the memory ordering rules of the x86 architecture",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "2",
         "EventCode": "0x47",
         "EventName": "MEMORY_ACTIVITY.CYCLES_L1D_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "3",
         "EventCode": "0x47",
         "EventName": "MEMORY_ACTIVITY.STALLS_L1D_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x3",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "MEMORY_ACTIVITY.STALLS_L2_MISS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "5",
         "EventCode": "0x47",
         "EventName": "MEMORY_ACTIVITY.STALLS_L2_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x5",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "MEMORY_ACTIVITY.STALLS_L3_MISS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "9",
         "EventCode": "0x47",
         "EventName": "MEMORY_ACTIVITY.STALLS_L3_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x9",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_128",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x80",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 128 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "1009",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_16",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x10",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 16 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "20011",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_256",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x100",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 256 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "503",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_32",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x20",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 32 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "100007",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_4",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x4",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 4 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "100003",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_512",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x200",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 512 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "101",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_64",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x40",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 64 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "2003",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "1,2,3,4,5,6,7",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.LOAD_LATENCY_GT_8",
         "MSRIndex": "0x3F6",
         "MSRValue": "0x8",
         "PEBS": "2",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts randomly selected loads when the latency from first dispatch to completion is greater than 8 cycles.  Reported latency may be longer than just the memory latency.",
         "SampleAfterValue": "50021",
-        "TakenAlone": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired memory store access operations. A PDist event for PEBS Store Latency Facility.",
-        "CollectPEBSRecord": "2",
         "Data_LA": "1",
         "EventCode": "0xcd",
         "EventName": "MEM_TRANS_RETIRED.STORE_SAMPLE",
         "PEBS": "2",
+        "PublicDescription": "Counts Retired memory accesses with at least 1 store operation. This PEBS event is the precisely-distributed (PDist) trigger covering all stores uops for sampling by the PEBS Store Latency Facility. The facility is described in Intel SDM Volume 3 section 19.9.8",
         "SampleAfterValue": "1000003",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_DATA_RD.L3_MISS",
         "MSRIndex": "0x1a6,0x1a7",
@@ -342,9 +237,28 @@
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts demand data reads that were not supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.L3_MISS_LOCAL",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_RFO.L3_MISS",
         "MSRIndex": "0x1a6,0x1a7",
@@ -352,5 +266,33 @@
         "SampleAfterValue": "100003",
         "UMask": "0x1",
         "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that were not supplied by the L3 cache.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.L3_MISS_LOCAL",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x3F84400002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Demand Data Read requests who miss L3 cache",
+        "EventCode": "0x21",
+        "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "Demand Data Read requests who miss L3 cache.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.",
+        "EventCode": "0x20",
+        "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD",
+        "PublicDescription": "For every cycle, increments by the number of demand data read requests pending that are known to have missed the L3 cache.  Note that this does not capture all elapsed cycles while requests are outstanding - only cycles from when the requests were known by the requesting core to have missed the L3 cache.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_core"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/other.json b/tools/perf/pmu-events/arch/x86/alderlake/other.json
index c49d8ce273100..329c611d7cf7a 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/other.json
@@ -1,111 +1,66 @@
 [
-    {
-        "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.COREWB_M.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10008",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand data reads that have any type of response.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10001",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10002",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts streaming stores that have any type of response.",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xB7",
-        "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
-        "MSRIndex": "0x1a6,0x1a7",
-        "MSRValue": "0x10800",
-        "SampleAfterValue": "100003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
     {
         "BriefDescription": "ASSISTS.HARDWARE",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc1",
         "EventName": "ASSISTS.HARDWARE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "ASSISTS.PAGE_FAULT",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc1",
         "EventName": "ASSISTS.PAGE_FAULT",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "CORE_POWER.LICENSE_1",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x28",
         "EventName": "CORE_POWER.LICENSE_1",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "CORE_POWER.LICENSE_2",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x28",
         "EventName": "CORE_POWER.LICENSE_2",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "CORE_POWER.LICENSE_3",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x28",
         "EventName": "CORE_POWER.LICENSE_3",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "200003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts modified writebacks from L1 cache and L2 cache that have any type of response.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.COREWB_M.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10008",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts demand data reads that have any type of response.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10001",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts demand data reads that have any type of response.",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_DATA_RD.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
@@ -116,7 +71,6 @@
     },
     {
         "BriefDescription": "Counts demand data reads that were supplied by DRAM.",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_DATA_RD.DRAM",
         "MSRIndex": "0x1a6,0x1a7",
@@ -125,9 +79,18 @@
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts demand reads for ownership (RFO) and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
+        "EventCode": "0xB7",
+        "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10002",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Counts demand read for ownership (RFO) requests and software prefetches for exclusive ownership (PREFETCHW) that have any type of response.",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.DEMAND_RFO.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
@@ -138,7 +101,16 @@
     },
     {
         "BriefDescription": "Counts streaming stores that have any type of response.",
-        "Counter": "0,1,2,3,4,5,6,7",
+        "EventCode": "0xB7",
+        "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
+        "MSRIndex": "0x1a6,0x1a7",
+        "MSRValue": "0x10800",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts streaming stores that have any type of response.",
         "EventCode": "0x2A,0x2B",
         "EventName": "OCR.STREAMING_WR.ANY_RESPONSE",
         "MSRIndex": "0x1a6,0x1a7",
@@ -149,68 +121,52 @@
     },
     {
         "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa5",
         "EventName": "RS.EMPTY",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for this logical processor. This is usually caused when the front-end pipeline runs into starvation periods (e.g. branch mispredictions or i-cache misses)",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x7",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EdgeDetect": "1",
         "EventCode": "0xa5",
         "EventName": "RS.EMPTY_COUNT",
         "Invert": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to closely sample on front-end latency issues (see the FRONTEND_RETIRED event of designated precise events)",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x7",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY_COUNT",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
+        "Deprecated": "1",
         "EdgeDetect": "1",
         "EventCode": "0xa5",
         "EventName": "RS_EMPTY.COUNT",
         "Invert": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x7",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event RS.EMPTY",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
+        "Deprecated": "1",
         "EventCode": "0xa5",
         "EventName": "RS_EMPTY.CYCLES",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x7",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "XQ.FULL_CYCLES",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x2d",
         "EventName": "XQ.FULL_CYCLES",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     }
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
index 1a137f7f8b7e8..f46fa7ba168ac 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/pipeline.json
@@ -1,2168 +1,1634 @@
 [
-    {
-        "BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.CALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xf9",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.COND",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x7e",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.COND_TAKEN",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfe",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xbf",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.INDIRECT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xeb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.IND_CALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.JCC",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x7e",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of near CALL branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.NEAR_CALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xf9",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of near RET branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xf7",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xeb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of near relative CALL branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.REL_CALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfd",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.RETURN",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xf7",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc4",
-        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfe",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.COND",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x7e",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.COND_TAKEN",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfe",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.INDIRECT",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xeb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.IND_CALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.JCC",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0x7e",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xeb",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.RETURN",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xf7",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc5",
-        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "UMask": "0xfe",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 1",
-        "EventName": "CPU_CLK_UNHALTED.CORE",
-        "PEBScounters": "33",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of unhalted core clock cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x3c",
-        "EventName": "CPU_CLK_UNHALTED.CORE_P",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 2",
-        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
-        "PEBScounters": "34",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "UMask": "0x3",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x3c",
-        "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 1",
-        "EventName": "CPU_CLK_UNHALTED.THREAD",
-        "PEBScounters": "33",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of unhalted core clock cycles.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x3c",
-        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 0",
-        "EventName": "INST_RETIRED.ANY",
-        "PEBS": "1",
-        "PEBScounters": "32",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc0",
-        "EventName": "INST_RETIRED.ANY_P",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x03",
-        "EventName": "LD_BLOCKS.4K_ALIAS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x03",
-        "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x03",
-        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc3",
-        "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x8",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of machines clears due to memory renaming.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc3",
-        "EventName": "MACHINE_CLEARS.MRN_NUKE",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x80",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of machine clears due to a page fault.  Counts both I-Side and D-Side (Loads/Stores) page faults.  A page fault occurs when either the page is not present, or an access violation occurs.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc3",
-        "EventName": "MACHINE_CLEARS.PAGE_FAULT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x20",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc3",
-        "EventName": "MACHINE_CLEARS.SLOW",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x6f",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc3",
-        "EventName": "MACHINE_CLEARS.SMC",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "20003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x75",
-        "EventName": "SERIALIZATION.NON_C01_MS_SCB",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x73",
-        "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x73",
-        "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x73",
-        "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x3",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x73",
-        "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x73",
-        "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x74",
-        "EventName": "TOPDOWN_BE_BOUND.ALL",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x74",
-        "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x74",
-        "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x74",
-        "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x8",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x74",
-        "EventName": "TOPDOWN_BE_BOUND.REGISTER",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x20",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x74",
-        "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x40",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x74",
-        "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x10",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.ALL",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x40",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.CISC",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.DECODE",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x8",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x8d",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x72",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.ITLB",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x10",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.OTHER",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x80",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x71",
-        "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x4",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of consumed retirement slots.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc2",
-        "EventName": "TOPDOWN_RETIRING.ALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the total number of uops retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc2",
-        "EventName": "UOPS_RETIRED.ALL",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of integer divide uops retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc2",
-        "EventName": "UOPS_RETIRED.IDIV",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x10",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc2",
-        "EventName": "UOPS_RETIRED.MS",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0xc2",
-        "EventName": "UOPS_RETIRED.X87",
-        "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "UMask": "0x2",
-        "Unit": "cpu_atom"
-    },
     {
         "BriefDescription": "This event is deprecated. Refer to new event ARITH.DIV_ACTIVE",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
+        "Deprecated": "1",
         "EventCode": "0xb0",
         "EventName": "ARITH.DIVIDER_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x9",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xb0",
         "EventName": "ARITH.DIV_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x9",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event ARITH.FPDIV_ACTIVE",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
+        "Deprecated": "1",
         "EventCode": "0xb0",
         "EventName": "ARITH.FP_DIVIDER_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event counts the cycles the integer divider is busy.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb0",
         "EventName": "ARITH.IDIV_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event ARITH.IDIV_ACTIVE",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
+        "Deprecated": "1",
         "EventCode": "0xb0",
         "EventName": "ARITH.INT_DIVIDER_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of occurrences where a microcode assist is invoked by hardware.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc1",
         "EventName": "ASSISTS.ANY",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of occurrences where a microcode assist is invoked by hardware. Examples include AD (page Access Dirty), FP and AVX related assists.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x1b",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the total number of branch instructions retired for all branch types.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of instructions in which the instruction pointer (IP) of the processor is resteered due to a branch instruction and the branch instruction successfully retires.  All branch type instructions are accounted for.",
+        "SampleAfterValue": "200003",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "All branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.ALL_BRANCHES",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all branch instructions retired.",
         "SampleAfterValue": "400009",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf9",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of retired JCC (Jump on Conditional Code) branch instructions retired, includes both taken and not taken branches.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Conditional branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.COND",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts conditional branch instructions retired.",
         "SampleAfterValue": "400009",
         "UMask": "0x11",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Not taken branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.COND_NTAKEN",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts not taken branch instructions retired.",
         "SampleAfterValue": "400009",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Taken conditional branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.COND_TAKEN",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts taken conditional branch instructions retired.",
         "SampleAfterValue": "400009",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of far branch instructions retired, includes far jump, far call and return, and interrupt call and return.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.FAR_BRANCH",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xbf",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Far branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.FAR_BRANCH",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts far branch instructions retired.",
         "SampleAfterValue": "100007",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Indirect near branch instructions retired (excluding returns)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.INDIRECT",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts near indirect branch instructions retired excluding returns. TSX abort is an indirect branch.",
         "SampleAfterValue": "100003",
         "UMask": "0x80",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of near indirect CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of near CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf9",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Direct and indirect near call instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.NEAR_CALL",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts both direct and indirect near call instructions retired.",
         "SampleAfterValue": "100007",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of near RET branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NEAR_RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Return instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.NEAR_RETURN",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts return instructions retired.",
         "SampleAfterValue": "100007",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Taken branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc4",
         "EventName": "BR_INST_RETIRED.NEAR_TAKEN",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts taken branch instructions retired.",
         "SampleAfterValue": "400009",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.INDIRECT",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.NON_RETURN_IND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of near relative CALL branch instructions retired.",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.REL_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfd",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.NEAR_RETURN",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_INST_RETIRED.COND_TAKEN",
+        "Deprecated": "1",
+        "EventCode": "0xc4",
+        "EventName": "BR_INST_RETIRED.TAKEN_JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of mispredicted branch instructions retired for all branch types.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of mispredicted branch instructions retired.  All branch type instructions are accounted for.  Prediction of the branch target address enables the processor to begin executing instructions before the non-speculative execution path is known. The branch prediction unit (BPU) predicts the target address based on the instruction pointer (IP) of the branch and on the execution path through which execution reached this IP.    A branch misprediction occurs when the prediction is wrong, and results in discarding all instructions executed in the speculative path and re-fetching from the correct path.",
+        "SampleAfterValue": "200003",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "All mispredicted branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.ALL_BRANCHES",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch.  When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.",
         "SampleAfterValue": "400009",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of mispredicted JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Mispredicted conditional branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.COND",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts mispredicted conditional branch instructions retired.",
         "SampleAfterValue": "400009",
         "UMask": "0x11",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Mispredicted non-taken conditional branch instructions retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.COND_NTAKEN",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of conditional branch instructions retired that were mispredicted and the branch direction was not taken.",
         "SampleAfterValue": "400009",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of mispredicted taken JCC (Jump on Conditional Code) branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.COND_TAKEN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "number of branch instructions retired that were mispredicted and taken.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.COND_TAKEN",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts taken conditional mispredicted branch instructions retired.",
         "SampleAfterValue": "400009",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect JMP and near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of mispredicted near indirect CALL branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Mispredicted indirect CALL retired.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.INDIRECT_CALL",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts retired mispredicted indirect (near taken) CALL instructions, including both register and memory indirect.",
         "SampleAfterValue": "400009",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT_CALL",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.IND_CALL",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfb",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0x7e",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.NEAR_TAKEN",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts number of near branch instructions retired that were mispredicted and taken.",
         "SampleAfterValue": "400009",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.INDIRECT",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.NON_RETURN_IND",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xeb",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc5",
         "EventName": "BR_MISP_RETIRED.RET",
         "PEBS": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.",
         "SampleAfterValue": "100007",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of mispredicted near RET branch instructions retired.",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.RETURN",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xf7",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event BR_MISP_RETIRED.COND_TAKEN",
+        "Deprecated": "1",
+        "EventCode": "0xc5",
+        "EventName": "BR_MISP_RETIRED.TAKEN_JCC",
+        "PEBS": "1",
+        "SampleAfterValue": "200003",
+        "UMask": "0xfe",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xec",
         "EventName": "CPU_CLK_UNHALTED.C01",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.1 light-weight slower wakeup time but more power saving optimized state.  This state can be entered via the TPAUSE or UMWAIT instructions.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xec",
         "EventName": "CPU_CLK_UNHALTED.C02",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.2 light-weight faster wakeup time but less power saving optimized state.  This state can be entered via the TPAUSE or UMWAIT instructions.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Core clocks when the thread is in the C0.1 or C0.2 or running a PAUSE in C0 ACPI state.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xec",
         "EventName": "CPU_CLK_UNHALTED.C0_WAIT",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts core clocks when the thread is in the C0.1 or C0.2 power saving optimized states (TPAUSE or UMWAIT instructions) or running the PAUSE instruction.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x70",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+        "EventName": "CPU_CLK_UNHALTED.CORE",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles.",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.CORE_P",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Cycle counts are evenly distributed between active threads in the Core.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xec",
         "EventName": "CPU_CLK_UNHALTED.DISTRIBUTED",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This event distributes cycle counts between active hyperthreads, i.e., those in C0.  A hyperthread becomes inactive when it executes the HLT or MWAIT instructions.  If all other hyperthreads are inactive (or disabled or do not exist), all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3c",
         "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts Core crystal clock cycles when current thread is unhalted and the other thread is halted.",
         "SampleAfterValue": "25003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "CPU_CLK_UNHALTED.PAUSE",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xec",
         "EventName": "CPU_CLK_UNHALTED.PAUSE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "CPU_CLK_UNHALTED.PAUSE_INST",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EdgeDetect": "1",
         "EventCode": "0xec",
         "EventName": "CPU_CLK_UNHALTED.PAUSE_INST",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Core crystal clock cycles. Cycle counts are evenly distributed between active threads in the Core.",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3c",
         "EventName": "CPU_CLK_UNHALTED.REF_DISTRIBUTED",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This event distributes Core crystal clock cycle counts between active hyperthreads, i.e., those in C0 sleep-state. A hyperthread becomes inactive when it executes the HLT or MWAIT instructions. If one thread is active in a core, all counts are attributed to this hyperthread. To obtain the full count when the Core is active, sum the counts from each hyperthread.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event)",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses fixed counter 2.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x3",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Reference cycles when the core is not in halt state.",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 2",
         "EventName": "CPU_CLK_UNHALTED.REF_TSC",
-        "PEBScounters": "34",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x3",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of unhalted reference clock cycles at TSC frequency.",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
+        "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. This event is not affected by core frequency changes and increments at a fixed frequency that is also used for the Time Stamp Counter (TSC). This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Reference cycles when the core is not in halt state.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3c",
         "EventName": "CPU_CLK_UNHALTED.REF_TSC_P",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'.  The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'.  After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles. (Fixed event)",
+        "EventName": "CPU_CLK_UNHALTED.THREAD",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time.  This event uses fixed counter 1.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Core cycles when the thread is not in halt state",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 1",
         "EventName": "CPU_CLK_UNHALTED.THREAD",
-        "PEBScounters": "33",
+        "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the eight programmable counters available for other events.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of unhalted core clock cycles.",
+        "EventCode": "0x3c",
+        "EventName": "CPU_CLK_UNHALTED.THREAD_P",
+        "PublicDescription": "Counts the number of core cycles while the core is not in a halt state.  The core enters the halt state when it is running the HLT instruction. The core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Thread cycles when thread is not in halt state",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0x3c",
         "EventName": "CPU_CLK_UNHALTED.THREAD_P",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "8",
         "EventCode": "0xa3",
         "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0xa3",
         "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles while memory subsystem has an outstanding load.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "16",
         "EventCode": "0xa3",
         "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "12",
         "EventCode": "0xa3",
         "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0xc",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "5",
         "EventCode": "0xa3",
         "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x5",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Total execution stalls.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "4",
         "EventCode": "0xa3",
         "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles total of 1 uop is executed on all ports and Reservation Station was not empty.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa6",
         "EventName": "EXE_ACTIVITY.1_PORTS_UTIL",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles total of 2 uops are executed on all ports and Reservation Station was not empty.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa6",
         "EventName": "EXE_ACTIVITY.2_PORTS_UTIL",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station was not empty.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa6",
         "EventName": "EXE_ACTIVITY.3_PORTS_UTIL",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station was not empty.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa6",
         "EventName": "EXE_ACTIVITY.4_PORTS_UTIL",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "5",
         "EventCode": "0xa6",
         "EventName": "EXE_ACTIVITY.BOUND_ON_LOADS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x21",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles where the Store Buffer was full and no loads caused an execution stall.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "2",
         "EventCode": "0xa6",
         "EventName": "EXE_ACTIVITY.BOUND_ON_STORES",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles where the Store Buffer was full and no loads caused an execution stall.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles no uop executed while RS was not empty, the SB was not full and there was no outstanding load.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa6",
         "EventName": "EXE_ACTIVITY.EXE_BOUND_0_PORTS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of cycles total of 0 uops executed on all ports, Reservation Station (RS) was not empty, the Store Buffer (SB) was not full and there was no outstanding load.",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x80",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Instruction decoders utilized in a cycle",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x75",
         "EventName": "INST_DECODED.DECODERS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Number of decoders utilized in a cycle when the MITE (legacy decode pipeline) fetches instructions.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the total number of instructions retired. (Fixed event)",
+        "EventName": "INST_RETIRED.ANY",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Number of instructions retired. Fixed Counter - architectural event",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 0",
         "EventName": "INST_RETIRED.ANY",
         "PEBS": "1",
-        "PEBScounters": "32",
+        "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the total number of instructions retired.",
+        "EventCode": "0xc0",
+        "EventName": "INST_RETIRED.ANY_P",
+        "PEBS": "1",
+        "PublicDescription": "Counts the total number of instructions that retired. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. This event continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses a programmable general purpose performance counter.",
+        "SampleAfterValue": "2000003",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Number of instructions retired. General Counter - architectural event",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.ANY_P",
         "PEBS": "1",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of X86 instructions retired - an Architectural PerfMon event. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter freeing up programmable counters to count other events. INST_RETIRED.ANY_P is counted by a programmable counter.",
         "SampleAfterValue": "2000003",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INST_RETIRED.MACRO_FUSED",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.MACRO_FUSED",
-        "PEBScounters": "1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired NOP instructions.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.NOP",
-        "PEBScounters": "1,2,3,4,5,6,7",
+        "PublicDescription": "Counts all retired NOP or ENDBR32/64 instructions",
         "SampleAfterValue": "2000003",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Precise instruction retired with PEBS precise-distribution",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 0",
         "EventName": "INST_RETIRED.PREC_DIST",
         "PEBS": "1",
-        "PEBScounters": "32",
+        "PublicDescription": "A version of INST_RETIRED that allows for a precise distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR++) feature to fix bias in how retired instructions get sampled. Use on Fixed Counter 0.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INST_RETIRED.REP_ITERATION",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc0",
         "EventName": "INST_RETIRED.REP_ITERATION",
-        "PEBScounters": "1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xad",
         "EventName": "INT_MISC.CLEAR_RESTEER_CYCLES",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles after recovery from a branch misprediction or machine clear till the first uop is issued from the resteered path.",
         "SampleAfterValue": "500009",
-        "Speculative": "1",
         "UMask": "0x80",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xad",
         "EventName": "INT_MISC.RECOVERY_CYCLES",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts core cycles when the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.",
         "SampleAfterValue": "500009",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xad",
         "EventName": "INT_MISC.UNKNOWN_BRANCH_CYCLES",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x7",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "TakenAlone": "1",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "TMA slots where uops got dropped",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xad",
         "EventName": "INT_MISC.UOP_DROPPING",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Estimated number of Top-down Microarchitecture Analysis slots that got dropped due to non front-end reasons",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INT_VEC_RETIRED.128BIT",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.128BIT",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
         "UMask": "0x13",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INT_VEC_RETIRED.256BIT",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.256BIT",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
         "UMask": "0xac",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "integer ADD, SUB, SAD 128-bit vector instructions.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.ADD_128",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 128-bit vector instructions.",
         "SampleAfterValue": "1000003",
         "UMask": "0x3",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "integer ADD, SUB, SAD 256-bit vector instructions.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.ADD_256",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of retired integer ADD/SUB (regular or horizontal), SAD 256-bit vector instructions.",
         "SampleAfterValue": "1000003",
         "UMask": "0xc",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INT_VEC_RETIRED.MUL_256",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.MUL_256",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
         "UMask": "0x80",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INT_VEC_RETIRED.SHUFFLES",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.SHUFFLES",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INT_VEC_RETIRED.VNNI_128",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.VNNI_128",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "INT_VEC_RETIRED.VNNI_256",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe7",
         "EventName": "INT_VEC_RETIRED.VNNI_256",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "This event is deprecated. Refer to new event LD_BLOCKS.ADDRESS_ALIAS",
+        "Deprecated": "1",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.4K_ALIAS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because it initially appears to be store forward blocked, but subsequently is shown not to be blocked based on 4K alias check.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "False dependencies in MOB due to partial compare on address.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x03",
         "EventName": "LD_BLOCKS.ADDRESS_ALIAS",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times a load got blocked due to false dependencies in MOB due to partial compare on address.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of retired loads that are blocked because its address exactly matches an older store whose data is not ready.",
+        "EventCode": "0x03",
+        "EventName": "LD_BLOCKS.DATA_UNKNOWN",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x03",
         "EventName": "LD_BLOCKS.NO_SR",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x88",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Loads blocked due to overlapping with a preceding store that cannot be forwarded.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x03",
         "EventName": "LD_BLOCKS.STORE_FORWARD",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of times where store forwarding was prevented for a load operation. The most common case is a load blocked due to the address of memory access (partially) overlapping with a preceding uncompleted store. Note: See the table of not supported store forwards in the Optimization Guide.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x82",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts the number of demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x4c",
         "EventName": "LOAD_HIT_PREFETCH.SWPF",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xa8",
         "EventName": "LSD.CYCLES_ACTIVE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles optimal number of Uops delivered by the LSD, but did not come from the decoder.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "6",
         "EventCode": "0xa8",
         "EventName": "LSD.CYCLES_OK",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the cycles when optimal number of uops is delivered by the LSD (Loop-stream detector).",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
-        "BriefDescription": "Number of Uops delivered by the LSD.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
-        "EventCode": "0xa8",
-        "EventName": "LSD.UOPS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_core"
+        "BriefDescription": "Number of Uops delivered by the LSD.",
+        "EventCode": "0xa8",
+        "EventName": "LSD.UOPS",
+        "PublicDescription": "Counts the number of uops delivered to the back-end by the LSD(Loop Stream Detector).",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Number of machine clears (nukes) of any type.",
+        "CounterMask": "1",
+        "EdgeDetect": "1",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.COUNT",
+        "PublicDescription": "Counts the number of machine clears (nukes) of any type.",
+        "SampleAfterValue": "100003",
+        "UMask": "0x1",
+        "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to memory ordering in which an internal load passes an older store within the same CPU.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.DISAMBIGUATION",
+        "SampleAfterValue": "20003",
+        "UMask": "0x8",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of machines clears due to memory renaming.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.MRN_NUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to a page fault.  Counts both I-Side and D-Side (Loads/Stores) page faults.  A page fault occurs when either the page is not present, or an access violation occurs.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.PAGE_FAULT",
+        "SampleAfterValue": "20003",
+        "UMask": "0x20",
+        "Unit": "cpu_atom"
     },
     {
-        "BriefDescription": "Number of machine clears (nukes) of any type.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
-        "CounterMask": "1",
-        "EdgeDetect": "1",
+        "BriefDescription": "Counts the number of machine clears that flush the pipeline and restart the machine with the use of microcode due to SMC, MEMORY_ORDERING, FP_ASSISTS, PAGE_FAULT, DISAMBIGUATION, and FPC_VIRTUAL_TRAP.",
         "EventCode": "0xc3",
-        "EventName": "MACHINE_CLEARS.COUNT",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
-        "SampleAfterValue": "100003",
-        "Speculative": "1",
+        "EventName": "MACHINE_CLEARS.SLOW",
+        "SampleAfterValue": "20003",
+        "UMask": "0x6f",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of machine clears due to program modifying data (self modifying code) within 1K of a recently fetched code page.",
+        "EventCode": "0xc3",
+        "EventName": "MACHINE_CLEARS.SMC",
+        "SampleAfterValue": "20003",
         "UMask": "0x1",
-        "Unit": "cpu_core"
+        "Unit": "cpu_atom"
     },
     {
         "BriefDescription": "Self-modifying code (SMC) detected.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc3",
         "EventName": "MACHINE_CLEARS.SMC",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "MISC2_RETIRED.LFENCE",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xe0",
         "EventName": "MISC2_RETIRED.LFENCE",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "400009",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Increments whenever there is an update to the LBR array.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xcc",
         "EventName": "MISC_RETIRED.LBR_INSERTS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.",
         "SampleAfterValue": "100003",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa2",
         "EventName": "RESOURCE_STALLS.SB",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts cycles where the pipeline is stalled due to serializing operations.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa2",
         "EventName": "RESOURCE_STALLS.SCOREBOARD",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires.",
+        "EventCode": "0x75",
+        "EventName": "SERIALIZATION.NON_C01_MS_SCB",
+        "PublicDescription": "Counts the number of issue slots not consumed by the backend due to a micro-sequencer (MS) scoreboard, which stalls the front-end from issuing from the UROM until a specified older uop retires. The most commonly executed instruction with an MS scoreboard is PAUSE.",
+        "SampleAfterValue": "200003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "TMA slots where no uops were being issued due to lack of back-end resources.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa4",
         "EventName": "TOPDOWN.BACKEND_BOUND_SLOTS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of slots in TMA method where no micro-operations were being issued from front-end to back-end of the machine due to lack of back-end resources.",
         "SampleAfterValue": "10000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "TMA slots wasted due to incorrect speculations.",
-        "CollectPEBSRecord": "2",
         "EventCode": "0xa4",
         "EventName": "TOPDOWN.BAD_SPEC_SLOTS",
+        "PublicDescription": "Number of slots of TMA method that were wasted due to incorrect speculation. It covers all types of control-flow or data-related mis-speculations.",
         "SampleAfterValue": "10000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "TMA slots wasted due to incorrect speculation by branch mispredictions",
-        "CollectPEBSRecord": "2",
         "EventCode": "0xa4",
         "EventName": "TOPDOWN.BR_MISPREDICT_SLOTS",
+        "PublicDescription": "Number of TMA slots that were wasted due to incorrect speculation by (any type of) branch mispredictions. This event estimates number of specualtive operations that were issued but not retired as well as the out-of-order engine recovery past a branch misprediction.",
         "SampleAfterValue": "10000003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "TOPDOWN.MEMORY_BOUND_SLOTS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa4",
         "EventName": "TOPDOWN.MEMORY_BOUND_SLOTS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "10000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "TMA slots available for an unhalted logical processor. Fixed counter - architectural event",
-        "CollectPEBSRecord": "2",
-        "Counter": "Fixed counter 3",
         "EventName": "TOPDOWN.SLOTS",
-        "PEBScounters": "35",
+        "PublicDescription": "Number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method (TMA). The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core. Software can use this event as the denominator for the top-level metrics of the TMA method. This architectural event is counted on a designated fixed counter (Fixed Counter 3).",
         "SampleAfterValue": "10000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "TMA slots available for an unhalted logical processor. General counter - architectural event",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xa4",
         "EventName": "TOPDOWN.SLOTS_P",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of available slots for an unhalted logical processor. The event increments by machine-width of the narrowest pipeline as employed by the Top-down Microarchitecture Analysis method. The count is distributed among unhalted logical processors (hyper-threads) who share the same physical core.",
         "SampleAfterValue": "10000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.ALL",
+        "PublicDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a mispredicted jump or a machine clear. Only issue slots wasted due to fast nukes such as memory ordering nukes are counted. Other nukes are not accounted for. Counts all issue slots blocked during this recovery window including relevant microcode flows and while uops are not yet available in the instruction queue (IQ) even if an FE_bound event occurs during this period. Also includes the issue slots that were consumed by the backend but were thrown away because they were younger than the mispredict or machine clear.",
+        "SampleAfterValue": "1000003",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to fast nukes such as memory ordering and memory disambiguation machine clears.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.FASTNUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots that were not consumed by the backend because allocation is stalled due to a machine clear (nuke) of any kind including memory ordering and memory disambiguation.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MACHINE_CLEARS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x3",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to branch mispredicts.",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.MISPREDICT",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to a machine clear (nuke).",
+        "EventCode": "0x73",
+        "EventName": "TOPDOWN_BAD_SPECULATION.NUKE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to backend stalls.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALL",
+        "SampleAfterValue": "1000003",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to certain allocation restrictions.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.ALLOC_RESTRICTIONS",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to memory reservation stalls in which a scheduler is not able to accept uops.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to IEC or FPC RAT stalls, which can be due to FIQ or IEC reservation stalls in which the integer, floating point or SIMD scheduler is not able to accept uops.",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.NON_MEM_SCHEDULER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the physical register file unable to accept an entry (marble stalls).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REGISTER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x20",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to the reorder buffer being full (ROB stalls).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.REORDER_BUFFER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not consumed by the backend due to scoreboards from the instruction queue (IQ), jump execution unit (JEU), or microcode sequencer (MS).",
+        "EventCode": "0x74",
+        "EventName": "TOPDOWN_BE_BOUND.SERIALIZATION",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of issue slots every cycle that were not consumed by the backend due to frontend stalls.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ALL",
+        "SampleAfterValue": "1000003",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_DETECT",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BACLEARS, which occurs when the Branch Target Buffer (BTB) prediction or lack thereof, was corrected by a later branch predictor in the frontend. Includes BACLEARS due to all branch types including conditional and unconditional jumps, returns, and indirect branches.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.BRANCH_RESTEER",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to BTCLEARS, which occurs when the Branch Target Buffer (BTB) predicts a taken branch.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x40",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to the microcode sequencer (MS).",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.CISC",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to decode stalls.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.DECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to frontend bandwidth restrictions due to decode, predecode, cisc, and other limitations.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_BANDWIDTH",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x8d",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to a latency related stalls including BACLEARs, BTCLEARs, ITLB misses, and ICache misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.FRONTEND_LATENCY",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x72",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to ITLB misses.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.ITLB",
+        "PublicDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to Instruction Table Lookaside Buffer (ITLB) misses.",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x10",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to other common frontend stalls not categorized.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.OTHER",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x80",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of issue slots every cycle that were not delivered by the frontend due to wrong predecodes.",
+        "EventCode": "0x71",
+        "EventName": "TOPDOWN_FE_BOUND.PREDECODE",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x4",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the total number of consumed retirement slots.",
+        "EventCode": "0xc2",
+        "EventName": "TOPDOWN_RETIRING.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "1000003",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "UOPS_DECODED.DEC0_UOPS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x76",
         "EventName": "UOPS_DECODED.DEC0_UOPS",
-        "PEBScounters": "0,1,2,3",
         "SampleAfterValue": "1000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops executed on port 0",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb2",
         "EventName": "UOPS_DISPATCHED.PORT_0",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of uops dispatch to execution  port 0.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops executed on port 1",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb2",
         "EventName": "UOPS_DISPATCHED.PORT_1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of uops dispatch to execution  port 1.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops executed on ports 2, 3 and 10",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb2",
         "EventName": "UOPS_DISPATCHED.PORT_2_3_10",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of uops dispatch to execution ports 2, 3 and 10",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops executed on ports 4 and 9",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb2",
         "EventName": "UOPS_DISPATCHED.PORT_4_9",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of uops dispatch to execution ports 4 and 9",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops executed on ports 5 and 11",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb2",
         "EventName": "UOPS_DISPATCHED.PORT_5_11",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of uops dispatch to execution ports 5 and 11",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops executed on port 6",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb2",
         "EventName": "UOPS_DISPATCHED.PORT_6",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of uops dispatch to execution  port 6.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x40",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops executed on ports 7 and 8",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb2",
         "EventName": "UOPS_DISPATCHED.PORT_7_8",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Number of uops dispatch to execution  ports 7 and 8.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x80",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 1 micro-op is executed from any thread on physical core.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "2",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 2 micro-ops are executed from any thread on physical core.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "3",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 3 micro-ops are executed from any thread on physical core.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "4",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles when at least 4 micro-ops are executed from any thread on physical core.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles where at least 1 uop was executed per-thread",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CYCLES_GE_1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 1 uop was executed per-thread.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles where at least 2 uops were executed per-thread",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "2",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CYCLES_GE_2",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 2 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles where at least 3 uops were executed per-thread",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "3",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CYCLES_GE_3",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 3 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles where at least 4 uops were executed per-thread",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "4",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.CYCLES_GE_4",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Cycles where at least 4 uops were executed per-thread.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.STALLS",
         "Invert": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event UOPS_EXECUTED.STALLS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
+        "Deprecated": "1",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.STALL_CYCLES",
         "Invert": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.THREAD",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Counts the number of x87 uops dispatched.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xb1",
         "EventName": "UOPS_EXECUTED.X87",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of x87 uops executed.",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Uops that RAT issues to RS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xae",
         "EventName": "UOPS_ISSUED.ANY",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).",
         "SampleAfterValue": "2000003",
-        "Speculative": "1",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the total number of uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.ALL",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Cycles with retired uop(s).",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.CYCLES",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts cycles where at least one uop has retired.",
         "SampleAfterValue": "1000003",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retired uops except the last uop of each instruction.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.HEAVY",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the number of retired micro-operations (uops) except the last uop of each instruction. An instruction that is decoded into less than two uops does not contribute to the count.",
         "SampleAfterValue": "2000003",
         "UMask": "0x1",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of integer divide uops retired.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.IDIV",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x10",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of uops that are from complex flows issued by the micro-sequencer (MS).",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.MS",
+        "PEBS": "1",
+        "PublicDescription": "Counts the number of uops that are from complex flows issued by the Microcode Sequencer (MS). This includes uops from flows due to complex instructions, faults, assists, and inserted flows.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "UOPS_RETIRED.MS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.MS",
         "MSRIndex": "0x3F7",
         "MSRValue": "0x8",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "2000003",
-        "TakenAlone": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Retirement slots used.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.SLOTS",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "Counts the retirement slots used each cycle.",
         "SampleAfterValue": "2000003",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles without actually retired uops.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.STALLS",
         "Invert": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
+        "PublicDescription": "This event counts cycles without actually retired uops.",
         "SampleAfterValue": "1000003",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "This event is deprecated. Refer to new event UOPS_RETIRED.STALLS",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5,6,7",
         "CounterMask": "1",
+        "Deprecated": "1",
         "EventCode": "0xc2",
         "EventName": "UOPS_RETIRED.STALL_CYCLES",
         "Invert": "1",
-        "PEBScounters": "0,1,2,3,4,5,6,7",
         "SampleAfterValue": "1000003",
         "UMask": "0x2",
         "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of x87 uops retired, includes those in MS flows.",
+        "EventCode": "0xc2",
+        "EventName": "UOPS_RETIRED.X87",
+        "PEBS": "1",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x2",
+        "Unit": "cpu_atom"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json
index d82d6f62a6fb4..2ccd9cf96957f 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-memory.json
@@ -1,221 +1,174 @@
 [
     {
-        "BriefDescription": "Number of clocks",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x01",
-        "EventName": "UNC_M_CLOCKTICKS",
+        "BriefDescription": "Counts every 64B read  request entering the Memory Controller 0 to DRAM (sum of all channels).",
+        "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN",
         "PerPkg": "1",
+        "PublicDescription": "Counts every 64B read request entering the Memory Controller 0 to DRAM (sum of all channels).",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Incoming VC0 read request",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x02",
-        "EventName": "UNC_M_VC0_REQUESTS_RD",
+        "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+        "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Incoming VC0 write request",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x03",
-        "EventName": "UNC_M_VC0_REQUESTS_WR",
+        "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels).",
+        "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN",
         "PerPkg": "1",
+        "PublicDescription": "Counts every 64B read entering the Memory Controller 1 to DRAM (sum of all channels).",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Incoming VC1 read request",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x04",
-        "EventName": "UNC_M_VC1_REQUESTS_RD",
+        "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM.",
+        "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Incoming VC1 write request",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x05",
-        "EventName": "UNC_M_VC1_REQUESTS_WR",
+        "BriefDescription": "ACT command for a read request sent to DRAM",
+        "EventCode": "0x24",
+        "EventName": "UNC_M_ACT_COUNT_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Incoming read prefetch request from IA",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x0A",
-        "EventName": "UNC_M_PREFETCH_RD",
+        "BriefDescription": "ACT command sent to DRAM",
+        "EventCode": "0x26",
+        "EventName": "UNC_M_ACT_COUNT_TOTAL",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Any Rank at Hot state",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x19",
-        "EventName": "UNC_M_DRAM_THERMAL_HOT",
+        "BriefDescription": "ACT command for a write request sent to DRAM",
+        "EventCode": "0x25",
+        "EventName": "UNC_M_ACT_COUNT_WR",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Any Rank at Warm state",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x1A",
-        "EventName": "UNC_M_DRAM_THERMAL_WARM",
+        "BriefDescription": "Read CAS command sent to DRAM",
+        "EventCode": "0x22",
+        "EventName": "UNC_M_CAS_COUNT_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "incoming read request page status is Page Hit",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x1C",
-        "EventName": "UNC_M_DRAM_PAGE_HIT_RD",
+        "BriefDescription": "Write CAS command sent to DRAM",
+        "EventCode": "0x23",
+        "EventName": "UNC_M_CAS_COUNT_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "Number of clocks",
+        "EventCode": "0x01",
+        "EventName": "UNC_M_CLOCKTICKS",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
         "BriefDescription": "incoming read request page status is Page Empty",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
         "EventCode": "0x1D",
         "EventName": "UNC_M_DRAM_PAGE_EMPTY_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "incoming read request page status is Page Miss",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x1E",
-        "EventName": "UNC_M_DRAM_PAGE_MISS_RD",
+        "BriefDescription": "incoming write request page status is Page Empty",
+        "EventCode": "0x20",
+        "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR",
+        "PerPkg": "1",
+        "Unit": "iMC"
+    },
+    {
+        "BriefDescription": "incoming read request page status is Page Hit",
+        "EventCode": "0x1C",
+        "EventName": "UNC_M_DRAM_PAGE_HIT_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
         "BriefDescription": "incoming write request page status is Page Hit",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
         "EventCode": "0x1F",
         "EventName": "UNC_M_DRAM_PAGE_HIT_WR",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "incoming write request page status is Page Empty",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x20",
-        "EventName": "UNC_M_DRAM_PAGE_EMPTY_WR",
+        "BriefDescription": "incoming read request page status is Page Miss",
+        "EventCode": "0x1E",
+        "EventName": "UNC_M_DRAM_PAGE_MISS_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
         "BriefDescription": "incoming write request page status is Page Miss",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
         "EventCode": "0x21",
         "EventName": "UNC_M_DRAM_PAGE_MISS_WR",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Read CAS command sent to DRAM",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x22",
-        "EventName": "UNC_M_CAS_COUNT_RD",
-        "PerPkg": "1",
-        "Unit": "iMC"
-    },
-    {
-        "BriefDescription": "Write CAS command sent to DRAM",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x23",
-        "EventName": "UNC_M_CAS_COUNT_WR",
+        "BriefDescription": "Any Rank at Hot state",
+        "EventCode": "0x19",
+        "EventName": "UNC_M_DRAM_THERMAL_HOT",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "ACT command for a read request sent to DRAM",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x24",
-        "EventName": "UNC_M_ACT_COUNT_RD",
+        "BriefDescription": "Any Rank at Warm state",
+        "EventCode": "0x1A",
+        "EventName": "UNC_M_DRAM_THERMAL_WARM",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "ACT command for a write request sent to DRAM",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x25",
-        "EventName": "UNC_M_ACT_COUNT_WR",
+        "BriefDescription": "Incoming read prefetch request from IA.",
+        "EventCode": "0x0A",
+        "EventName": "UNC_M_PREFETCH_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "ACT command sent to DRAM",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x26",
-        "EventName": "UNC_M_ACT_COUNT_TOTAL",
+        "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration",
+        "EventCode": "0x28",
+        "EventName": "UNC_M_PRE_COUNT_IDLE",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
         "BriefDescription": "PRE command sent to DRAM for a read/write request",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
         "EventCode": "0x27",
         "EventName": "UNC_M_PRE_COUNT_PAGE_MISS",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "PRE command sent to DRAM due to page table idle timer expiration",
-        "Counter": "0,1,2,3,4",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x28",
-        "EventName": "UNC_M_PRE_COUNT_IDLE",
-        "PerPkg": "1",
-        "Unit": "iMC"
-    },
-    {
-        "BriefDescription": "Counts every 64B read  request entering the Memory Controller 0 to DRAM (sum of all channels)",
-        "CounterType": "FREERUN",
-        "EventName": "UNC_MC0_RDCAS_COUNT_FREERUN",
+        "BriefDescription": "Incoming VC0 read request",
+        "EventCode": "0x02",
+        "EventName": "UNC_M_VC0_REQUESTS_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Counts every 64B read request entering the Memory Controller 1 to DRAM (sum of all channels)",
-        "Counter": "3",
-        "CounterType": "FREERUN",
-        "EventName": "UNC_MC1_RDCAS_COUNT_FREERUN",
+        "BriefDescription": "Incoming VC0 write request",
+        "EventCode": "0x03",
+        "EventName": "UNC_M_VC0_REQUESTS_WR",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Counts every 64B write request entering the Memory Controller 0 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM",
-        "Counter": "1",
-        "CounterType": "FREERUN",
-        "EventName": "UNC_MC0_WRCAS_COUNT_FREERUN",
+        "BriefDescription": "Incoming VC1 read request",
+        "EventCode": "0x04",
+        "EventName": "UNC_M_VC1_REQUESTS_RD",
         "PerPkg": "1",
         "Unit": "iMC"
     },
     {
-        "BriefDescription": "Counts every 64B write request entering the Memory Controller 1 to DRAM (sum of all channels). Each write request counts as a new request incrementing this counter. However, same cache line write requests (both full and partial) are combined to a single 64 byte data transfer to DRAM",
-        "Counter": "4",
-        "CounterType": "FREERUN",
-        "EventName": "UNC_MC1_WRCAS_COUNT_FREERUN",
+        "BriefDescription": "Incoming VC1 write request",
+        "EventCode": "0x05",
+        "EventName": "UNC_M_VC1_REQUESTS_WR",
         "PerPkg": "1",
         "Unit": "iMC"
     }
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
index b1ae349f5f21c..bc5fb6b760658 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/uncore-other.json
@@ -1,40 +1,73 @@
 [
     {
-        "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles",
-        "Counter": "Fixed",
-        "CounterType": "PGMABLE",
-        "EventCode": "0xff",
-        "EventName": "UNC_CLOCK.SOCKET",
+        "BriefDescription": "Number of requests allocated in Coherency Tracker.",
+        "EventCode": "0x84",
+        "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
         "PerPkg": "1",
-        "Unit": "CLOCK"
+        "UMask": "0x1",
+        "Unit": "ARB"
     },
     {
-        "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC",
-        "Counter": "0,1",
-        "CounterType": "PGMABLE",
+        "BriefDescription": "Each cycle counts number of any coherent request at memory controller that were issued by any core.",
+        "EventCode": "0x85",
+        "EventName": "UNC_ARB_DAT_OCCUPANCY.ALL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "Each cycle counts number of coherent reads pending on data return from memory controller that were issued by any core.",
+        "EventCode": "0x85",
+        "EventName": "UNC_ARB_DAT_OCCUPANCY.RD",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "Number of coherent read requests sent to memory controller that were issued by any core.",
         "EventCode": "0x81",
-        "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+        "EventName": "UNC_ARB_DAT_REQUESTS.RD",
         "PerPkg": "1",
-        "UMask": "0x01",
+        "UMask": "0x2",
         "Unit": "ARB"
     },
     {
-        "BriefDescription": "Number of requests allocated in Coherency Tracker",
-        "Counter": "0,1",
-        "CounterType": "PGMABLE",
-        "EventCode": "0x84",
-        "EventName": "UNC_ARB_COH_TRK_REQUESTS.ALL",
+        "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_OCCUPANCY.ALL",
+        "EventCode": "0x85",
+        "EventName": "UNC_ARB_IFA_OCCUPANCY.ALL",
         "PerPkg": "1",
-        "UMask": "0x01",
+        "UMask": "0x1",
         "Unit": "ARB"
     },
     {
-        "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic",
-        "CounterType": "PGMABLE",
+        "BriefDescription": "This event is deprecated. Refer to new event UNC_ARB_DAT_REQUESTS.RD",
+        "EventCode": "0x81",
+        "EventName": "UNC_ARB_REQ_TRK_REQUEST.DRD",
+        "PerPkg": "1",
+        "UMask": "0x2",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "Each cycle counts number of all outgoing valid entries in ReqTrk. Such entry is defined as valid from its allocation in ReqTrk till deallocation. Accounts for Coherent and non-coherent traffic.",
         "EventCode": "0x80",
         "EventName": "UNC_ARB_TRK_OCCUPANCY.ALL",
         "PerPkg": "1",
-        "UMask": "0x01",
+        "UMask": "0x1",
         "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC.",
+        "EventCode": "0x81",
+        "EventName": "UNC_ARB_TRK_REQUESTS.ALL",
+        "PerPkg": "1",
+        "UMask": "0x1",
+        "Unit": "ARB"
+    },
+    {
+        "BriefDescription": "This 48-bit fixed counter counts the UCLK cycles.",
+        "EventCode": "0xff",
+        "EventName": "UNC_CLOCK.SOCKET",
+        "PerPkg": "1",
+        "Unit": "CLOCK"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
index 12baf768ad8d3..3827d292da80a 100644
--- a/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
+++ b/tools/perf/pmu-events/arch/x86/alderlake/virtual-memory.json
@@ -1,317 +1,236 @@
 [
-    {
-        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x08",
-        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0xe",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x49",
-        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "UMask": "0xe",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x1",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "2000003",
-        "Speculative": "1",
-        "UMask": "0x80",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x85",
-        "EventName": "ITLB_MISSES.WALK_COMPLETED",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "200003",
-        "Speculative": "1",
-        "UMask": "0xe",
-        "Unit": "cpu_atom"
-    },
-    {
-        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3,4,5",
-        "EventCode": "0x05",
-        "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
-        "PEBScounters": "0,1,2,3,4,5",
-        "SampleAfterValue": "1000003",
-        "Speculative": "1",
-        "UMask": "0x90",
-        "Unit": "cpu_atom"
-    },
     {
         "BriefDescription": "Loads that miss the DTLB and hit the STLB.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x12",
         "EventName": "DTLB_LOAD_MISSES.STLB_HIT",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a demand load.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x12",
         "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a demand load.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to load DTLB misses to any page size.",
+        "EventCode": "0x08",
+        "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts the number of page walks completed due to loads (including SW prefetches) whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size. Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x12",
         "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data loads. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0xe",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Page walks completed due to a demand data load to a 1G page.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x12",
         "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Page walks completed due to a demand data load to a 2M/4M page.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x12",
         "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Page walks completed due to a demand data load to a 4K page.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x12",
         "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data loads. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of page walks outstanding for a demand load in the PMH each cycle.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x12",
         "EventName": "DTLB_LOAD_MISSES.WALK_PENDING",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding for a demand load in the PMH (Page Miss Handler) each cycle.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Stores that miss the DTLB and hit the STLB.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x13",
         "EventName": "DTLB_STORE_MISSES.STLB_HIT",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x13",
         "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to store DTLB misses to any page size.",
+        "EventCode": "0x49",
+        "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts the number of page walks completed due to stores whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size.  Includes page walks that page fault.",
+        "SampleAfterValue": "2000003",
+        "UMask": "0xe",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x13",
         "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (all page sizes) caused by demand data stores. This implies it missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0xe",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Page walks completed due to a demand data store to a 1G page.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x13",
         "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (1G sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x8",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Page walks completed due to a demand data store to a 2M/4M page.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x13",
         "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (2M/4M sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Page walks completed due to a demand data store to a 4K page.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x13",
         "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks  (4K sizes) caused by demand data stores. This implies address translations missed in the DTLB and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of page walks outstanding for a store in the PMH each cycle.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x13",
         "EventName": "DTLB_STORE_MISSES.WALK_PENDING",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding for a store in the PMH (Page Miss Handler) each cycle.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks initiated by a instruction fetch that missed the first and second level TLBs.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.MISS_CAUSED_WALK",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x1",
+        "Unit": "cpu_atom"
+    },
+    {
+        "BriefDescription": "Counts the number of page walks due to an instruction fetch that miss the PDE (Page Directory Entry) cache.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.PDE_CACHE_MISS",
+        "SampleAfterValue": "2000003",
+        "UMask": "0x80",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x11",
         "EventName": "ITLB_MISSES.STLB_HIT",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts instruction fetch requests that miss the ITLB (Instruction TLB) and hit the STLB (Second-level TLB).",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x20",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "CounterMask": "1",
         "EventCode": "0x11",
         "EventName": "ITLB_MISSES.WALK_ACTIVE",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a code (instruction fetch) request.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
     },
+    {
+        "BriefDescription": "Counts the number of page walks completed due to instruction fetch misses to any page size.",
+        "EventCode": "0x85",
+        "EventName": "ITLB_MISSES.WALK_COMPLETED",
+        "PublicDescription": "Counts the number of page walks completed due to instruction fetches whose address translations missed in all Translation Lookaside Buffer (TLB) levels and were mapped to any page size.  Includes page walks that page fault.",
+        "SampleAfterValue": "200003",
+        "UMask": "0xe",
+        "Unit": "cpu_atom"
+    },
     {
         "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x11",
         "EventName": "ITLB_MISSES.WALK_COMPLETED",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks (all page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0xe",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x11",
         "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks (2M/4M page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x4",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x11",
         "EventName": "ITLB_MISSES.WALK_COMPLETED_4K",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts completed page walks (4K page sizes) caused by a code fetch. This implies it missed in the ITLB (Instruction TLB) and further levels of TLB. The page walk can end with or without a fault.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x2",
         "Unit": "cpu_core"
     },
     {
         "BriefDescription": "Number of page walks outstanding for an outstanding code request in the PMH each cycle.",
-        "CollectPEBSRecord": "2",
-        "Counter": "0,1,2,3",
         "EventCode": "0x11",
         "EventName": "ITLB_MISSES.WALK_PENDING",
-        "PEBScounters": "0,1,2,3",
+        "PublicDescription": "Counts the number of page walks outstanding for an outstanding code (instruction fetch) request in the PMH (Page Miss Handler) each cycle.",
         "SampleAfterValue": "100003",
-        "Speculative": "1",
         "UMask": "0x10",
         "Unit": "cpu_core"
+    },
+    {
+        "BriefDescription": "Counts the number of cycles that the head (oldest load) of the load buffer and retirement are both stalled due to a DTLB miss.",
+        "EventCode": "0x05",
+        "EventName": "LD_HEAD.DTLB_MISS_AT_RET",
+        "SampleAfterValue": "1000003",
+        "UMask": "0x90",
+        "Unit": "cpu_atom"
     }
 ]
diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv
index 78af105ca2367..df47462a125ff 100644
--- a/tools/perf/pmu-events/arch/x86/mapfile.csv
+++ b/tools/perf/pmu-events/arch/x86/mapfile.csv
@@ -1,5 +1,5 @@
 Family-model,Version,Filename,EventType
-GenuineIntel-6-(97|9A|B7|BA|BF),v1.15,alderlake,core
+GenuineIntel-6-(97|9A|B7|BA|BF),v1.16,alderlake,core
 GenuineIntel-6-BE,v1.16,alderlaken,core
 GenuineIntel-6-(1C|26|27|35|36),v4,bonnell,core
 GenuineIntel-6-(3D|47),v26,broadwell,core
-- 
GitLab


From 5736b1b70170e15d66ec02e500db917ef42ade83 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Sat, 3 Sep 2022 00:37:06 -0700
Subject: [PATCH 2667/4122] x86/paravirt: Remove clobber bitmask from
 .parainstructions

The u16 "clobber" value is not used in .parainstructions since commit
27876f3882fd ("x86/paravirt: Remove clobbers from struct paravirt_patch_site")

Remove the u16 from the section macro, the argument from all macros, and
all now-unused CLBR_* macros.

Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/20220903073706.3193746-1-keescook@chromium.org
---
 arch/x86/include/asm/paravirt_types.h | 61 ++++++---------------------
 1 file changed, 12 insertions(+), 49 deletions(-)

diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 27c692791b7ea..8c1da419260f9 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -20,37 +20,6 @@ enum paravirt_lazy_mode {
 
 #ifdef CONFIG_PARAVIRT
 
-/* Bitmask of what can be clobbered: usually at least eax. */
-#define CLBR_EAX  (1 << 0)
-#define CLBR_ECX  (1 << 1)
-#define CLBR_EDX  (1 << 2)
-#define CLBR_EDI  (1 << 3)
-
-#ifdef CONFIG_X86_32
-/* CLBR_ANY should match all regs platform has. For i386, that's just it */
-#define CLBR_ANY  ((1 << 4) - 1)
-
-#define CLBR_ARG_REGS	(CLBR_EAX | CLBR_EDX | CLBR_ECX)
-#define CLBR_RET_REG	(CLBR_EAX | CLBR_EDX)
-#else
-#define CLBR_RAX  CLBR_EAX
-#define CLBR_RCX  CLBR_ECX
-#define CLBR_RDX  CLBR_EDX
-#define CLBR_RDI  CLBR_EDI
-#define CLBR_RSI  (1 << 4)
-#define CLBR_R8   (1 << 5)
-#define CLBR_R9   (1 << 6)
-#define CLBR_R10  (1 << 7)
-#define CLBR_R11  (1 << 8)
-
-#define CLBR_ANY  ((1 << 9) - 1)
-
-#define CLBR_ARG_REGS	(CLBR_RDI | CLBR_RSI | CLBR_RDX | \
-			 CLBR_RCX | CLBR_R8 | CLBR_R9)
-#define CLBR_RET_REG	(CLBR_RAX)
-
-#endif /* X86_64 */
-
 #ifndef __ASSEMBLY__
 
 #include <asm/desc_defs.h>
@@ -297,27 +266,23 @@ extern struct paravirt_patch_template pv_ops;
 #define paravirt_type(op)				\
 	[paravirt_typenum] "i" (PARAVIRT_PATCH(op)),	\
 	[paravirt_opptr] "m" (pv_ops.op)
-#define paravirt_clobber(clobber)		\
-	[paravirt_clobber] "i" (clobber)
-
 /*
  * Generate some code, and mark it as patchable by the
  * apply_paravirt() alternate instruction patcher.
  */
-#define _paravirt_alt(insn_string, type, clobber)	\
+#define _paravirt_alt(insn_string, type)		\
 	"771:\n\t" insn_string "\n" "772:\n"		\
 	".pushsection .parainstructions,\"a\"\n"	\
 	_ASM_ALIGN "\n"					\
 	_ASM_PTR " 771b\n"				\
 	"  .byte " type "\n"				\
 	"  .byte 772b-771b\n"				\
-	"  .short " clobber "\n"			\
 	_ASM_ALIGN "\n"					\
 	".popsection\n"
 
 /* Generate patchable code, with the default asm parameters. */
 #define paravirt_alt(insn_string)					\
-	_paravirt_alt(insn_string, "%c[paravirt_typenum]", "%c[paravirt_clobber]")
+	_paravirt_alt(insn_string, "%c[paravirt_typenum]")
 
 /* Simple instruction patching code. */
 #define NATIVE_LABEL(a,x,b) "\n\t.globl " a #x "_" #b "\n" a #x "_" #b ":\n\t"
@@ -469,20 +434,19 @@ int paravirt_disable_iospace(void);
 	})
 
 
-#define ____PVOP_CALL(ret, op, clbr, call_clbr, extra_clbr, ...)	\
+#define ____PVOP_CALL(ret, op, call_clbr, extra_clbr, ...)	\
 	({								\
 		PVOP_CALL_ARGS;						\
 		PVOP_TEST_NULL(op);					\
 		asm volatile(paravirt_alt(PARAVIRT_CALL)		\
 			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
-			       paravirt_clobber(clbr),			\
 			       ##__VA_ARGS__				\
 			     : "memory", "cc" extra_clbr);		\
 		ret;							\
 	})
 
-#define ____PVOP_ALT_CALL(ret, op, alt, cond, clbr, call_clbr,		\
+#define ____PVOP_ALT_CALL(ret, op, alt, cond, call_clbr,		\
 			  extra_clbr, ...)				\
 	({								\
 		PVOP_CALL_ARGS;						\
@@ -491,45 +455,44 @@ int paravirt_disable_iospace(void);
 					 alt, cond)			\
 			     : call_clbr, ASM_CALL_CONSTRAINT		\
 			     : paravirt_type(op),			\
-			       paravirt_clobber(clbr),			\
 			       ##__VA_ARGS__				\
 			     : "memory", "cc" extra_clbr);		\
 		ret;							\
 	})
 
 #define __PVOP_CALL(rettype, op, ...)					\
-	____PVOP_CALL(PVOP_RETVAL(rettype), op, CLBR_ANY,		\
+	____PVOP_CALL(PVOP_RETVAL(rettype), op,				\
 		      PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS, ##__VA_ARGS__)
 
 #define __PVOP_ALT_CALL(rettype, op, alt, cond, ...)			\
-	____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond, CLBR_ANY,\
+	____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op, alt, cond,		\
 			  PVOP_CALL_CLOBBERS, EXTRA_CLOBBERS,		\
 			  ##__VA_ARGS__)
 
 #define __PVOP_CALLEESAVE(rettype, op, ...)				\
-	____PVOP_CALL(PVOP_RETVAL(rettype), op.func, CLBR_RET_REG,	\
+	____PVOP_CALL(PVOP_RETVAL(rettype), op.func,			\
 		      PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 #define __PVOP_ALT_CALLEESAVE(rettype, op, alt, cond, ...)		\
 	____PVOP_ALT_CALL(PVOP_RETVAL(rettype), op.func, alt, cond,	\
-			  CLBR_RET_REG, PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
+			  PVOP_CALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 
 #define __PVOP_VCALL(op, ...)						\
-	(void)____PVOP_CALL(, op, CLBR_ANY, PVOP_VCALL_CLOBBERS,	\
+	(void)____PVOP_CALL(, op, PVOP_VCALL_CLOBBERS,			\
 		       VEXTRA_CLOBBERS, ##__VA_ARGS__)
 
 #define __PVOP_ALT_VCALL(op, alt, cond, ...)				\
-	(void)____PVOP_ALT_CALL(, op, alt, cond, CLBR_ANY,		\
+	(void)____PVOP_ALT_CALL(, op, alt, cond,			\
 				PVOP_VCALL_CLOBBERS, VEXTRA_CLOBBERS,	\
 				##__VA_ARGS__)
 
 #define __PVOP_VCALLEESAVE(op, ...)					\
-	(void)____PVOP_CALL(, op.func, CLBR_RET_REG,			\
+	(void)____PVOP_CALL(, op.func,					\
 			    PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 #define __PVOP_ALT_VCALLEESAVE(op, alt, cond, ...)			\
-	(void)____PVOP_ALT_CALL(, op.func, alt, cond, CLBR_RET_REG,	\
+	(void)____PVOP_ALT_CALL(, op.func, alt, cond,			\
 				PVOP_VCALLEE_CLOBBERS, , ##__VA_ARGS__)
 
 
-- 
GitLab


From 6f520ce17920b3cdfbd2479b3ccf27f9706219d0 Mon Sep 17 00:00:00 2001
From: Ajay Kaher <akaher@vmware.com>
Date: Wed, 23 Nov 2022 15:48:16 +0530
Subject: [PATCH 2668/4122] perf symbol: correction while adjusting symbol

perf doesn't provide proper symbol information for specially crafted
.debug files.

Sometimes .debug file may not have similar program header as runtime
ELF file. For example if we generate .debug file using objcopy
--only-keep-debug resulting file will not contain .text, .data and
other runtime sections. That means corresponding program headers will
have zero FileSiz and modified Offset.

Example: program header of text section of libxxx.so:

Type           Offset             VirtAddr           PhysAddr
               FileSiz            MemSiz              Flags  Align
LOAD        0x00000000003d3000 0x00000000003d3000 0x00000000003d3000
            0x000000000055ae80 0x000000000055ae80  R E    0x1000

Same program header after executing:
objcopy --only-keep-debug libxxx.so libxxx.so.debug

LOAD        0x0000000000001000 0x00000000003d3000 0x00000000003d3000
            0x0000000000000000 0x000000000055ae80  R E    0x1000

Offset and FileSiz have been changed.

Following formula will not provide correct value, if program header
taken from .debug file (syms_ss):

    sym.st_value -= phdr.p_vaddr - phdr.p_offset;

Correct program header information is located inside runtime ELF
file (runtime_ss).

Fixes: 2d86612aacb7805f ("perf symbol: Correct address for bss symbols")
Signed-off-by: Ajay Kaher <akaher@vmware.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Makhalov <amakhalov@vmware.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Srivatsa S. Bhat <srivatsab@vmware.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Vasavi Sirnapalli <vsirnapalli@vmware.com>
Link: http://lore.kernel.org/lkml/1669198696-50547-1-git-send-email-akaher@vmware.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol-elf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 647b7dff8ef36..80345695b1360 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1303,7 +1303,7 @@ dso__load_sym_internal(struct dso *dso, struct map *map, struct symsrc *syms_ss,
 			   (!used_opd && syms_ss->adjust_symbols)) {
 			GElf_Phdr phdr;
 
-			if (elf_read_program_header(syms_ss->elf,
+			if (elf_read_program_header(runtime_ss->elf,
 						    (u64)sym.st_value, &phdr)) {
 				pr_debug4("%s: failed to find program header for "
 					   "symbol: %s st_value: %#" PRIx64 "\n",
-- 
GitLab


From f1a033cc6b9eb6d80322008422df3c87aa5d47a0 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@suse.com>
Date: Wed, 9 Nov 2022 14:44:18 +0100
Subject: [PATCH 2669/4122] x86/paravirt: Use common macro for creating simple
 asm paravirt functions

There are some paravirt assembler functions which are sharing a common
pattern. Introduce a macro DEFINE_PARAVIRT_ASM() for creating them.

Note that this macro is including explicit alignment of the generated
functions, leading to __raw_callee_save___kvm_vcpu_is_preempted(),
_paravirt_nop() and paravirt_ret0() to be aligned at 4 byte boundaries
now.

The explicit _paravirt_nop() prototype in paravirt.c isn't needed, as
it is included in paravirt_types.h already.

Signed-off-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Srivatsa S. Bhat (VMware) <srivatsa@csail.mit.edu>
Link: https://lkml.kernel.org/r/20221109134418.6516-1-jgross@suse.com
---
 arch/x86/include/asm/paravirt.h           | 12 ++++++
 arch/x86/include/asm/qspinlock_paravirt.h | 47 ++++++++++-------------
 arch/x86/kernel/kvm.c                     | 19 +++------
 arch/x86/kernel/paravirt.c                | 23 +----------
 4 files changed, 40 insertions(+), 61 deletions(-)

diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 2851bc2339d58..73e9522db7c15 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -731,6 +731,18 @@ static __always_inline unsigned long arch_local_irq_save(void)
 #undef PVOP_VCALL4
 #undef PVOP_CALL4
 
+#define DEFINE_PARAVIRT_ASM(func, instr, sec)		\
+	asm (".pushsection " #sec ", \"ax\"\n"		\
+	     ".global " #func "\n\t"			\
+	     ".type " #func ", @function\n\t"		\
+	     ASM_FUNC_ALIGN "\n"			\
+	     #func ":\n\t"				\
+	     ASM_ENDBR					\
+	     instr "\n\t"				\
+	     ASM_RET					\
+	     ".size " #func ", . - " #func "\n\t"	\
+	     ".popsection")
+
 extern void default_banner(void);
 
 #else  /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index d861127731f4d..42b17cf10b10e 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -14,8 +14,6 @@
 
 __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
 #define __pv_queued_spin_unlock	__pv_queued_spin_unlock
-#define PV_UNLOCK		"__raw_callee_save___pv_queued_spin_unlock"
-#define PV_UNLOCK_SLOWPATH	"__raw_callee_save___pv_queued_spin_unlock_slowpath"
 
 /*
  * Optimized assembly version of __raw_callee_save___pv_queued_spin_unlock
@@ -37,32 +35,27 @@ __PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath, ".spinlock.text");
  *   rsi = lockval           (second argument)
  *   rdx = internal variable (set to 0)
  */
-asm    (".pushsection .spinlock.text, \"ax\";"
-	".globl " PV_UNLOCK ";"
-	".type " PV_UNLOCK ", @function;"
-	ASM_FUNC_ALIGN
-	PV_UNLOCK ": "
-	ASM_ENDBR
-	FRAME_BEGIN
-	"push  %rdx;"
-	"mov   $0x1,%eax;"
-	"xor   %edx,%edx;"
-	LOCK_PREFIX "cmpxchg %dl,(%rdi);"
-	"cmp   $0x1,%al;"
-	"jne   .slowpath;"
-	"pop   %rdx;"
+#define PV_UNLOCK_ASM							\
+	FRAME_BEGIN							\
+	"push  %rdx\n\t"						\
+	"mov   $0x1,%eax\n\t"						\
+	"xor   %edx,%edx\n\t"						\
+	LOCK_PREFIX "cmpxchg %dl,(%rdi)\n\t"				\
+	"cmp   $0x1,%al\n\t"						\
+	"jne   .slowpath\n\t"						\
+	"pop   %rdx\n\t"						\
+	FRAME_END							\
+	ASM_RET								\
+	".slowpath:\n\t"						\
+	"push   %rsi\n\t"						\
+	"movzbl %al,%esi\n\t"						\
+	"call __raw_callee_save___pv_queued_spin_unlock_slowpath\n\t"	\
+	"pop    %rsi\n\t"						\
+	"pop    %rdx\n\t"						\
 	FRAME_END
-	ASM_RET
-	".slowpath: "
-	"push   %rsi;"
-	"movzbl %al,%esi;"
-	"call " PV_UNLOCK_SLOWPATH ";"
-	"pop    %rsi;"
-	"pop    %rdx;"
-	FRAME_END
-	ASM_RET
-	".size " PV_UNLOCK ", .-" PV_UNLOCK ";"
-	".popsection");
+
+DEFINE_PARAVIRT_ASM(__raw_callee_save___pv_queued_spin_unlock,
+		    PV_UNLOCK_ASM, .spinlock.text);
 
 #else /* CONFIG_64BIT */
 
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 95fb85bea1118..4d053cb2c48a4 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -798,20 +798,13 @@ extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
  * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and
  * restoring to/from the stack.
  */
-asm(
-".pushsection .text;"
-".global __raw_callee_save___kvm_vcpu_is_preempted;"
-".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
-ASM_FUNC_ALIGN
-"__raw_callee_save___kvm_vcpu_is_preempted:"
-ASM_ENDBR
-"movq	__per_cpu_offset(,%rdi,8), %rax;"
-"cmpb	$0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
-"setne	%al;"
-ASM_RET
-".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
-".popsection");
+#define PV_VCPU_PREEMPTED_ASM						     \
+ "movq   __per_cpu_offset(,%rdi,8), %rax\n\t"				     \
+ "cmpb   $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax)\n\t" \
+ "setne  %al\n\t"
 
+DEFINE_PARAVIRT_ASM(__raw_callee_save___kvm_vcpu_is_preempted,
+		    PV_VCPU_PREEMPTED_ASM, .text);
 #endif
 
 static void __init kvm_guest_init(void)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e244c49b52d7e..327757afb0276 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -37,29 +37,10 @@
  * nop stub, which must not clobber anything *including the stack* to
  * avoid confusing the entry prologues.
  */
-extern void _paravirt_nop(void);
-asm (".pushsection .entry.text, \"ax\"\n"
-     ".global _paravirt_nop\n"
-     ASM_FUNC_ALIGN
-     "_paravirt_nop:\n\t"
-     ASM_ENDBR
-     ASM_RET
-     ".size _paravirt_nop, . - _paravirt_nop\n\t"
-     ".type _paravirt_nop, @function\n\t"
-     ".popsection");
+DEFINE_PARAVIRT_ASM(_paravirt_nop, "", .entry.text);
 
 /* stub always returning 0. */
-asm (".pushsection .entry.text, \"ax\"\n"
-     ".global paravirt_ret0\n"
-     ASM_FUNC_ALIGN
-     "paravirt_ret0:\n\t"
-     ASM_ENDBR
-     "xor %" _ASM_AX ", %" _ASM_AX ";\n\t"
-     ASM_RET
-     ".size paravirt_ret0, . - paravirt_ret0\n\t"
-     ".type paravirt_ret0, @function\n\t"
-     ".popsection");
-
+DEFINE_PARAVIRT_ASM(paravirt_ret0, "xor %eax,%eax", .entry.text);
 
 void __init default_banner(void)
 {
-- 
GitLab


From 1a9c20b45d193ead21dc63b07d1abb40b0a237c2 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 17 Nov 2022 18:46:05 -0800
Subject: [PATCH 2670/4122] perf list: Support newlines in wordwrap

Rather than a newline starting from column 0, record a newline was
seen and then add the newline and space before the next word.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221118024607.409083-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-list.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 84fa2d050eacb..f3750331e8f6b 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -74,17 +74,19 @@ static void wordwrap(const char *s, int start, int max, int corr)
 {
 	int column = start;
 	int n;
+	bool saw_newline = false;
 
 	while (*s) {
-		int wlen = strcspn(s, " \t");
+		int wlen = strcspn(s, " \t\n");
 
-		if (column + wlen >= max && column > start) {
+		if ((column + wlen >= max && column > start) || saw_newline) {
 			printf("\n%*s", start, "");
 			column = start + corr;
 		}
 		n = printf("%s%.*s", column > start ? " " : "", wlen, s);
 		if (n <= 0)
 			break;
+		saw_newline = s[wlen] == '\n';
 		s += wlen;
 		column += n;
 		s = skip_spaces(s);
@@ -146,7 +148,7 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
 		wordwrap(desc, 8, pager_get_columns(), 0);
 		printf("]\n");
 	}
-
+	long_desc = long_desc ?: desc;
 	if (long_desc && print_state->long_desc) {
 		printf("%*s", 8, "[");
 		wordwrap(long_desc, 8, pager_get_columns(), 0);
@@ -154,7 +156,8 @@ static void default_print_event(void *ps, const char *pmu_name, const char *topi
 	}
 
 	if (print_state->detailed && encoding_desc) {
-		printf("%*s%s", 8, "", encoding_desc);
+		printf("%*s", 8, "");
+		wordwrap(encoding_desc, 8, pager_get_columns(), 0);
 		if (metric_name)
 			printf(" MetricName: %s", metric_name);
 		if (metric_expr)
-- 
GitLab


From 1284ded7d05952f2657f5abeeda5a3f74ca9cffc Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 17 Nov 2022 18:46:06 -0800
Subject: [PATCH 2671/4122] perf list: JSON escape encoding improvements

Use strbuf to make the string under construction's length unlimited. Use
the format %s to mean a literal string copy and %S to signify a need to
escape the string. Add supported for escaping a newline character.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221118024607.409083-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-list.c | 109 +++++++++++++++++++++++---------------
 1 file changed, 67 insertions(+), 42 deletions(-)

diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index f3750331e8f6b..137d73edb5410 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -17,6 +17,7 @@
 #include "util/metricgroup.h"
 #include "util/string2.h"
 #include "util/strlist.h"
+#include "util/strbuf.h"
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
 #include <linux/zalloc.h>
@@ -250,45 +251,56 @@ static void json_print_end(void *ps)
 	printf("%s]\n", print_state->need_sep ? "\n" : "");
 }
 
-static void fix_escape_printf(const char *fmt, ...)
+static void fix_escape_printf(struct strbuf *buf, const char *fmt, ...)
 {
 	va_list args;
-	char buf[2048];
-	size_t buf_pos = 0;
 
 	va_start(args, fmt);
+	strbuf_setlen(buf, 0);
 	for (size_t fmt_pos = 0; fmt_pos < strlen(fmt); fmt_pos++) {
 		switch (fmt[fmt_pos]) {
-		case '%': {
-			const char *s = va_arg(args, const char*);
-
+		case '%':
 			fmt_pos++;
-			assert(fmt[fmt_pos] == 's');
-			for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) {
-				switch (s[s_pos]) {
-				case '\\':
-					__fallthrough;
-				case '\"':
-					buf[buf_pos++] = '\\';
-					assert(buf_pos < sizeof(buf));
-					__fallthrough;
-				default:
-					buf[buf_pos++] = s[s_pos];
-					assert(buf_pos < sizeof(buf));
-					break;
+			switch (fmt[fmt_pos]) {
+			case 's': {
+				const char *s = va_arg(args, const char*);
+
+				strbuf_addstr(buf, s);
+				break;
+			}
+			case 'S': {
+				const char *s = va_arg(args, const char*);
+
+				for (size_t s_pos = 0; s_pos < strlen(s); s_pos++) {
+					switch (s[s_pos]) {
+					case '\n':
+						strbuf_addstr(buf, "\\n");
+						break;
+					case '\\':
+						__fallthrough;
+					case '\"':
+						strbuf_addch(buf, '\\');
+						__fallthrough;
+					default:
+						strbuf_addch(buf, s[s_pos]);
+						break;
+					}
 				}
+				break;
+			}
+			default:
+				pr_err("Unexpected format character '%c'\n", fmt[fmt_pos]);
+				strbuf_addch(buf, '%');
+				strbuf_addch(buf, fmt[fmt_pos]);
 			}
 			break;
-		}
 		default:
-			buf[buf_pos++] = fmt[fmt_pos];
-			assert(buf_pos < sizeof(buf));
+			strbuf_addch(buf, fmt[fmt_pos]);
 			break;
 		}
 	}
 	va_end(args);
-	buf[buf_pos] = '\0';
-	fputs(buf, stdout);
+	fputs(buf->buf, stdout);
 }
 
 static void json_print_event(void *ps, const char *pmu_name, const char *topic,
@@ -301,62 +313,71 @@ static void json_print_event(void *ps, const char *pmu_name, const char *topic,
 {
 	struct json_print_state *print_state = ps;
 	bool need_sep = false;
+	struct strbuf buf;
 
+	strbuf_init(&buf, 0);
 	printf("%s{\n", print_state->need_sep ? ",\n" : "");
 	print_state->need_sep = true;
 	if (pmu_name) {
-		fix_escape_printf("\t\"Unit\": \"%s\"", pmu_name);
+		fix_escape_printf(&buf, "\t\"Unit\": \"%S\"", pmu_name);
 		need_sep = true;
 	}
 	if (topic) {
-		fix_escape_printf("%s\t\"Topic\": \"%s\"", need_sep ? ",\n" : "", topic);
+		fix_escape_printf(&buf, "%s\t\"Topic\": \"%S\"", need_sep ? ",\n" : "", topic);
 		need_sep = true;
 	}
 	if (event_name) {
-		fix_escape_printf("%s\t\"EventName\": \"%s\"", need_sep ? ",\n" : "", event_name);
+		fix_escape_printf(&buf, "%s\t\"EventName\": \"%S\"", need_sep ? ",\n" : "",
+				  event_name);
 		need_sep = true;
 	}
 	if (event_alias && strlen(event_alias)) {
-		fix_escape_printf("%s\t\"EventAlias\": \"%s\"", need_sep ? ",\n" : "", event_alias);
+		fix_escape_printf(&buf, "%s\t\"EventAlias\": \"%S\"", need_sep ? ",\n" : "",
+				  event_alias);
 		need_sep = true;
 	}
 	if (scale_unit && strlen(scale_unit)) {
-		fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "",
+		fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "",
 				  scale_unit);
 		need_sep = true;
 	}
 	if (event_type_desc) {
-		fix_escape_printf("%s\t\"EventType\": \"%s\"", need_sep ? ",\n" : "",
+		fix_escape_printf(&buf, "%s\t\"EventType\": \"%S\"", need_sep ? ",\n" : "",
 				  event_type_desc);
 		need_sep = true;
 	}
 	if (deprecated) {
-		fix_escape_printf("%s\t\"Deprecated\": \"%s\"", need_sep ? ",\n" : "",
+		fix_escape_printf(&buf, "%s\t\"Deprecated\": \"%S\"", need_sep ? ",\n" : "",
 				  deprecated ? "1" : "0");
 		need_sep = true;
 	}
 	if (desc) {
-		fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc);
+		fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
+				  desc);
 		need_sep = true;
 	}
 	if (long_desc) {
-		fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "",
+		fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
 				  long_desc);
 		need_sep = true;
 	}
 	if (encoding_desc) {
-		fix_escape_printf("%s\t\"Encoding\": \"%s\"", need_sep ? ",\n" : "", encoding_desc);
+		fix_escape_printf(&buf, "%s\t\"Encoding\": \"%S\"", need_sep ? ",\n" : "",
+				  encoding_desc);
 		need_sep = true;
 	}
 	if (metric_name) {
-		fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", metric_name);
+		fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "",
+				  metric_name);
 		need_sep = true;
 	}
 	if (metric_expr) {
-		fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", metric_expr);
+		fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "",
+				  metric_expr);
 		need_sep = true;
 	}
 	printf("%s}", need_sep ? "\n" : "");
+	strbuf_release(&buf);
 }
 
 static void json_print_metric(void *ps __maybe_unused, const char *group,
@@ -366,35 +387,39 @@ static void json_print_metric(void *ps __maybe_unused, const char *group,
 {
 	struct json_print_state *print_state = ps;
 	bool need_sep = false;
+	struct strbuf buf;
 
+	strbuf_init(&buf, 0);
 	printf("%s{\n", print_state->need_sep ? ",\n" : "");
 	print_state->need_sep = true;
 	if (group) {
-		fix_escape_printf("\t\"MetricGroup\": \"%s\"", group);
+		fix_escape_printf(&buf, "\t\"MetricGroup\": \"%S\"", group);
 		need_sep = true;
 	}
 	if (name) {
-		fix_escape_printf("%s\t\"MetricName\": \"%s\"", need_sep ? ",\n" : "", name);
+		fix_escape_printf(&buf, "%s\t\"MetricName\": \"%S\"", need_sep ? ",\n" : "", name);
 		need_sep = true;
 	}
 	if (expr) {
-		fix_escape_printf("%s\t\"MetricExpr\": \"%s\"", need_sep ? ",\n" : "", expr);
+		fix_escape_printf(&buf, "%s\t\"MetricExpr\": \"%S\"", need_sep ? ",\n" : "", expr);
 		need_sep = true;
 	}
 	if (unit) {
-		fix_escape_printf("%s\t\"ScaleUnit\": \"%s\"", need_sep ? ",\n" : "", unit);
+		fix_escape_printf(&buf, "%s\t\"ScaleUnit\": \"%S\"", need_sep ? ",\n" : "", unit);
 		need_sep = true;
 	}
 	if (desc) {
-		fix_escape_printf("%s\t\"BriefDescription\": \"%s\"", need_sep ? ",\n" : "", desc);
+		fix_escape_printf(&buf, "%s\t\"BriefDescription\": \"%S\"", need_sep ? ",\n" : "",
+				  desc);
 		need_sep = true;
 	}
 	if (long_desc) {
-		fix_escape_printf("%s\t\"PublicDescription\": \"%s\"", need_sep ? ",\n" : "",
+		fix_escape_printf(&buf, "%s\t\"PublicDescription\": \"%S\"", need_sep ? ",\n" : "",
 				  long_desc);
 		need_sep = true;
 	}
 	printf("%s}", need_sep ? "\n" : "");
+	strbuf_release(&buf);
 }
 
 int cmd_list(int argc, const char **argv)
-- 
GitLab


From be3392b65f2b989badcadf5f6a353c7924d3ccf4 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 17 Nov 2022 18:46:07 -0800
Subject: [PATCH 2672/4122] perf list: List callback support for libpfm

Missed previously, add libpfm support for 'perf list' callbacks and
thereby JSON support.

Committer notes:

Add __maybe_unused to the args of the new print_libpfm_events() in the
else HAVE_LIBPFM block.

Fixes: e42b0ee61282a2f9 ("perf list: Add JSON output option")
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Caleb Biggers <caleb.biggers@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Perry Taylor <perry.taylor@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Weilin Wang <weilin.wang@intel.com>
Cc: Xin Gao <gaoxin@cdjrlc.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221118024607.409083-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pfm.c | 154 ++++++++++++++++++------------------------
 tools/perf/util/pfm.h |   7 +-
 2 files changed, 71 insertions(+), 90 deletions(-)

diff --git a/tools/perf/util/pfm.c b/tools/perf/util/pfm.c
index f0bcfcab1a932..ac3227ba769c6 100644
--- a/tools/perf/util/pfm.c
+++ b/tools/perf/util/pfm.c
@@ -12,6 +12,7 @@
 #include "util/parse-events.h"
 #include "util/pmu.h"
 #include "util/pfm.h"
+#include "util/strbuf.h"
 
 #include <string.h>
 #include <linux/kernel.h>
@@ -130,53 +131,36 @@ static const char *srcs[PFM_ATTR_CTRL_MAX] = {
 };
 
 static void
-print_attr_flags(pfm_event_attr_info_t *info)
+print_attr_flags(struct strbuf *buf, const pfm_event_attr_info_t *info)
 {
-	int n = 0;
+	if (info->is_dfl)
+		strbuf_addf(buf, "[default] ");
 
-	if (info->is_dfl) {
-		printf("[default] ");
-		n++;
-	}
-
-	if (info->is_precise) {
-		printf("[precise] ");
-		n++;
-	}
-
-	if (!n)
-		printf("- ");
+	if (info->is_precise)
+		strbuf_addf(buf, "[precise] ");
 }
 
 static void
-print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
+print_libpfm_event(const struct print_callbacks *print_cb, void *print_state,
+		const pfm_pmu_info_t *pinfo, const pfm_event_info_t *info,
+		struct strbuf *buf)
 {
-	pfm_event_attr_info_t ainfo;
-	const char *src;
 	int j, ret;
+	char topic[80], name[80];
 
-	ainfo.size = sizeof(ainfo);
+	strbuf_setlen(buf, 0);
+	snprintf(topic, sizeof(topic), "pfm %s", pinfo->name);
 
-	printf("  %s\n", info->name);
-	printf("    [%s]\n", info->desc);
-	if (long_desc) {
-		if (info->equiv)
-			printf("      Equiv: %s\n", info->equiv);
+	snprintf(name, sizeof(name), "%s::%s", pinfo->name, info->name);
+	strbuf_addf(buf, "Code: 0x%"PRIx64"\n", info->code);
 
-		printf("      Code  : 0x%"PRIx64"\n", info->code);
-	}
 	pfm_for_each_event_attr(j, info) {
-		ret = pfm_get_event_attr_info(info->idx, j,
-					      PFM_OS_PERF_EVENT_EXT, &ainfo);
-		if (ret != PFM_SUCCESS)
-			continue;
-
-		if (ainfo.type == PFM_ATTR_UMASK) {
-			printf("      %s:%s\n", info->name, ainfo.name);
-			printf("        [%s]\n", ainfo.desc);
-		}
+		pfm_event_attr_info_t ainfo;
+		const char *src;
 
-		if (!long_desc)
+		ainfo.size = sizeof(ainfo);
+		ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo);
+		if (ret != PFM_SUCCESS)
 			continue;
 
 		if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
@@ -184,64 +168,74 @@ print_libpfm_events_detailed(pfm_event_info_t *info, bool long_desc)
 
 		src = srcs[ainfo.ctrl];
 		switch (ainfo.type) {
-		case PFM_ATTR_UMASK:
-			printf("        Umask : 0x%02"PRIx64" : %s: ",
-				ainfo.code, src);
-			print_attr_flags(&ainfo);
-			putchar('\n');
+		case PFM_ATTR_UMASK: /* Ignore for now */
 			break;
 		case PFM_ATTR_MOD_BOOL:
-			printf("      Modif : %s: [%s] : %s (boolean)\n", src,
-				ainfo.name, ainfo.desc);
+			strbuf_addf(buf, " Modif: %s: [%s] : %s (boolean)\n", src,
+				    ainfo.name, ainfo.desc);
 			break;
 		case PFM_ATTR_MOD_INTEGER:
-			printf("      Modif : %s: [%s] : %s (integer)\n", src,
-				ainfo.name, ainfo.desc);
+			strbuf_addf(buf, " Modif: %s: [%s] : %s (integer)\n", src,
+				    ainfo.name, ainfo.desc);
 			break;
 		case PFM_ATTR_NONE:
 		case PFM_ATTR_RAW_UMASK:
 		case PFM_ATTR_MAX:
 		default:
-			printf("      Attr  : %s: [%s] : %s\n", src,
-				ainfo.name, ainfo.desc);
+			strbuf_addf(buf, " Attr: %s: [%s] : %s\n", src,
+				    ainfo.name, ainfo.desc);
 		}
 	}
-}
+	print_cb->print_event(print_state,
+			pinfo->name,
+			topic,
+			name, info->equiv,
+			/*scale_unit=*/NULL,
+			/*deprecated=*/NULL, "PFM event",
+			info->desc, /*long_desc=*/NULL,
+			/*encoding_desc=*/buf->buf,
+			/*metric_name=*/NULL, /*metric_expr=*/NULL);
 
-/*
- * list all pmu::event:umask, pmu::event
- * printed events may not be all valid combinations of umask for an event
- */
-static void
-print_libpfm_events_raw(pfm_pmu_info_t *pinfo, pfm_event_info_t *info)
-{
-	pfm_event_attr_info_t ainfo;
-	int j, ret;
-	bool has_umask = false;
+	pfm_for_each_event_attr(j, info) {
+		pfm_event_attr_info_t ainfo;
+		const char *src;
 
-	ainfo.size = sizeof(ainfo);
+		strbuf_setlen(buf, 0);
 
-	pfm_for_each_event_attr(j, info) {
-		ret = pfm_get_event_attr_info(info->idx, j,
-					      PFM_OS_PERF_EVENT_EXT, &ainfo);
+		ainfo.size = sizeof(ainfo);
+		ret = pfm_get_event_attr_info(info->idx, j, PFM_OS_PERF_EVENT_EXT, &ainfo);
 		if (ret != PFM_SUCCESS)
 			continue;
 
-		if (ainfo.type != PFM_ATTR_UMASK)
-			continue;
+		if (ainfo.ctrl >= PFM_ATTR_CTRL_MAX)
+			ainfo.ctrl = PFM_ATTR_CTRL_UNKNOWN;
 
-		printf("%s::%s:%s\n", pinfo->name, info->name, ainfo.name);
-		has_umask = true;
+		src = srcs[ainfo.ctrl];
+		if (ainfo.type == PFM_ATTR_UMASK) {
+			strbuf_addf(buf, "Umask: 0x%02"PRIx64" : %s: ",
+				ainfo.code, src);
+			print_attr_flags(buf, &ainfo);
+			snprintf(name, sizeof(name), "%s::%s:%s",
+				 pinfo->name, info->name, ainfo.name);
+			print_cb->print_event(print_state,
+					pinfo->name,
+					topic,
+					name, /*alias=*/NULL,
+					/*scale_unit=*/NULL,
+					/*deprecated=*/NULL, "PFM event",
+					ainfo.desc, /*long_desc=*/NULL,
+					/*encoding_desc=*/buf->buf,
+					/*metric_name=*/NULL, /*metric_expr=*/NULL);
+		}
 	}
-	if (!has_umask)
-		printf("%s::%s\n", pinfo->name, info->name);
 }
 
-void print_libpfm_events(bool name_only, bool long_desc)
+void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state)
 {
 	pfm_event_info_t info;
 	pfm_pmu_info_t pinfo;
-	int i, p, ret;
+	int p, ret;
+	struct strbuf storage;
 
 	libpfm_initialize();
 
@@ -249,12 +243,9 @@ void print_libpfm_events(bool name_only, bool long_desc)
 	info.size  = sizeof(info);
 	pinfo.size = sizeof(pinfo);
 
-	if (!name_only)
-		puts("\nList of pre-defined events (to be used in --pfm-events):\n");
+	strbuf_init(&storage, 2048);
 
 	pfm_for_all_pmus(p) {
-		bool printed_pmu = false;
-
 		ret = pfm_get_pmu_info(p, &pinfo);
 		if (ret != PFM_SUCCESS)
 			continue;
@@ -267,25 +258,14 @@ void print_libpfm_events(bool name_only, bool long_desc)
 		if (pinfo.pmu == PFM_PMU_PERF_EVENT)
 			continue;
 
-		for (i = pinfo.first_event; i != -1;
-		     i = pfm_get_event_next(i)) {
-
+		for (int i = pinfo.first_event; i != -1; i = pfm_get_event_next(i)) {
 			ret = pfm_get_event_info(i, PFM_OS_PERF_EVENT_EXT,
 						&info);
 			if (ret != PFM_SUCCESS)
 				continue;
 
-			if (!name_only && !printed_pmu) {
-				printf("%s:\n", pinfo.name);
-				printed_pmu = true;
-			}
-
-			if (!name_only)
-				print_libpfm_events_detailed(&info, long_desc);
-			else
-				print_libpfm_events_raw(&pinfo, &info);
+			print_libpfm_event(print_cb, print_state, &pinfo, &info, &storage);
 		}
-		if (!name_only && printed_pmu)
-			putchar('\n');
 	}
+	strbuf_release(&storage);
 }
diff --git a/tools/perf/util/pfm.h b/tools/perf/util/pfm.h
index 7d70dda870127..fb25c2749d267 100644
--- a/tools/perf/util/pfm.h
+++ b/tools/perf/util/pfm.h
@@ -7,13 +7,14 @@
 #ifndef __PERF_PFM_H
 #define __PERF_PFM_H
 
+#include "print-events.h"
 #include <subcmd/parse-options.h>
 
 #ifdef HAVE_LIBPFM
 int parse_libpfm_events_option(const struct option *opt, const char *str,
 			int unset);
 
-void print_libpfm_events(bool name_only, bool long_desc);
+void print_libpfm_events(const struct print_callbacks *print_cb, void *print_state);
 
 #else
 #include <linux/compiler.h>
@@ -26,8 +27,8 @@ static inline int parse_libpfm_events_option(
 	return 0;
 }
 
-static inline void print_libpfm_events(bool name_only __maybe_unused,
-				       bool long_desc __maybe_unused)
+static inline void print_libpfm_events(const struct print_callbacks *print_cb __maybe_unused,
+				       void *print_state __maybe_unused)
 {
 }
 
-- 
GitLab


From d8a5b59c5fc75c99ba17e3eb1a8f580d8d172b28 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 09:13:41 +0100
Subject: [PATCH 2673/4122] phy: qcom-qmp-combo: fix out-of-bounds clock access

The SM8250 only uses three clocks but the DP configuration erroneously
described four clocks.

In case the DP part of the PHY is initialised before the USB part, this
would lead to uninitialised memory beyond the bulk-clocks array to be
treated as a clock pointer as the clocks are requested based on the USB
configuration.

Fixes: aff188feb5e1 ("phy: qcom-qmp: add support for sm8250-usb3-dp phy")
Cc: stable@vger.kernel.org	# 5.13
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114081346.5116-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 5e11b6a1d1899..bb38b18258ca2 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1270,8 +1270,8 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.clk_list		= qmp_v4_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
+	.clk_list		= qmp_v4_sm8250_usbphy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
-- 
GitLab


From e965ab8216a419fadb4520b65a95dc7017daa800 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 09:13:42 +0100
Subject: [PATCH 2674/4122] phy: qcom-qmp-combo: fix sdm845 reset

The SDM845 has two resets but the DP configuration erroneously described
only one.

In case the DP part of the PHY is initialised before the USB part (e.g.
depending on probe order), then only the first reset would be asserted.

Add a dedicated configuration for SDM845 rather than reuse the
incompatible SC7180 configuration.

Fixes: d88497fb6bbd ("phy: qualcomm: phy-qcom-qmp: add support for combo USB3+DP phy on SDM845")
Cc: stable@vger.kernel.org	# 6.1
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114081346.5116-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 39 ++++++++++++++++++++++-
 1 file changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index bb38b18258ca2..cc53e2f991218 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1084,9 +1084,46 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
 	.has_pwrdn_delay	= true,
 };
 
+static const struct qmp_phy_cfg sdm845_dpphy_cfg = {
+	.type			= PHY_TYPE_DP,
+	.lanes			= 2,
+
+	.serdes_tbl		= qmp_v3_dp_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
+	.tx_tbl			= qmp_v3_dp_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(qmp_v3_dp_tx_tbl),
+
+	.serdes_tbl_rbr		= qmp_v3_dp_serdes_tbl_rbr,
+	.serdes_tbl_rbr_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr),
+	.serdes_tbl_hbr		= qmp_v3_dp_serdes_tbl_hbr,
+	.serdes_tbl_hbr_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr),
+	.serdes_tbl_hbr2	= qmp_v3_dp_serdes_tbl_hbr2,
+	.serdes_tbl_hbr2_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr2),
+	.serdes_tbl_hbr3	= qmp_v3_dp_serdes_tbl_hbr3,
+	.serdes_tbl_hbr3_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl_hbr3),
+
+	.swing_hbr_rbr		= &qmp_dp_v3_voltage_swing_hbr_rbr,
+	.pre_emphasis_hbr_rbr	= &qmp_dp_v3_pre_emphasis_hbr_rbr,
+	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
+	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
+
+	.clk_list		= qmp_v3_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qmp_v3_usb3phy_regs_layout,
+
+	.dp_aux_init = qcom_qmp_v3_phy_dp_aux_init,
+	.configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx,
+	.configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy,
+	.calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate,
+};
+
 static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = {
 	.usb_cfg                = &sdm845_usb3phy_cfg,
-	.dp_cfg                 = &sc7180_dpphy_cfg,
+	.dp_cfg                 = &sdm845_dpphy_cfg,
 };
 
 static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
-- 
GitLab


From 910dd4883d757af5faac92590f33f0f7da963032 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 09:13:43 +0100
Subject: [PATCH 2675/4122] phy: qcom-qmp-combo: fix sc8180x reset

The SC8180X has two resets but the DP configuration erroneously
described only one.

In case the DP part of the PHY is initialised before the USB part (e.g.
depending on probe order), then only the first reset would be asserted.

Fixes: 1633802cd4ac ("phy: qcom: qmp: Add SC8180x USB/DP combo")
Cc: stable@vger.kernel.org	# 5.15
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114081346.5116-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index cc53e2f991218..40c25a0ead239 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1177,8 +1177,8 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
 
 	.clk_list		= qmp_v3_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
-	.reset_list		= sc7180_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(sc7180_usb3phy_reset_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v3_usb3phy_regs_layout,
-- 
GitLab


From 7a7d86d14d073dfa3429c550667a8e78b99edbd4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 09:13:44 +0100
Subject: [PATCH 2676/4122] phy: qcom-qmp-combo: fix broken power on

The PHY is powered on during phy-init by setting the SW_PWRDN bit in the
COM_POWER_DOWN_CTRL register and then setting the same bit in the in the
PCS_POWER_DOWN_CONTROL register that belongs to the USB part of the
PHY.

Currently, whether power on succeeds depends on probe order and having
the USB part of the PHY be initialised first. In case the DP part of the
PHY is instead initialised first, the intended power on of the USB block
results in a corrupted DP_PHY register (e.g. DP_PHY_AUX_CFG8).

Add a pointer to the USB part of the PHY to the driver data and use that
to power on the PHY also if the DP part of the PHY is initialised first.

Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy")
Cc: stable@vger.kernel.org	# 5.10
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114081346.5116-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 40c25a0ead239..17707f68d482d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -932,6 +932,7 @@ struct qcom_qmp {
 	struct regulator_bulk_data *vregs;
 
 	struct qmp_phy **phys;
+	struct qmp_phy *usb_phy;
 
 	struct mutex phy_mutex;
 	int init_count;
@@ -1911,7 +1912,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy)
 {
 	struct qcom_qmp *qmp = qphy->qmp;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs = qphy->pcs;
+	struct qmp_phy *usb_phy = qmp->usb_phy;
 	void __iomem *dp_com = qmp->dp_com;
 	int ret;
 
@@ -1963,7 +1964,8 @@ static int qmp_combo_com_init(struct qmp_phy *qphy)
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03);
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 
-	qphy_setbits(pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL], SW_PWRDN);
+	qphy_setbits(usb_phy->pcs, usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			SW_PWRDN);
 
 	mutex_unlock(&qmp->phy_mutex);
 
@@ -2831,6 +2833,8 @@ static int qmp_combo_probe(struct platform_device *pdev)
 				goto err_node_put;
 			}
 
+			qmp->usb_phy = qmp->phys[id];
+
 			/*
 			 * Register the pipe clock provided by phy.
 			 * See function description to see details of this pipe clock.
@@ -2846,6 +2850,9 @@ static int qmp_combo_probe(struct platform_device *pdev)
 		id++;
 	}
 
+	if (!qmp->usb_phy)
+		return -EINVAL;
+
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
 	return PTR_ERR_OR_ZERO(phy_provider);
-- 
GitLab


From c7b98de745cffdceefc077ad5cf9cda032ef8959 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 09:13:45 +0100
Subject: [PATCH 2677/4122] phy: qcom-qmp-combo: fix runtime suspend

Drop the confused runtime-suspend type check which effectively broke
runtime PM if the DP child node happens to be parsed before the USB
child node during probe (e.g. due to order of child nodes in the
devicetree).

Instead use the new driver data USB PHY pointer to access the USB
configuration and resources.

Fixes: 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo phy")
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114081346.5116-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 17707f68d482d..fde30205f3323 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2207,15 +2207,11 @@ static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy)
 static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev)
 {
 	struct qcom_qmp *qmp = dev_get_drvdata(dev);
-	struct qmp_phy *qphy = qmp->phys[0];
+	struct qmp_phy *qphy = qmp->usb_phy;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 
 	dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode);
 
-	/* Supported only for USB3 PHY and luckily USB3 is the first phy */
-	if (cfg->type != PHY_TYPE_USB3)
-		return 0;
-
 	if (!qmp->init_count) {
 		dev_vdbg(dev, "PHY not initialized, bailing out\n");
 		return 0;
@@ -2232,16 +2228,12 @@ static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev)
 static int __maybe_unused qmp_combo_runtime_resume(struct device *dev)
 {
 	struct qcom_qmp *qmp = dev_get_drvdata(dev);
-	struct qmp_phy *qphy = qmp->phys[0];
+	struct qmp_phy *qphy = qmp->usb_phy;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	int ret = 0;
 
 	dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode);
 
-	/* Supported only for USB3 PHY and luckily USB3 is the first phy */
-	if (cfg->type != PHY_TYPE_USB3)
-		return 0;
-
 	if (!qmp->init_count) {
 		dev_vdbg(dev, "PHY not initialized, bailing out\n");
 		return 0;
-- 
GitLab


From c209b1b0e1e87e862099482e62a2f2d0bef8e989 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 09:13:46 +0100
Subject: [PATCH 2678/4122] phy: qcom-qmp-combo: clean up common initialisation

Commit 52e013d0bffa ("phy: qcom-qmp: Add support for DP in USB3+DP combo
phy") added support for the DisplayPort part of QMP PHYs but
unfortunately did so by duplicating parts of the shared configuration,
something which has lead to subtle bugs depending on probe order.

As the resources have always been requested based on the USB
configuration, make sure to not rely on fields from the DP configuration
when using them (e.g. in case they get out of sync) and remove the now
unused fields from the DP configurations.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114081346.5116-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 47 ++---------------------
 1 file changed, 4 insertions(+), 43 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index fde30205f3323..c7a926d548d8f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1043,14 +1043,6 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.clk_list		= qmp_v3_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
-	.reset_list		= sc7180_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(sc7180_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v3_usb3phy_regs_layout,
-
 	.dp_aux_init = qcom_qmp_v3_phy_dp_aux_init,
 	.configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx,
 	.configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy,
@@ -1108,14 +1100,6 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.clk_list		= qmp_v3_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v3_usb3phy_regs_layout,
-
 	.dp_aux_init = qcom_qmp_v3_phy_dp_aux_init,
 	.configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx,
 	.configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy,
@@ -1176,14 +1160,6 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.clk_list		= qmp_v3_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v3_usb3phy_regs_layout,
-
 	.dp_aux_init = qcom_qmp_v4_phy_dp_aux_init,
 	.configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx,
 	.configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy,
@@ -1240,14 +1216,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v5_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2,
 
-	.clk_list		= qmp_v4_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v4_usb3phy_regs_layout,
-
 	.dp_aux_init = qcom_qmp_v4_phy_dp_aux_init,
 	.configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx,
 	.configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy,
@@ -1308,14 +1276,6 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.clk_list		= qmp_v4_sm8250_usbphy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v4_usb3phy_regs_layout,
-
 	.dp_aux_init = qcom_qmp_v4_phy_dp_aux_init,
 	.configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx,
 	.configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy,
@@ -1911,8 +1871,8 @@ static int qcom_qmp_dp_phy_calibrate(struct phy *phy)
 static int qmp_combo_com_init(struct qmp_phy *qphy)
 {
 	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	struct qmp_phy *usb_phy = qmp->usb_phy;
+	const struct qmp_phy_cfg *cfg = usb_phy->cfg;
 	void __iomem *dp_com = qmp->dp_com;
 	int ret;
 
@@ -1964,7 +1924,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy)
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03);
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 
-	qphy_setbits(usb_phy->pcs, usb_phy->cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+	qphy_setbits(usb_phy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			SW_PWRDN);
 
 	mutex_unlock(&qmp->phy_mutex);
@@ -1984,7 +1944,8 @@ err_unlock:
 static int qmp_combo_com_exit(struct qmp_phy *qphy)
 {
 	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_phy *usb_phy = qmp->usb_phy;
+	const struct qmp_phy_cfg *cfg = usb_phy->cfg;
 
 	mutex_lock(&qmp->phy_mutex);
 	if (--qmp->init_count) {
-- 
GitLab


From 23680f0b7d7f67a935adb38058110d2d81bbe6ea Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 23 Nov 2022 13:25:19 +0100
Subject: [PATCH 2679/4122] driver core: make struct class.dev_uevent() take a
 const *

The dev_uevent() in struct class should not be modifying the device that
is passed into it, so mark it as a const * and propagate the function
signature changes out into all relevant subsystems that use this
callback.

Cc: Jens Axboe <axboe@kernel.dk>
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Russ Weight <russell.h.weight@intel.com>
Cc: Jean Delvare <jdelvare@suse.com>
Cc: Johan Hovold <johan@kernel.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Karsten Keil <isdn@linux-pingi.de>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Eric Dumazet <edumazet@google.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Wolfram Sang <wsa+renesas@sang-engineering.com>
Cc: Raed Salem <raeds@nvidia.com>
Cc: Chen Zhongjin <chenzhongjin@huawei.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Avihai Horon <avihaih@nvidia.com>
Cc: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Colin Ian King <colin.i.king@gmail.com>
Cc: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: Jakob Koschel <jakobkoschel@gmail.com>
Cc: Antoine Tenart <atenart@kernel.org>
Cc: Frederic Weisbecker <frederic@kernel.org>
Cc: Wang Yufen <wangyufen@huawei.com>
Cc: linux-block@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-media@vger.kernel.org
Cc: linux-nvme@lists.infradead.org
Cc: linux-pm@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Cc: linux-wireless@vger.kernel.org
Cc: netdev@vger.kernel.org
Acked-by: Sebastian Reichel <sre@kernel.org>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221123122523.1332370-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 block/genhd.c                             | 4 ++--
 drivers/base/firmware_loader/sysfs.c      | 6 +++---
 drivers/base/firmware_loader/sysfs.h      | 2 +-
 drivers/firmware/dmi-id.c                 | 2 +-
 drivers/gnss/core.c                       | 6 +++---
 drivers/infiniband/core/device.c          | 2 +-
 drivers/isdn/mISDN/core.c                 | 4 ++--
 drivers/media/dvb-core/dvbdev.c           | 4 ++--
 drivers/nvme/host/core.c                  | 4 ++--
 drivers/pcmcia/cs.c                       | 4 ++--
 drivers/power/supply/power_supply.h       | 2 +-
 drivers/power/supply/power_supply_sysfs.c | 8 ++++----
 drivers/usb/gadget/udc/core.c             | 4 ++--
 include/linux/device/class.h              | 2 +-
 include/linux/mISDNif.h                   | 2 +-
 net/atm/atm_sysfs.c                       | 4 ++--
 net/core/net-sysfs.c                      | 4 ++--
 net/rfkill/core.c                         | 2 +-
 18 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 0f9769db2de83..3f11247134424 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1181,9 +1181,9 @@ static void disk_release(struct device *dev)
 	iput(disk->part0->bd_inode);	/* frees the disk */
 }
 
-static int block_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int block_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct gendisk *disk = dev_to_disk(dev);
+	const struct gendisk *disk = dev_to_disk(dev);
 
 	return add_uevent_var(env, "DISKSEQ=%llu", disk->diskseq);
 }
diff --git a/drivers/base/firmware_loader/sysfs.c b/drivers/base/firmware_loader/sysfs.c
index 5b66b3d1fa16b..56911d75b90a1 100644
--- a/drivers/base/firmware_loader/sysfs.c
+++ b/drivers/base/firmware_loader/sysfs.c
@@ -64,7 +64,7 @@ static struct attribute *firmware_class_attrs[] = {
 };
 ATTRIBUTE_GROUPS(firmware_class);
 
-static int do_firmware_uevent(struct fw_sysfs *fw_sysfs, struct kobj_uevent_env *env)
+static int do_firmware_uevent(const struct fw_sysfs *fw_sysfs, struct kobj_uevent_env *env)
 {
 	if (add_uevent_var(env, "FIRMWARE=%s", fw_sysfs->fw_priv->fw_name))
 		return -ENOMEM;
@@ -76,9 +76,9 @@ static int do_firmware_uevent(struct fw_sysfs *fw_sysfs, struct kobj_uevent_env
 	return 0;
 }
 
-static int firmware_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int firmware_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev);
+	const struct fw_sysfs *fw_sysfs = to_fw_sysfs(dev);
 	int err = 0;
 
 	mutex_lock(&fw_lock);
diff --git a/drivers/base/firmware_loader/sysfs.h b/drivers/base/firmware_loader/sysfs.h
index df1d5add698f1..fd0b4ad9bdbb6 100644
--- a/drivers/base/firmware_loader/sysfs.h
+++ b/drivers/base/firmware_loader/sysfs.h
@@ -81,7 +81,7 @@ struct fw_sysfs {
 	void *fw_upload_priv;
 };
 
-static inline struct fw_sysfs *to_fw_sysfs(struct device *dev)
+static inline struct fw_sysfs *to_fw_sysfs(const struct device *dev)
 {
 	return container_of(dev, struct fw_sysfs, dev);
 }
diff --git a/drivers/firmware/dmi-id.c b/drivers/firmware/dmi-id.c
index 940ddf916202a..5f3a3e913d28f 100644
--- a/drivers/firmware/dmi-id.c
+++ b/drivers/firmware/dmi-id.c
@@ -155,7 +155,7 @@ static const struct attribute_group* sys_dmi_attribute_groups[] = {
 	NULL
 };
 
-static int dmi_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int dmi_dev_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
 	ssize_t len;
 
diff --git a/drivers/gnss/core.c b/drivers/gnss/core.c
index 1e82b7967570a..77a4b280c552a 100644
--- a/drivers/gnss/core.c
+++ b/drivers/gnss/core.c
@@ -337,7 +337,7 @@ static const char * const gnss_type_names[GNSS_TYPE_COUNT] = {
 	[GNSS_TYPE_MTK]		= "MTK",
 };
 
-static const char *gnss_type_name(struct gnss_device *gdev)
+static const char *gnss_type_name(const struct gnss_device *gdev)
 {
 	const char *name = NULL;
 
@@ -365,9 +365,9 @@ static struct attribute *gnss_attrs[] = {
 };
 ATTRIBUTE_GROUPS(gnss);
 
-static int gnss_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int gnss_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct gnss_device *gdev = to_gnss_device(dev);
+	const struct gnss_device *gdev = to_gnss_device(dev);
 	int ret;
 
 	ret = add_uevent_var(env, "GNSS_TYPE=%s", gnss_type_name(gdev));
diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index fa65c5d3d395c..4186dbf9377f0 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -511,7 +511,7 @@ static void ib_device_release(struct device *device)
 	kfree_rcu(dev, rcu_head);
 }
 
-static int ib_device_uevent(struct device *device,
+static int ib_device_uevent(const struct device *device,
 			    struct kobj_uevent_env *env)
 {
 	if (add_uevent_var(env, "NAME=%s", dev_name(device)))
diff --git a/drivers/isdn/mISDN/core.c b/drivers/isdn/mISDN/core.c
index 90ee56d07a6e9..9120be5903258 100644
--- a/drivers/isdn/mISDN/core.c
+++ b/drivers/isdn/mISDN/core.c
@@ -139,9 +139,9 @@ static struct attribute *mISDN_attrs[] = {
 };
 ATTRIBUTE_GROUPS(mISDN);
 
-static int mISDN_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int mISDN_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct mISDNdevice *mdev = dev_to_mISDN(dev);
+	const struct mISDNdevice *mdev = dev_to_mISDN(dev);
 
 	if (!mdev)
 		return 0;
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
index 675d877a67b25..6ef18bab9648b 100644
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -1008,9 +1008,9 @@ void dvb_module_release(struct i2c_client *client)
 EXPORT_SYMBOL_GPL(dvb_module_release);
 #endif
 
-static int dvb_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int dvb_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct dvb_device *dvbdev = dev_get_drvdata(dev);
+	const struct dvb_device *dvbdev = dev_get_drvdata(dev);
 
 	add_uevent_var(env, "DVB_ADAPTER_NUM=%d", dvbdev->adapter->num);
 	add_uevent_var(env, "DVB_DEVICE_TYPE=%s", dnames[dvbdev->type]);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index da55ce45ac701..b4778b970dd43 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4580,9 +4580,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_remove_namespaces);
 
-static int nvme_class_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int nvme_class_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct nvme_ctrl *ctrl =
+	const struct nvme_ctrl *ctrl =
 		container_of(dev, struct nvme_ctrl, ctrl_device);
 	struct nvmf_ctrl_options *opts = ctrl->opts;
 	int ret;
diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c
index f70197154a362..e3224e49c43fd 100644
--- a/drivers/pcmcia/cs.c
+++ b/drivers/pcmcia/cs.c
@@ -810,10 +810,10 @@ int pcmcia_reset_card(struct pcmcia_socket *skt)
 EXPORT_SYMBOL(pcmcia_reset_card);
 
 
-static int pcmcia_socket_uevent(struct device *dev,
+static int pcmcia_socket_uevent(const struct device *dev,
 				struct kobj_uevent_env *env)
 {
-	struct pcmcia_socket *s = container_of(dev, struct pcmcia_socket, dev);
+	const struct pcmcia_socket *s = container_of(dev, struct pcmcia_socket, dev);
 
 	if (add_uevent_var(env, "SOCKET_NO=%u", s->sock))
 		return -ENOMEM;
diff --git a/drivers/power/supply/power_supply.h b/drivers/power/supply/power_supply.h
index c310d4f36c105..645eee4d6b6ae 100644
--- a/drivers/power/supply/power_supply.h
+++ b/drivers/power/supply/power_supply.h
@@ -16,7 +16,7 @@ struct power_supply;
 #ifdef CONFIG_SYSFS
 
 extern void power_supply_init_attrs(struct device_type *dev_type);
-extern int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env);
+extern int power_supply_uevent(const struct device *dev, struct kobj_uevent_env *env);
 
 #else
 
diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c
index 5369abaceb5cc..6ca7d3985a406 100644
--- a/drivers/power/supply/power_supply_sysfs.c
+++ b/drivers/power/supply/power_supply_sysfs.c
@@ -427,7 +427,7 @@ void power_supply_init_attrs(struct device_type *dev_type)
 	}
 }
 
-static int add_prop_uevent(struct device *dev, struct kobj_uevent_env *env,
+static int add_prop_uevent(const struct device *dev, struct kobj_uevent_env *env,
 			   enum power_supply_property prop, char *prop_buf)
 {
 	int ret = 0;
@@ -438,7 +438,7 @@ static int add_prop_uevent(struct device *dev, struct kobj_uevent_env *env,
 	pwr_attr = &power_supply_attrs[prop];
 	dev_attr = &pwr_attr->dev_attr;
 
-	ret = power_supply_show_property(dev, dev_attr, prop_buf);
+	ret = power_supply_show_property((struct device *)dev, dev_attr, prop_buf);
 	if (ret == -ENODEV || ret == -ENODATA) {
 		/*
 		 * When a battery is absent, we expect -ENODEV. Don't abort;
@@ -458,9 +458,9 @@ static int add_prop_uevent(struct device *dev, struct kobj_uevent_env *env,
 			      pwr_attr->prop_name, prop_buf);
 }
 
-int power_supply_uevent(struct device *dev, struct kobj_uevent_env *env)
+int power_supply_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct power_supply *psy = dev_get_drvdata(dev);
+	const struct power_supply *psy = dev_get_drvdata(dev);
 	int ret = 0, j;
 	char *prop_buf;
 
diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
index c63c0c2cf649d..b5994a0604f68 100644
--- a/drivers/usb/gadget/udc/core.c
+++ b/drivers/usb/gadget/udc/core.c
@@ -1723,9 +1723,9 @@ static const struct attribute_group *usb_udc_attr_groups[] = {
 	NULL,
 };
 
-static int usb_udc_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int usb_udc_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
-	struct usb_udc		*udc = container_of(dev, struct usb_udc, dev);
+	const struct usb_udc	*udc = container_of(dev, struct usb_udc, dev);
 	int			ret;
 
 	ret = add_uevent_var(env, "USB_UDC_NAME=%s", udc->gadget->name);
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index 20103e0b03c34..94b1107258e50 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -59,7 +59,7 @@ struct class {
 	const struct attribute_group	**dev_groups;
 	struct kobject			*dev_kobj;
 
-	int (*dev_uevent)(struct device *dev, struct kobj_uevent_env *env);
+	int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env);
 	char *(*devnode)(struct device *dev, umode_t *mode);
 
 	void (*class_release)(struct class *class);
diff --git a/include/linux/mISDNif.h b/include/linux/mISDNif.h
index 7dd1f01ec4f9f..7aab4a7697369 100644
--- a/include/linux/mISDNif.h
+++ b/include/linux/mISDNif.h
@@ -586,7 +586,7 @@ extern struct mISDNclock *mISDN_register_clock(char *, int, clockctl_func_t *,
 						void *);
 extern void	mISDN_unregister_clock(struct mISDNclock *);
 
-static inline struct mISDNdevice *dev_to_mISDN(struct device *dev)
+static inline struct mISDNdevice *dev_to_mISDN(const struct device *dev)
 {
 	if (dev)
 		return dev_get_drvdata(dev);
diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c
index 0fdbdfd194746..466353b3dde45 100644
--- a/net/atm/atm_sysfs.c
+++ b/net/atm/atm_sysfs.c
@@ -108,9 +108,9 @@ static struct device_attribute *atm_attrs[] = {
 };
 
 
-static int atm_uevent(struct device *cdev, struct kobj_uevent_env *env)
+static int atm_uevent(const struct device *cdev, struct kobj_uevent_env *env)
 {
-	struct atm_dev *adev;
+	const struct atm_dev *adev;
 
 	if (!cdev)
 		return -ENODEV;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 9cfc80b8ed25f..03a61d1dffbd4 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1873,9 +1873,9 @@ const struct kobj_ns_type_operations net_ns_type_operations = {
 };
 EXPORT_SYMBOL_GPL(net_ns_type_operations);
 
-static int netdev_uevent(struct device *d, struct kobj_uevent_env *env)
+static int netdev_uevent(const struct device *d, struct kobj_uevent_env *env)
 {
-	struct net_device *dev = to_net_dev(d);
+	const struct net_device *dev = to_net_dev(d);
 	int retval;
 
 	/* pass interface to uevent. */
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index dac4fdc7488a3..b390ff245d5e8 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -832,7 +832,7 @@ static void rfkill_release(struct device *dev)
 	kfree(rfkill);
 }
 
-static int rfkill_dev_uevent(struct device *dev, struct kobj_uevent_env *env)
+static int rfkill_dev_uevent(const struct device *dev, struct kobj_uevent_env *env)
 {
 	struct rfkill *rfkill = to_rfkill(dev);
 	unsigned long flags;
-- 
GitLab


From ff62b8e6588fb07bedda7423622c140c4edd66a7 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 23 Nov 2022 13:25:20 +0100
Subject: [PATCH 2680/4122] driver core: make struct class.devnode() take a
 const *
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The devnode() in struct class should not be modifying the device that is
passed into it, so mark it as a const * and propagate the function
signature changes out into all relevant subsystems that use this
callback.

Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: x86@kernel.org
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Justin Sanders <justin@coraid.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Sumit Semwal <sumit.semwal@linaro.org>
Cc: Benjamin Gaignard <benjamin.gaignard@collabora.com>
Cc: Liam Mark <lmark@codeaurora.org>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Brian Starkey <Brian.Starkey@arm.com>
Cc: John Stultz <jstultz@google.com>
Cc: "Christian König" <christian.koenig@amd.com>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Cc: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Sean Young <sean@mess.org>
Cc: Frank Haverkamp <haver@linux.ibm.com>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Jason Wang <jasowang@redhat.com>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Cornelia Huck <cohuck@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Anton Vorontsov <anton@enomsg.org>
Cc: Colin Cross <ccross@android.com>
Cc: Tony Luck <tony.luck@intel.com>
Cc: Jaroslav Kysela <perex@perex.cz>
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Xie Yongji <xieyongji@bytedance.com>
Cc: Gautam Dawar <gautam.dawar@xilinx.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: Eli Cohen <elic@nvidia.com>
Cc: Parav Pandit <parav@nvidia.com>
Cc: Maxime Coquelin <maxime.coquelin@redhat.com>
Cc: alsa-devel@alsa-project.org
Cc: dri-devel@lists.freedesktop.org
Cc: kvm@vger.kernel.org
Cc: linaro-mm-sig@lists.linaro.org
Cc: linux-block@vger.kernel.org
Cc: linux-input@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-media@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: linux-scsi@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Cc: virtualization@lists.linux-foundation.org
Link: https://lore.kernel.org/r/20221123122523.1332370-2-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/kernel/cpu/resctrl/pseudo_lock.c  | 4 ++--
 arch/x86/kernel/cpuid.c                    | 2 +-
 arch/x86/kernel/msr.c                      | 2 +-
 block/bsg.c                                | 2 +-
 drivers/block/aoe/aoechr.c                 | 2 +-
 drivers/char/mem.c                         | 2 +-
 drivers/char/misc.c                        | 4 ++--
 drivers/dma-buf/dma-heap.c                 | 2 +-
 drivers/gpu/drm/drm_sysfs.c                | 2 +-
 drivers/infiniband/core/user_mad.c         | 2 +-
 drivers/infiniband/core/uverbs_main.c      | 2 +-
 drivers/infiniband/hw/hfi1/device.c        | 4 ++--
 drivers/input/input.c                      | 2 +-
 drivers/media/dvb-core/dvbdev.c            | 4 ++--
 drivers/media/pci/ddbridge/ddbridge-core.c | 4 ++--
 drivers/media/rc/rc-main.c                 | 2 +-
 drivers/misc/genwqe/card_base.c            | 2 +-
 drivers/tty/tty_io.c                       | 2 +-
 drivers/usb/core/file.c                    | 2 +-
 drivers/vdpa/vdpa_user/vduse_dev.c         | 2 +-
 drivers/vfio/vfio_main.c                   | 2 +-
 fs/pstore/pmsg.c                           | 2 +-
 include/linux/device/class.h               | 2 +-
 sound/sound_core.c                         | 2 +-
 24 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
index d961ae3ed96e7..4e4231a58f383 100644
--- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
+++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c
@@ -1560,9 +1560,9 @@ static const struct file_operations pseudo_lock_dev_fops = {
 	.mmap =		pseudo_lock_dev_mmap,
 };
 
-static char *pseudo_lock_devnode(struct device *dev, umode_t *mode)
+static char *pseudo_lock_devnode(const struct device *dev, umode_t *mode)
 {
-	struct rdtgroup *rdtgrp;
+	const struct rdtgroup *rdtgrp;
 
 	rdtgrp = dev_get_drvdata(dev);
 	if (mode)
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index 6f7b8cc1bc9fa..621ba9c0f17a9 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -139,7 +139,7 @@ static int cpuid_device_destroy(unsigned int cpu)
 	return 0;
 }
 
-static char *cpuid_devnode(struct device *dev, umode_t *mode)
+static char *cpuid_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cpu/%u/cpuid", MINOR(dev->devt));
 }
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index ed8ac6bcbafb2..7087513117861 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -250,7 +250,7 @@ static int msr_device_destroy(unsigned int cpu)
 	return 0;
 }
 
-static char *msr_devnode(struct device *dev, umode_t *mode)
+static char *msr_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cpu/%u/msr", MINOR(dev->devt));
 }
diff --git a/block/bsg.c b/block/bsg.c
index 2ab1351eb0823..08046bd9207d8 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -232,7 +232,7 @@ out_put_device:
 }
 EXPORT_SYMBOL_GPL(bsg_register_queue);
 
-static char *bsg_devnode(struct device *dev, umode_t *mode)
+static char *bsg_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "bsg/%s", dev_name(dev));
 }
diff --git a/drivers/block/aoe/aoechr.c b/drivers/block/aoe/aoechr.c
index 8eea2529da20a..7a368c90467d1 100644
--- a/drivers/block/aoe/aoechr.c
+++ b/drivers/block/aoe/aoechr.c
@@ -273,7 +273,7 @@ static const struct file_operations aoe_fops = {
 	.llseek = noop_llseek,
 };
 
-static char *aoe_devnode(struct device *dev, umode_t *mode)
+static char *aoe_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "etherd/%s", dev_name(dev));
 }
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 5611d127363e4..83bf2a4dcb57e 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -746,7 +746,7 @@ static const struct file_operations memory_fops = {
 	.llseek = noop_llseek,
 };
 
-static char *mem_devnode(struct device *dev, umode_t *mode)
+static char *mem_devnode(const struct device *dev, umode_t *mode)
 {
 	if (mode && devlist[MINOR(dev->devt)].mode)
 		*mode = devlist[MINOR(dev->devt)].mode;
diff --git a/drivers/char/misc.c b/drivers/char/misc.c
index cba19bfdc44dd..88c6995b9a3d1 100644
--- a/drivers/char/misc.c
+++ b/drivers/char/misc.c
@@ -254,9 +254,9 @@ void misc_deregister(struct miscdevice *misc)
 }
 EXPORT_SYMBOL(misc_deregister);
 
-static char *misc_devnode(struct device *dev, umode_t *mode)
+static char *misc_devnode(const struct device *dev, umode_t *mode)
 {
-	struct miscdevice *c = dev_get_drvdata(dev);
+	const struct miscdevice *c = dev_get_drvdata(dev);
 
 	if (mode && c->mode)
 		*mode = c->mode;
diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c
index 8f5848aa144fe..4d7150791315a 100644
--- a/drivers/dma-buf/dma-heap.c
+++ b/drivers/dma-buf/dma-heap.c
@@ -299,7 +299,7 @@ err0:
 	return err_ret;
 }
 
-static char *dma_heap_devnode(struct device *dev, umode_t *mode)
+static char *dma_heap_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "dma_heap/%s", dev_name(dev));
 }
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 430e00b16eeca..14bf156b3f1ba 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -90,7 +90,7 @@ static void drm_sysfs_acpi_register(void) { }
 static void drm_sysfs_acpi_unregister(void) { }
 #endif
 
-static char *drm_devnode(struct device *dev, umode_t *mode)
+static char *drm_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "dri/%s", dev_name(dev));
 }
diff --git a/drivers/infiniband/core/user_mad.c b/drivers/infiniband/core/user_mad.c
index 98cb594cd9a69..f83954180a338 100644
--- a/drivers/infiniband/core/user_mad.c
+++ b/drivers/infiniband/core/user_mad.c
@@ -1224,7 +1224,7 @@ static struct attribute *umad_class_dev_attrs[] = {
 };
 ATTRIBUTE_GROUPS(umad_class_dev);
 
-static char *umad_devnode(struct device *dev, umode_t *mode)
+static char *umad_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
 }
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index d544340887277..bdb179a09d77c 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -1237,7 +1237,7 @@ static void ib_uverbs_remove_one(struct ib_device *device, void *client_data)
 	put_device(&uverbs_dev->dev);
 }
 
-static char *uverbs_devnode(struct device *dev, umode_t *mode)
+static char *uverbs_devnode(const struct device *dev, umode_t *mode)
 {
 	if (mode)
 		*mode = 0666;
diff --git a/drivers/infiniband/hw/hfi1/device.c b/drivers/infiniband/hw/hfi1/device.c
index 8ceff7141bafe..1f44960321704 100644
--- a/drivers/infiniband/hw/hfi1/device.c
+++ b/drivers/infiniband/hw/hfi1/device.c
@@ -72,7 +72,7 @@ const char *class_name(void)
 	return hfi1_class_name;
 }
 
-static char *hfi1_devnode(struct device *dev, umode_t *mode)
+static char *hfi1_devnode(const struct device *dev, umode_t *mode)
 {
 	if (mode)
 		*mode = 0600;
@@ -85,7 +85,7 @@ static const char *class_name_user(void)
 	return hfi1_class_name_user;
 }
 
-static char *hfi1_user_devnode(struct device *dev, umode_t *mode)
+static char *hfi1_user_devnode(const struct device *dev, umode_t *mode)
 {
 	if (mode)
 		*mode = 0666;
diff --git a/drivers/input/input.c b/drivers/input/input.c
index ebb2b7f0f8ff4..50597165dc54f 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -1913,7 +1913,7 @@ static const struct device_type input_dev_type = {
 #endif
 };
 
-static char *input_devnode(struct device *dev, umode_t *mode)
+static char *input_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "input/%s", dev_name(dev));
 }
diff --git a/drivers/media/dvb-core/dvbdev.c b/drivers/media/dvb-core/dvbdev.c
index 6ef18bab9648b..e73f5240cc2cc 100644
--- a/drivers/media/dvb-core/dvbdev.c
+++ b/drivers/media/dvb-core/dvbdev.c
@@ -1018,9 +1018,9 @@ static int dvb_uevent(const struct device *dev, struct kobj_uevent_env *env)
 	return 0;
 }
 
-static char *dvb_devnode(struct device *dev, umode_t *mode)
+static char *dvb_devnode(const struct device *dev, umode_t *mode)
 {
-	struct dvb_device *dvbdev = dev_get_drvdata(dev);
+	const struct dvb_device *dvbdev = dev_get_drvdata(dev);
 
 	return kasprintf(GFP_KERNEL, "dvb/adapter%d/%s%d",
 		dvbdev->adapter->num, dnames[dvbdev->type], dvbdev->id);
diff --git a/drivers/media/pci/ddbridge/ddbridge-core.c b/drivers/media/pci/ddbridge/ddbridge-core.c
index fe833f39698af..ee8087f29b2c4 100644
--- a/drivers/media/pci/ddbridge/ddbridge-core.c
+++ b/drivers/media/pci/ddbridge/ddbridge-core.c
@@ -2716,9 +2716,9 @@ static const struct file_operations ddb_fops = {
 	.release        = ddb_release,
 };
 
-static char *ddb_devnode(struct device *device, umode_t *mode)
+static char *ddb_devnode(const struct device *device, umode_t *mode)
 {
-	struct ddb *dev = dev_get_drvdata(device);
+	const struct ddb *dev = dev_get_drvdata(device);
 
 	return kasprintf(GFP_KERNEL, "ddbridge/card%d", dev->nr);
 }
diff --git a/drivers/media/rc/rc-main.c b/drivers/media/rc/rc-main.c
index eba0cd30e314d..527d9324742bb 100644
--- a/drivers/media/rc/rc-main.c
+++ b/drivers/media/rc/rc-main.c
@@ -1017,7 +1017,7 @@ static void ir_close(struct input_dev *idev)
 }
 
 /* class for /sys/class/rc */
-static char *rc_devnode(struct device *dev, umode_t *mode)
+static char *rc_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "rc/%s", dev_name(dev));
 }
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index 693981891870c..0f00687f72d43 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -1349,7 +1349,7 @@ static struct pci_driver genwqe_driver = {
  * Default mode should be rw for everybody. Do not change default
  * device name.
  */
-static char *genwqe_devnode(struct device *dev, umode_t *mode)
+static char *genwqe_devnode(const struct device *dev, umode_t *mode)
 {
 	if (mode)
 		*mode = 0666;
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index de06c3c2ff70a..aad8171f6c214 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -3494,7 +3494,7 @@ void tty_default_fops(struct file_operations *fops)
 	*fops = tty_fops;
 }
 
-static char *tty_devnode(struct device *dev, umode_t *mode)
+static char *tty_devnode(const struct device *dev, umode_t *mode)
 {
 	if (!mode)
 		return NULL;
diff --git a/drivers/usb/core/file.c b/drivers/usb/core/file.c
index 558890ada0e5b..da7d88e069e6c 100644
--- a/drivers/usb/core/file.c
+++ b/drivers/usb/core/file.c
@@ -62,7 +62,7 @@ static struct usb_class {
 	struct class *class;
 } *usb_class;
 
-static char *usb_devnode(struct device *dev, umode_t *mode)
+static char *usb_devnode(const struct device *dev, umode_t *mode)
 {
 	struct usb_class_driver *drv;
 
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index 35dceee3ed560..0dd3c1f291da3 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -1656,7 +1656,7 @@ static const struct file_operations vduse_ctrl_fops = {
 	.llseek		= noop_llseek,
 };
 
-static char *vduse_devnode(struct device *dev, umode_t *mode)
+static char *vduse_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
 }
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 6e8804fe00953..5bf4b34549189 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1812,7 +1812,7 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
 /*
  * Module/class support
  */
-static char *vfio_devnode(struct device *dev, umode_t *mode)
+static char *vfio_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
 }
diff --git a/fs/pstore/pmsg.c b/fs/pstore/pmsg.c
index d8542ec2f38c6..b31c9c72d90b4 100644
--- a/fs/pstore/pmsg.c
+++ b/fs/pstore/pmsg.c
@@ -46,7 +46,7 @@ static int pmsg_major;
 #undef pr_fmt
 #define pr_fmt(fmt) PMSG_NAME ": " fmt
 
-static char *pmsg_devnode(struct device *dev, umode_t *mode)
+static char *pmsg_devnode(const struct device *dev, umode_t *mode)
 {
 	if (mode)
 		*mode = 0220;
diff --git a/include/linux/device/class.h b/include/linux/device/class.h
index 94b1107258e50..42cc3fb44a846 100644
--- a/include/linux/device/class.h
+++ b/include/linux/device/class.h
@@ -60,7 +60,7 @@ struct class {
 	struct kobject			*dev_kobj;
 
 	int (*dev_uevent)(const struct device *dev, struct kobj_uevent_env *env);
-	char *(*devnode)(struct device *dev, umode_t *mode);
+	char *(*devnode)(const struct device *dev, umode_t *mode);
 
 	void (*class_release)(struct class *class);
 	void (*dev_release)(struct device *dev);
diff --git a/sound/sound_core.c b/sound/sound_core.c
index 3332fe321737e..3e7dd6fcb7cf9 100644
--- a/sound/sound_core.c
+++ b/sound/sound_core.c
@@ -30,7 +30,7 @@ MODULE_DESCRIPTION("Core sound module");
 MODULE_AUTHOR("Alan Cox");
 MODULE_LICENSE("GPL");
 
-static char *sound_devnode(struct device *dev, umode_t *mode)
+static char *sound_devnode(const struct device *dev, umode_t *mode)
 {
 	if (MAJOR(dev->devt) == SOUND_MAJOR)
 		return NULL;
-- 
GitLab


From a173ee25a758927adc12664d1ec162a18324a4bd Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:00 +0100
Subject: [PATCH 2681/4122] phy: qcom-qmp-combo: sort device-id table

Sort the device-id table by compatible string to make it easier to find
and add new entries.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index c7a926d548d8f..d6a031bcfc302 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2651,14 +2651,6 @@ static const struct of_device_id qmp_combo_of_match_table[] = {
 		.compatible = "qcom,sc7180-qmp-usb3-dp-phy",
 		.data = &sc7180_usb3dpphy_cfg,
 	},
-	{
-		.compatible = "qcom,sdm845-qmp-usb3-dp-phy",
-		.data = &sdm845_usb3dpphy_cfg,
-	},
-	{
-		.compatible = "qcom,sm8250-qmp-usb3-dp-phy",
-		.data = &sm8250_usb3dpphy_cfg,
-	},
 	{
 		.compatible = "qcom,sc8180x-qmp-usb3-dp-phy",
 		.data = &sc8180x_usb3dpphy_cfg,
@@ -2667,6 +2659,14 @@ static const struct of_device_id qmp_combo_of_match_table[] = {
 		.compatible = "qcom,sc8280xp-qmp-usb43dp-phy",
 		.data = &sc8280xp_usb43dpphy_combo_cfg,
 	},
+	{
+		.compatible = "qcom,sdm845-qmp-usb3-dp-phy",
+		.data = &sdm845_usb3dpphy_cfg,
+	},
+	{
+		.compatible = "qcom,sm8250-qmp-usb3-dp-phy",
+		.data = &sm8250_usb3dpphy_cfg,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table);
-- 
GitLab


From 5c5f9fbc15aa58c1bac22724f429e6e399a2f2b5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:01 +0100
Subject: [PATCH 2682/4122] phy: qcom-qmp-combo: move device-id table

Move the device-id table after probe() and next to the driver structure
to keep the driver callback functions grouped together.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 50 +++++++++++------------
 1 file changed, 25 insertions(+), 25 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index d6a031bcfc302..e7c8c4417142b 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2646,31 +2646,6 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
 	return 0;
 }
 
-static const struct of_device_id qmp_combo_of_match_table[] = {
-	{
-		.compatible = "qcom,sc7180-qmp-usb3-dp-phy",
-		.data = &sc7180_usb3dpphy_cfg,
-	},
-	{
-		.compatible = "qcom,sc8180x-qmp-usb3-dp-phy",
-		.data = &sc8180x_usb3dpphy_cfg,
-	},
-	{
-		.compatible = "qcom,sc8280xp-qmp-usb43dp-phy",
-		.data = &sc8280xp_usb43dpphy_combo_cfg,
-	},
-	{
-		.compatible = "qcom,sdm845-qmp-usb3-dp-phy",
-		.data = &sdm845_usb3dpphy_cfg,
-	},
-	{
-		.compatible = "qcom,sm8250-qmp-usb3-dp-phy",
-		.data = &sm8250_usb3dpphy_cfg,
-	},
-	{ }
-};
-MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table);
-
 static const struct dev_pm_ops qmp_combo_pm_ops = {
 	SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend,
 			   qmp_combo_runtime_resume, NULL)
@@ -2815,6 +2790,31 @@ err_node_put:
 	return ret;
 }
 
+static const struct of_device_id qmp_combo_of_match_table[] = {
+	{
+		.compatible = "qcom,sc7180-qmp-usb3-dp-phy",
+		.data = &sc7180_usb3dpphy_cfg,
+	},
+	{
+		.compatible = "qcom,sc8180x-qmp-usb3-dp-phy",
+		.data = &sc8180x_usb3dpphy_cfg,
+	},
+	{
+		.compatible = "qcom,sc8280xp-qmp-usb43dp-phy",
+		.data = &sc8280xp_usb43dpphy_combo_cfg,
+	},
+	{
+		.compatible = "qcom,sdm845-qmp-usb3-dp-phy",
+		.data = &sdm845_usb3dpphy_cfg,
+	},
+	{
+		.compatible = "qcom,sm8250-qmp-usb3-dp-phy",
+		.data = &sm8250_usb3dpphy_cfg,
+	},
+	{ }
+};
+MODULE_DEVICE_TABLE(of, qmp_combo_of_match_table);
+
 static struct platform_driver qmp_combo_driver = {
 	.probe		= qmp_combo_probe,
 	.driver = {
-- 
GitLab


From 987a505fa7d79691013ec4bd325ecc5664781c81 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:02 +0100
Subject: [PATCH 2683/4122] phy: qcom-qmp-combo: move pm ops

Move the PM ops structure next to the implementation to keep the driver
callbacks grouped.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index e7c8c4417142b..2ac29b71d3b72 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2216,6 +2216,11 @@ static int __maybe_unused qmp_combo_runtime_resume(struct device *dev)
 	return 0;
 }
 
+static const struct dev_pm_ops qmp_combo_pm_ops = {
+	SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend,
+			   qmp_combo_runtime_resume, NULL)
+};
+
 static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
 	struct qcom_qmp *qmp = dev_get_drvdata(dev);
@@ -2646,11 +2651,6 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
 	return 0;
 }
 
-static const struct dev_pm_ops qmp_combo_pm_ops = {
-	SET_RUNTIME_PM_OPS(qmp_combo_runtime_suspend,
-			   qmp_combo_runtime_resume, NULL)
-};
-
 static int qmp_combo_probe(struct platform_device *pdev)
 {
 	struct qcom_qmp *qmp;
-- 
GitLab


From d6c81688f9cd1d198475383c963cd3d9576d29c2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:03 +0100
Subject: [PATCH 2684/4122] phy: qcom-qmp-combo: rename PHY ops structures

Rename the PHY operation structures so that they have a "phy_ops" suffix
and move them next to the implementation.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 40 +++++++++++------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 2ac29b71d3b72..13800c2243e11 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2118,6 +2118,24 @@ static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 	return 0;
 }
 
+static const struct phy_ops qmp_combo_usb_phy_ops = {
+	.init		= qmp_combo_enable,
+	.exit		= qmp_combo_disable,
+	.set_mode	= qmp_combo_set_mode,
+	.owner		= THIS_MODULE,
+};
+
+static const struct phy_ops qmp_combo_dp_phy_ops = {
+	.init		= qmp_combo_init,
+	.configure	= qcom_qmp_dp_phy_configure,
+	.power_on	= qmp_combo_power_on,
+	.calibrate	= qcom_qmp_dp_phy_calibrate,
+	.power_off	= qmp_combo_power_off,
+	.exit		= qmp_combo_exit,
+	.set_mode	= qmp_combo_set_mode,
+	.owner		= THIS_MODULE,
+};
+
 static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy)
 {
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
@@ -2542,24 +2560,6 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy,
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static const struct phy_ops qmp_combo_usb_ops = {
-	.init		= qmp_combo_enable,
-	.exit		= qmp_combo_disable,
-	.set_mode	= qmp_combo_set_mode,
-	.owner		= THIS_MODULE,
-};
-
-static const struct phy_ops qmp_combo_dp_ops = {
-	.init		= qmp_combo_init,
-	.configure	= qcom_qmp_dp_phy_configure,
-	.power_on	= qmp_combo_power_on,
-	.calibrate	= qcom_qmp_dp_phy_calibrate,
-	.power_off	= qmp_combo_power_off,
-	.exit		= qmp_combo_exit,
-	.set_mode	= qmp_combo_set_mode,
-	.owner		= THIS_MODULE,
-};
-
 static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
 			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
 {
@@ -2632,9 +2632,9 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
 	}
 
 	if (cfg->type == PHY_TYPE_DP)
-		ops = &qmp_combo_dp_ops;
+		ops = &qmp_combo_dp_phy_ops;
 	else
-		ops = &qmp_combo_usb_ops;
+		ops = &qmp_combo_usb_phy_ops;
 
 	generic_phy = devm_phy_create(dev, np, ops);
 	if (IS_ERR(generic_phy)) {
-- 
GitLab


From 73d262f8e7ff095965bac0c4bf538f601257f53b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:04 +0100
Subject: [PATCH 2685/4122] phy: qcom-qmp-combo: drop unused DP PHY mode op

The set-mode operation is currently only used by the USB part of the
PHY so drop the corresponding callback from the DP PHY ops.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 13800c2243e11..cec487560fd73 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2132,7 +2132,6 @@ static const struct phy_ops qmp_combo_dp_phy_ops = {
 	.calibrate	= qcom_qmp_dp_phy_calibrate,
 	.power_off	= qmp_combo_power_off,
 	.exit		= qmp_combo_exit,
-	.set_mode	= qmp_combo_set_mode,
 	.owner		= THIS_MODULE,
 };
 
-- 
GitLab


From ae1cdc709762129c33ab64f400c38c9a177189f1 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:05 +0100
Subject: [PATCH 2686/4122] phy: qcom-qmp-combo: rename USB PHY ops

Add a "usb" infix to the USB PHY operation functions and name them
after the corresponding operations (e.g. "init" rather than "enable").

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index cec487560fd73..bf0874d22d917 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2084,7 +2084,7 @@ static int qmp_combo_exit(struct phy *phy)
 	return 0;
 }
 
-static int qmp_combo_enable(struct phy *phy)
+static int qmp_combo_usb_init(struct phy *phy)
 {
 	int ret;
 
@@ -2099,7 +2099,7 @@ static int qmp_combo_enable(struct phy *phy)
 	return ret;
 }
 
-static int qmp_combo_disable(struct phy *phy)
+static int qmp_combo_usb_exit(struct phy *phy)
 {
 	int ret;
 
@@ -2109,7 +2109,7 @@ static int qmp_combo_disable(struct phy *phy)
 	return qmp_combo_exit(phy);
 }
 
-static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode)
+static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 
@@ -2119,9 +2119,9 @@ static int qmp_combo_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 }
 
 static const struct phy_ops qmp_combo_usb_phy_ops = {
-	.init		= qmp_combo_enable,
-	.exit		= qmp_combo_disable,
-	.set_mode	= qmp_combo_set_mode,
+	.init		= qmp_combo_usb_init,
+	.exit		= qmp_combo_usb_exit,
+	.set_mode	= qmp_combo_usb_set_mode,
 	.owner		= THIS_MODULE,
 };
 
-- 
GitLab


From 0537692bbec18a173e1bda87f2bd024b3684b47d Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:06 +0100
Subject: [PATCH 2687/4122] phy: qcom-qmp-combo: drop unnecessary debug message

Drop the unnecessary (verbose) debug message from the init PHY op.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index bf0874d22d917..ae04f6219fd2a 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1967,10 +1967,8 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy)
 static int qmp_combo_init(struct phy *phy)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	int ret;
-	dev_vdbg(qmp->dev, "Initializing QMP phy\n");
 
 	ret = qmp_combo_com_init(qphy);
 	if (ret)
-- 
GitLab


From 8c75d9eab1dd402bde2fb1337db2130a409c2743 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:07 +0100
Subject: [PATCH 2688/4122] phy: qcom-qmp-combo: separate USB and DP init ops

Separate the USB and DP init and exit operations by calling the common
initialisation code directly from the USB operation and adding a "dp"
infix to the DP callbacks.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 36 ++++++++++++-----------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index ae04f6219fd2a..5d985195df389 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1964,7 +1964,7 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy)
 	return 0;
 }
 
-static int qmp_combo_init(struct phy *phy)
+static int qmp_combo_dp_init(struct phy *phy)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
@@ -1974,8 +1974,16 @@ static int qmp_combo_init(struct phy *phy)
 	if (ret)
 		return ret;
 
-	if (cfg->type == PHY_TYPE_DP)
-		cfg->dp_aux_init(qphy);
+	cfg->dp_aux_init(qphy);
+
+	return 0;
+}
+
+static int qmp_combo_dp_exit(struct phy *phy)
+{
+	struct qmp_phy *qphy = phy_get_drvdata(phy);
+
+	qmp_combo_com_exit(qphy);
 
 	return 0;
 }
@@ -2073,38 +2081,32 @@ static int qmp_combo_power_off(struct phy *phy)
 	return 0;
 }
 
-static int qmp_combo_exit(struct phy *phy)
-{
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-
-	qmp_combo_com_exit(qphy);
-
-	return 0;
-}
-
 static int qmp_combo_usb_init(struct phy *phy)
 {
+	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	int ret;
 
-	ret = qmp_combo_init(phy);
+	ret = qmp_combo_com_init(qphy);
 	if (ret)
 		return ret;
 
 	ret = qmp_combo_power_on(phy);
 	if (ret)
-		qmp_combo_exit(phy);
+		qmp_combo_com_exit(qphy);
 
 	return ret;
 }
 
 static int qmp_combo_usb_exit(struct phy *phy)
 {
+	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	int ret;
 
 	ret = qmp_combo_power_off(phy);
 	if (ret)
 		return ret;
-	return qmp_combo_exit(phy);
+
+	return qmp_combo_com_exit(qphy);
 }
 
 static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode)
@@ -2124,12 +2126,12 @@ static const struct phy_ops qmp_combo_usb_phy_ops = {
 };
 
 static const struct phy_ops qmp_combo_dp_phy_ops = {
-	.init		= qmp_combo_init,
+	.init		= qmp_combo_dp_init,
 	.configure	= qcom_qmp_dp_phy_configure,
 	.power_on	= qmp_combo_power_on,
 	.calibrate	= qcom_qmp_dp_phy_calibrate,
 	.power_off	= qmp_combo_power_off,
-	.exit		= qmp_combo_exit,
+	.exit		= qmp_combo_dp_exit,
 	.owner		= THIS_MODULE,
 };
 
-- 
GitLab


From 186266f65e68c16ba6714c9b7f561ddcd4998cae Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:08 +0100
Subject: [PATCH 2689/4122] phy: qcom-qmp-combo: rename DP PHY ops

Rename the configure and calibrate DP PHY ops using the common prefix
for consistency.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 5d985195df389..7392ae460fd73 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1842,7 +1842,7 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy)
 	return 0;
 }
 
-static int qcom_qmp_dp_phy_configure(struct phy *phy, union phy_configure_opts *opts)
+static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opts)
 {
 	const struct phy_configure_opts_dp *dp_opts = &opts->dp;
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
@@ -1857,7 +1857,7 @@ static int qcom_qmp_dp_phy_configure(struct phy *phy, union phy_configure_opts *
 	return 0;
 }
 
-static int qcom_qmp_dp_phy_calibrate(struct phy *phy)
+static int qmp_combo_dp_calibrate(struct phy *phy)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
@@ -2127,9 +2127,9 @@ static const struct phy_ops qmp_combo_usb_phy_ops = {
 
 static const struct phy_ops qmp_combo_dp_phy_ops = {
 	.init		= qmp_combo_dp_init,
-	.configure	= qcom_qmp_dp_phy_configure,
+	.configure	= qmp_combo_dp_configure,
 	.power_on	= qmp_combo_power_on,
-	.calibrate	= qcom_qmp_dp_phy_calibrate,
+	.calibrate	= qmp_combo_dp_calibrate,
 	.power_off	= qmp_combo_power_off,
 	.exit		= qmp_combo_dp_exit,
 	.owner		= THIS_MODULE,
-- 
GitLab


From 3ade3ede57a0093da3b432ecceda36386d13a5e5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:09 +0100
Subject: [PATCH 2690/4122] phy: qcom-qmp-combo: separate USB and DP power-on
 ops

Separate the USB and DP power-on and power-off operations in two
dedicated implementations.

Note that the pipe clock is only used by the USB part of the PHY and
that no DP configuration has a pcs (or rx) table or has has_pwrdn_delay
set.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 100 +++++++++++++---------
 1 file changed, 59 insertions(+), 41 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 7392ae460fd73..748fd32a6f728 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1988,7 +1988,39 @@ static int qmp_combo_dp_exit(struct phy *phy)
 	return 0;
 }
 
-static int qmp_combo_power_on(struct phy *phy)
+static int qmp_combo_dp_power_on(struct phy *phy)
+{
+	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	void __iomem *tx = qphy->tx;
+
+	qmp_combo_serdes_init(qphy);
+
+	qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+
+	if (cfg->lanes >= 2)
+		qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+
+	/* Configure special DP tx tunings */
+	cfg->configure_dp_tx(qphy);
+
+	/* Configure link rate, swing, etc. */
+	cfg->configure_dp_phy(qphy);
+
+	return 0;
+}
+
+static int qmp_combo_dp_power_off(struct phy *phy)
+{
+	struct qmp_phy *qphy = phy_get_drvdata(phy);
+
+	/* Assert DP PHY power down */
+	writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL);
+
+	return 0;
+}
+
+static int qmp_combo_usb_power_on(struct phy *phy)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	struct qcom_qmp *qmp = qphy->qmp;
@@ -2014,39 +2046,30 @@ static int qmp_combo_power_on(struct phy *phy)
 	if (cfg->lanes >= 2)
 		qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
-	/* Configure special DP tx tunings */
-	if (cfg->type == PHY_TYPE_DP)
-		cfg->configure_dp_tx(qphy);
-
 	qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
 
 	if (cfg->lanes >= 2)
 		qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
-	/* Configure link rate, swing, etc. */
-	if (cfg->type == PHY_TYPE_DP)
-		cfg->configure_dp_phy(qphy);
-	else
-		qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
+	qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
 	if (cfg->has_pwrdn_delay)
 		usleep_range(10, 20);
 
-	if (cfg->type != PHY_TYPE_DP) {
-		/* Pull PHY out of reset state */
-		qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
-		/* start SerDes and Phy-Coding-Sublayer */
-		qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL],
-				SERDES_START | PCS_START);
+	/* Pull PHY out of reset state */
+	qphy_clrbits(pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
-		status = pcs + cfg->regs[QPHY_PCS_STATUS];
-		ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
-					 PHY_INIT_COMPLETE_TIMEOUT);
-		if (ret) {
-			dev_err(qmp->dev, "phy initialization timed-out\n");
-			goto err_disable_pipe_clk;
-		}
+	/* start SerDes and Phy-Coding-Sublayer */
+	qphy_setbits(pcs, cfg->regs[QPHY_START_CTRL], SERDES_START | PCS_START);
+
+	status = pcs + cfg->regs[QPHY_PCS_STATUS];
+	ret = readl_poll_timeout(status, val, !(val & PHYSTATUS), 200,
+			PHY_INIT_COMPLETE_TIMEOUT);
+	if (ret) {
+		dev_err(qmp->dev, "phy initialization timed-out\n");
+		goto err_disable_pipe_clk;
 	}
+
 	return 0;
 
 err_disable_pipe_clk:
@@ -2055,28 +2078,23 @@ err_disable_pipe_clk:
 	return ret;
 }
 
-static int qmp_combo_power_off(struct phy *phy)
+static int qmp_combo_usb_power_off(struct phy *phy)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 
 	clk_disable_unprepare(qphy->pipe_clk);
 
-	if (cfg->type == PHY_TYPE_DP) {
-		/* Assert DP PHY power down */
-		writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL);
-	} else {
-		/* PHY reset */
-		qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+	/* PHY reset */
+	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
-		/* stop SerDes and Phy-Coding-Sublayer */
-		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
-				SERDES_START | PCS_START);
+	/* stop SerDes and Phy-Coding-Sublayer */
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+			SERDES_START | PCS_START);
 
-		/* Put PHY into POWER DOWN state: active low */
-		qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
-				SW_PWRDN);
-	}
+	/* Put PHY into POWER DOWN state: active low */
+	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+			SW_PWRDN);
 
 	return 0;
 }
@@ -2090,7 +2108,7 @@ static int qmp_combo_usb_init(struct phy *phy)
 	if (ret)
 		return ret;
 
-	ret = qmp_combo_power_on(phy);
+	ret = qmp_combo_usb_power_on(phy);
 	if (ret)
 		qmp_combo_com_exit(qphy);
 
@@ -2102,7 +2120,7 @@ static int qmp_combo_usb_exit(struct phy *phy)
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	int ret;
 
-	ret = qmp_combo_power_off(phy);
+	ret = qmp_combo_usb_power_off(phy);
 	if (ret)
 		return ret;
 
@@ -2128,9 +2146,9 @@ static const struct phy_ops qmp_combo_usb_phy_ops = {
 static const struct phy_ops qmp_combo_dp_phy_ops = {
 	.init		= qmp_combo_dp_init,
 	.configure	= qmp_combo_dp_configure,
-	.power_on	= qmp_combo_power_on,
+	.power_on	= qmp_combo_dp_power_on,
 	.calibrate	= qmp_combo_dp_calibrate,
-	.power_off	= qmp_combo_power_off,
+	.power_off	= qmp_combo_dp_power_off,
 	.exit		= qmp_combo_dp_exit,
 	.owner		= THIS_MODULE,
 };
-- 
GitLab


From dae95d7f667d20ab81976f846ed0bf2c71dbfe18 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:10 +0100
Subject: [PATCH 2691/4122] phy: qcom-qmp-combo: clean up serdes initialisation

Clean up serdes initialisation somewhat by making the current helper a
dedicated helper for the DP part of the PHY.

Note that no error is currently returned for non-supported link rates.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-12-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 49 +++++++++++------------
 1 file changed, 24 insertions(+), 25 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 748fd32a6f728..c059e4aeecdbf 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1313,7 +1313,7 @@ static void qmp_combo_configure(void __iomem *base,
 	qmp_combo_configure_lane(base, tbl, num, 0xff);
 }
 
-static int qmp_combo_serdes_init(struct qmp_phy *qphy)
+static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy)
 {
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	void __iomem *serdes = qphy->serdes;
@@ -1323,28 +1323,26 @@ static int qmp_combo_serdes_init(struct qmp_phy *qphy)
 
 	qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num);
 
-	if (cfg->type == PHY_TYPE_DP) {
-		switch (dp_opts->link_rate) {
-		case 1620:
-			qmp_combo_configure(serdes, cfg->serdes_tbl_rbr,
-					       cfg->serdes_tbl_rbr_num);
-			break;
-		case 2700:
-			qmp_combo_configure(serdes, cfg->serdes_tbl_hbr,
-					       cfg->serdes_tbl_hbr_num);
-			break;
-		case 5400:
-			qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2,
-					       cfg->serdes_tbl_hbr2_num);
-			break;
-		case 8100:
-			qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3,
-					       cfg->serdes_tbl_hbr3_num);
-			break;
-		default:
-			/* Other link rates aren't supported */
-			return -EINVAL;
-		}
+	switch (dp_opts->link_rate) {
+	case 1620:
+		qmp_combo_configure(serdes, cfg->serdes_tbl_rbr,
+				cfg->serdes_tbl_rbr_num);
+		break;
+	case 2700:
+		qmp_combo_configure(serdes, cfg->serdes_tbl_hbr,
+				cfg->serdes_tbl_hbr_num);
+		break;
+	case 5400:
+		qmp_combo_configure(serdes, cfg->serdes_tbl_hbr2,
+				cfg->serdes_tbl_hbr2_num);
+		break;
+	case 8100:
+		qmp_combo_configure(serdes, cfg->serdes_tbl_hbr3,
+				cfg->serdes_tbl_hbr3_num);
+		break;
+	default:
+		/* Other link rates aren't supported */
+		return -EINVAL;
 	}
 
 	return 0;
@@ -1994,7 +1992,7 @@ static int qmp_combo_dp_power_on(struct phy *phy)
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	void __iomem *tx = qphy->tx;
 
-	qmp_combo_serdes_init(qphy);
+	qmp_combo_dp_serdes_init(qphy);
 
 	qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
 
@@ -2025,6 +2023,7 @@ static int qmp_combo_usb_power_on(struct phy *phy)
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	struct qcom_qmp *qmp = qphy->qmp;
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	void __iomem *serdes = qphy->serdes;
 	void __iomem *tx = qphy->tx;
 	void __iomem *rx = qphy->rx;
 	void __iomem *pcs = qphy->pcs;
@@ -2032,7 +2031,7 @@ static int qmp_combo_usb_power_on(struct phy *phy)
 	unsigned int val;
 	int ret;
 
-	qmp_combo_serdes_init(qphy);
+	qmp_combo_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num);
 
 	ret = clk_prepare_enable(qphy->pipe_clk);
 	if (ret) {
-- 
GitLab


From c7fbe5bd14145425d38a3b1e4d59f1b3acff3eba Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:11 +0100
Subject: [PATCH 2692/4122] phy: qcom-qmp-combo: separate USB and DP devicetree
 parsing

Separate the devicetree parsing of the USB and DP child nodes in two
dedicated helpers in preparation for merging the driver data.

Note that only the USB part of the PHY has a pipe clock and that the DP
implementation only uses the tx/tx2 and pcs register regions.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-13-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 ++++++++++++++++-------
 1 file changed, 58 insertions(+), 24 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index c059e4aeecdbf..9c4528dff3165 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2576,13 +2576,12 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy,
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
+static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int id,
 			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
 {
 	struct qcom_qmp *qmp = dev_get_drvdata(dev);
 	struct phy *generic_phy;
 	struct qmp_phy *qphy;
-	const struct phy_ops *ops;
 	int ret;
 
 	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
@@ -2592,7 +2591,57 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
 	qphy->cfg = cfg;
 	qphy->serdes = serdes;
 	/*
-	 * Get memory resources for each PHY:
+	 * Get memory resources from the DP child node:
+	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
+	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4
+	 *
+	 * Note that only tx/tx2 and pcs are used by the DP implementation.
+	 */
+	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
+	if (IS_ERR(qphy->tx))
+		return PTR_ERR(qphy->tx);
+
+	qphy->pcs = devm_of_iomap(dev, np, 2, NULL);
+	if (IS_ERR(qphy->pcs))
+		return PTR_ERR(qphy->pcs);
+
+	if (cfg->lanes >= 2) {
+		qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
+		if (IS_ERR(qphy->tx2))
+			return PTR_ERR(qphy->tx2);
+	}
+
+	generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops);
+	if (IS_ERR(generic_phy)) {
+		ret = PTR_ERR(generic_phy);
+		dev_err(dev, "failed to create DP PHY: %d\n", ret);
+		return ret;
+	}
+
+	qphy->phy = generic_phy;
+	qphy->qmp = qmp;
+	qmp->phys[id] = qphy;
+	phy_set_drvdata(generic_phy, qphy);
+
+	return 0;
+}
+
+static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int id,
+			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
+{
+	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct phy *generic_phy;
+	struct qmp_phy *qphy;
+	int ret;
+
+	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
+	if (!qphy)
+		return -ENOMEM;
+
+	qphy->cfg = cfg;
+	qphy->serdes = serdes;
+	/*
+	 * Get memory resources from the USB child node:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
 	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
 	 * For single lane PHYs: pcs_misc (optional) -> 3.
@@ -2631,31 +2680,16 @@ static int qmp_combo_create(struct device *dev, struct device_node *np, int id,
 		qphy->pcs_misc = NULL;
 	}
 
-	/*
-	 * Get PHY's Pipe clock, if any. USB3 and PCIe are PIPE3
-	 * based phys, so they essentially have pipe clock. So,
-	 * we return error in case phy is USB3 or PIPE type.
-	 * Otherwise, we initialize pipe clock to NULL for
-	 * all phys that don't need this.
-	 */
 	qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
 	if (IS_ERR(qphy->pipe_clk)) {
-		if (cfg->type == PHY_TYPE_USB3)
-			return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk),
-					     "failed to get lane%d pipe_clk\n",
-					     id);
-		qphy->pipe_clk = NULL;
+		return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk),
+				     "failed to get lane%d pipe_clk\n", id);
 	}
 
-	if (cfg->type == PHY_TYPE_DP)
-		ops = &qmp_combo_dp_phy_ops;
-	else
-		ops = &qmp_combo_usb_phy_ops;
-
-	generic_phy = devm_phy_create(dev, np, ops);
+	generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops);
 	if (IS_ERR(generic_phy)) {
 		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create qphy %d\n", ret);
+		dev_err(dev, "failed to create USB PHY: %d\n", ret);
 		return ret;
 	}
 
@@ -2752,7 +2786,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 			serdes = dp_serdes;
 
 			/* Create per-lane phy */
-			ret = qmp_combo_create(dev, child, id, serdes, cfg);
+			ret = qmp_combo_create_dp(dev, child, id, serdes, cfg);
 			if (ret) {
 				dev_err(dev, "failed to create lane%d phy, %d\n",
 					id, ret);
@@ -2770,7 +2804,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 			serdes = usb_serdes;
 
 			/* Create per-lane phy */
-			ret = qmp_combo_create(dev, child, id, serdes, cfg);
+			ret = qmp_combo_create_usb(dev, child, id, serdes, cfg);
 			if (ret) {
 				dev_err(dev, "failed to create lane%d phy, %d\n",
 					id, ret);
-- 
GitLab


From 4197a2a22df7804b40335ab638eae211acd1a81b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:12 +0100
Subject: [PATCH 2693/4122] phy: qcom-qmp-combo: add dedicated DP iomem
 pointers

In preparation for merging the driver data, add separate iomem pointers
for the DP part of the PHY.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-14-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 246 +++++++++++-----------
 1 file changed, 126 insertions(+), 120 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 9c4528dff3165..43193bfe6e11d 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -895,6 +895,12 @@ struct qmp_phy {
 	void __iomem *rx2;
 	void __iomem *pcs_misc;
 	void __iomem *pcs_usb;
+
+	void __iomem *dp_serdes;
+	void __iomem *dp_tx;
+	void __iomem *dp_tx2;
+	void __iomem *dp_pcs;
+
 	struct clk *pipe_clk;
 	struct qcom_qmp *qmp;
 	enum phy_mode mode;
@@ -1316,7 +1322,7 @@ static void qmp_combo_configure(void __iomem *base,
 static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy)
 {
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *serdes = qphy->serdes;
+	void __iomem *serdes = qphy->dp_serdes;
 	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
 	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
 	int serdes_tbl_num = cfg->serdes_tbl_num;
@@ -1352,43 +1358,43 @@ static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qphy->pcs + QSERDES_DP_PHY_PD_CTL);
+	       qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	/* Turn on BIAS current for PHY/PLL */
 	writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX |
 	       QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL,
-	       qphy->serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
+	       qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
 
-	writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_LANE_0_1_PWRDN |
 	       DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN |
 	       DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qphy->pcs + QSERDES_DP_PHY_PD_CTL);
+	       qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	writel(QSERDES_V3_COM_BIAS_EN |
 	       QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN |
 	       QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL |
 	       QSERDES_V3_COM_CLKBUF_RX_DRIVE_L,
-	       qphy->serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
-
-	writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG0);
-	writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0x24, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG3);
-	writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG4);
-	writel(0x26, qphy->pcs + QSERDES_DP_PHY_AUX_CFG5);
-	writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG6);
-	writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG7);
-	writel(0xbb, qphy->pcs + QSERDES_DP_PHY_AUX_CFG8);
-	writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG9);
+	       qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
+
+	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
+	writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0x24, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
+	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
+	writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
+	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
+	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
+	writel(0xbb, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
+	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
 	qphy->dp_aux_cfg = 0;
 
 	writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK |
 	       PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK |
 	       PHY_AUX_REQ_ERR_MASK,
-	       qphy->pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK);
+	       qphy->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK);
 }
 
 static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy,
@@ -1421,10 +1427,10 @@ static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy,
 	voltage_swing_cfg |= DP_PHY_TXn_TX_DRV_LVL_MUX_EN;
 	pre_emphasis_cfg |= DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN;
 
-	writel(voltage_swing_cfg, qphy->tx + drv_lvl_reg);
-	writel(pre_emphasis_cfg, qphy->tx + emp_post_reg);
-	writel(voltage_swing_cfg, qphy->tx2 + drv_lvl_reg);
-	writel(pre_emphasis_cfg, qphy->tx2 + emp_post_reg);
+	writel(voltage_swing_cfg, qphy->dp_tx + drv_lvl_reg);
+	writel(pre_emphasis_cfg, qphy->dp_tx + emp_post_reg);
+	writel(voltage_swing_cfg, qphy->dp_tx2 + drv_lvl_reg);
+	writel(pre_emphasis_cfg, qphy->dp_tx2 + emp_post_reg);
 
 	return 0;
 }
@@ -1446,10 +1452,10 @@ static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy)
 		drvr_en = 0x10;
 	}
 
-	writel(drvr_en, qphy->tx + QSERDES_V3_TX_HIGHZ_DRVR_EN);
-	writel(bias_en, qphy->tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
-	writel(drvr_en, qphy->tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN);
-	writel(bias_en, qphy->tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr_en, qphy->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN);
+	writel(bias_en, qphy->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr_en, qphy->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN);
+	writel(bias_en, qphy->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
 }
 
 static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy)
@@ -1472,12 +1478,12 @@ static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy)
 	 * if (lane_cnt == 4 || orientation == ORIENTATION_CC1)
 	 *	val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN;
 	 * if (orientation == ORIENTATION_CC2)
-	 *	writel(0x4c, qphy->pcs + QSERDES_V3_DP_PHY_MODE);
+	 *	writel(0x4c, qphy->dp_pcs + QSERDES_V3_DP_PHY_MODE);
 	 */
 	val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN;
-	writel(val, qphy->pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(val, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
-	writel(0x5c, qphy->pcs + QSERDES_DP_PHY_MODE);
+	writel(0x5c, qphy->dp_pcs + QSERDES_DP_PHY_MODE);
 
 	return reverse;
 }
@@ -1491,8 +1497,8 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy)
 
 	qmp_combo_configure_dp_mode(qphy);
 
-	writel(0x05, qphy->pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL);
-	writel(0x05, qphy->pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL);
+	writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL);
+	writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL);
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -1515,40 +1521,40 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy)
 		/* Other link rates aren't supported */
 		return -EINVAL;
 	}
-	writel(phy_vco_div, qphy->pcs + QSERDES_V3_DP_PHY_VCO_DIV);
+	writel(phy_vco_div, qphy->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV);
 
 	clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000);
 	clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq);
 
-	writel(0x04, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG);
-	writel(0x05, qphy->pcs + QSERDES_DP_PHY_CFG);
-	writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG);
-	writel(0x09, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x04, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	writel(0x20, qphy->serdes + QSERDES_V3_COM_RESETSM_CNTRL);
+	writel(0x20, qphy->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL);
 
-	if (readl_poll_timeout(qphy->serdes + QSERDES_V3_COM_C_READY_STATUS,
+	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V3_COM_C_READY_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->pcs + QSERDES_V3_DP_PHY_STATUS,
+	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	return readl_poll_timeout(qphy->pcs + QSERDES_V3_DP_PHY_STATUS,
+	return readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1568,7 +1574,7 @@ static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy)
 	qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
 	val = cfg1_settings[qphy->dp_aux_cfg];
 
-	writel(val, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
 
 	return 0;
 }
@@ -1577,37 +1583,37 @@ static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qphy->pcs + QSERDES_DP_PHY_PD_CTL);
+	       qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	/* Turn on BIAS current for PHY/PLL */
-	writel(0x17, qphy->serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN);
-
-	writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG0);
-	writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0xa4, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x00, qphy->pcs + QSERDES_DP_PHY_AUX_CFG3);
-	writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG4);
-	writel(0x26, qphy->pcs + QSERDES_DP_PHY_AUX_CFG5);
-	writel(0x0a, qphy->pcs + QSERDES_DP_PHY_AUX_CFG6);
-	writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG7);
-	writel(0xb7, qphy->pcs + QSERDES_DP_PHY_AUX_CFG8);
-	writel(0x03, qphy->pcs + QSERDES_DP_PHY_AUX_CFG9);
+	writel(0x17, qphy->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN);
+
+	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
+	writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
+	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
+	writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
+	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
+	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
+	writel(0xb7, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
+	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
 	qphy->dp_aux_cfg = 0;
 
 	writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK |
 	       PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK |
 	       PHY_AUX_REQ_ERR_MASK,
-	       qphy->pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK);
+	       qphy->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK);
 }
 
 static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy)
 {
 	/* Program default values before writing proper values */
-	writel(0x27, qphy->tx + QSERDES_V4_TX_TX_DRV_LVL);
-	writel(0x27, qphy->tx2 + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL);
 
-	writel(0x20, qphy->tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
-	writel(0x20, qphy->tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
 
 	qmp_combo_configure_dp_swing(qphy, QSERDES_V4_TX_TX_DRV_LVL,
 			QSERDES_V4_TX_TX_EMP_POST1_LVL);
@@ -1620,15 +1626,15 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy)
 	u32 phy_vco_div, status;
 	unsigned long pixel_freq;
 
-	writel(0x0f, qphy->pcs + QSERDES_V4_DP_PHY_CFG_1);
+	writel(0x0f, qphy->dp_pcs + QSERDES_V4_DP_PHY_CFG_1);
 
 	qmp_combo_configure_dp_mode(qphy);
 
-	writel(0x13, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0xa4, qphy->pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
 
-	writel(0x05, qphy->pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL);
-	writel(0x05, qphy->pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL);
+	writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL);
+	writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL);
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -1651,49 +1657,49 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy)
 		/* Other link rates aren't supported */
 		return -EINVAL;
 	}
-	writel(phy_vco_div, qphy->pcs + QSERDES_V4_DP_PHY_VCO_DIV);
+	writel(phy_vco_div, qphy->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV);
 
 	clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000);
 	clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq);
 
-	writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG);
-	writel(0x05, qphy->pcs + QSERDES_DP_PHY_CFG);
-	writel(0x01, qphy->pcs + QSERDES_DP_PHY_CFG);
-	writel(0x09, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	writel(0x20, qphy->serdes + QSERDES_V4_COM_RESETSM_CNTRL);
+	writel(0x20, qphy->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL);
 
-	if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_C_READY_STATUS,
+	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_C_READY_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_CMN_STATUS,
+	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	if (readl_poll_timeout(qphy->serdes + QSERDES_V4_COM_CMN_STATUS,
+	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1737,30 +1743,30 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy)
 		drvr1_en = 0x10;
 	}
 
-	writel(drvr0_en, qphy->tx + QSERDES_V4_TX_HIGHZ_DRVR_EN);
-	writel(bias0_en, qphy->tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
-	writel(drvr1_en, qphy->tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN);
-	writel(bias1_en, qphy->tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr0_en, qphy->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN);
+	writel(bias0_en, qphy->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr1_en, qphy->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN);
+	writel(bias1_en, qphy->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
 
-	writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x0a, qphy->tx + QSERDES_V4_TX_TX_POL_INV);
-	writel(0x0a, qphy->tx2 + QSERDES_V4_TX_TX_POL_INV);
+	writel(0x0a, qphy->dp_tx + QSERDES_V4_TX_TX_POL_INV);
+	writel(0x0a, qphy->dp_tx2 + QSERDES_V4_TX_TX_POL_INV);
 
-	writel(0x27, qphy->tx + QSERDES_V4_TX_TX_DRV_LVL);
-	writel(0x27, qphy->tx2 + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL);
 
-	writel(0x20, qphy->tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
-	writel(0x20, qphy->tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
 
 	return 0;
 }
@@ -1794,30 +1800,30 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy)
 		drvr1_en = 0x10;
 	}
 
-	writel(drvr0_en, qphy->tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
-	writel(bias0_en, qphy->tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
-	writel(drvr1_en, qphy->tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
-	writel(bias1_en, qphy->tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
+	writel(bias0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
+	writel(bias1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
 
-	writel(0x18, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qphy->pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x0a, qphy->tx + QSERDES_V5_5NM_TX_TX_POL_INV);
-	writel(0x0a, qphy->tx2 + QSERDES_V5_5NM_TX_TX_POL_INV);
+	writel(0x0a, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV);
+	writel(0x0a, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV);
 
-	writel(0x27, qphy->tx + QSERDES_V5_5NM_TX_TX_DRV_LVL);
-	writel(0x27, qphy->tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL);
+	writel(0x27, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL);
+	writel(0x27, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL);
 
-	writel(0x20, qphy->tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
-	writel(0x20, qphy->tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
 
 	return 0;
 }
@@ -1835,7 +1841,7 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy)
 	qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
 	val = cfg1_settings[qphy->dp_aux_cfg];
 
-	writel(val, qphy->pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
 
 	return 0;
 }
@@ -1990,14 +1996,14 @@ static int qmp_combo_dp_power_on(struct phy *phy)
 {
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *tx = qphy->tx;
+	void __iomem *tx = qphy->dp_tx;
 
 	qmp_combo_dp_serdes_init(qphy);
 
 	qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+		qmp_combo_configure_lane(qphy->dp_tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
 	/* Configure special DP tx tunings */
 	cfg->configure_dp_tx(qphy);
@@ -2013,7 +2019,7 @@ static int qmp_combo_dp_power_off(struct phy *phy)
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 
 	/* Assert DP PHY power down */
-	writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	return 0;
 }
@@ -2589,7 +2595,7 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i
 		return -ENOMEM;
 
 	qphy->cfg = cfg;
-	qphy->serdes = serdes;
+	qphy->dp_serdes = serdes;
 	/*
 	 * Get memory resources from the DP child node:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
@@ -2597,18 +2603,18 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i
 	 *
 	 * Note that only tx/tx2 and pcs are used by the DP implementation.
 	 */
-	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
-	if (IS_ERR(qphy->tx))
-		return PTR_ERR(qphy->tx);
+	qphy->dp_tx = devm_of_iomap(dev, np, 0, NULL);
+	if (IS_ERR(qphy->dp_tx))
+		return PTR_ERR(qphy->dp_tx);
 
-	qphy->pcs = devm_of_iomap(dev, np, 2, NULL);
-	if (IS_ERR(qphy->pcs))
-		return PTR_ERR(qphy->pcs);
+	qphy->dp_pcs = devm_of_iomap(dev, np, 2, NULL);
+	if (IS_ERR(qphy->dp_pcs))
+		return PTR_ERR(qphy->dp_pcs);
 
 	if (cfg->lanes >= 2) {
-		qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
-		if (IS_ERR(qphy->tx2))
-			return PTR_ERR(qphy->tx2);
+		qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL);
+		if (IS_ERR(qphy->dp_tx2))
+			return PTR_ERR(qphy->dp_tx2);
 	}
 
 	generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops);
-- 
GitLab


From bc8615888f3e4dc8f3448b6b4f8dec04b8b5bce2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:13 +0100
Subject: [PATCH 2694/4122] phy: qcom-qmp-combo: clean up DP configurations

In preparation for merging the USB and DP configurations, align the
initialisations of the DP function pointers.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-15-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 40 +++++++++++------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 43193bfe6e11d..50c011d23a9e1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1049,10 +1049,10 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init = qcom_qmp_v3_phy_dp_aux_init,
-	.configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx,
-	.configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy,
-	.calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate,
+	.dp_aux_init		= qcom_qmp_v3_phy_dp_aux_init,
+	.configure_dp_tx	= qcom_qmp_v3_phy_configure_dp_tx,
+	.configure_dp_phy	= qcom_qmp_v3_phy_configure_dp_phy,
+	.calibrate_dp_phy	= qcom_qmp_v3_dp_phy_calibrate,
 };
 
 static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = {
@@ -1106,10 +1106,10 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init = qcom_qmp_v3_phy_dp_aux_init,
-	.configure_dp_tx = qcom_qmp_v3_phy_configure_dp_tx,
-	.configure_dp_phy = qcom_qmp_v3_phy_configure_dp_phy,
-	.calibrate_dp_phy = qcom_qmp_v3_dp_phy_calibrate,
+	.dp_aux_init		= qcom_qmp_v3_phy_dp_aux_init,
+	.configure_dp_tx	= qcom_qmp_v3_phy_configure_dp_tx,
+	.configure_dp_phy	= qcom_qmp_v3_phy_configure_dp_phy,
+	.calibrate_dp_phy	= qcom_qmp_v3_dp_phy_calibrate,
 };
 
 static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = {
@@ -1166,10 +1166,10 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init = qcom_qmp_v4_phy_dp_aux_init,
-	.configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx,
-	.configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy,
-	.calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate,
+	.dp_aux_init		= qcom_qmp_v4_phy_dp_aux_init,
+	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
+	.configure_dp_phy	= qcom_qmp_v4_phy_configure_dp_phy,
+	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
 };
 
 static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = {
@@ -1222,10 +1222,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v5_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init = qcom_qmp_v4_phy_dp_aux_init,
-	.configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx,
-	.configure_dp_phy = qcom_qmp_v5_phy_configure_dp_phy,
-	.calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate,
+	.dp_aux_init		= qcom_qmp_v4_phy_dp_aux_init,
+	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
+	.configure_dp_phy	= qcom_qmp_v5_phy_configure_dp_phy,
+	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
 };
 
 static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_combo_cfg = {
@@ -1282,10 +1282,10 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init = qcom_qmp_v4_phy_dp_aux_init,
-	.configure_dp_tx = qcom_qmp_v4_phy_configure_dp_tx,
-	.configure_dp_phy = qcom_qmp_v4_phy_configure_dp_phy,
-	.calibrate_dp_phy = qcom_qmp_v4_dp_phy_calibrate,
+	.dp_aux_init		= qcom_qmp_v4_phy_dp_aux_init,
+	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
+	.configure_dp_phy	= qcom_qmp_v4_phy_configure_dp_phy,
+	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
 };
 
 static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = {
-- 
GitLab


From ad4db91d60636c2c28487c3f518eab5952511923 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:14 +0100
Subject: [PATCH 2695/4122] phy: qcom-qmp-combo: rename sc8280xp config

In preparation for merging the USB and DP configurations, drop the
"combo" infix from the SC8280XP combined configuration for consistency
with the other platforms.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-16-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 50c011d23a9e1..d3fd6bde4af58 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -1228,7 +1228,7 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
 	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
 };
 
-static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_combo_cfg = {
+static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = {
 	.usb_cfg		= &sc8280xp_usb43dp_usb_cfg,
 	.dp_cfg			= &sc8280xp_usb43dp_dp_cfg,
 };
@@ -2857,7 +2857,7 @@ static const struct of_device_id qmp_combo_of_match_table[] = {
 	},
 	{
 		.compatible = "qcom,sc8280xp-qmp-usb43dp-phy",
-		.data = &sc8280xp_usb43dpphy_combo_cfg,
+		.data = &sc8280xp_usb43dpphy_cfg,
 	},
 	{
 		.compatible = "qcom,sdm845-qmp-usb3-dp-phy",
-- 
GitLab


From 488f116de075f2fd0cb90205a76e2ca0756efaff Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:15 +0100
Subject: [PATCH 2696/4122] phy: qcom-qmp-combo: add DP configuration tables

In preparation for merging the USB and DP configurations, add dedicated
pointers for the DP serdes and tx tables to the configurations.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-17-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 45 ++++++++++++-----------
 1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index d3fd6bde4af58..2588cfa5e81e1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -816,6 +816,11 @@ struct qmp_phy_cfg {
 	const struct qmp_phy_init_tbl *pcs_usb_tbl;
 	int pcs_usb_tbl_num;
 
+	const struct qmp_phy_init_tbl *dp_serdes_tbl;
+	int dp_serdes_tbl_num;
+	const struct qmp_phy_init_tbl *dp_tx_tbl;
+	int dp_tx_tbl_num;
+
 	/* Init sequence for DP PHY block link rates */
 	const struct qmp_phy_init_tbl *serdes_tbl_rbr;
 	int serdes_tbl_rbr_num;
@@ -1030,10 +1035,10 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
 	.type			= PHY_TYPE_DP,
 	.lanes			= 2,
 
-	.serdes_tbl		= qmp_v3_dp_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
-	.tx_tbl			= qmp_v3_dp_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(qmp_v3_dp_tx_tbl),
+	.dp_serdes_tbl		= qmp_v3_dp_serdes_tbl,
+	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
+	.dp_tx_tbl		= qmp_v3_dp_tx_tbl,
+	.dp_tx_tbl_num		= ARRAY_SIZE(qmp_v3_dp_tx_tbl),
 
 	.serdes_tbl_rbr		= qmp_v3_dp_serdes_tbl_rbr,
 	.serdes_tbl_rbr_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr),
@@ -1147,10 +1152,10 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
 	.type			= PHY_TYPE_DP,
 	.lanes			= 2,
 
-	.serdes_tbl		= qmp_v4_dp_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
-	.tx_tbl			= qmp_v4_dp_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(qmp_v4_dp_tx_tbl),
+	.dp_serdes_tbl		= qmp_v4_dp_serdes_tbl,
+	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
+	.dp_tx_tbl		= qmp_v4_dp_tx_tbl,
+	.dp_tx_tbl_num		= ARRAY_SIZE(qmp_v4_dp_tx_tbl),
 
 	.serdes_tbl_rbr		= qmp_v4_dp_serdes_tbl_rbr,
 	.serdes_tbl_rbr_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr),
@@ -1203,10 +1208,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
 	.type			= PHY_TYPE_DP,
 	.lanes			= 2,
 
-	.serdes_tbl		= qmp_v5_dp_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(qmp_v5_dp_serdes_tbl),
-	.tx_tbl			= qmp_v5_5nm_dp_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(qmp_v5_5nm_dp_tx_tbl),
+	.dp_serdes_tbl		= qmp_v5_dp_serdes_tbl,
+	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v5_dp_serdes_tbl),
+	.dp_tx_tbl		= qmp_v5_5nm_dp_tx_tbl,
+	.dp_tx_tbl_num		= ARRAY_SIZE(qmp_v5_5nm_dp_tx_tbl),
 
 	.serdes_tbl_rbr		= qmp_v4_dp_serdes_tbl_rbr,
 	.serdes_tbl_rbr_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr),
@@ -1263,10 +1268,10 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
 	.type			= PHY_TYPE_DP,
 	.lanes			= 2,
 
-	.serdes_tbl		= qmp_v4_dp_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
-	.tx_tbl			= qmp_v4_dp_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(qmp_v4_dp_tx_tbl),
+	.dp_serdes_tbl		= qmp_v4_dp_serdes_tbl,
+	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
+	.dp_tx_tbl		= qmp_v4_dp_tx_tbl,
+	.dp_tx_tbl_num		= ARRAY_SIZE(qmp_v4_dp_tx_tbl),
 
 	.serdes_tbl_rbr		= qmp_v4_dp_serdes_tbl_rbr,
 	.serdes_tbl_rbr_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl_rbr),
@@ -1324,10 +1329,8 @@ static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy)
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	void __iomem *serdes = qphy->dp_serdes;
 	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
-	const struct qmp_phy_init_tbl *serdes_tbl = cfg->serdes_tbl;
-	int serdes_tbl_num = cfg->serdes_tbl_num;
 
-	qmp_combo_configure(serdes, serdes_tbl, serdes_tbl_num);
+	qmp_combo_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num);
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -2000,10 +2003,10 @@ static int qmp_combo_dp_power_on(struct phy *phy)
 
 	qmp_combo_dp_serdes_init(qphy);
 
-	qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
+	qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1);
 
 	if (cfg->lanes >= 2)
-		qmp_combo_configure_lane(qphy->dp_tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+		qmp_combo_configure_lane(qphy->dp_tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2);
 
 	/* Configure special DP tx tunings */
 	cfg->configure_dp_tx(qphy);
-- 
GitLab


From ba0af7b346db8149e33a2f6e1a7b8265cabbfacb Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:16 +0100
Subject: [PATCH 2697/4122] phy: qcom-qmp-combo: drop lanes config parameter

Since the QMP driver split there is really no need for the 'lanes'
configuration parameter as all of these USB-C PHYs support dual-lane
SuperSpeed USB and quad-lane (uni-directional) DP (even if the driver
still only supports CC1 orientation using lanes 2 and 3).

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221114110621.4639-18-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 62 ++++++++---------------
 1 file changed, 20 insertions(+), 42 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 2588cfa5e81e1..a0abeb7c3bcad 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -802,7 +802,6 @@ struct qmp_phy;
 struct qmp_phy_cfg {
 	/* phy-type - PCIE/UFS/USB */
 	unsigned int type;
-	int lanes;
 
 	/* Init sequence for PHY blocks - serdes, tx, rx, pcs */
 	const struct qmp_phy_init_tbl *serdes_tbl;
@@ -1010,7 +1009,6 @@ static const char * const sc7180_usb3phy_reset_l[] = {
 
 static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 	.type			= PHY_TYPE_USB3,
-	.lanes			= 2,
 
 	.serdes_tbl		= qmp_v3_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_serdes_tbl),
@@ -1033,7 +1031,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 
 static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
 	.type			= PHY_TYPE_DP,
-	.lanes			= 2,
 
 	.dp_serdes_tbl		= qmp_v3_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
@@ -1067,7 +1064,6 @@ static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = {
 
 static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
 	.type			= PHY_TYPE_USB3,
-	.lanes			= 2,
 
 	.serdes_tbl		= qmp_v3_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_serdes_tbl),
@@ -1090,7 +1086,6 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
 
 static const struct qmp_phy_cfg sdm845_dpphy_cfg = {
 	.type			= PHY_TYPE_DP,
-	.lanes			= 2,
 
 	.serdes_tbl		= qmp_v3_dp_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
@@ -1124,7 +1119,6 @@ static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = {
 
 static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 	.type			= PHY_TYPE_USB3,
-	.lanes			= 2,
 
 	.serdes_tbl		= sm8150_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(sm8150_usb3_serdes_tbl),
@@ -1150,7 +1144,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 
 static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
 	.type			= PHY_TYPE_DP,
-	.lanes			= 2,
 
 	.dp_serdes_tbl		= qmp_v4_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
@@ -1184,7 +1177,6 @@ static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = {
 
 static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = {
 	.type			= PHY_TYPE_USB3,
-	.lanes			= 2,
 
 	.serdes_tbl		= sc8280xp_usb43dp_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl),
@@ -1206,7 +1198,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = {
 
 static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
 	.type			= PHY_TYPE_DP,
-	.lanes			= 2,
 
 	.dp_serdes_tbl		= qmp_v5_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v5_dp_serdes_tbl),
@@ -1240,7 +1231,6 @@ static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = {
 
 static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 	.type			= PHY_TYPE_USB3,
-	.lanes			= 2,
 
 	.serdes_tbl		= sm8150_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(sm8150_usb3_serdes_tbl),
@@ -1266,7 +1256,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 
 static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
 	.type			= PHY_TYPE_DP,
-	.lanes			= 2,
 
 	.dp_serdes_tbl		= qmp_v4_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
@@ -2000,13 +1989,12 @@ static int qmp_combo_dp_power_on(struct phy *phy)
 	struct qmp_phy *qphy = phy_get_drvdata(phy);
 	const struct qmp_phy_cfg *cfg = qphy->cfg;
 	void __iomem *tx = qphy->dp_tx;
+	void __iomem *tx2 = qphy->dp_tx2;
 
 	qmp_combo_dp_serdes_init(qphy);
 
 	qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1);
-
-	if (cfg->lanes >= 2)
-		qmp_combo_configure_lane(qphy->dp_tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2);
+	qmp_combo_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2);
 
 	/* Configure special DP tx tunings */
 	cfg->configure_dp_tx(qphy);
@@ -2035,6 +2023,8 @@ static int qmp_combo_usb_power_on(struct phy *phy)
 	void __iomem *serdes = qphy->serdes;
 	void __iomem *tx = qphy->tx;
 	void __iomem *rx = qphy->rx;
+	void __iomem *tx2 = qphy->tx2;
+	void __iomem *rx2 = qphy->rx2;
 	void __iomem *pcs = qphy->pcs;
 	void __iomem *status;
 	unsigned int val;
@@ -2050,14 +2040,10 @@ static int qmp_combo_usb_power_on(struct phy *phy)
 
 	/* Tx, Rx, and PCS configurations */
 	qmp_combo_configure_lane(tx, cfg->tx_tbl, cfg->tx_tbl_num, 1);
-
-	if (cfg->lanes >= 2)
-		qmp_combo_configure_lane(qphy->tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
+	qmp_combo_configure_lane(tx2, cfg->tx_tbl, cfg->tx_tbl_num, 2);
 
 	qmp_combo_configure_lane(rx, cfg->rx_tbl, cfg->rx_tbl_num, 1);
-
-	if (cfg->lanes >= 2)
-		qmp_combo_configure_lane(qphy->rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
+	qmp_combo_configure_lane(rx2, cfg->rx_tbl, cfg->rx_tbl_num, 2);
 
 	qmp_combo_configure(pcs, cfg->pcs_tbl, cfg->pcs_tbl_num);
 
@@ -2601,8 +2587,8 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i
 	qphy->dp_serdes = serdes;
 	/*
 	 * Get memory resources from the DP child node:
-	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
-	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4
+	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2;
+	 * tx2 -> 3; rx2 -> 4
 	 *
 	 * Note that only tx/tx2 and pcs are used by the DP implementation.
 	 */
@@ -2614,11 +2600,9 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i
 	if (IS_ERR(qphy->dp_pcs))
 		return PTR_ERR(qphy->dp_pcs);
 
-	if (cfg->lanes >= 2) {
-		qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL);
-		if (IS_ERR(qphy->dp_tx2))
-			return PTR_ERR(qphy->dp_tx2);
-	}
+	qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL);
+	if (IS_ERR(qphy->dp_tx2))
+		return PTR_ERR(qphy->dp_tx2);
 
 	generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops);
 	if (IS_ERR(generic_phy)) {
@@ -2651,9 +2635,8 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int
 	qphy->serdes = serdes;
 	/*
 	 * Get memory resources from the USB child node:
-	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2.
-	 * For dual lane PHYs: tx2 -> 3, rx2 -> 4, pcs_misc (optional) -> 5
-	 * For single lane PHYs: pcs_misc (optional) -> 3.
+	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2;
+	 * tx2 -> 3; rx2 -> 4; pcs_misc (optional) -> 5
 	 */
 	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
 	if (IS_ERR(qphy->tx))
@@ -2670,20 +2653,15 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int
 	if (cfg->pcs_usb_offset)
 		qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset;
 
-	if (cfg->lanes >= 2) {
-		qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
-		if (IS_ERR(qphy->tx2))
-			return PTR_ERR(qphy->tx2);
+	qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
+	if (IS_ERR(qphy->tx2))
+		return PTR_ERR(qphy->tx2);
 
-		qphy->rx2 = devm_of_iomap(dev, np, 4, NULL);
-		if (IS_ERR(qphy->rx2))
-			return PTR_ERR(qphy->rx2);
-
-		qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
-	} else {
-		qphy->pcs_misc = devm_of_iomap(dev, np, 3, NULL);
-	}
+	qphy->rx2 = devm_of_iomap(dev, np, 4, NULL);
+	if (IS_ERR(qphy->rx2))
+		return PTR_ERR(qphy->rx2);
 
+	qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
 	if (IS_ERR(qphy->pcs_misc)) {
 		dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
 		qphy->pcs_misc = NULL;
-- 
GitLab


From 9e62877eefacecdcd0467cfeb6bcd20786465f9b Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:17 +0100
Subject: [PATCH 2698/4122] phy: qcom-qmp-combo: merge USB and DP
 configurations

It does not really make any sense to keep separate configuration
structures for the USB and DP parts of the same PHY so merge them.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-19-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 182 +++++++---------------
 1 file changed, 57 insertions(+), 125 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index a0abeb7c3bcad..298477259ee62 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -798,11 +798,7 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = {
 
 struct qmp_phy;
 
-/* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
-	/* phy-type - PCIE/UFS/USB */
-	unsigned int type;
-
 	/* Init sequence for PHY blocks - serdes, tx, rx, pcs */
 	const struct qmp_phy_init_tbl *serdes_tbl;
 	int serdes_tbl_num;
@@ -863,11 +859,6 @@ struct qmp_phy_cfg {
 
 };
 
-struct qmp_phy_combo_cfg {
-	const struct qmp_phy_cfg *usb_cfg;
-	const struct qmp_phy_cfg *dp_cfg;
-};
-
 /**
  * struct qmp_phy - per-lane phy descriptor
  *
@@ -1007,9 +998,7 @@ static const char * const sc7180_usb3phy_reset_l[] = {
 	"phy",
 };
 
-static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
-	.type			= PHY_TYPE_USB3,
-
+static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = {
 	.serdes_tbl		= qmp_v3_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_serdes_tbl),
 	.tx_tbl			= qmp_v3_usb3_tx_tbl,
@@ -1018,19 +1007,6 @@ static const struct qmp_phy_cfg sc7180_usb3phy_cfg = {
 	.rx_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_rx_tbl),
 	.pcs_tbl		= qmp_v3_usb3_pcs_tbl,
 	.pcs_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_pcs_tbl),
-	.clk_list		= qmp_v3_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
-	.reset_list		= sc7180_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(sc7180_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v3_usb3phy_regs_layout,
-
-	.has_pwrdn_delay	= true,
-};
-
-static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
-	.type			= PHY_TYPE_DP,
 
 	.dp_serdes_tbl		= qmp_v3_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
@@ -1055,16 +1031,19 @@ static const struct qmp_phy_cfg sc7180_dpphy_cfg = {
 	.configure_dp_tx	= qcom_qmp_v3_phy_configure_dp_tx,
 	.configure_dp_phy	= qcom_qmp_v3_phy_configure_dp_phy,
 	.calibrate_dp_phy	= qcom_qmp_v3_dp_phy_calibrate,
-};
 
-static const struct qmp_phy_combo_cfg sc7180_usb3dpphy_cfg = {
-	.usb_cfg		= &sc7180_usb3phy_cfg,
-	.dp_cfg			= &sc7180_dpphy_cfg,
-};
+	.clk_list		= qmp_v3_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
+	.reset_list		= sc7180_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(sc7180_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qmp_v3_usb3phy_regs_layout,
 
-static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
-	.type			= PHY_TYPE_USB3,
+	.has_pwrdn_delay	= true,
+};
 
+static const struct qmp_phy_cfg sdm845_usb3dpphy_cfg = {
 	.serdes_tbl		= qmp_v3_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_serdes_tbl),
 	.tx_tbl			= qmp_v3_usb3_tx_tbl,
@@ -1073,24 +1052,11 @@ static const struct qmp_phy_cfg sdm845_usb3phy_cfg = {
 	.rx_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_rx_tbl),
 	.pcs_tbl		= qmp_v3_usb3_pcs_tbl,
 	.pcs_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_pcs_tbl),
-	.clk_list		= qmp_v3_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v3_usb3phy_regs_layout,
-
-	.has_pwrdn_delay	= true,
-};
 
-static const struct qmp_phy_cfg sdm845_dpphy_cfg = {
-	.type			= PHY_TYPE_DP,
-
-	.serdes_tbl		= qmp_v3_dp_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
-	.tx_tbl			= qmp_v3_dp_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(qmp_v3_dp_tx_tbl),
+	.dp_serdes_tbl		= qmp_v3_dp_serdes_tbl,
+	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl),
+	.dp_tx_tbl		= qmp_v3_dp_tx_tbl,
+	.dp_tx_tbl_num		= ARRAY_SIZE(qmp_v3_dp_tx_tbl),
 
 	.serdes_tbl_rbr		= qmp_v3_dp_serdes_tbl_rbr,
 	.serdes_tbl_rbr_num	= ARRAY_SIZE(qmp_v3_dp_serdes_tbl_rbr),
@@ -1110,16 +1076,19 @@ static const struct qmp_phy_cfg sdm845_dpphy_cfg = {
 	.configure_dp_tx	= qcom_qmp_v3_phy_configure_dp_tx,
 	.configure_dp_phy	= qcom_qmp_v3_phy_configure_dp_phy,
 	.calibrate_dp_phy	= qcom_qmp_v3_dp_phy_calibrate,
-};
 
-static const struct qmp_phy_combo_cfg sdm845_usb3dpphy_cfg = {
-	.usb_cfg                = &sdm845_usb3phy_cfg,
-	.dp_cfg                 = &sdm845_dpphy_cfg,
-};
+	.clk_list		= qmp_v3_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qmp_v3_usb3phy_regs_layout,
 
-static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
-	.type			= PHY_TYPE_USB3,
+	.has_pwrdn_delay	= true,
+};
 
+static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = {
 	.serdes_tbl		= sm8150_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(sm8150_usb3_serdes_tbl),
 	.tx_tbl			= sm8150_usb3_tx_tbl,
@@ -1130,20 +1099,6 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 	.pcs_tbl_num		= ARRAY_SIZE(sm8150_usb3_pcs_tbl),
 	.pcs_usb_tbl		= sm8150_usb3_pcs_usb_tbl,
 	.pcs_usb_tbl_num	= ARRAY_SIZE(sm8150_usb3_pcs_usb_tbl),
-	.clk_list		= qmp_v4_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v4_usb3phy_regs_layout,
-	.pcs_usb_offset		= 0x300,
-
-	.has_pwrdn_delay	= true,
-};
-
-static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
-	.type			= PHY_TYPE_DP,
 
 	.dp_serdes_tbl		= qmp_v4_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
@@ -1168,24 +1123,7 @@ static const struct qmp_phy_cfg sc8180x_dpphy_cfg = {
 	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
 	.configure_dp_phy	= qcom_qmp_v4_phy_configure_dp_phy,
 	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
-};
-
-static const struct qmp_phy_combo_cfg sc8180x_usb3dpphy_cfg = {
-	.usb_cfg		= &sm8150_usb3phy_cfg,
-	.dp_cfg			= &sc8180x_dpphy_cfg,
-};
-
-static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = {
-	.type			= PHY_TYPE_USB3,
 
-	.serdes_tbl		= sc8280xp_usb43dp_serdes_tbl,
-	.serdes_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl),
-	.tx_tbl			= sc8280xp_usb43dp_tx_tbl,
-	.tx_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_tx_tbl),
-	.rx_tbl			= sc8280xp_usb43dp_rx_tbl,
-	.rx_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_rx_tbl),
-	.pcs_tbl		= sc8280xp_usb43dp_pcs_tbl,
-	.pcs_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_pcs_tbl),
 	.clk_list		= qmp_v4_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
@@ -1194,10 +1132,19 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_usb_cfg = {
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v4_usb3phy_regs_layout,
 	.pcs_usb_offset		= 0x300,
+
+	.has_pwrdn_delay	= true,
 };
 
-static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
-	.type			= PHY_TYPE_DP,
+static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = {
+	.serdes_tbl		= sc8280xp_usb43dp_serdes_tbl,
+	.serdes_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl),
+	.tx_tbl			= sc8280xp_usb43dp_tx_tbl,
+	.tx_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_tx_tbl),
+	.rx_tbl			= sc8280xp_usb43dp_rx_tbl,
+	.rx_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_rx_tbl),
+	.pcs_tbl		= sc8280xp_usb43dp_pcs_tbl,
+	.pcs_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_pcs_tbl),
 
 	.dp_serdes_tbl		= qmp_v5_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v5_dp_serdes_tbl),
@@ -1222,16 +1169,18 @@ static const struct qmp_phy_cfg sc8280xp_usb43dp_dp_cfg = {
 	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
 	.configure_dp_phy	= qcom_qmp_v5_phy_configure_dp_phy,
 	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
-};
 
-static const struct qmp_phy_combo_cfg sc8280xp_usb43dpphy_cfg = {
-	.usb_cfg		= &sc8280xp_usb43dp_usb_cfg,
-	.dp_cfg			= &sc8280xp_usb43dp_dp_cfg,
+	.clk_list		= qmp_v4_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qmp_v4_usb3phy_regs_layout,
+	.pcs_usb_offset		= 0x300,
 };
 
-static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
-	.type			= PHY_TYPE_USB3,
-
+static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = {
 	.serdes_tbl		= sm8150_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(sm8150_usb3_serdes_tbl),
 	.tx_tbl			= sm8250_usb3_tx_tbl,
@@ -1242,20 +1191,6 @@ static const struct qmp_phy_cfg sm8250_usb3phy_cfg = {
 	.pcs_tbl_num		= ARRAY_SIZE(sm8250_usb3_pcs_tbl),
 	.pcs_usb_tbl		= sm8250_usb3_pcs_usb_tbl,
 	.pcs_usb_tbl_num	= ARRAY_SIZE(sm8250_usb3_pcs_usb_tbl),
-	.clk_list		= qmp_v4_sm8250_usbphy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
-	.reset_list		= msm8996_usb3phy_reset_l,
-	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
-	.vreg_list		= qmp_phy_vreg_l,
-	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
-	.regs			= qmp_v4_usb3phy_regs_layout,
-	.pcs_usb_offset		= 0x300,
-
-	.has_pwrdn_delay	= true,
-};
-
-static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
-	.type			= PHY_TYPE_DP,
 
 	.dp_serdes_tbl		= qmp_v4_dp_serdes_tbl,
 	.dp_serdes_tbl_num	= ARRAY_SIZE(qmp_v4_dp_serdes_tbl),
@@ -1280,11 +1215,17 @@ static const struct qmp_phy_cfg sm8250_dpphy_cfg = {
 	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
 	.configure_dp_phy	= qcom_qmp_v4_phy_configure_dp_phy,
 	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
-};
 
-static const struct qmp_phy_combo_cfg sm8250_usb3dpphy_cfg = {
-	.usb_cfg		= &sm8250_usb3phy_cfg,
-	.dp_cfg			= &sm8250_dpphy_cfg,
+	.clk_list		= qmp_v4_sm8250_usbphy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
+	.reset_list		= msm8996_usb3phy_reset_l,
+	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
+	.vreg_list		= qmp_phy_vreg_l,
+	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
+	.regs			= qmp_v4_usb3phy_regs_layout,
+	.pcs_usb_offset		= 0x300,
+
+	.has_pwrdn_delay	= true,
 };
 
 static void qmp_combo_configure_lane(void __iomem *base,
@@ -2697,10 +2638,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	void __iomem *serdes;
 	void __iomem *usb_serdes;
 	void __iomem *dp_serdes = NULL;
-	const struct qmp_phy_combo_cfg *combo_cfg = NULL;
 	const struct qmp_phy_cfg *cfg = NULL;
-	const struct qmp_phy_cfg *usb_cfg = NULL;
-	const struct qmp_phy_cfg *dp_cfg = NULL;
 	int num, id, expected_phys;
 	int ret;
 
@@ -2711,13 +2649,10 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	qmp->dev = dev;
 	dev_set_drvdata(dev, qmp);
 
-	combo_cfg = of_device_get_match_data(dev);
-	if (!combo_cfg)
+	cfg = of_device_get_match_data(dev);
+	if (!cfg)
 		return -EINVAL;
 
-	usb_cfg = combo_cfg->usb_cfg;
-	cfg = usb_cfg; /* Setup clks and regulators */
-
 	usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(serdes))
 		return PTR_ERR(serdes);
@@ -2730,7 +2665,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (IS_ERR(dp_serdes))
 		return PTR_ERR(dp_serdes);
 
-	dp_cfg = combo_cfg->dp_cfg;
 	expected_phys = 2;
 
 	mutex_init(&qmp->phy_mutex);
@@ -2769,7 +2703,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	id = 0;
 	for_each_available_child_of_node(dev->of_node, child) {
 		if (of_node_name_eq(child, "dp-phy")) {
-			cfg = dp_cfg;
 			serdes = dp_serdes;
 
 			/* Create per-lane phy */
@@ -2787,7 +2720,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 				goto err_node_put;
 			}
 		} else if (of_node_name_eq(child, "usb3-phy")) {
-			cfg = usb_cfg;
 			serdes = usb_serdes;
 
 			/* Create per-lane phy */
-- 
GitLab


From dd1153651b0383ee9597609bc449d1751eefdcae Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:18 +0100
Subject: [PATCH 2699/4122] phy: qcom-qmp-combo: merge driver data

The QMP combo driver manages a single PHY (even if it provides two
interfaces for USB and DP, respectively) so merge the old qcom_qmp and
qmp_phy structures and drop the PHY array.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-20-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 688 ++++++++++------------
 1 file changed, 312 insertions(+), 376 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 298477259ee62..707dd68ba9935 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -796,7 +796,7 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = {
 	{ 0x3f, 0xff, 0xff, 0xff }
 };
 
-struct qmp_phy;
+struct qmp_combo;
 
 struct qmp_phy_cfg {
 	/* Init sequence for PHY blocks - serdes, tx, rx, pcs */
@@ -833,10 +833,10 @@ struct qmp_phy_cfg {
 	const u8 (*pre_emphasis_hbr3_hbr2)[4][4];
 
 	/* DP PHY callbacks */
-	int (*configure_dp_phy)(struct qmp_phy *qphy);
-	void (*configure_dp_tx)(struct qmp_phy *qphy);
-	int (*calibrate_dp_phy)(struct qmp_phy *qphy);
-	void (*dp_aux_init)(struct qmp_phy *qphy);
+	int (*configure_dp_phy)(struct qmp_combo *qmp);
+	void (*configure_dp_tx)(struct qmp_combo *qmp);
+	int (*calibrate_dp_phy)(struct qmp_combo *qmp);
+	void (*dp_aux_init)(struct qmp_combo *qmp);
 
 	/* clock ids to be requested */
 	const char * const *clk_list;
@@ -859,29 +859,19 @@ struct qmp_phy_cfg {
 
 };
 
-/**
- * struct qmp_phy - per-lane phy descriptor
- *
- * @phy: generic phy
- * @cfg: phy specific configuration
- * @serdes: iomapped memory space for phy's serdes (i.e. PLL)
- * @tx: iomapped memory space for lane's tx
- * @rx: iomapped memory space for lane's rx
- * @pcs: iomapped memory space for lane's pcs
- * @tx2: iomapped memory space for second lane's tx (in dual lane PHYs)
- * @rx2: iomapped memory space for second lane's rx (in dual lane PHYs)
- * @pcs_misc: iomapped memory space for lane's pcs_misc
- * @pcs_usb: iomapped memory space for lane's pcs_usb
- * @pipe_clk: pipe clock
- * @qmp: QMP phy to which this lane belongs
- * @mode: current PHY mode
- * @dp_aux_cfg: Display port aux config
- * @dp_opts: Display port optional config
- * @dp_clks: Display port clocks
- */
-struct qmp_phy {
-	struct phy *phy;
+struct qmp_phy_dp_clks {
+	struct qmp_combo *qmp;
+	struct clk_hw dp_link_hw;
+	struct clk_hw dp_pixel_hw;
+};
+
+struct qmp_combo {
+	struct device *dev;
+
 	const struct qmp_phy_cfg *cfg;
+
+	void __iomem *dp_com;
+
 	void __iomem *serdes;
 	void __iomem *tx;
 	void __iomem *rx;
@@ -897,59 +887,33 @@ struct qmp_phy {
 	void __iomem *dp_pcs;
 
 	struct clk *pipe_clk;
-	struct qcom_qmp *qmp;
-	enum phy_mode mode;
-	unsigned int dp_aux_cfg;
-	struct phy_configure_opts_dp dp_opts;
-	struct qmp_phy_dp_clks *dp_clks;
-};
-
-struct qmp_phy_dp_clks {
-	struct qmp_phy *qphy;
-	struct clk_hw dp_link_hw;
-	struct clk_hw dp_pixel_hw;
-};
-
-/**
- * struct qcom_qmp - structure holding QMP phy block attributes
- *
- * @dev: device
- * @dp_com: iomapped memory space for phy's dp_com control block
- *
- * @clks: array of clocks required by phy
- * @resets: array of resets required by phy
- * @vregs: regulator supplies bulk data
- *
- * @phys: array of per-lane phy descriptors
- * @phy_mutex: mutex lock for PHY common block initialization
- * @init_count: phy common block initialization count
- */
-struct qcom_qmp {
-	struct device *dev;
-	void __iomem *dp_com;
-
 	struct clk_bulk_data *clks;
 	struct reset_control_bulk_data *resets;
 	struct regulator_bulk_data *vregs;
 
-	struct qmp_phy **phys;
-	struct qmp_phy *usb_phy;
-
 	struct mutex phy_mutex;
 	int init_count;
+
+	struct phy *usb_phy;
+	enum phy_mode mode;
+
+	struct phy *dp_phy;
+	unsigned int dp_aux_cfg;
+	struct phy_configure_opts_dp dp_opts;
+	struct qmp_phy_dp_clks *dp_clks;
 };
 
-static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy);
-static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy);
-static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy);
-static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy);
+static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp);
+static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp);
+static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp);
+static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp);
 
-static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy);
-static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy);
-static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy);
-static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy);
+static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp);
+static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp);
+static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp);
+static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp);
 
-static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy);
+static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp);
 
 static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
 {
@@ -1254,11 +1218,11 @@ static void qmp_combo_configure(void __iomem *base,
 	qmp_combo_configure_lane(base, tbl, num, 0xff);
 }
 
-static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy)
+static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *serdes = qphy->dp_serdes;
-	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *serdes = qmp->dp_serdes;
+	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 
 	qmp_combo_configure(serdes, cfg->dp_serdes_tbl, cfg->dp_serdes_tbl_num);
 
@@ -1287,54 +1251,54 @@ static int qmp_combo_dp_serdes_init(struct qmp_phy *qphy)
 	return 0;
 }
 
-static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_phy *qphy)
+static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	       qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	/* Turn on BIAS current for PHY/PLL */
 	writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX |
 	       QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL,
-	       qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
+	       qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
 
-	writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_LANE_0_1_PWRDN |
 	       DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN |
 	       DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	       qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	writel(QSERDES_V3_COM_BIAS_EN |
 	       QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN |
 	       QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL |
 	       QSERDES_V3_COM_CLKBUF_RX_DRIVE_L,
-	       qphy->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
-
-	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
-	writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0x24, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
-	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
-	writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
-	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
-	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
-	writel(0xbb, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
-	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
-	qphy->dp_aux_cfg = 0;
+	       qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
+
+	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
+	writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0x24, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
+	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
+	writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
+	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
+	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
+	writel(0xbb, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
+	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
+	qmp->dp_aux_cfg = 0;
 
 	writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK |
 	       PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK |
 	       PHY_AUX_REQ_ERR_MASK,
-	       qphy->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK);
+	       qmp->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK);
 }
 
-static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy,
+static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp,
 		unsigned int drv_lvl_reg, unsigned int emp_post_reg)
 {
-	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	unsigned int v_level = 0, p_level = 0;
 	u8 voltage_swing_cfg, pre_emphasis_cfg;
 	int i;
@@ -1360,20 +1324,20 @@ static int qmp_combo_configure_dp_swing(struct qmp_phy *qphy,
 	voltage_swing_cfg |= DP_PHY_TXn_TX_DRV_LVL_MUX_EN;
 	pre_emphasis_cfg |= DP_PHY_TXn_TX_EMP_POST1_LVL_MUX_EN;
 
-	writel(voltage_swing_cfg, qphy->dp_tx + drv_lvl_reg);
-	writel(pre_emphasis_cfg, qphy->dp_tx + emp_post_reg);
-	writel(voltage_swing_cfg, qphy->dp_tx2 + drv_lvl_reg);
-	writel(pre_emphasis_cfg, qphy->dp_tx2 + emp_post_reg);
+	writel(voltage_swing_cfg, qmp->dp_tx + drv_lvl_reg);
+	writel(pre_emphasis_cfg, qmp->dp_tx + emp_post_reg);
+	writel(voltage_swing_cfg, qmp->dp_tx2 + drv_lvl_reg);
+	writel(pre_emphasis_cfg, qmp->dp_tx2 + emp_post_reg);
 
 	return 0;
 }
 
-static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy)
+static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp)
 {
-	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
+	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 bias_en, drvr_en;
 
-	if (qmp_combo_configure_dp_swing(qphy, QSERDES_V3_TX_TX_DRV_LVL,
+	if (qmp_combo_configure_dp_swing(qmp, QSERDES_V3_TX_TX_DRV_LVL,
 				QSERDES_V3_TX_TX_EMP_POST1_LVL) < 0)
 		return;
 
@@ -1385,13 +1349,13 @@ static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_phy *qphy)
 		drvr_en = 0x10;
 	}
 
-	writel(drvr_en, qphy->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN);
-	writel(bias_en, qphy->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
-	writel(drvr_en, qphy->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN);
-	writel(bias_en, qphy->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr_en, qmp->dp_tx + QSERDES_V3_TX_HIGHZ_DRVR_EN);
+	writel(bias_en, qmp->dp_tx + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr_en, qmp->dp_tx2 + QSERDES_V3_TX_HIGHZ_DRVR_EN);
+	writel(bias_en, qmp->dp_tx2 + QSERDES_V3_TX_TRANSCEIVER_BIAS_EN);
 }
 
-static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy)
+static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp)
 {
 	u32 val;
 	bool reverse = false;
@@ -1411,27 +1375,27 @@ static bool qmp_combo_configure_dp_mode(struct qmp_phy *qphy)
 	 * if (lane_cnt == 4 || orientation == ORIENTATION_CC1)
 	 *	val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN;
 	 * if (orientation == ORIENTATION_CC2)
-	 *	writel(0x4c, qphy->dp_pcs + QSERDES_V3_DP_PHY_MODE);
+	 *	writel(0x4c, qmp->dp_pcs + QSERDES_V3_DP_PHY_MODE);
 	 */
 	val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN;
-	writel(val, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(val, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
-	writel(0x5c, qphy->dp_pcs + QSERDES_DP_PHY_MODE);
+	writel(0x5c, qmp->dp_pcs + QSERDES_DP_PHY_MODE);
 
 	return reverse;
 }
 
-static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy)
+static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp)
 {
-	const struct qmp_phy_dp_clks *dp_clks = qphy->dp_clks;
-	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
+	const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks;
+	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 phy_vco_div, status;
 	unsigned long pixel_freq;
 
-	qmp_combo_configure_dp_mode(qphy);
+	qmp_combo_configure_dp_mode(qmp);
 
-	writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL);
-	writel(0x05, qphy->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL);
+	writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL);
+	writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL);
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -1454,40 +1418,40 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy)
 		/* Other link rates aren't supported */
 		return -EINVAL;
 	}
-	writel(phy_vco_div, qphy->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV);
+	writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV);
 
 	clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000);
 	clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq);
 
-	writel(0x04, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	writel(0x20, qphy->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL);
+	writel(0x20, qmp->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL);
 
-	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V3_COM_C_READY_STATUS,
+	if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V3_COM_C_READY_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	return readl_poll_timeout(qphy->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
+	return readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1498,76 +1462,76 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_phy *qphy)
  * We need to calibrate the aux setting here as many times
  * as the caller tries
  */
-static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_phy *qphy)
+static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp)
 {
 	static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d };
 	u8 val;
 
-	qphy->dp_aux_cfg++;
-	qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
-	val = cfg1_settings[qphy->dp_aux_cfg];
+	qmp->dp_aux_cfg++;
+	qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
+	val = cfg1_settings[qmp->dp_aux_cfg];
 
-	writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
 
 	return 0;
 }
 
-static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_phy *qphy)
+static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	       qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	/* Turn on BIAS current for PHY/PLL */
-	writel(0x17, qphy->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN);
-
-	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
-	writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x00, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
-	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
-	writel(0x26, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
-	writel(0x0a, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
-	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
-	writel(0xb7, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
-	writel(0x03, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
-	qphy->dp_aux_cfg = 0;
+	writel(0x17, qmp->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN);
+
+	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
+	writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
+	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
+	writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
+	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
+	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
+	writel(0xb7, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
+	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
+	qmp->dp_aux_cfg = 0;
 
 	writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK |
 	       PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK |
 	       PHY_AUX_REQ_ERR_MASK,
-	       qphy->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK);
+	       qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK);
 }
 
-static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_phy *qphy)
+static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp)
 {
 	/* Program default values before writing proper values */
-	writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL);
-	writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qmp->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL);
 
-	writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
-	writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qmp->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qmp->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
 
-	qmp_combo_configure_dp_swing(qphy, QSERDES_V4_TX_TX_DRV_LVL,
+	qmp_combo_configure_dp_swing(qmp, QSERDES_V4_TX_TX_DRV_LVL,
 			QSERDES_V4_TX_TX_EMP_POST1_LVL);
 }
 
-static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy)
+static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp)
 {
-	const struct qmp_phy_dp_clks *dp_clks = qphy->dp_clks;
-	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
+	const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks;
+	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 phy_vco_div, status;
 	unsigned long pixel_freq;
 
-	writel(0x0f, qphy->dp_pcs + QSERDES_V4_DP_PHY_CFG_1);
+	writel(0x0f, qmp->dp_pcs + QSERDES_V4_DP_PHY_CFG_1);
 
-	qmp_combo_configure_dp_mode(qphy);
+	qmp_combo_configure_dp_mode(qmp);
 
-	writel(0x13, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0xa4, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
 
-	writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL);
-	writel(0x05, qphy->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL);
+	writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL);
+	writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL);
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -1590,49 +1554,49 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy)
 		/* Other link rates aren't supported */
 		return -EINVAL;
 	}
-	writel(phy_vco_div, qphy->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV);
+	writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV);
 
 	clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000);
 	clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq);
 
-	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x05, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x01, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x09, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	writel(0x20, qphy->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL);
+	writel(0x20, qmp->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL);
 
-	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_C_READY_STATUS,
+	if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_C_READY_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS,
+	if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_CMN_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	if (readl_poll_timeout(qphy->dp_serdes + QSERDES_V4_COM_CMN_STATUS,
+	if (readl_poll_timeout(qmp->dp_serdes + QSERDES_V4_COM_CMN_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1642,15 +1606,15 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_phy *qphy)
 	return 0;
 }
 
-static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy)
+static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp)
 {
-	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
+	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 bias0_en, drvr0_en, bias1_en, drvr1_en;
 	bool reverse = false;
 	u32 status;
 	int ret;
 
-	ret = qcom_qmp_v45_phy_configure_dp_phy(qphy);
+	ret = qcom_qmp_v45_phy_configure_dp_phy(qmp);
 	if (ret < 0)
 		return ret;
 
@@ -1676,43 +1640,43 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_phy *qphy)
 		drvr1_en = 0x10;
 	}
 
-	writel(drvr0_en, qphy->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN);
-	writel(bias0_en, qphy->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
-	writel(drvr1_en, qphy->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN);
-	writel(bias1_en, qphy->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr0_en, qmp->dp_tx + QSERDES_V4_TX_HIGHZ_DRVR_EN);
+	writel(bias0_en, qmp->dp_tx + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr1_en, qmp->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN);
+	writel(bias1_en, qmp->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
 
-	writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x0a, qphy->dp_tx + QSERDES_V4_TX_TX_POL_INV);
-	writel(0x0a, qphy->dp_tx2 + QSERDES_V4_TX_TX_POL_INV);
+	writel(0x0a, qmp->dp_tx + QSERDES_V4_TX_TX_POL_INV);
+	writel(0x0a, qmp->dp_tx2 + QSERDES_V4_TX_TX_POL_INV);
 
-	writel(0x27, qphy->dp_tx + QSERDES_V4_TX_TX_DRV_LVL);
-	writel(0x27, qphy->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL);
+	writel(0x27, qmp->dp_tx2 + QSERDES_V4_TX_TX_DRV_LVL);
 
-	writel(0x20, qphy->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
-	writel(0x20, qphy->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qmp->dp_tx + QSERDES_V4_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qmp->dp_tx2 + QSERDES_V4_TX_TX_EMP_POST1_LVL);
 
 	return 0;
 }
 
-static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy)
+static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp)
 {
-	const struct phy_configure_opts_dp *dp_opts = &qphy->dp_opts;
+	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 bias0_en, drvr0_en, bias1_en, drvr1_en;
 	bool reverse = false;
 	u32 status;
 	int ret;
 
-	ret = qcom_qmp_v45_phy_configure_dp_phy(qphy);
+	ret = qcom_qmp_v45_phy_configure_dp_phy(qmp);
 	if (ret < 0)
 		return ret;
 
@@ -1733,30 +1697,30 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy)
 		drvr1_en = 0x10;
 	}
 
-	writel(drvr0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
-	writel(bias0_en, qphy->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
-	writel(drvr1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
-	writel(bias1_en, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr0_en, qmp->dp_tx + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
+	writel(bias0_en, qmp->dp_tx + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
+	writel(drvr1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
+	writel(bias1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
 
-	writel(0x18, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qphy->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qphy->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x0a, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV);
-	writel(0x0a, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV);
+	writel(0x0a, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_POL_INV);
+	writel(0x0a, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_POL_INV);
 
-	writel(0x27, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL);
-	writel(0x27, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL);
+	writel(0x27, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_DRV_LVL);
+	writel(0x27, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_DRV_LVL);
 
-	writel(0x20, qphy->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
-	writel(0x20, qphy->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qmp->dp_tx + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
+	writel(0x20, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TX_EMP_POST1_LVL);
 
 	return 0;
 }
@@ -1765,16 +1729,16 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_phy *qphy)
  * We need to calibrate the aux setting here as many times
  * as the caller tries
  */
-static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy)
+static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp)
 {
 	static const u8 cfg1_settings[] = { 0x20, 0x13, 0x23, 0x1d };
 	u8 val;
 
-	qphy->dp_aux_cfg++;
-	qphy->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
-	val = cfg1_settings[qphy->dp_aux_cfg];
+	qmp->dp_aux_cfg++;
+	qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
+	val = cfg1_settings[qmp->dp_aux_cfg];
 
-	writel(val, qphy->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
 
 	return 0;
 }
@@ -1782,13 +1746,13 @@ static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_phy *qphy)
 static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opts)
 {
 	const struct phy_configure_opts_dp *dp_opts = &opts->dp;
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
-	memcpy(&qphy->dp_opts, dp_opts, sizeof(*dp_opts));
-	if (qphy->dp_opts.set_voltages) {
-		cfg->configure_dp_tx(qphy);
-		qphy->dp_opts.set_voltages = 0;
+	memcpy(&qmp->dp_opts, dp_opts, sizeof(*dp_opts));
+	if (qmp->dp_opts.set_voltages) {
+		cfg->configure_dp_tx(qmp);
+		qmp->dp_opts.set_voltages = 0;
 	}
 
 	return 0;
@@ -1796,20 +1760,18 @@ static int qmp_combo_dp_configure(struct phy *phy, union phy_configure_opts *opt
 
 static int qmp_combo_dp_calibrate(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
 	if (cfg->calibrate_dp_phy)
-		return cfg->calibrate_dp_phy(qphy);
+		return cfg->calibrate_dp_phy(qmp);
 
 	return 0;
 }
 
-static int qmp_combo_com_init(struct qmp_phy *qphy)
+static int qmp_combo_com_init(struct qmp_combo *qmp)
 {
-	struct qcom_qmp *qmp = qphy->qmp;
-	struct qmp_phy *usb_phy = qmp->usb_phy;
-	const struct qmp_phy_cfg *cfg = usb_phy->cfg;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	void __iomem *dp_com = qmp->dp_com;
 	int ret;
 
@@ -1861,7 +1823,7 @@ static int qmp_combo_com_init(struct qmp_phy *qphy)
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03);
 	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 
-	qphy_setbits(usb_phy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+	qphy_setbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			SW_PWRDN);
 
 	mutex_unlock(&qmp->phy_mutex);
@@ -1878,11 +1840,9 @@ err_unlock:
 	return ret;
 }
 
-static int qmp_combo_com_exit(struct qmp_phy *qphy)
+static int qmp_combo_com_exit(struct qmp_combo *qmp)
 {
-	struct qcom_qmp *qmp = qphy->qmp;
-	struct qmp_phy *usb_phy = qmp->usb_phy;
-	const struct qmp_phy_cfg *cfg = usb_phy->cfg;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
 	mutex_lock(&qmp->phy_mutex);
 	if (--qmp->init_count) {
@@ -1903,77 +1863,76 @@ static int qmp_combo_com_exit(struct qmp_phy *qphy)
 
 static int qmp_combo_dp_init(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	int ret;
 
-	ret = qmp_combo_com_init(qphy);
+	ret = qmp_combo_com_init(qmp);
 	if (ret)
 		return ret;
 
-	cfg->dp_aux_init(qphy);
+	cfg->dp_aux_init(qmp);
 
 	return 0;
 }
 
 static int qmp_combo_dp_exit(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
 
-	qmp_combo_com_exit(qphy);
+	qmp_combo_com_exit(qmp);
 
 	return 0;
 }
 
 static int qmp_combo_dp_power_on(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *tx = qphy->dp_tx;
-	void __iomem *tx2 = qphy->dp_tx2;
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *tx = qmp->dp_tx;
+	void __iomem *tx2 = qmp->dp_tx2;
 
-	qmp_combo_dp_serdes_init(qphy);
+	qmp_combo_dp_serdes_init(qmp);
 
 	qmp_combo_configure_lane(tx, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 1);
 	qmp_combo_configure_lane(tx2, cfg->dp_tx_tbl, cfg->dp_tx_tbl_num, 2);
 
 	/* Configure special DP tx tunings */
-	cfg->configure_dp_tx(qphy);
+	cfg->configure_dp_tx(qmp);
 
 	/* Configure link rate, swing, etc. */
-	cfg->configure_dp_phy(qphy);
+	cfg->configure_dp_phy(qmp);
 
 	return 0;
 }
 
 static int qmp_combo_dp_power_off(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
 
 	/* Assert DP PHY power down */
-	writel(DP_PHY_PD_CTL_PSR_PWRDN, qphy->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
 
 	return 0;
 }
 
 static int qmp_combo_usb_power_on(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	struct qcom_qmp *qmp = qphy->qmp;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *serdes = qphy->serdes;
-	void __iomem *tx = qphy->tx;
-	void __iomem *rx = qphy->rx;
-	void __iomem *tx2 = qphy->tx2;
-	void __iomem *rx2 = qphy->rx2;
-	void __iomem *pcs = qphy->pcs;
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *serdes = qmp->serdes;
+	void __iomem *tx = qmp->tx;
+	void __iomem *rx = qmp->rx;
+	void __iomem *tx2 = qmp->tx2;
+	void __iomem *rx2 = qmp->rx2;
+	void __iomem *pcs = qmp->pcs;
 	void __iomem *status;
 	unsigned int val;
 	int ret;
 
 	qmp_combo_configure(serdes, cfg->serdes_tbl, cfg->serdes_tbl_num);
 
-	ret = clk_prepare_enable(qphy->pipe_clk);
+	ret = clk_prepare_enable(qmp->pipe_clk);
 	if (ret) {
 		dev_err(qmp->dev, "pipe_clk enable failed err=%d\n", ret);
 		return ret;
@@ -2008,27 +1967,27 @@ static int qmp_combo_usb_power_on(struct phy *phy)
 	return 0;
 
 err_disable_pipe_clk:
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 
 	return ret;
 }
 
 static int qmp_combo_usb_power_off(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 
 	/* PHY reset */
-	qphy_setbits(qphy->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
+	qphy_setbits(qmp->pcs, cfg->regs[QPHY_SW_RESET], SW_RESET);
 
 	/* stop SerDes and Phy-Coding-Sublayer */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_START_CTRL],
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_START_CTRL],
 			SERDES_START | PCS_START);
 
 	/* Put PHY into POWER DOWN state: active low */
-	qphy_clrbits(qphy->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
+	qphy_clrbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			SW_PWRDN);
 
 	return 0;
@@ -2036,37 +1995,37 @@ static int qmp_combo_usb_power_off(struct phy *phy)
 
 static int qmp_combo_usb_init(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
 	int ret;
 
-	ret = qmp_combo_com_init(qphy);
+	ret = qmp_combo_com_init(qmp);
 	if (ret)
 		return ret;
 
 	ret = qmp_combo_usb_power_on(phy);
 	if (ret)
-		qmp_combo_com_exit(qphy);
+		qmp_combo_com_exit(qmp);
 
 	return ret;
 }
 
 static int qmp_combo_usb_exit(struct phy *phy)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
 	int ret;
 
 	ret = qmp_combo_usb_power_off(phy);
 	if (ret)
 		return ret;
 
-	return qmp_combo_com_exit(qphy);
+	return qmp_combo_com_exit(qmp);
 }
 
 static int qmp_combo_usb_set_mode(struct phy *phy, enum phy_mode mode, int submode)
 {
-	struct qmp_phy *qphy = phy_get_drvdata(phy);
+	struct qmp_combo *qmp = phy_get_drvdata(phy);
 
-	qphy->mode = mode;
+	qmp->mode = mode;
 
 	return 0;
 }
@@ -2088,15 +2047,15 @@ static const struct phy_ops qmp_combo_dp_phy_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy)
+static void qmp_combo_enable_autonomous_mode(struct qmp_combo *qmp)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs;
-	void __iomem *pcs_misc = qphy->pcs_misc;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs;
+	void __iomem *pcs_misc = qmp->pcs_misc;
 	u32 intr_mask;
 
-	if (qphy->mode == PHY_MODE_USB_HOST_SS ||
-	    qphy->mode == PHY_MODE_USB_DEVICE_SS)
+	if (qmp->mode == PHY_MODE_USB_HOST_SS ||
+	    qmp->mode == PHY_MODE_USB_DEVICE_SS)
 		intr_mask = ARCVR_DTCT_EN | ALFPS_DTCT_EN;
 	else
 		intr_mask = ARCVR_DTCT_EN | ARCVR_DTCT_EVENT_SEL;
@@ -2117,11 +2076,11 @@ static void qmp_combo_enable_autonomous_mode(struct qmp_phy *qphy)
 		qphy_clrbits(pcs_misc, QPHY_V3_PCS_MISC_CLAMP_ENABLE, CLAMP_EN);
 }
 
-static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy)
+static void qmp_combo_disable_autonomous_mode(struct qmp_combo *qmp)
 {
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
-	void __iomem *pcs_usb = qphy->pcs_usb ?: qphy->pcs;
-	void __iomem *pcs_misc = qphy->pcs_misc;
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	void __iomem *pcs_usb = qmp->pcs_usb ?: qmp->pcs;
+	void __iomem *pcs_misc = qmp->pcs_misc;
 
 	/* Disable i/o clamp_n on resume for normal mode */
 	if (pcs_misc)
@@ -2137,20 +2096,19 @@ static void qmp_combo_disable_autonomous_mode(struct qmp_phy *qphy)
 
 static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
-	struct qmp_phy *qphy = qmp->usb_phy;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 
-	dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qphy->mode);
+	dev_vdbg(dev, "Suspending QMP phy, mode:%d\n", qmp->mode);
 
 	if (!qmp->init_count) {
 		dev_vdbg(dev, "PHY not initialized, bailing out\n");
 		return 0;
 	}
 
-	qmp_combo_enable_autonomous_mode(qphy);
+	qmp_combo_enable_autonomous_mode(qmp);
 
-	clk_disable_unprepare(qphy->pipe_clk);
+	clk_disable_unprepare(qmp->pipe_clk);
 	clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks);
 
 	return 0;
@@ -2158,12 +2116,11 @@ static int __maybe_unused qmp_combo_runtime_suspend(struct device *dev)
 
 static int __maybe_unused qmp_combo_runtime_resume(struct device *dev)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
-	struct qmp_phy *qphy = qmp->usb_phy;
-	const struct qmp_phy_cfg *cfg = qphy->cfg;
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	int ret = 0;
 
-	dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qphy->mode);
+	dev_vdbg(dev, "Resuming QMP phy, mode:%d\n", qmp->mode);
 
 	if (!qmp->init_count) {
 		dev_vdbg(dev, "PHY not initialized, bailing out\n");
@@ -2174,14 +2131,14 @@ static int __maybe_unused qmp_combo_runtime_resume(struct device *dev)
 	if (ret)
 		return ret;
 
-	ret = clk_prepare_enable(qphy->pipe_clk);
+	ret = clk_prepare_enable(qmp->pipe_clk);
 	if (ret) {
 		dev_err(dev, "pipe_clk enable failed, err=%d\n", ret);
 		clk_bulk_disable_unprepare(cfg->num_clks, qmp->clks);
 		return ret;
 	}
 
-	qmp_combo_disable_autonomous_mode(qphy);
+	qmp_combo_disable_autonomous_mode(qmp);
 
 	return 0;
 }
@@ -2193,7 +2150,7 @@ static const struct dev_pm_ops qmp_combo_pm_ops = {
 
 static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_vregs;
 	int ret, i;
 
@@ -2225,7 +2182,7 @@ static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg
 
 static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
 	int i;
 	int ret;
 
@@ -2246,7 +2203,7 @@ static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cf
 
 static int qmp_combo_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
 	int num = cfg->num_clks;
 	int i;
 
@@ -2283,7 +2240,7 @@ static void phy_clk_release_provider(void *res)
  *    clk  |   +-------+   |                   +-----+
  *         +---------------+
  */
-static int phy_pipe_clk_register(struct qcom_qmp *qmp, struct device_node *np)
+static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np)
 {
 	struct clk_fixed_rate *fixed;
 	struct clk_init_data init = { };
@@ -2386,12 +2343,12 @@ static unsigned long
 qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
 	const struct qmp_phy_dp_clks *dp_clks;
-	const struct qmp_phy *qphy;
+	const struct qmp_combo *qmp;
 	const struct phy_configure_opts_dp *dp_opts;
 
 	dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_pixel_hw);
-	qphy = dp_clks->qphy;
-	dp_opts = &qphy->dp_opts;
+	qmp = dp_clks->qmp;
+	dp_opts = &qmp->dp_opts;
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -2430,12 +2387,12 @@ static unsigned long
 qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
 	const struct qmp_phy_dp_clks *dp_clks;
-	const struct qmp_phy *qphy;
+	const struct qmp_combo *qmp;
 	const struct phy_configure_opts_dp *dp_opts;
 
 	dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_link_hw);
-	qphy = dp_clks->qphy;
-	dp_opts = &qphy->dp_opts;
+	qmp = dp_clks->qmp;
+	dp_opts = &qmp->dp_opts;
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -2470,8 +2427,7 @@ qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data)
 	return &dp_clks->dp_pixel_hw;
 }
 
-static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy,
-				struct device_node *np)
+static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 {
 	struct clk_init_data init = { };
 	struct qmp_phy_dp_clks *dp_clks;
@@ -2482,8 +2438,8 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy,
 	if (!dp_clks)
 		return -ENOMEM;
 
-	dp_clks->qphy = qphy;
-	qphy->dp_clks = dp_clks;
+	dp_clks->qmp = qmp;
+	qmp->dp_clks = dp_clks;
 
 	snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev));
 	init.ops = &qcom_qmp_dp_link_clk_ops;
@@ -2512,20 +2468,15 @@ static int phy_dp_clks_register(struct qcom_qmp *qmp, struct qmp_phy *qphy,
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int id,
+static int qmp_combo_create_dp(struct device *dev, struct device_node *np,
 			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
 	struct phy *generic_phy;
-	struct qmp_phy *qphy;
 	int ret;
 
-	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
-	if (!qphy)
-		return -ENOMEM;
-
-	qphy->cfg = cfg;
-	qphy->dp_serdes = serdes;
+	qmp->cfg = cfg;
+	qmp->dp_serdes = serdes;
 	/*
 	 * Get memory resources from the DP child node:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2;
@@ -2533,17 +2484,17 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i
 	 *
 	 * Note that only tx/tx2 and pcs are used by the DP implementation.
 	 */
-	qphy->dp_tx = devm_of_iomap(dev, np, 0, NULL);
-	if (IS_ERR(qphy->dp_tx))
-		return PTR_ERR(qphy->dp_tx);
+	qmp->dp_tx = devm_of_iomap(dev, np, 0, NULL);
+	if (IS_ERR(qmp->dp_tx))
+		return PTR_ERR(qmp->dp_tx);
 
-	qphy->dp_pcs = devm_of_iomap(dev, np, 2, NULL);
-	if (IS_ERR(qphy->dp_pcs))
-		return PTR_ERR(qphy->dp_pcs);
+	qmp->dp_pcs = devm_of_iomap(dev, np, 2, NULL);
+	if (IS_ERR(qmp->dp_pcs))
+		return PTR_ERR(qmp->dp_pcs);
 
-	qphy->dp_tx2 = devm_of_iomap(dev, np, 3, NULL);
-	if (IS_ERR(qphy->dp_tx2))
-		return PTR_ERR(qphy->dp_tx2);
+	qmp->dp_tx2 = devm_of_iomap(dev, np, 3, NULL);
+	if (IS_ERR(qmp->dp_tx2))
+		return PTR_ERR(qmp->dp_tx2);
 
 	generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops);
 	if (IS_ERR(generic_phy)) {
@@ -2552,66 +2503,59 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np, int i
 		return ret;
 	}
 
-	qphy->phy = generic_phy;
-	qphy->qmp = qmp;
-	qmp->phys[id] = qphy;
-	phy_set_drvdata(generic_phy, qphy);
+	qmp->dp_phy = generic_phy;
+	phy_set_drvdata(generic_phy, qmp);
 
 	return 0;
 }
 
-static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int id,
+static int qmp_combo_create_usb(struct device *dev, struct device_node *np,
 			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
 {
-	struct qcom_qmp *qmp = dev_get_drvdata(dev);
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
 	struct phy *generic_phy;
-	struct qmp_phy *qphy;
 	int ret;
 
-	qphy = devm_kzalloc(dev, sizeof(*qphy), GFP_KERNEL);
-	if (!qphy)
-		return -ENOMEM;
-
-	qphy->cfg = cfg;
-	qphy->serdes = serdes;
+	qmp->cfg = cfg;
+	qmp->serdes = serdes;
 	/*
 	 * Get memory resources from the USB child node:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2;
 	 * tx2 -> 3; rx2 -> 4; pcs_misc (optional) -> 5
 	 */
-	qphy->tx = devm_of_iomap(dev, np, 0, NULL);
-	if (IS_ERR(qphy->tx))
-		return PTR_ERR(qphy->tx);
+	qmp->tx = devm_of_iomap(dev, np, 0, NULL);
+	if (IS_ERR(qmp->tx))
+		return PTR_ERR(qmp->tx);
 
-	qphy->rx = devm_of_iomap(dev, np, 1, NULL);
-	if (IS_ERR(qphy->rx))
-		return PTR_ERR(qphy->rx);
+	qmp->rx = devm_of_iomap(dev, np, 1, NULL);
+	if (IS_ERR(qmp->rx))
+		return PTR_ERR(qmp->rx);
 
-	qphy->pcs = devm_of_iomap(dev, np, 2, NULL);
-	if (IS_ERR(qphy->pcs))
-		return PTR_ERR(qphy->pcs);
+	qmp->pcs = devm_of_iomap(dev, np, 2, NULL);
+	if (IS_ERR(qmp->pcs))
+		return PTR_ERR(qmp->pcs);
 
 	if (cfg->pcs_usb_offset)
-		qphy->pcs_usb = qphy->pcs + cfg->pcs_usb_offset;
+		qmp->pcs_usb = qmp->pcs + cfg->pcs_usb_offset;
 
-	qphy->tx2 = devm_of_iomap(dev, np, 3, NULL);
-	if (IS_ERR(qphy->tx2))
-		return PTR_ERR(qphy->tx2);
+	qmp->tx2 = devm_of_iomap(dev, np, 3, NULL);
+	if (IS_ERR(qmp->tx2))
+		return PTR_ERR(qmp->tx2);
 
-	qphy->rx2 = devm_of_iomap(dev, np, 4, NULL);
-	if (IS_ERR(qphy->rx2))
-		return PTR_ERR(qphy->rx2);
+	qmp->rx2 = devm_of_iomap(dev, np, 4, NULL);
+	if (IS_ERR(qmp->rx2))
+		return PTR_ERR(qmp->rx2);
 
-	qphy->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
-	if (IS_ERR(qphy->pcs_misc)) {
+	qmp->pcs_misc = devm_of_iomap(dev, np, 5, NULL);
+	if (IS_ERR(qmp->pcs_misc)) {
 		dev_vdbg(dev, "PHY pcs_misc-reg not used\n");
-		qphy->pcs_misc = NULL;
+		qmp->pcs_misc = NULL;
 	}
 
-	qphy->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
-	if (IS_ERR(qphy->pipe_clk)) {
-		return dev_err_probe(dev, PTR_ERR(qphy->pipe_clk),
-				     "failed to get lane%d pipe_clk\n", id);
+	qmp->pipe_clk = devm_get_clk_from_child(dev, np, NULL);
+	if (IS_ERR(qmp->pipe_clk)) {
+		return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+				     "failed to get pipe clock\n");
 	}
 
 	generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops);
@@ -2621,17 +2565,15 @@ static int qmp_combo_create_usb(struct device *dev, struct device_node *np, int
 		return ret;
 	}
 
-	qphy->phy = generic_phy;
-	qphy->qmp = qmp;
-	qmp->phys[id] = qphy;
-	phy_set_drvdata(generic_phy, qphy);
+	qmp->usb_phy = generic_phy;
+	phy_set_drvdata(generic_phy, qmp);
 
 	return 0;
 }
 
 static int qmp_combo_probe(struct platform_device *pdev)
 {
-	struct qcom_qmp *qmp;
+	struct qmp_combo *qmp;
 	struct device *dev = &pdev->dev;
 	struct device_node *child;
 	struct phy_provider *phy_provider;
@@ -2686,10 +2628,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (num > expected_phys)
 		return -EINVAL;
 
-	qmp->phys = devm_kcalloc(dev, num, sizeof(*qmp->phys), GFP_KERNEL);
-	if (!qmp->phys)
-		return -ENOMEM;
-
 	pm_runtime_set_active(dev);
 	ret = devm_pm_runtime_enable(dev);
 	if (ret)
@@ -2706,14 +2644,14 @@ static int qmp_combo_probe(struct platform_device *pdev)
 			serdes = dp_serdes;
 
 			/* Create per-lane phy */
-			ret = qmp_combo_create_dp(dev, child, id, serdes, cfg);
+			ret = qmp_combo_create_dp(dev, child, serdes, cfg);
 			if (ret) {
 				dev_err(dev, "failed to create lane%d phy, %d\n",
 					id, ret);
 				goto err_node_put;
 			}
 
-			ret = phy_dp_clks_register(qmp, qmp->phys[id], child);
+			ret = phy_dp_clks_register(qmp, child);
 			if (ret) {
 				dev_err(qmp->dev,
 					"failed to register DP clock source\n");
@@ -2723,15 +2661,13 @@ static int qmp_combo_probe(struct platform_device *pdev)
 			serdes = usb_serdes;
 
 			/* Create per-lane phy */
-			ret = qmp_combo_create_usb(dev, child, id, serdes, cfg);
+			ret = qmp_combo_create_usb(dev, child, serdes, cfg);
 			if (ret) {
 				dev_err(dev, "failed to create lane%d phy, %d\n",
 					id, ret);
 				goto err_node_put;
 			}
 
-			qmp->usb_phy = qmp->phys[id];
-
 			/*
 			 * Register the pipe clock provided by phy.
 			 * See function description to see details of this pipe clock.
-- 
GitLab


From 6c7c449a008b7a279e15254a829d096a7ea72ee3 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:19 +0100
Subject: [PATCH 2700/4122] phy: qcom-qmp-combo: clean up device-tree parsing

Since the QMP driver split there will be precisely two child nodes so
drop the obsolete iteration construct.

While at it, drop the verbose error logging that would have been printed
also on probe deferrals.

Note that there is no need to check if there are additional child nodes
(the kernel is not a devicetree validator), but let's return an error if
either child node is missing.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-21-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 ++++++++---------------
 1 file changed, 28 insertions(+), 54 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 707dd68ba9935..9eacbd224012f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2575,13 +2575,12 @@ static int qmp_combo_probe(struct platform_device *pdev)
 {
 	struct qmp_combo *qmp;
 	struct device *dev = &pdev->dev;
-	struct device_node *child;
+	struct device_node *dp_np, *usb_np;
 	struct phy_provider *phy_provider;
 	void __iomem *serdes;
 	void __iomem *usb_serdes;
 	void __iomem *dp_serdes = NULL;
 	const struct qmp_phy_cfg *cfg = NULL;
-	int num, id, expected_phys;
 	int ret;
 
 	qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL);
@@ -2607,8 +2606,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (IS_ERR(dp_serdes))
 		return PTR_ERR(dp_serdes);
 
-	expected_phys = 2;
-
 	mutex_init(&qmp->phy_mutex);
 
 	ret = qmp_combo_clk_init(dev, cfg);
@@ -2623,75 +2620,52 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	num = of_get_available_child_count(dev->of_node);
-	/* do we have a rogue child node ? */
-	if (num > expected_phys)
+	usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
+	if (!usb_np)
+		return -EINVAL;
+
+	dp_np = of_get_child_by_name(dev->of_node, "dp-phy");
+	if (!dp_np) {
+		of_node_put(usb_np);
 		return -EINVAL;
+	}
 
 	pm_runtime_set_active(dev);
 	ret = devm_pm_runtime_enable(dev);
 	if (ret)
-		return ret;
+		goto err_node_put;
 	/*
 	 * Prevent runtime pm from being ON by default. Users can enable
 	 * it using power/control in sysfs.
 	 */
 	pm_runtime_forbid(dev);
 
-	id = 0;
-	for_each_available_child_of_node(dev->of_node, child) {
-		if (of_node_name_eq(child, "dp-phy")) {
-			serdes = dp_serdes;
-
-			/* Create per-lane phy */
-			ret = qmp_combo_create_dp(dev, child, serdes, cfg);
-			if (ret) {
-				dev_err(dev, "failed to create lane%d phy, %d\n",
-					id, ret);
-				goto err_node_put;
-			}
-
-			ret = phy_dp_clks_register(qmp, child);
-			if (ret) {
-				dev_err(qmp->dev,
-					"failed to register DP clock source\n");
-				goto err_node_put;
-			}
-		} else if (of_node_name_eq(child, "usb3-phy")) {
-			serdes = usb_serdes;
-
-			/* Create per-lane phy */
-			ret = qmp_combo_create_usb(dev, child, serdes, cfg);
-			if (ret) {
-				dev_err(dev, "failed to create lane%d phy, %d\n",
-					id, ret);
-				goto err_node_put;
-			}
-
-			/*
-			 * Register the pipe clock provided by phy.
-			 * See function description to see details of this pipe clock.
-			 */
-			ret = phy_pipe_clk_register(qmp, child);
-			if (ret) {
-				dev_err(qmp->dev,
-					"failed to register pipe clock source\n");
-				goto err_node_put;
-			}
-		}
+	ret = qmp_combo_create_usb(dev, usb_np, usb_serdes, cfg);
+	if (ret)
+		goto err_node_put;
 
-		id++;
-	}
+	ret = phy_pipe_clk_register(qmp, usb_np);
+	if (ret)
+		goto err_node_put;
 
-	if (!qmp->usb_phy)
-		return -EINVAL;
+	ret = qmp_combo_create_dp(dev, dp_np, dp_serdes, cfg);
+	if (ret)
+		goto err_node_put;
+
+	ret = phy_dp_clks_register(qmp, dp_np);
+	if (ret)
+		goto err_node_put;
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
+	of_node_put(usb_np);
+	of_node_put(dp_np);
+
 	return PTR_ERR_OR_ZERO(phy_provider);
 
 err_node_put:
-	of_node_put(child);
+	of_node_put(usb_np);
+	of_node_put(dp_np);
 	return ret;
 }
 
-- 
GitLab


From 44aff8e31080e13a24313120aae259c659b04cd1 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:20 +0100
Subject: [PATCH 2701/4122] phy: qcom-qmp-combo: clean up probe initialisation

Stop abusing the driver data pointer and instead pass the driver state
structure directly to the initialisation helpers during probe.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-22-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 61 ++++++++++-------------
 1 file changed, 27 insertions(+), 34 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 9eacbd224012f..5d92cbfc458ef 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2148,9 +2148,10 @@ static const struct dev_pm_ops qmp_combo_pm_ops = {
 			   qmp_combo_runtime_resume, NULL)
 };
 
-static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_combo_vreg_init(struct qmp_combo *qmp)
 {
-	struct qmp_combo *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_vregs;
 	int ret, i;
 
@@ -2180,9 +2181,10 @@ static int qmp_combo_vreg_init(struct device *dev, const struct qmp_phy_cfg *cfg
 	return 0;
 }
 
-static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_combo_reset_init(struct qmp_combo *qmp)
 {
-	struct qmp_combo *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int i;
 	int ret;
 
@@ -2201,9 +2203,10 @@ static int qmp_combo_reset_init(struct device *dev, const struct qmp_phy_cfg *cf
 	return 0;
 }
 
-static int qmp_combo_clk_init(struct device *dev, const struct qmp_phy_cfg *cfg)
+static int qmp_combo_clk_init(struct qmp_combo *qmp)
 {
-	struct qmp_combo *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	int num = cfg->num_clks;
 	int i;
 
@@ -2468,15 +2471,12 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static int qmp_combo_create_dp(struct device *dev, struct device_node *np,
-			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
+static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np)
 {
-	struct qmp_combo *qmp = dev_get_drvdata(dev);
+	struct device *dev = qmp->dev;
 	struct phy *generic_phy;
 	int ret;
 
-	qmp->cfg = cfg;
-	qmp->dp_serdes = serdes;
 	/*
 	 * Get memory resources from the DP child node:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2;
@@ -2509,15 +2509,13 @@ static int qmp_combo_create_dp(struct device *dev, struct device_node *np,
 	return 0;
 }
 
-static int qmp_combo_create_usb(struct device *dev, struct device_node *np,
-			void __iomem *serdes, const struct qmp_phy_cfg *cfg)
+static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np)
 {
-	struct qmp_combo *qmp = dev_get_drvdata(dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	struct device *dev = qmp->dev;
 	struct phy *generic_phy;
 	int ret;
 
-	qmp->cfg = cfg;
-	qmp->serdes = serdes;
 	/*
 	 * Get memory resources from the USB child node:
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2;
@@ -2577,10 +2575,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct device_node *dp_np, *usb_np;
 	struct phy_provider *phy_provider;
-	void __iomem *serdes;
-	void __iomem *usb_serdes;
-	void __iomem *dp_serdes = NULL;
-	const struct qmp_phy_cfg *cfg = NULL;
 	int ret;
 
 	qmp = devm_kzalloc(dev, sizeof(*qmp), GFP_KERNEL);
@@ -2588,35 +2582,34 @@ static int qmp_combo_probe(struct platform_device *pdev)
 		return -ENOMEM;
 
 	qmp->dev = dev;
-	dev_set_drvdata(dev, qmp);
 
-	cfg = of_device_get_match_data(dev);
-	if (!cfg)
+	qmp->cfg = of_device_get_match_data(dev);
+	if (!qmp->cfg)
 		return -EINVAL;
 
-	usb_serdes = serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(serdes))
-		return PTR_ERR(serdes);
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
 
 	qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
 	if (IS_ERR(qmp->dp_com))
 		return PTR_ERR(qmp->dp_com);
 
-	dp_serdes = devm_platform_ioremap_resource(pdev, 2);
-	if (IS_ERR(dp_serdes))
-		return PTR_ERR(dp_serdes);
+	qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2);
+	if (IS_ERR(qmp->dp_serdes))
+		return PTR_ERR(qmp->dp_serdes);
 
 	mutex_init(&qmp->phy_mutex);
 
-	ret = qmp_combo_clk_init(dev, cfg);
+	ret = qmp_combo_clk_init(qmp);
 	if (ret)
 		return ret;
 
-	ret = qmp_combo_reset_init(dev, cfg);
+	ret = qmp_combo_reset_init(qmp);
 	if (ret)
 		return ret;
 
-	ret = qmp_combo_vreg_init(dev, cfg);
+	ret = qmp_combo_vreg_init(qmp);
 	if (ret)
 		return ret;
 
@@ -2640,7 +2633,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	 */
 	pm_runtime_forbid(dev);
 
-	ret = qmp_combo_create_usb(dev, usb_np, usb_serdes, cfg);
+	ret = qmp_combo_create_usb(qmp, usb_np);
 	if (ret)
 		goto err_node_put;
 
@@ -2648,7 +2641,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_node_put;
 
-	ret = qmp_combo_create_dp(dev, dp_np, dp_serdes, cfg);
+	ret = qmp_combo_create_dp(qmp, dp_np);
 	if (ret)
 		goto err_node_put;
 
-- 
GitLab


From 526103b7a6759e4afd1bcdd4de619642689a78d2 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 14 Nov 2022 12:06:21 +0100
Subject: [PATCH 2702/4122] phy: qcom-qmp-combo: clean up DP callback names

Clean up and unify the DP callbacks by dropping the redundant "qcom" and
"phy" prefix and infix and by using a common naming scheme ("qmp" +
version + callback name).

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221114110621.4639-23-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 82 +++++++++++------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 5d92cbfc458ef..c5d8f8bfaaaa1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -903,17 +903,17 @@ struct qmp_combo {
 	struct qmp_phy_dp_clks *dp_clks;
 };
 
-static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp);
-static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp);
-static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp);
-static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp);
+static void qmp_v3_dp_aux_init(struct qmp_combo *qmp);
+static void qmp_v3_configure_dp_tx(struct qmp_combo *qmp);
+static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp);
+static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp);
 
-static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp);
-static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp);
-static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp);
-static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp);
+static void qmp_v4_dp_aux_init(struct qmp_combo *qmp);
+static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp);
+static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp);
+static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp);
 
-static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp);
+static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp);
 
 static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
 {
@@ -991,10 +991,10 @@ static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init		= qcom_qmp_v3_phy_dp_aux_init,
-	.configure_dp_tx	= qcom_qmp_v3_phy_configure_dp_tx,
-	.configure_dp_phy	= qcom_qmp_v3_phy_configure_dp_phy,
-	.calibrate_dp_phy	= qcom_qmp_v3_dp_phy_calibrate,
+	.dp_aux_init		= qmp_v3_dp_aux_init,
+	.configure_dp_tx	= qmp_v3_configure_dp_tx,
+	.configure_dp_phy	= qmp_v3_configure_dp_phy,
+	.calibrate_dp_phy	= qmp_v3_calibrate_dp_phy,
 
 	.clk_list		= qmp_v3_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
@@ -1036,10 +1036,10 @@ static const struct qmp_phy_cfg sdm845_usb3dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init		= qcom_qmp_v3_phy_dp_aux_init,
-	.configure_dp_tx	= qcom_qmp_v3_phy_configure_dp_tx,
-	.configure_dp_phy	= qcom_qmp_v3_phy_configure_dp_phy,
-	.calibrate_dp_phy	= qcom_qmp_v3_dp_phy_calibrate,
+	.dp_aux_init		= qmp_v3_dp_aux_init,
+	.configure_dp_tx	= qmp_v3_configure_dp_tx,
+	.configure_dp_phy	= qmp_v3_configure_dp_phy,
+	.calibrate_dp_phy	= qmp_v3_calibrate_dp_phy,
 
 	.clk_list		= qmp_v3_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v3_phy_clk_l),
@@ -1083,10 +1083,10 @@ static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init		= qcom_qmp_v4_phy_dp_aux_init,
-	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
-	.configure_dp_phy	= qcom_qmp_v4_phy_configure_dp_phy,
-	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
+	.dp_aux_init		= qmp_v4_dp_aux_init,
+	.configure_dp_tx	= qmp_v4_configure_dp_tx,
+	.configure_dp_phy	= qmp_v4_configure_dp_phy,
+	.calibrate_dp_phy	= qmp_v4_calibrate_dp_phy,
 
 	.clk_list		= qmp_v4_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
@@ -1129,10 +1129,10 @@ static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v5_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v5_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init		= qcom_qmp_v4_phy_dp_aux_init,
-	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
-	.configure_dp_phy	= qcom_qmp_v5_phy_configure_dp_phy,
-	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
+	.dp_aux_init		= qmp_v4_dp_aux_init,
+	.configure_dp_tx	= qmp_v4_configure_dp_tx,
+	.configure_dp_phy	= qmp_v5_configure_dp_phy,
+	.calibrate_dp_phy	= qmp_v4_calibrate_dp_phy,
 
 	.clk_list		= qmp_v4_phy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
@@ -1175,10 +1175,10 @@ static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = {
 	.swing_hbr3_hbr2	= &qmp_dp_v3_voltage_swing_hbr3_hbr2,
 	.pre_emphasis_hbr3_hbr2 = &qmp_dp_v3_pre_emphasis_hbr3_hbr2,
 
-	.dp_aux_init		= qcom_qmp_v4_phy_dp_aux_init,
-	.configure_dp_tx	= qcom_qmp_v4_phy_configure_dp_tx,
-	.configure_dp_phy	= qcom_qmp_v4_phy_configure_dp_phy,
-	.calibrate_dp_phy	= qcom_qmp_v4_dp_phy_calibrate,
+	.dp_aux_init		= qmp_v4_dp_aux_init,
+	.configure_dp_tx	= qmp_v4_configure_dp_tx,
+	.configure_dp_phy	= qmp_v4_configure_dp_phy,
+	.calibrate_dp_phy	= qmp_v4_calibrate_dp_phy,
 
 	.clk_list		= qmp_v4_sm8250_usbphy_clk_l,
 	.num_clks		= ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
@@ -1251,7 +1251,7 @@ static int qmp_combo_dp_serdes_init(struct qmp_combo *qmp)
 	return 0;
 }
 
-static void qcom_qmp_v3_phy_dp_aux_init(struct qmp_combo *qmp)
+static void qmp_v3_dp_aux_init(struct qmp_combo *qmp)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
@@ -1332,7 +1332,7 @@ static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp,
 	return 0;
 }
 
-static void qcom_qmp_v3_phy_configure_dp_tx(struct qmp_combo *qmp)
+static void qmp_v3_configure_dp_tx(struct qmp_combo *qmp)
 {
 	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 bias_en, drvr_en;
@@ -1385,7 +1385,7 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp)
 	return reverse;
 }
 
-static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp)
+static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp)
 {
 	const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks;
 	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
@@ -1462,7 +1462,7 @@ static int qcom_qmp_v3_phy_configure_dp_phy(struct qmp_combo *qmp)
  * We need to calibrate the aux setting here as many times
  * as the caller tries
  */
-static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp)
+static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp)
 {
 	static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d };
 	u8 val;
@@ -1476,7 +1476,7 @@ static int qcom_qmp_v3_dp_phy_calibrate(struct qmp_combo *qmp)
 	return 0;
 }
 
-static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp)
+static void qmp_v4_dp_aux_init(struct qmp_combo *qmp)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
@@ -1503,7 +1503,7 @@ static void qcom_qmp_v4_phy_dp_aux_init(struct qmp_combo *qmp)
 	       qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK);
 }
 
-static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp)
+static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp)
 {
 	/* Program default values before writing proper values */
 	writel(0x27, qmp->dp_tx + QSERDES_V4_TX_TX_DRV_LVL);
@@ -1516,7 +1516,7 @@ static void qcom_qmp_v4_phy_configure_dp_tx(struct qmp_combo *qmp)
 			QSERDES_V4_TX_TX_EMP_POST1_LVL);
 }
 
-static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp)
+static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp)
 {
 	const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks;
 	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
@@ -1606,7 +1606,7 @@ static int qcom_qmp_v45_phy_configure_dp_phy(struct qmp_combo *qmp)
 	return 0;
 }
 
-static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp)
+static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp)
 {
 	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 bias0_en, drvr0_en, bias1_en, drvr1_en;
@@ -1614,7 +1614,7 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp)
 	u32 status;
 	int ret;
 
-	ret = qcom_qmp_v45_phy_configure_dp_phy(qmp);
+	ret = qmp_v45_configure_dp_phy(qmp);
 	if (ret < 0)
 		return ret;
 
@@ -1668,7 +1668,7 @@ static int qcom_qmp_v4_phy_configure_dp_phy(struct qmp_combo *qmp)
 	return 0;
 }
 
-static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp)
+static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp)
 {
 	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 bias0_en, drvr0_en, bias1_en, drvr1_en;
@@ -1676,7 +1676,7 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp)
 	u32 status;
 	int ret;
 
-	ret = qcom_qmp_v45_phy_configure_dp_phy(qmp);
+	ret = qmp_v45_configure_dp_phy(qmp);
 	if (ret < 0)
 		return ret;
 
@@ -1729,7 +1729,7 @@ static int qcom_qmp_v5_phy_configure_dp_phy(struct qmp_combo *qmp)
  * We need to calibrate the aux setting here as many times
  * as the caller tries
  */
-static int qcom_qmp_v4_dp_phy_calibrate(struct qmp_combo *qmp)
+static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp)
 {
 	static const u8 cfg1_settings[] = { 0x20, 0x13, 0x23, 0x1d };
 	u8 val;
-- 
GitLab


From 32efdb0bb6e19965337fb63991237ecd99e0f9a4 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:44 +0100
Subject: [PATCH 2703/4122] dt-bindings: phy: qcom,qmp-usb3-dp: rename current
 bindings

The current QMP USB3-DP PHY bindings are based on the original MSM8996
binding which provided multiple PHYs per IP block and these in turn were
described by child nodes.

The QMP USB3-DP PHY block provides a single multi-protocol PHY and
even if some resources are only used by either the USB or DP part of the
device there is no real benefit in describing these resources in child
nodes.

The original MSM8996 binding also ended up describing the individual
register blocks as belonging to either the wrapper node or the PHY child
nodes.

This is an unnecessary level of detail which has lead to problems when
later IP blocks using different register layouts have been forced to fit
the original mould rather than updating the binding. The bindings are
arguable also incomplete as they only the describe register blocks used
by the current Linux drivers (e.g. does not include the PCS_LANE
registers).

In preparation for adding new bindings for SC8280XP which further
bindings can be based on, rename the current schema file after SC7180,
which was the first supported platform, and add a reference to the
SC8280XP bindings.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 ...dp-phy.yaml => qcom,sc7180-qmp-usb3-dp-phy.yaml} | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)
 rename Documentation/devicetree/bindings/phy/{qcom,qmp-usb3-dp-phy.yaml => qcom,sc7180-qmp-usb3-dp-phy.yaml} (91%)

diff --git a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml
similarity index 91%
rename from Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml
rename to Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml
index 97a7ecafbf852..8afc5e815ae8e 100644
--- a/Documentation/devicetree/bindings/phy/qcom,qmp-usb3-dp-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml
@@ -2,10 +2,17 @@
 
 %YAML 1.2
 ---
-$id: "http://devicetree.org/schemas/phy/qcom,qmp-usb3-dp-phy.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+$id: http://devicetree.org/schemas/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm QMP USB3 DP PHY controller
+title: Qualcomm QMP USB3 DP PHY controller (SC7180)
+
+description:
+  The QMP PHY controller supports physical layer functionality for a number of
+  controllers on Qualcomm chipsets, such as, PCIe, UFS and USB.
+
+  Note that these bindings are for SoCs up to SC8180X. For newer SoCs, see
+  qcom,sc8280xp-qmp-usb43dp-phy.yaml.
 
 maintainers:
   - Wesley Cheng <quic_wcheng@quicinc.com>
-- 
GitLab


From e1c4c5436b4ad579762fbe78bfabc8aef59bd5b1 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:45 +0100
Subject: [PATCH 2704/4122] dt-bindings: phy: qcom,qmp-usb3-dp: fix sc8280xp
 binding

The current QMP USB3-DP PHY bindings are based on the original MSM8996
binding which provided multiple PHYs per IP block and these in turn were
described by child nodes.

The QMP USB3-DP PHY block provides a single multi-protocol PHY and even
if some resources are only used by either the USB or DP part of the
device there is no real benefit in describing these resources in child
nodes.

The original MSM8996 binding also ended up describing the individual
register blocks as belonging to either the wrapper node or the PHY child
nodes.

This is an unnecessary level of detail which has lead to problems when
later IP blocks using different register layouts have been forced to fit
the original mould rather than updating the binding. The bindings are
arguable also incomplete as they only the describe register blocks used
by the current Linux drivers (e.g. does not include the PCS LANE
registers).

This is specifically true for later USB4-USB3-DP QMP PHYs where the TX
registers are used by both the USB3 and DP parts of the PHY (and where
the USB4 part of the PHY was not covered by the binding at all). Notably
there are also no DP "RX" (sic) registers as described by the current
bindings and the DP "PCS" region is really a set of DP_PHY registers.

Add a new binding for the USB4-USB3-DP QMP PHYs found on SC8280XP which
further bindings can be based on.

Note that the binding uses a PHY index to access either the USB3 or DP
part of the PHY and that this can later be used also for the USB4 part
if needed.

Similarly, the clock inputs and outputs can later be extended to support
USB4.

Also note that the current binding is simply removed instead of being
deprecated as it was only recently merged and would not allow for
supporting DP mode.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../phy/qcom,sc7180-qmp-usb3-dp-phy.yaml      | 12 ---
 .../phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml    | 99 +++++++++++++++++++
 include/dt-bindings/phy/phy-qcom-qmp.h        | 20 ++++
 3 files changed, 119 insertions(+), 12 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml
 create mode 100644 include/dt-bindings/phy/phy-qcom-qmp.h

diff --git a/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml
index 8afc5e815ae8e..d9d0ab90edb1b 100644
--- a/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,sc7180-qmp-usb3-dp-phy.yaml
@@ -23,7 +23,6 @@ properties:
       - qcom,sc7180-qmp-usb3-dp-phy
       - qcom,sc7280-qmp-usb3-dp-phy
       - qcom,sc8180x-qmp-usb3-dp-phy
-      - qcom,sc8280xp-qmp-usb43dp-phy
       - qcom,sdm845-qmp-usb3-dp-phy
       - qcom,sm8250-qmp-usb3-dp-phy
   reg:
@@ -169,17 +168,6 @@ required:
 
 additionalProperties: false
 
-allOf:
-  - if:
-      properties:
-        compatible:
-          contains:
-            enum:
-              - qcom,sc8280xp-qmp-usb43dp-phy
-    then:
-      required:
-        - power-domains
-
 examples:
   - |
     #include <dt-bindings/clock/qcom,gcc-sdm845.h>
diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml
new file mode 100644
index 0000000000000..6f31693d98689
--- /dev/null
+++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml
@@ -0,0 +1,99 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/qcom,sc8280xp-qmp-usb43dp-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm QMP USB4-USB3-DP PHY controller (SC8280XP)
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+
+description:
+  The QMP PHY controller supports physical layer functionality for a number of
+  controllers on Qualcomm chipsets, such as, PCIe, UFS and USB.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sc8280xp-qmp-usb43dp-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 4
+
+  clock-names:
+    items:
+      - const: aux
+      - const: ref
+      - const: com_aux
+      - const: usb3_pipe
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    maxItems: 2
+
+  reset-names:
+    items:
+      - const: phy
+      - const: common
+
+  vdda-phy-supply: true
+
+  vdda-pll-supply: true
+
+  "#clock-cells":
+    const: 1
+    description:
+      See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h
+
+  "#phy-cells":
+    const: 1
+    description:
+      See include/dt-bindings/dt-bindings/phy/phy-qcom-qmp.h
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - power-domains
+  - resets
+  - reset-names
+  - vdda-phy-supply
+  - vdda-pll-supply
+  - "#clock-cells"
+  - "#phy-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,gcc-sc8280xp.h>
+
+    phy@88eb000 {
+      compatible = "qcom,sc8280xp-qmp-usb43dp-phy";
+      reg = <0x088eb000 0x4000>;
+
+      clocks = <&gcc GCC_USB3_PRIM_PHY_AUX_CLK>,
+               <&gcc GCC_USB4_EUD_CLKREF_CLK>,
+               <&gcc GCC_USB3_PRIM_PHY_COM_AUX_CLK>,
+               <&gcc GCC_USB3_PRIM_PHY_PIPE_CLK>;
+      clock-names = "aux", "ref", "com_aux", "usb3_pipe";
+
+      power-domains = <&gcc USB30_PRIM_GDSC>;
+
+      resets = <&gcc GCC_USB3_PHY_PRIM_BCR>,
+               <&gcc GCC_USB4_DP_PHY_PRIM_BCR>;
+      reset-names = "phy", "common";
+
+      vdda-phy-supply = <&vreg_l9d>;
+      vdda-pll-supply = <&vreg_l4d>;
+
+      #clock-cells = <1>;
+      #phy-cells = <1>;
+    };
diff --git a/include/dt-bindings/phy/phy-qcom-qmp.h b/include/dt-bindings/phy/phy-qcom-qmp.h
new file mode 100644
index 0000000000000..4edec4c5b2241
--- /dev/null
+++ b/include/dt-bindings/phy/phy-qcom-qmp.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/*
+ * Qualcomm QMP PHY constants
+ *
+ * Copyright (C) 2022 Linaro Limited
+ */
+
+#ifndef _DT_BINDINGS_PHY_QMP
+#define _DT_BINDINGS_PHY_QMP
+
+/* QMP USB4-USB3-DP clocks */
+#define QMP_USB43DP_USB3_PIPE_CLK	0
+#define QMP_USB43DP_DP_LINK_CLK		1
+#define QMP_USB43DP_DP_VCO_DIV_CLK	2
+
+/* QMP USB4-USB3-DP PHYs */
+#define QMP_USB43DP_USB3_PHY		0
+#define QMP_USB43DP_DP_PHY		1
+
+#endif /* _DT_BINDINGS_PHY_QMP */
-- 
GitLab


From 774903ca6c499887f554234bb019c91aa0a8f741 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:46 +0100
Subject: [PATCH 2705/4122] phy: qcom-qmp-combo: drop v4 reference-clock source

The source clock for the reference clock should not be described by the
devicetree and instead this relationship should be modelled in the clock
driver.

Drop the management of the source clock from the driver for SC8180X and
SC8280XP. Note that support for the former is not yet in mainline.

Also note that the binding has never been updated to describe the v4
clocks for SC8180X.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index c5d8f8bfaaaa1..5da42a4e5bf62 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -945,7 +945,7 @@ static const char * const qmp_v3_phy_clk_l[] = {
 };
 
 static const char * const qmp_v4_phy_clk_l[] = {
-	"aux", "ref_clk_src", "ref", "com_aux",
+	"aux", "ref", "com_aux",
 };
 
 /* the primary usb3 phy on sm8250 doesn't have a ref clock */
-- 
GitLab


From b3982f2144e10bd542189e38cd47709e55389606 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:47 +0100
Subject: [PATCH 2706/4122] phy: qcom-qmp-combo: restructure PHY creation

In preparation for supporting devicetree bindings which do not use child
nodes, move the PHY creation to probe() proper and parse the serdes,
dp_com and dp_serdes resources in a dedicated legacy devicetree helper.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-5-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 89 ++++++++++++-----------
 1 file changed, 48 insertions(+), 41 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 5da42a4e5bf62..85def6560e436 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2471,11 +2471,9 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
-static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np)
+static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np)
 {
 	struct device *dev = qmp->dev;
-	struct phy *generic_phy;
-	int ret;
 
 	/*
 	 * Get memory resources from the DP child node:
@@ -2496,25 +2494,13 @@ static int qmp_combo_create_dp(struct qmp_combo *qmp, struct device_node *np)
 	if (IS_ERR(qmp->dp_tx2))
 		return PTR_ERR(qmp->dp_tx2);
 
-	generic_phy = devm_phy_create(dev, np, &qmp_combo_dp_phy_ops);
-	if (IS_ERR(generic_phy)) {
-		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create DP PHY: %d\n", ret);
-		return ret;
-	}
-
-	qmp->dp_phy = generic_phy;
-	phy_set_drvdata(generic_phy, qmp);
-
 	return 0;
 }
 
-static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np)
+static int qmp_combo_parse_dt_lecacy_usb(struct qmp_combo *qmp, struct device_node *np)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
 	struct device *dev = qmp->dev;
-	struct phy *generic_phy;
-	int ret;
 
 	/*
 	 * Get memory resources from the USB child node:
@@ -2556,15 +2542,34 @@ static int qmp_combo_create_usb(struct qmp_combo *qmp, struct device_node *np)
 				     "failed to get pipe clock\n");
 	}
 
-	generic_phy = devm_phy_create(dev, np, &qmp_combo_usb_phy_ops);
-	if (IS_ERR(generic_phy)) {
-		ret = PTR_ERR(generic_phy);
-		dev_err(dev, "failed to create USB PHY: %d\n", ret);
+	return 0;
+}
+
+static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node *usb_np,
+					struct device_node *dp_np)
+{
+	struct platform_device *pdev = to_platform_device(qmp->dev);
+	int ret;
+
+	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(qmp->serdes))
+		return PTR_ERR(qmp->serdes);
+
+	qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(qmp->dp_com))
+		return PTR_ERR(qmp->dp_com);
+
+	qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2);
+	if (IS_ERR(qmp->dp_serdes))
+		return PTR_ERR(qmp->dp_serdes);
+
+	ret = qmp_combo_parse_dt_lecacy_usb(qmp, usb_np);
+	if (ret)
 		return ret;
-	}
 
-	qmp->usb_phy = generic_phy;
-	phy_set_drvdata(generic_phy, qmp);
+	ret = qmp_combo_parse_dt_lecacy_dp(qmp, dp_np);
+	if (ret)
+		return ret;
 
 	return 0;
 }
@@ -2587,18 +2592,6 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (!qmp->cfg)
 		return -EINVAL;
 
-	qmp->serdes = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(qmp->serdes))
-		return PTR_ERR(qmp->serdes);
-
-	qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
-	if (IS_ERR(qmp->dp_com))
-		return PTR_ERR(qmp->dp_com);
-
-	qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2);
-	if (IS_ERR(qmp->dp_serdes))
-		return PTR_ERR(qmp->dp_serdes);
-
 	mutex_init(&qmp->phy_mutex);
 
 	ret = qmp_combo_clk_init(qmp);
@@ -2623,6 +2616,10 @@ static int qmp_combo_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
+	ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np);
+	if (ret)
+		goto err_node_put;
+
 	pm_runtime_set_active(dev);
 	ret = devm_pm_runtime_enable(dev);
 	if (ret)
@@ -2633,21 +2630,31 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	 */
 	pm_runtime_forbid(dev);
 
-	ret = qmp_combo_create_usb(qmp, usb_np);
+	ret = phy_pipe_clk_register(qmp, usb_np);
 	if (ret)
 		goto err_node_put;
 
-	ret = phy_pipe_clk_register(qmp, usb_np);
+	ret = phy_dp_clks_register(qmp, dp_np);
 	if (ret)
 		goto err_node_put;
 
-	ret = qmp_combo_create_dp(qmp, dp_np);
-	if (ret)
+	qmp->usb_phy = devm_phy_create(dev, usb_np, &qmp_combo_usb_phy_ops);
+	if (IS_ERR(qmp->usb_phy)) {
+		ret = PTR_ERR(qmp->usb_phy);
+		dev_err(dev, "failed to create USB PHY: %d\n", ret);
 		goto err_node_put;
+	}
 
-	ret = phy_dp_clks_register(qmp, dp_np);
-	if (ret)
+	phy_set_drvdata(qmp->usb_phy, qmp);
+
+	qmp->dp_phy = devm_phy_create(dev, dp_np, &qmp_combo_dp_phy_ops);
+	if (IS_ERR(qmp->dp_phy)) {
+		ret = PTR_ERR(qmp->dp_phy);
+		dev_err(dev, "failed to create DP PHY: %d\n", ret);
 		goto err_node_put;
+	}
+
+	phy_set_drvdata(qmp->dp_phy, qmp);
 
 	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
-- 
GitLab


From 0dd521d593ade3e8494d29abb653fda5bec5d508 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:48 +0100
Subject: [PATCH 2707/4122] phy: qcom-qmp-combo: generate pipe clock name

In preparation for supporting devicetree bindings which do not use child
nodes, generate also the USB3 pipe clock name based on the platform
device name as is done for the DP clocks.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-6-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 85def6560e436..7434955c88989 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2247,18 +2247,15 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np)
 {
 	struct clk_fixed_rate *fixed;
 	struct clk_init_data init = { };
+	char name[64];
 	int ret;
 
-	ret = of_property_read_string(np, "clock-output-names", &init.name);
-	if (ret) {
-		dev_err(qmp->dev, "%pOFn: No clock-output-names\n", np);
-		return ret;
-	}
-
 	fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL);
 	if (!fixed)
 		return -ENOMEM;
 
+	snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev));
+	init.name = name;
 	init.ops = &clk_fixed_rate_ops;
 
 	/* controllers using QMP phys use 125MHz pipe clock interface */
-- 
GitLab


From ee81f2eb0ee0c99a109f91a9617a8d7698479181 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:49 +0100
Subject: [PATCH 2708/4122] phy: qcom-qmp-combo: drop redundant clock structure

Drop the unnecessary DP clock structure and instead store the clocks
directly in the driver data.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-7-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 52 ++++++++---------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 7434955c88989..ebfefecffd867 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -859,12 +859,6 @@ struct qmp_phy_cfg {
 
 };
 
-struct qmp_phy_dp_clks {
-	struct qmp_combo *qmp;
-	struct clk_hw dp_link_hw;
-	struct clk_hw dp_pixel_hw;
-};
-
 struct qmp_combo {
 	struct device *dev;
 
@@ -900,7 +894,9 @@ struct qmp_combo {
 	struct phy *dp_phy;
 	unsigned int dp_aux_cfg;
 	struct phy_configure_opts_dp dp_opts;
-	struct qmp_phy_dp_clks *dp_clks;
+
+	struct clk_hw dp_link_hw;
+	struct clk_hw dp_pixel_hw;
 };
 
 static void qmp_v3_dp_aux_init(struct qmp_combo *qmp);
@@ -1387,7 +1383,6 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp)
 
 static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp)
 {
-	const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks;
 	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 phy_vco_div, status;
 	unsigned long pixel_freq;
@@ -1420,8 +1415,8 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp)
 	}
 	writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV);
 
-	clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000);
-	clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq);
+	clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000);
+	clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq);
 
 	writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
 	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
@@ -1518,7 +1513,6 @@ static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp)
 
 static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp)
 {
-	const struct qmp_phy_dp_clks *dp_clks = qmp->dp_clks;
 	const struct phy_configure_opts_dp *dp_opts = &qmp->dp_opts;
 	u32 phy_vco_div, status;
 	unsigned long pixel_freq;
@@ -1556,8 +1550,8 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp)
 	}
 	writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV);
 
-	clk_set_rate(dp_clks->dp_link_hw.clk, dp_opts->link_rate * 100000);
-	clk_set_rate(dp_clks->dp_pixel_hw.clk, pixel_freq);
+	clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000);
+	clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq);
 
 	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
 	writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
@@ -2342,12 +2336,10 @@ static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw,
 static unsigned long
 qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
-	const struct qmp_phy_dp_clks *dp_clks;
 	const struct qmp_combo *qmp;
 	const struct phy_configure_opts_dp *dp_opts;
 
-	dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_pixel_hw);
-	qmp = dp_clks->qmp;
+	qmp = container_of(hw, struct qmp_combo, dp_pixel_hw);
 	dp_opts = &qmp->dp_opts;
 
 	switch (dp_opts->link_rate) {
@@ -2386,12 +2378,10 @@ static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw,
 static unsigned long
 qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
-	const struct qmp_phy_dp_clks *dp_clks;
 	const struct qmp_combo *qmp;
 	const struct phy_configure_opts_dp *dp_opts;
 
-	dp_clks = container_of(hw, struct qmp_phy_dp_clks, dp_link_hw);
-	qmp = dp_clks->qmp;
+	qmp = container_of(hw, struct qmp_combo, dp_link_hw);
 	dp_opts = &qmp->dp_opts;
 
 	switch (dp_opts->link_rate) {
@@ -2413,7 +2403,7 @@ static const struct clk_ops qcom_qmp_dp_link_clk_ops = {
 static struct clk_hw *
 qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data)
 {
-	struct qmp_phy_dp_clks *dp_clks = data;
+	struct qmp_combo *qmp = data;
 	unsigned int idx = clkspec->args[0];
 
 	if (idx >= 2) {
@@ -2422,42 +2412,34 @@ qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data)
 	}
 
 	if (idx == 0)
-		return &dp_clks->dp_link_hw;
+		return &qmp->dp_link_hw;
 
-	return &dp_clks->dp_pixel_hw;
+	return &qmp->dp_pixel_hw;
 }
 
 static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 {
 	struct clk_init_data init = { };
-	struct qmp_phy_dp_clks *dp_clks;
 	char name[64];
 	int ret;
 
-	dp_clks = devm_kzalloc(qmp->dev, sizeof(*dp_clks), GFP_KERNEL);
-	if (!dp_clks)
-		return -ENOMEM;
-
-	dp_clks->qmp = qmp;
-	qmp->dp_clks = dp_clks;
-
 	snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev));
 	init.ops = &qcom_qmp_dp_link_clk_ops;
 	init.name = name;
-	dp_clks->dp_link_hw.init = &init;
-	ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_link_hw);
+	qmp->dp_link_hw.init = &init;
+	ret = devm_clk_hw_register(qmp->dev, &qmp->dp_link_hw);
 	if (ret)
 		return ret;
 
 	snprintf(name, sizeof(name), "%s::vco_div_clk", dev_name(qmp->dev));
 	init.ops = &qcom_qmp_dp_pixel_clk_ops;
 	init.name = name;
-	dp_clks->dp_pixel_hw.init = &init;
-	ret = devm_clk_hw_register(qmp->dev, &dp_clks->dp_pixel_hw);
+	qmp->dp_pixel_hw.init = &init;
+	ret = devm_clk_hw_register(qmp->dev, &qmp->dp_pixel_hw);
 	if (ret)
 		return ret;
 
-	ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, dp_clks);
+	ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, qmp);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 55b1c39b4990ebdab2faa2e4c06d17476d6d2d3c Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:50 +0100
Subject: [PATCH 2709/4122] phy: qcom-qmp-combo: drop redundant clock
 allocation

Since the QMP driver split, there is no reason to allocate the
fixed-rate pipe clock structure separately from the driver data.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-8-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index ebfefecffd867..9b945a72ae9b8 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -895,6 +895,7 @@ struct qmp_combo {
 	unsigned int dp_aux_cfg;
 	struct phy_configure_opts_dp dp_opts;
 
+	struct clk_fixed_rate pipe_clk_fixed;
 	struct clk_hw dp_link_hw;
 	struct clk_hw dp_pixel_hw;
 };
@@ -2239,15 +2240,11 @@ static void phy_clk_release_provider(void *res)
  */
 static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np)
 {
-	struct clk_fixed_rate *fixed;
+	struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed;
 	struct clk_init_data init = { };
 	char name[64];
 	int ret;
 
-	fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL);
-	if (!fixed)
-		return -ENOMEM;
-
 	snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev));
 	init.name = name;
 	init.ops = &clk_fixed_rate_ops;
-- 
GitLab


From 74401c85fb3b134d884d5de968c66784527d12d1 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:51 +0100
Subject: [PATCH 2710/4122] phy: qcom-qmp-combo: add clock registration helper

In preparation for supporting devicetree bindings which do not use child
nodes, add a clock registration helper to handle the registration of
both the USB and DP clocks.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-9-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 9b945a72ae9b8..1079a16b45f68 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2447,6 +2447,22 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
 }
 
+static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np,
+					struct device_node *dp_np)
+{
+	int ret;
+
+	ret = phy_pipe_clk_register(qmp, usb_np);
+	if (ret)
+		return ret;
+
+	ret = phy_dp_clks_register(qmp, dp_np);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
 static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np)
 {
 	struct device *dev = qmp->dev;
@@ -2606,11 +2622,7 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	 */
 	pm_runtime_forbid(dev);
 
-	ret = phy_pipe_clk_register(qmp, usb_np);
-	if (ret)
-		goto err_node_put;
-
-	ret = phy_dp_clks_register(qmp, dp_np);
+	ret = qmp_combo_register_clocks(qmp, usb_np, dp_np);
 	if (ret)
 		goto err_node_put;
 
-- 
GitLab


From ce51f7a70a3bbc20c07079c06e7721cabfe34dd9 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:52 +0100
Subject: [PATCH 2711/4122] phy: qcom-qmp-combo: separate clock and provider
 registration

In preparation for supporting devicetree bindings which do not use child
nodes, separate clock registration from clock-provider registration.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-10-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 44 +++++++++++------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 1079a16b45f68..89a5b51c770d0 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2243,7 +2243,6 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np)
 	struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed;
 	struct clk_init_data init = { };
 	char name[64];
-	int ret;
 
 	snprintf(name, sizeof(name), "%s::pipe_clk", dev_name(qmp->dev));
 	init.name = name;
@@ -2253,19 +2252,7 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np)
 	fixed->fixed_rate = 125000000;
 	fixed->hw.init = &init;
 
-	ret = devm_clk_hw_register(qmp->dev, &fixed->hw);
-	if (ret)
-		return ret;
-
-	ret = of_clk_add_hw_provider(np, of_clk_hw_simple_get, &fixed->hw);
-	if (ret)
-		return ret;
-
-	/*
-	 * Roll a devm action because the clock provider is the child node, but
-	 * the child node is not actually a device.
-	 */
-	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
+	return devm_clk_hw_register(qmp->dev, &fixed->hw);
 }
 
 /*
@@ -2436,15 +2423,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 	if (ret)
 		return ret;
 
-	ret = of_clk_add_hw_provider(np, qcom_qmp_dp_clks_hw_get, qmp);
-	if (ret)
-		return ret;
-
-	/*
-	 * Roll a devm action because the clock provider is the child node, but
-	 * the child node is not actually a device.
-	 */
-	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, np);
+	return 0;
 }
 
 static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np,
@@ -2460,7 +2439,24 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *
 	if (ret)
 		return ret;
 
-	return 0;
+	ret = of_clk_add_hw_provider(usb_np, of_clk_hw_simple_get,
+					&qmp->pipe_clk_fixed.hw);
+	if (ret)
+		return ret;
+
+	/*
+	 * Roll a devm action because the clock provider is the child node, but
+	 * the child node is not actually a device.
+	 */
+	ret = devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, usb_np);
+	if (ret)
+		return ret;
+
+	ret = of_clk_add_hw_provider(dp_np, qcom_qmp_dp_clks_hw_get, qmp);
+	if (ret)
+		return ret;
+
+	return devm_add_action_or_reset(qmp->dev, phy_clk_release_provider, dp_np);
 }
 
 static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_node *np)
-- 
GitLab


From b71bf1ebe936cc63983e5339d218918ed56e9804 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:53 +0100
Subject: [PATCH 2712/4122] phy: qcom-qmp-combo: clean up DP clock callbacks

Clean up the DP clock callbacks somewhat by dropping the redundant
"qcom" prefix and removing line breaks after type specifiers.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-11-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 33 ++++++++++-------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 89a5b51c770d0..fb3705b008233 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -2304,8 +2304,7 @@ static int phy_pipe_clk_register(struct qmp_combo *qmp, struct device_node *np)
  *              for DP pixel clock
  *
  */
-static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw,
-						struct clk_rate_request *req)
+static int qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req)
 {
 	switch (req->rate) {
 	case 1620000000UL / 2:
@@ -2317,8 +2316,7 @@ static int qcom_qmp_dp_pixel_clk_determine_rate(struct clk_hw *hw,
 	}
 }
 
-static unsigned long
-qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+static unsigned long qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
 	const struct qmp_combo *qmp;
 	const struct phy_configure_opts_dp *dp_opts;
@@ -2340,13 +2338,12 @@ qcom_qmp_dp_pixel_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 	}
 }
 
-static const struct clk_ops qcom_qmp_dp_pixel_clk_ops = {
-	.determine_rate = qcom_qmp_dp_pixel_clk_determine_rate,
-	.recalc_rate = qcom_qmp_dp_pixel_clk_recalc_rate,
+static const struct clk_ops qmp_dp_pixel_clk_ops = {
+	.determine_rate	= qmp_dp_pixel_clk_determine_rate,
+	.recalc_rate	= qmp_dp_pixel_clk_recalc_rate,
 };
 
-static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw,
-					       struct clk_rate_request *req)
+static int qmp_dp_link_clk_determine_rate(struct clk_hw *hw, struct clk_rate_request *req)
 {
 	switch (req->rate) {
 	case 162000000:
@@ -2359,8 +2356,7 @@ static int qcom_qmp_dp_link_clk_determine_rate(struct clk_hw *hw,
 	}
 }
 
-static unsigned long
-qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
+static unsigned long qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 {
 	const struct qmp_combo *qmp;
 	const struct phy_configure_opts_dp *dp_opts;
@@ -2379,13 +2375,12 @@ qcom_qmp_dp_link_clk_recalc_rate(struct clk_hw *hw, unsigned long parent_rate)
 	}
 }
 
-static const struct clk_ops qcom_qmp_dp_link_clk_ops = {
-	.determine_rate = qcom_qmp_dp_link_clk_determine_rate,
-	.recalc_rate = qcom_qmp_dp_link_clk_recalc_rate,
+static const struct clk_ops qmp_dp_link_clk_ops = {
+	.determine_rate	= qmp_dp_link_clk_determine_rate,
+	.recalc_rate	= qmp_dp_link_clk_recalc_rate,
 };
 
-static struct clk_hw *
-qcom_qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data)
+static struct clk_hw *qmp_dp_clks_hw_get(struct of_phandle_args *clkspec, void *data)
 {
 	struct qmp_combo *qmp = data;
 	unsigned int idx = clkspec->args[0];
@@ -2408,7 +2403,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 	int ret;
 
 	snprintf(name, sizeof(name), "%s::link_clk", dev_name(qmp->dev));
-	init.ops = &qcom_qmp_dp_link_clk_ops;
+	init.ops = &qmp_dp_link_clk_ops;
 	init.name = name;
 	qmp->dp_link_hw.init = &init;
 	ret = devm_clk_hw_register(qmp->dev, &qmp->dp_link_hw);
@@ -2416,7 +2411,7 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 		return ret;
 
 	snprintf(name, sizeof(name), "%s::vco_div_clk", dev_name(qmp->dev));
-	init.ops = &qcom_qmp_dp_pixel_clk_ops;
+	init.ops = &qmp_dp_pixel_clk_ops;
 	init.name = name;
 	qmp->dp_pixel_hw.init = &init;
 	ret = devm_clk_hw_register(qmp->dev, &qmp->dp_pixel_hw);
@@ -2452,7 +2447,7 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *
 	if (ret)
 		return ret;
 
-	ret = of_clk_add_hw_provider(dp_np, qcom_qmp_dp_clks_hw_get, qmp);
+	ret = of_clk_add_hw_provider(dp_np, qmp_dp_clks_hw_get, qmp);
 	if (ret)
 		return ret;
 
-- 
GitLab


From 9e5b59ea6c216d9b36e3250c2efa081ab4ea2ff5 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:54 +0100
Subject: [PATCH 2713/4122] phy: qcom-qmp-combo: rename common-register
 pointers

The common registers are shared by the USB and DP parts of the PHY so
drop the misleading "dp" prefix from the corresponding pointers.

Note that the "DP" prefix could also be dropped from the corresponding
defines, but leave that in place for now.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-12-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 24 +++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index fb3705b008233..5777bd1f76b38 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -864,7 +864,7 @@ struct qmp_combo {
 
 	const struct qmp_phy_cfg *cfg;
 
-	void __iomem *dp_com;
+	void __iomem *com;
 
 	void __iomem *serdes;
 	void __iomem *tx;
@@ -1767,7 +1767,7 @@ static int qmp_combo_dp_calibrate(struct phy *phy)
 static int qmp_combo_com_init(struct qmp_combo *qmp)
 {
 	const struct qmp_phy_cfg *cfg = qmp->cfg;
-	void __iomem *dp_com = qmp->dp_com;
+	void __iomem *com = qmp->com;
 	int ret;
 
 	mutex_lock(&qmp->phy_mutex);
@@ -1798,25 +1798,25 @@ static int qmp_combo_com_init(struct qmp_combo *qmp)
 	if (ret)
 		goto err_assert_reset;
 
-	qphy_setbits(dp_com, QPHY_V3_DP_COM_POWER_DOWN_CTRL, SW_PWRDN);
+	qphy_setbits(com, QPHY_V3_DP_COM_POWER_DOWN_CTRL, SW_PWRDN);
 
 	/* override hardware control for reset of qmp phy */
-	qphy_setbits(dp_com, QPHY_V3_DP_COM_RESET_OVRD_CTRL,
+	qphy_setbits(com, QPHY_V3_DP_COM_RESET_OVRD_CTRL,
 			SW_DPPHY_RESET_MUX | SW_DPPHY_RESET |
 			SW_USB3PHY_RESET_MUX | SW_USB3PHY_RESET);
 
 	/* Default type-c orientation, i.e CC1 */
-	qphy_setbits(dp_com, QPHY_V3_DP_COM_TYPEC_CTRL, 0x02);
+	qphy_setbits(com, QPHY_V3_DP_COM_TYPEC_CTRL, 0x02);
 
-	qphy_setbits(dp_com, QPHY_V3_DP_COM_PHY_MODE_CTRL, USB3_MODE | DP_MODE);
+	qphy_setbits(com, QPHY_V3_DP_COM_PHY_MODE_CTRL, USB3_MODE | DP_MODE);
 
 	/* bring both QMP USB and QMP DP PHYs PCS block out of reset */
-	qphy_clrbits(dp_com, QPHY_V3_DP_COM_RESET_OVRD_CTRL,
+	qphy_clrbits(com, QPHY_V3_DP_COM_RESET_OVRD_CTRL,
 			SW_DPPHY_RESET_MUX | SW_DPPHY_RESET |
 			SW_USB3PHY_RESET_MUX | SW_USB3PHY_RESET);
 
-	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SWI_CTRL, 0x03);
-	qphy_clrbits(dp_com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
+	qphy_clrbits(com, QPHY_V3_DP_COM_SWI_CTRL, 0x03);
+	qphy_clrbits(com, QPHY_V3_DP_COM_SW_RESET, SW_RESET);
 
 	qphy_setbits(qmp->pcs, cfg->regs[QPHY_PCS_POWER_DOWN_CONTROL],
 			SW_PWRDN);
@@ -2538,9 +2538,9 @@ static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node *
 	if (IS_ERR(qmp->serdes))
 		return PTR_ERR(qmp->serdes);
 
-	qmp->dp_com = devm_platform_ioremap_resource(pdev, 1);
-	if (IS_ERR(qmp->dp_com))
-		return PTR_ERR(qmp->dp_com);
+	qmp->com = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(qmp->com))
+		return PTR_ERR(qmp->com);
 
 	qmp->dp_serdes = devm_platform_ioremap_resource(pdev, 2);
 	if (IS_ERR(qmp->dp_serdes))
-- 
GitLab


From 133836a7edf4e5783ab0caa669cdb94ab02b9b62 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:55 +0100
Subject: [PATCH 2714/4122] phy: qcom-qmp-combo: rename DP_PHY register pointer

The DP_PHY registers have erroneously been referred to as "PCS"
registers since DisplayPort support was added to the QMP drivers
(including in the devicetree binding).

Rename the corresponding pointer to match the register names.

Note that the repeated "dp" in the field name is intentional and this DP
register block is called "DP_PHY" (not just "PHY").

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-13-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 139 +++++++++++-----------
 1 file changed, 70 insertions(+), 69 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index 5777bd1f76b38..b82bd0a221d6f 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -878,7 +878,7 @@ struct qmp_combo {
 	void __iomem *dp_serdes;
 	void __iomem *dp_tx;
 	void __iomem *dp_tx2;
-	void __iomem *dp_pcs;
+	void __iomem *dp_dp_phy;
 
 	struct clk *pipe_clk;
 	struct clk_bulk_data *clks;
@@ -1252,20 +1252,20 @@ static void qmp_v3_dp_aux_init(struct qmp_combo *qmp)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	       qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
 
 	/* Turn on BIAS current for PHY/PLL */
 	writel(QSERDES_V3_COM_BIAS_EN | QSERDES_V3_COM_BIAS_EN_MUX |
 	       QSERDES_V3_COM_CLKBUF_L_EN | QSERDES_V3_COM_EN_SYSCLK_TX_SEL,
 	       qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
 
-	writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
 
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_LANE_0_1_PWRDN |
 	       DP_PHY_PD_CTL_LANE_2_3_PWRDN | DP_PHY_PD_CTL_PLL_PWRDN |
 	       DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	       qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
 
 	writel(QSERDES_V3_COM_BIAS_EN |
 	       QSERDES_V3_COM_BIAS_EN_MUX | QSERDES_V3_COM_CLKBUF_R_EN |
@@ -1273,22 +1273,22 @@ static void qmp_v3_dp_aux_init(struct qmp_combo *qmp)
 	       QSERDES_V3_COM_CLKBUF_RX_DRIVE_L,
 	       qmp->dp_serdes + QSERDES_V3_COM_BIAS_EN_CLKBUFLR_EN);
 
-	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
-	writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0x24, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
-	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
-	writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
-	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
-	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
-	writel(0xbb, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
-	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
+	writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG0);
+	writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0x24, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG3);
+	writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG4);
+	writel(0x26, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG5);
+	writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG6);
+	writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG7);
+	writel(0xbb, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG8);
+	writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG9);
 	qmp->dp_aux_cfg = 0;
 
 	writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK |
 	       PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK |
 	       PHY_AUX_REQ_ERR_MASK,
-	       qmp->dp_pcs + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK);
+	       qmp->dp_dp_phy + QSERDES_V3_DP_PHY_AUX_INTERRUPT_MASK);
 }
 
 static int qmp_combo_configure_dp_swing(struct qmp_combo *qmp,
@@ -1372,12 +1372,12 @@ static bool qmp_combo_configure_dp_mode(struct qmp_combo *qmp)
 	 * if (lane_cnt == 4 || orientation == ORIENTATION_CC1)
 	 *	val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN;
 	 * if (orientation == ORIENTATION_CC2)
-	 *	writel(0x4c, qmp->dp_pcs + QSERDES_V3_DP_PHY_MODE);
+	 *	writel(0x4c, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_MODE);
 	 */
 	val |= DP_PHY_PD_CTL_LANE_2_3_PWRDN;
-	writel(val, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
 
-	writel(0x5c, qmp->dp_pcs + QSERDES_DP_PHY_MODE);
+	writel(0x5c, qmp->dp_dp_phy + QSERDES_DP_PHY_MODE);
 
 	return reverse;
 }
@@ -1390,8 +1390,8 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp)
 
 	qmp_combo_configure_dp_mode(qmp);
 
-	writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL);
-	writel(0x05, qmp->dp_pcs + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL);
+	writel(0x05, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_TX0_TX1_LANE_CTL);
+	writel(0x05, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_TX2_TX3_LANE_CTL);
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -1414,16 +1414,16 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp)
 		/* Other link rates aren't supported */
 		return -EINVAL;
 	}
-	writel(phy_vco_div, qmp->dp_pcs + QSERDES_V3_DP_PHY_VCO_DIV);
+	writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V3_DP_PHY_VCO_DIV);
 
 	clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000);
 	clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq);
 
-	writel(0x04, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x04, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
+	writel(0x05, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
+	writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
+	writel(0x09, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 
 	writel(0x20, qmp->dp_serdes + QSERDES_V3_COM_RESETSM_CNTRL);
 
@@ -1434,20 +1434,20 @@ static int qmp_v3_configure_dp_phy(struct qmp_combo *qmp)
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V3_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 
-	return readl_poll_timeout(qmp->dp_pcs + QSERDES_V3_DP_PHY_STATUS,
+	return readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V3_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1467,7 +1467,7 @@ static int qmp_v3_calibrate_dp_phy(struct qmp_combo *qmp)
 	qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
 	val = cfg1_settings[qmp->dp_aux_cfg];
 
-	writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1);
 
 	return 0;
 }
@@ -1476,27 +1476,27 @@ static void qmp_v4_dp_aux_init(struct qmp_combo *qmp)
 {
 	writel(DP_PHY_PD_CTL_PWRDN | DP_PHY_PD_CTL_PSR_PWRDN | DP_PHY_PD_CTL_AUX_PWRDN |
 	       DP_PHY_PD_CTL_PLL_PWRDN | DP_PHY_PD_CTL_DP_CLAMP_EN,
-	       qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	       qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
 
 	/* Turn on BIAS current for PHY/PLL */
 	writel(0x17, qmp->dp_serdes + QSERDES_V4_COM_BIAS_EN_CLKBUFLR_EN);
 
-	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG0);
-	writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
-	writel(0x00, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG3);
-	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG4);
-	writel(0x26, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG5);
-	writel(0x0a, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG6);
-	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG7);
-	writel(0xb7, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG8);
-	writel(0x03, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG9);
+	writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG0);
+	writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0xa4, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x00, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG3);
+	writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG4);
+	writel(0x26, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG5);
+	writel(0x0a, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG6);
+	writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG7);
+	writel(0xb7, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG8);
+	writel(0x03, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG9);
 	qmp->dp_aux_cfg = 0;
 
 	writel(PHY_AUX_STOP_ERR_MASK | PHY_AUX_DEC_ERR_MASK |
 	       PHY_AUX_SYNC_ERR_MASK | PHY_AUX_ALIGN_ERR_MASK |
 	       PHY_AUX_REQ_ERR_MASK,
-	       qmp->dp_pcs + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK);
+	       qmp->dp_dp_phy + QSERDES_V4_DP_PHY_AUX_INTERRUPT_MASK);
 }
 
 static void qmp_v4_configure_dp_tx(struct qmp_combo *qmp)
@@ -1518,15 +1518,15 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp)
 	u32 phy_vco_div, status;
 	unsigned long pixel_freq;
 
-	writel(0x0f, qmp->dp_pcs + QSERDES_V4_DP_PHY_CFG_1);
+	writel(0x0f, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_CFG_1);
 
 	qmp_combo_configure_dp_mode(qmp);
 
-	writel(0x13, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
-	writel(0xa4, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG2);
+	writel(0x13, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1);
+	writel(0xa4, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG2);
 
-	writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL);
-	writel(0x05, qmp->dp_pcs + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL);
+	writel(0x05, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_TX0_TX1_LANE_CTL);
+	writel(0x05, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_TX2_TX3_LANE_CTL);
 
 	switch (dp_opts->link_rate) {
 	case 1620:
@@ -1549,15 +1549,15 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp)
 		/* Other link rates aren't supported */
 		return -EINVAL;
 	}
-	writel(phy_vco_div, qmp->dp_pcs + QSERDES_V4_DP_PHY_VCO_DIV);
+	writel(phy_vco_div, qmp->dp_dp_phy + QSERDES_V4_DP_PHY_VCO_DIV);
 
 	clk_set_rate(qmp->dp_link_hw.clk, dp_opts->link_rate * 100000);
 	clk_set_rate(qmp->dp_pixel_hw.clk, pixel_freq);
 
-	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x05, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x01, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
-	writel(0x09, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
+	writel(0x05, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
+	writel(0x01, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
+	writel(0x09, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 
 	writel(0x20, qmp->dp_serdes + QSERDES_V4_COM_RESETSM_CNTRL);
 
@@ -1582,16 +1582,16 @@ static int qmp_v45_configure_dp_phy(struct qmp_combo *qmp)
 			10000))
 		return -ETIMEDOUT;
 
-	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(0)) > 0),
 			500,
 			10000))
 		return -ETIMEDOUT;
 
-	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1640,11 +1640,11 @@ static int qmp_v4_configure_dp_phy(struct qmp_combo *qmp)
 	writel(drvr1_en, qmp->dp_tx2 + QSERDES_V4_TX_HIGHZ_DRVR_EN);
 	writel(bias1_en, qmp->dp_tx2 + QSERDES_V4_TX_TRANSCEIVER_BIAS_EN);
 
-	writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1697,11 +1697,11 @@ static int qmp_v5_configure_dp_phy(struct qmp_combo *qmp)
 	writel(drvr1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_HIGHZ_DRVR_EN);
 	writel(bias1_en, qmp->dp_tx2 + QSERDES_V5_5NM_TX_TRANSCEIVER_BIAS_EN);
 
-	writel(0x18, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x18, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 	udelay(2000);
-	writel(0x19, qmp->dp_pcs + QSERDES_DP_PHY_CFG);
+	writel(0x19, qmp->dp_dp_phy + QSERDES_DP_PHY_CFG);
 
-	if (readl_poll_timeout(qmp->dp_pcs + QSERDES_V4_DP_PHY_STATUS,
+	if (readl_poll_timeout(qmp->dp_dp_phy + QSERDES_V4_DP_PHY_STATUS,
 			status,
 			((status & BIT(1)) > 0),
 			500,
@@ -1733,7 +1733,7 @@ static int qmp_v4_calibrate_dp_phy(struct qmp_combo *qmp)
 	qmp->dp_aux_cfg %= ARRAY_SIZE(cfg1_settings);
 	val = cfg1_settings[qmp->dp_aux_cfg];
 
-	writel(val, qmp->dp_pcs + QSERDES_DP_PHY_AUX_CFG1);
+	writel(val, qmp->dp_dp_phy + QSERDES_DP_PHY_AUX_CFG1);
 
 	return 0;
 }
@@ -1906,7 +1906,7 @@ static int qmp_combo_dp_power_off(struct phy *phy)
 	struct qmp_combo *qmp = phy_get_drvdata(phy);
 
 	/* Assert DP PHY power down */
-	writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_pcs + QSERDES_DP_PHY_PD_CTL);
+	writel(DP_PHY_PD_CTL_PSR_PWRDN, qmp->dp_dp_phy + QSERDES_DP_PHY_PD_CTL);
 
 	return 0;
 }
@@ -2463,15 +2463,16 @@ static int qmp_combo_parse_dt_lecacy_dp(struct qmp_combo *qmp, struct device_nod
 	 * Resources are indexed as: tx -> 0; rx -> 1; pcs -> 2;
 	 * tx2 -> 3; rx2 -> 4
 	 *
-	 * Note that only tx/tx2 and pcs are used by the DP implementation.
+	 * Note that only tx/tx2 and pcs (dp_phy) are used by the DP
+	 * implementation.
 	 */
 	qmp->dp_tx = devm_of_iomap(dev, np, 0, NULL);
 	if (IS_ERR(qmp->dp_tx))
 		return PTR_ERR(qmp->dp_tx);
 
-	qmp->dp_pcs = devm_of_iomap(dev, np, 2, NULL);
-	if (IS_ERR(qmp->dp_pcs))
-		return PTR_ERR(qmp->dp_pcs);
+	qmp->dp_dp_phy = devm_of_iomap(dev, np, 2, NULL);
+	if (IS_ERR(qmp->dp_dp_phy))
+		return PTR_ERR(qmp->dp_dp_phy);
 
 	qmp->dp_tx2 = devm_of_iomap(dev, np, 3, NULL);
 	if (IS_ERR(qmp->dp_tx2))
-- 
GitLab


From 83a0bbe39b1797cab47665efcf689f774b42af88 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 21 Nov 2022 09:50:56 +0100
Subject: [PATCH 2715/4122] phy: qcom-qmp-combo: add support for updated
 sc8280xp binding

Add support for the new SC8280XP binding.

Note that the binding does not try to describe every register subregion
and instead the driver holds the corresponding offsets.

Also note that (possibly) unlike on earlier platforms, the TX registers
are used by both the USB and DP implementation.

Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221121085058.31213-14-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-combo.c | 145 ++++++++++++++++++++--
 1 file changed, 134 insertions(+), 11 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
index b82bd0a221d6f..77052c66cf70b 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-combo.c
@@ -20,7 +20,7 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
-#include <dt-bindings/phy/phy.h>
+#include <dt-bindings/phy/phy-qcom-qmp.h>
 
 #include "phy-qcom-qmp.h"
 
@@ -798,7 +798,23 @@ static const u8 qmp_dp_v5_voltage_swing_hbr_rbr[4][4] = {
 
 struct qmp_combo;
 
+struct qmp_combo_offsets {
+	u16 com;
+	u16 txa;
+	u16 rxa;
+	u16 txb;
+	u16 rxb;
+	u16 usb3_serdes;
+	u16 usb3_pcs_misc;
+	u16 usb3_pcs;
+	u16 usb3_pcs_usb;
+	u16 dp_serdes;
+	u16 dp_dp_phy;
+};
+
 struct qmp_phy_cfg {
+	const struct qmp_combo_offsets *offsets;
+
 	/* Init sequence for PHY blocks - serdes, tx, rx, pcs */
 	const struct qmp_phy_init_tbl *serdes_tbl;
 	int serdes_tbl_num;
@@ -959,6 +975,20 @@ static const char * const sc7180_usb3phy_reset_l[] = {
 	"phy",
 };
 
+static const struct qmp_combo_offsets qmp_combo_offsets_v5 = {
+	.com		= 0x0000,
+	.txa		= 0x0400,
+	.rxa		= 0x0600,
+	.txb		= 0x0a00,
+	.rxb		= 0x0c00,
+	.usb3_serdes	= 0x1000,
+	.usb3_pcs_misc	= 0x1200,
+	.usb3_pcs	= 0x1400,
+	.usb3_pcs_usb	= 0x1700,
+	.dp_serdes	= 0x2000,
+	.dp_dp_phy	= 0x2200,
+};
+
 static const struct qmp_phy_cfg sc7180_usb3dpphy_cfg = {
 	.serdes_tbl		= qmp_v3_usb3_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(qmp_v3_usb3_serdes_tbl),
@@ -1098,6 +1128,8 @@ static const struct qmp_phy_cfg sc8180x_usb3dpphy_cfg = {
 };
 
 static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = {
+	.offsets		= &qmp_combo_offsets_v5,
+
 	.serdes_tbl		= sc8280xp_usb43dp_serdes_tbl,
 	.serdes_tbl_num		= ARRAY_SIZE(sc8280xp_usb43dp_serdes_tbl),
 	.tx_tbl			= sc8280xp_usb43dp_tx_tbl,
@@ -1138,7 +1170,6 @@ static const struct qmp_phy_cfg sc8280xp_usb43dpphy_cfg = {
 	.vreg_list		= qmp_phy_vreg_l,
 	.num_vregs		= ARRAY_SIZE(qmp_phy_vreg_l),
 	.regs			= qmp_v4_usb3phy_regs_layout,
-	.pcs_usb_offset		= 0x300,
 };
 
 static const struct qmp_phy_cfg sm8250_usb3dpphy_cfg = {
@@ -2421,6 +2452,22 @@ static int phy_dp_clks_register(struct qmp_combo *qmp, struct device_node *np)
 	return 0;
 }
 
+static struct clk_hw *qmp_combo_clk_hw_get(struct of_phandle_args *clkspec, void *data)
+{
+	struct qmp_combo *qmp = data;
+
+	switch (clkspec->args[0]) {
+	case QMP_USB43DP_USB3_PIPE_CLK:
+		return &qmp->pipe_clk_fixed.hw;
+	case QMP_USB43DP_DP_LINK_CLK:
+		return &qmp->dp_link_hw;
+	case QMP_USB43DP_DP_VCO_DIV_CLK:
+		return &qmp->dp_pixel_hw;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
 static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *usb_np,
 					struct device_node *dp_np)
 {
@@ -2434,6 +2481,15 @@ static int qmp_combo_register_clocks(struct qmp_combo *qmp, struct device_node *
 	if (ret)
 		return ret;
 
+	/*
+	 * Register a single provider for bindings without child nodes.
+	 */
+	if (usb_np == qmp->dev->of_node)
+		return devm_of_clk_add_hw_provider(qmp->dev, qmp_combo_clk_hw_get, qmp);
+
+	/*
+	 * Register multiple providers for legacy bindings with child nodes.
+	 */
 	ret = of_clk_add_hw_provider(usb_np, of_clk_hw_simple_get,
 					&qmp->pipe_clk_fixed.hw);
 	if (ret)
@@ -2558,6 +2614,63 @@ static int qmp_combo_parse_dt_legacy(struct qmp_combo *qmp, struct device_node *
 	return 0;
 }
 
+static int qmp_combo_parse_dt(struct qmp_combo *qmp)
+{
+	struct platform_device *pdev = to_platform_device(qmp->dev);
+	const struct qmp_phy_cfg *cfg = qmp->cfg;
+	const struct qmp_combo_offsets *offs = cfg->offsets;
+	struct device *dev = qmp->dev;
+	void __iomem *base;
+
+	if (!offs)
+		return -EINVAL;
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	qmp->com = base + offs->com;
+	qmp->tx = base + offs->txa;
+	qmp->rx = base + offs->rxa;
+	qmp->tx2 = base + offs->txb;
+	qmp->rx2 = base + offs->rxb;
+
+	qmp->serdes = base + offs->usb3_serdes;
+	qmp->pcs_misc = base + offs->usb3_pcs_misc;
+	qmp->pcs = base + offs->usb3_pcs;
+	qmp->pcs_usb = base + offs->usb3_pcs_usb;
+
+	qmp->dp_serdes = base + offs->dp_serdes;
+	qmp->dp_tx = base + offs->txa;
+	qmp->dp_tx2 = base + offs->txb;
+	qmp->dp_dp_phy = base + offs->dp_dp_phy;
+
+	qmp->pipe_clk = devm_clk_get(dev, "usb3_pipe");
+	if (IS_ERR(qmp->pipe_clk)) {
+		return dev_err_probe(dev, PTR_ERR(qmp->pipe_clk),
+				"failed to get usb3_pipe clock\n");
+	}
+
+	return 0;
+}
+
+static struct phy *qmp_combo_phy_xlate(struct device *dev, struct of_phandle_args *args)
+{
+	struct qmp_combo *qmp = dev_get_drvdata(dev);
+
+	if (args->args_count == 0)
+		return ERR_PTR(-EINVAL);
+
+	switch (args->args[0]) {
+	case QMP_USB43DP_USB3_PHY:
+		return qmp->usb_phy;
+	case QMP_USB43DP_DP_PHY:
+		return qmp->dp_phy;
+	}
+
+	return ERR_PTR(-EINVAL);
+}
+
 static int qmp_combo_probe(struct platform_device *pdev)
 {
 	struct qmp_combo *qmp;
@@ -2590,17 +2703,22 @@ static int qmp_combo_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
+	/* Check for legacy binding with child nodes. */
 	usb_np = of_get_child_by_name(dev->of_node, "usb3-phy");
-	if (!usb_np)
-		return -EINVAL;
+	if (usb_np) {
+		dp_np = of_get_child_by_name(dev->of_node, "dp-phy");
+		if (!dp_np) {
+			of_node_put(usb_np);
+			return -EINVAL;
+		}
 
-	dp_np = of_get_child_by_name(dev->of_node, "dp-phy");
-	if (!dp_np) {
-		of_node_put(usb_np);
-		return -EINVAL;
-	}
+		ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np);
+	} else {
+		usb_np = of_node_get(dev->of_node);
+		dp_np = of_node_get(dev->of_node);
 
-	ret = qmp_combo_parse_dt_legacy(qmp, usb_np, dp_np);
+		ret = qmp_combo_parse_dt(qmp);
+	}
 	if (ret)
 		goto err_node_put;
 
@@ -2636,7 +2754,12 @@ static int qmp_combo_probe(struct platform_device *pdev)
 
 	phy_set_drvdata(qmp->dp_phy, qmp);
 
-	phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
+	dev_set_drvdata(dev, qmp);
+
+	if (usb_np == dev->of_node)
+		phy_provider = devm_of_phy_provider_register(dev, qmp_combo_phy_xlate);
+	else
+		phy_provider = devm_of_phy_provider_register(dev, of_phy_simple_xlate);
 
 	of_node_put(usb_np);
 	of_node_put(dp_np);
-- 
GitLab


From 1446d03ec290760788b1868b5aa967383d86dd77 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 11 Nov 2022 10:38:55 +0100
Subject: [PATCH 2716/4122] dt-bindings: phy: qcom,sc8280xp-qmp-usb3-uni: drop
 reference-clock source

The source clock for the reference clock is not used by the PHY directly
and should not be included in the devicetree binding.

Fixes: e8e58e29a0c9 ("dt-bindings: phy: qcom,qmp-usb: fix sc8280xp binding")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Link: https://lore.kernel.org/r/20221111093857.11360-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml       | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml
index ef080509747ae..16fce10382852 100644
--- a/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,sc8280xp-qmp-usb3-uni-phy.yaml
@@ -22,12 +22,11 @@ properties:
     maxItems: 1
 
   clocks:
-    maxItems: 5
+    maxItems: 4
 
   clock-names:
     items:
       - const: aux
-      - const: ref_clk_src
       - const: ref
       - const: com_aux
       - const: pipe
@@ -82,12 +81,10 @@ examples:
       reg = <0x088ef000 0x2000>;
 
       clocks = <&gcc GCC_USB3_MP_PHY_AUX_CLK>,
-               <&rpmhcc RPMH_CXO_CLK>,
                <&gcc GCC_USB3_MP0_CLKREF_CLK>,
                <&gcc GCC_USB3_MP_PHY_COM_AUX_CLK>,
                <&gcc GCC_USB3_MP_PHY_PIPE_0_CLK>;
-      clock-names = "aux", "ref_clk_src", "ref", "com_aux",
-                    "pipe";
+      clock-names = "aux", "ref", "com_aux", "pipe";
 
       power-domains = <&gcc USB30_MP_GDSC>;
 
-- 
GitLab


From 3b41b61a2fe4174ba43fdb599c9d6accd35ac179 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 11 Nov 2022 10:38:56 +0100
Subject: [PATCH 2717/4122] phy: qcom-qmp-usb: drop sc8280xp reference-clock
 source

The source clock for the reference clock is not used by the PHY directly
and should not be described by the devicetree (instead this relationship
should be modelled in the clock driver).

Drop the driver management of the reference-clock source for SC8280XP.

Once the other clock drivers have been updated, the corresponding change
can be done also for the other QMP v4 platforms.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Link: https://lore.kernel.org/r/20221111093857.11360-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 55029ea63f731..9fbad6b1d3ab1 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1523,6 +1523,10 @@ static const char * const qmp_v3_phy_clk_l[] = {
 };
 
 static const char * const qmp_v4_phy_clk_l[] = {
+	"aux", "ref", "com_aux",
+};
+
+static const char * const qmp_v4_ref_phy_clk_l[] = {
 	"aux", "ref_clk_src", "ref", "com_aux",
 };
 
@@ -1729,8 +1733,8 @@ static const struct qmp_phy_cfg sm8150_usb3phy_cfg = {
 	.pcs_tbl_num		= ARRAY_SIZE(sm8150_usb3_pcs_tbl),
 	.pcs_usb_tbl		= sm8150_usb3_pcs_usb_tbl,
 	.pcs_usb_tbl_num	= ARRAY_SIZE(sm8150_usb3_pcs_usb_tbl),
-	.clk_list		= qmp_v4_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
+	.clk_list		= qmp_v4_ref_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_ref_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
@@ -1755,8 +1759,8 @@ static const struct qmp_phy_cfg sm8150_usb3_uniphy_cfg = {
 	.pcs_tbl_num		= ARRAY_SIZE(sm8150_usb3_uniphy_pcs_tbl),
 	.pcs_usb_tbl		= sm8150_usb3_uniphy_pcs_usb_tbl,
 	.pcs_usb_tbl_num	= ARRAY_SIZE(sm8150_usb3_uniphy_pcs_usb_tbl),
-	.clk_list		= qmp_v4_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
+	.clk_list		= qmp_v4_ref_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_ref_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
@@ -1806,8 +1810,8 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
 	.pcs_tbl_num		= ARRAY_SIZE(sm8250_usb3_uniphy_pcs_tbl),
 	.pcs_usb_tbl		= sm8250_usb3_uniphy_pcs_usb_tbl,
 	.pcs_usb_tbl_num	= ARRAY_SIZE(sm8250_usb3_uniphy_pcs_usb_tbl),
-	.clk_list		= qmp_v4_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
+	.clk_list		= qmp_v4_ref_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_ref_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
@@ -1907,8 +1911,8 @@ static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
 	.pcs_tbl_num		= ARRAY_SIZE(sm8350_usb3_uniphy_pcs_tbl),
 	.pcs_usb_tbl		= sm8350_usb3_uniphy_pcs_usb_tbl,
 	.pcs_usb_tbl_num	= ARRAY_SIZE(sm8350_usb3_uniphy_pcs_usb_tbl),
-	.clk_list		= qmp_v4_phy_clk_l,
-	.num_clks		= ARRAY_SIZE(qmp_v4_phy_clk_l),
+	.clk_list		= qmp_v4_ref_phy_clk_l,
+	.num_clks		= ARRAY_SIZE(qmp_v4_ref_phy_clk_l),
 	.reset_list		= msm8996_usb3phy_reset_l,
 	.num_resets		= ARRAY_SIZE(msm8996_usb3phy_reset_l),
 	.vreg_list		= qmp_phy_vreg_l,
-- 
GitLab


From 905abf1229efd33aa57f3f65881c378770dfbb65 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 11 Nov 2022 10:42:37 +0100
Subject: [PATCH 2718/4122] phy: qcom-qmp: drop unused type header

The PHY type defines are no longer used in the PCIe, UFS and USB QMP
drivers so drop the corresponding include.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221111094239.11547-2-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c | 2 --
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c         | 2 --
 drivers/phy/qualcomm/phy-qcom-qmp-ufs.c          | 2 --
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c          | 2 --
 4 files changed, 8 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
index ff198d846fd2d..a088477e274f7 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie-msm8996.c
@@ -20,8 +20,6 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
-#include <dt-bindings/phy/phy.h>
-
 #include "phy-qcom-qmp.h"
 
 /* QPHY_SW_RESET bit */
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 47cccc4b35b22..53dde20a4ce06 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -23,8 +23,6 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
-#include <dt-bindings/phy/phy.h>
-
 #include "phy-qcom-qmp.h"
 
 /* QPHY_SW_RESET bit */
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
index 189103d1bd180..318eea35b9721 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-ufs.c
@@ -20,8 +20,6 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
-#include <dt-bindings/phy/phy.h>
-
 #include "phy-qcom-qmp.h"
 
 /* QPHY_SW_RESET bit */
diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 9fbad6b1d3ab1..64b9472a1a702 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -20,8 +20,6 @@
 #include <linux/reset.h>
 #include <linux/slab.h>
 
-#include <dt-bindings/phy/phy.h>
-
 #include "phy-qcom-qmp.h"
 
 /* QPHY_SW_RESET bit */
-- 
GitLab


From 64e1f12b2658c1abca55cffd9413f2d3c3bbfa8f Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 11 Nov 2022 10:42:38 +0100
Subject: [PATCH 2719/4122] phy: qcom-qmp-usb: drop redundant clock allocation

Since the QMP driver split, there is no reason to allocate the
fixed-rate pipe clock structure separately from the driver data.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221111094239.11547-3-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-usb.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
index 64b9472a1a702..4aa338fc4643c 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-usb.c
@@ -1485,6 +1485,8 @@ struct qmp_usb {
 	enum phy_mode mode;
 
 	struct phy *phy;
+
+	struct clk_fixed_rate pipe_clk_fixed;
 };
 
 static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
@@ -2357,7 +2359,7 @@ static void phy_clk_release_provider(void *res)
  */
 static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np)
 {
-	struct clk_fixed_rate *fixed;
+	struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed;
 	struct clk_init_data init = { };
 	int ret;
 
@@ -2367,10 +2369,6 @@ static int phy_pipe_clk_register(struct qmp_usb *qmp, struct device_node *np)
 		return ret;
 	}
 
-	fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL);
-	if (!fixed)
-		return -ENOMEM;
-
 	init.ops = &clk_fixed_rate_ops;
 
 	/* controllers using QMP phys use 125MHz pipe clock interface */
-- 
GitLab


From e8511f407b078330dfcca0c7200e72b7638b6e17 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 11 Nov 2022 10:42:39 +0100
Subject: [PATCH 2720/4122] phy: qcom-qmp-pcie: drop redundant clock allocation

Since the QMP driver split, there is no reason to allocate the
fixed-rate pipe clock structure separately from the driver data.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20221111094239.11547-4-johan+linaro@kernel.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/qualcomm/phy-qcom-qmp-pcie.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
index 53dde20a4ce06..1b136a87053f8 100644
--- a/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
+++ b/drivers/phy/qualcomm/phy-qcom-qmp-pcie.c
@@ -1550,6 +1550,8 @@ struct qmp_pcie {
 
 	struct phy *phy;
 	int mode;
+
+	struct clk_fixed_rate pipe_clk_fixed;
 };
 
 static inline void qphy_setbits(void __iomem *base, u32 offset, u32 val)
@@ -2416,7 +2418,7 @@ static void phy_clk_release_provider(void *res)
  */
 static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np)
 {
-	struct clk_fixed_rate *fixed;
+	struct clk_fixed_rate *fixed = &qmp->pipe_clk_fixed;
 	struct clk_init_data init = { };
 	int ret;
 
@@ -2426,10 +2428,6 @@ static int phy_pipe_clk_register(struct qmp_pcie *qmp, struct device_node *np)
 		return ret;
 	}
 
-	fixed = devm_kzalloc(qmp->dev, sizeof(*fixed), GFP_KERNEL);
-	if (!fixed)
-		return -ENOMEM;
-
 	init.ops = &clk_fixed_rate_ops;
 
 	/*
-- 
GitLab


From 5a0d2df462568486b85a88ed2c88ffbfa1645cd1 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 13 Nov 2022 20:21:06 -0600
Subject: [PATCH 2721/4122] dt-bindings: sun6i-a31-mipi-dphy: Add the
 interrupts property

The sun6i DPHY can generate several interrupts, mostly for reporting
error conditions, but also for detecting BTA and UPLS sequences.
Document this capability in order to accurately describe the hardware.

The DPHY has no interrupt number provided in the vendor documentation
because its interrupt line is shared with the DSI controller.

A trivial interrupt handler was used to verify that interrupts were in
fact generated by the DPHY and not the DSI controller.

Signed-off-by: Samuel Holland <samuel@sholland.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221114022113.31694-2-samuel@sholland.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml        | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
index dfb6a89935351..4c45d7fcc8536 100644
--- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
@@ -24,6 +24,9 @@ properties:
   reg:
     maxItems: 1
 
+  interrupts:
+    maxItems: 1
+
   clocks:
     items:
       - description: Bus Clock
@@ -53,6 +56,7 @@ required:
   - "#phy-cells"
   - compatible
   - reg
+  - interrupts
   - clocks
   - clock-names
   - resets
@@ -61,9 +65,12 @@ additionalProperties: false
 
 examples:
   - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
     dphy0: d-phy@1ca1000 {
         compatible = "allwinner,sun6i-a31-mipi-dphy";
         reg = <0x01ca1000 0x1000>;
+        interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
         clocks = <&ccu 23>, <&ccu 97>;
         clock-names = "bus", "mod";
         resets = <&ccu 4>;
-- 
GitLab


From e7a838694185c7d0965baa9ed2515f2e0ff8d502 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 13 Nov 2022 20:21:09 -0600
Subject: [PATCH 2722/4122] dt-bindings: sun6i-a31-mipi-dphy: Add the A100 DPHY
 variant

A100 features an updated DPHY, which moves PLL control inside the DPHY
register space. (Previously PLL-MIPI was controlled from the CCU. This
does not affect the "clocks" property because the link between PLL-MIPI
and the DPHY was never represented in the devicetree.) It also requires
a modified analog power-on sequence. Finally, the new DPHY adds support
for operating as an LVDS PHY. D1 uses this same variant.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221114022113.31694-5-samuel@sholland.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 .../bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml           | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
index 4c45d7fcc8536..fe9702e7bdd8a 100644
--- a/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
+++ b/Documentation/devicetree/bindings/phy/allwinner,sun6i-a31-mipi-dphy.yaml
@@ -17,9 +17,13 @@ properties:
   compatible:
     oneOf:
       - const: allwinner,sun6i-a31-mipi-dphy
+      - const: allwinner,sun50i-a100-mipi-dphy
       - items:
           - const: allwinner,sun50i-a64-mipi-dphy
           - const: allwinner,sun6i-a31-mipi-dphy
+      - items:
+          - const: allwinner,sun20i-d1-mipi-dphy
+          - const: allwinner,sun50i-a100-mipi-dphy
 
   reg:
     maxItems: 1
-- 
GitLab


From a709ae51e22802822de85ec7b672cf1cc5412fc0 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 13 Nov 2022 20:21:10 -0600
Subject: [PATCH 2723/4122] phy: allwinner: phy-sun6i-mipi-dphy: Make RX
 support optional

While all variants of the DPHY likely support RX mode, the new variant
in the A100 is not used in this direction by the BSP, and it has some
analog register changes, so its RX power-on sequence is unknown. To be
safe, limit RX support to variants where the power-on sequence is known.

Signed-off-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221114022113.31694-6-samuel@sholland.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 26 +++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
index 3900f16508515..7d7322670a83d 100644
--- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
+++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
@@ -114,6 +114,10 @@ enum sun6i_dphy_direction {
 	SUN6I_DPHY_DIRECTION_RX,
 };
 
+struct sun6i_dphy_variant {
+	bool	rx_supported;
+};
+
 struct sun6i_dphy {
 	struct clk				*bus_clk;
 	struct clk				*mod_clk;
@@ -123,6 +127,7 @@ struct sun6i_dphy {
 	struct phy				*phy;
 	struct phy_configure_opts_mipi_dphy	config;
 
+	const struct sun6i_dphy_variant		*variant;
 	enum sun6i_dphy_direction		direction;
 };
 
@@ -409,6 +414,10 @@ static int sun6i_dphy_probe(struct platform_device *pdev)
 	if (!dphy)
 		return -ENOMEM;
 
+	dphy->variant = device_get_match_data(&pdev->dev);
+	if (!dphy->variant)
+		return -EINVAL;
+
 	regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(regs)) {
 		dev_err(&pdev->dev, "Couldn't map the DPHY encoder registers\n");
@@ -445,8 +454,14 @@ static int sun6i_dphy_probe(struct platform_device *pdev)
 	ret = of_property_read_string(pdev->dev.of_node, "allwinner,direction",
 				      &direction);
 
-	if (!ret && !strncmp(direction, "rx", 2))
+	if (!ret && !strncmp(direction, "rx", 2)) {
+		if (!dphy->variant->rx_supported) {
+			dev_err(&pdev->dev, "RX not supported on this variant\n");
+			return -EOPNOTSUPP;
+		}
+
 		dphy->direction = SUN6I_DPHY_DIRECTION_RX;
+	}
 
 	phy_set_drvdata(dphy->phy, dphy);
 	phy_provider = devm_of_phy_provider_register(&pdev->dev, of_phy_simple_xlate);
@@ -454,8 +469,15 @@ static int sun6i_dphy_probe(struct platform_device *pdev)
 	return PTR_ERR_OR_ZERO(phy_provider);
 }
 
+static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = {
+	.rx_supported	= true,
+};
+
 static const struct of_device_id sun6i_dphy_of_table[] = {
-	{ .compatible = "allwinner,sun6i-a31-mipi-dphy" },
+	{
+		.compatible	= "allwinner,sun6i-a31-mipi-dphy",
+		.data		= &sun6i_a31_mipi_dphy_variant,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sun6i_dphy_of_table);
-- 
GitLab


From cb7f49a31597066b25c9bc6a0bf0781454dd4d2b Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 13 Nov 2022 20:21:11 -0600
Subject: [PATCH 2724/4122] phy: allwinner: phy-sun6i-mipi-dphy: Set the enable
 bit last

The A100 variant of the DPHY requires configuring the analog registers
before setting the global enable bit. Since this order also works on the
other variants, always use it, to minimize the differences between them.

Reviewed-by: Paul Kocialkowski <paul.kocialkowski@bootlin.com>
Signed-off-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221114022113.31694-7-samuel@sholland.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
index 7d7322670a83d..a2afedc399fda 100644
--- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
+++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
@@ -183,10 +183,6 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy)
 		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) |
 		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3));
 
-	regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG,
-		     SUN6I_DPHY_GCTL_LANE_NUM(dphy->config.lanes) |
-		     SUN6I_DPHY_GCTL_EN);
-
 	regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG,
 		     SUN6I_DPHY_ANA0_REG_PWS |
 		     SUN6I_DPHY_ANA0_REG_DMPC |
@@ -244,6 +240,10 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy)
 			   SUN6I_DPHY_ANA2_EN_P2S_CPU_MASK,
 			   SUN6I_DPHY_ANA2_EN_P2S_CPU(lanes_mask));
 
+	regmap_write(dphy->regs, SUN6I_DPHY_GCTL_REG,
+		     SUN6I_DPHY_GCTL_LANE_NUM(dphy->config.lanes) |
+		     SUN6I_DPHY_GCTL_EN);
+
 	return 0;
 }
 
-- 
GitLab


From 3fd490a7197857dc3aa409e56e31deaeab097c5f Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 13 Nov 2022 20:21:12 -0600
Subject: [PATCH 2725/4122] phy: allwinner: phy-sun6i-mipi-dphy: Add a variant
 power-on hook

The A100 variant uses the same values for the timing registers, and it
uses the same final power-on sequence, but it needs a different analog
register configuration in the middle. Support this by moving the
variant-specific parts to a hook provided by the variant.

Signed-off-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221114022113.31694-8-samuel@sholland.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 59 ++++++++++++---------
 1 file changed, 35 insertions(+), 24 deletions(-)

diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
index a2afedc399fda..ac144ee0f0a62 100644
--- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
+++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
@@ -114,7 +114,10 @@ enum sun6i_dphy_direction {
 	SUN6I_DPHY_DIRECTION_RX,
 };
 
+struct sun6i_dphy;
+
 struct sun6i_dphy_variant {
+	void	(*tx_power_on)(struct sun6i_dphy *dphy);
 	bool	rx_supported;
 };
 
@@ -156,33 +159,10 @@ static int sun6i_dphy_configure(struct phy *phy, union phy_configure_opts *opts)
 	return 0;
 }
 
-static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy)
+static void sun6i_a31_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy)
 {
 	u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0);
 
-	regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG,
-		     SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT);
-
-	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG,
-		     SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) |
-		     SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) |
-		     SUN6I_DPHY_TX_TIME0_HS_TRAIL(10));
-
-	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG,
-		     SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) |
-		     SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) |
-		     SUN6I_DPHY_TX_TIME1_CLK_PRE(3) |
-		     SUN6I_DPHY_TX_TIME1_CLK_POST(10));
-
-	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG,
-		     SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30));
-
-	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0);
-
-	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG,
-		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) |
-		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3));
-
 	regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG,
 		     SUN6I_DPHY_ANA0_REG_PWS |
 		     SUN6I_DPHY_ANA0_REG_DMPC |
@@ -214,6 +194,36 @@ static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy)
 		     SUN6I_DPHY_ANA3_EN_LDOC |
 		     SUN6I_DPHY_ANA3_EN_LDOD);
 	udelay(1);
+}
+
+static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy)
+{
+	u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_CTL_REG,
+		     SUN6I_DPHY_TX_CTL_HS_TX_CLK_CONT);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME0_REG,
+		     SUN6I_DPHY_TX_TIME0_LP_CLK_DIV(14) |
+		     SUN6I_DPHY_TX_TIME0_HS_PREPARE(6) |
+		     SUN6I_DPHY_TX_TIME0_HS_TRAIL(10));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME1_REG,
+		     SUN6I_DPHY_TX_TIME1_CLK_PREPARE(7) |
+		     SUN6I_DPHY_TX_TIME1_CLK_ZERO(50) |
+		     SUN6I_DPHY_TX_TIME1_CLK_PRE(3) |
+		     SUN6I_DPHY_TX_TIME1_CLK_POST(10));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME2_REG,
+		     SUN6I_DPHY_TX_TIME2_CLK_TRAIL(30));
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME3_REG, 0);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_TX_TIME4_REG,
+		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA0(3) |
+		     SUN6I_DPHY_TX_TIME4_HS_TX_ANA1(3));
+
+	dphy->variant->tx_power_on(dphy);
 
 	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA3_REG,
 			   SUN6I_DPHY_ANA3_EN_VTTC |
@@ -470,6 +480,7 @@ static int sun6i_dphy_probe(struct platform_device *pdev)
 }
 
 static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = {
+	.tx_power_on	= sun6i_a31_mipi_dphy_tx_power_on,
 	.rx_supported	= true,
 };
 
-- 
GitLab


From 4d0c2165e64eec00c19b68b1abc83e57e9633db9 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sun, 13 Nov 2022 20:21:13 -0600
Subject: [PATCH 2726/4122] phy: allwinner: phy-sun6i-mipi-dphy: Add the A100
 DPHY variant

A100 features an updated DPHY, which moves PLL control inside the DPHY
register space (previously the PLL was controlled from the CCU). It also
requires a modified analog power-on sequence. This "combo PHY" can also
be used as an LVDS PHY, but that is not yet supported by the driver.

Signed-off-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221114022113.31694-9-samuel@sholland.org
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/allwinner/phy-sun6i-mipi-dphy.c | 143 +++++++++++++++++++-
 1 file changed, 142 insertions(+), 1 deletion(-)

diff --git a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
index ac144ee0f0a62..36eab95271b2d 100644
--- a/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
+++ b/drivers/phy/allwinner/phy-sun6i-mipi-dphy.c
@@ -70,11 +70,19 @@
 
 #define SUN6I_DPHY_ANA0_REG		0x4c
 #define SUN6I_DPHY_ANA0_REG_PWS			BIT(31)
+#define SUN6I_DPHY_ANA0_REG_PWEND		BIT(30)
+#define SUN6I_DPHY_ANA0_REG_PWENC		BIT(29)
 #define SUN6I_DPHY_ANA0_REG_DMPC		BIT(28)
 #define SUN6I_DPHY_ANA0_REG_DMPD(n)		(((n) & 0xf) << 24)
+#define SUN6I_DPHY_ANA0_REG_SRXDT(n)		(((n) & 0xf) << 20)
+#define SUN6I_DPHY_ANA0_REG_SRXCK(n)		(((n) & 0xf) << 16)
+#define SUN6I_DPHY_ANA0_REG_SDIV2		BIT(15)
 #define SUN6I_DPHY_ANA0_REG_SLV(n)		(((n) & 7) << 12)
 #define SUN6I_DPHY_ANA0_REG_DEN(n)		(((n) & 0xf) << 8)
+#define SUN6I_DPHY_ANA0_REG_PLR(n)		(((n) & 0xf) << 4)
 #define SUN6I_DPHY_ANA0_REG_SFB(n)		(((n) & 3) << 2)
+#define SUN6I_DPHY_ANA0_REG_RSD			BIT(1)
+#define SUN6I_DPHY_ANA0_REG_SELSCK		BIT(0)
 
 #define SUN6I_DPHY_ANA1_REG		0x50
 #define SUN6I_DPHY_ANA1_REG_VTTMODE		BIT(31)
@@ -97,8 +105,13 @@
 #define SUN6I_DPHY_ANA3_EN_LDOR			BIT(18)
 
 #define SUN6I_DPHY_ANA4_REG		0x5c
+#define SUN6I_DPHY_ANA4_REG_EN_MIPI		BIT(31)
+#define SUN6I_DPHY_ANA4_REG_EN_COMTEST		BIT(30)
+#define SUN6I_DPHY_ANA4_REG_COMTEST(n)		(((n) & 3) << 28)
+#define SUN6I_DPHY_ANA4_REG_IB(n)		(((n) & 3) << 25)
 #define SUN6I_DPHY_ANA4_REG_DMPLVC		BIT(24)
 #define SUN6I_DPHY_ANA4_REG_DMPLVD(n)		(((n) & 0xf) << 20)
+#define SUN6I_DPHY_ANA4_REG_VTT_SET(n)		(((n) & 0x7) << 17)
 #define SUN6I_DPHY_ANA4_REG_CKDV(n)		(((n) & 0x1f) << 12)
 #define SUN6I_DPHY_ANA4_REG_TMSC(n)		(((n) & 3) << 10)
 #define SUN6I_DPHY_ANA4_REG_TMSD(n)		(((n) & 3) << 8)
@@ -109,6 +122,56 @@
 
 #define SUN6I_DPHY_DBG5_REG		0xf4
 
+#define SUN50I_DPHY_TX_SLEW_REG0	0xf8
+#define SUN50I_DPHY_TX_SLEW_REG1	0xfc
+#define SUN50I_DPHY_TX_SLEW_REG2	0x100
+
+#define SUN50I_DPHY_PLL_REG0		0x104
+#define SUN50I_DPHY_PLL_REG0_CP36_EN		BIT(23)
+#define SUN50I_DPHY_PLL_REG0_LDO_EN		BIT(22)
+#define SUN50I_DPHY_PLL_REG0_EN_LVS		BIT(21)
+#define SUN50I_DPHY_PLL_REG0_PLL_EN		BIT(20)
+#define SUN50I_DPHY_PLL_REG0_P(n)		(((n) & 0xf) << 16)
+#define SUN50I_DPHY_PLL_REG0_N(n)		(((n) & 0xff) << 8)
+#define SUN50I_DPHY_PLL_REG0_NDET		BIT(7)
+#define SUN50I_DPHY_PLL_REG0_TDIV		BIT(6)
+#define SUN50I_DPHY_PLL_REG0_M0(n)		(((n) & 3) << 4)
+#define SUN50I_DPHY_PLL_REG0_M1(n)		((n) & 0xf)
+
+#define SUN50I_DPHY_PLL_REG1		0x108
+#define SUN50I_DPHY_PLL_REG1_UNLOCK_MDSEL(n)	(((n) & 3) << 14)
+#define SUN50I_DPHY_PLL_REG1_LOCKMDSEL		BIT(13)
+#define SUN50I_DPHY_PLL_REG1_LOCKDET_EN		BIT(12)
+#define SUN50I_DPHY_PLL_REG1_VSETA(n)		(((n) & 0x7) << 9)
+#define SUN50I_DPHY_PLL_REG1_VSETD(n)		(((n) & 0x7) << 6)
+#define SUN50I_DPHY_PLL_REG1_LPF_SW		BIT(5)
+#define SUN50I_DPHY_PLL_REG1_ICP_SEL(n)		(((n) & 3) << 3)
+#define SUN50I_DPHY_PLL_REG1_ATEST_SEL(n)	(((n) & 3) << 1)
+#define SUN50I_DPHY_PLL_REG1_TEST_EN		BIT(0)
+
+#define SUN50I_DPHY_PLL_REG2		0x10c
+#define SUN50I_DPHY_PLL_REG2_SDM_EN		BIT(31)
+#define SUN50I_DPHY_PLL_REG2_FF_EN		BIT(30)
+#define SUN50I_DPHY_PLL_REG2_SS_EN		BIT(29)
+#define SUN50I_DPHY_PLL_REG2_SS_FRAC(n)		(((n) & 0x1ff) << 20)
+#define SUN50I_DPHY_PLL_REG2_SS_INT(n)		(((n) & 0xff) << 12)
+#define SUN50I_DPHY_PLL_REG2_FRAC(n)		((n) & 0xfff)
+
+#define SUN50I_COMBO_PHY_REG0		0x110
+#define SUN50I_COMBO_PHY_REG0_EN_TEST_COMBOLDO	BIT(5)
+#define SUN50I_COMBO_PHY_REG0_EN_TEST_0P8	BIT(4)
+#define SUN50I_COMBO_PHY_REG0_EN_MIPI		BIT(3)
+#define SUN50I_COMBO_PHY_REG0_EN_LVDS		BIT(2)
+#define SUN50I_COMBO_PHY_REG0_EN_COMBOLDO	BIT(1)
+#define SUN50I_COMBO_PHY_REG0_EN_CP		BIT(0)
+
+#define SUN50I_COMBO_PHY_REG1		0x114
+#define SUN50I_COMBO_PHY_REG2_REG_VREF1P6(n)	(((n) & 0x7) << 4)
+#define SUN50I_COMBO_PHY_REG2_REG_VREF0P8(n)	((n) & 0x7)
+
+#define SUN50I_COMBO_PHY_REG2		0x118
+#define SUN50I_COMBO_PHY_REG2_HS_STOP_DLY(n)	((n) & 0xff)
+
 enum sun6i_dphy_direction {
 	SUN6I_DPHY_DIRECTION_TX,
 	SUN6I_DPHY_DIRECTION_RX,
@@ -196,6 +259,76 @@ static void sun6i_a31_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy)
 	udelay(1);
 }
 
+static void sun50i_a100_mipi_dphy_tx_power_on(struct sun6i_dphy *dphy)
+{
+	unsigned long mipi_symbol_rate = dphy->config.hs_clk_rate;
+	unsigned int div, n;
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA4_REG,
+		     SUN6I_DPHY_ANA4_REG_IB(2) |
+		     SUN6I_DPHY_ANA4_REG_DMPLVD(4) |
+		     SUN6I_DPHY_ANA4_REG_VTT_SET(3) |
+		     SUN6I_DPHY_ANA4_REG_CKDV(3) |
+		     SUN6I_DPHY_ANA4_REG_TMSD(1) |
+		     SUN6I_DPHY_ANA4_REG_TMSC(1) |
+		     SUN6I_DPHY_ANA4_REG_TXPUSD(2) |
+		     SUN6I_DPHY_ANA4_REG_TXPUSC(3) |
+		     SUN6I_DPHY_ANA4_REG_TXDNSD(2) |
+		     SUN6I_DPHY_ANA4_REG_TXDNSC(3));
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG,
+			   SUN6I_DPHY_ANA2_EN_CK_CPU,
+			   SUN6I_DPHY_ANA2_EN_CK_CPU);
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA2_REG,
+			   SUN6I_DPHY_ANA2_REG_ENIB,
+			   SUN6I_DPHY_ANA2_REG_ENIB);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA3_REG,
+		     SUN6I_DPHY_ANA3_EN_LDOR |
+		     SUN6I_DPHY_ANA3_EN_LDOC |
+		     SUN6I_DPHY_ANA3_EN_LDOD);
+
+	regmap_write(dphy->regs, SUN6I_DPHY_ANA0_REG,
+		     SUN6I_DPHY_ANA0_REG_PLR(4) |
+		     SUN6I_DPHY_ANA0_REG_SFB(1));
+
+	regmap_write(dphy->regs, SUN50I_COMBO_PHY_REG0,
+		     SUN50I_COMBO_PHY_REG0_EN_CP);
+
+	/* Choose a divider to limit the VCO frequency to around 2 GHz. */
+	div = 16 >> order_base_2(DIV_ROUND_UP(mipi_symbol_rate, 264000000));
+	n = mipi_symbol_rate * div / 24000000;
+
+	regmap_write(dphy->regs, SUN50I_DPHY_PLL_REG0,
+		     SUN50I_DPHY_PLL_REG0_CP36_EN |
+		     SUN50I_DPHY_PLL_REG0_LDO_EN |
+		     SUN50I_DPHY_PLL_REG0_EN_LVS |
+		     SUN50I_DPHY_PLL_REG0_PLL_EN |
+		     SUN50I_DPHY_PLL_REG0_NDET |
+		     SUN50I_DPHY_PLL_REG0_P((div - 1) % 8) |
+		     SUN50I_DPHY_PLL_REG0_N(n) |
+		     SUN50I_DPHY_PLL_REG0_M0((div - 1) / 8) |
+		     SUN50I_DPHY_PLL_REG0_M1(2));
+
+	/* Disable sigma-delta modulation. */
+	regmap_write(dphy->regs, SUN50I_DPHY_PLL_REG2, 0);
+
+	regmap_update_bits(dphy->regs, SUN6I_DPHY_ANA4_REG,
+			   SUN6I_DPHY_ANA4_REG_EN_MIPI,
+			   SUN6I_DPHY_ANA4_REG_EN_MIPI);
+
+	regmap_update_bits(dphy->regs, SUN50I_COMBO_PHY_REG0,
+			   SUN50I_COMBO_PHY_REG0_EN_MIPI |
+			   SUN50I_COMBO_PHY_REG0_EN_COMBOLDO,
+			   SUN50I_COMBO_PHY_REG0_EN_MIPI |
+			   SUN50I_COMBO_PHY_REG0_EN_COMBOLDO);
+
+	regmap_write(dphy->regs, SUN50I_COMBO_PHY_REG2,
+		     SUN50I_COMBO_PHY_REG2_HS_STOP_DLY(20));
+	udelay(1);
+}
+
 static int sun6i_dphy_tx_power_on(struct sun6i_dphy *dphy)
 {
 	u8 lanes_mask = GENMASK(dphy->config.lanes - 1, 0);
@@ -408,7 +541,7 @@ static const struct regmap_config sun6i_dphy_regmap_config = {
 	.reg_bits	= 32,
 	.val_bits	= 32,
 	.reg_stride	= 4,
-	.max_register	= SUN6I_DPHY_DBG5_REG,
+	.max_register	= SUN50I_COMBO_PHY_REG2,
 	.name		= "mipi-dphy",
 };
 
@@ -484,11 +617,19 @@ static const struct sun6i_dphy_variant sun6i_a31_mipi_dphy_variant = {
 	.rx_supported	= true,
 };
 
+static const struct sun6i_dphy_variant sun50i_a100_mipi_dphy_variant = {
+	.tx_power_on	= sun50i_a100_mipi_dphy_tx_power_on,
+};
+
 static const struct of_device_id sun6i_dphy_of_table[] = {
 	{
 		.compatible	= "allwinner,sun6i-a31-mipi-dphy",
 		.data		= &sun6i_a31_mipi_dphy_variant,
 	},
+	{
+		.compatible	= "allwinner,sun50i-a100-mipi-dphy",
+		.data		= &sun50i_a100_mipi_dphy_variant,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sun6i_dphy_of_table);
-- 
GitLab


From b53e19799d45edf7ca7cfd0cf5d6fb2d5179edec Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Thu, 17 Nov 2022 14:11:43 +0800
Subject: [PATCH 2727/4122] phy: use devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Link: https://lore.kernel.org/r/202211171411439883050@zte.com.cn
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/marvell/phy-mmp3-hsic.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/phy/marvell/phy-mmp3-hsic.c b/drivers/phy/marvell/phy-mmp3-hsic.c
index 7cccf01848d8d..f2537fdcc3ab4 100644
--- a/drivers/phy/marvell/phy-mmp3-hsic.c
+++ b/drivers/phy/marvell/phy-mmp3-hsic.c
@@ -41,12 +41,10 @@ static int mmp3_hsic_phy_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct phy_provider *provider;
-	struct resource *resource;
 	void __iomem *base;
 	struct phy *phy;
 
-	resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(dev, resource);
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
-- 
GitLab


From f12faa3be8e84fa9232a4654bccb30f46bbfee5c Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@ti.com>
Date: Tue, 22 Nov 2022 19:24:12 -0800
Subject: [PATCH 2728/4122] dt-bindings: phy-j721e-wiz: add j721s2 compatible
 string

Add ti,j721s2-wiz-10g compatible string to binding documentation.

Signed-off-by: Matt Ranostay <mranostay@ti.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221123032413.1193961-2-mranostay@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
index a9e38739c0100..c54b36c104abe 100644
--- a/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
@@ -15,6 +15,7 @@ properties:
     enum:
       - ti,j721e-wiz-16g
       - ti,j721e-wiz-10g
+      - ti,j721s2-wiz-10g
       - ti,am64-wiz-10g
       - ti,j7200-wiz-10g
       - ti,j784s4-wiz-10g
-- 
GitLab


From bea3ce759b4664f20f1f57c53fe018c3b67da147 Mon Sep 17 00:00:00 2001
From: Matt Ranostay <mranostay@ti.com>
Date: Tue, 22 Nov 2022 19:24:13 -0800
Subject: [PATCH 2729/4122] phy: ti: phy-j721e-wiz: add j721s2-wiz-10g module
 support

Add support for j721s2-wiz-10g device which is similar to j721e-wiz-10g but
uses clock-names interface versus explicitly defining clock nodes within
device tree node.

Signed-off-by: Matt Ranostay <mranostay@ti.com>
Reviewed-by: Roger Quadros <rogerq@kernel.org>
Link: https://lore.kernel.org/r/20221123032413.1193961-3-mranostay@ti.com
Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/phy/ti/phy-j721e-wiz.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/drivers/phy/ti/phy-j721e-wiz.c b/drivers/phy/ti/phy-j721e-wiz.c
index 141b51af4427a..ddce5ef7711c6 100644
--- a/drivers/phy/ti/phy-j721e-wiz.c
+++ b/drivers/phy/ti/phy-j721e-wiz.c
@@ -322,6 +322,7 @@ enum wiz_type {
 	AM64_WIZ_10G,
 	J7200_WIZ_10G,  /* J7200 SR2.0 */
 	J784S4_WIZ_10G,
+	J721S2_WIZ_10G,
 };
 
 struct wiz_data {
@@ -1000,6 +1001,7 @@ static void wiz_clock_cleanup(struct wiz *wiz, struct device_node *node)
 	case AM64_WIZ_10G:
 	case J7200_WIZ_10G:
 	case J784S4_WIZ_10G:
+	case J721S2_WIZ_10G:
 		of_clk_del_provider(dev->of_node);
 		return;
 	default:
@@ -1132,6 +1134,7 @@ static int wiz_clock_init(struct wiz *wiz, struct device_node *node)
 	case AM64_WIZ_10G:
 	case J7200_WIZ_10G:
 	case J784S4_WIZ_10G:
+	case J721S2_WIZ_10G:
 		ret = wiz_clock_register(wiz);
 		if (ret)
 			dev_err(dev, "Failed to register wiz clocks\n");
@@ -1214,6 +1217,7 @@ static int wiz_phy_fullrt_div(struct wiz *wiz, int lane)
 		break;
 	case J721E_WIZ_10G:
 	case J7200_WIZ_10G:
+	case J721S2_WIZ_10G:
 		if (wiz->lane_phy_type[lane] == PHY_TYPE_SGMII)
 			return regmap_field_write(wiz->p0_fullrt_div[lane], 0x2);
 		break;
@@ -1318,6 +1322,15 @@ static struct wiz_data j784s4_10g_data = {
 	.clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G,
 };
 
+static struct wiz_data j721s2_10g_data = {
+	.type = J721S2_WIZ_10G,
+	.pll0_refclk_mux_sel = &pll0_refclk_mux_sel,
+	.pll1_refclk_mux_sel = &pll1_refclk_mux_sel,
+	.refclk_dig_sel = &refclk_dig_sel_10g,
+	.clk_mux_sel = clk_mux_sel_10g,
+	.clk_div_sel_num = WIZ_DIV_NUM_CLOCKS_10G,
+};
+
 static const struct of_device_id wiz_id_table[] = {
 	{
 		.compatible = "ti,j721e-wiz-16g", .data = &j721e_16g_data,
@@ -1334,6 +1347,9 @@ static const struct of_device_id wiz_id_table[] = {
 	{
 		.compatible = "ti,j784s4-wiz-10g", .data = &j784s4_10g_data,
 	},
+	{
+		.compatible = "ti,j721s2-wiz-10g", .data = &j721s2_10g_data,
+	},
 	{}
 };
 MODULE_DEVICE_TABLE(of, wiz_id_table);
-- 
GitLab


From b4d46c57d2fb0fa2611fa2ffbaf715925989f83f Mon Sep 17 00:00:00 2001
From: Cheng Xu <chengyou@linux.alibaba.com>
Date: Thu, 24 Nov 2022 19:49:33 +0800
Subject: [PATCH 2730/4122] RDMA/erdma: Fix a typo in annotation

A non-ASCII character was wrongly put in a comment, use the ACSII version.

Fixes: bee85e0e31ec ("RDMA/erdma: Add main include file")
Link: https://lore.kernel.org/r/20221124114933.77250-1-chengyou@linux.alibaba.com
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/erdma/erdma.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index bb23d897c7108..35726f25a989c 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -219,7 +219,7 @@ struct erdma_dev {
 	DECLARE_BITMAP(sdb_page, ERDMA_DWQE_TYPE0_CNT);
 	/*
 	 * We provide max 496 uContexts that each has one SQ normal Db,
-	 * and one directWQE db。
+	 * and one directWQE db.
 	 */
 	DECLARE_BITMAP(sdb_entry, ERDMA_DWQE_TYPE1_CNT);
 
-- 
GitLab


From 35765dccaf3485575a4420da529c72484c980345 Mon Sep 17 00:00:00 2001
From: Cheng Xu <chengyou@linux.alibaba.com>
Date: Wed, 16 Nov 2022 10:31:05 +0800
Subject: [PATCH 2731/4122] RDMA/erdma: Add a workqueue for WRs reflushing

ERDMA driver use a workqueue for asynchronous reflush command posting.
Implement the lifecycle of this workqueue.

Link: https://lore.kernel.org/r/20221116023107.82835-2-chengyou@linux.alibaba.com
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/erdma/erdma.h      |  1 +
 drivers/infiniband/hw/erdma/erdma_main.c | 14 ++++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma.h b/drivers/infiniband/hw/erdma/erdma.h
index 35726f25a989c..3d8c11aa23a26 100644
--- a/drivers/infiniband/hw/erdma/erdma.h
+++ b/drivers/infiniband/hw/erdma/erdma.h
@@ -190,6 +190,7 @@ struct erdma_dev {
 	struct net_device *netdev;
 	struct pci_dev *pdev;
 	struct notifier_block netdev_nb;
+	struct workqueue_struct *reflush_wq;
 
 	resource_size_t func_bar_addr;
 	resource_size_t func_bar_len;
diff --git a/drivers/infiniband/hw/erdma/erdma_main.c b/drivers/infiniband/hw/erdma/erdma_main.c
index e44b06fea5957..5dc31e5df5cba 100644
--- a/drivers/infiniband/hw/erdma/erdma_main.c
+++ b/drivers/infiniband/hw/erdma/erdma_main.c
@@ -521,13 +521,22 @@ static int erdma_ib_device_add(struct pci_dev *pdev)
 
 	u64_to_ether_addr(mac, dev->attrs.peer_addr);
 
+	dev->reflush_wq = alloc_workqueue("erdma-reflush-wq", WQ_UNBOUND,
+					  WQ_UNBOUND_MAX_ACTIVE);
+	if (!dev->reflush_wq) {
+		ret = -ENOMEM;
+		goto err_alloc_workqueue;
+	}
+
 	ret = erdma_device_register(dev);
 	if (ret)
-		goto err_out;
+		goto err_register;
 
 	return 0;
 
-err_out:
+err_register:
+	destroy_workqueue(dev->reflush_wq);
+err_alloc_workqueue:
 	xa_destroy(&dev->qp_xa);
 	xa_destroy(&dev->cq_xa);
 
@@ -543,6 +552,7 @@ static void erdma_ib_device_remove(struct pci_dev *pdev)
 	unregister_netdevice_notifier(&dev->netdev_nb);
 	ib_unregister_device(&dev->ibdev);
 
+	destroy_workqueue(dev->reflush_wq);
 	erdma_res_cb_free(dev);
 	xa_destroy(&dev->qp_xa);
 	xa_destroy(&dev->cq_xa);
-- 
GitLab


From 54d8fffc2a500953ba90ff9462ae06bb05ca2354 Mon Sep 17 00:00:00 2001
From: Cheng Xu <chengyou@linux.alibaba.com>
Date: Wed, 16 Nov 2022 10:31:06 +0800
Subject: [PATCH 2732/4122] RDMA/erdma: Implement the lifecycle of reflushing
 work for each QP

Each QP has a work for reflushing purpose. In the work, driver will report
the latest pi to hardware.

Link: https://lore.kernel.org/r/20221116023107.82835-3-chengyou@linux.alibaba.com
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/erdma/erdma_hw.h    |  8 ++++++++
 drivers/infiniband/hw/erdma/erdma_verbs.c | 18 ++++++++++++++++++
 drivers/infiniband/hw/erdma/erdma_verbs.h |  2 ++
 3 files changed, 28 insertions(+)

diff --git a/drivers/infiniband/hw/erdma/erdma_hw.h b/drivers/infiniband/hw/erdma/erdma_hw.h
index 1b2e2b70678f5..ab371fec610c3 100644
--- a/drivers/infiniband/hw/erdma/erdma_hw.h
+++ b/drivers/infiniband/hw/erdma/erdma_hw.h
@@ -145,6 +145,7 @@ enum CMDQ_RDMA_OPCODE {
 	CMDQ_OPCODE_MODIFY_QP = 3,
 	CMDQ_OPCODE_CREATE_CQ = 4,
 	CMDQ_OPCODE_DESTROY_CQ = 5,
+	CMDQ_OPCODE_REFLUSH = 6,
 	CMDQ_OPCODE_REG_MR = 8,
 	CMDQ_OPCODE_DEREG_MR = 9
 };
@@ -301,6 +302,13 @@ struct erdma_cmdq_destroy_qp_req {
 	u32 qpn;
 };
 
+struct erdma_cmdq_reflush_req {
+	u64 hdr;
+	u32 qpn;
+	u32 sq_pi;
+	u32 rq_pi;
+};
+
 /* cap qword 0 definition */
 #define ERDMA_CMD_DEV_CAP_MAX_CQE_MASK GENMASK_ULL(47, 40)
 #define ERDMA_CMD_DEV_CAP_FLAGS_MASK GENMASK_ULL(31, 24)
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.c b/drivers/infiniband/hw/erdma/erdma_verbs.c
index d843ce1f35f3d..5dab1e87975ba 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.c
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.c
@@ -379,6 +379,21 @@ int erdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
 	return 0;
 }
 
+static void erdma_flush_worker(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct erdma_qp *qp =
+		container_of(dwork, struct erdma_qp, reflush_dwork);
+	struct erdma_cmdq_reflush_req req;
+
+	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
+				CMDQ_OPCODE_REFLUSH);
+	req.qpn = QP_ID(qp);
+	req.sq_pi = qp->kern_qp.sq_pi;
+	req.rq_pi = qp->kern_qp.rq_pi;
+	erdma_post_cmd_wait(&qp->dev->cmdq, &req, sizeof(req), NULL, NULL);
+}
+
 static int erdma_qp_validate_cap(struct erdma_dev *dev,
 				 struct ib_qp_init_attr *attrs)
 {
@@ -735,6 +750,7 @@ int erdma_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *attrs,
 	qp->attrs.max_send_sge = attrs->cap.max_send_sge;
 	qp->attrs.max_recv_sge = attrs->cap.max_recv_sge;
 	qp->attrs.state = ERDMA_QP_STATE_IDLE;
+	INIT_DELAYED_WORK(&qp->reflush_dwork, erdma_flush_worker);
 
 	ret = create_qp_cmd(dev, qp);
 	if (ret)
@@ -1028,6 +1044,8 @@ int erdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 	erdma_modify_qp_internal(qp, &qp_attrs, ERDMA_QP_ATTR_STATE);
 	up_write(&qp->state_lock);
 
+	cancel_delayed_work_sync(&qp->reflush_dwork);
+
 	erdma_cmdq_build_reqhdr(&req.hdr, CMDQ_SUBMOD_RDMA,
 				CMDQ_OPCODE_DESTROY_QP);
 	req.qpn = QP_ID(qp);
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index a5574f0252bb4..9f341d032069b 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -197,6 +197,8 @@ struct erdma_qp {
 	struct erdma_cep *cep;
 	struct rw_semaphore state_lock;
 
+	struct delayed_work reflush_dwork;
+
 	union {
 		struct erdma_kqp kern_qp;
 		struct erdma_uqp user_qp;
-- 
GitLab


From 0edf42cbcc8690ef349d4432fea74d7791e3c645 Mon Sep 17 00:00:00 2001
From: Cheng Xu <chengyou@linux.alibaba.com>
Date: Wed, 16 Nov 2022 10:31:07 +0800
Subject: [PATCH 2733/4122] RDMA/erdma: Notify the latest PI to FW for
 reflushing when necessary

Firmware is responsible for flushing WRs in HW, and it's a little
difficult for firmware to get the latest PI of QPs, especially for RQs
after QP state being changed to ERROR. So we introduce a new CMDQ command,
by which driver can notify to latest PI to FW, and then FW can flush all
posted WRs.

Link: https://lore.kernel.org/r/20221116023107.82835-4-chengyou@linux.alibaba.com
Signed-off-by: Cheng Xu <chengyou@linux.alibaba.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/erdma/erdma_qp.c    | 30 ++++++++++++++++-------
 drivers/infiniband/hw/erdma/erdma_verbs.h |  5 ++++
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/infiniband/hw/erdma/erdma_qp.c b/drivers/infiniband/hw/erdma/erdma_qp.c
index 521e97258de77..d088d6bef431a 100644
--- a/drivers/infiniband/hw/erdma/erdma_qp.c
+++ b/drivers/infiniband/hw/erdma/erdma_qp.c
@@ -120,6 +120,7 @@ static int erdma_modify_qp_state_to_stop(struct erdma_qp *qp,
 int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
 			     enum erdma_qp_attr_mask mask)
 {
+	bool need_reflush = false;
 	int drop_conn, ret = 0;
 
 	if (!mask)
@@ -135,6 +136,7 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
 			ret = erdma_modify_qp_state_to_rts(qp, attrs, mask);
 		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
 			qp->attrs.state = ERDMA_QP_STATE_ERROR;
+			need_reflush = true;
 			if (qp->cep) {
 				erdma_cep_put(qp->cep);
 				qp->cep = NULL;
@@ -145,17 +147,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
 	case ERDMA_QP_STATE_RTS:
 		drop_conn = 0;
 
-		if (attrs->state == ERDMA_QP_STATE_CLOSING) {
+		if (attrs->state == ERDMA_QP_STATE_CLOSING ||
+		    attrs->state == ERDMA_QP_STATE_TERMINATE ||
+		    attrs->state == ERDMA_QP_STATE_ERROR) {
 			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
 			drop_conn = 1;
-		} else if (attrs->state == ERDMA_QP_STATE_TERMINATE) {
-			qp->attrs.state = ERDMA_QP_STATE_TERMINATE;
-			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
-			drop_conn = 1;
-		} else if (attrs->state == ERDMA_QP_STATE_ERROR) {
-			ret = erdma_modify_qp_state_to_stop(qp, attrs, mask);
-			qp->attrs.state = ERDMA_QP_STATE_ERROR;
-			drop_conn = 1;
+			need_reflush = true;
 		}
 
 		if (drop_conn)
@@ -180,6 +177,12 @@ int erdma_modify_qp_internal(struct erdma_qp *qp, struct erdma_qp_attrs *attrs,
 		break;
 	}
 
+	if (need_reflush && !ret && rdma_is_kernel_res(&qp->ibqp.res)) {
+		qp->flags |= ERDMA_QP_IN_FLUSHING;
+		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+				 usecs_to_jiffies(100));
+	}
+
 	return ret;
 }
 
@@ -527,6 +530,10 @@ int erdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *send_wr,
 	}
 	spin_unlock_irqrestore(&qp->lock, flags);
 
+	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+				 usecs_to_jiffies(100));
+
 	return ret;
 }
 
@@ -580,5 +587,10 @@ int erdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *recv_wr,
 	}
 
 	spin_unlock_irqrestore(&qp->lock, flags);
+
+	if (unlikely(qp->flags & ERDMA_QP_IN_FLUSHING))
+		mod_delayed_work(qp->dev->reflush_wq, &qp->reflush_dwork,
+				 usecs_to_jiffies(100));
+
 	return ret;
 }
diff --git a/drivers/infiniband/hw/erdma/erdma_verbs.h b/drivers/infiniband/hw/erdma/erdma_verbs.h
index 9f341d032069b..e0a993bc032a4 100644
--- a/drivers/infiniband/hw/erdma/erdma_verbs.h
+++ b/drivers/infiniband/hw/erdma/erdma_verbs.h
@@ -173,6 +173,10 @@ enum erdma_qp_attr_mask {
 	ERDMA_QP_ATTR_MPA = (1 << 7)
 };
 
+enum erdma_qp_flags {
+	ERDMA_QP_IN_FLUSHING = (1 << 0),
+};
+
 struct erdma_qp_attrs {
 	enum erdma_qp_state state;
 	enum erdma_cc_alg cc; /* Congestion control algorithm */
@@ -197,6 +201,7 @@ struct erdma_qp {
 	struct erdma_cep *cep;
 	struct rw_semaphore state_lock;
 
+	unsigned long flags;
 	struct delayed_work reflush_dwork;
 
 	union {
-- 
GitLab


From 3a042fda8813dcb1012347665a69572adbaef1f1 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 24 Nov 2022 09:49:27 +0100
Subject: [PATCH 2734/4122] iio: imu: st_lsm6dsx: add support to ISM330IS

Add support to STM ISM330IS (accelerometer and gyroscope) Mems sensor.
The ISM330IS sensor can use LSM6DSO16IS as fallback device since it
implements all the ISM330IS features currently implemented in
st_lsm6dsx.

Datasheet: https://www.st.com/resource/en/datasheet/ism330is.pdf
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/4cd1614060e06f49cd92f5930d8fd40117c07920.1669279604.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/imu/st_lsm6dsx/Kconfig           | 4 ++--
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h      | 2 ++
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c | 8 ++++++--
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c  | 5 +++++
 drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c  | 5 +++++
 5 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/drivers/iio/imu/st_lsm6dsx/Kconfig b/drivers/iio/imu/st_lsm6dsx/Kconfig
index 0096035728cd4..f6660847fb58d 100644
--- a/drivers/iio/imu/st_lsm6dsx/Kconfig
+++ b/drivers/iio/imu/st_lsm6dsx/Kconfig
@@ -13,8 +13,8 @@ config IIO_ST_LSM6DSX
 	  sensor. Supported devices: lsm6ds3, lsm6ds3h, lsm6dsl, lsm6dsm,
 	  ism330dlc, lsm6dso, lsm6dsox, asm330lhh, asm330lhhx, lsm6dsr,
 	  lsm6ds3tr-c, ism330dhcx, lsm6dsrx, lsm6ds0, lsm6dsop, lsm6dstx,
-	  lsm6dsv, lsm6dsv16x, lsm6dso16is, the accelerometer/gyroscope
-	  of lsm9ds1 and lsm6dst.
+	  lsm6dsv, lsm6dsv16x, lsm6dso16is, ism330is, lsm6dst and the
+	  accelerometer/gyroscope of lsm9ds1.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called st_lsm6dsx.
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
index abf14a2ce0e95..5b6f195748fc6 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx.h
@@ -36,6 +36,7 @@
 #define ST_LSM6DSV_DEV_NAME	"lsm6dsv"
 #define ST_LSM6DSV16X_DEV_NAME	"lsm6dsv16x"
 #define ST_LSM6DSO16IS_DEV_NAME	"lsm6dso16is"
+#define ST_ISM330IS_DEV_NAME	"ism330is"
 
 enum st_lsm6dsx_hw_id {
 	ST_LSM6DS3_ID,
@@ -59,6 +60,7 @@ enum st_lsm6dsx_hw_id {
 	ST_LSM6DSV_ID,
 	ST_LSM6DSV16X_ID,
 	ST_LSM6DSO16IS_ID,
+	ST_ISM330IS_ID,
 	ST_LSM6DSX_MAX_ID,
 };
 
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
index 57a79bf35bbaa..3f6060c64f32b 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_core.c
@@ -27,7 +27,7 @@
  *   - FIFO size: 4KB
  *
  * - LSM6DSO/LSM6DSOX/ASM330LHH/ASM330LHHX/LSM6DSR/ISM330DHCX/LSM6DST/LSM6DSOP/
- *   LSM6DSTX/LSM6DSO16IS:
+ *   LSM6DSTX/LSM6DSO16IS/ISM330IS:
  *   - Accelerometer/Gyroscope supported ODR [Hz]: 12.5, 26, 52, 104, 208, 416,
  *     833
  *   - Accelerometer supported full-scale [g]: +-2/+-4/+-8/+-16
@@ -1393,7 +1393,11 @@ static const struct st_lsm6dsx_settings st_lsm6dsx_sensor_settings[] = {
 				.hw_id = ST_LSM6DSO16IS_ID,
 				.name = ST_LSM6DSO16IS_DEV_NAME,
 				.wai = 0x22,
-			},
+			}, {
+				.hw_id = ST_ISM330IS_ID,
+				.name = ST_ISM330IS_DEV_NAME,
+				.wai = 0x22,
+			}
 		},
 		.channels = {
 			[ST_LSM6DSX_ID_ACC] = {
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
index c34ccc85e4c71..df5f609252600 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_i2c.c
@@ -121,6 +121,10 @@ static const struct of_device_id st_lsm6dsx_i2c_of_match[] = {
 		.compatible = "st,lsm6dso16is",
 		.data = (void *)ST_LSM6DSO16IS_ID,
 	},
+	{
+		.compatible = "st,ism330is",
+		.data = (void *)ST_ISM330IS_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_i2c_of_match);
@@ -147,6 +151,7 @@ static const struct i2c_device_id st_lsm6dsx_i2c_id_table[] = {
 	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
 	{ ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID },
 	{ ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID },
+	{ ST_ISM330IS_DEV_NAME, ST_ISM330IS_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(i2c, st_lsm6dsx_i2c_id_table);
diff --git a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
index 24d5e51a8662e..974584bda875e 100644
--- a/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
+++ b/drivers/iio/imu/st_lsm6dsx/st_lsm6dsx_spi.c
@@ -121,6 +121,10 @@ static const struct of_device_id st_lsm6dsx_spi_of_match[] = {
 		.compatible = "st,lsm6dso16is",
 		.data = (void *)ST_LSM6DSO16IS_ID,
 	},
+	{
+		.compatible = "st,ism330is",
+		.data = (void *)ST_ISM330IS_ID,
+	},
 	{},
 };
 MODULE_DEVICE_TABLE(of, st_lsm6dsx_spi_of_match);
@@ -147,6 +151,7 @@ static const struct spi_device_id st_lsm6dsx_spi_id_table[] = {
 	{ ST_LSM6DSV_DEV_NAME, ST_LSM6DSV_ID },
 	{ ST_LSM6DSV16X_DEV_NAME, ST_LSM6DSV16X_ID },
 	{ ST_LSM6DSO16IS_DEV_NAME, ST_LSM6DSO16IS_ID },
+	{ ST_ISM330IS_DEV_NAME, ST_ISM330IS_ID },
 	{},
 };
 MODULE_DEVICE_TABLE(spi, st_lsm6dsx_spi_id_table);
-- 
GitLab


From 91f46207056d6af2052495f911901a5ff337e364 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 24 Nov 2022 09:49:28 +0100
Subject: [PATCH 2735/4122] dt-bindings: iio: imu: st_lsm6dsx: add ism330is

Add device bindings for ism330is IMU sensor.
Use lsm6dso16is as fallback device for ism330is since it implements all the
features currently supported by ism330is.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/a7a8a00037952928364269615ee8b6da4547795b.1669279604.git.lorenzo@kernel.org
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
index 07d5aee7e4421..68b481c633188 100644
--- a/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
+++ b/Documentation/devicetree/bindings/iio/imu/st,lsm6dsx.yaml
@@ -43,6 +43,9 @@ properties:
       - items:
           - const: st,lsm6dsv16x
           - const: st,lsm6dsv
+      - items:
+          - const: st,ism330is
+          - const: st,lsm6dso16is
 
   reg:
     maxItems: 1
-- 
GitLab


From f1e24342401a69da0519b289f0942fcd69e7db55 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Thu, 24 Nov 2022 17:25:59 +0200
Subject: [PATCH 2736/4122] iio: addac: ad74115: remove unused
 ad74115_dac_slew_rate_hz_tbl

This table was supposed to be used for handling the DAC rate, but it
ended up being unused. Remove it.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20221124152559.7895-1-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/addac/ad74115.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/iio/addac/ad74115.c b/drivers/iio/addac/ad74115.c
index 383b92e7b6826..e6bc5eb3788df 100644
--- a/drivers/iio/addac/ad74115.c
+++ b/drivers/iio/addac/ad74115.c
@@ -317,10 +317,6 @@ static const unsigned int ad74115_dac_rate_step_tbl[][3] = {
 	{ AD74115_SLEW_MODE_LINEAR, AD74115_SLEW_STEP_22_2_PERCENT, AD74115_SLEW_RATE_240KHZ },
 };
 
-static const unsigned int ad74115_dac_slew_rate_hz_tbl[] = {
-	4000, 64000, 150000, 240000
-};
-
 static const unsigned int ad74115_rtd_excitation_current_ua_tbl[] = {
 	250, 500, 750, 1000
 };
-- 
GitLab


From f84eec02b7248b6ae3bd8ef18a66f6f64eeab971 Mon Sep 17 00:00:00 2001
From: Cosmin Tanislav <demonsingur@gmail.com>
Date: Thu, 24 Nov 2022 17:30:49 +0200
Subject: [PATCH 2737/4122] iio: addac: ad74413r: fix blank line after
 declaration warning

Checkpatch wants a blank line after all declarations. Add it now,
even though the patch has already been submitted.

Signed-off-by: Cosmin Tanislav <cosmin.tanislav@analog.com>
Link: https://lore.kernel.org/r/20221124153049.8851-1-cosmin.tanislav@analog.com
Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
---
 drivers/iio/addac/ad74413r.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/iio/addac/ad74413r.c b/drivers/iio/addac/ad74413r.c
index 61030053cbeaa..f32c8c2fb26d2 100644
--- a/drivers/iio/addac/ad74413r.c
+++ b/drivers/iio/addac/ad74413r.c
@@ -1315,6 +1315,7 @@ static int ad74413r_probe(struct spi_device *spi)
 	st->chip_info = device_get_match_data(&spi->dev);
 	if (!st->chip_info) {
 		const struct spi_device_id *id = spi_get_device_id(spi);
+
 		if (id)
 			st->chip_info =
 				(struct ad74413r_chip_info *)id->driver_data;
-- 
GitLab


From 955bee204f3dd307642c101b75e370662987e735 Mon Sep 17 00:00:00 2001
From: Zhang Qilong <zhangqilong3@huawei.com>
Date: Mon, 21 Nov 2022 09:18:31 +0800
Subject: [PATCH 2738/4122] power: supply: z2_battery: Fix possible memleak in
 z2_batt_probe()

If devm_gpiod_get_optional() returns error, the charger should be
freed before z2_batt_probe returns according to the context. We
fix it by just gotoing to 'err' branch.

Fixes: a3b4388ea19b ("power: supply: z2_battery: Convert to GPIO descriptors")
Signed-off-by: Zhang Qilong <zhangqilong3@huawei.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/z2_battery.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/power/supply/z2_battery.c b/drivers/power/supply/z2_battery.c
index eb01b01bf593b..0ba4a590a0a5f 100644
--- a/drivers/power/supply/z2_battery.c
+++ b/drivers/power/supply/z2_battery.c
@@ -205,10 +205,12 @@ static int z2_batt_probe(struct i2c_client *client)
 
 	charger->charge_gpiod = devm_gpiod_get_optional(&client->dev,
 							NULL, GPIOD_IN);
-	if (IS_ERR(charger->charge_gpiod))
-		return dev_err_probe(&client->dev,
+	if (IS_ERR(charger->charge_gpiod)) {
+		ret = dev_err_probe(&client->dev,
 				     PTR_ERR(charger->charge_gpiod),
 				     "failed to get charge GPIO\n");
+		goto err;
+	}
 
 	if (charger->charge_gpiod) {
 		gpiod_set_consumer_name(charger->charge_gpiod, "BATT CHRG");
-- 
GitLab


From 97f2b4ddb0aa700d673691a7d5e44d226d22bab7 Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Thu, 17 Nov 2022 10:45:58 +0800
Subject: [PATCH 2739/4122] power: supply: cw2015: Fix potential null-ptr-deref
 in cw_bat_probe()

cw_bat_probe() calls create_singlethread_workqueue() and not checked the
ret value, which may return NULL. And a null-ptr-deref may happen:

cw_bat_probe()
    create_singlethread_workqueue() # failed, cw_bat->wq is NULL
    queue_delayed_work()
        queue_delayed_work_on()
            __queue_delayed_work()  # warning here, but continue
                __queue_work()      # access wq->flags, null-ptr-deref

Check the ret value and return -ENOMEM if it is NULL.

Fixes: b4c7715c10c1 ("power: supply: add CellWise cw2015 fuel gauge driver")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/cw2015_battery.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/power/supply/cw2015_battery.c b/drivers/power/supply/cw2015_battery.c
index 6d52641151d9a..473522b4326ad 100644
--- a/drivers/power/supply/cw2015_battery.c
+++ b/drivers/power/supply/cw2015_battery.c
@@ -699,6 +699,9 @@ static int cw_bat_probe(struct i2c_client *client)
 	}
 
 	cw_bat->battery_workqueue = create_singlethread_workqueue("rk_battery");
+	if (!cw_bat->battery_workqueue)
+		return -ENOMEM;
+
 	devm_delayed_work_autocancel(&client->dev,
 							  &cw_bat->battery_delay_work, cw_bat_work);
 	queue_delayed_work(cw_bat->battery_workqueue,
-- 
GitLab


From 3ffa9f713c39a213a08d9ff13ab983a8aa5d8b5d Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Thu, 24 Nov 2022 11:33:32 +0000
Subject: [PATCH 2740/4122] HSI: omap_ssi_core: Fix error handling in
 ssi_init()

The ssi_init() returns the platform_driver_register() directly without
checking its return value, if platform_driver_register() failed, the
ssi_pdriver is not unregistered.
Fix by unregister ssi_pdriver when the last platform_driver_register()
failed.

Fixes: 0fae198988b8 ("HSI: omap_ssi: built omap_ssi and omap_ssi_port into one module")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/hsi/controllers/omap_ssi_core.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/hsi/controllers/omap_ssi_core.c b/drivers/hsi/controllers/omap_ssi_core.c
index 052cf3e92dd69..26f2c3c012978 100644
--- a/drivers/hsi/controllers/omap_ssi_core.c
+++ b/drivers/hsi/controllers/omap_ssi_core.c
@@ -631,7 +631,13 @@ static int __init ssi_init(void) {
 	if (ret)
 		return ret;
 
-	return platform_driver_register(&ssi_port_pdriver);
+	ret = platform_driver_register(&ssi_port_pdriver);
+	if (ret) {
+		platform_driver_unregister(&ssi_pdriver);
+		return ret;
+	}
+
+	return 0;
 }
 module_init(ssi_init);
 
-- 
GitLab


From f0052d7a1edb3d8921b4e154aa8c46c4845b3714 Mon Sep 17 00:00:00 2001
From: Duke Xin <duke_xinanwen@163.com>
Date: Sat, 19 Nov 2022 17:44:47 +0800
Subject: [PATCH 2741/4122] USB: serial: option: add Quectel EM05-G modem

The EM05-G modem has 2 USB configurations that are configurable via the AT
command AT+QCFG="usbnet",[ 0 | 2 ] which make the modem enumerate with
the following interfaces, respectively:

"RMNET" : AT + DIAG + NMEA + Modem + QMI
"MBIM"  : MBIM + AT + DIAG + NMEA + Modem

The detailed description of the USB configuration for each mode as follows:

RMNET Mode
--------------
T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 21 Spd=480  MxCh= 0
D:  Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=2c7c ProdID=0311 Rev= 3.18
S:  Manufacturer=Quectel
S:  Product=Quectel EM05-G
C:* #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr=500mA
I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=83(I) Atr=03(Int.) MxPS=  10 Ivl=32ms
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=85(I) Atr=03(Int.) MxPS=  10 Ivl=32ms
E:  Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=87(I) Atr=03(Int.) MxPS=  10 Ivl=32ms
E:  Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 6 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=ff Prot=ff Driver=(none)
E:  Ad=89(I) Atr=03(Int.) MxPS=   8 Ivl=32ms
E:  Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms

MBIM Mode
--------------
T:  Bus=01 Lev=01 Prnt=01 Port=00 Cnt=01 Dev#= 16 Spd=480  MxCh= 0
D:  Ver= 2.00 Cls=ef(misc ) Sub=02 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=2c7c ProdID=0311 Rev= 3.18
S:  Manufacturer=Quectel
S:  Product=Quectel EM05-G
C:* #Ifs= 6 Cfg#= 1 Atr=a0 MxPwr=500mA
A:  FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00
I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=ff Prot=ff Driver=option
E:  Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 4 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=83(I) Atr=03(Int.) MxPS=  10 Ivl=32ms
E:  Ad=82(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=85(I) Atr=03(Int.) MxPS=  10 Ivl=32ms
E:  Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 5 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=00 Prot=00 Driver=option
E:  Ad=87(I) Atr=03(Int.) MxPS=  10 Ivl=32ms
E:  Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms
I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver=cdc_mbim
E:  Ad=89(I) Atr=03(Int.) MxPS=  64 Ivl=32ms
I:  If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim
I:* If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver=cdc_mbim
E:  Ad=88(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms
E:  Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms

Signed-off-by: Duke Xin <duke_xinanwen@163.com>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/option.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index c3b7f1d98e781..dee79c7d82d5c 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -255,6 +255,7 @@ static void option_instat_callback(struct urb *urb);
 #define QUECTEL_PRODUCT_EP06			0x0306
 #define QUECTEL_PRODUCT_EM05G			0x030a
 #define QUECTEL_PRODUCT_EM060K			0x030b
+#define QUECTEL_PRODUCT_EM05G_SG		0x0311
 #define QUECTEL_PRODUCT_EM12			0x0512
 #define QUECTEL_PRODUCT_RM500Q			0x0800
 #define QUECTEL_PRODUCT_RM520N			0x0801
@@ -1160,6 +1161,8 @@ static const struct usb_device_id option_ids[] = {
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },
 	{ USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff),
 	  .driver_info = RSVD(6) | ZLP },
+	{ USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G_SG, 0xff),
+	  .driver_info = RSVD(6) | ZLP },
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0x00, 0x40) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x30) },
 	{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM060K, 0xff, 0xff, 0x40) },
-- 
GitLab


From a7008584ab19d2df05caa95634cd72bc41f4cad3 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Nov 2022 16:12:33 -0800
Subject: [PATCH 2742/4122] crypto: api - optimize algorithm registration when
 self-tests disabled

Currently, registering an algorithm with the crypto API always causes a
notification to be posted to the "cryptomgr", which then creates a
kthread to self-test the algorithm.  However, if self-tests are disabled
in the kconfig (as is the default option), then this kthread just
notifies waiters that the algorithm has been tested, then exits.

This causes a significant amount of overhead, especially in the kthread
creation and destruction, which is not necessary at all.  For example,
in a quick test I found that booting a "minimum" x86_64 kernel with all
the crypto options enabled (except for the self-tests) takes about 400ms
until PID 1 can start.  Of that, a full 13ms is spent just doing this
pointless dance, involving a kthread being created, run, and destroyed
over 200 times.  That's over 3% of the entire kernel start time.

Fix this by just skipping the creation of the test larval and the
posting of the registration notification entirely, when self-tests are
disabled.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c | 154 +++++++++++++++++++++++++++---------------------
 crypto/api.c    |   3 -
 2 files changed, 86 insertions(+), 71 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 5c69ff8e8fa5c..950195e90bfc9 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -222,12 +222,64 @@ void crypto_remove_spawns(struct crypto_alg *alg, struct list_head *list,
 }
 EXPORT_SYMBOL_GPL(crypto_remove_spawns);
 
+static void crypto_alg_finish_registration(struct crypto_alg *alg,
+					   bool fulfill_requests,
+					   struct list_head *algs_to_put)
+{
+	struct crypto_alg *q;
+
+	list_for_each_entry(q, &crypto_alg_list, cra_list) {
+		if (q == alg)
+			continue;
+
+		if (crypto_is_moribund(q))
+			continue;
+
+		if (crypto_is_larval(q)) {
+			struct crypto_larval *larval = (void *)q;
+
+			/*
+			 * Check to see if either our generic name or
+			 * specific name can satisfy the name requested
+			 * by the larval entry q.
+			 */
+			if (strcmp(alg->cra_name, q->cra_name) &&
+			    strcmp(alg->cra_driver_name, q->cra_name))
+				continue;
+
+			if (larval->adult)
+				continue;
+			if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
+				continue;
+
+			if (fulfill_requests && crypto_mod_get(alg))
+				larval->adult = alg;
+			else
+				larval->adult = ERR_PTR(-EAGAIN);
+
+			continue;
+		}
+
+		if (strcmp(alg->cra_name, q->cra_name))
+			continue;
+
+		if (strcmp(alg->cra_driver_name, q->cra_driver_name) &&
+		    q->cra_priority > alg->cra_priority)
+			continue;
+
+		crypto_remove_spawns(q, algs_to_put, alg);
+	}
+
+	crypto_notify(CRYPTO_MSG_ALG_LOADED, alg);
+}
+
 static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg)
 {
 	struct crypto_larval *larval;
 
-	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER))
-		return NULL;
+	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER) ||
+	    IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+		return NULL; /* No self-test needed */
 
 	larval = crypto_larval_alloc(alg->cra_name,
 				     alg->cra_flags | CRYPTO_ALG_TESTED, 0);
@@ -248,7 +300,8 @@ static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg)
 	return larval;
 }
 
-static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
+static struct crypto_larval *
+__crypto_register_alg(struct crypto_alg *alg, struct list_head *algs_to_put)
 {
 	struct crypto_alg *q;
 	struct crypto_larval *larval;
@@ -259,9 +312,6 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 
 	INIT_LIST_HEAD(&alg->cra_users);
 
-	/* No cheating! */
-	alg->cra_flags &= ~CRYPTO_ALG_TESTED;
-
 	ret = -EEXIST;
 
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
@@ -288,12 +338,17 @@ static struct crypto_larval *__crypto_register_alg(struct crypto_alg *alg)
 
 	list_add(&alg->cra_list, &crypto_alg_list);
 
-	if (larval)
+	crypto_stats_init(alg);
+
+	if (larval) {
+		/* No cheating! */
+		alg->cra_flags &= ~CRYPTO_ALG_TESTED;
+
 		list_add(&larval->alg.cra_list, &crypto_alg_list);
-	else
+	} else {
 		alg->cra_flags |= CRYPTO_ALG_TESTED;
-
-	crypto_stats_init(alg);
+		crypto_alg_finish_registration(alg, true, algs_to_put);
+	}
 
 out:
 	return larval;
@@ -341,7 +396,10 @@ found:
 
 	alg->cra_flags |= CRYPTO_ALG_TESTED;
 
-	/* Only satisfy larval waiters if we are the best. */
+	/*
+	 * If a higher-priority implementation of the same algorithm is
+	 * currently being tested, then don't fulfill request larvals.
+	 */
 	best = true;
 	list_for_each_entry(q, &crypto_alg_list, cra_list) {
 		if (crypto_is_moribund(q) || !crypto_is_larval(q))
@@ -356,47 +414,7 @@ found:
 		}
 	}
 
-	list_for_each_entry(q, &crypto_alg_list, cra_list) {
-		if (q == alg)
-			continue;
-
-		if (crypto_is_moribund(q))
-			continue;
-
-		if (crypto_is_larval(q)) {
-			struct crypto_larval *larval = (void *)q;
-
-			/*
-			 * Check to see if either our generic name or
-			 * specific name can satisfy the name requested
-			 * by the larval entry q.
-			 */
-			if (strcmp(alg->cra_name, q->cra_name) &&
-			    strcmp(alg->cra_driver_name, q->cra_name))
-				continue;
-
-			if (larval->adult)
-				continue;
-			if ((q->cra_flags ^ alg->cra_flags) & larval->mask)
-				continue;
-
-			if (best && crypto_mod_get(alg))
-				larval->adult = alg;
-			else
-				larval->adult = ERR_PTR(-EAGAIN);
-
-			continue;
-		}
-
-		if (strcmp(alg->cra_name, q->cra_name))
-			continue;
-
-		if (strcmp(alg->cra_driver_name, q->cra_driver_name) &&
-		    q->cra_priority > alg->cra_priority)
-			continue;
-
-		crypto_remove_spawns(q, &list, alg);
-	}
+	crypto_alg_finish_registration(alg, best, &list);
 
 complete:
 	complete_all(&test->completion);
@@ -423,7 +441,8 @@ EXPORT_SYMBOL_GPL(crypto_remove_final);
 int crypto_register_alg(struct crypto_alg *alg)
 {
 	struct crypto_larval *larval;
-	bool test_started;
+	LIST_HEAD(algs_to_put);
+	bool test_started = false;
 	int err;
 
 	alg->cra_flags &= ~CRYPTO_ALG_DEAD;
@@ -432,17 +451,18 @@ int crypto_register_alg(struct crypto_alg *alg)
 		return err;
 
 	down_write(&crypto_alg_sem);
-	larval = __crypto_register_alg(alg);
-	test_started = static_key_enabled(&crypto_boot_test_finished);
-	if (!IS_ERR_OR_NULL(larval))
+	larval = __crypto_register_alg(alg, &algs_to_put);
+	if (!IS_ERR_OR_NULL(larval)) {
+		test_started = static_key_enabled(&crypto_boot_test_finished);
 		larval->test_started = test_started;
+	}
 	up_write(&crypto_alg_sem);
 
-	if (IS_ERR_OR_NULL(larval))
+	if (IS_ERR(larval))
 		return PTR_ERR(larval);
-
 	if (test_started)
 		crypto_wait_for_test(larval);
+	crypto_remove_final(&algs_to_put);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_register_alg);
@@ -619,6 +639,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
 	struct crypto_larval *larval;
 	struct crypto_spawn *spawn;
 	u32 fips_internal = 0;
+	LIST_HEAD(algs_to_put);
 	int err;
 
 	err = crypto_check_alg(&inst->alg);
@@ -650,7 +671,7 @@ int crypto_register_instance(struct crypto_template *tmpl,
 
 	inst->alg.cra_flags |= (fips_internal & CRYPTO_ALG_FIPS_INTERNAL);
 
-	larval = __crypto_register_alg(&inst->alg);
+	larval = __crypto_register_alg(&inst->alg, &algs_to_put);
 	if (IS_ERR(larval))
 		goto unlock;
 	else if (larval)
@@ -662,15 +683,12 @@ int crypto_register_instance(struct crypto_template *tmpl,
 unlock:
 	up_write(&crypto_alg_sem);
 
-	err = PTR_ERR(larval);
-	if (IS_ERR_OR_NULL(larval))
-		goto err;
-
-	crypto_wait_for_test(larval);
-	err = 0;
-
-err:
-	return err;
+	if (IS_ERR(larval))
+		return PTR_ERR(larval);
+	if (larval)
+		crypto_wait_for_test(larval);
+	crypto_remove_final(&algs_to_put);
+	return 0;
 }
 EXPORT_SYMBOL_GPL(crypto_register_instance);
 
diff --git a/crypto/api.c b/crypto/api.c
index 64f2d365a8e94..52ce10a353660 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -172,9 +172,6 @@ void crypto_wait_for_test(struct crypto_larval *larval)
 
 	err = wait_for_completion_killable(&larval->completion);
 	WARN_ON(err);
-	if (!err)
-		crypto_notify(CRYPTO_MSG_ALG_LOADED, larval);
-
 out:
 	crypto_larval_kill(&larval->alg);
 }
-- 
GitLab


From 9cadd73adef1e1d53ea100f28e3e258698b92418 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Nov 2022 16:12:34 -0800
Subject: [PATCH 2743/4122] crypto: algboss - optimize registration of internal
 algorithms

Since algboss always skips testing of algorithms with the
CRYPTO_ALG_INTERNAL flag, there is no need to go through the dance of
creating the test kthread, which creates a lot of overhead.  Instead, we
can just directly finish the algorithm registration, like is now done
when self-tests are disabled entirely.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c  |  3 ++-
 crypto/algboss.c | 13 +------------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 950195e90bfc9..851b247f043d3 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -278,7 +278,8 @@ static struct crypto_larval *crypto_alloc_test_larval(struct crypto_alg *alg)
 	struct crypto_larval *larval;
 
 	if (!IS_ENABLED(CONFIG_CRYPTO_MANAGER) ||
-	    IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+	    IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS) ||
+	    (alg->cra_flags & CRYPTO_ALG_INTERNAL))
 		return NULL; /* No self-test needed */
 
 	larval = crypto_larval_alloc(alg->cra_name,
diff --git a/crypto/algboss.c b/crypto/algboss.c
index eb5fe84efb83e..13d37320a66eb 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -181,12 +181,8 @@ static int cryptomgr_test(void *data)
 	goto skiptest;
 #endif
 
-	if (type & CRYPTO_ALG_TESTED)
-		goto skiptest;
-
 	err = alg_test(param->driver, param->alg, type, CRYPTO_ALG_TESTED);
 
-skiptest:
 	crypto_alg_tested(param->driver, err);
 
 	kfree(param);
@@ -197,7 +193,6 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
 {
 	struct task_struct *thread;
 	struct crypto_test_param *param;
-	u32 type;
 
 	if (!try_module_get(THIS_MODULE))
 		goto err;
@@ -208,13 +203,7 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
 
 	memcpy(param->driver, alg->cra_driver_name, sizeof(param->driver));
 	memcpy(param->alg, alg->cra_name, sizeof(param->alg));
-	type = alg->cra_flags;
-
-	/* Do not test internal algorithms. */
-	if (type & CRYPTO_ALG_INTERNAL)
-		type |= CRYPTO_ALG_TESTED;
-
-	param->type = type;
+	param->type = alg->cra_flags;
 
 	thread = kthread_run(cryptomgr_test, param, "cryptomgr_test");
 	if (IS_ERR(thread))
-- 
GitLab


From 06bd9c967eaac5484c31c3dc6dfbef6183819508 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Nov 2022 16:12:35 -0800
Subject: [PATCH 2744/4122] crypto: api - compile out crypto_boot_test_finished
 when tests disabled

The crypto_boot_test_finished static key is unnecessary when self-tests
are disabled in the kconfig, so optimize it out accordingly, along with
the entirety of crypto_start_tests().  This mainly avoids the overhead
of an unnecessary static_branch_enable() on every boot.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algapi.c   |  7 +++++--
 crypto/api.c      |  8 +++++---
 crypto/internal.h | 20 +++++++++++++++++++-
 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/crypto/algapi.c b/crypto/algapi.c
index 851b247f043d3..d08f864f08bee 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -454,7 +454,7 @@ int crypto_register_alg(struct crypto_alg *alg)
 	down_write(&crypto_alg_sem);
 	larval = __crypto_register_alg(alg, &algs_to_put);
 	if (!IS_ERR_OR_NULL(larval)) {
-		test_started = static_key_enabled(&crypto_boot_test_finished);
+		test_started = crypto_boot_test_finished();
 		larval->test_started = test_started;
 	}
 	up_write(&crypto_alg_sem);
@@ -1253,6 +1253,9 @@ EXPORT_SYMBOL_GPL(crypto_stats_skcipher_decrypt);
 
 static void __init crypto_start_tests(void)
 {
+	if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+		return;
+
 	for (;;) {
 		struct crypto_larval *larval = NULL;
 		struct crypto_alg *q;
@@ -1286,7 +1289,7 @@ static void __init crypto_start_tests(void)
 		crypto_wait_for_test(larval);
 	}
 
-	static_branch_enable(&crypto_boot_test_finished);
+	set_crypto_boot_test_finished();
 }
 
 static int __init crypto_algapi_init(void)
diff --git a/crypto/api.c b/crypto/api.c
index 52ce10a353660..b022702f64367 100644
--- a/crypto/api.c
+++ b/crypto/api.c
@@ -31,8 +31,10 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem);
 BLOCKING_NOTIFIER_HEAD(crypto_chain);
 EXPORT_SYMBOL_GPL(crypto_chain);
 
-DEFINE_STATIC_KEY_FALSE(crypto_boot_test_finished);
-EXPORT_SYMBOL_GPL(crypto_boot_test_finished);
+#ifndef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+DEFINE_STATIC_KEY_FALSE(__crypto_boot_test_finished);
+EXPORT_SYMBOL_GPL(__crypto_boot_test_finished);
+#endif
 
 static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
 
@@ -202,7 +204,7 @@ static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg)
 	struct crypto_larval *larval = (void *)alg;
 	long timeout;
 
-	if (!static_branch_likely(&crypto_boot_test_finished))
+	if (!crypto_boot_test_finished())
 		crypto_start_test(larval);
 
 	timeout = wait_for_completion_killable_timeout(
diff --git a/crypto/internal.h b/crypto/internal.h
index c08385571853e..932f0aafddc32 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -47,7 +47,25 @@ extern struct list_head crypto_alg_list;
 extern struct rw_semaphore crypto_alg_sem;
 extern struct blocking_notifier_head crypto_chain;
 
-DECLARE_STATIC_KEY_FALSE(crypto_boot_test_finished);
+#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
+static inline bool crypto_boot_test_finished(void)
+{
+	return true;
+}
+static inline void set_crypto_boot_test_finished(void)
+{
+}
+#else
+DECLARE_STATIC_KEY_FALSE(__crypto_boot_test_finished);
+static inline bool crypto_boot_test_finished(void)
+{
+	return static_branch_likely(&__crypto_boot_test_finished);
+}
+static inline void set_crypto_boot_test_finished(void)
+{
+	static_branch_enable(&__crypto_boot_test_finished);
+}
+#endif /* !CONFIG_CRYPTO_MANAGER_DISABLE_TESTS */
 
 #ifdef CONFIG_PROC_FS
 void __init crypto_init_proc(void);
-- 
GitLab


From 0bf365c0efdd8fc03cb82e381ea4d76196c66bc2 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Nov 2022 16:12:36 -0800
Subject: [PATCH 2745/4122] crypto: kdf - skip self-test when tests disabled

Make kdf_sp800108 honor the CONFIG_CRYPTO_MANAGER_DISABLE_TESTS kconfig
option, so that it doesn't always waste time running its self-test.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/kdf_sp800108.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c
index 58edf7797abfb..c6e3ad82d5f7a 100644
--- a/crypto/kdf_sp800108.c
+++ b/crypto/kdf_sp800108.c
@@ -125,9 +125,13 @@ static const struct kdf_testvec kdf_ctr_hmac_sha256_tv_template[] = {
 
 static int __init crypto_kdf108_init(void)
 {
-	int ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)",
-			   crypto_kdf108_setkey, crypto_kdf108_ctr_generate);
+	int ret;
 
+	if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+		return 0;
+
+	ret = kdf_test(&kdf_ctr_hmac_sha256_tv_template[0], "hmac(sha256)",
+		       crypto_kdf108_setkey, crypto_kdf108_ctr_generate);
 	if (ret) {
 		if (fips_enabled)
 			panic("alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n",
-- 
GitLab


From 790c4c9f532318e3fe8c6f0b498072abc80e1195 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Nov 2022 16:12:37 -0800
Subject: [PATCH 2746/4122] crypto: kdf - silence noisy self-test

Make the kdf_sp800108 self-test only print a message on success when
fips_enabled, so that it's consistent with testmgr.c and doesn't spam
the kernel log with a message that isn't really important.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/kdf_sp800108.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crypto/kdf_sp800108.c b/crypto/kdf_sp800108.c
index c6e3ad82d5f7a..c3f9938e1ad27 100644
--- a/crypto/kdf_sp800108.c
+++ b/crypto/kdf_sp800108.c
@@ -140,7 +140,7 @@ static int __init crypto_kdf108_init(void)
 		WARN(1,
 		     "alg: self-tests for CTR-KDF (hmac(sha256)) failed (rc=%d)\n",
 		     ret);
-	} else {
+	} else if (fips_enabled) {
 		pr_info("alg: self-tests for CTR-KDF (hmac(sha256)) passed\n");
 	}
 
-- 
GitLab


From 441cb1b730006bd2d636f72dc7f6e11a8a0ecce5 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Sun, 13 Nov 2022 16:12:38 -0800
Subject: [PATCH 2747/4122] crypto: algboss - compile out test-related code
 when tests disabled

When CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is set, the code in algboss.c
that handles CRYPTO_MSG_ALG_REGISTER is unnecessary, so make it be
compiled out.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/algboss.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/crypto/algboss.c b/crypto/algboss.c
index 13d37320a66eb..0de1e66979498 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -175,11 +175,7 @@ static int cryptomgr_test(void *data)
 {
 	struct crypto_test_param *param = data;
 	u32 type = param->type;
-	int err = 0;
-
-#ifdef CONFIG_CRYPTO_MANAGER_DISABLE_TESTS
-	goto skiptest;
-#endif
+	int err;
 
 	err = alg_test(param->driver, param->alg, type, CRYPTO_ALG_TESTED);
 
@@ -194,6 +190,9 @@ static int cryptomgr_schedule_test(struct crypto_alg *alg)
 	struct task_struct *thread;
 	struct crypto_test_param *param;
 
+	if (IS_ENABLED(CONFIG_CRYPTO_MANAGER_DISABLE_TESTS))
+		return NOTIFY_DONE;
+
 	if (!try_module_get(THIS_MODULE))
 		goto err;
 
-- 
GitLab


From 1aa33fc8d4032227253ceb736f47c52b859d9683 Mon Sep 17 00:00:00 2001
From: Zhang Yiqun <zhangyiqun@phytium.com.cn>
Date: Wed, 16 Nov 2022 17:24:11 +0800
Subject: [PATCH 2748/4122] crypto: tcrypt - Fix multibuffer skcipher speed
 test mem leak

In the past, the data for mb-skcipher test has been allocated
twice, that means the first allcated memory area is without
free, which may cause a potential memory leakage. So this
patch is to remove one allocation to fix this error.

Fixes: e161c5930c15 ("crypto: tcrypt - add multibuf skcipher...")
Signed-off-by: Zhang Yiqun <zhangyiqun@phytium.com.cn>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/tcrypt.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 0f101897e90fb..a0833654ce946 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1090,15 +1090,6 @@ static void test_mb_skcipher_speed(const char *algo, int enc, int secs,
 			goto out_free_tfm;
 		}
 
-
-	for (i = 0; i < num_mb; ++i)
-		if (testmgr_alloc_buf(data[i].xbuf)) {
-			while (i--)
-				testmgr_free_buf(data[i].xbuf);
-			goto out_free_tfm;
-		}
-
-
 	for (i = 0; i < num_mb; ++i) {
 		data[i].req = skcipher_request_alloc(tfm, GFP_KERNEL);
 		if (!data[i].req) {
-- 
GitLab


From 34c3a47d20ae55b3600fed733bf96eafe9c500d5 Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Wed, 16 Nov 2022 20:28:02 -0500
Subject: [PATCH 2749/4122] padata: Always leave BHs disabled when running
 ->parallel()

A deadlock can happen when an overloaded system runs ->parallel() in the
context of the current task:

    padata_do_parallel
      ->parallel()
        pcrypt_aead_enc/dec
          padata_do_serial
            spin_lock(&reorder->lock) // BHs still enabled
              <interrupt>
                ...
                  __do_softirq
                    ...
                      padata_do_serial
                        spin_lock(&reorder->lock)

It's a bug for BHs to be on in _do_serial as Steffen points out, so
ensure they're off in the "current task" case like they are in
padata_parallel_worker to avoid this situation.

Reported-by: syzbot+bc05445bc14148d51915@syzkaller.appspotmail.com
Fixes: 4611ce224688 ("padata: allocate work structures for parallel jobs from a pool")
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 kernel/padata.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index e5819bb8bd1dc..97f51e0c17766 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -207,14 +207,16 @@ int padata_do_parallel(struct padata_shell *ps,
 	pw = padata_work_alloc();
 	spin_unlock(&padata_works_lock);
 
+	if (!pw) {
+		/* Maximum works limit exceeded, run in the current task. */
+		padata->parallel(padata);
+	}
+
 	rcu_read_unlock_bh();
 
 	if (pw) {
 		padata_work_init(pw, padata_parallel_worker, padata, 0);
 		queue_work(pinst->parallel_wq, &pw->pw_work);
-	} else {
-		/* Maximum works limit exceeded, run in the current task. */
-		padata->parallel(padata);
 	}
 
 	return 0;
-- 
GitLab


From 57ddfecc72a6c9941d159543e1c0c0a74fe9afdd Mon Sep 17 00:00:00 2001
From: Daniel Jordan <daniel.m.jordan@oracle.com>
Date: Wed, 16 Nov 2022 20:28:04 -0500
Subject: [PATCH 2750/4122] padata: Fix list iterator in padata_do_serial()

list_for_each_entry_reverse() assumes that the iterated list is nonempty
and that every list_head is embedded in the same type, but its use in
padata_do_serial() breaks both rules.

This doesn't cause any issues now because padata_priv and padata_list
happen to have their list fields at the same offset, but we really
shouldn't be relying on that.

Fixes: bfde23ce200e ("padata: unbind parallel jobs from specific CPUs")
Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 kernel/padata.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index 97f51e0c17766..de90af5fcbe6b 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -390,13 +390,16 @@ void padata_do_serial(struct padata_priv *padata)
 	int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
 	struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
 	struct padata_priv *cur;
+	struct list_head *pos;
 
 	spin_lock(&reorder->lock);
 	/* Sort in ascending order of sequence number. */
-	list_for_each_entry_reverse(cur, &reorder->list, list)
+	list_for_each_prev(pos, &reorder->list) {
+		cur = list_entry(pos, struct padata_priv, list);
 		if (cur->seq_nr < padata->seq_nr)
 			break;
-	list_add(&padata->list, &cur->list);
+	}
+	list_add(&padata->list, pos);
 	spin_unlock(&reorder->lock);
 
 	/*
-- 
GitLab


From 8bd9974b6bfcd1e14a001deeca051aed7295559a Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:10 -0800
Subject: [PATCH 2751/4122] crypto: x86/aegis128 - fix possible crash with CFI
 enabled

crypto_aegis128_aesni_enc(), crypto_aegis128_aesni_enc_tail(),
crypto_aegis128_aesni_dec(), and crypto_aegis128_aesni_dec_tail() are
called via indirect function calls.  Therefore they need to use
SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type
hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y.
Otherwise, the code crashes with a CFI failure (if the compiler didn't
happen to optimize out the indirect calls).

Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aegis128-aesni-asm.S | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S
index b48ddebb47489..cdf3215ec272c 100644
--- a/arch/x86/crypto/aegis128-aesni-asm.S
+++ b/arch/x86/crypto/aegis128-aesni-asm.S
@@ -7,6 +7,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/frame.h>
 
 #define STATE0	%xmm0
@@ -402,7 +403,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad)
  * void crypto_aegis128_aesni_enc(void *state, unsigned int length,
  *                                const void *src, void *dst);
  */
-SYM_FUNC_START(crypto_aegis128_aesni_enc)
+SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc)
 	FRAME_BEGIN
 
 	cmp $0x10, LEN
@@ -499,7 +500,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc)
  * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length,
  *                                     const void *src, void *dst);
  */
-SYM_FUNC_START(crypto_aegis128_aesni_enc_tail)
+SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail)
 	FRAME_BEGIN
 
 	/* load the state: */
@@ -556,7 +557,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail)
  * void crypto_aegis128_aesni_dec(void *state, unsigned int length,
  *                                const void *src, void *dst);
  */
-SYM_FUNC_START(crypto_aegis128_aesni_dec)
+SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec)
 	FRAME_BEGIN
 
 	cmp $0x10, LEN
@@ -653,7 +654,7 @@ SYM_FUNC_END(crypto_aegis128_aesni_dec)
  * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length,
  *                                     const void *src, void *dst);
  */
-SYM_FUNC_START(crypto_aegis128_aesni_dec_tail)
+SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail)
 	FRAME_BEGIN
 
 	/* load the state: */
-- 
GitLab


From c67b553a4f4a8bd921e4c9ceae00e111be09c488 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:11 -0800
Subject: [PATCH 2752/4122] crypto: x86/aria - fix crash with CFI enabled

aria_aesni_avx_encrypt_16way(), aria_aesni_avx_decrypt_16way(),
aria_aesni_avx_ctr_crypt_16way(), aria_aesni_avx_gfni_encrypt_16way(),
aria_aesni_avx_gfni_decrypt_16way(), and
aria_aesni_avx_gfni_ctr_crypt_16way() are called via indirect function
calls.  Therefore they need to use SYM_TYPED_FUNC_START instead of
SYM_FUNC_START to cause their type hashes to be emitted when the kernel
is built with CONFIG_CFI_CLANG=y.  Otherwise, the code crashes with a
CFI failure.

Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Cc: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/aria-aesni-avx-asm_64.S | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/aria-aesni-avx-asm_64.S b/arch/x86/crypto/aria-aesni-avx-asm_64.S
index c75fd7d015ed8..03ae4cd1d976a 100644
--- a/arch/x86/crypto/aria-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/aria-aesni-avx-asm_64.S
@@ -7,6 +7,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/frame.h>
 
 /* struct aria_ctx: */
@@ -913,7 +914,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_crypt_16way)
 	RET;
 SYM_FUNC_END(__aria_aesni_avx_crypt_16way)
 
-SYM_FUNC_START(aria_aesni_avx_encrypt_16way)
+SYM_TYPED_FUNC_START(aria_aesni_avx_encrypt_16way)
 	/* input:
 	*      %rdi: ctx, CTX
 	*      %rsi: dst
@@ -938,7 +939,7 @@ SYM_FUNC_START(aria_aesni_avx_encrypt_16way)
 	RET;
 SYM_FUNC_END(aria_aesni_avx_encrypt_16way)
 
-SYM_FUNC_START(aria_aesni_avx_decrypt_16way)
+SYM_TYPED_FUNC_START(aria_aesni_avx_decrypt_16way)
 	/* input:
 	*      %rdi: ctx, CTX
 	*      %rsi: dst
@@ -1039,7 +1040,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_ctr_gen_keystream_16way)
 	RET;
 SYM_FUNC_END(__aria_aesni_avx_ctr_gen_keystream_16way)
 
-SYM_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
+SYM_TYPED_FUNC_START(aria_aesni_avx_ctr_crypt_16way)
 	/* input:
 	*      %rdi: ctx
 	*      %rsi: dst
@@ -1208,7 +1209,7 @@ SYM_FUNC_START_LOCAL(__aria_aesni_avx_gfni_crypt_16way)
 	RET;
 SYM_FUNC_END(__aria_aesni_avx_gfni_crypt_16way)
 
-SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
+SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
 	/* input:
 	*      %rdi: ctx, CTX
 	*      %rsi: dst
@@ -1233,7 +1234,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_encrypt_16way)
 	RET;
 SYM_FUNC_END(aria_aesni_avx_gfni_encrypt_16way)
 
-SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
+SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
 	/* input:
 	*      %rdi: ctx, CTX
 	*      %rsi: dst
@@ -1258,7 +1259,7 @@ SYM_FUNC_START(aria_aesni_avx_gfni_decrypt_16way)
 	RET;
 SYM_FUNC_END(aria_aesni_avx_gfni_decrypt_16way)
 
-SYM_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
+SYM_TYPED_FUNC_START(aria_aesni_avx_gfni_ctr_crypt_16way)
 	/* input:
 	*      %rdi: ctx
 	*      %rsi: dst
-- 
GitLab


From 0f8bc4bd48dd148046c19c38568cd9449c79b45f Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:12 -0800
Subject: [PATCH 2753/4122] crypto: x86/nhpoly1305 - eliminate unnecessary CFI
 wrappers

Since the CFI implementation now supports indirect calls to assembly
functions, take advantage of that rather than use wrapper functions.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/nh-avx2-x86_64.S       |  5 +++--
 arch/x86/crypto/nh-sse2-x86_64.S       |  5 +++--
 arch/x86/crypto/nhpoly1305-avx2-glue.c | 11 ++---------
 arch/x86/crypto/nhpoly1305-sse2-glue.c | 11 ++---------
 4 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/arch/x86/crypto/nh-avx2-x86_64.S b/arch/x86/crypto/nh-avx2-x86_64.S
index 6a0b15e7196a8..ef73a3ab87263 100644
--- a/arch/x86/crypto/nh-avx2-x86_64.S
+++ b/arch/x86/crypto/nh-avx2-x86_64.S
@@ -8,6 +8,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 #define		PASS0_SUMS	%ymm0
 #define		PASS1_SUMS	%ymm1
@@ -65,11 +66,11 @@
 
 /*
  * void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
- *		u8 hash[NH_HASH_BYTES])
+ *		__le64 hash[NH_NUM_PASSES])
  *
  * It's guaranteed that message_len % 16 == 0.
  */
-SYM_FUNC_START(nh_avx2)
+SYM_TYPED_FUNC_START(nh_avx2)
 
 	vmovdqu		0x00(KEY), K0
 	vmovdqu		0x10(KEY), K1
diff --git a/arch/x86/crypto/nh-sse2-x86_64.S b/arch/x86/crypto/nh-sse2-x86_64.S
index 34c567bbcb4fa..75fb994b6d177 100644
--- a/arch/x86/crypto/nh-sse2-x86_64.S
+++ b/arch/x86/crypto/nh-sse2-x86_64.S
@@ -8,6 +8,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 #define		PASS0_SUMS	%xmm0
 #define		PASS1_SUMS	%xmm1
@@ -67,11 +68,11 @@
 
 /*
  * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
- *		u8 hash[NH_HASH_BYTES])
+ *		__le64 hash[NH_NUM_PASSES])
  *
  * It's guaranteed that message_len % 16 == 0.
  */
-SYM_FUNC_START(nh_sse2)
+SYM_TYPED_FUNC_START(nh_sse2)
 
 	movdqu		0x00(KEY), K0
 	movdqu		0x10(KEY), K1
diff --git a/arch/x86/crypto/nhpoly1305-avx2-glue.c b/arch/x86/crypto/nhpoly1305-avx2-glue.c
index 8ea5ab0f1ca74..46b036204ed91 100644
--- a/arch/x86/crypto/nhpoly1305-avx2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-avx2-glue.c
@@ -14,14 +14,7 @@
 #include <asm/simd.h>
 
 asmlinkage void nh_avx2(const u32 *key, const u8 *message, size_t message_len,
-			u8 hash[NH_HASH_BYTES]);
-
-/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
-static void _nh_avx2(const u32 *key, const u8 *message, size_t message_len,
-		     __le64 hash[NH_NUM_PASSES])
-{
-	nh_avx2(key, message, message_len, (u8 *)hash);
-}
+			__le64 hash[NH_NUM_PASSES]);
 
 static int nhpoly1305_avx2_update(struct shash_desc *desc,
 				  const u8 *src, unsigned int srclen)
@@ -33,7 +26,7 @@ static int nhpoly1305_avx2_update(struct shash_desc *desc,
 		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_fpu_begin();
-		crypto_nhpoly1305_update_helper(desc, src, n, _nh_avx2);
+		crypto_nhpoly1305_update_helper(desc, src, n, nh_avx2);
 		kernel_fpu_end();
 		src += n;
 		srclen -= n;
diff --git a/arch/x86/crypto/nhpoly1305-sse2-glue.c b/arch/x86/crypto/nhpoly1305-sse2-glue.c
index 2b353d42ed13f..4a4970d751076 100644
--- a/arch/x86/crypto/nhpoly1305-sse2-glue.c
+++ b/arch/x86/crypto/nhpoly1305-sse2-glue.c
@@ -14,14 +14,7 @@
 #include <asm/simd.h>
 
 asmlinkage void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
-			u8 hash[NH_HASH_BYTES]);
-
-/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
-static void _nh_sse2(const u32 *key, const u8 *message, size_t message_len,
-		     __le64 hash[NH_NUM_PASSES])
-{
-	nh_sse2(key, message, message_len, (u8 *)hash);
-}
+			__le64 hash[NH_NUM_PASSES]);
 
 static int nhpoly1305_sse2_update(struct shash_desc *desc,
 				  const u8 *src, unsigned int srclen)
@@ -33,7 +26,7 @@ static int nhpoly1305_sse2_update(struct shash_desc *desc,
 		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_fpu_begin();
-		crypto_nhpoly1305_update_helper(desc, src, n, _nh_sse2);
+		crypto_nhpoly1305_update_helper(desc, src, n, nh_sse2);
 		kernel_fpu_end();
 		src += n;
 		srclen -= n;
-- 
GitLab


From 32f34bf7e44eeaa241fb845d6f52af5104bc30fd Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:13 -0800
Subject: [PATCH 2754/4122] crypto: x86/sha1 - fix possible crash with CFI
 enabled

sha1_transform_ssse3(), sha1_transform_avx(), and sha1_ni_transform()
(but not sha1_transform_avx2()) are called via indirect function calls.
Therefore they need to use SYM_TYPED_FUNC_START instead of
SYM_FUNC_START to cause their type hashes to be emitted when the kernel
is built with CONFIG_CFI_CLANG=y.  Otherwise, the code crashes with a
CFI failure (if the compiler didn't happen to optimize out the indirect
calls).

Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha1_ni_asm.S    | 3 ++-
 arch/x86/crypto/sha1_ssse3_asm.S | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S
index 2f94ec0e763bf..3cae5a1bb3d6e 100644
--- a/arch/x86/crypto/sha1_ni_asm.S
+++ b/arch/x86/crypto/sha1_ni_asm.S
@@ -54,6 +54,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 #define DIGEST_PTR	%rdi	/* 1st arg */
 #define DATA_PTR	%rsi	/* 2nd arg */
@@ -93,7 +94,7 @@
  */
 .text
 .align 32
-SYM_FUNC_START(sha1_ni_transform)
+SYM_TYPED_FUNC_START(sha1_ni_transform)
 	push		%rbp
 	mov		%rsp, %rbp
 	sub		$FRAME_SIZE, %rsp
diff --git a/arch/x86/crypto/sha1_ssse3_asm.S b/arch/x86/crypto/sha1_ssse3_asm.S
index 263f916362e02..f54988c80eb40 100644
--- a/arch/x86/crypto/sha1_ssse3_asm.S
+++ b/arch/x86/crypto/sha1_ssse3_asm.S
@@ -25,6 +25,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 #define CTX	%rdi	// arg1
 #define BUF	%rsi	// arg2
@@ -67,7 +68,7 @@
  * param: function's name
  */
 .macro SHA1_VECTOR_ASM  name
-	SYM_FUNC_START(\name)
+	SYM_TYPED_FUNC_START(\name)
 
 	push	%rbx
 	push	%r12
-- 
GitLab


From 19940ebbb59c12146d05c5f8acd873197b290648 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:14 -0800
Subject: [PATCH 2755/4122] crypto: x86/sha256 - fix possible crash with CFI
 enabled

sha256_transform_ssse3(), sha256_transform_avx(),
sha256_transform_rorx(), and sha256_ni_transform() are called via
indirect function calls.  Therefore they need to use
SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type
hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y.
Otherwise, the code crashes with a CFI failure (if the compiler didn't
happen to optimize out the indirect calls).

Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha256-avx-asm.S   | 3 ++-
 arch/x86/crypto/sha256-avx2-asm.S  | 3 ++-
 arch/x86/crypto/sha256-ssse3-asm.S | 3 ++-
 arch/x86/crypto/sha256_ni_asm.S    | 3 ++-
 4 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S
index 3baa1ec390974..06ea30c20828d 100644
--- a/arch/x86/crypto/sha256-avx-asm.S
+++ b/arch/x86/crypto/sha256-avx-asm.S
@@ -48,6 +48,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 ## assume buffers not aligned
 #define    VMOVDQ vmovdqu
@@ -346,7 +347,7 @@ a = TMP_
 ## arg 3 : Num blocks
 ########################################################################
 .text
-SYM_FUNC_START(sha256_transform_avx)
+SYM_TYPED_FUNC_START(sha256_transform_avx)
 .align 32
 	pushq   %rbx
 	pushq   %r12
diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 9bcdbc47b8b4b..2d2be531a11ed 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -49,6 +49,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 ## assume buffers not aligned
 #define	VMOVDQ vmovdqu
@@ -523,7 +524,7 @@ STACK_SIZE	= _CTX      + _CTX_SIZE
 ## arg 3 : Num blocks
 ########################################################################
 .text
-SYM_FUNC_START(sha256_transform_rorx)
+SYM_TYPED_FUNC_START(sha256_transform_rorx)
 .align 32
 	pushq	%rbx
 	pushq	%r12
diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S
index c4a5db612c327..7db28839108dd 100644
--- a/arch/x86/crypto/sha256-ssse3-asm.S
+++ b/arch/x86/crypto/sha256-ssse3-asm.S
@@ -47,6 +47,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 ## assume buffers not aligned
 #define    MOVDQ movdqu
@@ -355,7 +356,7 @@ a = TMP_
 ## arg 3 : Num blocks
 ########################################################################
 .text
-SYM_FUNC_START(sha256_transform_ssse3)
+SYM_TYPED_FUNC_START(sha256_transform_ssse3)
 .align 32
 	pushq   %rbx
 	pushq   %r12
diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S
index 94d50dd27cb53..47f93937f798a 100644
--- a/arch/x86/crypto/sha256_ni_asm.S
+++ b/arch/x86/crypto/sha256_ni_asm.S
@@ -54,6 +54,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 #define DIGEST_PTR	%rdi	/* 1st arg */
 #define DATA_PTR	%rsi	/* 2nd arg */
@@ -97,7 +98,7 @@
 
 .text
 .align 32
-SYM_FUNC_START(sha256_ni_transform)
+SYM_TYPED_FUNC_START(sha256_ni_transform)
 
 	shl		$6, NUM_BLKS		/*  convert to bytes */
 	jz		.Ldone_hash
-- 
GitLab


From a1d72fa33186ac69c7d8120c71f41ea4fc23dcc9 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:15 -0800
Subject: [PATCH 2756/4122] crypto: x86/sha512 - fix possible crash with CFI
 enabled

sha512_transform_ssse3(), sha512_transform_avx(), and
sha512_transform_rorx() are called via indirect function calls.
Therefore they need to use SYM_TYPED_FUNC_START instead of
SYM_FUNC_START to cause their type hashes to be emitted when the kernel
is built with CONFIG_CFI_CLANG=y.  Otherwise, the code crashes with a
CFI failure (if the compiler didn't happen to optimize out the indirect
calls).

Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sha512-avx-asm.S   | 3 ++-
 arch/x86/crypto/sha512-avx2-asm.S  | 3 ++-
 arch/x86/crypto/sha512-ssse3-asm.S | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S
index 1fefe6dd3a9e2..b0984f19fdb40 100644
--- a/arch/x86/crypto/sha512-avx-asm.S
+++ b/arch/x86/crypto/sha512-avx-asm.S
@@ -48,6 +48,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 .text
 
@@ -273,7 +274,7 @@ frame_size = frame_WK + WK_SIZE
 # of SHA512 message blocks.
 # "blocks" is the message length in SHA512 blocks
 ########################################################################
-SYM_FUNC_START(sha512_transform_avx)
+SYM_TYPED_FUNC_START(sha512_transform_avx)
 	test msglen, msglen
 	je nowork
 
diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S
index 5cdaab7d69015..b1ca99055ef99 100644
--- a/arch/x86/crypto/sha512-avx2-asm.S
+++ b/arch/x86/crypto/sha512-avx2-asm.S
@@ -50,6 +50,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 .text
 
@@ -565,7 +566,7 @@ frame_size = frame_CTX + CTX_SIZE
 # of SHA512 message blocks.
 # "blocks" is the message length in SHA512 blocks
 ########################################################################
-SYM_FUNC_START(sha512_transform_rorx)
+SYM_TYPED_FUNC_START(sha512_transform_rorx)
 	# Save GPRs
 	push	%rbx
 	push	%r12
diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S
index b84c22e06c5f7..c06afb5270e5f 100644
--- a/arch/x86/crypto/sha512-ssse3-asm.S
+++ b/arch/x86/crypto/sha512-ssse3-asm.S
@@ -48,6 +48,7 @@
 ########################################################################
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 .text
 
@@ -274,7 +275,7 @@ frame_size = frame_WK + WK_SIZE
 # of SHA512 message blocks.
 # "blocks" is the message length in SHA512 blocks.
 ########################################################################
-SYM_FUNC_START(sha512_transform_ssse3)
+SYM_TYPED_FUNC_START(sha512_transform_ssse3)
 
 	test msglen, msglen
 	je nowork
-- 
GitLab


From 8ba490d9f5a56f52091644325a32d3f71a982776 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:16 -0800
Subject: [PATCH 2757/4122] crypto: x86/sm3 - fix possible crash with CFI
 enabled

sm3_transform_avx() is called via indirect function calls.  Therefore it
needs to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause its
type hash to be emitted when the kernel is built with
CONFIG_CFI_CLANG=y.  Otherwise, the code crashes with a CFI failure (if
the compiler didn't happen to optimize out the indirect call).

Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sm3-avx-asm_64.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S
index b12b9efb5ec51..8fc5ac681fd63 100644
--- a/arch/x86/crypto/sm3-avx-asm_64.S
+++ b/arch/x86/crypto/sm3-avx-asm_64.S
@@ -12,6 +12,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/frame.h>
 
 /* Context structure */
@@ -328,7 +329,7 @@
  *                        const u8 *data, int nblocks);
  */
 .align 16
-SYM_FUNC_START(sm3_transform_avx)
+SYM_TYPED_FUNC_START(sm3_transform_avx)
 	/* input:
 	 *	%rdi: ctx, CTX
 	 *	%rsi: data (64*nblks bytes)
-- 
GitLab


From 2d203c46a0fa5df0785383b13b722483e1fd27a8 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:17 -0800
Subject: [PATCH 2758/4122] crypto: x86/sm4 - fix crash with CFI enabled

sm4_aesni_avx_ctr_enc_blk8(), sm4_aesni_avx_cbc_dec_blk8(),
sm4_aesni_avx_cfb_dec_blk8(), sm4_aesni_avx2_ctr_enc_blk16(),
sm4_aesni_avx2_cbc_dec_blk16(), and sm4_aesni_avx2_cfb_dec_blk16() are
called via indirect function calls.  Therefore they need to use
SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause their type
hashes to be emitted when the kernel is built with CONFIG_CFI_CLANG=y.
Otherwise, the code crashes with a CFI failure.

(Or at least that should be the case.  For some reason the CFI checks in
sm4_avx_cbc_decrypt(), sm4_avx_cfb_decrypt(), and sm4_avx_ctr_crypt()
are not always being generated, using current tip-of-tree clang.
Anyway, this patch is a good idea anyway.)

Fixes: ccace936eec7 ("x86: Add types to indirectly called assembly functions")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/sm4-aesni-avx-asm_64.S  | 7 ++++---
 arch/x86/crypto/sm4-aesni-avx2-asm_64.S | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/arch/x86/crypto/sm4-aesni-avx-asm_64.S b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
index 4767ab61ff489..22b6560eb9e1e 100644
--- a/arch/x86/crypto/sm4-aesni-avx-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx-asm_64.S
@@ -14,6 +14,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/frame.h>
 
 #define rRIP         (%rip)
@@ -420,7 +421,7 @@ SYM_FUNC_END(sm4_aesni_avx_crypt8)
  *                                 const u8 *src, u8 *iv)
  */
 .align 8
-SYM_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
+SYM_TYPED_FUNC_START(sm4_aesni_avx_ctr_enc_blk8)
 	/* input:
 	 *	%rdi: round key array, CTX
 	 *	%rsi: dst (8 blocks)
@@ -495,7 +496,7 @@ SYM_FUNC_END(sm4_aesni_avx_ctr_enc_blk8)
  *                                 const u8 *src, u8 *iv)
  */
 .align 8
-SYM_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
+SYM_TYPED_FUNC_START(sm4_aesni_avx_cbc_dec_blk8)
 	/* input:
 	 *	%rdi: round key array, CTX
 	 *	%rsi: dst (8 blocks)
@@ -545,7 +546,7 @@ SYM_FUNC_END(sm4_aesni_avx_cbc_dec_blk8)
  *                                 const u8 *src, u8 *iv)
  */
 .align 8
-SYM_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
+SYM_TYPED_FUNC_START(sm4_aesni_avx_cfb_dec_blk8)
 	/* input:
 	 *	%rdi: round key array, CTX
 	 *	%rsi: dst (8 blocks)
diff --git a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
index 4732fe8bb65b6..23ee39a8ada8c 100644
--- a/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/sm4-aesni-avx2-asm_64.S
@@ -14,6 +14,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/frame.h>
 
 #define rRIP         (%rip)
@@ -282,7 +283,7 @@ SYM_FUNC_END(__sm4_crypt_blk16)
  *                                   const u8 *src, u8 *iv)
  */
 .align 8
-SYM_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
+SYM_TYPED_FUNC_START(sm4_aesni_avx2_ctr_enc_blk16)
 	/* input:
 	 *	%rdi: round key array, CTX
 	 *	%rsi: dst (16 blocks)
@@ -395,7 +396,7 @@ SYM_FUNC_END(sm4_aesni_avx2_ctr_enc_blk16)
  *                                   const u8 *src, u8 *iv)
  */
 .align 8
-SYM_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
+SYM_TYPED_FUNC_START(sm4_aesni_avx2_cbc_dec_blk16)
 	/* input:
 	 *	%rdi: round key array, CTX
 	 *	%rsi: dst (16 blocks)
@@ -449,7 +450,7 @@ SYM_FUNC_END(sm4_aesni_avx2_cbc_dec_blk16)
  *                                   const u8 *src, u8 *iv)
  */
 .align 8
-SYM_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
+SYM_TYPED_FUNC_START(sm4_aesni_avx2_cfb_dec_blk16)
 	/* input:
 	 *	%rdi: round key array, CTX
 	 *	%rsi: dst (16 blocks)
-- 
GitLab


From e5e1c67e2f01d924e9583b67a907934948d852aa Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:18 -0800
Subject: [PATCH 2759/4122] crypto: arm64/nhpoly1305 - eliminate unnecessary
 CFI wrapper

Since the CFI implementation now supports indirect calls to assembly
functions, take advantage of that rather than use a wrapper function.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/nh-neon-core.S         |  5 +++--
 arch/arm64/crypto/nhpoly1305-neon-glue.c | 11 ++---------
 2 files changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/arm64/crypto/nh-neon-core.S b/arch/arm64/crypto/nh-neon-core.S
index 51c0a534ef87c..13eda08fda1e5 100644
--- a/arch/arm64/crypto/nh-neon-core.S
+++ b/arch/arm64/crypto/nh-neon-core.S
@@ -8,6 +8,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 
 	KEY		.req	x0
 	MESSAGE		.req	x1
@@ -58,11 +59,11 @@
 
 /*
  * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
- *		u8 hash[NH_HASH_BYTES])
+ *		__le64 hash[NH_NUM_PASSES])
  *
  * It's guaranteed that message_len % 16 == 0.
  */
-SYM_FUNC_START(nh_neon)
+SYM_TYPED_FUNC_START(nh_neon)
 
 	ld1		{K0.4s,K1.4s}, [KEY], #32
 	  movi		PASS0_SUMS.2d, #0
diff --git a/arch/arm64/crypto/nhpoly1305-neon-glue.c b/arch/arm64/crypto/nhpoly1305-neon-glue.c
index c5405e6a6db76..cd882c35d9252 100644
--- a/arch/arm64/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm64/crypto/nhpoly1305-neon-glue.c
@@ -14,14 +14,7 @@
 #include <linux/module.h>
 
 asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
-			u8 hash[NH_HASH_BYTES]);
-
-/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
-static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
-		     __le64 hash[NH_NUM_PASSES])
-{
-	nh_neon(key, message, message_len, (u8 *)hash);
-}
+			__le64 hash[NH_NUM_PASSES]);
 
 static int nhpoly1305_neon_update(struct shash_desc *desc,
 				  const u8 *src, unsigned int srclen)
@@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
 		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_neon_begin();
-		crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+		crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
 		kernel_neon_end();
 		src += n;
 		srclen -= n;
-- 
GitLab


From be8f6b6496076588fd49cbe5bfaaf3ab883eb779 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:19 -0800
Subject: [PATCH 2760/4122] crypto: arm64/sm3 - fix possible crash with CFI
 enabled

sm3_neon_transform() is called via indirect function calls.  Therefore
it needs to use SYM_TYPED_FUNC_START instead of SYM_FUNC_START to cause
its type hash to be emitted when the kernel is built with
CONFIG_CFI_CLANG=y.  Otherwise, the code crashes with a CFI failure (if
the compiler didn't happen to optimize out the indirect call).

Fixes: c50d32859e70 ("arm64: Add types to indirect called assembly functions")
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/sm3-neon-core.S | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/crypto/sm3-neon-core.S b/arch/arm64/crypto/sm3-neon-core.S
index 3e3b4e5c736fc..4357e0e51be38 100644
--- a/arch/arm64/crypto/sm3-neon-core.S
+++ b/arch/arm64/crypto/sm3-neon-core.S
@@ -9,6 +9,7 @@
  */
 
 #include <linux/linkage.h>
+#include <linux/cfi_types.h>
 #include <asm/assembler.h>
 
 /* Context structure */
@@ -351,7 +352,7 @@
 	 */
 	.text
 .align 3
-SYM_FUNC_START(sm3_neon_transform)
+SYM_TYPED_FUNC_START(sm3_neon_transform)
 	ldp		ra, rb, [RSTATE, #0]
 	ldp		rc, rd, [RSTATE, #8]
 	ldp		re, rf, [RSTATE, #16]
-- 
GitLab


From cc7acaadf6ab5d44f43170eb568e1cc9739c3df4 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:20 -0800
Subject: [PATCH 2761/4122] crypto: arm/nhpoly1305 - eliminate unnecessary CFI
 wrapper

The arm architecture doesn't support CFI yet, and even if it did, the
new CFI implementation supports indirect calls to assembly functions.
Therefore, there's no need to use a wrapper function for nh_neon().

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/nh-neon-core.S         |  2 +-
 arch/arm/crypto/nhpoly1305-neon-glue.c | 11 ++---------
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/arch/arm/crypto/nh-neon-core.S b/arch/arm/crypto/nh-neon-core.S
index 434d80ab531c2..01620a0782ca9 100644
--- a/arch/arm/crypto/nh-neon-core.S
+++ b/arch/arm/crypto/nh-neon-core.S
@@ -69,7 +69,7 @@
 
 /*
  * void nh_neon(const u32 *key, const u8 *message, size_t message_len,
- *		u8 hash[NH_HASH_BYTES])
+ *		__le64 hash[NH_NUM_PASSES])
  *
  * It's guaranteed that message_len % 16 == 0.
  */
diff --git a/arch/arm/crypto/nhpoly1305-neon-glue.c b/arch/arm/crypto/nhpoly1305-neon-glue.c
index ffa8d73fe722c..e93e41ff26566 100644
--- a/arch/arm/crypto/nhpoly1305-neon-glue.c
+++ b/arch/arm/crypto/nhpoly1305-neon-glue.c
@@ -14,14 +14,7 @@
 #include <linux/module.h>
 
 asmlinkage void nh_neon(const u32 *key, const u8 *message, size_t message_len,
-			u8 hash[NH_HASH_BYTES]);
-
-/* wrapper to avoid indirect call to assembly, which doesn't work with CFI */
-static void _nh_neon(const u32 *key, const u8 *message, size_t message_len,
-		     __le64 hash[NH_NUM_PASSES])
-{
-	nh_neon(key, message, message_len, (u8 *)hash);
-}
+			__le64 hash[NH_NUM_PASSES]);
 
 static int nhpoly1305_neon_update(struct shash_desc *desc,
 				  const u8 *src, unsigned int srclen)
@@ -33,7 +26,7 @@ static int nhpoly1305_neon_update(struct shash_desc *desc,
 		unsigned int n = min_t(unsigned int, srclen, SZ_4K);
 
 		kernel_neon_begin();
-		crypto_nhpoly1305_update_helper(desc, src, n, _nh_neon);
+		crypto_nhpoly1305_update_helper(desc, src, n, nh_neon);
 		kernel_neon_end();
 		src += n;
 		srclen -= n;
-- 
GitLab


From c060e16ddb51a92b1f7fa84c628d287ea5799864 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Fri, 18 Nov 2022 11:44:21 -0800
Subject: [PATCH 2762/4122] Revert "crypto: shash - avoid comparing pointers to
 exported functions under CFI"

This reverts commit 22ca9f4aaf431a9413dcc115dd590123307f274f because CFI
no longer breaks cross-module function address equality, so
crypto_shash_alg_has_setkey() can now be an inline function like before.

This commit should not be backported to kernels that don't have the new
CFI implementation.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/shash.c                 | 18 +++---------------
 include/crypto/internal/hash.h |  8 +++++++-
 2 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/crypto/shash.c b/crypto/shash.c
index 4c88e63b3350f..0f85431588267 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -20,24 +20,12 @@
 
 static const struct crypto_type crypto_shash_type;
 
-static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
-			   unsigned int keylen)
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+		    unsigned int keylen)
 {
 	return -ENOSYS;
 }
-
-/*
- * Check whether an shash algorithm has a setkey function.
- *
- * For CFI compatibility, this must not be an inline function.  This is because
- * when CFI is enabled, modules won't get the same address for shash_no_setkey
- * (if it were exported, which inlining would require) as the core kernel will.
- */
-bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
-{
-	return alg->setkey != shash_no_setkey;
-}
-EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey);
+EXPORT_SYMBOL_GPL(shash_no_setkey);
 
 static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
 				  unsigned int keylen)
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 25806141db591..0a288dddcf5be 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -75,7 +75,13 @@ void crypto_unregister_ahashes(struct ahash_alg *algs, int count);
 int ahash_register_instance(struct crypto_template *tmpl,
 			    struct ahash_instance *inst);
 
-bool crypto_shash_alg_has_setkey(struct shash_alg *alg);
+int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+		    unsigned int keylen);
+
+static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+	return alg->setkey != shash_no_setkey;
+}
 
 static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg)
 {
-- 
GitLab


From b8ed0bff96393cbeef66f00f34fda2a4960b75e4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:40 +0100
Subject: [PATCH 2763/4122] crypto: atmel-ecc - Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-ecc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/atmel-ecc.c b/drivers/crypto/atmel-ecc.c
index 82bf15d495614..53100fb9b07bd 100644
--- a/drivers/crypto/atmel-ecc.c
+++ b/drivers/crypto/atmel-ecc.c
@@ -311,9 +311,9 @@ static struct kpp_alg atmel_ecdh_nist_p256 = {
 	},
 };
 
-static int atmel_ecc_probe(struct i2c_client *client,
-			   const struct i2c_device_id *id)
+static int atmel_ecc_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct atmel_i2c_client_priv *i2c_priv;
 	int ret;
 
@@ -390,7 +390,7 @@ static struct i2c_driver atmel_ecc_driver = {
 		.name	= "atmel-ecc",
 		.of_match_table = of_match_ptr(atmel_ecc_dt_ids),
 	},
-	.probe		= atmel_ecc_probe,
+	.probe_new	= atmel_ecc_probe,
 	.remove		= atmel_ecc_remove,
 	.id_table	= atmel_ecc_id,
 };
-- 
GitLab


From fa2ca3b275874d61f42e68f5eb13645bbeb5d72b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:41 +0100
Subject: [PATCH 2764/4122] crypto: atmel-sha204a - Convert to i2c's
 .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/atmel-sha204a.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index c0103e7fc2e75..272a06f0b5886 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -91,9 +91,9 @@ static int atmel_sha204a_rng_read(struct hwrng *rng, void *data, size_t max,
 	return max;
 }
 
-static int atmel_sha204a_probe(struct i2c_client *client,
-			       const struct i2c_device_id *id)
+static int atmel_sha204a_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct atmel_i2c_client_priv *i2c_priv;
 	int ret;
 
@@ -142,7 +142,7 @@ static const struct i2c_device_id atmel_sha204a_id[] = {
 MODULE_DEVICE_TABLE(i2c, atmel_sha204a_id);
 
 static struct i2c_driver atmel_sha204a_driver = {
-	.probe			= atmel_sha204a_probe,
+	.probe_new		= atmel_sha204a_probe,
 	.remove			= atmel_sha204a_remove,
 	.id_table		= atmel_sha204a_id,
 
-- 
GitLab


From 3901355624d14afe3230252cb36bc3da8ff6890e Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 19 Nov 2022 17:48:43 +0800
Subject: [PATCH 2765/4122] crypto: hisilicon/qm - fix 'QM_XEQ_DEPTH_CAP' mask
 value

'QM_XEQ_DEPTH_CAP' mask value is GENMASK(31, 0) instead of GENMASK(15, 0).
If the mask value is incorrect, will cause abnormal events cannot be
handled. So fix it.

Fixes: 129a9f340172 ("crypto: hisilicon/qm - get qp num and depth from hardware registers")
Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 36d70b9f6117c..9072bee7336fe 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -336,7 +336,7 @@ static const struct hisi_qm_cap_info qm_cap_info_vf[] = {
 static const struct hisi_qm_cap_info qm_basic_info[] = {
 	{QM_TOTAL_QP_NUM_CAP,   0x100158, 0,  GENMASK(10, 0), 0x1000,    0x400,     0x400},
 	{QM_FUNC_MAX_QP_CAP,    0x100158, 11, GENMASK(10, 0), 0x1000,    0x400,     0x400},
-	{QM_XEQ_DEPTH_CAP,      0x3104,   0,  GENMASK(15, 0), 0x800,     0x4000800, 0x4000800},
+	{QM_XEQ_DEPTH_CAP,      0x3104,   0,  GENMASK(31, 0), 0x800,     0x4000800, 0x4000800},
 	{QM_QP_DEPTH_CAP,       0x3108,   0,  GENMASK(31, 0), 0x4000400, 0x4000400, 0x4000400},
 	{QM_EQ_IRQ_TYPE_CAP,    0x310c,   0,  GENMASK(31, 0), 0x10000,   0x10000,   0x10000},
 	{QM_AEQ_IRQ_TYPE_CAP,   0x3110,   0,  GENMASK(31, 0), 0x0,       0x10001,   0x10001},
-- 
GitLab


From 5f9c97a0e6dc873f662528ae591f2bd500eb5940 Mon Sep 17 00:00:00 2001
From: Weili Qian <qianweili@huawei.com>
Date: Sat, 19 Nov 2022 17:50:03 +0800
Subject: [PATCH 2766/4122] crypto: hisilicon/qm - add device status check when
 start fails

In function 'hisi_qm_resume', if the device fails to be started,
directly returning error code will cause the device to be unavailable.
However, the failure may be caused by device error, which will be
reported to the driver, and driver can reset and restart device.
Therefore, check device status instead of returning error code
directly. Returns 0 if device error has occurred, otherwise returns
error code.

Signed-off-by: Weili Qian <qianweili@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/qm.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index 9072bee7336fe..007ac7a69ce74 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -5468,8 +5468,14 @@ int hisi_qm_resume(struct device *dev)
 	}
 
 	ret = hisi_qm_start(qm);
-	if (ret)
-		pci_err(pdev, "failed to start qm(%d)\n", ret);
+	if (ret) {
+		if (qm_check_dev_error(qm)) {
+			pci_info(pdev, "failed to start qm due to device error, device will be reset!\n");
+			return 0;
+		}
+
+		pci_err(pdev, "failed to start qm(%d)!\n", ret);
+	}
 
 	return ret;
 }
-- 
GitLab


From 83478938f78fd640c72af83d739a90c840e1b876 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 19 Nov 2022 14:42:59 +0100
Subject: [PATCH 2767/4122] hwrng: u2fzero - account for high quality RNG

The U2F zero apparently has a real TRNG in it with maximum quality, not
one with quality of "1", which was likely a misinterpretation of the
field as a boolean. So remove the assignment entirely, so that we get
the default quality setting.

In the u2f-zero firmware, the 0x21 RNG command used by this driver is
handled as such [1]:

  case U2F_CUSTOM_GET_RNG:
    if (atecc_send_recv(ATECC_CMD_RNG,ATECC_RNG_P1,ATECC_RNG_P2,
      NULL, 0,
      appdata.tmp,
      sizeof(appdata.tmp), &res) == 0 )
    {
      memmove(msg->pkt.init.payload, res.buf, 32);
      U2FHID_SET_LEN(msg, 32);
      usb_write((uint8_t*)msg, 64);
    }
    else
    {
      U2FHID_SET_LEN(msg, 0);
      usb_write((uint8_t*)msg, 64);
    }

This same call to `atecc_send_recv(ATECC_CMD_RNG,ATECC_RNG_P1,
ATECC_RNG_P2,...)` is then also used in the token's cryptographically
critical "u2f_new_keypair" function, as its rather straightforward
source of random bytes [2]:

  int8_t u2f_new_keypair(uint8_t * handle, uint8_t * appid, uint8_t * pubkey)
  {
    struct atecc_response res;
    uint8_t private_key[36];
    int i;

    watchdog();

    if (atecc_send_recv(ATECC_CMD_RNG,ATECC_RNG_P1,ATECC_RNG_P2,
      NULL, 0,
      appdata.tmp,
      sizeof(appdata.tmp), &res) != 0 )
    {
      return -1;
    }

So it seems rather plain that the ATECC RNG is considered to provide
good random numbers.

[1] https://github.com/conorpp/u2f-zero/blob/master/firmware/src/custom.c
[2] https://github.com/conorpp/u2f-zero/blob/master/firmware/src/u2f_atecc.c

Cc: Andrej Shadura <andrew.shadura@collabora.co.uk>
Cc: Jiri Kosina <jkosina@suse.cz>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Andrej Shadura <andrew.shadura@collabora.co.uk>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/hid/hid-u2fzero.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/hid/hid-u2fzero.c b/drivers/hid/hid-u2fzero.c
index ad489caf53ad8..744a91e6e78c5 100644
--- a/drivers/hid/hid-u2fzero.c
+++ b/drivers/hid/hid-u2fzero.c
@@ -261,7 +261,6 @@ static int u2fzero_init_hwrng(struct u2fzero_device *dev,
 
 	dev->hwrng.name = dev->rng_name;
 	dev->hwrng.read = u2fzero_rng_read;
-	dev->hwrng.quality = 1;
 
 	return devm_hwrng_register(&dev->hdev->dev, &dev->hwrng);
 }
-- 
GitLab


From 70aa0a5551f6a752f3a071529fffde903f1a2637 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:13:56 +0000
Subject: [PATCH 2768/4122] dt-bindings: misc: qcom,fastrpc: increase allowed
 iommus entries

The fastrpc components on the SM8550 SoC can require up to 3 IOMMU
entries, this bumps the maxItems to 3 for this purpose.

Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-2-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml b/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml
index d7576f8ac94b4..1ab9588cdd896 100644
--- a/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml
+++ b/Documentation/devicetree/bindings/misc/qcom,fastrpc.yaml
@@ -79,7 +79,7 @@ patternProperties:
 
       iommus:
         minItems: 1
-        maxItems: 2
+        maxItems: 3
 
       qcom,nsessions:
         $ref: /schemas/types.yaml#/definitions/uint32
-- 
GitLab


From 1959ab9edccd3de4bc8a876f97ce269bb9beeb31 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:13:57 +0000
Subject: [PATCH 2769/4122] misc: fastrpc: Rename audio protection domain to
 root

The AUDIO_PD will be done via static pd, so the proper name here is
actually ROOT_PD.

Co-developed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-3-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 7ff0b63c25e37..f2bda08adca77 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -83,7 +83,7 @@
 #define FASTRPC_RMID_INIT_MEM_UNMAP    11
 
 /* Protection Domain(PD) ids */
-#define AUDIO_PD	(0) /* also GUEST_OS PD? */
+#define ROOT_PD		(0)
 #define USER_PD		(1)
 #define SENSORS_PD	(2)
 
@@ -1886,7 +1886,7 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
 		err = fastrpc_invoke(fl, argp);
 		break;
 	case FASTRPC_IOCTL_INIT_ATTACH:
-		err = fastrpc_init_attach(fl, AUDIO_PD);
+		err = fastrpc_init_attach(fl, ROOT_PD);
 		break;
 	case FASTRPC_IOCTL_INIT_ATTACH_SNS:
 		err = fastrpc_init_attach(fl, SENSORS_PD);
-- 
GitLab


From 1ce91d45ba77a4f6bf9209d142d5c89c42cf877a Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:13:58 +0000
Subject: [PATCH 2770/4122] misc: fastrpc: Add reserved mem support

The reserved mem support is needed for CMA heap support, which will be
used by AUDIOPD.

Co-developed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-4-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index f2bda08adca77..3d5809622a6bd 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/qcom_scm.h>
 #include <uapi/misc/fastrpc.h>
+#include <linux/of_reserved_mem.h>
 
 #define ADSP_DOMAIN_ID (0)
 #define MDSP_DOMAIN_ID (1)
@@ -2065,6 +2066,9 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
 		return -EINVAL;
 	}
 
+	if (of_reserved_mem_device_init_by_idx(rdev, rdev->of_node, 0))
+		dev_info(rdev, "no reserved DMA memory for FASTRPC\n");
+
 	vmcount = of_property_read_variable_u32_array(rdev->of_node,
 				"qcom,vmids", &vmids[0], 0, FASTRPC_MAX_VMIDS);
 	if (vmcount < 0)
-- 
GitLab


From 6f18c7e845346f365e08613fdc47a60fc201aedb Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:13:59 +0000
Subject: [PATCH 2771/4122] misc: fastrpc: Add fastrpc_remote_heap_alloc

Split fastrpc_buf_alloc in such a way it allows allocation of remote
heap too and add fastrpc_remote_heap_alloc to do so.

Co-developed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-5-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 3d5809622a6bd..8b43fe5207fb6 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -369,7 +369,7 @@ static void fastrpc_buf_free(struct fastrpc_buf *buf)
 	kfree(buf);
 }
 
-static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
+static int __fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
 			     u64 size, struct fastrpc_buf **obuf)
 {
 	struct fastrpc_buf *buf;
@@ -397,14 +397,37 @@ static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
 		return -ENOMEM;
 	}
 
+	*obuf = buf;
+
+	return 0;
+}
+
+static int fastrpc_buf_alloc(struct fastrpc_user *fl, struct device *dev,
+			     u64 size, struct fastrpc_buf **obuf)
+{
+	int ret;
+	struct fastrpc_buf *buf;
+
+	ret = __fastrpc_buf_alloc(fl, dev, size, obuf);
+	if (ret)
+		return ret;
+
+	buf = *obuf;
+
 	if (fl->sctx && fl->sctx->sid)
 		buf->phys += ((u64)fl->sctx->sid << 32);
 
-	*obuf = buf;
-
 	return 0;
 }
 
+static int fastrpc_remote_heap_alloc(struct fastrpc_user *fl, struct device *dev,
+				     u64 size, struct fastrpc_buf **obuf)
+{
+	struct device *rdev = &fl->cctx->rpdev->dev;
+
+	return  __fastrpc_buf_alloc(fl, rdev, size, obuf);
+}
+
 static void fastrpc_channel_ctx_free(struct kref *ref)
 {
 	struct fastrpc_channel_ctx *cctx;
-- 
GitLab


From 334f1a1cbe032d85fd58e771629e3a3b373b96d5 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:14:00 +0000
Subject: [PATCH 2772/4122] misc: fastrpc: Use fastrpc_map_put in
 fastrpc_map_create on fail

Move the kref_init right after the allocation so that we can use
fastrpc_map_put on any following error case.

Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-6-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 8b43fe5207fb6..332626df5d395 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -735,6 +735,8 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&map->node);
+	kref_init(&map->refcount);
+
 	map->fl = fl;
 	map->fd = fd;
 	map->buf = dma_buf_get(fd);
@@ -761,7 +763,6 @@ static int fastrpc_map_create(struct fastrpc_user *fl, int fd,
 	map->size = len;
 	map->va = sg_virt(map->table->sgl);
 	map->len = len;
-	kref_init(&map->refcount);
 
 	if (attr & FASTRPC_ATTR_SECUREMAP) {
 		/*
@@ -791,7 +792,7 @@ map_err:
 attach_err:
 	dma_buf_put(map->buf);
 get_err:
-	kfree(map);
+	fastrpc_map_put(map);
 
 	return err;
 }
-- 
GitLab


From 72fa6f7820c4cf96c5f7aabc4e54bdf52d1e2ac2 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:14:01 +0000
Subject: [PATCH 2773/4122] misc: fastrpc: Rework fastrpc_req_munmap

Move the lookup of the munmap request to the fastrpc_req_munmap and pass
on only the buf to the lower level fastrpc_req_munmap_impl. That way
we can use the lower level fastrpc_req_munmap_impl on error path in
fastrpc_req_mmap to free the buf without searching for the munmap
request it belongs to.

Co-developed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-7-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 47 +++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 24 deletions(-)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 332626df5d395..4590a11f7316f 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1627,30 +1627,14 @@ static int fastrpc_get_dsp_info(struct fastrpc_user *fl, char __user *argp)
 	return 0;
 }
 
-static int fastrpc_req_munmap_impl(struct fastrpc_user *fl,
-				   struct fastrpc_req_munmap *req)
+static int fastrpc_req_munmap_impl(struct fastrpc_user *fl, struct fastrpc_buf *buf)
 {
 	struct fastrpc_invoke_args args[1] = { [0] = { 0 } };
-	struct fastrpc_buf *buf = NULL, *iter, *b;
 	struct fastrpc_munmap_req_msg req_msg;
 	struct device *dev = fl->sctx->dev;
 	int err;
 	u32 sc;
 
-	spin_lock(&fl->lock);
-	list_for_each_entry_safe(iter, b, &fl->mmaps, node) {
-		if ((iter->raddr == req->vaddrout) && (iter->size == req->size)) {
-			buf = iter;
-			break;
-		}
-	}
-	spin_unlock(&fl->lock);
-
-	if (!buf) {
-		dev_err(dev, "mmap not in list\n");
-		return -EINVAL;
-	}
-
 	req_msg.pgid = fl->tgid;
 	req_msg.size = buf->size;
 	req_msg.vaddr = buf->raddr;
@@ -1676,12 +1660,29 @@ static int fastrpc_req_munmap_impl(struct fastrpc_user *fl,
 
 static int fastrpc_req_munmap(struct fastrpc_user *fl, char __user *argp)
 {
+	struct fastrpc_buf *buf = NULL, *iter, *b;
 	struct fastrpc_req_munmap req;
+	struct device *dev = fl->sctx->dev;
 
 	if (copy_from_user(&req, argp, sizeof(req)))
 		return -EFAULT;
 
-	return fastrpc_req_munmap_impl(fl, &req);
+	spin_lock(&fl->lock);
+	list_for_each_entry_safe(iter, b, &fl->mmaps, node) {
+		if ((iter->raddr == req.vaddrout) && (iter->size == req.size)) {
+			buf = iter;
+			break;
+		}
+	}
+	spin_unlock(&fl->lock);
+
+	if (!buf) {
+		dev_err(dev, "mmap\t\tpt 0x%09llx [len 0x%08llx] not in list\n",
+			req.vaddrout, req.size);
+		return -EINVAL;
+	}
+
+	return fastrpc_req_munmap_impl(fl, buf);
 }
 
 static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
@@ -1690,7 +1691,6 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
 	struct fastrpc_buf *buf = NULL;
 	struct fastrpc_mmap_req_msg req_msg;
 	struct fastrpc_mmap_rsp_msg rsp_msg;
-	struct fastrpc_req_munmap req_unmap;
 	struct fastrpc_phy_page pages;
 	struct fastrpc_req_mmap req;
 	struct device *dev = fl->sctx->dev;
@@ -1752,11 +1752,8 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
 	spin_unlock(&fl->lock);
 
 	if (copy_to_user((void __user *)argp, &req, sizeof(req))) {
-		/* unmap the memory and release the buffer */
-		req_unmap.vaddrout = buf->raddr;
-		req_unmap.size = buf->size;
-		fastrpc_req_munmap_impl(fl, &req_unmap);
-		return -EFAULT;
+		err = -EFAULT;
+		goto err_assign;
 	}
 
 	dev_dbg(dev, "mmap\t\tpt 0x%09lx OK [len 0x%08llx]\n",
@@ -1764,6 +1761,8 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
 
 	return 0;
 
+err_assign:
+	fastrpc_req_munmap_impl(fl, buf);
 err_invoke:
 	fastrpc_buf_free(buf);
 
-- 
GitLab


From 0871561055e666da421d779397efcc1e5e964cab Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:14:02 +0000
Subject: [PATCH 2774/4122] misc: fastrpc: Add support for audiopd

In order to be able to start the adsp listener for audiopd using adsprpcd,
we need to add the corresponding ioctl for creating a static process.
On that ioctl call we need to allocate the heap. Allocating the heap needs
to be happening only once and needs to be kept between different device
open calls, so attach it to the channel context to make sure that remains
until the RPMSG driver is removed. Then, if there are any VMIDs associated
with the static ADSP process, do a call to SCM to assign it.
And then, send all the necessary info related to heap to the DSP.

Co-developed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-8-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c      | 135 ++++++++++++++++++++++++++++++++++++
 include/uapi/misc/fastrpc.h |   7 ++
 2 files changed, 142 insertions(+)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 4590a11f7316f..adc44ba0eff65 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -37,8 +37,20 @@
 #define FASTRPC_DSP_UTILITIES_HANDLE	2
 #define FASTRPC_CTXID_MASK (0xFF0)
 #define INIT_FILELEN_MAX (2 * 1024 * 1024)
+#define INIT_FILE_NAMELEN_MAX (128)
 #define FASTRPC_DEVICE_NAME	"fastrpc"
+
+/* Add memory to static PD pool, protection thru XPU */
+#define ADSP_MMAP_HEAP_ADDR  4
+/* MAP static DMA buffer on DSP User PD */
+#define ADSP_MMAP_DMA_BUFFER  6
+/* Add memory to static PD pool protection thru hypervisor */
+#define ADSP_MMAP_REMOTE_HEAP_ADDR  8
+/* Add memory to userPD pool, for user heap */
 #define ADSP_MMAP_ADD_PAGES 0x1000
+/* Add memory to userPD pool, for LLC heap */
+#define ADSP_MMAP_ADD_PAGES_LLC 0x3000,
+
 #define DSP_UNSUPPORTED_API (0x80000414)
 /* MAX NUMBER of DSP ATTRIBUTES SUPPORTED */
 #define FASTRPC_MAX_DSP_ATTRIBUTES (256)
@@ -72,6 +84,7 @@
 		FASTRPC_BUILD_SCALARS(0, method, in, out, 0, 0)
 
 #define FASTRPC_CREATE_PROCESS_NARGS	6
+#define FASTRPC_CREATE_STATIC_PROCESS_NARGS	3
 /* Remote Method id table */
 #define FASTRPC_RMID_INIT_ATTACH	0
 #define FASTRPC_RMID_INIT_RELEASE	1
@@ -261,6 +274,7 @@ struct fastrpc_channel_ctx {
 	u32 dsp_attributes[FASTRPC_MAX_DSP_ATTRIBUTES];
 	struct fastrpc_device *secure_fdevice;
 	struct fastrpc_device *fdevice;
+	struct fastrpc_buf *remote_heap;
 	bool secure;
 	bool unsigned_support;
 };
@@ -1157,6 +1171,7 @@ bail:
 		spin_unlock(&fl->lock);
 		fastrpc_context_put(ctx);
 	}
+
 	if (err)
 		dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err);
 
@@ -1181,6 +1196,120 @@ static bool is_session_rejected(struct fastrpc_user *fl, bool unsigned_pd_reques
 	return false;
 }
 
+static int fastrpc_init_create_static_process(struct fastrpc_user *fl,
+					      char __user *argp)
+{
+	struct fastrpc_init_create_static init;
+	struct fastrpc_invoke_args *args;
+	struct fastrpc_phy_page pages[1];
+	char *name;
+	int err;
+	struct {
+		int pgid;
+		u32 namelen;
+		u32 pageslen;
+	} inbuf;
+	u32 sc;
+
+	args = kcalloc(FASTRPC_CREATE_STATIC_PROCESS_NARGS, sizeof(*args), GFP_KERNEL);
+	if (!args)
+		return -ENOMEM;
+
+	if (copy_from_user(&init, argp, sizeof(init))) {
+		err = -EFAULT;
+		goto err;
+	}
+
+	if (init.namelen > INIT_FILE_NAMELEN_MAX) {
+		err = -EINVAL;
+		goto err;
+	}
+
+	name = kzalloc(init.namelen, GFP_KERNEL);
+	if (!name) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	if (copy_from_user(name, (void __user *)(uintptr_t)init.name, init.namelen)) {
+		err = -EFAULT;
+		goto err_name;
+	}
+
+	if (!fl->cctx->remote_heap) {
+		err = fastrpc_remote_heap_alloc(fl, fl->sctx->dev, init.memlen,
+						&fl->cctx->remote_heap);
+		if (err)
+			goto err_name;
+
+		/* Map if we have any heap VMIDs associated with this ADSP Static Process. */
+		if (fl->cctx->vmcount) {
+			unsigned int perms = BIT(QCOM_SCM_VMID_HLOS);
+
+			err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys,
+							(u64)fl->cctx->remote_heap->size, &perms,
+							fl->cctx->vmperms, fl->cctx->vmcount);
+			if (err) {
+				dev_err(fl->sctx->dev, "Failed to assign memory with phys 0x%llx size 0x%llx err %d",
+					fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err);
+				goto err_map;
+			}
+		}
+	}
+
+	inbuf.pgid = fl->tgid;
+	inbuf.namelen = init.namelen;
+	inbuf.pageslen = 0;
+	fl->pd = USER_PD;
+
+	args[0].ptr = (u64)(uintptr_t)&inbuf;
+	args[0].length = sizeof(inbuf);
+	args[0].fd = -1;
+
+	args[1].ptr = (u64)(uintptr_t)name;
+	args[1].length = inbuf.namelen;
+	args[1].fd = -1;
+
+	pages[0].addr = fl->cctx->remote_heap->phys;
+	pages[0].size = fl->cctx->remote_heap->size;
+
+	args[2].ptr = (u64)(uintptr_t) pages;
+	args[2].length = sizeof(*pages);
+	args[2].fd = -1;
+
+	sc = FASTRPC_SCALARS(FASTRPC_RMID_INIT_CREATE_STATIC, 3, 0);
+
+	err = fastrpc_internal_invoke(fl, true, FASTRPC_INIT_HANDLE,
+				      sc, args);
+	if (err)
+		goto err_invoke;
+
+	kfree(args);
+
+	return 0;
+err_invoke:
+	if (fl->cctx->vmcount) {
+		struct qcom_scm_vmperm perm;
+
+		perm.vmid = QCOM_SCM_VMID_HLOS;
+		perm.perm = QCOM_SCM_PERM_RWX;
+		err = qcom_scm_assign_mem(fl->cctx->remote_heap->phys,
+						(u64)fl->cctx->remote_heap->size,
+						&(fl->cctx->vmperms[0].vmid), &perm, 1);
+		if (err)
+			dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
+				fl->cctx->remote_heap->phys, fl->cctx->remote_heap->size, err);
+	}
+err_map:
+	fastrpc_buf_free(fl->cctx->remote_heap);
+err_name:
+	kfree(name);
+err:
+	kfree(args);
+
+	return err;
+}
+
 static int fastrpc_init_create_process(struct fastrpc_user *fl,
 					char __user *argp)
 {
@@ -1915,6 +2044,9 @@ static long fastrpc_device_ioctl(struct file *file, unsigned int cmd,
 	case FASTRPC_IOCTL_INIT_ATTACH_SNS:
 		err = fastrpc_init_attach(fl, SENSORS_PD);
 		break;
+	case FASTRPC_IOCTL_INIT_CREATE_STATIC:
+		err = fastrpc_init_create_static_process(fl, argp);
+		break;
 	case FASTRPC_IOCTL_INIT_CREATE:
 		err = fastrpc_init_create_process(fl, argp);
 		break;
@@ -2184,6 +2316,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
 	if (cctx->secure_fdevice)
 		misc_deregister(&cctx->secure_fdevice->miscdev);
 
+	if (cctx->remote_heap)
+		fastrpc_buf_free(cctx->remote_heap);
+
 	of_platform_depopulate(&rpdev->dev);
 
 	cctx->rpdev = NULL;
diff --git a/include/uapi/misc/fastrpc.h b/include/uapi/misc/fastrpc.h
index 5e29f2cfa42d6..f33d914d8f469 100644
--- a/include/uapi/misc/fastrpc.h
+++ b/include/uapi/misc/fastrpc.h
@@ -13,6 +13,7 @@
 #define FASTRPC_IOCTL_MMAP		_IOWR('R', 6, struct fastrpc_req_mmap)
 #define FASTRPC_IOCTL_MUNMAP		_IOWR('R', 7, struct fastrpc_req_munmap)
 #define FASTRPC_IOCTL_INIT_ATTACH_SNS	_IO('R', 8)
+#define FASTRPC_IOCTL_INIT_CREATE_STATIC _IOWR('R', 9, struct fastrpc_init_create_static)
 #define FASTRPC_IOCTL_MEM_MAP		_IOWR('R', 10, struct fastrpc_mem_map)
 #define FASTRPC_IOCTL_MEM_UNMAP		_IOWR('R', 11, struct fastrpc_mem_unmap)
 #define FASTRPC_IOCTL_GET_DSP_INFO	_IOWR('R', 13, struct fastrpc_ioctl_capability)
@@ -87,6 +88,12 @@ struct fastrpc_init_create {
 	__u64 file;	/* pointer to elf file */
 };
 
+struct fastrpc_init_create_static {
+	__u32 namelen;	/* length of pd process name */
+	__u32 memlen;
+	__u64 name;	/* pd process name */
+};
+
 struct fastrpc_alloc_dma_buf {
 	__s32 fd;	/* fd */
 	__u32 flags;	/* flags to map with */
-- 
GitLab


From 76e8e4ace1ed2c97dba3b1370e0e105e07c572bc Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:14:03 +0000
Subject: [PATCH 2775/4122] misc: fastrpc: Safekeep mmaps on interrupted invoke

If the userspace daemon is killed in the middle of an invoke (e.g.
audiopd listerner invoke), we need to skip the unmapping on device
release, otherwise the DSP will crash. So lets safekeep all the maps
only if there is in invoke interrupted, by attaching them to the channel
context (which is resident until RPMSG driver is removed), and free them
on RPMSG driver remove.

Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-9-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index adc44ba0eff65..d7a98396ee945 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -275,6 +275,7 @@ struct fastrpc_channel_ctx {
 	struct fastrpc_device *secure_fdevice;
 	struct fastrpc_device *fdevice;
 	struct fastrpc_buf *remote_heap;
+	struct list_head invoke_interrupted_mmaps;
 	bool secure;
 	bool unsigned_support;
 };
@@ -1109,6 +1110,8 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
 				   struct fastrpc_invoke_args *args)
 {
 	struct fastrpc_invoke_ctx *ctx = NULL;
+	struct fastrpc_buf *buf, *b;
+
 	int err = 0;
 
 	if (!fl->sctx)
@@ -1172,6 +1175,13 @@ bail:
 		fastrpc_context_put(ctx);
 	}
 
+	if (err == -ERESTARTSYS) {
+		list_for_each_entry_safe(buf, b, &fl->mmaps, node) {
+			list_del(&buf->node);
+			list_add_tail(&buf->node, &fl->cctx->invoke_interrupted_mmaps);
+		}
+	}
+
 	if (err)
 		dev_dbg(fl->sctx->dev, "Error: Invoke Failed %d\n", err);
 
@@ -2278,6 +2288,7 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
 	dev_set_drvdata(&rpdev->dev, data);
 	dma_set_mask_and_coherent(rdev, DMA_BIT_MASK(32));
 	INIT_LIST_HEAD(&data->users);
+	INIT_LIST_HEAD(&data->invoke_interrupted_mmaps);
 	spin_lock_init(&data->lock);
 	idr_init(&data->ctx_idr);
 	data->domain_id = domain_id;
@@ -2302,6 +2313,7 @@ static void fastrpc_notify_users(struct fastrpc_user *user)
 static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
 {
 	struct fastrpc_channel_ctx *cctx = dev_get_drvdata(&rpdev->dev);
+	struct fastrpc_buf *buf, *b;
 	struct fastrpc_user *user;
 	unsigned long flags;
 
@@ -2316,6 +2328,9 @@ static void fastrpc_rpmsg_remove(struct rpmsg_device *rpdev)
 	if (cctx->secure_fdevice)
 		misc_deregister(&cctx->secure_fdevice->miscdev);
 
+	list_for_each_entry_safe(buf, b, &cctx->invoke_interrupted_mmaps, node)
+		list_del(&buf->node);
+
 	if (cctx->remote_heap)
 		fastrpc_buf_free(cctx->remote_heap);
 
-- 
GitLab


From 532ad70c6d449029cfa3eac8408f427e31334f33 Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:14:04 +0000
Subject: [PATCH 2776/4122] misc: fastrpc: Add mmap request assigning for
 static PD pool

If the mmap request is to add pages and thre are VMIDs associated with
that context, do a call to SCM to reassign that memory. Do not do this
for remote heap allocation, that is done on init create static process
only.

Co-developed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-10-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index d7a98396ee945..2e49bf1c31ef7 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -1839,8 +1839,9 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
 	if (copy_from_user(&req, argp, sizeof(req)))
 		return -EFAULT;
 
-	if (req.flags != ADSP_MMAP_ADD_PAGES) {
+	if (req.flags != ADSP_MMAP_ADD_PAGES && req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR) {
 		dev_err(dev, "flag not supported 0x%x\n", req.flags);
+
 		return -EINVAL;
 	}
 
@@ -1886,6 +1887,22 @@ static int fastrpc_req_mmap(struct fastrpc_user *fl, char __user *argp)
 	/* let the client know the address to use */
 	req.vaddrout = rsp_msg.vaddr;
 
+	/* Add memory to static PD pool, protection thru hypervisor */
+	if (req.flags != ADSP_MMAP_REMOTE_HEAP_ADDR && fl->cctx->vmcount) {
+		struct qcom_scm_vmperm perm;
+		int err = 0;
+
+		perm.vmid = QCOM_SCM_VMID_HLOS;
+		perm.perm = QCOM_SCM_PERM_RWX;
+		err = qcom_scm_assign_mem(buf->phys, buf->size,
+			&(fl->cctx->vmperms[0].vmid), &perm, 1);
+		if (err) {
+			dev_err(fl->sctx->dev, "Failed to assign memory phys 0x%llx size 0x%llx err %d",
+					buf->phys, buf->size, err);
+			goto err_assign;
+		}
+	}
+
 	spin_lock(&fl->lock);
 	list_add_tail(&buf->node, &fl->mmaps);
 	spin_unlock(&fl->lock);
-- 
GitLab


From 9bde43a0e2f469961e18d0a3496a9a74379c22bf Mon Sep 17 00:00:00 2001
From: Abel Vesa <abel.vesa@linaro.org>
Date: Fri, 25 Nov 2022 07:14:05 +0000
Subject: [PATCH 2777/4122] misc: fastrpc: Add dma_mask to fastrpc_channel_ctx

dma_set_mask_and_coherent only updates the mask to which the device
dma_mask pointer points to. Add a dma_mask to the channel ctx and set
the device dma_mask to point to that, otherwise the dma_set_mask will
return an error and the dma_set_coherent_mask will be skipped too.

Co-developed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Signed-off-by: Abel Vesa <abel.vesa@linaro.org>
Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
Link: https://lore.kernel.org/r/20221125071405.148786-11-srinivas.kandagatla@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/fastrpc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/misc/fastrpc.c b/drivers/misc/fastrpc.c
index 2e49bf1c31ef7..278ab6ca1a5ac 100644
--- a/drivers/misc/fastrpc.c
+++ b/drivers/misc/fastrpc.c
@@ -278,6 +278,7 @@ struct fastrpc_channel_ctx {
 	struct list_head invoke_interrupted_mmaps;
 	bool secure;
 	bool unsigned_support;
+	u64 dma_mask;
 };
 
 struct fastrpc_device {
@@ -2303,6 +2304,7 @@ static int fastrpc_rpmsg_probe(struct rpmsg_device *rpdev)
 	kref_init(&data->refcount);
 
 	dev_set_drvdata(&rpdev->dev, data);
+	rdev->dma_mask = &data->dma_mask;
 	dma_set_mask_and_coherent(rdev, DMA_BIT_MASK(32));
 	INIT_LIST_HEAD(&data->users);
 	INIT_LIST_HEAD(&data->invoke_interrupted_mmaps);
-- 
GitLab


From 78316e9dfc24906dd474630928ed1d3c562b568e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 9 Nov 2022 11:24:03 +0800
Subject: [PATCH 2778/4122] scsi: mpt3sas: Fix possible resource leaks in
 mpt3sas_transport_port_add()

In mpt3sas_transport_port_add(), if sas_rphy_add() returns error,
sas_rphy_free() needs be called to free the resource allocated in
sas_end_device_alloc(). Otherwise a kernel crash will happen:

Unable to handle kernel NULL pointer dereference at virtual address 0000000000000108
CPU: 45 PID: 37020 Comm: bash Kdump: loaded Tainted: G        W          6.1.0-rc1+ #189
pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
pc : device_del+0x54/0x3d0
lr : device_del+0x37c/0x3d0
Call trace:
 device_del+0x54/0x3d0
 attribute_container_class_device_del+0x28/0x38
 transport_remove_classdev+0x6c/0x80
 attribute_container_device_trigger+0x108/0x110
 transport_remove_device+0x28/0x38
 sas_rphy_remove+0x50/0x78 [scsi_transport_sas]
 sas_port_delete+0x30/0x148 [scsi_transport_sas]
 do_sas_phy_delete+0x78/0x80 [scsi_transport_sas]
 device_for_each_child+0x68/0xb0
 sas_remove_children+0x30/0x50 [scsi_transport_sas]
 sas_rphy_remove+0x38/0x78 [scsi_transport_sas]
 sas_port_delete+0x30/0x148 [scsi_transport_sas]
 do_sas_phy_delete+0x78/0x80 [scsi_transport_sas]
 device_for_each_child+0x68/0xb0
 sas_remove_children+0x30/0x50 [scsi_transport_sas]
 sas_remove_host+0x20/0x38 [scsi_transport_sas]
 scsih_remove+0xd8/0x420 [mpt3sas]

Because transport_add_device() is not called when sas_rphy_add() fails, the
device is not added. When sas_rphy_remove() is subsequently called to
remove the device in the remove() path, a NULL pointer dereference happens.

Fixes: f92363d12359 ("[SCSI] mpt3sas: add new driver supporting 12GB SAS")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221109032403.1636422-1-yangyingliang@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/mpt3sas/mpt3sas_transport.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/scsi/mpt3sas/mpt3sas_transport.c b/drivers/scsi/mpt3sas/mpt3sas_transport.c
index 0681daee6c149..e5ecd6ada6cdd 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_transport.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_transport.c
@@ -829,6 +829,8 @@ mpt3sas_transport_port_add(struct MPT3SAS_ADAPTER *ioc, u16 handle,
 	if ((sas_rphy_add(rphy))) {
 		ioc_err(ioc, "failure at %s:%d/%s()!\n",
 			__FILE__, __LINE__, __func__);
+		sas_rphy_free(rphy);
+		rphy = NULL;
 	}
 
 	if (mpt3sas_port->remote_identify.device_type == SAS_END_DEVICE) {
-- 
GitLab


From 4ef174a3ad9b5d73c1b6573e244ebba2b0d86eac Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Thu, 10 Nov 2022 23:11:29 +0800
Subject: [PATCH 2779/4122] scsi: hpsa: Fix error handling in
 hpsa_add_sas_host()

hpsa_sas_port_add_phy() does:
  ...
  sas_phy_add()  -> may return error here
  sas_port_add_phy()
  ...

Whereas hpsa_free_sas_phy() does:
  ...
  sas_port_delete_phy()
  sas_phy_delete()
  ...

If hpsa_sas_port_add_phy() returns an error, hpsa_free_sas_phy() can not be
called to free the memory because the port and the phy have not been added
yet.

Replace hpsa_free_sas_phy() with sas_phy_free() and kfree() to avoid kernel
crash in this case.

Fixes: d04e62b9d63a ("hpsa: add in sas transport class")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221110151129.394389-1-yangyingliang@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hpsa.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index e5cbc97a5ea46..6696967c5192c 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -9786,7 +9786,8 @@ static int hpsa_add_sas_host(struct ctlr_info *h)
 	return 0;
 
 free_sas_phy:
-	hpsa_free_sas_phy(hpsa_sas_phy);
+	sas_phy_free(hpsa_sas_phy->phy);
+	kfree(hpsa_sas_phy);
 free_sas_port:
 	hpsa_free_sas_port(hpsa_sas_port);
 free_sas_node:
-- 
GitLab


From fda34a5d304d0b98cc967e8763b52221b66dc202 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 11 Nov 2022 12:30:12 +0800
Subject: [PATCH 2780/4122] scsi: hpsa: Fix possible memory leak in
 hpsa_add_sas_device()

If hpsa_sas_port_add_rphy() returns an error, the 'rphy' allocated in
sas_end_device_alloc() needs to be freed. Address this by calling
sas_rphy_free() in the error path.

Fixes: d04e62b9d63a ("hpsa: add in sas transport class")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221111043012.1074466-1-yangyingliang@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hpsa.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 6696967c5192c..4dbf51e2623ad 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -9823,10 +9823,12 @@ static int hpsa_add_sas_device(struct hpsa_sas_node *hpsa_sas_node,
 
 	rc = hpsa_sas_port_add_rphy(hpsa_sas_port, rphy);
 	if (rc)
-		goto free_sas_port;
+		goto free_sas_rphy;
 
 	return 0;
 
+free_sas_rphy:
+	sas_rphy_free(rphy);
 free_sas_port:
 	hpsa_free_sas_port(hpsa_sas_port);
 	device->sas_port = NULL;
-- 
GitLab


From 859ed37c9c3f456510b97ecb0bf155cee2b9d3fc Mon Sep 17 00:00:00 2001
From: ChanWoo Lee <cw9316.lee@samsung.com>
Date: Fri, 11 Nov 2022 15:21:26 +0900
Subject: [PATCH 2781/4122] scsi: ufs: core: Separate function name and message

Separate the function name and message to make it easier to check the log.
Modify messages to fit the format of others.

Signed-off-by: ChanWoo Lee <cw9316.lee@samsung.com>
Link: https://lore.kernel.org/r/20221111062126.7307-1-cw9316.lee@samsung.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 6 +++---
 drivers/ufs/core/ufshpb.c | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 768cb49d269ca..7471830521145 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -4480,7 +4480,7 @@ static int ufshcd_complete_dev_init(struct ufs_hba *hba)
 		QUERY_FLAG_IDN_FDEVICEINIT, 0, NULL);
 	if (err) {
 		dev_err(hba->dev,
-			"%s setting fDeviceInit flag failed with error %d\n",
+			"%s: setting fDeviceInit flag failed with error %d\n",
 			__func__, err);
 		goto out;
 	}
@@ -4497,11 +4497,11 @@ static int ufshcd_complete_dev_init(struct ufs_hba *hba)
 
 	if (err) {
 		dev_err(hba->dev,
-				"%s reading fDeviceInit flag failed with error %d\n",
+				"%s: reading fDeviceInit flag failed with error %d\n",
 				__func__, err);
 	} else if (flag_res) {
 		dev_err(hba->dev,
-				"%s fDeviceInit was not cleared by the device\n",
+				"%s: fDeviceInit was not cleared by the device\n",
 				__func__);
 		err = -EBUSY;
 	}
diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c
index 3d69a81c5b178..fda3e7b494a61 100644
--- a/drivers/ufs/core/ufshpb.c
+++ b/drivers/ufs/core/ufshpb.c
@@ -2289,7 +2289,7 @@ static bool ufshpb_check_hpb_reset_query(struct ufs_hba *hba)
 	/* wait for the device to complete HPB reset query */
 	for (try = 0; try < HPB_RESET_REQ_RETRIES; try++) {
 		dev_dbg(hba->dev,
-			"%s start flag reset polling %d times\n",
+			"%s: start flag reset polling %d times\n",
 			__func__, try);
 
 		/* Poll fHpbReset flag to be cleared */
@@ -2298,7 +2298,7 @@ static bool ufshpb_check_hpb_reset_query(struct ufs_hba *hba)
 
 		if (err) {
 			dev_err(hba->dev,
-				"%s reading fHpbReset flag failed with error %d\n",
+				"%s: reading fHpbReset flag failed with error %d\n",
 				__func__, err);
 			return flag_res;
 		}
@@ -2310,7 +2310,7 @@ static bool ufshpb_check_hpb_reset_query(struct ufs_hba *hba)
 	}
 	if (flag_res) {
 		dev_err(hba->dev,
-			"%s fHpbReset was not cleared by the device\n",
+			"%s: fHpbReset was not cleared by the device\n",
 			__func__);
 	}
 out:
-- 
GitLab


From 5277326d07fbf68aa7fc9e7bce6c381002e00fca Mon Sep 17 00:00:00 2001
From: ChanWoo Lee <cw9316.lee@samsung.com>
Date: Fri, 11 Nov 2022 15:22:09 +0900
Subject: [PATCH 2782/4122] scsi: ufs: core: Switch 'check_for_bkops' to bool

Only checks true and false so it can be converted to bool.

Signed-off-by: ChanWoo Lee <cw9316.lee@samsung.com>
Link: https://lore.kernel.org/r/20221111062209.7365-1-cw9316.lee@samsung.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 7471830521145..0e8850869b21b 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -8825,7 +8825,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 
 static int ufshcd_link_state_transition(struct ufs_hba *hba,
 					enum uic_link_state req_link_state,
-					int check_for_bkops)
+					bool check_for_bkops)
 {
 	int ret = 0;
 
@@ -8976,7 +8976,7 @@ static void ufshcd_hba_vreg_set_hpm(struct ufs_hba *hba)
 static int __ufshcd_wl_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op)
 {
 	int ret = 0;
-	int check_for_bkops;
+	bool check_for_bkops;
 	enum ufs_pm_level pm_lvl;
 	enum ufs_dev_pwr_mode req_dev_pwr_mode;
 	enum uic_link_state req_link_state;
-- 
GitLab


From 222d227f375b4cfa517a8f1f0f266ebe0263ad05 Mon Sep 17 00:00:00 2001
From: ChanWoo Lee <cw9316.lee@samsung.com>
Date: Fri, 11 Nov 2022 15:23:01 +0900
Subject: [PATCH 2783/4122] scsi: ufs: core: Fix unnecessary operation for
 early return

Setting bitmap_len is not required when returning early. Defer until it is
needed.

Signed-off-by: ChanWoo Lee <cw9316.lee@samsung.com>
Link: https://lore.kernel.org/r/20221111062301.7423-1-cw9316.lee@samsung.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshpb.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/ufs/core/ufshpb.c b/drivers/ufs/core/ufshpb.c
index fda3e7b494a61..be3fb24b93d8e 100644
--- a/drivers/ufs/core/ufshpb.c
+++ b/drivers/ufs/core/ufshpb.c
@@ -233,11 +233,6 @@ next_srgn:
 	rgn = hpb->rgn_tbl + rgn_idx;
 	srgn = rgn->srgn_tbl + srgn_idx;
 
-	if (likely(!srgn->is_last))
-		bitmap_len = hpb->entries_per_srgn;
-	else
-		bitmap_len = hpb->last_srgn_entries;
-
 	if (!ufshpb_is_valid_srgn(rgn, srgn))
 		return true;
 
@@ -253,6 +248,11 @@ next_srgn:
 		return true;
 	}
 
+	if (likely(!srgn->is_last))
+		bitmap_len = hpb->entries_per_srgn;
+	else
+		bitmap_len = hpb->last_srgn_entries;
+
 	if ((srgn_offset + cnt) > bitmap_len)
 		bit_len = bitmap_len - srgn_offset;
 	else
-- 
GitLab


From bb0cd225dd37df1f4a22e36dad59ff33178ecdfc Mon Sep 17 00:00:00 2001
From: Chen Zhongjin <chenzhongjin@huawei.com>
Date: Fri, 11 Nov 2022 15:40:46 +0800
Subject: [PATCH 2784/4122] scsi: efct: Fix possible memleak in
 efct_device_init()

In efct_device_init(), when efct_scsi_reg_fc_transport() fails,
efct_scsi_tgt_driver_exit() is not called to release memory for
efct_scsi_tgt_driver_init() and causes memleak:

unreferenced object 0xffff8881020ce000 (size 2048):
  comm "modprobe", pid 465, jiffies 4294928222 (age 55.872s)
  backtrace:
    [<0000000021a1ef1b>] kmalloc_trace+0x27/0x110
    [<000000004c3ed51c>] target_register_template+0x4fd/0x7b0 [target_core_mod]
    [<00000000f3393296>] efct_scsi_tgt_driver_init+0x18/0x50 [efct]
    [<00000000115de533>] 0xffffffffc0d90011
    [<00000000d608f646>] do_one_initcall+0xd0/0x4e0
    [<0000000067828cf1>] do_init_module+0x1cc/0x6a0
    ...

Fixes: 4df84e846624 ("scsi: elx: efct: Driver initialization routines")
Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
Link: https://lore.kernel.org/r/20221111074046.57061-1-chenzhongjin@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/elx/efct/efct_driver.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/elx/efct/efct_driver.c b/drivers/scsi/elx/efct/efct_driver.c
index b08fc8839808d..49fd2cfed70c7 100644
--- a/drivers/scsi/elx/efct/efct_driver.c
+++ b/drivers/scsi/elx/efct/efct_driver.c
@@ -42,6 +42,7 @@ efct_device_init(void)
 
 	rc = efct_scsi_reg_fc_transport();
 	if (rc) {
+		efct_scsi_tgt_driver_exit();
 		pr_err("failed to register to FC host\n");
 		return rc;
 	}
-- 
GitLab


From ed0f17b748b20271cb568c7ca0b23b120316a47d Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Fri, 11 Nov 2022 23:00:31 -0800
Subject: [PATCH 2785/4122] scsi: scsi_debug: Fix a warning in resp_verify()

As 'vnum' is controlled by user, so if user tries to allocate memory larger
than(>=) MAX_ORDER, then kcalloc() will fail, it creates a stack trace and
messes up dmesg with a warning.

Add __GFP_NOWARN in order to avoid too large allocation warning.  This is
detected by static analysis using smatch.

Fixes: c3e2fe9222d4 ("scsi: scsi_debug: Implement VERIFY(10), add VERIFY(16)")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Link: https://lore.kernel.org/r/20221112070031.2121068-1-harshit.m.mogalapalli@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 273224d29ce94..f556e36a2e0da 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -4429,7 +4429,7 @@ static int resp_verify(struct scsi_cmnd *scp, struct sdebug_dev_info *devip)
 	if (ret)
 		return ret;
 
-	arr = kcalloc(lb_size, vnum, GFP_ATOMIC);
+	arr = kcalloc(lb_size, vnum, GFP_ATOMIC | __GFP_NOWARN);
 	if (!arr) {
 		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
 				INSUFF_RES_ASCQ);
-- 
GitLab


From 07f2ca139d9a7a1ba71c4c03997c8de161db2346 Mon Sep 17 00:00:00 2001
From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Date: Fri, 11 Nov 2022 23:06:12 -0800
Subject: [PATCH 2786/4122] scsi: scsi_debug: Fix a warning in
 resp_report_zones()

As 'alloc_len' is user controlled data, if user tries to allocate memory
larger than(>=) MAX_ORDER, then kcalloc() will fail, it creates a stack
trace and messes up dmesg with a warning.

Add __GFP_NOWARN in order to avoid too large allocation warning.  This is
detected by static analysis using smatch.

Fixes: 7db0e0c8190a ("scsi: scsi_debug: Fix buffer size of REPORT ZONES command")
Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
Link: https://lore.kernel.org/r/20221112070612.2121535-1-harshit.m.mogalapalli@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index f556e36a2e0da..19ad8e2aa07a7 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -4497,7 +4497,7 @@ static int resp_report_zones(struct scsi_cmnd *scp,
 
 	rep_max_zones = (alloc_len - 64) >> ilog2(RZONES_DESC_HD);
 
-	arr = kzalloc(alloc_len, GFP_ATOMIC);
+	arr = kzalloc(alloc_len, GFP_ATOMIC | __GFP_NOWARN);
 	if (!arr) {
 		mk_sense_buffer(scp, ILLEGAL_REQUEST, INSUFF_RES_ASC,
 				INSUFF_RES_ASCQ);
-- 
GitLab


From 47b6a122c7b69a876c7ee2fc064a26b09627de9d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 12 Nov 2022 17:43:10 +0800
Subject: [PATCH 2787/4122] scsi: fcoe: Fix possible name leak when
 device_register() fails

If device_register() returns an error, the name allocated by dev_set_name()
needs to be freed. As the comment of device_register() says, one should use
put_device() to give up the reference in the error path. Fix this by
calling put_device(), then the name can be freed in kobject_cleanup().

The 'fcf' is freed in fcoe_fcf_device_release(), so the kfree() in the
error path can be removed.

The 'ctlr' is freed in fcoe_ctlr_device_release(), so don't use the error
label, just return NULL after calling put_device().

Fixes: 9a74e884ee71 ("[SCSI] libfcoe: Add fcoe_sysfs")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221112094310.3633291-1-yangyingliang@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/fcoe/fcoe_sysfs.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/fcoe/fcoe_sysfs.c b/drivers/scsi/fcoe/fcoe_sysfs.c
index af658aa38fedf..6260aa5ea6af8 100644
--- a/drivers/scsi/fcoe/fcoe_sysfs.c
+++ b/drivers/scsi/fcoe/fcoe_sysfs.c
@@ -830,14 +830,15 @@ struct fcoe_ctlr_device *fcoe_ctlr_device_add(struct device *parent,
 
 	dev_set_name(&ctlr->dev, "ctlr_%d", ctlr->id);
 	error = device_register(&ctlr->dev);
-	if (error)
-		goto out_del_q2;
+	if (error) {
+		destroy_workqueue(ctlr->devloss_work_q);
+		destroy_workqueue(ctlr->work_q);
+		put_device(&ctlr->dev);
+		return NULL;
+	}
 
 	return ctlr;
 
-out_del_q2:
-	destroy_workqueue(ctlr->devloss_work_q);
-	ctlr->devloss_work_q = NULL;
 out_del_q:
 	destroy_workqueue(ctlr->work_q);
 	ctlr->work_q = NULL;
@@ -1036,16 +1037,16 @@ struct fcoe_fcf_device *fcoe_fcf_device_add(struct fcoe_ctlr_device *ctlr,
 	fcf->selected = new_fcf->selected;
 
 	error = device_register(&fcf->dev);
-	if (error)
-		goto out_del;
+	if (error) {
+		put_device(&fcf->dev);
+		goto out;
+	}
 
 	fcf->state = FCOE_FCF_STATE_CONNECTED;
 	list_add_tail(&fcf->peers, &ctlr->fcfs);
 
 	return fcf;
 
-out_del:
-	kfree(fcf);
 out:
 	return NULL;
 }
-- 
GitLab


From e6d773f93a49e0eda88a903a2a6542ca83380eb1 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 12 Nov 2022 21:10:10 +0800
Subject: [PATCH 2788/4122] scsi: scsi_debug: Fix possible name leak in
 sdebug_add_host_helper()

Afer commit 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id
string array"), the name of device is allocated dynamically, it needs be
freed when device_register() returns error.

As comment of device_register() says, one should use put_device() to give
up the reference in the error path. Fix this by calling put_device(), then
the name can be freed in kobject_cleanup(), and sdbg_host is freed in
sdebug_release_adapter().

When the device release is not set, it means the device is not initialized.
We can not call put_device() in this case. Use kfree() to free memory.

Fixes: 1fa5ae857bb1 ("driver core: get rid of struct device's bus_id string array")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221112131010.3757845-1-yangyingliang@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_debug.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c
index 19ad8e2aa07a7..de3d3b1edaf56 100644
--- a/drivers/scsi/scsi_debug.c
+++ b/drivers/scsi/scsi_debug.c
@@ -7329,7 +7329,10 @@ clean:
 		kfree(sdbg_devinfo->zstate);
 		kfree(sdbg_devinfo);
 	}
-	kfree(sdbg_host);
+	if (sdbg_host->dev.release)
+		put_device(&sdbg_host->dev);
+	else
+		kfree(sdbg_host);
 	pr_warn("%s: failed, errno=%d\n", __func__, -error);
 	return error;
 }
-- 
GitLab


From e6f108bffc3708ddcff72324f7d40dfcd0204894 Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Sun, 13 Nov 2022 14:45:13 +0800
Subject: [PATCH 2789/4122] scsi: ipr: Fix WARNING in ipr_init()

ipr_init() will not call unregister_reboot_notifier() when
pci_register_driver() fails, which causes a WARNING. Call
unregister_reboot_notifier() when pci_register_driver() fails.

notifier callback ipr_halt [ipr] already registered
WARNING: CPU: 3 PID: 299 at kernel/notifier.c:29
notifier_chain_register+0x16d/0x230
Modules linked in: ipr(+) xhci_pci_renesas xhci_hcd ehci_hcd usbcore
led_class gpu_sched drm_buddy video wmi drm_ttm_helper ttm
drm_display_helper drm_kms_helper drm drm_panel_orientation_quirks
agpgart cfbft
CPU: 3 PID: 299 Comm: modprobe Tainted: G        W
6.1.0-rc1-00190-g39508d23b672-dirty #332
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
RIP: 0010:notifier_chain_register+0x16d/0x230
Call Trace:
 <TASK>
 __blocking_notifier_chain_register+0x73/0xb0
 ipr_init+0x30/0x1000 [ipr]
 do_one_initcall+0xdb/0x480
 do_init_module+0x1cf/0x680
 load_module+0x6a50/0x70a0
 __do_sys_finit_module+0x12f/0x1c0
 do_syscall_64+0x3f/0x90
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: f72919ec2bbb ("[SCSI] ipr: implement shutdown changes and remove obsolete write cache parameter")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Link: https://lore.kernel.org/r/20221113064513.14028-1-shangxiaojing@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/ipr.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 9d01a3e3c26aa..2022ffb450417 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -10872,11 +10872,19 @@ static struct notifier_block ipr_notifier = {
  **/
 static int __init ipr_init(void)
 {
+	int rc;
+
 	ipr_info("IBM Power RAID SCSI Device Driver version: %s %s\n",
 		 IPR_DRIVER_VERSION, IPR_DRIVER_DATE);
 
 	register_reboot_notifier(&ipr_notifier);
-	return pci_register_driver(&ipr_driver);
+	rc = pci_register_driver(&ipr_driver);
+	if (rc) {
+		unregister_reboot_notifier(&ipr_notifier);
+		return rc;
+	}
+
+	return 0;
 }
 
 /**
-- 
GitLab


From 42c5907728867df91045f532a38682e0ec7a955b Mon Sep 17 00:00:00 2001
From: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Date: Tue, 15 Nov 2022 09:29:05 +0900
Subject: [PATCH 2790/4122] scsi: sd: Use 16-byte SYNCHRONIZE CACHE on ZBC
 devices

ZBC Zoned Block Commands specification mandates SYNCHRONIZE CACHE(16) for
host-managed zoned block devices, but does not mandate SYNCHRONIZE
CACHE(10). Call SYNCHRONIZE CACHE(16) in place of SYNCHRONIZE CACHE(10) to
ensure that the command is always supported. For this purpose, add
use_16_for_sync flag to struct scsi_device in same manner as use_16_for_rw
flag.

To be precise, ZBC does not mandate SYNCHRONIZE CACHE(16) for host-aware
zoned block devices. However, modern devices should support 16-byte
commands. Hence, call SYNCHRONIZE CACHE (16) on both types of ZBC devices,
host-aware and host-managed. Of note is that READ(16) and WRITE(16) have
same story and they are already called for both types of ZBC devices.

Another note is that this patch depends on the fix commit ea045fd344cb
("ata: libata-scsi: fix SYNCHRONIZE CACHE (16) command failure").

Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawasaki@wdc.com>
Link: https://lore.kernel.org/r/20221115002905.1709006-1-shinichiro.kawasaki@wdc.com
Reviewed-by: Damien Le Moal <damien.lemoal@opendource.wdc.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/sd.c          | 16 ++++++++++++----
 drivers/scsi/sd_zbc.c      |  3 ++-
 include/scsi/scsi_device.h |  1 +
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index eb76ba0550216..faa2b55d1a21a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1026,8 +1026,13 @@ static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
 	/* flush requests don't perform I/O, zero the S/G table */
 	memset(&cmd->sdb, 0, sizeof(cmd->sdb));
 
-	cmd->cmnd[0] = SYNCHRONIZE_CACHE;
-	cmd->cmd_len = 10;
+	if (cmd->device->use_16_for_sync) {
+		cmd->cmnd[0] = SYNCHRONIZE_CACHE_16;
+		cmd->cmd_len = 16;
+	} else {
+		cmd->cmnd[0] = SYNCHRONIZE_CACHE;
+		cmd->cmd_len = 10;
+	}
 	cmd->transfersize = 0;
 	cmd->allowed = sdkp->max_retries;
 
@@ -1587,9 +1592,12 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr)
 		sshdr = &my_sshdr;
 
 	for (retries = 3; retries > 0; --retries) {
-		unsigned char cmd[10] = { 0 };
+		unsigned char cmd[16] = { 0 };
 
-		cmd[0] = SYNCHRONIZE_CACHE;
+		if (sdp->use_16_for_sync)
+			cmd[0] = SYNCHRONIZE_CACHE_16;
+		else
+			cmd[0] = SYNCHRONIZE_CACHE;
 		/*
 		 * Leave the rest of the command zero to indicate
 		 * flush everything.
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index bd15624c63228..b163bf936acc7 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -921,9 +921,10 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
 		return 0;
 	}
 
-	/* READ16/WRITE16 is mandatory for ZBC disks */
+	/* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */
 	sdkp->device->use_16_for_rw = 1;
 	sdkp->device->use_10_for_rw = 0;
+	sdkp->device->use_16_for_sync = 1;
 
 	if (!blk_queue_is_zoned(q)) {
 		/*
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 24bdbf7999ab4..3642b8e3928b7 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -184,6 +184,7 @@ struct scsi_device {
 	unsigned no_report_opcodes:1;	/* no REPORT SUPPORTED OPERATION CODES */
 	unsigned no_write_same:1;	/* no WRITE SAME command */
 	unsigned use_16_for_rw:1; /* Use read/write(16) over read/write(10) */
+	unsigned use_16_for_sync:1;	/* Use sync (16) over sync (10) */
 	unsigned skip_ms_page_8:1;	/* do not use MODE SENSE page 0x08 */
 	unsigned skip_ms_page_3f:1;	/* do not use MODE SENSE page 0x3f */
 	unsigned skip_vpd_pages:1;	/* do not read VPD pages */
-- 
GitLab


From 4155658cee394b22b24c6d64e49247bf26d95b92 Mon Sep 17 00:00:00 2001
From: Chen Zhongjin <chenzhongjin@huawei.com>
Date: Tue, 15 Nov 2022 17:24:42 +0800
Subject: [PATCH 2791/4122] scsi: fcoe: Fix transport not deattached when
 fcoe_if_init() fails

fcoe_init() calls fcoe_transport_attach(&fcoe_sw_transport), but when
fcoe_if_init() fails, &fcoe_sw_transport is not detached and leaves freed
&fcoe_sw_transport on fcoe_transports list. This causes panic when
reinserting module.

 BUG: unable to handle page fault for address: fffffbfff82e2213
 RIP: 0010:fcoe_transport_attach+0xe1/0x230 [libfcoe]
 Call Trace:
  <TASK>
  do_one_initcall+0xd0/0x4e0
  load_module+0x5eee/0x7210
  ...

Fixes: 78a582463c1e ("[SCSI] fcoe: convert fcoe.ko to become an fcoe transport provider driver")
Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
Link: https://lore.kernel.org/r/20221115092442.133088-1-chenzhongjin@huawei.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/fcoe/fcoe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 6ec296321ffc1..38774a272e627 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -2491,6 +2491,7 @@ static int __init fcoe_init(void)
 
 out_free:
 	mutex_unlock(&fcoe_config_mutex);
+	fcoe_transport_detach(&fcoe_sw_transport);
 out_destroy:
 	destroy_workqueue(fcoe_wq);
 	return rc;
-- 
GitLab


From 3620e174d260adf88fa6511e8a20831cbddc4b66 Mon Sep 17 00:00:00 2001
From: Gleb Chesnokov <gleb.chesnokov@scst.dev>
Date: Tue, 15 Nov 2022 12:38:05 +0300
Subject: [PATCH 2792/4122] scsi: qla2xxx: Remove duplicate of vha->iocb_work
 initialization

Commit 9b3e0f4d4147 ("scsi: qla2xxx: Move work element processing out of
DPC thread") introduced the initialization of vha->iocb_work in
qla2x00_create_host() function.

This initialization is also called from qla2x00_probe_one() function, just
after qla2x00_create_host().

Hence remove this duplicate call since it has already been called before.

Signed-off-by: Gleb Chesnokov <gleb.chesnokov@scst.dev>
Link: https://lore.kernel.org/r/822b3823-f344-67d6-30f1-16e31cf68eed@scst.dev
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_os.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index 96ba1398f20c1..7fb28c207ee50 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -3284,7 +3284,6 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	    host->max_cmd_len, host->max_channel, host->max_lun,
 	    host->transportt, sht->vendor_id);
 
-	INIT_WORK(&base_vha->iocb_work, qla2x00_iocb_work_fn);
 	INIT_WORK(&ha->heartbeat_work, qla_heartbeat_work_fn);
 
 	/* Set up the irqs */
-- 
GitLab


From 95da5e58172cd3c58b82cb01e6cd157b6c5eabe9 Mon Sep 17 00:00:00 2001
From: Gleb Chesnokov <gleb.chesnokov@scst.dev>
Date: Tue, 15 Nov 2022 12:38:08 +0300
Subject: [PATCH 2793/4122] scsi: qla2xxx: Initialize vha->unknown_atio_[list,
 work] for NPIV hosts

Initialization of vha->unknown_atio_list and vha->unknown_atio_work only
happens for base_vha in qlt_probe_one_stage1(). But there is no
initialization for NPIV hosts that are created in qla24xx_vport_create().

This causes a crash when trying to access these NPIV host fields.

Fix this by adding initialization to qla_vport_create().

Signed-off-by: Gleb Chesnokov <gleb.chesnokov@scst.dev>
Link: https://lore.kernel.org/r/376c89a2-a9ac-bcf9-bf0f-dfe89a02fd4b@scst.dev
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/qla2xxx/qla_target.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/qla2xxx/qla_target.c b/drivers/scsi/qla2xxx/qla_target.c
index bb754a9508023..548f22705ddc8 100644
--- a/drivers/scsi/qla2xxx/qla_target.c
+++ b/drivers/scsi/qla2xxx/qla_target.c
@@ -6741,6 +6741,9 @@ qlt_vport_create(struct scsi_qla_host *vha, struct qla_hw_data *ha)
 	mutex_init(&vha->vha_tgt.tgt_mutex);
 	mutex_init(&vha->vha_tgt.tgt_host_action_mutex);
 
+	INIT_LIST_HEAD(&vha->unknown_atio_list);
+	INIT_DELAYED_WORK(&vha->unknown_atio_work, qlt_unknown_atio_work_fn);
+
 	qlt_clear_mode(vha);
 
 	/*
-- 
GitLab


From e118df492320176af94deec000ae034cc92be754 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Thu, 17 Nov 2022 11:51:00 +0800
Subject: [PATCH 2794/4122] scsi: snic: Fix possible UAF in snic_tgt_create()

Smatch reports a warning as follows:

drivers/scsi/snic/snic_disc.c:307 snic_tgt_create() warn:
  '&tgt->list' not removed from list

If device_add() fails in snic_tgt_create(), tgt will be freed, but
tgt->list will not be removed from snic->disc.tgt_list, then list traversal
may cause UAF.

Remove from snic->disc.tgt_list before free().

Fixes: c8806b6c9e82 ("snic: driver for Cisco SCSI HBA")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Link: https://lore.kernel.org/r/20221117035100.2944812-1-cuigaosheng1@huawei.com
Acked-by: Narsimhulu Musini <nmusini@cisco.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/snic/snic_disc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/snic/snic_disc.c b/drivers/scsi/snic/snic_disc.c
index 9b2b5f8c23b9a..8fbf3c1b1311d 100644
--- a/drivers/scsi/snic/snic_disc.c
+++ b/drivers/scsi/snic/snic_disc.c
@@ -304,6 +304,9 @@ snic_tgt_create(struct snic *snic, struct snic_tgt_id *tgtid)
 			      ret);
 
 		put_device(&snic->shost->shost_gendev);
+		spin_lock_irqsave(snic->shost->host_lock, flags);
+		list_del(&tgt->list);
+		spin_unlock_irqrestore(snic->shost->host_lock, flags);
 		kfree(tgt);
 		tgt = NULL;
 
-- 
GitLab


From a500c4cc06cd2830c692b571dd0a1c3585f23150 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Nov 2022 10:36:25 -0800
Subject: [PATCH 2795/4122] scsi: device_handler: alua: Revert "Move a
 scsi_device_put() call out of alua_check_vpd()"

There is a bug in commit 0b25e17e9018 ("scsi: alua: Move a
scsi_device_put() call out of alua_check_vpd()"): that patch may cause
alua_rtpg_queue() callers to call scsi_device_put() even if that function
should not be called. Revert that commit to prepare for a different
solution.

Cc: Hannes Reinecke <hare@suse.de>
Cc: Martin Wilck <mwilck@suse.com>
Cc: Sachin Sant <sachinp@linux.ibm.com>
Cc: Benjamin Block <bblock@linux.ibm.com>
Reported-by: Sachin Sant <sachinp@linux.ibm.com>
Reported-by: Benjamin Block <bblock@linux.ibm.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221117183626.2656196-2-bvanassche@acm.org
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 23 ++++++++--------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index 693cd827e1384..bd4ee294f5c78 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -324,7 +324,6 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 	struct alua_port_group *pg, *old_pg = NULL;
 	bool pg_updated = false;
 	unsigned long flags;
-	bool put_sdev;
 
 	group_id = scsi_vpd_tpg_id(sdev, &rel_port);
 	if (group_id < 0) {
@@ -374,14 +373,11 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 		list_add_rcu(&h->node, &pg->dh_list);
 	spin_unlock_irqrestore(&pg->lock, flags);
 
-	put_sdev = alua_rtpg_queue(rcu_dereference_protected(h->pg,
+	alua_rtpg_queue(rcu_dereference_protected(h->pg,
 						  lockdep_is_held(&h->pg_lock)),
 			sdev, NULL, true);
 	spin_unlock(&h->pg_lock);
 
-	if (put_sdev)
-		scsi_device_put(sdev);
-
 	if (old_pg)
 		kref_put(&old_pg->kref, release_port_group);
 
@@ -982,10 +978,9 @@ queue_rtpg:
  *         RTPG already has been scheduled.
  *
  * Returns true if and only if alua_rtpg_work() will be called asynchronously.
- * That function is responsible for calling @qdata->fn(). If this function
- * returns true, the caller is responsible for invoking scsi_device_put(@sdev).
+ * That function is responsible for calling @qdata->fn().
  */
-static bool __must_check alua_rtpg_queue(struct alua_port_group *pg,
+static bool alua_rtpg_queue(struct alua_port_group *pg,
 			    struct scsi_device *sdev,
 			    struct alua_queue_data *qdata, bool force)
 {
@@ -1024,6 +1019,8 @@ static bool __must_check alua_rtpg_queue(struct alua_port_group *pg,
 		else
 			kref_put(&pg->kref, release_port_group);
 	}
+	if (sdev)
+		scsi_device_put(sdev);
 
 	return true;
 }
@@ -1130,12 +1127,10 @@ static int alua_activate(struct scsi_device *sdev,
 	rcu_read_unlock();
 	mutex_unlock(&h->init_mutex);
 
-	if (alua_rtpg_queue(pg, sdev, qdata, true)) {
-		scsi_device_put(sdev);
+	if (alua_rtpg_queue(pg, sdev, qdata, true))
 		fn = NULL;
-	} else {
+	else
 		err = SCSI_DH_DEV_OFFLINED;
-	}
 	kref_put(&pg->kref, release_port_group);
 out:
 	if (fn)
@@ -1161,9 +1156,7 @@ static void alua_check(struct scsi_device *sdev, bool force)
 		return;
 	}
 	rcu_read_unlock();
-
-	if (alua_rtpg_queue(pg, sdev, NULL, force))
-		scsi_device_put(sdev);
+	alua_rtpg_queue(pg, sdev, NULL, force);
 	kref_put(&pg->kref, release_port_group);
 }
 
-- 
GitLab


From 50759b881e1d6879e7cef15c74bdea2e937338c9 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Thu, 17 Nov 2022 10:36:26 -0800
Subject: [PATCH 2796/4122] scsi: device_handler: alua: Call scsi_device_put()
 from non-atomic context

Since commit f93ed747e2c7 ("scsi: core: Release SCSI devices
synchronously"), scsi_device_put() might sleep. Avoid calling it from
alua_rtpg_queue() with the pg_lock held. The lock only pretects h->pg,
anyway. To avoid the pg being freed under us, because of a race with
another thread, take a temporary reference. In alua_rtpg_queue(), verify
that the pg still belongs to the sdev being passed before actually queueing
the RTPG.

This patch fixes the following smatch warning:

drivers/scsi/device_handler/scsi_dh_alua.c:1013 alua_rtpg_queue() warn: sleeping in atomic context

alua_check_vpd() <- disables preempt
-> alua_rtpg_queue()
   -> scsi_device_put()

Cc: Martin Wilck <mwilck@suse.com>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Sachin Sant <sachinp@linux.ibm.com>
Cc: Benjamin Block <bblock@linux.ibm.com>
Suggested-by: Martin Wilck <mwilck@suse.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221117183626.2656196-3-bvanassche@acm.org
Tested-by: Sachin Sant <sachinp@linux.ibm.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/device_handler/scsi_dh_alua.c | 27 +++++++++++++++-------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c
index bd4ee294f5c78..49cc18a874731 100644
--- a/drivers/scsi/device_handler/scsi_dh_alua.c
+++ b/drivers/scsi/device_handler/scsi_dh_alua.c
@@ -354,6 +354,8 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 			    "%s: port group %x rel port %x\n",
 			    ALUA_DH_NAME, group_id, rel_port);
 
+	kref_get(&pg->kref);
+
 	/* Check for existing port group references */
 	spin_lock(&h->pg_lock);
 	old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock));
@@ -373,11 +375,11 @@ static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h,
 		list_add_rcu(&h->node, &pg->dh_list);
 	spin_unlock_irqrestore(&pg->lock, flags);
 
-	alua_rtpg_queue(rcu_dereference_protected(h->pg,
-						  lockdep_is_held(&h->pg_lock)),
-			sdev, NULL, true);
 	spin_unlock(&h->pg_lock);
 
+	alua_rtpg_queue(pg, sdev, NULL, true);
+	kref_put(&pg->kref, release_port_group);
+
 	if (old_pg)
 		kref_put(&old_pg->kref, release_port_group);
 
@@ -986,6 +988,9 @@ static bool alua_rtpg_queue(struct alua_port_group *pg,
 {
 	int start_queue = 0;
 	unsigned long flags;
+
+	might_sleep();
+
 	if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev))
 		return false;
 
@@ -996,11 +1001,17 @@ static bool alua_rtpg_queue(struct alua_port_group *pg,
 		force = true;
 	}
 	if (pg->rtpg_sdev == NULL) {
-		pg->interval = 0;
-		pg->flags |= ALUA_PG_RUN_RTPG;
-		kref_get(&pg->kref);
-		pg->rtpg_sdev = sdev;
-		start_queue = 1;
+		struct alua_dh_data *h = sdev->handler_data;
+
+		rcu_read_lock();
+		if (h && rcu_dereference(h->pg) == pg) {
+			pg->interval = 0;
+			pg->flags |= ALUA_PG_RUN_RTPG;
+			kref_get(&pg->kref);
+			pg->rtpg_sdev = sdev;
+			start_queue = 1;
+		}
+		rcu_read_unlock();
 	} else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) {
 		pg->flags |= ALUA_PG_RUN_RTPG;
 		/* Do not queue if the worker is already running */
-- 
GitLab


From 541555285339313e831f8e446c03a7994c604d65 Mon Sep 17 00:00:00 2001
From: ChanWoo Lee <cw9316.lee@samsung.com>
Date: Fri, 18 Nov 2022 13:41:36 +0900
Subject: [PATCH 2797/4122] scsi: ufs: ufs-mediatek: Remove unneeded code

Remove unnecessary if/goto code.

Signed-off-by: ChanWoo Lee <cw9316.lee@samsung.com>
Link: https://lore.kernel.org/r/20221118044136.921-1-cw9316.lee@samsung.com
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/host/ufs-mediatek.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
index 7309f3f87eacf..7d13878dff479 100644
--- a/drivers/ufs/host/ufs-mediatek.c
+++ b/drivers/ufs/host/ufs-mediatek.c
@@ -441,8 +441,6 @@ static int ufs_mtk_mphy_power_on(struct ufs_hba *hba, bool on)
 		if (ufs_mtk_is_va09_supported(hba)) {
 			ufs_mtk_va09_pwr_ctrl(res, 0);
 			ret = regulator_disable(host->reg_va09);
-			if (ret < 0)
-				goto out;
 		}
 	}
 out:
-- 
GitLab


From 96a2dfa1df4b9df0cfa2e807153b4d254db2fa82 Mon Sep 17 00:00:00 2001
From: ChanWoo Lee <cw9316.lee@samsung.com>
Date: Fri, 18 Nov 2022 13:52:42 +0900
Subject: [PATCH 2798/4122] scsi: ufs: ufs-mediatek: Modify the return value

Be consistent with the rest of driver wrt. functions returning bool.

  91: 	return !!(host->caps & UFS_MTK_CAP_BOOST_CRYPT_ENGINE);
  98: 	return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL);
  105:	return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC);

Signed-off-by: ChanWoo Lee <cw9316.lee@samsung.com>
Link: https://lore.kernel.org/r/20221118045242.2770-1-cw9316.lee@samsung.com
Reviewed-by: Stanley Chu <stanley.chu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/host/ufs-mediatek.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
index 7d13878dff479..ef5816d82326a 100644
--- a/drivers/ufs/host/ufs-mediatek.c
+++ b/drivers/ufs/host/ufs-mediatek.c
@@ -109,7 +109,7 @@ static bool ufs_mtk_is_pmc_via_fastauto(struct ufs_hba *hba)
 {
 	struct ufs_mtk_host *host = ufshcd_get_variant(hba);
 
-	return (host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO);
+	return !!(host->caps & UFS_MTK_CAP_PMC_VIA_FASTAUTO);
 }
 
 static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable)
-- 
GitLab


From 7e613be7c63d2b9041b38d51fc324b8ad67d31e3 Mon Sep 17 00:00:00 2001
From: Jie Zhan <zhanjie9@hisilicon.com>
Date: Fri, 18 Nov 2022 16:37:10 +0800
Subject: [PATCH 2799/4122] scsi: Revert "scsi: hisi_sas: Drain bcast events in
 hisi_sas_rescan_topology()"

This reverts commit 11ff0c98fca35df16c84d4eee52008faecaf10a6.

Draining or flushing events in hisi_sas_rescan_topology() can hang the
driver, typically with phy up or phy down events being processed,
i.e. sas_porte_bytes_dmaed() or sas_phye_loss_of_signal().

Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
Link: https://lore.kernel.org/r/20221118083714.4034612-2-zhanjie9@hisilicon.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 54860d2524667..4527ac266bb61 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1323,7 +1323,6 @@ static void hisi_sas_refresh_port_id(struct hisi_hba *hisi_hba)
 
 static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state)
 {
-	struct sas_ha_struct *sas_ha = &hisi_hba->sha;
 	struct asd_sas_port *_sas_port = NULL;
 	int phy_no;
 
@@ -1352,12 +1351,6 @@ static void hisi_sas_rescan_topology(struct hisi_hba *hisi_hba, u32 state)
 			hisi_sas_phy_down(hisi_hba, phy_no, 0, GFP_KERNEL);
 		}
 	}
-	/*
-	 * Ensure any bcast events are processed prior to calling async nexus
-	 * reset calls from hisi_sas_clear_nexus_ha() ->
-	 * hisi_sas_async_I_T_nexus_reset()
-	 */
-	sas_drain_work(sas_ha);
 }
 
 static void hisi_sas_reset_init_all_devices(struct hisi_hba *hisi_hba)
-- 
GitLab


From 94a3555d1f0f51cf029a8668624e1cd40628880f Mon Sep 17 00:00:00 2001
From: Jie Zhan <zhanjie9@hisilicon.com>
Date: Fri, 18 Nov 2022 16:37:11 +0800
Subject: [PATCH 2800/4122] scsi: Revert "scsi: hisi_sas: Don't send bcast
 events from HW during nexus HA reset"

This reverts commit f5f2a2716055ad8c0c4ff83e51d667646c6c5d8a.

This is now unnecessary to solve the SATA devices missing issue in
hisi_sas_clear_nexus_ha(). Hence, we should not ignore bcast events during
sas_eh_handle_sas_errors() in case of missing bcast events, unless a
justified need is found and a mechanism to defer (but not ignore) bcast
events in sas_eh_handle_sas_errors() is provided.

Also, in hisi_sas_clear_nexus_ha(), there is nothing further to handle in
"out: " other than return, so that part can be reverted.

Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
Link: https://lore.kernel.org/r/20221118083714.4034612-3-zhanjie9@hisilicon.com
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 4527ac266bb61..62080d0fad6ff 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1812,14 +1812,12 @@ static int hisi_sas_clear_nexus_ha(struct sas_ha_struct *sas_ha)
 	struct hisi_hba *hisi_hba = sas_ha->lldd_ha;
 	HISI_SAS_DECLARE_RST_WORK_ON_STACK(r);
 	ASYNC_DOMAIN_EXCLUSIVE(async);
-	int i, ret;
+	int i;
 
 	queue_work(hisi_hba->wq, &r.work);
 	wait_for_completion(r.completion);
-	if (!r.done) {
-		ret = TMF_RESP_FUNC_FAILED;
-		goto out;
-	}
+	if (!r.done)
+		return TMF_RESP_FUNC_FAILED;
 
 	for (i = 0; i < HISI_SAS_MAX_DEVICES; i++) {
 		struct hisi_sas_device *sas_dev = &hisi_hba->devices[i];
@@ -1836,9 +1834,7 @@ static int hisi_sas_clear_nexus_ha(struct sas_ha_struct *sas_ha)
 	async_synchronize_full_domain(&async);
 	hisi_sas_release_tasks(hisi_hba);
 
-	ret = TMF_RESP_FUNC_COMPLETE;
-out:
-	return ret;
+	return TMF_RESP_FUNC_COMPLETE;
 }
 
 static int hisi_sas_query_task(struct sas_task *task)
@@ -1986,14 +1982,10 @@ void hisi_sas_phy_bcast(struct hisi_sas_phy *phy)
 {
 	struct asd_sas_phy *sas_phy = &phy->sas_phy;
 	struct hisi_hba	*hisi_hba = phy->hisi_hba;
-	struct sas_ha_struct *sha = &hisi_hba->sha;
 
 	if (test_bit(HISI_SAS_RESETTING_BIT, &hisi_hba->flags))
 		return;
 
-	if (test_bit(SAS_HA_FROZEN, &sha->state))
-		return;
-
 	sas_notify_port_event(sas_phy, PORTE_BROADCAST_RCVD, GFP_ATOMIC);
 }
 EXPORT_SYMBOL_GPL(hisi_sas_phy_bcast);
-- 
GitLab


From 9181ce3cb5d96f0ee28246a857ca651830fa3746 Mon Sep 17 00:00:00 2001
From: Jie Zhan <zhanjie9@hisilicon.com>
Date: Fri, 18 Nov 2022 16:37:12 +0800
Subject: [PATCH 2801/4122] scsi: libsas: Add smp_ata_check_ready_type()

Create function smp_ata_check_ready_type() for LLDDs to wait for SATA
devices to come up after a link reset.

Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
Link: https://lore.kernel.org/r/20221118083714.4034612-4-zhanjie9@hisilicon.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_ata.c      | 25 +++++++++++++++++++++++++
 drivers/scsi/libsas/sas_expander.c |  4 ++--
 drivers/scsi/libsas/sas_internal.h |  2 ++
 include/scsi/sas_ata.h             |  6 ++++++
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 78e6046fb55ad..4b65cd79150f5 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -287,6 +287,31 @@ static int sas_ata_clear_pending(struct domain_device *dev, struct ex_phy *phy)
 		return 1;
 }
 
+int smp_ata_check_ready_type(struct ata_link *link)
+{
+	struct domain_device *dev = link->ap->private_data;
+	struct sas_phy *phy = sas_get_local_phy(dev);
+	struct domain_device *ex_dev = dev->parent;
+	enum sas_device_type type = SAS_PHY_UNUSED;
+	u8 sas_addr[SAS_ADDR_SIZE];
+	int res;
+
+	res = sas_get_phy_attached_dev(ex_dev, phy->number, sas_addr, &type);
+	sas_put_local_phy(phy);
+	if (res)
+		return res;
+
+	switch (type) {
+	case SAS_SATA_PENDING:
+		return 0;
+	case SAS_END_DEVICE:
+		return 1;
+	default:
+		return -ENODEV;
+	}
+}
+EXPORT_SYMBOL_GPL(smp_ata_check_ready_type);
+
 static int smp_ata_check_ready(struct ata_link *link)
 {
 	int res;
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 2907ca5d0ed43..a04cad620e939 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1688,8 +1688,8 @@ static int sas_get_phy_change_count(struct domain_device *dev,
 	return res;
 }
 
-static int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
-				    u8 *sas_addr, enum sas_device_type *type)
+int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
+			     u8 *sas_addr, enum sas_device_type *type)
 {
 	int res;
 	struct smp_disc_resp *disc_resp;
diff --git a/drivers/scsi/libsas/sas_internal.h b/drivers/scsi/libsas/sas_internal.h
index 6cf190ade35e2..6f593fa69b586 100644
--- a/drivers/scsi/libsas/sas_internal.h
+++ b/drivers/scsi/libsas/sas_internal.h
@@ -88,6 +88,8 @@ struct domain_device *sas_ex_to_ata(struct domain_device *ex_dev, int phy_id);
 int sas_ex_phy_discover(struct domain_device *dev, int single);
 int sas_get_report_phy_sata(struct domain_device *dev, int phy_id,
 			    struct smp_rps_resp *rps_resp);
+int sas_get_phy_attached_dev(struct domain_device *dev, int phy_id,
+			     u8 *sas_addr, enum sas_device_type *type);
 int sas_try_ata_reset(struct asd_sas_phy *phy);
 void sas_hae_reset(struct work_struct *work);
 
diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h
index e47f0aec0722f..e7d466df81576 100644
--- a/include/scsi/sas_ata.h
+++ b/include/scsi/sas_ata.h
@@ -36,6 +36,7 @@ void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset);
 int sas_execute_ata_cmd(struct domain_device *device, u8 *fis,
 			int force_phy_id);
 int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline);
+int smp_ata_check_ready_type(struct ata_link *link);
 #else
 
 
@@ -104,6 +105,11 @@ static inline int sas_ata_wait_after_reset(struct domain_device *dev,
 {
 	return -ETIMEDOUT;
 }
+
+static inline int smp_ata_check_ready_type(struct ata_link *link)
+{
+	return 0;
+}
 #endif
 
 #endif /* _SAS_ATA_H_ */
-- 
GitLab


From 3c2673a09cf1181318c07b7dbc1bc532ba3d33e3 Mon Sep 17 00:00:00 2001
From: Jie Zhan <zhanjie9@hisilicon.com>
Date: Fri, 18 Nov 2022 16:37:13 +0800
Subject: [PATCH 2802/4122] scsi: hisi_sas: Fix SATA devices missing issue
 during I_T nexus reset

SATA devices on an expander may be removed and not be found again when I_T
nexus reset and revalidation are processed simultaneously.

The issue comes from:

 - Revalidation can remove SATA devices in link reset, e.g. in
   hisi_sas_clear_nexus_ha().

 - However, hisi_sas_debug_I_T_nexus_reset() polls the state of a SATA
   device on an expander after sending link_reset, where it calls:
    hisi_sas_debug_I_T_nexus_reset
     sas_ata_wait_after_reset
      ata_wait_after_reset
       ata_wait_ready
        smp_ata_check_ready
         sas_ex_phy_discover
          sas_ex_phy_discover_helper
           sas_set_ex_phy

   The ex_phy's change count is updated in sas_set_ex_phy(), so SATA
   devices after a link reset may not be found later through revalidation.

A similar issue was reported in:
commit 0f3fce5cc77e ("[SCSI] libsas: fix ata_eh clobbering ex_phys via
smp_ata_check_ready")
commit 87c8331fcf72 ("[SCSI] libsas: prevent domain rediscovery competing
with ata error handling").

To address this issue, in hisi_sas_debug_I_T_nexus_reset(), we now call
smp_ata_check_ready_type() that only polls the device type while not
updating the ex_phy's data of libsas.

Fixes: 71453bd9d1bf ("scsi: hisi_sas: Use sas_ata_wait_after_reset() in IT nexus reset")
Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
Link: https://lore.kernel.org/r/20221118083714.4034612-5-zhanjie9@hisilicon.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 62080d0fad6ff..41ba22f6c7f05 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1694,13 +1694,15 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device)
 		return rc;
 	}
 
+	/* Remote phy */
 	if (rc)
 		return rc;
 
-	/* Remote phy */
 	if (dev_is_sata(device)) {
-		rc = sas_ata_wait_after_reset(device,
-					HISI_SAS_WAIT_PHYUP_TIMEOUT);
+		struct ata_link *link = &device->sata_dev.ap->link;
+
+		rc = ata_wait_after_reset(link, HISI_SAS_WAIT_PHYUP_TIMEOUT,
+					  smp_ata_check_ready_type);
 	} else {
 		msleep(2000);
 	}
-- 
GitLab


From 4d450cf2b00d34b53f52d93216dd23af57bdca73 Mon Sep 17 00:00:00 2001
From: Jie Zhan <zhanjie9@hisilicon.com>
Date: Fri, 18 Nov 2022 16:37:14 +0800
Subject: [PATCH 2803/4122] scsi: libsas: Do not export
 sas_ata_wait_after_reset()

sas_ata_wait_after_reset() does not need to be exported since it is no
longer referenced outside libsas.

Signed-off-by: Jie Zhan <zhanjie9@hisilicon.com>
Link: https://lore.kernel.org/r/20221118083714.4034612-6-zhanjie9@hisilicon.com
Reviewed-by: John Garry <john.garry@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/libsas/sas_ata.c | 3 +--
 include/scsi/sas_ata.h        | 7 -------
 2 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/scsi/libsas/sas_ata.c b/drivers/scsi/libsas/sas_ata.c
index 4b65cd79150f5..f7439bf9cdc67 100644
--- a/drivers/scsi/libsas/sas_ata.c
+++ b/drivers/scsi/libsas/sas_ata.c
@@ -383,7 +383,7 @@ static int sas_ata_printk(const char *level, const struct domain_device *ddev,
 	return r;
 }
 
-int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline)
+static int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline)
 {
 	struct sata_device *sata_dev = &dev->sata_dev;
 	int (*check_ready)(struct ata_link *link);
@@ -405,7 +405,6 @@ int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline)
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(sas_ata_wait_after_reset);
 
 static int sas_ata_hard_reset(struct ata_link *link, unsigned int *class,
 			      unsigned long deadline)
diff --git a/include/scsi/sas_ata.h b/include/scsi/sas_ata.h
index e7d466df81576..9c927d46f1362 100644
--- a/include/scsi/sas_ata.h
+++ b/include/scsi/sas_ata.h
@@ -35,7 +35,6 @@ void sas_ata_end_eh(struct ata_port *ap);
 void sas_ata_device_link_abort(struct domain_device *dev, bool force_reset);
 int sas_execute_ata_cmd(struct domain_device *device, u8 *fis,
 			int force_phy_id);
-int sas_ata_wait_after_reset(struct domain_device *dev, unsigned long deadline);
 int smp_ata_check_ready_type(struct ata_link *link);
 #else
 
@@ -100,12 +99,6 @@ static inline int sas_execute_ata_cmd(struct domain_device *device, u8 *fis,
 	return 0;
 }
 
-static inline int sas_ata_wait_after_reset(struct domain_device *dev,
-					   unsigned long deadline)
-{
-	return -ETIMEDOUT;
-}
-
 static inline int smp_ata_check_ready_type(struct ata_link *link)
 {
 	return 0;
-- 
GitLab


From ee8c88cab4afbd5ee10a127d6cbecd6b200185a5 Mon Sep 17 00:00:00 2001
From: Bart Van Assche <bvanassche@acm.org>
Date: Fri, 18 Nov 2022 15:37:03 -0800
Subject: [PATCH 2804/4122] scsi: ufs: core: Fix the polling implementation

Fix the following issues in ufshcd_poll():

 - If polling succeeds, return a positive value.

 - Do not complete polling requests from interrupt context because the
   block layer expects these requests to be completed from thread
   context. From block/bio.c:

     If REQ_ALLOC_CACHE is set, the final put of the bio MUST be done from
     process context, not hard/soft IRQ.

Fixes: eaab9b573054 ("scsi: ufs: Implement polling support")
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Link: https://lore.kernel.org/r/20221118233717.441298-1-bvanassche@acm.org
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/core/ufshcd.c | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 0e8850869b21b..2dbe24977822f 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -5344,6 +5344,26 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba *hba,
 	}
 }
 
+/* Any value that is not an existing queue number is fine for this constant. */
+enum {
+	UFSHCD_POLL_FROM_INTERRUPT_CONTEXT = -1
+};
+
+static void ufshcd_clear_polled(struct ufs_hba *hba,
+				unsigned long *completed_reqs)
+{
+	int tag;
+
+	for_each_set_bit(tag, completed_reqs, hba->nutrs) {
+		struct scsi_cmnd *cmd = hba->lrb[tag].cmd;
+
+		if (!cmd)
+			continue;
+		if (scsi_cmd_to_rq(cmd)->cmd_flags & REQ_POLLED)
+			__clear_bit(tag, completed_reqs);
+	}
+}
+
 /*
  * Returns > 0 if one or more commands have been completed or 0 if no
  * requests have been completed.
@@ -5360,13 +5380,17 @@ static int ufshcd_poll(struct Scsi_Host *shost, unsigned int queue_num)
 	WARN_ONCE(completed_reqs & ~hba->outstanding_reqs,
 		  "completed: %#lx; outstanding: %#lx\n", completed_reqs,
 		  hba->outstanding_reqs);
+	if (queue_num == UFSHCD_POLL_FROM_INTERRUPT_CONTEXT) {
+		/* Do not complete polled requests from interrupt context. */
+		ufshcd_clear_polled(hba, &completed_reqs);
+	}
 	hba->outstanding_reqs &= ~completed_reqs;
 	spin_unlock_irqrestore(&hba->outstanding_lock, flags);
 
 	if (completed_reqs)
 		__ufshcd_transfer_req_compl(hba, completed_reqs);
 
-	return completed_reqs;
+	return completed_reqs != 0;
 }
 
 /**
@@ -5397,7 +5421,7 @@ static irqreturn_t ufshcd_transfer_req_compl(struct ufs_hba *hba)
 	 * Ignore the ufshcd_poll() return value and return IRQ_HANDLED since we
 	 * do not want polling to trigger spurious interrupt complaints.
 	 */
-	ufshcd_poll(hba->host, 0);
+	ufshcd_poll(hba->host, UFSHCD_POLL_FROM_INTERRUPT_CONTEXT);
 
 	return IRQ_HANDLED;
 }
-- 
GitLab


From d29c32efebf3f10b25e9f88ac75c962e7259412d Mon Sep 17 00:00:00 2001
From: ChanWoo Lee <cw9316.lee@samsung.com>
Date: Mon, 21 Nov 2022 09:33:38 +0900
Subject: [PATCH 2805/4122] scsi: ufs: ufs-mediatek: Remove unnecessary return
 code

Modify to remove unnecessary 'return 0' code.

Signed-off-by: ChanWoo Lee <cw9316.lee@samsung.com>
Link: https://lore.kernel.org/r/20221121003338.11034-1-cw9316.lee@samsung.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/ufs/host/ufs-mediatek.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/ufs/host/ufs-mediatek.c b/drivers/ufs/host/ufs-mediatek.c
index ef5816d82326a..21d9b047539fb 100644
--- a/drivers/ufs/host/ufs-mediatek.c
+++ b/drivers/ufs/host/ufs-mediatek.c
@@ -1095,7 +1095,7 @@ static void ufs_mtk_setup_clk_gating(struct ufs_hba *hba)
 	}
 }
 
-static int ufs_mtk_post_link(struct ufs_hba *hba)
+static void ufs_mtk_post_link(struct ufs_hba *hba)
 {
 	/* enable unipro clock gating feature */
 	ufs_mtk_cfg_unipro_cg(hba, true);
@@ -1106,8 +1106,6 @@ static int ufs_mtk_post_link(struct ufs_hba *hba)
 			FIELD_PREP(UFSHCI_AHIBERN8_SCALE_MASK, 3);
 
 	ufs_mtk_setup_clk_gating(hba);
-
-	return 0;
 }
 
 static int ufs_mtk_link_startup_notify(struct ufs_hba *hba,
@@ -1120,7 +1118,7 @@ static int ufs_mtk_link_startup_notify(struct ufs_hba *hba,
 		ret = ufs_mtk_pre_link(hba);
 		break;
 	case POST_CHANGE:
-		ret = ufs_mtk_post_link(hba);
+		ufs_mtk_post_link(hba);
 		break;
 	default:
 		ret = -EINVAL;
@@ -1272,9 +1270,8 @@ static int ufs_mtk_suspend(struct ufs_hba *hba, enum ufs_pm_op pm_op,
 	struct arm_smccc_res res;
 
 	if (status == PRE_CHANGE) {
-		if (!ufshcd_is_auto_hibern8_supported(hba))
-			return 0;
-		ufs_mtk_auto_hibern8_disable(hba);
+		if (ufshcd_is_auto_hibern8_supported(hba))
+			ufs_mtk_auto_hibern8_disable(hba);
 		return 0;
 	}
 
-- 
GitLab


From 35bd6f9fd33b8beb043aea0db51b726ca6edfd87 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Mon, 21 Nov 2022 12:17:24 +0000
Subject: [PATCH 2806/4122] scsi: core: Use SCSI_SCAN_RESCAN in
 __scsi_add_device()

Instead of using hardcoded '1' as the __scsi_add_device() ->
scsi_probe_and_add_lun() rescan arg, use proper macro SCSI_SCAN_RESCAN.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20221121121725.1910795-2-john.g.garry@oracle.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_scan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 5d27f5196de6f..6cc974b382c1b 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1580,7 +1580,8 @@ struct scsi_device *__scsi_add_device(struct Scsi_Host *shost, uint channel,
 		scsi_complete_async_scans();
 
 	if (scsi_host_scan_allowed(shost) && scsi_autopm_get_host(shost) == 0) {
-		scsi_probe_and_add_lun(starget, lun, NULL, &sdev, 1, hostdata);
+		scsi_probe_and_add_lun(starget, lun, NULL, &sdev,
+				       SCSI_SCAN_RESCAN, hostdata);
 		scsi_autopm_put_host(shost);
 	}
 	mutex_unlock(&shost->scan_mutex);
-- 
GitLab


From 425b27a04dd8b2f5abaf8023166071b8342dc079 Mon Sep 17 00:00:00 2001
From: John Garry <john.g.garry@oracle.com>
Date: Mon, 21 Nov 2022 12:17:25 +0000
Subject: [PATCH 2807/4122] scsi: core: Use SCSI_SCAN_INITIAL in
 do_scsi_scan_host()

Instead of using hardcoded '0' as the do_scsi_scan_host() ->
scsi_scan_host_selected() rescan arg, use proper macro SCSI_SCAN_INITIAL.

Signed-off-by: John Garry <john.g.garry@oracle.com>
Link: https://lore.kernel.org/r/20221121121725.1910795-3-john.g.garry@oracle.com
Reviewed-by: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_scan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 6cc974b382c1b..920b145f80b74 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -1920,7 +1920,7 @@ static void do_scsi_scan_host(struct Scsi_Host *shost)
 			msleep(10);
 	} else {
 		scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD,
-				SCAN_WILD_CARD, 0);
+				SCAN_WILD_CARD, SCSI_SCAN_INITIAL);
 	}
 }
 
-- 
GitLab


From 76dc609556c699676776d53222d342276afd0442 Mon Sep 17 00:00:00 2001
From: Yu Zhe <yuzhe@nfschina.com>
Date: Fri, 25 Nov 2022 10:07:03 +0800
Subject: [PATCH 2808/4122] scsi: megaraid_sas: Fix some spelling mistakes in
 comment

Fix typos in comment.

Reviewed-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Yu Zhe <yuzhe@nfschina.com>
Link: https://lore.kernel.org/r/20221125020703.22216-1-yuzhe@nfschina.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/megaraid/megaraid_sas_fusion.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c b/drivers/scsi/megaraid/megaraid_sas_fusion.c
index 6650f8c8e9b04..fe70f8f114352 100644
--- a/drivers/scsi/megaraid/megaraid_sas_fusion.c
+++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c
@@ -80,7 +80,7 @@ static void megasas_fusion_crash_dump(struct megasas_instance *instance);
  * @ocr_context:			If called from OCR context this will
  *					be set to 1, else 0
  *
- * This function initates a chip reset followed by a wait for controller to
+ * This function initiates a chip reset followed by a wait for controller to
  * transition to ready state.
  * During this, driver will block all access to PCI config space from userspace
  */
@@ -334,7 +334,7 @@ megasas_fire_cmd_fusion(struct megasas_instance *instance,
  *
  * This function is only for fusion controllers.
  * Update host can queue, if firmware downgrade max supported firmware commands.
- * Firmware upgrade case will be skiped because underlying firmware has
+ * Firmware upgrade case will be skipped because underlying firmware has
  * more resource than exposed to the OS.
  *
  */
@@ -2588,7 +2588,7 @@ static void megasas_stream_detect(struct megasas_instance *instance,
 			if ((io_info->ldStartBlock != current_sd->next_seq_lba)	&&
 			    ((!io_info->isRead) || (!is_read_ahead)))
 				/*
-				 * Once the API availible we need to change this.
+				 * Once the API is available we need to change this.
 				 * At this point we are not allowing any gap
 				 */
 				continue;
@@ -4650,7 +4650,7 @@ megasas_issue_tm(struct megasas_instance *instance, u16 device_handle,
 }
 
 /*
- * megasas_fusion_smid_lookup : Look for fusion command correpspodning to SCSI
+ * megasas_fusion_smid_lookup : Look for fusion command corresponding to SCSI
  * @instance: per adapter struct
  *
  * Return Non Zero index, if SMID found in outstanding commands
-- 
GitLab


From 4e80eef45ad775a54fb06a66bf8267a154781ce5 Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Fri, 18 Nov 2022 02:23:04 +0300
Subject: [PATCH 2809/4122] scsi: sg: Fix get_user() in call sg_scsi_ioctl()

get_user() expects the pointer to be pointer-to-simple-variable type, but
sic->data is array of 'unsigned char'. It violates get_user() contracts.

Explicitly take pointer to the first element of the array. It matches
current behaviour.

This is preparation for fixing sparse warnings caused by Linear Address
Masking patchset.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20221117232304.1544-1-kirill.shutemov@linux.intel.com
Cc: "James E.J. Bottomley" <jejb@linux.ibm.com>
Cc: "Martin K. Petersen" <martin.petersen@oracle.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 drivers/scsi/scsi_ioctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index 2d20da55fb640..fdd47565a3115 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -519,7 +519,7 @@ static int sg_scsi_ioctl(struct request_queue *q, fmode_t mode,
 		return -EFAULT;
 	if (in_len > PAGE_SIZE || out_len > PAGE_SIZE)
 		return -EINVAL;
-	if (get_user(opcode, sic->data))
+	if (get_user(opcode, &sic->data[0]))
 		return -EFAULT;
 
 	bytes = max(in_len, out_len);
-- 
GitLab


From fd5ac974fc25feed084c2d1599d0dddb4e0556bc Mon Sep 17 00:00:00 2001
From: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Date: Wed, 23 Nov 2022 14:36:09 +0100
Subject: [PATCH 2810/4122] counter: stm32-lptimer-cnt: fix the check on arr
 and cmp registers update

The ARR (auto reload register) and CMP (compare) registers are
successively written. The status bits to check the update of these
registers are polled together with regmap_read_poll_timeout().
The condition to end the loop may become true, even if one of the register
isn't correctly updated.
So ensure both status bits are set before clearing them.

Fixes: d8958824cf07 ("iio: counter: Add support for STM32 LPTimer")
Signed-off-by: Fabrice Gasnier <fabrice.gasnier@foss.st.com>
Link: https://lore.kernel.org/r/20221123133609.465614-1-fabrice.gasnier@foss.st.com/
Signed-off-by: William Breathitt Gray <william.gray@linaro.org>
---
 drivers/counter/stm32-lptimer-cnt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/counter/stm32-lptimer-cnt.c b/drivers/counter/stm32-lptimer-cnt.c
index d6b80b6dfc287..8439755559b21 100644
--- a/drivers/counter/stm32-lptimer-cnt.c
+++ b/drivers/counter/stm32-lptimer-cnt.c
@@ -69,7 +69,7 @@ static int stm32_lptim_set_enable_state(struct stm32_lptim_cnt *priv,
 
 	/* ensure CMP & ARR registers are properly written */
 	ret = regmap_read_poll_timeout(priv->regmap, STM32_LPTIM_ISR, val,
-				       (val & STM32_LPTIM_CMPOK_ARROK),
+				       (val & STM32_LPTIM_CMPOK_ARROK) == STM32_LPTIM_CMPOK_ARROK,
 				       100, 1000);
 	if (ret)
 		return ret;
-- 
GitLab


From c4d33381b134da188ccd1084aef21e2b8c3c422e Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Thu, 24 Nov 2022 03:19:32 +0000
Subject: [PATCH 2811/4122] power: supply: ab8500: Fix error handling in
 ab8500_charger_init()

The ab8500_charger_init() returns the platform_driver_register() directly
without checking its return value, if platform_driver_register() failed,
all ab8500_charger_component_drivers are not unregistered.

Fix by unregister ab8500_charger_component_drivers when
platform_driver_register() failed.

Fixes: 1c1f13a006ed ("power: supply: ab8500: Move to componentized binding")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/ab8500_charger.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/ab8500_charger.c b/drivers/power/supply/ab8500_charger.c
index cf45a2c26b7f3..308e68545d44d 100644
--- a/drivers/power/supply/ab8500_charger.c
+++ b/drivers/power/supply/ab8500_charger.c
@@ -3719,7 +3719,14 @@ static int __init ab8500_charger_init(void)
 	if (ret)
 		return ret;
 
-	return platform_driver_register(&ab8500_charger_driver);
+	ret = platform_driver_register(&ab8500_charger_driver);
+	if (ret) {
+		platform_unregister_drivers(ab8500_charger_component_drivers,
+				ARRAY_SIZE(ab8500_charger_component_drivers));
+		return ret;
+	}
+
+	return 0;
 }
 
 static void __exit ab8500_charger_exit(void)
-- 
GitLab


From 248043299bf61134fb675d16963e11f49e79b05b Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 25 Nov 2022 12:12:49 +0200
Subject: [PATCH 2812/4122] modpost: Mark uuid_le type to be suitable only for
 MEI

The uuid_le type is used only in MEI ABI, do not advertise it for others.
While at it, comment out that UUID types are not to be used in a new code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 scripts/mod/file2alias.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 7df23905fdf1c..91c2e7ba5e52c 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -34,19 +34,23 @@ typedef Elf64_Addr	kernel_ulong_t;
 typedef uint32_t	__u32;
 typedef uint16_t	__u16;
 typedef unsigned char	__u8;
+
+/* UUID types for backward compatibility, don't use in new code */
 typedef struct {
 	__u8 b[16];
 } guid_t;
 
-/* backwards compatibility, don't use in new code */
-typedef struct {
-	__u8 b[16];
-} uuid_le;
 typedef struct {
 	__u8 b[16];
 } uuid_t;
+
 #define	UUID_STRING_LEN		36
 
+/* MEI UUID type, don't use anywhere else */
+typedef struct {
+	__u8 b[16];
+} uuid_le;
+
 /* Big exception to the "don't include kernel headers into userspace, which
  * even potentially has different endianness and word sizes, since
  * we handle those differences explicitly below */
-- 
GitLab


From 571650b3a30f67d70df242508631ed5e7de2c2df Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Sun, 27 Nov 2022 19:02:24 +0100
Subject: [PATCH 2813/4122] power: supply: bq25890: Only use
 pdata->regulator_init_data for vbus

bq25890_platform_data.regulator_init_data is intended to only provide
regulator init_data for the vbus regulator.

Remove this from the regulator_config before registering the vsys
regulator. Otherwise the regulator_register() call for vsys will fail
because it tries to register duplicate consumer_dev_name + supply
names from init_data->consumer_supplies[], leading to the entire
probe of the bq25890 driver failing:

[   32.017501] bq25890-charger i2c-bq25892_main: Failed to set supply vbus
[   32.017525] bq25890-charger i2c-bq25892_main: error -EBUSY: registering vsys regulator
[   32.124978] bq25890-charger: probe of i2c-bq25892_main failed with error -16

Fixes: 14a3d159abf8 ("power: supply: bq25890: Add Vsys regulator")
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Marek Vasut <marex@denx.de>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index bfdd2213ba69a..512c81662eea9 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -1161,6 +1161,8 @@ static int bq25890_register_regulator(struct bq25890_device *bq)
 				     "registering vbus regulator");
 	}
 
+	/* pdata->regulator_init_data is for vbus only */
+	cfg.init_data = NULL;
 	reg = devm_regulator_register(bq->dev, &bq25890_vsys_desc, &cfg);
 	if (IS_ERR(reg)) {
 		return dev_err_probe(bq->dev, PTR_ERR(reg),
-- 
GitLab


From e88906b169ebcb8046e8f0ad76edd09ab41cfdfe Mon Sep 17 00:00:00 2001
From: Bruno Thomsen <bruno.thomsen@gmail.com>
Date: Sun, 27 Nov 2022 18:08:11 +0100
Subject: [PATCH 2814/4122] USB: serial: cp210x: add Kamstrup RF sniffer PIDs

The RF sniffers are based on cp210x where the RF frontends
are based on a different USB stack.

RF sniffers can analyze packets meta data including power level
and perform packet injection.

Can be used to perform RF frontend self-test when connected to
a concentrator, ex. arch/arm/boot/dts/imx7d-flex-concentrator.dts

Signed-off-by: Bruno Thomsen <bruno.thomsen@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/cp210x.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index 3bcec419f4632..f6fb23620e87a 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -195,6 +195,8 @@ static const struct usb_device_id id_table[] = {
 	{ USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */
 	{ USB_DEVICE(0x17A8, 0x0001) }, /* Kamstrup Optical Eye/3-wire */
 	{ USB_DEVICE(0x17A8, 0x0005) }, /* Kamstrup M-Bus Master MultiPort 250D */
+	{ USB_DEVICE(0x17A8, 0x0011) }, /* Kamstrup 444 MHz RF sniffer */
+	{ USB_DEVICE(0x17A8, 0x0013) }, /* Kamstrup 870 MHz RF sniffer */
 	{ USB_DEVICE(0x17A8, 0x0101) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Int Ant) */
 	{ USB_DEVICE(0x17A8, 0x0102) }, /* Kamstrup 868 MHz wM-Bus C-Mode Meter Reader (Ext Ant) */
 	{ USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
-- 
GitLab


From 09f530f0c6d6689eee5e690c6d98f495fcc3a0f9 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 23 Nov 2022 20:27:14 -0400
Subject: [PATCH 2815/4122] RDMA: Add netdevice_tracker to
 ib_device_set_netdev()

This will cause an informative backtrace to print if the user of
ib_device_set_netdev() isn't careful about tearing down the ibdevice
before its the netdevice parent is destroyed. Such as like this:

  unregister_netdevice: waiting for vlan0 to become free. Usage count = 2
  leaked reference.
   ib_device_set_netdev+0x266/0x730
   siw_newlink+0x4e0/0xfd0
   nldev_newlink+0x35c/0x5c0
   rdma_nl_rcv_msg+0x36d/0x690
   rdma_nl_rcv+0x2ee/0x430
   netlink_unicast+0x543/0x7f0
   netlink_sendmsg+0x918/0xe20
   sock_sendmsg+0xcf/0x120
   ____sys_sendmsg+0x70d/0x8b0
   ___sys_sendmsg+0x11d/0x1b0
   __sys_sendmsg+0xfa/0x1d0
   do_syscall_64+0x35/0xb0
   entry_SYSCALL_64_after_hwframe+0x63/0xcd

This will help debug the issues syzkaller is seeing.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/0-v1-a7c81b3842ce+e5-netdev_tracker_jgg@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/device.c | 6 ++++--
 include/rdma/ib_verbs.h          | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 3409c55ea88bf..ff35cebb25e26 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2159,14 +2159,16 @@ int ib_device_set_netdev(struct ib_device *ib_dev, struct net_device *ndev,
 		return 0;
 	}
 
+	if (old_ndev)
+		netdev_tracker_free(ndev, &pdata->netdev_tracker);
 	if (ndev)
-		dev_hold(ndev);
+		netdev_hold(ndev, &pdata->netdev_tracker, GFP_ATOMIC);
 	rcu_assign_pointer(pdata->netdev, ndev);
 	spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 
 	add_ndev_hash(pdata);
 	if (old_ndev)
-		dev_put(old_ndev);
+		__dev_put(old_ndev);
 
 	return 0;
 }
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a1f4d53a4bb63..77dd9148815b9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2203,6 +2203,7 @@ struct ib_port_data {
 	struct ib_port_cache cache;
 
 	struct net_device __rcu *netdev;
+	netdevice_tracker netdev_tracker;
 	struct hlist_node ndev_hash_link;
 	struct rdma_port_counter port_counter;
 	struct ib_port *sysfs;
-- 
GitLab


From ea5ef136e215fdef35f14010bc51fcd6686e6922 Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Sat, 26 Nov 2022 04:34:10 +0000
Subject: [PATCH 2816/4122] RDMA/nldev: Add checks for nla_nest_start() in
 fill_stat_counter_qps()

As the nla_nest_start() may fail with NULL returned, the return value needs
to be checked.

Fixes: c4ffee7c9bdb ("RDMA/netlink: Implement counter dumpit calback")
Signed-off-by: Yuan Can <yuancan@huawei.com>
Link: https://lore.kernel.org/r/20221126043410.85632-1-yuancan@huawei.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/nldev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index 2be76a3fdd87a..ca0ed7d143261 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -894,6 +894,8 @@ static int fill_stat_counter_qps(struct sk_buff *msg,
 	int ret = 0;
 
 	table_attr = nla_nest_start(msg, RDMA_NLDEV_ATTR_RES_QP);
+	if (!table_attr)
+		return -EMSGSIZE;
 
 	rt = &counter->device->res[RDMA_RESTRACK_QP];
 	xa_lock(&rt->xa);
-- 
GitLab


From 86815735aa571d493cf5768cad5fa8e6fd9c7ba8 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 28 Nov 2022 19:26:29 +0530
Subject: [PATCH 2817/4122] KVM: arm64: PMU: Replace version number '0' with
 ID_AA64DFR0_EL1_PMUVer_NI

kvm_host_pmu_init() returns when detected PMU is either not implemented, or
implementation defined. kvm_pmu_probe_armpmu() also has a similar situation.

Extracted ID_AA64DFR0_EL1_PMUVer value, when PMU is not implemented is '0',
which can be replaced with ID_AA64DFR0_EL1_PMUVer_NI defined as '0b0000'.

Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-perf-users@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221128135629.118346-1-anshuman.khandual@arm.com
---
 arch/arm64/kvm/pmu-emul.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 3295dea34f4c9..bb7251e670a96 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -668,7 +668,8 @@ void kvm_host_pmu_init(struct arm_pmu *pmu)
 {
 	struct arm_pmu_entry *entry;
 
-	if (pmu->pmuver == 0 || pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
+	if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
+	    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
 		return;
 
 	mutex_lock(&arm_pmus_lock);
@@ -721,7 +722,7 @@ static struct arm_pmu *kvm_pmu_probe_armpmu(void)
 
 	if (event->pmu) {
 		pmu = to_arm_pmu(event->pmu);
-		if (pmu->pmuver == 0 ||
+		if (pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_NI ||
 		    pmu->pmuver == ID_AA64DFR0_EL1_PMUVer_IMP_DEF)
 			pmu = NULL;
 	}
-- 
GitLab


From 292e8f1494764ac46dd1b7dd46fa317db691436c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Nov 2022 10:40:02 +0000
Subject: [PATCH 2818/4122] KVM: arm64: PMU: Simplify PMCR_EL0 reset handling

Resetting PMCR_EL0 is a pretty involved process that includes
poisoning some of the writable bits, just because we can.

It makes it hard to reason about about what gets configured,
and just resetting things to 0 seems like a much saner option.

Reduce reset_pmcr() to just preserving PMCR_EL0.N from the host,
and setting PMCR_EL0.LC if we don't support AArch32.

Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/sys_regs.c | 18 ++++++------------
 1 file changed, 6 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 67eac0f747be8..eb56ad031116b 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -639,24 +639,18 @@ static void reset_pmselr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 
 static void reset_pmcr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r)
 {
-	u64 pmcr, val;
+	u64 pmcr;
 
 	/* No PMU available, PMCR_EL0 may UNDEF... */
 	if (!kvm_arm_support_pmu_v3())
 		return;
 
-	pmcr = read_sysreg(pmcr_el0);
-	/*
-	 * Writable bits of PMCR_EL0 (ARMV8_PMU_PMCR_MASK) are reset to UNKNOWN
-	 * except PMCR.E resetting to zero.
-	 */
-	val = ((pmcr & ~ARMV8_PMU_PMCR_MASK)
-	       | (ARMV8_PMU_PMCR_MASK & 0xdecafbad)) & (~ARMV8_PMU_PMCR_E);
+	/* Only preserve PMCR_EL0.N, and reset the rest to 0 */
+	pmcr = read_sysreg(pmcr_el0) & ARMV8_PMU_PMCR_N_MASK;
 	if (!kvm_supports_32bit_el0())
-		val |= ARMV8_PMU_PMCR_LC;
-	if (!kvm_pmu_is_3p5(vcpu))
-		val &= ~ARMV8_PMU_PMCR_LP;
-	__vcpu_sys_reg(vcpu, r->reg) = val;
+		pmcr |= ARMV8_PMU_PMCR_LC;
+
+	__vcpu_sys_reg(vcpu, r->reg) = pmcr;
 }
 
 static bool check_pmu_access_disabled(struct kvm_vcpu *vcpu, u64 flags)
-- 
GitLab


From 64d6820d64c0a206e744bd8945374d563a76c16c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Thu, 24 Nov 2022 10:44:59 +0000
Subject: [PATCH 2819/4122] KVM: arm64: PMU: Sanitise PMCR_EL0.LP on first vcpu
 run

Userspace can play some dirty tricks on us by selecting a given
PMU version (such as PMUv3p5), restore a PMCR_EL0 value that
has PMCR_EL0.LP set, and then switch the PMU version to PMUv3p1,
for example. In this situation, we end-up with PMCR_EL0.LP being
set and spreading havoc in the PMU emulation.

This is specially hard as the first two step can be done on
one vcpu and the third step on another, meaning that we need
to sanitise *all* vcpus when the PMU version is changed.

In orer to avoid a pretty complicated locking situation,
defer the sanitisation of PMCR_EL0 to the point where the
vcpu is actually run for the first tine, using the existing
KVM_REQ_RELOAD_PMU request that calls into kvm_pmu_handle_pmcr().

There is still an obscure corner case where userspace could
do the above trick, and then save the VM without running it.
They would then observe an inconsistent state (PMUv3.1 + LP set),
but that state will be fixed on the first run anyway whenever
the guest gets restored on a host.

Reported-by: Reiji Watanabe <reijiw@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
---
 arch/arm64/kvm/pmu-emul.c | 6 ++++++
 arch/arm64/kvm/sys_regs.c | 8 ++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index bb7251e670a96..d8ea399430861 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -538,6 +538,12 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
 	if (!kvm_vcpu_has_pmu(vcpu))
 		return;
 
+	/* Fixup PMCR_EL0 to reconcile the PMU version and the LP bit */
+	if (!kvm_pmu_is_3p5(vcpu))
+		val &= ~ARMV8_PMU_PMCR_LP;
+
+	__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+
 	if (val & ARMV8_PMU_PMCR_E) {
 		kvm_pmu_enable_counter_mask(vcpu,
 		       __vcpu_sys_reg(vcpu, PMCNTENSET_EL0));
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index eb56ad031116b..528d253c571ab 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -693,15 +693,15 @@ static bool access_pmcr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
 		return false;
 
 	if (p->is_write) {
-		/* Only update writeable bits of PMCR */
+		/*
+		 * Only update writeable bits of PMCR (continuing into
+		 * kvm_pmu_handle_pmcr() as well)
+		 */
 		val = __vcpu_sys_reg(vcpu, PMCR_EL0);
 		val &= ~ARMV8_PMU_PMCR_MASK;
 		val |= p->regval & ARMV8_PMU_PMCR_MASK;
 		if (!kvm_supports_32bit_el0())
 			val |= ARMV8_PMU_PMCR_LC;
-		if (!kvm_pmu_is_3p5(vcpu))
-			val &= ~ARMV8_PMU_PMCR_LP;
-		__vcpu_sys_reg(vcpu, PMCR_EL0) = val;
 		kvm_pmu_handle_pmcr(vcpu, val);
 		kvm_vcpu_pmu_restore_guest(vcpu);
 	} else {
-- 
GitLab


From 923d011febb4e2fb338036bb0ee6a0a7f9b10da1 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 27 Nov 2022 22:52:10 +0100
Subject: [PATCH 2820/4122] gpio: Do not include <linux/kernel.h> when not
 really needed.

<linux/kernel.h> is included only for using container_of().
Include <linux/container_of.h> instead, it is much lighter.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 include/linux/of_gpio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h
index a5166eb934375..6db627257a7ba 100644
--- a/include/linux/of_gpio.h
+++ b/include/linux/of_gpio.h
@@ -34,7 +34,7 @@ enum of_gpio_flags {
 
 #ifdef CONFIG_OF_GPIO
 
-#include <linux/kernel.h>
+#include <linux/container_of.h>
 
 /*
  * OF GPIO chip for memory mapped banks
-- 
GitLab


From 4ef339bc053a62dac9017f80f7bb8cff0412bd29 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 16 Nov 2022 16:17:28 +0200
Subject: [PATCH 2821/4122] gpiolib: Unify access to the device properties

Some of the functions are using struct fwnode_handle, some struct device
pointer. In the GPIO library the firmware node of the GPIO device is the
same as GPIO node of the GPIO chip. Due to this fact we may use former
to access properties everywhere in the code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Brian Masney <bmasney@redhat.com>
Tested-by: Marijn Suijten <marijn.suijten@somainline.org>
[Bartosz: stick to the 80-char limit where it's not hurting readability]
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 51afdc6ac9198..2729f7ebab9d6 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -367,12 +367,12 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc)
 static int devprop_gpiochip_set_names(struct gpio_chip *chip)
 {
 	struct gpio_device *gdev = chip->gpiodev;
-	const struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev);
+	struct device *dev = &gdev->dev;
 	const char **names;
 	int ret, i;
 	int count;
 
-	count = fwnode_property_string_array_count(fwnode, "gpio-line-names");
+	count = device_property_string_array_count(dev, "gpio-line-names");
 	if (count < 0)
 		return 0;
 
@@ -385,7 +385,7 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip)
 	 * gpiochips.
 	 */
 	if (count <= chip->offset) {
-		dev_warn(&gdev->dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n",
+		dev_warn(dev, "gpio-line-names too short (length %d), cannot map names for the gpiochip at offset %u\n",
 			 count, chip->offset);
 		return 0;
 	}
@@ -394,10 +394,10 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip)
 	if (!names)
 		return -ENOMEM;
 
-	ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
+	ret = device_property_read_string_array(dev, "gpio-line-names",
 						names, count);
 	if (ret < 0) {
-		dev_warn(&gdev->dev, "failed to read GPIO line names\n");
+		dev_warn(dev, "failed to read GPIO line names\n");
 		kfree(names);
 		return ret;
 	}
@@ -448,10 +448,11 @@ static unsigned long *gpiochip_allocate_mask(struct gpio_chip *gc)
 
 static unsigned int gpiochip_count_reserved_ranges(struct gpio_chip *gc)
 {
+	struct device *dev = &gc->gpiodev->dev;
 	int size;
 
 	/* Format is "start, count, ..." */
-	size = fwnode_property_count_u32(gc->fwnode, "gpio-reserved-ranges");
+	size = device_property_count_u32(dev, "gpio-reserved-ranges");
 	if (size > 0 && size % 2 == 0)
 		return size;
 
@@ -472,6 +473,7 @@ static int gpiochip_alloc_valid_mask(struct gpio_chip *gc)
 
 static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc)
 {
+	struct device *dev = &gc->gpiodev->dev;
 	unsigned int size;
 	u32 *ranges;
 	int ret;
@@ -484,7 +486,8 @@ static int gpiochip_apply_reserved_ranges(struct gpio_chip *gc)
 	if (!ranges)
 		return -ENOMEM;
 
-	ret = fwnode_property_read_u32_array(gc->fwnode, "gpio-reserved-ranges", ranges, size);
+	ret = device_property_read_u32_array(dev, "gpio-reserved-ranges",
+					     ranges, size);
 	if (ret) {
 		kfree(ranges);
 		return ret;
-- 
GitLab


From 20ee8c223f792947378196307d8e707c9cdc2d61 Mon Sep 17 00:00:00 2001
From: Yang Shen <shenyang39@huawei.com>
Date: Tue, 22 Nov 2022 17:03:55 +0800
Subject: [PATCH 2822/4122] coresight: trbe: remove cpuhp instance node before
 remove cpuhp state

cpuhp_state_add_instance() and cpuhp_state_remove_instance() should
be used in pairs. Or there will lead to the warn on
cpuhp_remove_multi_state() since the cpuhp_step list is not empty.

The following is the error log with 'rmmod coresight-trbe':
Error: Removing state 215 which has instances left.
Call trace:
  __cpuhp_remove_state_cpuslocked+0x144/0x160
  __cpuhp_remove_state+0xac/0x100
  arm_trbe_device_remove+0x2c/0x60 [coresight_trbe]
  platform_remove+0x34/0x70
  device_remove+0x54/0x90
  device_release_driver_internal+0x1e4/0x250
  driver_detach+0x5c/0xb0
  bus_remove_driver+0x64/0xc0
  driver_unregister+0x3c/0x70
  platform_driver_unregister+0x20/0x30
  arm_trbe_exit+0x1c/0x658 [coresight_trbe]
  __arm64_sys_delete_module+0x1ac/0x24c
  invoke_syscall+0x50/0x120
  el0_svc_common.constprop.0+0x58/0x1a0
  do_el0_svc+0x38/0xd0
  el0_svc+0x2c/0xc0
  el0t_64_sync_handler+0x1ac/0x1b0
  el0t_64_sync+0x19c/0x1a0
 ---[ end trace 0000000000000000 ]---

Fixes: 3fbf7f011f24 ("coresight: sink: Add TRBE driver")
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Yang Shen <shenyang39@huawei.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20221122090355.23533-1-shenyang39@huawei.com
---
 drivers/hwtracing/coresight/coresight-trbe.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/hwtracing/coresight/coresight-trbe.c b/drivers/hwtracing/coresight/coresight-trbe.c
index 2b386bb848f8d..1fc4fd79a1c69 100644
--- a/drivers/hwtracing/coresight/coresight-trbe.c
+++ b/drivers/hwtracing/coresight/coresight-trbe.c
@@ -1434,6 +1434,7 @@ static int arm_trbe_probe_cpuhp(struct trbe_drvdata *drvdata)
 
 static void arm_trbe_remove_cpuhp(struct trbe_drvdata *drvdata)
 {
+	cpuhp_state_remove_instance(drvdata->trbe_online, &drvdata->hotplug_node);
 	cpuhp_remove_multi_state(drvdata->trbe_online);
 }
 
-- 
GitLab


From 3dc228b35387803d9c43ed1b098aabb1d3ae9c7d Mon Sep 17 00:00:00 2001
From: Mike Leach <mike.leach@linaro.org>
Date: Wed, 23 Nov 2022 19:38:18 +0000
Subject: [PATCH 2823/4122] coresight: cti: Fix null pointer error on CTI init
 before ETM

When CTI is discovered first then the function
coresight_set_assoc_ectdev_mutex() is called to set the association
between CTI and ETM device. Recent lockdep fix passes a null pointer.

This patch passes the correct pointer.

Before patch: log of boot oops sequence with CTI discovered first:

[   12.424091]  cs_system_cfg: CoreSight Configuration manager initialised
[   12.483474] coresight cti_sys0: CTI initialized
[   12.488109] coresight cti_sys1: CTI initialized
[   12.503594] coresight cti_cpu0: CTI initialized
[   12.517877] coresight-cpu-debug 850000.debug: Coresight debug-CPU0 initialized
[   12.523479] coresight-cpu-debug 852000.debug: Coresight debug-CPU1 initialized
[   12.529926] coresight-cpu-debug 854000.debug: Coresight debug-CPU2 initialized
[   12.541808] coresight stm0: STM32 initialized
[   12.544421] coresight-cpu-debug 856000.debug: Coresight debug-CPU3 initialized
[   12.585639] coresight cti_cpu1: CTI initialized
[   12.614028] coresight cti_cpu2: CTI initialized
[   12.631679] CSCFG registered etm0
[   12.633920] coresight etm0: CPU0: etm v4.0 initialized
[   12.656392] coresight cti_cpu3: CTI initialized

...

[   12.708383] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000348

...

[   12.755094] Internal error: Oops: 0000000096000044 [#1] SMP
[   12.761817] Modules linked in: coresight_etm4x(+) coresight_tmc coresight_cpu_debug coresight_replicator coresight_funnel coresight_cti coresight_tpiu coresight_stm coresight
[   12.767210] CPU: 3 PID: 1346 Comm: systemd-udevd Not tainted 6.1.0-rc3tid-v6tid-v6-235166-gf7f7d7a2204a-dirty #498
[   12.782827] Hardware name: Qualcomm Technologies, Inc. APQ 8016 SBC (DT)
[   12.793154] pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
[   12.800010] pc : coresight_set_assoc_ectdev_mutex+0x30/0x50 [coresight]
[   12.806694] lr : coresight_set_assoc_ectdev_mutex+0x30/0x50 [coresight]

...

[   12.885064] Call trace:
[   12.892352]  coresight_set_assoc_ectdev_mutex+0x30/0x50 [coresight]
[   12.894693]  cti_add_assoc_to_csdev+0x144/0x1b0 [coresight_cti]
[   12.900943]  coresight_register+0x2c8/0x320 [coresight]
[   12.906844]  etm4_add_coresight_dev.isra.27+0x148/0x280 [coresight_etm4x]
[   12.912056]  etm4_probe+0x144/0x1c0 [coresight_etm4x]
[   12.918998]  etm4_probe_amba+0x40/0x78 [coresight_etm4x]
[   12.924032]  amba_probe+0x11c/0x1f0

After patch: similar log

[   12.444467]  cs_system_cfg: CoreSight Configuration manager initialised
[   12.456329] coresight-cpu-debug 850000.debug: Coresight debug-CPU0 initialized
[   12.456754] coresight-cpu-debug 852000.debug: Coresight debug-CPU1 initialized
[   12.469672] coresight-cpu-debug 854000.debug: Coresight debug-CPU2 initialized
[   12.476098] coresight-cpu-debug 856000.debug: Coresight debug-CPU3 initialized
[   12.532409] coresight stm0: STM32 initialized
[   12.533708] coresight cti_sys0: CTI initialized
[   12.539478] coresight cti_sys1: CTI initialized
[   12.550106] coresight cti_cpu0: CTI initialized
[   12.633931] coresight cti_cpu1: CTI initialized
[   12.634664] coresight cti_cpu2: CTI initialized
[   12.638090] coresight cti_cpu3: CTI initialized
[   12.721136] CSCFG registered etm0

...

[   12.762643] CSCFG registered etm1
[   12.762666] coresight etm1: CPU1: etm v4.0 initialized
[   12.776258] CSCFG registered etm2
[   12.776282] coresight etm2: CPU2: etm v4.0 initialized
[   12.784357] CSCFG registered etm3
[   12.785455] coresight etm3: CPU3: etm v4.0 initialized

Error can also be triggered by manually starting the modules using modprobe
in the following order:

root@linaro-developer:/home/linaro/cs-mods# modprobe coresight
root@linaro-developer:/home/linaro/cs-mods# modprobe coresight-cti
root@linaro-developer:/home/linaro/cs-mods# modprobe coresight-etm4x

Tested on Dragonboard DB410c
Applies to coresight/next

Fixes: 23722fb46725 ("coresight: Fix possible deadlock with lock dependency")
Signed-off-by: Mike Leach <mike.leach@linaro.org>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20221123193818.6253-1-mike.leach@linaro.org
---
 drivers/hwtracing/coresight/coresight-cti-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwtracing/coresight/coresight-cti-core.c b/drivers/hwtracing/coresight/coresight-cti-core.c
index c6e8c6542f24b..d2cf4f4848e1b 100644
--- a/drivers/hwtracing/coresight/coresight-cti-core.c
+++ b/drivers/hwtracing/coresight/coresight-cti-core.c
@@ -564,7 +564,7 @@ static void cti_add_assoc_to_csdev(struct coresight_device *csdev)
 			 * if we found a matching csdev then update the ECT
 			 * association pointer for the device with this CTI.
 			 */
-			coresight_set_assoc_ectdev_mutex(csdev->ect_dev,
+			coresight_set_assoc_ectdev_mutex(csdev,
 							 ect_item->csdev);
 			break;
 		}
-- 
GitLab


From c767c34740132ffc478226864a7461493cdc2413 Mon Sep 17 00:00:00 2001
From: Jilin Yuan <yuanjilin@cdjrlc.com>
Date: Wed, 19 Oct 2022 20:49:53 +0800
Subject: [PATCH 2824/4122] coresight: etm4x: fix repeated words in comments

Delete the redundant word 'the'.

Signed-off-by: Jilin Yuan <yuanjilin@cdjrlc.com>
Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Link: https://lore.kernel.org/r/20221019124953.45885-1-yuanjilin@cdjrlc.com
---
 drivers/hwtracing/coresight/coresight-etm4x-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index 9dc0d7f57565a..1cc052979e016 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -1481,7 +1481,7 @@ static int etm4_set_event_filters(struct etmv4_drvdata *drvdata,
 			/*
 			 * If filters::ssstatus == 1, trace acquisition was
 			 * started but the process was yanked away before the
-			 * the stop address was hit.  As such the start/stop
+			 * stop address was hit.  As such the start/stop
 			 * logic needs to be re-started so that tracing can
 			 * resume where it left.
 			 *
-- 
GitLab


From 3ca9d84e722e8044c09e80992aa7b15bd904d3ce Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 23 Nov 2022 19:40:01 -0500
Subject: [PATCH 2825/4122] KVM: always declare prototype for
 kvm_arch_irqchip_in_kernel

Architecture code might want to use it even if CONFIG_HAVE_KVM_IRQ_ROUTING
is false; for example PPC XICS has KVM_IRQ_LINE and wants to use
kvm_arch_irqchip_in_kernel from there, but it does not have
KVM_SET_GSI_ROUTING so the prototype was not provided.

Fixes: d663b8a28598 ("KVM: replace direct irq.h inclusion")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/linux/kvm_host.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 6f0f389f5f9cd..b8d12356f015f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -663,9 +663,9 @@ struct kvm_irq_routing_table {
 	 */
 	struct hlist_head map[];
 };
+#endif
 
 bool kvm_arch_irqchip_in_kernel(struct kvm *kvm);
-#endif
 
 #ifndef KVM_INTERNAL_MEM_SLOTS
 #define KVM_INTERNAL_MEM_SLOTS 0
-- 
GitLab


From c3f3719952b9ab64e001e36df3d7bf24d5a4752d Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Mon, 14 Nov 2022 12:48:57 -0800
Subject: [PATCH 2826/4122] KVM: x86/xen: Add CPL to Xen hypercall tracepoint

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/trace.h | 15 +++++++++------
 arch/x86/kvm/xen.c   |  2 +-
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h
index 09f3392dd8300..83843379813ee 100644
--- a/arch/x86/kvm/trace.h
+++ b/arch/x86/kvm/trace.h
@@ -113,12 +113,13 @@ TRACE_EVENT(kvm_hv_hypercall_done,
  * Tracepoint for Xen hypercall.
  */
 TRACE_EVENT(kvm_xen_hypercall,
-	TP_PROTO(unsigned long nr, unsigned long a0, unsigned long a1,
-		 unsigned long a2, unsigned long a3, unsigned long a4,
-		 unsigned long a5),
-	    TP_ARGS(nr, a0, a1, a2, a3, a4, a5),
+	    TP_PROTO(u8 cpl, unsigned long nr,
+		     unsigned long a0, unsigned long a1, unsigned long a2,
+		     unsigned long a3, unsigned long a4, unsigned long a5),
+	    TP_ARGS(cpl, nr, a0, a1, a2, a3, a4, a5),
 
 	TP_STRUCT__entry(
+		__field(u8, cpl)
 		__field(unsigned long, nr)
 		__field(unsigned long, a0)
 		__field(unsigned long, a1)
@@ -129,6 +130,7 @@ TRACE_EVENT(kvm_xen_hypercall,
 	),
 
 	TP_fast_assign(
+		__entry->cpl = cpl;
 		__entry->nr = nr;
 		__entry->a0 = a0;
 		__entry->a1 = a1;
@@ -138,8 +140,9 @@ TRACE_EVENT(kvm_xen_hypercall,
 		__entry->a4 = a5;
 	),
 
-	TP_printk("nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx",
-		  __entry->nr, __entry->a0, __entry->a1,  __entry->a2,
+	TP_printk("cpl %d nr 0x%lx a0 0x%lx a1 0x%lx a2 0x%lx a3 0x%lx a4 0x%lx a5 %lx",
+		  __entry->cpl, __entry->nr,
+		  __entry->a0, __entry->a1, __entry->a2,
 		  __entry->a3, __entry->a4, __entry->a5)
 );
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index f3098c0e386a8..4b8e9628fbf57 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1256,7 +1256,7 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu)
 	}
 #endif
 	cpl = static_call(kvm_x86_get_cpl)(vcpu);
-	trace_kvm_xen_hypercall(input, params[0], params[1], params[2],
+	trace_kvm_xen_hypercall(cpl, input, params[0], params[1], params[2],
 				params[3], params[4], params[5]);
 
 	/*
-- 
GitLab


From 7927e27549d3f02354233a9ab3f28e0080ede29b Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 12 Nov 2022 14:28:20 +0000
Subject: [PATCH 2827/4122] MAINTAINERS: Add KVM x86/xen maintainer list

Adding Paul as co-maintainer of Xen support to help ensure that things
don't fall through the cracks when I spend three months at a time
travelling...

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 MAINTAINERS | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 046ff06ff97fa..89672a59c0c3a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11324,6 +11324,16 @@ F:	arch/x86/kvm/svm/hyperv.*
 F:	arch/x86/kvm/svm/svm_onhyperv.*
 F:	arch/x86/kvm/vmx/evmcs.*
 
+KVM X86 Xen (KVM/Xen)
+M:	David Woodhouse <dwmw2@infradead.org>
+M:	Paul Durrant <paul@xen.org>
+M:	Sean Christopherson <seanjc@google.com>
+M:	Paolo Bonzini <pbonzini@redhat.com>
+L:	kvm@vger.kernel.org
+S:	Supported
+T:	git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
+F:	arch/x86/kvm/xen.*
+
 KERNFS
 M:	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 M:	Tejun Heo <tj@kernel.org>
-- 
GitLab


From c4690d016182d271a862767145db8b2bc792f4a8 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 25 Nov 2022 20:58:38 +0800
Subject: [PATCH 2828/4122] KVM: x86: Add BUILD_BUG_ON() to detect bad usage of
 "scattered" flags

Add a compile-time assert in the SF() macro to detect improper usage,
i.e. to detect passing in an X86_FEATURE_* flag that isn't actually
scattered by the kernel.  Upcoming feature flags will be 100% KVM-only
and will have X86_FEATURE_* macros that point at a kvm_only_cpuid_leafs
word, not a kernel-defined word.  Using SF() and thus boot_cpu_has() for
such feature flags would access memory beyond x86_capability[NCAPINTS]
and at best incorrectly hide a feature, and at worst leak kernel state to
userspace.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221125125845.1182922-2-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6b5912578eddc..ff2e9734e5c18 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -65,7 +65,13 @@ u32 xstate_required_size(u64 xstate_bv, bool compacted)
 #define KVM_X86_FEATURE_AMD_PSFD	(13*32+28) /* Predictive Store Forwarding Disable */
 
 #define F feature_bit
-#define SF(name) (boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0)
+
+/* Scattered Flag - For features that are scattered by cpufeatures.h. */
+#define SF(name)						\
+({								\
+	BUILD_BUG_ON(X86_FEATURE_##name >= MAX_CPU_FEATURES);	\
+	(boot_cpu_has(X86_FEATURE_##name) ? F(name) : 0);	\
+})
 
 /*
  * Magic value used by KVM when querying userspace-provided CPUID entries and
-- 
GitLab


From 047c7229906152fb85c23dc18fd25a00cd7cb4de Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 25 Nov 2022 20:58:39 +0800
Subject: [PATCH 2829/4122] KVM: x86: Update KVM-only leaf handling to allow
 for 100% KVM-only leafs

Rename kvm_cpu_cap_init_scattered() to kvm_cpu_cap_init_kvm_defined() in
anticipation of adding KVM-only CPUID leafs that aren't recognized by the
kernel and thus not scattered, i.e. for leafs that are 100% KVM-defined.

Adjust/add comments to kvm_only_cpuid_leafs and KVM_X86_FEATURE to
document how to create new kvm_only_cpuid_leafs entries for scattered
features as well as features that are entirely unknown to the kernel.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221125125845.1182922-3-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c         |  8 ++++----
 arch/x86/kvm/reverse_cpuid.h | 18 +++++++++++++++---
 2 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index ff2e9734e5c18..73c3c6dc6e7b5 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -549,9 +549,9 @@ static __always_inline void __kvm_cpu_cap_mask(unsigned int leaf)
 }
 
 static __always_inline
-void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask)
+void kvm_cpu_cap_init_kvm_defined(enum kvm_only_cpuid_leafs leaf, u32 mask)
 {
-	/* Use kvm_cpu_cap_mask for non-scattered leafs. */
+	/* Use kvm_cpu_cap_mask for leafs that aren't KVM-only. */
 	BUILD_BUG_ON(leaf < NCAPINTS);
 
 	kvm_cpu_caps[leaf] = mask;
@@ -561,7 +561,7 @@ void kvm_cpu_cap_init_scattered(enum kvm_only_cpuid_leafs leaf, u32 mask)
 
 static __always_inline void kvm_cpu_cap_mask(enum cpuid_leafs leaf, u32 mask)
 {
-	/* Use kvm_cpu_cap_init_scattered for scattered leafs. */
+	/* Use kvm_cpu_cap_init_kvm_defined for KVM-only leafs. */
 	BUILD_BUG_ON(leaf >= NCAPINTS);
 
 	kvm_cpu_caps[leaf] &= mask;
@@ -670,7 +670,7 @@ void kvm_set_cpu_caps(void)
 		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
 	);
 
-	kvm_cpu_cap_init_scattered(CPUID_12_EAX,
+	kvm_cpu_cap_init_kvm_defined(CPUID_12_EAX,
 		SF(SGX1) | SF(SGX2)
 	);
 
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index a19d473d01847..443a6b3e66c0e 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -7,9 +7,9 @@
 #include <asm/cpufeatures.h>
 
 /*
- * Hardware-defined CPUID leafs that are scattered in the kernel, but need to
- * be directly used by KVM.  Note, these word values conflict with the kernel's
- * "bug" caps, but KVM doesn't use those.
+ * Hardware-defined CPUID leafs that are either scattered by the kernel or are
+ * unknown to the kernel, but need to be directly used by KVM.  Note, these
+ * word values conflict with the kernel's "bug" caps, but KVM doesn't use those.
  */
 enum kvm_only_cpuid_leafs {
 	CPUID_12_EAX	 = NCAPINTS,
@@ -18,6 +18,18 @@ enum kvm_only_cpuid_leafs {
 	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
 };
 
+/*
+ * Define a KVM-only feature flag.
+ *
+ * For features that are scattered by cpufeatures.h, __feature_translate() also
+ * needs to be updated to translate the kernel-defined feature into the
+ * KVM-defined feature.
+ *
+ * For features that are 100% KVM-only, i.e. not defined by cpufeatures.h,
+ * forego the intermediate KVM_X86_FEATURE and directly define X86_FEATURE_* so
+ * that X86_FEATURE_* can be used in KVM.  No __feature_translate() handling is
+ * needed in this case.
+ */
 #define KVM_X86_FEATURE(w, f)		((w)*32 + (f))
 
 /* Intel-defined SGX sub-features, CPUID level 0x12 (EAX). */
-- 
GitLab


From 6a19d7aa5821522eec528fd44f24fe774b875377 Mon Sep 17 00:00:00 2001
From: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Date: Fri, 25 Nov 2022 20:58:40 +0800
Subject: [PATCH 2830/4122] x86: KVM: Advertise CMPccXADD CPUID to user space

CMPccXADD is a new set of instructions in the latest Intel platform
Sierra Forest. This new instruction set includes a semaphore operation
that can compare and add the operands if condition is met, which can
improve database performance.

The bit definition:
CPUID.(EAX=7,ECX=1):EAX[bit 7]

CMPccXADD is on an expected-dense CPUID leaf and some other bits on this
leaf have kernel usages. Given that, define this feature bit like
X86_FEATURE_<name> in kernel. Considering CMPccXADD itself has no truly
kernel usages and /proc/cpuinfo has too much unreadable flags, hide this
one in /proc/cpuinfo.

Advertise CMPCCXADD to KVM userspace. This is safe because there are no
new VMX controls or additional host enabling required for guests to use
this feature.

Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Message-Id: <20221125125845.1182922-4-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kvm/cpuid.c               | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b71f4f2ecdd57..5cdd57133d904 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -308,6 +308,7 @@
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
+#define X86_FEATURE_CMPCCXADD           (12*32+ 7) /* "" CMPccXADD instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 73c3c6dc6e7b5..2f263ff911d66 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -663,7 +663,7 @@ void kvm_set_cpu_caps(void)
 		kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
 
 	kvm_cpu_cap_mask(CPUID_7_1_EAX,
-		F(AVX_VNNI) | F(AVX512_BF16)
+		F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD)
 	);
 
 	kvm_cpu_cap_mask(CPUID_D_1_EAX,
-- 
GitLab


From af2872f6225476566bcbbd523a74dcaba29e159e Mon Sep 17 00:00:00 2001
From: "Chang S. Bae" <chang.seok.bae@intel.com>
Date: Fri, 25 Nov 2022 20:58:41 +0800
Subject: [PATCH 2831/4122] x86: KVM: Advertise AMX-FP16 CPUID to user space

Latest Intel platform Granite Rapids has introduced a new instruction -
AMX-FP16, which performs dot-products of two FP16 tiles and accumulates
the results into a packed single precision tile. AMX-FP16 adds FP16
capability and also allows a FP16 GPU trained model to run faster
without loss of accuracy or added SW overhead.

The bit definition:
CPUID.(EAX=7,ECX=1):EAX[bit 21]

AMX-FP16 is on an expected-dense CPUID leaf and some other bits on this
leaf have kernel usages. Given that, define this feature bit like
X86_FEATURE_<name> in kernel. Considering AMX-FP16 itself has no truly
kernel usages and /proc/cpuinfo has too much unreadable flags, hide this
one in /proc/cpuinfo.

Advertise AMX-FP16 to KVM userspace. This is safe because there are no
new VMX controls or additional host enabling required for guests to use
this feature.

Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Message-Id: <20221125125845.1182922-5-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kvm/cpuid.c               | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 5cdd57133d904..20059dc33d246 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -309,6 +309,7 @@
 #define X86_FEATURE_AVX_VNNI		(12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
 #define X86_FEATURE_CMPCCXADD           (12*32+ 7) /* "" CMPccXADD instructions */
+#define X86_FEATURE_AMX_FP16		(12*32+21) /* "" AMX fp16 Support */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 2f263ff911d66..6360075ca70f4 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -663,7 +663,7 @@ void kvm_set_cpu_caps(void)
 		kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
 
 	kvm_cpu_cap_mask(CPUID_7_1_EAX,
-		F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD)
+		F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16)
 	);
 
 	kvm_cpu_cap_mask(CPUID_D_1_EAX,
-- 
GitLab


From 5e85c4ebf206e50c58e82ca44c15e2be2bac6923 Mon Sep 17 00:00:00 2001
From: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Date: Fri, 25 Nov 2022 20:58:42 +0800
Subject: [PATCH 2832/4122] x86: KVM: Advertise AVX-IFMA CPUID to user space

AVX-IFMA is a new instruction in the latest Intel platform Sierra
Forest. This instruction packed multiplies unsigned 52-bit integers and
adds the low/high 52-bit products to Qword Accumulators.

The bit definition:
CPUID.(EAX=7,ECX=1):EAX[bit 23]

AVX-IFMA is on an expected-dense CPUID leaf and some other bits on this
leaf have kernel usages. Given that, define this feature bit like
X86_FEATURE_<name> in kernel. Considering AVX-IFMA itself has no truly
kernel usages and /proc/cpuinfo has too much unreadable flags, hide this
one in /proc/cpuinfo.

Advertise AVX-IFMA to KVM userspace. This is safe because there are no
new VMX controls or additional host enabling required for guests to use
this feature.

Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Acked-by: Borislav Petkov <bp@suse.de>
Message-Id: <20221125125845.1182922-6-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/cpufeatures.h | 1 +
 arch/x86/kvm/cpuid.c               | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 20059dc33d246..1419c4e04d45f 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -310,6 +310,7 @@
 #define X86_FEATURE_AVX512_BF16		(12*32+ 5) /* AVX512 BFLOAT16 instructions */
 #define X86_FEATURE_CMPCCXADD           (12*32+ 7) /* "" CMPccXADD instructions */
 #define X86_FEATURE_AMX_FP16		(12*32+21) /* "" AMX fp16 Support */
+#define X86_FEATURE_AVX_IFMA            (12*32+23) /* "" Support for VPMADD52[H,L]UQ */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6360075ca70f4..5dfc0d036df5e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -663,7 +663,8 @@ void kvm_set_cpu_caps(void)
 		kvm_cpu_cap_set(X86_FEATURE_SPEC_CTRL_SSBD);
 
 	kvm_cpu_cap_mask(CPUID_7_1_EAX,
-		F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16)
+		F(AVX_VNNI) | F(AVX512_BF16) | F(CMPCCXADD) | F(AMX_FP16) |
+		F(AVX_IFMA)
 	);
 
 	kvm_cpu_cap_mask(CPUID_D_1_EAX,
-- 
GitLab


From 24d74b9f5f2a972ac9228372adeac62b2dc10ea2 Mon Sep 17 00:00:00 2001
From: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Date: Fri, 25 Nov 2022 20:58:43 +0800
Subject: [PATCH 2833/4122] KVM: x86: Advertise AVX-VNNI-INT8 CPUID to user
 space

AVX-VNNI-INT8 is a new set of instructions in the latest Intel platform
Sierra Forest, aims for the platform to have superior AI capabilities.
This instruction multiplies the individual bytes of two unsigned or
unsigned source operands, then adds and accumulates the results into the
destination dword element size operand.

The bit definition:
CPUID.(EAX=7,ECX=1):EDX[bit 4]

AVX-VNNI-INT8 is on a new and sparse CPUID leaf and all bits on this
leaf have no truly kernel use case for now. Given that and to save space
for kernel feature bits, move this new leaf to KVM-only subleaf and plus
an x86_FEATURE definition for AVX-VNNI-INT8 to direct it to the KVM
entry.

Advertise AVX-VNNI-INT8 to KVM userspace. This is safe because there are
no new VMX controls or additional host enabling required for guests to
use this feature.

Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Message-Id: <20221125125845.1182922-7-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c         | 6 +++++-
 arch/x86/kvm/reverse_cpuid.h | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 5dfc0d036df5e..b6ce47c4e9728 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -667,6 +667,10 @@ void kvm_set_cpu_caps(void)
 		F(AVX_IFMA)
 	);
 
+	kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
+		F(AVX_VNNI_INT8)
+	);
+
 	kvm_cpu_cap_mask(CPUID_D_1_EAX,
 		F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | F(XSAVES) | f_xfd
 	);
@@ -920,9 +924,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 				goto out;
 
 			cpuid_entry_override(entry, CPUID_7_1_EAX);
+			cpuid_entry_override(entry, CPUID_7_1_EDX);
 			entry->ebx = 0;
 			entry->ecx = 0;
-			entry->edx = 0;
 		}
 		break;
 	case 0xa: { /* Architectural Performance Monitoring */
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 443a6b3e66c0e..84f56b662424c 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -13,6 +13,7 @@
  */
 enum kvm_only_cpuid_leafs {
 	CPUID_12_EAX	 = NCAPINTS,
+	CPUID_7_1_EDX,
 	NR_KVM_CPU_CAPS,
 
 	NKVMCAPINTS = NR_KVM_CPU_CAPS - NCAPINTS,
@@ -36,6 +37,9 @@ enum kvm_only_cpuid_leafs {
 #define KVM_X86_FEATURE_SGX1		KVM_X86_FEATURE(CPUID_12_EAX, 0)
 #define KVM_X86_FEATURE_SGX2		KVM_X86_FEATURE(CPUID_12_EAX, 1)
 
+/* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
+#define X86_FEATURE_AVX_VNNI_INT8       KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
+
 struct cpuid_reg {
 	u32 function;
 	u32 index;
@@ -60,6 +64,7 @@ static const struct cpuid_reg reverse_cpuid[] = {
 	[CPUID_7_1_EAX]       = {         7, 1, CPUID_EAX},
 	[CPUID_12_EAX]        = {0x00000012, 0, CPUID_EAX},
 	[CPUID_8000_001F_EAX] = {0x8000001f, 0, CPUID_EAX},
+	[CPUID_7_1_EDX]       = {         7, 1, CPUID_EDX},
 };
 
 /*
-- 
GitLab


From 9977f0877de7f8fc51391e2d52bc993efbd58b90 Mon Sep 17 00:00:00 2001
From: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Date: Fri, 25 Nov 2022 20:58:44 +0800
Subject: [PATCH 2834/4122] KVM: x86: Advertise AVX-NE-CONVERT CPUID to user
 space

AVX-NE-CONVERT is a new set of instructions which can convert low
precision floating point like BF16/FP16 to high precision floating point
FP32, and can also convert FP32 elements to BF16. This instruction
allows the platform to have improved AI capabilities and better
compatibility.

The bit definition:
CPUID.(EAX=7,ECX=1):EDX[bit 5]

AVX-NE-CONVERT is on a KVM-only subleaf. Plus an x86_FEATURE definition
for this feature bit to direct it to the KVM entry.

Advertise AVX-NE-CONVERT to KVM userspace. This is safe because there
are no new VMX controls or additional host enabling required for guests
to use this feature.

Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Message-Id: <20221125125845.1182922-8-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c         | 2 +-
 arch/x86/kvm/reverse_cpuid.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index b6ce47c4e9728..5a95624a658a8 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -668,7 +668,7 @@ void kvm_set_cpu_caps(void)
 	);
 
 	kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
-		F(AVX_VNNI_INT8)
+		F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT)
 	);
 
 	kvm_cpu_cap_mask(CPUID_D_1_EAX,
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 84f56b662424c..43eff7207e017 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -39,6 +39,7 @@ enum kvm_only_cpuid_leafs {
 
 /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
 #define X86_FEATURE_AVX_VNNI_INT8       KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
+#define X86_FEATURE_AVX_NE_CONVERT      KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
 
 struct cpuid_reg {
 	u32 function;
-- 
GitLab


From 29c46979b25d5ca867e9859bfdd088d028739cdf Mon Sep 17 00:00:00 2001
From: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Date: Fri, 25 Nov 2022 20:58:45 +0800
Subject: [PATCH 2835/4122] KVM: x86: Advertise PREFETCHIT0/1 CPUID to user
 space

Latest Intel platform Granite Rapids has introduced a new instruction -
PREFETCHIT0/1, which moves code to memory (cache) closer to the
processor depending on specific hints.

The bit definition:
CPUID.(EAX=7,ECX=1):EDX[bit 14]

PREFETCHIT0/1 is on a KVM-only subleaf. Plus an x86_FEATURE definition
for this feature bit to direct it to the KVM entry.

Advertise PREFETCHIT0/1 to KVM userspace. This is safe because there are
no new VMX controls or additional host enabling required for guests to
use this feature.

Signed-off-by: Jiaxi Chen <jiaxi.chen@linux.intel.com>
Message-Id: <20221125125845.1182922-9-jiaxi.chen@linux.intel.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c         | 2 +-
 arch/x86/kvm/reverse_cpuid.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 5a95624a658a8..723502181a3a9 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -668,7 +668,7 @@ void kvm_set_cpu_caps(void)
 	);
 
 	kvm_cpu_cap_init_kvm_defined(CPUID_7_1_EDX,
-		F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT)
+		F(AVX_VNNI_INT8) | F(AVX_NE_CONVERT) | F(PREFETCHITI)
 	);
 
 	kvm_cpu_cap_mask(CPUID_D_1_EAX,
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index 43eff7207e017..203fdad07bae6 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -40,6 +40,7 @@ enum kvm_only_cpuid_leafs {
 /* Intel-defined sub-features, CPUID level 0x00000007:1 (EDX) */
 #define X86_FEATURE_AVX_VNNI_INT8       KVM_X86_FEATURE(CPUID_7_1_EDX, 4)
 #define X86_FEATURE_AVX_NE_CONVERT      KVM_X86_FEATURE(CPUID_7_1_EDX, 5)
+#define X86_FEATURE_PREFETCHITI         KVM_X86_FEATURE(CPUID_7_1_EDX, 14)
 
 struct cpuid_reg {
 	u32 function;
-- 
GitLab


From 41e8f85a75fc60e1543e4903428a1b481b672a17 Mon Sep 17 00:00:00 2001
From: Daeho Jeong <daehojeong@google.com>
Date: Fri, 11 Nov 2022 09:04:06 -0800
Subject: [PATCH 2836/4122] f2fs: introduce F2FS_IOC_START_ATOMIC_REPLACE

introduce a new ioctl to replace the whole content of a file atomically,
which means it induces truncate and content update at the same time.
We can start it with F2FS_IOC_START_ATOMIC_REPLACE and complete it with
F2FS_IOC_COMMIT_ATOMIC_WRITE. Or abort it with
F2FS_IOC_ABORT_ATOMIC_WRITE.

Signed-off-by: Daeho Jeong <daehojeong@google.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c            |  3 +++
 fs/f2fs/f2fs.h            |  1 +
 fs/f2fs/file.c            | 21 +++++++++++++++------
 fs/f2fs/segment.c         | 13 ++++++++++++-
 include/uapi/linux/f2fs.h |  1 +
 5 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9b47ded653d1c..560fa80590e99 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3466,6 +3466,9 @@ static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
 	else if (*blk_addr != NULL_ADDR)
 		return 0;
 
+	if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
+		goto reserve_block;
+
 	/* Look for the block in the original inode */
 	err = __find_data_block(inode, index, &ori_blk_addr);
 	if (err)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 6a8cbf5bb1871..b89b5d755ce05 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -769,6 +769,7 @@ enum {
 	FI_ALIGNED_WRITE,	/* enable aligned write */
 	FI_COW_FILE,		/* indicate COW file */
 	FI_ATOMIC_COMMITTED,	/* indicate atomic commit completed except disk sync */
+	FI_ATOMIC_REPLACE,	/* indicate atomic replace */
 	FI_MAX,			/* max flag, never be used */
 };
 
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 28f586e779992..ab0a0d3730f6e 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -2034,7 +2034,7 @@ static int f2fs_ioc_getversion(struct file *filp, unsigned long arg)
 	return put_user(inode->i_generation, (int __user *)arg);
 }
 
-static int f2fs_ioc_start_atomic_write(struct file *filp)
+static int f2fs_ioc_start_atomic_write(struct file *filp, bool truncate)
 {
 	struct inode *inode = file_inode(filp);
 	struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
@@ -2103,15 +2103,22 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
 
 	f2fs_write_inode(inode, NULL);
 
-	isize = i_size_read(inode);
-	fi->original_i_size = isize;
-	f2fs_i_size_write(fi->cow_inode, isize);
-
 	stat_inc_atomic_inode(inode);
 
 	set_inode_flag(inode, FI_ATOMIC_FILE);
 	set_inode_flag(fi->cow_inode, FI_COW_FILE);
 	clear_inode_flag(fi->cow_inode, FI_INLINE_DATA);
+
+	isize = i_size_read(inode);
+	fi->original_i_size = isize;
+	if (truncate) {
+		set_inode_flag(inode, FI_ATOMIC_REPLACE);
+		truncate_inode_pages_final(inode->i_mapping);
+		f2fs_i_size_write(inode, 0);
+		isize = 0;
+	}
+	f2fs_i_size_write(fi->cow_inode, isize);
+
 	f2fs_up_write(&fi->i_gc_rwsem[WRITE]);
 
 	f2fs_update_time(sbi, REQ_TIME);
@@ -4139,7 +4146,9 @@ static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 	case FS_IOC_GETVERSION:
 		return f2fs_ioc_getversion(filp, arg);
 	case F2FS_IOC_START_ATOMIC_WRITE:
-		return f2fs_ioc_start_atomic_write(filp);
+		return f2fs_ioc_start_atomic_write(filp, false);
+	case F2FS_IOC_START_ATOMIC_REPLACE:
+		return f2fs_ioc_start_atomic_write(filp, true);
 	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
 		return f2fs_ioc_commit_atomic_write(filp);
 	case F2FS_IOC_ABORT_ATOMIC_WRITE:
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8aa81238c770f..5ac026a572287 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -197,6 +197,7 @@ void f2fs_abort_atomic_write(struct inode *inode, bool clean)
 	fi->cow_inode = NULL;
 	release_atomic_write_cnt(inode);
 	clear_inode_flag(inode, FI_ATOMIC_COMMITTED);
+	clear_inode_flag(inode, FI_ATOMIC_REPLACE);
 	clear_inode_flag(inode, FI_ATOMIC_FILE);
 	stat_dec_atomic_inode(inode);
 
@@ -261,14 +262,24 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head,
 					bool revoke)
 {
 	struct revoke_entry *cur, *tmp;
+	pgoff_t start_index = 0;
+	bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
 
 	list_for_each_entry_safe(cur, tmp, head, list) {
-		if (revoke)
+		if (revoke) {
 			__replace_atomic_write_block(inode, cur->index,
 						cur->old_addr, NULL, true);
+		} else if (truncate) {
+			f2fs_truncate_hole(inode, start_index, cur->index);
+			start_index = cur->index + 1;
+		}
+
 		list_del(&cur->list);
 		kmem_cache_free(revoke_entry_slab, cur);
 	}
+
+	if (!revoke && truncate)
+		f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
 }
 
 static int __f2fs_commit_atomic_write(struct inode *inode)
diff --git a/include/uapi/linux/f2fs.h b/include/uapi/linux/f2fs.h
index 3121d127d5aae..955d440be1046 100644
--- a/include/uapi/linux/f2fs.h
+++ b/include/uapi/linux/f2fs.h
@@ -42,6 +42,7 @@
 						struct f2fs_comp_option)
 #define F2FS_IOC_DECOMPRESS_FILE	_IO(F2FS_IOCTL_MAGIC, 23)
 #define F2FS_IOC_COMPRESS_FILE		_IO(F2FS_IOCTL_MAGIC, 24)
+#define F2FS_IOC_START_ATOMIC_REPLACE	_IO(F2FS_IOCTL_MAGIC, 25)
 
 /*
  * should be same as XFS_IOC_GOINGDOWN.
-- 
GitLab


From d3b7b4afd6b2c344eabf9cc26b8bfa903c164c7c Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Tue, 15 Nov 2022 00:08:47 +0800
Subject: [PATCH 2837/4122] f2fs: fix to do sanity check on i_extra_isize in
 is_alive()

syzbot found a f2fs bug:

BUG: KASAN: slab-out-of-bounds in data_blkaddr fs/f2fs/f2fs.h:2891 [inline]
BUG: KASAN: slab-out-of-bounds in is_alive fs/f2fs/gc.c:1117 [inline]
BUG: KASAN: slab-out-of-bounds in gc_data_segment fs/f2fs/gc.c:1520 [inline]
BUG: KASAN: slab-out-of-bounds in do_garbage_collect+0x386a/0x3df0 fs/f2fs/gc.c:1734
Read of size 4 at addr ffff888076557568 by task kworker/u4:3/52

CPU: 1 PID: 52 Comm: kworker/u4:3 Not tainted 6.1.0-rc4-syzkaller-00362-gfef7fd48922d #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022
Workqueue: writeback wb_workfn (flush-7:0)
Call Trace:
<TASK>
__dump_stack lib/dump_stack.c:88 [inline]
dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
print_address_description mm/kasan/report.c:284 [inline]
print_report+0x15e/0x45d mm/kasan/report.c:395
kasan_report+0xbb/0x1f0 mm/kasan/report.c:495
data_blkaddr fs/f2fs/f2fs.h:2891 [inline]
is_alive fs/f2fs/gc.c:1117 [inline]
gc_data_segment fs/f2fs/gc.c:1520 [inline]
do_garbage_collect+0x386a/0x3df0 fs/f2fs/gc.c:1734
f2fs_gc+0x88c/0x20a0 fs/f2fs/gc.c:1831
f2fs_balance_fs+0x544/0x6b0 fs/f2fs/segment.c:410
f2fs_write_inode+0x57e/0xe20 fs/f2fs/inode.c:753
write_inode fs/fs-writeback.c:1440 [inline]
__writeback_single_inode+0xcfc/0x1440 fs/fs-writeback.c:1652
writeback_sb_inodes+0x54d/0xf90 fs/fs-writeback.c:1870
wb_writeback+0x2c5/0xd70 fs/fs-writeback.c:2044
wb_do_writeback fs/fs-writeback.c:2187 [inline]
wb_workfn+0x2dc/0x12f0 fs/fs-writeback.c:2227
process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289
worker_thread+0x665/0x1080 kernel/workqueue.c:2436
kthread+0x2e4/0x3a0 kernel/kthread.c:376
ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:306

The root cause is that we forgot to do sanity check on .i_extra_isize
in below path, result in accessing invalid address later, fix it.
- gc_data_segment
 - is_alive
  - data_blkaddr
   - offset_in_addr

Reported-by: syzbot+f8f3dfa4abc489e768a1@syzkaller.appspotmail.com
Link: https://lore.kernel.org/linux-f2fs-devel/0000000000003cb3c405ed5c17f9@google.com/T/#u
Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index f1a46519a5fe3..0f967b1e98f23 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1077,7 +1077,7 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 {
 	struct page *node_page;
 	nid_t nid;
-	unsigned int ofs_in_node, max_addrs;
+	unsigned int ofs_in_node, max_addrs, base;
 	block_t source_blkaddr;
 
 	nid = le32_to_cpu(sum->nid);
@@ -1103,11 +1103,17 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 		return false;
 	}
 
-	max_addrs = IS_INODE(node_page) ? DEF_ADDRS_PER_INODE :
-						DEF_ADDRS_PER_BLOCK;
-	if (ofs_in_node >= max_addrs) {
-		f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%u, nid:%u, max:%u",
-			ofs_in_node, dni->ino, dni->nid, max_addrs);
+	if (IS_INODE(node_page)) {
+		base = offset_in_addr(F2FS_INODE(node_page));
+		max_addrs = DEF_ADDRS_PER_INODE;
+	} else {
+		base = 0;
+		max_addrs = DEF_ADDRS_PER_BLOCK;
+	}
+
+	if (base + ofs_in_node >= max_addrs) {
+		f2fs_err(sbi, "Inconsistent blkaddr offset: base:%u, ofs_in_node:%u, max:%u, ino:%u, nid:%u",
+			base, ofs_in_node, max_addrs, dni->ino, dni->nid);
 		f2fs_put_page(node_page, 1);
 		return false;
 	}
-- 
GitLab


From 5b7b74b71c7fefbaa3e0ccc120c3cbd50b3fad86 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Sat, 12 Nov 2022 00:13:49 +0800
Subject: [PATCH 2838/4122] f2fs: remove submit label in __submit_discard_cmd()

Complaint from Matthew Wilcox in another similar place:

	"submit?  You don't submit anything at the 'submit' label.
	it should be called 'skip' or something.  But I think this
	is just badly written and you don't need a goto at all."

Let's remove submit label for readability.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5ac026a572287..8b0b765505785 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1143,13 +1143,12 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
 		if (time_to_inject(sbi, FAULT_DISCARD)) {
 			f2fs_show_injection_info(sbi, FAULT_DISCARD);
 			err = -EIO;
-			goto submit;
-		}
-		err = __blkdev_issue_discard(bdev,
+		} else {
+			err = __blkdev_issue_discard(bdev,
 					SECTOR_FROM_BLOCK(start),
 					SECTOR_FROM_BLOCK(len),
 					GFP_NOFS, &bio);
-submit:
+		}
 		if (err) {
 			spin_lock_irqsave(&dc->lock, flags);
 			if (dc->state == D_PARTIAL)
-- 
GitLab


From b7ad23cec26a91a4f7c45ff7ff8e915f21ac5127 Mon Sep 17 00:00:00 2001
From: Yuwei Guan <ssawgyw@gmail.com>
Date: Tue, 15 Nov 2022 14:35:35 +0800
Subject: [PATCH 2839/4122] f2fs: fix to alloc_mode changed after remount on a
 small volume device

The commit 84b89e5d943d8 ("f2fs: add auto tuning for small devices") add
tuning for small volume device, now support to tune alloce_mode to 'reuse'
if it's small size. But the alloc_mode will change to 'default' when do
remount on this small size dievce. This patch fo fix alloc_mode changed
when do remount for a small volume device.

Signed-off-by: Yuwei Guan <Yuwei.Guan@zeekrlife.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 75027ff85cd93..96cfe626a670a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2059,7 +2059,11 @@ static void default_options(struct f2fs_sb_info *sbi)
 		F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
 
 	F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
-	F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
+	if (le32_to_cpu(F2FS_RAW_SUPER(sbi)->segment_count_main) <=
+							SMALL_VOLUME_SEGMENTS)
+		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
+	else
+		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
 	F2FS_OPTION(sbi).fsync_mode = FSYNC_MODE_POSIX;
 	F2FS_OPTION(sbi).s_resuid = make_kuid(&init_user_ns, F2FS_DEF_RESUID);
 	F2FS_OPTION(sbi).s_resgid = make_kgid(&init_user_ns, F2FS_DEF_RESGID);
@@ -4077,7 +4081,6 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 
 	/* adjust parameters according to the volume size */
 	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
-		F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_REUSE;
 		if (f2fs_block_unit_discard(sbi))
 			sm_i->dcc_info->discard_granularity = 1;
 		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE |
-- 
GitLab


From 777cd95b8066ced4e9a2534941b81f8ad98e74fb Mon Sep 17 00:00:00 2001
From: Yuwei Guan <ssawgyw@gmail.com>
Date: Tue, 15 Nov 2022 14:35:36 +0800
Subject: [PATCH 2840/4122] f2fs: cleanup for 'f2fs_tuning_parameters' function

A cleanup patch for 'f2fs_tuning_parameters' function.

Signed-off-by: Yuwei Guan <Yuwei.Guan@zeekrlife.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 96cfe626a670a..05101cd4140b4 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4077,13 +4077,11 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
 
 static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 {
-	struct f2fs_sm_info *sm_i = SM_I(sbi);
-
 	/* adjust parameters according to the volume size */
-	if (sm_i->main_segments <= SMALL_VOLUME_SEGMENTS) {
+	if (MAIN_SEGS(sbi) <= SMALL_VOLUME_SEGMENTS) {
 		if (f2fs_block_unit_discard(sbi))
-			sm_i->dcc_info->discard_granularity = 1;
-		sm_i->ipu_policy = 1 << F2FS_IPU_FORCE |
+			SM_I(sbi)->dcc_info->discard_granularity = 1;
+		SM_I(sbi)->ipu_policy = 1 << F2FS_IPU_FORCE |
 					1 << F2FS_IPU_HONOR_OPU_WRITE;
 	}
 
-- 
GitLab


From 66aee5aaa237e5a3475581b462b8b22e0944d264 Mon Sep 17 00:00:00 2001
From: Yuwei Guan <ssawgyw@gmail.com>
Date: Tue, 15 Nov 2022 14:35:37 +0800
Subject: [PATCH 2841/4122] f2fs: change type for 'sbi->readdir_ra'

Before this patch, the varibale 'readdir_ra' takes effect if it's equal
to '1' or not, so we can change type for it from 'int' to 'bool'.

Signed-off-by: Yuwei Guan <Yuwei.Guan@zeekrlife.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/dir.c   | 2 +-
 fs/f2fs/f2fs.h  | 2 +-
 fs/f2fs/super.c | 2 +-
 fs/f2fs/sysfs.c | 5 +++++
 4 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 030b7fd4142ff..8e025157f35c9 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -1010,7 +1010,7 @@ int f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
 	struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
 	struct f2fs_sb_info *sbi = F2FS_I_SB(d->inode);
 	struct blk_plug plug;
-	bool readdir_ra = sbi->readdir_ra == 1;
+	bool readdir_ra = sbi->readdir_ra;
 	bool found_valid_dirent = false;
 	int err = 0;
 
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index b89b5d755ce05..96bd3461c0bbb 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -1698,7 +1698,7 @@ struct f2fs_sb_info {
 	unsigned int total_node_count;		/* total node block count */
 	unsigned int total_valid_node_count;	/* valid node block count */
 	int dir_level;				/* directory level */
-	int readdir_ra;				/* readahead inode in readdir */
+	bool readdir_ra;			/* readahead inode in readdir */
 	u64 max_io_bytes;			/* max io bytes to merge IOs */
 
 	block_t user_block_count;		/* # of user blocks */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 05101cd4140b4..31435c8645c83 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4085,7 +4085,7 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 					1 << F2FS_IPU_HONOR_OPU_WRITE;
 	}
 
-	sbi->readdir_ra = 1;
+	sbi->readdir_ra = true;
 }
 
 static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 97bf0dbb0974b..33ec467b37725 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -656,6 +656,11 @@ out:
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "readdir_ra")) {
+		sbi->readdir_ra = !!t;
+		return count;
+	}
+
 	*ui = (unsigned int)t;
 
 	return count;
-- 
GitLab


From 4ff23a6547b81ca22adb852dfe93ee5fc45328ac Mon Sep 17 00:00:00 2001
From: Sheng Yong <shengyong@oppo.com>
Date: Thu, 17 Nov 2022 23:10:54 +0800
Subject: [PATCH 2842/4122] f2fs: set zstd compress level correctly

Fixes: cf30f6a5f0c6 ("lib: zstd: Add kernel-specific API")
Signed-off-by: Sheng Yong <shengyong@oppo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Reviewed-by: Nick Terrell <terrelln@fb.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index d315c2de136f2..74d3f2d2271f3 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -346,7 +346,7 @@ static int zstd_init_compress_ctx(struct compress_ctx *cc)
 	if (!level)
 		level = F2FS_ZSTD_DEFAULT_CLEVEL;
 
-	params = zstd_get_params(F2FS_ZSTD_DEFAULT_CLEVEL, cc->rlen);
+	params = zstd_get_params(level, cc->rlen);
 	workspace_size = zstd_cstream_workspace_bound(&params.cParams);
 
 	workspace = f2fs_kvmalloc(F2FS_I_SB(cc->inode),
-- 
GitLab


From 787caf1bdcd9f04058e4e8d8ed56db1dbafea0b7 Mon Sep 17 00:00:00 2001
From: Sheng Yong <shengyong@oppo.com>
Date: Fri, 11 Nov 2022 18:08:29 +0800
Subject: [PATCH 2843/4122] f2fs: fix to enable compress for newly created file
 if extension matches

If compress_extension is set, and a newly created file matches the
extension, the file could be marked as compression file. However,
if inline_data is also enabled, there is no chance to check its
extension since f2fs_should_compress() always returns false.

This patch moves set_compress_inode(), which do extension check, in
f2fs_should_compress() to check extensions before setting inline
data flag.

Fixes: 7165841d578e ("f2fs: fix to check inline_data during compressed inode conversion")
Signed-off-by: Sheng Yong <shengyong@oppo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h  |   2 +-
 fs/f2fs/namei.c | 329 ++++++++++++++++++++++++------------------------
 2 files changed, 164 insertions(+), 167 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 96bd3461c0bbb..f0833638f59ea 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -2980,7 +2980,7 @@ static inline void f2fs_change_bit(unsigned int nr, char *addr)
 /* Flags that should be inherited by new inodes from their parent. */
 #define F2FS_FL_INHERITED (F2FS_SYNC_FL | F2FS_NODUMP_FL | F2FS_NOATIME_FL | \
 			   F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
-			   F2FS_CASEFOLD_FL | F2FS_COMPR_FL | F2FS_NOCOMP_FL)
+			   F2FS_CASEFOLD_FL)
 
 /* Flags that are appropriate for regular files (all but dir-specific ones). */
 #define F2FS_REG_FLMASK		(~(F2FS_DIRSYNC_FL | F2FS_PROJINHERIT_FL | \
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index e104409c3a0e5..54448dccbb6ad 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -22,8 +22,163 @@
 #include "acl.h"
 #include <trace/events/f2fs.h>
 
+static inline int is_extension_exist(const unsigned char *s, const char *sub,
+						bool tmp_ext)
+{
+	size_t slen = strlen(s);
+	size_t sublen = strlen(sub);
+	int i;
+
+	if (sublen == 1 && *sub == '*')
+		return 1;
+
+	/*
+	 * filename format of multimedia file should be defined as:
+	 * "filename + '.' + extension + (optional: '.' + temp extension)".
+	 */
+	if (slen < sublen + 2)
+		return 0;
+
+	if (!tmp_ext) {
+		/* file has no temp extension */
+		if (s[slen - sublen - 1] != '.')
+			return 0;
+		return !strncasecmp(s + slen - sublen, sub, sublen);
+	}
+
+	for (i = 1; i < slen - sublen; i++) {
+		if (s[i] != '.')
+			continue;
+		if (!strncasecmp(s + i + 1, sub, sublen))
+			return 1;
+	}
+
+	return 0;
+}
+
+int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
+							bool hot, bool set)
+{
+	__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
+	int cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+	int hot_count = sbi->raw_super->hot_ext_count;
+	int total_count = cold_count + hot_count;
+	int start, count;
+	int i;
+
+	if (set) {
+		if (total_count == F2FS_MAX_EXTENSION)
+			return -EINVAL;
+	} else {
+		if (!hot && !cold_count)
+			return -EINVAL;
+		if (hot && !hot_count)
+			return -EINVAL;
+	}
+
+	if (hot) {
+		start = cold_count;
+		count = total_count;
+	} else {
+		start = 0;
+		count = cold_count;
+	}
+
+	for (i = start; i < count; i++) {
+		if (strcmp(name, extlist[i]))
+			continue;
+
+		if (set)
+			return -EINVAL;
+
+		memcpy(extlist[i], extlist[i + 1],
+				F2FS_EXTENSION_LEN * (total_count - i - 1));
+		memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN);
+		if (hot)
+			sbi->raw_super->hot_ext_count = hot_count - 1;
+		else
+			sbi->raw_super->extension_count =
+						cpu_to_le32(cold_count - 1);
+		return 0;
+	}
+
+	if (!set)
+		return -EINVAL;
+
+	if (hot) {
+		memcpy(extlist[count], name, strlen(name));
+		sbi->raw_super->hot_ext_count = hot_count + 1;
+	} else {
+		char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
+
+		memcpy(buf, &extlist[cold_count],
+				F2FS_EXTENSION_LEN * hot_count);
+		memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
+		memcpy(extlist[cold_count], name, strlen(name));
+		memcpy(&extlist[cold_count + 1], buf,
+				F2FS_EXTENSION_LEN * hot_count);
+		sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
+	}
+	return 0;
+}
+
+static void set_compress_new_inode(struct f2fs_sb_info *sbi, struct inode *dir,
+				struct inode *inode, const unsigned char *name)
+{
+	__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
+	unsigned char (*noext)[F2FS_EXTENSION_LEN] =
+						F2FS_OPTION(sbi).noextensions;
+	unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions;
+	unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
+	unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
+	int i, cold_count, hot_count;
+
+	if (!f2fs_sb_has_compression(sbi))
+		return;
+
+	if (S_ISDIR(inode->i_mode))
+		goto inherit_comp;
+
+	/* This name comes only from normal files. */
+	if (!name)
+		return;
+
+	/* Don't compress hot files. */
+	f2fs_down_read(&sbi->sb_lock);
+	cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+	hot_count = sbi->raw_super->hot_ext_count;
+	for (i = cold_count; i < cold_count + hot_count; i++)
+		if (is_extension_exist(name, extlist[i], false))
+			break;
+	f2fs_up_read(&sbi->sb_lock);
+	if (i < (cold_count + hot_count))
+		return;
+
+	/* Don't compress unallowed extension. */
+	for (i = 0; i < noext_cnt; i++)
+		if (is_extension_exist(name, noext[i], false))
+			return;
+
+	/* Compress wanting extension. */
+	for (i = 0; i < ext_cnt; i++) {
+		if (is_extension_exist(name, ext[i], false)) {
+			set_compress_context(inode);
+			return;
+		}
+	}
+inherit_comp:
+	/* Inherit the {no-}compression flag in directory */
+	if (F2FS_I(dir)->i_flags & F2FS_NOCOMP_FL) {
+		F2FS_I(inode)->i_flags |= F2FS_NOCOMP_FL;
+		f2fs_mark_inode_dirty_sync(inode, true);
+	} else if (F2FS_I(dir)->i_flags & F2FS_COMPR_FL) {
+		set_compress_context(inode);
+	}
+}
+
 static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
-						struct inode *dir, umode_t mode)
+						struct inode *dir, umode_t mode,
+						const char *name)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 	nid_t ino;
@@ -114,12 +269,8 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
 	if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL)
 		set_inode_flag(inode, FI_PROJ_INHERIT);
 
-	if (f2fs_sb_has_compression(sbi)) {
-		/* Inherit the compression flag in directory */
-		if ((F2FS_I(dir)->i_flags & F2FS_COMPR_FL) &&
-					f2fs_may_compress(inode))
-			set_compress_context(inode);
-	}
+	/* Check compression first. */
+	set_compress_new_inode(sbi, dir, inode, name);
 
 	/* Should enable inline_data after compression set */
 	if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
@@ -153,40 +304,6 @@ fail_drop:
 	return ERR_PTR(err);
 }
 
-static inline int is_extension_exist(const unsigned char *s, const char *sub,
-						bool tmp_ext)
-{
-	size_t slen = strlen(s);
-	size_t sublen = strlen(sub);
-	int i;
-
-	if (sublen == 1 && *sub == '*')
-		return 1;
-
-	/*
-	 * filename format of multimedia file should be defined as:
-	 * "filename + '.' + extension + (optional: '.' + temp extension)".
-	 */
-	if (slen < sublen + 2)
-		return 0;
-
-	if (!tmp_ext) {
-		/* file has no temp extension */
-		if (s[slen - sublen - 1] != '.')
-			return 0;
-		return !strncasecmp(s + slen - sublen, sub, sublen);
-	}
-
-	for (i = 1; i < slen - sublen; i++) {
-		if (s[i] != '.')
-			continue;
-		if (!strncasecmp(s + i + 1, sub, sublen))
-			return 1;
-	}
-
-	return 0;
-}
-
 /*
  * Set file's temperature for hot/cold data separation
  */
@@ -217,124 +334,6 @@ static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *
 		file_set_hot(inode);
 }
 
-int f2fs_update_extension_list(struct f2fs_sb_info *sbi, const char *name,
-							bool hot, bool set)
-{
-	__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
-	int cold_count = le32_to_cpu(sbi->raw_super->extension_count);
-	int hot_count = sbi->raw_super->hot_ext_count;
-	int total_count = cold_count + hot_count;
-	int start, count;
-	int i;
-
-	if (set) {
-		if (total_count == F2FS_MAX_EXTENSION)
-			return -EINVAL;
-	} else {
-		if (!hot && !cold_count)
-			return -EINVAL;
-		if (hot && !hot_count)
-			return -EINVAL;
-	}
-
-	if (hot) {
-		start = cold_count;
-		count = total_count;
-	} else {
-		start = 0;
-		count = cold_count;
-	}
-
-	for (i = start; i < count; i++) {
-		if (strcmp(name, extlist[i]))
-			continue;
-
-		if (set)
-			return -EINVAL;
-
-		memcpy(extlist[i], extlist[i + 1],
-				F2FS_EXTENSION_LEN * (total_count - i - 1));
-		memset(extlist[total_count - 1], 0, F2FS_EXTENSION_LEN);
-		if (hot)
-			sbi->raw_super->hot_ext_count = hot_count - 1;
-		else
-			sbi->raw_super->extension_count =
-						cpu_to_le32(cold_count - 1);
-		return 0;
-	}
-
-	if (!set)
-		return -EINVAL;
-
-	if (hot) {
-		memcpy(extlist[count], name, strlen(name));
-		sbi->raw_super->hot_ext_count = hot_count + 1;
-	} else {
-		char buf[F2FS_MAX_EXTENSION][F2FS_EXTENSION_LEN];
-
-		memcpy(buf, &extlist[cold_count],
-				F2FS_EXTENSION_LEN * hot_count);
-		memset(extlist[cold_count], 0, F2FS_EXTENSION_LEN);
-		memcpy(extlist[cold_count], name, strlen(name));
-		memcpy(&extlist[cold_count + 1], buf,
-				F2FS_EXTENSION_LEN * hot_count);
-		sbi->raw_super->extension_count = cpu_to_le32(cold_count + 1);
-	}
-	return 0;
-}
-
-static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
-						const unsigned char *name)
-{
-	__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
-	unsigned char (*noext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).noextensions;
-	unsigned char (*ext)[F2FS_EXTENSION_LEN] = F2FS_OPTION(sbi).extensions;
-	unsigned char ext_cnt = F2FS_OPTION(sbi).compress_ext_cnt;
-	unsigned char noext_cnt = F2FS_OPTION(sbi).nocompress_ext_cnt;
-	int i, cold_count, hot_count;
-
-	if (!f2fs_sb_has_compression(sbi) ||
-			F2FS_I(inode)->i_flags & F2FS_NOCOMP_FL ||
-			!f2fs_may_compress(inode) ||
-			(!ext_cnt && !noext_cnt))
-		return;
-
-	f2fs_down_read(&sbi->sb_lock);
-
-	cold_count = le32_to_cpu(sbi->raw_super->extension_count);
-	hot_count = sbi->raw_super->hot_ext_count;
-
-	for (i = cold_count; i < cold_count + hot_count; i++) {
-		if (is_extension_exist(name, extlist[i], false)) {
-			f2fs_up_read(&sbi->sb_lock);
-			return;
-		}
-	}
-
-	f2fs_up_read(&sbi->sb_lock);
-
-	for (i = 0; i < noext_cnt; i++) {
-		if (is_extension_exist(name, noext[i], false)) {
-			f2fs_disable_compressed_file(inode);
-			return;
-		}
-	}
-
-	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
-		return;
-
-	for (i = 0; i < ext_cnt; i++) {
-		if (!is_extension_exist(name, ext[i], false))
-			continue;
-
-		/* Do not use inline_data with compression */
-		stat_dec_inline_inode(inode);
-		clear_inode_flag(inode, FI_INLINE_DATA);
-		set_compress_context(inode);
-		return;
-	}
-}
-
 static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
 		       struct dentry *dentry, umode_t mode, bool excl)
 {
@@ -352,15 +351,13 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(mnt_userns, dir, mode);
+	inode = f2fs_new_inode(mnt_userns, dir, mode, dentry->d_name.name);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
 	if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
 		set_file_temperature(sbi, inode, dentry->d_name.name);
 
-	set_compress_inode(sbi, inode, dentry->d_name.name);
-
 	inode->i_op = &f2fs_file_inode_operations;
 	inode->i_fop = &f2fs_file_operations;
 	inode->i_mapping->a_ops = &f2fs_dblock_aops;
@@ -689,7 +686,7 @@ static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO);
+	inode = f2fs_new_inode(mnt_userns, dir, S_IFLNK | S_IRWXUGO, NULL);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -760,7 +757,7 @@ static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode);
+	inode = f2fs_new_inode(mnt_userns, dir, S_IFDIR | mode, NULL);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -817,7 +814,7 @@ static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(mnt_userns, dir, mode);
+	inode = f2fs_new_inode(mnt_userns, dir, mode, NULL);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
@@ -856,7 +853,7 @@ static int __f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
 	if (err)
 		return err;
 
-	inode = f2fs_new_inode(mnt_userns, dir, mode);
+	inode = f2fs_new_inode(mnt_userns, dir, mode, NULL);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-- 
GitLab


From b16bcaaf7a325f90967259a0b7cfcce4ff8c56ba Mon Sep 17 00:00:00 2001
From: Sheng Yong <shengyong@oppo.com>
Date: Fri, 11 Nov 2022 18:08:30 +0800
Subject: [PATCH 2844/4122] f2fs: move set_file_temperature into f2fs_new_inode

Since the file name has already passed to f2fs_new_inode(), let's
move set_file_temperature() into f2fs_new_inode().

Signed-off-by: Sheng Yong <shengyong@oppo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/namei.c | 62 +++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 33 deletions(-)

diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 54448dccbb6ad..58a91ce8fe083 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -176,6 +176,32 @@ inherit_comp:
 	}
 }
 
+/*
+ * Set file's temperature for hot/cold data separation
+ */
+static void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
+		const unsigned char *name)
+{
+	__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
+	int i, cold_count, hot_count;
+
+	f2fs_down_read(&sbi->sb_lock);
+	cold_count = le32_to_cpu(sbi->raw_super->extension_count);
+	hot_count = sbi->raw_super->hot_ext_count;
+	for (i = 0; i < cold_count + hot_count; i++)
+		if (is_extension_exist(name, extlist[i], true))
+			break;
+	f2fs_up_read(&sbi->sb_lock);
+
+	if (i == cold_count + hot_count)
+		return;
+
+	if (i < cold_count)
+		file_set_cold(inode);
+	else
+		file_set_hot(inode);
+}
+
 static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
 						struct inode *dir, umode_t mode,
 						const char *name)
@@ -276,6 +302,9 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
 	if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
 		set_inode_flag(inode, FI_INLINE_DATA);
 
+	if (name && !test_opt(sbi, DISABLE_EXT_IDENTIFY))
+		set_file_temperature(sbi, inode, name);
+
 	stat_inc_inline_xattr(inode);
 	stat_inc_inline_inode(inode);
 	stat_inc_inline_dir(inode);
@@ -304,36 +333,6 @@ fail_drop:
 	return ERR_PTR(err);
 }
 
-/*
- * Set file's temperature for hot/cold data separation
- */
-static inline void set_file_temperature(struct f2fs_sb_info *sbi, struct inode *inode,
-		const unsigned char *name)
-{
-	__u8 (*extlist)[F2FS_EXTENSION_LEN] = sbi->raw_super->extension_list;
-	int i, cold_count, hot_count;
-
-	f2fs_down_read(&sbi->sb_lock);
-
-	cold_count = le32_to_cpu(sbi->raw_super->extension_count);
-	hot_count = sbi->raw_super->hot_ext_count;
-
-	for (i = 0; i < cold_count + hot_count; i++) {
-		if (is_extension_exist(name, extlist[i], true))
-			break;
-	}
-
-	f2fs_up_read(&sbi->sb_lock);
-
-	if (i == cold_count + hot_count)
-		return;
-
-	if (i < cold_count)
-		file_set_cold(inode);
-	else
-		file_set_hot(inode);
-}
-
 static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
 		       struct dentry *dentry, umode_t mode, bool excl)
 {
@@ -355,9 +354,6 @@ static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	if (!test_opt(sbi, DISABLE_EXT_IDENTIFY))
-		set_file_temperature(sbi, inode, dentry->d_name.name);
-
 	inode->i_op = &f2fs_file_inode_operations;
 	inode->i_fop = &f2fs_file_operations;
 	inode->i_mapping->a_ops = &f2fs_dblock_aops;
-- 
GitLab


From fc031877b8229ee2f56a2cc3868ca7f282b1231d Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Sat, 19 Nov 2022 01:40:28 +0800
Subject: [PATCH 2845/4122] f2fs: fix description about discard_granularity
 node

Let's fix the inconsistency in the text description.
Default discard granularity is 16. For small devices,
default value is 1.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 24e7cb77f265b..32404781e76fa 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -138,7 +138,8 @@ Contact:	"Chao Yu" <yuchao0@huawei.com>
 Description:	Controls discard granularity of inner discard thread. Inner thread
 		will not issue discards with size that is smaller than granularity.
 		The unit size is one block(4KB), now only support configuring
-		in range of [1, 512]. Default value is 4(=16KB).
+		in range of [1, 512]. Default value is 16.
+		For small devices, default value is 1.
 
 What:		/sys/fs/f2fs/<disk>/umount_discard_timeout
 Date:		January 2019
-- 
GitLab


From 620816393239890feff8608251e2746b1cc2cfa0 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 17 Nov 2022 01:10:45 +0800
Subject: [PATCH 2846/4122] f2fs: make __queue_discard_cmd() return void

Since __queue_discard_cmd() never returns an error,
let's make it return void.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8b0b765505785..14ece4bf7c7e2 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1358,13 +1358,13 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
 	}
 }
 
-static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
+static void __queue_discard_cmd(struct f2fs_sb_info *sbi,
 		struct block_device *bdev, block_t blkstart, block_t blklen)
 {
 	block_t lblkstart = blkstart;
 
 	if (!f2fs_bdev_support_discard(bdev))
-		return 0;
+		return;
 
 	trace_f2fs_queue_discard(bdev, blkstart, blklen);
 
@@ -1376,7 +1376,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
 	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
 	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
 	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
-	return 0;
 }
 
 static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
@@ -1776,7 +1775,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
 	}
 
 	/* For conventional zones, use regular discard if supported */
-	return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
+	__queue_discard_cmd(sbi, bdev, lblkstart, blklen);
+	return 0;
 }
 #endif
 
@@ -1787,7 +1787,8 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi,
 	if (f2fs_sb_has_blkzoned(sbi) && bdev_is_zoned(bdev))
 		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
 #endif
-	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
+	__queue_discard_cmd(sbi, bdev, blkstart, blklen);
+	return 0;
 }
 
 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
-- 
GitLab


From 78a99fe6254cad4be310cd84af39f6c46b668c72 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Wed, 23 Nov 2022 06:42:52 +0800
Subject: [PATCH 2847/4122] f2fs: truncate blocks in batch in
 __complete_revoke_list()

Use f2fs_do_truncate_blocks() to truncate all blocks in-batch in
__complete_revoke_list().

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 14ece4bf7c7e2..37c721e1eb034 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -262,24 +262,19 @@ static void __complete_revoke_list(struct inode *inode, struct list_head *head,
 					bool revoke)
 {
 	struct revoke_entry *cur, *tmp;
-	pgoff_t start_index = 0;
 	bool truncate = is_inode_flag_set(inode, FI_ATOMIC_REPLACE);
 
 	list_for_each_entry_safe(cur, tmp, head, list) {
-		if (revoke) {
+		if (revoke)
 			__replace_atomic_write_block(inode, cur->index,
 						cur->old_addr, NULL, true);
-		} else if (truncate) {
-			f2fs_truncate_hole(inode, start_index, cur->index);
-			start_index = cur->index + 1;
-		}
 
 		list_del(&cur->list);
 		kmem_cache_free(revoke_entry_slab, cur);
 	}
 
 	if (!revoke && truncate)
-		f2fs_do_truncate_blocks(inode, start_index * PAGE_SIZE, false);
+		f2fs_do_truncate_blocks(inode, 0, false);
 }
 
 static int __f2fs_commit_atomic_write(struct inode *inode)
-- 
GitLab


From e219aecfd4b766c4e878a3769057e9809f7fcadc Mon Sep 17 00:00:00 2001
From: Yonggil Song <yonggil.song@samsung.com>
Date: Tue, 22 Nov 2022 18:03:20 +0900
Subject: [PATCH 2848/4122] f2fs: avoid victim selection from previous victim
 section

When f2fs chooses GC victim in large section & LFS mode,
next_victim_seg[gc_type] is referenced first. After segment is freed,
next_victim_seg[gc_type] has the next segment number.
However, next_victim_seg[gc_type] still has the last segment number
even after the last segment of section is freed. In this case, when f2fs
chooses a victim for the next GC round, the last segment of previous victim
section is chosen as a victim.

Initialize next_victim_seg[gc_type] to NULL_SEGNO for the last segment in
large section.

Fixes: e3080b0120a1 ("f2fs: support subsectional garbage collection")
Signed-off-by: Yonggil Song <yonggil.song@samsung.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 0f967b1e98f23..f1b68eda2235d 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1749,8 +1749,9 @@ freed:
 				get_valid_blocks(sbi, segno, false) == 0)
 			seg_freed++;
 
-		if (__is_large_section(sbi) && segno + 1 < end_segno)
-			sbi->next_victim_seg[gc_type] = segno + 1;
+		if (__is_large_section(sbi))
+			sbi->next_victim_seg[gc_type] =
+				(segno + 1 < end_segno) ? segno + 1 : NULL_SEGNO;
 skip:
 		f2fs_put_page(sum_page, 0);
 	}
-- 
GitLab


From 48c08c51f938d955dd8f5b8972bc29faa4c9556f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Fri, 18 Nov 2022 11:46:00 +0800
Subject: [PATCH 2849/4122] f2fs: init discard policy after thread wakeup

Under the current logic, after the discard thread wakes up, it will not
run according to the expected policy, but will use the expected policy
before sleep. Move the strategy selection to after the thread wakes up,
so that the running state of the thread meets expectations.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 37c721e1eb034..73ad8dc9a4d3c 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1679,6 +1679,11 @@ static int issue_discard_thread(void *data)
 	set_freezable();
 
 	do {
+		wait_event_interruptible_timeout(*q,
+				kthread_should_stop() || freezing(current) ||
+				dcc->discard_wake,
+				msecs_to_jiffies(wait_ms));
+
 		if (sbi->gc_mode == GC_URGENT_HIGH ||
 			!f2fs_available_free_memory(sbi, DISCARD_CACHE))
 			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
@@ -1686,14 +1691,6 @@ static int issue_discard_thread(void *data)
 			__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
 						dcc->discard_granularity);
 
-		if (!atomic_read(&dcc->discard_cmd_cnt))
-		       wait_ms = dpolicy.max_interval;
-
-		wait_event_interruptible_timeout(*q,
-				kthread_should_stop() || freezing(current) ||
-				dcc->discard_wake,
-				msecs_to_jiffies(wait_ms));
-
 		if (dcc->discard_wake)
 			dcc->discard_wake = 0;
 
@@ -1707,12 +1704,11 @@ static int issue_discard_thread(void *data)
 			continue;
 		if (kthread_should_stop())
 			return 0;
-		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
+		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) ||
+			!atomic_read(&dcc->discard_cmd_cnt)) {
 			wait_ms = dpolicy.max_interval;
 			continue;
 		}
-		if (!atomic_read(&dcc->discard_cmd_cnt))
-			continue;
 
 		sb_start_intwrite(sbi->sb);
 
@@ -1727,6 +1723,8 @@ static int issue_discard_thread(void *data)
 		} else {
 			wait_ms = dpolicy.max_interval;
 		}
+		if (!atomic_read(&dcc->discard_cmd_cnt))
+			wait_ms = dpolicy.max_interval;
 
 		sb_end_intwrite(sbi->sb);
 
-- 
GitLab


From 1cd2e6d544359ae13e6fd9029b6018b957cf08c3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 24 Nov 2022 00:44:01 +0800
Subject: [PATCH 2850/4122] f2fs: define MIN_DISCARD_GRANULARITY macro

Do cleanup in f2fs_tuning_parameters() and __init_discard_policy(),
let's use macro instead of number.

Suggested-by: Chao Yu <chao@kernel.org>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h    | 2 ++
 fs/f2fs/segment.c | 4 ++--
 fs/f2fs/super.c   | 3 ++-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index f0833638f59ea..4694b55b6df49 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -329,6 +329,8 @@ struct discard_entry {
 	unsigned char discard_map[SIT_VBLOCK_MAP_SIZE];	/* segment discard bitmap */
 };
 
+/* minimum discard granularity, unit: block count */
+#define MIN_DISCARD_GRANULARITY		1
 /* default discard granularity of inner discard thread, unit: block count */
 #define DEFAULT_DISCARD_GRANULARITY		16
 /* default maximum discard granularity of ordered discard, unit: block count */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 73ad8dc9a4d3c..c7afcc6cd75bf 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1065,7 +1065,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
 		dpolicy->sync = false;
 		dpolicy->ordered = true;
 		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
-			dpolicy->granularity = 1;
+			dpolicy->granularity = MIN_DISCARD_GRANULARITY;
 			if (atomic_read(&dcc->discard_cmd_cnt))
 				dpolicy->max_interval =
 					dcc->min_discard_issue_time;
@@ -1080,7 +1080,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
 	} else if (discard_type == DPOLICY_UMOUNT) {
 		dpolicy->io_aware = false;
 		/* we need to issue all to keep CP_TRIMMED_FLAG */
-		dpolicy->granularity = 1;
+		dpolicy->granularity = MIN_DISCARD_GRANULARITY;
 		dpolicy->timeout = true;
 	}
 }
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 31435c8645c83..daf14b55a9729 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4080,7 +4080,8 @@ static void f2fs_tuning_parameters(struct f2fs_sb_info *sbi)
 	/* adjust parameters according to the volume size */
 	if (MAIN_SEGS(sbi) <= SMALL_VOLUME_SEGMENTS) {
 		if (f2fs_block_unit_discard(sbi))
-			SM_I(sbi)->dcc_info->discard_granularity = 1;
+			SM_I(sbi)->dcc_info->discard_granularity =
+						MIN_DISCARD_GRANULARITY;
 		SM_I(sbi)->ipu_policy = 1 << F2FS_IPU_FORCE |
 					1 << F2FS_IPU_HONOR_OPU_WRITE;
 	}
-- 
GitLab


From 8a47d228de6a4fd4c751142fb27d56f385b3fe41 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 24 Nov 2022 00:44:02 +0800
Subject: [PATCH 2851/4122] f2fs: introduce discard_urgent_util sysfs node

Through this node, you can control the background discard
to run more aggressively or not aggressively when reach the
utilization rate of the space.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs | 8 ++++++++
 fs/f2fs/f2fs.h                          | 1 +
 fs/f2fs/segment.c                       | 3 ++-
 fs/f2fs/sysfs.c                         | 9 +++++++++
 4 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 32404781e76fa..84a009aab1a13 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -647,3 +647,11 @@ Date:		October 2022
 Contact:	"Yangtao Li" <frank.li@vivo.com>
 Description:	Show the current gc_mode as a string.
 		This is a read-only entry.
+
+What:		/sys/fs/f2fs/<disk>/discard_urgent_util
+Date:		November 2022
+Contact:	"Yangtao Li" <frank.li@vivo.com>
+Description:	When space utilization exceeds this, do background DISCARD aggressively.
+		Does DISCARD forcibly in a period of given min_discard_issue_time when the number
+		of discards is not 0 and set discard granularity to 1.
+		Default: 80
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 4694b55b6df49..296683648d4fa 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -413,6 +413,7 @@ struct discard_cmd_control {
 	unsigned int min_discard_issue_time;	/* min. interval between discard issue */
 	unsigned int mid_discard_issue_time;	/* mid. interval between discard issue */
 	unsigned int max_discard_issue_time;	/* max. interval between discard issue */
+	unsigned int discard_urgent_util;	/* utilization which issue discard proactively */
 	unsigned int discard_granularity;	/* discard granularity */
 	unsigned int max_ordered_discard;	/* maximum discard granularity issued by lba order */
 	unsigned int undiscard_blks;		/* # of undiscard blocks */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c7afcc6cd75bf..0ff451ea18f66 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1064,7 +1064,7 @@ static void __init_discard_policy(struct f2fs_sb_info *sbi,
 		dpolicy->io_aware = true;
 		dpolicy->sync = false;
 		dpolicy->ordered = true;
-		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
+		if (utilization(sbi) > dcc->discard_urgent_util) {
 			dpolicy->granularity = MIN_DISCARD_GRANULARITY;
 			if (atomic_read(&dcc->discard_cmd_cnt))
 				dpolicy->max_interval =
@@ -2079,6 +2079,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
 	dcc->min_discard_issue_time = DEF_MIN_DISCARD_ISSUE_TIME;
 	dcc->mid_discard_issue_time = DEF_MID_DISCARD_ISSUE_TIME;
 	dcc->max_discard_issue_time = DEF_MAX_DISCARD_ISSUE_TIME;
+	dcc->discard_urgent_util = DEF_DISCARD_URGENT_UTIL;
 	dcc->undiscard_blks = 0;
 	dcc->next_pos = 0;
 	dcc->root = RB_ROOT_CACHED;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 33ec467b37725..a4745d596310f 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -493,6 +493,13 @@ out:
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "discard_urgent_util")) {
+		if (t > 100)
+			return -EINVAL;
+		*ui = t;
+		return count;
+	}
+
 	if (!strcmp(a->attr.name, "migration_granularity")) {
 		if (t == 0 || t > sbi->segs_per_sec)
 			return -EINVAL;
@@ -800,6 +807,7 @@ F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_request, max_discard_req
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, min_discard_issue_time, min_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, mid_discard_issue_time, mid_discard_issue_time);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_discard_issue_time, max_discard_issue_time);
+F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_urgent_util, discard_urgent_util);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, discard_granularity, discard_granularity);
 F2FS_RW_ATTR(DCC_INFO, discard_cmd_control, max_ordered_discard, max_ordered_discard);
 F2FS_RW_ATTR(RESERVED_BLOCKS, f2fs_sb_info, reserved_blocks, reserved_blocks);
@@ -930,6 +938,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(min_discard_issue_time),
 	ATTR_LIST(mid_discard_issue_time),
 	ATTR_LIST(max_discard_issue_time),
+	ATTR_LIST(discard_urgent_util),
 	ATTR_LIST(discard_granularity),
 	ATTR_LIST(max_ordered_discard),
 	ATTR_LIST(pending_discard),
-- 
GitLab


From 4bded7af8b9af6e97514b0521004f90267905aef Mon Sep 17 00:00:00 2001
From: Beau Belgrave <beaub@linux.microsoft.com>
Date: Wed, 23 Nov 2022 10:32:48 -0800
Subject: [PATCH 2852/4122] tracing/user_events: Fix call print_fmt leak

If user_event_trace_register() fails within user_event_parse() the
call's print_fmt member is not freed. Add kfree call to fix this.

Link: https://lkml.kernel.org/r/20221123183248.554-1-beaub@linux.microsoft.com

Fixes: aa3b2b4c6692 ("user_events: Add print_fmt generation support for basic types")
Signed-off-by: Beau Belgrave <beaub@linux.microsoft.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_user.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c
index 539b08ae70207..9cb53182bb31c 100644
--- a/kernel/trace/trace_events_user.c
+++ b/kernel/trace/trace_events_user.c
@@ -1359,6 +1359,7 @@ put_user_lock:
 put_user:
 	user_event_destroy_fields(user);
 	user_event_destroy_validators(user);
+	kfree(user->call.print_fmt);
 	kfree(user);
 	return ret;
 }
-- 
GitLab


From f43dc4dc3eff028b5ddddd99f3a66c5a6bdd4e78 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 29 Nov 2022 09:09:11 +1100
Subject: [PATCH 2853/4122] iomap: buffered write failure should not truncate
 the page cache

iomap_file_buffered_write_punch_delalloc() currently invalidates the
page cache over the unused range of the delalloc extent that was
allocated. While the write allocated the delalloc extent, it does
not own it exclusively as the write does not hold any locks that
prevent either writeback or mmap page faults from changing the state
of either the page cache or the extent state backing this range.

Whilst xfs_bmap_punch_delalloc_range() already handles races in
extent conversion - it will only punch out delalloc extents and it
ignores any other type of extent - the page cache truncate does not
discriminate between data written by this write or some other task.
As a result, truncating the page cache can result in data corruption
if the write races with mmap modifications to the file over the same
range.

generic/346 exercises this workload, and if we randomly fail writes
(as will happen when iomap gets stale iomap detection later in the
patchset), it will randomly corrupt the file data because it removes
data written by mmap() in the same page as the write() that failed.

Hence we do not want to punch out the page cache over the range of
the extent we failed to write to - what we actually need to do is
detect the ranges that have dirty data in cache over them and *not
punch them out*.

To do this, we have to walk the page cache over the range of the
delalloc extent we want to remove. This is made complex by the fact
we have to handle partially up-to-date folios correctly and this can
happen even when the FSB size == PAGE_SIZE because we now support
multi-page folios in the page cache.

Because we are only interested in discovering the edges of data
ranges in the page cache (i.e. hole-data boundaries) we can make use
of mapping_seek_hole_data() to find those transitions in the page
cache. As we hold the invalidate_lock, we know that the boundaries
are not going to change while we walk the range. This interface is
also byte-based and is sub-page block aware, so we can find the data
ranges in the cache based on byte offsets rather than page, folio or
fs block sized chunks. This greatly simplifies the logic of finding
dirty cached ranges in the page cache.

Once we've identified a range that contains cached data, we can then
iterate the range folio by folio. This allows us to determine if the
data is dirty and hence perform the correct delalloc extent punching
operations. The seek interface we use to iterate data ranges will
give us sub-folio start/end granularity, so we may end up looking up
the same folio multiple times as the seek interface iterates across
each discontiguous data region in the folio.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 195 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 180 insertions(+), 15 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 734b761a1e4ab..dca9ec9dc4a8e 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -832,6 +832,165 @@ iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *i,
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
 
+/*
+ * Scan the data range passed to us for dirty page cache folios. If we find a
+ * dirty folio, punch out the preceeding range and update the offset from which
+ * the next punch will start from.
+ *
+ * We can punch out storage reservations under clean pages because they either
+ * contain data that has been written back - in which case the delalloc punch
+ * over that range is a no-op - or they have been read faults in which case they
+ * contain zeroes and we can remove the delalloc backing range and any new
+ * writes to those pages will do the normal hole filling operation...
+ *
+ * This makes the logic simple: we only need to keep the delalloc extents only
+ * over the dirty ranges of the page cache.
+ *
+ * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
+ * simplify range iterations.
+ */
+static int iomap_write_delalloc_scan(struct inode *inode,
+		loff_t *punch_start_byte, loff_t start_byte, loff_t end_byte,
+		int (*punch)(struct inode *inode, loff_t offset, loff_t length))
+{
+	while (start_byte < end_byte) {
+		struct folio	*folio;
+
+		/* grab locked page */
+		folio = filemap_lock_folio(inode->i_mapping,
+				start_byte >> PAGE_SHIFT);
+		if (!folio) {
+			start_byte = ALIGN_DOWN(start_byte, PAGE_SIZE) +
+					PAGE_SIZE;
+			continue;
+		}
+
+		/* if dirty, punch up to offset */
+		if (folio_test_dirty(folio)) {
+			if (start_byte > *punch_start_byte) {
+				int	error;
+
+				error = punch(inode, *punch_start_byte,
+						start_byte - *punch_start_byte);
+				if (error) {
+					folio_unlock(folio);
+					folio_put(folio);
+					return error;
+				}
+			}
+
+			/*
+			 * Make sure the next punch start is correctly bound to
+			 * the end of this data range, not the end of the folio.
+			 */
+			*punch_start_byte = min_t(loff_t, end_byte,
+					folio_next_index(folio) << PAGE_SHIFT);
+		}
+
+		/* move offset to start of next folio in range */
+		start_byte = folio_next_index(folio) << PAGE_SHIFT;
+		folio_unlock(folio);
+		folio_put(folio);
+	}
+	return 0;
+}
+
+/*
+ * Punch out all the delalloc blocks in the range given except for those that
+ * have dirty data still pending in the page cache - those are going to be
+ * written and so must still retain the delalloc backing for writeback.
+ *
+ * As we are scanning the page cache for data, we don't need to reimplement the
+ * wheel - mapping_seek_hole_data() does exactly what we need to identify the
+ * start and end of data ranges correctly even for sub-folio block sizes. This
+ * byte range based iteration is especially convenient because it means we
+ * don't have to care about variable size folios, nor where the start or end of
+ * the data range lies within a folio, if they lie within the same folio or even
+ * if there are multiple discontiguous data ranges within the folio.
+ *
+ * It should be noted that mapping_seek_hole_data() is not aware of EOF, and so
+ * can return data ranges that exist in the cache beyond EOF. e.g. a page fault
+ * spanning EOF will initialise the post-EOF data to zeroes and mark it up to
+ * date. A write page fault can then mark it dirty. If we then fail a write()
+ * beyond EOF into that up to date cached range, we allocate a delalloc block
+ * beyond EOF and then have to punch it out. Because the range is up to date,
+ * mapping_seek_hole_data() will return it, and we will skip the punch because
+ * the folio is dirty. THis is incorrect - we always need to punch out delalloc
+ * beyond EOF in this case as writeback will never write back and covert that
+ * delalloc block beyond EOF. Hence we limit the cached data scan range to EOF,
+ * resulting in always punching out the range from the EOF to the end of the
+ * range the iomap spans.
+ *
+ * Intervals are of the form [start_byte, end_byte) (i.e. open ended) because it
+ * matches the intervals returned by mapping_seek_hole_data(). i.e. SEEK_DATA
+ * returns the start of a data range (start_byte), and SEEK_HOLE(start_byte)
+ * returns the end of the data range (data_end). Using closed intervals would
+ * require sprinkling this code with magic "+ 1" and "- 1" arithmetic and expose
+ * the code to subtle off-by-one bugs....
+ */
+static int iomap_write_delalloc_release(struct inode *inode,
+		loff_t start_byte, loff_t end_byte,
+		int (*punch)(struct inode *inode, loff_t pos, loff_t length))
+{
+	loff_t punch_start_byte = start_byte;
+	loff_t scan_end_byte = min(i_size_read(inode), end_byte);
+	int error = 0;
+
+	/*
+	 * Lock the mapping to avoid races with page faults re-instantiating
+	 * folios and dirtying them via ->page_mkwrite whilst we walk the
+	 * cache and perform delalloc extent removal. Failing to do this can
+	 * leave dirty pages with no space reservation in the cache.
+	 */
+	filemap_invalidate_lock(inode->i_mapping);
+	while (start_byte < scan_end_byte) {
+		loff_t		data_end;
+
+		start_byte = mapping_seek_hole_data(inode->i_mapping,
+				start_byte, scan_end_byte, SEEK_DATA);
+		/*
+		 * If there is no more data to scan, all that is left is to
+		 * punch out the remaining range.
+		 */
+		if (start_byte == -ENXIO || start_byte == scan_end_byte)
+			break;
+		if (start_byte < 0) {
+			error = start_byte;
+			goto out_unlock;
+		}
+		WARN_ON_ONCE(start_byte < punch_start_byte);
+		WARN_ON_ONCE(start_byte > scan_end_byte);
+
+		/*
+		 * We find the end of this contiguous cached data range by
+		 * seeking from start_byte to the beginning of the next hole.
+		 */
+		data_end = mapping_seek_hole_data(inode->i_mapping, start_byte,
+				scan_end_byte, SEEK_HOLE);
+		if (data_end < 0) {
+			error = data_end;
+			goto out_unlock;
+		}
+		WARN_ON_ONCE(data_end <= start_byte);
+		WARN_ON_ONCE(data_end > scan_end_byte);
+
+		error = iomap_write_delalloc_scan(inode, &punch_start_byte,
+				start_byte, data_end, punch);
+		if (error)
+			goto out_unlock;
+
+		/* The next data search starts at the end of this one. */
+		start_byte = data_end;
+	}
+
+	if (punch_start_byte < end_byte)
+		error = punch(inode, punch_start_byte,
+				end_byte - punch_start_byte);
+out_unlock:
+	filemap_invalidate_unlock(inode->i_mapping);
+	return error;
+}
+
 /*
  * When a short write occurs, the filesystem may need to remove reserved space
  * that was allocated in ->iomap_begin from it's ->iomap_end method. For
@@ -842,8 +1001,25 @@ EXPORT_SYMBOL_GPL(iomap_file_buffered_write);
  * allocated for this iomap.
  *
  * This function uses [start_byte, end_byte) intervals (i.e. open ended) to
- * simplify range iterations, but converts them back to {offset,len} tuples for
- * the punch callback.
+ * simplify range iterations.
+ *
+ * The punch() callback *must* only punch delalloc extents in the range passed
+ * to it. It must skip over all other types of extents in the range and leave
+ * them completely unchanged. It must do this punch atomically with respect to
+ * other extent modifications.
+ *
+ * The punch() callback may be called with a folio locked to prevent writeback
+ * extent allocation racing at the edge of the range we are currently punching.
+ * The locked folio may or may not cover the range being punched, so it is not
+ * safe for the punch() callback to lock folios itself.
+ *
+ * Lock order is:
+ *
+ * inode->i_rwsem (shared or exclusive)
+ *   inode->i_mapping->invalidate_lock (exclusive)
+ *     folio_lock()
+ *       ->punch
+ *         internal filesystem allocation lock
  */
 int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 		struct iomap *iomap, loff_t pos, loff_t length,
@@ -853,7 +1029,6 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 	loff_t			start_byte;
 	loff_t			end_byte;
 	int			blocksize = i_blocksize(inode);
-	int			error = 0;
 
 	if (iomap->type != IOMAP_DELALLOC)
 		return 0;
@@ -877,18 +1052,8 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode,
 	if (start_byte >= end_byte)
 		return 0;
 
-	/*
-	 * Lock the mapping to avoid races with page faults re-instantiating
-	 * folios and dirtying them via ->page_mkwrite between the page cache
-	 * truncation and the delalloc extent removal. Failing to do this can
-	 * leave dirty pages with no space reservation in the cache.
-	 */
-	filemap_invalidate_lock(inode->i_mapping);
-	truncate_pagecache_range(inode, start_byte, end_byte - 1);
-	error = punch(inode, start_byte, end_byte - start_byte);
-	filemap_invalidate_unlock(inode->i_mapping);
-
-	return error;
+	return iomap_write_delalloc_release(inode, start_byte, end_byte,
+					punch);
 }
 EXPORT_SYMBOL_GPL(iomap_file_buffered_write_punch_delalloc);
 
-- 
GitLab


From 7348b322332d8602a4133f0b861334ea021b134a Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 29 Nov 2022 09:09:17 +1100
Subject: [PATCH 2854/4122] xfs: xfs_bmap_punch_delalloc_range() should take a
 byte range

All the callers of xfs_bmap_punch_delalloc_range() jump through
hoops to convert a byte range to filesystem blocks before calling
xfs_bmap_punch_delalloc_range(). Instead, pass the byte range to
xfs_bmap_punch_delalloc_range() and have it do the conversion to
filesystem blocks internally.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_aops.c      | 16 ++++++----------
 fs/xfs/xfs_bmap_util.c | 10 ++++++----
 fs/xfs/xfs_bmap_util.h |  2 +-
 fs/xfs/xfs_iomap.c     |  8 ++------
 4 files changed, 15 insertions(+), 21 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5d1a995b15f83..6aadc5815068e 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -114,9 +114,8 @@ xfs_end_ioend(
 	if (unlikely(error)) {
 		if (ioend->io_flags & IOMAP_F_SHARED) {
 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
-			xfs_bmap_punch_delalloc_range(ip,
-						      XFS_B_TO_FSBT(mp, offset),
-						      XFS_B_TO_FSB(mp, size));
+			xfs_bmap_punch_delalloc_range(ip, offset,
+					offset + size);
 		}
 		goto done;
 	}
@@ -455,12 +454,8 @@ xfs_discard_folio(
 	struct folio		*folio,
 	loff_t			pos)
 {
-	struct inode		*inode = folio->mapping->host;
-	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
 	struct xfs_mount	*mp = ip->i_mount;
-	size_t			offset = offset_in_folio(folio, pos);
-	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, pos);
-	xfs_fileoff_t		pageoff_fsb = XFS_B_TO_FSBT(mp, offset);
 	int			error;
 
 	if (xfs_is_shutdown(mp))
@@ -470,8 +465,9 @@ xfs_discard_folio(
 		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
 			folio, ip->i_ino, pos);
 
-	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
-			i_blocks_per_folio(inode, folio) - pageoff_fsb);
+	error = xfs_bmap_punch_delalloc_range(ip, pos,
+			round_up(pos, folio_size(folio)));
+
 	if (error && !xfs_is_shutdown(mp))
 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
 }
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 04d0c2bff67c4..867645b74d889 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -590,11 +590,13 @@ out_unlock_iolock:
 int
 xfs_bmap_punch_delalloc_range(
 	struct xfs_inode	*ip,
-	xfs_fileoff_t		start_fsb,
-	xfs_fileoff_t		length)
+	xfs_off_t		start_byte,
+	xfs_off_t		end_byte)
 {
+	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_ifork	*ifp = &ip->i_df;
-	xfs_fileoff_t		end_fsb = start_fsb + length;
+	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
+	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
 	struct xfs_bmbt_irec	got, del;
 	struct xfs_iext_cursor	icur;
 	int			error = 0;
@@ -607,7 +609,7 @@ xfs_bmap_punch_delalloc_range(
 
 	while (got.br_startoff + got.br_blockcount > start_fsb) {
 		del = got;
-		xfs_trim_extent(&del, start_fsb, length);
+		xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb);
 
 		/*
 		 * A delete can push the cursor forward. Step back to the
diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h
index 24b37d211f1dc..6888078f5c31e 100644
--- a/fs/xfs/xfs_bmap_util.h
+++ b/fs/xfs/xfs_bmap_util.h
@@ -31,7 +31,7 @@ xfs_bmap_rtalloc(struct xfs_bmalloca *ap)
 #endif /* CONFIG_XFS_RT */
 
 int	xfs_bmap_punch_delalloc_range(struct xfs_inode *ip,
-		xfs_fileoff_t start_fsb, xfs_fileoff_t length);
+		xfs_off_t start_byte, xfs_off_t end_byte);
 
 struct kgetbmap {
 	__s64		bmv_offset;	/* file offset of segment in blocks */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index ea96e8a348687..09676ff6940eb 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1126,12 +1126,8 @@ xfs_buffered_write_delalloc_punch(
 	loff_t			offset,
 	loff_t			length)
 {
-	struct xfs_mount	*mp = XFS_M(inode->i_sb);
-	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, offset);
-	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + length);
-
-	return xfs_bmap_punch_delalloc_range(XFS_I(inode), start_fsb,
-				end_fsb - start_fsb);
+	return xfs_bmap_punch_delalloc_range(XFS_I(inode), offset,
+			offset + length);
 }
 
 static int
-- 
GitLab


From d7b64041164ca177170191d2ad775da074ab2926 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 29 Nov 2022 09:09:17 +1100
Subject: [PATCH 2855/4122] iomap: write iomap validity checks

A recent multithreaded write data corruption has been uncovered in
the iomap write code. The core of the problem is partial folio
writes can be flushed to disk while a new racing write can map it
and fill the rest of the page:

writeback			new write

allocate blocks
  blocks are unwritten
submit IO
.....
				map blocks
				iomap indicates UNWRITTEN range
				loop {
				  lock folio
				  copyin data
.....
IO completes
  runs unwritten extent conv
    blocks are marked written
				  <iomap now stale>
				  get next folio
				}

Now add memory pressure such that memory reclaim evicts the
partially written folio that has already been written to disk.

When the new write finally gets to the last partial page of the new
write, it does not find it in cache, so it instantiates a new page,
sees the iomap is unwritten, and zeros the part of the page that
it does not have data from. This overwrites the data on disk that
was originally written.

The full description of the corruption mechanism can be found here:

https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/

To solve this problem, we need to check whether the iomap is still
valid after we lock each folio during the write. We have to do it
after we lock the page so that we don't end up with state changes
occurring while we wait for the folio to be locked.

Hence we need a mechanism to be able to check that the cached iomap
is still valid (similar to what we already do in buffered
writeback), and we need a way for ->begin_write to back out and
tell the high level iomap iterator that we need to remap the
remaining write range.

The iomap needs to grow some storage for the validity cookie that
the filesystem provides to travel with the iomap. XFS, in
particular, also needs to know some more information about what the
iomap maps (attribute extents rather than file data extents) to for
the validity cookie to cover all the types of iomaps we might need
to validate.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/iomap/buffered-io.c | 29 +++++++++++++++++++++++++++-
 fs/iomap/iter.c        | 19 ++++++++++++++++++-
 include/linux/iomap.h  | 43 ++++++++++++++++++++++++++++++++++--------
 3 files changed, 81 insertions(+), 10 deletions(-)

diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index dca9ec9dc4a8e..356193e44cf07 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -584,7 +584,7 @@ static int iomap_write_begin_inline(const struct iomap_iter *iter,
 	return iomap_read_inline_data(iter, folio);
 }
 
-static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
+static int iomap_write_begin(struct iomap_iter *iter, loff_t pos,
 		size_t len, struct folio **foliop)
 {
 	const struct iomap_page_ops *page_ops = iter->iomap.page_ops;
@@ -618,6 +618,27 @@ static int iomap_write_begin(const struct iomap_iter *iter, loff_t pos,
 		status = (iter->flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOMEM;
 		goto out_no_page;
 	}
+
+	/*
+	 * Now we have a locked folio, before we do anything with it we need to
+	 * check that the iomap we have cached is not stale. The inode extent
+	 * mapping can change due to concurrent IO in flight (e.g.
+	 * IOMAP_UNWRITTEN state can change and memory reclaim could have
+	 * reclaimed a previously partially written page at this index after IO
+	 * completion before this write reaches this file offset) and hence we
+	 * could do the wrong thing here (zero a page range incorrectly or fail
+	 * to zero) and corrupt data.
+	 */
+	if (page_ops && page_ops->iomap_valid) {
+		bool iomap_valid = page_ops->iomap_valid(iter->inode,
+							&iter->iomap);
+		if (!iomap_valid) {
+			iter->iomap.flags |= IOMAP_F_STALE;
+			status = 0;
+			goto out_unlock;
+		}
+	}
+
 	if (pos + len > folio_pos(folio) + folio_size(folio))
 		len = folio_pos(folio) + folio_size(folio) - pos;
 
@@ -773,6 +794,8 @@ again:
 		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (unlikely(status))
 			break;
+		if (iter->iomap.flags & IOMAP_F_STALE)
+			break;
 
 		page = folio_file_page(folio, pos >> PAGE_SHIFT);
 		if (mapping_writably_mapped(mapping))
@@ -1081,6 +1104,8 @@ static loff_t iomap_unshare_iter(struct iomap_iter *iter)
 		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (unlikely(status))
 			return status;
+		if (iter->iomap.flags & IOMAP_F_STALE)
+			break;
 
 		status = iomap_write_end(iter, pos, bytes, bytes, folio);
 		if (WARN_ON_ONCE(status == 0))
@@ -1136,6 +1161,8 @@ static loff_t iomap_zero_iter(struct iomap_iter *iter, bool *did_zero)
 		status = iomap_write_begin(iter, pos, bytes, &folio);
 		if (status)
 			return status;
+		if (iter->iomap.flags & IOMAP_F_STALE)
+			break;
 
 		offset = offset_in_folio(folio, pos);
 		if (bytes > folio_size(folio) - offset)
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
index a1c7592d2aded..79a0614eaab77 100644
--- a/fs/iomap/iter.c
+++ b/fs/iomap/iter.c
@@ -7,12 +7,28 @@
 #include <linux/iomap.h>
 #include "trace.h"
 
+/*
+ * Advance to the next range we need to map.
+ *
+ * If the iomap is marked IOMAP_F_STALE, it means the existing map was not fully
+ * processed - it was aborted because the extent the iomap spanned may have been
+ * changed during the operation. In this case, the iteration behaviour is to
+ * remap the unprocessed range of the iter, and that means we may need to remap
+ * even when we've made no progress (i.e. iter->processed = 0). Hence the
+ * "finished iterating" case needs to distinguish between
+ * (processed = 0) meaning we are done and (processed = 0 && stale) meaning we
+ * need to remap the entire remaining range.
+ */
 static inline int iomap_iter_advance(struct iomap_iter *iter)
 {
+	bool stale = iter->iomap.flags & IOMAP_F_STALE;
+
 	/* handle the previous iteration (if any) */
 	if (iter->iomap.length) {
-		if (iter->processed <= 0)
+		if (iter->processed < 0)
 			return iter->processed;
+		if (!iter->processed && !stale)
+			return 0;
 		if (WARN_ON_ONCE(iter->processed > iomap_length(iter)))
 			return -EIO;
 		iter->pos += iter->processed;
@@ -33,6 +49,7 @@ static inline void iomap_iter_done(struct iomap_iter *iter)
 	WARN_ON_ONCE(iter->iomap.offset > iter->pos);
 	WARN_ON_ONCE(iter->iomap.length == 0);
 	WARN_ON_ONCE(iter->iomap.offset + iter->iomap.length <= iter->pos);
+	WARN_ON_ONCE(iter->iomap.flags & IOMAP_F_STALE);
 
 	trace_iomap_iter_dstmap(iter->inode, &iter->iomap);
 	if (iter->srcmap.type != IOMAP_HOLE)
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 0698c4b8ce0e2..0983dfc9a203c 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -49,26 +49,35 @@ struct vm_fault;
  *
  * IOMAP_F_BUFFER_HEAD indicates that the file system requires the use of
  * buffer heads for this mapping.
+ *
+ * IOMAP_F_XATTR indicates that the iomap is for an extended attribute extent
+ * rather than a file data extent.
  */
-#define IOMAP_F_NEW		0x01
-#define IOMAP_F_DIRTY		0x02
-#define IOMAP_F_SHARED		0x04
-#define IOMAP_F_MERGED		0x08
-#define IOMAP_F_BUFFER_HEAD	0x10
-#define IOMAP_F_ZONE_APPEND	0x20
+#define IOMAP_F_NEW		(1U << 0)
+#define IOMAP_F_DIRTY		(1U << 1)
+#define IOMAP_F_SHARED		(1U << 2)
+#define IOMAP_F_MERGED		(1U << 3)
+#define IOMAP_F_BUFFER_HEAD	(1U << 4)
+#define IOMAP_F_ZONE_APPEND	(1U << 5)
+#define IOMAP_F_XATTR		(1U << 6)
 
 /*
  * Flags set by the core iomap code during operations:
  *
  * IOMAP_F_SIZE_CHANGED indicates to the iomap_end method that the file size
  * has changed as the result of this write operation.
+ *
+ * IOMAP_F_STALE indicates that the iomap is not valid any longer and the file
+ * range it covers needs to be remapped by the high level before the operation
+ * can proceed.
  */
-#define IOMAP_F_SIZE_CHANGED	0x100
+#define IOMAP_F_SIZE_CHANGED	(1U << 8)
+#define IOMAP_F_STALE		(1U << 9)
 
 /*
  * Flags from 0x1000 up are for file system specific usage:
  */
-#define IOMAP_F_PRIVATE		0x1000
+#define IOMAP_F_PRIVATE		(1U << 12)
 
 
 /*
@@ -89,6 +98,7 @@ struct iomap {
 	void			*inline_data;
 	void			*private; /* filesystem private */
 	const struct iomap_page_ops *page_ops;
+	u64			validity_cookie; /* used with .iomap_valid() */
 };
 
 static inline sector_t iomap_sector(const struct iomap *iomap, loff_t pos)
@@ -128,6 +138,23 @@ struct iomap_page_ops {
 	int (*page_prepare)(struct inode *inode, loff_t pos, unsigned len);
 	void (*page_done)(struct inode *inode, loff_t pos, unsigned copied,
 			struct page *page);
+
+	/*
+	 * Check that the cached iomap still maps correctly to the filesystem's
+	 * internal extent map. FS internal extent maps can change while iomap
+	 * is iterating a cached iomap, so this hook allows iomap to detect that
+	 * the iomap needs to be refreshed during a long running write
+	 * operation.
+	 *
+	 * The filesystem can store internal state (e.g. a sequence number) in
+	 * iomap->validity_cookie when the iomap is first mapped to be able to
+	 * detect changes between mapping time and whenever .iomap_valid() is
+	 * called.
+	 *
+	 * This is called with the folio over the specified file position held
+	 * locked by the iomap code.
+	 */
+	bool (*iomap_valid)(struct inode *inode, const struct iomap *iomap);
 };
 
 /*
-- 
GitLab


From 304a68b9c63bbfc1f6e159d68e8892fc54a06067 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 29 Nov 2022 09:09:17 +1100
Subject: [PATCH 2856/4122] xfs: use iomap_valid method to detect stale cached
 iomaps

Now that iomap supports a mechanism to validate cached iomaps for
buffered write operations, hook it up to the XFS buffered write ops
so that we can avoid data corruptions that result from stale cached
iomaps. See:

https://lore.kernel.org/linux-xfs/20220817093627.GZ3600936@dread.disaster.area/

or the ->iomap_valid() introduction commit for exact details of the
corruption vector.

The validity cookie we store in the iomap is based on the type of
iomap we return. It is expected that the iomap->flags we set in
xfs_bmbt_to_iomap() is not perturbed by the iomap core and are
returned to us in the iomap passed via the .iomap_valid() callback.
This ensures that the validity cookie is always checking the correct
inode fork sequence numbers to detect potential changes that affect
the extent cached by the iomap.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_bmap.c |  6 ++-
 fs/xfs/xfs_aops.c        |  2 +-
 fs/xfs/xfs_iomap.c       | 95 +++++++++++++++++++++++++++++++---------
 fs/xfs/xfs_iomap.h       |  5 ++-
 fs/xfs/xfs_pnfs.c        |  6 ++-
 5 files changed, 87 insertions(+), 27 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 49d0d4ea63fcd..56b9b7db38bbd 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4551,7 +4551,8 @@ xfs_bmapi_convert_delalloc(
 	 * the extent.  Just return the real extent at this offset.
 	 */
 	if (!isnullstartblock(bma.got.br_startblock)) {
-		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
+		xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
+				xfs_iomap_inode_sequence(ip, flags));
 		*seq = READ_ONCE(ifp->if_seq);
 		goto out_trans_cancel;
 	}
@@ -4599,7 +4600,8 @@ xfs_bmapi_convert_delalloc(
 	XFS_STATS_INC(mp, xs_xstrat_quick);
 
 	ASSERT(!isnullstartblock(bma.got.br_startblock));
-	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags);
+	xfs_bmbt_to_iomap(ip, iomap, &bma.got, 0, flags,
+				xfs_iomap_inode_sequence(ip, flags));
 	*seq = READ_ONCE(ifp->if_seq);
 
 	if (whichfork == XFS_COW_FORK)
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 6aadc5815068e..a22d90af40c85 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -372,7 +372,7 @@ retry:
 	    isnullstartblock(imap.br_startblock))
 		goto allocate_blocks;
 
-	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
+	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
 	return 0;
 allocate_blocks:
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 09676ff6940eb..26ca3cc1a0489 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -48,13 +48,45 @@ xfs_alert_fsblock_zero(
 	return -EFSCORRUPTED;
 }
 
+u64
+xfs_iomap_inode_sequence(
+	struct xfs_inode	*ip,
+	u16			iomap_flags)
+{
+	u64			cookie = 0;
+
+	if (iomap_flags & IOMAP_F_XATTR)
+		return READ_ONCE(ip->i_af.if_seq);
+	if ((iomap_flags & IOMAP_F_SHARED) && ip->i_cowfp)
+		cookie = (u64)READ_ONCE(ip->i_cowfp->if_seq) << 32;
+	return cookie | READ_ONCE(ip->i_df.if_seq);
+}
+
+/*
+ * Check that the iomap passed to us is still valid for the given offset and
+ * length.
+ */
+static bool
+xfs_iomap_valid(
+	struct inode		*inode,
+	const struct iomap	*iomap)
+{
+	return iomap->validity_cookie ==
+			xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags);
+}
+
+const struct iomap_page_ops xfs_iomap_page_ops = {
+	.iomap_valid		= xfs_iomap_valid,
+};
+
 int
 xfs_bmbt_to_iomap(
 	struct xfs_inode	*ip,
 	struct iomap		*iomap,
 	struct xfs_bmbt_irec	*imap,
 	unsigned int		mapping_flags,
-	u16			iomap_flags)
+	u16			iomap_flags,
+	u64			sequence_cookie)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
@@ -91,6 +123,9 @@ xfs_bmbt_to_iomap(
 	if (xfs_ipincount(ip) &&
 	    (ip->i_itemp->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
 		iomap->flags |= IOMAP_F_DIRTY;
+
+	iomap->validity_cookie = sequence_cookie;
+	iomap->page_ops = &xfs_iomap_page_ops;
 	return 0;
 }
 
@@ -195,7 +230,8 @@ xfs_iomap_write_direct(
 	xfs_fileoff_t		offset_fsb,
 	xfs_fileoff_t		count_fsb,
 	unsigned int		flags,
-	struct xfs_bmbt_irec	*imap)
+	struct xfs_bmbt_irec	*imap,
+	u64			*seq)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	struct xfs_trans	*tp;
@@ -285,6 +321,7 @@ xfs_iomap_write_direct(
 		error = xfs_alert_fsblock_zero(ip, imap);
 
 out_unlock:
+	*seq = xfs_iomap_inode_sequence(ip, 0);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	return error;
 
@@ -743,6 +780,7 @@ xfs_direct_write_iomap_begin(
 	bool			shared = false;
 	u16			iomap_flags = 0;
 	unsigned int		lockmode = XFS_ILOCK_SHARED;
+	u64			seq;
 
 	ASSERT(flags & (IOMAP_WRITE | IOMAP_ZERO));
 
@@ -811,9 +849,10 @@ xfs_direct_write_iomap_begin(
 			goto out_unlock;
 	}
 
+	seq = xfs_iomap_inode_sequence(ip, iomap_flags);
 	xfs_iunlock(ip, lockmode);
 	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, iomap_flags, seq);
 
 allocate_blocks:
 	error = -EAGAIN;
@@ -839,24 +878,26 @@ allocate_blocks:
 	xfs_iunlock(ip, lockmode);
 
 	error = xfs_iomap_write_direct(ip, offset_fsb, end_fsb - offset_fsb,
-			flags, &imap);
+			flags, &imap, &seq);
 	if (error)
 		return error;
 
 	trace_xfs_iomap_alloc(ip, offset, length, XFS_DATA_FORK, &imap);
 	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
-				 iomap_flags | IOMAP_F_NEW);
+				 iomap_flags | IOMAP_F_NEW, seq);
 
 out_found_cow:
-	xfs_iunlock(ip, lockmode);
 	length = XFS_FSB_TO_B(mp, cmap.br_startoff + cmap.br_blockcount);
 	trace_xfs_iomap_found(ip, offset, length - offset, XFS_COW_FORK, &cmap);
 	if (imap.br_startblock != HOLESTARTBLOCK) {
-		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
+		seq = xfs_iomap_inode_sequence(ip, 0);
+		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
 		if (error)
-			return error;
+			goto out_unlock;
 	}
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED);
+	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+	xfs_iunlock(ip, lockmode);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, IOMAP_F_SHARED, seq);
 
 out_unlock:
 	if (lockmode)
@@ -915,6 +956,7 @@ xfs_buffered_write_iomap_begin(
 	int			allocfork = XFS_DATA_FORK;
 	int			error = 0;
 	unsigned int		lockmode = XFS_ILOCK_EXCL;
+	u64			seq;
 
 	if (xfs_is_shutdown(mp))
 		return -EIO;
@@ -1094,26 +1136,31 @@ retry:
 	 * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch
 	 * them out if the write happens to fail.
 	 */
+	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_NEW);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	trace_xfs_iomap_alloc(ip, offset, count, allocfork, &imap);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_NEW, seq);
 
 found_imap:
+	seq = xfs_iomap_inode_sequence(ip, 0);
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
 
 found_cow:
-	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	seq = xfs_iomap_inode_sequence(ip, 0);
 	if (imap.br_startoff <= offset_fsb) {
-		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0);
+		error = xfs_bmbt_to_iomap(ip, srcmap, &imap, flags, 0, seq);
 		if (error)
-			return error;
+			goto out_unlock;
+		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
-					 IOMAP_F_SHARED);
+					 IOMAP_F_SHARED, seq);
 	}
 
 	xfs_trim_extent(&cmap, offset_fsb, imap.br_startoff - offset_fsb);
-	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return xfs_bmbt_to_iomap(ip, iomap, &cmap, flags, 0, seq);
 
 out_unlock:
 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -1193,6 +1240,7 @@ xfs_read_iomap_begin(
 	int			nimaps = 1, error = 0;
 	bool			shared = false;
 	unsigned int		lockmode = XFS_ILOCK_SHARED;
+	u64			seq;
 
 	ASSERT(!(flags & (IOMAP_WRITE | IOMAP_ZERO)));
 
@@ -1206,13 +1254,14 @@ xfs_read_iomap_begin(
 			       &nimaps, 0);
 	if (!error && (flags & IOMAP_REPORT))
 		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
+	seq = xfs_iomap_inode_sequence(ip, shared ? IOMAP_F_SHARED : 0);
 	xfs_iunlock(ip, lockmode);
 
 	if (error)
 		return error;
 	trace_xfs_iomap_found(ip, offset, length, XFS_DATA_FORK, &imap);
 	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags,
-				 shared ? IOMAP_F_SHARED : 0);
+				 shared ? IOMAP_F_SHARED : 0, seq);
 }
 
 const struct iomap_ops xfs_read_iomap_ops = {
@@ -1237,6 +1286,7 @@ xfs_seek_iomap_begin(
 	struct xfs_bmbt_irec	imap, cmap;
 	int			error = 0;
 	unsigned		lockmode;
+	u64			seq;
 
 	if (xfs_is_shutdown(mp))
 		return -EIO;
@@ -1271,8 +1321,9 @@ xfs_seek_iomap_begin(
 		if (data_fsb < cow_fsb + cmap.br_blockcount)
 			end_fsb = min(end_fsb, data_fsb);
 		xfs_trim_extent(&cmap, offset_fsb, end_fsb);
+		seq = xfs_iomap_inode_sequence(ip, IOMAP_F_SHARED);
 		error = xfs_bmbt_to_iomap(ip, iomap, &cmap, flags,
-					  IOMAP_F_SHARED);
+				IOMAP_F_SHARED, seq);
 		/*
 		 * This is a COW extent, so we must probe the page cache
 		 * because there could be dirty page cache being backed
@@ -1293,8 +1344,9 @@ xfs_seek_iomap_begin(
 	imap.br_startblock = HOLESTARTBLOCK;
 	imap.br_state = XFS_EXT_NORM;
 done:
+	seq = xfs_iomap_inode_sequence(ip, 0);
 	xfs_trim_extent(&imap, offset_fsb, end_fsb);
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0, seq);
 out_unlock:
 	xfs_iunlock(ip, lockmode);
 	return error;
@@ -1320,6 +1372,7 @@ xfs_xattr_iomap_begin(
 	struct xfs_bmbt_irec	imap;
 	int			nimaps = 1, error = 0;
 	unsigned		lockmode;
+	int			seq;
 
 	if (xfs_is_shutdown(mp))
 		return -EIO;
@@ -1336,12 +1389,14 @@ xfs_xattr_iomap_begin(
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, XFS_BMAPI_ATTRFORK);
 out_unlock:
+
+	seq = xfs_iomap_inode_sequence(ip, IOMAP_F_XATTR);
 	xfs_iunlock(ip, lockmode);
 
 	if (error)
 		return error;
 	ASSERT(nimaps);
-	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, 0);
+	return xfs_bmbt_to_iomap(ip, iomap, &imap, flags, IOMAP_F_XATTR, seq);
 }
 
 const struct iomap_ops xfs_xattr_iomap_ops = {
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 0f62ab633040c..4da13440bae9b 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -13,14 +13,15 @@ struct xfs_bmbt_irec;
 
 int xfs_iomap_write_direct(struct xfs_inode *ip, xfs_fileoff_t offset_fsb,
 		xfs_fileoff_t count_fsb, unsigned int flags,
-		struct xfs_bmbt_irec *imap);
+		struct xfs_bmbt_irec *imap, u64 *sequence);
 int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, xfs_off_t, bool);
 xfs_fileoff_t xfs_iomap_eof_align_last_fsb(struct xfs_inode *ip,
 		xfs_fileoff_t end_fsb);
 
+u64 xfs_iomap_inode_sequence(struct xfs_inode *ip, u16 iomap_flags);
 int xfs_bmbt_to_iomap(struct xfs_inode *ip, struct iomap *iomap,
 		struct xfs_bmbt_irec *imap, unsigned int mapping_flags,
-		u16 iomap_flags);
+		u16 iomap_flags, u64 sequence_cookie);
 
 int xfs_zero_range(struct xfs_inode *ip, loff_t pos, loff_t len,
 		bool *did_zero);
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 37a24f0f7cd40..38d23f0e703a8 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -125,6 +125,7 @@ xfs_fs_map_blocks(
 	int			nimaps = 1;
 	uint			lock_flags;
 	int			error = 0;
+	u64			seq;
 
 	if (xfs_is_shutdown(mp))
 		return -EIO;
@@ -176,6 +177,7 @@ xfs_fs_map_blocks(
 	lock_flags = xfs_ilock_data_map_shared(ip);
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
 				&imap, &nimaps, bmapi_flags);
+	seq = xfs_iomap_inode_sequence(ip, 0);
 
 	ASSERT(!nimaps || imap.br_startblock != DELAYSTARTBLOCK);
 
@@ -189,7 +191,7 @@ xfs_fs_map_blocks(
 		xfs_iunlock(ip, lock_flags);
 
 		error = xfs_iomap_write_direct(ip, offset_fsb,
-				end_fsb - offset_fsb, 0, &imap);
+				end_fsb - offset_fsb, 0, &imap, &seq);
 		if (error)
 			goto out_unlock;
 
@@ -209,7 +211,7 @@ xfs_fs_map_blocks(
 	}
 	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
 
-	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0);
+	error = xfs_bmbt_to_iomap(ip, iomap, &imap, 0, 0, seq);
 	*device_generation = mp->m_generation;
 	return error;
 out_unlock:
-- 
GitLab


From 6e8af15ccdc4e138a5b529c1901a0013e1dcaa09 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 29 Nov 2022 09:09:17 +1100
Subject: [PATCH 2857/4122] xfs: drop write error injection is unfixable,
 remove it

With the changes to scan the page cache for dirty data to avoid data
corruptions from partial write cleanup racing with other page cache
operations, the drop writes error injection no longer works the same
way it used to and causes xfs/196 to fail. This is because xfs/196
writes to the file and populates the page cache before it turns on
the error injection and starts failing -overwrites-.

The result is that the original drop-writes code failed writes only
-after- overwriting the data in the cache, followed by invalidates
the cached data, then punching out the delalloc extent from under
that data.

On the surface, this looks fine. The problem is that page cache
invalidation *doesn't guarantee that it removes anything from the
page cache* and it doesn't change the dirty state of the folio. When
block size == page size and we do page aligned IO (as xfs/196 does)
everything happens to align perfectly and page cache invalidation
removes the single page folios that span the written data. Hence the
followup delalloc punch pass does not find cached data over that
range and it can punch the extent out.

IOWs, xfs/196 "works" for block size == page size with the new
code. I say "works", because it actually only works for the case
where IO is page aligned, and no data was read from disk before
writes occur. Because the moment we actually read data first, the
readahead code allocates multipage folios and suddenly the
invalidate code goes back to zeroing subfolio ranges without
changing dirty state.

Hence, with multipage folios in play, block size == page size is
functionally identical to block size < page size behaviour, and
drop-writes is manifestly broken w.r.t to this case. Invalidation of
a subfolio range doesn't result in the folio being removed from the
cache, just the range gets zeroed. Hence after we've sequentially
walked over a folio that we've dirtied (via write data) and then
invalidated, we end up with a dirty folio full of zeroed data.

And because the new code skips punching ranges that have dirty
folios covering them, we end up leaving the delalloc range intact
after failing all the writes. Hence failed writes now end up
writing zeroes to disk in the cases where invalidation zeroes folios
rather than removing them from cache.

This is a fundamental change of behaviour that is needed to avoid
the data corruption vectors that exist in the old write fail path,
and it renders the drop-writes injection non-functional and
unworkable as it stands.

As it is, I think the error injection is also now unnecessary, as
partial writes that need delalloc extent are going to be a lot more
common with stale iomap detection in place. Hence this patch removes
the drop-writes error injection completely. xfs/196 can remain for
testing kernels that don't have this data corruption fix, but those
that do will report:

xfs/196 3s ... [not run] XFS error injection drop_writes unknown on this kernel.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_errortag.h | 12 +++++-------
 fs/xfs/xfs_error.c           | 27 ++++++++++++++++++++-------
 fs/xfs/xfs_iomap.c           |  9 ---------
 3 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index 5362908164b0b..580ccbd5aadc2 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -40,13 +40,12 @@
 #define XFS_ERRTAG_REFCOUNT_FINISH_ONE			25
 #define XFS_ERRTAG_BMAP_FINISH_ONE			26
 #define XFS_ERRTAG_AG_RESV_CRITICAL			27
+
 /*
- * DEBUG mode instrumentation to test and/or trigger delayed allocation
- * block killing in the event of failed writes. When enabled, all
- * buffered writes are silenty dropped and handled as if they failed.
- * All delalloc blocks in the range of the write (including pre-existing
- * delalloc blocks!) are tossed as part of the write failure error
- * handling sequence.
+ * Drop-writes support removed because write error handling cannot trash
+ * pre-existing delalloc extents in any useful way anymore. We retain the
+ * definition so that we can reject it as an invalid value in
+ * xfs_errortag_valid().
  */
 #define XFS_ERRTAG_DROP_WRITES				28
 #define XFS_ERRTAG_LOG_BAD_CRC				29
@@ -95,7 +94,6 @@
 #define XFS_RANDOM_REFCOUNT_FINISH_ONE			1
 #define XFS_RANDOM_BMAP_FINISH_ONE			1
 #define XFS_RANDOM_AG_RESV_CRITICAL			4
-#define XFS_RANDOM_DROP_WRITES				1
 #define XFS_RANDOM_LOG_BAD_CRC				1
 #define XFS_RANDOM_LOG_ITEM_PIN				1
 #define XFS_RANDOM_BUF_LRU_REF				2
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index c6b2aabd6f187..dea3c0649d2f7 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -46,7 +46,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_REFCOUNT_FINISH_ONE,
 	XFS_RANDOM_BMAP_FINISH_ONE,
 	XFS_RANDOM_AG_RESV_CRITICAL,
-	XFS_RANDOM_DROP_WRITES,
+	0, /* XFS_RANDOM_DROP_WRITES has been removed */
 	XFS_RANDOM_LOG_BAD_CRC,
 	XFS_RANDOM_LOG_ITEM_PIN,
 	XFS_RANDOM_BUF_LRU_REF,
@@ -162,7 +162,6 @@ XFS_ERRORTAG_ATTR_RW(refcount_continue_update,	XFS_ERRTAG_REFCOUNT_CONTINUE_UPDA
 XFS_ERRORTAG_ATTR_RW(refcount_finish_one,	XFS_ERRTAG_REFCOUNT_FINISH_ONE);
 XFS_ERRORTAG_ATTR_RW(bmap_finish_one,	XFS_ERRTAG_BMAP_FINISH_ONE);
 XFS_ERRORTAG_ATTR_RW(ag_resv_critical,	XFS_ERRTAG_AG_RESV_CRITICAL);
-XFS_ERRORTAG_ATTR_RW(drop_writes,	XFS_ERRTAG_DROP_WRITES);
 XFS_ERRORTAG_ATTR_RW(log_bad_crc,	XFS_ERRTAG_LOG_BAD_CRC);
 XFS_ERRORTAG_ATTR_RW(log_item_pin,	XFS_ERRTAG_LOG_ITEM_PIN);
 XFS_ERRORTAG_ATTR_RW(buf_lru_ref,	XFS_ERRTAG_BUF_LRU_REF);
@@ -206,7 +205,6 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(refcount_finish_one),
 	XFS_ERRORTAG_ATTR_LIST(bmap_finish_one),
 	XFS_ERRORTAG_ATTR_LIST(ag_resv_critical),
-	XFS_ERRORTAG_ATTR_LIST(drop_writes),
 	XFS_ERRORTAG_ATTR_LIST(log_bad_crc),
 	XFS_ERRORTAG_ATTR_LIST(log_item_pin),
 	XFS_ERRORTAG_ATTR_LIST(buf_lru_ref),
@@ -256,6 +254,19 @@ xfs_errortag_del(
 	kmem_free(mp->m_errortag);
 }
 
+static bool
+xfs_errortag_valid(
+	unsigned int		error_tag)
+{
+	if (error_tag >= XFS_ERRTAG_MAX)
+		return false;
+
+	/* Error out removed injection types */
+	if (error_tag == XFS_ERRTAG_DROP_WRITES)
+		return false;
+	return true;
+}
+
 bool
 xfs_errortag_test(
 	struct xfs_mount	*mp,
@@ -277,7 +288,9 @@ xfs_errortag_test(
 	if (!mp->m_errortag)
 		return false;
 
-	ASSERT(error_tag < XFS_ERRTAG_MAX);
+	if (!xfs_errortag_valid(error_tag))
+		return false;
+
 	randfactor = mp->m_errortag[error_tag];
 	if (!randfactor || prandom_u32_max(randfactor))
 		return false;
@@ -293,7 +306,7 @@ xfs_errortag_get(
 	struct xfs_mount	*mp,
 	unsigned int		error_tag)
 {
-	if (error_tag >= XFS_ERRTAG_MAX)
+	if (!xfs_errortag_valid(error_tag))
 		return -EINVAL;
 
 	return mp->m_errortag[error_tag];
@@ -305,7 +318,7 @@ xfs_errortag_set(
 	unsigned int		error_tag,
 	unsigned int		tag_value)
 {
-	if (error_tag >= XFS_ERRTAG_MAX)
+	if (!xfs_errortag_valid(error_tag))
 		return -EINVAL;
 
 	mp->m_errortag[error_tag] = tag_value;
@@ -319,7 +332,7 @@ xfs_errortag_add(
 {
 	BUILD_BUG_ON(ARRAY_SIZE(xfs_errortag_random_default) != XFS_ERRTAG_MAX);
 
-	if (error_tag >= XFS_ERRTAG_MAX)
+	if (!xfs_errortag_valid(error_tag))
 		return -EINVAL;
 
 	return xfs_errortag_set(mp, error_tag,
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 26ca3cc1a0489..1bdd7afc10108 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1190,15 +1190,6 @@ xfs_buffered_write_iomap_end(
 	struct xfs_mount	*mp = XFS_M(inode->i_sb);
 	int			error;
 
-	/*
-	 * Behave as if the write failed if drop writes is enabled. Set the NEW
-	 * flag to force delalloc cleanup.
-	 */
-	if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_DROP_WRITES)) {
-		iomap->flags |= IOMAP_F_NEW;
-		written = 0;
-	}
-
 	error = iomap_file_buffered_write_punch_delalloc(inode, iomap, offset,
 			length, written, &xfs_buffered_write_delalloc_punch);
 	if (error && !xfs_is_shutdown(mp)) {
-- 
GitLab


From 46a5cd8c8dcacc58912b4bf04ca4d84d2977bbbc Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 17 Oct 2022 14:05:13 -0400
Subject: [PATCH 2858/4122] tracing: Update MAINTAINERS file for new patchwork
 and mailing list

The tracing subsystem now has its own mailing list (although patches
should also be sent to LKML) as well as a new patchwork entry for kernel
related tracing patches.

Update the MAINTAINERS file to reflect the changes.

Link: https://lore.kernel.org/linux-trace-kernel/20221017140513.14b9ce2e@gandalf.local.home

Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 MAINTAINERS | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 2585e7edc3358..d12576150a706 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8461,6 +8461,9 @@ FUNCTION HOOKS (FTRACE)
 M:	Steven Rostedt <rostedt@goodmis.org>
 M:	Masami Hiramatsu <mhiramat@kernel.org>
 R:	Mark Rutland <mark.rutland@arm.com>
+L:	linux-kernel@vger.kernel.org
+L:	linux-trace-kernel@vger.kernel.org
+Q:	https://patchwork.kernel.org/project/linux-trace-kernel/list/
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
 F:	Documentation/trace/ftrace*
@@ -11483,6 +11486,9 @@ M:	Naveen N. Rao <naveen.n.rao@linux.ibm.com>
 M:	Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 M:	"David S. Miller" <davem@davemloft.net>
 M:	Masami Hiramatsu <mhiramat@kernel.org>
+L:	linux-kernel@vger.kernel.org
+L:	linux-trace-kernel@vger.kernel.org
+Q:	https://patchwork.kernel.org/project/linux-trace-kernel/list/
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
 F:	Documentation/trace/kprobes.rst
@@ -20862,6 +20868,9 @@ F:	drivers/hwmon/pmbus/tps546d24.c
 TRACING
 M:	Steven Rostedt <rostedt@goodmis.org>
 M:	Masami Hiramatsu <mhiramat@kernel.org>
+L:	linux-kernel@vger.kernel.org
+L:	linux-trace-kernel@vger.kernel.org
+Q:	https://patchwork.kernel.org/project/linux-trace-kernel/list/
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/trace/linux-trace.git
 F:	Documentation/trace/*
-- 
GitLab


From c2beff99eb03866df6fdbd3a93b08fd27eb8bf5c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 28 Nov 2022 17:24:35 -0800
Subject: [PATCH 2859/4122] xfs: add debug knob to slow down writeback for fun

Add a new error injection knob so that we can arbitrarily slow down
writeback to test for race conditions and aberrant reclaim behavior if
the writeback mechanisms are slow to issue writeback.  This will enable
functional testing for the ifork sequence counters introduced in commit
745b3f76d1c8 ("xfs: maintain a sequence count for inode fork
manipulations").

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_errortag.h |  4 +++-
 fs/xfs/xfs_aops.c            | 14 ++++++++++--
 fs/xfs/xfs_error.c           | 16 +++++++++++++
 fs/xfs/xfs_error.h           | 13 +++++++++++
 fs/xfs/xfs_trace.c           |  2 ++
 fs/xfs/xfs_trace.h           | 44 ++++++++++++++++++++++++++++++++++++
 6 files changed, 90 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index 580ccbd5aadc2..f5f629174ecad 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -61,7 +61,8 @@
 #define XFS_ERRTAG_LARP					39
 #define XFS_ERRTAG_DA_LEAF_SPLIT			40
 #define XFS_ERRTAG_ATTR_LEAF_TO_NODE			41
-#define XFS_ERRTAG_MAX					42
+#define XFS_ERRTAG_WB_DELAY_MS				42
+#define XFS_ERRTAG_MAX					43
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -107,5 +108,6 @@
 #define XFS_RANDOM_LARP					1
 #define XFS_RANDOM_DA_LEAF_SPLIT			1
 #define XFS_RANDOM_ATTR_LEAF_TO_NODE			1
+#define XFS_RANDOM_WB_DELAY_MS				3000
 
 #endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a22d90af40c85..41734202796f4 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -17,6 +17,8 @@
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
 #include "xfs_reflink.h"
+#include "xfs_errortag.h"
+#include "xfs_error.h"
 
 struct xfs_writepage_ctx {
 	struct iomap_writepage_ctx ctx;
@@ -217,11 +219,17 @@ xfs_imap_valid(
 	 * checked (and found nothing at this offset) could have added
 	 * overlapping blocks.
 	 */
-	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
+	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) {
+		trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap,
+				XFS_WPC(wpc)->data_seq, XFS_DATA_FORK);
 		return false;
+	}
 	if (xfs_inode_has_cow_data(ip) &&
-	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
+	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) {
+		trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap,
+				XFS_WPC(wpc)->cow_seq, XFS_COW_FORK);
 		return false;
+	}
 	return true;
 }
 
@@ -285,6 +293,8 @@ xfs_map_blocks(
 	if (xfs_is_shutdown(mp))
 		return -EIO;
 
+	XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS);
+
 	/*
 	 * COW fork blocks can overlap data fork blocks even if the blocks
 	 * aren't shared.  COW I/O always takes precedent, so we must always
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index dea3c0649d2f7..2d6e3c718e03d 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -60,6 +60,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_LARP,
 	XFS_RANDOM_DA_LEAF_SPLIT,
 	XFS_RANDOM_ATTR_LEAF_TO_NODE,
+	XFS_RANDOM_WB_DELAY_MS,
 };
 
 struct xfs_errortag_attr {
@@ -175,6 +176,7 @@ XFS_ERRORTAG_ATTR_RW(ag_resv_fail, XFS_ERRTAG_AG_RESV_FAIL);
 XFS_ERRORTAG_ATTR_RW(larp,		XFS_ERRTAG_LARP);
 XFS_ERRORTAG_ATTR_RW(da_leaf_split,	XFS_ERRTAG_DA_LEAF_SPLIT);
 XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node,	XFS_ERRTAG_ATTR_LEAF_TO_NODE);
+XFS_ERRORTAG_ATTR_RW(wb_delay_ms,	XFS_ERRTAG_WB_DELAY_MS);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -218,6 +220,7 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(larp),
 	XFS_ERRORTAG_ATTR_LIST(da_leaf_split),
 	XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node),
+	XFS_ERRORTAG_ATTR_LIST(wb_delay_ms),
 	NULL,
 };
 ATTRIBUTE_GROUPS(xfs_errortag);
@@ -267,6 +270,19 @@ xfs_errortag_valid(
 	return true;
 }
 
+bool
+xfs_errortag_enabled(
+	struct xfs_mount	*mp,
+	unsigned int		tag)
+{
+	if (!mp->m_errortag)
+		return false;
+	if (!xfs_errortag_valid(tag))
+		return false;
+
+	return mp->m_errortag[tag] != 0;
+}
+
 bool
 xfs_errortag_test(
 	struct xfs_mount	*mp,
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index 5191e9145e552..dbe6c37dc6975 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -45,6 +45,18 @@ extern bool xfs_errortag_test(struct xfs_mount *mp, const char *expression,
 		const char *file, int line, unsigned int error_tag);
 #define XFS_TEST_ERROR(expr, mp, tag)		\
 	((expr) || xfs_errortag_test((mp), #expr, __FILE__, __LINE__, (tag)))
+bool xfs_errortag_enabled(struct xfs_mount *mp, unsigned int tag);
+#define XFS_ERRORTAG_DELAY(mp, tag)		\
+	do { \
+		might_sleep(); \
+		if (!xfs_errortag_enabled((mp), (tag))) \
+			break; \
+		xfs_warn_ratelimited((mp), \
+"Injecting %ums delay at file %s, line %d, on filesystem \"%s\"", \
+				(mp)->m_errortag[(tag)], __FILE__, __LINE__, \
+				(mp)->m_super->s_id); \
+		mdelay((mp)->m_errortag[(tag)]); \
+	} while (0)
 
 extern int xfs_errortag_get(struct xfs_mount *mp, unsigned int error_tag);
 extern int xfs_errortag_set(struct xfs_mount *mp, unsigned int error_tag,
@@ -55,6 +67,7 @@ extern int xfs_errortag_clearall(struct xfs_mount *mp);
 #define xfs_errortag_init(mp)			(0)
 #define xfs_errortag_del(mp)
 #define XFS_TEST_ERROR(expr, mp, tag)		(expr)
+#define XFS_ERRORTAG_DELAY(mp, tag)		((void)0)
 #define xfs_errortag_set(mp, tag, val)		(ENOSYS)
 #define xfs_errortag_add(mp, tag)		(ENOSYS)
 #define xfs_errortag_clearall(mp)		(ENOSYS)
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index d269ef57ff016..8a5dc1538aa82 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -34,6 +34,8 @@
 #include "xfs_ag.h"
 #include "xfs_ag_resv.h"
 #include "xfs_error.h"
+#include <linux/iomap.h>
+#include "xfs_iomap.h"
 
 /*
  * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 372d871bccc5e..c9ada9577a4a7 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3352,6 +3352,50 @@ DEFINE_EVENT(xfs_inode_irec_class, name, \
 	TP_PROTO(struct xfs_inode *ip, struct xfs_bmbt_irec *irec), \
 	TP_ARGS(ip, irec))
 
+/* inode iomap invalidation events */
+DECLARE_EVENT_CLASS(xfs_wb_invalid_class,
+	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork),
+	TP_ARGS(ip, iomap, wpcseq, whichfork),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(u64, addr)
+		__field(loff_t, pos)
+		__field(u64, len)
+		__field(u16, type)
+		__field(u16, flags)
+		__field(u32, wpcseq)
+		__field(u32, forkseq)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->addr = iomap->addr;
+		__entry->pos = iomap->offset;
+		__entry->len = iomap->length;
+		__entry->type = iomap->type;
+		__entry->flags = iomap->flags;
+		__entry->wpcseq = wpcseq;
+		__entry->forkseq = READ_ONCE(xfs_ifork_ptr(ip, whichfork)->if_seq);
+	),
+	TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x wpcseq 0x%x forkseq 0x%x",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->pos,
+		  __entry->addr,
+		  __entry->len,
+		  __entry->type,
+		  __entry->flags,
+		  __entry->wpcseq,
+		  __entry->forkseq)
+);
+#define DEFINE_WB_INVALID_EVENT(name) \
+DEFINE_EVENT(xfs_wb_invalid_class, name, \
+	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap, unsigned int wpcseq, int whichfork), \
+	TP_ARGS(ip, iomap, wpcseq, whichfork))
+DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid);
+DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid);
+
 /* refcount/reflink tracepoint definitions */
 
 /* reflink tracepoints */
-- 
GitLab


From 254e3459285cbf2174350bbc0051e475e1bc5196 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 28 Nov 2022 17:24:36 -0800
Subject: [PATCH 2860/4122] xfs: add debug knob to slow down write for fun

Add a new error injection knob so that we can arbitrarily slow down
pagecache writes to test for race conditions and aberrant reclaim
behavior if the writeback mechanisms are slow to issue writeback.  This
will enable functional testing for the ifork sequence counters
introduced in commit 304a68b9c63b ("xfs: use iomap_valid method to
detect stale cached iomaps") that fixes write racing with reclaim
writeback.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_errortag.h |  4 +++-
 fs/xfs/xfs_error.c           |  3 +++
 fs/xfs/xfs_iomap.c           | 14 ++++++++++--
 fs/xfs/xfs_trace.h           | 42 ++++++++++++++++++++++++++++++++++++
 4 files changed, 60 insertions(+), 3 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_errortag.h b/fs/xfs/libxfs/xfs_errortag.h
index f5f629174ecad..01a9e86b30379 100644
--- a/fs/xfs/libxfs/xfs_errortag.h
+++ b/fs/xfs/libxfs/xfs_errortag.h
@@ -62,7 +62,8 @@
 #define XFS_ERRTAG_DA_LEAF_SPLIT			40
 #define XFS_ERRTAG_ATTR_LEAF_TO_NODE			41
 #define XFS_ERRTAG_WB_DELAY_MS				42
-#define XFS_ERRTAG_MAX					43
+#define XFS_ERRTAG_WRITE_DELAY_MS			43
+#define XFS_ERRTAG_MAX					44
 
 /*
  * Random factors for above tags, 1 means always, 2 means 1/2 time, etc.
@@ -109,5 +110,6 @@
 #define XFS_RANDOM_DA_LEAF_SPLIT			1
 #define XFS_RANDOM_ATTR_LEAF_TO_NODE			1
 #define XFS_RANDOM_WB_DELAY_MS				3000
+#define XFS_RANDOM_WRITE_DELAY_MS			3000
 
 #endif /* __XFS_ERRORTAG_H_ */
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 2d6e3c718e03d..713341d246d18 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -61,6 +61,7 @@ static unsigned int xfs_errortag_random_default[] = {
 	XFS_RANDOM_DA_LEAF_SPLIT,
 	XFS_RANDOM_ATTR_LEAF_TO_NODE,
 	XFS_RANDOM_WB_DELAY_MS,
+	XFS_RANDOM_WRITE_DELAY_MS,
 };
 
 struct xfs_errortag_attr {
@@ -177,6 +178,7 @@ XFS_ERRORTAG_ATTR_RW(larp,		XFS_ERRTAG_LARP);
 XFS_ERRORTAG_ATTR_RW(da_leaf_split,	XFS_ERRTAG_DA_LEAF_SPLIT);
 XFS_ERRORTAG_ATTR_RW(attr_leaf_to_node,	XFS_ERRTAG_ATTR_LEAF_TO_NODE);
 XFS_ERRORTAG_ATTR_RW(wb_delay_ms,	XFS_ERRTAG_WB_DELAY_MS);
+XFS_ERRORTAG_ATTR_RW(write_delay_ms,	XFS_ERRTAG_WRITE_DELAY_MS);
 
 static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(noerror),
@@ -221,6 +223,7 @@ static struct attribute *xfs_errortag_attrs[] = {
 	XFS_ERRORTAG_ATTR_LIST(da_leaf_split),
 	XFS_ERRORTAG_ATTR_LIST(attr_leaf_to_node),
 	XFS_ERRORTAG_ATTR_LIST(wb_delay_ms),
+	XFS_ERRORTAG_ATTR_LIST(write_delay_ms),
 	NULL,
 };
 ATTRIBUTE_GROUPS(xfs_errortag);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1bdd7afc10108..1005f1e365454 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -27,6 +27,8 @@
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
 #include "xfs_reflink.h"
+#include "xfs_error.h"
+#include "xfs_errortag.h"
 
 #define XFS_ALLOC_ALIGN(mp, off) \
 	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
@@ -71,8 +73,16 @@ xfs_iomap_valid(
 	struct inode		*inode,
 	const struct iomap	*iomap)
 {
-	return iomap->validity_cookie ==
-			xfs_iomap_inode_sequence(XFS_I(inode), iomap->flags);
+	struct xfs_inode	*ip = XFS_I(inode);
+
+	if (iomap->validity_cookie !=
+			xfs_iomap_inode_sequence(ip, iomap->flags)) {
+		trace_xfs_iomap_invalid(ip, iomap);
+		return false;
+	}
+
+	XFS_ERRORTAG_DELAY(ip->i_mount, XFS_ERRTAG_WRITE_DELAY_MS);
+	return true;
 }
 
 const struct iomap_page_ops xfs_iomap_page_ops = {
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index c9ada9577a4a7..421d1e504ac43 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -3396,6 +3396,48 @@ DEFINE_EVENT(xfs_wb_invalid_class, name, \
 DEFINE_WB_INVALID_EVENT(xfs_wb_cow_iomap_invalid);
 DEFINE_WB_INVALID_EVENT(xfs_wb_data_iomap_invalid);
 
+DECLARE_EVENT_CLASS(xfs_iomap_invalid_class,
+	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap),
+	TP_ARGS(ip, iomap),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(xfs_ino_t, ino)
+		__field(u64, addr)
+		__field(loff_t, pos)
+		__field(u64, len)
+		__field(u64, validity_cookie)
+		__field(u64, inodeseq)
+		__field(u16, type)
+		__field(u16, flags)
+	),
+	TP_fast_assign(
+		__entry->dev = VFS_I(ip)->i_sb->s_dev;
+		__entry->ino = ip->i_ino;
+		__entry->addr = iomap->addr;
+		__entry->pos = iomap->offset;
+		__entry->len = iomap->length;
+		__entry->validity_cookie = iomap->validity_cookie;
+		__entry->type = iomap->type;
+		__entry->flags = iomap->flags;
+		__entry->inodeseq = xfs_iomap_inode_sequence(ip, iomap->flags);
+	),
+	TP_printk("dev %d:%d ino 0x%llx pos 0x%llx addr 0x%llx bytecount 0x%llx type 0x%x flags 0x%x validity_cookie 0x%llx inodeseq 0x%llx",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->ino,
+		  __entry->pos,
+		  __entry->addr,
+		  __entry->len,
+		  __entry->type,
+		  __entry->flags,
+		  __entry->validity_cookie,
+		  __entry->inodeseq)
+);
+#define DEFINE_IOMAP_INVALID_EVENT(name) \
+DEFINE_EVENT(xfs_iomap_invalid_class, name, \
+	TP_PROTO(struct xfs_inode *ip, const struct iomap *iomap), \
+	TP_ARGS(ip, iomap))
+DEFINE_IOMAP_INVALID_EVENT(xfs_iomap_invalid);
+
 /* refcount/reflink tracepoint definitions */
 
 /* reflink tracepoints */
-- 
GitLab


From 1ab30c610630da5391a373cddb8a065bf4c4bc01 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Tue, 22 Nov 2022 19:12:26 +0800
Subject: [PATCH 2861/4122] usb: roles: fix of node refcount leak in
 usb_role_switch_is_parent()

I got the following report while doing device(mt6370-tcpc) load
test with CONFIG_OF_UNITTEST and CONFIG_OF_DYNAMIC enabled:

  OF: ERROR: memory leak, expected refcount 1 instead of 2,
  of_node_get()/of_node_put() unbalanced - destroy cset entry:
  attach overlay node /i2c/pmic@34

The 'parent' returned by fwnode_get_parent() with refcount incremented.
it needs be put after using.

Fixes: 6fadd72943b8 ("usb: roles: get usb-role-switch from parent")
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221122111226.251588-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/roles/class.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/roles/class.c b/drivers/usb/roles/class.c
index dfaed7eee94fc..32e6d19f7011a 100644
--- a/drivers/usb/roles/class.c
+++ b/drivers/usb/roles/class.c
@@ -106,10 +106,13 @@ usb_role_switch_is_parent(struct fwnode_handle *fwnode)
 	struct fwnode_handle *parent = fwnode_get_parent(fwnode);
 	struct device *dev;
 
-	if (!parent || !fwnode_property_present(parent, "usb-role-switch"))
+	if (!fwnode_property_present(parent, "usb-role-switch")) {
+		fwnode_handle_put(parent);
 		return NULL;
+	}
 
 	dev = class_find_device_by_fwnode(role_class, parent);
+	fwnode_handle_put(parent);
 	return dev ? to_role_switch(dev) : ERR_PTR(-EPROBE_DEFER);
 }
 
-- 
GitLab


From e0dced9c7d4763fd97c86a13902d135f03cc42eb Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Date: Wed, 23 Nov 2022 11:30:21 +0200
Subject: [PATCH 2862/4122] usb: typec: ucsi: Resume in separate work

It can take more than one second to check each connector
when the system is resumed. So if you have, say, eight
connectors, it may take eight seconds for ucsi_resume() to
finish. That's a bit too much.

This will modify ucsi_resume() so that it schedules a work
where the interface is actually resumed instead of checking
the connectors directly. The connections will also be
checked in separate tasks which are queued for each connector
separately.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216706
Fixes: 99f6d4361113 ("usb: typec: ucsi: Check the connection on resume")
Cc: <stable@vger.kernel.org>
Reported-by: Todd Brandt <todd.e.brandt@intel.com>
Signed-off-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Link: https://lore.kernel.org/r/20221123093021.25981-1-heikki.krogerus@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/ucsi/ucsi.c | 17 +++++++++++++----
 drivers/usb/typec/ucsi/ucsi.h |  1 +
 2 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
index a7987fc764cc6..eabe519013e78 100644
--- a/drivers/usb/typec/ucsi/ucsi.c
+++ b/drivers/usb/typec/ucsi/ucsi.c
@@ -1270,8 +1270,9 @@ err:
 	return ret;
 }
 
-int ucsi_resume(struct ucsi *ucsi)
+static void ucsi_resume_work(struct work_struct *work)
 {
+	struct ucsi *ucsi = container_of(work, struct ucsi, resume_work);
 	struct ucsi_connector *con;
 	u64 command;
 	int ret;
@@ -1279,15 +1280,21 @@ int ucsi_resume(struct ucsi *ucsi)
 	/* Restore UCSI notification enable mask after system resume */
 	command = UCSI_SET_NOTIFICATION_ENABLE | ucsi->ntfy;
 	ret = ucsi_send_command(ucsi, command, NULL, 0);
-	if (ret < 0)
-		return ret;
+	if (ret < 0) {
+		dev_err(ucsi->dev, "failed to re-enable notifications (%d)\n", ret);
+		return;
+	}
 
 	for (con = ucsi->connector; con->port; con++) {
 		mutex_lock(&con->lock);
-		ucsi_check_connection(con);
+		ucsi_partner_task(con, ucsi_check_connection, 1, 0);
 		mutex_unlock(&con->lock);
 	}
+}
 
+int ucsi_resume(struct ucsi *ucsi)
+{
+	queue_work(system_long_wq, &ucsi->resume_work);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ucsi_resume);
@@ -1347,6 +1354,7 @@ struct ucsi *ucsi_create(struct device *dev, const struct ucsi_operations *ops)
 	if (!ucsi)
 		return ERR_PTR(-ENOMEM);
 
+	INIT_WORK(&ucsi->resume_work, ucsi_resume_work);
 	INIT_DELAYED_WORK(&ucsi->work, ucsi_init_work);
 	mutex_init(&ucsi->ppm_lock);
 	ucsi->dev = dev;
@@ -1401,6 +1409,7 @@ void ucsi_unregister(struct ucsi *ucsi)
 
 	/* Make sure that we are not in the middle of driver initialization */
 	cancel_delayed_work_sync(&ucsi->work);
+	cancel_work_sync(&ucsi->resume_work);
 
 	/* Disable notifications */
 	ucsi->ops->async_write(ucsi, UCSI_CONTROL, &cmd, sizeof(cmd));
diff --git a/drivers/usb/typec/ucsi/ucsi.h b/drivers/usb/typec/ucsi/ucsi.h
index 8eb391e3e592c..c968474ee5473 100644
--- a/drivers/usb/typec/ucsi/ucsi.h
+++ b/drivers/usb/typec/ucsi/ucsi.h
@@ -287,6 +287,7 @@ struct ucsi {
 	struct ucsi_capability cap;
 	struct ucsi_connector *connector;
 
+	struct work_struct resume_work;
 	struct delayed_work work;
 	int work_count;
 #define UCSI_ROLE_SWITCH_RETRY_PER_HZ	10
-- 
GitLab


From 57b7b733b1a7aeab25bc2670afff608214284863 Mon Sep 17 00:00:00 2001
From: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Date: Wed, 23 Nov 2022 12:07:46 +0100
Subject: [PATCH 2863/4122] usb: gadget: function: Simplify diagnostic
 messaging in printer

Don't issue messages which can be easily achieved with ftrace.
In case of printer_open() the return code is propagated to other layers
so the user will know about -EBUSY anyway.

Signed-off-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Link: https://lore.kernel.org/r/20221123110746.59611-1-andrzej.p@collabora.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_printer.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/usb/gadget/function/f_printer.c b/drivers/usb/gadget/function/f_printer.c
index a881c69b1f2bf..4903d761a8724 100644
--- a/drivers/usb/gadget/function/f_printer.c
+++ b/drivers/usb/gadget/function/f_printer.c
@@ -364,7 +364,7 @@ printer_open(struct inode *inode, struct file *fd)
 	spin_unlock_irqrestore(&dev->lock, flags);
 
 	kref_get(&dev->kref);
-	DBG(dev, "printer_open returned %x\n", ret);
+
 	return ret;
 }
 
@@ -382,7 +382,6 @@ printer_close(struct inode *inode, struct file *fd)
 	spin_unlock_irqrestore(&dev->lock, flags);
 
 	kref_put(&dev->kref, printer_dev_free);
-	DBG(dev, "printer_close\n");
 
 	return 0;
 }
@@ -848,8 +847,6 @@ static void printer_reset_interface(struct printer_dev *dev)
 	if (dev->interface < 0)
 		return;
 
-	DBG(dev, "%s\n", __func__);
-
 	if (dev->in_ep->desc)
 		usb_ep_disable(dev->in_ep);
 
@@ -887,8 +884,6 @@ static void printer_soft_reset(struct printer_dev *dev)
 {
 	struct usb_request	*req;
 
-	INFO(dev, "Received Printer Reset Request\n");
-
 	if (usb_ep_disable(dev->in_ep))
 		DBG(dev, "Failed to disable USB in_ep\n");
 	if (usb_ep_disable(dev->out_ep))
@@ -1185,8 +1180,6 @@ static void printer_func_disable(struct usb_function *f)
 {
 	struct printer_dev *dev = func_to_printer(f);
 
-	DBG(dev, "%s\n", __func__);
-
 	printer_reset_interface(dev);
 }
 
-- 
GitLab


From 3c347cdafa3db43337870006e5c2d7b78a8dae20 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 25 Nov 2022 14:41:20 +0800
Subject: [PATCH 2864/4122] usb: core: hcd: Fix return value check in
 usb_hcd_setup_local_mem()

If dmam_alloc_attrs() fails, it returns NULL pointer and never
return ERR_PTR(), so repleace IS_ERR() with IS_ERR_OR_NULL()
and if it's NULL, returns -ENOMEM.

Fixes: 9ba26f5cecd8 ("ARM: sa1100/assabet: move dmabounce hack to ohci driver")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221125064120.2842452-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index faeaace0d197d..8300baedafd20 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -3133,8 +3133,12 @@ int usb_hcd_setup_local_mem(struct usb_hcd *hcd, phys_addr_t phys_addr,
 					     GFP_KERNEL,
 					     DMA_ATTR_WRITE_COMBINE);
 
-	if (IS_ERR(local_mem))
+	if (IS_ERR_OR_NULL(local_mem)) {
+		if (!local_mem)
+			return -ENOMEM;
+
 		return PTR_ERR(local_mem);
+	}
 
 	/*
 	 * Here we pass a dma_addr_t but the arg type is a phys_addr_t.
-- 
GitLab


From f05f80f217bf52443a2582bca19fd78188333f25 Mon Sep 17 00:00:00 2001
From: Shruthi Sanil <shruthi.sanil@intel.com>
Date: Fri, 25 Nov 2022 16:23:27 +0530
Subject: [PATCH 2865/4122] usb: dwc3: pci: Update PCIe device ID for USB3
 controller on CPU sub-system for Raptor Lake

The device ID 0xa70e is defined for the USB3 device controller in the CPU
sub-system of Raptor Lake platform. Hence updating the ID accordingly.

Fixes: bad0d1d726ac ("usb: dwc3: pci: Add support for Intel Raptor Lake")
Cc: stable <stable@kernel.org>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Shruthi Sanil <shruthi.sanil@intel.com>
Link: https://lore.kernel.org/r/20221125105327.27945-1-shruthi.sanil@intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/dwc3-pci.c b/drivers/usb/dwc3/dwc3-pci.c
index fb14511b1e10f..89c9ab2b19f85 100644
--- a/drivers/usb/dwc3/dwc3-pci.c
+++ b/drivers/usb/dwc3/dwc3-pci.c
@@ -45,7 +45,7 @@
 #define PCI_DEVICE_ID_INTEL_ADLN		0x465e
 #define PCI_DEVICE_ID_INTEL_ADLN_PCH		0x54ee
 #define PCI_DEVICE_ID_INTEL_ADLS		0x7ae1
-#define PCI_DEVICE_ID_INTEL_RPL			0x460e
+#define PCI_DEVICE_ID_INTEL_RPL			0xa70e
 #define PCI_DEVICE_ID_INTEL_RPLS		0x7a61
 #define PCI_DEVICE_ID_INTEL_MTLP		0x7ec1
 #define PCI_DEVICE_ID_INTEL_MTL			0x7e7e
-- 
GitLab


From 01792c6036af577e4cb1aa7b9ffce7a4882c86b5 Mon Sep 17 00:00:00 2001
From: Xu Yang <xu.yang_2@nxp.com>
Date: Mon, 28 Nov 2022 16:13:06 +0800
Subject: [PATCH 2866/4122] usb: host: fix a typo in ehci.h

Change "ehci_hq" to "ehci_qh" in this comment.

Signed-off-by: Xu Yang <xu.yang_2@nxp.com>
Link: https://lore.kernel.org/r/20221128081306.2772729-1-xu.yang_2@nxp.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/ehci.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index ad3f13a3eaf1b..c5c7f87825493 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -471,7 +471,7 @@ struct ehci_iso_sched {
  * acts like a qh would, if EHCI had them for ISO.
  */
 struct ehci_iso_stream {
-	/* first field matches ehci_hq, but is NULL */
+	/* first field matches ehci_qh, but is NULL */
 	struct ehci_qh_hw	*hw;
 
 	u8			bEndpointAddress;
-- 
GitLab


From 27ef17849779edd5600aa27d1a246ad424761971 Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Mon, 28 Nov 2022 19:29:54 +0900
Subject: [PATCH 2867/4122] usb: add usb_set_intfdata() documentation

USB drivers do not need to call usb_set_intfdata(intf, NULL) in their
usb_driver::disconnect callback because the core already does it in [1].

However, this fact is widely unknown, c.f.:

  $ git grep "usb_set_intfdata(.*NULL)" | wc -l
  215

Especially, setting the interface to NULL before all action completed
can result in a NULL pointer dereference. Not calling
usb_set_intfdata() at all in disconnect() is the safest method.

Add documentation to usb_set_intfdata() to clarify this point.

Also remove the call in usb-skeletion's disconnect() not to confuse
the new comers.

[1] function usb_unbind_interface() from drivers/usb/core/driver.c
Link: https://elixir.bootlin.com/linux/v6.0/source/drivers/usb/core/driver.c#L497

Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/r/20221128102954.3615579-1-mailhol.vincent@wanadoo.fr
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/usb-skeleton.c |  1 -
 include/linux/usb.h        | 12 ++++++++++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index d87deee3e26e1..900a64ad25e4f 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -564,7 +564,6 @@ static void skel_disconnect(struct usb_interface *interface)
 	int minor = interface->minor;
 
 	dev = usb_get_intfdata(interface);
-	usb_set_intfdata(interface, NULL);
 
 	/* give back our minor */
 	usb_deregister_dev(interface, &skel_class);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 9ff1ad4dfad12..d4afeeec1e1af 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -265,6 +265,18 @@ static inline void *usb_get_intfdata(struct usb_interface *intf)
 	return dev_get_drvdata(&intf->dev);
 }
 
+/**
+ * usb_set_intfdata() - associate driver-specific data with the interface
+ * @intf: the usb interface
+ * @data: pointer to the device priv structure or %NULL
+ *
+ * Drivers should use this function in their probe() to associate their
+ * driver-specific data with the usb interface.
+ *
+ * When disconnecting, the core will take care of setting @intf back to %NULL,
+ * so no actions are needed on the driver side. The interface should not be set
+ * to %NULL before all actions completed (e.g. no outsanding URB remaining).
+ */
 static inline void usb_set_intfdata(struct usb_interface *intf, void *data)
 {
 	dev_set_drvdata(&intf->dev, data);
-- 
GitLab


From 03a88b0bafbe3f548729d970d8366f48718c9b19 Mon Sep 17 00:00:00 2001
From: Chunfeng Yun <chunfeng.yun@mediatek.com>
Date: Mon, 28 Nov 2022 14:33:37 +0800
Subject: [PATCH 2868/4122] usb: xhci-mtk: fix leakage of shared hcd when fail
 to set wakeup irq

Can not set the @shared_hcd to NULL before decrease the usage count
by usb_put_hcd(), this will cause the shared hcd not released.

Fixes: 04284eb74e0c ("usb: xhci-mtk: add support runtime PM")
Cc: <stable@vger.kernel.org>
Signed-off-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Link: https://lore.kernel.org/r/20221128063337.18124-1-chunfeng.yun@mediatek.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-mtk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/usb/host/xhci-mtk.c b/drivers/usb/host/xhci-mtk.c
index cff3c4aea0362..f7cbb08fc5068 100644
--- a/drivers/usb/host/xhci-mtk.c
+++ b/drivers/usb/host/xhci-mtk.c
@@ -646,7 +646,6 @@ static int xhci_mtk_probe(struct platform_device *pdev)
 
 dealloc_usb3_hcd:
 	usb_remove_hcd(xhci->shared_hcd);
-	xhci->shared_hcd = NULL;
 
 put_usb3_hcd:
 	usb_put_hcd(xhci->shared_hcd);
-- 
GitLab


From 032399819dd5f135e6ffe446c8e97ab54eec3464 Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Tue, 22 Nov 2022 22:05:36 +0000
Subject: [PATCH 2869/4122] usb: typec: Add partner PD object wrapper

Some port drivers may want to set a Type-C partner as a parent for a
USB Power Delivery object, but the Type-C partner struct isn't exposed
outside of the Type-C class driver. Add a wrapper to
usb_power_delivery_register() which sets the provided Type-C partner
as a parent to the USB PD object. This helps to avoid exposing the
Type-C partner's device struct unnecessarily.

Cc: Benson Leung <bleung@chromium.org>
Suggested-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Prashant Malani <pmalani@chromium.org>
Link: https://lore.kernel.org/r/20221122220538.2991775-2-pmalani@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/class.c | 19 +++++++++++++++++++
 include/linux/usb/typec.h |  4 ++++
 2 files changed, 23 insertions(+)

diff --git a/drivers/usb/typec/class.c b/drivers/usb/typec/class.c
index bd5e5dd704313..5897905cb4f08 100644
--- a/drivers/usb/typec/class.c
+++ b/drivers/usb/typec/class.c
@@ -821,6 +821,25 @@ void typec_partner_set_svdm_version(struct typec_partner *partner,
 }
 EXPORT_SYMBOL_GPL(typec_partner_set_svdm_version);
 
+/**
+ * typec_partner_usb_power_delivery_register - Register Type-C partner USB Power Delivery Support
+ * @partner: Type-C partner device.
+ * @desc: Description of the USB PD contract.
+ *
+ * This routine is a wrapper around usb_power_delivery_register(). It registers
+ * USB Power Delivery Capabilities for a Type-C partner device. Specifically,
+ * it sets the Type-C partner device as a parent for the resulting USB Power Delivery object.
+ *
+ * Returns handle to struct usb_power_delivery or ERR_PTR.
+ */
+struct usb_power_delivery *
+typec_partner_usb_power_delivery_register(struct typec_partner *partner,
+					  struct usb_power_delivery_desc *desc)
+{
+	return usb_power_delivery_register(&partner->dev, desc);
+}
+EXPORT_SYMBOL_GPL(typec_partner_usb_power_delivery_register);
+
 /**
  * typec_register_partner - Register a USB Type-C Partner
  * @port: The USB Type-C Port the partner is connected to
diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h
index 7751bedcae5dd..8fa781207970a 100644
--- a/include/linux/usb/typec.h
+++ b/include/linux/usb/typec.h
@@ -23,6 +23,7 @@ struct fwnode_handle;
 struct device;
 
 struct usb_power_delivery;
+struct usb_power_delivery_desc;
 
 enum typec_port_type {
 	TYPEC_PORT_SRC,
@@ -327,6 +328,9 @@ void typec_partner_set_svdm_version(struct typec_partner *partner,
 				    enum usb_pd_svdm_ver svdm_version);
 int typec_get_negotiated_svdm_version(struct typec_port *port);
 
+struct usb_power_delivery *typec_partner_usb_power_delivery_register(struct typec_partner *partner,
+							struct usb_power_delivery_desc *desc);
+
 int typec_port_set_usb_power_delivery(struct typec_port *port, struct usb_power_delivery *pd);
 int typec_partner_set_usb_power_delivery(struct typec_partner *partner,
 					 struct usb_power_delivery *pd);
-- 
GitLab


From ab3593eeef606816bcc28b12690c51379c3d12eb Mon Sep 17 00:00:00 2001
From: Prashant Malani <pmalani@chromium.org>
Date: Tue, 22 Nov 2022 22:05:37 +0000
Subject: [PATCH 2870/4122] platform/chrome: cros_ec_typec: Set parent of
 partner PD object

In order to tell what Type-C device a PD object belongs to, its parent
needs to be set. Use the Type-C partner USB PD registration wrapper
to set the parent appropriately for PD objects which are created for
connected Type-C partners.

Cc: Benson Leung <bleung@chromium.org>
Cc: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Prashant Malani <pmalani@chromium.org>
Link: https://lore.kernel.org/r/20221122220538.2991775-3-pmalani@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/platform/chrome/cros_ec_typec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/platform/chrome/cros_ec_typec.c b/drivers/platform/chrome/cros_ec_typec.c
index 2a7ff14dc37e9..d5bc4021aca2f 100644
--- a/drivers/platform/chrome/cros_ec_typec.c
+++ b/drivers/platform/chrome/cros_ec_typec.c
@@ -968,7 +968,7 @@ static void cros_typec_register_partner_pdos(struct cros_typec_data *typec,
 	if (!resp->source_cap_count && !resp->sink_cap_count)
 		return;
 
-	port->partner_pd = usb_power_delivery_register(NULL, &desc);
+	port->partner_pd = typec_partner_usb_power_delivery_register(port->partner, &desc);
 	if (IS_ERR(port->partner_pd)) {
 		dev_warn(typec->dev, "Failed to register partner PD device, port: %d\n", port_num);
 		return;
-- 
GitLab


From 57f8e00d8a82073ab7893ab8ae4055580ef9552f Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Fri, 25 Nov 2022 10:55:06 +0200
Subject: [PATCH 2871/4122] usb: musb: Drop old unused am35x glue layer

The am35x glue layer is no longer in use and can be dropped. There are no
longer any SoCs passing platform data for it as they are booting using
devicetree.

In general, the am35x SoCs are similar to am335x and ti81xx and can
use the musb_dsps glue layer as long as there is a proper phy driver
available.

Cc: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Link: https://lore.kernel.org/r/20221125085506.38127-1-tony@atomide.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/Kconfig  |   5 -
 drivers/usb/musb/Makefile |   1 -
 drivers/usb/musb/am35x.c  | 610 --------------------------------------
 3 files changed, 616 deletions(-)
 delete mode 100644 drivers/usb/musb/am35x.c

diff --git a/drivers/usb/musb/Kconfig b/drivers/usb/musb/Kconfig
index 290df4d5d5ce8..3a1f4bcea80c0 100644
--- a/drivers/usb/musb/Kconfig
+++ b/drivers/usb/musb/Kconfig
@@ -88,11 +88,6 @@ config USB_MUSB_OMAP2PLUS
 	depends on OMAP_CONTROL_PHY || !OMAP_CONTROL_PHY
 	select GENERIC_PHY
 
-config USB_MUSB_AM35X
-	tristate "AM35x"
-	depends on ARCH_OMAP
-	depends on NOP_USB_XCEIV
-
 config USB_MUSB_DSPS
 	tristate "TI DSPS platforms"
 	depends on ARCH_OMAP2PLUS || COMPILE_TEST
diff --git a/drivers/usb/musb/Makefile b/drivers/usb/musb/Makefile
index 44a9e27b2157a..5dccf0e453e1b 100644
--- a/drivers/usb/musb/Makefile
+++ b/drivers/usb/musb/Makefile
@@ -16,7 +16,6 @@ musb_hdrc-$(CONFIG_DEBUG_FS)			+= musb_debugfs.o
 
 # Hardware Glue Layer
 obj-$(CONFIG_USB_MUSB_OMAP2PLUS)		+= omap2430.o
-obj-$(CONFIG_USB_MUSB_AM35X)			+= am35x.o
 obj-$(CONFIG_USB_MUSB_DSPS)			+= musb_dsps.o
 obj-$(CONFIG_USB_MUSB_TUSB6010)			+= tusb6010.o
 obj-$(CONFIG_USB_MUSB_DA8XX)			+= da8xx.o
diff --git a/drivers/usb/musb/am35x.c b/drivers/usb/musb/am35x.c
deleted file mode 100644
index bf2c0fa6cb320..0000000000000
--- a/drivers/usb/musb/am35x.c
+++ /dev/null
@@ -1,610 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-/*
- * Texas Instruments AM35x "glue layer"
- *
- * Copyright (c) 2010, by Texas Instruments
- *
- * Based on the DA8xx "glue layer" code.
- * Copyright (c) 2008-2009, MontaVista Software, Inc. <source@mvista.com>
- *
- * This file is part of the Inventra Controller Driver for Linux.
- */
-
-#include <linux/module.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/platform_device.h>
-#include <linux/dma-mapping.h>
-#include <linux/usb/usb_phy_generic.h>
-#include <linux/platform_data/usb-omap.h>
-
-#include "musb_core.h"
-
-/*
- * AM35x specific definitions
- */
-/* USB 2.0 OTG module registers */
-#define USB_REVISION_REG	0x00
-#define USB_CTRL_REG		0x04
-#define USB_STAT_REG		0x08
-#define USB_EMULATION_REG	0x0c
-/* 0x10 Reserved */
-#define USB_AUTOREQ_REG		0x14
-#define USB_SRP_FIX_TIME_REG	0x18
-#define USB_TEARDOWN_REG	0x1c
-#define EP_INTR_SRC_REG		0x20
-#define EP_INTR_SRC_SET_REG	0x24
-#define EP_INTR_SRC_CLEAR_REG	0x28
-#define EP_INTR_MASK_REG	0x2c
-#define EP_INTR_MASK_SET_REG	0x30
-#define EP_INTR_MASK_CLEAR_REG	0x34
-#define EP_INTR_SRC_MASKED_REG	0x38
-#define CORE_INTR_SRC_REG	0x40
-#define CORE_INTR_SRC_SET_REG	0x44
-#define CORE_INTR_SRC_CLEAR_REG	0x48
-#define CORE_INTR_MASK_REG	0x4c
-#define CORE_INTR_MASK_SET_REG	0x50
-#define CORE_INTR_MASK_CLEAR_REG 0x54
-#define CORE_INTR_SRC_MASKED_REG 0x58
-/* 0x5c Reserved */
-#define USB_END_OF_INTR_REG	0x60
-
-/* Control register bits */
-#define AM35X_SOFT_RESET_MASK	1
-
-/* USB interrupt register bits */
-#define AM35X_INTR_USB_SHIFT	16
-#define AM35X_INTR_USB_MASK	(0x1ff << AM35X_INTR_USB_SHIFT)
-#define AM35X_INTR_DRVVBUS	0x100
-#define AM35X_INTR_RX_SHIFT	16
-#define AM35X_INTR_TX_SHIFT	0
-#define AM35X_TX_EP_MASK	0xffff		/* EP0 + 15 Tx EPs */
-#define AM35X_RX_EP_MASK	0xfffe		/* 15 Rx EPs */
-#define AM35X_TX_INTR_MASK	(AM35X_TX_EP_MASK << AM35X_INTR_TX_SHIFT)
-#define AM35X_RX_INTR_MASK	(AM35X_RX_EP_MASK << AM35X_INTR_RX_SHIFT)
-
-#define USB_MENTOR_CORE_OFFSET	0x400
-
-struct am35x_glue {
-	struct device		*dev;
-	struct platform_device	*musb;
-	struct platform_device	*phy;
-	struct clk		*phy_clk;
-	struct clk		*clk;
-};
-
-/*
- * am35x_musb_enable - enable interrupts
- */
-static void am35x_musb_enable(struct musb *musb)
-{
-	void __iomem *reg_base = musb->ctrl_base;
-	u32 epmask;
-
-	/* Workaround: setup IRQs through both register sets. */
-	epmask = ((musb->epmask & AM35X_TX_EP_MASK) << AM35X_INTR_TX_SHIFT) |
-	       ((musb->epmask & AM35X_RX_EP_MASK) << AM35X_INTR_RX_SHIFT);
-
-	musb_writel(reg_base, EP_INTR_MASK_SET_REG, epmask);
-	musb_writel(reg_base, CORE_INTR_MASK_SET_REG, AM35X_INTR_USB_MASK);
-
-	/* Force the DRVVBUS IRQ so we can start polling for ID change. */
-	musb_writel(reg_base, CORE_INTR_SRC_SET_REG,
-			AM35X_INTR_DRVVBUS << AM35X_INTR_USB_SHIFT);
-}
-
-/*
- * am35x_musb_disable - disable HDRC and flush interrupts
- */
-static void am35x_musb_disable(struct musb *musb)
-{
-	void __iomem *reg_base = musb->ctrl_base;
-
-	musb_writel(reg_base, CORE_INTR_MASK_CLEAR_REG, AM35X_INTR_USB_MASK);
-	musb_writel(reg_base, EP_INTR_MASK_CLEAR_REG,
-			 AM35X_TX_INTR_MASK | AM35X_RX_INTR_MASK);
-	musb_writel(reg_base, USB_END_OF_INTR_REG, 0);
-}
-
-#define portstate(stmt)		stmt
-
-static void am35x_musb_set_vbus(struct musb *musb, int is_on)
-{
-	WARN_ON(is_on && is_peripheral_active(musb));
-}
-
-#define	POLL_SECONDS	2
-
-static void otg_timer(struct timer_list *t)
-{
-	struct musb		*musb = from_timer(musb, t, dev_timer);
-	void __iomem		*mregs = musb->mregs;
-	u8			devctl;
-	unsigned long		flags;
-
-	/*
-	 * We poll because AM35x's won't expose several OTG-critical
-	 * status change events (from the transceiver) otherwise.
-	 */
-	devctl = musb_readb(mregs, MUSB_DEVCTL);
-	dev_dbg(musb->controller, "Poll devctl %02x (%s)\n", devctl,
-		usb_otg_state_string(musb->xceiv->otg->state));
-
-	spin_lock_irqsave(&musb->lock, flags);
-	switch (musb->xceiv->otg->state) {
-	case OTG_STATE_A_WAIT_BCON:
-		devctl &= ~MUSB_DEVCTL_SESSION;
-		musb_writeb(musb->mregs, MUSB_DEVCTL, devctl);
-
-		devctl = musb_readb(musb->mregs, MUSB_DEVCTL);
-		if (devctl & MUSB_DEVCTL_BDEVICE) {
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-			MUSB_DEV_MODE(musb);
-		} else {
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-			MUSB_HST_MODE(musb);
-		}
-		break;
-	case OTG_STATE_A_WAIT_VFALL:
-		musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
-		musb_writel(musb->ctrl_base, CORE_INTR_SRC_SET_REG,
-			    MUSB_INTR_VBUSERROR << AM35X_INTR_USB_SHIFT);
-		break;
-	case OTG_STATE_B_IDLE:
-		devctl = musb_readb(mregs, MUSB_DEVCTL);
-		if (devctl & MUSB_DEVCTL_BDEVICE)
-			mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ);
-		else
-			musb->xceiv->otg->state = OTG_STATE_A_IDLE;
-		break;
-	default:
-		break;
-	}
-	spin_unlock_irqrestore(&musb->lock, flags);
-}
-
-static void am35x_musb_try_idle(struct musb *musb, unsigned long timeout)
-{
-	static unsigned long last_timer;
-
-	if (timeout == 0)
-		timeout = jiffies + msecs_to_jiffies(3);
-
-	/* Never idle if active, or when VBUS timeout is not set as host */
-	if (musb->is_active || (musb->a_wait_bcon == 0 &&
-				musb->xceiv->otg->state == OTG_STATE_A_WAIT_BCON)) {
-		dev_dbg(musb->controller, "%s active, deleting timer\n",
-			usb_otg_state_string(musb->xceiv->otg->state));
-		del_timer(&musb->dev_timer);
-		last_timer = jiffies;
-		return;
-	}
-
-	if (time_after(last_timer, timeout) && timer_pending(&musb->dev_timer)) {
-		dev_dbg(musb->controller, "Longer idle timer already pending, ignoring...\n");
-		return;
-	}
-	last_timer = timeout;
-
-	dev_dbg(musb->controller, "%s inactive, starting idle timer for %u ms\n",
-		usb_otg_state_string(musb->xceiv->otg->state),
-		jiffies_to_msecs(timeout - jiffies));
-	mod_timer(&musb->dev_timer, timeout);
-}
-
-static irqreturn_t am35x_musb_interrupt(int irq, void *hci)
-{
-	struct musb  *musb = hci;
-	void __iomem *reg_base = musb->ctrl_base;
-	struct device *dev = musb->controller;
-	struct musb_hdrc_platform_data *plat = dev_get_platdata(dev);
-	struct omap_musb_board_data *data = plat->board_data;
-	unsigned long flags;
-	irqreturn_t ret = IRQ_NONE;
-	u32 epintr, usbintr;
-
-	spin_lock_irqsave(&musb->lock, flags);
-
-	/* Get endpoint interrupts */
-	epintr = musb_readl(reg_base, EP_INTR_SRC_MASKED_REG);
-
-	if (epintr) {
-		musb_writel(reg_base, EP_INTR_SRC_CLEAR_REG, epintr);
-
-		musb->int_rx =
-			(epintr & AM35X_RX_INTR_MASK) >> AM35X_INTR_RX_SHIFT;
-		musb->int_tx =
-			(epintr & AM35X_TX_INTR_MASK) >> AM35X_INTR_TX_SHIFT;
-	}
-
-	/* Get usb core interrupts */
-	usbintr = musb_readl(reg_base, CORE_INTR_SRC_MASKED_REG);
-	if (!usbintr && !epintr)
-		goto eoi;
-
-	if (usbintr) {
-		musb_writel(reg_base, CORE_INTR_SRC_CLEAR_REG, usbintr);
-
-		musb->int_usb =
-			(usbintr & AM35X_INTR_USB_MASK) >> AM35X_INTR_USB_SHIFT;
-	}
-	/*
-	 * DRVVBUS IRQs are the only proxy we have (a very poor one!) for
-	 * AM35x's missing ID change IRQ.  We need an ID change IRQ to
-	 * switch appropriately between halves of the OTG state machine.
-	 * Managing DEVCTL.SESSION per Mentor docs requires that we know its
-	 * value but DEVCTL.BDEVICE is invalid without DEVCTL.SESSION set.
-	 * Also, DRVVBUS pulses for SRP (but not at 5V) ...
-	 */
-	if (usbintr & (AM35X_INTR_DRVVBUS << AM35X_INTR_USB_SHIFT)) {
-		int drvvbus = musb_readl(reg_base, USB_STAT_REG);
-		void __iomem *mregs = musb->mregs;
-		u8 devctl = musb_readb(mregs, MUSB_DEVCTL);
-		int err;
-
-		err = musb->int_usb & MUSB_INTR_VBUSERROR;
-		if (err) {
-			/*
-			 * The Mentor core doesn't debounce VBUS as needed
-			 * to cope with device connect current spikes. This
-			 * means it's not uncommon for bus-powered devices
-			 * to get VBUS errors during enumeration.
-			 *
-			 * This is a workaround, but newer RTL from Mentor
-			 * seems to allow a better one: "re"-starting sessions
-			 * without waiting for VBUS to stop registering in
-			 * devctl.
-			 */
-			musb->int_usb &= ~MUSB_INTR_VBUSERROR;
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VFALL;
-			mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ);
-			WARNING("VBUS error workaround (delay coming)\n");
-		} else if (drvvbus) {
-			MUSB_HST_MODE(musb);
-			musb->xceiv->otg->state = OTG_STATE_A_WAIT_VRISE;
-			portstate(musb->port1_status |= USB_PORT_STAT_POWER);
-			del_timer(&musb->dev_timer);
-		} else {
-			musb->is_active = 0;
-			MUSB_DEV_MODE(musb);
-			musb->xceiv->otg->state = OTG_STATE_B_IDLE;
-			portstate(musb->port1_status &= ~USB_PORT_STAT_POWER);
-		}
-
-		/* NOTE: this must complete power-on within 100 ms. */
-		dev_dbg(musb->controller, "VBUS %s (%s)%s, devctl %02x\n",
-				drvvbus ? "on" : "off",
-				usb_otg_state_string(musb->xceiv->otg->state),
-				err ? " ERROR" : "",
-				devctl);
-		ret = IRQ_HANDLED;
-	}
-
-	/* Drop spurious RX and TX if device is disconnected */
-	if (musb->int_usb & MUSB_INTR_DISCONNECT) {
-		musb->int_tx = 0;
-		musb->int_rx = 0;
-	}
-
-	if (musb->int_tx || musb->int_rx || musb->int_usb)
-		ret |= musb_interrupt(musb);
-
-eoi:
-	/* EOI needs to be written for the IRQ to be re-asserted. */
-	if (ret == IRQ_HANDLED || epintr || usbintr) {
-		/* clear level interrupt */
-		if (data->clear_irq)
-			data->clear_irq();
-		/* write EOI */
-		musb_writel(reg_base, USB_END_OF_INTR_REG, 0);
-	}
-
-	/* Poll for ID change */
-	if (musb->xceiv->otg->state == OTG_STATE_B_IDLE)
-		mod_timer(&musb->dev_timer, jiffies + POLL_SECONDS * HZ);
-
-	spin_unlock_irqrestore(&musb->lock, flags);
-
-	return ret;
-}
-
-static int am35x_musb_set_mode(struct musb *musb, u8 musb_mode)
-{
-	struct device *dev = musb->controller;
-	struct musb_hdrc_platform_data *plat = dev_get_platdata(dev);
-	struct omap_musb_board_data *data = plat->board_data;
-	int     retval = 0;
-
-	if (data->set_mode)
-		data->set_mode(musb_mode);
-	else
-		retval = -EIO;
-
-	return retval;
-}
-
-static int am35x_musb_init(struct musb *musb)
-{
-	struct device *dev = musb->controller;
-	struct musb_hdrc_platform_data *plat = dev_get_platdata(dev);
-	struct omap_musb_board_data *data = plat->board_data;
-	void __iomem *reg_base = musb->ctrl_base;
-	u32 rev;
-
-	musb->mregs += USB_MENTOR_CORE_OFFSET;
-
-	/* Returns zero if e.g. not clocked */
-	rev = musb_readl(reg_base, USB_REVISION_REG);
-	if (!rev)
-		return -ENODEV;
-
-	musb->xceiv = usb_get_phy(USB_PHY_TYPE_USB2);
-	if (IS_ERR_OR_NULL(musb->xceiv))
-		return -EPROBE_DEFER;
-
-	timer_setup(&musb->dev_timer, otg_timer, 0);
-
-	/* Reset the musb */
-	if (data->reset)
-		data->reset();
-
-	/* Reset the controller */
-	musb_writel(reg_base, USB_CTRL_REG, AM35X_SOFT_RESET_MASK);
-
-	/* Start the on-chip PHY and its PLL. */
-	if (data->set_phy_power)
-		data->set_phy_power(1);
-
-	msleep(5);
-
-	musb->isr = am35x_musb_interrupt;
-
-	/* clear level interrupt */
-	if (data->clear_irq)
-		data->clear_irq();
-
-	return 0;
-}
-
-static int am35x_musb_exit(struct musb *musb)
-{
-	struct device *dev = musb->controller;
-	struct musb_hdrc_platform_data *plat = dev_get_platdata(dev);
-	struct omap_musb_board_data *data = plat->board_data;
-
-	del_timer_sync(&musb->dev_timer);
-
-	/* Shutdown the on-chip PHY and its PLL. */
-	if (data->set_phy_power)
-		data->set_phy_power(0);
-
-	usb_put_phy(musb->xceiv);
-
-	return 0;
-}
-
-/* AM35x supports only 32bit read operation */
-static void am35x_read_fifo(struct musb_hw_ep *hw_ep, u16 len, u8 *dst)
-{
-	void __iomem *fifo = hw_ep->fifo;
-	u32		val;
-	int		i;
-
-	/* Read for 32bit-aligned destination address */
-	if (likely((0x03 & (unsigned long) dst) == 0) && len >= 4) {
-		readsl(fifo, dst, len >> 2);
-		dst += len & ~0x03;
-		len &= 0x03;
-	}
-	/*
-	 * Now read the remaining 1 to 3 byte or complete length if
-	 * unaligned address.
-	 */
-	if (len > 4) {
-		for (i = 0; i < (len >> 2); i++) {
-			*(u32 *) dst = musb_readl(fifo, 0);
-			dst += 4;
-		}
-		len &= 0x03;
-	}
-	if (len > 0) {
-		val = musb_readl(fifo, 0);
-		memcpy(dst, &val, len);
-	}
-}
-
-static const struct musb_platform_ops am35x_ops = {
-	.quirks		= MUSB_DMA_INVENTRA | MUSB_INDEXED_EP,
-	.init		= am35x_musb_init,
-	.exit		= am35x_musb_exit,
-
-	.read_fifo	= am35x_read_fifo,
-#ifdef CONFIG_USB_INVENTRA_DMA
-	.dma_init	= musbhs_dma_controller_create,
-	.dma_exit	= musbhs_dma_controller_destroy,
-#endif
-	.enable		= am35x_musb_enable,
-	.disable	= am35x_musb_disable,
-
-	.set_mode	= am35x_musb_set_mode,
-	.try_idle	= am35x_musb_try_idle,
-
-	.set_vbus	= am35x_musb_set_vbus,
-};
-
-static const struct platform_device_info am35x_dev_info = {
-	.name		= "musb-hdrc",
-	.id		= PLATFORM_DEVID_AUTO,
-	.dma_mask	= DMA_BIT_MASK(32),
-};
-
-static int am35x_probe(struct platform_device *pdev)
-{
-	struct musb_hdrc_platform_data	*pdata = dev_get_platdata(&pdev->dev);
-	struct platform_device		*musb;
-	struct am35x_glue		*glue;
-	struct platform_device_info	pinfo;
-	struct clk			*phy_clk;
-	struct clk			*clk;
-
-	int				ret = -ENOMEM;
-
-	glue = kzalloc(sizeof(*glue), GFP_KERNEL);
-	if (!glue)
-		goto err0;
-
-	phy_clk = clk_get(&pdev->dev, "fck");
-	if (IS_ERR(phy_clk)) {
-		dev_err(&pdev->dev, "failed to get PHY clock\n");
-		ret = PTR_ERR(phy_clk);
-		goto err3;
-	}
-
-	clk = clk_get(&pdev->dev, "ick");
-	if (IS_ERR(clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		ret = PTR_ERR(clk);
-		goto err4;
-	}
-
-	ret = clk_enable(phy_clk);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to enable PHY clock\n");
-		goto err5;
-	}
-
-	ret = clk_enable(clk);
-	if (ret) {
-		dev_err(&pdev->dev, "failed to enable clock\n");
-		goto err6;
-	}
-
-	glue->dev			= &pdev->dev;
-	glue->phy_clk			= phy_clk;
-	glue->clk			= clk;
-
-	pdata->platform_ops		= &am35x_ops;
-
-	glue->phy = usb_phy_generic_register();
-	if (IS_ERR(glue->phy)) {
-		ret = PTR_ERR(glue->phy);
-		goto err7;
-	}
-	platform_set_drvdata(pdev, glue);
-
-	pinfo = am35x_dev_info;
-	pinfo.parent = &pdev->dev;
-	pinfo.res = pdev->resource;
-	pinfo.num_res = pdev->num_resources;
-	pinfo.data = pdata;
-	pinfo.size_data = sizeof(*pdata);
-	pinfo.fwnode = of_fwnode_handle(pdev->dev.of_node);
-	pinfo.of_node_reused = true;
-
-	glue->musb = musb = platform_device_register_full(&pinfo);
-	if (IS_ERR(musb)) {
-		ret = PTR_ERR(musb);
-		dev_err(&pdev->dev, "failed to register musb device: %d\n", ret);
-		goto err8;
-	}
-
-	return 0;
-
-err8:
-	usb_phy_generic_unregister(glue->phy);
-
-err7:
-	clk_disable(clk);
-
-err6:
-	clk_disable(phy_clk);
-
-err5:
-	clk_put(clk);
-
-err4:
-	clk_put(phy_clk);
-
-err3:
-	kfree(glue);
-
-err0:
-	return ret;
-}
-
-static int am35x_remove(struct platform_device *pdev)
-{
-	struct am35x_glue	*glue = platform_get_drvdata(pdev);
-
-	platform_device_unregister(glue->musb);
-	usb_phy_generic_unregister(glue->phy);
-	clk_disable(glue->clk);
-	clk_disable(glue->phy_clk);
-	clk_put(glue->clk);
-	clk_put(glue->phy_clk);
-	kfree(glue);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int am35x_suspend(struct device *dev)
-{
-	struct am35x_glue	*glue = dev_get_drvdata(dev);
-	struct musb_hdrc_platform_data *plat = dev_get_platdata(dev);
-	struct omap_musb_board_data *data = plat->board_data;
-
-	/* Shutdown the on-chip PHY and its PLL. */
-	if (data->set_phy_power)
-		data->set_phy_power(0);
-
-	clk_disable(glue->phy_clk);
-	clk_disable(glue->clk);
-
-	return 0;
-}
-
-static int am35x_resume(struct device *dev)
-{
-	struct am35x_glue	*glue = dev_get_drvdata(dev);
-	struct musb_hdrc_platform_data *plat = dev_get_platdata(dev);
-	struct omap_musb_board_data *data = plat->board_data;
-	int			ret;
-
-	/* Start the on-chip PHY and its PLL. */
-	if (data->set_phy_power)
-		data->set_phy_power(1);
-
-	ret = clk_enable(glue->phy_clk);
-	if (ret) {
-		dev_err(dev, "failed to enable PHY clock\n");
-		return ret;
-	}
-
-	ret = clk_enable(glue->clk);
-	if (ret) {
-		dev_err(dev, "failed to enable clock\n");
-		return ret;
-	}
-
-	return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(am35x_pm_ops, am35x_suspend, am35x_resume);
-
-static struct platform_driver am35x_driver = {
-	.probe		= am35x_probe,
-	.remove		= am35x_remove,
-	.driver		= {
-		.name	= "musb-am35x",
-		.pm	= &am35x_pm_ops,
-	},
-};
-
-MODULE_DESCRIPTION("AM35x MUSB Glue Layer");
-MODULE_AUTHOR("Ajay Kumar Gupta <ajay.gupta@ti.com>");
-MODULE_LICENSE("GPL v2");
-module_platform_driver(am35x_driver);
-- 
GitLab


From ae423ef5d095e09970f52c08020fdbf7f9d87c22 Mon Sep 17 00:00:00 2001
From: Pawel Laszczak <pawell@cadence.com>
Date: Tue, 22 Nov 2022 03:51:38 -0500
Subject: [PATCH 2872/4122] usb: cdnsp: fix lack of ZLP for ep0

Patch implements the handling of ZLP for control transfer.
To send the ZLP driver must prepare the extra TRB in TD with
length set to zero and TRB type to TRB_NORMAL.
The first TRB must have set TRB_CHAIN flag, TD_SIZE = 1
and TRB type to TRB_DATA.

Fixes: 3d82904559f4 ("usb: cdnsp: cdns3 Add main part of Cadence USBSSP DRD Driver")
cc: <stable@vger.kernel.org>
Reviewed-by: Peter Chen <peter.chen@kernel.org>
Signed-off-by: Pawel Laszczak <pawell@cadence.com>
Link: https://lore.kernel.org/r/20221122085138.332434-1-pawell@cadence.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/cdns3/cdnsp-ring.c | 42 ++++++++++++++++++++++++++--------
 1 file changed, 32 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/cdns3/cdnsp-ring.c b/drivers/usb/cdns3/cdnsp-ring.c
index 2f29431f612e0..b23e543b3a3d5 100644
--- a/drivers/usb/cdns3/cdnsp-ring.c
+++ b/drivers/usb/cdns3/cdnsp-ring.c
@@ -2006,10 +2006,11 @@ int cdnsp_queue_bulk_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
 
 int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
 {
-	u32 field, length_field, remainder;
+	u32 field, length_field, zlp = 0;
 	struct cdnsp_ep *pep = preq->pep;
 	struct cdnsp_ring *ep_ring;
 	int num_trbs;
+	u32 maxp;
 	int ret;
 
 	ep_ring = cdnsp_request_to_transfer_ring(pdev, preq);
@@ -2019,26 +2020,33 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
 	/* 1 TRB for data, 1 for status */
 	num_trbs = (pdev->three_stage_setup) ? 2 : 1;
 
+	maxp = usb_endpoint_maxp(pep->endpoint.desc);
+
+	if (preq->request.zero && preq->request.length &&
+	    (preq->request.length % maxp == 0)) {
+		num_trbs++;
+		zlp = 1;
+	}
+
 	ret = cdnsp_prepare_transfer(pdev, preq, num_trbs);
 	if (ret)
 		return ret;
 
 	/* If there's data, queue data TRBs */
-	if (pdev->ep0_expect_in)
-		field = TRB_TYPE(TRB_DATA) | TRB_IOC;
-	else
-		field = TRB_ISP | TRB_TYPE(TRB_DATA) | TRB_IOC;
-
 	if (preq->request.length > 0) {
-		remainder = cdnsp_td_remainder(pdev, 0, preq->request.length,
-					       preq->request.length, preq, 1, 0);
+		field = TRB_TYPE(TRB_DATA);
 
-		length_field = TRB_LEN(preq->request.length) |
-				TRB_TD_SIZE(remainder) | TRB_INTR_TARGET(0);
+		if (zlp)
+			field |= TRB_CHAIN;
+		else
+			field |= TRB_IOC | (pdev->ep0_expect_in ? 0 : TRB_ISP);
 
 		if (pdev->ep0_expect_in)
 			field |= TRB_DIR_IN;
 
+		length_field = TRB_LEN(preq->request.length) |
+			       TRB_TD_SIZE(zlp) | TRB_INTR_TARGET(0);
+
 		cdnsp_queue_trb(pdev, ep_ring, true,
 				lower_32_bits(preq->request.dma),
 				upper_32_bits(preq->request.dma), length_field,
@@ -2046,6 +2054,20 @@ int cdnsp_queue_ctrl_tx(struct cdnsp_device *pdev, struct cdnsp_request *preq)
 				TRB_SETUPID(pdev->setup_id) |
 				pdev->setup_speed);
 
+		if (zlp) {
+			field = TRB_TYPE(TRB_NORMAL) | TRB_IOC;
+
+			if (!pdev->ep0_expect_in)
+				field = TRB_ISP;
+
+			cdnsp_queue_trb(pdev, ep_ring, true,
+					lower_32_bits(preq->request.dma),
+					upper_32_bits(preq->request.dma), 0,
+					field | ep_ring->cycle_state |
+					TRB_SETUPID(pdev->setup_id) |
+					pdev->setup_speed);
+		}
+
 		pdev->ep0_stage = CDNSP_DATA_STAGE;
 	}
 
-- 
GitLab


From 22683e480b370ad1b3a34cfa461028d1f51da12d Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Sun, 27 Nov 2022 15:52:30 +0100
Subject: [PATCH 2873/4122] usb: misc: onboard_usb_hub: Drop obsolete
 dependency on COMPILE_TEST

Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it
is possible to test-build any driver which depends on OF on any
architecture by explicitly selecting OF. Therefore depending on
COMPILE_TEST as an alternative is no longer needed.

It is actually better to always build such drivers with OF enabled,
so that the test builds are closer to how each driver will actually be
built on its intended target. Building them without OF may not test
much as the compiler will optimize out potentially large parts of the
code. In the worst case, this could even pop false positive warnings.
Dropping COMPILE_TEST here improves the quality of our testing and
avoids wasting time on non-existent issues.

Cc: Matthias Kaehlcke <mka@chromium.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
Link: https://lore.kernel.org/r/20221127155230.144886b7@endymion.delvare
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/misc/Kconfig b/drivers/usb/misc/Kconfig
index 9367c12c7e6f8..a5f7652db7da1 100644
--- a/drivers/usb/misc/Kconfig
+++ b/drivers/usb/misc/Kconfig
@@ -298,7 +298,7 @@ config BRCM_USB_PINMAP
 
 config USB_ONBOARD_HUB
 	tristate "Onboard USB hub support"
-	depends on OF || COMPILE_TEST
+	depends on OF
 	help
 	  Say Y here if you want to support discrete onboard USB hubs that
 	  don't require an additional control bus for initialization, but
-- 
GitLab


From 49b42475dd8a9fddbb2f3dc17ff6e4b115c80bfb Mon Sep 17 00:00:00 2001
From: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Date: Wed, 23 Nov 2022 21:55:27 +0800
Subject: [PATCH 2874/4122] dt-bindings: usb: mtu3: add compatible for mt8186

Add a new compatible for mt8186 SoC.

Signed-off-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Link: https://lore.kernel.org/r/20221123135531.23221-2-allen-kh.cheng@mediatek.com
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml b/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml
index 80750b0f458a8..7168110e2f9de 100644
--- a/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml
+++ b/Documentation/devicetree/bindings/usb/mediatek,mtu3.yaml
@@ -24,6 +24,7 @@ properties:
           - mediatek,mt2712-mtu3
           - mediatek,mt8173-mtu3
           - mediatek,mt8183-mtu3
+          - mediatek,mt8186-mtu3
           - mediatek,mt8188-mtu3
           - mediatek,mt8192-mtu3
           - mediatek,mt8195-mtu3
-- 
GitLab


From 89ff3dfac604614287ad5aad9370c3f984ea3f4b Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 22 Nov 2022 12:35:21 +0000
Subject: [PATCH 2875/4122] usb: gadget: f_hid: fix f_hidg lifetime vs cdev

The embedded struct cdev does not have its lifetime correctly tied to
the enclosing struct f_hidg, so there is a use-after-free if /dev/hidgN
is held open while the gadget is deleted.

This can readily be replicated with libusbgx's example programs (for
conciseness - operating directly via configfs is equivalent):

	gadget-hid
	exec 3<> /dev/hidg0
	gadget-vid-pid-remove
	exec 3<&-

Pull the existing device up in to struct f_hidg and make use of the
cdev_device_{add,del}() helpers.  This changes the lifetime of the
device object to match struct f_hidg, but note that it is still added
and deleted at the same time.

Fixes: 71adf1189469 ("USB: gadget: add HID gadget driver")
Tested-by: Lee Jones <lee@kernel.org>
Reviewed-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Reviewed-by: Lee Jones <lee@kernel.org>
Signed-off-by: John Keeping <john@metanate.com>
Link: https://lore.kernel.org/r/20221122123523.3068034-2-john@metanate.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_hid.c | 52 ++++++++++++++++-------------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index ca0a7d9eaa34e..8b8bbeaa27cb8 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -71,7 +71,7 @@ struct f_hidg {
 	wait_queue_head_t		write_queue;
 	struct usb_request		*req;
 
-	int				minor;
+	struct device			dev;
 	struct cdev			cdev;
 	struct usb_function		func;
 
@@ -84,6 +84,14 @@ static inline struct f_hidg *func_to_hidg(struct usb_function *f)
 	return container_of(f, struct f_hidg, func);
 }
 
+static void hidg_release(struct device *dev)
+{
+	struct f_hidg *hidg = container_of(dev, struct f_hidg, dev);
+
+	kfree(hidg->set_report_buf);
+	kfree(hidg);
+}
+
 /*-------------------------------------------------------------------------*/
 /*                           Static descriptors                            */
 
@@ -904,9 +912,7 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
 	struct usb_ep		*ep;
 	struct f_hidg		*hidg = func_to_hidg(f);
 	struct usb_string	*us;
-	struct device		*device;
 	int			status;
-	dev_t			dev;
 
 	/* maybe allocate device-global string IDs, and patch descriptors */
 	us = usb_gstrings_attach(c->cdev, ct_func_strings,
@@ -999,21 +1005,11 @@ static int hidg_bind(struct usb_configuration *c, struct usb_function *f)
 
 	/* create char device */
 	cdev_init(&hidg->cdev, &f_hidg_fops);
-	dev = MKDEV(major, hidg->minor);
-	status = cdev_add(&hidg->cdev, dev, 1);
+	status = cdev_device_add(&hidg->cdev, &hidg->dev);
 	if (status)
 		goto fail_free_descs;
 
-	device = device_create(hidg_class, NULL, dev, NULL,
-			       "%s%d", "hidg", hidg->minor);
-	if (IS_ERR(device)) {
-		status = PTR_ERR(device);
-		goto del;
-	}
-
 	return 0;
-del:
-	cdev_del(&hidg->cdev);
 fail_free_descs:
 	usb_free_all_descriptors(f);
 fail:
@@ -1244,9 +1240,7 @@ static void hidg_free(struct usb_function *f)
 
 	hidg = func_to_hidg(f);
 	opts = container_of(f->fi, struct f_hid_opts, func_inst);
-	kfree(hidg->report_desc);
-	kfree(hidg->set_report_buf);
-	kfree(hidg);
+	put_device(&hidg->dev);
 	mutex_lock(&opts->lock);
 	--opts->refcnt;
 	mutex_unlock(&opts->lock);
@@ -1256,8 +1250,7 @@ static void hidg_unbind(struct usb_configuration *c, struct usb_function *f)
 {
 	struct f_hidg *hidg = func_to_hidg(f);
 
-	device_destroy(hidg_class, MKDEV(major, hidg->minor));
-	cdev_del(&hidg->cdev);
+	cdev_device_del(&hidg->cdev, &hidg->dev);
 
 	usb_free_all_descriptors(f);
 }
@@ -1266,6 +1259,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
 {
 	struct f_hidg *hidg;
 	struct f_hid_opts *opts;
+	int ret;
 
 	/* allocate and initialize one new instance */
 	hidg = kzalloc(sizeof(*hidg), GFP_KERNEL);
@@ -1277,17 +1271,27 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
 	mutex_lock(&opts->lock);
 	++opts->refcnt;
 
-	hidg->minor = opts->minor;
+	device_initialize(&hidg->dev);
+	hidg->dev.release = hidg_release;
+	hidg->dev.class = hidg_class;
+	hidg->dev.devt = MKDEV(major, opts->minor);
+	ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor);
+	if (ret) {
+		--opts->refcnt;
+		mutex_unlock(&opts->lock);
+		return ERR_PTR(ret);
+	}
+
 	hidg->bInterfaceSubClass = opts->subclass;
 	hidg->bInterfaceProtocol = opts->protocol;
 	hidg->report_length = opts->report_length;
 	hidg->report_desc_length = opts->report_desc_length;
 	if (opts->report_desc) {
-		hidg->report_desc = kmemdup(opts->report_desc,
-					    opts->report_desc_length,
-					    GFP_KERNEL);
+		hidg->report_desc = devm_kmemdup(&hidg->dev, opts->report_desc,
+						 opts->report_desc_length,
+						 GFP_KERNEL);
 		if (!hidg->report_desc) {
-			kfree(hidg);
+			put_device(&hidg->dev);
 			mutex_unlock(&opts->lock);
 			return ERR_PTR(-ENOMEM);
 		}
-- 
GitLab


From 70a3288a7586526315105c699b687d78cd32559a Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 22 Nov 2022 12:35:22 +0000
Subject: [PATCH 2876/4122] usb: gadget: f_hid: fix refcount leak on error path

When failing to allocate report_desc, opts->refcnt has already been
incremented so it needs to be decremented to avoid leaving the options
structure permanently locked.

Fixes: 21a9476a7ba8 ("usb: gadget: hid: add configfs support")
Tested-by: Lee Jones <lee@kernel.org>
Reviewed-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Reviewed-by: Lee Jones <lee@kernel.org>
Signed-off-by: John Keeping <john@metanate.com>
Link: https://lore.kernel.org/r/20221122123523.3068034-3-john@metanate.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_hid.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 8b8bbeaa27cb8..6be6009f911e1 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -1292,6 +1292,7 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
 						 GFP_KERNEL);
 		if (!hidg->report_desc) {
 			put_device(&hidg->dev);
+			--opts->refcnt;
 			mutex_unlock(&opts->lock);
 			return ERR_PTR(-ENOMEM);
 		}
-- 
GitLab


From 944fe915d00d3cb1bacb1e77cabfb6dc82e6f8b8 Mon Sep 17 00:00:00 2001
From: John Keeping <john@metanate.com>
Date: Tue, 22 Nov 2022 12:35:23 +0000
Subject: [PATCH 2877/4122] usb: gadget: f_hid: tidy error handling in
 hidg_alloc

Unify error handling at the end of the function, reducing the risk of
missing something on one of the error paths.

Moving the increment of opts->refcnt later means there is no need to
decrement it on the error path and is safe as this is guarded by
opts->lock which is held for this entire section.

Tested-by: Lee Jones <lee@kernel.org>
Reviewed-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Reviewed-by: Lee Jones <lee@kernel.org>
Signed-off-by: John Keeping <john@metanate.com>
Link: https://lore.kernel.org/r/20221122123523.3068034-4-john@metanate.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_hid.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/usb/gadget/function/f_hid.c b/drivers/usb/gadget/function/f_hid.c
index 6be6009f911e1..a8da3b4a2855a 100644
--- a/drivers/usb/gadget/function/f_hid.c
+++ b/drivers/usb/gadget/function/f_hid.c
@@ -1269,18 +1269,14 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
 	opts = container_of(fi, struct f_hid_opts, func_inst);
 
 	mutex_lock(&opts->lock);
-	++opts->refcnt;
 
 	device_initialize(&hidg->dev);
 	hidg->dev.release = hidg_release;
 	hidg->dev.class = hidg_class;
 	hidg->dev.devt = MKDEV(major, opts->minor);
 	ret = dev_set_name(&hidg->dev, "hidg%d", opts->minor);
-	if (ret) {
-		--opts->refcnt;
-		mutex_unlock(&opts->lock);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto err_unlock;
 
 	hidg->bInterfaceSubClass = opts->subclass;
 	hidg->bInterfaceProtocol = opts->protocol;
@@ -1291,14 +1287,13 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
 						 opts->report_desc_length,
 						 GFP_KERNEL);
 		if (!hidg->report_desc) {
-			put_device(&hidg->dev);
-			--opts->refcnt;
-			mutex_unlock(&opts->lock);
-			return ERR_PTR(-ENOMEM);
+			ret = -ENOMEM;
+			goto err_put_device;
 		}
 	}
 	hidg->use_out_ep = !opts->no_out_endpoint;
 
+	++opts->refcnt;
 	mutex_unlock(&opts->lock);
 
 	hidg->func.name    = "hid";
@@ -1313,6 +1308,12 @@ static struct usb_function *hidg_alloc(struct usb_function_instance *fi)
 	hidg->qlen	   = 4;
 
 	return &hidg->func;
+
+err_put_device:
+	put_device(&hidg->dev);
+err_unlock:
+	mutex_unlock(&opts->lock);
+	return ERR_PTR(ret);
 }
 
 DECLARE_USB_FUNCTION_INIT(hid, hidg_alloc_inst, hidg_alloc);
-- 
GitLab


From a9efc04cfd05690e91279f41c2325c46335c43ef Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 23 Nov 2022 16:48:58 +0200
Subject: [PATCH 2878/4122] i915: Move list_count() to list.h for broader use

Some of the existing users, and definitely will be new ones, want to
count existing nodes in the list. Provide a generic API for that by
moving code from i915 to list.h.

Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
Acked-by: Jani Nikula <jani.nikula@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221123144901.40493-1-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 +------------
 include/linux/list.h                      | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 1f7188129cd1f..47734c4ebfa01 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -2004,17 +2004,6 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
 	}
 }
 
-static unsigned long list_count(struct list_head *list)
-{
-	struct list_head *pos;
-	unsigned long count = 0;
-
-	list_for_each(pos, list)
-		count++;
-
-	return count;
-}
-
 static unsigned long read_ul(void *p, size_t x)
 {
 	return *(unsigned long *)(p + x);
@@ -2189,7 +2178,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_lock_irqsave(&engine->sched_engine->lock, flags);
 	engine_dump_active_requests(engine, m);
 
-	drm_printf(m, "\tOn hold?: %lu\n",
+	drm_printf(m, "\tOn hold?: %zu\n",
 		   list_count(&engine->sched_engine->hold));
 	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
 
diff --git a/include/linux/list.h b/include/linux/list.h
index 61762054b4be9..632a298c7018c 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -655,6 +655,21 @@ static inline void list_splice_tail_init(struct list_head *list,
 	     !list_is_head(pos, (head)); \
 	     pos = n, n = pos->prev)
 
+/**
+ * list_count - count nodes in the list
+ * @head:	the head for your list.
+ */
+static inline size_t list_count(struct list_head *head)
+{
+	struct list_head *pos;
+	size_t count = 0;
+
+	list_for_each(pos, head)
+		count++;
+
+	return count;
+}
+
 /**
  * list_entry_is_head - test if the entry points to the head of the list
  * @pos:	the type * to cursor
-- 
GitLab


From 33f00f41d963c86176dba2f9faff9b428a542e60 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 23 Nov 2022 16:48:59 +0200
Subject: [PATCH 2879/4122] usb: gadget: hid: Convert to use list_count()

The list API now provides the list_count() to help with counting
existing nodes in the list. Utilise it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221123144901.40493-2-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/legacy/hid.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/gadget/legacy/hid.c b/drivers/usb/gadget/legacy/hid.c
index 1187ee4f316ab..6196c3456e0be 100644
--- a/drivers/usb/gadget/legacy/hid.c
+++ b/drivers/usb/gadget/legacy/hid.c
@@ -133,14 +133,11 @@ static struct usb_configuration config_driver = {
 static int hid_bind(struct usb_composite_dev *cdev)
 {
 	struct usb_gadget *gadget = cdev->gadget;
-	struct list_head *tmp;
 	struct hidg_func_node *n = NULL, *m, *iter_n;
 	struct f_hid_opts *hid_opts;
-	int status, funcs = 0;
-
-	list_for_each(tmp, &hidg_func_list)
-		funcs++;
+	int status, funcs;
 
+	funcs = list_count(&hidg_func_list);
 	if (!funcs)
 		return -ENODEV;
 
-- 
GitLab


From c2d9d02f7bf3c641f9b8e6c9f5de1e564cdeca69 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 23 Nov 2022 16:49:00 +0200
Subject: [PATCH 2880/4122] usb: gadget: udc: bcm63xx: Convert to use
 list_count()

The list API now provides the list_count() to help with counting
existing nodes in the list. Utilise it.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221123144901.40493-3-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/bcm63xx_udc.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/usb/gadget/udc/bcm63xx_udc.c b/drivers/usb/gadget/udc/bcm63xx_udc.c
index 2cdb07905bdeb..0762e49e85f85 100644
--- a/drivers/usb/gadget/udc/bcm63xx_udc.c
+++ b/drivers/usb/gadget/udc/bcm63xx_udc.c
@@ -2172,7 +2172,6 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p)
 
 	for (ch_idx = 0; ch_idx < BCM63XX_NUM_IUDMA; ch_idx++) {
 		struct iudma_ch *iudma = &udc->iudma[ch_idx];
-		struct list_head *pos;
 
 		seq_printf(s, "IUDMA channel %d -- ", ch_idx);
 		switch (iudma_defaults[ch_idx].ep_type) {
@@ -2205,14 +2204,10 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p)
 		seq_printf(s, "  desc: %d/%d used", iudma->n_bds_used,
 			   iudma->n_bds);
 
-		if (iudma->bep) {
-			i = 0;
-			list_for_each(pos, &iudma->bep->queue)
-				i++;
-			seq_printf(s, "; %d queued\n", i);
-		} else {
+		if (iudma->bep)
+			seq_printf(s, "; %zu queued\n", list_count(&iudma->bep->queue));
+		else
 			seq_printf(s, "\n");
-		}
 
 		for (i = 0; i < iudma->n_bds; i++) {
 			struct bcm_enet_desc *d = &iudma->bd_ring[i];
-- 
GitLab


From b47ec9727f47d1dce4e8cbc9aef01c80b2332535 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 23 Nov 2022 16:49:01 +0200
Subject: [PATCH 2881/4122] xhci: Convert to use list_count()

The list API now provides the list_count() to help with counting
existing nodes in the list. Utilise it.

Acked-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Link: https://lore.kernel.org/r/20221123144901.40493-4-andriy.shevchenko@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index ad81e9a508b14..817c31e3b0c8b 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2532,7 +2532,6 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	union xhci_trb *ep_trb;
 	int status = -EINPROGRESS;
 	struct xhci_ep_ctx *ep_ctx;
-	struct list_head *tmp;
 	u32 trb_comp_code;
 	int td_num = 0;
 	bool handling_skipped_tds = false;
@@ -2580,10 +2579,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	}
 
 	/* Count current td numbers if ep->skip is set */
-	if (ep->skip) {
-		list_for_each(tmp, &ep_ring->td_list)
-			td_num++;
-	}
+	if (ep->skip)
+		td_num += list_count(&ep_ring->td_list);
 
 	/* Look for common error cases */
 	switch (trb_comp_code) {
-- 
GitLab


From fb12940f51d96ead10f9c0fd578e69b8de10ca81 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 28 Nov 2022 18:35:39 +0100
Subject: [PATCH 2882/4122] driver core: fix up some missing class.devnode()
 conversions.

In commit ff62b8e6588f ("driver core: make struct class.devnode() take a
const *") the ->devnode callback changed the pointer to be const, but a
few instances of PowerPC drivers were not caught for some reason.

Fix this up by changing the pointers to be const.

Fixes: ff62b8e6588f ("driver core: make struct class.devnode() take a const *")
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Frederic Barrat <fbarrat@linux.ibm.com>
Cc: Andrew Donnellan <ajd@linux.ibm.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linuxppc-dev@lists.ozlabs.org
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Link: https://lore.kernel.org/r/20221128173539.3112234-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/book3s/vas-api.c | 2 +-
 drivers/misc/cxl/file.c                 | 2 +-
 drivers/misc/ocxl/file.c                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/platforms/book3s/vas-api.c b/arch/powerpc/platforms/book3s/vas-api.c
index 40f5ae5e1238d..eb5bed3337503 100644
--- a/arch/powerpc/platforms/book3s/vas-api.c
+++ b/arch/powerpc/platforms/book3s/vas-api.c
@@ -53,7 +53,7 @@ struct coproc_instance {
 	struct vas_window *txwin;
 };
 
-static char *coproc_devnode(struct device *dev, umode_t *mode)
+static char *coproc_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "crypto/%s", dev_name(dev));
 }
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 3dbdce96fae0d..5878329b011a0 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -546,7 +546,7 @@ static const struct file_operations afu_master_fops = {
 };
 
 
-static char *cxl_devnode(struct device *dev, umode_t *mode)
+static char *cxl_devnode(const struct device *dev, umode_t *mode)
 {
 	if (cpu_has_feature(CPU_FTR_HVMODE) &&
 	    CXL_DEVT_IS_CARD(dev->devt)) {
diff --git a/drivers/misc/ocxl/file.c b/drivers/misc/ocxl/file.c
index d46dba2df5a10..d96be36405a03 100644
--- a/drivers/misc/ocxl/file.c
+++ b/drivers/misc/ocxl/file.c
@@ -581,7 +581,7 @@ void ocxl_file_unregister_afu(struct ocxl_afu *afu)
 	device_unregister(&info->dev);
 }
 
-static char *ocxl_devnode(struct device *dev, umode_t *mode)
+static char *ocxl_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "ocxl/%s", dev_name(dev));
 }
-- 
GitLab


From 2d6c66f5253e7d168a76048d18e1209c52f98a2b Mon Sep 17 00:00:00 2001
From: zhang songyi <zhang.songyi@zte.com.cn>
Date: Tue, 29 Nov 2022 15:54:07 +0800
Subject: [PATCH 2883/4122] RDMA/mlx4: Remove NULL check before dev_{put, hold}

The call netdev_{put, hold} of dev_{put, hold} will check NULL,
so there is no need to check before using dev_{put, hold}.

Fix the following coccicheck warnings:
/drivers/infiniband/hw/mlx4/main.c:1311:2-10: WARNING:
WARNING  NULL check before dev_{put, hold} functions is not needed.

/drivers/infiniband/hw/mlx4/main.c:148:2-10: WARNING:
WARNING  NULL check before dev_{put, hold} functions is not needed.

/drivers/infiniband/hw/mlx4/main.c:1959:3-11: WARNING:
WARNING  NULL check before dev_{put, hold} functions is not needed.

/drivers/infiniband/hw/mlx4/main.c:1962:3-10: WARNING:
WARNING  NULL check before dev_{put, hold} functions is not needed.

Signed-off-by: zhang songyi <zhang.songyi@zte.com.cn>
Link: https://lore.kernel.org/r/202211291554079687539@zte.com.cn
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx4/main.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index ba47874f90d38..dceebcd885bbd 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -144,8 +144,7 @@ static struct net_device *mlx4_ib_get_netdev(struct ib_device *device,
 			}
 		}
 	}
-	if (dev)
-		dev_hold(dev);
+	dev_hold(dev);
 
 	rcu_read_unlock();
 	return dev;
@@ -1307,8 +1306,7 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp,
 
 	spin_lock_bh(&mdev->iboe.lock);
 	ndev = mdev->iboe.netdevs[mqp->port - 1];
-	if (ndev)
-		dev_hold(ndev);
+	dev_hold(ndev);
 	spin_unlock_bh(&mdev->iboe.lock);
 
 	if (ndev) {
@@ -1955,11 +1953,9 @@ static int mlx4_ib_mcg_detach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 	if (ge) {
 		spin_lock_bh(&mdev->iboe.lock);
 		ndev = ge->added ? mdev->iboe.netdevs[ge->port - 1] : NULL;
-		if (ndev)
-			dev_hold(ndev);
+		dev_hold(ndev);
 		spin_unlock_bh(&mdev->iboe.lock);
-		if (ndev)
-			dev_put(ndev);
+		dev_put(ndev);
 		list_del(&ge->list);
 		kfree(ge);
 	} else
-- 
GitLab


From b0284cd29a957e62d60c2886fd663be93c56f9c0 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 3 Nov 2022 18:10:34 -0700
Subject: [PATCH 2884/4122] mm: Do not enable PG_arch_2 for all 64-bit
 architectures

Commit 4beba9486abd ("mm: Add PG_arch_2 page flag") introduced a new
page flag for all 64-bit architectures. However, even if an architecture
is 64-bit, it may still have limited spare bits in the 'flags' member of
'struct page'. This may happen if an architecture enables SPARSEMEM
without SPARSEMEM_VMEMMAP as is the case with the newly added loongarch.
This architecture port needs 19 more bits for the sparsemem section
information and, while it is currently fine with PG_arch_2, adding any
more PG_arch_* flags will trigger build-time warnings.

Add a new CONFIG_ARCH_USES_PG_ARCH_X option which can be selected by
architectures that need more PG_arch_* flags beyond PG_arch_1. Select it
on arm64.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[pcc@google.com: fix build with CONFIG_ARM64_MTE disabled]
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reported-by: kernel test robot <lkp@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Steven Price <steven.price@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-2-pcc@google.com
---
 arch/arm64/Kconfig             | 1 +
 fs/proc/page.c                 | 2 +-
 include/linux/page-flags.h     | 2 +-
 include/trace/events/mmflags.h | 8 ++++----
 mm/Kconfig                     | 8 ++++++++
 mm/huge_memory.c               | 2 +-
 6 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 505c8a1ccbe0c..cd93d07384256 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1965,6 +1965,7 @@ config ARM64_MTE
 	depends on ARM64_PAN
 	select ARCH_HAS_SUBPAGE_FAULTS
 	select ARCH_USES_HIGH_VMA_FLAGS
+	select ARCH_USES_PG_ARCH_X
 	help
 	  Memory Tagging (part of the ARMv8.5 Extensions) provides
 	  architectural support for run-time, always-on detection of
diff --git a/fs/proc/page.c b/fs/proc/page.c
index f2273b164535b..882525c8e94c5 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -219,7 +219,7 @@ u64 stable_page_flags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_PRIVATE_2,	PG_private_2);
 	u |= kpf_copy_bit(k, KPF_OWNER_PRIVATE,	PG_owner_priv_1);
 	u |= kpf_copy_bit(k, KPF_ARCH,		PG_arch_1);
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_ARCH_USES_PG_ARCH_X
 	u |= kpf_copy_bit(k, KPF_ARCH_2,	PG_arch_2);
 #endif
 
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0b0ae5084e60c..5dc7977edf9d5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -132,7 +132,7 @@ enum pageflags {
 	PG_young,
 	PG_idle,
 #endif
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_ARCH_USES_PG_ARCH_X
 	PG_arch_2,
 #endif
 #ifdef CONFIG_KASAN_HW_TAGS
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index e87cb2b80ed3c..d9f6d35fb1509 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -91,10 +91,10 @@
 #define IF_HAVE_PG_IDLE(flag,string)
 #endif
 
-#ifdef CONFIG_64BIT
-#define IF_HAVE_PG_ARCH_2(flag,string) ,{1UL << flag, string}
+#ifdef CONFIG_ARCH_USES_PG_ARCH_X
+#define IF_HAVE_PG_ARCH_X(flag,string) ,{1UL << flag, string}
 #else
-#define IF_HAVE_PG_ARCH_2(flag,string)
+#define IF_HAVE_PG_ARCH_X(flag,string)
 #endif
 
 #ifdef CONFIG_KASAN_HW_TAGS
@@ -130,7 +130,7 @@ IF_HAVE_PG_UNCACHED(PG_uncached,	"uncached"	)		\
 IF_HAVE_PG_HWPOISON(PG_hwpoison,	"hwpoison"	)		\
 IF_HAVE_PG_IDLE(PG_young,		"young"		)		\
 IF_HAVE_PG_IDLE(PG_idle,		"idle"		)		\
-IF_HAVE_PG_ARCH_2(PG_arch_2,		"arch_2"	)		\
+IF_HAVE_PG_ARCH_X(PG_arch_2,		"arch_2"	)		\
 IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
 
 #define show_page_flags(flags)						\
diff --git a/mm/Kconfig b/mm/Kconfig
index 57e1d8c5b5052..807bd7192f519 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1005,6 +1005,14 @@ config ARCH_USES_HIGH_VMA_FLAGS
 config ARCH_HAS_PKEYS
 	bool
 
+config ARCH_USES_PG_ARCH_X
+	bool
+	help
+	  Enable the definition of PG_arch_x page flags with x > 1. Only
+	  suitable for 64-bit architectures with CONFIG_FLATMEM or
+	  CONFIG_SPARSEMEM_VMEMMAP enabled, otherwise there may not be
+	  enough room for additional bits in page->flags.
+
 config VM_EVENT_COUNTERS
 	default y
 	bool "Enable VM event counters for /proc/vmstat" if EXPERT
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 561a42567477d..76e7b973919ca 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2444,7 +2444,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_workingset) |
 			 (1L << PG_locked) |
 			 (1L << PG_unevictable) |
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_ARCH_USES_PG_ARCH_X
 			 (1L << PG_arch_2) |
 #endif
 			 (1L << PG_dirty) |
-- 
GitLab


From e059853d14ca4ed0f6a190d7109487918a22a976 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 3 Nov 2022 18:10:35 -0700
Subject: [PATCH 2885/4122] arm64: mte: Fix/clarify the PG_mte_tagged semantics

Currently the PG_mte_tagged page flag mostly means the page contains
valid tags and it should be set after the tags have been cleared or
restored. However, in mte_sync_tags() it is set before setting the tags
to avoid, in theory, a race with concurrent mprotect(PROT_MTE) for
shared pages. However, a concurrent mprotect(PROT_MTE) with a copy on
write in another thread can cause the new page to have stale tags.
Similarly, tag reading via ptrace() can read stale tags if the
PG_mte_tagged flag is set before actually clearing/restoring the tags.

Fix the PG_mte_tagged semantics so that it is only set after the tags
have been cleared or restored. This is safe for swap restoring into a
MAP_SHARED or CoW page since the core code takes the page lock. Add two
functions to test and set the PG_mte_tagged flag with acquire and
release semantics. The downside is that concurrent mprotect(PROT_MTE) on
a MAP_SHARED page may cause tag loss. This is already the case for KVM
guests if a VMM changes the page protection while the guest triggers a
user_mem_abort().

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[pcc@google.com: fix build with CONFIG_ARM64_MTE disabled]
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Peter Collingbourne <pcc@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-3-pcc@google.com
---
 arch/arm64/include/asm/mte.h     | 30 ++++++++++++++++++++++++++++++
 arch/arm64/include/asm/pgtable.h |  2 +-
 arch/arm64/kernel/cpufeature.c   |  4 +++-
 arch/arm64/kernel/elfcore.c      |  2 +-
 arch/arm64/kernel/hibernate.c    |  2 +-
 arch/arm64/kernel/mte.c          | 17 +++++++++++------
 arch/arm64/kvm/guest.c           |  4 ++--
 arch/arm64/kvm/mmu.c             |  4 ++--
 arch/arm64/mm/copypage.c         |  5 +++--
 arch/arm64/mm/fault.c            |  2 +-
 arch/arm64/mm/mteswap.c          |  2 +-
 11 files changed, 56 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 760c62f8e22f8..3f8199ba265a1 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -37,6 +37,29 @@ void mte_free_tag_storage(char *storage);
 /* track which pages have valid allocation tags */
 #define PG_mte_tagged	PG_arch_2
 
+static inline void set_page_mte_tagged(struct page *page)
+{
+	/*
+	 * Ensure that the tags written prior to this function are visible
+	 * before the page flags update.
+	 */
+	smp_wmb();
+	set_bit(PG_mte_tagged, &page->flags);
+}
+
+static inline bool page_mte_tagged(struct page *page)
+{
+	bool ret = test_bit(PG_mte_tagged, &page->flags);
+
+	/*
+	 * If the page is tagged, ensure ordering with a likely subsequent
+	 * read of the tags.
+	 */
+	if (ret)
+		smp_rmb();
+	return ret;
+}
+
 void mte_zero_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t old_pte, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
@@ -56,6 +79,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size);
 /* unused if !CONFIG_ARM64_MTE, silence the compiler */
 #define PG_mte_tagged	0
 
+static inline void set_page_mte_tagged(struct page *page)
+{
+}
+static inline bool page_mte_tagged(struct page *page)
+{
+	return false;
+}
 static inline void mte_zero_clear_page_tags(void *addr)
 {
 }
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 71a1af42f0e89..98b6384415216 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1050,7 +1050,7 @@ static inline void arch_swap_invalidate_area(int type)
 static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 {
 	if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
-		set_bit(PG_mte_tagged, &folio->flags);
+		set_page_mte_tagged(&folio->page);
 }
 
 #endif /* CONFIG_ARM64_MTE */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index 6062454a90674..df11cfe61fcb0 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2050,8 +2050,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 	 * Clear the tags in the zero page. This needs to be done via the
 	 * linear map which has the Tagged attribute.
 	 */
-	if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags))
+	if (!page_mte_tagged(ZERO_PAGE(0))) {
 		mte_clear_page_tags(lm_alias(empty_zero_page));
+		set_page_mte_tagged(ZERO_PAGE(0));
+	}
 
 	kasan_init_hw_tags_cpu();
 }
diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 27ef7ad3ffd2e..353009d7f307f 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
 		 * Pages mapped in user space as !pte_access_permitted() (e.g.
 		 * PROT_EXEC only) may not have the PG_mte_tagged flag set.
 		 */
-		if (!test_bit(PG_mte_tagged, &page->flags)) {
+		if (!page_mte_tagged(page)) {
 			put_page(page);
 			dump_skip(cprm, MTE_PAGE_TAG_STORAGE);
 			continue;
diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c
index af5df48ba915b..788597a6b6a2f 100644
--- a/arch/arm64/kernel/hibernate.c
+++ b/arch/arm64/kernel/hibernate.c
@@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void)
 			if (!page)
 				continue;
 
-			if (!test_bit(PG_mte_tagged, &page->flags))
+			if (!page_mte_tagged(page))
 				continue;
 
 			ret = save_tags(page, pfn);
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 7467217c1eaf3..84a085d536f84 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -41,8 +41,10 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
 	if (check_swap && is_swap_pte(old_pte)) {
 		swp_entry_t entry = pte_to_swp_entry(old_pte);
 
-		if (!non_swap_entry(entry) && mte_restore_tags(entry, page))
+		if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) {
+			set_page_mte_tagged(page);
 			return;
+		}
 	}
 
 	if (!pte_is_tagged)
@@ -52,8 +54,10 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
 	 * Test PG_mte_tagged again in case it was racing with another
 	 * set_pte_at().
 	 */
-	if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+	if (!page_mte_tagged(page)) {
 		mte_clear_page_tags(page_address(page));
+		set_page_mte_tagged(page);
+	}
 }
 
 void mte_sync_tags(pte_t old_pte, pte_t pte)
@@ -69,9 +73,11 @@ void mte_sync_tags(pte_t old_pte, pte_t pte)
 
 	/* if PG_mte_tagged is set, tags have already been initialised */
 	for (i = 0; i < nr_pages; i++, page++) {
-		if (!test_bit(PG_mte_tagged, &page->flags))
+		if (!page_mte_tagged(page)) {
 			mte_sync_page_tags(page, old_pte, check_swap,
 					   pte_is_tagged);
+			set_page_mte_tagged(page);
+		}
 	}
 
 	/* ensure the tags are visible before the PTE is set */
@@ -96,8 +102,7 @@ int memcmp_pages(struct page *page1, struct page *page2)
 	 * pages is tagged, set_pte_at() may zero or change the tags of the
 	 * other page via mte_sync_tags().
 	 */
-	if (test_bit(PG_mte_tagged, &page1->flags) ||
-	    test_bit(PG_mte_tagged, &page2->flags))
+	if (page_mte_tagged(page1) || page_mte_tagged(page2))
 		return addr1 != addr2;
 
 	return ret;
@@ -454,7 +459,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr,
 			put_page(page);
 			break;
 		}
-		WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags));
+		WARN_ON_ONCE(!page_mte_tagged(page));
 
 		/* limit access to the end of the page */
 		offset = offset_in_page(addr);
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 2ff13a3f84796..817fdd1ab7783 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1059,7 +1059,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 		maddr = page_address(page);
 
 		if (!write) {
-			if (test_bit(PG_mte_tagged, &page->flags))
+			if (page_mte_tagged(page))
 				num_tags = mte_copy_tags_to_user(tags, maddr,
 							MTE_GRANULES_PER_PAGE);
 			else
@@ -1076,7 +1076,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 			 * completed fully
 			 */
 			if (num_tags == MTE_GRANULES_PER_PAGE)
-				set_bit(PG_mte_tagged, &page->flags);
+				set_page_mte_tagged(page);
 
 			kvm_release_pfn_dirty(pfn);
 		}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 60ee3d9f01f8c..2c3759f1f2c56 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1110,9 +1110,9 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
 		return -EFAULT;
 
 	for (i = 0; i < nr_pages; i++, page++) {
-		if (!test_bit(PG_mte_tagged, &page->flags)) {
+		if (!page_mte_tagged(page)) {
 			mte_clear_page_tags(page_address(page));
-			set_bit(PG_mte_tagged, &page->flags);
+			set_page_mte_tagged(page);
 		}
 	}
 
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 24913271e898c..731d8a35701ea 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -21,9 +21,10 @@ void copy_highpage(struct page *to, struct page *from)
 
 	copy_page(kto, kfrom);
 
-	if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) {
-		set_bit(PG_mte_tagged, &to->flags);
+	if (system_supports_mte() && page_mte_tagged(from)) {
+		page_kasan_tag_reset(to);
 		mte_copy_page_tags(kto, kfrom);
+		set_page_mte_tagged(to);
 	}
 }
 EXPORT_SYMBOL(copy_highpage);
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 5b391490e045b..629e886ceec40 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -934,5 +934,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
 void tag_clear_highpage(struct page *page)
 {
 	mte_zero_clear_page_tags(page_address(page));
-	set_bit(PG_mte_tagged, &page->flags);
+	set_page_mte_tagged(page);
 }
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index bed803d8e1585..70f913205db99 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -24,7 +24,7 @@ int mte_save_tags(struct page *page)
 {
 	void *tag_storage, *ret;
 
-	if (!test_bit(PG_mte_tagged, &page->flags))
+	if (!page_mte_tagged(page))
 		return 0;
 
 	tag_storage = mte_allocate_tag_storage();
-- 
GitLab


From 2dbf12ae132cc78048615cfa19c9be64baaf0ced Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 3 Nov 2022 18:10:36 -0700
Subject: [PATCH 2886/4122] KVM: arm64: Simplify the sanitise_mte_tags() logic

Currently sanitise_mte_tags() checks if it's an online page before
attempting to sanitise the tags. Such detection should be done in the
caller via the VM_MTE_ALLOWED vma flag. Since kvm_set_spte_gfn() does
not have the vma, leave the page unmapped if not already tagged. Tag
initialisation will be done on a subsequent access fault in
user_mem_abort().

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
[pcc@google.com: fix the page initializer]
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Peter Collingbourne <pcc@google.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-4-pcc@google.com
---
 arch/arm64/kvm/mmu.c | 40 +++++++++++++++-------------------------
 1 file changed, 15 insertions(+), 25 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 2c3759f1f2c56..e81bfb7306298 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1091,23 +1091,14 @@ static int get_vma_page_shift(struct vm_area_struct *vma, unsigned long hva)
  * - mmap_lock protects between a VM faulting a page in and the VMM performing
  *   an mprotect() to add VM_MTE
  */
-static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
-			     unsigned long size)
+static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
+			      unsigned long size)
 {
 	unsigned long i, nr_pages = size >> PAGE_SHIFT;
-	struct page *page;
+	struct page *page = pfn_to_page(pfn);
 
 	if (!kvm_has_mte(kvm))
-		return 0;
-
-	/*
-	 * pfn_to_online_page() is used to reject ZONE_DEVICE pages
-	 * that may not support tags.
-	 */
-	page = pfn_to_online_page(pfn);
-
-	if (!page)
-		return -EFAULT;
+		return;
 
 	for (i = 0; i < nr_pages; i++, page++) {
 		if (!page_mte_tagged(page)) {
@@ -1115,8 +1106,6 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
 			set_page_mte_tagged(page);
 		}
 	}
-
-	return 0;
 }
 
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
@@ -1127,7 +1116,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	bool write_fault, writable, force_pte = false;
 	bool exec_fault;
 	bool device = false;
-	bool shared;
 	unsigned long mmu_seq;
 	struct kvm *kvm = vcpu->kvm;
 	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -1177,8 +1165,6 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		vma_shift = get_vma_page_shift(vma, hva);
 	}
 
-	shared = (vma->vm_flags & VM_SHARED);
-
 	switch (vma_shift) {
 #ifndef __PAGETABLE_PMD_FOLDED
 	case PUD_SHIFT:
@@ -1299,12 +1285,13 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 
 	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
 		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
-		if (!shared)
-			ret = sanitise_mte_tags(kvm, pfn, vma_pagesize);
-		else
+		if ((vma->vm_flags & VM_MTE_ALLOWED) &&
+		    !(vma->vm_flags & VM_SHARED)) {
+			sanitise_mte_tags(kvm, pfn, vma_pagesize);
+		} else {
 			ret = -EFAULT;
-		if (ret)
 			goto out_unlock;
+		}
 	}
 
 	if (writable)
@@ -1526,15 +1513,18 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range)
 bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 {
 	kvm_pfn_t pfn = pte_pfn(range->pte);
-	int ret;
 
 	if (!kvm->arch.mmu.pgt)
 		return false;
 
 	WARN_ON(range->end - range->start != 1);
 
-	ret = sanitise_mte_tags(kvm, pfn, PAGE_SIZE);
-	if (ret)
+	/*
+	 * If the page isn't tagged, defer to user_mem_abort() for sanitising
+	 * the MTE tags. The S2 pte should have been unmapped by
+	 * mmu_notifier_invalidate_range_end().
+	 */
+	if (kvm_has_mte(kvm) && !page_mte_tagged(pfn_to_page(pfn)))
 		return false;
 
 	/*
-- 
GitLab


From ef6458b1b6ca3fdb991ce4182e981a88d4c58c0f Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Thu, 3 Nov 2022 18:10:37 -0700
Subject: [PATCH 2887/4122] mm: Add PG_arch_3 page flag

As with PG_arch_2, this flag is only allowed on 64-bit architectures due
to the shortage of bits available. It will be used by the arm64 MTE code
in subsequent patches.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Steven Price <steven.price@arm.com>
[catalin.marinas@arm.com: added flag preserving in __split_huge_page_tail()]
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-5-pcc@google.com
---
 fs/proc/page.c                    | 1 +
 include/linux/kernel-page-flags.h | 1 +
 include/linux/page-flags.h        | 1 +
 include/trace/events/mmflags.h    | 1 +
 mm/huge_memory.c                  | 1 +
 5 files changed, 5 insertions(+)

diff --git a/fs/proc/page.c b/fs/proc/page.c
index 882525c8e94c5..6249c347809a9 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -221,6 +221,7 @@ u64 stable_page_flags(struct page *page)
 	u |= kpf_copy_bit(k, KPF_ARCH,		PG_arch_1);
 #ifdef CONFIG_ARCH_USES_PG_ARCH_X
 	u |= kpf_copy_bit(k, KPF_ARCH_2,	PG_arch_2);
+	u |= kpf_copy_bit(k, KPF_ARCH_3,	PG_arch_3);
 #endif
 
 	return u;
diff --git a/include/linux/kernel-page-flags.h b/include/linux/kernel-page-flags.h
index eee1877a354e5..859f4b0c1b2bb 100644
--- a/include/linux/kernel-page-flags.h
+++ b/include/linux/kernel-page-flags.h
@@ -18,5 +18,6 @@
 #define KPF_UNCACHED		39
 #define KPF_SOFTDIRTY		40
 #define KPF_ARCH_2		41
+#define KPF_ARCH_3		42
 
 #endif /* LINUX_KERNEL_PAGE_FLAGS_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 5dc7977edf9d5..c50ce2812f177 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -134,6 +134,7 @@ enum pageflags {
 #endif
 #ifdef CONFIG_ARCH_USES_PG_ARCH_X
 	PG_arch_2,
+	PG_arch_3,
 #endif
 #ifdef CONFIG_KASAN_HW_TAGS
 	PG_skip_kasan_poison,
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index d9f6d35fb1509..412b5a46374c0 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -131,6 +131,7 @@ IF_HAVE_PG_HWPOISON(PG_hwpoison,	"hwpoison"	)		\
 IF_HAVE_PG_IDLE(PG_young,		"young"		)		\
 IF_HAVE_PG_IDLE(PG_idle,		"idle"		)		\
 IF_HAVE_PG_ARCH_X(PG_arch_2,		"arch_2"	)		\
+IF_HAVE_PG_ARCH_X(PG_arch_3,		"arch_3"	)		\
 IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
 
 #define show_page_flags(flags)						\
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 76e7b973919ca..dfe72ea23c5f3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2446,6 +2446,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_unevictable) |
 #ifdef CONFIG_ARCH_USES_PG_ARCH_X
 			 (1L << PG_arch_2) |
+			 (1L << PG_arch_3) |
 #endif
 			 (1L << PG_dirty) |
 			 LRU_GEN_MASK | LRU_REFS_MASK));
-- 
GitLab


From d77e59a8fccde7fb5dd8c57594ed147b4291c970 Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 3 Nov 2022 18:10:38 -0700
Subject: [PATCH 2888/4122] arm64: mte: Lock a page for MTE tag initialisation

Initialising the tags and setting PG_mte_tagged flag for a page can race
between multiple set_pte_at() on shared pages or setting the stage 2 pte
via user_mem_abort(). Introduce a new PG_mte_lock flag as PG_arch_3 and
set it before attempting page initialisation. Given that PG_mte_tagged
is never cleared for a page, consider setting this flag to mean page
unlocked and wait on this bit with acquire semantics if the page is
locked:

- try_page_mte_tagging() - lock the page for tagging, return true if it
  can be tagged, false if already tagged. No acquire semantics if it
  returns true (PG_mte_tagged not set) as there is no serialisation with
  a previous set_page_mte_tagged().

- set_page_mte_tagged() - set PG_mte_tagged with release semantics.

The two-bit locking is based on Peter Collingbourne's idea.

Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Peter Collingbourne <pcc@google.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-6-pcc@google.com
---
 arch/arm64/include/asm/mte.h     | 35 +++++++++++++++++++++++++++++++-
 arch/arm64/include/asm/pgtable.h |  4 ++--
 arch/arm64/kernel/cpufeature.c   |  2 +-
 arch/arm64/kernel/mte.c          | 12 +++--------
 arch/arm64/kvm/guest.c           | 16 +++++++++------
 arch/arm64/kvm/mmu.c             |  2 +-
 arch/arm64/mm/copypage.c         |  2 ++
 arch/arm64/mm/fault.c            |  2 ++
 arch/arm64/mm/mteswap.c          | 14 +++++--------
 9 files changed, 60 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
index 3f8199ba265a1..20dd06d70af5f 100644
--- a/arch/arm64/include/asm/mte.h
+++ b/arch/arm64/include/asm/mte.h
@@ -25,7 +25,7 @@ unsigned long mte_copy_tags_to_user(void __user *to, void *from,
 				    unsigned long n);
 int mte_save_tags(struct page *page);
 void mte_save_page_tags(const void *page_addr, void *tag_storage);
-bool mte_restore_tags(swp_entry_t entry, struct page *page);
+void mte_restore_tags(swp_entry_t entry, struct page *page);
 void mte_restore_page_tags(void *page_addr, const void *tag_storage);
 void mte_invalidate_tags(int type, pgoff_t offset);
 void mte_invalidate_tags_area(int type);
@@ -36,6 +36,8 @@ void mte_free_tag_storage(char *storage);
 
 /* track which pages have valid allocation tags */
 #define PG_mte_tagged	PG_arch_2
+/* simple lock to avoid multiple threads tagging the same page */
+#define PG_mte_lock	PG_arch_3
 
 static inline void set_page_mte_tagged(struct page *page)
 {
@@ -60,6 +62,33 @@ static inline bool page_mte_tagged(struct page *page)
 	return ret;
 }
 
+/*
+ * Lock the page for tagging and return 'true' if the page can be tagged,
+ * 'false' if already tagged. PG_mte_tagged is never cleared and therefore the
+ * locking only happens once for page initialisation.
+ *
+ * The page MTE lock state:
+ *
+ *   Locked:	PG_mte_lock && !PG_mte_tagged
+ *   Unlocked:	!PG_mte_lock || PG_mte_tagged
+ *
+ * Acquire semantics only if the page is tagged (returning 'false').
+ */
+static inline bool try_page_mte_tagging(struct page *page)
+{
+	if (!test_and_set_bit(PG_mte_lock, &page->flags))
+		return true;
+
+	/*
+	 * The tags are either being initialised or may have been initialised
+	 * already. Check if the PG_mte_tagged flag has been set or wait
+	 * otherwise.
+	 */
+	smp_cond_load_acquire(&page->flags, VAL & (1UL << PG_mte_tagged));
+
+	return false;
+}
+
 void mte_zero_clear_page_tags(void *addr);
 void mte_sync_tags(pte_t old_pte, pte_t pte);
 void mte_copy_page_tags(void *kto, const void *kfrom);
@@ -86,6 +115,10 @@ static inline bool page_mte_tagged(struct page *page)
 {
 	return false;
 }
+static inline bool try_page_mte_tagging(struct page *page)
+{
+	return false;
+}
 static inline void mte_zero_clear_page_tags(void *addr)
 {
 }
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 98b6384415216..8735ac1a1e326 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1049,8 +1049,8 @@ static inline void arch_swap_invalidate_area(int type)
 #define __HAVE_ARCH_SWAP_RESTORE
 static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio)
 {
-	if (system_supports_mte() && mte_restore_tags(entry, &folio->page))
-		set_page_mte_tagged(&folio->page);
+	if (system_supports_mte())
+		mte_restore_tags(entry, &folio->page);
 }
 
 #endif /* CONFIG_ARM64_MTE */
diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
index df11cfe61fcb0..afb4ffd745c33 100644
--- a/arch/arm64/kernel/cpufeature.c
+++ b/arch/arm64/kernel/cpufeature.c
@@ -2050,7 +2050,7 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
 	 * Clear the tags in the zero page. This needs to be done via the
 	 * linear map which has the Tagged attribute.
 	 */
-	if (!page_mte_tagged(ZERO_PAGE(0))) {
+	if (try_page_mte_tagging(ZERO_PAGE(0))) {
 		mte_clear_page_tags(lm_alias(empty_zero_page));
 		set_page_mte_tagged(ZERO_PAGE(0));
 	}
diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c
index 84a085d536f84..f5bcb0dc62673 100644
--- a/arch/arm64/kernel/mte.c
+++ b/arch/arm64/kernel/mte.c
@@ -41,20 +41,14 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte,
 	if (check_swap && is_swap_pte(old_pte)) {
 		swp_entry_t entry = pte_to_swp_entry(old_pte);
 
-		if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) {
-			set_page_mte_tagged(page);
-			return;
-		}
+		if (!non_swap_entry(entry))
+			mte_restore_tags(entry, page);
 	}
 
 	if (!pte_is_tagged)
 		return;
 
-	/*
-	 * Test PG_mte_tagged again in case it was racing with another
-	 * set_pte_at().
-	 */
-	if (!page_mte_tagged(page)) {
+	if (try_page_mte_tagging(page)) {
 		mte_clear_page_tags(page_address(page));
 		set_page_mte_tagged(page);
 	}
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 817fdd1ab7783..5626ddb540ce3 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -1068,15 +1068,19 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm,
 					clear_user(tags, MTE_GRANULES_PER_PAGE);
 			kvm_release_pfn_clean(pfn);
 		} else {
+			/*
+			 * Only locking to serialise with a concurrent
+			 * set_pte_at() in the VMM but still overriding the
+			 * tags, hence ignoring the return value.
+			 */
+			try_page_mte_tagging(page);
 			num_tags = mte_copy_tags_from_user(maddr, tags,
 							MTE_GRANULES_PER_PAGE);
 
-			/*
-			 * Set the flag after checking the write
-			 * completed fully
-			 */
-			if (num_tags == MTE_GRANULES_PER_PAGE)
-				set_page_mte_tagged(page);
+			/* uaccess failed, don't leave stale tags */
+			if (num_tags != MTE_GRANULES_PER_PAGE)
+				mte_clear_page_tags(page);
+			set_page_mte_tagged(page);
 
 			kvm_release_pfn_dirty(pfn);
 		}
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index e81bfb7306298..fa2c85b93149f 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1101,7 +1101,7 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
 		return;
 
 	for (i = 0; i < nr_pages; i++, page++) {
-		if (!page_mte_tagged(page)) {
+		if (try_page_mte_tagging(page)) {
 			mte_clear_page_tags(page_address(page));
 			set_page_mte_tagged(page);
 		}
diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c
index 731d8a35701ea..8dd5a8fe64b4f 100644
--- a/arch/arm64/mm/copypage.c
+++ b/arch/arm64/mm/copypage.c
@@ -23,6 +23,8 @@ void copy_highpage(struct page *to, struct page *from)
 
 	if (system_supports_mte() && page_mte_tagged(from)) {
 		page_kasan_tag_reset(to);
+		/* It's a new page, shouldn't have been tagged yet */
+		WARN_ON_ONCE(!try_page_mte_tagging(to));
 		mte_copy_page_tags(kto, kfrom);
 		set_page_mte_tagged(to);
 	}
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 629e886ceec40..b8b299d1736af 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -933,6 +933,8 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma,
 
 void tag_clear_highpage(struct page *page)
 {
+	/* Newly allocated page, shouldn't have been tagged yet */
+	WARN_ON_ONCE(!try_page_mte_tagging(page));
 	mte_zero_clear_page_tags(page_address(page));
 	set_page_mte_tagged(page);
 }
diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c
index 70f913205db99..cd508ba80ab1b 100644
--- a/arch/arm64/mm/mteswap.c
+++ b/arch/arm64/mm/mteswap.c
@@ -46,21 +46,17 @@ int mte_save_tags(struct page *page)
 	return 0;
 }
 
-bool mte_restore_tags(swp_entry_t entry, struct page *page)
+void mte_restore_tags(swp_entry_t entry, struct page *page)
 {
 	void *tags = xa_load(&mte_pages, entry.val);
 
 	if (!tags)
-		return false;
+		return;
 
-	/*
-	 * Test PG_mte_tagged again in case it was racing with another
-	 * set_pte_at().
-	 */
-	if (!test_and_set_bit(PG_mte_tagged, &page->flags))
+	if (try_page_mte_tagging(page)) {
 		mte_restore_page_tags(page_address(page), tags);
-
-	return true;
+		set_page_mte_tagged(page);
+	}
 }
 
 void mte_invalidate_tags(int type, pgoff_t offset)
-- 
GitLab


From d89585fbb30869011b326ef26c94c3137d228df9 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Thu, 3 Nov 2022 18:10:39 -0700
Subject: [PATCH 2889/4122] KVM: arm64: unify the tests for VMAs in memslots
 when MTE is enabled

Previously we allowed creating a memslot containing a private mapping that
was not VM_MTE_ALLOWED, but would later reject KVM_RUN with -EFAULT. Now
we reject the memory region at memslot creation time.

Since this is a minor tweak to the ABI (a VMM that created one of
these memslots would fail later anyway), no VMM to my knowledge has
MTE support yet, and the hardware with the necessary features is not
generally available, we can probably make this ABI change at this point.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-7-pcc@google.com
---
 arch/arm64/kvm/mmu.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index fa2c85b93149f..9ff9a271cf01b 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1108,6 +1108,19 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
 	}
 }
 
+static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
+{
+	/*
+	 * VM_SHARED mappings are not allowed with MTE to avoid races
+	 * when updating the PG_mte_tagged page flag, see
+	 * sanitise_mte_tags for more details.
+	 */
+	if (vma->vm_flags & VM_SHARED)
+		return false;
+
+	return vma->vm_flags & VM_MTE_ALLOWED;
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -1284,9 +1297,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	}
 
 	if (fault_status != FSC_PERM && !device && kvm_has_mte(kvm)) {
-		/* Check the VMM hasn't introduced a new VM_SHARED VMA */
-		if ((vma->vm_flags & VM_MTE_ALLOWED) &&
-		    !(vma->vm_flags & VM_SHARED)) {
+		/* Check the VMM hasn't introduced a new disallowed VMA */
+		if (kvm_vma_mte_allowed(vma)) {
 			sanitise_mte_tags(kvm, pfn, vma_pagesize);
 		} else {
 			ret = -EFAULT;
@@ -1730,12 +1742,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		if (!vma)
 			break;
 
-		/*
-		 * VM_SHARED mappings are not allowed with MTE to avoid races
-		 * when updating the PG_mte_tagged page flag, see
-		 * sanitise_mte_tags for more details.
-		 */
-		if (kvm_has_mte(kvm) && vma->vm_flags & VM_SHARED) {
+		if (kvm_has_mte(kvm) && !kvm_vma_mte_allowed(vma)) {
 			ret = -EINVAL;
 			break;
 		}
-- 
GitLab


From c911f0d4687947915f04024aa01803247fcf7f1a Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Thu, 3 Nov 2022 18:10:40 -0700
Subject: [PATCH 2890/4122] KVM: arm64: permit all VM_MTE_ALLOWED mappings with
 MTE enabled

Certain VMMs such as crosvm have features (e.g. sandboxing) that depend
on being able to map guest memory as MAP_SHARED. The current restriction
on sharing MAP_SHARED pages with the guest is preventing the use of
those features with MTE. Now that the races between tasks concurrently
clearing tags on the same page have been fixed, remove this restriction.

Note that this is a relaxation of the ABI.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Steven Price <steven.price@arm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-8-pcc@google.com
---
 arch/arm64/kvm/mmu.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 9ff9a271cf01b..b9402d8b5a90c 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -1110,14 +1110,6 @@ static void sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn,
 
 static bool kvm_vma_mte_allowed(struct vm_area_struct *vma)
 {
-	/*
-	 * VM_SHARED mappings are not allowed with MTE to avoid races
-	 * when updating the PG_mte_tagged page flag, see
-	 * sanitise_mte_tags for more details.
-	 */
-	if (vma->vm_flags & VM_SHARED)
-		return false;
-
 	return vma->vm_flags & VM_MTE_ALLOWED;
 }
 
-- 
GitLab


From a4baf8d2639f24d4d31983ff67c01878e7a5393f Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <pcc@google.com>
Date: Thu, 3 Nov 2022 18:10:41 -0700
Subject: [PATCH 2891/4122] Documentation: document the ABI changes for
 KVM_CAP_ARM_MTE

Document both the restriction on VM_MTE_ALLOWED mappings and
the relaxation for shared mappings.

Signed-off-by: Peter Collingbourne <pcc@google.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221104011041.290951-9-pcc@google.com
---
 Documentation/virt/kvm/api.rst | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index eee9f857a986f..b55f80dadcfeb 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -7385,8 +7385,9 @@ hibernation of the host; however the VMM needs to manually save/restore the
 tags as appropriate if the VM is migrated.
 
 When this capability is enabled all memory in memslots must be mapped as
-not-shareable (no MAP_SHARED), attempts to create a memslot with a
-MAP_SHARED mmap will result in an -EINVAL return.
+``MAP_ANONYMOUS`` or with a RAM-based file mapping (``tmpfs``, ``memfd``),
+attempts to create a memslot with an invalid mmap will result in an
+-EINVAL return.
 
 When enabled the VMM may make use of the ``KVM_ARM_MTE_COPY_TAGS`` ioctl to
 perform a bulk copy of tags to/from the guest.
-- 
GitLab


From 62c73bfea048e66168df09da6d3e4510ecda40bb Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 28 Nov 2022 17:15:26 +0100
Subject: [PATCH 2892/4122] usb: dwc3: Fix race between dwc3_set_mode and
 __dwc3_set_mode

dwc->desired_dr_role is changed by dwc3_set_mode inside a spinlock but
then read by __dwc3_set_mode outside of that lock. This can lead to a
race condition when very quick successive role switch events happen:

CPU A
	dwc3_set_mode(DWC3_GCTL_PRTCAP_HOST) // first role switch event
		spin_lock_irqsave(&dwc->lock, flags);
		dwc->desired_dr_role = mode; // DWC3_GCTL_PRTCAP_HOST
		spin_unlock_irqrestore(&dwc->lock, flags);
		queue_work(system_freezable_wq, &dwc->drd_work);

CPU B
	__dwc3_set_mode
		// ....
		spin_lock_irqsave(&dwc->lock, flags);
		// desired_dr_role is DWC3_GCTL_PRTCAP_HOST
		dwc3_set_prtcap(dwc, dwc->desired_dr_role);
		spin_unlock_irqrestore(&dwc->lock, flags);

CPU A
	dwc3_set_mode(DWC3_GCTL_PRTCAP_DEVICE) // second event
		spin_lock_irqsave(&dwc->lock, flags);
		dwc->desired_dr_role = mode; // DWC3_GCTL_PRTCAP_DEVICE
		spin_unlock_irqrestore(&dwc->lock, flags);

CPU B (continues running __dwc3_set_mode)
	switch (dwc->desired_dr_role) { // DWC3_GCTL_PRTCAP_DEVICE
	// ....
	case DWC3_GCTL_PRTCAP_DEVICE:
		// ....
		ret = dwc3_gadget_init(dwc);

We then have DWC3_GCTL.DWC3_GCTL_PRTCAPDIR = DWC3_GCTL_PRTCAP_HOST and
dwc->current_dr_role = DWC3_GCTL_PRTCAP_HOST but initialized the
controller in device mode. It's also possible to get into a state
where both host and device are intialized at the same time.
Fix this race by creating a local copy of desired_dr_role inside
__dwc3_set_mode while holding dwc->lock.

Fixes: 41ce1456e1db ("usb: dwc3: core: make dwc3_set_mode() work properly")
Cc: stable <stable@kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Signed-off-by: Sven Peter <sven@svenpeter.dev>
Link: https://lore.kernel.org/r/20221128161526.79730-1-sven@svenpeter.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index 1f348bc867c22..fc38a8b13efa6 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -122,21 +122,25 @@ static void __dwc3_set_mode(struct work_struct *work)
 	unsigned long flags;
 	int ret;
 	u32 reg;
+	u32 desired_dr_role;
 
 	mutex_lock(&dwc->mutex);
+	spin_lock_irqsave(&dwc->lock, flags);
+	desired_dr_role = dwc->desired_dr_role;
+	spin_unlock_irqrestore(&dwc->lock, flags);
 
 	pm_runtime_get_sync(dwc->dev);
 
 	if (dwc->current_dr_role == DWC3_GCTL_PRTCAP_OTG)
 		dwc3_otg_update(dwc, 0);
 
-	if (!dwc->desired_dr_role)
+	if (!desired_dr_role)
 		goto out;
 
-	if (dwc->desired_dr_role == dwc->current_dr_role)
+	if (desired_dr_role == dwc->current_dr_role)
 		goto out;
 
-	if (dwc->desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev)
+	if (desired_dr_role == DWC3_GCTL_PRTCAP_OTG && dwc->edev)
 		goto out;
 
 	switch (dwc->current_dr_role) {
@@ -164,7 +168,7 @@ static void __dwc3_set_mode(struct work_struct *work)
 	 */
 	if (dwc->current_dr_role && ((DWC3_IP_IS(DWC3) ||
 			DWC3_VER_IS_PRIOR(DWC31, 190A)) &&
-			dwc->desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) {
+			desired_dr_role != DWC3_GCTL_PRTCAP_OTG)) {
 		reg = dwc3_readl(dwc->regs, DWC3_GCTL);
 		reg |= DWC3_GCTL_CORESOFTRESET;
 		dwc3_writel(dwc->regs, DWC3_GCTL, reg);
@@ -184,11 +188,11 @@ static void __dwc3_set_mode(struct work_struct *work)
 
 	spin_lock_irqsave(&dwc->lock, flags);
 
-	dwc3_set_prtcap(dwc, dwc->desired_dr_role);
+	dwc3_set_prtcap(dwc, desired_dr_role);
 
 	spin_unlock_irqrestore(&dwc->lock, flags);
 
-	switch (dwc->desired_dr_role) {
+	switch (desired_dr_role) {
 	case DWC3_GCTL_PRTCAP_HOST:
 		ret = dwc3_host_init(dwc);
 		if (ret) {
-- 
GitLab


From 3b7c7478eda00945987d45f902bc3942c89243d3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 28 Nov 2022 21:00:54 +0200
Subject: [PATCH 2893/4122] gpiolib: Provide to_gpio_device() helper

Provide to_gpio_device() helper which can be utilized in the existing
and future code.

While at it, make sure it becomes no-op at compilation time.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpiolib.c | 2 +-
 drivers/gpio/gpiolib.h | 9 +++++++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 2729f7ebab9d6..0058ee83989df 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -549,7 +549,7 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid);
 
 static void gpiodevice_release(struct device *dev)
 {
-	struct gpio_device *gdev = container_of(dev, struct gpio_device, dev);
+	struct gpio_device *gdev = to_gpio_device(dev);
 	unsigned long flags;
 
 	spin_lock_irqsave(&gpio_lock, flags);
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index d900ecdbac46d..e443c1023a374 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -20,9 +20,9 @@
 
 /**
  * struct gpio_device - internal state container for GPIO devices
- * @id: numerical ID number for the GPIO chip
  * @dev: the GPIO device struct
  * @chrdev: character device for the GPIO device
+ * @id: numerical ID number for the GPIO chip
  * @mockdev: class device used by the deprecated sysfs interface (may be
  * NULL)
  * @owner: helps prevent removal of modules exporting active GPIOs
@@ -47,9 +47,9 @@
  * userspace.
  */
 struct gpio_device {
-	int			id;
 	struct device		dev;
 	struct cdev		chrdev;
+	int			id;
 	struct device		*mockdev;
 	struct module		*owner;
 	struct gpio_chip	*chip;
@@ -72,6 +72,11 @@ struct gpio_device {
 #endif
 };
 
+static inline struct gpio_device *to_gpio_device(struct device *dev)
+{
+	return container_of(dev, struct gpio_device, dev);
+}
+
 /* gpio suffixes used for ACPI and device tree lookup */
 static __maybe_unused const char * const gpio_suffixes[] = { "gpios", "gpio" };
 
-- 
GitLab


From 9ec1eb1bcceec735fb3c9255cdcdbcc2acf860a0 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 18 Nov 2022 21:15:02 +0000
Subject: [PATCH 2894/4122] KVM: selftests: Have perf_test_util signal when to
 stop vCPUs

Signal that a test run is complete through perf_test_args instead of
having tests open code a similar solution. Ensure that the field resets
to false at the beginning of a test run as the structure is reused
between test runs, eliminating a couple of bugs:

access_tracking_perf_test hangs indefinitely on a subsequent test run,
as 'done' remains true. The bug doesn't amount to much right now, as x86
supports a single guest mode. However, this is a precondition of
enabling the test for other architectures with >1 guest mode, like
arm64.

memslot_modification_stress_test has the exact opposite problem, where
subsequent test runs complete immediately as 'run_vcpus' remains false.

Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
[oliver: added commit message, preserve spin_wait_for_next_iteration()]
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118211503.4049023-2-oliver.upton@linux.dev
---
 tools/testing/selftests/kvm/access_tracking_perf_test.c   | 8 +-------
 tools/testing/selftests/kvm/include/perf_test_util.h      | 3 +++
 tools/testing/selftests/kvm/lib/perf_test_util.c          | 3 +++
 .../selftests/kvm/memslot_modification_stress_test.c      | 6 +-----
 4 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 76c583a07ea22..942370d573925 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -58,9 +58,6 @@ static enum {
 	ITERATION_MARK_IDLE,
 } iteration_work;
 
-/* Set to true when vCPU threads should exit. */
-static bool done;
-
 /* The iteration that was last completed by each vCPU. */
 static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
 
@@ -211,7 +208,7 @@ static bool spin_wait_for_next_iteration(int *current_iteration)
 	int last_iteration = *current_iteration;
 
 	do {
-		if (READ_ONCE(done))
+		if (READ_ONCE(perf_test_args.stop_vcpus))
 			return false;
 
 		*current_iteration = READ_ONCE(iteration);
@@ -321,9 +318,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	mark_memory_idle(vm, nr_vcpus);
 	access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory");
 
-	/* Set done to signal the vCPU threads to exit */
-	done = true;
-
 	perf_test_join_vcpu_threads(nr_vcpus);
 	perf_test_destroy_vm(vm);
 }
diff --git a/tools/testing/selftests/kvm/include/perf_test_util.h b/tools/testing/selftests/kvm/include/perf_test_util.h
index eaa88df0555a8..536d7c3c3f14e 100644
--- a/tools/testing/selftests/kvm/include/perf_test_util.h
+++ b/tools/testing/selftests/kvm/include/perf_test_util.h
@@ -40,6 +40,9 @@ struct perf_test_args {
 	/* Run vCPUs in L2 instead of L1, if the architecture supports it. */
 	bool nested;
 
+	/* Test is done, stop running vCPUs. */
+	bool stop_vcpus;
+
 	struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
 };
 
diff --git a/tools/testing/selftests/kvm/lib/perf_test_util.c b/tools/testing/selftests/kvm/lib/perf_test_util.c
index 9618b37c66f7e..ee3f499ccbd22 100644
--- a/tools/testing/selftests/kvm/lib/perf_test_util.c
+++ b/tools/testing/selftests/kvm/lib/perf_test_util.c
@@ -267,6 +267,7 @@ void perf_test_start_vcpu_threads(int nr_vcpus,
 
 	vcpu_thread_fn = vcpu_fn;
 	WRITE_ONCE(all_vcpu_threads_running, false);
+	WRITE_ONCE(perf_test_args.stop_vcpus, false);
 
 	for (i = 0; i < nr_vcpus; i++) {
 		struct vcpu_thread *vcpu = &vcpu_threads[i];
@@ -289,6 +290,8 @@ void perf_test_join_vcpu_threads(int nr_vcpus)
 {
 	int i;
 
+	WRITE_ONCE(perf_test_args.stop_vcpus, true);
+
 	for (i = 0; i < nr_vcpus; i++)
 		pthread_join(vcpu_threads[i].thread, NULL);
 }
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index bb1d17a1171bc..3a5e4518307c2 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -34,8 +34,6 @@
 static int nr_vcpus = 1;
 static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
 
-static bool run_vcpus = true;
-
 static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 {
 	struct kvm_vcpu *vcpu = vcpu_args->vcpu;
@@ -45,7 +43,7 @@ static void vcpu_worker(struct perf_test_vcpu_args *vcpu_args)
 	run = vcpu->run;
 
 	/* Let the guest access its memory until a stop signal is received */
-	while (READ_ONCE(run_vcpus)) {
+	while (!READ_ONCE(perf_test_args.stop_vcpus)) {
 		ret = _vcpu_run(vcpu);
 		TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
 
@@ -110,8 +108,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	add_remove_memslot(vm, p->memslot_modification_delay,
 			   p->nr_memslot_modifications);
 
-	run_vcpus = false;
-
 	perf_test_join_vcpu_threads(nr_vcpus);
 	pr_info("All vCPU threads joined\n");
 
-- 
GitLab


From 4568180411e0fb5613e217da1c693466e39b9c27 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Fri, 18 Nov 2022 21:15:03 +0000
Subject: [PATCH 2895/4122] KVM: selftests: Build access_tracking_perf_test for
 arm64

Does exactly what it says on the tin.

Reviewed-by: Gavin Shan <gshan@redhat.com>
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118211503.4049023-3-oliver.upton@linux.dev
---
 tools/testing/selftests/kvm/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 0172eb6cb6eee..4c0ff91a89644 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -156,6 +156,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/psci_test
 TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
 TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
+TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
 TEST_GEN_PROGS_aarch64 += demand_paging_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
-- 
GitLab


From 41555cc9e2e9778ddc7c0293a4a2e4995e332643 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Fri, 21 Oct 2022 17:00:30 +0100
Subject: [PATCH 2896/4122] RISC-V: enable sparsemem by default for defconfig

on an arch level, RISC-V defaults to FLATMEM. On PolarFire SoC, the
memory layout is almost always sparse, with a maximum of 1 GiB at
0x8000_0000 & a possible 16 GiB range at 0x10_0000_0000. The Icicle kit,
for example, has 2 GiB of DDR - so there's a big hole in the memory map
between the two gigs. Prior to v6.1-rc1, boot times from defconfig
builds were pretty bad on Icicle but enabling sparsemem would fix those
issues. As of v6.1-rc1, the Icicle kit no longer boots from defconfig
builds with the in-kernel devicetree. A change to the memory map
resulted in a futher "sparse-ification", producing a splat on boot:

	OF: fdt: Ignoring memory range 0x80000000 - 0x80200000
	Machine model: Microchip PolarFire-SoC Icicle Kit
	earlycon: ns16550a0 at MMIO32 0x0000000020100000 (options '115200n8')
	printk: bootconsole [ns16550a0] enabled
	printk: debug: skip boot console de-registration.
	efi: UEFI not found.
	Zone ranges:
	  DMA32    [mem 0x0000000080200000-0x00000000ffffffff]
	  Normal   [mem 0x0000000100000000-0x000000107fffffff]
	Movable zone start for each node
	Early memory node ranges
	  node   0: [mem 0x0000000080200000-0x00000000bfbfffff]
	  node   0: [mem 0x00000000bfc00000-0x00000000bfffffff]
	  node   0: [mem 0x0000001040000000-0x000000107fffffff]
	Initmem setup node 0 [mem 0x0000000080200000-0x000000107fffffff]
	Kernel panic - not syncing: Failed to allocate 1073741824 bytes for node 0 memory map
	CPU: 0 PID: 0 Comm: swapper Not tainted 5.19.0-dirty #1
	Hardware name: Microchip PolarFire-SoC Icicle Kit (DT)
	Call Trace:
	[<ffffffff800057f0>] show_stack+0x30/0x3c
	[<ffffffff807d5802>] dump_stack_lvl+0x4a/0x66
	[<ffffffff807d5836>] dump_stack+0x18/0x20
	[<ffffffff807d1ae8>] panic+0x124/0x2c6
	[<ffffffff80814064>] free_area_init_core+0x0/0x11e
	[<ffffffff80813720>] free_area_init_node+0xc2/0xf6
	[<ffffffff8081331e>] free_area_init+0x222/0x260
	[<ffffffff808064d6>] misc_mem_init+0x62/0x9a
	[<ffffffff80803cb2>] setup_arch+0xb0/0xea
	[<ffffffff8080039a>] start_kernel+0x88/0x4ee
	---[ end Kernel panic - not syncing: Failed to allocate 1073741824 bytes for node 0 memory map ]---

With the aim of keeping defconfig builds booting on icicle, enable
SPARSEMEM_MANUAL.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221021160028.4042304-1-conor@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 05fd5fcf24f9b..daba5d7438620 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -38,6 +38,7 @@ CONFIG_KVM=m
 CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
+CONFIG_SPARSEMEM_MANUAL=y
 CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_NET=y
 CONFIG_PACKET=y
-- 
GitLab


From 4989764d8ed3d3d1024e4e831ff2affc40ee01d6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:24 -0400
Subject: [PATCH 2897/4122] iommu: Add IOMMU_CAP_ENFORCE_CACHE_COHERENCY

This queries if a domain linked to a device should expect to support
enforce_cache_coherency() so iommufd can negotiate the rules for when a
domain should be shared or not.

For iommufd a device that declares IOMMU_CAP_ENFORCE_CACHE_COHERENCY will
not be attached to a domain that does not support it.

Link: https://lore.kernel.org/r/1-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/amd/iommu.c   |  2 ++
 drivers/iommu/intel/iommu.c | 16 +++++++++++-----
 include/linux/iommu.h       |  5 +++++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 45299eb7e8e30..240c535e317cc 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2278,6 +2278,8 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
 		return false;
 	case IOMMU_CAP_PRE_BOOT_PROTECTION:
 		return amdr_ivrs_remap_support;
+	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+		return true;
 	default:
 		break;
 	}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index f298e51d5aa67..157c972741107 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4450,14 +4450,20 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain)
 
 static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
 {
-	if (cap == IOMMU_CAP_CACHE_COHERENCY)
+	struct device_domain_info *info = dev_iommu_priv_get(dev);
+
+	switch (cap) {
+	case IOMMU_CAP_CACHE_COHERENCY:
 		return true;
-	if (cap == IOMMU_CAP_INTR_REMAP)
+	case IOMMU_CAP_INTR_REMAP:
 		return irq_remapping_enabled == 1;
-	if (cap == IOMMU_CAP_PRE_BOOT_PROTECTION)
+	case IOMMU_CAP_PRE_BOOT_PROTECTION:
 		return dmar_platform_optin();
-
-	return false;
+	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
+		return ecap_sc_support(info->iommu->ecap);
+	default:
+		return false;
+	}
 }
 
 static struct iommu_device *intel_iommu_probe_device(struct device *dev)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 68d7d304cdb76..a09fd32d8cc27 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -124,6 +124,11 @@ enum iommu_cap {
 	IOMMU_CAP_NOEXEC,		/* IOMMU_NOEXEC flag */
 	IOMMU_CAP_PRE_BOOT_PROTECTION,	/* Firmware says it used the IOMMU for
 					   DMA protection and we should too */
+	/*
+	 * Per-device flag indicating if enforce_cache_coherency() will work on
+	 * this device.
+	 */
+	IOMMU_CAP_ENFORCE_CACHE_COHERENCY,
 };
 
 /* These are the possible reserved region types */
-- 
GitLab


From 89395ccedbc153fecbc29342fbb94a6dfadf24cd Mon Sep 17 00:00:00 2001
From: Lu Baolu <baolu.lu@linux.intel.com>
Date: Tue, 29 Nov 2022 16:29:25 -0400
Subject: [PATCH 2898/4122] iommu: Add device-centric DMA ownership interfaces

These complement the group interfaces used by VFIO and are for use by
iommufd. The main difference is that multiple devices in the same group
can all share the ownership by passing the same ownership pointer.

Move the common code into shared functions.

Link: https://lore.kernel.org/r/2-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommu.c | 121 +++++++++++++++++++++++++++++++++---------
 include/linux/iommu.h |  12 +++++
 2 files changed, 107 insertions(+), 26 deletions(-)

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 6ca377f4fbf9e..d69ebba81bebd 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3108,41 +3108,49 @@ static int __iommu_group_alloc_blocking_domain(struct iommu_group *group)
 	return 0;
 }
 
+static int __iommu_take_dma_ownership(struct iommu_group *group, void *owner)
+{
+	int ret;
+
+	if ((group->domain && group->domain != group->default_domain) ||
+	    !xa_empty(&group->pasid_array))
+		return -EBUSY;
+
+	ret = __iommu_group_alloc_blocking_domain(group);
+	if (ret)
+		return ret;
+	ret = __iommu_group_set_domain(group, group->blocking_domain);
+	if (ret)
+		return ret;
+
+	group->owner = owner;
+	group->owner_cnt++;
+	return 0;
+}
+
 /**
  * iommu_group_claim_dma_owner() - Set DMA ownership of a group
  * @group: The group.
  * @owner: Caller specified pointer. Used for exclusive ownership.
  *
- * This is to support backward compatibility for vfio which manages
- * the dma ownership in iommu_group level. New invocations on this
- * interface should be prohibited.
+ * This is to support backward compatibility for vfio which manages the dma
+ * ownership in iommu_group level. New invocations on this interface should be
+ * prohibited. Only a single owner may exist for a group.
  */
 int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner)
 {
 	int ret = 0;
 
+	if (WARN_ON(!owner))
+		return -EINVAL;
+
 	mutex_lock(&group->mutex);
 	if (group->owner_cnt) {
 		ret = -EPERM;
 		goto unlock_out;
-	} else {
-		if ((group->domain && group->domain != group->default_domain) ||
-		    !xa_empty(&group->pasid_array)) {
-			ret = -EBUSY;
-			goto unlock_out;
-		}
-
-		ret = __iommu_group_alloc_blocking_domain(group);
-		if (ret)
-			goto unlock_out;
-
-		ret = __iommu_group_set_domain(group, group->blocking_domain);
-		if (ret)
-			goto unlock_out;
-		group->owner = owner;
 	}
 
-	group->owner_cnt++;
+	ret = __iommu_take_dma_ownership(group, owner);
 unlock_out:
 	mutex_unlock(&group->mutex);
 
@@ -3151,30 +3159,91 @@ unlock_out:
 EXPORT_SYMBOL_GPL(iommu_group_claim_dma_owner);
 
 /**
- * iommu_group_release_dma_owner() - Release DMA ownership of a group
- * @group: The group.
+ * iommu_device_claim_dma_owner() - Set DMA ownership of a device
+ * @dev: The device.
+ * @owner: Caller specified pointer. Used for exclusive ownership.
  *
- * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
+ * Claim the DMA ownership of a device. Multiple devices in the same group may
+ * concurrently claim ownership if they present the same owner value. Returns 0
+ * on success and error code on failure
  */
-void iommu_group_release_dma_owner(struct iommu_group *group)
+int iommu_device_claim_dma_owner(struct device *dev, void *owner)
 {
-	int ret;
+	struct iommu_group *group = iommu_group_get(dev);
+	int ret = 0;
+
+	if (!group)
+		return -ENODEV;
+	if (WARN_ON(!owner))
+		return -EINVAL;
 
 	mutex_lock(&group->mutex);
+	if (group->owner_cnt) {
+		if (group->owner != owner) {
+			ret = -EPERM;
+			goto unlock_out;
+		}
+		group->owner_cnt++;
+		goto unlock_out;
+	}
+
+	ret = __iommu_take_dma_ownership(group, owner);
+unlock_out:
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_device_claim_dma_owner);
+
+static void __iommu_release_dma_ownership(struct iommu_group *group)
+{
+	int ret;
+
 	if (WARN_ON(!group->owner_cnt || !group->owner ||
 		    !xa_empty(&group->pasid_array)))
-		goto unlock_out;
+		return;
 
 	group->owner_cnt = 0;
 	group->owner = NULL;
 	ret = __iommu_group_set_domain(group, group->default_domain);
 	WARN(ret, "iommu driver failed to attach the default domain");
+}
 
-unlock_out:
+/**
+ * iommu_group_release_dma_owner() - Release DMA ownership of a group
+ * @dev: The device
+ *
+ * Release the DMA ownership claimed by iommu_group_claim_dma_owner().
+ */
+void iommu_group_release_dma_owner(struct iommu_group *group)
+{
+	mutex_lock(&group->mutex);
+	__iommu_release_dma_ownership(group);
 	mutex_unlock(&group->mutex);
 }
 EXPORT_SYMBOL_GPL(iommu_group_release_dma_owner);
 
+/**
+ * iommu_device_release_dma_owner() - Release DMA ownership of a device
+ * @group: The device.
+ *
+ * Release the DMA ownership claimed by iommu_device_claim_dma_owner().
+ */
+void iommu_device_release_dma_owner(struct device *dev)
+{
+	struct iommu_group *group = iommu_group_get(dev);
+
+	mutex_lock(&group->mutex);
+	if (group->owner_cnt > 1)
+		group->owner_cnt--;
+	else
+		__iommu_release_dma_ownership(group);
+	mutex_unlock(&group->mutex);
+	iommu_group_put(group);
+}
+EXPORT_SYMBOL_GPL(iommu_device_release_dma_owner);
+
 /**
  * iommu_group_dma_owner_claimed() - Query group dma ownership status
  * @group: The group.
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index a09fd32d8cc27..1690c334e5163 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -707,6 +707,9 @@ int iommu_group_claim_dma_owner(struct iommu_group *group, void *owner);
 void iommu_group_release_dma_owner(struct iommu_group *group);
 bool iommu_group_dma_owner_claimed(struct iommu_group *group);
 
+int iommu_device_claim_dma_owner(struct device *dev, void *owner);
+void iommu_device_release_dma_owner(struct device *dev);
+
 struct iommu_domain *iommu_sva_domain_alloc(struct device *dev,
 					    struct mm_struct *mm);
 int iommu_attach_device_pasid(struct iommu_domain *domain,
@@ -1064,6 +1067,15 @@ static inline bool iommu_group_dma_owner_claimed(struct iommu_group *group)
 	return false;
 }
 
+static inline void iommu_device_release_dma_owner(struct device *dev)
+{
+}
+
+static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
+{
+	return -ENODEV;
+}
+
 static inline struct iommu_domain *
 iommu_sva_domain_alloc(struct device *dev, struct mm_struct *mm)
 {
-- 
GitLab


From 5fe937862c8426f24cd1dcbf7c22fb1a31069b4f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:26 -0400
Subject: [PATCH 2899/4122] interval-tree: Add a utility to iterate over spans
 in an interval tree

The span iterator travels over the indexes of the interval_tree, not the
nodes, and classifies spans of indexes as either 'used' or 'hole'.

'used' spans are fully covered by nodes in the tree and 'hole' spans have
no node intersecting the span.

This is done greedily such that spans are maximally sized and every
iteration step switches between used/hole.

As an example a trivial allocator can be written as:

	for (interval_tree_span_iter_first(&span, itree, 0, ULONG_MAX);
	     !interval_tree_span_iter_done(&span);
	     interval_tree_span_iter_next(&span))
		if (span.is_hole &&
		    span.last_hole - span.start_hole >= allocation_size - 1)
			return span.start_hole;

With all the tricky boundary conditions handled by the library code.

The following iommufd patches have several algorithms for its overlapping
node interval trees that are significantly simplified with this kind of
iteration primitive. As it seems generally useful, put it into lib/.

Link: https://lore.kernel.org/r/3-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 .clang-format                 |   1 +
 include/linux/interval_tree.h |  58 +++++++++++++++
 lib/Kconfig                   |   4 ++
 lib/interval_tree.c           | 132 ++++++++++++++++++++++++++++++++++
 4 files changed, 195 insertions(+)

diff --git a/.clang-format b/.clang-format
index 1247d54f9e49f..96d07786dcfb4 100644
--- a/.clang-format
+++ b/.clang-format
@@ -440,6 +440,7 @@ ForEachMacros:
   - 'inet_lhash2_for_each_icsk'
   - 'inet_lhash2_for_each_icsk_continue'
   - 'inet_lhash2_for_each_icsk_rcu'
+  - 'interval_tree_for_each_span'
   - 'intlist__for_each_entry'
   - 'intlist__for_each_entry_safe'
   - 'kcore_copy__for_each_phdr'
diff --git a/include/linux/interval_tree.h b/include/linux/interval_tree.h
index 288c26f50732d..2b8026a399064 100644
--- a/include/linux/interval_tree.h
+++ b/include/linux/interval_tree.h
@@ -27,4 +27,62 @@ extern struct interval_tree_node *
 interval_tree_iter_next(struct interval_tree_node *node,
 			unsigned long start, unsigned long last);
 
+/**
+ * struct interval_tree_span_iter - Find used and unused spans.
+ * @start_hole: Start of an interval for a hole when is_hole == 1
+ * @last_hole: Inclusive end of an interval for a hole when is_hole == 1
+ * @start_used: Start of a used interval when is_hole == 0
+ * @last_used: Inclusive end of a used interval when is_hole == 0
+ * @is_hole: 0 == used, 1 == is_hole, -1 == done iteration
+ *
+ * This iterator travels over spans in an interval tree. It does not return
+ * nodes but classifies each span as either a hole, where no nodes intersect, or
+ * a used, which is fully covered by nodes. Each iteration step toggles between
+ * hole and used until the entire range is covered. The returned spans always
+ * fully cover the requested range.
+ *
+ * The iterator is greedy, it always returns the largest hole or used possible,
+ * consolidating all consecutive nodes.
+ *
+ * Use interval_tree_span_iter_done() to detect end of iteration.
+ */
+struct interval_tree_span_iter {
+	/* private: not for use by the caller */
+	struct interval_tree_node *nodes[2];
+	unsigned long first_index;
+	unsigned long last_index;
+
+	/* public: */
+	union {
+		unsigned long start_hole;
+		unsigned long start_used;
+	};
+	union {
+		unsigned long last_hole;
+		unsigned long last_used;
+	};
+	int is_hole;
+};
+
+void interval_tree_span_iter_first(struct interval_tree_span_iter *state,
+				   struct rb_root_cached *itree,
+				   unsigned long first_index,
+				   unsigned long last_index);
+void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
+				     struct rb_root_cached *itree,
+				     unsigned long new_index);
+void interval_tree_span_iter_next(struct interval_tree_span_iter *state);
+
+static inline bool
+interval_tree_span_iter_done(struct interval_tree_span_iter *state)
+{
+	return state->is_hole == -1;
+}
+
+#define interval_tree_for_each_span(span, itree, first_index, last_index)      \
+	for (interval_tree_span_iter_first(span, itree,                        \
+					   first_index, last_index);           \
+	     !interval_tree_span_iter_done(span);                              \
+	     interval_tree_span_iter_next(span))
+
 #endif	/* _LINUX_INTERVAL_TREE_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 9bbf8a4b2108e..c6c323fd25172 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -479,6 +479,10 @@ config INTERVAL_TREE
 
 	  for more information.
 
+config INTERVAL_TREE_SPAN_ITER
+	bool
+	depends on INTERVAL_TREE
+
 config XARRAY_MULTI
 	bool
 	help
diff --git a/lib/interval_tree.c b/lib/interval_tree.c
index 593ce56ece505..3412737ff365e 100644
--- a/lib/interval_tree.c
+++ b/lib/interval_tree.c
@@ -15,3 +15,135 @@ EXPORT_SYMBOL_GPL(interval_tree_insert);
 EXPORT_SYMBOL_GPL(interval_tree_remove);
 EXPORT_SYMBOL_GPL(interval_tree_iter_first);
 EXPORT_SYMBOL_GPL(interval_tree_iter_next);
+
+#ifdef CONFIG_INTERVAL_TREE_SPAN_ITER
+/*
+ * Roll nodes[1] into nodes[0] by advancing nodes[1] to the end of a contiguous
+ * span of nodes. This makes nodes[0]->last the end of that contiguous used span
+ * indexes that started at the original nodes[1]->start. nodes[1] is now the
+ * first node starting the next used span. A hole span is between nodes[0]->last
+ * and nodes[1]->start. nodes[1] must be !NULL.
+ */
+static void
+interval_tree_span_iter_next_gap(struct interval_tree_span_iter *state)
+{
+	struct interval_tree_node *cur = state->nodes[1];
+
+	state->nodes[0] = cur;
+	do {
+		if (cur->last > state->nodes[0]->last)
+			state->nodes[0] = cur;
+		cur = interval_tree_iter_next(cur, state->first_index,
+					      state->last_index);
+	} while (cur && (state->nodes[0]->last >= cur->start ||
+			 state->nodes[0]->last + 1 == cur->start));
+	state->nodes[1] = cur;
+}
+
+void interval_tree_span_iter_first(struct interval_tree_span_iter *iter,
+				   struct rb_root_cached *itree,
+				   unsigned long first_index,
+				   unsigned long last_index)
+{
+	iter->first_index = first_index;
+	iter->last_index = last_index;
+	iter->nodes[0] = NULL;
+	iter->nodes[1] =
+		interval_tree_iter_first(itree, first_index, last_index);
+	if (!iter->nodes[1]) {
+		/* No nodes intersect the span, whole span is hole */
+		iter->start_hole = first_index;
+		iter->last_hole = last_index;
+		iter->is_hole = 1;
+		return;
+	}
+	if (iter->nodes[1]->start > first_index) {
+		/* Leading hole on first iteration */
+		iter->start_hole = first_index;
+		iter->last_hole = iter->nodes[1]->start - 1;
+		iter->is_hole = 1;
+		interval_tree_span_iter_next_gap(iter);
+		return;
+	}
+
+	/* Starting inside a used */
+	iter->start_used = first_index;
+	iter->is_hole = 0;
+	interval_tree_span_iter_next_gap(iter);
+	iter->last_used = iter->nodes[0]->last;
+	if (iter->last_used >= last_index) {
+		iter->last_used = last_index;
+		iter->nodes[0] = NULL;
+		iter->nodes[1] = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(interval_tree_span_iter_first);
+
+void interval_tree_span_iter_next(struct interval_tree_span_iter *iter)
+{
+	if (!iter->nodes[0] && !iter->nodes[1]) {
+		iter->is_hole = -1;
+		return;
+	}
+
+	if (iter->is_hole) {
+		iter->start_used = iter->last_hole + 1;
+		iter->last_used = iter->nodes[0]->last;
+		if (iter->last_used >= iter->last_index) {
+			iter->last_used = iter->last_index;
+			iter->nodes[0] = NULL;
+			iter->nodes[1] = NULL;
+		}
+		iter->is_hole = 0;
+		return;
+	}
+
+	if (!iter->nodes[1]) {
+		/* Trailing hole */
+		iter->start_hole = iter->nodes[0]->last + 1;
+		iter->last_hole = iter->last_index;
+		iter->nodes[0] = NULL;
+		iter->is_hole = 1;
+		return;
+	}
+
+	/* must have both nodes[0] and [1], interior hole */
+	iter->start_hole = iter->nodes[0]->last + 1;
+	iter->last_hole = iter->nodes[1]->start - 1;
+	iter->is_hole = 1;
+	interval_tree_span_iter_next_gap(iter);
+}
+EXPORT_SYMBOL_GPL(interval_tree_span_iter_next);
+
+/*
+ * Advance the iterator index to a specific position. The returned used/hole is
+ * updated to start at new_index. This is faster than calling
+ * interval_tree_span_iter_first() as it can avoid full searches in several
+ * cases where the iterator is already set.
+ */
+void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter,
+				     struct rb_root_cached *itree,
+				     unsigned long new_index)
+{
+	if (iter->is_hole == -1)
+		return;
+
+	iter->first_index = new_index;
+	if (new_index > iter->last_index) {
+		iter->is_hole = -1;
+		return;
+	}
+
+	/* Rely on the union aliasing hole/used */
+	if (iter->start_hole <= new_index && new_index <= iter->last_hole) {
+		iter->start_hole = new_index;
+		return;
+	}
+	if (new_index == iter->last_hole + 1)
+		interval_tree_span_iter_next(iter);
+	else
+		interval_tree_span_iter_first(iter, itree, new_index,
+					      iter->last_index);
+}
+EXPORT_SYMBOL_GPL(interval_tree_span_iter_advance);
+#endif
-- 
GitLab


From 632ce1377dbbdabff575d33bec9c79d75ef0395a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:27 -0400
Subject: [PATCH 2900/4122] scripts/kernel-doc: support EXPORT_SYMBOL_NS_GPL()
 with -export

Parse EXPORT_SYMBOL_NS_GPL() in addition to EXPORT_SYMBOL_GPL() for use
with the -export flag.

Link: https://lore.kernel.org/r/4-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Acked-by: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 scripts/kernel-doc | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index aea04365bc69d..48e3feca31701 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -256,6 +256,7 @@ my $doc_inline_sect = '\s*\*\s*(@\s*[\w][\w\.]*\s*):(.*)';
 my $doc_inline_end = '^\s*\*/\s*$';
 my $doc_inline_oneline = '^\s*/\*\*\s*(@[\w\s]+):\s*(.*)\s*\*/\s*$';
 my $export_symbol = '^\s*EXPORT_SYMBOL(_GPL)?\s*\(\s*(\w+)\s*\)\s*;';
+my $export_symbol_ns = '^\s*EXPORT_SYMBOL_NS(_GPL)?\s*\(\s*(\w+)\s*,\s*\w+\)\s*;';
 my $function_pointer = qr{([^\(]*\(\*)\s*\)\s*\(([^\)]*)\)};
 my $attribute = qr{__attribute__\s*\(\([a-z0-9,_\*\s\(\)]*\)\)}i;
 
@@ -1948,6 +1949,10 @@ sub process_export_file($) {
 	    next if (defined($nosymbol_table{$2}));
 	    $function_table{$2} = 1;
 	}
+	if (/$export_symbol_ns/) {
+	    next if (defined($nosymbol_table{$2}));
+	    $function_table{$2} = 1;
+	}
     }
 
     close(IN);
@@ -2419,12 +2424,12 @@ found on PATH.
 =item -export
 
 Only output documentation for the symbols that have been exported using
-EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
 
 =item -internal
 
 Only output documentation for the symbols that have NOT been exported using
-EXPORT_SYMBOL() or EXPORT_SYMBOL_GPL() in any input FILE or -export-file FILE.
+EXPORT_SYMBOL() and related macros in any input FILE or -export-file FILE.
 
 =item -function NAME
 
@@ -2451,8 +2456,7 @@ Do not output DOC: sections.
 
 =item -export-file FILE
 
-Specify an additional FILE in which to look for EXPORT_SYMBOL() and
-EXPORT_SYMBOL_GPL().
+Specify an additional FILE in which to look for EXPORT_SYMBOL information.
 
 To be used with -export or -internal.
 
-- 
GitLab


From d03a6d4e2beaa358b6c4e16fe106e813a57e927a Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 29 Nov 2022 15:15:39 +0100
Subject: [PATCH 2901/4122] USB: serial: cp210x: add support for B0 hangup

A request to set the line speed to B0 is used to hang up a modem
connection by deasserting the modem control lines.

Note that there is no need reconfigure the line speed in hardware when
B0 is requested (even if some drivers do set it to an arbitrary value
for implementation or protocol reasons).

Reviewed-by: Alex Henrie <alexh@vpitech.com>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/cp210x.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
index f6fb23620e87a..67372acc23529 100644
--- a/drivers/usb/serial/cp210x.c
+++ b/drivers/usb/serial/cp210x.c
@@ -1049,11 +1049,12 @@ static void cp210x_change_speed(struct tty_struct *tty,
 	struct cp210x_serial_private *priv = usb_get_serial_data(serial);
 	u32 baud;
 
+	if (tty->termios.c_ospeed == 0)
+		return;
+
 	/*
 	 * This maps the requested rate to the actual rate, a valid rate on
 	 * cp2102 or cp2103, or to an arbitrary rate in [1M, max_speed].
-	 *
-	 * NOTE: B0 is not implemented.
 	 */
 	baud = clamp(tty->termios.c_ospeed, priv->min_speed, priv->max_speed);
 
@@ -1146,7 +1147,8 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 		tty->termios.c_iflag &= ~(IXON | IXOFF);
 	}
 
-	if (old_termios &&
+	if (tty->termios.c_ospeed != 0 &&
+			old_termios && old_termios->c_ospeed != 0 &&
 			C_CRTSCTS(tty) == (old_termios->c_cflag & CRTSCTS) &&
 			I_IXON(tty) == (old_termios->c_iflag & IXON) &&
 			I_IXOFF(tty) == (old_termios->c_iflag & IXOFF) &&
@@ -1171,6 +1173,14 @@ static void cp210x_set_flow_control(struct tty_struct *tty,
 
 	mutex_lock(&port_priv->mutex);
 
+	if (tty->termios.c_ospeed == 0) {
+		port_priv->dtr = false;
+		port_priv->rts = false;
+	} else if (old_termios && old_termios->c_ospeed == 0) {
+		port_priv->dtr = true;
+		port_priv->rts = true;
+	}
+
 	ret = cp210x_read_reg_block(port, CP210X_GET_FLOW, &flow_ctl,
 			sizeof(flow_ctl));
 	if (ret)
@@ -1243,7 +1253,8 @@ static void cp210x_set_termios(struct tty_struct *tty,
 	u16 bits;
 	int ret;
 
-	if (old_termios && !cp210x_termios_change(&tty->termios, old_termios))
+	if (old_termios && !cp210x_termios_change(&tty->termios, old_termios) &&
+			tty->termios.c_ospeed != 0)
 		return;
 
 	if (!old_termios || tty->termios.c_ospeed != old_termios->c_ospeed)
-- 
GitLab


From 67e6272d53386f9708f91c4d0015c4a1c470eef5 Mon Sep 17 00:00:00 2001
From: Or Har-Toov <ohartoov@nvidia.com>
Date: Mon, 28 Nov 2022 13:52:45 +0200
Subject: [PATCH 2902/4122] RDMA/nldev: Add NULL check to silence false
 warnings

Using nlmsg_put causes static analysis tools to many
false positives of not checking the return value of nlmsg_put.

In all uses in nldev.c, payload parameter is 0 so NULL will never
be returned. So let's add useless checks to silence the warnings.

Signed-off-by: Or Har-Toov <ohartoov@nvidia.com>
Reviewed-by: Michael Guralnik <michaelgur@nvidia.com>
Link: https://lore.kernel.org/r/bd924da89d5b4f5291a4a01d9b5ae47c0a9b6a3f.1669636336.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/nldev.c | 44 +++++++++++++++++++++++++++------
 1 file changed, 36 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index ca0ed7d143261..b4716cda65d89 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -1043,6 +1043,10 @@ static int nldev_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 			0, 0);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto err_free;
+	}
 
 	err = fill_dev_info(msg, device);
 	if (err)
@@ -1128,7 +1132,7 @@ static int _nldev_get_dumpit(struct ib_device *device,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 			0, NLM_F_MULTI);
 
-	if (fill_dev_info(skb, device)) {
+	if (!nlh || fill_dev_info(skb, device)) {
 		nlmsg_cancel(skb, nlh);
 		goto out;
 	}
@@ -1187,6 +1191,10 @@ static int nldev_port_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_GET),
 			0, 0);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto err_free;
+	}
 
 	err = fill_port_info(msg, device, port, sock_net(skb->sk));
 	if (err)
@@ -1248,7 +1256,7 @@ static int nldev_port_get_dumpit(struct sk_buff *skb,
 						 RDMA_NLDEV_CMD_PORT_GET),
 				0, NLM_F_MULTI);
 
-		if (fill_port_info(skb, device, p, sock_net(skb->sk))) {
+		if (!nlh || fill_port_info(skb, device, p, sock_net(skb->sk))) {
 			nlmsg_cancel(skb, nlh);
 			goto out;
 		}
@@ -1290,6 +1298,10 @@ static int nldev_res_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 	nlh = nlmsg_put(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
 			0, 0);
+	if (!nlh) {
+		ret = -EMSGSIZE;
+		goto err_free;
+	}
 
 	ret = fill_res_info(msg, device);
 	if (ret)
@@ -1321,7 +1333,7 @@ static int _nldev_res_get_dumpit(struct ib_device *device,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV, RDMA_NLDEV_CMD_RES_GET),
 			0, NLM_F_MULTI);
 
-	if (fill_res_info(skb, device)) {
+	if (!nlh || fill_res_info(skb, device)) {
 		nlmsg_cancel(skb, nlh);
 		goto out;
 	}
@@ -1456,7 +1468,7 @@ static int res_get_common_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 					 RDMA_NL_GET_OP(nlh->nlmsg_type)),
 			0, 0);
 
-	if (fill_nldev_handle(msg, device)) {
+	if (!nlh || fill_nldev_handle(msg, device)) {
 		ret = -EMSGSIZE;
 		goto err_free;
 	}
@@ -1535,7 +1547,7 @@ static int res_get_common_dumpit(struct sk_buff *skb,
 					 RDMA_NL_GET_OP(cb->nlh->nlmsg_type)),
 			0, NLM_F_MULTI);
 
-	if (fill_nldev_handle(skb, device)) {
+	if (!nlh || fill_nldev_handle(skb, device)) {
 		ret = -EMSGSIZE;
 		goto err;
 	}
@@ -1797,6 +1809,10 @@ static int nldev_get_chardev(struct sk_buff *skb, struct nlmsghdr *nlh,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					 RDMA_NLDEV_CMD_GET_CHARDEV),
 			0, 0);
+	if (!nlh) {
+		err = -EMSGSIZE;
+		goto out_nlmsg;
+	}
 
 	data.nl_msg = msg;
 	err = ib_get_client_nl_info(ibdev, client_name, &data);
@@ -1854,6 +1870,10 @@ static int nldev_sys_get_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					 RDMA_NLDEV_CMD_SYS_GET),
 			0, 0);
+	if (!nlh) {
+		nlmsg_free(msg);
+		return -EMSGSIZE;
+	}
 
 	err = nla_put_u8(msg, RDMA_NLDEV_SYS_ATTR_NETNS_MODE,
 			 (u8)ib_devices_shared_netns);
@@ -2034,7 +2054,7 @@ static int nldev_stat_set_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					 RDMA_NLDEV_CMD_STAT_SET),
 			0, 0);
-	if (fill_nldev_handle(msg, device) ||
+	if (!nlh || fill_nldev_handle(msg, device) ||
 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
 		ret = -EMSGSIZE;
 		goto err_free_msg;
@@ -2103,6 +2123,10 @@ static int nldev_stat_del_doit(struct sk_buff *skb, struct nlmsghdr *nlh,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					 RDMA_NLDEV_CMD_STAT_SET),
 			0, 0);
+	if (!nlh) {
+		ret = -EMSGSIZE;
+		goto err_fill;
+	}
 
 	cntn = nla_get_u32(tb[RDMA_NLDEV_ATTR_STAT_COUNTER_ID]);
 	qpn = nla_get_u32(tb[RDMA_NLDEV_ATTR_RES_LQPN]);
@@ -2173,7 +2197,7 @@ static int stat_get_doit_default_counter(struct sk_buff *skb,
 					 RDMA_NLDEV_CMD_STAT_GET),
 			0, 0);
 
-	if (fill_nldev_handle(msg, device) ||
+	if (!nlh || fill_nldev_handle(msg, device) ||
 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port)) {
 		ret = -EMSGSIZE;
 		goto err_msg;
@@ -2261,6 +2285,10 @@ static int stat_get_doit_qp(struct sk_buff *skb, struct nlmsghdr *nlh,
 			RDMA_NL_GET_TYPE(RDMA_NL_NLDEV,
 					 RDMA_NLDEV_CMD_STAT_GET),
 			0, 0);
+	if (!nlh) {
+		ret = -EMSGSIZE;
+		goto err_msg;
+	}
 
 	ret = rdma_counter_get_mode(device, port, &mode, &mask);
 	if (ret)
@@ -2393,7 +2421,7 @@ static int nldev_stat_get_counter_status_doit(struct sk_buff *skb,
 		0, 0);
 
 	ret = -EMSGSIZE;
-	if (fill_nldev_handle(msg, device) ||
+	if (!nlh || fill_nldev_handle(msg, device) ||
 	    nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, port))
 		goto err_msg;
 
-- 
GitLab


From fc8f93ad3e5485d45c992233c96acd902992dfc4 Mon Sep 17 00:00:00 2001
From: Mark Zhang <markzhang@nvidia.com>
Date: Mon, 28 Nov 2022 13:52:46 +0200
Subject: [PATCH 2903/4122] RDMA/nldev: Fix failure to send large messages

Return "-EMSGSIZE" instead of "-EINVAL" when filling a QP entry, so that
new SKBs will be allocated if there's not enough room in current SKB.

Fixes: 65959522f806 ("RDMA: Add support to dump resource tracker in RAW format")
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Reviewed-by: Patrisious Haddad <phaddad@nvidia.com>
Link: https://lore.kernel.org/r/b5e9c62f6b8369acab5648b661bf539cbceeffdc.1669636336.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/nldev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c
index b4716cda65d89..a981ac2f09758 100644
--- a/drivers/infiniband/core/nldev.c
+++ b/drivers/infiniband/core/nldev.c
@@ -513,7 +513,7 @@ static int fill_res_qp_entry(struct sk_buff *msg, bool has_cap_net_admin,
 
 	/* In create_qp() port is not set yet */
 	if (qp->port && nla_put_u32(msg, RDMA_NLDEV_ATTR_PORT_INDEX, qp->port))
-		return -EINVAL;
+		return -EMSGSIZE;
 
 	ret = nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_LQPN, qp->qp_num);
 	if (ret)
-- 
GitLab


From 6c645b01e536757a9e1a9f72c13767f9b3f8559f Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:26 +1000
Subject: [PATCH 2904/4122] KVM: PPC: Book3E: Fix CONFIG_TRACE_IRQFLAGS support

32-bit does not trace_irqs_off() to match the trace_irqs_on() call in
kvmppc_fix_ee_before_entry(). This can lead to irqs being enabled twice
in the trace, and the irqs-off region between guest exit and the host
enabling local irqs again is not properly traced.

64-bit code does call this, but from asm code where volatiles are live
and so incorrectly get clobbered.

Move the irq reconcile into C to fix both problems.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-2-npiggin@gmail.com
---
 arch/powerpc/include/asm/kvm_ppc.h    | 12 ++++++++++++
 arch/powerpc/kvm/booke.c              |  3 +++
 arch/powerpc/kvm/bookehv_interrupts.S |  9 ---------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index bfacf12784dde..eae9619b61903 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -1014,6 +1014,18 @@ static inline void kvmppc_fix_ee_before_entry(void)
 #endif
 }
 
+static inline void kvmppc_fix_ee_after_exit(void)
+{
+#ifdef CONFIG_PPC64
+	/* Only need to enable IRQs by hard enabling them after this */
+	local_paca->irq_happened = PACA_IRQ_HARD_DIS;
+	irq_soft_mask_set(IRQS_ALL_DISABLED);
+#endif
+
+	trace_hardirqs_off();
+}
+
+
 static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb)
 {
 	ulong ea;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7b4920e9fd263..0dce93ccaadfa 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -1015,6 +1015,9 @@ int kvmppc_handle_exit(struct kvm_vcpu *vcpu, unsigned int exit_nr)
 	u32 last_inst = KVM_INST_FETCH_FAILED;
 	enum emulation_result emulated = EMULATE_DONE;
 
+	/* Fix irq state (pairs with kvmppc_fix_ee_before_entry()) */
+	kvmppc_fix_ee_after_exit();
+
 	/* update before a new last_exit_type is rewritten */
 	kvmppc_update_timing_stats(vcpu);
 
diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S
index 8262c14fc9e63..b5fe6fb53c662 100644
--- a/arch/powerpc/kvm/bookehv_interrupts.S
+++ b/arch/powerpc/kvm/bookehv_interrupts.S
@@ -424,15 +424,6 @@ _GLOBAL(kvmppc_resume_host)
 	mtspr	SPRN_EPCR, r3
 	isync
 
-#ifdef CONFIG_64BIT
-	/*
-	 * We enter with interrupts disabled in hardware, but
-	 * we need to call RECONCILE_IRQ_STATE to ensure
-	 * that the software state is kept in sync.
-	 */
-	RECONCILE_IRQ_STATE(r3,r5)
-#endif
-
 	/* Switch to kernel stack and jump to handler. */
 	mr	r3, r4
 	mr	r5, r14 /* intno */
-- 
GitLab


From dea681c91d3cd5326f87d0a3c93079573e22ce9a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 22 Nov 2022 08:22:25 +0100
Subject: [PATCH 2905/4122] powerpc/ps3: mark ps3_system_bus_type static

ps3_system_bus_type is only used inside of system-bus.c, so remove
the external declaration and the very outdated comment next to it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Geoff Levand <geoff@infradead.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221122072225.423432-1-hch@lst.de
---
 arch/powerpc/include/asm/ps3.h          | 4 ----
 arch/powerpc/platforms/ps3/system-bus.c | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h
index 8a0d8fb353286..d503dbd7856cb 100644
--- a/arch/powerpc/include/asm/ps3.h
+++ b/arch/powerpc/include/asm/ps3.h
@@ -425,10 +425,6 @@ static inline void *ps3_system_bus_get_drvdata(
 	return dev_get_drvdata(&dev->core);
 }
 
-/* These two need global scope for get_arch_dma_ops(). */
-
-extern struct bus_type ps3_system_bus_type;
-
 /* system manager */
 
 struct ps3_sys_manager_ops {
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index 2502e9b17df4a..38a7e02295c8f 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -466,7 +466,7 @@ static struct attribute *ps3_system_bus_dev_attrs[] = {
 };
 ATTRIBUTE_GROUPS(ps3_system_bus_dev);
 
-struct bus_type ps3_system_bus_type = {
+static struct bus_type ps3_system_bus_type = {
 	.name = "ps3_system_bus",
 	.match = ps3_system_bus_match,
 	.uevent = ps3_system_bus_uevent,
-- 
GitLab


From 71ae6305ad41cfd1ac5aa91d356e71c7a537df2e Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 22 Nov 2022 12:10:52 +0530
Subject: [PATCH 2906/4122] selftests/powerpc: Move perror closer to its use

Right now, if perf_event_open() fails for the systemwide tests, error
report is printed too late, sometimes after subsequent system calls.
Move use of perror() to the main function, just after the syscall.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/372ac78c27899f1f612fbd6ac796604a4a9310aa.1669096083.git.naveen.n.rao@linux.vnet.ibm.com
---
 .../selftests/powerpc/ptrace/perf-hwbreak.c   | 21 +++++--------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index ecde2c199f3b1..ea5e14ecbf309 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -146,6 +146,7 @@ static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len
 	for (i = 0; i < nprocs; i++) {
 		fd[i] = perf_cpu_event_open(i, type, addr, len);
 		if (fd[i] < 0) {
+			perror("perf_systemwide_event_open");
 			close_fds(fd, i);
 			return fd[i];
 		}
@@ -543,15 +544,12 @@ static int test_syswide_multi_diff_addr(void)
 	int ret;
 
 	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
-	if (ret) {
-		perror("perf_systemwide_event_open");
+	if (ret)
 		exit(EXIT_FAILURE);
-	}
 
 	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b));
 	if (ret) {
 		close_fds(fd1, nprocs);
-		perror("perf_systemwide_event_open");
 		exit(EXIT_FAILURE);
 	}
 
@@ -590,15 +588,12 @@ static int test_syswide_multi_same_addr(void)
 	int ret;
 
 	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
-	if (ret) {
-		perror("perf_systemwide_event_open");
+	if (ret)
 		exit(EXIT_FAILURE);
-	}
 
 	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
 	if (ret) {
 		close_fds(fd1, nprocs);
-		perror("perf_systemwide_event_open");
 		exit(EXIT_FAILURE);
 	}
 
@@ -637,15 +632,12 @@ static int test_syswide_multi_diff_addr_ro_wo(void)
 	int ret;
 
 	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
-	if (ret) {
-		perror("perf_systemwide_event_open");
+	if (ret)
 		exit(EXIT_FAILURE);
-	}
 
 	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b));
 	if (ret) {
 		close_fds(fd1, nprocs);
-		perror("perf_systemwide_event_open");
 		exit(EXIT_FAILURE);
 	}
 
@@ -684,15 +676,12 @@ static int test_syswide_multi_same_addr_ro_wo(void)
 	int ret;
 
 	ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
-	if (ret) {
-		perror("perf_systemwide_event_open");
+	if (ret)
 		exit(EXIT_FAILURE);
-	}
 
 	ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a));
 	if (ret) {
 		close_fds(fd1, nprocs);
-		perror("perf_systemwide_event_open");
 		exit(EXIT_FAILURE);
 	}
 
-- 
GitLab


From 616ad3f4aac287c48b66c92cb777395b4465ed4f Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 22 Nov 2022 12:10:53 +0530
Subject: [PATCH 2907/4122] selftests/powerpc: Bump up rlimit for perf-hwbreak
 test

The systemwide perf hardware breakpoint test tries to open a perf event
on each cpu. On large systems, we run out of file descriptors and fail
the test. Instead, have the test set the file descriptor limit to an
arbitraty high value.

Reported-by: Rohan Deshpande <rohan_d@linux.vnet.ibm.com>
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/187fed5843cecc1e5066677b6296ee88337d7bef.1669096083.git.naveen.n.rao@linux.vnet.ibm.com
---
 .../testing/selftests/powerpc/ptrace/perf-hwbreak.c  | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index ea5e14ecbf309..866e5be48ee33 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -26,6 +26,7 @@
 #include <sys/ioctl.h>
 #include <sys/wait.h>
 #include <sys/ptrace.h>
+#include <sys/resource.h>
 #include <sys/sysinfo.h>
 #include <asm/ptrace.h>
 #include <elf.h>
@@ -140,8 +141,19 @@ static void disable_fds(int *fd, int n)
 
 static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len)
 {
+	struct rlimit rlim;
 	int i = 0;
 
+	if (getrlimit(RLIMIT_NOFILE, &rlim)) {
+		perror("getrlimit");
+		return -1;
+	}
+	rlim.rlim_cur = 65536;
+	if (setrlimit(RLIMIT_NOFILE, &rlim)) {
+		perror("setrlimit");
+		return -1;
+	}
+
 	/* Assume online processors are 0 to nprocs for simplisity */
 	for (i = 0; i < nprocs; i++) {
 		fd[i] = perf_cpu_event_open(i, type, addr, len);
-- 
GitLab


From 260095926d3956071c6699a28824c3f0fa7cd97a Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 22 Nov 2022 12:10:54 +0530
Subject: [PATCH 2908/4122] selftests/powerpc: Account for offline cpus in
 perf-hwbreak test

For systemwide tests, use online cpu mask to only open events on online
cpus. This enables this test to work on systems in lower SMT modes.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/15fd447dcefd19945a7d31f0a475349f548a3603.1669096083.git.naveen.n.rao@linux.vnet.ibm.com
---
 .../selftests/powerpc/ptrace/perf-hwbreak.c   | 45 ++++++++++++++++---
 1 file changed, 39 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index 866e5be48ee33..f75739bbad28c 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -17,8 +17,11 @@
  * Copyright (C) 2018 Michael Neuling, IBM Corporation.
  */
 
+#define _GNU_SOURCE
+
 #include <unistd.h>
 #include <assert.h>
+#include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <signal.h>
@@ -141,8 +144,10 @@ static void disable_fds(int *fd, int n)
 
 static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len)
 {
+	int i, ncpus, cpu, ret = 0;
 	struct rlimit rlim;
-	int i = 0;
+	cpu_set_t *mask;
+	size_t size;
 
 	if (getrlimit(RLIMIT_NOFILE, &rlim)) {
 		perror("getrlimit");
@@ -154,16 +159,44 @@ static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len
 		return -1;
 	}
 
-	/* Assume online processors are 0 to nprocs for simplisity */
-	for (i = 0; i < nprocs; i++) {
-		fd[i] = perf_cpu_event_open(i, type, addr, len);
+	ncpus = get_nprocs_conf();
+	size = CPU_ALLOC_SIZE(ncpus);
+	mask = CPU_ALLOC(ncpus);
+	if (!mask) {
+		perror("malloc");
+		return -1;
+	}
+
+	CPU_ZERO_S(size, mask);
+
+	if (sched_getaffinity(0, size, mask)) {
+		perror("sched_getaffinity");
+		ret = -1;
+		goto done;
+	}
+
+	for (i = 0, cpu = 0; i < nprocs && cpu < ncpus; cpu++) {
+		if (!CPU_ISSET_S(cpu, size, mask))
+			continue;
+		fd[i] = perf_cpu_event_open(cpu, type, addr, len);
 		if (fd[i] < 0) {
 			perror("perf_systemwide_event_open");
 			close_fds(fd, i);
-			return fd[i];
+			ret = fd[i];
+			goto done;
 		}
+		i++;
 	}
-	return 0;
+
+	if (i < nprocs) {
+		printf("Error: Number of online cpus reduced since start of test: %d < %d\n", i, nprocs);
+		close_fds(fd, i);
+		ret = -1;
+	}
+
+done:
+	CPU_FREE(mask);
+	return ret;
 }
 
 static inline bool breakpoint_test(int len)
-- 
GitLab


From d5090716be6791ada9ee142163a4934c1c147aaa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 26 Nov 2022 06:10:00 +0100
Subject: [PATCH 2909/4122] powerpc/book3e: remove #include
 <generated/utsrelease.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 7ad4bd887d27 ("powerpc/book3e: get rid of #include <generated/compile.h>")
removed the usage of the define UTS_RELEASE but forgot to drop the
include.

utsrelease.h is potentially generated on each build. By removing the
unused include we can get rid of some spurious recompilations.

Fixes: 7ad4bd887d27 ("powerpc/book3e: get rid of #include <generated/compile.h>")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
[mpe: Fix typo in change log and add more explanation]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126051002.123199-2-linux@weissschuh.net
---
 arch/powerpc/mm/nohash/kaslr_booke.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
index 0d04f9d5da8d2..2fb3edafe9ab6 100644
--- a/arch/powerpc/mm/nohash/kaslr_booke.c
+++ b/arch/powerpc/mm/nohash/kaslr_booke.c
@@ -19,7 +19,6 @@
 #include <asm/cacheflush.h>
 #include <asm/kdump.h>
 #include <mm/mmu_decl.h>
-#include <generated/utsrelease.h>
 
 struct regions {
 	unsigned long pa_start;
-- 
GitLab


From 67bbb62f61e810734da0a1577a9802ddaed24140 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Fri, 30 Sep 2022 14:39:01 +0200
Subject: [PATCH 2910/4122] powerpc: dts: turris1x.dts: Add channel labels for
 temperature sensor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Channel 0 of SA56004ED chip refers to internal SA56004ED chip sensor (chip
itself is located on the board) and channel 1 of SA56004ED chip refers to
external sensor which is connected to temperature diode of the P2020 CPU.

Fixes: 54c15ec3b738 ("powerpc: dts: Add DTS file for CZ.NIC Turris 1.x routers")
Signed-off-by: Pali Rohár <pali@kernel.org>
Reviewed-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220930123901.10251-1-pali@kernel.org
---
 arch/powerpc/boot/dts/turris1x.dts | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/arch/powerpc/boot/dts/turris1x.dts b/arch/powerpc/boot/dts/turris1x.dts
index 045af668e9284..e9cda34a140e0 100644
--- a/arch/powerpc/boot/dts/turris1x.dts
+++ b/arch/powerpc/boot/dts/turris1x.dts
@@ -69,6 +69,20 @@
 				interrupt-parent = <&gpio>;
 				interrupts = <12 IRQ_TYPE_LEVEL_LOW>, /* GPIO12 - ALERT pin */
 					     <13 IRQ_TYPE_LEVEL_LOW>; /* GPIO13 - CRIT pin */
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				/* Local temperature sensor (SA56004ED internal) */
+				channel@0 {
+					reg = <0>;
+					label = "board";
+				};
+
+				/* Remote temperature sensor (D+/D- connected to P2020 CPU Temperature Diode) */
+				channel@1 {
+					reg = <1>;
+					label = "cpu";
+				};
 			};
 
 			/* DDR3 SPD/EEPROM */
-- 
GitLab


From e082e99f6f87f5204b2531d5a3db7bbd929d23b1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Sat, 20 Aug 2022 14:33:27 +0200
Subject: [PATCH 2911/4122] powerpc/fsl-pci: Choose PCI host bridge with alias
 pci0 as the primary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If there's no PCI host bridge with ISA then check for PCI host bridge with
alias "pci0" (first PCI host bridge) and if it exists then choose it as the
primary PCI host bridge.

This makes choice of primary PCI host bridge more stable across boots and
updates as the last fallback candidate for primary PCI host bridge (if
there is no choice) is selected arbitrary.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20220820123327.20551-1-pali@kernel.org
---
 arch/powerpc/sysdev/fsl_pci.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
index 974d3db6faab2..b7232c46b2448 100644
--- a/arch/powerpc/sysdev/fsl_pci.c
+++ b/arch/powerpc/sysdev/fsl_pci.c
@@ -1138,6 +1138,19 @@ void __init fsl_pci_assign_primary(void)
 			return;
 	}
 
+	/*
+	 * If there's no PCI host bridge with ISA then check for
+	 * PCI host bridge with alias "pci0" (first PCI host bridge).
+	 */
+	np = of_find_node_by_path("pci0");
+	if (np && of_match_node(pci_ids, np) && of_device_is_available(np)) {
+		fsl_pci_primary = np;
+		of_node_put(np);
+		return;
+	}
+	if (np)
+		of_node_put(np);
+
 	/*
 	 * If there's no PCI host bridge with ISA, arbitrarily
 	 * designate one as primary.  This can go away once
-- 
GitLab


From 3671f4ebe3eb12e7222e4d7b0f94e85cfe34253a Mon Sep 17 00:00:00 2001
From: Jordan Niethe <jniethe5@gmail.com>
Date: Wed, 9 Nov 2022 15:51:04 +1100
Subject: [PATCH 2912/4122] powerpc: Allow clearing and restoring registers
 independent of saved breakpoint state

For the coming temporary mm used for instruction patching, the
breakpoint registers need to be cleared to prevent them from
accidentally being triggered. As soon as the patching is done, the
breakpoints will be restored.

The breakpoint state is stored in the per-cpu variable current_brk[].
Add a suspend_breakpoints() function which will clear the breakpoint
registers without touching the state in current_brk[]. Add a pair
function restore_breakpoints() which will move the state in
current_brk[] back to the registers.

Signed-off-by: Jordan Niethe <jniethe5@gmail.com>
Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-2-bgray@linux.ibm.com
---
 arch/powerpc/include/asm/debug.h |  2 ++
 arch/powerpc/kernel/process.c    | 38 +++++++++++++++++++++++++++++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 86a14736c76c3..51c744608f37a 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -46,6 +46,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
 void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
+void suspend_breakpoints(void);
+void restore_breakpoints(void);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index e3e1feaa536a9..5265da2d80340 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -862,10 +862,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
 	return 0;
 }
 
-void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk)
 {
-	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
-
 	if (dawr_enabled())
 		// Power8 or later
 		set_dawr(nr, brk);
@@ -879,6 +877,12 @@ void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
 		WARN_ON_ONCE(1);
 }
 
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+	memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
+	set_hw_breakpoint(nr, brk);
+}
+
 /* Check if we have DAWR or DABR hardware */
 bool ppc_breakpoint_available(void)
 {
@@ -891,6 +895,34 @@ bool ppc_breakpoint_available(void)
 }
 EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
 
+/* Disable the breakpoint in hardware without touching current_brk[] */
+void suspend_breakpoints(void)
+{
+	struct arch_hw_breakpoint brk = {0};
+	int i;
+
+	if (!ppc_breakpoint_available())
+		return;
+
+	for (i = 0; i < nr_wp_slots(); i++)
+		set_hw_breakpoint(i, &brk);
+}
+
+/*
+ * Re-enable breakpoints suspended by suspend_breakpoints() in hardware
+ * from current_brk[]
+ */
+void restore_breakpoints(void)
+{
+	int i;
+
+	if (!ppc_breakpoint_available())
+		return;
+
+	for (i = 0; i < nr_wp_slots(); i++)
+		set_hw_breakpoint(i, this_cpu_ptr(&current_brk[i]));
+}
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 
 static inline bool tm_enabled(struct task_struct *tsk)
-- 
GitLab


From 071c95c1acbd96e76bab8b25b5cad0d71a011f37 Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:51:05 +1100
Subject: [PATCH 2913/4122] powerpc/code-patching: Use WARN_ON and fix check in
 poking_init

BUG_ON() when failing to initialise the code patching window is
unnecessary, and use of BUG_ON is discouraged. We don't set
poking_init_done in this case, so failure to init the boot CPU will
result in a strict RWX error when a following patch_instruction uses
raw_patch_instruction. If it only fails for later CPUs, they won't be
onlined in the first place.

The return value of cpuhp_setup_state() is also >= 0 on success,
so check for < 0.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-3-bgray@linux.ibm.com
---
 arch/powerpc/lib/code-patching.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index ad0cf3108dd09..3055eef7dcdcc 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -81,16 +81,17 @@ static int text_area_cpu_down(unsigned int cpu)
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
 
-/*
- * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and
- * we judge it as being preferable to a kernel that will crash later when
- * someone tries to use patch_instruction().
- */
 void __init poking_init(void)
 {
-	BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
-		"powerpc/text_poke:online", text_area_cpu_up,
-		text_area_cpu_down));
+	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+				    "powerpc/text_poke:online",
+				    text_area_cpu_up,
+				    text_area_cpu_down);
+
+	/* cpuhp_setup_state returns >= 0 on success */
+	if (WARN_ON(ret < 0))
+		return;
+
 	static_branch_enable(&poking_init_done);
 }
 
-- 
GitLab


From baf1ed24b27db475b38f534953885d0425e2232d Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:51:07 +1100
Subject: [PATCH 2914/4122] powerpc/mm: Remove empty hash__ functions

The empty hash__* functions are unnecessary. The empty definitions were
introduced when 64-bit Hash support was added, as the functions were
still used in generic code. These empty definitions were prefixed with
hash__ when Radix support was added, and new wrappers with the original
names were added that selected the Radix or Hash version based on
radix_enabled().

But the hash__ prefixed functions were not part of a public interface,
so there is no need to include them for compatibility with anything.
Generic code will use the non-prefixed wrappers, and Hash specific code
will know that there is no point in calling them (or even worse, call
them and expect them to do something).

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-5-bgray@linux.ibm.com
---
 .../include/asm/book3s/64/tlbflush-hash.h     | 28 -------------------
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 27 ++++++------------
 2 files changed, 9 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index 751921f6db461..a9ef40dc263e0 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -65,13 +65,6 @@ extern void flush_hash_range(unsigned long number, int local);
 extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
 				pmd_t *pmdp, unsigned int psize, int ssize,
 				unsigned long flags);
-static inline void hash__local_flush_tlb_mm(struct mm_struct *mm)
-{
-}
-
-static inline void hash__flush_tlb_mm(struct mm_struct *mm)
-{
-}
 
 static inline void hash__local_flush_all_mm(struct mm_struct *mm)
 {
@@ -95,27 +88,6 @@ static inline void hash__flush_all_mm(struct mm_struct *mm)
 	WARN_ON_ONCE(1);
 }
 
-static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
-					  unsigned long vmaddr)
-{
-}
-
-static inline void hash__flush_tlb_page(struct vm_area_struct *vma,
-				    unsigned long vmaddr)
-{
-}
-
-static inline void hash__flush_tlb_range(struct vm_area_struct *vma,
-				     unsigned long start, unsigned long end)
-{
-}
-
-static inline void hash__flush_tlb_kernel_range(unsigned long start,
-					    unsigned long end)
-{
-}
-
-
 struct mmu_gather;
 extern void hash__tlb_flush(struct mmu_gather *tlb);
 
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 67655cd605456..2254a40f0564b 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -47,8 +47,7 @@ static inline void flush_pmd_tlb_range(struct vm_area_struct *vma,
 				       unsigned long start, unsigned long end)
 {
 	if (radix_enabled())
-		return radix__flush_pmd_tlb_range(vma, start, end);
-	return hash__flush_tlb_range(vma, start, end);
+		radix__flush_pmd_tlb_range(vma, start, end);
 }
 
 #define __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
@@ -57,39 +56,34 @@ static inline void flush_hugetlb_tlb_range(struct vm_area_struct *vma,
 					   unsigned long end)
 {
 	if (radix_enabled())
-		return radix__flush_hugetlb_tlb_range(vma, start, end);
-	return hash__flush_tlb_range(vma, start, end);
+		radix__flush_hugetlb_tlb_range(vma, start, end);
 }
 
 static inline void flush_tlb_range(struct vm_area_struct *vma,
 				   unsigned long start, unsigned long end)
 {
 	if (radix_enabled())
-		return radix__flush_tlb_range(vma, start, end);
-	return hash__flush_tlb_range(vma, start, end);
+		radix__flush_tlb_range(vma, start, end);
 }
 
 static inline void flush_tlb_kernel_range(unsigned long start,
 					  unsigned long end)
 {
 	if (radix_enabled())
-		return radix__flush_tlb_kernel_range(start, end);
-	return hash__flush_tlb_kernel_range(start, end);
+		radix__flush_tlb_kernel_range(start, end);
 }
 
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
 	if (radix_enabled())
-		return radix__local_flush_tlb_mm(mm);
-	return hash__local_flush_tlb_mm(mm);
+		radix__local_flush_tlb_mm(mm);
 }
 
 static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 					unsigned long vmaddr)
 {
 	if (radix_enabled())
-		return radix__local_flush_tlb_page(vma, vmaddr);
-	return hash__local_flush_tlb_page(vma, vmaddr);
+		radix__local_flush_tlb_page(vma, vmaddr);
 }
 
 static inline void local_flush_all_mm(struct mm_struct *mm)
@@ -102,24 +96,21 @@ static inline void local_flush_all_mm(struct mm_struct *mm)
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (radix_enabled())
-		return radix__tlb_flush(tlb);
-	return hash__tlb_flush(tlb);
+		radix__tlb_flush(tlb);
 }
 
 #ifdef CONFIG_SMP
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
 	if (radix_enabled())
-		return radix__flush_tlb_mm(mm);
-	return hash__flush_tlb_mm(mm);
+		radix__flush_tlb_mm(mm);
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
 				  unsigned long vmaddr)
 {
 	if (radix_enabled())
-		return radix__flush_tlb_page(vma, vmaddr);
-	return hash__flush_tlb_page(vma, vmaddr);
+		radix__flush_tlb_page(vma, vmaddr);
 }
 
 static inline void flush_all_mm(struct mm_struct *mm)
-- 
GitLab


From 0f0a0a6091e678b1a75078ecd6b02176f3228dbb Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:51:08 +1100
Subject: [PATCH 2915/4122] cxl: Use radix__flush_all_mm instead of generic
 flush_all_mm

The generic implementation of this function isn't really generic (Hash
is not implemented). Unfortunately, the runtime warnings cannot be
replaced with BUILD_BUG's, so it seems safer not to provide a stub in
the first place.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-6-bgray@linux.ibm.com
---
 arch/powerpc/include/asm/mmu_context.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index c1ea270bb8486..57f5017111f47 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -151,8 +151,8 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
 	 * nMMU and/or PSL need to be cleaned up.
 	 *
 	 * Both the 'copros' and 'active_cpus' counts are looked at in
-	 * flush_all_mm() to determine the scope (local/global) of the
-	 * TLBIs, so we need to flush first before decrementing
+	 * radix__flush_all_mm() to determine the scope (local/global)
+	 * of the TLBIs, so we need to flush first before decrementing
 	 * 'copros'. If this API is used by several callers for the
 	 * same context, it can lead to over-flushing. It's hopefully
 	 * not common enough to be a problem.
@@ -164,7 +164,7 @@ static inline void mm_context_remove_copro(struct mm_struct *mm)
 	 * in-between.
 	 */
 	if (radix_enabled()) {
-		flush_all_mm(mm);
+		radix__flush_all_mm(mm);
 
 		c = atomic_dec_if_positive(&mm->context.copros);
 		/* Detect imbalance between add and remove */
-- 
GitLab


From d34471c9bd5d47ab148dd68817631a4238f755c4 Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:51:09 +1100
Subject: [PATCH 2916/4122] powerpc/mm: Remove flush_all_mm, local_flush_all_mm

These functions were introduced for "cxl: Enable global TLBIs for cxl
contexts" [1], which ended up using them for Radix only. They were never
implemented on Hash (and creating an implementation appears to be
difficult), so nothing can actually rely on them.

They behave differently to the existing surrounding functions too, in
that they actually need to do something on Hash. The other functions
are primarily for use in generic code that expects their definitions,
but Hash updates the TLB during PTE updates.

After replacing the only usage with the Radix specific version, there
are no more users of these functions, and given they are not implemented
anyway it is safe to delete them.

[1]: https://patchwork.ozlabs.org/project/linuxppc-dev/patch/20170903181513.29635-1-fbarrat@linux.vnet.ibm.com/

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-7-bgray@linux.ibm.com
---
 .../include/asm/book3s/64/tlbflush-hash.h     | 22 -------------------
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 15 -------------
 2 files changed, 37 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index a9ef40dc263e0..146287d9580f4 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -66,28 +66,6 @@ extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr,
 				pmd_t *pmdp, unsigned int psize, int ssize,
 				unsigned long flags);
 
-static inline void hash__local_flush_all_mm(struct mm_struct *mm)
-{
-	/*
-	 * There's no Page Walk Cache for hash, so what is needed is
-	 * the same as flush_tlb_mm(), which doesn't really make sense
-	 * with hash. So the only thing we could do is flush the
-	 * entire LPID! Punt for now, as it's not being used.
-	 */
-	WARN_ON_ONCE(1);
-}
-
-static inline void hash__flush_all_mm(struct mm_struct *mm)
-{
-	/*
-	 * There's no Page Walk Cache for hash, so what is needed is
-	 * the same as flush_tlb_mm(), which doesn't really make sense
-	 * with hash. So the only thing we could do is flush the
-	 * entire LPID! Punt for now, as it's not being used.
-	 */
-	WARN_ON_ONCE(1);
-}
-
 struct mmu_gather;
 extern void hash__tlb_flush(struct mmu_gather *tlb);
 
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 2254a40f0564b..c56a0aee8124b 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -86,13 +86,6 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 		radix__local_flush_tlb_page(vma, vmaddr);
 }
 
-static inline void local_flush_all_mm(struct mm_struct *mm)
-{
-	if (radix_enabled())
-		return radix__local_flush_all_mm(mm);
-	return hash__local_flush_all_mm(mm);
-}
-
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (radix_enabled())
@@ -112,17 +105,9 @@ static inline void flush_tlb_page(struct vm_area_struct *vma,
 	if (radix_enabled())
 		radix__flush_tlb_page(vma, vmaddr);
 }
-
-static inline void flush_all_mm(struct mm_struct *mm)
-{
-	if (radix_enabled())
-		return radix__flush_all_mm(mm);
-	return hash__flush_all_mm(mm);
-}
 #else
 #define flush_tlb_mm(mm)		local_flush_tlb_mm(mm)
 #define flush_tlb_page(vma, addr)	local_flush_tlb_page(vma, addr)
-#define flush_all_mm(mm)		local_flush_all_mm(mm)
 #endif /* CONFIG_SMP */
 
 #define flush_tlb_fix_spurious_fault flush_tlb_fix_spurious_fault
-- 
GitLab


From 274d842fa1efd9449e62222c8896e0be11621f1f Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:51:10 +1100
Subject: [PATCH 2917/4122] powerpc/tlb: Add local flush for page given
 mm_struct and psize

Adds a local TLB flush operation that works given an mm_struct, VA to
flush, and page size representation. Most implementations mirror the
surrounding code. The book3s/32/tlbflush.h implementation is left as
a BUILD_BUG because it is more complicated and not required for
anything as yet.

This removes the need to create a vm_area_struct, which the temporary
patching mm work does not need.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-8-bgray@linux.ibm.com
---
 arch/powerpc/include/asm/book3s/32/tlbflush.h | 9 +++++++++
 arch/powerpc/include/asm/book3s/64/tlbflush.h | 7 +++++++
 arch/powerpc/include/asm/nohash/tlbflush.h    | 7 +++++++
 arch/powerpc/mm/nohash/tlb.c                  | 8 ++++++++
 4 files changed, 31 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index ba1743c52b56d..4be5729081243 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
 #define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
 
+#include <linux/build_bug.h>
+
 #define MMU_NO_CONTEXT      (0)
 /*
  * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
@@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 {
 	flush_tlb_page(vma, vmaddr);
 }
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	BUILD_BUG();
+}
+
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
 	flush_tlb_mm(mm);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index c56a0aee8124b..dd39313242b4d 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -86,6 +86,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 		radix__local_flush_tlb_page(vma, vmaddr);
 }
 
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	if (radix_enabled())
+		radix__local_flush_tlb_page_psize(mm, vmaddr, psize);
+}
+
 static inline void tlb_flush(struct mmu_gather *tlb)
 {
 	if (radix_enabled())
diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
index bdaf34ad41ea6..9a2cf83ea4f19 100644
--- a/arch/powerpc/include/asm/nohash/tlbflush.h
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon
 	asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
 }
 
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+					      unsigned long vmaddr, int psize)
+{
+	asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
+}
+
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
 	start &= PAGE_MASK;
@@ -58,6 +64,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
 extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+void local_flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize);
 
 extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
 				   int tsize, int ind);
diff --git a/arch/powerpc/mm/nohash/tlb.c b/arch/powerpc/mm/nohash/tlb.c
index 2c15c86c70157..a903b308acc54 100644
--- a/arch/powerpc/mm/nohash/tlb.c
+++ b/arch/powerpc/mm/nohash/tlb.c
@@ -184,6 +184,14 @@ void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
 			       mmu_get_tsize(mmu_virtual_psize), 0);
 }
 EXPORT_SYMBOL(local_flush_tlb_page);
+
+void local_flush_tlb_page_psize(struct mm_struct *mm,
+				unsigned long vmaddr, int psize)
+{
+	__local_flush_tlb_page(mm, vmaddr, mmu_get_tsize(psize), 0);
+}
+EXPORT_SYMBOL(local_flush_tlb_page_psize);
+
 #endif
 
 /*
-- 
GitLab


From 33379c054211a5144ffae84e9e3c80e2e62416a9 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 Nov 2022 12:02:11 +0100
Subject: [PATCH 2918/4122] Revert "xhci: Convert to use list_count()"

This reverts commit b47ec9727f47d1dce4e8cbc9aef01c80b2332535 as it
breaks the build.

Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au
Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com
Cc: Mathias Nyman <mathias.nyman@linux.intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 817c31e3b0c8b..ad81e9a508b14 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2532,6 +2532,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	union xhci_trb *ep_trb;
 	int status = -EINPROGRESS;
 	struct xhci_ep_ctx *ep_ctx;
+	struct list_head *tmp;
 	u32 trb_comp_code;
 	int td_num = 0;
 	bool handling_skipped_tds = false;
@@ -2579,8 +2580,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 	}
 
 	/* Count current td numbers if ep->skip is set */
-	if (ep->skip)
-		td_num += list_count(&ep_ring->td_list);
+	if (ep->skip) {
+		list_for_each(tmp, &ep_ring->td_list)
+			td_num++;
+	}
 
 	/* Look for common error cases */
 	switch (trb_comp_code) {
-- 
GitLab


From acebf61919199771b5d7b92c68c5b515dfcbf800 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 Nov 2022 12:02:12 +0100
Subject: [PATCH 2919/4122] Revert "usb: gadget: udc: bcm63xx: Convert to use
 list_count()"

This reverts commit c2d9d02f7bf3c641f9b8e6c9f5de1e564cdeca69 as it
breaks the build.

Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au
Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/bcm63xx_udc.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/gadget/udc/bcm63xx_udc.c b/drivers/usb/gadget/udc/bcm63xx_udc.c
index 0762e49e85f85..2cdb07905bdeb 100644
--- a/drivers/usb/gadget/udc/bcm63xx_udc.c
+++ b/drivers/usb/gadget/udc/bcm63xx_udc.c
@@ -2172,6 +2172,7 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p)
 
 	for (ch_idx = 0; ch_idx < BCM63XX_NUM_IUDMA; ch_idx++) {
 		struct iudma_ch *iudma = &udc->iudma[ch_idx];
+		struct list_head *pos;
 
 		seq_printf(s, "IUDMA channel %d -- ", ch_idx);
 		switch (iudma_defaults[ch_idx].ep_type) {
@@ -2204,10 +2205,14 @@ static int bcm63xx_iudma_dbg_show(struct seq_file *s, void *p)
 		seq_printf(s, "  desc: %d/%d used", iudma->n_bds_used,
 			   iudma->n_bds);
 
-		if (iudma->bep)
-			seq_printf(s, "; %zu queued\n", list_count(&iudma->bep->queue));
-		else
+		if (iudma->bep) {
+			i = 0;
+			list_for_each(pos, &iudma->bep->queue)
+				i++;
+			seq_printf(s, "; %d queued\n", i);
+		} else {
 			seq_printf(s, "\n");
+		}
 
 		for (i = 0; i < iudma->n_bds; i++) {
 			struct bcm_enet_desc *d = &iudma->bd_ring[i];
-- 
GitLab


From 54aa8af53905e39a825773883914810033f4d3d3 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 Nov 2022 12:02:13 +0100
Subject: [PATCH 2920/4122] Revert "usb: gadget: hid: Convert to use
 list_count()"

This reverts commit 33f00f41d963c86176dba2f9faff9b428a542e60 as it
breaks the build.

Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au
Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/legacy/hid.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/legacy/hid.c b/drivers/usb/gadget/legacy/hid.c
index 6196c3456e0be..1187ee4f316ab 100644
--- a/drivers/usb/gadget/legacy/hid.c
+++ b/drivers/usb/gadget/legacy/hid.c
@@ -133,11 +133,14 @@ static struct usb_configuration config_driver = {
 static int hid_bind(struct usb_composite_dev *cdev)
 {
 	struct usb_gadget *gadget = cdev->gadget;
+	struct list_head *tmp;
 	struct hidg_func_node *n = NULL, *m, *iter_n;
 	struct f_hid_opts *hid_opts;
-	int status, funcs;
+	int status, funcs = 0;
+
+	list_for_each(tmp, &hidg_func_list)
+		funcs++;
 
-	funcs = list_count(&hidg_func_list);
 	if (!funcs)
 		return -ENODEV;
 
-- 
GitLab


From 51daa42d6b86efa366320b99e7bbe29a490ed348 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 30 Nov 2022 12:02:13 +0100
Subject: [PATCH 2921/4122] Revert "i915: Move list_count() to list.h for
 broader use"

This reverts commit a9efc04cfd05690e91279f41c2325c46335c43ef as it
breaks the build.

Link: https://lore.kernel.org/r/20221130131854.35b58b16@canb.auug.org.au
Link: https://lore.kernel.org/r/202211301628.iwMjPVMp-lkp@intel.com
Cc: Lucas De Marchi <lucas.demarchi@intel.com>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c | 13 ++++++++++++-
 include/linux/list.h                      | 15 ---------------
 2 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 47734c4ebfa01..1f7188129cd1f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -2004,6 +2004,17 @@ static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
 	}
 }
 
+static unsigned long list_count(struct list_head *list)
+{
+	struct list_head *pos;
+	unsigned long count = 0;
+
+	list_for_each(pos, list)
+		count++;
+
+	return count;
+}
+
 static unsigned long read_ul(void *p, size_t x)
 {
 	return *(unsigned long *)(p + x);
@@ -2178,7 +2189,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
 	spin_lock_irqsave(&engine->sched_engine->lock, flags);
 	engine_dump_active_requests(engine, m);
 
-	drm_printf(m, "\tOn hold?: %zu\n",
+	drm_printf(m, "\tOn hold?: %lu\n",
 		   list_count(&engine->sched_engine->hold));
 	spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
 
diff --git a/include/linux/list.h b/include/linux/list.h
index 632a298c7018c..61762054b4be9 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -655,21 +655,6 @@ static inline void list_splice_tail_init(struct list_head *list,
 	     !list_is_head(pos, (head)); \
 	     pos = n, n = pos->prev)
 
-/**
- * list_count - count nodes in the list
- * @head:	the head for your list.
- */
-static inline size_t list_count(struct list_head *head)
-{
-	struct list_head *pos;
-	size_t count = 0;
-
-	list_for_each(pos, head)
-		count++;
-
-	return count;
-}
-
 /**
  * list_entry_is_head - test if the entry points to the head of the list
  * @pos:	the type * to cursor
-- 
GitLab


From 2a25e66d676dfb9b018abd503deed3d38a892dec Mon Sep 17 00:00:00 2001
From: Longfang Liu <liulongfang@huawei.com>
Date: Wed, 30 Nov 2022 11:19:39 +0200
Subject: [PATCH 2922/4122] xhci: print warning when HCE was set

When HCE(Host Controller Error) is set, it means that the xhci hardware
controller has an error at this time, but the current xhci driver
software does not log this event.

By adding an HCE event detection in the xhci interrupt processing
interface, a warning log is output to the system, which is convenient
for system device status tracking.

Signed-off-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20221130091944.2171610-2-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index ad81e9a508b14..f6af479188e8f 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -3031,6 +3031,11 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd)
 	if (!(status & STS_EINT))
 		goto out;
 
+	if (status & STS_HCE) {
+		xhci_warn(xhci, "WARNING: Host Controller Error\n");
+		goto out;
+	}
+
 	if (status & STS_FATAL) {
 		xhci_warn(xhci, "WARNING: Host System Error\n");
 		xhci_halt(xhci);
-- 
GitLab


From fed70b61ef2c0aed54456db3d485b215f6cc3209 Mon Sep 17 00:00:00 2001
From: Reka Norman <rekanorman@chromium.org>
Date: Wed, 30 Nov 2022 11:19:40 +0200
Subject: [PATCH 2923/4122] xhci: Apply XHCI_RESET_TO_DEFAULT quirk to ADL-N

ADL-N systems have the same issue as ADL-P, where a large boot firmware
delay is seen if USB ports are left in U3 at shutdown. So apply the
XHCI_RESET_TO_DEFAULT quirk to ADL-N as well.

This patch depends on commit 34cd2db408d5 ("xhci: Add quirk to reset
host back to default state at shutdown").

The issue it fixes is a ~20s boot time delay when booting from S5. It
affects ADL-N devices, and ADL-N support was added starting from v5.16.

Cc: stable@vger.kernel.org
Signed-off-by: Reka Norman <rekanorman@chromium.org>
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20221130091944.2171610-3-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-pci.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index a29b681b562e4..1fb773ed37279 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -59,6 +59,7 @@
 #define PCI_DEVICE_ID_INTEL_TIGER_LAKE_XHCI		0x9a13
 #define PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI		0x1138
 #define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI		0x51ed
+#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI	0x54ed
 
 #define PCI_DEVICE_ID_AMD_RENOIR_XHCI			0x1639
 #define PCI_DEVICE_ID_AMD_PROMONTORYA_4			0x43b9
@@ -246,7 +247,8 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
 		xhci->quirks |= XHCI_MISSING_CAS;
 
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
-	    pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI)
+	    (pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI ||
+	     pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_PCH_XHCI))
 		xhci->quirks |= XHCI_RESET_TO_DEFAULT;
 
 	if (pdev->vendor == PCI_VENDOR_ID_INTEL &&
-- 
GitLab


From 705c333a7ad2003ad99d96c19a31619b19ad14b9 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 30 Nov 2022 11:19:41 +0200
Subject: [PATCH 2924/4122] xhci: export two xhci_hub functions for xhci-pci
 module usage

some Intel Alder Lake xHC hosts on ChromeOS platforms need special
workarounds touching port registers at xHC pci host hibernate.

Export xhci_port_state_to_neutral() and xhci_find_slot_id_by_port()
so they can be called from xhci-pci.c and thus the xhci-pci module.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20221130091944.2171610-4-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-hub.c | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 4619d5e89d5be..94c94db3faf6d 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -426,24 +426,37 @@ static unsigned int xhci_port_speed(unsigned int port_status)
  */
 #define	XHCI_PORT_RZ	((1<<2) | (1<<24) | (0xf<<28))
 
-/*
+/**
+ * xhci_port_state_to_neutral() - Clean up read portsc value back into writeable
+ * @state: u32 port value read from portsc register to be cleanup up
+ *
  * Given a port state, this function returns a value that would result in the
  * port being in the same state, if the value was written to the port status
  * control register.
  * Save Read Only (RO) bits and save read/write bits where
  * writing a 0 clears the bit and writing a 1 sets the bit (RWS).
  * For all other types (RW1S, RW1CS, RW, and RZ), writing a '0' has no effect.
+ *
+ * Return: u32 value that can be written back to portsc register without
+ * changing port state.
  */
+
 u32 xhci_port_state_to_neutral(u32 state)
 {
 	/* Save read-only status and port state */
 	return (state & XHCI_PORT_RO) | (state & XHCI_PORT_RWS);
 }
+EXPORT_SYMBOL_GPL(xhci_port_state_to_neutral);
 
-/*
- * find slot id based on port number.
- * @port: The one-based port number from one of the two split roothubs.
+/**
+ * xhci_find_slot_id_by_port() - Find slot id of a usb device on a roothub port
+ * @hcd: pointer to hcd of the roothub
+ * @xhci: pointer to xhci structure
+ * @port: one-based port number of the port in this roothub.
+ *
+ * Return: Slot id of the usb device connected to the root port, 0 if not found
  */
+
 int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci,
 		u16 port)
 {
@@ -465,6 +478,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci,
 
 	return slot_id;
 }
+EXPORT_SYMBOL_GPL(xhci_find_slot_id_by_port);
 
 /*
  * Stop device
-- 
GitLab


From c3bbacd61baace2f4fbab17012c3d149df2d50f1 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 30 Nov 2022 11:19:42 +0200
Subject: [PATCH 2925/4122] xhci: disable U3 suspended ports in S4 hibernate
 poweroff_late stage

Disable U3 suspended ports in hibernate S4 poweroff_late for systems
with XHCI_RESET_TO_DEFAULT quirk, if wakeup is not enabled.

This reduces the number of self-powered usb devices from surviving in
U3 suspended state into next reboot.

Bootloader/firmware on these systems can't handle usb ports in U3, and
will timeout, causing extra delay during reboot/restore from S4.

Add pci_poweroff_late() callback to struct usb_hcd to get this done at
the correct stage in hibernate.

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20221130091944.2171610-5-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/core/hcd-pci.c  | 13 ++++++++++
 drivers/usb/host/xhci-pci.c | 52 +++++++++++++++++++++++++++++++++++++
 include/linux/usb/hcd.h     |  3 +++
 3 files changed, 68 insertions(+)

diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 9b77f49b3560c..ab2f3737764e4 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -558,6 +558,17 @@ static int hcd_pci_suspend_noirq(struct device *dev)
 	return retval;
 }
 
+static int hcd_pci_poweroff_late(struct device *dev)
+{
+	struct pci_dev		*pci_dev = to_pci_dev(dev);
+	struct usb_hcd		*hcd = pci_get_drvdata(pci_dev);
+
+	if (hcd->driver->pci_poweroff_late && !HCD_DEAD(hcd))
+		return hcd->driver->pci_poweroff_late(hcd, device_may_wakeup(dev));
+
+	return 0;
+}
+
 static int hcd_pci_resume_noirq(struct device *dev)
 {
 	powermac_set_asic(to_pci_dev(dev), 1);
@@ -578,6 +589,7 @@ static int hcd_pci_restore(struct device *dev)
 
 #define hcd_pci_suspend		NULL
 #define hcd_pci_suspend_noirq	NULL
+#define hcd_pci_poweroff_late	NULL
 #define hcd_pci_resume_noirq	NULL
 #define hcd_pci_resume		NULL
 #define hcd_pci_restore		NULL
@@ -615,6 +627,7 @@ const struct dev_pm_ops usb_hcd_pci_pm_ops = {
 	.thaw_noirq	= NULL,
 	.thaw		= hcd_pci_resume,
 	.poweroff	= hcd_pci_suspend,
+	.poweroff_late	= hcd_pci_poweroff_late,
 	.poweroff_noirq	= hcd_pci_suspend_noirq,
 	.restore_noirq	= hcd_pci_resume_noirq,
 	.restore	= hcd_pci_restore,
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 1fb773ed37279..79d679b3e0760 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -622,6 +622,57 @@ static int xhci_pci_resume(struct usb_hcd *hcd, bool hibernated)
 	return retval;
 }
 
+static int xhci_pci_poweroff_late(struct usb_hcd *hcd, bool do_wakeup)
+{
+	struct xhci_hcd		*xhci = hcd_to_xhci(hcd);
+	struct xhci_port	*port;
+	struct usb_device	*udev;
+	unsigned int		slot_id;
+	u32			portsc;
+	int			i;
+
+	/*
+	 * Systems with XHCI_RESET_TO_DEFAULT quirk have boot firmware that
+	 * cause significant boot delay if usb ports are in suspended U3 state
+	 * during boot. Some USB devices survive in U3 state over S4 hibernate
+	 *
+	 * Disable ports that are in U3 if remote wake is not enabled for either
+	 * host controller or connected device
+	 */
+
+	if (!(xhci->quirks & XHCI_RESET_TO_DEFAULT))
+		return 0;
+
+	for (i = 0; i < HCS_MAX_PORTS(xhci->hcs_params1); i++) {
+		port = &xhci->hw_ports[i];
+		portsc = readl(port->addr);
+
+		if ((portsc & PORT_PLS_MASK) != XDEV_U3)
+			continue;
+
+		slot_id = xhci_find_slot_id_by_port(port->rhub->hcd, xhci,
+						    port->hcd_portnum + 1);
+		if (!slot_id || !xhci->devs[slot_id]) {
+			xhci_err(xhci, "No dev for slot_id %d for port %d-%d in U3\n",
+				 slot_id, port->rhub->hcd->self.busnum, port->hcd_portnum + 1);
+			continue;
+		}
+
+		udev = xhci->devs[slot_id]->udev;
+
+		/* if wakeup is enabled then don't disable the port */
+		if (udev->do_remote_wakeup && do_wakeup)
+			continue;
+
+		xhci_dbg(xhci, "port %d-%d in U3 without wakeup, disable it\n",
+			 port->rhub->hcd->self.busnum, port->hcd_portnum + 1);
+		portsc = xhci_port_state_to_neutral(portsc);
+		writel(portsc | PORT_PE, port->addr);
+	}
+
+	return 0;
+}
+
 static void xhci_pci_shutdown(struct usb_hcd *hcd)
 {
 	struct xhci_hcd		*xhci = hcd_to_xhci(hcd);
@@ -689,6 +740,7 @@ static int __init xhci_pci_init(void)
 #ifdef CONFIG_PM
 	xhci_pci_hc_driver.pci_suspend = xhci_pci_suspend;
 	xhci_pci_hc_driver.pci_resume = xhci_pci_resume;
+	xhci_pci_hc_driver.pci_poweroff_late = xhci_pci_poweroff_late;
 	xhci_pci_hc_driver.shutdown = xhci_pci_shutdown;
 #endif
 	return pci_register_driver(&xhci_pci_driver);
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 78cd566ee2380..b51c07111729b 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -269,6 +269,9 @@ struct hc_driver {
 	/* called after entering D0 (etc), before resuming the hub */
 	int	(*pci_resume)(struct usb_hcd *hcd, bool hibernated);
 
+	/* called just before hibernate final D3 state, allows host to poweroff parts */
+	int	(*pci_poweroff_late)(struct usb_hcd *hcd, bool do_wakeup);
+
 	/* cleanly make HCD stop writing memory and doing I/O */
 	void	(*stop) (struct usb_hcd *hcd);
 
-- 
GitLab


From a1575120972ecd7baa6af6a69e4e7ea9213bde7c Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 30 Nov 2022 11:19:43 +0200
Subject: [PATCH 2926/4122] xhci: Prevent infinite loop in transaction errors
 recovery for streams

Make sure to also limit the amount of soft reset retries for transaction
errors on streams in cases where the transaction error event doesn't point
to any specific TRB.

In these cases we don't know the TRB or stream ring, but we do know which
endpoint had the error.

To keep error counting simple and functional, move the current err_count
from ring structure to endpoint structure.

Cc: stable@vger.kernel.org
Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20221130091944.2171610-6-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 14 ++++++++++----
 drivers/usb/host/xhci.h      |  2 +-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index f6af479188e8f..039ec9734fcdd 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -2458,7 +2458,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 
 	switch (trb_comp_code) {
 	case COMP_SUCCESS:
-		ep_ring->err_count = 0;
+		ep->err_count = 0;
 		/* handle success with untransferred data as short packet */
 		if (ep_trb != td->last_trb || remaining) {
 			xhci_warn(xhci, "WARN Successful completion on short TX\n");
@@ -2484,7 +2484,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 		break;
 	case COMP_USB_TRANSACTION_ERROR:
 		if (xhci->quirks & XHCI_NO_SOFT_RETRY ||
-		    (ep_ring->err_count++ > MAX_SOFT_RETRY) ||
+		    (ep->err_count++ > MAX_SOFT_RETRY) ||
 		    le32_to_cpu(slot_ctx->tt_info) & TT_SLOT)
 			break;
 
@@ -2565,8 +2565,14 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 		case COMP_USB_TRANSACTION_ERROR:
 		case COMP_INVALID_STREAM_TYPE_ERROR:
 		case COMP_INVALID_STREAM_ID_ERROR:
-			xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
-						    EP_SOFT_RESET);
+			xhci_dbg(xhci, "Stream transaction error ep %u no id\n",
+				 ep_index);
+			if (ep->err_count++ > MAX_SOFT_RETRY)
+				xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
+							    EP_HARD_RESET);
+			else
+				xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
+							    EP_SOFT_RESET);
 			goto cleanup;
 		case COMP_RING_UNDERRUN:
 		case COMP_RING_OVERRUN:
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index cc084d9505cdf..c9f06c5e4e9d2 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -933,6 +933,7 @@ struct xhci_virt_ep {
 	 * have to restore the device state to the previous state
 	 */
 	struct xhci_ring		*new_ring;
+	unsigned int			err_count;
 	unsigned int			ep_state;
 #define SET_DEQ_PENDING		(1 << 0)
 #define EP_HALTED		(1 << 1)	/* For stall handling */
@@ -1627,7 +1628,6 @@ struct xhci_ring {
 	 * if we own the TRB (if we are the consumer).  See section 4.9.1.
 	 */
 	u32			cycle_state;
-	unsigned int            err_count;
 	unsigned int		stream_id;
 	unsigned int		num_segs;
 	unsigned int		num_trbs_free;
-- 
GitLab


From 7428a253315cefa34e6092a0119c56cb3a1c0c12 Mon Sep 17 00:00:00 2001
From: Mathias Nyman <mathias.nyman@linux.intel.com>
Date: Wed, 30 Nov 2022 11:19:44 +0200
Subject: [PATCH 2927/4122] xhci: remove unused stream_id parameter from
 xhci_handle_halted_endpoint()

The stream_id parameter is no longer used when handling halted endpoints.
Remove it

Signed-off-by: Mathias Nyman <mathias.nyman@linux.intel.com>
Link: https://lore.kernel.org/r/20221130091944.2171610-7-mathias.nyman@linux.intel.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/host/xhci-ring.c | 27 ++++++++++-----------------
 1 file changed, 10 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 039ec9734fcdd..ddc30037f9cef 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -896,7 +896,7 @@ done:
 }
 
 static int xhci_handle_halted_endpoint(struct xhci_hcd *xhci,
-				struct xhci_virt_ep *ep, unsigned int stream_id,
+				struct xhci_virt_ep *ep,
 				struct xhci_td *td,
 				enum xhci_ep_reset_type reset_type)
 {
@@ -1110,8 +1110,7 @@ static void xhci_handle_cmd_stop_ep(struct xhci_hcd *xhci, int slot_id,
 					td->status = -EPROTO;
 			}
 			/* reset ep, reset handler cleans up cancelled tds */
-			err = xhci_handle_halted_endpoint(xhci, ep, 0, td,
-							  reset_type);
+			err = xhci_handle_halted_endpoint(xhci, ep, td, reset_type);
 			if (err)
 				break;
 			ep->ep_state &= ~EP_STOP_CMD_PENDING;
@@ -2183,8 +2182,7 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 		}
 		/* Almost same procedure as for STALL_ERROR below */
 		xhci_clear_hub_tt_buffer(xhci, td, ep);
-		xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td,
-					    EP_HARD_RESET);
+		xhci_handle_halted_endpoint(xhci, ep, td, EP_HARD_RESET);
 		return 0;
 	case COMP_STALL_ERROR:
 		/*
@@ -2200,8 +2198,7 @@ static int finish_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 		if (ep->ep_index != 0)
 			xhci_clear_hub_tt_buffer(xhci, td, ep);
 
-		xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td,
-					    EP_HARD_RESET);
+		xhci_handle_halted_endpoint(xhci, ep, td, EP_HARD_RESET);
 
 		return 0; /* xhci_handle_halted_endpoint marked td cancelled */
 	default:
@@ -2490,8 +2487,7 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_virt_ep *ep,
 
 		td->status = 0;
 
-		xhci_handle_halted_endpoint(xhci, ep, ep_ring->stream_id, td,
-					    EP_SOFT_RESET);
+		xhci_handle_halted_endpoint(xhci, ep, td, EP_SOFT_RESET);
 		return 0;
 	default:
 		/* do nothing */
@@ -2568,10 +2564,10 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			xhci_dbg(xhci, "Stream transaction error ep %u no id\n",
 				 ep_index);
 			if (ep->err_count++ > MAX_SOFT_RETRY)
-				xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
+				xhci_handle_halted_endpoint(xhci, ep, NULL,
 							    EP_HARD_RESET);
 			else
-				xhci_handle_halted_endpoint(xhci, ep, 0, NULL,
+				xhci_handle_halted_endpoint(xhci, ep, NULL,
 							    EP_SOFT_RESET);
 			goto cleanup;
 		case COMP_RING_UNDERRUN:
@@ -2755,9 +2751,7 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			if (trb_comp_code == COMP_STALL_ERROR ||
 			    xhci_requires_manual_halt_cleanup(xhci, ep_ctx,
 							      trb_comp_code)) {
-				xhci_handle_halted_endpoint(xhci, ep,
-							    ep_ring->stream_id,
-							    NULL,
+				xhci_handle_halted_endpoint(xhci, ep, NULL,
 							    EP_HARD_RESET);
 			}
 			goto cleanup;
@@ -2850,9 +2844,8 @@ static int handle_tx_event(struct xhci_hcd *xhci,
 			if (trb_comp_code == COMP_STALL_ERROR ||
 			    xhci_requires_manual_halt_cleanup(xhci, ep_ctx,
 							      trb_comp_code))
-				xhci_handle_halted_endpoint(xhci, ep,
-							    ep_ring->stream_id,
-							    td, EP_HARD_RESET);
+				xhci_handle_halted_endpoint(xhci, ep, td,
+							    EP_HARD_RESET);
 			goto cleanup;
 		}
 
-- 
GitLab


From a08ca6ebafe615c9028c53fc4c9e6c9b2b1f2888 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 29 Nov 2022 15:17:49 +0100
Subject: [PATCH 2928/4122] USB: serial: f81232: fix division by zero on
 line-speed change

The driver leaves the line speed unchanged in case a requested speed is
not supported. Make sure to handle the case where the current speed is
B0 (hangup) without dividing by zero when determining the clock source.

Fixes: 268ddb5e9b62 ("USB: serial: f81232: add high baud rate support")
Cc: stable@vger.kernel.org      # 5.2
Cc: Ji-Ze Hong (Peter Hong) <hpeter@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/f81232.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/serial/f81232.c b/drivers/usb/serial/f81232.c
index 2dd58cd9f0ccb..891fb1fe69df7 100644
--- a/drivers/usb/serial/f81232.c
+++ b/drivers/usb/serial/f81232.c
@@ -130,9 +130,6 @@ static u8 const clock_table[] = { F81232_CLK_1_846_MHZ, F81232_CLK_14_77_MHZ,
 
 static int calc_baud_divisor(speed_t baudrate, speed_t clockrate)
 {
-	if (!baudrate)
-		return 0;
-
 	return DIV_ROUND_CLOSEST(clockrate, baudrate);
 }
 
@@ -498,9 +495,14 @@ static void f81232_set_baudrate(struct tty_struct *tty,
 	speed_t baud_list[] = { baudrate, old_baudrate, F81232_DEF_BAUDRATE };
 
 	for (i = 0; i < ARRAY_SIZE(baud_list); ++i) {
-		idx = f81232_find_clk(baud_list[i]);
+		baudrate = baud_list[i];
+		if (baudrate == 0) {
+			tty_encode_baud_rate(tty, 0, 0);
+			return;
+		}
+
+		idx = f81232_find_clk(baudrate);
 		if (idx >= 0) {
-			baudrate = baud_list[i];
 			tty_encode_baud_rate(tty, baudrate, baudrate);
 			break;
 		}
-- 
GitLab


From 188c9c2e0c7f4ae864113f80c40bafb394062271 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 29 Nov 2022 15:18:19 +0100
Subject: [PATCH 2929/4122] USB: serial: f81534: fix division by zero on
 line-speed change

The driver leaves the line speed unchanged in case a requested speed is
not supported. Make sure to handle the case where the current speed is
B0 (hangup) without dividing by zero when determining the clock source.

Fixes: 3aacac02f385 ("USB: serial: f81534: add high baud rate support")
Cc: stable@vger.kernel.org      # 4.16
Cc: Ji-Ze Hong (Peter Hong) <hpeter@gmail.com>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/f81534.c | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/drivers/usb/serial/f81534.c b/drivers/usb/serial/f81534.c
index ddfcd72eb0ae7..4083ae961be43 100644
--- a/drivers/usb/serial/f81534.c
+++ b/drivers/usb/serial/f81534.c
@@ -536,9 +536,6 @@ static int f81534_submit_writer(struct usb_serial_port *port, gfp_t mem_flags)
 
 static u32 f81534_calc_baud_divisor(u32 baudrate, u32 clockrate)
 {
-	if (!baudrate)
-		return 0;
-
 	/* Round to nearest divisor */
 	return DIV_ROUND_CLOSEST(clockrate, baudrate);
 }
@@ -568,9 +565,14 @@ static int f81534_set_port_config(struct usb_serial_port *port,
 	u32 baud_list[] = {baudrate, old_baudrate, F81534_DEFAULT_BAUD_RATE};
 
 	for (i = 0; i < ARRAY_SIZE(baud_list); ++i) {
-		idx = f81534_find_clk(baud_list[i]);
+		baudrate = baud_list[i];
+		if (baudrate == 0) {
+			tty_encode_baud_rate(tty, 0, 0);
+			return 0;
+		}
+
+		idx = f81534_find_clk(baudrate);
 		if (idx >= 0) {
-			baudrate = baud_list[i];
 			tty_encode_baud_rate(tty, baudrate, baudrate);
 			break;
 		}
-- 
GitLab


From 63b8ed26cd093ecc1bcdd1fd841f238a52c11031 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan@kernel.org>
Date: Tue, 29 Nov 2022 15:18:57 +0100
Subject: [PATCH 2930/4122] USB: serial: xr: avoid requesting zero DTE rate

When the requested line speed is B0 (hangup) there is no need to use the
current speed in the line-coding request. This specifically avoids
requesting a zero DTE rate when the current speed is B0, which could
potentially confuse buggy firmware.

Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Johan Hovold <johan@kernel.org>
---
 drivers/usb/serial/xr_serial.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/usb/serial/xr_serial.c b/drivers/usb/serial/xr_serial.c
index f3811e060a441..fdb0aae546c33 100644
--- a/drivers/usb/serial/xr_serial.c
+++ b/drivers/usb/serial/xr_serial.c
@@ -749,8 +749,6 @@ static void xr_cdc_set_line_coding(struct tty_struct *tty,
 
 	if (tty->termios.c_ospeed)
 		lc->dwDTERate = cpu_to_le32(tty->termios.c_ospeed);
-	else if (old_termios)
-		lc->dwDTERate = cpu_to_le32(old_termios->c_ospeed);
 	else
 		lc->dwDTERate = cpu_to_le32(9600);
 
-- 
GitLab


From 69b14fde0b8dd159ba19d2b2083a0f2958a9130c Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 30 Nov 2022 12:13:00 +1100
Subject: [PATCH 2931/4122] driver core: fix up missed scsi/cxlflash
 class.devnode() conversion.

Fixes: ff62b8e6588f ("driver core: make struct class.devnode() take a const *")
Link: https://lore.kernel.org/r/20221130123851.6a9f2242@canb.auug.org.au
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/scsi/cxlflash/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index cd1324ec742de..395b00b942f70 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -3857,7 +3857,7 @@ static void cxlflash_pci_resume(struct pci_dev *pdev)
  *
  * Return: Allocated string describing the devtmpfs structure.
  */
-static char *cxlflash_devnode(struct device *dev, umode_t *mode)
+static char *cxlflash_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "cxlflash/%s", dev_name(dev));
 }
-- 
GitLab


From 50dc8d18f62d58a2330f08fddc069f263d191c90 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Wed, 30 Nov 2022 12:32:57 +1100
Subject: [PATCH 2932/4122] driver core: fix up missed
 drivers/s390/char/hmcdrv_dev.c class.devnode() conversion.

Fixes: ff62b8e6588f ("driver core: make struct class.devnode() take a const *")
Link: https://lore.kernel.org/r/20221130123851.6a9f2242@canb.auug.org.au
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/s390/char/hmcdrv_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/s390/char/hmcdrv_dev.c b/drivers/s390/char/hmcdrv_dev.c
index 20e9cd542e033..cb8fdf057ecaf 100644
--- a/drivers/s390/char/hmcdrv_dev.c
+++ b/drivers/s390/char/hmcdrv_dev.c
@@ -90,7 +90,7 @@ static dev_t hmcdrv_dev_no; /* device number (major/minor) */
  *
  * Return: recommended device file name in /dev
  */
-static char *hmcdrv_dev_name(struct device *dev, umode_t *mode)
+static char *hmcdrv_dev_name(const struct device *dev, umode_t *mode)
 {
 	char *nodename = NULL;
 	const char *devname = dev_name(dev); /* kernel device name */
-- 
GitLab


From 5ec3289b31ab9bb209be59cee360aac4b03f320a Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Fri, 18 Nov 2022 14:32:38 +0000
Subject: [PATCH 2933/4122] KVM: x86/xen: Compatibility fixes for shared
 runstate area
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The guest runstate area can be arbitrarily byte-aligned. In fact, even
when a sane 32-bit guest aligns the overall structure nicely, the 64-bit
fields in the structure end up being unaligned due to the fact that the
32-bit ABI only aligns them to 32 bits.

So setting the ->state_entry_time field to something|XEN_RUNSTATE_UPDATE
is buggy, because if it's unaligned then we can't update the whole field
atomically; the low bytes might be observable before the _UPDATE bit is.
Xen actually updates the *byte* containing that top bit, on its own. KVM
should do the same.

In addition, we cannot assume that the runstate area fits within a single
page. One option might be to make the gfn_to_pfn cache cope with regions
that cross a page — but getting a contiguous virtual kernel mapping of a
discontiguous set of IOMEM pages is a distinctly non-trivial exercise,
and it seems this is the *only* current use case for the GPC which would
benefit from it.

An earlier version of the runstate code did use a gfn_to_hva cache for
this purpose, but it still had the single-page restriction because it
used the uhva directly — because it needs to be able to do so atomically
when the vCPU is being scheduled out, so it used pagefault_disable()
around the accesses and didn't just use kvm_write_guest_cached() which
has a fallback path.

So... use a pair of GPCs for the first and potential second page covering
the runstate area. We can get away with locking both at once because
nothing else takes more than one GPC lock at a time so we can invent
a trivial ordering rule.

The common case where it's all in the same page is kept as a fast path,
but in both cases, the actual guest structure (compat or not) is built
up from the fields in @vx, following preset pointers to the state and
times fields. The only difference is whether those pointers point to
the kernel stack (in the split case) or to guest memory directly via
the GPC.  The fast path is also fixed to use a byte access for the
XEN_RUNSTATE_UPDATE bit, then the only real difference is the dual
memcpy.

Finally, Xen also does write the runstate area immediately when it's
configured. Flip the kvm_xen_update_runstate() and …_guest() functions
and call the latter directly when the runstate area is set. This means
that other ioctls which modify the runstate also write it immediately
to the guest when they do so, which is also intended.

Update the xen_shinfo_test to exercise the pathological case where the
XEN_RUNSTATE_UPDATE flag in the top byte of the state_entry_time is
actually in a different page to the rest of the 64-bit word.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h               |   1 +
 arch/x86/kvm/xen.c                            | 370 +++++++++++++-----
 arch/x86/kvm/xen.h                            |   6 +-
 .../selftests/kvm/x86_64/xen_shinfo_test.c    |  12 +-
 4 files changed, 276 insertions(+), 113 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d1013c4f673ca..70af7240a1d5a 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -686,6 +686,7 @@ struct kvm_vcpu_xen {
 	struct gfn_to_pfn_cache vcpu_info_cache;
 	struct gfn_to_pfn_cache vcpu_time_info_cache;
 	struct gfn_to_pfn_cache runstate_cache;
+	struct gfn_to_pfn_cache runstate2_cache;
 	u64 last_steal;
 	u64 runstate_entry_time;
 	u64 runstate_times[4];
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 4b8e9628fbf57..cfc1c07bc78f2 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -170,112 +170,44 @@ static void kvm_xen_init_timer(struct kvm_vcpu *vcpu)
 	vcpu->arch.xen.timer.function = xen_timer_callback;
 }
 
-static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 {
 	struct kvm_vcpu_xen *vx = &v->arch.xen;
-	u64 now = get_kvmclock_ns(v->kvm);
-	u64 delta_ns = now - vx->runstate_entry_time;
-	u64 run_delay = current->sched_info.run_delay;
-
-	if (unlikely(!vx->runstate_entry_time))
-		vx->current_runstate = RUNSTATE_offline;
-
-	/*
-	 * Time waiting for the scheduler isn't "stolen" if the
-	 * vCPU wasn't running anyway.
-	 */
-	if (vx->current_runstate == RUNSTATE_running) {
-		u64 steal_ns = run_delay - vx->last_steal;
-
-		delta_ns -= steal_ns;
-
-		vx->runstate_times[RUNSTATE_runnable] += steal_ns;
-	}
-	vx->last_steal = run_delay;
-
-	vx->runstate_times[vx->current_runstate] += delta_ns;
-	vx->current_runstate = state;
-	vx->runstate_entry_time = now;
-}
-
-void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
-{
-	struct kvm_vcpu_xen *vx = &v->arch.xen;
-	struct gfn_to_pfn_cache *gpc = &vx->runstate_cache;
-	uint64_t *user_times;
+	struct gfn_to_pfn_cache *gpc1 = &vx->runstate_cache;
+	struct gfn_to_pfn_cache *gpc2 = &vx->runstate2_cache;
+	size_t user_len, user_len1, user_len2;
+	struct vcpu_runstate_info rs;
 	unsigned long flags;
-	size_t user_len;
-	int *user_state;
-
-	kvm_xen_update_runstate(v, state);
-
-	if (!vx->runstate_cache.active)
-		return;
-
-	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
-		user_len = sizeof(struct vcpu_runstate_info);
-	else
-		user_len = sizeof(struct compat_vcpu_runstate_info);
-
-	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
-					   user_len)) {
-		read_unlock_irqrestore(&gpc->lock, flags);
-
-		/* When invoked from kvm_sched_out() we cannot sleep */
-		if (state == RUNSTATE_runnable)
-			return;
-
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa, user_len))
-			return;
-
-		read_lock_irqsave(&gpc->lock, flags);
-	}
+	size_t times_ofs;
+	uint8_t *update_bit;
+	uint64_t *rs_times;
+	int *rs_state;
 
 	/*
 	 * The only difference between 32-bit and 64-bit versions of the
-	 * runstate struct us the alignment of uint64_t in 32-bit, which
+	 * runstate struct is the alignment of uint64_t in 32-bit, which
 	 * means that the 64-bit version has an additional 4 bytes of
-	 * padding after the first field 'state'.
-	 *
-	 * So we use 'int __user *user_state' to point to the state field,
-	 * and 'uint64_t __user *user_times' for runstate_entry_time. So
-	 * the actual array of time[] in each state starts at user_times[1].
+	 * padding after the first field 'state'. Let's be really really
+	 * paranoid about that, and matching it with our internal data
+	 * structures that we memcpy into it...
 	 */
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) != 0);
 	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state) != 0);
 	BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
 #ifdef CONFIG_X86_64
+	/*
+	 * The 64-bit structure has 4 bytes of padding before 'state_entry_time'
+	 * so each subsequent field is shifted by 4, and it's 4 bytes longer.
+	 */
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
 		     offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
 		     offsetof(struct compat_vcpu_runstate_info, time) + 4);
+	BUILD_BUG_ON(sizeof(struct vcpu_runstate_info) != 0x2c + 4);
 #endif
-
-	user_state = gpc->khva;
-
-	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode)
-		user_times = gpc->khva + offsetof(struct vcpu_runstate_info,
-						  state_entry_time);
-	else
-		user_times = gpc->khva + offsetof(struct compat_vcpu_runstate_info,
-						  state_entry_time);
-
 	/*
-	 * First write the updated state_entry_time at the appropriate
-	 * location determined by 'offset'.
-	 */
-	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
-		     sizeof(user_times[0]));
-	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
-		     sizeof(user_times[0]));
-
-	user_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
-	smp_wmb();
-
-	/*
-	 * Next, write the new runstate. This is in the *same* place
-	 * for 32-bit and 64-bit guests, asserted here for paranoia.
+	 * The state field is in the same place at the start of both structs,
+	 * and is the same size (int) as vx->current_runstate.
 	 */
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
 		     offsetof(struct compat_vcpu_runstate_info, state));
@@ -284,34 +216,228 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
 	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state) !=
 		     sizeof(vx->current_runstate));
 
-	*user_state = vx->current_runstate;
+	/*
+	 * The state_entry_time field is 64 bits in both versions, and the
+	 * XEN_RUNSTATE_UPDATE flag is in the top bit, which given that x86
+	 * is little-endian means that it's in the last *byte* of the word.
+	 * That detail is important later.
+	 */
+	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, state_entry_time) !=
+		     sizeof(uint64_t));
+	BUILD_BUG_ON(sizeof_field(struct compat_vcpu_runstate_info, state_entry_time) !=
+		     sizeof(uint64_t));
+	BUILD_BUG_ON((XEN_RUNSTATE_UPDATE >> 56) != 0x80);
 
 	/*
-	 * Write the actual runstate times immediately after the
-	 * runstate_entry_time.
+	 * The time array is four 64-bit quantities in both versions, matching
+	 * the vx->runstate_times and immediately following state_entry_time.
 	 */
 	BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
-		     offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+		     offsetof(struct vcpu_runstate_info, time) - sizeof(uint64_t));
 	BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
-		     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+		     offsetof(struct compat_vcpu_runstate_info, time) - sizeof(uint64_t));
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
 		     sizeof_field(struct compat_vcpu_runstate_info, time));
 	BUILD_BUG_ON(sizeof_field(struct vcpu_runstate_info, time) !=
 		     sizeof(vx->runstate_times));
 
-	memcpy(user_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
+	if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
+		user_len = sizeof(struct vcpu_runstate_info);
+		times_ofs = offsetof(struct vcpu_runstate_info,
+				     state_entry_time);
+	} else {
+		user_len = sizeof(struct compat_vcpu_runstate_info);
+		times_ofs = offsetof(struct compat_vcpu_runstate_info,
+				     state_entry_time);
+	}
+
+	/*
+	 * There are basically no alignment constraints. The guest can set it
+	 * up so it crosses from one page to the next, and at arbitrary byte
+	 * alignment (and the 32-bit ABI doesn't align the 64-bit integers
+	 * anyway, even if the overall struct had been 64-bit aligned).
+	 */
+	if ((gpc1->gpa & ~PAGE_MASK) + user_len >= PAGE_SIZE) {
+		user_len1 = PAGE_SIZE - (gpc1->gpa & ~PAGE_MASK);
+		user_len2 = user_len - user_len1;
+	} else {
+		user_len1 = user_len;
+		user_len2 = 0;
+	}
+	BUG_ON(user_len1 + user_len2 != user_len);
+
+ retry:
+	/*
+	 * Attempt to obtain the GPC lock on *both* (if there are two)
+	 * gfn_to_pfn caches that cover the region.
+	 */
+	read_lock_irqsave(&gpc1->lock, flags);
+	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa, user_len1)) {
+		read_unlock_irqrestore(&gpc1->lock, flags);
+
+		/* When invoked from kvm_sched_out() we cannot sleep */
+		if (atomic)
+			return;
+
+		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
+			return;
+
+		read_lock_irqsave(&gpc1->lock, flags);
+	}
+
+	if (likely(!user_len2)) {
+		/*
+		 * Set up three pointers directly to the runstate_info
+		 * struct in the guest (via the GPC).
+		 *
+		 *  • @rs_state   → state field
+		 *  • @rs_times   → state_entry_time field.
+		 *  • @update_bit → last byte of state_entry_time, which
+		 *                  contains the XEN_RUNSTATE_UPDATE bit.
+		 */
+		rs_state = gpc1->khva;
+		rs_times = gpc1->khva + times_ofs;
+		update_bit = ((void *)(&rs_times[1])) - 1;
+	} else {
+		/*
+		 * The guest's runstate_info is split across two pages and we
+		 * need to hold and validate both GPCs simultaneously. We can
+		 * declare a lock ordering GPC1 > GPC2 because nothing else
+		 * takes them more than one at a time.
+		 */
+		read_lock(&gpc2->lock);
+
+		if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
+			read_unlock(&gpc2->lock);
+			read_unlock_irqrestore(&gpc1->lock, flags);
+
+			/* When invoked from kvm_sched_out() we cannot sleep */
+			if (atomic)
+				return;
+
+			/*
+			 * Use kvm_gpc_activate() here because if the runstate
+			 * area was configured in 32-bit mode and only extends
+			 * to the second page now because the guest changed to
+			 * 64-bit mode, the second GPC won't have been set up.
+			 */
+			if (kvm_gpc_activate(v->kvm, gpc2, NULL, KVM_HOST_USES_PFN,
+					     gpc1->gpa + user_len1, user_len2))
+				return;
+
+			/*
+			 * We dropped the lock on GPC1 so we have to go all the
+			 * way back and revalidate that too.
+			 */
+			goto retry;
+		}
+
+		/*
+		 * In this case, the runstate_info struct will be assembled on
+		 * the kernel stack (compat or not as appropriate) and will
+		 * be copied to GPC1/GPC2 with a dual memcpy. Set up the three
+		 * rs pointers accordingly.
+		 */
+		rs_times = &rs.state_entry_time;
+
+		/*
+		 * The rs_state pointer points to the start of what we'll
+		 * copy to the guest, which in the case of a compat guest
+		 * is the 32-bit field that the compiler thinks is padding.
+		 */
+		rs_state = ((void *)rs_times) - times_ofs;
+
+		/*
+		 * The update_bit is still directly in the guest memory,
+		 * via one GPC or the other.
+		 */
+		if (user_len1 >= times_ofs + sizeof(uint64_t))
+			update_bit = gpc1->khva + times_ofs +
+				sizeof(uint64_t) - 1;
+		else
+			update_bit = gpc2->khva + times_ofs +
+				sizeof(uint64_t) - 1 - user_len1;
+
+#ifdef CONFIG_X86_64
+		/*
+		 * Don't leak kernel memory through the padding in the 64-bit
+		 * version of the struct.
+		 */
+		memset(&rs, 0, offsetof(struct vcpu_runstate_info, state_entry_time));
+#endif
+	}
+
+	/*
+	 * First, set the XEN_RUNSTATE_UPDATE bit in the top bit of the
+	 * state_entry_time field, directly in the guest. We need to set
+	 * that (and write-barrier) before writing to the rest of the
+	 * structure, and clear it last. Just as Xen does, we address the
+	 * single *byte* in which it resides because it might be in a
+	 * different cache line to the rest of the 64-bit word, due to
+	 * the (lack of) alignment constraints.
+	 */
+	*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
 	smp_wmb();
 
 	/*
-	 * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
-	 * runstate_entry_time field.
+	 * Now assemble the actual structure, either on our kernel stack
+	 * or directly in the guest according to how the rs_state and
+	 * rs_times pointers were set up above.
 	 */
-	user_times[0] &= ~XEN_RUNSTATE_UPDATE;
+	*rs_state = vx->current_runstate;
+	rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
+	memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
+
+	/* For the split case, we have to then copy it to the guest. */
+	if (user_len2) {
+		memcpy(gpc1->khva, rs_state, user_len1);
+		memcpy(gpc2->khva, ((void *)rs_state) + user_len1, user_len2);
+	}
 	smp_wmb();
 
-	read_unlock_irqrestore(&gpc->lock, flags);
+	/* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
+	*update_bit = vx->runstate_entry_time >> 56;
+	smp_wmb();
 
-	mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+	if (user_len2)
+		read_unlock(&gpc2->lock);
+
+	read_unlock_irqrestore(&gpc1->lock, flags);
+
+	mark_page_dirty_in_slot(v->kvm, gpc1->memslot, gpc1->gpa >> PAGE_SHIFT);
+	if (user_len2)
+		mark_page_dirty_in_slot(v->kvm, gpc2->memslot, gpc2->gpa >> PAGE_SHIFT);
+}
+
+void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+{
+	struct kvm_vcpu_xen *vx = &v->arch.xen;
+	u64 now = get_kvmclock_ns(v->kvm);
+	u64 delta_ns = now - vx->runstate_entry_time;
+	u64 run_delay = current->sched_info.run_delay;
+
+	if (unlikely(!vx->runstate_entry_time))
+		vx->current_runstate = RUNSTATE_offline;
+
+	/*
+	 * Time waiting for the scheduler isn't "stolen" if the
+	 * vCPU wasn't running anyway.
+	 */
+	if (vx->current_runstate == RUNSTATE_running) {
+		u64 steal_ns = run_delay - vx->last_steal;
+
+		delta_ns -= steal_ns;
+
+		vx->runstate_times[RUNSTATE_runnable] += steal_ns;
+	}
+	vx->last_steal = run_delay;
+
+	vx->runstate_times[vx->current_runstate] += delta_ns;
+	vx->current_runstate = state;
+	vx->runstate_entry_time = now;
+
+	if (vx->runstate_cache.active)
+		kvm_xen_update_runstate_guest(v, state == RUNSTATE_runnable);
 }
 
 static void kvm_xen_inject_vcpu_vector(struct kvm_vcpu *v)
@@ -584,23 +710,57 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 		break;
 
-	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR: {
+		size_t sz, sz1, sz2;
+
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
 			break;
 		}
 		if (data->u.gpa == GPA_INVALID) {
+			r = 0;
+		deactivate_out:
 			kvm_gpc_deactivate(vcpu->kvm,
 					   &vcpu->arch.xen.runstate_cache);
-			r = 0;
+			kvm_gpc_deactivate(vcpu->kvm,
+					   &vcpu->arch.xen.runstate2_cache);
 			break;
 		}
 
+		/*
+		 * If the guest switches to 64-bit mode after setting the runstate
+		 * address, that's actually OK. kvm_xen_update_runstate_guest()
+		 * will cope.
+		 */
+		if (IS_ENABLED(CONFIG_64BIT) && vcpu->kvm->arch.xen.long_mode)
+			sz = sizeof(struct vcpu_runstate_info);
+		else
+			sz = sizeof(struct compat_vcpu_runstate_info);
+
+		/* How much fits in the (first) page? */
+		sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
 		r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
-				     NULL, KVM_HOST_USES_PFN, data->u.gpa,
-				     sizeof(struct vcpu_runstate_info));
-		break;
+				     NULL, KVM_HOST_USES_PFN, data->u.gpa, sz1);
+		if (r)
+			goto deactivate_out;
+
+		/* Either map the second page, or deactivate the second GPC */
+		if (sz1 >= sz) {
+			kvm_gpc_deactivate(vcpu->kvm,
+					   &vcpu->arch.xen.runstate2_cache);
+		} else {
+			sz2 = sz - sz1;
+			BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK);
+			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache,
+					     NULL, KVM_HOST_USES_PFN,
+					     data->u.gpa + sz1, sz2);
+			if (r)
+				goto deactivate_out;
+		}
 
+		kvm_xen_update_runstate_guest(vcpu, false);
+		break;
+	}
 	case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
 		if (!sched_info_on()) {
 			r = -EOPNOTSUPP;
@@ -1834,6 +1994,7 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
 
 	kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
+	kvm_gpc_init(&vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
 }
@@ -1844,6 +2005,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 		kvm_xen_stop_timer(vcpu);
 
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
+	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
 	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
 
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index 532a535a9e99f..8503d2c6891e3 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -143,11 +143,11 @@ int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 #include <asm/xen/interface.h>
 #include <xen/interface/vcpu.h>
 
-void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
+void kvm_xen_update_runstate(struct kvm_vcpu *vcpu, int state);
 
 static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
 {
-	kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
+	kvm_xen_update_runstate(vcpu, RUNSTATE_running);
 }
 
 static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
@@ -162,7 +162,7 @@ static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
 	if (WARN_ON_ONCE(!vcpu->preempted))
 		return;
 
-	kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+	kvm_xen_update_runstate(vcpu, RUNSTATE_runnable);
 }
 
 /* 32-bit compatibility definitions, also used natively in 32-bit build */
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 2a5727188c8d3..7f39815f17728 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -26,17 +26,17 @@
 #define SHINFO_REGION_GPA	0xc0000000ULL
 #define SHINFO_REGION_SLOT	10
 
-#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (2 * PAGE_SIZE))
+#define DUMMY_REGION_GPA	(SHINFO_REGION_GPA + (3 * PAGE_SIZE))
 #define DUMMY_REGION_SLOT	11
 
 #define SHINFO_ADDR	(SHINFO_REGION_GPA)
-#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
-#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
 #define VCPU_INFO_ADDR	(SHINFO_REGION_GPA + 0x40)
+#define PVTIME_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR	(SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
 
 #define SHINFO_VADDR	(SHINFO_REGION_GVA)
-#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
 #define VCPU_INFO_VADDR	(SHINFO_REGION_GVA + 0x40)
+#define RUNSTATE_VADDR	(SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
 
 #define EVTCHN_VECTOR	0x10
 
@@ -449,8 +449,8 @@ int main(int argc, char *argv[])
 
 	/* Map a region for the shared_info page */
 	vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
-				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
-	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2);
+				    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
+	virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
 
 	struct shared_info *shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
 
-- 
GitLab


From d8ba8ba4c801b794f47852a6f1821ea48f83b5d1 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sun, 27 Nov 2022 12:22:10 +0000
Subject: [PATCH 2934/4122] KVM: x86/xen: Allow XEN_RUNSTATE_UPDATE flag
 behaviour to be configured

Closer inspection of the Xen code shows that we aren't supposed to be
using the XEN_RUNSTATE_UPDATE flag unconditionally. It should be
explicitly enabled by guests through the HYPERVISOR_vm_assist hypercall.
If we randomly set the top bit of ->state_entry_time for a guest that
hasn't asked for it and doesn't expect it, that could make the runtimes
fail to add up and confuse the guest. Without the flag it's perfectly
safe for a vCPU to read its own vcpu_runstate_info; just not for one
vCPU to read *another's*.

I briefly pondered adding a word for the whole set of VMASST_TYPE_*
flags but the only one we care about for HVM guests is this, so it
seemed a bit pointless.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20221127122210.248427-3-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst                | 34 +++++++++--
 arch/x86/include/asm/kvm_host.h               |  1 +
 arch/x86/kvm/x86.c                            |  3 +-
 arch/x86/kvm/xen.c                            | 57 ++++++++++++++-----
 include/uapi/linux/kvm.h                      |  4 ++
 .../selftests/kvm/x86_64/xen_shinfo_test.c    | 14 +++++
 6 files changed, 93 insertions(+), 20 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 9175d41e80816..5617bc4f899f4 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -5339,6 +5339,7 @@ KVM_PV_ASYNC_CLEANUP_PERFORM
 	union {
 		__u8 long_mode;
 		__u8 vector;
+		__u8 runstate_update_flag;
 		struct {
 			__u64 gfn;
 		} shared_info;
@@ -5416,6 +5417,14 @@ KVM_XEN_ATTR_TYPE_XEN_VERSION
   event channel delivery, so responding within the kernel without
   exiting to userspace is beneficial.
 
+KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG
+  This attribute is available when the KVM_CAP_XEN_HVM ioctl indicates
+  support for KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG. It enables the
+  XEN_RUNSTATE_UPDATE flag which allows guest vCPUs to safely read
+  other vCPUs' vcpu_runstate_info. Xen guests enable this feature via
+  the VM_ASST_TYPE_runstate_update_flag of the HYPERVISOR_vm_assist
+  hypercall.
+
 4.127 KVM_XEN_HVM_GET_ATTR
 --------------------------
 
@@ -8059,12 +8068,13 @@ to userspace.
 This capability indicates the features that Xen supports for hosting Xen
 PVHVM guests. Valid flags are::
 
-  #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR	(1 << 0)
-  #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL	(1 << 1)
-  #define KVM_XEN_HVM_CONFIG_SHARED_INFO	(1 << 2)
-  #define KVM_XEN_HVM_CONFIG_RUNSTATE		(1 << 3)
-  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
-  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND	(1 << 5)
+  #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR		(1 << 0)
+  #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL		(1 << 1)
+  #define KVM_XEN_HVM_CONFIG_SHARED_INFO		(1 << 2)
+  #define KVM_XEN_HVM_CONFIG_RUNSTATE			(1 << 3)
+  #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL		(1 << 4)
+  #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND		(1 << 5)
+  #define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG	(1 << 6)
 
 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
 ioctl is available, for the guest to set its hypercall page.
@@ -8096,6 +8106,18 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID/TIMER/UPCALL_VECTOR vCPU attributes.
 related to event channel delivery, timers, and the XENVER_version
 interception.
 
+The KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG flag indicates that KVM supports
+the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute in the KVM_XEN_SET_ATTR
+and KVM_XEN_GET_ATTR ioctls. This controls whether KVM will set the
+XEN_RUNSTATE_UPDATE flag in guest memory mapped vcpu_runstate_info during
+updates of the runstate information. Note that versions of KVM which support
+the RUNSTATE feature above, but not thie RUNSTATE_UPDATE_FLAG feature, will
+always set the XEN_RUNSTATE_UPDATE flag when updating the guest structure,
+which is perhaps counterintuitive. When this flag is advertised, KVM will
+behave more correctly, not using the XEN_RUNSTATE_UPDATE flag until/unless
+specifically enabled (by the guest making the hypercall, causing the VMM
+to enable the KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG attribute).
+
 8.31 KVM_CAP_PPC_MULTITCE
 -------------------------
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 70af7240a1d5a..283cbb83d6ae5 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1113,6 +1113,7 @@ struct msr_bitmap_range {
 struct kvm_xen {
 	u32 xen_version;
 	bool long_mode;
+	bool runstate_update_flag;
 	u8 upcall_vector;
 	struct gfn_to_pfn_cache shinfo_cache;
 	struct idr evtchn_ports;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 72ac6bf05c8b4..59fd55badd732 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4431,7 +4431,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 		    KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL |
 		    KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
 		if (sched_info_on())
-			r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
+			r |= KVM_XEN_HVM_CONFIG_RUNSTATE |
+			     KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG;
 		break;
 #endif
 	case KVM_CAP_SYNC_REGS:
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index cfc1c07bc78f2..7acac5dfe2f85 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -179,7 +179,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	struct vcpu_runstate_info rs;
 	unsigned long flags;
 	size_t times_ofs;
-	uint8_t *update_bit;
+	uint8_t *update_bit = NULL;
+	uint64_t entry_time;
 	uint64_t *rs_times;
 	int *rs_state;
 
@@ -297,7 +298,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		 */
 		rs_state = gpc1->khva;
 		rs_times = gpc1->khva + times_ofs;
-		update_bit = ((void *)(&rs_times[1])) - 1;
+		if (v->kvm->arch.xen.runstate_update_flag)
+			update_bit = ((void *)(&rs_times[1])) - 1;
 	} else {
 		/*
 		 * The guest's runstate_info is split across two pages and we
@@ -351,12 +353,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		 * The update_bit is still directly in the guest memory,
 		 * via one GPC or the other.
 		 */
-		if (user_len1 >= times_ofs + sizeof(uint64_t))
-			update_bit = gpc1->khva + times_ofs +
-				sizeof(uint64_t) - 1;
-		else
-			update_bit = gpc2->khva + times_ofs +
-				sizeof(uint64_t) - 1 - user_len1;
+		if (v->kvm->arch.xen.runstate_update_flag) {
+			if (user_len1 >= times_ofs + sizeof(uint64_t))
+				update_bit = gpc1->khva + times_ofs +
+					sizeof(uint64_t) - 1;
+			else
+				update_bit = gpc2->khva + times_ofs +
+					sizeof(uint64_t) - 1 - user_len1;
+		}
 
 #ifdef CONFIG_X86_64
 		/*
@@ -376,8 +380,12 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	 * different cache line to the rest of the 64-bit word, due to
 	 * the (lack of) alignment constraints.
 	 */
-	*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
-	smp_wmb();
+	entry_time = vx->runstate_entry_time;
+	if (update_bit) {
+		entry_time |= XEN_RUNSTATE_UPDATE;
+		*update_bit = (vx->runstate_entry_time | XEN_RUNSTATE_UPDATE) >> 56;
+		smp_wmb();
+	}
 
 	/*
 	 * Now assemble the actual structure, either on our kernel stack
@@ -385,7 +393,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	 * rs_times pointers were set up above.
 	 */
 	*rs_state = vx->current_runstate;
-	rs_times[0] = vx->runstate_entry_time | XEN_RUNSTATE_UPDATE;
+	rs_times[0] = entry_time;
 	memcpy(rs_times + 1, vx->runstate_times, sizeof(vx->runstate_times));
 
 	/* For the split case, we have to then copy it to the guest. */
@@ -396,8 +404,11 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	smp_wmb();
 
 	/* Finally, clear the XEN_RUNSTATE_UPDATE bit. */
-	*update_bit = vx->runstate_entry_time >> 56;
-	smp_wmb();
+	if (update_bit) {
+		entry_time &= ~XEN_RUNSTATE_UPDATE;
+		*update_bit = entry_time >> 56;
+		smp_wmb();
+	}
 
 	if (user_len2)
 		read_unlock(&gpc2->lock);
@@ -619,6 +630,17 @@ int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		r = 0;
 		break;
 
+	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
+		if (!sched_info_on()) {
+			r = -EOPNOTSUPP;
+			break;
+		}
+		mutex_lock(&kvm->lock);
+		kvm->arch.xen.runstate_update_flag = !!data->u.runstate_update_flag;
+		mutex_unlock(&kvm->lock);
+		r = 0;
+		break;
+
 	default:
 		break;
 	}
@@ -656,6 +678,15 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		r = 0;
 		break;
 
+	case KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG:
+		if (!sched_info_on()) {
+			r = -EOPNOTSUPP;
+			break;
+		}
+		data->u.runstate_update_flag = kvm->arch.xen.runstate_update_flag;
+		r = 0;
+		break;
+
 	default:
 		break;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 88448397642cb..64dfe9c07c879 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1271,6 +1271,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_RUNSTATE		(1 << 3)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL	(1 << 4)
 #define KVM_XEN_HVM_CONFIG_EVTCHN_SEND		(1 << 5)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG	(1 << 6)
 
 struct kvm_xen_hvm_config {
 	__u32 flags;
@@ -1776,6 +1777,7 @@ struct kvm_xen_hvm_attr {
 	union {
 		__u8 long_mode;
 		__u8 vector;
+		__u8 runstate_update_flag;
 		struct {
 			__u64 gfn;
 		} shared_info;
@@ -1816,6 +1818,8 @@ struct kvm_xen_hvm_attr {
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
 #define KVM_XEN_ATTR_TYPE_EVTCHN		0x3
 #define KVM_XEN_ATTR_TYPE_XEN_VERSION		0x4
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG */
+#define KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG	0x5
 
 /* Per-vCPU Xen attributes */
 #define KVM_XEN_VCPU_GET_ATTR	_IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index 7f39815f17728..c9b0110d73f3b 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -440,6 +440,7 @@ int main(int argc, char *argv[])
 	TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
 
 	bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+	bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
 	bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
 	bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
 
@@ -475,6 +476,19 @@ int main(int argc, char *argv[])
 	};
 	vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
 
+	if (do_runstate_flag) {
+		struct kvm_xen_hvm_attr ruf = {
+			.type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+			.u.runstate_update_flag = 1,
+		};
+		vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+		ruf.u.runstate_update_flag = 0;
+		vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+		TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+			    "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+	}
+
 	struct kvm_xen_hvm_attr ha = {
 		.type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
 		.u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE,
-- 
GitLab


From 8acc35186ed63436bfaf60051c8bb53f344dcbfc Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Sat, 19 Nov 2022 09:27:46 +0000
Subject: [PATCH 2935/4122] KVM: x86/xen: Add runstate tests for 32-bit mode
 and crossing page boundary

Torture test the cases where the runstate crosses a page boundary, and
and especially the case where it's configured in 32-bit mode and doesn't,
but then switching to 64-bit mode makes it go onto the second page.

To simplify this, make the KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST ioctl
also update the guest runstate area. It already did so if the actual
runstate changed, as a side-effect of kvm_xen_update_runstate(). So
doing it in the plain adjustment case is making it more consistent, as
well as giving us a nice way to trigger the update without actually
running the vCPU again and changing the values.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/xen.c                            |   2 +
 .../selftests/kvm/x86_64/xen_shinfo_test.c    | 115 +++++++++++++++---
 2 files changed, 97 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 7acac5dfe2f85..60a9bdd4199fc 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -884,6 +884,8 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
 		if (data->u.runstate.state <= RUNSTATE_offline)
 			kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+		else if (vcpu->arch.xen.runstate_cache.active)
+			kvm_xen_update_runstate_guest(vcpu, false);
 		r = 0;
 		break;
 
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index c9b0110d73f3b..721f6a693799b 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -88,14 +88,20 @@ struct pvclock_wall_clock {
 } __attribute__((__packed__));
 
 struct vcpu_runstate_info {
-    uint32_t state;
-    uint64_t state_entry_time;
-    uint64_t time[4];
+	uint32_t state;
+	uint64_t state_entry_time;
+	uint64_t time[5]; /* Extra field for overrun check */
 };
 
+struct compat_vcpu_runstate_info {
+	uint32_t state;
+	uint64_t state_entry_time;
+	uint64_t time[5];
+} __attribute__((__packed__));;
+
 struct arch_vcpu_info {
-    unsigned long cr2;
-    unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+	unsigned long cr2;
+	unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
 };
 
 struct vcpu_info {
@@ -1013,22 +1019,91 @@ int main(int argc, char *argv[])
 				       runstate_names[i], rs->time[i]);
 			}
 		}
-		TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
-		TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
-			    "State entry time mismatch");
-		TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
-			    "Running time mismatch");
-		TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
-			    "Runnable time mismatch");
-		TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
-			    "Blocked time mismatch");
-		TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
-			    "Offline time mismatch");
-
-		TEST_ASSERT(rs->state_entry_time == rs->time[0] +
-			    rs->time[1] + rs->time[2] + rs->time[3],
-			    "runstate times don't add up");
+
+		/*
+		 * Exercise runstate info at all points across the page boundary, in
+		 * 32-bit and 64-bit mode. In particular, test the case where it is
+		 * configured in 32-bit mode and then switched to 64-bit mode while
+		 * active, which takes it onto the second page.
+		 */
+		unsigned long runstate_addr;
+		struct compat_vcpu_runstate_info *crs;
+		for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
+		     runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
+
+			rs = addr_gpa2hva(vm, runstate_addr);
+			crs = (void *)rs;
+
+			memset(rs, 0xa5, sizeof(*rs));
+
+			/* Set to compatibility mode */
+			lm.u.long_mode = 0;
+			vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+			/* Set runstate to new address (kernel will write it) */
+			struct kvm_xen_vcpu_attr st = {
+				.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+				.u.gpa = runstate_addr,
+			};
+			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+
+			if (verbose)
+				printf("Compatibility runstate at %08lx\n", runstate_addr);
+
+			TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
+			TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
+				    "State entry time mismatch");
+			TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+				    "Running time mismatch");
+			TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+				    "Runnable time mismatch");
+			TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+				    "Blocked time mismatch");
+			TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+				    "Offline time mismatch");
+			TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+				    "Structure overrun");
+			TEST_ASSERT(crs->state_entry_time == crs->time[0] +
+				    crs->time[1] + crs->time[2] + crs->time[3],
+				    "runstate times don't add up");
+
+
+			/* Now switch to 64-bit mode */
+			lm.u.long_mode = 1;
+			vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+			memset(rs, 0xa5, sizeof(*rs));
+
+			/* Don't change the address, just trigger a write */
+			struct kvm_xen_vcpu_attr adj = {
+				.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
+				.u.runstate.state = (uint64_t)-1
+			};
+			vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
+
+			if (verbose)
+				printf("64-bit runstate at %08lx\n", runstate_addr);
+
+			TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+			TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+				    "State entry time mismatch");
+			TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+				    "Running time mismatch");
+			TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+				    "Runnable time mismatch");
+			TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+				    "Blocked time mismatch");
+			TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+				    "Offline time mismatch");
+			TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+				    "Structure overrun");
+
+			TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+				    rs->time[1] + rs->time[2] + rs->time[3],
+				    "runstate times don't add up");
+		}
 	}
+
 	kvm_vm_free(vm);
 	return 0;
 }
-- 
GitLab


From aba3caef58626f09b629085440eec5dd1368669a Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Thu, 13 Oct 2022 21:12:22 +0000
Subject: [PATCH 2936/4122] KVM: Shorten gfn_to_pfn_cache function names

Formalize "gpc" as the acronym and use it in function names.

No functional change intended.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c       |  8 ++++----
 arch/x86/kvm/xen.c       | 30 +++++++++++++++---------------
 include/linux/kvm_host.h | 21 ++++++++++-----------
 virt/kvm/pfncache.c      | 20 ++++++++++----------
 4 files changed, 39 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59fd55badd732..246bdc9a91547 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3037,12 +3037,12 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 	unsigned long flags;
 
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
-					   offset + sizeof(*guest_hv_clock))) {
+	while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa,
+			      offset + sizeof(*guest_hv_clock))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
-						 offset + sizeof(*guest_hv_clock)))
+		if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa,
+				    offset + sizeof(*guest_hv_clock)))
 			return;
 
 		read_lock_irqsave(&gpc->lock, flags);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 60a9bdd4199fc..9187d024d0066 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -273,14 +273,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	 * gfn_to_pfn caches that cover the region.
 	 */
 	read_lock_irqsave(&gpc1->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc1, gpc1->gpa, user_len1)) {
+	while (!kvm_gpc_check(v->kvm, gpc1, gpc1->gpa, user_len1)) {
 		read_unlock_irqrestore(&gpc1->lock, flags);
 
 		/* When invoked from kvm_sched_out() we cannot sleep */
 		if (atomic)
 			return;
 
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
+		if (kvm_gpc_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
 			return;
 
 		read_lock_irqsave(&gpc1->lock, flags);
@@ -309,7 +309,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		 */
 		read_lock(&gpc2->lock);
 
-		if (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
+		if (!kvm_gpc_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
 			read_unlock(&gpc2->lock);
 			read_unlock_irqrestore(&gpc1->lock, flags);
 
@@ -489,12 +489,12 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 	 * little more honest about it.
 	 */
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
-					   sizeof(struct vcpu_info))) {
+	while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa,
+			      sizeof(struct vcpu_info))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
-						 sizeof(struct vcpu_info)))
+		if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa,
+				    sizeof(struct vcpu_info)))
 			return;
 
 		read_lock_irqsave(&gpc->lock, flags);
@@ -554,8 +554,8 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
-					   sizeof(struct vcpu_info))) {
+	while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa,
+			      sizeof(struct vcpu_info))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
 		/*
@@ -569,8 +569,8 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 		if (in_atomic() || !task_is_running(current))
 			return 1;
 
-		if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
-						 sizeof(struct vcpu_info))) {
+		if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa,
+				    sizeof(struct vcpu_info))) {
 			/*
 			 * If this failed, userspace has screwed up the
 			 * vcpu_info mapping. No interrupts for you.
@@ -1167,7 +1167,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 
 	read_lock_irqsave(&gpc->lock, flags);
 	idx = srcu_read_lock(&kvm->srcu);
-	if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+	if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
 		goto out_rcu;
 
 	ret = false;
@@ -1564,7 +1564,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	idx = srcu_read_lock(&kvm->srcu);
 
 	read_lock_irqsave(&gpc->lock, flags);
-	if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+	if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
 		goto out_rcu;
 
 	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
@@ -1598,7 +1598,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 		gpc = &vcpu->arch.xen.vcpu_info_cache;
 
 		read_lock_irqsave(&gpc->lock, flags);
-		if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+		if (!kvm_gpc_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
 			/*
 			 * Could not access the vcpu_info. Set the bit in-kernel
 			 * and prod the vCPU to deliver it for itself.
@@ -1696,7 +1696,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 			break;
 
 		idx = srcu_read_lock(&kvm->srcu);
-		rc = kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE);
+		rc = kvm_gpc_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE);
 		srcu_read_unlock(&kvm->srcu, idx);
 	} while(!rc);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index b8d12356f015f..8f874a9643139 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1288,16 +1288,15 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
  *                 -EFAULT for an untranslatable guest physical address.
  *
  * This primes a gfn_to_pfn_cache and links it into the @kvm's list for
- * invalidations to be processed.  Callers are required to use
- * kvm_gfn_to_pfn_cache_check() to ensure that the cache is valid before
- * accessing the target page.
+ * invalidations to be processed.  Callers are required to use kvm_gpc_check()
+ * to ensure that the cache is valid before accessing the target page.
  */
 int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 		     struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
 		     gpa_t gpa, unsigned long len);
 
 /**
- * kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
+ * kvm_gpc_check - check validity of a gfn_to_pfn_cache.
  *
  * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
@@ -1314,11 +1313,11 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
  * Callers in IN_GUEST_MODE may do so without locking, although they should
  * still hold a read lock on kvm->scru for the memslot checks.
  */
-bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-				gpa_t gpa, unsigned long len);
+bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+		   unsigned long len);
 
 /**
- * kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache.
+ * kvm_gpc_refresh - update a previously initialized cache.
  *
  * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
@@ -1335,11 +1334,11 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
  * still lock and check the cache status, as this function does not return
  * with the lock still held to permit access.
  */
-int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-				 gpa_t gpa, unsigned long len);
+int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+		    unsigned long len);
 
 /**
- * kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
+ * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache.
  *
  * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
@@ -1348,7 +1347,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
  * but at least the mapping from GPA to userspace HVA will remain cached
  * and can be reused on a subsequent refresh.
  */
-void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 
 /**
  * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 5f83321bfd2a0..8c4db3dcaf6d5 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -76,8 +76,8 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 	}
 }
 
-bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-				gpa_t gpa, unsigned long len)
+bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+		   unsigned long len)
 {
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 
@@ -96,7 +96,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 
 	return true;
 }
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
+EXPORT_SYMBOL_GPL(kvm_gpc_check);
 
 static void gpc_unmap_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
 {
@@ -238,8 +238,8 @@ out_error:
 	return -EFAULT;
 }
 
-int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-				 gpa_t gpa, unsigned long len)
+int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+		    unsigned long len)
 {
 	struct kvm_memslots *slots = kvm_memslots(kvm);
 	unsigned long page_offset = gpa & ~PAGE_MASK;
@@ -333,9 +333,9 @@ out_unlock:
 
 	return ret;
 }
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_refresh);
+EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
 
-void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 {
 	void *old_khva;
 	kvm_pfn_t old_pfn;
@@ -360,7 +360,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 
 	gpc_unmap_khva(kvm, old_pfn, old_khva);
 }
-EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
+EXPORT_SYMBOL_GPL(kvm_gpc_unmap);
 
 void kvm_gpc_init(struct gfn_to_pfn_cache *gpc)
 {
@@ -396,7 +396,7 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 		gpc->active = true;
 		write_unlock_irq(&gpc->lock);
 	}
-	return kvm_gfn_to_pfn_cache_refresh(kvm, gpc, gpa, len);
+	return kvm_gpc_refresh(kvm, gpc, gpa, len);
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_activate);
 
@@ -416,7 +416,7 @@ void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 		list_del(&gpc->list);
 		spin_unlock(&kvm->gpc_lock);
 
-		kvm_gfn_to_pfn_cache_unmap(kvm, gpc);
+		kvm_gpc_unmap(kvm, gpc);
 	}
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);
-- 
GitLab


From c1a81f3bd9b40edc1444dfaeac33f92cff0e770a Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Thu, 13 Oct 2022 21:12:23 +0000
Subject: [PATCH 2937/4122] KVM: x86: Remove unused argument in
 gpc_unmap_khva()

Remove the unused @kvm argument from gpc_unmap_khva().

Signed-off-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/pfncache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 8c4db3dcaf6d5..b4295474519f8 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -98,7 +98,7 @@ bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_check);
 
-static void gpc_unmap_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
+static void gpc_unmap_khva(kvm_pfn_t pfn, void *khva)
 {
 	/* Unmap the old pfn/page if it was mapped before. */
 	if (!is_error_noslot_pfn(pfn) && khva) {
@@ -177,7 +177,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 			 * the existing mapping and didn't create a new one.
 			 */
 			if (new_khva != old_khva)
-				gpc_unmap_khva(kvm, new_pfn, new_khva);
+				gpc_unmap_khva(new_pfn, new_khva);
 
 			kvm_release_pfn_clean(new_pfn);
 
@@ -329,7 +329,7 @@ out_unlock:
 	mutex_unlock(&gpc->refresh_lock);
 
 	if (unmap_old)
-		gpc_unmap_khva(kvm, old_pfn, old_khva);
+		gpc_unmap_khva(old_pfn, old_khva);
 
 	return ret;
 }
@@ -358,7 +358,7 @@ void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 	write_unlock_irq(&gpc->lock);
 	mutex_unlock(&gpc->refresh_lock);
 
-	gpc_unmap_khva(kvm, old_pfn, old_khva);
+	gpc_unmap_khva(old_pfn, old_khva);
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_unmap);
 
-- 
GitLab


From df0bb47baa95aad133820b149851d5b94cbc6790 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 30 Nov 2022 11:14:35 -0500
Subject: [PATCH 2938/4122] KVM: x86: fix uninitialized variable use on
 KVM_REQ_TRIPLE_FAULT

If a triple fault was fixed by kvm_x86_ops.nested_ops->triple_fault (by
turning it into a vmexit), there is no need to leave vcpu_enter_guest().
Any vcpu->requests will be caught later before the actual vmentry,
and in fact vcpu_enter_guest() was not initializing the "r" variable.
Depending on the compiler's whims, this could cause the
x86_64/triple_fault_event_test test to fail.

Cc: Maxim Levitsky <mlevitsk@redhat.com>
Fixes: 92e7d5c83aff ("KVM: x86: allow L1 to not intercept triple fault")
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 246bdc9a91547..7f850dfb40861 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -10280,8 +10280,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 				vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
 				vcpu->mmio_needed = 0;
 				r = 0;
+				goto out;
 			}
-			goto out;
 		}
 		if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
 			/* Page is swapped out. Do synthetic halt */
-- 
GitLab


From 032e160305f6872e590c77f11896fb28365c6d6c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 28 Nov 2022 17:24:42 -0800
Subject: [PATCH 2939/4122] xfs: invalidate block device page cache during
 unmount

Every now and then I see fstests failures on aarch64 (64k pages) that
trigger on the following sequence:

mkfs.xfs $dev
mount $dev $mnt
touch $mnt/a
umount $mnt
xfs_db -c 'path /a' -c 'print' $dev

99% of the time this succeeds, but every now and then xfs_db cannot find
/a and fails.  This turns out to be a race involving udev/blkid, the
page cache for the block device, and the xfs_db process.

udev is triggered whenever anyone closes a block device or unmounts it.
The default udev rules invoke blkid to read the fs super and create
symlinks to the bdev under /dev/disk.  For this, it uses buffered reads
through the page cache.

xfs_db also uses buffered reads to examine metadata.  There is no
coordination between xfs_db and udev, which means that they can run
concurrently.  Note there is no coordination between the kernel and
blkid either.

On a system with 64k pages, the page cache can cache the superblock and
the root inode (and hence the root dir) with the same 64k page.  If
udev spawns blkid after the mkfs and the system is busy enough that it
is still running when xfs_db starts up, they'll both read from the same
page in the pagecache.

The unmount writes updated inode metadata to disk directly.  The XFS
buffer cache does not use the bdev pagecache, nor does it invalidate the
pagecache on umount.  If the above scenario occurs, the pagecache no
longer reflects what's on disk, xfs_db reads the stale metadata, and
fails to find /a.  Most of the time this succeeds because closing a bdev
invalidates the page cache, but when processes race, everyone loses.

Fix the problem by invalidating the bdev pagecache after flushing the
bdev, so that xfs_db will see up to date metadata.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_buf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index dde346450952a..54c774af6e1c6 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1945,6 +1945,7 @@ xfs_free_buftarg(
 	list_lru_destroy(&btp->bt_lru);
 
 	blkdev_issue_flush(btp->bt_bdev);
+	invalidate_bdev(btp->bt_bdev);
 	fs_put_dax(btp->bt_daxdev, btp->bt_mount);
 
 	kmem_free(btp);
-- 
GitLab


From fd5beaff250d7e88912a937fad072d9d24f219da Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 28 Nov 2022 17:24:42 -0800
Subject: [PATCH 2940/4122] xfs: use memcpy, not strncpy, to format the attr
 prefix during listxattr

When -Wstringop-truncation is enabled, the compiler complains about
truncation of the null byte at the end of the xattr name prefix.  This
is intentional, since we're concatenating the two strings together and
do _not_ want a null byte in the middle of the name.

We've already ensured that the name buffer is long enough to handle
prefix and name, and the prefix_len is supposed to be the length of the
prefix string without the null byte, so use memcpy here instead.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_xattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index c325a28b89a8d..10aa1fd39d2b0 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -210,7 +210,7 @@ __xfs_xattr_put_listent(
 		return;
 	}
 	offset = context->buffer + context->count;
-	strncpy(offset, prefix, prefix_len);
+	memcpy(offset, prefix, prefix_len);
 	offset += prefix_len;
 	strncpy(offset, (char *)name, namelen);			/* real name */
 	offset += namelen;
-- 
GitLab


From e5827a007aa4bb737c63121fd2c77e089b18a372 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 28 Nov 2022 17:24:42 -0800
Subject: [PATCH 2941/4122] xfs: shut up -Wuninitialized in xfsaild_push

-Wuninitialized complains about @target in xfsaild_push being
uninitialized in the case where the waitqueue is active but there is no
last item in the AIL to wait for.  I /think/ it should never be the case
that the subsequent xfs_trans_ail_cursor_first returns a log item and
hence we'll never end up at XFS_LSN_CMP, but let's make this explicit.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_trans_ail.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index f51df7d94ef74..7d4109af193e6 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -422,7 +422,7 @@ xfsaild_push(
 	struct xfs_ail_cursor	cur;
 	struct xfs_log_item	*lip;
 	xfs_lsn_t		lsn;
-	xfs_lsn_t		target;
+	xfs_lsn_t		target = NULLCOMMITLSN;
 	long			tout;
 	int			stuck = 0;
 	int			flushing = 0;
@@ -472,6 +472,8 @@ xfsaild_push(
 
 	XFS_STATS_INC(mp, xs_push_ail);
 
+	ASSERT(target != NULLCOMMITLSN);
+
 	lsn = lip->li_lsn;
 	while ((XFS_LSN_CMP(lip->li_lsn, target) <= 0)) {
 		int	lock_result;
-- 
GitLab


From 4c6dbfd2756bd83a0085ed804e2bb7be9cc16bc5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Mon, 28 Nov 2022 17:24:43 -0800
Subject: [PATCH 2942/4122] xfs: attach dquots to inode before reading data/cow
 fork mappings

I've been running near-continuous integration testing of online fsck,
and I've noticed that once a day, one of the ARM VMs will fail the test
with out of order records in the data fork.

xfs/804 races fsstress with online scrub (aka scan but do not change
anything), so I think this might be a bug in the core xfs code.  This
also only seems to trigger if one runs the test for more than ~6 minutes
via TIME_FACTOR=13 or something.
https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfstests-dev.git/tree/tests/xfs/804?h=djwong-wtf

I added a debugging patch to the kernel to check the data fork extents
after taking the ILOCK, before dropping ILOCK, and before and after each
bmapping operation.  So far I've narrowed it down to the delalloc code
inserting a record in the wrong place in the iext tree:

xfs_bmap_add_extent_hole_delay, near line 2691:

	case 0:
		/*
		 * New allocation is not contiguous with another
		 * delayed allocation.
		 * Insert a new entry.
		 */
		oldlen = newlen = 0;
		xfs_iunlock_check_datafork(ip);		<-- ok here
		xfs_iext_insert(ip, icur, new, state);
		xfs_iunlock_check_datafork(ip);		<-- bad here
		break;
	}

I recorded the state of the data fork mappings and iext cursor state
when a corrupt data fork is detected immediately after the
xfs_bmap_add_extent_hole_delay call in xfs_bmapi_reserve_delalloc:

ino 0x140bb3 func xfs_bmapi_reserve_delalloc line 4164 data fork:
    ino 0x140bb3 nr 0x0 nr_real 0x0 offset 0xb9 blockcount 0x1f startblock 0x935de2 state 1
    ino 0x140bb3 nr 0x1 nr_real 0x1 offset 0xe6 blockcount 0xa startblock 0xffffffffe0007 state 0
    ino 0x140bb3 nr 0x2 nr_real 0x1 offset 0xd8 blockcount 0xe startblock 0x935e01 state 0

Here we see that a delalloc extent was inserted into the wrong position
in the iext leaf, same as all the other times.  The extra trace data I
collected are as follows:

ino 0x140bb3 fork 0 oldoff 0xe6 oldlen 0x4 oldprealloc 0x6 isize 0xe6000
    ino 0x140bb3 oldgotoff 0xea oldgotstart 0xfffffffffffffffe oldgotcount 0x0 oldgotstate 0
    ino 0x140bb3 crapgotoff 0x0 crapgotstart 0x0 crapgotcount 0x0 crapgotstate 0
    ino 0x140bb3 freshgotoff 0xd8 freshgotstart 0x935e01 freshgotcount 0xe freshgotstate 0
    ino 0x140bb3 nowgotoff 0xe6 nowgotstart 0xffffffffe0007 nowgotcount 0xa nowgotstate 0
    ino 0x140bb3 oldicurpos 1 oldleafnr 2 oldleaf 0xfffffc00f0609a00
    ino 0x140bb3 crapicurpos 2 crapleafnr 2 crapleaf 0xfffffc00f0609a00
    ino 0x140bb3 freshicurpos 1 freshleafnr 2 freshleaf 0xfffffc00f0609a00
    ino 0x140bb3 newicurpos 1 newleafnr 3 newleaf 0xfffffc00f0609a00

The first line shows that xfs_bmapi_reserve_delalloc was called with
whichfork=XFS_DATA_FORK, off=0xe6, len=0x4, prealloc=6.

The second line ("oldgot") shows the contents of @got at the beginning
of the call, which are the results of the first iext lookup in
xfs_buffered_write_iomap_begin.

Line 3 ("crapgot") is the result of duplicating the cursor at the start
of the body of xfs_bmapi_reserve_delalloc and performing a fresh lookup
at @off.

Line 4 ("freshgot") is the result of a new xfs_iext_get_extent right
before the call to xfs_bmap_add_extent_hole_delay.  Totally garbage.

Line 5 ("nowgot") is contents of @got after the
xfs_bmap_add_extent_hole_delay call.

Line 6 is the contents of @icur at the beginning fo the call.  Lines 7-9
are the contents of the iext cursors at the point where the block
mappings were sampled.

I think @oldgot is a HOLESTARTBLOCK extent because the first lookup
didn't find anything, so we filled in imap with "fake hole until the
end".  At the time of the first lookup, I suspect that there's only one
32-block unwritten extent in the mapping (hence oldicurpos==1) but by
the time we get to recording crapgot, crapicurpos==2.

Dave then added:

Ok, that's much simpler to reason about, and implies the smoke is
coming from xfs_buffered_write_iomap_begin() or
xfs_bmapi_reserve_delalloc(). I suspect the former - it does a lot
of stuff with the ILOCK_EXCL held.....

.... including calling xfs_qm_dqattach_locked().

xfs_buffered_write_iomap_begin
  ILOCK_EXCL
  look up icur
  xfs_qm_dqattach_locked
    xfs_qm_dqattach_one
      xfs_qm_dqget_inode
        dquot cache miss
        xfs_iunlock(ip, XFS_ILOCK_EXCL);
        error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp);
        xfs_ilock(ip, XFS_ILOCK_EXCL);
  ....
  xfs_bmapi_reserve_delalloc(icur)

Yup, that's what is letting the magic smoke out -
xfs_qm_dqattach_locked() can cycle the ILOCK. If that happens, we
can pass a stale icur to xfs_bmapi_reserve_delalloc() and it all
goes downhill from there.

Back to Darrick now:

So.  Fix this by moving the dqattach_locked call up before we take the
ILOCK, like all the other callers in that file.

Fixes: a526c85c2236 ("xfs: move xfs_file_iomap_begin_delay around") # goes further back than this
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/xfs_iomap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 1005f1e365454..68436370927d5 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -978,6 +978,10 @@ xfs_buffered_write_iomap_begin(
 
 	ASSERT(!XFS_IS_REALTIME_INODE(ip));
 
+	error = xfs_qm_dqattach(ip);
+	if (error)
+		return error;
+
 	error = xfs_ilock_for_iomap(ip, flags, &lockmode);
 	if (error)
 		return error;
@@ -1081,10 +1085,6 @@ xfs_buffered_write_iomap_begin(
 			allocfork = XFS_COW_FORK;
 	}
 
-	error = xfs_qm_dqattach_locked(ip, false);
-	if (error)
-		goto out_unlock;
-
 	if (eof && offset + count > XFS_ISIZE(ip)) {
 		/*
 		 * Determine the initial size of the preallocation.
-- 
GitLab


From 1eb52a6a71981b80f9acbd915acd6a05a5037196 Mon Sep 17 00:00:00 2001
From: Guo Xuenan <guoxuenan@huawei.com>
Date: Wed, 30 Nov 2022 09:25:46 -0800
Subject: [PATCH 2943/4122] xfs: wait iclog complete before tearing down AIL

Fix uaf in xfs_trans_ail_delete during xlog force shutdown.
In commit cd6f79d1fb32 ("xfs: run callbacks before waking waiters in
xlog_state_shutdown_callbacks") changed the order of running callbacks
and wait for iclog completion to avoid unmount path untimely destroy AIL.
But which seems not enough to ensue this, adding mdelay in
`xfs_buf_item_unpin` can prove that.

The reproduction is as follows. To ensure destroy AIL safely,
we should wait all xlog ioend workers done and sync the AIL.

==================================================================
BUG: KASAN: use-after-free in xfs_trans_ail_delete+0x240/0x2a0
Read of size 8 at addr ffff888023169400 by task kworker/1:1H/43

CPU: 1 PID: 43 Comm: kworker/1:1H Tainted: G        W
6.1.0-rc1-00002-gc28266863c4a #137
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.13.0-1ubuntu1.1 04/01/2014
Workqueue: xfs-log/sda xlog_ioend_work
Call Trace:
 <TASK>
 dump_stack_lvl+0x4d/0x66
 print_report+0x171/0x4a6
 kasan_report+0xb3/0x130
 xfs_trans_ail_delete+0x240/0x2a0
 xfs_buf_item_done+0x7b/0xa0
 xfs_buf_ioend+0x1e9/0x11f0
 xfs_buf_item_unpin+0x4c8/0x860
 xfs_trans_committed_bulk+0x4c2/0x7c0
 xlog_cil_committed+0xab6/0xfb0
 xlog_cil_process_committed+0x117/0x1e0
 xlog_state_shutdown_callbacks+0x208/0x440
 xlog_force_shutdown+0x1b3/0x3a0
 xlog_ioend_work+0xef/0x1d0
 process_one_work+0x6f9/0xf70
 worker_thread+0x578/0xf30
 kthread+0x28c/0x330
 ret_from_fork+0x1f/0x30
 </TASK>

Allocated by task 9606:
 kasan_save_stack+0x1e/0x40
 kasan_set_track+0x21/0x30
 __kasan_kmalloc+0x7a/0x90
 __kmalloc+0x59/0x140
 kmem_alloc+0xb2/0x2f0
 xfs_trans_ail_init+0x20/0x320
 xfs_log_mount+0x37e/0x690
 xfs_mountfs+0xe36/0x1b40
 xfs_fs_fill_super+0xc5c/0x1a70
 get_tree_bdev+0x3c5/0x6c0
 vfs_get_tree+0x85/0x250
 path_mount+0xec3/0x1830
 do_mount+0xef/0x110
 __x64_sys_mount+0x150/0x1f0
 do_syscall_64+0x35/0x80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

Freed by task 9662:
 kasan_save_stack+0x1e/0x40
 kasan_set_track+0x21/0x30
 kasan_save_free_info+0x2a/0x40
 __kasan_slab_free+0x105/0x1a0
 __kmem_cache_free+0x99/0x2d0
 kvfree+0x3a/0x40
 xfs_log_unmount+0x60/0xf0
 xfs_unmountfs+0xf3/0x1d0
 xfs_fs_put_super+0x78/0x300
 generic_shutdown_super+0x151/0x400
 kill_block_super+0x9a/0xe0
 deactivate_locked_super+0x82/0xe0
 deactivate_super+0x91/0xb0
 cleanup_mnt+0x32a/0x4a0
 task_work_run+0x15f/0x240
 exit_to_user_mode_prepare+0x188/0x190
 syscall_exit_to_user_mode+0x12/0x30
 do_syscall_64+0x42/0x80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

The buggy address belongs to the object at ffff888023169400
 which belongs to the cache kmalloc-128 of size 128
The buggy address is located 0 bytes inside of
 128-byte region [ffff888023169400, ffff888023169480)

The buggy address belongs to the physical page:
page:ffffea00008c5a00 refcount:1 mapcount:0 mapping:0000000000000000
index:0xffff888023168f80 pfn:0x23168
head:ffffea00008c5a00 order:1 compound_mapcount:0 compound_pincount:0
flags: 0x1fffff80010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff)
raw: 001fffff80010200 ffffea00006b3988 ffffea0000577a88 ffff88800f842ac0
raw: ffff888023168f80 0000000000150007 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff888023169300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888023169380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff888023169400: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                   ^
 ffff888023169480: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
 ffff888023169500: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================
Disabling lock debugging due to kernel taint

Fixes: cd6f79d1fb32 ("xfs: run callbacks before waking waiters in xlog_state_shutdown_callbacks")
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_log.c | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0141d9907d31b..fc61cc0240236 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -888,6 +888,23 @@ xlog_force_iclog(
 	return xlog_state_release_iclog(iclog->ic_log, iclog, NULL);
 }
 
+/*
+ * Cycle all the iclogbuf locks to make sure all log IO completion
+ * is done before we tear down these buffers.
+ */
+static void
+xlog_wait_iclog_completion(struct xlog *log)
+{
+	int		i;
+	struct xlog_in_core	*iclog = log->l_iclog;
+
+	for (i = 0; i < log->l_iclog_bufs; i++) {
+		down(&iclog->ic_sema);
+		up(&iclog->ic_sema);
+		iclog = iclog->ic_next;
+	}
+}
+
 /*
  * Wait for the iclog and all prior iclogs to be written disk as required by the
  * log force state machine. Waiting on ic_force_wait ensures iclog completions
@@ -1113,6 +1130,14 @@ xfs_log_unmount(
 {
 	xfs_log_clean(mp);
 
+	/*
+	 * If shutdown has come from iclog IO context, the log
+	 * cleaning will have been skipped and so we need to wait
+	 * for the iclog to complete shutdown processing before we
+	 * tear anything down.
+	 */
+	xlog_wait_iclog_completion(mp->m_log);
+
 	xfs_buftarg_drain(mp->m_ddev_targp);
 
 	xfs_trans_ail_destroy(mp);
@@ -2115,17 +2140,6 @@ xlog_dealloc_log(
 	xlog_in_core_t	*iclog, *next_iclog;
 	int		i;
 
-	/*
-	 * Cycle all the iclogbuf locks to make sure all log IO completion
-	 * is done before we tear down these buffers.
-	 */
-	iclog = log->l_iclog;
-	for (i = 0; i < log->l_iclog_bufs; i++) {
-		down(&iclog->ic_sema);
-		up(&iclog->ic_sema);
-		iclog = iclog->ic_next;
-	}
-
 	/*
 	 * Destroy the CIL after waiting for iclog IO completion because an
 	 * iclog EIO error will try to shut down the log, which accesses the
-- 
GitLab


From 575689fc0ffa6c4bb4e72fd18e31a6525a6124e0 Mon Sep 17 00:00:00 2001
From: Guo Xuenan <guoxuenan@huawei.com>
Date: Wed, 30 Nov 2022 09:25:46 -0800
Subject: [PATCH 2944/4122] xfs: fix super block buf log item UAF during force
 shutdown

xfs log io error will trigger xlog shut down, and end_io worker call
xlog_state_shutdown_callbacks to unpin and release the buf log item.
The race condition is that when there are some thread doing transaction
commit and happened not to be intercepted by xlog_is_shutdown, then,
these log item will be insert into CIL, when unpin and release these
buf log item, UAF will occur. BTW, add delay before `xlog_cil_commit`
can increase recurrence probability.

The following call graph actually encountered this bad situation.
fsstress                    io end worker kworker/0:1H-216
                            xlog_ioend_work
                              ->xlog_force_shutdown
                                ->xlog_state_shutdown_callbacks
                                  ->xlog_cil_process_committed
                                    ->xlog_cil_committed
                                      ->xfs_trans_committed_bulk
->xfs_trans_apply_sb_deltas             ->li_ops->iop_unpin(lip, 1);
  ->xfs_trans_getsb
    ->_xfs_trans_bjoin
      ->xfs_buf_item_init
        ->if (bip) { return 0;} //relog
->xlog_cil_commit
  ->xlog_cil_insert_items //insert into CIL
                                           ->xfs_buf_ioend_fail(bp);
                                             ->xfs_buf_ioend
                                               ->xfs_buf_item_done
                                                 ->xfs_buf_item_relse
                                                   ->xfs_buf_item_free

when cil push worker gather percpu cil and insert super block buf log item
into ctx->log_items then uaf occurs.

==================================================================
BUG: KASAN: use-after-free in xlog_cil_push_work+0x1c8f/0x22f0
Write of size 8 at addr ffff88801800f3f0 by task kworker/u4:4/105

CPU: 0 PID: 105 Comm: kworker/u4:4 Tainted: G W
6.1.0-rc1-00001-g274115149b42 #136
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.13.0-1ubuntu1.1 04/01/2014
Workqueue: xfs-cil/sda xlog_cil_push_work
Call Trace:
 <TASK>
 dump_stack_lvl+0x4d/0x66
 print_report+0x171/0x4a6
 kasan_report+0xb3/0x130
 xlog_cil_push_work+0x1c8f/0x22f0
 process_one_work+0x6f9/0xf70
 worker_thread+0x578/0xf30
 kthread+0x28c/0x330
 ret_from_fork+0x1f/0x30
 </TASK>

Allocated by task 2145:
 kasan_save_stack+0x1e/0x40
 kasan_set_track+0x21/0x30
 __kasan_slab_alloc+0x54/0x60
 kmem_cache_alloc+0x14a/0x510
 xfs_buf_item_init+0x160/0x6d0
 _xfs_trans_bjoin+0x7f/0x2e0
 xfs_trans_getsb+0xb6/0x3f0
 xfs_trans_apply_sb_deltas+0x1f/0x8c0
 __xfs_trans_commit+0xa25/0xe10
 xfs_symlink+0xe23/0x1660
 xfs_vn_symlink+0x157/0x280
 vfs_symlink+0x491/0x790
 do_symlinkat+0x128/0x220
 __x64_sys_symlink+0x7a/0x90
 do_syscall_64+0x35/0x80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd

Freed by task 216:
 kasan_save_stack+0x1e/0x40
 kasan_set_track+0x21/0x30
 kasan_save_free_info+0x2a/0x40
 __kasan_slab_free+0x105/0x1a0
 kmem_cache_free+0xb6/0x460
 xfs_buf_ioend+0x1e9/0x11f0
 xfs_buf_item_unpin+0x3d6/0x840
 xfs_trans_committed_bulk+0x4c2/0x7c0
 xlog_cil_committed+0xab6/0xfb0
 xlog_cil_process_committed+0x117/0x1e0
 xlog_state_shutdown_callbacks+0x208/0x440
 xlog_force_shutdown+0x1b3/0x3a0
 xlog_ioend_work+0xef/0x1d0
 process_one_work+0x6f9/0xf70
 worker_thread+0x578/0xf30
 kthread+0x28c/0x330
 ret_from_fork+0x1f/0x30

The buggy address belongs to the object at ffff88801800f388
 which belongs to the cache xfs_buf_item of size 272
The buggy address is located 104 bytes inside of
 272-byte region [ffff88801800f388, ffff88801800f498)

The buggy address belongs to the physical page:
page:ffffea0000600380 refcount:1 mapcount:0 mapping:0000000000000000
index:0xffff88801800f208 pfn:0x1800e
head:ffffea0000600380 order:1 compound_mapcount:0 compound_pincount:0
flags: 0x1fffff80010200(slab|head|node=0|zone=1|lastcpupid=0x1fffff)
raw: 001fffff80010200 ffffea0000699788 ffff88801319db50 ffff88800fb50640
raw: ffff88801800f208 000000000015000a 00000001ffffffff 0000000000000000
page dumped because: kasan: bad access detected

Memory state around the buggy address:
 ffff88801800f280: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff88801800f300: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc
>ffff88801800f380: fc fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb
                                                             ^
 ffff88801800f400: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
 ffff88801800f480: fb fb fb fc fc fc fc fc fc fc fc fc fc fc fc fc
==================================================================
Disabling lock debugging due to kernel taint

Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_buf_item.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 522d450a94b18..df7322ed73fa9 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -1018,6 +1018,8 @@ xfs_buf_item_relse(
 	trace_xfs_buf_item_relse(bp, _RET_IP_);
 	ASSERT(!test_bit(XFS_LI_IN_AIL, &bip->bli_item.li_flags));
 
+	if (atomic_read(&bip->bli_refcount))
+		return;
 	bp->b_log_item = NULL;
 	xfs_buf_rele(bp);
 	xfs_buf_item_free(bip);
-- 
GitLab


From 214b0a88c46d5f32d80abe0d1bc2eea1cbd38f11 Mon Sep 17 00:00:00 2001
From: Metin Kaya <metikaya@amazon.com>
Date: Mon, 21 Mar 2022 11:05:32 +0000
Subject: [PATCH 2945/4122] KVM: x86/xen: add support for 32-bit guests in
 SCHEDOP_poll

This patch introduces compat version of struct sched_poll for
SCHEDOP_poll sub-operation of sched_op hypercall, reads correct amount
of data (16 bytes in 32-bit case, 24 bytes otherwise) by using new
compat_sched_poll struct, copies it to sched_poll properly, and lets
rest of the code run as is.

Signed-off-by: Metin Kaya <metikaya@amazon.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
---
 arch/x86/kvm/xen.c | 33 +++++++++++++++++++++++++++++----
 arch/x86/kvm/xen.h |  7 +++++++
 2 files changed, 36 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 9187d024d0066..3e434dc339fb5 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1201,20 +1201,45 @@ static bool kvm_xen_schedop_poll(struct kvm_vcpu *vcpu, bool longmode,
 	evtchn_port_t port, *ports;
 	gpa_t gpa;
 
-	if (!longmode || !lapic_in_kernel(vcpu) ||
+	if (!lapic_in_kernel(vcpu) ||
 	    !(vcpu->kvm->arch.xen_hvm_config.flags & KVM_XEN_HVM_CONFIG_EVTCHN_SEND))
 		return false;
 
 	idx = srcu_read_lock(&vcpu->kvm->srcu);
 	gpa = kvm_mmu_gva_to_gpa_system(vcpu, param, NULL);
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
-
-	if (!gpa || kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
-					sizeof(sched_poll))) {
+	if (!gpa) {
 		*r = -EFAULT;
 		return true;
 	}
 
+	if (IS_ENABLED(CONFIG_64BIT) && !longmode) {
+		struct compat_sched_poll sp32;
+
+		/* Sanity check that the compat struct definition is correct */
+		BUILD_BUG_ON(sizeof(sp32) != 16);
+
+		if (kvm_vcpu_read_guest(vcpu, gpa, &sp32, sizeof(sp32))) {
+			*r = -EFAULT;
+			return true;
+		}
+
+		/*
+		 * This is a 32-bit pointer to an array of evtchn_port_t which
+		 * are uint32_t, so once it's converted no further compat
+		 * handling is needed.
+		 */
+		sched_poll.ports = (void *)(unsigned long)(sp32.ports);
+		sched_poll.nr_ports = sp32.nr_ports;
+		sched_poll.timeout = sp32.timeout;
+	} else {
+		if (kvm_vcpu_read_guest(vcpu, gpa, &sched_poll,
+					sizeof(sched_poll))) {
+			*r = -EFAULT;
+			return true;
+		}
+	}
+
 	if (unlikely(sched_poll.nr_ports > 1)) {
 		/* Xen (unofficially) limits number of pollers to 128 */
 		if (sched_poll.nr_ports > 128) {
diff --git a/arch/x86/kvm/xen.h b/arch/x86/kvm/xen.h
index 8503d2c6891e3..ea33d80a0c51f 100644
--- a/arch/x86/kvm/xen.h
+++ b/arch/x86/kvm/xen.h
@@ -207,4 +207,11 @@ struct compat_vcpu_runstate_info {
     uint64_t time[4];
 } __attribute__((packed));
 
+struct compat_sched_poll {
+	/* This is actually a guest virtual address which points to ports. */
+	uint32_t ports;
+	unsigned int nr_ports;
+	uint64_t timeout;
+};
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
-- 
GitLab


From 8c82a0b3ba1a411b84af5d43a4cc5994efa897ec Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Thu, 13 Oct 2022 21:12:24 +0000
Subject: [PATCH 2946/4122] KVM: Store immutable gfn_to_pfn_cache properties

Move the assignment of immutable properties @kvm, @vcpu, and @usage to
the initializer.  Make _activate() and _deactivate() use stored values.

Note, @len is also effectively immutable for most cases, but not in the
case of the Xen runstate cache, which may be split across two pages and
the length of the first segment will depend on its address.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
[sean: handle @len in a separate patch]
Signed-off-by: Sean Christopherson <seanjc@google.com>
[dwmw2: acknowledge that @len can actually change for some use cases]
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/x86.c        | 14 ++++-----
 arch/x86/kvm/xen.c        | 65 ++++++++++++++++++---------------------
 include/linux/kvm_host.h  | 37 +++++++++++-----------
 include/linux/kvm_types.h |  1 +
 virt/kvm/pfncache.c       | 22 ++++++++-----
 5 files changed, 69 insertions(+), 70 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7f850dfb40861..b5e7aea221106 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2317,13 +2317,11 @@ static void kvm_write_system_time(struct kvm_vcpu *vcpu, gpa_t system_time,
 	kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
 
 	/* we verify if the enable bit is set... */
-	if (system_time & 1) {
-		kvm_gpc_activate(vcpu->kvm, &vcpu->arch.pv_time, vcpu,
-				 KVM_HOST_USES_PFN, system_time & ~1ULL,
+	if (system_time & 1)
+		kvm_gpc_activate(&vcpu->arch.pv_time, system_time & ~1ULL,
 				 sizeof(struct pvclock_vcpu_time_info));
-	} else {
-		kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
-	}
+	else
+		kvm_gpc_deactivate(&vcpu->arch.pv_time);
 
 	return;
 }
@@ -3391,7 +3389,7 @@ static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data)
 
 static void kvmclock_reset(struct kvm_vcpu *vcpu)
 {
-	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.pv_time);
+	kvm_gpc_deactivate(&vcpu->arch.pv_time);
 	vcpu->arch.time = 0;
 }
 
@@ -11542,7 +11540,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
 	vcpu->arch.regs_avail = ~0;
 	vcpu->arch.regs_dirty = ~0;
 
-	kvm_gpc_init(&vcpu->arch.pv_time);
+	kvm_gpc_init(&vcpu->arch.pv_time, vcpu->kvm, vcpu, KVM_HOST_USES_PFN);
 
 	if (!irqchip_in_kernel(vcpu->kvm) || kvm_vcpu_is_reset_bsp(vcpu))
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 3e434dc339fb5..55257c2a1610f 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -42,13 +42,12 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	int idx = srcu_read_lock(&kvm->srcu);
 
 	if (gfn == GPA_INVALID) {
-		kvm_gpc_deactivate(kvm, gpc);
+		kvm_gpc_deactivate(gpc);
 		goto out;
 	}
 
 	do {
-		ret = kvm_gpc_activate(kvm, gpc, NULL, KVM_HOST_USES_PFN, gpa,
-				       PAGE_SIZE);
+		ret = kvm_gpc_activate(gpc, gpa, PAGE_SIZE);
 		if (ret)
 			goto out;
 
@@ -323,8 +322,8 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 			 * to the second page now because the guest changed to
 			 * 64-bit mode, the second GPC won't have been set up.
 			 */
-			if (kvm_gpc_activate(v->kvm, gpc2, NULL, KVM_HOST_USES_PFN,
-					     gpc1->gpa + user_len1, user_len2))
+			if (kvm_gpc_activate(gpc2, gpc1->gpa + user_len1,
+					     user_len2))
 				return;
 
 			/*
@@ -711,15 +710,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			     offsetof(struct compat_vcpu_info, time));
 
 		if (data->u.gpa == GPA_INVALID) {
-			kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
+			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
 			r = 0;
 			break;
 		}
 
-		r = kvm_gpc_activate(vcpu->kvm,
-				     &vcpu->arch.xen.vcpu_info_cache, NULL,
-				     KVM_HOST_USES_PFN, data->u.gpa,
-				     sizeof(struct vcpu_info));
+		r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_info_cache,
+				     data->u.gpa, sizeof(struct vcpu_info));
 		if (!r)
 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
@@ -727,15 +724,13 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
 	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
 		if (data->u.gpa == GPA_INVALID) {
-			kvm_gpc_deactivate(vcpu->kvm,
-					   &vcpu->arch.xen.vcpu_time_info_cache);
+			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
 			r = 0;
 			break;
 		}
 
-		r = kvm_gpc_activate(vcpu->kvm,
-				     &vcpu->arch.xen.vcpu_time_info_cache,
-				     NULL, KVM_HOST_USES_PFN, data->u.gpa,
+		r = kvm_gpc_activate(&vcpu->arch.xen.vcpu_time_info_cache,
+				     data->u.gpa,
 				     sizeof(struct pvclock_vcpu_time_info));
 		if (!r)
 			kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -751,10 +746,8 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		if (data->u.gpa == GPA_INVALID) {
 			r = 0;
 		deactivate_out:
-			kvm_gpc_deactivate(vcpu->kvm,
-					   &vcpu->arch.xen.runstate_cache);
-			kvm_gpc_deactivate(vcpu->kvm,
-					   &vcpu->arch.xen.runstate2_cache);
+			kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
+			kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
 			break;
 		}
 
@@ -770,20 +763,18 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 
 		/* How much fits in the (first) page? */
 		sz1 = PAGE_SIZE - (data->u.gpa & ~PAGE_MASK);
-		r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate_cache,
-				     NULL, KVM_HOST_USES_PFN, data->u.gpa, sz1);
+		r = kvm_gpc_activate(&vcpu->arch.xen.runstate_cache,
+				     data->u.gpa, sz1);
 		if (r)
 			goto deactivate_out;
 
 		/* Either map the second page, or deactivate the second GPC */
 		if (sz1 >= sz) {
-			kvm_gpc_deactivate(vcpu->kvm,
-					   &vcpu->arch.xen.runstate2_cache);
+			kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
 		} else {
 			sz2 = sz - sz1;
 			BUG_ON((data->u.gpa + sz1) & ~PAGE_MASK);
-			r = kvm_gpc_activate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache,
-					     NULL, KVM_HOST_USES_PFN,
+			r = kvm_gpc_activate(&vcpu->arch.xen.runstate2_cache,
 					     data->u.gpa + sz1, sz2);
 			if (r)
 				goto deactivate_out;
@@ -2051,10 +2042,14 @@ void kvm_xen_init_vcpu(struct kvm_vcpu *vcpu)
 
 	timer_setup(&vcpu->arch.xen.poll_timer, cancel_evtchn_poll, 0);
 
-	kvm_gpc_init(&vcpu->arch.xen.runstate_cache);
-	kvm_gpc_init(&vcpu->arch.xen.runstate2_cache);
-	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache);
-	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache);
+	kvm_gpc_init(&vcpu->arch.xen.runstate_cache, vcpu->kvm, NULL,
+		     KVM_HOST_USES_PFN);
+	kvm_gpc_init(&vcpu->arch.xen.runstate2_cache, vcpu->kvm, NULL,
+		     KVM_HOST_USES_PFN);
+	kvm_gpc_init(&vcpu->arch.xen.vcpu_info_cache, vcpu->kvm, NULL,
+		     KVM_HOST_USES_PFN);
+	kvm_gpc_init(&vcpu->arch.xen.vcpu_time_info_cache, vcpu->kvm, NULL,
+		     KVM_HOST_USES_PFN);
 }
 
 void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
@@ -2062,10 +2057,10 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 	if (kvm_xen_timer_enabled(vcpu))
 		kvm_xen_stop_timer(vcpu);
 
-	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate_cache);
-	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.runstate2_cache);
-	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
-	kvm_gpc_deactivate(vcpu->kvm, &vcpu->arch.xen.vcpu_time_info_cache);
+	kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
+	kvm_gpc_deactivate(&vcpu->arch.xen.runstate2_cache);
+	kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
+	kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
 
 	del_timer_sync(&vcpu->arch.xen.poll_timer);
 }
@@ -2073,7 +2068,7 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
 void kvm_xen_init_vm(struct kvm *kvm)
 {
 	idr_init(&kvm->arch.xen.evtchn_ports);
-	kvm_gpc_init(&kvm->arch.xen.shinfo_cache);
+	kvm_gpc_init(&kvm->arch.xen.shinfo_cache, kvm, NULL, KVM_HOST_USES_PFN);
 }
 
 void kvm_xen_destroy_vm(struct kvm *kvm)
@@ -2081,7 +2076,7 @@ void kvm_xen_destroy_vm(struct kvm *kvm)
 	struct evtchnfd *evtchnfd;
 	int i;
 
-	kvm_gpc_deactivate(kvm, &kvm->arch.xen.shinfo_cache);
+	kvm_gpc_deactivate(&kvm->arch.xen.shinfo_cache);
 
 	idr_for_each_entry(&kvm->arch.xen.evtchn_ports, evtchnfd, i) {
 		if (!evtchnfd->deliver.port.port)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8f874a9643139..73ded328f9dce 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1260,18 +1260,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
  * kvm_gpc_init - initialize gfn_to_pfn_cache.
  *
  * @gpc:	   struct gfn_to_pfn_cache object.
- *
- * This sets up a gfn_to_pfn_cache by initializing locks.  Note, the cache must
- * be zero-allocated (or zeroed by the caller before init).
- */
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
-
-/**
- * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
- *                    physical address.
- *
  * @kvm:	   pointer to kvm instance.
- * @gpc:	   struct gfn_to_pfn_cache object.
  * @vcpu:	   vCPU to be used for marking pages dirty and to be woken on
  *		   invalidation.
  * @usage:	   indicates if the resulting host physical PFN is used while
@@ -1280,20 +1269,31 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc);
  *		   changes!---will also force @vcpu to exit the guest and
  *		   refresh the cache); and/or if the PFN used directly
  *		   by KVM (and thus needs a kernel virtual mapping).
+ *
+ * This sets up a gfn_to_pfn_cache by initializing locks and assigning the
+ * immutable attributes.  Note, the cache must be zero-allocated (or zeroed by
+ * the caller before init).
+ */
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
+		  struct kvm_vcpu *vcpu, enum pfn_cache_usage usage);
+
+/**
+ * kvm_gpc_activate - prepare a cached kernel mapping and HPA for a given guest
+ *                    physical address.
+ *
+ * @gpc:	   struct gfn_to_pfn_cache object.
  * @gpa:	   guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
  *
  * @return:	   0 for success.
  *		   -EINVAL for a mapping which would cross a page boundary.
- *                 -EFAULT for an untranslatable guest physical address.
+ *		   -EFAULT for an untranslatable guest physical address.
  *
- * This primes a gfn_to_pfn_cache and links it into the @kvm's list for
+ * This primes a gfn_to_pfn_cache and links it into the @gpc->kvm's list for
  * invalidations to be processed.  Callers are required to use kvm_gpc_check()
  * to ensure that the cache is valid before accessing the target page.
  */
-int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-		     struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
-		     gpa_t gpa, unsigned long len);
+int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
 
 /**
  * kvm_gpc_check - check validity of a gfn_to_pfn_cache.
@@ -1352,13 +1352,12 @@ void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
 /**
  * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
  *
- * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
  *
- * This removes a cache from the @kvm's list to be processed on MMU notifier
+ * This removes a cache from the VM's list to be processed on MMU notifier
  * invocation.
  */
-void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
+void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc);
 
 void kvm_sigset_activate(struct kvm_vcpu *vcpu);
 void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
index 3ca3db020e0e3..76de36e56cdfe 100644
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@ -67,6 +67,7 @@ struct gfn_to_pfn_cache {
 	gpa_t gpa;
 	unsigned long uhva;
 	struct kvm_memory_slot *memslot;
+	struct kvm *kvm;
 	struct kvm_vcpu *vcpu;
 	struct list_head list;
 	rwlock_t lock;
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index b4295474519f8..d8ce30b893d97 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -362,25 +362,29 @@ void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_unmap);
 
-void kvm_gpc_init(struct gfn_to_pfn_cache *gpc)
+void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
+		  struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
 {
+	WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
+	WARN_ON_ONCE((usage & KVM_GUEST_USES_PFN) && !vcpu);
+
 	rwlock_init(&gpc->lock);
 	mutex_init(&gpc->refresh_lock);
+
+	gpc->kvm = kvm;
+	gpc->vcpu = vcpu;
+	gpc->usage = usage;
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_init);
 
-int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
-		     struct kvm_vcpu *vcpu, enum pfn_cache_usage usage,
-		     gpa_t gpa, unsigned long len)
+int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 {
-	WARN_ON_ONCE(!usage || (usage & KVM_GUEST_AND_HOST_USE_PFN) != usage);
+	struct kvm *kvm = gpc->kvm;
 
 	if (!gpc->active) {
 		gpc->khva = NULL;
 		gpc->pfn = KVM_PFN_ERR_FAULT;
 		gpc->uhva = KVM_HVA_ERR_BAD;
-		gpc->vcpu = vcpu;
-		gpc->usage = usage;
 		gpc->valid = false;
 
 		spin_lock(&kvm->gpc_lock);
@@ -400,8 +404,10 @@ int kvm_gpc_activate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_activate);
 
-void kvm_gpc_deactivate(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
 {
+	struct kvm *kvm = gpc->kvm;
+
 	if (gpc->active) {
 		/*
 		 * Deactivate the cache before removing it from the list, KVM
-- 
GitLab


From e308c24a358d1e79951b16c387cbc6c6593639a5 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Thu, 13 Oct 2022 21:12:26 +0000
Subject: [PATCH 2947/4122] KVM: Use gfn_to_pfn_cache's immutable "kvm" in
 kvm_gpc_check()

Make kvm_gpc_check() use kvm instance cached in gfn_to_pfn_cache.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/x86.c       |  2 +-
 arch/x86/kvm/xen.c       | 16 +++++++---------
 include/linux/kvm_host.h |  4 +---
 virt/kvm/pfncache.c      |  5 ++---
 4 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index b5e7aea221106..441f08c3af960 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3035,7 +3035,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 	unsigned long flags;
 
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa,
+	while (!kvm_gpc_check(gpc, gpc->gpa,
 			      offset + sizeof(*guest_hv_clock))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 55257c2a1610f..148319e980c41 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -272,7 +272,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	 * gfn_to_pfn caches that cover the region.
 	 */
 	read_lock_irqsave(&gpc1->lock, flags);
-	while (!kvm_gpc_check(v->kvm, gpc1, gpc1->gpa, user_len1)) {
+	while (!kvm_gpc_check(gpc1, gpc1->gpa, user_len1)) {
 		read_unlock_irqrestore(&gpc1->lock, flags);
 
 		/* When invoked from kvm_sched_out() we cannot sleep */
@@ -308,7 +308,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		 */
 		read_lock(&gpc2->lock);
 
-		if (!kvm_gpc_check(v->kvm, gpc2, gpc2->gpa, user_len2)) {
+		if (!kvm_gpc_check(gpc2, gpc2->gpa, user_len2)) {
 			read_unlock(&gpc2->lock);
 			read_unlock_irqrestore(&gpc1->lock, flags);
 
@@ -488,8 +488,7 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 	 * little more honest about it.
 	 */
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa,
-			      sizeof(struct vcpu_info))) {
+	while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
 		if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa,
@@ -553,8 +552,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gpc_check(v->kvm, gpc, gpc->gpa,
-			      sizeof(struct vcpu_info))) {
+	while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
 		/*
@@ -1158,7 +1156,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 
 	read_lock_irqsave(&gpc->lock, flags);
 	idx = srcu_read_lock(&kvm->srcu);
-	if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+	if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE))
 		goto out_rcu;
 
 	ret = false;
@@ -1580,7 +1578,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	idx = srcu_read_lock(&kvm->srcu);
 
 	read_lock_irqsave(&gpc->lock, flags);
-	if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
+	if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE))
 		goto out_rcu;
 
 	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
@@ -1614,7 +1612,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 		gpc = &vcpu->arch.xen.vcpu_info_cache;
 
 		read_lock_irqsave(&gpc->lock, flags);
-		if (!kvm_gpc_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+		if (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
 			/*
 			 * Could not access the vcpu_info. Set the bit in-kernel
 			 * and prod the vCPU to deliver it for itself.
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 73ded328f9dce..befc8114ed0da 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1298,7 +1298,6 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 /**
  * kvm_gpc_check - check validity of a gfn_to_pfn_cache.
  *
- * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
  * @gpa:	   current guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
@@ -1313,8 +1312,7 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
  * Callers in IN_GUEST_MODE may do so without locking, although they should
  * still hold a read lock on kvm->scru for the memslot checks.
  */
-bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
-		   unsigned long len);
+bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
 
 /**
  * kvm_gpc_refresh - update a previously initialized cache.
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index d8ce30b893d97..decf4fdde668f 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -76,10 +76,9 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 	}
 }
 
-bool kvm_gpc_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
-		   unsigned long len)
+bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 {
-	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
 
 	if (!gpc->active)
 		return false;
-- 
GitLab


From 2a0b128a906ab28b1ab41ceedcaf462b6f74f1aa Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Thu, 13 Oct 2022 21:12:27 +0000
Subject: [PATCH 2948/4122] KVM: Clean up hva_to_pfn_retry()

Make hva_to_pfn_retry() use kvm instance cached in gfn_to_pfn_cache.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 virt/kvm/pfncache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index decf4fdde668f..9d506de6c150d 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -138,7 +138,7 @@ static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_s
 	return kvm->mmu_invalidate_seq != mmu_seq;
 }
 
-static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+static kvm_pfn_t hva_to_pfn_retry(struct gfn_to_pfn_cache *gpc)
 {
 	/* Note, the new page offset may be different than the old! */
 	void *old_khva = gpc->khva - offset_in_page(gpc->khva);
@@ -158,7 +158,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 	gpc->valid = false;
 
 	do {
-		mmu_seq = kvm->mmu_invalidate_seq;
+		mmu_seq = gpc->kvm->mmu_invalidate_seq;
 		smp_rmb();
 
 		write_unlock_irq(&gpc->lock);
@@ -216,7 +216,7 @@ static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
 		 * attempting to refresh.
 		 */
 		WARN_ON_ONCE(gpc->valid);
-	} while (mmu_notifier_retry_cache(kvm, mmu_seq));
+	} while (mmu_notifier_retry_cache(gpc->kvm, mmu_seq));
 
 	gpc->valid = true;
 	gpc->pfn = new_pfn;
@@ -294,7 +294,7 @@ int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
 	 * drop the lock and do the HVA to PFN lookup again.
 	 */
 	if (!gpc->valid || old_uhva != gpc->uhva) {
-		ret = hva_to_pfn_retry(kvm, gpc);
+		ret = hva_to_pfn_retry(gpc);
 	} else {
 		/*
 		 * If the HVA→PFN mapping was already valid, don't unmap it.
-- 
GitLab


From 0318f207d1c2e297d1ec1c6e145bb8bd053236f9 Mon Sep 17 00:00:00 2001
From: Michal Luczaj <mhal@rbox.co>
Date: Thu, 13 Oct 2022 21:12:28 +0000
Subject: [PATCH 2949/4122] KVM: Use gfn_to_pfn_cache's immutable "kvm" in
 kvm_gpc_refresh()

Make kvm_gpc_refresh() use kvm instance cached in gfn_to_pfn_cache.

No functional change intended.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Michal Luczaj <mhal@rbox.co>
[sean: leave kvm_gpc_unmap() as-is]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/x86.c       |  2 +-
 arch/x86/kvm/xen.c       | 10 ++++------
 include/linux/kvm_host.h | 10 ++++------
 virt/kvm/pfncache.c      |  7 +++----
 4 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 441f08c3af960..490df3e997fac 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3039,7 +3039,7 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 			      offset + sizeof(*guest_hv_clock))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
-		if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa,
+		if (kvm_gpc_refresh(gpc, gpc->gpa,
 				    offset + sizeof(*guest_hv_clock)))
 			return;
 
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 148319e980c41..f50c88b1eaab3 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -279,7 +279,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		if (atomic)
 			return;
 
-		if (kvm_gpc_refresh(v->kvm, gpc1, gpc1->gpa, user_len1))
+		if (kvm_gpc_refresh(gpc1, gpc1->gpa, user_len1))
 			return;
 
 		read_lock_irqsave(&gpc1->lock, flags);
@@ -491,8 +491,7 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 	while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
-		if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa,
-				    sizeof(struct vcpu_info)))
+		if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info)))
 			return;
 
 		read_lock_irqsave(&gpc->lock, flags);
@@ -566,8 +565,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 		if (in_atomic() || !task_is_running(current))
 			return 1;
 
-		if (kvm_gpc_refresh(v->kvm, gpc, gpc->gpa,
-				    sizeof(struct vcpu_info))) {
+		if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
 			/*
 			 * If this failed, userspace has screwed up the
 			 * vcpu_info mapping. No interrupts for you.
@@ -1710,7 +1708,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 			break;
 
 		idx = srcu_read_lock(&kvm->srcu);
-		rc = kvm_gpc_refresh(kvm, gpc, gpc->gpa, PAGE_SIZE);
+		rc = kvm_gpc_refresh(gpc, gpc->gpa, PAGE_SIZE);
 		srcu_read_unlock(&kvm->srcu, idx);
 	} while(!rc);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index befc8114ed0da..3ce4650776b87 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1317,23 +1317,21 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
 /**
  * kvm_gpc_refresh - update a previously initialized cache.
  *
- * @kvm:	   pointer to kvm instance.
  * @gpc:	   struct gfn_to_pfn_cache object.
  * @gpa:	   updated guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
- *
+
  * @return:	   0 for success.
  *		   -EINVAL for a mapping which would cross a page boundary.
- *                 -EFAULT for an untranslatable guest physical address.
+ *		   -EFAULT for an untranslatable guest physical address.
  *
  * This will attempt to refresh a gfn_to_pfn_cache. Note that a successful
- * returm from this function does not mean the page can be immediately
+ * return from this function does not mean the page can be immediately
  * accessed because it may have raced with an invalidation. Callers must
  * still lock and check the cache status, as this function does not return
  * with the lock still held to permit access.
  */
-int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
-		    unsigned long len);
+int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
 
 /**
  * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache.
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 9d506de6c150d..015c5d16948a1 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -237,10 +237,9 @@ out_error:
 	return -EFAULT;
 }
 
-int kvm_gpc_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc, gpa_t gpa,
-		    unsigned long len)
+int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 {
-	struct kvm_memslots *slots = kvm_memslots(kvm);
+	struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
 	unsigned long page_offset = gpa & ~PAGE_MASK;
 	bool unmap_old = false;
 	unsigned long old_uhva;
@@ -399,7 +398,7 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 		gpc->active = true;
 		write_unlock_irq(&gpc->lock);
 	}
-	return kvm_gpc_refresh(kvm, gpc, gpa, len);
+	return kvm_gpc_refresh(gpc, gpa, len);
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_activate);
 
-- 
GitLab


From 9f87791d686d85614584438d4f249eb32ef7964c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Oct 2022 21:12:29 +0000
Subject: [PATCH 2950/4122] KVM: Drop KVM's API to allow temporarily unmapping
 gfn=>pfn cache

Drop kvm_gpc_unmap() as it has no users and unclear requirements.  The
API was added as part of the original gfn_to_pfn_cache support, but its
sole usage[*] was never merged.  Fold the guts of kvm_gpc_unmap() into
the deactivate path and drop the API.  Omit acquiring refresh_lock as
as concurrent calls to kvm_gpc_deactivate() are not allowed (this is
not enforced, e.g. via lockdep. due to it being called during vCPU
destruction).

If/when temporary unmapping makes a comeback, the desirable behavior is
likely to restrict temporary unmapping to vCPU-exclusive mappings and
require the vcpu->mutex be held to serialize unmap.  Use of the
refresh_lock to protect unmapping was somewhat specuatively added by
commit 93984f19e7bc ("KVM: Fully serialize gfn=>pfn cache refresh via
mutex") to guard against concurrent unmaps, but the primary use case of
the temporary unmap, nested virtualization[*], doesn't actually need or
want concurrent unmaps.

[*] https://lore.kernel.org/all/20211210163625.2886-7-dwmw2@infradead.org

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 include/linux/kvm_host.h | 12 -----------
 virt/kvm/pfncache.c      | 44 +++++++++++++++-------------------------
 2 files changed, 16 insertions(+), 40 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 3ce4650776b87..eac76965cf448 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1333,18 +1333,6 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
  */
 int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
 
-/**
- * kvm_gpc_unmap - temporarily unmap a gfn_to_pfn_cache.
- *
- * @kvm:	   pointer to kvm instance.
- * @gpc:	   struct gfn_to_pfn_cache object.
- *
- * This unmaps the referenced page. The cache is left in the invalid state
- * but at least the mapping from GPA to userspace HVA will remain cached
- * and can be reused on a subsequent refresh.
- */
-void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
-
 /**
  * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
  *
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 015c5d16948a1..5b25127936915 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -333,33 +333,6 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
 
-void kvm_gpc_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
-{
-	void *old_khva;
-	kvm_pfn_t old_pfn;
-
-	mutex_lock(&gpc->refresh_lock);
-	write_lock_irq(&gpc->lock);
-
-	gpc->valid = false;
-
-	old_khva = gpc->khva - offset_in_page(gpc->khva);
-	old_pfn = gpc->pfn;
-
-	/*
-	 * We can leave the GPA → uHVA map cache intact but the PFN
-	 * lookup will need to be redone even for the same page.
-	 */
-	gpc->khva = NULL;
-	gpc->pfn = KVM_PFN_ERR_FAULT;
-
-	write_unlock_irq(&gpc->lock);
-	mutex_unlock(&gpc->refresh_lock);
-
-	gpc_unmap_khva(old_pfn, old_khva);
-}
-EXPORT_SYMBOL_GPL(kvm_gpc_unmap);
-
 void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
 		  struct kvm_vcpu *vcpu, enum pfn_cache_usage usage)
 {
@@ -405,6 +378,8 @@ EXPORT_SYMBOL_GPL(kvm_gpc_activate);
 void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
 {
 	struct kvm *kvm = gpc->kvm;
+	kvm_pfn_t old_pfn;
+	void *old_khva;
 
 	if (gpc->active) {
 		/*
@@ -414,13 +389,26 @@ void kvm_gpc_deactivate(struct gfn_to_pfn_cache *gpc)
 		 */
 		write_lock_irq(&gpc->lock);
 		gpc->active = false;
+		gpc->valid = false;
+
+		/*
+		 * Leave the GPA => uHVA cache intact, it's protected by the
+		 * memslot generation.  The PFN lookup needs to be redone every
+		 * time as mmu_notifier protection is lost when the cache is
+		 * removed from the VM's gpc_list.
+		 */
+		old_khva = gpc->khva - offset_in_page(gpc->khva);
+		gpc->khva = NULL;
+
+		old_pfn = gpc->pfn;
+		gpc->pfn = KVM_PFN_ERR_FAULT;
 		write_unlock_irq(&gpc->lock);
 
 		spin_lock(&kvm->gpc_lock);
 		list_del(&gpc->list);
 		spin_unlock(&kvm->gpc_lock);
 
-		kvm_gpc_unmap(kvm, gpc);
+		gpc_unmap_khva(old_pfn, old_khva);
 	}
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_deactivate);
-- 
GitLab


From 5762cb10235776dd1ed5f5f9d6c1aff2b73bec5c Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Oct 2022 21:12:30 +0000
Subject: [PATCH 2951/4122] KVM: Do not partially reinitialize gfn=>pfn cache
 during activation

Don't partially reinitialize a gfn=>pfn cache when activating the cache,
and instead assert that the cache is not valid during activation.  Bug
the VM if the assertion fails, as use-after-free and/or data corruption
is all but guaranteed if KVM ends up with a valid-but-inactive cache.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 virt/kvm/pfncache.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index 5b25127936915..c1a772cedc4bd 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -345,6 +345,8 @@ void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
 	gpc->kvm = kvm;
 	gpc->vcpu = vcpu;
 	gpc->usage = usage;
+	gpc->pfn = KVM_PFN_ERR_FAULT;
+	gpc->uhva = KVM_HVA_ERR_BAD;
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_init);
 
@@ -353,10 +355,8 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 	struct kvm *kvm = gpc->kvm;
 
 	if (!gpc->active) {
-		gpc->khva = NULL;
-		gpc->pfn = KVM_PFN_ERR_FAULT;
-		gpc->uhva = KVM_HVA_ERR_BAD;
-		gpc->valid = false;
+		if (KVM_BUG_ON(gpc->valid, kvm))
+			return -EIO;
 
 		spin_lock(&kvm->gpc_lock);
 		list_add(&gpc->list, &kvm->gpc_list);
-- 
GitLab


From 58f5ee5fedd981e05cb086cba4e8f923c3727a04 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Oct 2022 21:12:31 +0000
Subject: [PATCH 2952/4122] KVM: Drop @gpa from exported gfn=>pfn cache check()
 and refresh() helpers

Drop the @gpa param from the exported check()+refresh() helpers and limit
changing the cache's GPA to the activate path.  All external users just
feed in gpc->gpa, i.e. this is a fancy nop.

Allowing users to change the GPA at check()+refresh() is dangerous as
those helpers explicitly allow concurrent calls, e.g. KVM could get into
a livelock scenario.  It's also unclear as to what the expected behavior
should be if multiple tasks attempt to refresh with different GPAs.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 arch/x86/kvm/x86.c       |  6 ++----
 arch/x86/kvm/xen.c       | 22 +++++++++++-----------
 include/linux/kvm_host.h |  8 +++-----
 virt/kvm/pfncache.c      | 17 +++++++++++------
 4 files changed, 27 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 490df3e997fac..006b445996a9a 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3035,12 +3035,10 @@ static void kvm_setup_guest_pvclock(struct kvm_vcpu *v,
 	unsigned long flags;
 
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gpc_check(gpc, gpc->gpa,
-			      offset + sizeof(*guest_hv_clock))) {
+	while (!kvm_gpc_check(gpc, offset + sizeof(*guest_hv_clock))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
-		if (kvm_gpc_refresh(gpc, gpc->gpa,
-				    offset + sizeof(*guest_hv_clock)))
+		if (kvm_gpc_refresh(gpc, offset + sizeof(*guest_hv_clock)))
 			return;
 
 		read_lock_irqsave(&gpc->lock, flags);
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index f50c88b1eaab3..5208e05ca9a6c 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -272,14 +272,14 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 	 * gfn_to_pfn caches that cover the region.
 	 */
 	read_lock_irqsave(&gpc1->lock, flags);
-	while (!kvm_gpc_check(gpc1, gpc1->gpa, user_len1)) {
+	while (!kvm_gpc_check(gpc1, user_len1)) {
 		read_unlock_irqrestore(&gpc1->lock, flags);
 
 		/* When invoked from kvm_sched_out() we cannot sleep */
 		if (atomic)
 			return;
 
-		if (kvm_gpc_refresh(gpc1, gpc1->gpa, user_len1))
+		if (kvm_gpc_refresh(gpc1, user_len1))
 			return;
 
 		read_lock_irqsave(&gpc1->lock, flags);
@@ -308,7 +308,7 @@ static void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, bool atomic)
 		 */
 		read_lock(&gpc2->lock);
 
-		if (!kvm_gpc_check(gpc2, gpc2->gpa, user_len2)) {
+		if (!kvm_gpc_check(gpc2, user_len2)) {
 			read_unlock(&gpc2->lock);
 			read_unlock_irqrestore(&gpc1->lock, flags);
 
@@ -488,10 +488,10 @@ void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
 	 * little more honest about it.
 	 */
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
-		if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info)))
+		if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info)))
 			return;
 
 		read_lock_irqsave(&gpc->lock, flags);
@@ -551,7 +551,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 		     sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
 
 	read_lock_irqsave(&gpc->lock, flags);
-	while (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+	while (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
 		read_unlock_irqrestore(&gpc->lock, flags);
 
 		/*
@@ -565,7 +565,7 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 		if (in_atomic() || !task_is_running(current))
 			return 1;
 
-		if (kvm_gpc_refresh(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+		if (kvm_gpc_refresh(gpc, sizeof(struct vcpu_info))) {
 			/*
 			 * If this failed, userspace has screwed up the
 			 * vcpu_info mapping. No interrupts for you.
@@ -1154,7 +1154,7 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 
 	read_lock_irqsave(&gpc->lock, flags);
 	idx = srcu_read_lock(&kvm->srcu);
-	if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE))
+	if (!kvm_gpc_check(gpc, PAGE_SIZE))
 		goto out_rcu;
 
 	ret = false;
@@ -1576,7 +1576,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 	idx = srcu_read_lock(&kvm->srcu);
 
 	read_lock_irqsave(&gpc->lock, flags);
-	if (!kvm_gpc_check(gpc, gpc->gpa, PAGE_SIZE))
+	if (!kvm_gpc_check(gpc, PAGE_SIZE))
 		goto out_rcu;
 
 	if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
@@ -1610,7 +1610,7 @@ int kvm_xen_set_evtchn_fast(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 		gpc = &vcpu->arch.xen.vcpu_info_cache;
 
 		read_lock_irqsave(&gpc->lock, flags);
-		if (!kvm_gpc_check(gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+		if (!kvm_gpc_check(gpc, sizeof(struct vcpu_info))) {
 			/*
 			 * Could not access the vcpu_info. Set the bit in-kernel
 			 * and prod the vCPU to deliver it for itself.
@@ -1708,7 +1708,7 @@ static int kvm_xen_set_evtchn(struct kvm_xen_evtchn *xe, struct kvm *kvm)
 			break;
 
 		idx = srcu_read_lock(&kvm->srcu);
-		rc = kvm_gpc_refresh(gpc, gpc->gpa, PAGE_SIZE);
+		rc = kvm_gpc_refresh(gpc, PAGE_SIZE);
 		srcu_read_unlock(&kvm->srcu, idx);
 	} while(!rc);
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index eac76965cf448..7008846fd3dd2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1299,7 +1299,6 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
  * kvm_gpc_check - check validity of a gfn_to_pfn_cache.
  *
  * @gpc:	   struct gfn_to_pfn_cache object.
- * @gpa:	   current guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
  *
  * @return:	   %true if the cache is still valid and the address matches.
@@ -1312,15 +1311,14 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
  * Callers in IN_GUEST_MODE may do so without locking, although they should
  * still hold a read lock on kvm->scru for the memslot checks.
  */
-bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
+bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len);
 
 /**
  * kvm_gpc_refresh - update a previously initialized cache.
  *
  * @gpc:	   struct gfn_to_pfn_cache object.
- * @gpa:	   updated guest physical address to map.
  * @len:	   sanity check; the range being access must fit a single page.
-
+ *
  * @return:	   0 for success.
  *		   -EINVAL for a mapping which would cross a page boundary.
  *		   -EFAULT for an untranslatable guest physical address.
@@ -1331,7 +1329,7 @@ bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
  * still lock and check the cache status, as this function does not return
  * with the lock still held to permit access.
  */
-int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len);
+int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len);
 
 /**
  * kvm_gpc_deactivate - deactivate and unlink a gfn_to_pfn_cache.
diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index c1a772cedc4bd..a805cc1544bf6 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -76,18 +76,17 @@ void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm, unsigned long start,
 	}
 }
 
-bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+bool kvm_gpc_check(struct gfn_to_pfn_cache *gpc, unsigned long len)
 {
 	struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
 
 	if (!gpc->active)
 		return false;
 
-	if ((gpa & ~PAGE_MASK) + len > PAGE_SIZE)
+	if ((gpc->gpa & ~PAGE_MASK) + len > PAGE_SIZE)
 		return false;
 
-	if (gpc->gpa != gpa || gpc->generation != slots->generation ||
-	    kvm_is_error_hva(gpc->uhva))
+	if (gpc->generation != slots->generation || kvm_is_error_hva(gpc->uhva))
 		return false;
 
 	if (!gpc->valid)
@@ -237,7 +236,8 @@ out_error:
 	return -EFAULT;
 }
 
-int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
+static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
+			     unsigned long len)
 {
 	struct kvm_memslots *slots = kvm_memslots(gpc->kvm);
 	unsigned long page_offset = gpa & ~PAGE_MASK;
@@ -331,6 +331,11 @@ out_unlock:
 
 	return ret;
 }
+
+int kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, unsigned long len)
+{
+	return __kvm_gpc_refresh(gpc, gpc->gpa, len);
+}
 EXPORT_SYMBOL_GPL(kvm_gpc_refresh);
 
 void kvm_gpc_init(struct gfn_to_pfn_cache *gpc, struct kvm *kvm,
@@ -371,7 +376,7 @@ int kvm_gpc_activate(struct gfn_to_pfn_cache *gpc, gpa_t gpa, unsigned long len)
 		gpc->active = true;
 		write_unlock_irq(&gpc->lock);
 	}
-	return kvm_gpc_refresh(gpc, gpa, len);
+	return __kvm_gpc_refresh(gpc, gpa, len);
 }
 EXPORT_SYMBOL_GPL(kvm_gpc_activate);
 
-- 
GitLab


From 06e155c44aa0e7921aa44d3c67f8ea464b16cb75 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 13 Oct 2022 21:12:32 +0000
Subject: [PATCH 2953/4122] KVM: Skip unnecessary "unmap" if gpc is already
 valid during refresh

When refreshing a gfn=>pfn cache, skip straight to unlocking if the cache
already valid instead of stuffing the "old" variables to turn the
unmapping outro into a nop.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 virt/kvm/pfncache.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
index a805cc1544bf6..2d6aba6778307 100644
--- a/virt/kvm/pfncache.c
+++ b/virt/kvm/pfncache.c
@@ -301,9 +301,8 @@ static int __kvm_gpc_refresh(struct gfn_to_pfn_cache *gpc, gpa_t gpa,
 		 * may have changed.
 		 */
 		gpc->khva = old_khva + page_offset;
-		old_pfn = KVM_PFN_ERR_FAULT;
-		old_khva = NULL;
 		ret = 0;
+		goto out_unlock;
 	}
 
  out:
-- 
GitLab


From 2ea3498980f5e6f3001f2984b0b92736bf1b78cb Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:32 +0000
Subject: [PATCH 2954/4122] mm/damon/core: split out DAMOS-charged region skip
 logic into a new function

Patch series "mm/damon: cleanup and refactoring code", v2.

This patchset cleans up and refactors a range of DAMON code including the
core, DAMON sysfs interface, and DAMON modules, for better readability and
convenient future feature implementations.

In detail, this patchset splits unnecessarily long and complex functions
in core into smaller functions (patches 1-4).  Then, it cleans up the
DAMON sysfs interface by using more type-safe code (patch 5) and removing
unnecessary function parameters (patch 6).  Further, it refactor the code
by distributing the code into multiple files (patches 7-10).  Last two
patches (patches 11 and 12) deduplicates and remove unnecessary header
inclusion in DAMON modules (reclaim and lru_sort).


This patch (of 12):

The DAMOS action applying function, 'damon_do_apply_schemes()', is quite
long and not so simple.  Split out the already quota-charged region skip
code, which is not a small amount of simple code, into a new function with
some comments for better readability.

Link: https://lkml.kernel.org/r/20221026225943.100429-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20221026225943.100429-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/core.c | 96 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 65 insertions(+), 31 deletions(-)

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 36d098d06c558..06b50ede9cc62 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -694,6 +694,67 @@ static bool damos_valid_target(struct damon_ctx *c, struct damon_target *t,
 	return c->ops.get_scheme_score(c, t, r, s) >= s->quota.min_score;
 }
 
+/*
+ * damos_skip_charged_region() - Check if the given region or starting part of
+ * it is already charged for the DAMOS quota.
+ * @t:	The target of the region.
+ * @rp:	The pointer to the region.
+ * @s:	The scheme to be applied.
+ *
+ * If a quota of a scheme has exceeded in a quota charge window, the scheme's
+ * action would applied to only a part of the target access pattern fulfilling
+ * regions.  To avoid applying the scheme action to only already applied
+ * regions, DAMON skips applying the scheme action to the regions that charged
+ * in the previous charge window.
+ *
+ * This function checks if a given region should be skipped or not for the
+ * reason.  If only the starting part of the region has previously charged,
+ * this function splits the region into two so that the second one covers the
+ * area that not charged in the previous charge widnow and saves the second
+ * region in *rp and returns false, so that the caller can apply DAMON action
+ * to the second one.
+ *
+ * Return: true if the region should be entirely skipped, false otherwise.
+ */
+static bool damos_skip_charged_region(struct damon_target *t,
+		struct damon_region **rp, struct damos *s)
+{
+	struct damon_region *r = *rp;
+	struct damos_quota *quota = &s->quota;
+	unsigned long sz_to_skip;
+
+	/* Skip previously charged regions */
+	if (quota->charge_target_from) {
+		if (t != quota->charge_target_from)
+			return true;
+		if (r == damon_last_region(t)) {
+			quota->charge_target_from = NULL;
+			quota->charge_addr_from = 0;
+			return true;
+		}
+		if (quota->charge_addr_from &&
+				r->ar.end <= quota->charge_addr_from)
+			return true;
+
+		if (quota->charge_addr_from && r->ar.start <
+				quota->charge_addr_from) {
+			sz_to_skip = ALIGN_DOWN(quota->charge_addr_from -
+					r->ar.start, DAMON_MIN_REGION);
+			if (!sz_to_skip) {
+				if (damon_sz_region(r) <= DAMON_MIN_REGION)
+					return true;
+				sz_to_skip = DAMON_MIN_REGION;
+			}
+			damon_split_region_at(t, r, sz_to_skip);
+			r = damon_next_region(r);
+			*rp = r;
+		}
+		quota->charge_target_from = NULL;
+		quota->charge_addr_from = 0;
+	}
+	return false;
+}
+
 static void damon_do_apply_schemes(struct damon_ctx *c,
 				   struct damon_target *t,
 				   struct damon_region *r)
@@ -702,7 +763,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 
 	damon_for_each_scheme(s, c) {
 		struct damos_quota *quota = &s->quota;
-		unsigned long sz = damon_sz_region(r);
+		unsigned long sz;
 		struct timespec64 begin, end;
 		unsigned long sz_applied = 0;
 
@@ -713,41 +774,14 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 		if (quota->esz && quota->charged_sz >= quota->esz)
 			continue;
 
-		/* Skip previously charged regions */
-		if (quota->charge_target_from) {
-			if (t != quota->charge_target_from)
-				continue;
-			if (r == damon_last_region(t)) {
-				quota->charge_target_from = NULL;
-				quota->charge_addr_from = 0;
-				continue;
-			}
-			if (quota->charge_addr_from &&
-					r->ar.end <= quota->charge_addr_from)
-				continue;
-
-			if (quota->charge_addr_from && r->ar.start <
-					quota->charge_addr_from) {
-				sz = ALIGN_DOWN(quota->charge_addr_from -
-						r->ar.start, DAMON_MIN_REGION);
-				if (!sz) {
-					if (damon_sz_region(r) <=
-					    DAMON_MIN_REGION)
-						continue;
-					sz = DAMON_MIN_REGION;
-				}
-				damon_split_region_at(t, r, sz);
-				r = damon_next_region(r);
-				sz = damon_sz_region(r);
-			}
-			quota->charge_target_from = NULL;
-			quota->charge_addr_from = 0;
-		}
+		if (damos_skip_charged_region(t, &r, s))
+			continue;
 
 		if (!damos_valid_target(c, t, r, s))
 			continue;
 
 		/* Apply the scheme */
+		sz = damon_sz_region(r);
 		if (c->ops.apply_scheme) {
 			if (quota->esz &&
 					quota->charged_sz + sz > quota->esz) {
-- 
GitLab


From e63a30c51f8400915db401c05d3c4db6743857e8 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:33 +0000
Subject: [PATCH 2955/4122] mm/damon/core: split damos application logic into a
 new function

The DAMOS action applying function, 'damon_do_apply_schemes()', is still
long and not easy to read.  Split out the code for applying a single
action to a single region into a new function for better readability.

Link: https://lkml.kernel.org/r/20221026225943.100429-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/core.c | 73 ++++++++++++++++++++++++++-----------------------
 1 file changed, 39 insertions(+), 34 deletions(-)

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 06b50ede9cc62..c1a912bc46ae8 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -755,6 +755,44 @@ static bool damos_skip_charged_region(struct damon_target *t,
 	return false;
 }
 
+static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
+		struct damon_region *r, struct damos *s)
+{
+	struct damos_quota *quota = &s->quota;
+	unsigned long sz = damon_sz_region(r);
+	struct timespec64 begin, end;
+	unsigned long sz_applied = 0;
+
+	if (c->ops.apply_scheme) {
+		if (quota->esz && quota->charged_sz + sz > quota->esz) {
+			sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
+					DAMON_MIN_REGION);
+			if (!sz)
+				goto update_stat;
+			damon_split_region_at(t, r, sz);
+		}
+		ktime_get_coarse_ts64(&begin);
+		sz_applied = c->ops.apply_scheme(c, t, r, s);
+		ktime_get_coarse_ts64(&end);
+		quota->total_charged_ns += timespec64_to_ns(&end) -
+			timespec64_to_ns(&begin);
+		quota->charged_sz += sz;
+		if (quota->esz && quota->charged_sz >= quota->esz) {
+			quota->charge_target_from = t;
+			quota->charge_addr_from = r->ar.end + 1;
+		}
+	}
+	if (s->action != DAMOS_STAT)
+		r->age = 0;
+
+update_stat:
+	s->stat.nr_tried++;
+	s->stat.sz_tried += sz;
+	if (sz_applied)
+		s->stat.nr_applied++;
+	s->stat.sz_applied += sz_applied;
+}
+
 static void damon_do_apply_schemes(struct damon_ctx *c,
 				   struct damon_target *t,
 				   struct damon_region *r)
@@ -763,9 +801,6 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 
 	damon_for_each_scheme(s, c) {
 		struct damos_quota *quota = &s->quota;
-		unsigned long sz;
-		struct timespec64 begin, end;
-		unsigned long sz_applied = 0;
 
 		if (!s->wmarks.activated)
 			continue;
@@ -780,37 +815,7 @@ static void damon_do_apply_schemes(struct damon_ctx *c,
 		if (!damos_valid_target(c, t, r, s))
 			continue;
 
-		/* Apply the scheme */
-		sz = damon_sz_region(r);
-		if (c->ops.apply_scheme) {
-			if (quota->esz &&
-					quota->charged_sz + sz > quota->esz) {
-				sz = ALIGN_DOWN(quota->esz - quota->charged_sz,
-						DAMON_MIN_REGION);
-				if (!sz)
-					goto update_stat;
-				damon_split_region_at(t, r, sz);
-			}
-			ktime_get_coarse_ts64(&begin);
-			sz_applied = c->ops.apply_scheme(c, t, r, s);
-			ktime_get_coarse_ts64(&end);
-			quota->total_charged_ns += timespec64_to_ns(&end) -
-				timespec64_to_ns(&begin);
-			quota->charged_sz += sz;
-			if (quota->esz && quota->charged_sz >= quota->esz) {
-				quota->charge_target_from = t;
-				quota->charge_addr_from = r->ar.end + 1;
-			}
-		}
-		if (s->action != DAMOS_STAT)
-			r->age = 0;
-
-update_stat:
-		s->stat.nr_tried++;
-		s->stat.sz_tried += sz;
-		if (sz_applied)
-			s->stat.nr_applied++;
-		s->stat.sz_applied += sz_applied;
+		damos_apply_scheme(c, t, r, s);
 	}
 }
 
-- 
GitLab


From d1cbbf621fc25950938be74a228ef518d05d93a1 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:34 +0000
Subject: [PATCH 2956/4122] mm/damon/core: split out scheme stat update logic
 into a new function

The function for applying a given DAMON scheme action to a given DAMON
region, 'damos_apply_scheme()' is not quite short.  Make it better to read
by splitting out the stat update logic into a new function.

Link: https://lkml.kernel.org/r/20221026225943.100429-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/core.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/mm/damon/core.c b/mm/damon/core.c
index c1a912bc46ae8..3a810c6e26bc6 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -755,6 +755,16 @@ static bool damos_skip_charged_region(struct damon_target *t,
 	return false;
 }
 
+static void damos_update_stat(struct damos *s,
+		unsigned long sz_tried, unsigned long sz_applied)
+{
+	s->stat.nr_tried++;
+	s->stat.sz_tried += sz_tried;
+	if (sz_applied)
+		s->stat.nr_applied++;
+	s->stat.sz_applied += sz_applied;
+}
+
 static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
 		struct damon_region *r, struct damos *s)
 {
@@ -786,11 +796,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
 		r->age = 0;
 
 update_stat:
-	s->stat.nr_tried++;
-	s->stat.sz_tried += sz;
-	if (sz_applied)
-		s->stat.nr_applied++;
-	s->stat.sz_applied += sz_applied;
+	damos_update_stat(s, sz, sz_applied);
 }
 
 static void damon_do_apply_schemes(struct damon_ctx *c,
-- 
GitLab


From 898810e5ca54691f4e173f5ffc92bbce0335bc69 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:35 +0000
Subject: [PATCH 2957/4122] mm/damon/core: split out scheme quota adjustment
 logic into a new function

DAMOS quota adjustment logic in 'kdamond_apply_schemes()', has some amount
of code, and the logic is not so straightforward.  Split it out to a new
function for better readability.

Link: https://lkml.kernel.org/r/20221026225943.100429-5-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/core.c | 91 ++++++++++++++++++++++++++-----------------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/mm/damon/core.c b/mm/damon/core.c
index 3a810c6e26bc6..80d5937fe3373 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -848,59 +848,64 @@ static void damos_set_effective_quota(struct damos_quota *quota)
 	quota->esz = esz;
 }
 
-static void kdamond_apply_schemes(struct damon_ctx *c)
+static void damos_adjust_quota(struct damon_ctx *c, struct damos *s)
 {
+	struct damos_quota *quota = &s->quota;
 	struct damon_target *t;
-	struct damon_region *r, *next_r;
-	struct damos *s;
+	struct damon_region *r;
+	unsigned long cumulated_sz;
+	unsigned int score, max_score = 0;
 
-	damon_for_each_scheme(s, c) {
-		struct damos_quota *quota = &s->quota;
-		unsigned long cumulated_sz;
-		unsigned int score, max_score = 0;
+	if (!quota->ms && !quota->sz)
+		return;
 
-		if (!s->wmarks.activated)
-			continue;
+	/* New charge window starts */
+	if (time_after_eq(jiffies, quota->charged_from +
+				msecs_to_jiffies(quota->reset_interval))) {
+		if (quota->esz && quota->charged_sz >= quota->esz)
+			s->stat.qt_exceeds++;
+		quota->total_charged_sz += quota->charged_sz;
+		quota->charged_from = jiffies;
+		quota->charged_sz = 0;
+		damos_set_effective_quota(quota);
+	}
 
-		if (!quota->ms && !quota->sz)
-			continue;
+	if (!c->ops.get_scheme_score)
+		return;
 
-		/* New charge window starts */
-		if (time_after_eq(jiffies, quota->charged_from +
-					msecs_to_jiffies(
-						quota->reset_interval))) {
-			if (quota->esz && quota->charged_sz >= quota->esz)
-				s->stat.qt_exceeds++;
-			quota->total_charged_sz += quota->charged_sz;
-			quota->charged_from = jiffies;
-			quota->charged_sz = 0;
-			damos_set_effective_quota(quota);
+	/* Fill up the score histogram */
+	memset(quota->histogram, 0, sizeof(quota->histogram));
+	damon_for_each_target(t, c) {
+		damon_for_each_region(r, t) {
+			if (!__damos_valid_target(r, s))
+				continue;
+			score = c->ops.get_scheme_score(c, t, r, s);
+			quota->histogram[score] += damon_sz_region(r);
+			if (score > max_score)
+				max_score = score;
 		}
+	}
 
-		if (!c->ops.get_scheme_score)
-			continue;
+	/* Set the min score limit */
+	for (cumulated_sz = 0, score = max_score; ; score--) {
+		cumulated_sz += quota->histogram[score];
+		if (cumulated_sz >= quota->esz || !score)
+			break;
+	}
+	quota->min_score = score;
+}
 
-		/* Fill up the score histogram */
-		memset(quota->histogram, 0, sizeof(quota->histogram));
-		damon_for_each_target(t, c) {
-			damon_for_each_region(r, t) {
-				if (!__damos_valid_target(r, s))
-					continue;
-				score = c->ops.get_scheme_score(
-						c, t, r, s);
-				quota->histogram[score] += damon_sz_region(r);
-				if (score > max_score)
-					max_score = score;
-			}
-		}
+static void kdamond_apply_schemes(struct damon_ctx *c)
+{
+	struct damon_target *t;
+	struct damon_region *r, *next_r;
+	struct damos *s;
 
-		/* Set the min score limit */
-		for (cumulated_sz = 0, score = max_score; ; score--) {
-			cumulated_sz += quota->histogram[score];
-			if (cumulated_sz >= quota->esz || !score)
-				break;
-		}
-		quota->min_score = score;
+	damon_for_each_scheme(s, c) {
+		if (!s->wmarks.activated)
+			continue;
+
+		damos_adjust_quota(c, s);
 	}
 
 	damon_for_each_target(t, c) {
-- 
GitLab


From 789a230613c8dd14bdd41653de0c22783726276f Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:36 +0000
Subject: [PATCH 2958/4122] mm/damon/sysfs: use damon_addr_range for region's
 start and end values

DAMON has a struct for each address range but DAMON sysfs interface is
using the low type (unsigned long) for storing the start and end addresses
of regions.  Use the dedicated struct for better type safety.

Link: https://lkml.kernel.org/r/20221026225943.100429-6-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 07e5f1bdf025f..a5ef503d84445 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1062,13 +1062,11 @@ static struct kobj_type damon_sysfs_schemes_ktype = {
 
 struct damon_sysfs_region {
 	struct kobject kobj;
-	unsigned long start;
-	unsigned long end;
+	struct damon_addr_range ar;
 };
 
 static struct damon_sysfs_region *damon_sysfs_region_alloc(
-		unsigned long start,
-		unsigned long end)
+		struct damon_addr_range ar)
 {
 	struct damon_sysfs_region *region = kmalloc(sizeof(*region),
 			GFP_KERNEL);
@@ -1076,8 +1074,7 @@ static struct damon_sysfs_region *damon_sysfs_region_alloc(
 	if (!region)
 		return NULL;
 	region->kobj = (struct kobject){};
-	region->start = start;
-	region->end = end;
+	region->ar = ar;
 	return region;
 }
 
@@ -1087,7 +1084,7 @@ static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
 	struct damon_sysfs_region *region = container_of(kobj,
 			struct damon_sysfs_region, kobj);
 
-	return sysfs_emit(buf, "%lu\n", region->start);
+	return sysfs_emit(buf, "%lu\n", region->ar.start);
 }
 
 static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -1095,7 +1092,7 @@ static ssize_t start_store(struct kobject *kobj, struct kobj_attribute *attr,
 {
 	struct damon_sysfs_region *region = container_of(kobj,
 			struct damon_sysfs_region, kobj);
-	int err = kstrtoul(buf, 0, &region->start);
+	int err = kstrtoul(buf, 0, &region->ar.start);
 
 	return err ? err : count;
 }
@@ -1106,7 +1103,7 @@ static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr,
 	struct damon_sysfs_region *region = container_of(kobj,
 			struct damon_sysfs_region, kobj);
 
-	return sysfs_emit(buf, "%lu\n", region->end);
+	return sysfs_emit(buf, "%lu\n", region->ar.end);
 }
 
 static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr,
@@ -1114,7 +1111,7 @@ static ssize_t end_store(struct kobject *kobj, struct kobj_attribute *attr,
 {
 	struct damon_sysfs_region *region = container_of(kobj,
 			struct damon_sysfs_region, kobj);
-	int err = kstrtoul(buf, 0, &region->end);
+	int err = kstrtoul(buf, 0, &region->ar.end);
 
 	return err ? err : count;
 }
@@ -1187,7 +1184,7 @@ static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions,
 	regions->regions_arr = regions_arr;
 
 	for (i = 0; i < nr_regions; i++) {
-		region = damon_sysfs_region_alloc(0, 0);
+		region = damon_sysfs_region_alloc((struct damon_addr_range){});
 		if (!region) {
 			damon_sysfs_regions_rm_dirs(regions);
 			return -ENOMEM;
@@ -2147,11 +2144,11 @@ static int damon_sysfs_set_regions(struct damon_target *t,
 		struct damon_sysfs_region *sys_region =
 			sysfs_regions->regions_arr[i];
 
-		if (sys_region->start > sys_region->end)
+		if (sys_region->ar.start > sys_region->ar.end)
 			goto out;
 
-		ranges[i].start = sys_region->start;
-		ranges[i].end = sys_region->end;
+		ranges[i].start = sys_region->ar.start;
+		ranges[i].end = sys_region->ar.end;
 		if (i == 0)
 			continue;
 		if (ranges[i - 1].end > ranges[i].start)
-- 
GitLab


From 1f71981408ef5696ad8544f282d336d4fc60a807 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:37 +0000
Subject: [PATCH 2959/4122] mm/damon/sysfs: remove parameters of
 damon_sysfs_region_alloc()

'damon_sysfs_region_alloc()' is always called with zero-filled 'struct
damon_addr_range', because the start and end addresses should set by
users.  Remove unnecessary parameters of the function and simplify the
body by using 'kzalloc()'.

Link: https://lkml.kernel.org/r/20221026225943.100429-7-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index a5ef503d84445..f3d7b34ea0ab8 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1065,17 +1065,9 @@ struct damon_sysfs_region {
 	struct damon_addr_range ar;
 };
 
-static struct damon_sysfs_region *damon_sysfs_region_alloc(
-		struct damon_addr_range ar)
+static struct damon_sysfs_region *damon_sysfs_region_alloc(void)
 {
-	struct damon_sysfs_region *region = kmalloc(sizeof(*region),
-			GFP_KERNEL);
-
-	if (!region)
-		return NULL;
-	region->kobj = (struct kobject){};
-	region->ar = ar;
-	return region;
+	return kzalloc(sizeof(struct damon_sysfs_region), GFP_KERNEL);
 }
 
 static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
@@ -1184,7 +1176,7 @@ static int damon_sysfs_regions_add_dirs(struct damon_sysfs_regions *regions,
 	regions->regions_arr = regions_arr;
 
 	for (i = 0; i < nr_regions; i++) {
-		region = damon_sysfs_region_alloc((struct damon_addr_range){});
+		region = damon_sysfs_region_alloc();
 		if (!region) {
 			damon_sysfs_regions_rm_dirs(regions);
 			return -ENOMEM;
-- 
GitLab


From 39240595917ec0c4f71d7b9dd7909790715968b5 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:38 +0000
Subject: [PATCH 2960/4122] mm/damon/sysfs: move sysfs_lock to common module

DAMON sysfs interface is implemented in a single file, sysfs.c, which has
about 2,800 lines of code.  As the interface is hierarchical and some of
the code can be reused by different hierarchies, it would make more sense
to split out the implementation into common parts and different parts in
multiple files.  As the beginning of the work, create files for common
code and move the global mutex for directories modifications protection
into the new file.

Link: https://lkml.kernel.org/r/20221026225943.100429-8-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/Makefile       |  2 +-
 mm/damon/sysfs-common.c | 11 +++++++++++
 mm/damon/sysfs-common.h | 11 +++++++++++
 mm/damon/sysfs.c        |  4 +---
 4 files changed, 24 insertions(+), 4 deletions(-)
 create mode 100644 mm/damon/sysfs-common.c
 create mode 100644 mm/damon/sysfs-common.h

diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index 3e6b8ad73858a..f8d535a6253ba 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -3,7 +3,7 @@
 obj-y				:= core.o
 obj-$(CONFIG_DAMON_VADDR)	+= ops-common.o vaddr.o
 obj-$(CONFIG_DAMON_PADDR)	+= ops-common.o paddr.o
-obj-$(CONFIG_DAMON_SYSFS)	+= sysfs.o
+obj-$(CONFIG_DAMON_SYSFS)	+= sysfs-common.o sysfs.o
 obj-$(CONFIG_DAMON_DBGFS)	+= dbgfs.o
 obj-$(CONFIG_DAMON_RECLAIM)	+= reclaim.o
 obj-$(CONFIG_DAMON_LRU_SORT)	+= lru_sort.o
diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c
new file mode 100644
index 0000000000000..9dc743868d5b6
--- /dev/null
+++ b/mm/damon/sysfs-common.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Primitives for DAMON Sysfs Interface
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include "sysfs-common.h"
+
+DEFINE_MUTEX(damon_sysfs_lock);
+
diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
new file mode 100644
index 0000000000000..745a918b94f5d
--- /dev/null
+++ b/mm/damon/sysfs-common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common Primitives for DAMON Sysfs Interface
+ *
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/damon.h>
+#include <linux/kobject.h>
+
+extern struct mutex damon_sysfs_lock;
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index f3d7b34ea0ab8..a847b9159718a 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -5,13 +5,11 @@
  * Copyright (c) 2022 SeongJae Park <sj@kernel.org>
  */
 
-#include <linux/damon.h>
-#include <linux/kobject.h>
 #include <linux/pid.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 
-static DEFINE_MUTEX(damon_sysfs_lock);
+#include "sysfs-common.h"
 
 /*
  * unsigned long range directory
-- 
GitLab


From d332fe11debe69fee3de4c2d84fa0b6649678ad2 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:39 +0000
Subject: [PATCH 2961/4122] mm/damon/sysfs: move unsigned long range directory
 to common module

The implementation of unsigned long type range directories can be reused
by multiple DAMON sysfs directories including those for DAMON-based
Operation Schemes and the range of number of monitoring regions.  Move the
code into the files for DAMON sysfs common logics.

Link: https://lkml.kernel.org/r/20221026225943.100429-9-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs-common.c |  96 ++++++++++++++++++++++++++++++++++++++
 mm/damon/sysfs-common.h |  13 ++++++
 mm/damon/sysfs.c        | 100 ----------------------------------------
 3 files changed, 109 insertions(+), 100 deletions(-)

diff --git a/mm/damon/sysfs-common.c b/mm/damon/sysfs-common.c
index 9dc743868d5b6..52bebf242f742 100644
--- a/mm/damon/sysfs-common.c
+++ b/mm/damon/sysfs-common.c
@@ -5,7 +5,103 @@
  * Author: SeongJae Park <sj@kernel.org>
  */
 
+#include <linux/slab.h>
+
 #include "sysfs-common.h"
 
 DEFINE_MUTEX(damon_sysfs_lock);
 
+/*
+ * unsigned long range directory
+ */
+
+struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
+		unsigned long min,
+		unsigned long max)
+{
+	struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range),
+			GFP_KERNEL);
+
+	if (!range)
+		return NULL;
+	range->kobj = (struct kobject){};
+	range->min = min;
+	range->max = max;
+
+	return range;
+}
+
+static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_ul_range *range = container_of(kobj,
+			struct damon_sysfs_ul_range, kobj);
+
+	return sysfs_emit(buf, "%lu\n", range->min);
+}
+
+static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct damon_sysfs_ul_range *range = container_of(kobj,
+			struct damon_sysfs_ul_range, kobj);
+	unsigned long min;
+	int err;
+
+	err = kstrtoul(buf, 0, &min);
+	if (err)
+		return err;
+
+	range->min = min;
+	return count;
+}
+
+static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_ul_range *range = container_of(kobj,
+			struct damon_sysfs_ul_range, kobj);
+
+	return sysfs_emit(buf, "%lu\n", range->max);
+}
+
+static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct damon_sysfs_ul_range *range = container_of(kobj,
+			struct damon_sysfs_ul_range, kobj);
+	unsigned long max;
+	int err;
+
+	err = kstrtoul(buf, 0, &max);
+	if (err)
+		return err;
+
+	range->max = max;
+	return count;
+}
+
+void damon_sysfs_ul_range_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_ul_range_min_attr =
+		__ATTR_RW_MODE(min, 0600);
+
+static struct kobj_attribute damon_sysfs_ul_range_max_attr =
+		__ATTR_RW_MODE(max, 0600);
+
+static struct attribute *damon_sysfs_ul_range_attrs[] = {
+	&damon_sysfs_ul_range_min_attr.attr,
+	&damon_sysfs_ul_range_max_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_ul_range);
+
+struct kobj_type damon_sysfs_ul_range_ktype = {
+	.release = damon_sysfs_ul_range_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_ul_range_groups,
+};
+
diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
index 745a918b94f5d..56e6a99e353b7 100644
--- a/mm/damon/sysfs-common.h
+++ b/mm/damon/sysfs-common.h
@@ -9,3 +9,16 @@
 #include <linux/kobject.h>
 
 extern struct mutex damon_sysfs_lock;
+
+struct damon_sysfs_ul_range {
+	struct kobject kobj;
+	unsigned long min;
+	unsigned long max;
+};
+
+struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
+		unsigned long min,
+		unsigned long max);
+void damon_sysfs_ul_range_release(struct kobject *kobj);
+
+extern struct kobj_type damon_sysfs_ul_range_ktype;
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index a847b9159718a..6774a669962e7 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -11,106 +11,6 @@
 
 #include "sysfs-common.h"
 
-/*
- * unsigned long range directory
- */
-
-struct damon_sysfs_ul_range {
-	struct kobject kobj;
-	unsigned long min;
-	unsigned long max;
-};
-
-static struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
-		unsigned long min,
-		unsigned long max)
-{
-	struct damon_sysfs_ul_range *range = kmalloc(sizeof(*range),
-			GFP_KERNEL);
-
-	if (!range)
-		return NULL;
-	range->kobj = (struct kobject){};
-	range->min = min;
-	range->max = max;
-
-	return range;
-}
-
-static ssize_t min_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_ul_range *range = container_of(kobj,
-			struct damon_sysfs_ul_range, kobj);
-
-	return sysfs_emit(buf, "%lu\n", range->min);
-}
-
-static ssize_t min_store(struct kobject *kobj, struct kobj_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct damon_sysfs_ul_range *range = container_of(kobj,
-			struct damon_sysfs_ul_range, kobj);
-	unsigned long min;
-	int err;
-
-	err = kstrtoul(buf, 0, &min);
-	if (err)
-		return err;
-
-	range->min = min;
-	return count;
-}
-
-static ssize_t max_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_ul_range *range = container_of(kobj,
-			struct damon_sysfs_ul_range, kobj);
-
-	return sysfs_emit(buf, "%lu\n", range->max);
-}
-
-static ssize_t max_store(struct kobject *kobj, struct kobj_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct damon_sysfs_ul_range *range = container_of(kobj,
-			struct damon_sysfs_ul_range, kobj);
-	unsigned long max;
-	int err;
-
-	err = kstrtoul(buf, 0, &max);
-	if (err)
-		return err;
-
-	range->max = max;
-	return count;
-}
-
-static void damon_sysfs_ul_range_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_ul_range, kobj));
-}
-
-static struct kobj_attribute damon_sysfs_ul_range_min_attr =
-		__ATTR_RW_MODE(min, 0600);
-
-static struct kobj_attribute damon_sysfs_ul_range_max_attr =
-		__ATTR_RW_MODE(max, 0600);
-
-static struct attribute *damon_sysfs_ul_range_attrs[] = {
-	&damon_sysfs_ul_range_min_attr.attr,
-	&damon_sysfs_ul_range_max_attr.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_ul_range);
-
-static struct kobj_type damon_sysfs_ul_range_ktype = {
-	.release = damon_sysfs_ul_range_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_ul_range_groups,
-};
-
 /*
  * schemes/stats directory
  */
-- 
GitLab


From 4acd715ff57fd05a481c64d074db68f2cf5711aa Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:40 +0000
Subject: [PATCH 2962/4122] mm/damon/sysfs: split out kdamond-independent
 schemes stats update logic into a new function

'damon_sysfs_schemes_update_stats()' is coupled with both
damon_sysfs_kdamond and damon_sysfs_schemes.  It's a wide range of types
dependency.  It makes splitting the logics a little bit distracting.
Split the function so that each function is coupled with smaller range of
types.

Link: https://lkml.kernel.org/r/20221026225943.100429-10-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs.c | 37 ++++++++++++++++++++++---------------
 1 file changed, 22 insertions(+), 15 deletions(-)

diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 6774a669962e7..836df19a7d86c 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -2246,25 +2246,13 @@ static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
 	mutex_unlock(&ctx->kdamond_lock);
 }
 
-/*
- * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files.
- * @kdamond:	The kobject wrapper that associated to the kdamond thread.
- *
- * This function reads the schemes stats of specific kdamond and update the
- * related values for sysfs files.  This function should be called from DAMON
- * callbacks while holding ``damon_syfs_lock``, to safely access the DAMON
- * contexts-internal data and DAMON sysfs variables.
- */
-static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond)
+static void damon_sysfs_schemes_update_stats(
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx)
 {
-	struct damon_ctx *ctx = kdamond->damon_ctx;
-	struct damon_sysfs_schemes *sysfs_schemes;
 	struct damos *scheme;
 	int schemes_idx = 0;
 
-	if (!ctx)
-		return -EINVAL;
-	sysfs_schemes = kdamond->contexts->contexts_arr[0]->schemes;
 	damon_for_each_scheme(scheme, ctx) {
 		struct damon_sysfs_stats *sysfs_stats;
 
@@ -2279,6 +2267,25 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond)
 		sysfs_stats->sz_applied = scheme->stat.sz_applied;
 		sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds;
 	}
+}
+
+/*
+ * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files.
+ * @kdamond:	The kobject wrapper that associated to the kdamond thread.
+ *
+ * This function reads the schemes stats of specific kdamond and update the
+ * related values for sysfs files.  This function should be called from DAMON
+ * callbacks while holding ``damon_syfs_lock``, to safely access the DAMON
+ * contexts-internal data and DAMON sysfs variables.
+ */
+static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond)
+{
+	struct damon_ctx *ctx = kdamond->damon_ctx;
+
+	if (!ctx)
+		return -EINVAL;
+	damon_sysfs_schemes_update_stats(
+			kdamond->contexts->contexts_arr[0]->schemes, ctx);
 	return 0;
 }
 
-- 
GitLab


From c8e7b4d0ba348a8ef14956a80c780f152f433764 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:41 +0000
Subject: [PATCH 2963/4122] mm/damon/sysfs: split out schemes directory
 implementation to separate file

DAMON sysfs interface for 'schemes' directory is implemented using about
one thousand lines of code.  It has no strong dependency with other
parts of its file, so split it out to another file for better code
management.

Link: https://lkml.kernel.org/r/20221026225943.100429-11-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/Makefile        |    2 +-
 mm/damon/sysfs-common.h  |   22 +
 mm/damon/sysfs-schemes.c | 1068 ++++++++++++++++++++++++++++++++++++++
 mm/damon/sysfs.c         | 1064 -------------------------------------
 4 files changed, 1091 insertions(+), 1065 deletions(-)
 create mode 100644 mm/damon/sysfs-schemes.c

diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index f8d535a6253ba..1e86f5253d7ff 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -3,7 +3,7 @@
 obj-y				:= core.o
 obj-$(CONFIG_DAMON_VADDR)	+= ops-common.o vaddr.o
 obj-$(CONFIG_DAMON_PADDR)	+= ops-common.o paddr.o
-obj-$(CONFIG_DAMON_SYSFS)	+= sysfs-common.o sysfs.o
+obj-$(CONFIG_DAMON_SYSFS)	+= sysfs-common.o sysfs-schemes.o sysfs.o
 obj-$(CONFIG_DAMON_DBGFS)	+= dbgfs.o
 obj-$(CONFIG_DAMON_RECLAIM)	+= reclaim.o
 obj-$(CONFIG_DAMON_LRU_SORT)	+= lru_sort.o
diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
index 56e6a99e353b7..4626b27844047 100644
--- a/mm/damon/sysfs-common.h
+++ b/mm/damon/sysfs-common.h
@@ -22,3 +22,25 @@ struct damon_sysfs_ul_range *damon_sysfs_ul_range_alloc(
 void damon_sysfs_ul_range_release(struct kobject *kobj);
 
 extern struct kobj_type damon_sysfs_ul_range_ktype;
+
+/*
+ * schemes directory
+ */
+
+struct damon_sysfs_schemes {
+	struct kobject kobj;
+	struct damon_sysfs_scheme **schemes_arr;
+	int nr;
+};
+
+struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void);
+void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes);
+
+extern struct kobj_type damon_sysfs_schemes_ktype;
+
+int damon_sysfs_set_schemes(struct damon_ctx *ctx,
+		struct damon_sysfs_schemes *sysfs_schemes);
+
+void damon_sysfs_schemes_update_stats(
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx);
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
new file mode 100644
index 0000000000000..9509d5c1e7fce
--- /dev/null
+++ b/mm/damon/sysfs-schemes.c
@@ -0,0 +1,1068 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DAMON sysfs Interface
+ *
+ * Copyright (c) 2022 SeongJae Park <sj@kernel.org>
+ */
+
+#include <linux/slab.h>
+
+#include "sysfs-common.h"
+
+/*
+ * schemes/stats directory
+ */
+
+struct damon_sysfs_stats {
+	struct kobject kobj;
+	unsigned long nr_tried;
+	unsigned long sz_tried;
+	unsigned long nr_applied;
+	unsigned long sz_applied;
+	unsigned long qt_exceeds;
+};
+
+static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void)
+{
+	return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL);
+}
+
+static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_stats *stats = container_of(kobj,
+			struct damon_sysfs_stats, kobj);
+
+	return sysfs_emit(buf, "%lu\n", stats->nr_tried);
+}
+
+static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_stats *stats = container_of(kobj,
+			struct damon_sysfs_stats, kobj);
+
+	return sysfs_emit(buf, "%lu\n", stats->sz_tried);
+}
+
+static ssize_t nr_applied_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_stats *stats = container_of(kobj,
+			struct damon_sysfs_stats, kobj);
+
+	return sysfs_emit(buf, "%lu\n", stats->nr_applied);
+}
+
+static ssize_t sz_applied_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_stats *stats = container_of(kobj,
+			struct damon_sysfs_stats, kobj);
+
+	return sysfs_emit(buf, "%lu\n", stats->sz_applied);
+}
+
+static ssize_t qt_exceeds_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_stats *stats = container_of(kobj,
+			struct damon_sysfs_stats, kobj);
+
+	return sysfs_emit(buf, "%lu\n", stats->qt_exceeds);
+}
+
+static void damon_sysfs_stats_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_stats, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_stats_nr_tried_attr =
+		__ATTR_RO_MODE(nr_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_tried_attr =
+		__ATTR_RO_MODE(sz_tried, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_nr_applied_attr =
+		__ATTR_RO_MODE(nr_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_sz_applied_attr =
+		__ATTR_RO_MODE(sz_applied, 0400);
+
+static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr =
+		__ATTR_RO_MODE(qt_exceeds, 0400);
+
+static struct attribute *damon_sysfs_stats_attrs[] = {
+	&damon_sysfs_stats_nr_tried_attr.attr,
+	&damon_sysfs_stats_sz_tried_attr.attr,
+	&damon_sysfs_stats_nr_applied_attr.attr,
+	&damon_sysfs_stats_sz_applied_attr.attr,
+	&damon_sysfs_stats_qt_exceeds_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_stats);
+
+static struct kobj_type damon_sysfs_stats_ktype = {
+	.release = damon_sysfs_stats_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_stats_groups,
+};
+
+/*
+ * watermarks directory
+ */
+
+struct damon_sysfs_watermarks {
+	struct kobject kobj;
+	enum damos_wmark_metric metric;
+	unsigned long interval_us;
+	unsigned long high;
+	unsigned long mid;
+	unsigned long low;
+};
+
+static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc(
+		enum damos_wmark_metric metric, unsigned long interval_us,
+		unsigned long high, unsigned long mid, unsigned long low)
+{
+	struct damon_sysfs_watermarks *watermarks = kmalloc(
+			sizeof(*watermarks), GFP_KERNEL);
+
+	if (!watermarks)
+		return NULL;
+	watermarks->kobj = (struct kobject){};
+	watermarks->metric = metric;
+	watermarks->interval_us = interval_us;
+	watermarks->high = high;
+	watermarks->mid = mid;
+	watermarks->low = low;
+	return watermarks;
+}
+
+/* Should match with enum damos_wmark_metric */
+static const char * const damon_sysfs_wmark_metric_strs[] = {
+	"none",
+	"free_mem_rate",
+};
+
+static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+
+	return sysfs_emit(buf, "%s\n",
+			damon_sysfs_wmark_metric_strs[watermarks->metric]);
+}
+
+static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+	enum damos_wmark_metric metric;
+
+	for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) {
+		if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) {
+			watermarks->metric = metric;
+			return count;
+		}
+	}
+	return -EINVAL;
+}
+
+static ssize_t interval_us_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+
+	return sysfs_emit(buf, "%lu\n", watermarks->interval_us);
+}
+
+static ssize_t interval_us_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+	int err = kstrtoul(buf, 0, &watermarks->interval_us);
+
+	return err ? err : count;
+}
+
+static ssize_t high_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+
+	return sysfs_emit(buf, "%lu\n", watermarks->high);
+}
+
+static ssize_t high_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+	int err = kstrtoul(buf, 0, &watermarks->high);
+
+	return err ? err : count;
+}
+
+static ssize_t mid_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+
+	return sysfs_emit(buf, "%lu\n", watermarks->mid);
+}
+
+static ssize_t mid_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+	int err = kstrtoul(buf, 0, &watermarks->mid);
+
+	return err ? err : count;
+}
+
+static ssize_t low_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+
+	return sysfs_emit(buf, "%lu\n", watermarks->low);
+}
+
+static ssize_t low_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
+			struct damon_sysfs_watermarks, kobj);
+	int err = kstrtoul(buf, 0, &watermarks->low);
+
+	return err ? err : count;
+}
+
+static void damon_sysfs_watermarks_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_watermarks_metric_attr =
+		__ATTR_RW_MODE(metric, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr =
+		__ATTR_RW_MODE(interval_us, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_high_attr =
+		__ATTR_RW_MODE(high, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_mid_attr =
+		__ATTR_RW_MODE(mid, 0600);
+
+static struct kobj_attribute damon_sysfs_watermarks_low_attr =
+		__ATTR_RW_MODE(low, 0600);
+
+static struct attribute *damon_sysfs_watermarks_attrs[] = {
+	&damon_sysfs_watermarks_metric_attr.attr,
+	&damon_sysfs_watermarks_interval_us_attr.attr,
+	&damon_sysfs_watermarks_high_attr.attr,
+	&damon_sysfs_watermarks_mid_attr.attr,
+	&damon_sysfs_watermarks_low_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_watermarks);
+
+static struct kobj_type damon_sysfs_watermarks_ktype = {
+	.release = damon_sysfs_watermarks_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_watermarks_groups,
+};
+
+/*
+ * scheme/weights directory
+ */
+
+struct damon_sysfs_weights {
+	struct kobject kobj;
+	unsigned int sz;
+	unsigned int nr_accesses;
+	unsigned int age;
+};
+
+static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz,
+		unsigned int nr_accesses, unsigned int age)
+{
+	struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights),
+			GFP_KERNEL);
+
+	if (!weights)
+		return NULL;
+	weights->kobj = (struct kobject){};
+	weights->sz = sz;
+	weights->nr_accesses = nr_accesses;
+	weights->age = age;
+	return weights;
+}
+
+static ssize_t sz_permil_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_weights *weights = container_of(kobj,
+			struct damon_sysfs_weights, kobj);
+
+	return sysfs_emit(buf, "%u\n", weights->sz);
+}
+
+static ssize_t sz_permil_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_weights *weights = container_of(kobj,
+			struct damon_sysfs_weights, kobj);
+	int err = kstrtouint(buf, 0, &weights->sz);
+
+	return err ? err : count;
+}
+
+static ssize_t nr_accesses_permil_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_weights *weights = container_of(kobj,
+			struct damon_sysfs_weights, kobj);
+
+	return sysfs_emit(buf, "%u\n", weights->nr_accesses);
+}
+
+static ssize_t nr_accesses_permil_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_weights *weights = container_of(kobj,
+			struct damon_sysfs_weights, kobj);
+	int err = kstrtouint(buf, 0, &weights->nr_accesses);
+
+	return err ? err : count;
+}
+
+static ssize_t age_permil_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_weights *weights = container_of(kobj,
+			struct damon_sysfs_weights, kobj);
+
+	return sysfs_emit(buf, "%u\n", weights->age);
+}
+
+static ssize_t age_permil_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_weights *weights = container_of(kobj,
+			struct damon_sysfs_weights, kobj);
+	int err = kstrtouint(buf, 0, &weights->age);
+
+	return err ? err : count;
+}
+
+static void damon_sysfs_weights_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_weights, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_weights_sz_attr =
+		__ATTR_RW_MODE(sz_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr =
+		__ATTR_RW_MODE(nr_accesses_permil, 0600);
+
+static struct kobj_attribute damon_sysfs_weights_age_attr =
+		__ATTR_RW_MODE(age_permil, 0600);
+
+static struct attribute *damon_sysfs_weights_attrs[] = {
+	&damon_sysfs_weights_sz_attr.attr,
+	&damon_sysfs_weights_nr_accesses_attr.attr,
+	&damon_sysfs_weights_age_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_weights);
+
+static struct kobj_type damon_sysfs_weights_ktype = {
+	.release = damon_sysfs_weights_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_weights_groups,
+};
+
+/*
+ * quotas directory
+ */
+
+struct damon_sysfs_quotas {
+	struct kobject kobj;
+	struct damon_sysfs_weights *weights;
+	unsigned long ms;
+	unsigned long sz;
+	unsigned long reset_interval_ms;
+};
+
+static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void)
+{
+	return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL);
+}
+
+static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas)
+{
+	struct damon_sysfs_weights *weights;
+	int err;
+
+	weights = damon_sysfs_weights_alloc(0, 0, 0);
+	if (!weights)
+		return -ENOMEM;
+
+	err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype,
+			&quotas->kobj, "weights");
+	if (err)
+		kobject_put(&weights->kobj);
+	else
+		quotas->weights = weights;
+	return err;
+}
+
+static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas)
+{
+	kobject_put(&quotas->weights->kobj);
+}
+
+static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_quotas *quotas = container_of(kobj,
+			struct damon_sysfs_quotas, kobj);
+
+	return sysfs_emit(buf, "%lu\n", quotas->ms);
+}
+
+static ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct damon_sysfs_quotas *quotas = container_of(kobj,
+			struct damon_sysfs_quotas, kobj);
+	int err = kstrtoul(buf, 0, &quotas->ms);
+
+	if (err)
+		return -EINVAL;
+	return count;
+}
+
+static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_quotas *quotas = container_of(kobj,
+			struct damon_sysfs_quotas, kobj);
+
+	return sysfs_emit(buf, "%lu\n", quotas->sz);
+}
+
+static ssize_t bytes_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_quotas *quotas = container_of(kobj,
+			struct damon_sysfs_quotas, kobj);
+	int err = kstrtoul(buf, 0, &quotas->sz);
+
+	if (err)
+		return -EINVAL;
+	return count;
+}
+
+static ssize_t reset_interval_ms_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_quotas *quotas = container_of(kobj,
+			struct damon_sysfs_quotas, kobj);
+
+	return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms);
+}
+
+static ssize_t reset_interval_ms_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_quotas *quotas = container_of(kobj,
+			struct damon_sysfs_quotas, kobj);
+	int err = kstrtoul(buf, 0, &quotas->reset_interval_ms);
+
+	if (err)
+		return -EINVAL;
+	return count;
+}
+
+static void damon_sysfs_quotas_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_quotas, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_quotas_ms_attr =
+		__ATTR_RW_MODE(ms, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_sz_attr =
+		__ATTR_RW_MODE(bytes, 0600);
+
+static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr =
+		__ATTR_RW_MODE(reset_interval_ms, 0600);
+
+static struct attribute *damon_sysfs_quotas_attrs[] = {
+	&damon_sysfs_quotas_ms_attr.attr,
+	&damon_sysfs_quotas_sz_attr.attr,
+	&damon_sysfs_quotas_reset_interval_ms_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_quotas);
+
+static struct kobj_type damon_sysfs_quotas_ktype = {
+	.release = damon_sysfs_quotas_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_quotas_groups,
+};
+
+/*
+ * access_pattern directory
+ */
+
+struct damon_sysfs_access_pattern {
+	struct kobject kobj;
+	struct damon_sysfs_ul_range *sz;
+	struct damon_sysfs_ul_range *nr_accesses;
+	struct damon_sysfs_ul_range *age;
+};
+
+static
+struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void)
+{
+	struct damon_sysfs_access_pattern *access_pattern =
+		kmalloc(sizeof(*access_pattern), GFP_KERNEL);
+
+	if (!access_pattern)
+		return NULL;
+	access_pattern->kobj = (struct kobject){};
+	return access_pattern;
+}
+
+static int damon_sysfs_access_pattern_add_range_dir(
+		struct damon_sysfs_access_pattern *access_pattern,
+		struct damon_sysfs_ul_range **range_dir_ptr,
+		char *name)
+{
+	struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0);
+	int err;
+
+	if (!range)
+		return -ENOMEM;
+	err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype,
+			&access_pattern->kobj, name);
+	if (err)
+		kobject_put(&range->kobj);
+	else
+		*range_dir_ptr = range;
+	return err;
+}
+
+static int damon_sysfs_access_pattern_add_dirs(
+		struct damon_sysfs_access_pattern *access_pattern)
+{
+	int err;
+
+	err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+			&access_pattern->sz, "sz");
+	if (err)
+		goto put_sz_out;
+
+	err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+			&access_pattern->nr_accesses, "nr_accesses");
+	if (err)
+		goto put_nr_accesses_sz_out;
+
+	err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
+			&access_pattern->age, "age");
+	if (err)
+		goto put_age_nr_accesses_sz_out;
+	return 0;
+
+put_age_nr_accesses_sz_out:
+	kobject_put(&access_pattern->age->kobj);
+	access_pattern->age = NULL;
+put_nr_accesses_sz_out:
+	kobject_put(&access_pattern->nr_accesses->kobj);
+	access_pattern->nr_accesses = NULL;
+put_sz_out:
+	kobject_put(&access_pattern->sz->kobj);
+	access_pattern->sz = NULL;
+	return err;
+}
+
+static void damon_sysfs_access_pattern_rm_dirs(
+		struct damon_sysfs_access_pattern *access_pattern)
+{
+	kobject_put(&access_pattern->sz->kobj);
+	kobject_put(&access_pattern->nr_accesses->kobj);
+	kobject_put(&access_pattern->age->kobj);
+}
+
+static void damon_sysfs_access_pattern_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj));
+}
+
+static struct attribute *damon_sysfs_access_pattern_attrs[] = {
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_access_pattern);
+
+static struct kobj_type damon_sysfs_access_pattern_ktype = {
+	.release = damon_sysfs_access_pattern_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_access_pattern_groups,
+};
+
+/*
+ * scheme directory
+ */
+
+struct damon_sysfs_scheme {
+	struct kobject kobj;
+	enum damos_action action;
+	struct damon_sysfs_access_pattern *access_pattern;
+	struct damon_sysfs_quotas *quotas;
+	struct damon_sysfs_watermarks *watermarks;
+	struct damon_sysfs_stats *stats;
+};
+
+/* This should match with enum damos_action */
+static const char * const damon_sysfs_damos_action_strs[] = {
+	"willneed",
+	"cold",
+	"pageout",
+	"hugepage",
+	"nohugepage",
+	"lru_prio",
+	"lru_deprio",
+	"stat",
+};
+
+static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc(
+		enum damos_action action)
+{
+	struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme),
+				GFP_KERNEL);
+
+	if (!scheme)
+		return NULL;
+	scheme->kobj = (struct kobject){};
+	scheme->action = action;
+	return scheme;
+}
+
+static int damon_sysfs_scheme_set_access_pattern(
+		struct damon_sysfs_scheme *scheme)
+{
+	struct damon_sysfs_access_pattern *access_pattern;
+	int err;
+
+	access_pattern = damon_sysfs_access_pattern_alloc();
+	if (!access_pattern)
+		return -ENOMEM;
+	err = kobject_init_and_add(&access_pattern->kobj,
+			&damon_sysfs_access_pattern_ktype, &scheme->kobj,
+			"access_pattern");
+	if (err)
+		goto out;
+	err = damon_sysfs_access_pattern_add_dirs(access_pattern);
+	if (err)
+		goto out;
+	scheme->access_pattern = access_pattern;
+	return 0;
+
+out:
+	kobject_put(&access_pattern->kobj);
+	return err;
+}
+
+static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme)
+{
+	struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc();
+	int err;
+
+	if (!quotas)
+		return -ENOMEM;
+	err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype,
+			&scheme->kobj, "quotas");
+	if (err)
+		goto out;
+	err = damon_sysfs_quotas_add_dirs(quotas);
+	if (err)
+		goto out;
+	scheme->quotas = quotas;
+	return 0;
+
+out:
+	kobject_put(&quotas->kobj);
+	return err;
+}
+
+static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme)
+{
+	struct damon_sysfs_watermarks *watermarks =
+		damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0);
+	int err;
+
+	if (!watermarks)
+		return -ENOMEM;
+	err = kobject_init_and_add(&watermarks->kobj,
+			&damon_sysfs_watermarks_ktype, &scheme->kobj,
+			"watermarks");
+	if (err)
+		kobject_put(&watermarks->kobj);
+	else
+		scheme->watermarks = watermarks;
+	return err;
+}
+
+static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme)
+{
+	struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc();
+	int err;
+
+	if (!stats)
+		return -ENOMEM;
+	err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype,
+			&scheme->kobj, "stats");
+	if (err)
+		kobject_put(&stats->kobj);
+	else
+		scheme->stats = stats;
+	return err;
+}
+
+static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
+{
+	int err;
+
+	err = damon_sysfs_scheme_set_access_pattern(scheme);
+	if (err)
+		return err;
+	err = damon_sysfs_scheme_set_quotas(scheme);
+	if (err)
+		goto put_access_pattern_out;
+	err = damon_sysfs_scheme_set_watermarks(scheme);
+	if (err)
+		goto put_quotas_access_pattern_out;
+	err = damon_sysfs_scheme_set_stats(scheme);
+	if (err)
+		goto put_watermarks_quotas_access_pattern_out;
+	return 0;
+
+put_watermarks_quotas_access_pattern_out:
+	kobject_put(&scheme->watermarks->kobj);
+	scheme->watermarks = NULL;
+put_quotas_access_pattern_out:
+	kobject_put(&scheme->quotas->kobj);
+	scheme->quotas = NULL;
+put_access_pattern_out:
+	kobject_put(&scheme->access_pattern->kobj);
+	scheme->access_pattern = NULL;
+	return err;
+}
+
+static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme)
+{
+	damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern);
+	kobject_put(&scheme->access_pattern->kobj);
+	damon_sysfs_quotas_rm_dirs(scheme->quotas);
+	kobject_put(&scheme->quotas->kobj);
+	kobject_put(&scheme->watermarks->kobj);
+	kobject_put(&scheme->stats->kobj);
+}
+
+static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_scheme *scheme = container_of(kobj,
+			struct damon_sysfs_scheme, kobj);
+
+	return sysfs_emit(buf, "%s\n",
+			damon_sysfs_damos_action_strs[scheme->action]);
+}
+
+static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct damon_sysfs_scheme *scheme = container_of(kobj,
+			struct damon_sysfs_scheme, kobj);
+	enum damos_action action;
+
+	for (action = 0; action < NR_DAMOS_ACTIONS; action++) {
+		if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) {
+			scheme->action = action;
+			return count;
+		}
+	}
+	return -EINVAL;
+}
+
+static void damon_sysfs_scheme_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_scheme, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_scheme_action_attr =
+		__ATTR_RW_MODE(action, 0600);
+
+static struct attribute *damon_sysfs_scheme_attrs[] = {
+	&damon_sysfs_scheme_action_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme);
+
+static struct kobj_type damon_sysfs_scheme_ktype = {
+	.release = damon_sysfs_scheme_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_scheme_groups,
+};
+
+/*
+ * schemes directory
+ */
+
+struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void)
+{
+	return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL);
+}
+
+void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes)
+{
+	struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr;
+	int i;
+
+	for (i = 0; i < schemes->nr; i++) {
+		damon_sysfs_scheme_rm_dirs(schemes_arr[i]);
+		kobject_put(&schemes_arr[i]->kobj);
+	}
+	schemes->nr = 0;
+	kfree(schemes_arr);
+	schemes->schemes_arr = NULL;
+}
+
+static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes,
+		int nr_schemes)
+{
+	struct damon_sysfs_scheme **schemes_arr, *scheme;
+	int err, i;
+
+	damon_sysfs_schemes_rm_dirs(schemes);
+	if (!nr_schemes)
+		return 0;
+
+	schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr),
+			GFP_KERNEL | __GFP_NOWARN);
+	if (!schemes_arr)
+		return -ENOMEM;
+	schemes->schemes_arr = schemes_arr;
+
+	for (i = 0; i < nr_schemes; i++) {
+		scheme = damon_sysfs_scheme_alloc(DAMOS_STAT);
+		if (!scheme) {
+			damon_sysfs_schemes_rm_dirs(schemes);
+			return -ENOMEM;
+		}
+
+		err = kobject_init_and_add(&scheme->kobj,
+				&damon_sysfs_scheme_ktype, &schemes->kobj,
+				"%d", i);
+		if (err)
+			goto out;
+		err = damon_sysfs_scheme_add_dirs(scheme);
+		if (err)
+			goto out;
+
+		schemes_arr[i] = scheme;
+		schemes->nr++;
+	}
+	return 0;
+
+out:
+	damon_sysfs_schemes_rm_dirs(schemes);
+	kobject_put(&scheme->kobj);
+	return err;
+}
+
+static ssize_t nr_schemes_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_schemes *schemes = container_of(kobj,
+			struct damon_sysfs_schemes, kobj);
+
+	return sysfs_emit(buf, "%d\n", schemes->nr);
+}
+
+static ssize_t nr_schemes_store(struct kobject *kobj,
+		struct kobj_attribute *attr, const char *buf, size_t count)
+{
+	struct damon_sysfs_schemes *schemes;
+	int nr, err = kstrtoint(buf, 0, &nr);
+
+	if (err)
+		return err;
+	if (nr < 0)
+		return -EINVAL;
+
+	schemes = container_of(kobj, struct damon_sysfs_schemes, kobj);
+
+	if (!mutex_trylock(&damon_sysfs_lock))
+		return -EBUSY;
+	err = damon_sysfs_schemes_add_dirs(schemes, nr);
+	mutex_unlock(&damon_sysfs_lock);
+	if (err)
+		return err;
+	return count;
+}
+
+static void damon_sysfs_schemes_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_schemes, kobj));
+}
+
+static struct kobj_attribute damon_sysfs_schemes_nr_attr =
+		__ATTR_RW_MODE(nr_schemes, 0600);
+
+static struct attribute *damon_sysfs_schemes_attrs[] = {
+	&damon_sysfs_schemes_nr_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_schemes);
+
+struct kobj_type damon_sysfs_schemes_ktype = {
+	.release = damon_sysfs_schemes_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_schemes_groups,
+};
+
+static struct damos *damon_sysfs_mk_scheme(
+		struct damon_sysfs_scheme *sysfs_scheme)
+{
+	struct damon_sysfs_access_pattern *access_pattern =
+		sysfs_scheme->access_pattern;
+	struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+	struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+	struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+
+	struct damos_access_pattern pattern = {
+		.min_sz_region = access_pattern->sz->min,
+		.max_sz_region = access_pattern->sz->max,
+		.min_nr_accesses = access_pattern->nr_accesses->min,
+		.max_nr_accesses = access_pattern->nr_accesses->max,
+		.min_age_region = access_pattern->age->min,
+		.max_age_region = access_pattern->age->max,
+	};
+	struct damos_quota quota = {
+		.ms = sysfs_quotas->ms,
+		.sz = sysfs_quotas->sz,
+		.reset_interval = sysfs_quotas->reset_interval_ms,
+		.weight_sz = sysfs_weights->sz,
+		.weight_nr_accesses = sysfs_weights->nr_accesses,
+		.weight_age = sysfs_weights->age,
+	};
+	struct damos_watermarks wmarks = {
+		.metric = sysfs_wmarks->metric,
+		.interval = sysfs_wmarks->interval_us,
+		.high = sysfs_wmarks->high,
+		.mid = sysfs_wmarks->mid,
+		.low = sysfs_wmarks->low,
+	};
+
+	return damon_new_scheme(&pattern, sysfs_scheme->action, &quota,
+			&wmarks);
+}
+
+static void damon_sysfs_update_scheme(struct damos *scheme,
+		struct damon_sysfs_scheme *sysfs_scheme)
+{
+	struct damon_sysfs_access_pattern *access_pattern =
+		sysfs_scheme->access_pattern;
+	struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
+	struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
+	struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
+
+	scheme->pattern.min_sz_region = access_pattern->sz->min;
+	scheme->pattern.max_sz_region = access_pattern->sz->max;
+	scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min;
+	scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max;
+	scheme->pattern.min_age_region = access_pattern->age->min;
+	scheme->pattern.max_age_region = access_pattern->age->max;
+
+	scheme->action = sysfs_scheme->action;
+
+	scheme->quota.ms = sysfs_quotas->ms;
+	scheme->quota.sz = sysfs_quotas->sz;
+	scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms;
+	scheme->quota.weight_sz = sysfs_weights->sz;
+	scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
+	scheme->quota.weight_age = sysfs_weights->age;
+
+	scheme->wmarks.metric = sysfs_wmarks->metric;
+	scheme->wmarks.interval = sysfs_wmarks->interval_us;
+	scheme->wmarks.high = sysfs_wmarks->high;
+	scheme->wmarks.mid = sysfs_wmarks->mid;
+	scheme->wmarks.low = sysfs_wmarks->low;
+}
+
+int damon_sysfs_set_schemes(struct damon_ctx *ctx,
+		struct damon_sysfs_schemes *sysfs_schemes)
+{
+	struct damos *scheme, *next;
+	int i = 0;
+
+	damon_for_each_scheme_safe(scheme, next, ctx) {
+		if (i < sysfs_schemes->nr)
+			damon_sysfs_update_scheme(scheme,
+					sysfs_schemes->schemes_arr[i]);
+		else
+			damon_destroy_scheme(scheme);
+		i++;
+	}
+
+	for (; i < sysfs_schemes->nr; i++) {
+		struct damos *scheme, *next;
+
+		scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
+		if (!scheme) {
+			damon_for_each_scheme_safe(scheme, next, ctx)
+				damon_destroy_scheme(scheme);
+			return -ENOMEM;
+		}
+		damon_add_scheme(ctx, scheme);
+	}
+	return 0;
+}
+
+void damon_sysfs_schemes_update_stats(
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx)
+{
+	struct damos *scheme;
+	int schemes_idx = 0;
+
+	damon_for_each_scheme(scheme, ctx) {
+		struct damon_sysfs_stats *sysfs_stats;
+
+		/* user could have removed the scheme sysfs dir */
+		if (schemes_idx >= sysfs_schemes->nr)
+			break;
+
+		sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
+		sysfs_stats->nr_tried = scheme->stat.nr_tried;
+		sysfs_stats->sz_tried = scheme->stat.sz_tried;
+		sysfs_stats->nr_applied = scheme->stat.nr_applied;
+		sysfs_stats->sz_applied = scheme->stat.sz_applied;
+		sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds;
+	}
+}
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 836df19a7d86c..284daf274b3ed 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -11,949 +11,6 @@
 
 #include "sysfs-common.h"
 
-/*
- * schemes/stats directory
- */
-
-struct damon_sysfs_stats {
-	struct kobject kobj;
-	unsigned long nr_tried;
-	unsigned long sz_tried;
-	unsigned long nr_applied;
-	unsigned long sz_applied;
-	unsigned long qt_exceeds;
-};
-
-static struct damon_sysfs_stats *damon_sysfs_stats_alloc(void)
-{
-	return kzalloc(sizeof(struct damon_sysfs_stats), GFP_KERNEL);
-}
-
-static ssize_t nr_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_stats *stats = container_of(kobj,
-			struct damon_sysfs_stats, kobj);
-
-	return sysfs_emit(buf, "%lu\n", stats->nr_tried);
-}
-
-static ssize_t sz_tried_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_stats *stats = container_of(kobj,
-			struct damon_sysfs_stats, kobj);
-
-	return sysfs_emit(buf, "%lu\n", stats->sz_tried);
-}
-
-static ssize_t nr_applied_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_stats *stats = container_of(kobj,
-			struct damon_sysfs_stats, kobj);
-
-	return sysfs_emit(buf, "%lu\n", stats->nr_applied);
-}
-
-static ssize_t sz_applied_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_stats *stats = container_of(kobj,
-			struct damon_sysfs_stats, kobj);
-
-	return sysfs_emit(buf, "%lu\n", stats->sz_applied);
-}
-
-static ssize_t qt_exceeds_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_stats *stats = container_of(kobj,
-			struct damon_sysfs_stats, kobj);
-
-	return sysfs_emit(buf, "%lu\n", stats->qt_exceeds);
-}
-
-static void damon_sysfs_stats_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_stats, kobj));
-}
-
-static struct kobj_attribute damon_sysfs_stats_nr_tried_attr =
-		__ATTR_RO_MODE(nr_tried, 0400);
-
-static struct kobj_attribute damon_sysfs_stats_sz_tried_attr =
-		__ATTR_RO_MODE(sz_tried, 0400);
-
-static struct kobj_attribute damon_sysfs_stats_nr_applied_attr =
-		__ATTR_RO_MODE(nr_applied, 0400);
-
-static struct kobj_attribute damon_sysfs_stats_sz_applied_attr =
-		__ATTR_RO_MODE(sz_applied, 0400);
-
-static struct kobj_attribute damon_sysfs_stats_qt_exceeds_attr =
-		__ATTR_RO_MODE(qt_exceeds, 0400);
-
-static struct attribute *damon_sysfs_stats_attrs[] = {
-	&damon_sysfs_stats_nr_tried_attr.attr,
-	&damon_sysfs_stats_sz_tried_attr.attr,
-	&damon_sysfs_stats_nr_applied_attr.attr,
-	&damon_sysfs_stats_sz_applied_attr.attr,
-	&damon_sysfs_stats_qt_exceeds_attr.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_stats);
-
-static struct kobj_type damon_sysfs_stats_ktype = {
-	.release = damon_sysfs_stats_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_stats_groups,
-};
-
-/*
- * watermarks directory
- */
-
-struct damon_sysfs_watermarks {
-	struct kobject kobj;
-	enum damos_wmark_metric metric;
-	unsigned long interval_us;
-	unsigned long high;
-	unsigned long mid;
-	unsigned long low;
-};
-
-static struct damon_sysfs_watermarks *damon_sysfs_watermarks_alloc(
-		enum damos_wmark_metric metric, unsigned long interval_us,
-		unsigned long high, unsigned long mid, unsigned long low)
-{
-	struct damon_sysfs_watermarks *watermarks = kmalloc(
-			sizeof(*watermarks), GFP_KERNEL);
-
-	if (!watermarks)
-		return NULL;
-	watermarks->kobj = (struct kobject){};
-	watermarks->metric = metric;
-	watermarks->interval_us = interval_us;
-	watermarks->high = high;
-	watermarks->mid = mid;
-	watermarks->low = low;
-	return watermarks;
-}
-
-/* Should match with enum damos_wmark_metric */
-static const char * const damon_sysfs_wmark_metric_strs[] = {
-	"none",
-	"free_mem_rate",
-};
-
-static ssize_t metric_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-
-	return sysfs_emit(buf, "%s\n",
-			damon_sysfs_wmark_metric_strs[watermarks->metric]);
-}
-
-static ssize_t metric_store(struct kobject *kobj, struct kobj_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-	enum damos_wmark_metric metric;
-
-	for (metric = 0; metric < NR_DAMOS_WMARK_METRICS; metric++) {
-		if (sysfs_streq(buf, damon_sysfs_wmark_metric_strs[metric])) {
-			watermarks->metric = metric;
-			return count;
-		}
-	}
-	return -EINVAL;
-}
-
-static ssize_t interval_us_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-
-	return sysfs_emit(buf, "%lu\n", watermarks->interval_us);
-}
-
-static ssize_t interval_us_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-	int err = kstrtoul(buf, 0, &watermarks->interval_us);
-
-	return err ? err : count;
-}
-
-static ssize_t high_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-
-	return sysfs_emit(buf, "%lu\n", watermarks->high);
-}
-
-static ssize_t high_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-	int err = kstrtoul(buf, 0, &watermarks->high);
-
-	return err ? err : count;
-}
-
-static ssize_t mid_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-
-	return sysfs_emit(buf, "%lu\n", watermarks->mid);
-}
-
-static ssize_t mid_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-	int err = kstrtoul(buf, 0, &watermarks->mid);
-
-	return err ? err : count;
-}
-
-static ssize_t low_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-
-	return sysfs_emit(buf, "%lu\n", watermarks->low);
-}
-
-static ssize_t low_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_watermarks *watermarks = container_of(kobj,
-			struct damon_sysfs_watermarks, kobj);
-	int err = kstrtoul(buf, 0, &watermarks->low);
-
-	return err ? err : count;
-}
-
-static void damon_sysfs_watermarks_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_watermarks, kobj));
-}
-
-static struct kobj_attribute damon_sysfs_watermarks_metric_attr =
-		__ATTR_RW_MODE(metric, 0600);
-
-static struct kobj_attribute damon_sysfs_watermarks_interval_us_attr =
-		__ATTR_RW_MODE(interval_us, 0600);
-
-static struct kobj_attribute damon_sysfs_watermarks_high_attr =
-		__ATTR_RW_MODE(high, 0600);
-
-static struct kobj_attribute damon_sysfs_watermarks_mid_attr =
-		__ATTR_RW_MODE(mid, 0600);
-
-static struct kobj_attribute damon_sysfs_watermarks_low_attr =
-		__ATTR_RW_MODE(low, 0600);
-
-static struct attribute *damon_sysfs_watermarks_attrs[] = {
-	&damon_sysfs_watermarks_metric_attr.attr,
-	&damon_sysfs_watermarks_interval_us_attr.attr,
-	&damon_sysfs_watermarks_high_attr.attr,
-	&damon_sysfs_watermarks_mid_attr.attr,
-	&damon_sysfs_watermarks_low_attr.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_watermarks);
-
-static struct kobj_type damon_sysfs_watermarks_ktype = {
-	.release = damon_sysfs_watermarks_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_watermarks_groups,
-};
-
-/*
- * scheme/weights directory
- */
-
-struct damon_sysfs_weights {
-	struct kobject kobj;
-	unsigned int sz;
-	unsigned int nr_accesses;
-	unsigned int age;
-};
-
-static struct damon_sysfs_weights *damon_sysfs_weights_alloc(unsigned int sz,
-		unsigned int nr_accesses, unsigned int age)
-{
-	struct damon_sysfs_weights *weights = kmalloc(sizeof(*weights),
-			GFP_KERNEL);
-
-	if (!weights)
-		return NULL;
-	weights->kobj = (struct kobject){};
-	weights->sz = sz;
-	weights->nr_accesses = nr_accesses;
-	weights->age = age;
-	return weights;
-}
-
-static ssize_t sz_permil_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_weights *weights = container_of(kobj,
-			struct damon_sysfs_weights, kobj);
-
-	return sysfs_emit(buf, "%u\n", weights->sz);
-}
-
-static ssize_t sz_permil_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_weights *weights = container_of(kobj,
-			struct damon_sysfs_weights, kobj);
-	int err = kstrtouint(buf, 0, &weights->sz);
-
-	return err ? err : count;
-}
-
-static ssize_t nr_accesses_permil_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_weights *weights = container_of(kobj,
-			struct damon_sysfs_weights, kobj);
-
-	return sysfs_emit(buf, "%u\n", weights->nr_accesses);
-}
-
-static ssize_t nr_accesses_permil_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_weights *weights = container_of(kobj,
-			struct damon_sysfs_weights, kobj);
-	int err = kstrtouint(buf, 0, &weights->nr_accesses);
-
-	return err ? err : count;
-}
-
-static ssize_t age_permil_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_weights *weights = container_of(kobj,
-			struct damon_sysfs_weights, kobj);
-
-	return sysfs_emit(buf, "%u\n", weights->age);
-}
-
-static ssize_t age_permil_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_weights *weights = container_of(kobj,
-			struct damon_sysfs_weights, kobj);
-	int err = kstrtouint(buf, 0, &weights->age);
-
-	return err ? err : count;
-}
-
-static void damon_sysfs_weights_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_weights, kobj));
-}
-
-static struct kobj_attribute damon_sysfs_weights_sz_attr =
-		__ATTR_RW_MODE(sz_permil, 0600);
-
-static struct kobj_attribute damon_sysfs_weights_nr_accesses_attr =
-		__ATTR_RW_MODE(nr_accesses_permil, 0600);
-
-static struct kobj_attribute damon_sysfs_weights_age_attr =
-		__ATTR_RW_MODE(age_permil, 0600);
-
-static struct attribute *damon_sysfs_weights_attrs[] = {
-	&damon_sysfs_weights_sz_attr.attr,
-	&damon_sysfs_weights_nr_accesses_attr.attr,
-	&damon_sysfs_weights_age_attr.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_weights);
-
-static struct kobj_type damon_sysfs_weights_ktype = {
-	.release = damon_sysfs_weights_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_weights_groups,
-};
-
-/*
- * quotas directory
- */
-
-struct damon_sysfs_quotas {
-	struct kobject kobj;
-	struct damon_sysfs_weights *weights;
-	unsigned long ms;
-	unsigned long sz;
-	unsigned long reset_interval_ms;
-};
-
-static struct damon_sysfs_quotas *damon_sysfs_quotas_alloc(void)
-{
-	return kzalloc(sizeof(struct damon_sysfs_quotas), GFP_KERNEL);
-}
-
-static int damon_sysfs_quotas_add_dirs(struct damon_sysfs_quotas *quotas)
-{
-	struct damon_sysfs_weights *weights;
-	int err;
-
-	weights = damon_sysfs_weights_alloc(0, 0, 0);
-	if (!weights)
-		return -ENOMEM;
-
-	err = kobject_init_and_add(&weights->kobj, &damon_sysfs_weights_ktype,
-			&quotas->kobj, "weights");
-	if (err)
-		kobject_put(&weights->kobj);
-	else
-		quotas->weights = weights;
-	return err;
-}
-
-static void damon_sysfs_quotas_rm_dirs(struct damon_sysfs_quotas *quotas)
-{
-	kobject_put(&quotas->weights->kobj);
-}
-
-static ssize_t ms_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_quotas *quotas = container_of(kobj,
-			struct damon_sysfs_quotas, kobj);
-
-	return sysfs_emit(buf, "%lu\n", quotas->ms);
-}
-
-static ssize_t ms_store(struct kobject *kobj, struct kobj_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct damon_sysfs_quotas *quotas = container_of(kobj,
-			struct damon_sysfs_quotas, kobj);
-	int err = kstrtoul(buf, 0, &quotas->ms);
-
-	if (err)
-		return -EINVAL;
-	return count;
-}
-
-static ssize_t bytes_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_quotas *quotas = container_of(kobj,
-			struct damon_sysfs_quotas, kobj);
-
-	return sysfs_emit(buf, "%lu\n", quotas->sz);
-}
-
-static ssize_t bytes_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_quotas *quotas = container_of(kobj,
-			struct damon_sysfs_quotas, kobj);
-	int err = kstrtoul(buf, 0, &quotas->sz);
-
-	if (err)
-		return -EINVAL;
-	return count;
-}
-
-static ssize_t reset_interval_ms_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_quotas *quotas = container_of(kobj,
-			struct damon_sysfs_quotas, kobj);
-
-	return sysfs_emit(buf, "%lu\n", quotas->reset_interval_ms);
-}
-
-static ssize_t reset_interval_ms_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_quotas *quotas = container_of(kobj,
-			struct damon_sysfs_quotas, kobj);
-	int err = kstrtoul(buf, 0, &quotas->reset_interval_ms);
-
-	if (err)
-		return -EINVAL;
-	return count;
-}
-
-static void damon_sysfs_quotas_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_quotas, kobj));
-}
-
-static struct kobj_attribute damon_sysfs_quotas_ms_attr =
-		__ATTR_RW_MODE(ms, 0600);
-
-static struct kobj_attribute damon_sysfs_quotas_sz_attr =
-		__ATTR_RW_MODE(bytes, 0600);
-
-static struct kobj_attribute damon_sysfs_quotas_reset_interval_ms_attr =
-		__ATTR_RW_MODE(reset_interval_ms, 0600);
-
-static struct attribute *damon_sysfs_quotas_attrs[] = {
-	&damon_sysfs_quotas_ms_attr.attr,
-	&damon_sysfs_quotas_sz_attr.attr,
-	&damon_sysfs_quotas_reset_interval_ms_attr.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_quotas);
-
-static struct kobj_type damon_sysfs_quotas_ktype = {
-	.release = damon_sysfs_quotas_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_quotas_groups,
-};
-
-/*
- * access_pattern directory
- */
-
-struct damon_sysfs_access_pattern {
-	struct kobject kobj;
-	struct damon_sysfs_ul_range *sz;
-	struct damon_sysfs_ul_range *nr_accesses;
-	struct damon_sysfs_ul_range *age;
-};
-
-static
-struct damon_sysfs_access_pattern *damon_sysfs_access_pattern_alloc(void)
-{
-	struct damon_sysfs_access_pattern *access_pattern =
-		kmalloc(sizeof(*access_pattern), GFP_KERNEL);
-
-	if (!access_pattern)
-		return NULL;
-	access_pattern->kobj = (struct kobject){};
-	return access_pattern;
-}
-
-static int damon_sysfs_access_pattern_add_range_dir(
-		struct damon_sysfs_access_pattern *access_pattern,
-		struct damon_sysfs_ul_range **range_dir_ptr,
-		char *name)
-{
-	struct damon_sysfs_ul_range *range = damon_sysfs_ul_range_alloc(0, 0);
-	int err;
-
-	if (!range)
-		return -ENOMEM;
-	err = kobject_init_and_add(&range->kobj, &damon_sysfs_ul_range_ktype,
-			&access_pattern->kobj, name);
-	if (err)
-		kobject_put(&range->kobj);
-	else
-		*range_dir_ptr = range;
-	return err;
-}
-
-static int damon_sysfs_access_pattern_add_dirs(
-		struct damon_sysfs_access_pattern *access_pattern)
-{
-	int err;
-
-	err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
-			&access_pattern->sz, "sz");
-	if (err)
-		goto put_sz_out;
-
-	err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
-			&access_pattern->nr_accesses, "nr_accesses");
-	if (err)
-		goto put_nr_accesses_sz_out;
-
-	err = damon_sysfs_access_pattern_add_range_dir(access_pattern,
-			&access_pattern->age, "age");
-	if (err)
-		goto put_age_nr_accesses_sz_out;
-	return 0;
-
-put_age_nr_accesses_sz_out:
-	kobject_put(&access_pattern->age->kobj);
-	access_pattern->age = NULL;
-put_nr_accesses_sz_out:
-	kobject_put(&access_pattern->nr_accesses->kobj);
-	access_pattern->nr_accesses = NULL;
-put_sz_out:
-	kobject_put(&access_pattern->sz->kobj);
-	access_pattern->sz = NULL;
-	return err;
-}
-
-static void damon_sysfs_access_pattern_rm_dirs(
-		struct damon_sysfs_access_pattern *access_pattern)
-{
-	kobject_put(&access_pattern->sz->kobj);
-	kobject_put(&access_pattern->nr_accesses->kobj);
-	kobject_put(&access_pattern->age->kobj);
-}
-
-static void damon_sysfs_access_pattern_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_access_pattern, kobj));
-}
-
-static struct attribute *damon_sysfs_access_pattern_attrs[] = {
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_access_pattern);
-
-static struct kobj_type damon_sysfs_access_pattern_ktype = {
-	.release = damon_sysfs_access_pattern_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_access_pattern_groups,
-};
-
-/*
- * scheme directory
- */
-
-struct damon_sysfs_scheme {
-	struct kobject kobj;
-	enum damos_action action;
-	struct damon_sysfs_access_pattern *access_pattern;
-	struct damon_sysfs_quotas *quotas;
-	struct damon_sysfs_watermarks *watermarks;
-	struct damon_sysfs_stats *stats;
-};
-
-/* This should match with enum damos_action */
-static const char * const damon_sysfs_damos_action_strs[] = {
-	"willneed",
-	"cold",
-	"pageout",
-	"hugepage",
-	"nohugepage",
-	"lru_prio",
-	"lru_deprio",
-	"stat",
-};
-
-static struct damon_sysfs_scheme *damon_sysfs_scheme_alloc(
-		enum damos_action action)
-{
-	struct damon_sysfs_scheme *scheme = kmalloc(sizeof(*scheme),
-				GFP_KERNEL);
-
-	if (!scheme)
-		return NULL;
-	scheme->kobj = (struct kobject){};
-	scheme->action = action;
-	return scheme;
-}
-
-static int damon_sysfs_scheme_set_access_pattern(
-		struct damon_sysfs_scheme *scheme)
-{
-	struct damon_sysfs_access_pattern *access_pattern;
-	int err;
-
-	access_pattern = damon_sysfs_access_pattern_alloc();
-	if (!access_pattern)
-		return -ENOMEM;
-	err = kobject_init_and_add(&access_pattern->kobj,
-			&damon_sysfs_access_pattern_ktype, &scheme->kobj,
-			"access_pattern");
-	if (err)
-		goto out;
-	err = damon_sysfs_access_pattern_add_dirs(access_pattern);
-	if (err)
-		goto out;
-	scheme->access_pattern = access_pattern;
-	return 0;
-
-out:
-	kobject_put(&access_pattern->kobj);
-	return err;
-}
-
-static int damon_sysfs_scheme_set_quotas(struct damon_sysfs_scheme *scheme)
-{
-	struct damon_sysfs_quotas *quotas = damon_sysfs_quotas_alloc();
-	int err;
-
-	if (!quotas)
-		return -ENOMEM;
-	err = kobject_init_and_add(&quotas->kobj, &damon_sysfs_quotas_ktype,
-			&scheme->kobj, "quotas");
-	if (err)
-		goto out;
-	err = damon_sysfs_quotas_add_dirs(quotas);
-	if (err)
-		goto out;
-	scheme->quotas = quotas;
-	return 0;
-
-out:
-	kobject_put(&quotas->kobj);
-	return err;
-}
-
-static int damon_sysfs_scheme_set_watermarks(struct damon_sysfs_scheme *scheme)
-{
-	struct damon_sysfs_watermarks *watermarks =
-		damon_sysfs_watermarks_alloc(DAMOS_WMARK_NONE, 0, 0, 0, 0);
-	int err;
-
-	if (!watermarks)
-		return -ENOMEM;
-	err = kobject_init_and_add(&watermarks->kobj,
-			&damon_sysfs_watermarks_ktype, &scheme->kobj,
-			"watermarks");
-	if (err)
-		kobject_put(&watermarks->kobj);
-	else
-		scheme->watermarks = watermarks;
-	return err;
-}
-
-static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme)
-{
-	struct damon_sysfs_stats *stats = damon_sysfs_stats_alloc();
-	int err;
-
-	if (!stats)
-		return -ENOMEM;
-	err = kobject_init_and_add(&stats->kobj, &damon_sysfs_stats_ktype,
-			&scheme->kobj, "stats");
-	if (err)
-		kobject_put(&stats->kobj);
-	else
-		scheme->stats = stats;
-	return err;
-}
-
-static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
-{
-	int err;
-
-	err = damon_sysfs_scheme_set_access_pattern(scheme);
-	if (err)
-		return err;
-	err = damon_sysfs_scheme_set_quotas(scheme);
-	if (err)
-		goto put_access_pattern_out;
-	err = damon_sysfs_scheme_set_watermarks(scheme);
-	if (err)
-		goto put_quotas_access_pattern_out;
-	err = damon_sysfs_scheme_set_stats(scheme);
-	if (err)
-		goto put_watermarks_quotas_access_pattern_out;
-	return 0;
-
-put_watermarks_quotas_access_pattern_out:
-	kobject_put(&scheme->watermarks->kobj);
-	scheme->watermarks = NULL;
-put_quotas_access_pattern_out:
-	kobject_put(&scheme->quotas->kobj);
-	scheme->quotas = NULL;
-put_access_pattern_out:
-	kobject_put(&scheme->access_pattern->kobj);
-	scheme->access_pattern = NULL;
-	return err;
-}
-
-static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme)
-{
-	damon_sysfs_access_pattern_rm_dirs(scheme->access_pattern);
-	kobject_put(&scheme->access_pattern->kobj);
-	damon_sysfs_quotas_rm_dirs(scheme->quotas);
-	kobject_put(&scheme->quotas->kobj);
-	kobject_put(&scheme->watermarks->kobj);
-	kobject_put(&scheme->stats->kobj);
-}
-
-static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr,
-		char *buf)
-{
-	struct damon_sysfs_scheme *scheme = container_of(kobj,
-			struct damon_sysfs_scheme, kobj);
-
-	return sysfs_emit(buf, "%s\n",
-			damon_sysfs_damos_action_strs[scheme->action]);
-}
-
-static ssize_t action_store(struct kobject *kobj, struct kobj_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct damon_sysfs_scheme *scheme = container_of(kobj,
-			struct damon_sysfs_scheme, kobj);
-	enum damos_action action;
-
-	for (action = 0; action < NR_DAMOS_ACTIONS; action++) {
-		if (sysfs_streq(buf, damon_sysfs_damos_action_strs[action])) {
-			scheme->action = action;
-			return count;
-		}
-	}
-	return -EINVAL;
-}
-
-static void damon_sysfs_scheme_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_scheme, kobj));
-}
-
-static struct kobj_attribute damon_sysfs_scheme_action_attr =
-		__ATTR_RW_MODE(action, 0600);
-
-static struct attribute *damon_sysfs_scheme_attrs[] = {
-	&damon_sysfs_scheme_action_attr.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_scheme);
-
-static struct kobj_type damon_sysfs_scheme_ktype = {
-	.release = damon_sysfs_scheme_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_scheme_groups,
-};
-
-/*
- * schemes directory
- */
-
-struct damon_sysfs_schemes {
-	struct kobject kobj;
-	struct damon_sysfs_scheme **schemes_arr;
-	int nr;
-};
-
-static struct damon_sysfs_schemes *damon_sysfs_schemes_alloc(void)
-{
-	return kzalloc(sizeof(struct damon_sysfs_schemes), GFP_KERNEL);
-}
-
-static void damon_sysfs_schemes_rm_dirs(struct damon_sysfs_schemes *schemes)
-{
-	struct damon_sysfs_scheme **schemes_arr = schemes->schemes_arr;
-	int i;
-
-	for (i = 0; i < schemes->nr; i++) {
-		damon_sysfs_scheme_rm_dirs(schemes_arr[i]);
-		kobject_put(&schemes_arr[i]->kobj);
-	}
-	schemes->nr = 0;
-	kfree(schemes_arr);
-	schemes->schemes_arr = NULL;
-}
-
-static int damon_sysfs_schemes_add_dirs(struct damon_sysfs_schemes *schemes,
-		int nr_schemes)
-{
-	struct damon_sysfs_scheme **schemes_arr, *scheme;
-	int err, i;
-
-	damon_sysfs_schemes_rm_dirs(schemes);
-	if (!nr_schemes)
-		return 0;
-
-	schemes_arr = kmalloc_array(nr_schemes, sizeof(*schemes_arr),
-			GFP_KERNEL | __GFP_NOWARN);
-	if (!schemes_arr)
-		return -ENOMEM;
-	schemes->schemes_arr = schemes_arr;
-
-	for (i = 0; i < nr_schemes; i++) {
-		scheme = damon_sysfs_scheme_alloc(DAMOS_STAT);
-		if (!scheme) {
-			damon_sysfs_schemes_rm_dirs(schemes);
-			return -ENOMEM;
-		}
-
-		err = kobject_init_and_add(&scheme->kobj,
-				&damon_sysfs_scheme_ktype, &schemes->kobj,
-				"%d", i);
-		if (err)
-			goto out;
-		err = damon_sysfs_scheme_add_dirs(scheme);
-		if (err)
-			goto out;
-
-		schemes_arr[i] = scheme;
-		schemes->nr++;
-	}
-	return 0;
-
-out:
-	damon_sysfs_schemes_rm_dirs(schemes);
-	kobject_put(&scheme->kobj);
-	return err;
-}
-
-static ssize_t nr_schemes_show(struct kobject *kobj,
-		struct kobj_attribute *attr, char *buf)
-{
-	struct damon_sysfs_schemes *schemes = container_of(kobj,
-			struct damon_sysfs_schemes, kobj);
-
-	return sysfs_emit(buf, "%d\n", schemes->nr);
-}
-
-static ssize_t nr_schemes_store(struct kobject *kobj,
-		struct kobj_attribute *attr, const char *buf, size_t count)
-{
-	struct damon_sysfs_schemes *schemes;
-	int nr, err = kstrtoint(buf, 0, &nr);
-
-	if (err)
-		return err;
-	if (nr < 0)
-		return -EINVAL;
-
-	schemes = container_of(kobj, struct damon_sysfs_schemes, kobj);
-
-	if (!mutex_trylock(&damon_sysfs_lock))
-		return -EBUSY;
-	err = damon_sysfs_schemes_add_dirs(schemes, nr);
-	mutex_unlock(&damon_sysfs_lock);
-	if (err)
-		return err;
-	return count;
-}
-
-static void damon_sysfs_schemes_release(struct kobject *kobj)
-{
-	kfree(container_of(kobj, struct damon_sysfs_schemes, kobj));
-}
-
-static struct kobj_attribute damon_sysfs_schemes_nr_attr =
-		__ATTR_RW_MODE(nr_schemes, 0600);
-
-static struct attribute *damon_sysfs_schemes_attrs[] = {
-	&damon_sysfs_schemes_nr_attr.attr,
-	NULL,
-};
-ATTRIBUTE_GROUPS(damon_sysfs_schemes);
-
-static struct kobj_type damon_sysfs_schemes_ktype = {
-	.release = damon_sysfs_schemes_release,
-	.sysfs_ops = &kobj_sysfs_ops,
-	.default_groups = damon_sysfs_schemes_groups,
-};
-
 /*
  * init region directory
  */
@@ -2133,104 +1190,6 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
 	return 0;
 }
 
-static struct damos *damon_sysfs_mk_scheme(
-		struct damon_sysfs_scheme *sysfs_scheme)
-{
-	struct damon_sysfs_access_pattern *access_pattern =
-		sysfs_scheme->access_pattern;
-	struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
-	struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
-	struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
-
-	struct damos_access_pattern pattern = {
-		.min_sz_region = access_pattern->sz->min,
-		.max_sz_region = access_pattern->sz->max,
-		.min_nr_accesses = access_pattern->nr_accesses->min,
-		.max_nr_accesses = access_pattern->nr_accesses->max,
-		.min_age_region = access_pattern->age->min,
-		.max_age_region = access_pattern->age->max,
-	};
-	struct damos_quota quota = {
-		.ms = sysfs_quotas->ms,
-		.sz = sysfs_quotas->sz,
-		.reset_interval = sysfs_quotas->reset_interval_ms,
-		.weight_sz = sysfs_weights->sz,
-		.weight_nr_accesses = sysfs_weights->nr_accesses,
-		.weight_age = sysfs_weights->age,
-	};
-	struct damos_watermarks wmarks = {
-		.metric = sysfs_wmarks->metric,
-		.interval = sysfs_wmarks->interval_us,
-		.high = sysfs_wmarks->high,
-		.mid = sysfs_wmarks->mid,
-		.low = sysfs_wmarks->low,
-	};
-
-	return damon_new_scheme(&pattern, sysfs_scheme->action, &quota,
-			&wmarks);
-}
-
-static void damon_sysfs_update_scheme(struct damos *scheme,
-		struct damon_sysfs_scheme *sysfs_scheme)
-{
-	struct damon_sysfs_access_pattern *access_pattern =
-		sysfs_scheme->access_pattern;
-	struct damon_sysfs_quotas *sysfs_quotas = sysfs_scheme->quotas;
-	struct damon_sysfs_weights *sysfs_weights = sysfs_quotas->weights;
-	struct damon_sysfs_watermarks *sysfs_wmarks = sysfs_scheme->watermarks;
-
-	scheme->pattern.min_sz_region = access_pattern->sz->min;
-	scheme->pattern.max_sz_region = access_pattern->sz->max;
-	scheme->pattern.min_nr_accesses = access_pattern->nr_accesses->min;
-	scheme->pattern.max_nr_accesses = access_pattern->nr_accesses->max;
-	scheme->pattern.min_age_region = access_pattern->age->min;
-	scheme->pattern.max_age_region = access_pattern->age->max;
-
-	scheme->action = sysfs_scheme->action;
-
-	scheme->quota.ms = sysfs_quotas->ms;
-	scheme->quota.sz = sysfs_quotas->sz;
-	scheme->quota.reset_interval = sysfs_quotas->reset_interval_ms;
-	scheme->quota.weight_sz = sysfs_weights->sz;
-	scheme->quota.weight_nr_accesses = sysfs_weights->nr_accesses;
-	scheme->quota.weight_age = sysfs_weights->age;
-
-	scheme->wmarks.metric = sysfs_wmarks->metric;
-	scheme->wmarks.interval = sysfs_wmarks->interval_us;
-	scheme->wmarks.high = sysfs_wmarks->high;
-	scheme->wmarks.mid = sysfs_wmarks->mid;
-	scheme->wmarks.low = sysfs_wmarks->low;
-}
-
-static int damon_sysfs_set_schemes(struct damon_ctx *ctx,
-		struct damon_sysfs_schemes *sysfs_schemes)
-{
-	struct damos *scheme, *next;
-	int i = 0;
-
-	damon_for_each_scheme_safe(scheme, next, ctx) {
-		if (i < sysfs_schemes->nr)
-			damon_sysfs_update_scheme(scheme,
-					sysfs_schemes->schemes_arr[i]);
-		else
-			damon_destroy_scheme(scheme);
-		i++;
-	}
-
-	for (; i < sysfs_schemes->nr; i++) {
-		struct damos *scheme, *next;
-
-		scheme = damon_sysfs_mk_scheme(sysfs_schemes->schemes_arr[i]);
-		if (!scheme) {
-			damon_for_each_scheme_safe(scheme, next, ctx)
-				damon_destroy_scheme(scheme);
-			return -ENOMEM;
-		}
-		damon_add_scheme(ctx, scheme);
-	}
-	return 0;
-}
-
 static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
 {
 	struct damon_target *t, *next;
@@ -2246,29 +1205,6 @@ static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
 	mutex_unlock(&ctx->kdamond_lock);
 }
 
-static void damon_sysfs_schemes_update_stats(
-		struct damon_sysfs_schemes *sysfs_schemes,
-		struct damon_ctx *ctx)
-{
-	struct damos *scheme;
-	int schemes_idx = 0;
-
-	damon_for_each_scheme(scheme, ctx) {
-		struct damon_sysfs_stats *sysfs_stats;
-
-		/* user could have removed the scheme sysfs dir */
-		if (schemes_idx >= sysfs_schemes->nr)
-			break;
-
-		sysfs_stats = sysfs_schemes->schemes_arr[schemes_idx++]->stats;
-		sysfs_stats->nr_tried = scheme->stat.nr_tried;
-		sysfs_stats->sz_tried = scheme->stat.sz_tried;
-		sysfs_stats->nr_applied = scheme->stat.nr_applied;
-		sysfs_stats->sz_applied = scheme->stat.sz_applied;
-		sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds;
-	}
-}
-
 /*
  * damon_sysfs_upd_schemes_stats() - Update schemes stats sysfs files.
  * @kdamond:	The kobject wrapper that associated to the kdamond thread.
-- 
GitLab


From 7ae2c17f53d5054d1fe5c1a103ad46068034617d Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:42 +0000
Subject: [PATCH 2964/4122] mm/damon/modules: deduplicate init steps for DAMON
 context setup

DAMON_RECLAIM and DAMON_LRU_SORT has duplicated code for DAMON context and
target initializations.  Deduplicate the part by implementing a function
for the initialization in 'modules-common.c' and using it.

Link: https://lkml.kernel.org/r/20221026225943.100429-12-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/Makefile         |  4 ++--
 mm/damon/lru_sort.c       | 17 +++-------------
 mm/damon/modules-common.c | 42 +++++++++++++++++++++++++++++++++++++++
 mm/damon/modules-common.h |  3 +++
 mm/damon/reclaim.c        | 17 +++-------------
 5 files changed, 53 insertions(+), 30 deletions(-)
 create mode 100644 mm/damon/modules-common.c

diff --git a/mm/damon/Makefile b/mm/damon/Makefile
index 1e86f5253d7ff..f7add3f4aa793 100644
--- a/mm/damon/Makefile
+++ b/mm/damon/Makefile
@@ -5,5 +5,5 @@ obj-$(CONFIG_DAMON_VADDR)	+= ops-common.o vaddr.o
 obj-$(CONFIG_DAMON_PADDR)	+= ops-common.o paddr.o
 obj-$(CONFIG_DAMON_SYSFS)	+= sysfs-common.o sysfs-schemes.o sysfs.o
 obj-$(CONFIG_DAMON_DBGFS)	+= dbgfs.o
-obj-$(CONFIG_DAMON_RECLAIM)	+= reclaim.o
-obj-$(CONFIG_DAMON_LRU_SORT)	+= lru_sort.o
+obj-$(CONFIG_DAMON_RECLAIM)	+= modules-common.o reclaim.o
+obj-$(CONFIG_DAMON_LRU_SORT)	+= modules-common.o lru_sort.o
diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index efbc2bda8b9cd..a1896c5acfe97 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -314,25 +314,14 @@ static int damon_lru_sort_after_wmarks_check(struct damon_ctx *c)
 
 static int __init damon_lru_sort_init(void)
 {
-	ctx = damon_new_ctx();
-	if (!ctx)
-		return -ENOMEM;
+	int err = damon_modules_new_paddr_ctx_target(&ctx, &target);
 
-	if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
-		damon_destroy_ctx(ctx);
-		return -EINVAL;
-	}
+	if (err)
+		return err;
 
 	ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check;
 	ctx->callback.after_aggregation = damon_lru_sort_after_aggregation;
 
-	target = damon_new_target();
-	if (!target) {
-		damon_destroy_ctx(ctx);
-		return -ENOMEM;
-	}
-	damon_add_target(ctx, target);
-
 	schedule_delayed_work(&damon_lru_sort_timer, 0);
 
 	damon_lru_sort_initialized = true;
diff --git a/mm/damon/modules-common.c b/mm/damon/modules-common.c
new file mode 100644
index 0000000000000..b2381a8466ecf
--- /dev/null
+++ b/mm/damon/modules-common.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Common Primitives for DAMON Modules
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#include <linux/damon.h>
+
+#include "modules-common.h"
+
+/*
+ * Allocate, set, and return a DAMON context for the physical address space.
+ * @ctxp:	Pointer to save the point to the newly created context
+ * @targetp:	Pointer to save the point to the newly created target
+ */
+int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
+		struct damon_target **targetp)
+{
+	struct damon_ctx *ctx;
+	struct damon_target *target;
+
+	ctx = damon_new_ctx();
+	if (!ctx)
+		return -ENOMEM;
+
+	if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
+		damon_destroy_ctx(ctx);
+		return -EINVAL;
+	}
+
+	target = damon_new_target();
+	if (!target) {
+		damon_destroy_ctx(ctx);
+		return -ENOMEM;
+	}
+	damon_add_target(ctx, target);
+
+	*ctxp = ctx;
+	*targetp = target;
+	return 0;
+}
diff --git a/mm/damon/modules-common.h b/mm/damon/modules-common.h
index 5a4921851d326..f49cdb4170051 100644
--- a/mm/damon/modules-common.h
+++ b/mm/damon/modules-common.h
@@ -44,3 +44,6 @@
 			0400);						\
 	module_param_named(nr_##qt_exceed_name, stat.qt_exceeds, ulong,	\
 			0400);
+
+int damon_modules_new_paddr_ctx_target(struct damon_ctx **ctxp,
+		struct damon_target **targetp);
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 162c9b1ca00fd..3173f373435c2 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -256,25 +256,14 @@ static int damon_reclaim_after_wmarks_check(struct damon_ctx *c)
 
 static int __init damon_reclaim_init(void)
 {
-	ctx = damon_new_ctx();
-	if (!ctx)
-		return -ENOMEM;
+	int err = damon_modules_new_paddr_ctx_target(&ctx, &target);
 
-	if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
-		damon_destroy_ctx(ctx);
-		return -EINVAL;
-	}
+	if (err)
+		return err;
 
 	ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check;
 	ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
 
-	target = damon_new_target();
-	if (!target) {
-		damon_destroy_ctx(ctx);
-		return -ENOMEM;
-	}
-	damon_add_target(ctx, target);
-
 	schedule_delayed_work(&damon_reclaim_timer, 0);
 
 	damon_reclaim_initialized = true;
-- 
GitLab


From b0d3dbd1b98660ec2154fccbd21c13916c967c05 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Wed, 26 Oct 2022 22:59:43 +0000
Subject: [PATCH 2965/4122] mm/damon/{reclaim,lru_sort}: remove unnecessarily
 included headers

Some headers that 'reclaim.c' and 'lru_sort.c' are including are
unnecessary now owing to previous cleanups and refactorings.  Remove
those.

Link: https://lkml.kernel.org/r/20221026225943.100429-13-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/lru_sort.c | 2 --
 mm/damon/reclaim.c  | 2 --
 2 files changed, 4 deletions(-)

diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index a1896c5acfe97..5c60163e556c8 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -8,9 +8,7 @@
 #define pr_fmt(fmt) "damon-lru-sort: " fmt
 
 #include <linux/damon.h>
-#include <linux/ioport.h>
 #include <linux/module.h>
-#include <linux/sched.h>
 #include <linux/workqueue.h>
 
 #include "modules-common.h"
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 3173f373435c2..e14eb30c01f4f 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -8,9 +8,7 @@
 #define pr_fmt(fmt) "damon-reclaim: " fmt
 
 #include <linux/damon.h>
-#include <linux/ioport.h>
 #include <linux/module.h>
-#include <linux/sched.h>
 #include <linux/workqueue.h>
 
 #include "modules-common.h"
-- 
GitLab


From 04e98764befa371836a78b2b489e8b931a3a9e9a Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 25 Oct 2022 17:36:47 +0000
Subject: [PATCH 2966/4122] mm/damon/reclaim: enable and disable synchronously

Patch series "mm/damon/reclaim,lru_sort: enable/disable synchronously".

Writing a value to DAMON_RECLAIM and DAMON_LRU_SORT's 'enabled' parameters
turns on or off DAMON in an ansychronous way.  This means the parameter
cannot be used to read the current status of them.  'kdamond_pid'
parameter should be used instead for the purpose.  The documentation is
easy to be read as it works in a synchronous way, so it is a little bit
confusing.  It also makes the user space tooling dirty.

There's no real reason to have the asynchronous behavior, though.  Simply
make the parameter works synchronously, rather than updating the document.

The first and second patches changes the behavior of the 'enabled'
parameter for DAMON_RECLAIM and adds a selftest for the changed behavior,
respectively.  Following two patches make the same changes for
DAMON_LRU_SORT.


This patch (of 4):

Writing a value to DAMON_RECLAIM's 'enabled' parameter turns on or off
DAMON in an ansychronous way.  This means the parameter cannot be used to
read the current status of DAMON_RECLAIM.  'kdamond_pid' parameter should
be used instead for the purpose.  The documentation is easy to be read as
it works in a synchronous way, so it is a little bit confusing.  It also
makes the user space tooling dirty.

There's no real reason to have the asynchronous behavior, though.  Simply
make the parameter works synchronously, rather than updating the document.

Link: https://lkml.kernel.org/r/20221025173650.90624-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20221025173650.90624-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/reclaim.c | 53 ++++++++++++++++++++--------------------------
 1 file changed, 23 insertions(+), 30 deletions(-)

diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index e14eb30c01f4f..e57604bec06da 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -9,7 +9,6 @@
 
 #include <linux/damon.h>
 #include <linux/module.h>
-#include <linux/workqueue.h>
 
 #include "modules-common.h"
 
@@ -181,38 +180,31 @@ static int damon_reclaim_turn(bool on)
 	return 0;
 }
 
-static struct delayed_work damon_reclaim_timer;
-static void damon_reclaim_timer_fn(struct work_struct *work)
-{
-	static bool last_enabled;
-	bool now_enabled;
-
-	now_enabled = enabled;
-	if (last_enabled != now_enabled) {
-		if (!damon_reclaim_turn(now_enabled))
-			last_enabled = now_enabled;
-		else
-			enabled = last_enabled;
-	}
-}
-static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
-
-static bool damon_reclaim_initialized;
-
 static int damon_reclaim_enabled_store(const char *val,
 		const struct kernel_param *kp)
 {
-	int rc = param_set_bool(val, kp);
+	bool is_enabled = enabled;
+	bool enable;
+	int err;
 
-	if (rc < 0)
-		return rc;
+	err = strtobool(val, &enable);
+	if (err)
+		return err;
 
-	/* system_wq might not initialized yet */
-	if (!damon_reclaim_initialized)
-		return rc;
+	if (is_enabled == enable)
+		return 0;
 
-	schedule_delayed_work(&damon_reclaim_timer, 0);
-	return 0;
+	/* Called before init function.  The function will handle this. */
+	if (!ctx)
+		goto set_param_out;
+
+	err = damon_reclaim_turn(enable);
+	if (err)
+		return err;
+
+set_param_out:
+	enabled = enable;
+	return err;
 }
 
 static const struct kernel_param_ops enabled_param_ops = {
@@ -262,10 +254,11 @@ static int __init damon_reclaim_init(void)
 	ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check;
 	ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
 
-	schedule_delayed_work(&damon_reclaim_timer, 0);
+	/* 'enabled' has set before this function, probably via command line */
+	if (enabled)
+		err = damon_reclaim_turn(true);
 
-	damon_reclaim_initialized = true;
-	return 0;
+	return err;
 }
 
 module_init(damon_reclaim_init);
-- 
GitLab


From 4cc0ee7787d7dc595752a8de2e073efa68f7c965 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 25 Oct 2022 17:36:48 +0000
Subject: [PATCH 2967/4122] selftests/damon: add tests for DAMON_RECLAIM's
 enabled parameter

Add simple test cases for DAMON_RECLAIM's 'enabled' parameter.  Those
tests are focusing on the synchronous behavior of DAMON_RECLAIM enabling
and disabling.

Link: https://lkml.kernel.org/r/20221025173650.90624-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/damon/Makefile   |  1 +
 tools/testing/selftests/damon/reclaim.sh | 42 ++++++++++++++++++++++++
 2 files changed, 43 insertions(+)
 create mode 100644 tools/testing/selftests/damon/reclaim.sh

diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index a1fa2eff8192f..dbbf18cb3e6b5 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -8,5 +8,6 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
 TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
 TEST_PROGS += debugfs_duplicate_context_creation.sh
 TEST_PROGS += sysfs.sh
+TEST_PROGS += reclaim.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh
new file mode 100644
index 0000000000000..78dbc2334cbe1
--- /dev/null
+++ b/tools/testing/selftests/damon/reclaim.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ $EUID -ne 0 ]
+then
+	echo "Run as root"
+	exit $ksft_skip
+fi
+
+damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled"
+if [ ! -f "$damon_reclaim_enabled" ]
+then
+	echo "No 'enabled' file.  Maybe DAMON_RECLAIM not built"
+	exit $ksft_skip
+fi
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+	echo "Another kdamond is running"
+	exit $ksft_skip
+fi
+
+echo Y > "$damon_reclaim_enabled"
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 1 ]
+then
+	echo "kdamond is not turned on"
+	exit 1
+fi
+
+echo N > "$damon_reclaim_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+	echo "kdamond is not turned off"
+	exit 1
+fi
-- 
GitLab


From 7a034fbba3361e94956431d17660d7c5674d13c3 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 25 Oct 2022 17:36:49 +0000
Subject: [PATCH 2968/4122] mm/damon/lru_sort: enable and disable synchronously

Writing a value to DAMON_RECLAIM's 'enabled' parameter turns on or off
DAMON in an ansychronous way.  This means the parameter cannot be used to
read the current status of DAMON_RECLAIM.  'kdamond_pid' parameter should
be used instead for the purpose.  The documentation is easy to be read as
it works in a synchronous way, so it is a little bit confusing.  It also
makes the user space tooling dirty.

There's no real reason to have the asynchronous behavior, though.  Simply
make the parameter works synchronously, rather than updating the document.

Link: https://lkml.kernel.org/r/20221025173650.90624-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/lru_sort.c | 51 +++++++++++++++++++--------------------------
 1 file changed, 22 insertions(+), 29 deletions(-)

diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 5c60163e556c8..2a532e3983dfa 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -9,7 +9,6 @@
 
 #include <linux/damon.h>
 #include <linux/module.h>
-#include <linux/workqueue.h>
 
 #include "modules-common.h"
 
@@ -235,38 +234,31 @@ static int damon_lru_sort_turn(bool on)
 	return 0;
 }
 
-static struct delayed_work damon_lru_sort_timer;
-static void damon_lru_sort_timer_fn(struct work_struct *work)
-{
-	static bool last_enabled;
-	bool now_enabled;
-
-	now_enabled = enabled;
-	if (last_enabled != now_enabled) {
-		if (!damon_lru_sort_turn(now_enabled))
-			last_enabled = now_enabled;
-		else
-			enabled = last_enabled;
-	}
-}
-static DECLARE_DELAYED_WORK(damon_lru_sort_timer, damon_lru_sort_timer_fn);
-
-static bool damon_lru_sort_initialized;
-
 static int damon_lru_sort_enabled_store(const char *val,
 		const struct kernel_param *kp)
 {
-	int rc = param_set_bool(val, kp);
+	bool is_enabled = enabled;
+	bool enable;
+	int err;
+
+	err = strtobool(val, &enable);
+	if (err)
+		return err;
 
-	if (rc < 0)
-		return rc;
+	if (is_enabled == enable)
+		return 0;
 
-	if (!damon_lru_sort_initialized)
-		return rc;
+	/* Called before init function.  The function will handle this. */
+	if (!ctx)
+		goto set_param_out;
 
-	schedule_delayed_work(&damon_lru_sort_timer, 0);
+	err = damon_lru_sort_turn(enable);
+	if (err)
+		return err;
 
-	return 0;
+set_param_out:
+	enabled = enable;
+	return err;
 }
 
 static const struct kernel_param_ops enabled_param_ops = {
@@ -320,10 +312,11 @@ static int __init damon_lru_sort_init(void)
 	ctx->callback.after_wmarks_check = damon_lru_sort_after_wmarks_check;
 	ctx->callback.after_aggregation = damon_lru_sort_after_aggregation;
 
-	schedule_delayed_work(&damon_lru_sort_timer, 0);
+	/* 'enabled' has set before this function, probably via command line */
+	if (enabled)
+		err = damon_lru_sort_turn(true);
 
-	damon_lru_sort_initialized = true;
-	return 0;
+	return err;
 }
 
 module_init(damon_lru_sort_init);
-- 
GitLab


From 9cd6ffa60256e931503d347006049b8bef508203 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 25 Oct 2022 17:36:50 +0000
Subject: [PATCH 2969/4122] selftests/damon: add tests for DAMON_LRU_SORT's
 enabled parameter

Add simple test cases for DAMON_LRU_SORT's 'enabled' parameter.  Those
tests are focusing on the synchronous behavior of DAMON_RECLAIM enabling
and disabling.

Link: https://lkml.kernel.org/r/20221025173650.90624-5-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/damon/Makefile    |  2 +-
 tools/testing/selftests/damon/lru_sort.sh | 41 +++++++++++++++++++++++
 2 files changed, 42 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/damon/lru_sort.sh

diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index dbbf18cb3e6b5..af490acc53485 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -8,6 +8,6 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
 TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
 TEST_PROGS += debugfs_duplicate_context_creation.sh
 TEST_PROGS += sysfs.sh
-TEST_PROGS += reclaim.sh
+TEST_PROGS += reclaim.sh lru_sort.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh
new file mode 100644
index 0000000000000..61b80197c8966
--- /dev/null
+++ b/tools/testing/selftests/damon/lru_sort.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ $EUID -ne 0 ]
+then
+	echo "Run as root"
+	exit $ksft_skip
+fi
+
+damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled"
+if [ ! -f "$damon_lru_sort_enabled" ]
+then
+	echo "No 'enabled' file.  Maybe DAMON_LRU_SORT not built"
+	exit $ksft_skip
+fi
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+	echo "Another kdamond is running"
+	exit $ksft_skip
+fi
+
+echo Y > "$damon_lru_sort_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 1 ]
+then
+	echo "kdamond is not turned on"
+	exit 1
+fi
+
+echo N > "$damon_lru_sort_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+	echo "kdamond is not turned off"
+	exit 1
+fi
-- 
GitLab


From eb3992e833d3a17f9b0a3e0371d0b1d3d566f740 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 30 Sep 2022 23:31:32 +0000
Subject: [PATCH 2970/4122] KVM: VMX: Resume guest immediately when injecting
 #GP on ECREATE

Resume the guest immediately when injecting a #GP on ECREATE due to an
invalid enclave size, i.e. don't attempt ECREATE in the host.  The #GP is
a terminal fault, e.g. skipping the instruction if ECREATE is successful
would result in KVM injecting #GP on the instruction following ECREATE.

Fixes: 70210c044b4e ("KVM: VMX: Add SGX ENCLS[ECREATE] handler to enforce CPUID restrictions")
Cc: stable@vger.kernel.org
Cc: Kai Huang <kai.huang@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Kai Huang <kai.huang@intel.com>
Link: https://lore.kernel.org/r/20220930233132.1723330-1-seanjc@google.com
---
 arch/x86/kvm/vmx/sgx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 8f95c7c014335..b12da2a6dec95 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -182,8 +182,10 @@ static int __handle_encls_ecreate(struct kvm_vcpu *vcpu,
 	/* Enforce CPUID restriction on max enclave size. */
 	max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 :
 							    sgx_12_0->edx;
-	if (size >= BIT_ULL(max_size_log2))
+	if (size >= BIT_ULL(max_size_log2)) {
 		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
 
 	/*
 	 * sgx_virt_ecreate() returns:
-- 
GitLab


From f1a7941243c102a44e8847e3b94ff4ff3ec56f25 Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Mon, 24 Oct 2022 05:28:41 +0000
Subject: [PATCH 2971/4122] mm: convert mm's rss stats into percpu_counter

Currently mm_struct maintains rss_stats which are updated on page fault
and the unmapping codepaths.  For page fault codepath the updates are
cached per thread with the batch of TASK_RSS_EVENTS_THRESH which is 64.
The reason for caching is performance for multithreaded applications
otherwise the rss_stats updates may become hotspot for such applications.

However this optimization comes with the cost of error margin in the rss
stats.  The rss_stats for applications with large number of threads can be
very skewed.  At worst the error margin is (nr_threads * 64) and we have a
lot of applications with 100s of threads, so the error margin can be very
high.  Internally we had to reduce TASK_RSS_EVENTS_THRESH to 32.

Recently we started seeing the unbounded errors for rss_stats for specific
applications which use TCP rx0cp.  It seems like vm_insert_pages()
codepath does not sync rss_stats at all.

This patch converts the rss_stats into percpu_counter to convert the error
margin from (nr_threads * 64) to approximately (nr_cpus ^ 2).  However
this conversion enable us to get the accurate stats for situations where
accuracy is more important than the cpu cost.

This patch does not make such tradeoffs - we can just use
percpu_counter_add_local() for the updates and percpu_counter_sum() (or
percpu_counter_sync() + percpu_counter_read) for the readers.  At the
moment the readers are either procfs interface, oom_killer and memory
reclaim which I think are not performance critical and should be ok with
slow read.  However I think we can make that change in a separate patch.

Link: https://lkml.kernel.org/r/20221024052841.3291983-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h             | 26 ++++--------
 include/linux/mm_types.h       |  7 +---
 include/linux/mm_types_task.h  | 13 ------
 include/linux/percpu_counter.h |  1 -
 include/linux/sched.h          |  3 --
 include/trace/events/kmem.h    |  8 ++--
 kernel/fork.c                  | 16 +++++++-
 mm/memory.c                    | 73 +++++-----------------------------
 8 files changed, 40 insertions(+), 107 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f919befc8facb..0cb4e196d60b8 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2052,40 +2052,30 @@ static inline bool get_user_page_fast_only(unsigned long addr,
  */
 static inline unsigned long get_mm_counter(struct mm_struct *mm, int member)
 {
-	long val = atomic_long_read(&mm->rss_stat.count[member]);
-
-#ifdef SPLIT_RSS_COUNTING
-	/*
-	 * counter is updated in asynchronous manner and may go to minus.
-	 * But it's never be expected number for users.
-	 */
-	if (val < 0)
-		val = 0;
-#endif
-	return (unsigned long)val;
+	return percpu_counter_read_positive(&mm->rss_stat[member]);
 }
 
-void mm_trace_rss_stat(struct mm_struct *mm, int member, long count);
+void mm_trace_rss_stat(struct mm_struct *mm, int member);
 
 static inline void add_mm_counter(struct mm_struct *mm, int member, long value)
 {
-	long count = atomic_long_add_return(value, &mm->rss_stat.count[member]);
+	percpu_counter_add(&mm->rss_stat[member], value);
 
-	mm_trace_rss_stat(mm, member, count);
+	mm_trace_rss_stat(mm, member);
 }
 
 static inline void inc_mm_counter(struct mm_struct *mm, int member)
 {
-	long count = atomic_long_inc_return(&mm->rss_stat.count[member]);
+	percpu_counter_inc(&mm->rss_stat[member]);
 
-	mm_trace_rss_stat(mm, member, count);
+	mm_trace_rss_stat(mm, member);
 }
 
 static inline void dec_mm_counter(struct mm_struct *mm, int member)
 {
-	long count = atomic_long_dec_return(&mm->rss_stat.count[member]);
+	percpu_counter_dec(&mm->rss_stat[member]);
 
-	mm_trace_rss_stat(mm, member, count);
+	mm_trace_rss_stat(mm, member);
 }
 
 /* Optimized variant when page is already known not to be PageAnon */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 2d5b1575ffe08..e86861ff5bbdb 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -18,6 +18,7 @@
 #include <linux/page-flags-layout.h>
 #include <linux/workqueue.h>
 #include <linux/seqlock.h>
+#include <linux/percpu_counter.h>
 
 #include <asm/mmu.h>
 
@@ -626,11 +627,7 @@ struct mm_struct {
 
 		unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
-		/*
-		 * Special counters, in some configurations protected by the
-		 * page_table_lock, in other configurations by being atomic.
-		 */
-		struct mm_rss_stat rss_stat;
+		struct percpu_counter rss_stat[NR_MM_COUNTERS];
 
 		struct linux_binfmt *binfmt;
 
diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 0bb4b6da99939..5414b5c6a1031 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -36,19 +36,6 @@ enum {
 	NR_MM_COUNTERS
 };
 
-#if USE_SPLIT_PTE_PTLOCKS && defined(CONFIG_MMU)
-#define SPLIT_RSS_COUNTING
-/* per-thread cached information, */
-struct task_rss_stat {
-	int events;	/* for synchronization threshold */
-	int count[NR_MM_COUNTERS];
-};
-#endif /* USE_SPLIT_PTE_PTLOCKS */
-
-struct mm_rss_stat {
-	atomic_long_t count[NR_MM_COUNTERS];
-};
-
 struct page_frag {
 	struct page *page;
 #if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 8ed5fba6d156f..bde6c4c1f4058 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -13,7 +13,6 @@
 #include <linux/threads.h>
 #include <linux/percpu.h>
 #include <linux/types.h>
-#include <linux/gfp.h>
 
 /* percpu_counter batch for local add or sub */
 #define PERCPU_COUNTER_LOCAL_BATCH	INT_MAX
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ffb6eb55cd135..079d299fa4654 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -870,9 +870,6 @@ struct task_struct {
 	struct mm_struct		*mm;
 	struct mm_struct		*active_mm;
 
-#ifdef SPLIT_RSS_COUNTING
-	struct task_rss_stat		rss_stat;
-#endif
 	int				exit_state;
 	int				exit_code;
 	int				exit_signal;
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 243073cfc29d4..58688768ef0f0 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -346,10 +346,9 @@ TRACE_MM_PAGES
 TRACE_EVENT(rss_stat,
 
 	TP_PROTO(struct mm_struct *mm,
-		int member,
-		long count),
+		int member),
 
-	TP_ARGS(mm, member, count),
+	TP_ARGS(mm, member),
 
 	TP_STRUCT__entry(
 		__field(unsigned int, mm_id)
@@ -362,7 +361,8 @@ TRACE_EVENT(rss_stat,
 		__entry->mm_id = mm_ptr_to_hash(mm);
 		__entry->curr = !!(current->mm == mm);
 		__entry->member = member;
-		__entry->size = (count << PAGE_SHIFT);
+		__entry->size = (percpu_counter_sum_positive(&mm->rss_stat[member])
+							    << PAGE_SHIFT);
 	),
 
 	TP_printk("mm_id=%u curr=%d type=%s size=%ldB",
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d5..0fef202434c36 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -753,7 +753,7 @@ static void check_mm(struct mm_struct *mm)
 			 "Please make sure 'struct resident_page_types[]' is updated as well");
 
 	for (i = 0; i < NR_MM_COUNTERS; i++) {
-		long x = atomic_long_read(&mm->rss_stat.count[i]);
+		long x = percpu_counter_sum(&mm->rss_stat[i]);
 
 		if (unlikely(x))
 			pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
@@ -779,6 +779,8 @@ static void check_mm(struct mm_struct *mm)
  */
 void __mmdrop(struct mm_struct *mm)
 {
+	int i;
+
 	BUG_ON(mm == &init_mm);
 	WARN_ON_ONCE(mm == current->mm);
 	WARN_ON_ONCE(mm == current->active_mm);
@@ -788,6 +790,9 @@ void __mmdrop(struct mm_struct *mm)
 	check_mm(mm);
 	put_user_ns(mm->user_ns);
 	mm_pasid_drop(mm);
+
+	for (i = 0; i < NR_MM_COUNTERS; i++)
+		percpu_counter_destroy(&mm->rss_stat[i]);
 	free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1107,6 +1112,8 @@ static void mm_init_uprobes_state(struct mm_struct *mm)
 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	struct user_namespace *user_ns)
 {
+	int i;
+
 	mt_init_flags(&mm->mm_mt, MM_MT_FLAGS);
 	mt_set_external_lock(&mm->mm_mt, &mm->mmap_lock);
 	atomic_set(&mm->mm_users, 1);
@@ -1148,10 +1155,17 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	if (init_new_context(p, mm))
 		goto fail_nocontext;
 
+	for (i = 0; i < NR_MM_COUNTERS; i++)
+		if (percpu_counter_init(&mm->rss_stat[i], 0, GFP_KERNEL_ACCOUNT))
+			goto fail_pcpu;
+
 	mm->user_ns = get_user_ns(user_ns);
 	lru_gen_init_mm(mm);
 	return mm;
 
+fail_pcpu:
+	while (i > 0)
+		percpu_counter_destroy(&mm->rss_stat[--i]);
 fail_nocontext:
 	mm_free_pgd(mm);
 fail_nopgd:
diff --git a/mm/memory.c b/mm/memory.c
index 7826143ec9cd1..e0555ddd71b53 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -162,58 +162,11 @@ static int __init init_zero_pfn(void)
 }
 early_initcall(init_zero_pfn);
 
-void mm_trace_rss_stat(struct mm_struct *mm, int member, long count)
+void mm_trace_rss_stat(struct mm_struct *mm, int member)
 {
-	trace_rss_stat(mm, member, count);
+	trace_rss_stat(mm, member);
 }
 
-#if defined(SPLIT_RSS_COUNTING)
-
-void sync_mm_rss(struct mm_struct *mm)
-{
-	int i;
-
-	for (i = 0; i < NR_MM_COUNTERS; i++) {
-		if (current->rss_stat.count[i]) {
-			add_mm_counter(mm, i, current->rss_stat.count[i]);
-			current->rss_stat.count[i] = 0;
-		}
-	}
-	current->rss_stat.events = 0;
-}
-
-static void add_mm_counter_fast(struct mm_struct *mm, int member, int val)
-{
-	struct task_struct *task = current;
-
-	if (likely(task->mm == mm))
-		task->rss_stat.count[member] += val;
-	else
-		add_mm_counter(mm, member, val);
-}
-#define inc_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, 1)
-#define dec_mm_counter_fast(mm, member) add_mm_counter_fast(mm, member, -1)
-
-/* sync counter once per 64 page faults */
-#define TASK_RSS_EVENTS_THRESH	(64)
-static void check_sync_rss_stat(struct task_struct *task)
-{
-	if (unlikely(task != current))
-		return;
-	if (unlikely(task->rss_stat.events++ > TASK_RSS_EVENTS_THRESH))
-		sync_mm_rss(task->mm);
-}
-#else /* SPLIT_RSS_COUNTING */
-
-#define inc_mm_counter_fast(mm, member) inc_mm_counter(mm, member)
-#define dec_mm_counter_fast(mm, member) dec_mm_counter(mm, member)
-
-static void check_sync_rss_stat(struct task_struct *task)
-{
-}
-
-#endif /* SPLIT_RSS_COUNTING */
-
 /*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
@@ -1857,7 +1810,7 @@ static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte,
 		return -EBUSY;
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
-	inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
+	inc_mm_counter(vma->vm_mm, mm_counter_file(page));
 	page_add_file_rmap(page, vma, false);
 	set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot));
 	return 0;
@@ -3153,12 +3106,11 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
-				dec_mm_counter_fast(mm,
-						mm_counter_file(old_page));
-				inc_mm_counter_fast(mm, MM_ANONPAGES);
+				dec_mm_counter(mm, mm_counter_file(old_page));
+				inc_mm_counter(mm, MM_ANONPAGES);
 			}
 		} else {
-			inc_mm_counter_fast(mm, MM_ANONPAGES);
+			inc_mm_counter(mm, MM_ANONPAGES);
 		}
 		flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
@@ -3965,8 +3917,8 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 	if (should_try_to_free_swap(folio, vma, vmf->flags))
 		folio_free_swap(folio);
 
-	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
-	dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS);
+	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
+	dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
 	pte = mk_pte(page, vma->vm_page_prot);
 
 	/*
@@ -4146,7 +4098,7 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
 		return handle_userfault(vmf, VM_UFFD_MISSING);
 	}
 
-	inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+	inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
 	page_add_new_anon_rmap(page, vma, vmf->address);
 	lru_cache_add_inactive_or_unevictable(page, vma);
 setpte:
@@ -4336,11 +4288,11 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 		entry = pte_mkuffd_wp(pte_wrprotect(entry));
 	/* copy-on-write page */
 	if (write && !(vma->vm_flags & VM_SHARED)) {
-		inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES);
+		inc_mm_counter(vma->vm_mm, MM_ANONPAGES);
 		page_add_new_anon_rmap(page, vma, addr);
 		lru_cache_add_inactive_or_unevictable(page, vma);
 	} else {
-		inc_mm_counter_fast(vma->vm_mm, mm_counter_file(page));
+		inc_mm_counter(vma->vm_mm, mm_counter_file(page));
 		page_add_file_rmap(page, vma, false);
 	}
 	set_pte_at(vma->vm_mm, addr, vmf->pte, entry);
@@ -5192,9 +5144,6 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	count_vm_event(PGFAULT);
 	count_memcg_event_mm(vma->vm_mm, PGFAULT);
 
-	/* do counter updates before entering really critical section. */
-	check_sync_rss_stat(current);
-
 	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
 					    flags & FAULT_FLAG_INSTRUCTION,
 					    flags & FAULT_FLAG_REMOTE))
-- 
GitLab


From f689054aace2ff13af2e9a44a74fbba650ca31ba Mon Sep 17 00:00:00 2001
From: Shakeel Butt <shakeelb@google.com>
Date: Wed, 9 Nov 2022 01:20:11 +0000
Subject: [PATCH 2972/4122] percpu_counter: add percpu_counter_sum_all
 interface

The percpu_counter is used for scenarios where performance is more
important than the accuracy.  For percpu_counter users, who want more
accurate information in their slowpath, percpu_counter_sum is provided
which traverses all the online CPUs to accumulate the data.  The reason it
only needs to traverse online CPUs is because percpu_counter does
implement CPU offline callback which syncs the local data of the offlined
CPU.

However there is a small race window between the online CPUs traversal of
percpu_counter_sum and the CPU offline callback.  The offline callback has
to traverse all the percpu_counters on the system to flush the CPU local
data which can be a lot.  During that time, the CPU which is going offline
has already been published as offline to all the readers.  So, as the
offline callback is running, percpu_counter_sum can be called for one
counter which has some state on the CPU going offline.  Since
percpu_counter_sum only traverses online CPUs, it will skip that specific
CPU and the offline callback might not have flushed the state for that
specific percpu_counter on that offlined CPU.

Normally this is not an issue because percpu_counter users can deal with
some inaccuracy for small time window.  However a new user i.e.  mm_struct
on the cleanup path wants to check the exact state of the percpu_counter
through check_mm().  For such users, this patch introduces
percpu_counter_sum_all() which traverses all possible CPUs and it is used
in fork.c:check_mm() to avoid the potential race.

This issue is exposed by the later patch "mm: convert mm's rss stats into
percpu_counter".

Link: https://lkml.kernel.org/r/20221109012011.881058-1-shakeelb@google.com
Signed-off-by: Shakeel Butt <shakeelb@google.com>
Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Tested-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/percpu_counter.h |  6 ++++++
 kernel/fork.c                  |  5 +++++
 lib/percpu_counter.c           | 29 +++++++++++++++++++++++------
 3 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index bde6c4c1f4058..a3aae8d57a421 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -45,6 +45,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
 void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount,
 			      s32 batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
+s64 percpu_counter_sum_all(struct percpu_counter *fbc);
 int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch);
 void percpu_counter_sync(struct percpu_counter *fbc);
 
@@ -193,6 +194,11 @@ static inline s64 percpu_counter_sum(struct percpu_counter *fbc)
 	return percpu_counter_read(fbc);
 }
 
+static inline s64 percpu_counter_sum_all(struct percpu_counter *fbc)
+{
+	return percpu_counter_read(fbc);
+}
+
 static inline bool percpu_counter_initialized(struct percpu_counter *fbc)
 {
 	return true;
diff --git a/kernel/fork.c b/kernel/fork.c
index 0fef202434c36..1be4c4ab7f3e7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -755,6 +755,11 @@ static void check_mm(struct mm_struct *mm)
 	for (i = 0; i < NR_MM_COUNTERS; i++) {
 		long x = percpu_counter_sum(&mm->rss_stat[i]);
 
+		if (likely(!x))
+			continue;
+
+		/* Making sure this is not due to race with CPU offlining. */
+		x = percpu_counter_sum_all(&mm->rss_stat[i]);
 		if (unlikely(x))
 			pr_alert("BUG: Bad rss-counter state mm:%p type:%s val:%ld\n",
 				 mm, resident_page_types[i], x);
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ed610b75dc322..42f729c8e56c4 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -117,11 +117,8 @@ void percpu_counter_sync(struct percpu_counter *fbc)
 }
 EXPORT_SYMBOL(percpu_counter_sync);
 
-/*
- * Add up all the per-cpu counts, return the result.  This is a more accurate
- * but much slower version of percpu_counter_read_positive()
- */
-s64 __percpu_counter_sum(struct percpu_counter *fbc)
+static s64 __percpu_counter_sum_mask(struct percpu_counter *fbc,
+			      const struct cpumask *cpu_mask)
 {
 	s64 ret;
 	int cpu;
@@ -129,15 +126,35 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 
 	raw_spin_lock_irqsave(&fbc->lock, flags);
 	ret = fbc->count;
-	for_each_online_cpu(cpu) {
+	for_each_cpu(cpu, cpu_mask) {
 		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
 	}
 	raw_spin_unlock_irqrestore(&fbc->lock, flags);
 	return ret;
 }
+
+/*
+ * Add up all the per-cpu counts, return the result.  This is a more accurate
+ * but much slower version of percpu_counter_read_positive()
+ */
+s64 __percpu_counter_sum(struct percpu_counter *fbc)
+{
+	return __percpu_counter_sum_mask(fbc, cpu_online_mask);
+}
 EXPORT_SYMBOL(__percpu_counter_sum);
 
+/*
+ * This is slower version of percpu_counter_sum as it traverses all possible
+ * cpus. Use this only in the cases where accurate data is needed in the
+ * presense of CPUs getting offlined.
+ */
+s64 percpu_counter_sum_all(struct percpu_counter *fbc)
+{
+	return __percpu_counter_sum_mask(fbc, cpu_possible_mask);
+}
+EXPORT_SYMBOL(percpu_counter_sum_all);
+
 int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp,
 			  struct lock_class_key *key)
 {
-- 
GitLab


From a873dfe1032a132bf89f9e19a6ac44f5a0b78754 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 21 Oct 2022 13:01:19 -0700
Subject: [PATCH 2973/4122] mm, hwpoison: try to recover from copy-on write
 faults

Patch series "Copy-on-write poison recovery", v3.

Part 1 deals with the process that triggered the copy on write fault with
a store to a shared read-only page.  That process is send a SIGBUS with
the usual machine check decoration to specify the virtual address of the
lost page, together with the scope.

Part 2 sets up to asynchronously take the page with the uncorrected error
offline to prevent additional machine check faults.  H/t to Miaohe Lin
<linmiaohe@huawei.com> and Shuai Xue <xueshuai@linux.alibaba.com> for
pointing me to the existing function to queue a call to memory_failure().

On x86 there is some duplicate reporting (because the error is also
signalled by the memory controller as well as by the core that triggered
the machine check).  Console logs look like this:


This patch (of 2):

If the kernel is copying a page as the result of a copy-on-write
fault and runs into an uncorrectable error, Linux will crash because
it does not have recovery code for this case where poison is consumed
by the kernel.

It is easy to set up a test case. Just inject an error into a private
page, fork(2), and have the child process write to the page.

I wrapped that neatly into a test at:

  git://git.kernel.org/pub/scm/linux/kernel/git/aegl/ras-tools.git

just enable ACPI error injection and run:

  # ./einj_mem-uc -f copy-on-write

Add a new copy_user_highpage_mc() function that uses copy_mc_to_kernel()
on architectures where that is available (currently x86 and powerpc).
When an error is detected during the page copy, return VM_FAULT_HWPOISON
to caller of wp_page_copy(). This propagates up the call stack. Both x86
and powerpc have code in their fault handler to deal with this code by
sending a SIGBUS to the application.

Note that this patch avoids a system crash and signals the process that
triggered the copy-on-write action. It does not take any action for the
memory error that is still in the shared page. To handle that a call to
memory_failure() is needed. But this cannot be done from wp_page_copy()
because it holds mmap_lock(). Perhaps the architecture fault handlers
can deal with this loose end in a subsequent patch?

On Intel/x86 this loose end will often be handled automatically because
the memory controller provides an additional notification of the h/w
poison in memory, the handler for this will call memory_failure(). This
isn't a 100% solution. If there are multiple errors, not all may be
logged in this way.

[tony.luck@intel.com: add call to kmsan_unpoison_memory(), per Miaohe Lin]
  Link: https://lkml.kernel.org/r/20221031201029.102123-2-tony.luck@intel.com
Link: https://lkml.kernel.org/r/20221021200120.175753-1-tony.luck@intel.com
Link: https://lkml.kernel.org/r/20221021200120.175753-2-tony.luck@intel.com
Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Reviewed-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Tested-by: Shuai Xue <xueshuai@linux.alibaba.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/highmem.h | 26 ++++++++++++++++++++++++++
 mm/memory.c             | 30 ++++++++++++++++++++----------
 2 files changed, 46 insertions(+), 10 deletions(-)

diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index e9912da5441b4..44242268f53bd 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -319,6 +319,32 @@ static inline void copy_user_highpage(struct page *to, struct page *from,
 
 #endif
 
+#ifdef copy_mc_to_kernel
+static inline int copy_mc_user_highpage(struct page *to, struct page *from,
+					unsigned long vaddr, struct vm_area_struct *vma)
+{
+	unsigned long ret;
+	char *vfrom, *vto;
+
+	vfrom = kmap_local_page(from);
+	vto = kmap_local_page(to);
+	ret = copy_mc_to_kernel(vto, vfrom, PAGE_SIZE);
+	if (!ret)
+		kmsan_unpoison_memory(page_address(to), PAGE_SIZE);
+	kunmap_local(vto);
+	kunmap_local(vfrom);
+
+	return ret;
+}
+#else
+static inline int copy_mc_user_highpage(struct page *to, struct page *from,
+					unsigned long vaddr, struct vm_area_struct *vma)
+{
+	copy_user_highpage(to, from, vaddr, vma);
+	return 0;
+}
+#endif
+
 #ifndef __HAVE_ARCH_COPY_HIGHPAGE
 
 static inline void copy_highpage(struct page *to, struct page *from)
diff --git a/mm/memory.c b/mm/memory.c
index e0555ddd71b53..13b1fe661d860 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2798,10 +2798,16 @@ static inline int pte_unmap_same(struct vm_fault *vmf)
 	return same;
 }
 
-static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
-				       struct vm_fault *vmf)
+/*
+ * Return:
+ *	0:		copied succeeded
+ *	-EHWPOISON:	copy failed due to hwpoison in source page
+ *	-EAGAIN:	copied failed (some other reason)
+ */
+static inline int __wp_page_copy_user(struct page *dst, struct page *src,
+				      struct vm_fault *vmf)
 {
-	bool ret;
+	int ret;
 	void *kaddr;
 	void __user *uaddr;
 	bool locked = false;
@@ -2810,8 +2816,9 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
 	unsigned long addr = vmf->address;
 
 	if (likely(src)) {
-		copy_user_highpage(dst, src, addr, vma);
-		return true;
+		if (copy_mc_user_highpage(dst, src, addr, vma))
+			return -EHWPOISON;
+		return 0;
 	}
 
 	/*
@@ -2838,7 +2845,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
 			 * and update local tlb only
 			 */
 			update_mmu_tlb(vma, addr, vmf->pte);
-			ret = false;
+			ret = -EAGAIN;
 			goto pte_unlock;
 		}
 
@@ -2863,7 +2870,7 @@ static inline bool __wp_page_copy_user(struct page *dst, struct page *src,
 		if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 			/* The PTE changed under us, update local tlb */
 			update_mmu_tlb(vma, addr, vmf->pte);
-			ret = false;
+			ret = -EAGAIN;
 			goto pte_unlock;
 		}
 
@@ -2882,7 +2889,7 @@ warn:
 		}
 	}
 
-	ret = true;
+	ret = 0;
 
 pte_unlock:
 	if (locked)
@@ -3054,6 +3061,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	pte_t entry;
 	int page_copied = 0;
 	struct mmu_notifier_range range;
+	int ret;
 
 	delayacct_wpcopy_start();
 
@@ -3071,19 +3079,21 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 		if (!new_page)
 			goto oom;
 
-		if (!__wp_page_copy_user(new_page, old_page, vmf)) {
+		ret = __wp_page_copy_user(new_page, old_page, vmf);
+		if (ret) {
 			/*
 			 * COW failed, if the fault was solved by other,
 			 * it's fine. If not, userspace would re-fault on
 			 * the same address and we will handle the fault
 			 * from the second attempt.
+			 * The -EHWPOISON case will not be retried.
 			 */
 			put_page(new_page);
 			if (old_page)
 				put_page(old_page);
 
 			delayacct_wpcopy_end();
-			return 0;
+			return ret == -EHWPOISON ? VM_FAULT_HWPOISON : 0;
 		}
 		kmsan_copy_page_meta(new_page, old_page);
 	}
-- 
GitLab


From d302c2398ba269e788a4f37ae57c07a7fcabaa42 Mon Sep 17 00:00:00 2001
From: Tony Luck <tony.luck@intel.com>
Date: Fri, 21 Oct 2022 13:01:20 -0700
Subject: [PATCH 2974/4122] mm, hwpoison: when copy-on-write hits poison, take
 page offline

Cannot call memory_failure() directly from the fault handler because
mmap_lock (and others) are held.

It is important, but not urgent, to mark the source page as h/w poisoned
and unmap it from other tasks.

Use memory_failure_queue() to request a call to memory_failure() for the
page with the error.

Also provide a stub version for CONFIG_MEMORY_FAILURE=n

Link: https://lkml.kernel.org/r/20221021200120.175753-3-tony.luck@intel.com
Signed-off-by: Tony Luck <tony.luck@intel.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Shuai Xue <xueshuai@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 6 +++++-
 mm/memory.c        | 4 +++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0cb4e196d60b8..3950ef45b9a97 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3280,7 +3280,6 @@ enum mf_flags {
 int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
 		      unsigned long count, int mf_flags);
 extern int memory_failure(unsigned long pfn, int flags);
-extern void memory_failure_queue(unsigned long pfn, int flags);
 extern void memory_failure_queue_kick(int cpu);
 extern int unpoison_memory(unsigned long pfn);
 extern int sysctl_memory_failure_early_kill;
@@ -3289,11 +3288,16 @@ extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
+extern void memory_failure_queue(unsigned long pfn, int flags);
 extern int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared);
 void num_poisoned_pages_inc(unsigned long pfn);
 void num_poisoned_pages_sub(unsigned long pfn, long i);
 #else
+static inline void memory_failure_queue(unsigned long pfn, int flags)
+{
+}
+
 static inline int __get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 					bool *migratable_cleared)
 {
diff --git a/mm/memory.c b/mm/memory.c
index 13b1fe661d860..659620b6770fd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2816,8 +2816,10 @@ static inline int __wp_page_copy_user(struct page *dst, struct page *src,
 	unsigned long addr = vmf->address;
 
 	if (likely(src)) {
-		if (copy_mc_user_highpage(dst, src, addr, vma))
+		if (copy_mc_user_highpage(dst, src, addr, vma)) {
+			memory_failure_queue(page_to_pfn(src), 0);
 			return -EHWPOISON;
+		}
 		return 0;
 	}
 
-- 
GitLab


From 1cc53a047b0b9389c2d8f4a69499c6135572f23e Mon Sep 17 00:00:00 2001
From: Muchun Song <songmuchun@bytedance.com>
Date: Thu, 27 Oct 2022 11:36:41 +0800
Subject: [PATCH 2975/4122] mm: hugetlb_vmemmap: remove redundant list_del()

The ->lru field will be assigned to a new value in __free_page().  So it
is unnecessary to delete it from the @list.  Just remove it to simplify
the code.

Link: https://lkml.kernel.org/r/20221027033641.66709-1-songmuchun@bytedance.com
Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb_vmemmap.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 4962dd1ba4a68..7898c2c75e351 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -232,10 +232,8 @@ static void free_vmemmap_page_list(struct list_head *list)
 {
 	struct page *page, *next;
 
-	list_for_each_entry_safe(page, next, list, lru) {
-		list_del(&page->lru);
+	list_for_each_entry_safe(page, next, list, lru)
 		free_vmemmap_page(page);
-	}
 }
 
 static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
-- 
GitLab


From bd4149290c3edc09454a8a7e7ef3a5544cb9eed6 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Mon, 24 Oct 2022 17:46:18 +0000
Subject: [PATCH 2976/4122] Docs/admin-guide/mm/damon/usage: describe the rules
 of sysfs region directories

Patch series "Docs/admin-buide/mm/damon/usage: minor fixes".

DAMON usage document contains an unclear description and a wrong usage
example.  This patchset fixes the two minor problems.


This patch (of 2):

Target region directories of DAMON sysfs interface should contain no
overlap and sorted by the address, but not clearly documented.  Actually,
a user had an issue[1] due to the poor documentation.  Add clear
description of it on the usage document.

[1] https://lore.kernel.org/damon/CAEZ6=UNUcH2BvJj++OrT=XQLdkidU79wmCO=tantSOB36pPNTg@mail.gmail.com/

Link: https://lkml.kernel.org/r/20221024174619.15600-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20221024174619.15600-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Reported-by: Vinicius Petrucci <vpetrucci@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/mm/damon/usage.rst | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index b47b0cbbd491d..89d9a4f75a299 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -235,6 +235,9 @@ In each region directory, you will find two files (``start`` and ``end``).  You
 can set and get the start and end addresses of the initial monitoring target
 region by writing to and reading from the files, respectively.
 
+Each region should not overlap with others.  ``end`` of directory ``N`` should
+be equal or smaller than ``start`` of directory ``N+1``.
+
 contexts/<N>/schemes/
 ---------------------
 
-- 
GitLab


From 1b0166387586cae69d7da783f0a4521864534aad Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Mon, 24 Oct 2022 17:46:19 +0000
Subject: [PATCH 2977/4122] Docs/admin-guide/mm/damon/usage: fix wrong usage
 example of init_regions file

DAMON debugfs interface assumes the users will write all inputs at once.
However, redirecting a string of multiple lines sometimes end up writing
line by line.  Therefore, the example usage of 'init_regions' file, which
writes input as a string of multiple lines can fail.  Fix it to use a
single line string instead.  Also update the description of the usage to
not assume users will write inputs in multiple lines.

Link: https://lkml.kernel.org/r/20221024174619.15600-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Vinicius Petrucci <vpetrucci@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/mm/damon/usage.rst | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index 89d9a4f75a299..c17e02e1e426e 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -468,8 +468,9 @@ regions in case of physical memory monitoring.  Therefore, users should set the
 monitoring target regions by themselves.
 
 In such cases, users can explicitly set the initial monitoring target regions
-as they want, by writing proper values to the ``init_regions`` file.  Each line
-of the input should represent one region in below form.::
+as they want, by writing proper values to the ``init_regions`` file.  The input
+should be a sequence of three integers separated by white spaces that represent
+one region in below form.::
 
     <target idx> <start address> <end address>
 
@@ -484,9 +485,9 @@ ranges, ``20-40`` and ``50-100`` as that of pid 4242, which is the second one
     # cd <debugfs>/damon
     # cat target_ids
     42 4242
-    # echo "0   1       100
-            0   100     200
-            1   20      40
+    # echo "0   1       100 \
+            0   100     200 \
+            1   20      40  \
             1   50      100" > init_regions
 
 Note that this sets the initial monitoring target regions only.  In case of
-- 
GitLab


From 57e9cc50f4dd926d6c38751799d25cad89fb2bd9 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Wed, 26 Oct 2022 14:01:33 -0400
Subject: [PATCH 2978/4122] mm: vmscan: split khugepaged stats from direct
 reclaim stats

Direct reclaim stats are useful for identifying a potential source for
application latency, as well as spotting issues with kswapd.  However,
khugepaged currently distorts the picture: as a kernel thread it doesn't
impose allocation latencies on userspace, and it explicitly opts out of
kswapd reclaim.  Its activity showing up in the direct reclaim stats is
misleading.  Counting it as kswapd reclaim could also cause confusion when
trying to understand actual kswapd behavior.

Break out khugepaged from the direct reclaim counters into new
pgsteal_khugepaged, pgdemote_khugepaged, pgscan_khugepaged counters.

Test with a huge executable (CONFIG_READ_ONLY_THP_FOR_FS):

pgsteal_kswapd 1342185
pgsteal_direct 0
pgsteal_khugepaged 3623
pgscan_kswapd 1345025
pgscan_direct 0
pgscan_khugepaged 3623

Link: https://lkml.kernel.org/r/20221026180133.377671-1-hannes@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Eric Bergen <ebergen@meta.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v2.rst |  6 +++++
 include/linux/khugepaged.h              |  6 +++++
 include/linux/vm_event_item.h           |  3 +++
 mm/khugepaged.c                         |  5 ++++
 mm/memcontrol.c                         |  8 +++++--
 mm/vmscan.c                             | 32 ++++++++++++++++++-------
 mm/vmstat.c                             |  3 +++
 7 files changed, 53 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index dc254a3cb9568..74cec76be9f2c 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1488,12 +1488,18 @@ PAGE_SIZE multiple when read back.
 	  pgscan_direct (npn)
 		Amount of scanned pages directly  (in an inactive LRU list)
 
+	  pgscan_khugepaged (npn)
+		Amount of scanned pages by khugepaged  (in an inactive LRU list)
+
 	  pgsteal_kswapd (npn)
 		Amount of reclaimed pages by kswapd
 
 	  pgsteal_direct (npn)
 		Amount of reclaimed pages directly
 
+	  pgsteal_khugepaged (npn)
+		Amount of reclaimed pages by khugepaged
+
 	  pgfault (npn)
 		Total number of page faults incurred
 
diff --git a/include/linux/khugepaged.h b/include/linux/khugepaged.h
index 70162d707caf0..f68865e19b0b0 100644
--- a/include/linux/khugepaged.h
+++ b/include/linux/khugepaged.h
@@ -15,6 +15,7 @@ extern void __khugepaged_exit(struct mm_struct *mm);
 extern void khugepaged_enter_vma(struct vm_area_struct *vma,
 				 unsigned long vm_flags);
 extern void khugepaged_min_free_kbytes_update(void);
+extern bool current_is_khugepaged(void);
 #ifdef CONFIG_SHMEM
 extern int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr,
 				   bool install_pmd);
@@ -57,6 +58,11 @@ static inline int collapse_pte_mapped_thp(struct mm_struct *mm,
 static inline void khugepaged_min_free_kbytes_update(void)
 {
 }
+
+static inline bool current_is_khugepaged(void)
+{
+	return false;
+}
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #endif /* _LINUX_KHUGEPAGED_H */
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index 3518dba1e02f4..7f5d1caf5890e 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -40,10 +40,13 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
 		PGREUSE,
 		PGSTEAL_KSWAPD,
 		PGSTEAL_DIRECT,
+		PGSTEAL_KHUGEPAGED,
 		PGDEMOTE_KSWAPD,
 		PGDEMOTE_DIRECT,
+		PGDEMOTE_KHUGEPAGED,
 		PGSCAN_KSWAPD,
 		PGSCAN_DIRECT,
+		PGSCAN_KHUGEPAGED,
 		PGSCAN_DIRECT_THROTTLE,
 		PGSCAN_ANON,
 		PGSCAN_FILE,
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 3703a56571c12..9c111273bbf91 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2577,6 +2577,11 @@ void khugepaged_min_free_kbytes_update(void)
 	mutex_unlock(&khugepaged_mutex);
 }
 
+bool current_is_khugepaged(void)
+{
+	return kthread_func(current) == khugepaged;
+}
+
 static int madvise_collapse_errno(enum scan_result r)
 {
 	/*
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c95e2ed6e7fd0..23750cec0036a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -661,8 +661,10 @@ static const unsigned int memcg_vm_event_stat[] = {
 	PGPGOUT,
 	PGSCAN_KSWAPD,
 	PGSCAN_DIRECT,
+	PGSCAN_KHUGEPAGED,
 	PGSTEAL_KSWAPD,
 	PGSTEAL_DIRECT,
+	PGSTEAL_KHUGEPAGED,
 	PGFAULT,
 	PGMAJFAULT,
 	PGREFILL,
@@ -1574,10 +1576,12 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
 	/* Accumulated memory events */
 	seq_buf_printf(&s, "pgscan %lu\n",
 		       memcg_events(memcg, PGSCAN_KSWAPD) +
-		       memcg_events(memcg, PGSCAN_DIRECT));
+		       memcg_events(memcg, PGSCAN_DIRECT) +
+		       memcg_events(memcg, PGSCAN_KHUGEPAGED));
 	seq_buf_printf(&s, "pgsteal %lu\n",
 		       memcg_events(memcg, PGSTEAL_KSWAPD) +
-		       memcg_events(memcg, PGSTEAL_DIRECT));
+		       memcg_events(memcg, PGSTEAL_DIRECT) +
+		       memcg_events(memcg, PGSTEAL_KHUGEPAGED));
 
 	for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
 		if (memcg_vm_event_stat[i] == PGPGIN ||
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 55a5b5d66d681..d7c71be6417d3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -54,6 +54,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
+#include <linux/khugepaged.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -1047,6 +1048,24 @@ void drop_slab(void)
 		drop_slab_node(nid);
 }
 
+static int reclaimer_offset(void)
+{
+	BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
+			PGDEMOTE_DIRECT - PGDEMOTE_KSWAPD);
+	BUILD_BUG_ON(PGSTEAL_DIRECT - PGSTEAL_KSWAPD !=
+			PGSCAN_DIRECT - PGSCAN_KSWAPD);
+	BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
+			PGDEMOTE_KHUGEPAGED - PGDEMOTE_KSWAPD);
+	BUILD_BUG_ON(PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD !=
+			PGSCAN_KHUGEPAGED - PGSCAN_KSWAPD);
+
+	if (current_is_kswapd())
+		return 0;
+	if (current_is_khugepaged())
+		return PGSTEAL_KHUGEPAGED - PGSTEAL_KSWAPD;
+	return PGSTEAL_DIRECT - PGSTEAL_KSWAPD;
+}
+
 static inline int is_page_cache_freeable(struct folio *folio)
 {
 	/*
@@ -1599,10 +1618,7 @@ static unsigned int demote_folio_list(struct list_head *demote_folios,
 		      (unsigned long)&mtc, MIGRATE_ASYNC, MR_DEMOTION,
 		      &nr_succeeded);
 
-	if (current_is_kswapd())
-		__count_vm_events(PGDEMOTE_KSWAPD, nr_succeeded);
-	else
-		__count_vm_events(PGDEMOTE_DIRECT, nr_succeeded);
+	__count_vm_events(PGDEMOTE_KSWAPD + reclaimer_offset(), nr_succeeded);
 
 	return nr_succeeded;
 }
@@ -2475,7 +2491,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 				     &nr_scanned, sc, lru);
 
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, nr_taken);
-	item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+	item = PGSCAN_KSWAPD + reclaimer_offset();
 	if (!cgroup_reclaim(sc))
 		__count_vm_events(item, nr_scanned);
 	__count_memcg_events(lruvec_memcg(lruvec), item, nr_scanned);
@@ -2492,7 +2508,7 @@ static unsigned long shrink_inactive_list(unsigned long nr_to_scan,
 	move_folios_to_lru(lruvec, &folio_list);
 
 	__mod_node_page_state(pgdat, NR_ISOLATED_ANON + file, -nr_taken);
-	item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+	item = PGSTEAL_KSWAPD + reclaimer_offset();
 	if (!cgroup_reclaim(sc))
 		__count_vm_events(item, nr_reclaimed);
 	__count_memcg_events(lruvec_memcg(lruvec), item, nr_reclaimed);
@@ -4871,7 +4887,7 @@ static int scan_folios(struct lruvec *lruvec, struct scan_control *sc,
 			break;
 	}
 
-	item = current_is_kswapd() ? PGSCAN_KSWAPD : PGSCAN_DIRECT;
+	item = PGSCAN_KSWAPD + reclaimer_offset();
 	if (!cgroup_reclaim(sc)) {
 		__count_vm_events(item, isolated);
 		__count_vm_events(PGREFILL, sorted);
@@ -5049,7 +5065,7 @@ retry:
 	if (walk && walk->batched)
 		reset_batch_size(lruvec, walk);
 
-	item = current_is_kswapd() ? PGSTEAL_KSWAPD : PGSTEAL_DIRECT;
+	item = PGSTEAL_KSWAPD + reclaimer_offset();
 	if (!cgroup_reclaim(sc))
 		__count_vm_events(item, reclaimed);
 	__count_memcg_events(memcg, item, reclaimed);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index b2371d745e007..1ea6a5ce1c416 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1271,10 +1271,13 @@ const char * const vmstat_text[] = {
 	"pgreuse",
 	"pgsteal_kswapd",
 	"pgsteal_direct",
+	"pgsteal_khugepaged",
 	"pgdemote_kswapd",
 	"pgdemote_direct",
+	"pgdemote_khugepaged",
 	"pgscan_kswapd",
 	"pgscan_direct",
+	"pgscan_khugepaged",
 	"pgscan_direct_throttle",
 	"pgscan_anon",
 	"pgscan_file",
-- 
GitLab


From 6e7ba8b5e2380f941dda8a1025d70c5ce5b38982 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Fri, 28 Oct 2022 14:45:34 +0000
Subject: [PATCH 2979/4122] maple_tree: mte_set_full() and mte_clear_full()
 clang-analyzer clean up

mte_set_full() and mte_clear_full() were incorrectly setting a pointer to
a value without returning a result.  Fix this by returning the modified
pointer to be use as necessary.  Also add a third function to return if
the bit is set or not.

Link: https://lore.kernel.org/lkml/20221026120029.12555-1-lukas.bulwahn@gmail.com/
Link: https://lkml.kernel.org/r/20221028144520.2776767-1-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Suggested-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Suggested-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/maple_tree.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index df352f6ccc240..3fe1491d2bf98 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -323,14 +323,19 @@ static inline void *mte_safe_root(const struct maple_enode *node)
 	return (void *)((unsigned long)node & ~MAPLE_ROOT_NODE);
 }
 
-static inline void mte_set_full(const struct maple_enode *node)
+static inline void *mte_set_full(const struct maple_enode *node)
 {
-	node = (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
+	return (void *)((unsigned long)node & ~MAPLE_ENODE_NULL);
 }
 
-static inline void mte_clear_full(const struct maple_enode *node)
+static inline void *mte_clear_full(const struct maple_enode *node)
 {
-	node = (void *)((unsigned long)node | MAPLE_ENODE_NULL);
+	return (void *)((unsigned long)node | MAPLE_ENODE_NULL);
+}
+
+static inline bool mte_has_null(const struct maple_enode *node)
+{
+	return (unsigned long)node & MAPLE_ENODE_NULL;
 }
 
 static inline bool ma_is_root(struct maple_node *node)
-- 
GitLab


From b2b23ba03cb9059d11b270cc280dcdfa6dbbdf53 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 28 Oct 2022 08:53:01 -0700
Subject: [PATCH 2980/4122] mempool: do not use ksize() for poisoning

Nothing appears to be using ksize() within the kmalloc-backed mempools
except the mempool poisoning logic.  Use the actual pool size instead of
the ksize() to avoid needing any special handling of the memory as needed
by KASAN, UBSAN_BOUNDS, nor FORTIFY_SOURCE.

[vbabka@suse.cz: for slab mempools pool_data is not object size]
  Link: https://lkml.kernel.org/r/13c4bd6e-09d3-efce-43a5-5a99be8bc96b@suse.cz
Link: https://lkml.kernel.org/r/20221028154823.you.615-kees@kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
Suggested-by: Vlastimil Babka <vbabka@suse.cz>
  Link: https://lore.kernel.org/lkml/f4fc52c4-7c18-1d76-0c7a-4058ea2486b9@suse.cz/
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Reported-by: Anders Roxell <anders.roxell@linaro.org>
  Link: https://lore.kernel.org/all/20221031105514.GB69385@mutt/
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mempool.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/mm/mempool.c b/mm/mempool.c
index 96488b13a1ef1..734bcf5afbb78 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -57,8 +57,10 @@ static void __check_element(mempool_t *pool, void *element, size_t size)
 static void check_element(mempool_t *pool, void *element)
 {
 	/* Mempools backed by slab allocator */
-	if (pool->free == mempool_free_slab || pool->free == mempool_kfree) {
-		__check_element(pool, element, ksize(element));
+	if (pool->free == mempool_kfree) {
+		__check_element(pool, element, (size_t)pool->pool_data);
+	} else if (pool->free == mempool_free_slab) {
+		__check_element(pool, element, kmem_cache_size(pool->pool_data));
 	} else if (pool->free == mempool_free_pages) {
 		/* Mempools backed by page allocator */
 		int order = (int)(long)pool->pool_data;
@@ -80,8 +82,10 @@ static void __poison_element(void *element, size_t size)
 static void poison_element(mempool_t *pool, void *element)
 {
 	/* Mempools backed by slab allocator */
-	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc) {
-		__poison_element(element, ksize(element));
+	if (pool->alloc == mempool_kmalloc) {
+		__poison_element(element, (size_t)pool->pool_data);
+	} else if (pool->alloc == mempool_alloc_slab) {
+		__poison_element(element, kmem_cache_size(pool->pool_data));
 	} else if (pool->alloc == mempool_alloc_pages) {
 		/* Mempools backed by page allocator */
 		int order = (int)(long)pool->pool_data;
@@ -111,8 +115,10 @@ static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
 
 static void kasan_unpoison_element(mempool_t *pool, void *element)
 {
-	if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
-		kasan_unpoison_range(element, __ksize(element));
+	if (pool->alloc == mempool_kmalloc)
+		kasan_unpoison_range(element, (size_t)pool->pool_data);
+	else if (pool->alloc == mempool_alloc_slab)
+		kasan_unpoison_range(element, kmem_cache_size(pool->pool_data));
 	else if (pool->alloc == mempool_alloc_pages)
 		kasan_unpoison_pages(element, (unsigned long)pool->pool_data,
 				     false);
-- 
GitLab


From a098c977722ca27d3b4bfeb966767af3cce45f85 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:51 -0700
Subject: [PATCH 2981/4122] mm/hugetlb_cgroup: convert __set_hugetlb_cgroup()
 to folios

Patch series "convert hugetlb_cgroup helper functions to folios", v2.

This patch series continues the conversion of hugetlb code from being
managed in pages to folios by converting many of the hugetlb_cgroup helper
functions to use folios.  This allows the core hugetlb functions to pass
in a folio to these helper functions.


This patch (of 9);

Change __set_hugetlb_cgroup() to use folios so it is explicit that the
function operates on a head page.

Link: https://lkml.kernel.org/r/20221101223059.460937-1-sidhartha.kumar@oracle.com
Link: https://lkml.kernel.org/r/20221101223059.460937-2-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 14 +++++++-------
 mm/hugetlb_cgroup.c            |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 630cd255d0cfd..7576e9ed8afe7 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -90,31 +90,31 @@ hugetlb_cgroup_from_page_rsvd(struct page *page)
 	return __hugetlb_cgroup_from_page(page, true);
 }
 
-static inline void __set_hugetlb_cgroup(struct page *page,
+static inline void __set_hugetlb_cgroup(struct folio *folio,
 				       struct hugetlb_cgroup *h_cg, bool rsvd)
 {
-	VM_BUG_ON_PAGE(!PageHuge(page), page);
+	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 
-	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
+	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return;
 	if (rsvd)
-		set_page_private(page + SUBPAGE_INDEX_CGROUP_RSVD,
+		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD),
 				 (unsigned long)h_cg);
 	else
-		set_page_private(page + SUBPAGE_INDEX_CGROUP,
+		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP),
 				 (unsigned long)h_cg);
 }
 
 static inline void set_hugetlb_cgroup(struct page *page,
 				     struct hugetlb_cgroup *h_cg)
 {
-	__set_hugetlb_cgroup(page, h_cg, false);
+	__set_hugetlb_cgroup(page_folio(page), h_cg, false);
 }
 
 static inline void set_hugetlb_cgroup_rsvd(struct page *page,
 					  struct hugetlb_cgroup *h_cg)
 {
-	__set_hugetlb_cgroup(page, h_cg, true);
+	__set_hugetlb_cgroup(page_folio(page), h_cg, true);
 }
 
 static inline bool hugetlb_cgroup_disabled(void)
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index f61d132df52b3..b2316bcbf634a 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -314,7 +314,7 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 	if (hugetlb_cgroup_disabled() || !h_cg)
 		return;
 
-	__set_hugetlb_cgroup(page, h_cg, rsvd);
+	__set_hugetlb_cgroup(page_folio(page), h_cg, rsvd);
 	if (!rsvd) {
 		unsigned long usage =
 			h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
@@ -356,7 +356,7 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 	h_cg = __hugetlb_cgroup_from_page(page, rsvd);
 	if (unlikely(!h_cg))
 		return;
-	__set_hugetlb_cgroup(page, NULL, rsvd);
+	__set_hugetlb_cgroup(page_folio(page), NULL, rsvd);
 
 	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 								   rsvd),
-- 
GitLab


From f074732d599e19a2a5b12e54743ad5eaccbe6550 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:52 -0700
Subject: [PATCH 2982/4122] mm/hugetlb_cgroup: convert
 hugetlb_cgroup_from_page() to folios

Introduce folios in __remove_hugetlb_page() by converting
hugetlb_cgroup_from_page() to use folios.

Also gets rid of unsed hugetlb_cgroup_from_page_resv() function.

Link: https://lkml.kernel.org/r/20221101223059.460937-3-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 39 +++++++++++++++++-----------------
 mm/hugetlb.c                   |  5 +++--
 mm/hugetlb_cgroup.c            | 13 +++++++-----
 3 files changed, 31 insertions(+), 26 deletions(-)

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 7576e9ed8afe7..feb2edafc8b68 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -67,27 +67,34 @@ struct hugetlb_cgroup {
 };
 
 static inline struct hugetlb_cgroup *
-__hugetlb_cgroup_from_page(struct page *page, bool rsvd)
+__hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
 {
-	VM_BUG_ON_PAGE(!PageHuge(page), page);
+	struct page *tail;
 
-	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER)
+	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
+	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return NULL;
-	if (rsvd)
-		return (void *)page_private(page + SUBPAGE_INDEX_CGROUP_RSVD);
-	else
-		return (void *)page_private(page + SUBPAGE_INDEX_CGROUP);
+
+	if (rsvd) {
+		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD);
+		return (void *)page_private(tail);
+	}
+
+	else {
+		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP);
+		return (void *)page_private(tail);
+	}
 }
 
-static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
 {
-	return __hugetlb_cgroup_from_page(page, false);
+	return __hugetlb_cgroup_from_folio(folio, false);
 }
 
 static inline struct hugetlb_cgroup *
-hugetlb_cgroup_from_page_rsvd(struct page *page)
+hugetlb_cgroup_from_folio_rsvd(struct folio *folio)
 {
-	return __hugetlb_cgroup_from_page(page, true);
+	return __hugetlb_cgroup_from_folio(folio, true);
 }
 
 static inline void __set_hugetlb_cgroup(struct folio *folio,
@@ -181,19 +188,13 @@ static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
 {
 }
 
-static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page)
-{
-	return NULL;
-}
-
-static inline struct hugetlb_cgroup *
-hugetlb_cgroup_from_page_resv(struct page *page)
+static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
 {
 	return NULL;
 }
 
 static inline struct hugetlb_cgroup *
-hugetlb_cgroup_from_page_rsvd(struct page *page)
+hugetlb_cgroup_from_folio_rsvd(struct folio *folio)
 {
 	return NULL;
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index be09678d05825..f86a61a73112a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1446,9 +1446,10 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page,
 							bool demote)
 {
 	int nid = page_to_nid(page);
+	struct folio *folio = page_folio(page);
 
-	VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
-	VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page);
+	VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio);
+	VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio);
 
 	lockdep_assert_held(&hugetlb_lock);
 	if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index b2316bcbf634a..8b95c1560f9c3 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -191,8 +191,9 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
 	struct page_counter *counter;
 	struct hugetlb_cgroup *page_hcg;
 	struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
+	struct folio *folio = page_folio(page);
 
-	page_hcg = hugetlb_cgroup_from_page(page);
+	page_hcg = hugetlb_cgroup_from_folio(folio);
 	/*
 	 * We can have pages in active list without any cgroup
 	 * ie, hugepage with less than 3 pages. We can safely
@@ -349,14 +350,15 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 					   struct page *page, bool rsvd)
 {
 	struct hugetlb_cgroup *h_cg;
+	struct folio *folio = page_folio(page);
 
 	if (hugetlb_cgroup_disabled())
 		return;
 	lockdep_assert_held(&hugetlb_lock);
-	h_cg = __hugetlb_cgroup_from_page(page, rsvd);
+	h_cg = __hugetlb_cgroup_from_folio(folio, rsvd);
 	if (unlikely(!h_cg))
 		return;
-	__set_hugetlb_cgroup(page_folio(page), NULL, rsvd);
+	__set_hugetlb_cgroup(folio, NULL, rsvd);
 
 	page_counter_uncharge(__hugetlb_cgroup_counter_from_cgroup(h_cg, idx,
 								   rsvd),
@@ -888,13 +890,14 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 	struct hugetlb_cgroup *h_cg;
 	struct hugetlb_cgroup *h_cg_rsvd;
 	struct hstate *h = page_hstate(oldhpage);
+	struct folio *old_folio = page_folio(oldhpage);
 
 	if (hugetlb_cgroup_disabled())
 		return;
 
 	spin_lock_irq(&hugetlb_lock);
-	h_cg = hugetlb_cgroup_from_page(oldhpage);
-	h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
+	h_cg = hugetlb_cgroup_from_folio(old_folio);
+	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
 	set_hugetlb_cgroup(oldhpage, NULL);
 	set_hugetlb_cgroup_rsvd(oldhpage, NULL);
 
-- 
GitLab


From de656ed376c4cb47c5713fba52f8bbfbea44f387 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:53 -0700
Subject: [PATCH 2983/4122] mm/hugetlb_cgroup: convert set_hugetlb_cgroup*() to
 folios

Allows __prep_new_huge_page() to operate on a folio by converting
set_hugetlb_cgroup*() to take in a folio.

Link: https://lkml.kernel.org/r/20221101223059.460937-4-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 12 ++++++------
 mm/hugetlb.c                   | 33 +++++++++++++++++++--------------
 mm/hugetlb_cgroup.c            | 11 ++++++-----
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index feb2edafc8b68..a7e3540f7f38d 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -112,16 +112,16 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
 				 (unsigned long)h_cg);
 }
 
-static inline void set_hugetlb_cgroup(struct page *page,
+static inline void set_hugetlb_cgroup(struct folio *folio,
 				     struct hugetlb_cgroup *h_cg)
 {
-	__set_hugetlb_cgroup(page_folio(page), h_cg, false);
+	__set_hugetlb_cgroup(folio, h_cg, false);
 }
 
-static inline void set_hugetlb_cgroup_rsvd(struct page *page,
+static inline void set_hugetlb_cgroup_rsvd(struct folio *folio,
 					  struct hugetlb_cgroup *h_cg)
 {
-	__set_hugetlb_cgroup(page_folio(page), h_cg, true);
+	__set_hugetlb_cgroup(folio, h_cg, true);
 }
 
 static inline bool hugetlb_cgroup_disabled(void)
@@ -199,12 +199,12 @@ hugetlb_cgroup_from_folio_rsvd(struct folio *folio)
 	return NULL;
 }
 
-static inline void set_hugetlb_cgroup(struct page *page,
+static inline void set_hugetlb_cgroup(struct folio *folio,
 				     struct hugetlb_cgroup *h_cg)
 {
 }
 
-static inline void set_hugetlb_cgroup_rsvd(struct page *page,
+static inline void set_hugetlb_cgroup_rsvd(struct folio *folio,
 					  struct hugetlb_cgroup *h_cg)
 {
 }
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index f86a61a73112a..01ea43b227245 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1774,19 +1774,21 @@ static void __prep_account_new_huge_page(struct hstate *h, int nid)
 	h->nr_huge_pages_node[nid]++;
 }
 
-static void __prep_new_huge_page(struct hstate *h, struct page *page)
+static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
-	hugetlb_vmemmap_optimize(h, page);
-	INIT_LIST_HEAD(&page->lru);
-	set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-	hugetlb_set_page_subpool(page, NULL);
-	set_hugetlb_cgroup(page, NULL);
-	set_hugetlb_cgroup_rsvd(page, NULL);
+	hugetlb_vmemmap_optimize(h, &folio->page);
+	INIT_LIST_HEAD(&folio->lru);
+	folio->_folio_dtor = HUGETLB_PAGE_DTOR;
+	hugetlb_set_folio_subpool(folio, NULL);
+	set_hugetlb_cgroup(folio, NULL);
+	set_hugetlb_cgroup_rsvd(folio, NULL);
 }
 
 static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
-	__prep_new_huge_page(h, page);
+	struct folio *folio = page_folio(page);
+
+	__prep_new_hugetlb_folio(h, folio);
 	spin_lock_irq(&hugetlb_lock);
 	__prep_account_new_huge_page(h, nid);
 	spin_unlock_irq(&hugetlb_lock);
@@ -2748,8 +2750,10 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 					struct list_head *list)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-	int nid = page_to_nid(old_page);
+	struct folio *old_folio = page_folio(old_page);
+	int nid = folio_nid(old_folio);
 	struct page *new_page;
+	struct folio *new_folio;
 	int ret = 0;
 
 	/*
@@ -2762,16 +2766,17 @@ static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
 	new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
 	if (!new_page)
 		return -ENOMEM;
-	__prep_new_huge_page(h, new_page);
+	new_folio = page_folio(new_page);
+	__prep_new_hugetlb_folio(h, new_folio);
 
 retry:
 	spin_lock_irq(&hugetlb_lock);
-	if (!PageHuge(old_page)) {
+	if (!folio_test_hugetlb(old_folio)) {
 		/*
 		 * Freed from under us. Drop new_page too.
 		 */
 		goto free_new;
-	} else if (page_count(old_page)) {
+	} else if (folio_ref_count(old_folio)) {
 		/*
 		 * Someone has grabbed the page, try to isolate it here.
 		 * Fail with -EBUSY if not possible.
@@ -2780,7 +2785,7 @@ retry:
 		ret = isolate_hugetlb(old_page, list);
 		spin_lock_irq(&hugetlb_lock);
 		goto free_new;
-	} else if (!HPageFreed(old_page)) {
+	} else if (!folio_test_hugetlb_freed(old_folio)) {
 		/*
 		 * Page's refcount is 0 but it has not been enqueued in the
 		 * freelist yet. Race window is small, so we can succeed here if
@@ -2818,7 +2823,7 @@ retry:
 free_new:
 	spin_unlock_irq(&hugetlb_lock);
 	/* Page has a zero ref count, but needs a ref to be freed */
-	set_page_refcounted(new_page);
+	folio_ref_unfreeze(new_folio, 1);
 	update_and_free_page(h, new_page, false);
 
 	return ret;
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 8b95c1560f9c3..87a1125aa42dd 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -212,7 +212,7 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
 	/* Take the pages off the local counter */
 	page_counter_cancel(counter, nr_pages);
 
-	set_hugetlb_cgroup(page, parent);
+	set_hugetlb_cgroup(folio, parent);
 out:
 	return;
 }
@@ -891,6 +891,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 	struct hugetlb_cgroup *h_cg_rsvd;
 	struct hstate *h = page_hstate(oldhpage);
 	struct folio *old_folio = page_folio(oldhpage);
+	struct folio *new_folio = page_folio(newhpage);
 
 	if (hugetlb_cgroup_disabled())
 		return;
@@ -898,12 +899,12 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 	spin_lock_irq(&hugetlb_lock);
 	h_cg = hugetlb_cgroup_from_folio(old_folio);
 	h_cg_rsvd = hugetlb_cgroup_from_folio_rsvd(old_folio);
-	set_hugetlb_cgroup(oldhpage, NULL);
-	set_hugetlb_cgroup_rsvd(oldhpage, NULL);
+	set_hugetlb_cgroup(old_folio, NULL);
+	set_hugetlb_cgroup_rsvd(old_folio, NULL);
 
 	/* move the h_cg details to new cgroup */
-	set_hugetlb_cgroup(newhpage, h_cg);
-	set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
+	set_hugetlb_cgroup(new_folio, h_cg);
+	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
 	list_move(&newhpage->lru, &h->hugepage_activelist);
 	spin_unlock_irq(&hugetlb_lock);
 	return;
-- 
GitLab


From 29f394304f624b06fafb3cc9c3da8779f71f4bee Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:54 -0700
Subject: [PATCH 2984/4122] mm/hugetlb_cgroup: convert hugetlb_cgroup_migrate
 to folios

Cleans up intermediate page to folio conversion code in
hugetlb_cgroup_migrate() by changing its arguments from pages to folios.

Link: https://lkml.kernel.org/r/20221101223059.460937-5-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 8 ++++----
 mm/hugetlb.c                   | 2 +-
 mm/hugetlb_cgroup.c            | 8 +++-----
 3 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index a7e3540f7f38d..789b6fef176d9 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -177,8 +177,8 @@ extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
 						bool region_del);
 
 extern void hugetlb_cgroup_file_init(void) __init;
-extern void hugetlb_cgroup_migrate(struct page *oldhpage,
-				   struct page *newhpage);
+extern void hugetlb_cgroup_migrate(struct folio *old_folio,
+				   struct folio *new_folio);
 
 #else
 static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
@@ -286,8 +286,8 @@ static inline void hugetlb_cgroup_file_init(void)
 {
 }
 
-static inline void hugetlb_cgroup_migrate(struct page *oldhpage,
-					  struct page *newhpage)
+static inline void hugetlb_cgroup_migrate(struct folio *old_folio,
+					  struct folio *new_folio)
 {
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 01ea43b227245..05a832886a09f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7325,7 +7325,7 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
 {
 	struct hstate *h = page_hstate(oldpage);
 
-	hugetlb_cgroup_migrate(oldpage, newpage);
+	hugetlb_cgroup_migrate(page_folio(oldpage), page_folio(newpage));
 	set_page_owner_migrate_reason(newpage, reason);
 
 	/*
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 87a1125aa42dd..b1b18337a56a4 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -885,13 +885,11 @@ void __init hugetlb_cgroup_file_init(void)
  * hugetlb_lock will make sure a parallel cgroup rmdir won't happen
  * when we migrate hugepages
  */
-void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
+void hugetlb_cgroup_migrate(struct folio *old_folio, struct folio *new_folio)
 {
 	struct hugetlb_cgroup *h_cg;
 	struct hugetlb_cgroup *h_cg_rsvd;
-	struct hstate *h = page_hstate(oldhpage);
-	struct folio *old_folio = page_folio(oldhpage);
-	struct folio *new_folio = page_folio(newhpage);
+	struct hstate *h = folio_hstate(old_folio);
 
 	if (hugetlb_cgroup_disabled())
 		return;
@@ -905,7 +903,7 @@ void hugetlb_cgroup_migrate(struct page *oldhpage, struct page *newhpage)
 	/* move the h_cg details to new cgroup */
 	set_hugetlb_cgroup(new_folio, h_cg);
 	set_hugetlb_cgroup_rsvd(new_folio, h_cg_rsvd);
-	list_move(&newhpage->lru, &h->hugepage_activelist);
+	list_move(&new_folio->lru, &h->hugepage_activelist);
 	spin_unlock_irq(&hugetlb_lock);
 	return;
 }
-- 
GitLab


From d5e33bd8c16b6f5f47665d378f078bee72b85225 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:55 -0700
Subject: [PATCH 2985/4122] mm/hugetlb: convert isolate_or_dissolve_huge_page
 to folios

Removes a call to compound_head() by using a folio when operating on the
head page of a hugetlb compound page.

Link: https://lkml.kernel.org/r/20221101223059.460937-6-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 05a832886a09f..666a771c9a3d2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2832,7 +2832,7 @@ free_new:
 int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 {
 	struct hstate *h;
-	struct page *head;
+	struct folio *folio = page_folio(page);
 	int ret = -EBUSY;
 
 	/*
@@ -2841,9 +2841,8 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 	 * Return success when racing as if we dissolved the page ourselves.
 	 */
 	spin_lock_irq(&hugetlb_lock);
-	if (PageHuge(page)) {
-		head = compound_head(page);
-		h = page_hstate(head);
+	if (folio_test_hugetlb(folio)) {
+		h = folio_hstate(folio);
 	} else {
 		spin_unlock_irq(&hugetlb_lock);
 		return 0;
@@ -2858,10 +2857,10 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 	if (hstate_is_gigantic(h))
 		return -ENOMEM;
 
-	if (page_count(head) && !isolate_hugetlb(head, list))
+	if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
 		ret = 0;
-	else if (!page_count(head))
-		ret = alloc_and_dissolve_huge_page(h, head, list);
+	else if (!folio_ref_count(folio))
+		ret = alloc_and_dissolve_huge_page(h, &folio->page, list);
 
 	return ret;
 }
-- 
GitLab


From 0356c4b96f6890dd61af4c902f681764f4bdba09 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:56 -0700
Subject: [PATCH 2986/4122] mm/hugetlb: convert free_huge_page to folios

Use folios inside free_huge_page(), this is in preparation for converting
hugetlb_cgroup_uncharge_page() to take in a folio.

Link: https://lkml.kernel.org/r/20221101223059.460937-7-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 666a771c9a3d2..9841fb0fcaf97 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1704,21 +1704,22 @@ void free_huge_page(struct page *page)
 	 * Can't pass hstate in here because it is called from the
 	 * compound page destructor.
 	 */
-	struct hstate *h = page_hstate(page);
-	int nid = page_to_nid(page);
-	struct hugepage_subpool *spool = hugetlb_page_subpool(page);
+	struct folio *folio = page_folio(page);
+	struct hstate *h = folio_hstate(folio);
+	int nid = folio_nid(folio);
+	struct hugepage_subpool *spool = hugetlb_folio_subpool(folio);
 	bool restore_reserve;
 	unsigned long flags;
 
-	VM_BUG_ON_PAGE(page_count(page), page);
-	VM_BUG_ON_PAGE(page_mapcount(page), page);
+	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
+	VM_BUG_ON_FOLIO(folio_mapcount(folio), folio);
 
-	hugetlb_set_page_subpool(page, NULL);
-	if (PageAnon(page))
-		__ClearPageAnonExclusive(page);
-	page->mapping = NULL;
-	restore_reserve = HPageRestoreReserve(page);
-	ClearHPageRestoreReserve(page);
+	hugetlb_set_folio_subpool(folio, NULL);
+	if (folio_test_anon(folio))
+		__ClearPageAnonExclusive(&folio->page);
+	folio->mapping = NULL;
+	restore_reserve = folio_test_hugetlb_restore_reserve(folio);
+	folio_clear_hugetlb_restore_reserve(folio);
 
 	/*
 	 * If HPageRestoreReserve was set on page, page allocation consumed a
@@ -1740,7 +1741,7 @@ void free_huge_page(struct page *page)
 	}
 
 	spin_lock_irqsave(&hugetlb_lock, flags);
-	ClearHPageMigratable(page);
+	folio_clear_hugetlb_migratable(folio);
 	hugetlb_cgroup_uncharge_page(hstate_index(h),
 				     pages_per_huge_page(h), page);
 	hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
@@ -1748,7 +1749,7 @@ void free_huge_page(struct page *page)
 	if (restore_reserve)
 		h->resv_huge_pages++;
 
-	if (HPageTemporary(page)) {
+	if (folio_test_hugetlb_temporary(folio)) {
 		remove_hugetlb_page(h, page, false);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
 		update_and_free_page(h, page, true);
-- 
GitLab


From d4ab0316cc33aeedf6dcb1c2c25e097a25766132 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:57 -0700
Subject: [PATCH 2987/4122] mm/hugetlb_cgroup: convert
 hugetlb_cgroup_uncharge_page() to folios

Continue to use a folio inside free_huge_page() by converting
hugetlb_cgroup_uncharge_page*() to folios.

Link: https://lkml.kernel.org/r/20221101223059.460937-8-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb_cgroup.h | 16 ++++++++--------
 mm/hugetlb.c                   | 15 +++++++++------
 mm/hugetlb_cgroup.c            | 21 ++++++++++-----------
 3 files changed, 27 insertions(+), 25 deletions(-)

diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index 789b6fef176d9..c70f92fe493ef 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -158,10 +158,10 @@ extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 extern void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 					      struct hugetlb_cgroup *h_cg,
 					      struct page *page);
-extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
-					 struct page *page);
-extern void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
-					      struct page *page);
+extern void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
+					 struct folio *folio);
+extern void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
+					      struct folio *folio);
 
 extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg);
@@ -254,14 +254,14 @@ hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 {
 }
 
-static inline void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
-						struct page *page)
+static inline void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
+						struct folio *folio)
 {
 }
 
-static inline void hugetlb_cgroup_uncharge_page_rsvd(int idx,
+static inline void hugetlb_cgroup_uncharge_folio_rsvd(int idx,
 						     unsigned long nr_pages,
-						     struct page *page)
+						     struct folio *folio)
 {
 }
 static inline void hugetlb_cgroup_uncharge_cgroup(int idx,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9841fb0fcaf97..e1950fff6aa9a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1742,10 +1742,10 @@ void free_huge_page(struct page *page)
 
 	spin_lock_irqsave(&hugetlb_lock, flags);
 	folio_clear_hugetlb_migratable(folio);
-	hugetlb_cgroup_uncharge_page(hstate_index(h),
-				     pages_per_huge_page(h), page);
-	hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-					  pages_per_huge_page(h), page);
+	hugetlb_cgroup_uncharge_folio(hstate_index(h),
+				     pages_per_huge_page(h), folio);
+	hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+					  pages_per_huge_page(h), folio);
 	if (restore_reserve)
 		h->resv_huge_pages++;
 
@@ -2872,6 +2872,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 	struct hugepage_subpool *spool = subpool_vma(vma);
 	struct hstate *h = hstate_vma(vma);
 	struct page *page;
+	struct folio *folio;
 	long map_chg, map_commit;
 	long gbl_chg;
 	int ret, idx;
@@ -2935,6 +2936,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 	 * a reservation exists for the allocation.
 	 */
 	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
+
 	if (!page) {
 		spin_unlock_irq(&hugetlb_lock);
 		page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
@@ -2949,6 +2951,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 		set_page_refcounted(page);
 		/* Fall through */
 	}
+	folio = page_folio(page);
 	hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
 	/* If allocation is not consuming a reservation, also store the
 	 * hugetlb_cgroup pointer on the page.
@@ -2978,8 +2981,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 		rsv_adjust = hugepage_subpool_put_pages(spool, 1);
 		hugetlb_acct_memory(h, -rsv_adjust);
 		if (deferred_reserve)
-			hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
-					pages_per_huge_page(h), page);
+			hugetlb_cgroup_uncharge_folio_rsvd(hstate_index(h),
+					pages_per_huge_page(h), folio);
 	}
 	return page;
 
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index b1b18337a56a4..4cd57f979245f 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -346,11 +346,10 @@ void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 /*
  * Should be called with hugetlb_lock held
  */
-static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
-					   struct page *page, bool rsvd)
+static void __hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
+					   struct folio *folio, bool rsvd)
 {
 	struct hugetlb_cgroup *h_cg;
-	struct folio *folio = page_folio(page);
 
 	if (hugetlb_cgroup_disabled())
 		return;
@@ -368,27 +367,27 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
 		css_put(&h_cg->css);
 	else {
 		unsigned long usage =
-			h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
+			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
 		/*
 		 * This write is not atomic due to fetching usage and writing
 		 * to it, but that's fine because we call this with
 		 * hugetlb_lock held anyway.
 		 */
-		WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
+		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
 			   usage - nr_pages);
 	}
 }
 
-void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
-				  struct page *page)
+void hugetlb_cgroup_uncharge_folio(int idx, unsigned long nr_pages,
+				  struct folio *folio)
 {
-	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, false);
+	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, false);
 }
 
-void hugetlb_cgroup_uncharge_page_rsvd(int idx, unsigned long nr_pages,
-				       struct page *page)
+void hugetlb_cgroup_uncharge_folio_rsvd(int idx, unsigned long nr_pages,
+				       struct folio *folio)
 {
-	__hugetlb_cgroup_uncharge_page(idx, nr_pages, page, true);
+	__hugetlb_cgroup_uncharge_folio(idx, nr_pages, folio, true);
 }
 
 static void __hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
-- 
GitLab


From 541b7c7b3ec0555a09782b463bcbc2cb86d97085 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:58 -0700
Subject: [PATCH 2988/4122] mm/hugeltb_cgroup: convert
 hugetlb_cgroup_commit_charge*() to folios

Convert hugetlb_cgroup_commit_charge*() to internally use folios to clean
up the code after __set_hugetlb_cgroup() was changed to take a folio.

Link: https://lkml.kernel.org/r/20221101223059.460937-9-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb_cgroup.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 4cd57f979245f..d9e4425d81ac4 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -310,21 +310,21 @@ int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
 /* Should be called with hugetlb_lock held */
 static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 					   struct hugetlb_cgroup *h_cg,
-					   struct page *page, bool rsvd)
+					   struct folio *folio, bool rsvd)
 {
 	if (hugetlb_cgroup_disabled() || !h_cg)
 		return;
 
-	__set_hugetlb_cgroup(page_folio(page), h_cg, rsvd);
+	__set_hugetlb_cgroup(folio, h_cg, rsvd);
 	if (!rsvd) {
 		unsigned long usage =
-			h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
+			h_cg->nodeinfo[folio_nid(folio)]->usage[idx];
 		/*
 		 * This write is not atomic due to fetching usage and writing
 		 * to it, but that's fine because we call this with
 		 * hugetlb_lock held anyway.
 		 */
-		WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
+		WRITE_ONCE(h_cg->nodeinfo[folio_nid(folio)]->usage[idx],
 			   usage + nr_pages);
 	}
 }
@@ -333,14 +333,18 @@ void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
 				  struct hugetlb_cgroup *h_cg,
 				  struct page *page)
 {
-	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, false);
+	struct folio *folio = page_folio(page);
+
+	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, false);
 }
 
 void hugetlb_cgroup_commit_charge_rsvd(int idx, unsigned long nr_pages,
 				       struct hugetlb_cgroup *h_cg,
 				       struct page *page)
 {
-	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, page, true);
+	struct folio *folio = page_folio(page);
+
+	__hugetlb_cgroup_commit_charge(idx, nr_pages, h_cg, folio, true);
 }
 
 /*
-- 
GitLab


From 345c62d163496ae4b5c1ce530b1588067d8f5a8b Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 1 Nov 2022 15:30:59 -0700
Subject: [PATCH 2989/4122] mm/hugetlb: convert move_hugetlb_state() to folios

Clean up unmap_and_move_huge_page() by converting move_hugetlb_state() to
take in folios.

[akpm@linux-foundation.org: fix CONFIG_HUGETLB_PAGE=n build]
Link: https://lkml.kernel.org/r/20221101223059.460937-10-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Bui Quang Minh <minhquangbui99@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h | 11 ++++++++---
 mm/hugetlb.c            | 22 ++++++++++++----------
 mm/migrate.c            |  4 ++--
 3 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 65ea34022aa2b..58a30938a9b19 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -187,7 +187,7 @@ int get_hwpoison_huge_page(struct page *page, bool *hugetlb, bool unpoison);
 int get_huge_page_for_hwpoison(unsigned long pfn, int flags,
 				bool *migratable_cleared);
 void putback_active_hugepage(struct page *page);
-void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason);
+void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason);
 void free_huge_page(struct page *page);
 void hugetlb_fix_reserve_counts(struct inode *inode);
 extern struct mutex *hugetlb_fault_mutex_table;
@@ -407,8 +407,8 @@ static inline void putback_active_hugepage(struct page *page)
 {
 }
 
-static inline void move_hugetlb_state(struct page *oldpage,
-					struct page *newpage, int reason)
+static inline void move_hugetlb_state(struct folio *old_folio,
+					struct folio *new_folio, int reason)
 {
 }
 
@@ -991,6 +991,11 @@ void hugetlb_unregister_node(struct node *node);
 #else	/* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
+static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
+{
+	return NULL;
+}
+
 static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 {
 	return NULL;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e1950fff6aa9a..76ebefe02827e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -7324,15 +7324,15 @@ void putback_active_hugepage(struct page *page)
 	put_page(page);
 }
 
-void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
+void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int reason)
 {
-	struct hstate *h = page_hstate(oldpage);
+	struct hstate *h = folio_hstate(old_folio);
 
-	hugetlb_cgroup_migrate(page_folio(oldpage), page_folio(newpage));
-	set_page_owner_migrate_reason(newpage, reason);
+	hugetlb_cgroup_migrate(old_folio, new_folio);
+	set_page_owner_migrate_reason(&new_folio->page, reason);
 
 	/*
-	 * transfer temporary state of the new huge page. This is
+	 * transfer temporary state of the new hugetlb folio. This is
 	 * reverse to other transitions because the newpage is going to
 	 * be final while the old one will be freed so it takes over
 	 * the temporary status.
@@ -7341,12 +7341,14 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
 	 * here as well otherwise the global surplus count will not match
 	 * the per-node's.
 	 */
-	if (HPageTemporary(newpage)) {
-		int old_nid = page_to_nid(oldpage);
-		int new_nid = page_to_nid(newpage);
+	if (folio_test_hugetlb_temporary(new_folio)) {
+		int old_nid = folio_nid(old_folio);
+		int new_nid = folio_nid(new_folio);
+
+
+		folio_set_hugetlb_temporary(old_folio);
+		folio_clear_hugetlb_temporary(new_folio);
 
-		SetHPageTemporary(oldpage);
-		ClearHPageTemporary(newpage);
 
 		/*
 		 * There is no need to transfer the per-node surplus state
diff --git a/mm/migrate.c b/mm/migrate.c
index f8c85b42e2bcc..4aea647a01801 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1298,7 +1298,7 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 	 * folio_mapping() set, hugetlbfs specific move page routine will not
 	 * be called and we could leak usage counts for subpools.
 	 */
-	if (hugetlb_page_subpool(hpage) && !folio_mapping(src)) {
+	if (hugetlb_folio_subpool(src) && !folio_mapping(src)) {
 		rc = -EBUSY;
 		goto out_unlock;
 	}
@@ -1348,7 +1348,7 @@ put_anon:
 		put_anon_vma(anon_vma);
 
 	if (rc == MIGRATEPAGE_SUCCESS) {
-		move_hugetlb_state(hpage, new_hpage, reason);
+		move_hugetlb_state(src, dst, reason);
 		put_new_page = NULL;
 	}
 
-- 
GitLab


From 44467bbb7e81ebcef2a5bfc9d6546bf7cd015374 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:21 +0000
Subject: [PATCH 2990/4122] mm/damon/core: add a callback for scheme target
 regions check

Patch series "efficiently expose damos action tried regions information".

DAMON users can retrieve the monitoring results via 'after_aggregation'
callbacks if the user is using the kernel API, or 'damon_aggregated'
tracepoint if the user is in the user space.  Those are useful if full
monitoring results are necessary.  However, if the user has interest in
only a snapshot of the results for some regions having specific access
pattern, the interfaces could be inefficient.  For example, some users
only want to know which memory regions are not accessed for more than a
specific time at the moment.

Also, some DAMOS users would want to know exactly to what memory regions
the schemes' actions tried to be applied, for a debugging or a tuning.  As
DAMOS has its internal mechanism for quota and regions prioritization, the
users would need to simulate DAMOS' mechanism against the monitoring
results.  That's unnecessarily complex.

This patchset implements DAMON kernel API callbacks and sysfs directory
for efficient exposure of the information for the use cases.  The new
callback will be called for each region when a DAMOS action is gonna tried
to be applied to it.  The sysfs directory will be called 'tried_regions'
and placed under each scheme sysfs directory.  Users can write a special
keyworkd, 'update_schemes_regions', to the 'state' file of a kdamond sysfs
directory.  Then, DAMON sysfs interface will fill the directory with the
information of regions that corresponding scheme action was tried to be
applied for next one aggregation interval.

Patches Sequence
----------------

The first one (patch 1) implements the callback for the kernel space
users.  Following two patches (patches 2 and 3) implements sysfs
directories for the information and its sub directories.  Two patches
(patches 4 and 5) for implementing the special keywords for filling the
data to and cleaning up the directories follow.  Patch 6 adds a selftest
for the new sysfs directory.  Finally, two patches (patches 7 and 8)
document the new feature in the administrator guide and the ABI document.


This patch (of 8):

Getting DAMON monitoring results of only specific access pattern (e.g.,
getting address ranges of memory that not accessed at all for two minutes)
can be useful for efficient monitoring of the system.  The information can
also be helpful for deep level investigation of DAMON-based operation
schemes.

For that, users need to record (in case of the user space users) or
iterate (in case of the kernel space users) full monitoring results and
filter it out for the specific access pattern.  In case of the DAMOS
investigation, users will even need to simulate DAMOS' quota and
prioritization mechanisms.  It's inefficient and complex.

Add a new DAMON callback that will be called before each scheme is applied
to each region.  DAMON kernel API users will be able to do the query-like
monitoring results collection, or DAMOS investigation in an efficient and
simple way using it.

Commits for providing the capability to the user space users will follow.

Link: https://lkml.kernel.org/r/20221101220328.95765-1-sj@kernel.org
Link: https://lkml.kernel.org/r/20221101220328.95765-2-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/damon.h | 5 +++++
 mm/damon/core.c       | 6 +++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/include/linux/damon.h b/include/linux/damon.h
index 620ada094c3b2..35630634d7904 100644
--- a/include/linux/damon.h
+++ b/include/linux/damon.h
@@ -357,6 +357,7 @@ struct damon_operations {
  * @after_wmarks_check:	Called after each schemes' watermarks check.
  * @after_sampling:	Called after each sampling.
  * @after_aggregation:	Called after each aggregation.
+ * @before_damos_apply:	Called before applying DAMOS action.
  * @before_terminate:	Called before terminating the monitoring.
  * @private:		User private data.
  *
@@ -385,6 +386,10 @@ struct damon_callback {
 	int (*after_wmarks_check)(struct damon_ctx *context);
 	int (*after_sampling)(struct damon_ctx *context);
 	int (*after_aggregation)(struct damon_ctx *context);
+	int (*before_damos_apply)(struct damon_ctx *context,
+			struct damon_target *target,
+			struct damon_region *region,
+			struct damos *scheme);
 	void (*before_terminate)(struct damon_ctx *context);
 };
 
diff --git a/mm/damon/core.c b/mm/damon/core.c
index 80d5937fe3373..ceec75b88ef96 100644
--- a/mm/damon/core.c
+++ b/mm/damon/core.c
@@ -772,6 +772,7 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
 	unsigned long sz = damon_sz_region(r);
 	struct timespec64 begin, end;
 	unsigned long sz_applied = 0;
+	int err = 0;
 
 	if (c->ops.apply_scheme) {
 		if (quota->esz && quota->charged_sz + sz > quota->esz) {
@@ -782,7 +783,10 @@ static void damos_apply_scheme(struct damon_ctx *c, struct damon_target *t,
 			damon_split_region_at(t, r, sz);
 		}
 		ktime_get_coarse_ts64(&begin);
-		sz_applied = c->ops.apply_scheme(c, t, r, s);
+		if (c->callback.before_damos_apply)
+			err = c->callback.before_damos_apply(c, t, r, s);
+		if (!err)
+			sz_applied = c->ops.apply_scheme(c, t, r, s);
 		ktime_get_coarse_ts64(&end);
 		quota->total_charged_ns += timespec64_to_ns(&end) -
 			timespec64_to_ns(&begin);
-- 
GitLab


From 5181b75f438d2e5b7f27bf48c6ea88a87c2882b7 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:22 +0000
Subject: [PATCH 2991/4122] mm/damon/sysfs-schemes: implement
 schemes/tried_regions directory

For efficient and simple query-like DAMON monitoring results readings and
deep level investigations of DAMOS, DAMON kernel API
(include/linux/damon.h) users can use 'before_damos_apply' DAMON callback.
However, DAMON sysfs interface users don't have such option.

Add a directory, namely 'tried_regions', under each scheme directory to
use it as the interface for the purpose.  Note that this commit is
implementing only the directory but the data filling.

After the data filling change is made, users will be able to signal DAMON
to fill the directory with the regions that corresponding scheme has tried
to be applied.  By setting the access pattern of the scheme, users could
do the efficient query-like monitoring.

Link: https://lkml.kernel.org/r/20221101220328.95765-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs-schemes.c | 57 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 9509d5c1e7fce..500759d8b20c7 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -9,6 +9,36 @@
 
 #include "sysfs-common.h"
 
+/*
+ * scheme regions directory
+ */
+
+struct damon_sysfs_scheme_regions {
+	struct kobject kobj;
+};
+
+static struct damon_sysfs_scheme_regions *
+damon_sysfs_scheme_regions_alloc(void)
+{
+	return kzalloc(sizeof(struct damon_sysfs_scheme_regions), GFP_KERNEL);
+}
+
+static void damon_sysfs_scheme_regions_release(struct kobject *kobj)
+{
+	kfree(container_of(kobj, struct damon_sysfs_scheme_regions, kobj));
+}
+
+static struct attribute *damon_sysfs_scheme_regions_attrs[] = {
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_regions);
+
+static struct kobj_type damon_sysfs_scheme_regions_ktype = {
+	.release = damon_sysfs_scheme_regions_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_scheme_regions_groups,
+};
+
 /*
  * schemes/stats directory
  */
@@ -635,6 +665,7 @@ struct damon_sysfs_scheme {
 	struct damon_sysfs_quotas *quotas;
 	struct damon_sysfs_watermarks *watermarks;
 	struct damon_sysfs_stats *stats;
+	struct damon_sysfs_scheme_regions *tried_regions;
 };
 
 /* This should match with enum damos_action */
@@ -743,6 +774,25 @@ static int damon_sysfs_scheme_set_stats(struct damon_sysfs_scheme *scheme)
 	return err;
 }
 
+static int damon_sysfs_scheme_set_tried_regions(
+		struct damon_sysfs_scheme *scheme)
+{
+	struct damon_sysfs_scheme_regions *tried_regions =
+		damon_sysfs_scheme_regions_alloc();
+	int err;
+
+	if (!tried_regions)
+		return -ENOMEM;
+	err = kobject_init_and_add(&tried_regions->kobj,
+			&damon_sysfs_scheme_regions_ktype, &scheme->kobj,
+			"tried_regions");
+	if (err)
+		kobject_put(&tried_regions->kobj);
+	else
+		scheme->tried_regions = tried_regions;
+	return err;
+}
+
 static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
 {
 	int err;
@@ -759,8 +809,14 @@ static int damon_sysfs_scheme_add_dirs(struct damon_sysfs_scheme *scheme)
 	err = damon_sysfs_scheme_set_stats(scheme);
 	if (err)
 		goto put_watermarks_quotas_access_pattern_out;
+	err = damon_sysfs_scheme_set_tried_regions(scheme);
+	if (err)
+		goto put_tried_regions_out;
 	return 0;
 
+put_tried_regions_out:
+	kobject_put(&scheme->tried_regions->kobj);
+	scheme->tried_regions = NULL;
 put_watermarks_quotas_access_pattern_out:
 	kobject_put(&scheme->watermarks->kobj);
 	scheme->watermarks = NULL;
@@ -781,6 +837,7 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme)
 	kobject_put(&scheme->quotas->kobj);
 	kobject_put(&scheme->watermarks->kobj);
 	kobject_put(&scheme->stats->kobj);
+	kobject_put(&scheme->tried_regions->kobj);
 }
 
 static ssize_t action_show(struct kobject *kobj, struct kobj_attribute *attr,
-- 
GitLab


From 9277d0367ba18ef4bb98bafb1209e715844cdf7e Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:23 +0000
Subject: [PATCH 2992/4122] mm/damon/sysfs-schemes: implement scheme region
 directory

Implement region directories under 'tried_regions' directory of each
scheme DAMON sysfs directory.  This directory will provide the address
range, the monitored access frequency ('nr_accesses'), and the age of each
DAMON region that corresponding DAMON-based operation scheme has tried to
be applied.  Note that this commit doesn't implement the code for filling
the data but only the sysfs directory.

Link: https://lkml.kernel.org/r/20221101220328.95765-4-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs-schemes.c | 123 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 122 insertions(+), 1 deletion(-)

diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 500759d8b20c7..f0b5ad7e721d6 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -9,18 +9,138 @@
 
 #include "sysfs-common.h"
 
+/*
+ * scheme region directory
+ */
+
+struct damon_sysfs_scheme_region {
+	struct kobject kobj;
+	struct damon_addr_range ar;
+	unsigned int nr_accesses;
+	unsigned int age;
+	struct list_head list;
+};
+
+static struct damon_sysfs_scheme_region *damon_sysfs_scheme_region_alloc(
+		struct damon_region *region)
+{
+	struct damon_sysfs_scheme_region *sysfs_region = kmalloc(
+			sizeof(*sysfs_region), GFP_KERNEL);
+
+	if (!sysfs_region)
+		return NULL;
+	sysfs_region->kobj = (struct kobject){};
+	sysfs_region->ar = region->ar;
+	sysfs_region->nr_accesses = region->nr_accesses;
+	sysfs_region->age = region->age;
+	INIT_LIST_HEAD(&sysfs_region->list);
+	return sysfs_region;
+}
+
+static ssize_t start_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_scheme_region *region = container_of(kobj,
+			struct damon_sysfs_scheme_region, kobj);
+
+	return sysfs_emit(buf, "%lu\n", region->ar.start);
+}
+
+static ssize_t end_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_scheme_region *region = container_of(kobj,
+			struct damon_sysfs_scheme_region, kobj);
+
+	return sysfs_emit(buf, "%lu\n", region->ar.end);
+}
+
+static ssize_t nr_accesses_show(struct kobject *kobj,
+		struct kobj_attribute *attr, char *buf)
+{
+	struct damon_sysfs_scheme_region *region = container_of(kobj,
+			struct damon_sysfs_scheme_region, kobj);
+
+	return sysfs_emit(buf, "%u\n", region->nr_accesses);
+}
+
+static ssize_t age_show(struct kobject *kobj, struct kobj_attribute *attr,
+		char *buf)
+{
+	struct damon_sysfs_scheme_region *region = container_of(kobj,
+			struct damon_sysfs_scheme_region, kobj);
+
+	return sysfs_emit(buf, "%u\n", region->age);
+}
+
+static void damon_sysfs_scheme_region_release(struct kobject *kobj)
+{
+	struct damon_sysfs_scheme_region *region = container_of(kobj,
+			struct damon_sysfs_scheme_region, kobj);
+
+	list_del(&region->list);
+	kfree(region);
+}
+
+static struct kobj_attribute damon_sysfs_scheme_region_start_attr =
+		__ATTR_RO_MODE(start, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_end_attr =
+		__ATTR_RO_MODE(end, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_nr_accesses_attr =
+		__ATTR_RO_MODE(nr_accesses, 0400);
+
+static struct kobj_attribute damon_sysfs_scheme_region_age_attr =
+		__ATTR_RO_MODE(age, 0400);
+
+static struct attribute *damon_sysfs_scheme_region_attrs[] = {
+	&damon_sysfs_scheme_region_start_attr.attr,
+	&damon_sysfs_scheme_region_end_attr.attr,
+	&damon_sysfs_scheme_region_nr_accesses_attr.attr,
+	&damon_sysfs_scheme_region_age_attr.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(damon_sysfs_scheme_region);
+
+static struct kobj_type damon_sysfs_scheme_region_ktype = {
+	.release = damon_sysfs_scheme_region_release,
+	.sysfs_ops = &kobj_sysfs_ops,
+	.default_groups = damon_sysfs_scheme_region_groups,
+};
+
 /*
  * scheme regions directory
  */
 
 struct damon_sysfs_scheme_regions {
 	struct kobject kobj;
+	struct list_head regions_list;
+	int nr_regions;
 };
 
 static struct damon_sysfs_scheme_regions *
 damon_sysfs_scheme_regions_alloc(void)
 {
-	return kzalloc(sizeof(struct damon_sysfs_scheme_regions), GFP_KERNEL);
+	struct damon_sysfs_scheme_regions *regions = kmalloc(sizeof(*regions),
+			GFP_KERNEL);
+
+	regions->kobj = (struct kobject){};
+	INIT_LIST_HEAD(&regions->regions_list);
+	regions->nr_regions = 0;
+	return regions;
+}
+
+static void damon_sysfs_scheme_regions_rm_dirs(
+		struct damon_sysfs_scheme_regions *regions)
+{
+	struct damon_sysfs_scheme_region *r, *next;
+
+	list_for_each_entry_safe(r, next, &regions->regions_list, list) {
+		/* release function deletes it from the list */
+		kobject_put(&r->kobj);
+		regions->nr_regions--;
+	}
 }
 
 static void damon_sysfs_scheme_regions_release(struct kobject *kobj)
@@ -837,6 +957,7 @@ static void damon_sysfs_scheme_rm_dirs(struct damon_sysfs_scheme *scheme)
 	kobject_put(&scheme->quotas->kobj);
 	kobject_put(&scheme->watermarks->kobj);
 	kobject_put(&scheme->stats->kobj);
+	damon_sysfs_scheme_regions_rm_dirs(scheme->tried_regions);
 	kobject_put(&scheme->tried_regions->kobj);
 }
 
-- 
GitLab


From f1d13cacabe140305844879e495ca67837e059cc Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:24 +0000
Subject: [PATCH 2993/4122] mm/damon/sysfs: implement DAMOS tried regions
 update command

Implement the code for filling the data of 'tried_regions' DAMON sysfs
directory.  With this commit, DAMON sysfs interface users can write a
special keyword, 'update_schemes_tried_regions' to the corresponding
'state' file of the kdamond.  Then, DAMON sysfs interface will collect the
tried regions information using the 'before_damos_apply()' callback for
one aggregation interval and populate scheme region directories with the
values.

[sj@kernel.org: skip tried regions update if the scheme directory was removed]
  Link: https://lkml.kernel.org/r/20221114182954.4745-2-sj@kernel.org
Link: https://lkml.kernel.org/r/20221101220328.95765-5-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs-common.h  |  6 +++
 mm/damon/sysfs-schemes.c | 80 ++++++++++++++++++++++++++++++++++++++++
 mm/damon/sysfs.c         | 57 +++++++++++++++++++++++++++-
 3 files changed, 141 insertions(+), 2 deletions(-)

diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
index 4626b27844047..634a6e7fca78a 100644
--- a/mm/damon/sysfs-common.h
+++ b/mm/damon/sysfs-common.h
@@ -44,3 +44,9 @@ int damon_sysfs_set_schemes(struct damon_ctx *ctx,
 void damon_sysfs_schemes_update_stats(
 		struct damon_sysfs_schemes *sysfs_schemes,
 		struct damon_ctx *ctx);
+
+int damon_sysfs_schemes_update_regions_start(
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx);
+
+int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx);
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index f0b5ad7e721d6..5f14f18bcc49c 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1244,3 +1244,83 @@ void damon_sysfs_schemes_update_stats(
 		sysfs_stats->qt_exceeds = scheme->stat.qt_exceeds;
 	}
 }
+
+/*
+ * damon_sysfs_schemes that need to update its schemes regions dir.  Protected
+ * by damon_sysfs_lock
+ */
+static struct damon_sysfs_schemes *damon_sysfs_schemes_for_damos_callback;
+static int damon_sysfs_schemes_region_idx;
+
+/*
+ * DAMON callback that called before damos apply.  While this callback is
+ * registered, damon_sysfs_lock should be held to ensure the regions
+ * directories exist.
+ */
+static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
+		struct damon_target *t, struct damon_region *r,
+		struct damos *s)
+{
+	struct damos *scheme;
+	struct damon_sysfs_scheme_regions *sysfs_regions;
+	struct damon_sysfs_scheme_region *region;
+	struct damon_sysfs_schemes *sysfs_schemes =
+		damon_sysfs_schemes_for_damos_callback;
+	int schemes_idx = 0;
+
+	damon_for_each_scheme(scheme, ctx) {
+		if (scheme == s)
+			break;
+		schemes_idx++;
+	}
+
+	/* user could have removed the scheme sysfs dir */
+	if (schemes_idx >= sysfs_schemes->nr)
+		return 0;
+
+	sysfs_regions = sysfs_schemes->schemes_arr[schemes_idx]->tried_regions;
+	region = damon_sysfs_scheme_region_alloc(r);
+	list_add_tail(&region->list, &sysfs_regions->regions_list);
+	sysfs_regions->nr_regions++;
+	if (kobject_init_and_add(&region->kobj,
+				&damon_sysfs_scheme_region_ktype,
+				&sysfs_regions->kobj, "%d",
+				damon_sysfs_schemes_region_idx++)) {
+		kobject_put(&region->kobj);
+	}
+	return 0;
+}
+
+/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
+int damon_sysfs_schemes_update_regions_start(
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx)
+{
+	struct damos *scheme;
+	int schemes_idx = 0;
+
+	damon_for_each_scheme(scheme, ctx) {
+		struct damon_sysfs_scheme *sysfs_scheme;
+
+		sysfs_scheme = sysfs_schemes->schemes_arr[schemes_idx++];
+		damon_sysfs_scheme_regions_rm_dirs(
+				sysfs_scheme->tried_regions);
+	}
+
+	damon_sysfs_schemes_for_damos_callback = sysfs_schemes;
+	ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
+	return 0;
+}
+
+/*
+ * Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock.  Caller
+ * should unlock damon_sysfs_lock which held before
+ * damon_sysfs_schemes_update_regions_start()
+ */
+int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx)
+{
+	damon_sysfs_schemes_for_damos_callback = NULL;
+	ctx->callback.before_damos_apply = NULL;
+	damon_sysfs_schemes_region_idx = 0;
+	return 0;
+}
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index 284daf274b3ed..ffb5a84059d7c 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -999,6 +999,11 @@ enum damon_sysfs_cmd {
 	 * files.
 	 */
 	DAMON_SYSFS_CMD_UPDATE_SCHEMES_STATS,
+	/*
+	 * @DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS: Update schemes tried
+	 * regions
+	 */
+	DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS,
 	/*
 	 * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands.
 	 */
@@ -1011,6 +1016,7 @@ static const char * const damon_sysfs_cmd_strs[] = {
 	"off",
 	"commit",
 	"update_schemes_stats",
+	"update_schemes_tried_regions",
 };
 
 /*
@@ -1193,6 +1199,16 @@ static int damon_sysfs_set_targets(struct damon_ctx *ctx,
 static void damon_sysfs_before_terminate(struct damon_ctx *ctx)
 {
 	struct damon_target *t, *next;
+	struct damon_sysfs_kdamond *kdamond;
+
+	/* damon_sysfs_schemes_update_regions_stop() might not yet called */
+	kdamond = damon_sysfs_cmd_request.kdamond;
+	if (kdamond && damon_sysfs_cmd_request.cmd ==
+			DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS &&
+			ctx == kdamond->damon_ctx) {
+		damon_sysfs_schemes_update_regions_stop(ctx);
+		mutex_unlock(&damon_sysfs_lock);
+	}
 
 	if (!damon_target_has_pid(ctx))
 		return;
@@ -1225,6 +1241,27 @@ static int damon_sysfs_upd_schemes_stats(struct damon_sysfs_kdamond *kdamond)
 	return 0;
 }
 
+static int damon_sysfs_upd_schemes_regions_start(
+		struct damon_sysfs_kdamond *kdamond)
+{
+	struct damon_ctx *ctx = kdamond->damon_ctx;
+
+	if (!ctx)
+		return -EINVAL;
+	return damon_sysfs_schemes_update_regions_start(
+			kdamond->contexts->contexts_arr[0]->schemes, ctx);
+}
+
+static int damon_sysfs_upd_schemes_regions_stop(
+		struct damon_sysfs_kdamond *kdamond)
+{
+	struct damon_ctx *ctx = kdamond->damon_ctx;
+
+	if (!ctx)
+		return -EINVAL;
+	return damon_sysfs_schemes_update_regions_stop(ctx);
+}
+
 static inline bool damon_sysfs_kdamond_running(
 		struct damon_sysfs_kdamond *kdamond)
 {
@@ -1277,10 +1314,12 @@ static int damon_sysfs_commit_input(struct damon_sysfs_kdamond *kdamond)
 static int damon_sysfs_cmd_request_callback(struct damon_ctx *c)
 {
 	struct damon_sysfs_kdamond *kdamond;
+	static bool damon_sysfs_schemes_regions_updating;
 	int err = 0;
 
 	/* avoid deadlock due to concurrent state_store('off') */
-	if (!mutex_trylock(&damon_sysfs_lock))
+	if (!damon_sysfs_schemes_regions_updating &&
+			!mutex_trylock(&damon_sysfs_lock))
 		return 0;
 	kdamond = damon_sysfs_cmd_request.kdamond;
 	if (!kdamond || kdamond->damon_ctx != c)
@@ -1292,13 +1331,27 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c)
 	case DAMON_SYSFS_CMD_COMMIT:
 		err = damon_sysfs_commit_input(kdamond);
 		break;
+	case DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS:
+		if (!damon_sysfs_schemes_regions_updating) {
+			err = damon_sysfs_upd_schemes_regions_start(kdamond);
+			if (!err) {
+				damon_sysfs_schemes_regions_updating = true;
+				goto keep_lock_out;
+			}
+		} else {
+			err = damon_sysfs_upd_schemes_regions_stop(kdamond);
+			damon_sysfs_schemes_regions_updating = false;
+		}
+		break;
 	default:
 		break;
 	}
 	/* Mark the request as invalid now. */
 	damon_sysfs_cmd_request.kdamond = NULL;
 out:
-	mutex_unlock(&damon_sysfs_lock);
+	if (!damon_sysfs_schemes_regions_updating)
+		mutex_unlock(&damon_sysfs_lock);
+keep_lock_out:
 	return err;
 }
 
-- 
GitLab


From 772c15e5adcb32a42dbbcdb905ec49f662312976 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:25 +0000
Subject: [PATCH 2994/4122] mm/damon/sysfs-schemes: implement DAMOS-tried
 regions clear command

When there are huge number of DAMON regions that specific scheme actions
are tried to be applied, directories and files under 'tried_regions'
scheme directory could waste some memory.  Add another special input
keyword ('clear_schemes_tried_regions') for 'state' file of each kdamond
sysfs directory that can be used for cleanup of the 'tried_regions'
sub-directories.

[sj@kernel.org: skip regions clearing if the scheme directory was removed]
  Link: https://lkml.kernel.org/r/20221114182954.4745-3-sj@kernel.org
Link: https://lkml.kernel.org/r/20221101220328.95765-6-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/sysfs-common.h  |  4 ++++
 mm/damon/sysfs-schemes.c | 14 +++++++++++++-
 mm/damon/sysfs.c         | 20 ++++++++++++++++++++
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/mm/damon/sysfs-common.h b/mm/damon/sysfs-common.h
index 634a6e7fca78a..604a6cbc3edea 100644
--- a/mm/damon/sysfs-common.h
+++ b/mm/damon/sysfs-common.h
@@ -50,3 +50,7 @@ int damon_sysfs_schemes_update_regions_start(
 		struct damon_ctx *ctx);
 
 int damon_sysfs_schemes_update_regions_stop(struct damon_ctx *ctx);
+
+int damon_sysfs_schemes_clear_regions(
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx);
diff --git a/mm/damon/sysfs-schemes.c b/mm/damon/sysfs-schemes.c
index 5f14f18bcc49c..81fc4d27f4e45 100644
--- a/mm/damon/sysfs-schemes.c
+++ b/mm/damon/sysfs-schemes.c
@@ -1292,7 +1292,7 @@ static int damon_sysfs_before_damos_apply(struct damon_ctx *ctx,
 }
 
 /* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
-int damon_sysfs_schemes_update_regions_start(
+int damon_sysfs_schemes_clear_regions(
 		struct damon_sysfs_schemes *sysfs_schemes,
 		struct damon_ctx *ctx)
 {
@@ -1302,11 +1302,23 @@ int damon_sysfs_schemes_update_regions_start(
 	damon_for_each_scheme(scheme, ctx) {
 		struct damon_sysfs_scheme *sysfs_scheme;
 
+		/* user could have removed the scheme sysfs dir */
+		if (schemes_idx >= sysfs_schemes->nr)
+			break;
+
 		sysfs_scheme = sysfs_schemes->schemes_arr[schemes_idx++];
 		damon_sysfs_scheme_regions_rm_dirs(
 				sysfs_scheme->tried_regions);
 	}
+	return 0;
+}
 
+/* Called from damon_sysfs_cmd_request_callback under damon_sysfs_lock */
+int damon_sysfs_schemes_update_regions_start(
+		struct damon_sysfs_schemes *sysfs_schemes,
+		struct damon_ctx *ctx)
+{
+	damon_sysfs_schemes_clear_regions(sysfs_schemes, ctx);
 	damon_sysfs_schemes_for_damos_callback = sysfs_schemes;
 	ctx->callback.before_damos_apply = damon_sysfs_before_damos_apply;
 	return 0;
diff --git a/mm/damon/sysfs.c b/mm/damon/sysfs.c
index ffb5a84059d7c..aeb0beb1da913 100644
--- a/mm/damon/sysfs.c
+++ b/mm/damon/sysfs.c
@@ -1004,6 +1004,11 @@ enum damon_sysfs_cmd {
 	 * regions
 	 */
 	DAMON_SYSFS_CMD_UPDATE_SCHEMES_TRIED_REGIONS,
+	/*
+	 * @DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS: Clear schemes tried
+	 * regions
+	 */
+	DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS,
 	/*
 	 * @NR_DAMON_SYSFS_CMDS: Total number of DAMON sysfs commands.
 	 */
@@ -1017,6 +1022,7 @@ static const char * const damon_sysfs_cmd_strs[] = {
 	"commit",
 	"update_schemes_stats",
 	"update_schemes_tried_regions",
+	"clear_schemes_tried_regions",
 };
 
 /*
@@ -1262,6 +1268,17 @@ static int damon_sysfs_upd_schemes_regions_stop(
 	return damon_sysfs_schemes_update_regions_stop(ctx);
 }
 
+static int damon_sysfs_clear_schemes_regions(
+		struct damon_sysfs_kdamond *kdamond)
+{
+	struct damon_ctx *ctx = kdamond->damon_ctx;
+
+	if (!ctx)
+		return -EINVAL;
+	return damon_sysfs_schemes_clear_regions(
+			kdamond->contexts->contexts_arr[0]->schemes, ctx);
+}
+
 static inline bool damon_sysfs_kdamond_running(
 		struct damon_sysfs_kdamond *kdamond)
 {
@@ -1343,6 +1360,9 @@ static int damon_sysfs_cmd_request_callback(struct damon_ctx *c)
 			damon_sysfs_schemes_regions_updating = false;
 		}
 		break;
+	case DAMON_SYSFS_CMD_CLEAR_SCHEMES_TRIED_REGIONS:
+		err = damon_sysfs_clear_schemes_regions(kdamond);
+		break;
 	default:
 		break;
 	}
-- 
GitLab


From 2b3ee3f66c673312ea377bcfb54cb2b9abc8473b Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:26 +0000
Subject: [PATCH 2995/4122] tools/selftets/damon/sysfs: test tried_regions
 directory existence

Add a simple test case for ensuring tried_regions directory existence.

Link: https://lkml.kernel.org/r/20221101220328.95765-7-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/damon/sysfs.sh | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
index 89592c64462f8..db4942383a50f 100644
--- a/tools/testing/selftests/damon/sysfs.sh
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -80,6 +80,12 @@ test_range()
 	ensure_file "$range_dir/max" "exist" 600
 }
 
+test_tried_regions()
+{
+	tried_regions_dir=$1
+	ensure_dir "$tried_regions_dir" "exist"
+}
+
 test_stats()
 {
 	stats_dir=$1
@@ -138,6 +144,7 @@ test_scheme()
 	test_quotas "$scheme_dir/quotas"
 	test_watermarks "$scheme_dir/watermarks"
 	test_stats "$scheme_dir/stats"
+	test_tried_regions "$scheme_dir/tried_regions"
 }
 
 test_schemes()
-- 
GitLab


From 7f0a86f3c99bc9736445ef64aa65c9bd6161a47b Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:27 +0000
Subject: [PATCH 2996/4122] Docs/admin-guide/mm/damon/usage: document
 schemes/<s>/tried_regions sysfs directory

Document 'tried_regions' directory in DAMON sysfs interface usage in the
administrator guide.

Link: https://lkml.kernel.org/r/20221101220328.95765-8-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/mm/damon/usage.rst | 45 ++++++++++++++++++--
 1 file changed, 42 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/mm/damon/usage.rst b/Documentation/admin-guide/mm/damon/usage.rst
index c17e02e1e426e..1a5b6b71efa18 100644
--- a/Documentation/admin-guide/mm/damon/usage.rst
+++ b/Documentation/admin-guide/mm/damon/usage.rst
@@ -88,6 +88,9 @@ comma (","). ::
     │ │ │ │ │ │ │ │ weights/sz_permil,nr_accesses_permil,age_permil
     │ │ │ │ │ │ │ watermarks/metric,interval_us,high,mid,low
     │ │ │ │ │ │ │ stats/nr_tried,sz_tried,nr_applied,sz_applied,qt_exceeds
+    │ │ │ │ │ │ │ tried_regions/
+    │ │ │ │ │ │ │ │ 0/start,end,nr_accesses,age
+    │ │ │ │ │ │ │ │ ...
     │ │ │ │ │ │ ...
     │ │ │ │ ...
     │ │ ...
@@ -125,7 +128,14 @@ in the state.  Writing ``commit`` to the ``state`` file makes kdamond reads the
 user inputs in the sysfs files except ``state`` file again.  Writing
 ``update_schemes_stats`` to ``state`` file updates the contents of stats files
 for each DAMON-based operation scheme of the kdamond.  For details of the
-stats, please refer to :ref:`stats section <sysfs_schemes_stats>`.
+stats, please refer to :ref:`stats section <sysfs_schemes_stats>`.  Writing
+``update_schemes_tried_regions`` to ``state`` file updates the DAMON-based
+operation scheme action tried regions directory for each DAMON-based operation
+scheme of the kdamond.  Writing ``clear_schemes_tried_regions`` to ``state``
+file clears the DAMON-based operating scheme action tried regions directory for
+each DAMON-based operation scheme of the kdamond.  For details of the
+DAMON-based operation scheme action tried regions directory, please refer to
+:ref:tried_regions section <sysfs_schemes_tried_regions>`.
 
 If the state is ``on``, reading ``pid`` shows the pid of the kdamond thread.
 
@@ -166,6 +176,8 @@ You can set and get what type of monitoring operations DAMON will use for the
 context by writing one of the keywords listed in ``avail_operations`` file and
 reading from the ``operations`` file.
 
+.. _sysfs_monitoring_attrs:
+
 contexts/<N>/monitoring_attrs/
 ------------------------------
 
@@ -255,8 +267,9 @@ to ``N-1``.  Each directory represents each DAMON-based operation scheme.
 schemes/<N>/
 ------------
 
-In each scheme directory, four directories (``access_pattern``, ``quotas``,
-``watermarks``, and ``stats``) and one file (``action``) exist.
+In each scheme directory, five directories (``access_pattern``, ``quotas``,
+``watermarks``, ``stats``, and ``tried_regions``) and one file (``action``)
+exist.
 
 The ``action`` file is for setting and getting what action you want to apply to
 memory regions having specific access pattern of the interest.  The keywords
@@ -351,6 +364,32 @@ should ask DAMON sysfs interface to updte the content of the files for the
 stats by writing a special keyword, ``update_schemes_stats`` to the relevant
 ``kdamonds/<N>/state`` file.
 
+.. _sysfs_schemes_tried_regions:
+
+schemes/<N>/tried_regions/
+--------------------------
+
+When a special keyword, ``update_schemes_tried_regions``, is written to the
+relevant ``kdamonds/<N>/state`` file, DAMON creates directories named integer
+starting from ``0`` under this directory.  Each directory contains files
+exposing detailed information about each of the memory region that the
+corresponding scheme's ``action`` has tried to be applied under this directory,
+during next :ref:`aggregation interval <sysfs_monitoring_attrs>`.  The
+information includes address range, ``nr_accesses``, , and ``age`` of the
+region.
+
+The directories will be removed when another special keyword,
+``clear_schemes_tried_regions``, is written to the relevant
+``kdamonds/<N>/state`` file.
+
+tried_regions/<N>/
+------------------
+
+In each region directory, you will find four files (``start``, ``end``,
+``nr_accesses``, and ``age``).  Reading the files will show the start and end
+addresses, ``nr_accesses``, and ``age`` of the region that corresponding
+DAMON-based operation scheme ``action`` has tried to be applied.
+
 Example
 ~~~~~~~
 
-- 
GitLab


From 1b0006daa36f2ccb7f213007365d504bcd016312 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Tue, 1 Nov 2022 22:03:28 +0000
Subject: [PATCH 2997/4122] Docs/ABI/damon: document
 'schemes/<s>/tried_regions' sysfs directory

Update DAMON ABI document for the 'tried_regions' directory of DAMON
sysfs interface.

Link: https://lkml.kernel.org/r/20221101220328.95765-9-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .../ABI/testing/sysfs-kernel-mm-damon         | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-damon b/Documentation/ABI/testing/sysfs-kernel-mm-damon
index 08b9df3235609..13397b8536926 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-damon
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-damon
@@ -27,6 +27,10 @@ Description:	Writing 'on' or 'off' to this file makes the kdamond starts or
 		makes the kdamond reads the user inputs in the sysfs files
 		except 'state' again.  Writing 'update_schemes_stats' to the
 		file updates contents of schemes stats files of the kdamond.
+		Writing 'update_schemes_tried_regions' to the file updates
+		contents of 'tried_regions' directory of every scheme directory
+		of this kdamond.  Writing 'clear_schemes_tried_regions' to the
+		file removes contents of the 'tried_regions' directory.
 
 What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/pid
 Date:		Mar 2022
@@ -283,3 +287,31 @@ Date:		Mar 2022
 Contact:	SeongJae Park <sj@kernel.org>
 Description:	Reading this file returns the number of the exceed events of
 		the scheme's quotas.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/start
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the start address of a memory region
+		that corresponding DAMON-based Operation Scheme's action has
+		tried to be applied.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/end
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the end address of a memory region
+		that corresponding DAMON-based Operation Scheme's action has
+		tried to be applied.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/nr_accesses
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the 'nr_accesses' of a memory region
+		that corresponding DAMON-based Operation Scheme's action has
+		tried to be applied.
+
+What:		/sys/kernel/mm/damon/admin/kdamonds/<K>/contexts/<C>/schemes/<S>/tried_regions/<R>/age
+Date:		Oct 2022
+Contact:	SeongJae Park <sj@kernel.org>
+Description:	Reading this file returns the 'age' of a memory region that
+		corresponding DAMON-based Operation Scheme's action has tried
+		to be applied.
-- 
GitLab


From e6aff38b2e25e934e95471351c96d1410bb17561 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 1 Nov 2022 22:14:08 +0100
Subject: [PATCH 2998/4122] mm/damon: use kstrtobool() instead of strtobool()

strtobool() is the same as kstrtobool().  However, the latter is more used
within the kernel.

In order to remove strtobool() and slightly simplify kstrtox.h, switch to
the other function name.

While at it, include the corresponding header file (<linux/kstrtox.h>)

Link: https://lkml.kernel.org/r/ed2b46489a513988688decb53850339cc228940c.1667336095.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/damon/lru_sort.c | 3 ++-
 mm/damon/reclaim.c  | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/damon/lru_sort.c b/mm/damon/lru_sort.c
index 2a532e3983dfa..7b8fce2f67a8d 100644
--- a/mm/damon/lru_sort.c
+++ b/mm/damon/lru_sort.c
@@ -8,6 +8,7 @@
 #define pr_fmt(fmt) "damon-lru-sort: " fmt
 
 #include <linux/damon.h>
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 
 #include "modules-common.h"
@@ -241,7 +242,7 @@ static int damon_lru_sort_enabled_store(const char *val,
 	bool enable;
 	int err;
 
-	err = strtobool(val, &enable);
+	err = kstrtobool(val, &enable);
 	if (err)
 		return err;
 
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index e57604bec06da..e82631f394816 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -8,6 +8,7 @@
 #define pr_fmt(fmt) "damon-reclaim: " fmt
 
 #include <linux/damon.h>
+#include <linux/kstrtox.h>
 #include <linux/module.h>
 
 #include "modules-common.h"
@@ -187,7 +188,7 @@ static int damon_reclaim_enabled_store(const char *val,
 	bool enable;
 	int err;
 
-	err = strtobool(val, &enable);
+	err = kstrtobool(val, &enable);
 	if (err)
 		return err;
 
-- 
GitLab


From f15be1b8d449a8eebe82d77164bf760804753651 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 1 Nov 2022 22:14:09 +0100
Subject: [PATCH 2999/4122] mm: use kstrtobool() instead of strtobool()

strtobool() is the same as kstrtobool().  However, the latter is more used
within the kernel.

In order to remove strtobool() and slightly simplify kstrtox.h, switch to
the other function name.

While at it, include the corresponding header file (<linux/kstrtox.h>)

Link: https://lkml.kernel.org/r/03f9401a6c8b87a1c786a2138d16b048f8d0eb53.1667336095.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_table_check.c | 3 ++-
 mm/usercopy.c         | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 433dbce13fe1d..93e633c1d5877 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2021, Google LLC.
  * Pasha Tatashin <pasha.tatashin@soleen.com>
  */
+#include <linux/kstrtox.h>
 #include <linux/mm.h>
 #include <linux/page_table_check.h>
 
@@ -23,7 +24,7 @@ EXPORT_SYMBOL(page_table_check_disabled);
 
 static int __init early_page_table_check_param(char *buf)
 {
-	return strtobool(buf, &__page_table_check_enabled);
+	return kstrtobool(buf, &__page_table_check_enabled);
 }
 
 early_param("page_table_check", early_page_table_check_param);
diff --git a/mm/usercopy.c b/mm/usercopy.c
index c1ee15a986338..4c3164beacec0 100644
--- a/mm/usercopy.c
+++ b/mm/usercopy.c
@@ -12,6 +12,7 @@
 
 #include <linux/mm.h>
 #include <linux/highmem.h>
+#include <linux/kstrtox.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/sched/task.h>
@@ -258,7 +259,7 @@ static bool enable_checks __initdata = true;
 
 static int __init parse_hardened_usercopy(char *str)
 {
-	if (strtobool(str, &enable_checks))
+	if (kstrtobool(str, &enable_checks))
 		pr_warn("Invalid option string for hardened_usercopy: '%s'\n",
 			str);
 	return 1;
-- 
GitLab


From ca92ea3dc5a2b01f98e9f02b7a6bc03be06fe124 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Sun, 30 Oct 2022 17:41:50 -0400
Subject: [PATCH 3000/4122] mm: always compile in pte markers

Patch series "mm: Use pte marker for swapin errors".

This series uses the pte marker to replace the swapin error swap entry,
then we save one more swap entry slot for swap devices.  A new pte marker
bit is defined.


This patch (of 2):

The PTE markers code is tiny and now it's enabled for most of the
distributions.  It's fine to keep it as-is, but to make a broader use of
it (e.g.  replacing read error swap entry) it needs to be there always
otherwise we need special code path to take care of !PTE_MARKER case.

It'll be easier just make pte marker always exist.  Use this chance to
extend its usage to anonymous too by simply touching up some of the old
comments, because it'll be used for anonymous pages in the follow up
patches.

Link: https://lkml.kernel.org/r/20221030214151.402274-1-peterx@redhat.com
Link: https://lkml.kernel.org/r/20221030214151.402274-2-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Peter Xu <peterx@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h    | 10 +++-------
 include/linux/swapops.h | 31 -------------------------------
 mm/Kconfig              |  7 -------
 mm/memory.c             |  7 +++----
 4 files changed, 6 insertions(+), 49 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 369d7799205dd..211aeca9bfa7f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -60,17 +60,13 @@ static inline int current_is_kswapd(void)
 			     SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
 			     SWP_PTE_MARKER_NUM)
 /*
- * PTE markers are used to persist information onto PTEs that are mapped with
- * file-backed memories.  As its name "PTE" hints, it should only be applied to
- * the leaves of pgtables.
+ * PTE markers are used to persist information onto PTEs that otherwise
+ * should be a none pte.  As its name "PTE" hints, it should only be
+ * applied to the leaves of pgtables.
  */
-#ifdef CONFIG_PTE_MARKER
 #define SWP_PTE_MARKER_NUM 1
 #define SWP_PTE_MARKER     (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
 			    SWP_MIGRATION_NUM + SWP_DEVICE_NUM)
-#else
-#define SWP_PTE_MARKER_NUM 0
-#endif
 
 /*
  * Unaddressable device memory support. See include/linux/hmm.h and
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 3ba9bf56899da..35c1fe62d2e1a 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -412,8 +412,6 @@ typedef unsigned long pte_marker;
 #define  PTE_MARKER_UFFD_WP  BIT(0)
 #define  PTE_MARKER_MASK     (PTE_MARKER_UFFD_WP)
 
-#ifdef CONFIG_PTE_MARKER
-
 static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
 {
 	return swp_entry(SWP_PTE_MARKER, marker);
@@ -434,32 +432,6 @@ static inline bool is_pte_marker(pte_t pte)
 	return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte));
 }
 
-#else /* CONFIG_PTE_MARKER */
-
-static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
-{
-	/* This should never be called if !CONFIG_PTE_MARKER */
-	WARN_ON_ONCE(1);
-	return swp_entry(0, 0);
-}
-
-static inline bool is_pte_marker_entry(swp_entry_t entry)
-{
-	return false;
-}
-
-static inline pte_marker pte_marker_get(swp_entry_t entry)
-{
-	return 0;
-}
-
-static inline bool is_pte_marker(pte_t pte)
-{
-	return false;
-}
-
-#endif /* CONFIG_PTE_MARKER */
-
 static inline pte_t make_pte_marker(pte_marker marker)
 {
 	return swp_entry_to_pte(make_pte_marker_entry(marker));
@@ -477,9 +449,6 @@ static inline pte_t make_pte_marker(pte_marker marker)
  * memory, kernel-only memory (including when the system is during-boot),
  * non-ram based generic file-system.  It's fine to be used even there, but the
  * extra pte marker check will be pure overhead.
- *
- * For systems configured with !CONFIG_PTE_MARKER this will be automatically
- * optimized to pte_none().
  */
 static inline int pte_none_mostly(pte_t pte)
 {
diff --git a/mm/Kconfig b/mm/Kconfig
index 57e1d8c5b5052..4b28800d9be16 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1107,17 +1107,10 @@ config HAVE_ARCH_USERFAULTFD_MINOR
 	help
 	  Arch has userfaultfd minor fault support
 
-config PTE_MARKER
-	bool
-
-	help
-	  Allows to create marker PTEs for file-backed memory.
-
 config PTE_MARKER_UFFD_WP
 	bool "Userfaultfd write protection support for shmem/hugetlbfs"
 	default y
 	depends on HAVE_ARCH_USERFAULTFD_WP
-	select PTE_MARKER
 
 	help
 	  Allows to create marker PTEs for userfaultfd write protection
diff --git a/mm/memory.c b/mm/memory.c
index 659620b6770fd..b79d275337223 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3662,11 +3662,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
 	unsigned long marker = pte_marker_get(entry);
 
 	/*
-	 * PTE markers should always be with file-backed memories, and the
-	 * marker should never be empty.  If anything weird happened, the best
-	 * thing to do is to kill the process along with its mm.
+	 * PTE markers should never be empty.  If anything weird happened,
+	 * the best thing to do is to kill the process along with its mm.
 	 */
-	if (WARN_ON_ONCE(vma_is_anonymous(vmf->vma) || !marker))
+	if (WARN_ON_ONCE(!marker))
 		return VM_FAULT_SIGBUS;
 
 	if (pte_marker_entry_uffd_wp(entry))
-- 
GitLab


From 15520a3f046998e3f57e695743e99b0875e2dae7 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Sun, 30 Oct 2022 17:41:51 -0400
Subject: [PATCH 3001/4122] mm: use pte markers for swap errors

PTE markers are ideal mechanism for things like SWP_SWAPIN_ERROR.  Using a
whole swap entry type for this purpose can be an overkill, especially if
we already have PTE markers.  Define a new bit for swapin error and
replace it with pte markers.  Then we can safely drop SWP_SWAPIN_ERROR and
give one device slot back to swap.

We used to have SWP_SWAPIN_ERROR taking the page pfn as part of the swap
entry, but it's never used.  Neither do I see how it can be useful because
normally the swapin failure should not be caused by a bad page but bad
swap device.  Drop it alongside.

Link: https://lkml.kernel.org/r/20221030214151.402274-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Huang Ying <ying.huang@intel.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h    |  6 +-----
 include/linux/swapops.h | 26 ++++++++++++++------------
 mm/memory.c             |  6 ++++--
 mm/shmem.c              |  2 +-
 mm/swapfile.c           |  2 +-
 5 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index 211aeca9bfa7f..fec6647a289af 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -55,10 +55,6 @@ static inline int current_is_kswapd(void)
  * actions on faults.
  */
 
-#define SWP_SWAPIN_ERROR_NUM 1
-#define SWP_SWAPIN_ERROR     (MAX_SWAPFILES + SWP_HWPOISON_NUM + \
-			     SWP_MIGRATION_NUM + SWP_DEVICE_NUM + \
-			     SWP_PTE_MARKER_NUM)
 /*
  * PTE markers are used to persist information onto PTEs that otherwise
  * should be a none pte.  As its name "PTE" hints, it should only be
@@ -121,7 +117,7 @@ static inline int current_is_kswapd(void)
 #define MAX_SWAPFILES \
 	((1 << MAX_SWAPFILES_SHIFT) - SWP_DEVICE_NUM - \
 	SWP_MIGRATION_NUM - SWP_HWPOISON_NUM - \
-	SWP_PTE_MARKER_NUM - SWP_SWAPIN_ERROR_NUM)
+	SWP_PTE_MARKER_NUM)
 
 /*
  * Magic header for a swap area. The first part of the union is
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 35c1fe62d2e1a..27ade4f22abb5 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -162,16 +162,6 @@ static inline void *swp_to_radix_entry(swp_entry_t entry)
 	return xa_mk_value(entry.val);
 }
 
-static inline swp_entry_t make_swapin_error_entry(struct page *page)
-{
-	return swp_entry(SWP_SWAPIN_ERROR, page_to_pfn(page));
-}
-
-static inline int is_swapin_error_entry(swp_entry_t entry)
-{
-	return swp_type(entry) == SWP_SWAPIN_ERROR;
-}
-
 #if IS_ENABLED(CONFIG_DEVICE_PRIVATE)
 static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset)
 {
@@ -409,8 +399,9 @@ static inline bool is_migration_entry_dirty(swp_entry_t entry)
 
 typedef unsigned long pte_marker;
 
-#define  PTE_MARKER_UFFD_WP  BIT(0)
-#define  PTE_MARKER_MASK     (PTE_MARKER_UFFD_WP)
+#define  PTE_MARKER_UFFD_WP			BIT(0)
+#define  PTE_MARKER_SWAPIN_ERROR		BIT(1)
+#define  PTE_MARKER_MASK			(BIT(2) - 1)
 
 static inline swp_entry_t make_pte_marker_entry(pte_marker marker)
 {
@@ -437,6 +428,17 @@ static inline pte_t make_pte_marker(pte_marker marker)
 	return swp_entry_to_pte(make_pte_marker_entry(marker));
 }
 
+static inline swp_entry_t make_swapin_error_entry(void)
+{
+	return make_pte_marker_entry(PTE_MARKER_SWAPIN_ERROR);
+}
+
+static inline int is_swapin_error_entry(swp_entry_t entry)
+{
+	return is_pte_marker_entry(entry) &&
+	    (pte_marker_get(entry) & PTE_MARKER_SWAPIN_ERROR);
+}
+
 /*
  * This is a special version to check pte_none() just to cover the case when
  * the pte is a pte marker.  It existed because in many cases the pte marker
diff --git a/mm/memory.c b/mm/memory.c
index b79d275337223..142c4229549b0 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3668,6 +3668,10 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf)
 	if (WARN_ON_ONCE(!marker))
 		return VM_FAULT_SIGBUS;
 
+	/* Higher priority than uffd-wp when data corrupted */
+	if (marker & PTE_MARKER_SWAPIN_ERROR)
+		return VM_FAULT_SIGBUS;
+
 	if (pte_marker_entry_uffd_wp(entry))
 		return pte_marker_handle_uffd_wp(vmf);
 
@@ -3727,8 +3731,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 			put_page(vmf->page);
 		} else if (is_hwpoison_entry(entry)) {
 			ret = VM_FAULT_HWPOISON;
-		} else if (is_swapin_error_entry(entry)) {
-			ret = VM_FAULT_SIGBUS;
 		} else if (is_pte_marker_entry(entry)) {
 			ret = handle_pte_marker(vmf);
 		} else {
diff --git a/mm/shmem.c b/mm/shmem.c
index 0a7c4a748811d..7428ae3fa4b9e 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1682,7 +1682,7 @@ static void shmem_set_folio_swapin_error(struct inode *inode, pgoff_t index,
 	swp_entry_t swapin_error;
 	void *old;
 
-	swapin_error = make_swapin_error_entry(&folio->page);
+	swapin_error = make_swapin_error_entry();
 	old = xa_cmpxchg_irq(&mapping->i_pages, index,
 			     swp_to_radix_entry(swap),
 			     swp_to_radix_entry(swapin_error), 0);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 72e481aacd5df..03fe0949f6b2f 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1781,7 +1781,7 @@ static int unuse_pte(struct vm_area_struct *vma, pmd_t *pmd,
 		pte_t pteval;
 
 		dec_mm_counter(vma->vm_mm, MM_SWAPENTS);
-		pteval = swp_entry_to_pte(make_swapin_error_entry(page));
+		pteval = swp_entry_to_pte(make_swapin_error_entry());
 		set_pte_at(vma->vm_mm, addr, pte, pteval);
 		swap_free(entry);
 		ret = 0;
-- 
GitLab


From 65917b538bcc4d8c0d8e199a6f7b7426acf13d58 Mon Sep 17 00:00:00 2001
From: Deming Wang <wangdeming@inspur.com>
Date: Thu, 3 Nov 2022 22:38:18 -0400
Subject: [PATCH 3002/4122] zsmalloc: replace IS_ERR() with IS_ERR_VALUE()

Avoid typecasts that are needed for IS_ERR() and use IS_ERR_VALUE()
instead.

Link: https://lkml.kernel.org/r/20221104023818.1728-1-wangdeming@inspur.com
Signed-off-by: Deming Wang <wangdeming@inspur.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zsmalloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index d03941cace2c4..b52b7bb88b522 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -387,7 +387,7 @@ static int zs_zpool_malloc(void *pool, size_t size, gfp_t gfp,
 {
 	*handle = zs_malloc(pool, size, gfp);
 
-	if (IS_ERR((void *)(*handle)))
+	if (IS_ERR_VALUE(*handle))
 		return PTR_ERR((void *)*handle);
 	return 0;
 }
-- 
GitLab


From 634ba645f9bc888227ca954ea643579268d1b6d8 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Thu, 3 Nov 2022 18:16:32 -0700
Subject: [PATCH 3003/4122] selftests/vm: update hugetlb madvise

Commit 8ebe0a5eaaeb ("mm,madvise,hugetlb: fix unexpected data loss with
MADV_DONTNEED on hugetlbfs") changed how the passed length was interpreted
for hugetlb mappings.  It was changed from align up to align down.  The
hugetlb-madvise test explicitly tests this behavior.  Change test to
expect new behavior.

Link: https://lkml.kernel.org/r/20221104011632.357049-1-mike.kravetz@oracle.com
Link: https://lore.kernel.org/oe-lkp/202211040619.2ec447d7-oliver.sang@intel.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Reported-by: kernel test robot <oliver.sang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/hugetlb-madvise.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/hugetlb-madvise.c b/tools/testing/selftests/vm/hugetlb-madvise.c
index f96435b709864..a634f47d1e563 100644
--- a/tools/testing/selftests/vm/hugetlb-madvise.c
+++ b/tools/testing/selftests/vm/hugetlb-madvise.c
@@ -195,7 +195,7 @@ int main(int argc, char **argv)
 			exit(1);
 	}
 
-	/* addr + length should be aligned up to huge page size */
+	/* addr + length should be aligned down to huge page size */
 	if (madvise(addr,
 			((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
 			MADV_DONTNEED)) {
@@ -203,10 +203,11 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
-	/* should free all pages in mapping */
-	validate_free_pages(free_hugepages);
+	/* should free all but last page in mapping */
+	validate_free_pages(free_hugepages - 1);
 
 	(void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+	validate_free_pages(free_hugepages);
 
 	/*
 	 * Test MADV_DONTNEED on anonymous private mapping
-- 
GitLab


From dad6a5eb55564845aa17b8b20fa834af21e46c48 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 2 Nov 2022 18:48:45 -0700
Subject: [PATCH 3004/4122] mm,hugetlb: use folio fields in second tail page

Patch series "mm,huge,rmap: unify and speed up compound mapcounts".


This patch (of 3):

We want to declare one more int in the first tail of a compound page: that
first tail page being valuable property, since every compound page has a
first tail, but perhaps no more than that.

No problem on 64-bit: there is already space for it.  No problem with
32-bit THPs: 5.18 commit 5232c63f46fd ("mm: Make compound_pincount always
available") kindly cleared the space for it, apparently not realizing that
only 64-bit architectures enable CONFIG_THP_SWAP (whose use of tail
page->private might conflict) - but make sure of that in its Kconfig.

But hugetlb pages use tail page->private of the first tail page for a
subpool pointer, which will conflict; and they also use page->private of
the 2nd, 3rd and 4th tails.

Undo "mm: add private field of first tail to struct page and struct
folio"'s recent addition of private_1 to the folio tail: instead add
hugetlb_subpool, hugetlb_cgroup, hugetlb_cgroup_rsvd, hugetlb_hwpoison to
a second tail page of the folio: THP has long been using several fields of
that tail, so make better use of it for hugetlb too.  This is not how a
generic folio should be declared in future, but it is an effective
transitional way to make use of it.

Delete the SUBPAGE_INDEX stuff, but keep __NR_USED_SUBPAGE: now 3.

[hughd@google.com: prefix folio's page_1 and page_2 with double underscore,
  give folio's _flags_2 and _head_2 a line documentation each]
  Link: https://lkml.kernel.org/r/9e2cb6b-5b58-d3f2-b5ee-5f8a14e8f10@google.com
Link: https://lkml.kernel.org/r/5f52de70-975-e94f-f141-543765736181@google.com
Link: https://lkml.kernel.org/r/3818cc9a-9999-d064-d778-9c94c5911e6@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/hugetlb.h        | 23 +++--------
 include/linux/hugetlb_cgroup.h | 31 +++++---------
 include/linux/mm_types.h       | 74 +++++++++++++++++++++++-----------
 mm/Kconfig                     |  2 +-
 mm/memory-failure.c            |  5 +--
 5 files changed, 67 insertions(+), 68 deletions(-)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 58a30938a9b19..551834cd52995 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -33,22 +33,9 @@ typedef struct { unsigned long pd; } hugepd_t;
 /*
  * For HugeTLB page, there are more metadata to save in the struct page. But
  * the head struct page cannot meet our needs, so we have to abuse other tail
- * struct page to store the metadata. In order to avoid conflicts caused by
- * subsequent use of more tail struct pages, we gather these discrete indexes
- * of tail struct page here.
+ * struct page to store the metadata.
  */
-enum {
-	SUBPAGE_INDEX_SUBPOOL = 1,	/* reuse page->private */
-#ifdef CONFIG_CGROUP_HUGETLB
-	SUBPAGE_INDEX_CGROUP,		/* reuse page->private */
-	SUBPAGE_INDEX_CGROUP_RSVD,	/* reuse page->private */
-	__MAX_CGROUP_SUBPAGE_INDEX = SUBPAGE_INDEX_CGROUP_RSVD,
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
-	SUBPAGE_INDEX_HWPOISON,
-#endif
-	__NR_USED_SUBPAGE,
-};
+#define __NR_USED_SUBPAGE 3
 
 struct hugepage_subpool {
 	spinlock_t lock;
@@ -722,11 +709,11 @@ extern unsigned int default_hstate_idx;
 
 static inline struct hugepage_subpool *hugetlb_folio_subpool(struct folio *folio)
 {
-	return (void *)folio_get_private_1(folio);
+	return folio->_hugetlb_subpool;
 }
 
 /*
- * hugetlb page subpool pointer located in hpage[1].private
+ * hugetlb page subpool pointer located in hpage[2].hugetlb_subpool
  */
 static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 {
@@ -736,7 +723,7 @@ static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
 static inline void hugetlb_set_folio_subpool(struct folio *folio,
 					struct hugepage_subpool *subpool)
 {
-	folio_set_private_1(folio, (unsigned long)subpool);
+	folio->_hugetlb_subpool = subpool;
 }
 
 static inline void hugetlb_set_page_subpool(struct page *hpage,
diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h
index c70f92fe493ef..f706626a80635 100644
--- a/include/linux/hugetlb_cgroup.h
+++ b/include/linux/hugetlb_cgroup.h
@@ -24,12 +24,10 @@ struct file_region;
 #ifdef CONFIG_CGROUP_HUGETLB
 /*
  * Minimum page order trackable by hugetlb cgroup.
- * At least 4 pages are necessary for all the tracking information.
- * The second tail page (hpage[SUBPAGE_INDEX_CGROUP]) is the fault
- * usage cgroup. The third tail page (hpage[SUBPAGE_INDEX_CGROUP_RSVD])
- * is the reservation usage cgroup.
+ * At least 3 pages are necessary for all the tracking information.
+ * The second tail page contains all of the hugetlb-specific fields.
  */
-#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__MAX_CGROUP_SUBPAGE_INDEX + 1)
+#define HUGETLB_CGROUP_MIN_ORDER order_base_2(__NR_USED_SUBPAGE)
 
 enum hugetlb_memory_event {
 	HUGETLB_MAX,
@@ -69,21 +67,13 @@ struct hugetlb_cgroup {
 static inline struct hugetlb_cgroup *
 __hugetlb_cgroup_from_folio(struct folio *folio, bool rsvd)
 {
-	struct page *tail;
-
 	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
 	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return NULL;
-
-	if (rsvd) {
-		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD);
-		return (void *)page_private(tail);
-	}
-
-	else {
-		tail = folio_page(folio, SUBPAGE_INDEX_CGROUP);
-		return (void *)page_private(tail);
-	}
+	if (rsvd)
+		return folio->_hugetlb_cgroup_rsvd;
+	else
+		return folio->_hugetlb_cgroup;
 }
 
 static inline struct hugetlb_cgroup *hugetlb_cgroup_from_folio(struct folio *folio)
@@ -101,15 +91,12 @@ static inline void __set_hugetlb_cgroup(struct folio *folio,
 				       struct hugetlb_cgroup *h_cg, bool rsvd)
 {
 	VM_BUG_ON_FOLIO(!folio_test_hugetlb(folio), folio);
-
 	if (folio_order(folio) < HUGETLB_CGROUP_MIN_ORDER)
 		return;
 	if (rsvd)
-		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP_RSVD),
-				 (unsigned long)h_cg);
+		folio->_hugetlb_cgroup_rsvd = h_cg;
 	else
-		set_page_private(folio_page(folio, SUBPAGE_INDEX_CGROUP),
-				 (unsigned long)h_cg);
+		folio->_hugetlb_cgroup = h_cg;
 }
 
 static inline void set_hugetlb_cgroup(struct folio *folio,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index e86861ff5bbdb..44f1f8b6be02b 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -145,15 +145,22 @@ struct page {
 			atomic_t compound_pincount;
 #ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
-			unsigned long _private_1;
 #endif
 		};
-		struct {	/* Second tail page of compound page */
+		struct {	/* Second tail page of transparent huge page */
 			unsigned long _compound_pad_1;	/* compound_head */
 			unsigned long _compound_pad_2;
 			/* For both global and memcg */
 			struct list_head deferred_list;
 		};
+		struct {	/* Second tail page of hugetlb page */
+			unsigned long _hugetlb_pad_1;	/* compound_head */
+			void *hugetlb_subpool;
+			void *hugetlb_cgroup;
+			void *hugetlb_cgroup_rsvd;
+			void *hugetlb_hwpoison;
+			/* No more space on 32-bit: use third tail if more */
+		};
 		struct {	/* Page table pages */
 			unsigned long _pt_pad_1;	/* compound_head */
 			pgtable_t pmd_huge_pte; /* protected by page->ptl */
@@ -260,13 +267,18 @@ struct page {
  *    to find how many references there are to this folio.
  * @memcg_data: Memory Control Group data.
  * @_flags_1: For large folios, additional page flags.
- * @__head: Points to the folio.  Do not use.
+ * @_head_1: Points to the folio.  Do not use.
  * @_folio_dtor: Which destructor to use for this folio.
  * @_folio_order: Do not use directly, call folio_order().
  * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
- * @_private_1: Do not use directly, call folio_get_private_1().
+ * @_flags_2: For alignment.  Do not use.
+ * @_head_2: Points to the folio.  Do not use.
+ * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h.
+ * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h.
+ * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head().
  *
  * A folio is a physically, virtually and logically contiguous set
  * of bytes.  It is a power-of-two in size, and it is aligned to that
@@ -305,16 +317,31 @@ struct folio {
 		};
 		struct page page;
 	};
-	unsigned long _flags_1;
-	unsigned long __head;
-	unsigned char _folio_dtor;
-	unsigned char _folio_order;
-	atomic_t _total_mapcount;
-	atomic_t _pincount;
+	union {
+		struct {
+			unsigned long _flags_1;
+			unsigned long _head_1;
+			unsigned char _folio_dtor;
+			unsigned char _folio_order;
+			atomic_t _total_mapcount;
+			atomic_t _pincount;
 #ifdef CONFIG_64BIT
-	unsigned int _folio_nr_pages;
+			unsigned int _folio_nr_pages;
 #endif
-	unsigned long _private_1;
+		};
+		struct page __page_1;
+	};
+	union {
+		struct {
+			unsigned long _flags_2;
+			unsigned long _head_2;
+			void *_hugetlb_subpool;
+			void *_hugetlb_cgroup;
+			void *_hugetlb_cgroup_rsvd;
+			void *_hugetlb_hwpoison;
+		};
+		struct page __page_2;
+	};
 };
 
 #define FOLIO_MATCH(pg, fl)						\
@@ -335,16 +362,25 @@ FOLIO_MATCH(memcg_data, memcg_data);
 	static_assert(offsetof(struct folio, fl) ==			\
 			offsetof(struct page, pg) + sizeof(struct page))
 FOLIO_MATCH(flags, _flags_1);
-FOLIO_MATCH(compound_head, __head);
+FOLIO_MATCH(compound_head, _head_1);
 FOLIO_MATCH(compound_dtor, _folio_dtor);
 FOLIO_MATCH(compound_order, _folio_order);
 FOLIO_MATCH(compound_mapcount, _total_mapcount);
 FOLIO_MATCH(compound_pincount, _pincount);
 #ifdef CONFIG_64BIT
 FOLIO_MATCH(compound_nr, _folio_nr_pages);
-FOLIO_MATCH(_private_1, _private_1);
 #endif
 #undef FOLIO_MATCH
+#define FOLIO_MATCH(pg, fl)						\
+	static_assert(offsetof(struct folio, fl) ==			\
+			offsetof(struct page, pg) + 2 * sizeof(struct page))
+FOLIO_MATCH(flags, _flags_2);
+FOLIO_MATCH(compound_head, _head_2);
+FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool);
+FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup);
+FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd);
+FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison);
+#undef FOLIO_MATCH
 
 static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
 {
@@ -388,16 +424,6 @@ static inline void *folio_get_private(struct folio *folio)
 	return folio->private;
 }
 
-static inline void folio_set_private_1(struct folio *folio, unsigned long private)
-{
-	folio->_private_1 = private;
-}
-
-static inline unsigned long folio_get_private_1(struct folio *folio)
-{
-	return folio->_private_1;
-}
-
 struct page_frag_cache {
 	void * va;
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
diff --git a/mm/Kconfig b/mm/Kconfig
index 4b28800d9be16..c86b69aff7d4d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -775,7 +775,7 @@ endchoice
 
 config THP_SWAP
 	def_bool y
-	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP
+	depends on TRANSPARENT_HUGEPAGE && ARCH_WANTS_THP_SWAP && SWAP && 64BIT
 	help
 	  Swap transparent huge pages in one piece, without splitting.
 	  XXX: For now, swap cluster backing transparent huge page
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 779a426d2caba..63d8501001c6f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1687,8 +1687,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
 #ifdef CONFIG_HUGETLB_PAGE
 /*
  * Struct raw_hwp_page represents information about "raw error page",
- * constructing singly linked list originated from ->private field of
- * SUBPAGE_INDEX_HWPOISON-th tail page.
+ * constructing singly linked list from ->_hugetlb_hwpoison field of folio.
  */
 struct raw_hwp_page {
 	struct llist_node node;
@@ -1697,7 +1696,7 @@ struct raw_hwp_page {
 
 static inline struct llist_head *raw_hwp_list_head(struct page *hpage)
 {
-	return (struct llist_head *)&page_private(hpage + SUBPAGE_INDEX_HWPOISON);
+	return (struct llist_head *)&page_folio(hpage)->_hugetlb_hwpoison;
 }
 
 static unsigned long __free_raw_hwp_pages(struct page *hpage, bool move_flag)
-- 
GitLab


From cb67f4282bf9693658dbda934a441ddbbb1446df Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 2 Nov 2022 18:51:38 -0700
Subject: [PATCH 3005/4122] mm,thp,rmap: simplify compound page mapcount
 handling

Compound page (folio) mapcount calculations have been different for anon
and file (or shmem) THPs, and involved the obscure PageDoubleMap flag.
And each huge mapping and unmapping of a file (or shmem) THP involved
atomically incrementing and decrementing the mapcount of every subpage of
that huge page, dirtying many struct page cachelines.

Add subpages_mapcount field to the struct folio and first tail page, so
that the total of subpage mapcounts is available in one place near the
head: then page_mapcount() and total_mapcount() and page_mapped(), and
their folio equivalents, are so quick that anon and file and hugetlb don't
need to be optimized differently.  Delete the unloved PageDoubleMap.

page_add and page_remove rmap functions must now maintain the
subpages_mapcount as well as the subpage _mapcount, when dealing with pte
mappings of huge pages; and correct maintenance of NR_ANON_MAPPED and
NR_FILE_MAPPED statistics still needs reading through the subpages, using
nr_subpages_unmapped() - but only when first or last pmd mapping finds
subpages_mapcount raised (double-map case, not the common case).

But are those counts (used to decide when to split an anon THP, and in
vmscan's pagecache_reclaimable heuristic) correctly maintained?  Not
quite: since page_remove_rmap() (and also split_huge_pmd()) is often
called without page lock, there can be races when a subpage pte mapcount
0<->1 while compound pmd mapcount 0<->1 is scanning - races which the
previous implementation had prevented.  The statistics might become
inaccurate, and even drift down until they underflow through 0.  That is
not good enough, but is better dealt with in a followup patch.

Update a few comments on first and second tail page overlaid fields.
hugepage_add_new_anon_rmap() has to "increment" compound_mapcount, but
subpages_mapcount and compound_pincount are already correctly at 0, so
delete its reinitialization of compound_pincount.

A simple 100 X munmap(mmap(2GB, MAP_SHARED|MAP_POPULATE, tmpfs), 2GB) took
18 seconds on small pages, and used to take 1 second on huge pages, but
now takes 119 milliseconds on huge pages.  Mapping by pmds a second time
used to take 860ms and now takes 92ms; mapping by pmds after mapping by
ptes (when the scan is needed) used to take 870ms and now takes 495ms.
But there might be some benchmarks which would show a slowdown, because
tail struct pages now fall out of cache until final freeing checks them.

Link: https://lkml.kernel.org/r/47ad693-717-79c8-e1ba-46c3a6602e48@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/mm/transhuge.rst |  18 -----
 include/linux/mm.h             |  85 ++++++++++++++------
 include/linux/mm_types.h       |  21 ++++-
 include/linux/page-flags.h     |  21 -----
 include/linux/rmap.h           |   2 +
 mm/debug.c                     |   5 +-
 mm/folio-compat.c              |   6 --
 mm/huge_memory.c               |  36 ++-------
 mm/hugetlb.c                   |   2 +
 mm/khugepaged.c                |  11 +--
 mm/page_alloc.c                |  27 ++++---
 mm/rmap.c                      | 142 +++++++++++++++++++--------------
 mm/util.c                      |  79 ------------------
 13 files changed, 194 insertions(+), 261 deletions(-)

diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index 216db1d67d04e..a560e0c01b16e 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -125,24 +125,6 @@ pages:
     ->_mapcount of all sub-pages in order to have race-free detection of
     last unmap of subpages.
 
-PageDoubleMap() indicates that the page is *possibly* mapped with PTEs.
-
-For anonymous pages, PageDoubleMap() also indicates ->_mapcount in all
-subpages is offset up by one. This additional reference is required to
-get race-free detection of unmap of subpages when we have them mapped with
-both PMDs and PTEs.
-
-This optimization is required to lower the overhead of per-subpage mapcount
-tracking. The alternative is to alter ->_mapcount in all subpages on each
-map/unmap of the whole compound page.
-
-For anonymous pages, we set PG_double_map when a PMD of the page is split
-for the first time, but still have a PMD mapping. The additional references
-go away with the last compound_mapcount.
-
-File pages get PG_double_map set on the first map of the page with PTE and
-goes away when the page gets evicted from the page cache.
-
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page
 structures. It can be done easily for refcounts taken by page table
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 3950ef45b9a97..a904c2d60f123 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -818,8 +818,8 @@ static inline int is_vmalloc_or_module_addr(const void *x)
 /*
  * How many times the entire folio is mapped as a single unit (eg by a
  * PMD or PUD entry).  This is probably not what you want, except for
- * debugging purposes; look at folio_mapcount() or page_mapcount()
- * instead.
+ * debugging purposes - it does not include PTE-mapped sub-pages; look
+ * at folio_mapcount() or page_mapcount() or total_mapcount() instead.
  */
 static inline int folio_entire_mapcount(struct folio *folio)
 {
@@ -829,12 +829,20 @@ static inline int folio_entire_mapcount(struct folio *folio)
 
 /*
  * Mapcount of compound page as a whole, does not include mapped sub-pages.
- *
- * Must be called only for compound pages.
+ * Must be called only on head of compound page.
  */
-static inline int compound_mapcount(struct page *page)
+static inline int head_compound_mapcount(struct page *head)
 {
-	return folio_entire_mapcount(page_folio(page));
+	return atomic_read(compound_mapcount_ptr(head)) + 1;
+}
+
+/*
+ * Sum of mapcounts of sub-pages, does not include compound mapcount.
+ * Must be called only on head of compound page.
+ */
+static inline int head_subpages_mapcount(struct page *head)
+{
+	return atomic_read(subpages_mapcount_ptr(head));
 }
 
 /*
@@ -847,11 +855,9 @@ static inline void page_mapcount_reset(struct page *page)
 	atomic_set(&(page)->_mapcount, -1);
 }
 
-int __page_mapcount(struct page *page);
-
 /*
  * Mapcount of 0-order page; when compound sub-page, includes
- * compound_mapcount().
+ * compound_mapcount of compound_head of page.
  *
  * Result is undefined for pages which cannot be mapped into userspace.
  * For example SLAB or special types of pages. See function page_has_type().
@@ -859,25 +865,61 @@ int __page_mapcount(struct page *page);
  */
 static inline int page_mapcount(struct page *page)
 {
-	if (unlikely(PageCompound(page)))
-		return __page_mapcount(page);
-	return atomic_read(&page->_mapcount) + 1;
-}
+	int mapcount = atomic_read(&page->_mapcount) + 1;
 
-int folio_mapcount(struct folio *folio);
+	if (likely(!PageCompound(page)))
+		return mapcount;
+	page = compound_head(page);
+	return head_compound_mapcount(page) + mapcount;
+}
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline int total_mapcount(struct page *page)
 {
-	return folio_mapcount(page_folio(page));
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+	page = compound_head(page);
+	return head_compound_mapcount(page) + head_subpages_mapcount(page);
 }
 
-#else
-static inline int total_mapcount(struct page *page)
+/*
+ * Return true if this page is mapped into pagetables.
+ * For compound page it returns true if any subpage of compound page is mapped,
+ * even if this particular subpage is not itself mapped by any PTE or PMD.
+ */
+static inline bool page_mapped(struct page *page)
 {
-	return page_mapcount(page);
+	return total_mapcount(page) > 0;
+}
+
+/**
+ * folio_mapcount() - Calculate the number of mappings of this folio.
+ * @folio: The folio.
+ *
+ * A large folio tracks both how many times the entire folio is mapped,
+ * and how many times each individual page in the folio is mapped.
+ * This function calculates the total number of times the folio is
+ * mapped.
+ *
+ * Return: The number of times this folio is mapped.
+ */
+static inline int folio_mapcount(struct folio *folio)
+{
+	if (likely(!folio_test_large(folio)))
+		return atomic_read(&folio->_mapcount) + 1;
+	return atomic_read(folio_mapcount_ptr(folio)) + 1 +
+		atomic_read(folio_subpages_mapcount_ptr(folio));
+}
+
+/**
+ * folio_mapped - Is this folio mapped into userspace?
+ * @folio: The folio.
+ *
+ * Return: True if any page in this folio is referenced by user page tables.
+ */
+static inline bool folio_mapped(struct folio *folio)
+{
+	return folio_mapcount(folio) > 0;
 }
-#endif
 
 static inline struct page *virt_to_head_page(const void *x)
 {
@@ -1800,9 +1842,6 @@ static inline pgoff_t page_index(struct page *page)
 	return page->index;
 }
 
-bool page_mapped(struct page *page);
-bool folio_mapped(struct folio *folio);
-
 /*
  * Return true only if the page has been allocated with
  * ALLOC_NO_WATERMARKS and the low watermark was not
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 44f1f8b6be02b..44a1a699b5add 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -142,6 +142,7 @@ struct page {
 			unsigned char compound_dtor;
 			unsigned char compound_order;
 			atomic_t compound_mapcount;
+			atomic_t subpages_mapcount;
 			atomic_t compound_pincount;
 #ifdef CONFIG_64BIT
 			unsigned int compound_nr; /* 1 << compound_order */
@@ -270,7 +271,8 @@ struct page {
  * @_head_1: Points to the folio.  Do not use.
  * @_folio_dtor: Which destructor to use for this folio.
  * @_folio_order: Do not use directly, call folio_order().
- * @_total_mapcount: Do not use directly, call folio_entire_mapcount().
+ * @_compound_mapcount: Do not use directly, call folio_entire_mapcount().
+ * @_subpages_mapcount: Do not use directly, call folio_mapcount().
  * @_pincount: Do not use directly, call folio_maybe_dma_pinned().
  * @_folio_nr_pages: Do not use directly, call folio_nr_pages().
  * @_flags_2: For alignment.  Do not use.
@@ -323,7 +325,8 @@ struct folio {
 			unsigned long _head_1;
 			unsigned char _folio_dtor;
 			unsigned char _folio_order;
-			atomic_t _total_mapcount;
+			atomic_t _compound_mapcount;
+			atomic_t _subpages_mapcount;
 			atomic_t _pincount;
 #ifdef CONFIG_64BIT
 			unsigned int _folio_nr_pages;
@@ -365,7 +368,8 @@ FOLIO_MATCH(flags, _flags_1);
 FOLIO_MATCH(compound_head, _head_1);
 FOLIO_MATCH(compound_dtor, _folio_dtor);
 FOLIO_MATCH(compound_order, _folio_order);
-FOLIO_MATCH(compound_mapcount, _total_mapcount);
+FOLIO_MATCH(compound_mapcount, _compound_mapcount);
+FOLIO_MATCH(subpages_mapcount, _subpages_mapcount);
 FOLIO_MATCH(compound_pincount, _pincount);
 #ifdef CONFIG_64BIT
 FOLIO_MATCH(compound_nr, _folio_nr_pages);
@@ -388,11 +392,22 @@ static inline atomic_t *folio_mapcount_ptr(struct folio *folio)
 	return &tail->compound_mapcount;
 }
 
+static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio)
+{
+	struct page *tail = &folio->page + 1;
+	return &tail->subpages_mapcount;
+}
+
 static inline atomic_t *compound_mapcount_ptr(struct page *page)
 {
 	return &page[1].compound_mapcount;
 }
 
+static inline atomic_t *subpages_mapcount_ptr(struct page *page)
+{
+	return &page[1].subpages_mapcount;
+}
+
 static inline atomic_t *compound_pincount_ptr(struct page *page)
 {
 	return &page[1].compound_pincount;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0b0ae5084e60c..e42c55a7e0129 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -176,9 +176,6 @@ enum pageflags {
 	/* SLOB */
 	PG_slob_free = PG_private,
 
-	/* Compound pages. Stored in first tail page's flags */
-	PG_double_map = PG_workingset,
-
 #ifdef CONFIG_MEMORY_FAILURE
 	/*
 	 * Compound pages. Stored in first tail page's flags.
@@ -874,29 +871,11 @@ static inline int PageTransTail(struct page *page)
 {
 	return PageTail(page);
 }
-
-/*
- * PageDoubleMap indicates that the compound page is mapped with PTEs as well
- * as PMDs.
- *
- * This is required for optimization of rmap operations for THP: we can postpone
- * per small page mapcount accounting (and its overhead from atomic operations)
- * until the first PMD split.
- *
- * For the page PageDoubleMap means ->_mapcount in all sub-pages is offset up
- * by one. This reference will go away with last compound_mapcount.
- *
- * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap().
- */
-PAGEFLAG(DoubleMap, double_map, PF_SECOND)
-	TESTSCFLAG(DoubleMap, double_map, PF_SECOND)
 #else
 TESTPAGEFLAG_FALSE(TransHuge, transhuge)
 TESTPAGEFLAG_FALSE(TransCompound, transcompound)
 TESTPAGEFLAG_FALSE(TransCompoundMap, transcompoundmap)
 TESTPAGEFLAG_FALSE(TransTail, transtail)
-PAGEFLAG_FALSE(DoubleMap, double_map)
-	TESTSCFLAG_FALSE(DoubleMap, double_map)
 #endif
 
 #if defined(CONFIG_MEMORY_FAILURE) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index bd3504d11b155..1973649e8f93e 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -206,6 +206,8 @@ void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
 
 static inline void __page_dup_rmap(struct page *page, bool compound)
 {
+	if (!compound && PageCompound(page))
+		atomic_inc(subpages_mapcount_ptr(compound_head(page)));
 	atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount);
 }
 
diff --git a/mm/debug.c b/mm/debug.c
index 0fd15ba70d163..7f8e5f744e420 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -94,9 +94,10 @@ static void __dump_page(struct page *page)
 			page, page_ref_count(head), mapcount, mapping,
 			page_to_pgoff(page), page_to_pfn(page));
 	if (compound) {
-		pr_warn("head:%p order:%u compound_mapcount:%d compound_pincount:%d\n",
+		pr_warn("head:%p order:%u compound_mapcount:%d subpages_mapcount:%d compound_pincount:%d\n",
 				head, compound_order(head),
-				folio_entire_mapcount(folio),
+				head_compound_mapcount(head),
+				head_subpages_mapcount(head),
 				head_compound_pincount(head));
 	}
 
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index bac2a366aada0..cbfe51091c391 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -39,12 +39,6 @@ void wait_for_stable_page(struct page *page)
 }
 EXPORT_SYMBOL_GPL(wait_for_stable_page);
 
-bool page_mapped(struct page *page)
-{
-	return folio_mapped(page_folio(page));
-}
-EXPORT_SYMBOL(page_mapped);
-
 void mark_page_accessed(struct page *page)
 {
 	folio_mark_accessed(page_folio(page));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index b26998d1845f8..7703169107c6c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2142,6 +2142,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
 		VM_BUG_ON_PAGE(!page_count(page), page);
 		page_ref_add(page, HPAGE_PMD_NR - 1);
+		atomic_add(HPAGE_PMD_NR, subpages_mapcount_ptr(page));
 
 		/*
 		 * Without "freeze", we'll simply split the PMD, propagating the
@@ -2225,33 +2226,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		pte_unmap(pte);
 	}
 
-	if (!pmd_migration) {
-		/*
-		 * Set PG_double_map before dropping compound_mapcount to avoid
-		 * false-negative page_mapped().
-		 */
-		if (compound_mapcount(page) > 1 &&
-		    !TestSetPageDoubleMap(page)) {
-			for (i = 0; i < HPAGE_PMD_NR; i++)
-				atomic_inc(&page[i]._mapcount);
-		}
-
-		lock_page_memcg(page);
-		if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
-			/* Last compound_mapcount is gone. */
-			__mod_lruvec_page_state(page, NR_ANON_THPS,
-						-HPAGE_PMD_NR);
-			if (TestClearPageDoubleMap(page)) {
-				/* No need in mapcount reference anymore */
-				for (i = 0; i < HPAGE_PMD_NR; i++)
-					atomic_dec(&page[i]._mapcount);
-			}
-		}
-		unlock_page_memcg(page);
-
-		/* Above is effectively page_remove_rmap(page, vma, true) */
-		munlock_vma_page(page, vma, true);
-	}
+	if (!pmd_migration)
+		page_remove_rmap(page, vma, true);
 
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
@@ -2453,7 +2429,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
 			 (1L << PG_dirty) |
 			 LRU_GEN_MASK | LRU_REFS_MASK));
 
-	/* ->mapping in first tail page is compound_mapcount */
+	/* ->mapping in first and second tail page is replaced by other uses */
 	VM_BUG_ON_PAGE(tail > 2 && page_tail->mapping != TAIL_MAPPING,
 			page_tail);
 	page_tail->mapping = head->mapping;
@@ -2463,6 +2439,10 @@ static void __split_huge_page_tail(struct page *head, int tail,
 	 * page->private should not be set in tail pages with the exception
 	 * of swap cache pages that store the swp_entry_t in tail pages.
 	 * Fix up and warn once if private is unexpectedly set.
+	 *
+	 * What of 32-bit systems, on which head[1].compound_pincount overlays
+	 * head[1].private?  No problem: THP_SWAP is not enabled on 32-bit, and
+	 * compound_pincount must be 0 for folio_ref_freeze() to have succeeded.
 	 */
 	if (!folio_test_swapcache(page_folio(head))) {
 		VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 76ebefe02827e..4f1338d82aabf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1333,6 +1333,7 @@ static void __destroy_compound_gigantic_page(struct page *page,
 	struct page *p;
 
 	atomic_set(compound_mapcount_ptr(page), 0);
+	atomic_set(subpages_mapcount_ptr(page), 0);
 	atomic_set(compound_pincount_ptr(page), 0);
 
 	for (i = 1; i < nr_pages; i++) {
@@ -1852,6 +1853,7 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
 			set_compound_head(p, page);
 	}
 	atomic_set(compound_mapcount_ptr(page), -1);
+	atomic_set(subpages_mapcount_ptr(page), 0);
 	atomic_set(compound_pincount_ptr(page), 0);
 	return true;
 
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 9c111273bbf91..0d8f548d9d7ed 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1238,15 +1238,8 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 		/*
 		 * Check if the page has any GUP (or other external) pins.
 		 *
-		 * Here the check is racy it may see total_mapcount > refcount
-		 * in some cases.
-		 * For example, one process with one forked child process.
-		 * The parent has the PMD split due to MADV_DONTNEED, then
-		 * the child is trying unmap the whole PMD, but khugepaged
-		 * may be scanning the parent between the child has
-		 * PageDoubleMap flag cleared and dec the mapcount.  So
-		 * khugepaged may see total_mapcount > refcount.
-		 *
+		 * Here the check may be racy:
+		 * it may see total_mapcount > refcount in some cases?
 		 * But such case is ephemeral we could always retry collapse
 		 * later.  However it may report false positive if the page
 		 * has excessive GUP pins (i.e. 512).  Anyway the same check
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6e60657875d32..0705917ddf54f 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -798,6 +798,7 @@ static void prep_compound_head(struct page *page, unsigned int order)
 	set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
 	set_compound_order(page, order);
 	atomic_set(compound_mapcount_ptr(page), -1);
+	atomic_set(subpages_mapcount_ptr(page), 0);
 	atomic_set(compound_pincount_ptr(page), 0);
 }
 
@@ -1324,11 +1325,19 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
 	}
 	switch (page - head_page) {
 	case 1:
-		/* the first tail page: ->mapping may be compound_mapcount() */
-		if (unlikely(compound_mapcount(page))) {
+		/* the first tail page: these may be in place of ->mapping */
+		if (unlikely(head_compound_mapcount(head_page))) {
 			bad_page(page, "nonzero compound_mapcount");
 			goto out;
 		}
+		if (unlikely(head_subpages_mapcount(head_page))) {
+			bad_page(page, "nonzero subpages_mapcount");
+			goto out;
+		}
+		if (unlikely(head_compound_pincount(head_page))) {
+			bad_page(page, "nonzero compound_pincount");
+			goto out;
+		}
 		break;
 	case 2:
 		/*
@@ -1431,10 +1440,8 @@ static __always_inline bool free_pages_prepare(struct page *page,
 
 		VM_BUG_ON_PAGE(compound && compound_order(page) != order, page);
 
-		if (compound) {
-			ClearPageDoubleMap(page);
+		if (compound)
 			ClearPageHasHWPoisoned(page);
-		}
 		for (i = 1; i < (1 << order); i++) {
 			if (compound)
 				bad += free_tail_pages_check(page, page + i);
@@ -6874,13 +6881,11 @@ static void __ref memmap_init_compound(struct page *head,
 		set_page_count(page, 0);
 
 		/*
-		 * The first tail page stores compound_mapcount_ptr() and
-		 * compound_order() and the second tail page stores
-		 * compound_pincount_ptr(). Call prep_compound_head() after
-		 * the first and second tail pages have been initialized to
-		 * not have the data overwritten.
+		 * The first tail page stores important compound page info.
+		 * Call prep_compound_head() after the first tail page has
+		 * been initialized, to not have the data overwritten.
 		 */
-		if (pfn == head_pfn + 2)
+		if (pfn == head_pfn + 1)
 			prep_compound_head(head, order);
 	}
 }
diff --git a/mm/rmap.c b/mm/rmap.c
index 3b2d18bbdc445..f43339ea49701 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1085,6 +1085,24 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 	return page_vma_mkclean_one(&pvmw);
 }
 
+/*
+ * When mapping a THP's first pmd, or unmapping its last pmd, if that THP
+ * also has pte mappings, then those must be discounted: in order to maintain
+ * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift,
+ * and to decide when an anon THP should be put on the deferred split queue.
+ */
+static int nr_subpages_unmapped(struct page *head, int nr_subpages)
+{
+	int nr = nr_subpages;
+	int i;
+
+	/* Discount those subpages mapped by pte */
+	for (i = 0; i < nr_subpages; i++)
+		if (atomic_read(&head[i]._mapcount) >= 0)
+			nr--;
+	return nr;
+}
+
 /**
  * page_move_anon_rmap - move a page to our anon_vma
  * @page:	the page to move to our anon_vma
@@ -1194,6 +1212,7 @@ static void __page_check_anon_rmap(struct page *page,
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address, rmap_t flags)
 {
+	int nr, nr_pages;
 	bool compound = flags & RMAP_COMPOUND;
 	bool first;
 
@@ -1202,28 +1221,32 @@ void page_add_anon_rmap(struct page *page,
 	else
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
 
-	if (compound) {
+	if (compound && PageTransHuge(page)) {
 		atomic_t *mapcount;
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 		mapcount = compound_mapcount_ptr(page);
 		first = atomic_inc_and_test(mapcount);
+
+		nr = nr_pages = thp_nr_pages(page);
+		if (first && head_subpages_mapcount(page))
+			nr = nr_subpages_unmapped(page, nr_pages);
 	} else {
+		nr = 1;
+		if (PageTransCompound(page)) {
+			struct page *head = compound_head(page);
+
+			atomic_inc(subpages_mapcount_ptr(head));
+			nr = !head_compound_mapcount(head);
+		}
 		first = atomic_inc_and_test(&page->_mapcount);
 	}
+
 	VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
 	VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
 
 	if (first) {
-		int nr = compound ? thp_nr_pages(page) : 1;
-		/*
-		 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
-		 * these counters are not modified in interrupt context, and
-		 * pte lock(a spinlock) is held, which implies preemption
-		 * disabled.
-		 */
 		if (compound)
-			__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
+			__mod_lruvec_page_state(page, NR_ANON_THPS, nr_pages);
 		__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
 	}
 
@@ -1265,8 +1288,6 @@ void page_add_new_anon_rmap(struct page *page,
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 		/* increment count (starts at -1) */
 		atomic_set(compound_mapcount_ptr(page), 0);
-		atomic_set(compound_pincount_ptr(page), 0);
-
 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
 	} else {
 		/* increment count (starts at -1) */
@@ -1287,29 +1308,19 @@ void page_add_new_anon_rmap(struct page *page,
 void page_add_file_rmap(struct page *page,
 	struct vm_area_struct *vma, bool compound)
 {
-	int i, nr = 0;
+	int nr = 0;
 
 	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 	lock_page_memcg(page);
 	if (compound && PageTransHuge(page)) {
-		int nr_pages = thp_nr_pages(page);
+		int nr_pages;
 
-		for (i = 0; i < nr_pages; i++) {
-			if (atomic_inc_and_test(&page[i]._mapcount))
-				nr++;
-		}
 		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
 			goto out;
 
-		/*
-		 * It is racy to ClearPageDoubleMap in page_remove_file_rmap();
-		 * but page lock is held by all page_add_file_rmap() compound
-		 * callers, and SetPageDoubleMap below warns if !PageLocked:
-		 * so here is a place that DoubleMap can be safely cleared.
-		 */
-		VM_WARN_ON_ONCE(!PageLocked(page));
-		if (nr == nr_pages && PageDoubleMap(page))
-			ClearPageDoubleMap(page);
+		nr = nr_pages = thp_nr_pages(page);
+		if (head_subpages_mapcount(page))
+			nr = nr_subpages_unmapped(page, nr_pages);
 
 		if (PageSwapBacked(page))
 			__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
@@ -1318,11 +1329,15 @@ void page_add_file_rmap(struct page *page,
 			__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
 						nr_pages);
 	} else {
-		if (PageTransCompound(page) && page_mapping(page)) {
-			VM_WARN_ON_ONCE(!PageLocked(page));
-			SetPageDoubleMap(compound_head(page));
+		bool pmd_mapped = false;
+
+		if (PageTransCompound(page)) {
+			struct page *head = compound_head(page);
+
+			atomic_inc(subpages_mapcount_ptr(head));
+			pmd_mapped = head_compound_mapcount(head);
 		}
-		if (atomic_inc_and_test(&page->_mapcount))
+		if (atomic_inc_and_test(&page->_mapcount) && !pmd_mapped)
 			nr++;
 	}
 out:
@@ -1335,7 +1350,7 @@ out:
 
 static void page_remove_file_rmap(struct page *page, bool compound)
 {
-	int i, nr = 0;
+	int nr = 0;
 
 	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
 
@@ -1348,14 +1363,15 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 
 	/* page still mapped by someone else? */
 	if (compound && PageTransHuge(page)) {
-		int nr_pages = thp_nr_pages(page);
+		int nr_pages;
 
-		for (i = 0; i < nr_pages; i++) {
-			if (atomic_add_negative(-1, &page[i]._mapcount))
-				nr++;
-		}
 		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
-			goto out;
+			return;
+
+		nr = nr_pages = thp_nr_pages(page);
+		if (head_subpages_mapcount(page))
+			nr = nr_subpages_unmapped(page, nr_pages);
+
 		if (PageSwapBacked(page))
 			__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
 						-nr_pages);
@@ -1363,17 +1379,25 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 			__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
 						-nr_pages);
 	} else {
-		if (atomic_add_negative(-1, &page->_mapcount))
+		bool pmd_mapped = false;
+
+		if (PageTransCompound(page)) {
+			struct page *head = compound_head(page);
+
+			atomic_dec(subpages_mapcount_ptr(head));
+			pmd_mapped = head_compound_mapcount(head);
+		}
+		if (atomic_add_negative(-1, &page->_mapcount) && !pmd_mapped)
 			nr++;
 	}
-out:
+
 	if (nr)
 		__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
 }
 
 static void page_remove_anon_compound_rmap(struct page *page)
 {
-	int i, nr;
+	int nr, nr_pages;
 
 	if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
 		return;
@@ -1385,27 +1409,19 @@ static void page_remove_anon_compound_rmap(struct page *page)
 	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
 		return;
 
-	__mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
+	nr = nr_pages = thp_nr_pages(page);
+	__mod_lruvec_page_state(page, NR_ANON_THPS, -nr);
 
-	if (TestClearPageDoubleMap(page)) {
-		/*
-		 * Subpages can be mapped with PTEs too. Check how many of
-		 * them are still mapped.
-		 */
-		for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
-			if (atomic_add_negative(-1, &page[i]._mapcount))
-				nr++;
-		}
+	if (head_subpages_mapcount(page)) {
+		nr = nr_subpages_unmapped(page, nr_pages);
 
 		/*
 		 * Queue the page for deferred split if at least one small
 		 * page of the compound page is unmapped, but at least one
 		 * small page is still mapped.
 		 */
-		if (nr && nr < thp_nr_pages(page))
+		if (nr && nr < nr_pages)
 			deferred_split_huge_page(page);
-	} else {
-		nr = thp_nr_pages(page);
 	}
 
 	if (nr)
@@ -1423,6 +1439,8 @@ static void page_remove_anon_compound_rmap(struct page *page)
 void page_remove_rmap(struct page *page,
 	struct vm_area_struct *vma, bool compound)
 {
+	bool pmd_mapped = false;
+
 	lock_page_memcg(page);
 
 	if (!PageAnon(page)) {
@@ -1435,15 +1453,17 @@ void page_remove_rmap(struct page *page,
 		goto out;
 	}
 
+	if (PageTransCompound(page)) {
+		struct page *head = compound_head(page);
+
+		atomic_dec(subpages_mapcount_ptr(head));
+		pmd_mapped = head_compound_mapcount(head);
+	}
+
 	/* page still mapped by someone else? */
-	if (!atomic_add_negative(-1, &page->_mapcount))
+	if (!atomic_add_negative(-1, &page->_mapcount) || pmd_mapped)
 		goto out;
 
-	/*
-	 * We use the irq-unsafe __{inc|mod}_zone_page_stat because
-	 * these counters are not modified in interrupt context, and
-	 * pte lock(a spinlock) is held, which implies preemption disabled.
-	 */
 	__dec_lruvec_page_state(page, NR_ANON_MAPPED);
 
 	if (PageTransCompound(page))
@@ -2569,8 +2589,8 @@ void hugepage_add_new_anon_rmap(struct page *page,
 			struct vm_area_struct *vma, unsigned long address)
 {
 	BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+	/* increment count (starts at -1) */
 	atomic_set(compound_mapcount_ptr(page), 0);
-	atomic_set(compound_pincount_ptr(page), 0);
 	ClearHPageRestoreReserve(page);
 	__page_set_anon_rmap(page, vma, address, 1);
 }
diff --git a/mm/util.c b/mm/util.c
index 12984e76767eb..b56c92fb910fc 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -717,32 +717,6 @@ void *page_rmapping(struct page *page)
 	return folio_raw_mapping(page_folio(page));
 }
 
-/**
- * folio_mapped - Is this folio mapped into userspace?
- * @folio: The folio.
- *
- * Return: True if any page in this folio is referenced by user page tables.
- */
-bool folio_mapped(struct folio *folio)
-{
-	long i, nr;
-
-	if (!folio_test_large(folio))
-		return atomic_read(&folio->_mapcount) >= 0;
-	if (atomic_read(folio_mapcount_ptr(folio)) >= 0)
-		return true;
-	if (folio_test_hugetlb(folio))
-		return false;
-
-	nr = folio_nr_pages(folio);
-	for (i = 0; i < nr; i++) {
-		if (atomic_read(&folio_page(folio, i)->_mapcount) >= 0)
-			return true;
-	}
-	return false;
-}
-EXPORT_SYMBOL(folio_mapped);
-
 struct anon_vma *folio_anon_vma(struct folio *folio)
 {
 	unsigned long mapping = (unsigned long)folio->mapping;
@@ -783,59 +757,6 @@ struct address_space *folio_mapping(struct folio *folio)
 }
 EXPORT_SYMBOL(folio_mapping);
 
-/* Slow path of page_mapcount() for compound pages */
-int __page_mapcount(struct page *page)
-{
-	int ret;
-
-	ret = atomic_read(&page->_mapcount) + 1;
-	/*
-	 * For file THP page->_mapcount contains total number of mapping
-	 * of the page: no need to look into compound_mapcount.
-	 */
-	if (!PageAnon(page) && !PageHuge(page))
-		return ret;
-	page = compound_head(page);
-	ret += atomic_read(compound_mapcount_ptr(page)) + 1;
-	if (PageDoubleMap(page))
-		ret--;
-	return ret;
-}
-EXPORT_SYMBOL_GPL(__page_mapcount);
-
-/**
- * folio_mapcount() - Calculate the number of mappings of this folio.
- * @folio: The folio.
- *
- * A large folio tracks both how many times the entire folio is mapped,
- * and how many times each individual page in the folio is mapped.
- * This function calculates the total number of times the folio is
- * mapped.
- *
- * Return: The number of times this folio is mapped.
- */
-int folio_mapcount(struct folio *folio)
-{
-	int i, compound, nr, ret;
-
-	if (likely(!folio_test_large(folio)))
-		return atomic_read(&folio->_mapcount) + 1;
-
-	compound = folio_entire_mapcount(folio);
-	if (folio_test_hugetlb(folio))
-		return compound;
-	ret = compound;
-	nr = folio_nr_pages(folio);
-	for (i = 0; i < nr; i++)
-		ret += atomic_read(&folio_page(folio, i)->_mapcount) + 1;
-	/* File pages has compound_mapcount included in _mapcount */
-	if (!folio_test_anon(folio))
-		return ret - compound * nr;
-	if (folio_test_double_map(folio))
-		ret -= nr;
-	return ret;
-}
-
 /**
  * folio_copy - Copy the contents of one folio to another.
  * @dst: Folio to copy to.
-- 
GitLab


From 9bd3155ed83b723be719e522760f107229e2a61b Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 2 Nov 2022 18:53:45 -0700
Subject: [PATCH 3006/4122] mm,thp,rmap: lock_compound_mapcounts() on THP
 mapcounts

Fix the races in maintaining compound_mapcount, subpages_mapcount and
subpage _mapcount by using PG_locked in the first tail of any compound
page for a bit_spin_lock() on such modifications; skipping the usual
atomic operations on those fields in this case.

Bring page_remove_file_rmap() and page_remove_anon_compound_rmap() back
into page_remove_rmap() itself.  Rearrange page_add_anon_rmap() and
page_add_file_rmap() and page_remove_rmap() to follow the same "if
(compound) {lock} else if (PageCompound) {lock} else {atomic}" pattern
(with a PageTransHuge in the compound test, like before, to avoid BUG_ONs
and optimize away that block when THP is not configured).  Move all the
stats updates outside, after the bit_spin_locked section, so that it is
sure to be a leaf lock.

Add page_dup_compound_rmap() to manage compound locking versus atomics in
sync with the rest.  In particular, hugetlb pages are still using the
atomics: to avoid unnecessary interference there, and because they never
have subpage mappings; but this exception can easily be changed.
Conveniently, page_dup_compound_rmap() turns out to suit an anon THP's
__split_huge_pmd_locked() too.

bit_spin_lock() is not popular with PREEMPT_RT folks: but PREEMPT_RT
sensibly excludes TRANSPARENT_HUGEPAGE already, so its only exposure is to
the non-hugetlb non-THP pte-mapped compound pages (with large folios being
currently dependent on TRANSPARENT_HUGEPAGE).  There is never any scan of
subpages in this case; but we have chosen to use PageCompound tests rather
than PageTransCompound tests to gate the use of lock_compound_mapcounts(),
so that page_mapped() is correct on all compound pages, whether or not
TRANSPARENT_HUGEPAGE is enabled: could that be a problem for PREEMPT_RT,
when there is contention on the lock - under heavy concurrent forking for
example?  If so, then it can be turned into a sleeping lock (like
folio_lock()) when PREEMPT_RT.

A simple 100 X munmap(mmap(2GB, MAP_SHARED|MAP_POPULATE, tmpfs), 2GB) took
18 seconds on small pages, and used to take 1 second on huge pages, but
now takes 115 milliseconds on huge pages.  Mapping by pmds a second time
used to take 860ms and now takes 86ms; mapping by pmds after mapping by
ptes (when the scan is needed) used to take 870ms and now takes 495ms.
Mapping huge pages by ptes is largely unaffected but variable: between 5%
faster and 5% slower in what I've recorded.  Contention on the lock is
likely to behave worse than contention on the atomics behaved.

Link: https://lkml.kernel.org/r/1b42bd1a-8223-e827-602f-d466c2db7d3c@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/mm/transhuge.rst |  16 +-
 include/linux/rmap.h           |  14 +-
 mm/huge_memory.c               |   3 +-
 mm/rmap.c                      | 333 +++++++++++++++++++--------------
 4 files changed, 204 insertions(+), 162 deletions(-)

diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index a560e0c01b16e..1e2a637cc6079 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -117,13 +117,15 @@ pages:
   - ->_refcount in tail pages is always zero: get_page_unless_zero() never
     succeeds on tail pages.
 
-  - map/unmap of the pages with PTE entry increment/decrement ->_mapcount
-    on relevant sub-page of the compound page.
-
-  - map/unmap of the whole compound page is accounted for in compound_mapcount
-    (stored in first tail page). For file huge pages, we also increment
-    ->_mapcount of all sub-pages in order to have race-free detection of
-    last unmap of subpages.
+  - map/unmap of PMD entry for the whole compound page increment/decrement
+    ->compound_mapcount, stored in the first tail page of the compound page.
+
+  - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount
+    on relevant sub-page of the compound page, and also increment/decrement
+    ->subpages_mapcount, stored in first tail page of the compound page.
+    In order to have race-free accounting of sub-pages mapped, changes to
+    sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are
+    are all locked by bit_spin_lock of PG_locked in the first tail ->flags.
 
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 1973649e8f93e..011a7530dc76d 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -204,16 +204,14 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
 void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
 		unsigned long address);
 
-static inline void __page_dup_rmap(struct page *page, bool compound)
-{
-	if (!compound && PageCompound(page))
-		atomic_inc(subpages_mapcount_ptr(compound_head(page)));
-	atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount);
-}
+void page_dup_compound_rmap(struct page *page, bool compound);
 
 static inline void page_dup_file_rmap(struct page *page, bool compound)
 {
-	__page_dup_rmap(page, compound);
+	if (PageCompound(page))
+		page_dup_compound_rmap(page, compound);
+	else
+		atomic_inc(&page->_mapcount);
 }
 
 /**
@@ -262,7 +260,7 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound,
 	 * the page R/O into both processes.
 	 */
 dup:
-	__page_dup_rmap(page, compound);
+	page_dup_file_rmap(page, compound);
 	return 0;
 }
 
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 7703169107c6c..114517e8cbfc0 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2142,7 +2142,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 
 		VM_BUG_ON_PAGE(!page_count(page), page);
 		page_ref_add(page, HPAGE_PMD_NR - 1);
-		atomic_add(HPAGE_PMD_NR, subpages_mapcount_ptr(page));
 
 		/*
 		 * Without "freeze", we'll simply split the PMD, propagating the
@@ -2222,7 +2221,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, entry);
 		if (!pmd_migration)
-			atomic_inc(&page[i]._mapcount);
+			page_dup_compound_rmap(page + i, false);
 		pte_unmap(pte);
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index f43339ea49701..512e53cae2ca1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1085,11 +1085,66 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 	return page_vma_mkclean_one(&pvmw);
 }
 
+struct compound_mapcounts {
+	unsigned int compound_mapcount;
+	unsigned int subpages_mapcount;
+};
+
+/*
+ * lock_compound_mapcounts() first locks, then copies subpages_mapcount and
+ * compound_mapcount from head[1].compound_mapcount and subpages_mapcount,
+ * converting from struct page's internal representation to logical count
+ * (that is, adding 1 to compound_mapcount to hide its offset by -1).
+ */
+static void lock_compound_mapcounts(struct page *head,
+		struct compound_mapcounts *local)
+{
+	bit_spin_lock(PG_locked, &head[1].flags);
+	local->compound_mapcount = atomic_read(compound_mapcount_ptr(head)) + 1;
+	local->subpages_mapcount = atomic_read(subpages_mapcount_ptr(head));
+}
+
+/*
+ * After caller has updated subpage._mapcount, local subpages_mapcount and
+ * local compound_mapcount, as necessary, unlock_compound_mapcounts() converts
+ * and copies them back to the compound head[1] fields, and then unlocks.
+ */
+static void unlock_compound_mapcounts(struct page *head,
+		struct compound_mapcounts *local)
+{
+	atomic_set(compound_mapcount_ptr(head), local->compound_mapcount - 1);
+	atomic_set(subpages_mapcount_ptr(head), local->subpages_mapcount);
+	bit_spin_unlock(PG_locked, &head[1].flags);
+}
+
+/*
+ * When acting on a compound page under lock_compound_mapcounts(), avoid the
+ * unnecessary overhead of an actual atomic operation on its subpage mapcount.
+ * Return true if this is the first increment or the last decrement
+ * (remembering that page->_mapcount -1 represents logical mapcount 0).
+ */
+static bool subpage_mapcount_inc(struct page *page)
+{
+	int orig_mapcount = atomic_read(&page->_mapcount);
+
+	atomic_set(&page->_mapcount, orig_mapcount + 1);
+	return orig_mapcount < 0;
+}
+
+static bool subpage_mapcount_dec(struct page *page)
+{
+	int orig_mapcount = atomic_read(&page->_mapcount);
+
+	atomic_set(&page->_mapcount, orig_mapcount - 1);
+	return orig_mapcount == 0;
+}
+
 /*
  * When mapping a THP's first pmd, or unmapping its last pmd, if that THP
  * also has pte mappings, then those must be discounted: in order to maintain
  * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift,
  * and to decide when an anon THP should be put on the deferred split queue.
+ * This function must be called between lock_ and unlock_compound_mapcounts().
  */
 static int nr_subpages_unmapped(struct page *head, int nr_subpages)
 {
@@ -1103,6 +1158,40 @@ static int nr_subpages_unmapped(struct page *head, int nr_subpages)
 	return nr;
 }
 
+/*
+ * page_dup_compound_rmap(), used when copying mm, or when splitting pmd,
+ * provides a simple example of using lock_ and unlock_compound_mapcounts().
+ */
+void page_dup_compound_rmap(struct page *page, bool compound)
+{
+	struct compound_mapcounts mapcounts;
+	struct page *head;
+
+	/*
+	 * Hugetlb pages could use lock_compound_mapcounts(), like THPs do;
+	 * but at present they are still being managed by atomic operations:
+	 * which are likely to be somewhat faster, so don't rush to convert
+	 * them over without evaluating the effect.
+	 *
+	 * Note that hugetlb does not call page_add_file_rmap():
+	 * here is where hugetlb shared page mapcount is raised.
+	 */
+	if (PageHuge(page)) {
+		atomic_inc(compound_mapcount_ptr(page));
+		return;
+	}
+
+	head = compound_head(page);
+	lock_compound_mapcounts(head, &mapcounts);
+	if (compound) {
+		mapcounts.compound_mapcount++;
+	} else {
+		mapcounts.subpages_mapcount++;
+		subpage_mapcount_inc(page);
+	}
+	unlock_compound_mapcounts(head, &mapcounts);
+}
+
 /**
  * page_move_anon_rmap - move a page to our anon_vma
  * @page:	the page to move to our anon_vma
@@ -1212,7 +1301,8 @@ static void __page_check_anon_rmap(struct page *page,
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address, rmap_t flags)
 {
-	int nr, nr_pages;
+	struct compound_mapcounts mapcounts;
+	int nr = 0, nr_pmdmapped = 0;
 	bool compound = flags & RMAP_COMPOUND;
 	bool first;
 
@@ -1222,33 +1312,37 @@ void page_add_anon_rmap(struct page *page,
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
 
 	if (compound && PageTransHuge(page)) {
-		atomic_t *mapcount;
-		VM_BUG_ON_PAGE(!PageLocked(page), page);
-		mapcount = compound_mapcount_ptr(page);
-		first = atomic_inc_and_test(mapcount);
+		lock_compound_mapcounts(page, &mapcounts);
+		first = !mapcounts.compound_mapcount;
+		mapcounts.compound_mapcount++;
+		if (first) {
+			nr = nr_pmdmapped = thp_nr_pages(page);
+			if (mapcounts.subpages_mapcount)
+				nr = nr_subpages_unmapped(page, nr_pmdmapped);
+		}
+		unlock_compound_mapcounts(page, &mapcounts);
 
-		nr = nr_pages = thp_nr_pages(page);
-		if (first && head_subpages_mapcount(page))
-			nr = nr_subpages_unmapped(page, nr_pages);
-	} else {
-		nr = 1;
-		if (PageTransCompound(page)) {
-			struct page *head = compound_head(page);
+	} else if (PageCompound(page)) {
+		struct page *head = compound_head(page);
 
-			atomic_inc(subpages_mapcount_ptr(head));
-			nr = !head_compound_mapcount(head);
-		}
+		lock_compound_mapcounts(head, &mapcounts);
+		mapcounts.subpages_mapcount++;
+		first = subpage_mapcount_inc(page);
+		nr = first && !mapcounts.compound_mapcount;
+		unlock_compound_mapcounts(head, &mapcounts);
+
+	} else {
 		first = atomic_inc_and_test(&page->_mapcount);
+		nr = first;
 	}
 
 	VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
 	VM_BUG_ON_PAGE(!first && PageAnonExclusive(page), page);
 
-	if (first) {
-		if (compound)
-			__mod_lruvec_page_state(page, NR_ANON_THPS, nr_pages);
+	if (nr_pmdmapped)
+		__mod_lruvec_page_state(page, NR_ANON_THPS, nr_pmdmapped);
+	if (nr)
 		__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
-	}
 
 	if (unlikely(PageKsm(page)))
 		unlock_page_memcg(page);
@@ -1308,39 +1402,41 @@ void page_add_new_anon_rmap(struct page *page,
 void page_add_file_rmap(struct page *page,
 	struct vm_area_struct *vma, bool compound)
 {
-	int nr = 0;
+	struct compound_mapcounts mapcounts;
+	int nr = 0, nr_pmdmapped = 0;
+	bool first;
 
 	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 	lock_page_memcg(page);
+
 	if (compound && PageTransHuge(page)) {
-		int nr_pages;
+		lock_compound_mapcounts(page, &mapcounts);
+		first = !mapcounts.compound_mapcount;
+		mapcounts.compound_mapcount++;
+		if (first) {
+			nr = nr_pmdmapped = thp_nr_pages(page);
+			if (mapcounts.subpages_mapcount)
+				nr = nr_subpages_unmapped(page, nr_pmdmapped);
+		}
+		unlock_compound_mapcounts(page, &mapcounts);
 
-		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
-			goto out;
+	} else if (PageCompound(page)) {
+		struct page *head = compound_head(page);
 
-		nr = nr_pages = thp_nr_pages(page);
-		if (head_subpages_mapcount(page))
-			nr = nr_subpages_unmapped(page, nr_pages);
+		lock_compound_mapcounts(head, &mapcounts);
+		mapcounts.subpages_mapcount++;
+		first = subpage_mapcount_inc(page);
+		nr = first && !mapcounts.compound_mapcount;
+		unlock_compound_mapcounts(head, &mapcounts);
 
-		if (PageSwapBacked(page))
-			__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
-						nr_pages);
-		else
-			__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
-						nr_pages);
 	} else {
-		bool pmd_mapped = false;
-
-		if (PageTransCompound(page)) {
-			struct page *head = compound_head(page);
-
-			atomic_inc(subpages_mapcount_ptr(head));
-			pmd_mapped = head_compound_mapcount(head);
-		}
-		if (atomic_inc_and_test(&page->_mapcount) && !pmd_mapped)
-			nr++;
+		first = atomic_inc_and_test(&page->_mapcount);
+		nr = first;
 	}
-out:
+
+	if (nr_pmdmapped)
+		__mod_lruvec_page_state(page, PageSwapBacked(page) ?
+			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
 	if (nr)
 		__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);
 	unlock_page_memcg(page);
@@ -1348,137 +1444,84 @@ out:
 	mlock_vma_page(page, vma, compound);
 }
 
-static void page_remove_file_rmap(struct page *page, bool compound)
+/**
+ * page_remove_rmap - take down pte mapping from a page
+ * @page:	page to remove mapping from
+ * @vma:	the vm area from which the mapping is removed
+ * @compound:	uncharge the page as compound or small page
+ *
+ * The caller needs to hold the pte lock.
+ */
+void page_remove_rmap(struct page *page,
+	struct vm_area_struct *vma, bool compound)
 {
-	int nr = 0;
+	struct compound_mapcounts mapcounts;
+	int nr = 0, nr_pmdmapped = 0;
+	bool last;
 
 	VM_BUG_ON_PAGE(compound && !PageHead(page), page);
 
-	/* Hugepages are not counted in NR_FILE_MAPPED for now. */
+	/* Hugetlb pages are not counted in NR_*MAPPED */
 	if (unlikely(PageHuge(page))) {
 		/* hugetlb pages are always mapped with pmds */
 		atomic_dec(compound_mapcount_ptr(page));
 		return;
 	}
 
+	lock_page_memcg(page);
+
 	/* page still mapped by someone else? */
 	if (compound && PageTransHuge(page)) {
-		int nr_pages;
+		lock_compound_mapcounts(page, &mapcounts);
+		mapcounts.compound_mapcount--;
+		last = !mapcounts.compound_mapcount;
+		if (last) {
+			nr = nr_pmdmapped = thp_nr_pages(page);
+			if (mapcounts.subpages_mapcount)
+				nr = nr_subpages_unmapped(page, nr_pmdmapped);
+		}
+		unlock_compound_mapcounts(page, &mapcounts);
 
-		if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
-			return;
+	} else if (PageCompound(page)) {
+		struct page *head = compound_head(page);
 
-		nr = nr_pages = thp_nr_pages(page);
-		if (head_subpages_mapcount(page))
-			nr = nr_subpages_unmapped(page, nr_pages);
+		lock_compound_mapcounts(head, &mapcounts);
+		mapcounts.subpages_mapcount--;
+		last = subpage_mapcount_dec(page);
+		nr = last && !mapcounts.compound_mapcount;
+		unlock_compound_mapcounts(head, &mapcounts);
 
-		if (PageSwapBacked(page))
-			__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
-						-nr_pages);
-		else
-			__mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
-						-nr_pages);
 	} else {
-		bool pmd_mapped = false;
-
-		if (PageTransCompound(page)) {
-			struct page *head = compound_head(page);
-
-			atomic_dec(subpages_mapcount_ptr(head));
-			pmd_mapped = head_compound_mapcount(head);
-		}
-		if (atomic_add_negative(-1, &page->_mapcount) && !pmd_mapped)
-			nr++;
+		last = atomic_add_negative(-1, &page->_mapcount);
+		nr = last;
 	}
 
-	if (nr)
-		__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr);
-}
-
-static void page_remove_anon_compound_rmap(struct page *page)
-{
-	int nr, nr_pages;
-
-	if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
-		return;
-
-	/* Hugepages are not counted in NR_ANON_PAGES for now. */
-	if (unlikely(PageHuge(page)))
-		return;
-
-	if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
-		return;
-
-	nr = nr_pages = thp_nr_pages(page);
-	__mod_lruvec_page_state(page, NR_ANON_THPS, -nr);
-
-	if (head_subpages_mapcount(page)) {
-		nr = nr_subpages_unmapped(page, nr_pages);
-
+	if (nr_pmdmapped) {
+		__mod_lruvec_page_state(page, PageAnon(page) ? NR_ANON_THPS :
+				(PageSwapBacked(page) ? NR_SHMEM_PMDMAPPED :
+				NR_FILE_PMDMAPPED), -nr_pmdmapped);
+	}
+	if (nr) {
+		__mod_lruvec_page_state(page, PageAnon(page) ? NR_ANON_MAPPED :
+				NR_FILE_MAPPED, -nr);
 		/*
-		 * Queue the page for deferred split if at least one small
+		 * Queue anon THP for deferred split if at least one small
 		 * page of the compound page is unmapped, but at least one
 		 * small page is still mapped.
 		 */
-		if (nr && nr < nr_pages)
-			deferred_split_huge_page(page);
-	}
-
-	if (nr)
-		__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);
-}
-
-/**
- * page_remove_rmap - take down pte mapping from a page
- * @page:	page to remove mapping from
- * @vma:	the vm area from which the mapping is removed
- * @compound:	uncharge the page as compound or small page
- *
- * The caller needs to hold the pte lock.
- */
-void page_remove_rmap(struct page *page,
-	struct vm_area_struct *vma, bool compound)
-{
-	bool pmd_mapped = false;
-
-	lock_page_memcg(page);
-
-	if (!PageAnon(page)) {
-		page_remove_file_rmap(page, compound);
-		goto out;
+		if (PageTransCompound(page) && PageAnon(page))
+			if (!compound || nr < nr_pmdmapped)
+				deferred_split_huge_page(compound_head(page));
 	}
 
-	if (compound) {
-		page_remove_anon_compound_rmap(page);
-		goto out;
-	}
-
-	if (PageTransCompound(page)) {
-		struct page *head = compound_head(page);
-
-		atomic_dec(subpages_mapcount_ptr(head));
-		pmd_mapped = head_compound_mapcount(head);
-	}
-
-	/* page still mapped by someone else? */
-	if (!atomic_add_negative(-1, &page->_mapcount) || pmd_mapped)
-		goto out;
-
-	__dec_lruvec_page_state(page, NR_ANON_MAPPED);
-
-	if (PageTransCompound(page))
-		deferred_split_huge_page(compound_head(page));
-
 	/*
-	 * It would be tidy to reset the PageAnon mapping here,
+	 * It would be tidy to reset PageAnon mapping when fully unmapped,
 	 * but that might overwrite a racing page_add_anon_rmap
 	 * which increments mapcount after us but sets mapping
-	 * before us: so leave the reset to free_unref_page,
+	 * before us: so leave the reset to free_pages_prepare,
 	 * and remember that it's only reliable while mapped.
-	 * Leaving it set also helps swapoff to reinstate ptes
-	 * faster for those pages still in swapcache.
 	 */
-out:
+
 	unlock_page_memcg(page);
 
 	munlock_vma_page(page, vma, compound);
-- 
GitLab


From d8dd5e979d09c7463618853fb4aedd88e3efc8ae Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Wed, 9 Nov 2022 18:18:49 -0800
Subject: [PATCH 3007/4122] mm,thp,rmap: handle the normal !PageCompound case
 first

Commit ("mm,thp,rmap: lock_compound_mapcounts() on THP mapcounts")
propagated the "if (compound) {lock} else if (PageCompound) {lock} else
{atomic}" pattern throughout; but Linus hated the way that gives primacy
to the uncommon case: switch to "if (!PageCompound) {atomic} else if
(compound) {lock} else {lock}" throughout.  Linus has a bigger idea for
how to improve it all, but here just make that rearrangement.

Link: https://lkml.kernel.org/r/fca2f694-2098-b0ef-d4e-f1d8b94d318c@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/rmap.c | 54 +++++++++++++++++++++++++++---------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 512e53cae2ca1..4833d28c5e1a0 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1311,7 +1311,11 @@ void page_add_anon_rmap(struct page *page,
 	else
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
 
-	if (compound && PageTransHuge(page)) {
+	if (likely(!PageCompound(page))) {
+		first = atomic_inc_and_test(&page->_mapcount);
+		nr = first;
+
+	} else if (compound && PageTransHuge(page)) {
 		lock_compound_mapcounts(page, &mapcounts);
 		first = !mapcounts.compound_mapcount;
 		mapcounts.compound_mapcount++;
@@ -1321,8 +1325,7 @@ void page_add_anon_rmap(struct page *page,
 				nr = nr_subpages_unmapped(page, nr_pmdmapped);
 		}
 		unlock_compound_mapcounts(page, &mapcounts);
-
-	} else if (PageCompound(page)) {
+	} else {
 		struct page *head = compound_head(page);
 
 		lock_compound_mapcounts(head, &mapcounts);
@@ -1330,10 +1333,6 @@ void page_add_anon_rmap(struct page *page,
 		first = subpage_mapcount_inc(page);
 		nr = first && !mapcounts.compound_mapcount;
 		unlock_compound_mapcounts(head, &mapcounts);
-
-	} else {
-		first = atomic_inc_and_test(&page->_mapcount);
-		nr = first;
 	}
 
 	VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
@@ -1373,20 +1372,23 @@ void page_add_anon_rmap(struct page *page,
 void page_add_new_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	const bool compound = PageCompound(page);
-	int nr = compound ? thp_nr_pages(page) : 1;
+	int nr;
 
 	VM_BUG_ON_VMA(address < vma->vm_start || address >= vma->vm_end, vma);
 	__SetPageSwapBacked(page);
-	if (compound) {
+
+	if (likely(!PageCompound(page))) {
+		/* increment count (starts at -1) */
+		atomic_set(&page->_mapcount, 0);
+		nr = 1;
+	} else {
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 		/* increment count (starts at -1) */
 		atomic_set(compound_mapcount_ptr(page), 0);
+		nr = thp_nr_pages(page);
 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
-	} else {
-		/* increment count (starts at -1) */
-		atomic_set(&page->_mapcount, 0);
 	}
+
 	__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
 	__page_set_anon_rmap(page, vma, address, 1);
 }
@@ -1409,7 +1411,11 @@ void page_add_file_rmap(struct page *page,
 	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 	lock_page_memcg(page);
 
-	if (compound && PageTransHuge(page)) {
+	if (likely(!PageCompound(page))) {
+		first = atomic_inc_and_test(&page->_mapcount);
+		nr = first;
+
+	} else if (compound && PageTransHuge(page)) {
 		lock_compound_mapcounts(page, &mapcounts);
 		first = !mapcounts.compound_mapcount;
 		mapcounts.compound_mapcount++;
@@ -1419,8 +1425,7 @@ void page_add_file_rmap(struct page *page,
 				nr = nr_subpages_unmapped(page, nr_pmdmapped);
 		}
 		unlock_compound_mapcounts(page, &mapcounts);
-
-	} else if (PageCompound(page)) {
+	} else {
 		struct page *head = compound_head(page);
 
 		lock_compound_mapcounts(head, &mapcounts);
@@ -1428,10 +1433,6 @@ void page_add_file_rmap(struct page *page,
 		first = subpage_mapcount_inc(page);
 		nr = first && !mapcounts.compound_mapcount;
 		unlock_compound_mapcounts(head, &mapcounts);
-
-	} else {
-		first = atomic_inc_and_test(&page->_mapcount);
-		nr = first;
 	}
 
 	if (nr_pmdmapped)
@@ -1471,7 +1472,11 @@ void page_remove_rmap(struct page *page,
 	lock_page_memcg(page);
 
 	/* page still mapped by someone else? */
-	if (compound && PageTransHuge(page)) {
+	if (likely(!PageCompound(page))) {
+		last = atomic_add_negative(-1, &page->_mapcount);
+		nr = last;
+
+	} else if (compound && PageTransHuge(page)) {
 		lock_compound_mapcounts(page, &mapcounts);
 		mapcounts.compound_mapcount--;
 		last = !mapcounts.compound_mapcount;
@@ -1481,8 +1486,7 @@ void page_remove_rmap(struct page *page,
 				nr = nr_subpages_unmapped(page, nr_pmdmapped);
 		}
 		unlock_compound_mapcounts(page, &mapcounts);
-
-	} else if (PageCompound(page)) {
+	} else {
 		struct page *head = compound_head(page);
 
 		lock_compound_mapcounts(head, &mapcounts);
@@ -1490,10 +1494,6 @@ void page_remove_rmap(struct page *page,
 		last = subpage_mapcount_dec(page);
 		nr = last && !mapcounts.compound_mapcount;
 		unlock_compound_mapcounts(head, &mapcounts);
-
-	} else {
-		last = atomic_add_negative(-1, &page->_mapcount);
-		nr = last;
 	}
 
 	if (nr_pmdmapped) {
-- 
GitLab


From d7ec8f421ade2817983963a106b0085cc478c17b Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Mon, 7 Nov 2022 16:50:01 +0000
Subject: [PATCH 3008/4122] selftests/damon: test non-context inputs to
 rm_contexts file

There was a bug[1] that triggered by writing non-context DAMON debugfs
file names to the 'rm_contexts' DAMON debugfs file.  Add a selftest for
the bug to avoid it happen again.

[1] https://lore.kernel.org/damon/000000000000ede3ac05ec4abf8e@google.com/

Link: https://lkml.kernel.org/r/20221107165001.5717-3-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/damon/Makefile        |  1 +
 .../damon/debugfs_rm_non_contexts.sh          | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)
 create mode 100644 tools/testing/selftests/damon/debugfs_rm_non_contexts.sh

diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index af490acc53485..838a8e49f77b9 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -7,6 +7,7 @@ TEST_FILES = _chk_dependency.sh _debugfs_common.sh
 TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
 TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
 TEST_PROGS += debugfs_duplicate_context_creation.sh
+TEST_PROGS += debugfs_rm_non_contexts.sh
 TEST_PROGS += sysfs.sh
 TEST_PROGS += reclaim.sh lru_sort.sh
 
diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
new file mode 100644
index 0000000000000..48b7af6b022cb
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test putting non-ctx files/dirs to rm_contexts file
+# ===================================================
+
+dmesg -C
+
+for file in "$DBGFS/"*
+do
+	echo "$(basename "$f")" > "$DBGFS/rm_contexts"
+	if dmesg | grep -q BUG
+	then
+		dmesg
+		exit 1
+	fi
+done
-- 
GitLab


From 11aad2631bf74b3c811dee76154702aab855a323 Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Mon, 7 Nov 2022 15:39:22 +0000
Subject: [PATCH 3009/4122] mm/hugetlb_vmemmap: remap head page to newly
 allocated page

Today with `hugetlb_free_vmemmap=on` the struct page memory that is freed
back to page allocator is as following: for a 2M hugetlb page it will reuse
the first 4K vmemmap page to remap the remaining 7 vmemmap pages, and for a
1G hugetlb it will remap the remaining 4095 vmemmap pages. Essentially,
that means that it breaks the first 4K of a potentially contiguous chunk of
memory of 32K (for 2M hugetlb pages) or 16M (for 1G hugetlb pages). For
this reason the memory that it's free back to page allocator cannot be used
for hugetlb to allocate huge pages of the same size, but rather only of a
smaller huge page size:

Trying to assign a 64G node to hugetlb (on a 128G 2node guest, each node
having 64G):

* Before allocation:
Free pages count per migrate type at order       0      1      2      3
4      5      6      7      8      9     10
...
Node    0, zone   Normal, type      Movable    340    100     32     15
1      2      0      0      0      1  15558

$ echo 32768 > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
$ cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
 31987

* After:

Node    0, zone   Normal, type      Movable  30893  32006  31515      7
0      0      0      0      0      0      0

Notice how the memory freed back are put back into 4K / 8K / 16K page
pools. And it allocates a total of 31987 pages (63974M).

To fix this behaviour rather than remapping second vmemmap page (thus
breaking the contiguous block of memory backing the struct pages)
repopulate the first vmemmap page with a new one. We allocate and copy
from the currently mapped vmemmap page, and then remap it later on.
The same algorithm works if there's a pre initialized walk::reuse_page
and the head page doesn't need to be skipped and instead we remap it
when the @addr being changed is the @reuse_addr.

The new head page is allocated in vmemmap_remap_free() given that on
restore there's no need for functional change. Note that, because right
now one hugepage is remapped at a time, thus only one free 4K page at a
time is needed to remap the head page. Should it fail to allocate said
new page, it reuses the one that's already mapped just like before. As a
result, for every 64G of contiguous hugepages it can give back 1G more
of contiguous memory per 64G, while needing in total 128M new 4K pages
(for 2M hugetlb) or 256k (for 1G hugetlb).

After the changes, try to assign a 64G node to hugetlb (on a 128G 2node
guest, each node with 64G):

* Before allocation
Free pages count per migrate type at order       0      1      2      3
4      5      6      7      8      9     10
...
Node    0, zone   Normal, type      Movable      1      1      1      0
0      1      0      0      1      1  15564

$ echo 32768  > /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
$ cat /sys/devices/system/node/node0/hugepages/hugepages-2048kB/nr_hugepages
32394

* After:

Node    0, zone   Normal, type      Movable      0     50     97    108
96     81     70     46     18      0      0

In the example above, 407 more hugeltb 2M pages are allocated i.e. 814M out
of the 32394 (64788M) allocated. So the memory freed back is indeed being
used back in hugetlb and there's no massive order-0..order-2 pages
accumulated unused.

[joao.m.martins@oracle.com: v3]
  Link: https://lkml.kernel.org/r/20221109200623.96867-1-joao.m.martins@oracle.com
[joao.m.martins@oracle.com: add smp_wmb() to ensure page contents are visible prior to PTE write]
  Link: https://lkml.kernel.org/r/20221110121214.6297-1-joao.m.martins@oracle.com
Link: https://lkml.kernel.org/r/20221107153922.77094-1-joao.m.martins@oracle.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb_vmemmap.c | 41 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 34 insertions(+), 7 deletions(-)

diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index 7898c2c75e351..45e93a545dd7e 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -203,12 +203,7 @@ static int vmemmap_remap_range(unsigned long start, unsigned long end,
 			return ret;
 	} while (pgd++, addr = next, addr != end);
 
-	/*
-	 * We only change the mapping of the vmemmap virtual address range
-	 * [@start + PAGE_SIZE, end), so we only need to flush the TLB which
-	 * belongs to the range.
-	 */
-	flush_tlb_kernel_range(start + PAGE_SIZE, end);
+	flush_tlb_kernel_range(start, end);
 
 	return 0;
 }
@@ -244,9 +239,23 @@ static void vmemmap_remap_pte(pte_t *pte, unsigned long addr,
 	 * to the tail pages.
 	 */
 	pgprot_t pgprot = PAGE_KERNEL_RO;
-	pte_t entry = mk_pte(walk->reuse_page, pgprot);
 	struct page *page = pte_page(*pte);
+	pte_t entry;
 
+	/* Remapping the head page requires r/w */
+	if (unlikely(addr == walk->reuse_addr)) {
+		pgprot = PAGE_KERNEL;
+		list_del(&walk->reuse_page->lru);
+
+		/*
+		 * Makes sure that preceding stores to the page contents from
+		 * vmemmap_remap_free() become visible before the set_pte_at()
+		 * write.
+		 */
+		smp_wmb();
+	}
+
+	entry = mk_pte(walk->reuse_page, pgprot);
 	list_add_tail(&page->lru, walk->vmemmap_pages);
 	set_pte_at(&init_mm, addr, pte, entry);
 }
@@ -315,6 +324,24 @@ static int vmemmap_remap_free(unsigned long start, unsigned long end,
 		.reuse_addr	= reuse,
 		.vmemmap_pages	= &vmemmap_pages,
 	};
+	int nid = page_to_nid((struct page *)start);
+	gfp_t gfp_mask = GFP_KERNEL | __GFP_THISNODE | __GFP_NORETRY |
+			__GFP_NOWARN;
+
+	/*
+	 * Allocate a new head vmemmap page to avoid breaking a contiguous
+	 * block of struct page memory when freeing it back to page allocator
+	 * in free_vmemmap_page_list(). This will allow the likely contiguous
+	 * struct page backing memory to be kept contiguous and allowing for
+	 * more allocations of hugepages. Fallback to the currently
+	 * mapped head page in case should it fail to allocate.
+	 */
+	walk.reuse_page = alloc_pages_node(nid, gfp_mask, 0);
+	if (walk.reuse_page) {
+		copy_page(page_to_virt(walk.reuse_page),
+			  (void *)walk.reuse_addr);
+		list_add(&walk.reuse_page->lru, &vmemmap_pages);
+	}
 
 	/*
 	 * In order to make remapping routine most efficient for the huge pages,
-- 
GitLab


From be5ef2d9b006bbd93b1a03e1da2dbd19fb0b9f14 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 22 Nov 2022 01:42:04 -0800
Subject: [PATCH 3010/4122] mm,thp,rmap: subpages_mapcount of PTE-mapped
 subpages

Patch series "mm,thp,rmap: rework the use of subpages_mapcount", v2.


This patch (of 3):

Following suggestion from Linus, instead of counting every PTE map of a
compound page in subpages_mapcount, just count how many of its subpages
are PTE-mapped: this yields the exact number needed for NR_ANON_MAPPED and
NR_FILE_MAPPED stats, without any need for a locked scan of subpages; and
requires updating the count less often.

This does then revert total_mapcount() and folio_mapcount() to needing a
scan of subpages; but they are inherently racy, and need no locking, so
Linus is right that the scans are much better done there.  Plus (unlike in
6.1 and previous) subpages_mapcount lets us avoid the scan in the common
case of no PTE maps.  And page_mapped() and folio_mapped() remain scanless
and just as efficient with the new meaning of subpages_mapcount: those are
the functions which I most wanted to remove the scan from.

The updated page_dup_compound_rmap() is no longer suitable for use by anon
THP's __split_huge_pmd_locked(); but page_add_anon_rmap() can be used for
that, so long as its VM_BUG_ON_PAGE(!PageLocked) is deleted.

Evidence is that this way goes slightly faster than the previous
implementation for most cases; but significantly faster in the (now
scanless) pmds after ptes case, which started out at 870ms and was brought
down to 495ms by the previous series, now takes around 105ms.

Link: https://lkml.kernel.org/r/a5849eca-22f1-3517-bf29-95d982242742@google.com
Link: https://lkml.kernel.org/r/eec17e16-4e1-7c59-f1bc-5bca90dac919@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/mm/transhuge.rst |   3 +-
 include/linux/mm.h             |  52 ++++++-----
 include/linux/rmap.h           |   9 +-
 mm/huge_memory.c               |   2 +-
 mm/rmap.c                      | 160 ++++++++++++++-------------------
 5 files changed, 107 insertions(+), 119 deletions(-)

diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index 1e2a637cc6079..af4c9d70321d0 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -122,7 +122,8 @@ pages:
 
   - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount
     on relevant sub-page of the compound page, and also increment/decrement
-    ->subpages_mapcount, stored in first tail page of the compound page.
+    ->subpages_mapcount, stored in first tail page of the compound page, when
+    _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE.
     In order to have race-free accounting of sub-pages mapped, changes to
     sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are
     are all locked by bit_spin_lock of PG_locked in the first tail ->flags.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a904c2d60f123..84fb91f6f56eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -837,7 +837,7 @@ static inline int head_compound_mapcount(struct page *head)
 }
 
 /*
- * Sum of mapcounts of sub-pages, does not include compound mapcount.
+ * Number of sub-pages mapped by PTE, does not include compound mapcount.
  * Must be called only on head of compound page.
  */
 static inline int head_subpages_mapcount(struct page *head)
@@ -873,23 +873,7 @@ static inline int page_mapcount(struct page *page)
 	return head_compound_mapcount(page) + mapcount;
 }
 
-static inline int total_mapcount(struct page *page)
-{
-	if (likely(!PageCompound(page)))
-		return atomic_read(&page->_mapcount) + 1;
-	page = compound_head(page);
-	return head_compound_mapcount(page) + head_subpages_mapcount(page);
-}
-
-/*
- * Return true if this page is mapped into pagetables.
- * For compound page it returns true if any subpage of compound page is mapped,
- * even if this particular subpage is not itself mapped by any PTE or PMD.
- */
-static inline bool page_mapped(struct page *page)
-{
-	return total_mapcount(page) > 0;
-}
+int total_compound_mapcount(struct page *head);
 
 /**
  * folio_mapcount() - Calculate the number of mappings of this folio.
@@ -906,8 +890,20 @@ static inline int folio_mapcount(struct folio *folio)
 {
 	if (likely(!folio_test_large(folio)))
 		return atomic_read(&folio->_mapcount) + 1;
-	return atomic_read(folio_mapcount_ptr(folio)) + 1 +
-		atomic_read(folio_subpages_mapcount_ptr(folio));
+	return total_compound_mapcount(&folio->page);
+}
+
+static inline int total_mapcount(struct page *page)
+{
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) + 1;
+	return total_compound_mapcount(compound_head(page));
+}
+
+static inline bool folio_large_is_mapped(struct folio *folio)
+{
+	return atomic_read(folio_mapcount_ptr(folio)) +
+		atomic_read(folio_subpages_mapcount_ptr(folio)) >= 0;
 }
 
 /**
@@ -918,7 +914,21 @@ static inline int folio_mapcount(struct folio *folio)
  */
 static inline bool folio_mapped(struct folio *folio)
 {
-	return folio_mapcount(folio) > 0;
+	if (likely(!folio_test_large(folio)))
+		return atomic_read(&folio->_mapcount) >= 0;
+	return folio_large_is_mapped(folio);
+}
+
+/*
+ * Return true if this page is mapped into pagetables.
+ * For compound page it returns true if any sub-page of compound page is mapped,
+ * even if this particular sub-page is not itself mapped by any PTE or PMD.
+ */
+static inline bool page_mapped(struct page *page)
+{
+	if (likely(!PageCompound(page)))
+		return atomic_read(&page->_mapcount) >= 0;
+	return folio_large_is_mapped(page_folio(page));
 }
 
 static inline struct page *virt_to_head_page(const void *x)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 011a7530dc76d..5dadb9a3e0107 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -204,14 +204,15 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
 void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
 		unsigned long address);
 
-void page_dup_compound_rmap(struct page *page, bool compound);
+void page_dup_compound_rmap(struct page *page);
 
 static inline void page_dup_file_rmap(struct page *page, bool compound)
 {
-	if (PageCompound(page))
-		page_dup_compound_rmap(page, compound);
-	else
+	/* Is page being mapped by PTE? */
+	if (likely(!compound))
 		atomic_inc(&page->_mapcount);
+	else
+		page_dup_compound_rmap(page);
 }
 
 /**
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 114517e8cbfc0..681253adf5294 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2221,7 +2221,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, entry);
 		if (!pmd_migration)
-			page_dup_compound_rmap(page + i, false);
+			page_add_anon_rmap(page + i, vma, addr, false);
 		pte_unmap(pte);
 	}
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 4833d28c5e1a0..e813785da613f 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1117,55 +1117,36 @@ static void unlock_compound_mapcounts(struct page *head,
 	bit_spin_unlock(PG_locked, &head[1].flags);
 }
 
-/*
- * When acting on a compound page under lock_compound_mapcounts(), avoid the
- * unnecessary overhead of an actual atomic operation on its subpage mapcount.
- * Return true if this is the first increment or the last decrement
- * (remembering that page->_mapcount -1 represents logical mapcount 0).
- */
-static bool subpage_mapcount_inc(struct page *page)
-{
-	int orig_mapcount = atomic_read(&page->_mapcount);
-
-	atomic_set(&page->_mapcount, orig_mapcount + 1);
-	return orig_mapcount < 0;
-}
-
-static bool subpage_mapcount_dec(struct page *page)
-{
-	int orig_mapcount = atomic_read(&page->_mapcount);
-
-	atomic_set(&page->_mapcount, orig_mapcount - 1);
-	return orig_mapcount == 0;
-}
-
-/*
- * When mapping a THP's first pmd, or unmapping its last pmd, if that THP
- * also has pte mappings, then those must be discounted: in order to maintain
- * NR_ANON_MAPPED and NR_FILE_MAPPED statistics exactly, without any drift,
- * and to decide when an anon THP should be put on the deferred split queue.
- * This function must be called between lock_ and unlock_compound_mapcounts().
- */
-static int nr_subpages_unmapped(struct page *head, int nr_subpages)
+int total_compound_mapcount(struct page *head)
 {
-	int nr = nr_subpages;
+	int mapcount = head_compound_mapcount(head);
+	int nr_subpages;
 	int i;
 
-	/* Discount those subpages mapped by pte */
+	/* In the common case, avoid the loop when no subpages mapped by PTE */
+	if (head_subpages_mapcount(head) == 0)
+		return mapcount;
+	/*
+	 * Add all the PTE mappings of those subpages mapped by PTE.
+	 * Limit the loop, knowing that only subpages_mapcount are mapped?
+	 * Perhaps: given all the raciness, that may be a good or a bad idea.
+	 */
+	nr_subpages = thp_nr_pages(head);
 	for (i = 0; i < nr_subpages; i++)
-		if (atomic_read(&head[i]._mapcount) >= 0)
-			nr--;
-	return nr;
+		mapcount += atomic_read(&head[i]._mapcount);
+
+	/* But each of those _mapcounts was based on -1 */
+	mapcount += nr_subpages;
+	return mapcount;
 }
 
 /*
- * page_dup_compound_rmap(), used when copying mm, or when splitting pmd,
+ * page_dup_compound_rmap(), used when copying mm,
  * provides a simple example of using lock_ and unlock_compound_mapcounts().
  */
-void page_dup_compound_rmap(struct page *page, bool compound)
+void page_dup_compound_rmap(struct page *head)
 {
 	struct compound_mapcounts mapcounts;
-	struct page *head;
 
 	/*
 	 * Hugetlb pages could use lock_compound_mapcounts(), like THPs do;
@@ -1176,20 +1157,15 @@ void page_dup_compound_rmap(struct page *page, bool compound)
 	 * Note that hugetlb does not call page_add_file_rmap():
 	 * here is where hugetlb shared page mapcount is raised.
 	 */
-	if (PageHuge(page)) {
-		atomic_inc(compound_mapcount_ptr(page));
-		return;
-	}
+	if (PageHuge(head)) {
+		atomic_inc(compound_mapcount_ptr(head));
+	} else if (PageTransHuge(head)) {
+		/* That test is redundant: it's for safety or to optimize out */
 
-	head = compound_head(page);
-	lock_compound_mapcounts(head, &mapcounts);
-	if (compound) {
+		lock_compound_mapcounts(head, &mapcounts);
 		mapcounts.compound_mapcount++;
-	} else {
-		mapcounts.subpages_mapcount++;
-		subpage_mapcount_inc(page);
+		unlock_compound_mapcounts(head, &mapcounts);
 	}
-	unlock_compound_mapcounts(head, &mapcounts);
 }
 
 /**
@@ -1304,35 +1280,34 @@ void page_add_anon_rmap(struct page *page,
 	struct compound_mapcounts mapcounts;
 	int nr = 0, nr_pmdmapped = 0;
 	bool compound = flags & RMAP_COMPOUND;
-	bool first;
+	bool first = true;
 
 	if (unlikely(PageKsm(page)))
 		lock_page_memcg(page);
-	else
-		VM_BUG_ON_PAGE(!PageLocked(page), page);
 
-	if (likely(!PageCompound(page))) {
+	/* Is page being mapped by PTE? Is this its first map to be added? */
+	if (likely(!compound)) {
 		first = atomic_inc_and_test(&page->_mapcount);
 		nr = first;
+		if (first && PageCompound(page)) {
+			struct page *head = compound_head(page);
+
+			lock_compound_mapcounts(head, &mapcounts);
+			mapcounts.subpages_mapcount++;
+			nr = !mapcounts.compound_mapcount;
+			unlock_compound_mapcounts(head, &mapcounts);
+		}
+	} else if (PageTransHuge(page)) {
+		/* That test is redundant: it's for safety or to optimize out */
 
-	} else if (compound && PageTransHuge(page)) {
 		lock_compound_mapcounts(page, &mapcounts);
 		first = !mapcounts.compound_mapcount;
 		mapcounts.compound_mapcount++;
 		if (first) {
-			nr = nr_pmdmapped = thp_nr_pages(page);
-			if (mapcounts.subpages_mapcount)
-				nr = nr_subpages_unmapped(page, nr_pmdmapped);
+			nr_pmdmapped = thp_nr_pages(page);
+			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
 		}
 		unlock_compound_mapcounts(page, &mapcounts);
-	} else {
-		struct page *head = compound_head(page);
-
-		lock_compound_mapcounts(head, &mapcounts);
-		mapcounts.subpages_mapcount++;
-		first = subpage_mapcount_inc(page);
-		nr = first && !mapcounts.compound_mapcount;
-		unlock_compound_mapcounts(head, &mapcounts);
 	}
 
 	VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
@@ -1411,28 +1386,29 @@ void page_add_file_rmap(struct page *page,
 	VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
 	lock_page_memcg(page);
 
-	if (likely(!PageCompound(page))) {
+	/* Is page being mapped by PTE? Is this its first map to be added? */
+	if (likely(!compound)) {
 		first = atomic_inc_and_test(&page->_mapcount);
 		nr = first;
+		if (first && PageCompound(page)) {
+			struct page *head = compound_head(page);
+
+			lock_compound_mapcounts(head, &mapcounts);
+			mapcounts.subpages_mapcount++;
+			nr = !mapcounts.compound_mapcount;
+			unlock_compound_mapcounts(head, &mapcounts);
+		}
+	} else if (PageTransHuge(page)) {
+		/* That test is redundant: it's for safety or to optimize out */
 
-	} else if (compound && PageTransHuge(page)) {
 		lock_compound_mapcounts(page, &mapcounts);
 		first = !mapcounts.compound_mapcount;
 		mapcounts.compound_mapcount++;
 		if (first) {
-			nr = nr_pmdmapped = thp_nr_pages(page);
-			if (mapcounts.subpages_mapcount)
-				nr = nr_subpages_unmapped(page, nr_pmdmapped);
+			nr_pmdmapped = thp_nr_pages(page);
+			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
 		}
 		unlock_compound_mapcounts(page, &mapcounts);
-	} else {
-		struct page *head = compound_head(page);
-
-		lock_compound_mapcounts(head, &mapcounts);
-		mapcounts.subpages_mapcount++;
-		first = subpage_mapcount_inc(page);
-		nr = first && !mapcounts.compound_mapcount;
-		unlock_compound_mapcounts(head, &mapcounts);
 	}
 
 	if (nr_pmdmapped)
@@ -1471,29 +1447,29 @@ void page_remove_rmap(struct page *page,
 
 	lock_page_memcg(page);
 
-	/* page still mapped by someone else? */
-	if (likely(!PageCompound(page))) {
+	/* Is page being unmapped by PTE? Is this its last map to be removed? */
+	if (likely(!compound)) {
 		last = atomic_add_negative(-1, &page->_mapcount);
 		nr = last;
+		if (last && PageCompound(page)) {
+			struct page *head = compound_head(page);
+
+			lock_compound_mapcounts(head, &mapcounts);
+			mapcounts.subpages_mapcount--;
+			nr = !mapcounts.compound_mapcount;
+			unlock_compound_mapcounts(head, &mapcounts);
+		}
+	} else if (PageTransHuge(page)) {
+		/* That test is redundant: it's for safety or to optimize out */
 
-	} else if (compound && PageTransHuge(page)) {
 		lock_compound_mapcounts(page, &mapcounts);
 		mapcounts.compound_mapcount--;
 		last = !mapcounts.compound_mapcount;
 		if (last) {
-			nr = nr_pmdmapped = thp_nr_pages(page);
-			if (mapcounts.subpages_mapcount)
-				nr = nr_subpages_unmapped(page, nr_pmdmapped);
+			nr_pmdmapped = thp_nr_pages(page);
+			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
 		}
 		unlock_compound_mapcounts(page, &mapcounts);
-	} else {
-		struct page *head = compound_head(page);
-
-		lock_compound_mapcounts(head, &mapcounts);
-		mapcounts.subpages_mapcount--;
-		last = subpage_mapcount_dec(page);
-		nr = last && !mapcounts.compound_mapcount;
-		unlock_compound_mapcounts(head, &mapcounts);
 	}
 
 	if (nr_pmdmapped) {
-- 
GitLab


From 4b51634cd16a01b2be0f6b69cc0dae63de4751f2 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 22 Nov 2022 01:49:36 -0800
Subject: [PATCH 3011/4122] mm,thp,rmap: subpages_mapcount COMPOUND_MAPPED if
 PMD-mapped

Can the lock_compound_mapcount() bit_spin_lock apparatus be removed now?
Yes.  Not by atomic64_t or cmpxchg games, those get difficult on 32-bit;
but if we slightly abuse subpages_mapcount by additionally demanding that
one bit be set there when the compound page is PMD-mapped, then a cascade
of two atomic ops is able to maintain the stats without bit_spin_lock.

This is harder to reason about than when bit_spin_locked, but I believe
safe; and no drift in stats detected when testing.  When there are racing
removes and adds, of course the sequence of operations is less well-
defined; but each operation on subpages_mapcount is atomically good.  What
might be disastrous, is if subpages_mapcount could ever fleetingly appear
negative: but the pte lock (or pmd lock) these rmap functions are called
under, ensures that a last remove cannot race ahead of a first add.

Continue to make an exception for hugetlb (PageHuge) pages, though that
exception can be easily removed by a further commit if necessary: leave
subpages_mapcount 0, don't bother with COMPOUND_MAPPED in its case, just
carry on checking compound_mapcount too in folio_mapped(), page_mapped().

Evidence is that this way goes slightly faster than the previous
implementation in all cases (pmds after ptes now taking around 103ms); and
relieves us of worrying about contention on the bit_spin_lock.

Link: https://lkml.kernel.org/r/3978f3ca-5473-55a7-4e14-efea5968d892@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/mm/transhuge.rst |   7 +-
 include/linux/mm.h             |  19 +++++-
 include/linux/rmap.h           |  13 ++--
 mm/page_alloc.c                |   2 +-
 mm/rmap.c                      | 121 +++++++--------------------------
 5 files changed, 51 insertions(+), 111 deletions(-)

diff --git a/Documentation/mm/transhuge.rst b/Documentation/mm/transhuge.rst
index af4c9d70321d0..ec3dc5b042260 100644
--- a/Documentation/mm/transhuge.rst
+++ b/Documentation/mm/transhuge.rst
@@ -118,15 +118,14 @@ pages:
     succeeds on tail pages.
 
   - map/unmap of PMD entry for the whole compound page increment/decrement
-    ->compound_mapcount, stored in the first tail page of the compound page.
+    ->compound_mapcount, stored in the first tail page of the compound page;
+    and also increment/decrement ->subpages_mapcount (also in the first tail)
+    by COMPOUND_MAPPED when compound_mapcount goes from -1 to 0 or 0 to -1.
 
   - map/unmap of sub-pages with PTE entry increment/decrement ->_mapcount
     on relevant sub-page of the compound page, and also increment/decrement
     ->subpages_mapcount, stored in first tail page of the compound page, when
     _mapcount goes from -1 to 0 or 0 to -1: counting sub-pages mapped by PTE.
-    In order to have race-free accounting of sub-pages mapped, changes to
-    sub-page ->_mapcount, ->subpages_mapcount and ->compound_mapcount are
-    are all locked by bit_spin_lock of PG_locked in the first tail ->flags.
 
 split_huge_page internally has to distribute the refcounts in the head
 page to the tail pages before clearing all PG_head/tail bits from the page
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 84fb91f6f56eb..d33639be3db3e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -836,13 +836,22 @@ static inline int head_compound_mapcount(struct page *head)
 	return atomic_read(compound_mapcount_ptr(head)) + 1;
 }
 
+/*
+ * If a 16GB hugetlb page were mapped by PTEs of all of its 4kB sub-pages,
+ * its subpages_mapcount would be 0x400000: choose the COMPOUND_MAPPED bit
+ * above that range, instead of 2*(PMD_SIZE/PAGE_SIZE).  Hugetlb currently
+ * leaves subpages_mapcount at 0, but avoid surprise if it participates later.
+ */
+#define COMPOUND_MAPPED	0x800000
+#define SUBPAGES_MAPPED	(COMPOUND_MAPPED - 1)
+
 /*
  * Number of sub-pages mapped by PTE, does not include compound mapcount.
  * Must be called only on head of compound page.
  */
 static inline int head_subpages_mapcount(struct page *head)
 {
-	return atomic_read(subpages_mapcount_ptr(head));
+	return atomic_read(subpages_mapcount_ptr(head)) & SUBPAGES_MAPPED;
 }
 
 /*
@@ -902,8 +911,12 @@ static inline int total_mapcount(struct page *page)
 
 static inline bool folio_large_is_mapped(struct folio *folio)
 {
-	return atomic_read(folio_mapcount_ptr(folio)) +
-		atomic_read(folio_subpages_mapcount_ptr(folio)) >= 0;
+	/*
+	 * Reading folio_mapcount_ptr() below could be omitted if hugetlb
+	 * participated in incrementing subpages_mapcount when compound mapped.
+	 */
+	return atomic_read(folio_subpages_mapcount_ptr(folio)) > 0 ||
+		atomic_read(folio_mapcount_ptr(folio)) >= 0;
 }
 
 /**
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 5dadb9a3e0107..bd3504d11b155 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -204,15 +204,14 @@ void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *,
 void hugepage_add_new_anon_rmap(struct page *, struct vm_area_struct *,
 		unsigned long address);
 
-void page_dup_compound_rmap(struct page *page);
+static inline void __page_dup_rmap(struct page *page, bool compound)
+{
+	atomic_inc(compound ? compound_mapcount_ptr(page) : &page->_mapcount);
+}
 
 static inline void page_dup_file_rmap(struct page *page, bool compound)
 {
-	/* Is page being mapped by PTE? */
-	if (likely(!compound))
-		atomic_inc(&page->_mapcount);
-	else
-		page_dup_compound_rmap(page);
+	__page_dup_rmap(page, compound);
 }
 
 /**
@@ -261,7 +260,7 @@ static inline int page_try_dup_anon_rmap(struct page *page, bool compound,
 	 * the page R/O into both processes.
 	 */
 dup:
-	page_dup_file_rmap(page, compound);
+	__page_dup_rmap(page, compound);
 	return 0;
 }
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 0705917ddf54f..c33b6963c2d76 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1330,7 +1330,7 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
 			bad_page(page, "nonzero compound_mapcount");
 			goto out;
 		}
-		if (unlikely(head_subpages_mapcount(head_page))) {
+		if (unlikely(atomic_read(subpages_mapcount_ptr(head_page)))) {
 			bad_page(page, "nonzero subpages_mapcount");
 			goto out;
 		}
diff --git a/mm/rmap.c b/mm/rmap.c
index e813785da613f..459dc1c44d8ae 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1085,38 +1085,6 @@ int pfn_mkclean_range(unsigned long pfn, unsigned long nr_pages, pgoff_t pgoff,
 	return page_vma_mkclean_one(&pvmw);
 }
 
-struct compound_mapcounts {
-	unsigned int compound_mapcount;
-	unsigned int subpages_mapcount;
-};
-
-/*
- * lock_compound_mapcounts() first locks, then copies subpages_mapcount and
- * compound_mapcount from head[1].compound_mapcount and subpages_mapcount,
- * converting from struct page's internal representation to logical count
- * (that is, adding 1 to compound_mapcount to hide its offset by -1).
- */
-static void lock_compound_mapcounts(struct page *head,
-		struct compound_mapcounts *local)
-{
-	bit_spin_lock(PG_locked, &head[1].flags);
-	local->compound_mapcount = atomic_read(compound_mapcount_ptr(head)) + 1;
-	local->subpages_mapcount = atomic_read(subpages_mapcount_ptr(head));
-}
-
-/*
- * After caller has updated subpage._mapcount, local subpages_mapcount and
- * local compound_mapcount, as necessary, unlock_compound_mapcounts() converts
- * and copies them back to the compound head[1] fields, and then unlocks.
- */
-static void unlock_compound_mapcounts(struct page *head,
-		struct compound_mapcounts *local)
-{
-	atomic_set(compound_mapcount_ptr(head), local->compound_mapcount - 1);
-	atomic_set(subpages_mapcount_ptr(head), local->subpages_mapcount);
-	bit_spin_unlock(PG_locked, &head[1].flags);
-}
-
 int total_compound_mapcount(struct page *head)
 {
 	int mapcount = head_compound_mapcount(head);
@@ -1140,34 +1108,6 @@ int total_compound_mapcount(struct page *head)
 	return mapcount;
 }
 
-/*
- * page_dup_compound_rmap(), used when copying mm,
- * provides a simple example of using lock_ and unlock_compound_mapcounts().
- */
-void page_dup_compound_rmap(struct page *head)
-{
-	struct compound_mapcounts mapcounts;
-
-	/*
-	 * Hugetlb pages could use lock_compound_mapcounts(), like THPs do;
-	 * but at present they are still being managed by atomic operations:
-	 * which are likely to be somewhat faster, so don't rush to convert
-	 * them over without evaluating the effect.
-	 *
-	 * Note that hugetlb does not call page_add_file_rmap():
-	 * here is where hugetlb shared page mapcount is raised.
-	 */
-	if (PageHuge(head)) {
-		atomic_inc(compound_mapcount_ptr(head));
-	} else if (PageTransHuge(head)) {
-		/* That test is redundant: it's for safety or to optimize out */
-
-		lock_compound_mapcounts(head, &mapcounts);
-		mapcounts.compound_mapcount++;
-		unlock_compound_mapcounts(head, &mapcounts);
-	}
-}
-
 /**
  * page_move_anon_rmap - move a page to our anon_vma
  * @page:	the page to move to our anon_vma
@@ -1277,7 +1217,7 @@ static void __page_check_anon_rmap(struct page *page,
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address, rmap_t flags)
 {
-	struct compound_mapcounts mapcounts;
+	atomic_t *mapped;
 	int nr = 0, nr_pmdmapped = 0;
 	bool compound = flags & RMAP_COMPOUND;
 	bool first = true;
@@ -1290,24 +1230,20 @@ void page_add_anon_rmap(struct page *page,
 		first = atomic_inc_and_test(&page->_mapcount);
 		nr = first;
 		if (first && PageCompound(page)) {
-			struct page *head = compound_head(page);
-
-			lock_compound_mapcounts(head, &mapcounts);
-			mapcounts.subpages_mapcount++;
-			nr = !mapcounts.compound_mapcount;
-			unlock_compound_mapcounts(head, &mapcounts);
+			mapped = subpages_mapcount_ptr(compound_head(page));
+			nr = atomic_inc_return_relaxed(mapped);
+			nr = !(nr & COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
 
-		lock_compound_mapcounts(page, &mapcounts);
-		first = !mapcounts.compound_mapcount;
-		mapcounts.compound_mapcount++;
+		first = atomic_inc_and_test(compound_mapcount_ptr(page));
 		if (first) {
+			mapped = subpages_mapcount_ptr(page);
+			nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
 			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
 		}
-		unlock_compound_mapcounts(page, &mapcounts);
 	}
 
 	VM_BUG_ON_PAGE(!first && (flags & RMAP_EXCLUSIVE), page);
@@ -1360,6 +1296,7 @@ void page_add_new_anon_rmap(struct page *page,
 		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
 		/* increment count (starts at -1) */
 		atomic_set(compound_mapcount_ptr(page), 0);
+		atomic_set(subpages_mapcount_ptr(page), COMPOUND_MAPPED);
 		nr = thp_nr_pages(page);
 		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);
 	}
@@ -1379,7 +1316,7 @@ void page_add_new_anon_rmap(struct page *page,
 void page_add_file_rmap(struct page *page,
 	struct vm_area_struct *vma, bool compound)
 {
-	struct compound_mapcounts mapcounts;
+	atomic_t *mapped;
 	int nr = 0, nr_pmdmapped = 0;
 	bool first;
 
@@ -1391,24 +1328,20 @@ void page_add_file_rmap(struct page *page,
 		first = atomic_inc_and_test(&page->_mapcount);
 		nr = first;
 		if (first && PageCompound(page)) {
-			struct page *head = compound_head(page);
-
-			lock_compound_mapcounts(head, &mapcounts);
-			mapcounts.subpages_mapcount++;
-			nr = !mapcounts.compound_mapcount;
-			unlock_compound_mapcounts(head, &mapcounts);
+			mapped = subpages_mapcount_ptr(compound_head(page));
+			nr = atomic_inc_return_relaxed(mapped);
+			nr = !(nr & COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
 
-		lock_compound_mapcounts(page, &mapcounts);
-		first = !mapcounts.compound_mapcount;
-		mapcounts.compound_mapcount++;
+		first = atomic_inc_and_test(compound_mapcount_ptr(page));
 		if (first) {
+			mapped = subpages_mapcount_ptr(page);
+			nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
 			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
 		}
-		unlock_compound_mapcounts(page, &mapcounts);
 	}
 
 	if (nr_pmdmapped)
@@ -1432,7 +1365,7 @@ void page_add_file_rmap(struct page *page,
 void page_remove_rmap(struct page *page,
 	struct vm_area_struct *vma, bool compound)
 {
-	struct compound_mapcounts mapcounts;
+	atomic_t *mapped;
 	int nr = 0, nr_pmdmapped = 0;
 	bool last;
 
@@ -1452,24 +1385,20 @@ void page_remove_rmap(struct page *page,
 		last = atomic_add_negative(-1, &page->_mapcount);
 		nr = last;
 		if (last && PageCompound(page)) {
-			struct page *head = compound_head(page);
-
-			lock_compound_mapcounts(head, &mapcounts);
-			mapcounts.subpages_mapcount--;
-			nr = !mapcounts.compound_mapcount;
-			unlock_compound_mapcounts(head, &mapcounts);
+			mapped = subpages_mapcount_ptr(compound_head(page));
+			nr = atomic_dec_return_relaxed(mapped);
+			nr = !(nr & COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
 
-		lock_compound_mapcounts(page, &mapcounts);
-		mapcounts.compound_mapcount--;
-		last = !mapcounts.compound_mapcount;
+		last = atomic_add_negative(-1, compound_mapcount_ptr(page));
 		if (last) {
+			mapped = subpages_mapcount_ptr(page);
+			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
 			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - mapcounts.subpages_mapcount;
+			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
 		}
-		unlock_compound_mapcounts(page, &mapcounts);
 	}
 
 	if (nr_pmdmapped) {
-- 
GitLab


From 96d82deb743ab42c8f0b911eb49db83f0e6db311 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Tue, 22 Nov 2022 01:51:50 -0800
Subject: [PATCH 3012/4122] mm,thp,rmap: clean up the end of
 __split_huge_pmd_locked()

It's hard to add a page_add_anon_rmap() into __split_huge_pmd_locked()'s
HPAGE_PMD_NR set_pte_at() loop, without wincing at the "freeze" case's
HPAGE_PMD_NR page_remove_rmap() loop below it.

It's just a mistake to add rmaps in the "freeze" (insert migration entries
prior to splitting huge page) case: the pmd_migration case already avoids
doing that, so just follow its lead.  page_add_ref() versus put_page()
likewise.  But why is one more put_page() needed in the "freeze" case?
Because it's removing the pmd rmap, already removed when pmd_migration
(and freeze and pmd_migration are mutually exclusive cases).

Link: https://lkml.kernel.org/r/d43748aa-fece-e0b9-c4ab-f23c9ebc9011@google.com
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Dan Carpenter <error27@gmail.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 681253adf5294..aba340684285a 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2141,7 +2141,6 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		uffd_wp = pmd_uffd_wp(old_pmd);
 
 		VM_BUG_ON_PAGE(!page_count(page), page);
-		page_ref_add(page, HPAGE_PMD_NR - 1);
 
 		/*
 		 * Without "freeze", we'll simply split the PMD, propagating the
@@ -2161,6 +2160,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 		anon_exclusive = PageAnon(page) && PageAnonExclusive(page);
 		if (freeze && anon_exclusive && page_try_share_anon_rmap(page))
 			freeze = false;
+		if (!freeze)
+			page_ref_add(page, HPAGE_PMD_NR - 1);
 	}
 
 	/*
@@ -2216,27 +2217,21 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 				entry = pte_mksoft_dirty(entry);
 			if (uffd_wp)
 				entry = pte_mkuffd_wp(entry);
+			page_add_anon_rmap(page + i, vma, addr, false);
 		}
 		pte = pte_offset_map(&_pmd, addr);
 		BUG_ON(!pte_none(*pte));
 		set_pte_at(mm, addr, pte, entry);
-		if (!pmd_migration)
-			page_add_anon_rmap(page + i, vma, addr, false);
 		pte_unmap(pte);
 	}
 
 	if (!pmd_migration)
 		page_remove_rmap(page, vma, true);
+	if (freeze)
+		put_page(page);
 
 	smp_wmb(); /* make pte visible before pmd */
 	pmd_populate(mm, pmd, pgtable);
-
-	if (freeze) {
-		for (i = 0; i < HPAGE_PMD_NR; i++) {
-			page_remove_rmap(page + i, vma, false);
-			put_page(page + i);
-		}
-	}
 }
 
 void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
-- 
GitLab


From 1a1af17ea81115914c8efc1177fd94719c84fc11 Mon Sep 17 00:00:00 2001
From: Rong Tao <rongtao@cestc.cn>
Date: Tue, 8 Nov 2022 19:53:48 +0800
Subject: [PATCH 3013/4122] tools/vm/page_owner: ignore page_owner_sort binary

page_owner_sort was introduced since commit 48c96a368579 ("mm/page_owner:
keep track of page owners"), and we should ignore it.

Link: https://lkml.kernel.org/r/tencent_F6CAC0ABE16839E2B2419BD07316DA65BB06@qq.com
Signed-off-by: Rong Tao <rongtao@cestc.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/vm/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/vm/.gitignore b/tools/vm/.gitignore
index 79bb92ae1bb35..922879f93fc84 100644
--- a/tools/vm/.gitignore
+++ b/tools/vm/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 slabinfo
 page-types
+page_owner_sort
-- 
GitLab


From d84887739d5c982afa50b155aad628bb8ff206c5 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@vmware.com>
Date: Tue, 8 Nov 2022 18:46:46 +0100
Subject: [PATCH 3014/4122] mm/mprotect: allow clean exclusive anon pages to be
 writable

Patch series "mm/autonuma: replace savedwrite infrastructure", v2.

As discussed in my talk at LPC, we can reuse the same mechanism for
deciding whether to map a pte writable when upgrading permissions via
mprotect() -- e.g., PROT_READ -> PROT_READ|PROT_WRITE -- to replace the
savedwrite infrastructure used for NUMA hinting faults (e.g., PROT_NONE ->
PROT_READ|PROT_WRITE).

Instead of maintaining previous write permissions for a pte/pmd, we
re-determine if the pte/pmd can be writable.  The big benefit is that we
have a common logic for deciding whether we can map a pte/pmd writable on
protection changes.

For private mappings, there should be no difference -- from what I
understand, that is what autonuma benchmarks care about.

I ran autonumabench for v1 on a system with 2 NUMA nodes, 96 GiB each via:
	perf stat --null --repeat 10
The numa01 benchmark is quite noisy in my environment and I failed to
reduce the noise so far.

numa01:
	mm-unstable:   146.88 +- 6.54 seconds time elapsed  ( +-  4.45% )
	mm-unstable++: 147.45 +- 13.39 seconds time elapsed  ( +-  9.08% )

numa02:
	mm-unstable:   16.0300 +- 0.0624 seconds time elapsed  ( +-  0.39% )
	mm-unstable++: 16.1281 +- 0.0945 seconds time elapsed  ( +-  0.59% )

It is worth noting that for shared writable mappings that require
writenotify, we will only avoid write faults if the pte/pmd is dirty
(inherited from the older mprotect logic).  If we ever care about
optimizing that further, we'd need a different mechanism to identify
whether the FS still needs to get notified on the next write access.

In any case, such an optimization will then not be autonuma-specific, but
mprotect() permission upgrades would similarly benefit from it.


This patch (of 7):

Anonymous pages might have the dirty bit clear, but this should not
prevent mprotect from making them writable if they are exclusive.
Therefore, skip the test whether the page is dirty in this case.

Note that there are already other ways to get a writable PTE mapping an
anonymous page that is clean: for example, via MADV_FREE.  In an ideal
world, we'd have a different indication from the FS whether writenotify is
still required.

[david@redhat.com: return directly; update description]
Link: https://lkml.kernel.org/r/20221108174652.198904-1-david@redhat.com
Link: https://lkml.kernel.org/r/20221108174652.198904-2-david@redhat.com
Signed-off-by: Nadav Amit <namit@vmware.com>
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mprotect.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8d770855b5915..86a28c0e190fe 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -46,7 +46,7 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma,
 
 	VM_BUG_ON(!(vma->vm_flags & VM_WRITE) || pte_write(pte));
 
-	if (pte_protnone(pte) || !pte_dirty(pte))
+	if (pte_protnone(pte))
 		return false;
 
 	/* Do we need write faults for softdirty tracking? */
@@ -65,11 +65,10 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma,
 		 * the PT lock.
 		 */
 		page = vm_normal_page(vma, addr, pte);
-		if (!page || !PageAnon(page) || !PageAnonExclusive(page))
-			return false;
+		return page && PageAnon(page) && PageAnonExclusive(page);
 	}
 
-	return true;
+	return pte_dirty(pte);
 }
 
 static unsigned long change_pte_range(struct mmu_gather *tlb,
-- 
GitLab


From 7ea7e333842ed50fe0a0b256c270b54f8ec2353e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 8 Nov 2022 18:46:47 +0100
Subject: [PATCH 3015/4122] mm/mprotect: minor can_change_pte_writable()
 cleanups

We want to replicate this code for handling PMDs soon.

(1) No need to crash the kernel, warning and rejecting is good enough. As
    this will no longer get optimized out, drop the pte_write() check: no
    harm would be done.

(2) Add a comment why PROT_NONE mapped pages are excluded.

(3) Add a comment regarding MAP_SHARED handling and why we rely on the
    dirty bit in the PTE.

Link: https://lkml.kernel.org/r/20221108174652.198904-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mprotect.c | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/mm/mprotect.c b/mm/mprotect.c
index 86a28c0e190fe..72aabffb78713 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -44,8 +44,10 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma,
 {
 	struct page *page;
 
-	VM_BUG_ON(!(vma->vm_flags & VM_WRITE) || pte_write(pte));
+	if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE)))
+		return false;
 
+	/* Don't touch entries that are not even readable. */
 	if (pte_protnone(pte))
 		return false;
 
@@ -59,15 +61,22 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma,
 
 	if (!(vma->vm_flags & VM_SHARED)) {
 		/*
-		 * We can only special-case on exclusive anonymous pages,
-		 * because we know that our write-fault handler similarly would
-		 * map them writable without any additional checks while holding
-		 * the PT lock.
+		 * Writable MAP_PRIVATE mapping: We can only special-case on
+		 * exclusive anonymous pages, because we know that our
+		 * write-fault handler similarly would map them writable without
+		 * any additional checks while holding the PT lock.
 		 */
 		page = vm_normal_page(vma, addr, pte);
 		return page && PageAnon(page) && PageAnonExclusive(page);
 	}
 
+	/*
+	 * Writable MAP_SHARED mapping: "clean" might indicate that the FS still
+	 * needs a real write-fault for writenotify
+	 * (see vma_wants_writenotify()). If "dirty", the assumption is that the
+	 * FS was already notified and we can simply mark the PTE writable
+	 * just like the write-fault handler would do.
+	 */
 	return pte_dirty(pte);
 }
 
-- 
GitLab


From c27f479ef5428f691787fb6fe3703a70e931ae8c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 8 Nov 2022 18:46:48 +0100
Subject: [PATCH 3016/4122] mm/huge_memory: try avoiding write faults when
 changing PMD protection

Let's replicate what we have for PTEs in can_change_pte_writable() also
for PMDs.

While this might look like a pure performance improvement, we'll us this to
get rid of savedwrite handling in do_huge_pmd_numa_page() next. Place
do_huge_pmd_numa_page() strategically good for that purpose.

Note that MM_CP_TRY_CHANGE_WRITABLE is currently only set when we come
via mprotect_fixup().

Link: https://lkml.kernel.org/r/20221108174652.198904-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c | 38 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 36 insertions(+), 2 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index aba340684285a..fac917b781029 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1390,6 +1390,36 @@ fallback:
 	return VM_FAULT_FALLBACK;
 }
 
+static inline bool can_change_pmd_writable(struct vm_area_struct *vma,
+					   unsigned long addr, pmd_t pmd)
+{
+	struct page *page;
+
+	if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE)))
+		return false;
+
+	/* Don't touch entries that are not even readable (NUMA hinting). */
+	if (pmd_protnone(pmd))
+		return false;
+
+	/* Do we need write faults for softdirty tracking? */
+	if (vma_soft_dirty_enabled(vma) && !pmd_soft_dirty(pmd))
+		return false;
+
+	/* Do we need write faults for uffd-wp tracking? */
+	if (userfaultfd_huge_pmd_wp(vma, pmd))
+		return false;
+
+	if (!(vma->vm_flags & VM_SHARED)) {
+		/* See can_change_pte_writable(). */
+		page = vm_normal_page_pmd(vma, addr, pmd);
+		return page && PageAnon(page) && PageAnonExclusive(page);
+	}
+
+	/* See can_change_pte_writable(). */
+	return pmd_dirty(pmd);
+}
+
 /* FOLL_FORCE can write to even unwritable PMDs in COW mappings. */
 static inline bool can_follow_write_pmd(pmd_t pmd, struct page *page,
 					struct vm_area_struct *vma,
@@ -1893,13 +1923,17 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 */
 		entry = pmd_clear_uffd_wp(entry);
 	}
+
+	/* See change_pte_range(). */
+	if ((cp_flags & MM_CP_TRY_CHANGE_WRITABLE) && !pmd_write(entry) &&
+	    can_change_pmd_writable(vma, addr, entry))
+		entry = pmd_mkwrite(entry);
+
 	ret = HPAGE_PMD_NR;
 	set_pmd_at(mm, addr, pmd, entry);
 
 	if (huge_pmd_needs_flush(oldpmd, entry))
 		tlb_flush_pmd_range(tlb, addr, HPAGE_PMD_SIZE);
-
-	BUG_ON(vma_is_anonymous(vma) && !preserve_write && pmd_write(entry));
 unlock:
 	spin_unlock(ptl);
 	return ret;
-- 
GitLab


From eb309ec89953d6a3e8e35a3a577bab13893858d8 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 8 Nov 2022 18:46:49 +0100
Subject: [PATCH 3017/4122] mm/mprotect: factor out check whether manual PTE
 write upgrades are required

Let's factor the check out into vma_wants_manual_pte_write_upgrade(), to be
reused in NUMA hinting fault context soon.

Link: https://lkml.kernel.org/r/20221108174652.198904-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 16 ++++++++++++++--
 mm/mprotect.c      | 17 ++++-------------
 2 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d33639be3db3e..e203e8a83e2de 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2088,6 +2088,20 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 #define  MM_CP_UFFD_WP_ALL                 (MM_CP_UFFD_WP | \
 					    MM_CP_UFFD_WP_RESOLVE)
 
+int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
+static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
+{
+	/*
+	 * We want to check manually if we can change individual PTEs writable
+	 * if we can't do that automatically for all PTEs in a mapping. For
+	 * private mappings, that's always the case when we have write
+	 * permissions as we properly have to handle COW.
+	 */
+	if (vma->vm_flags & VM_SHARED)
+		return vma_wants_writenotify(vma, vma->vm_page_prot);
+	return !!(vma->vm_flags & VM_WRITE);
+
+}
 extern unsigned long change_protection(struct mmu_gather *tlb,
 			      struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
@@ -2227,8 +2241,6 @@ static inline int pte_devmap(pte_t pte)
 }
 #endif
 
-int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
-
 extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
 			       spinlock_t **ptl);
 static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr,
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 72aabffb78713..fe22db2c9cdd5 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -558,8 +558,8 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
+	unsigned int mm_cp_flags = 0;
 	unsigned long charged = 0;
-	bool try_change_writable;
 	pgoff_t pgoff;
 	int error;
 
@@ -637,20 +637,11 @@ success:
 	 * held in write mode.
 	 */
 	vma->vm_flags = newflags;
-	/*
-	 * We want to check manually if we can change individual PTEs writable
-	 * if we can't do that automatically for all PTEs in a mapping. For
-	 * private mappings, that's always the case when we have write
-	 * permissions as we properly have to handle COW.
-	 */
-	if (vma->vm_flags & VM_SHARED)
-		try_change_writable = vma_wants_writenotify(vma, vma->vm_page_prot);
-	else
-		try_change_writable = !!(vma->vm_flags & VM_WRITE);
+	if (vma_wants_manual_pte_write_upgrade(vma))
+		mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE;
 	vma_set_page_prot(vma);
 
-	change_protection(tlb, vma, start, end, vma->vm_page_prot,
-			  try_change_writable ? MM_CP_TRY_CHANGE_WRITABLE : 0);
+	change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags);
 
 	/*
 	 * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major
-- 
GitLab


From 6a56ccbcf6c69538b152644107a1d7383c876ca7 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 8 Nov 2022 18:46:50 +0100
Subject: [PATCH 3018/4122] mm/autonuma: use can_change_(pte|pmd)_writable() to
 replace savedwrite

commit b191f9b106ea ("mm: numa: preserve PTE write permissions across a
NUMA hinting fault") added remembering write permissions using ordinary
pte_write() for PROT_NONE mapped pages to avoid write faults when
remapping the page !PROT_NONE on NUMA hinting faults.

That commit noted:

    The patch looks hacky but the alternatives looked worse. The tidest was
    to rewalk the page tables after a hinting fault but it was more complex
    than this approach and the performance was worse. It's not generally
    safe to just mark the page writable during the fault if it's a write
    fault as it may have been read-only for COW so that approach was
    discarded.

Later, commit 288bc54949fc ("mm/autonuma: let architecture override how
the write bit should be stashed in a protnone pte.") introduced a family
of savedwrite PTE functions that didn't necessarily improve the whole
situation.

One confusing thing is that nowadays, if a page is pte_protnone()
and pte_savedwrite() then also pte_write() is true. Another source of
confusion is that there is only a single pte_mk_savedwrite() call in the
kernel. All other write-protection code seems to silently rely on
pte_wrprotect().

Ever since PageAnonExclusive was introduced and we started using it in
mprotect context via commit 64fe24a3e05e ("mm/mprotect: try avoiding write
faults for exclusive anonymous pages when changing protection"), we do
have machinery in place to avoid write faults when changing protection,
which is exactly what we want to do here.

Let's similarly do what ordinary mprotect() does nowadays when upgrading
write permissions and reuse can_change_pte_writable() and
can_change_pmd_writable() to detect if we can upgrade PTE permissions to be
writable.

For anonymous pages there should be absolutely no change: if an
anonymous page is not exclusive, it could not have been mapped writable --
because only exclusive anonymous pages can be mapped writable.

However, there *might* be a change for writable shared mappings that
require writenotify: if they are not dirty, we cannot map them writable.
While it might not matter in practice, we'd need a different way to
identify whether writenotify is actually required -- and ordinary mprotect
would benefit from that as well.

Note that we don't optimize for the actual migration case:
(1) When migration succeeds the new PTE will not be writable because the
    source PTE was not writable (protnone); in the future we
    might just optimize that case similarly by reusing
    can_change_pte_writable()/can_change_pmd_writable() when removing
    migration PTEs.
(2) When migration fails, we'd have to recalculate the "writable" flag
    because we temporarily dropped the PT lock; for now keep it simple and
    set "writable=false".

We'll remove all savedwrite leftovers next.

Link: https://lkml.kernel.org/r/20221108174652.198904-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h |  2 ++
 mm/huge_memory.c   | 26 +++++++++++++++-----------
 mm/ksm.c           |  9 ++++-----
 mm/memory.c        | 16 +++++++++++++---
 mm/mprotect.c      |  7 ++-----
 5 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e203e8a83e2de..8597ef676fc3f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2102,6 +2102,8 @@ static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma
 	return !!(vma->vm_flags & VM_WRITE);
 
 }
+bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t pte);
 extern unsigned long change_protection(struct mmu_gather *tlb,
 			      struct vm_area_struct *vma, unsigned long start,
 			      unsigned long end, pgprot_t newprot,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index fac917b781029..29102e3ddf843 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1511,8 +1511,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
 	int page_nid = NUMA_NO_NODE;
 	int target_nid, last_cpupid = (-1 & LAST_CPUPID_MASK);
-	bool migrated = false;
-	bool was_writable = pmd_savedwrite(oldpmd);
+	bool migrated = false, writable = false;
 	int flags = 0;
 
 	vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd);
@@ -1522,12 +1521,22 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	}
 
 	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
+
+	/*
+	 * Detect now whether the PMD could be writable; this information
+	 * is only valid while holding the PT lock.
+	 */
+	writable = pmd_write(pmd);
+	if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
+	    can_change_pmd_writable(vma, vmf->address, pmd))
+		writable = true;
+
 	page = vm_normal_page_pmd(vma, haddr, pmd);
 	if (!page)
 		goto out_map;
 
 	/* See similar comment in do_numa_page for explanation */
-	if (!was_writable)
+	if (!writable)
 		flags |= TNF_NO_GROUP;
 
 	page_nid = page_to_nid(page);
@@ -1546,6 +1555,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf)
 	}
 
 	spin_unlock(vmf->ptl);
+	writable = false;
 
 	migrated = migrate_misplaced_page(page, vma, target_nid);
 	if (migrated) {
@@ -1572,7 +1582,7 @@ out_map:
 	/* Restore the PMD */
 	pmd = pmd_modify(oldpmd, vma->vm_page_prot);
 	pmd = pmd_mkyoung(pmd);
-	if (was_writable)
+	if (writable)
 		pmd = pmd_mkwrite(pmd);
 	set_pmd_at(vma->vm_mm, haddr, vmf->pmd, pmd);
 	update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
@@ -1813,11 +1823,10 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	struct mm_struct *mm = vma->vm_mm;
 	spinlock_t *ptl;
 	pmd_t oldpmd, entry;
-	bool preserve_write;
-	int ret;
 	bool prot_numa = cp_flags & MM_CP_PROT_NUMA;
 	bool uffd_wp = cp_flags & MM_CP_UFFD_WP;
 	bool uffd_wp_resolve = cp_flags & MM_CP_UFFD_WP_RESOLVE;
+	int ret = 1;
 
 	tlb_change_page_size(tlb, HPAGE_PMD_SIZE);
 
@@ -1828,9 +1837,6 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	if (!ptl)
 		return 0;
 
-	preserve_write = prot_numa && pmd_write(*pmd);
-	ret = 1;
-
 #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
 	if (is_swap_pmd(*pmd)) {
 		swp_entry_t entry = pmd_to_swp_entry(*pmd);
@@ -1910,8 +1916,6 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
 	oldpmd = pmdp_invalidate_ad(vma, addr, pmd);
 
 	entry = pmd_modify(oldpmd, newprot);
-	if (preserve_write)
-		entry = pmd_mk_savedwrite(entry);
 	if (uffd_wp) {
 		entry = pmd_wrprotect(entry);
 		entry = pmd_mkuffd_wp(entry);
diff --git a/mm/ksm.c b/mm/ksm.c
index 7ba97f86d8316..a71245241d220 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1041,7 +1041,6 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 
 	anon_exclusive = PageAnonExclusive(page);
 	if (pte_write(*pvmw.pte) || pte_dirty(*pvmw.pte) ||
-	    (pte_protnone(*pvmw.pte) && pte_savedwrite(*pvmw.pte)) ||
 	    anon_exclusive || mm_tlb_flush_pending(mm)) {
 		pte_t entry;
 
@@ -1079,11 +1078,11 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 
 		if (pte_dirty(entry))
 			set_page_dirty(page);
+		entry = pte_mkclean(entry);
+
+		if (pte_write(entry))
+			entry = pte_wrprotect(entry);
 
-		if (pte_protnone(entry))
-			entry = pte_mkclean(pte_clear_savedwrite(entry));
-		else
-			entry = pte_mkclean(pte_wrprotect(entry));
 		set_pte_at_notify(mm, pvmw.address, pvmw.pte, entry);
 	}
 	*orig_pte = *pvmw.pte;
diff --git a/mm/memory.c b/mm/memory.c
index 142c4229549b0..1749c638734f9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4675,10 +4675,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct page *page = NULL;
 	int page_nid = NUMA_NO_NODE;
+	bool writable = false;
 	int last_cpupid;
 	int target_nid;
 	pte_t pte, old_pte;
-	bool was_writable = pte_savedwrite(vmf->orig_pte);
 	int flags = 0;
 
 	/*
@@ -4697,6 +4697,15 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	old_pte = ptep_get(vmf->pte);
 	pte = pte_modify(old_pte, vma->vm_page_prot);
 
+	/*
+	 * Detect now whether the PTE could be writable; this information
+	 * is only valid while holding the PT lock.
+	 */
+	writable = pte_write(pte);
+	if (!writable && vma_wants_manual_pte_write_upgrade(vma) &&
+	    can_change_pte_writable(vma, vmf->address, pte))
+		writable = true;
+
 	page = vm_normal_page(vma, vmf->address, pte);
 	if (!page || is_zone_device_page(page))
 		goto out_map;
@@ -4713,7 +4722,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 	 * pte_dirty has unpredictable behaviour between PTE scan updates,
 	 * background writeback, dirty balancing and application behaviour.
 	 */
-	if (!was_writable)
+	if (!writable)
 		flags |= TNF_NO_GROUP;
 
 	/*
@@ -4740,6 +4749,7 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf)
 		goto out_map;
 	}
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
+	writable = false;
 
 	/* Migrate to the requested node */
 	if (migrate_misplaced_page(page, vma, target_nid)) {
@@ -4768,7 +4778,7 @@ out_map:
 	old_pte = ptep_modify_prot_start(vma, vmf->address, vmf->pte);
 	pte = pte_modify(old_pte, vma->vm_page_prot);
 	pte = pte_mkyoung(pte);
-	if (was_writable)
+	if (writable)
 		pte = pte_mkwrite(pte);
 	ptep_modify_prot_commit(vma, vmf->address, vmf->pte, old_pte, pte);
 	update_mmu_cache(vma, vmf->address, vmf->pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index fe22db2c9cdd5..093cb50f2fc46 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -39,8 +39,8 @@
 
 #include "internal.h"
 
-static inline bool can_change_pte_writable(struct vm_area_struct *vma,
-					   unsigned long addr, pte_t pte)
+bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr,
+			     pte_t pte)
 {
 	struct page *page;
 
@@ -121,7 +121,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
 		oldpte = *pte;
 		if (pte_present(oldpte)) {
 			pte_t ptent;
-			bool preserve_write = prot_numa && pte_write(oldpte);
 
 			/*
 			 * Avoid trapping faults against the zero or KSM
@@ -177,8 +176,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
 
 			oldpte = ptep_modify_prot_start(vma, addr, pte);
 			ptent = pte_modify(oldpte, newprot);
-			if (preserve_write)
-				ptent = pte_mk_savedwrite(ptent);
 
 			if (uffd_wp) {
 				ptent = pte_wrprotect(ptent);
-- 
GitLab


From d6379159f47630813f06f97535cc82ce7b9eed49 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 8 Nov 2022 18:46:51 +0100
Subject: [PATCH 3019/4122] mm: remove unused savedwrite infrastructure

NUMA hinting no longer uses savedwrite, let's rip it out.

... and while at it, drop __pte_write() and __pmd_write() on ppc64.

Link: https://lkml.kernel.org/r/20221108174652.198904-7-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 80 +-------------------
 arch/powerpc/kvm/book3s_hv_rm_mmu.c          |  2 +-
 include/linux/pgtable.h                      | 24 ------
 mm/debug_vm_pgtable.c                        | 32 --------
 4 files changed, 5 insertions(+), 133 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c436d84226540..cb4c67bf45d71 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -401,35 +401,9 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH
 #define pmdp_clear_flush_young pmdp_test_and_clear_young
 
-static inline int __pte_write(pte_t pte)
-{
-	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
-}
-
-#ifdef CONFIG_NUMA_BALANCING
-#define pte_savedwrite pte_savedwrite
-static inline bool pte_savedwrite(pte_t pte)
-{
-	/*
-	 * Saved write ptes are prot none ptes that doesn't have
-	 * privileged bit sit. We mark prot none as one which has
-	 * present and pviliged bit set and RWX cleared. To mark
-	 * protnone which used to have _PAGE_WRITE set we clear
-	 * the privileged bit.
-	 */
-	return !(pte_raw(pte) & cpu_to_be64(_PAGE_RWX | _PAGE_PRIVILEGED));
-}
-#else
-#define pte_savedwrite pte_savedwrite
-static inline bool pte_savedwrite(pte_t pte)
-{
-	return false;
-}
-#endif
-
 static inline int pte_write(pte_t pte)
 {
-	return __pte_write(pte) || pte_savedwrite(pte);
+	return !!(pte_raw(pte) & cpu_to_be64(_PAGE_WRITE));
 }
 
 static inline int pte_read(pte_t pte)
@@ -441,24 +415,16 @@ static inline int pte_read(pte_t pte)
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
 {
-	if (__pte_write(*ptep))
+	if (pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 0);
-	else if (unlikely(pte_savedwrite(*ptep)))
-		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 0);
 }
 
 #define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
 static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 					   unsigned long addr, pte_t *ptep)
 {
-	/*
-	 * We should not find protnone for hugetlb, but this complete the
-	 * interface.
-	 */
-	if (__pte_write(*ptep))
+	if (pte_write(*ptep))
 		pte_update(mm, addr, ptep, _PAGE_WRITE, 0, 1);
-	else if (unlikely(pte_savedwrite(*ptep)))
-		pte_update(mm, addr, ptep, 0, _PAGE_PRIVILEGED, 1);
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR
@@ -535,36 +501,6 @@ static inline int pte_protnone(pte_t pte)
 	return (pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE | _PAGE_RWX)) ==
 		cpu_to_be64(_PAGE_PRESENT | _PAGE_PTE);
 }
-
-#define pte_mk_savedwrite pte_mk_savedwrite
-static inline pte_t pte_mk_savedwrite(pte_t pte)
-{
-	/*
-	 * Used by Autonuma subsystem to preserve the write bit
-	 * while marking the pte PROT_NONE. Only allow this
-	 * on PROT_NONE pte
-	 */
-	VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
-		  cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
-	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
-}
-
-#define pte_clear_savedwrite pte_clear_savedwrite
-static inline pte_t pte_clear_savedwrite(pte_t pte)
-{
-	/*
-	 * Used by KSM subsystem to make a protnone pte readonly.
-	 */
-	VM_BUG_ON(!pte_protnone(pte));
-	return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
-}
-#else
-#define pte_clear_savedwrite pte_clear_savedwrite
-static inline pte_t pte_clear_savedwrite(pte_t pte)
-{
-	VM_WARN_ON(1);
-	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
-}
 #endif /* CONFIG_NUMA_BALANCING */
 
 static inline bool pte_hw_valid(pte_t pte)
@@ -641,8 +577,6 @@ static inline unsigned long pte_pfn(pte_t pte)
 /* Generic modifiers for PTE bits */
 static inline pte_t pte_wrprotect(pte_t pte)
 {
-	if (unlikely(pte_savedwrite(pte)))
-		return pte_clear_savedwrite(pte);
 	return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
 }
 
@@ -1139,8 +1073,6 @@ static inline pte_t *pmdp_ptep(pmd_t *pmd)
 #define pmd_mkclean(pmd)	pte_pmd(pte_mkclean(pmd_pte(pmd)))
 #define pmd_mkyoung(pmd)	pte_pmd(pte_mkyoung(pmd_pte(pmd)))
 #define pmd_mkwrite(pmd)	pte_pmd(pte_mkwrite(pmd_pte(pmd)))
-#define pmd_mk_savedwrite(pmd)	pte_pmd(pte_mk_savedwrite(pmd_pte(pmd)))
-#define pmd_clear_savedwrite(pmd)	pte_pmd(pte_clear_savedwrite(pmd_pte(pmd)))
 
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #define pmd_soft_dirty(pmd)    pte_soft_dirty(pmd_pte(pmd))
@@ -1162,8 +1094,6 @@ static inline int pmd_protnone(pmd_t pmd)
 #endif /* CONFIG_NUMA_BALANCING */
 
 #define pmd_write(pmd)		pte_write(pmd_pte(pmd))
-#define __pmd_write(pmd)	__pte_write(pmd_pte(pmd))
-#define pmd_savedwrite(pmd)	pte_savedwrite(pmd_pte(pmd))
 
 #define pmd_access_permitted pmd_access_permitted
 static inline bool pmd_access_permitted(pmd_t pmd, bool write)
@@ -1241,10 +1171,8 @@ static inline int __pmdp_test_and_clear_young(struct mm_struct *mm,
 static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pmd_t *pmdp)
 {
-	if (__pmd_write((*pmdp)))
+	if (pmd_write(*pmdp))
 		pmd_hugepage_update(mm, addr, pmdp, _PAGE_WRITE, 0);
-	else if (unlikely(pmd_savedwrite(*pmdp)))
-		pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
 }
 
 /*
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 5a05953ae13fe..9182324dbef93 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -265,7 +265,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
 		}
 		pte = kvmppc_read_update_linux_pte(ptep, writing);
 		if (pte_present(pte) && !pte_protnone(pte)) {
-			if (writing && !__pte_write(pte))
+			if (writing && !pte_write(pte))
 				/* make the actual HPTE be read-only */
 				ptel = hpte_make_readonly(ptel);
 			is_ci = pte_ci(pte);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 5f0d7d0b9471b..c74cce67eec85 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -503,30 +503,6 @@ static inline pte_t pte_sw_mkyoung(pte_t pte)
 #define pte_sw_mkyoung	pte_sw_mkyoung
 #endif
 
-#ifndef pte_savedwrite
-#define pte_savedwrite pte_write
-#endif
-
-#ifndef pte_mk_savedwrite
-#define pte_mk_savedwrite pte_mkwrite
-#endif
-
-#ifndef pte_clear_savedwrite
-#define pte_clear_savedwrite pte_wrprotect
-#endif
-
-#ifndef pmd_savedwrite
-#define pmd_savedwrite pmd_write
-#endif
-
-#ifndef pmd_mk_savedwrite
-#define pmd_mk_savedwrite pmd_mkwrite
-#endif
-
-#ifndef pmd_clear_savedwrite
-#define pmd_clear_savedwrite pmd_wrprotect
-#endif
-
 #ifndef __HAVE_ARCH_PMDP_SET_WRPROTECT
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static inline void pmdp_set_wrprotect(struct mm_struct *mm,
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 2b61fde8c38cc..c631ade3f1d26 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -171,18 +171,6 @@ static void __init pte_advanced_tests(struct pgtable_debug_args *args)
 	ptep_get_and_clear_full(args->mm, args->vaddr, args->ptep, 1);
 }
 
-static void __init pte_savedwrite_tests(struct pgtable_debug_args *args)
-{
-	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot_none);
-
-	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
-		return;
-
-	pr_debug("Validating PTE saved write\n");
-	WARN_ON(!pte_savedwrite(pte_mk_savedwrite(pte_clear_savedwrite(pte))));
-	WARN_ON(pte_savedwrite(pte_clear_savedwrite(pte_mk_savedwrite(pte))));
-}
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 static void __init pmd_basic_tests(struct pgtable_debug_args *args, int idx)
 {
@@ -302,22 +290,6 @@ static void __init pmd_leaf_tests(struct pgtable_debug_args *args)
 	WARN_ON(!pmd_leaf(pmd));
 }
 
-static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args)
-{
-	pmd_t pmd;
-
-	if (!IS_ENABLED(CONFIG_NUMA_BALANCING))
-		return;
-
-	if (!has_transparent_hugepage())
-		return;
-
-	pr_debug("Validating PMD saved write\n");
-	pmd = pfn_pmd(args->fixed_pmd_pfn, args->page_prot_none);
-	WARN_ON(!pmd_savedwrite(pmd_mk_savedwrite(pmd_clear_savedwrite(pmd))));
-	WARN_ON(pmd_savedwrite(pmd_clear_savedwrite(pmd_mk_savedwrite(pmd))));
-}
-
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
 static void __init pud_basic_tests(struct pgtable_debug_args *args, int idx)
 {
@@ -451,7 +423,6 @@ static void __init pmd_advanced_tests(struct pgtable_debug_args *args) { }
 static void __init pud_advanced_tests(struct pgtable_debug_args *args) { }
 static void __init pmd_leaf_tests(struct pgtable_debug_args *args) { }
 static void __init pud_leaf_tests(struct pgtable_debug_args *args) { }
-static void __init pmd_savedwrite_tests(struct pgtable_debug_args *args) { }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
@@ -1288,9 +1259,6 @@ static int __init debug_vm_pgtable(void)
 	pmd_leaf_tests(&args);
 	pud_leaf_tests(&args);
 
-	pte_savedwrite_tests(&args);
-	pmd_savedwrite_tests(&args);
-
 	pte_special_tests(&args);
 	pte_protnone_tests(&args);
 	pmd_protnone_tests(&args);
-- 
GitLab


From 07f8bac4982f98fc4b5ae05679d76fccc15079ea Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Tue, 8 Nov 2022 18:46:52 +0100
Subject: [PATCH 3020/4122] selftests/vm: anon_cow: add mprotect() optimization
 tests

Let's extend the test to cover the possible mprotect() optimization when
removing write-protection. mprotect() must not allow write-access to a
COW-shared page by accident.

Link: https://lkml.kernel.org/r/20221108174652.198904-8-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Nadav Amit <namit@vmware.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/anon_cow.c | 49 +++++++++++++++++++++++++--
 1 file changed, 46 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/anon_cow.c
index 705bd0b3db11c..bbb251eb50258 100644
--- a/tools/testing/selftests/vm/anon_cow.c
+++ b/tools/testing/selftests/vm/anon_cow.c
@@ -190,7 +190,8 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size,
 
 typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
 
-static void do_test_cow_in_parent(char *mem, size_t size, child_fn fn)
+static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
+				  child_fn fn)
 {
 	struct comm_pipes comm_pipes;
 	char buf;
@@ -212,6 +213,22 @@ static void do_test_cow_in_parent(char *mem, size_t size, child_fn fn)
 
 	while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
 		;
+
+	if (do_mprotect) {
+		/*
+		 * mprotect() optimizations might try avoiding
+		 * write-faults by directly mapping pages writable.
+		 */
+		ret = mprotect(mem, size, PROT_READ);
+		ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
+		if (ret) {
+			ksft_test_result_fail("mprotect() failed\n");
+			write(comm_pipes.parent_ready[1], "0", 1);
+			wait(&ret);
+			goto close_comm_pipes;
+		}
+	}
+
 	/* Modify the page. */
 	memset(mem, 0xff, size);
 	write(comm_pipes.parent_ready[1], "0", 1);
@@ -229,12 +246,22 @@ close_comm_pipes:
 
 static void test_cow_in_parent(char *mem, size_t size)
 {
-	do_test_cow_in_parent(mem, size, child_memcmp_fn);
+	do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
+}
+
+static void test_cow_in_parent_mprotect(char *mem, size_t size)
+{
+	do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
 }
 
 static void test_vmsplice_in_child(char *mem, size_t size)
 {
-	do_test_cow_in_parent(mem, size, child_vmsplice_memcmp_fn);
+	do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
+}
+
+static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
+{
+	do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
 }
 
 static void do_test_vmsplice_in_parent(char *mem, size_t size,
@@ -969,6 +996,14 @@ static const struct test_case test_cases[] = {
 		"Basic COW after fork()",
 		test_cow_in_parent,
 	},
+	/*
+	 * Basic test, but do an additional mprotect(PROT_READ)+
+	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
+	 */
+	{
+		"Basic COW after fork() with mprotect() optimization",
+		test_cow_in_parent_mprotect,
+	},
 	/*
 	 * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
 	 * we miss to break COW, the child observes modifications by the parent.
@@ -978,6 +1013,14 @@ static const struct test_case test_cases[] = {
 		"vmsplice() + unmap in child",
 		test_vmsplice_in_child
 	},
+	/*
+	 * vmsplice() test, but do an additional mprotect(PROT_READ)+
+	 * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
+	 */
+	{
+		"vmsplice() + unmap in child with mprotect() optimization",
+		test_vmsplice_in_child_mprotect
+	},
 	/*
 	 * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
 	 * fork(); modify in the child. If we miss to break COW, the parent
-- 
GitLab


From 70fb4fdff5826a48886152fd5c5db04eb6c59a40 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 9 Nov 2022 12:30:48 -0800
Subject: [PATCH 3021/4122] mm: introduce 'encoded' page pointers with embedded
 extra bits

We already have this notion in parts of the MM code (see the mlock code
with the LRU_PAGE and NEW_PAGE bits), but I'm going to introduce a new
case, and I refuse to do the same thing we've done before where we just
put bits in the raw pointer and say it's still a normal pointer.

So this introduces a 'struct encoded_page' pointer that cannot be used for
anything else than to encode a real page pointer and a couple of extra
bits in the low bits.  That way the compiler can trivially track the state
of the pointer and you just explicitly encode and decode the extra bits.

Note that this makes the alignment of 'struct page' explicit even for the
case where CONFIG_HAVE_ALIGNED_STRUCT_PAGE is not set.  That is entirely
redundant in almost all cases, since the page structure already contains
several word-sized entries.

However, on m68k, the alignment of even 32-bit data is just 16 bits, and
as such in theory the alignment of 'struct page' could be too.  So let's
just make it very very explicit that the alignment needs to be at least 32
bits, giving us a guarantee of two unused low bits in the pointer.

Now, in practice, our page struct array is aligned much more than that
anyway, even on m68k, and our existing code in mm/mlock.c obviously
already depended on that.  But since the whole point of this change is to
be careful about the type system when hiding extra bits in the pointer,
let's also be explicit about the assumptions we make.

NOTE!  This is being very careful in another way too: it has a build-time
assertion that the 'flags' added to the page pointer actually fit in the
two bits.  That means that this helper must be inlined, and can only be
used in contexts where the compiler can statically determine that the
value fits in the available bits.

[akpm@linux-foundation.org: kerneldoc on a forward-declared struct confuses htmldocs]
Link: https://lore.kernel.org/all/Y2tKixpO4RO6DgW5@tuxmaker.boeblingen.de.ibm.com/
Link: https://lkml.kernel.org/r/20221109203051.1835763-1-torvalds@linux-foundation.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Hugh Dickins <hughd@google.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com> [s390]
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 44a1a699b5add..6b0009e7d4ae5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -68,7 +68,7 @@ struct mem_cgroup;
 #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
 #define _struct_page_alignment	__aligned(2 * sizeof(unsigned long))
 #else
-#define _struct_page_alignment
+#define _struct_page_alignment	__aligned(sizeof(unsigned long))
 #endif
 
 struct page {
@@ -251,6 +251,38 @@ struct page {
 #endif
 } _struct_page_alignment;
 
+/*
+ * struct encoded_page - a nonexistent type marking this pointer
+ *
+ * An 'encoded_page' pointer is a pointer to a regular 'struct page', but
+ * with the low bits of the pointer indicating extra context-dependent
+ * information. Not super-common, but happens in mmu_gather and mlock
+ * handling, and this acts as a type system check on that use.
+ *
+ * We only really have two guaranteed bits in general, although you could
+ * play with 'struct page' alignment (see CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
+ * for more.
+ *
+ * Use the supplied helper functions to endcode/decode the pointer and bits.
+ */
+struct encoded_page;
+#define ENCODE_PAGE_BITS 3ul
+static __always_inline struct encoded_page *encode_page(struct page *page, unsigned long flags)
+{
+	BUILD_BUG_ON(flags > ENCODE_PAGE_BITS);
+	return (struct encoded_page *)(flags | (unsigned long)page);
+}
+
+static inline unsigned long encoded_page_flags(struct encoded_page *page)
+{
+	return ENCODE_PAGE_BITS & (unsigned long)page;
+}
+
+static inline struct page *encoded_page_ptr(struct encoded_page *page)
+{
+	return (struct page *)(~ENCODE_PAGE_BITS & (unsigned long)page);
+}
+
 /**
  * struct folio - Represents a contiguous set of bytes.
  * @flags: Identical to the page flags.
-- 
GitLab


From 449c796768c9a1c738d1fa8671fb01663380b8a7 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 9 Nov 2022 12:30:49 -0800
Subject: [PATCH 3022/4122] mm: teach release_pages() to take an array of
 encoded page pointers too

release_pages() already could take either an array of page pointers, or an
array of folio pointers.  Expand it to also accept an array of encoded
page pointers, which is what both the existing mlock() use and the
upcoming mmu_gather use of encoded page pointers wants.

Note that release_pages() won't actually use, or react to, any extra
encoded bits.  Instead, this is very much a case of "I have walked the
array of encoded pages and done everything the extra bits tell me to do,
now release it all".

Also, while the "either page or folio pointers" dual use was handled with
a cast of the pointer in "release_folios()", this takes a slightly
different approach and uses the "transparent union" attribute to describe
the set of arguments to the function:

  https://gcc.gnu.org/onlinedocs/gcc/Common-Type-Attributes.html

which has been supported by gcc forever, but the kernel hasn't used
before.

That allows us to avoid using various wrappers with casts, and just use
the same function regardless of use.

Link: https://lkml.kernel.org/r/20221109203051.1835763-2-torvalds@linux-foundation.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 21 +++++++++++++++++++--
 mm/swap.c          | 16 ++++++++++++----
 2 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8597ef676fc3f..f873441303b71 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1245,7 +1245,24 @@ static inline void folio_put_refs(struct folio *folio, int refs)
 		__folio_put(folio);
 }
 
-void release_pages(struct page **pages, int nr);
+/**
+ * release_pages - release an array of pages or folios
+ *
+ * This just releases a simple array of multiple pages, and
+ * accepts various different forms of said page array: either
+ * a regular old boring array of pages, an array of folios, or
+ * an array of encoded page pointers.
+ *
+ * The transparent union syntax for this kind of "any of these
+ * argument types" is all kinds of ugly, so look away.
+ */
+typedef union {
+	struct page **pages;
+	struct folio **folios;
+	struct encoded_page **encoded_pages;
+} release_pages_arg __attribute__ ((__transparent_union__));
+
+void release_pages(release_pages_arg, int nr);
 
 /**
  * folios_put - Decrement the reference count on an array of folios.
@@ -1261,7 +1278,7 @@ void release_pages(struct page **pages, int nr);
  */
 static inline void folios_put(struct folio **folios, unsigned int nr)
 {
-	release_pages((struct page **)folios, nr);
+	release_pages(folios, nr);
 }
 
 static inline void put_page(struct page *page)
diff --git a/mm/swap.c b/mm/swap.c
index b9a6817e07ffe..70e2063ef43ad 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -981,22 +981,30 @@ void lru_cache_disable(void)
 
 /**
  * release_pages - batched put_page()
- * @pages: array of pages to release
+ * @arg: array of pages to release
  * @nr: number of pages
  *
- * Decrement the reference count on all the pages in @pages.  If it
+ * Decrement the reference count on all the pages in @arg.  If it
  * fell to zero, remove the page from the LRU and free it.
+ *
+ * Note that the argument can be an array of pages, encoded pages,
+ * or folio pointers. We ignore any encoded bits, and turn any of
+ * them into just a folio that gets free'd.
  */
-void release_pages(struct page **pages, int nr)
+void release_pages(release_pages_arg arg, int nr)
 {
 	int i;
+	struct encoded_page **encoded = arg.encoded_pages;
 	LIST_HEAD(pages_to_free);
 	struct lruvec *lruvec = NULL;
 	unsigned long flags = 0;
 	unsigned int lock_batch;
 
 	for (i = 0; i < nr; i++) {
-		struct folio *folio = page_folio(pages[i]);
+		struct folio *folio;
+
+		/* Turn any of the argument types into a folio */
+		folio = page_folio(encoded_page_ptr(encoded[i]));
 
 		/*
 		 * Make sure the IRQ-safe lock-holding time does not get
-- 
GitLab


From 7cc8f9c7146a5c2dad6e71653c4f69972e73df6b Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 9 Nov 2022 12:30:50 -0800
Subject: [PATCH 3023/4122] mm: mmu_gather: prepare to gather encoded page
 pointers with flags

This is purely a preparatory patch that makes all the data structures
ready for encoding flags with the mmu_gather page pointers.

The code currently always sets the flag to zero and doesn't use it yet,
but now it's tracking the type state along.  The next step will be to
actually start using it.

Link: https://lkml.kernel.org/r/20221109203051.1835763-3-torvalds@linux-foundation.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/s390/include/asm/tlb.h |  8 +++++---
 include/asm-generic/tlb.h   |  9 +++++----
 include/linux/swap.h        |  2 +-
 mm/mmu_gather.c             |  8 ++++----
 mm/swap_state.c             | 11 ++++-------
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 3a5c8fb590e55..05142226d65dc 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -25,7 +25,8 @@
 void __tlb_remove_table(void *_table);
 static inline void tlb_flush(struct mmu_gather *tlb);
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size);
+					  struct encoded_page *page,
+					  int page_size);
 
 #define tlb_flush tlb_flush
 #define pte_free_tlb pte_free_tlb
@@ -42,9 +43,10 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
  * has already been freed, so just do free_page_and_swap_cache.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
-					  struct page *page, int page_size)
+					  struct encoded_page *page,
+					  int page_size)
 {
-	free_page_and_swap_cache(page);
+	free_page_and_swap_cache(encoded_page_ptr(page));
 	return false;
 }
 
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index cab7cfebf40bd..54d03d1e712eb 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -246,7 +246,7 @@ struct mmu_gather_batch {
 	struct mmu_gather_batch	*next;
 	unsigned int		nr;
 	unsigned int		max;
-	struct page		*pages[];
+	struct encoded_page	*encoded_pages[];
 };
 
 #define MAX_GATHER_BATCH	\
@@ -260,7 +260,8 @@ struct mmu_gather_batch {
  */
 #define MAX_GATHER_BATCH_COUNT	(10000UL/MAX_GATHER_BATCH)
 
-extern bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page,
+extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
+				   struct encoded_page *page,
 				   int page_size);
 #endif
 
@@ -435,13 +436,13 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
 static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 					struct page *page, int page_size)
 {
-	if (__tlb_remove_page_size(tlb, page, page_size))
+	if (__tlb_remove_page_size(tlb, encode_page(page, 0), page_size))
 		tlb_flush_mmu(tlb);
 }
 
 static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	return __tlb_remove_page_size(tlb, page, PAGE_SIZE);
+	return __tlb_remove_page_size(tlb, encode_page(page, 0), PAGE_SIZE);
 }
 
 /* tlb_remove_page
diff --git a/include/linux/swap.h b/include/linux/swap.h
index fec6647a289af..b61e2007d1568 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -463,7 +463,7 @@ static inline unsigned long total_swapcache_pages(void)
 
 extern void free_swap_cache(struct page *page);
 extern void free_page_and_swap_cache(struct page *);
-extern void free_pages_and_swap_cache(struct page **, int);
+extern void free_pages_and_swap_cache(struct encoded_page **, int);
 /* linux/mm/swapfile.c */
 extern atomic_long_t nr_swap_pages;
 extern long total_swap_pages;
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 3a2c3f8cad2fe..382581c4a9f68 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -48,7 +48,7 @@ static void tlb_batch_pages_flush(struct mmu_gather *tlb)
 	struct mmu_gather_batch *batch;
 
 	for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
-		struct page **pages = batch->pages;
+		struct encoded_page **pages = batch->encoded_pages;
 
 		do {
 			/*
@@ -77,7 +77,7 @@ static void tlb_batch_list_free(struct mmu_gather *tlb)
 	tlb->local.next = NULL;
 }
 
-bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
+bool __tlb_remove_page_size(struct mmu_gather *tlb, struct encoded_page *page, int page_size)
 {
 	struct mmu_gather_batch *batch;
 
@@ -92,13 +92,13 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
 	 * Add the page and check if we are full. If so
 	 * force a flush.
 	 */
-	batch->pages[batch->nr++] = page;
+	batch->encoded_pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
 		if (!tlb_next_batch(tlb))
 			return true;
 		batch = tlb->active;
 	}
-	VM_BUG_ON_PAGE(batch->nr > batch->max, page);
+	VM_BUG_ON_PAGE(batch->nr > batch->max, encoded_page_ptr(page));
 
 	return false;
 }
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 40fe6f23e1057..2927507b43d81 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -303,15 +303,12 @@ void free_page_and_swap_cache(struct page *page)
  * Passed an array of pages, drop them all from swapcache and then release
  * them.  They are removed from the LRU and freed if this is their last use.
  */
-void free_pages_and_swap_cache(struct page **pages, int nr)
+void free_pages_and_swap_cache(struct encoded_page **pages, int nr)
 {
-	struct page **pagep = pages;
-	int i;
-
 	lru_add_drain();
-	for (i = 0; i < nr; i++)
-		free_swap_cache(pagep[i]);
-	release_pages(pagep, nr);
+	for (int i = 0; i < nr; i++)
+		free_swap_cache(encoded_page_ptr(pages[i]));
+	release_pages(pages, nr);
 }
 
 static inline bool swap_use_vma_readahead(void)
-- 
GitLab


From 5df397dec7c4c08c23bd14f162f1228836faa4ce Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Wed, 9 Nov 2022 12:30:51 -0800
Subject: [PATCH 3024/4122] mm: delay page_remove_rmap() until after the TLB
 has been flushed

When we remove a page table entry, we are very careful to only free the
page after we have flushed the TLB, because other CPUs could still be
using the page through stale TLB entries until after the flush.

However, we have removed the rmap entry for that page early, which means
that functions like folio_mkclean() would end up not serializing with the
page table lock because the page had already been made invisible to rmap.

And that is a problem, because while the TLB entry exists, we could end up
with the following situation:

 (a) one CPU could come in and clean it, never seeing our mapping of the
     page

 (b) another CPU could continue to use the stale and dirty TLB entry and
     continue to write to said page

resulting in a page that has been dirtied, but then marked clean again,
all while another CPU might have dirtied it some more.

End result: possibly lost dirty data.

This extends our current TLB gather infrastructure to optionally track a
"should I do a delayed page_remove_rmap() for this page after flushing the
TLB".  It uses the newly introduced 'encoded page pointer' to do that
without having to keep separate data around.

Note, this is complicated by a couple of issues:

 - we want to delay the rmap removal, but not past the page table lock,
   because that simplifies the memcg accounting

 - only SMP configurations want to delay TLB flushing, since on UP
   there are obviously no remote TLBs to worry about, and the page
   table lock means there are no preemption issues either

 - s390 has its own mmu_gather model that doesn't delay TLB flushing,
   and as a result also does not want the delayed rmap. As such, we can
   treat S390 like the UP case and use a common fallback for the "no
   delays" case.

 - we can track an enormous number of pages in our mmu_gather structure,
   with MAX_GATHER_BATCH_COUNT batches of MAX_TABLE_BATCH pages each,
   all set up to be approximately 10k pending pages.

   We do not want to have a huge number of batched pages that we then
   need to check for delayed rmap handling inside the page table lock.

Particularly that last point results in a noteworthy detail, where the
normal page batch gathering is limited once we have delayed rmaps pending,
in such a way that only the last batch (the so-called "active batch") in
the mmu_gather structure can have any delayed entries.

NOTE!  While the "possibly lost dirty data" sounds catastrophic, for this
all to happen you need to have a user thread doing either madvise() with
MADV_DONTNEED or a full re-mmap() of the area concurrently with another
thread continuing to use said mapping.

So arguably this is about user space doing crazy things, but from a VM
consistency standpoint it's better if we track the dirty bit properly even
when user space goes off the rails.

[akpm@linux-foundation.org: fix UP build, per Linus]
Link: https://lore.kernel.org/all/B88D3073-440A-41C7-95F4-895D3F657EF2@gmail.com/
Link: https://lkml.kernel.org/r/20221109203051.1835763-4-torvalds@linux-foundation.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Hugh Dickins <hughd@google.com>
Reported-by: Nadav Amit <nadav.amit@gmail.com>
Tested-by: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/s390/include/asm/tlb.h |  3 +++
 include/asm-generic/tlb.h   | 31 +++++++++++++++++++++++++++++--
 mm/memory.c                 | 23 +++++++++++++++++------
 mm/mmu_gather.c             | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 8 deletions(-)

diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index 05142226d65dc..b91f4a9b044cd 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -41,6 +41,9 @@ static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
  * Release the page cache reference for a pte removed by
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
+ *
+ * s390 doesn't delay rmap removal, so there is nothing encoded in
+ * the page pointer.
  */
 static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
 					  struct encoded_page *page,
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 54d03d1e712eb..b46617207c939 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -263,6 +263,28 @@ struct mmu_gather_batch {
 extern bool __tlb_remove_page_size(struct mmu_gather *tlb,
 				   struct encoded_page *page,
 				   int page_size);
+
+#ifdef CONFIG_SMP
+/*
+ * This both sets 'delayed_rmap', and returns true. It would be an inline
+ * function, except we define it before the 'struct mmu_gather'.
+ */
+#define tlb_delay_rmap(tlb) (((tlb)->delayed_rmap = 1), true)
+extern void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma);
+#endif
+
+#endif
+
+/*
+ * We have a no-op version of the rmap removal that doesn't
+ * delay anything. That is used on S390, which flushes remote
+ * TLBs synchronously, and on UP, which doesn't have any
+ * remote TLBs to flush and is not preemptible due to this
+ * all happening under the page table lock.
+ */
+#ifndef tlb_delay_rmap
+#define tlb_delay_rmap(tlb) (false)
+static inline void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
 #endif
 
 /*
@@ -295,6 +317,11 @@ struct mmu_gather {
 	 */
 	unsigned int		freed_tables : 1;
 
+	/*
+	 * Do we have pending delayed rmap removals?
+	 */
+	unsigned int		delayed_rmap : 1;
+
 	/*
 	 * at which levels have we cleared entries?
 	 */
@@ -440,9 +467,9 @@ static inline void tlb_remove_page_size(struct mmu_gather *tlb,
 		tlb_flush_mmu(tlb);
 }
 
-static inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static __always_inline bool __tlb_remove_page(struct mmu_gather *tlb, struct page *page, unsigned int flags)
 {
-	return __tlb_remove_page_size(tlb, encode_page(page, 0), PAGE_SIZE);
+	return __tlb_remove_page_size(tlb, encode_page(page, flags), PAGE_SIZE);
 }
 
 /* tlb_remove_page
diff --git a/mm/memory.c b/mm/memory.c
index 1749c638734f9..6c85cba021132 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1374,6 +1374,8 @@ again:
 			break;
 
 		if (pte_present(ptent)) {
+			unsigned int delay_rmap;
+
 			page = vm_normal_page(vma, addr, ptent);
 			if (unlikely(!should_zap_page(details, page)))
 				continue;
@@ -1385,20 +1387,26 @@ again:
 			if (unlikely(!page))
 				continue;
 
+			delay_rmap = 0;
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
-					force_flush = 1;
 					set_page_dirty(page);
+					if (tlb_delay_rmap(tlb)) {
+						delay_rmap = 1;
+						force_flush = 1;
+					}
 				}
 				if (pte_young(ptent) &&
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
 			}
 			rss[mm_counter(page)]--;
-			page_remove_rmap(page, vma, false);
-			if (unlikely(page_mapcount(page) < 0))
-				print_bad_pte(vma, addr, ptent, page);
-			if (unlikely(__tlb_remove_page(tlb, page))) {
+			if (!delay_rmap) {
+				page_remove_rmap(page, vma, false);
+				if (unlikely(page_mapcount(page) < 0))
+					print_bad_pte(vma, addr, ptent, page);
+			}
+			if (unlikely(__tlb_remove_page(tlb, page, delay_rmap))) {
 				force_flush = 1;
 				addr += PAGE_SIZE;
 				break;
@@ -1455,8 +1463,11 @@ again:
 	arch_leave_lazy_mmu_mode();
 
 	/* Do the actual TLB flush before dropping ptl */
-	if (force_flush)
+	if (force_flush) {
 		tlb_flush_mmu_tlbonly(tlb);
+		if (tlb->delayed_rmap)
+			tlb_flush_rmaps(tlb, vma);
+	}
 	pte_unmap_unlock(start_pte, ptl);
 
 	/*
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 382581c4a9f68..1de1cf9ba581d 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -9,6 +9,7 @@
 #include <linux/rcupdate.h>
 #include <linux/smp.h>
 #include <linux/swap.h>
+#include <linux/rmap.h>
 
 #include <asm/pgalloc.h>
 #include <asm/tlb.h>
@@ -19,6 +20,10 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;
 
+	/* No more batching if we have delayed rmaps pending */
+	if (tlb->delayed_rmap)
+		return false;
+
 	batch = tlb->active;
 	if (batch->next) {
 		tlb->active = batch->next;
@@ -43,6 +48,33 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 	return true;
 }
 
+#ifdef CONFIG_SMP
+/**
+ * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
+ * @tlb: the current mmu_gather
+ *
+ * Note that because of how tlb_next_batch() above works, we will
+ * never start new batches with pending delayed rmaps, so we only
+ * need to walk through the current active batch.
+ */
+void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
+{
+	struct mmu_gather_batch *batch;
+
+	batch = tlb->active;
+	for (int i = 0; i < batch->nr; i++) {
+		struct encoded_page *enc = batch->encoded_pages[i];
+
+		if (encoded_page_flags(enc)) {
+			struct page *page = encoded_page_ptr(enc);
+			page_remove_rmap(page, vma, false);
+		}
+	}
+
+	tlb->delayed_rmap = 0;
+}
+#endif
+
 static void tlb_batch_pages_flush(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;
@@ -284,6 +316,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 	tlb->active     = &tlb->local;
 	tlb->batch_count = 0;
 #endif
+	tlb->delayed_rmap = 0;
 
 	tlb_table_init(tlb);
 #ifdef CONFIG_MMU_GATHER_PAGE_SIZE
-- 
GitLab


From f036c8184f8b6750fa642485fb01eb6ff036a86b Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Wed, 16 Nov 2022 08:49:30 +0100
Subject: [PATCH 3025/4122] mm: mmu_gather: do not expose delayed_rmap flag

Flag delayed_rmap of 'struct mmu_gather' is rather a private member, but
it is still accessed directly.  Instead, let the TLB gather code access
the flag.

Link: https://lkml.kernel.org/r/Y3SWCu6NRaMQ5dbD@li-4a3a4a4c-28e5-11b2-a85c-a8d192c6f089.ibm.com
Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory.c     | 3 +--
 mm/mmu_gather.c | 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 6c85cba021132..086cb3dd86081 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1465,8 +1465,7 @@ again:
 	/* Do the actual TLB flush before dropping ptl */
 	if (force_flush) {
 		tlb_flush_mmu_tlbonly(tlb);
-		if (tlb->delayed_rmap)
-			tlb_flush_rmaps(tlb, vma);
+		tlb_flush_rmaps(tlb, vma);
 	}
 	pte_unmap_unlock(start_pte, ptl);
 
diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 1de1cf9ba581d..dd1f8ca40cb53 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -61,6 +61,9 @@ void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
 	struct mmu_gather_batch *batch;
 
+	if (!tlb->delayed_rmap)
+		return;
+
 	batch = tlb->active;
 	for (int i = 0; i < batch->nr; i++) {
 		struct encoded_page *enc = batch->encoded_pages[i];
-- 
GitLab


From 7ac07a26dea79c3892436bce41cce03dcbd3c4c7 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:35 +0900
Subject: [PATCH 3026/4122] zram: preparation for multi-zcomp support

Patch series "zram: Support multiple compression streams", v5.

This series adds support for multiple compression streams.  The main idea
is that different compression algorithms have different characteristics
and zram may benefit when it uses a combination of algorithms: a default
algorithm that is faster but have lower compression rate and a secondary
algorithm that can use higher compression rate at a price of slower
compression/decompression.

There are several use-case for this functionality:

- huge pages re-compression: zstd or deflate can successfully compress
  huge pages (~50% of huge pages on my synthetic ChromeOS tests), IOW
  pages that lzo was not able to compress.

- idle pages re-compression: idle/cold pages sit in the memory and we
  may reduce zsmalloc memory usage if we recompress those idle pages.

Userspace has a number of ways to control the behavior and impact of zram
recompression: what type of pages should be recompressed, size watermarks,
etc.  Please refer to documentation patch.


This patch (of 13):

The patch turns compression streams and compressor algorithm name struct
zram members into arrays, so that we can have multiple compression streams
support (in the next patches).

The patch uses a rather explicit API for compressor selection:

- Get primary (default) compression stream
	zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP])
- Get secondary compression stream
	zcomp_stream_get(zram->comps[ZRAM_SECONDARY_COMP])

We use similar API for compression streams put().

At this point we always have just one compression stream,
since CONFIG_ZRAM_MULTI_COMP is not yet defined.

Link: https://lkml.kernel.org/r/20221109115047.2921851-1-senozhatsky@chromium.org
Link: https://lkml.kernel.org/r/20221109115047.2921851-2-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zcomp.c    |  6 +--
 drivers/block/zram/zcomp.h    |  2 +-
 drivers/block/zram/zram_drv.c | 90 +++++++++++++++++++++++++----------
 drivers/block/zram/zram_drv.h | 14 +++++-
 4 files changed, 80 insertions(+), 32 deletions(-)

diff --git a/drivers/block/zram/zcomp.c b/drivers/block/zram/zcomp.c
index 0916de952e091..55af4efd79835 100644
--- a/drivers/block/zram/zcomp.c
+++ b/drivers/block/zram/zcomp.c
@@ -206,7 +206,7 @@ void zcomp_destroy(struct zcomp *comp)
  * case of allocation error, or any other error potentially
  * returned by zcomp_init().
  */
-struct zcomp *zcomp_create(const char *compress)
+struct zcomp *zcomp_create(const char *alg)
 {
 	struct zcomp *comp;
 	int error;
@@ -216,14 +216,14 @@ struct zcomp *zcomp_create(const char *compress)
 	 * is not loaded yet. We must do it here, otherwise we are about to
 	 * call /sbin/modprobe under CPU hot-plug lock.
 	 */
-	if (!zcomp_available_algorithm(compress))
+	if (!zcomp_available_algorithm(alg))
 		return ERR_PTR(-EINVAL);
 
 	comp = kzalloc(sizeof(struct zcomp), GFP_KERNEL);
 	if (!comp)
 		return ERR_PTR(-ENOMEM);
 
-	comp->name = compress;
+	comp->name = alg;
 	error = zcomp_init(comp);
 	if (error) {
 		kfree(comp);
diff --git a/drivers/block/zram/zcomp.h b/drivers/block/zram/zcomp.h
index 40f6420f4b2e9..cdefdef93da8c 100644
--- a/drivers/block/zram/zcomp.h
+++ b/drivers/block/zram/zcomp.h
@@ -27,7 +27,7 @@ int zcomp_cpu_dead(unsigned int cpu, struct hlist_node *node);
 ssize_t zcomp_available_show(const char *comp, char *buf);
 bool zcomp_available_algorithm(const char *comp);
 
-struct zcomp *zcomp_create(const char *comp);
+struct zcomp *zcomp_create(const char *alg);
 void zcomp_destroy(struct zcomp *comp);
 
 struct zcomp_strm *zcomp_stream_get(struct zcomp *comp);
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 87711ddf4b54b..fbe46c6177fd7 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1004,36 +1004,53 @@ static ssize_t comp_algorithm_show(struct device *dev,
 	struct zram *zram = dev_to_zram(dev);
 
 	down_read(&zram->init_lock);
-	sz = zcomp_available_show(zram->compressor, buf);
+	sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf);
 	up_read(&zram->init_lock);
 
 	return sz;
 }
 
+static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
+{
+	/* Do not kfree() algs that we didn't allocate, IOW the default ones */
+	if (zram->comp_algs[prio] != default_compressor)
+		kfree(zram->comp_algs[prio]);
+	zram->comp_algs[prio] = alg;
+}
+
 static ssize_t comp_algorithm_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
 	struct zram *zram = dev_to_zram(dev);
-	char compressor[ARRAY_SIZE(zram->compressor)];
+	char *compressor;
 	size_t sz;
 
-	strscpy(compressor, buf, sizeof(compressor));
+	sz = strlen(buf);
+	if (sz >= CRYPTO_MAX_ALG_NAME)
+		return -E2BIG;
+
+	compressor = kstrdup(buf, GFP_KERNEL);
+	if (!compressor)
+		return -ENOMEM;
+
 	/* ignore trailing newline */
-	sz = strlen(compressor);
 	if (sz > 0 && compressor[sz - 1] == '\n')
 		compressor[sz - 1] = 0x00;
 
-	if (!zcomp_available_algorithm(compressor))
+	if (!zcomp_available_algorithm(compressor)) {
+		kfree(compressor);
 		return -EINVAL;
+	}
 
 	down_write(&zram->init_lock);
 	if (init_done(zram)) {
 		up_write(&zram->init_lock);
+		kfree(compressor);
 		pr_info("Can't change algorithm for initialized device\n");
 		return -EBUSY;
 	}
 
-	strcpy(zram->compressor, compressor);
+	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, compressor);
 	up_write(&zram->init_lock);
 	return len;
 }
@@ -1281,7 +1298,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 	size = zram_get_obj_size(zram, index);
 
 	if (size != PAGE_SIZE)
-		zstrm = zcomp_stream_get(zram->comp);
+		zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
 
 	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
@@ -1293,7 +1310,7 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		dst = kmap_atomic(page);
 		ret = zcomp_decompress(zstrm, src, size, dst);
 		kunmap_atomic(dst);
-		zcomp_stream_put(zram->comp);
+		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 	}
 	zs_unmap_object(zram->mem_pool, handle);
 	zram_slot_unlock(zram, index);
@@ -1360,13 +1377,13 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
 	kunmap_atomic(mem);
 
 compress_again:
-	zstrm = zcomp_stream_get(zram->comp);
+	zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
 	src = kmap_atomic(page);
 	ret = zcomp_compress(zstrm, src, &comp_len);
 	kunmap_atomic(src);
 
 	if (unlikely(ret)) {
-		zcomp_stream_put(zram->comp);
+		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 		pr_err("Compression failed! err=%d\n", ret);
 		zs_free(zram->mem_pool, handle);
 		return ret;
@@ -1394,7 +1411,7 @@ compress_again:
 				__GFP_HIGHMEM |
 				__GFP_MOVABLE);
 	if (IS_ERR((void *)handle)) {
-		zcomp_stream_put(zram->comp);
+		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 		atomic64_inc(&zram->stats.writestall);
 		handle = zs_malloc(zram->mem_pool, comp_len,
 				GFP_NOIO | __GFP_HIGHMEM |
@@ -1411,14 +1428,14 @@ compress_again:
 		 * zstrm buffer back. It is necessary that the dereferencing
 		 * of the zstrm variable below occurs correctly.
 		 */
-		zstrm = zcomp_stream_get(zram->comp);
+		zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
 	}
 
 	alloced_pages = zs_get_total_pages(zram->mem_pool);
 	update_used_max(zram, alloced_pages);
 
 	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
-		zcomp_stream_put(zram->comp);
+		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 		zs_free(zram->mem_pool, handle);
 		return -ENOMEM;
 	}
@@ -1432,7 +1449,7 @@ compress_again:
 	if (comp_len == PAGE_SIZE)
 		kunmap_atomic(src);
 
-	zcomp_stream_put(zram->comp);
+	zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 	zs_unmap_object(zram->mem_pool, handle);
 	atomic64_add(comp_len, &zram->stats.compr_data_size);
 out:
@@ -1707,6 +1724,20 @@ out:
 	return ret;
 }
 
+static void zram_destroy_comps(struct zram *zram)
+{
+	u32 prio;
+
+	for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+		struct zcomp *comp = zram->comps[prio];
+
+		zram->comps[prio] = NULL;
+		if (!comp)
+			continue;
+		zcomp_destroy(comp);
+	}
+}
+
 static void zram_reset_device(struct zram *zram)
 {
 	down_write(&zram->init_lock);
@@ -1724,11 +1755,11 @@ static void zram_reset_device(struct zram *zram)
 	/* I/O operation under all of CPU are done so let's free */
 	zram_meta_free(zram, zram->disksize);
 	zram->disksize = 0;
+	zram_destroy_comps(zram);
 	memset(&zram->stats, 0, sizeof(zram->stats));
-	zcomp_destroy(zram->comp);
-	zram->comp = NULL;
 	reset_bdev(zram);
 
+	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
 	up_write(&zram->init_lock);
 }
 
@@ -1739,6 +1770,7 @@ static ssize_t disksize_store(struct device *dev,
 	struct zcomp *comp;
 	struct zram *zram = dev_to_zram(dev);
 	int err;
+	u32 prio;
 
 	disksize = memparse(buf, NULL);
 	if (!disksize)
@@ -1757,22 +1789,28 @@ static ssize_t disksize_store(struct device *dev,
 		goto out_unlock;
 	}
 
-	comp = zcomp_create(zram->compressor);
-	if (IS_ERR(comp)) {
-		pr_err("Cannot initialise %s compressing backend\n",
-				zram->compressor);
-		err = PTR_ERR(comp);
-		goto out_free_meta;
-	}
+	for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+		if (!zram->comp_algs[prio])
+			continue;
+
+		comp = zcomp_create(zram->comp_algs[prio]);
+		if (IS_ERR(comp)) {
+			pr_err("Cannot initialise %s compressing backend\n",
+			       zram->comp_algs[prio]);
+			err = PTR_ERR(comp);
+			goto out_free_comps;
+		}
 
-	zram->comp = comp;
+		zram->comps[prio] = comp;
+	}
 	zram->disksize = disksize;
 	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
 	up_write(&zram->init_lock);
 
 	return len;
 
-out_free_meta:
+out_free_comps:
+	zram_destroy_comps(zram);
 	zram_meta_free(zram, disksize);
 out_unlock:
 	up_write(&zram->init_lock);
@@ -1959,7 +1997,7 @@ static int zram_add(void)
 	if (ret)
 		goto out_cleanup_disk;
 
-	strscpy(zram->compressor, default_compressor, sizeof(zram->compressor));
+	zram->comp_algs[ZRAM_PRIMARY_COMP] = default_compressor;
 
 	zram_debugfs_register(zram);
 	pr_info("Added device: %s\n", zram->disk->disk_name);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index a2bda53020fdd..7a643c8c38eca 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -89,10 +89,20 @@ struct zram_stats {
 #endif
 };
 
+#ifdef CONFIG_ZRAM_MULTI_COMP
+#define ZRAM_PRIMARY_COMP	0U
+#define ZRAM_SECONDARY_COMP	1U
+#define ZRAM_MAX_COMPS	4U
+#else
+#define ZRAM_PRIMARY_COMP	0U
+#define ZRAM_SECONDARY_COMP	0U
+#define ZRAM_MAX_COMPS	1U
+#endif
+
 struct zram {
 	struct zram_table_entry *table;
 	struct zs_pool *mem_pool;
-	struct zcomp *comp;
+	struct zcomp *comps[ZRAM_MAX_COMPS];
 	struct gendisk *disk;
 	/* Prevent concurrent execution of device init */
 	struct rw_semaphore init_lock;
@@ -107,7 +117,7 @@ struct zram {
 	 * we can store in a disk.
 	 */
 	u64 disksize;	/* bytes */
-	char compressor[CRYPTO_MAX_ALG_NAME];
+	const char *comp_algs[ZRAM_MAX_COMPS];
 	/*
 	 * zram is claimed so open request will be failed
 	 */
-- 
GitLab


From 001d9273570115b2eb360d5452bbc46f6cc063a1 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:36 +0900
Subject: [PATCH 3027/4122] zram: add recompression algorithm sysfs knob

Introduce recomp_algorithm sysfs knob that controls secondary algorithm
selection used for recompression.

We will support up to 3 secondary compression algorithms which are sorted
in order of their priority.  To select an algorithm user has to provide
its name and priority:

  echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm
  echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm

During recompression zram iterates through the list of registered
secondary algorithms in order of their priorities.

We also have a short version for cases when there is only
one secondary compression algorithm:

  echo "algo=zstd" > /sys/block/zramX/recomp_algorithm

This will register zstd as the secondary algorithm with priority 1.

Link: https://lkml.kernel.org/r/20221109115047.2921851-3-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 124 ++++++++++++++++++++++++++++------
 1 file changed, 105 insertions(+), 19 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index fbe46c6177fd7..8ea4908f5961f 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -997,31 +997,28 @@ static ssize_t max_comp_streams_store(struct device *dev,
 	return len;
 }
 
-static ssize_t comp_algorithm_show(struct device *dev,
-		struct device_attribute *attr, char *buf)
+static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
 {
-	size_t sz;
-	struct zram *zram = dev_to_zram(dev);
+	/* Do not free statically defined compression algorithms */
+	if (zram->comp_algs[prio] != default_compressor)
+		kfree(zram->comp_algs[prio]);
+
+	zram->comp_algs[prio] = alg;
+}
+
+static ssize_t __comp_algorithm_show(struct zram *zram, u32 prio, char *buf)
+{
+	ssize_t sz;
 
 	down_read(&zram->init_lock);
-	sz = zcomp_available_show(zram->comp_algs[ZRAM_PRIMARY_COMP], buf);
+	sz = zcomp_available_show(zram->comp_algs[prio], buf);
 	up_read(&zram->init_lock);
 
 	return sz;
 }
 
-static void comp_algorithm_set(struct zram *zram, u32 prio, const char *alg)
+static int __comp_algorithm_store(struct zram *zram, u32 prio, const char *buf)
 {
-	/* Do not kfree() algs that we didn't allocate, IOW the default ones */
-	if (zram->comp_algs[prio] != default_compressor)
-		kfree(zram->comp_algs[prio]);
-	zram->comp_algs[prio] = alg;
-}
-
-static ssize_t comp_algorithm_store(struct device *dev,
-		struct device_attribute *attr, const char *buf, size_t len)
-{
-	struct zram *zram = dev_to_zram(dev);
 	char *compressor;
 	size_t sz;
 
@@ -1050,11 +1047,94 @@ static ssize_t comp_algorithm_store(struct device *dev,
 		return -EBUSY;
 	}
 
-	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, compressor);
+	comp_algorithm_set(zram, prio, compressor);
 	up_write(&zram->init_lock);
-	return len;
+	return 0;
+}
+
+static ssize_t comp_algorithm_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+
+	return __comp_algorithm_show(zram, ZRAM_PRIMARY_COMP, buf);
+}
+
+static ssize_t comp_algorithm_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf,
+				    size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	int ret;
+
+	ret = __comp_algorithm_store(zram, ZRAM_PRIMARY_COMP, buf);
+	return ret ? ret : len;
 }
 
+#ifdef CONFIG_ZRAM_MULTI_COMP
+static ssize_t recomp_algorithm_show(struct device *dev,
+				     struct device_attribute *attr,
+				     char *buf)
+{
+	struct zram *zram = dev_to_zram(dev);
+	ssize_t sz = 0;
+	u32 prio;
+
+	for (prio = ZRAM_SECONDARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
+		if (!zram->comp_algs[prio])
+			continue;
+
+		sz += scnprintf(buf + sz, PAGE_SIZE - sz - 2, "#%d: ", prio);
+		sz += __comp_algorithm_show(zram, prio, buf + sz);
+	}
+
+	return sz;
+}
+
+static ssize_t recomp_algorithm_store(struct device *dev,
+				      struct device_attribute *attr,
+				      const char *buf,
+				      size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	int prio = ZRAM_SECONDARY_COMP;
+	char *args, *param, *val;
+	char *alg = NULL;
+	int ret;
+
+	args = skip_spaces(buf);
+	while (*args) {
+		args = next_arg(args, &param, &val);
+
+		if (!*val)
+			return -EINVAL;
+
+		if (!strcmp(param, "algo")) {
+			alg = val;
+			continue;
+		}
+
+		if (!strcmp(param, "priority")) {
+			ret = kstrtoint(val, 10, &prio);
+			if (ret)
+				return ret;
+			continue;
+		}
+	}
+
+	if (!alg)
+		return -EINVAL;
+
+	if (prio < ZRAM_SECONDARY_COMP || prio >= ZRAM_MAX_COMPS)
+		return -EINVAL;
+
+	ret = __comp_algorithm_store(zram, prio, alg);
+	return ret ? ret : len;
+}
+#endif
+
 static ssize_t compact_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
 {
@@ -1895,6 +1975,9 @@ static DEVICE_ATTR_WO(writeback);
 static DEVICE_ATTR_RW(writeback_limit);
 static DEVICE_ATTR_RW(writeback_limit_enable);
 #endif
+#ifdef CONFIG_ZRAM_MULTI_COMP
+static DEVICE_ATTR_RW(recomp_algorithm);
+#endif
 
 static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_disksize.attr,
@@ -1918,6 +2001,9 @@ static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_bd_stat.attr,
 #endif
 	&dev_attr_debug_stat.attr,
+#ifdef CONFIG_ZRAM_MULTI_COMP
+	&dev_attr_recomp_algorithm.attr,
+#endif
 	NULL,
 };
 
@@ -1997,7 +2083,7 @@ static int zram_add(void)
 	if (ret)
 		goto out_cleanup_disk;
 
-	zram->comp_algs[ZRAM_PRIMARY_COMP] = default_compressor;
+	comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
 
 	zram_debugfs_register(zram);
 	pr_info("Added device: %s\n", zram->disk->disk_name);
-- 
GitLab


From 5561347aa598b6b12fb6069788ccec9b5e5ebec1 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:37 +0900
Subject: [PATCH 3028/4122] zram: factor out WB and non-WB zram read functions

We will use non-WB variant in ZRAM page recompression path.

Link: https://lkml.kernel.org/r/20221109115047.2921851-4-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 72 ++++++++++++++++++++++++-----------
 1 file changed, 49 insertions(+), 23 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 8ea4908f5961f..135fb946f83e2 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1336,8 +1336,29 @@ out:
 		~(1UL << ZRAM_LOCK | 1UL << ZRAM_UNDER_WB));
 }
 
-static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
-				struct bio *bio, bool partial_io)
+/*
+ * Reads a page from the writeback devices. Corresponding ZRAM slot
+ * should be unlocked.
+ */
+static int zram_bvec_read_from_bdev(struct zram *zram, struct page *page,
+				    u32 index, struct bio *bio, bool partial_io)
+{
+	struct bio_vec bvec = {
+		.bv_page = page,
+		.bv_len = PAGE_SIZE,
+		.bv_offset = 0,
+	};
+
+	return read_from_bdev(zram, &bvec, zram_get_element(zram, index), bio,
+			      partial_io);
+}
+
+/*
+ * Reads (decompresses if needed) a page from zspool (zsmalloc).
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_read_from_zspool(struct zram *zram, struct page *page,
+				 u32 index)
 {
 	struct zcomp_strm *zstrm;
 	unsigned long handle;
@@ -1345,23 +1366,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 	void *src, *dst;
 	int ret;
 
-	zram_slot_lock(zram, index);
-	if (zram_test_flag(zram, index, ZRAM_WB)) {
-		struct bio_vec bvec;
-
-		zram_slot_unlock(zram, index);
-		/* A null bio means rw_page was used, we must fallback to bio */
-		if (!bio)
-			return -EOPNOTSUPP;
-
-		bvec.bv_page = page;
-		bvec.bv_len = PAGE_SIZE;
-		bvec.bv_offset = 0;
-		return read_from_bdev(zram, &bvec,
-				zram_get_element(zram, index),
-				bio, partial_io);
-	}
-
 	handle = zram_get_handle(zram, index);
 	if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
 		unsigned long value;
@@ -1371,7 +1375,6 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		mem = kmap_atomic(page);
 		zram_fill_page(mem, PAGE_SIZE, value);
 		kunmap_atomic(mem);
-		zram_slot_unlock(zram, index);
 		return 0;
 	}
 
@@ -1393,17 +1396,40 @@ static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
 		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 	}
 	zs_unmap_object(zram->mem_pool, handle);
-	zram_slot_unlock(zram, index);
+	return ret;
+}
+
+static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
+			    struct bio *bio, bool partial_io)
+{
+	int ret;
+
+	zram_slot_lock(zram, index);
+	if (!zram_test_flag(zram, index, ZRAM_WB)) {
+		/* Slot should be locked through out the function call */
+		ret = zram_read_from_zspool(zram, page, index);
+		zram_slot_unlock(zram, index);
+	} else {
+		/* Slot should be unlocked before the function call */
+		zram_slot_unlock(zram, index);
+
+		/* A null bio means rw_page was used, we must fallback to bio */
+		if (!bio)
+			return -EOPNOTSUPP;
+
+		ret = zram_bvec_read_from_bdev(zram, page, index, bio,
+					       partial_io);
+	}
 
 	/* Should NEVER happen. Return bio error if it does. */
-	if (WARN_ON(ret))
+	if (WARN_ON(ret < 0))
 		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
 
 	return ret;
 }
 
 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
-				u32 index, int offset, struct bio *bio)
+			  u32 index, int offset, struct bio *bio)
 {
 	int ret;
 	struct page *page;
-- 
GitLab


From 84b33bf7888975d28c0e57011b75c445279c60ec Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:38 +0900
Subject: [PATCH 3029/4122] zram: introduce recompress sysfs knob

Allow zram to recompress (using secondary compression streams)
pages.

Re-compression algorithms (we support up to 3 at this stage)
are selected via recomp_algorithm:

  echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm

Please read documentation for more details.

We support several recompression modes:

1) IDLE pages recompression is activated by `idle` mode

  echo "type=idle" > /sys/block/zram0/recompress

2) Since there may be many idle pages user-space may pass a size
threshold value (in bytes) and we will recompress pages only
of equal or greater size:

  echo "threshold=888" > /sys/block/zram0/recompress

3) HUGE pages recompression is activated by `huge` mode

  echo "type=huge" > /sys/block/zram0/recompress

4) HUGE_IDLE pages recompression is activated by `huge_idle` mode

  echo "type=huge_idle" > /sys/block/zram0/recompress

[senozhatsky@chromium.org: we should always zero out err variable in recompress loop[
  Link: https://lkml.kernel.org/r/20221110143423.3250790-1-senozhatsky@chromium.org
Link: https://lkml.kernel.org/r/20221109115047.2921851-5-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/Kconfig    |   9 ++
 drivers/block/zram/zram_drv.c | 264 +++++++++++++++++++++++++++++++++-
 drivers/block/zram/zram_drv.h |   7 +
 3 files changed, 277 insertions(+), 3 deletions(-)

diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index d4100b0c083ec..0386b7da02aa3 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -78,3 +78,12 @@ config ZRAM_MEMORY_TRACKING
 	  /sys/kernel/debug/zram/zramX/block_state.
 
 	  See Documentation/admin-guide/blockdev/zram.rst for more information.
+
+config ZRAM_MULTI_COMP
+	bool "Enable multiple compression streams"
+	depends on ZRAM
+	help
+	  This will enable multi-compression streams, so that ZRAM can
+	  re-compress pages using a potentially slower but more effective
+	  compression algorithm. Note, that IDLE page recompression
+	  requires ZRAM_MEMORY_TRACKING.
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 135fb946f83e2..97300b3a83c30 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -155,6 +155,25 @@ static inline bool is_partial_io(struct bio_vec *bvec)
 }
 #endif
 
+static inline void zram_set_priority(struct zram *zram, u32 index, u32 prio)
+{
+	prio &= ZRAM_COMP_PRIORITY_MASK;
+	/*
+	 * Clear previous priority value first, in case if we recompress
+	 * further an already recompressed page
+	 */
+	zram->table[index].flags &= ~(ZRAM_COMP_PRIORITY_MASK <<
+				      ZRAM_COMP_PRIORITY_BIT1);
+	zram->table[index].flags |= (prio << ZRAM_COMP_PRIORITY_BIT1);
+}
+
+static inline u32 zram_get_priority(struct zram *zram, u32 index)
+{
+	u32 prio = zram->table[index].flags >> ZRAM_COMP_PRIORITY_BIT1;
+
+	return prio & ZRAM_COMP_PRIORITY_MASK;
+}
+
 /*
  * Check if request is within bounds and aligned on zram logical blocks.
  */
@@ -1304,6 +1323,11 @@ static void zram_free_page(struct zram *zram, size_t index)
 		atomic64_dec(&zram->stats.huge_pages);
 	}
 
+	if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+		zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+
+	zram_set_priority(zram, index, 0);
+
 	if (zram_test_flag(zram, index, ZRAM_WB)) {
 		zram_clear_flag(zram, index, ZRAM_WB);
 		free_block_bdev(zram, zram_get_element(zram, index));
@@ -1364,6 +1388,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
 	unsigned long handle;
 	unsigned int size;
 	void *src, *dst;
+	u32 prio;
 	int ret;
 
 	handle = zram_get_handle(zram, index);
@@ -1380,8 +1405,10 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
 
 	size = zram_get_obj_size(zram, index);
 
-	if (size != PAGE_SIZE)
-		zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]);
+	if (size != PAGE_SIZE) {
+		prio = zram_get_priority(zram, index);
+		zstrm = zcomp_stream_get(zram->comps[prio]);
+	}
 
 	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
 	if (size == PAGE_SIZE) {
@@ -1393,7 +1420,7 @@ static int zram_read_from_zspool(struct zram *zram, struct page *page,
 		dst = kmap_atomic(page);
 		ret = zcomp_decompress(zstrm, src, size, dst);
 		kunmap_atomic(dst);
-		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
+		zcomp_stream_put(zram->comps[prio]);
 	}
 	zs_unmap_object(zram->mem_pool, handle);
 	return ret;
@@ -1624,6 +1651,235 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_ZRAM_MULTI_COMP
+/*
+ * This function will decompress (unless it's ZRAM_HUGE) the page and then
+ * attempt to compress it using provided compression algorithm priority
+ * (which is potentially more effective).
+ *
+ * Corresponding ZRAM slot should be locked.
+ */
+static int zram_recompress(struct zram *zram, u32 index, struct page *page,
+			   u32 threshold, u32 prio, u32 prio_max)
+{
+	struct zcomp_strm *zstrm = NULL;
+	unsigned long handle_old;
+	unsigned long handle_new;
+	unsigned int comp_len_old;
+	unsigned int comp_len_new;
+	void *src, *dst;
+	int ret;
+
+	handle_old = zram_get_handle(zram, index);
+	if (!handle_old)
+		return -EINVAL;
+
+	comp_len_old = zram_get_obj_size(zram, index);
+	/*
+	 * Do not recompress objects that are already "small enough".
+	 */
+	if (comp_len_old < threshold)
+		return 0;
+
+	ret = zram_read_from_zspool(zram, page, index);
+	if (ret)
+		return ret;
+
+	/*
+	 * Iterate the secondary comp algorithms list (in order of priority)
+	 * and try to recompress the page.
+	 */
+	for (; prio < prio_max; prio++) {
+		if (!zram->comps[prio])
+			continue;
+
+		/*
+		 * Skip if the object is already re-compressed with a higher
+		 * priority algorithm (or same algorithm).
+		 */
+		if (prio <= zram_get_priority(zram, index))
+			continue;
+
+		zstrm = zcomp_stream_get(zram->comps[prio]);
+		src = kmap_atomic(page);
+		ret = zcomp_compress(zstrm, src, &comp_len_new);
+		kunmap_atomic(src);
+
+		if (ret) {
+			zcomp_stream_put(zram->comps[prio]);
+			return ret;
+		}
+
+		/* Continue until we make progress */
+		if (comp_len_new >= huge_class_size ||
+		    comp_len_new >= comp_len_old ||
+		    (threshold && comp_len_new >= threshold)) {
+			zcomp_stream_put(zram->comps[prio]);
+			continue;
+		}
+
+		/* Recompression was successful so break out */
+		break;
+	}
+
+	/*
+	 * We did not try to recompress, e.g. when we have only one
+	 * secondary algorithm and the page is already recompressed
+	 * using that algorithm
+	 */
+	if (!zstrm)
+		return 0;
+
+	/*
+	 * All secondary algorithms failed to re-compress the page in a way
+	 * that would save memory, mark the object as incompressible so that
+	 * we will not try to compress it again.
+	 */
+	if (comp_len_new >= huge_class_size || comp_len_new >= comp_len_old) {
+		zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+		return 0;
+	}
+
+	/* Successful recompression but above threshold */
+	if (threshold && comp_len_new >= threshold)
+		return 0;
+
+	/*
+	 * No direct reclaim (slow path) for handle allocation and no
+	 * re-compression attempt (unlike in __zram_bvec_write()) since
+	 * we already have stored that object in zsmalloc. If we cannot
+	 * alloc memory for recompressed object then we bail out and
+	 * simply keep the old (existing) object in zsmalloc.
+	 */
+	handle_new = zs_malloc(zram->mem_pool, comp_len_new,
+			       __GFP_KSWAPD_RECLAIM |
+			       __GFP_NOWARN |
+			       __GFP_HIGHMEM |
+			       __GFP_MOVABLE);
+	if (IS_ERR_VALUE(handle_new)) {
+		zcomp_stream_put(zram->comps[prio]);
+		return PTR_ERR((void *)handle_new);
+	}
+
+	dst = zs_map_object(zram->mem_pool, handle_new, ZS_MM_WO);
+	memcpy(dst, zstrm->buffer, comp_len_new);
+	zcomp_stream_put(zram->comps[prio]);
+
+	zs_unmap_object(zram->mem_pool, handle_new);
+
+	zram_free_page(zram, index);
+	zram_set_handle(zram, index, handle_new);
+	zram_set_obj_size(zram, index, comp_len_new);
+	zram_set_priority(zram, index, prio);
+
+	atomic64_add(comp_len_new, &zram->stats.compr_data_size);
+	atomic64_inc(&zram->stats.pages_stored);
+
+	return 0;
+}
+
+#define RECOMPRESS_IDLE		(1 << 0)
+#define RECOMPRESS_HUGE		(1 << 1)
+
+static ssize_t recompress_store(struct device *dev,
+				struct device_attribute *attr,
+				const char *buf, size_t len)
+{
+	struct zram *zram = dev_to_zram(dev);
+	u32 mode = 0, threshold = 0, prio = ZRAM_SECONDARY_COMP;
+	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+	char *args, *param, *val;
+	unsigned long index;
+	struct page *page;
+	ssize_t ret;
+
+	args = skip_spaces(buf);
+	while (*args) {
+		args = next_arg(args, &param, &val);
+
+		if (!*val)
+			return -EINVAL;
+
+		if (!strcmp(param, "type")) {
+			if (!strcmp(val, "idle"))
+				mode = RECOMPRESS_IDLE;
+			if (!strcmp(val, "huge"))
+				mode = RECOMPRESS_HUGE;
+			if (!strcmp(val, "huge_idle"))
+				mode = RECOMPRESS_IDLE | RECOMPRESS_HUGE;
+			continue;
+		}
+
+		if (!strcmp(param, "threshold")) {
+			/*
+			 * We will re-compress only idle objects equal or
+			 * greater in size than watermark.
+			 */
+			ret = kstrtouint(val, 10, &threshold);
+			if (ret)
+				return ret;
+			continue;
+		}
+	}
+
+	if (threshold >= PAGE_SIZE)
+		return -EINVAL;
+
+	down_read(&zram->init_lock);
+	if (!init_done(zram)) {
+		ret = -EINVAL;
+		goto release_init_lock;
+	}
+
+	page = alloc_page(GFP_KERNEL);
+	if (!page) {
+		ret = -ENOMEM;
+		goto release_init_lock;
+	}
+
+	ret = len;
+	for (index = 0; index < nr_pages; index++) {
+		int err = 0;
+
+		zram_slot_lock(zram, index);
+
+		if (!zram_allocated(zram, index))
+			goto next;
+
+		if (mode & RECOMPRESS_IDLE &&
+		    !zram_test_flag(zram, index, ZRAM_IDLE))
+			goto next;
+
+		if (mode & RECOMPRESS_HUGE &&
+		    !zram_test_flag(zram, index, ZRAM_HUGE))
+			goto next;
+
+		if (zram_test_flag(zram, index, ZRAM_WB) ||
+		    zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
+		    zram_test_flag(zram, index, ZRAM_SAME) ||
+		    zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+			goto next;
+
+		err = zram_recompress(zram, index, page, threshold,
+				      prio, ZRAM_MAX_COMPS);
+next:
+		zram_slot_unlock(zram, index);
+		if (err) {
+			ret = err;
+			break;
+		}
+
+		cond_resched();
+	}
+
+	__free_page(page);
+
+release_init_lock:
+	up_read(&zram->init_lock);
+	return ret;
+}
+#endif
+
 /*
  * zram_bio_discard - handler on discard request
  * @index: physical block index in PAGE_SIZE units
@@ -2003,6 +2259,7 @@ static DEVICE_ATTR_RW(writeback_limit_enable);
 #endif
 #ifdef CONFIG_ZRAM_MULTI_COMP
 static DEVICE_ATTR_RW(recomp_algorithm);
+static DEVICE_ATTR_WO(recompress);
 #endif
 
 static struct attribute *zram_disk_attrs[] = {
@@ -2029,6 +2286,7 @@ static struct attribute *zram_disk_attrs[] = {
 	&dev_attr_debug_stat.attr,
 #ifdef CONFIG_ZRAM_MULTI_COMP
 	&dev_attr_recomp_algorithm.attr,
+	&dev_attr_recompress.attr,
 #endif
 	NULL,
 };
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 7a643c8c38eca..b80faae768354 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -40,6 +40,9 @@
  */
 #define ZRAM_FLAG_SHIFT (PAGE_SHIFT + 1)
 
+/* Only 2 bits are allowed for comp priority index */
+#define ZRAM_COMP_PRIORITY_MASK	0x3
+
 /* Flags for zram pages (table[page_no].flags) */
 enum zram_pageflags {
 	/* zram slot is locked */
@@ -49,6 +52,10 @@ enum zram_pageflags {
 	ZRAM_UNDER_WB,	/* page is under writeback */
 	ZRAM_HUGE,	/* Incompressible page */
 	ZRAM_IDLE,	/* not accessed page since last idle marking */
+	ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */
+
+	ZRAM_COMP_PRIORITY_BIT1, /* First bit of comp priority index */
+	ZRAM_COMP_PRIORITY_BIT2, /* Second bit of comp priority index */
 
 	__NR_ZRAM_PAGEFLAGS,
 };
-- 
GitLab


From 60e9b39ebec56467c36c3da76eee28083196cdf1 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:39 +0900
Subject: [PATCH 3030/4122] zram: add recompress flag to read_block_state()

Add a new flag to zram block state that shows if the page was recompressed
(using alternative compression algorithm).

Link: https://lkml.kernel.org/r/20221109115047.2921851-6-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/blockdev/zram.rst | 9 ++++++---
 drivers/block/zram/zram_drv.c               | 5 +++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index c73b16930449e..177a142c3146d 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -411,9 +411,10 @@ pages of the process with*pagemap.
 If you enable the feature, you could see block state via
 /sys/kernel/debug/zram/zram0/block_state". The output is as follows::
 
-	  300    75.033841 .wh.
-	  301    63.806904 s...
-	  302    63.806919 ..hi
+	  300    75.033841 .wh..
+	  301    63.806904 s....
+	  302    63.806919 ..hi.
+	  303    62.801919 ....r
 
 First column
 	zram's block index.
@@ -430,6 +431,8 @@ Third column
 		huge page
 	i:
 		idle page
+	r:
+		recompressed page (secondary compression algorithm)
 
 First line of above example says 300th block is accessed at 75.033841sec
 and the block's state is huge so it is written back to the backing
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 97300b3a83c30..ddbfa70ef9a33 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -936,13 +936,14 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
 
 		ts = ktime_to_timespec64(zram->table[index].ac_time);
 		copied = snprintf(kbuf + written, count,
-			"%12zd %12lld.%06lu %c%c%c%c\n",
+			"%12zd %12lld.%06lu %c%c%c%c%c\n",
 			index, (s64)ts.tv_sec,
 			ts.tv_nsec / NSEC_PER_USEC,
 			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
 			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
 			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
-			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.');
+			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
+			zram_get_priority(zram, index) ? 'r' : '.');
 
 		if (count <= copied) {
 			zram_slot_unlock(zram, index);
-- 
GitLab


From 9fda785dbd14cfc7d874d00d2b007cb143aa48d0 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:40 +0900
Subject: [PATCH 3031/4122] zram: clarify writeback_store() comment

Re-phrase writeback BIO error comment.

Link: https://lkml.kernel.org/r/20221109115047.2921851-7-senozhatsky@chromium.org
Reported-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index ddbfa70ef9a33..0ca0bf330d8f9 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -769,8 +769,12 @@ static ssize_t writeback_store(struct device *dev,
 			zram_clear_flag(zram, index, ZRAM_IDLE);
 			zram_slot_unlock(zram, index);
 			/*
-			 * Return last IO error unless every IO were
-			 * not suceeded.
+			 * BIO errors are not fatal, we continue and simply
+			 * attempt to writeback the remaining objects (pages).
+			 * At the same time we need to signal user-space that
+			 * some writes (at least one, but also could be all of
+			 * them) were not successful and we do so by returning
+			 * the most recent BIO error.
 			 */
 			ret = err;
 			continue;
-- 
GitLab


From f24ee92cbe13242758635e654b2422dbf4912e4b Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:41 +0900
Subject: [PATCH 3032/4122] zram: use IS_ERR_VALUE() to check for zs_malloc()
 errors

Avoid typecasts that are needed for IS_ERR() and use IS_ERR_VALUE()
instead.

Link: https://lkml.kernel.org/r/20221109115047.2921851-8-senozhatsky@chromium.org
Suggested-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 0ca0bf330d8f9..473fc5eb71a13 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1542,19 +1542,19 @@ compress_again:
 	 * if we have a 'non-null' handle here then we are coming
 	 * from the slow path and handle has already been allocated.
 	 */
-	if (IS_ERR((void *)handle))
+	if (IS_ERR_VALUE(handle))
 		handle = zs_malloc(zram->mem_pool, comp_len,
 				__GFP_KSWAPD_RECLAIM |
 				__GFP_NOWARN |
 				__GFP_HIGHMEM |
 				__GFP_MOVABLE);
-	if (IS_ERR((void *)handle)) {
+	if (IS_ERR_VALUE(handle)) {
 		zcomp_stream_put(zram->comps[ZRAM_PRIMARY_COMP]);
 		atomic64_inc(&zram->stats.writestall);
 		handle = zs_malloc(zram->mem_pool, comp_len,
 				GFP_NOIO | __GFP_HIGHMEM |
 				__GFP_MOVABLE);
-		if (IS_ERR((void *)handle))
+		if (IS_ERR_VALUE(handle))
 			return PTR_ERR((void *)handle);
 
 		if (comp_len != PAGE_SIZE)
-- 
GitLab


From 7c2af309abd24ff4e313246bf9b68f398d95c871 Mon Sep 17 00:00:00 2001
From: Alexey Romanov <avromanov@sberdevices.ru>
Date: Wed, 9 Nov 2022 20:50:42 +0900
Subject: [PATCH 3033/4122] zram: add size class equals check into
 recompression

It makes no sense for us to recompress the object if it will be in the
same size class.  We anyway don't get any memory gain.  But, at the same
time, we get a CPU time overhead when inserting this object into zspage
and decompressing it afterwards.

[senozhatsky: rebased and fixed conflicts]
Link: https://lkml.kernel.org/r/20221109115047.2921851-9-senozhatsky@chromium.org
Signed-off-by: Alexey Romanov <avromanov@sberdevices.ru>
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 11 ++++++++++-
 include/linux/zsmalloc.h      |  2 ++
 mm/zsmalloc.c                 | 21 +++++++++++++++++++++
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 473fc5eb71a13..66659f16f6c8b 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1672,6 +1672,8 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 	unsigned long handle_new;
 	unsigned int comp_len_old;
 	unsigned int comp_len_new;
+	unsigned int class_index_old;
+	unsigned int class_index_new;
 	void *src, *dst;
 	int ret;
 
@@ -1690,6 +1692,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 	if (ret)
 		return ret;
 
+	class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
 	/*
 	 * Iterate the secondary comp algorithms list (in order of priority)
 	 * and try to recompress the page.
@@ -1715,9 +1718,13 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 			return ret;
 		}
 
+		class_index_new = zs_lookup_class_index(zram->mem_pool,
+							comp_len_new);
+
 		/* Continue until we make progress */
 		if (comp_len_new >= huge_class_size ||
 		    comp_len_new >= comp_len_old ||
+		    class_index_new >= class_index_old ||
 		    (threshold && comp_len_new >= threshold)) {
 			zcomp_stream_put(zram->comps[prio]);
 			continue;
@@ -1740,7 +1747,9 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 	 * that would save memory, mark the object as incompressible so that
 	 * we will not try to compress it again.
 	 */
-	if (comp_len_new >= huge_class_size || comp_len_new >= comp_len_old) {
+	if (comp_len_new >= huge_class_size ||
+	    comp_len_new >= comp_len_old ||
+	    class_index_new >= class_index_old) {
 		zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
 		return 0;
 	}
diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h
index 2a430e713ce51..a48cd0ffe57dd 100644
--- a/include/linux/zsmalloc.h
+++ b/include/linux/zsmalloc.h
@@ -55,5 +55,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle);
 unsigned long zs_get_total_pages(struct zs_pool *pool);
 unsigned long zs_compact(struct zs_pool *pool);
 
+unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size);
+
 void zs_pool_stats(struct zs_pool *pool, struct zs_pool_stats *stats);
 #endif
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b52b7bb88b522..78feda34ad9a8 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -1205,6 +1205,27 @@ static bool zspage_full(struct size_class *class, struct zspage *zspage)
 	return get_zspage_inuse(zspage) == class->objs_per_zspage;
 }
 
+/**
+ * zs_lookup_class_index() - Returns index of the zsmalloc &size_class
+ * that hold objects of the provided size.
+ * @pool: zsmalloc pool to use
+ * @size: object size
+ *
+ * Context: Any context.
+ *
+ * Return: the index of the zsmalloc &size_class that hold objects of the
+ * provided size.
+ */
+unsigned int zs_lookup_class_index(struct zs_pool *pool, unsigned int size)
+{
+	struct size_class *class;
+
+	class = pool->size_class[get_size_class_index(size)];
+
+	return class->index;
+}
+EXPORT_SYMBOL_GPL(zs_lookup_class_index);
+
 unsigned long zs_get_total_pages(struct zs_pool *pool)
 {
 	return atomic_long_read(&pool->pages_allocated);
-- 
GitLab


From 4942cf6ad07c487d24112ffbb27362f4e6b409b8 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:43 +0900
Subject: [PATCH 3034/4122] zram: remove redundant checks from
 zram_recompress()

Size class index comparison is powerful enough so we can remove object
size comparisons.

Link: https://lkml.kernel.org/r/20221109115047.2921851-10-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 66659f16f6c8b..72beb33366fb2 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1722,9 +1722,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 							comp_len_new);
 
 		/* Continue until we make progress */
-		if (comp_len_new >= huge_class_size ||
-		    comp_len_new >= comp_len_old ||
-		    class_index_new >= class_index_old ||
+		if (class_index_new >= class_index_old ||
 		    (threshold && comp_len_new >= threshold)) {
 			zcomp_stream_put(zram->comps[prio]);
 			continue;
@@ -1747,9 +1745,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 	 * that would save memory, mark the object as incompressible so that
 	 * we will not try to compress it again.
 	 */
-	if (comp_len_new >= huge_class_size ||
-	    comp_len_new >= comp_len_old ||
-	    class_index_new >= class_index_old) {
+	if (class_index_new >= class_index_old) {
 		zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
 		return 0;
 	}
-- 
GitLab


From a55cf9648d3de486ccf0eca980a02f0faff8ec45 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:44 +0900
Subject: [PATCH 3035/4122] zram: add algo parameter support to
 zram_recompress()

Recompression iterates through all the registered secondary compression
algorithms in order of their priorities so that we have higher chances of
finding the algorithm that compresses a particular page.  This, however,
may not always be best approach and sometimes we may want to limit
recompression to only one particular algorithm.  For instance, when a
higher priority algorithm uses too much power and device has a relatively
low battery level we may want to limit recompression to use only a lower
priority algorithm, which uses less power.

Introduce algo= parameter support to recompression sysfs knob so that
user-sapce can request recompression with particular algorithm only:

  echo "type=idle algo=zstd" > /sys/block/zramX/recompress

Link: https://lkml.kernel.org/r/20221109115047.2921851-11-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 54 +++++++++++++++++++++++++++++------
 drivers/block/zram/zram_drv.h |  1 +
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 72beb33366fb2..798c421fdd364 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1674,6 +1674,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 	unsigned int comp_len_new;
 	unsigned int class_index_old;
 	unsigned int class_index_new;
+	u32 num_recomps = 0;
 	void *src, *dst;
 	int ret;
 
@@ -1708,6 +1709,7 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 		if (prio <= zram_get_priority(zram, index))
 			continue;
 
+		num_recomps++;
 		zstrm = zcomp_stream_get(zram->comps[prio]);
 		src = kmap_atomic(page);
 		ret = zcomp_compress(zstrm, src, &comp_len_new);
@@ -1740,13 +1742,19 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
 	if (!zstrm)
 		return 0;
 
-	/*
-	 * All secondary algorithms failed to re-compress the page in a way
-	 * that would save memory, mark the object as incompressible so that
-	 * we will not try to compress it again.
-	 */
 	if (class_index_new >= class_index_old) {
-		zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+		/*
+		 * Secondary algorithms failed to re-compress the page
+		 * in a way that would save memory, mark the object as
+		 * incompressible so that we will not try to compress
+		 * it again.
+		 *
+		 * We need to make sure that all secondary algorithms have
+		 * failed, so we test if the number of recompressions matches
+		 * the number of active secondary algorithms.
+		 */
+		if (num_recomps == zram->num_active_comps - 1)
+			zram_set_flag(zram, index, ZRAM_INCOMPRESSIBLE);
 		return 0;
 	}
 
@@ -1795,10 +1803,11 @@ static ssize_t recompress_store(struct device *dev,
 				struct device_attribute *attr,
 				const char *buf, size_t len)
 {
+	u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
 	struct zram *zram = dev_to_zram(dev);
-	u32 mode = 0, threshold = 0, prio = ZRAM_SECONDARY_COMP;
 	unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
-	char *args, *param, *val;
+	char *args, *param, *val, *algo = NULL;
+	u32 mode = 0, threshold = 0;
 	unsigned long index;
 	struct page *page;
 	ssize_t ret;
@@ -1830,6 +1839,11 @@ static ssize_t recompress_store(struct device *dev,
 				return ret;
 			continue;
 		}
+
+		if (!strcmp(param, "algo")) {
+			algo = val;
+			continue;
+		}
 	}
 
 	if (threshold >= PAGE_SIZE)
@@ -1841,6 +1855,26 @@ static ssize_t recompress_store(struct device *dev,
 		goto release_init_lock;
 	}
 
+	if (algo) {
+		bool found = false;
+
+		for (; prio < ZRAM_MAX_COMPS; prio++) {
+			if (!zram->comp_algs[prio])
+				continue;
+
+			if (!strcmp(zram->comp_algs[prio], algo)) {
+				prio_max = min(prio + 1, ZRAM_MAX_COMPS);
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			ret = -EINVAL;
+			goto release_init_lock;
+		}
+	}
+
 	page = alloc_page(GFP_KERNEL);
 	if (!page) {
 		ret = -ENOMEM;
@@ -1871,7 +1905,7 @@ static ssize_t recompress_store(struct device *dev,
 			goto next;
 
 		err = zram_recompress(zram, index, page, threshold,
-				      prio, ZRAM_MAX_COMPS);
+				      prio, prio_max);
 next:
 		zram_slot_unlock(zram, index);
 		if (err) {
@@ -2107,6 +2141,7 @@ static void zram_destroy_comps(struct zram *zram)
 		if (!comp)
 			continue;
 		zcomp_destroy(comp);
+		zram->num_active_comps--;
 	}
 }
 
@@ -2174,6 +2209,7 @@ static ssize_t disksize_store(struct device *dev,
 		}
 
 		zram->comps[prio] = comp;
+		zram->num_active_comps++;
 	}
 	zram->disksize = disksize;
 	set_capacity_and_notify(zram->disk, zram->disksize >> SECTOR_SHIFT);
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index b80faae768354..473325415a74c 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -125,6 +125,7 @@ struct zram {
 	 */
 	u64 disksize;	/* bytes */
 	const char *comp_algs[ZRAM_MAX_COMPS];
+	s8 num_active_comps;
 	/*
 	 * zram is claimed so open request will be failed
 	 */
-- 
GitLab


From 443dd798062c1549e790539e572cbda4b7a8df30 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:45 +0900
Subject: [PATCH 3036/4122] documentation: add zram recompression documentation

Document user-space visible device attributes that are enabled by
ZRAM_MULTI_COMP.

Link: https://lkml.kernel.org/r/20221109115047.2921851-12-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/blockdev/zram.rst | 81 +++++++++++++++++++++
 1 file changed, 81 insertions(+)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index 177a142c3146d..d898b7ace33de 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -401,6 +401,87 @@ budget in next setting is user's job.
 If admin wants to measure writeback count in a certain period, they could
 know it via /sys/block/zram0/bd_stat's 3rd column.
 
+recompression
+-------------
+
+With CONFIG_ZRAM_MULTI_COMP, zram can recompress pages using alternative
+(secondary) compression algorithms. The basic idea is that alternative
+compression algorithm can provide better compression ratio at a price of
+(potentially) slower compression/decompression speeds. Alternative compression
+algorithm can, for example, be more successful compressing huge pages (those
+that default algorithm failed to compress). Another application is idle pages
+recompression - pages that are cold and sit in the memory can be recompressed
+using more effective algorithm and, hence, reduce zsmalloc memory usage.
+
+With CONFIG_ZRAM_MULTI_COMP, zram supports up to 4 compression algorithms:
+one primary and up to 3 secondary ones. Primary zram compressor is explained
+in "3) Select compression algorithm", secondary algorithms are configured
+using recomp_algorithm device attribute.
+
+Example:::
+
+	#show supported recompression algorithms
+	cat /sys/block/zramX/recomp_algorithm
+	#1: lzo lzo-rle lz4 lz4hc [zstd]
+	#2: lzo lzo-rle lz4 [lz4hc] zstd
+
+Alternative compression algorithms are sorted by priority. In the example
+above, zstd is used as the first alternative algorithm, which has priority
+of 1, while lz4hc is configured as a compression algorithm with priority 2.
+Alternative compression algorithm's priority is provided during algorithms
+configuration:::
+
+	#select zstd recompression algorithm, priority 1
+	echo "algo=zstd priority=1" > /sys/block/zramX/recomp_algorithm
+
+	#select deflate recompression algorithm, priority 2
+	echo "algo=deflate priority=2" > /sys/block/zramX/recomp_algorithm
+
+Another device attribute that CONFIG_ZRAM_MULTI_COMP enables is recompress,
+which controls recompression.
+
+Examples:::
+
+	#IDLE pages recompression is activated by `idle` mode
+	echo "type=idle" > /sys/block/zramX/recompress
+
+	#HUGE pages recompression is activated by `huge` mode
+	echo "type=huge" > /sys/block/zram0/recompress
+
+	#HUGE_IDLE pages recompression is activated by `huge_idle` mode
+	echo "type=huge_idle" > /sys/block/zramX/recompress
+
+The number of idle pages can be significant, so user-space can pass a size
+threshold (in bytes) to the recompress knob: zram will recompress only pages
+of equal or greater size:::
+
+	#recompress all pages larger than 3000 bytes
+	echo "threshold=3000" > /sys/block/zramX/recompress
+
+	#recompress idle pages larger than 2000 bytes
+	echo "type=idle threshold=2000" > /sys/block/zramX/recompress
+
+Recompression of idle pages requires memory tracking.
+
+During re-compression for every page, that matches re-compression criteria,
+ZRAM iterates the list of registered alternative compression algorithms in
+order of their priorities. ZRAM stops either when re-compression was
+successful (re-compressed object is smaller in size than the original one)
+and matches re-compression criteria (e.g. size threshold) or when there are
+no secondary algorithms left to try. If none of the secondary algorithms can
+successfully re-compressed the page such a page is marked as incompressible,
+so ZRAM will not attempt to re-compress it in the future.
+
+This re-compression behaviour, when it iterates through the list of
+registered compression algorithms, increases our chances of finding the
+algorithm that successfully compresses a particular page. Sometimes, however,
+it is convenient (and sometimes even necessary) to limit recompression to
+only one particular algorithm so that it will not try any other algorithms.
+This can be achieved by providing a algo=NAME parameter:::
+
+	#use zstd algorithm only (if registered)
+	echo "type=huge algo=zstd" > /sys/block/zramX/recompress
+
 memory tracking
 ===============
 
-- 
GitLab


From b46f9ea3cb351587b2cfc68f7211f7a7cc5b6673 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:46 +0900
Subject: [PATCH 3037/4122] zram: add incompressible writeback

Add support for incompressible pages writeback:

  echo incompressible > /sys/block/zramX/writeback

Link: https://lkml.kernel.org/r/20221109115047.2921851-13-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/blockdev/zram.rst |  7 ++++++-
 drivers/block/zram/zram_drv.c               | 18 ++++++++++++------
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index d898b7ace33de..f14c8c2e42f3e 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -348,8 +348,13 @@ this can be accomplished with::
 
         echo huge_idle > /sys/block/zramX/writeback
 
+If a user chooses to writeback only incompressible pages (pages that none of
+algorithms can compress) this can be accomplished with::
+
+	echo incompressible > /sys/block/zramX/writeback
+
 If an admin wants to write a specific page in zram device to the backing device,
-they could write a page index into the interface.
+they could write a page index into the interface::
 
 	echo "page_index=1251" > /sys/block/zramX/writeback
 
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 798c421fdd364..25b7ff2b56bf3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -645,10 +645,10 @@ static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
 
 #define PAGE_WB_SIG "page_index="
 
-#define PAGE_WRITEBACK 0
-#define HUGE_WRITEBACK (1<<0)
-#define IDLE_WRITEBACK (1<<1)
-
+#define PAGE_WRITEBACK			0
+#define HUGE_WRITEBACK			(1<<0)
+#define IDLE_WRITEBACK			(1<<1)
+#define INCOMPRESSIBLE_WRITEBACK	(1<<2)
 
 static ssize_t writeback_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t len)
@@ -669,6 +669,8 @@ static ssize_t writeback_store(struct device *dev,
 		mode = HUGE_WRITEBACK;
 	else if (sysfs_streq(buf, "huge_idle"))
 		mode = IDLE_WRITEBACK | HUGE_WRITEBACK;
+	else if (sysfs_streq(buf, "incompressible"))
+		mode = INCOMPRESSIBLE_WRITEBACK;
 	else {
 		if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
 			return -EINVAL;
@@ -731,11 +733,15 @@ static ssize_t writeback_store(struct device *dev,
 			goto next;
 
 		if (mode & IDLE_WRITEBACK &&
-			  !zram_test_flag(zram, index, ZRAM_IDLE))
+		    !zram_test_flag(zram, index, ZRAM_IDLE))
 			goto next;
 		if (mode & HUGE_WRITEBACK &&
-			  !zram_test_flag(zram, index, ZRAM_HUGE))
+		    !zram_test_flag(zram, index, ZRAM_HUGE))
 			goto next;
+		if (mode & INCOMPRESSIBLE_WRITEBACK &&
+		    !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+			goto next;
+
 		/*
 		 * Clearing ZRAM_UNDER_WB is duty of caller.
 		 * IOW, zram_free_page never clear it.
-- 
GitLab


From 77db7bb56bd711586243924a5582727f7a93fb7f Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Wed, 9 Nov 2022 20:50:47 +0900
Subject: [PATCH 3038/4122] zram: add incompressible flag to read_block_state()

Add a new flag to zram block state that shows if the page is
incompressible: that none of the algorithm (including secondary ones)
could compress it.

Link: https://lkml.kernel.org/r/20221109115047.2921851-14-senozhatsky@chromium.org
Suggested-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Alexey Romanov <avromanov@sberdevices.ru>
Cc: Nhat Pham <nphamcs@gmail.com>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/blockdev/zram.rst | 11 +++++++----
 drivers/block/zram/zram_drv.c               |  6 ++++--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/Documentation/admin-guide/blockdev/zram.rst b/Documentation/admin-guide/blockdev/zram.rst
index f14c8c2e42f3e..e4551579cb128 100644
--- a/Documentation/admin-guide/blockdev/zram.rst
+++ b/Documentation/admin-guide/blockdev/zram.rst
@@ -497,10 +497,11 @@ pages of the process with*pagemap.
 If you enable the feature, you could see block state via
 /sys/kernel/debug/zram/zram0/block_state". The output is as follows::
 
-	  300    75.033841 .wh..
-	  301    63.806904 s....
-	  302    63.806919 ..hi.
-	  303    62.801919 ....r
+	  300    75.033841 .wh...
+	  301    63.806904 s.....
+	  302    63.806919 ..hi..
+	  303    62.801919 ....r.
+	  304   146.781902 ..hi.n
 
 First column
 	zram's block index.
@@ -519,6 +520,8 @@ Third column
 		idle page
 	r:
 		recompressed page (secondary compression algorithm)
+	n:
+		none (including secondary) of algorithms could compress it
 
 First line of above example says 300th block is accessed at 75.033841sec
 and the block's state is huge so it is written back to the backing
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 25b7ff2b56bf3..9d33801e8ba87 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -946,14 +946,16 @@ static ssize_t read_block_state(struct file *file, char __user *buf,
 
 		ts = ktime_to_timespec64(zram->table[index].ac_time);
 		copied = snprintf(kbuf + written, count,
-			"%12zd %12lld.%06lu %c%c%c%c%c\n",
+			"%12zd %12lld.%06lu %c%c%c%c%c%c\n",
 			index, (s64)ts.tv_sec,
 			ts.tv_nsec / NSEC_PER_USEC,
 			zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
 			zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
 			zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.',
 			zram_test_flag(zram, index, ZRAM_IDLE) ? 'i' : '.',
-			zram_get_priority(zram, index) ? 'r' : '.');
+			zram_get_priority(zram, index) ? 'r' : '.',
+			zram_test_flag(zram, index,
+				       ZRAM_INCOMPRESSIBLE) ? 'n' : '.');
 
 		if (count <= copied) {
 			zram_slot_unlock(zram, index);
-- 
GitLab


From c959a0e8de2c4db6ca6cc8f490223e2e1e58934b Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Tue, 15 Nov 2022 11:03:14 +0900
Subject: [PATCH 3039/4122] Docs/ABI/zram: document zram recompress sysfs knobs

Document zram re-compression sysfs knobs.

Link: https://lkml.kernel.org/r/20221115020314.386235-1-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-block-zram | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-block-zram b/Documentation/ABI/testing/sysfs-block-zram
index 14b2bf2e5105c..628a00fb20a95 100644
--- a/Documentation/ABI/testing/sysfs-block-zram
+++ b/Documentation/ABI/testing/sysfs-block-zram
@@ -137,3 +137,17 @@ Description:
 		The writeback_limit file is read-write and specifies the maximum
 		amount of writeback ZRAM can do. The limit could be changed
 		in run time.
+
+What:		/sys/block/zram<id>/recomp_algorithm
+Date:		November 2022
+Contact:	Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+		The recomp_algorithm file is read-write and allows to set
+		or show secondary compression algorithms.
+
+What:		/sys/block/zram<id>/recompress
+Date:		November 2022
+Contact:	Sergey Senozhatsky <senozhatsky@chromium.org>
+Description:
+		The recompress file is write-only and triggers re-compression
+		with secondary compression algorithms.
-- 
GitLab


From c66b6ead74ffdad8659eb829468343a88afc2f2c Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <quic_pkondeti@quicinc.com>
Date: Wed, 9 Nov 2022 14:56:46 +0530
Subject: [PATCH 3040/4122] mm/kfence: remove hung_task cruft

commit fdf756f71271 ("sched: Fix more TASK_state comparisons") makes
hung_task not to monitor TASK_IDLE tasks.  The special handling to
workaround hung_task warnings is not required anymore.

Link: https://lkml.kernel.org/r/1667986006-25420-1-git-send-email-quic_pkondeti@quicinc.com
Signed-off-by: Pavankumar Kondeti <quic_pkondeti@quicinc.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kfence/core.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 141788858b708..08f5bd6fc36d6 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -26,7 +26,6 @@
 #include <linux/random.h>
 #include <linux/rcupdate.h>
 #include <linux/sched/clock.h>
-#include <linux/sched/sysctl.h>
 #include <linux/seq_file.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
@@ -799,16 +798,7 @@ static void toggle_allocation_gate(struct work_struct *work)
 	/* Enable static key, and await allocation to happen. */
 	static_branch_enable(&kfence_allocation_key);
 
-	if (sysctl_hung_task_timeout_secs) {
-		/*
-		 * During low activity with no allocations we might wait a
-		 * while; let's avoid the hung task warning.
-		 */
-		wait_event_idle_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
-					sysctl_hung_task_timeout_secs * HZ / 2);
-	} else {
-		wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
-	}
+	wait_event_idle(allocation_wait, atomic_read(&kfence_allocation_gate));
 
 	/* Disable static key and reset timer. */
 	static_branch_disable(&kfence_allocation_key);
-- 
GitLab


From 16fd6b31dd9b24acf83d439a73a41c4138199424 Mon Sep 17 00:00:00 2001
From: Baolin Wang <baolin.wang@linux.alibaba.com>
Date: Wed, 9 Nov 2022 16:40:27 +0800
Subject: [PATCH 3041/4122] Revert "mm: migration: fix the FOLL_GET failure on
 following huge page"

Revert commit 831568214883 ("mm: migration: fix the FOLL_GET failure on
following huge page"), since after commit 1a6baaa0db73 ("s390/hugetlb:
switch to generic version of follow_huge_pud()") and commit 57a196a58421
("hugetlb: simplify hugetlb handling in follow_page_mask") were merged,
now all the following huge page routines can support FOLL_GET operation.

Link: https://lkml.kernel.org/r/496786039852aba90ffa68f10d0df3f4236a990b.1667983080.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Acked-by: Haiyue Wang <haiyue.wang@intel.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/migrate.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 4aea647a01801..4eccf4e1da2ce 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1899,7 +1899,6 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 
 	for (i = 0; i < nr_pages; i++) {
 		unsigned long addr = (unsigned long)(*pages);
-		unsigned int foll_flags = FOLL_DUMP;
 		struct vm_area_struct *vma;
 		struct page *page;
 		int err = -EFAULT;
@@ -1908,12 +1907,8 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 		if (!vma)
 			goto set_status;
 
-		/* Not all huge page follow APIs support 'FOLL_GET' */
-		if (!is_vm_hugetlb_page(vma))
-			foll_flags |= FOLL_GET;
-
 		/* FOLL_DUMP to ignore special (like zero) pages */
-		page = follow_page(vma, addr, foll_flags);
+		page = follow_page(vma, addr, FOLL_GET | FOLL_DUMP);
 
 		err = PTR_ERR(page);
 		if (IS_ERR(page))
@@ -1926,8 +1921,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages,
 		if (!is_zone_device_page(page))
 			err = page_to_nid(page);
 
-		if (foll_flags & FOLL_GET)
-			put_page(page);
+		put_page(page);
 set_status:
 		*status = err;
 
-- 
GitLab


From 49f51859221a3dfee27488eaeaff800459cac6a9 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Wed, 9 Nov 2022 09:23:47 +0800
Subject: [PATCH 3042/4122] migrate: convert unmap_and_move() to use folios

Patch series "migrate: convert migrate_pages()/unmap_and_move() to use
folios", v2.

The conversion is quite straightforward, just replace the page API to the
corresponding folio API.  migrate_pages() and unmap_and_move() mostly work
with folios (head pages) only.


This patch (of 2):

Quite straightforward, the page functions are converted to corresponding
folio functions.  Same for comments.

Link: https://lkml.kernel.org/r/20221109012348.93849-1-ying.huang@intel.com
Link: https://lkml.kernel.org/r/20221109012348.93849-2-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Reviewed-by: Zi Yan <ziy@nvidia.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Oscar Salvador <osalvador@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/migrate.c | 54 ++++++++++++++++++++++++++--------------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 4eccf4e1da2ce..e41a3cd24a598 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1150,79 +1150,79 @@ out:
 }
 
 /*
- * Obtain the lock on page, remove all ptes and migrate the page
- * to the newly allocated page in newpage.
+ * Obtain the lock on folio, remove all ptes and migrate the folio
+ * to the newly allocated folio in dst.
  */
 static int unmap_and_move(new_page_t get_new_page,
 				   free_page_t put_new_page,
-				   unsigned long private, struct page *page,
+				   unsigned long private, struct folio *src,
 				   int force, enum migrate_mode mode,
 				   enum migrate_reason reason,
 				   struct list_head *ret)
 {
-	struct folio *dst, *src = page_folio(page);
+	struct folio *dst;
 	int rc = MIGRATEPAGE_SUCCESS;
 	struct page *newpage = NULL;
 
-	if (!thp_migration_supported() && PageTransHuge(page))
+	if (!thp_migration_supported() && folio_test_transhuge(src))
 		return -ENOSYS;
 
-	if (page_count(page) == 1) {
-		/* Page was freed from under us. So we are done. */
-		ClearPageActive(page);
-		ClearPageUnevictable(page);
+	if (folio_ref_count(src) == 1) {
+		/* Folio was freed from under us. So we are done. */
+		folio_clear_active(src);
+		folio_clear_unevictable(src);
 		/* free_pages_prepare() will clear PG_isolated. */
 		goto out;
 	}
 
-	newpage = get_new_page(page, private);
+	newpage = get_new_page(&src->page, private);
 	if (!newpage)
 		return -ENOMEM;
 	dst = page_folio(newpage);
 
-	newpage->private = 0;
+	dst->private = 0;
 	rc = __unmap_and_move(src, dst, force, mode);
 	if (rc == MIGRATEPAGE_SUCCESS)
-		set_page_owner_migrate_reason(newpage, reason);
+		set_page_owner_migrate_reason(&dst->page, reason);
 
 out:
 	if (rc != -EAGAIN) {
 		/*
-		 * A page that has been migrated has all references
-		 * removed and will be freed. A page that has not been
+		 * A folio that has been migrated has all references
+		 * removed and will be freed. A folio that has not been
 		 * migrated will have kept its references and be restored.
 		 */
-		list_del(&page->lru);
+		list_del(&src->lru);
 	}
 
 	/*
 	 * If migration is successful, releases reference grabbed during
-	 * isolation. Otherwise, restore the page to right list unless
+	 * isolation. Otherwise, restore the folio to right list unless
 	 * we want to retry.
 	 */
 	if (rc == MIGRATEPAGE_SUCCESS) {
 		/*
-		 * Compaction can migrate also non-LRU pages which are
+		 * Compaction can migrate also non-LRU folios which are
 		 * not accounted to NR_ISOLATED_*. They can be recognized
-		 * as __PageMovable
+		 * as __folio_test_movable
 		 */
-		if (likely(!__PageMovable(page)))
-			mod_node_page_state(page_pgdat(page), NR_ISOLATED_ANON +
-					page_is_file_lru(page), -thp_nr_pages(page));
+		if (likely(!__folio_test_movable(src)))
+			mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
+					folio_is_file_lru(src), -folio_nr_pages(src));
 
 		if (reason != MR_MEMORY_FAILURE)
 			/*
-			 * We release the page in page_handle_poison.
+			 * We release the folio in page_handle_poison.
 			 */
-			put_page(page);
+			folio_put(src);
 	} else {
 		if (rc != -EAGAIN)
-			list_add_tail(&page->lru, ret);
+			list_add_tail(&src->lru, ret);
 
 		if (put_new_page)
-			put_new_page(newpage, private);
+			put_new_page(&dst->page, private);
 		else
-			put_page(newpage);
+			folio_put(dst);
 	}
 
 	return rc;
@@ -1459,7 +1459,7 @@ thp_subpage_migration:
 						&ret_pages);
 			else
 				rc = unmap_and_move(get_new_page, put_new_page,
-						private, page, pass > 2, mode,
+						private, page_folio(page), pass > 2, mode,
 						reason, &ret_pages);
 			/*
 			 * The rules are:
-- 
GitLab


From eaec4e639f11413ce75fbf38affd1aa5c40979e9 Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Wed, 9 Nov 2022 09:23:48 +0800
Subject: [PATCH 3043/4122] migrate: convert migrate_pages() to use folios

Quite straightforward, the page functions are converted to corresponding
folio functions.  Same for comments.

THP specific code are converted to be large folio.

Link: https://lkml.kernel.org/r/20221109012348.93849-3-ying.huang@intel.com
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Tested-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/migrate.c | 210 +++++++++++++++++++++++++++------------------------
 1 file changed, 112 insertions(+), 98 deletions(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index e41a3cd24a598..4aa3b6d4f67c9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1373,231 +1373,245 @@ out:
 	return rc;
 }
 
-static inline int try_split_thp(struct page *page, struct list_head *split_pages)
+static inline int try_split_folio(struct folio *folio, struct list_head *split_folios)
 {
 	int rc;
 
-	lock_page(page);
-	rc = split_huge_page_to_list(page, split_pages);
-	unlock_page(page);
+	folio_lock(folio);
+	rc = split_folio_to_list(folio, split_folios);
+	folio_unlock(folio);
 	if (!rc)
-		list_move_tail(&page->lru, split_pages);
+		list_move_tail(&folio->lru, split_folios);
 
 	return rc;
 }
 
 /*
- * migrate_pages - migrate the pages specified in a list, to the free pages
+ * migrate_pages - migrate the folios specified in a list, to the free folios
  *		   supplied as the target for the page migration
  *
- * @from:		The list of pages to be migrated.
- * @get_new_page:	The function used to allocate free pages to be used
- *			as the target of the page migration.
- * @put_new_page:	The function used to free target pages if migration
+ * @from:		The list of folios to be migrated.
+ * @get_new_page:	The function used to allocate free folios to be used
+ *			as the target of the folio migration.
+ * @put_new_page:	The function used to free target folios if migration
  *			fails, or NULL if no special handling is necessary.
  * @private:		Private data to be passed on to get_new_page()
  * @mode:		The migration mode that specifies the constraints for
- *			page migration, if any.
- * @reason:		The reason for page migration.
- * @ret_succeeded:	Set to the number of normal pages migrated successfully if
+ *			folio migration, if any.
+ * @reason:		The reason for folio migration.
+ * @ret_succeeded:	Set to the number of folios migrated successfully if
  *			the caller passes a non-NULL pointer.
  *
- * The function returns after 10 attempts or if no pages are movable any more
- * because the list has become empty or no retryable pages exist any more.
- * It is caller's responsibility to call putback_movable_pages() to return pages
+ * The function returns after 10 attempts or if no folios are movable any more
+ * because the list has become empty or no retryable folios exist any more.
+ * It is caller's responsibility to call putback_movable_pages() to return folios
  * to the LRU or free list only if ret != 0.
  *
- * Returns the number of {normal page, THP, hugetlb} that were not migrated, or
- * an error code. The number of THP splits will be considered as the number of
- * non-migrated THP, no matter how many subpages of the THP are migrated successfully.
+ * Returns the number of {normal folio, large folio, hugetlb} that were not
+ * migrated, or an error code. The number of large folio splits will be
+ * considered as the number of non-migrated large folio, no matter how many
+ * split folios of the large folio are migrated successfully.
  */
 int migrate_pages(struct list_head *from, new_page_t get_new_page,
 		free_page_t put_new_page, unsigned long private,
 		enum migrate_mode mode, int reason, unsigned int *ret_succeeded)
 {
 	int retry = 1;
+	int large_retry = 1;
 	int thp_retry = 1;
 	int nr_failed = 0;
 	int nr_failed_pages = 0;
 	int nr_retry_pages = 0;
 	int nr_succeeded = 0;
 	int nr_thp_succeeded = 0;
+	int nr_large_failed = 0;
 	int nr_thp_failed = 0;
 	int nr_thp_split = 0;
 	int pass = 0;
+	bool is_large = false;
 	bool is_thp = false;
-	struct page *page;
-	struct page *page2;
-	int rc, nr_subpages;
-	LIST_HEAD(ret_pages);
-	LIST_HEAD(thp_split_pages);
+	struct folio *folio, *folio2;
+	int rc, nr_pages;
+	LIST_HEAD(ret_folios);
+	LIST_HEAD(split_folios);
 	bool nosplit = (reason == MR_NUMA_MISPLACED);
-	bool no_subpage_counting = false;
+	bool no_split_folio_counting = false;
 
 	trace_mm_migrate_pages_start(mode, reason);
 
-thp_subpage_migration:
-	for (pass = 0; pass < 10 && (retry || thp_retry); pass++) {
+split_folio_migration:
+	for (pass = 0; pass < 10 && (retry || large_retry); pass++) {
 		retry = 0;
+		large_retry = 0;
 		thp_retry = 0;
 		nr_retry_pages = 0;
 
-		list_for_each_entry_safe(page, page2, from, lru) {
+		list_for_each_entry_safe(folio, folio2, from, lru) {
 			/*
-			 * THP statistics is based on the source huge page.
-			 * Capture required information that might get lost
-			 * during migration.
+			 * Large folio statistics is based on the source large
+			 * folio. Capture required information that might get
+			 * lost during migration.
 			 */
-			is_thp = PageTransHuge(page) && !PageHuge(page);
-			nr_subpages = compound_nr(page);
+			is_large = folio_test_large(folio) && !folio_test_hugetlb(folio);
+			is_thp = is_large && folio_test_pmd_mappable(folio);
+			nr_pages = folio_nr_pages(folio);
 			cond_resched();
 
-			if (PageHuge(page))
+			if (folio_test_hugetlb(folio))
 				rc = unmap_and_move_huge_page(get_new_page,
-						put_new_page, private, page,
-						pass > 2, mode, reason,
-						&ret_pages);
+						put_new_page, private,
+						&folio->page, pass > 2, mode,
+						reason,
+						&ret_folios);
 			else
 				rc = unmap_and_move(get_new_page, put_new_page,
-						private, page_folio(page), pass > 2, mode,
-						reason, &ret_pages);
+						private, folio, pass > 2, mode,
+						reason, &ret_folios);
 			/*
 			 * The rules are:
-			 *	Success: non hugetlb page will be freed, hugetlb
-			 *		 page will be put back
+			 *	Success: non hugetlb folio will be freed, hugetlb
+			 *		 folio will be put back
 			 *	-EAGAIN: stay on the from list
 			 *	-ENOMEM: stay on the from list
 			 *	-ENOSYS: stay on the from list
-			 *	Other errno: put on ret_pages list then splice to
+			 *	Other errno: put on ret_folios list then splice to
 			 *		     from list
 			 */
 			switch(rc) {
 			/*
-			 * THP migration might be unsupported or the
-			 * allocation could've failed so we should
-			 * retry on the same page with the THP split
-			 * to base pages.
+			 * Large folio migration might be unsupported or
+			 * the allocation could've failed so we should retry
+			 * on the same folio with the large folio split
+			 * to normal folios.
 			 *
-			 * Sub-pages are put in thp_split_pages, and
+			 * Split folios are put in split_folios, and
 			 * we will migrate them after the rest of the
 			 * list is processed.
 			 */
 			case -ENOSYS:
-				/* THP migration is unsupported */
-				if (is_thp) {
-					nr_thp_failed++;
-					if (!try_split_thp(page, &thp_split_pages)) {
-						nr_thp_split++;
+				/* Large folio migration is unsupported */
+				if (is_large) {
+					nr_large_failed++;
+					nr_thp_failed += is_thp;
+					if (!try_split_folio(folio, &split_folios)) {
+						nr_thp_split += is_thp;
 						break;
 					}
 				/* Hugetlb migration is unsupported */
-				} else if (!no_subpage_counting) {
+				} else if (!no_split_folio_counting) {
 					nr_failed++;
 				}
 
-				nr_failed_pages += nr_subpages;
-				list_move_tail(&page->lru, &ret_pages);
+				nr_failed_pages += nr_pages;
+				list_move_tail(&folio->lru, &ret_folios);
 				break;
 			case -ENOMEM:
 				/*
 				 * When memory is low, don't bother to try to migrate
-				 * other pages, just exit.
+				 * other folios, just exit.
 				 */
-				if (is_thp) {
-					nr_thp_failed++;
-					/* THP NUMA faulting doesn't split THP to retry. */
+				if (is_large) {
+					nr_large_failed++;
+					nr_thp_failed += is_thp;
+					/* Large folio NUMA faulting doesn't split to retry. */
 					if (!nosplit) {
-						int ret = try_split_thp(page, &thp_split_pages);
+						int ret = try_split_folio(folio, &split_folios);
 
 						if (!ret) {
-							nr_thp_split++;
+							nr_thp_split += is_thp;
 							break;
 						} else if (reason == MR_LONGTERM_PIN &&
 							   ret == -EAGAIN) {
 							/*
-							 * Try again to split THP to mitigate
-							 * the failure of longterm pinning.
+							 * Try again to split large folio to
+							 * mitigate the failure of longterm pinning.
 							 */
-							thp_retry++;
-							nr_retry_pages += nr_subpages;
+							large_retry++;
+							thp_retry += is_thp;
+							nr_retry_pages += nr_pages;
 							break;
 						}
 					}
-				} else if (!no_subpage_counting) {
+				} else if (!no_split_folio_counting) {
 					nr_failed++;
 				}
 
-				nr_failed_pages += nr_subpages + nr_retry_pages;
+				nr_failed_pages += nr_pages + nr_retry_pages;
 				/*
-				 * There might be some subpages of fail-to-migrate THPs
-				 * left in thp_split_pages list. Move them back to migration
+				 * There might be some split folios of fail-to-migrate large
+				 * folios left in split_folios list. Move them back to migration
 				 * list so that they could be put back to the right list by
-				 * the caller otherwise the page refcnt will be leaked.
+				 * the caller otherwise the folio refcnt will be leaked.
 				 */
-				list_splice_init(&thp_split_pages, from);
+				list_splice_init(&split_folios, from);
 				/* nr_failed isn't updated for not used */
+				nr_large_failed += large_retry;
 				nr_thp_failed += thp_retry;
 				goto out;
 			case -EAGAIN:
-				if (is_thp)
-					thp_retry++;
-				else if (!no_subpage_counting)
+				if (is_large) {
+					large_retry++;
+					thp_retry += is_thp;
+				} else if (!no_split_folio_counting) {
 					retry++;
-				nr_retry_pages += nr_subpages;
+				}
+				nr_retry_pages += nr_pages;
 				break;
 			case MIGRATEPAGE_SUCCESS:
-				nr_succeeded += nr_subpages;
-				if (is_thp)
-					nr_thp_succeeded++;
+				nr_succeeded += nr_pages;
+				nr_thp_succeeded += is_thp;
 				break;
 			default:
 				/*
 				 * Permanent failure (-EBUSY, etc.):
-				 * unlike -EAGAIN case, the failed page is
-				 * removed from migration page list and not
+				 * unlike -EAGAIN case, the failed folio is
+				 * removed from migration folio list and not
 				 * retried in the next outer loop.
 				 */
-				if (is_thp)
-					nr_thp_failed++;
-				else if (!no_subpage_counting)
+				if (is_large) {
+					nr_large_failed++;
+					nr_thp_failed += is_thp;
+				} else if (!no_split_folio_counting) {
 					nr_failed++;
+				}
 
-				nr_failed_pages += nr_subpages;
+				nr_failed_pages += nr_pages;
 				break;
 			}
 		}
 	}
 	nr_failed += retry;
+	nr_large_failed += large_retry;
 	nr_thp_failed += thp_retry;
 	nr_failed_pages += nr_retry_pages;
 	/*
-	 * Try to migrate subpages of fail-to-migrate THPs, no nr_failed
-	 * counting in this round, since all subpages of a THP is counted
-	 * as 1 failure in the first round.
+	 * Try to migrate split folios of fail-to-migrate large folios, no
+	 * nr_failed counting in this round, since all split folios of a
+	 * large folio is counted as 1 failure in the first round.
 	 */
-	if (!list_empty(&thp_split_pages)) {
+	if (!list_empty(&split_folios)) {
 		/*
-		 * Move non-migrated pages (after 10 retries) to ret_pages
+		 * Move non-migrated folios (after 10 retries) to ret_folios
 		 * to avoid migrating them again.
 		 */
-		list_splice_init(from, &ret_pages);
-		list_splice_init(&thp_split_pages, from);
-		no_subpage_counting = true;
+		list_splice_init(from, &ret_folios);
+		list_splice_init(&split_folios, from);
+		no_split_folio_counting = true;
 		retry = 1;
-		goto thp_subpage_migration;
+		goto split_folio_migration;
 	}
 
-	rc = nr_failed + nr_thp_failed;
+	rc = nr_failed + nr_large_failed;
 out:
 	/*
-	 * Put the permanent failure page back to migration list, they
+	 * Put the permanent failure folio back to migration list, they
 	 * will be put back to the right list by the caller.
 	 */
-	list_splice(&ret_pages, from);
+	list_splice(&ret_folios, from);
 
 	/*
-	 * Return 0 in case all subpages of fail-to-migrate THPs are
-	 * migrated successfully.
+	 * Return 0 in case all split folios of fail-to-migrate large folios
+	 * are migrated successfully.
 	 */
 	if (list_empty(from))
 		rc = 0;
-- 
GitLab


From 4a625ceee8a0ab0273534cb6b432ce6b331db5ee Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Thu, 10 Nov 2022 07:07:51 +0400
Subject: [PATCH 3044/4122] mm/demotion: fix NULL vs IS_ERR checking in
 memory_tier_init

alloc_memory_type() returns error pointers on error instead of NULL.  Use
IS_ERR() to check the return value to fix this.

Link: https://lkml.kernel.org/r/20221110030751.1627266-1-linmq006@gmail.com
Fixes: 7b88bda3761b ("mm/demotion/dax/kmem: set node's abstract distance to MEMTIER_DEFAULT_DAX_ADISTANCE")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.ibm.com>
Cc: Wei Xu <weixugc@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory-tiers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index 939e200c283bb..c734658c62424 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -645,7 +645,7 @@ static int __init memory_tier_init(void)
 	 * than default DRAM tier.
 	 */
 	default_dram_type = alloc_memory_type(MEMTIER_ADISTANCE_DRAM);
-	if (!default_dram_type)
+	if (IS_ERR(default_dram_type))
 		panic("%s() failed to allocate default DRAM tier\n", __func__);
 
 	/*
-- 
GitLab


From 9b34a307f39497198645de5e43f3f00b5e873249 Mon Sep 17 00:00:00 2001
From: Jian Wen <wenjianhn@gmail.com>
Date: Fri, 11 Nov 2022 11:46:39 +0800
Subject: [PATCH 3045/4122] docs: admin-guide: cgroup-v1: update description of
 inactive_file

MADV_FREE pages have been moved into the LRU_INACTIVE_FILE list by commit
f7ad2a6cb9f7 ("mm: move MADV_FREE pages into LRU_INACTIVE_FILE list").

Link: https://lkml.kernel.org/r/20221111034639.3593380-1-wenjian1@xiaomi.com
Signed-off-by: Jian Wen <wenjian1@xiaomi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v1/memory.rst | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst
index 5b86245450bdc..60370f2c67b99 100644
--- a/Documentation/admin-guide/cgroup-v1/memory.rst
+++ b/Documentation/admin-guide/cgroup-v1/memory.rst
@@ -543,7 +543,8 @@ inactive_anon	# of bytes of anonymous and swap cache memory on inactive
 		LRU list.
 active_anon	# of bytes of anonymous and swap cache memory on active
 		LRU list.
-inactive_file	# of bytes of file-backed memory on inactive LRU list.
+inactive_file	# of bytes of file-backed memory and MADV_FREE anonymous memory(
+                LazyFree pages) on inactive LRU list.
 active_file	# of bytes of file-backed memory on active LRU list.
 unevictable	# of bytes of memory that cannot be reclaimed (mlocked etc).
 =============== ===============================================================
-- 
GitLab


From 25e9fa22fbfec1e58c955d2670bf9a18f4ebe9ef Mon Sep 17 00:00:00 2001
From: Yixuan Cao <caoyixuan2019@email.szu.edu.cn>
Date: Tue, 15 Nov 2022 01:14:26 +0800
Subject: [PATCH 3046/4122] mm/kmemleak.c: fix a comment

I noticed a typo in a code comment and I fixed it.

Link: https://lkml.kernel.org/r/20221114171426.91745-1-caoyixuan2019@email.szu.edu.cn
Signed-off-by: Yixuan Cao <caoyixuan2019@email.szu.edu.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kmemleak.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 646e2979641fb..2673329043545 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1461,7 +1461,7 @@ static void scan_gray_list(void)
 }
 
 /*
- * Conditionally call resched() in a object iteration loop while making sure
+ * Conditionally call resched() in an object iteration loop while making sure
  * that the given object won't go away without RCU read lock by performing a
  * get_object() if !pinned.
  *
-- 
GitLab


From c2da319c2e2789dccb20fdafe520ac61c9df84f7 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Sun, 13 Nov 2022 19:04:47 -0500
Subject: [PATCH 3047/4122] mm/uffd: sanity check write bit for uffd-wp
 protected ptes

Let's add one sanity check for CONFIG_DEBUG_VM on the write bit in
whatever chance we have when walking through the pgtables.  It can bring
the error earlier even before the app notices the data was corrupted on
the snapshot.  Also it helps us to identify this is a wrong pgtable setup,
so hopefully a great information to have for debugging too.

Link: https://lkml.kernel.org/r/20221114000447.1681003-3-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/x86/include/asm/pgtable.h | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 286a71810f9e9..0564edd24ffb5 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -292,7 +292,23 @@ static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP
 static inline int pte_uffd_wp(pte_t pte)
 {
-	return pte_flags(pte) & _PAGE_UFFD_WP;
+	bool wp = pte_flags(pte) & _PAGE_UFFD_WP;
+
+#ifdef CONFIG_DEBUG_VM
+	/*
+	 * Having write bit for wr-protect-marked present ptes is fatal,
+	 * because it means the uffd-wp bit will be ignored and write will
+	 * just go through.
+	 *
+	 * Use any chance of pgtable walking to verify this (e.g., when
+	 * page swapped out or being migrated for all purposes). It means
+	 * something is already wrong.  Tell the admin even before the
+	 * process crashes. We also nail it with wrong pgtable setup.
+	 */
+	WARN_ON_ONCE(wp && pte_write(pte));
+#endif
+
+	return wp;
 }
 
 static inline pte_t pte_mkuffd_wp(pte_t pte)
-- 
GitLab


From 369258ce41c6d7663a7b6d509356fecad577378d Mon Sep 17 00:00:00 2001
From: Mike Kravetz <mike.kravetz@oracle.com>
Date: Mon, 14 Nov 2022 15:55:07 -0800
Subject: [PATCH 3048/4122] hugetlb: remove duplicate mmu notifications

The common hugetlb unmap routine __unmap_hugepage_range performs mmu
notification calls.  However, in the case where __unmap_hugepage_range is
called via __unmap_hugepage_range_final, mmu notification calls are
performed earlier in other calling routines.

Remove mmu notification calls from __unmap_hugepage_range.  Add
notification calls to the only other caller: unmap_hugepage_range.
unmap_hugepage_range is called for truncation and hole punch, so change
notification type from UNMAP to CLEAR as this is more appropriate.

Link: https://lkml.kernel.org/r/20221114235507.294320-4-mike.kravetz@oracle.com
Signed-off-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: Peter Xu <peterx@redhat.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 4f1338d82aabf..3fd4570fb8b0e 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5076,7 +5076,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 	struct page *page;
 	struct hstate *h = hstate_vma(vma);
 	unsigned long sz = huge_page_size(h);
-	struct mmu_notifier_range range;
 	unsigned long last_addr_mask;
 	bool force_flush = false;
 
@@ -5091,13 +5090,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 	tlb_change_page_size(tlb, sz);
 	tlb_start_vma(tlb, vma);
 
-	/*
-	 * If sharing possible, alert mmu notifiers of worst case.
-	 */
-	mmu_notifier_range_init(&range, MMU_NOTIFY_UNMAP, 0, vma, mm, start,
-				end);
-	adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
-	mmu_notifier_invalidate_range_start(&range);
 	last_addr_mask = hugetlb_mask_last_page(h);
 	address = start;
 	for (; address < end; address += sz) {
@@ -5182,7 +5174,6 @@ static void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct
 		if (ref_page)
 			break;
 	}
-	mmu_notifier_invalidate_range_end(&range);
 	tlb_end_vma(tlb, vma);
 
 	/*
@@ -5210,6 +5201,7 @@ void __unmap_hugepage_range_final(struct mmu_gather *tlb,
 	hugetlb_vma_lock_write(vma);
 	i_mmap_lock_write(vma->vm_file->f_mapping);
 
+	/* mmu notification performed in caller */
 	__unmap_hugepage_range(tlb, vma, start, end, ref_page, zap_flags);
 
 	if (zap_flags & ZAP_FLAG_UNMAP) {	/* final unmap */
@@ -5234,10 +5226,18 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 			  unsigned long end, struct page *ref_page,
 			  zap_flags_t zap_flags)
 {
+	struct mmu_notifier_range range;
 	struct mmu_gather tlb;
 
+	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,
+				start, end);
+	adjust_range_if_pmd_sharing_possible(vma, &range.start, &range.end);
+	mmu_notifier_invalidate_range_start(&range);
 	tlb_gather_mmu(&tlb, vma->vm_mm);
+
 	__unmap_hugepage_range(&tlb, vma, start, end, ref_page, zap_flags);
+
+	mmu_notifier_invalidate_range_end(&range);
 	tlb_finish_mmu(&tlb);
 }
 
-- 
GitLab


From b7217a0bbe00a98a8f4b15ebc2a8355a31a59e1e Mon Sep 17 00:00:00 2001
From: "T.J. Mercier" <tjmercier@google.com>
Date: Mon, 14 Nov 2022 23:59:49 +0000
Subject: [PATCH 3049/4122] mm: shrinkers: add missing includes for undeclared
 types

The shrinker.h header depends on a user including other headers before it
for types used by shrinker.h.  Fix this by including the appropriate
headers in shrinker.h.

./include/linux/shrinker.h:13:9: error: unknown type name `gfp_t'
   13 |         gfp_t gfp_mask;
      |         ^~~~~
./include/linux/shrinker.h:71:26: error: field `list' has incomplete type
   71 |         struct list_head list;
      |                          ^~~~
./include/linux/shrinker.h:82:9: error: unknown type name `atomic_long_t'
   82 |         atomic_long_t *nr_deferred;
      |

Link: https://lkml.kernel.org/r/20221114235949.201749-1-tjmercier@google.com
Fixes: 83aeeada7c69 ("vmscan: use atomic-long for shrinker batching")
Fixes: b0d40c92adaf ("superblock: introduce per-sb cache shrinker infrastructure")
Signed-off-by: T.J. Mercier <tjmercier@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dave Chinner <dchinner@redhat.com>
Cc: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/shrinker.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h
index 08e6054e061f3..71310efe2fab9 100644
--- a/include/linux/shrinker.h
+++ b/include/linux/shrinker.h
@@ -2,6 +2,9 @@
 #ifndef _LINUX_SHRINKER_H
 #define _LINUX_SHRINKER_H
 
+#include <linux/atomic.h>
+#include <linux/types.h>
+
 /*
  * This struct is used to pass information from page reclaim to the shrinkers.
  * We consolidate the values for easier extension later.
-- 
GitLab


From d09e8ca6cb93bb4b97517a18fbbf7eccb0e9ff43 Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Tue, 15 Nov 2022 02:06:01 +0000
Subject: [PATCH 3050/4122] mm: anonymous shared memory naming

Since commit 9a10064f5625 ("mm: add a field to store names for private
anonymous memory"), name for private anonymous memory, but not shared
anonymous, can be set.  However, naming shared anonymous memory just as
useful for tracking purposes.

Extend the functionality to be able to set names for shared anon.

There are two ways to create anonymous shared memory, using memfd or
directly via mmap():
1. fd = memfd_create(...)
   mem = mmap(..., MAP_SHARED, fd, ...)
2. mem = mmap(..., MAP_SHARED | MAP_ANONYMOUS, -1, ...)

In both cases the anonymous shared memory is created the same way by
mapping an unlinked file on tmpfs.

The memfd way allows to give a name for anonymous shared memory, but
not useful when parts of shared memory require to have distinct names.

Example use case: The VMM maps VM memory as anonymous shared memory (not
private because VMM is sandboxed and drivers are running in their own
processes).  However, the VM tells back to the VMM how parts of the memory
are actually used by the guest, how each of the segments should be backed
(i.e.  4K pages, 2M pages), and some other information about the segments.
The naming allows us to monitor the effective memory footprint for each
of these segments from the host without looking inside the guest.

Sample output:
  /* Create shared anonymous segmenet */
  anon_shmem = mmap(NULL, SIZE, PROT_READ | PROT_WRITE,
                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
  /* Name the segment: "MY-NAME" */
  rv = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME,
             anon_shmem, SIZE, "MY-NAME");

cat /proc/<pid>/maps (and smaps):
7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 [anon_shmem:MY-NAME]

If the segment is not named, the output is:
7fc8e2b4c000-7fc8f2b4c000 rw-s 00000000 00:01 1024 /dev/zero (deleted)

Link: https://lkml.kernel.org/r/20221115020602.804224-1-pasha.tatashin@soleen.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Colin Cross <ccross@google.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Liam Howlett <liam.howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Mike Rapoport <rppt@kernel.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Vincent Whitchurch <vincent.whitchurch@axis.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: xu xin <cgel.zte@gmail.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/filesystems/proc.rst |  8 +++++---
 fs/proc/task_mmu.c                 | 15 +++++++++++----
 include/linux/mm.h                 |  2 ++
 include/linux/mm_types.h           | 26 ++++++++++++--------------
 mm/madvise.c                       |  7 ++-----
 mm/shmem.c                         | 29 +++++++++++++++++++++++++----
 6 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
index 898c99eae8e44..b8f175ae4853b 100644
--- a/Documentation/filesystems/proc.rst
+++ b/Documentation/filesystems/proc.rst
@@ -426,14 +426,16 @@ with the memory region, as the case would be with BSS (uninitialized data).
 The "pathname" shows the name associated file for this mapping.  If the mapping
 is not associated with a file:
 
- =============              ====================================
+ ===================        ===========================================
  [heap]                     the heap of the program
  [stack]                    the stack of the main process
  [vdso]                     the "virtual dynamic shared object",
                             the kernel system call handler
- [anon:<name>]              an anonymous mapping that has been
+ [anon:<name>]              a private anonymous mapping that has been
                             named by userspace
- =============              ====================================
+ [anon_shmem:<name>]        an anonymous shared memory mapping that has
+                            been named by userspace
+ ===================        ===========================================
 
  or if empty, the mapping is anonymous.
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 8a74cdcc9af00..89338950afd35 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -274,6 +274,7 @@ static void show_vma_header_prefix(struct seq_file *m,
 static void
 show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 {
+	struct anon_vma_name *anon_name = NULL;
 	struct mm_struct *mm = vma->vm_mm;
 	struct file *file = vma->vm_file;
 	vm_flags_t flags = vma->vm_flags;
@@ -293,6 +294,8 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	start = vma->vm_start;
 	end = vma->vm_end;
 	show_vma_header_prefix(m, start, end, flags, pgoff, dev, ino);
+	if (mm)
+		anon_name = anon_vma_name(vma);
 
 	/*
 	 * Print the dentry name for named mappings, and a
@@ -300,7 +303,14 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 	 */
 	if (file) {
 		seq_pad(m, ' ');
-		seq_file_path(m, file, "\n");
+		/*
+		 * If user named this anon shared memory via
+		 * prctl(PR_SET_VMA ..., use the provided name.
+		 */
+		if (anon_name)
+			seq_printf(m, "[anon_shmem:%s]", anon_name->name);
+		else
+			seq_file_path(m, file, "\n");
 		goto done;
 	}
 
@@ -312,8 +322,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 
 	name = arch_vma_name(vma);
 	if (!name) {
-		struct anon_vma_name *anon_name;
-
 		if (!mm) {
 			name = "[vdso]";
 			goto done;
@@ -330,7 +338,6 @@ show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 			goto done;
 		}
 
-		anon_name = anon_vma_name(vma);
 		if (anon_name) {
 			seq_pad(m, ' ');
 			seq_printf(m, "[anon:%s]", anon_name->name);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f873441303b71..686879dbb0bd1 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -700,8 +700,10 @@ static inline unsigned long vma_iter_addr(struct vma_iterator *vmi)
  * paths in userfault.
  */
 bool vma_is_shmem(struct vm_area_struct *vma);
+bool vma_is_anon_shmem(struct vm_area_struct *vma);
 #else
 static inline bool vma_is_shmem(struct vm_area_struct *vma) { return false; }
+static inline bool vma_is_anon_shmem(struct vm_area_struct *vma) { return false; }
 #endif
 
 int vma_is_stack_for_current(struct vm_area_struct *vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 6b0009e7d4ae5..157c2e22cc7fc 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -549,21 +549,11 @@ struct vm_area_struct {
 	 * For areas with an address space and backing store,
 	 * linkage into the address_space->i_mmap interval tree.
 	 *
-	 * For private anonymous mappings, a pointer to a null terminated string
-	 * containing the name given to the vma, or NULL if unnamed.
 	 */
-
-	union {
-		struct {
-			struct rb_node rb;
-			unsigned long rb_subtree_last;
-		} shared;
-		/*
-		 * Serialized by mmap_sem. Never use directly because it is
-		 * valid only when vm_file is NULL. Use anon_vma_name instead.
-		 */
-		struct anon_vma_name *anon_name;
-	};
+	struct {
+		struct rb_node rb;
+		unsigned long rb_subtree_last;
+	} shared;
 
 	/*
 	 * A file's MAP_PRIVATE vma can be in both i_mmap tree and anon_vma
@@ -584,6 +574,14 @@ struct vm_area_struct {
 	struct file * vm_file;		/* File we map to (can be NULL). */
 	void * vm_private_data;		/* was vm_pte (shared mem) */
 
+#ifdef CONFIG_ANON_VMA_NAME
+	/*
+	 * For private and shared anonymous mappings, a pointer to a null
+	 * terminated string containing the name given to the vma, or NULL if
+	 * unnamed. Serialized by mmap_sem. Use anon_vma_name to access.
+	 */
+	struct anon_vma_name *anon_name;
+#endif
 #ifdef CONFIG_SWAP
 	atomic_long_t swap_readahead_info;
 #endif
diff --git a/mm/madvise.c b/mm/madvise.c
index b913ba6efc10b..83b0c91a126b7 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -95,9 +95,6 @@ struct anon_vma_name *anon_vma_name(struct vm_area_struct *vma)
 {
 	mmap_assert_locked(vma->vm_mm);
 
-	if (vma->vm_file)
-		return NULL;
-
 	return vma->anon_name;
 }
 
@@ -183,7 +180,7 @@ success:
 	 * vm_flags is protected by the mmap_lock held in write mode.
 	 */
 	vma->vm_flags = new_flags;
-	if (!vma->vm_file) {
+	if (!vma->vm_file || vma_is_anon_shmem(vma)) {
 		error = replace_anon_vma_name(vma, anon_name);
 		if (error)
 			return error;
@@ -1273,7 +1270,7 @@ static int madvise_vma_anon_name(struct vm_area_struct *vma,
 	int error;
 
 	/* Only anonymous mappings can be named */
-	if (vma->vm_file)
+	if (vma->vm_file && !vma_is_anon_shmem(vma))
 		return -EBADF;
 
 	error = madvise_update_vma(vma, prev, start, end, vma->vm_flags,
diff --git a/mm/shmem.c b/mm/shmem.c
index 7428ae3fa4b9e..f418d21205be1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -237,11 +237,17 @@ static const struct inode_operations shmem_inode_operations;
 static const struct inode_operations shmem_dir_inode_operations;
 static const struct inode_operations shmem_special_inode_operations;
 static const struct vm_operations_struct shmem_vm_ops;
+static const struct vm_operations_struct shmem_anon_vm_ops;
 static struct file_system_type shmem_fs_type;
 
+bool vma_is_anon_shmem(struct vm_area_struct *vma)
+{
+	return vma->vm_ops == &shmem_anon_vm_ops;
+}
+
 bool vma_is_shmem(struct vm_area_struct *vma)
 {
-	return vma->vm_ops == &shmem_vm_ops;
+	return vma_is_anon_shmem(vma) || vma->vm_ops == &shmem_vm_ops;
 }
 
 static LIST_HEAD(shmem_swaplist);
@@ -2263,7 +2269,8 @@ out_nomem:
 
 static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+	struct inode *inode = file_inode(file);
+	struct shmem_inode_info *info = SHMEM_I(inode);
 	int ret;
 
 	ret = seal_check_future_write(info->seals, vma);
@@ -2274,7 +2281,11 @@ static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 	vma->vm_flags |= VM_MTE_ALLOWED;
 
 	file_accessed(file);
-	vma->vm_ops = &shmem_vm_ops;
+	/* This is anonymous shared memory if it is unlinked at the time of mmap */
+	if (inode->i_nlink)
+		vma->vm_ops = &shmem_vm_ops;
+	else
+		vma->vm_ops = &shmem_anon_vm_ops;
 	return 0;
 }
 
@@ -3988,6 +3999,15 @@ static const struct vm_operations_struct shmem_vm_ops = {
 #endif
 };
 
+static const struct vm_operations_struct shmem_anon_vm_ops = {
+	.fault		= shmem_fault,
+	.map_pages	= filemap_map_pages,
+#ifdef CONFIG_NUMA
+	.set_policy     = shmem_set_policy,
+	.get_policy     = shmem_get_policy,
+#endif
+};
+
 int shmem_init_fs_context(struct fs_context *fc)
 {
 	struct shmem_options *ctx;
@@ -4163,6 +4183,7 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
 #define shmem_vm_ops				generic_file_vm_ops
+#define shmem_anon_vm_ops			generic_file_vm_ops
 #define shmem_file_operations			ramfs_file_operations
 #define shmem_get_inode(sb, dir, mode, dev, flags)	ramfs_get_inode(sb, dir, mode, dev)
 #define shmem_acct_size(flags, size)		0
@@ -4268,7 +4289,7 @@ int shmem_zero_setup(struct vm_area_struct *vma)
 	if (vma->vm_file)
 		fput(vma->vm_file);
 	vma->vm_file = file;
-	vma->vm_ops = &shmem_vm_ops;
+	vma->vm_ops = &shmem_anon_vm_ops;
 
 	return 0;
 }
-- 
GitLab


From e83b39d6bbdb6d25bd6f5c258832774635d29b47 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 15 Nov 2022 13:32:55 +0100
Subject: [PATCH 3051/4122] mm: make drop_caches keep reclaiming on all nodes

Currently, drop_caches are reclaiming node-by-node, looping on each node
until reclaim could not make progress.  This can however leave quite some
slab entries (such as filesystem inodes) unreclaimed if objects say on
node 1 keep objects on node 0 pinned.  So move the "loop until no
progress" loop to the node-by-node iteration to retry reclaim also on
other nodes if reclaim on some nodes made progress.  This fixes problem
when drop_caches was not reclaiming lots of otherwise perfectly fine to
reclaim inodes.

Link: https://lkml.kernel.org/r/20221115123255.12559-1-jack@suse.cz
Signed-off-by: Jan Kara <jack@suse.cz>
Reported-by: You Zhou <you.zhou@intel.com>
Reported-by: Pengfei Xu <pengfei.xu@intel.com>
Tested-by: Pengfei Xu <pengfei.xu@intel.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 33 ++++++++++++++++++---------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index d7c71be6417d3..82f32c929b114 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1021,31 +1021,34 @@ out:
 	return freed;
 }
 
-static void drop_slab_node(int nid)
+static unsigned long drop_slab_node(int nid)
 {
-	unsigned long freed;
-	int shift = 0;
+	unsigned long freed = 0;
+	struct mem_cgroup *memcg = NULL;
 
+	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
-		struct mem_cgroup *memcg = NULL;
-
-		if (fatal_signal_pending(current))
-			return;
+		freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
+	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
 
-		freed = 0;
-		memcg = mem_cgroup_iter(NULL, NULL, NULL);
-		do {
-			freed += shrink_slab(GFP_KERNEL, nid, memcg, 0);
-		} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
-	} while ((freed >> shift++) > 1);
+	return freed;
 }
 
 void drop_slab(void)
 {
 	int nid;
+	int shift = 0;
+	unsigned long freed;
 
-	for_each_online_node(nid)
-		drop_slab_node(nid);
+	do {
+		freed = 0;
+		for_each_online_node(nid) {
+			if (fatal_signal_pending(current))
+				return;
+
+			freed += drop_slab_node(nid);
+		}
+	} while ((freed >> shift++) > 1);
 }
 
 static int reclaimer_offset(void)
-- 
GitLab


From dbaf7dc97ab8d526a20d3477419bc14b4890a82c Mon Sep 17 00:00:00 2001
From: Li zeming <zeming@nfschina.com>
Date: Mon, 7 Nov 2022 09:56:59 +0800
Subject: [PATCH 3052/4122] hugetlbfs: inode: remove unnecessary (void*)
 conversions

The ei pointer does not need to cast the type.

Link: https://lkml.kernel.org/r/20221107015659.3221-1-zeming@nfschina.com
Signed-off-by: Li zeming <zeming@nfschina.com>
Reviewed-by: Muchun Song <songmuchun@bytedance.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/hugetlbfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 3ee84604e36df..790d2727141a8 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -1279,7 +1279,7 @@ static const struct address_space_operations hugetlbfs_aops = {
 
 static void init_once(void *foo)
 {
-	struct hugetlbfs_inode_info *ei = (struct hugetlbfs_inode_info *)foo;
+	struct hugetlbfs_inode_info *ei = foo;
 
 	inode_init_once(&ei->vfs_inode);
 }
-- 
GitLab


From eff6aa17aa7c06c25c0df80060cd0fe621dac276 Mon Sep 17 00:00:00 2001
From: Rong Tao <rongtao@cestc.cn>
Date: Sun, 13 Nov 2022 08:38:45 +0800
Subject: [PATCH 3053/4122] selftests/damon: fix unnecessary compilation
 warnings

When testing overflow and overread, there is no need to keep unnecessary
compilation warnings, we should simply ignore them.

The motivation for this patch is to eliminate the compilation warning,
maybe one day we will compile the kernel with "-Werror -Wall", at which
point this compilation warning will turn into a compilation error, we
should fix this error in advance.

How to reproduce the problem (with gcc-11.3.1):

    $ make -C tools/testing/selftests/
    ...
    warning: `write' reading 4294967295 bytes from a region of size 1
    [-Wstringop-overread]
    warning: `read' writing 4294967295 bytes into a region of size 25
    overflows the destination [-Wstringop-overflow=]

"-Wno-stringop-overread" is supported at least in gcc-11.1.0.

Link: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d14c547abd484d3540b692bb8048c4a6efe92c8b
Link: https://lkml.kernel.org/r/tencent_51C4ACA8CB3895C2D7F35178440283602107@qq.com
Signed-off-by: Rong Tao <rongtao@cestc.cn>
Reviewed-by: SeongJae Park <sj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/damon/huge_count_read_write.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c
index ad7a6b4cf3387..a6fe0689f88dc 100644
--- a/tools/testing/selftests/damon/huge_count_read_write.c
+++ b/tools/testing/selftests/damon/huge_count_read_write.c
@@ -8,6 +8,13 @@
 #include <unistd.h>
 #include <stdio.h>
 
+#pragma GCC diagnostic push
+#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1
+/* Ignore read(2) overflow and write(2) overread compile warnings */
+#pragma GCC diagnostic ignored "-Wstringop-overread"
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif
+
 void write_read_with_huge_count(char *file)
 {
 	int filedesc = open(file, O_RDWR);
@@ -27,6 +34,8 @@ void write_read_with_huge_count(char *file)
 	close(filedesc);
 }
 
+#pragma GCC diagnostic pop
+
 int main(int argc, char *argv[])
 {
 	if (argc != 2) {
-- 
GitLab


From 53b2d09bdd12092a7341c08b6b863560db62fa57 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 16 Nov 2022 16:07:09 -0400
Subject: [PATCH 3054/4122] mm/gup: remove the restriction on locked with
 FOLL_LONGTERM

This restriction was created because FOLL_LONGTERM used to scan the vma
list, so it could not tolerate becoming unlocked.  That was fixed in
commit 52650c8b466b ("mm/gup: remove the vma allocation from
gup_longterm_locked()") and the restriction on !vma was removed.

However, the locked restriction remained, even though it isn't necessary
anymore.

Adjust __gup_longterm_locked() so it can handle the mmap_read_lock()
becoming unlocked while it is looping for migration.  Migration does not
require the mmap_read_sem because it is only handling struct pages.  If we
had to unlock then ensure the whole thing returns unlocked.

Remove __get_user_pages_remote() and __gup_longterm_unlocked().  These
cases can now just directly call other functions.

Link: https://lkml.kernel.org/r/0-v1-b9ae39aa8884+14dbb-gup_longterm_locked_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/gup.c | 109 ++++++++++++++-----------------------------------------
 1 file changed, 27 insertions(+), 82 deletions(-)

diff --git a/mm/gup.c b/mm/gup.c
index 6b16aecf5d2c5..2500d00db51b7 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2049,14 +2049,19 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 				  unsigned long nr_pages,
 				  struct page **pages,
 				  struct vm_area_struct **vmas,
+				  int *locked,
 				  unsigned int gup_flags)
 {
+	bool must_unlock = false;
 	unsigned int flags;
 	long rc, nr_pinned_pages;
 
+	if (locked && WARN_ON_ONCE(!*locked))
+		return -EINVAL;
+
 	if (!(gup_flags & FOLL_LONGTERM))
 		return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
-					       NULL, gup_flags);
+					       locked, gup_flags);
 
 	/*
 	 * If we get to this point then FOLL_LONGTERM is set, and FOLL_LONGTERM
@@ -2070,8 +2075,13 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 		return -EINVAL;
 	flags = memalloc_pin_save();
 	do {
+		if (locked && !*locked) {
+			mmap_read_lock(mm);
+			must_unlock = true;
+			*locked = 1;
+		}
 		nr_pinned_pages = __get_user_pages_locked(mm, start, nr_pages,
-							  pages, vmas, NULL,
+							  pages, vmas, locked,
 							  gup_flags);
 		if (nr_pinned_pages <= 0) {
 			rc = nr_pinned_pages;
@@ -2081,6 +2091,10 @@ static long __gup_longterm_locked(struct mm_struct *mm,
 	} while (rc == -EAGAIN);
 	memalloc_pin_restore(flags);
 
+	if (locked && *locked && must_unlock) {
+		mmap_read_unlock(mm);
+		*locked = 0;
+	}
 	return rc ? rc : nr_pinned_pages;
 }
 
@@ -2104,35 +2118,6 @@ static bool is_valid_gup_flags(unsigned int gup_flags)
 }
 
 #ifdef CONFIG_MMU
-static long __get_user_pages_remote(struct mm_struct *mm,
-				    unsigned long start, unsigned long nr_pages,
-				    unsigned int gup_flags, struct page **pages,
-				    struct vm_area_struct **vmas, int *locked)
-{
-	/*
-	 * Parts of FOLL_LONGTERM behavior are incompatible with
-	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-	 * vmas. However, this only comes up if locked is set, and there are
-	 * callers that do request FOLL_LONGTERM, but do not set locked. So,
-	 * allow what we can.
-	 */
-	if (gup_flags & FOLL_LONGTERM) {
-		if (WARN_ON_ONCE(locked))
-			return -EINVAL;
-		/*
-		 * This will check the vmas (even if our vmas arg is NULL)
-		 * and return -ENOTSUPP if DAX isn't allowed in this case:
-		 */
-		return __gup_longterm_locked(mm, start, nr_pages, pages,
-					     vmas, gup_flags | FOLL_TOUCH |
-					     FOLL_REMOTE);
-	}
-
-	return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
-				       locked,
-				       gup_flags | FOLL_TOUCH | FOLL_REMOTE);
-}
-
 /**
  * get_user_pages_remote() - pin user pages in memory
  * @mm:		mm_struct of target mm
@@ -2201,8 +2186,8 @@ long get_user_pages_remote(struct mm_struct *mm,
 	if (!is_valid_gup_flags(gup_flags))
 		return -EINVAL;
 
-	return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
-				       pages, vmas, locked);
+	return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
+				     gup_flags | FOLL_TOUCH | FOLL_REMOTE);
 }
 EXPORT_SYMBOL(get_user_pages_remote);
 
@@ -2214,14 +2199,6 @@ long get_user_pages_remote(struct mm_struct *mm,
 {
 	return 0;
 }
-
-static long __get_user_pages_remote(struct mm_struct *mm,
-				    unsigned long start, unsigned long nr_pages,
-				    unsigned int gup_flags, struct page **pages,
-				    struct vm_area_struct **vmas, int *locked)
-{
-	return 0;
-}
 #endif /* !CONFIG_MMU */
 
 /**
@@ -2248,7 +2225,7 @@ long get_user_pages(unsigned long start, unsigned long nr_pages,
 		return -EINVAL;
 
 	return __gup_longterm_locked(current->mm, start, nr_pages,
-				     pages, vmas, gup_flags | FOLL_TOUCH);
+				     pages, vmas, NULL, gup_flags | FOLL_TOUCH);
 }
 EXPORT_SYMBOL(get_user_pages);
 
@@ -2274,18 +2251,9 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 	int locked = 1;
 	long ret;
 
-	/*
-	 * FIXME: Current FOLL_LONGTERM behavior is incompatible with
-	 * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
-	 * vmas.  As there are no users of this flag in this call we simply
-	 * disallow this option for now.
-	 */
-	if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
-		return -EINVAL;
-
 	mmap_read_lock(mm);
-	ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
-				      &locked, gup_flags | FOLL_TOUCH);
+	ret = __gup_longterm_locked(mm, start, nr_pages, pages, NULL, &locked,
+				    gup_flags | FOLL_TOUCH);
 	if (locked)
 		mmap_read_unlock(mm);
 	return ret;
@@ -2879,29 +2847,6 @@ static bool gup_fast_permitted(unsigned long start, unsigned long end)
 }
 #endif
 
-static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
-				   unsigned int gup_flags, struct page **pages)
-{
-	int ret;
-
-	/*
-	 * FIXME: FOLL_LONGTERM does not work with
-	 * get_user_pages_unlocked() (see comments in that function)
-	 */
-	if (gup_flags & FOLL_LONGTERM) {
-		mmap_read_lock(current->mm);
-		ret = __gup_longterm_locked(current->mm,
-					    start, nr_pages,
-					    pages, NULL, gup_flags);
-		mmap_read_unlock(current->mm);
-	} else {
-		ret = get_user_pages_unlocked(start, nr_pages,
-					      pages, gup_flags);
-	}
-
-	return ret;
-}
-
 static unsigned long lockless_pages_from_mm(unsigned long start,
 					    unsigned long end,
 					    unsigned int gup_flags,
@@ -2985,8 +2930,8 @@ static int internal_get_user_pages_fast(unsigned long start,
 	/* Slow path: try to get the remaining pages with get_user_pages */
 	start += nr_pinned << PAGE_SHIFT;
 	pages += nr_pinned;
-	ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
-				      pages);
+	ret = get_user_pages_unlocked(start, nr_pages - nr_pinned, pages,
+				      gup_flags);
 	if (ret < 0) {
 		/*
 		 * The caller has to unpin the pages we already pinned so
@@ -3185,9 +3130,9 @@ long pin_user_pages_remote(struct mm_struct *mm,
 	if (WARN_ON_ONCE(!pages))
 		return -EINVAL;
 
-	gup_flags |= FOLL_PIN;
-	return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
-				       pages, vmas, locked);
+	return __gup_longterm_locked(mm, start, nr_pages, pages, vmas, locked,
+				     gup_flags | FOLL_PIN | FOLL_TOUCH |
+					     FOLL_REMOTE);
 }
 EXPORT_SYMBOL(pin_user_pages_remote);
 
@@ -3221,7 +3166,7 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
 
 	gup_flags |= FOLL_PIN;
 	return __gup_longterm_locked(current->mm, start, nr_pages,
-				     pages, vmas, gup_flags);
+				     pages, vmas, NULL, gup_flags);
 }
 EXPORT_SYMBOL(pin_user_pages);
 
-- 
GitLab


From 749477244b05be0d9b6dcc10c161bfa4c4749d78 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Wed, 16 Nov 2022 14:19:22 +0100
Subject: [PATCH 3055/4122] mm: Kconfig: make config SECRETMEM visible with
 EXPERT

Commit 6a108a14fa35 ("kconfig: rename CONFIG_EMBEDDED to CONFIG_EXPERT")
introduces CONFIG_EXPERT to carry the previous intent of CONFIG_EMBEDDED
and just gives that intent a much better name.  That has been clearly a
good and long overdue renaming, and it is clearly an improvement to the
kernel build configuration that has shown to help managing the kernel
build configuration in the last decade.

However, rather than bravely and radically just deleting CONFIG_EMBEDDED,
this commit gives CONFIG_EMBEDDED a new intended semantics, but keeps it
open for future contributors to implement that intended semantics:

    A new CONFIG_EMBEDDED option is added that automatically selects
    CONFIG_EXPERT when enabled and can be used in the future to isolate
    options that should only be considered for embedded systems (RISC
    architectures, SLOB, etc).

Since then, this CONFIG_EMBEDDED implicitly had two purposes:

  - It can make even more options visible beyond what CONFIG_EXPERT makes
    visible. In other words, it may introduce another level of enabling the
    visibility of configuration options: always visible, visible with
    CONFIG_EXPERT and visible with CONFIG_EMBEDDED.

  - Set certain default values of some configurations differently,
    following the assumption that configuring a kernel build for an
    embedded system generally starts with a different set of default values
    compared to kernel builds for all other kind of systems.

Considering the second purpose, note that already probably arguing that a
kernel build for an embedded system would choose some values differently
is already tricky: the set of embedded systems with Linux kernels is
already quite diverse.  Many embedded system have powerful CPUs and it
would not be clear that all embedded systems just optimize towards one
specific aspect, e.g., a smaller kernel image size.  So, it is unclear if
starting with "one set of default configuration" that is induced by
CONFIG_EMBEDDED is a good offer for developers configuring their kernels.

Also, the differences of needed user-space features in an embedded system
compared to a non-embedded system are probably difficult or even
impossible to name in some generic way.

So it is not surprising that in the last decade hardly anyone has
contributed changes to make something default differently in case of
CONFIG_EMBEDDED=y.

Currently, in v6.0-rc4, SECRETMEM is the only config switched off if
CONFIG_EMBEDDED=y.

As long as that is actually the only option that currently is selected or
deselected, it is better to just make SECRETMEM configurable at build time
by experts using menuconfig instead.

Make SECRETMEM configurable when EXPERT is set and otherwise default to
yes.  Further, SECRETMEM needs ARCH_HAS_SET_DIRECT_MAP.

This allows us to remove CONFIG_EMBEDDED in the close future.

Link: https://lkml.kernel.org/r/20221116131922.25533-1-lukas.bulwahn@gmail.com
Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/Kconfig | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/mm/Kconfig b/mm/Kconfig
index c86b69aff7d4d..4e8a2697f28d9 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1074,7 +1074,13 @@ config IO_MAPPING
 	bool
 
 config SECRETMEM
-	def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
+	default y
+	bool "Enable memfd_secret() system call" if EXPERT
+	depends on ARCH_HAS_SET_DIRECT_MAP
+	help
+	  Enable the memfd_secret() system call with the ability to create
+	  memory areas visible only in the context of the owning process and
+	  not mapped to other processes and other kernel page tables.
 
 config ANON_VMA_NAME
 	bool "Anonymous VMA name support"
-- 
GitLab


From 7aca5ca154930a06612f4d7b81f710f3e1027e04 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:40 +0100
Subject: [PATCH 3056/4122] selftests/vm: anon_cow: prepare for non-anonymous
 COW tests

Patch series "mm/gup: remove FOLL_FORCE usage from drivers (reliable R/O
long-term pinning)".

For now, we did not support reliable R/O long-term pinning in COW
mappings.  That means, if we would trigger R/O long-term pinning in
MAP_PRIVATE mapping, we could end up pinning the (R/O-mapped) shared
zeropage or a pagecache page.

The next write access would trigger a write fault and replace the pinned
page by an exclusive anonymous page in the process page table; whatever
the process would write to that private page copy would not be visible by
the owner of the previous page pin: for example, RDMA could read stale
data.  The end result is essentially an unexpected and hard-to-debug
memory corruption.

Some drivers tried working around that limitation by using
"FOLL_FORCE|FOLL_WRITE|FOLL_LONGTERM" for R/O long-term pinning for now.
FOLL_WRITE would trigger a write fault, if required, and break COW before
pinning the page.  FOLL_FORCE is required because the VMA might lack write
permissions, and drivers wanted to make that working as well, just like
one would expect (no write access, but still triggering a write access to
break COW).

However, that is not a practical solution, because
(1) Drivers that don't stick to that undocumented and debatable pattern
    would still run into that issue. For example, VFIO only uses
    FOLL_LONGTERM for R/O long-term pinning.
(2) Using FOLL_WRITE just to work around a COW mapping + page pinning
    limitation is unintuitive. FOLL_WRITE would, for example, mark the
    page softdirty or trigger uffd-wp, even though, there actually isn't
    going to be any write access.
(3) The purpose of FOLL_FORCE is debug access, not access without lack of
    VMA permissions by arbitrarty drivers.

So instead, make R/O long-term pinning work as expected, by breaking COW
in a COW mapping early, such that we can remove any FOLL_FORCE usage from
drivers and make FOLL_FORCE ptrace-specific (renaming it to FOLL_PTRACE).
More details in patch #8.


This patch (of 19):

Originally, the plan was to have a separate tests for testing COW of
non-anonymous (e.g., shared zeropage) pages.

Turns out, that we'd need a lot of similar functionality and that there
isn't a really good reason to separate it. So let's prepare for non-anon
tests by renaming to "cow".

Link: https://lkml.kernel.org/r/20221116102659.70287-1-david@redhat.com
Link: https://lkml.kernel.org/r/20221116102659.70287-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alex Williamson <alex.williamson@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andy Walls <awalls@md.metrocast.net>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Bernard Metzler <bmt@zurich.ibm.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Benvenuti <benve@cisco.com>
Cc: Christian Gmeiner <christian.gmeiner@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Airlie <airlied@gmail.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Cc: "Eric W . Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Hans Verkuil <hverkuil@xs4all.nl>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Inki Dae <inki.dae@samsung.com>
Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: James Morris <jmorris@namei.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Kentaro Takeda <takedakn@nttdata.co.jp>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Leon Romanovsky <leon@kernel.org>
Cc: Leon Romanovsky <leonro@nvidia.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Matt Turner <mattst88@gmail.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Nadav Amit <namit@vmware.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nelson Escobar <neescoba@cisco.com>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Oded Gabbay <ogabbay@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Paul Moore <paul@paul-moore.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Henderson <richard.henderson@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Russell King <linux+etnaviv@armlinux.org.uk>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Seung-Woo Kim <sw0312.kim@samsung.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/.gitignore         |  2 +-
 tools/testing/selftests/vm/Makefile           | 10 ++++----
 tools/testing/selftests/vm/check_config.sh    |  4 +--
 .../selftests/vm/{anon_cow.c => cow.c}        | 25 +++++++++++--------
 tools/testing/selftests/vm/run_vmtests.sh     |  2 +-
 5 files changed, 24 insertions(+), 19 deletions(-)
 rename tools/testing/selftests/vm/{anon_cow.c => cow.c} (97%)

diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
index 8a536c731e3c2..ee8c41c998e6d 100644
--- a/tools/testing/selftests/vm/.gitignore
+++ b/tools/testing/selftests/vm/.gitignore
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-anon_cow
+cow
 hugepage-mmap
 hugepage-mremap
 hugepage-shm
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index 00920cb8b499b..a4d764efd6e3a 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -27,7 +27,7 @@ MAKEFLAGS += --no-builtin-rules
 
 CFLAGS = -Wall -I $(top_srcdir) -I $(top_srcdir)/usr/include $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
 LDLIBS = -lrt -lpthread
-TEST_GEN_FILES = anon_cow
+TEST_GEN_FILES = cow
 TEST_GEN_FILES += compaction_test
 TEST_GEN_FILES += gup_test
 TEST_GEN_FILES += hmm-tests
@@ -98,7 +98,7 @@ TEST_FILES += va_128TBswitch.sh
 
 include ../lib.mk
 
-$(OUTPUT)/anon_cow: vm_util.c
+$(OUTPUT)/cow: vm_util.c
 $(OUTPUT)/khugepaged: vm_util.c
 $(OUTPUT)/madv_populate: vm_util.c
 $(OUTPUT)/soft-dirty: vm_util.c
@@ -154,8 +154,8 @@ warn_32bit_failure:
 endif
 endif
 
-# ANON_COW_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
-$(OUTPUT)/anon_cow: LDLIBS += $(ANON_COW_EXTRA_LIBS)
+# cow_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
+$(OUTPUT)/cow: LDLIBS += $(COW_EXTRA_LIBS)
 
 $(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
 
@@ -168,7 +168,7 @@ local_config.mk local_config.h: check_config.sh
 
 EXTRA_CLEAN += local_config.mk local_config.h
 
-ifeq ($(ANON_COW_EXTRA_LIBS),)
+ifeq ($(COW_EXTRA_LIBS),)
 all: warn_missing_liburing
 
 warn_missing_liburing:
diff --git a/tools/testing/selftests/vm/check_config.sh b/tools/testing/selftests/vm/check_config.sh
index 9a44c65209255..bcba3af0acea9 100644
--- a/tools/testing/selftests/vm/check_config.sh
+++ b/tools/testing/selftests/vm/check_config.sh
@@ -21,11 +21,11 @@ $CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
 
 if [ -f $tmpfile_o ]; then
     echo "#define LOCAL_CONFIG_HAVE_LIBURING 1"  > $OUTPUT_H_FILE
-    echo "ANON_COW_EXTRA_LIBS = -luring"         > $OUTPUT_MKFILE
+    echo "COW_EXTRA_LIBS = -luring"              > $OUTPUT_MKFILE
 else
     echo "// No liburing support found"          > $OUTPUT_H_FILE
     echo "# No liburing support found, so:"      > $OUTPUT_MKFILE
-    echo "ANON_COW_EXTRA_LIBS = "               >> $OUTPUT_MKFILE
+    echo "COW_EXTRA_LIBS = "                    >> $OUTPUT_MKFILE
 fi
 
 rm ${tmpname}.*
diff --git a/tools/testing/selftests/vm/anon_cow.c b/tools/testing/selftests/vm/cow.c
similarity index 97%
rename from tools/testing/selftests/vm/anon_cow.c
rename to tools/testing/selftests/vm/cow.c
index bbb251eb50258..d202bfd635850 100644
--- a/tools/testing/selftests/vm/anon_cow.c
+++ b/tools/testing/selftests/vm/cow.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * COW (Copy On Write) tests for anonymous memory.
+ * COW (Copy On Write) tests.
  *
  * Copyright 2022, Red Hat, Inc.
  *
@@ -986,7 +986,11 @@ struct test_case {
 	test_fn fn;
 };
 
-static const struct test_case test_cases[] = {
+/*
+ * Test cases that are specific to anonymous pages: pages in private mappings
+ * that may get shared via COW during fork().
+ */
+static const struct test_case anon_test_cases[] = {
 	/*
 	 * Basic COW tests for fork() without any GUP. If we miss to break COW,
 	 * either the child can observe modifications by the parent or the
@@ -1104,7 +1108,7 @@ static const struct test_case test_cases[] = {
 	},
 };
 
-static void run_test_case(struct test_case const *test_case)
+static void run_anon_test_case(struct test_case const *test_case)
 {
 	int i;
 
@@ -1125,15 +1129,17 @@ static void run_test_case(struct test_case const *test_case)
 				 hugetlbsizes[i]);
 }
 
-static void run_test_cases(void)
+static void run_anon_test_cases(void)
 {
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
-		run_test_case(&test_cases[i]);
+	ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
+
+	for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
+		run_anon_test_case(&anon_test_cases[i]);
 }
 
-static int tests_per_test_case(void)
+static int tests_per_anon_test_case(void)
 {
 	int tests = 2 + nr_hugetlbsizes;
 
@@ -1144,7 +1150,6 @@ static int tests_per_test_case(void)
 
 int main(int argc, char **argv)
 {
-	int nr_test_cases = ARRAY_SIZE(test_cases);
 	int err;
 
 	pagesize = getpagesize();
@@ -1152,14 +1157,14 @@ int main(int argc, char **argv)
 	detect_hugetlbsizes();
 
 	ksft_print_header();
-	ksft_set_plan(nr_test_cases * tests_per_test_case());
+	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case());
 
 	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
 	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
 	if (pagemap_fd < 0)
 		ksft_exit_fail_msg("opening pagemap failed\n");
 
-	run_test_cases();
+	run_anon_test_cases();
 
 	err = ksft_get_fail_cnt();
 	if (err)
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 1fa783732296e..54d7a822c2cee 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -186,6 +186,6 @@ fi
 run_test ./soft-dirty
 
 # COW tests for anonymous memory
-run_test ./anon_cow
+run_test ./cow
 
 exit $exitcode
-- 
GitLab


From f8664f3c4a08f799122e8f0a8093056a7b3fbc8d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:41 +0100
Subject: [PATCH 3057/4122] selftests/vm: cow: basic COW tests for
 non-anonymous pages

Let's add basic tests for COW with non-anonymous pages in private
mappings: write access should properly trigger COW and result in the
private changes not being visible through other page mappings.

Especially, add tests for:
* Zeropage
* Huge zeropage
* Ordinary pagecache pages via memfd and tmpfile()
* Hugetlb pages via memfd

Fortunately, all tests pass.

Link: https://lkml.kernel.org/r/20221116102659.70287-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/cow.c | 338 ++++++++++++++++++++++++++++++-
 1 file changed, 337 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c
index d202bfd635850..fb07bd44529c3 100644
--- a/tools/testing/selftests/vm/cow.c
+++ b/tools/testing/selftests/vm/cow.c
@@ -19,6 +19,7 @@
 #include <sys/mman.h>
 #include <sys/ioctl.h>
 #include <sys/wait.h>
+#include <linux/memfd.h>
 
 #include "local_config.h"
 #ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -35,6 +36,7 @@ static size_t thpsize;
 static int nr_hugetlbsizes;
 static size_t hugetlbsizes[10];
 static int gup_fd;
+static bool has_huge_zeropage;
 
 static void detect_thpsize(void)
 {
@@ -64,6 +66,31 @@ static void detect_thpsize(void)
 	close(fd);
 }
 
+static void detect_huge_zeropage(void)
+{
+	int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
+		      O_RDONLY);
+	size_t enabled = 0;
+	char buf[15];
+	int ret;
+
+	if (fd < 0)
+		return;
+
+	ret = pread(fd, buf, sizeof(buf), 0);
+	if (ret > 0 && ret < sizeof(buf)) {
+		buf[ret] = 0;
+
+		enabled = strtoul(buf, NULL, 10);
+		if (enabled == 1) {
+			has_huge_zeropage = true;
+			ksft_print_msg("[INFO] huge zeropage is enabled\n");
+		}
+	}
+
+	close(fd);
+}
+
 static void detect_hugetlbsizes(void)
 {
 	DIR *dir = opendir("/sys/kernel/mm/hugepages/");
@@ -1148,6 +1175,312 @@ static int tests_per_anon_test_case(void)
 	return tests;
 }
 
+typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
+
+static void test_cow(char *mem, const char *smem, size_t size)
+{
+	char *old = malloc(size);
+
+	/* Backup the original content. */
+	memcpy(old, smem, size);
+
+	/* Modify the page. */
+	memset(mem, 0xff, size);
+
+	/* See if we still read the old values via the other mapping. */
+	ksft_test_result(!memcmp(smem, old, size),
+			 "Other mapping not modified\n");
+	free(old);
+}
+
+static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
+{
+	char *mem, *smem, tmp;
+
+	ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
+
+	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		return;
+	}
+
+	smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		goto munmap;
+	}
+
+	/* Read from the page to populate the shared zeropage. */
+	tmp = *mem + *smem;
+	asm volatile("" : "+r" (tmp));
+
+	fn(mem, smem, pagesize);
+munmap:
+	munmap(mem, pagesize);
+	if (smem != MAP_FAILED)
+		munmap(smem, pagesize);
+}
+
+static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
+{
+	char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
+	size_t mmap_size;
+	int ret;
+
+	ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
+
+	if (!has_huge_zeropage) {
+		ksft_test_result_skip("Huge zeropage not enabled\n");
+		return;
+	}
+
+	/* For alignment purposes, we need twice the thp size. */
+	mmap_size = 2 * thpsize;
+	mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (mmap_mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		return;
+	}
+	mmap_smem = mmap(NULL, mmap_size, PROT_READ,
+			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (mmap_smem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		goto munmap;
+	}
+
+	/* We need a THP-aligned memory area. */
+	mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
+	smem = (char *)(((uintptr_t)mmap_smem + thpsize) & ~(thpsize - 1));
+
+	ret = madvise(mem, thpsize, MADV_HUGEPAGE);
+	ret |= madvise(smem, thpsize, MADV_HUGEPAGE);
+	if (ret) {
+		ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+		goto munmap;
+	}
+
+	/*
+	 * Read from the memory to populate the huge shared zeropage. Read from
+	 * the first sub-page and test if we get another sub-page populated
+	 * automatically.
+	 */
+	tmp = *mem + *smem;
+	asm volatile("" : "+r" (tmp));
+	if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
+	    !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
+		ksft_test_result_skip("Did not get THPs populated\n");
+		goto munmap;
+	}
+
+	fn(mem, smem, thpsize);
+munmap:
+	munmap(mmap_mem, mmap_size);
+	if (mmap_smem != MAP_FAILED)
+		munmap(mmap_smem, mmap_size);
+}
+
+static void run_with_memfd(non_anon_test_fn fn, const char *desc)
+{
+	char *mem, *smem, tmp;
+	int fd;
+
+	ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+
+	fd = memfd_create("test", 0);
+	if (fd < 0) {
+		ksft_test_result_fail("memfd_create() failed\n");
+		return;
+	}
+
+	/* File consists of a single page filled with zeroes. */
+	if (fallocate(fd, 0, 0, pagesize)) {
+		ksft_test_result_fail("fallocate() failed\n");
+		goto close;
+	}
+
+	/* Create a private mapping of the memfd. */
+	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		goto close;
+	}
+	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		goto munmap;
+	}
+
+	/* Fault the page in. */
+	tmp = *mem + *smem;
+	asm volatile("" : "+r" (tmp));
+
+	fn(mem, smem, pagesize);
+munmap:
+	munmap(mem, pagesize);
+	if (smem != MAP_FAILED)
+		munmap(smem, pagesize);
+close:
+	close(fd);
+}
+
+static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
+{
+	char *mem, *smem, tmp;
+	FILE *file;
+	int fd;
+
+	ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+
+	file = tmpfile();
+	if (!file) {
+		ksft_test_result_fail("tmpfile() failed\n");
+		return;
+	}
+
+	fd = fileno(file);
+	if (fd < 0) {
+		ksft_test_result_skip("fileno() failed\n");
+		return;
+	}
+
+	/* File consists of a single page filled with zeroes. */
+	if (fallocate(fd, 0, 0, pagesize)) {
+		ksft_test_result_fail("fallocate() failed\n");
+		goto close;
+	}
+
+	/* Create a private mapping of the memfd. */
+	mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		goto close;
+	}
+	smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		goto munmap;
+	}
+
+	/* Fault the page in. */
+	tmp = *mem + *smem;
+	asm volatile("" : "+r" (tmp));
+
+	fn(mem, smem, pagesize);
+munmap:
+	munmap(mem, pagesize);
+	if (smem != MAP_FAILED)
+		munmap(smem, pagesize);
+close:
+	fclose(file);
+}
+
+static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
+				   size_t hugetlbsize)
+{
+	int flags = MFD_HUGETLB;
+	char *mem, *smem, tmp;
+	int fd;
+
+	ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+		       hugetlbsize / 1024);
+
+	flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
+
+	fd = memfd_create("test", flags);
+	if (fd < 0) {
+		ksft_test_result_skip("memfd_create() failed\n");
+		return;
+	}
+
+	/* File consists of a single page filled with zeroes. */
+	if (fallocate(fd, 0, 0, hugetlbsize)) {
+		ksft_test_result_skip("need more free huge pages\n");
+		goto close;
+	}
+
+	/* Create a private mapping of the memfd. */
+	mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
+		   0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_skip("need more free huge pages\n");
+		goto close;
+	}
+	smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
+	if (mem == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		goto munmap;
+	}
+
+	/* Fault the page in. */
+	tmp = *mem + *smem;
+	asm volatile("" : "+r" (tmp));
+
+	fn(mem, smem, hugetlbsize);
+munmap:
+	munmap(mem, hugetlbsize);
+	if (mem != MAP_FAILED)
+		munmap(smem, hugetlbsize);
+close:
+	close(fd);
+}
+
+struct non_anon_test_case {
+	const char *desc;
+	non_anon_test_fn fn;
+};
+
+/*
+ * Test cases that target any pages in private mappings that are non anonymous:
+ * pages that may get shared via COW ndependent of fork(). This includes
+ * the shared zeropage(s), pagecache pages, ...
+ */
+static const struct non_anon_test_case non_anon_test_cases[] = {
+	/*
+	 * Basic COW test without any GUP. If we miss to break COW, changes are
+	 * visible via other private/shared mappings.
+	 */
+	{
+		"Basic COW",
+		test_cow,
+	},
+};
+
+static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
+{
+	int i;
+
+	run_with_zeropage(test_case->fn, test_case->desc);
+	run_with_memfd(test_case->fn, test_case->desc);
+	run_with_tmpfile(test_case->fn, test_case->desc);
+	if (thpsize)
+		run_with_huge_zeropage(test_case->fn, test_case->desc);
+	for (i = 0; i < nr_hugetlbsizes; i++)
+		run_with_memfd_hugetlb(test_case->fn, test_case->desc,
+				       hugetlbsizes[i]);
+}
+
+static void run_non_anon_test_cases(void)
+{
+	int i;
+
+	ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
+
+	for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
+		run_non_anon_test_case(&non_anon_test_cases[i]);
+}
+
+static int tests_per_non_anon_test_case(void)
+{
+	int tests = 3 + nr_hugetlbsizes;
+
+	if (thpsize)
+		tests += 1;
+	return tests;
+}
+
 int main(int argc, char **argv)
 {
 	int err;
@@ -1155,9 +1488,11 @@ int main(int argc, char **argv)
 	pagesize = getpagesize();
 	detect_thpsize();
 	detect_hugetlbsizes();
+	detect_huge_zeropage();
 
 	ksft_print_header();
-	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case());
+	ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
+		      ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());
 
 	gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
 	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
@@ -1165,6 +1500,7 @@ int main(int argc, char **argv)
 		ksft_exit_fail_msg("opening pagemap failed\n");
 
 	run_anon_test_cases();
+	run_non_anon_test_cases();
 
 	err = ksft_get_fail_cnt();
 	if (err)
-- 
GitLab


From 97713a3abe338bb6c968e77264edbb68eb8d932a Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:42 +0100
Subject: [PATCH 3058/4122] selftests/vm: cow: R/O long-term pinning
 reliability tests for non-anon pages

Let's test whether R/O long-term pinning is reliable for non-anonymous
memory: when R/O long-term pinning a page, the expectation is that we
break COW early before pinning, such that actual write access via the
page tables won't break COW later and end up replacing the R/O-pinned
page in the page table.

Consequently, R/O long-term pinning in private mappings would only target
exclusive anonymous pages.

For now, all tests fail:
	# [RUN] R/O longterm GUP pin ... with shared zeropage
	not ok 151 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP pin ... with memfd
	not ok 152 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP pin ... with tmpfile
	not ok 153 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP pin ... with huge zeropage
	not ok 154 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP pin ... with memfd hugetlb (2048 kB)
	not ok 155 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP pin ... with memfd hugetlb (1048576 kB)
	not ok 156 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP-fast pin ... with shared zeropage
	not ok 157 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP-fast pin ... with memfd
	not ok 158 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP-fast pin ... with tmpfile
	not ok 159 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP-fast pin ... with huge zeropage
	not ok 160 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP-fast pin ... with memfd hugetlb (2048 kB)
	not ok 161 Longterm R/O pin is reliable
	# [RUN] R/O longterm GUP-fast pin ... with memfd hugetlb (1048576 kB)
	not ok 162 Longterm R/O pin is reliable

Link: https://lkml.kernel.org/r/20221116102659.70287-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/cow.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c
index fb07bd44529c3..73e05b52c49ed 100644
--- a/tools/testing/selftests/vm/cow.c
+++ b/tools/testing/selftests/vm/cow.c
@@ -561,6 +561,7 @@ static void test_iouring_fork(char *mem, size_t size)
 #endif /* LOCAL_CONFIG_HAVE_LIBURING */
 
 enum ro_pin_test {
+	RO_PIN_TEST,
 	RO_PIN_TEST_SHARED,
 	RO_PIN_TEST_PREVIOUSLY_SHARED,
 	RO_PIN_TEST_RO_EXCLUSIVE,
@@ -593,6 +594,8 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
 	}
 
 	switch (test) {
+	case RO_PIN_TEST:
+		break;
 	case RO_PIN_TEST_SHARED:
 	case RO_PIN_TEST_PREVIOUSLY_SHARED:
 		/*
@@ -1193,6 +1196,16 @@ static void test_cow(char *mem, const char *smem, size_t size)
 	free(old);
 }
 
+static void test_ro_pin(char *mem, const char *smem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST, false);
+}
+
+static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
+{
+	do_test_ro_pin(mem, size, RO_PIN_TEST, true);
+}
+
 static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
 {
 	char *mem, *smem, tmp;
@@ -1433,7 +1446,7 @@ struct non_anon_test_case {
 };
 
 /*
- * Test cases that target any pages in private mappings that are non anonymous:
+ * Test cases that target any pages in private mappings that are not anonymous:
  * pages that may get shared via COW ndependent of fork(). This includes
  * the shared zeropage(s), pagecache pages, ...
  */
@@ -1446,6 +1459,19 @@ static const struct non_anon_test_case non_anon_test_cases[] = {
 		"Basic COW",
 		test_cow,
 	},
+	/*
+	 * Take a R/O longterm pin. When modifying the page via the page table,
+	 * the page content change must be visible via the pin.
+	 */
+	{
+		"R/O longterm GUP pin",
+		test_ro_pin,
+	},
+	/* Same as above, but using GUP-fast. */
+	{
+		"R/O longterm GUP-fast pin",
+		test_ro_fast_pin,
+	},
 };
 
 static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
-- 
GitLab


From cdc5021cda194112bc0962d6a0e90b379968c504 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:43 +0100
Subject: [PATCH 3059/4122] mm: add early FAULT_FLAG_UNSHARE consistency checks

For now, FAULT_FLAG_UNSHARE only applies to anonymous pages, which
implies a COW mapping. Let's hide FAULT_FLAG_UNSHARE early if we're not
dealing with a COW mapping, such that we treat it like a read fault as
documented and don't have to worry about the flag throughout all fault
handlers.

While at it, centralize the check for mutual exclusion of
FAULT_FLAG_UNSHARE and FAULT_FLAG_WRITE and just drop the check that
either flag is set in the WP handler.

Link: https://lkml.kernel.org/r/20221116102659.70287-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c |  3 ---
 mm/hugetlb.c     |  5 -----
 mm/memory.c      | 23 ++++++++++++++++++++---
 3 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 29102e3ddf843..5eb702726a0e5 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1313,9 +1313,6 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf)
 	vmf->ptl = pmd_lockptr(vma->vm_mm, vmf->pmd);
 	VM_BUG_ON_VMA(!vma->anon_vma, vma);
 
-	VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE));
-	VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE));
-
 	if (is_huge_zero_pmd(orig_pmd))
 		goto fallback;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3fd4570fb8b0e..3d381b26d5531 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5316,9 +5316,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 	unsigned long haddr = address & huge_page_mask(h);
 	struct mmu_notifier_range range;
 
-	VM_BUG_ON(unshare && (flags & FOLL_WRITE));
-	VM_BUG_ON(!unshare && !(flags & FOLL_WRITE));
-
 	/*
 	 * hugetlb does not support FOLL_FORCE-style write faults that keep the
 	 * PTE mapped R/O such as maybe_mkwrite() would do.
@@ -5328,8 +5325,6 @@ static vm_fault_t hugetlb_wp(struct mm_struct *mm, struct vm_area_struct *vma,
 
 	/* Let's take out MAP_SHARED mappings first. */
 	if (vma->vm_flags & VM_MAYSHARE) {
-		if (unlikely(unshare))
-			return 0;
 		set_huge_ptep_writable(vma, haddr, ptep);
 		return 0;
 	}
diff --git a/mm/memory.c b/mm/memory.c
index 086cb3dd86081..07380ef935acf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3343,9 +3343,6 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct folio *folio;
 
-	VM_BUG_ON(unshare && (vmf->flags & FAULT_FLAG_WRITE));
-	VM_BUG_ON(!unshare && !(vmf->flags & FAULT_FLAG_WRITE));
-
 	if (likely(!unshare)) {
 		if (userfaultfd_pte_wp(vma, *vmf->pte)) {
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -5161,6 +5158,22 @@ static void lru_gen_exit_fault(void)
 }
 #endif /* CONFIG_LRU_GEN */
 
+static vm_fault_t sanitize_fault_flags(struct vm_area_struct *vma,
+				       unsigned int *flags)
+{
+	if (unlikely(*flags & FAULT_FLAG_UNSHARE)) {
+		if (WARN_ON_ONCE(*flags & FAULT_FLAG_WRITE))
+			return VM_FAULT_SIGSEGV;
+		/*
+		 * FAULT_FLAG_UNSHARE only applies to COW mappings. Let's
+		 * just treat it like an ordinary read-fault otherwise.
+		 */
+		if (!is_cow_mapping(vma->vm_flags))
+			*flags &= ~FAULT_FLAG_UNSHARE;
+	}
+	return 0;
+}
+
 /*
  * By the time we get here, we already hold the mm semaphore
  *
@@ -5177,6 +5190,10 @@ vm_fault_t handle_mm_fault(struct vm_area_struct *vma, unsigned long address,
 	count_vm_event(PGFAULT);
 	count_memcg_event_mm(vma->vm_mm, PGFAULT);
 
+	ret = sanitize_fault_flags(vma, &flags);
+	if (ret)
+		return ret;
+
 	if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE,
 					    flags & FAULT_FLAG_INSTRUCTION,
 					    flags & FAULT_FLAG_REMOTE))
-- 
GitLab


From 79881fed6052a9ce00cfb63297832b9faacf8cf3 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:44 +0100
Subject: [PATCH 3060/4122] mm: add early FAULT_FLAG_WRITE consistency checks

Let's catch abuse of FAULT_FLAG_WRITE early, such that we don't have to
care in all other handlers and might get "surprises" if we forget to do
so.

Write faults without VM_MAYWRITE don't make any sense, and our
maybe_mkwrite() logic could have hidden such abuse for now.

Write faults without VM_WRITE on something that is not a COW mapping is
similarly broken, and e.g., do_wp_page() could end up placing an
anonymous page into a shared mapping, which would be bad.

This is a preparation for reliable R/O long-term pinning of pages in
private mappings, whereby we want to make sure that we will never break
COW in a read-only private mapping.

Link: https://lkml.kernel.org/r/20221116102659.70287-6-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/mm/memory.c b/mm/memory.c
index 07380ef935acf..5e4df6b87016a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5170,6 +5170,14 @@ static vm_fault_t sanitize_fault_flags(struct vm_area_struct *vma,
 		 */
 		if (!is_cow_mapping(vma->vm_flags))
 			*flags &= ~FAULT_FLAG_UNSHARE;
+	} else if (*flags & FAULT_FLAG_WRITE) {
+		/* Write faults on read-only mappings are impossible ... */
+		if (WARN_ON_ONCE(!(vma->vm_flags & VM_MAYWRITE)))
+			return VM_FAULT_SIGSEGV;
+		/* ... and FOLL_FORCE only applies to COW mappings. */
+		if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE) &&
+				 !is_cow_mapping(vma->vm_flags)))
+			return VM_FAULT_SIGSEGV;
 	}
 	return 0;
 }
-- 
GitLab


From b9086fde6d44e8a95dc95b822bd87386129b832d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:45 +0100
Subject: [PATCH 3061/4122] mm: rework handling in do_wp_page() based on
 private vs. shared mappings

We want to extent FAULT_FLAG_UNSHARE support to anything mapped into a
COW mapping (pagecache page, zeropage, PFN, ...), not just anonymous pages.
Let's prepare for that by handling shared mappings first such that we can
handle private mappings last.

While at it, use folio-based functions instead of page-based functions
where we touch the code either way.

Link: https://lkml.kernel.org/r/20221116102659.70287-7-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory.c | 38 +++++++++++++++++---------------------
 1 file changed, 17 insertions(+), 21 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 5e4df6b87016a..5d4b42f1a8d6a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3341,7 +3341,7 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 {
 	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
 	struct vm_area_struct *vma = vmf->vma;
-	struct folio *folio;
+	struct folio *folio = NULL;
 
 	if (likely(!unshare)) {
 		if (userfaultfd_pte_wp(vma, *vmf->pte)) {
@@ -3359,13 +3359,12 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 	}
 
 	vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
-	if (!vmf->page) {
-		if (unlikely(unshare)) {
-			/* No anonymous page -> nothing to do. */
-			pte_unmap_unlock(vmf->pte, vmf->ptl);
-			return 0;
-		}
 
+	/*
+	 * Shared mapping: we are guaranteed to have VM_WRITE and
+	 * FAULT_FLAG_WRITE set at this point.
+	 */
+	if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
 		/*
 		 * VM_MIXEDMAP !pfn_valid() case, or VM_SOFTDIRTY clear on a
 		 * VM_PFNMAP VMA.
@@ -3373,20 +3372,19 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
 		 * We should not cow pages in a shared writeable mapping.
 		 * Just mark the pages writable and/or call ops->pfn_mkwrite.
 		 */
-		if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
-				     (VM_WRITE|VM_SHARED))
+		if (!vmf->page)
 			return wp_pfn_shared(vmf);
-
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		return wp_page_copy(vmf);
+		return wp_page_shared(vmf);
 	}
 
+	if (vmf->page)
+		folio = page_folio(vmf->page);
+
 	/*
-	 * Take out anonymous pages first, anonymous shared vmas are
-	 * not dirty accountable.
+	 * Private mapping: create an exclusive anonymous page copy if reuse
+	 * is impossible. We might miss VM_WRITE for FOLL_FORCE handling.
 	 */
-	folio = page_folio(vmf->page);
-	if (folio_test_anon(folio)) {
+	if (folio && folio_test_anon(folio)) {
 		/*
 		 * If the page is exclusive to this process we must reuse the
 		 * page without further checks.
@@ -3437,19 +3435,17 @@ reuse:
 		/* No anonymous page -> nothing to do. */
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		return 0;
-	} else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==
-					(VM_WRITE|VM_SHARED))) {
-		return wp_page_shared(vmf);
 	}
 copy:
 	/*
 	 * Ok, we need to copy. Oh, well..
 	 */
-	get_page(vmf->page);
+	if (folio)
+		folio_get(folio);
 
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 #ifdef CONFIG_KSM
-	if (PageKsm(vmf->page))
+	if (folio && folio_test_ksm(folio))
 		count_vm_event(COW_KSM);
 #endif
 	return wp_page_copy(vmf);
-- 
GitLab


From aea06577a9005ca81c35196d6171cac346d3b251 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:46 +0100
Subject: [PATCH 3062/4122] mm: don't call vm_ops->huge_fault() in
 wp_huge_pmd()/wp_huge_pud() for private mappings

If we already have a PMD/PUD mapped write-protected in a private mapping
and we want to break COW either due to FAULT_FLAG_WRITE or
FAULT_FLAG_UNSHARE, there is no need to inform the file system just like on
the PTE path.

Let's just split (->zap) + fallback in that case.

This is a preparation for more generic FAULT_FLAG_UNSHARE support in
COW mappings.

Link: https://lkml.kernel.org/r/20221116102659.70287-8-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 5d4b42f1a8d6a..6cec0adab37f6 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4802,6 +4802,7 @@ static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
 static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
 {
 	const bool unshare = vmf->flags & FAULT_FLAG_UNSHARE;
+	vm_fault_t ret;
 
 	if (vma_is_anonymous(vmf->vma)) {
 		if (likely(!unshare) &&
@@ -4809,11 +4810,13 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
 			return handle_userfault(vmf, VM_UFFD_WP);
 		return do_huge_pmd_wp_page(vmf);
 	}
-	if (vmf->vma->vm_ops->huge_fault) {
-		vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
 
-		if (!(ret & VM_FAULT_FALLBACK))
-			return ret;
+	if (vmf->vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
+		if (vmf->vma->vm_ops->huge_fault) {
+			ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PMD);
+			if (!(ret & VM_FAULT_FALLBACK))
+				return ret;
+		}
 	}
 
 	/* COW or write-notify handled on pte level: split pmd. */
@@ -4839,14 +4842,17 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
 {
 #if defined(CONFIG_TRANSPARENT_HUGEPAGE) &&			\
 	defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+	vm_fault_t ret;
+
 	/* No support for anonymous transparent PUD pages yet */
 	if (vma_is_anonymous(vmf->vma))
 		goto split;
-	if (vmf->vma->vm_ops->huge_fault) {
-		vm_fault_t ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
-
-		if (!(ret & VM_FAULT_FALLBACK))
-			return ret;
+	if (vmf->vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
+		if (vmf->vma->vm_ops->huge_fault) {
+			ret = vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+			if (!(ret & VM_FAULT_FALLBACK))
+				return ret;
+		}
 	}
 split:
 	/* COW or write-notify not handled on PUD level: split pud.*/
-- 
GitLab


From 8d6a0ac09a16c026e1e2a03a61e12e95c48a25a6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:47 +0100
Subject: [PATCH 3063/4122] mm: extend FAULT_FLAG_UNSHARE support to anything
 in a COW mapping

Extend FAULT_FLAG_UNSHARE to break COW on anything mapped into a
COW (i.e., private writable) mapping and adjust the documentation
accordingly.

FAULT_FLAG_UNSHARE will now also break COW when encountering the shared
zeropage, a pagecache page, a PFNMAP, ... inside a COW mapping, by
properly replacing the mapped page/pfn by a private copy (an exclusive
anonymous page).

Note that only do_wp_page() needs care: hugetlb_wp() already handles
FAULT_FLAG_UNSHARE correctly. wp_huge_pmd()/wp_huge_pud() also handles it
correctly, for example, splitting the huge zeropage on FAULT_FLAG_UNSHARE
such that we can handle FAULT_FLAG_UNSHARE on the PTE level.

This change is a requirement for reliable long-term R/O pinning in
COW mappings.

Link: https://lkml.kernel.org/r/20221116102659.70287-9-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h | 8 ++++----
 mm/memory.c              | 4 ----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 157c2e22cc7fc..018b1c0981736 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1039,9 +1039,9 @@ typedef struct {
  * @FAULT_FLAG_REMOTE: The fault is not for current task/mm.
  * @FAULT_FLAG_INSTRUCTION: The fault was during an instruction fetch.
  * @FAULT_FLAG_INTERRUPTIBLE: The fault can be interrupted by non-fatal signals.
- * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to unshare (and mark
- *                      exclusive) a possibly shared anonymous page that is
- *                      mapped R/O.
+ * @FAULT_FLAG_UNSHARE: The fault is an unsharing request to break COW in a
+ *                      COW mapping, making sure that an exclusive anon page is
+ *                      mapped after the fault.
  * @FAULT_FLAG_ORIG_PTE_VALID: whether the fault has vmf->orig_pte cached.
  *                        We should only access orig_pte if this flag set.
  *
@@ -1066,7 +1066,7 @@ typedef struct {
  *
  * The combination FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE is illegal.
  * FAULT_FLAG_UNSHARE is ignored and treated like an ordinary read fault when
- * no existing R/O-mapped anonymous page is encountered.
+ * applied to mappings that are not COW mappings.
  */
 enum fault_flag {
 	FAULT_FLAG_WRITE =		1 << 0,
diff --git a/mm/memory.c b/mm/memory.c
index 6cec0adab37f6..815d2ff05c62b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3431,10 +3431,6 @@ reuse:
 		}
 		wp_page_reuse(vmf);
 		return VM_FAULT_WRITE;
-	} else if (unshare) {
-		/* No anonymous page -> nothing to do. */
-		pte_unmap_unlock(vmf->pte, vmf->ptl);
-		return 0;
 	}
 copy:
 	/*
-- 
GitLab


From 84209e87c6963f928194a890399e24e8ad299db1 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:48 +0100
Subject: [PATCH 3064/4122] mm/gup: reliable R/O long-term pinning in COW
 mappings

We already support reliable R/O pinning of anonymous memory. However,
assume we end up pinning (R/O long-term) a pagecache page or the shared
zeropage inside a writable private ("COW") mapping. The next write access
will trigger a write-fault and replace the pinned page by an exclusive
anonymous page in the process page tables to break COW: the pinned page no
longer corresponds to the page mapped into the process' page table.

Now that FAULT_FLAG_UNSHARE can break COW on anything mapped into a
COW mapping, let's properly break COW first before R/O long-term
pinning something that's not an exclusive anon page inside a COW
mapping. FAULT_FLAG_UNSHARE will break COW and map an exclusive anon page
instead that can get pinned safely.

With this change, we can stop using FOLL_FORCE|FOLL_WRITE for reliable
R/O long-term pinning in COW mappings.

With this change, the new R/O long-term pinning tests for non-anonymous
memory succeed:
  # [RUN] R/O longterm GUP pin ... with shared zeropage
  ok 151 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP pin ... with memfd
  ok 152 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP pin ... with tmpfile
  ok 153 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP pin ... with huge zeropage
  ok 154 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP pin ... with memfd hugetlb (2048 kB)
  ok 155 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP pin ... with memfd hugetlb (1048576 kB)
  ok 156 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP-fast pin ... with shared zeropage
  ok 157 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP-fast pin ... with memfd
  ok 158 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP-fast pin ... with tmpfile
  ok 159 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP-fast pin ... with huge zeropage
  ok 160 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP-fast pin ... with memfd hugetlb (2048 kB)
  ok 161 Longterm R/O pin is reliable
  # [RUN] R/O longterm GUP-fast pin ... with memfd hugetlb (1048576 kB)
  ok 162 Longterm R/O pin is reliable

Note 1: We don't care about short-term R/O-pinning, because they have
snapshot semantics: they are not supposed to observe modifications that
happen after pinning.

As one example, assume we start direct I/O to read from a page and store
page content into a file: modifications to page content after starting
direct I/O are not guaranteed to end up in the file. So even if we'd pin
the shared zeropage, the end result would be as expected -- getting zeroes
stored to the file.

Note 2: For shared mappings we'll now always fallback to the slow path to
lookup the VMA when R/O long-term pining. While that's the necessary price
we have to pay right now, it's actually not that bad in practice: most
FOLL_LONGTERM users already specify FOLL_WRITE, for example, along with
FOLL_FORCE because they tried dealing with COW mappings correctly ...

Note 3: For users that use FOLL_LONGTERM right now without FOLL_WRITE,
such as VFIO, we'd now no longer pin the shared zeropage. Instead, we'd
populate exclusive anon pages that we can pin. There was a concern that
this could affect the memlock limit of existing setups.

For example, a VM running with VFIO could run into the memlock limit and
fail to run. However, we essentially had the same behavior already in
commit 17839856fd58 ("gup: document and work around "COW can break either
way" issue") which got merged into some enterprise distros, and there were
not any such complaints. So most probably, we're fine.

Link: https://lkml.kernel.org/r/20221116102659.70287-10-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Reviewed-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 27 ++++++++++++++++++++++++---
 mm/gup.c           | 10 +++++-----
 mm/huge_memory.c   |  2 +-
 mm/hugetlb.c       |  7 ++++---
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 686879dbb0bd1..d8363ac34a7c2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3149,8 +3149,12 @@ static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
  * Must be called with the (sub)page that's actually referenced via the
  * page table entry, which might not necessarily be the head page for a
  * PTE-mapped THP.
+ *
+ * If the vma is NULL, we're coming from the GUP-fast path and might have
+ * to fallback to the slow path just to lookup the vma.
  */
-static inline bool gup_must_unshare(unsigned int flags, struct page *page)
+static inline bool gup_must_unshare(struct vm_area_struct *vma,
+				    unsigned int flags, struct page *page)
 {
 	/*
 	 * FOLL_WRITE is implicitly handled correctly as the page table entry
@@ -3163,8 +3167,25 @@ static inline bool gup_must_unshare(unsigned int flags, struct page *page)
 	 * Note: PageAnon(page) is stable until the page is actually getting
 	 * freed.
 	 */
-	if (!PageAnon(page))
-		return false;
+	if (!PageAnon(page)) {
+		/*
+		 * We only care about R/O long-term pining: R/O short-term
+		 * pinning does not have the semantics to observe successive
+		 * changes through the process page tables.
+		 */
+		if (!(flags & FOLL_LONGTERM))
+			return false;
+
+		/* We really need the vma ... */
+		if (!vma)
+			return true;
+
+		/*
+		 * ... because we only care about writable private ("COW")
+		 * mappings where we have to break COW early.
+		 */
+		return is_cow_mapping(vma->vm_flags);
+	}
 
 	/* Paired with a memory barrier in page_try_share_anon_rmap(). */
 	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
diff --git a/mm/gup.c b/mm/gup.c
index 2500d00db51b7..39c84a200f067 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -603,7 +603,7 @@ retry:
 		}
 	}
 
-	if (!pte_write(pte) && gup_must_unshare(flags, page)) {
+	if (!pte_write(pte) && gup_must_unshare(vma, flags, page)) {
 		page = ERR_PTR(-EMLINK);
 		goto out;
 	}
@@ -2380,7 +2380,7 @@ static int gup_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
 			goto pte_unmap;
 		}
 
-		if (!pte_write(pte) && gup_must_unshare(flags, page)) {
+		if (!pte_write(pte) && gup_must_unshare(NULL, flags, page)) {
 			gup_put_folio(folio, 1, flags);
 			goto pte_unmap;
 		}
@@ -2566,7 +2566,7 @@ static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
 		return 0;
 	}
 
-	if (!pte_write(pte) && gup_must_unshare(flags, &folio->page)) {
+	if (!pte_write(pte) && gup_must_unshare(NULL, flags, &folio->page)) {
 		gup_put_folio(folio, refs, flags);
 		return 0;
 	}
@@ -2632,7 +2632,7 @@ static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
 		return 0;
 	}
 
-	if (!pmd_write(orig) && gup_must_unshare(flags, &folio->page)) {
+	if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
 		gup_put_folio(folio, refs, flags);
 		return 0;
 	}
@@ -2672,7 +2672,7 @@ static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
 		return 0;
 	}
 
-	if (!pud_write(orig) && gup_must_unshare(flags, &folio->page)) {
+	if (!pud_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
 		gup_put_folio(folio, refs, flags);
 		return 0;
 	}
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 5eb702726a0e5..86a30041a2e10 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1480,7 +1480,7 @@ struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
 	if (pmd_protnone(*pmd) && !gup_can_follow_protnone(flags))
 		return NULL;
 
-	if (!pmd_write(*pmd) && gup_must_unshare(flags, page))
+	if (!pmd_write(*pmd) && gup_must_unshare(vma, flags, page))
 		return ERR_PTR(-EMLINK);
 
 	VM_BUG_ON_PAGE((flags & FOLL_PIN) && PageAnon(page) &&
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3d381b26d5531..9d97c9a2a15df 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -6197,7 +6197,8 @@ static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
 	}
 }
 
-static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
+static inline bool __follow_hugetlb_must_fault(struct vm_area_struct *vma,
+					       unsigned int flags, pte_t *pte,
 					       bool *unshare)
 {
 	pte_t pteval = huge_ptep_get(pte);
@@ -6209,7 +6210,7 @@ static inline bool __follow_hugetlb_must_fault(unsigned int flags, pte_t *pte,
 		return false;
 	if (flags & FOLL_WRITE)
 		return true;
-	if (gup_must_unshare(flags, pte_page(pteval))) {
+	if (gup_must_unshare(vma, flags, pte_page(pteval))) {
 		*unshare = true;
 		return true;
 	}
@@ -6338,7 +6339,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		 * directly from any kind of swap entries.
 		 */
 		if (absent ||
-		    __follow_hugetlb_must_fault(flags, pte, &unshare)) {
+		    __follow_hugetlb_must_fault(vma, flags, pte, &unshare)) {
 			vm_fault_t ret;
 			unsigned int fault_flags = 0;
 
-- 
GitLab


From b40656aa7d559adc1fe689396dc58b92a9a27286 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:49 +0100
Subject: [PATCH 3065/4122] RDMA/umem: remove FOLL_FORCE usage

GUP now supports reliable R/O long-term pinning in COW mappings, such
that we break COW early. MAP_SHARED VMAs only use the shared zeropage so
far in one corner case (DAXFS file with holes), which can be ignored
because GUP does not support long-term pinning in fsdax (see
check_vma_flags()).

Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required
for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop
using FOLL_FORCE, which is really only for ptrace access.

Link: https://lkml.kernel.org/r/20221116102659.70287-11-david@redhat.com
Tested-by: Leon Romanovsky <leonro@nvidia.com>	[over mlx4 and mlx5]
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/infiniband/core/umem.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 86d479772fbc6..755a9c57db6f3 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -156,7 +156,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 	struct mm_struct *mm;
 	unsigned long npages;
 	int pinned, ret;
-	unsigned int gup_flags = FOLL_WRITE;
+	unsigned int gup_flags = FOLL_LONGTERM;
 
 	/*
 	 * If the combination of the addr and size requested for this memory
@@ -210,8 +210,8 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 
 	cur_base = addr & PAGE_MASK;
 
-	if (!umem->writable)
-		gup_flags |= FOLL_FORCE;
+	if (umem->writable)
+		gup_flags |= FOLL_WRITE;
 
 	while (npages) {
 		cond_resched();
@@ -219,7 +219,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
 					  min_t(unsigned long, npages,
 						PAGE_SIZE /
 						sizeof(struct page *)),
-					  gup_flags | FOLL_LONGTERM, page_list);
+					  gup_flags, page_list);
 		if (pinned < 0) {
 			ret = pinned;
 			goto umem_release;
-- 
GitLab


From a9d0284033e974a355b806fdb5fbabf8301bcd16 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:50 +0100
Subject: [PATCH 3066/4122] RDMA/usnic: remove FOLL_FORCE usage

GUP now supports reliable R/O long-term pinning in COW mappings, such
that we break COW early. MAP_SHARED VMAs only use the shared zeropage so
far in one corner case (DAXFS file with holes), which can be ignored
because GUP does not support long-term pinning in fsdax (see
check_vma_flags()).

Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required
for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop
using FOLL_FORCE, which is really only for ptrace access.

Link: https://lkml.kernel.org/r/20221116102659.70287-12-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Christian Benvenuti <benve@cisco.com>
Cc: Nelson Escobar <neescoba@cisco.com>
Cc: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/infiniband/hw/usnic/usnic_uiom.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c
index 67923ced6e2d1..c301b3be9f303 100644
--- a/drivers/infiniband/hw/usnic/usnic_uiom.c
+++ b/drivers/infiniband/hw/usnic/usnic_uiom.c
@@ -85,6 +85,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 				int dmasync, struct usnic_uiom_reg *uiomr)
 {
 	struct list_head *chunk_list = &uiomr->chunk_list;
+	unsigned int gup_flags = FOLL_LONGTERM;
 	struct page **page_list;
 	struct scatterlist *sg;
 	struct usnic_uiom_chunk *chunk;
@@ -96,7 +97,6 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 	int off;
 	int i;
 	dma_addr_t pa;
-	unsigned int gup_flags;
 	struct mm_struct *mm;
 
 	/*
@@ -131,8 +131,8 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 		goto out;
 	}
 
-	gup_flags = FOLL_WRITE;
-	gup_flags |= (writable) ? 0 : FOLL_FORCE;
+	if (writable)
+		gup_flags |= FOLL_WRITE;
 	cur_base = addr & PAGE_MASK;
 	ret = 0;
 
@@ -140,8 +140,7 @@ static int usnic_uiom_get_pages(unsigned long addr, size_t size, int writable,
 		ret = pin_user_pages(cur_base,
 				     min_t(unsigned long, npages,
 				     PAGE_SIZE / sizeof(struct page *)),
-				     gup_flags | FOLL_LONGTERM,
-				     page_list, NULL);
+				     gup_flags, page_list, NULL);
 
 		if (ret < 0)
 			goto out;
-- 
GitLab


From 129e636fe9837fcfea68bfd368a07548d9880726 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:51 +0100
Subject: [PATCH 3067/4122] RDMA/siw: remove FOLL_FORCE usage

GUP now supports reliable R/O long-term pinning in COW mappings, such
that we break COW early. MAP_SHARED VMAs only use the shared zeropage so
far in one corner case (DAXFS file with holes), which can be ignored
because GUP does not support long-term pinning in fsdax (see
check_vma_flags()).

Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required
for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop
using FOLL_FORCE, which is really only for ptrace access.

Link: https://lkml.kernel.org/r/20221116102659.70287-13-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Cc: Bernard Metzler <bmt@zurich.ibm.com>
Cc: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/infiniband/sw/siw/siw_mem.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_mem.c b/drivers/infiniband/sw/siw/siw_mem.c
index 61c17db70d658..b2b33dd3b4fa1 100644
--- a/drivers/infiniband/sw/siw/siw_mem.c
+++ b/drivers/infiniband/sw/siw/siw_mem.c
@@ -368,7 +368,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 	struct mm_struct *mm_s;
 	u64 first_page_va;
 	unsigned long mlock_limit;
-	unsigned int foll_flags = FOLL_WRITE;
+	unsigned int foll_flags = FOLL_LONGTERM;
 	int num_pages, num_chunks, i, rv = 0;
 
 	if (!can_do_mlock())
@@ -391,8 +391,8 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 
 	mmgrab(mm_s);
 
-	if (!writable)
-		foll_flags |= FOLL_FORCE;
+	if (writable)
+		foll_flags |= FOLL_WRITE;
 
 	mmap_read_lock(mm_s);
 
@@ -423,8 +423,7 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
 		while (nents) {
 			struct page **plist = &umem->page_chunk[i].plist[got];
 
-			rv = pin_user_pages(first_page_va, nents,
-					    foll_flags | FOLL_LONGTERM,
+			rv = pin_user_pages(first_page_va, nents, foll_flags,
 					    plist, NULL);
 			if (rv < 0)
 				goto out_sem_up;
-- 
GitLab


From 3298de2c66e0276abe6b95041fd3605a377523fc Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:52 +0100
Subject: [PATCH 3068/4122] media: videobuf-dma-sg: remove FOLL_FORCE usage

GUP now supports reliable R/O long-term pinning in COW mappings, such
that we break COW early. MAP_SHARED VMAs only use the shared zeropage so
far in one corner case (DAXFS file with holes), which can be ignored
because GUP does not support long-term pinning in fsdax (see
check_vma_flags()).

Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required
for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop
using FOLL_FORCE, which is really only for ptrace access.

Link: https://lkml.kernel.org/r/20221116102659.70287-14-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/media/v4l2-core/videobuf-dma-sg.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c
index f75e5eedeee05..234e9f647c963 100644
--- a/drivers/media/v4l2-core/videobuf-dma-sg.c
+++ b/drivers/media/v4l2-core/videobuf-dma-sg.c
@@ -151,17 +151,16 @@ static void videobuf_dma_init(struct videobuf_dmabuf *dma)
 static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 			int direction, unsigned long data, unsigned long size)
 {
+	unsigned int gup_flags = FOLL_LONGTERM;
 	unsigned long first, last;
-	int err, rw = 0;
-	unsigned int flags = FOLL_FORCE;
+	int err;
 
 	dma->direction = direction;
 	switch (dma->direction) {
 	case DMA_FROM_DEVICE:
-		rw = READ;
+		gup_flags |= FOLL_WRITE;
 		break;
 	case DMA_TO_DEVICE:
-		rw = WRITE;
 		break;
 	default:
 		BUG();
@@ -177,14 +176,11 @@ static int videobuf_dma_init_user_locked(struct videobuf_dmabuf *dma,
 	if (NULL == dma->pages)
 		return -ENOMEM;
 
-	if (rw == READ)
-		flags |= FOLL_WRITE;
-
 	dprintk(1, "init user [0x%lx+0x%lx => %lu pages]\n",
 		data, size, dma->nr_pages);
 
-	err = pin_user_pages(data & PAGE_MASK, dma->nr_pages,
-			     flags | FOLL_LONGTERM, dma->pages, NULL);
+	err = pin_user_pages(data & PAGE_MASK, dma->nr_pages, gup_flags,
+			     dma->pages, NULL);
 
 	if (err != dma->nr_pages) {
 		dma->nr_pages = (err >= 0) ? err : 0;
-- 
GitLab


From 7d96eb6a9164607df09c9589ed3ba9ef4e9cc2a6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:53 +0100
Subject: [PATCH 3069/4122] drm/etnaviv: remove FOLL_FORCE usage

GUP now supports reliable R/O long-term pinning in COW mappings, such
that we break COW early. MAP_SHARED VMAs only use the shared zeropage so
far in one corner case (DAXFS file with holes), which can be ignored
because GUP does not support long-term pinning in fsdax (see
check_vma_flags()).

commit cd5297b0855f ("drm/etnaviv: Use FOLL_FORCE for userptr")
documents that FOLL_FORCE | FOLL_WRITE was really only used for reliable
R/O pinning.

Consequently, FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM is no longer required
for reliable R/O long-term pinning: FOLL_LONGTERM is sufficient. So stop
using FOLL_FORCE, which is really only for ptrace access.

Link: https://lkml.kernel.org/r/20221116102659.70287-15-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Lucas Stach <l.stach@pengutronix.de>
Cc: Russell King <linux+etnaviv@armlinux.org.uk>
Cc: Christian Gmeiner <christian.gmeiner@gmail.com>
Cc: David Airlie <airlied@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/gpu/drm/etnaviv/etnaviv_gem.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index cc386f8a7116e..efe2240945d07 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -638,6 +638,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 	struct page **pvec = NULL;
 	struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr;
 	int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+	unsigned int gup_flags = FOLL_LONGTERM;
 
 	might_lock_read(&current->mm->mmap_lock);
 
@@ -648,14 +649,15 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 	if (!pvec)
 		return -ENOMEM;
 
+	if (!userptr->ro)
+		gup_flags |= FOLL_WRITE;
+
 	do {
 		unsigned num_pages = npages - pinned;
 		uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE;
 		struct page **pages = pvec + pinned;
 
-		ret = pin_user_pages_fast(ptr, num_pages,
-					  FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM,
-					  pages);
+		ret = pin_user_pages_fast(ptr, num_pages, gup_flags, pages);
 		if (ret < 0) {
 			unpin_user_pages(pvec, pinned);
 			kvfree(pvec);
-- 
GitLab


From 70b96f24a441e5a7e0853e3893edd9dc58b67996 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:54 +0100
Subject: [PATCH 3070/4122] media: pci/ivtv: remove FOLL_FORCE usage

FOLL_FORCE is really only for ptrace access. R/O pinning a page is
supposed to fail if the VMA misses proper access permissions (no VM_READ).

Let's just remove FOLL_FORCE usage here; there would have to be a pretty
good reason to allow arbitrary drivers to R/O pin pages in a PROT_NONE
VMA. Most probably, FOLL_FORCE usage is just some legacy leftover.

Link: https://lkml.kernel.org/r/20221116102659.70287-16-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Cc: Andy Walls <awalls@md.metrocast.net>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/media/pci/ivtv/ivtv-udma.c | 2 +-
 drivers/media/pci/ivtv/ivtv-yuv.c  | 5 ++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/media/pci/ivtv/ivtv-udma.c b/drivers/media/pci/ivtv/ivtv-udma.c
index 210be8290f24d..99b9f55ca8292 100644
--- a/drivers/media/pci/ivtv/ivtv-udma.c
+++ b/drivers/media/pci/ivtv/ivtv-udma.c
@@ -115,7 +115,7 @@ int ivtv_udma_setup(struct ivtv *itv, unsigned long ivtv_dest_addr,
 
 	/* Pin user pages for DMA Xfer */
 	err = pin_user_pages_unlocked(user_dma.uaddr, user_dma.page_count,
-			dma->map, FOLL_FORCE);
+			dma->map, 0);
 
 	if (user_dma.page_count != err) {
 		IVTV_DEBUG_WARN("failed to map user pages, returned %d instead of %d\n",
diff --git a/drivers/media/pci/ivtv/ivtv-yuv.c b/drivers/media/pci/ivtv/ivtv-yuv.c
index 4ba10c34a16a4..582146f8d70d5 100644
--- a/drivers/media/pci/ivtv/ivtv-yuv.c
+++ b/drivers/media/pci/ivtv/ivtv-yuv.c
@@ -63,12 +63,11 @@ static int ivtv_yuv_prep_user_dma(struct ivtv *itv, struct ivtv_user_dma *dma,
 
 	/* Pin user pages for DMA Xfer */
 	y_pages = pin_user_pages_unlocked(y_dma.uaddr,
-			y_dma.page_count, &dma->map[0], FOLL_FORCE);
+			y_dma.page_count, &dma->map[0], 0);
 	uv_pages = 0; /* silence gcc. value is set and consumed only if: */
 	if (y_pages == y_dma.page_count) {
 		uv_pages = pin_user_pages_unlocked(uv_dma.uaddr,
-				uv_dma.page_count, &dma->map[y_pages],
-				FOLL_FORCE);
+				uv_dma.page_count, &dma->map[y_pages], 0);
 	}
 
 	if (y_pages != y_dma.page_count || uv_pages != uv_dma.page_count) {
-- 
GitLab


From cb78a634f3f7ff743e19fbffcb72d794e4bd7f73 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:55 +0100
Subject: [PATCH 3071/4122] mm/frame-vector: remove FOLL_FORCE usage

FOLL_FORCE is really only for ptrace access. According to commit
707947247e95 ("media: videobuf2-vmalloc: get_userptr: buffers are always
writable"), get_vaddr_frames() currently pins all pages writable as a
workaround for issues with read-only buffers.

FOLL_FORCE, however, seems to be a legacy leftover as it predates
commit 707947247e95 ("media: videobuf2-vmalloc: get_userptr: buffers are
always writable"). Let's just remove it.

Once the read-only buffer issue has been resolved, FOLL_WRITE could
again be set depending on the DMA direction.

Link: https://lkml.kernel.org/r/20221116102659.70287-17-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl>
Acked-by: Tomasz Figa <tfiga@chromium.org>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/media/common/videobuf2/frame_vector.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/media/common/videobuf2/frame_vector.c b/drivers/media/common/videobuf2/frame_vector.c
index 542dde9d2609b..062e98148c535 100644
--- a/drivers/media/common/videobuf2/frame_vector.c
+++ b/drivers/media/common/videobuf2/frame_vector.c
@@ -50,7 +50,7 @@ int get_vaddr_frames(unsigned long start, unsigned int nr_frames,
 	start = untagged_addr(start);
 
 	ret = pin_user_pages_fast(start, nr_frames,
-				  FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+				  FOLL_WRITE | FOLL_LONGTERM,
 				  (struct page **)(vec->ptrs));
 	if (ret > 0) {
 		vec->got_ref = true;
-- 
GitLab


From c098ce73c247a0e36d8a6b8cfdc7d05e4bc81bd0 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:56 +0100
Subject: [PATCH 3072/4122] drm/exynos: remove FOLL_FORCE usage

FOLL_FORCE is really only for ptrace access. As we unpin the pinned pages
using unpin_user_pages_dirty_lock(true), the assumption is that all these
pages are writable.

FOLL_FORCE in this case seems to be a legacy leftover. Let's just remove
it.

Link: https://lkml.kernel.org/r/20221116102659.70287-18-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Inki Dae <inki.dae@samsung.com>
Cc: Seung-Woo Kim <sw0312.kim@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: David Airlie <airlied@gmail.com>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/gpu/drm/exynos/exynos_drm_g2d.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 471fd6c8135f2..e19c2ceb3759e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -477,7 +477,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct g2d_data *g2d,
 	}
 
 	ret = pin_user_pages_fast(start, npages,
-				  FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+				  FOLL_WRITE | FOLL_LONGTERM,
 				  g2d_userptr->pages);
 	if (ret != npages) {
 		DRM_DEV_ERROR(g2d->dev,
-- 
GitLab


From 20ea7783236c374ddb7f201132a5fb9624563a77 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:57 +0100
Subject: [PATCH 3073/4122] RDMA/hw/qib/qib_user_pages: remove FOLL_FORCE usage

FOLL_FORCE is really only for ptrace access. As we unpin the pinned pages
using unpin_user_pages_dirty_lock(true), the assumption is that all these
pages are writable.

FOLL_FORCE in this case seems to be a legacy leftover. Let's just remove
it.

Link: https://lkml.kernel.org/r/20221116102659.70287-19-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Leon Romanovsky <leon@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/infiniband/hw/qib/qib_user_pages.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c
index f4b5f05058e4c..f693bc753b6b1 100644
--- a/drivers/infiniband/hw/qib/qib_user_pages.c
+++ b/drivers/infiniband/hw/qib/qib_user_pages.c
@@ -110,7 +110,7 @@ int qib_get_user_pages(unsigned long start_page, size_t num_pages,
 	for (got = 0; got < num_pages; got += ret) {
 		ret = pin_user_pages(start_page + got * PAGE_SIZE,
 				     num_pages - got,
-				     FOLL_LONGTERM | FOLL_WRITE | FOLL_FORCE,
+				     FOLL_LONGTERM | FOLL_WRITE,
 				     p + got, NULL);
 		if (ret < 0) {
 			mmap_read_unlock(current->mm);
-- 
GitLab


From 052d9b0f7ae1200b4a0783cf934ee4a987d37fd7 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Wed, 16 Nov 2022 11:26:58 +0100
Subject: [PATCH 3074/4122] habanalabs: remove FOLL_FORCE usage

FOLL_FORCE is really only for ptrace access. As we unpin the pinned pages
using unpin_user_pages_dirty_lock(true), the assumption is that all these
pages are writable.

FOLL_FORCE in this case seems to be due to copy-and-past from other
drivers. Let's just remove it.

Link: https://lkml.kernel.org/r/20221116102659.70287-20-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Oded Gabbay <ogabbay@kernel.org>
Cc: Oded Gabbay <ogabbay@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/misc/habanalabs/common/memory.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index ef28f3b37b932..e35cca96bbef4 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -2312,8 +2312,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
 	if (!userptr->pages)
 		return -ENOMEM;
 
-	rc = pin_user_pages_fast(start, npages,
-				 FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+	rc = pin_user_pages_fast(start, npages, FOLL_WRITE | FOLL_LONGTERM,
 				 userptr->pages);
 
 	if (rc != npages) {
-- 
GitLab


From f347454d034184b4f0a2caf6e14daf7848cea01c Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 31 Oct 2022 16:25:24 +0100
Subject: [PATCH 3075/4122] mm/gup: disallow FOLL_FORCE|FOLL_WRITE on hugetlb
 mappings

hugetlb does not support fake write-faults (write faults without write
permissions).  However, we are currently able to trigger a
FAULT_FLAG_WRITE fault on a VMA without VM_WRITE.

If we'd ever want to support FOLL_FORCE|FOLL_WRITE, we'd have to teach
hugetlb to:

(1) Leave the page mapped R/O after the fake write-fault, like
    maybe_mkwrite() does.
(2) Allow writing to an exclusive anon page that's mapped R/O when
    FOLL_FORCE is set, like can_follow_write_pte(). E.g.,
    __follow_hugetlb_must_fault() needs adjustment.

For now, it's not clear if that added complexity is really required.
History tolds us that FOLL_FORCE is dangerous and that we better limit its
use to a bare minimum.

--------------------------------------------------------------------------
  #include <stdio.h>
  #include <stdlib.h>
  #include <fcntl.h>
  #include <unistd.h>
  #include <errno.h>
  #include <stdint.h>
  #include <sys/mman.h>
  #include <linux/mman.h>

  int main(int argc, char **argv)
  {
          char *map;
          int mem_fd;

          map = mmap(NULL, 2 * 1024 * 1024u, PROT_READ,
                     MAP_PRIVATE|MAP_ANON|MAP_HUGETLB|MAP_HUGE_2MB, -1, 0);
          if (map == MAP_FAILED) {
                  fprintf(stderr, "mmap() failed: %d\n", errno);
                  return 1;
          }

          mem_fd = open("/proc/self/mem", O_RDWR);
          if (mem_fd < 0) {
                  fprintf(stderr, "open(/proc/self/mem) failed: %d\n", errno);
                  return 1;
          }

          if (pwrite(mem_fd, "0", 1, (uintptr_t) map) == 1) {
                  fprintf(stderr, "write() succeeded, which is unexpected\n");
                  return 1;
          }

          printf("write() failed as expected: %d\n", errno);
          return 0;
  }
--------------------------------------------------------------------------

Fortunately, we have a sanity check in hugetlb_wp() in place ever since
commit 1d8d14641fd9 ("mm/hugetlb: support write-faults in shared
mappings"), that bails out instead of silently mapping a page writable in
a !PROT_WRITE VMA.

Consequently, above reproducer triggers a warning, similar to the one
reported by szsbot:

------------[ cut here ]------------
WARNING: CPU: 1 PID: 3612 at mm/hugetlb.c:5313 hugetlb_wp+0x20a/0x1af0 mm/hugetlb.c:5313
Modules linked in:
CPU: 1 PID: 3612 Comm: syz-executor250 Not tainted 6.1.0-rc2-syzkaller #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/11/2022
RIP: 0010:hugetlb_wp+0x20a/0x1af0 mm/hugetlb.c:5313
Code: ea 03 80 3c 02 00 0f 85 31 14 00 00 49 8b 5f 20 31 ff 48 89 dd 83 e5 02 48 89 ee e8 70 ab b7 ff 48 85 ed 75 5b e8 76 ae b7 ff <0f> 0b 41 bd 40 00 00 00 e8 69 ae b7 ff 48 b8 00 00 00 00 00 fc ff
RSP: 0018:ffffc90003caf620 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000008640070 RCX: 0000000000000000
RDX: ffff88807b963a80 RSI: ffffffff81c4ed2a RDI: 0000000000000007
RBP: 0000000000000000 R08: 0000000000000007 R09: 0000000000000000
R10: 0000000000000000 R11: 000000000008c07e R12: ffff888023805800
R13: 0000000000000000 R14: ffffffff91217f38 R15: ffff88801d4b0360
FS:  0000555555bba300(0000) GS:ffff8880b9b00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007fff7a47a1b8 CR3: 000000002378d000 CR4: 00000000003506e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 <TASK>
 hugetlb_no_page mm/hugetlb.c:5755 [inline]
 hugetlb_fault+0x19cc/0x2060 mm/hugetlb.c:5874
 follow_hugetlb_page+0x3f3/0x1850 mm/hugetlb.c:6301
 __get_user_pages+0x2cb/0xf10 mm/gup.c:1202
 __get_user_pages_locked mm/gup.c:1434 [inline]
 __get_user_pages_remote+0x18f/0x830 mm/gup.c:2187
 get_user_pages_remote+0x84/0xc0 mm/gup.c:2260
 __access_remote_vm+0x287/0x6b0 mm/memory.c:5517
 ptrace_access_vm+0x181/0x1d0 kernel/ptrace.c:61
 generic_ptrace_pokedata kernel/ptrace.c:1323 [inline]
 ptrace_request+0xb46/0x10c0 kernel/ptrace.c:1046
 arch_ptrace+0x36/0x510 arch/x86/kernel/ptrace.c:828
 __do_sys_ptrace kernel/ptrace.c:1296 [inline]
 __se_sys_ptrace kernel/ptrace.c:1269 [inline]
 __x64_sys_ptrace+0x178/0x2a0 kernel/ptrace.c:1269
 do_syscall_x64 arch/x86/entry/common.c:50 [inline]
 do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80
 entry_SYSCALL_64_after_hwframe+0x63/0xcd
[...]

So let's silence that warning by teaching GUP code that FOLL_FORCE -- so
far -- does not apply to hugetlb.

Note that FOLL_FORCE for read-access seems to be working as expected.  The
assumption is that this has been broken forever, only ever since above
commit, we actually detect the wrong handling and WARN_ON_ONCE().

I assume this has been broken at least since 2014, when mm/gup.c came to
life.  I failed to come up with a suitable Fixes tag quickly.

Link: https://lkml.kernel.org/r/20221031152524.173644-1-david@redhat.com
Fixes: 1d8d14641fd9 ("mm/hugetlb: support write-faults in shared mappings")
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: <syzbot+f0b97304ef90f0d0b1dc@syzkaller.appspotmail.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/gup.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/gup.c b/mm/gup.c
index 39c84a200f067..90ae44f24870c 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1009,6 +1009,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
 		if (!(vm_flags & VM_WRITE)) {
 			if (!(gup_flags & FOLL_FORCE))
 				return -EFAULT;
+			/* hugetlb does not support FOLL_FORCE|FOLL_WRITE. */
+			if (is_vm_hugetlb_page(vma))
+				return -EFAULT;
 			/*
 			 * We used to let the write,force case do COW in a
 			 * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
-- 
GitLab


From 931b6a8b36a2de3985eca27e758900e70cd99779 Mon Sep 17 00:00:00 2001
From: Yu Zhao <yuzhao@google.com>
Date: Tue, 15 Nov 2022 18:38:08 -0700
Subject: [PATCH 3076/4122] mm: multi-gen LRU: remove NULL checks on
 NODE_DATA()

NODE_DATA() is preallocated for all possible nodes after commit
09f49dca570a ("mm: handle uninitialized numa nodes gracefully").  Checking
its return value against NULL is now unnecessary.

Link: https://lkml.kernel.org/r/20221116013808.3995280-2-yuzhao@google.com
Signed-off-by: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 82f32c929b114..805fa51b175c2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3166,7 +3166,7 @@ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
 	if (memcg) {
 		struct lruvec *lruvec = &memcg->nodeinfo[nid]->lruvec;
 
-		/* for hotadd_new_pgdat() */
+		/* see the comment in mem_cgroup_lruvec() */
 		if (!lruvec->pgdat)
 			lruvec->pgdat = pgdat;
 
@@ -3175,7 +3175,7 @@ static struct lruvec *get_lruvec(struct mem_cgroup *memcg, int nid)
 #endif
 	VM_WARN_ON_ONCE(!mem_cgroup_disabled());
 
-	return pgdat ? &pgdat->__lruvec : NULL;
+	return &pgdat->__lruvec;
 }
 
 static int get_swappiness(struct lruvec *lruvec, struct scan_control *sc)
@@ -3239,9 +3239,6 @@ void lru_gen_add_mm(struct mm_struct *mm)
 	for_each_node_state(nid, N_MEMORY) {
 		struct lruvec *lruvec = get_lruvec(memcg, nid);
 
-		if (!lruvec)
-			continue;
-
 		/* the first addition since the last iteration */
 		if (lruvec->mm_state.tail == &mm_list->fifo)
 			lruvec->mm_state.tail = &mm->lru_gen.list;
@@ -3271,9 +3268,6 @@ void lru_gen_del_mm(struct mm_struct *mm)
 	for_each_node(nid) {
 		struct lruvec *lruvec = get_lruvec(memcg, nid);
 
-		if (!lruvec)
-			continue;
-
 		/* where the last iteration ended (exclusive) */
 		if (lruvec->mm_state.tail == &mm->lru_gen.list)
 			lruvec->mm_state.tail = lruvec->mm_state.tail->next;
@@ -5348,9 +5342,6 @@ static void lru_gen_change_state(bool enabled)
 		for_each_node(nid) {
 			struct lruvec *lruvec = get_lruvec(memcg, nid);
 
-			if (!lruvec)
-				continue;
-
 			spin_lock_irq(&lruvec->lru_lock);
 
 			VM_WARN_ON_ONCE(!seq_is_valid(lruvec));
-- 
GitLab


From 4c74b65f478dc9353780a6be17fc82f1b06cea80 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 16 Nov 2022 09:23:45 +0800
Subject: [PATCH 3077/4122] mm/migrate.c: stop using 0 as NULL pointer

mm/migrate.c:1198:24: warning: Using plain integer as NULL pointer

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3080
Link: https://lkml.kernel.org/r/20221116012345.84870-1-yang.lee@linux.alibaba.com
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/migrate.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/migrate.c b/mm/migrate.c
index 4aa3b6d4f67c9..3be90351ad1d4 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1180,7 +1180,7 @@ static int unmap_and_move(new_page_t get_new_page,
 		return -ENOMEM;
 	dst = page_folio(newpage);
 
-	dst->private = 0;
+	dst->private = NULL;
 	rc = __unmap_and_move(src, dst, force, mode);
 	if (rc == MIGRATEPAGE_SUCCESS)
 		set_page_owner_migrate_reason(&dst->page, reason);
-- 
GitLab


From 47939359add5242d27ee6a30e8bcb0cef15ba45c Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Thu, 17 Nov 2022 23:13:26 +0900
Subject: [PATCH 3078/4122] zram: remove unused stats fields

We don't show num_reads and num_writes since we removed corresponding
sysfs nodes in 2017.  Block layer stats are exposed via
/sys/block/zramX/stat file.

However, we still increment those atomic vars and store them in zram
stats.  Remove leftovers.

Link: https://lkml.kernel.org/r/20221117141326.1105181-1-senozhatsky@chromium.org
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 drivers/block/zram/zram_drv.c | 2 --
 drivers/block/zram/zram_drv.h | 2 --
 2 files changed, 4 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 9d33801e8ba87..e290d6d970474 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1981,11 +1981,9 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
 	int ret;
 
 	if (!op_is_write(op)) {
-		atomic64_inc(&zram->stats.num_reads);
 		ret = zram_bvec_read(zram, bvec, index, offset, bio);
 		flush_dcache_page(bvec->bv_page);
 	} else {
-		atomic64_inc(&zram->stats.num_writes);
 		ret = zram_bvec_write(zram, bvec, index, offset, bio);
 	}
 
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 473325415a74c..c5254626f051f 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -76,8 +76,6 @@ struct zram_table_entry {
 
 struct zram_stats {
 	atomic64_t compr_data_size;	/* compressed size of pages stored */
-	atomic64_t num_reads;	/* failed + successful */
-	atomic64_t num_writes;	/* --do-- */
 	atomic64_t failed_reads;	/* can happen when memory is too low */
 	atomic64_t failed_writes;	/* can happen when memory is too low */
 	atomic64_t invalid_io;	/* non-page-aligned I/O requests */
-- 
GitLab


From 91a99f1d1248bdde674b66e20ac472ec76f6a202 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Thu, 17 Nov 2022 16:29:15 -0500
Subject: [PATCH 3079/4122] selftests/vm: use memfd for hugepage-mmap test

This test was overlooked with a hard-coded mntpoint path in test when
we're removing the hugetlb mntpoint in commit 0796c7b8be84.  Fix it up so
the test can keep running.

Link: https://lkml.kernel.org/r/Y3aojfUC2nSwbCzB@x1n
Fixes: 0796c7b8be84 ("selftests/vm: drop mnt point for hugetlb in run_vmtests.sh")
Signed-off-by: Peter Xu <peterx@redhat.com>
Reported-by: Joel Savitz <jsavitz@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/hugepage-mmap.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/vm/hugepage-mmap.c
index 93f9e7b813314..955ef87f382cb 100644
--- a/tools/testing/selftests/vm/hugepage-mmap.c
+++ b/tools/testing/selftests/vm/hugepage-mmap.c
@@ -16,14 +16,13 @@
  * range.
  * Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
  */
-
+#define _GNU_SOURCE
 #include <stdlib.h>
 #include <stdio.h>
 #include <unistd.h>
 #include <sys/mman.h>
 #include <fcntl.h>
 
-#define FILE_NAME "huge/hugepagefile"
 #define LENGTH (256UL*1024*1024)
 #define PROTECTION (PROT_READ | PROT_WRITE)
 
@@ -67,16 +66,16 @@ int main(void)
 	void *addr;
 	int fd, ret;
 
-	fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
+	fd = memfd_create("hugepage-mmap", MFD_HUGETLB);
 	if (fd < 0) {
-		perror("Open failed");
+		perror("memfd_create() failed");
 		exit(1);
 	}
 
 	addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
 	if (addr == MAP_FAILED) {
 		perror("mmap");
-		unlink(FILE_NAME);
+		close(fd);
 		exit(1);
 	}
 
@@ -87,7 +86,6 @@ int main(void)
 
 	munmap(addr, LENGTH);
 	close(fd);
-	unlink(FILE_NAME);
 
 	return ret;
 }
-- 
GitLab


From c3e58a70425ac6ddaae1529c8146e88b4f7252bb Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 18 Nov 2022 10:17:13 +0000
Subject: [PATCH 3080/4122] mm/page_alloc: always remove pages from temporary
 list

Patch series "Leave IRQs enabled for per-cpu page allocations", v3.


This patch (of 2):

free_unref_page_list() has neglected to remove pages properly from the
list of pages to free since forever.  It works by coincidence because
list_add happened to do the right thing adding the pages to just the PCP
lists.  However, a later patch added pages to either the PCP list or the
zone list but only properly deleted the page from the list in one path
leading to list corruption and a subsequent failure.  As a preparation
patch, always delete the pages from one list properly before adding to
another.  On its own, this fixes nothing although it adds a fractional
amount of overhead but is critical to the next patch.

Link: https://lkml.kernel.org/r/20221118101714.19590-1-mgorman@techsingularity.net
Link: https://lkml.kernel.org/r/20221118101714.19590-2-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Hugh Dickins <hughd@google.com>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c33b6963c2d76..ca889fb53cbb0 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3553,6 +3553,8 @@ void free_unref_page_list(struct list_head *list)
 	list_for_each_entry_safe(page, next, list, lru) {
 		struct zone *zone = page_zone(page);
 
+		list_del(&page->lru);
+
 		/* Different zone, different pcp lock. */
 		if (zone != locked_zone) {
 			if (pcp)
-- 
GitLab


From 5749077415994eb02d660b2559b9d8278521e73d Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Fri, 18 Nov 2022 10:17:14 +0000
Subject: [PATCH 3081/4122] mm/page_alloc: leave IRQs enabled for per-cpu page
 allocations

The pcp_spin_lock_irqsave protecting the PCP lists is IRQ-safe as a task
allocating from the PCP must not re-enter the allocator from IRQ context.
In each instance where IRQ-reentrancy is possible, the lock is acquired
using pcp_spin_trylock_irqsave() even though IRQs are disabled and
re-entrancy is impossible.

Demote the lock to pcp_spin_lock avoids an IRQ disable/enable in the
common case at the cost of some IRQ allocations taking a slower path.  If
the PCP lists need to be refilled, the zone lock still needs to disable
IRQs but that will only happen on PCP refill and drain.  If an IRQ is
raised when a PCP allocation is in progress, the trylock will fail and
fallback to using the buddy lists directly.  Note that this may not be a
universal win if an interrupt-intensive workload also allocates heavily
from interrupt context and contends heavily on the zone->lock as a result.

[mgorman@techsingularity.net: migratetype might be wrong if a PCP was locked]
  Link: https://lkml.kernel.org/r/20221122131229.5263-2-mgorman@techsingularity.net
[yuzhao@google.com: reported lockdep issue on IO completion from softirq]
[hughd@google.com: fix list corruption, lock improvements, micro-optimsations]
Link: https://lkml.kernel.org/r/20221118101714.19590-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Michal Hocko <mhocko@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 124 +++++++++++++++++++++---------------------------
 1 file changed, 54 insertions(+), 70 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ca889fb53cbb0..d9d83254c4853 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -170,21 +170,12 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
 	_ret;								\
 })
 
-#define pcpu_spin_lock_irqsave(type, member, ptr, flags)		\
+#define pcpu_spin_trylock(type, member, ptr)				\
 ({									\
 	type *_ret;							\
 	pcpu_task_pin();						\
 	_ret = this_cpu_ptr(ptr);					\
-	spin_lock_irqsave(&_ret->member, flags);			\
-	_ret;								\
-})
-
-#define pcpu_spin_trylock_irqsave(type, member, ptr, flags)		\
-({									\
-	type *_ret;							\
-	pcpu_task_pin();						\
-	_ret = this_cpu_ptr(ptr);					\
-	if (!spin_trylock_irqsave(&_ret->member, flags)) {		\
+	if (!spin_trylock(&_ret->member)) {				\
 		pcpu_task_unpin();					\
 		_ret = NULL;						\
 	}								\
@@ -197,27 +188,16 @@ static DEFINE_MUTEX(pcp_batch_high_lock);
 	pcpu_task_unpin();						\
 })
 
-#define pcpu_spin_unlock_irqrestore(member, ptr, flags)			\
-({									\
-	spin_unlock_irqrestore(&ptr->member, flags);			\
-	pcpu_task_unpin();						\
-})
-
 /* struct per_cpu_pages specific helpers. */
 #define pcp_spin_lock(ptr)						\
 	pcpu_spin_lock(struct per_cpu_pages, lock, ptr)
 
-#define pcp_spin_lock_irqsave(ptr, flags)				\
-	pcpu_spin_lock_irqsave(struct per_cpu_pages, lock, ptr, flags)
-
-#define pcp_spin_trylock_irqsave(ptr, flags)				\
-	pcpu_spin_trylock_irqsave(struct per_cpu_pages, lock, ptr, flags)
+#define pcp_spin_trylock(ptr)						\
+	pcpu_spin_trylock(struct per_cpu_pages, lock, ptr)
 
 #define pcp_spin_unlock(ptr)						\
 	pcpu_spin_unlock(lock, ptr)
 
-#define pcp_spin_unlock_irqrestore(ptr, flags)				\
-	pcpu_spin_unlock_irqrestore(lock, ptr, flags)
 #ifdef CONFIG_USE_PERCPU_NUMA_NODE_ID
 DEFINE_PER_CPU(int, numa_node);
 EXPORT_PER_CPU_SYMBOL(numa_node);
@@ -1554,6 +1534,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 					struct per_cpu_pages *pcp,
 					int pindex)
 {
+	unsigned long flags;
 	int min_pindex = 0;
 	int max_pindex = NR_PCP_LISTS - 1;
 	unsigned int order;
@@ -1569,8 +1550,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 	/* Ensure requested pindex is drained first. */
 	pindex = pindex - 1;
 
-	/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
-	spin_lock(&zone->lock);
+	spin_lock_irqsave(&zone->lock, flags);
 	isolated_pageblocks = has_isolate_pageblock(zone);
 
 	while (count > 0) {
@@ -1618,7 +1598,7 @@ static void free_pcppages_bulk(struct zone *zone, int count,
 		} while (count > 0 && !list_empty(list));
 	}
 
-	spin_unlock(&zone->lock);
+	spin_unlock_irqrestore(&zone->lock, flags);
 }
 
 static void free_one_page(struct zone *zone,
@@ -3132,10 +3112,10 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 			unsigned long count, struct list_head *list,
 			int migratetype, unsigned int alloc_flags)
 {
+	unsigned long flags;
 	int i, allocated = 0;
 
-	/* Caller must hold IRQ-safe pcp->lock so IRQs are disabled. */
-	spin_lock(&zone->lock);
+	spin_lock_irqsave(&zone->lock, flags);
 	for (i = 0; i < count; ++i) {
 		struct page *page = __rmqueue(zone, order, migratetype,
 								alloc_flags);
@@ -3169,7 +3149,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
 	 * pages added to the pcp list.
 	 */
 	__mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
-	spin_unlock(&zone->lock);
+	spin_unlock_irqrestore(&zone->lock, flags);
 	return allocated;
 }
 
@@ -3186,16 +3166,9 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
 	batch = READ_ONCE(pcp->batch);
 	to_drain = min(pcp->count, batch);
 	if (to_drain > 0) {
-		unsigned long flags;
-
-		/*
-		 * free_pcppages_bulk expects IRQs disabled for zone->lock
-		 * so even though pcp->lock is not intended to be IRQ-safe,
-		 * it's needed in this context.
-		 */
-		spin_lock_irqsave(&pcp->lock, flags);
+		spin_lock(&pcp->lock);
 		free_pcppages_bulk(zone, to_drain, pcp, 0);
-		spin_unlock_irqrestore(&pcp->lock, flags);
+		spin_unlock(&pcp->lock);
 	}
 }
 #endif
@@ -3209,12 +3182,9 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
 
 	pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
 	if (pcp->count) {
-		unsigned long flags;
-
-		/* See drain_zone_pages on why this is disabling IRQs */
-		spin_lock_irqsave(&pcp->lock, flags);
+		spin_lock(&pcp->lock);
 		free_pcppages_bulk(zone, pcp->count, pcp, 0);
-		spin_unlock_irqrestore(&pcp->lock, flags);
+		spin_unlock(&pcp->lock);
 	}
 }
 
@@ -3480,7 +3450,6 @@ static void free_unref_page_commit(struct zone *zone, struct per_cpu_pages *pcp,
  */
 void free_unref_page(struct page *page, unsigned int order)
 {
-	unsigned long flags;
 	unsigned long __maybe_unused UP_flags;
 	struct per_cpu_pages *pcp;
 	struct zone *zone;
@@ -3508,10 +3477,10 @@ void free_unref_page(struct page *page, unsigned int order)
 
 	zone = page_zone(page);
 	pcp_trylock_prepare(UP_flags);
-	pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
 	if (pcp) {
 		free_unref_page_commit(zone, pcp, page, migratetype, order);
-		pcp_spin_unlock_irqrestore(pcp, flags);
+		pcp_spin_unlock(pcp);
 	} else {
 		free_one_page(zone, page, pfn, order, migratetype, FPI_NONE);
 	}
@@ -3523,10 +3492,10 @@ void free_unref_page(struct page *page, unsigned int order)
  */
 void free_unref_page_list(struct list_head *list)
 {
+	unsigned long __maybe_unused UP_flags;
 	struct page *page, *next;
 	struct per_cpu_pages *pcp = NULL;
 	struct zone *locked_zone = NULL;
-	unsigned long flags;
 	int batch_count = 0;
 	int migratetype;
 
@@ -3554,21 +3523,36 @@ void free_unref_page_list(struct list_head *list)
 		struct zone *zone = page_zone(page);
 
 		list_del(&page->lru);
+		migratetype = get_pcppage_migratetype(page);
 
 		/* Different zone, different pcp lock. */
 		if (zone != locked_zone) {
-			if (pcp)
-				pcp_spin_unlock_irqrestore(pcp, flags);
+			if (pcp) {
+				pcp_spin_unlock(pcp);
+				pcp_trylock_finish(UP_flags);
+			}
 
+			/*
+			 * trylock is necessary as pages may be getting freed
+			 * from IRQ or SoftIRQ context after an IO completion.
+			 */
+			pcp_trylock_prepare(UP_flags);
+			pcp = pcp_spin_trylock(zone->per_cpu_pageset);
+			if (unlikely(!pcp)) {
+				pcp_trylock_finish(UP_flags);
+				free_one_page(zone, page, page_to_pfn(page),
+					      0, migratetype, FPI_NONE);
+				locked_zone = NULL;
+				continue;
+			}
 			locked_zone = zone;
-			pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
+			batch_count = 0;
 		}
 
 		/*
 		 * Non-isolated types over MIGRATE_PCPTYPES get added
 		 * to the MIGRATE_MOVABLE pcp list.
 		 */
-		migratetype = get_pcppage_migratetype(page);
 		if (unlikely(migratetype >= MIGRATE_PCPTYPES))
 			migratetype = MIGRATE_MOVABLE;
 
@@ -3576,18 +3560,23 @@ void free_unref_page_list(struct list_head *list)
 		free_unref_page_commit(zone, pcp, page, migratetype, 0);
 
 		/*
-		 * Guard against excessive IRQ disabled times when we get
-		 * a large list of pages to free.
+		 * Guard against excessive lock hold times when freeing
+		 * a large list of pages. Lock will be reacquired if
+		 * necessary on the next iteration.
 		 */
 		if (++batch_count == SWAP_CLUSTER_MAX) {
-			pcp_spin_unlock_irqrestore(pcp, flags);
+			pcp_spin_unlock(pcp);
+			pcp_trylock_finish(UP_flags);
 			batch_count = 0;
-			pcp = pcp_spin_lock_irqsave(locked_zone->per_cpu_pageset, flags);
+			pcp = NULL;
+			locked_zone = NULL;
 		}
 	}
 
-	if (pcp)
-		pcp_spin_unlock_irqrestore(pcp, flags);
+	if (pcp) {
+		pcp_spin_unlock(pcp);
+		pcp_trylock_finish(UP_flags);
+	}
 }
 
 /*
@@ -3788,15 +3777,11 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	struct per_cpu_pages *pcp;
 	struct list_head *list;
 	struct page *page;
-	unsigned long flags;
 	unsigned long __maybe_unused UP_flags;
 
-	/*
-	 * spin_trylock may fail due to a parallel drain. In the future, the
-	 * trylock will also protect against IRQ reentrancy.
-	 */
+	/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
 	pcp_trylock_prepare(UP_flags);
-	pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
 	if (!pcp) {
 		pcp_trylock_finish(UP_flags);
 		return NULL;
@@ -3810,7 +3795,7 @@ static struct page *rmqueue_pcplist(struct zone *preferred_zone,
 	pcp->free_factor >>= 1;
 	list = &pcp->lists[order_to_pindex(migratetype, order)];
 	page = __rmqueue_pcplist(zone, order, migratetype, alloc_flags, pcp, list);
-	pcp_spin_unlock_irqrestore(pcp, flags);
+	pcp_spin_unlock(pcp);
 	pcp_trylock_finish(UP_flags);
 	if (page) {
 		__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
@@ -5381,7 +5366,6 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 			struct page **page_array)
 {
 	struct page *page;
-	unsigned long flags;
 	unsigned long __maybe_unused UP_flags;
 	struct zone *zone;
 	struct zoneref *z;
@@ -5463,9 +5447,9 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 	if (unlikely(!zone))
 		goto failed;
 
-	/* Is a parallel drain in progress? */
+	/* spin_trylock may fail due to a parallel drain or IRQ reentrancy. */
 	pcp_trylock_prepare(UP_flags);
-	pcp = pcp_spin_trylock_irqsave(zone->per_cpu_pageset, flags);
+	pcp = pcp_spin_trylock(zone->per_cpu_pageset);
 	if (!pcp)
 		goto failed_irq;
 
@@ -5484,7 +5468,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		if (unlikely(!page)) {
 			/* Try and allocate at least one page */
 			if (!nr_account) {
-				pcp_spin_unlock_irqrestore(pcp, flags);
+				pcp_spin_unlock(pcp);
 				goto failed_irq;
 			}
 			break;
@@ -5499,7 +5483,7 @@ unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid,
 		nr_populated++;
 	}
 
-	pcp_spin_unlock_irqrestore(pcp, flags);
+	pcp_spin_unlock(pcp);
 	pcp_trylock_finish(UP_flags);
 
 	__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
-- 
GitLab


From a4bafffb5dc5be6c7a3b77b2de0cbaf6776a3c8b Mon Sep 17 00:00:00 2001
From: Mel Gorman <mgorman@techsingularity.net>
Date: Tue, 22 Nov 2022 13:12:29 +0000
Subject: [PATCH 3082/4122] mm/page_alloc: simplify locking during
 free_unref_page_list

While freeing a large list, the zone lock will be released and reacquired
to avoid long hold times since commit c24ad77d962c ("mm/page_alloc.c:
avoid excessive IRQ disabled times in free_unref_page_list()").  As
suggested by Vlastimil Babka, the lockrelease/reacquire logic can be
simplified by reusing the logic that acquires a different lock when
changing zones.

Link: https://lkml.kernel.org/r/20221122131229.5263-3-mgorman@techsingularity.net
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 25 +++++++++----------------
 1 file changed, 9 insertions(+), 16 deletions(-)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d9d83254c4853..5ab9dd29ef7ec 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3525,13 +3525,19 @@ void free_unref_page_list(struct list_head *list)
 		list_del(&page->lru);
 		migratetype = get_pcppage_migratetype(page);
 
-		/* Different zone, different pcp lock. */
-		if (zone != locked_zone) {
+		/*
+		 * Either different zone requiring a different pcp lock or
+		 * excessive lock hold times when freeing a large list of
+		 * pages.
+		 */
+		if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {
 			if (pcp) {
 				pcp_spin_unlock(pcp);
 				pcp_trylock_finish(UP_flags);
 			}
 
+			batch_count = 0;
+
 			/*
 			 * trylock is necessary as pages may be getting freed
 			 * from IRQ or SoftIRQ context after an IO completion.
@@ -3546,7 +3552,6 @@ void free_unref_page_list(struct list_head *list)
 				continue;
 			}
 			locked_zone = zone;
-			batch_count = 0;
 		}
 
 		/*
@@ -3558,19 +3563,7 @@ void free_unref_page_list(struct list_head *list)
 
 		trace_mm_page_free_batched(page);
 		free_unref_page_commit(zone, pcp, page, migratetype, 0);
-
-		/*
-		 * Guard against excessive lock hold times when freeing
-		 * a large list of pages. Lock will be reacquired if
-		 * necessary on the next iteration.
-		 */
-		if (++batch_count == SWAP_CLUSTER_MAX) {
-			pcp_spin_unlock(pcp);
-			pcp_trylock_finish(UP_flags);
-			batch_count = 0;
-			pcp = NULL;
-			locked_zone = NULL;
-		}
+		batch_count++;
 	}
 
 	if (pcp) {
-- 
GitLab


From 6dd8fe86fa84729538d8bed3149faf9c5886bb5b Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Thu, 17 Nov 2022 23:30:52 -0800
Subject: [PATCH 3083/4122] ext4: convert move_extent_per_page() to use folios

Patch series "Removing the try_to_release_page() wrapper", v3.

This patchset replaces the remaining calls of try_to_release_page() with
the folio equivalent: filemap_release_folio().  This allows us to remove
the wrapper.


This patch (of 4):

Convert move_extent_per_page() to use folios.  This change removes 5 calls
to compound_head() and is in preparation for the removal of the
try_to_release_page() wrapper.

Link: https://lkml.kernel.org/r/20221118073055.55694-1-vishal.moola@gmail.com
Link: https://lkml.kernel.org/r/20221118073055.55694-2-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/ext4/move_extent.c | 52 ++++++++++++++++++++++++++-----------------
 1 file changed, 31 insertions(+), 21 deletions(-)

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 044e34cd835c1..8dbb87edf24c4 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -253,6 +253,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
 {
 	struct inode *orig_inode = file_inode(o_filp);
 	struct page *pagep[2] = {NULL, NULL};
+	struct folio *folio[2] = {NULL, NULL};
 	handle_t *handle;
 	ext4_lblk_t orig_blk_offset, donor_blk_offset;
 	unsigned long blocksize = orig_inode->i_sb->s_blocksize;
@@ -313,6 +314,13 @@ again:
 	 * hold page's lock, if it is still the case data copy is not
 	 * necessary, just swap data blocks between orig and donor.
 	 */
+	folio[0] = page_folio(pagep[0]);
+	folio[1] = page_folio(pagep[1]);
+
+	VM_BUG_ON_FOLIO(folio_test_large(folio[0]), folio[0]);
+	VM_BUG_ON_FOLIO(folio_test_large(folio[1]), folio[1]);
+	VM_BUG_ON_FOLIO(folio_nr_pages(folio[0]) != folio_nr_pages(folio[1]), folio[1]);
+
 	if (unwritten) {
 		ext4_double_down_write_data_sem(orig_inode, donor_inode);
 		/* If any of extents in range became initialized we have to
@@ -331,10 +339,10 @@ again:
 			ext4_double_up_write_data_sem(orig_inode, donor_inode);
 			goto data_copy;
 		}
-		if ((page_has_private(pagep[0]) &&
-		     !try_to_release_page(pagep[0], 0)) ||
-		    (page_has_private(pagep[1]) &&
-		     !try_to_release_page(pagep[1], 0))) {
+		if ((folio_has_private(folio[0]) &&
+		     !filemap_release_folio(folio[0], 0)) ||
+		    (folio_has_private(folio[1]) &&
+		     !filemap_release_folio(folio[1], 0))) {
 			*err = -EBUSY;
 			goto drop_data_sem;
 		}
@@ -344,19 +352,21 @@ again:
 						   block_len_in_page, 1, err);
 	drop_data_sem:
 		ext4_double_up_write_data_sem(orig_inode, donor_inode);
-		goto unlock_pages;
+		goto unlock_folios;
 	}
 data_copy:
-	*err = mext_page_mkuptodate(pagep[0], from, from + replaced_size);
+	*err = mext_page_mkuptodate(&folio[0]->page, from, from + replaced_size);
 	if (*err)
-		goto unlock_pages;
+		goto unlock_folios;
 
 	/* At this point all buffers in range are uptodate, old mapping layout
 	 * is no longer required, try to drop it now. */
-	if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) ||
-	    (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) {
+	if ((folio_has_private(folio[0]) &&
+		!filemap_release_folio(folio[0], 0)) ||
+	    (folio_has_private(folio[1]) &&
+		!filemap_release_folio(folio[1], 0))) {
 		*err = -EBUSY;
-		goto unlock_pages;
+		goto unlock_folios;
 	}
 	ext4_double_down_write_data_sem(orig_inode, donor_inode);
 	replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
@@ -369,13 +379,13 @@ data_copy:
 			replaced_size =
 				block_len_in_page << orig_inode->i_blkbits;
 		} else
-			goto unlock_pages;
+			goto unlock_folios;
 	}
 	/* Perform all necessary steps similar write_begin()/write_end()
 	 * but keeping in mind that i_size will not change */
-	if (!page_has_buffers(pagep[0]))
-		create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
-	bh = page_buffers(pagep[0]);
+	if (!folio_buffers(folio[0]))
+		create_empty_buffers(&folio[0]->page, 1 << orig_inode->i_blkbits, 0);
+	bh = folio_buffers(folio[0]);
 	for (i = 0; i < data_offset_in_page; i++)
 		bh = bh->b_this_page;
 	for (i = 0; i < block_len_in_page; i++) {
@@ -385,7 +395,7 @@ data_copy:
 		bh = bh->b_this_page;
 	}
 	if (!*err)
-		*err = block_commit_write(pagep[0], from, from + replaced_size);
+		*err = block_commit_write(&folio[0]->page, from, from + replaced_size);
 
 	if (unlikely(*err < 0))
 		goto repair_branches;
@@ -395,11 +405,11 @@ data_copy:
 	*err = ext4_jbd2_inode_add_write(handle, orig_inode,
 			(loff_t)orig_page_offset << PAGE_SHIFT, replaced_size);
 
-unlock_pages:
-	unlock_page(pagep[0]);
-	put_page(pagep[0]);
-	unlock_page(pagep[1]);
-	put_page(pagep[1]);
+unlock_folios:
+	folio_unlock(folio[0]);
+	folio_put(folio[0]);
+	folio_unlock(folio[1]);
+	folio_put(folio[1]);
 stop_journal:
 	ext4_journal_stop(handle);
 	if (*err == -ENOSPC &&
@@ -430,7 +440,7 @@ repair_branches:
 		*err = -EIO;
 	}
 	replaced_count = 0;
-	goto unlock_pages;
+	goto unlock_folios;
 }
 
 /**
-- 
GitLab


From 64ab3195ea077eaeedc8b382939c3dc5ca56f369 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Thu, 17 Nov 2022 23:30:53 -0800
Subject: [PATCH 3084/4122] khugepage: replace try_to_release_page() with
 filemap_release_folio()

Replace some calls with their folio equivalents.  This change removes 4
calls to compound_head() and is in preparation for the removal of the
try_to_release_page() wrapper.

Link: https://lkml.kernel.org/r/20221118073055.55694-3-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/khugepaged.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 0d8f548d9d7ed..913b0f4893524 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1789,6 +1789,7 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 	xas_set(&xas, start);
 	for (index = start; index < end; index++) {
 		struct page *page = xas_next(&xas);
+		struct folio *folio;
 
 		VM_BUG_ON(index != xas.xa_index);
 		if (is_shmem) {
@@ -1815,8 +1816,6 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 			}
 
 			if (xa_is_value(page) || !PageUptodate(page)) {
-				struct folio *folio;
-
 				xas_unlock_irq(&xas);
 				/* swap in or instantiate fallocated page */
 				if (shmem_get_folio(mapping->host, index,
@@ -1904,13 +1903,15 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 			goto out_unlock;
 		}
 
-		if (page_mapping(page) != mapping) {
+		folio = page_folio(page);
+
+		if (folio_mapping(folio) != mapping) {
 			result = SCAN_TRUNCATED;
 			goto out_unlock;
 		}
 
-		if (!is_shmem && (PageDirty(page) ||
-				  PageWriteback(page))) {
+		if (!is_shmem && (folio_test_dirty(folio) ||
+				  folio_test_writeback(folio))) {
 			/*
 			 * khugepaged only works on read-only fd, so this
 			 * page is dirty because it hasn't been flushed
@@ -1920,20 +1921,20 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 			goto out_unlock;
 		}
 
-		if (isolate_lru_page(page)) {
+		if (folio_isolate_lru(folio)) {
 			result = SCAN_DEL_PAGE_LRU;
 			goto out_unlock;
 		}
 
-		if (page_has_private(page) &&
-		    !try_to_release_page(page, GFP_KERNEL)) {
+		if (folio_has_private(folio) &&
+		    !filemap_release_folio(folio, GFP_KERNEL)) {
 			result = SCAN_PAGE_HAS_PRIVATE;
-			putback_lru_page(page);
+			folio_putback_lru(folio);
 			goto out_unlock;
 		}
 
-		if (page_mapped(page))
-			try_to_unmap(page_folio(page),
+		if (folio_mapped(folio))
+			try_to_unmap(folio,
 					TTU_IGNORE_MLOCK | TTU_BATCH_FLUSH);
 
 		xas_lock_irq(&xas);
-- 
GitLab


From ac5efa782041670b63a05c36d92d02a80e50bb63 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Thu, 17 Nov 2022 23:30:54 -0800
Subject: [PATCH 3085/4122] memory-failure: convert truncate_error_page() to
 use folio

Replace try_to_release_page() with filemap_release_folio().  This change
is in preparation for the removal of the try_to_release_page() wrapper.

Link: https://lkml.kernel.org/r/20221118073055.55694-4-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory-failure.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 63d8501001c6f..2e62940c7baee 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -840,12 +840,13 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
 	int ret = MF_FAILED;
 
 	if (mapping->a_ops->error_remove_page) {
+		struct folio *folio = page_folio(p);
 		int err = mapping->a_ops->error_remove_page(mapping, p);
 
 		if (err != 0) {
 			pr_info("%#lx: Failed to punch page: %d\n", pfn, err);
-		} else if (page_has_private(p) &&
-			   !try_to_release_page(p, GFP_NOIO)) {
+		} else if (folio_has_private(folio) &&
+			   !filemap_release_folio(folio, GFP_NOIO)) {
 			pr_info("%#lx: failed to release buffers\n", pfn);
 		} else {
 			ret = MF_RECOVERED;
-- 
GitLab


From 7438899b0b8df16a73d9a5d49b2a345d165adfe8 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Thu, 17 Nov 2022 23:30:55 -0800
Subject: [PATCH 3086/4122] folio-compat: remove try_to_release_page()

There are no more callers of try_to_release_page(), so remove it.  This
saves 85 bytes of kernel text.

Link: https://lkml.kernel.org/r/20221118073055.55694-5-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Naoya Horiguchi <naoya.horiguchi@nec.com>
Cc: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagemap.h | 1 -
 mm/folio-compat.c       | 6 ------
 2 files changed, 7 deletions(-)

diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index b33ab86d5dca4..2ec0ca1f3d389 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1105,7 +1105,6 @@ void __filemap_remove_folio(struct folio *folio, void *shadow);
 void replace_page_cache_page(struct page *old, struct page *new);
 void delete_from_page_cache_batch(struct address_space *mapping,
 				  struct folio_batch *fbatch);
-int try_to_release_page(struct page *page, gfp_t gfp);
 bool filemap_release_folio(struct folio *folio, gfp_t gfp);
 loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
 		int whence);
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index cbfe51091c391..86933fa8f3e11 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -118,12 +118,6 @@ struct page *grab_cache_page_write_begin(struct address_space *mapping,
 }
 EXPORT_SYMBOL(grab_cache_page_write_begin);
 
-int try_to_release_page(struct page *page, gfp_t gfp)
-{
-	return filemap_release_folio(page_folio(page), gfp);
-}
-EXPORT_SYMBOL(try_to_release_page);
-
 int isolate_lru_page(struct page *page)
 {
 	if (WARN_RATELIMIT(PageTail(page), "trying to isolate tail page"))
-- 
GitLab


From f6fbb8b23b8155a6f7af1349b4595d0373167636 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Thu, 10 Nov 2022 12:35:41 +0100
Subject: [PATCH 3087/4122] Revert "kmsan: unpoison @tlb in
 arch_tlb_gather_mmu()"

This reverts commit ac801e7e252c5588325e3c983c7d4167fc68c024.

The patch in question was picked to -mm from the KMSAN v6 patch series
(https://lore.kernel.org/linux-mm/20220905122452.2258262-1-glider@google.com/)
and sneaked into mainline despite its removal from the v7 series
(https://lore.kernel.org/linux-mm/20220915150417.722975-1-glider@google.com/)

Currently KMSAN does not warn about origin chains hitting the maximum
depth, so keeping @tlb poisoned won't result in any inconveniences.

Link: https://lkml.kernel.org/r/20221110113541.1844156-1-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mmu_gather.c | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index dd1f8ca40cb53..8247553a69c2e 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -1,7 +1,6 @@
 #include <linux/gfp.h>
 #include <linux/highmem.h>
 #include <linux/kernel.h>
-#include <linux/kmsan-checks.h>
 #include <linux/mmdebug.h>
 #include <linux/mm_types.h>
 #include <linux/mm_inline.h>
@@ -299,15 +298,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb)
 static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
 			     bool fullmm)
 {
-	/*
-	 * struct mmu_gather contains 7 1-bit fields packed into a 32-bit
-	 * unsigned int value. The remaining 25 bits remain uninitialized
-	 * and are never used, but KMSAN updates the origin for them in
-	 * zap_pXX_range() in mm/memory.c, thus creating very long origin
-	 * chains. This is technically correct, but consumes too much memory.
-	 * Unpoisoning the whole structure will prevent creating such chains.
-	 */
-	kmsan_unpoison_memory(tlb, sizeof(*tlb));
 	tlb->mm = mm;
 	tlb->fullmm = fullmm;
 
-- 
GitLab


From 845aad0aa038507c166cdc48fbf2e5d863fe73dc Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 18 Nov 2022 21:51:17 -0800
Subject: [PATCH 3088/4122] maple_tree: allow TEST_MAPLE_TREE only when
 DEBUG_KERNEL is set

Prevent a kconfig warning that is caused by TEST_MAPLE_TREE by adding a
"depends on" clause for TEST_MAPLE_TREE since 'select' does not follow any
kconfig dependencies.

WARNING: unmet direct dependencies detected for DEBUG_MAPLE_TREE
  Depends on [n]: DEBUG_KERNEL [=n]
  Selected by [y]:
  - TEST_MAPLE_TREE [=y] && RUNTIME_TESTING_MENU [=y]

Link: https://lkml.kernel.org/r/20221119055117.14094-1-rdunlap@infradead.org
Fixes: 120b116208a0 ("maple_tree: reorganize testing to restore module testing")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Reported-by: kernel test robot <lkp@intel.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/Kconfig.debug | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 580e453e284ef..a0dc28fdc5676 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2244,6 +2244,7 @@ config TEST_XARRAY
 	tristate "Test the XArray code at runtime"
 
 config TEST_MAPLE_TREE
+	depends on DEBUG_KERNEL
 	select DEBUG_MAPLE_TREE
 	tristate "Test the Maple Tree code at runtime"
 
-- 
GitLab


From 8e9d5ead865a1a7af74a444d2f00f1ef4539bfba Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:51:56 -0800
Subject: [PATCH 3089/4122] mm: add bdi_set_strict_limit() function

Patch series "mm/block: add bdi sysfs knobs", v4.

At meta network block devices (nbd) are used to implement remote block
storage.  In testing and during production it has been observed that these
network block devices can consume a huge portion of the dirty writeback
cache and writeback can take a considerable time.

To be able to give stricter limits, I'm proposing the following changes:

1) introduce strictlimit knob

  Currently the max_ratio knob exists to limit the dirty_memory. However
  this knob only applies once (dirty_ratio + dirty_background_ratio) / 2
  has been reached.
  With the BDI_CAP_STRICTLIMIT flag, the max_ratio can be applied without
  reaching that limit. This change exposes that knob.

  This knob can also be useful for NFS, fuse filesystems and USB devices.

2) Use part of 1000000 internal calculation

  The max_ratio is based on percentage. With the current machine sizes
  percentage values can be very high (1% of a 256GB main memory is already
  2.5GB). This change uses part of 1000000 instead of percentages for the
  internal calculations.

3) Introduce two new sysfs knobs: min_bytes and max_bytes.

  Currently all calculations are based on ratio, but for a user it often
  more convenient to specify a limit in bytes. The new knobs will not
  store bytes values, instead they will translate the byte value to a
  corresponding ratio. As the internal values are now part of 1000, the
  ratio is closer to the specified value. However the value should be more
  seen as an approximation as it can fluctuate over time.


3) Introduce two new sysfs knobs: min_ratio_fine and max_ratio_fine.

  The granularity for the existing sysfs bdi knobs min_ratio and max_ratio
  is based on percentage values. The new sysfs bdi knobs min_ratio_fine
  and max_ratio_fine allow to specify the ratio as part of 1 million.


This patch (of 20):

This adds the bdi_set_strict_limit function to be able to set/unset the
BDI_CAP_STRICTLIMIT flag.

Link: https://lkml.kernel.org/r/20221119005215.3052436-1-shr@devkernel.io
Link: https://lkml.kernel.org/r/20221119005215.3052436-2-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Chris Mason <clm@meta.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h |  1 +
 mm/page-writeback.c         | 15 +++++++++++++++
 2 files changed, 16 insertions(+)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 439815cc1ab96..9c984ffc8a0aa 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -104,6 +104,7 @@ static inline unsigned long wb_stat_error(void)
 
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
+int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
 
 /*
  * Flags in backing_dev_info::capability
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7e9d8d857ecca..3745b886722f8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -698,6 +698,21 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 }
 EXPORT_SYMBOL(bdi_set_max_ratio);
 
+int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit)
+{
+	if (strict_limit > 1)
+		return -EINVAL;
+
+	spin_lock_bh(&bdi_lock);
+	if (strict_limit)
+		bdi->capabilities |= BDI_CAP_STRICTLIMIT;
+	else
+		bdi->capabilities &= ~BDI_CAP_STRICTLIMIT;
+	spin_unlock_bh(&bdi_lock);
+
+	return 0;
+}
+
 static unsigned long dirty_freerun_ceiling(unsigned long thresh,
 					   unsigned long bg_thresh)
 {
-- 
GitLab


From 27bbe9d48d4e298864e18b39f091342c68b81637 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:51:57 -0800
Subject: [PATCH 3090/4122] mm: add knob /sys/class/bdi/<bdi>/strict_limit

Add a new knob to /sys/class/bdi/<bdi>/strict_limit. This new knob
allows to set/unset the flag BDI_CAP_STRICTLIMIT in the bdi
capabilities.

Link: https://lkml.kernel.org/r/20221119005215.3052436-3-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/backing-dev.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index c30419a5e1197..a0899cce72ef9 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -209,11 +209,40 @@ static ssize_t stable_pages_required_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(stable_pages_required);
 
+static ssize_t strict_limit_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	unsigned int strict_limit;
+	ssize_t ret;
+
+	ret = kstrtouint(buf, 10, &strict_limit);
+	if (ret < 0)
+		return ret;
+
+	ret = bdi_set_strict_limit(bdi, strict_limit);
+	if (!ret)
+		ret = count;
+
+	return ret;
+}
+
+static ssize_t strict_limit_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%d\n",
+			!!(bdi->capabilities & BDI_CAP_STRICTLIMIT));
+}
+static DEVICE_ATTR_RW(strict_limit);
+
 static struct attribute *bdi_dev_attrs[] = {
 	&dev_attr_read_ahead_kb.attr,
 	&dev_attr_min_ratio.attr,
 	&dev_attr_max_ratio.attr,
 	&dev_attr_stable_pages_required.attr,
+	&dev_attr_strict_limit.attr,
 	NULL,
 };
 ATTRIBUTE_GROUPS(bdi_dev);
-- 
GitLab


From 16b837eb84e6948f92411eb32e97a05f89733ddc Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:51:58 -0800
Subject: [PATCH 3091/4122] mm: document /sys/class/bdi/<bdi>/strict_limit knob

This documents the new /sys/class/bdi/<bdi>/strict_limit knob.

Link: https://lkml.kernel.org/r/20221119005215.3052436-4-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-class-bdi | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index 6d2a2fc189dd6..68b5d4018c2f7 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -55,6 +55,17 @@ Description:
 	mount that is prone to get stuck, or a FUSE mount which cannot
 	be trusted to play fair.
 
+	(read-write)
+What:		/sys/class/bdi/<bdi>/strict_limit
+Date:		October 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Forces per-BDI checks for the share of given device in the write-back
+	cache even before the global background dirty limit is reached. This
+	is useful in situations where the global limit is much higher than
+	affordable for given relatively slow (or untrusted) device. Turning
+	strictlimit on has no visible effect if max_ratio is equal to 100%.
+
 	(read-write)
 What:		/sys/class/bdi/<bdi>/stable_pages_required
 Date:		January 2008
-- 
GitLab


From ae82291e9ca47c3d6da6b77a00f427754aca413e Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:51:59 -0800
Subject: [PATCH 3092/4122] mm: use part per 1000000 for bdi ratios

To get finer granularity for ratio calculations use part per million
instead of percentiles. This is especially important if we want to
automatically convert byte values to ratios. Otherwise the values that
are actually used can be quite different. This is also important for
machines with more main memory (1% of 256GB is already 2.5GB).

Link: https://lkml.kernel.org/r/20221119005215.3052436-5-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h |  3 +++
 mm/backing-dev.c            |  6 +++---
 mm/page-writeback.c         | 15 +++++++++------
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 9c984ffc8a0aa..1b50c028e5add 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -102,6 +102,9 @@ static inline unsigned long wb_stat_error(void)
 #endif
 }
 
+/* BDI ratio is expressed as part per 1000000 for finer granularity. */
+#define BDI_RATIO_SCALE 10000
+
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index a0899cce72ef9..90fa517123dc6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -178,7 +178,7 @@ static ssize_t min_ratio_store(struct device *dev,
 
 	return ret;
 }
-BDI_SHOW(min_ratio, bdi->min_ratio)
+BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE)
 
 static ssize_t max_ratio_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
@@ -197,7 +197,7 @@ static ssize_t max_ratio_store(struct device *dev,
 
 	return ret;
 }
-BDI_SHOW(max_ratio, bdi->max_ratio)
+BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE)
 
 static ssize_t stable_pages_required_show(struct device *dev,
 					  struct device_attribute *attr,
@@ -809,7 +809,7 @@ int bdi_init(struct backing_dev_info *bdi)
 
 	kref_init(&bdi->refcnt);
 	bdi->min_ratio = 0;
-	bdi->max_ratio = 100;
+	bdi->max_ratio = 100 * BDI_RATIO_SCALE;
 	bdi->max_prop_frac = FPROP_FRAC_BASE;
 	INIT_LIST_HEAD(&bdi->bdi_list);
 	INIT_LIST_HEAD(&bdi->wb_list);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3745b886722f8..dd98b26543027 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -197,7 +197,7 @@ static void wb_min_max_ratio(struct bdi_writeback *wb,
 			min *= this_bw;
 			min = div64_ul(min, tot_bw);
 		}
-		if (max < 100) {
+		if (max < 100 * BDI_RATIO_SCALE) {
 			max *= this_bw;
 			max = div64_ul(max, tot_bw);
 		}
@@ -655,6 +655,8 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 	unsigned int delta;
 	int ret = 0;
 
+	min_ratio *= BDI_RATIO_SCALE;
+
 	spin_lock_bh(&bdi_lock);
 	if (min_ratio > bdi->max_ratio) {
 		ret = -EINVAL;
@@ -665,7 +667,7 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 			bdi->min_ratio = min_ratio;
 		} else {
 			delta = min_ratio - bdi->min_ratio;
-			if (bdi_min_ratio + delta < 100) {
+			if (bdi_min_ratio + delta < 100 * BDI_RATIO_SCALE) {
 				bdi_min_ratio += delta;
 				bdi->min_ratio = min_ratio;
 			} else {
@@ -684,6 +686,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 
 	if (max_ratio > 100)
 		return -EINVAL;
+	max_ratio *= BDI_RATIO_SCALE;
 
 	spin_lock_bh(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
@@ -775,15 +778,15 @@ static unsigned long __wb_calc_thresh(struct dirty_throttle_control *dtc)
 	fprop_fraction_percpu(&dom->completions, dtc->wb_completions,
 			      &numerator, &denominator);
 
-	wb_thresh = (thresh * (100 - bdi_min_ratio)) / 100;
+	wb_thresh = (thresh * (100 * BDI_RATIO_SCALE - bdi_min_ratio)) / (100 * BDI_RATIO_SCALE);
 	wb_thresh *= numerator;
 	wb_thresh = div64_ul(wb_thresh, denominator);
 
 	wb_min_max_ratio(dtc->wb, &wb_min_ratio, &wb_max_ratio);
 
-	wb_thresh += (thresh * wb_min_ratio) / 100;
-	if (wb_thresh > (thresh * wb_max_ratio) / 100)
-		wb_thresh = thresh * wb_max_ratio / 100;
+	wb_thresh += (thresh * wb_min_ratio) / (100 * BDI_RATIO_SCALE);
+	if (wb_thresh > (thresh * wb_max_ratio) / (100 * BDI_RATIO_SCALE))
+		wb_thresh = thresh * wb_max_ratio / (100 * BDI_RATIO_SCALE);
 
 	return wb_thresh;
 }
-- 
GitLab


From 00df7d51263b46ed93f7572e2d09579746f7b1eb Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:00 -0800
Subject: [PATCH 3093/4122] mm: add bdi_get_max_bytes() function

This adds a function to return the specified value for max_bytes. It
converts the stored max_ratio of the bdi to the corresponding bytes
value. It introduces the bdi_get_bytes helper function to do the
conversion. This is an approximation as it is based on the value that is
returned by global_dirty_limits(), which can change. The helper function
will also be used by the min_bytes bdi knob.

Link: https://lkml.kernel.org/r/20221119005215.3052436-6-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h |  1 +
 mm/page-writeback.c         | 17 +++++++++++++++++
 2 files changed, 18 insertions(+)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 1b50c028e5add..473686c32775e 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -105,6 +105,7 @@ static inline unsigned long wb_stat_error(void)
 /* BDI ratio is expressed as part per 1000000 for finer granularity. */
 #define BDI_RATIO_SCALE 10000
 
+u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index dd98b26543027..719404e0d03d8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -650,6 +650,18 @@ void wb_domain_exit(struct wb_domain *dom)
  */
 static unsigned int bdi_min_ratio;
 
+static u64 bdi_get_bytes(unsigned int ratio)
+{
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	u64 bytes;
+
+	global_dirty_limits(&background_thresh, &dirty_thresh);
+	bytes = (dirty_thresh * PAGE_SIZE * ratio) / BDI_RATIO_SCALE / 100;
+
+	return bytes;
+}
+
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	unsigned int delta;
@@ -701,6 +713,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 }
 EXPORT_SYMBOL(bdi_set_max_ratio);
 
+u64 bdi_get_max_bytes(struct backing_dev_info *bdi)
+{
+	return bdi_get_bytes(bdi->max_ratio);
+}
+
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit)
 {
 	if (strict_limit > 1)
-- 
GitLab


From efc3e6ad53ea14225b434fddca261c9a1c56c707 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:01 -0800
Subject: [PATCH 3094/4122] mm: split off __bdi_set_max_ratio() function

This splits off __bdi_set_max_ratio() from bdi_set_max_ratio().
__bdi_set_max_ratio() will also be called from bdi_set_max_bytes(),
which will be introduced in the next patch.

Link: https://lkml.kernel.org/r/20221119005215.3052436-7-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page-writeback.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 719404e0d03d8..e74ef596dc276 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -692,14 +692,10 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 	return ret;
 }
 
-int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
+static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
 {
 	int ret = 0;
 
-	if (max_ratio > 100)
-		return -EINVAL;
-	max_ratio *= BDI_RATIO_SCALE;
-
 	spin_lock_bh(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
@@ -711,6 +707,14 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned max_ratio)
 
 	return ret;
 }
+
+int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
+{
+	if (max_ratio > 100)
+		return -EINVAL;
+
+	return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE);
+}
 EXPORT_SYMBOL(bdi_set_max_ratio);
 
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi)
-- 
GitLab


From 1bf27e98d26d1e62166a456ef17460be085cbe0b Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:02 -0800
Subject: [PATCH 3095/4122] mm: add bdi_set_max_bytes() function

This introduces the bdi_set_max_bytes() function. The max_bytes function
does not store the max_bytes value. Instead it converts the max_bytes
value into the corresponding ratio value.

Link: https://lkml.kernel.org/r/20221119005215.3052436-8-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h |  1 +
 mm/page-writeback.c         | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 473686c32775e..ea6c993433d5b 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -108,6 +108,7 @@ static inline unsigned long wb_stat_error(void)
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
+int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes);
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index e74ef596dc276..20ae9adeb22f4 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -13,6 +13,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/math64.h>
 #include <linux/export.h>
 #include <linux/spinlock.h>
 #include <linux/fs.h>
@@ -650,6 +651,28 @@ void wb_domain_exit(struct wb_domain *dom)
  */
 static unsigned int bdi_min_ratio;
 
+static int bdi_check_pages_limit(unsigned long pages)
+{
+	unsigned long max_dirty_pages = global_dirtyable_memory();
+
+	if (pages > max_dirty_pages)
+		return -EINVAL;
+
+	return 0;
+}
+
+static unsigned long bdi_ratio_from_pages(unsigned long pages)
+{
+	unsigned long background_thresh;
+	unsigned long dirty_thresh;
+	unsigned long ratio;
+
+	global_dirty_limits(&background_thresh, &dirty_thresh);
+	ratio = div64_u64(pages * 100ULL * BDI_RATIO_SCALE, dirty_thresh);
+
+	return ratio;
+}
+
 static u64 bdi_get_bytes(unsigned int ratio)
 {
 	unsigned long background_thresh;
@@ -722,6 +745,20 @@ u64 bdi_get_max_bytes(struct backing_dev_info *bdi)
 	return bdi_get_bytes(bdi->max_ratio);
 }
 
+int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes)
+{
+	int ret;
+	unsigned long pages = max_bytes >> PAGE_SHIFT;
+	unsigned long max_ratio;
+
+	ret = bdi_check_pages_limit(pages);
+	if (ret)
+		return ret;
+
+	max_ratio = bdi_ratio_from_pages(pages);
+	return __bdi_set_max_ratio(bdi, max_ratio);
+}
+
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit)
 {
 	if (strict_limit > 1)
-- 
GitLab


From c56e049a5e401a177c7c9b39a3bcc973ff5cec0b Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:03 -0800
Subject: [PATCH 3096/4122] mm: add knob /sys/class/bdi/<bdi>/max_bytes

This adds the new knob max_bytes to specify a dirty memory limit for the
corresponding bdi. The specified bytes value is converted to a ratio.

Link: https://lkml.kernel.org/r/20221119005215.3052436-9-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/backing-dev.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 90fa517123dc6..95d3229fc81f0 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -199,6 +199,34 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE)
 
+static ssize_t max_bytes_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%llu\n", bdi_get_max_bytes(bdi));
+}
+
+static ssize_t max_bytes_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	u64 bytes;
+	ssize_t ret;
+
+	ret = kstrtoull(buf, 10, &bytes);
+	if (ret < 0)
+		return ret;
+
+	ret = bdi_set_max_bytes(bdi, bytes);
+	if (!ret)
+		ret = count;
+
+	return ret;
+}
+DEVICE_ATTR_RW(max_bytes);
+
 static ssize_t stable_pages_required_show(struct device *dev,
 					  struct device_attribute *attr,
 					  char *buf)
@@ -241,6 +269,7 @@ static struct attribute *bdi_dev_attrs[] = {
 	&dev_attr_read_ahead_kb.attr,
 	&dev_attr_min_ratio.attr,
 	&dev_attr_max_ratio.attr,
+	&dev_attr_max_bytes.attr,
 	&dev_attr_stable_pages_required.attr,
 	&dev_attr_strict_limit.attr,
 	NULL,
-- 
GitLab


From c354d9268d7825eb8643f658c5091079d4f11a4a Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:04 -0800
Subject: [PATCH 3097/4122] mm: document /sys/class/bdi/<bdi>/max_bytes knob

This documents the new /sys/class/bdi/<bdi>/max_bytes knob.

Link: https://lkml.kernel.org/r/20221119005215.3052436-10-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-class-bdi | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index 68b5d4018c2f7..580f723de0496 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -56,6 +56,20 @@ Description:
 	be trusted to play fair.
 
 	(read-write)
+
+What:		/sys/class/bdi/<bdi>/max_bytes
+Date:		October 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Allows limiting a particular device to use not more than the
+	given 'max_bytes' of the write-back cache.  This is useful in
+	situations where we want to avoid one device taking all or
+	most of the write-back cache.  For example in case of an NFS
+	mount that is prone to get stuck, a FUSE mount which cannot be
+	trusted to play fair, or a nbd device.
+
+	(read-write)
+
 What:		/sys/class/bdi/<bdi>/strict_limit
 Date:		October 2022
 Contact:	Stefan Roesch <shr@devkernel.io>
-- 
GitLab


From 712c00d66a342a3ed375df41c3df7d3d2abad2c0 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:05 -0800
Subject: [PATCH 3098/4122] mm: add bdi_get_min_bytes() function

This adds a function to return the specified value for min_bytes. It
converts the stored min_ratio of the bdi to the corresponding bytes
value. This is an approximation as it is based on the value that is
returned by global_dirty_limits(), which can change. The returned
value can be different than the value when the min_bytes value was set.

Link: https://lkml.kernel.org/r/20221119005215.3052436-11-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h | 1 +
 mm/page-writeback.c         | 5 +++++
 2 files changed, 6 insertions(+)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index ea6c993433d5b..8e04567727e63 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -105,6 +105,7 @@ static inline unsigned long wb_stat_error(void)
 /* BDI ratio is expressed as part per 1000000 for finer granularity. */
 #define BDI_RATIO_SCALE 10000
 
+u64 bdi_get_min_bytes(struct backing_dev_info *bdi);
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 20ae9adeb22f4..c47824464f4c6 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -740,6 +740,11 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
 }
 EXPORT_SYMBOL(bdi_set_max_ratio);
 
+u64 bdi_get_min_bytes(struct backing_dev_info *bdi)
+{
+	return bdi_get_bytes(bdi->min_ratio);
+}
+
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi)
 {
 	return bdi_get_bytes(bdi->max_ratio);
-- 
GitLab


From 8021fb3232f265b81c7e4e7aba15bc3a04ff1fd3 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:06 -0800
Subject: [PATCH 3099/4122] mm: split off __bdi_set_min_ratio() function

This splits off the __bdi_set_min_ratio() function from the
bdi_set_min_ratio() function. The __bdi_set_min_ratio() function will
also be called from the bdi_set_min_bytes() function, which will be
introduced in the next patch.

Link: https://lkml.kernel.org/r/20221119005215.3052436-12-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page-writeback.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c47824464f4c6..cefee7210d832 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -685,7 +685,7 @@ static u64 bdi_get_bytes(unsigned int ratio)
 	return bytes;
 }
 
-int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	unsigned int delta;
 	int ret = 0;
@@ -731,6 +731,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra
 	return ret;
 }
 
+int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+	return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE);
+}
+
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
 {
 	if (max_ratio > 100)
-- 
GitLab


From 803c98050569850be5fd51a2025c67622de887d9 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:07 -0800
Subject: [PATCH 3100/4122] mm: add bdi_set_min_bytes() function

This introduces the bdi_set_min_bytes() function. The min_bytes function
does not store the min_bytes value. Instead it converts the min_bytes
value into the corresponding ratio value.

Link: https://lkml.kernel.org/r/20221119005215.3052436-13-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h |  1 +
 mm/page-writeback.c         | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 8e04567727e63..572669758c7f8 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -109,6 +109,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi);
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
+int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes);
 int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes);
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index cefee7210d832..3d151e7a9b6c3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -750,6 +750,20 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi)
 	return bdi_get_bytes(bdi->min_ratio);
 }
 
+int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes)
+{
+	int ret;
+	unsigned long pages = min_bytes >> PAGE_SHIFT;
+	unsigned long min_ratio;
+
+	ret = bdi_check_pages_limit(pages);
+	if (ret)
+		return ret;
+
+	min_ratio = bdi_ratio_from_pages(pages);
+	return __bdi_set_min_ratio(bdi, min_ratio);
+}
+
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi)
 {
 	return bdi_get_bytes(bdi->max_ratio);
-- 
GitLab


From 9c84819bd64ec15cb15d041c45ebe4725e9d4f3b Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:08 -0800
Subject: [PATCH 3101/4122] mm: add /sys/class/bdi/<bdi>/min_bytes knob

bdi has two existing knobs to limit the amount of dirty memory:
min_ratio and max_ratio. However the granularity of the knobs is limited
and often it is more convenient to specify limits in terms of bytes.
This change adds the min_bytes knob.

It does not store the min_bytes value, instead it converts the max_bytes
value to a ratio. The value is therefore more an approximation than an
absolute value.

It also maintains the sum over all the bdi min_ratio values stored in
the variable bdi_min_ratio.

Link: https://lkml.kernel.org/r/20221119005215.3052436-14-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/backing-dev.c | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 95d3229fc81f0..3fab79061adeb 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -199,6 +199,34 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE)
 
+static ssize_t min_bytes_show(struct device *dev,
+			      struct device_attribute *attr,
+			      char *buf)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%llu\n", bdi_get_min_bytes(bdi));
+}
+
+static ssize_t min_bytes_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	u64 bytes;
+	ssize_t ret;
+
+	ret = kstrtoull(buf, 10, &bytes);
+	if (ret < 0)
+		return ret;
+
+	ret = bdi_set_min_bytes(bdi, bytes);
+	if (!ret)
+		ret = count;
+
+	return ret;
+}
+DEVICE_ATTR_RW(min_bytes);
+
 static ssize_t max_bytes_show(struct device *dev,
 			      struct device_attribute *attr,
 			      char *buf)
@@ -269,6 +297,7 @@ static struct attribute *bdi_dev_attrs[] = {
 	&dev_attr_read_ahead_kb.attr,
 	&dev_attr_min_ratio.attr,
 	&dev_attr_max_ratio.attr,
+	&dev_attr_min_bytes.attr,
 	&dev_attr_max_bytes.attr,
 	&dev_attr_stable_pages_required.attr,
 	&dev_attr_strict_limit.attr,
-- 
GitLab


From 9c832a8d571784c998d0f9f5df480c62f7f3064c Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:09 -0800
Subject: [PATCH 3102/4122] mm: document /sys/class/bdi/<bdi>/min_bytes knob

This documents the new /sys/class/bdi/<bdi>/min_bytes knob.

[akpm@linux-foundation.org: fix htmldocs warnings]
Link: https://lkml.kernel.org/r/20221119005215.3052436-15-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index 580f723de0496..a9a2f588a2b74 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -57,6 +57,21 @@ Description:
 
 	(read-write)
 
+What:		/sys/class/bdi/<bdi>/min_bytes
+Date:		October 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Under normal circumstances each device is given a part of the
+	total write-back cache that relates to its current average
+	writeout speed in relation to the other devices.
+
+	The 'min_bytes' parameter allows assigning a minimum
+	percentage of the write-back cache to a particular device
+	expressed in bytes.
+	For example, this is useful for providing a minimum QoS.
+
+	(read-write)
+
 What:		/sys/class/bdi/<bdi>/max_bytes
 Date:		October 2022
 Contact:	Stefan Roesch <shr@devkernel.io>
-- 
GitLab


From 4e230b406eda9bdf7f8a71e2cc3df18a824abcb0 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:10 -0800
Subject: [PATCH 3103/4122] mm: add bdi_set_max_ratio_no_scale() function

This introduces bdi_set_max_ratio_no_scale(). It uses the max
granularity for the ratio. This function by the new sysfs knob
max_ratio_fine.

Link: https://lkml.kernel.org/r/20221119005215.3052436-16-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h |  1 +
 mm/page-writeback.c         | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 572669758c7f8..d9acbb22ff259 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -109,6 +109,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi);
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
+int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio);
 int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes);
 int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes);
 int bdi_set_strict_limit(struct backing_dev_info *bdi, unsigned int strict_limit);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3d151e7a9b6c3..f44ade72966cb 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -719,6 +719,9 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra
 {
 	int ret = 0;
 
+	if (max_ratio > 100 * BDI_RATIO_SCALE)
+		return -EINVAL;
+
 	spin_lock_bh(&bdi_lock);
 	if (bdi->min_ratio > max_ratio) {
 		ret = -EINVAL;
@@ -731,6 +734,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra
 	return ret;
 }
 
+int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio)
+{
+	return __bdi_set_max_ratio(bdi, max_ratio);
+}
+
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 {
 	return __bdi_set_min_ratio(bdi, min_ratio * BDI_RATIO_SCALE);
@@ -738,9 +746,6 @@ int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio)
 
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio)
 {
-	if (max_ratio > 100)
-		return -EINVAL;
-
 	return __bdi_set_max_ratio(bdi, max_ratio * BDI_RATIO_SCALE);
 }
 EXPORT_SYMBOL(bdi_set_max_ratio);
-- 
GitLab


From bca52dcbadc583f4db6435599c44a79f97293f06 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:11 -0800
Subject: [PATCH 3104/4122] mm: add /sys/class/bdi/<bdi>/max_ratio_fine knob

This adds the max_ratio_fine knob. The knob specifies the values not
based on 1 of 100, but instead 1 per million.

Link: https://lkml.kernel.org/r/20221119005215.3052436-17-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/backing-dev.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 3fab79061adeb..94c2382367cf6 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -199,6 +199,25 @@ static ssize_t max_ratio_store(struct device *dev,
 }
 BDI_SHOW(max_ratio, bdi->max_ratio / BDI_RATIO_SCALE)
 
+static ssize_t max_ratio_fine_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	unsigned int ratio;
+	ssize_t ret;
+
+	ret = kstrtouint(buf, 10, &ratio);
+	if (ret < 0)
+		return ret;
+
+	ret = bdi_set_max_ratio_no_scale(bdi, ratio);
+	if (!ret)
+		ret = count;
+
+	return ret;
+}
+BDI_SHOW(max_ratio_fine, bdi->max_ratio)
+
 static ssize_t min_bytes_show(struct device *dev,
 			      struct device_attribute *attr,
 			      char *buf)
@@ -297,6 +316,7 @@ static struct attribute *bdi_dev_attrs[] = {
 	&dev_attr_read_ahead_kb.attr,
 	&dev_attr_min_ratio.attr,
 	&dev_attr_max_ratio.attr,
+	&dev_attr_max_ratio_fine.attr,
 	&dev_attr_min_bytes.attr,
 	&dev_attr_max_bytes.attr,
 	&dev_attr_stable_pages_required.attr,
-- 
GitLab


From 54790f30fea74247e2f38b4a632ee3dc2fe42d86 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:12 -0800
Subject: [PATCH 3105/4122] mm: document /sys/class/bdi/<bdi>/max_ratio_fine
 knob

This documents the new /sys/class/bdi/<bdi>/max_ratio_fine knob.

[akpm@linux-foundation.org: fix htmldocs warnings]
Link: https://lkml.kernel.org/r/20221119005215.3052436-18-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-class-bdi | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index a9a2f588a2b74..e9c584db316f8 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -57,6 +57,19 @@ Description:
 
 	(read-write)
 
+What:		/sys/class/bdi/<bdi>/max_ratio_fine
+Date:		November 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Allows limiting a particular device to use not more than the
+	given value of the write-back cache.  The value is given as part
+	of 1 million. This is useful in situations where we want to avoid
+	one device taking all or most of the write-back cache.  For example
+	in case of an NFS mount that is prone to get stuck, or a FUSE mount
+	which cannot be trusted to play fair.
+
+	(read-write)
+
 What:		/sys/class/bdi/<bdi>/min_bytes
 Date:		October 2022
 Contact:	Stefan Roesch <shr@devkernel.io>
-- 
GitLab


From 2c44af4f2aaa260199f218f11920c406e688693c Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:13 -0800
Subject: [PATCH 3106/4122] mm: add bdi_set_min_ratio_no_scale() function

This introduces bdi_set_min_ratio_no_scale(). It uses the max
granularity for the ratio. This function by the new sysfs knob
min_ratio_fine.

Link: https://lkml.kernel.org/r/20221119005215.3052436-19-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/backing-dev.h | 1 +
 mm/page-writeback.c         | 7 +++++++
 2 files changed, 8 insertions(+)

diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index d9acbb22ff259..fbad4fcd408e2 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -109,6 +109,7 @@ u64 bdi_get_min_bytes(struct backing_dev_info *bdi);
 u64 bdi_get_max_bytes(struct backing_dev_info *bdi);
 int bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
+int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio);
 int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio);
 int bdi_set_min_bytes(struct backing_dev_info *bdi, u64 min_bytes);
 int bdi_set_max_bytes(struct backing_dev_info *bdi, u64 max_bytes);
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f44ade72966cb..ad608ef2a2436 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -690,6 +690,8 @@ static int __bdi_set_min_ratio(struct backing_dev_info *bdi, unsigned int min_ra
 	unsigned int delta;
 	int ret = 0;
 
+	if (min_ratio > 100 * BDI_RATIO_SCALE)
+		return -EINVAL;
 	min_ratio *= BDI_RATIO_SCALE;
 
 	spin_lock_bh(&bdi_lock);
@@ -734,6 +736,11 @@ static int __bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ra
 	return ret;
 }
 
+int bdi_set_min_ratio_no_scale(struct backing_dev_info *bdi, unsigned int min_ratio)
+{
+	return __bdi_set_min_ratio(bdi, min_ratio);
+}
+
 int bdi_set_max_ratio_no_scale(struct backing_dev_info *bdi, unsigned int max_ratio)
 {
 	return __bdi_set_max_ratio(bdi, max_ratio);
-- 
GitLab


From ad3e6dabf6f7d9ffd68eb711191ef16cdbdd25f0 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:14 -0800
Subject: [PATCH 3107/4122] mm: add /sys/class/bdi/<bdi>/min_ratio_fine knob

This adds the min_ratio_fine knob. The knob specifies the values not
based on 1 of 100, but instead 1 per million.

Link: https://lkml.kernel.org/r/20221119005215.3052436-20-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/backing-dev.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 94c2382367cf6..a53b9360b72ec 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -180,6 +180,25 @@ static ssize_t min_ratio_store(struct device *dev,
 }
 BDI_SHOW(min_ratio, bdi->min_ratio / BDI_RATIO_SCALE)
 
+static ssize_t min_ratio_fine_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi = dev_get_drvdata(dev);
+	unsigned int ratio;
+	ssize_t ret;
+
+	ret = kstrtouint(buf, 10, &ratio);
+	if (ret < 0)
+		return ret;
+
+	ret = bdi_set_min_ratio_no_scale(bdi, ratio);
+	if (!ret)
+		ret = count;
+
+	return ret;
+}
+BDI_SHOW(min_ratio_fine, bdi->min_ratio)
+
 static ssize_t max_ratio_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
@@ -315,6 +334,7 @@ static DEVICE_ATTR_RW(strict_limit);
 static struct attribute *bdi_dev_attrs[] = {
 	&dev_attr_read_ahead_kb.attr,
 	&dev_attr_min_ratio.attr,
+	&dev_attr_min_ratio_fine.attr,
 	&dev_attr_max_ratio.attr,
 	&dev_attr_max_ratio_fine.attr,
 	&dev_attr_min_bytes.attr,
-- 
GitLab


From eba39236f18da7a50b6c51df5d902ee72c43e760 Mon Sep 17 00:00:00 2001
From: Stefan Roesch <shr@devkernel.io>
Date: Fri, 18 Nov 2022 16:52:15 -0800
Subject: [PATCH 3108/4122] mm: document /sys/class/bdi/<bdi>/min_ratio_fine
 knob

This documents the new /sys/class/bdi/<bdi>/max_ratio_fine knob.

[akpm@linux-foundation.org: fix htmldocs warnings]
Link: https://lkml.kernel.org/r/20221119005215.3052436-21-shr@devkernel.io
Signed-off-by: Stefan Roesch <shr@devkernel.io>
Cc: Chris Mason <clm@meta.com>
Cc: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/ABI/testing/sysfs-class-bdi | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-class-bdi b/Documentation/ABI/testing/sysfs-class-bdi
index e9c584db316f8..0d2abd88a18cb 100644
--- a/Documentation/ABI/testing/sysfs-class-bdi
+++ b/Documentation/ABI/testing/sysfs-class-bdi
@@ -44,6 +44,21 @@ Description:
 
 	(read-write)
 
+What:		/sys/class/bdi/<bdi>/min_ratio_fine
+Date:		November 2022
+Contact:	Stefan Roesch <shr@devkernel.io>
+Description:
+	Under normal circumstances each device is given a part of the
+	total write-back cache that relates to its current average
+	writeout speed in relation to the other devices.
+
+	The 'min_ratio_fine' parameter allows assigning a minimum reserve
+	of the write-back cache to a particular device. The value is
+	expressed as part of 1 million. For example, this is useful for
+	providing a minimum QoS.
+
+	(read-write)
+
 What:		/sys/class/bdi/<bdi>/max_ratio
 Date:		January 2008
 Contact:	Peter Zijlstra <a.p.zijlstra@chello.nl>
-- 
GitLab


From be21b32afe470c5ae98e27e49201158a47032942 Mon Sep 17 00:00:00 2001
From: NARIBAYASHI Akira <a.naribayashi@fujitsu.com>
Date: Wed, 26 Oct 2022 20:24:38 +0900
Subject: [PATCH 3109/4122] mm, compaction: fix fast_isolate_around() to stay
 within boundaries

Depending on the memory configuration, isolate_freepages_block() may scan
pages out of the target range and causes panic.

Panic can occur on systems with multiple zones in a single pageblock.

The reason it is rare is that it only happens in special
configurations.  Depending on how many similar systems there are, it
may be a good idea to fix this problem for older kernels as well.

The problem is that pfn as argument of fast_isolate_around() could be out
of the target range.  Therefore we should consider the case where pfn <
start_pfn, and also the case where end_pfn < pfn.

This problem should have been addressd by the commit 6e2b7044c199 ("mm,
compaction: make fast_isolate_freepages() stay within zone") but there was
an oversight.

 Case1: pfn < start_pfn

  <at memory compaction for node Y>
  |  node X's zone  | node Y's zone
  +-----------------+------------------------------...
   pageblock    ^   ^     ^
  +-----------+-----------+-----------+-----------+...
                ^   ^     ^
                ^   ^      end_pfn
                ^    start_pfn = cc->zone->zone_start_pfn
                 pfn
                <---------> scanned range by "Scan After"

 Case2: end_pfn < pfn

  <at memory compaction for node X>
  |  node X's zone  | node Y's zone
  +-----------------+------------------------------...
   pageblock  ^     ^   ^
  +-----------+-----------+-----------+-----------+...
              ^     ^   ^
              ^     ^    pfn
              ^      end_pfn
               start_pfn
              <---------> scanned range by "Scan Before"

It seems that there is no good reason to skip nr_isolated pages just after
given pfn.  So let perform simple scan from start to end instead of
dividing the scan into "Before" and "After".

Link: https://lkml.kernel.org/r/20221026112438.236336-1-a.naribayashi@fujitsu.com
Fixes: 6e2b7044c199 ("mm, compaction: make fast_isolate_freepages() stay within zone").
Signed-off-by: NARIBAYASHI Akira <a.naribayashi@fujitsu.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/compaction.c | 18 +++++-------------
 1 file changed, 5 insertions(+), 13 deletions(-)

diff --git a/mm/compaction.c b/mm/compaction.c
index 1f6da31dd9a50..ca1603524bbe0 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1344,7 +1344,7 @@ move_freelist_tail(struct list_head *freelist, struct page *freepage)
 }
 
 static void
-fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
+fast_isolate_around(struct compact_control *cc, unsigned long pfn)
 {
 	unsigned long start_pfn, end_pfn;
 	struct page *page;
@@ -1365,21 +1365,13 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
 	if (!page)
 		return;
 
-	/* Scan before */
-	if (start_pfn != pfn) {
-		isolate_freepages_block(cc, &start_pfn, pfn, &cc->freepages, 1, false);
-		if (cc->nr_freepages >= cc->nr_migratepages)
-			return;
-	}
-
-	/* Scan after */
-	start_pfn = pfn + nr_isolated;
-	if (start_pfn < end_pfn)
-		isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
+	isolate_freepages_block(cc, &start_pfn, end_pfn, &cc->freepages, 1, false);
 
 	/* Skip this pageblock in the future as it's full or nearly full */
 	if (cc->nr_freepages < cc->nr_migratepages)
 		set_pageblock_skip(page);
+
+	return;
 }
 
 /* Search orders in round-robin fashion */
@@ -1556,7 +1548,7 @@ fast_isolate_freepages(struct compact_control *cc)
 		return cc->free_pfn;
 
 	low_pfn = page_to_pfn(page);
-	fast_isolate_around(cc, low_pfn, nr_isolated);
+	fast_isolate_around(cc, low_pfn);
 	return low_pfn;
 }
 
-- 
GitLab


From 7ce5f7e16afa82d33dc47d633404b8b1142a5e44 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Wed, 23 Nov 2022 10:43:19 +0530
Subject: [PATCH 3110/4122] documentation/mm: update pmd_present() in
 arch_pgtable_helpers.rst

Although pmd_present() might seem to indicate a valid and mapped pmd
entry, in reality it returns true when pmd_page() points to a valid page
in memory , regardless whether the pmd entry is mapped or not.  Andrea
Arcangeli had earlier explained [1] the required semantics for
pmd_present().  This just updates the documentation for pmd_present() as
required.

[1] https://lore.kernel.org/lkml/20181017020930.GN30832@redhat.com/

Link: https://lkml.kernel.org/r/20221123051319.1312582-1-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: David Hildenbrand <david@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Mike Rapoport <rppt@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/mm/arch_pgtable_helpers.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/mm/arch_pgtable_helpers.rst b/Documentation/mm/arch_pgtable_helpers.rst
index cbaee9e592410..fd2a19df884ed 100644
--- a/Documentation/mm/arch_pgtable_helpers.rst
+++ b/Documentation/mm/arch_pgtable_helpers.rst
@@ -94,7 +94,7 @@ PMD Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | pmd_trans_huge            | Tests a Transparent Huge Page (THP) at PMD       |
 +---------------------------+--------------------------------------------------+
-| pmd_present               | Tests a valid mapped PMD                         |
+| pmd_present               | Tests whether pmd_page() points to valid memory  |
 +---------------------------+--------------------------------------------------+
 | pmd_young                 | Tests a young PMD                                |
 +---------------------------+--------------------------------------------------+
-- 
GitLab


From 8d9b63708ddd1ac51e0260c7b8f641daf01f4caf Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky <senozhatsky@chromium.org>
Date: Tue, 22 Nov 2022 12:30:22 +0900
Subject: [PATCH 3111/4122] zswap: do not allocate from atomic pool

zswap_frontswap_load() should be called from preemptible context (we even
call mutex_lock() there) and it does not look like we need to do
GFP_ATOMIC allocaion for temp buffer.  The same applies to
zswap_writeback_entry().

Use GFP_KERNEL for temporary buffer allocation in both cases.

Link: https://lkml.kernel.org/r/Y3xCTr6ikbtcUr/y@google.com
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Signed-off-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zpool.c | 7 +++++++
 mm/zswap.c | 4 ++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/mm/zpool.c b/mm/zpool.c
index 68facc1934965..f46c0d5e766cd 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -387,6 +387,13 @@ bool zpool_evictable(struct zpool *zpool)
  * zpool_can_sleep_mapped - Test if zpool can sleep when do mapped.
  * @zpool:	The zpool to test
  *
+ * Some allocators enter non-preemptible context in ->map() callback (e.g.
+ * disable pagefaults) and exit that context in ->unmap(), which limits what
+ * we can do with the mapped object. For instance, we cannot wait for
+ * asynchronous crypto API to decompress such an object or take mutexes
+ * since those will call into the scheduler. This function tells us whether
+ * we use such an allocator.
+ *
  * Returns: true if zpool can sleep; false otherwise.
  */
 bool zpool_can_sleep_mapped(struct zpool *zpool)
diff --git a/mm/zswap.c b/mm/zswap.c
index 2d48fd59cc7ab..3019f0bde194d 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -958,7 +958,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 	};
 
 	if (!zpool_can_sleep_mapped(pool)) {
-		tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
+		tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
 		if (!tmp)
 			return -ENOMEM;
 	}
@@ -1311,7 +1311,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
 	}
 
 	if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
-		tmp = kmalloc(entry->length, GFP_ATOMIC);
+		tmp = kmalloc(entry->length, GFP_KERNEL);
 		if (!tmp) {
 			ret = -ENOMEM;
 			goto freeentry;
-- 
GitLab


From 373dfda2bac1173971099dc76254a016646f62ab Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Thu, 24 Nov 2022 18:46:41 +0530
Subject: [PATCH 3112/4122] mm/thp: rename pmd_to_page() as pmd_pgtable_page()

Current pmd_to_page(), which derives the page table page containing the
pmd address has a very misleading name.  The problem being, it sounds
similar to pmd_page() which derives page embedded in a given pmd entry
either for next level page or a mapped huge page.  Rename it as
pmd_pgtable_page() instead.

Link: https://lkml.kernel.org/r/20221124131641.1523772-1-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d8363ac34a7c2..2c73dc112ffc0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2510,7 +2510,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
 
 #if USE_SPLIT_PMD_PTLOCKS
 
-static struct page *pmd_to_page(pmd_t *pmd)
+static struct page *pmd_pgtable_page(pmd_t *pmd)
 {
 	unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
 	return virt_to_page((void *)((unsigned long) pmd & mask));
@@ -2518,7 +2518,7 @@ static struct page *pmd_to_page(pmd_t *pmd)
 
 static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd)
 {
-	return ptlock_ptr(pmd_to_page(pmd));
+	return ptlock_ptr(pmd_pgtable_page(pmd));
 }
 
 static inline bool pmd_ptlock_init(struct page *page)
@@ -2537,7 +2537,7 @@ static inline void pmd_ptlock_free(struct page *page)
 	ptlock_free(page);
 }
 
-#define pmd_huge_pte(mm, pmd) (pmd_to_page(pmd)->pmd_huge_pte)
+#define pmd_huge_pte(mm, pmd) (pmd_pgtable_page(pmd)->pmd_huge_pte)
 
 #else
 
-- 
GitLab


From 7e25de77bc5ea56cc3ff618fc8f4ea1896a4dbb3 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Fri, 25 Nov 2022 09:15:02 +0530
Subject: [PATCH 3113/4122] s390/mm: use pmd_pgtable_page() helper in
 __gmap_segment_gaddr()

In __gmap_segment_gaddr() pmd level page table page is being extracted
from the pmd pointer, similar to pmd_pgtable_page() implementation.  This
reduces some redundancy by directly using pmd_pgtable_page() instead,
though first making it available.

Link: https://lkml.kernel.org/r/20221125034502.1559986-1-anshuman.khandual@arm.com
Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/s390/mm/gmap.c | 5 ++---
 include/linux/mm.h  | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 02d15c8dc92e9..8947451ae0210 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -336,12 +336,11 @@ static int gmap_alloc_table(struct gmap *gmap, unsigned long *table,
 static unsigned long __gmap_segment_gaddr(unsigned long *entry)
 {
 	struct page *page;
-	unsigned long offset, mask;
+	unsigned long offset;
 
 	offset = (unsigned long) entry / sizeof(unsigned long);
 	offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE;
-	mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
-	page = virt_to_page((void *)((unsigned long) entry & mask));
+	page = pmd_pgtable_page((pmd_t *) entry);
 	return page->index + offset;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2c73dc112ffc0..8df5cae69c804 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2510,7 +2510,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
 
 #if USE_SPLIT_PMD_PTLOCKS
 
-static struct page *pmd_pgtable_page(pmd_t *pmd)
+static inline struct page *pmd_pgtable_page(pmd_t *pmd)
 {
 	unsigned long mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1);
 	return virt_to_page((void *)((unsigned long) pmd & mask));
-- 
GitLab


From 8ef9c32a12a8a0012a4988050947c45521260c5d Mon Sep 17 00:00:00 2001
From: Xu Panda <xu.panda@zte.com.cn>
Date: Thu, 24 Nov 2022 19:29:01 +0800
Subject: [PATCH 3114/4122] mm: vmscan: use sysfs_emit() to instead of
 scnprintf()

Replace open-coded snprintf() with sysfs_emit() to simplify the code.

Link: https://lkml.kernel.org/r/202211241929015476424@zte.com.cn
Signed-off-by: Xu Panda <xu.panda@zte.com.cn>
Signed-off-by: Yang Yang <yang.yang29@zte.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 805fa51b175c2..9356a3ee639ce 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5407,7 +5407,7 @@ static ssize_t show_enabled(struct kobject *kobj, struct kobj_attribute *attr, c
 	if (arch_has_hw_nonleaf_pmd_young() && get_cap(LRU_GEN_NONLEAF_YOUNG))
 		caps |= BIT(LRU_GEN_NONLEAF_YOUNG);
 
-	return snprintf(buf, PAGE_SIZE, "0x%04x\n", caps);
+	return sysfs_emit(buf, "0x%04x\n", caps);
 }
 
 /* see Documentation/admin-guide/mm/multigen_lru.rst for details */
-- 
GitLab


From e833bc50340502a2a75b41bbd1a179aa769e2014 Mon Sep 17 00:00:00 2001
From: Peter Xu <peterx@redhat.com>
Date: Fri, 25 Nov 2022 13:58:57 -0500
Subject: [PATCH 3115/4122] mm/thp: re-apply mkdirty for small pages after
 split

We used to have 624a2c94f5b7 (Partly revert "mm/thp: carry over dirty bit
when thp splits on pmd") fixing the regression reported here by Anatoly
Pugachev on sparc64:

https://lore.kernel.org/r/20221021160603.GA23307@u164.east.ru

Where we temporarily ignored the dirty bit for small pages.

Then, Hev also reported similar issue on loongarch:

(the original mail was private, but Anatoly copied the list here)

https://lore.kernel.org/r/CADxRZqxqb7f_WhMh=jweZP+ynf_JwGd-0VwbYgp4P+T0-AXosw@mail.gmail.com

Hev pointed out that the issue is having HW write bit set within the
pte_mkdirty() so the split pte can be written after split even if e.g.
they were shared by more than one processes, causing data corrupt.

Hev also tried to explain why loongarch set HW write bit in mkdirty:

https://lore.kernel.org/r/CAHirt9itKO_K_HPboXh5AyJtt16Zf0cD73PtHvM=na39u_ztxA@mail.gmail.com

One way to fix it is as what Huacai proposed here for loongarch (then we
can re-apply the dirty bit in thp split):

https://lore.kernel.org/r/20221117042532.4064448-1-chenhuacai@loongson.cnn

We may need similar thing for sparc64, though.

For now since we've found the root cause of the dirty bit issue the
simpler solution (which won't lose the dirty bit for small) that will work
for both is we wr-protect after pte_mkdirty(), so the HW write bit can be
persistent after thp split.

Add a comment for wrprotect, so we will not mess up the ordering later.

With 624a2c94f5b7 (Partly revert "mm/thp: carry over dirty bit when thp
splits on pmd") this is not a fix anymore, but just brings back the dirty
bit for thp split safely, so we re-apply the optimization but in safe way.

Provide a Tested-by credit to Hev too (not the exact same patch but the
same outcome) for loongarch.

Link: https://lkml.kernel.org/r/20221125185857.3110155-1-peterx@redhat.com
Signed-off-by: Peter Xu <peterx@redhat.com>
Tested-by: Hev <r@hev.cc> # loongarch
Cc: Anatoly Pugachev <matorola@gmail.com>
Cc: Raghavendra K T <raghavendra.kt@amd.com>
Cc: Thorsten Leemhuis <regressions@leemhuis.info>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/huge_memory.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86a30041a2e10..8f10afba17a6f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2238,16 +2238,18 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
 			entry = maybe_mkwrite(entry, vma);
 			if (anon_exclusive)
 				SetPageAnonExclusive(page + i);
-			if (!write)
-				entry = pte_wrprotect(entry);
 			if (!young)
 				entry = pte_mkold(entry);
+			/* NOTE: this may set soft-dirty too on some archs */
+			if (dirty)
+				entry = pte_mkdirty(entry);
 			/*
-			 * NOTE: we don't do pte_mkdirty when dirty==true
-			 * because it breaks sparc64 which can sigsegv
-			 * random process.  Need to revisit when we figure
-			 * out what is special with sparc64.
+			 * NOTE: this needs to happen after pte_mkdirty,
+			 * because some archs (sparc64, loongarch) could
+			 * set hw write bit when mkdirty.
 			 */
+			if (!write)
+				entry = pte_wrprotect(entry);
 			if (soft_dirty)
 				entry = pte_mksoft_dirty(entry);
 			if (uffd_wp)
-- 
GitLab


From e0ff428042335c7b62785b3cf911c427a618bc86 Mon Sep 17 00:00:00 2001
From: Ma Wupeng <mawupeng1@huawei.com>
Date: Fri, 25 Nov 2022 14:54:44 +0800
Subject: [PATCH 3116/4122] mm/memory-failure.c: cleanup in unpoison_memory

If freeit is true, the value of ret must be zero, there is no need to
check the value of freeit after label unlock_mutex.

We can drop variable freeit to do this cleanup.

Link: https://lkml.kernel.org/r/20221125065444.3462681-1-mawupeng1@huawei.com
Signed-off-by: Ma Wupeng <mawupeng1@huawei.com>
Acked-by: Naoya Horiguchi <naoya.horiguchi@nec.com>
Reviewed-by: Miaohe Lin <linmiaohe@huawei.com>
Cc: zhenwei pi <pizhenwei@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memory-failure.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 2e62940c7baee..c77a9e37e27e0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -2338,7 +2338,6 @@ int unpoison_memory(unsigned long pfn)
 	struct page *page;
 	struct page *p;
 	int ret = -EBUSY;
-	int freeit = 0;
 	unsigned long count = 1;
 	bool huge = false;
 	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
@@ -2413,10 +2412,9 @@ int unpoison_memory(unsigned long pfn)
 				goto unlock_mutex;
 			}
 		}
-		freeit = !!TestClearPageHWPoison(p);
 
 		put_page(page);
-		if (freeit) {
+		if (TestClearPageHWPoison(p)) {
 			put_page(page);
 			ret = 0;
 		}
@@ -2424,7 +2422,7 @@ int unpoison_memory(unsigned long pfn)
 
 unlock_mutex:
 	mutex_unlock(&mf_mutex);
-	if (!ret || freeit) {
+	if (!ret) {
 		if (!huge)
 			num_poisoned_pages_sub(pfn, 1);
 		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
-- 
GitLab


From 4265df667bbdc71c640e43c905bd9aeeead92365 Mon Sep 17 00:00:00 2001
From: Peng Hao <flyingpeng@tencent.com>
Date: Tue, 8 Nov 2022 11:50:54 +0800
Subject: [PATCH 3117/4122] KVM: x86: Keep the lock order consistent between
 SRCU and gpc spinlock

Acquire SRCU before taking the gpc spinlock in wait_pending_event() so as
to be consistent with all other functions that acquire both locks.  It's
not illegal to acquire SRCU inside a spinlock, nor is there deadlock
potential, but in general it's preferable to order locks from least
restrictive to most restrictive, e.g. if wait_pending_event() needed to
sleep for whatever reason, it could do so while holding SRCU, but would
need to drop the spinlock.

Signed-off-by: Peng Hao <flyingpeng@tencent.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/CAPm50a++Cb=QfnjMZ2EnCj-Sb9Y4UM-=uOEtHAcjnNLCAAf-dQ@mail.gmail.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/xen.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index 9187d024d0066..2f21fa5ee7de2 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -1165,8 +1165,8 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 	bool ret = true;
 	int idx, i;
 
-	read_lock_irqsave(&gpc->lock, flags);
 	idx = srcu_read_lock(&kvm->srcu);
+	read_lock_irqsave(&gpc->lock, flags);
 	if (!kvm_gpc_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
 		goto out_rcu;
 
@@ -1187,8 +1187,8 @@ static bool wait_pending_event(struct kvm_vcpu *vcpu, int nr_ports,
 	}
 
  out_rcu:
-	srcu_read_unlock(&kvm->srcu, idx);
 	read_unlock_irqrestore(&gpc->lock, flags);
+	srcu_read_unlock(&kvm->srcu, idx);
 
 	return ret;
 }
-- 
GitLab


From 17122c06b86c9f77f45b86b8e62c3ed440847a59 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 30 Sep 2022 23:36:32 +0000
Subject: [PATCH 3118/4122] KVM: x86: Fail emulation during EMULTYPE_SKIP on
 any exception

Treat any exception during instruction decode for EMULTYPE_SKIP as a
"full" emulation failure, i.e. signal failure instead of queuing the
exception.  When decoding purely to skip an instruction, KVM and/or the
CPU has already done some amount of emulation that cannot be unwound,
e.g. on an EPT misconfig VM-Exit KVM has already processeed the emulated
MMIO.  KVM already does this if a #UD is encountered, but not for other
exceptions, e.g. if a #PF is encountered during fetch.

In SVM's soft-injection use case, queueing the exception is particularly
problematic as queueing exceptions while injecting events can put KVM
into an infinite loop due to bailing from VM-Enter to service the newly
pending exception.  E.g. multiple warnings to detect such behavior fire:

  ------------[ cut here ]------------
  WARNING: CPU: 3 PID: 1017 at arch/x86/kvm/x86.c:9873 kvm_arch_vcpu_ioctl_run+0x1de5/0x20a0 [kvm]
  Modules linked in: kvm_amd ccp kvm irqbypass
  CPU: 3 PID: 1017 Comm: svm_nested_soft Not tainted 6.0.0-rc1+ #220
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:kvm_arch_vcpu_ioctl_run+0x1de5/0x20a0 [kvm]
  Call Trace:
   kvm_vcpu_ioctl+0x223/0x6d0 [kvm]
   __x64_sys_ioctl+0x85/0xc0
   do_syscall_64+0x2b/0x50
   entry_SYSCALL_64_after_hwframe+0x46/0xb0
  ---[ end trace 0000000000000000 ]---
  ------------[ cut here ]------------
  WARNING: CPU: 3 PID: 1017 at arch/x86/kvm/x86.c:9987 kvm_arch_vcpu_ioctl_run+0x12a3/0x20a0 [kvm]
  Modules linked in: kvm_amd ccp kvm irqbypass
  CPU: 3 PID: 1017 Comm: svm_nested_soft Tainted: G        W          6.0.0-rc1+ #220
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:kvm_arch_vcpu_ioctl_run+0x12a3/0x20a0 [kvm]
  Call Trace:
   kvm_vcpu_ioctl+0x223/0x6d0 [kvm]
   __x64_sys_ioctl+0x85/0xc0
   do_syscall_64+0x2b/0x50
   entry_SYSCALL_64_after_hwframe+0x46/0xb0
  ---[ end trace 0000000000000000 ]---

Fixes: 6ea6e84309ca ("KVM: x86: inject exceptions produced by x86_decode_insn")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220930233632.1725475-1-seanjc@google.com
---
 arch/x86/kvm/x86.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7f850dfb40861..ef12747ecb63d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8772,7 +8772,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 						  write_fault_to_spt,
 						  emulation_type))
 				return 1;
-			if (ctxt->have_exception) {
+
+			if (ctxt->have_exception &&
+			    !(emulation_type & EMULTYPE_SKIP)) {
 				/*
 				 * #UD should result in just EMULATION_FAILED, and trap-like
 				 * exception should not be encountered during decode.
-- 
GitLab


From 5c30e8101e8d5d020b1d7119117889756a6ed713 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Fri, 30 Sep 2022 23:40:31 +0000
Subject: [PATCH 3119/4122] KVM: SVM: Skip WRMSR fastpath on VM-Exit if next
 RIP isn't valid

Skip the WRMSR fastpath in SVM's VM-Exit handler if the next RIP isn't
valid, e.g. because KVM is running with nrips=false.  SVM must decode and
emulate to skip the WRMSR if the CPU doesn't provide the next RIP.
Getting the instruction bytes to decode the WRMSR requires reading guest
memory, which in turn means dereferencing memslots, and that isn't safe
because KVM doesn't hold SRCU when the fastpath runs.

Don't bother trying to enable the fastpath for this case, e.g. by doing
only the WRMSR and leaving the "skip" until later.  NRIPS is supported on
all modern CPUs (KVM has considered making it mandatory), and the next
RIP will be valid the vast, vast majority of the time.

  =============================
  WARNING: suspicious RCU usage
  6.0.0-smp--4e557fcd3d80-skip #13 Tainted: G           O
  -----------------------------
  include/linux/kvm_host.h:954 suspicious rcu_dereference_check() usage!

  other info that might help us debug this:

  rcu_scheduler_active = 2, debug_locks = 1
  1 lock held by stable/206475:
   #0: ffff9d9dfebcc0f0 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0x8b/0x620 [kvm]

  stack backtrace:
  CPU: 152 PID: 206475 Comm: stable Tainted: G           O       6.0.0-smp--4e557fcd3d80-skip #13
  Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 10.48.0 01/27/2022
  Call Trace:
   <TASK>
   dump_stack_lvl+0x69/0xaa
   dump_stack+0x10/0x12
   lockdep_rcu_suspicious+0x11e/0x130
   kvm_vcpu_gfn_to_memslot+0x155/0x190 [kvm]
   kvm_vcpu_gfn_to_hva_prot+0x18/0x80 [kvm]
   paging64_walk_addr_generic+0x183/0x450 [kvm]
   paging64_gva_to_gpa+0x63/0xd0 [kvm]
   kvm_fetch_guest_virt+0x53/0xc0 [kvm]
   __do_insn_fetch_bytes+0x18b/0x1c0 [kvm]
   x86_decode_insn+0xf0/0xef0 [kvm]
   x86_emulate_instruction+0xba/0x790 [kvm]
   kvm_emulate_instruction+0x17/0x20 [kvm]
   __svm_skip_emulated_instruction+0x85/0x100 [kvm_amd]
   svm_skip_emulated_instruction+0x13/0x20 [kvm_amd]
   handle_fastpath_set_msr_irqoff+0xae/0x180 [kvm]
   svm_vcpu_run+0x4b8/0x5a0 [kvm_amd]
   vcpu_enter_guest+0x16ca/0x22f0 [kvm]
   kvm_arch_vcpu_ioctl_run+0x39d/0x900 [kvm]
   kvm_vcpu_ioctl+0x538/0x620 [kvm]
   __se_sys_ioctl+0x77/0xc0
   __x64_sys_ioctl+0x1d/0x20
   do_syscall_64+0x3d/0x80
   entry_SYSCALL_64_after_hwframe+0x63/0xcd

Fixes: 404d5d7bff0d ("KVM: X86: Introduce more exit_fastpath_completion enum values")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220930234031.1732249-1-seanjc@google.com
---
 arch/x86/kvm/svm/svm.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 91352d6928452..6ffadbd577447 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -3895,8 +3895,14 @@ static int svm_vcpu_pre_run(struct kvm_vcpu *vcpu)
 
 static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
 {
-	if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR &&
-	    to_svm(vcpu)->vmcb->control.exit_info_1)
+	struct vmcb_control_area *control = &to_svm(vcpu)->vmcb->control;
+
+	/*
+	 * Note, the next RIP must be provided as SRCU isn't held, i.e. KVM
+	 * can't read guest memory (dereference memslots) to decode the WRMSR.
+	 */
+	if (control->exit_code == SVM_EXIT_MSR && control->exit_info_1 &&
+	    nrips && control->next_rip)
 		return handle_fastpath_set_msr_irqoff(vcpu);
 
 	return EXIT_FASTPATH_NONE;
-- 
GitLab


From a8a12c0069b9e3c909b3c22bf8711d2f18a0af97 Mon Sep 17 00:00:00 2001
From: Zhao Liu <zhao1.liu@intel.com>
Date: Wed, 28 Sep 2022 17:27:48 +0800
Subject: [PATCH 3120/4122] KVM: SVM: Replace kmap_atomic() with
 kmap_local_page()

The use of kmap_atomic() is being deprecated in favor of
kmap_local_page()[1].

The main difference between atomic and local mappings is that local
mappings don't disable page faults or preemption.

There're 2 reasons we can use kmap_local_page() here:
1. SEV is 64-bit only and kmap_local_page() doesn't disable migration in
this case, but here the function clflush_cache_range() uses CLFLUSHOPT
instruction to flush, and on x86 CLFLUSHOPT is not CPU-local and flushes
the page out of the entire cache hierarchy on all CPUs (APM volume 3,
chapter 3, CLFLUSHOPT). So there's no need to disable preemption to ensure
CPU-local.
2. clflush_cache_range() doesn't need to disable pagefault and the mapping
is still valid even if sleeps. This is also true for sched out/in when
preempted.

In addition, though kmap_local_page() is a thin wrapper around
page_address() on 64-bit, kmap_local_page() should still be used here in
preference to page_address() since page_address() isn't suitable to be used
in a generic function (like sev_clflush_pages()) where the page passed in
is not easy to determine the source of allocation. Keeping the kmap* API in
place means it can be used for things other than highmem mappings[2].

Therefore, sev_clflush_pages() is a function that should use
kmap_local_page() in place of kmap_atomic().

Convert the calls of kmap_atomic() / kunmap_atomic() to kmap_local_page() /
kunmap_local().

[1]: https://lore.kernel.org/all/20220813220034.806698-1-ira.weiny@intel.com
[2]: https://lore.kernel.org/lkml/5d667258-b58b-3d28-3609-e7914c99b31b@intel.com/

Suggested-by: Dave Hansen <dave.hansen@intel.com>
Suggested-by: Ira Weiny <ira.weiny@intel.com>
Suggested-by: Fabio M. De Francesco <fmdefrancesco@gmail.com>
Signed-off-by: Zhao Liu <zhao1.liu@intel.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220928092748.463631-1-zhao1.liu@linux.intel.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/svm/sev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 69dbf17f0d6ad..86d6897f48068 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -465,9 +465,9 @@ static void sev_clflush_pages(struct page *pages[], unsigned long npages)
 		return;
 
 	for (i = 0; i < npages; i++) {
-		page_virtual = kmap_atomic(pages[i]);
+		page_virtual = kmap_local_page(pages[i]);
 		clflush_cache_range(page_virtual, PAGE_SIZE);
-		kunmap_atomic(page_virtual);
+		kunmap_local(page_virtual);
 		cond_resched();
 	}
 }
-- 
GitLab


From 9cc409325ddd776f6fd6293d5ce93ce1248af6e4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 6 Oct 2022 00:19:56 +0000
Subject: [PATCH 3121/4122] KVM: nVMX: Inject #GP, not #UD, if "generic" VMXON
 CR0/CR4 check fails

Inject #GP for if VMXON is attempting with a CR0/CR4 that fails the
generic "is CRx valid" check, but passes the CR4.VMXE check, and do the
generic checks _after_ handling the post-VMXON VM-Fail.

The CR4.VMXE check, and all other #UD cases, are special pre-conditions
that are enforced prior to pivoting on the current VMX mode, i.e. occur
before interception if VMXON is attempted in VMX non-root mode.

All other CR0/CR4 checks generate #GP and effectively have lower priority
than the post-VMXON check.

Per the SDM:

    IF (register operand) or (CR0.PE = 0) or (CR4.VMXE = 0) or ...
        THEN #UD;
    ELSIF not in VMX operation
        THEN
            IF (CPL > 0) or (in A20M mode) or
            (the values of CR0 and CR4 are not supported in VMX operation)
                THEN #GP(0);
    ELSIF in VMX non-root operation
        THEN VMexit;
    ELSIF CPL > 0
        THEN #GP(0);
    ELSE VMfail("VMXON executed in VMX root operation");
    FI;

which, if re-written without ELSIF, yields:

    IF (register operand) or (CR0.PE = 0) or (CR4.VMXE = 0) or ...
        THEN #UD

    IF in VMX non-root operation
        THEN VMexit;

    IF CPL > 0
        THEN #GP(0)

    IF in VMX operation
        THEN VMfail("VMXON executed in VMX root operation");

    IF (in A20M mode) or
       (the values of CR0 and CR4 are not supported in VMX operation)
                THEN #GP(0);

Note, KVM unconditionally forwards VMXON VM-Exits that occur in L2 to L1,
i.e. there is no need to check the vCPU is not in VMX non-root mode.  Add
a comment to explain why unconditionally forwarding such exits is
functionally correct.

Reported-by: Eric Li <ercli@ucdavis.edu>
Fixes: c7d855c2aff2 ("KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4")
Cc: stable@vger.kernel.org
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221006001956.329314-1-seanjc@google.com
---
 arch/x86/kvm/vmx/nested.c | 44 +++++++++++++++++++++++++++++----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index b28be793de298..8927910199681 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -5131,24 +5131,35 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
 		| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
 
 	/*
-	 * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
-	 * that have higher priority than VM-Exit (see Intel SDM's pseudocode
-	 * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
-	 * running the guest, i.e. KVM needs to check the _guest_ values.
+	 * Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter
+	 * the guest and so cannot rely on hardware to perform the check,
+	 * which has higher priority than VM-Exit (see Intel SDM's pseudocode
+	 * for VMXON).
 	 *
-	 * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
-	 * !COMPATIBILITY modes.  KVM may run the guest in VM86 to emulate Real
-	 * Mode, but KVM will never take the guest out of those modes.
+	 * Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86
+	 * and !COMPATIBILITY modes.  For an unrestricted guest, KVM doesn't
+	 * force any of the relevant guest state.  For a restricted guest, KVM
+	 * does force CR0.PE=1, but only to also force VM86 in order to emulate
+	 * Real Mode, and so there's no need to check CR0.PE manually.
 	 */
-	if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
-	    !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
+	if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
 		kvm_queue_exception(vcpu, UD_VECTOR);
 		return 1;
 	}
 
 	/*
-	 * CPL=0 and all other checks that are lower priority than VM-Exit must
-	 * be checked manually.
+	 * The CPL is checked for "not in VMX operation" and for "in VMX root",
+	 * and has higher priority than the VM-Fail due to being post-VMXON,
+	 * i.e. VMXON #GPs outside of VMX non-root if CPL!=0.  In VMX non-root,
+	 * VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits
+	 * from L2 to L1, i.e. there's no need to check for the vCPU being in
+	 * VMX non-root.
+	 *
+	 * Forwarding the VM-Exit unconditionally, i.e. without performing the
+	 * #UD checks (see above), is functionally ok because KVM doesn't allow
+	 * L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's
+	 * CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are
+	 * missed by hardware due to shadowing CR0 and/or CR4.
 	 */
 	if (vmx_get_cpl(vcpu)) {
 		kvm_inject_gp(vcpu, 0);
@@ -5158,6 +5169,17 @@ static int handle_vmxon(struct kvm_vcpu *vcpu)
 	if (vmx->nested.vmxon)
 		return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
 
+	/*
+	 * Invalid CR0/CR4 generates #GP.  These checks are performed if and
+	 * only if the vCPU isn't already in VMX operation, i.e. effectively
+	 * have lower priority than the VM-Fail above.
+	 */
+	if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
+	    !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
+		kvm_inject_gp(vcpu, 0);
+		return 1;
+	}
+
 	if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
 			!= VMXON_NEEDED_FEATURES) {
 		kvm_inject_gp(vcpu, 0);
-- 
GitLab


From 4f209989586c79e9bf59ba9381101f5fb449dfbb Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 19 Oct 2022 14:36:19 -0700
Subject: [PATCH 3122/4122] KVM: VMX: Guest usage of IA32_SPEC_CTRL is likely

At this point in time, most guests (in the default, out-of-the-box
configuration) are likely to use IA32_SPEC_CTRL.  Therefore, drop the
compiler hint that it is unlikely for KVM to be intercepting WRMSR of
IA32_SPEC_CTRL.

Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221019213620.1953281-2-jmattson@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cea8c07f52298..cb40f724d8cc0 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -858,7 +858,7 @@ unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
 	 * to change it directly without causing a vmexit.  In that case read
 	 * it after vmexit and store it in vmx->spec_ctrl.
 	 */
-	if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+	if (!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL))
 		flags |= VMX_RUN_SAVE_SPEC_CTRL;
 
 	return flags;
-- 
GitLab


From 2e7eab81425ad6c875f2ed47c0ce01e78afc38a5 Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Wed, 19 Oct 2022 14:36:20 -0700
Subject: [PATCH 3123/4122] KVM: VMX: Execute IBPB on emulated VM-exit when
 guest has IBRS

According to Intel's document on Indirect Branch Restricted
Speculation, "Enabling IBRS does not prevent software from controlling
the predicted targets of indirect branches of unrelated software
executed later at the same predictor mode (for example, between two
different user applications, or two different virtual machines). Such
isolation can be ensured through use of the Indirect Branch Predictor
Barrier (IBPB) command." This applies to both basic and enhanced IBRS.

Since L1 and L2 VMs share hardware predictor modes (guest-user and
guest-kernel), hardware IBRS is not sufficient to virtualize
IBRS. (The way that basic IBRS is implemented on pre-eIBRS parts,
hardware IBRS is actually sufficient in practice, even though it isn't
sufficient architecturally.)

For virtual CPUs that support IBRS, add an indirect branch prediction
barrier on emulated VM-exit, to ensure that the predicted targets of
indirect branches executed in L1 cannot be controlled by software that
was executed in L2.

Since we typically don't intercept guest writes to IA32_SPEC_CTRL,
perform the IBPB at emulated VM-exit regardless of the current
IA32_SPEC_CTRL.IBRS value, even though the IBPB could technically be
deferred until L1 sets IA32_SPEC_CTRL.IBRS, if IA32_SPEC_CTRL.IBRS is
clear at emulated VM-exit.

This is CVE-2022-2196.

Fixes: 5c911beff20a ("KVM: nVMX: Skip IBPB when switching between vmcs01 and vmcs02")
Cc: Sean Christopherson <seanjc@google.com>
Signed-off-by: Jim Mattson <jmattson@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221019213620.1953281-3-jmattson@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/vmx/nested.c | 11 +++++++++++
 arch/x86/kvm/vmx/vmx.c    |  6 ++++--
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 8927910199681..61c83424285c8 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4798,6 +4798,17 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
 
 	vmx_switch_vmcs(vcpu, &vmx->vmcs01);
 
+	/*
+	 * If IBRS is advertised to the vCPU, KVM must flush the indirect
+	 * branch predictors when transitioning from L2 to L1, as L1 expects
+	 * hardware (KVM in this case) to provide separate predictor modes.
+	 * Bare metal isolates VMX root (host) from VMX non-root (guest), but
+	 * doesn't isolate different VMCSs, i.e. in this case, doesn't provide
+	 * separate modes for L2 vs L1.
+	 */
+	if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
+		indirect_branch_prediction_barrier();
+
 	/* Update any VMCS fields that might have changed while L2 ran */
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
 	vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cb40f724d8cc0..3f31c46c306e8 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1348,8 +1348,10 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu,
 
 		/*
 		 * No indirect branch prediction barrier needed when switching
-		 * the active VMCS within a guest, e.g. on nested VM-Enter.
-		 * The L1 VMM can protect itself with retpolines, IBPB or IBRS.
+		 * the active VMCS within a vCPU, unless IBRS is advertised to
+		 * the vCPU.  To minimize the number of IBPBs executed, KVM
+		 * performs IBPB on nested VM-Exit (a single nested transition
+		 * may switch the active VMCS multiple times).
 		 */
 		if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev))
 			indirect_branch_prediction_barrier();
-- 
GitLab


From 658234de0d2ed3a1b86d793f4772e38a2e039b35 Mon Sep 17 00:00:00 2001
From: Kevin Tian <kevin.tian@intel.com>
Date: Tue, 29 Nov 2022 16:29:28 -0400
Subject: [PATCH 3124/4122] iommufd: Document overview of iommufd

Add iommufd into the documentation tree, and supply initial documentation.
Much of this is linked from code comments by kdoc.

Link: https://lore.kernel.org/r/5-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 Documentation/userspace-api/index.rst   |   1 +
 Documentation/userspace-api/iommufd.rst | 223 ++++++++++++++++++++++++
 2 files changed, 224 insertions(+)
 create mode 100644 Documentation/userspace-api/iommufd.rst

diff --git a/Documentation/userspace-api/index.rst b/Documentation/userspace-api/index.rst
index c78da9ce0ec44..f16337bdb8520 100644
--- a/Documentation/userspace-api/index.rst
+++ b/Documentation/userspace-api/index.rst
@@ -25,6 +25,7 @@ place where this information is gathered.
    ebpf/index
    ioctl/index
    iommu
+   iommufd
    media/index
    netlink/index
    sysfs-platform_profile
diff --git a/Documentation/userspace-api/iommufd.rst b/Documentation/userspace-api/iommufd.rst
new file mode 100644
index 0000000000000..79dd9eb515874
--- /dev/null
+++ b/Documentation/userspace-api/iommufd.rst
@@ -0,0 +1,223 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+=======
+IOMMUFD
+=======
+
+:Author: Jason Gunthorpe
+:Author: Kevin Tian
+
+Overview
+========
+
+IOMMUFD is the user API to control the IOMMU subsystem as it relates to managing
+IO page tables from userspace using file descriptors. It intends to be general
+and consumable by any driver that wants to expose DMA to userspace. These
+drivers are eventually expected to deprecate any internal IOMMU logic
+they may already/historically implement (e.g. vfio_iommu_type1.c).
+
+At minimum iommufd provides universal support of managing I/O address spaces and
+I/O page tables for all IOMMUs, with room in the design to add non-generic
+features to cater to specific hardware functionality.
+
+In this context the capital letter (IOMMUFD) refers to the subsystem while the
+small letter (iommufd) refers to the file descriptors created via /dev/iommu for
+use by userspace.
+
+Key Concepts
+============
+
+User Visible Objects
+--------------------
+
+Following IOMMUFD objects are exposed to userspace:
+
+- IOMMUFD_OBJ_IOAS, representing an I/O address space (IOAS), allowing map/unmap
+  of user space memory into ranges of I/O Virtual Address (IOVA).
+
+  The IOAS is a functional replacement for the VFIO container, and like the VFIO
+  container it copies an IOVA map to a list of iommu_domains held within it.
+
+- IOMMUFD_OBJ_DEVICE, representing a device that is bound to iommufd by an
+  external driver.
+
+- IOMMUFD_OBJ_HW_PAGETABLE, representing an actual hardware I/O page table
+  (i.e. a single struct iommu_domain) managed by the iommu driver.
+
+  The IOAS has a list of HW_PAGETABLES that share the same IOVA mapping and
+  it will synchronize its mapping with each member HW_PAGETABLE.
+
+All user-visible objects are destroyed via the IOMMU_DESTROY uAPI.
+
+The diagram below shows relationship between user-visible objects and kernel
+datastructures (external to iommufd), with numbers referred to operations
+creating the objects and links::
+
+  _________________________________________________________
+ |                         iommufd                         |
+ |       [1]                                               |
+ |  _________________                                      |
+ | |                 |                                     |
+ | |                 |                                     |
+ | |                 |                                     |
+ | |                 |                                     |
+ | |                 |                                     |
+ | |                 |                                     |
+ | |                 |        [3]                 [2]      |
+ | |                 |    ____________         __________  |
+ | |      IOAS       |<--|            |<------|          | |
+ | |                 |   |HW_PAGETABLE|       |  DEVICE  | |
+ | |                 |   |____________|       |__________| |
+ | |                 |         |                   |       |
+ | |                 |         |                   |       |
+ | |                 |         |                   |       |
+ | |                 |         |                   |       |
+ | |                 |         |                   |       |
+ | |_________________|         |                   |       |
+ |         |                   |                   |       |
+ |_________|___________________|___________________|_______|
+           |                   |                   |
+           |              _____v______      _______v_____
+           | PFN storage |            |    |             |
+           |------------>|iommu_domain|    |struct device|
+                         |____________|    |_____________|
+
+1. IOMMUFD_OBJ_IOAS is created via the IOMMU_IOAS_ALLOC uAPI. An iommufd can
+   hold multiple IOAS objects. IOAS is the most generic object and does not
+   expose interfaces that are specific to single IOMMU drivers. All operations
+   on the IOAS must operate equally on each of the iommu_domains inside of it.
+
+2. IOMMUFD_OBJ_DEVICE is created when an external driver calls the IOMMUFD kAPI
+   to bind a device to an iommufd. The driver is expected to implement a set of
+   ioctls to allow userspace to initiate the binding operation. Successful
+   completion of this operation establishes the desired DMA ownership over the
+   device. The driver must also set the driver_managed_dma flag and must not
+   touch the device until this operation succeeds.
+
+3. IOMMUFD_OBJ_HW_PAGETABLE is created when an external driver calls the IOMMUFD
+   kAPI to attach a bound device to an IOAS. Similarly the external driver uAPI
+   allows userspace to initiate the attaching operation. If a compatible
+   pagetable already exists then it is reused for the attachment. Otherwise a
+   new pagetable object and iommu_domain is created. Successful completion of
+   this operation sets up the linkages among IOAS, device and iommu_domain. Once
+   this completes the device could do DMA.
+
+   Every iommu_domain inside the IOAS is also represented to userspace as a
+   HW_PAGETABLE object.
+
+   .. note::
+
+      Future IOMMUFD updates will provide an API to create and manipulate the
+      HW_PAGETABLE directly.
+
+A device can only bind to an iommufd due to DMA ownership claim and attach to at
+most one IOAS object (no support of PASID yet).
+
+Kernel Datastructure
+--------------------
+
+User visible objects are backed by following datastructures:
+
+- iommufd_ioas for IOMMUFD_OBJ_IOAS.
+- iommufd_device for IOMMUFD_OBJ_DEVICE.
+- iommufd_hw_pagetable for IOMMUFD_OBJ_HW_PAGETABLE.
+
+Several terminologies when looking at these datastructures:
+
+- Automatic domain - refers to an iommu domain created automatically when
+  attaching a device to an IOAS object. This is compatible to the semantics of
+  VFIO type1.
+
+- Manual domain - refers to an iommu domain designated by the user as the
+  target pagetable to be attached to by a device. Though currently there are
+  no uAPIs to directly create such domain, the datastructure and algorithms
+  are ready for handling that use case.
+
+- In-kernel user - refers to something like a VFIO mdev that is using the
+  IOMMUFD access interface to access the IOAS. This starts by creating an
+  iommufd_access object that is similar to the domain binding a physical device
+  would do. The access object will then allow converting IOVA ranges into struct
+  page * lists, or doing direct read/write to an IOVA.
+
+iommufd_ioas serves as the metadata datastructure to manage how IOVA ranges are
+mapped to memory pages, composed of:
+
+- struct io_pagetable holding the IOVA map
+- struct iopt_area's representing populated portions of IOVA
+- struct iopt_pages representing the storage of PFNs
+- struct iommu_domain representing the IO page table in the IOMMU
+- struct iopt_pages_access representing in-kernel users of PFNs
+- struct xarray pinned_pfns holding a list of pages pinned by in-kernel users
+
+Each iopt_pages represents a logical linear array of full PFNs. The PFNs are
+ultimately derived from userspace VAs via an mm_struct. Once they have been
+pinned the PFNs are stored in IOPTEs of an iommu_domain or inside the pinned_pfns
+xarray if they have been pinned through an iommufd_access.
+
+PFN have to be copied between all combinations of storage locations, depending
+on what domains are present and what kinds of in-kernel "software access" users
+exist. The mechanism ensures that a page is pinned only once.
+
+An io_pagetable is composed of iopt_areas pointing at iopt_pages, along with a
+list of iommu_domains that mirror the IOVA to PFN map.
+
+Multiple io_pagetable-s, through their iopt_area-s, can share a single
+iopt_pages which avoids multi-pinning and double accounting of page
+consumption.
+
+iommufd_ioas is sharable between subsystems, e.g. VFIO and VDPA, as long as
+devices managed by different subsystems are bound to a same iommufd.
+
+IOMMUFD User API
+================
+
+.. kernel-doc:: include/uapi/linux/iommufd.h
+
+IOMMUFD Kernel API
+==================
+
+The IOMMUFD kAPI is device-centric with group-related tricks managed behind the
+scene. This allows the external drivers calling such kAPI to implement a simple
+device-centric uAPI for connecting its device to an iommufd, instead of
+explicitly imposing the group semantics in its uAPI as VFIO does.
+
+.. kernel-doc:: drivers/iommu/iommufd/device.c
+   :export:
+
+.. kernel-doc:: drivers/iommu/iommufd/main.c
+   :export:
+
+VFIO and IOMMUFD
+----------------
+
+Connecting a VFIO device to iommufd can be done in two ways.
+
+First is a VFIO compatible way by directly implementing the /dev/vfio/vfio
+container IOCTLs by mapping them into io_pagetable operations. Doing so allows
+the use of iommufd in legacy VFIO applications by symlinking /dev/vfio/vfio to
+/dev/iommufd or extending VFIO to SET_CONTAINER using an iommufd instead of a
+container fd.
+
+The second approach directly extends VFIO to support a new set of device-centric
+user API based on aforementioned IOMMUFD kernel API. It requires userspace
+change but better matches the IOMMUFD API semantics and easier to support new
+iommufd features when comparing it to the first approach.
+
+Currently both approaches are still work-in-progress.
+
+There are still a few gaps to be resolved to catch up with VFIO type1, as
+documented in iommufd_vfio_check_extension().
+
+Future TODOs
+============
+
+Currently IOMMUFD supports only kernel-managed I/O page table, similar to VFIO
+type1. New features on the radar include:
+
+ - Binding iommu_domain's to PASID/SSID
+ - Userspace page tables, for ARM, x86 and S390
+ - Kernel bypass'd invalidation of user page tables
+ - Re-use of the KVM page table in the IOMMU
+ - Dirty page tracking in the IOMMU
+ - Runtime Increase/Decrease of IOPTE size
+ - PRI support with faults resolved in userspace
-- 
GitLab


From 2ff4bed7fee72ba1abfcff5f11ae8f8e570353f2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:29 -0400
Subject: [PATCH 3125/4122] iommufd: File descriptor, context, kconfig and
 makefiles

This is the basic infrastructure of a new miscdevice to hold the iommufd
IOCTL API.

It provides:
 - A miscdevice to create file descriptors to run the IOCTL interface over

 - A table based ioctl dispatch and centralized extendable pre-validation
   step

 - An xarray mapping userspace ID's to kernel objects. The design has
   multiple inter-related objects held within in a single IOMMUFD fd

 - A simple usage count to build a graph of object relations and protect
   against hostile userspace racing ioctls

The only IOCTL provided in this patch is the generic 'destroy any object
by handle' operation.

Link: https://lore.kernel.org/r/6-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 .../userspace-api/ioctl/ioctl-number.rst      |   1 +
 MAINTAINERS                                   |  12 +
 drivers/iommu/Kconfig                         |   1 +
 drivers/iommu/Makefile                        |   2 +-
 drivers/iommu/iommufd/Kconfig                 |  12 +
 drivers/iommu/iommufd/Makefile                |   5 +
 drivers/iommu/iommufd/iommufd_private.h       | 109 ++++++
 drivers/iommu/iommufd/main.c                  | 344 ++++++++++++++++++
 include/linux/iommufd.h                       |  31 ++
 include/uapi/linux/iommufd.h                  |  55 +++
 10 files changed, 571 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/iommufd/Kconfig
 create mode 100644 drivers/iommu/iommufd/Makefile
 create mode 100644 drivers/iommu/iommufd/iommufd_private.h
 create mode 100644 drivers/iommu/iommufd/main.c
 create mode 100644 include/linux/iommufd.h
 create mode 100644 include/uapi/linux/iommufd.h

diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst
index 5f81e2a24a5c0..eb045fc495a4e 100644
--- a/Documentation/userspace-api/ioctl/ioctl-number.rst
+++ b/Documentation/userspace-api/ioctl/ioctl-number.rst
@@ -105,6 +105,7 @@ Code  Seq#    Include File                                           Comments
 '8'   all                                                            SNP8023 advanced NIC card
                                                                      <mailto:mcr@solidum.com>
 ';'   64-7F  linux/vfio.h
+';'   80-FF  linux/iommufd.h
 '='   00-3f  uapi/linux/ptp_clock.h                                  <mailto:richardcochran@gmail.com>
 '@'   00-0F  linux/radeonfb.h                                        conflict!
 '@'   00-0F  drivers/video/aty/aty128fb.c                            conflict!
diff --git a/MAINTAINERS b/MAINTAINERS
index 379945f82a643..c0a93779731d7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10717,6 +10717,18 @@ F:	drivers/iommu/dma-iommu.h
 F:	drivers/iommu/iova.c
 F:	include/linux/iova.h
 
+IOMMUFD
+M:	Jason Gunthorpe <jgg@nvidia.com>
+M:	Kevin Tian <kevin.tian@intel.com>
+L:	iommu@lists.linux.dev
+S:	Maintained
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git
+F:	Documentation/userspace-api/iommufd.rst
+F:	drivers/iommu/iommufd/
+F:	include/linux/iommufd.h
+F:	include/uapi/linux/iommufd.h
+F:	tools/testing/selftests/iommu/
+
 IOMMU SUBSYSTEM
 M:	Joerg Roedel <joro@8bytes.org>
 M:	Will Deacon <will@kernel.org>
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index dc5f7a156ff5e..319966cde5cf6 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -188,6 +188,7 @@ config MSM_IOMMU
 
 source "drivers/iommu/amd/Kconfig"
 source "drivers/iommu/intel/Kconfig"
+source "drivers/iommu/iommufd/Kconfig"
 
 config IRQ_REMAP
 	bool "Support for Interrupt Remapping"
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 7fbf6a3376620..f461d06513856 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-y += amd/ intel/ arm/
+obj-y += amd/ intel/ arm/ iommufd/
 obj-$(CONFIG_IOMMU_API) += iommu.o
 obj-$(CONFIG_IOMMU_API) += iommu-traces.o
 obj-$(CONFIG_IOMMU_API) += iommu-sysfs.o
diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
new file mode 100644
index 0000000000000..164812084a675
--- /dev/null
+++ b/drivers/iommu/iommufd/Kconfig
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config IOMMUFD
+	tristate "IOMMU Userspace API"
+	select INTERVAL_TREE
+	select INTERVAL_TREE_SPAN_ITER
+	select IOMMU_API
+	default n
+	help
+	  Provides /dev/iommu, the user API to control the IOMMU subsystem as
+	  it relates to managing IO page tables that point at user space memory.
+
+	  If you don't know what to do here, say N.
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
new file mode 100644
index 0000000000000..a07a8cffe937c
--- /dev/null
+++ b/drivers/iommu/iommufd/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+iommufd-y := \
+	main.o
+
+obj-$(CONFIG_IOMMUFD) += iommufd.o
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
new file mode 100644
index 0000000000000..bb720bc113178
--- /dev/null
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __IOMMUFD_PRIVATE_H
+#define __IOMMUFD_PRIVATE_H
+
+#include <linux/rwsem.h>
+#include <linux/xarray.h>
+#include <linux/refcount.h>
+#include <linux/uaccess.h>
+
+struct iommufd_ctx {
+	struct file *file;
+	struct xarray objects;
+};
+
+struct iommufd_ucmd {
+	struct iommufd_ctx *ictx;
+	void __user *ubuffer;
+	u32 user_size;
+	void *cmd;
+};
+
+/* Copy the response in ucmd->cmd back to userspace. */
+static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
+				       size_t cmd_len)
+{
+	if (copy_to_user(ucmd->ubuffer, ucmd->cmd,
+			 min_t(size_t, ucmd->user_size, cmd_len)))
+		return -EFAULT;
+	return 0;
+}
+
+enum iommufd_object_type {
+	IOMMUFD_OBJ_NONE,
+	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+};
+
+/* Base struct for all objects with a userspace ID handle. */
+struct iommufd_object {
+	struct rw_semaphore destroy_rwsem;
+	refcount_t users;
+	enum iommufd_object_type type;
+	unsigned int id;
+};
+
+static inline bool iommufd_lock_obj(struct iommufd_object *obj)
+{
+	if (!down_read_trylock(&obj->destroy_rwsem))
+		return false;
+	if (!refcount_inc_not_zero(&obj->users)) {
+		up_read(&obj->destroy_rwsem);
+		return false;
+	}
+	return true;
+}
+
+struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
+					  enum iommufd_object_type type);
+static inline void iommufd_put_object(struct iommufd_object *obj)
+{
+	refcount_dec(&obj->users);
+	up_read(&obj->destroy_rwsem);
+}
+
+/**
+ * iommufd_ref_to_users() - Switch from destroy_rwsem to users refcount
+ *        protection
+ * @obj - Object to release
+ *
+ * Objects have two refcount protections (destroy_rwsem and the refcount_t
+ * users). Holding either of these will prevent the object from being destroyed.
+ *
+ * Depending on the use case, one protection or the other is appropriate.  In
+ * most cases references are being protected by the destroy_rwsem. This allows
+ * orderly destruction of the object because iommufd_object_destroy_user() will
+ * wait for it to become unlocked. However, as a rwsem, it cannot be held across
+ * a system call return. So cases that have longer term needs must switch
+ * to the weaker users refcount_t.
+ *
+ * With users protection iommufd_object_destroy_user() will return false,
+ * refusing to destroy the object, causing -EBUSY to userspace.
+ */
+static inline void iommufd_ref_to_users(struct iommufd_object *obj)
+{
+	up_read(&obj->destroy_rwsem);
+	/* iommufd_lock_obj() obtains users as well */
+}
+void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj);
+void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
+				      struct iommufd_object *obj);
+void iommufd_object_finalize(struct iommufd_ctx *ictx,
+			     struct iommufd_object *obj);
+bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+				 struct iommufd_object *obj);
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+					     size_t size,
+					     enum iommufd_object_type type);
+
+#define iommufd_object_alloc(ictx, ptr, type)                                  \
+	container_of(_iommufd_object_alloc(                                    \
+			     ictx,                                             \
+			     sizeof(*(ptr)) + BUILD_BUG_ON_ZERO(               \
+						      offsetof(typeof(*(ptr)), \
+							       obj) != 0),     \
+			     type),                                            \
+		     typeof(*(ptr)), obj)
+
+#endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
new file mode 100644
index 0000000000000..dfbc68b97506d
--- /dev/null
+++ b/drivers/iommu/iommufd/main.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021 Intel Corporation
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ *
+ * iommufd provides control over the IOMMU HW objects created by IOMMU kernel
+ * drivers. IOMMU HW objects revolve around IO page tables that map incoming DMA
+ * addresses (IOVA) to CPU addresses.
+ */
+#define pr_fmt(fmt) "iommufd: " fmt
+
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/bug.h>
+#include <uapi/linux/iommufd.h>
+#include <linux/iommufd.h>
+
+#include "iommufd_private.h"
+
+struct iommufd_object_ops {
+	void (*destroy)(struct iommufd_object *obj);
+};
+static const struct iommufd_object_ops iommufd_object_ops[];
+
+struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
+					     size_t size,
+					     enum iommufd_object_type type)
+{
+	struct iommufd_object *obj;
+	int rc;
+
+	obj = kzalloc(size, GFP_KERNEL_ACCOUNT);
+	if (!obj)
+		return ERR_PTR(-ENOMEM);
+	obj->type = type;
+	init_rwsem(&obj->destroy_rwsem);
+	refcount_set(&obj->users, 1);
+
+	/*
+	 * Reserve an ID in the xarray but do not publish the pointer yet since
+	 * the caller hasn't initialized it yet. Once the pointer is published
+	 * in the xarray and visible to other threads we can't reliably destroy
+	 * it anymore, so the caller must complete all errorable operations
+	 * before calling iommufd_object_finalize().
+	 */
+	rc = xa_alloc(&ictx->objects, &obj->id, XA_ZERO_ENTRY,
+		      xa_limit_32b, GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_free;
+	return obj;
+out_free:
+	kfree(obj);
+	return ERR_PTR(rc);
+}
+
+/*
+ * Allow concurrent access to the object.
+ *
+ * Once another thread can see the object pointer it can prevent object
+ * destruction. Expect for special kernel-only objects there is no in-kernel way
+ * to reliably destroy a single object. Thus all APIs that are creating objects
+ * must use iommufd_object_abort() to handle their errors and only call
+ * iommufd_object_finalize() once object creation cannot fail.
+ */
+void iommufd_object_finalize(struct iommufd_ctx *ictx,
+			     struct iommufd_object *obj)
+{
+	void *old;
+
+	old = xa_store(&ictx->objects, obj->id, obj, GFP_KERNEL);
+	/* obj->id was returned from xa_alloc() so the xa_store() cannot fail */
+	WARN_ON(old);
+}
+
+/* Undo _iommufd_object_alloc() if iommufd_object_finalize() was not called */
+void iommufd_object_abort(struct iommufd_ctx *ictx, struct iommufd_object *obj)
+{
+	void *old;
+
+	old = xa_erase(&ictx->objects, obj->id);
+	WARN_ON(old);
+	kfree(obj);
+}
+
+/*
+ * Abort an object that has been fully initialized and needs destroy, but has
+ * not been finalized.
+ */
+void iommufd_object_abort_and_destroy(struct iommufd_ctx *ictx,
+				      struct iommufd_object *obj)
+{
+	iommufd_object_ops[obj->type].destroy(obj);
+	iommufd_object_abort(ictx, obj);
+}
+
+struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
+					  enum iommufd_object_type type)
+{
+	struct iommufd_object *obj;
+
+	xa_lock(&ictx->objects);
+	obj = xa_load(&ictx->objects, id);
+	if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
+	    !iommufd_lock_obj(obj))
+		obj = ERR_PTR(-ENOENT);
+	xa_unlock(&ictx->objects);
+	return obj;
+}
+
+/*
+ * The caller holds a users refcount and wants to destroy the object. Returns
+ * true if the object was destroyed. In all cases the caller no longer has a
+ * reference on obj.
+ */
+bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
+				 struct iommufd_object *obj)
+{
+	/*
+	 * The purpose of the destroy_rwsem is to ensure deterministic
+	 * destruction of objects used by external drivers and destroyed by this
+	 * function. Any temporary increment of the refcount must hold the read
+	 * side of this, such as during ioctl execution.
+	 */
+	down_write(&obj->destroy_rwsem);
+	xa_lock(&ictx->objects);
+	refcount_dec(&obj->users);
+	if (!refcount_dec_if_one(&obj->users)) {
+		xa_unlock(&ictx->objects);
+		up_write(&obj->destroy_rwsem);
+		return false;
+	}
+	__xa_erase(&ictx->objects, obj->id);
+	xa_unlock(&ictx->objects);
+	up_write(&obj->destroy_rwsem);
+
+	iommufd_object_ops[obj->type].destroy(obj);
+	kfree(obj);
+	return true;
+}
+
+static int iommufd_destroy(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_destroy *cmd = ucmd->cmd;
+	struct iommufd_object *obj;
+
+	obj = iommufd_get_object(ucmd->ictx, cmd->id, IOMMUFD_OBJ_ANY);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+	iommufd_ref_to_users(obj);
+	/* See iommufd_ref_to_users() */
+	if (!iommufd_object_destroy_user(ucmd->ictx, obj))
+		return -EBUSY;
+	return 0;
+}
+
+static int iommufd_fops_open(struct inode *inode, struct file *filp)
+{
+	struct iommufd_ctx *ictx;
+
+	ictx = kzalloc(sizeof(*ictx), GFP_KERNEL_ACCOUNT);
+	if (!ictx)
+		return -ENOMEM;
+
+	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
+	ictx->file = filp;
+	filp->private_data = ictx;
+	return 0;
+}
+
+static int iommufd_fops_release(struct inode *inode, struct file *filp)
+{
+	struct iommufd_ctx *ictx = filp->private_data;
+	struct iommufd_object *obj;
+
+	/*
+	 * The objects in the xarray form a graph of "users" counts, and we have
+	 * to destroy them in a depth first manner. Leaf objects will reduce the
+	 * users count of interior objects when they are destroyed.
+	 *
+	 * Repeatedly destroying all the "1 users" leaf objects will progress
+	 * until the entire list is destroyed. If this can't progress then there
+	 * is some bug related to object refcounting.
+	 */
+	while (!xa_empty(&ictx->objects)) {
+		unsigned int destroyed = 0;
+		unsigned long index;
+
+		xa_for_each(&ictx->objects, index, obj) {
+			if (!refcount_dec_if_one(&obj->users))
+				continue;
+			destroyed++;
+			xa_erase(&ictx->objects, index);
+			iommufd_object_ops[obj->type].destroy(obj);
+			kfree(obj);
+		}
+		/* Bug related to users refcount */
+		if (WARN_ON(!destroyed))
+			break;
+	}
+	kfree(ictx);
+	return 0;
+}
+
+union ucmd_buffer {
+	struct iommu_destroy destroy;
+};
+
+struct iommufd_ioctl_op {
+	unsigned int size;
+	unsigned int min_size;
+	unsigned int ioctl_num;
+	int (*execute)(struct iommufd_ucmd *ucmd);
+};
+
+#define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
+	[_IOC_NR(_ioctl) - IOMMUFD_CMD_BASE] = {                               \
+		.size = sizeof(_struct) +                                      \
+			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
+					  sizeof(_struct)),                    \
+		.min_size = offsetofend(_struct, _last),                       \
+		.ioctl_num = _ioctl,                                           \
+		.execute = _fn,                                                \
+	}
+static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
+	IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
+};
+
+static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
+			       unsigned long arg)
+{
+	const struct iommufd_ioctl_op *op;
+	struct iommufd_ucmd ucmd = {};
+	union ucmd_buffer buf;
+	unsigned int nr;
+	int ret;
+
+	ucmd.ictx = filp->private_data;
+	ucmd.ubuffer = (void __user *)arg;
+	ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
+	if (ret)
+		return ret;
+
+	nr = _IOC_NR(cmd);
+	if (nr < IOMMUFD_CMD_BASE ||
+	    (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops))
+		return -ENOIOCTLCMD;
+	op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE];
+	if (op->ioctl_num != cmd)
+		return -ENOIOCTLCMD;
+	if (ucmd.user_size < op->min_size)
+		return -EINVAL;
+
+	ucmd.cmd = &buf;
+	ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
+				    ucmd.user_size);
+	if (ret)
+		return ret;
+	ret = op->execute(&ucmd);
+	return ret;
+}
+
+static const struct file_operations iommufd_fops = {
+	.owner = THIS_MODULE,
+	.open = iommufd_fops_open,
+	.release = iommufd_fops_release,
+	.unlocked_ioctl = iommufd_fops_ioctl,
+};
+
+/**
+ * iommufd_ctx_get - Get a context reference
+ * @ictx: Context to get
+ *
+ * The caller must already hold a valid reference to ictx.
+ */
+void iommufd_ctx_get(struct iommufd_ctx *ictx)
+{
+	get_file(ictx->file);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_get, IOMMUFD);
+
+/**
+ * iommufd_ctx_from_file - Acquires a reference to the iommufd context
+ * @file: File to obtain the reference from
+ *
+ * Returns a pointer to the iommufd_ctx, otherwise ERR_PTR. The struct file
+ * remains owned by the caller and the caller must still do fput. On success
+ * the caller is responsible to call iommufd_ctx_put().
+ */
+struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
+{
+	struct iommufd_ctx *ictx;
+
+	if (file->f_op != &iommufd_fops)
+		return ERR_PTR(-EBADFD);
+	ictx = file->private_data;
+	iommufd_ctx_get(ictx);
+	return ictx;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_from_file, IOMMUFD);
+
+/**
+ * iommufd_ctx_put - Put back a reference
+ * @ictx: Context to put back
+ */
+void iommufd_ctx_put(struct iommufd_ctx *ictx)
+{
+	fput(ictx->file);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD);
+
+static const struct iommufd_object_ops iommufd_object_ops[] = {
+};
+
+static struct miscdevice iommu_misc_dev = {
+	.minor = MISC_DYNAMIC_MINOR,
+	.name = "iommu",
+	.fops = &iommufd_fops,
+	.nodename = "iommu",
+	.mode = 0660,
+};
+
+static int __init iommufd_init(void)
+{
+	int ret;
+
+	ret = misc_register(&iommu_misc_dev);
+	if (ret)
+		return ret;
+	return 0;
+}
+
+static void __exit iommufd_exit(void)
+{
+	misc_deregister(&iommu_misc_dev);
+}
+
+module_init(iommufd_init);
+module_exit(iommufd_exit);
+
+MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
new file mode 100644
index 0000000000000..d1817472c2737
--- /dev/null
+++ b/include/linux/iommufd.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Intel Corporation
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#ifndef __LINUX_IOMMUFD_H
+#define __LINUX_IOMMUFD_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+
+struct iommufd_ctx;
+struct file;
+
+void iommufd_ctx_get(struct iommufd_ctx *ictx);
+
+#if IS_ENABLED(CONFIG_IOMMUFD)
+struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
+void iommufd_ctx_put(struct iommufd_ctx *ictx);
+#else /* !CONFIG_IOMMUFD */
+static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline void iommufd_ctx_put(struct iommufd_ctx *ictx)
+{
+}
+#endif /* CONFIG_IOMMUFD */
+#endif
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
new file mode 100644
index 0000000000000..37de92f0534b8
--- /dev/null
+++ b/include/uapi/linux/iommufd.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef _UAPI_IOMMUFD_H
+#define _UAPI_IOMMUFD_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#define IOMMUFD_TYPE (';')
+
+/**
+ * DOC: General ioctl format
+ *
+ * The ioctl interface follows a general format to allow for extensibility. Each
+ * ioctl is passed in a structure pointer as the argument providing the size of
+ * the structure in the first u32. The kernel checks that any structure space
+ * beyond what it understands is 0. This allows userspace to use the backward
+ * compatible portion while consistently using the newer, larger, structures.
+ *
+ * ioctls use a standard meaning for common errnos:
+ *
+ *  - ENOTTY: The IOCTL number itself is not supported at all
+ *  - E2BIG: The IOCTL number is supported, but the provided structure has
+ *    non-zero in a part the kernel does not understand.
+ *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
+ *    understood, however a known field has a value the kernel does not
+ *    understand or support.
+ *  - EINVAL: Everything about the IOCTL was understood, but a field is not
+ *    correct.
+ *  - ENOENT: An ID or IOVA provided does not exist.
+ *  - ENOMEM: Out of memory.
+ *  - EOVERFLOW: Mathematics overflowed.
+ *
+ * As well as additional errnos, within specific ioctls.
+ */
+enum {
+	IOMMUFD_CMD_BASE = 0x80,
+	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
+};
+
+/**
+ * struct iommu_destroy - ioctl(IOMMU_DESTROY)
+ * @size: sizeof(struct iommu_destroy)
+ * @id: iommufd object ID to destroy. Can by any destroyable object type.
+ *
+ * Destroy any object held within iommufd.
+ */
+struct iommu_destroy {
+	__u32 size;
+	__u32 id;
+};
+#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
+
+#endif
-- 
GitLab


From ce5a23c835aa0f0a931b5bcde1e7811f951b0146 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:30 -0400
Subject: [PATCH 3126/4122] kernel/user: Allow user_struct::locked_vm to be
 usable for iommufd

Following the pattern of io_uring, perf, skb, and bpf, iommfd will use
user->locked_vm for accounting pinned pages. Ensure the value is included
in the struct and export free_uid() as iommufd is modular.

user->locked_vm is the good accounting to use for ulimit because it is
per-user, and the security sandboxing of locked pages is not supposed to
be per-process. Other places (vfio, vdpa and infiniband) have used
mm->pinned_vm and/or mm->locked_vm for accounting pinned pages, but this
is only per-process and inconsistent with the new FOLL_LONGTERM users in
the kernel.

Concurrent work is underway to try to put this in a cgroup, so everything
can be consistent and the kernel can provide a FOLL_LONGTERM limit that
actually provides security.

Link: https://lore.kernel.org/r/7-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/linux/sched/user.h | 2 +-
 kernel/user.c              | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index f054d0360a753..4cc52698e214e 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -25,7 +25,7 @@ struct user_struct {
 
 #if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) || \
 	defined(CONFIG_NET) || defined(CONFIG_IO_URING) || \
-	defined(CONFIG_VFIO_PCI_ZDEV_KVM)
+	defined(CONFIG_VFIO_PCI_ZDEV_KVM) || IS_ENABLED(CONFIG_IOMMUFD)
 	atomic_long_t locked_vm;
 #endif
 #ifdef CONFIG_WATCH_QUEUE
diff --git a/kernel/user.c b/kernel/user.c
index e2cf8c22b539a..d667debeafd60 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -185,6 +185,7 @@ void free_uid(struct user_struct *up)
 	if (refcount_dec_and_lock_irqsave(&up->__count, &uidhash_lock, &flags))
 		free_user(up, flags);
 }
+EXPORT_SYMBOL_GPL(free_uid);
 
 struct user_struct *alloc_uid(kuid_t uid)
 {
-- 
GitLab


From f394576eb11dbcd3a740fa41e577b97f0720d26e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:31 -0400
Subject: [PATCH 3127/4122] iommufd: PFN handling for iopt_pages

The top of the data structure provides an IO Address Space (IOAS) that is
similar to a VFIO container. The IOAS allows map/unmap of memory into
ranges of IOVA called iopt_areas. Multiple IOMMU domains (IO page tables)
and in-kernel accesses (like VFIO mdevs) can be attached to the IOAS to
access the PFNs that those IOVA areas cover.

The IO Address Space (IOAS) datastructure is composed of:
 - struct io_pagetable holding the IOVA map
 - struct iopt_areas representing populated portions of IOVA
 - struct iopt_pages representing the storage of PFNs
 - struct iommu_domain representing each IO page table in the system IOMMU
 - struct iopt_pages_access representing in-kernel accesses of PFNs (ie
   VFIO mdevs)
 - struct xarray pinned_pfns holding a list of pages pinned by in-kernel
   accesses

This patch introduces the lowest part of the datastructure - the movement
of PFNs in a tiered storage scheme:
 1) iopt_pages::pinned_pfns xarray
 2) Multiple iommu_domains
 3) The origin of the PFNs, i.e. the userspace pointer

PFN have to be copied between all combinations of tiers, depending on the
configuration.

The interface is an iterator called a 'pfn_reader' which determines which
tier each PFN is stored and loads it into a list of PFNs held in a struct
pfn_batch.

Each step of the iterator will fill up the pfn_batch, then the caller can
use the pfn_batch to send the PFNs to the required destination. Repeating
this loop will read all the PFNs in an IOVA range.

The pfn_reader and pfn_batch also keep track of the pinned page accounting.

While PFNs are always stored and accessed as full PAGE_SIZE units the
iommu_domain tier can store with a sub-page offset/length to support
IOMMUs with a smaller IOPTE size than PAGE_SIZE.

Link: https://lore.kernel.org/r/8-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 .clang-format                           |    1 +
 drivers/iommu/iommufd/Makefile          |    3 +-
 drivers/iommu/iommufd/double_span.h     |   53 ++
 drivers/iommu/iommufd/io_pagetable.h    |  109 +++
 drivers/iommu/iommufd/iommufd_private.h |   24 +
 drivers/iommu/iommufd/pages.c           | 1066 +++++++++++++++++++++++
 include/linux/iommufd.h                 |    7 +
 7 files changed, 1262 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/iommufd/double_span.h
 create mode 100644 drivers/iommu/iommufd/io_pagetable.h
 create mode 100644 drivers/iommu/iommufd/pages.c

diff --git a/.clang-format b/.clang-format
index 96d07786dcfb4..501241f897766 100644
--- a/.clang-format
+++ b/.clang-format
@@ -440,6 +440,7 @@ ForEachMacros:
   - 'inet_lhash2_for_each_icsk'
   - 'inet_lhash2_for_each_icsk_continue'
   - 'inet_lhash2_for_each_icsk_rcu'
+  - 'interval_tree_for_each_double_span'
   - 'interval_tree_for_each_span'
   - 'intlist__for_each_entry'
   - 'intlist__for_each_entry_safe'
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index a07a8cffe937c..05a0e91e30afa 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 iommufd-y := \
-	main.o
+	main.o \
+	pages.o
 
 obj-$(CONFIG_IOMMUFD) += iommufd.o
diff --git a/drivers/iommu/iommufd/double_span.h b/drivers/iommu/iommufd/double_span.h
new file mode 100644
index 0000000000000..b37aab7488c0f
--- /dev/null
+++ b/drivers/iommu/iommufd/double_span.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef __IOMMUFD_DOUBLE_SPAN_H
+#define __IOMMUFD_DOUBLE_SPAN_H
+
+#include <linux/interval_tree.h>
+
+/*
+ * This is a variation of the general interval_tree_span_iter that computes the
+ * spans over the union of two different interval trees. Used ranges are broken
+ * up and reported based on the tree that provides the interval. The first span
+ * always takes priority. Like interval_tree_span_iter it is greedy and the same
+ * value of is_used will not repeat on two iteration cycles.
+ */
+struct interval_tree_double_span_iter {
+	struct rb_root_cached *itrees[2];
+	struct interval_tree_span_iter spans[2];
+	union {
+		unsigned long start_hole;
+		unsigned long start_used;
+	};
+	union {
+		unsigned long last_hole;
+		unsigned long last_used;
+	};
+	/* 0 = hole, 1 = used span[0], 2 = used span[1], -1 done iteration */
+	int is_used;
+};
+
+void interval_tree_double_span_iter_update(
+	struct interval_tree_double_span_iter *iter);
+void interval_tree_double_span_iter_first(
+	struct interval_tree_double_span_iter *iter,
+	struct rb_root_cached *itree1, struct rb_root_cached *itree2,
+	unsigned long first_index, unsigned long last_index);
+void interval_tree_double_span_iter_next(
+	struct interval_tree_double_span_iter *iter);
+
+static inline bool
+interval_tree_double_span_iter_done(struct interval_tree_double_span_iter *state)
+{
+	return state->is_used == -1;
+}
+
+#define interval_tree_for_each_double_span(span, itree1, itree2, first_index, \
+					   last_index)                        \
+	for (interval_tree_double_span_iter_first(span, itree1, itree2,       \
+						  first_index, last_index);   \
+	     !interval_tree_double_span_iter_done(span);                      \
+	     interval_tree_double_span_iter_next(span))
+
+#endif
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
new file mode 100644
index 0000000000000..b74bf01ffc52c
--- /dev/null
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ */
+#ifndef __IO_PAGETABLE_H
+#define __IO_PAGETABLE_H
+
+#include <linux/interval_tree.h>
+#include <linux/mutex.h>
+#include <linux/kref.h>
+#include <linux/xarray.h>
+
+#include "iommufd_private.h"
+
+struct iommu_domain;
+
+/*
+ * Each io_pagetable is composed of intervals of areas which cover regions of
+ * the iova that are backed by something. iova not covered by areas is not
+ * populated in the page table. Each area is fully populated with pages.
+ *
+ * iovas are in byte units, but must be iopt->iova_alignment aligned.
+ *
+ * pages can be NULL, this means some other thread is still working on setting
+ * up or tearing down the area. When observed under the write side of the
+ * domain_rwsem a NULL pages must mean the area is still being setup and no
+ * domains are filled.
+ *
+ * storage_domain points at an arbitrary iommu_domain that is holding the PFNs
+ * for this area. It is locked by the pages->mutex. This simplifies the locking
+ * as the pages code can rely on the storage_domain without having to get the
+ * iopt->domains_rwsem.
+ *
+ * The io_pagetable::iova_rwsem protects node
+ * The iopt_pages::mutex protects pages_node
+ * iopt and immu_prot are immutable
+ * The pages::mutex protects num_accesses
+ */
+struct iopt_area {
+	struct interval_tree_node node;
+	struct interval_tree_node pages_node;
+	struct io_pagetable *iopt;
+	struct iopt_pages *pages;
+	struct iommu_domain *storage_domain;
+	/* How many bytes into the first page the area starts */
+	unsigned int page_offset;
+	/* IOMMU_READ, IOMMU_WRITE, etc */
+	int iommu_prot;
+	unsigned int num_accesses;
+};
+
+static inline unsigned long iopt_area_index(struct iopt_area *area)
+{
+	return area->pages_node.start;
+}
+
+static inline unsigned long iopt_area_last_index(struct iopt_area *area)
+{
+	return area->pages_node.last;
+}
+
+static inline unsigned long iopt_area_iova(struct iopt_area *area)
+{
+	return area->node.start;
+}
+
+static inline unsigned long iopt_area_last_iova(struct iopt_area *area)
+{
+	return area->node.last;
+}
+
+enum {
+	IOPT_PAGES_ACCOUNT_NONE = 0,
+	IOPT_PAGES_ACCOUNT_USER = 1,
+	IOPT_PAGES_ACCOUNT_MM = 2,
+};
+
+/*
+ * This holds a pinned page list for multiple areas of IO address space. The
+ * pages always originate from a linear chunk of userspace VA. Multiple
+ * io_pagetable's, through their iopt_area's, can share a single iopt_pages
+ * which avoids multi-pinning and double accounting of page consumption.
+ *
+ * indexes in this structure are measured in PAGE_SIZE units, are 0 based from
+ * the start of the uptr and extend to npages. pages are pinned dynamically
+ * according to the intervals in the access_itree and domains_itree, npinned
+ * records the current number of pages pinned.
+ */
+struct iopt_pages {
+	struct kref kref;
+	struct mutex mutex;
+	size_t npages;
+	size_t npinned;
+	size_t last_npinned;
+	struct task_struct *source_task;
+	struct mm_struct *source_mm;
+	struct user_struct *source_user;
+	void __user *uptr;
+	bool writable:1;
+	u8 account_mode;
+
+	struct xarray pinned_pfns;
+	/* Of iopt_pages_access::node */
+	struct rb_root_cached access_itree;
+	/* Of iopt_area::pages_node */
+	struct rb_root_cached domains_itree;
+};
+
+#endif
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index bb720bc113178..169a30ff3bf0d 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -14,6 +14,30 @@ struct iommufd_ctx {
 	struct xarray objects;
 };
 
+/*
+ * The IOVA to PFN map. The map automatically copies the PFNs into multiple
+ * domains and permits sharing of PFNs between io_pagetable instances. This
+ * supports both a design where IOAS's are 1:1 with a domain (eg because the
+ * domain is HW customized), or where the IOAS is 1:N with multiple generic
+ * domains.  The io_pagetable holds an interval tree of iopt_areas which point
+ * to shared iopt_pages which hold the pfns mapped to the page table.
+ *
+ * The locking order is domains_rwsem -> iova_rwsem -> pages::mutex
+ */
+struct io_pagetable {
+	struct rw_semaphore domains_rwsem;
+	struct xarray domains;
+	unsigned int next_domain_id;
+
+	struct rw_semaphore iova_rwsem;
+	struct rb_root_cached area_itree;
+	/* IOVA that cannot become reserved, struct iopt_allowed */
+	struct rb_root_cached allowed_itree;
+	/* IOVA that cannot be allocated, struct iopt_reserved */
+	struct rb_root_cached reserved_itree;
+	u8 disable_large_pages;
+};
+
 struct iommufd_ucmd {
 	struct iommufd_ctx *ictx;
 	void __user *ubuffer;
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
new file mode 100644
index 0000000000000..ebca78e743c6f
--- /dev/null
+++ b/drivers/iommu/iommufd/pages.c
@@ -0,0 +1,1066 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ * The iopt_pages is the center of the storage and motion of PFNs. Each
+ * iopt_pages represents a logical linear array of full PFNs. The array is 0
+ * based and has npages in it. Accessors use 'index' to refer to the entry in
+ * this logical array, regardless of its storage location.
+ *
+ * PFNs are stored in a tiered scheme:
+ *  1) iopt_pages::pinned_pfns xarray
+ *  2) An iommu_domain
+ *  3) The origin of the PFNs, i.e. the userspace pointer
+ *
+ * PFN have to be copied between all combinations of tiers, depending on the
+ * configuration.
+ *
+ * When a PFN is taken out of the userspace pointer it is pinned exactly once.
+ * The storage locations of the PFN's index are tracked in the two interval
+ * trees. If no interval includes the index then it is not pinned.
+ *
+ * If access_itree includes the PFN's index then an in-kernel access has
+ * requested the page. The PFN is stored in the xarray so other requestors can
+ * continue to find it.
+ *
+ * If the domains_itree includes the PFN's index then an iommu_domain is storing
+ * the PFN and it can be read back using iommu_iova_to_phys(). To avoid
+ * duplicating storage the xarray is not used if only iommu_domains are using
+ * the PFN's index.
+ *
+ * As a general principle this is designed so that destroy never fails. This
+ * means removing an iommu_domain or releasing a in-kernel access will not fail
+ * due to insufficient memory. In practice this means some cases have to hold
+ * PFNs in the xarray even though they are also being stored in an iommu_domain.
+ *
+ * While the iopt_pages can use an iommu_domain as storage, it does not have an
+ * IOVA itself. Instead the iopt_area represents a range of IOVA and uses the
+ * iopt_pages as the PFN provider. Multiple iopt_areas can share the iopt_pages
+ * and reference their own slice of the PFN array, with sub page granularity.
+ *
+ * In this file the term 'last' indicates an inclusive and closed interval, eg
+ * [0,0] refers to a single PFN. 'end' means an open range, eg [0,0) refers to
+ * no PFNs.
+ *
+ * Be cautious of overflow. An IOVA can go all the way up to U64_MAX, so
+ * last_iova + 1 can overflow. An iopt_pages index will always be much less than
+ * ULONG_MAX so last_index + 1 cannot overflow.
+ */
+#include <linux/overflow.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/sched/mm.h>
+#include <linux/highmem.h>
+#include <linux/kthread.h>
+#include <linux/iommufd.h>
+
+#include "io_pagetable.h"
+#include "double_span.h"
+
+#define TEMP_MEMORY_LIMIT 65536
+#define BATCH_BACKUP_SIZE 32
+
+/*
+ * More memory makes pin_user_pages() and the batching more efficient, but as
+ * this is only a performance optimization don't try too hard to get it. A 64k
+ * allocation can hold about 26M of 4k pages and 13G of 2M pages in an
+ * pfn_batch. Various destroy paths cannot fail and provide a small amount of
+ * stack memory as a backup contingency. If backup_len is given this cannot
+ * fail.
+ */
+static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len)
+{
+	void *res;
+
+	if (WARN_ON(*size == 0))
+		return NULL;
+
+	if (*size < backup_len)
+		return backup;
+	*size = min_t(size_t, *size, TEMP_MEMORY_LIMIT);
+	res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+	if (res)
+		return res;
+	*size = PAGE_SIZE;
+	if (backup_len) {
+		res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
+		if (res)
+			return res;
+		*size = backup_len;
+		return backup;
+	}
+	return kmalloc(*size, GFP_KERNEL);
+}
+
+void interval_tree_double_span_iter_update(
+	struct interval_tree_double_span_iter *iter)
+{
+	unsigned long last_hole = ULONG_MAX;
+	unsigned int i;
+
+	for (i = 0; i != ARRAY_SIZE(iter->spans); i++) {
+		if (interval_tree_span_iter_done(&iter->spans[i])) {
+			iter->is_used = -1;
+			return;
+		}
+
+		if (iter->spans[i].is_hole) {
+			last_hole = min(last_hole, iter->spans[i].last_hole);
+			continue;
+		}
+
+		iter->is_used = i + 1;
+		iter->start_used = iter->spans[i].start_used;
+		iter->last_used = min(iter->spans[i].last_used, last_hole);
+		return;
+	}
+
+	iter->is_used = 0;
+	iter->start_hole = iter->spans[0].start_hole;
+	iter->last_hole =
+		min(iter->spans[0].last_hole, iter->spans[1].last_hole);
+}
+
+void interval_tree_double_span_iter_first(
+	struct interval_tree_double_span_iter *iter,
+	struct rb_root_cached *itree1, struct rb_root_cached *itree2,
+	unsigned long first_index, unsigned long last_index)
+{
+	unsigned int i;
+
+	iter->itrees[0] = itree1;
+	iter->itrees[1] = itree2;
+	for (i = 0; i != ARRAY_SIZE(iter->spans); i++)
+		interval_tree_span_iter_first(&iter->spans[i], iter->itrees[i],
+					      first_index, last_index);
+	interval_tree_double_span_iter_update(iter);
+}
+
+void interval_tree_double_span_iter_next(
+	struct interval_tree_double_span_iter *iter)
+{
+	unsigned int i;
+
+	if (iter->is_used == -1 ||
+	    iter->last_hole == iter->spans[0].last_index) {
+		iter->is_used = -1;
+		return;
+	}
+
+	for (i = 0; i != ARRAY_SIZE(iter->spans); i++)
+		interval_tree_span_iter_advance(
+			&iter->spans[i], iter->itrees[i], iter->last_hole + 1);
+	interval_tree_double_span_iter_update(iter);
+}
+
+static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages)
+{
+	pages->npinned += npages;
+}
+
+static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages)
+{
+	pages->npinned -= npages;
+}
+
+static void iopt_pages_err_unpin(struct iopt_pages *pages,
+				 unsigned long start_index,
+				 unsigned long last_index,
+				 struct page **page_list)
+{
+	unsigned long npages = last_index - start_index + 1;
+
+	unpin_user_pages(page_list, npages);
+	iopt_pages_sub_npinned(pages, npages);
+}
+
+/*
+ * index is the number of PAGE_SIZE units from the start of the area's
+ * iopt_pages. If the iova is sub page-size then the area has an iova that
+ * covers a portion of the first and last pages in the range.
+ */
+static unsigned long iopt_area_index_to_iova(struct iopt_area *area,
+					     unsigned long index)
+{
+	index -= iopt_area_index(area);
+	if (index == 0)
+		return iopt_area_iova(area);
+	return iopt_area_iova(area) - area->page_offset + index * PAGE_SIZE;
+}
+
+static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area,
+						  unsigned long index)
+{
+	if (index == iopt_area_last_index(area))
+		return iopt_area_last_iova(area);
+	return iopt_area_iova(area) - area->page_offset +
+	       (index - iopt_area_index(area) + 1) * PAGE_SIZE - 1;
+}
+
+static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova,
+			       size_t size)
+{
+	size_t ret;
+
+	ret = iommu_unmap(domain, iova, size);
+	/*
+	 * It is a logic error in this code or a driver bug if the IOMMU unmaps
+	 * something other than exactly as requested. This implies that the
+	 * iommu driver may not fail unmap for reasons beyond bad agruments.
+	 * Particularly, the iommu driver may not do a memory allocation on the
+	 * unmap path.
+	 */
+	WARN_ON(ret != size);
+}
+
+static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages,
+						     unsigned long index)
+{
+	struct interval_tree_node *node;
+
+	node = interval_tree_iter_first(&pages->domains_itree, index, index);
+	if (!node)
+		return NULL;
+	return container_of(node, struct iopt_area, pages_node);
+}
+
+/*
+ * A simple datastructure to hold a vector of PFNs, optimized for contiguous
+ * PFNs. This is used as a temporary holding memory for shuttling pfns from one
+ * place to another. Generally everything is made more efficient if operations
+ * work on the largest possible grouping of pfns. eg fewer lock/unlock cycles,
+ * better cache locality, etc
+ */
+struct pfn_batch {
+	unsigned long *pfns;
+	u32 *npfns;
+	unsigned int array_size;
+	unsigned int end;
+	unsigned int total_pfns;
+};
+
+static void batch_clear(struct pfn_batch *batch)
+{
+	batch->total_pfns = 0;
+	batch->end = 0;
+	batch->pfns[0] = 0;
+	batch->npfns[0] = 0;
+}
+
+/*
+ * Carry means we carry a portion of the final hugepage over to the front of the
+ * batch
+ */
+static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
+{
+	if (!keep_pfns)
+		return batch_clear(batch);
+
+	batch->total_pfns = keep_pfns;
+	batch->npfns[0] = keep_pfns;
+	batch->pfns[0] = batch->pfns[batch->end - 1] +
+			 (batch->npfns[batch->end - 1] - keep_pfns);
+	batch->end = 0;
+}
+
+static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns)
+{
+	if (!batch->total_pfns)
+		return;
+	skip_pfns = min(batch->total_pfns, skip_pfns);
+	batch->pfns[0] += skip_pfns;
+	batch->npfns[0] -= skip_pfns;
+	batch->total_pfns -= skip_pfns;
+}
+
+static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup,
+			size_t backup_len)
+{
+	const size_t elmsz = sizeof(*batch->pfns) + sizeof(*batch->npfns);
+	size_t size = max_pages * elmsz;
+
+	batch->pfns = temp_kmalloc(&size, backup, backup_len);
+	if (!batch->pfns)
+		return -ENOMEM;
+	batch->array_size = size / elmsz;
+	batch->npfns = (u32 *)(batch->pfns + batch->array_size);
+	batch_clear(batch);
+	return 0;
+}
+
+static int batch_init(struct pfn_batch *batch, size_t max_pages)
+{
+	return __batch_init(batch, max_pages, NULL, 0);
+}
+
+static void batch_init_backup(struct pfn_batch *batch, size_t max_pages,
+			      void *backup, size_t backup_len)
+{
+	__batch_init(batch, max_pages, backup, backup_len);
+}
+
+static void batch_destroy(struct pfn_batch *batch, void *backup)
+{
+	if (batch->pfns != backup)
+		kfree(batch->pfns);
+}
+
+/* true if the pfn could be added, false otherwise */
+static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn)
+{
+	const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns));
+
+	if (batch->end &&
+	    pfn == batch->pfns[batch->end - 1] + batch->npfns[batch->end - 1] &&
+	    batch->npfns[batch->end - 1] != MAX_NPFNS) {
+		batch->npfns[batch->end - 1]++;
+		batch->total_pfns++;
+		return true;
+	}
+	if (batch->end == batch->array_size)
+		return false;
+	batch->total_pfns++;
+	batch->pfns[batch->end] = pfn;
+	batch->npfns[batch->end] = 1;
+	batch->end++;
+	return true;
+}
+
+/*
+ * Fill the batch with pfns from the domain. When the batch is full, or it
+ * reaches last_index, the function will return. The caller should use
+ * batch->total_pfns to determine the starting point for the next iteration.
+ */
+static void batch_from_domain(struct pfn_batch *batch,
+			      struct iommu_domain *domain,
+			      struct iopt_area *area, unsigned long start_index,
+			      unsigned long last_index)
+{
+	unsigned int page_offset = 0;
+	unsigned long iova;
+	phys_addr_t phys;
+
+	iova = iopt_area_index_to_iova(area, start_index);
+	if (start_index == iopt_area_index(area))
+		page_offset = area->page_offset;
+	while (start_index <= last_index) {
+		/*
+		 * This is pretty slow, it would be nice to get the page size
+		 * back from the driver, or have the driver directly fill the
+		 * batch.
+		 */
+		phys = iommu_iova_to_phys(domain, iova) - page_offset;
+		if (!batch_add_pfn(batch, PHYS_PFN(phys)))
+			return;
+		iova += PAGE_SIZE - page_offset;
+		page_offset = 0;
+		start_index++;
+	}
+}
+
+static struct page **raw_pages_from_domain(struct iommu_domain *domain,
+					   struct iopt_area *area,
+					   unsigned long start_index,
+					   unsigned long last_index,
+					   struct page **out_pages)
+{
+	unsigned int page_offset = 0;
+	unsigned long iova;
+	phys_addr_t phys;
+
+	iova = iopt_area_index_to_iova(area, start_index);
+	if (start_index == iopt_area_index(area))
+		page_offset = area->page_offset;
+	while (start_index <= last_index) {
+		phys = iommu_iova_to_phys(domain, iova) - page_offset;
+		*(out_pages++) = pfn_to_page(PHYS_PFN(phys));
+		iova += PAGE_SIZE - page_offset;
+		page_offset = 0;
+		start_index++;
+	}
+	return out_pages;
+}
+
+/* Continues reading a domain until we reach a discontiguity in the pfns. */
+static void batch_from_domain_continue(struct pfn_batch *batch,
+				       struct iommu_domain *domain,
+				       struct iopt_area *area,
+				       unsigned long start_index,
+				       unsigned long last_index)
+{
+	unsigned int array_size = batch->array_size;
+
+	batch->array_size = batch->end;
+	batch_from_domain(batch, domain, area, start_index, last_index);
+	batch->array_size = array_size;
+}
+
+/*
+ * This is part of the VFIO compatibility support for VFIO_TYPE1_IOMMU. That
+ * mode permits splitting a mapped area up, and then one of the splits is
+ * unmapped. Doing this normally would cause us to violate our invariant of
+ * pairing map/unmap. Thus, to support old VFIO compatibility disable support
+ * for batching consecutive PFNs. All PFNs mapped into the iommu are done in
+ * PAGE_SIZE units, not larger or smaller.
+ */
+static int batch_iommu_map_small(struct iommu_domain *domain,
+				 unsigned long iova, phys_addr_t paddr,
+				 size_t size, int prot)
+{
+	unsigned long start_iova = iova;
+	int rc;
+
+	while (size) {
+		rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
+		if (rc)
+			goto err_unmap;
+		iova += PAGE_SIZE;
+		paddr += PAGE_SIZE;
+		size -= PAGE_SIZE;
+	}
+	return 0;
+
+err_unmap:
+	if (start_iova != iova)
+		iommu_unmap_nofail(domain, start_iova, iova - start_iova);
+	return rc;
+}
+
+static int batch_to_domain(struct pfn_batch *batch, struct iommu_domain *domain,
+			   struct iopt_area *area, unsigned long start_index)
+{
+	bool disable_large_pages = area->iopt->disable_large_pages;
+	unsigned long last_iova = iopt_area_last_iova(area);
+	unsigned int page_offset = 0;
+	unsigned long start_iova;
+	unsigned long next_iova;
+	unsigned int cur = 0;
+	unsigned long iova;
+	int rc;
+
+	/* The first index might be a partial page */
+	if (start_index == iopt_area_index(area))
+		page_offset = area->page_offset;
+	next_iova = iova = start_iova =
+		iopt_area_index_to_iova(area, start_index);
+	while (cur < batch->end) {
+		next_iova = min(last_iova + 1,
+				next_iova + batch->npfns[cur] * PAGE_SIZE -
+					page_offset);
+		if (disable_large_pages)
+			rc = batch_iommu_map_small(
+				domain, iova,
+				PFN_PHYS(batch->pfns[cur]) + page_offset,
+				next_iova - iova, area->iommu_prot);
+		else
+			rc = iommu_map(domain, iova,
+				       PFN_PHYS(batch->pfns[cur]) + page_offset,
+				       next_iova - iova, area->iommu_prot);
+		if (rc)
+			goto err_unmap;
+		iova = next_iova;
+		page_offset = 0;
+		cur++;
+	}
+	return 0;
+err_unmap:
+	if (start_iova != iova)
+		iommu_unmap_nofail(domain, start_iova, iova - start_iova);
+	return rc;
+}
+
+static void batch_from_xarray(struct pfn_batch *batch, struct xarray *xa,
+			      unsigned long start_index,
+			      unsigned long last_index)
+{
+	XA_STATE(xas, xa, start_index);
+	void *entry;
+
+	rcu_read_lock();
+	while (true) {
+		entry = xas_next(&xas);
+		if (xas_retry(&xas, entry))
+			continue;
+		WARN_ON(!xa_is_value(entry));
+		if (!batch_add_pfn(batch, xa_to_value(entry)) ||
+		    start_index == last_index)
+			break;
+		start_index++;
+	}
+	rcu_read_unlock();
+}
+
+static void batch_from_xarray_clear(struct pfn_batch *batch, struct xarray *xa,
+				    unsigned long start_index,
+				    unsigned long last_index)
+{
+	XA_STATE(xas, xa, start_index);
+	void *entry;
+
+	xas_lock(&xas);
+	while (true) {
+		entry = xas_next(&xas);
+		if (xas_retry(&xas, entry))
+			continue;
+		WARN_ON(!xa_is_value(entry));
+		if (!batch_add_pfn(batch, xa_to_value(entry)))
+			break;
+		xas_store(&xas, NULL);
+		if (start_index == last_index)
+			break;
+		start_index++;
+	}
+	xas_unlock(&xas);
+}
+
+static void clear_xarray(struct xarray *xa, unsigned long start_index,
+			 unsigned long last_index)
+{
+	XA_STATE(xas, xa, start_index);
+	void *entry;
+
+	xas_lock(&xas);
+	xas_for_each(&xas, entry, last_index)
+		xas_store(&xas, NULL);
+	xas_unlock(&xas);
+}
+
+static int pages_to_xarray(struct xarray *xa, unsigned long start_index,
+			   unsigned long last_index, struct page **pages)
+{
+	struct page **end_pages = pages + (last_index - start_index) + 1;
+	XA_STATE(xas, xa, start_index);
+
+	do {
+		void *old;
+
+		xas_lock(&xas);
+		while (pages != end_pages) {
+			old = xas_store(&xas, xa_mk_value(page_to_pfn(*pages)));
+			if (xas_error(&xas))
+				break;
+			WARN_ON(old);
+			pages++;
+			xas_next(&xas);
+		}
+		xas_unlock(&xas);
+	} while (xas_nomem(&xas, GFP_KERNEL));
+
+	if (xas_error(&xas)) {
+		if (xas.xa_index != start_index)
+			clear_xarray(xa, start_index, xas.xa_index - 1);
+		return xas_error(&xas);
+	}
+	return 0;
+}
+
+static void batch_from_pages(struct pfn_batch *batch, struct page **pages,
+			     size_t npages)
+{
+	struct page **end = pages + npages;
+
+	for (; pages != end; pages++)
+		if (!batch_add_pfn(batch, page_to_pfn(*pages)))
+			break;
+}
+
+static void batch_unpin(struct pfn_batch *batch, struct iopt_pages *pages,
+			unsigned int first_page_off, size_t npages)
+{
+	unsigned int cur = 0;
+
+	while (first_page_off) {
+		if (batch->npfns[cur] > first_page_off)
+			break;
+		first_page_off -= batch->npfns[cur];
+		cur++;
+	}
+
+	while (npages) {
+		size_t to_unpin = min_t(size_t, npages,
+					batch->npfns[cur] - first_page_off);
+
+		unpin_user_page_range_dirty_lock(
+			pfn_to_page(batch->pfns[cur] + first_page_off),
+			to_unpin, pages->writable);
+		iopt_pages_sub_npinned(pages, to_unpin);
+		cur++;
+		first_page_off = 0;
+		npages -= to_unpin;
+	}
+}
+
+static void copy_data_page(struct page *page, void *data, unsigned long offset,
+			   size_t length, unsigned int flags)
+{
+	void *mem;
+
+	mem = kmap_local_page(page);
+	if (flags & IOMMUFD_ACCESS_RW_WRITE) {
+		memcpy(mem + offset, data, length);
+		set_page_dirty_lock(page);
+	} else {
+		memcpy(data, mem + offset, length);
+	}
+	kunmap_local(mem);
+}
+
+static unsigned long batch_rw(struct pfn_batch *batch, void *data,
+			      unsigned long offset, unsigned long length,
+			      unsigned int flags)
+{
+	unsigned long copied = 0;
+	unsigned int npage = 0;
+	unsigned int cur = 0;
+
+	while (cur < batch->end) {
+		unsigned long bytes = min(length, PAGE_SIZE - offset);
+
+		copy_data_page(pfn_to_page(batch->pfns[cur] + npage), data,
+			       offset, bytes, flags);
+		offset = 0;
+		length -= bytes;
+		data += bytes;
+		copied += bytes;
+		npage++;
+		if (npage == batch->npfns[cur]) {
+			npage = 0;
+			cur++;
+		}
+		if (!length)
+			break;
+	}
+	return copied;
+}
+
+/* pfn_reader_user is just the pin_user_pages() path */
+struct pfn_reader_user {
+	struct page **upages;
+	size_t upages_len;
+	unsigned long upages_start;
+	unsigned long upages_end;
+	unsigned int gup_flags;
+	/*
+	 * 1 means mmget() and mmap_read_lock(), 0 means only mmget(), -1 is
+	 * neither
+	 */
+	int locked;
+};
+
+static void pfn_reader_user_init(struct pfn_reader_user *user,
+				 struct iopt_pages *pages)
+{
+	user->upages = NULL;
+	user->upages_start = 0;
+	user->upages_end = 0;
+	user->locked = -1;
+
+	if (pages->writable) {
+		user->gup_flags = FOLL_LONGTERM | FOLL_WRITE;
+	} else {
+		/* Still need to break COWs on read */
+		user->gup_flags = FOLL_LONGTERM | FOLL_FORCE | FOLL_WRITE;
+	}
+}
+
+static void pfn_reader_user_destroy(struct pfn_reader_user *user,
+				    struct iopt_pages *pages)
+{
+	if (user->locked != -1) {
+		if (user->locked)
+			mmap_read_unlock(pages->source_mm);
+		if (pages->source_mm != current->mm)
+			mmput(pages->source_mm);
+		user->locked = 0;
+	}
+
+	kfree(user->upages);
+	user->upages = NULL;
+}
+
+static int pfn_reader_user_pin(struct pfn_reader_user *user,
+			       struct iopt_pages *pages,
+			       unsigned long start_index,
+			       unsigned long last_index)
+{
+	bool remote_mm = pages->source_mm != current->mm;
+	unsigned long npages;
+	uintptr_t uptr;
+	long rc;
+
+	if (!user->upages) {
+		/* All undone in pfn_reader_destroy() */
+		user->upages_len =
+			(last_index - start_index + 1) * sizeof(*user->upages);
+		user->upages = temp_kmalloc(&user->upages_len, NULL, 0);
+		if (!user->upages)
+			return -ENOMEM;
+	}
+
+	if (user->locked == -1) {
+		/*
+		 * The majority of usages will run the map task within the mm
+		 * providing the pages, so we can optimize into
+		 * get_user_pages_fast()
+		 */
+		if (remote_mm) {
+			if (!mmget_not_zero(pages->source_mm))
+				return -EFAULT;
+		}
+		user->locked = 0;
+	}
+
+	npages = min_t(unsigned long, last_index - start_index + 1,
+		       user->upages_len / sizeof(*user->upages));
+
+	uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
+	if (!remote_mm)
+		rc = pin_user_pages_fast(uptr, npages, user->gup_flags,
+					 user->upages);
+	else {
+		if (!user->locked) {
+			mmap_read_lock(pages->source_mm);
+			user->locked = 1;
+		}
+		/*
+		 * FIXME: last NULL can be &pfns->locked once the GUP patch
+		 * is merged.
+		 */
+		rc = pin_user_pages_remote(pages->source_mm, uptr, npages,
+					   user->gup_flags, user->upages, NULL,
+					   NULL);
+	}
+	if (rc <= 0) {
+		if (WARN_ON(!rc))
+			return -EFAULT;
+		return rc;
+	}
+	iopt_pages_add_npinned(pages, rc);
+	user->upages_start = start_index;
+	user->upages_end = start_index + rc;
+	return 0;
+}
+
+/* This is the "modern" and faster accounting method used by io_uring */
+static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages)
+{
+	unsigned long lock_limit;
+	unsigned long cur_pages;
+	unsigned long new_pages;
+
+	lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >>
+		     PAGE_SHIFT;
+	npages = pages->npinned - pages->last_npinned;
+	do {
+		cur_pages = atomic_long_read(&pages->source_user->locked_vm);
+		new_pages = cur_pages + npages;
+		if (new_pages > lock_limit)
+			return -ENOMEM;
+	} while (atomic_long_cmpxchg(&pages->source_user->locked_vm, cur_pages,
+				     new_pages) != cur_pages);
+	return 0;
+}
+
+static void decr_user_locked_vm(struct iopt_pages *pages, unsigned long npages)
+{
+	if (WARN_ON(atomic_long_read(&pages->source_user->locked_vm) < npages))
+		return;
+	atomic_long_sub(npages, &pages->source_user->locked_vm);
+}
+
+/* This is the accounting method used for compatibility with VFIO */
+static int update_mm_locked_vm(struct iopt_pages *pages, unsigned long npages,
+			       bool inc, struct pfn_reader_user *user)
+{
+	bool do_put = false;
+	int rc;
+
+	if (user && user->locked) {
+		mmap_read_unlock(pages->source_mm);
+		user->locked = 0;
+		/* If we had the lock then we also have a get */
+	} else if ((!user || !user->upages) &&
+		   pages->source_mm != current->mm) {
+		if (!mmget_not_zero(pages->source_mm))
+			return -EINVAL;
+		do_put = true;
+	}
+
+	mmap_write_lock(pages->source_mm);
+	rc = __account_locked_vm(pages->source_mm, npages, inc,
+				 pages->source_task, false);
+	mmap_write_unlock(pages->source_mm);
+
+	if (do_put)
+		mmput(pages->source_mm);
+	return rc;
+}
+
+static int do_update_pinned(struct iopt_pages *pages, unsigned long npages,
+			    bool inc, struct pfn_reader_user *user)
+{
+	int rc = 0;
+
+	switch (pages->account_mode) {
+	case IOPT_PAGES_ACCOUNT_NONE:
+		break;
+	case IOPT_PAGES_ACCOUNT_USER:
+		if (inc)
+			rc = incr_user_locked_vm(pages, npages);
+		else
+			decr_user_locked_vm(pages, npages);
+		break;
+	case IOPT_PAGES_ACCOUNT_MM:
+		rc = update_mm_locked_vm(pages, npages, inc, user);
+		break;
+	}
+	if (rc)
+		return rc;
+
+	pages->last_npinned = pages->npinned;
+	if (inc)
+		atomic64_add(npages, &pages->source_mm->pinned_vm);
+	else
+		atomic64_sub(npages, &pages->source_mm->pinned_vm);
+	return 0;
+}
+
+static void update_unpinned(struct iopt_pages *pages)
+{
+	if (WARN_ON(pages->npinned > pages->last_npinned))
+		return;
+	if (pages->npinned == pages->last_npinned)
+		return;
+	do_update_pinned(pages, pages->last_npinned - pages->npinned, false,
+			 NULL);
+}
+
+/*
+ * Changes in the number of pages pinned is done after the pages have been read
+ * and processed. If the user lacked the limit then the error unwind will unpin
+ * everything that was just pinned. This is because it is expensive to calculate
+ * how many pages we have already pinned within a range to generate an accurate
+ * prediction in advance of doing the work to actually pin them.
+ */
+static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
+					 struct iopt_pages *pages)
+{
+	unsigned long npages;
+	bool inc;
+
+	lockdep_assert_held(&pages->mutex);
+
+	if (pages->npinned == pages->last_npinned)
+		return 0;
+
+	if (pages->npinned < pages->last_npinned) {
+		npages = pages->last_npinned - pages->npinned;
+		inc = false;
+	} else {
+		npages = pages->npinned - pages->last_npinned;
+		inc = true;
+	}
+	return do_update_pinned(pages, npages, inc, user);
+}
+
+/*
+ * PFNs are stored in three places, in order of preference:
+ * - The iopt_pages xarray. This is only populated if there is a
+ *   iopt_pages_access
+ * - The iommu_domain under an area
+ * - The original PFN source, ie pages->source_mm
+ *
+ * This iterator reads the pfns optimizing to load according to the
+ * above order.
+ */
+struct pfn_reader {
+	struct iopt_pages *pages;
+	struct interval_tree_double_span_iter span;
+	struct pfn_batch batch;
+	unsigned long batch_start_index;
+	unsigned long batch_end_index;
+	unsigned long last_index;
+
+	struct pfn_reader_user user;
+};
+
+static int pfn_reader_update_pinned(struct pfn_reader *pfns)
+{
+	return pfn_reader_user_update_pinned(&pfns->user, pfns->pages);
+}
+
+/*
+ * The batch can contain a mixture of pages that are still in use and pages that
+ * need to be unpinned. Unpin only pages that are not held anywhere else.
+ */
+static void pfn_reader_unpin(struct pfn_reader *pfns)
+{
+	unsigned long last = pfns->batch_end_index - 1;
+	unsigned long start = pfns->batch_start_index;
+	struct interval_tree_double_span_iter span;
+	struct iopt_pages *pages = pfns->pages;
+
+	lockdep_assert_held(&pages->mutex);
+
+	interval_tree_for_each_double_span(&span, &pages->access_itree,
+					   &pages->domains_itree, start, last) {
+		if (span.is_used)
+			continue;
+
+		batch_unpin(&pfns->batch, pages, span.start_hole - start,
+			    span.last_hole - span.start_hole + 1);
+	}
+}
+
+/* Process a single span to load it from the proper storage */
+static int pfn_reader_fill_span(struct pfn_reader *pfns)
+{
+	struct interval_tree_double_span_iter *span = &pfns->span;
+	unsigned long start_index = pfns->batch_end_index;
+	struct iopt_area *area;
+	int rc;
+
+	if (span->is_used == 1) {
+		batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns,
+				  start_index, span->last_used);
+		return 0;
+	}
+
+	if (span->is_used == 2) {
+		/*
+		 * Pull as many pages from the first domain we find in the
+		 * target span. If it is too small then we will be called again
+		 * and we'll find another area.
+		 */
+		area = iopt_pages_find_domain_area(pfns->pages, start_index);
+		if (WARN_ON(!area))
+			return -EINVAL;
+
+		/* The storage_domain cannot change without the pages mutex */
+		batch_from_domain(
+			&pfns->batch, area->storage_domain, area, start_index,
+			min(iopt_area_last_index(area), span->last_used));
+		return 0;
+	}
+
+	if (start_index >= pfns->user.upages_end) {
+		rc = pfn_reader_user_pin(&pfns->user, pfns->pages, start_index,
+					 span->last_hole);
+		if (rc)
+			return rc;
+	}
+
+	batch_from_pages(&pfns->batch,
+			 pfns->user.upages +
+				 (start_index - pfns->user.upages_start),
+			 pfns->user.upages_end - start_index);
+	return 0;
+}
+
+static bool pfn_reader_done(struct pfn_reader *pfns)
+{
+	return pfns->batch_start_index == pfns->last_index + 1;
+}
+
+static int pfn_reader_next(struct pfn_reader *pfns)
+{
+	int rc;
+
+	batch_clear(&pfns->batch);
+	pfns->batch_start_index = pfns->batch_end_index;
+
+	while (pfns->batch_end_index != pfns->last_index + 1) {
+		unsigned int npfns = pfns->batch.total_pfns;
+
+		rc = pfn_reader_fill_span(pfns);
+		if (rc)
+			return rc;
+
+		if (WARN_ON(!pfns->batch.total_pfns))
+			return -EINVAL;
+
+		pfns->batch_end_index =
+			pfns->batch_start_index + pfns->batch.total_pfns;
+		if (pfns->batch_end_index == pfns->span.last_used + 1)
+			interval_tree_double_span_iter_next(&pfns->span);
+
+		/* Batch is full */
+		if (npfns == pfns->batch.total_pfns)
+			return 0;
+	}
+	return 0;
+}
+
+static int pfn_reader_init(struct pfn_reader *pfns, struct iopt_pages *pages,
+			   unsigned long start_index, unsigned long last_index)
+{
+	int rc;
+
+	lockdep_assert_held(&pages->mutex);
+
+	pfns->pages = pages;
+	pfns->batch_start_index = start_index;
+	pfns->batch_end_index = start_index;
+	pfns->last_index = last_index;
+	pfn_reader_user_init(&pfns->user, pages);
+	rc = batch_init(&pfns->batch, last_index - start_index + 1);
+	if (rc)
+		return rc;
+	interval_tree_double_span_iter_first(&pfns->span, &pages->access_itree,
+					     &pages->domains_itree, start_index,
+					     last_index);
+	return 0;
+}
+
+/*
+ * There are many assertions regarding the state of pages->npinned vs
+ * pages->last_pinned, for instance something like unmapping a domain must only
+ * decrement the npinned, and pfn_reader_destroy() must be called only after all
+ * the pins are updated. This is fine for success flows, but error flows
+ * sometimes need to release the pins held inside the pfn_reader before going on
+ * to complete unmapping and releasing pins held in domains.
+ */
+static void pfn_reader_release_pins(struct pfn_reader *pfns)
+{
+	struct iopt_pages *pages = pfns->pages;
+
+	if (pfns->user.upages_end > pfns->batch_end_index) {
+		size_t npages = pfns->user.upages_end - pfns->batch_end_index;
+
+		/* Any pages not transferred to the batch are just unpinned */
+		unpin_user_pages(pfns->user.upages + (pfns->batch_end_index -
+						      pfns->user.upages_start),
+				 npages);
+		iopt_pages_sub_npinned(pages, npages);
+		pfns->user.upages_end = pfns->batch_end_index;
+	}
+	if (pfns->batch_start_index != pfns->batch_end_index) {
+		pfn_reader_unpin(pfns);
+		pfns->batch_start_index = pfns->batch_end_index;
+	}
+}
+
+static void pfn_reader_destroy(struct pfn_reader *pfns)
+{
+	struct iopt_pages *pages = pfns->pages;
+
+	pfn_reader_release_pins(pfns);
+	pfn_reader_user_destroy(&pfns->user, pfns->pages);
+	batch_destroy(&pfns->batch, NULL);
+	WARN_ON(pages->last_npinned != pages->npinned);
+}
+
+static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages,
+			    unsigned long start_index, unsigned long last_index)
+{
+	int rc;
+
+	rc = pfn_reader_init(pfns, pages, start_index, last_index);
+	if (rc)
+		return rc;
+	rc = pfn_reader_next(pfns);
+	if (rc) {
+		pfn_reader_destroy(pfns);
+		return rc;
+	}
+	return 0;
+}
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index d1817472c2737..26e09d539737b 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -13,6 +13,13 @@
 struct iommufd_ctx;
 struct file;
 
+enum {
+	IOMMUFD_ACCESS_RW_READ = 0,
+	IOMMUFD_ACCESS_RW_WRITE = 1 << 0,
+	/* Set if the caller is in a kthread then rw will use kthread_use_mm() */
+	IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1,
+};
+
 void iommufd_ctx_get(struct iommufd_ctx *ictx);
 
 #if IS_ENABLED(CONFIG_IOMMUFD)
-- 
GitLab


From 8d160cd4d5066f864ec0f2c981470e55ac03ac27 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:32 -0400
Subject: [PATCH 3128/4122] iommufd: Algorithms for PFN storage

The iopt_pages which represents a logical linear list of full PFNs held in
different storage tiers. Each area points to a slice of exactly one
iopt_pages, and each iopt_pages can have multiple areas and accesses.

The three storage tiers are managed to meet these objectives:

 - If no iommu_domain or in-kerenel access exists then minimal memory
   should be consumed by iomufd
 - If a page has been pinned then an iopt_pages will not pin it again
 - If an in-kernel access exists then the xarray must provide the backing
   storage to avoid allocations on domain removals
 - Otherwise any iommu_domain will be used for storage

In a common configuration with only an iommu_domain the iopt_pages does
not allocate significant memory itself.

The external interface for pages has several logical operations:

  iopt_area_fill_domain() will load the PFNs from storage into a single
  domain. This is used when attaching a new domain to an existing IOAS.

  iopt_area_fill_domains() will load the PFNs from storage into multiple
  domains. This is used when creating a new IOVA map in an existing IOAS

  iopt_pages_add_access() creates an iopt_pages_access that tracks an
  in-kernel access of PFNs. This is some external driver that might be
  accessing the IOVA using the CPU, or programming PFNs with the DMA
  API. ie a VFIO mdev.

  iopt_pages_rw_access() directly perform a memcpy on the PFNs, without
  the overhead of iopt_pages_add_access()

  iopt_pages_fill_xarray() will load PFNs into the xarray and return a
  'struct page *' array. It is used by iopt_pages_access's to extract PFNs
  for in-kernel use. iopt_pages_fill_from_xarray() is a fast path when it
  is known the xarray is already filled.

As an iopt_pages can be referred to in slices by many areas and accesses
it uses interval trees to keep track of which storage tiers currently hold
the PFNs. On a page-by-page basis any request for a PFN will be satisfied
from one of the storage tiers and the PFN copied to target domain/array.

Unfill actions are similar, on a page by page basis domains are unmapped,
xarray entries freed or struct pages fully put back.

Significant complexity is required to fully optimize all of these data
motions. The implementation calculates the largest consecutive range of
same-storage indexes and operates in blocks. The accumulation of PFNs
always generates the largest contiguous PFN range possible to optimize and
this gathering can cross storage tier boundaries. For cases like 'fill
domains' care is taken to avoid duplicated work and PFNs are read once and
pushed into all domains.

The map/unmap interaction with the iommu_domain always works in contiguous
PFN blocks. The implementation does not require or benefit from any
split/merge optimization in the iommu_domain driver.

This design suggests several possible improvements in the IOMMU API that
would greatly help performance, particularly a way for the driver to map
and read the pfns lists instead of working with one driver call per page
to read, and one driver call per contiguous range to store.

Link: https://lore.kernel.org/r/9-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/io_pagetable.h |  74 +++
 drivers/iommu/iommufd/pages.c        | 843 +++++++++++++++++++++++++++
 2 files changed, 917 insertions(+)

diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index b74bf01ffc52c..a2b724175057b 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -49,6 +49,15 @@ struct iopt_area {
 	unsigned int num_accesses;
 };
 
+int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages);
+void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages);
+
+int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain);
+void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
+			     struct iommu_domain *domain);
+void iopt_area_unmap_domain(struct iopt_area *area,
+			    struct iommu_domain *domain);
+
 static inline unsigned long iopt_area_index(struct iopt_area *area)
 {
 	return area->pages_node.start;
@@ -69,6 +78,39 @@ static inline unsigned long iopt_area_last_iova(struct iopt_area *area)
 	return area->node.last;
 }
 
+static inline size_t iopt_area_length(struct iopt_area *area)
+{
+	return (area->node.last - area->node.start) + 1;
+}
+
+#define __make_iopt_iter(name)                                                 \
+	static inline struct iopt_##name *iopt_##name##_iter_first(            \
+		struct io_pagetable *iopt, unsigned long start,                \
+		unsigned long last)                                            \
+	{                                                                      \
+		struct interval_tree_node *node;                               \
+									       \
+		lockdep_assert_held(&iopt->iova_rwsem);                        \
+		node = interval_tree_iter_first(&iopt->name##_itree, start,    \
+						last);                         \
+		if (!node)                                                     \
+			return NULL;                                           \
+		return container_of(node, struct iopt_##name, node);           \
+	}                                                                      \
+	static inline struct iopt_##name *iopt_##name##_iter_next(             \
+		struct iopt_##name *last_node, unsigned long start,            \
+		unsigned long last)                                            \
+	{                                                                      \
+		struct interval_tree_node *node;                               \
+									       \
+		node = interval_tree_iter_next(&last_node->node, start, last); \
+		if (!node)                                                     \
+			return NULL;                                           \
+		return container_of(node, struct iopt_##name, node);           \
+	}
+
+__make_iopt_iter(area)
+
 enum {
 	IOPT_PAGES_ACCOUNT_NONE = 0,
 	IOPT_PAGES_ACCOUNT_USER = 1,
@@ -106,4 +148,36 @@ struct iopt_pages {
 	struct rb_root_cached domains_itree;
 };
 
+struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
+				    bool writable);
+void iopt_release_pages(struct kref *kref);
+static inline void iopt_put_pages(struct iopt_pages *pages)
+{
+	kref_put(&pages->kref, iopt_release_pages);
+}
+
+void iopt_pages_fill_from_xarray(struct iopt_pages *pages, unsigned long start,
+				 unsigned long last, struct page **out_pages);
+int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start,
+			   unsigned long last, struct page **out_pages);
+void iopt_pages_unfill_xarray(struct iopt_pages *pages, unsigned long start,
+			      unsigned long last);
+
+int iopt_area_add_access(struct iopt_area *area, unsigned long start,
+			 unsigned long last, struct page **out_pages,
+			 unsigned int flags);
+void iopt_area_remove_access(struct iopt_area *area, unsigned long start,
+			    unsigned long last);
+int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
+			 void *data, unsigned long length, unsigned int flags);
+
+/*
+ * Each interval represents an active iopt_access_pages(), it acts as an
+ * interval lock that keeps the PFNs pinned and stored in the xarray.
+ */
+struct iopt_pages_access {
+	struct interval_tree_node node;
+	unsigned int users;
+};
+
 #endif
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index ebca78e743c6f..bafeee9d73e8a 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -212,6 +212,18 @@ static void iommu_unmap_nofail(struct iommu_domain *domain, unsigned long iova,
 	WARN_ON(ret != size);
 }
 
+static void iopt_area_unmap_domain_range(struct iopt_area *area,
+					 struct iommu_domain *domain,
+					 unsigned long start_index,
+					 unsigned long last_index)
+{
+	unsigned long start_iova = iopt_area_index_to_iova(area, start_index);
+
+	iommu_unmap_nofail(domain, start_iova,
+			   iopt_area_index_to_iova_last(area, last_index) -
+				   start_iova + 1);
+}
+
 static struct iopt_area *iopt_pages_find_domain_area(struct iopt_pages *pages,
 						     unsigned long index)
 {
@@ -1064,3 +1076,834 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages,
 	}
 	return 0;
 }
+
+struct iopt_pages *iopt_alloc_pages(void __user *uptr, unsigned long length,
+				    bool writable)
+{
+	struct iopt_pages *pages;
+
+	/*
+	 * The iommu API uses size_t as the length, and protect the DIV_ROUND_UP
+	 * below from overflow
+	 */
+	if (length > SIZE_MAX - PAGE_SIZE || length == 0)
+		return ERR_PTR(-EINVAL);
+
+	pages = kzalloc(sizeof(*pages), GFP_KERNEL_ACCOUNT);
+	if (!pages)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&pages->kref);
+	xa_init_flags(&pages->pinned_pfns, XA_FLAGS_ACCOUNT);
+	mutex_init(&pages->mutex);
+	pages->source_mm = current->mm;
+	mmgrab(pages->source_mm);
+	pages->uptr = (void __user *)ALIGN_DOWN((uintptr_t)uptr, PAGE_SIZE);
+	pages->npages = DIV_ROUND_UP(length + (uptr - pages->uptr), PAGE_SIZE);
+	pages->access_itree = RB_ROOT_CACHED;
+	pages->domains_itree = RB_ROOT_CACHED;
+	pages->writable = writable;
+	if (capable(CAP_IPC_LOCK))
+		pages->account_mode = IOPT_PAGES_ACCOUNT_NONE;
+	else
+		pages->account_mode = IOPT_PAGES_ACCOUNT_USER;
+	pages->source_task = current->group_leader;
+	get_task_struct(current->group_leader);
+	pages->source_user = get_uid(current_user());
+	return pages;
+}
+
+void iopt_release_pages(struct kref *kref)
+{
+	struct iopt_pages *pages = container_of(kref, struct iopt_pages, kref);
+
+	WARN_ON(!RB_EMPTY_ROOT(&pages->access_itree.rb_root));
+	WARN_ON(!RB_EMPTY_ROOT(&pages->domains_itree.rb_root));
+	WARN_ON(pages->npinned);
+	WARN_ON(!xa_empty(&pages->pinned_pfns));
+	mmdrop(pages->source_mm);
+	mutex_destroy(&pages->mutex);
+	put_task_struct(pages->source_task);
+	free_uid(pages->source_user);
+	kfree(pages);
+}
+
+static void
+iopt_area_unpin_domain(struct pfn_batch *batch, struct iopt_area *area,
+		       struct iopt_pages *pages, struct iommu_domain *domain,
+		       unsigned long start_index, unsigned long last_index,
+		       unsigned long *unmapped_end_index,
+		       unsigned long real_last_index)
+{
+	while (start_index <= last_index) {
+		unsigned long batch_last_index;
+
+		if (*unmapped_end_index <= last_index) {
+			unsigned long start =
+				max(start_index, *unmapped_end_index);
+
+			batch_from_domain(batch, domain, area, start,
+					  last_index);
+			batch_last_index = start + batch->total_pfns - 1;
+		} else {
+			batch_last_index = last_index;
+		}
+
+		/*
+		 * unmaps must always 'cut' at a place where the pfns are not
+		 * contiguous to pair with the maps that always install
+		 * contiguous pages. Thus, if we have to stop unpinning in the
+		 * middle of the domains we need to keep reading pfns until we
+		 * find a cut point to do the unmap. The pfns we read are
+		 * carried over and either skipped or integrated into the next
+		 * batch.
+		 */
+		if (batch_last_index == last_index &&
+		    last_index != real_last_index)
+			batch_from_domain_continue(batch, domain, area,
+						   last_index + 1,
+						   real_last_index);
+
+		if (*unmapped_end_index <= batch_last_index) {
+			iopt_area_unmap_domain_range(
+				area, domain, *unmapped_end_index,
+				start_index + batch->total_pfns - 1);
+			*unmapped_end_index = start_index + batch->total_pfns;
+		}
+
+		/* unpin must follow unmap */
+		batch_unpin(batch, pages, 0,
+			    batch_last_index - start_index + 1);
+		start_index = batch_last_index + 1;
+
+		batch_clear_carry(batch,
+				  *unmapped_end_index - batch_last_index - 1);
+	}
+}
+
+static void __iopt_area_unfill_domain(struct iopt_area *area,
+				      struct iopt_pages *pages,
+				      struct iommu_domain *domain,
+				      unsigned long last_index)
+{
+	struct interval_tree_double_span_iter span;
+	unsigned long start_index = iopt_area_index(area);
+	unsigned long unmapped_end_index = start_index;
+	u64 backup[BATCH_BACKUP_SIZE];
+	struct pfn_batch batch;
+
+	lockdep_assert_held(&pages->mutex);
+
+	/*
+	 * For security we must not unpin something that is still DMA mapped,
+	 * so this must unmap any IOVA before we go ahead and unpin the pages.
+	 * This creates a complexity where we need to skip over unpinning pages
+	 * held in the xarray, but continue to unmap from the domain.
+	 *
+	 * The domain unmap cannot stop in the middle of a contiguous range of
+	 * PFNs. To solve this problem the unpinning step will read ahead to the
+	 * end of any contiguous span, unmap that whole span, and then only
+	 * unpin the leading part that does not have any accesses. The residual
+	 * PFNs that were unmapped but not unpinned are called a "carry" in the
+	 * batch as they are moved to the front of the PFN list and continue on
+	 * to the next iteration(s).
+	 */
+	batch_init_backup(&batch, last_index + 1, backup, sizeof(backup));
+	interval_tree_for_each_double_span(&span, &pages->domains_itree,
+					   &pages->access_itree, start_index,
+					   last_index) {
+		if (span.is_used) {
+			batch_skip_carry(&batch,
+					 span.last_used - span.start_used + 1);
+			continue;
+		}
+		iopt_area_unpin_domain(&batch, area, pages, domain,
+				       span.start_hole, span.last_hole,
+				       &unmapped_end_index, last_index);
+	}
+	/*
+	 * If the range ends in a access then we do the residual unmap without
+	 * any unpins.
+	 */
+	if (unmapped_end_index != last_index + 1)
+		iopt_area_unmap_domain_range(area, domain, unmapped_end_index,
+					     last_index);
+	WARN_ON(batch.total_pfns);
+	batch_destroy(&batch, backup);
+	update_unpinned(pages);
+}
+
+static void iopt_area_unfill_partial_domain(struct iopt_area *area,
+					    struct iopt_pages *pages,
+					    struct iommu_domain *domain,
+					    unsigned long end_index)
+{
+	if (end_index != iopt_area_index(area))
+		__iopt_area_unfill_domain(area, pages, domain, end_index - 1);
+}
+
+/**
+ * iopt_area_unmap_domain() - Unmap without unpinning PFNs in a domain
+ * @area: The IOVA range to unmap
+ * @domain: The domain to unmap
+ *
+ * The caller must know that unpinning is not required, usually because there
+ * are other domains in the iopt.
+ */
+void iopt_area_unmap_domain(struct iopt_area *area, struct iommu_domain *domain)
+{
+	iommu_unmap_nofail(domain, iopt_area_iova(area),
+			   iopt_area_length(area));
+}
+
+/**
+ * iopt_area_unfill_domain() - Unmap and unpin PFNs in a domain
+ * @area: IOVA area to use
+ * @pages: page supplier for the area (area->pages is NULL)
+ * @domain: Domain to unmap from
+ *
+ * The domain should be removed from the domains_itree before calling. The
+ * domain will always be unmapped, but the PFNs may not be unpinned if there are
+ * still accesses.
+ */
+void iopt_area_unfill_domain(struct iopt_area *area, struct iopt_pages *pages,
+			     struct iommu_domain *domain)
+{
+	__iopt_area_unfill_domain(area, pages, domain,
+				  iopt_area_last_index(area));
+}
+
+/**
+ * iopt_area_fill_domain() - Map PFNs from the area into a domain
+ * @area: IOVA area to use
+ * @domain: Domain to load PFNs into
+ *
+ * Read the pfns from the area's underlying iopt_pages and map them into the
+ * given domain. Called when attaching a new domain to an io_pagetable.
+ */
+int iopt_area_fill_domain(struct iopt_area *area, struct iommu_domain *domain)
+{
+	unsigned long done_end_index;
+	struct pfn_reader pfns;
+	int rc;
+
+	lockdep_assert_held(&area->pages->mutex);
+
+	rc = pfn_reader_first(&pfns, area->pages, iopt_area_index(area),
+			      iopt_area_last_index(area));
+	if (rc)
+		return rc;
+
+	while (!pfn_reader_done(&pfns)) {
+		done_end_index = pfns.batch_start_index;
+		rc = batch_to_domain(&pfns.batch, domain, area,
+				     pfns.batch_start_index);
+		if (rc)
+			goto out_unmap;
+		done_end_index = pfns.batch_end_index;
+
+		rc = pfn_reader_next(&pfns);
+		if (rc)
+			goto out_unmap;
+	}
+
+	rc = pfn_reader_update_pinned(&pfns);
+	if (rc)
+		goto out_unmap;
+	goto out_destroy;
+
+out_unmap:
+	pfn_reader_release_pins(&pfns);
+	iopt_area_unfill_partial_domain(area, area->pages, domain,
+					done_end_index);
+out_destroy:
+	pfn_reader_destroy(&pfns);
+	return rc;
+}
+
+/**
+ * iopt_area_fill_domains() - Install PFNs into the area's domains
+ * @area: The area to act on
+ * @pages: The pages associated with the area (area->pages is NULL)
+ *
+ * Called during area creation. The area is freshly created and not inserted in
+ * the domains_itree yet. PFNs are read and loaded into every domain held in the
+ * area's io_pagetable and the area is installed in the domains_itree.
+ *
+ * On failure all domains are left unchanged.
+ */
+int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages)
+{
+	unsigned long done_first_end_index;
+	unsigned long done_all_end_index;
+	struct iommu_domain *domain;
+	unsigned long unmap_index;
+	struct pfn_reader pfns;
+	unsigned long index;
+	int rc;
+
+	lockdep_assert_held(&area->iopt->domains_rwsem);
+
+	if (xa_empty(&area->iopt->domains))
+		return 0;
+
+	mutex_lock(&pages->mutex);
+	rc = pfn_reader_first(&pfns, pages, iopt_area_index(area),
+			      iopt_area_last_index(area));
+	if (rc)
+		goto out_unlock;
+
+	while (!pfn_reader_done(&pfns)) {
+		done_first_end_index = pfns.batch_end_index;
+		done_all_end_index = pfns.batch_start_index;
+		xa_for_each(&area->iopt->domains, index, domain) {
+			rc = batch_to_domain(&pfns.batch, domain, area,
+					     pfns.batch_start_index);
+			if (rc)
+				goto out_unmap;
+		}
+		done_all_end_index = done_first_end_index;
+
+		rc = pfn_reader_next(&pfns);
+		if (rc)
+			goto out_unmap;
+	}
+	rc = pfn_reader_update_pinned(&pfns);
+	if (rc)
+		goto out_unmap;
+
+	area->storage_domain = xa_load(&area->iopt->domains, 0);
+	interval_tree_insert(&area->pages_node, &pages->domains_itree);
+	goto out_destroy;
+
+out_unmap:
+	pfn_reader_release_pins(&pfns);
+	xa_for_each(&area->iopt->domains, unmap_index, domain) {
+		unsigned long end_index;
+
+		if (unmap_index < index)
+			end_index = done_first_end_index;
+		else
+			end_index = done_all_end_index;
+
+		/*
+		 * The area is not yet part of the domains_itree so we have to
+		 * manage the unpinning specially. The last domain does the
+		 * unpin, every other domain is just unmapped.
+		 */
+		if (unmap_index != area->iopt->next_domain_id - 1) {
+			if (end_index != iopt_area_index(area))
+				iopt_area_unmap_domain_range(
+					area, domain, iopt_area_index(area),
+					end_index - 1);
+		} else {
+			iopt_area_unfill_partial_domain(area, pages, domain,
+							end_index);
+		}
+	}
+out_destroy:
+	pfn_reader_destroy(&pfns);
+out_unlock:
+	mutex_unlock(&pages->mutex);
+	return rc;
+}
+
+/**
+ * iopt_area_unfill_domains() - unmap PFNs from the area's domains
+ * @area: The area to act on
+ * @pages: The pages associated with the area (area->pages is NULL)
+ *
+ * Called during area destruction. This unmaps the iova's covered by all the
+ * area's domains and releases the PFNs.
+ */
+void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages)
+{
+	struct io_pagetable *iopt = area->iopt;
+	struct iommu_domain *domain;
+	unsigned long index;
+
+	lockdep_assert_held(&iopt->domains_rwsem);
+
+	mutex_lock(&pages->mutex);
+	if (!area->storage_domain)
+		goto out_unlock;
+
+	xa_for_each(&iopt->domains, index, domain)
+		if (domain != area->storage_domain)
+			iopt_area_unmap_domain_range(
+				area, domain, iopt_area_index(area),
+				iopt_area_last_index(area));
+
+	interval_tree_remove(&area->pages_node, &pages->domains_itree);
+	iopt_area_unfill_domain(area, pages, area->storage_domain);
+	area->storage_domain = NULL;
+out_unlock:
+	mutex_unlock(&pages->mutex);
+}
+
+static void iopt_pages_unpin_xarray(struct pfn_batch *batch,
+				    struct iopt_pages *pages,
+				    unsigned long start_index,
+				    unsigned long end_index)
+{
+	while (start_index <= end_index) {
+		batch_from_xarray_clear(batch, &pages->pinned_pfns, start_index,
+					end_index);
+		batch_unpin(batch, pages, 0, batch->total_pfns);
+		start_index += batch->total_pfns;
+		batch_clear(batch);
+	}
+}
+
+/**
+ * iopt_pages_unfill_xarray() - Update the xarry after removing an access
+ * @pages: The pages to act on
+ * @start_index: Starting PFN index
+ * @last_index: Last PFN index
+ *
+ * Called when an iopt_pages_access is removed, removes pages from the itree.
+ * The access should already be removed from the access_itree.
+ */
+void iopt_pages_unfill_xarray(struct iopt_pages *pages,
+			      unsigned long start_index,
+			      unsigned long last_index)
+{
+	struct interval_tree_double_span_iter span;
+	u64 backup[BATCH_BACKUP_SIZE];
+	struct pfn_batch batch;
+	bool batch_inited = false;
+
+	lockdep_assert_held(&pages->mutex);
+
+	interval_tree_for_each_double_span(&span, &pages->access_itree,
+					   &pages->domains_itree, start_index,
+					   last_index) {
+		if (!span.is_used) {
+			if (!batch_inited) {
+				batch_init_backup(&batch,
+						  last_index - start_index + 1,
+						  backup, sizeof(backup));
+				batch_inited = true;
+			}
+			iopt_pages_unpin_xarray(&batch, pages, span.start_hole,
+						span.last_hole);
+		} else if (span.is_used == 2) {
+			/* Covered by a domain */
+			clear_xarray(&pages->pinned_pfns, span.start_used,
+				     span.last_used);
+		}
+		/* Otherwise covered by an existing access */
+	}
+	if (batch_inited)
+		batch_destroy(&batch, backup);
+	update_unpinned(pages);
+}
+
+/**
+ * iopt_pages_fill_from_xarray() - Fast path for reading PFNs
+ * @pages: The pages to act on
+ * @start_index: The first page index in the range
+ * @last_index: The last page index in the range
+ * @out_pages: The output array to return the pages
+ *
+ * This can be called if the caller is holding a refcount on an
+ * iopt_pages_access that is known to have already been filled. It quickly reads
+ * the pages directly from the xarray.
+ *
+ * This is part of the SW iommu interface to read pages for in-kernel use.
+ */
+void iopt_pages_fill_from_xarray(struct iopt_pages *pages,
+				 unsigned long start_index,
+				 unsigned long last_index,
+				 struct page **out_pages)
+{
+	XA_STATE(xas, &pages->pinned_pfns, start_index);
+	void *entry;
+
+	rcu_read_lock();
+	while (start_index <= last_index) {
+		entry = xas_next(&xas);
+		if (xas_retry(&xas, entry))
+			continue;
+		WARN_ON(!xa_is_value(entry));
+		*(out_pages++) = pfn_to_page(xa_to_value(entry));
+		start_index++;
+	}
+	rcu_read_unlock();
+}
+
+static int iopt_pages_fill_from_domain(struct iopt_pages *pages,
+				       unsigned long start_index,
+				       unsigned long last_index,
+				       struct page **out_pages)
+{
+	while (start_index != last_index + 1) {
+		unsigned long domain_last;
+		struct iopt_area *area;
+
+		area = iopt_pages_find_domain_area(pages, start_index);
+		if (WARN_ON(!area))
+			return -EINVAL;
+
+		domain_last = min(iopt_area_last_index(area), last_index);
+		out_pages = raw_pages_from_domain(area->storage_domain, area,
+						  start_index, domain_last,
+						  out_pages);
+		start_index = domain_last + 1;
+	}
+	return 0;
+}
+
+static int iopt_pages_fill_from_mm(struct iopt_pages *pages,
+				   struct pfn_reader_user *user,
+				   unsigned long start_index,
+				   unsigned long last_index,
+				   struct page **out_pages)
+{
+	unsigned long cur_index = start_index;
+	int rc;
+
+	while (cur_index != last_index + 1) {
+		user->upages = out_pages + (cur_index - start_index);
+		rc = pfn_reader_user_pin(user, pages, cur_index, last_index);
+		if (rc)
+			goto out_unpin;
+		cur_index = user->upages_end;
+	}
+	return 0;
+
+out_unpin:
+	if (start_index != cur_index)
+		iopt_pages_err_unpin(pages, start_index, cur_index - 1,
+				     out_pages);
+	return rc;
+}
+
+/**
+ * iopt_pages_fill_xarray() - Read PFNs
+ * @pages: The pages to act on
+ * @start_index: The first page index in the range
+ * @last_index: The last page index in the range
+ * @out_pages: The output array to return the pages, may be NULL
+ *
+ * This populates the xarray and returns the pages in out_pages. As the slow
+ * path this is able to copy pages from other storage tiers into the xarray.
+ *
+ * On failure the xarray is left unchanged.
+ *
+ * This is part of the SW iommu interface to read pages for in-kernel use.
+ */
+int iopt_pages_fill_xarray(struct iopt_pages *pages, unsigned long start_index,
+			   unsigned long last_index, struct page **out_pages)
+{
+	struct interval_tree_double_span_iter span;
+	unsigned long xa_end = start_index;
+	struct pfn_reader_user user;
+	int rc;
+
+	lockdep_assert_held(&pages->mutex);
+
+	pfn_reader_user_init(&user, pages);
+	user.upages_len = (last_index - start_index + 1) * sizeof(*out_pages);
+	interval_tree_for_each_double_span(&span, &pages->access_itree,
+					   &pages->domains_itree, start_index,
+					   last_index) {
+		struct page **cur_pages;
+
+		if (span.is_used == 1) {
+			cur_pages = out_pages + (span.start_used - start_index);
+			iopt_pages_fill_from_xarray(pages, span.start_used,
+						    span.last_used, cur_pages);
+			continue;
+		}
+
+		if (span.is_used == 2) {
+			cur_pages = out_pages + (span.start_used - start_index);
+			iopt_pages_fill_from_domain(pages, span.start_used,
+						    span.last_used, cur_pages);
+			rc = pages_to_xarray(&pages->pinned_pfns,
+					     span.start_used, span.last_used,
+					     cur_pages);
+			if (rc)
+				goto out_clean_xa;
+			xa_end = span.last_used + 1;
+			continue;
+		}
+
+		/* hole */
+		cur_pages = out_pages + (span.start_hole - start_index);
+		rc = iopt_pages_fill_from_mm(pages, &user, span.start_hole,
+					     span.last_hole, cur_pages);
+		if (rc)
+			goto out_clean_xa;
+		rc = pages_to_xarray(&pages->pinned_pfns, span.start_hole,
+				     span.last_hole, cur_pages);
+		if (rc) {
+			iopt_pages_err_unpin(pages, span.start_hole,
+					     span.last_hole, cur_pages);
+			goto out_clean_xa;
+		}
+		xa_end = span.last_hole + 1;
+	}
+	rc = pfn_reader_user_update_pinned(&user, pages);
+	if (rc)
+		goto out_clean_xa;
+	user.upages = NULL;
+	pfn_reader_user_destroy(&user, pages);
+	return 0;
+
+out_clean_xa:
+	if (start_index != xa_end)
+		iopt_pages_unfill_xarray(pages, start_index, xa_end - 1);
+	user.upages = NULL;
+	pfn_reader_user_destroy(&user, pages);
+	return rc;
+}
+
+/*
+ * This uses the pfn_reader instead of taking a shortcut by using the mm. It can
+ * do every scenario and is fully consistent with what an iommu_domain would
+ * see.
+ */
+static int iopt_pages_rw_slow(struct iopt_pages *pages,
+			      unsigned long start_index,
+			      unsigned long last_index, unsigned long offset,
+			      void *data, unsigned long length,
+			      unsigned int flags)
+{
+	struct pfn_reader pfns;
+	int rc;
+
+	mutex_lock(&pages->mutex);
+
+	rc = pfn_reader_first(&pfns, pages, start_index, last_index);
+	if (rc)
+		goto out_unlock;
+
+	while (!pfn_reader_done(&pfns)) {
+		unsigned long done;
+
+		done = batch_rw(&pfns.batch, data, offset, length, flags);
+		data += done;
+		length -= done;
+		offset = 0;
+		pfn_reader_unpin(&pfns);
+
+		rc = pfn_reader_next(&pfns);
+		if (rc)
+			goto out_destroy;
+	}
+	if (WARN_ON(length != 0))
+		rc = -EINVAL;
+out_destroy:
+	pfn_reader_destroy(&pfns);
+out_unlock:
+	mutex_unlock(&pages->mutex);
+	return rc;
+}
+
+/*
+ * A medium speed path that still allows DMA inconsistencies, but doesn't do any
+ * memory allocations or interval tree searches.
+ */
+static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index,
+			      unsigned long offset, void *data,
+			      unsigned long length, unsigned int flags)
+{
+	struct page *page = NULL;
+	int rc;
+
+	if (!mmget_not_zero(pages->source_mm))
+		return iopt_pages_rw_slow(pages, index, index, offset, data,
+					  length, flags);
+
+	mmap_read_lock(pages->source_mm);
+	rc = pin_user_pages_remote(
+		pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE),
+		1, (flags & IOMMUFD_ACCESS_RW_WRITE) ? FOLL_WRITE : 0, &page,
+		NULL, NULL);
+	mmap_read_unlock(pages->source_mm);
+	if (rc != 1) {
+		if (WARN_ON(rc >= 0))
+			rc = -EINVAL;
+		goto out_mmput;
+	}
+	copy_data_page(page, data, offset, length, flags);
+	unpin_user_page(page);
+	rc = 0;
+
+out_mmput:
+	mmput(pages->source_mm);
+	return rc;
+}
+
+/**
+ * iopt_pages_rw_access - Copy to/from a linear slice of the pages
+ * @pages: pages to act on
+ * @start_byte: First byte of pages to copy to/from
+ * @data: Kernel buffer to get/put the data
+ * @length: Number of bytes to copy
+ * @flags: IOMMUFD_ACCESS_RW_* flags
+ *
+ * This will find each page in the range, kmap it and then memcpy to/from
+ * the given kernel buffer.
+ */
+int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
+			 void *data, unsigned long length, unsigned int flags)
+{
+	unsigned long start_index = start_byte / PAGE_SIZE;
+	unsigned long last_index = (start_byte + length - 1) / PAGE_SIZE;
+	bool change_mm = current->mm != pages->source_mm;
+	int rc = 0;
+
+	if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
+		return -EPERM;
+
+	if (!(flags & IOMMUFD_ACCESS_RW_KTHREAD) && change_mm) {
+		if (start_index == last_index)
+			return iopt_pages_rw_page(pages, start_index,
+						  start_byte % PAGE_SIZE, data,
+						  length, flags);
+		return iopt_pages_rw_slow(pages, start_index, last_index,
+					  start_byte % PAGE_SIZE, data, length,
+					  flags);
+	}
+
+	/*
+	 * Try to copy using copy_to_user(). We do this as a fast path and
+	 * ignore any pinning inconsistencies, unlike a real DMA path.
+	 */
+	if (change_mm) {
+		if (!mmget_not_zero(pages->source_mm))
+			return iopt_pages_rw_slow(pages, start_index,
+						  last_index,
+						  start_byte % PAGE_SIZE, data,
+						  length, flags);
+		kthread_use_mm(pages->source_mm);
+	}
+
+	if (flags & IOMMUFD_ACCESS_RW_WRITE) {
+		if (copy_to_user(pages->uptr + start_byte, data, length))
+			rc = -EFAULT;
+	} else {
+		if (copy_from_user(data, pages->uptr + start_byte, length))
+			rc = -EFAULT;
+	}
+
+	if (change_mm) {
+		kthread_unuse_mm(pages->source_mm);
+		mmput(pages->source_mm);
+	}
+
+	return rc;
+}
+
+static struct iopt_pages_access *
+iopt_pages_get_exact_access(struct iopt_pages *pages, unsigned long index,
+			    unsigned long last)
+{
+	struct interval_tree_node *node;
+
+	lockdep_assert_held(&pages->mutex);
+
+	/* There can be overlapping ranges in this interval tree */
+	for (node = interval_tree_iter_first(&pages->access_itree, index, last);
+	     node; node = interval_tree_iter_next(node, index, last))
+		if (node->start == index && node->last == last)
+			return container_of(node, struct iopt_pages_access,
+					    node);
+	return NULL;
+}
+
+/**
+ * iopt_area_add_access() - Record an in-knerel access for PFNs
+ * @area: The source of PFNs
+ * @start_index: First page index
+ * @last_index: Inclusive last page index
+ * @out_pages: Output list of struct page's representing the PFNs
+ * @flags: IOMMUFD_ACCESS_RW_* flags
+ *
+ * Record that an in-kernel access will be accessing the pages, ensure they are
+ * pinned, and return the PFNs as a simple list of 'struct page *'.
+ *
+ * This should be undone through a matching call to iopt_area_remove_access()
+ */
+int iopt_area_add_access(struct iopt_area *area, unsigned long start_index,
+			  unsigned long last_index, struct page **out_pages,
+			  unsigned int flags)
+{
+	struct iopt_pages *pages = area->pages;
+	struct iopt_pages_access *access;
+	int rc;
+
+	if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
+		return -EPERM;
+
+	mutex_lock(&pages->mutex);
+	access = iopt_pages_get_exact_access(pages, start_index, last_index);
+	if (access) {
+		area->num_accesses++;
+		access->users++;
+		iopt_pages_fill_from_xarray(pages, start_index, last_index,
+					    out_pages);
+		mutex_unlock(&pages->mutex);
+		return 0;
+	}
+
+	access = kzalloc(sizeof(*access), GFP_KERNEL_ACCOUNT);
+	if (!access) {
+		rc = -ENOMEM;
+		goto err_unlock;
+	}
+
+	rc = iopt_pages_fill_xarray(pages, start_index, last_index, out_pages);
+	if (rc)
+		goto err_free;
+
+	access->node.start = start_index;
+	access->node.last = last_index;
+	access->users = 1;
+	area->num_accesses++;
+	interval_tree_insert(&access->node, &pages->access_itree);
+	mutex_unlock(&pages->mutex);
+	return 0;
+
+err_free:
+	kfree(access);
+err_unlock:
+	mutex_unlock(&pages->mutex);
+	return rc;
+}
+
+/**
+ * iopt_area_remove_access() - Release an in-kernel access for PFNs
+ * @area: The source of PFNs
+ * @start_index: First page index
+ * @last_index: Inclusive last page index
+ *
+ * Undo iopt_area_add_access() and unpin the pages if necessary. The caller
+ * must stop using the PFNs before calling this.
+ */
+void iopt_area_remove_access(struct iopt_area *area, unsigned long start_index,
+			     unsigned long last_index)
+{
+	struct iopt_pages *pages = area->pages;
+	struct iopt_pages_access *access;
+
+	mutex_lock(&pages->mutex);
+	access = iopt_pages_get_exact_access(pages, start_index, last_index);
+	if (WARN_ON(!access))
+		goto out_unlock;
+
+	WARN_ON(area->num_accesses == 0 || access->users == 0);
+	area->num_accesses--;
+	access->users--;
+	if (access->users)
+		goto out_unlock;
+
+	interval_tree_remove(&access->node, &pages->access_itree);
+	iopt_pages_unfill_xarray(pages, start_index, last_index);
+	kfree(access);
+out_unlock:
+	mutex_unlock(&pages->mutex);
+}
-- 
GitLab


From 51fe6141f0f64ae0bbc096a41a07572273e8c0ef Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:33 -0400
Subject: [PATCH 3129/4122] iommufd: Data structure to provide IOVA to PFN
 mapping

This is the remainder of the IOAS data structure. Provide an object called
an io_pagetable that is composed of iopt_areas pointing at iopt_pages,
along with a list of iommu_domains that mirror the IOVA to PFN map.

At the top this is a simple interval tree of iopt_areas indicating the map
of IOVA to iopt_pages. An xarray keeps track of a list of domains. Based
on the attached domains there is a minimum alignment for areas (which may
be smaller than PAGE_SIZE), an interval tree of reserved IOVA that can't
be mapped and an IOVA of allowed IOVA that can always be mappable.

The concept of an 'access' refers to something like a VFIO mdev that is
accessing the IOVA and using a 'struct page *' for CPU based access.

Externally an API is provided that matches the requirements of the IOCTL
interface for map/unmap and domain attachment.

The API provides a 'copy' primitive to establish a new IOVA map in a
different IOAS from an existing mapping by re-using the iopt_pages. This
is the basic mechanism to provide single pinning.

This is designed to support a pre-registration flow where userspace would
setup an dummy IOAS with no domains, map in memory and then establish an
access to pin all PFNs into the xarray.

Copy can then be used to create new IOVA mappings in a different IOAS,
with iommu_domains attached. Upon copy the PFNs will be read out of the
xarray and mapped into the iommu_domains, avoiding any pin_user_pages()
overheads.

Link: https://lore.kernel.org/r/10-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 .clang-format                           |    1 +
 drivers/iommu/iommufd/Makefile          |    1 +
 drivers/iommu/iommufd/io_pagetable.c    | 1186 +++++++++++++++++++++++
 drivers/iommu/iommufd/io_pagetable.h    |   55 ++
 drivers/iommu/iommufd/iommufd_private.h |   52 +
 5 files changed, 1295 insertions(+)
 create mode 100644 drivers/iommu/iommufd/io_pagetable.c

diff --git a/.clang-format b/.clang-format
index 501241f897766..78aba4a10b1bb 100644
--- a/.clang-format
+++ b/.clang-format
@@ -444,6 +444,7 @@ ForEachMacros:
   - 'interval_tree_for_each_span'
   - 'intlist__for_each_entry'
   - 'intlist__for_each_entry_safe'
+  - 'iopt_for_each_contig_area'
   - 'kcore_copy__for_each_phdr'
   - 'key_for_each'
   - 'key_for_each_safe'
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index 05a0e91e30afa..b66a8c47ff55e 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 iommufd-y := \
+	io_pagetable.o \
 	main.o \
 	pages.o
 
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
new file mode 100644
index 0000000000000..756d347948f0e
--- /dev/null
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -0,0 +1,1186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ * The io_pagetable is the top of datastructure that maps IOVA's to PFNs. The
+ * PFNs can be placed into an iommu_domain, or returned to the caller as a page
+ * list for access by an in-kernel user.
+ *
+ * The datastructure uses the iopt_pages to optimize the storage of the PFNs
+ * between the domains and xarray.
+ */
+#include <linux/iommufd.h>
+#include <linux/lockdep.h>
+#include <linux/iommu.h>
+#include <linux/sched/mm.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+
+#include "io_pagetable.h"
+#include "double_span.h"
+
+struct iopt_pages_list {
+	struct iopt_pages *pages;
+	struct iopt_area *area;
+	struct list_head next;
+	unsigned long start_byte;
+	unsigned long length;
+};
+
+struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
+					struct io_pagetable *iopt,
+					unsigned long iova,
+					unsigned long last_iova)
+{
+	lockdep_assert_held(&iopt->iova_rwsem);
+
+	iter->cur_iova = iova;
+	iter->last_iova = last_iova;
+	iter->area = iopt_area_iter_first(iopt, iova, iova);
+	if (!iter->area)
+		return NULL;
+	if (!iter->area->pages) {
+		iter->area = NULL;
+		return NULL;
+	}
+	return iter->area;
+}
+
+struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter)
+{
+	unsigned long last_iova;
+
+	if (!iter->area)
+		return NULL;
+	last_iova = iopt_area_last_iova(iter->area);
+	if (iter->last_iova <= last_iova)
+		return NULL;
+
+	iter->cur_iova = last_iova + 1;
+	iter->area = iopt_area_iter_next(iter->area, iter->cur_iova,
+					 iter->last_iova);
+	if (!iter->area)
+		return NULL;
+	if (iter->cur_iova != iopt_area_iova(iter->area) ||
+	    !iter->area->pages) {
+		iter->area = NULL;
+		return NULL;
+	}
+	return iter->area;
+}
+
+static bool __alloc_iova_check_hole(struct interval_tree_double_span_iter *span,
+				    unsigned long length,
+				    unsigned long iova_alignment,
+				    unsigned long page_offset)
+{
+	if (span->is_used || span->last_hole - span->start_hole < length - 1)
+		return false;
+
+	span->start_hole = ALIGN(span->start_hole, iova_alignment) |
+			   page_offset;
+	if (span->start_hole > span->last_hole ||
+	    span->last_hole - span->start_hole < length - 1)
+		return false;
+	return true;
+}
+
+static bool __alloc_iova_check_used(struct interval_tree_span_iter *span,
+				    unsigned long length,
+				    unsigned long iova_alignment,
+				    unsigned long page_offset)
+{
+	if (span->is_hole || span->last_used - span->start_used < length - 1)
+		return false;
+
+	span->start_used = ALIGN(span->start_used, iova_alignment) |
+			   page_offset;
+	if (span->start_used > span->last_used ||
+	    span->last_used - span->start_used < length - 1)
+		return false;
+	return true;
+}
+
+/*
+ * Automatically find a block of IOVA that is not being used and not reserved.
+ * Does not return a 0 IOVA even if it is valid.
+ */
+static int iopt_alloc_iova(struct io_pagetable *iopt, unsigned long *iova,
+			   unsigned long uptr, unsigned long length)
+{
+	unsigned long page_offset = uptr % PAGE_SIZE;
+	struct interval_tree_double_span_iter used_span;
+	struct interval_tree_span_iter allowed_span;
+	unsigned long iova_alignment;
+
+	lockdep_assert_held(&iopt->iova_rwsem);
+
+	/* Protect roundup_pow-of_two() from overflow */
+	if (length == 0 || length >= ULONG_MAX / 2)
+		return -EOVERFLOW;
+
+	/*
+	 * Keep alignment present in the uptr when building the IOVA, this
+	 * increases the chance we can map a THP.
+	 */
+	if (!uptr)
+		iova_alignment = roundup_pow_of_two(length);
+	else
+		iova_alignment = min_t(unsigned long,
+				       roundup_pow_of_two(length),
+				       1UL << __ffs64(uptr));
+
+	if (iova_alignment < iopt->iova_alignment)
+		return -EINVAL;
+
+	interval_tree_for_each_span(&allowed_span, &iopt->allowed_itree,
+				    PAGE_SIZE, ULONG_MAX - PAGE_SIZE) {
+		if (RB_EMPTY_ROOT(&iopt->allowed_itree.rb_root)) {
+			allowed_span.start_used = PAGE_SIZE;
+			allowed_span.last_used = ULONG_MAX - PAGE_SIZE;
+			allowed_span.is_hole = false;
+		}
+
+		if (!__alloc_iova_check_used(&allowed_span, length,
+					     iova_alignment, page_offset))
+			continue;
+
+		interval_tree_for_each_double_span(
+			&used_span, &iopt->reserved_itree, &iopt->area_itree,
+			allowed_span.start_used, allowed_span.last_used) {
+			if (!__alloc_iova_check_hole(&used_span, length,
+						     iova_alignment,
+						     page_offset))
+				continue;
+
+			*iova = used_span.start_hole;
+			return 0;
+		}
+	}
+	return -ENOSPC;
+}
+
+static int iopt_check_iova(struct io_pagetable *iopt, unsigned long iova,
+			   unsigned long length)
+{
+	unsigned long last;
+
+	lockdep_assert_held(&iopt->iova_rwsem);
+
+	if ((iova & (iopt->iova_alignment - 1)))
+		return -EINVAL;
+
+	if (check_add_overflow(iova, length - 1, &last))
+		return -EOVERFLOW;
+
+	/* No reserved IOVA intersects the range */
+	if (iopt_reserved_iter_first(iopt, iova, last))
+		return -EINVAL;
+
+	/* Check that there is not already a mapping in the range */
+	if (iopt_area_iter_first(iopt, iova, last))
+		return -EEXIST;
+	return 0;
+}
+
+/*
+ * The area takes a slice of the pages from start_bytes to start_byte + length
+ */
+static int iopt_insert_area(struct io_pagetable *iopt, struct iopt_area *area,
+			    struct iopt_pages *pages, unsigned long iova,
+			    unsigned long start_byte, unsigned long length,
+			    int iommu_prot)
+{
+	lockdep_assert_held_write(&iopt->iova_rwsem);
+
+	if ((iommu_prot & IOMMU_WRITE) && !pages->writable)
+		return -EPERM;
+
+	area->iommu_prot = iommu_prot;
+	area->page_offset = start_byte % PAGE_SIZE;
+	if (area->page_offset & (iopt->iova_alignment - 1))
+		return -EINVAL;
+
+	area->node.start = iova;
+	if (check_add_overflow(iova, length - 1, &area->node.last))
+		return -EOVERFLOW;
+
+	area->pages_node.start = start_byte / PAGE_SIZE;
+	if (check_add_overflow(start_byte, length - 1, &area->pages_node.last))
+		return -EOVERFLOW;
+	area->pages_node.last = area->pages_node.last / PAGE_SIZE;
+	if (WARN_ON(area->pages_node.last >= pages->npages))
+		return -EOVERFLOW;
+
+	/*
+	 * The area is inserted with a NULL pages indicating it is not fully
+	 * initialized yet.
+	 */
+	area->iopt = iopt;
+	interval_tree_insert(&area->node, &iopt->area_itree);
+	return 0;
+}
+
+static int iopt_alloc_area_pages(struct io_pagetable *iopt,
+				 struct list_head *pages_list,
+				 unsigned long length, unsigned long *dst_iova,
+				 int iommu_prot, unsigned int flags)
+{
+	struct iopt_pages_list *elm;
+	unsigned long iova;
+	int rc = 0;
+
+	list_for_each_entry(elm, pages_list, next) {
+		elm->area = kzalloc(sizeof(*elm->area), GFP_KERNEL_ACCOUNT);
+		if (!elm->area)
+			return -ENOMEM;
+	}
+
+	down_write(&iopt->iova_rwsem);
+	if ((length & (iopt->iova_alignment - 1)) || !length) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (flags & IOPT_ALLOC_IOVA) {
+		/* Use the first entry to guess the ideal IOVA alignment */
+		elm = list_first_entry(pages_list, struct iopt_pages_list,
+				       next);
+		rc = iopt_alloc_iova(
+			iopt, dst_iova,
+			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
+		if (rc)
+			goto out_unlock;
+	} else {
+		rc = iopt_check_iova(iopt, *dst_iova, length);
+		if (rc)
+			goto out_unlock;
+	}
+
+	/*
+	 * Areas are created with a NULL pages so that the IOVA space is
+	 * reserved and we can unlock the iova_rwsem.
+	 */
+	iova = *dst_iova;
+	list_for_each_entry(elm, pages_list, next) {
+		rc = iopt_insert_area(iopt, elm->area, elm->pages, iova,
+				      elm->start_byte, elm->length, iommu_prot);
+		if (rc)
+			goto out_unlock;
+		iova += elm->length;
+	}
+
+out_unlock:
+	up_write(&iopt->iova_rwsem);
+	return rc;
+}
+
+static void iopt_abort_area(struct iopt_area *area)
+{
+	if (area->iopt) {
+		down_write(&area->iopt->iova_rwsem);
+		interval_tree_remove(&area->node, &area->iopt->area_itree);
+		up_write(&area->iopt->iova_rwsem);
+	}
+	kfree(area);
+}
+
+void iopt_free_pages_list(struct list_head *pages_list)
+{
+	struct iopt_pages_list *elm;
+
+	while ((elm = list_first_entry_or_null(pages_list,
+					       struct iopt_pages_list, next))) {
+		if (elm->area)
+			iopt_abort_area(elm->area);
+		if (elm->pages)
+			iopt_put_pages(elm->pages);
+		list_del(&elm->next);
+		kfree(elm);
+	}
+}
+
+static int iopt_fill_domains_pages(struct list_head *pages_list)
+{
+	struct iopt_pages_list *undo_elm;
+	struct iopt_pages_list *elm;
+	int rc;
+
+	list_for_each_entry(elm, pages_list, next) {
+		rc = iopt_area_fill_domains(elm->area, elm->pages);
+		if (rc)
+			goto err_undo;
+	}
+	return 0;
+
+err_undo:
+	list_for_each_entry(undo_elm, pages_list, next) {
+		if (undo_elm == elm)
+			break;
+		iopt_area_unfill_domains(undo_elm->area, undo_elm->pages);
+	}
+	return rc;
+}
+
+int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
+		   unsigned long length, unsigned long *dst_iova,
+		   int iommu_prot, unsigned int flags)
+{
+	struct iopt_pages_list *elm;
+	int rc;
+
+	rc = iopt_alloc_area_pages(iopt, pages_list, length, dst_iova,
+				   iommu_prot, flags);
+	if (rc)
+		return rc;
+
+	down_read(&iopt->domains_rwsem);
+	rc = iopt_fill_domains_pages(pages_list);
+	if (rc)
+		goto out_unlock_domains;
+
+	down_write(&iopt->iova_rwsem);
+	list_for_each_entry(elm, pages_list, next) {
+		/*
+		 * area->pages must be set inside the domains_rwsem to ensure
+		 * any newly added domains will get filled. Moves the reference
+		 * in from the list.
+		 */
+		elm->area->pages = elm->pages;
+		elm->pages = NULL;
+		elm->area = NULL;
+	}
+	up_write(&iopt->iova_rwsem);
+out_unlock_domains:
+	up_read(&iopt->domains_rwsem);
+	return rc;
+}
+
+/**
+ * iopt_map_user_pages() - Map a user VA to an iova in the io page table
+ * @ictx: iommufd_ctx the iopt is part of
+ * @iopt: io_pagetable to act on
+ * @iova: If IOPT_ALLOC_IOVA is set this is unused on input and contains
+ *        the chosen iova on output. Otherwise is the iova to map to on input
+ * @uptr: User VA to map
+ * @length: Number of bytes to map
+ * @iommu_prot: Combination of IOMMU_READ/WRITE/etc bits for the mapping
+ * @flags: IOPT_ALLOC_IOVA or zero
+ *
+ * iova, uptr, and length must be aligned to iova_alignment. For domain backed
+ * page tables this will pin the pages and load them into the domain at iova.
+ * For non-domain page tables this will only setup a lazy reference and the
+ * caller must use iopt_access_pages() to touch them.
+ *
+ * iopt_unmap_iova() must be called to undo this before the io_pagetable can be
+ * destroyed.
+ */
+int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
+			unsigned long *iova, void __user *uptr,
+			unsigned long length, int iommu_prot,
+			unsigned int flags)
+{
+	struct iopt_pages_list elm = {};
+	LIST_HEAD(pages_list);
+	int rc;
+
+	elm.pages = iopt_alloc_pages(uptr, length, iommu_prot & IOMMU_WRITE);
+	if (IS_ERR(elm.pages))
+		return PTR_ERR(elm.pages);
+	if (ictx->account_mode == IOPT_PAGES_ACCOUNT_MM &&
+	    elm.pages->account_mode == IOPT_PAGES_ACCOUNT_USER)
+		elm.pages->account_mode = IOPT_PAGES_ACCOUNT_MM;
+	elm.start_byte = uptr - elm.pages->uptr;
+	elm.length = length;
+	list_add(&elm.next, &pages_list);
+
+	rc = iopt_map_pages(iopt, &pages_list, length, iova, iommu_prot, flags);
+	if (rc) {
+		if (elm.area)
+			iopt_abort_area(elm.area);
+		if (elm.pages)
+			iopt_put_pages(elm.pages);
+		return rc;
+	}
+	return 0;
+}
+
+int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
+		   unsigned long length, struct list_head *pages_list)
+{
+	struct iopt_area_contig_iter iter;
+	unsigned long last_iova;
+	struct iopt_area *area;
+	int rc;
+
+	if (!length)
+		return -EINVAL;
+	if (check_add_overflow(iova, length - 1, &last_iova))
+		return -EOVERFLOW;
+
+	down_read(&iopt->iova_rwsem);
+	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
+		struct iopt_pages_list *elm;
+		unsigned long last = min(last_iova, iopt_area_last_iova(area));
+
+		elm = kzalloc(sizeof(*elm), GFP_KERNEL_ACCOUNT);
+		if (!elm) {
+			rc = -ENOMEM;
+			goto err_free;
+		}
+		elm->start_byte = iopt_area_start_byte(area, iter.cur_iova);
+		elm->pages = area->pages;
+		elm->length = (last - iter.cur_iova) + 1;
+		kref_get(&elm->pages->kref);
+		list_add_tail(&elm->next, pages_list);
+	}
+	if (!iopt_area_contig_done(&iter)) {
+		rc = -ENOENT;
+		goto err_free;
+	}
+	up_read(&iopt->iova_rwsem);
+	return 0;
+err_free:
+	up_read(&iopt->iova_rwsem);
+	iopt_free_pages_list(pages_list);
+	return rc;
+}
+
+static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
+				 unsigned long last, unsigned long *unmapped)
+{
+	struct iopt_area *area;
+	unsigned long unmapped_bytes = 0;
+	int rc = -ENOENT;
+
+	/*
+	 * The domains_rwsem must be held in read mode any time any area->pages
+	 * is NULL. This prevents domain attach/detatch from running
+	 * concurrently with cleaning up the area.
+	 */
+	down_read(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+	while ((area = iopt_area_iter_first(iopt, start, last))) {
+		unsigned long area_last = iopt_area_last_iova(area);
+		unsigned long area_first = iopt_area_iova(area);
+		struct iopt_pages *pages;
+
+		/* Userspace should not race map/unmap's of the same area */
+		if (!area->pages) {
+			rc = -EBUSY;
+			goto out_unlock_iova;
+		}
+
+		if (area_first < start || area_last > last) {
+			rc = -ENOENT;
+			goto out_unlock_iova;
+		}
+
+		/*
+		 * num_accesses writers must hold the iova_rwsem too, so we can
+		 * safely read it under the write side of the iovam_rwsem
+		 * without the pages->mutex.
+		 */
+		if (area->num_accesses) {
+			start = area_first;
+			area->prevent_access = true;
+			up_write(&iopt->iova_rwsem);
+			up_read(&iopt->domains_rwsem);
+			/* Later patch calls back to drivers to unmap */
+			return -EBUSY;
+		}
+
+		pages = area->pages;
+		area->pages = NULL;
+		up_write(&iopt->iova_rwsem);
+
+		iopt_area_unfill_domains(area, pages);
+		iopt_abort_area(area);
+		iopt_put_pages(pages);
+
+		unmapped_bytes += area_last - area_first + 1;
+
+		down_write(&iopt->iova_rwsem);
+	}
+	if (unmapped_bytes)
+		rc = 0;
+
+out_unlock_iova:
+	up_write(&iopt->iova_rwsem);
+	up_read(&iopt->domains_rwsem);
+	if (unmapped)
+		*unmapped = unmapped_bytes;
+	return rc;
+}
+
+/**
+ * iopt_unmap_iova() - Remove a range of iova
+ * @iopt: io_pagetable to act on
+ * @iova: Starting iova to unmap
+ * @length: Number of bytes to unmap
+ * @unmapped: Return number of bytes unmapped
+ *
+ * The requested range must be a superset of existing ranges.
+ * Splitting/truncating IOVA mappings is not allowed.
+ */
+int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
+		    unsigned long length, unsigned long *unmapped)
+{
+	unsigned long iova_last;
+
+	if (!length)
+		return -EINVAL;
+
+	if (check_add_overflow(iova, length - 1, &iova_last))
+		return -EOVERFLOW;
+
+	return iopt_unmap_iova_range(iopt, iova, iova_last, unmapped);
+}
+
+int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped)
+{
+	int rc;
+
+	rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped);
+	/* If the IOVAs are empty then unmap all succeeds */
+	if (rc == -ENOENT)
+		return 0;
+	return rc;
+}
+
+/* The caller must always free all the nodes in the allowed_iova rb_root. */
+int iopt_set_allow_iova(struct io_pagetable *iopt,
+			struct rb_root_cached *allowed_iova)
+{
+	struct iopt_allowed *allowed;
+
+	down_write(&iopt->iova_rwsem);
+	swap(*allowed_iova, iopt->allowed_itree);
+
+	for (allowed = iopt_allowed_iter_first(iopt, 0, ULONG_MAX); allowed;
+	     allowed = iopt_allowed_iter_next(allowed, 0, ULONG_MAX)) {
+		if (iopt_reserved_iter_first(iopt, allowed->node.start,
+					     allowed->node.last)) {
+			swap(*allowed_iova, iopt->allowed_itree);
+			up_write(&iopt->iova_rwsem);
+			return -EADDRINUSE;
+		}
+	}
+	up_write(&iopt->iova_rwsem);
+	return 0;
+}
+
+int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
+		      unsigned long last, void *owner)
+{
+	struct iopt_reserved *reserved;
+
+	lockdep_assert_held_write(&iopt->iova_rwsem);
+
+	if (iopt_area_iter_first(iopt, start, last) ||
+	    iopt_allowed_iter_first(iopt, start, last))
+		return -EADDRINUSE;
+
+	reserved = kzalloc(sizeof(*reserved), GFP_KERNEL_ACCOUNT);
+	if (!reserved)
+		return -ENOMEM;
+	reserved->node.start = start;
+	reserved->node.last = last;
+	reserved->owner = owner;
+	interval_tree_insert(&reserved->node, &iopt->reserved_itree);
+	return 0;
+}
+
+static void __iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
+{
+	struct iopt_reserved *reserved, *next;
+
+	lockdep_assert_held_write(&iopt->iova_rwsem);
+
+	for (reserved = iopt_reserved_iter_first(iopt, 0, ULONG_MAX); reserved;
+	     reserved = next) {
+		next = iopt_reserved_iter_next(reserved, 0, ULONG_MAX);
+
+		if (reserved->owner == owner) {
+			interval_tree_remove(&reserved->node,
+					     &iopt->reserved_itree);
+			kfree(reserved);
+		}
+	}
+}
+
+void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner)
+{
+	down_write(&iopt->iova_rwsem);
+	__iopt_remove_reserved_iova(iopt, owner);
+	up_write(&iopt->iova_rwsem);
+}
+
+void iopt_init_table(struct io_pagetable *iopt)
+{
+	init_rwsem(&iopt->iova_rwsem);
+	init_rwsem(&iopt->domains_rwsem);
+	iopt->area_itree = RB_ROOT_CACHED;
+	iopt->allowed_itree = RB_ROOT_CACHED;
+	iopt->reserved_itree = RB_ROOT_CACHED;
+	xa_init_flags(&iopt->domains, XA_FLAGS_ACCOUNT);
+	xa_init_flags(&iopt->access_list, XA_FLAGS_ALLOC);
+
+	/*
+	 * iopt's start as SW tables that can use the entire size_t IOVA space
+	 * due to the use of size_t in the APIs. They have no alignment
+	 * restriction.
+	 */
+	iopt->iova_alignment = 1;
+}
+
+void iopt_destroy_table(struct io_pagetable *iopt)
+{
+	struct interval_tree_node *node;
+
+	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
+						ULONG_MAX))) {
+		interval_tree_remove(node, &iopt->allowed_itree);
+		kfree(container_of(node, struct iopt_allowed, node));
+	}
+
+	WARN_ON(!RB_EMPTY_ROOT(&iopt->reserved_itree.rb_root));
+	WARN_ON(!xa_empty(&iopt->domains));
+	WARN_ON(!xa_empty(&iopt->access_list));
+	WARN_ON(!RB_EMPTY_ROOT(&iopt->area_itree.rb_root));
+}
+
+/**
+ * iopt_unfill_domain() - Unfill a domain with PFNs
+ * @iopt: io_pagetable to act on
+ * @domain: domain to unfill
+ *
+ * This is used when removing a domain from the iopt. Every area in the iopt
+ * will be unmapped from the domain. The domain must already be removed from the
+ * domains xarray.
+ */
+static void iopt_unfill_domain(struct io_pagetable *iopt,
+			       struct iommu_domain *domain)
+{
+	struct iopt_area *area;
+
+	lockdep_assert_held(&iopt->iova_rwsem);
+	lockdep_assert_held_write(&iopt->domains_rwsem);
+
+	/*
+	 * Some other domain is holding all the pfns still, rapidly unmap this
+	 * domain.
+	 */
+	if (iopt->next_domain_id != 0) {
+		/* Pick an arbitrary remaining domain to act as storage */
+		struct iommu_domain *storage_domain =
+			xa_load(&iopt->domains, 0);
+
+		for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+		     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
+			struct iopt_pages *pages = area->pages;
+
+			if (!pages)
+				continue;
+
+			mutex_lock(&pages->mutex);
+			if (area->storage_domain == domain)
+				area->storage_domain = storage_domain;
+			mutex_unlock(&pages->mutex);
+
+			iopt_area_unmap_domain(area, domain);
+		}
+		return;
+	}
+
+	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
+		struct iopt_pages *pages = area->pages;
+
+		if (!pages)
+			continue;
+
+		mutex_lock(&pages->mutex);
+		interval_tree_remove(&area->pages_node, &pages->domains_itree);
+		WARN_ON(area->storage_domain != domain);
+		area->storage_domain = NULL;
+		iopt_area_unfill_domain(area, pages, domain);
+		mutex_unlock(&pages->mutex);
+	}
+}
+
+/**
+ * iopt_fill_domain() - Fill a domain with PFNs
+ * @iopt: io_pagetable to act on
+ * @domain: domain to fill
+ *
+ * Fill the domain with PFNs from every area in the iopt. On failure the domain
+ * is left unchanged.
+ */
+static int iopt_fill_domain(struct io_pagetable *iopt,
+			    struct iommu_domain *domain)
+{
+	struct iopt_area *end_area;
+	struct iopt_area *area;
+	int rc;
+
+	lockdep_assert_held(&iopt->iova_rwsem);
+	lockdep_assert_held_write(&iopt->domains_rwsem);
+
+	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
+		struct iopt_pages *pages = area->pages;
+
+		if (!pages)
+			continue;
+
+		mutex_lock(&pages->mutex);
+		rc = iopt_area_fill_domain(area, domain);
+		if (rc) {
+			mutex_unlock(&pages->mutex);
+			goto out_unfill;
+		}
+		if (!area->storage_domain) {
+			WARN_ON(iopt->next_domain_id != 0);
+			area->storage_domain = domain;
+			interval_tree_insert(&area->pages_node,
+					     &pages->domains_itree);
+		}
+		mutex_unlock(&pages->mutex);
+	}
+	return 0;
+
+out_unfill:
+	end_area = area;
+	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
+		struct iopt_pages *pages = area->pages;
+
+		if (area == end_area)
+			break;
+		if (!pages)
+			continue;
+		mutex_lock(&pages->mutex);
+		if (iopt->next_domain_id == 0) {
+			interval_tree_remove(&area->pages_node,
+					     &pages->domains_itree);
+			area->storage_domain = NULL;
+		}
+		iopt_area_unfill_domain(area, pages, domain);
+		mutex_unlock(&pages->mutex);
+	}
+	return rc;
+}
+
+/* All existing area's conform to an increased page size */
+static int iopt_check_iova_alignment(struct io_pagetable *iopt,
+				     unsigned long new_iova_alignment)
+{
+	unsigned long align_mask = new_iova_alignment - 1;
+	struct iopt_area *area;
+
+	lockdep_assert_held(&iopt->iova_rwsem);
+	lockdep_assert_held(&iopt->domains_rwsem);
+
+	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+	     area = iopt_area_iter_next(area, 0, ULONG_MAX))
+		if ((iopt_area_iova(area) & align_mask) ||
+		    (iopt_area_length(area) & align_mask) ||
+		    (area->page_offset & align_mask))
+			return -EADDRINUSE;
+	return 0;
+}
+
+int iopt_table_add_domain(struct io_pagetable *iopt,
+			  struct iommu_domain *domain)
+{
+	const struct iommu_domain_geometry *geometry = &domain->geometry;
+	struct iommu_domain *iter_domain;
+	unsigned int new_iova_alignment;
+	unsigned long index;
+	int rc;
+
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+
+	xa_for_each(&iopt->domains, index, iter_domain) {
+		if (WARN_ON(iter_domain == domain)) {
+			rc = -EEXIST;
+			goto out_unlock;
+		}
+	}
+
+	/*
+	 * The io page size drives the iova_alignment. Internally the iopt_pages
+	 * works in PAGE_SIZE units and we adjust when mapping sub-PAGE_SIZE
+	 * objects into the iommu_domain.
+	 *
+	 * A iommu_domain must always be able to accept PAGE_SIZE to be
+	 * compatible as we can't guarantee higher contiguity.
+	 */
+	new_iova_alignment = max_t(unsigned long,
+				   1UL << __ffs(domain->pgsize_bitmap),
+				   iopt->iova_alignment);
+	if (new_iova_alignment > PAGE_SIZE) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+	if (new_iova_alignment != iopt->iova_alignment) {
+		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
+		if (rc)
+			goto out_unlock;
+	}
+
+	/* No area exists that is outside the allowed domain aperture */
+	if (geometry->aperture_start != 0) {
+		rc = iopt_reserve_iova(iopt, 0, geometry->aperture_start - 1,
+				       domain);
+		if (rc)
+			goto out_reserved;
+	}
+	if (geometry->aperture_end != ULONG_MAX) {
+		rc = iopt_reserve_iova(iopt, geometry->aperture_end + 1,
+				       ULONG_MAX, domain);
+		if (rc)
+			goto out_reserved;
+	}
+
+	rc = xa_reserve(&iopt->domains, iopt->next_domain_id, GFP_KERNEL);
+	if (rc)
+		goto out_reserved;
+
+	rc = iopt_fill_domain(iopt, domain);
+	if (rc)
+		goto out_release;
+
+	iopt->iova_alignment = new_iova_alignment;
+	xa_store(&iopt->domains, iopt->next_domain_id, domain, GFP_KERNEL);
+	iopt->next_domain_id++;
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+	return 0;
+out_release:
+	xa_release(&iopt->domains, iopt->next_domain_id);
+out_reserved:
+	__iopt_remove_reserved_iova(iopt, domain);
+out_unlock:
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+	return rc;
+}
+
+static int iopt_calculate_iova_alignment(struct io_pagetable *iopt)
+{
+	unsigned long new_iova_alignment;
+	struct iommufd_access *access;
+	struct iommu_domain *domain;
+	unsigned long index;
+
+	lockdep_assert_held_write(&iopt->iova_rwsem);
+	lockdep_assert_held(&iopt->domains_rwsem);
+
+	/* See batch_iommu_map_small() */
+	if (iopt->disable_large_pages)
+		new_iova_alignment = PAGE_SIZE;
+	else
+		new_iova_alignment = 1;
+
+	xa_for_each(&iopt->domains, index, domain)
+		new_iova_alignment = max_t(unsigned long,
+					   1UL << __ffs(domain->pgsize_bitmap),
+					   new_iova_alignment);
+	xa_for_each(&iopt->access_list, index, access)
+		new_iova_alignment = max_t(unsigned long,
+					   access->iova_alignment,
+					   new_iova_alignment);
+
+	if (new_iova_alignment > iopt->iova_alignment) {
+		int rc;
+
+		rc = iopt_check_iova_alignment(iopt, new_iova_alignment);
+		if (rc)
+			return rc;
+	}
+	iopt->iova_alignment = new_iova_alignment;
+	return 0;
+}
+
+void iopt_table_remove_domain(struct io_pagetable *iopt,
+			      struct iommu_domain *domain)
+{
+	struct iommu_domain *iter_domain = NULL;
+	unsigned long index;
+
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+
+	xa_for_each(&iopt->domains, index, iter_domain)
+		if (iter_domain == domain)
+			break;
+	if (WARN_ON(iter_domain != domain) || index >= iopt->next_domain_id)
+		goto out_unlock;
+
+	/*
+	 * Compress the xarray to keep it linear by swapping the entry to erase
+	 * with the tail entry and shrinking the tail.
+	 */
+	iopt->next_domain_id--;
+	iter_domain = xa_erase(&iopt->domains, iopt->next_domain_id);
+	if (index != iopt->next_domain_id)
+		xa_store(&iopt->domains, index, iter_domain, GFP_KERNEL);
+
+	iopt_unfill_domain(iopt, domain);
+	__iopt_remove_reserved_iova(iopt, domain);
+
+	WARN_ON(iopt_calculate_iova_alignment(iopt));
+out_unlock:
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+}
+
+/**
+ * iopt_area_split - Split an area into two parts at iova
+ * @area: The area to split
+ * @iova: Becomes the last of a new area
+ *
+ * This splits an area into two. It is part of the VFIO compatibility to allow
+ * poking a hole in the mapping. The two areas continue to point at the same
+ * iopt_pages, just with different starting bytes.
+ */
+static int iopt_area_split(struct iopt_area *area, unsigned long iova)
+{
+	unsigned long alignment = area->iopt->iova_alignment;
+	unsigned long last_iova = iopt_area_last_iova(area);
+	unsigned long start_iova = iopt_area_iova(area);
+	unsigned long new_start = iova + 1;
+	struct io_pagetable *iopt = area->iopt;
+	struct iopt_pages *pages = area->pages;
+	struct iopt_area *lhs;
+	struct iopt_area *rhs;
+	int rc;
+
+	lockdep_assert_held_write(&iopt->iova_rwsem);
+
+	if (iova == start_iova || iova == last_iova)
+		return 0;
+
+	if (!pages || area->prevent_access)
+		return -EBUSY;
+
+	if (new_start & (alignment - 1) ||
+	    iopt_area_start_byte(area, new_start) & (alignment - 1))
+		return -EINVAL;
+
+	lhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
+	if (!lhs)
+		return -ENOMEM;
+
+	rhs = kzalloc(sizeof(*area), GFP_KERNEL_ACCOUNT);
+	if (!rhs) {
+		rc = -ENOMEM;
+		goto err_free_lhs;
+	}
+
+	mutex_lock(&pages->mutex);
+	/*
+	 * Splitting is not permitted if an access exists, we don't track enough
+	 * information to split existing accesses.
+	 */
+	if (area->num_accesses) {
+		rc = -EINVAL;
+		goto err_unlock;
+	}
+
+	/*
+	 * Splitting is not permitted if a domain could have been mapped with
+	 * huge pages.
+	 */
+	if (area->storage_domain && !iopt->disable_large_pages) {
+		rc = -EINVAL;
+		goto err_unlock;
+	}
+
+	interval_tree_remove(&area->node, &iopt->area_itree);
+	rc = iopt_insert_area(iopt, lhs, area->pages, start_iova,
+			      iopt_area_start_byte(area, start_iova),
+			      (new_start - 1) - start_iova + 1,
+			      area->iommu_prot);
+	if (WARN_ON(rc))
+		goto err_insert;
+
+	rc = iopt_insert_area(iopt, rhs, area->pages, new_start,
+			      iopt_area_start_byte(area, new_start),
+			      last_iova - new_start + 1, area->iommu_prot);
+	if (WARN_ON(rc))
+		goto err_remove_lhs;
+
+	lhs->storage_domain = area->storage_domain;
+	lhs->pages = area->pages;
+	rhs->storage_domain = area->storage_domain;
+	rhs->pages = area->pages;
+	kref_get(&rhs->pages->kref);
+	kfree(area);
+	mutex_unlock(&pages->mutex);
+
+	/*
+	 * No change to domains or accesses because the pages hasn't been
+	 * changed
+	 */
+	return 0;
+
+err_remove_lhs:
+	interval_tree_remove(&lhs->node, &iopt->area_itree);
+err_insert:
+	interval_tree_insert(&area->node, &iopt->area_itree);
+err_unlock:
+	mutex_unlock(&pages->mutex);
+	kfree(rhs);
+err_free_lhs:
+	kfree(lhs);
+	return rc;
+}
+
+int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
+		  size_t num_iovas)
+{
+	int rc = 0;
+	int i;
+
+	down_write(&iopt->iova_rwsem);
+	for (i = 0; i < num_iovas; i++) {
+		struct iopt_area *area;
+
+		area = iopt_area_iter_first(iopt, iovas[i], iovas[i]);
+		if (!area)
+			continue;
+		rc = iopt_area_split(area, iovas[i]);
+		if (rc)
+			break;
+	}
+	up_write(&iopt->iova_rwsem);
+	return rc;
+}
+
+void iopt_enable_large_pages(struct io_pagetable *iopt)
+{
+	int rc;
+
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+	WRITE_ONCE(iopt->disable_large_pages, false);
+	rc = iopt_calculate_iova_alignment(iopt);
+	WARN_ON(rc);
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+}
+
+int iopt_disable_large_pages(struct io_pagetable *iopt)
+{
+	int rc = 0;
+
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+	if (iopt->disable_large_pages)
+		goto out_unlock;
+
+	/* Won't do it if domains already have pages mapped in them */
+	if (!xa_empty(&iopt->domains) &&
+	    !RB_EMPTY_ROOT(&iopt->area_itree.rb_root)) {
+		rc = -EINVAL;
+		goto out_unlock;
+	}
+
+	WRITE_ONCE(iopt->disable_large_pages, true);
+	rc = iopt_calculate_iova_alignment(iopt);
+	if (rc)
+		WRITE_ONCE(iopt->disable_large_pages, false);
+out_unlock:
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+	return rc;
+}
+
+int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access)
+{
+	int rc;
+
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+	rc = xa_alloc(&iopt->access_list, &access->iopt_access_list_id, access,
+		      xa_limit_16b, GFP_KERNEL_ACCOUNT);
+	if (rc)
+		goto out_unlock;
+
+	rc = iopt_calculate_iova_alignment(iopt);
+	if (rc) {
+		xa_erase(&iopt->access_list, access->iopt_access_list_id);
+		goto out_unlock;
+	}
+
+out_unlock:
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+	return rc;
+}
+
+void iopt_remove_access(struct io_pagetable *iopt,
+			struct iommufd_access *access)
+{
+	down_write(&iopt->domains_rwsem);
+	down_write(&iopt->iova_rwsem);
+	WARN_ON(xa_erase(&iopt->access_list, access->iopt_access_list_id) !=
+		access);
+	WARN_ON(iopt_calculate_iova_alignment(iopt));
+	up_write(&iopt->iova_rwsem);
+	up_write(&iopt->domains_rwsem);
+}
+
+/* Narrow the valid_iova_itree to include reserved ranges from a group. */
+int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
+					  struct device *device,
+					  struct iommu_group *group,
+					  phys_addr_t *sw_msi_start)
+{
+	struct iommu_resv_region *resv;
+	struct iommu_resv_region *tmp;
+	LIST_HEAD(group_resv_regions);
+	int rc;
+
+	down_write(&iopt->iova_rwsem);
+	rc = iommu_get_group_resv_regions(group, &group_resv_regions);
+	if (rc)
+		goto out_unlock;
+
+	list_for_each_entry(resv, &group_resv_regions, list) {
+		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
+			continue;
+
+		/*
+		 * The presence of any 'real' MSI regions should take precedence
+		 * over the software-managed one if the IOMMU driver happens to
+		 * advertise both types.
+		 */
+		if (sw_msi_start && resv->type == IOMMU_RESV_MSI) {
+			*sw_msi_start = 0;
+			sw_msi_start = NULL;
+		}
+		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI)
+			*sw_msi_start = resv->start;
+
+		rc = iopt_reserve_iova(iopt, resv->start,
+				       resv->length - 1 + resv->start, device);
+		if (rc)
+			goto out_reserved;
+	}
+	rc = 0;
+	goto out_free_resv;
+
+out_reserved:
+	__iopt_remove_reserved_iova(iopt, device);
+out_free_resv:
+	list_for_each_entry_safe(resv, tmp, &group_resv_regions, list)
+		kfree(resv);
+out_unlock:
+	up_write(&iopt->iova_rwsem);
+	return rc;
+}
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index a2b724175057b..2ee6942c3ef4a 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -46,9 +46,19 @@ struct iopt_area {
 	unsigned int page_offset;
 	/* IOMMU_READ, IOMMU_WRITE, etc */
 	int iommu_prot;
+	bool prevent_access : 1;
 	unsigned int num_accesses;
 };
 
+struct iopt_allowed {
+	struct interval_tree_node node;
+};
+
+struct iopt_reserved {
+	struct interval_tree_node node;
+	void *owner;
+};
+
 int iopt_area_fill_domains(struct iopt_area *area, struct iopt_pages *pages);
 void iopt_area_unfill_domains(struct iopt_area *area, struct iopt_pages *pages);
 
@@ -83,6 +93,24 @@ static inline size_t iopt_area_length(struct iopt_area *area)
 	return (area->node.last - area->node.start) + 1;
 }
 
+/*
+ * Number of bytes from the start of the iopt_pages that the iova begins.
+ * iopt_area_start_byte() / PAGE_SIZE encodes the starting page index
+ * iopt_area_start_byte() % PAGE_SIZE encodes the offset within that page
+ */
+static inline unsigned long iopt_area_start_byte(struct iopt_area *area,
+						 unsigned long iova)
+{
+	return (iova - iopt_area_iova(area)) + area->page_offset +
+	       iopt_area_index(area) * PAGE_SIZE;
+}
+
+static inline unsigned long iopt_area_iova_to_index(struct iopt_area *area,
+						    unsigned long iova)
+{
+	return iopt_area_start_byte(area, iova) / PAGE_SIZE;
+}
+
 #define __make_iopt_iter(name)                                                 \
 	static inline struct iopt_##name *iopt_##name##_iter_first(            \
 		struct io_pagetable *iopt, unsigned long start,                \
@@ -110,6 +138,33 @@ static inline size_t iopt_area_length(struct iopt_area *area)
 	}
 
 __make_iopt_iter(area)
+__make_iopt_iter(allowed)
+__make_iopt_iter(reserved)
+
+struct iopt_area_contig_iter {
+	unsigned long cur_iova;
+	unsigned long last_iova;
+	struct iopt_area *area;
+};
+struct iopt_area *iopt_area_contig_init(struct iopt_area_contig_iter *iter,
+					struct io_pagetable *iopt,
+					unsigned long iova,
+					unsigned long last_iova);
+struct iopt_area *iopt_area_contig_next(struct iopt_area_contig_iter *iter);
+
+static inline bool iopt_area_contig_done(struct iopt_area_contig_iter *iter)
+{
+	return iter->area && iter->last_iova <= iopt_area_last_iova(iter->area);
+}
+
+/*
+ * Iterate over a contiguous list of areas that span the iova,last_iova range.
+ * The caller must check iopt_area_contig_done() after the loop to see if
+ * contiguous areas existed.
+ */
+#define iopt_for_each_contig_area(iter, area, iopt, iova, last_iova)          \
+	for (area = iopt_area_contig_init(iter, iopt, iova, last_iova); area; \
+	     area = iopt_area_contig_next(iter))
 
 enum {
 	IOPT_PAGES_ACCOUNT_NONE = 0,
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 169a30ff3bf0d..f7ab6c6edafd1 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -9,9 +9,14 @@
 #include <linux/refcount.h>
 #include <linux/uaccess.h>
 
+struct iommu_domain;
+struct iommu_group;
+
 struct iommufd_ctx {
 	struct file *file;
 	struct xarray objects;
+
+	u8 account_mode;
 };
 
 /*
@@ -27,6 +32,7 @@ struct iommufd_ctx {
 struct io_pagetable {
 	struct rw_semaphore domains_rwsem;
 	struct xarray domains;
+	struct xarray access_list;
 	unsigned int next_domain_id;
 
 	struct rw_semaphore iova_rwsem;
@@ -36,7 +42,45 @@ struct io_pagetable {
 	/* IOVA that cannot be allocated, struct iopt_reserved */
 	struct rb_root_cached reserved_itree;
 	u8 disable_large_pages;
+	unsigned long iova_alignment;
+};
+
+void iopt_init_table(struct io_pagetable *iopt);
+void iopt_destroy_table(struct io_pagetable *iopt);
+int iopt_get_pages(struct io_pagetable *iopt, unsigned long iova,
+		   unsigned long length, struct list_head *pages_list);
+void iopt_free_pages_list(struct list_head *pages_list);
+enum {
+	IOPT_ALLOC_IOVA = 1 << 0,
 };
+int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
+			unsigned long *iova, void __user *uptr,
+			unsigned long length, int iommu_prot,
+			unsigned int flags);
+int iopt_map_pages(struct io_pagetable *iopt, struct list_head *pages_list,
+		   unsigned long length, unsigned long *dst_iova,
+		   int iommu_prot, unsigned int flags);
+int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
+		    unsigned long length, unsigned long *unmapped);
+int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
+
+int iopt_table_add_domain(struct io_pagetable *iopt,
+			  struct iommu_domain *domain);
+void iopt_table_remove_domain(struct io_pagetable *iopt,
+			      struct iommu_domain *domain);
+int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
+					  struct device *device,
+					  struct iommu_group *group,
+					  phys_addr_t *sw_msi_start);
+int iopt_set_allow_iova(struct io_pagetable *iopt,
+			struct rb_root_cached *allowed_iova);
+int iopt_reserve_iova(struct io_pagetable *iopt, unsigned long start,
+		      unsigned long last, void *owner);
+void iopt_remove_reserved_iova(struct io_pagetable *iopt, void *owner);
+int iopt_cut_iova(struct io_pagetable *iopt, unsigned long *iovas,
+		  size_t num_iovas);
+void iopt_enable_large_pages(struct io_pagetable *iopt);
+int iopt_disable_large_pages(struct io_pagetable *iopt);
 
 struct iommufd_ucmd {
 	struct iommufd_ctx *ictx;
@@ -130,4 +174,12 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 			     type),                                            \
 		     typeof(*(ptr)), obj)
 
+struct iommufd_access {
+	unsigned long iova_alignment;
+	u32 iopt_access_list_id;
+};
+
+int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
+void iopt_remove_access(struct io_pagetable *iopt,
+			struct iommufd_access *access);
 #endif
-- 
GitLab


From aad37e71d5c4dc1d3c25734f0bcd51c324f94b5e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:34 -0400
Subject: [PATCH 3130/4122] iommufd: IOCTLs for the io_pagetable

Connect the IOAS to its IOCTL interface. This exposes most of the
functionality in the io_pagetable to userspace.

This is intended to be the core of the generic interface that IOMMUFD will
provide. Every IOMMU driver should be able to implement an iommu_domain
that is compatible with this generic mechanism.

It is also designed to be easy to use for simple non virtual machine
monitor users, like DPDK:
 - Universal simple support for all IOMMUs (no PPC special path)
 - An IOVA allocator that considers the aperture and the allowed/reserved
   ranges
 - io_pagetable allows any number of iommu_domains to be connected to the
   IOAS
 - Automatic allocation and re-use of iommu_domains

Along with room in the design to add non-generic features to cater to
specific HW functionality.

Link: https://lore.kernel.org/r/11-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |   1 +
 drivers/iommu/iommufd/ioas.c            | 392 ++++++++++++++++++++++++
 drivers/iommu/iommufd/iommufd_private.h |  33 ++
 drivers/iommu/iommufd/main.c            |  48 +++
 include/uapi/linux/iommufd.h            | 258 +++++++++++++++-
 5 files changed, 731 insertions(+), 1 deletion(-)
 create mode 100644 drivers/iommu/iommufd/ioas.c

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index b66a8c47ff55e..2b4f36f1b72f9 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 iommufd-y := \
 	io_pagetable.o \
+	ioas.o \
 	main.o \
 	pages.o
 
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
new file mode 100644
index 0000000000000..6ff97dafc8913
--- /dev/null
+++ b/drivers/iommu/iommufd/ioas.c
@@ -0,0 +1,392 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/interval_tree.h>
+#include <linux/iommufd.h>
+#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
+
+#include "io_pagetable.h"
+
+void iommufd_ioas_destroy(struct iommufd_object *obj)
+{
+	struct iommufd_ioas *ioas = container_of(obj, struct iommufd_ioas, obj);
+	int rc;
+
+	rc = iopt_unmap_all(&ioas->iopt, NULL);
+	WARN_ON(rc && rc != -ENOENT);
+	iopt_destroy_table(&ioas->iopt);
+}
+
+struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
+{
+	struct iommufd_ioas *ioas;
+
+	ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS);
+	if (IS_ERR(ioas))
+		return ioas;
+
+	iopt_init_table(&ioas->iopt);
+	return ioas;
+}
+
+int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_ioas_alloc *cmd = ucmd->cmd;
+	struct iommufd_ioas *ioas;
+	int rc;
+
+	if (cmd->flags)
+		return -EOPNOTSUPP;
+
+	ioas = iommufd_ioas_alloc(ucmd->ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	cmd->out_ioas_id = ioas->obj.id;
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+	if (rc)
+		goto out_table;
+	iommufd_object_finalize(ucmd->ictx, &ioas->obj);
+	return 0;
+
+out_table:
+	iommufd_object_abort_and_destroy(ucmd->ictx, &ioas->obj);
+	return rc;
+}
+
+int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_iova_range __user *ranges;
+	struct iommu_ioas_iova_ranges *cmd = ucmd->cmd;
+	struct iommufd_ioas *ioas;
+	struct interval_tree_span_iter span;
+	u32 max_iovas;
+	int rc;
+
+	if (cmd->__reserved)
+		return -EOPNOTSUPP;
+
+	ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	down_read(&ioas->iopt.iova_rwsem);
+	max_iovas = cmd->num_iovas;
+	ranges = u64_to_user_ptr(cmd->allowed_iovas);
+	cmd->num_iovas = 0;
+	cmd->out_iova_alignment = ioas->iopt.iova_alignment;
+	interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0,
+				    ULONG_MAX) {
+		if (!span.is_hole)
+			continue;
+		if (cmd->num_iovas < max_iovas) {
+			struct iommu_iova_range elm = {
+				.start = span.start_hole,
+				.last = span.last_hole,
+			};
+
+			if (copy_to_user(&ranges[cmd->num_iovas], &elm,
+					 sizeof(elm))) {
+				rc = -EFAULT;
+				goto out_put;
+			}
+		}
+		cmd->num_iovas++;
+	}
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+	if (rc)
+		goto out_put;
+	if (cmd->num_iovas > max_iovas)
+		rc = -EMSGSIZE;
+out_put:
+	up_read(&ioas->iopt.iova_rwsem);
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+static int iommufd_ioas_load_iovas(struct rb_root_cached *itree,
+				   struct iommu_iova_range __user *ranges,
+				   u32 num)
+{
+	u32 i;
+
+	for (i = 0; i != num; i++) {
+		struct iommu_iova_range range;
+		struct iopt_allowed *allowed;
+
+		if (copy_from_user(&range, ranges + i, sizeof(range)))
+			return -EFAULT;
+
+		if (range.start >= range.last)
+			return -EINVAL;
+
+		if (interval_tree_iter_first(itree, range.start, range.last))
+			return -EINVAL;
+
+		allowed = kzalloc(sizeof(*allowed), GFP_KERNEL_ACCOUNT);
+		if (!allowed)
+			return -ENOMEM;
+		allowed->node.start = range.start;
+		allowed->node.last = range.last;
+
+		interval_tree_insert(&allowed->node, itree);
+	}
+	return 0;
+}
+
+int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_ioas_allow_iovas *cmd = ucmd->cmd;
+	struct rb_root_cached allowed_iova = RB_ROOT_CACHED;
+	struct interval_tree_node *node;
+	struct iommufd_ioas *ioas;
+	struct io_pagetable *iopt;
+	int rc = 0;
+
+	if (cmd->__reserved)
+		return -EOPNOTSUPP;
+
+	ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+	iopt = &ioas->iopt;
+
+	rc = iommufd_ioas_load_iovas(&allowed_iova,
+				     u64_to_user_ptr(cmd->allowed_iovas),
+				     cmd->num_iovas);
+	if (rc)
+		goto out_free;
+
+	/*
+	 * We want the allowed tree update to be atomic, so we have to keep the
+	 * original nodes around, and keep track of the new nodes as we allocate
+	 * memory for them. The simplest solution is to have a new/old tree and
+	 * then swap new for old. On success we free the old tree, on failure we
+	 * free the new tree.
+	 */
+	rc = iopt_set_allow_iova(iopt, &allowed_iova);
+out_free:
+	while ((node = interval_tree_iter_first(&allowed_iova, 0, ULONG_MAX))) {
+		interval_tree_remove(node, &allowed_iova);
+		kfree(container_of(node, struct iopt_allowed, node));
+	}
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+static int conv_iommu_prot(u32 map_flags)
+{
+	/*
+	 * We provide no manual cache coherency ioctls to userspace and most
+	 * architectures make the CPU ops for cache flushing privileged.
+	 * Therefore we require the underlying IOMMU to support CPU coherent
+	 * operation. Support for IOMMU_CACHE is enforced by the
+	 * IOMMU_CAP_CACHE_COHERENCY test during bind.
+	 */
+	int iommu_prot = IOMMU_CACHE;
+
+	if (map_flags & IOMMU_IOAS_MAP_WRITEABLE)
+		iommu_prot |= IOMMU_WRITE;
+	if (map_flags & IOMMU_IOAS_MAP_READABLE)
+		iommu_prot |= IOMMU_READ;
+	return iommu_prot;
+}
+
+int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_ioas_map *cmd = ucmd->cmd;
+	unsigned long iova = cmd->iova;
+	struct iommufd_ioas *ioas;
+	unsigned int flags = 0;
+	int rc;
+
+	if ((cmd->flags &
+	     ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
+	       IOMMU_IOAS_MAP_READABLE)) ||
+	    cmd->__reserved)
+		return -EOPNOTSUPP;
+	if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX)
+		return -EOVERFLOW;
+
+	ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
+		flags = IOPT_ALLOC_IOVA;
+	rc = iopt_map_user_pages(ucmd->ictx, &ioas->iopt, &iova,
+				 u64_to_user_ptr(cmd->user_va), cmd->length,
+				 conv_iommu_prot(cmd->flags), flags);
+	if (rc)
+		goto out_put;
+
+	cmd->iova = iova;
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_put:
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_ioas_copy *cmd = ucmd->cmd;
+	struct iommufd_ioas *src_ioas;
+	struct iommufd_ioas *dst_ioas;
+	unsigned int flags = 0;
+	LIST_HEAD(pages_list);
+	unsigned long iova;
+	int rc;
+
+	if ((cmd->flags &
+	     ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
+	       IOMMU_IOAS_MAP_READABLE)))
+		return -EOPNOTSUPP;
+	if (cmd->length >= ULONG_MAX || cmd->src_iova >= ULONG_MAX ||
+	    cmd->dst_iova >= ULONG_MAX)
+		return -EOVERFLOW;
+
+	src_ioas = iommufd_get_ioas(ucmd, cmd->src_ioas_id);
+	if (IS_ERR(src_ioas))
+		return PTR_ERR(src_ioas);
+	rc = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, cmd->length,
+			    &pages_list);
+	iommufd_put_object(&src_ioas->obj);
+	if (rc)
+		return rc;
+
+	dst_ioas = iommufd_get_ioas(ucmd, cmd->dst_ioas_id);
+	if (IS_ERR(dst_ioas)) {
+		rc = PTR_ERR(dst_ioas);
+		goto out_pages;
+	}
+
+	if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
+		flags = IOPT_ALLOC_IOVA;
+	iova = cmd->dst_iova;
+	rc = iopt_map_pages(&dst_ioas->iopt, &pages_list, cmd->length, &iova,
+			    conv_iommu_prot(cmd->flags), flags);
+	if (rc)
+		goto out_put_dst;
+
+	cmd->dst_iova = iova;
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_put_dst:
+	iommufd_put_object(&dst_ioas->obj);
+out_pages:
+	iopt_free_pages_list(&pages_list);
+	return rc;
+}
+
+int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_ioas_unmap *cmd = ucmd->cmd;
+	struct iommufd_ioas *ioas;
+	unsigned long unmapped = 0;
+	int rc;
+
+	ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	if (cmd->iova == 0 && cmd->length == U64_MAX) {
+		rc = iopt_unmap_all(&ioas->iopt, &unmapped);
+		if (rc)
+			goto out_put;
+	} else {
+		if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) {
+			rc = -EOVERFLOW;
+			goto out_put;
+		}
+		rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length,
+				     &unmapped);
+		if (rc)
+			goto out_put;
+	}
+
+	cmd->length = unmapped;
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+out_put:
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+int iommufd_option_rlimit_mode(struct iommu_option *cmd,
+			       struct iommufd_ctx *ictx)
+{
+	if (cmd->object_id)
+		return -EOPNOTSUPP;
+
+	if (cmd->op == IOMMU_OPTION_OP_GET) {
+		cmd->val64 = ictx->account_mode == IOPT_PAGES_ACCOUNT_MM;
+		return 0;
+	}
+	if (cmd->op == IOMMU_OPTION_OP_SET) {
+		int rc = 0;
+
+		if (!capable(CAP_SYS_RESOURCE))
+			return -EPERM;
+
+		xa_lock(&ictx->objects);
+		if (!xa_empty(&ictx->objects)) {
+			rc = -EBUSY;
+		} else {
+			if (cmd->val64 == 0)
+				ictx->account_mode = IOPT_PAGES_ACCOUNT_USER;
+			else if (cmd->val64 == 1)
+				ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
+			else
+				rc = -EINVAL;
+		}
+		xa_unlock(&ictx->objects);
+
+		return rc;
+	}
+	return -EOPNOTSUPP;
+}
+
+static int iommufd_ioas_option_huge_pages(struct iommu_option *cmd,
+					  struct iommufd_ioas *ioas)
+{
+	if (cmd->op == IOMMU_OPTION_OP_GET) {
+		cmd->val64 = !ioas->iopt.disable_large_pages;
+		return 0;
+	}
+	if (cmd->op == IOMMU_OPTION_OP_SET) {
+		if (cmd->val64 == 0)
+			return iopt_disable_large_pages(&ioas->iopt);
+		if (cmd->val64 == 1) {
+			iopt_enable_large_pages(&ioas->iopt);
+			return 0;
+		}
+		return -EINVAL;
+	}
+	return -EOPNOTSUPP;
+}
+
+int iommufd_ioas_option(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_option *cmd = ucmd->cmd;
+	struct iommufd_ioas *ioas;
+	int rc = 0;
+
+	if (cmd->__reserved)
+		return -EOPNOTSUPP;
+
+	ioas = iommufd_get_ioas(ucmd, cmd->object_id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	switch (cmd->option_id) {
+	case IOMMU_OPTION_HUGE_PAGES:
+		rc = iommufd_ioas_option_huge_pages(cmd, ioas);
+		break;
+	default:
+		rc = -EOPNOTSUPP;
+	}
+
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index f7ab6c6edafd1..1a13c54a8def8 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -11,6 +11,7 @@
 
 struct iommu_domain;
 struct iommu_group;
+struct iommu_option;
 
 struct iommufd_ctx {
 	struct file *file;
@@ -102,6 +103,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
 enum iommufd_object_type {
 	IOMMUFD_OBJ_NONE,
 	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+	IOMMUFD_OBJ_IOAS,
 };
 
 /* Base struct for all objects with a userspace ID handle. */
@@ -174,6 +176,37 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 			     type),                                            \
 		     typeof(*(ptr)), obj)
 
+/*
+ * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
+ * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
+ * mapping is copied into all of the associated domains and made available to
+ * in-kernel users.
+ */
+struct iommufd_ioas {
+	struct iommufd_object obj;
+	struct io_pagetable iopt;
+};
+
+static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd,
+						    u32 id)
+{
+	return container_of(iommufd_get_object(ucmd->ictx, id,
+					       IOMMUFD_OBJ_IOAS),
+			    struct iommufd_ioas, obj);
+}
+
+struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
+int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
+void iommufd_ioas_destroy(struct iommufd_object *obj);
+int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_allow_iovas(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
+int iommufd_option_rlimit_mode(struct iommu_option *cmd,
+			       struct iommufd_ctx *ictx);
+
 struct iommufd_access {
 	unsigned long iova_alignment;
 	u32 iopt_access_list_id;
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index dfbc68b97506d..1c0a1f499378d 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -204,8 +204,39 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static int iommufd_option(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_option *cmd = ucmd->cmd;
+	int rc;
+
+	if (cmd->__reserved)
+		return -EOPNOTSUPP;
+
+	switch (cmd->option_id) {
+	case IOMMU_OPTION_RLIMIT_MODE:
+		rc = iommufd_option_rlimit_mode(cmd, ucmd->ictx);
+		break;
+	case IOMMU_OPTION_HUGE_PAGES:
+		rc = iommufd_ioas_option(ucmd);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	if (rc)
+		return rc;
+	if (copy_to_user(&((struct iommu_option __user *)ucmd->ubuffer)->val64,
+			 &cmd->val64, sizeof(cmd->val64)))
+		return -EFAULT;
+	return 0;
+}
+
 union ucmd_buffer {
 	struct iommu_destroy destroy;
+	struct iommu_ioas_alloc alloc;
+	struct iommu_ioas_allow_iovas allow_iovas;
+	struct iommu_ioas_iova_ranges iova_ranges;
+	struct iommu_ioas_map map;
+	struct iommu_ioas_unmap unmap;
 };
 
 struct iommufd_ioctl_op {
@@ -226,6 +257,20 @@ struct iommufd_ioctl_op {
 	}
 static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 	IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
+	IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
+		 struct iommu_ioas_alloc, out_ioas_id),
+	IOCTL_OP(IOMMU_IOAS_ALLOW_IOVAS, iommufd_ioas_allow_iovas,
+		 struct iommu_ioas_allow_iovas, allowed_iovas),
+	IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy,
+		 src_iova),
+	IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,
+		 struct iommu_ioas_iova_ranges, out_iova_alignment),
+	IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map,
+		 iova),
+	IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
+		 length),
+	IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option,
+		 val64),
 };
 
 static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
@@ -312,6 +357,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx)
 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD);
 
 static const struct iommufd_object_ops iommufd_object_ops[] = {
+	[IOMMUFD_OBJ_IOAS] = {
+		.destroy = iommufd_ioas_destroy,
+	},
 };
 
 static struct miscdevice iommu_misc_dev = {
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 37de92f0534b8..30cc5c5e2b347 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -37,12 +37,19 @@
 enum {
 	IOMMUFD_CMD_BASE = 0x80,
 	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
+	IOMMUFD_CMD_IOAS_ALLOC,
+	IOMMUFD_CMD_IOAS_ALLOW_IOVAS,
+	IOMMUFD_CMD_IOAS_COPY,
+	IOMMUFD_CMD_IOAS_IOVA_RANGES,
+	IOMMUFD_CMD_IOAS_MAP,
+	IOMMUFD_CMD_IOAS_UNMAP,
+	IOMMUFD_CMD_OPTION,
 };
 
 /**
  * struct iommu_destroy - ioctl(IOMMU_DESTROY)
  * @size: sizeof(struct iommu_destroy)
- * @id: iommufd object ID to destroy. Can by any destroyable object type.
+ * @id: iommufd object ID to destroy. Can be any destroyable object type.
  *
  * Destroy any object held within iommufd.
  */
@@ -52,4 +59,253 @@ struct iommu_destroy {
 };
 #define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
 
+/**
+ * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
+ * @size: sizeof(struct iommu_ioas_alloc)
+ * @flags: Must be 0
+ * @out_ioas_id: Output IOAS ID for the allocated object
+ *
+ * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
+ * to memory mapping.
+ */
+struct iommu_ioas_alloc {
+	__u32 size;
+	__u32 flags;
+	__u32 out_ioas_id;
+};
+#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
+
+/**
+ * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE)
+ * @start: First IOVA
+ * @last: Inclusive last IOVA
+ *
+ * An interval in IOVA space.
+ */
+struct iommu_iova_range {
+	__aligned_u64 start;
+	__aligned_u64 last;
+};
+
+/**
+ * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
+ * @size: sizeof(struct iommu_ioas_iova_ranges)
+ * @ioas_id: IOAS ID to read ranges from
+ * @num_iovas: Input/Output total number of ranges in the IOAS
+ * @__reserved: Must be 0
+ * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
+ * @out_iova_alignment: Minimum alignment required for mapping IOVA
+ *
+ * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
+ * is not allowed. num_iovas will be set to the total number of iovas and
+ * the allowed_iovas[] will be filled in as space permits.
+ *
+ * The allowed ranges are dependent on the HW path the DMA operation takes, and
+ * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
+ * full range, and each attached device will narrow the ranges based on that
+ * device's HW restrictions. Detaching a device can widen the ranges. Userspace
+ * should query ranges after every attach/detach to know what IOVAs are valid
+ * for mapping.
+ *
+ * On input num_iovas is the length of the allowed_iovas array. On output it is
+ * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
+ * num_iovas to the required value if num_iovas is too small. In this case the
+ * caller should allocate a larger output array and re-issue the ioctl.
+ *
+ * out_iova_alignment returns the minimum IOVA alignment that can be given
+ * to IOMMU_IOAS_MAP/COPY. IOVA's must satisfy::
+ *
+ *   starting_iova % out_iova_alignment == 0
+ *   (starting_iova + length) % out_iova_alignment == 0
+ *
+ * out_iova_alignment can be 1 indicating any IOVA is allowed. It cannot
+ * be higher than the system PAGE_SIZE.
+ */
+struct iommu_ioas_iova_ranges {
+	__u32 size;
+	__u32 ioas_id;
+	__u32 num_iovas;
+	__u32 __reserved;
+	__aligned_u64 allowed_iovas;
+	__aligned_u64 out_iova_alignment;
+};
+#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
+
+/**
+ * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
+ * @size: sizeof(struct iommu_ioas_allow_iovas)
+ * @ioas_id: IOAS ID to allow IOVAs from
+ * @num_iovas: Input/Output total number of ranges in the IOAS
+ * @__reserved: Must be 0
+ * @allowed_iovas: Pointer to array of struct iommu_iova_range
+ *
+ * Ensure a range of IOVAs are always available for allocation. If this call
+ * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
+ * that are narrower than the ranges provided here. This call will fail if
+ * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
+ *
+ * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
+ * devices are attached the IOVA will narrow based on the device restrictions.
+ * When an allowed range is specified any narrowing will be refused, ie device
+ * attachment can fail if the device requires limiting within the allowed range.
+ *
+ * Automatic IOVA allocation is also impacted by this call. MAP will only
+ * allocate within the allowed IOVAs if they are present.
+ *
+ * This call replaces the entire allowed list with the given list.
+ */
+struct iommu_ioas_allow_iovas {
+	__u32 size;
+	__u32 ioas_id;
+	__u32 num_iovas;
+	__u32 __reserved;
+	__aligned_u64 allowed_iovas;
+};
+#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
+
+/**
+ * enum iommufd_ioas_map_flags - Flags for map and copy
+ * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
+ *                             IOVA to place the mapping at
+ * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
+ * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
+ */
+enum iommufd_ioas_map_flags {
+	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
+	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
+	IOMMU_IOAS_MAP_READABLE = 1 << 2,
+};
+
+/**
+ * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
+ * @size: sizeof(struct iommu_ioas_map)
+ * @flags: Combination of enum iommufd_ioas_map_flags
+ * @ioas_id: IOAS ID to change the mapping of
+ * @__reserved: Must be 0
+ * @user_va: Userspace pointer to start mapping from
+ * @length: Number of bytes to map
+ * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
+ *        then this must be provided as input.
+ *
+ * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
+ * mapping will be established at iova, otherwise a suitable location based on
+ * the reserved and allowed lists will be automatically selected and returned in
+ * iova.
+ *
+ * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
+ * be unused, existing IOVA cannot be replaced.
+ */
+struct iommu_ioas_map {
+	__u32 size;
+	__u32 flags;
+	__u32 ioas_id;
+	__u32 __reserved;
+	__aligned_u64 user_va;
+	__aligned_u64 length;
+	__aligned_u64 iova;
+};
+#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
+
+/**
+ * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
+ * @size: sizeof(struct iommu_ioas_copy)
+ * @flags: Combination of enum iommufd_ioas_map_flags
+ * @dst_ioas_id: IOAS ID to change the mapping of
+ * @src_ioas_id: IOAS ID to copy from
+ * @length: Number of bytes to copy and map
+ * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
+ *            set then this must be provided as input.
+ * @src_iova: IOVA to start the copy
+ *
+ * Copy an already existing mapping from src_ioas_id and establish it in
+ * dst_ioas_id. The src iova/length must exactly match a range used with
+ * IOMMU_IOAS_MAP.
+ *
+ * This may be used to efficiently clone a subset of an IOAS to another, or as a
+ * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
+ * establishing equivalent new mappings, as internal resources are shared, and
+ * the kernel will pin the user memory only once.
+ */
+struct iommu_ioas_copy {
+	__u32 size;
+	__u32 flags;
+	__u32 dst_ioas_id;
+	__u32 src_ioas_id;
+	__aligned_u64 length;
+	__aligned_u64 dst_iova;
+	__aligned_u64 src_iova;
+};
+#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
+
+/**
+ * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
+ * @size: sizeof(struct iommu_ioas_unmap)
+ * @ioas_id: IOAS ID to change the mapping of
+ * @iova: IOVA to start the unmapping at
+ * @length: Number of bytes to unmap, and return back the bytes unmapped
+ *
+ * Unmap an IOVA range. The iova/length must be a superset of a previously
+ * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
+ * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
+ * everything.
+ */
+struct iommu_ioas_unmap {
+	__u32 size;
+	__u32 ioas_id;
+	__aligned_u64 iova;
+	__aligned_u64 length;
+};
+#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
+
+/**
+ * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
+ *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
+ * @IOMMU_OPTION_RLIMIT_MODE:
+ *    Change how RLIMIT_MEMLOCK accounting works. The caller must have privilege
+ *    to invoke this. Value 0 (default) is user based accouting, 1 uses process
+ *    based accounting. Global option, object_id must be 0
+ * @IOMMU_OPTION_HUGE_PAGES:
+ *    Value 1 (default) allows contiguous pages to be combined when generating
+ *    iommu mappings. Value 0 disables combining, everything is mapped to
+ *    PAGE_SIZE. This can be useful for benchmarking.  This is a per-IOAS
+ *    option, the object_id must be the IOAS ID.
+ */
+enum iommufd_option {
+	IOMMU_OPTION_RLIMIT_MODE = 0,
+	IOMMU_OPTION_HUGE_PAGES = 1,
+};
+
+/**
+ * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
+ *                           ioctl(IOMMU_OPTION_OP_GET)
+ * @IOMMU_OPTION_OP_SET: Set the option's value
+ * @IOMMU_OPTION_OP_GET: Get the option's value
+ */
+enum iommufd_option_ops {
+	IOMMU_OPTION_OP_SET = 0,
+	IOMMU_OPTION_OP_GET = 1,
+};
+
+/**
+ * struct iommu_option - iommu option multiplexer
+ * @size: sizeof(struct iommu_option)
+ * @option_id: One of enum iommufd_option
+ * @op: One of enum iommufd_option_ops
+ * @__reserved: Must be 0
+ * @object_id: ID of the object if required
+ * @val64: Option value to set or value returned on get
+ *
+ * Change a simple option value. This multiplexor allows controlling options
+ * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
+ * will return the current value.
+ */
+struct iommu_option {
+	__u32 size;
+	__u32 option_id;
+	__u16 op;
+	__u16 __reserved;
+	__u32 object_id;
+	__aligned_u64 val64;
+};
+#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
 #endif
-- 
GitLab


From ea4acfac57b9dee57a7d5840359a41cc3251de92 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:35 -0400
Subject: [PATCH 3131/4122] iommufd: Add a HW pagetable object

The hw_pagetable object exposes the internal struct iommu_domain's to
userspace. An iommu_domain is required when any DMA device attaches to an
IOAS to control the io page table through the iommu driver.

For compatibility with VFIO the hw_pagetable is automatically created when
a DMA device is attached to the IOAS. If a compatible iommu_domain already
exists then the hw_pagetable associated with it is used for the
attachment.

In the initial series there is no iommufd uAPI for the hw_pagetable
object. The next patch provides driver facing APIs for IO page table
attachment that allows drivers to accept either an IOAS or a hw_pagetable
ID and for the driver to return the hw_pagetable ID that was auto-selected
from an IOAS. The expectation is the driver will provide uAPI through its
own FD for attaching its device to iommufd. This allows userspace to learn
the mapping of devices to iommu_domains and to override the automatic
attachment.

The future HW specific interface will allow userspace to create
hw_pagetable objects using iommu_domains with IOMMU driver specific
parameters. This infrastructure will allow linking those domains to IOAS's
and devices.

Link: https://lore.kernel.org/r/12-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |  1 +
 drivers/iommu/iommufd/hw_pagetable.c    | 57 +++++++++++++++++++++++++
 drivers/iommu/iommufd/ioas.c            |  3 ++
 drivers/iommu/iommufd/iommufd_private.h | 33 ++++++++++++++
 drivers/iommu/iommufd/main.c            |  3 ++
 5 files changed, 97 insertions(+)
 create mode 100644 drivers/iommu/iommufd/hw_pagetable.c

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index 2b4f36f1b72f9..e13e971aa28c6 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 iommufd-y := \
+	hw_pagetable.o \
 	io_pagetable.o \
 	ioas.o \
 	main.o \
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
new file mode 100644
index 0000000000000..43d473989a066
--- /dev/null
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/iommu.h>
+
+#include "iommufd_private.h"
+
+void iommufd_hw_pagetable_destroy(struct iommufd_object *obj)
+{
+	struct iommufd_hw_pagetable *hwpt =
+		container_of(obj, struct iommufd_hw_pagetable, obj);
+
+	WARN_ON(!list_empty(&hwpt->devices));
+
+	iommu_domain_free(hwpt->domain);
+	refcount_dec(&hwpt->ioas->obj.users);
+	mutex_destroy(&hwpt->devices_lock);
+}
+
+/**
+ * iommufd_hw_pagetable_alloc() - Get an iommu_domain for a device
+ * @ictx: iommufd context
+ * @ioas: IOAS to associate the domain with
+ * @dev: Device to get an iommu_domain for
+ *
+ * Allocate a new iommu_domain and return it as a hw_pagetable.
+ */
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
+			   struct device *dev)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	int rc;
+
+	hwpt = iommufd_object_alloc(ictx, hwpt, IOMMUFD_OBJ_HW_PAGETABLE);
+	if (IS_ERR(hwpt))
+		return hwpt;
+
+	hwpt->domain = iommu_domain_alloc(dev->bus);
+	if (!hwpt->domain) {
+		rc = -ENOMEM;
+		goto out_abort;
+	}
+
+	INIT_LIST_HEAD(&hwpt->devices);
+	INIT_LIST_HEAD(&hwpt->hwpt_item);
+	mutex_init(&hwpt->devices_lock);
+	/* Pairs with iommufd_hw_pagetable_destroy() */
+	refcount_inc(&ioas->obj.users);
+	hwpt->ioas = ioas;
+	return hwpt;
+
+out_abort:
+	iommufd_object_abort(ictx, &hwpt->obj);
+	return ERR_PTR(rc);
+}
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index 6ff97dafc8913..302779b33bd4a 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -17,6 +17,7 @@ void iommufd_ioas_destroy(struct iommufd_object *obj)
 	rc = iopt_unmap_all(&ioas->iopt, NULL);
 	WARN_ON(rc && rc != -ENOENT);
 	iopt_destroy_table(&ioas->iopt);
+	mutex_destroy(&ioas->mutex);
 }
 
 struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
@@ -28,6 +29,8 @@ struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
 		return ioas;
 
 	iopt_init_table(&ioas->iopt);
+	INIT_LIST_HEAD(&ioas->hwpt_list);
+	mutex_init(&ioas->mutex);
 	return ioas;
 }
 
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 1a13c54a8def8..6b0448702a956 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -103,6 +103,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
 enum iommufd_object_type {
 	IOMMUFD_OBJ_NONE,
 	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+	IOMMUFD_OBJ_HW_PAGETABLE,
 	IOMMUFD_OBJ_IOAS,
 };
 
@@ -181,10 +182,20 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
  * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
  * mapping is copied into all of the associated domains and made available to
  * in-kernel users.
+ *
+ * Every iommu_domain that is created is wrapped in a iommufd_hw_pagetable
+ * object. When we go to attach a device to an IOAS we need to get an
+ * iommu_domain and wrapping iommufd_hw_pagetable for it.
+ *
+ * An iommu_domain & iommfd_hw_pagetable will be automatically selected
+ * for a device based on the hwpt_list. If no suitable iommu_domain
+ * is found a new iommu_domain will be created.
  */
 struct iommufd_ioas {
 	struct iommufd_object obj;
 	struct io_pagetable iopt;
+	struct mutex mutex;
+	struct list_head hwpt_list;
 };
 
 static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd,
@@ -207,6 +218,28 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
 int iommufd_option_rlimit_mode(struct iommu_option *cmd,
 			       struct iommufd_ctx *ictx);
 
+/*
+ * A HW pagetable is called an iommu_domain inside the kernel. This user object
+ * allows directly creating and inspecting the domains. Domains that have kernel
+ * owned page tables will be associated with an iommufd_ioas that provides the
+ * IOVA to PFN map.
+ */
+struct iommufd_hw_pagetable {
+	struct iommufd_object obj;
+	struct iommufd_ioas *ioas;
+	struct iommu_domain *domain;
+	bool auto_domain : 1;
+	/* Head at iommufd_ioas::hwpt_list */
+	struct list_head hwpt_item;
+	struct mutex devices_lock;
+	struct list_head devices;
+};
+
+struct iommufd_hw_pagetable *
+iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
+			   struct device *dev);
+void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
+
 struct iommufd_access {
 	unsigned long iova_alignment;
 	u32 iopt_access_list_id;
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 1c0a1f499378d..ac6580a7b7066 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -360,6 +360,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
 	[IOMMUFD_OBJ_IOAS] = {
 		.destroy = iommufd_ioas_destroy,
 	},
+	[IOMMUFD_OBJ_HW_PAGETABLE] = {
+		.destroy = iommufd_hw_pagetable_destroy,
+	},
 };
 
 static struct miscdevice iommu_misc_dev = {
-- 
GitLab


From e8d57210035b6377d424ba964961892d01127cf6 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:36 -0400
Subject: [PATCH 3132/4122] iommufd: Add kAPI toward external drivers for
 physical devices

Add the four functions external drivers need to connect physical DMA to
the IOMMUFD:

iommufd_device_bind() / iommufd_device_unbind()
  Register the device with iommufd and establish security isolation.

iommufd_device_attach() / iommufd_device_detach()
  Connect a bound device to a page table

Binding a device creates a device object ID in the uAPI, however the
generic API does not yet provide any IOCTLs to manipulate them.

Link: https://lore.kernel.org/r/13-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |   1 +
 drivers/iommu/iommufd/device.c          | 419 ++++++++++++++++++++++++
 drivers/iommu/iommufd/iommufd_private.h |   5 +
 drivers/iommu/iommufd/main.c            |   3 +
 include/linux/iommufd.h                 |   9 +
 5 files changed, 437 insertions(+)
 create mode 100644 drivers/iommu/iommufd/device.c

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index e13e971aa28c6..ca28a135b9675 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 iommufd-y := \
+	device.o \
 	hw_pagetable.o \
 	io_pagetable.o \
 	ioas.o \
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
new file mode 100644
index 0000000000000..67cd00b4d9265
--- /dev/null
+++ b/drivers/iommu/iommufd/device.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/iommufd.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/irqdomain.h>
+
+#include "iommufd_private.h"
+
+static bool allow_unsafe_interrupts;
+module_param(allow_unsafe_interrupts, bool, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(
+	allow_unsafe_interrupts,
+	"Allow IOMMUFD to bind to devices even if the platform cannot isolate "
+	"the MSI interrupt window. Enabling this is a security weakness.");
+
+/*
+ * A iommufd_device object represents the binding relationship between a
+ * consuming driver and the iommufd. These objects are created/destroyed by
+ * external drivers, not by userspace.
+ */
+struct iommufd_device {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommufd_hw_pagetable *hwpt;
+	/* Head at iommufd_hw_pagetable::devices */
+	struct list_head devices_item;
+	/* always the physical device */
+	struct device *dev;
+	struct iommu_group *group;
+	bool enforce_cache_coherency;
+};
+
+void iommufd_device_destroy(struct iommufd_object *obj)
+{
+	struct iommufd_device *idev =
+		container_of(obj, struct iommufd_device, obj);
+
+	iommu_device_release_dma_owner(idev->dev);
+	iommu_group_put(idev->group);
+	iommufd_ctx_put(idev->ictx);
+}
+
+/**
+ * iommufd_device_bind - Bind a physical device to an iommu fd
+ * @ictx: iommufd file descriptor
+ * @dev: Pointer to a physical device struct
+ * @id: Output ID number to return to userspace for this device
+ *
+ * A successful bind establishes an ownership over the device and returns
+ * struct iommufd_device pointer, otherwise returns error pointer.
+ *
+ * A driver using this API must set driver_managed_dma and must not touch
+ * the device until this routine succeeds and establishes ownership.
+ *
+ * Binding a PCI device places the entire RID under iommufd control.
+ *
+ * The caller must undo this with iommufd_device_unbind()
+ */
+struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
+					   struct device *dev, u32 *id)
+{
+	struct iommufd_device *idev;
+	struct iommu_group *group;
+	int rc;
+
+	/*
+	 * iommufd always sets IOMMU_CACHE because we offer no way for userspace
+	 * to restore cache coherency.
+	 */
+	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY))
+		return ERR_PTR(-EINVAL);
+
+	group = iommu_group_get(dev);
+	if (!group)
+		return ERR_PTR(-ENODEV);
+
+	rc = iommu_device_claim_dma_owner(dev, ictx);
+	if (rc)
+		goto out_group_put;
+
+	idev = iommufd_object_alloc(ictx, idev, IOMMUFD_OBJ_DEVICE);
+	if (IS_ERR(idev)) {
+		rc = PTR_ERR(idev);
+		goto out_release_owner;
+	}
+	idev->ictx = ictx;
+	iommufd_ctx_get(ictx);
+	idev->dev = dev;
+	idev->enforce_cache_coherency =
+		device_iommu_capable(dev, IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
+	/* The calling driver is a user until iommufd_device_unbind() */
+	refcount_inc(&idev->obj.users);
+	/* group refcount moves into iommufd_device */
+	idev->group = group;
+
+	/*
+	 * If the caller fails after this success it must call
+	 * iommufd_unbind_device() which is safe since we hold this refcount.
+	 * This also means the device is a leaf in the graph and no other object
+	 * can take a reference on it.
+	 */
+	iommufd_object_finalize(ictx, &idev->obj);
+	*id = idev->obj.id;
+	return idev;
+
+out_release_owner:
+	iommu_device_release_dma_owner(dev);
+out_group_put:
+	iommu_group_put(group);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_bind, IOMMUFD);
+
+/**
+ * iommufd_device_unbind - Undo iommufd_device_bind()
+ * @idev: Device returned by iommufd_device_bind()
+ *
+ * Release the device from iommufd control. The DMA ownership will return back
+ * to unowned with DMA controlled by the DMA API. This invalidates the
+ * iommufd_device pointer, other APIs that consume it must not be called
+ * concurrently.
+ */
+void iommufd_device_unbind(struct iommufd_device *idev)
+{
+	bool was_destroyed;
+
+	was_destroyed = iommufd_object_destroy_user(idev->ictx, &idev->obj);
+	WARN_ON(!was_destroyed);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, IOMMUFD);
+
+static int iommufd_device_setup_msi(struct iommufd_device *idev,
+				    struct iommufd_hw_pagetable *hwpt,
+				    phys_addr_t sw_msi_start)
+{
+	int rc;
+
+	/*
+	 * IOMMU_CAP_INTR_REMAP means that the platform is isolating MSI, and it
+	 * creates the MSI window by default in the iommu domain. Nothing
+	 * further to do.
+	 */
+	if (device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP))
+		return 0;
+
+	/*
+	 * On ARM systems that set the global IRQ_DOMAIN_FLAG_MSI_REMAP every
+	 * allocated iommu_domain will block interrupts by default and this
+	 * special flow is needed to turn them back on. iommu_dma_prepare_msi()
+	 * will install pages into our domain after request_irq() to make this
+	 * work.
+	 *
+	 * FIXME: This is conceptually broken for iommufd since we want to allow
+	 * userspace to change the domains, eg switch from an identity IOAS to a
+	 * DMA IOAS. There is currently no way to create a MSI window that
+	 * matches what the IRQ layer actually expects in a newly created
+	 * domain.
+	 */
+	if (irq_domain_check_msi_remap()) {
+		if (WARN_ON(!sw_msi_start))
+			return -EPERM;
+		/*
+		 * iommu_get_msi_cookie() can only be called once per domain,
+		 * it returns -EBUSY on later calls.
+		 */
+		if (hwpt->msi_cookie)
+			return 0;
+		rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
+		if (rc)
+			return rc;
+		hwpt->msi_cookie = true;
+		return 0;
+	}
+
+	/*
+	 * Otherwise the platform has a MSI window that is not isolated. For
+	 * historical compat with VFIO allow a module parameter to ignore the
+	 * insecurity.
+	 */
+	if (!allow_unsafe_interrupts)
+		return -EPERM;
+
+	dev_warn(
+		idev->dev,
+		"MSI interrupt window cannot be isolated by the IOMMU, this platform is insecure. Use the \"allow_unsafe_interrupts\" module parameter to override\n");
+	return 0;
+}
+
+static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
+					   struct iommu_group *group)
+{
+	struct iommufd_device *cur_dev;
+
+	list_for_each_entry(cur_dev, &hwpt->devices, devices_item)
+		if (cur_dev->group == group)
+			return true;
+	return false;
+}
+
+static int iommufd_device_do_attach(struct iommufd_device *idev,
+				    struct iommufd_hw_pagetable *hwpt)
+{
+	phys_addr_t sw_msi_start = 0;
+	int rc;
+
+	mutex_lock(&hwpt->devices_lock);
+
+	/*
+	 * Try to upgrade the domain we have, it is an iommu driver bug to
+	 * report IOMMU_CAP_ENFORCE_CACHE_COHERENCY but fail
+	 * enforce_cache_coherency when there are no devices attached to the
+	 * domain.
+	 */
+	if (idev->enforce_cache_coherency && !hwpt->enforce_cache_coherency) {
+		if (hwpt->domain->ops->enforce_cache_coherency)
+			hwpt->enforce_cache_coherency =
+				hwpt->domain->ops->enforce_cache_coherency(
+					hwpt->domain);
+		if (!hwpt->enforce_cache_coherency) {
+			WARN_ON(list_empty(&hwpt->devices));
+			rc = -EINVAL;
+			goto out_unlock;
+		}
+	}
+
+	rc = iopt_table_enforce_group_resv_regions(&hwpt->ioas->iopt, idev->dev,
+						   idev->group, &sw_msi_start);
+	if (rc)
+		goto out_unlock;
+
+	rc = iommufd_device_setup_msi(idev, hwpt, sw_msi_start);
+	if (rc)
+		goto out_iova;
+
+	/*
+	 * FIXME: Hack around missing a device-centric iommu api, only attach to
+	 * the group once for the first device that is in the group.
+	 */
+	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
+		rc = iommu_attach_group(hwpt->domain, idev->group);
+		if (rc)
+			goto out_iova;
+
+		if (list_empty(&hwpt->devices)) {
+			rc = iopt_table_add_domain(&hwpt->ioas->iopt,
+						   hwpt->domain);
+			if (rc)
+				goto out_detach;
+		}
+	}
+
+	idev->hwpt = hwpt;
+	refcount_inc(&hwpt->obj.users);
+	list_add(&idev->devices_item, &hwpt->devices);
+	mutex_unlock(&hwpt->devices_lock);
+	return 0;
+
+out_detach:
+	iommu_detach_group(hwpt->domain, idev->group);
+out_iova:
+	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+out_unlock:
+	mutex_unlock(&hwpt->devices_lock);
+	return rc;
+}
+
+/*
+ * When automatically managing the domains we search for a compatible domain in
+ * the iopt and if one is found use it, otherwise create a new domain.
+ * Automatic domain selection will never pick a manually created domain.
+ */
+static int iommufd_device_auto_get_domain(struct iommufd_device *idev,
+					  struct iommufd_ioas *ioas)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	int rc;
+
+	/*
+	 * There is no differentiation when domains are allocated, so any domain
+	 * that is willing to attach to the device is interchangeable with any
+	 * other.
+	 */
+	mutex_lock(&ioas->mutex);
+	list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
+		if (!hwpt->auto_domain)
+			continue;
+
+		rc = iommufd_device_do_attach(idev, hwpt);
+
+		/*
+		 * -EINVAL means the domain is incompatible with the device.
+		 * Other error codes should propagate to userspace as failure.
+		 * Success means the domain is attached.
+		 */
+		if (rc == -EINVAL)
+			continue;
+		goto out_unlock;
+	}
+
+	hwpt = iommufd_hw_pagetable_alloc(idev->ictx, ioas, idev->dev);
+	if (IS_ERR(hwpt)) {
+		rc = PTR_ERR(hwpt);
+		goto out_unlock;
+	}
+	hwpt->auto_domain = true;
+
+	rc = iommufd_device_do_attach(idev, hwpt);
+	if (rc)
+		goto out_abort;
+	list_add_tail(&hwpt->hwpt_item, &ioas->hwpt_list);
+
+	mutex_unlock(&ioas->mutex);
+	iommufd_object_finalize(idev->ictx, &hwpt->obj);
+	return 0;
+
+out_abort:
+	iommufd_object_abort_and_destroy(idev->ictx, &hwpt->obj);
+out_unlock:
+	mutex_unlock(&ioas->mutex);
+	return rc;
+}
+
+/**
+ * iommufd_device_attach - Connect a device from an iommu_domain
+ * @idev: device to attach
+ * @pt_id: Input a IOMMUFD_OBJ_IOAS, or IOMMUFD_OBJ_HW_PAGETABLE
+ *         Output the IOMMUFD_OBJ_HW_PAGETABLE ID
+ *
+ * This connects the device to an iommu_domain, either automatically or manually
+ * selected. Once this completes the device could do DMA.
+ *
+ * The caller should return the resulting pt_id back to userspace.
+ * This function is undone by calling iommufd_device_detach().
+ */
+int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id)
+{
+	struct iommufd_object *pt_obj;
+	int rc;
+
+	pt_obj = iommufd_get_object(idev->ictx, *pt_id, IOMMUFD_OBJ_ANY);
+	if (IS_ERR(pt_obj))
+		return PTR_ERR(pt_obj);
+
+	switch (pt_obj->type) {
+	case IOMMUFD_OBJ_HW_PAGETABLE: {
+		struct iommufd_hw_pagetable *hwpt =
+			container_of(pt_obj, struct iommufd_hw_pagetable, obj);
+
+		rc = iommufd_device_do_attach(idev, hwpt);
+		if (rc)
+			goto out_put_pt_obj;
+
+		mutex_lock(&hwpt->ioas->mutex);
+		list_add_tail(&hwpt->hwpt_item, &hwpt->ioas->hwpt_list);
+		mutex_unlock(&hwpt->ioas->mutex);
+		break;
+	}
+	case IOMMUFD_OBJ_IOAS: {
+		struct iommufd_ioas *ioas =
+			container_of(pt_obj, struct iommufd_ioas, obj);
+
+		rc = iommufd_device_auto_get_domain(idev, ioas);
+		if (rc)
+			goto out_put_pt_obj;
+		break;
+	}
+	default:
+		rc = -EINVAL;
+		goto out_put_pt_obj;
+	}
+
+	refcount_inc(&idev->obj.users);
+	*pt_id = idev->hwpt->obj.id;
+	rc = 0;
+
+out_put_pt_obj:
+	iommufd_put_object(pt_obj);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_attach, IOMMUFD);
+
+/**
+ * iommufd_device_detach - Disconnect a device to an iommu_domain
+ * @idev: device to detach
+ *
+ * Undo iommufd_device_attach(). This disconnects the idev from the previously
+ * attached pt_id. The device returns back to a blocked DMA translation.
+ */
+void iommufd_device_detach(struct iommufd_device *idev)
+{
+	struct iommufd_hw_pagetable *hwpt = idev->hwpt;
+
+	mutex_lock(&hwpt->ioas->mutex);
+	mutex_lock(&hwpt->devices_lock);
+	list_del(&idev->devices_item);
+	if (!iommufd_hw_pagetable_has_group(hwpt, idev->group)) {
+		if (list_empty(&hwpt->devices)) {
+			iopt_table_remove_domain(&hwpt->ioas->iopt,
+						 hwpt->domain);
+			list_del(&hwpt->hwpt_item);
+		}
+		iommu_detach_group(hwpt->domain, idev->group);
+	}
+	iopt_remove_reserved_iova(&hwpt->ioas->iopt, idev->dev);
+	mutex_unlock(&hwpt->devices_lock);
+	mutex_unlock(&hwpt->ioas->mutex);
+
+	if (hwpt->auto_domain)
+		iommufd_object_destroy_user(idev->ictx, &hwpt->obj);
+	else
+		refcount_dec(&hwpt->obj.users);
+
+	idev->hwpt = NULL;
+
+	refcount_dec(&idev->obj.users);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 6b0448702a956..72a0c805be233 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -103,6 +103,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
 enum iommufd_object_type {
 	IOMMUFD_OBJ_NONE,
 	IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+	IOMMUFD_OBJ_DEVICE,
 	IOMMUFD_OBJ_HW_PAGETABLE,
 	IOMMUFD_OBJ_IOAS,
 };
@@ -229,6 +230,8 @@ struct iommufd_hw_pagetable {
 	struct iommufd_ioas *ioas;
 	struct iommu_domain *domain;
 	bool auto_domain : 1;
+	bool enforce_cache_coherency : 1;
+	bool msi_cookie : 1;
 	/* Head at iommufd_ioas::hwpt_list */
 	struct list_head hwpt_item;
 	struct mutex devices_lock;
@@ -240,6 +243,8 @@ iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
 			   struct device *dev);
 void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
 
+void iommufd_device_destroy(struct iommufd_object *obj);
+
 struct iommufd_access {
 	unsigned long iova_alignment;
 	u32 iopt_access_list_id;
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index ac6580a7b7066..fe98912bab0e0 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -357,6 +357,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx)
 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD);
 
 static const struct iommufd_object_ops iommufd_object_ops[] = {
+	[IOMMUFD_OBJ_DEVICE] = {
+		.destroy = iommufd_device_destroy,
+	},
 	[IOMMUFD_OBJ_IOAS] = {
 		.destroy = iommufd_ioas_destroy,
 	},
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 26e09d539737b..185dff3eb32f1 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -9,10 +9,19 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/err.h>
+#include <linux/device.h>
 
+struct iommufd_device;
 struct iommufd_ctx;
 struct file;
 
+struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
+					   struct device *dev, u32 *id);
+void iommufd_device_unbind(struct iommufd_device *idev);
+
+int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
+void iommufd_device_detach(struct iommufd_device *idev);
+
 enum {
 	IOMMUFD_ACCESS_RW_READ = 0,
 	IOMMUFD_ACCESS_RW_WRITE = 1 << 0,
-- 
GitLab


From 8d40205f6093f18e07fe3dc5920fc85e9f82b8b3 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:37 -0400
Subject: [PATCH 3133/4122] iommufd: Add kAPI toward external drivers for
 kernel access

Kernel access is the mode that VFIO "mdevs" use. In this case there is no
struct device and no IOMMU connection. iommufd acts as a record keeper for
accesses and returns the actual struct pages back to the caller to use
however they need. eg with kmap or the DMA API.

Each caller must create a struct iommufd_access with
iommufd_access_create(), similar to how iommufd_device_bind() works. Using
this struct the caller can access blocks of IOVA using
iommufd_access_pin_pages() or iommufd_access_rw().

Callers must provide a callback that immediately unpins any IOVA being
used within a range. This happens if userspace unmaps the IOVA under the
pin.

The implementation forwards the access requests directly to the iopt
infrastructure that manages the iopt_pages_access.

Link: https://lore.kernel.org/r/14-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c          | 316 ++++++++++++++++++++++++
 drivers/iommu/iommufd/io_pagetable.c    |   8 +-
 drivers/iommu/iommufd/iommufd_private.h |  10 +
 drivers/iommu/iommufd/main.c            |   3 +
 include/linux/iommufd.h                 |  43 +++-
 5 files changed, 377 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 67cd00b4d9265..06b6894b77062 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -6,6 +6,7 @@
 #include <linux/iommu.h>
 #include <linux/irqdomain.h>
 
+#include "io_pagetable.h"
 #include "iommufd_private.h"
 
 static bool allow_unsafe_interrupts;
@@ -417,3 +418,318 @@ void iommufd_device_detach(struct iommufd_device *idev)
 	refcount_dec(&idev->obj.users);
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_device_detach, IOMMUFD);
+
+void iommufd_access_destroy_object(struct iommufd_object *obj)
+{
+	struct iommufd_access *access =
+		container_of(obj, struct iommufd_access, obj);
+
+	iopt_remove_access(&access->ioas->iopt, access);
+	iommufd_ctx_put(access->ictx);
+	refcount_dec(&access->ioas->obj.users);
+}
+
+/**
+ * iommufd_access_create - Create an iommufd_access
+ * @ictx: iommufd file descriptor
+ * @ioas_id: ID for a IOMMUFD_OBJ_IOAS
+ * @ops: Driver's ops to associate with the access
+ * @data: Opaque data to pass into ops functions
+ *
+ * An iommufd_access allows a driver to read/write to the IOAS without using
+ * DMA. The underlying CPU memory can be accessed using the
+ * iommufd_access_pin_pages() or iommufd_access_rw() functions.
+ *
+ * The provided ops are required to use iommufd_access_pin_pages().
+ */
+struct iommufd_access *
+iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+		      const struct iommufd_access_ops *ops, void *data)
+{
+	struct iommufd_access *access;
+	struct iommufd_object *obj;
+	int rc;
+
+	/*
+	 * There is no uAPI for the access object, but to keep things symmetric
+	 * use the object infrastructure anyhow.
+	 */
+	access = iommufd_object_alloc(ictx, access, IOMMUFD_OBJ_ACCESS);
+	if (IS_ERR(access))
+		return access;
+
+	access->data = data;
+	access->ops = ops;
+
+	obj = iommufd_get_object(ictx, ioas_id, IOMMUFD_OBJ_IOAS);
+	if (IS_ERR(obj)) {
+		rc = PTR_ERR(obj);
+		goto out_abort;
+	}
+	access->ioas = container_of(obj, struct iommufd_ioas, obj);
+	iommufd_ref_to_users(obj);
+
+	if (ops->needs_pin_pages)
+		access->iova_alignment = PAGE_SIZE;
+	else
+		access->iova_alignment = 1;
+	rc = iopt_add_access(&access->ioas->iopt, access);
+	if (rc)
+		goto out_put_ioas;
+
+	/* The calling driver is a user until iommufd_access_destroy() */
+	refcount_inc(&access->obj.users);
+	access->ictx = ictx;
+	iommufd_ctx_get(ictx);
+	iommufd_object_finalize(ictx, &access->obj);
+	return access;
+out_put_ioas:
+	refcount_dec(&access->ioas->obj.users);
+out_abort:
+	iommufd_object_abort(ictx, &access->obj);
+	return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_create, IOMMUFD);
+
+/**
+ * iommufd_access_destroy - Destroy an iommufd_access
+ * @access: The access to destroy
+ *
+ * The caller must stop using the access before destroying it.
+ */
+void iommufd_access_destroy(struct iommufd_access *access)
+{
+	bool was_destroyed;
+
+	was_destroyed = iommufd_object_destroy_user(access->ictx, &access->obj);
+	WARN_ON(!was_destroyed);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_destroy, IOMMUFD);
+
+/**
+ * iommufd_access_notify_unmap - Notify users of an iopt to stop using it
+ * @iopt: iopt to work on
+ * @iova: Starting iova in the iopt
+ * @length: Number of bytes
+ *
+ * After this function returns there should be no users attached to the pages
+ * linked to this iopt that intersect with iova,length. Anyone that has attached
+ * a user through iopt_access_pages() needs to detach it through
+ * iommufd_access_unpin_pages() before this function returns.
+ *
+ * iommufd_access_destroy() will wait for any outstanding unmap callback to
+ * complete. Once iommufd_access_destroy() no unmap ops are running or will
+ * run in the future. Due to this a driver must not create locking that prevents
+ * unmap to complete while iommufd_access_destroy() is running.
+ */
+void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
+				 unsigned long length)
+{
+	struct iommufd_ioas *ioas =
+		container_of(iopt, struct iommufd_ioas, iopt);
+	struct iommufd_access *access;
+	unsigned long index;
+
+	xa_lock(&ioas->iopt.access_list);
+	xa_for_each(&ioas->iopt.access_list, index, access) {
+		if (!iommufd_lock_obj(&access->obj))
+			continue;
+		xa_unlock(&ioas->iopt.access_list);
+
+		access->ops->unmap(access->data, iova, length);
+
+		iommufd_put_object(&access->obj);
+		xa_lock(&ioas->iopt.access_list);
+	}
+	xa_unlock(&ioas->iopt.access_list);
+}
+
+/**
+ * iommufd_access_unpin_pages() - Undo iommufd_access_pin_pages
+ * @access: IOAS access to act on
+ * @iova: Starting IOVA
+ * @length: Number of bytes to access
+ *
+ * Return the struct page's. The caller must stop accessing them before calling
+ * this. The iova/length must exactly match the one provided to access_pages.
+ */
+void iommufd_access_unpin_pages(struct iommufd_access *access,
+				unsigned long iova, unsigned long length)
+{
+	struct io_pagetable *iopt = &access->ioas->iopt;
+	struct iopt_area_contig_iter iter;
+	unsigned long last_iova;
+	struct iopt_area *area;
+
+	if (WARN_ON(!length) ||
+	    WARN_ON(check_add_overflow(iova, length - 1, &last_iova)))
+		return;
+
+	down_read(&iopt->iova_rwsem);
+	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
+		iopt_area_remove_access(
+			area, iopt_area_iova_to_index(area, iter.cur_iova),
+			iopt_area_iova_to_index(
+				area,
+				min(last_iova, iopt_area_last_iova(area))));
+	up_read(&iopt->iova_rwsem);
+	WARN_ON(!iopt_area_contig_done(&iter));
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_unpin_pages, IOMMUFD);
+
+static bool iopt_area_contig_is_aligned(struct iopt_area_contig_iter *iter)
+{
+	if (iopt_area_start_byte(iter->area, iter->cur_iova) % PAGE_SIZE)
+		return false;
+
+	if (!iopt_area_contig_done(iter) &&
+	    (iopt_area_start_byte(iter->area, iopt_area_last_iova(iter->area)) %
+	     PAGE_SIZE) != (PAGE_SIZE - 1))
+		return false;
+	return true;
+}
+
+static bool check_area_prot(struct iopt_area *area, unsigned int flags)
+{
+	if (flags & IOMMUFD_ACCESS_RW_WRITE)
+		return area->iommu_prot & IOMMU_WRITE;
+	return area->iommu_prot & IOMMU_READ;
+}
+
+/**
+ * iommufd_access_pin_pages() - Return a list of pages under the iova
+ * @access: IOAS access to act on
+ * @iova: Starting IOVA
+ * @length: Number of bytes to access
+ * @out_pages: Output page list
+ * @flags: IOPMMUFD_ACCESS_RW_* flags
+ *
+ * Reads @length bytes starting at iova and returns the struct page * pointers.
+ * These can be kmap'd by the caller for CPU access.
+ *
+ * The caller must perform iommufd_access_unpin_pages() when done to balance
+ * this.
+ *
+ * This API always requires a page aligned iova. This happens naturally if the
+ * ioas alignment is >= PAGE_SIZE and the iova is PAGE_SIZE aligned. However
+ * smaller alignments have corner cases where this API can fail on otherwise
+ * aligned iova.
+ */
+int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
+			     unsigned long length, struct page **out_pages,
+			     unsigned int flags)
+{
+	struct io_pagetable *iopt = &access->ioas->iopt;
+	struct iopt_area_contig_iter iter;
+	unsigned long last_iova;
+	struct iopt_area *area;
+	int rc;
+
+	if (!length)
+		return -EINVAL;
+	if (check_add_overflow(iova, length - 1, &last_iova))
+		return -EOVERFLOW;
+
+	down_read(&iopt->iova_rwsem);
+	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
+		unsigned long last = min(last_iova, iopt_area_last_iova(area));
+		unsigned long last_index = iopt_area_iova_to_index(area, last);
+		unsigned long index =
+			iopt_area_iova_to_index(area, iter.cur_iova);
+
+		if (area->prevent_access ||
+		    !iopt_area_contig_is_aligned(&iter)) {
+			rc = -EINVAL;
+			goto err_remove;
+		}
+
+		if (!check_area_prot(area, flags)) {
+			rc = -EPERM;
+			goto err_remove;
+		}
+
+		rc = iopt_area_add_access(area, index, last_index, out_pages,
+					  flags);
+		if (rc)
+			goto err_remove;
+		out_pages += last_index - index + 1;
+	}
+	if (!iopt_area_contig_done(&iter)) {
+		rc = -ENOENT;
+		goto err_remove;
+	}
+
+	up_read(&iopt->iova_rwsem);
+	return 0;
+
+err_remove:
+	if (iova < iter.cur_iova) {
+		last_iova = iter.cur_iova - 1;
+		iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova)
+			iopt_area_remove_access(
+				area,
+				iopt_area_iova_to_index(area, iter.cur_iova),
+				iopt_area_iova_to_index(
+					area, min(last_iova,
+						  iopt_area_last_iova(area))));
+	}
+	up_read(&iopt->iova_rwsem);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_pin_pages, IOMMUFD);
+
+/**
+ * iommufd_access_rw - Read or write data under the iova
+ * @access: IOAS access to act on
+ * @iova: Starting IOVA
+ * @data: Kernel buffer to copy to/from
+ * @length: Number of bytes to access
+ * @flags: IOMMUFD_ACCESS_RW_* flags
+ *
+ * Copy kernel to/from data into the range given by IOVA/length. If flags
+ * indicates IOMMUFD_ACCESS_RW_KTHREAD then a large copy can be optimized
+ * by changing it into copy_to/from_user().
+ */
+int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+		      void *data, size_t length, unsigned int flags)
+{
+	struct io_pagetable *iopt = &access->ioas->iopt;
+	struct iopt_area_contig_iter iter;
+	struct iopt_area *area;
+	unsigned long last_iova;
+	int rc;
+
+	if (!length)
+		return -EINVAL;
+	if (check_add_overflow(iova, length - 1, &last_iova))
+		return -EOVERFLOW;
+
+	down_read(&iopt->iova_rwsem);
+	iopt_for_each_contig_area(&iter, area, iopt, iova, last_iova) {
+		unsigned long last = min(last_iova, iopt_area_last_iova(area));
+		unsigned long bytes = (last - iter.cur_iova) + 1;
+
+		if (area->prevent_access) {
+			rc = -EINVAL;
+			goto err_out;
+		}
+
+		if (!check_area_prot(area, flags)) {
+			rc = -EPERM;
+			goto err_out;
+		}
+
+		rc = iopt_pages_rw_access(
+			area->pages, iopt_area_start_byte(area, iter.cur_iova),
+			data, bytes, flags);
+		if (rc)
+			goto err_out;
+		data += bytes;
+	}
+	if (!iopt_area_contig_done(&iter))
+		rc = -ENOENT;
+err_out:
+	up_read(&iopt->iova_rwsem);
+	return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 756d347948f0e..4f4a9d9aac570 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -458,6 +458,7 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
 	 * is NULL. This prevents domain attach/detatch from running
 	 * concurrently with cleaning up the area.
 	 */
+again:
 	down_read(&iopt->domains_rwsem);
 	down_write(&iopt->iova_rwsem);
 	while ((area = iopt_area_iter_first(iopt, start, last))) {
@@ -486,8 +487,11 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start,
 			area->prevent_access = true;
 			up_write(&iopt->iova_rwsem);
 			up_read(&iopt->domains_rwsem);
-			/* Later patch calls back to drivers to unmap */
-			return -EBUSY;
+			iommufd_access_notify_unmap(iopt, area_first,
+						    iopt_area_length(area));
+			if (WARN_ON(READ_ONCE(area->num_accesses)))
+				return -EDEADLOCK;
+			goto again;
 		}
 
 		pages = area->pages;
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 72a0c805be233..40302cc0da360 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -65,6 +65,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
 		    unsigned long length, unsigned long *unmapped);
 int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
 
+void iommufd_access_notify_unmap(struct io_pagetable *iopt, unsigned long iova,
+				 unsigned long length);
 int iopt_table_add_domain(struct io_pagetable *iopt,
 			  struct iommu_domain *domain);
 void iopt_table_remove_domain(struct io_pagetable *iopt,
@@ -106,6 +108,7 @@ enum iommufd_object_type {
 	IOMMUFD_OBJ_DEVICE,
 	IOMMUFD_OBJ_HW_PAGETABLE,
 	IOMMUFD_OBJ_IOAS,
+	IOMMUFD_OBJ_ACCESS,
 };
 
 /* Base struct for all objects with a userspace ID handle. */
@@ -246,6 +249,11 @@ void iommufd_hw_pagetable_destroy(struct iommufd_object *obj);
 void iommufd_device_destroy(struct iommufd_object *obj);
 
 struct iommufd_access {
+	struct iommufd_object obj;
+	struct iommufd_ctx *ictx;
+	struct iommufd_ioas *ioas;
+	const struct iommufd_access_ops *ops;
+	void *data;
 	unsigned long iova_alignment;
 	u32 iopt_access_list_id;
 };
@@ -253,4 +261,6 @@ struct iommufd_access {
 int iopt_add_access(struct io_pagetable *iopt, struct iommufd_access *access);
 void iopt_remove_access(struct io_pagetable *iopt,
 			struct iommufd_access *access);
+void iommufd_access_destroy_object(struct iommufd_object *obj);
+
 #endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index fe98912bab0e0..4153f6a202555 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -357,6 +357,9 @@ void iommufd_ctx_put(struct iommufd_ctx *ictx)
 EXPORT_SYMBOL_NS_GPL(iommufd_ctx_put, IOMMUFD);
 
 static const struct iommufd_object_ops iommufd_object_ops[] = {
+	[IOMMUFD_OBJ_ACCESS] = {
+		.destroy = iommufd_access_destroy_object,
+	},
 	[IOMMUFD_OBJ_DEVICE] = {
 		.destroy = iommufd_device_destroy,
 	},
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 185dff3eb32f1..46c481a26d791 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -9,10 +9,12 @@
 #include <linux/types.h>
 #include <linux/errno.h>
 #include <linux/err.h>
-#include <linux/device.h>
 
+struct device;
 struct iommufd_device;
+struct page;
 struct iommufd_ctx;
+struct iommufd_access;
 struct file;
 
 struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
@@ -22,6 +24,11 @@ void iommufd_device_unbind(struct iommufd_device *idev);
 int iommufd_device_attach(struct iommufd_device *idev, u32 *pt_id);
 void iommufd_device_detach(struct iommufd_device *idev);
 
+struct iommufd_access_ops {
+	u8 needs_pin_pages : 1;
+	void (*unmap)(void *data, unsigned long iova, unsigned long length);
+};
+
 enum {
 	IOMMUFD_ACCESS_RW_READ = 0,
 	IOMMUFD_ACCESS_RW_WRITE = 1 << 0,
@@ -29,11 +36,24 @@ enum {
 	IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1,
 };
 
+struct iommufd_access *
+iommufd_access_create(struct iommufd_ctx *ictx, u32 ioas_id,
+		      const struct iommufd_access_ops *ops, void *data);
+void iommufd_access_destroy(struct iommufd_access *access);
+
 void iommufd_ctx_get(struct iommufd_ctx *ictx);
 
 #if IS_ENABLED(CONFIG_IOMMUFD)
 struct iommufd_ctx *iommufd_ctx_from_file(struct file *file);
 void iommufd_ctx_put(struct iommufd_ctx *ictx);
+
+int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
+			     unsigned long length, struct page **out_pages,
+			     unsigned int flags);
+void iommufd_access_unpin_pages(struct iommufd_access *access,
+				unsigned long iova, unsigned long length);
+int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+		      void *data, size_t len, unsigned int flags);
 #else /* !CONFIG_IOMMUFD */
 static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 {
@@ -43,5 +63,26 @@ static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 static inline void iommufd_ctx_put(struct iommufd_ctx *ictx)
 {
 }
+
+static inline int iommufd_access_pin_pages(struct iommufd_access *access,
+					   unsigned long iova,
+					   unsigned long length,
+					   struct page **out_pages,
+					   unsigned int flags)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void iommufd_access_unpin_pages(struct iommufd_access *access,
+					      unsigned long iova,
+					      unsigned long length)
+{
+}
+
+static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
+		      void *data, size_t len, unsigned int flags)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_IOMMUFD */
 #endif
-- 
GitLab


From d624d6652a65ad4f47a58b8651a1ec1163bb81d3 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:38 -0400
Subject: [PATCH 3134/4122] iommufd: vfio container FD ioctl compatibility

iommufd can directly implement the /dev/vfio/vfio container IOCTLs by
mapping them into io_pagetable operations.

A userspace application can test against iommufd and confirm compatibility
then simply make a small change to open /dev/iommu instead of
/dev/vfio/vfio.

For testing purposes /dev/vfio/vfio can be symlinked to /dev/iommu and
then all applications will use the compatibility path with no code
changes. A later series allows /dev/vfio/vfio to be directly provided by
iommufd, which allows the rlimit mode to work the same as well.

This series just provides the iommufd side of compatibility. Actually
linking this to VFIO_SET_CONTAINER is a followup series, with a link in
the cover letter.

Internally the compatibility API uses a normal IOAS object that, like
vfio, is automatically allocated when the first device is
attached.

Userspace can also query or set this IOAS object directly using the
IOMMU_VFIO_IOAS ioctl. This allows mixing and matching new iommufd only
features while still using the VFIO style map/unmap ioctls.

While this is enough to operate qemu, it has a few differences:

 - Resource limits rely on memory cgroups to bound what userspace can do
   instead of the module parameter dma_entry_limit.

 - VFIO P2P is not implemented. The DMABUF patches for vfio are a start at
   a solution where iommufd would import a special DMABUF. This is to avoid
   further propogating the follow_pfn() security problem.

 - A full audit for pedantic compatibility details (eg errnos, etc) has
   not yet been done

 - powerpc SPAPR is left out, as it is not connected to the iommu_domain
   framework. It seems interest in SPAPR is minimal as it is currently
   non-working in v6.1-rc1. They will have to convert to the iommu
   subsystem framework to enjoy iommfd.

The following are not going to be implemented and we expect to remove them
from VFIO type1:

 - SW access 'dirty tracking'. As discussed in the cover letter this will
   be done in VFIO.

 - VFIO_TYPE1_NESTING_IOMMU
    https://lore.kernel.org/all/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/

 - VFIO_DMA_MAP_FLAG_VADDR
    https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/

Link: https://lore.kernel.org/r/15-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Makefile          |   3 +-
 drivers/iommu/iommufd/iommufd_private.h |   6 +
 drivers/iommu/iommufd/main.c            |  16 +-
 drivers/iommu/iommufd/vfio_compat.c     | 472 ++++++++++++++++++++++++
 include/linux/iommufd.h                 |   7 +
 include/uapi/linux/iommufd.h            |  36 ++
 6 files changed, 534 insertions(+), 6 deletions(-)
 create mode 100644 drivers/iommu/iommufd/vfio_compat.c

diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index ca28a135b9675..2fdff04000b32 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -5,6 +5,7 @@ iommufd-y := \
 	io_pagetable.o \
 	ioas.o \
 	main.o \
-	pages.o
+	pages.o \
+	vfio_compat.o
 
 obj-$(CONFIG_IOMMUFD) += iommufd.o
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 40302cc0da360..8fe5f162ccbcf 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -18,6 +18,7 @@ struct iommufd_ctx {
 	struct xarray objects;
 
 	u8 account_mode;
+	struct iommufd_ioas *vfio_ioas;
 };
 
 /*
@@ -92,6 +93,9 @@ struct iommufd_ucmd {
 	void *cmd;
 };
 
+int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
+		       unsigned long arg);
+
 /* Copy the response in ucmd->cmd back to userspace. */
 static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
 				       size_t cmd_len)
@@ -222,6 +226,8 @@ int iommufd_ioas_option(struct iommufd_ucmd *ucmd);
 int iommufd_option_rlimit_mode(struct iommu_option *cmd,
 			       struct iommufd_ctx *ictx);
 
+int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
+
 /*
  * A HW pagetable is called an iommu_domain inside the kernel. This user object
  * allows directly creating and inspecting the domains. Domains that have kernel
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 4153f6a202555..5cf69c4d591de 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -133,6 +133,8 @@ bool iommufd_object_destroy_user(struct iommufd_ctx *ictx,
 		return false;
 	}
 	__xa_erase(&ictx->objects, obj->id);
+	if (ictx->vfio_ioas && &ictx->vfio_ioas->obj == obj)
+		ictx->vfio_ioas = NULL;
 	xa_unlock(&ictx->objects);
 	up_write(&obj->destroy_rwsem);
 
@@ -271,27 +273,31 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 length),
 	IOCTL_OP(IOMMU_OPTION, iommufd_option, struct iommu_option,
 		 val64),
+	IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
+		 __reserved),
 };
 
 static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
 			       unsigned long arg)
 {
+	struct iommufd_ctx *ictx = filp->private_data;
 	const struct iommufd_ioctl_op *op;
 	struct iommufd_ucmd ucmd = {};
 	union ucmd_buffer buf;
 	unsigned int nr;
 	int ret;
 
-	ucmd.ictx = filp->private_data;
+	nr = _IOC_NR(cmd);
+	if (nr < IOMMUFD_CMD_BASE ||
+	    (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops))
+		return iommufd_vfio_ioctl(ictx, cmd, arg);
+
+	ucmd.ictx = ictx;
 	ucmd.ubuffer = (void __user *)arg;
 	ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
 	if (ret)
 		return ret;
 
-	nr = _IOC_NR(cmd);
-	if (nr < IOMMUFD_CMD_BASE ||
-	    (nr - IOMMUFD_CMD_BASE) >= ARRAY_SIZE(iommufd_ioctl_ops))
-		return -ENOIOCTLCMD;
 	op = &iommufd_ioctl_ops[nr - IOMMUFD_CMD_BASE];
 	if (op->ioctl_num != cmd)
 		return -ENOIOCTLCMD;
diff --git a/drivers/iommu/iommufd/vfio_compat.c b/drivers/iommu/iommufd/vfio_compat.c
new file mode 100644
index 0000000000000..3ceca0e8311c3
--- /dev/null
+++ b/drivers/iommu/iommufd/vfio_compat.c
@@ -0,0 +1,472 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/file.h>
+#include <linux/interval_tree.h>
+#include <linux/iommu.h>
+#include <linux/iommufd.h>
+#include <linux/slab.h>
+#include <linux/vfio.h>
+#include <uapi/linux/vfio.h>
+#include <uapi/linux/iommufd.h>
+
+#include "iommufd_private.h"
+
+static struct iommufd_ioas *get_compat_ioas(struct iommufd_ctx *ictx)
+{
+	struct iommufd_ioas *ioas = ERR_PTR(-ENODEV);
+
+	xa_lock(&ictx->objects);
+	if (!ictx->vfio_ioas || !iommufd_lock_obj(&ictx->vfio_ioas->obj))
+		goto out_unlock;
+	ioas = ictx->vfio_ioas;
+out_unlock:
+	xa_unlock(&ictx->objects);
+	return ioas;
+}
+
+/**
+ * iommufd_vfio_compat_ioas_id - Return the IOAS ID that vfio should use
+ * @ictx: Context to operate on
+ * @out_ioas_id: The ioas_id the caller should use
+ *
+ * The compatibility IOAS is the IOAS that the vfio compatibility ioctls operate
+ * on since they do not have an IOAS ID input in their ABI. Only attaching a
+ * group should cause a default creation of the internal ioas, this returns the
+ * existing ioas if it has already been assigned somehow.
+ */
+int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id)
+{
+	struct iommufd_ioas *ioas = NULL;
+	struct iommufd_ioas *out_ioas;
+
+	ioas = iommufd_ioas_alloc(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	xa_lock(&ictx->objects);
+	if (ictx->vfio_ioas && iommufd_lock_obj(&ictx->vfio_ioas->obj))
+		out_ioas = ictx->vfio_ioas;
+	else {
+		out_ioas = ioas;
+		ictx->vfio_ioas = ioas;
+	}
+	xa_unlock(&ictx->objects);
+
+	*out_ioas_id = out_ioas->obj.id;
+	if (out_ioas != ioas) {
+		iommufd_put_object(&out_ioas->obj);
+		iommufd_object_abort(ictx, &ioas->obj);
+		return 0;
+	}
+	/*
+	 * An automatically created compat IOAS is treated as a userspace
+	 * created object. Userspace can learn the ID via IOMMU_VFIO_IOAS_GET,
+	 * and if not manually destroyed it will be destroyed automatically
+	 * at iommufd release.
+	 */
+	iommufd_object_finalize(ictx, &ioas->obj);
+	return 0;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_vfio_compat_ioas_id, IOMMUFD_VFIO);
+
+int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_vfio_ioas *cmd = ucmd->cmd;
+	struct iommufd_ioas *ioas;
+
+	if (cmd->__reserved)
+		return -EOPNOTSUPP;
+	switch (cmd->op) {
+	case IOMMU_VFIO_IOAS_GET:
+		ioas = get_compat_ioas(ucmd->ictx);
+		if (IS_ERR(ioas))
+			return PTR_ERR(ioas);
+		cmd->ioas_id = ioas->obj.id;
+		iommufd_put_object(&ioas->obj);
+		return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+	case IOMMU_VFIO_IOAS_SET:
+		ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+		if (IS_ERR(ioas))
+			return PTR_ERR(ioas);
+		xa_lock(&ucmd->ictx->objects);
+		ucmd->ictx->vfio_ioas = ioas;
+		xa_unlock(&ucmd->ictx->objects);
+		iommufd_put_object(&ioas->obj);
+		return 0;
+
+	case IOMMU_VFIO_IOAS_CLEAR:
+		xa_lock(&ucmd->ictx->objects);
+		ucmd->ictx->vfio_ioas = NULL;
+		xa_unlock(&ucmd->ictx->objects);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int iommufd_vfio_map_dma(struct iommufd_ctx *ictx, unsigned int cmd,
+				void __user *arg)
+{
+	u32 supported_flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+	size_t minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
+	struct vfio_iommu_type1_dma_map map;
+	int iommu_prot = IOMMU_CACHE;
+	struct iommufd_ioas *ioas;
+	unsigned long iova;
+	int rc;
+
+	if (copy_from_user(&map, arg, minsz))
+		return -EFAULT;
+
+	if (map.argsz < minsz || map.flags & ~supported_flags)
+		return -EINVAL;
+
+	if (map.flags & VFIO_DMA_MAP_FLAG_READ)
+		iommu_prot |= IOMMU_READ;
+	if (map.flags & VFIO_DMA_MAP_FLAG_WRITE)
+		iommu_prot |= IOMMU_WRITE;
+
+	ioas = get_compat_ioas(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	/*
+	 * Maps created through the legacy interface always use VFIO compatible
+	 * rlimit accounting. If the user wishes to use the faster user based
+	 * rlimit accounting then they must use the new interface.
+	 */
+	iova = map.iova;
+	rc = iopt_map_user_pages(ictx, &ioas->iopt, &iova, u64_to_user_ptr(map.vaddr),
+				 map.size, iommu_prot, 0);
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+static int iommufd_vfio_unmap_dma(struct iommufd_ctx *ictx, unsigned int cmd,
+				  void __user *arg)
+{
+	size_t minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, size);
+	/*
+	 * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP is obsoleted by the new
+	 * dirty tracking direction:
+	 *  https://lore.kernel.org/kvm/20220731125503.142683-1-yishaih@nvidia.com/
+	 *  https://lore.kernel.org/kvm/20220428210933.3583-1-joao.m.martins@oracle.com/
+	 */
+	u32 supported_flags = VFIO_DMA_UNMAP_FLAG_ALL;
+	struct vfio_iommu_type1_dma_unmap unmap;
+	unsigned long unmapped = 0;
+	struct iommufd_ioas *ioas;
+	int rc;
+
+	if (copy_from_user(&unmap, arg, minsz))
+		return -EFAULT;
+
+	if (unmap.argsz < minsz || unmap.flags & ~supported_flags)
+		return -EINVAL;
+
+	ioas = get_compat_ioas(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	if (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL) {
+		if (unmap.iova != 0 || unmap.size != 0) {
+			rc = -EINVAL;
+			goto err_put;
+		}
+		rc = iopt_unmap_all(&ioas->iopt, &unmapped);
+	} else {
+		if (READ_ONCE(ioas->iopt.disable_large_pages)) {
+			/*
+			 * Create cuts at the start and last of the requested
+			 * range. If the start IOVA is 0 then it doesn't need to
+			 * be cut.
+			 */
+			unsigned long iovas[] = { unmap.iova + unmap.size - 1,
+						  unmap.iova - 1 };
+
+			rc = iopt_cut_iova(&ioas->iopt, iovas,
+					   unmap.iova ? 2 : 1);
+			if (rc)
+				goto err_put;
+		}
+		rc = iopt_unmap_iova(&ioas->iopt, unmap.iova, unmap.size,
+				     &unmapped);
+	}
+	unmap.size = unmapped;
+	if (copy_to_user(arg, &unmap, minsz))
+		rc = -EFAULT;
+
+err_put:
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+static int iommufd_vfio_cc_iommu(struct iommufd_ctx *ictx)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	struct iommufd_ioas *ioas;
+	int rc = 1;
+
+	ioas = get_compat_ioas(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	mutex_lock(&ioas->mutex);
+	list_for_each_entry(hwpt, &ioas->hwpt_list, hwpt_item) {
+		if (!hwpt->enforce_cache_coherency) {
+			rc = 0;
+			break;
+		}
+	}
+	mutex_unlock(&ioas->mutex);
+
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+static int iommufd_vfio_check_extension(struct iommufd_ctx *ictx,
+					unsigned long type)
+{
+	switch (type) {
+	case VFIO_TYPE1_IOMMU:
+	case VFIO_TYPE1v2_IOMMU:
+	case VFIO_UNMAP_ALL:
+		return 1;
+
+	case VFIO_DMA_CC_IOMMU:
+		return iommufd_vfio_cc_iommu(ictx);
+
+	/*
+	 * This is obsolete, and to be removed from VFIO. It was an incomplete
+	 * idea that got merged.
+	 * https://lore.kernel.org/kvm/0-v1-0093c9b0e345+19-vfio_no_nesting_jgg@nvidia.com/
+	 */
+	case VFIO_TYPE1_NESTING_IOMMU:
+		return 0;
+
+	/*
+	 * VFIO_DMA_MAP_FLAG_VADDR
+	 * https://lore.kernel.org/kvm/1611939252-7240-1-git-send-email-steven.sistare@oracle.com/
+	 * https://lore.kernel.org/all/Yz777bJZjTyLrHEQ@nvidia.com/
+	 *
+	 * It is hard to see how this could be implemented safely.
+	 */
+	case VFIO_UPDATE_VADDR:
+	default:
+		return 0;
+	}
+}
+
+static int iommufd_vfio_set_iommu(struct iommufd_ctx *ictx, unsigned long type)
+{
+	struct iommufd_ioas *ioas = NULL;
+	int rc = 0;
+
+	if (type != VFIO_TYPE1_IOMMU && type != VFIO_TYPE1v2_IOMMU)
+		return -EINVAL;
+
+	/* VFIO fails the set_iommu if there is no group */
+	ioas = get_compat_ioas(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	/*
+	 * The difference between TYPE1 and TYPE1v2 is the ability to unmap in
+	 * the middle of mapped ranges. This is complicated by huge page support
+	 * which creates single large IOPTEs that cannot be split by the iommu
+	 * driver. TYPE1 is very old at this point and likely nothing uses it,
+	 * however it is simple enough to emulate by simply disabling the
+	 * problematic large IOPTEs. Then we can safely unmap within any range.
+	 */
+	if (type == VFIO_TYPE1_IOMMU)
+		rc = iopt_disable_large_pages(&ioas->iopt);
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+static unsigned long iommufd_get_pagesizes(struct iommufd_ioas *ioas)
+{
+	struct io_pagetable *iopt = &ioas->iopt;
+	unsigned long pgsize_bitmap = ULONG_MAX;
+	struct iommu_domain *domain;
+	unsigned long index;
+
+	down_read(&iopt->domains_rwsem);
+	xa_for_each(&iopt->domains, index, domain)
+		pgsize_bitmap &= domain->pgsize_bitmap;
+
+	/* See vfio_update_pgsize_bitmap() */
+	if (pgsize_bitmap & ~PAGE_MASK) {
+		pgsize_bitmap &= PAGE_MASK;
+		pgsize_bitmap |= PAGE_SIZE;
+	}
+	pgsize_bitmap = max(pgsize_bitmap, ioas->iopt.iova_alignment);
+	up_read(&iopt->domains_rwsem);
+	return pgsize_bitmap;
+}
+
+static int iommufd_fill_cap_iova(struct iommufd_ioas *ioas,
+				 struct vfio_info_cap_header __user *cur,
+				 size_t avail)
+{
+	struct vfio_iommu_type1_info_cap_iova_range __user *ucap_iovas =
+		container_of(cur,
+			     struct vfio_iommu_type1_info_cap_iova_range __user,
+			     header);
+	struct vfio_iommu_type1_info_cap_iova_range cap_iovas = {
+		.header = {
+			.id = VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE,
+			.version = 1,
+		},
+	};
+	struct interval_tree_span_iter span;
+
+	interval_tree_for_each_span(&span, &ioas->iopt.reserved_itree, 0,
+				    ULONG_MAX) {
+		struct vfio_iova_range range;
+
+		if (!span.is_hole)
+			continue;
+		range.start = span.start_hole;
+		range.end = span.last_hole;
+		if (avail >= struct_size(&cap_iovas, iova_ranges,
+					 cap_iovas.nr_iovas + 1) &&
+		    copy_to_user(&ucap_iovas->iova_ranges[cap_iovas.nr_iovas],
+				 &range, sizeof(range)))
+			return -EFAULT;
+		cap_iovas.nr_iovas++;
+	}
+	if (avail >= struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas) &&
+	    copy_to_user(ucap_iovas, &cap_iovas, sizeof(cap_iovas)))
+		return -EFAULT;
+	return struct_size(&cap_iovas, iova_ranges, cap_iovas.nr_iovas);
+}
+
+static int iommufd_fill_cap_dma_avail(struct iommufd_ioas *ioas,
+				      struct vfio_info_cap_header __user *cur,
+				      size_t avail)
+{
+	struct vfio_iommu_type1_info_dma_avail cap_dma = {
+		.header = {
+			.id = VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL,
+			.version = 1,
+		},
+		/*
+		 * iommufd's limit is based on the cgroup's memory limit.
+		 * Normally vfio would return U16_MAX here, and provide a module
+		 * parameter to adjust it. Since S390 qemu userspace actually
+		 * pays attention and needs a value bigger than U16_MAX return
+		 * U32_MAX.
+		 */
+		.avail = U32_MAX,
+	};
+
+	if (avail >= sizeof(cap_dma) &&
+	    copy_to_user(cur, &cap_dma, sizeof(cap_dma)))
+		return -EFAULT;
+	return sizeof(cap_dma);
+}
+
+static int iommufd_vfio_iommu_get_info(struct iommufd_ctx *ictx,
+				       void __user *arg)
+{
+	typedef int (*fill_cap_fn)(struct iommufd_ioas *ioas,
+				   struct vfio_info_cap_header __user *cur,
+				   size_t avail);
+	static const fill_cap_fn fill_fns[] = {
+		iommufd_fill_cap_dma_avail,
+		iommufd_fill_cap_iova,
+	};
+	size_t minsz = offsetofend(struct vfio_iommu_type1_info, iova_pgsizes);
+	struct vfio_info_cap_header __user *last_cap = NULL;
+	struct vfio_iommu_type1_info info;
+	struct iommufd_ioas *ioas;
+	size_t total_cap_size;
+	int rc;
+	int i;
+
+	if (copy_from_user(&info, arg, minsz))
+		return -EFAULT;
+
+	if (info.argsz < minsz)
+		return -EINVAL;
+	minsz = min_t(size_t, info.argsz, sizeof(info));
+
+	ioas = get_compat_ioas(ictx);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	info.flags = VFIO_IOMMU_INFO_PGSIZES;
+	info.iova_pgsizes = iommufd_get_pagesizes(ioas);
+	info.cap_offset = 0;
+
+	down_read(&ioas->iopt.iova_rwsem);
+	total_cap_size = sizeof(info);
+	for (i = 0; i != ARRAY_SIZE(fill_fns); i++) {
+		int cap_size;
+
+		if (info.argsz > total_cap_size)
+			cap_size = fill_fns[i](ioas, arg + total_cap_size,
+					       info.argsz - total_cap_size);
+		else
+			cap_size = fill_fns[i](ioas, NULL, 0);
+		if (cap_size < 0) {
+			rc = cap_size;
+			goto out_put;
+		}
+		if (last_cap && info.argsz >= total_cap_size &&
+		    put_user(total_cap_size, &last_cap->next)) {
+			rc = -EFAULT;
+			goto out_put;
+		}
+		last_cap = arg + total_cap_size;
+		total_cap_size += cap_size;
+	}
+
+	/*
+	 * If the user did not provide enough space then only some caps are
+	 * returned and the argsz will be updated to the correct amount to get
+	 * all caps.
+	 */
+	if (info.argsz >= total_cap_size)
+		info.cap_offset = sizeof(info);
+	info.argsz = total_cap_size;
+	info.flags |= VFIO_IOMMU_INFO_CAPS;
+	if (copy_to_user(arg, &info, minsz)) {
+		rc = -EFAULT;
+		goto out_put;
+	}
+	rc = 0;
+
+out_put:
+	up_read(&ioas->iopt.iova_rwsem);
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+int iommufd_vfio_ioctl(struct iommufd_ctx *ictx, unsigned int cmd,
+		       unsigned long arg)
+{
+	void __user *uarg = (void __user *)arg;
+
+	switch (cmd) {
+	case VFIO_GET_API_VERSION:
+		return VFIO_API_VERSION;
+	case VFIO_SET_IOMMU:
+		return iommufd_vfio_set_iommu(ictx, arg);
+	case VFIO_CHECK_EXTENSION:
+		return iommufd_vfio_check_extension(ictx, arg);
+	case VFIO_IOMMU_GET_INFO:
+		return iommufd_vfio_iommu_get_info(ictx, uarg);
+	case VFIO_IOMMU_MAP_DMA:
+		return iommufd_vfio_map_dma(ictx, cmd, uarg);
+	case VFIO_IOMMU_UNMAP_DMA:
+		return iommufd_vfio_unmap_dma(ictx, cmd, uarg);
+	case VFIO_IOMMU_DIRTY_PAGES:
+	default:
+		return -ENOIOCTLCMD;
+	}
+	return -ENOIOCTLCMD;
+}
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 46c481a26d791..84af9a239769a 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -54,6 +54,7 @@ void iommufd_access_unpin_pages(struct iommufd_access *access,
 				unsigned long iova, unsigned long length);
 int iommufd_access_rw(struct iommufd_access *access, unsigned long iova,
 		      void *data, size_t len, unsigned int flags);
+int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx, u32 *out_ioas_id);
 #else /* !CONFIG_IOMMUFD */
 static inline struct iommufd_ctx *iommufd_ctx_from_file(struct file *file)
 {
@@ -84,5 +85,11 @@ static inline int iommufd_access_rw(struct iommufd_access *access, unsigned long
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int iommufd_vfio_compat_ioas_id(struct iommufd_ctx *ictx,
+					      u32 *out_ioas_id)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* CONFIG_IOMMUFD */
 #endif
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 30cc5c5e2b347..98ebba80cfa1f 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -44,6 +44,7 @@ enum {
 	IOMMUFD_CMD_IOAS_MAP,
 	IOMMUFD_CMD_IOAS_UNMAP,
 	IOMMUFD_CMD_OPTION,
+	IOMMUFD_CMD_VFIO_IOAS,
 };
 
 /**
@@ -308,4 +309,39 @@ struct iommu_option {
 	__aligned_u64 val64;
 };
 #define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
+
+/**
+ * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
+ * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
+ * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
+ * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
+ */
+enum iommufd_vfio_ioas_op {
+	IOMMU_VFIO_IOAS_GET = 0,
+	IOMMU_VFIO_IOAS_SET = 1,
+	IOMMU_VFIO_IOAS_CLEAR = 2,
+};
+
+/**
+ * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
+ * @size: sizeof(struct iommu_vfio_ioas)
+ * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
+ *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
+ * @op: One of enum iommufd_vfio_ioas_op
+ * @__reserved: Must be 0
+ *
+ * The VFIO compatibility support uses a single ioas because VFIO APIs do not
+ * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
+ * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
+ * compatibility ioas, either by taking what is already set, or auto creating
+ * one. From then on VFIO will continue to use that ioas and is not effected by
+ * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
+ */
+struct iommu_vfio_ioas {
+	__u32 size;
+	__u32 ioas_id;
+	__u16 op;
+	__u16 __reserved;
+};
+#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)
 #endif
-- 
GitLab


From f4b20bb34c83dceade5470288f48f94ce3598ada Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:39 -0400
Subject: [PATCH 3135/4122] iommufd: Add kernel support for testing iommufd

Provide a mock kernel module for the iommu_domain that allows it to run
without any HW and the mocking provides a way to directly validate that
the PFNs loaded into the iommu_domain are correct. This exposes the access
kAPI toward userspace to allow userspace to explore the functionality of
pages.c and io_pagetable.c

The mock also simulates the rare case of PAGE_SIZE > iommu page size as
the mock will operate at a 2K iommu page size. This allows exercising all
of the calculations to support this mismatch.

This is also intended to support syzkaller exploring the same space.

However, it is an unusually invasive config option to enable all of
this. The config option should not be enabled in a production kernel.

Link: https://lore.kernel.org/r/16-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> # s390
Tested-by: Eric Auger <eric.auger@redhat.com> # aarch64
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Kconfig           |  12 +
 drivers/iommu/iommufd/Makefile          |   2 +
 drivers/iommu/iommufd/device.c          |  38 ++
 drivers/iommu/iommufd/ioas.c            |   3 +
 drivers/iommu/iommufd/iommufd_private.h |  35 +
 drivers/iommu/iommufd/iommufd_test.h    |  93 +++
 drivers/iommu/iommufd/main.c            |  14 +
 drivers/iommu/iommufd/pages.c           |   8 +
 drivers/iommu/iommufd/selftest.c        | 853 ++++++++++++++++++++++++
 include/linux/iommufd.h                 |   3 +
 10 files changed, 1061 insertions(+)
 create mode 100644 drivers/iommu/iommufd/iommufd_test.h
 create mode 100644 drivers/iommu/iommufd/selftest.c

diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 164812084a675..871244f2443fb 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -10,3 +10,15 @@ config IOMMUFD
 	  it relates to managing IO page tables that point at user space memory.
 
 	  If you don't know what to do here, say N.
+
+if IOMMUFD
+config IOMMUFD_TEST
+	bool "IOMMU Userspace API Test support"
+	depends on DEBUG_KERNEL
+	depends on FAULT_INJECTION
+	depends on RUNTIME_TESTING_MENU
+	default n
+	help
+	  This is dangerous, do not enable unless running
+	  tools/testing/selftests/iommu
+endif
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index 2fdff04000b32..8aeba81800c51 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -8,4 +8,6 @@ iommufd-y := \
 	pages.o \
 	vfio_compat.o
 
+iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
+
 obj-$(CONFIG_IOMMUFD) += iommufd.o
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 06b6894b77062..67ce36152e8ab 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -733,3 +733,41 @@ err_out:
 	return rc;
 }
 EXPORT_SYMBOL_NS_GPL(iommufd_access_rw, IOMMUFD);
+
+#ifdef CONFIG_IOMMUFD_TEST
+/*
+ * Creating a real iommufd_device is too hard, bypass creating a iommufd_device
+ * and go directly to attaching a domain.
+ */
+struct iommufd_hw_pagetable *
+iommufd_device_selftest_attach(struct iommufd_ctx *ictx,
+			       struct iommufd_ioas *ioas,
+			       struct device *mock_dev)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	int rc;
+
+	hwpt = iommufd_hw_pagetable_alloc(ictx, ioas, mock_dev);
+	if (IS_ERR(hwpt))
+		return hwpt;
+
+	rc = iopt_table_add_domain(&hwpt->ioas->iopt, hwpt->domain);
+	if (rc)
+		goto out_hwpt;
+
+	refcount_inc(&hwpt->obj.users);
+	iommufd_object_finalize(ictx, &hwpt->obj);
+	return hwpt;
+
+out_hwpt:
+	iommufd_object_abort_and_destroy(ictx, &hwpt->obj);
+	return ERR_PTR(rc);
+}
+
+void iommufd_device_selftest_detach(struct iommufd_ctx *ictx,
+				    struct iommufd_hw_pagetable *hwpt)
+{
+	iopt_table_remove_domain(&hwpt->ioas->iopt, hwpt->domain);
+	refcount_dec(&hwpt->obj.users);
+}
+#endif
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
index 302779b33bd4a..31577e9d434f8 100644
--- a/drivers/iommu/iommufd/ioas.c
+++ b/drivers/iommu/iommufd/ioas.c
@@ -242,6 +242,9 @@ int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
 	unsigned long iova;
 	int rc;
 
+	iommufd_test_syz_conv_iova_id(ucmd, cmd->src_ioas_id, &cmd->src_iova,
+				      &cmd->flags);
+
 	if ((cmd->flags &
 	     ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
 	       IOMMU_IOAS_MAP_READABLE)))
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 8fe5f162ccbcf..222e86591f8ac 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -113,6 +113,9 @@ enum iommufd_object_type {
 	IOMMUFD_OBJ_HW_PAGETABLE,
 	IOMMUFD_OBJ_IOAS,
 	IOMMUFD_OBJ_ACCESS,
+#ifdef CONFIG_IOMMUFD_TEST
+	IOMMUFD_OBJ_SELFTEST,
+#endif
 };
 
 /* Base struct for all objects with a userspace ID handle. */
@@ -269,4 +272,36 @@ void iopt_remove_access(struct io_pagetable *iopt,
 			struct iommufd_access *access);
 void iommufd_access_destroy_object(struct iommufd_object *obj);
 
+#ifdef CONFIG_IOMMUFD_TEST
+struct iommufd_hw_pagetable *
+iommufd_device_selftest_attach(struct iommufd_ctx *ictx,
+			       struct iommufd_ioas *ioas,
+			       struct device *mock_dev);
+void iommufd_device_selftest_detach(struct iommufd_ctx *ictx,
+				    struct iommufd_hw_pagetable *hwpt);
+int iommufd_test(struct iommufd_ucmd *ucmd);
+void iommufd_selftest_destroy(struct iommufd_object *obj);
+extern size_t iommufd_test_memory_limit;
+void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
+				   unsigned int ioas_id, u64 *iova, u32 *flags);
+bool iommufd_should_fail(void);
+void __init iommufd_test_init(void);
+void iommufd_test_exit(void);
+#else
+static inline void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
+						 unsigned int ioas_id,
+						 u64 *iova, u32 *flags)
+{
+}
+static inline bool iommufd_should_fail(void)
+{
+	return false;
+}
+static inline void __init iommufd_test_init(void)
+{
+}
+static inline void iommufd_test_exit(void)
+{
+}
+#endif
 #endif
diff --git a/drivers/iommu/iommufd/iommufd_test.h b/drivers/iommu/iommufd/iommufd_test.h
new file mode 100644
index 0000000000000..1d96a8f466fd2
--- /dev/null
+++ b/drivers/iommu/iommufd/iommufd_test.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
+ */
+#ifndef _UAPI_IOMMUFD_TEST_H
+#define _UAPI_IOMMUFD_TEST_H
+
+#include <linux/types.h>
+#include <linux/iommufd.h>
+
+enum {
+	IOMMU_TEST_OP_ADD_RESERVED = 1,
+	IOMMU_TEST_OP_MOCK_DOMAIN,
+	IOMMU_TEST_OP_MD_CHECK_MAP,
+	IOMMU_TEST_OP_MD_CHECK_REFS,
+	IOMMU_TEST_OP_CREATE_ACCESS,
+	IOMMU_TEST_OP_DESTROY_ACCESS_PAGES,
+	IOMMU_TEST_OP_ACCESS_PAGES,
+	IOMMU_TEST_OP_ACCESS_RW,
+	IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
+};
+
+enum {
+	MOCK_APERTURE_START = 1UL << 24,
+	MOCK_APERTURE_LAST = (1UL << 31) - 1,
+};
+
+enum {
+	MOCK_FLAGS_ACCESS_WRITE = 1 << 0,
+	MOCK_FLAGS_ACCESS_SYZ = 1 << 16,
+};
+
+enum {
+	MOCK_ACCESS_RW_WRITE = 1 << 0,
+	MOCK_ACCESS_RW_SLOW_PATH = 1 << 2,
+};
+
+enum {
+	MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES = 1 << 0,
+};
+
+struct iommu_test_cmd {
+	__u32 size;
+	__u32 op;
+	__u32 id;
+	__u32 __reserved;
+	union {
+		struct {
+			__aligned_u64 start;
+			__aligned_u64 length;
+		} add_reserved;
+		struct {
+			__u32 out_device_id;
+			__u32 out_hwpt_id;
+		} mock_domain;
+		struct {
+			__aligned_u64 iova;
+			__aligned_u64 length;
+			__aligned_u64 uptr;
+		} check_map;
+		struct {
+			__aligned_u64 length;
+			__aligned_u64 uptr;
+			__u32 refs;
+		} check_refs;
+		struct {
+			__u32 out_access_fd;
+			__u32 flags;
+		} create_access;
+		struct {
+			__u32 access_pages_id;
+		} destroy_access_pages;
+		struct {
+			__u32 flags;
+			__u32 out_access_pages_id;
+			__aligned_u64 iova;
+			__aligned_u64 length;
+			__aligned_u64 uptr;
+		} access_pages;
+		struct {
+			__aligned_u64 iova;
+			__aligned_u64 length;
+			__aligned_u64 uptr;
+			__u32 flags;
+		} access_rw;
+		struct {
+			__u32 limit;
+		} memory_limit;
+	};
+	__u32 last;
+};
+#define IOMMU_TEST_CMD _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE + 32)
+
+#endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 5cf69c4d591de..7c8f40bc8d98d 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -19,6 +19,7 @@
 #include <linux/iommufd.h>
 
 #include "iommufd_private.h"
+#include "iommufd_test.h"
 
 struct iommufd_object_ops {
 	void (*destroy)(struct iommufd_object *obj);
@@ -239,6 +240,9 @@ union ucmd_buffer {
 	struct iommu_ioas_iova_ranges iova_ranges;
 	struct iommu_ioas_map map;
 	struct iommu_ioas_unmap unmap;
+#ifdef CONFIG_IOMMUFD_TEST
+	struct iommu_test_cmd test;
+#endif
 };
 
 struct iommufd_ioctl_op {
@@ -275,6 +279,9 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
 		 val64),
 	IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
 		 __reserved),
+#ifdef CONFIG_IOMMUFD_TEST
+	IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
+#endif
 };
 
 static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
@@ -375,6 +382,11 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
 	[IOMMUFD_OBJ_HW_PAGETABLE] = {
 		.destroy = iommufd_hw_pagetable_destroy,
 	},
+#ifdef CONFIG_IOMMUFD_TEST
+	[IOMMUFD_OBJ_SELFTEST] = {
+		.destroy = iommufd_selftest_destroy,
+	},
+#endif
 };
 
 static struct miscdevice iommu_misc_dev = {
@@ -392,11 +404,13 @@ static int __init iommufd_init(void)
 	ret = misc_register(&iommu_misc_dev);
 	if (ret)
 		return ret;
+	iommufd_test_init();
 	return 0;
 }
 
 static void __exit iommufd_exit(void)
 {
+	iommufd_test_exit();
 	misc_deregister(&iommu_misc_dev);
 }
 
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index bafeee9d73e8a..640331b8a0791 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -56,7 +56,11 @@
 #include "io_pagetable.h"
 #include "double_span.h"
 
+#ifndef CONFIG_IOMMUFD_TEST
 #define TEMP_MEMORY_LIMIT 65536
+#else
+#define TEMP_MEMORY_LIMIT iommufd_test_memory_limit
+#endif
 #define BATCH_BACKUP_SIZE 32
 
 /*
@@ -1756,6 +1760,10 @@ int iopt_pages_rw_access(struct iopt_pages *pages, unsigned long start_byte,
 	bool change_mm = current->mm != pages->source_mm;
 	int rc = 0;
 
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+	    (flags & __IOMMUFD_ACCESS_RW_SLOW_PATH))
+		change_mm = true;
+
 	if ((flags & IOMMUFD_ACCESS_RW_WRITE) && !pages->writable)
 		return -EPERM;
 
diff --git a/drivers/iommu/iommufd/selftest.c b/drivers/iommu/iommufd/selftest.c
new file mode 100644
index 0000000000000..cfb5fe9a5e0ee
--- /dev/null
+++ b/drivers/iommu/iommufd/selftest.c
@@ -0,0 +1,853 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
+ *
+ * Kernel side components to support tools/testing/selftests/iommu
+ */
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/xarray.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/fault-inject.h>
+#include <uapi/linux/iommufd.h>
+
+#include "io_pagetable.h"
+#include "iommufd_private.h"
+#include "iommufd_test.h"
+
+static DECLARE_FAULT_ATTR(fail_iommufd);
+static struct dentry *dbgfs_root;
+
+size_t iommufd_test_memory_limit = 65536;
+
+enum {
+	MOCK_IO_PAGE_SIZE = PAGE_SIZE / 2,
+
+	/*
+	 * Like a real page table alignment requires the low bits of the address
+	 * to be zero. xarray also requires the high bit to be zero, so we store
+	 * the pfns shifted. The upper bits are used for metadata.
+	 */
+	MOCK_PFN_MASK = ULONG_MAX / MOCK_IO_PAGE_SIZE,
+
+	_MOCK_PFN_START = MOCK_PFN_MASK + 1,
+	MOCK_PFN_START_IOVA = _MOCK_PFN_START,
+	MOCK_PFN_LAST_IOVA = _MOCK_PFN_START,
+};
+
+/*
+ * Syzkaller has trouble randomizing the correct iova to use since it is linked
+ * to the map ioctl's output, and it has no ide about that. So, simplify things.
+ * In syzkaller mode the 64 bit IOVA is converted into an nth area and offset
+ * value. This has a much smaller randomization space and syzkaller can hit it.
+ */
+static unsigned long iommufd_test_syz_conv_iova(struct io_pagetable *iopt,
+						u64 *iova)
+{
+	struct syz_layout {
+		__u32 nth_area;
+		__u32 offset;
+	};
+	struct syz_layout *syz = (void *)iova;
+	unsigned int nth = syz->nth_area;
+	struct iopt_area *area;
+
+	down_read(&iopt->iova_rwsem);
+	for (area = iopt_area_iter_first(iopt, 0, ULONG_MAX); area;
+	     area = iopt_area_iter_next(area, 0, ULONG_MAX)) {
+		if (nth == 0) {
+			up_read(&iopt->iova_rwsem);
+			return iopt_area_iova(area) + syz->offset;
+		}
+		nth--;
+	}
+	up_read(&iopt->iova_rwsem);
+
+	return 0;
+}
+
+void iommufd_test_syz_conv_iova_id(struct iommufd_ucmd *ucmd,
+				   unsigned int ioas_id, u64 *iova, u32 *flags)
+{
+	struct iommufd_ioas *ioas;
+
+	if (!(*flags & MOCK_FLAGS_ACCESS_SYZ))
+		return;
+	*flags &= ~(u32)MOCK_FLAGS_ACCESS_SYZ;
+
+	ioas = iommufd_get_ioas(ucmd, ioas_id);
+	if (IS_ERR(ioas))
+		return;
+	*iova = iommufd_test_syz_conv_iova(&ioas->iopt, iova);
+	iommufd_put_object(&ioas->obj);
+}
+
+struct mock_iommu_domain {
+	struct iommu_domain domain;
+	struct xarray pfns;
+};
+
+enum selftest_obj_type {
+	TYPE_IDEV,
+};
+
+struct selftest_obj {
+	struct iommufd_object obj;
+	enum selftest_obj_type type;
+
+	union {
+		struct {
+			struct iommufd_hw_pagetable *hwpt;
+			struct iommufd_ctx *ictx;
+			struct device mock_dev;
+		} idev;
+	};
+};
+
+static struct iommu_domain *mock_domain_alloc(unsigned int iommu_domain_type)
+{
+	struct mock_iommu_domain *mock;
+
+	if (WARN_ON(iommu_domain_type != IOMMU_DOMAIN_UNMANAGED))
+		return NULL;
+
+	mock = kzalloc(sizeof(*mock), GFP_KERNEL);
+	if (!mock)
+		return NULL;
+	mock->domain.geometry.aperture_start = MOCK_APERTURE_START;
+	mock->domain.geometry.aperture_end = MOCK_APERTURE_LAST;
+	mock->domain.pgsize_bitmap = MOCK_IO_PAGE_SIZE;
+	xa_init(&mock->pfns);
+	return &mock->domain;
+}
+
+static void mock_domain_free(struct iommu_domain *domain)
+{
+	struct mock_iommu_domain *mock =
+		container_of(domain, struct mock_iommu_domain, domain);
+
+	WARN_ON(!xa_empty(&mock->pfns));
+	kfree(mock);
+}
+
+static int mock_domain_map_pages(struct iommu_domain *domain,
+				 unsigned long iova, phys_addr_t paddr,
+				 size_t pgsize, size_t pgcount, int prot,
+				 gfp_t gfp, size_t *mapped)
+{
+	struct mock_iommu_domain *mock =
+		container_of(domain, struct mock_iommu_domain, domain);
+	unsigned long flags = MOCK_PFN_START_IOVA;
+	unsigned long start_iova = iova;
+
+	/*
+	 * xarray does not reliably work with fault injection because it does a
+	 * retry allocation, so put our own failure point.
+	 */
+	if (iommufd_should_fail())
+		return -ENOENT;
+
+	WARN_ON(iova % MOCK_IO_PAGE_SIZE);
+	WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);
+	for (; pgcount; pgcount--) {
+		size_t cur;
+
+		for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
+			void *old;
+
+			if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
+				flags = MOCK_PFN_LAST_IOVA;
+			old = xa_store(&mock->pfns, iova / MOCK_IO_PAGE_SIZE,
+				       xa_mk_value((paddr / MOCK_IO_PAGE_SIZE) |
+						   flags),
+				       gfp);
+			if (xa_is_err(old)) {
+				for (; start_iova != iova;
+				     start_iova += MOCK_IO_PAGE_SIZE)
+					xa_erase(&mock->pfns,
+						 start_iova /
+							 MOCK_IO_PAGE_SIZE);
+				return xa_err(old);
+			}
+			WARN_ON(old);
+			iova += MOCK_IO_PAGE_SIZE;
+			paddr += MOCK_IO_PAGE_SIZE;
+			*mapped += MOCK_IO_PAGE_SIZE;
+			flags = 0;
+		}
+	}
+	return 0;
+}
+
+static size_t mock_domain_unmap_pages(struct iommu_domain *domain,
+				      unsigned long iova, size_t pgsize,
+				      size_t pgcount,
+				      struct iommu_iotlb_gather *iotlb_gather)
+{
+	struct mock_iommu_domain *mock =
+		container_of(domain, struct mock_iommu_domain, domain);
+	bool first = true;
+	size_t ret = 0;
+	void *ent;
+
+	WARN_ON(iova % MOCK_IO_PAGE_SIZE);
+	WARN_ON(pgsize % MOCK_IO_PAGE_SIZE);
+
+	for (; pgcount; pgcount--) {
+		size_t cur;
+
+		for (cur = 0; cur != pgsize; cur += MOCK_IO_PAGE_SIZE) {
+			ent = xa_erase(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
+			WARN_ON(!ent);
+			/*
+			 * iommufd generates unmaps that must be a strict
+			 * superset of the map's performend So every starting
+			 * IOVA should have been an iova passed to map, and the
+			 *
+			 * First IOVA must be present and have been a first IOVA
+			 * passed to map_pages
+			 */
+			if (first) {
+				WARN_ON(!(xa_to_value(ent) &
+					  MOCK_PFN_START_IOVA));
+				first = false;
+			}
+			if (pgcount == 1 && cur + MOCK_IO_PAGE_SIZE == pgsize)
+				WARN_ON(!(xa_to_value(ent) &
+					  MOCK_PFN_LAST_IOVA));
+
+			iova += MOCK_IO_PAGE_SIZE;
+			ret += MOCK_IO_PAGE_SIZE;
+		}
+	}
+	return ret;
+}
+
+static phys_addr_t mock_domain_iova_to_phys(struct iommu_domain *domain,
+					    dma_addr_t iova)
+{
+	struct mock_iommu_domain *mock =
+		container_of(domain, struct mock_iommu_domain, domain);
+	void *ent;
+
+	WARN_ON(iova % MOCK_IO_PAGE_SIZE);
+	ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
+	WARN_ON(!ent);
+	return (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE;
+}
+
+static const struct iommu_ops mock_ops = {
+	.owner = THIS_MODULE,
+	.pgsize_bitmap = MOCK_IO_PAGE_SIZE,
+	.domain_alloc = mock_domain_alloc,
+	.default_domain_ops =
+		&(struct iommu_domain_ops){
+			.free = mock_domain_free,
+			.map_pages = mock_domain_map_pages,
+			.unmap_pages = mock_domain_unmap_pages,
+			.iova_to_phys = mock_domain_iova_to_phys,
+		},
+};
+
+static inline struct iommufd_hw_pagetable *
+get_md_pagetable(struct iommufd_ucmd *ucmd, u32 mockpt_id,
+		 struct mock_iommu_domain **mock)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	struct iommufd_object *obj;
+
+	obj = iommufd_get_object(ucmd->ictx, mockpt_id,
+				 IOMMUFD_OBJ_HW_PAGETABLE);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
+	hwpt = container_of(obj, struct iommufd_hw_pagetable, obj);
+	if (hwpt->domain->ops != mock_ops.default_domain_ops) {
+		iommufd_put_object(&hwpt->obj);
+		return ERR_PTR(-EINVAL);
+	}
+	*mock = container_of(hwpt->domain, struct mock_iommu_domain, domain);
+	return hwpt;
+}
+
+/* Create an hw_pagetable with the mock domain so we can test the domain ops */
+static int iommufd_test_mock_domain(struct iommufd_ucmd *ucmd,
+				    struct iommu_test_cmd *cmd)
+{
+	static struct bus_type mock_bus = { .iommu_ops = &mock_ops };
+	struct iommufd_hw_pagetable *hwpt;
+	struct selftest_obj *sobj;
+	struct iommufd_ioas *ioas;
+	int rc;
+
+	ioas = iommufd_get_ioas(ucmd, cmd->id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+
+	sobj = iommufd_object_alloc(ucmd->ictx, sobj, IOMMUFD_OBJ_SELFTEST);
+	if (IS_ERR(sobj)) {
+		rc = PTR_ERR(sobj);
+		goto out_ioas;
+	}
+	sobj->idev.ictx = ucmd->ictx;
+	sobj->type = TYPE_IDEV;
+	sobj->idev.mock_dev.bus = &mock_bus;
+
+	hwpt = iommufd_device_selftest_attach(ucmd->ictx, ioas,
+					      &sobj->idev.mock_dev);
+	if (IS_ERR(hwpt)) {
+		rc = PTR_ERR(hwpt);
+		goto out_sobj;
+	}
+	sobj->idev.hwpt = hwpt;
+
+	/* Userspace must destroy both of these IDs to destroy the object */
+	cmd->mock_domain.out_hwpt_id = hwpt->obj.id;
+	cmd->mock_domain.out_device_id = sobj->obj.id;
+	iommufd_object_finalize(ucmd->ictx, &sobj->obj);
+	iommufd_put_object(&ioas->obj);
+	return iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+
+out_sobj:
+	iommufd_object_abort(ucmd->ictx, &sobj->obj);
+out_ioas:
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+/* Add an additional reserved IOVA to the IOAS */
+static int iommufd_test_add_reserved(struct iommufd_ucmd *ucmd,
+				     unsigned int mockpt_id,
+				     unsigned long start, size_t length)
+{
+	struct iommufd_ioas *ioas;
+	int rc;
+
+	ioas = iommufd_get_ioas(ucmd, mockpt_id);
+	if (IS_ERR(ioas))
+		return PTR_ERR(ioas);
+	down_write(&ioas->iopt.iova_rwsem);
+	rc = iopt_reserve_iova(&ioas->iopt, start, start + length - 1, NULL);
+	up_write(&ioas->iopt.iova_rwsem);
+	iommufd_put_object(&ioas->obj);
+	return rc;
+}
+
+/* Check that every pfn under each iova matches the pfn under a user VA */
+static int iommufd_test_md_check_pa(struct iommufd_ucmd *ucmd,
+				    unsigned int mockpt_id, unsigned long iova,
+				    size_t length, void __user *uptr)
+{
+	struct iommufd_hw_pagetable *hwpt;
+	struct mock_iommu_domain *mock;
+	int rc;
+
+	if (iova % MOCK_IO_PAGE_SIZE || length % MOCK_IO_PAGE_SIZE ||
+	    (uintptr_t)uptr % MOCK_IO_PAGE_SIZE)
+		return -EINVAL;
+
+	hwpt = get_md_pagetable(ucmd, mockpt_id, &mock);
+	if (IS_ERR(hwpt))
+		return PTR_ERR(hwpt);
+
+	for (; length; length -= MOCK_IO_PAGE_SIZE) {
+		struct page *pages[1];
+		unsigned long pfn;
+		long npages;
+		void *ent;
+
+		npages = get_user_pages_fast((uintptr_t)uptr & PAGE_MASK, 1, 0,
+					     pages);
+		if (npages < 0) {
+			rc = npages;
+			goto out_put;
+		}
+		if (WARN_ON(npages != 1)) {
+			rc = -EFAULT;
+			goto out_put;
+		}
+		pfn = page_to_pfn(pages[0]);
+		put_page(pages[0]);
+
+		ent = xa_load(&mock->pfns, iova / MOCK_IO_PAGE_SIZE);
+		if (!ent ||
+		    (xa_to_value(ent) & MOCK_PFN_MASK) * MOCK_IO_PAGE_SIZE !=
+			    pfn * PAGE_SIZE + ((uintptr_t)uptr % PAGE_SIZE)) {
+			rc = -EINVAL;
+			goto out_put;
+		}
+		iova += MOCK_IO_PAGE_SIZE;
+		uptr += MOCK_IO_PAGE_SIZE;
+	}
+	rc = 0;
+
+out_put:
+	iommufd_put_object(&hwpt->obj);
+	return rc;
+}
+
+/* Check that the page ref count matches, to look for missing pin/unpins */
+static int iommufd_test_md_check_refs(struct iommufd_ucmd *ucmd,
+				      void __user *uptr, size_t length,
+				      unsigned int refs)
+{
+	if (length % PAGE_SIZE || (uintptr_t)uptr % PAGE_SIZE)
+		return -EINVAL;
+
+	for (; length; length -= PAGE_SIZE) {
+		struct page *pages[1];
+		long npages;
+
+		npages = get_user_pages_fast((uintptr_t)uptr, 1, 0, pages);
+		if (npages < 0)
+			return npages;
+		if (WARN_ON(npages != 1))
+			return -EFAULT;
+		if (!PageCompound(pages[0])) {
+			unsigned int count;
+
+			count = page_ref_count(pages[0]);
+			if (count / GUP_PIN_COUNTING_BIAS != refs) {
+				put_page(pages[0]);
+				return -EIO;
+			}
+		}
+		put_page(pages[0]);
+		uptr += PAGE_SIZE;
+	}
+	return 0;
+}
+
+struct selftest_access {
+	struct iommufd_access *access;
+	struct file *file;
+	struct mutex lock;
+	struct list_head items;
+	unsigned int next_id;
+	bool destroying;
+};
+
+struct selftest_access_item {
+	struct list_head items_elm;
+	unsigned long iova;
+	size_t length;
+	unsigned int id;
+};
+
+static const struct file_operations iommfd_test_staccess_fops;
+
+static struct selftest_access *iommufd_access_get(int fd)
+{
+	struct file *file;
+
+	file = fget(fd);
+	if (!file)
+		return ERR_PTR(-EBADFD);
+
+	if (file->f_op != &iommfd_test_staccess_fops) {
+		fput(file);
+		return ERR_PTR(-EBADFD);
+	}
+	return file->private_data;
+}
+
+static void iommufd_test_access_unmap(void *data, unsigned long iova,
+				      unsigned long length)
+{
+	unsigned long iova_last = iova + length - 1;
+	struct selftest_access *staccess = data;
+	struct selftest_access_item *item;
+	struct selftest_access_item *tmp;
+
+	mutex_lock(&staccess->lock);
+	list_for_each_entry_safe(item, tmp, &staccess->items, items_elm) {
+		if (iova > item->iova + item->length - 1 ||
+		    iova_last < item->iova)
+			continue;
+		list_del(&item->items_elm);
+		iommufd_access_unpin_pages(staccess->access, item->iova,
+					   item->length);
+		kfree(item);
+	}
+	mutex_unlock(&staccess->lock);
+}
+
+static int iommufd_test_access_item_destroy(struct iommufd_ucmd *ucmd,
+					    unsigned int access_id,
+					    unsigned int item_id)
+{
+	struct selftest_access_item *item;
+	struct selftest_access *staccess;
+
+	staccess = iommufd_access_get(access_id);
+	if (IS_ERR(staccess))
+		return PTR_ERR(staccess);
+
+	mutex_lock(&staccess->lock);
+	list_for_each_entry(item, &staccess->items, items_elm) {
+		if (item->id == item_id) {
+			list_del(&item->items_elm);
+			iommufd_access_unpin_pages(staccess->access, item->iova,
+						   item->length);
+			mutex_unlock(&staccess->lock);
+			kfree(item);
+			fput(staccess->file);
+			return 0;
+		}
+	}
+	mutex_unlock(&staccess->lock);
+	fput(staccess->file);
+	return -ENOENT;
+}
+
+static int iommufd_test_staccess_release(struct inode *inode,
+					 struct file *filep)
+{
+	struct selftest_access *staccess = filep->private_data;
+
+	if (staccess->access) {
+		iommufd_test_access_unmap(staccess, 0, ULONG_MAX);
+		iommufd_access_destroy(staccess->access);
+	}
+	mutex_destroy(&staccess->lock);
+	kfree(staccess);
+	return 0;
+}
+
+static const struct iommufd_access_ops selftest_access_ops_pin = {
+	.needs_pin_pages = 1,
+	.unmap = iommufd_test_access_unmap,
+};
+
+static const struct iommufd_access_ops selftest_access_ops = {
+	.unmap = iommufd_test_access_unmap,
+};
+
+static const struct file_operations iommfd_test_staccess_fops = {
+	.release = iommufd_test_staccess_release,
+};
+
+static struct selftest_access *iommufd_test_alloc_access(void)
+{
+	struct selftest_access *staccess;
+	struct file *filep;
+
+	staccess = kzalloc(sizeof(*staccess), GFP_KERNEL_ACCOUNT);
+	if (!staccess)
+		return ERR_PTR(-ENOMEM);
+	INIT_LIST_HEAD(&staccess->items);
+	mutex_init(&staccess->lock);
+
+	filep = anon_inode_getfile("[iommufd_test_staccess]",
+				   &iommfd_test_staccess_fops, staccess,
+				   O_RDWR);
+	if (IS_ERR(filep)) {
+		kfree(staccess);
+		return ERR_CAST(filep);
+	}
+	staccess->file = filep;
+	return staccess;
+}
+
+static int iommufd_test_create_access(struct iommufd_ucmd *ucmd,
+				      unsigned int ioas_id, unsigned int flags)
+{
+	struct iommu_test_cmd *cmd = ucmd->cmd;
+	struct selftest_access *staccess;
+	struct iommufd_access *access;
+	int fdno;
+	int rc;
+
+	if (flags & ~MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES)
+		return -EOPNOTSUPP;
+
+	staccess = iommufd_test_alloc_access();
+	if (IS_ERR(staccess))
+		return PTR_ERR(staccess);
+
+	fdno = get_unused_fd_flags(O_CLOEXEC);
+	if (fdno < 0) {
+		rc = -ENOMEM;
+		goto out_free_staccess;
+	}
+
+	access = iommufd_access_create(
+		ucmd->ictx, ioas_id,
+		(flags & MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES) ?
+			&selftest_access_ops_pin :
+			&selftest_access_ops,
+		staccess);
+	if (IS_ERR(access)) {
+		rc = PTR_ERR(access);
+		goto out_put_fdno;
+	}
+	cmd->create_access.out_access_fd = fdno;
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+	if (rc)
+		goto out_destroy;
+
+	staccess->access = access;
+	fd_install(fdno, staccess->file);
+	return 0;
+
+out_destroy:
+	iommufd_access_destroy(access);
+out_put_fdno:
+	put_unused_fd(fdno);
+out_free_staccess:
+	fput(staccess->file);
+	return rc;
+}
+
+/* Check that the pages in a page array match the pages in the user VA */
+static int iommufd_test_check_pages(void __user *uptr, struct page **pages,
+				    size_t npages)
+{
+	for (; npages; npages--) {
+		struct page *tmp_pages[1];
+		long rc;
+
+		rc = get_user_pages_fast((uintptr_t)uptr, 1, 0, tmp_pages);
+		if (rc < 0)
+			return rc;
+		if (WARN_ON(rc != 1))
+			return -EFAULT;
+		put_page(tmp_pages[0]);
+		if (tmp_pages[0] != *pages)
+			return -EBADE;
+		pages++;
+		uptr += PAGE_SIZE;
+	}
+	return 0;
+}
+
+static int iommufd_test_access_pages(struct iommufd_ucmd *ucmd,
+				     unsigned int access_id, unsigned long iova,
+				     size_t length, void __user *uptr,
+				     u32 flags)
+{
+	struct iommu_test_cmd *cmd = ucmd->cmd;
+	struct selftest_access_item *item;
+	struct selftest_access *staccess;
+	struct page **pages;
+	size_t npages;
+	int rc;
+
+	/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
+	if (length > 16*1024*1024)
+		return -ENOMEM;
+
+	if (flags & ~(MOCK_FLAGS_ACCESS_WRITE | MOCK_FLAGS_ACCESS_SYZ))
+		return -EOPNOTSUPP;
+
+	staccess = iommufd_access_get(access_id);
+	if (IS_ERR(staccess))
+		return PTR_ERR(staccess);
+
+	if (staccess->access->ops != &selftest_access_ops_pin) {
+		rc = -EOPNOTSUPP;
+		goto out_put;
+	}
+
+	if (flags & MOCK_FLAGS_ACCESS_SYZ)
+		iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
+					&cmd->access_pages.iova);
+
+	npages = (ALIGN(iova + length, PAGE_SIZE) -
+		  ALIGN_DOWN(iova, PAGE_SIZE)) /
+		 PAGE_SIZE;
+	pages = kvcalloc(npages, sizeof(*pages), GFP_KERNEL_ACCOUNT);
+	if (!pages) {
+		rc = -ENOMEM;
+		goto out_put;
+	}
+
+	/*
+	 * Drivers will need to think very carefully about this locking. The
+	 * core code can do multiple unmaps instantaneously after
+	 * iommufd_access_pin_pages() and *all* the unmaps must not return until
+	 * the range is unpinned. This simple implementation puts a global lock
+	 * around the pin, which may not suit drivers that want this to be a
+	 * performance path. drivers that get this wrong will trigger WARN_ON
+	 * races and cause EDEADLOCK failures to userspace.
+	 */
+	mutex_lock(&staccess->lock);
+	rc = iommufd_access_pin_pages(staccess->access, iova, length, pages,
+				      flags & MOCK_FLAGS_ACCESS_WRITE);
+	if (rc)
+		goto out_unlock;
+
+	/* For syzkaller allow uptr to be NULL to skip this check */
+	if (uptr) {
+		rc = iommufd_test_check_pages(
+			uptr - (iova - ALIGN_DOWN(iova, PAGE_SIZE)), pages,
+			npages);
+		if (rc)
+			goto out_unaccess;
+	}
+
+	item = kzalloc(sizeof(*item), GFP_KERNEL_ACCOUNT);
+	if (!item) {
+		rc = -ENOMEM;
+		goto out_unaccess;
+	}
+
+	item->iova = iova;
+	item->length = length;
+	item->id = staccess->next_id++;
+	list_add_tail(&item->items_elm, &staccess->items);
+
+	cmd->access_pages.out_access_pages_id = item->id;
+	rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+	if (rc)
+		goto out_free_item;
+	goto out_unlock;
+
+out_free_item:
+	list_del(&item->items_elm);
+	kfree(item);
+out_unaccess:
+	iommufd_access_unpin_pages(staccess->access, iova, length);
+out_unlock:
+	mutex_unlock(&staccess->lock);
+	kvfree(pages);
+out_put:
+	fput(staccess->file);
+	return rc;
+}
+
+static int iommufd_test_access_rw(struct iommufd_ucmd *ucmd,
+				  unsigned int access_id, unsigned long iova,
+				  size_t length, void __user *ubuf,
+				  unsigned int flags)
+{
+	struct iommu_test_cmd *cmd = ucmd->cmd;
+	struct selftest_access *staccess;
+	void *tmp;
+	int rc;
+
+	/* Prevent syzkaller from triggering a WARN_ON in kvzalloc() */
+	if (length > 16*1024*1024)
+		return -ENOMEM;
+
+	if (flags & ~(MOCK_ACCESS_RW_WRITE | MOCK_ACCESS_RW_SLOW_PATH |
+		      MOCK_FLAGS_ACCESS_SYZ))
+		return -EOPNOTSUPP;
+
+	staccess = iommufd_access_get(access_id);
+	if (IS_ERR(staccess))
+		return PTR_ERR(staccess);
+
+	tmp = kvzalloc(length, GFP_KERNEL_ACCOUNT);
+	if (!tmp) {
+		rc = -ENOMEM;
+		goto out_put;
+	}
+
+	if (flags & MOCK_ACCESS_RW_WRITE) {
+		if (copy_from_user(tmp, ubuf, length)) {
+			rc = -EFAULT;
+			goto out_free;
+		}
+	}
+
+	if (flags & MOCK_FLAGS_ACCESS_SYZ)
+		iova = iommufd_test_syz_conv_iova(&staccess->access->ioas->iopt,
+					&cmd->access_rw.iova);
+
+	rc = iommufd_access_rw(staccess->access, iova, tmp, length, flags);
+	if (rc)
+		goto out_free;
+	if (!(flags & MOCK_ACCESS_RW_WRITE)) {
+		if (copy_to_user(ubuf, tmp, length)) {
+			rc = -EFAULT;
+			goto out_free;
+		}
+	}
+
+out_free:
+	kvfree(tmp);
+out_put:
+	fput(staccess->file);
+	return rc;
+}
+static_assert((unsigned int)MOCK_ACCESS_RW_WRITE == IOMMUFD_ACCESS_RW_WRITE);
+static_assert((unsigned int)MOCK_ACCESS_RW_SLOW_PATH ==
+	      __IOMMUFD_ACCESS_RW_SLOW_PATH);
+
+void iommufd_selftest_destroy(struct iommufd_object *obj)
+{
+	struct selftest_obj *sobj = container_of(obj, struct selftest_obj, obj);
+
+	switch (sobj->type) {
+	case TYPE_IDEV:
+		iommufd_device_selftest_detach(sobj->idev.ictx,
+					       sobj->idev.hwpt);
+		break;
+	}
+}
+
+int iommufd_test(struct iommufd_ucmd *ucmd)
+{
+	struct iommu_test_cmd *cmd = ucmd->cmd;
+
+	switch (cmd->op) {
+	case IOMMU_TEST_OP_ADD_RESERVED:
+		return iommufd_test_add_reserved(ucmd, cmd->id,
+						 cmd->add_reserved.start,
+						 cmd->add_reserved.length);
+	case IOMMU_TEST_OP_MOCK_DOMAIN:
+		return iommufd_test_mock_domain(ucmd, cmd);
+	case IOMMU_TEST_OP_MD_CHECK_MAP:
+		return iommufd_test_md_check_pa(
+			ucmd, cmd->id, cmd->check_map.iova,
+			cmd->check_map.length,
+			u64_to_user_ptr(cmd->check_map.uptr));
+	case IOMMU_TEST_OP_MD_CHECK_REFS:
+		return iommufd_test_md_check_refs(
+			ucmd, u64_to_user_ptr(cmd->check_refs.uptr),
+			cmd->check_refs.length, cmd->check_refs.refs);
+	case IOMMU_TEST_OP_CREATE_ACCESS:
+		return iommufd_test_create_access(ucmd, cmd->id,
+						  cmd->create_access.flags);
+	case IOMMU_TEST_OP_ACCESS_PAGES:
+		return iommufd_test_access_pages(
+			ucmd, cmd->id, cmd->access_pages.iova,
+			cmd->access_pages.length,
+			u64_to_user_ptr(cmd->access_pages.uptr),
+			cmd->access_pages.flags);
+	case IOMMU_TEST_OP_ACCESS_RW:
+		return iommufd_test_access_rw(
+			ucmd, cmd->id, cmd->access_rw.iova,
+			cmd->access_rw.length,
+			u64_to_user_ptr(cmd->access_rw.uptr),
+			cmd->access_rw.flags);
+	case IOMMU_TEST_OP_DESTROY_ACCESS_PAGES:
+		return iommufd_test_access_item_destroy(
+			ucmd, cmd->id, cmd->destroy_access_pages.access_pages_id);
+	case IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT:
+		/* Protect _batch_init(), can not be less than elmsz */
+		if (cmd->memory_limit.limit <
+		    sizeof(unsigned long) + sizeof(u32))
+			return -EINVAL;
+		iommufd_test_memory_limit = cmd->memory_limit.limit;
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+bool iommufd_should_fail(void)
+{
+	return should_fail(&fail_iommufd, 1);
+}
+
+void __init iommufd_test_init(void)
+{
+	dbgfs_root =
+		fault_create_debugfs_attr("fail_iommufd", NULL, &fail_iommufd);
+}
+
+void iommufd_test_exit(void)
+{
+	debugfs_remove_recursive(dbgfs_root);
+}
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 84af9a239769a..650d45629647a 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -34,6 +34,9 @@ enum {
 	IOMMUFD_ACCESS_RW_WRITE = 1 << 0,
 	/* Set if the caller is in a kthread then rw will use kthread_use_mm() */
 	IOMMUFD_ACCESS_RW_KTHREAD = 1 << 1,
+
+	/* Only for use by selftest */
+	__IOMMUFD_ACCESS_RW_SLOW_PATH = 1 << 2,
 };
 
 struct iommufd_access *
-- 
GitLab


From e26eed4f623da70913b535631a29764d108efe98 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:40 -0400
Subject: [PATCH 3136/4122] iommufd: Add some fault injection points

This increases the coverage the fail_nth test gets, as well as via
syzkaller.

Link: https://lore.kernel.org/r/17-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> # s390
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/main.c  |  3 +++
 drivers/iommu/iommufd/pages.c | 26 ++++++++++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 7c8f40bc8d98d..bcb463e581009 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -102,6 +102,9 @@ struct iommufd_object *iommufd_get_object(struct iommufd_ctx *ictx, u32 id,
 {
 	struct iommufd_object *obj;
 
+	if (iommufd_should_fail())
+		return ERR_PTR(-ENOENT);
+
 	xa_lock(&ictx->objects);
 	obj = xa_load(&ictx->objects, id);
 	if (!obj || (type != IOMMUFD_OBJ_ANY && obj->type != type) ||
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 640331b8a0791..c5d2d9a8c5620 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -80,6 +80,10 @@ static void *temp_kmalloc(size_t *size, void *backup, size_t backup_len)
 
 	if (*size < backup_len)
 		return backup;
+
+	if (!backup && iommufd_should_fail())
+		return NULL;
+
 	*size = min_t(size_t, *size, TEMP_MEMORY_LIMIT);
 	res = kmalloc(*size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY);
 	if (res)
@@ -544,6 +548,7 @@ static int pages_to_xarray(struct xarray *xa, unsigned long start_index,
 			   unsigned long last_index, struct page **pages)
 {
 	struct page **end_pages = pages + (last_index - start_index) + 1;
+	struct page **half_pages = pages + (end_pages - pages) / 2;
 	XA_STATE(xas, xa, start_index);
 
 	do {
@@ -551,6 +556,15 @@ static int pages_to_xarray(struct xarray *xa, unsigned long start_index,
 
 		xas_lock(&xas);
 		while (pages != end_pages) {
+			/* xarray does not participate in fault injection */
+			if (pages == half_pages && iommufd_should_fail()) {
+				xas_set_err(&xas, -EINVAL);
+				xas_unlock(&xas);
+				/* aka xas_destroy() */
+				xas_nomem(&xas, GFP_KERNEL);
+				goto err_clear;
+			}
+
 			old = xas_store(&xas, xa_mk_value(page_to_pfn(*pages)));
 			if (xas_error(&xas))
 				break;
@@ -561,6 +575,7 @@ static int pages_to_xarray(struct xarray *xa, unsigned long start_index,
 		xas_unlock(&xas);
 	} while (xas_nomem(&xas, GFP_KERNEL));
 
+err_clear:
 	if (xas_error(&xas)) {
 		if (xas.xa_index != start_index)
 			clear_xarray(xa, start_index, xas.xa_index - 1);
@@ -728,6 +743,10 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
 	npages = min_t(unsigned long, last_index - start_index + 1,
 		       user->upages_len / sizeof(*user->upages));
 
+
+	if (iommufd_should_fail())
+		return -EFAULT;
+
 	uptr = (uintptr_t)(pages->uptr + start_index * PAGE_SIZE);
 	if (!remote_mm)
 		rc = pin_user_pages_fast(uptr, npages, user->gup_flags,
@@ -872,6 +891,8 @@ static int pfn_reader_user_update_pinned(struct pfn_reader_user *user,
 		npages = pages->last_npinned - pages->npinned;
 		inc = false;
 	} else {
+		if (iommufd_should_fail())
+			return -ENOMEM;
 		npages = pages->npinned - pages->last_npinned;
 		inc = true;
 	}
@@ -1721,6 +1742,11 @@ static int iopt_pages_rw_page(struct iopt_pages *pages, unsigned long index,
 		return iopt_pages_rw_slow(pages, index, index, offset, data,
 					  length, flags);
 
+	if (iommufd_should_fail()) {
+		rc = -EINVAL;
+		goto out_mmput;
+	}
+
 	mmap_read_lock(pages->source_mm);
 	rc = pin_user_pages_remote(
 		pages->source_mm, (uintptr_t)(pages->uptr + index * PAGE_SIZE),
-- 
GitLab


From 52f528583bb395495f7dd35e6e4d548bccbf8a73 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:41 -0400
Subject: [PATCH 3137/4122] iommufd: Add additional invariant assertions

These are on performance paths so we protect them using the
CONFIG_IOMMUFD_TEST to not take a hit during normal operation.

These are useful when running the test suite and syzkaller to find data
structure inconsistencies early.

Link: https://lore.kernel.org/r/18-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> # s390
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c       |  5 ++++
 drivers/iommu/iommufd/io_pagetable.c | 22 +++++++++++++++
 drivers/iommu/iommufd/io_pagetable.h |  3 ++
 drivers/iommu/iommufd/pages.c        | 42 ++++++++++++++++++++++++++--
 4 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 67ce36152e8ab..dd2a415b603e3 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -625,6 +625,11 @@ int iommufd_access_pin_pages(struct iommufd_access *access, unsigned long iova,
 	struct iopt_area *area;
 	int rc;
 
+	/* Driver's ops don't support pin_pages */
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+	    WARN_ON(access->iova_alignment != PAGE_SIZE || !access->ops->unmap))
+		return -EINVAL;
+
 	if (!length)
 		return -EINVAL;
 	if (check_add_overflow(iova, length - 1, &last_iova))
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 4f4a9d9aac570..3467cea795684 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -251,6 +251,11 @@ static int iopt_alloc_area_pages(struct io_pagetable *iopt,
 			(uintptr_t)elm->pages->uptr + elm->start_byte, length);
 		if (rc)
 			goto out_unlock;
+		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+		    WARN_ON(iopt_check_iova(iopt, *dst_iova, length))) {
+			rc = -EINVAL;
+			goto out_unlock;
+		}
 	} else {
 		rc = iopt_check_iova(iopt, *dst_iova, length);
 		if (rc)
@@ -277,6 +282,8 @@ out_unlock:
 
 static void iopt_abort_area(struct iopt_area *area)
 {
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(area->pages);
 	if (area->iopt) {
 		down_write(&area->iopt->iova_rwsem);
 		interval_tree_remove(&area->node, &area->iopt->area_itree);
@@ -642,6 +649,9 @@ void iopt_destroy_table(struct io_pagetable *iopt)
 {
 	struct interval_tree_node *node;
 
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		iopt_remove_reserved_iova(iopt, NULL);
+
 	while ((node = interval_tree_iter_first(&iopt->allowed_itree, 0,
 						ULONG_MAX))) {
 		interval_tree_remove(node, &iopt->allowed_itree);
@@ -688,6 +698,8 @@ static void iopt_unfill_domain(struct io_pagetable *iopt,
 				continue;
 
 			mutex_lock(&pages->mutex);
+			if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+				WARN_ON(!area->storage_domain);
 			if (area->storage_domain == domain)
 				area->storage_domain = storage_domain;
 			mutex_unlock(&pages->mutex);
@@ -792,6 +804,16 @@ static int iopt_check_iova_alignment(struct io_pagetable *iopt,
 		    (iopt_area_length(area) & align_mask) ||
 		    (area->page_offset & align_mask))
 			return -EADDRINUSE;
+
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST)) {
+		struct iommufd_access *access;
+		unsigned long index;
+
+		xa_for_each(&iopt->access_list, index, access)
+			if (WARN_ON(access->iova_alignment >
+				    new_iova_alignment))
+				return -EADDRINUSE;
+	}
 	return 0;
 }
 
diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index 2ee6942c3ef4a..83e7c175f2a27 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -101,6 +101,9 @@ static inline size_t iopt_area_length(struct iopt_area *area)
 static inline unsigned long iopt_area_start_byte(struct iopt_area *area,
 						 unsigned long iova)
 {
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(iova < iopt_area_iova(area) ||
+			iova > iopt_area_last_iova(area));
 	return (iova - iopt_area_iova(area)) + area->page_offset +
 	       iopt_area_index(area) * PAGE_SIZE;
 }
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index c5d2d9a8c5620..429fa3b0a239c 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -162,12 +162,20 @@ void interval_tree_double_span_iter_next(
 
 static void iopt_pages_add_npinned(struct iopt_pages *pages, size_t npages)
 {
-	pages->npinned += npages;
+	int rc;
+
+	rc = check_add_overflow(pages->npinned, npages, &pages->npinned);
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(rc || pages->npinned > pages->npages);
 }
 
 static void iopt_pages_sub_npinned(struct iopt_pages *pages, size_t npages)
 {
-	pages->npinned -= npages;
+	int rc;
+
+	rc = check_sub_overflow(pages->npinned, npages, &pages->npinned);
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(rc || pages->npinned > pages->npages);
 }
 
 static void iopt_pages_err_unpin(struct iopt_pages *pages,
@@ -189,6 +197,9 @@ static void iopt_pages_err_unpin(struct iopt_pages *pages,
 static unsigned long iopt_area_index_to_iova(struct iopt_area *area,
 					     unsigned long index)
 {
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(index < iopt_area_index(area) ||
+			index > iopt_area_last_index(area));
 	index -= iopt_area_index(area);
 	if (index == 0)
 		return iopt_area_iova(area);
@@ -198,6 +209,9 @@ static unsigned long iopt_area_index_to_iova(struct iopt_area *area,
 static unsigned long iopt_area_index_to_iova_last(struct iopt_area *area,
 						  unsigned long index)
 {
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(index < iopt_area_index(area) ||
+			index > iopt_area_last_index(area));
 	if (index == iopt_area_last_index(area))
 		return iopt_area_last_iova(area);
 	return iopt_area_iova(area) - area->page_offset +
@@ -286,6 +300,8 @@ static void batch_skip_carry(struct pfn_batch *batch, unsigned int skip_pfns)
 {
 	if (!batch->total_pfns)
 		return;
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(batch->total_pfns != batch->npfns[0]);
 	skip_pfns = min(batch->total_pfns, skip_pfns);
 	batch->pfns[0] += skip_pfns;
 	batch->npfns[0] -= skip_pfns;
@@ -301,6 +317,8 @@ static int __batch_init(struct pfn_batch *batch, size_t max_pages, void *backup,
 	batch->pfns = temp_kmalloc(&size, backup, backup_len);
 	if (!batch->pfns)
 		return -ENOMEM;
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) && WARN_ON(size < elmsz))
+		return -EINVAL;
 	batch->array_size = size / elmsz;
 	batch->npfns = (u32 *)(batch->pfns + batch->array_size);
 	batch_clear(batch);
@@ -429,6 +447,10 @@ static int batch_iommu_map_small(struct iommu_domain *domain,
 	unsigned long start_iova = iova;
 	int rc;
 
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(paddr % PAGE_SIZE || iova % PAGE_SIZE ||
+			size % PAGE_SIZE);
+
 	while (size) {
 		rc = iommu_map(domain, iova, paddr, PAGE_SIZE, prot);
 		if (rc)
@@ -718,6 +740,10 @@ static int pfn_reader_user_pin(struct pfn_reader_user *user,
 	uintptr_t uptr;
 	long rc;
 
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+	    WARN_ON(last_index < start_index))
+		return -EINVAL;
+
 	if (!user->upages) {
 		/* All undone in pfn_reader_destroy() */
 		user->upages_len =
@@ -956,6 +982,10 @@ static int pfn_reader_fill_span(struct pfn_reader *pfns)
 	struct iopt_area *area;
 	int rc;
 
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+	    WARN_ON(span->last_used < start_index))
+		return -EINVAL;
+
 	if (span->is_used == 1) {
 		batch_from_xarray(&pfns->batch, &pfns->pages->pinned_pfns,
 				  start_index, span->last_used);
@@ -1008,6 +1038,10 @@ static int pfn_reader_next(struct pfn_reader *pfns)
 	while (pfns->batch_end_index != pfns->last_index + 1) {
 		unsigned int npfns = pfns->batch.total_pfns;
 
+		if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+		    WARN_ON(interval_tree_double_span_iter_done(&pfns->span)))
+			return -EINVAL;
+
 		rc = pfn_reader_fill_span(pfns);
 		if (rc)
 			return rc;
@@ -1091,6 +1125,10 @@ static int pfn_reader_first(struct pfn_reader *pfns, struct iopt_pages *pages,
 {
 	int rc;
 
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST) &&
+	    WARN_ON(last_index < start_index))
+		return -EINVAL;
+
 	rc = pfn_reader_init(pfns, pages, start_index, last_index);
 	if (rc)
 		return rc;
-- 
GitLab


From 57f0988706fec1b8dbc3fe00965828a47e2235a1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:29:42 -0400
Subject: [PATCH 3138/4122] iommufd: Add a selftest

Cover the essential functionality of the iommufd with a directed test from
userspace. This aims to achieve reasonable functional coverage using the
in-kernel self test framework.

A second test does a failure injection sweep of the success paths to study
error unwind behaviors.

This allows achieving high coverage of the corner cases in pages.c.

The selftest requires CONFIG_IOMMUFD_TEST to be enabled, and several huge
pages which may require:

  echo 4 > /proc/sys/vm/nr_hugepages

Link: https://lore.kernel.org/r/19-v6-a196d26f289e+11787-iommufd_jgg@nvidia.com
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com> # s390
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Eric Auger <eric.auger@redhat.com> # aarch64
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 tools/testing/selftests/Makefile              |    1 +
 tools/testing/selftests/iommu/.gitignore      |    3 +
 tools/testing/selftests/iommu/Makefile        |   12 +
 tools/testing/selftests/iommu/config          |    2 +
 tools/testing/selftests/iommu/iommufd.c       | 1654 +++++++++++++++++
 .../selftests/iommu/iommufd_fail_nth.c        |  580 ++++++
 tools/testing/selftests/iommu/iommufd_utils.h |  278 +++
 7 files changed, 2530 insertions(+)
 create mode 100644 tools/testing/selftests/iommu/.gitignore
 create mode 100644 tools/testing/selftests/iommu/Makefile
 create mode 100644 tools/testing/selftests/iommu/config
 create mode 100644 tools/testing/selftests/iommu/iommufd.c
 create mode 100644 tools/testing/selftests/iommu/iommufd_fail_nth.c
 create mode 100644 tools/testing/selftests/iommu/iommufd_utils.h

diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index f07aef7c592c2..d6680af7b2956 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -27,6 +27,7 @@ TARGETS += ftrace
 TARGETS += futex
 TARGETS += gpio
 TARGETS += intel_pstate
+TARGETS += iommu
 TARGETS += ipc
 TARGETS += ir
 TARGETS += kcmp
diff --git a/tools/testing/selftests/iommu/.gitignore b/tools/testing/selftests/iommu/.gitignore
new file mode 100644
index 0000000000000..7d0703049ebaf
--- /dev/null
+++ b/tools/testing/selftests/iommu/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/iommufd
+/iommufd_fail_nth
diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile
new file mode 100644
index 0000000000000..7cb74d26f1417
--- /dev/null
+++ b/tools/testing/selftests/iommu/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += -I../../../../include/uapi/
+CFLAGS += -I../../../../include/
+
+CFLAGS += -D_GNU_SOURCE
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += iommufd
+TEST_GEN_PROGS += iommufd_fail_nth
+
+include ../lib.mk
diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config
new file mode 100644
index 0000000000000..6c4f901d6fed3
--- /dev/null
+++ b/tools/testing/selftests/iommu/config
@@ -0,0 +1,2 @@
+CONFIG_IOMMUFD
+CONFIG_IOMMUFD_TEST
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
new file mode 100644
index 0000000000000..8aa8a346cf221
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -0,0 +1,1654 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/eventfd.h>
+
+#define __EXPORTED_HEADERS__
+#include <linux/vfio.h>
+
+#include "iommufd_utils.h"
+
+static void *buffer;
+
+static unsigned long PAGE_SIZE;
+static unsigned long HUGEPAGE_SIZE;
+
+#define MOCK_PAGE_SIZE (PAGE_SIZE / 2)
+
+static unsigned long get_huge_page_size(void)
+{
+	char buf[80];
+	int ret;
+	int fd;
+
+	fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
+		  O_RDONLY);
+	if (fd < 0)
+		return 2 * 1024 * 1024;
+
+	ret = read(fd, buf, sizeof(buf));
+	close(fd);
+	if (ret <= 0 || ret == sizeof(buf))
+		return 2 * 1024 * 1024;
+	buf[ret] = 0;
+	return strtoul(buf, NULL, 10);
+}
+
+static __attribute__((constructor)) void setup_sizes(void)
+{
+	void *vrc;
+	int rc;
+
+	PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
+	HUGEPAGE_SIZE = get_huge_page_size();
+
+	BUFFER_SIZE = PAGE_SIZE * 16;
+	rc = posix_memalign(&buffer, HUGEPAGE_SIZE, BUFFER_SIZE);
+	assert(!rc);
+	assert(buffer);
+	assert((uintptr_t)buffer % HUGEPAGE_SIZE == 0);
+	vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE,
+		   MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+	assert(vrc == buffer);
+}
+
+FIXTURE(iommufd)
+{
+	int fd;
+};
+
+FIXTURE_SETUP(iommufd)
+{
+	self->fd = open("/dev/iommu", O_RDWR);
+	ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(iommufd)
+{
+	teardown_iommufd(self->fd, _metadata);
+}
+
+TEST_F(iommufd, simple_close)
+{
+}
+
+TEST_F(iommufd, cmd_fail)
+{
+	struct iommu_destroy cmd = { .size = sizeof(cmd), .id = 0 };
+
+	/* object id is invalid */
+	EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, 0));
+	/* Bad pointer */
+	EXPECT_ERRNO(EFAULT, ioctl(self->fd, IOMMU_DESTROY, NULL));
+	/* Unknown ioctl */
+	EXPECT_ERRNO(ENOTTY,
+		     ioctl(self->fd, _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE - 1),
+			   &cmd));
+}
+
+TEST_F(iommufd, cmd_length)
+{
+#define TEST_LENGTH(_struct, _ioctl)                                     \
+	{                                                                \
+		struct {                                                 \
+			struct _struct cmd;                              \
+			uint8_t extra;                                   \
+		} cmd = { .cmd = { .size = sizeof(struct _struct) - 1 }, \
+			  .extra = UINT8_MAX };                          \
+		int old_errno;                                           \
+		int rc;                                                  \
+									 \
+		EXPECT_ERRNO(EINVAL, ioctl(self->fd, _ioctl, &cmd));     \
+		cmd.cmd.size = sizeof(struct _struct) + 1;               \
+		EXPECT_ERRNO(E2BIG, ioctl(self->fd, _ioctl, &cmd));      \
+		cmd.cmd.size = sizeof(struct _struct);                   \
+		rc = ioctl(self->fd, _ioctl, &cmd);                      \
+		old_errno = errno;                                       \
+		cmd.cmd.size = sizeof(struct _struct) + 1;               \
+		cmd.extra = 0;                                           \
+		if (rc) {                                                \
+			EXPECT_ERRNO(old_errno,                          \
+				     ioctl(self->fd, _ioctl, &cmd));     \
+		} else {                                                 \
+			ASSERT_EQ(0, ioctl(self->fd, _ioctl, &cmd));     \
+		}                                                        \
+	}
+
+	TEST_LENGTH(iommu_destroy, IOMMU_DESTROY);
+	TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC);
+	TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES);
+	TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS);
+	TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP);
+	TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY);
+	TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP);
+	TEST_LENGTH(iommu_option, IOMMU_OPTION);
+	TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS);
+#undef TEST_LENGTH
+}
+
+TEST_F(iommufd, cmd_ex_fail)
+{
+	struct {
+		struct iommu_destroy cmd;
+		__u64 future;
+	} cmd = { .cmd = { .size = sizeof(cmd), .id = 0 } };
+
+	/* object id is invalid and command is longer */
+	EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+	/* future area is non-zero */
+	cmd.future = 1;
+	EXPECT_ERRNO(E2BIG, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+	/* Original command "works" */
+	cmd.cmd.size = sizeof(cmd.cmd);
+	EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+	/* Short command fails */
+	cmd.cmd.size = sizeof(cmd.cmd) - 1;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+}
+
+TEST_F(iommufd, global_options)
+{
+	struct iommu_option cmd = {
+		.size = sizeof(cmd),
+		.option_id = IOMMU_OPTION_RLIMIT_MODE,
+		.op = IOMMU_OPTION_OP_GET,
+		.val64 = 1,
+	};
+
+	cmd.option_id = IOMMU_OPTION_RLIMIT_MODE;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+	ASSERT_EQ(0, cmd.val64);
+
+	/* This requires root */
+	cmd.op = IOMMU_OPTION_OP_SET;
+	cmd.val64 = 1;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+	cmd.val64 = 2;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	cmd.op = IOMMU_OPTION_OP_GET;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+	ASSERT_EQ(1, cmd.val64);
+
+	cmd.op = IOMMU_OPTION_OP_SET;
+	cmd.val64 = 0;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	cmd.op = IOMMU_OPTION_OP_GET;
+	cmd.option_id = IOMMU_OPTION_HUGE_PAGES;
+	EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd));
+	cmd.op = IOMMU_OPTION_OP_SET;
+	EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd));
+}
+
+FIXTURE(iommufd_ioas)
+{
+	int fd;
+	uint32_t ioas_id;
+	uint32_t domain_id;
+	uint64_t base_iova;
+};
+
+FIXTURE_VARIANT(iommufd_ioas)
+{
+	unsigned int mock_domains;
+	unsigned int memory_limit;
+};
+
+FIXTURE_SETUP(iommufd_ioas)
+{
+	unsigned int i;
+
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	ASSERT_NE(-1, self->fd);
+	test_ioctl_ioas_alloc(&self->ioas_id);
+
+	if (!variant->memory_limit) {
+		test_ioctl_set_default_memory_limit();
+	} else {
+		test_ioctl_set_temp_memory_limit(variant->memory_limit);
+	}
+
+	for (i = 0; i != variant->mock_domains; i++) {
+		test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_id);
+		self->base_iova = MOCK_APERTURE_START;
+	}
+}
+
+FIXTURE_TEARDOWN(iommufd_ioas)
+{
+	test_ioctl_set_default_memory_limit();
+	teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain)
+{
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain)
+{
+	.mock_domains = 1,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain)
+{
+	.mock_domains = 2,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain_limit)
+{
+	.mock_domains = 1,
+	.memory_limit = 16,
+};
+
+TEST_F(iommufd_ioas, ioas_auto_destroy)
+{
+}
+
+TEST_F(iommufd_ioas, ioas_destroy)
+{
+	if (self->domain_id) {
+		/* IOAS cannot be freed while a domain is on it */
+		EXPECT_ERRNO(EBUSY,
+			     _test_ioctl_destroy(self->fd, self->ioas_id));
+	} else {
+		/* Can allocate and manually free an IOAS table */
+		test_ioctl_destroy(self->ioas_id);
+	}
+}
+
+TEST_F(iommufd_ioas, ioas_area_destroy)
+{
+	/* Adding an area does not change ability to destroy */
+	test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova);
+	if (self->domain_id)
+		EXPECT_ERRNO(EBUSY,
+			     _test_ioctl_destroy(self->fd, self->ioas_id));
+	else
+		test_ioctl_destroy(self->ioas_id);
+}
+
+TEST_F(iommufd_ioas, ioas_area_auto_destroy)
+{
+	int i;
+
+	/* Can allocate and automatically free an IOAS table with many areas */
+	for (i = 0; i != 10; i++) {
+		test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE,
+					  self->base_iova + i * PAGE_SIZE);
+	}
+}
+
+TEST_F(iommufd_ioas, area)
+{
+	int i;
+
+	/* Unmap fails if nothing is mapped */
+	for (i = 0; i != 10; i++)
+		test_err_ioctl_ioas_unmap(ENOENT, i * PAGE_SIZE, PAGE_SIZE);
+
+	/* Unmap works */
+	for (i = 0; i != 10; i++)
+		test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE,
+					  self->base_iova + i * PAGE_SIZE);
+	for (i = 0; i != 10; i++)
+		test_ioctl_ioas_unmap(self->base_iova + i * PAGE_SIZE,
+				      PAGE_SIZE);
+
+	/* Split fails */
+	test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE * 2,
+				  self->base_iova + 16 * PAGE_SIZE);
+	test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 16 * PAGE_SIZE,
+				  PAGE_SIZE);
+	test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 17 * PAGE_SIZE,
+				  PAGE_SIZE);
+
+	/* Over map fails */
+	test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2,
+				      self->base_iova + 16 * PAGE_SIZE);
+	test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE,
+				      self->base_iova + 16 * PAGE_SIZE);
+	test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE,
+				      self->base_iova + 17 * PAGE_SIZE);
+	test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2,
+				      self->base_iova + 15 * PAGE_SIZE);
+	test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 3,
+				      self->base_iova + 15 * PAGE_SIZE);
+
+	/* unmap all works */
+	test_ioctl_ioas_unmap(0, UINT64_MAX);
+
+	/* Unmap all succeeds on an empty IOAS */
+	test_ioctl_ioas_unmap(0, UINT64_MAX);
+}
+
+TEST_F(iommufd_ioas, unmap_fully_contained_areas)
+{
+	uint64_t unmap_len;
+	int i;
+
+	/* Give no_domain some space to rewind base_iova */
+	self->base_iova += 4 * PAGE_SIZE;
+
+	for (i = 0; i != 4; i++)
+		test_ioctl_ioas_map_fixed(buffer, 8 * PAGE_SIZE,
+					  self->base_iova + i * 16 * PAGE_SIZE);
+
+	/* Unmap not fully contained area doesn't work */
+	test_err_ioctl_ioas_unmap(ENOENT, self->base_iova - 4 * PAGE_SIZE,
+				  8 * PAGE_SIZE);
+	test_err_ioctl_ioas_unmap(ENOENT,
+				  self->base_iova + 3 * 16 * PAGE_SIZE +
+					  8 * PAGE_SIZE - 4 * PAGE_SIZE,
+				  8 * PAGE_SIZE);
+
+	/* Unmap fully contained areas works */
+	ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id,
+					    self->base_iova - 4 * PAGE_SIZE,
+					    3 * 16 * PAGE_SIZE + 8 * PAGE_SIZE +
+						    4 * PAGE_SIZE,
+					    &unmap_len));
+	ASSERT_EQ(32 * PAGE_SIZE, unmap_len);
+}
+
+TEST_F(iommufd_ioas, area_auto_iova)
+{
+	struct iommu_test_cmd test_cmd = {
+		.size = sizeof(test_cmd),
+		.op = IOMMU_TEST_OP_ADD_RESERVED,
+		.id = self->ioas_id,
+		.add_reserved = { .start = PAGE_SIZE * 4,
+				  .length = PAGE_SIZE * 100 },
+	};
+	struct iommu_iova_range ranges[1] = {};
+	struct iommu_ioas_allow_iovas allow_cmd = {
+		.size = sizeof(allow_cmd),
+		.ioas_id = self->ioas_id,
+		.num_iovas = 1,
+		.allowed_iovas = (uintptr_t)ranges,
+	};
+	__u64 iovas[10];
+	int i;
+
+	/* Simple 4k pages */
+	for (i = 0; i != 10; i++)
+		test_ioctl_ioas_map(buffer, PAGE_SIZE, &iovas[i]);
+	for (i = 0; i != 10; i++)
+		test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE);
+
+	/* Kernel automatically aligns IOVAs properly */
+	for (i = 0; i != 10; i++) {
+		size_t length = PAGE_SIZE * (i + 1);
+
+		if (self->domain_id) {
+			test_ioctl_ioas_map(buffer, length, &iovas[i]);
+		} else {
+			test_ioctl_ioas_map((void *)(1UL << 31), length,
+					    &iovas[i]);
+		}
+		EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+	}
+	for (i = 0; i != 10; i++)
+		test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+
+	/* Avoids a reserved region */
+	ASSERT_EQ(0,
+		  ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+			&test_cmd));
+	for (i = 0; i != 10; i++) {
+		size_t length = PAGE_SIZE * (i + 1);
+
+		test_ioctl_ioas_map(buffer, length, &iovas[i]);
+		EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+		EXPECT_EQ(false,
+			  iovas[i] > test_cmd.add_reserved.start &&
+				  iovas[i] <
+					  test_cmd.add_reserved.start +
+						  test_cmd.add_reserved.length);
+	}
+	for (i = 0; i != 10; i++)
+		test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+
+	/* Allowed region intersects with a reserved region */
+	ranges[0].start = PAGE_SIZE;
+	ranges[0].last = PAGE_SIZE * 600;
+	EXPECT_ERRNO(EADDRINUSE,
+		     ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+
+	/* Allocate from an allowed region */
+	if (self->domain_id) {
+		ranges[0].start = MOCK_APERTURE_START + PAGE_SIZE;
+		ranges[0].last = MOCK_APERTURE_START + PAGE_SIZE * 600 - 1;
+	} else {
+		ranges[0].start = PAGE_SIZE * 200;
+		ranges[0].last = PAGE_SIZE * 600 - 1;
+	}
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+	for (i = 0; i != 10; i++) {
+		size_t length = PAGE_SIZE * (i + 1);
+
+		test_ioctl_ioas_map(buffer, length, &iovas[i]);
+		EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+		EXPECT_EQ(true, iovas[i] >= ranges[0].start);
+		EXPECT_EQ(true, iovas[i] <= ranges[0].last);
+		EXPECT_EQ(true, iovas[i] + length > ranges[0].start);
+		EXPECT_EQ(true, iovas[i] + length <= ranges[0].last + 1);
+	}
+	for (i = 0; i != 10; i++)
+		test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+}
+
+TEST_F(iommufd_ioas, area_allowed)
+{
+	struct iommu_test_cmd test_cmd = {
+		.size = sizeof(test_cmd),
+		.op = IOMMU_TEST_OP_ADD_RESERVED,
+		.id = self->ioas_id,
+		.add_reserved = { .start = PAGE_SIZE * 4,
+				  .length = PAGE_SIZE * 100 },
+	};
+	struct iommu_iova_range ranges[1] = {};
+	struct iommu_ioas_allow_iovas allow_cmd = {
+		.size = sizeof(allow_cmd),
+		.ioas_id = self->ioas_id,
+		.num_iovas = 1,
+		.allowed_iovas = (uintptr_t)ranges,
+	};
+
+	/* Reserved intersects an allowed */
+	allow_cmd.num_iovas = 1;
+	ranges[0].start = self->base_iova;
+	ranges[0].last = ranges[0].start + PAGE_SIZE * 600;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+	test_cmd.add_reserved.start = ranges[0].start + PAGE_SIZE;
+	test_cmd.add_reserved.length = PAGE_SIZE;
+	EXPECT_ERRNO(EADDRINUSE,
+		     ioctl(self->fd,
+			   _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+			   &test_cmd));
+	allow_cmd.num_iovas = 0;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+
+	/* Allowed intersects a reserved */
+	ASSERT_EQ(0,
+		  ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+			&test_cmd));
+	allow_cmd.num_iovas = 1;
+	ranges[0].start = self->base_iova;
+	ranges[0].last = ranges[0].start + PAGE_SIZE * 600;
+	EXPECT_ERRNO(EADDRINUSE,
+		     ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+}
+
+TEST_F(iommufd_ioas, copy_area)
+{
+	struct iommu_ioas_copy copy_cmd = {
+		.size = sizeof(copy_cmd),
+		.flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+		.dst_ioas_id = self->ioas_id,
+		.src_ioas_id = self->ioas_id,
+		.length = PAGE_SIZE,
+	};
+
+	test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova);
+
+	/* Copy inside a single IOAS */
+	copy_cmd.src_iova = self->base_iova;
+	copy_cmd.dst_iova = self->base_iova + PAGE_SIZE;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+
+	/* Copy between IOAS's */
+	copy_cmd.src_iova = self->base_iova;
+	copy_cmd.dst_iova = 0;
+	test_ioctl_ioas_alloc(&copy_cmd.dst_ioas_id);
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+}
+
+TEST_F(iommufd_ioas, iova_ranges)
+{
+	struct iommu_test_cmd test_cmd = {
+		.size = sizeof(test_cmd),
+		.op = IOMMU_TEST_OP_ADD_RESERVED,
+		.id = self->ioas_id,
+		.add_reserved = { .start = PAGE_SIZE, .length = PAGE_SIZE },
+	};
+	struct iommu_iova_range *ranges = buffer;
+	struct iommu_ioas_iova_ranges ranges_cmd = {
+		.size = sizeof(ranges_cmd),
+		.ioas_id = self->ioas_id,
+		.num_iovas = BUFFER_SIZE / sizeof(*ranges),
+		.allowed_iovas = (uintptr_t)ranges,
+	};
+
+	/* Range can be read */
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+	EXPECT_EQ(1, ranges_cmd.num_iovas);
+	if (!self->domain_id) {
+		EXPECT_EQ(0, ranges[0].start);
+		EXPECT_EQ(SIZE_MAX, ranges[0].last);
+		EXPECT_EQ(1, ranges_cmd.out_iova_alignment);
+	} else {
+		EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+		EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+		EXPECT_EQ(MOCK_PAGE_SIZE, ranges_cmd.out_iova_alignment);
+	}
+
+	/* Buffer too small */
+	memset(ranges, 0, BUFFER_SIZE);
+	ranges_cmd.num_iovas = 0;
+	EXPECT_ERRNO(EMSGSIZE,
+		     ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+	EXPECT_EQ(1, ranges_cmd.num_iovas);
+	EXPECT_EQ(0, ranges[0].start);
+	EXPECT_EQ(0, ranges[0].last);
+
+	/* 2 ranges */
+	ASSERT_EQ(0,
+		  ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+			&test_cmd));
+	ranges_cmd.num_iovas = BUFFER_SIZE / sizeof(*ranges);
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+	if (!self->domain_id) {
+		EXPECT_EQ(2, ranges_cmd.num_iovas);
+		EXPECT_EQ(0, ranges[0].start);
+		EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last);
+		EXPECT_EQ(PAGE_SIZE * 2, ranges[1].start);
+		EXPECT_EQ(SIZE_MAX, ranges[1].last);
+	} else {
+		EXPECT_EQ(1, ranges_cmd.num_iovas);
+		EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+		EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+	}
+
+	/* Buffer too small */
+	memset(ranges, 0, BUFFER_SIZE);
+	ranges_cmd.num_iovas = 1;
+	if (!self->domain_id) {
+		EXPECT_ERRNO(EMSGSIZE, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES,
+					     &ranges_cmd));
+		EXPECT_EQ(2, ranges_cmd.num_iovas);
+		EXPECT_EQ(0, ranges[0].start);
+		EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last);
+	} else {
+		ASSERT_EQ(0,
+			  ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+		EXPECT_EQ(1, ranges_cmd.num_iovas);
+		EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+		EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+	}
+	EXPECT_EQ(0, ranges[1].start);
+	EXPECT_EQ(0, ranges[1].last);
+}
+
+TEST_F(iommufd_ioas, access_pin)
+{
+	struct iommu_test_cmd access_cmd = {
+		.size = sizeof(access_cmd),
+		.op = IOMMU_TEST_OP_ACCESS_PAGES,
+		.access_pages = { .iova = MOCK_APERTURE_START,
+				  .length = BUFFER_SIZE,
+				  .uptr = (uintptr_t)buffer },
+	};
+	struct iommu_test_cmd check_map_cmd = {
+		.size = sizeof(check_map_cmd),
+		.op = IOMMU_TEST_OP_MD_CHECK_MAP,
+		.check_map = { .iova = MOCK_APERTURE_START,
+			       .length = BUFFER_SIZE,
+			       .uptr = (uintptr_t)buffer },
+	};
+	uint32_t access_pages_id;
+	unsigned int npages;
+
+	test_cmd_create_access(self->ioas_id, &access_cmd.id,
+			       MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+
+	for (npages = 1; npages < BUFFER_SIZE / PAGE_SIZE; npages++) {
+		uint32_t mock_device_id;
+		uint32_t mock_hwpt_id;
+
+		access_cmd.access_pages.length = npages * PAGE_SIZE;
+
+		/* Single map/unmap */
+		test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+					  MOCK_APERTURE_START);
+		ASSERT_EQ(0, ioctl(self->fd,
+				   _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+				   &access_cmd));
+		test_cmd_destroy_access_pages(
+			access_cmd.id,
+			access_cmd.access_pages.out_access_pages_id);
+
+		/* Double user */
+		ASSERT_EQ(0, ioctl(self->fd,
+				   _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+				   &access_cmd));
+		access_pages_id = access_cmd.access_pages.out_access_pages_id;
+		ASSERT_EQ(0, ioctl(self->fd,
+				   _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+				   &access_cmd));
+		test_cmd_destroy_access_pages(
+			access_cmd.id,
+			access_cmd.access_pages.out_access_pages_id);
+		test_cmd_destroy_access_pages(access_cmd.id, access_pages_id);
+
+		/* Add/remove a domain with a user */
+		ASSERT_EQ(0, ioctl(self->fd,
+				   _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+				   &access_cmd));
+		test_cmd_mock_domain(self->ioas_id, &mock_device_id,
+				     &mock_hwpt_id);
+		check_map_cmd.id = mock_hwpt_id;
+		ASSERT_EQ(0, ioctl(self->fd,
+				   _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP),
+				   &check_map_cmd));
+
+		test_ioctl_destroy(mock_device_id);
+		test_ioctl_destroy(mock_hwpt_id);
+		test_cmd_destroy_access_pages(
+			access_cmd.id,
+			access_cmd.access_pages.out_access_pages_id);
+
+		test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE);
+	}
+	test_cmd_destroy_access(access_cmd.id);
+}
+
+TEST_F(iommufd_ioas, access_pin_unmap)
+{
+	struct iommu_test_cmd access_pages_cmd = {
+		.size = sizeof(access_pages_cmd),
+		.op = IOMMU_TEST_OP_ACCESS_PAGES,
+		.access_pages = { .iova = MOCK_APERTURE_START,
+				  .length = BUFFER_SIZE,
+				  .uptr = (uintptr_t)buffer },
+	};
+
+	test_cmd_create_access(self->ioas_id, &access_pages_cmd.id,
+			       MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+	test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, MOCK_APERTURE_START);
+	ASSERT_EQ(0,
+		  ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+			&access_pages_cmd));
+
+	/* Trigger the unmap op */
+	test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE);
+
+	/* kernel removed the item for us */
+	test_err_destroy_access_pages(
+		ENOENT, access_pages_cmd.id,
+		access_pages_cmd.access_pages.out_access_pages_id);
+}
+
+static void check_access_rw(struct __test_metadata *_metadata, int fd,
+			    unsigned int access_id, uint64_t iova,
+			    unsigned int def_flags)
+{
+	uint16_t tmp[32];
+	struct iommu_test_cmd access_cmd = {
+		.size = sizeof(access_cmd),
+		.op = IOMMU_TEST_OP_ACCESS_RW,
+		.id = access_id,
+		.access_rw = { .uptr = (uintptr_t)tmp },
+	};
+	uint16_t *buffer16 = buffer;
+	unsigned int i;
+	void *tmp2;
+
+	for (i = 0; i != BUFFER_SIZE / sizeof(*buffer16); i++)
+		buffer16[i] = rand();
+
+	for (access_cmd.access_rw.iova = iova + PAGE_SIZE - 50;
+	     access_cmd.access_rw.iova < iova + PAGE_SIZE + 50;
+	     access_cmd.access_rw.iova++) {
+		for (access_cmd.access_rw.length = 1;
+		     access_cmd.access_rw.length < sizeof(tmp);
+		     access_cmd.access_rw.length++) {
+			access_cmd.access_rw.flags = def_flags;
+			ASSERT_EQ(0, ioctl(fd,
+					   _IOMMU_TEST_CMD(
+						   IOMMU_TEST_OP_ACCESS_RW),
+					   &access_cmd));
+			ASSERT_EQ(0,
+				  memcmp(buffer + (access_cmd.access_rw.iova -
+						   iova),
+					 tmp, access_cmd.access_rw.length));
+
+			for (i = 0; i != ARRAY_SIZE(tmp); i++)
+				tmp[i] = rand();
+			access_cmd.access_rw.flags = def_flags |
+						     MOCK_ACCESS_RW_WRITE;
+			ASSERT_EQ(0, ioctl(fd,
+					   _IOMMU_TEST_CMD(
+						   IOMMU_TEST_OP_ACCESS_RW),
+					   &access_cmd));
+			ASSERT_EQ(0,
+				  memcmp(buffer + (access_cmd.access_rw.iova -
+						   iova),
+					 tmp, access_cmd.access_rw.length));
+		}
+	}
+
+	/* Multi-page test */
+	tmp2 = malloc(BUFFER_SIZE);
+	ASSERT_NE(NULL, tmp2);
+	access_cmd.access_rw.iova = iova;
+	access_cmd.access_rw.length = BUFFER_SIZE;
+	access_cmd.access_rw.flags = def_flags;
+	access_cmd.access_rw.uptr = (uintptr_t)tmp2;
+	ASSERT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			   &access_cmd));
+	ASSERT_EQ(0, memcmp(buffer, tmp2, access_cmd.access_rw.length));
+	free(tmp2);
+}
+
+TEST_F(iommufd_ioas, access_rw)
+{
+	__u32 access_id;
+	__u64 iova;
+
+	test_cmd_create_access(self->ioas_id, &access_id, 0);
+	test_ioctl_ioas_map(buffer, BUFFER_SIZE, &iova);
+	check_access_rw(_metadata, self->fd, access_id, iova, 0);
+	check_access_rw(_metadata, self->fd, access_id, iova,
+			MOCK_ACCESS_RW_SLOW_PATH);
+	test_ioctl_ioas_unmap(iova, BUFFER_SIZE);
+	test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, access_rw_unaligned)
+{
+	__u32 access_id;
+	__u64 iova;
+
+	test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+	/* Unaligned pages */
+	iova = self->base_iova + MOCK_PAGE_SIZE;
+	test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, iova);
+	check_access_rw(_metadata, self->fd, access_id, iova, 0);
+	test_ioctl_ioas_unmap(iova, BUFFER_SIZE);
+	test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, fork_gone)
+{
+	__u32 access_id;
+	pid_t child;
+
+	test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+	/* Create a mapping with a different mm */
+	child = fork();
+	if (!child) {
+		test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+					  MOCK_APERTURE_START);
+		exit(0);
+	}
+	ASSERT_NE(-1, child);
+	ASSERT_EQ(child, waitpid(child, NULL, 0));
+
+	if (self->domain_id) {
+		/*
+		 * If a domain already existed then everything was pinned within
+		 * the fork, so this copies from one domain to another.
+		 */
+		test_cmd_mock_domain(self->ioas_id, NULL, NULL);
+		check_access_rw(_metadata, self->fd, access_id,
+				MOCK_APERTURE_START, 0);
+
+	} else {
+		/*
+		 * Otherwise we need to actually pin pages which can't happen
+		 * since the fork is gone.
+		 */
+		test_err_mock_domain(EFAULT, self->ioas_id, NULL, NULL);
+	}
+
+	test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, fork_present)
+{
+	__u32 access_id;
+	int pipefds[2];
+	uint64_t tmp;
+	pid_t child;
+	int efd;
+
+	test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+	ASSERT_EQ(0, pipe2(pipefds, O_CLOEXEC));
+	efd = eventfd(0, EFD_CLOEXEC);
+	ASSERT_NE(-1, efd);
+
+	/* Create a mapping with a different mm */
+	child = fork();
+	if (!child) {
+		__u64 iova;
+		uint64_t one = 1;
+
+		close(pipefds[1]);
+		test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+					  MOCK_APERTURE_START);
+		if (write(efd, &one, sizeof(one)) != sizeof(one))
+			exit(100);
+		if (read(pipefds[0], &iova, 1) != 1)
+			exit(100);
+		exit(0);
+	}
+	close(pipefds[0]);
+	ASSERT_NE(-1, child);
+	ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp)));
+
+	/* Read pages from the remote process */
+	test_cmd_mock_domain(self->ioas_id, NULL, NULL);
+	check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0);
+
+	ASSERT_EQ(0, close(pipefds[1]));
+	ASSERT_EQ(child, waitpid(child, NULL, 0));
+
+	test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, ioas_option_huge_pages)
+{
+	struct iommu_option cmd = {
+		.size = sizeof(cmd),
+		.option_id = IOMMU_OPTION_HUGE_PAGES,
+		.op = IOMMU_OPTION_OP_GET,
+		.val64 = 3,
+		.object_id = self->ioas_id,
+	};
+
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+	ASSERT_EQ(1, cmd.val64);
+
+	cmd.op = IOMMU_OPTION_OP_SET;
+	cmd.val64 = 0;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	cmd.op = IOMMU_OPTION_OP_GET;
+	cmd.val64 = 3;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+	ASSERT_EQ(0, cmd.val64);
+
+	cmd.op = IOMMU_OPTION_OP_SET;
+	cmd.val64 = 2;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	cmd.op = IOMMU_OPTION_OP_SET;
+	cmd.val64 = 1;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+}
+
+TEST_F(iommufd_ioas, ioas_iova_alloc)
+{
+	unsigned int length;
+	__u64 iova;
+
+	for (length = 1; length != PAGE_SIZE * 2; length++) {
+		if (variant->mock_domains && (length % MOCK_PAGE_SIZE)) {
+			test_err_ioctl_ioas_map(EINVAL, buffer, length, &iova);
+		} else {
+			test_ioctl_ioas_map(buffer, length, &iova);
+			test_ioctl_ioas_unmap(iova, length);
+		}
+	}
+}
+
+TEST_F(iommufd_ioas, ioas_align_change)
+{
+	struct iommu_option cmd = {
+		.size = sizeof(cmd),
+		.option_id = IOMMU_OPTION_HUGE_PAGES,
+		.op = IOMMU_OPTION_OP_SET,
+		.object_id = self->ioas_id,
+		/* 0 means everything must be aligned to PAGE_SIZE */
+		.val64 = 0,
+	};
+
+	/*
+	 * We cannot upgrade the alignment using OPTION_HUGE_PAGES when a domain
+	 * and map are present.
+	 */
+	if (variant->mock_domains)
+		return;
+
+	/*
+	 * We can upgrade to PAGE_SIZE alignment when things are aligned right
+	 */
+	test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, MOCK_APERTURE_START);
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	/* Misalignment is rejected at map time */
+	test_err_ioctl_ioas_map_fixed(EINVAL, buffer + MOCK_PAGE_SIZE,
+				      PAGE_SIZE,
+				      MOCK_APERTURE_START + PAGE_SIZE);
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	/* Reduce alignment */
+	cmd.val64 = 1;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	/* Confirm misalignment is rejected during alignment upgrade */
+	test_ioctl_ioas_map_fixed(buffer + MOCK_PAGE_SIZE, PAGE_SIZE,
+				  MOCK_APERTURE_START + PAGE_SIZE);
+	cmd.val64 = 0;
+	EXPECT_ERRNO(EADDRINUSE, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+	test_ioctl_ioas_unmap(MOCK_APERTURE_START + PAGE_SIZE, PAGE_SIZE);
+	test_ioctl_ioas_unmap(MOCK_APERTURE_START, PAGE_SIZE);
+}
+
+TEST_F(iommufd_ioas, copy_sweep)
+{
+	struct iommu_ioas_copy copy_cmd = {
+		.size = sizeof(copy_cmd),
+		.flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+		.src_ioas_id = self->ioas_id,
+		.dst_iova = MOCK_APERTURE_START,
+		.length = MOCK_PAGE_SIZE,
+	};
+	unsigned int dst_ioas_id;
+	uint64_t last_iova;
+	uint64_t iova;
+
+	test_ioctl_ioas_alloc(&dst_ioas_id);
+	copy_cmd.dst_ioas_id = dst_ioas_id;
+
+	if (variant->mock_domains)
+		last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 1;
+	else
+		last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 2;
+
+	test_ioctl_ioas_map_fixed(buffer, last_iova - MOCK_APERTURE_START + 1,
+				  MOCK_APERTURE_START);
+
+	for (iova = MOCK_APERTURE_START - PAGE_SIZE; iova <= last_iova;
+	     iova += 511) {
+		copy_cmd.src_iova = iova;
+		if (iova < MOCK_APERTURE_START ||
+		    iova + copy_cmd.length - 1 > last_iova) {
+			EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_IOAS_COPY,
+						   &copy_cmd));
+		} else {
+			ASSERT_EQ(0,
+				  ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+			test_ioctl_ioas_unmap_id(dst_ioas_id, copy_cmd.dst_iova,
+						 copy_cmd.length);
+		}
+	}
+
+	test_ioctl_destroy(dst_ioas_id);
+}
+
+FIXTURE(iommufd_mock_domain)
+{
+	int fd;
+	uint32_t ioas_id;
+	uint32_t domain_id;
+	uint32_t domain_ids[2];
+	int mmap_flags;
+	size_t mmap_buf_size;
+};
+
+FIXTURE_VARIANT(iommufd_mock_domain)
+{
+	unsigned int mock_domains;
+	bool hugepages;
+};
+
+FIXTURE_SETUP(iommufd_mock_domain)
+{
+	unsigned int i;
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	ASSERT_NE(-1, self->fd);
+	test_ioctl_ioas_alloc(&self->ioas_id);
+
+	ASSERT_GE(ARRAY_SIZE(self->domain_ids), variant->mock_domains);
+
+	for (i = 0; i != variant->mock_domains; i++)
+		test_cmd_mock_domain(self->ioas_id, NULL, &self->domain_ids[i]);
+	self->domain_id = self->domain_ids[0];
+
+	self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS;
+	self->mmap_buf_size = PAGE_SIZE * 8;
+	if (variant->hugepages) {
+		/*
+		 * MAP_POPULATE will cause the kernel to fail mmap if THPs are
+		 * not available.
+		 */
+		self->mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+		self->mmap_buf_size = HUGEPAGE_SIZE * 2;
+	}
+}
+
+FIXTURE_TEARDOWN(iommufd_mock_domain)
+{
+	teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain)
+{
+	.mock_domains = 1,
+	.hugepages = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains)
+{
+	.mock_domains = 2,
+	.hugepages = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage)
+{
+	.mock_domains = 1,
+	.hugepages = true,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage)
+{
+	.mock_domains = 2,
+	.hugepages = true,
+};
+
+/* Have the kernel check that the user pages made it to the iommu_domain */
+#define check_mock_iova(_ptr, _iova, _length)                                \
+	({                                                                   \
+		struct iommu_test_cmd check_map_cmd = {                      \
+			.size = sizeof(check_map_cmd),                       \
+			.op = IOMMU_TEST_OP_MD_CHECK_MAP,                    \
+			.id = self->domain_id,                               \
+			.check_map = { .iova = _iova,                        \
+				       .length = _length,                    \
+				       .uptr = (uintptr_t)(_ptr) },          \
+		};                                                           \
+		ASSERT_EQ(0,                                                 \
+			  ioctl(self->fd,                                    \
+				_IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), \
+				&check_map_cmd));                            \
+		if (self->domain_ids[1]) {                                   \
+			check_map_cmd.id = self->domain_ids[1];              \
+			ASSERT_EQ(0,                                         \
+				  ioctl(self->fd,                            \
+					_IOMMU_TEST_CMD(                     \
+						IOMMU_TEST_OP_MD_CHECK_MAP), \
+					&check_map_cmd));                    \
+		}                                                            \
+	})
+
+TEST_F(iommufd_mock_domain, basic)
+{
+	size_t buf_size = self->mmap_buf_size;
+	uint8_t *buf;
+	__u64 iova;
+
+	/* Simple one page map */
+	test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova);
+	check_mock_iova(buffer, iova, PAGE_SIZE);
+
+	buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1,
+		   0);
+	ASSERT_NE(MAP_FAILED, buf);
+
+	/* EFAULT half way through mapping */
+	ASSERT_EQ(0, munmap(buf + buf_size / 2, buf_size / 2));
+	test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova);
+
+	/* EFAULT on first page */
+	ASSERT_EQ(0, munmap(buf, buf_size / 2));
+	test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova);
+}
+
+TEST_F(iommufd_mock_domain, ro_unshare)
+{
+	uint8_t *buf;
+	__u64 iova;
+	int fd;
+
+	fd = open("/proc/self/exe", O_RDONLY);
+	ASSERT_NE(-1, fd);
+
+	buf = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+	ASSERT_NE(MAP_FAILED, buf);
+	close(fd);
+
+	/*
+	 * There have been lots of changes to the "unshare" mechanism in
+	 * get_user_pages(), make sure it works right. The write to the page
+	 * after we map it for reading should not change the assigned PFN.
+	 */
+	ASSERT_EQ(0,
+		  _test_ioctl_ioas_map(self->fd, self->ioas_id, buf, PAGE_SIZE,
+				       &iova, IOMMU_IOAS_MAP_READABLE));
+	check_mock_iova(buf, iova, PAGE_SIZE);
+	memset(buf, 1, PAGE_SIZE);
+	check_mock_iova(buf, iova, PAGE_SIZE);
+	ASSERT_EQ(0, munmap(buf, PAGE_SIZE));
+}
+
+TEST_F(iommufd_mock_domain, all_aligns)
+{
+	size_t test_step = variant->hugepages ? (self->mmap_buf_size / 16) :
+						MOCK_PAGE_SIZE;
+	size_t buf_size = self->mmap_buf_size;
+	unsigned int start;
+	unsigned int end;
+	uint8_t *buf;
+
+	buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1,
+		   0);
+	ASSERT_NE(MAP_FAILED, buf);
+	check_refs(buf, buf_size, 0);
+
+	/*
+	 * Map every combination of page size and alignment within a big region,
+	 * less for hugepage case as it takes so long to finish.
+	 */
+	for (start = 0; start < buf_size; start += test_step) {
+		if (variant->hugepages)
+			end = buf_size;
+		else
+			end = start + MOCK_PAGE_SIZE;
+		for (; end < buf_size; end += MOCK_PAGE_SIZE) {
+			size_t length = end - start;
+			__u64 iova;
+
+			test_ioctl_ioas_map(buf + start, length, &iova);
+			check_mock_iova(buf + start, iova, length);
+			check_refs(buf + start / PAGE_SIZE * PAGE_SIZE,
+				   end / PAGE_SIZE * PAGE_SIZE -
+					   start / PAGE_SIZE * PAGE_SIZE,
+				   1);
+
+			test_ioctl_ioas_unmap(iova, length);
+		}
+	}
+	check_refs(buf, buf_size, 0);
+	ASSERT_EQ(0, munmap(buf, buf_size));
+}
+
+TEST_F(iommufd_mock_domain, all_aligns_copy)
+{
+	size_t test_step = variant->hugepages ? self->mmap_buf_size / 16 :
+						MOCK_PAGE_SIZE;
+	size_t buf_size = self->mmap_buf_size;
+	unsigned int start;
+	unsigned int end;
+	uint8_t *buf;
+
+	buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1,
+		   0);
+	ASSERT_NE(MAP_FAILED, buf);
+	check_refs(buf, buf_size, 0);
+
+	/*
+	 * Map every combination of page size and alignment within a big region,
+	 * less for hugepage case as it takes so long to finish.
+	 */
+	for (start = 0; start < buf_size; start += test_step) {
+		if (variant->hugepages)
+			end = buf_size;
+		else
+			end = start + MOCK_PAGE_SIZE;
+		for (; end < buf_size; end += MOCK_PAGE_SIZE) {
+			size_t length = end - start;
+			unsigned int old_id;
+			uint32_t mock_device_id;
+			__u64 iova;
+
+			test_ioctl_ioas_map(buf + start, length, &iova);
+
+			/* Add and destroy a domain while the area exists */
+			old_id = self->domain_ids[1];
+			test_cmd_mock_domain(self->ioas_id, &mock_device_id,
+					     &self->domain_ids[1]);
+
+			check_mock_iova(buf + start, iova, length);
+			check_refs(buf + start / PAGE_SIZE * PAGE_SIZE,
+				   end / PAGE_SIZE * PAGE_SIZE -
+					   start / PAGE_SIZE * PAGE_SIZE,
+				   1);
+
+			test_ioctl_destroy(mock_device_id);
+			test_ioctl_destroy(self->domain_ids[1]);
+			self->domain_ids[1] = old_id;
+
+			test_ioctl_ioas_unmap(iova, length);
+		}
+	}
+	check_refs(buf, buf_size, 0);
+	ASSERT_EQ(0, munmap(buf, buf_size));
+}
+
+TEST_F(iommufd_mock_domain, user_copy)
+{
+	struct iommu_test_cmd access_cmd = {
+		.size = sizeof(access_cmd),
+		.op = IOMMU_TEST_OP_ACCESS_PAGES,
+		.access_pages = { .length = BUFFER_SIZE,
+				  .uptr = (uintptr_t)buffer },
+	};
+	struct iommu_ioas_copy copy_cmd = {
+		.size = sizeof(copy_cmd),
+		.flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+		.dst_ioas_id = self->ioas_id,
+		.dst_iova = MOCK_APERTURE_START,
+		.length = BUFFER_SIZE,
+	};
+	unsigned int ioas_id;
+
+	/* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */
+	test_ioctl_ioas_alloc(&ioas_id);
+	test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE,
+			       &copy_cmd.src_iova);
+
+	test_cmd_create_access(ioas_id, &access_cmd.id,
+			       MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+
+	access_cmd.access_pages.iova = copy_cmd.src_iova;
+	ASSERT_EQ(0,
+		  ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+			&access_cmd));
+	copy_cmd.src_ioas_id = ioas_id;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+	check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE);
+
+	test_cmd_destroy_access_pages(
+		access_cmd.id, access_cmd.access_pages.out_access_pages_id);
+	test_cmd_destroy_access(access_cmd.id) test_ioctl_destroy(ioas_id);
+
+	test_ioctl_destroy(ioas_id);
+}
+
+/* VFIO compatibility IOCTLs */
+
+TEST_F(iommufd, simple_ioctls)
+{
+	ASSERT_EQ(VFIO_API_VERSION, ioctl(self->fd, VFIO_GET_API_VERSION));
+	ASSERT_EQ(1, ioctl(self->fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU));
+}
+
+TEST_F(iommufd, unmap_cmd)
+{
+	struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+		.iova = MOCK_APERTURE_START,
+		.size = PAGE_SIZE,
+	};
+
+	unmap_cmd.argsz = 1;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+	unmap_cmd.argsz = sizeof(unmap_cmd);
+	unmap_cmd.flags = 1 << 31;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+	unmap_cmd.flags = 0;
+	EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+}
+
+TEST_F(iommufd, map_cmd)
+{
+	struct vfio_iommu_type1_dma_map map_cmd = {
+		.iova = MOCK_APERTURE_START,
+		.size = PAGE_SIZE,
+		.vaddr = (__u64)buffer,
+	};
+
+	map_cmd.argsz = 1;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+	map_cmd.argsz = sizeof(map_cmd);
+	map_cmd.flags = 1 << 31;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+	/* Requires a domain to be attached */
+	map_cmd.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+	EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+}
+
+TEST_F(iommufd, info_cmd)
+{
+	struct vfio_iommu_type1_info info_cmd = {};
+
+	/* Invalid argsz */
+	info_cmd.argsz = 1;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd));
+
+	info_cmd.argsz = sizeof(info_cmd);
+	EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd));
+}
+
+TEST_F(iommufd, set_iommu_cmd)
+{
+	/* Requires a domain to be attached */
+	EXPECT_ERRNO(ENODEV,
+		     ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU));
+	EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU));
+}
+
+TEST_F(iommufd, vfio_ioas)
+{
+	struct iommu_vfio_ioas vfio_ioas_cmd = {
+		.size = sizeof(vfio_ioas_cmd),
+		.op = IOMMU_VFIO_IOAS_GET,
+	};
+	__u32 ioas_id;
+
+	/* ENODEV if there is no compat ioas */
+	EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+	/* Invalid id for set */
+	vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_SET;
+	EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+	/* Valid id for set*/
+	test_ioctl_ioas_alloc(&ioas_id);
+	vfio_ioas_cmd.ioas_id = ioas_id;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+	/* Same id comes back from get */
+	vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+	ASSERT_EQ(ioas_id, vfio_ioas_cmd.ioas_id);
+
+	/* Clear works */
+	vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_CLEAR;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+	vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET;
+	EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+}
+
+FIXTURE(vfio_compat_mock_domain)
+{
+	int fd;
+	uint32_t ioas_id;
+};
+
+FIXTURE_VARIANT(vfio_compat_mock_domain)
+{
+	unsigned int version;
+};
+
+FIXTURE_SETUP(vfio_compat_mock_domain)
+{
+	struct iommu_vfio_ioas vfio_ioas_cmd = {
+		.size = sizeof(vfio_ioas_cmd),
+		.op = IOMMU_VFIO_IOAS_SET,
+	};
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	ASSERT_NE(-1, self->fd);
+
+	/* Create what VFIO would consider a group */
+	test_ioctl_ioas_alloc(&self->ioas_id);
+	test_cmd_mock_domain(self->ioas_id, NULL, NULL);
+
+	/* Attach it to the vfio compat */
+	vfio_ioas_cmd.ioas_id = self->ioas_id;
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+	ASSERT_EQ(0, ioctl(self->fd, VFIO_SET_IOMMU, variant->version));
+}
+
+FIXTURE_TEARDOWN(vfio_compat_mock_domain)
+{
+	teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v2)
+{
+	.version = VFIO_TYPE1v2_IOMMU,
+};
+
+FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v0)
+{
+	.version = VFIO_TYPE1_IOMMU,
+};
+
+TEST_F(vfio_compat_mock_domain, simple_close)
+{
+}
+
+TEST_F(vfio_compat_mock_domain, option_huge_pages)
+{
+	struct iommu_option cmd = {
+		.size = sizeof(cmd),
+		.option_id = IOMMU_OPTION_HUGE_PAGES,
+		.op = IOMMU_OPTION_OP_GET,
+		.val64 = 3,
+		.object_id = self->ioas_id,
+	};
+
+	ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+	if (variant->version == VFIO_TYPE1_IOMMU) {
+		ASSERT_EQ(0, cmd.val64);
+	} else {
+		ASSERT_EQ(1, cmd.val64);
+	}
+}
+
+/*
+ * Execute an ioctl command stored in buffer and check that the result does not
+ * overflow memory.
+ */
+static bool is_filled(const void *buf, uint8_t c, size_t len)
+{
+	const uint8_t *cbuf = buf;
+
+	for (; len; cbuf++, len--)
+		if (*cbuf != c)
+			return false;
+	return true;
+}
+
+#define ioctl_check_buf(fd, cmd)                                         \
+	({                                                               \
+		size_t _cmd_len = *(__u32 *)buffer;                      \
+									 \
+		memset(buffer + _cmd_len, 0xAA, BUFFER_SIZE - _cmd_len); \
+		ASSERT_EQ(0, ioctl(fd, cmd, buffer));                    \
+		ASSERT_EQ(true, is_filled(buffer + _cmd_len, 0xAA,       \
+					  BUFFER_SIZE - _cmd_len));      \
+	})
+
+static void check_vfio_info_cap_chain(struct __test_metadata *_metadata,
+				      struct vfio_iommu_type1_info *info_cmd)
+{
+	const struct vfio_info_cap_header *cap;
+
+	ASSERT_GE(info_cmd->argsz, info_cmd->cap_offset + sizeof(*cap));
+	cap = buffer + info_cmd->cap_offset;
+	while (true) {
+		size_t cap_size;
+
+		if (cap->next)
+			cap_size = (buffer + cap->next) - (void *)cap;
+		else
+			cap_size = (buffer + info_cmd->argsz) - (void *)cap;
+
+		switch (cap->id) {
+		case VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE: {
+			struct vfio_iommu_type1_info_cap_iova_range *data =
+				(void *)cap;
+
+			ASSERT_EQ(1, data->header.version);
+			ASSERT_EQ(1, data->nr_iovas);
+			EXPECT_EQ(MOCK_APERTURE_START,
+				  data->iova_ranges[0].start);
+			EXPECT_EQ(MOCK_APERTURE_LAST, data->iova_ranges[0].end);
+			break;
+		}
+		case VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL: {
+			struct vfio_iommu_type1_info_dma_avail *data =
+				(void *)cap;
+
+			ASSERT_EQ(1, data->header.version);
+			ASSERT_EQ(sizeof(*data), cap_size);
+			break;
+		}
+		default:
+			ASSERT_EQ(false, true);
+			break;
+		}
+		if (!cap->next)
+			break;
+
+		ASSERT_GE(info_cmd->argsz, cap->next + sizeof(*cap));
+		ASSERT_GE(buffer + cap->next, (void *)cap);
+		cap = buffer + cap->next;
+	}
+}
+
+TEST_F(vfio_compat_mock_domain, get_info)
+{
+	struct vfio_iommu_type1_info *info_cmd = buffer;
+	unsigned int i;
+	size_t caplen;
+
+	/* Pre-cap ABI */
+	*info_cmd = (struct vfio_iommu_type1_info){
+		.argsz = offsetof(struct vfio_iommu_type1_info, cap_offset),
+	};
+	ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+	ASSERT_NE(0, info_cmd->iova_pgsizes);
+	ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+		  info_cmd->flags);
+
+	/* Read the cap chain size */
+	*info_cmd = (struct vfio_iommu_type1_info){
+		.argsz = sizeof(*info_cmd),
+	};
+	ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+	ASSERT_NE(0, info_cmd->iova_pgsizes);
+	ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+		  info_cmd->flags);
+	ASSERT_EQ(0, info_cmd->cap_offset);
+	ASSERT_LT(sizeof(*info_cmd), info_cmd->argsz);
+
+	/* Read the caps, kernel should never create a corrupted caps */
+	caplen = info_cmd->argsz;
+	for (i = sizeof(*info_cmd); i < caplen; i++) {
+		*info_cmd = (struct vfio_iommu_type1_info){
+			.argsz = i,
+		};
+		ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+		ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+			  info_cmd->flags);
+		if (!info_cmd->cap_offset)
+			continue;
+		check_vfio_info_cap_chain(_metadata, info_cmd);
+	}
+}
+
+static void shuffle_array(unsigned long *array, size_t nelms)
+{
+	unsigned int i;
+
+	/* Shuffle */
+	for (i = 0; i != nelms; i++) {
+		unsigned long tmp = array[i];
+		unsigned int other = rand() % (nelms - i);
+
+		array[i] = array[other];
+		array[other] = tmp;
+	}
+}
+
+TEST_F(vfio_compat_mock_domain, map)
+{
+	struct vfio_iommu_type1_dma_map map_cmd = {
+		.argsz = sizeof(map_cmd),
+		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+		.vaddr = (uintptr_t)buffer,
+		.size = BUFFER_SIZE,
+		.iova = MOCK_APERTURE_START,
+	};
+	struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+		.argsz = sizeof(unmap_cmd),
+		.size = BUFFER_SIZE,
+		.iova = MOCK_APERTURE_START,
+	};
+	unsigned long pages_iova[BUFFER_SIZE / PAGE_SIZE];
+	unsigned int i;
+
+	/* Simple map/unmap */
+	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+	ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+
+	/* UNMAP_FLAG_ALL requres 0 iova/size */
+	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+	unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL;
+	EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+	unmap_cmd.iova = 0;
+	unmap_cmd.size = 0;
+	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+	ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+
+	/* Small pages */
+	for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+		map_cmd.iova = pages_iova[i] =
+			MOCK_APERTURE_START + i * PAGE_SIZE;
+		map_cmd.vaddr = (uintptr_t)buffer + i * PAGE_SIZE;
+		map_cmd.size = PAGE_SIZE;
+		ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+	}
+	shuffle_array(pages_iova, ARRAY_SIZE(pages_iova));
+
+	unmap_cmd.flags = 0;
+	unmap_cmd.size = PAGE_SIZE;
+	for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+		unmap_cmd.iova = pages_iova[i];
+		ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+	}
+}
+
+TEST_F(vfio_compat_mock_domain, huge_map)
+{
+	size_t buf_size = HUGEPAGE_SIZE * 2;
+	struct vfio_iommu_type1_dma_map map_cmd = {
+		.argsz = sizeof(map_cmd),
+		.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+		.size = buf_size,
+		.iova = MOCK_APERTURE_START,
+	};
+	struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+		.argsz = sizeof(unmap_cmd),
+	};
+	unsigned long pages_iova[16];
+	unsigned int i;
+	void *buf;
+
+	/* Test huge pages and splitting */
+	buf = mmap(0, buf_size, PROT_READ | PROT_WRITE,
+		   MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1,
+		   0);
+	ASSERT_NE(MAP_FAILED, buf);
+	map_cmd.vaddr = (uintptr_t)buf;
+	ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+	unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova);
+	for (i = 0; i != ARRAY_SIZE(pages_iova); i++)
+		pages_iova[i] = MOCK_APERTURE_START + (i * unmap_cmd.size);
+	shuffle_array(pages_iova, ARRAY_SIZE(pages_iova));
+
+	/* type1 mode can cut up larger mappings, type1v2 always fails */
+	for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+		unmap_cmd.iova = pages_iova[i];
+		unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova);
+		if (variant->version == VFIO_TYPE1_IOMMU) {
+			ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA,
+					   &unmap_cmd));
+		} else {
+			EXPECT_ERRNO(ENOENT,
+				     ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA,
+					   &unmap_cmd));
+		}
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
new file mode 100644
index 0000000000000..9713111b820dd
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -0,0 +1,580 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ *
+ * These tests are "kernel integrity" tests. They are looking for kernel
+ * WARN/OOPS/kasn/etc splats triggered by kernel sanitizers & debugging
+ * features. It does not attempt to verify that the system calls are doing what
+ * they are supposed to do.
+ *
+ * The basic philosophy is to run a sequence of calls that will succeed and then
+ * sweep every failure injection point on that call chain to look for
+ * interesting things in error handling.
+ *
+ * This test is best run with:
+ *  echo 1 > /proc/sys/kernel/panic_on_warn
+ * If something is actually going wrong.
+ */
+#include <fcntl.h>
+#include <dirent.h>
+
+#define __EXPORTED_HEADERS__
+#include <linux/vfio.h>
+
+#include "iommufd_utils.h"
+
+static bool have_fault_injection;
+
+static int writeat(int dfd, const char *fn, const char *val)
+{
+	size_t val_len = strlen(val);
+	ssize_t res;
+	int fd;
+
+	fd = openat(dfd, fn, O_WRONLY);
+	if (fd == -1)
+		return -1;
+	res = write(fd, val, val_len);
+	assert(res == val_len);
+	close(fd);
+	return 0;
+}
+
+static __attribute__((constructor)) void setup_buffer(void)
+{
+	BUFFER_SIZE = 2*1024*1024;
+
+	buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE,
+		      MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+}
+
+/*
+ * This sets up fail_injection in a way that is useful for this test.
+ * It does not attempt to restore things back to how they were.
+ */
+static __attribute__((constructor)) void setup_fault_injection(void)
+{
+	DIR *debugfs = opendir("/sys/kernel/debug/");
+	struct dirent *dent;
+
+	if (!debugfs)
+		return;
+
+	/* Allow any allocation call to be fault injected */
+	if (writeat(dirfd(debugfs), "failslab/ignore-gfp-wait", "N"))
+		return;
+	writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-wait", "N");
+	writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-highmem", "N");
+
+	while ((dent = readdir(debugfs))) {
+		char fn[300];
+
+		if (strncmp(dent->d_name, "fail", 4) != 0)
+			continue;
+
+		/* We are looking for kernel splats, quiet down the log */
+		snprintf(fn, sizeof(fn), "%s/verbose", dent->d_name);
+		writeat(dirfd(debugfs), fn, "0");
+	}
+	closedir(debugfs);
+	have_fault_injection = true;
+}
+
+struct fail_nth_state {
+	int proc_fd;
+	unsigned int iteration;
+};
+
+static void fail_nth_first(struct __test_metadata *_metadata,
+			   struct fail_nth_state *nth_state)
+{
+	char buf[300];
+
+	snprintf(buf, sizeof(buf), "/proc/self/task/%u/fail-nth", getpid());
+	nth_state->proc_fd = open(buf, O_RDWR);
+	ASSERT_NE(-1, nth_state->proc_fd);
+}
+
+static bool fail_nth_next(struct __test_metadata *_metadata,
+			  struct fail_nth_state *nth_state,
+			  int test_result)
+{
+	static const char disable_nth[] = "0";
+	char buf[300];
+
+	/*
+	 * This is just an arbitrary limit based on the current kernel
+	 * situation. Changes in the kernel can dramtically change the number of
+	 * required fault injection sites, so if this hits it doesn't
+	 * necessarily mean a test failure, just that the limit has to be made
+	 * bigger.
+	 */
+	ASSERT_GT(400, nth_state->iteration);
+	if (nth_state->iteration != 0) {
+		ssize_t res;
+		ssize_t res2;
+
+		buf[0] = 0;
+		/*
+		 * Annoyingly disabling the nth can also fail. This means
+		 * the test passed without triggering failure
+		 */
+		res = pread(nth_state->proc_fd, buf, sizeof(buf), 0);
+		if (res == -1 && errno == EFAULT) {
+			buf[0] = '1';
+			buf[1] = '\n';
+			res = 2;
+		}
+
+		res2 = pwrite(nth_state->proc_fd, disable_nth,
+			      ARRAY_SIZE(disable_nth) - 1, 0);
+		if (res2 == -1 && errno == EFAULT) {
+			res2 = pwrite(nth_state->proc_fd, disable_nth,
+				      ARRAY_SIZE(disable_nth) - 1, 0);
+			buf[0] = '1';
+			buf[1] = '\n';
+		}
+		ASSERT_EQ(ARRAY_SIZE(disable_nth) - 1, res2);
+
+		/* printf("  nth %u result=%d nth=%u\n", nth_state->iteration,
+		       test_result, atoi(buf)); */
+		fflush(stdout);
+		ASSERT_LT(1, res);
+		if (res != 2 || buf[0] != '0' || buf[1] != '\n')
+			return false;
+	} else {
+		/* printf("  nth %u result=%d\n", nth_state->iteration,
+		       test_result); */
+	}
+	nth_state->iteration++;
+	return true;
+}
+
+/*
+ * This is called during the test to start failure injection. It allows the test
+ * to do some setup that has already been swept and thus reduce the required
+ * iterations.
+ */
+void __fail_nth_enable(struct __test_metadata *_metadata,
+		       struct fail_nth_state *nth_state)
+{
+	char buf[300];
+	size_t len;
+
+	if (!nth_state->iteration)
+		return;
+
+	len = snprintf(buf, sizeof(buf), "%u", nth_state->iteration);
+	ASSERT_EQ(len, pwrite(nth_state->proc_fd, buf, len, 0));
+}
+#define fail_nth_enable() __fail_nth_enable(_metadata, _nth_state)
+
+#define TEST_FAIL_NTH(fixture_name, name)                                           \
+	static int test_nth_##name(struct __test_metadata *_metadata,               \
+				   FIXTURE_DATA(fixture_name) *self,                \
+				   const FIXTURE_VARIANT(fixture_name)              \
+					   *variant,                                \
+				   struct fail_nth_state *_nth_state);              \
+	TEST_F(fixture_name, name)                                                  \
+	{                                                                           \
+		struct fail_nth_state nth_state = {};                               \
+		int test_result = 0;                                                \
+										    \
+		if (!have_fault_injection)                                          \
+			SKIP(return,                                                \
+				   "fault injection is not enabled in the kernel"); \
+		fail_nth_first(_metadata, &nth_state);                              \
+		ASSERT_EQ(0, test_nth_##name(_metadata, self, variant,              \
+					     &nth_state));                          \
+		while (fail_nth_next(_metadata, &nth_state, test_result)) {         \
+			fixture_name##_teardown(_metadata, self, variant);          \
+			fixture_name##_setup(_metadata, self, variant);             \
+			test_result = test_nth_##name(_metadata, self,              \
+						      variant, &nth_state);         \
+		};                                                                  \
+		ASSERT_EQ(0, test_result);                                          \
+	}                                                                           \
+	static int test_nth_##name(                                                 \
+		struct __test_metadata __attribute__((unused)) *_metadata,          \
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self,           \
+		const FIXTURE_VARIANT(fixture_name) __attribute__((unused))         \
+			*variant,                                                   \
+		struct fail_nth_state *_nth_state)
+
+FIXTURE(basic_fail_nth)
+{
+	int fd;
+	uint32_t access_id;
+};
+
+FIXTURE_SETUP(basic_fail_nth)
+{
+	self->fd = -1;
+	self->access_id = 0;
+}
+
+FIXTURE_TEARDOWN(basic_fail_nth)
+{
+	int rc;
+
+	if (self->access_id) {
+		/* The access FD holds the iommufd open until it closes */
+		rc = _test_cmd_destroy_access(self->access_id);
+		assert(rc == 0);
+	}
+	teardown_iommufd(self->fd, _metadata);
+}
+
+/* Cover ioas.c */
+TEST_FAIL_NTH(basic_fail_nth, basic)
+{
+	struct iommu_iova_range ranges[10];
+	uint32_t ioas_id;
+	__u64 iova;
+
+	fail_nth_enable();
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	if (self->fd == -1)
+		return -1;
+
+	if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+		return -1;
+
+	{
+		struct iommu_ioas_iova_ranges ranges_cmd = {
+			.size = sizeof(ranges_cmd),
+			.num_iovas = ARRAY_SIZE(ranges),
+			.ioas_id = ioas_id,
+			.allowed_iovas = (uintptr_t)ranges,
+		};
+		if (ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd))
+			return -1;
+	}
+
+	{
+		struct iommu_ioas_allow_iovas allow_cmd = {
+			.size = sizeof(allow_cmd),
+			.ioas_id = ioas_id,
+			.num_iovas = 1,
+			.allowed_iovas = (uintptr_t)ranges,
+		};
+
+		ranges[0].start = 16*1024;
+		ranges[0].last = BUFFER_SIZE + 16 * 1024 * 600 - 1;
+		if (ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd))
+			return -1;
+	}
+
+	if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+				 IOMMU_IOAS_MAP_WRITEABLE |
+					 IOMMU_IOAS_MAP_READABLE))
+		return -1;
+
+	{
+		struct iommu_ioas_copy copy_cmd = {
+			.size = sizeof(copy_cmd),
+			.flags = IOMMU_IOAS_MAP_WRITEABLE |
+				 IOMMU_IOAS_MAP_READABLE,
+			.dst_ioas_id = ioas_id,
+			.src_ioas_id = ioas_id,
+			.src_iova = iova,
+			.length = sizeof(ranges),
+		};
+
+		if (ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd))
+			return -1;
+	}
+
+	if (_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE,
+				   NULL))
+		return -1;
+	/* Failure path of no IOVA to unmap */
+	_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, NULL);
+	return 0;
+}
+
+/* iopt_area_fill_domains() and iopt_area_fill_domain() */
+TEST_FAIL_NTH(basic_fail_nth, map_domain)
+{
+	uint32_t ioas_id;
+	__u32 device_id;
+	__u32 hwpt_id;
+	__u64 iova;
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	if (self->fd == -1)
+		return -1;
+
+	if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+		return -1;
+
+	if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+		return -1;
+
+	fail_nth_enable();
+
+	if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
+		return -1;
+
+	if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+				 IOMMU_IOAS_MAP_WRITEABLE |
+					 IOMMU_IOAS_MAP_READABLE))
+		return -1;
+
+	if (_test_ioctl_destroy(self->fd, device_id))
+		return -1;
+	if (_test_ioctl_destroy(self->fd, hwpt_id))
+		return -1;
+
+	if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
+		return -1;
+	return 0;
+}
+
+TEST_FAIL_NTH(basic_fail_nth, map_two_domains)
+{
+	uint32_t ioas_id;
+	__u32 device_id2;
+	__u32 device_id;
+	__u32 hwpt_id2;
+	__u32 hwpt_id;
+	__u64 iova;
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	if (self->fd == -1)
+		return -1;
+
+	if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+		return -1;
+
+	if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+		return -1;
+
+	if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
+		return -1;
+
+	fail_nth_enable();
+
+	if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2))
+		return -1;
+
+	if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+				 IOMMU_IOAS_MAP_WRITEABLE |
+					 IOMMU_IOAS_MAP_READABLE))
+		return -1;
+
+	if (_test_ioctl_destroy(self->fd, device_id))
+		return -1;
+	if (_test_ioctl_destroy(self->fd, hwpt_id))
+		return -1;
+
+	if (_test_ioctl_destroy(self->fd, device_id2))
+		return -1;
+	if (_test_ioctl_destroy(self->fd, hwpt_id2))
+		return -1;
+
+	if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
+		return -1;
+	if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id2, &hwpt_id2))
+		return -1;
+	return 0;
+}
+
+TEST_FAIL_NTH(basic_fail_nth, access_rw)
+{
+	uint64_t tmp_big[4096];
+	uint32_t ioas_id;
+	uint16_t tmp[32];
+	__u64 iova;
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	if (self->fd == -1)
+		return -1;
+
+	if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+		return -1;
+
+	if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+		return -1;
+
+	if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+				 IOMMU_IOAS_MAP_WRITEABLE |
+					 IOMMU_IOAS_MAP_READABLE))
+		return -1;
+
+	fail_nth_enable();
+
+	if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, 0))
+		return -1;
+
+	{
+		struct iommu_test_cmd access_cmd = {
+			.size = sizeof(access_cmd),
+			.op = IOMMU_TEST_OP_ACCESS_RW,
+			.id = self->access_id,
+			.access_rw = { .iova = iova,
+				       .length = sizeof(tmp),
+				       .uptr = (uintptr_t)tmp },
+		};
+
+		// READ
+		if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			  &access_cmd))
+			return -1;
+
+		access_cmd.access_rw.flags = MOCK_ACCESS_RW_WRITE;
+		if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			  &access_cmd))
+			return -1;
+
+		access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH;
+		if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			  &access_cmd))
+			return -1;
+		access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH |
+					     MOCK_ACCESS_RW_WRITE;
+		if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			  &access_cmd))
+			return -1;
+	}
+
+	{
+		struct iommu_test_cmd access_cmd = {
+			.size = sizeof(access_cmd),
+			.op = IOMMU_TEST_OP_ACCESS_RW,
+			.id = self->access_id,
+			.access_rw = { .iova = iova,
+				       .flags = MOCK_ACCESS_RW_SLOW_PATH,
+				       .length = sizeof(tmp_big),
+				       .uptr = (uintptr_t)tmp_big },
+		};
+
+		if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			  &access_cmd))
+			return -1;
+	}
+	if (_test_cmd_destroy_access(self->access_id))
+		return -1;
+	self->access_id = 0;
+	return 0;
+}
+
+/* pages.c access functions */
+TEST_FAIL_NTH(basic_fail_nth, access_pin)
+{
+	uint32_t access_pages_id;
+	uint32_t ioas_id;
+	__u64 iova;
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	if (self->fd == -1)
+		return -1;
+
+	if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+		return -1;
+
+	if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+		return -1;
+
+	if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+				 IOMMU_IOAS_MAP_WRITEABLE |
+					 IOMMU_IOAS_MAP_READABLE))
+		return -1;
+
+	if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
+				    MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
+		return -1;
+
+	fail_nth_enable();
+
+	{
+		struct iommu_test_cmd access_cmd = {
+			.size = sizeof(access_cmd),
+			.op = IOMMU_TEST_OP_ACCESS_PAGES,
+			.id = self->access_id,
+			.access_pages = { .iova = iova,
+					  .length = BUFFER_SIZE,
+					  .uptr = (uintptr_t)buffer },
+		};
+
+		if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			  &access_cmd))
+			return -1;
+		access_pages_id = access_cmd.access_pages.out_access_pages_id;
+	}
+
+	if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
+					   access_pages_id))
+		return -1;
+
+	if (_test_cmd_destroy_access(self->access_id))
+		return -1;
+	self->access_id = 0;
+	return 0;
+}
+
+/* iopt_pages_fill_xarray() */
+TEST_FAIL_NTH(basic_fail_nth, access_pin_domain)
+{
+	uint32_t access_pages_id;
+	uint32_t ioas_id;
+	__u32 device_id;
+	__u32 hwpt_id;
+	__u64 iova;
+
+	self->fd = open("/dev/iommu", O_RDWR);
+	if (self->fd == -1)
+		return -1;
+
+	if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+		return -1;
+
+	if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+		return -1;
+
+	if (_test_cmd_mock_domain(self->fd, ioas_id, &device_id, &hwpt_id))
+		return -1;
+
+	if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+				 IOMMU_IOAS_MAP_WRITEABLE |
+					 IOMMU_IOAS_MAP_READABLE))
+		return -1;
+
+	if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
+				    MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
+		return -1;
+
+	fail_nth_enable();
+
+	{
+		struct iommu_test_cmd access_cmd = {
+			.size = sizeof(access_cmd),
+			.op = IOMMU_TEST_OP_ACCESS_PAGES,
+			.id = self->access_id,
+			.access_pages = { .iova = iova,
+					  .length = BUFFER_SIZE,
+					  .uptr = (uintptr_t)buffer },
+		};
+
+		if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+			  &access_cmd))
+			return -1;
+		access_pages_id = access_cmd.access_pages.out_access_pages_id;
+	}
+
+	if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
+					   access_pages_id))
+		return -1;
+
+	if (_test_cmd_destroy_access(self->access_id))
+		return -1;
+	self->access_id = 0;
+
+	if (_test_ioctl_destroy(self->fd, device_id))
+		return -1;
+	if (_test_ioctl_destroy(self->fd, hwpt_id))
+		return -1;
+	return 0;
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
new file mode 100644
index 0000000000000..0d1f46369c2a3
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -0,0 +1,278 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
+#ifndef __SELFTEST_IOMMUFD_UTILS
+#define __SELFTEST_IOMMUFD_UTILS
+
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/fcntl.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+#include <assert.h>
+
+#include "../kselftest_harness.h"
+#include "../../../../drivers/iommu/iommufd/iommufd_test.h"
+
+/* Hack to make assertions more readable */
+#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD
+
+static void *buffer;
+static unsigned long BUFFER_SIZE;
+
+/*
+ * Have the kernel check the refcount on pages. I don't know why a freshly
+ * mmap'd anon non-compound page starts out with a ref of 3
+ */
+#define check_refs(_ptr, _length, _refs)                                      \
+	({                                                                    \
+		struct iommu_test_cmd test_cmd = {                            \
+			.size = sizeof(test_cmd),                             \
+			.op = IOMMU_TEST_OP_MD_CHECK_REFS,                    \
+			.check_refs = { .length = _length,                    \
+					.uptr = (uintptr_t)(_ptr),            \
+					.refs = _refs },                      \
+		};                                                            \
+		ASSERT_EQ(0,                                                  \
+			  ioctl(self->fd,                                     \
+				_IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), \
+				&test_cmd));                                  \
+	})
+
+static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *device_id,
+				 __u32 *hwpt_id)
+{
+	struct iommu_test_cmd cmd = {
+		.size = sizeof(cmd),
+		.op = IOMMU_TEST_OP_MOCK_DOMAIN,
+		.id = ioas_id,
+		.mock_domain = {},
+	};
+	int ret;
+
+	ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+	if (ret)
+		return ret;
+	if (device_id)
+		*device_id = cmd.mock_domain.out_device_id;
+	assert(cmd.id != 0);
+	if (hwpt_id)
+		*hwpt_id = cmd.mock_domain.out_hwpt_id;
+	return 0;
+}
+#define test_cmd_mock_domain(ioas_id, device_id, hwpt_id)                \
+	ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, device_id, \
+					   hwpt_id))
+#define test_err_mock_domain(_errno, ioas_id, device_id, hwpt_id)     \
+	EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \
+						   device_id, hwpt_id))
+
+static int _test_cmd_create_access(int fd, unsigned int ioas_id,
+				   __u32 *access_id, unsigned int flags)
+{
+	struct iommu_test_cmd cmd = {
+		.size = sizeof(cmd),
+		.op = IOMMU_TEST_OP_CREATE_ACCESS,
+		.id = ioas_id,
+		.create_access = { .flags = flags },
+	};
+	int ret;
+
+	ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+	if (ret)
+		return ret;
+	*access_id = cmd.create_access.out_access_fd;
+	return 0;
+}
+#define test_cmd_create_access(ioas_id, access_id, flags)                  \
+	ASSERT_EQ(0, _test_cmd_create_access(self->fd, ioas_id, access_id, \
+					     flags))
+
+static int _test_cmd_destroy_access(unsigned int access_id)
+{
+	return close(access_id);
+}
+#define test_cmd_destroy_access(access_id) \
+	ASSERT_EQ(0, _test_cmd_destroy_access(access_id))
+
+static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id,
+					  unsigned int access_pages_id)
+{
+	struct iommu_test_cmd cmd = {
+		.size = sizeof(cmd),
+		.op = IOMMU_TEST_OP_DESTROY_ACCESS_PAGES,
+		.id = access_id,
+		.destroy_access_pages = { .access_pages_id = access_pages_id },
+	};
+	return ioctl(fd, IOMMU_TEST_CMD, &cmd);
+}
+#define test_cmd_destroy_access_pages(access_id, access_pages_id)        \
+	ASSERT_EQ(0, _test_cmd_destroy_access_pages(self->fd, access_id, \
+						    access_pages_id))
+#define test_err_destroy_access_pages(_errno, access_id, access_pages_id) \
+	EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages(              \
+				     self->fd, access_id, access_pages_id))
+
+static int _test_ioctl_destroy(int fd, unsigned int id)
+{
+	struct iommu_destroy cmd = {
+		.size = sizeof(cmd),
+		.id = id,
+	};
+	return ioctl(fd, IOMMU_DESTROY, &cmd);
+}
+#define test_ioctl_destroy(id) ASSERT_EQ(0, _test_ioctl_destroy(self->fd, id))
+
+static int _test_ioctl_ioas_alloc(int fd, __u32 *id)
+{
+	struct iommu_ioas_alloc cmd = {
+		.size = sizeof(cmd),
+	};
+	int ret;
+
+	ret = ioctl(fd, IOMMU_IOAS_ALLOC, &cmd);
+	if (ret)
+		return ret;
+	*id = cmd.out_ioas_id;
+	return 0;
+}
+#define test_ioctl_ioas_alloc(id)                                   \
+	({                                                          \
+		ASSERT_EQ(0, _test_ioctl_ioas_alloc(self->fd, id)); \
+		ASSERT_NE(0, *(id));                                \
+	})
+
+static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer,
+				size_t length, __u64 *iova, unsigned int flags)
+{
+	struct iommu_ioas_map cmd = {
+		.size = sizeof(cmd),
+		.flags = flags,
+		.ioas_id = ioas_id,
+		.user_va = (uintptr_t)buffer,
+		.length = length,
+	};
+	int ret;
+
+	if (flags & IOMMU_IOAS_MAP_FIXED_IOVA)
+		cmd.iova = *iova;
+
+	ret = ioctl(fd, IOMMU_IOAS_MAP, &cmd);
+	*iova = cmd.iova;
+	return ret;
+}
+#define test_ioctl_ioas_map(buffer, length, iova_p)                        \
+	ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
+					  length, iova_p,                  \
+					  IOMMU_IOAS_MAP_WRITEABLE |       \
+						  IOMMU_IOAS_MAP_READABLE))
+
+#define test_err_ioctl_ioas_map(_errno, buffer, length, iova_p)            \
+	EXPECT_ERRNO(_errno,                                               \
+		     _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
+					  length, iova_p,                  \
+					  IOMMU_IOAS_MAP_WRITEABLE |       \
+						  IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_id(ioas_id, buffer, length, iova_p)              \
+	ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, ioas_id, buffer, length, \
+					  iova_p,                            \
+					  IOMMU_IOAS_MAP_WRITEABLE |         \
+						  IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_fixed(buffer, length, iova)                       \
+	({                                                                    \
+		__u64 __iova = iova;                                          \
+		ASSERT_EQ(0, _test_ioctl_ioas_map(                            \
+				     self->fd, self->ioas_id, buffer, length, \
+				     &__iova,                                 \
+				     IOMMU_IOAS_MAP_FIXED_IOVA |              \
+					     IOMMU_IOAS_MAP_WRITEABLE |       \
+					     IOMMU_IOAS_MAP_READABLE));       \
+	})
+
+#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova)           \
+	({                                                                    \
+		__u64 __iova = iova;                                          \
+		EXPECT_ERRNO(_errno,                                          \
+			     _test_ioctl_ioas_map(                            \
+				     self->fd, self->ioas_id, buffer, length, \
+				     &__iova,                                 \
+				     IOMMU_IOAS_MAP_FIXED_IOVA |              \
+					     IOMMU_IOAS_MAP_WRITEABLE |       \
+					     IOMMU_IOAS_MAP_READABLE));       \
+	})
+
+static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova,
+				  size_t length, uint64_t *out_len)
+{
+	struct iommu_ioas_unmap cmd = {
+		.size = sizeof(cmd),
+		.ioas_id = ioas_id,
+		.iova = iova,
+		.length = length,
+	};
+	int ret;
+
+	ret = ioctl(fd, IOMMU_IOAS_UNMAP, &cmd);
+	if (out_len)
+		*out_len = cmd.length;
+	return ret;
+}
+#define test_ioctl_ioas_unmap(iova, length)                                \
+	ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, iova, \
+					    length, NULL))
+
+#define test_ioctl_ioas_unmap_id(ioas_id, iova, length)                      \
+	ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, length, \
+					    NULL))
+
+#define test_err_ioctl_ioas_unmap(_errno, iova, length)                      \
+	EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \
+						    iova, length, NULL))
+
+static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit)
+{
+	struct iommu_test_cmd memlimit_cmd = {
+		.size = sizeof(memlimit_cmd),
+		.op = IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
+		.memory_limit = { .limit = limit },
+	};
+
+	return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT),
+		     &memlimit_cmd);
+}
+
+#define test_ioctl_set_temp_memory_limit(limit) \
+	ASSERT_EQ(0, _test_ioctl_set_temp_memory_limit(self->fd, limit))
+
+#define test_ioctl_set_default_memory_limit() \
+	test_ioctl_set_temp_memory_limit(65536)
+
+static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
+{
+	struct iommu_test_cmd test_cmd = {
+		.size = sizeof(test_cmd),
+		.op = IOMMU_TEST_OP_MD_CHECK_REFS,
+		.check_refs = { .length = BUFFER_SIZE,
+				.uptr = (uintptr_t)buffer },
+	};
+
+	if (fd == -1)
+		return;
+
+	EXPECT_EQ(0, close(fd));
+
+	fd = open("/dev/iommu", O_RDWR);
+	EXPECT_NE(-1, fd);
+	EXPECT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS),
+			   &test_cmd));
+	EXPECT_EQ(0, close(fd));
+}
+
+#define EXPECT_ERRNO(expected_errno, cmd)         \
+	({                                        \
+		ASSERT_EQ(-1, cmd);               \
+		EXPECT_EQ(expected_errno, errno); \
+	})
+
+#endif
-- 
GitLab


From 5c8c0b3273822cf982c250a9a19e003e4b315edb Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 31 Aug 2022 00:17:04 +0000
Subject: [PATCH 3139/4122] KVM: x86: Delete documentation for READ|WRITE in
 KVM_X86_SET_MSR_FILTER

Delete the paragraph that describes the behavior when both
KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE are set for a range.  There is
nothing special about KVM's handling of this combination, whereas
explicitly documenting the combination suggests that there is some magic
behavior the user needs to be aware of.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220831001706.4075399-2-seanjc@google.com
---
 Documentation/virt/kvm/api.rst | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 5617bc4f899f4..373cf425e85c1 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4115,13 +4115,6 @@ flags values for ``struct kvm_msr_filter_range``:
   a write for a particular MSR should be handled regardless of the default
   filter action.
 
-``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
-
-  Filter both read and write accesses to MSRs using the given bitmap. A 0
-  in the bitmap indicates that both reads and writes should immediately fail,
-  while a 1 indicates that reads and writes for a particular MSR are not
-  filtered by this range.
-
 flags values for ``struct kvm_msr_filter``:
 
 ``KVM_MSR_FILTER_DEFAULT_ALLOW``
-- 
GitLab


From b93d2ec34ef368bb854289db99d8d6ca7f523e25 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 31 Aug 2022 00:17:05 +0000
Subject: [PATCH 3140/4122] KVM: x86: Reword MSR filtering docs to more
 precisely define behavior

Reword the MSR filtering documentatiion to more precisely define the
behavior of filtering using common virtualization terminology.

  - Explicitly document KVM's behavior when an MSR is denied
  - s/handled/allowed as there is no guarantee KVM will "handle" the
    MSR access
  - Drop the "fall back" terminology, which incorrectly suggests that
    there is existing KVM behavior to fall back to
  - Fix an off-by-one error in the range (the end is exclusive)
  - Call out the interaction between MSR filtering and
    KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER
  - Delete the redundant paragraph on what '0' and '1' in the bitmap
    means, it's covered by the sections on KVM_MSR_FILTER_{READ,WRITE}
  - Delete the clause on x2APIC MSR behavior depending on APIC base, this
    is covered by stating that KVM follows architectural behavior when
    emulating/virtualizing MSR accesses

Reported-by: Aaron Lewis <aaronlewis@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220831001706.4075399-3-seanjc@google.com
---
 Documentation/virt/kvm/api.rst | 70 +++++++++++++++++-----------------
 1 file changed, 35 insertions(+), 35 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 373cf425e85c1..a4d07b866dea8 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4104,15 +4104,15 @@ flags values for ``struct kvm_msr_filter_range``:
 ``KVM_MSR_FILTER_READ``
 
   Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
-  indicates that a read should immediately fail, while a 1 indicates that
-  a read for a particular MSR should be handled regardless of the default
+  indicates that read accesses should be denied, while a 1 indicates that
+  a read for a particular MSR should be allowed regardless of the default
   filter action.
 
 ``KVM_MSR_FILTER_WRITE``
 
   Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
-  indicates that a write should immediately fail, while a 1 indicates that
-  a write for a particular MSR should be handled regardless of the default
+  indicates that write accesses should be denied, while a 1 indicates that
+  a write for a particular MSR should be allowed regardless of the default
   filter action.
 
 flags values for ``struct kvm_msr_filter``:
@@ -4120,57 +4120,55 @@ flags values for ``struct kvm_msr_filter``:
 ``KVM_MSR_FILTER_DEFAULT_ALLOW``
 
   If no filter range matches an MSR index that is getting accessed, KVM will
-  fall back to allowing access to the MSR.
+  allow accesses to all MSRs by default.
 
 ``KVM_MSR_FILTER_DEFAULT_DENY``
 
   If no filter range matches an MSR index that is getting accessed, KVM will
-  fall back to rejecting access to the MSR. In this mode, all MSRs that should
-  be processed by KVM need to explicitly be marked as allowed in the bitmaps.
+  deny accesses to all MSRs by default.
 
-This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
-specify whether a certain MSR access should be explicitly filtered for or not.
+This ioctl allows userspace to define up to 16 bitmaps of MSR ranges to deny
+guest MSR accesses that would normally be allowed by KVM.  If an MSR is not
+covered by a specific range, the "default" filtering behavior applies.  Each
+bitmap range covers MSRs from [base .. base+nmsrs).
 
-If this ioctl has never been invoked, MSR accesses are not guarded and the
-default KVM in-kernel emulation behavior is fully preserved.
+If an MSR access is denied by userspace, the resulting KVM behavior depends on
+whether or not KVM_CAP_X86_USER_SPACE_MSR's KVM_MSR_EXIT_REASON_FILTER is
+enabled.  If KVM_MSR_EXIT_REASON_FILTER is enabled, KVM will exit to userspace
+on denied accesses, i.e. userspace effectively intercepts the MSR access.  If
+KVM_MSR_EXIT_REASON_FILTER is not enabled, KVM will inject a #GP into the guest
+on denied accesses.
+
+If an MSR access is allowed by userspace, KVM will emulate and/or virtualize
+the access in accordance with the vCPU model.  Note, KVM may still ultimately
+inject a #GP if an access is allowed by userspace, e.g. if KVM doesn't support
+the MSR, or to follow architectural behavior for the MSR.
+
+By default, KVM operates in KVM_MSR_FILTER_DEFAULT_ALLOW mode with no MSR range
+filters.
 
 Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
 filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
 an error.
 
-As soon as the filtering is in place, every MSR access is processed through
-the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
-x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
-and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
-register.
-
 .. warning::
-   MSR accesses coming from nested vmentry/vmexit are not filtered.
+   MSR accesses as part of nested VM-Enter/VM-Exit are not filtered.
    This includes both writes to individual VMCS fields and reads/writes
    through the MSR lists pointed to by the VMCS.
 
-If a bit is within one of the defined ranges, read and write accesses are
-guarded by the bitmap's value for the MSR index if the kind of access
-is included in the ``struct kvm_msr_filter_range`` flags.  If no range
-cover this particular access, the behavior is determined by the flags
-field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
-and ``KVM_MSR_FILTER_DEFAULT_DENY``.
-
-Each bitmap range specifies a range of MSRs to potentially allow access on.
-The range goes from MSR index [base .. base+nmsrs]. The flags field
-indicates whether reads, writes or both reads and writes are filtered
-by setting a 1 bit in the bitmap for the corresponding MSR index.
-
-If an MSR access is not permitted through the filtering, it generates a
-#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
-allows user space to deflect and potentially handle various MSR accesses
-into user space.
+   x2APIC MSR accesses cannot be filtered (KVM silently ignores filters that
+   cover any x2APIC MSRs).
 
 Note, invoking this ioctl while a vCPU is running is inherently racy.  However,
 KVM does guarantee that vCPUs will see either the previous filter or the new
 filter, e.g. MSRs with identical settings in both the old and new filter will
 have deterministic behavior.
 
+Similarly, if userspace wishes to intercept on denied accesses,
+KVM_MSR_EXIT_REASON_FILTER must be enabled before activating any filters, and
+left enabled until after all filters are deactivated.  Failure to do so may
+result in KVM injecting a #GP instead of exiting to userspace.
+
 4.98 KVM_CREATE_SPAPR_TCE_64
 ----------------------------
 
@@ -6500,6 +6498,8 @@ wants to write. Once finished processing the event, user space must continue
 vCPU execution. If the MSR write was unsuccessful, user space also sets the
 "error" field to "1".
 
+See KVM_X86_SET_MSR_FILTER for details on the interaction with MSR filtering.
+
 ::
 
 
@@ -7937,7 +7937,7 @@ KVM_EXIT_X86_WRMSR exit notifications.
 This capability indicates that KVM supports that accesses to user defined MSRs
 may be rejected. With this capability exposed, KVM exports new VM ioctl
 KVM_X86_SET_MSR_FILTER which user space can call to specify bitmaps of MSR
-ranges that KVM should reject access to.
+ranges that KVM should deny access to.
 
 In combination with KVM_CAP_X86_USER_SPACE_MSR, this allows user space to
 trap and emulate MSRs that are outside of the scope of KVM as well as
-- 
GitLab


From 1f158147181b83c5ae02273d0b3b9eddaebcc854 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 31 Aug 2022 00:17:06 +0000
Subject: [PATCH 3141/4122] KVM: x86: Clean up KVM_CAP_X86_USER_SPACE_MSR
 documentation

Clean up the KVM_CAP_X86_USER_SPACE_MSR documentation to eliminate
misleading and/or inconsistent verbiage, and to actually document what
accesses are intercepted by which flags.

  - s/will/may since not all #GPs are guaranteed to be intercepted
  - s/deflect/intercept to align with common KVM terminology
  - s/user space/userspace to align with the majority of KVM docs
  - Avoid using "trap" terminology, as KVM exits to userspace _before_
    stepping, i.e. doesn't exhibit trap-like behavior
  - Actually document the flags

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220831001706.4075399-4-seanjc@google.com
---
 Documentation/virt/kvm/api.rst | 40 ++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index a4d07b866dea8..c6857f6b25ab0 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6473,29 +6473,29 @@ if it decides to decode and emulate the instruction.
 
 Used on x86 systems. When the VM capability KVM_CAP_X86_USER_SPACE_MSR is
 enabled, MSR accesses to registers that would invoke a #GP by KVM kernel code
-will instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR
+may instead trigger a KVM_EXIT_X86_RDMSR exit for reads and KVM_EXIT_X86_WRMSR
 exit for writes.
 
-The "reason" field specifies why the MSR trap occurred. User space will only
-receive MSR exit traps when a particular reason was requested during through
+The "reason" field specifies why the MSR interception occurred. Userspace will
+only receive MSR exits when a particular reason was requested during through
 ENABLE_CAP. Currently valid exit reasons are:
 
 	KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM
 	KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits
 	KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER
 
-For KVM_EXIT_X86_RDMSR, the "index" field tells user space which MSR the guest
-wants to read. To respond to this request with a successful read, user space
+For KVM_EXIT_X86_RDMSR, the "index" field tells userspace which MSR the guest
+wants to read. To respond to this request with a successful read, userspace
 writes the respective data into the "data" field and must continue guest
 execution to ensure the read data is transferred into guest register state.
 
-If the RDMSR request was unsuccessful, user space indicates that with a "1" in
+If the RDMSR request was unsuccessful, userspace indicates that with a "1" in
 the "error" field. This will inject a #GP into the guest when the VCPU is
 executed again.
 
-For KVM_EXIT_X86_WRMSR, the "index" field tells user space which MSR the guest
-wants to write. Once finished processing the event, user space must continue
-vCPU execution. If the MSR write was unsuccessful, user space also sets the
+For KVM_EXIT_X86_WRMSR, the "index" field tells userspace which MSR the guest
+wants to write. Once finished processing the event, userspace must continue
+vCPU execution. If the MSR write was unsuccessful, userspace also sets the
 "error" field to "1".
 
 See KVM_X86_SET_MSR_FILTER for details on the interaction with MSR filtering.
@@ -7265,19 +7265,27 @@ the module parameter for the target VM.
 :Parameters: args[0] contains the mask of KVM_MSR_EXIT_REASON_* events to report
 :Returns: 0 on success; -1 on error
 
-This capability enables trapping of #GP invoking RDMSR and WRMSR instructions
-into user space.
+This capability allows userspace to intercept RDMSR and WRMSR instructions if
+access to an MSR is denied.  By default, KVM injects #GP on denied accesses.
 
 When a guest requests to read or write an MSR, KVM may not implement all MSRs
 that are relevant to a respective system. It also does not differentiate by
 CPU type.
 
-To allow more fine grained control over MSR handling, user space may enable
+To allow more fine grained control over MSR handling, userspace may enable
 this capability. With it enabled, MSR accesses that match the mask specified in
-args[0] and trigger a #GP event inside the guest by KVM will instead trigger
-KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
-can then handle to implement model specific MSR handling and/or user notifications
-to inform a user that an MSR was not handled.
+args[0] and would trigger a #GP inside the guest will instead trigger
+KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications.  Userspace
+can then implement model specific MSR handling and/or user notifications
+to inform a user that an MSR was not emulated/virtualized by KVM.
+
+The valid mask flags are:
+
+	KVM_MSR_EXIT_REASON_UNKNOWN - intercept accesses to unknown (to KVM) MSRs
+	KVM_MSR_EXIT_REASON_INVAL   - intercept accesses that are architecturally
+                                invalid according to the vCPU model and/or mode
+	KVM_MSR_EXIT_REASON_FILTER  - intercept accesses that are denied by userspace
+                                via KVM_X86_SET_MSR_FILTER
 
 7.22 KVM_CAP_X86_BUS_LOCK_EXIT
 -------------------------------
-- 
GitLab


From 4a8fd4a720f8a8dbc370076d26388176c311218a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 31 Aug 2022 00:07:21 +0000
Subject: [PATCH 3142/4122] KVM: nVMX: Reword comments about generating nested
 CR0/4 read shadows

Reword the comments that (attempt to) document nVMX's overrides of the
CR0/4 read shadows for L2 after calling vmx_set_cr0/4().  The important
behavior that needs to be documented is that KVM needs to override the
shadows to account for L1's masks even though the shadows are set by the
common helpers (and that setting the shadows first would result in the
correct shadows being clobbered).

Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Link: https://lore.kernel.org/r/20220831000721.4066617-1-seanjc@google.com
---
 arch/x86/kvm/vmx/nested.c | 9 +++------
 arch/x86/kvm/vmx/nested.h | 7 ++++---
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 61c83424285c8..b6f4411b613e9 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -2588,12 +2588,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		nested_ept_init_mmu_context(vcpu);
 
 	/*
-	 * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
-	 * bits which we consider mandatory enabled.
-	 * The CR0_READ_SHADOW is what L2 should have expected to read given
-	 * the specifications by L1; It's not enough to take
-	 * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we
-	 * have more bits than L1 expected.
+	 * Override the CR0/CR4 read shadows after setting the effective guest
+	 * CR0/CR4.  The common helpers also set the shadows, but they don't
+	 * account for vmcs12's cr0/4_guest_host_mask.
 	 */
 	vmx_set_cr0(vcpu, vmcs12->guest_cr0);
 	vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 6312c9541c3cd..96952263b029b 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -79,9 +79,10 @@ static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu)
 }
 
 /*
- * Return the cr0 value that a nested guest would read. This is a combination
- * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
- * its hypervisor (cr0_read_shadow).
+ * Return the cr0/4 value that a nested guest would read. This is a combination
+ * of L1's "real" cr0 used to run the guest (guest_cr0), and the bits shadowed
+ * by the L1 hypervisor (cr0_read_shadow).  KVM must emulate CPU behavior as
+ * the value+mask loaded into vmcs02 may not match the vmcs12 fields.
  */
 static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
 {
-- 
GitLab


From 0b5e7a16a0a79a3742f0df9e45bca46f01b40e6a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 28 Sep 2022 23:20:15 +0000
Subject: [PATCH 3143/4122] KVM: VMX: Make vmread_error_trampoline() uncallable
 from C code

Declare vmread_error_trampoline() as an opaque symbol so that it cannot
be called from C code, at least not without some serious fudging.  The
trampoline always passes parameters on the stack so that the inline
VMREAD sequence doesn't need to clobber registers.  regparm(0) was
originally added to document the stack behavior, but it ended up being
confusing because regparm(0) is a nop for 64-bit targets.

Opportunustically wrap the trampoline and its declaration in #ifdeffery
to make it even harder to invoke incorrectly, to document why it exists,
and so that it's not left behind if/when CONFIG_CC_HAS_ASM_GOTO_OUTPUT
is true for all supported toolchains.

No functional change intended.

Cc: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220928232015.745948-1-seanjc@google.com
---
 arch/x86/kvm/vmx/vmenter.S |  2 ++
 arch/x86/kvm/vmx/vmx_ops.h | 18 ++++++++++++++++--
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S
index 0b5db4de4d09e..766c6b3ef5ed9 100644
--- a/arch/x86/kvm/vmx/vmenter.S
+++ b/arch/x86/kvm/vmx/vmenter.S
@@ -269,6 +269,7 @@ SYM_FUNC_END(__vmx_vcpu_run)
 
 .section .text, "ax"
 
+#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
 /**
  * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
  * @field:	VMCS field encoding that failed
@@ -317,6 +318,7 @@ SYM_FUNC_START(vmread_error_trampoline)
 
 	RET
 SYM_FUNC_END(vmread_error_trampoline)
+#endif
 
 SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
 	/*
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index f6f23c7397dc5..842dc898c9728 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -11,14 +11,28 @@
 #include "../x86.h"
 
 void vmread_error(unsigned long field, bool fault);
-__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
-							 bool fault);
 void vmwrite_error(unsigned long field, unsigned long value);
 void vmclear_error(struct vmcs *vmcs, u64 phys_addr);
 void vmptrld_error(struct vmcs *vmcs, u64 phys_addr);
 void invvpid_error(unsigned long ext, u16 vpid, gva_t gva);
 void invept_error(unsigned long ext, u64 eptp, gpa_t gpa);
 
+#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
+/*
+ * The VMREAD error trampoline _always_ uses the stack to pass parameters, even
+ * for 64-bit targets.  Preserving all registers allows the VMREAD inline asm
+ * blob to avoid clobbering GPRs, which in turn allows the compiler to better
+ * optimize sequences of VMREADs.
+ *
+ * Declare the trampoline as an opaque label as it's not safe to call from C
+ * code; there is no way to tell the compiler to pass params on the stack for
+ * 64-bit targets.
+ *
+ * void vmread_error_trampoline(unsigned long field, bool fault);
+ */
+extern unsigned long vmread_error_trampoline;
+#endif
+
 static __always_inline void vmcs_check16(unsigned long field)
 {
 	BUILD_BUG_ON_MSG(__builtin_constant_p(field) && ((field) & 0x6001) == 0x2000,
-- 
GitLab


From d2a00af2061db863890e32a4a99a6f82c330df1f Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 7 Jun 2022 23:23:51 +0000
Subject: [PATCH 3144/4122] KVM: VMX: Allow userspace to set all supported
 FEATURE_CONTROL bits

Allow userspace to set all supported bits in MSR IA32_FEATURE_CONTROL
irrespective of the guest CPUID model, e.g. via KVM_SET_MSRS.  KVM's ABI
is that userspace is allowed to set MSRs before CPUID, i.e. can set MSRs
to values that would fault according to the guest CPUID model.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220607232353.3375324-2-seanjc@google.com
---
 arch/x86/kvm/vmx/vmx.c | 36 +++++++++++++++++++++++++++++++-----
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3f31c46c306e8..7be1fb50a753f 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1836,12 +1836,38 @@ bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
 	return nested && guest_cpuid_has(vcpu, X86_FEATURE_VMX);
 }
 
-static inline bool vmx_feature_control_msr_valid(struct kvm_vcpu *vcpu,
-						 uint64_t val)
+/*
+ * Userspace is allowed to set any supported IA32_FEATURE_CONTROL regardless of
+ * guest CPUID.  Note, KVM allows userspace to set "VMX in SMX" to maintain
+ * backwards compatibility even though KVM doesn't support emulating SMX.  And
+ * because userspace set "VMX in SMX", the guest must also be allowed to set it,
+ * e.g. if the MSR is left unlocked and the guest does a RMW operation.
+ */
+#define KVM_SUPPORTED_FEATURE_CONTROL  (FEAT_CTL_LOCKED			 | \
+					FEAT_CTL_VMX_ENABLED_INSIDE_SMX	 | \
+					FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX | \
+					FEAT_CTL_SGX_LC_ENABLED		 | \
+					FEAT_CTL_SGX_ENABLED		 | \
+					FEAT_CTL_LMCE_ENABLED)
+
+static inline bool vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
+						 struct msr_data *msr)
 {
-	uint64_t valid_bits = to_vmx(vcpu)->msr_ia32_feature_control_valid_bits;
+	uint64_t valid_bits;
+
+	/*
+	 * Ensure KVM_SUPPORTED_FEATURE_CONTROL is updated when new bits are
+	 * exposed to the guest.
+	 */
+	WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits &
+		     ~KVM_SUPPORTED_FEATURE_CONTROL);
+
+	if (msr->host_initiated)
+		valid_bits = KVM_SUPPORTED_FEATURE_CONTROL;
+	else
+		valid_bits = vmx->msr_ia32_feature_control_valid_bits;
 
-	return !(val & ~valid_bits);
+	return !(msr->data & ~valid_bits);
 }
 
 static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
@@ -2240,7 +2266,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.mcg_ext_ctl = data;
 		break;
 	case MSR_IA32_FEAT_CTL:
-		if (!vmx_feature_control_msr_valid(vcpu, data) ||
+		if (!vmx_feature_control_msr_valid(vmx, msr_info) ||
 		    (to_vmx(vcpu)->msr_ia32_feature_control &
 		     FEAT_CTL_LOCKED && !msr_info->host_initiated))
 			return 1;
-- 
GitLab


From 2d6cd68636d60822219074b7c1d0bfe41321f106 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 7 Jun 2022 23:23:52 +0000
Subject: [PATCH 3145/4122] KVM: VMX: Move MSR_IA32_FEAT_CTL.LOCKED check into
 "is valid" helper

Move the check on IA32_FEATURE_CONTROL being locked, i.e. read-only from
the guest, into the helper to check the overall validity of the incoming
value.  Opportunistically rename the helper to make it clear that it
returns a bool.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220607232353.3375324-3-seanjc@google.com
---
 arch/x86/kvm/vmx/vmx.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7be1fb50a753f..fe5615fd8295c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1850,8 +1850,8 @@ bool nested_vmx_allowed(struct kvm_vcpu *vcpu)
 					FEAT_CTL_SGX_ENABLED		 | \
 					FEAT_CTL_LMCE_ENABLED)
 
-static inline bool vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
-						 struct msr_data *msr)
+static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
+						    struct msr_data *msr)
 {
 	uint64_t valid_bits;
 
@@ -1862,6 +1862,10 @@ static inline bool vmx_feature_control_msr_valid(struct vcpu_vmx *vmx,
 	WARN_ON_ONCE(vmx->msr_ia32_feature_control_valid_bits &
 		     ~KVM_SUPPORTED_FEATURE_CONTROL);
 
+	if (!msr->host_initiated &&
+	    (vmx->msr_ia32_feature_control & FEAT_CTL_LOCKED))
+		return false;
+
 	if (msr->host_initiated)
 		valid_bits = KVM_SUPPORTED_FEATURE_CONTROL;
 	else
@@ -2266,10 +2270,9 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 		vcpu->arch.mcg_ext_ctl = data;
 		break;
 	case MSR_IA32_FEAT_CTL:
-		if (!vmx_feature_control_msr_valid(vmx, msr_info) ||
-		    (to_vmx(vcpu)->msr_ia32_feature_control &
-		     FEAT_CTL_LOCKED && !msr_info->host_initiated))
+		if (!is_vmx_feature_control_msr_valid(vmx, msr_info))
 			return 1;
+
 		vmx->msr_ia32_feature_control = data;
 		if (msr_info->host_initiated && data == 0)
 			vmx_leave_nested(vcpu);
-- 
GitLab


From b80732fdc9b235046687a2999ed198fa55fde901 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 7 Jun 2022 23:23:53 +0000
Subject: [PATCH 3146/4122] KVM: selftests: Verify userspace can stuff
 IA32_FEATURE_CONTROL at will

Verify the KVM allows userspace to set all supported bits in the
IA32_FEATURE_CONTROL MSR irrespective of the current guest CPUID, and
that all unsupported bits are rejected.

Throw the testcase into vmx_msrs_test even though it's not technically a
VMX MSR; it's close enough, and the most frequently feature controlled by
the MSR is VMX.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220607232353.3375324-4-seanjc@google.com
---
 .../selftests/kvm/include/x86_64/processor.h  |  2 +
 .../selftests/kvm/x86_64/vmx_msrs_test.c      | 47 +++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 5d310abe6c3f4..ac4590abb44d7 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -102,6 +102,7 @@ struct kvm_x86_cpu_feature {
 #define	X86_FEATURE_XMM2		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
 #define	X86_FEATURE_FSGSBASE		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
 #define	X86_FEATURE_TSC_ADJUST		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
+#define	X86_FEATURE_SGX			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
 #define	X86_FEATURE_HLE			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
 #define	X86_FEATURE_SMEP	        KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
 #define	X86_FEATURE_INVPCID		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
@@ -115,6 +116,7 @@ struct kvm_x86_cpu_feature {
 #define	X86_FEATURE_PKU			KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
 #define	X86_FEATURE_LA57		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
 #define	X86_FEATURE_RDPID		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
+#define	X86_FEATURE_SGX_LC		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
 #define	X86_FEATURE_SHSTK		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
 #define	X86_FEATURE_IBT			KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
 #define	X86_FEATURE_AMX_TILE		KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
index 322d561b4260e..90720b6205f4e 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
@@ -67,6 +67,52 @@ static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
 	vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
 }
 
+static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
+					    uint64_t msr_bit,
+					    struct kvm_x86_cpu_feature feature)
+{
+	uint64_t val;
+
+	vcpu_clear_cpuid_feature(vcpu, feature);
+
+	val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
+	vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+	vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+	vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+	vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+	vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
+
+	if (!kvm_cpu_has(feature))
+		return;
+
+	vcpu_set_cpuid_feature(vcpu, feature);
+}
+
+static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
+{
+	uint64_t supported_bits = FEAT_CTL_LOCKED |
+				  FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+				  FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+				  FEAT_CTL_SGX_LC_ENABLED |
+				  FEAT_CTL_SGX_ENABLED |
+				  FEAT_CTL_LMCE_ENABLED;
+	int bit, r;
+
+	__ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
+	__ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
+	__ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
+	__ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
+	__ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
+	__ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
+	__ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
+
+	for_each_clear_bit(bit, &supported_bits, 64) {
+		r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
+		TEST_ASSERT(r == 0,
+			    "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
+	}
+}
+
 int main(void)
 {
 	struct kvm_vcpu *vcpu;
@@ -79,6 +125,7 @@ int main(void)
 	vm = vm_create_with_one_vcpu(&vcpu, NULL);
 
 	vmx_save_restore_msrs_test(vcpu);
+	ia32_feature_control_msr_test(vcpu);
 
 	kvm_vm_free(vm);
 }
-- 
GitLab


From 3ebcbd2244f5a69e06e5f655bfbd8127c08201c7 Mon Sep 17 00:00:00 2001
From: Anton Romanov <romanton@google.com>
Date: Wed, 8 Jun 2022 18:35:26 +0000
Subject: [PATCH 3147/4122] KVM: x86: Use current rather than snapshotted TSC
 frequency if it is constant

Don't snapshot tsc_khz into per-cpu cpu_tsc_khz if the host TSC is
constant, in which case the actual TSC frequency will never change and thus
capturing TSC during initialization is unnecessary, KVM can simply use
tsc_khz.  This value is snapshotted from
kvm_timer_init->kvmclock_cpu_online->tsc_khz_changed(NULL)

On CPUs with constant TSC, but not a hardware-specified TSC frequency,
snapshotting cpu_tsc_khz and using that to set a VM's target TSC frequency
can lead to VM to think its TSC frequency is not what it actually is if
refining the TSC completes after KVM snapshots tsc_khz.  The actual
frequency never changes, only the kernel's calculation of what that
frequency is changes.

Ideally, KVM would not be able to race with TSC refinement, or would have
a hook into tsc_refine_calibration_work() to get an alert when refinement
is complete.  Avoiding the race altogether isn't practical as refinement
takes a relative eternity; it's deliberately put on a work queue outside of
the normal boot sequence to avoid unnecessarily delaying boot.

Adding a hook is doable, but somewhat gross due to KVM's ability to be
built as a module.  And if the TSC is constant, which is likely the case
for every VMX/SVM-capable CPU produced in the last decade, the race can be
hit if and only if userspace is able to create a VM before TSC refinement
completes; refinement is slow, but not that slow.

For now, punt on a proper fix, as not taking a snapshot can help some uses
cases and not taking a snapshot is arguably correct irrespective of the
race with refinement.

Signed-off-by: Anton Romanov <romanton@google.com>
Reviewed-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20220608183525.1143682-1-romanton@google.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c | 44 +++++++++++++++++++++++++++++++++-----------
 1 file changed, 33 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index ef12747ecb63d..152ea4993b76f 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2974,6 +2974,22 @@ static void kvm_update_masterclock(struct kvm *kvm)
 	kvm_end_pvclock_update(kvm);
 }
 
+/*
+ * Use the kernel's tsc_khz directly if the TSC is constant, otherwise use KVM's
+ * per-CPU value (which may be zero if a CPU is going offline).  Note, tsc_khz
+ * can change during boot even if the TSC is constant, as it's possible for KVM
+ * to be loaded before TSC calibration completes.  Ideally, KVM would get a
+ * notification when calibration completes, but practically speaking calibration
+ * will complete before userspace is alive enough to create VMs.
+ */
+static unsigned long get_cpu_tsc_khz(void)
+{
+	if (static_cpu_has(X86_FEATURE_CONSTANT_TSC))
+		return tsc_khz;
+	else
+		return __this_cpu_read(cpu_tsc_khz);
+}
+
 /* Called within read_seqcount_begin/retry for kvm->pvclock_sc.  */
 static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
 {
@@ -2984,7 +3000,8 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
 	get_cpu();
 
 	data->flags = 0;
-	if (ka->use_master_clock && __this_cpu_read(cpu_tsc_khz)) {
+	if (ka->use_master_clock &&
+	    (static_cpu_has(X86_FEATURE_CONSTANT_TSC) || __this_cpu_read(cpu_tsc_khz))) {
 #ifdef CONFIG_X86_64
 		struct timespec64 ts;
 
@@ -2998,7 +3015,7 @@ static void __get_kvmclock(struct kvm *kvm, struct kvm_clock_data *data)
 		data->flags |= KVM_CLOCK_TSC_STABLE;
 		hv_clock.tsc_timestamp = ka->master_cycle_now;
 		hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-		kvm_get_time_scale(NSEC_PER_SEC, __this_cpu_read(cpu_tsc_khz) * 1000LL,
+		kvm_get_time_scale(NSEC_PER_SEC, get_cpu_tsc_khz() * 1000LL,
 				   &hv_clock.tsc_shift,
 				   &hv_clock.tsc_to_system_mul);
 		data->clock = __pvclock_read_cycles(&hv_clock, data->host_tsc);
@@ -3108,7 +3125,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 
 	/* Keep irq disabled to prevent changes to the clock */
 	local_irq_save(flags);
-	tgt_tsc_khz = __this_cpu_read(cpu_tsc_khz);
+	tgt_tsc_khz = get_cpu_tsc_khz();
 	if (unlikely(tgt_tsc_khz == 0)) {
 		local_irq_restore(flags);
 		kvm_make_request(KVM_REQ_CLOCK_UPDATE, v);
@@ -9038,9 +9055,11 @@ static void tsc_khz_changed(void *data)
 	struct cpufreq_freqs *freq = data;
 	unsigned long khz = 0;
 
+	WARN_ON_ONCE(boot_cpu_has(X86_FEATURE_CONSTANT_TSC));
+
 	if (data)
 		khz = freq->new;
-	else if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+	else
 		khz = cpufreq_quick_get(raw_smp_processor_id());
 	if (!khz)
 		khz = tsc_khz;
@@ -9061,8 +9080,10 @@ static void kvm_hyperv_tsc_notifier(void)
 	hyperv_stop_tsc_emulation();
 
 	/* TSC frequency always matches when on Hyper-V */
-	for_each_present_cpu(cpu)
-		per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
+		for_each_present_cpu(cpu)
+			per_cpu(cpu_tsc_khz, cpu) = tsc_khz;
+	}
 	kvm_caps.max_guest_tsc_khz = tsc_khz;
 
 	list_for_each_entry(kvm, &vm_list, vm_list) {
@@ -9199,10 +9220,10 @@ static void kvm_timer_init(void)
 		}
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
-	}
 
-	cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
-			  kvmclock_cpu_online, kvmclock_cpu_down_prep);
+		cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
+				  kvmclock_cpu_online, kvmclock_cpu_down_prep);
+	}
 }
 
 #ifdef CONFIG_X86_64
@@ -9362,10 +9383,11 @@ void kvm_arch_exit(void)
 #endif
 	kvm_lapic_exit();
 
-	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
+	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 		cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block,
 					    CPUFREQ_TRANSITION_NOTIFIER);
-	cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
+		cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
+	}
 #ifdef CONFIG_X86_64
 	pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
 	irq_work_sync(&pvclock_irq_work);
-- 
GitLab


From 10aa7cd398a9ead7464a7f8b49d4e4c843806813 Mon Sep 17 00:00:00 2001
From: Yang Yang <yang.yang29@zte.com.cn>
Date: Wed, 30 Nov 2022 17:44:37 +0800
Subject: [PATCH 3148/4122] IB/hfi1: Switch to netif_napi_add()

There is no need to use netif_napi_add_weight() when the weight argument
is 64. See "net: drop the weight argument from netif_napi_add".

Signed-off-by: Yang Yang <yang.yang29@zte.com.cn>
Link: https://lore.kernel.org/r/202211301744378304494@zte.com.cn
Reviewed-by: xu xin <xu.xin16@zte.com.cn>
Reviewed-by: Zhang Yunkai <zhang.yunkai@zte.com.cn>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/hfi1/netdev_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hfi1/netdev_rx.c b/drivers/infiniband/hw/hfi1/netdev_rx.c
index 3dfa5aff25125..720d4c85c9c93 100644
--- a/drivers/infiniband/hw/hfi1/netdev_rx.c
+++ b/drivers/infiniband/hw/hfi1/netdev_rx.c
@@ -216,7 +216,7 @@ static int hfi1_netdev_rxq_init(struct hfi1_netdev_rx *rx)
 		 * right now.
 		 */
 		set_bit(NAPI_STATE_NO_BUSY_POLL, &rxq->napi.state);
-		netif_napi_add_weight(dev, &rxq->napi, hfi1_netdev_rx_napi, 64);
+		netif_napi_add(dev, &rxq->napi, hfi1_netdev_rx_napi);
 		rc = msix_netdev_request_rcd_irq(rxq->rcd);
 		if (rc)
 			goto bail_context_irq_failure;
-- 
GitLab


From 32975c491ee410598b33201344c123fcc81a7c33 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 23 Nov 2022 17:39:18 +0100
Subject: [PATCH 3149/4122] uapi: Add missing _UAPI prefix to
 <asm-generic/types.h> include guard

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 include/uapi/asm-generic/types.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/uapi/asm-generic/types.h b/include/uapi/asm-generic/types.h
index dfaa50d99d8f3..7ad4dd01b8bf0 100644
--- a/include/uapi/asm-generic/types.h
+++ b/include/uapi/asm-generic/types.h
@@ -1,9 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _ASM_GENERIC_TYPES_H
-#define _ASM_GENERIC_TYPES_H
+#ifndef _UAPI_ASM_GENERIC_TYPES_H
+#define _UAPI_ASM_GENERIC_TYPES_H
 /*
  * int-ll64 is used everywhere now.
  */
 #include <asm-generic/int-ll64.h>
 
-#endif /* _ASM_GENERIC_TYPES_H */
+#endif /* _UAPI_ASM_GENERIC_TYPES_H */
-- 
GitLab


From 38931d8989b5760b0bd17c9ec99e81986258e4cb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 22 Sep 2022 13:08:16 -0700
Subject: [PATCH 3150/4122] mm: Make ksize() a reporting-only function

With all "silently resizing" callers of ksize() refactored, remove the
logic in ksize() that would allow it to be used to effectively change
the size of an allocation (bypassing __alloc_size hints, etc). Users
wanting this feature need to either use kmalloc_size_roundup() before an
allocation, or use krealloc() directly.

For kfree_sensitive(), move the unpoisoning logic inline. Replace the
some of the partially open-coded ksize() in __do_krealloc with ksize()
now that it doesn't perform unpoisoning.

Adjust the KUnit tests to match the new ksize() behavior. Execution
tested with:

$ ./tools/testing/kunit/kunit.py run \
	--kconfig_add CONFIG_KASAN=y \
	--kconfig_add CONFIG_KASAN_GENERIC=y \
	--arch x86_64 kasan

Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Hyeonggon Yoo <42.hyeyoo@gmail.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: linux-mm@kvack.org
Cc: kasan-dev@googlegroups.com
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: David Rientjes <rientjes@google.com>
Enhanced-by: Andrey Konovalov <andreyknvl@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 mm/kasan/kasan_test.c | 19 +++++++++++++------
 mm/slab_common.c      | 26 ++++++++++----------------
 2 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index 0d59098f08761..73684642c42d8 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -783,23 +783,30 @@ static void kasan_global_oob_left(struct kunit *test)
 	KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
 
-/* Check that ksize() makes the whole object accessible. */
+/* Check that ksize() does NOT unpoison whole object. */
 static void ksize_unpoisons_memory(struct kunit *test)
 {
 	char *ptr;
-	size_t size = 123, real_size;
+	size_t size = 128 - KASAN_GRANULE_SIZE - 5;
+	size_t real_size;
 
 	ptr = kmalloc(size, GFP_KERNEL);
 	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
 	real_size = ksize(ptr);
+	KUNIT_EXPECT_GT(test, real_size, size);
 
 	OPTIMIZER_HIDE_VAR(ptr);
 
-	/* This access shouldn't trigger a KASAN report. */
-	ptr[size] = 'x';
+	/* These accesses shouldn't trigger a KASAN report. */
+	ptr[0] = 'x';
+	ptr[size - 1] = 'x';
 
-	/* This one must. */
-	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size]);
+	/* These must trigger a KASAN report. */
+	if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+		KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]);
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size + 5]);
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size - 1]);
 
 	kfree(ptr);
 }
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 33b1886b06ebf..7e96abf1bd7d1 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1333,11 +1333,11 @@ __do_krealloc(const void *p, size_t new_size, gfp_t flags)
 	void *ret;
 	size_t ks;
 
-	/* Don't use instrumented ksize to allow precise KASAN poisoning. */
+	/* Check for double-free before calling ksize. */
 	if (likely(!ZERO_OR_NULL_PTR(p))) {
 		if (!kasan_check_byte(p))
 			return NULL;
-		ks = kfence_ksize(p) ?: __ksize(p);
+		ks = ksize(p);
 	} else
 		ks = 0;
 
@@ -1405,8 +1405,10 @@ void kfree_sensitive(const void *p)
 	void *mem = (void *)p;
 
 	ks = ksize(mem);
-	if (ks)
+	if (ks) {
+		kasan_unpoison_range(mem, ks);
 		memzero_explicit(mem, ks);
+	}
 	kfree(mem);
 }
 EXPORT_SYMBOL(kfree_sensitive);
@@ -1427,13 +1429,11 @@ EXPORT_SYMBOL(kfree_sensitive);
  */
 size_t ksize(const void *objp)
 {
-	size_t size;
-
 	/*
-	 * We need to first check that the pointer to the object is valid, and
-	 * only then unpoison the memory. The report printed from ksize() is
-	 * more useful, then when it's printed later when the behaviour could
-	 * be undefined due to a potential use-after-free or double-free.
+	 * We need to first check that the pointer to the object is valid.
+	 * The KASAN report printed from ksize() is more useful, then when
+	 * it's printed later when the behaviour could be undefined due to
+	 * a potential use-after-free or double-free.
 	 *
 	 * We use kasan_check_byte(), which is supported for the hardware
 	 * tag-based KASAN mode, unlike kasan_check_read/write().
@@ -1447,13 +1447,7 @@ size_t ksize(const void *objp)
 	if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp))
 		return 0;
 
-	size = kfence_ksize(objp) ?: __ksize(objp);
-	/*
-	 * We assume that ksize callers could use whole allocated area,
-	 * so we need to unpoison this area.
-	 */
-	kasan_unpoison_range(objp, size);
-	return size;
+	return kfence_ksize(objp) ?: __ksize(objp);
 }
 EXPORT_SYMBOL(ksize);
 
-- 
GitLab


From 25226df4b9be7f6d5d722af5b75e86e76e5c3a80 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 21 Sep 2022 13:46:03 -0500
Subject: [PATCH 3151/4122] mm/pgtable: Fix multiple -Wstringop-overflow
 warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The actual size of the following arrays at run-time depends on
CONFIG_X86_PAE.

427         pmd_t *u_pmds[MAX_PREALLOCATED_USER_PMDS];
428         pmd_t *pmds[MAX_PREALLOCATED_PMDS];

If CONFIG_X86_PAE is not enabled, their final size will be zero (which
is technically not a legal storage size in C, but remains "valid" via
the GNU extension). In that case, the compiler complains about trying to
access objects of size zero when calling functions where these objects
are passed as arguments.

Fix this by sanity-checking the size of those arrays just before the
function calls. Also, the following warnings are fixed by these changes
when building with GCC 11+ and -Wstringop-overflow enabled:

arch/x86/mm/pgtable.c:437:13: warning: ‘preallocate_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=]
arch/x86/mm/pgtable.c:440:13: warning: ‘preallocate_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=]
arch/x86/mm/pgtable.c:462:9: warning: ‘free_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=]
arch/x86/mm/pgtable.c:455:9: warning: ‘pgd_prepopulate_user_pmd’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=]
arch/x86/mm/pgtable.c:464:9: warning: ‘free_pmds.constprop’ accessing 8 bytes in a region of size 0 [-Wstringop-overflow=]

This is one of the last cases in the ongoing effort to globally enable
-Wstringop-overflow.

The alternative to this is to make the originally suggested change:
make the pmds argument from an array pointer to a pointer pointer. That
situation is considered "legal" for C in the sense that it does not have
a way to reason about the storage. i.e.:

-static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
+static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t **pmds)

With the above change, there's no difference in binary output, and the
compiler warning is silenced.

However, with this patch, the compiler can actually figure out that it
isn't using the code at all, and it gets dropped:

   text    data     bss     dec     hex filename
   8218     718      32    8968    2308 arch/x86/mm/pgtable.o.before
   7765     694      32    8491    212b arch/x86/mm/pgtable.o.after

So this case (fixing a warning and reducing image size) is a clear win.

Additionally drops an old work-around for GCC in the same code.

Link: https://github.com/KSPP/linux/issues/203
Link: https://github.com/KSPP/linux/issues/181
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/Yytb67xvrnctxnEe@work
---
 arch/x86/mm/pgtable.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 8525f2876fb40..e4f499eb0f295 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -299,9 +299,6 @@ static void pgd_prepopulate_pmd(struct mm_struct *mm, pgd_t *pgd, pmd_t *pmds[])
 	pud_t *pud;
 	int i;
 
-	if (PREALLOCATED_PMDS == 0) /* Work around gcc-3.4.x bug */
-		return;
-
 	p4d = p4d_offset(pgd, 0);
 	pud = pud_offset(p4d, 0);
 
@@ -434,10 +431,12 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 
 	mm->pgd = pgd;
 
-	if (preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
+	if (sizeof(pmds) != 0 &&
+			preallocate_pmds(mm, pmds, PREALLOCATED_PMDS) != 0)
 		goto out_free_pgd;
 
-	if (preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
+	if (sizeof(u_pmds) != 0 &&
+			preallocate_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS) != 0)
 		goto out_free_pmds;
 
 	if (paravirt_pgd_alloc(mm) != 0)
@@ -451,17 +450,22 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	spin_lock(&pgd_lock);
 
 	pgd_ctor(mm, pgd);
-	pgd_prepopulate_pmd(mm, pgd, pmds);
-	pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
+	if (sizeof(pmds) != 0)
+		pgd_prepopulate_pmd(mm, pgd, pmds);
+
+	if (sizeof(u_pmds) != 0)
+		pgd_prepopulate_user_pmd(mm, pgd, u_pmds);
 
 	spin_unlock(&pgd_lock);
 
 	return pgd;
 
 out_free_user_pmds:
-	free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
+	if (sizeof(u_pmds) != 0)
+		free_pmds(mm, u_pmds, PREALLOCATED_USER_PMDS);
 out_free_pmds:
-	free_pmds(mm, pmds, PREALLOCATED_PMDS);
+	if (sizeof(pmds) != 0)
+		free_pmds(mm, pmds, PREALLOCATED_PMDS);
 out_free_pgd:
 	_pgd_free(pgd);
 out:
-- 
GitLab


From 9360d035a579d95d1e76c471061b9065b18a0eb1 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2022 15:43:21 -0800
Subject: [PATCH 3152/4122] panic: Separate sysctl logic from CONFIG_SMP

In preparation for adding more sysctls directly in kernel/panic.c, split
CONFIG_SMP from the logic that adds sysctls.

Cc: Petr Mladek <pmladek@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: tangmeng <tangmeng@uniontech.com>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221117234328.594699-1-keescook@chromium.org
---
 kernel/panic.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/kernel/panic.c b/kernel/panic.c
index da323209f5833..d843d036651e0 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -75,8 +75,9 @@ ATOMIC_NOTIFIER_HEAD(panic_notifier_list);
 
 EXPORT_SYMBOL(panic_notifier_list);
 
-#if defined(CONFIG_SMP) && defined(CONFIG_SYSCTL)
+#ifdef CONFIG_SYSCTL
 static struct ctl_table kern_panic_table[] = {
+#ifdef CONFIG_SMP
 	{
 		.procname       = "oops_all_cpu_backtrace",
 		.data           = &sysctl_oops_all_cpu_backtrace,
@@ -86,6 +87,7 @@ static struct ctl_table kern_panic_table[] = {
 		.extra1         = SYSCTL_ZERO,
 		.extra2         = SYSCTL_ONE,
 	},
+#endif
 	{ }
 };
 
-- 
GitLab


From d4ccd54d28d3c8598e2354acc13e28c060961dbb Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Thu, 17 Nov 2022 15:43:22 -0800
Subject: [PATCH 3153/4122] exit: Put an upper limit on how often we can oops

Many Linux systems are configured to not panic on oops; but allowing an
attacker to oops the system **really** often can make even bugs that look
completely unexploitable exploitable (like NULL dereferences and such) if
each crash elevates a refcount by one or a lock is taken in read mode, and
this causes a counter to eventually overflow.

The most interesting counters for this are 32 bits wide (like open-coded
refcounts that don't use refcount_t). (The ldsem reader count on 32-bit
platforms is just 16 bits, but probably nobody cares about 32-bit platforms
that much nowadays.)

So let's panic the system if the kernel is constantly oopsing.

The speed of oopsing 2^32 times probably depends on several factors, like
how long the stack trace is and which unwinder you're using; an empirically
important one is whether your console is showing a graphical environment or
a text console that oopses will be printed to.
In a quick single-threaded benchmark, it looks like oopsing in a vfork()
child with a very short stack trace only takes ~510 microseconds per run
when a graphical console is active; but switching to a text console that
oopses are printed to slows it down around 87x, to ~45 milliseconds per
run.
(Adding more threads makes this faster, but the actual oops printing
happens under &die_lock on x86, so you can maybe speed this up by a factor
of around 2 and then any further improvement gets eaten up by lock
contention.)

It looks like it would take around 8-12 days to overflow a 32-bit counter
with repeated oopsing on a multi-core X86 system running a graphical
environment; both me (in an X86 VM) and Seth (with a distro kernel on
normal hardware in a standard configuration) got numbers in that ballpark.

12 days aren't *that* short on a desktop system, and you'd likely need much
longer on a typical server system (assuming that people don't run graphical
desktop environments on their servers), and this is a *very* noisy and
violent approach to exploiting the kernel; and it also seems to take orders
of magnitude longer on some machines, probably because stuff like EFI
pstore will slow it down a ton if that's active.

Signed-off-by: Jann Horn <jannh@google.com>
Link: https://lore.kernel.org/r/20221107201317.324457-1-jannh@google.com
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221117234328.594699-2-keescook@chromium.org
---
 Documentation/admin-guide/sysctl/kernel.rst |  8 ++++
 kernel/exit.c                               | 42 +++++++++++++++++++++
 2 files changed, 50 insertions(+)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 98d1b198b2b4c..09f3fb2f85858 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -667,6 +667,14 @@ This is the default behavior.
 an oops event is detected.
 
 
+oops_limit
+==========
+
+Number of kernel oopses after which the kernel should panic when
+``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect
+as setting ``panic_on_oops=1``.
+
+
 osrelease, ostype & version
 ===========================
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 35e0a31a0315c..2ab3ead621189 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -72,6 +72,33 @@
 #include <asm/unistd.h>
 #include <asm/mmu_context.h>
 
+/*
+ * The default value should be high enough to not crash a system that randomly
+ * crashes its kernel from time to time, but low enough to at least not permit
+ * overflowing 32-bit refcounts or the ldsem writer count.
+ */
+static unsigned int oops_limit = 10000;
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table kern_exit_table[] = {
+	{
+		.procname       = "oops_limit",
+		.data           = &oops_limit,
+		.maxlen         = sizeof(oops_limit),
+		.mode           = 0644,
+		.proc_handler   = proc_douintvec,
+	},
+	{ }
+};
+
+static __init int kernel_exit_sysctls_init(void)
+{
+	register_sysctl_init("kernel", kern_exit_table);
+	return 0;
+}
+late_initcall(kernel_exit_sysctls_init);
+#endif
+
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
 	nr_threads--;
@@ -874,6 +901,8 @@ void __noreturn do_exit(long code)
 
 void __noreturn make_task_dead(int signr)
 {
+	static atomic_t oops_count = ATOMIC_INIT(0);
+
 	/*
 	 * Take the task off the cpu after something catastrophic has
 	 * happened.
@@ -897,6 +926,19 @@ void __noreturn make_task_dead(int signr)
 		preempt_count_set(PREEMPT_ENABLED);
 	}
 
+	/*
+	 * Every time the system oopses, if the oops happens while a reference
+	 * to an object was held, the reference leaks.
+	 * If the oops doesn't also leak memory, repeated oopsing can cause
+	 * reference counters to wrap around (if they're not using refcount_t).
+	 * This means that repeated oopsing can make unexploitable-looking bugs
+	 * exploitable through repeated oopsing.
+	 * To make sure this can't happen, place an upper bound on how often the
+	 * kernel may oops without panic().
+	 */
+	if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit))
+		panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit);
+
 	/*
 	 * We're taking recursive faults here in make_task_dead. Safest is to just
 	 * leave this task alone and wait for reboot.
-- 
GitLab


From 9db89b41117024f80b38b15954017fb293133364 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2022 15:43:23 -0800
Subject: [PATCH 3154/4122] exit: Expose "oops_count" to sysfs

Since Oops count is now tracked and is a fairly interesting signal, add
the entry /sys/kernel/oops_count to expose it to userspace.

Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Jann Horn <jannh@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221117234328.594699-3-keescook@chromium.org
---
 .../ABI/testing/sysfs-kernel-oops_count       |  6 +++++
 MAINTAINERS                                   |  1 +
 kernel/exit.c                                 | 22 +++++++++++++++++--
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-oops_count

diff --git a/Documentation/ABI/testing/sysfs-kernel-oops_count b/Documentation/ABI/testing/sysfs-kernel-oops_count
new file mode 100644
index 0000000000000..156cca9dbc960
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-oops_count
@@ -0,0 +1,6 @@
+What:		/sys/kernel/oops_count
+Date:		November 2022
+KernelVersion:	6.2.0
+Contact:	Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
+Description:
+		Shows how many times the system has Oopsed since last boot.
diff --git a/MAINTAINERS b/MAINTAINERS
index 1cd80c1137212..0a1e95a58e54c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11106,6 +11106,7 @@ M:	Kees Cook <keescook@chromium.org>
 L:	linux-hardening@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
+F:	Documentation/ABI/testing/sysfs-kernel-oops_count
 F:	include/linux/overflow.h
 F:	include/linux/randomize_kstack.h
 F:	mm/usercopy.c
diff --git a/kernel/exit.c b/kernel/exit.c
index 2ab3ead621189..dc1a32149f944 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -67,6 +67,7 @@
 #include <linux/io_uring.h>
 #include <linux/kprobes.h>
 #include <linux/rethook.h>
+#include <linux/sysfs.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -99,6 +100,25 @@ static __init int kernel_exit_sysctls_init(void)
 late_initcall(kernel_exit_sysctls_init);
 #endif
 
+static atomic_t oops_count = ATOMIC_INIT(0);
+
+#ifdef CONFIG_SYSFS
+static ssize_t oops_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+			       char *page)
+{
+	return sysfs_emit(page, "%d\n", atomic_read(&oops_count));
+}
+
+static struct kobj_attribute oops_count_attr = __ATTR_RO(oops_count);
+
+static __init int kernel_exit_sysfs_init(void)
+{
+	sysfs_add_file_to_group(kernel_kobj, &oops_count_attr.attr, NULL);
+	return 0;
+}
+late_initcall(kernel_exit_sysfs_init);
+#endif
+
 static void __unhash_process(struct task_struct *p, bool group_dead)
 {
 	nr_threads--;
@@ -901,8 +921,6 @@ void __noreturn do_exit(long code)
 
 void __noreturn make_task_dead(int signr)
 {
-	static atomic_t oops_count = ATOMIC_INIT(0);
-
 	/*
 	 * Take the task off the cpu after something catastrophic has
 	 * happened.
-- 
GitLab


From 9d720a5a658f5135861773f26e927449bef93d61 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 30 Nov 2022 09:25:51 -0800
Subject: [PATCH 3155/4122] xfs: hoist refcount record merge predicates

Hoist these multiline conditionals into separate static inline helpers
to improve readability and set the stage for corruption fixes that will
be introduced in the next patch.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
---
 fs/xfs/libxfs/xfs_refcount.c | 129 ++++++++++++++++++++++++++++++-----
 1 file changed, 113 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 3f34bafe18dd1..4408893333a6c 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -815,11 +815,119 @@ out_error:
 /* Is this extent valid? */
 static inline bool
 xfs_refc_valid(
-	struct xfs_refcount_irec	*rc)
+	const struct xfs_refcount_irec	*rc)
 {
 	return rc->rc_startblock != NULLAGBLOCK;
 }
 
+static inline bool
+xfs_refc_want_merge_center(
+	const struct xfs_refcount_irec	*left,
+	const struct xfs_refcount_irec	*cleft,
+	const struct xfs_refcount_irec	*cright,
+	const struct xfs_refcount_irec	*right,
+	bool				cleft_is_cright,
+	enum xfs_refc_adjust_op		adjust,
+	unsigned long long		*ulenp)
+{
+	unsigned long long		ulen = left->rc_blockcount;
+
+	/*
+	 * To merge with a center record, both shoulder records must be
+	 * adjacent to the record we want to adjust.  This is only true if
+	 * find_left and find_right made all four records valid.
+	 */
+	if (!xfs_refc_valid(left)  || !xfs_refc_valid(right) ||
+	    !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
+		return false;
+
+	/* There must only be one record for the entire range. */
+	if (!cleft_is_cright)
+		return false;
+
+	/* The shoulder record refcounts must match the new refcount. */
+	if (left->rc_refcount != cleft->rc_refcount + adjust)
+		return false;
+	if (right->rc_refcount != cleft->rc_refcount + adjust)
+		return false;
+
+	/*
+	 * The new record cannot exceed the max length.  ulen is a ULL as the
+	 * individual record block counts can be up to (u32 - 1) in length
+	 * hence we need to catch u32 addition overflows here.
+	 */
+	ulen += cleft->rc_blockcount + right->rc_blockcount;
+	if (ulen >= MAXREFCEXTLEN)
+		return false;
+
+	*ulenp = ulen;
+	return true;
+}
+
+static inline bool
+xfs_refc_want_merge_left(
+	const struct xfs_refcount_irec	*left,
+	const struct xfs_refcount_irec	*cleft,
+	enum xfs_refc_adjust_op		adjust)
+{
+	unsigned long long		ulen = left->rc_blockcount;
+
+	/*
+	 * For a left merge, the left shoulder record must be adjacent to the
+	 * start of the range.  If this is true, find_left made left and cleft
+	 * contain valid contents.
+	 */
+	if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
+		return false;
+
+	/* Left shoulder record refcount must match the new refcount. */
+	if (left->rc_refcount != cleft->rc_refcount + adjust)
+		return false;
+
+	/*
+	 * The new record cannot exceed the max length.  ulen is a ULL as the
+	 * individual record block counts can be up to (u32 - 1) in length
+	 * hence we need to catch u32 addition overflows here.
+	 */
+	ulen += cleft->rc_blockcount;
+	if (ulen >= MAXREFCEXTLEN)
+		return false;
+
+	return true;
+}
+
+static inline bool
+xfs_refc_want_merge_right(
+	const struct xfs_refcount_irec	*cright,
+	const struct xfs_refcount_irec	*right,
+	enum xfs_refc_adjust_op		adjust)
+{
+	unsigned long long		ulen = right->rc_blockcount;
+
+	/*
+	 * For a right merge, the right shoulder record must be adjacent to the
+	 * end of the range.  If this is true, find_right made cright and right
+	 * contain valid contents.
+	 */
+	if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
+		return false;
+
+	/* Right shoulder record refcount must match the new refcount. */
+	if (right->rc_refcount != cright->rc_refcount + adjust)
+		return false;
+
+	/*
+	 * The new record cannot exceed the max length.  ulen is a ULL as the
+	 * individual record block counts can be up to (u32 - 1) in length
+	 * hence we need to catch u32 addition overflows here.
+	 */
+	ulen += cright->rc_blockcount;
+	if (ulen >= MAXREFCEXTLEN)
+		return false;
+
+	return true;
+}
+
 /*
  * Try to merge with any extents on the boundaries of the adjustment range.
  */
@@ -861,23 +969,15 @@ xfs_refcount_merge_extents(
 		 (cleft.rc_blockcount == cright.rc_blockcount);
 
 	/* Try to merge left, cleft, and right.  cleft must == cright. */
-	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount +
-			right.rc_blockcount;
-	if (xfs_refc_valid(&left) && xfs_refc_valid(&right) &&
-	    xfs_refc_valid(&cleft) && xfs_refc_valid(&cright) && cequal &&
-	    left.rc_refcount == cleft.rc_refcount + adjust &&
-	    right.rc_refcount == cleft.rc_refcount + adjust &&
-	    ulen < MAXREFCEXTLEN) {
+	if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
+				adjust, &ulen)) {
 		*shape_changed = true;
 		return xfs_refcount_merge_center_extents(cur, &left, &cleft,
 				&right, ulen, aglen);
 	}
 
 	/* Try to merge left and cleft. */
-	ulen = (unsigned long long)left.rc_blockcount + cleft.rc_blockcount;
-	if (xfs_refc_valid(&left) && xfs_refc_valid(&cleft) &&
-	    left.rc_refcount == cleft.rc_refcount + adjust &&
-	    ulen < MAXREFCEXTLEN) {
+	if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
 		*shape_changed = true;
 		error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
 				agbno, aglen);
@@ -893,10 +993,7 @@ xfs_refcount_merge_extents(
 	}
 
 	/* Try to merge cright and right. */
-	ulen = (unsigned long long)right.rc_blockcount + cright.rc_blockcount;
-	if (xfs_refc_valid(&right) && xfs_refc_valid(&cright) &&
-	    right.rc_refcount == cright.rc_refcount + adjust &&
-	    ulen < MAXREFCEXTLEN) {
+	if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
 		*shape_changed = true;
 		return xfs_refcount_merge_right_extent(cur, &right, &cright,
 				aglen);
-- 
GitLab


From b25d1984aa884fc91a73a5a407b9ac976d441e9b Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Wed, 30 Nov 2022 09:25:51 -0800
Subject: [PATCH 3156/4122] xfs: estimate post-merge refcounts correctly

Upon enabling fsdax + reflink for XFS, xfs/179 began to report refcount
metadata corruptions after being run.  Specifically, xfs_repair noticed
single-block refcount records that could be combined but had not been.

The root cause of this is improper MAXREFCOUNT edge case handling in
xfs_refcount_merge_extents.  When we're trying to find candidates for a
refcount btree record merge, we compute the refcount attribute of the
merged record, but we fail to account for the fact that once a record
hits rc_refcount == MAXREFCOUNT, it is pinned that way forever.  Hence
the computed refcount is wrong, and we fail to merge the extents.

Fix this by adjusting the merge predicates to compute the adjusted
refcount correctly.

Fixes: 3172725814f9 ("xfs: adjust refcount of an extent of blocks in refcount btree")
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Xiao Yang <yangx.jy@fujitsu.com>
---
 fs/xfs/libxfs/xfs_refcount.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 4408893333a6c..6f7ed9288fe40 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -820,6 +820,17 @@ xfs_refc_valid(
 	return rc->rc_startblock != NULLAGBLOCK;
 }
 
+static inline xfs_nlink_t
+xfs_refc_merge_refcount(
+	const struct xfs_refcount_irec	*irec,
+	enum xfs_refc_adjust_op		adjust)
+{
+	/* Once a record hits MAXREFCOUNT, it is pinned there forever */
+	if (irec->rc_refcount == MAXREFCOUNT)
+		return MAXREFCOUNT;
+	return irec->rc_refcount + adjust;
+}
+
 static inline bool
 xfs_refc_want_merge_center(
 	const struct xfs_refcount_irec	*left,
@@ -831,6 +842,7 @@ xfs_refc_want_merge_center(
 	unsigned long long		*ulenp)
 {
 	unsigned long long		ulen = left->rc_blockcount;
+	xfs_nlink_t			new_refcount;
 
 	/*
 	 * To merge with a center record, both shoulder records must be
@@ -846,9 +858,10 @@ xfs_refc_want_merge_center(
 		return false;
 
 	/* The shoulder record refcounts must match the new refcount. */
-	if (left->rc_refcount != cleft->rc_refcount + adjust)
+	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+	if (left->rc_refcount != new_refcount)
 		return false;
-	if (right->rc_refcount != cleft->rc_refcount + adjust)
+	if (right->rc_refcount != new_refcount)
 		return false;
 
 	/*
@@ -871,6 +884,7 @@ xfs_refc_want_merge_left(
 	enum xfs_refc_adjust_op		adjust)
 {
 	unsigned long long		ulen = left->rc_blockcount;
+	xfs_nlink_t			new_refcount;
 
 	/*
 	 * For a left merge, the left shoulder record must be adjacent to the
@@ -881,7 +895,8 @@ xfs_refc_want_merge_left(
 		return false;
 
 	/* Left shoulder record refcount must match the new refcount. */
-	if (left->rc_refcount != cleft->rc_refcount + adjust)
+	new_refcount = xfs_refc_merge_refcount(cleft, adjust);
+	if (left->rc_refcount != new_refcount)
 		return false;
 
 	/*
@@ -903,6 +918,7 @@ xfs_refc_want_merge_right(
 	enum xfs_refc_adjust_op		adjust)
 {
 	unsigned long long		ulen = right->rc_blockcount;
+	xfs_nlink_t			new_refcount;
 
 	/*
 	 * For a right merge, the right shoulder record must be adjacent to the
@@ -913,7 +929,8 @@ xfs_refc_want_merge_right(
 		return false;
 
 	/* Right shoulder record refcount must match the new refcount. */
-	if (right->rc_refcount != cright->rc_refcount + adjust)
+	new_refcount = xfs_refc_merge_refcount(cright, adjust);
+	if (right->rc_refcount != new_refcount)
 		return false;
 
 	/*
-- 
GitLab


From 8c25febf23963431686f04874b96321288504127 Mon Sep 17 00:00:00 2001
From: Guo Xuenan <guoxuenan@huawei.com>
Date: Thu, 1 Dec 2022 09:36:16 -0800
Subject: [PATCH 3157/4122] xfs: get rid of assert from xfs_btree_islastblock

xfs_btree_check_block contains debugging knobs. With XFS_DEBUG setting up,
turn on the debugging knob can trigger the assert of xfs_btree_islastblock,
test script as follows:

while true
do
    mount $disk $mountpoint
    fsstress -d $testdir -l 0 -n 10000 -p 4 >/dev/null
    echo 1 > /sys/fs/xfs/sda/errortag/btree_chk_sblk
    sleep 10
    umount $mountpoint
done

Kick off fsstress and only *then* turn on the debugging knob. If it
happens that the knob gets turned on after the cntbt lookup succeeds
but before the call to xfs_btree_islastblock, then we *can* end up in
the situation where a previously checked btree block suddenly starts
returning EFSCORRUPTED from xfs_btree_check_block. Kaboom.

Darrick give a very detailed explanation as follows:
Looking back at commit 27d9ee577dcce, I think the point of all this was
to make sure that the cursor has actually performed a lookup, and that
the btree block at whatever level we're asking about is ok.

If the caller hasn't ever done a lookup, the bc_levels array will be
empty, so cur->bc_levels[level].bp pointer will be NULL.  The call to
xfs_btree_get_block will crash anyway, so the "ASSERT(block);" part is
pointless.

If the caller did a lookup but the lookup failed due to block
corruption, the corresponding cur->bc_levels[level].bp pointer will also
be NULL, and we'll still crash.  The "ASSERT(xfs_btree_check_block);"
logic is also unnecessary.

If the cursor level points to an inode root, the block buffer will be
incore, so it had better always be consistent.

If the caller ignores a failed lookup after a successful one and calls
this function, the cursor state is garbage and the assert wouldn't have
tripped anyway. So get rid of the assert.

Fixes: 27d9ee577dcc ("xfs: actually check xfs_btree_check_block return in xfs_btree_islastblock")
Signed-off-by: Guo Xuenan <guoxuenan@huawei.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/libxfs/xfs_btree.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index eef27858a0135..29c4b4ccb909f 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -556,7 +556,6 @@ xfs_btree_islastblock(
 	struct xfs_buf		*bp;
 
 	block = xfs_btree_get_block(cur, level, &bp);
-	ASSERT(block && xfs_btree_check_block(cur, block, level, bp) == 0);
 
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
 		return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLFSBLOCK);
-- 
GitLab


From ddfdd530e43fcb3f7a0a69966e5f6c33497b4ae3 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Thu, 1 Dec 2022 09:36:16 -0800
Subject: [PATCH 3158/4122] xfs: invalidate xfs_bufs when allocating cow
 extents

While investigating test failures in xfs/17[1-3] in alwayscow mode, I
noticed through code inspection that xfs_bmap_alloc_userdata isn't
setting XFS_ALLOC_USERDATA when allocating extents for a file's CoW
fork.  COW staging extents should be flagged as USERDATA, since user
data are persisted to these blocks before being remapped into a file.

This mis-classification has a few impacts on the behavior of the system.
First, the filestreams allocator is supposed to keep allocating from a
chosen AG until it runs out of space in that AG.  However, it only does
that for USERDATA allocations, which means that COW allocations aren't
tied to the filestreams AG.  Fortunately, few people use filestreams, so
nobody's noticed.

A more serious problem is that xfs_alloc_ag_vextent_small looks for a
buffer to invalidate *if* the USERDATA flag is set and the AG is so full
that the allocation had to come from the AGFL because the cntbt is
empty.  The consequences of not invalidating the buffer are severe --
if the AIL incorrectly checkpoints a buffer that is now being used to
store user data, that action will clobber the user's written data.

Fix filestreams and yet another data corruption vector by flagging COW
allocations as USERDATA.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
---
 fs/xfs/libxfs/xfs_bmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 56b9b7db38bbd..0d56a8d862e80 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -4058,7 +4058,7 @@ xfs_bmap_alloc_userdata(
 	 * the busy list.
 	 */
 	bma->datatype = XFS_ALLOC_NOBUSY;
-	if (whichfork == XFS_DATA_FORK) {
+	if (whichfork == XFS_DATA_FORK || whichfork == XFS_COW_FORK) {
 		bma->datatype |= XFS_ALLOC_USERDATA;
 		if (bma->offset == 0)
 			bma->datatype |= XFS_ALLOC_INITIAL_USER_DATA;
-- 
GitLab


From 3d50b95b50db36de945bfc34d4d94b7e11ee8fd9 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Mon, 14 Nov 2022 11:56:06 +0000
Subject: [PATCH 3159/4122] i2c: smbus: add DDR support for SPD

On my x05 laptop I got:
Memory type 0x12 not supported yet, not instantiating SPD

Adding the 0x12 case lead to a successful instantiated SPD AT24 EEPROM.
i801_smbus 0000:00:1f.3: SMBus using polling
i2c i2c-6: 2/2 memory slots populated (from DMI)
at24 6-0050: 256 byte spd EEPROM, read-only
i2c i2c-6: Successfully instantiated SPD at 0x50
at24 6-0051: 256 byte spd EEPROM, read-only

And then, I decoded it successfully via decode-dimms.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Reviewed-by: Jean Delvare <jdelvare@suse.de>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/i2c-smbus.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c
index 07c92c8495a3c..c85710ed95483 100644
--- a/drivers/i2c/i2c-smbus.c
+++ b/drivers/i2c/i2c-smbus.c
@@ -361,9 +361,15 @@ void i2c_register_spd(struct i2c_adapter *adap)
 		return;
 	}
 
+	/*
+	 * Memory types could be found at section 7.18.2 (Memory Device — Type), table 78
+	 * https://www.dmtf.org/sites/default/files/standards/documents/DSP0134_3.6.0.pdf
+	 */
 	switch (common_mem_type) {
+	case 0x12:	/* DDR */
 	case 0x13:	/* DDR2 */
 	case 0x18:	/* DDR3 */
+	case 0x1B:	/* LPDDR */
 	case 0x1C:	/* LPDDR2 */
 	case 0x1D:	/* LPDDR3 */
 		name = "spd";
-- 
GitLab


From 5bf71889ad9a4d39b7665c105a005c5a33d730ba Mon Sep 17 00:00:00 2001
From: Akhil R <akhilrajeev@nvidia.com>
Date: Thu, 17 Nov 2022 15:34:15 +0530
Subject: [PATCH 3160/4122] i2c: tegra: Set ACPI node as primary fwnode

Set ACPI node as the primary fwnode of I2C adapter to allow
enumeration of child devices from the ACPI table

Signed-off-by: Zubair Waheed <zwaheed@nvidia.com>
Signed-off-by: Akhil R <akhilrajeev@nvidia.com>
Reviewed-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-tegra.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/i2c/busses/i2c-tegra.c b/drivers/i2c/busses/i2c-tegra.c
index 954022c04cc42..69c9ae161bbec 100644
--- a/drivers/i2c/busses/i2c-tegra.c
+++ b/drivers/i2c/busses/i2c-tegra.c
@@ -1826,6 +1826,7 @@ static int tegra_i2c_probe(struct platform_device *pdev)
 	i2c_dev->adapter.class = I2C_CLASS_DEPRECATED;
 	i2c_dev->adapter.algo = &tegra_i2c_algo;
 	i2c_dev->adapter.nr = pdev->id;
+	ACPI_COMPANION_SET(&i2c_dev->adapter.dev, ACPI_COMPANION(&pdev->dev));
 
 	if (i2c_dev->hw->supports_bus_clear)
 		i2c_dev->adapter.bus_recovery_info = &tegra_i2c_recovery_info;
-- 
GitLab


From de917701da5d5ccb31825dca850ac49399e0f289 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Tue, 15 Nov 2022 12:30:18 +0000
Subject: [PATCH 3161/4122] dt-bindings: i2c: renesas,riic: Document RZ/Five
 SoC

The RIIC block on the RZ/Five SoC is identical to one found on the RZ/G2UL
SoC. "renesas,riic-r9a07g043" compatible string will be used on the
RZ/Five SoC so to make this clear, update the comment to include RZ/Five
SoC.

No driver changes are required as generic compatible string
"renesas,riic-rz" will be used as a fallback on RZ/Five SoC.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 Documentation/devicetree/bindings/i2c/renesas,riic.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/i2c/renesas,riic.yaml b/Documentation/devicetree/bindings/i2c/renesas,riic.yaml
index d3c0d5c427acb..2291a7cd619be 100644
--- a/Documentation/devicetree/bindings/i2c/renesas,riic.yaml
+++ b/Documentation/devicetree/bindings/i2c/renesas,riic.yaml
@@ -19,7 +19,7 @@ properties:
       - enum:
           - renesas,riic-r7s72100   # RZ/A1H
           - renesas,riic-r7s9210    # RZ/A2M
-          - renesas,riic-r9a07g043  # RZ/G2UL
+          - renesas,riic-r9a07g043  # RZ/G2UL and RZ/Five
           - renesas,riic-r9a07g044  # RZ/G2{L,LC}
           - renesas,riic-r9a07g054  # RZ/V2L
       - const: renesas,riic-rz      # RZ/A or RZ/G2L
-- 
GitLab


From a33004e844e4c60da86ecf8c249aab7179817fce Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 29 Nov 2022 17:52:59 +0000
Subject: [PATCH 3162/4122] KVM: selftests: Fix inverted "warning" in access
 tracking perf test

Warn if the number of idle pages is greater than or equal to 10% of the
total number of pages, not if the percentage of idle pages is less than
10%.  The original code asserted that less than 10% of pages were still
idle, but the check got inverted when the assert was converted to a
warning.

Opportunistically clean up the warning; selftests are 64-bit only, there
is no need to use "%PRIu64" instead of "%lu".

Fixes: 6336a810db5c ("KVM: selftests: replace assertion with warning in access_tracking_perf_test")
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221129175300.4052283-2-seanjc@google.com
---
 tools/testing/selftests/kvm/access_tracking_perf_test.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 02d3587cab0a3..d45ef319a68f3 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -185,10 +185,9 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
 	 * happens, much more pages are cached there and guest won't see the
 	 * "idle" bit cleared.
 	 */
-	if (still_idle < pages / 10)
-		printf("WARNING: vCPU%d: Too many pages still idle (%" PRIu64
-		       "out of %" PRIu64 "), this will affect performance results"
-		       ".\n",
+	if (still_idle >= pages / 10)
+		printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
+		       "this will affect performance results.\n",
 		       vcpu_idx, still_idle, pages);
 
 	close(page_idle_fd);
-- 
GitLab


From 8fcee0421386344b58fdc1cd5940219617037968 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 29 Nov 2022 17:53:00 +0000
Subject: [PATCH 3163/4122] KVM: selftests: Restore assert for non-nested VMs
 in access tracking test

Restore the assert (on x86-64) that <10% of pages are still idle when NOT
running as a nested VM in the access tracking test.  The original assert
was converted to a "warning" to avoid false failures when running the
test in a VM, but the non-nested case does not suffer from the same
"infinite TLB size" issue.

Using the HYPERVISOR flag isn't infallible as VMMs aren't strictly
required to enumerate the "feature" in CPUID, but practically speaking
anyone that is running KVM selftests in VMs is going to be using a VMM
and hypervisor that sets the HYPERVISOR flag.

Cc: David Matlack <dmatlack@google.com>
Reviewed-by: Emanuele Giuseppe Esposito <eesposit@redhat.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221129175300.4052283-3-seanjc@google.com
---
 .../selftests/kvm/access_tracking_perf_test.c   | 17 ++++++++++++-----
 .../selftests/kvm/include/x86_64/processor.h    |  1 +
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index d45ef319a68f3..9f9503e40ca56 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -46,6 +46,7 @@
 #include "test_util.h"
 #include "memstress.h"
 #include "guest_modes.h"
+#include "processor.h"
 
 /* Global variable used to synchronize all of the vCPU threads. */
 static int iteration;
@@ -180,15 +181,21 @@ static void mark_vcpu_memory_idle(struct kvm_vm *vm,
 	 * access tracking but low enough as to not make the test too brittle
 	 * over time and across architectures.
 	 *
-	 * Note that when run in nested virtualization, this check will trigger
-	 * much more frequently because TLB size is unlimited and since no flush
-	 * happens, much more pages are cached there and guest won't see the
-	 * "idle" bit cleared.
+	 * When running the guest as a nested VM, "warn" instead of asserting
+	 * as the TLB size is effectively unlimited and the KVM doesn't
+	 * explicitly flush the TLB when aging SPTEs.  As a result, more pages
+	 * are cached and the guest won't see the "idle" bit cleared.
 	 */
-	if (still_idle >= pages / 10)
+	if (still_idle >= pages / 10) {
+#ifdef __x86_64__
+		TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
+			    "vCPU%d: Too many pages still idle (%lu out of %lu)",
+			    vcpu_idx, still_idle, pages);
+#endif
 		printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
 		       "this will affect performance results.\n",
 		       vcpu_idx, still_idle, pages);
+	}
 
 	close(page_idle_fd);
 	close(pagemap_fd);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 5d310abe6c3f4..22852bd32d7b7 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -94,6 +94,7 @@ struct kvm_x86_cpu_feature {
 #define	X86_FEATURE_XSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
 #define	X86_FEATURE_OSXSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
 #define	X86_FEATURE_RDRAND		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define	X86_FEATURE_HYPERVISOR		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
 #define X86_FEATURE_PAE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
 #define	X86_FEATURE_MCE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
 #define	X86_FEATURE_APIC		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
-- 
GitLab


From 18eee7bfd18d6c9586dd224cf5b74258700fe815 Mon Sep 17 00:00:00 2001
From: Lei Wang <lei4.wang@intel.com>
Date: Mon, 28 Nov 2022 22:57:32 +0000
Subject: [PATCH 3164/4122] KVM: selftests: Move XFD CPUID checking out of
 __vm_xsave_require_permission()

Move the kvm_cpu_has() check on X86_FEATURE_XFD out of the helper to
enable off-by-default XSAVE-managed features and into the one test that
currenty requires XFD (XFeature Disable) support.   kvm_cpu_has() uses
kvm_get_supported_cpuid() and thus caches KVM_GET_SUPPORTED_CPUID, and so
using kvm_cpu_has() before ARCH_REQ_XCOMP_GUEST_PERM effectively results
in the test caching stale values, e.g. subsequent checks on AMX_TILE will
get false negatives.

Although off-by-default features are nonsensical without XFD, checking
for XFD virtualization prior to enabling such features isn't strictly
required.

Signed-off-by: Lei Wang <lei4.wang@intel.com>
Fixes: 7fbb653e01fd ("KVM: selftests: Check KVM's supported CPUID, not host CPUID, for XFD")
Link: https://lore.kernel.org/r/20221125023839.315207-1-lei4.wang@intel.com
[sean: add Fixes, reword changelog]
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221128225735.3291648-2-seanjc@google.com
---
 tools/testing/selftests/kvm/lib/x86_64/processor.c | 2 --
 tools/testing/selftests/kvm/x86_64/amx_test.c      | 1 +
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index d532c20c74fda..aac7b32a794b5 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -563,8 +563,6 @@ void __vm_xsave_require_permission(int bit, const char *name)
 		.addr = (unsigned long) &bitmask
 	};
 
-	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
-
 	kvm_fd = open_kvm_dev_path_or_exit();
 	rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
 	close(kvm_fd);
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 21de6ae42086d..1256c7faadd32 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -254,6 +254,7 @@ int main(int argc, char *argv[])
 	/* Create VM */
 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
 
+	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
-- 
GitLab


From 2ceade1d363c934633a1788d0f98fc2332062b92 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 28 Nov 2022 22:57:33 +0000
Subject: [PATCH 3165/4122] KVM: selftests: Move
 __vm_xsave_require_permission() below CPUID helpers

Move __vm_xsave_require_permission() below the CPUID helpers so that a
future change can reference the cached result of KVM_GET_SUPPORTED_CPUID
while keeping the definition of the variable close to its intended user,
kvm_get_supported_cpuid().

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221128225735.3291648-3-seanjc@google.com
---
 .../selftests/kvm/lib/x86_64/processor.c      | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index aac7b32a794b5..23067465c0355 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -552,38 +552,6 @@ static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
 	vcpu_sregs_set(vcpu, &sregs);
 }
 
-void __vm_xsave_require_permission(int bit, const char *name)
-{
-	int kvm_fd;
-	u64 bitmask;
-	long rc;
-	struct kvm_device_attr attr = {
-		.group = 0,
-		.attr = KVM_X86_XCOMP_GUEST_SUPP,
-		.addr = (unsigned long) &bitmask
-	};
-
-	kvm_fd = open_kvm_dev_path_or_exit();
-	rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
-	close(kvm_fd);
-
-	if (rc == -1 && (errno == ENXIO || errno == EINVAL))
-		__TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
-
-	TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
-
-	__TEST_REQUIRE(bitmask & (1ULL << bit),
-		       "Required XSAVE feature '%s' not supported", name);
-
-	TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));
-
-	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
-	TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
-	TEST_ASSERT(bitmask & (1ULL << bit),
-		    "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
-		    bitmask);
-}
-
 void kvm_arch_vm_post_create(struct kvm_vm *vm)
 {
 	vm_create_irqchip(vm);
@@ -705,6 +673,38 @@ uint64_t kvm_get_feature_msr(uint64_t msr_index)
 	return buffer.entry.data;
 }
 
+void __vm_xsave_require_permission(int bit, const char *name)
+{
+	int kvm_fd;
+	u64 bitmask;
+	long rc;
+	struct kvm_device_attr attr = {
+		.group = 0,
+		.attr = KVM_X86_XCOMP_GUEST_SUPP,
+		.addr = (unsigned long) &bitmask
+	};
+
+	kvm_fd = open_kvm_dev_path_or_exit();
+	rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+	close(kvm_fd);
+
+	if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+		__TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
+
+	TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
+
+	__TEST_REQUIRE(bitmask & (1ULL << bit),
+		       "Required XSAVE feature '%s' not supported", name);
+
+	TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit));
+
+	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+	TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+	TEST_ASSERT(bitmask & (1ULL << bit),
+		    "prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure bitmask=0x%lx",
+		    bitmask);
+}
+
 void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
 {
 	TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
-- 
GitLab


From cd5f3d210095347e9d40a9a1d464f5ee0bb5d7f2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 28 Nov 2022 22:57:34 +0000
Subject: [PATCH 3166/4122] KVM: selftests: Disallow "get supported CPUID"
 before REQ_XCOMP_GUEST_PERM

Disallow using kvm_get_supported_cpuid() and thus caching KVM's supported
CPUID info before enabling XSAVE-managed features that are off-by-default
and must be enabled by ARCH_REQ_XCOMP_GUEST_PERM.  Caching the supported
CPUID before all XSAVE features are enabled can result in false negatives
due to testing features that were cached before they were enabled.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221128225735.3291648-4-seanjc@google.com
---
 .../selftests/kvm/lib/x86_64/processor.c       | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 23067465c0355..1d3829e652e65 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -601,21 +601,24 @@ void vcpu_arch_free(struct kvm_vcpu *vcpu)
 		free(vcpu->cpuid);
 }
 
+/* Do not use kvm_supported_cpuid directly except for validity checks. */
+static void *kvm_supported_cpuid;
+
 const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
 {
-	static struct kvm_cpuid2 *cpuid;
 	int kvm_fd;
 
-	if (cpuid)
-		return cpuid;
+	if (kvm_supported_cpuid)
+		return kvm_supported_cpuid;
 
-	cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+	kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
 	kvm_fd = open_kvm_dev_path_or_exit();
 
-	kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
+	kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
+		  (struct kvm_cpuid2 *)kvm_supported_cpuid);
 
 	close(kvm_fd);
-	return cpuid;
+	return kvm_supported_cpuid;
 }
 
 static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
@@ -684,6 +687,9 @@ void __vm_xsave_require_permission(int bit, const char *name)
 		.addr = (unsigned long) &bitmask
 	};
 
+	TEST_ASSERT(!kvm_supported_cpuid,
+		    "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
+
 	kvm_fd = open_kvm_dev_path_or_exit();
 	rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
 	close(kvm_fd);
-- 
GitLab


From 553d1652b8615b5ae3080bb1a561207aee87fa85 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Mon, 28 Nov 2022 22:57:35 +0000
Subject: [PATCH 3167/4122] KVM: selftests: Do kvm_cpu_has() checks before
 creating VM+vCPU

Move the AMX test's kvm_cpu_has() checks before creating the VM+vCPU,
there are no dependencies between the two operations.  Opportunistically
add a comment to call out that enabling off-by-default XSAVE-managed
features must be done before KVM_GET_SUPPORTED_CPUID is cached.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221128225735.3291648-5-seanjc@google.com
---
 tools/testing/selftests/kvm/x86_64/amx_test.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index 1256c7faadd32..bd72c6eb3b670 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -249,17 +249,21 @@ int main(int argc, char *argv[])
 	u32 amx_offset;
 	int stage, ret;
 
+	/*
+	 * Note, all off-by-default features must be enabled before anything
+	 * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
+	 */
 	vm_xsave_require_permission(XSTATE_XTILE_DATA_BIT);
 
-	/* Create VM */
-	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
-
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
 
+	/* Create VM */
+	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
 	TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
 		    "KVM should enumerate max XSAVE size when XSAVE is supported");
 	xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
-- 
GitLab


From 0c3265235fc17e78773025ed0ddc7c0324b6ed89 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 22 Nov 2022 01:33:09 +0000
Subject: [PATCH 3168/4122] KVM: selftests: Define and use a custom static
 assert in lib headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define and use kvm_static_assert() in the common KVM selftests headers to
provide deterministic behavior, and to allow creating static asserts
without dummy messages.

The kernel's static_assert() makes the message param optional, and on the
surface, tools/include/linux/build_bug.h appears to follow suit.  However,
glibc may override static_assert() and redefine it as a direct alias of
_Static_assert(), which makes the message parameter mandatory.  This leads
to non-deterministic behavior as KVM selftests code that utilizes
static_assert() without a custom message may or not compile depending on
the order of includes.  E.g. recently added asserts in
x86_64/processor.h fail on some systems with errors like

  In file included from lib/memstress.c:11:0:
  include/x86_64/processor.h: In function ‘this_cpu_has_p’:
  include/x86_64/processor.h:193:34: error: expected ‘,’ before ‘)’ token
    static_assert(low_bit < high_bit);     \
                                    ^
due to _Static_assert() expecting a comma before a message.  The "message
optional" version of static_assert() uses macro magic to strip away the
comma when presented with empty an __VA_ARGS__

  #ifndef static_assert
  #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
  #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
  #endif // static_assert

and effectively generates "_Static_assert(expr, #expr)".

The incompatible version of static_assert() gets defined by this snippet
in /usr/include/assert.h:

  #if defined __USE_ISOC11 && !defined __cplusplus
  # undef static_assert
  # define static_assert _Static_assert
  #endif

which yields "_Static_assert(expr)" and thus fails as above.

KVM selftests don't actually care about using C11, but __USE_ISOC11 gets
defined because of _GNU_SOURCE, which many tests do #define.  _GNU_SOURCE
triggers a massive pile of defines in /usr/include/features.h, including
_ISOC11_SOURCE:

  /* If _GNU_SOURCE was defined by the user, turn on all the other features.  */
  #ifdef _GNU_SOURCE
  # undef  _ISOC95_SOURCE
  # define _ISOC95_SOURCE 1
  # undef  _ISOC99_SOURCE
  # define _ISOC99_SOURCE 1
  # undef  _ISOC11_SOURCE
  # define _ISOC11_SOURCE 1
  # undef  _POSIX_SOURCE
  # define _POSIX_SOURCE  1
  # undef  _POSIX_C_SOURCE
  # define _POSIX_C_SOURCE        200809L
  # undef  _XOPEN_SOURCE
  # define _XOPEN_SOURCE  700
  # undef  _XOPEN_SOURCE_EXTENDED
  # define _XOPEN_SOURCE_EXTENDED 1
  # undef  _LARGEFILE64_SOURCE
  # define _LARGEFILE64_SOURCE    1
  # undef  _DEFAULT_SOURCE
  # define _DEFAULT_SOURCE        1
  # undef  _ATFILE_SOURCE
  # define _ATFILE_SOURCE 1
  #endif

which further down in /usr/include/features.h leads to:

  /* This is to enable the ISO C11 extension.  */
  #if (defined _ISOC11_SOURCE \
       || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L))
  # define __USE_ISOC11   1
  #endif

To make matters worse, /usr/include/assert.h doesn't guard against
multiple inclusion by turning itself into a nop, but instead #undefs a
few macros and continues on.  As a result, it's all but impossible to
ensure the "message optional" version of static_assert() will actually be
used, e.g. explicitly including assert.h and #undef'ing static_assert()
doesn't work as a later inclusion of assert.h will again redefine its
version.

  #ifdef  _ASSERT_H

  # undef _ASSERT_H
  # undef assert
  # undef __ASSERT_VOID_CAST

  # ifdef __USE_GNU
  #  undef assert_perror
  # endif

  #endif /* assert.h      */

  #define _ASSERT_H       1
  #include <features.h>

Fixes: fcba483e8246 ("KVM: selftests: Sanity check input to ioctls() at build time")
Fixes: ee3795536664 ("KVM: selftests: Refactor X86_FEATURE_* framework to prep for X86_PROPERTY_*")
Fixes: 53a7dc0f215e ("KVM: selftests: Add X86_PROPERTY_* framework to retrieve CPUID values")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Link: https://lore.kernel.org/r/20221122013309.1872347-1-seanjc@google.com
---
 .../selftests/kvm/include/kvm_util_base.h     | 14 +++++++++++-
 .../selftests/kvm/include/x86_64/processor.h  | 22 +++++++++----------
 2 files changed, 24 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index c7685c7038ff0..9fa0d340f2915 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -22,6 +22,18 @@
 
 #include "sparsebit.h"
 
+/*
+ * Provide a version of static_assert() that is guaranteed to have an optional
+ * message param.  If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h)
+ * #undefs and #defines static_assert() as a direct alias to _Static_assert(),
+ * i.e. effectively makes the message mandatory.  Many KVM selftests #define
+ * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE.  As
+ * a result, static_assert() behavior is non-deterministic and may or may not
+ * require a message depending on #include order.
+ */
+#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
+
 #define KVM_DEV_PATH "/dev/kvm"
 #define KVM_MAX_VCPUS 512
 
@@ -196,7 +208,7 @@ static inline bool kvm_has_cap(long cap)
 
 #define kvm_do_ioctl(fd, cmd, arg)						\
 ({										\
-	static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd), "");	\
+	kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd));	\
 	ioctl(fd, cmd, arg);							\
 })
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 22852bd32d7b7..411549ef4947f 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -72,11 +72,11 @@ struct kvm_x86_cpu_feature {
 		.bit = __bit,							\
 	};									\
 										\
-	static_assert((fn & 0xc0000000) == 0 ||					\
-		      (fn & 0xc0000000) == 0x40000000 ||			\
-		      (fn & 0xc0000000) == 0x80000000 ||			\
-		      (fn & 0xc0000000) == 0xc0000000);				\
-	static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));	\
+	kvm_static_assert((fn & 0xc0000000) == 0 ||				\
+			  (fn & 0xc0000000) == 0x40000000 ||			\
+			  (fn & 0xc0000000) == 0x80000000 ||			\
+			  (fn & 0xc0000000) == 0xc0000000);			\
+	kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));	\
 	feature;								\
 })
 
@@ -191,12 +191,12 @@ struct kvm_x86_cpu_property {
 		.hi_bit = high_bit,						\
 	};									\
 										\
-	static_assert(low_bit < high_bit);					\
-	static_assert((fn & 0xc0000000) == 0 ||					\
-		      (fn & 0xc0000000) == 0x40000000 ||			\
-		      (fn & 0xc0000000) == 0x80000000 ||			\
-		      (fn & 0xc0000000) == 0xc0000000);				\
-	static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));	\
+	kvm_static_assert(low_bit < high_bit);					\
+	kvm_static_assert((fn & 0xc0000000) == 0 ||				\
+			  (fn & 0xc0000000) == 0x40000000 ||			\
+			  (fn & 0xc0000000) == 0x80000000 ||			\
+			  (fn & 0xc0000000) == 0xc0000000);			\
+	kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));	\
 	property;								\
 })
 
-- 
GitLab


From efa2afc3969e166702fd2ae3cfb1a7a195ef3533 Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:37:05 +0000
Subject: [PATCH 3169/4122] RDMA: Extend RDMA user ABI to support atomic write

1) Define new atomic write request/completion in userspace.
2) Define new atomic write capability in userspace.

Link: https://lore.kernel.org/r/1669905432-14-2-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/ib_user_verbs.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 43672cb1fd579..2378148155442 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -466,6 +466,7 @@ enum ib_uverbs_wc_opcode {
 	IB_UVERBS_WC_BIND_MW = 5,
 	IB_UVERBS_WC_LOCAL_INV = 6,
 	IB_UVERBS_WC_TSO = 7,
+	IB_UVERBS_WC_ATOMIC_WRITE = 9,
 };
 
 struct ib_uverbs_wc {
@@ -784,6 +785,7 @@ enum ib_uverbs_wr_opcode {
 	IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
 	IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
 	IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+	IB_UVERBS_WR_ATOMIC_WRITE = 15,
 	/* Review enum ib_wr_opcode before modifying this */
 };
 
@@ -1331,6 +1333,8 @@ enum ib_uverbs_device_cap_flags {
 	/* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */
 	IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34,
 	IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36,
+	/* Atomic write attributes */
+	IB_UVERBS_DEVICE_ATOMIC_WRITE = 1ULL << 40,
 };
 
 enum ib_uverbs_raw_packet_caps {
-- 
GitLab


From 3ff81e827b8d5cea36ff374a11c200b4306f45d2 Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:37:06 +0000
Subject: [PATCH 3170/4122] RDMA: Extend RDMA kernel ABI to support atomic
 write

1) Define new atomic write request/completion in kernel.
2) Define new atomic write capability in kernel.
3) Define new atomic write opcode for RC service in packet.

Link: https://lore.kernel.org/r/1669905432-14-3-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_pack.h  | 2 ++
 include/rdma/ib_verbs.h | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index a9162f25beaf5..f932d164af637 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -84,6 +84,7 @@ enum {
 	/* opcode 0x15 is reserved */
 	IB_OPCODE_SEND_LAST_WITH_INVALIDATE         = 0x16,
 	IB_OPCODE_SEND_ONLY_WITH_INVALIDATE         = 0x17,
+	IB_OPCODE_ATOMIC_WRITE                      = 0x1D,
 
 	/* real constants follow -- see comment about above IB_OPCODE()
 	   macro for more details */
@@ -112,6 +113,7 @@ enum {
 	IB_OPCODE(RC, FETCH_ADD),
 	IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
 	IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
+	IB_OPCODE(RC, ATOMIC_WRITE),
 
 	/* UC */
 	IB_OPCODE(UC, SEND_FIRST),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 77dd9148815b9..df6bb26ba0be6 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -270,6 +270,7 @@ enum ib_device_cap_flags {
 	/* The device supports padding incoming writes to cacheline. */
 	IB_DEVICE_PCI_WRITE_END_PADDING =
 		IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
+	IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE,
 };
 
 enum ib_kernel_cap_flags {
@@ -982,6 +983,7 @@ enum ib_wc_opcode {
 	IB_WC_BIND_MW = IB_UVERBS_WC_BIND_MW,
 	IB_WC_LOCAL_INV = IB_UVERBS_WC_LOCAL_INV,
 	IB_WC_LSO = IB_UVERBS_WC_TSO,
+	IB_WC_ATOMIC_WRITE = IB_UVERBS_WC_ATOMIC_WRITE,
 	IB_WC_REG_MR,
 	IB_WC_MASKED_COMP_SWAP,
 	IB_WC_MASKED_FETCH_ADD,
@@ -1325,6 +1327,7 @@ enum ib_wr_opcode {
 		IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
 	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
 		IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+	IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE,
 
 	/* These are kernel only and can not be issued by userspace */
 	IB_WR_REG_MR = 0x20,
-- 
GitLab


From c2d939002934fa9d7b802f196b069963b46da194 Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:37:07 +0000
Subject: [PATCH 3171/4122] RDMA/rxe: Extend rxe user ABI to support atomic
 write

Define an atomic_wr array to store 8-byte value.

Link: https://lore.kernel.org/r/1669905432-14-4-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/rdma_user_rxe.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index 73f679dfd2dfb..d20d1ecf046fd 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -146,6 +146,7 @@ struct rxe_dma_info {
 	__u32			reserved;
 	union {
 		__DECLARE_FLEX_ARRAY(__u8, inline_data);
+		__DECLARE_FLEX_ARRAY(__u8, atomic_wr);
 		__DECLARE_FLEX_ARRAY(struct rxe_sge, sge);
 	};
 };
-- 
GitLab


From 5c7af6c7938466aa2f3c52057f4dd28b4a1e9e42 Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:37:08 +0000
Subject: [PATCH 3172/4122] RDMA/rxe: Extend rxe packet format to support
 atomic write

Extend rxe_wr_opcode_info[] and rxe_opcode[] for new atomic write opcode.

Link: https://lore.kernel.org/r/1669905432-14-5-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_opcode.c | 18 ++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_opcode.h |  3 +++
 2 files changed, 21 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index d4ba4d506f176..fb196029048e5 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -101,6 +101,12 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
 			[IB_QPT_UC]	= WR_LOCAL_OP_MASK,
 		},
 	},
+	[IB_WR_ATOMIC_WRITE]                       = {
+		.name   = "IB_WR_ATOMIC_WRITE",
+		.mask   = {
+			[IB_QPT_RC]     = WR_ATOMIC_WRITE_MASK,
+		},
+	},
 };
 
 struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
@@ -378,6 +384,18 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
 					  RXE_IETH_BYTES,
 		}
 	},
+	[IB_OPCODE_RC_ATOMIC_WRITE]                        = {
+		.name   = "IB_OPCODE_RC_ATOMIC_WRITE",
+		.mask   = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
+			  RXE_ATOMIC_WRITE_MASK | RXE_START_MASK |
+			  RXE_END_MASK,
+		.length = RXE_BTH_BYTES + RXE_RETH_BYTES,
+		.offset = {
+			[RXE_BTH]       = 0,
+			[RXE_RETH]      = RXE_BTH_BYTES,
+			[RXE_PAYLOAD]   = RXE_BTH_BYTES + RXE_RETH_BYTES,
+		}
+	},
 
 	/* UC */
 	[IB_OPCODE_UC_SEND_FIRST]			= {
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index 8f9aaaf260f29..a470e9b0b8842 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -20,6 +20,7 @@ enum rxe_wr_mask {
 	WR_READ_MASK			= BIT(3),
 	WR_WRITE_MASK			= BIT(4),
 	WR_LOCAL_OP_MASK		= BIT(5),
+	WR_ATOMIC_WRITE_MASK		= BIT(7),
 
 	WR_READ_OR_WRITE_MASK		= WR_READ_MASK | WR_WRITE_MASK,
 	WR_WRITE_OR_SEND_MASK		= WR_WRITE_MASK | WR_SEND_MASK,
@@ -81,6 +82,8 @@ enum rxe_hdr_mask {
 
 	RXE_LOOPBACK_MASK	= BIT(NUM_HDR_TYPES + 12),
 
+	RXE_ATOMIC_WRITE_MASK   = BIT(NUM_HDR_TYPES + 14),
+
 	RXE_READ_OR_ATOMIC_MASK	= (RXE_READ_MASK | RXE_ATOMIC_MASK),
 	RXE_WRITE_OR_SEND_MASK	= (RXE_WRITE_MASK | RXE_SEND_MASK),
 	RXE_READ_OR_WRITE_MASK	= (RXE_READ_MASK | RXE_WRITE_MASK),
-- 
GitLab


From abb633cf28049e6a7c37c44f83a8584f7dbded7d Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:39:25 +0000
Subject: [PATCH 3173/4122] RDMA/rxe: Make requester support atomic write on RC
 service

Make requester process and send an atomic write request on RC service.

Link: https://lore.kernel.org/r/1669905568-62-1-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 4d45f508392fe..2713e90589225 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -258,6 +258,10 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
 		else
 			return fits ? IB_OPCODE_RC_SEND_ONLY_WITH_INVALIDATE :
 				IB_OPCODE_RC_SEND_FIRST;
+
+	case IB_WR_ATOMIC_WRITE:
+		return IB_OPCODE_RC_ATOMIC_WRITE;
+
 	case IB_WR_REG_MR:
 	case IB_WR_LOCAL_INV:
 		return opcode;
@@ -486,6 +490,11 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
 		}
 	}
 
+	if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+		memcpy(payload_addr(pkt), wqe->dma.atomic_wr, payload);
+		wqe->dma.resid -= payload;
+	}
+
 	return 0;
 }
 
@@ -709,13 +718,15 @@ int rxe_requester(void *arg)
 	}
 
 	mask = rxe_opcode[opcode].mask;
-	if (unlikely(mask & RXE_READ_OR_ATOMIC_MASK)) {
+	if (unlikely(mask & (RXE_READ_OR_ATOMIC_MASK |
+			RXE_ATOMIC_WRITE_MASK))) {
 		if (check_init_depth(qp, wqe))
 			goto exit;
 	}
 
 	mtu = get_mtu(qp);
-	payload = (mask & RXE_WRITE_OR_SEND_MASK) ? wqe->dma.resid : 0;
+	payload = (mask & (RXE_WRITE_OR_SEND_MASK | RXE_ATOMIC_WRITE_MASK)) ?
+			wqe->dma.resid : 0;
 	if (payload > mtu) {
 		if (qp_type(qp) == IB_QPT_UD) {
 			/* C10-93.1.1: If the total sum of all the buffer lengths specified for a
-- 
GitLab


From 034e285f8b99062a0cf29112e1232154a6a44aa5 Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:39:26 +0000
Subject: [PATCH 3174/4122] RDMA/rxe: Make responder support atomic write on RC
 service

Make responder process an atomic write request and send a read response
on RC service.

Link: https://lore.kernel.org/r/1669905568-62-2-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 84 ++++++++++++++++++++++++++--
 1 file changed, 79 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 6761bcd1d4d8f..6ac544477f3f7 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -22,6 +22,7 @@ enum resp_states {
 	RESPST_EXECUTE,
 	RESPST_READ_REPLY,
 	RESPST_ATOMIC_REPLY,
+	RESPST_ATOMIC_WRITE_REPLY,
 	RESPST_COMPLETE,
 	RESPST_ACKNOWLEDGE,
 	RESPST_CLEANUP,
@@ -57,6 +58,7 @@ static char *resp_state_name[] = {
 	[RESPST_EXECUTE]			= "EXECUTE",
 	[RESPST_READ_REPLY]			= "READ_REPLY",
 	[RESPST_ATOMIC_REPLY]			= "ATOMIC_REPLY",
+	[RESPST_ATOMIC_WRITE_REPLY]		= "ATOMIC_WRITE_REPLY",
 	[RESPST_COMPLETE]			= "COMPLETE",
 	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
 	[RESPST_CLEANUP]			= "CLEANUP",
@@ -263,7 +265,7 @@ static enum resp_states check_op_valid(struct rxe_qp *qp,
 	case IB_QPT_RC:
 		if (((pkt->mask & RXE_READ_MASK) &&
 		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
-		    ((pkt->mask & RXE_WRITE_MASK) &&
+		    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
 		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
 		    ((pkt->mask & RXE_ATOMIC_MASK) &&
 		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
@@ -367,7 +369,7 @@ static enum resp_states check_resource(struct rxe_qp *qp,
 		}
 	}
 
-	if (pkt->mask & RXE_READ_OR_ATOMIC_MASK) {
+	if (pkt->mask & (RXE_READ_OR_ATOMIC_MASK | RXE_ATOMIC_WRITE_MASK)) {
 		/* it is the requesters job to not send
 		 * too many read/atomic ops, we just
 		 * recycle the responder resource queue
@@ -438,7 +440,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	enum resp_states state;
 	int access;
 
-	if (pkt->mask & RXE_READ_OR_WRITE_MASK) {
+	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 		if (pkt->mask & RXE_RETH_MASK) {
 			qp->resp.va = reth_va(pkt);
 			qp->resp.offset = 0;
@@ -504,7 +506,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 		goto err;
 	}
 
-	if (pkt->mask & RXE_WRITE_MASK)	 {
+	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 		if (resid > mtu) {
 			if (pktlen != mtu || bth_pad(pkt)) {
 				state = RESPST_ERR_LENGTH;
@@ -604,6 +606,7 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
 		res->state = rdatm_res_state_new;
 		break;
 	case RXE_ATOMIC_MASK:
+	case RXE_ATOMIC_WRITE_MASK:
 		res->first_psn = pkt->psn;
 		res->last_psn = pkt->psn;
 		res->cur_psn = pkt->psn;
@@ -673,6 +676,55 @@ out:
 	return ret;
 }
 
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+						struct rxe_pkt_info *pkt)
+{
+	u64 src, *dst;
+	struct resp_res *res = qp->resp.res;
+	struct rxe_mr *mr = qp->resp.mr;
+	int payload = payload_size(pkt);
+
+	if (!res) {
+		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+		qp->resp.res = res;
+	}
+
+	if (!res->replay) {
+#ifdef CONFIG_64BIT
+		if (mr->state != RXE_MR_STATE_VALID)
+			return RESPST_ERR_RKEY_VIOLATION;
+
+		memcpy(&src, payload_addr(pkt), payload);
+
+		dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
+		/* check vaddr is 8 bytes aligned. */
+		if (!dst || (uintptr_t)dst & 7)
+			return RESPST_ERR_MISALIGNED_ATOMIC;
+
+		/* Do atomic write after all prior operations have completed */
+		smp_store_release(dst, src);
+
+		/* decrease resp.resid to zero */
+		qp->resp.resid -= sizeof(payload);
+
+		qp->resp.msn++;
+
+		/* next expected psn, read handles this separately */
+		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+		qp->resp.ack_psn = qp->resp.psn;
+
+		qp->resp.opcode = pkt->opcode;
+		qp->resp.status = IB_WC_SUCCESS;
+
+		return RESPST_ACKNOWLEDGE;
+#else
+		return RESPST_ERR_UNSUPPORTED_OPCODE;
+#endif /* CONFIG_64BIT */
+	}
+
+	return RESPST_ACKNOWLEDGE;
+}
+
 static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
 					  struct rxe_pkt_info *ack,
 					  int opcode,
@@ -912,6 +964,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 		return RESPST_READ_REPLY;
 	} else if (pkt->mask & RXE_ATOMIC_MASK) {
 		return RESPST_ATOMIC_REPLY;
+	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
+		return RESPST_ATOMIC_WRITE_REPLY;
 	} else {
 		/* Unreachable */
 		WARN_ON_ONCE(1);
@@ -1085,6 +1139,19 @@ static int send_atomic_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
 	return ret;
 }
 
+static int send_read_response_ack(struct rxe_qp *qp, u8 syndrome, u32 psn)
+{
+	int ret = send_common_ack(qp, syndrome, psn,
+			IB_OPCODE_RC_RDMA_READ_RESPONSE_ONLY,
+			"RDMA READ response of length zero ACK");
+
+	/* have to clear this since it is used to trigger
+	 * long read replies
+	 */
+	qp->resp.res = NULL;
+	return ret;
+}
+
 static enum resp_states acknowledge(struct rxe_qp *qp,
 				    struct rxe_pkt_info *pkt)
 {
@@ -1095,6 +1162,8 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
 		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
 	else if (pkt->mask & RXE_ATOMIC_MASK)
 		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
+	else if (pkt->mask & RXE_ATOMIC_WRITE_MASK)
+		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
 	else if (bth_ack(pkt))
 		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
 
@@ -1206,7 +1275,9 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 			res->replay = 1;
 			res->cur_psn = pkt->psn;
 			qp->resp.res = res;
-			rc = RESPST_ATOMIC_REPLY;
+			rc = pkt->mask & RXE_ATOMIC_MASK ?
+					RESPST_ATOMIC_REPLY :
+					RESPST_ATOMIC_WRITE_REPLY;
 			goto out;
 		}
 
@@ -1343,6 +1414,9 @@ int rxe_responder(void *arg)
 		case RESPST_ATOMIC_REPLY:
 			state = atomic_reply(qp, pkt);
 			break;
+		case RESPST_ATOMIC_WRITE_REPLY:
+			state = atomic_write_reply(qp, pkt);
+			break;
 		case RESPST_ACKNOWLEDGE:
 			state = acknowledge(qp, pkt);
 			break;
-- 
GitLab


From 3aec427bb1499bd3325d2e251edb729a5a8643df Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:39:27 +0000
Subject: [PATCH 3175/4122] RDMA/rxe: Implement atomic write completion

Generate an atomic write completion when the atomic write request
has been finished.

Link: https://lore.kernel.org/r/1669905568-62-3-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 4dca4f8bbb5aa..1c525325e2716 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -104,6 +104,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 	case IB_WR_LOCAL_INV:			return IB_WC_LOCAL_INV;
 	case IB_WR_REG_MR:			return IB_WC_REG_MR;
 	case IB_WR_BIND_MW:			return IB_WC_BIND_MW;
+	case IB_WR_ATOMIC_WRITE:		return IB_WC_ATOMIC_WRITE;
 
 	default:
 		return 0xff;
@@ -269,6 +270,9 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		if ((syn & AETH_TYPE_MASK) != AETH_ACK)
 			return COMPST_ERROR;
 
+		if (wqe->wr.opcode == IB_WR_ATOMIC_WRITE)
+			return COMPST_WRITE_SEND;
+
 		fallthrough;
 		/* (IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE doesn't have an AETH)
 		 */
-- 
GitLab


From 4cd9f1d320f905e7bc60f030566d15003745ba91 Mon Sep 17 00:00:00 2001
From: Xiao Yang <yangx.jy@fujitsu.com>
Date: Thu, 1 Dec 2022 14:39:28 +0000
Subject: [PATCH 3176/4122] RDMA/rxe: Enable atomic write capability for rxe
 device

The capability shows that rxe device supports atomic write operation.

Link: https://lore.kernel.org/r/1669905568-62-4-git-send-email-yangx.jy@fujitsu.com
Signed-off-by: Xiao Yang <yangx.jy@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_param.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index 86c7a8bf3cbbd..bbc88cd71d950 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -51,7 +51,12 @@ enum rxe_device_param {
 					| IB_DEVICE_SRQ_RESIZE
 					| IB_DEVICE_MEM_MGT_EXTENSIONS
 					| IB_DEVICE_MEM_WINDOW
+#ifdef CONFIG_64BIT
+					| IB_DEVICE_MEM_WINDOW_TYPE_2B
+					| IB_DEVICE_ATOMIC_WRITE,
+#else
 					| IB_DEVICE_MEM_WINDOW_TYPE_2B,
+#endif /* CONFIG_64BIT */
 	RXE_MAX_SGE			= 32,
 	RXE_MAX_WQE_SIZE		= sizeof(struct rxe_send_wqe) +
 					  sizeof(struct ib_sge) * RXE_MAX_SGE,
-- 
GitLab


From 9f61521c7a284e799050cd2adacc9a611bd2b491 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 28 Nov 2022 13:11:13 +1000
Subject: [PATCH 3177/4122] powerpc/qspinlock: powerpc qspinlock implementation

Add a powerpc specific implementation of queued spinlocks. This is the
build framework with a very simple (non-queued) spinlock implementation
to begin with. Later changes add queueing, and other features and
optimisations one-at-a-time. It is done this way to more easily see how
the queued spinlocks are built, and to make performance and correctness
bisects more useful.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Drop paravirt.h & processor.h changes to fix 32-bit build]
[mpe: Fix 32-bit build of qspinlock.o & disallow GENERIC_LOCKBREAK per Nick]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/CONLLQB6DCJU.2ZPOS7T6S5GRR@bobo
---
 arch/powerpc/Kconfig                          |  3 +-
 arch/powerpc/include/asm/qspinlock.h          | 86 +++++++------------
 arch/powerpc/include/asm/qspinlock_paravirt.h |  7 --
 arch/powerpc/include/asm/qspinlock_types.h    | 13 +++
 arch/powerpc/include/asm/spinlock.h           |  2 +-
 arch/powerpc/include/asm/spinlock_types.h     |  2 +-
 arch/powerpc/lib/Makefile                     |  4 +-
 arch/powerpc/lib/qspinlock.c                  | 17 ++++
 8 files changed, 65 insertions(+), 69 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/qspinlock_paravirt.h
 create mode 100644 arch/powerpc/include/asm/qspinlock_types.h
 create mode 100644 arch/powerpc/lib/qspinlock.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 699df27b0e2fc..7fbdf22ce9a97 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -96,7 +96,7 @@ config LOCKDEP_SUPPORT
 config GENERIC_LOCKBREAK
 	bool
 	default y
-	depends on SMP && PREEMPTION
+	depends on SMP && PREEMPTION && !PPC_QUEUED_SPINLOCKS
 
 config GENERIC_HWEIGHT
 	bool
@@ -154,7 +154,6 @@ config PPC
 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
 	select ARCH_USE_MEMTEST
 	select ARCH_USE_QUEUED_RWLOCKS		if PPC_QUEUED_SPINLOCKS
-	select ARCH_USE_QUEUED_SPINLOCKS	if PPC_QUEUED_SPINLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index b676c4fb90fd7..5e6257313557d 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -2,83 +2,55 @@
 #ifndef _ASM_POWERPC_QSPINLOCK_H
 #define _ASM_POWERPC_QSPINLOCK_H
 
-#include <asm-generic/qspinlock_types.h>
+#include <linux/atomic.h>
+#include <linux/compiler.h>
+#include <asm/qspinlock_types.h>
 #include <asm/paravirt.h>
 
-#define _Q_PENDING_LOOPS	(1 << 9) /* not tuned */
-
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_queued_spin_unlock(struct qspinlock *lock);
-
-static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
-	if (!is_shared_processor())
-		native_queued_spin_lock_slowpath(lock, val);
-	else
-		__pv_queued_spin_lock_slowpath(lock, val);
+	return atomic_read(&lock->val);
 }
 
-#define queued_spin_unlock queued_spin_unlock
-static inline void queued_spin_unlock(struct qspinlock *lock)
+static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 {
-	if (!is_shared_processor())
-		smp_store_release(&lock->locked, 0);
-	else
-		__pv_queued_spin_unlock(lock);
+	return !atomic_read(&lock.val);
 }
 
-#else
-extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-#endif
-
-static __always_inline void queued_spin_lock(struct qspinlock *lock)
+static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 {
-	u32 val = 0;
-
-	if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
-		return;
-
-	queued_spin_lock_slowpath(lock, val);
+	return 0;
 }
-#define queued_spin_lock queued_spin_lock
 
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-#define SPIN_THRESHOLD (1<<15) /* not tuned */
-
-static __always_inline void pv_wait(u8 *ptr, u8 val)
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	if (*ptr != val)
-		return;
-	yield_to_any();
-	/*
-	 * We could pass in a CPU here if waiting in the queue and yield to
-	 * the previous CPU in the queue.
-	 */
+	return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0;
 }
 
-static __always_inline void pv_kick(int cpu)
+void queued_spin_lock_slowpath(struct qspinlock *lock);
+
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
 {
-	prod_cpu(cpu);
+	if (!queued_spin_trylock(lock))
+		queued_spin_lock_slowpath(lock);
 }
 
-extern void __pv_init_lock_hash(void);
-
-static inline void pv_spinlocks_init(void)
+static inline void queued_spin_unlock(struct qspinlock *lock)
 {
-	__pv_init_lock_hash();
+	atomic_set_release(&lock->val, 0);
 }
 
-#endif
-
-/*
- * Queued spinlocks rely heavily on smp_cond_load_relaxed() to busy-wait,
- * which was found to have performance problems if implemented with
- * the preferred spin_begin()/spin_end() SMT priority pattern. Use the
- * generic version instead.
- */
+#define arch_spin_is_locked(l)		queued_spin_is_locked(l)
+#define arch_spin_is_contended(l)	queued_spin_is_contended(l)
+#define arch_spin_value_unlocked(l)	queued_spin_value_unlocked(l)
+#define arch_spin_lock(l)		queued_spin_lock(l)
+#define arch_spin_trylock(l)		queued_spin_trylock(l)
+#define arch_spin_unlock(l)		queued_spin_unlock(l)
 
-#include <asm-generic/qspinlock.h>
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void);
+#else
+static inline void pv_spinlocks_init(void) { }
+#endif
 
 #endif /* _ASM_POWERPC_QSPINLOCK_H */
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h
deleted file mode 100644
index 6b60e7736a47c..0000000000000
--- a/arch/powerpc/include/asm/qspinlock_paravirt.h
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
-#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
-
-EXPORT_SYMBOL(__pv_queued_spin_unlock);
-
-#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
new file mode 100644
index 0000000000000..59606bc0c774e
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_QSPINLOCK_TYPES_H
+#define _ASM_POWERPC_QSPINLOCK_TYPES_H
+
+#include <linux/types.h>
+
+typedef struct qspinlock {
+	atomic_t val;
+} arch_spinlock_t;
+
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
+
+#endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index bd75872a6334a..7dafca8e3f02e 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -13,7 +13,7 @@
 /* See include/linux/spinlock.h */
 #define smp_mb__after_spinlock()	smp_mb()
 
-#ifndef CONFIG_PARAVIRT_SPINLOCKS
+#ifndef CONFIG_PPC_QUEUED_SPINLOCKS
 static inline void pv_spinlocks_init(void) { }
 #endif
 
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index d5f8a74ed2e8c..40b01446cf755 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -7,7 +7,7 @@
 #endif
 
 #ifdef CONFIG_PPC_QUEUED_SPINLOCKS
-#include <asm-generic/qspinlock_types.h>
+#include <asm/qspinlock_types.h>
 #include <asm-generic/qrwlock_types.h>
 #else
 #include <asm/simple_spinlock_types.h>
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 8560c912186df..4de71cbf6e8ee 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -52,7 +52,9 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
 	   memcpy_64.o copy_mc_64.o
 
-ifndef CONFIG_PPC_QUEUED_SPINLOCKS
+ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+obj-$(CONFIG_SMP)	+= qspinlock.o
+else
 obj64-$(CONFIG_SMP)	+= locks.o
 endif
 
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
new file mode 100644
index 0000000000000..1c669b5b4607e
--- /dev/null
+++ b/arch/powerpc/lib/qspinlock.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/export.h>
+#include <linux/processor.h>
+#include <asm/qspinlock.h>
+
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+	while (!queued_spin_trylock(lock))
+		cpu_relax();
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+void pv_spinlocks_init(void)
+{
+}
+#endif
-- 
GitLab


From 84990b169557428c318df87b7836cd15f65b62dc Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:16 +1000
Subject: [PATCH 3178/4122] powerpc/qspinlock: add mcs queueing for contended
 waiters

This forms the basis of the qspinlock slow path.

Like generic qspinlocks and unlike the vanilla MCS algorithm, the lock
owner does not participate in the queue, only waiters. The first waiter
spins on the lock word, then when the lock is released it takes
ownership and unqueues the next waiter. This is how qspinlocks can be
implemented with the spinlock API -- lock owners don't need a node, only
waiters do.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-2-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h       |  10 +-
 arch/powerpc/include/asm/qspinlock_types.h |  23 +++
 arch/powerpc/lib/qspinlock.c               | 187 ++++++++++++++++++++-
 3 files changed, 214 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 5e6257313557d..6946dba5d0878 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -19,12 +19,12 @@ static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 
 static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 {
-	return 0;
+	return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK);
 }
 
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	return atomic_cmpxchg_acquire(&lock->val, 0, 1) == 0;
+	return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0;
 }
 
 void queued_spin_lock_slowpath(struct qspinlock *lock);
@@ -37,7 +37,11 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
-	atomic_set_release(&lock->val, 0);
+	for (;;) {
+		int val = atomic_read(&lock->val);
+		if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val)
+			return;
+	}
 }
 
 #define arch_spin_is_locked(l)		queued_spin_is_locked(l)
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index 59606bc0c774e..20a36dfb14e26 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -10,4 +10,27 @@ typedef struct qspinlock {
 
 #define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
 
+/*
+ * Bitfields in the lock word:
+ *
+ *     0: locked bit
+ *  1-16: unused bits
+ * 17-31: tail cpu (+1)
+ */
+#define	_Q_SET_MASK(type)	(((1U << _Q_ ## type ## _BITS) - 1)\
+				      << _Q_ ## type ## _OFFSET)
+/* 0x00000001 */
+#define _Q_LOCKED_OFFSET	0
+#define _Q_LOCKED_BITS		1
+#define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
+
+/* 0xfffe0000 */
+#define _Q_TAIL_CPU_OFFSET	17
+#define _Q_TAIL_CPU_BITS	15
+#define _Q_TAIL_CPU_MASK	_Q_SET_MASK(TAIL_CPU)
+
+#if CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
 #endif /* _ASM_POWERPC_QSPINLOCK_TYPES_H */
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 1c669b5b4607e..86504628501e4 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -1,12 +1,193 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/compiler.h>
 #include <linux/export.h>
-#include <linux/processor.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
 #include <asm/qspinlock.h>
 
-void queued_spin_lock_slowpath(struct qspinlock *lock)
+#define MAX_NODES	4
+
+struct qnode {
+	struct qnode	*next;
+	struct qspinlock *lock;
+	u8		locked; /* 1 if lock acquired */
+};
+
+struct qnodes {
+	int		count;
+	struct qnode nodes[MAX_NODES];
+};
+
+static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+
+static inline int encode_tail_cpu(int cpu)
+{
+	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+}
+
+static inline int decode_tail_cpu(int val)
+{
+	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
+}
+
+/*
+ * Try to acquire the lock if it was not already locked. If the tail matches
+ * mytail then clear it, otherwise leave it unchnaged. Return previous value.
+ *
+ * This is used by the head of the queue to acquire the lock and clean up
+ * its tail if it was the last one queued.
+ */
+static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail)
+{
+	int val = atomic_read(&lock->val);
+
+	BUG_ON(val & _Q_LOCKED_VAL);
+
+	/* If we're the last queued, must clean up the tail. */
+	if ((val & _Q_TAIL_CPU_MASK) == tail) {
+		if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val)
+			return val;
+		/* Another waiter must have enqueued */
+		val = atomic_read(&lock->val);
+		BUG_ON(val & _Q_LOCKED_VAL);
+	}
+
+	/* We must be the owner, just set the lock bit and acquire */
+	atomic_or(_Q_LOCKED_VAL, &lock->val);
+	__atomic_acquire_fence();
+
+	return val;
+}
+
+/*
+ * Publish our tail, replacing previous tail. Return previous value.
+ *
+ * This provides a release barrier for publishing node, this pairs with the
+ * acquire barrier in get_tail_qnode() when the next CPU finds this tail
+ * value.
+ */
+static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail)
+{
+	for (;;) {
+		int val = atomic_read(&lock->val);
+		int newval = (val & ~_Q_TAIL_CPU_MASK) | tail;
+		int old;
+
+		old = atomic_cmpxchg_release(&lock->val, val, newval);
+		if (old == val)
+			return old;
+	}
+}
+
+static struct qnode *get_tail_qnode(struct qspinlock *lock, int val)
+{
+	int cpu = decode_tail_cpu(val);
+	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
+	int idx;
+
+	/*
+	 * After publishing the new tail and finding a previous tail in the
+	 * previous val (which is the control dependency), this barrier
+	 * orders the release barrier in publish_tail_cpu performed by the
+	 * last CPU, with subsequently looking at its qnode structures
+	 * after the barrier.
+	 */
+	smp_acquire__after_ctrl_dep();
+
+	for (idx = 0; idx < MAX_NODES; idx++) {
+		struct qnode *qnode = &qnodesp->nodes[idx];
+		if (qnode->lock == lock)
+			return qnode;
+	}
+
+	BUG();
+}
+
+static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 {
-	while (!queued_spin_trylock(lock))
+	struct qnodes *qnodesp;
+	struct qnode *next, *node;
+	int val, old, tail;
+	int idx;
+
+	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+	qnodesp = this_cpu_ptr(&qnodes);
+	if (unlikely(qnodesp->count >= MAX_NODES)) {
+		while (!queued_spin_trylock(lock))
+			cpu_relax();
+		return;
+	}
+
+	idx = qnodesp->count++;
+	/*
+	 * Ensure that we increment the head node->count before initialising
+	 * the actual node. If the compiler is kind enough to reorder these
+	 * stores, then an IRQ could overwrite our assignments.
+	 */
+	barrier();
+	node = &qnodesp->nodes[idx];
+	node->next = NULL;
+	node->lock = lock;
+	node->locked = 0;
+
+	tail = encode_tail_cpu(smp_processor_id());
+
+	old = publish_tail_cpu(lock, tail);
+
+	/*
+	 * If there was a previous node; link it and wait until reaching the
+	 * head of the waitqueue.
+	 */
+	if (old & _Q_TAIL_CPU_MASK) {
+		struct qnode *prev = get_tail_qnode(lock, old);
+
+		/* Link @node into the waitqueue. */
+		WRITE_ONCE(prev->next, node);
+
+		/* Wait for mcs node lock to be released */
+		while (!node->locked)
+			cpu_relax();
+
+		smp_rmb(); /* acquire barrier for the mcs lock */
+	}
+
+	/* We're at the head of the waitqueue, wait for the lock. */
+	for (;;) {
+		val = atomic_read(&lock->val);
+		if (!(val & _Q_LOCKED_VAL))
+			break;
+
+		cpu_relax();
+	}
+
+	/* If we're the last queued, must clean up the tail. */
+	old = set_locked_clean_tail(lock, tail);
+	if ((old & _Q_TAIL_CPU_MASK) == tail)
+		goto release; /* Another waiter must have enqueued */
+
+	/* There is a next, must wait for node->next != NULL (MCS protocol) */
+	while (!(next = READ_ONCE(node->next)))
 		cpu_relax();
+
+	/*
+	 * Unlock the next mcs waiter node. Release barrier is not required
+	 * here because the acquirer is only accessing the lock word, and
+	 * the acquire barrier we took the lock with orders that update vs
+	 * this store to locked. The corresponding barrier is the smp_rmb()
+	 * acquire barrier for mcs lock, above.
+	 */
+	WRITE_ONCE(next->locked, 1);
+
+release:
+	qnodesp->count--; /* release the node */
+}
+
+void queued_spin_lock_slowpath(struct qspinlock *lock)
+{
+	queued_spin_lock_mcs_queue(lock);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
-- 
GitLab


From 4c93c2e4b9e8988511c06b9c042f23d4b8f593ad Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:17 +1000
Subject: [PATCH 3179/4122] powerpc/qspinlock: use a half-word store to unlock
 to avoid larx/stcx.

The first 16 bits of the lock are only modified by the owner, and other
modifications always use atomic operations on the entire 32 bits, so
unlocks can use plain stores on the 16 bits. This is the same kind of
optimisation done by core qspinlock code.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-3-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h       |  6 +-----
 arch/powerpc/include/asm/qspinlock_types.h | 19 +++++++++++++++++--
 2 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 6946dba5d0878..713f6629f6fbc 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -37,11 +37,7 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
-	for (;;) {
-		int val = atomic_read(&lock->val);
-		if (atomic_cmpxchg_release(&lock->val, val, val & ~_Q_LOCKED_VAL) == val)
-			return;
-	}
+	smp_store_release(&lock->locked, 0);
 }
 
 #define arch_spin_is_locked(l)		queued_spin_is_locked(l)
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index 20a36dfb14e26..fe87181c59e51 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -3,12 +3,27 @@
 #define _ASM_POWERPC_QSPINLOCK_TYPES_H
 
 #include <linux/types.h>
+#include <asm/byteorder.h>
 
 typedef struct qspinlock {
-	atomic_t val;
+	union {
+		atomic_t val;
+
+#ifdef __LITTLE_ENDIAN
+		struct {
+			u16	locked;
+			u8	reserved[2];
+		};
+#else
+		struct {
+			u8	reserved[2];
+			u16	locked;
+		};
+#endif
+	};
 } arch_spinlock_t;
 
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ .val = ATOMIC_INIT(0) }
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ { .val = ATOMIC_INIT(0) } }
 
 /*
  * Bitfields in the lock word:
-- 
GitLab


From b3a73b7db2b6cb3b2e5bfda5518a0e92230ef673 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:18 +1000
Subject: [PATCH 3180/4122] powerpc/qspinlock: convert atomic operations to
 assembly

This uses more optimal ll/sc style access patterns (rather than
cmpxchg), and also sets the EH=1 lock hint on those operations
which acquire ownership of the lock.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-4-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h       | 24 +++++--
 arch/powerpc/include/asm/qspinlock_types.h |  4 +-
 arch/powerpc/lib/qspinlock.c               | 82 +++++++++++++---------
 3 files changed, 68 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 713f6629f6fbc..c16e1f0674b5a 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -2,29 +2,43 @@
 #ifndef _ASM_POWERPC_QSPINLOCK_H
 #define _ASM_POWERPC_QSPINLOCK_H
 
-#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <asm/qspinlock_types.h>
 #include <asm/paravirt.h>
 
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
-	return atomic_read(&lock->val);
+	return READ_ONCE(lock->val);
 }
 
 static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
 {
-	return !atomic_read(&lock.val);
+	return !lock.val;
 }
 
 static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 {
-	return !!(atomic_read(&lock->val) & _Q_TAIL_CPU_MASK);
+	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
 }
 
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
-	return atomic_cmpxchg_acquire(&lock->val, 0, _Q_LOCKED_VAL) == 0;
+	u32 prev;
+
+	asm volatile(
+"1:	lwarx	%0,0,%1,%3	# queued_spin_trylock			\n"
+"	cmpwi	0,%0,0							\n"
+"	bne-	2f							\n"
+"	stwcx.	%2,0,%1							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r" (_Q_LOCKED_VAL),
+	  "i" (IS_ENABLED(CONFIG_PPC64))
+	: "cr0", "memory");
+
+	return likely(prev == 0);
 }
 
 void queued_spin_lock_slowpath(struct qspinlock *lock);
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index fe87181c59e51..b9a5a52fa6703 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -7,7 +7,7 @@
 
 typedef struct qspinlock {
 	union {
-		atomic_t val;
+		u32 val;
 
 #ifdef __LITTLE_ENDIAN
 		struct {
@@ -23,7 +23,7 @@ typedef struct qspinlock {
 	};
 } arch_spinlock_t;
 
-#define	__ARCH_SPIN_LOCK_UNLOCKED	{ { .val = ATOMIC_INIT(0) } }
+#define	__ARCH_SPIN_LOCK_UNLOCKED	{ { .val = 0 } }
 
 /*
  * Bitfields in the lock word:
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 86504628501e4..645d9affacfdb 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
-#include <linux/atomic.h>
 #include <linux/bug.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
@@ -22,12 +21,12 @@ struct qnodes {
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
-static inline int encode_tail_cpu(int cpu)
+static inline u32 encode_tail_cpu(int cpu)
 {
 	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
 }
 
-static inline int decode_tail_cpu(int val)
+static inline int decode_tail_cpu(u32 val)
 {
 	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
 }
@@ -39,26 +38,34 @@ static inline int decode_tail_cpu(int val)
  * This is used by the head of the queue to acquire the lock and clean up
  * its tail if it was the last one queued.
  */
-static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tail)
+static __always_inline u32 set_locked_clean_tail(struct qspinlock *lock, u32 tail)
 {
-	int val = atomic_read(&lock->val);
-
-	BUG_ON(val & _Q_LOCKED_VAL);
-
-	/* If we're the last queued, must clean up the tail. */
-	if ((val & _Q_TAIL_CPU_MASK) == tail) {
-		if (atomic_cmpxchg_acquire(&lock->val, val, _Q_LOCKED_VAL) == val)
-			return val;
-		/* Another waiter must have enqueued */
-		val = atomic_read(&lock->val);
-		BUG_ON(val & _Q_LOCKED_VAL);
-	}
-
-	/* We must be the owner, just set the lock bit and acquire */
-	atomic_or(_Q_LOCKED_VAL, &lock->val);
-	__atomic_acquire_fence();
-
-	return val;
+	u32 newval = _Q_LOCKED_VAL;
+	u32 prev, tmp;
+
+	asm volatile(
+"1:	lwarx	%0,0,%2,%6	# set_locked_clean_tail			\n"
+	/* Test whether the lock tail == tail */
+"	and	%1,%0,%5						\n"
+"	cmpw	0,%1,%3							\n"
+	/* Merge the new locked value */
+"	or	%1,%1,%4						\n"
+"	bne	2f							\n"
+	/* If the lock tail matched, then clear it, otherwise leave it. */
+"	andc	%1,%1,%5						\n"
+"2:	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"3:									\n"
+	: "=&r" (prev), "=&r" (tmp)
+	: "r" (&lock->val), "r"(tail), "r" (newval),
+	  "r" (_Q_TAIL_CPU_MASK),
+	  "i" (IS_ENABLED(CONFIG_PPC64))
+	: "cr0", "memory");
+
+	BUG_ON(prev & _Q_LOCKED_VAL);
+
+	return prev;
 }
 
 /*
@@ -68,20 +75,25 @@ static __always_inline int set_locked_clean_tail(struct qspinlock *lock, int tai
  * acquire barrier in get_tail_qnode() when the next CPU finds this tail
  * value.
  */
-static __always_inline int publish_tail_cpu(struct qspinlock *lock, int tail)
+static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
 {
-	for (;;) {
-		int val = atomic_read(&lock->val);
-		int newval = (val & ~_Q_TAIL_CPU_MASK) | tail;
-		int old;
-
-		old = atomic_cmpxchg_release(&lock->val, val, newval);
-		if (old == val)
-			return old;
-	}
+	u32 prev, tmp;
+
+	asm volatile(
+"\t"	PPC_RELEASE_BARRIER "						\n"
+"1:	lwarx	%0,0,%2		# publish_tail_cpu			\n"
+"	andc	%1,%0,%4						\n"
+"	or	%1,%1,%3						\n"
+"	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev), "=&r"(tmp)
+	: "r" (&lock->val), "r" (tail), "r"(_Q_TAIL_CPU_MASK)
+	: "cr0", "memory");
+
+	return prev;
 }
 
-static struct qnode *get_tail_qnode(struct qspinlock *lock, int val)
+static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 {
 	int cpu = decode_tail_cpu(val);
 	struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
@@ -109,7 +121,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 {
 	struct qnodes *qnodesp;
 	struct qnode *next, *node;
-	int val, old, tail;
+	u32 val, old, tail;
 	int idx;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
@@ -156,7 +168,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 
 	/* We're at the head of the waitqueue, wait for the lock. */
 	for (;;) {
-		val = atomic_read(&lock->val);
+		val = READ_ONCE(lock->val);
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
-- 
GitLab


From 6aa42f883c438ea132a28801bef3f86f3883d14c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:19 +1000
Subject: [PATCH 3181/4122] powerpc/qspinlock: allow new waiters to steal the
 lock before queueing

Allow new waiters to "steal" the lock before queueing. That is, to
acquire it while other CPUs have queued.

This particularly helps paravirt performance when physical CPUs are
oversubscribed, by keeping the lock from becoming a strict FIFO and
vCPU preemption causing queue train wrecks.

The new __queued_spin_trylock_steal() function is put in qspinlock.h
to save having to move it, because it will be used there by a later
change.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-5-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h |  23 ++++++
 arch/powerpc/lib/qspinlock.c         | 110 ++++++++++++++++++++++++---
 2 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index c16e1f0674b5a..cebd2c89c08d4 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -41,6 +41,29 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 	return likely(prev == 0);
 }
 
+static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
+{
+	u32 prev, tmp;
+
+	/* Trylock may get ahead of queued nodes if it finds unlocked */
+	asm volatile(
+"1:	lwarx	%0,0,%2,%5	# __queued_spin_trylock_steal		\n"
+"	andc.	%1,%0,%4						\n"
+"	bne-	2f							\n"
+"	and	%1,%0,%4						\n"
+"	or	%1,%1,%3						\n"
+"	stwcx.	%1,0,%2							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (prev), "=&r" (tmp)
+	: "r" (&lock->val), "r" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK),
+	  "i" (IS_ENABLED(CONFIG_PPC64))
+	: "cr0", "memory");
+
+	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
+}
+
 void queued_spin_lock_slowpath(struct qspinlock *lock);
 
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 645d9affacfdb..6ffd3261064c2 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -19,8 +19,17 @@ struct qnodes {
 	struct qnode nodes[MAX_NODES];
 };
 
+/* Tuning parameters */
+static int steal_spins __read_mostly = (1 << 5);
+static bool maybe_stealers __read_mostly = true;
+
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
+static __always_inline int get_steal_spins(void)
+{
+	return steal_spins;
+}
+
 static inline u32 encode_tail_cpu(int cpu)
 {
 	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
@@ -38,33 +47,35 @@ static inline int decode_tail_cpu(u32 val)
  * This is used by the head of the queue to acquire the lock and clean up
  * its tail if it was the last one queued.
  */
-static __always_inline u32 set_locked_clean_tail(struct qspinlock *lock, u32 tail)
+static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
 {
 	u32 newval = _Q_LOCKED_VAL;
 	u32 prev, tmp;
 
 	asm volatile(
-"1:	lwarx	%0,0,%2,%6	# set_locked_clean_tail			\n"
-	/* Test whether the lock tail == tail */
-"	and	%1,%0,%5						\n"
+"1:	lwarx	%0,0,%2,%7	# trylock_clean_tail			\n"
+	/* This test is necessary if there could be stealers */
+"	andi.	%1,%0,%5						\n"
+"	bne	3f							\n"
+	/* Test whether the lock tail == mytail */
+"	and	%1,%0,%6						\n"
 "	cmpw	0,%1,%3							\n"
 	/* Merge the new locked value */
 "	or	%1,%1,%4						\n"
 "	bne	2f							\n"
 	/* If the lock tail matched, then clear it, otherwise leave it. */
-"	andc	%1,%1,%5						\n"
+"	andc	%1,%1,%6						\n"
 "2:	stwcx.	%1,0,%2							\n"
 "	bne-	1b							\n"
 "\t"	PPC_ACQUIRE_BARRIER "						\n"
 "3:									\n"
 	: "=&r" (prev), "=&r" (tmp)
 	: "r" (&lock->val), "r"(tail), "r" (newval),
+	  "i" (_Q_LOCKED_VAL),
 	  "r" (_Q_TAIL_CPU_MASK),
 	  "i" (IS_ENABLED(CONFIG_PPC64))
 	: "cr0", "memory");
 
-	BUG_ON(prev & _Q_LOCKED_VAL);
-
 	return prev;
 }
 
@@ -117,6 +128,30 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 	BUG();
 }
 
+static inline bool try_to_steal_lock(struct qspinlock *lock)
+{
+	int iters = 0;
+
+	if (!steal_spins)
+		return false;
+
+	/* Attempt to steal the lock */
+	do {
+		u32 val = READ_ONCE(lock->val);
+
+		if (unlikely(!(val & _Q_LOCKED_VAL))) {
+			if (__queued_spin_trylock_steal(lock))
+				return true;
+		} else {
+			cpu_relax();
+		}
+
+		iters++;
+	} while (iters < get_steal_spins());
+
+	return false;
+}
+
 static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 {
 	struct qnodes *qnodesp;
@@ -166,6 +201,7 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 		smp_rmb(); /* acquire barrier for the mcs lock */
 	}
 
+again:
 	/* We're at the head of the waitqueue, wait for the lock. */
 	for (;;) {
 		val = READ_ONCE(lock->val);
@@ -176,9 +212,14 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 	}
 
 	/* If we're the last queued, must clean up the tail. */
-	old = set_locked_clean_tail(lock, tail);
+	old = trylock_clean_tail(lock, tail);
+	if (unlikely(old & _Q_LOCKED_VAL)) {
+		BUG_ON(!maybe_stealers);
+		goto again; /* Can only be true if maybe_stealers. */
+	}
+
 	if ((old & _Q_TAIL_CPU_MASK) == tail)
-		goto release; /* Another waiter must have enqueued */
+		goto release; /* We were the tail, no next. */
 
 	/* There is a next, must wait for node->next != NULL (MCS protocol) */
 	while (!(next = READ_ONCE(node->next)))
@@ -199,6 +240,9 @@ release:
 
 void queued_spin_lock_slowpath(struct qspinlock *lock)
 {
+	if (try_to_steal_lock(lock))
+		return;
+
 	queued_spin_lock_mcs_queue(lock);
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
@@ -208,3 +252,51 @@ void pv_spinlocks_init(void)
 {
 }
 #endif
+
+#include <linux/debugfs.h>
+static int steal_spins_set(void *data, u64 val)
+{
+	static DEFINE_MUTEX(lock);
+
+	/*
+	 * The lock slow path has a !maybe_stealers case that can assume
+	 * the head of queue will not see concurrent waiters. That waiter
+	 * is unsafe in the presence of stealers, so must keep them away
+	 * from one another.
+	 */
+
+	mutex_lock(&lock);
+	if (val && !steal_spins) {
+		maybe_stealers = true;
+		/* wait for queue head waiter to go away */
+		synchronize_rcu();
+		steal_spins = val;
+	} else if (!val && steal_spins) {
+		steal_spins = val;
+		/* wait for all possible stealers to go away */
+		synchronize_rcu();
+		maybe_stealers = false;
+	} else {
+		steal_spins = val;
+	}
+	mutex_unlock(&lock);
+
+	return 0;
+}
+
+static int steal_spins_get(void *data, u64 *val)
+{
+	*val = steal_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
+
+static __init int spinlock_debugfs_init(void)
+{
+	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+
+	return 0;
+}
+device_initcall(spinlock_debugfs_init);
-- 
GitLab


From 0944534ef4d5cf39c8133575524be0be3337dd62 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:20 +1000
Subject: [PATCH 3182/4122] powerpc/qspinlock: theft prevention to control
 latency

Give the queue head the ability to stop stealers. After a number of
spins without successfully acquiring the lock, the queue head sets
this, which halts stealing and will assure it is the next owner.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-6-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock_types.h |  8 +++-
 arch/powerpc/lib/qspinlock.c               | 53 ++++++++++++++++++++++
 2 files changed, 60 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index b9a5a52fa6703..1911a8a162371 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -29,7 +29,8 @@ typedef struct qspinlock {
  * Bitfields in the lock word:
  *
  *     0: locked bit
- *  1-16: unused bits
+ *  1-15: unused bits
+ *    16: must queue bit
  * 17-31: tail cpu (+1)
  */
 #define	_Q_SET_MASK(type)	(((1U << _Q_ ## type ## _BITS) - 1)\
@@ -39,6 +40,11 @@ typedef struct qspinlock {
 #define _Q_LOCKED_BITS		1
 #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
 
+/* 0x00010000 */
+#define _Q_MUST_Q_OFFSET	16
+#define _Q_MUST_Q_BITS		1
+#define _Q_MUST_Q_VAL		(1U << _Q_MUST_Q_OFFSET)
+
 /* 0xfffe0000 */
 #define _Q_TAIL_CPU_OFFSET	17
 #define _Q_TAIL_CPU_BITS	15
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 6ffd3261064c2..9cd442d46b9fd 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -22,6 +22,7 @@ struct qnodes {
 /* Tuning parameters */
 static int steal_spins __read_mostly = (1 << 5);
 static bool maybe_stealers __read_mostly = true;
+static int head_spins __read_mostly = (1 << 8);
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -30,6 +31,11 @@ static __always_inline int get_steal_spins(void)
 	return steal_spins;
 }
 
+static __always_inline int get_head_spins(void)
+{
+	return head_spins;
+}
+
 static inline u32 encode_tail_cpu(int cpu)
 {
 	return (cpu + 1) << _Q_TAIL_CPU_OFFSET;
@@ -104,6 +110,22 @@ static __always_inline u32 publish_tail_cpu(struct qspinlock *lock, u32 tail)
 	return prev;
 }
 
+static __always_inline u32 set_mustq(struct qspinlock *lock)
+{
+	u32 prev;
+
+	asm volatile(
+"1:	lwarx	%0,0,%1		# set_mustq				\n"
+"	or	%0,%0,%2						\n"
+"	stwcx.	%0,0,%1							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+	: "cr0", "memory");
+
+	return prev;
+}
+
 static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 {
 	int cpu = decode_tail_cpu(val);
@@ -139,6 +161,9 @@ static inline bool try_to_steal_lock(struct qspinlock *lock)
 	do {
 		u32 val = READ_ONCE(lock->val);
 
+		if (val & _Q_MUST_Q_VAL)
+			break;
+
 		if (unlikely(!(val & _Q_LOCKED_VAL))) {
 			if (__queued_spin_trylock_steal(lock))
 				return true;
@@ -157,7 +182,9 @@ static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
 	struct qnodes *qnodesp;
 	struct qnode *next, *node;
 	u32 val, old, tail;
+	bool mustq = false;
 	int idx;
+	int iters = 0;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
 
@@ -209,6 +236,15 @@ again:
 			break;
 
 		cpu_relax();
+		if (!maybe_stealers)
+			continue;
+		iters++;
+
+		if (!mustq && iters >= get_head_spins()) {
+			mustq = true;
+			set_mustq(lock);
+			val |= _Q_MUST_Q_VAL;
+		}
 	}
 
 	/* If we're the last queued, must clean up the tail. */
@@ -293,9 +329,26 @@ static int steal_spins_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
 
+static int head_spins_set(void *data, u64 val)
+{
+	head_spins = val;
+
+	return 0;
+}
+
+static int head_spins_get(void *data, u64 *val)
+{
+	*val = head_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
 
 	return 0;
 }
-- 
GitLab


From e1a31e7fd7130628cfd229253da2b4630e7a809c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:21 +1000
Subject: [PATCH 3183/4122] powerpc/qspinlock: store owner CPU in lock word

Store the owner CPU number in the lock word so it may be yielded to,
as powerpc's paravirtualised simple spinlocks do.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-7-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h       | 12 ++++++++++--
 arch/powerpc/include/asm/qspinlock_types.h | 12 +++++++++++-
 arch/powerpc/lib/qspinlock.c               |  2 +-
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index cebd2c89c08d4..9572a2ef974da 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -21,8 +21,15 @@ static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
 	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
 }
 
+static __always_inline u32 queued_spin_encode_locked_val(void)
+{
+	/* XXX: make this use lock value in paca like simple spinlocks? */
+	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
+}
+
 static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 {
+	u32 new = queued_spin_encode_locked_val();
 	u32 prev;
 
 	asm volatile(
@@ -34,7 +41,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 "\t"	PPC_ACQUIRE_BARRIER "						\n"
 "2:									\n"
 	: "=&r" (prev)
-	: "r" (&lock->val), "r" (_Q_LOCKED_VAL),
+	: "r" (&lock->val), "r" (new),
 	  "i" (IS_ENABLED(CONFIG_PPC64))
 	: "cr0", "memory");
 
@@ -43,6 +50,7 @@ static __always_inline int queued_spin_trylock(struct qspinlock *lock)
 
 static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
 {
+	u32 new = queued_spin_encode_locked_val();
 	u32 prev, tmp;
 
 	/* Trylock may get ahead of queued nodes if it finds unlocked */
@@ -57,7 +65,7 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
 "\t"	PPC_ACQUIRE_BARRIER "						\n"
 "2:									\n"
 	: "=&r" (prev), "=&r" (tmp)
-	: "r" (&lock->val), "r" (_Q_LOCKED_VAL), "r" (_Q_TAIL_CPU_MASK),
+	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
 	  "i" (IS_ENABLED(CONFIG_PPC64))
 	: "cr0", "memory");
 
diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index 1911a8a162371..adfeed4aa495b 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -29,7 +29,8 @@ typedef struct qspinlock {
  * Bitfields in the lock word:
  *
  *     0: locked bit
- *  1-15: unused bits
+ *  1-14: lock holder cpu
+ *    15: unused bit
  *    16: must queue bit
  * 17-31: tail cpu (+1)
  */
@@ -40,6 +41,15 @@ typedef struct qspinlock {
 #define _Q_LOCKED_BITS		1
 #define _Q_LOCKED_VAL		(1U << _Q_LOCKED_OFFSET)
 
+/* 0x00007ffe */
+#define _Q_OWNER_CPU_OFFSET	1
+#define _Q_OWNER_CPU_BITS	14
+#define _Q_OWNER_CPU_MASK	_Q_SET_MASK(OWNER_CPU)
+
+#if CONFIG_NR_CPUS > (1U << _Q_OWNER_CPU_BITS)
+#error "qspinlock does not support such large CONFIG_NR_CPUS"
+#endif
+
 /* 0x00010000 */
 #define _Q_MUST_Q_OFFSET	16
 #define _Q_MUST_Q_BITS		1
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 9cd442d46b9fd..4d74db0e565f2 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -55,7 +55,7 @@ static inline int decode_tail_cpu(u32 val)
  */
 static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
 {
-	u32 newval = _Q_LOCKED_VAL;
+	u32 newval = queued_spin_encode_locked_val();
 	u32 prev, tmp;
 
 	asm volatile(
-- 
GitLab


From 085f03311bcede99550e08a1f7cad41bf758b460 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:22 +1000
Subject: [PATCH 3184/4122] powerpc/qspinlock: paravirt yield to lock owner

Waiters spinning on the lock word should yield to the lock owner if the
vCPU is preempted. This improves performance when the hypervisor has
oversubscribed physical CPUs.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-8-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 99 +++++++++++++++++++++++++++++++-----
 1 file changed, 87 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 4d74db0e565f2..18e21574e6c59 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -5,6 +5,7 @@
 #include <linux/percpu.h>
 #include <linux/smp.h>
 #include <asm/qspinlock.h>
+#include <asm/paravirt.h>
 
 #define MAX_NODES	4
 
@@ -24,14 +25,16 @@ static int steal_spins __read_mostly = (1 << 5);
 static bool maybe_stealers __read_mostly = true;
 static int head_spins __read_mostly = (1 << 8);
 
+static bool pv_yield_owner __read_mostly = true;
+
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
-static __always_inline int get_steal_spins(void)
+static __always_inline int get_steal_spins(bool paravirt)
 {
 	return steal_spins;
 }
 
-static __always_inline int get_head_spins(void)
+static __always_inline int get_head_spins(bool paravirt)
 {
 	return head_spins;
 }
@@ -46,6 +49,11 @@ static inline int decode_tail_cpu(u32 val)
 	return (val >> _Q_TAIL_CPU_OFFSET) - 1;
 }
 
+static inline int get_owner_cpu(u32 val)
+{
+	return (val & _Q_OWNER_CPU_MASK) >> _Q_OWNER_CPU_OFFSET;
+}
+
 /*
  * Try to acquire the lock if it was not already locked. If the tail matches
  * mytail then clear it, otherwise leave it unchnaged. Return previous value.
@@ -150,7 +158,45 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 	BUG();
 }
 
-static inline bool try_to_steal_lock(struct qspinlock *lock)
+static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+	int owner;
+	u32 yield_count;
+
+	BUG_ON(!(val & _Q_LOCKED_VAL));
+
+	if (!paravirt)
+		goto relax;
+
+	if (!pv_yield_owner)
+		goto relax;
+
+	owner = get_owner_cpu(val);
+	yield_count = yield_count_of(owner);
+
+	if ((yield_count & 1) == 0)
+		goto relax; /* owner vcpu is running */
+
+	/*
+	 * Read the lock word after sampling the yield count. On the other side
+	 * there may a wmb because the yield count update is done by the
+	 * hypervisor preemption and the value update by the OS, however this
+	 * ordering might reduce the chance of out of order accesses and
+	 * improve the heuristic.
+	 */
+	smp_rmb();
+
+	if (READ_ONCE(lock->val) == val) {
+		yield_to_preempted(owner, yield_count);
+		/* Don't relax if we yielded. Maybe we should? */
+		return;
+	}
+relax:
+	cpu_relax();
+}
+
+
+static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
 	int iters = 0;
 
@@ -168,16 +214,16 @@ static inline bool try_to_steal_lock(struct qspinlock *lock)
 			if (__queued_spin_trylock_steal(lock))
 				return true;
 		} else {
-			cpu_relax();
+			yield_to_locked_owner(lock, val, paravirt);
 		}
 
 		iters++;
-	} while (iters < get_steal_spins());
+	} while (iters < get_steal_spins(paravirt));
 
 	return false;
 }
 
-static inline void queued_spin_lock_mcs_queue(struct qspinlock *lock)
+static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, bool paravirt)
 {
 	struct qnodes *qnodesp;
 	struct qnode *next, *node;
@@ -235,12 +281,12 @@ again:
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
-		cpu_relax();
+		yield_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
 		iters++;
 
-		if (!mustq && iters >= get_head_spins()) {
+		if (!mustq && iters >= get_head_spins(paravirt)) {
 			mustq = true;
 			set_mustq(lock);
 			val |= _Q_MUST_Q_VAL;
@@ -276,10 +322,20 @@ release:
 
 void queued_spin_lock_slowpath(struct qspinlock *lock)
 {
-	if (try_to_steal_lock(lock))
-		return;
-
-	queued_spin_lock_mcs_queue(lock);
+	/*
+	 * This looks funny, but it induces the compiler to inline both
+	 * sides of the branch rather than share code as when the condition
+	 * is passed as the paravirt argument to the functions.
+	 */
+	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
+		if (try_to_steal_lock(lock, true))
+			return;
+		queued_spin_lock_mcs_queue(lock, true);
+	} else {
+		if (try_to_steal_lock(lock, false))
+			return;
+		queued_spin_lock_mcs_queue(lock, false);
+	}
 }
 EXPORT_SYMBOL(queued_spin_lock_slowpath);
 
@@ -345,10 +401,29 @@ static int head_spins_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_head_spins, head_spins_get, head_spins_set, "%llu\n");
 
+static int pv_yield_owner_set(void *data, u64 val)
+{
+	pv_yield_owner = !!val;
+
+	return 0;
+}
+
+static int pv_yield_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_owner;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
+	if (is_shared_processor()) {
+		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+	}
 
 	return 0;
 }
-- 
GitLab


From bd48287b2cf4cd6e95576db3a94fd2a7cdf9832d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:23 +1000
Subject: [PATCH 3185/4122] powerpc/qspinlock: implement option to yield to
 previous node

Queued waiters which are not at the head of the queue don't spin on
the lock word but their qnode lock word, waiting for the previous queued
CPU to release them. Add an option which allows these waiters to yield
to the previous CPU if its vCPU is preempted.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-9-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 46 +++++++++++++++++++++++++++++++++++-
 1 file changed, 45 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 18e21574e6c59..41afd8e689186 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -26,6 +26,7 @@ static bool maybe_stealers __read_mostly = true;
 static int head_spins __read_mostly = (1 << 8);
 
 static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_prev __read_mostly = true;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -195,6 +196,32 @@ relax:
 	cpu_relax();
 }
 
+static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+{
+	int prev_cpu = decode_tail_cpu(val);
+	u32 yield_count;
+
+	if (!paravirt)
+		goto relax;
+
+	if (!pv_yield_prev)
+		goto relax;
+
+	yield_count = yield_count_of(prev_cpu);
+	if ((yield_count & 1) == 0)
+		goto relax; /* owner vcpu is running */
+
+	smp_rmb(); /* See yield_to_locked_owner comment */
+
+	if (!node->locked) {
+		yield_to_preempted(prev_cpu, yield_count);
+		return;
+	}
+
+relax:
+	cpu_relax();
+}
+
 
 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
@@ -269,7 +296,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 
 		/* Wait for mcs node lock to be released */
 		while (!node->locked)
-			cpu_relax();
+			yield_to_prev(lock, node, old, paravirt);
 
 		smp_rmb(); /* acquire barrier for the mcs lock */
 	}
@@ -417,12 +444,29 @@ static int pv_yield_owner_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
 
+static int pv_yield_prev_set(void *data, u64 val)
+{
+	pv_yield_prev = !!val;
+
+	return 0;
+}
+
+static int pv_yield_prev_get(void *data, u64 *val)
+{
+	*val = pv_yield_prev;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
 	if (is_shared_processor()) {
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 	}
 
 	return 0;
-- 
GitLab


From b4c3cdc1a698a2f6168768d0bed4bf062723722e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:24 +1000
Subject: [PATCH 3186/4122] powerpc/qspinlock: allow stealing when head of
 queue yields

If the head of queue is preventing stealing but it finds the owner vCPU
is preempted, it will yield its cycles to the owner which could cause it
to become preempted. Add an option to re-allow stealers before yielding,
and disallow them again after returning from the yield.

Disable this option by default for now, i.e., no logical change.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-10-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 59 ++++++++++++++++++++++++++++++++++--
 1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 41afd8e689186..c1f3b699b63f7 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -26,6 +26,7 @@ static bool maybe_stealers __read_mostly = true;
 static int head_spins __read_mostly = (1 << 8);
 
 static bool pv_yield_owner __read_mostly = true;
+static bool pv_yield_allow_steal __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
@@ -135,6 +136,22 @@ static __always_inline u32 set_mustq(struct qspinlock *lock)
 	return prev;
 }
 
+static __always_inline u32 clear_mustq(struct qspinlock *lock)
+{
+	u32 prev;
+
+	asm volatile(
+"1:	lwarx	%0,0,%1		# clear_mustq				\n"
+"	andc	%0,%0,%2						\n"
+"	stwcx.	%0,0,%1							\n"
+"	bne-	1b							\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r" (_Q_MUST_Q_VAL)
+	: "cr0", "memory");
+
+	return prev;
+}
+
 static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 {
 	int cpu = decode_tail_cpu(val);
@@ -159,7 +176,7 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 	BUG();
 }
 
-static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
 {
 	int owner;
 	u32 yield_count;
@@ -188,7 +205,11 @@ static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 va
 	smp_rmb();
 
 	if (READ_ONCE(lock->val) == val) {
+		if (mustq)
+			clear_mustq(lock);
 		yield_to_preempted(owner, yield_count);
+		if (mustq)
+			set_mustq(lock);
 		/* Don't relax if we yielded. Maybe we should? */
 		return;
 	}
@@ -196,6 +217,21 @@ relax:
 	cpu_relax();
 }
 
+static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+	__yield_to_locked_owner(lock, val, paravirt, false);
+}
+
+static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+{
+	bool mustq = false;
+
+	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
+		mustq = true;
+
+	__yield_to_locked_owner(lock, val, paravirt, mustq);
+}
+
 static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
 {
 	int prev_cpu = decode_tail_cpu(val);
@@ -211,7 +247,7 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 	if ((yield_count & 1) == 0)
 		goto relax; /* owner vcpu is running */
 
-	smp_rmb(); /* See yield_to_locked_owner comment */
+	smp_rmb(); /* See __yield_to_locked_owner comment */
 
 	if (!node->locked) {
 		yield_to_preempted(prev_cpu, yield_count);
@@ -308,7 +344,7 @@ again:
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
-		yield_to_locked_owner(lock, val, paravirt);
+		yield_head_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
 		iters++;
@@ -444,6 +480,22 @@ static int pv_yield_owner_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_owner, pv_yield_owner_get, pv_yield_owner_set, "%llu\n");
 
+static int pv_yield_allow_steal_set(void *data, u64 val)
+{
+	pv_yield_allow_steal = !!val;
+
+	return 0;
+}
+
+static int pv_yield_allow_steal_get(void *data, u64 *val)
+{
+	*val = pv_yield_allow_steal;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
+
 static int pv_yield_prev_set(void *data, u64 val)
 {
 	pv_yield_prev = !!val;
@@ -466,6 +518,7 @@ static __init int spinlock_debugfs_init(void)
 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
 	if (is_shared_processor()) {
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
+		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 	}
 
-- 
GitLab


From 28db61e207ea3890d286cff3141c1ce67346074d Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:25 +1000
Subject: [PATCH 3187/4122] powerpc/qspinlock: allow propagation of yield CPU
 down the queue

Having all CPUs poll the lock word for the owner CPU that should be
yielded to defeats most of the purpose of using MCS queueing for
scalability. Yet it may be desirable for queued waiters to yield to a
preempted owner.

With this change, queue waiters never sample the owner CPU directly from
the lock word. The queue head (which is spinning on the lock) propagates
the owner CPU back to the next waiter if it finds the owner has been
preempted. That waiter then propagates the owner CPU back to the next
waiter, and so on.

s390 addresses this problem differenty, by having queued waiters sample
the lock word to find the owner at a low frequency. That has the
advantage of being simpler, the advantage of propagation is that the
lock word never has to be accesed by queued waiters, and the transfer of
cache lines to transmit the owner data is only required when lock holder
vCPU preemption occurs.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-11-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 79 ++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index c1f3b699b63f7..c45f30c9a19e8 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -12,6 +12,7 @@
 struct qnode {
 	struct qnode	*next;
 	struct qspinlock *lock;
+	int		yield_cpu;
 	u8		locked; /* 1 if lock acquired */
 };
 
@@ -28,6 +29,7 @@ static int head_spins __read_mostly = (1 << 8);
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
+static bool pv_yield_propagate_owner __read_mostly = true;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -232,14 +234,67 @@ static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u
 	__yield_to_locked_owner(lock, val, paravirt, mustq);
 }
 
+static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+{
+	struct qnode *next;
+	int owner;
+
+	if (!paravirt)
+		return;
+	if (!pv_yield_propagate_owner)
+		return;
+
+	owner = get_owner_cpu(val);
+	if (*set_yield_cpu == owner)
+		return;
+
+	next = READ_ONCE(node->next);
+	if (!next)
+		return;
+
+	if (vcpu_is_preempted(owner)) {
+		next->yield_cpu = owner;
+		*set_yield_cpu = owner;
+	} else if (*set_yield_cpu != -1) {
+		next->yield_cpu = owner;
+		*set_yield_cpu = owner;
+	}
+}
+
 static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
 {
 	int prev_cpu = decode_tail_cpu(val);
 	u32 yield_count;
+	int yield_cpu;
 
 	if (!paravirt)
 		goto relax;
 
+	if (!pv_yield_propagate_owner)
+		goto yield_prev;
+
+	yield_cpu = READ_ONCE(node->yield_cpu);
+	if (yield_cpu == -1) {
+		/* Propagate back the -1 CPU */
+		if (node->next && node->next->yield_cpu != -1)
+			node->next->yield_cpu = yield_cpu;
+		goto yield_prev;
+	}
+
+	yield_count = yield_count_of(yield_cpu);
+	if ((yield_count & 1) == 0)
+		goto yield_prev; /* owner vcpu is running */
+
+	smp_rmb();
+
+	if (yield_cpu == node->yield_cpu) {
+		if (node->next && node->next->yield_cpu != yield_cpu)
+			node->next->yield_cpu = yield_cpu;
+		yield_to_preempted(yield_cpu, yield_count);
+		return;
+	}
+
+yield_prev:
 	if (!pv_yield_prev)
 		goto relax;
 
@@ -293,6 +348,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	u32 val, old, tail;
 	bool mustq = false;
 	int idx;
+	int set_yield_cpu = -1;
 	int iters = 0;
 
 	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
@@ -314,6 +370,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	node = &qnodesp->nodes[idx];
 	node->next = NULL;
 	node->lock = lock;
+	node->yield_cpu = -1;
 	node->locked = 0;
 
 	tail = encode_tail_cpu(smp_processor_id());
@@ -334,6 +391,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		while (!node->locked)
 			yield_to_prev(lock, node, old, paravirt);
 
+		/* Clear out stale propagated yield_cpu */
+		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
+			node->yield_cpu = -1;
+
 		smp_rmb(); /* acquire barrier for the mcs lock */
 	}
 
@@ -344,6 +405,7 @@ again:
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
+		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
 		yield_head_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
@@ -512,6 +574,22 @@ static int pv_yield_prev_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
 
+static int pv_yield_propagate_owner_set(void *data, u64 val)
+{
+	pv_yield_propagate_owner = !!val;
+
+	return 0;
+}
+
+static int pv_yield_propagate_owner_get(void *data, u64 *val)
+{
+	*val = pv_yield_propagate_owner;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
@@ -520,6 +598,7 @@ static __init int spinlock_debugfs_init(void)
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
+		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
 	}
 
 	return 0;
-- 
GitLab


From be742c573fdafcfa1752642ca1c7aaf08c258128 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:26 +1000
Subject: [PATCH 3188/4122] powerpc/qspinlock: add ability to prod new queue
 head CPU

After the head of the queue acquires the lock, it releases the
next waiter in the queue to become the new head. Add an option
to prod the new head if its vCPU was preempted. This may only
have an effect if queue waiters are yielding.

Disable this option by default for now, i.e., no logical change.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-12-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index c45f30c9a19e8..2f6c0bed25ead 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -12,6 +12,7 @@
 struct qnode {
 	struct qnode	*next;
 	struct qspinlock *lock;
+	int		cpu;
 	int		yield_cpu;
 	u8		locked; /* 1 if lock acquired */
 };
@@ -30,6 +31,7 @@ static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
 static bool pv_yield_propagate_owner __read_mostly = true;
+static bool pv_prod_head __read_mostly = false;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 
@@ -370,10 +372,11 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	node = &qnodesp->nodes[idx];
 	node->next = NULL;
 	node->lock = lock;
+	node->cpu = smp_processor_id();
 	node->yield_cpu = -1;
 	node->locked = 0;
 
-	tail = encode_tail_cpu(smp_processor_id());
+	tail = encode_tail_cpu(node->cpu);
 
 	old = publish_tail_cpu(lock, tail);
 
@@ -439,7 +442,14 @@ again:
 	 * this store to locked. The corresponding barrier is the smp_rmb()
 	 * acquire barrier for mcs lock, above.
 	 */
-	WRITE_ONCE(next->locked, 1);
+	if (paravirt && pv_prod_head) {
+		int next_cpu = next->cpu;
+		WRITE_ONCE(next->locked, 1);
+		if (vcpu_is_preempted(next_cpu))
+			prod_cpu(next_cpu);
+	} else {
+		WRITE_ONCE(next->locked, 1);
+	}
 
 release:
 	qnodesp->count--; /* release the node */
@@ -590,6 +600,22 @@ static int pv_yield_propagate_owner_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
 
+static int pv_prod_head_set(void *data, u64 val)
+{
+	pv_prod_head = !!val;
+
+	return 0;
+}
+
+static int pv_prod_head_get(void *data, u64 *val)
+{
+	*val = pv_prod_head;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "%llu\n");
+
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
@@ -599,6 +625,7 @@ static __init int spinlock_debugfs_init(void)
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
+		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
 	}
 
 	return 0;
-- 
GitLab


From f61ab43cc1a6146d6eef7e0713a452c3677ad13e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:27 +1000
Subject: [PATCH 3189/4122] powerpc/qspinlock: allow lock stealing in trylock
 and lock fastpath

This change allows trylock to steal the lock. It also allows the initial
lock attempt to steal the lock rather than bailing out and going to the
slow path.

This gives trylock more strength: without this a continually-contended
lock will never permit a trylock to succeed. With this change, the
trylock has a small but non-zero chance.

It also gives the lock fastpath most of the benefit of passing the
reservation back through to the steal loop in the slow path without the
complexity.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-13-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h | 22 ++++++++++++++++++++--
 arch/powerpc/lib/qspinlock.c         |  9 +++++++++
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 9572a2ef974da..93b1c976db8a5 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -6,6 +6,15 @@
 #include <asm/qspinlock_types.h>
 #include <asm/paravirt.h>
 
+/*
+ * The trylock itself may steal. This makes trylocks slightly stronger, and
+ * might make spin locks slightly more efficient when stealing.
+ *
+ * This is compile-time, so if true then there may always be stealers, so the
+ * nosteal paths become unused.
+ */
+#define _Q_SPIN_TRY_LOCK_STEAL 1
+
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	return READ_ONCE(lock->val);
@@ -27,13 +36,14 @@ static __always_inline u32 queued_spin_encode_locked_val(void)
 	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
 }
 
-static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
 {
 	u32 new = queued_spin_encode_locked_val();
 	u32 prev;
 
+	/* Trylock succeeds only when unlocked and no queued nodes */
 	asm volatile(
-"1:	lwarx	%0,0,%1,%3	# queued_spin_trylock			\n"
+"1:	lwarx	%0,0,%1,%3	# __queued_spin_trylock_nosteal		\n"
 "	cmpwi	0,%0,0							\n"
 "	bne-	2f							\n"
 "	stwcx.	%2,0,%1							\n"
@@ -72,6 +82,14 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
 	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
 }
 
+static __always_inline int queued_spin_trylock(struct qspinlock *lock)
+{
+	if (!_Q_SPIN_TRY_LOCK_STEAL)
+		return __queued_spin_trylock_nosteal(lock);
+	else
+		return __queued_spin_trylock_steal(lock);
+}
+
 void queued_spin_lock_slowpath(struct qspinlock *lock);
 
 static __always_inline void queued_spin_lock(struct qspinlock *lock)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 2f6c0bed25ead..8e5b8bc3f0947 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -24,7 +24,11 @@ struct qnodes {
 
 /* Tuning parameters */
 static int steal_spins __read_mostly = (1 << 5);
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+static const bool maybe_stealers = true;
+#else
 static bool maybe_stealers __read_mostly = true;
+#endif
 static int head_spins __read_mostly = (1 << 8);
 
 static bool pv_yield_owner __read_mostly = true;
@@ -483,6 +487,10 @@ void pv_spinlocks_init(void)
 #include <linux/debugfs.h>
 static int steal_spins_set(void *data, u64 val)
 {
+#if _Q_SPIN_TRY_LOCK_STEAL == 1
+	/* MAYBE_STEAL remains true */
+	steal_spins = val;
+#else
 	static DEFINE_MUTEX(lock);
 
 	/*
@@ -507,6 +515,7 @@ static int steal_spins_set(void *data, u64 val)
 		steal_spins = val;
 	}
 	mutex_unlock(&lock);
+#endif
 
 	return 0;
 }
-- 
GitLab


From 71c235027ce7940434acd3f553602ad8b5d36469 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:28 +1000
Subject: [PATCH 3190/4122] powerpc/qspinlock: use spin_begin/end API

Use the spin_begin/spin_cpu_relax/spin_end APIs in qspinlock, which helps
to prevent threads issuing a lot of expensive priority nops which may not
have much effect due to immediately executing low then medium priority.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-14-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 39 ++++++++++++++++++++++++++++++++----
 1 file changed, 35 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 8e5b8bc3f0947..36aff7defda84 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -184,6 +184,7 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 	BUG();
 }
 
+/* Called inside spin_begin() */
 static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
 {
 	int owner;
@@ -203,6 +204,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 	if ((yield_count & 1) == 0)
 		goto relax; /* owner vcpu is running */
 
+	spin_end();
+
 	/*
 	 * Read the lock word after sampling the yield count. On the other side
 	 * there may a wmb because the yield count update is done by the
@@ -218,18 +221,22 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 		yield_to_preempted(owner, yield_count);
 		if (mustq)
 			set_mustq(lock);
+		spin_begin();
 		/* Don't relax if we yielded. Maybe we should? */
 		return;
 	}
+	spin_begin();
 relax:
-	cpu_relax();
+	spin_cpu_relax();
 }
 
+/* Called inside spin_begin() */
 static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
 {
 	__yield_to_locked_owner(lock, val, paravirt, false);
 }
 
+/* Called inside spin_begin() */
 static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
 {
 	bool mustq = false;
@@ -267,6 +274,7 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int
 	}
 }
 
+/* Called inside spin_begin() */
 static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
 {
 	int prev_cpu = decode_tail_cpu(val);
@@ -291,14 +299,18 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 	if ((yield_count & 1) == 0)
 		goto yield_prev; /* owner vcpu is running */
 
+	spin_end();
+
 	smp_rmb();
 
 	if (yield_cpu == node->yield_cpu) {
 		if (node->next && node->next->yield_cpu != yield_cpu)
 			node->next->yield_cpu = yield_cpu;
 		yield_to_preempted(yield_cpu, yield_count);
+		spin_begin();
 		return;
 	}
+	spin_begin();
 
 yield_prev:
 	if (!pv_yield_prev)
@@ -308,15 +320,19 @@ yield_prev:
 	if ((yield_count & 1) == 0)
 		goto relax; /* owner vcpu is running */
 
+	spin_end();
+
 	smp_rmb(); /* See __yield_to_locked_owner comment */
 
 	if (!node->locked) {
 		yield_to_preempted(prev_cpu, yield_count);
+		spin_begin();
 		return;
 	}
+	spin_begin();
 
 relax:
-	cpu_relax();
+	spin_cpu_relax();
 }
 
 
@@ -328,6 +344,8 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 		return false;
 
 	/* Attempt to steal the lock */
+	spin_begin();
+
 	do {
 		u32 val = READ_ONCE(lock->val);
 
@@ -335,8 +353,10 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 			break;
 
 		if (unlikely(!(val & _Q_LOCKED_VAL))) {
+			spin_end();
 			if (__queued_spin_trylock_steal(lock))
 				return true;
+			spin_begin();
 		} else {
 			yield_to_locked_owner(lock, val, paravirt);
 		}
@@ -344,6 +364,8 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 		iters++;
 	} while (iters < get_steal_spins(paravirt));
 
+	spin_end();
+
 	return false;
 }
 
@@ -395,8 +417,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		WRITE_ONCE(prev->next, node);
 
 		/* Wait for mcs node lock to be released */
+		spin_begin();
 		while (!node->locked)
 			yield_to_prev(lock, node, old, paravirt);
+		spin_end();
 
 		/* Clear out stale propagated yield_cpu */
 		if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
@@ -407,6 +431,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 
 again:
 	/* We're at the head of the waitqueue, wait for the lock. */
+	spin_begin();
 	for (;;) {
 		val = READ_ONCE(lock->val);
 		if (!(val & _Q_LOCKED_VAL))
@@ -424,6 +449,7 @@ again:
 			val |= _Q_MUST_Q_VAL;
 		}
 	}
+	spin_end();
 
 	/* If we're the last queued, must clean up the tail. */
 	old = trylock_clean_tail(lock, tail);
@@ -436,8 +462,13 @@ again:
 		goto release; /* We were the tail, no next. */
 
 	/* There is a next, must wait for node->next != NULL (MCS protocol) */
-	while (!(next = READ_ONCE(node->next)))
-		cpu_relax();
+	next = READ_ONCE(node->next);
+	if (!next) {
+		spin_begin();
+		while (!(next = READ_ONCE(node->next)))
+			cpu_relax();
+		spin_end();
+	}
 
 	/*
 	 * Unlock the next mcs waiter node. Release barrier is not required
-- 
GitLab


From cc79701114154efe79663ba47d9e51aad2ed3c78 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:29 +1000
Subject: [PATCH 3191/4122] powerpc/qspinlock: reduce remote node steal spins

Allow for a reduction in the number of times a CPU from a different
node than the owner can attempt to steal the lock before queueing.
This could bias the transfer behaviour of the lock across the
machine and reduce NUMA crossings.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-15-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 43 +++++++++++++++++++++++++++++++++---
 1 file changed, 40 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 36aff7defda84..8c6b5ef871184 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -4,6 +4,7 @@
 #include <linux/export.h>
 #include <linux/percpu.h>
 #include <linux/smp.h>
+#include <linux/topology.h>
 #include <asm/qspinlock.h>
 #include <asm/paravirt.h>
 
@@ -24,6 +25,7 @@ struct qnodes {
 
 /* Tuning parameters */
 static int steal_spins __read_mostly = (1 << 5);
+static int remote_steal_spins __read_mostly = (1 << 2);
 #if _Q_SPIN_TRY_LOCK_STEAL == 1
 static const bool maybe_stealers = true;
 #else
@@ -44,6 +46,11 @@ static __always_inline int get_steal_spins(bool paravirt)
 	return steal_spins;
 }
 
+static __always_inline int get_remote_steal_spins(bool paravirt)
+{
+	return remote_steal_spins;
+}
+
 static __always_inline int get_head_spins(bool paravirt)
 {
 	return head_spins;
@@ -335,10 +342,24 @@ relax:
 	spin_cpu_relax();
 }
 
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt)
+{
+	if (iters >= get_steal_spins(paravirt))
+		return true;
+
+	if (IS_ENABLED(CONFIG_NUMA) &&
+	    (iters >= get_remote_steal_spins(paravirt))) {
+		int cpu = get_owner_cpu(val);
+		if (numa_node_id() != cpu_to_node(cpu))
+			return true;
+	}
+	return false;
+}
 
 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
 	int iters = 0;
+	u32 val;
 
 	if (!steal_spins)
 		return false;
@@ -347,8 +368,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 	spin_begin();
 
 	do {
-		u32 val = READ_ONCE(lock->val);
-
+		val = READ_ONCE(lock->val);
 		if (val & _Q_MUST_Q_VAL)
 			break;
 
@@ -362,7 +382,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 		}
 
 		iters++;
-	} while (iters < get_steal_spins(paravirt));
+	} while (!steal_break(val, iters, paravirt));
 
 	spin_end();
 
@@ -560,6 +580,22 @@ static int steal_spins_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_steal_spins, steal_spins_get, steal_spins_set, "%llu\n");
 
+static int remote_steal_spins_set(void *data, u64 val)
+{
+	remote_steal_spins = val;
+
+	return 0;
+}
+
+static int remote_steal_spins_get(void *data, u64 *val)
+{
+	*val = remote_steal_spins;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_remote_steal_spins, remote_steal_spins_get, remote_steal_spins_set, "%llu\n");
+
 static int head_spins_set(void *data, u64 val)
 {
 	head_spins = val;
@@ -659,6 +695,7 @@ DEFINE_SIMPLE_ATTRIBUTE(fops_pv_prod_head, pv_prod_head_get, pv_prod_head_set, "
 static __init int spinlock_debugfs_init(void)
 {
 	debugfs_create_file("qspl_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_steal_spins);
+	debugfs_create_file("qspl_remote_steal_spins", 0600, arch_debugfs_dir, NULL, &fops_remote_steal_spins);
 	debugfs_create_file("qspl_head_spins", 0600, arch_debugfs_dir, NULL, &fops_head_spins);
 	if (is_shared_processor()) {
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
-- 
GitLab


From 39dfc73596b48bb50cf7e4f3f54e38427dda5b4e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:30 +1000
Subject: [PATCH 3192/4122] powerpc/qspinlock: allow indefinite spinning on a
 preempted owner

Provide an option that holds off queueing indefinitely while the lock
owner is preempted. This could reduce queueing latencies for very
overcommitted vcpu situations.

This is disabled by default.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-16-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 77 +++++++++++++++++++++++++++++-------
 1 file changed, 62 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 8c6b5ef871184..eeaaecfd5b774 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -35,6 +35,7 @@ static int head_spins __read_mostly = (1 << 8);
 
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
+static bool pv_spin_on_preempted_owner __read_mostly = false;
 static bool pv_yield_prev __read_mostly = true;
 static bool pv_yield_propagate_owner __read_mostly = true;
 static bool pv_prod_head __read_mostly = false;
@@ -191,11 +192,12 @@ static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 	BUG();
 }
 
-/* Called inside spin_begin() */
-static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt, bool mustq)
 {
 	int owner;
 	u32 yield_count;
+	bool preempted = false;
 
 	BUG_ON(!(val & _Q_LOCKED_VAL));
 
@@ -213,6 +215,8 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 
 	spin_end();
 
+	preempted = true;
+
 	/*
 	 * Read the lock word after sampling the yield count. On the other side
 	 * there may a wmb because the yield count update is done by the
@@ -229,29 +233,32 @@ static __always_inline void __yield_to_locked_owner(struct qspinlock *lock, u32
 		if (mustq)
 			set_mustq(lock);
 		spin_begin();
+
 		/* Don't relax if we yielded. Maybe we should? */
-		return;
+		return preempted;
 	}
 	spin_begin();
 relax:
 	spin_cpu_relax();
+
+	return preempted;
 }
 
-/* Called inside spin_begin() */
-static __always_inline void yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
 {
-	__yield_to_locked_owner(lock, val, paravirt, false);
+	return __yield_to_locked_owner(lock, val, paravirt, false);
 }
 
-/* Called inside spin_begin() */
-static __always_inline void yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
+/* Called inside spin_begin(). Returns whether or not the vCPU was preempted. */
+static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u32 val, bool paravirt)
 {
 	bool mustq = false;
 
 	if ((val & _Q_MUST_Q_VAL) && pv_yield_allow_steal)
 		mustq = true;
 
-	__yield_to_locked_owner(lock, val, paravirt, mustq);
+	return __yield_to_locked_owner(lock, val, paravirt, mustq);
 }
 
 static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
@@ -361,13 +368,16 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 	int iters = 0;
 	u32 val;
 
-	if (!steal_spins)
+	if (!steal_spins) {
+		/* XXX: should spin_on_preempted_owner do anything here? */
 		return false;
+	}
 
 	/* Attempt to steal the lock */
 	spin_begin();
-
 	do {
+		bool preempted = false;
+
 		val = READ_ONCE(lock->val);
 		if (val & _Q_MUST_Q_VAL)
 			break;
@@ -378,10 +388,23 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 				return true;
 			spin_begin();
 		} else {
-			yield_to_locked_owner(lock, val, paravirt);
+			preempted = yield_to_locked_owner(lock, val, paravirt);
 		}
 
-		iters++;
+		if (preempted) {
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+			/*
+			 * pv_spin_on_preempted_owner don't increase iters
+			 * while the owner is preempted -- we won't interfere
+			 * with it by definition. This could introduce some
+			 * latency issue if we continually observe preempted
+			 * owners, but hopefully that's a rare corner case of
+			 * a badly oversubscribed system.
+			 */
+		} else {
+			iters++;
+		}
 	} while (!steal_break(val, iters, paravirt));
 
 	spin_end();
@@ -453,15 +476,22 @@ again:
 	/* We're at the head of the waitqueue, wait for the lock. */
 	spin_begin();
 	for (;;) {
+		bool preempted;
+
 		val = READ_ONCE(lock->val);
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
 		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
-		yield_head_to_locked_owner(lock, val, paravirt);
+		preempted = yield_head_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
-		iters++;
+		if (preempted) {
+			if (!pv_spin_on_preempted_owner)
+				iters++;
+		} else {
+			iters++;
+		}
 
 		if (!mustq && iters >= get_head_spins(paravirt)) {
 			mustq = true;
@@ -644,6 +674,22 @@ static int pv_yield_allow_steal_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_allow_steal, pv_yield_allow_steal_get, pv_yield_allow_steal_set, "%llu\n");
 
+static int pv_spin_on_preempted_owner_set(void *data, u64 val)
+{
+	pv_spin_on_preempted_owner = !!val;
+
+	return 0;
+}
+
+static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
+{
+	*val = pv_spin_on_preempted_owner;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
+
 static int pv_yield_prev_set(void *data, u64 val)
 {
 	pv_yield_prev = !!val;
@@ -700,6 +746,7 @@ static __init int spinlock_debugfs_init(void)
 	if (is_shared_processor()) {
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
+		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
 		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
-- 
GitLab


From 12b459a5ebf3308e718bc1dd48acb7c4cf7f1a75 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:31 +1000
Subject: [PATCH 3193/4122] powerpc/qspinlock: provide accounting and options
 for sleepy locks

Finding the owner or a queued waiter on a lock with a preempted vcpu is
indicative of an oversubscribed guest causing the lock to get into
trouble. Provide some options to detect this situation and have new CPUs
avoid queueing for a longer time (more steal iterations) to minimise the
problems caused by vcpu preemption on the queue.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-17-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock_types.h |   7 +-
 arch/powerpc/lib/qspinlock.c               | 242 +++++++++++++++++++--
 2 files changed, 230 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock_types.h b/arch/powerpc/include/asm/qspinlock_types.h
index adfeed4aa495b..4766a7aa03cbc 100644
--- a/arch/powerpc/include/asm/qspinlock_types.h
+++ b/arch/powerpc/include/asm/qspinlock_types.h
@@ -30,7 +30,7 @@ typedef struct qspinlock {
  *
  *     0: locked bit
  *  1-14: lock holder cpu
- *    15: unused bit
+ *    15: lock owner or queuer vcpus observed to be preempted bit
  *    16: must queue bit
  * 17-31: tail cpu (+1)
  */
@@ -50,6 +50,11 @@ typedef struct qspinlock {
 #error "qspinlock does not support such large CONFIG_NR_CPUS"
 #endif
 
+/* 0x00008000 */
+#define _Q_SLEEPY_OFFSET	15
+#define _Q_SLEEPY_BITS		1
+#define _Q_SLEEPY_VAL		(1U << _Q_SLEEPY_OFFSET)
+
 /* 0x00010000 */
 #define _Q_MUST_Q_OFFSET	16
 #define _Q_MUST_Q_BITS		1
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index eeaaecfd5b774..0f33a07c1d192 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -5,6 +5,7 @@
 #include <linux/percpu.h>
 #include <linux/smp.h>
 #include <linux/topology.h>
+#include <linux/sched/clock.h>
 #include <asm/qspinlock.h>
 #include <asm/paravirt.h>
 
@@ -36,25 +37,56 @@ static int head_spins __read_mostly = (1 << 8);
 static bool pv_yield_owner __read_mostly = true;
 static bool pv_yield_allow_steal __read_mostly = false;
 static bool pv_spin_on_preempted_owner __read_mostly = false;
+static bool pv_sleepy_lock __read_mostly = true;
+static bool pv_sleepy_lock_sticky __read_mostly = false;
+static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
+static int pv_sleepy_lock_factor __read_mostly = 256;
 static bool pv_yield_prev __read_mostly = true;
 static bool pv_yield_propagate_owner __read_mostly = true;
 static bool pv_prod_head __read_mostly = false;
 
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
+static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
 
-static __always_inline int get_steal_spins(bool paravirt)
+static __always_inline bool recently_sleepy(void)
 {
-	return steal_spins;
+	/* pv_sleepy_lock is true when this is called */
+	if (pv_sleepy_lock_interval_ns) {
+		u64 seen = this_cpu_read(sleepy_lock_seen_clock);
+
+		if (seen) {
+			u64 delta = sched_clock() - seen;
+			if (delta < pv_sleepy_lock_interval_ns)
+				return true;
+			this_cpu_write(sleepy_lock_seen_clock, 0);
+		}
+	}
+
+	return false;
 }
 
-static __always_inline int get_remote_steal_spins(bool paravirt)
+static __always_inline int get_steal_spins(bool paravirt, bool sleepy)
 {
-	return remote_steal_spins;
+	if (paravirt && sleepy)
+		return steal_spins * pv_sleepy_lock_factor;
+	else
+		return steal_spins;
 }
 
-static __always_inline int get_head_spins(bool paravirt)
+static __always_inline int get_remote_steal_spins(bool paravirt, bool sleepy)
 {
-	return head_spins;
+	if (paravirt && sleepy)
+		return remote_steal_spins * pv_sleepy_lock_factor;
+	else
+		return remote_steal_spins;
+}
+
+static __always_inline int get_head_spins(bool paravirt, bool sleepy)
+{
+	if (paravirt && sleepy)
+		return head_spins * pv_sleepy_lock_factor;
+	else
+		return head_spins;
 }
 
 static inline u32 encode_tail_cpu(int cpu)
@@ -168,6 +200,56 @@ static __always_inline u32 clear_mustq(struct qspinlock *lock)
 	return prev;
 }
 
+static __always_inline bool try_set_sleepy(struct qspinlock *lock, u32 old)
+{
+	u32 prev;
+	u32 new = old | _Q_SLEEPY_VAL;
+
+	BUG_ON(!(old & _Q_LOCKED_VAL));
+	BUG_ON(old & _Q_SLEEPY_VAL);
+
+	asm volatile(
+"1:	lwarx	%0,0,%1		# try_set_sleepy			\n"
+"	cmpw	0,%0,%2							\n"
+"	bne-	2f							\n"
+"	stwcx.	%3,0,%1							\n"
+"	bne-	1b							\n"
+"2:									\n"
+	: "=&r" (prev)
+	: "r" (&lock->val), "r"(old), "r" (new)
+	: "cr0", "memory");
+
+	return likely(prev == old);
+}
+
+static __always_inline void seen_sleepy_owner(struct qspinlock *lock, u32 val)
+{
+	if (pv_sleepy_lock) {
+		if (pv_sleepy_lock_interval_ns)
+			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+		if (!(val & _Q_SLEEPY_VAL))
+			try_set_sleepy(lock, val);
+	}
+}
+
+static __always_inline void seen_sleepy_lock(void)
+{
+	if (pv_sleepy_lock && pv_sleepy_lock_interval_ns)
+		this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+}
+
+static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
+{
+	if (pv_sleepy_lock) {
+		if (pv_sleepy_lock_interval_ns)
+			this_cpu_write(sleepy_lock_seen_clock, sched_clock());
+		if (val & _Q_LOCKED_VAL) {
+			if (!(val & _Q_SLEEPY_VAL))
+				try_set_sleepy(lock, val);
+		}
+	}
+}
+
 static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
 {
 	int cpu = decode_tail_cpu(val);
@@ -215,6 +297,7 @@ static __always_inline bool __yield_to_locked_owner(struct qspinlock *lock, u32
 
 	spin_end();
 
+	seen_sleepy_owner(lock, val);
 	preempted = true;
 
 	/*
@@ -289,11 +372,12 @@ static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int
 }
 
 /* Called inside spin_begin() */
-static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
 {
 	int prev_cpu = decode_tail_cpu(val);
 	u32 yield_count;
 	int yield_cpu;
+	bool preempted = false;
 
 	if (!paravirt)
 		goto relax;
@@ -315,6 +399,9 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 
 	spin_end();
 
+	preempted = true;
+	seen_sleepy_node(lock, val);
+
 	smp_rmb();
 
 	if (yield_cpu == node->yield_cpu) {
@@ -322,7 +409,7 @@ static __always_inline void yield_to_prev(struct qspinlock *lock, struct qnode *
 			node->next->yield_cpu = yield_cpu;
 		yield_to_preempted(yield_cpu, yield_count);
 		spin_begin();
-		return;
+		return preempted;
 	}
 	spin_begin();
 
@@ -336,26 +423,31 @@ yield_prev:
 
 	spin_end();
 
+	preempted = true;
+	seen_sleepy_node(lock, val);
+
 	smp_rmb(); /* See __yield_to_locked_owner comment */
 
 	if (!node->locked) {
 		yield_to_preempted(prev_cpu, yield_count);
 		spin_begin();
-		return;
+		return preempted;
 	}
 	spin_begin();
 
 relax:
 	spin_cpu_relax();
+
+	return preempted;
 }
 
-static __always_inline bool steal_break(u32 val, int iters, bool paravirt)
+static __always_inline bool steal_break(u32 val, int iters, bool paravirt, bool sleepy)
 {
-	if (iters >= get_steal_spins(paravirt))
+	if (iters >= get_steal_spins(paravirt, sleepy))
 		return true;
 
 	if (IS_ENABLED(CONFIG_NUMA) &&
-	    (iters >= get_remote_steal_spins(paravirt))) {
+	    (iters >= get_remote_steal_spins(paravirt, sleepy))) {
 		int cpu = get_owner_cpu(val);
 		if (numa_node_id() != cpu_to_node(cpu))
 			return true;
@@ -365,6 +457,8 @@ static __always_inline bool steal_break(u32 val, int iters, bool paravirt)
 
 static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool paravirt)
 {
+	bool seen_preempted = false;
+	bool sleepy = false;
 	int iters = 0;
 	u32 val;
 
@@ -391,7 +485,25 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 			preempted = yield_to_locked_owner(lock, val, paravirt);
 		}
 
+		if (paravirt && pv_sleepy_lock) {
+			if (!sleepy) {
+				if (val & _Q_SLEEPY_VAL) {
+					seen_sleepy_lock();
+					sleepy = true;
+				} else if (recently_sleepy()) {
+					sleepy = true;
+				}
+			}
+			if (pv_sleepy_lock_sticky && seen_preempted &&
+			    !(val & _Q_SLEEPY_VAL)) {
+				if (try_set_sleepy(lock, val))
+					val |= _Q_SLEEPY_VAL;
+			}
+		}
+
 		if (preempted) {
+			seen_preempted = true;
+			sleepy = true;
 			if (!pv_spin_on_preempted_owner)
 				iters++;
 			/*
@@ -405,7 +517,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 		} else {
 			iters++;
 		}
-	} while (!steal_break(val, iters, paravirt));
+	} while (!steal_break(val, iters, paravirt, sleepy));
 
 	spin_end();
 
@@ -417,6 +529,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 	struct qnodes *qnodesp;
 	struct qnode *next, *node;
 	u32 val, old, tail;
+	bool seen_preempted = false;
+	bool sleepy = false;
 	bool mustq = false;
 	int idx;
 	int set_yield_cpu = -1;
@@ -461,8 +575,10 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 
 		/* Wait for mcs node lock to be released */
 		spin_begin();
-		while (!node->locked)
-			yield_to_prev(lock, node, old, paravirt);
+		while (!node->locked) {
+			if (yield_to_prev(lock, node, old, paravirt))
+				seen_preempted = true;
+		}
 		spin_end();
 
 		/* Clear out stale propagated yield_cpu */
@@ -472,8 +588,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		smp_rmb(); /* acquire barrier for the mcs lock */
 	}
 
-again:
 	/* We're at the head of the waitqueue, wait for the lock. */
+again:
 	spin_begin();
 	for (;;) {
 		bool preempted;
@@ -482,18 +598,40 @@ again:
 		if (!(val & _Q_LOCKED_VAL))
 			break;
 
+		if (paravirt && pv_sleepy_lock && maybe_stealers) {
+			if (!sleepy) {
+				if (val & _Q_SLEEPY_VAL) {
+					seen_sleepy_lock();
+					sleepy = true;
+				} else if (recently_sleepy()) {
+					sleepy = true;
+				}
+			}
+			if (pv_sleepy_lock_sticky && seen_preempted &&
+			    !(val & _Q_SLEEPY_VAL)) {
+				if (try_set_sleepy(lock, val))
+					val |= _Q_SLEEPY_VAL;
+			}
+		}
+
 		propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
 		preempted = yield_head_to_locked_owner(lock, val, paravirt);
 		if (!maybe_stealers)
 			continue;
-		if (preempted) {
+
+		if (preempted)
+			seen_preempted = true;
+
+		if (paravirt && preempted) {
+			sleepy = true;
+
 			if (!pv_spin_on_preempted_owner)
 				iters++;
 		} else {
 			iters++;
 		}
 
-		if (!mustq && iters >= get_head_spins(paravirt)) {
+		if (!mustq && iters >= get_head_spins(paravirt, sleepy)) {
 			mustq = true;
 			set_mustq(lock);
 			val |= _Q_MUST_Q_VAL;
@@ -690,6 +828,70 @@ static int pv_spin_on_preempted_owner_get(void *data, u64 *val)
 
 DEFINE_SIMPLE_ATTRIBUTE(fops_pv_spin_on_preempted_owner, pv_spin_on_preempted_owner_get, pv_spin_on_preempted_owner_set, "%llu\n");
 
+static int pv_sleepy_lock_set(void *data, u64 val)
+{
+	pv_sleepy_lock = !!val;
+
+	return 0;
+}
+
+static int pv_sleepy_lock_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock, pv_sleepy_lock_get, pv_sleepy_lock_set, "%llu\n");
+
+static int pv_sleepy_lock_sticky_set(void *data, u64 val)
+{
+	pv_sleepy_lock_sticky = !!val;
+
+	return 0;
+}
+
+static int pv_sleepy_lock_sticky_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_sticky;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_sticky, pv_sleepy_lock_sticky_get, pv_sleepy_lock_sticky_set, "%llu\n");
+
+static int pv_sleepy_lock_interval_ns_set(void *data, u64 val)
+{
+	pv_sleepy_lock_interval_ns = val;
+
+	return 0;
+}
+
+static int pv_sleepy_lock_interval_ns_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_interval_ns;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_interval_ns, pv_sleepy_lock_interval_ns_get, pv_sleepy_lock_interval_ns_set, "%llu\n");
+
+static int pv_sleepy_lock_factor_set(void *data, u64 val)
+{
+	pv_sleepy_lock_factor = val;
+
+	return 0;
+}
+
+static int pv_sleepy_lock_factor_get(void *data, u64 *val)
+{
+	*val = pv_sleepy_lock_factor;
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_sleepy_lock_factor, pv_sleepy_lock_factor_get, pv_sleepy_lock_factor_set, "%llu\n");
+
 static int pv_yield_prev_set(void *data, u64 val)
 {
 	pv_yield_prev = !!val;
@@ -747,6 +949,10 @@ static __init int spinlock_debugfs_init(void)
 		debugfs_create_file("qspl_pv_yield_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_owner);
 		debugfs_create_file("qspl_pv_yield_allow_steal", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_allow_steal);
 		debugfs_create_file("qspl_pv_spin_on_preempted_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_spin_on_preempted_owner);
+		debugfs_create_file("qspl_pv_sleepy_lock", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock);
+		debugfs_create_file("qspl_pv_sleepy_lock_sticky", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_sticky);
+		debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
+		debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
 		debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
 		debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
 		debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
-- 
GitLab


From 0b2199841a7952d01a717b465df028b40b2cf3e9 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sat, 26 Nov 2022 19:59:32 +1000
Subject: [PATCH 3194/4122] powerpc/qspinlock: add compile-time tuning
 adjustments

This adds compile-time options that allow the EH lock hint bit to be
enabled or disabled, and adds some new options that may or may not
help matters. To help with experimentation and tuning.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221126095932.1234527-18-npiggin@gmail.com
---
 arch/powerpc/include/asm/qspinlock.h | 61 ++++++++++++++++++++++++++--
 arch/powerpc/lib/qspinlock.c         | 39 ++++++++++++++++--
 2 files changed, 94 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
index 93b1c976db8a5..28a53fb69b381 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -6,15 +6,68 @@
 #include <asm/qspinlock_types.h>
 #include <asm/paravirt.h>
 
+#ifdef CONFIG_PPC64
+/*
+ * Use the EH=1 hint for accesses that result in the lock being acquired.
+ * The hardware is supposed to optimise this pattern by holding the lock
+ * cacheline longer, and releasing when a store to the same memory (the
+ * unlock) is performed.
+ */
+#define _Q_SPIN_EH_HINT 1
+#else
+#define _Q_SPIN_EH_HINT 0
+#endif
+
 /*
  * The trylock itself may steal. This makes trylocks slightly stronger, and
- * might make spin locks slightly more efficient when stealing.
+ * makes locks slightly more efficient when stealing.
  *
  * This is compile-time, so if true then there may always be stealers, so the
  * nosteal paths become unused.
  */
 #define _Q_SPIN_TRY_LOCK_STEAL 1
 
+/*
+ * Put a speculation barrier after testing the lock/node and finding it
+ * busy. Try to prevent pointless speculation in slow paths.
+ *
+ * Slows down the lockstorm microbenchmark with no stealing, where locking
+ * is purely FIFO through the queue. May have more benefit in real workload
+ * where speculating into the wrong place could have a greater cost.
+ */
+#define _Q_SPIN_SPEC_BARRIER 0
+
+#ifdef CONFIG_PPC64
+/*
+ * Execute a miso instruction after passing the MCS lock ownership to the
+ * queue head. Miso is intended to make stores visible to other CPUs sooner.
+ *
+ * This seems to make the lockstorm microbenchmark nospin test go slightly
+ * faster on POWER10, but disable for now.
+ */
+#define _Q_SPIN_MISO 0
+#else
+#define _Q_SPIN_MISO 0
+#endif
+
+#ifdef CONFIG_PPC64
+/*
+ * This executes miso after an unlock of the lock word, having ownership
+ * pass to the next CPU sooner. This will slow the uncontended path to some
+ * degree. Not evidence it helps yet.
+ */
+#define _Q_SPIN_MISO_UNLOCK 0
+#else
+#define _Q_SPIN_MISO_UNLOCK 0
+#endif
+
+/*
+ * Seems to slow down lockstorm microbenchmark, suspect queue node just
+ * has to become shared again right afterwards when its waiter spins on
+ * the lock field.
+ */
+#define _Q_SPIN_PREFETCH_NEXT 0
+
 static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
 {
 	return READ_ONCE(lock->val);
@@ -52,7 +105,7 @@ static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
 "2:									\n"
 	: "=&r" (prev)
 	: "r" (&lock->val), "r" (new),
-	  "i" (IS_ENABLED(CONFIG_PPC64))
+	  "i" (_Q_SPIN_EH_HINT)
 	: "cr0", "memory");
 
 	return likely(prev == 0);
@@ -76,7 +129,7 @@ static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
 "2:									\n"
 	: "=&r" (prev), "=&r" (tmp)
 	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
-	  "i" (IS_ENABLED(CONFIG_PPC64))
+	  "i" (_Q_SPIN_EH_HINT)
 	: "cr0", "memory");
 
 	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
@@ -101,6 +154,8 @@ static __always_inline void queued_spin_lock(struct qspinlock *lock)
 static inline void queued_spin_unlock(struct qspinlock *lock)
 {
 	smp_store_release(&lock->locked, 0);
+	if (_Q_SPIN_MISO_UNLOCK)
+		asm volatile("miso" ::: "memory");
 }
 
 #define arch_spin_is_locked(l)		queued_spin_is_locked(l)
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 0f33a07c1d192..1cf5d3e752501 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -48,6 +48,12 @@ static bool pv_prod_head __read_mostly = false;
 static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
 static DEFINE_PER_CPU_ALIGNED(u64, sleepy_lock_seen_clock);
 
+#if _Q_SPIN_SPEC_BARRIER == 1
+#define spec_barrier() do { asm volatile("ori 31,31,0" ::: "memory"); } while (0)
+#else
+#define spec_barrier() do { } while (0)
+#endif
+
 static __always_inline bool recently_sleepy(void)
 {
 	/* pv_sleepy_lock is true when this is called */
@@ -137,7 +143,7 @@ static __always_inline u32 trylock_clean_tail(struct qspinlock *lock, u32 tail)
 	: "r" (&lock->val), "r"(tail), "r" (newval),
 	  "i" (_Q_LOCKED_VAL),
 	  "r" (_Q_TAIL_CPU_MASK),
-	  "i" (IS_ENABLED(CONFIG_PPC64))
+	  "i" (_Q_SPIN_EH_HINT)
 	: "cr0", "memory");
 
 	return prev;
@@ -475,6 +481,7 @@ static __always_inline bool try_to_steal_lock(struct qspinlock *lock, bool parav
 		val = READ_ONCE(lock->val);
 		if (val & _Q_MUST_Q_VAL)
 			break;
+		spec_barrier();
 
 		if (unlikely(!(val & _Q_LOCKED_VAL))) {
 			spin_end();
@@ -540,6 +547,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 
 	qnodesp = this_cpu_ptr(&qnodes);
 	if (unlikely(qnodesp->count >= MAX_NODES)) {
+		spec_barrier();
 		while (!queued_spin_trylock(lock))
 			cpu_relax();
 		return;
@@ -576,9 +584,12 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 		/* Wait for mcs node lock to be released */
 		spin_begin();
 		while (!node->locked) {
+			spec_barrier();
+
 			if (yield_to_prev(lock, node, old, paravirt))
 				seen_preempted = true;
 		}
+		spec_barrier();
 		spin_end();
 
 		/* Clear out stale propagated yield_cpu */
@@ -586,6 +597,17 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
 			node->yield_cpu = -1;
 
 		smp_rmb(); /* acquire barrier for the mcs lock */
+
+		/*
+		 * Generic qspinlocks have this prefetch here, but it seems
+		 * like it could cause additional line transitions because
+		 * the waiter will keep loading from it.
+		 */
+		if (_Q_SPIN_PREFETCH_NEXT) {
+			next = READ_ONCE(node->next);
+			if (next)
+				prefetchw(next);
+		}
 	}
 
 	/* We're at the head of the waitqueue, wait for the lock. */
@@ -597,6 +619,7 @@ again:
 		val = READ_ONCE(lock->val);
 		if (!(val & _Q_LOCKED_VAL))
 			break;
+		spec_barrier();
 
 		if (paravirt && pv_sleepy_lock && maybe_stealers) {
 			if (!sleepy) {
@@ -637,6 +660,7 @@ again:
 			val |= _Q_MUST_Q_VAL;
 		}
 	}
+	spec_barrier();
 	spin_end();
 
 	/* If we're the last queued, must clean up the tail. */
@@ -657,6 +681,7 @@ again:
 			cpu_relax();
 		spin_end();
 	}
+	spec_barrier();
 
 	/*
 	 * Unlock the next mcs waiter node. Release barrier is not required
@@ -668,10 +693,14 @@ again:
 	if (paravirt && pv_prod_head) {
 		int next_cpu = next->cpu;
 		WRITE_ONCE(next->locked, 1);
+		if (_Q_SPIN_MISO)
+			asm volatile("miso" ::: "memory");
 		if (vcpu_is_preempted(next_cpu))
 			prod_cpu(next_cpu);
 	} else {
 		WRITE_ONCE(next->locked, 1);
+		if (_Q_SPIN_MISO)
+			asm volatile("miso" ::: "memory");
 	}
 
 release:
@@ -686,12 +715,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock)
 	 * is passed as the paravirt argument to the functions.
 	 */
 	if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && is_shared_processor()) {
-		if (try_to_steal_lock(lock, true))
+		if (try_to_steal_lock(lock, true)) {
+			spec_barrier();
 			return;
+		}
 		queued_spin_lock_mcs_queue(lock, true);
 	} else {
-		if (try_to_steal_lock(lock, false))
+		if (try_to_steal_lock(lock, false)) {
+			spec_barrier();
 			return;
+		}
 		queued_spin_lock_mcs_queue(lock, false);
 	}
 }
-- 
GitLab


From c28c15b6d28a776538482101522cbcd9f906b15c Mon Sep 17 00:00:00 2001
From: "Christopher M. Riedl" <cmr@bluescreens.de>
Date: Wed, 9 Nov 2022 15:51:11 +1100
Subject: [PATCH 3195/4122] powerpc/code-patching: Use temporary mm for Radix
 MMU

x86 supports the notion of a temporary mm which restricts access to
temporary PTEs to a single CPU. A temporary mm is useful for situations
where a CPU needs to perform sensitive operations (such as patching a
STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
said mappings to other CPUs. Another benefit is that other CPU TLBs do
not need to be flushed when the temporary mm is torn down.

Mappings in the temporary mm can be set in the userspace portion of the
address-space.

Interrupts must be disabled while the temporary mm is in use. HW
breakpoints, which may have been set by userspace as watchpoints on
addresses now within the temporary mm, are saved and disabled when
loading the temporary mm. The HW breakpoints are restored when unloading
the temporary mm. All HW breakpoints are indiscriminately disabled while
the temporary mm is in use - this may include breakpoints set by perf.

Use the `poking_init` init hook to prepare a temporary mm and patching
address. Initialize the temporary mm using mm_alloc(). Choose a
randomized patching address inside the temporary mm userspace address
space. The patching address is randomized between PAGE_SIZE and
DEFAULT_MAP_WINDOW-PAGE_SIZE.

Bits of entropy with 64K page size on BOOK3S_64:

	bits of entropy = log2(DEFAULT_MAP_WINDOW_USER64 / PAGE_SIZE)

	PAGE_SIZE=64K, DEFAULT_MAP_WINDOW_USER64=128TB
	bits of entropy = log2(128TB / 64K)
	bits of entropy = 31

The upper limit is DEFAULT_MAP_WINDOW due to how the Book3s64 Hash MMU
operates - by default the space above DEFAULT_MAP_WINDOW is not
available. Currently the Hash MMU does not use a temporary mm so
technically this upper limit isn't necessary; however, a larger
randomization range does not further "harden" this overall approach and
future work may introduce patching with a temporary mm on Hash as well.

Randomization occurs only once during initialization for each CPU as it
comes online.

The patching page is mapped with PAGE_KERNEL to set EAA[0] for the PTE
which ignores the AMR (so no need to unlock/lock KUAP) according to
PowerISA v3.0b Figure 35 on Radix.

Based on x86 implementation:

commit 4fc19708b165
("x86/alternatives: Initialize temporary mm for patching")

and:

commit b3fd8e83ada0
("x86/alternatives: Use temporary mm for text poking")

From: Benjamin Gray <bgray@linux.ibm.com>

Synchronisation is done according to ISA 3.1B Book 3 Chapter 13
"Synchronization Requirements for Context Alterations". Switching the mm
is a change to the PID, which requires a CSI before and after the change,
and a hwsync between the last instruction that performs address
translation for an associated storage access.

Instruction fetch is an associated storage access, but the instruction
address mappings are not being changed, so it should not matter which
context they use. We must still perform a hwsync to guard arbitrary
prior code that may have accessed a userspace address.

TLB invalidation is local and VA specific. Local because only this core
used the patching mm, and VA specific because we only care that the
writable mapping is purged. Leaving the other mappings intact is more
efficient, especially when performing many code patches in a row (e.g.,
as ftrace would).

Signed-off-by: Christopher M. Riedl <cmr@bluescreens.de>
Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
[mpe: Use mm_alloc() per 107b6828a7cd ("x86/mm: Use mm_alloc() in poking_init()")]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-9-bgray@linux.ibm.com
---
 arch/powerpc/lib/code-patching.c | 177 ++++++++++++++++++++++++++++++-
 1 file changed, 172 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 3055eef7dcdcc..a1902241ff5dd 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -4,12 +4,17 @@
  */
 
 #include <linux/kprobes.h>
+#include <linux/mmu_context.h>
+#include <linux/random.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/cpuhotplug.h>
 #include <linux/uaccess.h>
 #include <linux/jump_label.h>
 
+#include <asm/debug.h>
+#include <asm/pgalloc.h>
+#include <asm/tlb.h>
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <asm/code-patching.h>
@@ -42,11 +47,54 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
+
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm);
+static DEFINE_PER_CPU(unsigned long, cpu_patching_addr);
+static DEFINE_PER_CPU(pte_t *, cpu_patching_pte);
 
 static int map_patch_area(void *addr, unsigned long text_poke_addr);
 static void unmap_patch_area(unsigned long addr);
 
+static bool mm_patch_enabled(void)
+{
+	return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller to perform the CSI and hwsync before
+ * starting/stopping the temp mm.
+ */
+static struct mm_struct *start_using_temp_mm(struct mm_struct *temp_mm)
+{
+	struct mm_struct *orig_mm = current->active_mm;
+
+	lockdep_assert_irqs_disabled();
+	switch_mm_irqs_off(orig_mm, temp_mm, current);
+
+	WARN_ON(!mm_is_thread_local(temp_mm));
+
+	suspend_breakpoints();
+	return orig_mm;
+}
+
+static void stop_using_temp_mm(struct mm_struct *temp_mm,
+			       struct mm_struct *orig_mm)
+{
+	lockdep_assert_irqs_disabled();
+	switch_mm_irqs_off(temp_mm, orig_mm, current);
+	restore_breakpoints();
+}
+
 static int text_area_cpu_up(unsigned int cpu)
 {
 	struct vm_struct *area;
@@ -79,14 +127,86 @@ static int text_area_cpu_down(unsigned int cpu)
 	return 0;
 }
 
+static void put_patching_mm(struct mm_struct *mm, unsigned long patching_addr)
+{
+	struct mmu_gather tlb;
+
+	tlb_gather_mmu(&tlb, mm);
+	free_pgd_range(&tlb, patching_addr, patching_addr + PAGE_SIZE, 0, 0);
+	mmput(mm);
+}
+
+static int text_area_cpu_up_mm(unsigned int cpu)
+{
+	struct mm_struct *mm;
+	unsigned long addr;
+	pte_t *pte;
+	spinlock_t *ptl;
+
+	mm = mm_alloc();
+	if (WARN_ON(!mm))
+		goto fail_no_mm;
+
+	/*
+	 * Choose a random page-aligned address from the interval
+	 * [PAGE_SIZE .. DEFAULT_MAP_WINDOW - PAGE_SIZE].
+	 * The lower address bound is PAGE_SIZE to avoid the zero-page.
+	 */
+	addr = (1 + (get_random_long() % (DEFAULT_MAP_WINDOW / PAGE_SIZE - 2))) << PAGE_SHIFT;
+
+	/*
+	 * PTE allocation uses GFP_KERNEL which means we need to
+	 * pre-allocate the PTE here because we cannot do the
+	 * allocation during patching when IRQs are disabled.
+	 *
+	 * Using get_locked_pte() to avoid open coding, the lock
+	 * is unnecessary.
+	 */
+	pte = get_locked_pte(mm, addr, &ptl);
+	if (!pte)
+		goto fail_no_pte;
+	pte_unmap_unlock(pte, ptl);
+
+	this_cpu_write(cpu_patching_mm, mm);
+	this_cpu_write(cpu_patching_addr, addr);
+	this_cpu_write(cpu_patching_pte, pte);
+
+	return 0;
+
+fail_no_pte:
+	put_patching_mm(mm, addr);
+fail_no_mm:
+	return -ENOMEM;
+}
+
+static int text_area_cpu_down_mm(unsigned int cpu)
+{
+	put_patching_mm(this_cpu_read(cpu_patching_mm),
+			this_cpu_read(cpu_patching_addr));
+
+	this_cpu_write(cpu_patching_mm, NULL);
+	this_cpu_write(cpu_patching_addr, 0);
+	this_cpu_write(cpu_patching_pte, NULL);
+
+	return 0;
+}
+
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
 
 void __init poking_init(void)
 {
-	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
-				    "powerpc/text_poke:online",
-				    text_area_cpu_up,
-				    text_area_cpu_down);
+	int ret;
+
+	if (mm_patch_enabled())
+		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					"powerpc/text_poke_mm:online",
+					text_area_cpu_up_mm,
+					text_area_cpu_down_mm);
+	else
+		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+					"powerpc/text_poke:online",
+					text_area_cpu_up,
+					text_area_cpu_down);
 
 	/* cpuhp_setup_state returns >= 0 on success */
 	if (WARN_ON(ret < 0))
@@ -148,6 +268,50 @@ static void unmap_patch_area(unsigned long addr)
 	flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 }
 
+static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+{
+	int err;
+	u32 *patch_addr;
+	unsigned long text_poke_addr;
+	pte_t *pte;
+	unsigned long pfn = get_patch_pfn(addr);
+	struct mm_struct *patching_mm;
+	struct mm_struct *orig_mm;
+
+	patching_mm = __this_cpu_read(cpu_patching_mm);
+	pte = __this_cpu_read(cpu_patching_pte);
+	text_poke_addr = __this_cpu_read(cpu_patching_addr);
+	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+	/* order PTE update before use, also serves as the hwsync */
+	asm volatile("ptesync": : :"memory");
+
+	/* order context switch after arbitrary prior code */
+	isync();
+
+	orig_mm = start_using_temp_mm(patching_mm);
+
+	err = __patch_instruction(addr, instr, patch_addr);
+
+	/* hwsync performed by __patch_instruction (sync) if successful */
+	if (err)
+		mb();  /* sync */
+
+	/* context synchronisation performed by __patch_instruction (isync or exception) */
+	stop_using_temp_mm(patching_mm, orig_mm);
+
+	pte_clear(patching_mm, text_poke_addr, pte);
+	/*
+	 * ptesync to order PTE update before TLB invalidation done
+	 * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+	 */
+	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+	return err;
+}
+
 static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
 	int err;
@@ -187,7 +351,10 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
 		return raw_patch_instruction(addr, instr);
 
 	local_irq_save(flags);
-	err = __do_patch_instruction(addr, instr);
+	if (mm_patch_enabled())
+		err = __do_patch_instruction_mm(addr, instr);
+	else
+		err = __do_patch_instruction(addr, instr);
 	local_irq_restore(flags);
 
 	return err;
-- 
GitLab


From 2f228ee1ade5d8d1f26cf94863a36c5693023c58 Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Wed, 9 Nov 2022 15:51:12 +1100
Subject: [PATCH 3196/4122] powerpc/code-patching: Consolidate and cache
 per-cpu patching context

With the temp mm context support, there are CPU local variables to hold
the patch address and pte. Use these in the non-temp mm path as well
instead of adding a level of indirection through the text_poke_area
vm_struct and pointer chasing the pte.

As both paths use these fields now, there is no need to let unreferenced
variables be dropped by the compiler, so it is cleaner to merge them
into a single context struct. This has the additional benefit of
removing a redundant CPU local pointer, as only one of cpu_patching_mm /
text_poke_area is ever used, while remaining well-typed. It also groups
each CPU's data into a single cacheline.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
[mpe: Shorten name to 'area' as suggested by Christophe]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221109045112.187069-10-bgray@linux.ibm.com
---
 arch/powerpc/lib/code-patching.c | 49 +++++++++++++++++++-------------
 1 file changed, 30 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index a1902241ff5dd..5b8f87db12176 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -48,10 +48,16 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 
-static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
-static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm);
-static DEFINE_PER_CPU(unsigned long, cpu_patching_addr);
-static DEFINE_PER_CPU(pte_t *, cpu_patching_pte);
+struct patch_context {
+	union {
+		struct vm_struct *area;
+		struct mm_struct *mm;
+	};
+	unsigned long addr;
+	pte_t *pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
 
 static int map_patch_area(void *addr, unsigned long text_poke_addr);
 static void unmap_patch_area(unsigned long addr);
@@ -116,14 +122,19 @@ static int text_area_cpu_up(unsigned int cpu)
 
 	unmap_patch_area(addr);
 
-	this_cpu_write(text_poke_area, area);
+	this_cpu_write(cpu_patching_context.area, area);
+	this_cpu_write(cpu_patching_context.addr, addr);
+	this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
 
 	return 0;
 }
 
 static int text_area_cpu_down(unsigned int cpu)
 {
-	free_vm_area(this_cpu_read(text_poke_area));
+	free_vm_area(this_cpu_read(cpu_patching_context.area));
+	this_cpu_write(cpu_patching_context.area, NULL);
+	this_cpu_write(cpu_patching_context.addr, 0);
+	this_cpu_write(cpu_patching_context.pte, NULL);
 	return 0;
 }
 
@@ -167,9 +178,9 @@ static int text_area_cpu_up_mm(unsigned int cpu)
 		goto fail_no_pte;
 	pte_unmap_unlock(pte, ptl);
 
-	this_cpu_write(cpu_patching_mm, mm);
-	this_cpu_write(cpu_patching_addr, addr);
-	this_cpu_write(cpu_patching_pte, pte);
+	this_cpu_write(cpu_patching_context.mm, mm);
+	this_cpu_write(cpu_patching_context.addr, addr);
+	this_cpu_write(cpu_patching_context.pte, pte);
 
 	return 0;
 
@@ -181,12 +192,12 @@ fail_no_mm:
 
 static int text_area_cpu_down_mm(unsigned int cpu)
 {
-	put_patching_mm(this_cpu_read(cpu_patching_mm),
-			this_cpu_read(cpu_patching_addr));
+	put_patching_mm(this_cpu_read(cpu_patching_context.mm),
+			this_cpu_read(cpu_patching_context.addr));
 
-	this_cpu_write(cpu_patching_mm, NULL);
-	this_cpu_write(cpu_patching_addr, 0);
-	this_cpu_write(cpu_patching_pte, NULL);
+	this_cpu_write(cpu_patching_context.mm, NULL);
+	this_cpu_write(cpu_patching_context.addr, 0);
+	this_cpu_write(cpu_patching_context.pte, NULL);
 
 	return 0;
 }
@@ -278,9 +289,9 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
 	struct mm_struct *patching_mm;
 	struct mm_struct *orig_mm;
 
-	patching_mm = __this_cpu_read(cpu_patching_mm);
-	pte = __this_cpu_read(cpu_patching_pte);
-	text_poke_addr = __this_cpu_read(cpu_patching_addr);
+	patching_mm = __this_cpu_read(cpu_patching_context.mm);
+	pte = __this_cpu_read(cpu_patching_context.pte);
+	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
 
 	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
@@ -320,10 +331,10 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	pte_t *pte;
 	unsigned long pfn = get_patch_pfn(addr);
 
-	text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
+	text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
 
-	pte = virt_to_kpte(text_poke_addr);
+	pte = __this_cpu_read(cpu_patching_context.pte);
 	__set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
 	/* See ptesync comment in radix__set_pte_at() */
 	if (radix_enabled())
-- 
GitLab


From f9231a996e229c13d23f907352c2cea84bd1c30a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 28 Nov 2022 14:15:36 +1000
Subject: [PATCH 3197/4122] module: add module_elf_check_arch for
 module-specific checks

The elf_check_arch() function is also used to test compatibility of
usermode binaries. Kernel modules may have more specific requirements,
for example powerpc would like to test for ABI version compatibility.

Add a weak module_elf_check_arch() that defaults to true, and call it
from elf_validity_check().

Signed-off-by: Jessica Yu <jeyu@kernel.org>
[np: added changelog, adjust name, rebase]
Acked-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221128041539.1742489-2-npiggin@gmail.com
---
 include/linux/moduleloader.h |  3 +++
 kernel/module/main.c         | 10 ++++++++++
 2 files changed, 13 insertions(+)

diff --git a/include/linux/moduleloader.h b/include/linux/moduleloader.h
index 9e09d11ffe5b3..7b4587a191895 100644
--- a/include/linux/moduleloader.h
+++ b/include/linux/moduleloader.h
@@ -13,6 +13,9 @@
  * must be implemented by each architecture.
  */
 
+/* arch may override to do additional checking of ELF header architecture */
+bool module_elf_check_arch(Elf_Ehdr *hdr);
+
 /* Adjust arch-specific sections.  Return 0 on success.  */
 int module_frob_arch_sections(Elf_Ehdr *hdr,
 			      Elf_Shdr *sechdrs,
diff --git a/kernel/module/main.c b/kernel/module/main.c
index d02d39c7174e1..7b3f6fb0d4282 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -1674,6 +1674,11 @@ static int elf_validity_check(struct load_info *info)
 		       info->hdr->e_machine);
 		goto no_exec;
 	}
+	if (!module_elf_check_arch(info->hdr)) {
+		pr_err("Invalid module architecture in ELF header: %u\n",
+		       info->hdr->e_machine);
+		goto no_exec;
+	}
 	if (info->hdr->e_shentsize != sizeof(Elf_Shdr)) {
 		pr_err("Invalid ELF section header size\n");
 		goto no_exec;
@@ -2247,6 +2252,11 @@ static void flush_module_icache(const struct module *mod)
 			   (unsigned long)mod->core_layout.base + mod->core_layout.size);
 }
 
+bool __weak module_elf_check_arch(Elf_Ehdr *hdr)
+{
+	return true;
+}
+
 int __weak module_frob_arch_sections(Elf_Ehdr *hdr,
 				     Elf_Shdr *sechdrs,
 				     char *secstrings,
-- 
GitLab


From de3d098dd1fc635535e3689c5d4aa0684242adde Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 28 Nov 2022 14:15:37 +1000
Subject: [PATCH 3198/4122] powerpc/64: Add module check for ELF ABI version

Override the generic module ELF check to provide a check for the ELF ABI
version. This becomes important if we allow big-endian ELF ABI V2 builds
but it doesn't hurt to check now.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221128041539.1742489-3-npiggin@gmail.com
---
 arch/powerpc/kernel/module_64.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 7e45dc98df8a1..ff045644f13ff 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -31,6 +31,16 @@
    this, and makes other things simpler.  Anton?
    --RR.  */
 
+bool module_elf_check_arch(Elf_Ehdr *hdr)
+{
+	unsigned long abi_level = hdr->e_flags & 0x3;
+
+	if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2))
+		return abi_level == 2;
+	else
+		return abi_level < 2;
+}
+
 #ifdef CONFIG_PPC64_ELF_ABI_V2
 
 static func_desc_t func_desc(unsigned long addr)
-- 
GitLab


From 505ea33089dcfc3ee3201b0fcb94751165805413 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 28 Nov 2022 14:15:38 +1000
Subject: [PATCH 3199/4122] powerpc/64: Add big-endian ELFv2 flavour to crypto
 VMX asm generation

This allows asm generation for big-endian ELFv2 builds.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221128041539.1742489-4-npiggin@gmail.com
---
 drivers/crypto/vmx/Makefile     | 12 +++++++++++-
 drivers/crypto/vmx/ppc-xlate.pl | 10 ++++++----
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile
index 2560cfea1dec2..e33c7238e7f8b 100644
--- a/drivers/crypto/vmx/Makefile
+++ b/drivers/crypto/vmx/Makefile
@@ -2,8 +2,18 @@
 obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
 vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
 
+ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
+override flavour := linux-ppc64le
+else
+ifdef CONFIG_PPC64_ELF_ABI_V2
+override flavour := linux-ppc64-elfv2
+else
+override flavour := linux-ppc64
+endif
+endif
+
 quiet_cmd_perl = PERL    $@
-      cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@
+      cmd_perl = $(PERL) $< $(flavour) > $@
 
 targets += aesp8-ppc.S ghashp8-ppc.S
 
diff --git a/drivers/crypto/vmx/ppc-xlate.pl b/drivers/crypto/vmx/ppc-xlate.pl
index 36db2ef09e5bf..b583898c11ae8 100644
--- a/drivers/crypto/vmx/ppc-xlate.pl
+++ b/drivers/crypto/vmx/ppc-xlate.pl
@@ -9,6 +9,8 @@ open STDOUT,">$output" || die "can't open $output: $!";
 
 my %GLOBALS;
 my $dotinlocallabels=($flavour=~/linux/)?1:0;
+my $elfv2abi=(($flavour =~ /linux-ppc64le/) or ($flavour =~ /linux-ppc64-elfv2/))?1:0;
+my $dotfunctions=($elfv2abi=~1)?0:1;
 
 ################################################################
 # directives which need special treatment on different platforms
@@ -40,7 +42,7 @@ my $globl = sub {
 };
 my $text = sub {
     my $ret = ($flavour =~ /aix/) ? ".csect\t.text[PR],7" : ".text";
-    $ret = ".abiversion	2\n".$ret	if ($flavour =~ /linux.*64le/);
+    $ret = ".abiversion	2\n".$ret	if ($elfv2abi);
     $ret;
 };
 my $machine = sub {
@@ -56,8 +58,8 @@ my $size = sub {
     if ($flavour =~ /linux/)
     {	shift;
 	my $name = shift; $name =~ s|^[\.\_]||;
-	my $ret  = ".size	$name,.-".($flavour=~/64$/?".":"").$name;
-	$ret .= "\n.size	.$name,.-.$name" if ($flavour=~/64$/);
+	my $ret  = ".size	$name,.-".($dotfunctions?".":"").$name;
+	$ret .= "\n.size	.$name,.-.$name" if ($dotfunctions);
 	$ret;
     }
     else
@@ -142,7 +144,7 @@ my $vmr = sub {
 
 # Some ABIs specify vrsave, special-purpose register #256, as reserved
 # for system use.
-my $no_vrsave = ($flavour =~ /linux-ppc64le/);
+my $no_vrsave = ($elfv2abi);
 my $mtspr = sub {
     my ($f,$idx,$ra) = @_;
     if ($idx == 256 && $no_vrsave) {
-- 
GitLab


From 5017b45946722bdd20ac255c9ae7273b78d1f12e Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 28 Nov 2022 14:15:39 +1000
Subject: [PATCH 3200/4122] powerpc/64: Option to build big-endian with ELFv2
 ABI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Provide an option to build big-endian kernels using the ELFv2 ABI. This
works on GCC only for now. Clang is rumored to support this, but core
build files need updating first, at least.

This gives big-endian kernels useful advantages of the ELFv2 ABI, e.g.,
less stack usage, -mprofile-kernel support, better compatibility with
eBPF tools.

BE+ELFv2 is not officially supported by the GNU toolchain, but it works
fine in testing and has been used by some userspace for some time (e.g.,
Void Linux).

Tested-by: Michal Suchánek <msuchanek@suse.de>
Reviewed-by: Segher Boessenkool <segher@kernel.crashing.org>
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Joel Stanley <joel@jms.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221128041539.1742489-5-npiggin@gmail.com
---
 arch/powerpc/Kconfig                   | 21 +++++++++++++++++++++
 arch/powerpc/platforms/Kconfig.cputype |  4 ++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index abaf1ef1795c0..1a134c9769f8b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1,6 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 source "arch/powerpc/platforms/Kconfig.cputype"
 
+config CC_HAS_ELFV2
+	def_bool PPC64 && $(cc-option, -mabi=elfv2)
+
 config 32BIT
 	bool
 	default y if PPC32
@@ -586,6 +589,24 @@ config KEXEC_FILE
 config ARCH_HAS_KEXEC_PURGATORY
 	def_bool KEXEC_FILE
 
+config PPC64_BIG_ENDIAN_ELF_ABI_V2
+	bool "Build big-endian kernel using ELF ABI V2 (EXPERIMENTAL)"
+	depends on PPC64 && CPU_BIG_ENDIAN
+	depends on CC_HAS_ELFV2
+	depends on LD_IS_BFD && LD_VERSION >= 22400
+	default n
+	help
+	  This builds the kernel image using the "Power Architecture 64-Bit ELF
+	  V2 ABI Specification", which has a reduced stack overhead and faster
+	  function calls. This internal kernel ABI option does not affect
+          userspace compatibility.
+
+	  The V2 ABI is standard for 64-bit little-endian, but for big-endian
+	  it is less well tested by kernel and toolchain. However some distros
+	  build userspace this way, and it can produce a functioning kernel.
+
+	  This requires GCC and binutils 2.24 or newer.
+
 config RELOCATABLE
 	bool "Build a relocatable kernel"
 	depends on PPC64 || (FLATMEM && (44x || PPC_85xx))
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 7bac213b4125d..9563336e3348f 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -580,10 +580,10 @@ config CPU_LITTLE_ENDIAN
 endchoice
 
 config PPC64_ELF_ABI_V1
-	def_bool PPC64 && CPU_BIG_ENDIAN
+	def_bool PPC64 && (CPU_BIG_ENDIAN && !PPC64_BIG_ENDIAN_ELF_ABI_V2)
 
 config PPC64_ELF_ABI_V2
-	def_bool PPC64 && CPU_LITTLE_ENDIAN
+	def_bool PPC64 && !PPC64_ELF_ABI_V1
 
 config PPC64_BOOT_WRAPPER
 	def_bool n
-- 
GitLab


From d6aee468e4ecbfec46a3eafae4d31d6efc0d4da4 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:27 +1000
Subject: [PATCH 3201/4122] powerpc/64: Remove asm interrupt tracing call
 helpers

These are now unused. Remove.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-3-npiggin@gmail.com
---
 arch/powerpc/include/asm/irqflags.h | 58 -----------------------------
 1 file changed, 58 deletions(-)

diff --git a/arch/powerpc/include/asm/irqflags.h b/arch/powerpc/include/asm/irqflags.h
index 1a6c1ce17735a..47d46712928ac 100644
--- a/arch/powerpc/include/asm/irqflags.h
+++ b/arch/powerpc/include/asm/irqflags.h
@@ -11,64 +11,6 @@
  */
 #include <asm/hw_irq.h>
 
-#else
-#ifdef CONFIG_TRACE_IRQFLAGS
-#ifdef CONFIG_IRQSOFF_TRACER
-/*
- * Since the ftrace irqsoff latency trace checks CALLER_ADDR1,
- * which is the stack frame here, we need to force a stack frame
- * in case we came from user space.
- */
-#define TRACE_WITH_FRAME_BUFFER(func)		\
-	mflr	r0;				\
-	stdu	r1, -STACK_FRAME_OVERHEAD(r1);	\
-	std	r0, 16(r1);			\
-	stdu	r1, -STACK_FRAME_OVERHEAD(r1);	\
-	bl func;				\
-	ld	r1, 0(r1);			\
-	ld	r1, 0(r1);
-#else
-#define TRACE_WITH_FRAME_BUFFER(func)		\
-	bl func;
-#endif
-
-/*
- * These are calls to C code, so the caller must be prepared for volatiles to
- * be clobbered.
- */
-#define TRACE_ENABLE_INTS	TRACE_WITH_FRAME_BUFFER(trace_hardirqs_on)
-#define TRACE_DISABLE_INTS	TRACE_WITH_FRAME_BUFFER(trace_hardirqs_off)
-
-/*
- * This is used by assembly code to soft-disable interrupts first and
- * reconcile irq state.
- *
- * NB: This may call C code, so the caller must be prepared for volatiles to
- * be clobbered.
- */
-#define RECONCILE_IRQ_STATE(__rA, __rB)		\
-	lbz	__rA,PACAIRQSOFTMASK(r13);	\
-	lbz	__rB,PACAIRQHAPPENED(r13);	\
-	andi.	__rA,__rA,IRQS_DISABLED;	\
-	li	__rA,IRQS_DISABLED;		\
-	ori	__rB,__rB,PACA_IRQ_HARD_DIS;	\
-	stb	__rB,PACAIRQHAPPENED(r13);	\
-	bne	44f;				\
-	stb	__rA,PACAIRQSOFTMASK(r13);	\
-	TRACE_DISABLE_INTS;			\
-44:
-
-#else
-#define TRACE_ENABLE_INTS
-#define TRACE_DISABLE_INTS
-
-#define RECONCILE_IRQ_STATE(__rA, __rB)		\
-	lbz	__rA,PACAIRQHAPPENED(r13);	\
-	li	__rB,IRQS_DISABLED;		\
-	ori	__rA,__rA,PACA_IRQ_HARD_DIS;	\
-	stb	__rB,PACAIRQSOFTMASK(r13);	\
-	stb	__rA,PACAIRQHAPPENED(r13)
-#endif
 #endif
 
 #endif
-- 
GitLab


From 32c5209214bd8d4f8c4e9d9b630ef4c671f58e79 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:28 +1000
Subject: [PATCH 3202/4122] powerpc/perf: callchain validate kernel stack
 pointer bounds

The interrupt frame detection and loads from the hypothetical pt_regs
are not bounds-checked. The next-frame validation only bounds-checks
STACK_FRAME_OVERHEAD, which does not include the pt_regs. Add another
test for this.

The user could set r1 to be equal to the address matching the first
interrupt frame - STACK_INT_FRAME_SIZE, which is in the previous page
due to the kernel redzone, and induce the kernel to load the marker from
there. Possibly this could cause a crash at least. If the user could
induce the previous page to contain a valid marker, then it might be
able to direct perf to read specific memory addresses in a way that
could be transmitted back to the user in the perf data.

Fixes: 20002ded4d93 ("perf_counter: powerpc: Add callchain support")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-4-npiggin@gmail.com
---
 arch/powerpc/perf/callchain.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 082f6d0308a47..8718289c051dd 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -61,6 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 		next_sp = fp[0];
 
 		if (next_sp == sp + STACK_INT_FRAME_SIZE &&
+		    validate_sp(sp, current, STACK_INT_FRAME_SIZE) &&
 		    fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
 			/*
 			 * This looks like an interrupt frame for an
-- 
GitLab


From bc0677363d0ffaec0c56685291e97b080116976c Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:29 +1000
Subject: [PATCH 3203/4122] powerpc: Rearrange copy_thread child stack creation

This makes it a bit clearer where the stack frame is created, and will
allow easier use of some of the stack offset constants in a later
change.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-5-npiggin@gmail.com
---
 arch/powerpc/kernel/process.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 5265da2d80340..f93703ea4a127 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1755,13 +1755,16 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 
 	klp_init_thread_info(p);
 
+	/* Create initial stack frame. */
+	sp -= (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD);
+	((unsigned long *)sp)[0] = 0;
+
 	/* Copy registers */
-	sp -= sizeof(struct pt_regs);
-	childregs = (struct pt_regs *) sp;
+	childregs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
 	if (unlikely(args->fn)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->gpr[1] = sp + sizeof(struct pt_regs);
+		childregs->gpr[1] = sp + (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD);
 		/* function */
 		if (args->fn)
 			childregs->gpr[14] = ppc_function_entry((void *)args->fn);
@@ -1796,7 +1799,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 			f = ret_from_fork;
 	}
 	childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
-	sp -= STACK_FRAME_OVERHEAD;
 
 	/*
 	 * The way this works is that at some point in the future
@@ -1806,7 +1808,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	 * do some house keeping and then return from the fork or clone
 	 * system call, using the stack frame created above.
 	 */
-	((unsigned long *)sp)[0] = 0;
 	sp -= sizeof(struct pt_regs);
 	kregs = (struct pt_regs *) sp;
 	sp -= STACK_FRAME_OVERHEAD;
-- 
GitLab


From baa49d81a94bb4170e7f2f4d97016772117d0f60 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:30 +1000
Subject: [PATCH 3204/4122] powerpc/pseries: hvcall stack frame overhead

This call may use the min size stack frame. The scratch space used is
in the caller's parameter area frame, not this function's frame.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-6-npiggin@gmail.com
---
 arch/powerpc/platforms/pseries/hvCall.S | 38 +++++++++++++------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S
index 762eb15d3bd42..783c16ad648b8 100644
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -27,7 +27,9 @@ hcall_tracepoint_refcount:
 
 /*
  * precall must preserve all registers.  use unused STK_PARAM()
- * areas to save snapshots and opcode.
+ * areas to save snapshots and opcode. STK_PARAM() in the caller's
+ * frame will be available even on ELFv2 because these are all
+ * variadic functions.
  */
 #define HCALL_INST_PRECALL(FIRST_REG)				\
 	mflr	r0;						\
@@ -41,29 +43,29 @@ hcall_tracepoint_refcount:
 	std	r10,STK_PARAM(R10)(r1);				\
 	std	r0,16(r1);					\
 	addi	r4,r1,STK_PARAM(FIRST_REG);			\
-	stdu	r1,-STACK_FRAME_OVERHEAD(r1);			\
+	stdu	r1,-STACK_FRAME_MIN_SIZE(r1);			\
 	bl	__trace_hcall_entry;				\
-	ld	r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1);	\
-	ld	r4,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1);	\
-	ld	r5,STACK_FRAME_OVERHEAD+STK_PARAM(R5)(r1);	\
-	ld	r6,STACK_FRAME_OVERHEAD+STK_PARAM(R6)(r1);	\
-	ld	r7,STACK_FRAME_OVERHEAD+STK_PARAM(R7)(r1);	\
-	ld	r8,STACK_FRAME_OVERHEAD+STK_PARAM(R8)(r1);	\
-	ld	r9,STACK_FRAME_OVERHEAD+STK_PARAM(R9)(r1);	\
-	ld	r10,STACK_FRAME_OVERHEAD+STK_PARAM(R10)(r1)
+	ld	r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1);	\
+	ld	r4,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1);	\
+	ld	r5,STACK_FRAME_MIN_SIZE+STK_PARAM(R5)(r1);	\
+	ld	r6,STACK_FRAME_MIN_SIZE+STK_PARAM(R6)(r1);	\
+	ld	r7,STACK_FRAME_MIN_SIZE+STK_PARAM(R7)(r1);	\
+	ld	r8,STACK_FRAME_MIN_SIZE+STK_PARAM(R8)(r1);	\
+	ld	r9,STACK_FRAME_MIN_SIZE+STK_PARAM(R9)(r1);	\
+	ld	r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R10)(r1)
 
 /*
  * postcall is performed immediately before function return which
  * allows liberal use of volatile registers.
  */
 #define __HCALL_INST_POSTCALL					\
-	ld	r0,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1);	\
-	std	r3,STACK_FRAME_OVERHEAD+STK_PARAM(R3)(r1);	\
+	ld	r0,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1);	\
+	std	r3,STACK_FRAME_MIN_SIZE+STK_PARAM(R3)(r1);	\
 	mr	r4,r3;						\
 	mr	r3,r0;						\
 	bl	__trace_hcall_exit;				\
-	ld	r0,STACK_FRAME_OVERHEAD+16(r1);			\
-	addi	r1,r1,STACK_FRAME_OVERHEAD;			\
+	ld	r0,STACK_FRAME_MIN_SIZE+16(r1);			\
+	addi	r1,r1,STACK_FRAME_MIN_SIZE;			\
 	ld	r3,STK_PARAM(R3)(r1);				\
 	mtlr	r0
 
@@ -303,14 +305,14 @@ plpar_hcall9_trace:
 	mr	r7,r8
 	mr	r8,r9
 	mr	r9,r10
-	ld	r10,STACK_FRAME_OVERHEAD+STK_PARAM(R11)(r1)
-	ld	r11,STACK_FRAME_OVERHEAD+STK_PARAM(R12)(r1)
-	ld	r12,STACK_FRAME_OVERHEAD+STK_PARAM(R13)(r1)
+	ld	r10,STACK_FRAME_MIN_SIZE+STK_PARAM(R11)(r1)
+	ld	r11,STACK_FRAME_MIN_SIZE+STK_PARAM(R12)(r1)
+	ld	r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R13)(r1)
 
 	HVSC
 
 	mr	r0,r12
-	ld	r12,STACK_FRAME_OVERHEAD+STK_PARAM(R4)(r1)
+	ld	r12,STACK_FRAME_MIN_SIZE+STK_PARAM(R4)(r1)
 	std	r4,0(r12)
 	std	r5,8(r12)
 	std	r6,16(r12)
-- 
GitLab


From 37195b820d32c23bdefce3f460ed7de48a57e5e4 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:31 +1000
Subject: [PATCH 3205/4122] powerpc: simplify ppc_save_regs

Adjust the pt_regs pointer so the interrupt frame offsets can be used
to save registers.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-7-npiggin@gmail.com
---
 arch/powerpc/kernel/ppc_save_regs.S | 57 ++++++++---------------------
 1 file changed, 15 insertions(+), 42 deletions(-)

diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 2d4d21bb46a97..6e86f3bf46735 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -21,60 +21,33 @@
  * different ABIs, though).
  */
 _GLOBAL(ppc_save_regs)
-	PPC_STL	r0,0*SZL(r3)
+	/* This allows stack frame accessor macros and offsets to be used */
+	subi	r3,r3,STACK_FRAME_OVERHEAD
+	PPC_STL	r0,GPR0(r3)
 #ifdef CONFIG_PPC32
-	stmw	r2, 2*SZL(r3)
+	stmw	r2,GPR2(r3)
 #else
-	PPC_STL	r2,2*SZL(r3)
-	PPC_STL	r3,3*SZL(r3)
-	PPC_STL	r4,4*SZL(r3)
-	PPC_STL	r5,5*SZL(r3)
-	PPC_STL	r6,6*SZL(r3)
-	PPC_STL	r7,7*SZL(r3)
-	PPC_STL	r8,8*SZL(r3)
-	PPC_STL	r9,9*SZL(r3)
-	PPC_STL	r10,10*SZL(r3)
-	PPC_STL	r11,11*SZL(r3)
-	PPC_STL	r12,12*SZL(r3)
-	PPC_STL	r13,13*SZL(r3)
-	PPC_STL	r14,14*SZL(r3)
-	PPC_STL	r15,15*SZL(r3)
-	PPC_STL	r16,16*SZL(r3)
-	PPC_STL	r17,17*SZL(r3)
-	PPC_STL	r18,18*SZL(r3)
-	PPC_STL	r19,19*SZL(r3)
-	PPC_STL	r20,20*SZL(r3)
-	PPC_STL	r21,21*SZL(r3)
-	PPC_STL	r22,22*SZL(r3)
-	PPC_STL	r23,23*SZL(r3)
-	PPC_STL	r24,24*SZL(r3)
-	PPC_STL	r25,25*SZL(r3)
-	PPC_STL	r26,26*SZL(r3)
-	PPC_STL	r27,27*SZL(r3)
-	PPC_STL	r28,28*SZL(r3)
-	PPC_STL	r29,29*SZL(r3)
-	PPC_STL	r30,30*SZL(r3)
-	PPC_STL	r31,31*SZL(r3)
+	SAVE_GPRS(2, 31, r3)
 	lbz	r0,PACAIRQSOFTMASK(r13)
-	PPC_STL	r0,SOFTE-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,SOFTE(r3)
 #endif
 	/* go up one stack frame for SP */
 	PPC_LL	r4,0(r1)
-	PPC_STL	r4,1*SZL(r3)
+	PPC_STL	r4,GPR1(r3)
 	/* get caller's LR */
 	PPC_LL	r0,LRSAVE(r4)
-	PPC_STL	r0,_LINK-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_LINK(r3)
 	mflr	r0
-	PPC_STL	r0,_NIP-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_NIP(r3)
 	mfmsr	r0
-	PPC_STL	r0,_MSR-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_MSR(r3)
 	mfctr	r0
-	PPC_STL	r0,_CTR-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_CTR(r3)
 	mfxer	r0
-	PPC_STL	r0,_XER-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_XER(r3)
 	mfcr	r0
-	PPC_STL	r0,_CCR-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_CCR(r3)
 	li	r0,0
-	PPC_STL	r0,_TRAP-STACK_FRAME_OVERHEAD(r3)
-	PPC_STL	r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3)
+	PPC_STL	r0,_TRAP(r3)
+	PPC_STL	r0,ORIG_GPR3(r3)
 	blr
-- 
GitLab


From c03be0a3f3cc656eab5c427b78959b8f1b169a11 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:32 +1000
Subject: [PATCH 3206/4122] powerpc: add definition for pt_regs offset within
 an interrupt frame

This is a common offset that currently uses the overloaded
STACK_FRAME_OVERHEAD constant. It's easier to read and more
flexible to use a specific regs offset for this.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-8-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h             |  2 +
 arch/powerpc/kernel/asm-offsets.c             |  7 +-
 arch/powerpc/kernel/entry_32.S                |  6 +-
 arch/powerpc/kernel/exceptions-64e.S          | 42 +++++-----
 arch/powerpc/kernel/exceptions-64s.S          | 80 +++++++++----------
 arch/powerpc/kernel/head_32.h                 |  2 +-
 arch/powerpc/kernel/head_85xx.S               |  4 +-
 arch/powerpc/kernel/head_booke.h              |  2 +-
 arch/powerpc/kernel/interrupt_64.S            | 22 ++---
 arch/powerpc/kernel/kgdb.c                    |  2 +-
 arch/powerpc/kernel/optprobes_head.S          |  4 +-
 arch/powerpc/kernel/ppc_save_regs.S           |  2 +-
 arch/powerpc/kernel/process.c                 |  4 +-
 arch/powerpc/kernel/tm.S                      |  8 +-
 arch/powerpc/kernel/trace/ftrace_mprofile.S   |  2 +-
 .../lib/test_emulate_step_exec_instr.S        |  2 +-
 arch/powerpc/perf/callchain.c                 |  2 +-
 arch/powerpc/xmon/xmon.c                      |  7 +-
 18 files changed, 100 insertions(+), 100 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 2efec6d87049e..a4ae67aa9b76d 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -124,6 +124,7 @@ struct pt_regs
 #define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + \
 				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
+#define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_FRAME_MARKER	12
 
 #ifdef CONFIG_PPC64_ELF_ABI_V2
@@ -143,6 +144,7 @@ struct pt_regs
 #define STACK_FRAME_OVERHEAD	16	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	1	/* Location of LR in stack frame */
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+#define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_FRAME_MARKER	2
 #define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
 
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index b4b661f631f5f..68905c9f7c21c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -72,7 +72,7 @@
 #endif
 
 #define STACK_PT_REGS_OFFSET(sym, val)	\
-	DEFINE(sym, STACK_FRAME_OVERHEAD + offsetof(struct pt_regs, val))
+	DEFINE(sym, STACK_INT_FRAME_REGS + offsetof(struct pt_regs, val))
 
 int main(void)
 {
@@ -167,9 +167,8 @@ int main(void)
 	OFFSET(THREAD_CKVRSTATE, thread_struct, ckvr_state.vr);
 	OFFSET(THREAD_CKVRSAVE, thread_struct, ckvrsave);
 	OFFSET(THREAD_CKFPSTATE, thread_struct, ckfp_state.fpr);
-	/* Local pt_regs on stack for Transactional Memory funcs. */
-	DEFINE(TM_FRAME_SIZE, STACK_FRAME_OVERHEAD +
-	       sizeof(struct pt_regs) + 16);
+	/* Local pt_regs on stack in int frame form, plus 16 bytes for TM */
+	DEFINE(TM_FRAME_SIZE, STACK_INT_FRAME_SIZE + 16);
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
 	OFFSET(TI_LOCAL_FLAGS, thread_info, local_flags);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 3fc7c9886bb70..24c8d84a56c96 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -123,12 +123,12 @@ transfer_to_syscall:
 	kuep_lock
 
 	/* Calling convention has r3 = regs, r4 = orig r0 */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	mr	r4,r0
 	bl	system_call_exception
 
 ret_from_syscall:
-	addi    r4,r1,STACK_FRAME_OVERHEAD
+	addi    r4,r1,STACK_INT_FRAME_REGS
 	li	r5,0
 	bl	syscall_exit_prepare
 #ifdef CONFIG_PPC_47x
@@ -293,7 +293,7 @@ _ASM_NOKPROBE_SYMBOL(fast_exception_return)
 	.globl interrupt_return
 interrupt_return:
 	lwz	r4,_MSR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	andi.	r0,r4,MSR_PR
 	beq	.Lkernel_interrupt_return
 	bl	interrupt_exit_user_prepare
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 2f68fb2ee4fc3..62033d022e0a2 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -455,7 +455,7 @@ exc_##n##_bad_stack:							    \
 	EXCEPTION_COMMON(trapnum)					\
 	ack(r8);							\
 	CHECK_NAPPING();						\
-	addi	r3,r1,STACK_FRAME_OVERHEAD;				\
+	addi	r3,r1,STACK_INT_FRAME_REGS;				\
 	bl	hdlr;							\
 	b	interrupt_return
 
@@ -504,7 +504,7 @@ __end_interrupts:
 	EXCEPTION_COMMON_CRIT(0x100)
 	bl	special_reg_save
 	CHECK_NAPPING();
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_nmi_exception
 	b	ret_from_crit_except
 
@@ -515,7 +515,7 @@ __end_interrupts:
 	EXCEPTION_COMMON_MC(0x000)
 	bl	special_reg_save
 	CHECK_NAPPING();
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	machine_check_exception
 	b	ret_from_mc_except
 
@@ -570,7 +570,7 @@ __end_interrupts:
 	std	r14,_ESR(r1)
 	ld	r14,PACA_EXGEN+EX_R14(r13)
 	EXCEPTION_COMMON(0x700)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	program_check_exception
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -586,7 +586,7 @@ __end_interrupts:
 	beq-	1f
 	bl	load_up_fpu
 	b	fast_interrupt_return
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+1:	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	kernel_fp_unavailable_exception
 	b	interrupt_return
 
@@ -606,7 +606,7 @@ BEGIN_FTR_SECTION
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	altivec_unavailable_exception
 	b	interrupt_return
 
@@ -616,7 +616,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 				BOOKE_INTERRUPT_ALTIVEC_ASSIST,
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x220)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 #ifdef CONFIG_ALTIVEC
 BEGIN_FTR_SECTION
 	bl	altivec_assist_exception
@@ -643,7 +643,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	EXCEPTION_COMMON_CRIT(0x9f0)
 	bl	special_reg_save
 	CHECK_NAPPING();
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 #ifdef CONFIG_BOOKE_WDT
 	bl	WatchdogException
 #else
@@ -664,7 +664,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	NORMAL_EXCEPTION_PROLOG(0xf20, BOOKE_INTERRUPT_AP_UNAVAIL,
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0xf20)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_exception
 	b	interrupt_return
 
@@ -731,7 +731,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 	ld	r14,PACA_EXCRIT+EX_R14(r13)
 	ld	r15,PACA_EXCRIT+EX_R15(r13)
 	EXCEPTION_COMMON_CRIT(0xd00)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	DebugException
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -802,7 +802,7 @@ kernel_dbg_exc:
 	ld	r14,PACA_EXDBG+EX_R14(r13)
 	ld	r15,PACA_EXDBG+EX_R15(r13)
 	EXCEPTION_COMMON_DBG(0xd08)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	DebugException
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -812,7 +812,7 @@ kernel_dbg_exc:
 				PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x260)
 	CHECK_NAPPING()
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	/*
 	 * XXX: Returning from performance_monitor_exception taken as a
 	 * soft-NMI (Linux irqs disabled) may be risky to use interrupt_return
@@ -834,7 +834,7 @@ kernel_dbg_exc:
 	EXCEPTION_COMMON_CRIT(0x2a0)
 	bl	special_reg_save
 	CHECK_NAPPING();
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_nmi_exception
 	b	ret_from_crit_except
 
@@ -846,7 +846,7 @@ kernel_dbg_exc:
 	GDBELL_EXCEPTION_PROLOG(0x2c0, BOOKE_INTERRUPT_GUEST_DBELL,
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x2c0)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_exception
 	b	interrupt_return
 
@@ -857,7 +857,7 @@ kernel_dbg_exc:
 	EXCEPTION_COMMON_CRIT(0x2e0)
 	bl	special_reg_save
 	CHECK_NAPPING();
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_nmi_exception
 	b	ret_from_crit_except
 
@@ -866,7 +866,7 @@ kernel_dbg_exc:
 	NORMAL_EXCEPTION_PROLOG(0x310, BOOKE_INTERRUPT_HV_SYSCALL,
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x310)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_exception
 	b	interrupt_return
 
@@ -875,7 +875,7 @@ kernel_dbg_exc:
 	NORMAL_EXCEPTION_PROLOG(0x320, BOOKE_INTERRUPT_HV_PRIV,
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x320)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_exception
 	b	interrupt_return
 
@@ -884,7 +884,7 @@ kernel_dbg_exc:
 	NORMAL_EXCEPTION_PROLOG(0x340, BOOKE_INTERRUPT_LRAT_ERROR,
 			        PROLOG_ADDITION_NONE)
 	EXCEPTION_COMMON(0x340)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_exception
 	b	interrupt_return
 
@@ -979,7 +979,7 @@ masked_interrupt_book3e_0x2c0:
  * original values stashed away in the PACA
  */
 storage_fault_common:
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	do_page_fault
 	b	interrupt_return
 
@@ -988,7 +988,7 @@ storage_fault_common:
  * continues here.
  */
 alignment_more:
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	alignment_exception
 	REST_NVGPRS(r1)
 	b	interrupt_return
@@ -1069,7 +1069,7 @@ bad_stack_book3e:
 	ZEROIZE_GPR(12)
 	std	r12,0(r11)
 	LOAD_PACA_TOC()
-1:	addi	r3,r1,STACK_FRAME_OVERHEAD
+1:	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	kernel_bad_stack
 	b	1b
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 651c36b056bde..29b78536ca597 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1061,7 +1061,7 @@ EXC_COMMON_BEGIN(system_reset_common)
 	subi	r1,r1,INT_FRAME_SIZE
 	__GEN_COMMON_BODY system_reset
 
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	system_reset_exception
 
 	/* Clear MSR_RI before setting SRR0 and SRR1. */
@@ -1208,7 +1208,7 @@ EXC_COMMON_BEGIN(machine_check_early_common)
 BEGIN_FTR_SECTION
 	bl	enable_machine_check
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 BEGIN_FTR_SECTION
 	bl	machine_check_early_boot
 END_FTR_SECTION(0, 1)     // nop out after boot
@@ -1298,7 +1298,7 @@ EXC_COMMON_BEGIN(machine_check_common)
 	 * save area: PACA_EXMC instead of PACA_EXGEN.
 	 */
 	GEN_COMMON machine_check
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	machine_check_exception_async
 	b	interrupt_return_srr
 
@@ -1364,14 +1364,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 	 * This is the NMI version of the handler because we are called from
 	 * the early handler which is a true NMI.
 	 */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	machine_check_exception
 
 	/*
 	 * We will not reach here. Even if we did, there is no way out.
 	 * Call unrecoverable_exception and die.
 	 */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unrecoverable_exception
 	b	.
 
@@ -1422,7 +1422,7 @@ EXC_VIRT_END(data_access, 0x4300, 0x80)
 EXC_COMMON_BEGIN(data_access_common)
 	GEN_COMMON data_access
 	ld	r4,_DSISR(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	andis.	r0,r4,DSISR_DABRMATCH@h
 	bne-	1f
 #ifdef CONFIG_PPC_64S_HASH_MMU
@@ -1479,7 +1479,7 @@ EXC_COMMON_BEGIN(data_access_slb_common)
 #ifdef CONFIG_PPC_64S_HASH_MMU
 BEGIN_MMU_FTR_SECTION
 	/* HPT case, do SLB fault */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	do_slb_fault
 	cmpdi	r3,0
 	bne-	1f
@@ -1493,7 +1493,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	li	r3,-EFAULT
 #endif
 	std	r3,RESULT(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	do_bad_segment_interrupt
 	b	interrupt_return_srr
 
@@ -1525,7 +1525,7 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80)
 EXC_VIRT_END(instruction_access, 0x4400, 0x80)
 EXC_COMMON_BEGIN(instruction_access_common)
 	GEN_COMMON instruction_access
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 #ifdef CONFIG_PPC_64S_HASH_MMU
 BEGIN_MMU_FTR_SECTION
 	bl	do_hash_fault
@@ -1567,7 +1567,7 @@ EXC_COMMON_BEGIN(instruction_access_slb_common)
 #ifdef CONFIG_PPC_64S_HASH_MMU
 BEGIN_MMU_FTR_SECTION
 	/* HPT case, do SLB fault */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	do_slb_fault
 	cmpdi	r3,0
 	bne-	1f
@@ -1581,7 +1581,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	li	r3,-EFAULT
 #endif
 	std	r3,RESULT(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	do_bad_segment_interrupt
 	b	interrupt_return_srr
 
@@ -1635,7 +1635,7 @@ EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
 EXC_VIRT_END(hardware_interrupt, 0x4500, 0x100)
 EXC_COMMON_BEGIN(hardware_interrupt_common)
 	GEN_COMMON hardware_interrupt
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	do_IRQ
 	BEGIN_FTR_SECTION
 	b	interrupt_return_hsrr
@@ -1665,7 +1665,7 @@ EXC_VIRT_BEGIN(alignment, 0x4600, 0x100)
 EXC_VIRT_END(alignment, 0x4600, 0x100)
 EXC_COMMON_BEGIN(alignment_common)
 	GEN_COMMON alignment
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	alignment_exception
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
@@ -1731,7 +1731,7 @@ EXC_COMMON_BEGIN(program_check_common)
 	__GEN_COMMON_BODY program_check
 
 .Ldo_program_check:
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	program_check_exception
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
@@ -1762,7 +1762,7 @@ EXC_VIRT_END(fp_unavailable, 0x4800, 0x100)
 EXC_COMMON_BEGIN(fp_unavailable_common)
 	GEN_COMMON fp_unavailable
 	bne	1f			/* if from user, just load it up */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	kernel_fp_unavailable_exception
 0:	trap
 	EMIT_BUG_ENTRY 0b, __FILE__, __LINE__, 0
@@ -1780,7 +1780,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
 	b	fast_interrupt_return_srr
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	fp_unavailable_tm
 	b	interrupt_return_srr
 #endif
@@ -1824,7 +1824,7 @@ EXC_VIRT_BEGIN(decrementer, 0x4900, 0x80)
 EXC_VIRT_END(decrementer, 0x4900, 0x80)
 EXC_COMMON_BEGIN(decrementer_common)
 	GEN_COMMON decrementer
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	timer_interrupt
 	b	interrupt_return_srr
 
@@ -1909,7 +1909,7 @@ EXC_VIRT_BEGIN(doorbell_super, 0x4a00, 0x100)
 EXC_VIRT_END(doorbell_super, 0x4a00, 0x100)
 EXC_COMMON_BEGIN(doorbell_super_common)
 	GEN_COMMON doorbell_super
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
 #else
@@ -2076,7 +2076,7 @@ EXC_VIRT_BEGIN(single_step, 0x4d00, 0x100)
 EXC_VIRT_END(single_step, 0x4d00, 0x100)
 EXC_COMMON_BEGIN(single_step_common)
 	GEN_COMMON single_step
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	single_step_exception
 	b	interrupt_return_srr
 
@@ -2110,7 +2110,7 @@ EXC_VIRT_BEGIN(h_data_storage, 0x4e00, 0x20)
 EXC_VIRT_END(h_data_storage, 0x4e00, 0x20)
 EXC_COMMON_BEGIN(h_data_storage_common)
 	GEN_COMMON h_data_storage
-	addi    r3,r1,STACK_FRAME_OVERHEAD
+	addi    r3,r1,STACK_INT_FRAME_REGS
 BEGIN_MMU_FTR_SECTION
 	bl      do_bad_page_fault_segv
 MMU_FTR_SECTION_ELSE
@@ -2139,7 +2139,7 @@ EXC_VIRT_BEGIN(h_instr_storage, 0x4e20, 0x20)
 EXC_VIRT_END(h_instr_storage, 0x4e20, 0x20)
 EXC_COMMON_BEGIN(h_instr_storage_common)
 	GEN_COMMON h_instr_storage
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_exception
 	b	interrupt_return_hsrr
 
@@ -2162,7 +2162,7 @@ EXC_VIRT_BEGIN(emulation_assist, 0x4e40, 0x20)
 EXC_VIRT_END(emulation_assist, 0x4e40, 0x20)
 EXC_COMMON_BEGIN(emulation_assist_common)
 	GEN_COMMON emulation_assist
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	emulation_assist_interrupt
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_hsrr
@@ -2222,7 +2222,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 
 	__GEN_COMMON_BODY hmi_exception_early
 
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	hmi_exception_realmode
 	cmpdi	cr0,r3,0
 	bne	1f
@@ -2240,7 +2240,7 @@ EXC_COMMON_BEGIN(hmi_exception_early_common)
 
 EXC_COMMON_BEGIN(hmi_exception_common)
 	GEN_COMMON hmi_exception
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	handle_hmi_exception
 	b	interrupt_return_hsrr
 
@@ -2274,7 +2274,7 @@ EXC_VIRT_BEGIN(h_doorbell, 0x4e80, 0x20)
 EXC_VIRT_END(h_doorbell, 0x4e80, 0x20)
 EXC_COMMON_BEGIN(h_doorbell_common)
 	GEN_COMMON h_doorbell
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 #ifdef CONFIG_PPC_DOORBELL
 	bl	doorbell_exception
 #else
@@ -2310,7 +2310,7 @@ EXC_VIRT_BEGIN(h_virt_irq, 0x4ea0, 0x20)
 EXC_VIRT_END(h_virt_irq, 0x4ea0, 0x20)
 EXC_COMMON_BEGIN(h_virt_irq_common)
 	GEN_COMMON h_virt_irq
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	do_IRQ
 	b	interrupt_return_hsrr
 
@@ -2356,7 +2356,7 @@ EXC_VIRT_BEGIN(performance_monitor, 0x4f00, 0x20)
 EXC_VIRT_END(performance_monitor, 0x4f00, 0x20)
 EXC_COMMON_BEGIN(performance_monitor_common)
 	GEN_COMMON performance_monitor
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	lbz	r4,PACAIRQSOFTMASK(r13)
 	cmpdi	r4,IRQS_ENABLED
 	bne	1f
@@ -2410,14 +2410,14 @@ BEGIN_FTR_SECTION
 	b	fast_interrupt_return_srr
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	altivec_unavailable_tm
 	b	interrupt_return_srr
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
 #endif
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	altivec_unavailable_exception
 	b	interrupt_return_srr
 
@@ -2458,14 +2458,14 @@ BEGIN_FTR_SECTION
 	b	load_up_vsx
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 2:	/* User process was in a transaction */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	vsx_unavailable_tm
 	b	interrupt_return_srr
 #endif
 1:
 END_FTR_SECTION_IFSET(CPU_FTR_VSX)
 #endif
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	vsx_unavailable_exception
 	b	interrupt_return_srr
 
@@ -2492,7 +2492,7 @@ EXC_VIRT_BEGIN(facility_unavailable, 0x4f60, 0x20)
 EXC_VIRT_END(facility_unavailable, 0x4f60, 0x20)
 EXC_COMMON_BEGIN(facility_unavailable_common)
 	GEN_COMMON facility_unavailable
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	facility_unavailable_exception
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
@@ -2520,7 +2520,7 @@ EXC_VIRT_BEGIN(h_facility_unavailable, 0x4f80, 0x20)
 EXC_VIRT_END(h_facility_unavailable, 0x4f80, 0x20)
 EXC_COMMON_BEGIN(h_facility_unavailable_common)
 	GEN_COMMON h_facility_unavailable
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	facility_unavailable_exception
 	REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
 	b	interrupt_return_hsrr
@@ -2550,7 +2550,7 @@ EXC_REAL_END(cbe_system_error, 0x1200, 0x100)
 EXC_VIRT_NONE(0x5200, 0x100)
 EXC_COMMON_BEGIN(cbe_system_error_common)
 	GEN_COMMON cbe_system_error
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	cbe_system_error_exception
 	b	interrupt_return_hsrr
 
@@ -2581,7 +2581,7 @@ EXC_VIRT_BEGIN(instruction_breakpoint, 0x5300, 0x100)
 EXC_VIRT_END(instruction_breakpoint, 0x5300, 0x100)
 EXC_COMMON_BEGIN(instruction_breakpoint_common)
 	GEN_COMMON instruction_breakpoint
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	instruction_breakpoint_exception
 	b	interrupt_return_srr
 
@@ -2703,7 +2703,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 
 EXC_COMMON_BEGIN(denorm_exception_common)
 	GEN_COMMON denorm_exception
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unknown_exception
 	b	interrupt_return_hsrr
 
@@ -2720,7 +2720,7 @@ EXC_REAL_END(cbe_maintenance, 0x1600, 0x100)
 EXC_VIRT_NONE(0x5600, 0x100)
 EXC_COMMON_BEGIN(cbe_maintenance_common)
 	GEN_COMMON cbe_maintenance
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	cbe_maintenance_exception
 	b	interrupt_return_hsrr
 
@@ -2745,7 +2745,7 @@ EXC_VIRT_BEGIN(altivec_assist, 0x5700, 0x100)
 EXC_VIRT_END(altivec_assist, 0x5700, 0x100)
 EXC_COMMON_BEGIN(altivec_assist_common)
 	GEN_COMMON altivec_assist
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 #ifdef CONFIG_ALTIVEC
 	bl	altivec_assist_exception
 	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
@@ -2767,7 +2767,7 @@ EXC_REAL_END(cbe_thermal, 0x1800, 0x100)
 EXC_VIRT_NONE(0x5800, 0x100)
 EXC_COMMON_BEGIN(cbe_thermal_common)
 	GEN_COMMON cbe_thermal
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	cbe_thermal_exception
 	b	interrupt_return_hsrr
 
@@ -2800,7 +2800,7 @@ EXC_COMMON_BEGIN(soft_nmi_common)
 	subi	r1,r1,INT_FRAME_SIZE
 	__GEN_COMMON_BODY soft_nmi
 
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	soft_nmi_interrupt
 
 	/* Clear MSR_RI before setting SRR0 and SRR1. */
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index c3286260a7d1c..117d25330e139 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -127,7 +127,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 	mfspr	r10,SPRN_XER
 	addi	r2, r2, -THREAD
 	stw	r10,_XER(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 .endm
 
 .macro prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index 52c0ab416326a..24f39abf81dff 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -972,10 +972,10 @@ _GLOBAL(__giveup_spe)
 	li	r4,THREAD_ACC
 	evstddx	evr6, r4, r3		/* save off accumulator */
 	beq	1f
-	lwz	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	lwz	r4,_MSR-STACK_INT_FRAME_REGS(r5)
 	lis	r3,MSR_SPE@h
 	andc	r4,r4,r3		/* disable SPE for previous task */
-	stw	r4,_MSR-STACK_FRAME_OVERHEAD(r5)
+	stw	r4,_MSR-STACK_INT_FRAME_REGS(r5)
 1:
 	blr
 #endif /* CONFIG_SPE */
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 1cb9d0f7cbf25..3149ac20b18e0 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -99,7 +99,7 @@ END_BTB_FLUSH_SECTION
 	mfspr	r10,SPRN_XER
 	addi	r2, r2, -THREAD
 	stw	r10,_XER(r1)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 .endm
 
 .macro prepare_transfer_to_handler
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index a019ed6fc8393..49d585eae7c8b 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -78,7 +78,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
 	std	r12,_CCR(r1)
 	std	r3,ORIG_GPR3(r1)
 	/* Calling convention has r3 = regs, r4 = orig r0 */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	mr	r4,r0
 	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
 	std	r11,-16(r3)		/* "regshere" marker */
@@ -99,7 +99,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	bl	system_call_exception
 
 .Lsyscall_vectored_\name\()_exit:
-	addi	r4,r1,STACK_FRAME_OVERHEAD
+	addi	r4,r1,STACK_INT_FRAME_REGS
 	li	r5,1 /* scv */
 	bl	syscall_exit_prepare
 	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
@@ -176,7 +176,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_vectored_\name\()_restart)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
 	LOAD_PACA_TOC()
 	ld	r3,RESULT(r1)
-	addi	r4,r1,STACK_FRAME_OVERHEAD
+	addi	r4,r1,STACK_INT_FRAME_REGS
 	li	r11,IRQS_ALL_DISABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
 	bl	syscall_exit_restart
@@ -251,7 +251,7 @@ END_BTB_FLUSH_SECTION
 	std	r12,_CCR(r1)
 	std	r3,ORIG_GPR3(r1)
 	/* Calling convention has r3 = regs, r4 = orig r0 */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	mr	r4,r0
 	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
 	std	r11,-16(r3)		/* "regshere" marker */
@@ -278,7 +278,7 @@ END_BTB_FLUSH_SECTION
 	bl	system_call_exception
 
 .Lsyscall_exit:
-	addi	r4,r1,STACK_FRAME_OVERHEAD
+	addi	r4,r1,STACK_INT_FRAME_REGS
 	li	r5,0 /* !scv */
 	bl	syscall_exit_prepare
 	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
@@ -357,7 +357,7 @@ _ASM_NOKPROBE_SYMBOL(syscall_restart)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
 	LOAD_PACA_TOC()
 	ld	r3,RESULT(r1)
-	addi	r4,r1,STACK_FRAME_OVERHEAD
+	addi	r4,r1,STACK_INT_FRAME_REGS
 	li	r11,IRQS_ALL_DISABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
 	bl	syscall_exit_restart
@@ -388,7 +388,7 @@ _ASM_NOKPROBE_SYMBOL(fast_interrupt_return_srr)
 	andi.	r0,r5,MSR_RI
 	li	r3,0 /* 0 return value, no EMULATE_STACK_STORE */
 	bne+	.Lfast_kernel_interrupt_return_srr
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	unrecoverable_exception
 	b	. /* should not get here */
 #else
@@ -406,7 +406,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\())
 	beq	interrupt_return_\srr\()_kernel
 interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	interrupt_exit_user_prepare
 	cmpdi	r3,0
 	bne-	.Lrestore_nvgprs_\srr
@@ -503,7 +503,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user_restart)
 	GET_PACA(r13)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
 	LOAD_PACA_TOC()
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	li	r11,IRQS_ALL_DISABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
 	bl	interrupt_exit_user_restart
@@ -518,7 +518,7 @@ RESTART_TABLE(.Linterrupt_return_\srr\()_user_rst_start, .Linterrupt_return_\srr
 	.balign IFETCH_ALIGN_BYTES
 interrupt_return_\srr\()_kernel:
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	interrupt_exit_kernel_prepare
 
 	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
@@ -684,7 +684,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel_restart)
 	GET_PACA(r13)
 	ld	r1,PACA_EXIT_SAVE_R1(r13)
 	LOAD_PACA_TOC()
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 	li	r11,IRQS_ALL_DISABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
 	bl	interrupt_exit_kernel_restart
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 1a1e9995dae35..ebe4d1645ca1f 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -191,7 +191,7 @@ static int kgdb_break_match(struct pt_regs *regs)
 void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 {
 	struct pt_regs *regs = (struct pt_regs *)(p->thread.ksp +
-						  STACK_FRAME_OVERHEAD);
+						  STACK_INT_FRAME_REGS);
 	unsigned long *ptr = gdb_regs;
 	int reg;
 
diff --git a/arch/powerpc/kernel/optprobes_head.S b/arch/powerpc/kernel/optprobes_head.S
index cd4e7bc32609d..35932f45fb4ec 100644
--- a/arch/powerpc/kernel/optprobes_head.S
+++ b/arch/powerpc/kernel/optprobes_head.S
@@ -85,7 +85,7 @@ optprobe_template_op_address:
 	TEMPLATE_FOR_IMM_LOAD_INSNS
 
 	/* 2. pt_regs pointer in r4 */
-	addi	r4,r1,STACK_FRAME_OVERHEAD
+	addi	r4,r1,STACK_INT_FRAME_REGS
 
 	.global optprobe_template_call_handler
 optprobe_template_call_handler:
@@ -96,7 +96,7 @@ optprobe_template_call_handler:
 	 * Parameters for instruction emulation:
 	 * 1. Pass SP in register r3.
 	 */
-	addi	r3,r1,STACK_FRAME_OVERHEAD
+	addi	r3,r1,STACK_INT_FRAME_REGS
 
 	.global optprobe_template_insn
 optprobe_template_insn:
diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S
index 6e86f3bf46735..49813f9824681 100644
--- a/arch/powerpc/kernel/ppc_save_regs.S
+++ b/arch/powerpc/kernel/ppc_save_regs.S
@@ -22,7 +22,7 @@
  */
 _GLOBAL(ppc_save_regs)
 	/* This allows stack frame accessor macros and offsets to be used */
-	subi	r3,r3,STACK_FRAME_OVERHEAD
+	subi	r3,r3,STACK_INT_FRAME_REGS
 	PPC_STL	r0,GPR0(r3)
 #ifdef CONFIG_PPC32
 	stmw	r2,GPR2(r3)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index f93703ea4a127..d7a581997d926 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2260,12 +2260,12 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
 
 		/*
 		 * See if this is an exception frame.
-		 * We look for the "regshere" marker in the current frame.
+		 * We look for the "regs" marker in the current frame.
 		 */
 		if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS)
 		    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
 			struct pt_regs *regs = (struct pt_regs *)
-				(sp + STACK_FRAME_OVERHEAD);
+				(sp + STACK_INT_FRAME_REGS);
 
 			lr = regs->link;
 			printk("%s--- interrupt: %lx at %pS\n",
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 5a0f023a26e90..9feab5e0485bf 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -117,7 +117,7 @@ _GLOBAL(tm_reclaim)
 	std	r2, STK_GOT(r1)
 	stdu	r1, -TM_FRAME_SIZE(r1)
 
-	/* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD]. */
+	/* We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS]. */
 
 	std	r3, STK_PARAM(R3)(r1)
 	SAVE_NVGPRS(r1)
@@ -222,7 +222,7 @@ _GLOBAL(tm_reclaim)
 	 * Make r7 look like an exception frame so that we can use the neat
 	 * GPRx(n) macros. r7 is NOT a pt_regs ptr!
 	 */
-	subi	r7, r7, STACK_FRAME_OVERHEAD
+	subi	r7, r7, STACK_INT_FRAME_REGS
 
 	/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
 	SAVE_GPR(0, r7)				/* user r0 */
@@ -359,7 +359,7 @@ _GLOBAL(__tm_recheckpoint)
 	stdu	r1, -TM_FRAME_SIZE(r1)
 
 	/*
-	 * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+	 * We've a struct pt_regs at [r1+STACK_INT_FRAME_REGS].
 	 * This is used for backing up the NVGPRs:
 	 */
 	SAVE_NVGPRS(r1)
@@ -379,7 +379,7 @@ _GLOBAL(__tm_recheckpoint)
 	 * Make r7 look like an exception frame so that we can use the neat
 	 * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
 	 */
-	subi	r7, r7, STACK_FRAME_OVERHEAD
+	subi	r7, r7, STACK_INT_FRAME_REGS
 
 	/* We need to setup MSR for FP/VMX/VSX register save instructions. */
 	mfmsr	r6
diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S
index d031093bc4367..ffb1db3868499 100644
--- a/arch/powerpc/kernel/trace/ftrace_mprofile.S
+++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S
@@ -110,7 +110,7 @@
 	.endif
 
 	/* Load &pt_regs in r6 for call below */
-	addi    r6, r1, STACK_FRAME_OVERHEAD
+	addi    r6, r1, STACK_INT_FRAME_REGS
 .endm
 
 .macro	ftrace_regs_exit allregs
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
index 5473f9d03df3a..e2b646a4f7fa1 100644
--- a/arch/powerpc/lib/test_emulate_step_exec_instr.S
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -16,7 +16,7 @@ _GLOBAL(exec_instr)
 
 	/*
 	 * Stack frame layout (INT_FRAME_SIZE bytes)
-	 *   In-memory pt_regs	(SP + STACK_FRAME_OVERHEAD)
+	 *   In-memory pt_regs	(SP + STACK_INT_FRAME_REGS)
 	 *   Scratch space	(SP + 8)
 	 *   Back chain		(SP + 0)
 	 */
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 8718289c051dd..9e254aed1f61a 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -67,7 +67,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 			 * This looks like an interrupt frame for an
 			 * interrupt that occurred in the kernel
 			 */
-			regs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
+			regs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
 			next_ip = regs->nip;
 			lr = regs->link;
 			level = 0;
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index e34d7809f6c9f..a14eb4d815c2f 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1782,14 +1782,13 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
 			xmon_print_symbol(ip, " ", "\n");
 		}
 
-		/* Look for "regshere" marker to see if this is
+		/* Look for "regs" marker to see if this is
 		   an exception frame. */
 		if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
 		    && marker == STACK_FRAME_REGS_MARKER) {
-			if (mread(sp + STACK_FRAME_OVERHEAD, &regs, sizeof(regs))
-			    != sizeof(regs)) {
+			if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) {
 				printf("Couldn't read registers at %lx\n",
-				       sp + STACK_FRAME_OVERHEAD);
+				       sp + STACK_INT_FRAME_REGS);
 				break;
 			}
 			printf("--- Exception: %lx %s at ", regs.trap,
-- 
GitLab


From d2e8ff9f1492f44c5a6d93f759eea27574d753de Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:33 +1000
Subject: [PATCH 3207/4122] powerpc: add a definition for the marker offset
 within the interrupt frame

Define a constant rather than open-code the offset for the
"regs" marker.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-9-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h       |  2 ++
 arch/powerpc/kernel/entry_32.S          |  2 +-
 arch/powerpc/kernel/exceptions-64e.S    |  2 +-
 arch/powerpc/kernel/exceptions-64s.S    |  2 +-
 arch/powerpc/kernel/head_32.h           |  2 +-
 arch/powerpc/kernel/head_booke.h        |  2 +-
 arch/powerpc/kernel/interrupt_64.S      | 10 +++++-----
 arch/powerpc/kvm/book3s_hv_rmhandlers.S |  2 +-
 8 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index a4ae67aa9b76d..8a9f4cf8c4c58 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -125,6 +125,7 @@ struct pt_regs
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + \
 				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
+#define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 16)
 #define STACK_FRAME_MARKER	12
 
 #ifdef CONFIG_PPC64_ELF_ABI_V2
@@ -145,6 +146,7 @@ struct pt_regs
 #define STACK_FRAME_LR_SAVE	1	/* Location of LR in stack frame */
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
+#define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 8)
 #define STACK_FRAME_MARKER	2
 #define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
 
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 24c8d84a56c96..2f61b7d3677c1 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -114,7 +114,7 @@ transfer_to_syscall:
 	addi	r12,r12,STACK_FRAME_REGS_MARKER@l
 	stw	r9,_MSR(r1)
 	li	r2, INTERRUPT_SYSCALL
-	stw	r12,8(r1)
+	stw	r12,STACK_INT_FRAME_MARKER(r1)
 	stw	r2,_TRAP(r1)
 	SAVE_GPR(0, r1)
 	SAVE_GPRS(3, 8, r1)
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 62033d022e0a2..b9cec22df9f97 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -391,7 +391,7 @@ exc_##n##_common:							    \
 	std	r10,_CCR(r1);		/* store orig CR in stackframe */   \
 	std	r9,GPR1(r1);		/* store stack frame back link */   \
 	std	r11,SOFTE(r1);		/* and save it to stackframe */     \
-	std	r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */	    \
+	std	r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */	    \
 	std	r3,_TRAP(r1);		/* set trap number		*/  \
 	std	r0,RESULT(r1);		/* clear regs->result */	    \
 	SAVE_NVGPRS(r1);
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 29b78536ca597..ac3b0580224ee 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -591,7 +591,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	li	r10,0
 	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
 	std	r10,RESULT(r1)		/* clear regs->result		*/
-	std	r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame	*/
+	std	r11,STACK_INT_FRAME_MARKER(r1) /* mark the frame	*/
 .endm
 
 /*
diff --git a/arch/powerpc/kernel/head_32.h b/arch/powerpc/kernel/head_32.h
index 117d25330e139..f8e2911478a79 100644
--- a/arch/powerpc/kernel/head_32.h
+++ b/arch/powerpc/kernel/head_32.h
@@ -112,7 +112,7 @@ _ASM_NOKPROBE_SYMBOL(\name\()_virt)
 	stw	r0,GPR0(r1)
 	lis	r10,STACK_FRAME_REGS_MARKER@ha /* exception frame marker */
 	addi	r10,r10,STACK_FRAME_REGS_MARKER@l
-	stw	r10,8(r1)
+	stw	r10,STACK_INT_FRAME_MARKER(r1)
 	li	r10, \trapno
 	stw	r10,_TRAP(r1)
 	SAVE_GPRS(3, 8, r1)
diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index 3149ac20b18e0..37d43c1726766 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -84,7 +84,7 @@ END_BTB_FLUSH_SECTION
 	stw	r0,GPR0(r1)
 	lis	r10, STACK_FRAME_REGS_MARKER@ha	/* exception frame marker */
 	addi	r10, r10, STACK_FRAME_REGS_MARKER@l
-	stw	r10, 8(r1)
+	stw	r10, STACK_INT_FRAME_MARKER(r1)
 	li	r10, \trapno
 	stw	r10,_TRAP(r1)
 	SAVE_GPRS(3, 8, r1)
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 49d585eae7c8b..321992c1c9f90 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -77,11 +77,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
 	std	r11,_TRAP(r1)
 	std	r12,_CCR(r1)
 	std	r3,ORIG_GPR3(r1)
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+	std	r11,STACK_INT_FRAME_MARKER(r1)		/* "regs" marker */
 	/* Calling convention has r3 = regs, r4 = orig r0 */
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	mr	r4,r0
-	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
-	std	r11,-16(r3)		/* "regshere" marker */
 
 BEGIN_FTR_SECTION
 	HMT_MEDIUM
@@ -250,11 +250,11 @@ END_BTB_FLUSH_SECTION
 	std	r11,_TRAP(r1)
 	std	r12,_CCR(r1)
 	std	r3,ORIG_GPR3(r1)
+	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
+	std	r11,STACK_INT_FRAME_MARKER(r1)		/* "regs" marker */
 	/* Calling convention has r3 = regs, r4 = orig r0 */
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	mr	r4,r0
-	LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
-	std	r11,-16(r3)		/* "regshere" marker */
 
 #ifdef CONFIG_PPC_BOOK3S
 	li	r11,1
@@ -637,7 +637,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	 * Leaving a stale STACK_FRAME_REGS_MARKER on the stack can confuse
 	 * the reliable stack unwinder later on. Clear it.
 	 */
-	std	r0,STACK_FRAME_OVERHEAD-16(r1)
+	std	r0,STACK_INT_FRAME_MARKER(r1)
 
 	REST_GPRS(2, 5, r1)
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 37f50861dd98f..a9e162a1deec9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -2729,7 +2729,7 @@ kvmppc_bad_host_intr:
 	std	r6, SOFTE(r1)
 	LOAD_PACA_TOC()
 	LOAD_REG_IMMEDIATE(3, STACK_FRAME_REGS_MARKER)
-	std	r3, STACK_FRAME_OVERHEAD-16(r1)
+	std	r3, STACK_INT_FRAME_MARKER(r1)
 
 	/*
 	 * XXX On POWER7 and POWER8, we just spin here since we don't
-- 
GitLab


From e856e336924b0ecd0b7058e65e6b3e7266ee0b95 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:34 +1000
Subject: [PATCH 3208/4122] powerpc: Rename STACK_FRAME_MARKER and derive it
 from frame offset

This is a count of longs from the stack pointer to the regs marker.
Rename it to make it more distinct from the other byte offsets. It
can be derived from the byte offset definitions just added.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-10-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h | 4 ++--
 arch/powerpc/kernel/process.c     | 2 +-
 arch/powerpc/kernel/stacktrace.c  | 2 +-
 arch/powerpc/perf/callchain.c     | 2 +-
 arch/powerpc/xmon/xmon.c          | 3 +--
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 8a9f4cf8c4c58..fdd50648df56e 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -126,7 +126,6 @@ struct pt_regs
 				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 16)
-#define STACK_FRAME_MARKER	12
 
 #ifdef CONFIG_PPC64_ELF_ABI_V2
 #define STACK_FRAME_MIN_SIZE	32
@@ -147,7 +146,6 @@ struct pt_regs
 #define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 8)
-#define STACK_FRAME_MARKER	2
 #define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
 
 /* Size of stack frame allocated when calling signal handler. */
@@ -155,6 +153,8 @@ struct pt_regs
 
 #endif /* __powerpc64__ */
 
+#define STACK_INT_FRAME_MARKER_LONGS	(STACK_INT_FRAME_MARKER/sizeof(long))
+
 #ifndef __ASSEMBLY__
 #include <asm/paca.h>
 
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index d7a581997d926..6c0a3c664266c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2263,7 +2263,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
 		 * We look for the "regs" marker in the current frame.
 		 */
 		if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS)
-		    && stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+		    && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
 			struct pt_regs *regs = (struct pt_regs *)
 				(sp + STACK_INT_FRAME_REGS);
 
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index a2443d61728eb..7efa0ec9dd774 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -136,7 +136,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
 
 		/* Mark stacktraces with exception frames as unreliable. */
 		if (sp <= stack_end - STACK_INT_FRAME_SIZE &&
-		    stack[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+		    stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
 			return -EINVAL;
 		}
 
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index 9e254aed1f61a..b01497ed51738 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -62,7 +62,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 
 		if (next_sp == sp + STACK_INT_FRAME_SIZE &&
 		    validate_sp(sp, current, STACK_INT_FRAME_SIZE) &&
-		    fp[STACK_FRAME_MARKER] == STACK_FRAME_REGS_MARKER) {
+		    fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
 			/*
 			 * This looks like an interrupt frame for an
 			 * interrupt that occurred in the kernel
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index a14eb4d815c2f..0da66bc4823d4 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -1721,7 +1721,6 @@ static void get_function_bounds(unsigned long pc, unsigned long *startp,
 }
 
 #define LRSAVE_OFFSET		(STACK_FRAME_LR_SAVE * sizeof(unsigned long))
-#define MARKER_OFFSET		(STACK_FRAME_MARKER * sizeof(unsigned long))
 
 static void xmon_show_stack(unsigned long sp, unsigned long lr,
 			    unsigned long pc)
@@ -1784,7 +1783,7 @@ static void xmon_show_stack(unsigned long sp, unsigned long lr,
 
 		/* Look for "regs" marker to see if this is
 		   an exception frame. */
-		if (mread(sp + MARKER_OFFSET, &marker, sizeof(unsigned long))
+		if (mread(sp + STACK_INT_FRAME_MARKER, &marker, sizeof(unsigned long))
 		    && marker == STACK_FRAME_REGS_MARKER) {
 			if (mread(sp + STACK_INT_FRAME_REGS, &regs, sizeof(regs)) != sizeof(regs)) {
 				printf("Couldn't read registers at %lx\n",
-- 
GitLab


From 1223e5a20f7fb3c31c91a328d1a04ed26d5e889b Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:35 +1000
Subject: [PATCH 3209/4122] powerpc: add a define for the user interrupt frame
 size

The user interrupt frame is a different size from the kernel frame, so
give it its own name.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-11-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h | 6 +++---
 arch/powerpc/kernel/process.c     | 6 +++---
 arch/powerpc/kernel/stacktrace.c  | 4 ++--
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index fdd50648df56e..705ce26ae887d 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -122,8 +122,7 @@ struct pt_regs
 
 #define STACK_FRAME_OVERHEAD	112	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
-#define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + \
-				 STACK_FRAME_OVERHEAD + KERNEL_REDZONE_SIZE)
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 16)
 
@@ -143,7 +142,7 @@ struct pt_regs
 #define KERNEL_REDZONE_SIZE	0
 #define STACK_FRAME_OVERHEAD	16	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	1	/* Location of LR in stack frame */
-#define STACK_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 8)
 #define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
@@ -153,6 +152,7 @@ struct pt_regs
 
 #endif /* __powerpc64__ */
 
+#define STACK_INT_FRAME_SIZE	(KERNEL_REDZONE_SIZE + STACK_USER_INT_FRAME_SIZE)
 #define STACK_INT_FRAME_MARKER_LONGS	(STACK_INT_FRAME_MARKER/sizeof(long))
 
 #ifndef __ASSEMBLY__
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6c0a3c664266c..010a5ee746aef 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1756,15 +1756,15 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	klp_init_thread_info(p);
 
 	/* Create initial stack frame. */
-	sp -= (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD);
+	sp -= STACK_USER_INT_FRAME_SIZE;
 	((unsigned long *)sp)[0] = 0;
 
 	/* Copy registers */
-	childregs = (struct pt_regs *)(sp + STACK_FRAME_OVERHEAD);
+	childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
 	if (unlikely(args->fn)) {
 		/* kernel thread */
 		memset(childregs, 0, sizeof(struct pt_regs));
-		childregs->gpr[1] = sp + (sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD);
+		childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE;
 		/* function */
 		if (args->fn)
 			childregs->gpr[14] = ppc_function_entry((void *)args->fn);
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 7efa0ec9dd774..453ac317a6cf5 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -77,7 +77,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
 		/*
 		 * For user tasks, this is the SP value loaded on
 		 * kernel entry, see "PACAKSAVE(r13)" in _switch() and
-		 * system_call_common()/EXCEPTION_PROLOG_COMMON().
+		 * system_call_common().
 		 *
 		 * Likewise for non-swapper kernel threads,
 		 * this also happens to be the top of the stack
@@ -88,7 +88,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
 		 * an unreliable stack trace until it's been
 		 * _switch()'ed to for the first time.
 		 */
-		stack_end -= STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+		stack_end -= STACK_USER_INT_FRAME_SIZE;
 	} else {
 		/*
 		 * idle tasks have a custom stack layout,
-- 
GitLab


From 6f291a03819e4051ebc870471d26915ef2e6ba31 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:36 +1000
Subject: [PATCH 3210/4122] powerpc: add a define for the switch frame size and
 regs offset

This is open-coded in process.c, ppc32 uses a different define with the
same value, and the C definition is name differently which makes it an
extra indirection to grep for.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-12-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h |  6 ++++--
 arch/powerpc/kernel/asm-offsets.c |  2 +-
 arch/powerpc/kernel/entry_32.S    |  6 +++---
 arch/powerpc/kernel/process.c     | 12 ++++++++----
 4 files changed, 16 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 705ce26ae887d..412ef07497752 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -97,8 +97,6 @@ struct pt_regs
 #endif
 
 
-#define STACK_FRAME_WITH_PT_REGS (STACK_FRAME_OVERHEAD + sizeof(struct pt_regs))
-
 // Always displays as "REGS" in memory dumps
 #ifdef CONFIG_CPU_BIG_ENDIAN
 #define STACK_FRAME_REGS_MARKER	ASM_CONST(0x52454753)
@@ -125,6 +123,8 @@ struct pt_regs
 #define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 16)
+#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_OVERHEAD
 
 #ifdef CONFIG_PPC64_ELF_ABI_V2
 #define STACK_FRAME_MIN_SIZE	32
@@ -146,6 +146,8 @@ struct pt_regs
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 8)
 #define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
+#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
+#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_OVERHEAD
 
 /* Size of stack frame allocated when calling signal handler. */
 #define __SIGNAL_FRAMESIZE	64
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 68905c9f7c21c..d24a59a98c0c9 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -260,7 +260,7 @@ int main(void)
 
 	/* Interrupt register frame */
 	DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
-	DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_WITH_PT_REGS);
+	DEFINE(SWITCH_FRAME_SIZE, STACK_SWITCH_FRAME_SIZE);
 	STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
 	STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
 	STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 2f61b7d3677c1..6e99ec10be89b 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -215,9 +215,9 @@ ret_from_kernel_thread:
  * in arch/ppc/kernel/process.c
  */
 _GLOBAL(_switch)
-	stwu	r1,-INT_FRAME_SIZE(r1)
+	stwu	r1,-SWITCH_FRAME_SIZE(r1)
 	mflr	r0
-	stw	r0,INT_FRAME_SIZE+4(r1)
+	stw	r0,SWITCH_FRAME_SIZE+4(r1)
 	/* r3-r12 are caller saved -- Cort */
 	SAVE_NVGPRS(r1)
 	stw	r0,_NIP(r1)	/* Return to switch caller */
@@ -248,7 +248,7 @@ _GLOBAL(_switch)
 
 	lwz	r4,_NIP(r1)	/* Return to _switch caller in new task */
 	mtlr	r4
-	addi	r1,r1,INT_FRAME_SIZE
+	addi	r1,r1,SWITCH_FRAME_SIZE
 	blr
 
 	.globl	fast_exception_return
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 010a5ee746aef..0cb5296c6c41e 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1808,10 +1808,10 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	 * do some house keeping and then return from the fork or clone
 	 * system call, using the stack frame created above.
 	 */
-	sp -= sizeof(struct pt_regs);
-	kregs = (struct pt_regs *) sp;
-	sp -= STACK_FRAME_OVERHEAD;
+	sp -= STACK_SWITCH_FRAME_SIZE;
+	kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
 	p->thread.ksp = sp;
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	for (i = 0; i < nr_wp_slots(); i++)
 		p->thread.ptrace_bps[i] = NULL;
@@ -2261,8 +2261,12 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
 		/*
 		 * See if this is an exception frame.
 		 * We look for the "regs" marker in the current frame.
+		 *
+		 * STACK_SWITCH_FRAME_SIZE being the smallest frame that
+		 * could hold a pt_regs, if that does not fit then it can't
+		 * have regs.
 		 */
-		if (validate_sp(sp, tsk, STACK_FRAME_WITH_PT_REGS)
+		if (validate_sp(sp, tsk, STACK_SWITCH_FRAME_SIZE)
 		    && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
 			struct pt_regs *regs = (struct pt_regs *)
 				(sp + STACK_INT_FRAME_REGS);
-- 
GitLab


From 6895dfc0474170c492191c126fcfc420f7771a09 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:37 +1000
Subject: [PATCH 3211/4122] powerpc: copy_thread fill in interrupt frame marker
 and back chain

Backtraces will not recognise the fork system call interrupt without
the regs marker. And regular interrupt entry from userspace creates
the back chain to the user stack, so do this for the initial fork
frame too, to be consistent.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-13-npiggin@gmail.com
---
 arch/powerpc/kernel/process.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 0cb5296c6c41e..6b1d80bd370ea 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1757,12 +1757,13 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 
 	/* Create initial stack frame. */
 	sp -= STACK_USER_INT_FRAME_SIZE;
-	((unsigned long *)sp)[0] = 0;
+	*(unsigned long *)(sp + STACK_INT_FRAME_MARKER) = STACK_FRAME_REGS_MARKER;
 
 	/* Copy registers */
 	childregs = (struct pt_regs *)(sp + STACK_INT_FRAME_REGS);
 	if (unlikely(args->fn)) {
 		/* kernel thread */
+		((unsigned long *)sp)[0] = 0;
 		memset(childregs, 0, sizeof(struct pt_regs));
 		childregs->gpr[1] = sp + STACK_USER_INT_FRAME_SIZE;
 		/* function */
@@ -1782,6 +1783,7 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 		*childregs = *regs;
 		if (usp)
 			childregs->gpr[1] = usp;
+		((unsigned long *)sp)[0] = childregs->gpr[1];
 		p->thread.regs = childregs;
 		/* 64s sets this in ret_from_fork */
 		if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
-- 
GitLab


From edbd0387f3249cc7e102f86d4852a9a9f3bb1305 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:38 +1000
Subject: [PATCH 3212/4122] powerpc: copy_thread add a back chain to the switch
 stack frame

Stack unwinders need LR and the back chain as a minimum. The switch
stack uses regs->nip for its return pointer rather than lrsave, so
that was not set in the fork frame, and neither was the back chain.
This change sets those fields in the stack.

With this and the previous change, a stack trace in the switch or
interrupt stack goes from looking like this:

  Oops: Exception in kernel mode, sig: 5 [#1]
  LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
  Modules linked in:
  CPU: 3 PID: 90 Comm: systemd Not tainted
  NIP:  c000000000011060 LR: c000000000010f68 CTR: 0000000000007fff
  [ ... regs ... ]
  NIP [c000000000011060] _switch+0x160/0x17c
  LR [c000000000010f68] _switch+0x68/0x17c
  Call Trace:

To this:

  Oops: Exception in kernel mode, sig: 5 [#1]
  LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries
  CPU: 0 PID: 93 Comm: systemd Not tainted
  NIP:  c000000000011060 LR: c000000000010f68 CTR: 0000000000007fff
  [ ... regs ... ]
  NIP [c000000000011060] _switch+0x160/0x17c
  LR [c000000000010f68] _switch+0x68/0x17c
  Call Trace:
  [c000000005a93e10] [c00000000000cdbc] ret_from_fork_scv+0x0/0x54
  --- interrupt: 3000 at 0x7fffa72f56d8
  NIP:  00007fffa72f56d8 LR: 0000000000000000 CTR: 0000000000000000
  [ ... regs ... ]
  NIP [00007fffa72f56d8] 0x7fffa72f56d8
  LR [0000000000000000] 0x0
  --- interrupt: 3000

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-14-npiggin@gmail.com
---
 arch/powerpc/kernel/process.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 6b1d80bd370ea..096b6ea52378c 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1810,7 +1810,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
 	 * do some house keeping and then return from the fork or clone
 	 * system call, using the stack frame created above.
 	 */
+	((unsigned long *)sp)[STACK_FRAME_LR_SAVE] = (unsigned long)f;
 	sp -= STACK_SWITCH_FRAME_SIZE;
+	((unsigned long *)sp)[0] = sp + STACK_SWITCH_FRAME_SIZE;
 	kregs = (struct pt_regs *)(sp + STACK_SWITCH_FRAME_REGS);
 	p->thread.ksp = sp;
 
-- 
GitLab


From 4cefb0f6c555971b3e6544a9b15470f9d1f12089 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:39 +1000
Subject: [PATCH 3213/4122] powerpc: split validate_sp into two functions

Most callers just want to validate an arbitrary kernel stack pointer,
some need a particular size. Make the size case the exceptional one
with an extra function.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-15-npiggin@gmail.com
---
 arch/powerpc/include/asm/processor.h | 15 ++++++++++++---
 arch/powerpc/kernel/process.c        | 23 ++++++++++++++---------
 arch/powerpc/kernel/stacktrace.c     |  2 +-
 arch/powerpc/perf/callchain.c        |  6 +++---
 4 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 631802999d598..e96c9b8c2a60b 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -374,9 +374,18 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
 
 #endif
 
-/* Check that a certain kernel stack pointer is valid in task_struct p */
-int validate_sp(unsigned long sp, struct task_struct *p,
-                       unsigned long nbytes);
+/*
+ * Check that a certain kernel stack pointer is a valid (minimum sized)
+ * stack frame in task_struct p.
+ */
+int validate_sp(unsigned long sp, struct task_struct *p);
+
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+		     unsigned long nbytes);
 
 /*
  * Prefetch macros.
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 096b6ea52378c..9446bee8ca32a 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2157,9 +2157,12 @@ static inline int valid_emergency_stack(unsigned long sp, struct task_struct *p,
 	return 0;
 }
 
-
-int validate_sp(unsigned long sp, struct task_struct *p,
-		       unsigned long nbytes)
+/*
+ * validate the stack frame of a particular minimum size, used for when we are
+ * looking at a certain object in the stack beyond the minimum.
+ */
+int validate_sp_size(unsigned long sp, struct task_struct *p,
+		     unsigned long nbytes)
 {
 	unsigned long stack_page = (unsigned long)task_stack_page(p);
 
@@ -2175,7 +2178,10 @@ int validate_sp(unsigned long sp, struct task_struct *p,
 	return valid_emergency_stack(sp, p, nbytes);
 }
 
-EXPORT_SYMBOL(validate_sp);
+int validate_sp(unsigned long sp, struct task_struct *p)
+{
+	return validate_sp_size(sp, p, STACK_FRAME_OVERHEAD);
+}
 
 static unsigned long ___get_wchan(struct task_struct *p)
 {
@@ -2183,13 +2189,12 @@ static unsigned long ___get_wchan(struct task_struct *p)
 	int count = 0;
 
 	sp = p->thread.ksp;
-	if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD))
+	if (!validate_sp(sp, p))
 		return 0;
 
 	do {
 		sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
-		if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
-		    task_is_running(p))
+		if (!validate_sp(sp, p) || task_is_running(p))
 			return 0;
 		if (count > 0) {
 			ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
@@ -2243,7 +2248,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
 	lr = 0;
 	printk("%sCall Trace:\n", loglvl);
 	do {
-		if (!validate_sp(sp, tsk, STACK_FRAME_OVERHEAD))
+		if (!validate_sp(sp, tsk))
 			break;
 
 		stack = (unsigned long *) sp;
@@ -2270,7 +2275,7 @@ void __no_sanitize_address show_stack(struct task_struct *tsk,
 		 * could hold a pt_regs, if that does not fit then it can't
 		 * have regs.
 		 */
-		if (validate_sp(sp, tsk, STACK_SWITCH_FRAME_SIZE)
+		if (validate_sp_size(sp, tsk, STACK_SWITCH_FRAME_SIZE)
 		    && stack[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
 			struct pt_regs *regs = (struct pt_regs *)
 				(sp + STACK_INT_FRAME_REGS);
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 453ac317a6cf5..1dbbf30f265e0 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -43,7 +43,7 @@ void __no_sanitize_address arch_stack_walk(stack_trace_consume_fn consume_entry,
 		unsigned long *stack = (unsigned long *) sp;
 		unsigned long newsp, ip;
 
-		if (!validate_sp(sp, task, STACK_FRAME_OVERHEAD))
+		if (!validate_sp(sp, task))
 			return;
 
 		newsp = stack[0];
diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c
index b01497ed51738..6b4434dd0ff30 100644
--- a/arch/powerpc/perf/callchain.c
+++ b/arch/powerpc/perf/callchain.c
@@ -27,7 +27,7 @@ static int valid_next_sp(unsigned long sp, unsigned long prev_sp)
 {
 	if (sp & 0xf)
 		return 0;		/* must be 16-byte aligned */
-	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+	if (!validate_sp(sp, current))
 		return 0;
 	if (sp >= prev_sp + STACK_FRAME_MIN_SIZE)
 		return 1;
@@ -53,7 +53,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 	sp = regs->gpr[1];
 	perf_callchain_store(entry, perf_instruction_pointer(regs));
 
-	if (!validate_sp(sp, current, STACK_FRAME_OVERHEAD))
+	if (!validate_sp(sp, current))
 		return;
 
 	for (;;) {
@@ -61,7 +61,7 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re
 		next_sp = fp[0];
 
 		if (next_sp == sp + STACK_INT_FRAME_SIZE &&
-		    validate_sp(sp, current, STACK_INT_FRAME_SIZE) &&
+		    validate_sp_size(sp, current, STACK_INT_FRAME_SIZE) &&
 		    fp[STACK_INT_FRAME_MARKER_LONGS] == STACK_FRAME_REGS_MARKER) {
 			/*
 			 * This looks like an interrupt frame for an
-- 
GitLab


From 90f1b43196c5e79f6c986a359011a19857984c27 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:40 +1000
Subject: [PATCH 3214/4122] powerpc: allow minimum sized kernel stack frames

This affects only 64-bit ELFv2 kernels, and reduces the minimum
asm-created stack frame size from 112 to 32 byte on those kernels.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-16-npiggin@gmail.com
---
 arch/powerpc/kernel/head_40x.S       | 2 +-
 arch/powerpc/kernel/head_44x.S       | 6 +++---
 arch/powerpc/kernel/head_64.S        | 6 +++---
 arch/powerpc/kernel/head_85xx.S      | 4 ++--
 arch/powerpc/kernel/head_8xx.S       | 2 +-
 arch/powerpc/kernel/head_book3s_32.S | 4 ++--
 arch/powerpc/kernel/irq.c            | 4 ++--
 arch/powerpc/kernel/misc_32.S        | 2 +-
 arch/powerpc/kernel/misc_64.S        | 4 ++--
 arch/powerpc/kernel/process.c        | 2 +-
 arch/powerpc/kernel/smp.c            | 2 +-
 arch/powerpc/kernel/stacktrace.c     | 2 +-
 12 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 088f500896c78..918547b93b5e9 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -602,7 +602,7 @@ start_here:
 	lis	r1,init_thread_union@ha
 	addi	r1,r1,init_thread_union@l
 	li	r0,0
-	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
 
 	bl	early_init	/* We have to do this with MMU on */
 
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index f15cb9fdb692f..63a85c16fef46 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -109,7 +109,7 @@ _GLOBAL(_start);
 	lis	r1,init_thread_union@h
 	ori	r1,r1,init_thread_union@l
 	li	r0,0
-	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
 
 	bl	early_init
 
@@ -1012,7 +1012,7 @@ _GLOBAL(start_secondary_47x)
 	 */
 	lis	r1,temp_boot_stack@h
 	ori	r1,r1,temp_boot_stack@l
-	addi	r1,r1,1024-STACK_FRAME_OVERHEAD
+	addi	r1,r1,1024-STACK_FRAME_MIN_SIZE
 	li	r0,0
 	stw	r0,0(r1)
 	bl	mmu_init_secondary
@@ -1025,7 +1025,7 @@ _GLOBAL(start_secondary_47x)
 	lwz	r1,TASK_STACK(r2)
 
 	/* Current stack pointer */
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
 	li	r0,0
 	stw	r0,0(r1)
 
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index dedcc6fe2263a..b513d13bf79e4 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -424,7 +424,7 @@ generic_secondary_common_init:
 
 	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,STACK_FRAME_OVERHEAD
+	subi	r1,r1,STACK_FRAME_MIN_SIZE
 
 	/* See if we need to call a cpu state restore handler */
 	LOAD_REG_ADDR(r23, cur_cpu_spec)
@@ -780,7 +780,7 @@ _GLOBAL(pmac_secondary_start)
 
 	/* Create a temp kernel stack for use before relocation is on.	*/
 	ld	r1,PACAEMERGSP(r13)
-	subi	r1,r1,STACK_FRAME_OVERHEAD
+	subi	r1,r1,STACK_FRAME_MIN_SIZE
 
 	b	__secondary_start
 
@@ -958,7 +958,7 @@ start_here_multiplatform:
 	LOAD_REG_IMMEDIATE(r1,THREAD_SIZE)
 	add	r1,r3,r1
 	li	r0,0
-	stdu	r0,-STACK_FRAME_OVERHEAD(r1)
+	stdu	r0,-STACK_FRAME_MIN_SIZE(r1)
 
 	/*
 	 * Do very early kernel initializations, including initial hash table
diff --git a/arch/powerpc/kernel/head_85xx.S b/arch/powerpc/kernel/head_85xx.S
index 24f39abf81dff..d9bd377dec919 100644
--- a/arch/powerpc/kernel/head_85xx.S
+++ b/arch/powerpc/kernel/head_85xx.S
@@ -229,7 +229,7 @@ set_ivor:
 	lis	r1,init_thread_union@h
 	ori	r1,r1,init_thread_union@l
 	li	r0,0
-	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
 
 #ifdef CONFIG_SMP
 	stw	r24, TASK_CPU(r2)
@@ -1044,7 +1044,7 @@ __secondary_start:
 	lwz	r1,TASK_STACK(r2)
 
 	/* stack */
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
 	li	r0,0
 	stw	r0,0(r1)
 
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index 0b05f2be66b9f..cf546d0e5c403 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -537,7 +537,7 @@ start_here:
 	ori	r0, r0, STACK_END_MAGIC@l
 	stw	r0, 0(r1)
 	li	r0,0
-	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
 
 	lis	r6, swapper_pg_dir@ha
 	tophys(r6,r6)
diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S
index 519b606951675..40854d092dd31 100644
--- a/arch/powerpc/kernel/head_book3s_32.S
+++ b/arch/powerpc/kernel/head_book3s_32.S
@@ -840,7 +840,7 @@ __secondary_start:
 	lwz	r1,TASK_STACK(r1)
 
 	/* stack */
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
 	li	r0,0
 	tophys(r3,r1)
 	stw	r0,0(r3)
@@ -966,7 +966,7 @@ END_MMU_FTR_SECTION_IFCLR(MMU_FTR_HPTE_TABLE)
 	lis	r1,init_thread_union@ha
 	addi	r1,r1,init_thread_union@l
 	li	r0,0
-	stwu	r0,THREAD_SIZE-STACK_FRAME_OVERHEAD(r1)
+	stwu	r0,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r1)
 /*
  * Do early platform-specific initialization,
  * and set up the MMU.
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 9ede61a5a469e..c5b9ce8874834 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -210,7 +210,7 @@ static __always_inline void call_do_softirq(const void *sp)
 		 PPC_LL "	%%r1, 0(%%r1)		;"
 		 : // Outputs
 		 : // Inputs
-		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
 		   [callee] "i" (__do_softirq)
 		 : // Clobbers
 		   "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
@@ -264,7 +264,7 @@ static __always_inline void call_do_irq(struct pt_regs *regs, void *sp)
 		 : // Outputs
 		   "+r" (r3)
 		 : // Inputs
-		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_OVERHEAD),
+		   [sp] "b" (sp), [offset] "i" (THREAD_SIZE - STACK_FRAME_MIN_SIZE),
 		   [callee] "i" (__do_irq)
 		 : // Clobbers
 		   "lr", "xer", "ctr", "memory", "cr0", "cr1", "cr5", "cr6",
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index e5127b19fec29..daf8f87d23728 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -382,7 +382,7 @@ EXPORT_SYMBOL(__bswapdi2)
 _GLOBAL(start_secondary_resume)
 	/* Reset stack */
 	rlwinm	r1, r1, 0, 0, 31 - THREAD_SHIFT
-	addi	r1,r1,THREAD_SIZE-STACK_FRAME_OVERHEAD
+	addi	r1,r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE
 	li	r3,0
 	stw	r3,0(r1)		/* Zero the stack frame pointer	*/
 	bl	start_secondary
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 36184cada00b1..4bb6dd30c5568 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -384,7 +384,7 @@ _GLOBAL(kexec_sequence)
 	std	r0,16(r1)
 
 	/* switch stacks to newstack -- &kexec_stack.stack */
-	stdu	r1,THREAD_SIZE-STACK_FRAME_OVERHEAD(r3)
+	stdu	r1,THREAD_SIZE-STACK_FRAME_MIN_SIZE(r3)
 	mr	r1,r3
 
 	li	r0,0
@@ -401,7 +401,7 @@ _GLOBAL(kexec_sequence)
 	std	r26,-48(r1)
 	std	r25,-56(r1)
 
-	stdu	r1,-STACK_FRAME_OVERHEAD-64(r1)
+	stdu	r1,-STACK_FRAME_MIN_SIZE-64(r1)
 
 	/* save args into preserved regs */
 	mr	r31,r3			/* newstack (both) */
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9446bee8ca32a..edb46d0806ef7 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -2180,7 +2180,7 @@ int validate_sp_size(unsigned long sp, struct task_struct *p,
 
 int validate_sp(unsigned long sp, struct task_struct *p)
 {
-	return validate_sp_size(sp, p, STACK_FRAME_OVERHEAD);
+	return validate_sp_size(sp, p, STACK_FRAME_MIN_SIZE);
 }
 
 static unsigned long ___get_wchan(struct task_struct *p)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 0da6e59161cd4..6b90f10a6c819 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1249,7 +1249,7 @@ static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
 #ifdef CONFIG_PPC64
 	paca_ptrs[cpu]->__current = idle;
 	paca_ptrs[cpu]->kstack = (unsigned long)task_stack_page(idle) +
-				 THREAD_SIZE - STACK_FRAME_OVERHEAD;
+				 THREAD_SIZE - STACK_FRAME_MIN_SIZE;
 #endif
 	task_thread_info(idle)->cpu = cpu;
 	secondary_current = current_set[cpu] = idle;
diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 1dbbf30f265e0..5de8597eaab8d 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -94,7 +94,7 @@ int __no_sanitize_address arch_stack_walk_reliable(stack_trace_consume_fn consum
 		 * idle tasks have a custom stack layout,
 		 * c.f. cpu_idle_thread_init().
 		 */
-		stack_end -= STACK_FRAME_OVERHEAD;
+		stack_end -= STACK_FRAME_MIN_SIZE;
 	}
 
 	if (task == current)
-- 
GitLab


From cd52414d5a6ccea6ce956ef05161fe824522a107 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:41 +1000
Subject: [PATCH 3215/4122] powerpc/64: ELFv2 use minimal stack frames in int
 and switch frame sizes

Adjust the ELFv2 interrupt and switch frames to the minimum C ABI size,
plus pt_regs, plus 16 bytes for the aligned regs marker for the int
frame (and the switch frame needs to match that because it uses the same
regs offset as the int frame).

This saves 80 bytes of kernel stack per interrupt. It's the principle of
getting our accounting right that's more important than the practical
saving.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-17-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 412ef07497752..4ab606f390bc4 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -120,16 +120,26 @@ struct pt_regs
 
 #define STACK_FRAME_OVERHEAD	112	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
+
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+#define STACK_FRAME_MIN_SIZE	32
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_REGS	(STACK_FRAME_MIN_SIZE + 16)
+#define STACK_INT_FRAME_MARKER	STACK_FRAME_MIN_SIZE
+#define STACK_SWITCH_FRAME_SIZE (sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE + 16)
+#define STACK_SWITCH_FRAME_REGS	(STACK_FRAME_MIN_SIZE + 16)
+#else
+/*
+ * The ELFv1 ABI specifies 48 bytes plus a minimum 64 byte parameter save
+ * area. This parameter area is not used by calls to C from interrupt entry,
+ * so the second from last one of those is used for the frame marker.
+ */
+#define STACK_FRAME_MIN_SIZE	112
 #define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
 #define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 16)
 #define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
 #define STACK_SWITCH_FRAME_REGS	STACK_FRAME_OVERHEAD
-
-#ifdef CONFIG_PPC64_ELF_ABI_V2
-#define STACK_FRAME_MIN_SIZE	32
-#else
-#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
 #endif
 
 /* Size of dummy stack frame allocated when calling signal handler. */
-- 
GitLab


From dfecd06bc5524517ed7737c30eaaf747338b280a Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Sun, 27 Nov 2022 22:49:42 +1000
Subject: [PATCH 3216/4122] powerpc: remove STACK_FRAME_OVERHEAD

This is equal to STACK_FRAME_MIN_SIZE on 32-bit and 64-bit ELFv1, and no
longer used in 64-bit ELFv2, so replace STACK_FRAME_OVERHEAD occurrences
with STACK_FRAME_MIN_SIZE.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221127124942.1665522-18-npiggin@gmail.com
---
 arch/powerpc/include/asm/ptrace.h | 24 +++++++++++-------------
 1 file changed, 11 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index 4ab606f390bc4..0eb90a0133466 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -118,7 +118,6 @@ struct pt_regs
 #define USER_REDZONE_SIZE	512
 #define KERNEL_REDZONE_SIZE	288
 
-#define STACK_FRAME_OVERHEAD	112	/* size of minimum stack frame */
 #define STACK_FRAME_LR_SAVE	2	/* Location of LR in stack frame */
 
 #ifdef CONFIG_PPC64_ELF_ABI_V2
@@ -135,11 +134,11 @@ struct pt_regs
  * so the second from last one of those is used for the frame marker.
  */
 #define STACK_FRAME_MIN_SIZE	112
-#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-#define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
-#define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 16)
-#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_OVERHEAD
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS	STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER	(STACK_FRAME_MIN_SIZE - 16)
+#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_MIN_SIZE
 #endif
 
 /* Size of dummy stack frame allocated when calling signal handler. */
@@ -150,14 +149,13 @@ struct pt_regs
 
 #define USER_REDZONE_SIZE	0
 #define KERNEL_REDZONE_SIZE	0
-#define STACK_FRAME_OVERHEAD	16	/* size of minimum stack frame */
+#define STACK_FRAME_MIN_SIZE	16
 #define STACK_FRAME_LR_SAVE	1	/* Location of LR in stack frame */
-#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-#define STACK_INT_FRAME_REGS	STACK_FRAME_OVERHEAD
-#define STACK_INT_FRAME_MARKER	(STACK_FRAME_OVERHEAD - 8)
-#define STACK_FRAME_MIN_SIZE	STACK_FRAME_OVERHEAD
-#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_OVERHEAD)
-#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_OVERHEAD
+#define STACK_USER_INT_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_INT_FRAME_REGS	STACK_FRAME_MIN_SIZE
+#define STACK_INT_FRAME_MARKER	(STACK_FRAME_MIN_SIZE - 8)
+#define STACK_SWITCH_FRAME_SIZE	(sizeof(struct pt_regs) + STACK_FRAME_MIN_SIZE)
+#define STACK_SWITCH_FRAME_REGS	STACK_FRAME_MIN_SIZE
 
 /* Size of stack frame allocated when calling signal handler. */
 #define __SIGNAL_FRAMESIZE	64
-- 
GitLab


From 6b34a099faa123488b13caf704562f4dbe483fc4 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Mon, 24 Oct 2022 13:01:50 +1000
Subject: [PATCH 3217/4122] powerpc/64s/hash: add stress_hpt kernel boot option
 to increase hash faults

This option increases the number of hash misses by limiting the number
of kernel HPT entries, by keeping a per-CPU record of the last kernel
HPTEs installed, and removing that from the hash table on the next hash
insertion. A timer round-robins CPUs removing remaining kernel HPTEs and
clearing the TLB (in the case of bare metal) to increase and slightly
randomise kernel fault activity.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Add comment about NR_CPUS usage, fixup whitespace]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221024030150.852517-1-npiggin@gmail.com
---
 .../admin-guide/kernel-parameters.txt         |   5 +
 arch/powerpc/mm/book3s64/hash_4k.c            |   5 +
 arch/powerpc/mm/book3s64/hash_64k.c           |  10 ++
 arch/powerpc/mm/book3s64/hash_utils.c         | 130 +++++++++++++++++-
 arch/powerpc/mm/book3s64/internal.h           |  11 ++
 5 files changed, 160 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index a465d5242774a..9f3d256529d01 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1042,6 +1042,11 @@
 			them frequently to increase the rate of SLB faults
 			on kernel addresses.
 
+	stress_hpt	[PPC]
+			Limits the number of kernel HPT entries in the hash
+			page table to increase the rate of hash page table
+			faults on kernel addresses.
+
 	disable=	[IPV6]
 			See Documentation/networking/ipv6.rst.
 
diff --git a/arch/powerpc/mm/book3s64/hash_4k.c b/arch/powerpc/mm/book3s64/hash_4k.c
index 7de1a8a0c62a8..02acbfd05b460 100644
--- a/arch/powerpc/mm/book3s64/hash_4k.c
+++ b/arch/powerpc/mm/book3s64/hash_4k.c
@@ -16,6 +16,8 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
 int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
 		   pte_t *ptep, unsigned long trap, unsigned long flags,
 		   int ssize, int subpg_prot)
@@ -118,6 +120,9 @@ repeat:
 		}
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
 	}
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
diff --git a/arch/powerpc/mm/book3s64/hash_64k.c b/arch/powerpc/mm/book3s64/hash_64k.c
index 998c6817ed47b..954af420f3586 100644
--- a/arch/powerpc/mm/book3s64/hash_64k.c
+++ b/arch/powerpc/mm/book3s64/hash_64k.c
@@ -16,6 +16,8 @@
 #include <asm/machdep.h>
 #include <asm/mmu.h>
 
+#include "internal.h"
+
 /*
  * Return true, if the entry has a slot value which
  * the software considers as invalid.
@@ -216,6 +218,9 @@ repeat:
 	new_pte |= pte_set_hidx(ptep, rpte, subpg_index, slot, PTRS_PER_PTE);
 	new_pte |= H_PAGE_HASHPTE;
 
+	if (stress_hpt())
+		hpt_do_stress(ea, hpte_group);
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
 	return 0;
 }
@@ -327,7 +332,12 @@ repeat:
 
 		new_pte = (new_pte & ~_PAGE_HPTEFLAGS) | H_PAGE_HASHPTE;
 		new_pte |= pte_set_hidx(ptep, rpte, 0, slot, PTRS_PER_PTE);
+
+		if (stress_hpt())
+			hpt_do_stress(ea, hpte_group);
 	}
+
 	*ptep = __pte(new_pte & ~H_PAGE_BUSY);
+
 	return 0;
 }
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 6df4c6d38b660..80a148c57de81 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -471,7 +471,7 @@ int htab_remove_mapping(unsigned long vstart, unsigned long vend,
 	return ret;
 }
 
-static bool disable_1tb_segments = false;
+static bool disable_1tb_segments __ro_after_init;
 
 static int __init parse_disable_1tb_segments(char *p)
 {
@@ -480,6 +480,40 @@ static int __init parse_disable_1tb_segments(char *p)
 }
 early_param("disable_1tb_segments", parse_disable_1tb_segments);
 
+bool stress_hpt_enabled __initdata;
+
+static int __init parse_stress_hpt(char *p)
+{
+	stress_hpt_enabled = true;
+	return 0;
+}
+early_param("stress_hpt", parse_stress_hpt);
+
+__ro_after_init DEFINE_STATIC_KEY_FALSE(stress_hpt_key);
+
+/*
+ * per-CPU array allocated if we enable stress_hpt.
+ */
+#define STRESS_MAX_GROUPS 16
+struct stress_hpt_struct {
+	unsigned long last_group[STRESS_MAX_GROUPS];
+};
+
+static inline int stress_nr_groups(void)
+{
+	/*
+	 * LPAR H_REMOVE flushes TLB, so need some number > 1 of entries
+	 * to allow practical forward progress. Bare metal returns 1, which
+	 * seems to help uncover more bugs.
+	 */
+	if (firmware_has_feature(FW_FEATURE_LPAR))
+		return STRESS_MAX_GROUPS;
+	else
+		return 1;
+}
+
+static struct stress_hpt_struct *stress_hpt_struct;
+
 static int __init htab_dt_scan_seg_sizes(unsigned long node,
 					 const char *uname, int depth,
 					 void *data)
@@ -976,6 +1010,23 @@ static void __init hash_init_partition_table(phys_addr_t hash_table,
 	pr_info("Partition table %p\n", partition_tb);
 }
 
+void hpt_clear_stress(void);
+static struct timer_list stress_hpt_timer;
+void stress_hpt_timer_fn(struct timer_list *timer)
+{
+	int next_cpu;
+
+	hpt_clear_stress();
+	if (!firmware_has_feature(FW_FEATURE_LPAR))
+		tlbiel_all();
+
+	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
+	if (next_cpu >= nr_cpu_ids)
+		next_cpu = cpumask_first(cpu_online_mask);
+	stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+	add_timer_on(&stress_hpt_timer, next_cpu);
+}
+
 static void __init htab_initialize(void)
 {
 	unsigned long table;
@@ -995,6 +1046,20 @@ static void __init htab_initialize(void)
 	if (stress_slb_enabled)
 		static_branch_enable(&stress_slb_key);
 
+	if (stress_hpt_enabled) {
+		unsigned long tmp;
+		static_branch_enable(&stress_hpt_key);
+		// Too early to use nr_cpu_ids, so use NR_CPUS
+		tmp = memblock_phys_alloc_range(sizeof(struct stress_hpt_struct) * NR_CPUS,
+						0, 0, MEMBLOCK_ALLOC_ANYWHERE);
+		memset((void *)tmp, 0xff, sizeof(struct stress_hpt_struct) * NR_CPUS);
+		stress_hpt_struct = __va(tmp);
+
+		timer_setup(&stress_hpt_timer, stress_hpt_timer_fn, 0);
+		stress_hpt_timer.expires = jiffies + msecs_to_jiffies(10);
+		add_timer(&stress_hpt_timer);
+	}
+
 	/*
 	 * Calculate the required size of the htab.  We want the number of
 	 * PTEGs to equal one half the number of real pages.
@@ -1980,6 +2045,69 @@ repeat:
 	return slot;
 }
 
+void hpt_clear_stress(void)
+{
+	int cpu = raw_smp_processor_id();
+	int g;
+
+	for (g = 0; g < stress_nr_groups(); g++) {
+		unsigned long last_group;
+		last_group = stress_hpt_struct[cpu].last_group[g];
+
+		if (last_group != -1UL) {
+			int i;
+			for (i = 0; i < HPTES_PER_GROUP; i++) {
+				if (mmu_hash_ops.hpte_remove(last_group) == -1)
+					break;
+			}
+			stress_hpt_struct[cpu].last_group[g] = -1;
+		}
+	}
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group)
+{
+	unsigned long last_group;
+	int cpu = raw_smp_processor_id();
+
+	last_group = stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1];
+	if (hpte_group == last_group)
+		return;
+
+	if (last_group != -1UL) {
+		int i;
+		/*
+		 * Concurrent CPUs might be inserting into this group, so
+		 * give up after a number of iterations, to prevent a live
+		 * lock.
+		 */
+		for (i = 0; i < HPTES_PER_GROUP; i++) {
+			if (mmu_hash_ops.hpte_remove(last_group) == -1)
+				break;
+		}
+		stress_hpt_struct[cpu].last_group[stress_nr_groups() - 1] = -1;
+	}
+
+	if (ea >= PAGE_OFFSET) {
+		/*
+		 * We would really like to prefetch to get the TLB loaded, then
+		 * remove the PTE before returning from fault interrupt, to
+		 * increase the hash fault rate.
+		 *
+		 * Unfortunately QEMU TCG does not model the TLB in a way that
+		 * makes this possible, and systemsim (mambo) emulator does not
+		 * bring in TLBs with prefetches (although loads/stores do
+		 * work for non-CI PTEs).
+		 *
+		 * So remember this PTE and clear it on the next hash fault.
+		 */
+		memmove(&stress_hpt_struct[cpu].last_group[1],
+			&stress_hpt_struct[cpu].last_group[0],
+			(stress_nr_groups() - 1) * sizeof(unsigned long));
+		stress_hpt_struct[cpu].last_group[0] = hpte_group;
+	}
+}
+
 #if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE)
 static DEFINE_RAW_SPINLOCK(linear_map_hash_lock);
 
diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h
index 5045048ce244d..a57a25f06a215 100644
--- a/arch/powerpc/mm/book3s64/internal.h
+++ b/arch/powerpc/mm/book3s64/internal.h
@@ -13,6 +13,17 @@ static inline bool stress_slb(void)
 	return static_branch_unlikely(&stress_slb_key);
 }
 
+extern bool stress_hpt_enabled;
+
+DECLARE_STATIC_KEY_FALSE(stress_hpt_key);
+
+static inline bool stress_hpt(void)
+{
+	return static_branch_unlikely(&stress_hpt_key);
+}
+
+void hpt_do_stress(unsigned long ea, unsigned long hpte_group);
+
 void slb_setup_new_exec(void);
 
 void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush);
-- 
GitLab


From 5921eb36d2a1b276b16a24e529788550e6a65449 Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Thu, 1 Dec 2022 10:49:57 +0800
Subject: [PATCH 3218/4122] selftests: powerpc: Use "grep -E" instead of
 "egrep"

The latest version of grep claims the egrep is now obsolete so the build
now contains warnings that look like:
	egrep: warning: egrep is obsolescent; using grep -E
fix this using "grep -E" instead.

  sed -i "s/egrep/grep -E/g" `grep egrep -rwl tools/testing/selftests/powerpc`

Here are the steps to install the latest grep:

  wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz
  tar xf grep-3.8.tar.gz
  cd grep-3.8 && ./configure && make
  sudo make install
  export PATH=/usr/local/bin:$PATH

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/1669862997-31335-1-git-send-email-yangtiezhu@loongson.cn
---
 tools/testing/selftests/powerpc/scripts/hmi.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh
index dcdb392e84275..bcc7b6b650099 100755
--- a/tools/testing/selftests/powerpc/scripts/hmi.sh
+++ b/tools/testing/selftests/powerpc/scripts/hmi.sh
@@ -36,7 +36,7 @@ trap "ppc64_cpu --smt-snooze-delay=100" 0 1
 
 # for each chip+core combination
 # todo - less fragile parsing
-egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
+grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
 while read chipcore; do
 	chip=$(echo "$chipcore"|awk '{print $3}')
 	core=$(echo "$chipcore"|awk '{print $5}')
-- 
GitLab


From aecfd680099ba518c34dff2941017c5aa97def52 Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Mon, 28 Nov 2022 15:19:42 +1100
Subject: [PATCH 3219/4122] selftests/powerpc: Use mfspr/mtspr macros

No need to write inline asm for mtspr/mfspr, we have macros for this
in reg.h

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221128041948.58339-2-bgray@linux.ibm.com
---
 tools/testing/selftests/powerpc/dscr/dscr.h     | 17 +++++------------
 .../selftests/powerpc/ptrace/ptrace-hwbreak.c   |  6 ++----
 tools/testing/selftests/powerpc/ptrace/ptrace.h |  5 +----
 .../selftests/powerpc/security/flush_utils.c    |  3 ++-
 4 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h
index 13e9b9e28e2c7..b703714e7d98c 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr.h
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -23,6 +23,7 @@
 #include <sys/stat.h>
 #include <sys/wait.h>
 
+#include "reg.h"
 #include "utils.h"
 
 #define THREADS		100	/* Max threads */
@@ -41,31 +42,23 @@
 /* Prilvilege state DSCR access */
 inline unsigned long get_dscr(void)
 {
-	unsigned long ret;
-
-	asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV));
-
-	return ret;
+	return mfspr(SPRN_DSCR_PRIV);
 }
 
 inline void set_dscr(unsigned long val)
 {
-	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV));
+	mtspr(SPRN_DSCR_PRIV, val);
 }
 
 /* Problem state DSCR access */
 inline unsigned long get_dscr_usr(void)
 {
-	unsigned long ret;
-
-	asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR));
-
-	return ret;
+	return mfspr(SPRN_DSCR);
 }
 
 inline void set_dscr_usr(unsigned long val)
 {
-	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+	mtspr(SPRN_DSCR, val);
 }
 
 /* Default DSCR access */
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index a0635a3819aa4..1345e9b9af0fb 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -23,6 +23,7 @@
 #include <sys/syscall.h>
 #include <linux/limits.h>
 #include "ptrace.h"
+#include "reg.h"
 
 #define SPRN_PVR	0x11F
 #define PVR_8xx		0x00500000
@@ -620,10 +621,7 @@ static int ptrace_hwbreak(void)
 
 int main(int argc, char **argv, char **envp)
 {
-	int pvr = 0;
-	asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR));
-	if (pvr == PVR_8xx)
-		is_8xx = true;
+	is_8xx = mfspr(SPRN_PVR) == PVR_8xx;
 
 	return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
 }
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
index 4e0233c0f2b3a..04788e5fc5040 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace.h
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -745,10 +745,7 @@ int show_tm_spr(pid_t child, struct tm_spr_regs *out)
 /* Analyse TEXASR after TM failure */
 inline unsigned long get_tfiar(void)
 {
-	unsigned long ret;
-
-	asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR));
-	return ret;
+	return mfspr(SPRN_TFIAR);
 }
 
 void analyse_texasr(unsigned long texasr)
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
index 4d95965cb751f..9c5c00e04f633 100644
--- a/tools/testing/selftests/powerpc/security/flush_utils.c
+++ b/tools/testing/selftests/powerpc/security/flush_utils.c
@@ -14,6 +14,7 @@
 #include <string.h>
 #include <stdio.h>
 #include <sys/utsname.h>
+#include "reg.h"
 #include "utils.h"
 #include "flush_utils.h"
 
@@ -79,5 +80,5 @@ void set_dscr(unsigned long val)
 		init = 1;
 	}
 
-	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+	mtspr(SPRN_DSCR, val);
 }
-- 
GitLab


From 94ba4f2c33f42dae7813dc169a177e922a39560c Mon Sep 17 00:00:00 2001
From: Benjamin Gray <bgray@linux.ibm.com>
Date: Mon, 28 Nov 2022 15:19:43 +1100
Subject: [PATCH 3220/4122] selftests/powerpc: Add ptrace setup_core_pattern()
 null-terminator

- malloc() does not zero the buffer,
- fread() does not null-terminate it's output,
- `cat /proc/sys/kernel/core_pattern | hexdump -C` shows the file is
  not inherently null-terminated

So using string operations on the buffer is risky. Explicitly add a null
character to the end to make it safer.

Signed-off-by: Benjamin Gray <bgray@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221128041948.58339-3-bgray@linux.ibm.com
---
 tools/testing/selftests/powerpc/ptrace/core-pkey.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index 1a70a96f0bfe7..4e8d0ce1ff58f 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -383,7 +383,7 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_)
 		goto out;
 	}
 
-	ret = fread(core_pattern, 1, PATH_MAX, f);
+	ret = fread(core_pattern, 1, PATH_MAX - 1, f);
 	fclose(f);
 	if (!ret) {
 		perror("Error reading core_pattern file");
@@ -391,6 +391,8 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_)
 		goto out;
 	}
 
+	core_pattern[ret] = '\0';
+
 	/* Check whether we can predict the name of the core file. */
 	if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
 		*changed_ = false;
-- 
GitLab


From 4d0eea415216fe3791da2f65eb41399e70c7bedf Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 29 Oct 2022 19:16:26 +0800
Subject: [PATCH 3221/4122] powerpc/83xx/mpc832x_rdb: call
 platform_device_put() in error case in of_fsl_spi_probe()

If platform_device_add() is not called or failed, it can not call
platform_device_del() to clean up memory, it should call
platform_device_put() in error case.

Fixes: 26f6cb999366 ("[POWERPC] fsl_soc: add support for fsl_spi")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221029111626.429971-1-yangyingliang@huawei.com
---
 arch/powerpc/platforms/83xx/mpc832x_rdb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/83xx/mpc832x_rdb.c b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
index e12cb44e717f1..caa96edf0e72a 100644
--- a/arch/powerpc/platforms/83xx/mpc832x_rdb.c
+++ b/arch/powerpc/platforms/83xx/mpc832x_rdb.c
@@ -107,7 +107,7 @@ static int __init of_fsl_spi_probe(char *type, char *compatible, u32 sysclk,
 
 		goto next;
 unreg:
-		platform_device_del(pdev);
+		platform_device_put(pdev);
 err:
 		pr_err("%pOF: registration failed\n", np);
 next:
-- 
GitLab


From 03f7c1d2a49acd30e38789cd809d3300721e9b0e Mon Sep 17 00:00:00 2001
From: Kajol Jain <kjain@linux.ibm.com>
Date: Wed, 30 Nov 2022 23:15:13 +0530
Subject: [PATCH 3222/4122] powerpc/hv-gpci: Fix hv_gpci event list

Based on getPerfCountInfo v1.018 documentation, some of the
hv_gpci events were deprecated for platform firmware that
supports counter_info_version 0x8 or above.

Fix the hv_gpci event list by adding a new attribute group
called "hv_gpci_event_attrs_v6" and a "ENABLE_EVENTS_COUNTERINFO_V6"
macro to enable these events for platform firmware
that supports counter_info_version 0x6 or below. And assigning
the hv_gpci event list based on output counter info version
of underlying plaform.

Fixes: 97bf2640184f ("powerpc/perf/hv-gpci: add the remaining gpci requests")
Signed-off-by: Kajol Jain <kjain@linux.ibm.com>
Reviewed-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Reviewed-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221130174513.87501-1-kjain@linux.ibm.com
---
 arch/powerpc/perf/hv-gpci-requests.h |  4 ++++
 arch/powerpc/perf/hv-gpci.c          | 35 ++++++++++++++++++++++++++--
 arch/powerpc/perf/hv-gpci.h          |  1 +
 arch/powerpc/perf/req-gen/perf.h     | 20 ++++++++++++++++
 4 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/perf/hv-gpci-requests.h b/arch/powerpc/perf/hv-gpci-requests.h
index 8965b4463d433..5e86371a20c78 100644
--- a/arch/powerpc/perf/hv-gpci-requests.h
+++ b/arch/powerpc/perf/hv-gpci-requests.h
@@ -79,6 +79,7 @@ REQUEST(__field(0,	8,	partition_id)
 )
 #include I(REQUEST_END)
 
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
 /*
  * Not available for counter_info_version >= 0x8, use
  * run_instruction_cycles_by_partition(0x100) instead.
@@ -92,6 +93,7 @@ REQUEST(__field(0,	8,	partition_id)
 	__count(0x10,	8,	cycles)
 )
 #include I(REQUEST_END)
+#endif
 
 #define REQUEST_NAME system_performance_capabilities
 #define REQUEST_NUM 0x40
@@ -103,6 +105,7 @@ REQUEST(__field(0,	1,	perf_collect_privileged)
 )
 #include I(REQUEST_END)
 
+#ifdef ENABLE_EVENTS_COUNTERINFO_V6
 #define REQUEST_NAME processor_bus_utilization_abc_links
 #define REQUEST_NUM 0x50
 #define REQUEST_IDX_KIND "hw_chip_id=?"
@@ -194,6 +197,7 @@ REQUEST(__field(0,	4,	phys_processor_idx)
 	__count(0x28,	8,	instructions_completed)
 )
 #include I(REQUEST_END)
+#endif
 
 /* Processor_core_power_mode (0x95) skipped, no counters */
 /* Affinity_domain_information_by_virtual_processor (0xA0) skipped,
diff --git a/arch/powerpc/perf/hv-gpci.c b/arch/powerpc/perf/hv-gpci.c
index 5eb60ed5b5e8a..7ff8ff3509f5f 100644
--- a/arch/powerpc/perf/hv-gpci.c
+++ b/arch/powerpc/perf/hv-gpci.c
@@ -70,9 +70,9 @@ static const struct attribute_group format_group = {
 	.attrs = format_attrs,
 };
 
-static const struct attribute_group event_group = {
+static struct attribute_group event_group = {
 	.name  = "events",
-	.attrs = hv_gpci_event_attrs,
+	/* .attrs is set in init */
 };
 
 #define HV_CAPS_ATTR(_name, _format)				\
@@ -330,6 +330,7 @@ static int hv_gpci_init(void)
 	int r;
 	unsigned long hret;
 	struct hv_perf_caps caps;
+	struct hv_gpci_request_buffer *arg;
 
 	hv_gpci_assert_offsets_correct();
 
@@ -353,6 +354,36 @@ static int hv_gpci_init(void)
 	/* sampling not supported */
 	h_gpci_pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT;
 
+	arg = (void *)get_cpu_var(hv_gpci_reqb);
+	memset(arg, 0, HGPCI_REQ_BUFFER_SIZE);
+
+	/*
+	 * hcall H_GET_PERF_COUNTER_INFO populates the output
+	 * counter_info_version value based on the system hypervisor.
+	 * Pass the counter request 0x10 corresponds to request type
+	 * 'Dispatch_timebase_by_processor', to get the supported
+	 * counter_info_version.
+	 */
+	arg->params.counter_request = cpu_to_be32(0x10);
+
+	r = plpar_hcall_norets(H_GET_PERF_COUNTER_INFO,
+			virt_to_phys(arg), HGPCI_REQ_BUFFER_SIZE);
+	if (r) {
+		pr_devel("hcall failed, can't get supported counter_info_version: 0x%x\n", r);
+		arg->params.counter_info_version_out = 0x8;
+	}
+
+	/*
+	 * Use counter_info_version_out value to assign
+	 * required hv-gpci event list.
+	 */
+	if (arg->params.counter_info_version_out >= 0x8)
+		event_group.attrs = hv_gpci_event_attrs;
+	else
+		event_group.attrs = hv_gpci_event_attrs_v6;
+
+	put_cpu_var(hv_gpci_reqb);
+
 	r = perf_pmu_register(&h_gpci_pmu, h_gpci_pmu.name, -1);
 	if (r)
 		return r;
diff --git a/arch/powerpc/perf/hv-gpci.h b/arch/powerpc/perf/hv-gpci.h
index 4d108262bed79..c72020912dea5 100644
--- a/arch/powerpc/perf/hv-gpci.h
+++ b/arch/powerpc/perf/hv-gpci.h
@@ -26,6 +26,7 @@ enum {
 #define REQUEST_FILE "../hv-gpci-requests.h"
 #define NAME_LOWER hv_gpci
 #define NAME_UPPER HV_GPCI
+#define ENABLE_EVENTS_COUNTERINFO_V6
 #include "req-gen/perf.h"
 #undef REQUEST_FILE
 #undef NAME_LOWER
diff --git a/arch/powerpc/perf/req-gen/perf.h b/arch/powerpc/perf/req-gen/perf.h
index fa9bc804e67af..6b2a59fefffa7 100644
--- a/arch/powerpc/perf/req-gen/perf.h
+++ b/arch/powerpc/perf/req-gen/perf.h
@@ -139,6 +139,26 @@ PMU_EVENT_ATTR_STRING(							\
 #define REQUEST_(r_name, r_value, r_idx_1, r_fields)			\
 	r_fields
 
+/* Generate event list for platforms with counter_info_version 0x6 or below */
+static __maybe_unused struct attribute *hv_gpci_event_attrs_v6[] = {
+#include REQUEST_FILE
+	NULL
+};
+
+/*
+ * Based on getPerfCountInfo v1.018 documentation, some of the hv-gpci
+ * events were deprecated for platform firmware that supports
+ * counter_info_version 0x8 or above.
+ * Those deprecated events are still part of platform firmware that
+ * support counter_info_version 0x6 and below. As per the getPerfCountInfo
+ * v1.018 documentation there is no counter_info_version 0x7.
+ * Undefining macro ENABLE_EVENTS_COUNTERINFO_V6, to disable the addition of
+ * deprecated events in "hv_gpci_event_attrs" attribute group, for platforms
+ * that supports counter_info_version 0x8 or above.
+ */
+#undef ENABLE_EVENTS_COUNTERINFO_V6
+
+/* Generate event list for platforms with counter_info_version 0x8 or above*/
 static __maybe_unused struct attribute *hv_gpci_event_attrs[] = {
 #include REQUEST_FILE
 	NULL
-- 
GitLab


From 0e23347f1e0f2b1c98f87a4088231d0d6f59b962 Mon Sep 17 00:00:00 2001
From: Rohan McLure <rmclure@linux.ibm.com>
Date: Thu, 1 Dec 2022 18:10:13 +1100
Subject: [PATCH 3223/4122] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS
 Kconfig

Add Kconfig option for enabling clearing of registers on arrival in an
interrupt handler. This reduces the speculation influence of registers
on kernel internals. The option will be consumed by 64-bit systems that
feature speculation and wish to implement this mitigation.

This patch only introduces the Kconfig option, no actual mitigations.

The primary overhead of this mitigation lies in an increased number of
registers that must be saved and restored by interrupt handlers on
Book3S systems. Enable by default on Book3E systems, which prior to
this patch eagerly save and restore register state, meaning that the
mitigation when implemented will have minimal overhead.

Acked-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201071019.1953023-1-rmclure@linux.ibm.com
---
 arch/powerpc/Kconfig | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index fe2aa445b654e..aec1431be06e3 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -534,6 +534,15 @@ config HOTPLUG_CPU
 
 	  Say N if you are unsure.
 
+config INTERRUPT_SANITIZE_REGISTERS
+	bool "Clear gprs on interrupt arrival"
+	depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+	default PPC_BOOK3E_64
+	help
+	  Reduce the influence of user register state on interrupt handlers and
+	  syscalls through clearing user state from registers before handling
+	  the exception.
+
 config PPC_QUEUED_SPINLOCKS
 	bool "Queued spinlocks" if EXPERT
 	depends on SMP
-- 
GitLab


From cbf892ba56677b942020d2bc7ca9b79281fa0bcc Mon Sep 17 00:00:00 2001
From: Rohan McLure <rmclure@linux.ibm.com>
Date: Thu, 1 Dec 2022 18:10:14 +1100
Subject: [PATCH 3224/4122] powerpc/64: Add interrupt register sanitisation
 macros

Include in asm/ppc_asm.h macros to be used in multiple successive
patches to implement zeroising architected registers in interrupt
handlers. Registers will be sanitised in this fashion in future patches
to reduce the speculation influence of user-controlled register values.
These mitigations will be configurable through the
CONFIG_INTERRUPT_SANITIZE_REGISTERS Kconfig option.

Included are macros for conditionally zeroising registers and restoring
as required with the mitigation enabled. With the mitigation disabled,
non-volatiles must be restored on demand at separate locations to
those required by the mitigation.

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201071019.1953023-2-rmclure@linux.ibm.com
---
 arch/powerpc/include/asm/ppc_asm.h | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 753a2757bcd4f..d2f44612f4b02 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -74,6 +74,25 @@
 #define SAVE_GPR(n, base)		SAVE_GPRS(n, n, base)
 #define REST_GPR(n, base)		REST_GPRS(n, n, base)
 
+/* macros for handling user register sanitisation */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_SYSCALL_GPRS()			ZEROIZE_GPR(0);		\
+						ZEROIZE_GPRS(5, 12);	\
+						ZEROIZE_NVGPRS()
+#define SANITIZE_GPR(n)				ZEROIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)		ZEROIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()			ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()		REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_SYSCALL_GPRS()
+#define SANITIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS()		REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
 #define SAVE_FPR(n, base)	stfd	n,8*TS_FPRWIDTH*(n)(base)
 #define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
 #define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
-- 
GitLab


From 75c5d6b1e194c341371639469fcb8691afa0e254 Mon Sep 17 00:00:00 2001
From: Rohan McLure <rmclure@linux.ibm.com>
Date: Thu, 1 Dec 2022 18:10:15 +1100
Subject: [PATCH 3225/4122] powerpc/64: Sanitise common exit code for
 interrupts

Interrupt code is shared between Book3E/S 64-bit systems for interrupt
handlers. Ensure that exit code correctly restores non-volatile gprs on
each system when CONFIG_INTERRUPT_SANITIZE_REGISTERS is enabled.

Also introduce macros for clearing/restoring registers on interrupt
entry for when this configuration option is either disabled or enabled.

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201071019.1953023-3-rmclure@linux.ibm.com
---
 arch/powerpc/kernel/interrupt_64.S | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 321992c1c9f90..dd04b0ba3959a 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -408,9 +408,11 @@ interrupt_return_\srr\()_user: /* make backtraces match the _kernel variant */
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	interrupt_exit_user_prepare
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
 	cmpdi	r3,0
 	bne-	.Lrestore_nvgprs_\srr
 .Lrestore_nvgprs_\srr\()_cont:
+#endif
 	std	r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
 #ifdef CONFIG_PPC_BOOK3S
 .Linterrupt_return_\srr\()_user_rst_start:
@@ -424,6 +426,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
 	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
 
 .Lfast_user_interrupt_return_\srr\():
+	SANITIZE_RESTORE_NVGPRS()
 #ifdef CONFIG_PPC_BOOK3S
 	.ifc \srr,srr
 	lbz	r4,PACASRR_VALID(r13)
@@ -493,9 +496,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 	b	.	/* prevent speculative execution */
 .Linterrupt_return_\srr\()_user_rst_end:
 
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
 .Lrestore_nvgprs_\srr\():
 	REST_NVGPRS(r1)
 	b	.Lrestore_nvgprs_\srr\()_cont
+#endif
 
 #ifdef CONFIG_PPC_BOOK3S
 interrupt_return_\srr\()_user_restart:
@@ -585,6 +590,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
 	stb	r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
 
 .Lfast_kernel_interrupt_return_\srr\():
+	SANITIZE_RESTORE_NVGPRS()
 	cmpdi	cr1,r3,0
 #ifdef CONFIG_PPC_BOOK3S
 	.ifc \srr,srr
-- 
GitLab


From 2487fd2e6d61b5293eed8ecd25add3cc78593d38 Mon Sep 17 00:00:00 2001
From: Rohan McLure <rmclure@linux.ibm.com>
Date: Thu, 1 Dec 2022 18:10:16 +1100
Subject: [PATCH 3226/4122] powerpc/64s: IOption for MSR stored in r12

Interrupt handlers in asm/exceptions-64s.S contain a great deal of common
code produced by the GEN_COMMON macros. Currently, at the exit point of
the macro, r12 will contain the contents of the MSR. A future patch will
cause these macros to zeroise architected registers to avoid potential
speculation influence of user data.

Provide an IOption that signals that r12 must be retained, as the
interrupt handler assumes it to hold the contents of the MSR.

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201071019.1953023-4-rmclure@linux.ibm.com
---
 arch/powerpc/kernel/exceptions-64s.S | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ac3b0580224ee..42b7c3212f296 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -111,6 +111,7 @@ name:
 #define ISTACK		.L_ISTACK_\name\()	/* Set regular kernel stack */
 #define __ISTACK(name)	.L_ISTACK_ ## name
 #define IKUAP		.L_IKUAP_\name\()	/* Do KUAP lock */
+#define IMSR_R12	.L_IMSR_R12_\name\()	/* Assumes MSR saved to r12 */
 
 #define INT_DEFINE_BEGIN(n)						\
 .macro int_define_ ## n name
@@ -176,6 +177,9 @@ do_define_int n
 	.ifndef IKUAP
 		IKUAP=1
 	.endif
+	.ifndef IMSR_R12
+		IMSR_R12=0
+	.endif
 .endm
 
 /*
@@ -1751,6 +1755,7 @@ INT_DEFINE_BEGIN(fp_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_REAL=1
 #endif
+	IMSR_R12=1
 INT_DEFINE_END(fp_unavailable)
 
 EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
@@ -2384,6 +2389,7 @@ INT_DEFINE_BEGIN(altivec_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_REAL=1
 #endif
+	IMSR_R12=1
 INT_DEFINE_END(altivec_unavailable)
 
 EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
@@ -2433,6 +2439,7 @@ INT_DEFINE_BEGIN(vsx_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 	IKVM_REAL=1
 #endif
+	IMSR_R12=1
 INT_DEFINE_END(vsx_unavailable)
 
 EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
-- 
GitLab


From 1df45d78b8a89da6544fab5267e8f5da15073d28 Mon Sep 17 00:00:00 2001
From: Rohan McLure <rmclure@linux.ibm.com>
Date: Thu, 1 Dec 2022 18:10:17 +1100
Subject: [PATCH 3227/4122] powerpc/64s: Zeroise gprs on interrupt routine
 entry on Book3S

Zeroise user state in gprs (assign to zero) to reduce the influence of user
registers on speculation within kernel syscall handlers. Clears occur
at the very beginning of the sc and scv 0 interrupt handlers, with
restores occurring following the execution of the syscall handler.

Zeroise GPRS r0, r2-r11, r14-r31, on entry into the kernel for all
other interrupt sources. The remaining gprs are overwritten by
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values. If an interrupt does not
select the IMSR_R12 IOption, zeroise r12.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored on 64bit Book3S. Remove explicit
REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear
user registers on interrupt, and continue to depend on the return value
of interrupt_exit_user_prepare to determine whether or not to restore
non-volatiles.

The mmap_bench benchmark in selftests should rapidly invoke pagefaults.
See ~0.8% performance regression with this mitigation, but this
indicates the worst-case performance due to heavier-weight interrupt
handlers. This mitigation is able to be enabled/disabled through
CONFIG_INTERRUPT_SANITIZE_REGISTERS.

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201071019.1953023-5-rmclure@linux.ibm.com
---
 arch/powerpc/kernel/exceptions-64s.S | 27 ++++++++++++++++++---------
 arch/powerpc/kernel/interrupt_64.S   | 16 ++++++++++++++--
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 42b7c3212f296..429096b037d7a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -506,6 +506,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
 	std	r10,0(r1)		/* make stack chain pointer	*/
 	std	r0,GPR0(r1)		/* save r0 in stackframe	*/
 	std	r10,GPR1(r1)		/* save r1 in stackframe	*/
+	SANITIZE_GPR(0)
 
 	/* Mark our [H]SRRs valid for return */
 	li	r10,1
@@ -548,8 +549,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	std	r9,GPR11(r1)
 	std	r10,GPR12(r1)
 	std	r11,GPR13(r1)
+	.if !IMSR_R12
+	SANITIZE_GPRS(9, 12)
+	.else
+	SANITIZE_GPRS(9, 11)
+	.endif
 
 	SAVE_NVGPRS(r1)
+	SANITIZE_NVGPRS()
 
 	.if IDAR
 	.if IISIDE
@@ -581,8 +588,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	ld	r10,IAREA+EX_CTR(r13)
 	std	r10,_CTR(r1)
-	std	r2,GPR2(r1)		/* save r2 in stackframe	*/
-	SAVE_GPRS(3, 8, r1)		/* save r3 - r8 in stackframe   */
+	SAVE_GPRS(2, 8, r1)		/* save r2 - r8 in stackframe   */
+	SANITIZE_GPRS(2, 8)
 	mflr	r9			/* Get LR, later save to stack	*/
 	LOAD_PACA_TOC()			/* get kernel TOC into r2	*/
 	std	r9,_LINK(r1)
@@ -700,6 +707,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
 	mtlr	r9
 	ld	r9,_CCR(r1)
 	mtcr	r9
+	SANITIZE_RESTORE_NVGPRS()
 	REST_GPRS(2, 13, r1)
 	REST_GPR(0, r1)
 	/* restore original r1. */
@@ -1445,7 +1453,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 	 * do_break() may have changed the NV GPRS while handling a breakpoint.
 	 * If so, we need to restore them with their updated values.
 	 */
-	REST_NVGPRS(r1)
+	HANDLER_RESTORE_NVGPRS()
 	b	interrupt_return_srr
 
 
@@ -1671,7 +1679,7 @@ EXC_COMMON_BEGIN(alignment_common)
 	GEN_COMMON alignment
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	alignment_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
 
 
@@ -1737,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common)
 .Ldo_program_check:
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	program_check_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
 
 
@@ -2169,7 +2177,7 @@ EXC_COMMON_BEGIN(emulation_assist_common)
 	GEN_COMMON emulation_assist
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	emulation_assist_interrupt
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
 	b	interrupt_return_hsrr
 
 
@@ -2501,7 +2509,7 @@ EXC_COMMON_BEGIN(facility_unavailable_common)
 	GEN_COMMON facility_unavailable
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	facility_unavailable_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
 	b	interrupt_return_srr
 
 
@@ -2529,7 +2537,8 @@ EXC_COMMON_BEGIN(h_facility_unavailable_common)
 	GEN_COMMON h_facility_unavailable
 	addi	r3,r1,STACK_INT_FRAME_REGS
 	bl	facility_unavailable_exception
-	REST_NVGPRS(r1) /* XXX Shouldn't be necessary in practice */
+	/* XXX Shouldn't be necessary in practice */
+	HANDLER_RESTORE_NVGPRS()
 	b	interrupt_return_hsrr
 
 
@@ -2755,7 +2764,7 @@ EXC_COMMON_BEGIN(altivec_assist_common)
 	addi	r3,r1,STACK_INT_FRAME_REGS
 #ifdef CONFIG_ALTIVEC
 	bl	altivec_assist_exception
-	REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+	HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
 #else
 	bl	unknown_exception
 #endif
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index dd04b0ba3959a..fccc34489add8 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -96,6 +96,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	 * but this is the best we can do.
 	 */
 
+	/*
+	 * Zero user registers to prevent influencing speculative execution
+	 * state of kernel code.
+	 */
+	SANITIZE_SYSCALL_GPRS()
 	bl	system_call_exception
 
 .Lsyscall_vectored_\name\()_exit:
@@ -124,6 +129,7 @@ BEGIN_FTR_SECTION
 	HMT_MEDIUM_LOW
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
+	SANITIZE_RESTORE_NVGPRS()
 	cmpdi	r3,0
 	bne	.Lsyscall_vectored_\name\()_restore_regs
 
@@ -159,7 +165,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 	ld	r4,_LINK(r1)
 	ld	r5,_XER(r1)
 
-	REST_NVGPRS(r1)
+	HANDLER_RESTORE_NVGPRS()
 	REST_GPR(0, r1)
 	mtcr	r2
 	mtctr	r3
@@ -275,6 +281,11 @@ END_BTB_FLUSH_SECTION
 	wrteei	1
 #endif
 
+	/*
+	 * Zero user registers to prevent influencing speculative execution
+	 * state of kernel code.
+	 */
+	SANITIZE_SYSCALL_GPRS()
 	bl	system_call_exception
 
 .Lsyscall_exit:
@@ -315,6 +326,7 @@ BEGIN_FTR_SECTION
 	stdcx.	r0,0,r1			/* to clear the reservation */
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
+	SANITIZE_RESTORE_NVGPRS()
 	cmpdi	r3,0
 	bne	.Lsyscall_restore_regs
 	/* Zero volatile regs that may contain sensitive kernel data */
@@ -342,7 +354,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 .Lsyscall_restore_regs:
 	ld	r3,_CTR(r1)
 	ld	r4,_XER(r1)
-	REST_NVGPRS(r1)
+	HANDLER_RESTORE_NVGPRS()
 	mtctr	r3
 	mtspr	SPRN_XER,r4
 	REST_GPR(0, r1)
-- 
GitLab


From efe1691ac814e4cf3653538b701662cbd905bddc Mon Sep 17 00:00:00 2001
From: Rohan McLure <rmclure@linux.ibm.com>
Date: Thu, 1 Dec 2022 18:10:18 +1100
Subject: [PATCH 3228/4122] powerpc/64e: Clear gprs on interrupt routine entry
 on Book3E

Zero GPRS r14-r31 on entry into the kernel for interrupt sources to
limit influence of user-space values in potential speculation gadgets.
Prior to this commit, all other GPRS are reassigned during the common
prologue to interrupt handlers and so need not be zeroised explicitly.

This may be done safely, without loss of register state prior to the
interrupt, as the common prologue saves the initial values of
non-volatiles, which are unconditionally restored in interrupt_64.S.
Mitigation defaults to enabled by INTERRUPT_SANITIZE_REGISTERS.

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201071019.1953023-6-rmclure@linux.ibm.com
---
 arch/powerpc/kernel/exceptions-64e.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index b9cec22df9f97..3f86091e68b3b 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -358,7 +358,6 @@ ret_from_mc_except:
 	std	r14,PACA_EXMC+EX_R14(r13);				    \
 	std	r15,PACA_EXMC+EX_R15(r13)
 
-
 /* Core exception code for all exceptions except TLB misses. */
 #define EXCEPTION_COMMON_LVL(n, scratch, excf)				    \
 exc_##n##_common:							    \
@@ -394,7 +393,8 @@ exc_##n##_common:							    \
 	std	r12,STACK_INT_FRAME_MARKER(r1); /* mark the frame */	    \
 	std	r3,_TRAP(r1);		/* set trap number		*/  \
 	std	r0,RESULT(r1);		/* clear regs->result */	    \
-	SAVE_NVGPRS(r1);
+	SAVE_NVGPRS(r1);						    \
+	SANITIZE_NVGPRS();		/* minimise speculation influence */
 
 #define EXCEPTION_COMMON(n) \
 	EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
-- 
GitLab


From 7cd882df9485988f7d9b3fae04fde4e95a4c7a74 Mon Sep 17 00:00:00 2001
From: Rohan McLure <rmclure@linux.ibm.com>
Date: Thu, 1 Dec 2022 18:10:19 +1100
Subject: [PATCH 3229/4122] powerpc/64: Sanitise user registers on interrupt in
 pseries, POWERNV

Cause pseries and POWERNV platforms to default to zeroising all potentially
user-defined registers when entering the kernel by means of any interrupt
source, reducing user-influence of the kernel and the likelihood or
producing speculation gadgets.

Acked-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Rohan McLure <rmclure@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201071019.1953023-7-rmclure@linux.ibm.com
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index aec1431be06e3..e21d6de797d6e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -537,7 +537,7 @@ config HOTPLUG_CPU
 config INTERRUPT_SANITIZE_REGISTERS
 	bool "Clear gprs on interrupt arrival"
 	depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
-	default PPC_BOOK3E_64
+	default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV
 	help
 	  Reduce the influence of user register state on interrupt handlers and
 	  syscalls through clearing user state from registers before handling
-- 
GitLab


From ad050d2390fccb22aa3e6f65e11757ce7a5a7ca5 Mon Sep 17 00:00:00 2001
From: Michael Jeanson <mjeanson@efficios.com>
Date: Thu, 1 Dec 2022 11:14:42 -0500
Subject: [PATCH 3230/4122] powerpc/ftrace: fix syscall tracing on
 PPC64_ELF_ABI_V1

In v5.7 the powerpc syscall entry/exit logic was rewritten in C, on
PPC64_ELF_ABI_V1 this resulted in the symbols in the syscall table
changing from their dot prefixed variant to the non-prefixed ones.

Since ftrace prefixes a dot to the syscall names when matching them to
build its syscall event list, this resulted in no syscall events being
available.

Remove the PPC64_ELF_ABI_V1 specific version of
arch_syscall_match_sym_name to have the same behavior across all powerpc
variants.

Fixes: 68b34588e202 ("powerpc/64/sycall: Implement syscall entry/exit logic in C")
Cc: stable@vger.kernel.org # v5.7+
Signed-off-by: Michael Jeanson <mjeanson@efficios.com>
Reviewed-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221201161442.2127231-1-mjeanson@efficios.com
---
 arch/powerpc/include/asm/ftrace.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h
index ade406dc6504e..441c5f08258b5 100644
--- a/arch/powerpc/include/asm/ftrace.h
+++ b/arch/powerpc/include/asm/ftrace.h
@@ -71,17 +71,6 @@ void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
  * those.
  */
 #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME
-#ifdef CONFIG_PPC64_ELF_ABI_V1
-static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
-{
-	/* We need to skip past the initial dot, and the __se_sys alias */
-	return !strcmp(sym + 1, name) ||
-		(!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) ||
-		(!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) ||
-		(!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) ||
-		(!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4));
-}
-#else
 static inline bool arch_syscall_match_sym_name(const char *sym, const char *name)
 {
 	return !strcmp(sym, name) ||
@@ -90,7 +79,6 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
 		(!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) ||
 		(!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4));
 }
-#endif /* CONFIG_PPC64_ELF_ABI_V1 */
 #endif /* CONFIG_FTRACE_SYSCALLS */
 
 #if defined(CONFIG_PPC64) && defined(CONFIG_FUNCTION_TRACER)
-- 
GitLab


From 31f81401e23fb88cc030cd586abd28740e6c8136 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 21 Nov 2022 19:27:34 +0800
Subject: [PATCH 3231/4122] crypto: qat - fix error return code in adf_probe

Fix to return a negative error code -EINVAL instead of 0.

Fixes: 0cec19c761e5 ("crypto: qat - add support for compression for 4xxx")
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Acked-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_4xxx/adf_drv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index 2f212561acc47..670a58b25cb16 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -261,6 +261,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	hw_data->accel_capabilities_mask = hw_data->get_accel_cap(accel_dev);
 	if (!hw_data->accel_capabilities_mask) {
 		dev_err(&pdev->dev, "Failed to get capabilities mask.\n");
+		ret = -EINVAL;
 		goto out_err;
 	}
 
-- 
GitLab


From 6a83830f649a614aca445bbcadbd582c7929e63d Mon Sep 17 00:00:00 2001
From: Nikolaus Voss <nikolaus.voss@haag-streit.com>
Date: Mon, 21 Nov 2022 15:12:41 +0100
Subject: [PATCH 3232/4122] crypto: caam - warn if blob_gen key is insecure

If CAAM is not in "trusted" or "secure" state, a fixed non-volatile key
is used instead of the unique device key. This is the default mode of
operation without secure boot (HAB). In this scenario, CAAM encrypted
blobs should be used only for testing but not in a production
environment, so issue a warning.

Signed-off-by: Nikolaus Voss <nikolaus.voss@haag-streit.com>
Reviewed-by: Ahmad Fatoum <a.fatoum@pengutronix.de>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/blob_gen.c | 9 +++++++++
 drivers/crypto/caam/regs.h     | 3 +++
 2 files changed, 12 insertions(+)

diff --git a/drivers/crypto/caam/blob_gen.c b/drivers/crypto/caam/blob_gen.c
index 6345c7269eb03..1f65df4898478 100644
--- a/drivers/crypto/caam/blob_gen.c
+++ b/drivers/crypto/caam/blob_gen.c
@@ -6,6 +6,7 @@
 
 #define pr_fmt(fmt) "caam blob_gen: " fmt
 
+#include <linux/bitfield.h>
 #include <linux/device.h>
 #include <soc/fsl/caam-blob.h>
 
@@ -61,12 +62,14 @@ static void caam_blob_job_done(struct device *dev, u32 *desc, u32 err, void *con
 int caam_process_blob(struct caam_blob_priv *priv,
 		      struct caam_blob_info *info, bool encap)
 {
+	const struct caam_drv_private *ctrlpriv;
 	struct caam_blob_job_result testres;
 	struct device *jrdev = &priv->jrdev;
 	dma_addr_t dma_in, dma_out;
 	int op = OP_PCLID_BLOB;
 	size_t output_len;
 	u32 *desc;
+	u32 moo;
 	int ret;
 
 	if (info->key_mod_len > CAAM_BLOB_KEYMOD_LENGTH)
@@ -100,6 +103,12 @@ int caam_process_blob(struct caam_blob_priv *priv,
 		goto out_unmap_in;
 	}
 
+	ctrlpriv = dev_get_drvdata(jrdev->parent);
+	moo = FIELD_GET(CSTA_MOO, ioread32(&ctrlpriv->ctrl->perfmon.status));
+	if (moo != CSTA_MOO_SECURE && moo != CSTA_MOO_TRUSTED)
+		dev_warn(jrdev,
+			 "using insecure test key, enable HAB to use unique device key!\n");
+
 	/*
 	 * A data blob is encrypted using a blob key (BK); a random number.
 	 * The BK is used as an AES-CCM key. The initial block (B0) and the
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 66d6dad841bb2..66928f8a0c4b1 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -426,6 +426,9 @@ struct caam_perfmon {
 	u32 rsvd2;
 #define CSTA_PLEND		BIT(10)
 #define CSTA_ALT_PLEND		BIT(18)
+#define CSTA_MOO		GENMASK(9, 8)
+#define CSTA_MOO_SECURE	1
+#define CSTA_MOO_TRUSTED	2
 	u32 status;		/* CSTA - CAAM Status */
 	u64 rsvd3;
 
-- 
GitLab


From 5b11d1a360ea23c80c6d4ec3f5986a788d0a0995 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 13:53:38 +0800
Subject: [PATCH 3233/4122] crypto: rsa-pkcs1pad - Use helper to set reqsize

The value of reqsize must only be changed through the helper.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/rsa-pkcs1pad.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/crypto/rsa-pkcs1pad.c b/crypto/rsa-pkcs1pad.c
index 3285e3af43e14..3bc76edb3f8a9 100644
--- a/crypto/rsa-pkcs1pad.c
+++ b/crypto/rsa-pkcs1pad.c
@@ -579,6 +579,10 @@ static int pkcs1pad_init_tfm(struct crypto_akcipher *tfm)
 		return PTR_ERR(child_tfm);
 
 	ctx->child = child_tfm;
+
+	akcipher_set_reqsize(tfm, sizeof(struct pkcs1pad_request) +
+				  crypto_akcipher_reqsize(child_tfm));
+
 	return 0;
 }
 
@@ -674,7 +678,6 @@ static int pkcs1pad_create(struct crypto_template *tmpl, struct rtattr **tb)
 	inst->alg.set_pub_key = pkcs1pad_set_pub_key;
 	inst->alg.set_priv_key = pkcs1pad_set_priv_key;
 	inst->alg.max_size = pkcs1pad_get_max_size;
-	inst->alg.reqsize = sizeof(struct pkcs1pad_request) + rsa_alg->reqsize;
 
 	inst->free = pkcs1pad_free;
 
-- 
GitLab


From bd71e0dced921e00599052b445f9a9f7916a5452 Mon Sep 17 00:00:00 2001
From: Yushan Zhou <katrinzhou@tencent.com>
Date: Tue, 22 Nov 2022 15:49:00 +0800
Subject: [PATCH 3234/4122] crypto: marvell/octeontx - remove redundant NULL
 check

release_firmware() checks whether firmware pointer is NULL. Remove the redundant NULL check in release_tar_archive().

Signed-off-by: Yushan Zhou <katrinzhou@tencent.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
index df9c2b8747e6d..c4250e5fcf8f7 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptpf_ucode.c
@@ -345,8 +345,7 @@ static void release_tar_archive(struct tar_arch_info_t *tar_arch)
 		kfree(curr);
 	}
 
-	if (tar_arch->fw)
-		release_firmware(tar_arch->fw);
+	release_firmware(tar_arch->fw);
 	kfree(tar_arch);
 }
 
-- 
GitLab


From 56861cbde1b9f3b34d300e6ba87f2c3de1a9c309 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 17:24:01 +0800
Subject: [PATCH 3235/4122] crypto: kpp - Add helper to set reqsize

The value of reqsize should only be changed through a helper.
To do so we need to first add a helper for this.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/kpp.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
index 9cb0662ebe871..31ff3c1986ef0 100644
--- a/include/crypto/internal/kpp.h
+++ b/include/crypto/internal/kpp.h
@@ -50,6 +50,12 @@ static inline void *kpp_request_ctx(struct kpp_request *req)
 	return req->__ctx;
 }
 
+static inline void kpp_set_reqsize(struct crypto_kpp *kpp,
+				   unsigned int reqsize)
+{
+	crypto_kpp_alg(kpp)->reqsize = reqsize;
+}
+
 static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm)
 {
 	return tfm->base.__crt_ctx;
-- 
GitLab


From 5ba78373561f12d23c975b0a154104a07866f94b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 17:28:38 +0800
Subject: [PATCH 3236/4122] crypto: hisilicon/hpre - Use helper to set reqsize

The value of reqsize must only be changed through the helper.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Longfang Liu <liulongfang@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index ef02dadd62170..5f6d363c9435e 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -740,6 +740,8 @@ static int hpre_dh_init_tfm(struct crypto_kpp *tfm)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+
 	return hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE);
 }
 
@@ -1165,6 +1167,9 @@ static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm)
 		return PTR_ERR(ctx->rsa.soft_tfm);
 	}
 
+	akcipher_set_reqsize(tfm, sizeof(struct hpre_asym_request) +
+				  HPRE_ALIGN_SZ);
+
 	ret = hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE);
 	if (ret)
 		crypto_free_akcipher(ctx->rsa.soft_tfm);
@@ -1617,6 +1622,8 @@ static int hpre_ecdh_nist_p192_init_tfm(struct crypto_kpp *tfm)
 
 	ctx->curve_id = ECC_CURVE_NIST_P192;
 
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
 
@@ -1626,6 +1633,8 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm)
 
 	ctx->curve_id = ECC_CURVE_NIST_P256;
 
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
 
@@ -1635,6 +1644,8 @@ static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)
 
 	ctx->curve_id = ECC_CURVE_NIST_P384;
 
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
 
@@ -1961,6 +1972,8 @@ static int hpre_curve25519_init_tfm(struct crypto_kpp *tfm)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
 
@@ -1981,7 +1994,6 @@ static struct akcipher_alg rsa = {
 	.max_size = hpre_rsa_max_size,
 	.init = hpre_rsa_init_tfm,
 	.exit = hpre_rsa_exit_tfm,
-	.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
 	.base = {
 		.cra_ctxsize = sizeof(struct hpre_ctx),
 		.cra_priority = HPRE_CRYPTO_ALG_PRI,
@@ -1998,7 +2010,6 @@ static struct kpp_alg dh = {
 	.max_size = hpre_dh_max_size,
 	.init = hpre_dh_init_tfm,
 	.exit = hpre_dh_exit_tfm,
-	.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
 	.base = {
 		.cra_ctxsize = sizeof(struct hpre_ctx),
 		.cra_priority = HPRE_CRYPTO_ALG_PRI,
@@ -2016,7 +2027,6 @@ static struct kpp_alg ecdh_curves[] = {
 		.max_size = hpre_ecdh_max_size,
 		.init = hpre_ecdh_nist_p192_init_tfm,
 		.exit = hpre_ecdh_exit_tfm,
-		.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
 		.base = {
 			.cra_ctxsize = sizeof(struct hpre_ctx),
 			.cra_priority = HPRE_CRYPTO_ALG_PRI,
@@ -2031,7 +2041,6 @@ static struct kpp_alg ecdh_curves[] = {
 		.max_size = hpre_ecdh_max_size,
 		.init = hpre_ecdh_nist_p256_init_tfm,
 		.exit = hpre_ecdh_exit_tfm,
-		.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
 		.base = {
 			.cra_ctxsize = sizeof(struct hpre_ctx),
 			.cra_priority = HPRE_CRYPTO_ALG_PRI,
@@ -2046,7 +2055,6 @@ static struct kpp_alg ecdh_curves[] = {
 		.max_size = hpre_ecdh_max_size,
 		.init = hpre_ecdh_nist_p384_init_tfm,
 		.exit = hpre_ecdh_exit_tfm,
-		.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
 		.base = {
 			.cra_ctxsize = sizeof(struct hpre_ctx),
 			.cra_priority = HPRE_CRYPTO_ALG_PRI,
@@ -2064,7 +2072,6 @@ static struct kpp_alg curve25519_alg = {
 	.max_size = hpre_curve25519_max_size,
 	.init = hpre_curve25519_init_tfm,
 	.exit = hpre_curve25519_exit_tfm,
-	.reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
 	.base = {
 		.cra_ctxsize = sizeof(struct hpre_ctx),
 		.cra_priority = HPRE_CRYPTO_ALG_PRI,
-- 
GitLab


From 80e62ad58db084920d8cf23323b713391e09f374 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 17:30:58 +0800
Subject: [PATCH 3237/4122] crypto: qat - Use helper to set reqsize

The value of reqsize must only be changed through the helper.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_asym_algs.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c
index 94a26702aeae1..935a7e012946e 100644
--- a/drivers/crypto/qat/qat_common/qat_asym_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c
@@ -494,6 +494,8 @@ static int qat_dh_init_tfm(struct crypto_kpp *tfm)
 	if (!inst)
 		return -EINVAL;
 
+	kpp_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64);
+
 	ctx->p_size = 0;
 	ctx->g2 = false;
 	ctx->inst = inst;
@@ -1230,6 +1232,8 @@ static int qat_rsa_init_tfm(struct crypto_akcipher *tfm)
 	if (!inst)
 		return -EINVAL;
 
+	akcipher_set_reqsize(tfm, sizeof(struct qat_asym_request) + 64);
+
 	ctx->key_sz = 0;
 	ctx->inst = inst;
 	return 0;
@@ -1252,7 +1256,6 @@ static struct akcipher_alg rsa = {
 	.max_size = qat_rsa_max_size,
 	.init = qat_rsa_init_tfm,
 	.exit = qat_rsa_exit_tfm,
-	.reqsize = sizeof(struct qat_asym_request) + 64,
 	.base = {
 		.cra_name = "rsa",
 		.cra_driver_name = "qat-rsa",
@@ -1269,7 +1272,6 @@ static struct kpp_alg dh = {
 	.max_size = qat_dh_max_size,
 	.init = qat_dh_init_tfm,
 	.exit = qat_dh_exit_tfm,
-	.reqsize = sizeof(struct qat_asym_request) + 64,
 	.base = {
 		.cra_name = "dh",
 		.cra_driver_name = "qat-dh",
-- 
GitLab


From 908d383b6c94be0f89c5e2a5a346d99495efd4d4 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 17:40:51 +0800
Subject: [PATCH 3238/4122] crypto: caam - Use helper to set reqsize

The value of reqsize must only be changed through the helper.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Reviewed-by: Gaurav Jain <gaurav.jain@nxp.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caampkc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 8867275767101..642846693d7c0 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -1099,6 +1099,8 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
 {
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
 
+	akcipher_set_reqsize(tfm, sizeof(struct caam_rsa_req_ctx));
+
 	ctx->dev = caam_jr_alloc();
 
 	if (IS_ERR(ctx->dev)) {
@@ -1141,7 +1143,6 @@ static struct caam_akcipher_alg caam_rsa = {
 		.max_size = caam_rsa_max_size,
 		.init = caam_rsa_init_tfm,
 		.exit = caam_rsa_exit_tfm,
-		.reqsize = sizeof(struct caam_rsa_req_ctx),
 		.base = {
 			.cra_name = "rsa",
 			.cra_driver_name = "rsa-caam",
-- 
GitLab


From 93c446cd36a410b31519af7a2dd32e899cc03d06 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 17:42:26 +0800
Subject: [PATCH 3239/4122] crypto: virtio - Use helper to set reqsize

The value of reqsize must only be changed through the helper.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: Gonglei <arei.gonglei@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/virtio/virtio_crypto_akcipher_algs.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
index 168195672e2e1..b2979be613b8f 100644
--- a/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_akcipher_algs.c
@@ -479,6 +479,9 @@ static int virtio_crypto_rsa_init_tfm(struct crypto_akcipher *tfm)
 	ctx->enginectx.op.prepare_request = NULL;
 	ctx->enginectx.op.unprepare_request = NULL;
 
+	akcipher_set_reqsize(tfm,
+			     sizeof(struct virtio_crypto_akcipher_request));
+
 	return 0;
 }
 
@@ -505,7 +508,6 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = {
 			.max_size = virtio_crypto_rsa_max_size,
 			.init = virtio_crypto_rsa_init_tfm,
 			.exit = virtio_crypto_rsa_exit_tfm,
-			.reqsize = sizeof(struct virtio_crypto_akcipher_request),
 			.base = {
 				.cra_name = "rsa",
 				.cra_driver_name = "virtio-crypto-rsa",
@@ -528,7 +530,6 @@ static struct virtio_crypto_akcipher_algo virtio_crypto_akcipher_algs[] = {
 			.max_size = virtio_crypto_rsa_max_size,
 			.init = virtio_crypto_rsa_init_tfm,
 			.exit = virtio_crypto_rsa_exit_tfm,
-			.reqsize = sizeof(struct virtio_crypto_akcipher_request),
 			.base = {
 				.cra_name = "pkcs1pad(rsa,sha1)",
 				.cra_driver_name = "virtio-pkcs1-rsa-with-sha1",
-- 
GitLab


From 3e71e5b0efcc730216f4450b796df4fdd627ecd0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 18:03:35 +0800
Subject: [PATCH 3240/4122] crypto: akcipher - Move reqsize into tfm

The value of reqsize cannot be determined in case of fallbacks.
Therefore it must be stored in the tfm and not the alg object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/akcipher.h          | 7 ++++---
 include/crypto/internal/akcipher.h | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/crypto/akcipher.h b/include/crypto/akcipher.h
index 5764b46bd1ec1..734c213918bda 100644
--- a/include/crypto/akcipher.h
+++ b/include/crypto/akcipher.h
@@ -43,9 +43,12 @@ struct akcipher_request {
  * struct crypto_akcipher - user-instantiated objects which encapsulate
  * algorithms and core processing logic
  *
+ * @reqsize:	Request context size required by algorithm implementation
  * @base:	Common crypto API algorithm data structure
  */
 struct crypto_akcipher {
+	unsigned int reqsize;
+
 	struct crypto_tfm base;
 };
 
@@ -86,7 +89,6 @@ struct crypto_akcipher {
  *		counterpart to @init, used to remove various changes set in
  *		@init.
  *
- * @reqsize:	Request context size required by algorithm implementation
  * @base:	Common crypto API algorithm data structure
  */
 struct akcipher_alg {
@@ -102,7 +104,6 @@ struct akcipher_alg {
 	int (*init)(struct crypto_akcipher *tfm);
 	void (*exit)(struct crypto_akcipher *tfm);
 
-	unsigned int reqsize;
 	struct crypto_alg base;
 };
 
@@ -155,7 +156,7 @@ static inline struct akcipher_alg *crypto_akcipher_alg(
 
 static inline unsigned int crypto_akcipher_reqsize(struct crypto_akcipher *tfm)
 {
-	return crypto_akcipher_alg(tfm)->reqsize;
+	return tfm->reqsize;
 }
 
 static inline void akcipher_request_set_tfm(struct akcipher_request *req,
diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h
index 8d3220c9ab772..1474a2d890fc4 100644
--- a/include/crypto/internal/akcipher.h
+++ b/include/crypto/internal/akcipher.h
@@ -36,7 +36,7 @@ static inline void *akcipher_request_ctx(struct akcipher_request *req)
 static inline void akcipher_set_reqsize(struct crypto_akcipher *akcipher,
 					unsigned int reqsize)
 {
-	crypto_akcipher_alg(akcipher)->reqsize = reqsize;
+	akcipher->reqsize = reqsize;
 }
 
 static inline void *akcipher_tfm_ctx(struct crypto_akcipher *tfm)
-- 
GitLab


From cb99fc0dd1f6985e8b6ade93e3d69f5e33930539 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 18:06:56 +0800
Subject: [PATCH 3241/4122] crypto: dh - Use helper to set reqsize

The value of reqsize must only be changed through the helper.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/dh.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/crypto/dh.c b/crypto/dh.c
index 99c3b2ef7adca..e39c1bde1ac07 100644
--- a/crypto/dh.c
+++ b/crypto/dh.c
@@ -318,6 +318,9 @@ static int dh_safe_prime_init_tfm(struct crypto_kpp *tfm)
 	if (IS_ERR(tfm_ctx->dh_tfm))
 		return PTR_ERR(tfm_ctx->dh_tfm);
 
+	kpp_set_reqsize(tfm, sizeof(struct kpp_request) +
+			     crypto_kpp_reqsize(tfm_ctx->dh_tfm));
+
 	return 0;
 }
 
@@ -593,7 +596,6 @@ static int __maybe_unused __dh_safe_prime_create(
 	inst->alg.max_size = dh_safe_prime_max_size;
 	inst->alg.init = dh_safe_prime_init_tfm;
 	inst->alg.exit = dh_safe_prime_exit_tfm;
-	inst->alg.reqsize = sizeof(struct kpp_request) + dh_alg->reqsize;
 	inst->alg.base.cra_priority = dh_alg->base.cra_priority;
 	inst->alg.base.cra_module = THIS_MODULE;
 	inst->alg.base.cra_ctxsize = sizeof(struct dh_safe_prime_tfm_ctx);
-- 
GitLab


From 4d2b225a67e6df962bbeaad473bfd8f97cfbf478 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 22 Nov 2022 18:09:16 +0800
Subject: [PATCH 3242/4122] crypto: kpp - Move reqsize into tfm

The value of reqsize cannot be determined in case of fallbacks.
Therefore it must be stored in the tfm and not the alg object.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/kpp.h | 2 +-
 include/crypto/kpp.h          | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
index 31ff3c1986ef0..167662407e36b 100644
--- a/include/crypto/internal/kpp.h
+++ b/include/crypto/internal/kpp.h
@@ -53,7 +53,7 @@ static inline void *kpp_request_ctx(struct kpp_request *req)
 static inline void kpp_set_reqsize(struct crypto_kpp *kpp,
 				   unsigned int reqsize)
 {
-	crypto_kpp_alg(kpp)->reqsize = reqsize;
+	kpp->reqsize = reqsize;
 }
 
 static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm)
diff --git a/include/crypto/kpp.h b/include/crypto/kpp.h
index 24d01e9877c12..33ff32878802a 100644
--- a/include/crypto/kpp.h
+++ b/include/crypto/kpp.h
@@ -37,9 +37,13 @@ struct kpp_request {
  * struct crypto_kpp - user-instantiated object which encapsulate
  * algorithms and core processing logic
  *
+ * @reqsize:		Request context size required by algorithm
+ *			implementation
  * @base:	Common crypto API algorithm data structure
  */
 struct crypto_kpp {
+	unsigned int reqsize;
+
 	struct crypto_tfm base;
 };
 
@@ -64,8 +68,6 @@ struct crypto_kpp {
  *			put in place here.
  * @exit:		Undo everything @init did.
  *
- * @reqsize:		Request context size required by algorithm
- *			implementation
  * @base:		Common crypto API algorithm data structure
  */
 struct kpp_alg {
@@ -79,7 +81,6 @@ struct kpp_alg {
 	int (*init)(struct crypto_kpp *tfm);
 	void (*exit)(struct crypto_kpp *tfm);
 
-	unsigned int reqsize;
 	struct crypto_alg base;
 };
 
@@ -128,7 +129,7 @@ static inline struct kpp_alg *crypto_kpp_alg(struct crypto_kpp *tfm)
 
 static inline unsigned int crypto_kpp_reqsize(struct crypto_kpp *tfm)
 {
-	return crypto_kpp_alg(tfm)->reqsize;
+	return tfm->reqsize;
 }
 
 static inline void kpp_request_set_tfm(struct kpp_request *req,
-- 
GitLab


From 3d780c8a9850ad60dee47a8d971ba7888f3d1bd3 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 22 Nov 2022 22:56:19 +0100
Subject: [PATCH 3243/4122] crypto: amlogic - Remove kcalloc without check

There is no real point in allocating dedicated memory for the irqs array.
MAXFLOW is only 2, so it is easier to allocated the needed space
directly within the 'meson_dev' structure.

This saves some memory allocation and avoids an indirection when using the
irqs array.

Fixes: 48fe583fe541 ("crypto: amlogic - Add crypto accelerator...")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/amlogic/amlogic-gxl-core.c | 1 -
 drivers/crypto/amlogic/amlogic-gxl.h      | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/crypto/amlogic/amlogic-gxl-core.c b/drivers/crypto/amlogic/amlogic-gxl-core.c
index 6e7ae896717cd..937187027ad57 100644
--- a/drivers/crypto/amlogic/amlogic-gxl-core.c
+++ b/drivers/crypto/amlogic/amlogic-gxl-core.c
@@ -237,7 +237,6 @@ static int meson_crypto_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	mc->irqs = devm_kcalloc(mc->dev, MAXFLOW, sizeof(int), GFP_KERNEL);
 	for (i = 0; i < MAXFLOW; i++) {
 		mc->irqs[i] = platform_get_irq(pdev, i);
 		if (mc->irqs[i] < 0)
diff --git a/drivers/crypto/amlogic/amlogic-gxl.h b/drivers/crypto/amlogic/amlogic-gxl.h
index dc0f142324a3c..8c0746a1d6d43 100644
--- a/drivers/crypto/amlogic/amlogic-gxl.h
+++ b/drivers/crypto/amlogic/amlogic-gxl.h
@@ -95,7 +95,7 @@ struct meson_dev {
 	struct device *dev;
 	struct meson_flow *chanlist;
 	atomic_t flow;
-	int *irqs;
+	int irqs[MAXFLOW];
 #ifdef CONFIG_CRYPTO_DEV_AMLOGIC_GXL_DEBUG
 	struct dentry *dbgfs_dir;
 #endif
-- 
GitLab


From c390c452ebeb44cb979b7374d3acc3859415e86c Mon Sep 17 00:00:00 2001
From: Joe Fradley <joefradley@google.com>
Date: Tue, 22 Nov 2022 14:54:49 -0800
Subject: [PATCH 3244/4122] crypto: x86/curve25519 - disable gcov

curve25519-x86_64.c fails to build when CONFIG_GCOV_KERNEL is enabled.
The error is "inline assembly requires more registers than available"
thrown from the `fsqr()` function. Therefore, excluding this file from
GCOV profiling until this issue is resolved. Thereby allowing
CONFIG_GCOV_PROFILE_ALL to be enabled for x86.

Signed-off-by: Joe Fradley <joefradley@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/x86/crypto/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 3b1d701a4f6c5..3e7a329235bdb 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -107,3 +107,6 @@ quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $< > $@
 $(obj)/%.S: $(src)/%.pl FORCE
 	$(call if_changed,perlasm)
+
+# Disable GCOV in odd or sensitive code
+GCOV_PROFILE_curve25519-x86_64.o := n
-- 
GitLab


From 7bcceb4c9896b1b672b636ae70fe75110d6bf1ad Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Thu, 24 Nov 2022 14:49:40 +0800
Subject: [PATCH 3245/4122] crypto: omap-sham - Use pm_runtime_resume_and_get()
 in omap_sham_probe()

omap_sham_probe() calls pm_runtime_get_sync() and calls
pm_runtime_put_sync() latter to put usage_counter. However,
pm_runtime_get_sync() will increment usage_counter even it failed. Fix
it by replacing it with pm_runtime_resume_and_get() to keep usage
counter balanced.

Fixes: b359f034c8bf ("crypto: omap-sham - Convert to use pm_runtime API")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Acked-by: Mark Greer <mgreer@animalcreek.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/omap-sham.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index 655a7f5a406a1..cbeda59c6b191 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -2114,7 +2114,7 @@ static int omap_sham_probe(struct platform_device *pdev)
 
 	pm_runtime_enable(dev);
 
-	err = pm_runtime_get_sync(dev);
+	err = pm_runtime_resume_and_get(dev);
 	if (err < 0) {
 		dev_err(dev, "failed to get sync: %d\n", err);
 		goto err_pm;
-- 
GitLab


From 14386d471322a204344ae81a28738b71e261d3a0 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:28 +0800
Subject: [PATCH 3246/4122] crypto: Prepare to move crypto_tfm_ctx

The helper crypto_tfm_ctx is only used by the Crypto API algorithm
code and should really be in algapi.h.  However, for historical
reasons many files relied on it to be in crypto.h.  This patch
changes those files to use algapi.h instead in prepartion for a
move.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm/crypto/aes-cipher-glue.c      | 2 +-
 arch/arm64/crypto/aes-ce-glue.c        | 2 +-
 arch/arm64/crypto/aes-cipher-glue.c    | 2 +-
 arch/arm64/crypto/sm4-ce-cipher-glue.c | 2 +-
 arch/x86/crypto/twofish_glue.c         | 2 +-
 crypto/aes_generic.c                   | 2 +-
 crypto/aes_ti.c                        | 2 +-
 crypto/anubis.c                        | 2 +-
 crypto/blowfish_common.c               | 3 ++-
 crypto/blowfish_generic.c              | 3 ++-
 crypto/camellia_generic.c              | 2 +-
 crypto/cast5_generic.c                 | 2 +-
 crypto/cast6_generic.c                 | 2 +-
 crypto/des_generic.c                   | 2 +-
 crypto/fcrypt.c                        | 2 +-
 crypto/khazad.c                        | 2 +-
 crypto/seed.c                          | 2 +-
 crypto/serpent_generic.c               | 2 +-
 crypto/sm4_generic.c                   | 2 +-
 crypto/tea.c                           | 2 +-
 crypto/twofish_common.c                | 2 +-
 crypto/twofish_generic.c               | 2 +-
 drivers/crypto/nx/nx-842.h             | 2 +-
 include/crypto/aria.h                  | 2 +-
 include/crypto/internal/acompress.h    | 2 ++
 include/crypto/internal/scompress.h    | 3 ++-
 26 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
index 8cd00f56800e7..6dfaef2d8f913 100644
--- a/arch/arm/crypto/aes-cipher-glue.c
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -7,7 +7,7 @@
  */
 
 #include <crypto/aes.h>
-#include <linux/crypto.h>
+#include <crypto/algapi.h>
 #include <linux/module.h>
 
 asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index 56a5f6f0b0c12..e921823ca103a 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -9,9 +9,9 @@
 #include <asm/simd.h>
 #include <asm/unaligned.h>
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 #include <crypto/internal/simd.h>
 #include <linux/cpufeature.h>
-#include <linux/crypto.h>
 #include <linux/module.h>
 
 #include "aes-ce-setkey.h"
diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c
index 8caf6dfefce88..4ec55e568941c 100644
--- a/arch/arm64/crypto/aes-cipher-glue.c
+++ b/arch/arm64/crypto/aes-cipher-glue.c
@@ -6,7 +6,7 @@
  */
 
 #include <crypto/aes.h>
-#include <linux/crypto.h>
+#include <crypto/algapi.h>
 #include <linux/module.h>
 
 asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
diff --git a/arch/arm64/crypto/sm4-ce-cipher-glue.c b/arch/arm64/crypto/sm4-ce-cipher-glue.c
index 76a34ef4abbbf..c31d76fb5a177 100644
--- a/arch/arm64/crypto/sm4-ce-cipher-glue.c
+++ b/arch/arm64/crypto/sm4-ce-cipher-glue.c
@@ -2,11 +2,11 @@
 
 #include <asm/neon.h>
 #include <asm/simd.h>
+#include <crypto/algapi.h>
 #include <crypto/sm4.h>
 #include <crypto/internal/simd.h>
 #include <linux/module.h>
 #include <linux/cpufeature.h>
-#include <linux/crypto.h>
 #include <linux/types.h>
 
 MODULE_ALIAS_CRYPTO("sm4");
diff --git a/arch/x86/crypto/twofish_glue.c b/arch/x86/crypto/twofish_glue.c
index f9c4adc274040..0614beece2793 100644
--- a/arch/x86/crypto/twofish_glue.c
+++ b/arch/x86/crypto/twofish_glue.c
@@ -38,8 +38,8 @@
  * Third Edition.
  */
 
+#include <crypto/algapi.h>
 #include <crypto/twofish.h>
-#include <linux/crypto.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/types.h>
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index 27ab279318137..666474b81c6aa 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -48,11 +48,11 @@
  */
 
 #include <crypto/aes.h>
+#include <crypto/algapi.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index 205c2c257d492..a3b342f92fab6 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -6,7 +6,7 @@
  */
 
 #include <crypto/aes.h>
-#include <linux/crypto.h>
+#include <crypto/algapi.h>
 #include <linux/module.h>
 
 static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
diff --git a/crypto/anubis.c b/crypto/anubis.c
index 5da0241ef453c..9f0cf61bbc6e2 100644
--- a/crypto/anubis.c
+++ b/crypto/anubis.c
@@ -29,11 +29,11 @@
  *
  */
 
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <asm/byteorder.h>
-#include <linux/crypto.h>
 #include <linux/types.h>
 
 #define ANUBIS_MIN_KEY_SIZE	16
diff --git a/crypto/blowfish_common.c b/crypto/blowfish_common.c
index 1c072012baff4..c0208ce269a33 100644
--- a/crypto/blowfish_common.c
+++ b/crypto/blowfish_common.c
@@ -14,11 +14,12 @@
  * Copyright (c) Kyle McMartin <kyle@debian.org>
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  */
+
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <asm/byteorder.h>
-#include <linux/crypto.h>
 #include <linux/types.h>
 #include <crypto/blowfish.h>
 
diff --git a/crypto/blowfish_generic.c b/crypto/blowfish_generic.c
index 003b52c6880ea..0e74c7242e77e 100644
--- a/crypto/blowfish_generic.c
+++ b/crypto/blowfish_generic.c
@@ -11,11 +11,12 @@
  * Copyright (c) Kyle McMartin <kyle@debian.org>
  * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
  */
+
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <asm/unaligned.h>
-#include <linux/crypto.h>
 #include <linux/types.h>
 #include <crypto/blowfish.h>
 
diff --git a/crypto/camellia_generic.c b/crypto/camellia_generic.c
index fd1a88af9e77f..c04670cf51acf 100644
--- a/crypto/camellia_generic.c
+++ b/crypto/camellia_generic.c
@@ -9,7 +9,7 @@
  *  https://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
  */
 
-#include <linux/crypto.h>
+#include <crypto/algapi.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/crypto/cast5_generic.c b/crypto/cast5_generic.c
index 0257c14cefc25..085a1eedae03b 100644
--- a/crypto/cast5_generic.c
+++ b/crypto/cast5_generic.c
@@ -14,8 +14,8 @@
 
 
 #include <asm/unaligned.h>
+#include <crypto/algapi.h>
 #include <linux/init.h>
-#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/string.h>
diff --git a/crypto/cast6_generic.c b/crypto/cast6_generic.c
index 75346380aa0bc..34f1ab53e3a71 100644
--- a/crypto/cast6_generic.c
+++ b/crypto/cast6_generic.c
@@ -11,8 +11,8 @@
 
 
 #include <asm/unaligned.h>
+#include <crypto/algapi.h>
 #include <linux/init.h>
-#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/string.h>
diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index c85354a5e94c7..1274e18d3eb90 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -8,11 +8,11 @@
  */
 
 #include <asm/byteorder.h>
+#include <crypto/algapi.h>
 #include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 
 #include <crypto/internal/des.h>
 
diff --git a/crypto/fcrypt.c b/crypto/fcrypt.c
index 76a04d000c0d3..95a16e88899ba 100644
--- a/crypto/fcrypt.c
+++ b/crypto/fcrypt.c
@@ -43,10 +43,10 @@
  */
 
 #include <asm/byteorder.h>
+#include <crypto/algapi.h>
 #include <linux/bitops.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/crypto.h>
 
 #define ROUNDS 16
 
diff --git a/crypto/khazad.c b/crypto/khazad.c
index f19339954c89e..70cafe73f9740 100644
--- a/crypto/khazad.c
+++ b/crypto/khazad.c
@@ -19,11 +19,11 @@
  *
  */
 
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <asm/byteorder.h>
-#include <linux/crypto.h>
 #include <linux/types.h>
 
 #define KHAZAD_KEY_SIZE		16
diff --git a/crypto/seed.c b/crypto/seed.c
index 27720140820ef..d0506ade2a5f8 100644
--- a/crypto/seed.c
+++ b/crypto/seed.c
@@ -8,11 +8,11 @@
  * Copyright (C) 2007 Korea Information Security Agency (KISA).
  */
 
+#include <crypto/algapi.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <asm/byteorder.h>
 
 #define SEED_NUM_KCONSTANTS	16
diff --git a/crypto/serpent_generic.c b/crypto/serpent_generic.c
index 45f98b750053e..c6bca47931e21 100644
--- a/crypto/serpent_generic.c
+++ b/crypto/serpent_generic.c
@@ -7,11 +7,11 @@
  * Copyright (C) 2002 Dag Arne Osvik <osvik@ii.uib.no>
  */
 
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <asm/unaligned.h>
-#include <linux/crypto.h>
 #include <linux/types.h>
 #include <crypto/serpent.h>
 
diff --git a/crypto/sm4_generic.c b/crypto/sm4_generic.c
index 4a6480a27fee5..560eba37dc55e 100644
--- a/crypto/sm4_generic.c
+++ b/crypto/sm4_generic.c
@@ -7,12 +7,12 @@
  * All rights reserved.
  */
 
+#include <crypto/algapi.h>
 #include <crypto/sm4.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
diff --git a/crypto/tea.c b/crypto/tea.c
index 02efc5d816903..896f863f3067c 100644
--- a/crypto/tea.c
+++ b/crypto/tea.c
@@ -14,11 +14,11 @@
  * Copyright (c) 2004 Aaron Grothe ajgrothe@yahoo.com
  */
 
+#include <crypto/algapi.h>
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/mm.h>
 #include <asm/byteorder.h>
-#include <linux/crypto.h>
 #include <linux/types.h>
 
 #define TEA_KEY_SIZE		16
diff --git a/crypto/twofish_common.c b/crypto/twofish_common.c
index f921f30334f48..bf4f28742f770 100644
--- a/crypto/twofish_common.c
+++ b/crypto/twofish_common.c
@@ -25,9 +25,9 @@
  * Third Edition.
  */
 
+#include <crypto/algapi.h>
 #include <crypto/twofish.h>
 #include <linux/bitops.h>
-#include <linux/crypto.h>
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
diff --git a/crypto/twofish_generic.c b/crypto/twofish_generic.c
index 86b2f067a4162..557915e4062d7 100644
--- a/crypto/twofish_generic.c
+++ b/crypto/twofish_generic.c
@@ -25,12 +25,12 @@
  */
 
 #include <asm/unaligned.h>
+#include <crypto/algapi.h>
 #include <crypto/twofish.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <linux/bitops.h>
 
 /* Macros to compute the g() function in the encryption and decryption
diff --git a/drivers/crypto/nx/nx-842.h b/drivers/crypto/nx/nx-842.h
index b66f19ac600f2..7590bfb24d79b 100644
--- a/drivers/crypto/nx/nx-842.h
+++ b/drivers/crypto/nx/nx-842.h
@@ -3,10 +3,10 @@
 #ifndef __NX_842_H__
 #define __NX_842_H__
 
+#include <crypto/algapi.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/crypto.h>
 #include <linux/of.h>
 #include <linux/slab.h>
 #include <linux/io.h>
diff --git a/include/crypto/aria.h b/include/crypto/aria.h
index 254da46cc385c..73295146be119 100644
--- a/include/crypto/aria.h
+++ b/include/crypto/aria.h
@@ -18,11 +18,11 @@
 #ifndef _CRYPTO_ARIA_H
 #define _CRYPTO_ARIA_H
 
+#include <crypto/algapi.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/types.h>
 #include <linux/errno.h>
-#include <linux/crypto.h>
 #include <asm/byteorder.h>
 
 #define ARIA_MIN_KEY_SIZE	16
diff --git a/include/crypto/internal/acompress.h b/include/crypto/internal/acompress.h
index cfc47e18820fb..49339003bd2ce 100644
--- a/include/crypto/internal/acompress.h
+++ b/include/crypto/internal/acompress.h
@@ -8,7 +8,9 @@
  */
 #ifndef _CRYPTO_ACOMP_INT_H
 #define _CRYPTO_ACOMP_INT_H
+
 #include <crypto/acompress.h>
+#include <crypto/algapi.h>
 
 /*
  * Transform internal helpers.
diff --git a/include/crypto/internal/scompress.h b/include/crypto/internal/scompress.h
index f834274c2493f..252cc949d4ee5 100644
--- a/include/crypto/internal/scompress.h
+++ b/include/crypto/internal/scompress.h
@@ -8,7 +8,8 @@
  */
 #ifndef _CRYPTO_SCOMP_INT_H
 #define _CRYPTO_SCOMP_INT_H
-#include <linux/crypto.h>
+
+#include <crypto/algapi.h>
 
 #define SCOMP_SCRATCH_SIZE	131072
 
-- 
GitLab


From e634ac4a8aaab37bdc69177df9b40acf92eccc6d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:31 +0800
Subject: [PATCH 3247/4122] crypto: api - Add crypto_tfm_ctx_dma

This patch adds the helpers crypto_tfm_ctx_aligned and
crypto_tfm_ctx_dma.  The first aligns the tfm context to the
value cra_alignmask.  The second sets the alignment according
to dma_cache_get_alignment();

This patch also moves crypto_tfm_ctx into algapi.h.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/algapi.h | 41 +++++++++++++++++++++++++++++++++++++++--
 include/linux/crypto.h  |  5 -----
 2 files changed, 39 insertions(+), 7 deletions(-)

diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index f50c5d1725da5..4c99eb66e654c 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -7,6 +7,7 @@
 #ifndef _CRYPTO_ALGAPI_H
 #define _CRYPTO_ALGAPI_H
 
+#include <asm/cache.h>
 #include <linux/align.h>
 #include <linux/crypto.h>
 #include <linux/kconfig.h>
@@ -25,6 +26,14 @@
 #define MAX_CIPHER_BLOCKSIZE		16
 #define MAX_CIPHER_ALIGNMASK		15
 
+#ifdef ARCH_DMA_MINALIGN
+#define CRYPTO_DMA_ALIGN ARCH_DMA_MINALIGN
+#else
+#define CRYPTO_DMA_ALIGN CRYPTO_MINALIGN
+#endif
+
+#define CRYPTO_DMA_PADDING ((CRYPTO_DMA_ALIGN - 1) & ~(CRYPTO_MINALIGN - 1))
+
 struct crypto_aead;
 struct crypto_instance;
 struct module;
@@ -189,10 +198,38 @@ static inline void crypto_xor_cpy(u8 *dst, const u8 *src1, const u8 *src2,
 	}
 }
 
+static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
+{
+	return tfm->__crt_ctx;
+}
+
+static inline void *crypto_tfm_ctx_align(struct crypto_tfm *tfm,
+					 unsigned int align)
+{
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+
+	return PTR_ALIGN(crypto_tfm_ctx(tfm), align);
+}
+
 static inline void *crypto_tfm_ctx_aligned(struct crypto_tfm *tfm)
 {
-	return PTR_ALIGN(crypto_tfm_ctx(tfm),
-			 crypto_tfm_alg_alignmask(tfm) + 1);
+	return crypto_tfm_ctx_align(tfm, crypto_tfm_alg_alignmask(tfm) + 1);
+}
+
+static inline unsigned int crypto_dma_align(void)
+{
+	return CRYPTO_DMA_ALIGN;
+}
+
+static inline unsigned int crypto_dma_padding(void)
+{
+	return (crypto_dma_align() - 1) & ~(crypto_tfm_ctx_alignment() - 1);
+}
+
+static inline void *crypto_tfm_ctx_dma(struct crypto_tfm *tfm)
+{
+	return crypto_tfm_ctx_align(tfm, crypto_dma_align());
 }
 
 static inline struct crypto_instance *crypto_tfm_alg_instance(
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index 2324ab6f1846b..5d1e961f810ec 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -714,11 +714,6 @@ static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags)
 	tfm->crt_flags &= ~flags;
 }
 
-static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm)
-{
-	return tfm->__crt_ctx;
-}
-
 static inline unsigned int crypto_tfm_ctx_alignment(void)
 {
 	struct crypto_tfm *tfm;
-- 
GitLab


From f8e4d1d0ac832de8efc98f302acf9476bbfffb55 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:33 +0800
Subject: [PATCH 3248/4122] crypto: aead - Add ctx helpers with DMA alignment

This patch adds helpers to access the aead context structure and
request context structure with an added alignment for DMA access.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/aead.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/include/crypto/internal/aead.h b/include/crypto/internal/aead.h
index d482017f3e203..cd8cb1e921b7b 100644
--- a/include/crypto/internal/aead.h
+++ b/include/crypto/internal/aead.h
@@ -39,6 +39,11 @@ static inline void *crypto_aead_ctx(struct crypto_aead *tfm)
 	return crypto_tfm_ctx(&tfm->base);
 }
 
+static inline void *crypto_aead_ctx_dma(struct crypto_aead *tfm)
+{
+	return crypto_tfm_ctx_dma(&tfm->base);
+}
+
 static inline struct crypto_instance *aead_crypto_instance(
 	struct aead_instance *inst)
 {
@@ -65,6 +70,16 @@ static inline void *aead_request_ctx(struct aead_request *req)
 	return req->__ctx;
 }
 
+static inline void *aead_request_ctx_dma(struct aead_request *req)
+{
+	unsigned int align = crypto_dma_align();
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+
+	return PTR_ALIGN(aead_request_ctx(req), align);
+}
+
 static inline void aead_request_complete(struct aead_request *req, int err)
 {
 	req->base.complete(&req->base, err);
@@ -108,6 +123,13 @@ static inline void crypto_aead_set_reqsize(struct crypto_aead *aead,
 	aead->reqsize = reqsize;
 }
 
+static inline void crypto_aead_set_reqsize_dma(struct crypto_aead *aead,
+					       unsigned int reqsize)
+{
+	reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1);
+	aead->reqsize = reqsize;
+}
+
 static inline void aead_init_queue(struct aead_queue *queue,
 				   unsigned int max_qlen)
 {
-- 
GitLab


From b5f755fbd5d1102104c502ae213e9b42478c098f Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:35 +0800
Subject: [PATCH 3249/4122] crypto: hash - Add ctx helpers with DMA alignment

This patch adds helpers to access the ahash context structure and
request context structure with an added alignment for DMA access.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/hash.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 0a288dddcf5be..1a2a41b79253c 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -140,6 +140,11 @@ static inline void *crypto_ahash_ctx(struct crypto_ahash *tfm)
 	return crypto_tfm_ctx(crypto_ahash_tfm(tfm));
 }
 
+static inline void *crypto_ahash_ctx_dma(struct crypto_ahash *tfm)
+{
+	return crypto_tfm_ctx_dma(crypto_ahash_tfm(tfm));
+}
+
 static inline struct ahash_alg *__crypto_ahash_alg(struct crypto_alg *alg)
 {
 	return container_of(__crypto_hash_alg_common(alg), struct ahash_alg,
@@ -152,6 +157,13 @@ static inline void crypto_ahash_set_reqsize(struct crypto_ahash *tfm,
 	tfm->reqsize = reqsize;
 }
 
+static inline void crypto_ahash_set_reqsize_dma(struct crypto_ahash *ahash,
+						unsigned int reqsize)
+{
+	reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1);
+	ahash->reqsize = reqsize;
+}
+
 static inline struct crypto_instance *ahash_crypto_instance(
 	struct ahash_instance *inst)
 {
@@ -175,6 +187,16 @@ static inline void *ahash_instance_ctx(struct ahash_instance *inst)
 	return crypto_instance_ctx(ahash_crypto_instance(inst));
 }
 
+static inline void *ahash_request_ctx_dma(struct ahash_request *req)
+{
+	unsigned int align = crypto_dma_align();
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+
+	return PTR_ALIGN(ahash_request_ctx(req), align);
+}
+
 static inline void ahash_request_complete(struct ahash_request *req, int err)
 {
 	req->base.complete(&req->base, err);
-- 
GitLab


From 12658ac5e612214023c26f0689e6bbe8bbea0871 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:37 +0800
Subject: [PATCH 3250/4122] crypto: skcipher - Add ctx helpers with DMA
 alignment

This patch adds helpers to access the skcipher context structure and
request context structure with an added alignment for DMA access.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/skcipher.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index 2a97540156bbf..06d0a5491cf38 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -130,6 +130,13 @@ static inline void crypto_skcipher_set_reqsize(
 	skcipher->reqsize = reqsize;
 }
 
+static inline void crypto_skcipher_set_reqsize_dma(
+	struct crypto_skcipher *skcipher, unsigned int reqsize)
+{
+	reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1);
+	skcipher->reqsize = reqsize;
+}
+
 int crypto_register_skcipher(struct skcipher_alg *alg);
 void crypto_unregister_skcipher(struct skcipher_alg *alg);
 int crypto_register_skciphers(struct skcipher_alg *algs, int count);
@@ -159,11 +166,26 @@ static inline void *crypto_skcipher_ctx(struct crypto_skcipher *tfm)
 	return crypto_tfm_ctx(&tfm->base);
 }
 
+static inline void *crypto_skcipher_ctx_dma(struct crypto_skcipher *tfm)
+{
+	return crypto_tfm_ctx_dma(&tfm->base);
+}
+
 static inline void *skcipher_request_ctx(struct skcipher_request *req)
 {
 	return req->__ctx;
 }
 
+static inline void *skcipher_request_ctx_dma(struct skcipher_request *req)
+{
+	unsigned int align = crypto_dma_align();
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+
+	return PTR_ALIGN(skcipher_request_ctx(req), align);
+}
+
 static inline u32 skcipher_request_flags(struct skcipher_request *req)
 {
 	return req->base.flags;
-- 
GitLab


From 1c799571976da15e055f32a0e244697500e97f64 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:39 +0800
Subject: [PATCH 3251/4122] crypto: api - Increase MAX_ALGAPI_ALIGNMASK to 127

Previously we limited the maximum alignment mask to 63.  This
is mostly due to stack usage for shash.  This patch introduces
a separate limit for shash algorithms and increases the general
limit to 127 which is the value that we need for DMA allocations
on arm64.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/shash.c          | 9 +++++++--
 include/crypto/algapi.h | 2 +-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/crypto/shash.c b/crypto/shash.c
index 0f85431588267..868b6ba2b3b74 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -18,6 +18,8 @@
 
 #include "internal.h"
 
+#define MAX_SHASH_ALIGNMASK 63
+
 static const struct crypto_type crypto_shash_type;
 
 int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
@@ -88,7 +90,7 @@ static int shash_update_unaligned(struct shash_desc *desc, const u8 *data,
 	 * We cannot count on __aligned() working for large values:
 	 * https://patchwork.kernel.org/patch/9507697/
 	 */
-	u8 ubuf[MAX_ALGAPI_ALIGNMASK * 2];
+	u8 ubuf[MAX_SHASH_ALIGNMASK * 2];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;
 
@@ -130,7 +132,7 @@ static int shash_final_unaligned(struct shash_desc *desc, u8 *out)
 	 * We cannot count on __aligned() working for large values:
 	 * https://patchwork.kernel.org/patch/9507697/
 	 */
-	u8 ubuf[MAX_ALGAPI_ALIGNMASK + HASH_MAX_DIGESTSIZE];
+	u8 ubuf[MAX_SHASH_ALIGNMASK + HASH_MAX_DIGESTSIZE];
 	u8 *buf = PTR_ALIGN(&ubuf[0], alignmask + 1);
 	int err;
 
@@ -524,6 +526,9 @@ static int shash_prepare_alg(struct shash_alg *alg)
 	    alg->statesize > HASH_MAX_STATESIZE)
 		return -EINVAL;
 
+	if (base->cra_alignmask > MAX_SHASH_ALIGNMASK)
+		return -EINVAL;
+
 	if ((alg->export && !alg->import) || (alg->import && !alg->export))
 		return -EINVAL;
 
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 4c99eb66e654c..8722fd67f40ae 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -22,7 +22,7 @@
  * algs and architectures. Ciphers have a lower maximum size.
  */
 #define MAX_ALGAPI_BLOCKSIZE		160
-#define MAX_ALGAPI_ALIGNMASK		63
+#define MAX_ALGAPI_ALIGNMASK		127
 #define MAX_CIPHER_BLOCKSIZE		16
 #define MAX_CIPHER_ALIGNMASK		15
 
-- 
GitLab


From 4ac3377645e98d319cb5404e72d40a4aa69d252c Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:41 +0800
Subject: [PATCH 3252/4122] crypto: akcipher - Add ctx helpers with DMA
 alignment

This patch adds helpers to access the akcipher context structure and
request context structure with an added alignment for DMA access.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/akcipher.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/include/crypto/internal/akcipher.h b/include/crypto/internal/akcipher.h
index 1474a2d890fc4..aaf1092b93b80 100644
--- a/include/crypto/internal/akcipher.h
+++ b/include/crypto/internal/akcipher.h
@@ -33,15 +33,37 @@ static inline void *akcipher_request_ctx(struct akcipher_request *req)
 	return req->__ctx;
 }
 
+static inline void *akcipher_request_ctx_dma(struct akcipher_request *req)
+{
+	unsigned int align = crypto_dma_align();
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+
+	return PTR_ALIGN(akcipher_request_ctx(req), align);
+}
+
 static inline void akcipher_set_reqsize(struct crypto_akcipher *akcipher,
 					unsigned int reqsize)
 {
 	akcipher->reqsize = reqsize;
 }
 
+static inline void akcipher_set_reqsize_dma(struct crypto_akcipher *akcipher,
+					    unsigned int reqsize)
+{
+	reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1);
+	akcipher->reqsize = reqsize;
+}
+
 static inline void *akcipher_tfm_ctx(struct crypto_akcipher *tfm)
 {
-	return tfm->base.__crt_ctx;
+	return crypto_tfm_ctx(&tfm->base);
+}
+
+static inline void *akcipher_tfm_ctx_dma(struct crypto_akcipher *tfm)
+{
+	return crypto_tfm_ctx_dma(&tfm->base);
 }
 
 static inline void akcipher_request_complete(struct akcipher_request *req,
-- 
GitLab


From a5a49249effb6f03086214b25719d415cc867b3d Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:43 +0800
Subject: [PATCH 3253/4122] crypto: kpp - Add ctx helpers with DMA alignment

This patch adds helpers to access the kpp context structure and
request context structure with an added alignment for DMA access.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/internal/kpp.h | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/include/crypto/internal/kpp.h b/include/crypto/internal/kpp.h
index 167662407e36b..3c9726e89f537 100644
--- a/include/crypto/internal/kpp.h
+++ b/include/crypto/internal/kpp.h
@@ -50,15 +50,37 @@ static inline void *kpp_request_ctx(struct kpp_request *req)
 	return req->__ctx;
 }
 
+static inline void *kpp_request_ctx_dma(struct kpp_request *req)
+{
+	unsigned int align = crypto_dma_align();
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+
+	return PTR_ALIGN(kpp_request_ctx(req), align);
+}
+
 static inline void kpp_set_reqsize(struct crypto_kpp *kpp,
 				   unsigned int reqsize)
 {
 	kpp->reqsize = reqsize;
 }
 
+static inline void kpp_set_reqsize_dma(struct crypto_kpp *kpp,
+				       unsigned int reqsize)
+{
+	reqsize += crypto_dma_align() & ~(crypto_tfm_ctx_alignment() - 1);
+	kpp->reqsize = reqsize;
+}
+
 static inline void *kpp_tfm_ctx(struct crypto_kpp *tfm)
 {
-	return tfm->base.__crt_ctx;
+	return crypto_tfm_ctx(&tfm->base);
+}
+
+static inline void *kpp_tfm_ctx_dma(struct crypto_kpp *tfm)
+{
+	return crypto_tfm_ctx_dma(&tfm->base);
 }
 
 static inline void kpp_request_complete(struct kpp_request *req, int err)
-- 
GitLab


From 4cb4f7c11deef5222ac15631b16ab54625b926b3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Nov 2022 12:36:45 +0800
Subject: [PATCH 3254/4122] crypto: caam - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/caam/caamalg.c     |  72 ++++++-------
 drivers/crypto/caam/caamalg_qi.c  |  52 ++++-----
 drivers/crypto/caam/caamalg_qi2.c | 173 +++++++++++++++---------------
 drivers/crypto/caam/caamhash.c    |  87 ++++++++-------
 drivers/crypto/caam/caampkc.c     |  47 ++++----
 5 files changed, 216 insertions(+), 215 deletions(-)

diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index d3d8bb0a69900..ecc15bc521db1 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -131,7 +131,7 @@ struct caam_aead_req_ctx {
 
 static int aead_null_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	u32 *desc;
@@ -184,7 +184,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 struct caam_aead_alg, aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	u32 ctx1_iv_off = 0;
@@ -312,7 +312,7 @@ skip_givenc:
 static int aead_setauthsize(struct crypto_aead *authenc,
 				    unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 
 	ctx->authsize = authsize;
 	aead_set_sh_desc(authenc);
@@ -322,7 +322,7 @@ static int aead_setauthsize(struct crypto_aead *authenc,
 
 static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
@@ -372,7 +372,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 
 static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 	int err;
 
 	err = crypto_gcm_check_authsize(authsize);
@@ -387,7 +387,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 
 static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
@@ -440,7 +440,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 static int rfc4106_setauthsize(struct crypto_aead *authenc,
 			       unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 	int err;
 
 	err = crypto_rfc4106_check_authsize(authsize);
@@ -455,7 +455,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc,
 
 static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
@@ -508,7 +508,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 static int rfc4543_setauthsize(struct crypto_aead *authenc,
 			       unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 
 	if (authsize != 16)
 		return -EINVAL;
@@ -521,7 +521,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc,
 
 static int chachapoly_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc;
@@ -547,7 +547,7 @@ static int chachapoly_set_sh_desc(struct crypto_aead *aead)
 static int chachapoly_setauthsize(struct crypto_aead *aead,
 				  unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 
 	if (authsize != POLY1305_DIGEST_SIZE)
 		return -EINVAL;
@@ -559,7 +559,7 @@ static int chachapoly_setauthsize(struct crypto_aead *aead,
 static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 			     unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize;
 
@@ -575,7 +575,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 static int aead_setkey(struct crypto_aead *aead,
 			       const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	struct crypto_authenc_keys keys;
@@ -656,7 +656,7 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key,
 static int gcm_setkey(struct crypto_aead *aead,
 		      const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	int err;
 
@@ -677,7 +677,7 @@ static int gcm_setkey(struct crypto_aead *aead,
 static int rfc4106_setkey(struct crypto_aead *aead,
 			  const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	int err;
 
@@ -703,7 +703,7 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 static int rfc4543_setkey(struct crypto_aead *aead,
 			  const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	int err;
 
@@ -729,7 +729,7 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			   unsigned int keylen, const u32 ctx1_iv_off)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct caam_skcipher_alg *alg =
 		container_of(crypto_skcipher_alg(skcipher), typeof(*alg),
 			     skcipher);
@@ -832,7 +832,7 @@ static int des3_skcipher_setkey(struct crypto_skcipher *skcipher,
 static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			       unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	u32 *desc;
@@ -1057,7 +1057,7 @@ static void init_aead_job(struct aead_request *req,
 			  bool all_contig, bool encrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	int authsize = ctx->authsize;
 	u32 *desc = edesc->hw_desc;
 	u32 out_options, in_options;
@@ -1118,7 +1118,7 @@ static void init_gcm_job(struct aead_request *req,
 			 bool all_contig, bool encrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 *desc = edesc->hw_desc;
 	bool generic_gcm = (ivsize == GCM_AES_IV_SIZE);
@@ -1185,7 +1185,7 @@ static void init_authenc_job(struct aead_request *req,
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 struct caam_aead_alg, aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent);
 	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
 			       OP_ALG_AAI_CTR_MOD128);
@@ -1234,7 +1234,7 @@ static void init_skcipher_job(struct skcipher_request *req,
 			      const bool encrypt)
 {
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	int ivsize = crypto_skcipher_ivsize(skcipher);
 	u32 *desc = edesc->hw_desc;
@@ -1290,7 +1290,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 					   bool encrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_aead_req_ctx *rctx = aead_request_ctx(req);
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
@@ -1457,7 +1457,7 @@ static inline int chachapoly_crypt(struct aead_request *req, bool encrypt)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool all_contig;
 	u32 *desc;
@@ -1491,7 +1491,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool all_contig;
 
@@ -1524,7 +1524,7 @@ static int aead_decrypt(struct aead_request *req)
 static int aead_do_one_req(struct crypto_engine *engine, void *areq)
 {
 	struct aead_request *req = aead_request_cast(areq);
-	struct caam_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(crypto_aead_reqtfm(req));
 	struct caam_aead_req_ctx *rctx = aead_request_ctx(req);
 	u32 *desc = rctx->edesc->hw_desc;
 	int ret;
@@ -1550,7 +1550,7 @@ static inline int gcm_crypt(struct aead_request *req, bool encrypt)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	bool all_contig;
 
@@ -1597,7 +1597,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
 						   int desc_bytes)
 {
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req);
 	struct device *jrdev = ctx->jrdev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
@@ -1756,7 +1756,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
 static int skcipher_do_one_req(struct crypto_engine *engine, void *areq)
 {
 	struct skcipher_request *req = skcipher_request_cast(areq);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(crypto_skcipher_reqtfm(req));
 	struct caam_skcipher_req_ctx *rctx = skcipher_request_ctx(req);
 	u32 *desc = rctx->edesc->hw_desc;
 	int ret;
@@ -1790,7 +1790,7 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt)
 {
 	struct skcipher_edesc *edesc;
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
@@ -3397,7 +3397,7 @@ static int caam_cra_init(struct crypto_skcipher *tfm)
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct caam_skcipher_alg *caam_alg =
 		container_of(alg, typeof(*caam_alg), skcipher);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 	u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK;
 	int ret = 0;
 
@@ -3434,7 +3434,7 @@ static int caam_aead_init(struct crypto_aead *tfm)
 	struct aead_alg *alg = crypto_aead_alg(tfm);
 	struct caam_aead_alg *caam_alg =
 		 container_of(alg, struct caam_aead_alg, aead);
-	struct caam_ctx *ctx = crypto_aead_ctx(tfm);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	crypto_aead_set_reqsize(tfm, sizeof(struct caam_aead_req_ctx));
 
@@ -3454,7 +3454,7 @@ static void caam_exit_common(struct caam_ctx *ctx)
 
 static void caam_cra_exit(struct crypto_skcipher *tfm)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	if (ctx->fallback)
 		crypto_free_skcipher(ctx->fallback);
@@ -3463,7 +3463,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm)
 
 static void caam_aead_exit(struct crypto_aead *tfm)
 {
-	caam_exit_common(crypto_aead_ctx(tfm));
+	caam_exit_common(crypto_aead_ctx_dma(tfm));
 }
 
 void caam_algapi_exit(void)
@@ -3491,7 +3491,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
-	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding();
 	alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
 			      CRYPTO_ALG_KERN_DRIVER_ONLY);
 
@@ -3505,7 +3505,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
-	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding();
 	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
 			      CRYPTO_ALG_KERN_DRIVER_ONLY;
 
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 189a7438b29c4..c37b67be0492d 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -81,7 +81,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 typeof(*alg), aead);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	u32 ctx1_iv_off = 0;
 	u32 *nonce = NULL;
@@ -184,7 +184,7 @@ skip_givenc:
 
 static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 
 	ctx->authsize = authsize;
 	aead_set_sh_desc(authenc);
@@ -195,7 +195,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 static int aead_setkey(struct crypto_aead *aead, const u8 *key,
 		       unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	struct crypto_authenc_keys keys;
@@ -299,7 +299,7 @@ static int des3_aead_setkey(struct crypto_aead *aead, const u8 *key,
 
 static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -342,7 +342,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 
 static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 	int err;
 
 	err = crypto_gcm_check_authsize(authsize);
@@ -358,7 +358,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 static int gcm_setkey(struct crypto_aead *aead,
 		      const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	int ret;
 
@@ -402,7 +402,7 @@ static int gcm_setkey(struct crypto_aead *aead,
 
 static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -446,7 +446,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 static int rfc4106_setauthsize(struct crypto_aead *authenc,
 			       unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 	int err;
 
 	err = crypto_rfc4106_check_authsize(authsize);
@@ -462,7 +462,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc,
 static int rfc4106_setkey(struct crypto_aead *aead,
 			  const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	int ret;
 
@@ -510,7 +510,7 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 
 static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	int rem_bytes = CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN -
 			ctx->cdata.keylen;
@@ -554,7 +554,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 static int rfc4543_setauthsize(struct crypto_aead *authenc,
 			       unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 
 	if (authsize != 16)
 		return -EINVAL;
@@ -568,7 +568,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc,
 static int rfc4543_setkey(struct crypto_aead *aead,
 			  const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *jrdev = ctx->jrdev;
 	int ret;
 
@@ -617,7 +617,7 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			   unsigned int keylen, const u32 ctx1_iv_off)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct caam_skcipher_alg *alg =
 		container_of(crypto_skcipher_alg(skcipher), typeof(*alg),
 			     skcipher);
@@ -731,7 +731,7 @@ static int des_skcipher_setkey(struct crypto_skcipher *skcipher,
 static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			       unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
 	int ret = 0;
@@ -915,7 +915,7 @@ static void aead_done(struct caam_drv_req *drv_req, u32 status)
 	struct aead_edesc *edesc;
 	struct aead_request *aead_req = drv_req->app_ctx;
 	struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
-	struct caam_ctx *caam_ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *caam_ctx = crypto_aead_ctx_dma(aead);
 	int ecode = 0;
 
 	qidev = caam_ctx->qidev;
@@ -937,7 +937,7 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 					   bool encrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 typeof(*alg), aead);
 	struct device *qidev = ctx->qidev;
@@ -1157,7 +1157,7 @@ static inline int aead_crypt(struct aead_request *req, bool encrypt)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	int ret;
 
 	if (unlikely(caam_congested))
@@ -1207,7 +1207,7 @@ static void skcipher_done(struct caam_drv_req *drv_req, u32 status)
 	struct skcipher_edesc *edesc;
 	struct skcipher_request *req = drv_req->app_ctx;
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *caam_ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *caam_ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *qidev = caam_ctx->qidev;
 	int ivsize = crypto_skcipher_ivsize(skcipher);
 	int ecode = 0;
@@ -1245,7 +1245,7 @@ static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
 						   bool encrypt)
 {
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *qidev = ctx->qidev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
@@ -1405,7 +1405,7 @@ static inline int skcipher_crypt(struct skcipher_request *req, bool encrypt)
 {
 	struct skcipher_edesc *edesc;
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(ctx->jrdev->parent);
 	int ret;
 
@@ -2491,7 +2491,7 @@ static int caam_cra_init(struct crypto_skcipher *tfm)
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct caam_skcipher_alg *caam_alg =
 		container_of(alg, typeof(*caam_alg), skcipher);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 	u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK;
 	int ret = 0;
 
@@ -2524,7 +2524,7 @@ static int caam_aead_init(struct crypto_aead *tfm)
 	struct aead_alg *alg = crypto_aead_alg(tfm);
 	struct caam_aead_alg *caam_alg = container_of(alg, typeof(*caam_alg),
 						      aead);
-	struct caam_ctx *ctx = crypto_aead_ctx(tfm);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	return caam_init_common(ctx, &caam_alg->caam, !caam_alg->caam.nodkp);
 }
@@ -2542,7 +2542,7 @@ static void caam_exit_common(struct caam_ctx *ctx)
 
 static void caam_cra_exit(struct crypto_skcipher *tfm)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	if (ctx->fallback)
 		crypto_free_skcipher(ctx->fallback);
@@ -2551,7 +2551,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm)
 
 static void caam_aead_exit(struct crypto_aead *tfm)
 {
-	caam_exit_common(crypto_aead_ctx(tfm));
+	caam_exit_common(crypto_aead_ctx_dma(tfm));
 }
 
 void caam_qi_algapi_exit(void)
@@ -2579,7 +2579,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
-	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding();
 	alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
 				CRYPTO_ALG_KERN_DRIVER_ONLY);
 
@@ -2593,7 +2593,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
-	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding();
 	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
 			      CRYPTO_ALG_KERN_DRIVER_ONLY;
 
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index 4482cb145d051..1b0dd742c53f9 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -134,12 +134,12 @@ static struct caam_request *to_caam_req(struct crypto_async_request *areq)
 {
 	switch (crypto_tfm_alg_type(areq->tfm)) {
 	case CRYPTO_ALG_TYPE_SKCIPHER:
-		return skcipher_request_ctx(skcipher_request_cast(areq));
+		return skcipher_request_ctx_dma(skcipher_request_cast(areq));
 	case CRYPTO_ALG_TYPE_AEAD:
-		return aead_request_ctx(container_of(areq, struct aead_request,
-						     base));
+		return aead_request_ctx_dma(
+			container_of(areq, struct aead_request, base));
 	case CRYPTO_ALG_TYPE_AHASH:
-		return ahash_request_ctx(ahash_request_cast(areq));
+		return ahash_request_ctx_dma(ahash_request_cast(areq));
 	default:
 		return ERR_PTR(-EINVAL);
 	}
@@ -171,7 +171,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 {
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 typeof(*alg), aead);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct device *dev = ctx->dev;
 	struct dpaa2_caam_priv *priv = dev_get_drvdata(dev);
@@ -276,7 +276,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead)
 
 static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 
 	ctx->authsize = authsize;
 	aead_set_sh_desc(authenc);
@@ -287,7 +287,7 @@ static int aead_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 static int aead_setkey(struct crypto_aead *aead, const u8 *key,
 		       unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *dev = ctx->dev;
 	struct crypto_authenc_keys keys;
 
@@ -350,10 +350,10 @@ static struct aead_edesc *aead_edesc_alloc(struct aead_request *req,
 					   bool encrypt)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_request *req_ctx = aead_request_ctx(req);
+	struct caam_request *req_ctx = aead_request_ctx_dma(req);
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct caam_aead_alg *alg = container_of(crypto_aead_alg(aead),
 						 typeof(*alg), aead);
 	struct device *dev = ctx->dev;
@@ -587,7 +587,7 @@ skip_out_fle:
 
 static int chachapoly_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct device *dev = ctx->dev;
 	struct caam_flc *flc;
@@ -620,7 +620,7 @@ static int chachapoly_set_sh_desc(struct crypto_aead *aead)
 static int chachapoly_setauthsize(struct crypto_aead *aead,
 				  unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 
 	if (authsize != POLY1305_DIGEST_SIZE)
 		return -EINVAL;
@@ -632,7 +632,7 @@ static int chachapoly_setauthsize(struct crypto_aead *aead,
 static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 			     unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	unsigned int saltlen = CHACHAPOLY_IV_SIZE - ivsize;
 
@@ -647,7 +647,7 @@ static int chachapoly_setkey(struct crypto_aead *aead, const u8 *key,
 
 static int gcm_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *dev = ctx->dev;
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_flc *flc;
@@ -704,7 +704,7 @@ static int gcm_set_sh_desc(struct crypto_aead *aead)
 
 static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 	int err;
 
 	err = crypto_gcm_check_authsize(authsize);
@@ -720,7 +720,7 @@ static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 static int gcm_setkey(struct crypto_aead *aead,
 		      const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *dev = ctx->dev;
 	int ret;
 
@@ -739,7 +739,7 @@ static int gcm_setkey(struct crypto_aead *aead,
 
 static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *dev = ctx->dev;
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_flc *flc;
@@ -799,7 +799,7 @@ static int rfc4106_set_sh_desc(struct crypto_aead *aead)
 static int rfc4106_setauthsize(struct crypto_aead *authenc,
 			       unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 	int err;
 
 	err = crypto_rfc4106_check_authsize(authsize);
@@ -815,7 +815,7 @@ static int rfc4106_setauthsize(struct crypto_aead *authenc,
 static int rfc4106_setkey(struct crypto_aead *aead,
 			  const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *dev = ctx->dev;
 	int ret;
 
@@ -840,7 +840,7 @@ static int rfc4106_setkey(struct crypto_aead *aead,
 
 static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *dev = ctx->dev;
 	unsigned int ivsize = crypto_aead_ivsize(aead);
 	struct caam_flc *flc;
@@ -900,7 +900,7 @@ static int rfc4543_set_sh_desc(struct crypto_aead *aead)
 static int rfc4543_setauthsize(struct crypto_aead *authenc,
 			       unsigned int authsize)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(authenc);
 
 	if (authsize != 16)
 		return -EINVAL;
@@ -914,7 +914,7 @@ static int rfc4543_setauthsize(struct crypto_aead *authenc,
 static int rfc4543_setkey(struct crypto_aead *aead,
 			  const u8 *key, unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	struct device *dev = ctx->dev;
 	int ret;
 
@@ -940,7 +940,7 @@ static int rfc4543_setkey(struct crypto_aead *aead,
 static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			   unsigned int keylen, const u32 ctx1_iv_off)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct caam_skcipher_alg *alg =
 		container_of(crypto_skcipher_alg(skcipher),
 			     struct caam_skcipher_alg, skcipher);
@@ -1059,7 +1059,7 @@ static int des3_skcipher_setkey(struct crypto_skcipher *skcipher,
 static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 			       unsigned int keylen)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *dev = ctx->dev;
 	struct dpaa2_caam_priv *priv = dev_get_drvdata(dev);
 	struct caam_flc *flc;
@@ -1109,10 +1109,10 @@ static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
 static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req)
 {
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_request *req_ctx = skcipher_request_ctx(req);
+	struct caam_request *req_ctx = skcipher_request_ctx_dma(req);
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct device *dev = ctx->dev;
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
@@ -1286,7 +1286,7 @@ static void aead_encrypt_done(void *cbk_ctx, u32 status)
 	struct caam_request *req_ctx = to_caam_req(areq);
 	struct aead_edesc *edesc = req_ctx->edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	int ecode = 0;
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
@@ -1307,7 +1307,7 @@ static void aead_decrypt_done(void *cbk_ctx, u32 status)
 	struct caam_request *req_ctx = to_caam_req(areq);
 	struct aead_edesc *edesc = req_ctx->edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
 	int ecode = 0;
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
@@ -1324,8 +1324,8 @@ static int aead_encrypt(struct aead_request *req)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
-	struct caam_request *caam_req = aead_request_ctx(req);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
+	struct caam_request *caam_req = aead_request_ctx_dma(req);
 	int ret;
 
 	/* allocate extended descriptor */
@@ -1352,8 +1352,8 @@ static int aead_decrypt(struct aead_request *req)
 {
 	struct aead_edesc *edesc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct caam_ctx *ctx = crypto_aead_ctx(aead);
-	struct caam_request *caam_req = aead_request_ctx(req);
+	struct caam_ctx *ctx = crypto_aead_ctx_dma(aead);
+	struct caam_request *caam_req = aead_request_ctx_dma(req);
 	int ret;
 
 	/* allocate extended descriptor */
@@ -1392,7 +1392,7 @@ static void skcipher_encrypt_done(void *cbk_ctx, u32 status)
 	struct skcipher_request *req = skcipher_request_cast(areq);
 	struct caam_request *req_ctx = to_caam_req(areq);
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct skcipher_edesc *edesc = req_ctx->edesc;
 	int ecode = 0;
 	int ivsize = crypto_skcipher_ivsize(skcipher);
@@ -1430,7 +1430,7 @@ static void skcipher_decrypt_done(void *cbk_ctx, u32 status)
 	struct skcipher_request *req = skcipher_request_cast(areq);
 	struct caam_request *req_ctx = to_caam_req(areq);
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
 	struct skcipher_edesc *edesc = req_ctx->edesc;
 	int ecode = 0;
 	int ivsize = crypto_skcipher_ivsize(skcipher);
@@ -1474,8 +1474,8 @@ static int skcipher_encrypt(struct skcipher_request *req)
 {
 	struct skcipher_edesc *edesc;
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
-	struct caam_request *caam_req = skcipher_request_ctx(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
+	struct caam_request *caam_req = skcipher_request_ctx_dma(req);
 	struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev);
 	int ret;
 
@@ -1524,8 +1524,8 @@ static int skcipher_decrypt(struct skcipher_request *req)
 {
 	struct skcipher_edesc *edesc;
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
-	struct caam_request *caam_req = skcipher_request_ctx(req);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(skcipher);
+	struct caam_request *caam_req = skcipher_request_ctx_dma(req);
 	struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev);
 	int ret;
 
@@ -1603,7 +1603,7 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm)
 	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
 	struct caam_skcipher_alg *caam_alg =
 		container_of(alg, typeof(*caam_alg), skcipher);
-	struct caam_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 	u32 alg_aai = caam_alg->caam.class1_alg_type & OP_ALG_AAI_MASK;
 	int ret = 0;
 
@@ -1621,10 +1621,12 @@ static int caam_cra_init_skcipher(struct crypto_skcipher *tfm)
 		}
 
 		ctx->fallback = fallback;
-		crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_request) +
-					    crypto_skcipher_reqsize(fallback));
+		crypto_skcipher_set_reqsize_dma(
+			tfm, sizeof(struct caam_request) +
+			     crypto_skcipher_reqsize(fallback));
 	} else {
-		crypto_skcipher_set_reqsize(tfm, sizeof(struct caam_request));
+		crypto_skcipher_set_reqsize_dma(tfm,
+						sizeof(struct caam_request));
 	}
 
 	ret = caam_cra_init(ctx, &caam_alg->caam, false);
@@ -1640,8 +1642,8 @@ static int caam_cra_init_aead(struct crypto_aead *tfm)
 	struct caam_aead_alg *caam_alg = container_of(alg, typeof(*caam_alg),
 						      aead);
 
-	crypto_aead_set_reqsize(tfm, sizeof(struct caam_request));
-	return caam_cra_init(crypto_aead_ctx(tfm), &caam_alg->caam,
+	crypto_aead_set_reqsize_dma(tfm, sizeof(struct caam_request));
+	return caam_cra_init(crypto_aead_ctx_dma(tfm), &caam_alg->caam,
 			     !caam_alg->caam.nodkp);
 }
 
@@ -1654,7 +1656,7 @@ static void caam_exit_common(struct caam_ctx *ctx)
 
 static void caam_cra_exit(struct crypto_skcipher *tfm)
 {
-	struct caam_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct caam_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	if (ctx->fallback)
 		crypto_free_skcipher(ctx->fallback);
@@ -1663,7 +1665,7 @@ static void caam_cra_exit(struct crypto_skcipher *tfm)
 
 static void caam_cra_exit_aead(struct crypto_aead *tfm)
 {
-	caam_exit_common(crypto_aead_ctx(tfm));
+	caam_exit_common(crypto_aead_ctx_dma(tfm));
 }
 
 static struct caam_skcipher_alg driver_algs[] = {
@@ -3008,7 +3010,7 @@ static void caam_skcipher_alg_init(struct caam_skcipher_alg *t_alg)
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
-	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding();
 	alg->base.cra_flags |= (CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
 			      CRYPTO_ALG_KERN_DRIVER_ONLY);
 
@@ -3022,7 +3024,7 @@ static void caam_aead_alg_init(struct caam_aead_alg *t_alg)
 
 	alg->base.cra_module = THIS_MODULE;
 	alg->base.cra_priority = CAAM_CRA_PRIORITY;
-	alg->base.cra_ctxsize = sizeof(struct caam_ctx);
+	alg->base.cra_ctxsize = sizeof(struct caam_ctx) + crypto_dma_padding();
 	alg->base.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY |
 			      CRYPTO_ALG_KERN_DRIVER_ONLY;
 
@@ -3132,7 +3134,7 @@ static inline int ctx_map_to_qm_sg(struct device *dev,
 
 static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct dpaa2_caam_priv *priv = dev_get_drvdata(ctx->dev);
 	struct caam_flc *flc;
@@ -3305,7 +3307,7 @@ err_flc:
 static int ahash_setkey(struct crypto_ahash *ahash, const u8 *key,
 			unsigned int keylen)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	unsigned int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
 	unsigned int digestsize = crypto_ahash_digestsize(ahash);
 	int ret;
@@ -3356,7 +3358,7 @@ bad_free_key:
 static inline void ahash_unmap(struct device *dev, struct ahash_edesc *edesc,
 			       struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	if (edesc->src_nents)
 		dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE);
@@ -3376,7 +3378,7 @@ static inline void ahash_unmap_ctx(struct device *dev,
 				   struct ahash_edesc *edesc,
 				   struct ahash_request *req, u32 flag)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	if (state->ctx_dma) {
 		dma_unmap_single(dev, state->ctx_dma, state->ctx_dma_len, flag);
@@ -3390,9 +3392,9 @@ static void ahash_done(void *cbk_ctx, u32 status)
 	struct crypto_async_request *areq = cbk_ctx;
 	struct ahash_request *req = ahash_request_cast(areq);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct ahash_edesc *edesc = state->caam_req.edesc;
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int ecode = 0;
 
@@ -3417,9 +3419,9 @@ static void ahash_done_bi(void *cbk_ctx, u32 status)
 	struct crypto_async_request *areq = cbk_ctx;
 	struct ahash_request *req = ahash_request_cast(areq);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct ahash_edesc *edesc = state->caam_req.edesc;
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int ecode = 0;
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
@@ -3455,9 +3457,9 @@ static void ahash_done_ctx_src(void *cbk_ctx, u32 status)
 	struct crypto_async_request *areq = cbk_ctx;
 	struct ahash_request *req = ahash_request_cast(areq);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct ahash_edesc *edesc = state->caam_req.edesc;
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int ecode = 0;
 
@@ -3482,9 +3484,9 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status)
 	struct crypto_async_request *areq = cbk_ctx;
 	struct ahash_request *req = ahash_request_cast(areq);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct ahash_edesc *edesc = state->caam_req.edesc;
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int ecode = 0;
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
@@ -3518,8 +3520,8 @@ static void ahash_done_ctx_dst(void *cbk_ctx, u32 status)
 static int ahash_update_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -3637,8 +3639,8 @@ unmap_ctx:
 static int ahash_final_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -3708,8 +3710,8 @@ unmap_ctx:
 static int ahash_finup_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -3802,8 +3804,8 @@ unmap_ctx:
 static int ahash_digest(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -3897,8 +3899,8 @@ unmap:
 static int ahash_final_no_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -3970,8 +3972,8 @@ unmap:
 static int ahash_update_no_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -4091,8 +4093,8 @@ unmap_ctx:
 static int ahash_finup_no_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -4187,8 +4189,8 @@ unmap:
 static int ahash_update_first(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_request *req_ctx = &state->caam_req;
 	struct dpaa2_fl_entry *in_fle = &req_ctx->fd_flt[1];
 	struct dpaa2_fl_entry *out_fle = &req_ctx->fd_flt[0];
@@ -4320,7 +4322,7 @@ static int ahash_finup_first(struct ahash_request *req)
 
 static int ahash_init(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	state->update = ahash_update_first;
 	state->finup = ahash_finup_first;
@@ -4337,28 +4339,28 @@ static int ahash_init(struct ahash_request *req)
 
 static int ahash_update(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	return state->update(req);
 }
 
 static int ahash_finup(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	return state->finup(req);
 }
 
 static int ahash_final(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	return state->final(req);
 }
 
 static int ahash_export(struct ahash_request *req, void *out)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_export_state *export = out;
 	u8 *buf = state->buf;
 	int len = state->buflen;
@@ -4375,7 +4377,7 @@ static int ahash_export(struct ahash_request *req, void *out)
 
 static int ahash_import(struct ahash_request *req, const void *in)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	const struct caam_export_state *export = in;
 
 	memset(state, 0, sizeof(*state));
@@ -4547,7 +4549,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 		 container_of(halg, struct ahash_alg, halg);
 	struct caam_hash_alg *caam_hash =
 		 container_of(alg, struct caam_hash_alg, ahash_alg);
-	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 	/* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */
 	static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE,
 					 HASH_MSG_LEN + SHA1_DIGEST_SIZE,
@@ -4594,8 +4596,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 				   OP_ALG_ALGSEL_SUBMASK) >>
 				  OP_ALG_ALGSEL_SHIFT];
 
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct caam_hash_state));
+	crypto_ahash_set_reqsize_dma(ahash, sizeof(struct caam_hash_state));
 
 	/*
 	 * For keyed hash algorithms shared descriptors
@@ -4606,7 +4607,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 
 static void caam_hash_cra_exit(struct crypto_tfm *tfm)
 {
-	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 
 	dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0], sizeof(ctx->flc),
 			       DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
@@ -4646,7 +4647,7 @@ static struct caam_hash_alg *caam_hash_alloc(struct device *dev,
 	alg->cra_module = THIS_MODULE;
 	alg->cra_init = caam_hash_cra_init;
 	alg->cra_exit = caam_hash_cra_exit;
-	alg->cra_ctxsize = sizeof(struct caam_hash_ctx);
+	alg->cra_ctxsize = sizeof(struct caam_hash_ctx) + crypto_dma_padding();
 	alg->cra_priority = CAAM_CRA_PRIORITY;
 	alg->cra_blocksize = template->blocksize;
 	alg->cra_alignmask = 0;
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index 36ef738e4a181..1050e965a4385 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -199,7 +199,7 @@ static inline int ctx_map_to_sec4_sg(struct device *jrdev,
 
 static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct device *jrdev = ctx->jrdev;
 	struct caam_drv_private *ctrlpriv = dev_get_drvdata(jrdev->parent);
@@ -255,7 +255,7 @@ static int ahash_set_sh_desc(struct crypto_ahash *ahash)
 
 static int axcbc_set_sh_desc(struct crypto_ahash *ahash)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc;
@@ -307,7 +307,7 @@ static int axcbc_set_sh_desc(struct crypto_ahash *ahash)
 
 static int acmac_set_sh_desc(struct crypto_ahash *ahash)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc;
@@ -421,7 +421,7 @@ static int hash_digest_key(struct caam_hash_ctx *ctx, u32 *keylen, u8 *key,
 static int ahash_setkey(struct crypto_ahash *ahash,
 			const u8 *key, unsigned int keylen)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	struct device *jrdev = ctx->jrdev;
 	int blocksize = crypto_tfm_alg_blocksize(&ahash->base);
 	int digestsize = crypto_ahash_digestsize(ahash);
@@ -484,7 +484,7 @@ static int ahash_setkey(struct crypto_ahash *ahash,
 static int axcbc_setkey(struct crypto_ahash *ahash, const u8 *key,
 			unsigned int keylen)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	struct device *jrdev = ctx->jrdev;
 
 	if (keylen != AES_KEYSIZE_128)
@@ -504,7 +504,7 @@ static int axcbc_setkey(struct crypto_ahash *ahash, const u8 *key,
 static int acmac_setkey(struct crypto_ahash *ahash, const u8 *key,
 			unsigned int keylen)
 {
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int err;
 
 	err = aes_check_keylen(keylen);
@@ -543,7 +543,7 @@ static inline void ahash_unmap(struct device *dev,
 			struct ahash_edesc *edesc,
 			struct ahash_request *req, int dst_len)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	if (edesc->src_nents)
 		dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE);
@@ -563,7 +563,7 @@ static inline void ahash_unmap_ctx(struct device *dev,
 			struct ahash_edesc *edesc,
 			struct ahash_request *req, int dst_len, u32 flag)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	if (state->ctx_dma) {
 		dma_unmap_single(dev, state->ctx_dma, state->ctx_dma_len, flag);
@@ -580,8 +580,8 @@ static inline void ahash_done_cpy(struct device *jrdev, u32 *desc, u32 err,
 	struct ahash_edesc *edesc;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
 	int digestsize = crypto_ahash_digestsize(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	int ecode = 0;
 	bool has_bklog;
 
@@ -630,8 +630,8 @@ static inline void ahash_done_switch(struct device *jrdev, u32 *desc, u32 err,
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(jrdev);
 	struct ahash_edesc *edesc;
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	int digestsize = crypto_ahash_digestsize(ahash);
 	int ecode = 0;
 	bool has_bklog;
@@ -695,8 +695,8 @@ static struct ahash_edesc *ahash_edesc_alloc(struct ahash_request *req,
 					     dma_addr_t sh_desc_dma)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ?
 		       GFP_KERNEL : GFP_ATOMIC;
 	struct ahash_edesc *edesc;
@@ -755,8 +755,8 @@ static int ahash_edesc_add_src(struct caam_hash_ctx *ctx,
 static int ahash_do_one_req(struct crypto_engine *engine, void *areq)
 {
 	struct ahash_request *req = ahash_request_cast(areq);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(req));
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(crypto_ahash_reqtfm(req));
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc = state->edesc->hw_desc;
 	int ret;
@@ -785,7 +785,7 @@ static int ahash_enqueue_req(struct device *jrdev,
 			     int dst_len, enum dma_data_direction dir)
 {
 	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct ahash_edesc *edesc = state->edesc;
 	u32 *desc = edesc->hw_desc;
 	int ret;
@@ -815,8 +815,8 @@ static int ahash_enqueue_req(struct device *jrdev,
 static int ahash_update_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	u8 *buf = state->buf;
 	int *buflen = &state->buflen;
@@ -940,8 +940,8 @@ unmap_ctx:
 static int ahash_final_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	int buflen = state->buflen;
 	u32 *desc;
@@ -1001,8 +1001,8 @@ static int ahash_final_ctx(struct ahash_request *req)
 static int ahash_finup_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	int buflen = state->buflen;
 	u32 *desc;
@@ -1075,8 +1075,8 @@ static int ahash_finup_ctx(struct ahash_request *req)
 static int ahash_digest(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc;
 	int digestsize = crypto_ahash_digestsize(ahash);
@@ -1142,8 +1142,8 @@ static int ahash_digest(struct ahash_request *req)
 static int ahash_final_no_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	u8 *buf = state->buf;
 	int buflen = state->buflen;
@@ -1191,8 +1191,8 @@ static int ahash_final_no_ctx(struct ahash_request *req)
 static int ahash_update_no_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	u8 *buf = state->buf;
 	int *buflen = &state->buflen;
@@ -1312,8 +1312,8 @@ static int ahash_update_no_ctx(struct ahash_request *req)
 static int ahash_finup_no_ctx(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	int buflen = state->buflen;
 	u32 *desc;
@@ -1388,8 +1388,8 @@ static int ahash_finup_no_ctx(struct ahash_request *req)
 static int ahash_update_first(struct ahash_request *req)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct device *jrdev = ctx->jrdev;
 	u8 *buf = state->buf;
 	int *buflen = &state->buflen;
@@ -1498,7 +1498,7 @@ static int ahash_finup_first(struct ahash_request *req)
 
 static int ahash_init(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	state->update = ahash_update_first;
 	state->finup = ahash_finup_first;
@@ -1515,28 +1515,28 @@ static int ahash_init(struct ahash_request *req)
 
 static int ahash_update(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	return state->update(req);
 }
 
 static int ahash_finup(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	return state->finup(req);
 }
 
 static int ahash_final(struct ahash_request *req)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 
 	return state->final(req);
 }
 
 static int ahash_export(struct ahash_request *req, void *out)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	struct caam_export_state *export = out;
 	u8 *buf = state->buf;
 	int len = state->buflen;
@@ -1553,7 +1553,7 @@ static int ahash_export(struct ahash_request *req, void *out)
 
 static int ahash_import(struct ahash_request *req, const void *in)
 {
-	struct caam_hash_state *state = ahash_request_ctx(req);
+	struct caam_hash_state *state = ahash_request_ctx_dma(req);
 	const struct caam_export_state *export = in;
 
 	memset(state, 0, sizeof(*state));
@@ -1762,7 +1762,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 		 container_of(halg, struct ahash_alg, halg);
 	struct caam_hash_alg *caam_hash =
 		 container_of(alg, struct caam_hash_alg, ahash_alg);
-	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct caam_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	/* Sizes for MDHA running digests: MD5, SHA1, 224, 256, 384, 512 */
 	static const u8 runninglen[] = { HASH_MSG_LEN + MD5_DIGEST_SIZE,
 					 HASH_MSG_LEN + SHA1_DIGEST_SIZE,
@@ -1854,8 +1854,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 
 	ctx->enginectx.op.do_one_request = ahash_do_one_req;
 
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct caam_hash_state));
+	crypto_ahash_set_reqsize_dma(ahash, sizeof(struct caam_hash_state));
 
 	/*
 	 * For keyed hash algorithms shared descriptors
@@ -1866,7 +1865,7 @@ static int caam_hash_cra_init(struct crypto_tfm *tfm)
 
 static void caam_hash_cra_exit(struct crypto_tfm *tfm)
 {
-	struct caam_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct caam_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 
 	dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma,
 			       offsetof(struct caam_hash_ctx, key) -
@@ -1926,7 +1925,7 @@ caam_hash_alloc(struct caam_hash_template *template,
 	alg->cra_module = THIS_MODULE;
 	alg->cra_init = caam_hash_cra_init;
 	alg->cra_exit = caam_hash_cra_exit;
-	alg->cra_ctxsize = sizeof(struct caam_hash_ctx);
+	alg->cra_ctxsize = sizeof(struct caam_hash_ctx) + crypto_dma_padding();
 	alg->cra_priority = CAAM_CRA_PRIORITY;
 	alg->cra_blocksize = template->blocksize;
 	alg->cra_alignmask = 0;
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 642846693d7c0..aef031946f337 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -57,7 +57,7 @@ static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc,
 			  struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct rsa_pub_pdb *pdb = &edesc->pdb.pub;
 
@@ -69,7 +69,7 @@ static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc,
 			      struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1;
 
@@ -81,7 +81,7 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc,
 			      struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
 	size_t p_sz = key->p_sz;
@@ -98,7 +98,7 @@ static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
 			      struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
 	size_t p_sz = key->p_sz;
@@ -149,7 +149,7 @@ static void rsa_priv_f_done(struct device *dev, u32 *desc, u32 err,
 	struct akcipher_request *req = context;
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct rsa_edesc *edesc;
@@ -242,7 +242,7 @@ static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
 					 size_t desclen)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct device *dev = ctx->dev;
 	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct caam_rsa_key *key = &ctx->key;
@@ -371,7 +371,7 @@ static int akcipher_do_one_req(struct crypto_engine *engine, void *areq)
 						    base);
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct device *jrdev = ctx->dev;
 	u32 *desc = req_ctx->edesc->hw_desc;
 	int ret;
@@ -399,7 +399,7 @@ static int set_rsa_pub_pdb(struct akcipher_request *req,
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct device *dev = ctx->dev;
 	struct rsa_pub_pdb *pdb = &edesc->pdb.pub;
@@ -444,7 +444,7 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req,
 			       struct rsa_edesc *edesc)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct device *dev = ctx->dev;
 	struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1;
@@ -491,7 +491,7 @@ static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
 			       struct rsa_edesc *edesc)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct device *dev = ctx->dev;
 	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
@@ -568,7 +568,7 @@ static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
 			       struct rsa_edesc *edesc)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct device *dev = ctx->dev;
 	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
@@ -664,7 +664,7 @@ static int akcipher_enqueue_req(struct device *jrdev,
 {
 	struct caam_drv_private_jr *jrpriv = dev_get_drvdata(jrdev);
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct caam_rsa_req_ctx *req_ctx = akcipher_request_ctx(req);
 	struct rsa_edesc *edesc = req_ctx->edesc;
@@ -707,7 +707,7 @@ static int akcipher_enqueue_req(struct device *jrdev,
 static int caam_rsa_enc(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
@@ -746,7 +746,7 @@ init_fail:
 static int caam_rsa_dec_priv_f1(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
@@ -775,7 +775,7 @@ init_fail:
 static int caam_rsa_dec_priv_f2(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
@@ -804,7 +804,7 @@ init_fail:
 static int caam_rsa_dec_priv_f3(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;
@@ -833,7 +833,7 @@ init_fail:
 static int caam_rsa_dec(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 	int ret;
 
@@ -936,7 +936,7 @@ static int caam_rsa_check_key_length(unsigned int len)
 static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key,
 				unsigned int keylen)
 {
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct rsa_key raw_key = {NULL};
 	struct caam_rsa_key *rsa_key = &ctx->key;
 	int ret;
@@ -1038,7 +1038,7 @@ free_p:
 static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
 				 unsigned int keylen)
 {
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct rsa_key raw_key = {NULL};
 	struct caam_rsa_key *rsa_key = &ctx->key;
 	int ret;
@@ -1089,7 +1089,7 @@ err:
 
 static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm)
 {
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 
 	return ctx->key.n_sz;
 }
@@ -1097,7 +1097,7 @@ static unsigned int caam_rsa_max_size(struct crypto_akcipher *tfm)
 /* Per session pkc's driver context creation function */
 static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
 {
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 
 	akcipher_set_reqsize(tfm, sizeof(struct caam_rsa_req_ctx));
 
@@ -1125,7 +1125,7 @@ static int caam_rsa_init_tfm(struct crypto_akcipher *tfm)
 /* Per session pkc's driver context cleanup function */
 static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm)
 {
-	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct caam_rsa_key *key = &ctx->key;
 
 	dma_unmap_single(ctx->dev, ctx->padding_dma, CAAM_RSA_MAX_INPUT_SIZE -
@@ -1148,7 +1148,8 @@ static struct caam_akcipher_alg caam_rsa = {
 			.cra_driver_name = "rsa-caam",
 			.cra_priority = 3000,
 			.cra_module = THIS_MODULE,
-			.cra_ctxsize = sizeof(struct caam_rsa_ctx),
+			.cra_ctxsize = sizeof(struct caam_rsa_ctx) +
+				       CRYPTO_DMA_PADDING,
 		},
 	}
 };
-- 
GitLab


From 2ae6feb1a1f6678fe11864f1b6920ed10b09ad6a Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Fri, 25 Nov 2022 20:18:11 +0800
Subject: [PATCH 3255/4122] crypto: ccree,hisilicon - Fix dependencies to
 correct algorithm

Commit d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory") moves
the SM3 and SM4 stand-alone library and the algorithm implementation for
the Crypto API into the same directory, and the corresponding relationship
of Kconfig is modified, CONFIG_CRYPTO_SM3/4 corresponds to the stand-alone
library of SM3/4, and CONFIG_CRYPTO_SM3/4_GENERIC corresponds to the
algorithm implementation for the Crypto API. Therefore, it is necessary
for this module to depend on the correct algorithm.

Fixes: d2825fa9365d ("crypto: sm3,sm4 - move into crypto directory")
Cc: Jason A. Donenfeld <Jason@zx2c4.com>
Cc: stable@vger.kernel.org # v5.19+
Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Kconfig           | 4 ++--
 drivers/crypto/hisilicon/Kconfig | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 2947888d3b828..dfb103f81a64b 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -800,8 +800,8 @@ config CRYPTO_DEV_CCREE
 	select CRYPTO_ECB
 	select CRYPTO_CTR
 	select CRYPTO_XTS
-	select CRYPTO_SM4
-	select CRYPTO_SM3
+	select CRYPTO_SM4_GENERIC
+	select CRYPTO_SM3_GENERIC
 	help
 	  Say 'Y' to enable a driver for the REE interface of the Arm
 	  TrustZone CryptoCell family of processors. Currently the
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index 27e1fa9120639..743ce4fc3158c 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -26,7 +26,7 @@ config CRYPTO_DEV_HISI_SEC2
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
-	select CRYPTO_SM4
+	select CRYPTO_SM4_GENERIC
 	depends on PCI && PCI_MSI
 	depends on UACCE || UACCE=n
 	depends on ARM64 || (COMPILE_TEST && 64BIT)
-- 
GitLab


From 84ecfe6f38ae4ee779ebd97ee173937fff565bf9 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Dec 2022 09:31:39 +0100
Subject: [PATCH 3256/4122] powerpc/code-patching: Remove #ifdef
 CONFIG_STRICT_KERNEL_RWX

No need to have one implementation of patch_instruction() for
CONFIG_STRICT_KERNEL_RWX and one for !CONFIG_STRICT_KERNEL_RWX.

In patch_instruction(), call raw_patch_instruction() when
!CONFIG_STRICT_KERNEL_RWX.

In poking_init(), bail out immediately, it will be equivalent
to the weak default implementation.

Everything else is declared static and will be discarded by
GCC when !CONFIG_STRICT_KERNEL_RWX.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/f67d2a109404d03e8fdf1ea15388c8778337a76b.1669969781.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/lib/code-patching.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 5b8f87db12176..a6a5047f8ba23 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -46,8 +46,6 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 	return __patch_instruction(addr, instr, addr);
 }
 
-#ifdef CONFIG_STRICT_KERNEL_RWX
-
 struct patch_context {
 	union {
 		struct vm_struct *area;
@@ -208,6 +206,9 @@ void __init poking_init(void)
 {
 	int ret;
 
+	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+		return;
+
 	if (mm_patch_enabled())
 		ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
 					"powerpc/text_poke_mm:online",
@@ -358,7 +359,8 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	 * when text_poke_area is not ready, but we still need
 	 * to allow patching. We just do the plain old patching
 	 */
-	if (!static_branch_likely(&poking_init_done))
+	if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) ||
+	    !static_branch_likely(&poking_init_done))
 		return raw_patch_instruction(addr, instr);
 
 	local_irq_save(flags);
@@ -370,14 +372,6 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
 
 	return err;
 }
-#else /* !CONFIG_STRICT_KERNEL_RWX */
-
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
-{
-	return raw_patch_instruction(addr, instr);
-}
-
-#endif /* CONFIG_STRICT_KERNEL_RWX */
 
 __ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);
 
-- 
GitLab


From 6076dc349b1c587c74c37027efff76f0fa4646f4 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Dec 2022 09:31:40 +0100
Subject: [PATCH 3257/4122] powerpc/feature-fixups: Refactor entry fixups
 patching

Several fonctions have the same loop for patching instructions.

Introduce function do_patch_entry_fixups() to refactor those loops.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/79eeff7b20a98f7136da5f79b1f7c436928f27f3.1669969781.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/lib/feature-fixups.c | 84 ++++++++++++-------------------
 1 file changed, 32 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 31f40f544de54..93b3f8ea38aa7 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -118,9 +118,33 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 }
 
 #ifdef CONFIG_PPC_BOOK3S_64
+static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
+				 bool do_fallback, void *fallback)
+{
+	int i;
+
+	for (i = 0; start < end; start++, i++) {
+		unsigned int *dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		// See comment in do_entry_flush_fixups() RE order of patching
+		if (do_fallback) {
+			patch_instruction(dest, ppc_inst(instrs[0]));
+			patch_instruction(dest + 2, ppc_inst(instrs[2]));
+			patch_branch(dest + 1, (unsigned long)fallback, BRANCH_SET_LINK);
+		} else {
+			patch_instruction(dest + 1, ppc_inst(instrs[1]));
+			patch_instruction(dest + 2, ppc_inst(instrs[2]));
+			patch_instruction(dest, ppc_inst(instrs[0]));
+		}
+	}
+	return i;
+}
+
 static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
 {
-	unsigned int instrs[3], *dest;
+	unsigned int instrs[3];
 	long *start, *end;
 	int i;
 
@@ -144,23 +168,8 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
 		instrs[i++] = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
 	}
 
-	for (i = 0; start < end; start++, i++) {
-		dest = (void *)start + *start;
-
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-
-		// See comment in do_entry_flush_fixups() RE order of patching
-		if (types & STF_BARRIER_FALLBACK) {
-			patch_instruction(dest, ppc_inst(instrs[0]));
-			patch_instruction(dest + 2, ppc_inst(instrs[2]));
-			patch_branch(dest + 1,
-				     (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
-		} else {
-			patch_instruction(dest + 1, ppc_inst(instrs[1]));
-			patch_instruction(dest + 2, ppc_inst(instrs[2]));
-			patch_instruction(dest, ppc_inst(instrs[0]));
-		}
-	}
+	i = do_patch_entry_fixups(start, end, instrs, types & STF_BARRIER_FALLBACK,
+				  &stf_barrier_fallback);
 
 	printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
 		(types == STF_BARRIER_NONE)                  ? "no" :
@@ -325,7 +334,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
 static int __do_entry_flush_fixups(void *data)
 {
 	enum l1d_flush_type types = *(enum l1d_flush_type *)data;
-	unsigned int instrs[3], *dest;
+	unsigned int instrs[3];
 	long *start, *end;
 	int i;
 
@@ -375,42 +384,13 @@ static int __do_entry_flush_fixups(void *data)
 
 	start = PTRRELOC(&__start___entry_flush_fixup);
 	end = PTRRELOC(&__stop___entry_flush_fixup);
-	for (i = 0; start < end; start++, i++) {
-		dest = (void *)start + *start;
-
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-
-		if (types == L1D_FLUSH_FALLBACK) {
-			patch_instruction(dest, ppc_inst(instrs[0]));
-			patch_instruction(dest + 2, ppc_inst(instrs[2]));
-			patch_branch(dest + 1,
-				     (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
-		} else {
-			patch_instruction(dest + 1, ppc_inst(instrs[1]));
-			patch_instruction(dest + 2, ppc_inst(instrs[2]));
-			patch_instruction(dest, ppc_inst(instrs[0]));
-		}
-	}
+	i = do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+				  &entry_flush_fallback);
 
 	start = PTRRELOC(&__start___scv_entry_flush_fixup);
 	end = PTRRELOC(&__stop___scv_entry_flush_fixup);
-	for (; start < end; start++, i++) {
-		dest = (void *)start + *start;
-
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-
-		if (types == L1D_FLUSH_FALLBACK) {
-			patch_instruction(dest, ppc_inst(instrs[0]));
-			patch_instruction(dest + 2, ppc_inst(instrs[2]));
-			patch_branch(dest + 1,
-				     (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
-		} else {
-			patch_instruction(dest + 1, ppc_inst(instrs[1]));
-			patch_instruction(dest + 2, ppc_inst(instrs[2]));
-			patch_instruction(dest, ppc_inst(instrs[0]));
-		}
-	}
-
+	i += do_patch_entry_fixups(start, end, instrs, types == L1D_FLUSH_FALLBACK,
+				   &scv_entry_flush_fallback);
 
 	printk(KERN_DEBUG "entry-flush: patched %d locations (%s flush)\n", i,
 		(types == L1D_FLUSH_NONE)       ? "no" :
-- 
GitLab


From 3d1dbbca33a9c6dd3aafd4d14aaea9cc310723e1 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Dec 2022 09:31:41 +0100
Subject: [PATCH 3258/4122] powerpc/feature-fixups: Refactor other fixups
 patching

Several fonctions have the same loop for patching instructions.

Introduce function do_patch_fixups() to refactor those loops.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/58ab36949c18f94d466fc98d6c085783b0cd474f.1669969781.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/lib/feature-fixups.c | 77 +++++++++++--------------------
 1 file changed, 28 insertions(+), 49 deletions(-)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 93b3f8ea38aa7..25168a59d1ce1 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -117,6 +117,24 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 	}
 }
 
+#ifdef CONFIG_PPC_BARRIER_NOSPEC
+static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
+{
+	int i;
+
+	for (i = 0; start < end; start++, i++) {
+		int j;
+		unsigned int *dest = (void *)start + *start;
+
+		pr_devel("patching dest %lx\n", (unsigned long)dest);
+
+		for (j = 0; j < num; j++)
+			patch_instruction(dest + j, ppc_inst(instrs[j]));
+	}
+	return i;
+}
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
 				 bool do_fallback, void *fallback)
@@ -181,7 +199,7 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
 
 static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
 {
-	unsigned int instrs[6], *dest;
+	unsigned int instrs[6];
 	long *start, *end;
 	int i;
 
@@ -215,18 +233,8 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
 		instrs[i++] = PPC_RAW_EIEIO() | 0x02000000; /* eieio + bit 6 hint */
 	}
 
-	for (i = 0; start < end; start++, i++) {
-		dest = (void *)start + *start;
+	i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
 
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-
-		patch_instruction(dest, ppc_inst(instrs[0]));
-		patch_instruction(dest + 1, ppc_inst(instrs[1]));
-		patch_instruction(dest + 2, ppc_inst(instrs[2]));
-		patch_instruction(dest + 3, ppc_inst(instrs[3]));
-		patch_instruction(dest + 4, ppc_inst(instrs[4]));
-		patch_instruction(dest + 5, ppc_inst(instrs[5]));
-	}
 	printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i,
 		(types == STF_BARRIER_NONE)                  ? "no" :
 		(types == STF_BARRIER_FALLBACK)              ? "fallback" :
@@ -283,7 +291,7 @@ void do_stf_barrier_fixups(enum stf_barrier_type types)
 
 void do_uaccess_flush_fixups(enum l1d_flush_type types)
 {
-	unsigned int instrs[4], *dest;
+	unsigned int instrs[4];
 	long *start, *end;
 	int i;
 
@@ -309,17 +317,7 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
 	if (types & L1D_FLUSH_MTTRIG)
 		instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
 
-	for (i = 0; start < end; start++, i++) {
-		dest = (void *)start + *start;
-
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-
-		patch_instruction(dest, ppc_inst(instrs[0]));
-
-		patch_instruction(dest + 1, ppc_inst(instrs[1]));
-		patch_instruction(dest + 2, ppc_inst(instrs[2]));
-		patch_instruction(dest + 3, ppc_inst(instrs[3]));
-	}
+	i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
 
 	printk(KERN_DEBUG "uaccess-flush: patched %d locations (%s flush)\n", i,
 		(types == L1D_FLUSH_NONE)       ? "no" :
@@ -418,7 +416,7 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
 static int __do_rfi_flush_fixups(void *data)
 {
 	enum l1d_flush_type types = *(enum l1d_flush_type *)data;
-	unsigned int instrs[3], *dest;
+	unsigned int instrs[3];
 	long *start, *end;
 	int i;
 
@@ -442,15 +440,7 @@ static int __do_rfi_flush_fixups(void *data)
 	if (types & L1D_FLUSH_MTTRIG)
 		instrs[i++] = PPC_RAW_MTSPR(SPRN_TRIG2, _R0);
 
-	for (i = 0; start < end; start++, i++) {
-		dest = (void *)start + *start;
-
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-
-		patch_instruction(dest, ppc_inst(instrs[0]));
-		patch_instruction(dest + 1, ppc_inst(instrs[1]));
-		patch_instruction(dest + 2, ppc_inst(instrs[2]));
-	}
+	i = do_patch_fixups(start, end, instrs, ARRAY_SIZE(instrs));
 
 	printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i,
 		(types == L1D_FLUSH_NONE)       ? "no" :
@@ -492,7 +482,7 @@ void do_rfi_flush_fixups(enum l1d_flush_type types)
 
 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
 {
-	unsigned int instr, *dest;
+	unsigned int instr;
 	long *start, *end;
 	int i;
 
@@ -506,12 +496,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
 		instr = PPC_RAW_ORI(_R31, _R31, 0); /* speculation barrier */
 	}
 
-	for (i = 0; start < end; start++, i++) {
-		dest = (void *)start + *start;
-
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-		patch_instruction(dest, ppc_inst(instr));
-	}
+	i = do_patch_fixups(start, end, &instr, 1);
 
 	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
 }
@@ -533,7 +518,7 @@ void do_barrier_nospec_fixups(bool enable)
 #ifdef CONFIG_PPC_E500
 void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_end)
 {
-	unsigned int instr[2], *dest;
+	unsigned int instr[2];
 	long *start, *end;
 	int i;
 
@@ -549,13 +534,7 @@ void do_barrier_nospec_fixups_range(bool enable, void *fixup_start, void *fixup_
 		instr[1] = PPC_RAW_SYNC();
 	}
 
-	for (i = 0; start < end; start++, i++) {
-		dest = (void *)start + *start;
-
-		pr_devel("patching dest %lx\n", (unsigned long)dest);
-		patch_instruction(dest, ppc_inst(instr[0]));
-		patch_instruction(dest + 1, ppc_inst(instr[1]));
-	}
+	i = do_patch_fixups(start, end, instr, ARRAY_SIZE(instr));
 
 	printk(KERN_DEBUG "barrier-nospec: patched %d locations\n", i);
 }
-- 
GitLab


From b988e7797d09379057cf991ae082f9ad7a309a63 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Dec 2022 09:31:42 +0100
Subject: [PATCH 3259/4122] powerpc/feature-fixups: Do not patch init section
 after init

Once init section is freed, attempting to patch init code
ends up in the weed.

Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections")
protected patch_instruction() against that, but it is the responsibility
of the caller to ensure that the patched memory is valid.

In the same spirit as jump_label with its jump_label_can_update()
function, add is_fixup_addr_valid() function to skip patching on
freed init section.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/8e9311fc1b057e4e6a2a3a0701ebcc74b787affe.1669969781.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/lib/feature-fixups.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c
index 25168a59d1ce1..80def1c2afcb6 100644
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -118,6 +118,12 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end)
 }
 
 #ifdef CONFIG_PPC_BARRIER_NOSPEC
+static bool is_fixup_addr_valid(void *dest, size_t size)
+{
+	return system_state < SYSTEM_FREEING_INITMEM ||
+	       !init_section_contains(dest, size);
+}
+
 static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num)
 {
 	int i;
@@ -126,6 +132,9 @@ static int do_patch_fixups(long *start, long *end, unsigned int *instrs, int num
 		int j;
 		unsigned int *dest = (void *)start + *start;
 
+		if (!is_fixup_addr_valid(dest, sizeof(*instrs) * num))
+			continue;
+
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
 		for (j = 0; j < num; j++)
@@ -144,6 +153,9 @@ static int do_patch_entry_fixups(long *start, long *end, unsigned int *instrs,
 	for (i = 0; start < end; start++, i++) {
 		unsigned int *dest = (void *)start + *start;
 
+		if (!is_fixup_addr_valid(dest, sizeof(*instrs) * 3))
+			continue;
+
 		pr_devel("patching dest %lx\n", (unsigned long)dest);
 
 		// See comment in do_entry_flush_fixups() RE order of patching
-- 
GitLab


From 6f3a81b60091031c2c14eb2373d1937b027deb46 Mon Sep 17 00:00:00 2001
From: Christophe Leroy <christophe.leroy@csgroup.eu>
Date: Fri, 2 Dec 2022 09:31:43 +0100
Subject: [PATCH 3260/4122] powerpc/code-patching: Remove protection against
 patching init addresses after init

Once init section is freed, attempting to patch init code
ends up in the weed.

Commit 51c3c62b58b3 ("powerpc: Avoid code patching freed init sections")
protected patch_instruction() against that, but it is the responsibility
of the caller to ensure that the patched memory is valid.

All callers have now been verified and fixed so the check
can be removed.

This improves ftrace activation by about 2% on 8xx.

Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/504310828f473d424e2ed229eff57bf075f52796.1669969781.git.christophe.leroy@csgroup.eu
---
 arch/powerpc/include/asm/code-patching.h |  2 --
 arch/powerpc/lib/code-patching.c         | 13 +------------
 arch/powerpc/mm/mem.c                    |  1 -
 3 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index 1c6316ec4b74f..3f881548fb61c 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -22,8 +22,6 @@
 #define BRANCH_SET_LINK	0x1
 #define BRANCH_ABSOLUTE	0x2
 
-DECLARE_STATIC_KEY_FALSE(init_mem_is_free);
-
 /*
  * Powerpc branch instruction is :
  *
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index a6a5047f8ba23..73ce4b90bb1b8 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -349,7 +349,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 	return err;
 }
 
-static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
+int patch_instruction(u32 *addr, ppc_inst_t instr)
 {
 	int err;
 	unsigned long flags;
@@ -372,17 +372,6 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
 
 	return err;
 }
-
-__ro_after_init DEFINE_STATIC_KEY_FALSE(init_mem_is_free);
-
-int patch_instruction(u32 *addr, ppc_inst_t instr)
-{
-	/* Make sure we aren't patching a freed init section */
-	if (static_branch_likely(&init_mem_is_free) && init_section_contains(addr, 4))
-		return 0;
-
-	return do_patch_instruction(addr, instr);
-}
 NOKPROBE_SYMBOL(patch_instruction);
 
 int patch_branch(u32 *addr, unsigned long target, int flags)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 84d171953ba44..8b121df7b08f8 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -344,7 +344,6 @@ void free_initmem(void)
 {
 	ppc_md.progress = ppc_printk_progress;
 	mark_initmem_nx();
-	static_branch_enable(&init_mem_is_free);
 	free_initmem_default(POISON_FREE_INITMEM);
 	ftrace_free_init_tramp();
 }
-- 
GitLab


From 25483dedd2f5d9bc6928cd790ee59772fb880a79 Mon Sep 17 00:00:00 2001
From: Vinod Koul <vkoul@kernel.org>
Date: Fri, 2 Dec 2022 17:11:44 +0530
Subject: [PATCH 3261/4122] dmaengine: Revert "dmaengine: remove s3c24xx
 driver"

This reverts cccc46ae3623 ("dmaengine: remove s3c24xx driver") as it
causes regression due to missing header

Signed-off-by: Vinod Koul <vkoul@kernel.org>
---
 drivers/dma/Kconfig                       |   12 +
 drivers/dma/Makefile                      |    1 +
 drivers/dma/s3c24xx-dma.c                 | 1428 +++++++++++++++++++++
 include/linux/platform_data/dma-s3c24xx.h |   48 +
 4 files changed, 1489 insertions(+)
 create mode 100644 drivers/dma/s3c24xx-dma.c
 create mode 100644 include/linux/platform_data/dma-s3c24xx.h

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index b9d54f20812f7..80848c6b5cd55 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -610,6 +610,18 @@ config SPRD_DMA
 	help
 	  Enable support for the on-chip DMA controller on Spreadtrum platform.
 
+config S3C24XX_DMAC
+	bool "Samsung S3C24XX DMA support"
+	depends on ARCH_S3C24XX || COMPILE_TEST
+	select DMA_ENGINE
+	select DMA_VIRTUAL_CHANNELS
+	help
+	  Support for the Samsung S3C24XX DMA controller driver. The
+	  DMA controller is having multiple DMA channels which can be
+	  configured for different peripherals like audio, UART, SPI.
+	  The DMA controller can transfer data from memory to peripheral,
+	  periphal to memory, periphal to periphal and memory to memory.
+
 config TXX9_DMAC
 	tristate "Toshiba TXx9 SoC DMA support"
 	depends on MACH_TX49XX
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index a4fd1ce295102..5b55ada052a7f 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -70,6 +70,7 @@ obj-$(CONFIG_STM32_DMA) += stm32-dma.o
 obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o
 obj-$(CONFIG_STM32_MDMA) += stm32-mdma.o
 obj-$(CONFIG_SPRD_DMA) += sprd-dma.o
+obj-$(CONFIG_S3C24XX_DMAC) += s3c24xx-dma.o
 obj-$(CONFIG_TXX9_DMAC) += txx9dmac.o
 obj-$(CONFIG_TEGRA186_GPC_DMA) += tegra186-gpc-dma.o
 obj-$(CONFIG_TEGRA20_APB_DMA) += tegra20-apb-dma.o
diff --git a/drivers/dma/s3c24xx-dma.c b/drivers/dma/s3c24xx-dma.c
new file mode 100644
index 0000000000000..a09eeb545f7d5
--- /dev/null
+++ b/drivers/dma/s3c24xx-dma.c
@@ -0,0 +1,1428 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ *
+ * based on amba-pl08x.c
+ *
+ * Copyright (c) 2006 ARM Ltd.
+ * Copyright (c) 2010 ST-Ericsson SA
+ *
+ * Author: Peter Pearse <peter.pearse@arm.com>
+ * Author: Linus Walleij <linus.walleij@stericsson.com>
+ *
+ * The DMA controllers in S3C24XX SoCs have a varying number of DMA signals
+ * that can be routed to any of the 4 to 8 hardware-channels.
+ *
+ * Therefore on these DMA controllers the number of channels
+ * and the number of incoming DMA signals are two totally different things.
+ * It is usually not possible to theoretically handle all physical signals,
+ * so a multiplexing scheme with possible denial of use is necessary.
+ *
+ * Open items:
+ * - bursts
+ */
+
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/clk.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+#include <linux/platform_data/dma-s3c24xx.h>
+
+#include "dmaengine.h"
+#include "virt-dma.h"
+
+#define MAX_DMA_CHANNELS	8
+
+#define S3C24XX_DISRC			0x00
+#define S3C24XX_DISRCC			0x04
+#define S3C24XX_DISRCC_INC_INCREMENT	0
+#define S3C24XX_DISRCC_INC_FIXED	BIT(0)
+#define S3C24XX_DISRCC_LOC_AHB		0
+#define S3C24XX_DISRCC_LOC_APB		BIT(1)
+
+#define S3C24XX_DIDST			0x08
+#define S3C24XX_DIDSTC			0x0c
+#define S3C24XX_DIDSTC_INC_INCREMENT	0
+#define S3C24XX_DIDSTC_INC_FIXED	BIT(0)
+#define S3C24XX_DIDSTC_LOC_AHB		0
+#define S3C24XX_DIDSTC_LOC_APB		BIT(1)
+#define S3C24XX_DIDSTC_INT_TC0		0
+#define S3C24XX_DIDSTC_INT_RELOAD	BIT(2)
+
+#define S3C24XX_DCON			0x10
+
+#define S3C24XX_DCON_TC_MASK		0xfffff
+#define S3C24XX_DCON_DSZ_BYTE		(0 << 20)
+#define S3C24XX_DCON_DSZ_HALFWORD	(1 << 20)
+#define S3C24XX_DCON_DSZ_WORD		(2 << 20)
+#define S3C24XX_DCON_DSZ_MASK		(3 << 20)
+#define S3C24XX_DCON_DSZ_SHIFT		20
+#define S3C24XX_DCON_AUTORELOAD		0
+#define S3C24XX_DCON_NORELOAD		BIT(22)
+#define S3C24XX_DCON_HWTRIG		BIT(23)
+#define S3C24XX_DCON_HWSRC_SHIFT	24
+#define S3C24XX_DCON_SERV_SINGLE	0
+#define S3C24XX_DCON_SERV_WHOLE		BIT(27)
+#define S3C24XX_DCON_TSZ_UNIT		0
+#define S3C24XX_DCON_TSZ_BURST4		BIT(28)
+#define S3C24XX_DCON_INT		BIT(29)
+#define S3C24XX_DCON_SYNC_PCLK		0
+#define S3C24XX_DCON_SYNC_HCLK		BIT(30)
+#define S3C24XX_DCON_DEMAND		0
+#define S3C24XX_DCON_HANDSHAKE		BIT(31)
+
+#define S3C24XX_DSTAT			0x14
+#define S3C24XX_DSTAT_STAT_BUSY		BIT(20)
+#define S3C24XX_DSTAT_CURRTC_MASK	0xfffff
+
+#define S3C24XX_DMASKTRIG		0x20
+#define S3C24XX_DMASKTRIG_SWTRIG	BIT(0)
+#define S3C24XX_DMASKTRIG_ON		BIT(1)
+#define S3C24XX_DMASKTRIG_STOP		BIT(2)
+
+#define S3C24XX_DMAREQSEL		0x24
+#define S3C24XX_DMAREQSEL_HW		BIT(0)
+
+/*
+ * S3C2410, S3C2440 and S3C2442 SoCs cannot select any physical channel
+ * for a DMA source. Instead only specific channels are valid.
+ * All of these SoCs have 4 physical channels and the number of request
+ * source bits is 3. Additionally we also need 1 bit to mark the channel
+ * as valid.
+ * Therefore we separate the chansel element of the channel data into 4
+ * parts of 4 bits each, to hold the information if the channel is valid
+ * and the hw request source to use.
+ *
+ * Example:
+ * SDI is valid on channels 0, 2 and 3 - with varying hw request sources.
+ * For it the chansel field would look like
+ *
+ * ((BIT(3) | 1) << 3 * 4) | // channel 3, with request source 1
+ * ((BIT(3) | 2) << 2 * 4) | // channel 2, with request source 2
+ * ((BIT(3) | 2) << 0 * 4)   // channel 0, with request source 2
+ */
+#define S3C24XX_CHANSEL_WIDTH		4
+#define S3C24XX_CHANSEL_VALID		BIT(3)
+#define S3C24XX_CHANSEL_REQ_MASK	7
+
+/*
+ * struct soc_data - vendor-specific config parameters for individual SoCs
+ * @stride: spacing between the registers of each channel
+ * @has_reqsel: does the controller use the newer requestselection mechanism
+ * @has_clocks: are controllable dma-clocks present
+ */
+struct soc_data {
+	int stride;
+	bool has_reqsel;
+	bool has_clocks;
+};
+
+/*
+ * enum s3c24xx_dma_chan_state - holds the virtual channel states
+ * @S3C24XX_DMA_CHAN_IDLE: the channel is idle
+ * @S3C24XX_DMA_CHAN_RUNNING: the channel has allocated a physical transport
+ * channel and is running a transfer on it
+ * @S3C24XX_DMA_CHAN_WAITING: the channel is waiting for a physical transport
+ * channel to become available (only pertains to memcpy channels)
+ */
+enum s3c24xx_dma_chan_state {
+	S3C24XX_DMA_CHAN_IDLE,
+	S3C24XX_DMA_CHAN_RUNNING,
+	S3C24XX_DMA_CHAN_WAITING,
+};
+
+/*
+ * struct s3c24xx_sg - structure containing data per sg
+ * @src_addr: src address of sg
+ * @dst_addr: dst address of sg
+ * @len: transfer len in bytes
+ * @node: node for txd's dsg_list
+ */
+struct s3c24xx_sg {
+	dma_addr_t src_addr;
+	dma_addr_t dst_addr;
+	size_t len;
+	struct list_head node;
+};
+
+/*
+ * struct s3c24xx_txd - wrapper for struct dma_async_tx_descriptor
+ * @vd: virtual DMA descriptor
+ * @dsg_list: list of children sg's
+ * @at: sg currently being transfered
+ * @width: transfer width
+ * @disrcc: value for source control register
+ * @didstc: value for destination control register
+ * @dcon: base value for dcon register
+ * @cyclic: indicate cyclic transfer
+ */
+struct s3c24xx_txd {
+	struct virt_dma_desc vd;
+	struct list_head dsg_list;
+	struct list_head *at;
+	u8 width;
+	u32 disrcc;
+	u32 didstc;
+	u32 dcon;
+	bool cyclic;
+};
+
+struct s3c24xx_dma_chan;
+
+/*
+ * struct s3c24xx_dma_phy - holder for the physical channels
+ * @id: physical index to this channel
+ * @valid: does the channel have all required elements
+ * @base: virtual memory base (remapped) for the this channel
+ * @irq: interrupt for this channel
+ * @clk: clock for this channel
+ * @lock: a lock to use when altering an instance of this struct
+ * @serving: virtual channel currently being served by this physicalchannel
+ * @host: a pointer to the host (internal use)
+ */
+struct s3c24xx_dma_phy {
+	unsigned int			id;
+	bool				valid;
+	void __iomem			*base;
+	int				irq;
+	struct clk			*clk;
+	spinlock_t			lock;
+	struct s3c24xx_dma_chan		*serving;
+	struct s3c24xx_dma_engine	*host;
+};
+
+/*
+ * struct s3c24xx_dma_chan - this structure wraps a DMA ENGINE channel
+ * @id: the id of the channel
+ * @name: name of the channel
+ * @vc: wrapped virtual channel
+ * @phy: the physical channel utilized by this channel, if there is one
+ * @runtime_addr: address for RX/TX according to the runtime config
+ * @at: active transaction on this channel
+ * @lock: a lock for this channel data
+ * @host: a pointer to the host (internal use)
+ * @state: whether the channel is idle, running etc
+ * @slave: whether this channel is a device (slave) or for memcpy
+ */
+struct s3c24xx_dma_chan {
+	int id;
+	const char *name;
+	struct virt_dma_chan vc;
+	struct s3c24xx_dma_phy *phy;
+	struct dma_slave_config cfg;
+	struct s3c24xx_txd *at;
+	struct s3c24xx_dma_engine *host;
+	enum s3c24xx_dma_chan_state state;
+	bool slave;
+};
+
+/*
+ * struct s3c24xx_dma_engine - the local state holder for the S3C24XX
+ * @pdev: the corresponding platform device
+ * @pdata: platform data passed in from the platform/machine
+ * @base: virtual memory base (remapped)
+ * @slave: slave engine for this instance
+ * @memcpy: memcpy engine for this instance
+ * @phy_chans: array of data for the physical channels
+ */
+struct s3c24xx_dma_engine {
+	struct platform_device			*pdev;
+	const struct s3c24xx_dma_platdata	*pdata;
+	struct soc_data				*sdata;
+	void __iomem				*base;
+	struct dma_device			slave;
+	struct dma_device			memcpy;
+	struct s3c24xx_dma_phy			*phy_chans;
+};
+
+/*
+ * Physical channel handling
+ */
+
+/*
+ * Check whether a certain channel is busy or not.
+ */
+static int s3c24xx_dma_phy_busy(struct s3c24xx_dma_phy *phy)
+{
+	unsigned int val = readl(phy->base + S3C24XX_DSTAT);
+	return val & S3C24XX_DSTAT_STAT_BUSY;
+}
+
+static bool s3c24xx_dma_phy_valid(struct s3c24xx_dma_chan *s3cchan,
+				  struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	int phyvalid;
+
+	/* every phy is valid for memcopy channels */
+	if (!s3cchan->slave)
+		return true;
+
+	/* On newer variants all phys can be used for all virtual channels */
+	if (s3cdma->sdata->has_reqsel)
+		return true;
+
+	phyvalid = (cdata->chansel >> (phy->id * S3C24XX_CHANSEL_WIDTH));
+	return (phyvalid & S3C24XX_CHANSEL_VALID) ? true : false;
+}
+
+/*
+ * Allocate a physical channel for a virtual channel
+ *
+ * Try to locate a physical channel to be used for this transfer. If all
+ * are taken return NULL and the requester will have to cope by using
+ * some fallback PIO mode or retrying later.
+ */
+static
+struct s3c24xx_dma_phy *s3c24xx_dma_get_phy(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy = NULL;
+	unsigned long flags;
+	int i;
+	int ret;
+
+	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+		phy = &s3cdma->phy_chans[i];
+
+		if (!phy->valid)
+			continue;
+
+		if (!s3c24xx_dma_phy_valid(s3cchan, phy))
+			continue;
+
+		spin_lock_irqsave(&phy->lock, flags);
+
+		if (!phy->serving) {
+			phy->serving = s3cchan;
+			spin_unlock_irqrestore(&phy->lock, flags);
+			break;
+		}
+
+		spin_unlock_irqrestore(&phy->lock, flags);
+	}
+
+	/* No physical channel available, cope with it */
+	if (i == s3cdma->pdata->num_phy_channels) {
+		dev_warn(&s3cdma->pdev->dev, "no phy channel available\n");
+		return NULL;
+	}
+
+	/* start the phy clock */
+	if (s3cdma->sdata->has_clocks) {
+		ret = clk_enable(phy->clk);
+		if (ret) {
+			dev_err(&s3cdma->pdev->dev, "could not enable clock for channel %d, err %d\n",
+				phy->id, ret);
+			phy->serving = NULL;
+			return NULL;
+		}
+	}
+
+	return phy;
+}
+
+/*
+ * Mark the physical channel as free.
+ *
+ * This drops the link between the physical and virtual channel.
+ */
+static inline void s3c24xx_dma_put_phy(struct s3c24xx_dma_phy *phy)
+{
+	struct s3c24xx_dma_engine *s3cdma = phy->host;
+
+	if (s3cdma->sdata->has_clocks)
+		clk_disable(phy->clk);
+
+	phy->serving = NULL;
+}
+
+/*
+ * Stops the channel by writing the stop bit.
+ * This should not be used for an on-going transfer, but as a method of
+ * shutting down a channel (eg, when it's no longer used) or terminating a
+ * transfer.
+ */
+static void s3c24xx_dma_terminate_phy(struct s3c24xx_dma_phy *phy)
+{
+	writel(S3C24XX_DMASKTRIG_STOP, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Virtual channel handling
+ */
+
+static inline
+struct s3c24xx_dma_chan *to_s3c24xx_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct s3c24xx_dma_chan, vc.chan);
+}
+
+static u32 s3c24xx_dma_getbytes_chan(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct s3c24xx_txd *txd = s3cchan->at;
+	u32 tc = readl(phy->base + S3C24XX_DSTAT) & S3C24XX_DSTAT_CURRTC_MASK;
+
+	return tc * txd->width;
+}
+
+static int s3c24xx_dma_set_runtime_config(struct dma_chan *chan,
+				  struct dma_slave_config *config)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	unsigned long flags;
+	int ret = 0;
+
+	/* Reject definitely invalid configurations */
+	if (config->src_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES ||
+	    config->dst_addr_width == DMA_SLAVE_BUSWIDTH_8_BYTES)
+		return -EINVAL;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+	if (!s3cchan->slave) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	s3cchan->cfg = *config;
+
+out:
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+	return ret;
+}
+
+/*
+ * Transfer handling
+ */
+
+static inline
+struct s3c24xx_txd *to_s3c24xx_txd(struct dma_async_tx_descriptor *tx)
+{
+	return container_of(tx, struct s3c24xx_txd, vd.tx);
+}
+
+static struct s3c24xx_txd *s3c24xx_dma_get_txd(void)
+{
+	struct s3c24xx_txd *txd = kzalloc(sizeof(*txd), GFP_NOWAIT);
+
+	if (txd) {
+		INIT_LIST_HEAD(&txd->dsg_list);
+		txd->dcon = S3C24XX_DCON_INT | S3C24XX_DCON_NORELOAD;
+	}
+
+	return txd;
+}
+
+static void s3c24xx_dma_free_txd(struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_sg *dsg, *_dsg;
+
+	list_for_each_entry_safe(dsg, _dsg, &txd->dsg_list, node) {
+		list_del(&dsg->node);
+		kfree(dsg);
+	}
+
+	kfree(txd);
+}
+
+static void s3c24xx_dma_start_next_sg(struct s3c24xx_dma_chan *s3cchan,
+				       struct s3c24xx_txd *txd)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_sg *dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+	u32 dcon = txd->dcon;
+	u32 val;
+
+	/* transfer-size and -count from len and width */
+	switch (txd->width) {
+	case 1:
+		dcon |= S3C24XX_DCON_DSZ_BYTE | dsg->len;
+		break;
+	case 2:
+		dcon |= S3C24XX_DCON_DSZ_HALFWORD | (dsg->len / 2);
+		break;
+	case 4:
+		dcon |= S3C24XX_DCON_DSZ_WORD | (dsg->len / 4);
+		break;
+	}
+
+	if (s3cchan->slave) {
+		struct s3c24xx_dma_channel *cdata =
+					&pdata->channels[s3cchan->id];
+
+		if (s3cdma->sdata->has_reqsel) {
+			writel_relaxed((cdata->chansel << 1) |
+							S3C24XX_DMAREQSEL_HW,
+					phy->base + S3C24XX_DMAREQSEL);
+		} else {
+			int csel = cdata->chansel >> (phy->id *
+							S3C24XX_CHANSEL_WIDTH);
+
+			csel &= S3C24XX_CHANSEL_REQ_MASK;
+			dcon |= csel << S3C24XX_DCON_HWSRC_SHIFT;
+			dcon |= S3C24XX_DCON_HWTRIG;
+		}
+	} else {
+		if (s3cdma->sdata->has_reqsel)
+			writel_relaxed(0, phy->base + S3C24XX_DMAREQSEL);
+	}
+
+	writel_relaxed(dsg->src_addr, phy->base + S3C24XX_DISRC);
+	writel_relaxed(txd->disrcc, phy->base + S3C24XX_DISRCC);
+	writel_relaxed(dsg->dst_addr, phy->base + S3C24XX_DIDST);
+	writel_relaxed(txd->didstc, phy->base + S3C24XX_DIDSTC);
+	writel_relaxed(dcon, phy->base + S3C24XX_DCON);
+
+	val = readl_relaxed(phy->base + S3C24XX_DMASKTRIG);
+	val &= ~S3C24XX_DMASKTRIG_STOP;
+	val |= S3C24XX_DMASKTRIG_ON;
+
+	/* trigger the dma operation for memcpy transfers */
+	if (!s3cchan->slave)
+		val |= S3C24XX_DMASKTRIG_SWTRIG;
+
+	writel(val, phy->base + S3C24XX_DMASKTRIG);
+}
+
+/*
+ * Set the initial DMA register values and start first sg.
+ */
+static void s3c24xx_dma_start_next_txd(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_phy *phy = s3cchan->phy;
+	struct virt_dma_desc *vd = vchan_next_desc(&s3cchan->vc);
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+
+	list_del(&txd->vd.node);
+
+	s3cchan->at = txd;
+
+	/* Wait for channel inactive */
+	while (s3c24xx_dma_phy_busy(phy))
+		cpu_relax();
+
+	/* point to the first element of the sg list */
+	txd->at = txd->dsg_list.next;
+	s3c24xx_dma_start_next_sg(s3cchan, txd);
+}
+
+/*
+ * Try to allocate a physical channel.  When successful, assign it to
+ * this virtual channel, and initiate the next descriptor.  The
+ * virtual channel lock must be held at this point.
+ */
+static void s3c24xx_dma_phy_alloc_and_start(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_phy *phy;
+
+	phy = s3c24xx_dma_get_phy(s3cchan);
+	if (!phy) {
+		dev_dbg(&s3cdma->pdev->dev, "no physical channel available for xfer on %s\n",
+			s3cchan->name);
+		s3cchan->state = S3C24XX_DMA_CHAN_WAITING;
+		return;
+	}
+
+	dev_dbg(&s3cdma->pdev->dev, "allocated physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+static void s3c24xx_dma_phy_reassign_start(struct s3c24xx_dma_phy *phy,
+	struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+
+	dev_dbg(&s3cdma->pdev->dev, "reassigned physical channel %d for xfer on %s\n",
+		phy->id, s3cchan->name);
+
+	/*
+	 * We do this without taking the lock; we're really only concerned
+	 * about whether this pointer is NULL or not, and we're guaranteed
+	 * that this will only be called when it _already_ is non-NULL.
+	 */
+	phy->serving = s3cchan;
+	s3cchan->phy = phy;
+	s3cchan->state = S3C24XX_DMA_CHAN_RUNNING;
+	s3c24xx_dma_start_next_txd(s3cchan);
+}
+
+/*
+ * Free a physical DMA channel, potentially reallocating it to another
+ * virtual channel if we have any pending.
+ */
+static void s3c24xx_dma_phy_free(struct s3c24xx_dma_chan *s3cchan)
+{
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_dma_chan *p, *next;
+
+retry:
+	next = NULL;
+
+	/* Find a waiting virtual channel for the next transfer. */
+	list_for_each_entry(p, &s3cdma->memcpy.channels, vc.chan.device_node)
+		if (p->state == S3C24XX_DMA_CHAN_WAITING) {
+			next = p;
+			break;
+		}
+
+	if (!next) {
+		list_for_each_entry(p, &s3cdma->slave.channels,
+				    vc.chan.device_node)
+			if (p->state == S3C24XX_DMA_CHAN_WAITING &&
+				      s3c24xx_dma_phy_valid(p, s3cchan->phy)) {
+				next = p;
+				break;
+			}
+	}
+
+	/* Ensure that the physical channel is stopped */
+	s3c24xx_dma_terminate_phy(s3cchan->phy);
+
+	if (next) {
+		bool success;
+
+		/*
+		 * Eww.  We know this isn't going to deadlock
+		 * but lockdep probably doesn't.
+		 */
+		spin_lock(&next->vc.lock);
+		/* Re-check the state now that we have the lock */
+		success = next->state == S3C24XX_DMA_CHAN_WAITING;
+		if (success)
+			s3c24xx_dma_phy_reassign_start(s3cchan->phy, next);
+		spin_unlock(&next->vc.lock);
+
+		/* If the state changed, try to find another channel */
+		if (!success)
+			goto retry;
+	} else {
+		/* No more jobs, so free up the physical channel */
+		s3c24xx_dma_put_phy(s3cchan->phy);
+	}
+
+	s3cchan->phy = NULL;
+	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+}
+
+static void s3c24xx_dma_desc_free(struct virt_dma_desc *vd)
+{
+	struct s3c24xx_txd *txd = to_s3c24xx_txd(&vd->tx);
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(vd->tx.chan);
+
+	if (!s3cchan->slave)
+		dma_descriptor_unmap(&vd->tx);
+
+	s3c24xx_dma_free_txd(txd);
+}
+
+static irqreturn_t s3c24xx_dma_irq(int irq, void *data)
+{
+	struct s3c24xx_dma_phy *phy = data;
+	struct s3c24xx_dma_chan *s3cchan = phy->serving;
+	struct s3c24xx_txd *txd;
+
+	dev_dbg(&phy->host->pdev->dev, "interrupt on channel %d\n", phy->id);
+
+	/*
+	 * Interrupts happen to notify the completion of a transfer and the
+	 * channel should have moved into its stop state already on its own.
+	 * Therefore interrupts on channels not bound to a virtual channel
+	 * should never happen. Nevertheless send a terminate command to the
+	 * channel if the unlikely case happens.
+	 */
+	if (unlikely(!s3cchan)) {
+		dev_err(&phy->host->pdev->dev, "interrupt on unused channel %d\n",
+			phy->id);
+
+		s3c24xx_dma_terminate_phy(phy);
+
+		return IRQ_HANDLED;
+	}
+
+	spin_lock(&s3cchan->vc.lock);
+	txd = s3cchan->at;
+	if (txd) {
+		/* when more sg's are in this txd, start the next one */
+		if (!list_is_last(txd->at, &txd->dsg_list)) {
+			txd->at = txd->at->next;
+			if (txd->cyclic)
+				vchan_cyclic_callback(&txd->vd);
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
+		} else if (!txd->cyclic) {
+			s3cchan->at = NULL;
+			vchan_cookie_complete(&txd->vd);
+
+			/*
+			 * And start the next descriptor (if any),
+			 * otherwise free this channel.
+			 */
+			if (vchan_next_desc(&s3cchan->vc))
+				s3c24xx_dma_start_next_txd(s3cchan);
+			else
+				s3c24xx_dma_phy_free(s3cchan);
+		} else {
+			vchan_cyclic_callback(&txd->vd);
+
+			/* Cyclic: reset at beginning */
+			txd->at = txd->dsg_list.next;
+			s3c24xx_dma_start_next_sg(s3cchan, txd);
+		}
+	}
+	spin_unlock(&s3cchan->vc.lock);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * The DMA ENGINE API
+ */
+
+static int s3c24xx_dma_terminate_all(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	LIST_HEAD(head);
+	unsigned long flags;
+	int ret;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+
+	if (!s3cchan->phy && !s3cchan->at) {
+		dev_err(&s3cdma->pdev->dev, "trying to terminate already stopped channel %d\n",
+			s3cchan->id);
+		ret = -EINVAL;
+		goto unlock;
+	}
+
+	s3cchan->state = S3C24XX_DMA_CHAN_IDLE;
+
+	/* Mark physical channel as free */
+	if (s3cchan->phy)
+		s3c24xx_dma_phy_free(s3cchan);
+
+	/* Dequeue current job */
+	if (s3cchan->at) {
+		vchan_terminate_vdesc(&s3cchan->at->vd);
+		s3cchan->at = NULL;
+	}
+
+	/* Dequeue jobs not yet fired as well */
+
+	vchan_get_all_descriptors(&s3cchan->vc, &head);
+
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	vchan_dma_desc_free_list(&s3cchan->vc, &head);
+
+	return 0;
+
+unlock:
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	return ret;
+}
+
+static void s3c24xx_dma_synchronize(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+
+	vchan_synchronize(&s3cchan->vc);
+}
+
+static void s3c24xx_dma_free_chan_resources(struct dma_chan *chan)
+{
+	/* Ensure all queued descriptors are freed */
+	vchan_free_chan_resources(to_virt_chan(chan));
+}
+
+static enum dma_status s3c24xx_dma_tx_status(struct dma_chan *chan,
+		dma_cookie_t cookie, struct dma_tx_state *txstate)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct virt_dma_desc *vd;
+	unsigned long flags;
+	enum dma_status ret;
+	size_t bytes = 0;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	ret = dma_cookie_status(chan, cookie, txstate);
+
+	/*
+	 * There's no point calculating the residue if there's
+	 * no txstate to store the value.
+	 */
+	if (ret == DMA_COMPLETE || !txstate) {
+		spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+		return ret;
+	}
+
+	vd = vchan_find_desc(&s3cchan->vc, cookie);
+	if (vd) {
+		/* On the issued list, so hasn't been processed yet */
+		txd = to_s3c24xx_txd(&vd->tx);
+
+		list_for_each_entry(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+	} else {
+		/*
+		 * Currently running, so sum over the pending sg's and
+		 * the currently active one.
+		 */
+		txd = s3cchan->at;
+
+		dsg = list_entry(txd->at, struct s3c24xx_sg, node);
+		list_for_each_entry_from(dsg, &txd->dsg_list, node)
+			bytes += dsg->len;
+
+		bytes += s3c24xx_dma_getbytes_chan(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+
+	/*
+	 * This cookie not complete yet
+	 * Get number of bytes left in the active transactions and queue
+	 */
+	dma_set_residue(txstate, bytes);
+
+	/* Whether waiting or running, we're in progress */
+	return ret;
+}
+
+/*
+ * Initialize a descriptor to be used by memcpy submit
+ */
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_memcpy(
+		struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	int src_mod, dest_mod;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare memcpy of %zu bytes from %s\n",
+			len, s3cchan->name);
+
+	if ((len & S3C24XX_DCON_TC_MASK) != len) {
+		dev_err(&s3cdma->pdev->dev, "memcpy size %zu to large\n", len);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+	if (!dsg) {
+		s3c24xx_dma_free_txd(txd);
+		return NULL;
+	}
+	list_add_tail(&dsg->node, &txd->dsg_list);
+
+	dsg->src_addr = src;
+	dsg->dst_addr = dest;
+	dsg->len = len;
+
+	/*
+	 * Determine a suitable transfer width.
+	 * The DMA controller cannot fetch/store information which is not
+	 * naturally aligned on the bus, i.e., a 4 byte fetch must start at
+	 * an address divisible by 4 - more generally addr % width must be 0.
+	 */
+	src_mod = src % 4;
+	dest_mod = dest % 4;
+	switch (len % 4) {
+	case 0:
+		txd->width = (src_mod == 0 && dest_mod == 0) ? 4 : 1;
+		break;
+	case 2:
+		txd->width = ((src_mod == 2 || src_mod == 0) &&
+			      (dest_mod == 2 || dest_mod == 0)) ? 2 : 1;
+		break;
+	default:
+		txd->width = 1;
+		break;
+	}
+
+	txd->disrcc = S3C24XX_DISRCC_LOC_AHB | S3C24XX_DISRCC_INC_INCREMENT;
+	txd->didstc = S3C24XX_DIDSTC_LOC_AHB | S3C24XX_DIDSTC_INC_INCREMENT;
+	txd->dcon |= S3C24XX_DCON_DEMAND | S3C24XX_DCON_SYNC_HCLK |
+		     S3C24XX_DCON_SERV_WHOLE;
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_dma_cyclic(
+	struct dma_chan *chan, dma_addr_t addr, size_t size, size_t period,
+	enum dma_transfer_direction direction, unsigned long flags)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	unsigned sg_len;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int i;
+
+	dev_dbg(&s3cdma->pdev->dev,
+		"prepare cyclic transaction of %zu bytes with period %zu from %s\n",
+		size, period, s3cchan->name);
+
+	if (!is_slave_direction(direction)) {
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	txd->cyclic = 1;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral desintation is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	}
+
+	sg_len = size / period;
+
+	for (i = 0; i < sg_len; i++) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = period;
+		/* Check last period length */
+		if (i == sg_len - 1)
+			dsg->len = size - period * i;
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = addr + period * i;
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = addr + period * i;
+		}
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *s3c24xx_dma_prep_slave_sg(
+		struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_transfer_direction direction,
+		unsigned long flags, void *context)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	struct s3c24xx_dma_engine *s3cdma = s3cchan->host;
+	const struct s3c24xx_dma_platdata *pdata = s3cdma->pdata;
+	struct s3c24xx_dma_channel *cdata = &pdata->channels[s3cchan->id];
+	struct s3c24xx_txd *txd;
+	struct s3c24xx_sg *dsg;
+	struct scatterlist *sg;
+	dma_addr_t slave_addr;
+	u32 hwcfg = 0;
+	int tmp;
+
+	dev_dbg(&s3cdma->pdev->dev, "prepare transaction of %d bytes from %s\n",
+			sg_dma_len(sgl), s3cchan->name);
+
+	txd = s3c24xx_dma_get_txd();
+	if (!txd)
+		return NULL;
+
+	if (cdata->handshake)
+		txd->dcon |= S3C24XX_DCON_HANDSHAKE;
+
+	switch (cdata->bus) {
+	case S3C24XX_DMA_APB:
+		txd->dcon |= S3C24XX_DCON_SYNC_PCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_APB;
+		break;
+	case S3C24XX_DMA_AHB:
+		txd->dcon |= S3C24XX_DCON_SYNC_HCLK;
+		hwcfg |= S3C24XX_DISRCC_LOC_AHB;
+		break;
+	}
+
+	/*
+	 * Always assume our peripheral desintation is a fixed
+	 * address in memory.
+	 */
+	hwcfg |= S3C24XX_DISRCC_INC_FIXED;
+
+	/*
+	 * Individual dma operations are requested by the slave,
+	 * so serve only single atomic operations (S3C24XX_DCON_SERV_SINGLE).
+	 */
+	txd->dcon |= S3C24XX_DCON_SERV_SINGLE;
+
+	if (direction == DMA_MEM_TO_DEV) {
+		txd->disrcc = S3C24XX_DISRCC_LOC_AHB |
+			      S3C24XX_DISRCC_INC_INCREMENT;
+		txd->didstc = hwcfg;
+		slave_addr = s3cchan->cfg.dst_addr;
+		txd->width = s3cchan->cfg.dst_addr_width;
+	} else if (direction == DMA_DEV_TO_MEM) {
+		txd->disrcc = hwcfg;
+		txd->didstc = S3C24XX_DIDSTC_LOC_AHB |
+			      S3C24XX_DIDSTC_INC_INCREMENT;
+		slave_addr = s3cchan->cfg.src_addr;
+		txd->width = s3cchan->cfg.src_addr_width;
+	} else {
+		s3c24xx_dma_free_txd(txd);
+		dev_err(&s3cdma->pdev->dev,
+			"direction %d unsupported\n", direction);
+		return NULL;
+	}
+
+	for_each_sg(sgl, sg, sg_len, tmp) {
+		dsg = kzalloc(sizeof(*dsg), GFP_NOWAIT);
+		if (!dsg) {
+			s3c24xx_dma_free_txd(txd);
+			return NULL;
+		}
+		list_add_tail(&dsg->node, &txd->dsg_list);
+
+		dsg->len = sg_dma_len(sg);
+		if (direction == DMA_MEM_TO_DEV) {
+			dsg->src_addr = sg_dma_address(sg);
+			dsg->dst_addr = slave_addr;
+		} else { /* DMA_DEV_TO_MEM */
+			dsg->src_addr = slave_addr;
+			dsg->dst_addr = sg_dma_address(sg);
+		}
+	}
+
+	return vchan_tx_prep(&s3cchan->vc, &txd->vd, flags);
+}
+
+/*
+ * Slave transactions callback to the slave device to allow
+ * synchronization of slave DMA signals with the DMAC enable
+ */
+static void s3c24xx_dma_issue_pending(struct dma_chan *chan)
+{
+	struct s3c24xx_dma_chan *s3cchan = to_s3c24xx_dma_chan(chan);
+	unsigned long flags;
+
+	spin_lock_irqsave(&s3cchan->vc.lock, flags);
+	if (vchan_issue_pending(&s3cchan->vc)) {
+		if (!s3cchan->phy && s3cchan->state != S3C24XX_DMA_CHAN_WAITING)
+			s3c24xx_dma_phy_alloc_and_start(s3cchan);
+	}
+	spin_unlock_irqrestore(&s3cchan->vc.lock, flags);
+}
+
+/*
+ * Bringup and teardown
+ */
+
+/*
+ * Initialise the DMAC memcpy/slave channels.
+ * Make a local wrapper to hold required data
+ */
+static int s3c24xx_dma_init_virtual_channels(struct s3c24xx_dma_engine *s3cdma,
+		struct dma_device *dmadev, unsigned int channels, bool slave)
+{
+	struct s3c24xx_dma_chan *chan;
+	int i;
+
+	INIT_LIST_HEAD(&dmadev->channels);
+
+	/*
+	 * Register as many memcpy as we have physical channels,
+	 * we won't always be able to use all but the code will have
+	 * to cope with that situation.
+	 */
+	for (i = 0; i < channels; i++) {
+		chan = devm_kzalloc(dmadev->dev, sizeof(*chan), GFP_KERNEL);
+		if (!chan)
+			return -ENOMEM;
+
+		chan->id = i;
+		chan->host = s3cdma;
+		chan->state = S3C24XX_DMA_CHAN_IDLE;
+
+		if (slave) {
+			chan->slave = true;
+			chan->name = kasprintf(GFP_KERNEL, "slave%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		} else {
+			chan->name = kasprintf(GFP_KERNEL, "memcpy%d", i);
+			if (!chan->name)
+				return -ENOMEM;
+		}
+		dev_dbg(dmadev->dev,
+			 "initialize virtual channel \"%s\"\n",
+			 chan->name);
+
+		chan->vc.desc_free = s3c24xx_dma_desc_free;
+		vchan_init(&chan->vc, dmadev);
+	}
+	dev_info(dmadev->dev, "initialized %d virtual %s channels\n",
+		 i, slave ? "slave" : "memcpy");
+	return i;
+}
+
+static void s3c24xx_dma_free_virtual_channels(struct dma_device *dmadev)
+{
+	struct s3c24xx_dma_chan *chan = NULL;
+	struct s3c24xx_dma_chan *next;
+
+	list_for_each_entry_safe(chan,
+				 next, &dmadev->channels, vc.chan.device_node) {
+		list_del(&chan->vc.chan.device_node);
+		tasklet_kill(&chan->vc.task);
+	}
+}
+
+/* s3c2410, s3c2440 and s3c2442 have a 0x40 stride without separate clocks */
+static struct soc_data soc_s3c2410 = {
+	.stride = 0x40,
+	.has_reqsel = false,
+	.has_clocks = false,
+};
+
+/* s3c2412 and s3c2413 have a 0x40 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2412 = {
+	.stride = 0x40,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+/* s3c2443 and following have a 0x100 stride and dmareqsel mechanism */
+static struct soc_data soc_s3c2443 = {
+	.stride = 0x100,
+	.has_reqsel = true,
+	.has_clocks = true,
+};
+
+static const struct platform_device_id s3c24xx_dma_driver_ids[] = {
+	{
+		.name		= "s3c2410-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2410,
+	}, {
+		.name		= "s3c2412-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2412,
+	}, {
+		.name		= "s3c2443-dma",
+		.driver_data	= (kernel_ulong_t)&soc_s3c2443,
+	},
+	{ },
+};
+
+static struct soc_data *s3c24xx_dma_get_soc_data(struct platform_device *pdev)
+{
+	return (struct soc_data *)
+			 platform_get_device_id(pdev)->driver_data;
+}
+
+static int s3c24xx_dma_probe(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma;
+	struct soc_data *sdata;
+	struct resource *res;
+	int ret;
+	int i;
+
+	if (!pdata) {
+		dev_err(&pdev->dev, "platform data missing\n");
+		return -ENODEV;
+	}
+
+	/* Basic sanity check */
+	if (pdata->num_phy_channels > MAX_DMA_CHANNELS) {
+		dev_err(&pdev->dev, "too many dma channels %d, max %d\n",
+			pdata->num_phy_channels, MAX_DMA_CHANNELS);
+		return -EINVAL;
+	}
+
+	sdata = s3c24xx_dma_get_soc_data(pdev);
+	if (!sdata)
+		return -EINVAL;
+
+	s3cdma = devm_kzalloc(&pdev->dev, sizeof(*s3cdma), GFP_KERNEL);
+	if (!s3cdma)
+		return -ENOMEM;
+
+	s3cdma->pdev = pdev;
+	s3cdma->pdata = pdata;
+	s3cdma->sdata = sdata;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	s3cdma->base = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(s3cdma->base))
+		return PTR_ERR(s3cdma->base);
+
+	s3cdma->phy_chans = devm_kcalloc(&pdev->dev,
+					      pdata->num_phy_channels,
+					      sizeof(struct s3c24xx_dma_phy),
+					      GFP_KERNEL);
+	if (!s3cdma->phy_chans)
+		return -ENOMEM;
+
+	/* acquire irqs and clocks for all physical channels */
+	for (i = 0; i < pdata->num_phy_channels; i++) {
+		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+		char clk_name[6];
+
+		phy->id = i;
+		phy->base = s3cdma->base + (i * sdata->stride);
+		phy->host = s3cdma;
+
+		phy->irq = platform_get_irq(pdev, i);
+		if (phy->irq < 0)
+			continue;
+
+		ret = devm_request_irq(&pdev->dev, phy->irq, s3c24xx_dma_irq,
+				       0, pdev->name, phy);
+		if (ret) {
+			dev_err(&pdev->dev, "Unable to request irq for channel %d, error %d\n",
+				i, ret);
+			continue;
+		}
+
+		if (sdata->has_clocks) {
+			sprintf(clk_name, "dma.%d", i);
+			phy->clk = devm_clk_get(&pdev->dev, clk_name);
+			if (IS_ERR(phy->clk) && sdata->has_clocks) {
+				dev_err(&pdev->dev, "unable to acquire clock for channel %d, error %lu\n",
+					i, PTR_ERR(phy->clk));
+				continue;
+			}
+
+			ret = clk_prepare(phy->clk);
+			if (ret) {
+				dev_err(&pdev->dev, "clock for phy %d failed, error %d\n",
+					i, ret);
+				continue;
+			}
+		}
+
+		spin_lock_init(&phy->lock);
+		phy->valid = true;
+
+		dev_dbg(&pdev->dev, "physical channel %d is %s\n",
+			i, s3c24xx_dma_phy_busy(phy) ? "BUSY" : "FREE");
+	}
+
+	/* Initialize memcpy engine */
+	dma_cap_set(DMA_MEMCPY, s3cdma->memcpy.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->memcpy.cap_mask);
+	s3cdma->memcpy.dev = &pdev->dev;
+	s3cdma->memcpy.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->memcpy.device_prep_dma_memcpy = s3c24xx_dma_prep_memcpy;
+	s3cdma->memcpy.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->memcpy.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->memcpy.device_config = s3c24xx_dma_set_runtime_config;
+	s3cdma->memcpy.device_terminate_all = s3c24xx_dma_terminate_all;
+	s3cdma->memcpy.device_synchronize = s3c24xx_dma_synchronize;
+
+	/* Initialize slave engine for SoC internal dedicated peripherals */
+	dma_cap_set(DMA_SLAVE, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_CYCLIC, s3cdma->slave.cap_mask);
+	dma_cap_set(DMA_PRIVATE, s3cdma->slave.cap_mask);
+	s3cdma->slave.dev = &pdev->dev;
+	s3cdma->slave.device_free_chan_resources =
+					s3c24xx_dma_free_chan_resources;
+	s3cdma->slave.device_tx_status = s3c24xx_dma_tx_status;
+	s3cdma->slave.device_issue_pending = s3c24xx_dma_issue_pending;
+	s3cdma->slave.device_prep_slave_sg = s3c24xx_dma_prep_slave_sg;
+	s3cdma->slave.device_prep_dma_cyclic = s3c24xx_dma_prep_dma_cyclic;
+	s3cdma->slave.device_config = s3c24xx_dma_set_runtime_config;
+	s3cdma->slave.device_terminate_all = s3c24xx_dma_terminate_all;
+	s3cdma->slave.device_synchronize = s3c24xx_dma_synchronize;
+	s3cdma->slave.filter.map = pdata->slave_map;
+	s3cdma->slave.filter.mapcnt = pdata->slavecnt;
+	s3cdma->slave.filter.fn = s3c24xx_dma_filter;
+
+	/* Register as many memcpy channels as there are physical channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->memcpy,
+						pdata->num_phy_channels, false);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			 "%s failed to enumerate memcpy channels - %d\n",
+			 __func__, ret);
+		goto err_memcpy;
+	}
+
+	/* Register slave channels */
+	ret = s3c24xx_dma_init_virtual_channels(s3cdma, &s3cdma->slave,
+				pdata->num_channels, true);
+	if (ret <= 0) {
+		dev_warn(&pdev->dev,
+			"%s failed to enumerate slave channels - %d\n",
+				__func__, ret);
+		goto err_slave;
+	}
+
+	ret = dma_async_device_register(&s3cdma->memcpy);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register memcpy as an async device - %d\n",
+			__func__, ret);
+		goto err_memcpy_reg;
+	}
+
+	ret = dma_async_device_register(&s3cdma->slave);
+	if (ret) {
+		dev_warn(&pdev->dev,
+			"%s failed to register slave as an async device - %d\n",
+			__func__, ret);
+		goto err_slave_reg;
+	}
+
+	platform_set_drvdata(pdev, s3cdma);
+	dev_info(&pdev->dev, "Loaded dma driver with %d physical channels\n",
+		 pdata->num_phy_channels);
+
+	return 0;
+
+err_slave_reg:
+	dma_async_device_unregister(&s3cdma->memcpy);
+err_memcpy_reg:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+err_slave:
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+err_memcpy:
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return ret;
+}
+
+static void s3c24xx_dma_free_irq(struct platform_device *pdev,
+				struct s3c24xx_dma_engine *s3cdma)
+{
+	int i;
+
+	for (i = 0; i < s3cdma->pdata->num_phy_channels; i++) {
+		struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+
+		devm_free_irq(&pdev->dev, phy->irq, phy);
+	}
+}
+
+static int s3c24xx_dma_remove(struct platform_device *pdev)
+{
+	const struct s3c24xx_dma_platdata *pdata = dev_get_platdata(&pdev->dev);
+	struct s3c24xx_dma_engine *s3cdma = platform_get_drvdata(pdev);
+	struct soc_data *sdata = s3c24xx_dma_get_soc_data(pdev);
+	int i;
+
+	dma_async_device_unregister(&s3cdma->slave);
+	dma_async_device_unregister(&s3cdma->memcpy);
+
+	s3c24xx_dma_free_irq(pdev, s3cdma);
+
+	s3c24xx_dma_free_virtual_channels(&s3cdma->slave);
+	s3c24xx_dma_free_virtual_channels(&s3cdma->memcpy);
+
+	if (sdata->has_clocks)
+		for (i = 0; i < pdata->num_phy_channels; i++) {
+			struct s3c24xx_dma_phy *phy = &s3cdma->phy_chans[i];
+			if (phy->valid)
+				clk_unprepare(phy->clk);
+		}
+
+	return 0;
+}
+
+static struct platform_driver s3c24xx_dma_driver = {
+	.driver		= {
+		.name	= "s3c24xx-dma",
+	},
+	.id_table	= s3c24xx_dma_driver_ids,
+	.probe		= s3c24xx_dma_probe,
+	.remove		= s3c24xx_dma_remove,
+};
+
+module_platform_driver(s3c24xx_dma_driver);
+
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param)
+{
+	struct s3c24xx_dma_chan *s3cchan;
+
+	if (chan->device->dev->driver != &s3c24xx_dma_driver.driver)
+		return false;
+
+	s3cchan = to_s3c24xx_dma_chan(chan);
+
+	return s3cchan->id == (uintptr_t)param;
+}
+EXPORT_SYMBOL(s3c24xx_dma_filter);
+
+MODULE_DESCRIPTION("S3C24XX DMA Driver");
+MODULE_AUTHOR("Heiko Stuebner");
+MODULE_LICENSE("GPL v2");
diff --git a/include/linux/platform_data/dma-s3c24xx.h b/include/linux/platform_data/dma-s3c24xx.h
new file mode 100644
index 0000000000000..96d02dbeea67a
--- /dev/null
+++ b/include/linux/platform_data/dma-s3c24xx.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * S3C24XX DMA handling
+ *
+ * Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
+ */
+
+/* Helper to encode the source selection constraints for early s3c socs. */
+#define S3C24XX_DMA_CHANREQ(src, chan)	((BIT(3) | src) << chan * 4)
+
+enum s3c24xx_dma_bus {
+	S3C24XX_DMA_APB,
+	S3C24XX_DMA_AHB,
+};
+
+/**
+ * @bus: on which bus does the peripheral reside - AHB or APB.
+ * @handshake: is a handshake with the peripheral necessary
+ * @chansel: channel selection information, depending on variant; reqsel for
+ *	     s3c2443 and later and channel-selection map for earlier SoCs
+ *	     see CHANSEL doc in s3c2443-dma.c
+ */
+struct s3c24xx_dma_channel {
+	enum s3c24xx_dma_bus bus;
+	bool handshake;
+	u16 chansel;
+};
+
+struct dma_slave_map;
+
+/**
+ * struct s3c24xx_dma_platdata - platform specific settings
+ * @num_phy_channels: number of physical channels
+ * @channels: array of virtual channel descriptions
+ * @num_channels: number of virtual channels
+ * @slave_map: dma slave map matching table
+ * @slavecnt: number of elements in slave_map
+ */
+struct s3c24xx_dma_platdata {
+	int num_phy_channels;
+	struct s3c24xx_dma_channel *channels;
+	int num_channels;
+	const struct dma_slave_map *slave_map;
+	int slavecnt;
+};
+
+struct dma_chan;
+bool s3c24xx_dma_filter(struct dma_chan *chan, void *param);
-- 
GitLab


From 35c23fba4eb4b3043b42acbdd3fbabdd8824f56f Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 2 Nov 2022 23:57:50 +0100
Subject: [PATCH 3262/4122] gfs2: Add extra error check in alloc_dinode

We have reserved the number of blocks we want to allocate, so the actual
allocation isn't expected to fail.  Nevertheless, make the code behave
correctly even when things go wrong.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/inode.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 04a201584fa7c..41fa69c1be1b7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -403,12 +403,15 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
 		goto out_ipreserv;
 
 	error = gfs2_alloc_blocks(ip, &ip->i_no_addr, dblocks, 1, &ip->i_generation);
+	if (error)
+		goto out_trans_end;
+
 	ip->i_no_formal_ino = ip->i_generation;
 	ip->i_inode.i_ino = ip->i_no_addr;
 	ip->i_goal = ip->i_no_addr;
 
+out_trans_end:
 	gfs2_trans_end(sdp);
-
 out_ipreserv:
 	gfs2_inplace_release(ip);
 out_quota:
-- 
GitLab


From 761fdbbce96fb3d0569f50a77b1214dbc4b17c44 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 4 Nov 2022 13:26:46 +0100
Subject: [PATCH 3263/4122] gfs2: Get rid of ghs[] in gfs2_create_inode

In gfs2_create_inode, get rid of the ghs array in favor of two separate
variables.  This makes the code much less irritating.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/inode.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 41fa69c1be1b7..465f1673101fa 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -599,7 +599,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 {
 	const struct qstr *name = &dentry->d_name;
 	struct posix_acl *default_acl, *acl;
-	struct gfs2_holder ghs[2];
+	struct gfs2_holder d_gh, gh;
 	struct inode *inode = NULL;
 	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
@@ -620,10 +620,10 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail;
 
-	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs);
+	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, &d_gh);
 	if (error)
 		goto fail;
-	gfs2_holder_mark_uninitialized(ghs + 1);
+	gfs2_holder_mark_uninitialized(&gh);
 
 	error = create_ok(dip, name, mode);
 	if (error)
@@ -645,7 +645,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 			else
 				error = finish_no_open(file, NULL);
 		}
-		gfs2_glock_dq_uninit(ghs);
+		gfs2_glock_dq_uninit(&d_gh);
 		goto fail;
 	} else if (error != -ENOENT) {
 		goto fail_gunlock;
@@ -734,7 +734,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_gunlock2;
 
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, ghs + 1);
+	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_SKIP, &gh);
 	if (error)
 		goto fail_gunlock3;
 
@@ -788,9 +788,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		file->f_mode |= FMODE_CREATED;
 		error = finish_open(file, dentry, gfs2_open_common);
 	}
-	gfs2_glock_dq_uninit(ghs);
+	gfs2_glock_dq_uninit(&d_gh);
 	gfs2_qa_put(ip);
-	gfs2_glock_dq_uninit(ghs + 1);
+	gfs2_glock_dq_uninit(&gh);
 	gfs2_glock_put(io_gl);
 	gfs2_qa_put(dip);
 	unlock_new_inode(inode);
@@ -815,7 +815,7 @@ fail_free_acls:
 	posix_acl_release(acl);
 fail_gunlock:
 	gfs2_dir_no_add(&da);
-	gfs2_glock_dq_uninit(ghs);
+	gfs2_glock_dq_uninit(&d_gh);
 	if (!IS_ERR_OR_NULL(inode)) {
 		clear_nlink(inode);
 		if (!free_vfs_inode)
@@ -827,8 +827,8 @@ fail_gunlock:
 		else
 			iput(inode);
 	}
-	if (gfs2_holder_initialized(ghs + 1))
-		gfs2_glock_dq_uninit(ghs + 1);
+	if (gfs2_holder_initialized(&gh))
+		gfs2_glock_dq_uninit(&gh);
 fail:
 	gfs2_qa_put(dip);
 	return error;
-- 
GitLab


From 3d0258bc11185ccb21f922332eca731e1928c5a4 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 2 Nov 2022 18:34:42 +0100
Subject: [PATCH 3264/4122] gfs2: Clean up initialization of "ip" in
 gfs2_create_inode

Initialize variable "ip" earlier so that it can be used interchangeably
with "inode" everywhere.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/inode.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 465f1673101fa..b91f15abe24e8 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -659,12 +659,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	error = -ENOMEM;
 	if (!inode)
 		goto fail_gunlock;
+	ip = GFS2_I(inode);
 
 	error = posix_acl_create(dir, &mode, &default_acl, &acl);
 	if (error)
 		goto fail_gunlock;
 
-	ip = GFS2_I(inode);
 	error = gfs2_qa_get(ip);
 	if (error)
 		goto fail_free_acls;
@@ -821,7 +821,7 @@ fail_gunlock:
 		if (!free_vfs_inode)
 			mark_inode_dirty(inode);
 		set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED,
-			&GFS2_I(inode)->i_flags);
+			&ip->i_flags);
 		if (inode->i_state & I_NEW)
 			iget_failed(inode);
 		else
-- 
GitLab


From 38552ff676f072e7d15c5e0a877fda613e57ed2d Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 2 Nov 2022 17:06:58 +0100
Subject: [PATCH 3265/4122] gfs2: Fix and clean up create / evict interaction

When gfs2_create_inode() fails after creating a new inode, it uses the
GIF_FREE_VFS_INODE and GIF_ALLOC_FAILED inode flags to communicate to
gfs2_evict_inode() which parts of the inode need to be deallocated and
destroyed.  In some error cases, the inode ends up being allocated on
disk and then accidentally left behind.  In others, the inode is
partially constructed and then not properly destroyed.  Clean this up by
completely handling the inode deallocation and destruction in
gfs2_evict_inode().

This means that gfs2_evict_inode() may now be faced with partially
constructed inodes, so add the necessary checks to cope with that.  In
particular, make sure that for incompletely constructed inodes, we're
not accessing the buffers backing the on-disk blocks; the contents may
be undefined.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/inode.c   | 26 ++++++++++++--------------
 fs/gfs2/meta_io.c |  6 ++++++
 fs/gfs2/super.c   | 35 +++++++++++++++++++++--------------
 fs/gfs2/xattr.c   | 26 +++++++++++++++-----------
 4 files changed, 54 insertions(+), 39 deletions(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index b91f15abe24e8..c057f3bd475fb 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -409,6 +409,8 @@ static int alloc_dinode(struct gfs2_inode *ip, u32 flags, unsigned *dblocks)
 	ip->i_no_formal_ino = ip->i_generation;
 	ip->i_inode.i_ino = ip->i_no_addr;
 	ip->i_goal = ip->i_no_addr;
+	if (*dblocks > 1)
+		ip->i_eattr = ip->i_no_addr + 1;
 
 out_trans_end:
 	gfs2_trans_end(sdp);
@@ -589,6 +591,12 @@ static int gfs2_initxattrs(struct inode *inode, const struct xattr *xattr_array,
  * @size: The initial size of the inode (ignored for directories)
  * @excl: Force fail if inode exists
  *
+ * FIXME: Change to allocate the disk blocks and write them out in the same
+ * transaction.  That way, we can no longer end up in a situation in which an
+ * inode is allocated, the node crashes, and the block looks like a valid
+ * inode.  (With atomic creates in place, we will also no longer need to zero
+ * the link count and dirty the inode here on failure.)
+ *
  * Returns: 0 on success, or error code
  */
 
@@ -604,7 +612,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
 	struct gfs2_glock *io_gl;
-	int error, free_vfs_inode = 1;
+	int error;
 	u32 aflags = 0;
 	unsigned blocks = 1;
 	struct gfs2_diradd da = { .bh = NULL, .save_loc = 1, };
@@ -742,10 +750,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	if (error)
 		goto fail_gunlock3;
 
-	if (blocks > 1) {
-		ip->i_eattr = ip->i_no_addr + 1;
+	if (blocks > 1)
 		gfs2_init_xattr(ip);
-	}
 	init_dinode(dip, ip, symname);
 	gfs2_trans_end(sdp);
 
@@ -753,9 +759,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	glock_set_object(io_gl, ip);
 	gfs2_set_iop(inode);
 
-	free_vfs_inode = 0; /* After this point, the inode is no longer
-			       considered free. Any failures need to undo
-			       the gfs2 structures. */
 	if (default_acl) {
 		error = __gfs2_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
 		if (error)
@@ -804,10 +807,6 @@ fail_gunlock3:
 fail_gunlock2:
 	gfs2_glock_put(io_gl);
 fail_free_inode:
-	if (ip->i_gl) {
-		if (free_vfs_inode) /* else evict will do the put for us */
-			gfs2_glock_put(ip->i_gl);
-	}
 	gfs2_rs_deltree(&ip->i_res);
 	gfs2_qa_put(ip);
 fail_free_acls:
@@ -817,11 +816,10 @@ fail_gunlock:
 	gfs2_dir_no_add(&da);
 	gfs2_glock_dq_uninit(&d_gh);
 	if (!IS_ERR_OR_NULL(inode)) {
+		set_bit(GIF_ALLOC_FAILED, &ip->i_flags);
 		clear_nlink(inode);
-		if (!free_vfs_inode)
+		if (ip->i_no_addr)
 			mark_inode_dirty(inode);
-		set_bit(free_vfs_inode ? GIF_FREE_VFS_INODE : GIF_ALLOC_FAILED,
-			&ip->i_flags);
 		if (inode->i_state & I_NEW)
 			iget_failed(inode);
 		else
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index 6ed728aae9a53..3c41b864ee5bc 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -442,6 +442,12 @@ void gfs2_journal_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen)
 	struct buffer_head *bh;
 	int ty;
 
+	if (!ip->i_gl) {
+		/* This can only happen during incomplete inode creation. */
+		BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+		return;
+	}
+
 	gfs2_ail1_wipe(sdp, bstart, blen);
 	while (blen) {
 		ty = REMOVE_META;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b018957a1bb24..eac9b0c34aac8 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -475,6 +475,12 @@ static void gfs2_dirty_inode(struct inode *inode, int flags)
 	int need_endtrans = 0;
 	int ret;
 
+	if (unlikely(!ip->i_gl)) {
+		/* This can only happen during incomplete inode creation. */
+		BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+		return;
+	}
+
 	if (unlikely(gfs2_withdrawn(sdp)))
 		return;
 	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) {
@@ -927,8 +933,7 @@ static int gfs2_drop_inode(struct inode *inode)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
 
-	if (!test_bit(GIF_FREE_VFS_INODE, &ip->i_flags) &&
-	    inode->i_nlink &&
+	if (inode->i_nlink &&
 	    gfs2_holder_initialized(&ip->i_iopen_gh)) {
 		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
 		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
@@ -1076,7 +1081,13 @@ static void gfs2_final_release_pages(struct gfs2_inode *ip)
 	struct inode *inode = &ip->i_inode;
 	struct gfs2_glock *gl = ip->i_gl;
 
-	truncate_inode_pages(gfs2_glock2aspace(ip->i_gl), 0);
+	if (unlikely(!gl)) {
+		/* This can only happen during incomplete inode creation. */
+		BUG_ON(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags));
+		return;
+	}
+
+	truncate_inode_pages(gfs2_glock2aspace(gl), 0);
 	truncate_inode_pages(&inode->i_data, 0);
 
 	if (atomic_read(&gl->gl_revokes) == 0) {
@@ -1218,10 +1229,8 @@ static enum dinode_demise evict_should_delete(struct inode *inode,
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	int ret;
 
-	if (test_bit(GIF_ALLOC_FAILED, &ip->i_flags)) {
-		BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));
+	if (unlikely(test_bit(GIF_ALLOC_FAILED, &ip->i_flags)))
 		goto should_delete;
-	}
 
 	if (test_bit(GIF_DEFERRED_DELETE, &ip->i_flags))
 		return SHOULD_DEFER_EVICTION;
@@ -1298,9 +1307,11 @@ static int evict_unlinked_inode(struct inode *inode)
 	   do, gfs2_create_inode can create another inode at the same block
 	   location and try to set gl_object again. We clear gl_object here so
 	   that subsequent inode creates don't see an old gl_object. */
-	glock_clear_object(ip->i_gl, ip);
+	if (ip->i_gl) {
+		glock_clear_object(ip->i_gl, ip);
+		gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
+	}
 	ret = gfs2_dinode_dealloc(ip);
-	gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
 out:
 	return ret;
 }
@@ -1367,12 +1378,7 @@ static void gfs2_evict_inode(struct inode *inode)
 	struct gfs2_holder gh;
 	int ret;
 
-	if (test_bit(GIF_FREE_VFS_INODE, &ip->i_flags)) {
-		clear_inode(inode);
-		return;
-	}
-
-	if (inode->i_nlink || sb_rdonly(sb))
+	if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr)
 		goto out;
 
 	gfs2_holder_mark_uninitialized(&gh);
@@ -1429,6 +1435,7 @@ static struct inode *gfs2_alloc_inode(struct super_block *sb)
 	ip = alloc_inode_sb(sb, gfs2_inode_cachep, GFP_KERNEL);
 	if (!ip)
 		return NULL;
+	ip->i_no_addr = 0;
 	ip->i_flags = 0;
 	ip->i_gl = NULL;
 	gfs2_holder_mark_uninitialized(&ip->i_iopen_gh);
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index f6a66050380e9..518c0677e12ae 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -1412,11 +1412,13 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
 	ip->i_eattr = 0;
 	gfs2_add_inode_blocks(&ip->i_inode, -1);
 
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (!error) {
-		gfs2_trans_add_meta(ip->i_gl, dibh);
-		gfs2_dinode_out(ip, dibh->b_data);
-		brelse(dibh);
+	if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (!error) {
+			gfs2_trans_add_meta(ip->i_gl, dibh);
+			gfs2_dinode_out(ip, dibh->b_data);
+			brelse(dibh);
+		}
 	}
 
 	gfs2_trans_end(sdp);
@@ -1445,14 +1447,16 @@ int gfs2_ea_dealloc(struct gfs2_inode *ip)
 	if (error)
 		return error;
 
-	error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
-	if (error)
-		goto out_quota;
-
-	if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
-		error = ea_dealloc_indirect(ip);
+	if (likely(!test_bit(GIF_ALLOC_FAILED, &ip->i_flags))) {
+		error = ea_foreach(ip, ea_dealloc_unstuffed, NULL);
 		if (error)
 			goto out_quota;
+
+		if (ip->i_diskflags & GFS2_DIF_EA_INDIRECT) {
+			error = ea_dealloc_indirect(ip);
+			if (error)
+				goto out_quota;
+		}
 	}
 
 	error = ea_dealloc_block(ip);
-- 
GitLab


From 4dc334cab1c34efb17fa6cd10b12fbc9458e5760 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Fri, 2 Dec 2022 05:54:01 -0800
Subject: [PATCH 3266/4122] i915/gvt: Move gvt mapping cache initialization to
 intel_vgpu_init_dev()

vfio container registers .dma_unmap() callback after the device is opened.
So it's fine for mdev drivers to initialize internal mapping cache in
.open_device(). See vfio_device_container_register().

Now with iommufd an access ops with an unmap callback is registered when
the device is bound to iommufd which is before .open_device() is
called. This implies gvt's .dma_unmap() could be called before its
internal mapping cache is initialized.

The fix is moving gvt mapping cache initialization to vGPU init. While at
it also move ptable initialization together.

Link: https://lore.kernel.org/r/20221202135402.756470-2-yi.l.liu@intel.com
Reviewed-by: Zhi Wang <zhi.a.wang@intel.com>
Reviewed-by: Zhenyu Wang <zhenyuw@linux.intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 7a45e5360caf2..aaf0d9e8da951 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -671,9 +671,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 
 	vgpu->attached = true;
 
-	kvmgt_protect_table_init(vgpu);
-	gvt_cache_init(vgpu);
-
 	vgpu->track_node.track_write = kvmgt_page_track_write;
 	vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
 	kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
@@ -718,6 +715,11 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 	kvmgt_protect_table_destroy(vgpu);
 	gvt_cache_destroy(vgpu);
 
+	WARN_ON(vgpu->nr_cache_entries);
+
+	vgpu->gfn_cache = RB_ROOT;
+	vgpu->dma_addr_cache = RB_ROOT;
+
 	intel_vgpu_release_msi_eventfd_ctx(vgpu);
 
 	vgpu->attached = false;
@@ -1451,9 +1453,17 @@ static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
 	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 	struct intel_vgpu_type *type =
 		container_of(mdev->type, struct intel_vgpu_type, type);
+	int ret;
 
 	vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
-	return intel_gvt_create_vgpu(vgpu, type->conf);
+	ret = intel_gvt_create_vgpu(vgpu, type->conf);
+	if (ret)
+		return ret;
+
+	kvmgt_protect_table_init(vgpu);
+	gvt_cache_init(vgpu);
+
+	return 0;
 }
 
 static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
-- 
GitLab


From 2a54e347d990574ceb047b71ea0b03979232b85e Mon Sep 17 00:00:00 2001
From: Matthew Rosato <mjrosato@linux.ibm.com>
Date: Fri, 2 Dec 2022 05:54:02 -0800
Subject: [PATCH 3267/4122] vfio/ap: Validate iova during dma_unmap and trigger
 irq disable

Currently, each mapped iova is stashed in its associated vfio_ap_queue;
when we get an unmap request, validate that it matches with one or more of
these stashed values before attempting unpins.

Each stashed iova represents IRQ that was enabled for a queue.  Therefore,
if a match is found, trigger IRQ disable for this queue to ensure that
underlying firmware will no longer try to use the associated pfn after the
page is unpinned. IRQ disable will also handle the associated unpin.

Link: https://lore.kernel.org/r/20221202135402.756470-3-yi.l.liu@intel.com
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/s390/crypto/vfio_ap_ops.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 0b4cc8c597ae6..8bf353d46820e 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1535,13 +1535,29 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
 	return 0;
 }
 
+static void unmap_iova(struct ap_matrix_mdev *matrix_mdev, u64 iova, u64 length)
+{
+	struct ap_queue_table *qtable = &matrix_mdev->qtable;
+	struct vfio_ap_queue *q;
+	int loop_cursor;
+
+	hash_for_each(qtable->queues, loop_cursor, q, mdev_qnode) {
+		if (q->saved_iova >= iova && q->saved_iova < iova + length)
+			vfio_ap_irq_disable(q);
+	}
+}
+
 static void vfio_ap_mdev_dma_unmap(struct vfio_device *vdev, u64 iova,
 				   u64 length)
 {
 	struct ap_matrix_mdev *matrix_mdev =
 		container_of(vdev, struct ap_matrix_mdev, vdev);
 
-	vfio_unpin_pages(&matrix_mdev->vdev, iova, 1);
+	mutex_lock(&matrix_dev->mdevs_lock);
+
+	unmap_iova(matrix_mdev, iova, length);
+
+	mutex_unlock(&matrix_dev->mdevs_lock);
 }
 
 /**
-- 
GitLab


From 294aaccb50130f596943be892c5d3a3568b76c57 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:46 -0400
Subject: [PATCH 3268/4122] vfio: Move vfio_device driver open/close code to a
 function

This error unwind is getting complicated. Move all the code into two
pair'd function. The functions should be called when the open_count == 1
after incrementing/before decrementing.

Link: https://lore.kernel.org/r/1-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 95 ++++++++++++++++++++++------------------
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 2d168793d4e1c..2e8346d13c16c 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -734,6 +734,51 @@ bool vfio_assert_device_open(struct vfio_device *device)
 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
 }
 
+static int vfio_device_first_open(struct vfio_device *device)
+{
+	int ret;
+
+	lockdep_assert_held(&device->dev_set->lock);
+
+	if (!try_module_get(device->dev->driver->owner))
+		return -ENODEV;
+
+	/*
+	 * Here we pass the KVM pointer with the group under the lock.  If the
+	 * device driver will use it, it must obtain a reference and release it
+	 * during close_device.
+	 */
+	mutex_lock(&device->group->group_lock);
+	device->kvm = device->group->kvm;
+	if (device->ops->open_device) {
+		ret = device->ops->open_device(device);
+		if (ret)
+			goto err_module_put;
+	}
+	vfio_device_container_register(device);
+	mutex_unlock(&device->group->group_lock);
+	return 0;
+
+err_module_put:
+	device->kvm = NULL;
+	mutex_unlock(&device->group->group_lock);
+	module_put(device->dev->driver->owner);
+	return ret;
+}
+
+static void vfio_device_last_close(struct vfio_device *device)
+{
+	lockdep_assert_held(&device->dev_set->lock);
+
+	mutex_lock(&device->group->group_lock);
+	vfio_device_container_unregister(device);
+	if (device->ops->close_device)
+		device->ops->close_device(device);
+	device->kvm = NULL;
+	mutex_unlock(&device->group->group_lock);
+	module_put(device->dev->driver->owner);
+}
+
 static struct file *vfio_device_open(struct vfio_device *device)
 {
 	struct file *filep;
@@ -745,29 +790,12 @@ static struct file *vfio_device_open(struct vfio_device *device)
 	if (ret)
 		return ERR_PTR(ret);
 
-	if (!try_module_get(device->dev->driver->owner)) {
-		ret = -ENODEV;
-		goto err_unassign_container;
-	}
-
 	mutex_lock(&device->dev_set->lock);
 	device->open_count++;
 	if (device->open_count == 1) {
-		/*
-		 * Here we pass the KVM pointer with the group under the read
-		 * lock.  If the device driver will use it, it must obtain a
-		 * reference and release it during close_device.
-		 */
-		mutex_lock(&device->group->group_lock);
-		device->kvm = device->group->kvm;
-
-		if (device->ops->open_device) {
-			ret = device->ops->open_device(device);
-			if (ret)
-				goto err_undo_count;
-		}
-		vfio_device_container_register(device);
-		mutex_unlock(&device->group->group_lock);
+		ret = vfio_device_first_open(device);
+		if (ret)
+			goto err_unassign_container;
 	}
 	mutex_unlock(&device->dev_set->lock);
 
@@ -800,20 +828,11 @@ static struct file *vfio_device_open(struct vfio_device *device)
 
 err_close_device:
 	mutex_lock(&device->dev_set->lock);
-	mutex_lock(&device->group->group_lock);
-	if (device->open_count == 1 && device->ops->close_device) {
-		device->ops->close_device(device);
-
-		vfio_device_container_unregister(device);
-	}
-err_undo_count:
-	mutex_unlock(&device->group->group_lock);
+	if (device->open_count == 1)
+		vfio_device_last_close(device);
+err_unassign_container:
 	device->open_count--;
-	if (device->open_count == 0 && device->kvm)
-		device->kvm = NULL;
 	mutex_unlock(&device->dev_set->lock);
-	module_put(device->dev->driver->owner);
-err_unassign_container:
 	vfio_device_unassign_container(device);
 	return ERR_PTR(ret);
 }
@@ -1016,19 +1035,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 
 	mutex_lock(&device->dev_set->lock);
 	vfio_assert_device_open(device);
-	mutex_lock(&device->group->group_lock);
-	if (device->open_count == 1 && device->ops->close_device)
-		device->ops->close_device(device);
-
-	vfio_device_container_unregister(device);
-	mutex_unlock(&device->group->group_lock);
+	if (device->open_count == 1)
+		vfio_device_last_close(device);
 	device->open_count--;
-	if (device->open_count == 0)
-		device->kvm = NULL;
 	mutex_unlock(&device->dev_set->lock);
 
-	module_put(device->dev->driver->owner);
-
 	vfio_device_unassign_container(device);
 
 	vfio_device_put_registration(device);
-- 
GitLab


From bab6fabc01d99c7e0293807e835231740379b692 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:47 -0400
Subject: [PATCH 3269/4122] vfio: Move vfio_device_assign_container() into
 vfio_device_first_open()

The only thing this function does is assert the group has an assigned
container and incrs refcounts.

The overall model we have is that once a container_users refcount is
incremented it cannot be de-assigned from the group -
vfio_group_ioctl_unset_container() will fail and the group FD cannot be
closed.

Thus we do not need to check this on every device FD open, just the
first. Reorganize the code so that only the first open and last close
manages the container.

Link: https://lore.kernel.org/r/2-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/container.c |  4 ++--
 drivers/vfio/vfio_main.c | 24 +++++++++++-------------
 2 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d74164abbf401..dd79a66ec62ca 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -531,11 +531,11 @@ int vfio_device_assign_container(struct vfio_device *device)
 
 void vfio_device_unassign_container(struct vfio_device *device)
 {
-	mutex_lock(&device->group->group_lock);
+	lockdep_assert_held_write(&device->group->group_lock);
+
 	WARN_ON(device->group->container_users <= 1);
 	device->group->container_users--;
 	fput(device->group->opened_file);
-	mutex_unlock(&device->group->group_lock);
 }
 
 /*
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 2e8346d13c16c..717c7f404feee 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -749,18 +749,24 @@ static int vfio_device_first_open(struct vfio_device *device)
 	 * during close_device.
 	 */
 	mutex_lock(&device->group->group_lock);
+	ret = vfio_device_assign_container(device);
+	if (ret)
+		goto err_module_put;
+
 	device->kvm = device->group->kvm;
 	if (device->ops->open_device) {
 		ret = device->ops->open_device(device);
 		if (ret)
-			goto err_module_put;
+			goto err_container;
 	}
 	vfio_device_container_register(device);
 	mutex_unlock(&device->group->group_lock);
 	return 0;
 
-err_module_put:
+err_container:
 	device->kvm = NULL;
+	vfio_device_unassign_container(device);
+err_module_put:
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 	return ret;
@@ -775,6 +781,7 @@ static void vfio_device_last_close(struct vfio_device *device)
 	if (device->ops->close_device)
 		device->ops->close_device(device);
 	device->kvm = NULL;
+	vfio_device_unassign_container(device);
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 }
@@ -784,18 +791,12 @@ static struct file *vfio_device_open(struct vfio_device *device)
 	struct file *filep;
 	int ret;
 
-	mutex_lock(&device->group->group_lock);
-	ret = vfio_device_assign_container(device);
-	mutex_unlock(&device->group->group_lock);
-	if (ret)
-		return ERR_PTR(ret);
-
 	mutex_lock(&device->dev_set->lock);
 	device->open_count++;
 	if (device->open_count == 1) {
 		ret = vfio_device_first_open(device);
 		if (ret)
-			goto err_unassign_container;
+			goto err_unlock;
 	}
 	mutex_unlock(&device->dev_set->lock);
 
@@ -830,10 +831,9 @@ err_close_device:
 	mutex_lock(&device->dev_set->lock);
 	if (device->open_count == 1)
 		vfio_device_last_close(device);
-err_unassign_container:
+err_unlock:
 	device->open_count--;
 	mutex_unlock(&device->dev_set->lock);
-	vfio_device_unassign_container(device);
 	return ERR_PTR(ret);
 }
 
@@ -1040,8 +1040,6 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 	device->open_count--;
 	mutex_unlock(&device->dev_set->lock);
 
-	vfio_device_unassign_container(device);
-
 	vfio_device_put_registration(device);
 
 	return 0;
-- 
GitLab


From 04f930c3e44bb9010bba8521f970d00d95a94eb0 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:48 -0400
Subject: [PATCH 3270/4122] vfio: Rename
 vfio_device_assign/unassign_container()

These functions don't really assign anything anymore, they just increment
some refcounts and do a sanity check. Call them
vfio_group_[un]use_container()

Link: https://lore.kernel.org/r/3-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/container.c | 14 ++++++--------
 drivers/vfio/vfio.h      |  4 ++--
 drivers/vfio/vfio_main.c |  6 +++---
 3 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index dd79a66ec62ca..499777930b08f 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -511,10 +511,8 @@ void vfio_group_detach_container(struct vfio_group *group)
 	vfio_container_put(container);
 }
 
-int vfio_device_assign_container(struct vfio_device *device)
+int vfio_group_use_container(struct vfio_group *group)
 {
-	struct vfio_group *group = device->group;
-
 	lockdep_assert_held(&group->group_lock);
 
 	if (!group->container || !group->container->iommu_driver ||
@@ -529,13 +527,13 @@ int vfio_device_assign_container(struct vfio_device *device)
 	return 0;
 }
 
-void vfio_device_unassign_container(struct vfio_device *device)
+void vfio_group_unuse_container(struct vfio_group *group)
 {
-	lockdep_assert_held_write(&device->group->group_lock);
+	lockdep_assert_held(&group->group_lock);
 
-	WARN_ON(device->group->container_users <= 1);
-	device->group->container_users--;
-	fput(device->group->opened_file);
+	WARN_ON(group->container_users <= 1);
+	group->container_users--;
+	fput(group->opened_file);
 }
 
 /*
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index bcad54bbab08c..f95f4925b83bb 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -112,8 +112,8 @@ void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
 bool vfio_assert_device_open(struct vfio_device *device);
 
 struct vfio_container *vfio_container_from_file(struct file *filep);
-int vfio_device_assign_container(struct vfio_device *device);
-void vfio_device_unassign_container(struct vfio_device *device);
+int vfio_group_use_container(struct vfio_group *group);
+void vfio_group_unuse_container(struct vfio_group *group);
 int vfio_container_attach_group(struct vfio_container *container,
 				struct vfio_group *group);
 void vfio_group_detach_container(struct vfio_group *group);
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 717c7f404feee..8c2dcb481ae10 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -749,7 +749,7 @@ static int vfio_device_first_open(struct vfio_device *device)
 	 * during close_device.
 	 */
 	mutex_lock(&device->group->group_lock);
-	ret = vfio_device_assign_container(device);
+	ret = vfio_group_use_container(device->group);
 	if (ret)
 		goto err_module_put;
 
@@ -765,7 +765,7 @@ static int vfio_device_first_open(struct vfio_device *device)
 
 err_container:
 	device->kvm = NULL;
-	vfio_device_unassign_container(device);
+	vfio_group_unuse_container(device->group);
 err_module_put:
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
@@ -781,7 +781,7 @@ static void vfio_device_last_close(struct vfio_device *device)
 	if (device->ops->close_device)
 		device->ops->close_device(device);
 	device->kvm = NULL;
-	vfio_device_unassign_container(device);
+	vfio_group_unuse_container(device->group);
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 }
-- 
GitLab


From 0d8227b622f3529661ad6a9702a52932e149a30d Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:49 -0400
Subject: [PATCH 3271/4122] vfio: Use IOMMU_CAP_ENFORCE_CACHE_COHERENCY for
 vfio_file_enforced_coherent()

iommufd doesn't establish the iommu_domains until after the device FD is
opened, even if the container has been set. This design is part of moving
away from the group centric iommu APIs.

This is fine, except that the normal sequence of establishing the kvm
wbinvd won't work:

   group = open("/dev/vfio/XX")
   ioctl(group, VFIO_GROUP_SET_CONTAINER)
   ioctl(kvm, KVM_DEV_VFIO_GROUP_ADD)
   ioctl(group, VFIO_GROUP_GET_DEVICE_FD)

As the domains don't start existing until GET_DEVICE_FD. Further,
GET_DEVICE_FD requires that KVM_DEV_VFIO_GROUP_ADD already be done as that
is what sets the group->kvm and thus device->kvm for the driver to use
during open.

Now that we have device centric cap ops and the new
IOMMU_CAP_ENFORCE_CACHE_COHERENCY we know what the iommu_domain will be
capable of without having to create it. Use this to compute
vfio_file_enforced_coherent() and resolve the ordering problems.

VFIO always tries to upgrade domains to enforce cache coherency, it never
attaches a device that supports enforce cache coherency to a less capable
domain, so the cap test is a sufficient proxy for the ultimate
outcome. iommufd also ensures that devices that set the cap will be
connected to enforcing domains.

Link: https://lore.kernel.org/r/4-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/container.c |  5 +++--
 drivers/vfio/vfio.h      |  2 --
 drivers/vfio/vfio_main.c | 29 ++++++++++++++++-------------
 3 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index 499777930b08f..d97747dfb05d0 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -188,8 +188,9 @@ void vfio_device_container_unregister(struct vfio_device *device)
 			device->group->container->iommu_data, device);
 }
 
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
-					  unsigned long arg)
+static long
+vfio_container_ioctl_check_extension(struct vfio_container *container,
+				     unsigned long arg)
 {
 	struct vfio_iommu_driver *driver;
 	long ret = 0;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index f95f4925b83bb..7315612587042 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -119,8 +119,6 @@ int vfio_container_attach_group(struct vfio_container *container,
 void vfio_group_detach_container(struct vfio_group *group);
 void vfio_device_container_register(struct vfio_device *device);
 void vfio_device_container_unregister(struct vfio_device *device);
-long vfio_container_ioctl_check_extension(struct vfio_container *container,
-					  unsigned long arg);
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
 
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 8c2dcb481ae10..77d6c0ba6a830 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1622,24 +1622,27 @@ EXPORT_SYMBOL_GPL(vfio_file_is_group);
 bool vfio_file_enforced_coherent(struct file *file)
 {
 	struct vfio_group *group = file->private_data;
-	bool ret;
+	struct vfio_device *device;
+	bool ret = true;
 
 	if (!vfio_file_is_group(file))
 		return true;
 
-	mutex_lock(&group->group_lock);
-	if (group->container) {
-		ret = vfio_container_ioctl_check_extension(group->container,
-							   VFIO_DMA_CC_IOMMU);
-	} else {
-		/*
-		 * Since the coherency state is determined only once a container
-		 * is attached the user must do so before they can prove they
-		 * have permission.
-		 */
-		ret = true;
+	/*
+	 * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
+	 * any domain later attached to it will also not support it. If the cap
+	 * is set then the iommu_domain eventually attached to the device/group
+	 * must use a domain with enforce_cache_coherency().
+	 */
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(device, &group->device_list, group_next) {
+		if (!device_iommu_capable(device->dev,
+					  IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
+			ret = false;
+			break;
+		}
 	}
-	mutex_unlock(&group->group_lock);
+	mutex_unlock(&group->device_lock);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
-- 
GitLab


From 2a3dab19a0a6c1823645764188776f271de1b3cf Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:50 -0400
Subject: [PATCH 3272/4122] vfio-iommufd: Allow iommufd to be used in place of
 a container fd

This makes VFIO_GROUP_SET_CONTAINER accept both a vfio container FD and an
iommufd.

In iommufd mode an IOAS will exist after the SET_CONTAINER, but it will
not be attached to any groups.

For VFIO this means that the VFIO_GROUP_GET_STATUS and
VFIO_GROUP_FLAGS_VIABLE works subtly differently. With the container FD
the iommu_group_claim_dma_owner() is done during SET_CONTAINER but for
IOMMUFD this is done during VFIO_GROUP_GET_DEVICE_FD. Meaning that
VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due to
viability.

As GET_DEVICE_FD can fail for many reasons already this is not expected to
be a meaningful difference.

Reorganize the tests for if the group has an assigned container or iommu
into a vfio_group_has_iommu() function and consolidate all the duplicated
WARN_ON's etc related to this.

Call container functions only if a container is actually present on the
group.

Link: https://lore.kernel.org/r/5-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/Kconfig     |  1 +
 drivers/vfio/container.c |  7 +++-
 drivers/vfio/vfio.h      |  2 +
 drivers/vfio/vfio_main.c | 88 +++++++++++++++++++++++++++++++++-------
 4 files changed, 82 insertions(+), 16 deletions(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 86c381ceb9a1e..1118d322eec97 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -2,6 +2,7 @@
 menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	select IOMMU_API
+	depends on IOMMUFD || !IOMMUFD
 	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
 	select INTERVAL_TREE
 	help
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index d97747dfb05d0..8772dad680853 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -516,8 +516,11 @@ int vfio_group_use_container(struct vfio_group *group)
 {
 	lockdep_assert_held(&group->group_lock);
 
-	if (!group->container || !group->container->iommu_driver ||
-	    WARN_ON(!group->container_users))
+	/*
+	 * The container fd has been assigned with VFIO_GROUP_SET_CONTAINER but
+	 * VFIO_SET_IOMMU hasn't been done yet.
+	 */
+	if (!group->container->iommu_driver)
 		return -EINVAL;
 
 	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO))
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 7315612587042..a9dd0615266cb 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -10,6 +10,7 @@
 #include <linux/cdev.h>
 #include <linux/module.h>
 
+struct iommufd_ctx;
 struct iommu_group;
 struct vfio_device;
 struct vfio_container;
@@ -60,6 +61,7 @@ struct vfio_group {
 	struct kvm			*kvm;
 	struct file			*opened_file;
 	struct blocking_notifier_head	notifier;
+	struct iommufd_ctx		*iommufd;
 };
 
 /* events for the backend driver notify callback */
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 77d6c0ba6a830..f11157d056e6c 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -35,6 +35,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/interval_tree.h>
 #include <linux/iova_bitmap.h>
+#include <linux/iommufd.h>
 #include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
@@ -662,6 +663,18 @@ EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
 /*
  * VFIO Group fd, /dev/vfio/$GROUP
  */
+static bool vfio_group_has_iommu(struct vfio_group *group)
+{
+	lockdep_assert_held(&group->group_lock);
+	/*
+	 * There can only be users if there is a container, and if there is a
+	 * container there must be users.
+	 */
+	WARN_ON(!group->container != !group->container_users);
+
+	return group->container || group->iommufd;
+}
+
 /*
  * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
  * if there was no container to unset.  Since the ioctl is called on
@@ -673,15 +686,21 @@ static int vfio_group_ioctl_unset_container(struct vfio_group *group)
 	int ret = 0;
 
 	mutex_lock(&group->group_lock);
-	if (!group->container) {
+	if (!vfio_group_has_iommu(group)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
-	if (group->container_users != 1) {
-		ret = -EBUSY;
-		goto out_unlock;
+	if (group->container) {
+		if (group->container_users != 1) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+		vfio_group_detach_container(group);
+	}
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
 	}
-	vfio_group_detach_container(group);
 
 out_unlock:
 	mutex_unlock(&group->group_lock);
@@ -692,6 +711,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
 					  int __user *arg)
 {
 	struct vfio_container *container;
+	struct iommufd_ctx *iommufd;
 	struct fd f;
 	int ret;
 	int fd;
@@ -704,7 +724,7 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
 		return -EBADF;
 
 	mutex_lock(&group->group_lock);
-	if (group->container || WARN_ON(group->container_users)) {
+	if (vfio_group_has_iommu(group)) {
 		ret = -EINVAL;
 		goto out_unlock;
 	}
@@ -714,12 +734,28 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
 	}
 
 	container = vfio_container_from_file(f.file);
-	ret = -EINVAL;
 	if (container) {
 		ret = vfio_container_attach_group(container, group);
 		goto out_unlock;
 	}
 
+	iommufd = iommufd_ctx_from_file(f.file);
+	if (!IS_ERR(iommufd)) {
+		u32 ioas_id;
+
+		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
+		if (ret) {
+			iommufd_ctx_put(group->iommufd);
+			goto out_unlock;
+		}
+
+		group->iommufd = iommufd;
+		goto out_unlock;
+	}
+
+	/* The FD passed is not recognized. */
+	ret = -EBADFD;
+
 out_unlock:
 	mutex_unlock(&group->group_lock);
 	fdput(f);
@@ -749,9 +785,16 @@ static int vfio_device_first_open(struct vfio_device *device)
 	 * during close_device.
 	 */
 	mutex_lock(&device->group->group_lock);
-	ret = vfio_group_use_container(device->group);
-	if (ret)
+	if (!vfio_group_has_iommu(device->group)) {
+		ret = -EINVAL;
 		goto err_module_put;
+	}
+
+	if (device->group->container) {
+		ret = vfio_group_use_container(device->group);
+		if (ret)
+			goto err_module_put;
+	}
 
 	device->kvm = device->group->kvm;
 	if (device->ops->open_device) {
@@ -759,13 +802,15 @@ static int vfio_device_first_open(struct vfio_device *device)
 		if (ret)
 			goto err_container;
 	}
-	vfio_device_container_register(device);
+	if (device->group->container)
+		vfio_device_container_register(device);
 	mutex_unlock(&device->group->group_lock);
 	return 0;
 
 err_container:
 	device->kvm = NULL;
-	vfio_group_unuse_container(device->group);
+	if (device->group->container)
+		vfio_group_unuse_container(device->group);
 err_module_put:
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
@@ -777,11 +822,13 @@ static void vfio_device_last_close(struct vfio_device *device)
 	lockdep_assert_held(&device->dev_set->lock);
 
 	mutex_lock(&device->group->group_lock);
-	vfio_device_container_unregister(device);
+	if (device->group->container)
+		vfio_device_container_unregister(device);
 	if (device->ops->close_device)
 		device->ops->close_device(device);
 	device->kvm = NULL;
-	vfio_group_unuse_container(device->group);
+	if (device->group->container)
+		vfio_group_unuse_container(device->group);
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 }
@@ -897,7 +944,14 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group,
 		return -ENODEV;
 	}
 
-	if (group->container)
+	/*
+	 * With the container FD the iommu_group_claim_dma_owner() is done
+	 * during SET_CONTAINER but for IOMMFD this is done during
+	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
+	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
+	 * to viability.
+	 */
+	if (vfio_group_has_iommu(group))
 		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
 				VFIO_GROUP_FLAGS_VIABLE;
 	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
@@ -980,6 +1034,10 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
 	WARN_ON(group->notifier.head);
 	if (group->container)
 		vfio_group_detach_container(group);
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
+	}
 	group->opened_file = NULL;
 	mutex_unlock(&group->group_lock);
 	return 0;
@@ -1878,6 +1936,8 @@ static void __exit vfio_cleanup(void)
 module_init(vfio_init);
 module_exit(vfio_cleanup);
 
+MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS(IOMMUFD_VFIO);
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);
-- 
GitLab


From a4d1f91db5021c57e14721ac090616c90386ac70 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:51 -0400
Subject: [PATCH 3273/4122] vfio-iommufd: Support iommufd for physical VFIO
 devices

This creates the iommufd_device for the physical VFIO drivers. These are
all the drivers that are calling vfio_register_group_dev() and expect the
type1 code to setup a real iommu_domain against their parent struct
device.

The design gives the driver a choice in how it gets connected to iommufd
by providing bind_iommufd/unbind_iommufd/attach_ioas callbacks to
implement as required. The core code provides three default callbacks for
physical mode using a real iommu_domain. This is suitable for drivers
using vfio_register_group_dev()

Link: https://lore.kernel.org/r/6-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/Makefile                         |   1 +
 drivers/vfio/fsl-mc/vfio_fsl_mc.c             |   3 +
 drivers/vfio/iommufd.c                        | 100 ++++++++++++++++++
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    |   6 ++
 drivers/vfio/pci/mlx5/main.c                  |   3 +
 drivers/vfio/pci/vfio_pci.c                   |   3 +
 drivers/vfio/platform/vfio_amba.c             |   3 +
 drivers/vfio/platform/vfio_platform.c         |   3 +
 drivers/vfio/vfio.h                           |  15 +++
 drivers/vfio/vfio_main.c                      |  15 ++-
 include/linux/vfio.h                          |  25 +++++
 11 files changed, 175 insertions(+), 2 deletions(-)
 create mode 100644 drivers/vfio/iommufd.c

diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index b693a1169286f..3863922529ef2 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -6,6 +6,7 @@ obj-$(CONFIG_VFIO) += vfio.o
 vfio-y += vfio_main.o \
 	  iova_bitmap.o \
 	  container.o
+vfio-$(CONFIG_IOMMUFD) += iommufd.o
 
 obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
diff --git a/drivers/vfio/fsl-mc/vfio_fsl_mc.c b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
index b16874e913e4f..5cd4bb4764403 100644
--- a/drivers/vfio/fsl-mc/vfio_fsl_mc.c
+++ b/drivers/vfio/fsl-mc/vfio_fsl_mc.c
@@ -592,6 +592,9 @@ static const struct vfio_device_ops vfio_fsl_mc_ops = {
 	.read		= vfio_fsl_mc_read,
 	.write		= vfio_fsl_mc_write,
 	.mmap		= vfio_fsl_mc_mmap,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static struct fsl_mc_driver vfio_fsl_mc_driver = {
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
new file mode 100644
index 0000000000000..6e47a3df1a710
--- /dev/null
+++ b/drivers/vfio/iommufd.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "vfio.h"
+
+MODULE_IMPORT_NS(IOMMUFD);
+MODULE_IMPORT_NS(IOMMUFD_VFIO);
+
+int vfio_iommufd_bind(struct vfio_device *vdev, struct iommufd_ctx *ictx)
+{
+	u32 ioas_id;
+	u32 device_id;
+	int ret;
+
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	/*
+	 * If the driver doesn't provide this op then it means the device does
+	 * not do DMA at all. So nothing to do.
+	 */
+	if (!vdev->ops->bind_iommufd)
+		return 0;
+
+	ret = vdev->ops->bind_iommufd(vdev, ictx, &device_id);
+	if (ret)
+		return ret;
+
+	ret = iommufd_vfio_compat_ioas_id(ictx, &ioas_id);
+	if (ret)
+		goto err_unbind;
+	ret = vdev->ops->attach_ioas(vdev, &ioas_id);
+	if (ret)
+		goto err_unbind;
+
+	/*
+	 * The legacy path has no way to return the device id or the selected
+	 * pt_id
+	 */
+	return 0;
+
+err_unbind:
+	if (vdev->ops->unbind_iommufd)
+		vdev->ops->unbind_iommufd(vdev);
+	return ret;
+}
+
+void vfio_iommufd_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->ops->unbind_iommufd)
+		vdev->ops->unbind_iommufd(vdev);
+}
+
+/*
+ * The physical standard ops mean that the iommufd_device is bound to the
+ * physical device vdev->dev that was provided to vfio_init_group_dev(). Drivers
+ * using this ops set should call vfio_register_group_dev()
+ */
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+	struct iommufd_device *idev;
+
+	idev = iommufd_device_bind(ictx, vdev->dev, out_device_id);
+	if (IS_ERR(idev))
+		return PTR_ERR(idev);
+	vdev->iommufd_device = idev;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_bind);
+
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->iommufd_attached) {
+		iommufd_device_detach(vdev->iommufd_device);
+		vdev->iommufd_attached = false;
+	}
+	iommufd_device_unbind(vdev->iommufd_device);
+	vdev->iommufd_device = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_unbind);
+
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+	int rc;
+
+	rc = iommufd_device_attach(vdev->iommufd_device, pt_id);
+	if (rc)
+		return rc;
+	vdev->iommufd_attached = true;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 39eeca18a0f7c..40019b11c5a96 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1246,6 +1246,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_migrn_ops = {
 	.mmap = hisi_acc_vfio_pci_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
@@ -1261,6 +1264,9 @@ static const struct vfio_device_ops hisi_acc_vfio_pci_ops = {
 	.mmap = vfio_pci_core_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static int hisi_acc_vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index fd6ccb8454a24..32d1f38d351e7 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -623,6 +623,9 @@ static const struct vfio_device_ops mlx5vf_pci_ops = {
 	.mmap = vfio_pci_core_mmap,
 	.request = vfio_pci_core_request,
 	.match = vfio_pci_core_match,
+	.bind_iommufd = vfio_iommufd_physical_bind,
+	.unbind_iommufd = vfio_iommufd_physical_unbind,
+	.attach_ioas = vfio_iommufd_physical_attach_ioas,
 };
 
 static int mlx5vf_pci_probe(struct pci_dev *pdev,
diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c
index 1d4919edfbde4..29091ee2e9849 100644
--- a/drivers/vfio/pci/vfio_pci.c
+++ b/drivers/vfio/pci/vfio_pci.c
@@ -138,6 +138,9 @@ static const struct vfio_device_ops vfio_pci_ops = {
 	.mmap		= vfio_pci_core_mmap,
 	.request	= vfio_pci_core_request,
 	.match		= vfio_pci_core_match,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static int vfio_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
index eaea63e5294c5..5a046098d0bdf 100644
--- a/drivers/vfio/platform/vfio_amba.c
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -117,6 +117,9 @@ static const struct vfio_device_ops vfio_amba_ops = {
 	.read		= vfio_platform_read,
 	.write		= vfio_platform_write,
 	.mmap		= vfio_platform_mmap,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static const struct amba_id pl330_ids[] = {
diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
index 82cedcebfd902..b87c3b7087834 100644
--- a/drivers/vfio/platform/vfio_platform.c
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -106,6 +106,9 @@ static const struct vfio_device_ops vfio_platform_ops = {
 	.read		= vfio_platform_read,
 	.write		= vfio_platform_write,
 	.mmap		= vfio_platform_mmap,
+	.bind_iommufd	= vfio_iommufd_physical_bind,
+	.unbind_iommufd	= vfio_iommufd_physical_unbind,
+	.attach_ioas	= vfio_iommufd_physical_attach_ioas,
 };
 
 static struct platform_driver vfio_platform_driver = {
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index a9dd0615266cb..9766f70a12c51 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -124,6 +124,21 @@ void vfio_device_container_unregister(struct vfio_device *device);
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
 
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
+void vfio_iommufd_unbind(struct vfio_device *device);
+#else
+static inline int vfio_iommufd_bind(struct vfio_device *device,
+				    struct iommufd_ctx *ictx)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_iommufd_unbind(struct vfio_device *device)
+{
+}
+#endif
+
 #ifdef CONFIG_VFIO_NOIOMMU
 extern bool vfio_noiommu __read_mostly;
 #else
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index f11157d056e6c..a74c34232c034 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -525,6 +525,11 @@ static int __vfio_register_dev(struct vfio_device *device,
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	if (WARN_ON(device->ops->bind_iommufd &&
+		    (!device->ops->unbind_iommufd ||
+		     !device->ops->attach_ioas)))
+		return -EINVAL;
+
 	/*
 	 * If the driver doesn't specify a set then the device is added to a
 	 * singleton set just for itself.
@@ -794,6 +799,10 @@ static int vfio_device_first_open(struct vfio_device *device)
 		ret = vfio_group_use_container(device->group);
 		if (ret)
 			goto err_module_put;
+	} else if (device->group->iommufd) {
+		ret = vfio_iommufd_bind(device, device->group->iommufd);
+		if (ret)
+			goto err_module_put;
 	}
 
 	device->kvm = device->group->kvm;
@@ -811,6 +820,8 @@ err_container:
 	device->kvm = NULL;
 	if (device->group->container)
 		vfio_group_unuse_container(device->group);
+	else if (device->group->iommufd)
+		vfio_iommufd_unbind(device);
 err_module_put:
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
@@ -829,6 +840,8 @@ static void vfio_device_last_close(struct vfio_device *device)
 	device->kvm = NULL;
 	if (device->group->container)
 		vfio_group_unuse_container(device->group);
+	else if (device->group->iommufd)
+		vfio_iommufd_unbind(device);
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 }
@@ -1936,8 +1949,6 @@ static void __exit vfio_cleanup(void)
 module_init(vfio_init);
 module_exit(vfio_cleanup);
 
-MODULE_IMPORT_NS(IOMMUFD);
-MODULE_IMPORT_NS(IOMMUFD_VFIO);
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index e7cebeb875dd1..a7fc4d747dc22 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -17,6 +17,8 @@
 #include <linux/iova_bitmap.h>
 
 struct kvm;
+struct iommufd_ctx;
+struct iommufd_device;
 
 /*
  * VFIO devices can be placed in a set, this allows all devices to share this
@@ -54,6 +56,10 @@ struct vfio_device {
 	struct completion comp;
 	struct list_head group_next;
 	struct list_head iommu_entry;
+#if IS_ENABLED(CONFIG_IOMMUFD)
+	struct iommufd_device *iommufd_device;
+	bool iommufd_attached;
+#endif
 };
 
 /**
@@ -80,6 +86,10 @@ struct vfio_device_ops {
 	char	*name;
 	int	(*init)(struct vfio_device *vdev);
 	void	(*release)(struct vfio_device *vdev);
+	int	(*bind_iommufd)(struct vfio_device *vdev,
+				struct iommufd_ctx *ictx, u32 *out_device_id);
+	void	(*unbind_iommufd)(struct vfio_device *vdev);
+	int	(*attach_ioas)(struct vfio_device *vdev, u32 *pt_id);
 	int	(*open_device)(struct vfio_device *vdev);
 	void	(*close_device)(struct vfio_device *vdev);
 	ssize_t	(*read)(struct vfio_device *vdev, char __user *buf,
@@ -96,6 +106,21 @@ struct vfio_device_ops {
 				  void __user *arg, size_t argsz);
 };
 
+#if IS_ENABLED(CONFIG_IOMMUFD)
+int vfio_iommufd_physical_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
+int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+#else
+#define vfio_iommufd_physical_bind                                      \
+	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
+		  u32 *out_device_id)) NULL)
+#define vfio_iommufd_physical_unbind \
+	((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_physical_attach_ioas \
+	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#endif
+
 /**
  * @migration_set_state: Optional callback to change the migration state for
  *         devices that support migration. It's mandatory for
-- 
GitLab


From 4741f2e941298ad7553b65e66624435e14793391 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:52 -0400
Subject: [PATCH 3274/4122] vfio-iommufd: Support iommufd for emulated VFIO
 devices

Emulated VFIO devices are calling vfio_register_emulated_iommu_dev() and
consist of all the mdev drivers.

Like the physical drivers, support for iommufd is provided by the driver
supplying the correct standard ops. Provide ops from the core that
duplicate what vfio_register_emulated_iommu_dev() does.

Emulated drivers are where it is more likely to see variation in the
iommfd support ops. For instance IDXD will probably need to setup both a
iommfd_device context linked to a PASID and an iommufd_access context to
support all their mdev operations.

Link: https://lore.kernel.org/r/7-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/gpu/drm/i915/gvt/kvmgt.c  |   3 +
 drivers/s390/cio/vfio_ccw_ops.c   |   3 +
 drivers/s390/crypto/vfio_ap_ops.c |   3 +
 drivers/vfio/container.c          | 110 +++++----------------------
 drivers/vfio/iommufd.c            |  58 ++++++++++++++
 drivers/vfio/vfio.h               |  10 ++-
 drivers/vfio/vfio_main.c          | 122 +++++++++++++++++++++++++++++-
 include/linux/vfio.h              |  14 ++++
 8 files changed, 229 insertions(+), 94 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index aaf0d9e8da951..f5164099c264e 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -1484,6 +1484,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
 	.mmap		= intel_vgpu_mmap,
 	.ioctl		= intel_vgpu_ioctl,
 	.dma_unmap	= intel_vgpu_dma_unmap,
+	.bind_iommufd	= vfio_iommufd_emulated_bind,
+	.unbind_iommufd = vfio_iommufd_emulated_unbind,
+	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
 };
 
 static int intel_vgpu_probe(struct mdev_device *mdev)
diff --git a/drivers/s390/cio/vfio_ccw_ops.c b/drivers/s390/cio/vfio_ccw_ops.c
index 6ae4d012d8008..560453d99c24f 100644
--- a/drivers/s390/cio/vfio_ccw_ops.c
+++ b/drivers/s390/cio/vfio_ccw_ops.c
@@ -588,6 +588,9 @@ static const struct vfio_device_ops vfio_ccw_dev_ops = {
 	.ioctl = vfio_ccw_mdev_ioctl,
 	.request = vfio_ccw_mdev_request,
 	.dma_unmap = vfio_ccw_dma_unmap,
+	.bind_iommufd = vfio_iommufd_emulated_bind,
+	.unbind_iommufd = vfio_iommufd_emulated_unbind,
+	.attach_ioas = vfio_iommufd_emulated_attach_ioas,
 };
 
 struct mdev_driver vfio_ccw_mdev_driver = {
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index 8bf353d46820e..68eeb25fb6611 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1805,6 +1805,9 @@ static const struct vfio_device_ops vfio_ap_matrix_dev_ops = {
 	.close_device = vfio_ap_mdev_close_device,
 	.ioctl = vfio_ap_mdev_ioctl,
 	.dma_unmap = vfio_ap_mdev_dma_unmap,
+	.bind_iommufd = vfio_iommufd_emulated_bind,
+	.unbind_iommufd = vfio_iommufd_emulated_unbind,
+	.attach_ioas = vfio_iommufd_emulated_attach_ioas,
 };
 
 static struct mdev_driver vfio_ap_matrix_driver = {
diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index 8772dad680853..7f3961fd4b5aa 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -540,113 +540,41 @@ void vfio_group_unuse_container(struct vfio_group *group)
 	fput(group->opened_file);
 }
 
-/*
- * Pin contiguous user pages and return their associated host pages for local
- * domain only.
- * @device [in]  : device
- * @iova [in]    : starting IOVA of user pages to be pinned.
- * @npage [in]   : count of pages to be pinned.  This count should not
- *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- * @prot [in]    : protection flags
- * @pages[out]   : array of host pages
- * Return error or number of pages pinned.
- *
- * A driver may only call this function if the vfio_device was created
- * by vfio_register_emulated_iommu_dev().
- */
-int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
-		   int npage, int prot, struct page **pages)
+int vfio_container_pin_pages(struct vfio_container *container,
+			     struct iommu_group *iommu_group, dma_addr_t iova,
+			     int npage, int prot, struct page **pages)
 {
-	struct vfio_container *container;
-	struct vfio_group *group = device->group;
-	struct vfio_iommu_driver *driver;
-	int ret;
-
-	if (!pages || !npage || !vfio_assert_device_open(device))
-		return -EINVAL;
+	struct vfio_iommu_driver *driver = container->iommu_driver;
 
 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
 		return -E2BIG;
 
-	/* group->container cannot change while a vfio device is open */
-	container = group->container;
-	driver = container->iommu_driver;
-	if (likely(driver && driver->ops->pin_pages))
-		ret = driver->ops->pin_pages(container->iommu_data,
-					     group->iommu_group, iova,
-					     npage, prot, pages);
-	else
-		ret = -ENOTTY;
-
-	return ret;
+	if (unlikely(!driver || !driver->ops->pin_pages))
+		return -ENOTTY;
+	return driver->ops->pin_pages(container->iommu_data, iommu_group, iova,
+				      npage, prot, pages);
 }
-EXPORT_SYMBOL(vfio_pin_pages);
 
-/*
- * Unpin contiguous host pages for local domain only.
- * @device [in]  : device
- * @iova [in]    : starting address of user pages to be unpinned.
- * @npage [in]   : count of pages to be unpinned.  This count should not
- *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
- */
-void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+void vfio_container_unpin_pages(struct vfio_container *container,
+				dma_addr_t iova, int npage)
 {
-	struct vfio_container *container;
-	struct vfio_iommu_driver *driver;
-
 	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
 		return;
 
-	if (WARN_ON(!vfio_assert_device_open(device)))
-		return;
-
-	/* group->container cannot change while a vfio device is open */
-	container = device->group->container;
-	driver = container->iommu_driver;
-
-	driver->ops->unpin_pages(container->iommu_data, iova, npage);
+	container->iommu_driver->ops->unpin_pages(container->iommu_data, iova,
+						  npage);
 }
-EXPORT_SYMBOL(vfio_unpin_pages);
 
-/*
- * This interface allows the CPUs to perform some sort of virtual DMA on
- * behalf of the device.
- *
- * CPUs read/write from/into a range of IOVAs pointing to user space memory
- * into/from a kernel buffer.
- *
- * As the read/write of user space memory is conducted via the CPUs and is
- * not a real device DMA, it is not necessary to pin the user space memory.
- *
- * @device [in]		: VFIO device
- * @iova [in]		: base IOVA of a user space buffer
- * @data [in]		: pointer to kernel buffer
- * @len [in]		: kernel buffer length
- * @write		: indicate read or write
- * Return error code on failure or 0 on success.
- */
-int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
-		size_t len, bool write)
+int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
+			  void *data, size_t len, bool write)
 {
-	struct vfio_container *container;
-	struct vfio_iommu_driver *driver;
-	int ret = 0;
-
-	if (!data || len <= 0 || !vfio_assert_device_open(device))
-		return -EINVAL;
-
-	/* group->container cannot change while a vfio device is open */
-	container = device->group->container;
-	driver = container->iommu_driver;
+	struct vfio_iommu_driver *driver = container->iommu_driver;
 
-	if (likely(driver && driver->ops->dma_rw))
-		ret = driver->ops->dma_rw(container->iommu_data,
-					  iova, data, len, write);
-	else
-		ret = -ENOTTY;
-	return ret;
+	if (unlikely(!driver || !driver->ops->dma_rw))
+		return -ENOTTY;
+	return driver->ops->dma_rw(container->iommu_data, iova, data, len,
+				   write);
 }
-EXPORT_SYMBOL(vfio_dma_rw);
 
 int __init vfio_container_init(void)
 {
diff --git a/drivers/vfio/iommufd.c b/drivers/vfio/iommufd.c
index 6e47a3df1a710..4f82a6fa7c6c7 100644
--- a/drivers/vfio/iommufd.c
+++ b/drivers/vfio/iommufd.c
@@ -98,3 +98,61 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(vfio_iommufd_physical_attach_ioas);
+
+/*
+ * The emulated standard ops mean that vfio_device is going to use the
+ * "mdev path" and will call vfio_pin_pages()/vfio_dma_rw(). Drivers using this
+ * ops set should call vfio_register_emulated_iommu_dev().
+ */
+
+static void vfio_emulated_unmap(void *data, unsigned long iova,
+				unsigned long length)
+{
+	struct vfio_device *vdev = data;
+
+	vdev->ops->dma_unmap(vdev, iova, length);
+}
+
+static const struct iommufd_access_ops vfio_user_ops = {
+	.needs_pin_pages = 1,
+	.unmap = vfio_emulated_unmap,
+};
+
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	vdev->iommufd_ictx = ictx;
+	iommufd_ctx_get(ictx);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_bind);
+
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev)
+{
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	if (vdev->iommufd_access) {
+		iommufd_access_destroy(vdev->iommufd_access);
+		vdev->iommufd_access = NULL;
+	}
+	iommufd_ctx_put(vdev->iommufd_ictx);
+	vdev->iommufd_ictx = NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_unbind);
+
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id)
+{
+	struct iommufd_access *user;
+
+	lockdep_assert_held(&vdev->dev_set->lock);
+
+	user = iommufd_access_create(vdev->iommufd_ictx, *pt_id, &vfio_user_ops,
+				     vdev);
+	if (IS_ERR(user))
+		return PTR_ERR(user);
+	vdev->iommufd_access = user;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vfio_iommufd_emulated_attach_ioas);
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 9766f70a12c51..b1ef842496372 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -111,8 +111,6 @@ struct vfio_iommu_driver {
 int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
 void vfio_unregister_iommu_driver(const struct vfio_iommu_driver_ops *ops);
 
-bool vfio_assert_device_open(struct vfio_device *device);
-
 struct vfio_container *vfio_container_from_file(struct file *filep);
 int vfio_group_use_container(struct vfio_group *group);
 void vfio_group_unuse_container(struct vfio_group *group);
@@ -121,6 +119,14 @@ int vfio_container_attach_group(struct vfio_container *container,
 void vfio_group_detach_container(struct vfio_group *group);
 void vfio_device_container_register(struct vfio_device *device);
 void vfio_device_container_unregister(struct vfio_device *device);
+int vfio_container_pin_pages(struct vfio_container *container,
+			     struct iommu_group *iommu_group, dma_addr_t iova,
+			     int npage, int prot, struct page **pages);
+void vfio_container_unpin_pages(struct vfio_container *container,
+				dma_addr_t iova, int npage);
+int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
+			  void *data, size_t len, bool write);
+
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
 
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index a74c34232c034..fd5e969ab653a 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -770,7 +770,7 @@ out_unlock:
 static const struct file_operations vfio_device_fops;
 
 /* true if the vfio_device has open_device() called but not close_device() */
-bool vfio_assert_device_open(struct vfio_device *device)
+static bool vfio_assert_device_open(struct vfio_device *device)
 {
 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
 }
@@ -1876,6 +1876,126 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
 }
 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
 
+/*
+ * Pin contiguous user pages and return their associated host pages for local
+ * domain only.
+ * @device [in]  : device
+ * @iova [in]    : starting IOVA of user pages to be pinned.
+ * @npage [in]   : count of pages to be pinned.  This count should not
+ *		   be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ * @prot [in]    : protection flags
+ * @pages[out]   : array of host pages
+ * Return error or number of pages pinned.
+ *
+ * A driver may only call this function if the vfio_device was created
+ * by vfio_register_emulated_iommu_dev() due to vfio_container_pin_pages().
+ */
+int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
+		   int npage, int prot, struct page **pages)
+{
+	/* group->container cannot change while a vfio device is open */
+	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
+		return -EINVAL;
+	if (device->group->container)
+		return vfio_container_pin_pages(device->group->container,
+						device->group->iommu_group,
+						iova, npage, prot, pages);
+	if (device->iommufd_access) {
+		int ret;
+
+		if (iova > ULONG_MAX)
+			return -EINVAL;
+		/*
+		 * VFIO ignores the sub page offset, npages is from the start of
+		 * a PAGE_SIZE chunk of IOVA. The caller is expected to recover
+		 * the sub page offset by doing:
+		 *     pages[0] + (iova % PAGE_SIZE)
+		 */
+		ret = iommufd_access_pin_pages(
+			device->iommufd_access, ALIGN_DOWN(iova, PAGE_SIZE),
+			npage * PAGE_SIZE, pages,
+			(prot & IOMMU_WRITE) ? IOMMUFD_ACCESS_RW_WRITE : 0);
+		if (ret)
+			return ret;
+		return npage;
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_pin_pages);
+
+/*
+ * Unpin contiguous host pages for local domain only.
+ * @device [in]  : device
+ * @iova [in]    : starting address of user pages to be unpinned.
+ * @npage [in]   : count of pages to be unpinned.  This count should not
+ *                 be greater than VFIO_PIN_PAGES_MAX_ENTRIES.
+ */
+void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
+{
+	if (WARN_ON(!vfio_assert_device_open(device)))
+		return;
+
+	if (device->group->container) {
+		vfio_container_unpin_pages(device->group->container, iova,
+					   npage);
+		return;
+	}
+	if (device->iommufd_access) {
+		if (WARN_ON(iova > ULONG_MAX))
+			return;
+		iommufd_access_unpin_pages(device->iommufd_access,
+					   ALIGN_DOWN(iova, PAGE_SIZE),
+					   npage * PAGE_SIZE);
+		return;
+	}
+}
+EXPORT_SYMBOL(vfio_unpin_pages);
+
+/*
+ * This interface allows the CPUs to perform some sort of virtual DMA on
+ * behalf of the device.
+ *
+ * CPUs read/write from/into a range of IOVAs pointing to user space memory
+ * into/from a kernel buffer.
+ *
+ * As the read/write of user space memory is conducted via the CPUs and is
+ * not a real device DMA, it is not necessary to pin the user space memory.
+ *
+ * @device [in]		: VFIO device
+ * @iova [in]		: base IOVA of a user space buffer
+ * @data [in]		: pointer to kernel buffer
+ * @len [in]		: kernel buffer length
+ * @write		: indicate read or write
+ * Return error code on failure or 0 on success.
+ */
+int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
+		size_t len, bool write)
+{
+	if (!data || len <= 0 || !vfio_assert_device_open(device))
+		return -EINVAL;
+
+	if (device->group->container)
+		return vfio_container_dma_rw(device->group->container, iova,
+					     data, len, write);
+
+	if (device->iommufd_access) {
+		unsigned int flags = 0;
+
+		if (iova > ULONG_MAX)
+			return -EINVAL;
+
+		/* VFIO historically tries to auto-detect a kthread */
+		if (!current->mm)
+			flags |= IOMMUFD_ACCESS_RW_KTHREAD;
+		if (write)
+			flags |= IOMMUFD_ACCESS_RW_WRITE;
+		return iommufd_access_rw(device->iommufd_access, iova, data,
+					 len, flags);
+	}
+	return -EINVAL;
+}
+EXPORT_SYMBOL(vfio_dma_rw);
+
 /*
  * Module/class support
  */
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index a7fc4d747dc22..d5f84f98c0fa8 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -19,6 +19,7 @@
 struct kvm;
 struct iommufd_ctx;
 struct iommufd_device;
+struct iommufd_access;
 
 /*
  * VFIO devices can be placed in a set, this allows all devices to share this
@@ -56,8 +57,10 @@ struct vfio_device {
 	struct completion comp;
 	struct list_head group_next;
 	struct list_head iommu_entry;
+	struct iommufd_access *iommufd_access;
 #if IS_ENABLED(CONFIG_IOMMUFD)
 	struct iommufd_device *iommufd_device;
+	struct iommufd_ctx *iommufd_ictx;
 	bool iommufd_attached;
 #endif
 };
@@ -111,6 +114,10 @@ int vfio_iommufd_physical_bind(struct vfio_device *vdev,
 			       struct iommufd_ctx *ictx, u32 *out_device_id);
 void vfio_iommufd_physical_unbind(struct vfio_device *vdev);
 int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
+int vfio_iommufd_emulated_bind(struct vfio_device *vdev,
+			       struct iommufd_ctx *ictx, u32 *out_device_id);
+void vfio_iommufd_emulated_unbind(struct vfio_device *vdev);
+int vfio_iommufd_emulated_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
 #else
 #define vfio_iommufd_physical_bind                                      \
 	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
@@ -119,6 +126,13 @@ int vfio_iommufd_physical_attach_ioas(struct vfio_device *vdev, u32 *pt_id);
 	((void (*)(struct vfio_device *vdev)) NULL)
 #define vfio_iommufd_physical_attach_ioas \
 	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
+#define vfio_iommufd_emulated_bind                                      \
+	((int (*)(struct vfio_device *vdev, struct iommufd_ctx *ictx,   \
+		  u32 *out_device_id)) NULL)
+#define vfio_iommufd_emulated_unbind \
+	((void (*)(struct vfio_device *vdev)) NULL)
+#define vfio_iommufd_emulated_attach_ioas \
+	((int (*)(struct vfio_device *vdev, u32 *pt_id)) NULL)
 #endif
 
 /**
-- 
GitLab


From 81ab9890da97e07862476bf635c80adee9b1c515 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:53 -0400
Subject: [PATCH 3275/4122] vfio: Move container related MODULE_ALIAS
 statements into container.c

The miscdev is in container.c, so should these related MODULE_ALIAS
statements. This is necessary for the next patch to be able to fully
disable /dev/vfio/vfio.

Fixes: cdc71fe4ecbf ("vfio: Move container code into drivers/vfio/container.c")
Link: https://lore.kernel.org/r/8-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Reported-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/container.c | 3 +++
 drivers/vfio/vfio_main.c | 2 --
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index 7f3961fd4b5aa..6b362d97d6822 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -608,3 +608,6 @@ void vfio_container_cleanup(void)
 	misc_deregister(&vfio_dev);
 	mutex_destroy(&vfio.iommu_drivers_lock);
 }
+
+MODULE_ALIAS_MISCDEV(VFIO_MINOR);
+MODULE_ALIAS("devname:vfio/vfio");
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index fd5e969ab653a..ce6e6a560c702 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -2073,6 +2073,4 @@ MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_ALIAS_MISCDEV(VFIO_MINOR);
-MODULE_ALIAS("devname:vfio/vfio");
 MODULE_SOFTDEP("post: vfio_iommu_type1 vfio_iommu_spapr_tce");
-- 
GitLab


From e5a9ec7e096ab7a3b34022409a6ddc63e4e83674 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:54 -0400
Subject: [PATCH 3276/4122] vfio: Make vfio_container optionally compiled

Add a kconfig CONFIG_VFIO_CONTAINER that controls compiling the container
code. If 'n' then only iommufd will provide the container service. All the
support for vfio iommu drivers, including type1, will not be built.

This allows a compilation check that no inappropriate dependencies between
the device/group and container have been created.

Link: https://lore.kernel.org/r/9-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/Kconfig  | 35 +++++++++++++++--------
 drivers/vfio/Makefile |  4 +--
 drivers/vfio/vfio.h   | 65 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 1118d322eec97..286c1663bd756 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -3,8 +3,8 @@ menuconfig VFIO
 	tristate "VFIO Non-Privileged userspace driver framework"
 	select IOMMU_API
 	depends on IOMMUFD || !IOMMUFD
-	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
 	select INTERVAL_TREE
+	select VFIO_CONTAINER if IOMMUFD=n
 	help
 	  VFIO provides a framework for secure userspace device drivers.
 	  See Documentation/driver-api/vfio.rst for more details.
@@ -12,6 +12,18 @@ menuconfig VFIO
 	  If you don't know what to do here, say N.
 
 if VFIO
+config VFIO_CONTAINER
+	bool "Support for the VFIO container /dev/vfio/vfio"
+	select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
+	default y
+	help
+	  The VFIO container is the classic interface to VFIO for establishing
+	  IOMMU mappings. If N is selected here then IOMMUFD must be used to
+	  manage the mappings.
+
+	  Unless testing IOMMUFD say Y here.
+
+if VFIO_CONTAINER
 config VFIO_IOMMU_TYPE1
 	tristate
 	default n
@@ -21,16 +33,6 @@ config VFIO_IOMMU_SPAPR_TCE
 	depends on SPAPR_TCE_IOMMU
 	default VFIO
 
-config VFIO_SPAPR_EEH
-	tristate
-	depends on EEH && VFIO_IOMMU_SPAPR_TCE
-	default VFIO
-
-config VFIO_VIRQFD
-	tristate
-	select EVENTFD
-	default n
-
 config VFIO_NOIOMMU
 	bool "VFIO No-IOMMU support"
 	help
@@ -44,6 +46,17 @@ config VFIO_NOIOMMU
 	  this mode since there is no IOMMU to provide DMA translation.
 
 	  If you don't know what to do here, say N.
+endif
+
+config VFIO_SPAPR_EEH
+	tristate
+	depends on EEH && VFIO_IOMMU_SPAPR_TCE
+	default VFIO
+
+config VFIO_VIRQFD
+	tristate
+	select EVENTFD
+	default n
 
 source "drivers/vfio/pci/Kconfig"
 source "drivers/vfio/platform/Kconfig"
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 3863922529ef2..b953517dc70f9 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -4,9 +4,9 @@ vfio_virqfd-y := virqfd.o
 obj-$(CONFIG_VFIO) += vfio.o
 
 vfio-y += vfio_main.o \
-	  iova_bitmap.o \
-	  container.o
+	  iova_bitmap.o
 vfio-$(CONFIG_IOMMUFD) += iommufd.o
+vfio-$(CONFIG_VFIO_CONTAINER) += container.o
 
 obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index b1ef842496372..ce5fe3fc493b4 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -55,7 +55,9 @@ struct vfio_group {
 	struct list_head		device_list;
 	struct mutex			device_lock;
 	struct list_head		vfio_next;
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
 	struct list_head		container_next;
+#endif
 	enum vfio_group_type		type;
 	struct mutex			group_lock;
 	struct kvm			*kvm;
@@ -64,6 +66,7 @@ struct vfio_group {
 	struct iommufd_ctx		*iommufd;
 };
 
+#if IS_ENABLED(CONFIG_VFIO_CONTAINER)
 /* events for the backend driver notify callback */
 enum vfio_iommu_notify_type {
 	VFIO_IOMMU_CONTAINER_CLOSE = 0,
@@ -129,6 +132,68 @@ int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
 
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
+#else
+static inline struct vfio_container *
+vfio_container_from_file(struct file *filep)
+{
+	return NULL;
+}
+
+static inline int vfio_group_use_container(struct vfio_group *group)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_unuse_container(struct vfio_group *group)
+{
+}
+
+static inline int vfio_container_attach_group(struct vfio_container *container,
+					      struct vfio_group *group)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_group_detach_container(struct vfio_group *group)
+{
+}
+
+static inline void vfio_device_container_register(struct vfio_device *device)
+{
+}
+
+static inline void vfio_device_container_unregister(struct vfio_device *device)
+{
+}
+
+static inline int vfio_container_pin_pages(struct vfio_container *container,
+					   struct iommu_group *iommu_group,
+					   dma_addr_t iova, int npage, int prot,
+					   struct page **pages)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline void vfio_container_unpin_pages(struct vfio_container *container,
+					      dma_addr_t iova, int npage)
+{
+}
+
+static inline int vfio_container_dma_rw(struct vfio_container *container,
+					dma_addr_t iova, void *data, size_t len,
+					bool write)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int vfio_container_init(void)
+{
+	return 0;
+}
+static inline void vfio_container_cleanup(void)
+{
+}
+#endif
 
 #if IS_ENABLED(CONFIG_IOMMUFD)
 int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx);
-- 
GitLab


From 01f70cbb26eadb5959344598977cb7159948263a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 29 Nov 2022 16:31:55 -0400
Subject: [PATCH 3277/4122] iommufd: Allow iommufd to supply /dev/vfio/vfio

If the VFIO container is compiled out, give a kconfig option for iommufd
to provide the miscdev node with the same name and permissions as vfio
uses.

The compatibility node supports the same ioctls as VFIO and automatically
enables the VFIO compatible pinned page accounting mode.

Link: https://lore.kernel.org/r/10-v4-42cd2eb0e3eb+335a-vfio_iommufd_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Yi Liu <yi.l.liu@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Nicolin Chen <nicolinc@nvidia.com>
Tested-by: Yi Liu <yi.l.liu@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Matthew Rosato <mjrosato@linux.ibm.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/Kconfig | 20 +++++++++++++++++++
 drivers/iommu/iommufd/main.c  | 36 +++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+)

diff --git a/drivers/iommu/iommufd/Kconfig b/drivers/iommu/iommufd/Kconfig
index 871244f2443fb..8306616b6d819 100644
--- a/drivers/iommu/iommufd/Kconfig
+++ b/drivers/iommu/iommufd/Kconfig
@@ -12,6 +12,26 @@ config IOMMUFD
 	  If you don't know what to do here, say N.
 
 if IOMMUFD
+config IOMMUFD_VFIO_CONTAINER
+	bool "IOMMUFD provides the VFIO container /dev/vfio/vfio"
+	depends on VFIO && !VFIO_CONTAINER
+	default VFIO && !VFIO_CONTAINER
+	help
+	  IOMMUFD will provide /dev/vfio/vfio instead of VFIO. This relies on
+	  IOMMUFD providing compatibility emulation to give the same ioctls.
+	  It provides an option to build a kernel with legacy VFIO components
+	  removed.
+
+	  IOMMUFD VFIO container emulation is known to lack certain features
+	  of the native VFIO container, such as no-IOMMU support, peer-to-peer
+	  DMA mapping, PPC IOMMU support, as well as other potentially
+	  undiscovered gaps.  This option is currently intended for the
+	  purpose of testing IOMMUFD with unmodified userspace supporting VFIO
+	  and making use of the Type1 VFIO IOMMU backend.  General purpose
+	  enabling of this option is currently discouraged.
+
+	  Unless testing IOMMUFD, say N here.
+
 config IOMMUFD_TEST
 	bool "IOMMU Userspace API Test support"
 	depends on DEBUG_KERNEL
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index bcb463e581009..083e6fcbe10ad 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -18,6 +18,7 @@
 #include <uapi/linux/iommufd.h>
 #include <linux/iommufd.h>
 
+#include "io_pagetable.h"
 #include "iommufd_private.h"
 #include "iommufd_test.h"
 
@@ -25,6 +26,7 @@ struct iommufd_object_ops {
 	void (*destroy)(struct iommufd_object *obj);
 };
 static const struct iommufd_object_ops iommufd_object_ops[];
+static struct miscdevice vfio_misc_dev;
 
 struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
 					     size_t size,
@@ -170,6 +172,16 @@ static int iommufd_fops_open(struct inode *inode, struct file *filp)
 	if (!ictx)
 		return -ENOMEM;
 
+	/*
+	 * For compatibility with VFIO when /dev/vfio/vfio is opened we default
+	 * to the same rlimit accounting as vfio uses.
+	 */
+	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER) &&
+	    filp->private_data == &vfio_misc_dev) {
+		ictx->account_mode = IOPT_PAGES_ACCOUNT_MM;
+		pr_info_once("IOMMUFD is providing /dev/vfio/vfio, not VFIO.\n");
+	}
+
 	xa_init_flags(&ictx->objects, XA_FLAGS_ALLOC1 | XA_FLAGS_ACCOUNT);
 	ictx->file = filp;
 	filp->private_data = ictx;
@@ -400,6 +412,15 @@ static struct miscdevice iommu_misc_dev = {
 	.mode = 0660,
 };
 
+
+static struct miscdevice vfio_misc_dev = {
+	.minor = VFIO_MINOR,
+	.name = "vfio",
+	.fops = &iommufd_fops,
+	.nodename = "vfio/vfio",
+	.mode = 0666,
+};
+
 static int __init iommufd_init(void)
 {
 	int ret;
@@ -407,18 +428,33 @@ static int __init iommufd_init(void)
 	ret = misc_register(&iommu_misc_dev);
 	if (ret)
 		return ret;
+
+	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)) {
+		ret = misc_register(&vfio_misc_dev);
+		if (ret)
+			goto err_misc;
+	}
 	iommufd_test_init();
 	return 0;
+err_misc:
+	misc_deregister(&iommu_misc_dev);
+	return ret;
 }
 
 static void __exit iommufd_exit(void)
 {
 	iommufd_test_exit();
+	if (IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER))
+		misc_deregister(&vfio_misc_dev);
 	misc_deregister(&iommu_misc_dev);
 }
 
 module_init(iommufd_init);
 module_exit(iommufd_exit);
 
+#if IS_ENABLED(CONFIG_IOMMUFD_VFIO_CONTAINER)
+MODULE_ALIAS_MISCDEV(VFIO_MINOR);
+MODULE_ALIAS("devname:vfio/vfio");
+#endif
 MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
 MODULE_LICENSE("GPL");
-- 
GitLab


From d7237462561fcd224fa687c56ccb68629f50fc0d Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 2 Dec 2022 01:38:49 -0800
Subject: [PATCH 3278/4122] drivers: mcb: fix resource leak in mcb_probe()

When probe hook function failed in mcb_probe(), it doesn't put the device.
Compiled test only.

Fixes: 7bc364097a89 ("mcb: Acquire reference to device in probe")
Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: Johannes Thumshirn <jth@kernel.org>
Link: https://lore.kernel.org/r/9f87de36bfb85158b506cb78c6fc9db3f6a3bad1.1669624063.git.johannes.thumshirn@wdc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mcb/mcb-core.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/mcb/mcb-core.c b/drivers/mcb/mcb-core.c
index 338fc889b357a..b8ad4f16b4acd 100644
--- a/drivers/mcb/mcb-core.c
+++ b/drivers/mcb/mcb-core.c
@@ -71,8 +71,10 @@ static int mcb_probe(struct device *dev)
 
 	get_device(dev);
 	ret = mdrv->probe(mdev, found_id);
-	if (ret)
+	if (ret) {
 		module_put(carrier_mod);
+		put_device(dev);
+	}
 
 	return ret;
 }
-- 
GitLab


From 728ac3389296caf68638628c987aeae6c8851e2d Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 2 Dec 2022 01:38:50 -0800
Subject: [PATCH 3279/4122] mcb: mcb-parse: fix error handing in
 chameleon_parse_gdd()

If mcb_device_register() returns error in chameleon_parse_gdd(), the refcount
of bus and device name are leaked. Fix this by calling put_device() to give up
the reference, so they can be released in mcb_release_dev() and kobject_cleanup().

Fixes: 3764e82e5150 ("drivers: Introduce MEN Chameleon Bus")
Reviewed-by: Johannes Thumshirn <jth@kernel.org>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Johannes Thumshirn <jth@kernel.org>
Link: https://lore.kernel.org/r/ebfb06e39b19272f0197fa9136b5e4b6f34ad732.1669624063.git.johannes.thumshirn@wdc.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/mcb/mcb-parse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/mcb/mcb-parse.c b/drivers/mcb/mcb-parse.c
index 0266bfddfbe27..aa6938da0db85 100644
--- a/drivers/mcb/mcb-parse.c
+++ b/drivers/mcb/mcb-parse.c
@@ -108,7 +108,7 @@ static int chameleon_parse_gdd(struct mcb_bus *bus,
 	return 0;
 
 err:
-	mcb_free_dev(mdev);
+	put_device(&mdev->dev);
 
 	return ret;
 }
-- 
GitLab


From 11fa7fefe3d8fac7da56bc9aa3dd5fb3081ca797 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 2 Dec 2022 11:02:37 +0800
Subject: [PATCH 3280/4122] chardev: fix error handling in cdev_device_add()

While doing fault injection test, I got the following report:

------------[ cut here ]------------
kobject: '(null)' (0000000039956980): is not initialized, yet kobject_put() is being called.
WARNING: CPU: 3 PID: 6306 at kobject_put+0x23d/0x4e0
CPU: 3 PID: 6306 Comm: 283 Tainted: G        W          6.1.0-rc2-00005-g307c1086d7c9 #1253
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
RIP: 0010:kobject_put+0x23d/0x4e0
Call Trace:
 <TASK>
 cdev_device_add+0x15e/0x1b0
 __iio_device_register+0x13b4/0x1af0 [industrialio]
 __devm_iio_device_register+0x22/0x90 [industrialio]
 max517_probe+0x3d8/0x6b4 [max517]
 i2c_device_probe+0xa81/0xc00

When device_add() is injected fault and returns error, if dev->devt is not set,
cdev_add() is not called, cdev_del() is not needed. Fix this by checking dev->devt
in error path.

Fixes: 233ed09d7fda ("chardev: add helper function to register char devs with a struct device")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221202030237.520280-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/char_dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/char_dev.c b/fs/char_dev.c
index 340e4543b24a9..13deb45f1ec65 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -554,7 +554,7 @@ int cdev_device_add(struct cdev *cdev, struct device *dev)
 	}
 
 	rc = device_add(dev);
-	if (rc)
+	if (rc && dev->devt)
 		cdev_del(cdev);
 
 	return rc;
-- 
GitLab


From b058ea3ab5afea873ab8d976277539ca9e43869a Mon Sep 17 00:00:00 2001
From: Joao Martins <joao.m.martins@oracle.com>
Date: Tue, 29 Nov 2022 13:12:35 +0000
Subject: [PATCH 3281/4122] vfio/iova_bitmap: refactor iova_bitmap_set() to
 better handle page boundaries

Commit f38044e5ef58 ("vfio/iova_bitmap: Fix PAGE_SIZE unaligned bitmaps")
had fixed the unaligned bitmaps by capping the remaining iterable set at
the start of the bitmap. Although, that mistakenly worked around
iova_bitmap_set() incorrectly setting bits across page boundary.

Fix this by reworking the loop inside iova_bitmap_set() to iterate over a
range of bits to set (cur_bit .. last_bit) which may span different pinned
pages, thus updating @page_idx and @offset as it sets the bits. The
previous cap to the first page is now adjusted to be always accounted
rather than when there's only a non-zero pgoff.

While at it, make @page_idx , @offset and @nbits to be unsigned int given
that it won't be more than 512 and 4096 respectively (even a bigger
PAGE_SIZE or a smaller struct page size won't make this bigger than the
above 32-bit max). Also, delete the stale kdoc on Return type.

Cc: Avihai Horon <avihaih@nvidia.com>
Fixes: f38044e5ef58 ("vfio/iova_bitmap: Fix PAGE_SIZE unaligned bitmaps")
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Avihai Horon <avihaih@nvidia.com>
Link: https://lore.kernel.org/r/20221129131235.38880-1-joao.m.martins@oracle.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/iova_bitmap.c | 30 +++++++++++++-----------------
 1 file changed, 13 insertions(+), 17 deletions(-)

diff --git a/drivers/vfio/iova_bitmap.c b/drivers/vfio/iova_bitmap.c
index de6d6ea5c4967..0848f920efb7c 100644
--- a/drivers/vfio/iova_bitmap.c
+++ b/drivers/vfio/iova_bitmap.c
@@ -298,9 +298,7 @@ static unsigned long iova_bitmap_mapped_remaining(struct iova_bitmap *bitmap)
 {
 	unsigned long remaining, bytes;
 
-	/* Cap to one page in the first iteration, if PAGE_SIZE unaligned. */
-	bytes = !bitmap->mapped.pgoff ? bitmap->mapped.npages << PAGE_SHIFT :
-					PAGE_SIZE - bitmap->mapped.pgoff;
+	bytes = (bitmap->mapped.npages << PAGE_SHIFT) - bitmap->mapped.pgoff;
 
 	remaining = bitmap->mapped_total_index - bitmap->mapped_base_index;
 	remaining = min_t(unsigned long, remaining,
@@ -399,29 +397,27 @@ int iova_bitmap_for_each(struct iova_bitmap *bitmap, void *opaque,
  * Set the bits corresponding to the range [iova .. iova+length-1] in
  * the user bitmap.
  *
- * Return: The number of bits set.
  */
 void iova_bitmap_set(struct iova_bitmap *bitmap,
 		     unsigned long iova, size_t length)
 {
 	struct iova_bitmap_map *mapped = &bitmap->mapped;
-	unsigned long offset = (iova - mapped->iova) >> mapped->pgshift;
-	unsigned long nbits = max_t(unsigned long, 1, length >> mapped->pgshift);
-	unsigned long page_idx = offset / BITS_PER_PAGE;
-	unsigned long page_offset = mapped->pgoff;
-	void *kaddr;
-
-	offset = offset % BITS_PER_PAGE;
+	unsigned long cur_bit = ((iova - mapped->iova) >>
+			mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
+	unsigned long last_bit = (((iova + length - 1) - mapped->iova) >>
+			mapped->pgshift) + mapped->pgoff * BITS_PER_BYTE;
 
 	do {
-		unsigned long size = min(BITS_PER_PAGE - offset, nbits);
+		unsigned int page_idx = cur_bit / BITS_PER_PAGE;
+		unsigned int offset = cur_bit % BITS_PER_PAGE;
+		unsigned int nbits = min(BITS_PER_PAGE - offset,
+					 last_bit - cur_bit + 1);
+		void *kaddr;
 
 		kaddr = kmap_local_page(mapped->pages[page_idx]);
-		bitmap_set(kaddr + page_offset, offset, size);
+		bitmap_set(kaddr, offset, nbits);
 		kunmap_local(kaddr);
-		page_offset = offset = 0;
-		nbits -= size;
-		page_idx++;
-	} while (nbits > 0);
+		cur_bit += nbits;
+	} while (cur_bit <= last_bit);
 }
 EXPORT_SYMBOL_GPL(iova_bitmap_set);
-- 
GitLab


From 61e15f871241ee86f217320909005cd022dd844f Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 2 Dec 2022 11:50:08 +0100
Subject: [PATCH 3282/4122] KVM: Delete all references to removed
 KVM_SET_MEMORY_REGION ioctl

The documentation says that the ioctl has been deprecated, but it has been
actually removed and the remaining references are just left overs.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Message-Id: <20221202105011.185147-2-javierm@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 16 ----------------
 include/uapi/linux/kvm.h       | 12 ------------
 tools/include/uapi/linux/kvm.h | 12 ------------
 3 files changed, 40 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 5617bc4f899f4..850e187c0a389 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -272,18 +272,6 @@ the VCPU file descriptor can be mmap-ed, including:
   KVM_CAP_DIRTY_LOG_RING, see section 8.3.
 
 
-4.6 KVM_SET_MEMORY_REGION
--------------------------
-
-:Capability: basic
-:Architectures: all
-:Type: vm ioctl
-:Parameters: struct kvm_memory_region (in)
-:Returns: 0 on success, -1 on error
-
-This ioctl is obsolete and has been removed.
-
-
 4.7 KVM_CREATE_VCPU
 -------------------
 
@@ -1377,10 +1365,6 @@ the memory region are automatically reflected into the guest.  For example, an
 mmap() that affects the region will be made visible immediately.  Another
 example is madvise(MADV_DROP).
 
-It is recommended to use this API instead of the KVM_SET_MEMORY_REGION ioctl.
-The KVM_SET_MEMORY_REGION does not allow fine grained control over memory
-allocation and is deprecated.
-
 
 4.36 KVM_SET_TSS_ADDR
 ---------------------
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 64dfe9c07c879..c338ca2c972d2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -86,14 +86,6 @@ struct kvm_debug_guest {
 /* *** End of deprecated interfaces *** */
 
 
-/* for KVM_CREATE_MEMORY_REGION */
-struct kvm_memory_region {
-	__u32 slot;
-	__u32 flags;
-	__u64 guest_phys_addr;
-	__u64 memory_size; /* bytes */
-};
-
 /* for KVM_SET_USER_MEMORY_REGION */
 struct kvm_userspace_memory_region {
 	__u32 slot;
@@ -1442,10 +1434,6 @@ struct kvm_vfio_spapr_tce {
 	__s32	tablefd;
 };
 
-/*
- * ioctls for VM fds
- */
-#define KVM_SET_MEMORY_REGION     _IOW(KVMIO,  0x40, struct kvm_memory_region)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 0d5d4419139ae..8899201d5964f 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -86,14 +86,6 @@ struct kvm_debug_guest {
 /* *** End of deprecated interfaces *** */
 
 
-/* for KVM_CREATE_MEMORY_REGION */
-struct kvm_memory_region {
-	__u32 slot;
-	__u32 flags;
-	__u64 guest_phys_addr;
-	__u64 memory_size; /* bytes */
-};
-
 /* for KVM_SET_USER_MEMORY_REGION */
 struct kvm_userspace_memory_region {
 	__u32 slot;
@@ -1437,10 +1429,6 @@ struct kvm_vfio_spapr_tce {
 	__s32	tablefd;
 };
 
-/*
- * ioctls for VM fds
- */
-#define KVM_SET_MEMORY_REGION     _IOW(KVMIO,  0x40, struct kvm_memory_region)
 /*
  * KVM_CREATE_VCPU receives as a parameter the vcpu slot, and returns
  * a vcpu fd.
-- 
GitLab


From 66a9221d73e71199a120ca12ea5bfaac2aa670a3 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 2 Dec 2022 11:50:09 +0100
Subject: [PATCH 3283/4122] KVM: Delete all references to removed
 KVM_SET_MEMORY_ALIAS ioctl

The documentation says that the ioctl has been deprecated, but it has been
actually removed and the remaining references are just left overs.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Message-Id: <20221202105011.185147-3-javierm@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst        | 11 -----------
 arch/x86/include/uapi/asm/kvm.h       |  8 --------
 include/uapi/linux/kvm.h              |  2 --
 tools/arch/x86/include/uapi/asm/kvm.h |  8 --------
 tools/include/uapi/linux/kvm.h        |  2 --
 5 files changed, 31 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 850e187c0a389..07e8a42d839a2 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -356,17 +356,6 @@ see the description of the capability.
 Note that the Xen shared info page, if configured, shall always be assumed
 to be dirty. KVM will not explicitly mark it such.
 
-4.9 KVM_SET_MEMORY_ALIAS
-------------------------
-
-:Capability: basic
-:Architectures: x86
-:Type: vm ioctl
-:Parameters: struct kvm_memory_alias (in)
-:Returns: 0 (success), -1 (error)
-
-This ioctl is obsolete and has been removed.
-
 
 4.10 KVM_RUN
 ------------
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index c6df6b16a0885..e48deab8901d4 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -53,14 +53,6 @@
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
-struct kvm_memory_alias {
-	__u32 slot;  /* this has a different namespace than memory slots */
-	__u32 flags;
-	__u64 guest_phys_addr;
-	__u64 memory_size;
-	__u64 target_phys_addr;
-};
-
 /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
 struct kvm_pic_state {
 	__u8 last_irr;	/* edge detection */
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c338ca2c972d2..ce91837656169 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1440,8 +1440,6 @@ struct kvm_vfio_spapr_tce {
  */
 #define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
-/* KVM_SET_MEMORY_ALIAS is obsolete: */
-#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO,  0x43, struct kvm_memory_alias)
 #define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
 #define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
 #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 46de10a809ecb..649e50a8f9ddf 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -53,14 +53,6 @@
 /* Architectural interrupt line count. */
 #define KVM_NR_INTERRUPTS 256
 
-struct kvm_memory_alias {
-	__u32 slot;  /* this has a different namespace than memory slots */
-	__u32 flags;
-	__u64 guest_phys_addr;
-	__u64 memory_size;
-	__u64 target_phys_addr;
-};
-
 /* for KVM_GET_IRQCHIP and KVM_SET_IRQCHIP */
 struct kvm_pic_state {
 	__u8 last_irr;	/* edge detection */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 8899201d5964f..6ba2928f8f18f 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1435,8 +1435,6 @@ struct kvm_vfio_spapr_tce {
  */
 #define KVM_CREATE_VCPU           _IO(KVMIO,   0x41)
 #define KVM_GET_DIRTY_LOG         _IOW(KVMIO,  0x42, struct kvm_dirty_log)
-/* KVM_SET_MEMORY_ALIAS is obsolete: */
-#define KVM_SET_MEMORY_ALIAS      _IOW(KVMIO,  0x43, struct kvm_memory_alias)
 #define KVM_SET_NR_MMU_PAGES      _IO(KVMIO,   0x44)
 #define KVM_GET_NR_MMU_PAGES      _IO(KVMIO,   0x45)
 #define KVM_SET_USER_MEMORY_REGION _IOW(KVMIO, 0x46, \
-- 
GitLab


From 30ee198ce42d60101620d33f8bc70c3234798365 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 2 Dec 2022 11:50:10 +0100
Subject: [PATCH 3284/4122] KVM: Reference to kvm_userspace_memory_region in
 doc and comments

There are still references to the removed kvm_memory_region data structure
but the doc and comments should mention struct kvm_userspace_memory_region
instead, since that is what's used by the ioctl that replaced the old one
and this data structure support the same set of flags.

Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Message-Id: <20221202105011.185147-4-javierm@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 2 +-
 include/linux/kvm_host.h       | 4 ++--
 include/uapi/linux/kvm.h       | 6 +++---
 tools/include/uapi/linux/kvm.h | 6 +++---
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 07e8a42d839a2..92e14823619a2 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -1309,7 +1309,7 @@ yet and must be cleared on entry.
 	__u64 userspace_addr; /* start of the userspace allocated memory */
   };
 
-  /* for kvm_memory_region::flags */
+  /* for kvm_userspace_memory_region::flags */
   #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
   #define KVM_MEM_READONLY	(1UL << 1)
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 8f874a9643139..a5a82b5367749 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -50,8 +50,8 @@
 #endif
 
 /*
- * The bit 16 ~ bit 31 of kvm_memory_region::flags are internally used
- * in kvm, other bits are visible for userspace which are defined in
+ * The bit 16 ~ bit 31 of kvm_userspace_memory_region::flags are internally
+ * used in kvm, other bits are visible for userspace which are defined in
  * include/linux/kvm_h.
  */
 #define KVM_MEMSLOT_INVALID	(1UL << 16)
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ce91837656169..03708ce10bdaf 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -96,9 +96,9 @@ struct kvm_userspace_memory_region {
 };
 
 /*
- * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
- * other bits are reserved for kvm internal use which are defined in
- * include/linux/kvm_host.h.
+ * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
+ * userspace, other bits are reserved for kvm internal use which are defined
+ * in include/linux/kvm_host.h.
  */
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 6ba2928f8f18f..21d6d29502e48 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -96,9 +96,9 @@ struct kvm_userspace_memory_region {
 };
 
 /*
- * The bit 0 ~ bit 15 of kvm_memory_region::flags are visible for userspace,
- * other bits are reserved for kvm internal use which are defined in
- * include/linux/kvm_host.h.
+ * The bit 0 ~ bit 15 of kvm_userspace_memory_region::flags are visible for
+ * userspace, other bits are reserved for kvm internal use which are defined
+ *in include/linux/kvm_host.h.
  */
 #define KVM_MEM_LOG_DIRTY_PAGES	(1UL << 0)
 #define KVM_MEM_READONLY	(1UL << 1)
-- 
GitLab


From 10c5e80b2c4d67fa9a931ac57beab782cc3db2ef Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javierm@redhat.com>
Date: Fri, 2 Dec 2022 11:50:11 +0100
Subject: [PATCH 3285/4122] KVM: Add missing arch for KVM_CREATE_DEVICE and
 KVM_{SET,GET}_DEVICE_ATTR

The ioctls are missing an architecture property that is present in others.

Suggested-by: Sergio Lopez Pascual <slp@redhat.com>
Signed-off-by: Javier Martinez Canillas <javierm@redhat.com>
Message-Id: <20221202105011.185147-5-javierm@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 92e14823619a2..a63d86be45d93 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -3266,6 +3266,7 @@ valid entries found.
 ----------------------
 
 :Capability: KVM_CAP_DEVICE_CTRL
+:Architectures: all
 :Type: vm ioctl
 :Parameters: struct kvm_create_device (in/out)
 :Returns: 0 on success, -1 on error
@@ -3306,6 +3307,7 @@ number.
 :Capability: KVM_CAP_DEVICE_CTRL, KVM_CAP_VM_ATTRIBUTES for vm device,
              KVM_CAP_VCPU_ATTRIBUTES for vcpu device
              KVM_CAP_SYS_ATTRIBUTES for system (/dev/kvm) device (no set)
+:Architectures: x86, arm64, s390
 :Type: device ioctl, vm ioctl, vcpu ioctl
 :Parameters: struct kvm_device_attr
 :Returns: 0 on success, -1 on error
-- 
GitLab


From dd03cc90e09daeb8a9509e65a39eb576256790b2 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 1 Dec 2022 22:04:33 +0000
Subject: [PATCH 3286/4122] KVM: Remove stale comment about KVM_REQ_UNHALT

Remove a comment about KVM_REQ_UNHALT being set by kvm_vcpu_check_block()
that was missed when KVM_REQ_UNHALT was dropped.

Fixes: c59fb1275838 ("KVM: remove KVM_REQ_UNHALT")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221201220433.31366-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 virt/kvm/kvm_main.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 1782c4555d94f..1401dcba2f826 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -3518,10 +3518,6 @@ void kvm_vcpu_halt(struct kvm_vcpu *vcpu)
 		ktime_t stop = ktime_add_ns(start, vcpu->halt_poll_ns);
 
 		do {
-			/*
-			 * This sets KVM_REQ_UNHALT if an interrupt
-			 * arrives.
-			 */
 			if (kvm_vcpu_check_block(vcpu) < 0)
 				goto out;
 			cpu_relax();
-- 
GitLab


From 8a6841c439dfbba2067a533b0e8264ea438689f6 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Tue, 15 Nov 2022 20:08:29 +0000
Subject: [PATCH 3287/4122] RISC-V: use REG_S/REG_L for mcount

In preparation for rv32i ftrace support, convert mcount routines to use
native sized loads/stores.

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Link: https://lore.kernel.org/r/20221115200832.706370-2-jamie@jamieiles.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/mcount.S | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 6d462681c9c02..9cf0904afd6dd 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -15,8 +15,8 @@
 
 	.macro SAVE_ABI_STATE
 	addi	sp, sp, -16
-	sd	s0, 0(sp)
-	sd	ra, 8(sp)
+	REG_S	s0, 0(sp)
+	REG_S	ra, 8(sp)
 	addi	s0, sp, 16
 	.endm
 
@@ -26,22 +26,22 @@
 	 */
 	.macro SAVE_RET_ABI_STATE
 	addi	sp, sp, -32
-	sd	s0, 16(sp)
-	sd	ra, 24(sp)
-	sd	a0, 8(sp)
+	REG_S	s0, 16(sp)
+	REG_S	ra, 24(sp)
+	REG_S	a0, 8(sp)
 	addi	s0, sp, 32
 	.endm
 
 	.macro RESTORE_ABI_STATE
-	ld	ra, 8(sp)
-	ld	s0, 0(sp)
+	REG_L	ra, 8(sp)
+	REG_L	s0, 0(sp)
 	addi	sp, sp, 16
 	.endm
 
 	.macro RESTORE_RET_ABI_STATE
-	ld	ra, 24(sp)
-	ld	s0, 16(sp)
-	ld	a0, 8(sp)
+	REG_L	ra, 24(sp)
+	REG_L	s0, 16(sp)
+	REG_L	a0, 8(sp)
 	addi	sp, sp, 32
 	.endm
 
@@ -82,16 +82,16 @@ ENTRY(MCOUNT_NAME)
 	la	t4, ftrace_stub
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 	la	t0, ftrace_graph_return
-	ld	t1, 0(t0)
+	REG_L	t1, 0(t0)
 	bne	t1, t4, do_ftrace_graph_caller
 
 	la	t3, ftrace_graph_entry
-	ld	t2, 0(t3)
+	REG_L	t2, 0(t3)
 	la	t6, ftrace_graph_entry_stub
 	bne	t2, t6, do_ftrace_graph_caller
 #endif
 	la	t3, ftrace_trace_function
-	ld	t5, 0(t3)
+	REG_L	t5, 0(t3)
 	bne	t5, t4, do_trace
 	ret
 
@@ -104,7 +104,7 @@ do_ftrace_graph_caller:
 	addi	a0, s0, -8
 	mv	a1, ra
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-	ld	a2, -16(s0)
+	REG_L	a2, -16(s0)
 #endif
 	SAVE_ABI_STATE
 	call	prepare_ftrace_return
@@ -117,7 +117,7 @@ do_ftrace_graph_caller:
  * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller)
  */
 do_trace:
-	ld	a1, -8(s0)
+	REG_L	a1, -8(s0)
 	mv	a0, ra
 
 	SAVE_ABI_STATE
-- 
GitLab


From 3bd7743f8d6d7171db9897a746038eefd52a1fbd Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Tue, 15 Nov 2022 20:08:30 +0000
Subject: [PATCH 3288/4122] RISC-V: reduce mcount save space on RV32

For RV32 we can reduce the size of the ABI save+restore state by using
SZREG so that register stores are packed rather than on an 8 byte
boundary.

Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Link: https://lore.kernel.org/r/20221115200832.706370-3-jamie@jamieiles.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/mcount.S | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 9cf0904afd6dd..613bd07c6268a 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -15,8 +15,8 @@
 
 	.macro SAVE_ABI_STATE
 	addi	sp, sp, -16
-	REG_S	s0, 0(sp)
-	REG_S	ra, 8(sp)
+	REG_S	s0, 0*SZREG(sp)
+	REG_S	ra, 1*SZREG(sp)
 	addi	s0, sp, 16
 	.endm
 
@@ -25,24 +25,24 @@
 	 * register if a0 was not saved.
 	 */
 	.macro SAVE_RET_ABI_STATE
-	addi	sp, sp, -32
-	REG_S	s0, 16(sp)
-	REG_S	ra, 24(sp)
-	REG_S	a0, 8(sp)
-	addi	s0, sp, 32
+	addi	sp, sp, -4*SZREG
+	REG_S	s0, 2*SZREG(sp)
+	REG_S	ra, 3*SZREG(sp)
+	REG_S	a0, 1*SZREG(sp)
+	addi	s0, sp, 4*SZREG
 	.endm
 
 	.macro RESTORE_ABI_STATE
-	REG_L	ra, 8(sp)
-	REG_L	s0, 0(sp)
+	REG_L	ra, 1*SZREG(sp)
+	REG_L	s0, 0*SZREG(sp)
 	addi	sp, sp, 16
 	.endm
 
 	.macro RESTORE_RET_ABI_STATE
-	REG_L	ra, 24(sp)
-	REG_L	s0, 16(sp)
-	REG_L	a0, 8(sp)
-	addi	sp, sp, 32
+	REG_L	ra, 3*SZREG(sp)
+	REG_L	s0, 2*SZREG(sp)
+	REG_L	a0, 1*SZREG(sp)
+	addi	sp, sp, 4*SZREG
 	.endm
 
 ENTRY(ftrace_stub)
@@ -101,10 +101,10 @@ ENTRY(MCOUNT_NAME)
  * prepare_to_return(&ra_to_caller_of_caller, ra_to_caller)
  */
 do_ftrace_graph_caller:
-	addi	a0, s0, -8
+	addi	a0, s0, -SZREG
 	mv	a1, ra
 #ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-	REG_L	a2, -16(s0)
+	REG_L	a2, -2*SZREG(s0)
 #endif
 	SAVE_ABI_STATE
 	call	prepare_ftrace_return
@@ -117,7 +117,7 @@ do_ftrace_graph_caller:
  * (*ftrace_trace_function)(ra_to_caller, ra_to_caller_of_caller)
  */
 do_trace:
-	REG_L	a1, -8(s0)
+	REG_L	a1, -SZREG(s0)
 	mv	a0, ra
 
 	SAVE_ABI_STATE
-- 
GitLab


From dc58a24db8c12ea361e94eaf53adc5d471534694 Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Tue, 15 Nov 2022 20:08:31 +0000
Subject: [PATCH 3289/4122] RISC-V: preserve a1 in mcount

The RISC-V ELF psABI states that both a0 and a1 are used for return
values so we should preserve them both in return_to_handler.  This is
especially important for RV32 for functions returning a 64-bit quantity
otherwise the return value can be corrupted and undefined behaviour
results.

Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Link: https://lore.kernel.org/r/20221115200832.706370-4-jamie@jamieiles.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/mcount.S | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S
index 613bd07c6268a..30102aadc4d73 100644
--- a/arch/riscv/kernel/mcount.S
+++ b/arch/riscv/kernel/mcount.S
@@ -29,6 +29,7 @@
 	REG_S	s0, 2*SZREG(sp)
 	REG_S	ra, 3*SZREG(sp)
 	REG_S	a0, 1*SZREG(sp)
+	REG_S	a1, 0*SZREG(sp)
 	addi	s0, sp, 4*SZREG
 	.endm
 
@@ -42,6 +43,7 @@
 	REG_L	ra, 3*SZREG(sp)
 	REG_L	s0, 2*SZREG(sp)
 	REG_L	a0, 1*SZREG(sp)
+	REG_L	a1, 0*SZREG(sp)
 	addi	sp, sp, 4*SZREG
 	.endm
 
@@ -71,9 +73,9 @@ ENTRY(return_to_handler)
 	mv	a0, t6
 #endif
 	call	ftrace_return_to_handler
-	mv	a1, a0
+	mv	a2, a0
 	RESTORE_RET_ABI_STATE
-	jalr	a1
+	jalr	a2
 ENDPROC(return_to_handler)
 #endif
 
-- 
GitLab


From f32b4b467ebd8a035f8342b7ea27efc84b10d96b Mon Sep 17 00:00:00 2001
From: Jamie Iles <jamie@jamieiles.com>
Date: Tue, 15 Nov 2022 20:08:32 +0000
Subject: [PATCH 3290/4122] RISC-V: enable dynamic ftrace for RV32I

The RISC-V mcount function is now capable of supporting RV32I so make it
available in the kernel config.

Signed-off-by: Jamie Iles <jamie@jamieiles.com>
Link: https://lore.kernel.org/r/20221115200832.706370-5-jamie@jamieiles.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6b48a3ae98439..5ae4f7ce2a056 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -129,6 +129,11 @@ config RISCV
 	select TRACE_IRQFLAGS_SUPPORT
 	select UACCESS_MEMCPY if !MMU
 	select ZONE_DMA32 if 64BIT
+	select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8)
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
+	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
 
 config ARCH_MMAP_RND_BITS_MIN
 	default 18 if 64BIT
@@ -274,11 +279,6 @@ config ARCH_RV64I
 	bool "RV64I"
 	select 64BIT
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
-	select HAVE_DYNAMIC_FTRACE if !XIP_KERNEL && MMU && $(cc-option,-fpatchable-function-entry=8)
-	select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
-	select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL
-	select HAVE_FUNCTION_GRAPH_TRACER
-	select HAVE_FUNCTION_TRACER if !XIP_KERNEL
 	select SWIOTLB if MMU
 
 endchoice
-- 
GitLab


From ef40757743b47cc95de9b4ed41525c94f8dc73d9 Mon Sep 17 00:00:00 2001
From: Yuan ZhaoXiong <yuanzhaoxiong@baidu.com>
Date: Fri, 2 Dec 2022 20:36:14 +0800
Subject: [PATCH 3291/4122] KVM: x86: fix APICv/x2AVIC disabled when vm reboot
 by itself

When a VM reboots itself, the reset process will result in
an ioctl(KVM_SET_LAPIC, ...) to disable x2APIC mode and set
the xAPIC id of the vCPU to its default value, which is the
vCPU id.

That will be handled in KVM as follows:

     kvm_vcpu_ioctl_set_lapic
       kvm_apic_set_state
	  kvm_lapic_set_base  =>  disable X2APIC mode
	    kvm_apic_state_fixup
	      kvm_lapic_xapic_id_updated
	        kvm_xapic_id(apic) != apic->vcpu->vcpu_id
		kvm_set_apicv_inhibit(APICV_INHIBIT_REASON_APIC_ID_MODIFIED)
	   memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s))  => update APIC_ID

When kvm_apic_set_state invokes kvm_lapic_set_base to disable
x2APIC mode, the old 32-bit x2APIC id is still present rather
than the 8-bit xAPIC id.  kvm_lapic_xapic_id_updated will set the
APICV_INHIBIT_REASON_APIC_ID_MODIFIED bit and disable APICv/x2AVIC.

Instead, kvm_lapic_xapic_id_updated must be called after APIC_ID is
changed.

In fact, this fixes another small issue in the code in that
potential changes to a vCPU's xAPIC ID need not be tracked for
KVM_GET_LAPIC.

Fixes: 3743c2f02517 ("KVM: x86: inhibit APICv/AVIC on changes to APIC ID or APIC base")
Signed-off-by: Yuan ZhaoXiong <yuanzhaoxiong@baidu.com>
Message-Id: <1669984574-32692-1-git-send-email-yuanzhaoxiong@baidu.com>
Cc: stable@vger.kernel.org
Reported-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/lapic.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1bb63746e9910..8224ac8b617a4 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -2724,8 +2724,6 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
 			icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
 			__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
 		}
-	} else {
-		kvm_lapic_xapic_id_updated(vcpu->arch.apic);
 	}
 
 	return 0;
@@ -2761,6 +2759,9 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 	}
 	memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
 
+	if (!apic_x2apic_mode(apic))
+		kvm_lapic_xapic_id_updated(apic);
+
 	atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
 	kvm_recalculate_apic_map(vcpu->kvm);
 	kvm_apic_set_version(vcpu);
-- 
GitLab


From ef16b2dff4d1c71eb32b306d400d4c0f3a383ba7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:44 +0000
Subject: [PATCH 3292/4122] KVM: arm64: selftests: Enable single-step without a
 "full" ucall()

Add a new ucall hook, GUEST_UCALL_NONE(), to allow tests to make ucalls
without allocating a ucall struct, and use it to enable single-step
in ARM's debug-exceptions test.  Like the disable single-step path, the
enabling path also needs to ensure that no exclusive access sequences are
attempted after enabling single-step, as the exclusive monitor is cleared
on ERET from the debug exception taken to EL2.

The test currently "works" because clear_bit() isn't actually an atomic
operation... yet.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221119013450.2643007-4-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/aarch64/debug-exceptions.c  | 21 ++++++++++---------
 .../selftests/kvm/include/ucall_common.h      |  8 +++++++
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index d86c4e4d1c826..c62ec4d7f6a38 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -239,10 +239,6 @@ static void guest_svc_handler(struct ex_regs *regs)
 	svc_addr = regs->pc;
 }
 
-enum single_step_op {
-	SINGLE_STEP_ENABLE = 0,
-};
-
 static void guest_code_ss(int test_cnt)
 {
 	uint64_t i;
@@ -253,8 +249,16 @@ static void guest_code_ss(int test_cnt)
 		w_bvr = i << 2;
 		w_wvr = i << 2;
 
-		/* Enable Single Step execution */
-		GUEST_SYNC(SINGLE_STEP_ENABLE);
+		/*
+		 * Enable Single Step execution.  Note!  This _must_ be a bare
+		 * ucall as the ucall() path uses atomic operations to manage
+		 * the ucall structures, and the built-in "atomics" are usually
+		 * implemented via exclusive access instructions.  The exlusive
+		 * monitor is cleared on ERET, and so taking debug exceptions
+		 * during a LDREX=>STREX sequence will prevent forward progress
+		 * and hang the guest/test.
+		 */
+		GUEST_UCALL_NONE();
 
 		/*
 		 * The userspace will verify that the pc is as expected during
@@ -356,12 +360,9 @@ void test_single_step_from_userspace(int test_cnt)
 				break;
 			}
 
-			TEST_ASSERT(cmd == UCALL_SYNC,
+			TEST_ASSERT(cmd == UCALL_NONE,
 				    "Unexpected ucall cmd 0x%lx", cmd);
 
-			TEST_ASSERT(uc.args[1] == SINGLE_STEP_ENABLE,
-				    "Unexpected ucall action 0x%lx", uc.args[1]);
-
 			debug.control = KVM_GUESTDBG_ENABLE |
 					KVM_GUESTDBG_SINGLESTEP;
 			ss_enable = true;
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
index bdd373189a777..1a6aaef5ccae1 100644
--- a/tools/testing/selftests/kvm/include/ucall_common.h
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -35,6 +35,14 @@ void ucall(uint64_t cmd, int nargs, ...);
 uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
 void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
 
+/*
+ * Perform userspace call without any associated data.  This bare call avoids
+ * allocating a ucall struct, which can be useful if the atomic operations in
+ * the full ucall() are problematic and/or unwanted.  Note, this will come out
+ * as UCALL_NONE on the backend.
+ */
+#define GUEST_UCALL_NONE()	ucall_arch_do_ucall((vm_vaddr_t)NULL)
+
 #define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4)	\
 				ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
 #define GUEST_SYNC(stage)	ucall(UCALL_SYNC, 2, "hello", stage)
-- 
GitLab


From 7f2b47f22b825c16d9843e6e78bbb2370d2c31a0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:45 +0000
Subject: [PATCH 3293/4122] tools: Take @bit as an "unsigned long" in
 {clear,set}_bit() helpers

Take @bit as an unsigned long instead of a signed int in clear_bit() and
set_bit() so that they match the double-underscore versions, __clear_bit()
and __set_bit().  This will allow converting users that really don't want
atomic operations to the double-underscores without introducing a
functional change, which will in turn allow making {clear,set}_bit()
atomic (as advertised).

Practically speaking, this _should_ have no functional impact.  KVM's
selftests usage is either hardcoded (Hyper-V tests) or is artificially
limited (arch_timer test and dirty_log test).  In KVM, dirty_log test is
the only mildly interesting case as it's use indirectly restricted to
unsigned 32-bit values, but in theory it could generate a negative value
when cast to a signed int.  But in that case, taking an "unsigned long"
is actually a bug fix.

Perf's usage is more difficult to audit, but any code that is affected
by the switch is likely already broken.  perf_header__{set,clear}_feat()
and perf_file_header__read() effectively use only hardcoded enums with
small, positive values, atom_new() passes an unsigned long, but its value
is capped at 128 via NR_ATOM_PER_PAGE, etc...

The only real potential for breakage is in the perf flows that take a
"cpu", but it's unlikely perf is subtly relying on a negative index into
bitmaps, e.g. "cpu" can be "-1", but only as "not valid" placeholder.

Note, tools/testing/nvdimm/ makes heavy use of set_bit(), but that code
builds into a kernel module of sorts, i.e. pulls in all of the kernel's
header and so is getting the kernel's atomic set_bit().  The NVDIMM test
usage of atomics is likely unnecessary, e.g. ndtest_dimm_register() sets
bits in a local variable, but that's neither here nor there as far as
this change is concerned.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221119013450.2643007-5-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/include/asm-generic/bitops/atomic.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
index 2f6ea28764a79..f64b049d236c7 100644
--- a/tools/include/asm-generic/bitops/atomic.h
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -5,12 +5,12 @@
 #include <asm/types.h>
 #include <asm/bitsperlong.h>
 
-static inline void set_bit(int nr, unsigned long *addr)
+static inline void set_bit(unsigned long nr, unsigned long *addr)
 {
 	addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG);
 }
 
-static inline void clear_bit(int nr, unsigned long *addr)
+static inline void clear_bit(unsigned long nr, unsigned long *addr)
 {
 	addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
 }
-- 
GitLab


From 75d7ba32f9829e778484cf6e96e6e8f80914b0b3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:46 +0000
Subject: [PATCH 3294/4122] perf tools: Use dedicated non-atomic clear/set bit
 helpers

Use the dedicated non-atomic helpers for {clear,set}_bit() and their
test variants, i.e. the double-underscore versions.  Depsite being
defined in atomic.h, and despite the kernel versions being atomic in the
kernel, tools' {clear,set}_bit() helpers aren't actually atomic.  Move
to the double-underscore versions so that the versions that are expected
to be atomic (for kernel developers) can be made atomic without affecting
users that don't want atomic operations.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Message-Id: <20221119013450.2643007-6-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/perf/bench/find-bit-bench.c                      | 2 +-
 tools/perf/builtin-c2c.c                               | 6 +++---
 tools/perf/builtin-kwork.c                             | 6 +++---
 tools/perf/builtin-record.c                            | 6 +++---
 tools/perf/builtin-sched.c                             | 2 +-
 tools/perf/tests/bitmap.c                              | 2 +-
 tools/perf/tests/mem2node.c                            | 2 +-
 tools/perf/util/affinity.c                             | 4 ++--
 tools/perf/util/header.c                               | 8 ++++----
 tools/perf/util/mmap.c                                 | 6 +++---
 tools/perf/util/pmu.c                                  | 2 +-
 tools/perf/util/scripting-engines/trace-event-perl.c   | 2 +-
 tools/perf/util/scripting-engines/trace-event-python.c | 2 +-
 tools/perf/util/session.c                              | 2 +-
 tools/perf/util/svghelper.c                            | 2 +-
 15 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
index 22b5cfe970237..d103c3136983d 100644
--- a/tools/perf/bench/find-bit-bench.c
+++ b/tools/perf/bench/find-bit-bench.c
@@ -70,7 +70,7 @@ static int do_for_each_set_bit(unsigned int num_bits)
 		bitmap_zero(to_test, num_bits);
 		skip = num_bits / set_bits;
 		for (i = 0; i < num_bits; i += skip)
-			set_bit(i, to_test);
+			__set_bit(i, to_test);
 
 		for (i = 0; i < outer_iterations; i++) {
 			old = accumulator;
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index a9190458d2d50..52d94c7dd8366 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -230,7 +230,7 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
 		      "WARNING: no sample cpu value"))
 		return;
 
-	set_bit(sample->cpu, c2c_he->cpuset);
+	__set_bit(sample->cpu, c2c_he->cpuset);
 }
 
 static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
@@ -247,7 +247,7 @@ static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
 	if (WARN_ONCE(node < 0, "WARNING: failed to find node\n"))
 		return;
 
-	set_bit(node, c2c_he->nodeset);
+	__set_bit(node, c2c_he->nodeset);
 
 	if (c2c_he->paddr != sample->phys_addr) {
 		c2c_he->paddr_cnt++;
@@ -2318,7 +2318,7 @@ static int setup_nodes(struct perf_session *session)
 			continue;
 
 		perf_cpu_map__for_each_cpu(cpu, idx, map) {
-			set_bit(cpu.cpu, set);
+			__set_bit(cpu.cpu, set);
 
 			if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
 				return -EINVAL;
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index fb8c63656ad89..1f63e24f704e2 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -216,7 +216,7 @@ static struct kwork_atom *atom_new(struct perf_kwork *kwork,
 	list_add_tail(&page->list, &kwork->atom_page_list);
 
 found_atom:
-	set_bit(i, page->bitmap);
+	__set_bit(i, page->bitmap);
 	atom->time = sample->time;
 	atom->prev = NULL;
 	atom->page_addr = page;
@@ -229,8 +229,8 @@ static void atom_free(struct kwork_atom *atom)
 	if (atom->prev != NULL)
 		atom_free(atom->prev);
 
-	clear_bit(atom->bit_inpage,
-		  ((struct kwork_atom_page *)atom->page_addr)->bitmap);
+	__clear_bit(atom->bit_inpage,
+		    ((struct kwork_atom_page *)atom->page_addr)->bitmap);
 }
 
 static void atom_del(struct kwork_atom *atom)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index e128b855dddec..2711c141c5bf0 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3555,7 +3555,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp
 		/* Return ENODEV is input cpu is greater than max cpu */
 		if ((unsigned long)cpu.cpu > mask->nbits)
 			return -ENODEV;
-		set_bit(cpu.cpu, mask->bits);
+		__set_bit(cpu.cpu, mask->bits);
 	}
 
 	return 0;
@@ -3627,8 +3627,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map
 	pr_debug("nr_threads: %d\n", rec->nr_threads);
 
 	for (t = 0; t < rec->nr_threads; t++) {
-		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
-		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
+		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
+		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
 		if (verbose) {
 			pr_debug("thread_masks[%d]: ", t);
 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index f93737eef07ba..86e18575c9bee 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
 
 	if (sched->map.comp) {
 		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
-		if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
+		if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
 			sched->map.comp_cpus[cpus_nr++] = this_cpu;
 			new_cpu = true;
 		}
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 4965dd6669566..0173f5402a35b 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 
 	if (map && bm) {
 		for (i = 0; i < perf_cpu_map__nr(map); i++)
-			set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
+			__set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
 	}
 
 	if (map)
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index 4c96829510c91..a0e88c4961074 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -33,7 +33,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 		int i;
 
 		perf_cpu_map__for_each_cpu(cpu, i, map)
-			set_bit(cpu.cpu, bm);
+			__set_bit(cpu.cpu, bm);
 	}
 
 	if (map)
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 4ee96b3c755b7..38dc4524b7e86 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -58,14 +58,14 @@ void affinity__set(struct affinity *a, int cpu)
 		return;
 
 	a->changed = true;
-	set_bit(cpu, a->sched_cpus);
+	__set_bit(cpu, a->sched_cpus);
 	/*
 	 * We ignore errors because affinity is just an optimization.
 	 * This could happen for example with isolated CPUs or cpusets.
 	 * In this case the IPIs inside the kernel's perf API still work.
 	 */
 	sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
-	clear_bit(cpu, a->sched_cpus);
+	__clear_bit(cpu, a->sched_cpus);
 }
 
 static void __affinity__cleanup(struct affinity *a)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 98dfaf84bd137..dc2ae397d400e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -79,12 +79,12 @@ struct perf_file_attr {
 
 void perf_header__set_feat(struct perf_header *header, int feat)
 {
-	set_bit(feat, header->adds_features);
+	__set_bit(feat, header->adds_features);
 }
 
 void perf_header__clear_feat(struct perf_header *header, int feat)
 {
-	clear_bit(feat, header->adds_features);
+	__clear_bit(feat, header->adds_features);
 }
 
 bool perf_header__has_feat(const struct perf_header *header, int feat)
@@ -1358,7 +1358,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
 	rewinddir(dir);
 
 	for_each_memory(phys, dir) {
-		set_bit(phys, n->set);
+		__set_bit(phys, n->set);
 	}
 
 	closedir(dir);
@@ -3952,7 +3952,7 @@ int perf_file_header__read(struct perf_file_header *header,
 
 		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
 			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
-			set_bit(HEADER_BUILD_ID, header->adds_features);
+			__set_bit(HEADER_BUILD_ID, header->adds_features);
 		}
 	}
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index a4dff881be39b..49093b21ee2da 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -111,7 +111,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, i
 			pr_err("Failed to allocate node mask for mbind: error %m\n");
 			return -1;
 		}
-		set_bit(node_index, node_mask);
+		__set_bit(node_index, node_mask);
 		if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
 			pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
 				data, data + mmap_len, node_index);
@@ -256,7 +256,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
 	for (idx = 0; idx < nr_cpus; idx++) {
 		cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */
 		if (cpu__get_node(cpu) == node)
-			set_bit(cpu.cpu, mask->bits);
+			__set_bit(cpu.cpu, mask->bits);
 	}
 }
 
@@ -270,7 +270,7 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *
 	if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
 		build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
 	else if (mp->affinity == PERF_AFFINITY_CPU)
-		set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
+		__set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
 
 	return 0;
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 03284059175f7..371d8f7a3de3c 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1513,7 +1513,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
 
 	memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
 	for (b = from; b <= to; b++)
-		set_bit(b, bits);
+		__set_bit(b, bits);
 }
 
 void perf_pmu__del_formats(struct list_head *formats)
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index a5d945415bbc1..5b602b6d46854 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -365,7 +365,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 
 	sprintf(handler, "%s::%s", event->system, event->name);
 
-	if (!test_and_set_bit(event->id, events_defined))
+	if (!__test_and_set_bit(event->id, events_defined))
 		define_event_symbols(event, handler, event->print_fmt.args);
 
 	s = nsecs / NSEC_PER_SEC;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 1f2040f36d4e9..0f229fa29163e 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -933,7 +933,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
 
 	sprintf(handler_name, "%s__%s", event->system, event->name);
 
-	if (!test_and_set_bit(event->id, events_defined))
+	if (!__test_and_set_bit(event->id, events_defined))
 		define_event_symbols(event, handler_name, event->print_fmt.args);
 
 	handler = get_handler(handler_name);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1a4f10de29ffe..873fd51ec1b21 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2748,7 +2748,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
 			goto out_delete_map;
 		}
 
-		set_bit(cpu.cpu, cpu_bitmap);
+		__set_bit(cpu.cpu, cpu_bitmap);
 	}
 
 	err = 0;
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1e0c731fc5396..5c62d3118c41f 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -741,7 +741,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
 			break;
 		}
 
-		set_bit(c.cpu, cpumask_bits(b));
+		__set_bit(c.cpu, cpumask_bits(b));
 	}
 
 	perf_cpu_map__put(m);
-- 
GitLab


From 03a0c819e71755398d59993b9adee203544617d5 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:47 +0000
Subject: [PATCH 3295/4122] KVM: selftests: Use non-atomic clear/set bit
 helpers in KVM tests

Use the dedicated non-atomic helpers for {clear,set}_bit() and their
test variants, i.e. the double-underscore versions.  Depsite being
defined in atomic.h, and despite the kernel versions being atomic in the
kernel, tools' {clear,set}_bit() helpers aren't actually atomic.  Move
to the double-underscore versions so that the versions that are expected
to be atomic (for kernel developers) can be made atomic without affecting
users that don't want atomic operations.

Leave the usage in ucall_free() as-is, it's the one place in tools/ that
actually wants/needs atomic behavior.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221119013450.2643007-7-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../selftests/kvm/aarch64/arch_timer.c        |  2 +-
 tools/testing/selftests/kvm/dirty_log_test.c  | 34 +++++++++----------
 .../selftests/kvm/x86_64/hyperv_evmcs.c       |  4 +--
 .../selftests/kvm/x86_64/hyperv_svm_test.c    |  4 +--
 4 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index f2a96779716a8..26556a266021e 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -222,7 +222,7 @@ static void *test_vcpu_run(void *arg)
 
 	/* Currently, any exit from guest is an indication of completion */
 	pthread_mutex_lock(&vcpu_done_map_lock);
-	set_bit(vcpu_idx, vcpu_done_map);
+	__set_bit(vcpu_idx, vcpu_done_map);
 	pthread_mutex_unlock(&vcpu_done_map_lock);
 
 	switch (get_ucall(vcpu, &uc)) {
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index a38c4369fb8ed..a75548865f6b4 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -44,20 +44,20 @@
 # define BITOP_LE_SWIZZLE	((BITS_PER_LONG-1) & ~0x7)
 # define test_bit_le(nr, addr) \
 	test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define set_bit_le(nr, addr) \
-	set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define clear_bit_le(nr, addr) \
-	clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define test_and_set_bit_le(nr, addr) \
-	test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define test_and_clear_bit_le(nr, addr) \
-	test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __set_bit_le(nr, addr) \
+	__set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __clear_bit_le(nr, addr) \
+	__clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __test_and_set_bit_le(nr, addr) \
+	__test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __test_and_clear_bit_le(nr, addr) \
+	__test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
 #else
-# define test_bit_le		test_bit
-# define set_bit_le		set_bit
-# define clear_bit_le		clear_bit
-# define test_and_set_bit_le	test_and_set_bit
-# define test_and_clear_bit_le	test_and_clear_bit
+# define test_bit_le			test_bit
+# define __set_bit_le			__set_bit
+# define __clear_bit_le			__clear_bit
+# define __test_and_set_bit_le		__test_and_set_bit
+# define __test_and_clear_bit_le	__test_and_clear_bit
 #endif
 
 #define TEST_DIRTY_RING_COUNT		65536
@@ -305,7 +305,7 @@ static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
 		TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
 			    "0x%llx >= 0x%x", cur->offset, num_pages);
 		//pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
-		set_bit_le(cur->offset, bitmap);
+		__set_bit_le(cur->offset, bitmap);
 		dirty_ring_last_page = cur->offset;
 		dirty_gfn_set_collected(cur);
 		(*fetch_index)++;
@@ -560,7 +560,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 		value_ptr = host_test_mem + page * host_page_size;
 
 		/* If this is a special page that we were tracking... */
-		if (test_and_clear_bit_le(page, host_bmap_track)) {
+		if (__test_and_clear_bit_le(page, host_bmap_track)) {
 			host_track_next_count++;
 			TEST_ASSERT(test_bit_le(page, bmap),
 				    "Page %"PRIu64" should have its dirty bit "
@@ -568,7 +568,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 				    page);
 		}
 
-		if (test_and_clear_bit_le(page, bmap)) {
+		if (__test_and_clear_bit_le(page, bmap)) {
 			bool matched;
 
 			host_dirty_count++;
@@ -661,7 +661,7 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
 				 * should report its dirtyness in the
 				 * next run
 				 */
-				set_bit_le(page, host_bmap_track);
+				__set_bit_le(page, host_bmap_track);
 			}
 		}
 	}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
index ba09d300c9539..af29e5776d403 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
@@ -142,7 +142,7 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
 	/* Intercept RDMSR 0xc0000100 */
 	vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
 		CPU_BASED_USE_MSR_BITMAPS);
-	set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+	__set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
 	GUEST_ASSERT(!vmresume());
 	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
 	current_evmcs->guest_rip += 2; /* rdmsr */
@@ -154,7 +154,7 @@ void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
 	current_evmcs->guest_rip += 2; /* rdmsr */
 
 	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
-	set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+	__set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
 	/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
 	current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
 	GUEST_ASSERT(!vmresume());
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index 3b3cc94ba8e47..68a7d354ea070 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -103,7 +103,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
 
 	/* Intercept RDMSR 0xc0000100 */
 	vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
-	set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+	__set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
 	run_guest(vmcb, svm->vmcb_gpa);
 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
 	vmcb->save.rip += 2; /* rdmsr */
@@ -115,7 +115,7 @@ static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
 	vmcb->save.rip += 2; /* rdmsr */
 
 	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
-	set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+	__set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
 	/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
 	vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
 	run_guest(vmcb, svm->vmcb_gpa);
-- 
GitLab


From 7f32a6cf8b5a8067537f25a1f12744292431aae1 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:48 +0000
Subject: [PATCH 3296/4122] tools: Drop conflicting non-atomic
 test_and_{clear,set}_bit() helpers

Drop tools' non-atomic test_and_set_bit() and test_and_clear_bit() helpers
now that all users are gone.  The names will be claimed in the future for
atomic versions.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221119013450.2643007-8-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/include/linux/bitmap.h | 34 ----------------------------------
 1 file changed, 34 deletions(-)

diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h
index 65d0747c5205c..f3566ea0f932e 100644
--- a/tools/include/linux/bitmap.h
+++ b/tools/include/linux/bitmap.h
@@ -77,40 +77,6 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
 		__bitmap_or(dst, src1, src2, nbits);
 }
 
-/**
- * test_and_set_bit - Set a bit and return its old value
- * @nr: Bit to set
- * @addr: Address to count from
- */
-static inline int test_and_set_bit(int nr, unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old;
-
-	old = *p;
-	*p = old | mask;
-
-	return (old & mask) != 0;
-}
-
-/**
- * test_and_clear_bit - Clear a bit and return its old value
- * @nr: Bit to clear
- * @addr: Address to count from
- */
-static inline int test_and_clear_bit(int nr, unsigned long *addr)
-{
-	unsigned long mask = BIT_MASK(nr);
-	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
-	unsigned long old;
-
-	old = *p;
-	*p = old & ~mask;
-
-	return (old & mask) != 0;
-}
-
 /**
  * bitmap_zalloc - Allocate bitmap
  * @nbits: Number of bits
-- 
GitLab


From 36293352ff433061d45d52784983e44950c09ae3 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:49 +0000
Subject: [PATCH 3297/4122] tools: Drop "atomic_" prefix from atomic
 test_and_set_bit()

Drop the "atomic_" prefix from tools' atomic_test_and_set_bit() to
match the kernel nomenclature where test_and_set_bit() is atomic,
and __test_and_set_bit() provides the non-atomic variant.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221119013450.2643007-9-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/arch/x86/include/asm/atomic.h            | 3 +--
 tools/include/asm-generic/atomic-gcc.h         | 2 +-
 tools/testing/selftests/kvm/lib/ucall_common.c | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
index 01cc27ec4520d..a42733af7d51a 100644
--- a/tools/arch/x86/include/asm/atomic.h
+++ b/tools/arch/x86/include/asm/atomic.h
@@ -71,10 +71,9 @@ static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new)
 	return cmpxchg(&v->counter, old, new);
 }
 
-static inline int atomic_test_and_set_bit(long nr, unsigned long *addr)
+static inline int test_and_set_bit(long nr, unsigned long *addr)
 {
 	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c");
-
 }
 
 #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
index 6daa68bf5b9e0..37ef522aaac48 100644
--- a/tools/include/asm-generic/atomic-gcc.h
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -70,7 +70,7 @@ static inline int atomic_cmpxchg(atomic_t *v, int oldval, int newval)
 	return cmpxchg(&(v)->counter, oldval, newval);
 }
 
-static inline int atomic_test_and_set_bit(long nr, unsigned long *addr)
+static inline int test_and_set_bit(long nr, unsigned long *addr)
 {
 	unsigned long mask = BIT_MASK(nr);
 	long old;
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index fcae96461e460..820ce6c828299 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -44,7 +44,7 @@ static struct ucall *ucall_alloc(void)
 	GUEST_ASSERT(ucall_pool);
 
 	for (i = 0; i < KVM_MAX_VCPUS; ++i) {
-		if (!atomic_test_and_set_bit(i, ucall_pool->in_use)) {
+		if (!test_and_set_bit(i, ucall_pool->in_use)) {
 			uc = &ucall_pool->ucalls[i];
 			memset(uc->args, 0, sizeof(uc->args));
 			return uc;
-- 
GitLab


From bb056c0f080a3d15c2a9ad9057a8b542d45e4ba0 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:50 +0000
Subject: [PATCH 3298/4122] tools: KVM: selftests: Convert clear/set_bit() to
 actual atomics

Convert {clear,set}_bit() to atomics as KVM's ucall implementation relies
on clear_bit() being atomic, they are defined in atomic.h, and the same
helpers in the kernel proper are atomic.

KVM's ucall infrastructure is the only user of clear_bit() in tools/, and
there are no true set_bit() users.  tools/testing/nvdimm/ does make heavy
use of set_bit(), but that code builds into a kernel module of sorts, i.e.
pulls in all of the kernel's header and so is already getting the kernel's
atomic set_bit().

Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221119013450.2643007-10-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/arch/x86/include/asm/atomic.h       |  5 +++++
 tools/include/asm-generic/atomic-gcc.h    | 11 +++++++++++
 tools/include/asm-generic/bitops/atomic.h | 15 ++++++---------
 3 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
index a42733af7d51a..365cf182df12b 100644
--- a/tools/arch/x86/include/asm/atomic.h
+++ b/tools/arch/x86/include/asm/atomic.h
@@ -76,4 +76,9 @@ static inline int test_and_set_bit(long nr, unsigned long *addr)
 	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(bts), *addr, "Ir", nr, "%0", "c");
 }
 
+static inline int test_and_clear_bit(long nr, unsigned long *addr)
+{
+	GEN_BINARY_RMWcc(LOCK_PREFIX __ASM_SIZE(btc), *addr, "Ir", nr, "%0", "c");
+}
+
 #endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
index 37ef522aaac48..9b3c528bab924 100644
--- a/tools/include/asm-generic/atomic-gcc.h
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -81,4 +81,15 @@ static inline int test_and_set_bit(long nr, unsigned long *addr)
 	return !!(old & mask);
 }
 
+static inline int test_and_clear_bit(long nr, unsigned long *addr)
+{
+	unsigned long mask = BIT_MASK(nr);
+	long old;
+
+	addr += BIT_WORD(nr);
+
+	old = __sync_fetch_and_and(addr, ~mask);
+	return !!(old & mask);
+}
+
 #endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
diff --git a/tools/include/asm-generic/bitops/atomic.h b/tools/include/asm-generic/bitops/atomic.h
index f64b049d236c7..ab37a221b41a4 100644
--- a/tools/include/asm-generic/bitops/atomic.h
+++ b/tools/include/asm-generic/bitops/atomic.h
@@ -5,14 +5,11 @@
 #include <asm/types.h>
 #include <asm/bitsperlong.h>
 
-static inline void set_bit(unsigned long nr, unsigned long *addr)
-{
-	addr[nr / __BITS_PER_LONG] |= 1UL << (nr % __BITS_PER_LONG);
-}
-
-static inline void clear_bit(unsigned long nr, unsigned long *addr)
-{
-	addr[nr / __BITS_PER_LONG] &= ~(1UL << (nr % __BITS_PER_LONG));
-}
+/*
+ * Just alias the test versions, all of the compiler built-in atomics "fetch",
+ * and optimizing compile-time constants on x86 isn't worth the complexity.
+ */
+#define set_bit test_and_set_bit
+#define clear_bit test_and_clear_bit
 
 #endif /* _TOOLS_LINUX_ASM_GENERIC_BITOPS_ATOMIC_H_ */
-- 
GitLab


From 4bf46e35826d8dc4fc0a103dd0ccd94c072a4c6a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Thu, 1 Dec 2022 09:13:54 +0000
Subject: [PATCH 3299/4122] KVM: selftests: Fix spelling mistake
 "probabalistic" -> "probabilistic"

There is a spelling mistake in some help text. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Message-Id: <20221201091354.1613652-1-colin.i.king@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/dirty_log_perf_test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index c33e89012ae6f..e9d6d1aecf89c 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -398,7 +398,7 @@ static void help(char *name)
 	printf(" -x: Split the memory region into this number of memslots.\n"
 	       "     (default: 1)\n");
 	printf(" -w: specify the percentage of pages which should be written to\n"
-	       "     as an integer from 0-100 inclusive. This is probabalistic,\n"
+	       "     as an integer from 0-100 inclusive. This is probabilistic,\n"
 	       "     so -w X means each page has an X%% chance of writing\n"
 	       "     and a (100-X)%% chance of reading.\n"
 	       "     (default: 100 i.e. all pages are written to.)\n");
-- 
GitLab


From 6925ba3d9b8ccf1989b4cf13d6f0d7e341899481 Mon Sep 17 00:00:00 2001
From: Hal Feng <hal.feng@starfivetech.com>
Date: Fri, 18 Nov 2022 09:17:14 +0800
Subject: [PATCH 3300/4122] RISC-V: defconfig: Enable CONFIG_SERIAL_8250_DW

Add CONFIG_SERIAL_8250_DW=y, which is a necessary option for
StarFive JH7110 and JH7100 SoCs to boot with serial ports.

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Signed-off-by: Hal Feng <hal.feng@starfivetech.com>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20221118011714.70877-9-hal.feng@starfivetech.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index daba5d7438620..74ed7037314fd 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -123,6 +123,7 @@ CONFIG_MICROSEMI_PHY=y
 CONFIG_INPUT_MOUSEDEV=y
 CONFIG_SERIAL_8250=y
 CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_VIRTIO_CONSOLE=y
 CONFIG_HW_RANDOM=y
-- 
GitLab


From 0c2a04128f500ea4dfc6bc449507005b998b76ab Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 2 Dec 2022 13:34:45 -0500
Subject: [PATCH 3301/4122] KVM: x86: remove unnecessary exports

Several symbols are not used by vendor modules but still exported.
Removing them ensures that new coupling between kvm.ko and kvm-*.ko
is noticed and reviewed.

Co-developed-by: Sean Christopherson <seanjc@google.com>
Co-developed-by: Like Xu <like.xu.linux@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Like Xu <like.xu.linux@gmail.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/hyperv.c | 1 -
 arch/x86/kvm/irq.c    | 2 --
 arch/x86/kvm/lapic.c  | 3 ---
 arch/x86/kvm/x86.c    | 8 --------
 4 files changed, 14 deletions(-)

diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c
index 2c7f2a26421e8..cc3e8c7d08500 100644
--- a/arch/x86/kvm/hyperv.c
+++ b/arch/x86/kvm/hyperv.c
@@ -898,7 +898,6 @@ bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
 		return false;
 	return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
 }
-EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
 
 int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index d8d50558f1657..a70952eca9058 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -31,7 +31,6 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 
 	return r;
 }
-EXPORT_SYMBOL(kvm_cpu_has_pending_timer);
 
 /*
  * check if there is a pending userspace external interrupt
@@ -150,7 +149,6 @@ void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu)
 	if (kvm_xen_timer_enabled(vcpu))
 		kvm_xen_inject_timer_irqs(vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_inject_pending_timer_irqs);
 
 void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 8224ac8b617a4..4efdb4a4d72c6 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -160,7 +160,6 @@ bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
 	       && !(kvm_mwait_in_guest(vcpu->kvm) ||
 		    kvm_can_post_timer_interrupt(vcpu));
 }
-EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
 
 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
 {
@@ -1914,7 +1913,6 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
 
 	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
 }
-EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
 
 static void cancel_hv_timer(struct kvm_lapic *apic)
 {
@@ -2432,7 +2430,6 @@ void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
 		apic->isr_count = count_vectors(apic->regs + APIC_ISR);
 	}
 }
-EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
 
 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 {
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 152ea4993b76f..4825773886f9d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -463,7 +463,6 @@ u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.apic_base;
 }
-EXPORT_SYMBOL_GPL(kvm_get_apic_base);
 
 enum lapic_mode kvm_get_apic_mode(struct kvm_vcpu *vcpu)
 {
@@ -491,7 +490,6 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	kvm_recalculate_apic_map(vcpu->kvm);
 	return 0;
 }
-EXPORT_SYMBOL_GPL(kvm_set_apic_base);
 
 /*
  * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
@@ -782,7 +780,6 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 		kvm_queue_exception_e_p(vcpu, PF_VECTOR, fault->error_code,
 					fault->address);
 }
-EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
 
 void kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu,
 				    struct x86_exception *fault)
@@ -811,7 +808,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu)
 	atomic_inc(&vcpu->arch.nmi_queued);
 	kvm_make_request(KVM_REQ_NMI, vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_inject_nmi);
 
 void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code)
 {
@@ -836,7 +832,6 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl)
 	kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
 	return false;
 }
-EXPORT_SYMBOL_GPL(kvm_require_cpl);
 
 bool kvm_require_dr(struct kvm_vcpu *vcpu, int dr)
 {
@@ -2069,7 +2064,6 @@ int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
 {
 	return kvm_skip_emulated_instruction(vcpu);
 }
-EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
 
 int kvm_emulate_invd(struct kvm_vcpu *vcpu)
 {
@@ -2515,7 +2509,6 @@ u64 kvm_scale_tsc(u64 tsc, u64 ratio)
 
 	return _tsc;
 }
-EXPORT_SYMBOL_GPL(kvm_scale_tsc);
 
 static u64 kvm_compute_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc)
 {
@@ -12068,7 +12061,6 @@ bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
 }
-EXPORT_SYMBOL_GPL(kvm_vcpu_is_reset_bsp);
 
 bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
 {
-- 
GitLab


From 74bee0cad8dcd8ddec5e763c369239fc5990676a Mon Sep 17 00:00:00 2001
From: Jim Mattson <jmattson@google.com>
Date: Fri, 7 Oct 2022 15:16:44 -0700
Subject: [PATCH 3302/4122] KVM: x86: Advertise that the SMM_CTL MSR is not
 supported

CPUID.80000021H:EAX[bit 9] indicates that the SMM_CTL MSR (0xc0010116) is
not supported. This defeature can be advertised by KVM_GET_SUPPORTED_CPUID
regardless of whether or not the host enumerates it; currently it will be
included only if the host enumerates at least leaf 8000001DH, due to a
preexisting bug in QEMU that KVM has to work around (commit f751d8eac176,
"KVM: x86: work around QEMU issue with synthetic CPUID leaves", 2022-04-29).

Signed-off-by: Jim Mattson <jmattson@google.com>
Message-Id: <20221007221644.138355-1-jmattson@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 723502181a3a9..0b5bf013fcb8e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -1233,8 +1233,12 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
 		 * Other defined bits are for MSRs that KVM does not expose:
 		 *   EAX      3      SPCL, SMM page configuration lock
 		 *   EAX      13     PCMSR, Prefetch control MSR
+		 *
+		 * KVM doesn't support SMM_CTL.
+		 *   EAX       9     SMM_CTL MSR is not supported
 		 */
 		entry->eax &= BIT(0) | BIT(2) | BIT(6);
+		entry->eax |= BIT(9);
 		if (static_cpu_has(X86_FEATURE_LFENCE_RDTSC))
 			entry->eax |= BIT(2);
 		if (!static_cpu_has_bug(X86_BUG_NULL_SEG))
-- 
GitLab


From d33deda095d3637d218e7eed441633b2a01e1413 Mon Sep 17 00:00:00 2001
From: Tong Tiangen <tongtiangen@huawei.com>
Date: Mon, 24 Oct 2022 09:47:24 +0000
Subject: [PATCH 3303/4122] riscv/mm: hugepage's PG_dcache_clean flag is only
 set in head page

HugeTLB pages are always fully mapped, so only setting head page's
PG_dcache_clean flag is enough.

Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Link: https://lore.kernel.org/lkml/20220331065640.5777-2-songmuchun@bytedance.com/
Link: https://lore.kernel.org/r/20221024094725.3054311-2-tongtiangen@huawei.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/cacheflush.h | 7 +++++++
 arch/riscv/mm/cacheflush.c          | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 8a5c246b0a216..c172d05de4740 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -17,6 +17,13 @@ static inline void local_flush_icache_all(void)
 
 static inline void flush_dcache_page(struct page *page)
 {
+	/*
+	 * HugeTLB pages are always fully mapped and only head page will be
+	 * set PG_dcache_clean (see comments in flush_icache_pte()).
+	 */
+	if (PageHuge(page))
+		page = compound_head(page);
+
 	if (test_bit(PG_dcache_clean, &page->flags))
 		clear_bit(PG_dcache_clean, &page->flags);
 }
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 6cb7d96ad9c7b..062559c04fc3a 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -82,6 +82,13 @@ void flush_icache_pte(pte_t pte)
 {
 	struct page *page = pte_page(pte);
 
+	/*
+	 * HugeTLB pages are always fully mapped, so only setting head page's
+	 * PG_dcache_clean flag is enough.
+	 */
+	if (PageHuge(page))
+		page = compound_head(page);
+
 	if (!test_and_set_bit(PG_dcache_clean, &page->flags))
 		flush_icache_all();
 }
-- 
GitLab


From d8bf77a1dc3079692f54be3087a5fd16d90027b0 Mon Sep 17 00:00:00 2001
From: Tong Tiangen <tongtiangen@huawei.com>
Date: Mon, 24 Oct 2022 09:47:25 +0000
Subject: [PATCH 3304/4122] riscv/mm: add arch hook arch_clear_hugepage_flags

With the PG_arch_1 we keep track if the page's data cache is clean,
architecture rely on this property to treat new pages as dirty with
respect to the data cache and perform the flushing before mapping the pages
into userspace.

This patch adds a new architecture hook, arch_clear_hugepage_flags,so that
architectures which rely on the page flags being in a particular state for
fresh allocations can adjust the flags accordingly when a page is freed
into the pool.

Fixes: 9e953cda5cdf ("riscv: Introduce huge page support for 32/64bit kernel")
Signed-off-by: Tong Tiangen <tongtiangen@huawei.com>
Link: https://lore.kernel.org/r/20221024094725.3054311-3-tongtiangen@huawei.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/hugetlb.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index a5c2ca1d1cd8b..ec19d6afc8965 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -5,4 +5,10 @@
 #include <asm-generic/hugetlb.h>
 #include <asm/page.h>
 
+static inline void arch_clear_hugepage_flags(struct page *page)
+{
+	clear_bit(PG_dcache_clean, &page->flags);
+}
+#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+
 #endif /* _ASM_RISCV_HUGETLB_H */
-- 
GitLab


From b57c2f124098459a4acc15d5044f87cba31c87f0 Mon Sep 17 00:00:00 2001
From: Binglei Wang <l3b2w1@gmail.com>
Date: Tue, 25 Oct 2022 16:18:32 +0100
Subject: [PATCH 3305/4122] riscv: add riscv rethook implementation

Implement the kretprobes on riscv arch by using rethook machenism
which abstracts general kretprobe info into a struct rethook_node
to be embedded in the struct kretprobe_instance.

Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Binglei Wang <l3b2w1@gmail.com>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221025151831.1097417-1-conor@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig                            |  1 +
 arch/riscv/include/asm/kprobes.h              |  2 --
 arch/riscv/kernel/probes/Makefile             |  2 +-
 arch/riscv/kernel/probes/kprobes.c            | 13 ---------
 arch/riscv/kernel/probes/rethook.c            | 27 +++++++++++++++++++
 arch/riscv/kernel/probes/rethook.h            |  8 ++++++
 ...obes_trampoline.S => rethook_trampoline.S} |  6 ++---
 7 files changed, 40 insertions(+), 19 deletions(-)
 create mode 100644 arch/riscv/kernel/probes/rethook.c
 create mode 100644 arch/riscv/kernel/probes/rethook.h
 rename arch/riscv/kernel/probes/{kprobes_trampoline.S => rethook_trampoline.S} (94%)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 8d47562be90c7..ef8d66de5f381 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -101,6 +101,7 @@ config RISCV
 	select HAVE_KPROBES if !XIP_KERNEL
 	select HAVE_KPROBES_ON_FTRACE if !XIP_KERNEL
 	select HAVE_KRETPROBES if !XIP_KERNEL
+	select HAVE_RETHOOK if !XIP_KERNEL
 	select HAVE_MOVE_PMD
 	select HAVE_MOVE_PUD
 	select HAVE_PCI
diff --git a/arch/riscv/include/asm/kprobes.h b/arch/riscv/include/asm/kprobes.h
index 217ef89f22b9f..e7882ccb0fd46 100644
--- a/arch/riscv/include/asm/kprobes.h
+++ b/arch/riscv/include/asm/kprobes.h
@@ -40,8 +40,6 @@ void arch_remove_kprobe(struct kprobe *p);
 int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr);
 bool kprobe_breakpoint_handler(struct pt_regs *regs);
 bool kprobe_single_step_handler(struct pt_regs *regs);
-void __kretprobe_trampoline(void);
-void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
 
 #endif /* CONFIG_KPROBES */
 #endif /* _ASM_RISCV_KPROBES_H */
diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile
index 7f0840dcc31bc..c40139e9ca478 100644
--- a/arch/riscv/kernel/probes/Makefile
+++ b/arch/riscv/kernel/probes/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_KPROBES)		+= kprobes.o decode-insn.o simulate-insn.o
-obj-$(CONFIG_KPROBES)		+= kprobes_trampoline.o
+obj-$(CONFIG_RETHOOK)		+= rethook.o rethook_trampoline.o
 obj-$(CONFIG_KPROBES_ON_FTRACE)	+= ftrace.o
 obj-$(CONFIG_UPROBES)		+= uprobes.o decode-insn.o simulate-insn.o
 CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
index e6e950b7cf327..f21592d203062 100644
--- a/arch/riscv/kernel/probes/kprobes.c
+++ b/arch/riscv/kernel/probes/kprobes.c
@@ -345,19 +345,6 @@ int __init arch_populate_kprobe_blacklist(void)
 	return ret;
 }
 
-void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
-{
-	return (void *)kretprobe_trampoline_handler(regs, NULL);
-}
-
-void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
-				      struct pt_regs *regs)
-{
-	ri->ret_addr = (kprobe_opcode_t *)regs->ra;
-	ri->fp = NULL;
-	regs->ra = (unsigned long) &__kretprobe_trampoline;
-}
-
 int __kprobes arch_trampoline_kprobe(struct kprobe *p)
 {
 	return 0;
diff --git a/arch/riscv/kernel/probes/rethook.c b/arch/riscv/kernel/probes/rethook.c
new file mode 100644
index 0000000000000..5c27c1f509894
--- /dev/null
+++ b/arch/riscv/kernel/probes/rethook.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic return hook for riscv.
+ */
+
+#include <linux/kprobes.h>
+#include <linux/rethook.h>
+#include "rethook.h"
+
+/* This is called from arch_rethook_trampoline() */
+unsigned long __used arch_rethook_trampoline_callback(struct pt_regs *regs)
+{
+	return rethook_trampoline_handler(regs, regs->s0);
+}
+
+NOKPROBE_SYMBOL(arch_rethook_trampoline_callback);
+
+void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount)
+{
+	rhn->ret_addr = regs->ra;
+	rhn->frame = regs->s0;
+
+	/* replace return addr with trampoline */
+	regs->ra = (unsigned long)arch_rethook_trampoline;
+}
+
+NOKPROBE_SYMBOL(arch_rethook_prepare);
diff --git a/arch/riscv/kernel/probes/rethook.h b/arch/riscv/kernel/probes/rethook.h
new file mode 100644
index 0000000000000..4758f7e3ce881
--- /dev/null
+++ b/arch/riscv/kernel/probes/rethook.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __RISCV_RETHOOK_H
+#define __RISCV_RETHOOK_H
+
+unsigned long arch_rethook_trampoline_callback(struct pt_regs *regs);
+void arch_rethook_prepare(struct rethook_node *rhn, struct pt_regs *regs, bool mcount);
+
+#endif
diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/rethook_trampoline.S
similarity index 94%
rename from arch/riscv/kernel/probes/kprobes_trampoline.S
rename to arch/riscv/kernel/probes/rethook_trampoline.S
index 7bdb09ded39b8..21bac92a170a9 100644
--- a/arch/riscv/kernel/probes/kprobes_trampoline.S
+++ b/arch/riscv/kernel/probes/rethook_trampoline.S
@@ -75,13 +75,13 @@
 	REG_L x31, PT_T6(sp)
 	.endm
 
-ENTRY(__kretprobe_trampoline)
+ENTRY(arch_rethook_trampoline)
 	addi sp, sp, -(PT_SIZE_ON_STACK)
 	save_all_base_regs
 
 	move a0, sp /* pt_regs */
 
-	call trampoline_probe_handler
+	call arch_rethook_trampoline_callback
 
 	/* use the result as the return-address */
 	move ra, a0
@@ -90,4 +90,4 @@ ENTRY(__kretprobe_trampoline)
 	addi sp, sp, PT_SIZE_ON_STACK
 
 	ret
-ENDPROC(__kretprobe_trampoline)
+ENDPROC(arch_rethook_trampoline)
-- 
GitLab


From de92f65719cd672f4b48397540b9f9eff67eca40 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 2 Dec 2022 12:59:11 -0800
Subject: [PATCH 3306/4122] exit: Allow oops_limit to be disabled

In preparation for keeping oops_limit logic in sync with warn_limit,
have oops_limit == 0 disable checking the Oops counter.

Cc: Jann Horn <jannh@google.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: linux-doc@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/admin-guide/sysctl/kernel.rst | 5 +++--
 kernel/exit.c                               | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 09f3fb2f85858..a31d8d81ea078 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -671,8 +671,9 @@ oops_limit
 ==========
 
 Number of kernel oopses after which the kernel should panic when
-``panic_on_oops`` is not set. Setting this to 0 or 1 has the same effect
-as setting ``panic_on_oops=1``.
+``panic_on_oops`` is not set. Setting this to 0 disables checking
+the count. Setting this to  1 has the same effect as setting
+``panic_on_oops=1``. The default value is 10000.
 
 
 osrelease, ostype & version
diff --git a/kernel/exit.c b/kernel/exit.c
index dc1a32149f944..deffb8e4b1b24 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -954,7 +954,7 @@ void __noreturn make_task_dead(int signr)
 	 * To make sure this can't happen, place an upper bound on how often the
 	 * kernel may oops without panic().
 	 */
-	if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit))
+	if (atomic_inc_return(&oops_count) >= READ_ONCE(oops_limit) && oops_limit)
 		panic("Oopsed too often (kernel.oops_limit is %d)", oops_limit);
 
 	/*
-- 
GitLab


From 79cc1ba7badf9e7a12af99695a557e9ce27ee967 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2022 15:43:24 -0800
Subject: [PATCH 3307/4122] panic: Consolidate open-coded panic_on_warn checks

Several run-time checkers (KASAN, UBSAN, KFENCE, KCSAN, sched) roll
their own warnings, and each check "panic_on_warn". Consolidate this
into a single function so that future instrumentation can be added in
a single location.

Cc: Marco Elver <elver@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Ben Segall <bsegall@google.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Daniel Bristot de Oliveira <bristot@redhat.com>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Konovalov <andreyknvl@gmail.com>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Gow <davidgow@google.com>
Cc: tangmeng <tangmeng@uniontech.com>
Cc: Jann Horn <jannh@google.com>
Cc: Shuah Khan <skhan@linuxfoundation.org>
Cc: Petr Mladek <pmladek@suse.com>
Cc: "Paul E. McKenney" <paulmck@kernel.org>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: kasan-dev@googlegroups.com
Cc: linux-mm@kvack.org
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Marco Elver <elver@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@gmail.com>
Link: https://lore.kernel.org/r/20221117234328.594699-4-keescook@chromium.org
---
 include/linux/panic.h | 1 +
 kernel/kcsan/report.c | 3 +--
 kernel/panic.c        | 9 +++++++--
 kernel/sched/core.c   | 3 +--
 lib/ubsan.c           | 3 +--
 mm/kasan/report.c     | 4 ++--
 mm/kfence/report.c    | 3 +--
 7 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/include/linux/panic.h b/include/linux/panic.h
index c7759b3f20452..979b776e3bcb3 100644
--- a/include/linux/panic.h
+++ b/include/linux/panic.h
@@ -11,6 +11,7 @@ extern long (*panic_blink)(int state);
 __printf(1, 2)
 void panic(const char *fmt, ...) __noreturn __cold;
 void nmi_panic(struct pt_regs *regs, const char *msg);
+void check_panic_on_warn(const char *origin);
 extern void oops_enter(void);
 extern void oops_exit(void);
 extern bool oops_may_print(void);
diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c
index 67794404042a5..e95ce7d7a76e7 100644
--- a/kernel/kcsan/report.c
+++ b/kernel/kcsan/report.c
@@ -492,8 +492,7 @@ static void print_report(enum kcsan_value_change value_change,
 	dump_stack_print_info(KERN_DEFAULT);
 	pr_err("==================================================================\n");
 
-	if (panic_on_warn)
-		panic("panic_on_warn set ...\n");
+	check_panic_on_warn("KCSAN");
 }
 
 static void release_report(unsigned long *flags, struct other_info *other_info)
diff --git a/kernel/panic.c b/kernel/panic.c
index d843d036651e0..cfa354322d5f5 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -201,6 +201,12 @@ static void panic_print_sys_info(bool console_flush)
 		ftrace_dump(DUMP_ALL);
 }
 
+void check_panic_on_warn(const char *origin)
+{
+	if (panic_on_warn)
+		panic("%s: panic_on_warn set ...\n", origin);
+}
+
 /**
  *	panic - halt the system
  *	@fmt: The text string to print
@@ -619,8 +625,7 @@ void __warn(const char *file, int line, void *caller, unsigned taint,
 	if (regs)
 		show_regs(regs);
 
-	if (panic_on_warn)
-		panic("panic_on_warn set ...\n");
+	check_panic_on_warn("kernel");
 
 	if (!regs)
 		dump_stack();
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5800b0623ff30..285ef8821b4f2 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5729,8 +5729,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
 		pr_err("Preemption disabled at:");
 		print_ip_sym(KERN_ERR, preempt_disable_ip);
 	}
-	if (panic_on_warn)
-		panic("scheduling while atomic\n");
+	check_panic_on_warn("scheduling while atomic");
 
 	dump_stack();
 	add_taint(TAINT_WARN, LOCKDEP_STILL_OK);
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 36bd75e334263..60c7099857a05 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -154,8 +154,7 @@ static void ubsan_epilogue(void)
 
 	current->in_ubsan--;
 
-	if (panic_on_warn)
-		panic("panic_on_warn set ...\n");
+	check_panic_on_warn("UBSAN");
 }
 
 void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs)
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index df3602062bfd6..cc98dfdd3ed2f 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -164,8 +164,8 @@ static void end_report(unsigned long *flags, void *addr)
 				       (unsigned long)addr);
 	pr_err("==================================================================\n");
 	spin_unlock_irqrestore(&report_lock, *flags);
-	if (panic_on_warn && !test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
-		panic("panic_on_warn set ...\n");
+	if (!test_bit(KASAN_BIT_MULTI_SHOT, &kasan_flags))
+		check_panic_on_warn("KASAN");
 	if (kasan_arg_fault == KASAN_ARG_FAULT_PANIC)
 		panic("kasan.fault=panic set ...\n");
 	add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
index 7e496856c2ebe..110c27ca597da 100644
--- a/mm/kfence/report.c
+++ b/mm/kfence/report.c
@@ -268,8 +268,7 @@ void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *r
 
 	lockdep_on();
 
-	if (panic_on_warn)
-		panic("panic_on_warn set ...\n");
+	check_panic_on_warn("KFENCE");
 
 	/* We encountered a memory safety error, taint the kernel! */
 	add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK);
-- 
GitLab


From 9fc9e278a5c0b708eeffaf47d6eb0c82aa74ed78 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2022 15:43:25 -0800
Subject: [PATCH 3308/4122] panic: Introduce warn_limit

Like oops_limit, add warn_limit for limiting the number of warnings when
panic_on_warn is not set.

Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Petr Mladek <pmladek@suse.com>
Cc: tangmeng <tangmeng@uniontech.com>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: linux-doc@vger.kernel.org
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221117234328.594699-5-keescook@chromium.org
---
 Documentation/admin-guide/sysctl/kernel.rst | 10 ++++++++++
 kernel/panic.c                              | 14 ++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index a31d8d81ea078..179bd303b5853 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -1509,6 +1509,16 @@ entry will default to 2 instead of 0.
 2 Unprivileged calls to ``bpf()`` are disabled
 = =============================================================
 
+
+warn_limit
+==========
+
+Number of kernel warnings after which the kernel should panic when
+``panic_on_warn`` is not set. Setting this to 0 disables checking
+the warning count. Setting this to 1 has the same effect as setting
+``panic_on_warn=1``. The default value is 0.
+
+
 watchdog
 ========
 
diff --git a/kernel/panic.c b/kernel/panic.c
index cfa354322d5f5..f4403fc14f673 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -58,6 +58,7 @@ bool crash_kexec_post_notifiers;
 int panic_on_warn __read_mostly;
 unsigned long panic_on_taint;
 bool panic_on_taint_nousertaint = false;
+static unsigned int warn_limit __read_mostly;
 
 int panic_timeout = CONFIG_PANIC_TIMEOUT;
 EXPORT_SYMBOL_GPL(panic_timeout);
@@ -88,6 +89,13 @@ static struct ctl_table kern_panic_table[] = {
 		.extra2         = SYSCTL_ONE,
 	},
 #endif
+	{
+		.procname       = "warn_limit",
+		.data           = &warn_limit,
+		.maxlen         = sizeof(warn_limit),
+		.mode           = 0644,
+		.proc_handler   = proc_douintvec,
+	},
 	{ }
 };
 
@@ -203,8 +211,14 @@ static void panic_print_sys_info(bool console_flush)
 
 void check_panic_on_warn(const char *origin)
 {
+	static atomic_t warn_count = ATOMIC_INIT(0);
+
 	if (panic_on_warn)
 		panic("%s: panic_on_warn set ...\n", origin);
+
+	if (atomic_inc_return(&warn_count) >= READ_ONCE(warn_limit) && warn_limit)
+		panic("%s: system warned too often (kernel.warn_limit is %d)",
+		      origin, warn_limit);
 }
 
 /**
-- 
GitLab


From 8b05aa26336113c4cea25f1c333ee8cd4fc212a6 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2022 15:43:26 -0800
Subject: [PATCH 3309/4122] panic: Expose "warn_count" to sysfs

Since Warn count is now tracked and is a fairly interesting signal, add
the entry /sys/kernel/warn_count to expose it to userspace.

Cc: Petr Mladek <pmladek@suse.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: tangmeng <tangmeng@uniontech.com>
Cc: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tiezhu Yang <yangtiezhu@loongson.cn>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221117234328.594699-6-keescook@chromium.org
---
 .../ABI/testing/sysfs-kernel-warn_count       |  6 +++++
 MAINTAINERS                                   |  1 +
 kernel/panic.c                                | 22 +++++++++++++++++--
 3 files changed, 27 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/ABI/testing/sysfs-kernel-warn_count

diff --git a/Documentation/ABI/testing/sysfs-kernel-warn_count b/Documentation/ABI/testing/sysfs-kernel-warn_count
new file mode 100644
index 0000000000000..08f083d2fd51b
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-kernel-warn_count
@@ -0,0 +1,6 @@
+What:		/sys/kernel/oops_count
+Date:		November 2022
+KernelVersion:	6.2.0
+Contact:	Linux Kernel Hardening List <linux-hardening@vger.kernel.org>
+Description:
+		Shows how many times the system has Warned since last boot.
diff --git a/MAINTAINERS b/MAINTAINERS
index 0a1e95a58e54c..282cd8a513fd9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11107,6 +11107,7 @@ L:	linux-hardening@vger.kernel.org
 S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/hardening
 F:	Documentation/ABI/testing/sysfs-kernel-oops_count
+F:	Documentation/ABI/testing/sysfs-kernel-warn_count
 F:	include/linux/overflow.h
 F:	include/linux/randomize_kstack.h
 F:	mm/usercopy.c
diff --git a/kernel/panic.c b/kernel/panic.c
index f4403fc14f673..54deb743b2d5a 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -32,6 +32,7 @@
 #include <linux/bug.h>
 #include <linux/ratelimit.h>
 #include <linux/debugfs.h>
+#include <linux/sysfs.h>
 #include <trace/events/error_report.h>
 #include <asm/sections.h>
 
@@ -107,6 +108,25 @@ static __init int kernel_panic_sysctls_init(void)
 late_initcall(kernel_panic_sysctls_init);
 #endif
 
+static atomic_t warn_count = ATOMIC_INIT(0);
+
+#ifdef CONFIG_SYSFS
+static ssize_t warn_count_show(struct kobject *kobj, struct kobj_attribute *attr,
+			       char *page)
+{
+	return sysfs_emit(page, "%d\n", atomic_read(&warn_count));
+}
+
+static struct kobj_attribute warn_count_attr = __ATTR_RO(warn_count);
+
+static __init int kernel_panic_sysfs_init(void)
+{
+	sysfs_add_file_to_group(kernel_kobj, &warn_count_attr.attr, NULL);
+	return 0;
+}
+late_initcall(kernel_panic_sysfs_init);
+#endif
+
 static long no_blink(int state)
 {
 	return 0;
@@ -211,8 +231,6 @@ static void panic_print_sys_info(bool console_flush)
 
 void check_panic_on_warn(const char *origin)
 {
-	static atomic_t warn_count = ATOMIC_INIT(0);
-
 	if (panic_on_warn)
 		panic("%s: panic_on_warn set ...\n", origin);
 
-- 
GitLab


From 5abf698754b8e5e4f1ca1058ee2b9785fbce6d23 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Mon, 28 Nov 2022 02:44:03 -0800
Subject: [PATCH 3310/4122] lib: fortify_kunit: build without structleak plugin

Building allmodconfig with aarch64-linux-gnu-gcc (Debian 11.3.0-6),
fortify_kunit with strucleak plugin enabled makes the stack frame size
to grow too large:

lib/fortify_kunit.c:140:1: error: the frame size of 2368 bytes is larger than 2048 bytes [-Werror=frame-larger-than=]

Turn off the structleak plugin checks for fortify_kunit.

Suggested-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 lib/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/Makefile b/lib/Makefile
index 2f0454b931dc8..83c650bb4459f 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -379,6 +379,7 @@ obj-$(CONFIG_OVERFLOW_KUNIT_TEST) += overflow_kunit.o
 CFLAGS_stackinit_kunit.o += $(call cc-disable-warning, switch-unreachable)
 obj-$(CONFIG_STACKINIT_KUNIT_TEST) += stackinit_kunit.o
 CFLAGS_fortify_kunit.o += $(call cc-disable-warning, unsequenced)
+CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN)
 obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o
 obj-$(CONFIG_STRSCPY_KUNIT_TEST) += strscpy_kunit.o
 obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o
-- 
GitLab


From 3a017d6355f24de42f2ad688df9fa19e0cb128f2 Mon Sep 17 00:00:00 2001
From: "haifeng.xu" <haifeng.xu@shopee.com>
Date: Mon, 28 Nov 2022 06:56:06 +0000
Subject: [PATCH 3311/4122] signal: Initialize the info in ksignal

When handing the SIGNAL_GROUP_EXIT flag, the info in ksignal isn't cleared.
However, the info acquired by dequeue_synchronous_signal/dequeue_signal is
initialized and can be safely used. Fortunately, the fatal signal process
just uses the si_signo and doesn't use any other member. Even so, the
initialization before use is more safer.

Signed-off-by: haifeng.xu <haifeng.xu@shopee.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221128065606.19570-1-haifeng.xu@shopee.com
---
 kernel/signal.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/signal.c b/kernel/signal.c
index d140672185a48..b9b0c8c620e7a 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2693,6 +2693,7 @@ relock:
 		/* Has this task already been marked for death? */
 		if ((signal->flags & SIGNAL_GROUP_EXIT) ||
 		     signal->group_exec_task) {
+			clear_siginfo(&ksig->info);
 			ksig->info.si_signo = signr = SIGKILL;
 			sigdelset(&current->pending.signal, SIGKILL);
 			trace_signal_deliver(SIGKILL, SEND_SIG_NOINFO,
-- 
GitLab


From bdc77507fecd00ddad2f502f86a48a9ec38f0f84 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 1 Dec 2022 16:23:25 -0800
Subject: [PATCH 3312/4122] um: virt-pci: Avoid GCC non-NULL warning

GCC gets confused about the return value of get_cpu_var() possibly
being NULL, so explicitly test for it before calls to memcpy() and
memset(). Avoids warnings like this:

   arch/um/drivers/virt-pci.c: In function 'um_pci_send_cmd':
   include/linux/fortify-string.h:48:33: warning: argument 1 null where non-null expected [-Wnonnull]
      48 | #define __underlying_memcpy     __builtin_memcpy
         |                                 ^
   include/linux/fortify-string.h:438:9: note: in expansion of macro '__underlying_memcpy'
     438 |         __underlying_##op(p, q, __fortify_size);                        \
         |         ^~~~~~~~~~~~~
   include/linux/fortify-string.h:483:26: note: in expansion of macro '__fortify_memcpy_chk'
     483 | #define memcpy(p, q, s)  __fortify_memcpy_chk(p, q, s,                  \
         |                          ^~~~~~~~~~~~~~~~~~~~
   arch/um/drivers/virt-pci.c:100:9: note: in expansion of macro 'memcpy'
     100 |         memcpy(buf, cmd, cmd_size);
         |         ^~~~~~

While at it, avoid literal "8" and use stored sizeof(buf->data) in
memset() and um_pci_send_cmd().

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/202211271212.SUZSC9f9-lkp@intel.com
Fixes: ba38961a069b ("um: Enable FORTIFY_SOURCE")
Cc: Richard Weinberger <richard@nod.at>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: "Michael S. Tsirkin" <mst@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Xiu Jianfeng <xiujianfeng@huawei.com>
Cc: Vincent Whitchurch <vincent.whitchurch@axis.com>
Cc: linux-um@lists.infradead.org
Cc: stable@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 arch/um/drivers/virt-pci.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/um/drivers/virt-pci.c b/arch/um/drivers/virt-pci.c
index acb55b302b14c..3ac220dafec4a 100644
--- a/arch/um/drivers/virt-pci.c
+++ b/arch/um/drivers/virt-pci.c
@@ -97,7 +97,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
 	}
 
 	buf = get_cpu_var(um_pci_msg_bufs);
-	memcpy(buf, cmd, cmd_size);
+	if (buf)
+		memcpy(buf, cmd, cmd_size);
 
 	if (posted) {
 		u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
@@ -182,6 +183,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
 	struct um_pci_message_buffer *buf;
 	u8 *data;
 	unsigned long ret = ULONG_MAX;
+	size_t bytes = sizeof(buf->data);
 
 	if (!dev)
 		return ULONG_MAX;
@@ -189,7 +191,8 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
 	buf = get_cpu_var(um_pci_msg_bufs);
 	data = buf->data;
 
-	memset(buf->data, 0xff, sizeof(buf->data));
+	if (buf)
+		memset(data, 0xff, bytes);
 
 	switch (size) {
 	case 1:
@@ -204,7 +207,7 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
 		goto out;
 	}
 
-	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8))
+	if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, bytes))
 		goto out;
 
 	switch (size) {
-- 
GitLab


From d662198e03bc7fb4635156ee7e8b8d325e2d8512 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Thu, 17 Nov 2022 19:42:55 -0800
Subject: [PATCH 3313/4122] hpet: Replace one-element array with flexible-array
 member

One-element arrays are deprecated[1] and are being replaced with
flexible array members in support of the ongoing efforts to tighten the
FORTIFY_SOURCE routines on memcpy(), correctly instrument array indexing
with UBSAN_BOUNDS, and to globally enable -fstrict-flex-arrays=3.

Replace one-element array with flexible-array member in struct hpet.

This results in no differences in binary output. The use of struct hpet
is never used with sizeof() and accesses via hpet_timers array are
already done after explicit bounds checking.

[1] https://github.com/KSPP/linux/issues/79

Cc: Clemens Ladisch <clemens@ladisch.de>
Cc: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20221118034250.never.999-kees@kernel.org
---
 include/linux/hpet.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/hpet.h b/include/linux/hpet.h
index 8604564b985dc..21e69eaf7a36d 100644
--- a/include/linux/hpet.h
+++ b/include/linux/hpet.h
@@ -30,7 +30,7 @@ struct hpet {
 			unsigned long _hpet_compare;
 		} _u1;
 		u64 hpet_fsb[2];	/* FSB route */
-	} hpet_timers[1];
+	} hpet_timers[];
 };
 
 #define	hpet_mc		_u0._hpet_mc
-- 
GitLab


From d272e01fa0a2f15c5c331a37cd99c6875c7b7186 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 15 Nov 2022 09:35:10 -0600
Subject: [PATCH 3314/4122] ksmbd: replace one-element arrays with
 flexible-array members

One-element arrays are deprecated, and we are replacing them with flexible
array members instead. So, replace one-element arrays with flexible-array
members in multiple structs in fs/ksmbd/smb_common.h and one in
fs/ksmbd/smb2pdu.h.

Important to mention is that doing a build before/after this patch results
in no binary output differences.

This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines
on memcpy() and help us make progress towards globally enabling
-fstrict-flex-arrays=3 [1].

Link: https://github.com/KSPP/linux/issues/242
Link: https://github.com/KSPP/linux/issues/79
Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1]
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/Y3OxronfaPYv9qGP@work
---
 fs/ksmbd/smb2pdu.c    |  4 ++--
 fs/ksmbd/smb2pdu.h    |  2 +-
 fs/ksmbd/smb_common.h | 12 ++++++------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index b2fc85d440d03..31f00cec5255b 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -3438,7 +3438,7 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
 		goto free_conv_name;
 	}
 
-	struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len;
+	struct_sz = readdir_info_level_struct_sz(info_level) + conv_len;
 	next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT);
 	d_info->last_entry_off_align = next_entry_offset - struct_sz;
 
@@ -3690,7 +3690,7 @@ static int reserve_populate_dentry(struct ksmbd_dir_info *d_info,
 		return -EOPNOTSUPP;
 
 	conv_len = (d_info->name_len + 1) * 2;
-	next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+	next_entry_offset = ALIGN(struct_sz + conv_len,
 				  KSMBD_DIR_INFO_ALIGNMENT);
 
 	if (next_entry_offset > d_info->out_buf_len) {
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index 092fdd3f87505..aa5dbe54f5a18 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -443,7 +443,7 @@ struct smb2_posix_info {
 	/* SidBuffer contain two sids (UNIX user sid(16), UNIX group sid(16)) */
 	u8 SidBuffer[32];
 	__le32 name_len;
-	u8 name[1];
+	u8 name[];
 	/*
 	 * var sized owner SID
 	 * var sized group SID
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index 318c16fa81da3..e663ab9ea7590 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -277,14 +277,14 @@ struct file_directory_info {
 	__le64 AllocationSize;
 	__le32 ExtFileAttributes;
 	__le32 FileNameLength;
-	char FileName[1];
+	char FileName[];
 } __packed;   /* level 0x101 FF resp data */
 
 struct file_names_info {
 	__le32 NextEntryOffset;
 	__u32 FileIndex;
 	__le32 FileNameLength;
-	char FileName[1];
+	char FileName[];
 } __packed;   /* level 0xc FF resp data */
 
 struct file_full_directory_info {
@@ -299,7 +299,7 @@ struct file_full_directory_info {
 	__le32 ExtFileAttributes;
 	__le32 FileNameLength;
 	__le32 EaSize;
-	char FileName[1];
+	char FileName[];
 } __packed; /* level 0x102 FF resp */
 
 struct file_both_directory_info {
@@ -317,7 +317,7 @@ struct file_both_directory_info {
 	__u8   ShortNameLength;
 	__u8   Reserved;
 	__u8   ShortName[24];
-	char FileName[1];
+	char FileName[];
 } __packed; /* level 0x104 FFrsp data */
 
 struct file_id_both_directory_info {
@@ -337,7 +337,7 @@ struct file_id_both_directory_info {
 	__u8   ShortName[24];
 	__le16 Reserved2;
 	__le64 UniqueId;
-	char FileName[1];
+	char FileName[];
 } __packed;
 
 struct file_id_full_dir_info {
@@ -354,7 +354,7 @@ struct file_id_full_dir_info {
 	__le32 EaSize; /* EA size */
 	__le32 Reserved;
 	__le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
-	char FileName[1];
+	char FileName[];
 } __packed; /* level 0x105 FF rsp data */
 
 struct smb_version_values {
-- 
GitLab


From 649d6b1019a2f243bc3a98cb85902a8ebf74289a Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Wed, 26 Oct 2022 22:42:07 +0800
Subject: [PATCH 3315/4122] RISC-V: Add arch_crash_save_vmcoreinfo support

Add arch_crash_save_vmcoreinfo(), which exports VM layout(MODULES,
VMALLOC, VMEMMAP ranges and KERNEL_LINK_ADDR), va bits and ram base for
vmcore.

Default pagetable levels and PAGE_OFFSET aren't same for different
kernel version as below. For pagetable levels, it sets sv57 by default
and falls back to setting sv48 at boot time if sv57 is not supported by
the hardware.

For ram base, the default value is 0x80200000 for qemu riscv64 env and,
for example, is 0x200000 on the XuanTie 910 CPU.

 * Linux Kernel 5.18 ~
 *      PGTABLE_LEVELS = 5
 *      PAGE_OFFSET = 0xff60000000000000
 * Linux Kernel 5.17 ~
 *      PGTABLE_LEVELS = 4
 *      PAGE_OFFSET = 0xffffaf8000000000
 * Linux Kernel 4.19 ~
 *      PGTABLE_LEVELS = 3
 *      PAGE_OFFSET = 0xffffffe000000000

Since these configurations change from time to time and version to
version, it is preferable to export them via vmcoreinfo than to change
the crash's code frequently, it can simplify the development of crash
tool.

Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Tested-by: Deepak Gupta <debug@rivosinc.com>
Tested-by: Guo Ren <guoren@kernel.org>
Acked-by: Baoquan He <bhe@redhat.com>
Link: https://lore.kernel.org/r/20221026144208.373504-2-xianting.tian@linux.alibaba.com
[Palmer: wrap commit text]
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/Makefile     |  1 +
 arch/riscv/kernel/crash_core.c | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 arch/riscv/kernel/crash_core.c

diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index db6e4b1294ba3..4cf303a779ab9 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_KGDB)		+= kgdb.o
 obj-$(CONFIG_KEXEC_CORE)	+= kexec_relocate.o crash_save_regs.o machine_kexec.o
 obj-$(CONFIG_KEXEC_FILE)	+= elf_kexec.o machine_kexec_file.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_CRASH_CORE)	+= crash_core.o
 
 obj-$(CONFIG_JUMP_LABEL)	+= jump_label.o
 
diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c
new file mode 100644
index 0000000000000..b351a3c013555
--- /dev/null
+++ b/arch/riscv/kernel/crash_core.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/crash_core.h>
+#include <linux/pagemap.h>
+
+void arch_crash_save_vmcoreinfo(void)
+{
+	VMCOREINFO_NUMBER(VA_BITS);
+	VMCOREINFO_NUMBER(phys_ram_base);
+
+	vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET);
+	vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START);
+	vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END);
+	vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START);
+	vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END);
+#ifdef CONFIG_64BIT
+	vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR);
+	vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END);
+#endif
+	vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR);
+}
-- 
GitLab


From c5b4216929ebc8ac9107a373db65babc14ba4e80 Mon Sep 17 00:00:00 2001
From: Xianting Tian <xianting.tian@linux.alibaba.com>
Date: Wed, 26 Oct 2022 22:42:08 +0800
Subject: [PATCH 3316/4122] Documentation: kdump: describe VMCOREINFO export
 for RISCV64

The following interrelated definitions and ranges are needed by the
kdump crash tool, which are exported by
"arch/riscv/kernel/crash_core.c":

    VA_BITS,
    PAGE_OFFSET,
    phys_ram_base,
    KERNEL_LINK_ADDR,
    MODULES_VADDR ~ MODULES_END,
    VMALLOC_START ~ VMALLOC_END,
    VMEMMAP_START ~ VMEMMAP_END,

Document these RISCV64 exports above.

Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Xianting Tian <xianting.tian@linux.alibaba.com>
Acked-by: Baoquan He <bhe@redhat.com>
Link: https://lore.kernel.org/r/20221026144208.373504-3-xianting.tian@linux.alibaba.com
[Palmer: wrap commit text]
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 .../admin-guide/kdump/vmcoreinfo.rst          | 29 +++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst
index 6726f439958ca..86fd884928700 100644
--- a/Documentation/admin-guide/kdump/vmcoreinfo.rst
+++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst
@@ -595,3 +595,32 @@ X2TLB
 -----
 
 Indicates whether the crashed kernel enabled SH extended mode.
+
+RISCV64
+=======
+
+VA_BITS
+-------
+
+The maximum number of bits for virtual addresses. Used to compute the
+virtual memory ranges.
+
+PAGE_OFFSET
+-----------
+
+Indicates the virtual kernel start address of the direct-mapped RAM region.
+
+phys_ram_base
+-------------
+
+Indicates the start physical RAM address.
+
+MODULES_VADDR|MODULES_END|VMALLOC_START|VMALLOC_END|VMEMMAP_START|VMEMMAP_END|KERNEL_LINK_ADDR
+----------------------------------------------------------------------------------------------
+
+Used to get the correct ranges:
+
+  * MODULES_VADDR ~ MODULES_END : Kernel module space.
+  * VMALLOC_START ~ VMALLOC_END : vmalloc() / ioremap() space.
+  * VMEMMAP_START ~ VMEMMAP_END : vmemmap space, used for struct page array.
+  * KERNEL_LINK_ADDR : start address of Kernel link and BPF
-- 
GitLab


From 54c03bfd094fb74f9533a9c28250219afe182382 Mon Sep 17 00:00:00 2001
From: Qiheng Lin <linqiheng@huawei.com>
Date: Mon, 28 Nov 2022 22:27:40 +0800
Subject: [PATCH 3317/4122] power: supply: Fix refcount leak in
 rk817_charger_probe

of_get_child_by_name() returns a node pointer with refcount
incremented, we should use of_node_put() on it when not need anymore.
Add missing of_node_put() to avoid refcount leak.

Fixes: 11cb8da0189b ("power: supply: Add charger driver for Rockchip RK817")
Signed-off-by: Qiheng Lin <linqiheng@huawei.com>
Reviewed-by: Chris Morgan <macromorgan@hotmail.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/rk817_charger.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/power/supply/rk817_charger.c b/drivers/power/supply/rk817_charger.c
index 635f051b08216..d25a81d79facb 100644
--- a/drivers/power/supply/rk817_charger.c
+++ b/drivers/power/supply/rk817_charger.c
@@ -1060,8 +1060,10 @@ static int rk817_charger_probe(struct platform_device *pdev)
 		return -ENODEV;
 
 	charger = devm_kzalloc(&pdev->dev, sizeof(*charger), GFP_KERNEL);
-	if (!charger)
+	if (!charger) {
+		of_node_put(node);
 		return -ENOMEM;
+	}
 
 	charger->rk808 = rk808;
 
-- 
GitLab


From a7aaa80098d5b7608b2dc1e883e3c3f929415243 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 28 Nov 2022 10:28:48 +0100
Subject: [PATCH 3318/4122] power: supply: bq25890: Ensure pump_express_work is
 cancelled on remove

The pump_express_work which gets queued from an external_power_changed
callback might be pending / running on remove() (or on probe failure).

Add a devm action cancelling the work, to ensure that it is cancelled.

Note the devm action is added before devm_power_supply_register(), making
it run after devm unregisters the power_supply, so that the work cannot
be queued anymore (this is also why a devm action is used for this).

Fixes: 48f45b094dbb ("power: supply: bq25890: Support higher charging voltages through Pump Express+ protocol")
Reviewed-by: Marek Vasut <marex@denx.de>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 512c81662eea9..866c475bb7353 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -1317,6 +1317,13 @@ static int bq25890_fw_probe(struct bq25890_device *bq)
 	return 0;
 }
 
+static void bq25890_non_devm_cleanup(void *data)
+{
+	struct bq25890_device *bq = data;
+
+	cancel_delayed_work_sync(&bq->pump_express_work);
+}
+
 static int bq25890_probe(struct i2c_client *client)
 {
 	struct device *dev = &client->dev;
@@ -1372,6 +1379,14 @@ static int bq25890_probe(struct i2c_client *client)
 	/* OTG reporting */
 	bq->usb_phy = devm_usb_get_phy(dev, USB_PHY_TYPE_USB2);
 
+	/*
+	 * This must be before bq25890_power_supply_init(), so that it runs
+	 * after devm unregisters the power_supply.
+	 */
+	ret = devm_add_action_or_reset(dev, bq25890_non_devm_cleanup, bq);
+	if (ret)
+		return ret;
+
 	ret = bq25890_register_regulator(bq);
 	if (ret)
 		return ret;
-- 
GitLab


From 7e6fb67808ab5ceba73a6f45d0942e1e25ac56a7 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 28 Nov 2022 10:28:49 +0100
Subject: [PATCH 3319/4122] power: supply: bq25890: Fix usb-notifier probe and
 remove races

There are 2 races surrounding the usb-notifier:

1. The notifier, and thus usb_work, may run before the bq->charger
   power_supply class device is registered. But usb_work may call
   power_supply_changed() which relies on the psy device being registered.

2. usb_work may be pending/running at remove() time, so it needs to be
   cancelled on remove after unregistering the usb-notifier.

Fix 1. by moving usb-notifier registration to after the power_supply
registration.

Fix 2. by adding a cancel_work_sync() call directly after the usb-notifier
unregistration.

Reviewed-by: Marek Vasut <marex@denx.de>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/bq25890_charger.c | 30 +++++++++++---------------
 1 file changed, 12 insertions(+), 18 deletions(-)

diff --git a/drivers/power/supply/bq25890_charger.c b/drivers/power/supply/bq25890_charger.c
index 866c475bb7353..2d731ea58323b 100644
--- a/drivers/power/supply/bq25890_charger.c
+++ b/drivers/power/supply/bq25890_charger.c
@@ -1391,40 +1391,34 @@ static int bq25890_probe(struct i2c_client *client)
 	if (ret)
 		return ret;
 
-	if (!IS_ERR_OR_NULL(bq->usb_phy)) {
-		INIT_WORK(&bq->usb_work, bq25890_usb_work);
-		bq->usb_nb.notifier_call = bq25890_usb_notifier;
-		usb_register_notifier(bq->usb_phy, &bq->usb_nb);
-	}
-
 	ret = bq25890_power_supply_init(bq);
-	if (ret < 0) {
-		dev_err(dev, "Failed to register power supply\n");
-		goto err_unregister_usb_notifier;
-	}
+	if (ret < 0)
+		return dev_err_probe(dev, ret, "registering power supply\n");
 
 	ret = devm_request_threaded_irq(dev, client->irq, NULL,
 					bq25890_irq_handler_thread,
 					IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
 					BQ25890_IRQ_PIN, bq);
 	if (ret)
-		goto err_unregister_usb_notifier;
-
-	return 0;
+		return ret;
 
-err_unregister_usb_notifier:
-	if (!IS_ERR_OR_NULL(bq->usb_phy))
-		usb_unregister_notifier(bq->usb_phy, &bq->usb_nb);
+	if (!IS_ERR_OR_NULL(bq->usb_phy)) {
+		INIT_WORK(&bq->usb_work, bq25890_usb_work);
+		bq->usb_nb.notifier_call = bq25890_usb_notifier;
+		usb_register_notifier(bq->usb_phy, &bq->usb_nb);
+	}
 
-	return ret;
+	return 0;
 }
 
 static void bq25890_remove(struct i2c_client *client)
 {
 	struct bq25890_device *bq = i2c_get_clientdata(client);
 
-	if (!IS_ERR_OR_NULL(bq->usb_phy))
+	if (!IS_ERR_OR_NULL(bq->usb_phy)) {
 		usb_unregister_notifier(bq->usb_phy, &bq->usb_nb);
+		cancel_work_sync(&bq->usb_work);
+	}
 
 	if (!bq->skip_reset) {
 		/* reset all registers to default values */
-- 
GitLab


From 5de7cdd7fa0f62b3e8d2facc8f604e49d887677e Mon Sep 17 00:00:00 2001
From: Aidan MacDonald <aidanmacdonald.0x0@gmail.com>
Date: Sat, 12 Nov 2022 15:24:46 +0000
Subject: [PATCH 3320/4122] extcon: max77843: Replace irqchip mask_invert with
 unmask_base

Remove use of the deprecated mask_invert flag. Inverted mask
registers (where a '1' bit enables an IRQ) can be described more
directly as an unmask register.

Signed-off-by: Aidan MacDonald <aidanmacdonald.0x0@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-max77843.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/extcon/extcon-max77843.c b/drivers/extcon/extcon-max77843.c
index 8e6e97ec65a8a..1bc0426ce3f1e 100644
--- a/drivers/extcon/extcon-max77843.c
+++ b/drivers/extcon/extcon-max77843.c
@@ -189,8 +189,7 @@ static const struct regmap_irq max77843_muic_irq[] = {
 static const struct regmap_irq_chip max77843_muic_irq_chip = {
 	.name           = "max77843-muic",
 	.status_base    = MAX77843_MUIC_REG_INT1,
-	.mask_base      = MAX77843_MUIC_REG_INTMASK1,
-	.mask_invert    = true,
+	.unmask_base    = MAX77843_MUIC_REG_INTMASK1,
 	.num_regs       = 3,
 	.irqs           = max77843_muic_irq,
 	.num_irqs       = ARRAY_SIZE(max77843_muic_irq),
-- 
GitLab


From df9c4faa81c9659eefc9e149ae9b2124de17dfa7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:42 +0100
Subject: [PATCH 3321/4122] extcon: fsa9480: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-fsa9480.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/extcon/extcon-fsa9480.c b/drivers/extcon/extcon-fsa9480.c
index 7cff66c29907e..e8b2671eb29bc 100644
--- a/drivers/extcon/extcon-fsa9480.c
+++ b/drivers/extcon/extcon-fsa9480.c
@@ -257,8 +257,7 @@ static irqreturn_t fsa9480_irq_handler(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static int fsa9480_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int fsa9480_probe(struct i2c_client *client)
 {
 	struct fsa9480_usbsw *info;
 	int ret;
@@ -370,7 +369,7 @@ static struct i2c_driver fsa9480_i2c_driver = {
 		.pm		= &fsa9480_pm_ops,
 		.of_match_table = fsa9480_of_match,
 	},
-	.probe			= fsa9480_probe,
+	.probe_new		= fsa9480_probe,
 	.id_table		= fsa9480_id,
 };
 
-- 
GitLab


From 881de30c28ac0725ab8fb9af905b568a849f8d0f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:43 +0100
Subject: [PATCH 3322/4122] extcon: rt8973: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-rt8973a.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/extcon/extcon-rt8973a.c b/drivers/extcon/extcon-rt8973a.c
index e6e448f6ea2f6..afc9b405d1030 100644
--- a/drivers/extcon/extcon-rt8973a.c
+++ b/drivers/extcon/extcon-rt8973a.c
@@ -548,8 +548,7 @@ static void rt8973a_init_dev_type(struct rt8973a_muic_info *info)
 	}
 }
 
-static int rt8973a_muic_i2c_probe(struct i2c_client *i2c,
-				 const struct i2c_device_id *id)
+static int rt8973a_muic_i2c_probe(struct i2c_client *i2c)
 {
 	struct device_node *np = i2c->dev.of_node;
 	struct rt8973a_muic_info *info;
@@ -696,7 +695,7 @@ static struct i2c_driver rt8973a_muic_i2c_driver = {
 		.pm	= &rt8973a_muic_pm_ops,
 		.of_match_table = rt8973a_dt_match,
 	},
-	.probe	= rt8973a_muic_i2c_probe,
+	.probe_new = rt8973a_muic_i2c_probe,
 	.remove	= rt8973a_muic_i2c_remove,
 	.id_table = rt8973a_i2c_id,
 };
-- 
GitLab


From 5313121b22fd11db0d14f305c110168b8176efdc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:35:44 +0100
Subject: [PATCH 3323/4122] extcon: usbc-tusb320: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Chanwoo Choi <cw00.choi@samsung.com>
---
 drivers/extcon/extcon-usbc-tusb320.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/extcon/extcon-usbc-tusb320.c b/drivers/extcon/extcon-usbc-tusb320.c
index 2a120d8d3c272..396a026e0c90e 100644
--- a/drivers/extcon/extcon-usbc-tusb320.c
+++ b/drivers/extcon/extcon-usbc-tusb320.c
@@ -421,8 +421,7 @@ static int tusb320_typec_probe(struct i2c_client *client,
 	return 0;
 }
 
-static int tusb320_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int tusb320_probe(struct i2c_client *client)
 {
 	struct tusb320_priv *priv;
 	const void *match_data;
@@ -495,7 +494,7 @@ static const struct of_device_id tusb320_extcon_dt_match[] = {
 MODULE_DEVICE_TABLE(of, tusb320_extcon_dt_match);
 
 static struct i2c_driver tusb320_extcon_driver = {
-	.probe		= tusb320_probe,
+	.probe_new	= tusb320_probe,
 	.driver		= {
 		.name	= "extcon-tusb320",
 		.of_match_table = tusb320_extcon_dt_match,
-- 
GitLab


From 323a74fc20f53c0d0e13a16aee703a30d9751235 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 2 Dec 2022 13:19:40 -0800
Subject: [PATCH 3324/4122] RDMA: Disable IB HW for UML

Disable all of drivers/infiniband/hw/ and rdmavt for UML builds until
someone needs it and provides patches to support it.

This prevents build errors in hw/qib/qib_wc_x86_64.c.

Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: linux-rdma@vger.kernel.org
Cc: Jeff Dike <jdike@addtoit.com>
Cc: Richard Weinberger <richard@nod.at>
Cc: Anton Ivanov <anton.ivanov@cambridgegreys.com>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: linux-um@lists.infradead.org
Link: https://lore.kernel.org/r/20221202211940.29111-1-rdunlap@infradead.org
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/Kconfig b/drivers/infiniband/Kconfig
index ccc874478f0b6..a5827d11e9346 100644
--- a/drivers/infiniband/Kconfig
+++ b/drivers/infiniband/Kconfig
@@ -78,6 +78,7 @@ config INFINIBAND_VIRT_DMA
 	def_bool !HIGHMEM
 
 if INFINIBAND_USER_ACCESS || !INFINIBAND_USER_ACCESS
+if !UML
 source "drivers/infiniband/hw/bnxt_re/Kconfig"
 source "drivers/infiniband/hw/cxgb4/Kconfig"
 source "drivers/infiniband/hw/efa/Kconfig"
@@ -95,6 +96,7 @@ source "drivers/infiniband/hw/qib/Kconfig"
 source "drivers/infiniband/hw/usnic/Kconfig"
 source "drivers/infiniband/hw/vmw_pvrdma/Kconfig"
 source "drivers/infiniband/sw/rdmavt/Kconfig"
+endif # !UML
 source "drivers/infiniband/sw/rxe/Kconfig"
 source "drivers/infiniband/sw/siw/Kconfig"
 endif
-- 
GitLab


From 725349f8ba1e78a146c6ff8f3ee5e2712e517106 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Fri, 2 Dec 2022 12:00:37 +0800
Subject: [PATCH 3325/4122] RDMA/hfi1: Fix error return code in
 parse_platform_config()

In the previous iteration of the while loop, the "ret" may have been
assigned a value of 0, so the error return code -EINVAL may have been
incorrectly set to 0. To fix set valid return code before calling to
goto.

Fixes: 97167e813415 ("staging/rdma/hfi1: Tune for unknown channel if configuration file is absent")
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Link: https://lore.kernel.org/r/1669953638-11747-1-git-send-email-wangyufen@huawei.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/hfi1/firmware.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/infiniband/hw/hfi1/firmware.c b/drivers/infiniband/hw/hfi1/firmware.c
index 1d77514ebbee0..0c0cef5b1e0e5 100644
--- a/drivers/infiniband/hw/hfi1/firmware.c
+++ b/drivers/infiniband/hw/hfi1/firmware.c
@@ -1743,6 +1743,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
 
 	if (!dd->platform_config.data) {
 		dd_dev_err(dd, "%s: Missing config file\n", __func__);
+		ret = -EINVAL;
 		goto bail;
 	}
 	ptr = (u32 *)dd->platform_config.data;
@@ -1751,6 +1752,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
 	ptr++;
 	if (magic_num != PLATFORM_CONFIG_MAGIC_NUM) {
 		dd_dev_err(dd, "%s: Bad config file\n", __func__);
+		ret = -EINVAL;
 		goto bail;
 	}
 
@@ -1774,6 +1776,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
 	if (file_length > dd->platform_config.size) {
 		dd_dev_info(dd, "%s:File claims to be larger than read size\n",
 			    __func__);
+		ret = -EINVAL;
 		goto bail;
 	} else if (file_length < dd->platform_config.size) {
 		dd_dev_info(dd,
@@ -1794,6 +1797,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
 			dd_dev_err(dd, "%s: Failed validation at offset %ld\n",
 				   __func__, (ptr - (u32 *)
 					      dd->platform_config.data));
+			ret = -EINVAL;
 			goto bail;
 		}
 
@@ -1837,6 +1841,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
 					   __func__, table_type,
 					   (ptr - (u32 *)
 					    dd->platform_config.data));
+				ret = -EINVAL;
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table = ptr;
@@ -1856,6 +1861,7 @@ int parse_platform_config(struct hfi1_devdata *dd)
 					   __func__, table_type,
 					   (ptr -
 					    (u32 *)dd->platform_config.data));
+				ret = -EINVAL;
 				goto bail; /* We don't trust this file now */
 			}
 			pcfgcache->config_tables[table_type].table_metadata =
-- 
GitLab


From ed461b30b22c8fa85c25189c14cb89f29595cd14 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Fri, 2 Dec 2022 12:00:38 +0800
Subject: [PATCH 3326/4122] RDMA/srp: Fix error return code in
 srp_parse_options()

In the previous iteration of the while loop, the "ret" may have been
assigned a value of 0, so the error return code -EINVAL may have been
incorrectly set to 0. To fix set valid return code before calling to
goto. Also investigate each case separately as Andy suggessted.

Fixes: e711f968c49c ("IB/srp: replace custom implementation of hex2bin()")
Fixes: 2a174df0c602 ("IB/srp: Use kstrtoull() instead of simple_strtoull()")
Fixes: 19f313438c77 ("IB/srp: Add RDMA/CM support")
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Link: https://lore.kernel.org/r/1669953638-11747-2-git-send-email-wangyufen@huawei.com
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/srp/ib_srp.c | 96 ++++++++++++++++++++++++-----
 1 file changed, 82 insertions(+), 14 deletions(-)

diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 1075c2ac8fe20..b4d6a4a5ae81e 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -3410,7 +3410,8 @@ static int srp_parse_options(struct net *net, const char *buf,
 			break;
 
 		case SRP_OPT_PKEY:
-			if (match_hex(args, &token)) {
+			ret = match_hex(args, &token);
+			if (ret) {
 				pr_warn("bad P_Key parameter '%s'\n", p);
 				goto out;
 			}
@@ -3470,7 +3471,8 @@ static int srp_parse_options(struct net *net, const char *buf,
 			break;
 
 		case SRP_OPT_MAX_SECT:
-			if (match_int(args, &token)) {
+			ret = match_int(args, &token);
+			if (ret) {
 				pr_warn("bad max sect parameter '%s'\n", p);
 				goto out;
 			}
@@ -3478,8 +3480,15 @@ static int srp_parse_options(struct net *net, const char *buf,
 			break;
 
 		case SRP_OPT_QUEUE_SIZE:
-			if (match_int(args, &token) || token < 1) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for queue_size parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 1) {
 				pr_warn("bad queue_size parameter '%s'\n", p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->scsi_host->can_queue = token;
@@ -3490,25 +3499,40 @@ static int srp_parse_options(struct net *net, const char *buf,
 			break;
 
 		case SRP_OPT_MAX_CMD_PER_LUN:
-			if (match_int(args, &token) || token < 1) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for max cmd_per_lun parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 1) {
 				pr_warn("bad max cmd_per_lun parameter '%s'\n",
 					p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->scsi_host->cmd_per_lun = token;
 			break;
 
 		case SRP_OPT_TARGET_CAN_QUEUE:
-			if (match_int(args, &token) || token < 1) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for max target_can_queue parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 1) {
 				pr_warn("bad max target_can_queue parameter '%s'\n",
 					p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->target_can_queue = token;
 			break;
 
 		case SRP_OPT_IO_CLASS:
-			if (match_hex(args, &token)) {
+			ret = match_hex(args, &token);
+			if (ret) {
 				pr_warn("bad IO class parameter '%s'\n", p);
 				goto out;
 			}
@@ -3517,6 +3541,7 @@ static int srp_parse_options(struct net *net, const char *buf,
 				pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
 					token, SRP_REV10_IB_IO_CLASS,
 					SRP_REV16A_IB_IO_CLASS);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->io_class = token;
@@ -3539,16 +3564,24 @@ static int srp_parse_options(struct net *net, const char *buf,
 			break;
 
 		case SRP_OPT_CMD_SG_ENTRIES:
-			if (match_int(args, &token) || token < 1 || token > 255) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for max cmd_sg_entries parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 1 || token > 255) {
 				pr_warn("bad max cmd_sg_entries parameter '%s'\n",
 					p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->cmd_sg_cnt = token;
 			break;
 
 		case SRP_OPT_ALLOW_EXT_SG:
-			if (match_int(args, &token)) {
+			ret = match_int(args, &token);
+			if (ret) {
 				pr_warn("bad allow_ext_sg parameter '%s'\n", p);
 				goto out;
 			}
@@ -3556,43 +3589,77 @@ static int srp_parse_options(struct net *net, const char *buf,
 			break;
 
 		case SRP_OPT_SG_TABLESIZE:
-			if (match_int(args, &token) || token < 1 ||
-					token > SG_MAX_SEGMENTS) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for max sg_tablesize parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 1 || token > SG_MAX_SEGMENTS) {
 				pr_warn("bad max sg_tablesize parameter '%s'\n",
 					p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->sg_tablesize = token;
 			break;
 
 		case SRP_OPT_COMP_VECTOR:
-			if (match_int(args, &token) || token < 0) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for comp_vector parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 0) {
 				pr_warn("bad comp_vector parameter '%s'\n", p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->comp_vector = token;
 			break;
 
 		case SRP_OPT_TL_RETRY_COUNT:
-			if (match_int(args, &token) || token < 2 || token > 7) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for tl_retry_count parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 2 || token > 7) {
 				pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
 					p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->tl_retry_count = token;
 			break;
 
 		case SRP_OPT_MAX_IT_IU_SIZE:
-			if (match_int(args, &token) || token < 0) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for max it_iu_size parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 0) {
 				pr_warn("bad maximum initiator to target IU size '%s'\n", p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->max_it_iu_size = token;
 			break;
 
 		case SRP_OPT_CH_COUNT:
-			if (match_int(args, &token) || token < 1) {
+			ret = match_int(args, &token);
+			if (ret) {
+				pr_warn("match_int() failed for channel count parameter '%s', Error %d\n",
+					p, ret);
+				goto out;
+			}
+			if (token < 1) {
 				pr_warn("bad channel count %s\n", p);
+				ret = -EINVAL;
 				goto out;
 			}
 			target->ch_count = token;
@@ -3601,6 +3668,7 @@ static int srp_parse_options(struct net *net, const char *buf,
 		default:
 			pr_warn("unknown parameter or missing value '%s' in target creation request\n",
 				p);
+			ret = -EINVAL;
 			goto out;
 		}
 	}
-- 
GitLab


From 6978837ce42f8bea85041fc08c854f4e28852b3e Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Sat, 3 Dec 2022 11:37:14 +0800
Subject: [PATCH 3327/4122] RDMA/mlx5: no need to kfree NULL pointer

Goto label 'free' where it will kfree the 'in' is not needed though
it's safe to kfree NULL. Return err code directly to simplify the code.

1973 free:
1974         kfree(in);
1975         return err;

Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Link: https://lore.kernel.org/r/20221203033714.25870-1-lizhijian@fujitsu.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/mr.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index 410cc5fd25239..053fe946e45ae 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -1929,10 +1929,8 @@ int mlx5_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
 	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
 
 	in = kzalloc(inlen, GFP_KERNEL);
-	if (!in) {
-		err = -ENOMEM;
-		goto free;
-	}
+	if (!in)
+		return -ENOMEM;
 
 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
 
-- 
GitLab


From 1f5619ed881081be300db61da552ffae7163bb72 Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Fri, 2 Dec 2022 13:30:51 -0800
Subject: [PATCH 3328/4122] xfs: Remove duplicated include in xfs_iomap.c

./fs/xfs/xfs_iomap.c: xfs_error.h is included more than once.
./fs/xfs/xfs_iomap.c: xfs_errortag.h is included more than once.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3337
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_iomap.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 68436370927d5..43f447199c082 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -27,8 +27,6 @@
 #include "xfs_dquot_item.h"
 #include "xfs_dquot.h"
 #include "xfs_reflink.h"
-#include "xfs_error.h"
-#include "xfs_errortag.h"
 
 #define XFS_ALLOC_ALIGN(mp, off) \
 	(((off) >> mp->m_allocsize_log) << mp->m_allocsize_log)
-- 
GitLab


From af5d74e32eb8e1b833f687047f0ffe3801d7229d Mon Sep 17 00:00:00 2001
From: Xu Panda <xu.panda@zte.com.cn>
Date: Sat, 3 Dec 2022 14:10:56 +0800
Subject: [PATCH 3329/4122] m68k: use strscpy() to instead of strncpy()

The implementation of strscpy() is more robust and safer.

Signed-off-by: Xu Panda <xu.panda@zte.com.cn>
Signed-off-by: Yang Yang <yang.yang29@zte.com>
Signed-off-by: Greg Ungerer <gerg@linux-m68k.org>
---
 arch/m68k/kernel/setup_no.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index cb6def585851e..37fb663559b47 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -90,8 +90,7 @@ void __init setup_arch(char **cmdline_p)
 	config_BSP(&command_line[0], sizeof(command_line));
 
 #if defined(CONFIG_BOOTPARAM)
-	strncpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line));
-	command_line[sizeof(command_line) - 1] = 0;
+	strscpy(&command_line[0], CONFIG_BOOTPARAM_STRING, sizeof(command_line));
 #endif /* CONFIG_BOOTPARAM */
 
 	process_uboot_commandline(&command_line[0], sizeof(command_line));
-- 
GitLab


From af45de8368883c9620a7735a8c2532e52101c1a2 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 29 Nov 2022 15:47:01 +0100
Subject: [PATCH 3330/4122] dt-bindings: qcom: geni-se: document I2C Master Hub
 wrapper variant

The I2C Master Hub is a stripped down version of the GENI Serial Engine
QUP Wrapper Controller but only supporting I2C serial engines without
DMA support.

Document the variant compatible, forbid UART and SPI sub-nodes,
and remove requirement for the Master AHB clock and iommu property.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 .../bindings/soc/qcom/qcom,geni-se.yaml       | 44 ++++++++++++++++---
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml
index 2bf5293fc9952..ab4df02052853 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,geni-se.yaml
@@ -21,20 +21,19 @@ properties:
   compatible:
     enum:
       - qcom,geni-se-qup
+      - qcom,geni-se-i2c-master-hub
 
   reg:
     description: QUP wrapper common register address and length.
     maxItems: 1
 
   clock-names:
-    items:
-      - const: m-ahb
-      - const: s-ahb
+    minItems: 1
+    maxItems: 2
 
   clocks:
-    items:
-      - description: Master AHB Clock
-      - description: Slave AHB Clock
+    minItems: 1
+    maxItems: 2
 
   "#address-cells":
     const: 2
@@ -81,6 +80,39 @@ patternProperties:
     description: GENI Serial Engine based UART Controller.
     $ref: /schemas/serial/qcom,serial-geni-qcom.yaml#
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,geni-se-i2c-master-hub
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: s-ahb
+
+        clocks:
+          items:
+            - description: Slave AHB Clock
+
+        iommus: false
+
+      patternProperties:
+        "spi@[0-9a-f]+$": false
+        "serial@[0-9a-f]+$": false
+    else:
+      properties:
+        clock-names:
+          items:
+            - const: m-ahb
+            - const: s-ahb
+
+        clocks:
+          items:
+            - description: Master AHB Clock
+            - description: Slave AHB Clock
+
 additionalProperties: false
 
 examples:
-- 
GitLab


From cb29d4e6a9ef004eed24774bc26839aa9e373ba4 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 29 Nov 2022 15:47:02 +0100
Subject: [PATCH 3331/4122] dt-bindings: i2c: qcom-geni: document I2C Master
 Hub serial I2C engine

The I2C Master Hub is a stripped down version of the GENI Serial Engine
QUP Wrapper Controller but only supporting I2C serial engines without
DMA support.

Document the I2C Serial Engine variant used within the I2C Master
Hub Wrapper.

This serial engine variant lacks DMA support, requires a core clock,
and since DMA support is lacking the memory interconnect path isn't
needed.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 .../bindings/i2c/qcom,i2c-geni-qcom.yaml      | 64 ++++++++++++++++---
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git a/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml b/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml
index 0e7ed00562e21..f5f7dc8f325cb 100644
--- a/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml
+++ b/Documentation/devicetree/bindings/i2c/qcom,i2c-geni-qcom.yaml
@@ -10,18 +10,19 @@ maintainers:
   - Andy Gross <agross@kernel.org>
   - Bjorn Andersson <bjorn.andersson@linaro.org>
 
-allOf:
-  - $ref: /schemas/i2c/i2c-controller.yaml#
-
 properties:
   compatible:
-    const: qcom,geni-i2c
+    enum:
+      - qcom,geni-i2c
+      - qcom,geni-i2c-master-hub
 
   clocks:
-    maxItems: 1
+    minItems: 1
+    maxItems: 2
 
   clock-names:
-    const: se
+    minItems: 1
+    maxItems: 2
 
   clock-frequency:
     default: 100000
@@ -35,13 +36,12 @@ properties:
       - const: rx
 
   interconnects:
+    minItems: 2
     maxItems: 3
 
   interconnect-names:
-    items:
-      - const: qup-core
-      - const: qup-config
-      - const: qup-memory
+    minItems: 2
+    maxItems: 3
 
   interrupts:
     maxItems: 1
@@ -71,6 +71,50 @@ required:
   - clock-names
   - reg
 
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: qcom,geni-i2c-master-hub
+    then:
+      properties:
+        clocks:
+          minItems: 2
+
+        clock-names:
+          items:
+            - const: se
+            - const: core
+
+        dmas: false
+        dma-names: false
+
+        interconnects:
+          maxItems: 2
+
+        interconnect-names:
+          items:
+            - const: qup-core
+            - const: qup-config
+    else:
+      properties:
+        clocks:
+          maxItems: 1
+
+        clock-names:
+          const: se
+
+        interconnects:
+          minItems: 3
+
+        interconnect-names:
+          items:
+            - const: qup-core
+            - const: qup-config
+            - const: qup-memory
+
 unevaluatedProperties: false
 
 examples:
-- 
GitLab


From 63fc9af83c11e02ea9c981d3bd0382e36f49916f Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 29 Nov 2022 15:47:03 +0100
Subject: [PATCH 3332/4122] soc: qcom: geni-se: add desc struct to specify
 clocks from device match data

The I2C Master Hub is a stripped down version of the GENI Serial Engine
QUP Wrapper Controller but only supporting I2C serial engines without
DMA support.

Prepare support for the I2C Master Hub variant by moving the required
clocks list to a new desc struct then passing it through the compatible
match data.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/soc/qcom/qcom-geni-se.c | 69 ++++++++++++++++++++++++++-------
 1 file changed, 55 insertions(+), 14 deletions(-)

diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c
index a0ceeede450f1..9ddee9fd11ba7 100644
--- a/drivers/soc/qcom/qcom-geni-se.c
+++ b/drivers/soc/qcom/qcom-geni-se.c
@@ -81,19 +81,31 @@
  */
 
 #define MAX_CLK_PERF_LEVEL 32
-#define NUM_AHB_CLKS 2
+#define MAX_CLKS 2
 
 /**
  * struct geni_wrapper - Data structure to represent the QUP Wrapper Core
  * @dev:		Device pointer of the QUP wrapper core
  * @base:		Base address of this instance of QUP wrapper core
- * @ahb_clks:		Handle to the primary & secondary AHB clocks
+ * @clks:		Handle to the primary & optional secondary AHB clocks
+ * @num_clks:		Count of clocks
  * @to_core:		Core ICC path
  */
 struct geni_wrapper {
 	struct device *dev;
 	void __iomem *base;
-	struct clk_bulk_data ahb_clks[NUM_AHB_CLKS];
+	struct clk_bulk_data clks[MAX_CLKS];
+	unsigned int num_clks;
+};
+
+/**
+ * struct geni_se_desc - Data structure to represent the QUP Wrapper resources
+ * @clks:		Name of the primary & optional secondary AHB clocks
+ * @num_clks:		Count of clock names
+ */
+struct geni_se_desc {
+	unsigned int num_clks;
+	const char * const *clks;
 };
 
 static const char * const icc_path_names[] = {"qup-core", "qup-config",
@@ -496,8 +508,7 @@ static void geni_se_clks_off(struct geni_se *se)
 	struct geni_wrapper *wrapper = se->wrapper;
 
 	clk_disable_unprepare(se->clk);
-	clk_bulk_disable_unprepare(ARRAY_SIZE(wrapper->ahb_clks),
-						wrapper->ahb_clks);
+	clk_bulk_disable_unprepare(wrapper->num_clks, wrapper->clks);
 }
 
 /**
@@ -528,15 +539,13 @@ static int geni_se_clks_on(struct geni_se *se)
 	int ret;
 	struct geni_wrapper *wrapper = se->wrapper;
 
-	ret = clk_bulk_prepare_enable(ARRAY_SIZE(wrapper->ahb_clks),
-						wrapper->ahb_clks);
+	ret = clk_bulk_prepare_enable(wrapper->num_clks, wrapper->clks);
 	if (ret)
 		return ret;
 
 	ret = clk_prepare_enable(se->clk);
 	if (ret)
-		clk_bulk_disable_unprepare(ARRAY_SIZE(wrapper->ahb_clks),
-							wrapper->ahb_clks);
+		clk_bulk_disable_unprepare(wrapper->num_clks, wrapper->clks);
 	return ret;
 }
 
@@ -887,11 +896,33 @@ static int geni_se_probe(struct platform_device *pdev)
 		return PTR_ERR(wrapper->base);
 
 	if (!has_acpi_companion(&pdev->dev)) {
-		wrapper->ahb_clks[0].id = "m-ahb";
-		wrapper->ahb_clks[1].id = "s-ahb";
-		ret = devm_clk_bulk_get(dev, NUM_AHB_CLKS, wrapper->ahb_clks);
+		const struct geni_se_desc *desc;
+		int i;
+
+		desc = device_get_match_data(&pdev->dev);
+		if (!desc)
+			return -EINVAL;
+
+		wrapper->num_clks = min_t(unsigned int, desc->num_clks, MAX_CLKS);
+
+		for (i = 0; i < wrapper->num_clks; ++i)
+			wrapper->clks[i].id = desc->clks[i];
+
+		ret = of_count_phandle_with_args(dev->of_node, "clocks", "#clock-cells");
+		if (ret < 0) {
+			dev_err(dev, "invalid clocks property at %pOF\n", dev->of_node);
+			return ret;
+		}
+
+		if (ret < wrapper->num_clks) {
+			dev_err(dev, "invalid clocks count at %pOF, expected %d entries\n",
+				dev->of_node, wrapper->num_clks);
+			return -EINVAL;
+		}
+
+		ret = devm_clk_bulk_get(dev, wrapper->num_clks, wrapper->clks);
 		if (ret) {
-			dev_err(dev, "Err getting AHB clks %d\n", ret);
+			dev_err(dev, "Err getting clks %d\n", ret);
 			return ret;
 		}
 	}
@@ -901,8 +932,18 @@ static int geni_se_probe(struct platform_device *pdev)
 	return devm_of_platform_populate(dev);
 }
 
+static const char * const qup_clks[] = {
+	"m-ahb",
+	"s-ahb",
+};
+
+static const struct geni_se_desc qup_desc = {
+	.clks = qup_clks,
+	.num_clks = ARRAY_SIZE(qup_clks),
+};
+
 static const struct of_device_id geni_se_dt_match[] = {
-	{ .compatible = "qcom,geni-se-qup", },
+	{ .compatible = "qcom,geni-se-qup", .data = &qup_desc },
 	{}
 };
 MODULE_DEVICE_TABLE(of, geni_se_dt_match);
-- 
GitLab


From f4aba01db4801cc02d1fae4c8099984a23740996 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 29 Nov 2022 15:47:04 +0100
Subject: [PATCH 3333/4122] soc: qcom: geni-se: add support for I2C Master Hub
 wrapper variant

The I2C Master Hub is a stripped down version of the GENI Serial Engine
QUP Wrapper Controller but only supporting I2C serial engines without
DMA support.

Add the clock list for the I2C Master Hub variant to a new desc struct
then pass it through the I2C Master Hub compatible match data.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/soc/qcom/qcom-geni-se.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/soc/qcom/qcom-geni-se.c b/drivers/soc/qcom/qcom-geni-se.c
index 9ddee9fd11ba7..f0475b93ca730 100644
--- a/drivers/soc/qcom/qcom-geni-se.c
+++ b/drivers/soc/qcom/qcom-geni-se.c
@@ -942,8 +942,18 @@ static const struct geni_se_desc qup_desc = {
 	.num_clks = ARRAY_SIZE(qup_clks),
 };
 
+static const char * const i2c_master_hub_clks[] = {
+	"s-ahb",
+};
+
+static const struct geni_se_desc i2c_master_hub_desc = {
+	.clks = i2c_master_hub_clks,
+	.num_clks = ARRAY_SIZE(i2c_master_hub_clks),
+};
+
 static const struct of_device_id geni_se_dt_match[] = {
 	{ .compatible = "qcom,geni-se-qup", .data = &qup_desc },
+	{ .compatible = "qcom,geni-se-i2c-master-hub", .data = &i2c_master_hub_desc },
 	{}
 };
 MODULE_DEVICE_TABLE(of, geni_se_dt_match);
-- 
GitLab


From 14d02fbadb5dc1cdf66078ef8430dd1cd22bfd53 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 29 Nov 2022 15:47:05 +0100
Subject: [PATCH 3334/4122] i2c: qcom-geni: add desc struct to prepare support
 for I2C Master Hub variant

The I2C Master Hub is a stripped down version of the GENI Serial Engine
QUP Wrapper Controller but only supporting I2C serial engines without
DMA support.

Those I2C serial engines variants have some requirements:
- a separate "core" clock
- doesn't support DMA, thus no memory interconnect path
- fixed FIFO size not discoverable in the HW_PARAM_0 register

Add a desc struct specifying all those requirements which will be used in
a next change when adding the I2C Master Hub serial engine compatible.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-qcom-geni.c | 50 ++++++++++++++++++++++++++++--
 1 file changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 84a77512614d9..75dd0718c5a12 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -88,6 +88,7 @@ struct geni_i2c_dev {
 	int cur_wr;
 	int cur_rd;
 	spinlock_t lock;
+	struct clk *core_clk;
 	u32 clk_freq_out;
 	const struct geni_i2c_clk_fld *clk_fld;
 	int suspended;
@@ -100,6 +101,13 @@ struct geni_i2c_dev {
 	bool abort_done;
 };
 
+struct geni_i2c_desc {
+	bool has_core_clk;
+	char *icc_ddr;
+	bool no_dma_support;
+	unsigned int tx_fifo_depth;
+};
+
 struct geni_i2c_err_log {
 	int err;
 	const char *msg;
@@ -764,6 +772,7 @@ static int geni_i2c_probe(struct platform_device *pdev)
 	u32 proto, tx_depth, fifo_disable;
 	int ret;
 	struct device *dev = &pdev->dev;
+	const struct geni_i2c_desc *desc = NULL;
 
 	gi2c = devm_kzalloc(dev, sizeof(*gi2c), GFP_KERNEL);
 	if (!gi2c)
@@ -776,6 +785,14 @@ static int geni_i2c_probe(struct platform_device *pdev)
 	if (IS_ERR(gi2c->se.base))
 		return PTR_ERR(gi2c->se.base);
 
+	desc = device_get_match_data(&pdev->dev);
+
+	if (desc && desc->has_core_clk) {
+		gi2c->core_clk = devm_clk_get(dev, "core");
+		if (IS_ERR(gi2c->core_clk))
+			return PTR_ERR(gi2c->core_clk);
+	}
+
 	gi2c->se.clk = devm_clk_get(dev, "se");
 	if (IS_ERR(gi2c->se.clk) && !has_acpi_companion(dev))
 		return PTR_ERR(gi2c->se.clk);
@@ -819,7 +836,7 @@ static int geni_i2c_probe(struct platform_device *pdev)
 	gi2c->adap.dev.of_node = dev->of_node;
 	strscpy(gi2c->adap.name, "Geni-I2C", sizeof(gi2c->adap.name));
 
-	ret = geni_icc_get(&gi2c->se, "qup-memory");
+	ret = geni_icc_get(&gi2c->se, desc ? desc->icc_ddr : "qup-memory");
 	if (ret)
 		return ret;
 	/*
@@ -829,12 +846,17 @@ static int geni_i2c_probe(struct platform_device *pdev)
 	 */
 	gi2c->se.icc_paths[GENI_TO_CORE].avg_bw = GENI_DEFAULT_BW;
 	gi2c->se.icc_paths[CPU_TO_GENI].avg_bw = GENI_DEFAULT_BW;
-	gi2c->se.icc_paths[GENI_TO_DDR].avg_bw = Bps_to_icc(gi2c->clk_freq_out);
+	if (!desc || desc->icc_ddr)
+		gi2c->se.icc_paths[GENI_TO_DDR].avg_bw = Bps_to_icc(gi2c->clk_freq_out);
 
 	ret = geni_icc_set_bw(&gi2c->se);
 	if (ret)
 		return ret;
 
+	ret = clk_prepare_enable(gi2c->core_clk);
+	if (ret)
+		return ret;
+
 	ret = geni_se_resources_on(&gi2c->se);
 	if (ret) {
 		dev_err(dev, "Error turning on resources %d\n", ret);
@@ -844,10 +866,15 @@ static int geni_i2c_probe(struct platform_device *pdev)
 	if (proto != GENI_SE_I2C) {
 		dev_err(dev, "Invalid proto %d\n", proto);
 		geni_se_resources_off(&gi2c->se);
+		clk_disable_unprepare(gi2c->core_clk);
 		return -ENXIO;
 	}
 
-	fifo_disable = readl_relaxed(gi2c->se.base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE;
+	if (desc && desc->no_dma_support)
+		fifo_disable = false;
+	else
+		fifo_disable = readl_relaxed(gi2c->se.base + GENI_IF_DISABLE_RO) & FIFO_IF_DISABLE;
+
 	if (fifo_disable) {
 		/* FIFO is disabled, so we can only use GPI DMA */
 		gi2c->gpi_mode = true;
@@ -859,6 +886,16 @@ static int geni_i2c_probe(struct platform_device *pdev)
 	} else {
 		gi2c->gpi_mode = false;
 		tx_depth = geni_se_get_tx_fifo_depth(&gi2c->se);
+
+		/* I2C Master Hub Serial Elements doesn't have the HW_PARAM_0 register */
+		if (!tx_depth && desc)
+			tx_depth = desc->tx_fifo_depth;
+
+		if (!tx_depth) {
+			dev_err(dev, "Invalid TX FIFO depth\n");
+			return -EINVAL;
+		}
+
 		gi2c->tx_wm = tx_depth - 1;
 		geni_se_init(&gi2c->se, gi2c->tx_wm, tx_depth);
 		geni_se_config_packing(&gi2c->se, BITS_PER_BYTE,
@@ -867,6 +904,7 @@ static int geni_i2c_probe(struct platform_device *pdev)
 		dev_dbg(dev, "i2c fifo/se-dma mode. fifo depth:%d\n", tx_depth);
 	}
 
+	clk_disable_unprepare(gi2c->core_clk);
 	ret = geni_se_resources_off(&gi2c->se);
 	if (ret) {
 		dev_err(dev, "Error turning off resources %d\n", ret);
@@ -932,6 +970,8 @@ static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
 		gi2c->suspended = 1;
 	}
 
+	clk_disable_unprepare(gi2c->core_clk);
+
 	return geni_icc_disable(&gi2c->se);
 }
 
@@ -944,6 +984,10 @@ static int __maybe_unused geni_i2c_runtime_resume(struct device *dev)
 	if (ret)
 		return ret;
 
+	ret = clk_prepare_enable(gi2c->core_clk);
+	if (ret)
+		return ret;
+
 	ret = geni_se_resources_on(&gi2c->se);
 	if (ret)
 		return ret;
-- 
GitLab


From cacd9643eca7a1f4635479aff4ec33aaade45e64 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Tue, 29 Nov 2022 15:47:06 +0100
Subject: [PATCH 3335/4122] i2c: qcom-geni: add support for I2C Master Hub
 variant

The I2C Master Hub is a stripped down version of the GENI Serial Engine
QUP Wrapper Controller but only supporting I2C serial engines without
DMA support.

Add the I2C Master Hub serial engine compatible along the specific
requirements in a new desc struct passed through the device match data.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Konrad Dybcio <konrad.dybcio@linaro.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-qcom-geni.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/i2c/busses/i2c-qcom-geni.c b/drivers/i2c/busses/i2c-qcom-geni.c
index 75dd0718c5a12..bfe75038bc149 100644
--- a/drivers/i2c/busses/i2c-qcom-geni.c
+++ b/drivers/i2c/busses/i2c-qcom-geni.c
@@ -1026,8 +1026,16 @@ static const struct dev_pm_ops geni_i2c_pm_ops = {
 									NULL)
 };
 
+const struct geni_i2c_desc i2c_master_hub = {
+	.has_core_clk = true,
+	.icc_ddr = NULL,
+	.no_dma_support = true,
+	.tx_fifo_depth = 16,
+};
+
 static const struct of_device_id geni_i2c_dt_match[] = {
 	{ .compatible = "qcom,geni-i2c" },
+	{ .compatible = "qcom,geni-i2c-master-hub", .data = &i2c_master_hub },
 	{}
 };
 MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
-- 
GitLab


From eaade84a6302f139aede74fe5a568a70adb9baa2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Mon, 5 Dec 2022 12:31:44 +0800
Subject: [PATCH 3336/4122] crypto: api - Use linux/cache.h instead of
 asm/cache.h

Directly including asm/cache.h leads to build failures on powerpc
so replace it with linux/cache.h instead.

Fixes: e634ac4a8aaa ("crypto: api - Add crypto_tfm_ctx_dma")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/algapi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 8722fd67f40ae..61b327206b557 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -7,8 +7,8 @@
 #ifndef _CRYPTO_ALGAPI_H
 #define _CRYPTO_ALGAPI_H
 
-#include <asm/cache.h>
 #include <linux/align.h>
+#include <linux/cache.h>
 #include <linux/crypto.h>
 #include <linux/kconfig.h>
 #include <linux/list.h>
-- 
GitLab


From 73e9841ba7e3e394c276d5f24ef4e639bd5f24e1 Mon Sep 17 00:00:00 2001
From: Binbin Zhou <zhoubinbin@loongson.cn>
Date: Wed, 30 Nov 2022 13:55:51 +0800
Subject: [PATCH 3337/4122] i2c: gpio: Fix potential unused warning for
 'i2c_gpio_dt_ids'

Dropping a matching #ifdef check along with dropping of_match_ptr()
is just a cleanup, while dropping of_match_ptr() that has no
corresponding #ifdef fixes an actual warning.

Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Binbin Zhou <zhoubinbin@loongson.cn>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-gpio.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-gpio.c b/drivers/i2c/busses/i2c-gpio.c
index b1985c1667e16..0e4385a9bcf71 100644
--- a/drivers/i2c/busses/i2c-gpio.c
+++ b/drivers/i2c/busses/i2c-gpio.c
@@ -482,19 +482,17 @@ static int i2c_gpio_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#if defined(CONFIG_OF)
 static const struct of_device_id i2c_gpio_dt_ids[] = {
 	{ .compatible = "i2c-gpio", },
 	{ /* sentinel */ }
 };
 
 MODULE_DEVICE_TABLE(of, i2c_gpio_dt_ids);
-#endif
 
 static struct platform_driver i2c_gpio_driver = {
 	.driver		= {
 		.name	= "i2c-gpio",
-		.of_match_table	= of_match_ptr(i2c_gpio_dt_ids),
+		.of_match_table	= i2c_gpio_dt_ids,
 	},
 	.probe		= i2c_gpio_probe,
 	.remove		= i2c_gpio_remove,
-- 
GitLab


From 99c4ec2397bb88c3325676a308b93aa8ba362de8 Mon Sep 17 00:00:00 2001
From: Peter Rosin <peda@axentia.se>
Date: Tue, 17 Apr 2018 16:32:32 +0200
Subject: [PATCH 3338/4122] i2c: mux: pca9541: switch to using .probe_new

Use the new probe style for i2c drivers.

Signed-off-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/muxes/i2c-mux-pca9541.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/muxes/i2c-mux-pca9541.c b/drivers/i2c/muxes/i2c-mux-pca9541.c
index ea83de78f52db..09d1d9e67e31f 100644
--- a/drivers/i2c/muxes/i2c-mux-pca9541.c
+++ b/drivers/i2c/muxes/i2c-mux-pca9541.c
@@ -283,8 +283,7 @@ static int pca9541_release_chan(struct i2c_mux_core *muxc, u32 chan)
 /*
  * I2C init/probing/exit functions
  */
-static int pca9541_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int pca9541_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adap = client->adapter;
 	struct i2c_mux_core *muxc;
@@ -337,7 +336,7 @@ static struct i2c_driver pca9541_driver = {
 		   .name = "pca9541",
 		   .of_match_table = of_match_ptr(pca9541_of_match),
 		   },
-	.probe = pca9541_probe,
+	.probe_new = pca9541_probe,
 	.remove = pca9541_remove,
 	.id_table = pca9541_id,
 };
-- 
GitLab


From a00f6d3723f5617222ab8df228228c3c2c84e3ec Mon Sep 17 00:00:00 2001
From: Stephen Kitt <steve@sk2.org>
Date: Wed, 12 Oct 2022 18:36:47 +0200
Subject: [PATCH 3339/4122] drivers/i2c: use simple i2c probe

All these drivers have an i2c probe function which doesn't use the
"struct i2c_device_id *id" parameter, so they can trivially be
converted to the "probe_new" style of probe with a single argument.

This is part of an ongoing transition to single-argument i2c probe
functions. Old-style probe functions involve a call to i2c_match_id:
in drivers/i2c/i2c-core-base.c,

         /*
          * When there are no more users of probe(),
          * rename probe_new to probe.
          */
         if (driver->probe_new)
                 status = driver->probe_new(client);
         else if (driver->probe)
                 status = driver->probe(client,
                                        i2c_match_id(driver->id_table, client));
         else
                 status = -EINVAL;

Drivers which don't need the second parameter can be declared using
probe_new instead, avoiding the call to i2c_match_id. Drivers which do
can still be converted to probe_new-style, calling i2c_match_id
themselves (as is done currently for of_match_id).

This change was done using the following Coccinelle script, and fixed
up for whitespace changes:

@ rule1 @
identifier fn;
identifier client, id;
@@

- static int fn(struct i2c_client *client, const struct i2c_device_id *id)
+ static int fn(struct i2c_client *client)
{
...when != id
}

@ rule2 depends on rule1 @
identifier rule1.fn;
identifier driver;
@@

struct i2c_driver driver = {
-       .probe
+       .probe_new
                =
(
                   fn
|
-                  &fn
+                  fn
)
                ,
};

Signed-off-by: Stephen Kitt <steve@sk2.org>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/i2c-core-base.c | 5 ++---
 drivers/i2c/i2c-smbus.c     | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c
index 9aa7b9d9a4856..82478eab71af0 100644
--- a/drivers/i2c/i2c-core-base.c
+++ b/drivers/i2c/i2c-core-base.c
@@ -1017,15 +1017,14 @@ static const struct i2c_device_id dummy_id[] = {
 	{ },
 };
 
-static int dummy_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int dummy_probe(struct i2c_client *client)
 {
 	return 0;
 }
 
 static struct i2c_driver dummy_driver = {
 	.driver.name	= "dummy",
-	.probe		= dummy_probe,
+	.probe_new	= dummy_probe,
 	.id_table	= dummy_id,
 };
 
diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c
index c85710ed95483..cd19546d31fcb 100644
--- a/drivers/i2c/i2c-smbus.c
+++ b/drivers/i2c/i2c-smbus.c
@@ -112,8 +112,7 @@ static void smbalert_work(struct work_struct *work)
 }
 
 /* Setup SMBALERT# infrastructure */
-static int smbalert_probe(struct i2c_client *ara,
-			  const struct i2c_device_id *id)
+static int smbalert_probe(struct i2c_client *ara)
 {
 	struct i2c_smbus_alert_setup *setup = dev_get_platdata(&ara->dev);
 	struct i2c_smbus_alert *alert;
@@ -170,7 +169,7 @@ static struct i2c_driver smbalert_driver = {
 	.driver = {
 		.name	= "smbus_alert",
 	},
-	.probe		= smbalert_probe,
+	.probe_new	= smbalert_probe,
 	.remove		= smbalert_remove,
 	.id_table	= smbalert_ids,
 };
-- 
GitLab


From a5eacd2e3790ecf1cbc0be2e53a6f3d1ce3bb719 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:22 +0100
Subject: [PATCH 3340/4122] i2c: mux: pca954x: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Peter Rosin <peda@axentia.se>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/muxes/i2c-mux-pca954x.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/muxes/i2c-mux-pca954x.c b/drivers/i2c/muxes/i2c-mux-pca954x.c
index a5f458b635df6..3639e6d7304cd 100644
--- a/drivers/i2c/muxes/i2c-mux-pca954x.c
+++ b/drivers/i2c/muxes/i2c-mux-pca954x.c
@@ -411,9 +411,9 @@ static int pca954x_init(struct i2c_client *client, struct pca954x *data)
 /*
  * I2C init/probing/exit functions
  */
-static int pca954x_probe(struct i2c_client *client,
-			 const struct i2c_device_id *id)
+static int pca954x_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct i2c_adapter *adap = client->adapter;
 	struct device *dev = &client->dev;
 	struct gpio_desc *gpio;
@@ -554,7 +554,7 @@ static struct i2c_driver pca954x_driver = {
 		.pm	= &pca954x_pm,
 		.of_match_table = pca954x_of_match,
 	},
-	.probe		= pca954x_probe,
+	.probe_new	= pca954x_probe,
 	.remove		= pca954x_remove,
 	.id_table	= pca954x_id,
 };
-- 
GitLab


From 87ab726952267caabb9c1404e007e16e42f03b52 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:36:19 +0100
Subject: [PATCH 3341/4122] i2c: slave-eeprom: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

.probe_new() doesn't get the i2c_device_id * parameter, so determine
that explicitly in the probe function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/i2c-slave-eeprom.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/i2c/i2c-slave-eeprom.c b/drivers/i2c/i2c-slave-eeprom.c
index 4abc2d9198815..5f25f23c4ff8c 100644
--- a/drivers/i2c/i2c-slave-eeprom.c
+++ b/drivers/i2c/i2c-slave-eeprom.c
@@ -140,8 +140,9 @@ static int i2c_slave_init_eeprom_data(struct eeprom_data *eeprom, struct i2c_cli
 	return 0;
 }
 
-static int i2c_slave_eeprom_probe(struct i2c_client *client, const struct i2c_device_id *id)
+static int i2c_slave_eeprom_probe(struct i2c_client *client)
 {
+	const struct i2c_device_id *id = i2c_client_get_device_id(client);
 	struct eeprom_data *eeprom;
 	int ret;
 	unsigned int size = FIELD_GET(I2C_SLAVE_BYTELEN, id->driver_data) + 1;
@@ -206,7 +207,7 @@ static struct i2c_driver i2c_slave_eeprom_driver = {
 	.driver = {
 		.name = "i2c-slave-eeprom",
 	},
-	.probe = i2c_slave_eeprom_probe,
+	.probe_new = i2c_slave_eeprom_probe,
 	.remove = i2c_slave_eeprom_remove,
 	.id_table = i2c_slave_eeprom_id,
 };
-- 
GitLab


From d78a167332e1ca8113268ed922c1212fd71b73ad Mon Sep 17 00:00:00 2001
From: Hui Tang <tanghui20@huawei.com>
Date: Mon, 14 Nov 2022 17:25:40 +0800
Subject: [PATCH 3342/4122] i2c: pxa-pci: fix missing pci_disable_device() on
 error in ce4100_i2c_probe

Using pcim_enable_device() to avoid missing pci_disable_device().

Fixes: 7e94dd154e93 ("i2c-pxa2xx: Add PCI support for PXA I2C controller")
Signed-off-by: Hui Tang <tanghui20@huawei.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-pxa-pci.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/i2c/busses/i2c-pxa-pci.c b/drivers/i2c/busses/i2c-pxa-pci.c
index f614cade432bb..30e38bc8b6db8 100644
--- a/drivers/i2c/busses/i2c-pxa-pci.c
+++ b/drivers/i2c/busses/i2c-pxa-pci.c
@@ -105,7 +105,7 @@ static int ce4100_i2c_probe(struct pci_dev *dev,
 	int i;
 	struct ce4100_devices *sds;
 
-	ret = pci_enable_device_mem(dev);
+	ret = pcim_enable_device(dev);
 	if (ret)
 		return ret;
 
@@ -114,10 +114,8 @@ static int ce4100_i2c_probe(struct pci_dev *dev,
 		return -EINVAL;
 	}
 	sds = kzalloc(sizeof(*sds), GFP_KERNEL);
-	if (!sds) {
-		ret = -ENOMEM;
-		goto err_mem;
-	}
+	if (!sds)
+		return -ENOMEM;
 
 	for (i = 0; i < ARRAY_SIZE(sds->pdev); i++) {
 		sds->pdev[i] = add_i2c_device(dev, i);
@@ -133,8 +131,6 @@ static int ce4100_i2c_probe(struct pci_dev *dev,
 
 err_dev_add:
 	kfree(sds);
-err_mem:
-	pci_disable_device(dev);
 	return ret;
 }
 
-- 
GitLab


From 8f4ab7da904ab7027ccd43ddb4f0094e932a5877 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Mon, 5 Dec 2022 12:44:27 +0400
Subject: [PATCH 3343/4122] selftests/powerpc: Fix resource leaks

In check_all_cpu_dscr_defaults, opendir() opens the directory stream.
Add missing closedir() in the error path to release it.

In check_cpu_dscr_default, open() creates an open file descriptor.
Add missing close() in the error path to release it.

Fixes: ebd5858c904b ("selftests/powerpc: Add test for all DSCR sysfs interfaces")
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221205084429.570654-1-linmq006@gmail.com
---
 tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index fbbdffdb2e5d2..f20d1c166d1e4 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -24,6 +24,7 @@ static int check_cpu_dscr_default(char *file, unsigned long val)
 	rc = read(fd, buf, sizeof(buf));
 	if (rc == -1) {
 		perror("read() failed");
+		close(fd);
 		return 1;
 	}
 	close(fd);
@@ -65,8 +66,10 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
 		if (access(file, F_OK))
 			continue;
 
-		if (check_cpu_dscr_default(file, val))
+		if (check_cpu_dscr_default(file, val)) {
+			closedir(sysfs);
 			return 1;
+		}
 	}
 	closedir(sysfs);
 	return 0;
-- 
GitLab


From dcb40e9fcce9bd251eaff19f3724131db522846c Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Tue, 18 Oct 2022 10:42:53 +0800
Subject: [PATCH 3344/4122] iommu/mediatek: Add platform_device_put for
 recovering the device refcnt

Add platform_device_put to match with of_find_device_by_node.

Meanwhile, I add a new variable "pcommdev" which is for smi common device.
Otherwise, "platform_device_put(plarbdev)" for smi-common dev may be not
readable. And add a checking for whether pcommdev is NULL.

Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE")
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221018024258.19073-2-yong.wu@mediatek.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index c80f33dd2d434..cce948eead6a7 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1055,7 +1055,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 				  struct mtk_iommu_data *data)
 {
 	struct device_node *larbnode, *smicomm_node, *smi_subcomm_node;
-	struct platform_device *plarbdev;
+	struct platform_device *plarbdev, *pcommdev;
 	struct device_link *link;
 	int i, larb_nr, ret;
 
@@ -1086,12 +1086,14 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 		}
 		if (!plarbdev->dev.driver) {
 			of_node_put(larbnode);
+			platform_device_put(plarbdev);
 			return -EPROBE_DEFER;
 		}
 		data->larb_imu[id].dev = &plarbdev->dev;
 
 		component_match_add_release(dev, match, component_release_of,
 					    component_compare_of, larbnode);
+		platform_device_put(plarbdev);
 	}
 
 	/* Get smi-(sub)-common dev from the last larb. */
@@ -1109,12 +1111,15 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 	else
 		smicomm_node = smi_subcomm_node;
 
-	plarbdev = of_find_device_by_node(smicomm_node);
+	pcommdev = of_find_device_by_node(smicomm_node);
 	of_node_put(smicomm_node);
-	data->smicomm_dev = &plarbdev->dev;
+	if (!pcommdev)
+		return -ENODEV;
+	data->smicomm_dev = &pcommdev->dev;
 
 	link = device_link_add(data->smicomm_dev, dev,
 			       DL_FLAG_STATELESS | DL_FLAG_PM_RUNTIME);
+	platform_device_put(pcommdev);
 	if (!link) {
 		dev_err(dev, "Unable to link %s.\n", dev_name(data->smicomm_dev));
 		return -EINVAL;
-- 
GitLab


From b5765a1b44bea9dfcae69c53ffeb4c689d0922a7 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Tue, 18 Oct 2022 10:42:54 +0800
Subject: [PATCH 3345/4122] iommu/mediatek: Use component_match_add

In order to simplify the error patch(avoid call of_node_put), Use
component_match_add instead component_match_add_release since we are only
interested in the "device" here. Then we could always call of_node_put in
normal path.

Strictly this is not a fixes patch, but it is a prepare for adding the
error path, thus I add a Fixes tag too.

Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE")
Suggested-by: Robin Murphy <robin.murphy@arm.com>
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221018024258.19073-3-yong.wu@mediatek.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index cce948eead6a7..df490236e1fb5 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1080,19 +1080,17 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 			id = i;
 
 		plarbdev = of_find_device_by_node(larbnode);
-		if (!plarbdev) {
-			of_node_put(larbnode);
+		of_node_put(larbnode);
+		if (!plarbdev)
 			return -ENODEV;
-		}
+
 		if (!plarbdev->dev.driver) {
-			of_node_put(larbnode);
 			platform_device_put(plarbdev);
 			return -EPROBE_DEFER;
 		}
 		data->larb_imu[id].dev = &plarbdev->dev;
 
-		component_match_add_release(dev, match, component_release_of,
-					    component_compare_of, larbnode);
+		component_match_add(dev, match, component_compare_dev, &plarbdev->dev);
 		platform_device_put(plarbdev);
 	}
 
-- 
GitLab


From 26593928564cf5b576ff05d3cbd958f57c9534bb Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Tue, 18 Oct 2022 10:42:55 +0800
Subject: [PATCH 3346/4122] iommu/mediatek: Add error path for loop of
 mm_dts_parse

The mtk_iommu_mm_dts_parse will parse the smi larbs nodes. if the i+1
larb is parsed fail, we should put_device for the i..0 larbs.

There are two places need to comment:
1) The larbid may be not linear mapping, we should loop whole
   the array in the error path.
2) I move this line position: "data->larb_imu[id].dev = &plarbdev->dev;"
   before "if (!plarbdev->dev.driver)", That means set
   data->larb_imu[id].dev before the error path. then we don't need
   "platform_device_put(plarbdev)" again in probe_defer case. All depend
   on "put_device" of the error path in error cases.

Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE")
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221018024258.19073-4-yong.wu@mediatek.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index df490236e1fb5..e0c669b75271d 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1067,8 +1067,10 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 		u32 id;
 
 		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
-		if (!larbnode)
-			return -EINVAL;
+		if (!larbnode) {
+			ret = -EINVAL;
+			goto err_larbdev_put;
+		}
 
 		if (!of_device_is_available(larbnode)) {
 			of_node_put(larbnode);
@@ -1081,14 +1083,16 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 
 		plarbdev = of_find_device_by_node(larbnode);
 		of_node_put(larbnode);
-		if (!plarbdev)
-			return -ENODEV;
+		if (!plarbdev) {
+			ret = -ENODEV;
+			goto err_larbdev_put;
+		}
+		data->larb_imu[id].dev = &plarbdev->dev;
 
 		if (!plarbdev->dev.driver) {
-			platform_device_put(plarbdev);
-			return -EPROBE_DEFER;
+			ret = -EPROBE_DEFER;
+			goto err_larbdev_put;
 		}
-		data->larb_imu[id].dev = &plarbdev->dev;
 
 		component_match_add(dev, match, component_compare_dev, &plarbdev->dev);
 		platform_device_put(plarbdev);
@@ -1123,6 +1127,15 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 		return -EINVAL;
 	}
 	return 0;
+
+err_larbdev_put:
+	/* id may be not linear mapping, loop whole the array */
+	for (i = MTK_LARB_NR_MAX - 1; i >= 0; i++) {
+		if (!data->larb_imu[i].dev)
+			continue;
+		put_device(data->larb_imu[i].dev);
+	}
+	return ret;
 }
 
 static int mtk_iommu_probe(struct platform_device *pdev)
-- 
GitLab


From ef693a8440926884bfd9cc3d6d36f65719513350 Mon Sep 17 00:00:00 2001
From: Guenter Roeck <groeck@chromium.org>
Date: Tue, 18 Oct 2022 10:42:56 +0800
Subject: [PATCH 3347/4122] iommu/mediatek: Validate number of phandles
 associated with "mediatek,larbs"

Fix the smatch warnings:
drivers/iommu/mtk_iommu.c:878 mtk_iommu_mm_dts_parse() error: uninitialized
symbol 'larbnode'.

If someone abuse the dtsi node(Don't follow the definition of dt-binding),
for example "mediatek,larbs" is provided as boolean property, "larb_nr"
will be zero and cause abnormal.

To fix this problem and improve the code safety, add some checking
for the invalid input from dtsi, e.g. checking the larb_nr/larbid valid
range, and avoid "mediatek,larb-id" property conflicts in the smi-larb
nodes.

Fixes: d2e9a1102cfc ("iommu/mediatek: Contain MM IOMMU flow with the MM TYPE")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221018024258.19073-5-yong.wu@mediatek.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index e0c669b75271d..41e96da6160f3 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1062,6 +1062,8 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 	larb_nr = of_count_phandle_with_args(dev->of_node, "mediatek,larbs", NULL);
 	if (larb_nr < 0)
 		return larb_nr;
+	if (larb_nr == 0 || larb_nr > MTK_LARB_NR_MAX)
+		return -EINVAL;
 
 	for (i = 0; i < larb_nr; i++) {
 		u32 id;
@@ -1080,6 +1082,11 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 		ret = of_property_read_u32(larbnode, "mediatek,larb-id", &id);
 		if (ret)/* The id is consecutive if there is no this property */
 			id = i;
+		if (id >= MTK_LARB_NR_MAX) {
+			of_node_put(larbnode);
+			ret = -EINVAL;
+			goto err_larbdev_put;
+		}
 
 		plarbdev = of_find_device_by_node(larbnode);
 		of_node_put(larbnode);
@@ -1087,6 +1094,11 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 			ret = -ENODEV;
 			goto err_larbdev_put;
 		}
+		if (data->larb_imu[id].dev) {
+			platform_device_put(plarbdev);
+			ret = -EEXIST;
+			goto err_larbdev_put;
+		}
 		data->larb_imu[id].dev = &plarbdev->dev;
 
 		if (!plarbdev->dev.driver) {
-- 
GitLab


From 6cde583d5352818a51985b32a960cdde85ab3821 Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Tue, 18 Oct 2022 10:42:57 +0800
Subject: [PATCH 3348/4122] iommu/mediatek: Improve safety for mediatek,smi
 property in larb nodes

No functional change. Just improve safety from dts.

All the larbs that connect to one IOMMU must connect with the same
smi-common. This patch checks all the mediatek,smi property for each
larb, If their mediatek,smi are different, it will return fails.
Also avoid there is no available smi-larb nodes.

Suggested-by: Guenter Roeck <groeck@chromium.org>
Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221018024258.19073-6-yong.wu@mediatek.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 53 +++++++++++++++++++++++++++------------
 1 file changed, 37 insertions(+), 16 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 41e96da6160f3..4a6ee25a6f99a 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1054,7 +1054,7 @@ static const struct component_master_ops mtk_iommu_com_ops = {
 static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **match,
 				  struct mtk_iommu_data *data)
 {
-	struct device_node *larbnode, *smicomm_node, *smi_subcomm_node;
+	struct device_node *larbnode, *frst_avail_smicomm_node = NULL;
 	struct platform_device *plarbdev, *pcommdev;
 	struct device_link *link;
 	int i, larb_nr, ret;
@@ -1066,6 +1066,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 		return -EINVAL;
 
 	for (i = 0; i < larb_nr; i++) {
+		struct device_node *smicomm_node, *smi_subcomm_node;
 		u32 id;
 
 		larbnode = of_parse_phandle(dev->of_node, "mediatek,larbs", i);
@@ -1106,27 +1107,47 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 			goto err_larbdev_put;
 		}
 
+		/* Get smi-(sub)-common dev from the last larb. */
+		smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0);
+		if (!smi_subcomm_node) {
+			ret = -EINVAL;
+			goto err_larbdev_put;
+		}
+
+		/*
+		 * It may have two level smi-common. the node is smi-sub-common if it
+		 * has a new mediatek,smi property. otherwise it is smi-commmon.
+		 */
+		smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0);
+		if (smicomm_node)
+			of_node_put(smi_subcomm_node);
+		else
+			smicomm_node = smi_subcomm_node;
+
+		/*
+		 * All the larbs that connect to one IOMMU must connect with the same
+		 * smi-common.
+		 */
+		if (!frst_avail_smicomm_node) {
+			frst_avail_smicomm_node = smicomm_node;
+		} else if (frst_avail_smicomm_node != smicomm_node) {
+			dev_err(dev, "mediatek,smi property is not right @larb%d.", id);
+			of_node_put(smicomm_node);
+			ret = -EINVAL;
+			goto err_larbdev_put;
+		} else {
+			of_node_put(smicomm_node);
+		}
+
 		component_match_add(dev, match, component_compare_dev, &plarbdev->dev);
 		platform_device_put(plarbdev);
 	}
 
-	/* Get smi-(sub)-common dev from the last larb. */
-	smi_subcomm_node = of_parse_phandle(larbnode, "mediatek,smi", 0);
-	if (!smi_subcomm_node)
+	if (!frst_avail_smicomm_node)
 		return -EINVAL;
 
-	/*
-	 * It may have two level smi-common. the node is smi-sub-common if it
-	 * has a new mediatek,smi property. otherwise it is smi-commmon.
-	 */
-	smicomm_node = of_parse_phandle(smi_subcomm_node, "mediatek,smi", 0);
-	if (smicomm_node)
-		of_node_put(smi_subcomm_node);
-	else
-		smicomm_node = smi_subcomm_node;
-
-	pcommdev = of_find_device_by_node(smicomm_node);
-	of_node_put(smicomm_node);
+	pcommdev = of_find_device_by_node(frst_avail_smicomm_node);
+	of_node_put(frst_avail_smicomm_node);
 	if (!pcommdev)
 		return -ENODEV;
 	data->smicomm_dev = &pcommdev->dev;
-- 
GitLab


From 9ff894edd542618dad2fef538f8272c620a501db Mon Sep 17 00:00:00 2001
From: Yong Wu <yong.wu@mediatek.com>
Date: Tue, 18 Oct 2022 10:42:58 +0800
Subject: [PATCH 3349/4122] iommu/mediatek: Remove unused "mapping" member from
 mtk_iommu_data

Just remove a unused variable that only is for mtk_iommu_v1.

Signed-off-by: Yong Wu <yong.wu@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221018024258.19073-7-yong.wu@mediatek.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 4a6ee25a6f99a..9c1ce8b46a17b 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -229,10 +229,7 @@ struct mtk_iommu_data {
 	struct device			*smicomm_dev;
 
 	struct mtk_iommu_bank_data	*bank;
-
-	struct dma_iommu_mapping	*mapping; /* For mtk_iommu_v1.c */
 	struct regmap			*pericfg;
-
 	struct mutex			mutex; /* Protect m4u_group/m4u_dom above */
 
 	/*
-- 
GitLab


From 88699c024f9227b79af90adc929625e4b7867932 Mon Sep 17 00:00:00 2001
From: Michael Forney <mforney@mforney.org>
Date: Tue, 22 Nov 2022 14:18:25 -0800
Subject: [PATCH 3350/4122] iommu/amd: Fix typo in macro parameter name

IVRS_GET_SBDF_ID is only called with fn as the fourth parameter,
so this had no effect, but fixing the name will avoid bugs if that
ever changes.

Signed-off-by: Michael Forney <mforney@mforney.org>
Reviewed-by: Vasant Hegde <vasant.hegde@amd.com>
Link: https://lore.kernel.org/r/381fbc430c0ccdd78b3b696cfc0c32b233526ca5.1669159392.git.mforney@mforney.org
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/amd/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 34029d1161073..467b194975b30 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -85,7 +85,7 @@
 
 #define LOOP_TIMEOUT	2000000
 
-#define IVRS_GET_SBDF_ID(seg, bus, dev, fd)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
+#define IVRS_GET_SBDF_ID(seg, bus, dev, fn)	(((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
 						 | ((dev & 0x1f) << 3) | (fn & 0x7))
 
 /*
-- 
GitLab


From ef5bb8e7a7127218f826b9ccdf7508e7a339f4c2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 28 Nov 2022 10:06:28 -0400
Subject: [PATCH 3351/4122] iommu/sun50i: Remove IOMMU_DOMAIN_IDENTITY

This driver treats IOMMU_DOMAIN_IDENTITY the same as UNMANAGED, which
cannot possibly be correct.

UNMANAGED domains are required to start out blocking all DMAs. This seems
to be what this driver does as it allocates a first level 'dt' for the IO
page table that is 0 filled.

Thus UNMANAGED looks like a working IO page table, and so IDENTITY must be
a mistake. Remove it.

Fixes: 4100b8c229b3 ("iommu: Add Allwinner H6 IOMMU driver")
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/0-v1-97f0adf27b5e+1f0-s50_identity_jgg@nvidia.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/sun50i-iommu.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 5cb2d44dfb92d..5b585eace3d46 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -672,7 +672,6 @@ static struct iommu_domain *sun50i_iommu_domain_alloc(unsigned type)
 	struct sun50i_iommu_domain *sun50i_domain;
 
 	if (type != IOMMU_DOMAIN_DMA &&
-	    type != IOMMU_DOMAIN_IDENTITY &&
 	    type != IOMMU_DOMAIN_UNMANAGED)
 		return NULL;
 
-- 
GitLab


From 00ef8885a945c37551547d8ac8361cacd20c4e42 Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ribalda@chromium.org>
Date: Mon, 28 Nov 2022 23:16:48 +0100
Subject: [PATCH 3352/4122] iommu/mediatek: Fix crash on isr after kexec()

If the system is rebooted via isr(), the IRQ handler might
be triggered before the domain is initialized. Resulting on
an invalid memory access error.

Fix:
[    0.500930] Unable to handle kernel read from unreadable memory at virtual address 0000000000000070
[    0.501166] Call trace:
[    0.501174]  report_iommu_fault+0x28/0xfc
[    0.501180]  mtk_iommu_isr+0x10c/0x1c0

Signed-off-by: Ricardo Ribalda <ribalda@chromium.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Robin Murphy <robin.murphy@arm.com>
Link: https://lore.kernel.org/r/20221125-mtk-iommu-v2-0-e168dff7d43e@chromium.org
[ joro: Fixed spelling in commit message ]
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 9c1ce8b46a17b..392b8c167c44c 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -462,7 +462,7 @@ static irqreturn_t mtk_iommu_isr(int irq, void *dev_id)
 		fault_larb = data->plat_data->larbid_remap[fault_larb][sub_comm];
 	}
 
-	if (report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova,
+	if (!dom || report_iommu_fault(&dom->domain, bank->parent_dev, fault_iova,
 			       write ? IOMMU_FAULT_WRITE : IOMMU_FAULT_READ)) {
 		dev_err_ratelimited(
 			bank->parent_dev,
-- 
GitLab


From 58ff6569bc6ec369482eb2d132868870380be64c Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Mon, 5 Dec 2022 12:05:51 +0000
Subject: [PATCH 3353/4122] KVM: arm64: PMU: Fix period computation for 64bit
 counters with 32bit overflow

Fix the bogus masking when computing the period of a 64bit counter
with 32bit overflow. It really should be treated like a 32bit counter
for the purpose of the period.

Reported-by: Ricardo Koller <ricarkol@google.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/Y4jbosgHbUDI0WF4@google.com
---
 arch/arm64/kvm/pmu-emul.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index d8ea399430861..24908400e1906 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -461,14 +461,10 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
 {
 	u64 val;
 
-	if (kvm_pmc_is_64bit(pmc)) {
-		if (!kvm_pmc_has_64bit_overflow(pmc))
-			val = -(counter & GENMASK(31, 0));
-		else
-			val = (-counter) & GENMASK(63, 0);
-	} else {
+	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
+		val = (-counter) & GENMASK(63, 0);
+	else
 		val = (-counter) & GENMASK(31, 0);
-	}
 
 	return val;
 }
-- 
GitLab


From bcc5e2dcf09089b337b76fc1a589f6ff95ca19ac Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 23 Nov 2022 09:43:14 +0300
Subject: [PATCH 3354/4122] staging: rtl8192u: Fix use after free in
 ieee80211_rx()

We cannot dereference the "skb" pointer after calling
ieee80211_monitor_rx(), because it is a use after free.

Fixes: 8fc8598e61f6 ("Staging: Added Realtek rtl8192u driver to staging")
Signed-off-by: Dan Carpenter <error27@gmail.com>
Link: https://lore.kernel.org/r/Y33BArx3k/aw6yv/@kili
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
index f142d09869901..5c73e3f8541ae 100644
--- a/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
+++ b/drivers/staging/rtl8192u/ieee80211/ieee80211_rx.c
@@ -951,9 +951,11 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb,
 #endif
 
 	if (ieee->iw_mode == IW_MODE_MONITOR) {
+		unsigned int len = skb->len;
+
 		ieee80211_monitor_rx(ieee, skb, rx_stats);
 		stats->rx_packets++;
-		stats->rx_bytes += skb->len;
+		stats->rx_bytes += len;
 		return 1;
 	}
 
-- 
GitLab


From 24a525a671ef1e410926da010ef2a3a1b1b96481 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Tue, 22 Nov 2022 23:45:57 +0100
Subject: [PATCH 3355/4122] staging: rtl8192e: Remove unchanged variable
 AcmMethod

AcmMethod is initialized and never changed. The evaluation will always
have the same result. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/7d73a66184e13d5f8d4af7d21564032247a7e923.1669156825.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 --
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h   | 2 --
 drivers/staging/rtl8192e/rtl819x_Qos.h         | 7 -------
 4 files changed, 12 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index b9c846015d286..80eba5d5ab37b 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -224,8 +224,6 @@ void rtl92e_set_reg(struct net_device *dev, u8 variable, u8 *val)
 		u8 acm = pAciAifsn->f.acm;
 		u8 AcmCtrl = rtl92e_readb(dev, AcmHwCtrl);
 
-		AcmCtrl = AcmCtrl | ((priv->AcmMethod == 2) ? 0x0 : 0x1);
-
 		if (acm) {
 			switch (eACI) {
 			case AC0_BE:
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 399ee9783f994..a8cbeb9545da3 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -830,7 +830,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	struct r8192_priv *priv = rtllib_priv(dev);
 	u8 i;
 
-	priv->AcmMethod = eAcmWay2_SW;
 	priv->dot11_current_preamble_mode = PREAMBLE_AUTO;
 	priv->rtllib->status = 0;
 	priv->polling_timer_on = 0;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index b1656d4ecbad6..54c7a21280534 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -340,8 +340,6 @@ struct r8192_priv {
 	struct bb_reg_definition PHYRegDef[4];
 	struct rate_adaptive rate_adaptive;
 
-	enum acm_method AcmMethod;
-
 	struct rt_firmware			*pFirmware;
 	enum rtl819x_loopback LoopbackMode;
 
diff --git a/drivers/staging/rtl8192e/rtl819x_Qos.h b/drivers/staging/rtl8192e/rtl819x_Qos.h
index 5073f9f40fdc2..c010eb0d60366 100644
--- a/drivers/staging/rtl8192e/rtl819x_Qos.h
+++ b/drivers/staging/rtl8192e/rtl819x_Qos.h
@@ -97,13 +97,6 @@ enum direction_value {
 	DIR_BI_DIR		= 3,
 };
 
-enum acm_method {
-	eAcmWay0_SwAndHw		= 0,
-	eAcmWay1_HW			= 1,
-	eAcmWay2_SW			= 2,
-};
-
-
 struct acm {
 	u64		UsedTime;
 	u64		MediumTime;
-- 
GitLab


From d694a05fda8797521d51ceba2e7942751391626a Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Tue, 22 Nov 2022 23:46:08 +0100
Subject: [PATCH 3356/4122] staging: rtl8192e: Remove unused variable skb_aggQ

skb_aggQ is initialized, never used and purged. Remove resulting dead
code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/3197be3cb412eea1c662a5bec1b1afda2cee675d.1669156825.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c | 2 --
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   | 2 --
 drivers/staging/rtl8192e/rtllib.h              | 1 -
 3 files changed, 5 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
index 80eba5d5ab37b..f02e67f68e237 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
+++ b/drivers/staging/rtl8192e/rtl8192e/r8192E_dev.c
@@ -1946,8 +1946,6 @@ void rtl92e_stop_adapter(struct net_device *dev, bool reset)
 
 	for (i = 0; i < MAX_QUEUE_SIZE; i++)
 		skb_queue_purge(&priv->rtllib->skb_waitQ[i]);
-	for (i = 0; i < MAX_QUEUE_SIZE; i++)
-		skb_queue_purge(&priv->rtllib->skb_aggQ[i]);
 
 	skb_queue_purge(&priv->skb_queue);
 }
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index a8cbeb9545da3..369a59a753fe8 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -903,8 +903,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 
 	for (i = 0; i < MAX_QUEUE_SIZE; i++)
 		skb_queue_head_init(&priv->rtllib->skb_waitQ[i]);
-	for (i = 0; i < MAX_QUEUE_SIZE; i++)
-		skb_queue_head_init(&priv->rtllib->skb_aggQ[i]);
 }
 
 static void _rtl92e_init_priv_lock(struct r8192_priv *priv)
diff --git a/drivers/staging/rtl8192e/rtllib.h b/drivers/staging/rtl8192e/rtllib.h
index 7119c9c5e1fef..1152fbf433837 100644
--- a/drivers/staging/rtl8192e/rtllib.h
+++ b/drivers/staging/rtl8192e/rtllib.h
@@ -1624,7 +1624,6 @@ struct rtllib_device {
 	int mgmt_queue_tail;
 	u8 AsocRetryCount;
 	struct sk_buff_head skb_waitQ[MAX_QUEUE_SIZE];
-	struct sk_buff_head  skb_aggQ[MAX_QUEUE_SIZE];
 
 	bool	bdynamic_txpower_enable;
 
-- 
GitLab


From efc6f7ce3adf3331298a4bfe2fa9d3912e660261 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Tue, 22 Nov 2022 23:46:15 +0100
Subject: [PATCH 3357/4122] staging: rtl8192e: Remove unused variable
 initialized_at_probe

initialized_at_probe is initialized and never used. Remove resulting dead
code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/4855b2dd5b7296b0eb10e697f605fb820e1dfc7b.1669156825.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c | 1 -
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 369a59a753fe8..7b78bdfadc8da 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -836,7 +836,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->up_first_time = 1;
 	priv->blinked_ingpio = false;
 	priv->being_init_adapter = false;
-	priv->initialized_at_probe = false;
 	priv->bdisable_nic = false;
 	priv->bfirst_init = false;
 	priv->txringcount = 64;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 54c7a21280534..1c00ad709a3c2 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -309,7 +309,6 @@ struct r8192_priv {
 
 	bool		bfirst_init;
 	bool		bfirst_after_down;
-	bool		initialized_at_probe;
 	bool		being_init_adapter;
 
 	int		irq;
-- 
GitLab


From aae3567db0e9e4be26c8c51c441d8bc43d613116 Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Tue, 22 Nov 2022 23:46:21 +0100
Subject: [PATCH 3358/4122] staging: rtl8192e: Remove unused variable
 ChannelAccessSetting

ChannelAccessSetting is never used. Remove resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/b88a31954532f47a4caf9abfcad8e20b32a618cc.1669156825.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index 1c00ad709a3c2..ca2e685148d4d 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -234,15 +234,6 @@ struct rt_stats {
 	u32	CurrentShowTxate;
 };
 
-struct channel_access_setting {
-	u16 SIFS_Timer;
-	u16 DIFS_Timer;
-	u16 SlotTimeTimer;
-	u16 EIFS_Timer;
-	u16 CWminIndex;
-	u16 CWmaxIndex;
-};
-
 struct init_gain {
 	u8	xaagccore1;
 	u8	xbagccore1;
@@ -321,9 +312,6 @@ struct r8192_priv {
 	struct delayed_work		txpower_tracking_wq;
 	struct delayed_work		rfpath_check_wq;
 	struct delayed_work		gpio_change_rf_wq;
-
-	struct channel_access_setting ChannelAccessSetting;
-
 	struct rtl819x_ops			*ops;
 	struct rtllib_device			*rtllib;
 
-- 
GitLab


From e0005909e5e98f230397064de391371ba6a66fba Mon Sep 17 00:00:00 2001
From: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Date: Tue, 22 Nov 2022 23:46:29 +0100
Subject: [PATCH 3359/4122] staging: rtl8192e: Remove unused variable int_log

int_log is initialized and incremented but never evaluated. Remove
resulting dead code.

Signed-off-by: Philipp Hortmann <philipp.g.hortmann@gmail.com>
Link: https://lore.kernel.org/r/e2edcde052cc3c47e6e6b94d09e460b8cf6a49a8.1669156825.git.philipp.g.hortmann@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtl8192e/r8190P_def.h | 15 ---------------
 drivers/staging/rtl8192e/rtl8192e/rtl_core.c   |  3 ---
 drivers/staging/rtl8192e/rtl8192e/rtl_core.h   |  2 --
 3 files changed, 20 deletions(-)

diff --git a/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h b/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h
index 53fd79a281892..ac192254a4bb3 100644
--- a/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h
+++ b/drivers/staging/rtl8192e/rtl8192e/r8190P_def.h
@@ -154,21 +154,6 @@ struct tx_fwinfo_8190pci {
 
 };
 
-struct log_int_8190 {
-	u32	nIMR_COMDOK;
-	u32	nIMR_MGNTDOK;
-	u32	nIMR_HIGH;
-	u32	nIMR_VODOK;
-	u32	nIMR_VIDOK;
-	u32	nIMR_BEDOK;
-	u32	nIMR_BKDOK;
-	u32	nIMR_ROK;
-	u32	nIMR_RCOK;
-	u32	nIMR_TBDOK;
-	u32	nIMR_BDOK;
-	u32	nIMR_RXFOVW;
-};
-
 struct phy_ofdm_rx_status_rxsc_sgien_exintfflag {
 	u8			reserved:4;
 	u8			rxsc:2;
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
index 7b78bdfadc8da..f8fbe78ccad9f 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.c
@@ -866,8 +866,6 @@ static void _rtl92e_init_priv_variable(struct net_device *dev)
 	priv->rst_progress = RESET_TYPE_NORESET;
 	priv->force_reset = false;
 	memset(priv->rtllib->swcamtable, 0, sizeof(struct sw_cam_table) * 32);
-
-	memset(&priv->int_log, 0, sizeof(struct log_int_8190));
 	priv->rx_ctr = 0;
 	priv->rtllib->wx_set_enc = 0;
 	priv->hw_radio_off = false;
@@ -2185,7 +2183,6 @@ static irqreturn_t _rtl92e_irq(int irq, void *netdev)
 
 	if (inta & IMR_ROK) {
 		priv->stats.rxint++;
-		priv->int_log.nIMR_ROK++;
 		tasklet_schedule(&priv->irq_rx_tasklet);
 	}
 
diff --git a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
index ca2e685148d4d..cceb77492363a 100644
--- a/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
+++ b/drivers/staging/rtl8192e/rtl8192e/rtl_core.h
@@ -317,8 +317,6 @@ struct r8192_priv {
 
 	struct work_struct				reset_wq;
 
-	struct log_int_8190 int_log;
-
 	enum rt_customer_id CustomerID;
 
 
-- 
GitLab


From d30f4436f364b4ad915ca2c09be07cd0f93ceb44 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 23 Nov 2022 16:12:53 +0800
Subject: [PATCH 3360/4122] staging: rtl8192e: Fix potential use-after-free in
 rtllib_rx_Monitor()

The skb is delivered to netif_rx() in rtllib_monitor_rx(), which may free it,
after calling this, dereferencing skb may trigger use-after-free.
Found by Smatch.

Fixes: 94a799425eee ("From: wlanfae <wlanfae@realtek.com> [PATCH 1/8] rtl8192e: Import new version of driver from realtek")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Link: https://lore.kernel.org/r/20221123081253.22296-1-yuehaibing@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index 6a0f5bbb99ef4..f8965afab7678 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -1489,9 +1489,9 @@ static int rtllib_rx_Monitor(struct rtllib_device *ieee, struct sk_buff *skb,
 		hdrlen += 4;
 	}
 
-	rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen);
 	ieee->stats.rx_packets++;
 	ieee->stats.rx_bytes += skb->len;
+	rtllib_monitor_rx(ieee, skb, rx_stats, hdrlen);
 
 	return 1;
 }
-- 
GitLab


From d43ea3d4d2efd015ae7999168a58796671df4138 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 16:42:49 +0100
Subject: [PATCH 3361/4122] staging: r8188eu: drop return value from
 issue_probereq_ex

The only caller of issue_probereq_ex does not check the return value. We
can remove it and make issue_probereq_ex a void function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126154253.178275-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 11 ++---------
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  4 ++--
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 161cb67f78828..2f3f7da08d0c7 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -4482,8 +4482,8 @@ inline void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *ps
 	_issue_probereq(padapter, pssid, da, false);
 }
 
-int issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da,
-	int try_cnt, int wait_ms)
+void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da,
+		       int try_cnt, int wait_ms)
 {
 	int ret;
 	int i = 0;
@@ -4497,13 +4497,6 @@ int issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid,
 			msleep(wait_ms);
 
 	} while ((i < try_cnt) && ((ret == _FAIL) || (wait_ms == 0)));
-
-	if (ret != _FAIL) {
-		ret = _SUCCESS;
-		goto exit;
-	}
-exit:
-	return ret;
 }
 
 /*  if psta == NULL, indicate we are station (client) now... */
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index c46fc1a530859..720610bc8fefc 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -512,8 +512,8 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta,
 		unsigned short status);
 void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid,
 		    u8 *da);
-s32 issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid,
-		      u8 *da, int try_cnt, int wait_ms);
+void issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid,
+		       u8 *da, int try_cnt, int wait_ms);
 int issue_nulldata(struct adapter *padapter, unsigned char *da,
 		   unsigned int power_mode, int try_cnt, int wait_ms);
 int issue_qos_nulldata(struct adapter *padapter, unsigned char *da,
-- 
GitLab


From e0e14bbee1811209435d3ba29008769bd7a45057 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 16:42:50 +0100
Subject: [PATCH 3362/4122] staging: r8188eu: remove wait_ms parameter

Remove the wait_ms parameter from function issue_probereq_ex. There's only
one caller, who requests a waiting time of 1 ms.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126154253.178275-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 12 ++++++------
 drivers/staging/r8188eu/include/rtw_mlme_ext.h |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 2f3f7da08d0c7..990336244030d 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -4483,20 +4483,20 @@ inline void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *ps
 }
 
 void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da,
-		       int try_cnt, int wait_ms)
+		       int try_cnt)
 {
 	int ret;
 	int i = 0;
 
 	do {
-		ret = _issue_probereq(padapter, pssid, da, wait_ms > 0);
+		ret = _issue_probereq(padapter, pssid, da, true);
 
 		i++;
 
-		if (i < try_cnt && wait_ms > 0 && ret == _FAIL)
-			msleep(wait_ms);
+		if (i < try_cnt && ret == _FAIL)
+			msleep(1);
 
-	} while ((i < try_cnt) && ((ret == _FAIL) || (wait_ms == 0)));
+	} while ((i < try_cnt) && (ret == _FAIL));
 }
 
 /*  if psta == NULL, indicate we are station (client) now... */
@@ -7046,7 +7046,7 @@ void linked_status_chk(struct adapter *padapter)
 				}
 
 				if (rx_chk != _SUCCESS)
-					issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr, 3, 1);
+					issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr, 3);
 
 				if ((tx_chk != _SUCCESS && pmlmeinfo->link_count++ == 0xf) || rx_chk != _SUCCESS) {
 					tx_chk = issue_nulldata(padapter, psta->hwaddr, 0, 3, 1);
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 720610bc8fefc..04fd673d3a04c 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -513,7 +513,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta,
 void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid,
 		    u8 *da);
 void issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid,
-		       u8 *da, int try_cnt, int wait_ms);
+		       u8 *da, int try_cnt);
 int issue_nulldata(struct adapter *padapter, unsigned char *da,
 		   unsigned int power_mode, int try_cnt, int wait_ms);
 int issue_qos_nulldata(struct adapter *padapter, unsigned char *da,
-- 
GitLab


From 77833c305dc897c094e5c5419c27568509346609 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 16:42:51 +0100
Subject: [PATCH 3363/4122] staging: r8188eu: fix the number of probereq
 retries

issue_probereq_ex sends a probe request and retries if this fails. There's
no point in making the number of retries configurable. Hard-code the value
that's used by issue_probereq_ex's only caller.

Simplify the code to check the loop condition only once.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126154253.178275-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c   | 21 +++++++------------
 .../staging/r8188eu/include/rtw_mlme_ext.h    |  3 +--
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 990336244030d..667f54e313eff 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -4482,21 +4482,16 @@ inline void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *ps
 	_issue_probereq(padapter, pssid, da, false);
 }
 
-void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da,
-		       int try_cnt)
+void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da)
 {
-	int ret;
-	int i = 0;
-
-	do {
-		ret = _issue_probereq(padapter, pssid, da, true);
-
-		i++;
+	int i;
 
-		if (i < try_cnt && ret == _FAIL)
+	for (i = 0; i < 3; i++) {
+		if (_issue_probereq(padapter, pssid, da, true) == _FAIL)
 			msleep(1);
-
-	} while ((i < try_cnt) && (ret == _FAIL));
+		else
+			break;
+	}
 }
 
 /*  if psta == NULL, indicate we are station (client) now... */
@@ -7046,7 +7041,7 @@ void linked_status_chk(struct adapter *padapter)
 				}
 
 				if (rx_chk != _SUCCESS)
-					issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr, 3);
+					issue_probereq_ex(padapter, &pmlmeinfo->network.Ssid, psta->hwaddr);
 
 				if ((tx_chk != _SUCCESS && pmlmeinfo->link_count++ == 0xf) || rx_chk != _SUCCESS) {
 					tx_chk = issue_nulldata(padapter, psta->hwaddr, 0, 3, 1);
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 04fd673d3a04c..089bd54467733 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -512,8 +512,7 @@ void issue_auth(struct adapter *padapter, struct sta_info *psta,
 		unsigned short status);
 void issue_probereq(struct adapter *padapter, struct ndis_802_11_ssid *pssid,
 		    u8 *da);
-void issue_probereq_ex(struct adapter *adapter, struct ndis_802_11_ssid *pssid,
-		       u8 *da, int try_cnt);
+void issue_probereq_ex(struct adapter *padapter, struct ndis_802_11_ssid *pssid, u8 *da);
 int issue_nulldata(struct adapter *padapter, unsigned char *da,
 		   unsigned int power_mode, int try_cnt, int wait_ms);
 int issue_qos_nulldata(struct adapter *padapter, unsigned char *da,
-- 
GitLab


From 92f1bb5ff104d7f36754f27c17483d4e8ffe85ae Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 16:42:52 +0100
Subject: [PATCH 3364/4122] staging: r8188eu: simplify the checks for zero
 address

In function ap2sta_data_frame, we can use is_zero_ether_addr to check for
all-zero ethernet addresses. Both pattrib->bssid and mybssid are 16-bit
aligned.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126154253.178275-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_recv.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_recv.c b/drivers/staging/r8188eu/core/rtw_recv.c
index 5b0a66aebff19..631c500dda427 100644
--- a/drivers/staging/r8188eu/core/rtw_recv.c
+++ b/drivers/staging/r8188eu/core/rtw_recv.c
@@ -779,9 +779,8 @@ static int ap2sta_data_frame(
 		}
 
 		/*  check BSSID */
-		if (!memcmp(pattrib->bssid, "\x0\x0\x0\x0\x0\x0", ETH_ALEN) ||
-		    !memcmp(mybssid, "\x0\x0\x0\x0\x0\x0", ETH_ALEN) ||
-		     (memcmp(pattrib->bssid, mybssid, ETH_ALEN))) {
+		if (is_zero_ether_addr(pattrib->bssid) || is_zero_ether_addr(mybssid) ||
+		    (memcmp(pattrib->bssid, mybssid, ETH_ALEN))) {
 			if (!bmcast)
 				issue_deauth(adapter, pattrib->bssid, WLAN_REASON_CLASS3_FRAME_FROM_NONASSOC_STA);
 
-- 
GitLab


From 57a8f00f03f1fa493fadd20992e700d24f6860cb Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 16:42:53 +0100
Subject: [PATCH 3365/4122] staging: r8188eu: use ieee80211 helper for
 protected bit

Use ieee80211_has_protected to check if the "protected" bit is set. Remove
the r8188eu driver's internal macro for this check.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126154253.178275-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +-
 drivers/staging/r8188eu/include/wifi.h      | 3 ---
 2 files changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 667f54e313eff..93696892ec7dc 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -809,7 +809,7 @@ static void OnAuthClient(struct adapter *padapter, struct recv_frame *precv_fram
 	if (!(pmlmeinfo->state & WIFI_FW_AUTH_STATE))
 		return;
 
-	offset = (GetPrivacy(pframe)) ? 4 : 0;
+	offset = ieee80211_has_protected(hdr->frame_control) ? 4 : 0;
 
 	seq	= le16_to_cpu(*(__le16 *)((size_t)pframe + WLAN_HDR_A3_LEN + offset + 2));
 	status	= le16_to_cpu(*(__le16 *)((size_t)pframe + WLAN_HDR_A3_LEN + offset + 4));
diff --git a/drivers/staging/r8188eu/include/wifi.h b/drivers/staging/r8188eu/include/wifi.h
index 2381c519ceafe..254a4bc1a1419 100644
--- a/drivers/staging/r8188eu/include/wifi.h
+++ b/drivers/staging/r8188eu/include/wifi.h
@@ -170,9 +170,6 @@ enum WIFI_REG_DOMAIN {
 #define SetPrivacy(pbuf)	\
 	*(__le16 *)(pbuf) |= cpu_to_le16(_PRIVACY_)
 
-#define GetPrivacy(pbuf)					\
-	(((*(__le16 *)(pbuf)) & cpu_to_le16(_PRIVACY_)) != 0)
-
 #define GetFrameType(pbuf)				\
 	(le16_to_cpu(*(__le16 *)(pbuf)) & (BIT(3) | BIT(2)))
 
-- 
GitLab


From d664761116ea12a2417b268af2fd936fe571f612 Mon Sep 17 00:00:00 2001
From: Jack Schofield <schofija@oregonstate.edu>
Date: Wed, 30 Nov 2022 21:38:50 -0800
Subject: [PATCH 3366/4122] staging: greybus: loopback_test: Add blank line
 after declaration.

Declarations are to be followed by a blank line. Add to follow Linux
Kernel coding style. Issue reported by checkpatch.

Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Jack Schofield <schofija@oregonstate.edu>
Link: https://lore.kernel.org/r/048967b5c3b157e19ccf4608f71da8418fc31877.1669872193.git.schofija@oregonstate.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/greybus/tools/loopback_test.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c
index 4c42e393cd3db..242138c4f6cf7 100644
--- a/drivers/staging/greybus/tools/loopback_test.c
+++ b/drivers/staging/greybus/tools/loopback_test.c
@@ -675,6 +675,7 @@ err:
 static int close_poll_files(struct loopback_test *t)
 {
 	int i;
+
 	for (i = 0; i < t->poll_count; i++)
 		close(t->fds[i].fd);
 
-- 
GitLab


From 2206c10685ab92a74ead37c30a7f817125ccd723 Mon Sep 17 00:00:00 2001
From: Jack Schofield <schofija@oregonstate.edu>
Date: Wed, 30 Nov 2022 21:38:51 -0800
Subject: [PATCH 3367/4122] staging: greybus: loopback_test: Remove void
 function return statement

Void function return statement is not useful in this case. Issue
reported by checkpatch.

Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Jack Schofield <schofija@oregonstate.edu>
Link: https://lore.kernel.org/r/5d465d56a6e3de2cb9d2f34377cbb82678cc67b1.1669872193.git.schofija@oregonstate.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/greybus/tools/loopback_test.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c
index 242138c4f6cf7..7c1697304ab4a 100644
--- a/drivers/staging/greybus/tools/loopback_test.c
+++ b/drivers/staging/greybus/tools/loopback_test.c
@@ -862,7 +862,6 @@ void loopback_run(struct loopback_test *t)
 
 err:
 	printf("Error running test\n");
-	return;
 }
 
 static int sanity_check(struct loopback_test *t)
-- 
GitLab


From 2cb3ecf1c5fa4850073d48fb4a759fc650f08736 Mon Sep 17 00:00:00 2001
From: Jack Schofield <schofija@oregonstate.edu>
Date: Wed, 30 Nov 2022 21:38:52 -0800
Subject: [PATCH 3368/4122] staging: greybus: loopback_test: Remove extra blank
 lines

Clean up extra uses of blank lines that do not follow the kernel coding
style. Issue reported by checkpatch.

Reviewed-by: Johan Hovold <johan@kernel.org>
Signed-off-by: Jack Schofield <schofija@oregonstate.edu>
Link: https://lore.kernel.org/r/51edbcc7583d64d3fe5a203d3a8649a695ef4a75.1669872193.git.schofija@oregonstate.edu
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/greybus/tools/loopback_test.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/staging/greybus/tools/loopback_test.c b/drivers/staging/greybus/tools/loopback_test.c
index 7c1697304ab4a..d7ad51ff60c55 100644
--- a/drivers/staging/greybus/tools/loopback_test.c
+++ b/drivers/staging/greybus/tools/loopback_test.c
@@ -239,7 +239,6 @@ static void show_loopback_devices(struct loopback_test *t)
 
 	for (i = 0; i < t->device_count; i++)
 		printf("device[%d] = %s\n", i, t->devices[i].name);
-
 }
 
 int open_sysfs(const char *sys_pfx, const char *node, int flags)
@@ -274,7 +273,6 @@ float read_sysfs_float_fd(int fd, const char *sys_pfx, const char *node)
 	char buf[SYSFS_MAX_INT];
 
 	if (read(fd, buf, sizeof(buf)) < 0) {
-
 		fprintf(stderr, "unable to read from %s%s %s\n", sys_pfx, node,
 			strerror(errno));
 		close(fd);
@@ -367,7 +365,6 @@ static int get_results(struct loopback_test *t)
 			r->apbridge_unipro_latency_max - r->apbridge_unipro_latency_min;
 		r->gbphy_firmware_latency_jitter =
 			r->gbphy_firmware_latency_max - r->gbphy_firmware_latency_min;
-
 	}
 
 	/*calculate the aggregate results of all enabled devices */
@@ -407,7 +404,6 @@ static int get_results(struct loopback_test *t)
 			r->apbridge_unipro_latency_max - r->apbridge_unipro_latency_min;
 		r->gbphy_firmware_latency_jitter =
 			r->gbphy_firmware_latency_max - r->gbphy_firmware_latency_min;
-
 	}
 
 	return 0;
@@ -536,7 +532,6 @@ static int log_results(struct loopback_test *t)
 			fprintf(stderr, "unable to open %s for appending\n", file_name);
 			abort();
 		}
-
 	}
 	for (i = 0; i < t->device_count; i++) {
 		if (!device_enabled(t, i))
@@ -550,10 +545,8 @@ static int log_results(struct loopback_test *t)
 			if (ret == -1)
 				fprintf(stderr, "unable to write %d bytes to csv.\n", len);
 		}
-
 	}
 
-
 	if (t->aggregate_output) {
 		len = format_output(t, &t->aggregate_results, "aggregate",
 				    data, sizeof(data), &tm);
@@ -741,7 +734,6 @@ static int wait_for_complete(struct loopback_test *t)
 		ts = &t->poll_timeout;
 
 	while (1) {
-
 		ret = ppoll(t->fds, t->poll_count, ts, &mask_old);
 		if (ret <= 0) {
 			stop_tests(t);
@@ -781,7 +773,6 @@ static void prepare_devices(struct loopback_test *t)
 		if (t->stop_all || device_enabled(t, i))
 			write_sysfs_val(t->devices[i].sysfs_entry, "type", 0);
 
-
 	for (i = 0; i < t->device_count; i++) {
 		if (!device_enabled(t, i))
 			continue;
@@ -824,7 +815,6 @@ static int start(struct loopback_test *t)
 	return 0;
 }
 
-
 void loopback_run(struct loopback_test *t)
 {
 	int i;
@@ -853,7 +843,6 @@ void loopback_run(struct loopback_test *t)
 	if (ret)
 		goto err;
 
-
 	get_results(t);
 
 	log_results(t);
@@ -881,10 +870,8 @@ static int sanity_check(struct loopback_test *t)
 			fprintf(stderr, "Bad device mask %x\n", (1 << i));
 			return -1;
 		}
-
 	}
 
-
 	return 0;
 }
 
-- 
GitLab


From 64a4ade699e70b2b700cf6088916827b73988d52 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:20 +0100
Subject: [PATCH 3369/4122] staging: r8188eu: replace one GetAddr3Ptr call

Define a struct ieee80211_mgmt in the OnBeacon function. Use it to replace
one GetAddr3Ptr call. This is a tiny step towards making GetAddr3Ptr
obsolete.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 93696892ec7dc..80342b53a129f 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -560,6 +560,7 @@ static void OnProbeRsp(struct adapter *padapter, struct recv_frame *precv_frame)
 
 static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 {
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	int cam_idx;
 	struct sta_info	*psta;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
@@ -576,7 +577,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 		return;
 	}
 
-	if (!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) {
+	if (!memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) {
 		if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) {
 			/* we should update current network before auth, or some IE is wrong */
 			pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC);
-- 
GitLab


From 84d55656bf4486eee77b783b88adebad0cf5d649 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:21 +0100
Subject: [PATCH 3370/4122] staging: r8188eu: read timestamp from
 ieee80211_mgmt

Read the 64-bit timestamp from struct ieee80211_mgmt instead of parsing
the beacon message ourselves.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 21 ++-------------------
 1 file changed, 2 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 80342b53a129f..66e3a90da3a37 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -389,21 +389,6 @@ static u32 p2p_listen_state_process(struct adapter *padapter, unsigned char *da)
 	return _SUCCESS;
 }
 
-static void update_TSF(struct mlme_ext_priv *pmlmeext, u8 *pframe)
-{
-	u8 *pIE;
-	__le32 *pbuf;
-
-	pIE = pframe + sizeof(struct ieee80211_hdr_3addr);
-	pbuf = (__le32 *)pIE;
-
-	pmlmeext->TSFValue = le32_to_cpu(*(pbuf + 1));
-
-	pmlmeext->TSFValue = pmlmeext->TSFValue << 32;
-
-	pmlmeext->TSFValue |= le32_to_cpu(*pbuf);
-}
-
 static void correct_TSF(struct adapter *padapter)
 {
 	u8 reg;
@@ -592,8 +577,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 			/* check the vendor of the assoc AP */
 			pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr));
 
-			/* update TSF Value */
-			update_TSF(pmlmeext, pframe);
+			pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp);
 
 			/* start auth */
 			start_clnt_auth(padapter);
@@ -635,8 +619,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 					return;
 				}
 
-				/* update TSF Value */
-				update_TSF(pmlmeext, pframe);
+				pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp);
 
 				/* report sta add event */
 				report_add_sta_event(padapter, GetAddr2Ptr(pframe), cam_idx);
-- 
GitLab


From 0e73b1276a51eeb649c9676e41c9e4208c8b40af Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:22 +0100
Subject: [PATCH 3371/4122] staging: r8188eu: replace GetAddr2Ptr calls

Get the source address of a beacon frame from our struct ieee80211_mgmt
instead of using the driver's internal GetAddr2Ptr helper.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 66e3a90da3a37..5a31b20dc46d9 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -586,7 +586,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 		}
 
 		if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) {
-			psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe));
+			psta = rtw_get_stainfo(pstapriv, mgmt->sa);
 			if (psta) {
 				ret = rtw_check_bcn_info(padapter, pframe, len);
 				if (!ret) {
@@ -601,7 +601,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 				process_p2p_ps_ie(padapter, (pframe + WLAN_HDR_A3_LEN), (len - WLAN_HDR_A3_LEN));
 			}
 		} else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) {
-			psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe));
+			psta = rtw_get_stainfo(pstapriv, mgmt->sa);
 			if (psta) {
 				/* update WMM, ERP in the beacon */
 				/* todo: the timer is used instead of the number of the beacon received */
@@ -621,8 +621,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 
 				pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp);
 
-				/* report sta add event */
-				report_add_sta_event(padapter, GetAddr2Ptr(pframe), cam_idx);
+				report_add_sta_event(padapter, mgmt->sa, cam_idx);
 			}
 		}
 	}
-- 
GitLab


From 1ed513f3e0ff13f2a84e3479b7774f213c7d7066 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:23 +0100
Subject: [PATCH 3372/4122] staging: r8188eu: pass only ies to
 process_p2p_ps_ie

The process_p2p_ps_ie function parses the information elements of a beacon
message and extracts p2p-related info.

process_p2p_ps_ie does not receive a pointer to the information elements
as one would expect. Instead it receives a pointer to the timestamp field
in the beacon message. process_p2p_ps_ie increments this pointer by
_BEACON_IE_OFFSET_ to jump to the start of the information elements (and
decreases the buffer length accordingly).

This is clumsy and hard to understand. Rewrite this such that
process_p2p_ps_ie takes a pointer to the information elements and the
total length of all elements. Check up-front that the total length is
not negative.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c |  9 ++++++++-
 drivers/staging/r8188eu/core/rtw_p2p.c      | 11 ++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 5a31b20dc46d9..07c57a2b61b9f 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -556,6 +556,13 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 	uint len = precv_frame->len;
 	struct wlan_bssid_ex *pbss;
 	int ret = _SUCCESS;
+	u8 *ie_ptr;
+	u32 ie_len;
+
+	ie_ptr = (u8 *)&mgmt->u.beacon.variable;
+	if (precv_frame->len < offsetof(struct ieee80211_mgmt, u.beacon.variable))
+		return;
+	ie_len = precv_frame->len - offsetof(struct ieee80211_mgmt, u.beacon.variable);
 
 	if (pmlmeext->sitesurvey_res.state == SCAN_PROCESS) {
 		report_survey_event(padapter, precv_frame);
@@ -598,7 +605,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 				/* todo: the timer is used instead of the number of the beacon received */
 				if ((sta_rx_pkts(psta) & 0xf) == 0)
 					update_beacon_info(padapter, pframe, len, psta);
-				process_p2p_ps_ie(padapter, (pframe + WLAN_HDR_A3_LEN), (len - WLAN_HDR_A3_LEN));
+				process_p2p_ps_ie(padapter, ie_ptr, ie_len);
 			}
 		} else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) {
 			psta = rtw_get_stainfo(pstapriv, mgmt->sa);
diff --git a/drivers/staging/r8188eu/core/rtw_p2p.c b/drivers/staging/r8188eu/core/rtw_p2p.c
index dc159e58f4288..ce05458bd1adc 100644
--- a/drivers/staging/r8188eu/core/rtw_p2p.c
+++ b/drivers/staging/r8188eu/core/rtw_p2p.c
@@ -1505,8 +1505,6 @@ void p2p_protocol_wk_hdl(struct adapter *padapter, int intCmdType)
 
 void process_p2p_ps_ie(struct adapter *padapter, u8 *IEs, u32 IELength)
 {
-	u8 *ies;
-	u32 ies_len;
 	u8 *p2p_ie;
 	u32	p2p_ielen = 0;
 	u8	noa_attr[MAX_P2P_IE_LEN] = { 0x00 };/*  NoA length should be n*(13) + 2 */
@@ -1518,13 +1516,8 @@ void process_p2p_ps_ie(struct adapter *padapter, u8 *IEs, u32 IELength)
 
 	if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_NONE))
 		return;
-	if (IELength <= _BEACON_IE_OFFSET_)
-		return;
 
-	ies = IEs + _BEACON_IE_OFFSET_;
-	ies_len = IELength - _BEACON_IE_OFFSET_;
-
-	p2p_ie = rtw_get_p2p_ie(ies, ies_len, NULL, &p2p_ielen);
+	p2p_ie = rtw_get_p2p_ie(IEs, IELength, NULL, &p2p_ielen);
 
 	while (p2p_ie) {
 		find_p2p = true;
@@ -1579,7 +1572,7 @@ void process_p2p_ps_ie(struct adapter *padapter, u8 *IEs, u32 IELength)
 		}
 
 		/* Get the next P2P IE */
-		p2p_ie = rtw_get_p2p_ie(p2p_ie + p2p_ielen, ies_len - (p2p_ie - ies + p2p_ielen), NULL, &p2p_ielen);
+		p2p_ie = rtw_get_p2p_ie(p2p_ie + p2p_ielen, IELength - (p2p_ie - IEs + p2p_ielen), NULL, &p2p_ielen);
 	}
 
 	if (find_p2p) {
-- 
GitLab


From 018da8b60688caec763d75cc6fb022a165028cea Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:24 +0100
Subject: [PATCH 3373/4122] staging: r8188eu: use ie buffer in
 update_beacon_info

The update_beacon_info function parses information elements of a beacon
message. It should take the pointer to the information elements and their
total length, not the entire beacon message.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c    | 4 ++--
 drivers/staging/r8188eu/core/rtw_wlan_util.c   | 9 +++------
 drivers/staging/r8188eu/include/rtw_mlme_ext.h | 3 +--
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 07c57a2b61b9f..38dc98cffbc43 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -604,7 +604,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 				/* update WMM, ERP in the beacon */
 				/* todo: the timer is used instead of the number of the beacon received */
 				if ((sta_rx_pkts(psta) & 0xf) == 0)
-					update_beacon_info(padapter, pframe, len, psta);
+					update_beacon_info(padapter, ie_ptr, ie_len, psta);
 				process_p2p_ps_ie(padapter, ie_ptr, ie_len);
 			}
 		} else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) {
@@ -613,7 +613,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 				/* update WMM, ERP in the beacon */
 				/* todo: the timer is used instead of the number of the beacon received */
 				if ((sta_rx_pkts(psta) & 0xf) == 0)
-					update_beacon_info(padapter, pframe, len, psta);
+					update_beacon_info(padapter, ie_ptr, ie_len, psta);
 			} else {
 				/* allocate a new CAM entry for IBSS station */
 				cam_idx = allocate_fw_sta_entry(padapter);
diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c b/drivers/staging/r8188eu/core/rtw_wlan_util.c
index 965bb7da4cce3..da3465d6bb0f8 100644
--- a/drivers/staging/r8188eu/core/rtw_wlan_util.c
+++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c
@@ -1035,16 +1035,13 @@ _mismatch:
 	return _FAIL;
 }
 
-void update_beacon_info(struct adapter *padapter, u8 *pframe, uint pkt_len, struct sta_info *psta)
+void update_beacon_info(struct adapter *padapter, u8 *ie_ptr, uint ie_len, struct sta_info *psta)
 {
 	unsigned int i;
-	unsigned int len;
 	struct ndis_802_11_var_ie *pIE;
 
-	len = pkt_len - (_BEACON_IE_OFFSET_ + WLAN_HDR_A3_LEN);
-
-	for (i = 0; i < len;) {
-		pIE = (struct ndis_802_11_var_ie *)(pframe + (_BEACON_IE_OFFSET_ + WLAN_HDR_A3_LEN) + i);
+	for (i = 0; i < ie_len;) {
+		pIE = (struct ndis_802_11_var_ie *)(ie_ptr + i);
 
 		switch (pIE->ElementID) {
 		case _HT_EXTRA_INFO_IE_:	/* HT info */
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 089bd54467733..6724424a334e2 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -447,8 +447,7 @@ void HTOnAssocRsp(struct adapter *padapter);
 void ERP_IE_handler(struct adapter *padapter, struct ndis_802_11_var_ie *pIE);
 void VCS_update(struct adapter *padapter, struct sta_info *psta);
 
-void update_beacon_info(struct adapter *padapter, u8 *pframe, uint len,
-			struct sta_info *psta);
+void update_beacon_info(struct adapter *padapter, u8 *ie_ptr, uint ie_len, struct sta_info *psta);
 int rtw_check_bcn_info(struct adapter  *Adapter, u8 *pframe, u32 packet_len);
 void update_IOT_info(struct adapter *padapter);
 void update_capinfo(struct adapter *adapter, u16 updatecap);
-- 
GitLab


From 02b51d08ec3a0f476c8b1537faa46e35e87e5ce0 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:25 +0100
Subject: [PATCH 3374/4122] staging: r8188eu: simplify update_sta_support_rate
 params

The update_sta_support_rate function takes a pointer to information
elements and the total length of all these elements. Use our variables
for pointer and length instead of calculating them manually.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-7-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 38dc98cffbc43..f7d3ecf551bfe 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -621,7 +621,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 					return;
 
 				/* get supported rate */
-				if (update_sta_support_rate(padapter, (pframe + WLAN_HDR_A3_LEN + _BEACON_IE_OFFSET_), (len - WLAN_HDR_A3_LEN - _BEACON_IE_OFFSET_), cam_idx) == _FAIL) {
+				if (update_sta_support_rate(padapter, ie_ptr, ie_len, cam_idx) == _FAIL) {
 					pmlmeinfo->FW_sta_info[cam_idx].status = 0;
 					return;
 				}
-- 
GitLab


From 32634359a9c3d70cbf01af0ae87b27d8c0e1ebe3 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:26 +0100
Subject: [PATCH 3375/4122] staging: r8188eu: exit if beacon is not from our
 bss

Do not process an incoming beacon message in the OnBeacon function if the
beacon was sent by a base station other than the one to which we're
connected.

This patch does not modify the behaviour of the code. It reverts the if
condition and returns if the beacon should not be processed. This is
simpler than wrapping the entire processing into a large if clause.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-8-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 103 ++++++++++----------
 1 file changed, 52 insertions(+), 51 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index f7d3ecf551bfe..a15998d912a73 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -569,67 +569,68 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 		return;
 	}
 
-	if (!memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN)) {
-		if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) {
-			/* we should update current network before auth, or some IE is wrong */
-			pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC);
-			if (pbss) {
-				if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) {
-					update_network(&pmlmepriv->cur_network.network, pbss, padapter, true);
-					rtw_get_bcn_info(&pmlmepriv->cur_network);
-				}
-				kfree(pbss);
+	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
+		return;
+
+	if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) {
+		/* we should update current network before auth, or some IE is wrong */
+		pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC);
+		if (pbss) {
+			if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) {
+				update_network(&pmlmepriv->cur_network.network, pbss, padapter, true);
+				rtw_get_bcn_info(&pmlmepriv->cur_network);
 			}
+			kfree(pbss);
+		}
 
-			/* check the vendor of the assoc AP */
-			pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr));
+		/* check the vendor of the assoc AP */
+		pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr));
 
-			pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp);
+		pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp);
 
-			/* start auth */
-			start_clnt_auth(padapter);
+		/* start auth */
+		start_clnt_auth(padapter);
 
-			return;
-		}
+		return;
+	}
 
-		if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) {
-			psta = rtw_get_stainfo(pstapriv, mgmt->sa);
-			if (psta) {
-				ret = rtw_check_bcn_info(padapter, pframe, len);
-				if (!ret) {
-					receive_disconnect(padapter,
-							   pmlmeinfo->network.MacAddress, 0);
-					return;
-				}
-				/* update WMM, ERP in the beacon */
-				/* todo: the timer is used instead of the number of the beacon received */
-				if ((sta_rx_pkts(psta) & 0xf) == 0)
-					update_beacon_info(padapter, ie_ptr, ie_len, psta);
-				process_p2p_ps_ie(padapter, ie_ptr, ie_len);
+	if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) {
+		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
+		if (psta) {
+			ret = rtw_check_bcn_info(padapter, pframe, len);
+			if (!ret) {
+				receive_disconnect(padapter,
+						   pmlmeinfo->network.MacAddress, 0);
+				return;
 			}
-		} else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) {
-			psta = rtw_get_stainfo(pstapriv, mgmt->sa);
-			if (psta) {
-				/* update WMM, ERP in the beacon */
-				/* todo: the timer is used instead of the number of the beacon received */
-				if ((sta_rx_pkts(psta) & 0xf) == 0)
-					update_beacon_info(padapter, ie_ptr, ie_len, psta);
-			} else {
-				/* allocate a new CAM entry for IBSS station */
-				cam_idx = allocate_fw_sta_entry(padapter);
-				if (cam_idx == NUM_STA)
-					return;
+			/* update WMM, ERP in the beacon */
+			/* todo: the timer is used instead of the number of the beacon received */
+			if ((sta_rx_pkts(psta) & 0xf) == 0)
+				update_beacon_info(padapter, ie_ptr, ie_len, psta);
+			process_p2p_ps_ie(padapter, ie_ptr, ie_len);
+		}
+	} else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) {
+		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
+		if (psta) {
+			/* update WMM, ERP in the beacon */
+			/* todo: the timer is used instead of the number of the beacon received */
+			if ((sta_rx_pkts(psta) & 0xf) == 0)
+				update_beacon_info(padapter, ie_ptr, ie_len, psta);
+		} else {
+			/* allocate a new CAM entry for IBSS station */
+			cam_idx = allocate_fw_sta_entry(padapter);
+			if (cam_idx == NUM_STA)
+				return;
 
-				/* get supported rate */
-				if (update_sta_support_rate(padapter, ie_ptr, ie_len, cam_idx) == _FAIL) {
-					pmlmeinfo->FW_sta_info[cam_idx].status = 0;
-					return;
-				}
+			/* get supported rate */
+			if (update_sta_support_rate(padapter, ie_ptr, ie_len, cam_idx) == _FAIL) {
+				pmlmeinfo->FW_sta_info[cam_idx].status = 0;
+				return;
+			}
 
-				pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp);
+			pmlmeext->TSFValue = le64_to_cpu(mgmt->u.beacon.timestamp);
 
-				report_add_sta_event(padapter, mgmt->sa, cam_idx);
-			}
+			report_add_sta_event(padapter, mgmt->sa, cam_idx);
 		}
 	}
 }
-- 
GitLab


From fa205589d5e9fc2d1b2f8d31f665152da04160bc Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:27 +0100
Subject: [PATCH 3376/4122] staging: r8188eu: stop beacon processing if kmalloc
 fails

If we cannot allocate a struct wlan_bssid_ex in the OnBeacon function, we
should stop processing the incoming beacon message and return.

For kmalloc failures, the current code just skips the update of network
and beacon info and tries to continue with the authentication. The update
would set the encryption algorithm that should be used for the
authentication.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-9-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index a15998d912a73..76424bcba416d 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -575,13 +575,14 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 	if (pmlmeinfo->state & WIFI_FW_AUTH_NULL) {
 		/* we should update current network before auth, or some IE is wrong */
 		pbss = kmalloc(sizeof(struct wlan_bssid_ex), GFP_ATOMIC);
-		if (pbss) {
-			if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) {
-				update_network(&pmlmepriv->cur_network.network, pbss, padapter, true);
-				rtw_get_bcn_info(&pmlmepriv->cur_network);
-			}
-			kfree(pbss);
+		if (!pbss)
+			return;
+
+		if (collect_bss_info(padapter, precv_frame, pbss) == _SUCCESS) {
+			update_network(&pmlmepriv->cur_network.network, pbss, padapter, true);
+			rtw_get_bcn_info(&pmlmepriv->cur_network);
 		}
+		kfree(pbss);
 
 		/* check the vendor of the assoc AP */
 		pmlmeinfo->assoc_AP_vendor = check_assoc_AP(pframe + sizeof(struct ieee80211_hdr_3addr), len - sizeof(struct ieee80211_hdr_3addr));
-- 
GitLab


From dd99fe1f5a8319f0e236c0548cdba849451779d5 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:28 +0100
Subject: [PATCH 3377/4122] staging: r8188eu: simplify error handling for
 missing station

Simplify the code to handle the case where we're associated to a station
that is not in our list of known stations.

We can simply exit in this case. This patch reverts the if-condition and
saves one level of indentation.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-10-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 24 ++++++++++-----------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 76424bcba416d..362313c49c526 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -597,19 +597,19 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 
 	if (((pmlmeinfo->state & 0x03) == WIFI_FW_STATION_STATE) && (pmlmeinfo->state & WIFI_FW_ASSOC_SUCCESS)) {
 		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
-		if (psta) {
-			ret = rtw_check_bcn_info(padapter, pframe, len);
-			if (!ret) {
-				receive_disconnect(padapter,
-						   pmlmeinfo->network.MacAddress, 0);
-				return;
-			}
-			/* update WMM, ERP in the beacon */
-			/* todo: the timer is used instead of the number of the beacon received */
-			if ((sta_rx_pkts(psta) & 0xf) == 0)
-				update_beacon_info(padapter, ie_ptr, ie_len, psta);
-			process_p2p_ps_ie(padapter, ie_ptr, ie_len);
+		if (!psta)
+			return;
+
+		ret = rtw_check_bcn_info(padapter, pframe, len);
+		if (!ret) {
+			receive_disconnect(padapter, pmlmeinfo->network.MacAddress, 0);
+			return;
 		}
+		/* update WMM, ERP in the beacon */
+		/* todo: the timer is used instead of the number of the beacon received */
+		if ((sta_rx_pkts(psta) & 0xf) == 0)
+			update_beacon_info(padapter, ie_ptr, ie_len, psta);
+		process_p2p_ps_ie(padapter, ie_ptr, ie_len);
 	} else if ((pmlmeinfo->state & 0x03) == WIFI_FW_ADHOC_STATE) {
 		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
 		if (psta) {
-- 
GitLab


From 17c1202a9e689dc875c864c1f1e8dded70c0a8a0 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Sat, 26 Nov 2022 17:01:29 +0100
Subject: [PATCH 3378/4122] staging: r8188eu: remove a variable

Check the result of rtw_check_bcn_info directly and remove the ret
variable.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221126160129.178697-11-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 362313c49c526..d32b2d569e237 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -555,7 +555,6 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 	u8 *pframe = precv_frame->rx_data;
 	uint len = precv_frame->len;
 	struct wlan_bssid_ex *pbss;
-	int ret = _SUCCESS;
 	u8 *ie_ptr;
 	u32 ie_len;
 
@@ -600,8 +599,7 @@ static void OnBeacon(struct adapter *padapter, struct recv_frame *precv_frame)
 		if (!psta)
 			return;
 
-		ret = rtw_check_bcn_info(padapter, pframe, len);
-		if (!ret) {
+		if (rtw_check_bcn_info(padapter, pframe, len) != _SUCCESS) {
 			receive_disconnect(padapter, pmlmeinfo->network.MacAddress, 0);
 			return;
 		}
-- 
GitLab


From 49bd97c28b7e7f014a72821fd95fcf11e11599a4 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Sat, 19 Nov 2022 01:34:46 +0000
Subject: [PATCH 3379/4122] perf tools: Use dedicated non-atomic clear/set bit
 helpers

Use the dedicated non-atomic helpers for {clear,set}_bit() and their
test variants, i.e. the double-underscore versions.  Depsite being
defined in atomic.h, and despite the kernel versions being atomic in the
kernel, tools' {clear,set}_bit() helpers aren't actually atomic.  Move
to the double-underscore versions so that the versions that are expected
to be atomic (for kernel developers) can be made atomic without
affecting users that don't want atomic operations.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: James Morse <james.morse@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Marc Zyngier <maz@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Oliver Upton <oliver.upton@linux.dev>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Yury Norov <yury.norov@gmail.com>
Cc: alexandru elisei <alexandru.elisei@arm.com>
Cc: kvm@vger.kernel.org
Cc: kvmarm@lists.cs.columbia.edu
Cc: kvmarm@lists.linux.dev
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lore.kernel.org/lkml/20221119013450.2643007-6-seanjc@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/find-bit-bench.c                      | 2 +-
 tools/perf/builtin-c2c.c                               | 6 +++---
 tools/perf/builtin-kwork.c                             | 6 +++---
 tools/perf/builtin-record.c                            | 6 +++---
 tools/perf/builtin-sched.c                             | 2 +-
 tools/perf/tests/bitmap.c                              | 2 +-
 tools/perf/tests/mem2node.c                            | 2 +-
 tools/perf/util/affinity.c                             | 4 ++--
 tools/perf/util/header.c                               | 8 ++++----
 tools/perf/util/mmap.c                                 | 6 +++---
 tools/perf/util/pmu.c                                  | 2 +-
 tools/perf/util/scripting-engines/trace-event-perl.c   | 2 +-
 tools/perf/util/scripting-engines/trace-event-python.c | 2 +-
 tools/perf/util/session.c                              | 2 +-
 tools/perf/util/svghelper.c                            | 2 +-
 15 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/tools/perf/bench/find-bit-bench.c b/tools/perf/bench/find-bit-bench.c
index 22b5cfe970237..d103c3136983d 100644
--- a/tools/perf/bench/find-bit-bench.c
+++ b/tools/perf/bench/find-bit-bench.c
@@ -70,7 +70,7 @@ static int do_for_each_set_bit(unsigned int num_bits)
 		bitmap_zero(to_test, num_bits);
 		skip = num_bits / set_bits;
 		for (i = 0; i < num_bits; i += skip)
-			set_bit(i, to_test);
+			__set_bit(i, to_test);
 
 		for (i = 0; i < outer_iterations; i++) {
 			old = accumulator;
diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c
index a9190458d2d50..52d94c7dd8366 100644
--- a/tools/perf/builtin-c2c.c
+++ b/tools/perf/builtin-c2c.c
@@ -230,7 +230,7 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he,
 		      "WARNING: no sample cpu value"))
 		return;
 
-	set_bit(sample->cpu, c2c_he->cpuset);
+	__set_bit(sample->cpu, c2c_he->cpuset);
 }
 
 static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
@@ -247,7 +247,7 @@ static void c2c_he__set_node(struct c2c_hist_entry *c2c_he,
 	if (WARN_ONCE(node < 0, "WARNING: failed to find node\n"))
 		return;
 
-	set_bit(node, c2c_he->nodeset);
+	__set_bit(node, c2c_he->nodeset);
 
 	if (c2c_he->paddr != sample->phys_addr) {
 		c2c_he->paddr_cnt++;
@@ -2318,7 +2318,7 @@ static int setup_nodes(struct perf_session *session)
 			continue;
 
 		perf_cpu_map__for_each_cpu(cpu, idx, map) {
-			set_bit(cpu.cpu, set);
+			__set_bit(cpu.cpu, set);
 
 			if (WARN_ONCE(cpu2node[cpu.cpu] != -1, "node/cpu topology bug"))
 				return -EINVAL;
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index 4ffbf5908070d..0e02b80986440 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -222,7 +222,7 @@ static struct kwork_atom *atom_new(struct perf_kwork *kwork,
 	list_add_tail(&page->list, &kwork->atom_page_list);
 
 found_atom:
-	set_bit(i, page->bitmap);
+	__set_bit(i, page->bitmap);
 	atom->time = sample->time;
 	atom->prev = NULL;
 	atom->page_addr = page;
@@ -235,8 +235,8 @@ static void atom_free(struct kwork_atom *atom)
 	if (atom->prev != NULL)
 		atom_free(atom->prev);
 
-	clear_bit(atom->bit_inpage,
-		  ((struct kwork_atom_page *)atom->page_addr)->bitmap);
+	__clear_bit(atom->bit_inpage,
+		    ((struct kwork_atom_page *)atom->page_addr)->bitmap);
 }
 
 static void atom_del(struct kwork_atom *atom)
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index bd462a3f2bbdb..b7fd7ec586fbc 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3555,7 +3555,7 @@ static int record__mmap_cpu_mask_init(struct mmap_cpu_mask *mask, struct perf_cp
 		/* Return ENODEV is input cpu is greater than max cpu */
 		if ((unsigned long)cpu.cpu > mask->nbits)
 			return -ENODEV;
-		set_bit(cpu.cpu, mask->bits);
+		__set_bit(cpu.cpu, mask->bits);
 	}
 
 	return 0;
@@ -3627,8 +3627,8 @@ static int record__init_thread_cpu_masks(struct record *rec, struct perf_cpu_map
 	pr_debug("nr_threads: %d\n", rec->nr_threads);
 
 	for (t = 0; t < rec->nr_threads; t++) {
-		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
-		set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
+		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].maps.bits);
+		__set_bit(perf_cpu_map__cpu(cpus, t).cpu, rec->thread_masks[t].affinity.bits);
 		if (verbose) {
 			pr_debug("thread_masks[%d]: ", t);
 			mmap_cpu_mask__scnprintf(&rec->thread_masks[t].maps, "maps");
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index f93737eef07ba..86e18575c9bee 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -1573,7 +1573,7 @@ static int map_switch_event(struct perf_sched *sched, struct evsel *evsel,
 
 	if (sched->map.comp) {
 		cpus_nr = bitmap_weight(sched->map.comp_cpus_mask, MAX_CPUS);
-		if (!test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
+		if (!__test_and_set_bit(this_cpu.cpu, sched->map.comp_cpus_mask)) {
 			sched->map.comp_cpus[cpus_nr++] = this_cpu;
 			new_cpu = true;
 		}
diff --git a/tools/perf/tests/bitmap.c b/tools/perf/tests/bitmap.c
index 4965dd6669566..0173f5402a35b 100644
--- a/tools/perf/tests/bitmap.c
+++ b/tools/perf/tests/bitmap.c
@@ -18,7 +18,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 
 	if (map && bm) {
 		for (i = 0; i < perf_cpu_map__nr(map); i++)
-			set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
+			__set_bit(perf_cpu_map__cpu(map, i).cpu, bm);
 	}
 
 	if (map)
diff --git a/tools/perf/tests/mem2node.c b/tools/perf/tests/mem2node.c
index 4c96829510c91..a0e88c4961074 100644
--- a/tools/perf/tests/mem2node.c
+++ b/tools/perf/tests/mem2node.c
@@ -33,7 +33,7 @@ static unsigned long *get_bitmap(const char *str, int nbits)
 		int i;
 
 		perf_cpu_map__for_each_cpu(cpu, i, map)
-			set_bit(cpu.cpu, bm);
+			__set_bit(cpu.cpu, bm);
 	}
 
 	if (map)
diff --git a/tools/perf/util/affinity.c b/tools/perf/util/affinity.c
index 4ee96b3c755b7..38dc4524b7e86 100644
--- a/tools/perf/util/affinity.c
+++ b/tools/perf/util/affinity.c
@@ -58,14 +58,14 @@ void affinity__set(struct affinity *a, int cpu)
 		return;
 
 	a->changed = true;
-	set_bit(cpu, a->sched_cpus);
+	__set_bit(cpu, a->sched_cpus);
 	/*
 	 * We ignore errors because affinity is just an optimization.
 	 * This could happen for example with isolated CPUs or cpusets.
 	 * In this case the IPIs inside the kernel's perf API still work.
 	 */
 	sched_setaffinity(0, cpu_set_size, (cpu_set_t *)a->sched_cpus);
-	clear_bit(cpu, a->sched_cpus);
+	__clear_bit(cpu, a->sched_cpus);
 }
 
 static void __affinity__cleanup(struct affinity *a)
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 98dfaf84bd137..dc2ae397d400e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -79,12 +79,12 @@ struct perf_file_attr {
 
 void perf_header__set_feat(struct perf_header *header, int feat)
 {
-	set_bit(feat, header->adds_features);
+	__set_bit(feat, header->adds_features);
 }
 
 void perf_header__clear_feat(struct perf_header *header, int feat)
 {
-	clear_bit(feat, header->adds_features);
+	__clear_bit(feat, header->adds_features);
 }
 
 bool perf_header__has_feat(const struct perf_header *header, int feat)
@@ -1358,7 +1358,7 @@ static int memory_node__read(struct memory_node *n, unsigned long idx)
 	rewinddir(dir);
 
 	for_each_memory(phys, dir) {
-		set_bit(phys, n->set);
+		__set_bit(phys, n->set);
 	}
 
 	closedir(dir);
@@ -3952,7 +3952,7 @@ int perf_file_header__read(struct perf_file_header *header,
 
 		if (!test_bit(HEADER_HOSTNAME, header->adds_features)) {
 			bitmap_zero(header->adds_features, HEADER_FEAT_BITS);
-			set_bit(HEADER_BUILD_ID, header->adds_features);
+			__set_bit(HEADER_BUILD_ID, header->adds_features);
 		}
 	}
 
diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c
index a4dff881be39b..49093b21ee2da 100644
--- a/tools/perf/util/mmap.c
+++ b/tools/perf/util/mmap.c
@@ -111,7 +111,7 @@ static int perf_mmap__aio_bind(struct mmap *map, int idx, struct perf_cpu cpu, i
 			pr_err("Failed to allocate node mask for mbind: error %m\n");
 			return -1;
 		}
-		set_bit(node_index, node_mask);
+		__set_bit(node_index, node_mask);
 		if (mbind(data, mmap_len, MPOL_BIND, node_mask, node_index + 1 + 1, 0)) {
 			pr_err("Failed to bind [%p-%p] AIO buffer to node %lu: error %m\n",
 				data, data + mmap_len, node_index);
@@ -256,7 +256,7 @@ static void build_node_mask(int node, struct mmap_cpu_mask *mask)
 	for (idx = 0; idx < nr_cpus; idx++) {
 		cpu = perf_cpu_map__cpu(cpu_map, idx); /* map c index to online cpu index */
 		if (cpu__get_node(cpu) == node)
-			set_bit(cpu.cpu, mask->bits);
+			__set_bit(cpu.cpu, mask->bits);
 	}
 }
 
@@ -270,7 +270,7 @@ static int perf_mmap__setup_affinity_mask(struct mmap *map, struct mmap_params *
 	if (mp->affinity == PERF_AFFINITY_NODE && cpu__max_node() > 1)
 		build_node_mask(cpu__get_node(map->core.cpu), &map->affinity_mask);
 	else if (mp->affinity == PERF_AFFINITY_CPU)
-		set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
+		__set_bit(map->core.cpu.cpu, map->affinity_mask.bits);
 
 	return 0;
 }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index e9a4f31926bf4..8ff6462f051e1 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1533,7 +1533,7 @@ void perf_pmu__set_format(unsigned long *bits, long from, long to)
 
 	memset(bits, 0, BITS_TO_BYTES(PERF_PMU_FORMAT_BITS));
 	for (b = from; b <= to; b++)
-		set_bit(b, bits);
+		__set_bit(b, bits);
 }
 
 void perf_pmu__del_formats(struct list_head *formats)
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index a5d945415bbc1..5b602b6d46854 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -365,7 +365,7 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 
 	sprintf(handler, "%s::%s", event->system, event->name);
 
-	if (!test_and_set_bit(event->id, events_defined))
+	if (!__test_and_set_bit(event->id, events_defined))
 		define_event_symbols(event, handler, event->print_fmt.args);
 
 	s = nsecs / NSEC_PER_SEC;
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index 1cf65db8f861d..d685a7399ee2e 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -934,7 +934,7 @@ static void python_process_tracepoint(struct perf_sample *sample,
 
 	sprintf(handler_name, "%s__%s", event->system, event->name);
 
-	if (!test_and_set_bit(event->id, events_defined))
+	if (!__test_and_set_bit(event->id, events_defined))
 		define_event_symbols(event, handler_name, event->print_fmt.args);
 
 	handler = get_handler(handler_name);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0e1a3d6bacb92..1facd4616317d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -2749,7 +2749,7 @@ int perf_session__cpu_bitmap(struct perf_session *session,
 			goto out_delete_map;
 		}
 
-		set_bit(cpu.cpu, cpu_bitmap);
+		__set_bit(cpu.cpu, cpu_bitmap);
 	}
 
 	err = 0;
diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c
index 1e0c731fc5396..5c62d3118c41f 100644
--- a/tools/perf/util/svghelper.c
+++ b/tools/perf/util/svghelper.c
@@ -741,7 +741,7 @@ static int str_to_bitmap(char *s, cpumask_t *b, int nr_cpus)
 			break;
 		}
 
-		set_bit(c.cpu, cpumask_bits(b));
+		__set_bit(c.cpu, cpumask_bits(b));
 	}
 
 	perf_cpu_map__put(m);
-- 
GitLab


From 4de531ce41e5aab9d3c5884e8360faaec82fb62e Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 29 Nov 2022 21:51:48 +0100
Subject: [PATCH 3380/4122] staging: r8188eu: use ieee80211_mgmt to parse
 addresses

Define a struct ieee80211_mgmt in the OnDisassoc function and use it to
parse the addresses in the incoming disassoc message. This replaces some
driver-specific functions for address parsing.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221129205152.128172-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index d32b2d569e237..b2b2cb57ed044 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1427,6 +1427,7 @@ static void OnDeAuth(struct adapter *padapter, struct recv_frame *precv_frame)
 
 static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 {
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	u16 reason;
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
@@ -1434,8 +1435,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 	u8 *pframe = precv_frame->rx_data;
 	struct wifidirect_info *pwdinfo = &padapter->wdinfo;
 
-	/* check A3 */
-	if (!(!memcmp(GetAddr3Ptr(pframe), get_my_bssid(&pmlmeinfo->network), ETH_ALEN)))
+	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
 		return;
 
 	if (pwdinfo->rx_invitereq_info.scan_op_ch_only) {
@@ -1449,7 +1449,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 		struct sta_info *psta;
 		struct sta_priv *pstapriv = &padapter->stapriv;
 
-		psta = rtw_get_stainfo(pstapriv, GetAddr2Ptr(pframe));
+		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
 		if (psta) {
 			u8 updated = 0;
 
@@ -1466,7 +1466,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 
 		return;
 	} else {
-		receive_disconnect(padapter, GetAddr3Ptr(pframe), reason);
+		receive_disconnect(padapter, mgmt->bssid, reason);
 	}
 	pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 }
-- 
GitLab


From e70cac829d63f24aac0ea858a5407d25cf55b85a Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 29 Nov 2022 21:51:49 +0100
Subject: [PATCH 3381/4122] staging: r8188eu: read reason code from
 ieee80211_mgmt

Use the struct ieee80211_mgmt to read the reason code instead of parsing
the message manually.

Remove the pframe pointer, it's no longer used.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221129205152.128172-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index b2b2cb57ed044..fe58d4e3e2604 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1432,7 +1432,6 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 	struct mlme_priv *pmlmepriv = &padapter->mlmepriv;
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
-	u8 *pframe = precv_frame->rx_data;
 	struct wifidirect_info *pwdinfo = &padapter->wdinfo;
 
 	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
@@ -1443,7 +1442,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 		_set_timer(&pwdinfo->reset_ch_sitesurvey, 10);
 	}
 
-	reason = le16_to_cpu(*(__le16 *)(pframe + WLAN_HDR_A3_LEN));
+	reason = le16_to_cpu(mgmt->u.disassoc.reason_code);
 
 	if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) {
 		struct sta_info *psta;
-- 
GitLab


From 8f7f05934ea24676c5c259a66bd71fa35ee22ef3 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 29 Nov 2022 21:51:50 +0100
Subject: [PATCH 3382/4122] staging: r8188eu: move bBusyTraffic update

Move the update of bBusyTraffic into the else branch. The if branch ends
with a return statement, so bBusyTraffic will not be updated in this case.

With this change in place, we can reorder the code and save some levels of
indentation.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221129205152.128172-4-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index fe58d4e3e2604..dd1e0b4fc5a09 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1466,8 +1466,8 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 		return;
 	} else {
 		receive_disconnect(padapter, mgmt->bssid, reason);
+		pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 	}
-	pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
 }
 
 static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame)
-- 
GitLab


From 17580de3e2b876247f2201b6486559ef1e6b7f58 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 29 Nov 2022 21:51:51 +0100
Subject: [PATCH 3383/4122] staging: r8188eu: handle the non-ap case first

The OnDisassoc function hasn't got much to do if we're not working as an
access point. Move this case out of the large if statement to simplify the
code.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221129205152.128172-5-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 36 ++++++++++-----------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index dd1e0b4fc5a09..d5c9eb52fb0e4 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1433,6 +1433,8 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 	struct mlme_ext_priv	*pmlmeext = &padapter->mlmeextpriv;
 	struct mlme_ext_info	*pmlmeinfo = &pmlmeext->mlmext_info;
 	struct wifidirect_info *pwdinfo = &padapter->wdinfo;
+	struct sta_info *psta;
+	struct sta_priv *pstapriv = &padapter->stapriv;
 
 	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
 		return;
@@ -1444,29 +1446,25 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 
 	reason = le16_to_cpu(mgmt->u.disassoc.reason_code);
 
-	if (check_fwstate(pmlmepriv, WIFI_AP_STATE)) {
-		struct sta_info *psta;
-		struct sta_priv *pstapriv = &padapter->stapriv;
-
-		psta = rtw_get_stainfo(pstapriv, mgmt->sa);
-		if (psta) {
-			u8 updated = 0;
+	if (!check_fwstate(pmlmepriv, WIFI_AP_STATE)) {
+		receive_disconnect(padapter, mgmt->bssid, reason);
+		pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
+		return;
+	}
 
-			spin_lock_bh(&pstapriv->asoc_list_lock);
-			if (!list_empty(&psta->asoc_list)) {
-				list_del_init(&psta->asoc_list);
-				pstapriv->asoc_list_cnt--;
-				updated = ap_free_sta(padapter, psta, false, reason);
-			}
-			spin_unlock_bh(&pstapriv->asoc_list_lock);
+	psta = rtw_get_stainfo(pstapriv, mgmt->sa);
+	if (psta) {
+		u8 updated = 0;
 
-			associated_clients_update(padapter, updated);
+		spin_lock_bh(&pstapriv->asoc_list_lock);
+		if (!list_empty(&psta->asoc_list)) {
+			list_del_init(&psta->asoc_list);
+			pstapriv->asoc_list_cnt--;
+			updated = ap_free_sta(padapter, psta, false, reason);
 		}
+		spin_unlock_bh(&pstapriv->asoc_list_lock);
 
-		return;
-	} else {
-		receive_disconnect(padapter, mgmt->bssid, reason);
-		pmlmepriv->LinkDetectInfo.bBusyTraffic = false;
+		associated_clients_update(padapter, updated);
 	}
 }
 
-- 
GitLab


From 30b28fd639abf51464f015f6bebd7506bc895be2 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Tue, 29 Nov 2022 21:51:52 +0100
Subject: [PATCH 3384/4122] staging: r8188eu: simplify err handling for unknown
 station

If we receive a disassoc message from an unknown station, we can drop this
message immediately. Reorder the code to make this clearer.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221129205152.128172-6-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 22 ++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index d5c9eb52fb0e4..49e0b50b1243e 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -1435,6 +1435,7 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 	struct wifidirect_info *pwdinfo = &padapter->wdinfo;
 	struct sta_info *psta;
 	struct sta_priv *pstapriv = &padapter->stapriv;
+	u8 updated = 0;
 
 	if (memcmp(mgmt->bssid, get_my_bssid(&pmlmeinfo->network), ETH_ALEN))
 		return;
@@ -1453,19 +1454,18 @@ static void OnDisassoc(struct adapter *padapter, struct recv_frame *precv_frame)
 	}
 
 	psta = rtw_get_stainfo(pstapriv, mgmt->sa);
-	if (psta) {
-		u8 updated = 0;
-
-		spin_lock_bh(&pstapriv->asoc_list_lock);
-		if (!list_empty(&psta->asoc_list)) {
-			list_del_init(&psta->asoc_list);
-			pstapriv->asoc_list_cnt--;
-			updated = ap_free_sta(padapter, psta, false, reason);
-		}
-		spin_unlock_bh(&pstapriv->asoc_list_lock);
+	if (!psta)
+		return;
 
-		associated_clients_update(padapter, updated);
+	spin_lock_bh(&pstapriv->asoc_list_lock);
+	if (!list_empty(&psta->asoc_list)) {
+		list_del_init(&psta->asoc_list);
+		pstapriv->asoc_list_cnt--;
+		updated = ap_free_sta(padapter, psta, false, reason);
 	}
+	spin_unlock_bh(&pstapriv->asoc_list_lock);
+
+	associated_clients_update(padapter, updated);
 }
 
 static void OnAction_back(struct adapter *padapter, struct recv_frame *precv_frame)
-- 
GitLab


From 20ed9fa4965875fdde5bfd65d838465e38d46b22 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Wed, 30 Nov 2022 16:51:58 +0000
Subject: [PATCH 3385/4122] perf branch: Fix interpretation of branch records

Commit 93315e46b000fc80 ("perf/core: Add speculation info to branch
entries") added a new field in between type and new_type. Perf has its
own copy of this struct so update it to match the kernel side.

This doesn't currently cause any issues because new_type is only used by
the Arm BRBE driver which isn't merged yet.

Committer notes:

Is this really an ABI? How are we supposed to deal with old perf.data
files with new tools and vice versa? :-\

Fixes: 93315e46b000fc80 ("perf/core: Add speculation info to branch entries")
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Signed-off-by: James Clark <james.clark@arm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Link: https://lore.kernel.org/r/20221130165158.517385-1-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/branch.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h
index d6017c9b1872d..3ed792db11255 100644
--- a/tools/perf/util/branch.h
+++ b/tools/perf/util/branch.h
@@ -22,9 +22,10 @@ struct branch_flags {
 			u64 abort:1;
 			u64 cycles:16;
 			u64 type:4;
+			u64 spec:2;
 			u64 new_type:4;
 			u64 priv:3;
-			u64 reserved:33;
+			u64 reserved:31;
 		};
 	};
 };
-- 
GitLab


From 7e8e5e879729b238cd17dcc292d86c1e6cc83ecc Mon Sep 17 00:00:00 2001
From: Hans-Peter Nilsson <hp@axis.com>
Date: Mon, 28 Dec 2020 03:39:41 +0100
Subject: [PATCH 3386/4122] perf arm64: Fix mksyscalltbl, don't lose syscalls
 due to sort -nu

When using "sort -nu", arm64 syscalls were lost.  That is, the io_setup
syscall (number 0) and all but one (typically ftruncate; 64) of the
syscalls that are defined symbolically (like "#define __NR_ftruncate
__NR3264_ftruncate") at the point where "sort" is applied.

This creation-of-syscalls.c-scheme is, judging from comments,
copy-pasted from powerpc, and worked there because at the time, its
tools/arch/powerpc/include/uapi/asm/unistd.h had *literals*, like
"#define __NR_ftruncate 93".

With sort being numeric and the non-numeric key effectively evaluating
to 0, the sort option "-u" means these "duplicates" are removed.
There's no need to remove syscall lines with duplicate numbers for arm64
because there are none, so let's fix that by just losing the "-u".
Having the table numerically sorted on syscall-number for the rest of
the syscalls looks nice, so keep the "-n".

Reviewed-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: Hans-Peter Nilsson <hp@axis.com>
Tested-by: Leo Yan <leo.yan@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.garry@huawei.com>
Cc: Kim Phillips <kim.phillips@arm.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20201228023941.E0DE2203B5@pchp3.se.axis.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/entry/syscalls/mksyscalltbl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
index 459469b7222c1..a7ca48d1e37bc 100755
--- a/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
+++ b/tools/perf/arch/arm64/entry/syscalls/mksyscalltbl
@@ -58,5 +58,5 @@ create_table()
 
 $gcc -E -dM -x c -I $incpath/include/uapi $input \
 	|sed -ne 's/^#define __NR_//p' \
-	|sort -t' ' -k2 -nu	       \
+	|sort -t' ' -k2 -n	       \
 	|create_table
-- 
GitLab


From 3f48ad1f243d07d6018a4af2762b6538febde689 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 2 Dec 2022 10:25:24 +0100
Subject: [PATCH 3387/4122] staging: r8188eu: merge two probereq_p2p functions

_issue_probereq_p2p is called only by issue_probereq_p2p. Merge the two
functions and remove the unused return value.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221202092525.403887-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 49e0b50b1243e..cf7b39cfb8f4c 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3183,9 +3183,8 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da)
 	dump_mgntframe(padapter, pmgntframe);
 }
 
-static int _issue_probereq_p2p(struct adapter *padapter, u8 *da)
+inline void issue_probereq_p2p(struct adapter *padapter, u8 *da)
 {
-	int ret = _FAIL;
 	struct xmit_frame		*pmgntframe;
 	struct pkt_attrib		*pattrib;
 	unsigned char			*pframe;
@@ -3201,7 +3200,7 @@ static int _issue_probereq_p2p(struct adapter *padapter, u8 *da)
 
 	pmgntframe = alloc_mgtxmitframe(pxmitpriv);
 	if (!pmgntframe)
-		goto exit;
+		return;
 
 	/* update attribute */
 	pattrib = &pmgntframe->attrib;
@@ -3457,15 +3456,6 @@ static int _issue_probereq_p2p(struct adapter *padapter, u8 *da)
 	pattrib->last_txcmdsz = pattrib->pktlen;
 
 	dump_mgntframe(padapter, pmgntframe);
-	ret = _SUCCESS;
-
-exit:
-	return ret;
-}
-
-inline void issue_probereq_p2p(struct adapter *adapter, u8 *da)
-{
-	_issue_probereq_p2p(adapter, da);
 }
 
 static s32 rtw_action_public_decache(struct recv_frame *recv_frame, u8 token)
-- 
GitLab


From 3d0862367ab8af1144c0e5495b8bf0e2ce892afe Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 2 Dec 2022 10:25:25 +0100
Subject: [PATCH 3388/4122] staging: r8188eu: remove unused da parameter

All callers of issue_probereq_p2p set the da parameter to NULL. Remove
this parameters and the code that runs only for da != NULL.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221202092525.403887-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c   | 28 ++++++++-----------
 drivers/staging/r8188eu/core/rtw_p2p.c        |  8 +++---
 .../staging/r8188eu/include/rtw_mlme_ext.h    |  2 +-
 3 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index cf7b39cfb8f4c..95a9470f4c993 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -3183,7 +3183,7 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da)
 	dump_mgntframe(padapter, pmgntframe);
 }
 
-inline void issue_probereq_p2p(struct adapter *padapter, u8 *da)
+inline void issue_probereq_p2p(struct adapter *padapter)
 {
 	struct xmit_frame		*pmgntframe;
 	struct pkt_attrib		*pattrib;
@@ -3216,20 +3216,16 @@ inline void issue_probereq_p2p(struct adapter *padapter, u8 *da)
 	fctrl = &pwlanhdr->frame_control;
 	*(fctrl) = 0;
 
-	if (da) {
-		memcpy(pwlanhdr->addr1, da, ETH_ALEN);
-		memcpy(pwlanhdr->addr3, da, ETH_ALEN);
+	if ((pwdinfo->p2p_info.scan_op_ch_only) || (pwdinfo->rx_invitereq_info.scan_op_ch_only)) {
+		/*	This two flags will be set when this is only the P2P client mode. */
+		memcpy(pwlanhdr->addr1, pwdinfo->p2p_peer_interface_addr, ETH_ALEN);
+		memcpy(pwlanhdr->addr3, pwdinfo->p2p_peer_interface_addr, ETH_ALEN);
 	} else {
-		if ((pwdinfo->p2p_info.scan_op_ch_only) || (pwdinfo->rx_invitereq_info.scan_op_ch_only)) {
-			/*	This two flags will be set when this is only the P2P client mode. */
-			memcpy(pwlanhdr->addr1, pwdinfo->p2p_peer_interface_addr, ETH_ALEN);
-			memcpy(pwlanhdr->addr3, pwdinfo->p2p_peer_interface_addr, ETH_ALEN);
-		} else {
-			/*	broadcast probe request frame */
-			eth_broadcast_addr(pwlanhdr->addr1);
-			eth_broadcast_addr(pwlanhdr->addr3);
-		}
+		/*	broadcast probe request frame */
+		eth_broadcast_addr(pwlanhdr->addr1);
+		eth_broadcast_addr(pwlanhdr->addr3);
 	}
+
 	memcpy(pwlanhdr->addr2, mac, ETH_ALEN);
 
 	SetSeqNum(pwlanhdr, pmlmeext->mgnt_seq);
@@ -5867,9 +5863,9 @@ void site_survey(struct adapter *padapter)
 		if (ScanType == SCAN_ACTIVE) { /* obey the channel plan setting... */
 			if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_SCAN) ||
 			    rtw_p2p_chk_state(pwdinfo, P2P_STATE_FIND_PHASE_SEARCH)) {
-				issue_probereq_p2p(padapter, NULL);
-				issue_probereq_p2p(padapter, NULL);
-				issue_probereq_p2p(padapter, NULL);
+				issue_probereq_p2p(padapter);
+				issue_probereq_p2p(padapter);
+				issue_probereq_p2p(padapter);
 			} else {
 				int i;
 				for (i = 0; i < RTW_SSID_SCAN_AMOUNT; i++) {
diff --git a/drivers/staging/r8188eu/core/rtw_p2p.c b/drivers/staging/r8188eu/core/rtw_p2p.c
index ce05458bd1adc..93d3c9c4399cf 100644
--- a/drivers/staging/r8188eu/core/rtw_p2p.c
+++ b/drivers/staging/r8188eu/core/rtw_p2p.c
@@ -1453,7 +1453,7 @@ static void pre_tx_invitereq_handler(struct adapter *padapter)
 
 	set_channel_bwmode(padapter, pwdinfo->invitereq_info.peer_ch, HAL_PRIME_CHNL_OFFSET_DONT_CARE, HT_CHANNEL_WIDTH_20);
 	rtw_mlme_under_site_survey(padapter);
-	issue_probereq_p2p(padapter, NULL);
+	issue_probereq_p2p(padapter);
 	_set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT);
 
 }
@@ -1464,7 +1464,7 @@ static void pre_tx_provdisc_handler(struct adapter *padapter)
 
 	set_channel_bwmode(padapter, pwdinfo->tx_prov_disc_info.peer_channel_num[0], HAL_PRIME_CHNL_OFFSET_DONT_CARE, HT_CHANNEL_WIDTH_20);
 	rtw_mlme_under_site_survey(padapter);
-	issue_probereq_p2p(padapter, NULL);
+	issue_probereq_p2p(padapter);
 	_set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT);
 
 }
@@ -1475,7 +1475,7 @@ static void pre_tx_negoreq_handler(struct adapter *padapter)
 
 	set_channel_bwmode(padapter, pwdinfo->nego_req_info.peer_channel_num[0], HAL_PRIME_CHNL_OFFSET_DONT_CARE, HT_CHANNEL_WIDTH_20);
 	rtw_mlme_under_site_survey(padapter);
-	issue_probereq_p2p(padapter, NULL);
+	issue_probereq_p2p(padapter);
 	_set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT);
 
 }
@@ -1725,7 +1725,7 @@ static void pre_tx_scan_timer_process(struct timer_list *t)
 	if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_TX_PROVISION_DIS_REQ)) {
 		if (pwdinfo->tx_prov_disc_info.benable) {	/*	the provision discovery request frame is trigger to send or not */
 			p2p_protocol_wk_cmd(adapter, P2P_PRE_TX_PROVDISC_PROCESS_WK);
-			/* issue_probereq_p2p(adapter, NULL); */
+			/* issue_probereq_p2p(adapter); */
 			/* _set_timer(&pwdinfo->pre_tx_scan_timer, P2P_TX_PRESCAN_TIMEOUT); */
 		}
 	} else if (rtw_p2p_chk_state(pwdinfo, P2P_STATE_GONEGO_ING)) {
diff --git a/drivers/staging/r8188eu/include/rtw_mlme_ext.h b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
index 6724424a334e2..589de7c54d930 100644
--- a/drivers/staging/r8188eu/include/rtw_mlme_ext.h
+++ b/drivers/staging/r8188eu/include/rtw_mlme_ext.h
@@ -497,7 +497,7 @@ void issue_probersp_p2p(struct adapter *padapter, unsigned char *da);
 void issue_p2p_provision_request(struct adapter *padapter, u8 *pssid,
 				 u8 ussidlen, u8 *pdev_raddr);
 void issue_p2p_GO_request(struct adapter *padapter, u8 *raddr);
-void issue_probereq_p2p(struct adapter *padapter, u8 *da);
+void issue_probereq_p2p(struct adapter *padapter);
 void issue_p2p_invitation_response(struct adapter *padapter, u8 *raddr,
 				   u8 dialogToken, u8 success);
 void issue_p2p_invitation_request(struct adapter *padapter, u8 *raddr);
-- 
GitLab


From eb0b3f501e4dd5e6668bddbbb3c20ecd1545b60d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 30 Nov 2022 11:36:13 -0800
Subject: [PATCH 3389/4122] Revert "perf stat: Rename "aggregate-number" to
 "cpu-count" in JSON"

This reverts commit c4b41b83c25073c09bfcc4e5ec496c9dd316656b.

As Ian said, the "cpu-count" is not appropriate for uncore events, also it
caused a perf test failure.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221130193613.1046804-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 847acdb5dc407..f1ee4b0521986 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -281,19 +281,19 @@ static void print_aggr_id_json(struct perf_stat_config *config,
 
 	switch (config->aggr_mode) {
 	case AGGR_CORE:
-		fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"cpu-count\" : %d, ",
+		fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ",
 			id.socket, id.die, id.core, nr);
 		break;
 	case AGGR_DIE:
-		fprintf(output, "\"die\" : \"S%d-D%d\", \"cpu-count\" : %d, ",
+		fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ",
 			id.socket, id.die, nr);
 		break;
 	case AGGR_SOCKET:
-		fprintf(output, "\"socket\" : \"S%d\", \"cpu-count\" : %d, ",
+		fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ",
 			id.socket, nr);
 		break;
 	case AGGR_NODE:
-		fprintf(output, "\"node\" : \"N%d\", \"cpu-count\" : %d, ",
+		fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ",
 			id.node, nr);
 		break;
 	case AGGR_NONE:
-- 
GitLab


From 506783ffa96f953eea94d6ff99c4e908965fc383 Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 2 Dec 2022 10:31:58 +0100
Subject: [PATCH 3390/4122] staging: r8188eu: use subtype helpers in
 collect_bss_info

Use the iee80211 helper functions to check the frame subtype in
collect_bss_info. Replace the call to the driver-specific
GetFrameSubType function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221202093159.404111-2-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_mlme_ext.c | 26 +++++++++------------
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_mlme_ext.c b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
index 95a9470f4c993..1b9cf7596a76e 100644
--- a/drivers/staging/r8188eu/core/rtw_mlme_ext.c
+++ b/drivers/staging/r8188eu/core/rtw_mlme_ext.c
@@ -5963,10 +5963,11 @@ void site_survey(struct adapter *padapter)
 /* collect bss info from Beacon and Probe request/response frames. */
 u8 collect_bss_info(struct adapter *padapter, struct recv_frame *precv_frame, struct wlan_bssid_ex *bssid)
 {
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)precv_frame->rx_data;
 	int	i;
 	u32	len;
 	u8 *p;
-	u16 val16, subtype;
+	u16 val16;
 	u8 *pframe = precv_frame->rx_data;
 	u32	packet_len = precv_frame->len;
 	u8 ie_offset;
@@ -5982,23 +5983,18 @@ u8 collect_bss_info(struct adapter *padapter, struct recv_frame *precv_frame, st
 
 	memset(bssid, 0, sizeof(struct wlan_bssid_ex));
 
-	subtype = GetFrameSubType(pframe);
-
-	if (subtype == WIFI_BEACON) {
+	if (ieee80211_is_beacon(mgmt->frame_control)) {
 		bssid->Reserved[0] = 1;
 		ie_offset = _BEACON_IE_OFFSET_;
+	} else if (ieee80211_is_probe_req(mgmt->frame_control)) {
+		ie_offset = _PROBEREQ_IE_OFFSET_;
+		bssid->Reserved[0] = 2;
+	} else if (ieee80211_is_probe_resp(mgmt->frame_control)) {
+		ie_offset = _PROBERSP_IE_OFFSET_;
+		bssid->Reserved[0] = 3;
 	} else {
-		/*  FIXME : more type */
-		if (subtype == WIFI_PROBEREQ) {
-			ie_offset = _PROBEREQ_IE_OFFSET_;
-			bssid->Reserved[0] = 2;
-		} else if (subtype == WIFI_PROBERSP) {
-			ie_offset = _PROBERSP_IE_OFFSET_;
-			bssid->Reserved[0] = 3;
-		} else {
-			bssid->Reserved[0] = 0;
-			ie_offset = _FIXED_IE_LENGTH_;
-		}
+		bssid->Reserved[0] = 0;
+		ie_offset = _FIXED_IE_LENGTH_;
 	}
 
 	bssid->Length = sizeof(struct wlan_bssid_ex) - MAX_IE_SZ + len;
-- 
GitLab


From 7868f8f858968066b23287c9733e777a8407384c Mon Sep 17 00:00:00 2001
From: Martin Kaiser <martin@kaiser.cx>
Date: Fri, 2 Dec 2022 10:31:59 +0100
Subject: [PATCH 3391/4122] staging: r8188eu: use subtype helper in
 rtw_check_bcn_info

Use ieee80211_is_beacon to check the frame subtype in rtw_check_bcn_info.
Replace the call to the driver-specific GetFrameSubType function.

Tested-by: Philipp Hortmann <philipp.g.hortmann@gmail.com> # Edimax N150
Signed-off-by: Martin Kaiser <martin@kaiser.cx>
Link: https://lore.kernel.org/r/20221202093159.404111-3-martin@kaiser.cx
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/r8188eu/core/rtw_wlan_util.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/staging/r8188eu/core/rtw_wlan_util.c b/drivers/staging/r8188eu/core/rtw_wlan_util.c
index da3465d6bb0f8..f1ebb5358cb9a 100644
--- a/drivers/staging/r8188eu/core/rtw_wlan_util.c
+++ b/drivers/staging/r8188eu/core/rtw_wlan_util.c
@@ -874,9 +874,10 @@ void VCS_update(struct adapter *padapter, struct sta_info *psta)
 
 int rtw_check_bcn_info(struct adapter  *Adapter, u8 *pframe, u32 packet_len)
 {
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)pframe;
 	unsigned int		len;
 	unsigned char		*p;
-	unsigned short	val16, subtype;
+	unsigned short	val16;
 	struct wlan_network *cur_network = &Adapter->mlmepriv.cur_network;
 	/* u8 wpa_ie[255], rsn_ie[255]; */
 	u16 wpa_len = 0, rsn_len = 0;
@@ -908,9 +909,7 @@ int rtw_check_bcn_info(struct adapter  *Adapter, u8 *pframe, u32 packet_len)
 	if (!bssid)
 		return _FAIL;
 
-	subtype = GetFrameSubType(pframe) >> 4;
-
-	if (subtype == WIFI_BEACON)
+	if (ieee80211_is_beacon(mgmt->frame_control))
 		bssid->Reserved[0] = 1;
 
 	bssid->Length = sizeof(struct wlan_bssid_ex) - MAX_IE_SZ + len;
-- 
GitLab


From bf44039f1ee828c47e1eb83eb315f0fed5a66126 Mon Sep 17 00:00:00 2001
From: Matt Jan <zoo868e@gmail.com>
Date: Sun, 4 Dec 2022 18:19:16 +0800
Subject: [PATCH 3392/4122] staging: vme_user: remove multiple blank lines

Remove multiple unnecessary blank lines in accordance with the Linux
kernel coding-style regulations. The issues were reported by the
checkpatch script.

Signed-off-by: Matt Jan <zoo868e@gmail.com>
Link: https://lore.kernel.org/r/20221204101916.GA51992@zoo868e
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vme_user/vme.h        | 2 --
 drivers/staging/vme_user/vme_fake.c   | 5 -----
 drivers/staging/vme_user/vme_tsi148.c | 6 ------
 drivers/staging/vme_user/vme_tsi148.h | 7 -------
 4 files changed, 20 deletions(-)

diff --git a/drivers/staging/vme_user/vme.h b/drivers/staging/vme_user/vme.h
index b204a9b4be1bd..98da8d039d606 100644
--- a/drivers/staging/vme_user/vme.h
+++ b/drivers/staging/vme_user/vme.h
@@ -27,7 +27,6 @@ enum vme_resource_type {
 #define VME_A64_MAX	0x10000000000000000ULL
 #define VME_CRCSR_MAX	0x1000000ULL
 
-
 /* VME Cycle Types */
 #define VME_SCT		0x1
 #define VME_BLT		0x2
@@ -185,6 +184,5 @@ int vme_bus_num(struct vme_dev *);
 int vme_register_driver(struct vme_driver *, unsigned int);
 void vme_unregister_driver(struct vme_driver *);
 
-
 #endif /* _VME_H_ */
 
diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c
index dd646b0c531d4..a9d3a7f5c440f 100644
--- a/drivers/staging/vme_user/vme_fake.c
+++ b/drivers/staging/vme_user/vme_fake.c
@@ -356,7 +356,6 @@ static int __fake_master_get(struct vme_master_resource *image, int *enabled,
 	return 0;
 }
 
-
 static int fake_master_get(struct vme_master_resource *image, int *enabled,
 		unsigned long long *vme_base, unsigned long long *size,
 		u32 *aspace, u32 *cycle, u32 *dwidth)
@@ -373,7 +372,6 @@ static int fake_master_get(struct vme_master_resource *image, int *enabled,
 	return retval;
 }
 
-
 static void fake_lm_check(struct fake_driver *bridge, unsigned long long addr,
 			  u32 aspace, u32 cycle)
 {
@@ -1060,7 +1058,6 @@ static void fake_crcsr_exit(struct vme_bridge *fake_bridge)
 	kfree(bridge->crcsr_kernel);
 }
 
-
 static int __init fake_init(void)
 {
 	int retval, i;
@@ -1238,7 +1235,6 @@ err_struct:
 
 }
 
-
 static void __exit fake_exit(void)
 {
 	struct list_head *pos = NULL;
@@ -1294,7 +1290,6 @@ static void __exit fake_exit(void)
 	root_device_unregister(vme_root);
 }
 
-
 MODULE_PARM_DESC(geoid, "Set geographical addressing");
 module_param(geoid, int, 0);
 
diff --git a/drivers/staging/vme_user/vme_tsi148.c b/drivers/staging/vme_user/vme_tsi148.c
index 0171f46d1848f..482049cfc6640 100644
--- a/drivers/staging/vme_user/vme_tsi148.c
+++ b/drivers/staging/vme_user/vme_tsi148.c
@@ -34,7 +34,6 @@
 static int tsi148_probe(struct pci_dev *, const struct pci_device_id *);
 static void tsi148_remove(struct pci_dev *);
 
-
 /* Module parameter */
 static bool err_chk;
 static int geoid;
@@ -673,7 +672,6 @@ static int tsi148_slave_get(struct vme_slave_resource *image, int *enabled,
 	/* Need granularity before we set the size */
 	*size = (unsigned long long)((vme_bound - *vme_base) + granularity);
 
-
 	if ((ctl & TSI148_LCSR_ITAT_2eSSTM_M) == TSI148_LCSR_ITAT_2eSSTM_160)
 		*cycle |= VME_2eSST160;
 	if ((ctl & TSI148_LCSR_ITAT_2eSSTM_M) == TSI148_LCSR_ITAT_2eSSTM_267)
@@ -1142,7 +1140,6 @@ static int __tsi148_master_get(struct vme_master_resource *image, int *enabled,
 	return 0;
 }
 
-
 static int tsi148_master_get(struct vme_master_resource *image, int *enabled,
 	unsigned long long *vme_base, unsigned long long *size, u32 *aspace,
 	u32 *cycle, u32 *dwidth)
@@ -1244,7 +1241,6 @@ out:
 	return retval;
 }
 
-
 static ssize_t tsi148_master_write(struct vme_master_resource *image, void *buf,
 	size_t count, loff_t offset)
 {
@@ -2000,7 +1996,6 @@ static int tsi148_lm_get(struct vme_lm_resource *lm,
 	if ((lm_ctl & TSI148_LCSR_LMAT_AS_M) == TSI148_LCSR_LMAT_AS_A64)
 		*aspace |= VME_A64;
 
-
 	if (lm_ctl & TSI148_LCSR_LMAT_SUPR)
 		*cycle |= VME_SUPER;
 	if (lm_ctl & TSI148_LCSR_LMAT_NPRIV)
@@ -2551,7 +2546,6 @@ static void tsi148_remove(struct pci_dev *pdev)
 
 	bridge = tsi148_bridge->driver_priv;
 
-
 	dev_dbg(&pdev->dev, "Driver is being unloaded.\n");
 
 	/*
diff --git a/drivers/staging/vme_user/vme_tsi148.h b/drivers/staging/vme_user/vme_tsi148.h
index 226fedc6f1671..b3cb4a089cc86 100644
--- a/drivers/staging/vme_user/vme_tsi148.h
+++ b/drivers/staging/vme_user/vme_tsi148.h
@@ -87,7 +87,6 @@ struct tsi148_dma_entry {
  *                                Control and Status Registers
  */
 
-
 /*
  *  Command/Status Registers (CRG + $004)
  */
@@ -342,7 +341,6 @@ static const int TSI148_LCSR_VIACK[8] = { 0, TSI148_LCSR_VIACK1,
 #define TSI148_LCSR_IT7_ITOFL		0x3F4
 #define TSI148_LCSR_IT7_ITAT		0x3F8
 
-
 #define TSI148_LCSR_IT0		0x300
 #define TSI148_LCSR_IT1		0x320
 #define TSI148_LCSR_IT2		0x340
@@ -464,7 +462,6 @@ static const int TSI148_LCSR_IT[8] = { TSI148_LCSR_IT0, TSI148_LCSR_IT1,
 #define TSI148_LCSR_DMA0	0x500
 #define TSI148_LCSR_DMA1	0x580
 
-
 static const int TSI148_LCSR_DMA[TSI148_MAX_DMA] = { TSI148_LCSR_DMA0,
 						TSI148_LCSR_DMA1 };
 
@@ -532,9 +529,6 @@ static const int TSI148_GCSR_MBOX[4] = { TSI148_GCSR_MBOX0,
 #define TSI148_CSRBSR	0xFF8
 #define TSI148_CBAR	0xFFC
 
-
-
-
 	/*
 	 *  TSI148 Register Bit Definitions
 	 */
@@ -828,7 +822,6 @@ static const int TSI148_GCSR_MBOX[4] = { TSI148_GCSR_MBOX0,
 #define TSI148_LCSR_VEAT_AM_M          (0x3F<<8)	/* Address Mode Mask */
 #define TSI148_LCSR_VEAT_XAM_M         (0xFF<<0)	/* Master AMode Mask */
 
-
 /*
  * VMEbus PCI Error Diagnostics PCI/X Attributes Register  CRG + $280
  */
-- 
GitLab


From 7bef797d707f1744f71156b21d41e3b8c946631f Mon Sep 17 00:00:00 2001
From: Chen Zhongjin <chenzhongjin@huawei.com>
Date: Mon, 5 Dec 2022 16:48:05 +0800
Subject: [PATCH 3393/4122] vme: Fix error not catched in fake_init()

In fake_init(), __root_device_register() is possible to fail but it's
ignored, which can cause unregistering vme_root fail when exit.

 general protection fault,
 probably for non-canonical address 0xdffffc000000008c
 KASAN: null-ptr-deref in range [0x0000000000000460-0x0000000000000467]
 RIP: 0010:root_device_unregister+0x26/0x60
 Call Trace:
  <TASK>
  __x64_sys_delete_module+0x34f/0x540
  do_syscall_64+0x38/0x90
  entry_SYSCALL_64_after_hwframe+0x63/0xcd

Return error when __root_device_register() fails.

Fixes: 658bcdae9c67 ("vme: Adding Fake VME driver")
Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
Link: https://lore.kernel.org/r/20221205084805.147436-1-chenzhongjin@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vme_user/vme_fake.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c
index a9d3a7f5c440f..6454ccf6e5b4b 100644
--- a/drivers/staging/vme_user/vme_fake.c
+++ b/drivers/staging/vme_user/vme_fake.c
@@ -1070,6 +1070,8 @@ static int __init fake_init(void)
 
 	/* We need a fake parent device */
 	vme_root = __root_device_register("vme", THIS_MODULE);
+	if (IS_ERR(vme_root))
+		return PTR_ERR(vme_root);
 
 	/* If we want to support more than one bridge at some point, we need to
 	 * dynamically allocate this so we get one per device.
-- 
GitLab


From f794eec86c7cd9a340a66109e6f32f8659ff30fa Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 8 Sep 2022 15:44:58 -0300
Subject: [PATCH 3394/4122] vfio: Simplify vfio_create_group()

The vfio.group_lock is now only used to serialize vfio_group creation and
destruction, we don't need a micro-optimization of searching, unlocking,
then allocating and searching again. Just hold the lock the whole time.

Grabbed from:
https://lore.kernel.org/kvm/20220922152338.2a2238fe.alex.williamson@redhat.com/

Link: https://lore.kernel.org/r/20221201145535.589687-2-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/vfio_main.c | 33 ++++++++++-----------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 6d51b700764e5..f913d862a3868 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -143,10 +143,12 @@ EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
  * Group objects - create, release, get, put, search
  */
 static struct vfio_group *
-__vfio_group_get_from_iommu(struct iommu_group *iommu_group)
+vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group;
 
+	lockdep_assert_held(&vfio.group_lock);
+
 	/*
 	 * group->iommu_group from the vfio.group_list cannot be NULL
 	 * under the vfio.group_lock.
@@ -160,17 +162,6 @@ __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 	return NULL;
 }
 
-static struct vfio_group *
-vfio_group_get_from_iommu(struct iommu_group *iommu_group)
-{
-	struct vfio_group *group;
-
-	mutex_lock(&vfio.group_lock);
-	group = __vfio_group_get_from_iommu(iommu_group);
-	mutex_unlock(&vfio.group_lock);
-	return group;
-}
-
 static void vfio_group_release(struct device *dev)
 {
 	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
@@ -225,6 +216,8 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 	struct vfio_group *ret;
 	int err;
 
+	lockdep_assert_held(&vfio.group_lock);
+
 	group = vfio_group_alloc(iommu_group, type);
 	if (IS_ERR(group))
 		return group;
@@ -237,26 +230,16 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
 		goto err_put;
 	}
 
-	mutex_lock(&vfio.group_lock);
-
-	/* Did we race creating this group? */
-	ret = __vfio_group_get_from_iommu(iommu_group);
-	if (ret)
-		goto err_unlock;
-
 	err = cdev_device_add(&group->cdev, &group->dev);
 	if (err) {
 		ret = ERR_PTR(err);
-		goto err_unlock;
+		goto err_put;
 	}
 
 	list_add(&group->vfio_next, &vfio.group_list);
 
-	mutex_unlock(&vfio.group_lock);
 	return group;
 
-err_unlock:
-	mutex_unlock(&vfio.group_lock);
 err_put:
 	put_device(&group->dev);
 	return ret;
@@ -467,7 +450,9 @@ static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
 	if (ret)
 		goto out_put_group;
 
+	mutex_lock(&vfio.group_lock);
 	group = vfio_create_group(iommu_group, type);
+	mutex_unlock(&vfio.group_lock);
 	if (IS_ERR(group)) {
 		ret = PTR_ERR(group);
 		goto out_remove_device;
@@ -516,9 +501,11 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 		return ERR_PTR(-EINVAL);
 	}
 
+	mutex_lock(&vfio.group_lock);
 	group = vfio_group_get_from_iommu(iommu_group);
 	if (!group)
 		group = vfio_create_group(iommu_group, VFIO_IOMMU);
+	mutex_unlock(&vfio.group_lock);
 
 	/* The vfio_group holds a reference to the iommu_group */
 	iommu_group_put(iommu_group);
-- 
GitLab


From dcb93d0364a238315dd71f834b199d4b95ae09eb Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 8 Sep 2022 15:44:59 -0300
Subject: [PATCH 3395/4122] vfio: Move the sanity check of the group to
 vfio_create_group()

This avoids opening group specific code in __vfio_register_dev() for the
sanity check if an (existing) group is not corrupted by having two copies
of the same struct device in it. It also simplifies the error unwind for
this sanity check since the failure can be detected in the group
allocation.

This also prepares for moving the group specific code into separate
group.c.

Grabbed from:
https://lore.kernel.org/kvm/20220922152338.2a2238fe.alex.williamson@redhat.com/

Link: https://lore.kernel.org/r/20221201145535.589687-3-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/vfio/vfio_main.c | 62 ++++++++++++++++------------------------
 1 file changed, 25 insertions(+), 37 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index f913d862a3868..87d9a1670a2fb 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -143,7 +143,7 @@ EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
  * Group objects - create, release, get, put, search
  */
 static struct vfio_group *
-vfio_group_get_from_iommu(struct iommu_group *iommu_group)
+vfio_group_find_from_iommu(struct iommu_group *iommu_group)
 {
 	struct vfio_group *group;
 
@@ -154,10 +154,8 @@ vfio_group_get_from_iommu(struct iommu_group *iommu_group)
 	 * under the vfio.group_lock.
 	 */
 	list_for_each_entry(group, &vfio.group_list, vfio_next) {
-		if (group->iommu_group == iommu_group) {
-			refcount_inc(&group->drivers);
+		if (group->iommu_group == iommu_group)
 			return group;
-		}
 	}
 	return NULL;
 }
@@ -307,23 +305,6 @@ static bool vfio_device_try_get_registration(struct vfio_device *device)
 	return refcount_inc_not_zero(&device->refcount);
 }
 
-static struct vfio_device *vfio_group_get_device(struct vfio_group *group,
-						 struct device *dev)
-{
-	struct vfio_device *device;
-
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(device, &group->device_list, group_next) {
-		if (device->dev == dev &&
-		    vfio_device_try_get_registration(device)) {
-			mutex_unlock(&group->device_lock);
-			return device;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-	return NULL;
-}
-
 /*
  * VFIO driver API
  */
@@ -467,6 +448,21 @@ out_put_group:
 	return ERR_PTR(ret);
 }
 
+static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
+{
+	struct vfio_device *device;
+
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(device, &group->device_list, group_next) {
+		if (device->dev == dev) {
+			mutex_unlock(&group->device_lock);
+			return true;
+		}
+	}
+	mutex_unlock(&group->device_lock);
+	return false;
+}
+
 static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 {
 	struct iommu_group *iommu_group;
@@ -502,9 +498,15 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 	}
 
 	mutex_lock(&vfio.group_lock);
-	group = vfio_group_get_from_iommu(iommu_group);
-	if (!group)
+	group = vfio_group_find_from_iommu(iommu_group);
+	if (group) {
+		if (WARN_ON(vfio_group_has_device(group, dev)))
+			group = ERR_PTR(-EINVAL);
+		else
+			refcount_inc(&group->drivers);
+	} else {
 		group = vfio_create_group(iommu_group, VFIO_IOMMU);
+	}
 	mutex_unlock(&vfio.group_lock);
 
 	/* The vfio_group holds a reference to the iommu_group */
@@ -515,7 +517,6 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 static int __vfio_register_dev(struct vfio_device *device,
 		struct vfio_group *group)
 {
-	struct vfio_device *existing_device;
 	int ret;
 
 	/*
@@ -537,19 +538,6 @@ static int __vfio_register_dev(struct vfio_device *device,
 	if (!device->dev_set)
 		vfio_assign_device_set(device, device);
 
-	existing_device = vfio_group_get_device(group, device->dev);
-	if (existing_device) {
-		/*
-		 * group->iommu_group is non-NULL because we hold the drivers
-		 * refcount.
-		 */
-		dev_WARN(device->dev, "Device already exists on group %d\n",
-			 iommu_group_id(group->iommu_group));
-		vfio_device_put_registration(existing_device);
-		ret = -EBUSY;
-		goto err_out;
-	}
-
 	/* Our reference on group is moved to the device */
 	device->group = group;
 
-- 
GitLab


From 32e0922821f2115eb8940b6a6b942ba61eff15c2 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Fri, 23 Sep 2022 02:19:34 -0700
Subject: [PATCH 3396/4122] vfio: Create wrappers for group register/unregister

This avoids decoding group fields in the common functions used by
vfio_device registration, and prepares for further moving the vfio group
specific code into separate file.

Link: https://lore.kernel.org/r/20221201145535.589687-4-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 87d9a1670a2fb..a5122fa4bf4d1 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -514,6 +514,20 @@ static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
 	return group;
 }
 
+static void vfio_device_group_register(struct vfio_device *device)
+{
+	mutex_lock(&device->group->device_lock);
+	list_add(&device->group_next, &device->group->device_list);
+	mutex_unlock(&device->group->device_lock);
+}
+
+static void vfio_device_group_unregister(struct vfio_device *device)
+{
+	mutex_lock(&device->group->device_lock);
+	list_del(&device->group_next);
+	mutex_unlock(&device->group->device_lock);
+}
+
 static int __vfio_register_dev(struct vfio_device *device,
 		struct vfio_group *group)
 {
@@ -552,9 +566,7 @@ static int __vfio_register_dev(struct vfio_device *device,
 	/* Refcounting can't start until the driver calls register */
 	refcount_set(&device->refcount, 1);
 
-	mutex_lock(&group->device_lock);
-	list_add(&device->group_next, &group->device_list);
-	mutex_unlock(&group->device_lock);
+	vfio_device_group_register(device);
 
 	return 0;
 err_out:
@@ -614,7 +626,6 @@ static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
  * removed.  Open file descriptors for the device... */
 void vfio_unregister_group_dev(struct vfio_device *device)
 {
-	struct vfio_group *group = device->group;
 	unsigned int i = 0;
 	bool interrupted = false;
 	long rc;
@@ -642,9 +653,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
 		}
 	}
 
-	mutex_lock(&group->device_lock);
-	list_del(&device->group_next);
-	mutex_unlock(&group->device_lock);
+	vfio_device_group_unregister(device);
 
 	/* Balances device_add in register path */
 	device_del(&device->device);
-- 
GitLab


From 49ea02d390a34a538a3f54b9ce5665e474690bc7 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Thu, 24 Nov 2022 21:22:27 -0800
Subject: [PATCH 3397/4122] vfio: Set device->group in helper function

This avoids referencing device->group in __vfio_register_dev().

Link: https://lore.kernel.org/r/20221201145535.589687-5-yi.l.liu@intel.com
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 41 +++++++++++++++++++++++++---------------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index a5122fa4bf4d1..7e42ee0ee1bce 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -528,18 +528,29 @@ static void vfio_device_group_unregister(struct vfio_device *device)
 	mutex_unlock(&device->group->device_lock);
 }
 
-static int __vfio_register_dev(struct vfio_device *device,
-		struct vfio_group *group)
+static int vfio_device_set_group(struct vfio_device *device,
+				 enum vfio_group_type type)
 {
-	int ret;
+	struct vfio_group *group;
+
+	if (type == VFIO_IOMMU)
+		group = vfio_group_find_or_alloc(device->dev);
+	else
+		group = vfio_noiommu_group_alloc(device->dev, type);
 
-	/*
-	 * In all cases group is the output of one of the group allocation
-	 * functions and we have group->drivers incremented for us.
-	 */
 	if (IS_ERR(group))
 		return PTR_ERR(group);
 
+	/* Our reference on group is moved to the device */
+	device->group = group;
+	return 0;
+}
+
+static int __vfio_register_dev(struct vfio_device *device,
+			       enum vfio_group_type type)
+{
+	int ret;
+
 	if (WARN_ON(device->ops->bind_iommufd &&
 		    (!device->ops->unbind_iommufd ||
 		     !device->ops->attach_ioas)))
@@ -552,12 +563,13 @@ static int __vfio_register_dev(struct vfio_device *device,
 	if (!device->dev_set)
 		vfio_assign_device_set(device, device);
 
-	/* Our reference on group is moved to the device */
-	device->group = group;
-
 	ret = dev_set_name(&device->device, "vfio%d", device->index);
 	if (ret)
-		goto err_out;
+		return ret;
+
+	ret = vfio_device_set_group(device, type);
+	if (ret)
+		return ret;
 
 	ret = device_add(&device->device);
 	if (ret)
@@ -576,8 +588,7 @@ err_out:
 
 int vfio_register_group_dev(struct vfio_device *device)
 {
-	return __vfio_register_dev(device,
-		vfio_group_find_or_alloc(device->dev));
+	return __vfio_register_dev(device, VFIO_IOMMU);
 }
 EXPORT_SYMBOL_GPL(vfio_register_group_dev);
 
@@ -587,8 +598,7 @@ EXPORT_SYMBOL_GPL(vfio_register_group_dev);
  */
 int vfio_register_emulated_iommu_dev(struct vfio_device *device)
 {
-	return __vfio_register_dev(device,
-		vfio_noiommu_group_alloc(device->dev, VFIO_EMULATED_IOMMU));
+	return __vfio_register_dev(device, VFIO_EMULATED_IOMMU);
 }
 EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
 
@@ -658,6 +668,7 @@ void vfio_unregister_group_dev(struct vfio_device *device)
 	/* Balances device_add in register path */
 	device_del(&device->device);
 
+	/* Balances vfio_device_set_group in register path */
 	vfio_device_remove_group(device);
 }
 EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
-- 
GitLab


From 07b465863325faceb865871ff5f22c1ebba6df54 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Thu, 10 Nov 2022 07:26:09 -0800
Subject: [PATCH 3398/4122] vfio: Swap order of
 vfio_device_container_register() and open_device()

This makes the DMA unmap callback registration to container be consistent
across the vfio iommufd compat mode and the legacy container mode.

In the vfio iommufd compat mode, this registration is done in the
vfio_iommufd_bind() when creating access which has an unmap callback. This
is prior to calling the open_device() op. The existing mdev drivers have
been converted to be OK with this order. So it is ok to swap the order of
vfio_device_container_register() and open_device() for legacy mode.

This also prepares for further moving group specific code into separate
source file.

Link: https://lore.kernel.org/r/20221201145535.589687-6-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 7e42ee0ee1bce..5dddf962f6509 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -807,6 +807,7 @@ static int vfio_device_first_open(struct vfio_device *device)
 		ret = vfio_group_use_container(device->group);
 		if (ret)
 			goto err_module_put;
+		vfio_device_container_register(device);
 	} else if (device->group->iommufd) {
 		ret = vfio_iommufd_bind(device, device->group->iommufd);
 		if (ret)
@@ -819,17 +820,17 @@ static int vfio_device_first_open(struct vfio_device *device)
 		if (ret)
 			goto err_container;
 	}
-	if (device->group->container)
-		vfio_device_container_register(device);
 	mutex_unlock(&device->group->group_lock);
 	return 0;
 
 err_container:
 	device->kvm = NULL;
-	if (device->group->container)
+	if (device->group->container) {
+		vfio_device_container_unregister(device);
 		vfio_group_unuse_container(device->group);
-	else if (device->group->iommufd)
+	} else if (device->group->iommufd) {
 		vfio_iommufd_unbind(device);
+	}
 err_module_put:
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
@@ -841,15 +842,15 @@ static void vfio_device_last_close(struct vfio_device *device)
 	lockdep_assert_held(&device->dev_set->lock);
 
 	mutex_lock(&device->group->group_lock);
-	if (device->group->container)
-		vfio_device_container_unregister(device);
 	if (device->ops->close_device)
 		device->ops->close_device(device);
 	device->kvm = NULL;
-	if (device->group->container)
+	if (device->group->container) {
+		vfio_device_container_unregister(device);
 		vfio_group_unuse_container(device->group);
-	else if (device->group->iommufd)
+	} else if (device->group->iommufd) {
 		vfio_iommufd_unbind(device);
+	}
 	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 }
-- 
GitLab


From 5cfff0774353ee35601e3d3fe2f0bd95c33aa5db Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Fri, 30 Sep 2022 03:22:55 -0700
Subject: [PATCH 3399/4122] vfio: Make vfio_device_open() truly device specific

Then move group related logic into vfio_device_open_file(). Accordingly
introduce a vfio_device_close() to pair up.

Link: https://lore.kernel.org/r/20221201145535.589687-7-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 46 +++++++++++++++++++++++++---------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 5dddf962f6509..37413ac254c0a 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -855,20 +855,41 @@ static void vfio_device_last_close(struct vfio_device *device)
 	module_put(device->dev->driver->owner);
 }
 
-static struct file *vfio_device_open(struct vfio_device *device)
+static int vfio_device_open(struct vfio_device *device)
 {
-	struct file *filep;
-	int ret;
+	int ret = 0;
 
 	mutex_lock(&device->dev_set->lock);
 	device->open_count++;
 	if (device->open_count == 1) {
 		ret = vfio_device_first_open(device);
 		if (ret)
-			goto err_unlock;
+			device->open_count--;
 	}
 	mutex_unlock(&device->dev_set->lock);
 
+	return ret;
+}
+
+static void vfio_device_close(struct vfio_device *device)
+{
+	mutex_lock(&device->dev_set->lock);
+	vfio_assert_device_open(device);
+	if (device->open_count == 1)
+		vfio_device_last_close(device);
+	device->open_count--;
+	mutex_unlock(&device->dev_set->lock);
+}
+
+static struct file *vfio_device_open_file(struct vfio_device *device)
+{
+	struct file *filep;
+	int ret;
+
+	ret = vfio_device_open(device);
+	if (ret)
+		goto err_out;
+
 	/*
 	 * We can't use anon_inode_getfd() because we need to modify
 	 * the f_mode flags directly to allow more than just ioctls
@@ -897,12 +918,8 @@ static struct file *vfio_device_open(struct vfio_device *device)
 	return filep;
 
 err_close_device:
-	mutex_lock(&device->dev_set->lock);
-	if (device->open_count == 1)
-		vfio_device_last_close(device);
-err_unlock:
-	device->open_count--;
-	mutex_unlock(&device->dev_set->lock);
+	vfio_device_close(device);
+err_out:
 	return ERR_PTR(ret);
 }
 
@@ -930,7 +947,7 @@ static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
 		goto err_put_device;
 	}
 
-	filep = vfio_device_open(device);
+	filep = vfio_device_open_file(device);
 	if (IS_ERR(filep)) {
 		ret = PTR_ERR(filep);
 		goto err_put_fdno;
@@ -1113,12 +1130,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 {
 	struct vfio_device *device = filep->private_data;
 
-	mutex_lock(&device->dev_set->lock);
-	vfio_assert_device_open(device);
-	if (device->open_count == 1)
-		vfio_device_last_close(device);
-	device->open_count--;
-	mutex_unlock(&device->dev_set->lock);
+	vfio_device_close(device);
 
 	vfio_device_put_registration(device);
 
-- 
GitLab


From 5c8d3d93f6a7c9371212690b0195160e5f88bdff Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Wed, 2 Nov 2022 04:42:25 -0700
Subject: [PATCH 3400/4122] vfio: Refactor vfio_device open and close

This refactor makes the vfio_device_open() to accept device, iommufd_ctx
pointer and kvm pointer. These parameters are generic items in today's
group path and future device cdev path. Caller of vfio_device_open()
should take care the necessary protections. e.g. the current group path
need to hold the group_lock to ensure the iommufd_ctx and kvm pointer are
valid.

This refactor also wraps the group spefcific codes in the device open and
close paths to be paired helpers like:

- vfio_device_group_open/close(): call vfio_device_open/close()
- vfio_device_group_use/unuse_iommu(): this pair is container specific.
				       iommufd vs. container is selected
				       in vfio_device_first_open().

Such helpers are supposed to be moved to group.c. While iommufd related
codes will be kept in the generic helpers since future device cdev path
also need to handle iommufd.

Link: https://lore.kernel.org/r/20221201145535.589687-8-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 133 +++++++++++++++++++++++++--------------
 1 file changed, 87 insertions(+), 46 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 37413ac254c0a..a4583f4827e59 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -783,7 +783,38 @@ static bool vfio_assert_device_open(struct vfio_device *device)
 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
 }
 
-static int vfio_device_first_open(struct vfio_device *device)
+static int vfio_device_group_use_iommu(struct vfio_device *device)
+{
+	struct vfio_group *group = device->group;
+	int ret = 0;
+
+	lockdep_assert_held(&group->group_lock);
+
+	if (WARN_ON(!group->container))
+		return -EINVAL;
+
+	ret = vfio_group_use_container(group);
+	if (ret)
+		return ret;
+	vfio_device_container_register(device);
+	return 0;
+}
+
+static void vfio_device_group_unuse_iommu(struct vfio_device *device)
+{
+	struct vfio_group *group = device->group;
+
+	lockdep_assert_held(&group->group_lock);
+
+	if (WARN_ON(!group->container))
+		return;
+
+	vfio_device_container_unregister(device);
+	vfio_group_unuse_container(group);
+}
+
+static int vfio_device_first_open(struct vfio_device *device,
+				  struct iommufd_ctx *iommufd, struct kvm *kvm)
 {
 	int ret;
 
@@ -792,77 +823,56 @@ static int vfio_device_first_open(struct vfio_device *device)
 	if (!try_module_get(device->dev->driver->owner))
 		return -ENODEV;
 
-	/*
-	 * Here we pass the KVM pointer with the group under the lock.  If the
-	 * device driver will use it, it must obtain a reference and release it
-	 * during close_device.
-	 */
-	mutex_lock(&device->group->group_lock);
-	if (!vfio_group_has_iommu(device->group)) {
-		ret = -EINVAL;
+	if (iommufd)
+		ret = vfio_iommufd_bind(device, iommufd);
+	else
+		ret = vfio_device_group_use_iommu(device);
+	if (ret)
 		goto err_module_put;
-	}
 
-	if (device->group->container) {
-		ret = vfio_group_use_container(device->group);
-		if (ret)
-			goto err_module_put;
-		vfio_device_container_register(device);
-	} else if (device->group->iommufd) {
-		ret = vfio_iommufd_bind(device, device->group->iommufd);
-		if (ret)
-			goto err_module_put;
-	}
-
-	device->kvm = device->group->kvm;
+	device->kvm = kvm;
 	if (device->ops->open_device) {
 		ret = device->ops->open_device(device);
 		if (ret)
-			goto err_container;
+			goto err_unuse_iommu;
 	}
-	mutex_unlock(&device->group->group_lock);
 	return 0;
 
-err_container:
+err_unuse_iommu:
 	device->kvm = NULL;
-	if (device->group->container) {
-		vfio_device_container_unregister(device);
-		vfio_group_unuse_container(device->group);
-	} else if (device->group->iommufd) {
+	if (iommufd)
 		vfio_iommufd_unbind(device);
-	}
+	else
+		vfio_device_group_unuse_iommu(device);
 err_module_put:
-	mutex_unlock(&device->group->group_lock);
 	module_put(device->dev->driver->owner);
 	return ret;
 }
 
-static void vfio_device_last_close(struct vfio_device *device)
+static void vfio_device_last_close(struct vfio_device *device,
+				   struct iommufd_ctx *iommufd)
 {
 	lockdep_assert_held(&device->dev_set->lock);
 
-	mutex_lock(&device->group->group_lock);
 	if (device->ops->close_device)
 		device->ops->close_device(device);
 	device->kvm = NULL;
-	if (device->group->container) {
-		vfio_device_container_unregister(device);
-		vfio_group_unuse_container(device->group);
-	} else if (device->group->iommufd) {
+	if (iommufd)
 		vfio_iommufd_unbind(device);
-	}
-	mutex_unlock(&device->group->group_lock);
+	else
+		vfio_device_group_unuse_iommu(device);
 	module_put(device->dev->driver->owner);
 }
 
-static int vfio_device_open(struct vfio_device *device)
+static int vfio_device_open(struct vfio_device *device,
+			    struct iommufd_ctx *iommufd, struct kvm *kvm)
 {
 	int ret = 0;
 
 	mutex_lock(&device->dev_set->lock);
 	device->open_count++;
 	if (device->open_count == 1) {
-		ret = vfio_device_first_open(device);
+		ret = vfio_device_first_open(device, iommufd, kvm);
 		if (ret)
 			device->open_count--;
 	}
@@ -871,22 +881,53 @@ static int vfio_device_open(struct vfio_device *device)
 	return ret;
 }
 
-static void vfio_device_close(struct vfio_device *device)
+static void vfio_device_close(struct vfio_device *device,
+			      struct iommufd_ctx *iommufd)
 {
 	mutex_lock(&device->dev_set->lock);
 	vfio_assert_device_open(device);
 	if (device->open_count == 1)
-		vfio_device_last_close(device);
+		vfio_device_last_close(device, iommufd);
 	device->open_count--;
 	mutex_unlock(&device->dev_set->lock);
 }
 
+static int vfio_device_group_open(struct vfio_device *device)
+{
+	int ret;
+
+	mutex_lock(&device->group->group_lock);
+	if (!vfio_group_has_iommu(device->group)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Here we pass the KVM pointer with the group under the lock.  If the
+	 * device driver will use it, it must obtain a reference and release it
+	 * during close_device.
+	 */
+	ret = vfio_device_open(device, device->group->iommufd,
+			       device->group->kvm);
+
+out_unlock:
+	mutex_unlock(&device->group->group_lock);
+	return ret;
+}
+
+static void vfio_device_group_close(struct vfio_device *device)
+{
+	mutex_lock(&device->group->group_lock);
+	vfio_device_close(device, device->group->iommufd);
+	mutex_unlock(&device->group->group_lock);
+}
+
 static struct file *vfio_device_open_file(struct vfio_device *device)
 {
 	struct file *filep;
 	int ret;
 
-	ret = vfio_device_open(device);
+	ret = vfio_device_group_open(device);
 	if (ret)
 		goto err_out;
 
@@ -918,7 +959,7 @@ static struct file *vfio_device_open_file(struct vfio_device *device)
 	return filep;
 
 err_close_device:
-	vfio_device_close(device);
+	vfio_device_group_close(device);
 err_out:
 	return ERR_PTR(ret);
 }
@@ -1130,7 +1171,7 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 {
 	struct vfio_device *device = filep->private_data;
 
-	vfio_device_close(device);
+	vfio_device_group_close(device);
 
 	vfio_device_put_registration(device);
 
-- 
GitLab


From 1334e47ee798ac4715330a6ade0afc929cd54aff Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Fri, 23 Sep 2022 07:08:36 -0700
Subject: [PATCH 3401/4122] vfio: Wrap vfio group module init/clean code into
 helpers

This wraps the init/clean code of vfio group global variable to be
helpers, and prepares for further moving vfio group specific code into
separate file.

As container is used by group, so vfio_container_init/cleanup() is moved
into vfio_group_init/cleanup().

Link: https://lore.kernel.org/r/20221201145535.589687-9-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/vfio_main.c | 56 ++++++++++++++++++++++++++--------------
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index a4583f4827e59..e053998baffd8 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -2066,12 +2066,11 @@ static char *vfio_devnode(struct device *dev, umode_t *mode)
 	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
 }
 
-static int __init vfio_init(void)
+static int __init vfio_group_init(void)
 {
 	int ret;
 
 	ida_init(&vfio.group_ida);
-	ida_init(&vfio.device_ida);
 	mutex_init(&vfio.group_lock);
 	INIT_LIST_HEAD(&vfio.group_list);
 
@@ -2088,24 +2087,12 @@ static int __init vfio_init(void)
 
 	vfio.class->devnode = vfio_devnode;
 
-	/* /sys/class/vfio-dev/vfioX */
-	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
-	if (IS_ERR(vfio.device_class)) {
-		ret = PTR_ERR(vfio.device_class);
-		goto err_dev_class;
-	}
-
 	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
 	if (ret)
 		goto err_alloc_chrdev;
-
-	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
 	return 0;
 
 err_alloc_chrdev:
-	class_destroy(vfio.device_class);
-	vfio.device_class = NULL;
-err_dev_class:
 	class_destroy(vfio.class);
 	vfio.class = NULL;
 err_group_class:
@@ -2113,18 +2100,47 @@ err_group_class:
 	return ret;
 }
 
-static void __exit vfio_cleanup(void)
+static void vfio_group_cleanup(void)
 {
 	WARN_ON(!list_empty(&vfio.group_list));
-
-	ida_destroy(&vfio.device_ida);
 	ida_destroy(&vfio.group_ida);
 	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
-	class_destroy(vfio.device_class);
-	vfio.device_class = NULL;
 	class_destroy(vfio.class);
-	vfio_container_cleanup();
 	vfio.class = NULL;
+	vfio_container_cleanup();
+}
+
+static int __init vfio_init(void)
+{
+	int ret;
+
+	ida_init(&vfio.device_ida);
+
+	ret = vfio_group_init();
+	if (ret)
+		return ret;
+
+	/* /sys/class/vfio-dev/vfioX */
+	vfio.device_class = class_create(THIS_MODULE, "vfio-dev");
+	if (IS_ERR(vfio.device_class)) {
+		ret = PTR_ERR(vfio.device_class);
+		goto err_dev_class;
+	}
+
+	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
+	return 0;
+
+err_dev_class:
+	vfio_group_cleanup();
+	return ret;
+}
+
+static void __exit vfio_cleanup(void)
+{
+	ida_destroy(&vfio.device_ida);
+	class_destroy(vfio.device_class);
+	vfio.device_class = NULL;
+	vfio_group_cleanup();
 	xa_destroy(&vfio_device_set_xa);
 }
 
-- 
GitLab


From 8da7a0e79f9b15330ae68d8532425399f4c27045 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Thu, 10 Nov 2022 18:57:01 -0800
Subject: [PATCH 3402/4122] vfio: Refactor dma APIs for emulated devices

To use group helpers instead of opening group related code in the
API. This prepares moving group specific code out of vfio_main.c.

Link: https://lore.kernel.org/r/20221201145535.589687-10-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Tested-by: Yu He <yu.he@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/container.c | 20 +++++++++++++-------
 drivers/vfio/vfio.h      | 32 ++++++++++++++++----------------
 drivers/vfio/vfio_main.c | 25 ++++++++++++++-----------
 3 files changed, 43 insertions(+), 34 deletions(-)

diff --git a/drivers/vfio/container.c b/drivers/vfio/container.c
index 6b362d97d6822..b7a9560ab25e4 100644
--- a/drivers/vfio/container.c
+++ b/drivers/vfio/container.c
@@ -540,10 +540,12 @@ void vfio_group_unuse_container(struct vfio_group *group)
 	fput(group->opened_file);
 }
 
-int vfio_container_pin_pages(struct vfio_container *container,
-			     struct iommu_group *iommu_group, dma_addr_t iova,
-			     int npage, int prot, struct page **pages)
+int vfio_device_container_pin_pages(struct vfio_device *device,
+				    dma_addr_t iova, int npage,
+				    int prot, struct page **pages)
 {
+	struct vfio_container *container = device->group->container;
+	struct iommu_group *iommu_group = device->group->iommu_group;
 	struct vfio_iommu_driver *driver = container->iommu_driver;
 
 	if (npage > VFIO_PIN_PAGES_MAX_ENTRIES)
@@ -555,9 +557,11 @@ int vfio_container_pin_pages(struct vfio_container *container,
 				      npage, prot, pages);
 }
 
-void vfio_container_unpin_pages(struct vfio_container *container,
-				dma_addr_t iova, int npage)
+void vfio_device_container_unpin_pages(struct vfio_device *device,
+				       dma_addr_t iova, int npage)
 {
+	struct vfio_container *container = device->group->container;
+
 	if (WARN_ON(npage <= 0 || npage > VFIO_PIN_PAGES_MAX_ENTRIES))
 		return;
 
@@ -565,9 +569,11 @@ void vfio_container_unpin_pages(struct vfio_container *container,
 						  npage);
 }
 
-int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
-			  void *data, size_t len, bool write)
+int vfio_device_container_dma_rw(struct vfio_device *device,
+				 dma_addr_t iova, void *data,
+				 size_t len, bool write)
 {
+	struct vfio_container *container = device->group->container;
 	struct vfio_iommu_driver *driver = container->iommu_driver;
 
 	if (unlikely(!driver || !driver->ops->dma_rw))
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index ce5fe3fc493b4..a112e8f2b291a 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -122,13 +122,14 @@ int vfio_container_attach_group(struct vfio_container *container,
 void vfio_group_detach_container(struct vfio_group *group);
 void vfio_device_container_register(struct vfio_device *device);
 void vfio_device_container_unregister(struct vfio_device *device);
-int vfio_container_pin_pages(struct vfio_container *container,
-			     struct iommu_group *iommu_group, dma_addr_t iova,
-			     int npage, int prot, struct page **pages);
-void vfio_container_unpin_pages(struct vfio_container *container,
-				dma_addr_t iova, int npage);
-int vfio_container_dma_rw(struct vfio_container *container, dma_addr_t iova,
-			  void *data, size_t len, bool write);
+int vfio_device_container_pin_pages(struct vfio_device *device,
+				    dma_addr_t iova, int npage,
+				    int prot, struct page **pages);
+void vfio_device_container_unpin_pages(struct vfio_device *device,
+				       dma_addr_t iova, int npage);
+int vfio_device_container_dma_rw(struct vfio_device *device,
+				 dma_addr_t iova, void *data,
+				 size_t len, bool write);
 
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
@@ -166,22 +167,21 @@ static inline void vfio_device_container_unregister(struct vfio_device *device)
 {
 }
 
-static inline int vfio_container_pin_pages(struct vfio_container *container,
-					   struct iommu_group *iommu_group,
-					   dma_addr_t iova, int npage, int prot,
-					   struct page **pages)
+static inline int vfio_device_container_pin_pages(struct vfio_device *device,
+						  dma_addr_t iova, int npage,
+						  int prot, struct page **pages)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline void vfio_container_unpin_pages(struct vfio_container *container,
-					      dma_addr_t iova, int npage)
+static inline void vfio_device_container_unpin_pages(struct vfio_device *device,
+						     dma_addr_t iova, int npage)
 {
 }
 
-static inline int vfio_container_dma_rw(struct vfio_container *container,
-					dma_addr_t iova, void *data, size_t len,
-					bool write)
+static inline int vfio_device_container_dma_rw(struct vfio_device *device,
+					       dma_addr_t iova, void *data,
+					       size_t len, bool write)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index e053998baffd8..b7f94ace7b10b 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1938,6 +1938,11 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
 }
 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
 
+static bool vfio_device_has_container(struct vfio_device *device)
+{
+	return device->group->container;
+}
+
 /*
  * Pin contiguous user pages and return their associated host pages for local
  * domain only.
@@ -1950,7 +1955,7 @@ EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
  * Return error or number of pages pinned.
  *
  * A driver may only call this function if the vfio_device was created
- * by vfio_register_emulated_iommu_dev() due to vfio_container_pin_pages().
+ * by vfio_register_emulated_iommu_dev() due to vfio_device_container_pin_pages().
  */
 int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
 		   int npage, int prot, struct page **pages)
@@ -1958,10 +1963,9 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
 	/* group->container cannot change while a vfio device is open */
 	if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
 		return -EINVAL;
-	if (device->group->container)
-		return vfio_container_pin_pages(device->group->container,
-						device->group->iommu_group,
-						iova, npage, prot, pages);
+	if (vfio_device_has_container(device))
+		return vfio_device_container_pin_pages(device, iova,
+						       npage, prot, pages);
 	if (device->iommufd_access) {
 		int ret;
 
@@ -1997,9 +2001,8 @@ void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
 	if (WARN_ON(!vfio_assert_device_open(device)))
 		return;
 
-	if (device->group->container) {
-		vfio_container_unpin_pages(device->group->container, iova,
-					   npage);
+	if (vfio_device_has_container(device)) {
+		vfio_device_container_unpin_pages(device, iova, npage);
 		return;
 	}
 	if (device->iommufd_access) {
@@ -2036,9 +2039,9 @@ int vfio_dma_rw(struct vfio_device *device, dma_addr_t iova, void *data,
 	if (!data || len <= 0 || !vfio_assert_device_open(device))
 		return -EINVAL;
 
-	if (device->group->container)
-		return vfio_container_dma_rw(device->group->container, iova,
-					     data, len, write);
+	if (vfio_device_has_container(device))
+		return vfio_device_container_dma_rw(device, iova,
+						    data, len, write);
 
 	if (device->iommufd_access) {
 		unsigned int flags = 0;
-- 
GitLab


From 9eefba8002c27d65ab52a533fd0611b099b73591 Mon Sep 17 00:00:00 2001
From: Yi Liu <yi.l.liu@intel.com>
Date: Fri, 25 Nov 2022 03:26:42 -0800
Subject: [PATCH 3403/4122] vfio: Move vfio group specific code into group.c

This prepares for compiling out vfio group after vfio device cdev is
added. No vfio_group decode code should be in vfio_main.c, and neither
device->group reference should be in vfio_main.c.

No functional change is intended.

Link: https://lore.kernel.org/r/20221201145535.589687-11-yi.l.liu@intel.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Alex Williamson <alex.williamson@redhat.com>
Tested-by: Yu He <yu.he@intel.com>
Tested-by: Lixiao Yang <lixiao.yang@intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/vfio/Makefile    |   1 +
 drivers/vfio/group.c     | 877 +++++++++++++++++++++++++++++++++++++++
 drivers/vfio/vfio.h      |  22 +
 drivers/vfio/vfio_main.c | 877 +--------------------------------------
 4 files changed, 907 insertions(+), 870 deletions(-)
 create mode 100644 drivers/vfio/group.c

diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index b953517dc70f9..3783db7e8082c 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -4,6 +4,7 @@ vfio_virqfd-y := virqfd.o
 obj-$(CONFIG_VFIO) += vfio.o
 
 vfio-y += vfio_main.o \
+	  group.o \
 	  iova_bitmap.o
 vfio-$(CONFIG_IOMMUFD) += iommufd.o
 vfio-$(CONFIG_VFIO_CONTAINER) += container.o
diff --git a/drivers/vfio/group.c b/drivers/vfio/group.c
new file mode 100644
index 0000000000000..c5d8bf10495ec
--- /dev/null
+++ b/drivers/vfio/group.c
@@ -0,0 +1,877 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VFIO core
+ *
+ * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
+ *     Author: Alex Williamson <alex.williamson@redhat.com>
+ *
+ * Derived from original vfio:
+ * Copyright 2010 Cisco Systems, Inc.  All rights reserved.
+ * Author: Tom Lyon, pugs@cisco.com
+ */
+
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+#include <linux/anon_inodes.h>
+#include "vfio.h"
+
+static struct vfio {
+	struct class			*class;
+	struct list_head		group_list;
+	struct mutex			group_lock; /* locks group_list */
+	struct ida			group_ida;
+	dev_t				group_devt;
+} vfio;
+
+static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
+						     char *buf)
+{
+	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
+
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(it, &group->device_list, group_next) {
+		int ret;
+
+		if (it->ops->match) {
+			ret = it->ops->match(it, buf);
+			if (ret < 0) {
+				device = ERR_PTR(ret);
+				break;
+			}
+		} else {
+			ret = !strcmp(dev_name(it->dev), buf);
+		}
+
+		if (ret && vfio_device_try_get_registration(it)) {
+			device = it;
+			break;
+		}
+	}
+	mutex_unlock(&group->device_lock);
+
+	return device;
+}
+
+/*
+ * VFIO Group fd, /dev/vfio/$GROUP
+ */
+static bool vfio_group_has_iommu(struct vfio_group *group)
+{
+	lockdep_assert_held(&group->group_lock);
+	/*
+	 * There can only be users if there is a container, and if there is a
+	 * container there must be users.
+	 */
+	WARN_ON(!group->container != !group->container_users);
+
+	return group->container || group->iommufd;
+}
+
+/*
+ * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
+ * if there was no container to unset.  Since the ioctl is called on
+ * the group, we know that still exists, therefore the only valid
+ * transition here is 1->0.
+ */
+static int vfio_group_ioctl_unset_container(struct vfio_group *group)
+{
+	int ret = 0;
+
+	mutex_lock(&group->group_lock);
+	if (!vfio_group_has_iommu(group)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	if (group->container) {
+		if (group->container_users != 1) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+		vfio_group_detach_container(group);
+	}
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
+	}
+
+out_unlock:
+	mutex_unlock(&group->group_lock);
+	return ret;
+}
+
+static int vfio_group_ioctl_set_container(struct vfio_group *group,
+					  int __user *arg)
+{
+	struct vfio_container *container;
+	struct iommufd_ctx *iommufd;
+	struct fd f;
+	int ret;
+	int fd;
+
+	if (get_user(fd, arg))
+		return -EFAULT;
+
+	f = fdget(fd);
+	if (!f.file)
+		return -EBADF;
+
+	mutex_lock(&group->group_lock);
+	if (vfio_group_has_iommu(group)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	if (!group->iommu_group) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	container = vfio_container_from_file(f.file);
+	if (container) {
+		ret = vfio_container_attach_group(container, group);
+		goto out_unlock;
+	}
+
+	iommufd = iommufd_ctx_from_file(f.file);
+	if (!IS_ERR(iommufd)) {
+		u32 ioas_id;
+
+		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
+		if (ret) {
+			iommufd_ctx_put(group->iommufd);
+			goto out_unlock;
+		}
+
+		group->iommufd = iommufd;
+		goto out_unlock;
+	}
+
+	/* The FD passed is not recognized. */
+	ret = -EBADFD;
+
+out_unlock:
+	mutex_unlock(&group->group_lock);
+	fdput(f);
+	return ret;
+}
+
+static int vfio_device_group_open(struct vfio_device *device)
+{
+	int ret;
+
+	mutex_lock(&device->group->group_lock);
+	if (!vfio_group_has_iommu(device->group)) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	/*
+	 * Here we pass the KVM pointer with the group under the lock.  If the
+	 * device driver will use it, it must obtain a reference and release it
+	 * during close_device.
+	 */
+	ret = vfio_device_open(device, device->group->iommufd,
+			       device->group->kvm);
+
+out_unlock:
+	mutex_unlock(&device->group->group_lock);
+	return ret;
+}
+
+void vfio_device_group_close(struct vfio_device *device)
+{
+	mutex_lock(&device->group->group_lock);
+	vfio_device_close(device, device->group->iommufd);
+	mutex_unlock(&device->group->group_lock);
+}
+
+static struct file *vfio_device_open_file(struct vfio_device *device)
+{
+	struct file *filep;
+	int ret;
+
+	ret = vfio_device_group_open(device);
+	if (ret)
+		goto err_out;
+
+	/*
+	 * We can't use anon_inode_getfd() because we need to modify
+	 * the f_mode flags directly to allow more than just ioctls
+	 */
+	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
+				   device, O_RDWR);
+	if (IS_ERR(filep)) {
+		ret = PTR_ERR(filep);
+		goto err_close_device;
+	}
+
+	/*
+	 * TODO: add an anon_inode interface to do this.
+	 * Appears to be missing by lack of need rather than
+	 * explicitly prevented.  Now there's need.
+	 */
+	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
+
+	if (device->group->type == VFIO_NO_IOMMU)
+		dev_warn(device->dev, "vfio-noiommu device opened by user "
+			 "(%s:%d)\n", current->comm, task_pid_nr(current));
+	/*
+	 * On success the ref of device is moved to the file and
+	 * put in vfio_device_fops_release()
+	 */
+	return filep;
+
+err_close_device:
+	vfio_device_group_close(device);
+err_out:
+	return ERR_PTR(ret);
+}
+
+static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
+					  char __user *arg)
+{
+	struct vfio_device *device;
+	struct file *filep;
+	char *buf;
+	int fdno;
+	int ret;
+
+	buf = strndup_user(arg, PAGE_SIZE);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	device = vfio_device_get_from_name(group, buf);
+	kfree(buf);
+	if (IS_ERR(device))
+		return PTR_ERR(device);
+
+	fdno = get_unused_fd_flags(O_CLOEXEC);
+	if (fdno < 0) {
+		ret = fdno;
+		goto err_put_device;
+	}
+
+	filep = vfio_device_open_file(device);
+	if (IS_ERR(filep)) {
+		ret = PTR_ERR(filep);
+		goto err_put_fdno;
+	}
+
+	fd_install(fdno, filep);
+	return fdno;
+
+err_put_fdno:
+	put_unused_fd(fdno);
+err_put_device:
+	vfio_device_put_registration(device);
+	return ret;
+}
+
+static int vfio_group_ioctl_get_status(struct vfio_group *group,
+				       struct vfio_group_status __user *arg)
+{
+	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
+	struct vfio_group_status status;
+
+	if (copy_from_user(&status, arg, minsz))
+		return -EFAULT;
+
+	if (status.argsz < minsz)
+		return -EINVAL;
+
+	status.flags = 0;
+
+	mutex_lock(&group->group_lock);
+	if (!group->iommu_group) {
+		mutex_unlock(&group->group_lock);
+		return -ENODEV;
+	}
+
+	/*
+	 * With the container FD the iommu_group_claim_dma_owner() is done
+	 * during SET_CONTAINER but for IOMMFD this is done during
+	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
+	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
+	 * to viability.
+	 */
+	if (vfio_group_has_iommu(group))
+		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
+				VFIO_GROUP_FLAGS_VIABLE;
+	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
+		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
+	mutex_unlock(&group->group_lock);
+
+	if (copy_to_user(arg, &status, minsz))
+		return -EFAULT;
+	return 0;
+}
+
+static long vfio_group_fops_unl_ioctl(struct file *filep,
+				      unsigned int cmd, unsigned long arg)
+{
+	struct vfio_group *group = filep->private_data;
+	void __user *uarg = (void __user *)arg;
+
+	switch (cmd) {
+	case VFIO_GROUP_GET_DEVICE_FD:
+		return vfio_group_ioctl_get_device_fd(group, uarg);
+	case VFIO_GROUP_GET_STATUS:
+		return vfio_group_ioctl_get_status(group, uarg);
+	case VFIO_GROUP_SET_CONTAINER:
+		return vfio_group_ioctl_set_container(group, uarg);
+	case VFIO_GROUP_UNSET_CONTAINER:
+		return vfio_group_ioctl_unset_container(group);
+	default:
+		return -ENOTTY;
+	}
+}
+
+static int vfio_group_fops_open(struct inode *inode, struct file *filep)
+{
+	struct vfio_group *group =
+		container_of(inode->i_cdev, struct vfio_group, cdev);
+	int ret;
+
+	mutex_lock(&group->group_lock);
+
+	/*
+	 * drivers can be zero if this races with vfio_device_remove_group(), it
+	 * will be stable at 0 under the group rwsem
+	 */
+	if (refcount_read(&group->drivers) == 0) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
+
+	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+
+	/*
+	 * Do we need multiple instances of the group open?  Seems not.
+	 */
+	if (group->opened_file) {
+		ret = -EBUSY;
+		goto out_unlock;
+	}
+	group->opened_file = filep;
+	filep->private_data = group;
+	ret = 0;
+out_unlock:
+	mutex_unlock(&group->group_lock);
+	return ret;
+}
+
+static int vfio_group_fops_release(struct inode *inode, struct file *filep)
+{
+	struct vfio_group *group = filep->private_data;
+
+	filep->private_data = NULL;
+
+	mutex_lock(&group->group_lock);
+	/*
+	 * Device FDs hold a group file reference, therefore the group release
+	 * is only called when there are no open devices.
+	 */
+	WARN_ON(group->notifier.head);
+	if (group->container)
+		vfio_group_detach_container(group);
+	if (group->iommufd) {
+		iommufd_ctx_put(group->iommufd);
+		group->iommufd = NULL;
+	}
+	group->opened_file = NULL;
+	mutex_unlock(&group->group_lock);
+	return 0;
+}
+
+static const struct file_operations vfio_group_fops = {
+	.owner		= THIS_MODULE,
+	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
+	.compat_ioctl	= compat_ptr_ioctl,
+	.open		= vfio_group_fops_open,
+	.release	= vfio_group_fops_release,
+};
+
+/*
+ * Group objects - create, release, get, put, search
+ */
+static struct vfio_group *
+vfio_group_find_from_iommu(struct iommu_group *iommu_group)
+{
+	struct vfio_group *group;
+
+	lockdep_assert_held(&vfio.group_lock);
+
+	/*
+	 * group->iommu_group from the vfio.group_list cannot be NULL
+	 * under the vfio.group_lock.
+	 */
+	list_for_each_entry(group, &vfio.group_list, vfio_next) {
+		if (group->iommu_group == iommu_group)
+			return group;
+	}
+	return NULL;
+}
+
+static void vfio_group_release(struct device *dev)
+{
+	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
+
+	mutex_destroy(&group->device_lock);
+	mutex_destroy(&group->group_lock);
+	WARN_ON(group->iommu_group);
+	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
+	kfree(group);
+}
+
+static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
+					   enum vfio_group_type type)
+{
+	struct vfio_group *group;
+	int minor;
+
+	group = kzalloc(sizeof(*group), GFP_KERNEL);
+	if (!group)
+		return ERR_PTR(-ENOMEM);
+
+	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
+	if (minor < 0) {
+		kfree(group);
+		return ERR_PTR(minor);
+	}
+
+	device_initialize(&group->dev);
+	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
+	group->dev.class = vfio.class;
+	group->dev.release = vfio_group_release;
+	cdev_init(&group->cdev, &vfio_group_fops);
+	group->cdev.owner = THIS_MODULE;
+
+	refcount_set(&group->drivers, 1);
+	mutex_init(&group->group_lock);
+	INIT_LIST_HEAD(&group->device_list);
+	mutex_init(&group->device_lock);
+	group->iommu_group = iommu_group;
+	/* put in vfio_group_release() */
+	iommu_group_ref_get(iommu_group);
+	group->type = type;
+	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
+
+	return group;
+}
+
+static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
+		enum vfio_group_type type)
+{
+	struct vfio_group *group;
+	struct vfio_group *ret;
+	int err;
+
+	lockdep_assert_held(&vfio.group_lock);
+
+	group = vfio_group_alloc(iommu_group, type);
+	if (IS_ERR(group))
+		return group;
+
+	err = dev_set_name(&group->dev, "%s%d",
+			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
+			   iommu_group_id(iommu_group));
+	if (err) {
+		ret = ERR_PTR(err);
+		goto err_put;
+	}
+
+	err = cdev_device_add(&group->cdev, &group->dev);
+	if (err) {
+		ret = ERR_PTR(err);
+		goto err_put;
+	}
+
+	list_add(&group->vfio_next, &vfio.group_list);
+
+	return group;
+
+err_put:
+	put_device(&group->dev);
+	return ret;
+}
+
+static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
+		enum vfio_group_type type)
+{
+	struct iommu_group *iommu_group;
+	struct vfio_group *group;
+	int ret;
+
+	iommu_group = iommu_group_alloc();
+	if (IS_ERR(iommu_group))
+		return ERR_CAST(iommu_group);
+
+	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
+	if (ret)
+		goto out_put_group;
+	ret = iommu_group_add_device(iommu_group, dev);
+	if (ret)
+		goto out_put_group;
+
+	mutex_lock(&vfio.group_lock);
+	group = vfio_create_group(iommu_group, type);
+	mutex_unlock(&vfio.group_lock);
+	if (IS_ERR(group)) {
+		ret = PTR_ERR(group);
+		goto out_remove_device;
+	}
+	iommu_group_put(iommu_group);
+	return group;
+
+out_remove_device:
+	iommu_group_remove_device(dev);
+out_put_group:
+	iommu_group_put(iommu_group);
+	return ERR_PTR(ret);
+}
+
+static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
+{
+	struct vfio_device *device;
+
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(device, &group->device_list, group_next) {
+		if (device->dev == dev) {
+			mutex_unlock(&group->device_lock);
+			return true;
+		}
+	}
+	mutex_unlock(&group->device_lock);
+	return false;
+}
+
+static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
+{
+	struct iommu_group *iommu_group;
+	struct vfio_group *group;
+
+	iommu_group = iommu_group_get(dev);
+	if (!iommu_group && vfio_noiommu) {
+		/*
+		 * With noiommu enabled, create an IOMMU group for devices that
+		 * don't already have one, implying no IOMMU hardware/driver
+		 * exists.  Taint the kernel because we're about to give a DMA
+		 * capable device to a user without IOMMU protection.
+		 */
+		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
+		if (!IS_ERR(group)) {
+			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
+			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
+		}
+		return group;
+	}
+
+	if (!iommu_group)
+		return ERR_PTR(-EINVAL);
+
+	/*
+	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
+	 * restore cache coherency. It has to be checked here because it is only
+	 * valid for cases where we are using iommu groups.
+	 */
+	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
+		iommu_group_put(iommu_group);
+		return ERR_PTR(-EINVAL);
+	}
+
+	mutex_lock(&vfio.group_lock);
+	group = vfio_group_find_from_iommu(iommu_group);
+	if (group) {
+		if (WARN_ON(vfio_group_has_device(group, dev)))
+			group = ERR_PTR(-EINVAL);
+		else
+			refcount_inc(&group->drivers);
+	} else {
+		group = vfio_create_group(iommu_group, VFIO_IOMMU);
+	}
+	mutex_unlock(&vfio.group_lock);
+
+	/* The vfio_group holds a reference to the iommu_group */
+	iommu_group_put(iommu_group);
+	return group;
+}
+
+int vfio_device_set_group(struct vfio_device *device,
+			  enum vfio_group_type type)
+{
+	struct vfio_group *group;
+
+	if (type == VFIO_IOMMU)
+		group = vfio_group_find_or_alloc(device->dev);
+	else
+		group = vfio_noiommu_group_alloc(device->dev, type);
+
+	if (IS_ERR(group))
+		return PTR_ERR(group);
+
+	/* Our reference on group is moved to the device */
+	device->group = group;
+	return 0;
+}
+
+void vfio_device_remove_group(struct vfio_device *device)
+{
+	struct vfio_group *group = device->group;
+	struct iommu_group *iommu_group;
+
+	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
+		iommu_group_remove_device(device->dev);
+
+	/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
+	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
+		return;
+	list_del(&group->vfio_next);
+
+	/*
+	 * We could concurrently probe another driver in the group that might
+	 * race vfio_device_remove_group() with vfio_get_group(), so we have to
+	 * ensure that the sysfs is all cleaned up under lock otherwise the
+	 * cdev_device_add() will fail due to the name aready existing.
+	 */
+	cdev_device_del(&group->cdev, &group->dev);
+
+	mutex_lock(&group->group_lock);
+	/*
+	 * These data structures all have paired operations that can only be
+	 * undone when the caller holds a live reference on the device. Since
+	 * all pairs must be undone these WARN_ON's indicate some caller did not
+	 * properly hold the group reference.
+	 */
+	WARN_ON(!list_empty(&group->device_list));
+	WARN_ON(group->notifier.head);
+
+	/*
+	 * Revoke all users of group->iommu_group. At this point we know there
+	 * are no devices active because we are unplugging the last one. Setting
+	 * iommu_group to NULL blocks all new users.
+	 */
+	if (group->container)
+		vfio_group_detach_container(group);
+	iommu_group = group->iommu_group;
+	group->iommu_group = NULL;
+	mutex_unlock(&group->group_lock);
+	mutex_unlock(&vfio.group_lock);
+
+	iommu_group_put(iommu_group);
+	put_device(&group->dev);
+}
+
+void vfio_device_group_register(struct vfio_device *device)
+{
+	mutex_lock(&device->group->device_lock);
+	list_add(&device->group_next, &device->group->device_list);
+	mutex_unlock(&device->group->device_lock);
+}
+
+void vfio_device_group_unregister(struct vfio_device *device)
+{
+	mutex_lock(&device->group->device_lock);
+	list_del(&device->group_next);
+	mutex_unlock(&device->group->device_lock);
+}
+
+int vfio_device_group_use_iommu(struct vfio_device *device)
+{
+	struct vfio_group *group = device->group;
+	int ret = 0;
+
+	lockdep_assert_held(&group->group_lock);
+
+	if (WARN_ON(!group->container))
+		return -EINVAL;
+
+	ret = vfio_group_use_container(group);
+	if (ret)
+		return ret;
+	vfio_device_container_register(device);
+	return 0;
+}
+
+void vfio_device_group_unuse_iommu(struct vfio_device *device)
+{
+	struct vfio_group *group = device->group;
+
+	lockdep_assert_held(&group->group_lock);
+
+	if (WARN_ON(!group->container))
+		return;
+
+	vfio_device_container_unregister(device);
+	vfio_group_unuse_container(group);
+}
+
+bool vfio_device_has_container(struct vfio_device *device)
+{
+	return device->group->container;
+}
+
+/**
+ * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
+ * @file: VFIO group file
+ *
+ * The returned iommu_group is valid as long as a ref is held on the file. This
+ * returns a reference on the group. This function is deprecated, only the SPAPR
+ * path in kvm should call it.
+ */
+struct iommu_group *vfio_file_iommu_group(struct file *file)
+{
+	struct vfio_group *group = file->private_data;
+	struct iommu_group *iommu_group = NULL;
+
+	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
+		return NULL;
+
+	if (!vfio_file_is_group(file))
+		return NULL;
+
+	mutex_lock(&group->group_lock);
+	if (group->iommu_group) {
+		iommu_group = group->iommu_group;
+		iommu_group_ref_get(iommu_group);
+	}
+	mutex_unlock(&group->group_lock);
+	return iommu_group;
+}
+EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
+
+/**
+ * vfio_file_is_group - True if the file is usable with VFIO aPIS
+ * @file: VFIO group file
+ */
+bool vfio_file_is_group(struct file *file)
+{
+	return file->f_op == &vfio_group_fops;
+}
+EXPORT_SYMBOL_GPL(vfio_file_is_group);
+
+/**
+ * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
+ *        is always CPU cache coherent
+ * @file: VFIO group file
+ *
+ * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
+ * bit in DMA transactions. A return of false indicates that the user has
+ * rights to access additional instructions such as wbinvd on x86.
+ */
+bool vfio_file_enforced_coherent(struct file *file)
+{
+	struct vfio_group *group = file->private_data;
+	struct vfio_device *device;
+	bool ret = true;
+
+	if (!vfio_file_is_group(file))
+		return true;
+
+	/*
+	 * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
+	 * any domain later attached to it will also not support it. If the cap
+	 * is set then the iommu_domain eventually attached to the device/group
+	 * must use a domain with enforce_cache_coherency().
+	 */
+	mutex_lock(&group->device_lock);
+	list_for_each_entry(device, &group->device_list, group_next) {
+		if (!device_iommu_capable(device->dev,
+					  IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
+			ret = false;
+			break;
+		}
+	}
+	mutex_unlock(&group->device_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
+
+/**
+ * vfio_file_set_kvm - Link a kvm with VFIO drivers
+ * @file: VFIO group file
+ * @kvm: KVM to link
+ *
+ * When a VFIO device is first opened the KVM will be available in
+ * device->kvm if one was associated with the group.
+ */
+void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
+{
+	struct vfio_group *group = file->private_data;
+
+	if (!vfio_file_is_group(file))
+		return;
+
+	mutex_lock(&group->group_lock);
+	group->kvm = kvm;
+	mutex_unlock(&group->group_lock);
+}
+EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
+
+/**
+ * vfio_file_has_dev - True if the VFIO file is a handle for device
+ * @file: VFIO file to check
+ * @device: Device that must be part of the file
+ *
+ * Returns true if given file has permission to manipulate the given device.
+ */
+bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
+{
+	struct vfio_group *group = file->private_data;
+
+	if (!vfio_file_is_group(file))
+		return false;
+
+	return group == device->group;
+}
+EXPORT_SYMBOL_GPL(vfio_file_has_dev);
+
+static char *vfio_devnode(struct device *dev, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
+}
+
+int __init vfio_group_init(void)
+{
+	int ret;
+
+	ida_init(&vfio.group_ida);
+	mutex_init(&vfio.group_lock);
+	INIT_LIST_HEAD(&vfio.group_list);
+
+	ret = vfio_container_init();
+	if (ret)
+		return ret;
+
+	/* /dev/vfio/$GROUP */
+	vfio.class = class_create(THIS_MODULE, "vfio");
+	if (IS_ERR(vfio.class)) {
+		ret = PTR_ERR(vfio.class);
+		goto err_group_class;
+	}
+
+	vfio.class->devnode = vfio_devnode;
+
+	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
+	if (ret)
+		goto err_alloc_chrdev;
+	return 0;
+
+err_alloc_chrdev:
+	class_destroy(vfio.class);
+	vfio.class = NULL;
+err_group_class:
+	vfio_container_cleanup();
+	return ret;
+}
+
+void vfio_group_cleanup(void)
+{
+	WARN_ON(!list_empty(&vfio.group_list));
+	ida_destroy(&vfio.group_ida);
+	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
+	class_destroy(vfio.class);
+	vfio.class = NULL;
+	vfio_container_cleanup();
+}
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index a112e8f2b291a..2e05418fd18df 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -6,6 +6,7 @@
 #ifndef __VFIO_VFIO_H__
 #define __VFIO_VFIO_H__
 
+#include <linux/file.h>
 #include <linux/device.h>
 #include <linux/cdev.h>
 #include <linux/module.h>
@@ -15,6 +16,15 @@ struct iommu_group;
 struct vfio_device;
 struct vfio_container;
 
+void vfio_device_put_registration(struct vfio_device *device);
+bool vfio_device_try_get_registration(struct vfio_device *device);
+int vfio_device_open(struct vfio_device *device,
+		     struct iommufd_ctx *iommufd, struct kvm *kvm);
+void vfio_device_close(struct vfio_device *device,
+		       struct iommufd_ctx *iommufd);
+
+extern const struct file_operations vfio_device_fops;
+
 enum vfio_group_type {
 	/*
 	 * Physical device with IOMMU backing.
@@ -66,6 +76,18 @@ struct vfio_group {
 	struct iommufd_ctx		*iommufd;
 };
 
+int vfio_device_set_group(struct vfio_device *device,
+			  enum vfio_group_type type);
+void vfio_device_remove_group(struct vfio_device *device);
+void vfio_device_group_register(struct vfio_device *device);
+void vfio_device_group_unregister(struct vfio_device *device);
+int vfio_device_group_use_iommu(struct vfio_device *device);
+void vfio_device_group_unuse_iommu(struct vfio_device *device);
+void vfio_device_group_close(struct vfio_device *device);
+bool vfio_device_has_container(struct vfio_device *device);
+int __init vfio_group_init(void);
+void vfio_group_cleanup(void);
+
 #if IS_ENABLED(CONFIG_VFIO_CONTAINER)
 /* events for the backend driver notify callback */
 enum vfio_iommu_notify_type {
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index b7f94ace7b10b..e21ff965141e6 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -13,8 +13,6 @@
 #include <linux/cdev.h>
 #include <linux/compat.h>
 #include <linux/device.h>
-#include <linux/file.h>
-#include <linux/anon_inodes.h>
 #include <linux/fs.h>
 #include <linux/idr.h>
 #include <linux/iommu.h>
@@ -43,17 +41,11 @@
 #define DRIVER_DESC	"VFIO - User Level meta-driver"
 
 static struct vfio {
-	struct class			*class;
-	struct list_head		group_list;
-	struct mutex			group_lock; /* locks group_list */
-	struct ida			group_ida;
-	dev_t				group_devt;
 	struct class			*device_class;
 	struct ida			device_ida;
 } vfio;
 
 static DEFINE_XARRAY(vfio_device_set_xa);
-static const struct file_operations vfio_group_fops;
 
 int vfio_assign_device_set(struct vfio_device *device, void *set_id)
 {
@@ -139,168 +131,17 @@ unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
 }
 EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
 
-/*
- * Group objects - create, release, get, put, search
- */
-static struct vfio_group *
-vfio_group_find_from_iommu(struct iommu_group *iommu_group)
-{
-	struct vfio_group *group;
-
-	lockdep_assert_held(&vfio.group_lock);
-
-	/*
-	 * group->iommu_group from the vfio.group_list cannot be NULL
-	 * under the vfio.group_lock.
-	 */
-	list_for_each_entry(group, &vfio.group_list, vfio_next) {
-		if (group->iommu_group == iommu_group)
-			return group;
-	}
-	return NULL;
-}
-
-static void vfio_group_release(struct device *dev)
-{
-	struct vfio_group *group = container_of(dev, struct vfio_group, dev);
-
-	mutex_destroy(&group->device_lock);
-	mutex_destroy(&group->group_lock);
-	WARN_ON(group->iommu_group);
-	ida_free(&vfio.group_ida, MINOR(group->dev.devt));
-	kfree(group);
-}
-
-static struct vfio_group *vfio_group_alloc(struct iommu_group *iommu_group,
-					   enum vfio_group_type type)
-{
-	struct vfio_group *group;
-	int minor;
-
-	group = kzalloc(sizeof(*group), GFP_KERNEL);
-	if (!group)
-		return ERR_PTR(-ENOMEM);
-
-	minor = ida_alloc_max(&vfio.group_ida, MINORMASK, GFP_KERNEL);
-	if (minor < 0) {
-		kfree(group);
-		return ERR_PTR(minor);
-	}
-
-	device_initialize(&group->dev);
-	group->dev.devt = MKDEV(MAJOR(vfio.group_devt), minor);
-	group->dev.class = vfio.class;
-	group->dev.release = vfio_group_release;
-	cdev_init(&group->cdev, &vfio_group_fops);
-	group->cdev.owner = THIS_MODULE;
-
-	refcount_set(&group->drivers, 1);
-	mutex_init(&group->group_lock);
-	INIT_LIST_HEAD(&group->device_list);
-	mutex_init(&group->device_lock);
-	group->iommu_group = iommu_group;
-	/* put in vfio_group_release() */
-	iommu_group_ref_get(iommu_group);
-	group->type = type;
-	BLOCKING_INIT_NOTIFIER_HEAD(&group->notifier);
-
-	return group;
-}
-
-static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
-		enum vfio_group_type type)
-{
-	struct vfio_group *group;
-	struct vfio_group *ret;
-	int err;
-
-	lockdep_assert_held(&vfio.group_lock);
-
-	group = vfio_group_alloc(iommu_group, type);
-	if (IS_ERR(group))
-		return group;
-
-	err = dev_set_name(&group->dev, "%s%d",
-			   group->type == VFIO_NO_IOMMU ? "noiommu-" : "",
-			   iommu_group_id(iommu_group));
-	if (err) {
-		ret = ERR_PTR(err);
-		goto err_put;
-	}
-
-	err = cdev_device_add(&group->cdev, &group->dev);
-	if (err) {
-		ret = ERR_PTR(err);
-		goto err_put;
-	}
-
-	list_add(&group->vfio_next, &vfio.group_list);
-
-	return group;
-
-err_put:
-	put_device(&group->dev);
-	return ret;
-}
-
-static void vfio_device_remove_group(struct vfio_device *device)
-{
-	struct vfio_group *group = device->group;
-	struct iommu_group *iommu_group;
-
-	if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
-		iommu_group_remove_device(device->dev);
-
-	/* Pairs with vfio_create_group() / vfio_group_get_from_iommu() */
-	if (!refcount_dec_and_mutex_lock(&group->drivers, &vfio.group_lock))
-		return;
-	list_del(&group->vfio_next);
-
-	/*
-	 * We could concurrently probe another driver in the group that might
-	 * race vfio_device_remove_group() with vfio_get_group(), so we have to
-	 * ensure that the sysfs is all cleaned up under lock otherwise the
-	 * cdev_device_add() will fail due to the name aready existing.
-	 */
-	cdev_device_del(&group->cdev, &group->dev);
-
-	mutex_lock(&group->group_lock);
-	/*
-	 * These data structures all have paired operations that can only be
-	 * undone when the caller holds a live reference on the device. Since
-	 * all pairs must be undone these WARN_ON's indicate some caller did not
-	 * properly hold the group reference.
-	 */
-	WARN_ON(!list_empty(&group->device_list));
-	WARN_ON(group->notifier.head);
-
-	/*
-	 * Revoke all users of group->iommu_group. At this point we know there
-	 * are no devices active because we are unplugging the last one. Setting
-	 * iommu_group to NULL blocks all new users.
-	 */
-	if (group->container)
-		vfio_group_detach_container(group);
-	iommu_group = group->iommu_group;
-	group->iommu_group = NULL;
-	mutex_unlock(&group->group_lock);
-	mutex_unlock(&vfio.group_lock);
-
-	iommu_group_put(iommu_group);
-	put_device(&group->dev);
-}
-
 /*
  * Device objects - create, release, get, put, search
  */
 /* Device reference always implies a group reference */
-static void vfio_device_put_registration(struct vfio_device *device)
+void vfio_device_put_registration(struct vfio_device *device)
 {
 	if (refcount_dec_and_test(&device->refcount))
 		complete(&device->comp);
 }
 
-static bool vfio_device_try_get_registration(struct vfio_device *device)
+bool vfio_device_try_get_registration(struct vfio_device *device)
 {
 	return refcount_inc_not_zero(&device->refcount);
 }
@@ -413,139 +254,6 @@ void vfio_free_device(struct vfio_device *device)
 }
 EXPORT_SYMBOL_GPL(vfio_free_device);
 
-static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev,
-		enum vfio_group_type type)
-{
-	struct iommu_group *iommu_group;
-	struct vfio_group *group;
-	int ret;
-
-	iommu_group = iommu_group_alloc();
-	if (IS_ERR(iommu_group))
-		return ERR_CAST(iommu_group);
-
-	ret = iommu_group_set_name(iommu_group, "vfio-noiommu");
-	if (ret)
-		goto out_put_group;
-	ret = iommu_group_add_device(iommu_group, dev);
-	if (ret)
-		goto out_put_group;
-
-	mutex_lock(&vfio.group_lock);
-	group = vfio_create_group(iommu_group, type);
-	mutex_unlock(&vfio.group_lock);
-	if (IS_ERR(group)) {
-		ret = PTR_ERR(group);
-		goto out_remove_device;
-	}
-	iommu_group_put(iommu_group);
-	return group;
-
-out_remove_device:
-	iommu_group_remove_device(dev);
-out_put_group:
-	iommu_group_put(iommu_group);
-	return ERR_PTR(ret);
-}
-
-static bool vfio_group_has_device(struct vfio_group *group, struct device *dev)
-{
-	struct vfio_device *device;
-
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(device, &group->device_list, group_next) {
-		if (device->dev == dev) {
-			mutex_unlock(&group->device_lock);
-			return true;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-	return false;
-}
-
-static struct vfio_group *vfio_group_find_or_alloc(struct device *dev)
-{
-	struct iommu_group *iommu_group;
-	struct vfio_group *group;
-
-	iommu_group = iommu_group_get(dev);
-	if (!iommu_group && vfio_noiommu) {
-		/*
-		 * With noiommu enabled, create an IOMMU group for devices that
-		 * don't already have one, implying no IOMMU hardware/driver
-		 * exists.  Taint the kernel because we're about to give a DMA
-		 * capable device to a user without IOMMU protection.
-		 */
-		group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU);
-		if (!IS_ERR(group)) {
-			add_taint(TAINT_USER, LOCKDEP_STILL_OK);
-			dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n");
-		}
-		return group;
-	}
-
-	if (!iommu_group)
-		return ERR_PTR(-EINVAL);
-
-	/*
-	 * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
-	 * restore cache coherency. It has to be checked here because it is only
-	 * valid for cases where we are using iommu groups.
-	 */
-	if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) {
-		iommu_group_put(iommu_group);
-		return ERR_PTR(-EINVAL);
-	}
-
-	mutex_lock(&vfio.group_lock);
-	group = vfio_group_find_from_iommu(iommu_group);
-	if (group) {
-		if (WARN_ON(vfio_group_has_device(group, dev)))
-			group = ERR_PTR(-EINVAL);
-		else
-			refcount_inc(&group->drivers);
-	} else {
-		group = vfio_create_group(iommu_group, VFIO_IOMMU);
-	}
-	mutex_unlock(&vfio.group_lock);
-
-	/* The vfio_group holds a reference to the iommu_group */
-	iommu_group_put(iommu_group);
-	return group;
-}
-
-static void vfio_device_group_register(struct vfio_device *device)
-{
-	mutex_lock(&device->group->device_lock);
-	list_add(&device->group_next, &device->group->device_list);
-	mutex_unlock(&device->group->device_lock);
-}
-
-static void vfio_device_group_unregister(struct vfio_device *device)
-{
-	mutex_lock(&device->group->device_lock);
-	list_del(&device->group_next);
-	mutex_unlock(&device->group->device_lock);
-}
-
-static int vfio_device_set_group(struct vfio_device *device,
-				 enum vfio_group_type type)
-{
-	struct vfio_group *group;
-
-	if (type == VFIO_IOMMU)
-		group = vfio_group_find_or_alloc(device->dev);
-	else
-		group = vfio_noiommu_group_alloc(device->dev, type);
-
-	if (IS_ERR(group))
-		return PTR_ERR(group);
-
-	/* Our reference on group is moved to the device */
-	device->group = group;
-	return 0;
-}
-
 static int __vfio_register_dev(struct vfio_device *device,
 			       enum vfio_group_type type)
 {
@@ -602,35 +310,6 @@ int vfio_register_emulated_iommu_dev(struct vfio_device *device)
 }
 EXPORT_SYMBOL_GPL(vfio_register_emulated_iommu_dev);
 
-static struct vfio_device *vfio_device_get_from_name(struct vfio_group *group,
-						     char *buf)
-{
-	struct vfio_device *it, *device = ERR_PTR(-ENODEV);
-
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(it, &group->device_list, group_next) {
-		int ret;
-
-		if (it->ops->match) {
-			ret = it->ops->match(it, buf);
-			if (ret < 0) {
-				device = ERR_PTR(ret);
-				break;
-			}
-		} else {
-			ret = !strcmp(dev_name(it->dev), buf);
-		}
-
-		if (ret && vfio_device_try_get_registration(it)) {
-			device = it;
-			break;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-
-	return device;
-}
-
 /*
  * Decrement the device reference count and wait for the device to be
  * removed.  Open file descriptors for the device... */
@@ -673,146 +352,12 @@ void vfio_unregister_group_dev(struct vfio_device *device)
 }
 EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
 
-/*
- * VFIO Group fd, /dev/vfio/$GROUP
- */
-static bool vfio_group_has_iommu(struct vfio_group *group)
-{
-	lockdep_assert_held(&group->group_lock);
-	/*
-	 * There can only be users if there is a container, and if there is a
-	 * container there must be users.
-	 */
-	WARN_ON(!group->container != !group->container_users);
-
-	return group->container || group->iommufd;
-}
-
-/*
- * VFIO_GROUP_UNSET_CONTAINER should fail if there are other users or
- * if there was no container to unset.  Since the ioctl is called on
- * the group, we know that still exists, therefore the only valid
- * transition here is 1->0.
- */
-static int vfio_group_ioctl_unset_container(struct vfio_group *group)
-{
-	int ret = 0;
-
-	mutex_lock(&group->group_lock);
-	if (!vfio_group_has_iommu(group)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-	if (group->container) {
-		if (group->container_users != 1) {
-			ret = -EBUSY;
-			goto out_unlock;
-		}
-		vfio_group_detach_container(group);
-	}
-	if (group->iommufd) {
-		iommufd_ctx_put(group->iommufd);
-		group->iommufd = NULL;
-	}
-
-out_unlock:
-	mutex_unlock(&group->group_lock);
-	return ret;
-}
-
-static int vfio_group_ioctl_set_container(struct vfio_group *group,
-					  int __user *arg)
-{
-	struct vfio_container *container;
-	struct iommufd_ctx *iommufd;
-	struct fd f;
-	int ret;
-	int fd;
-
-	if (get_user(fd, arg))
-		return -EFAULT;
-
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
-
-	mutex_lock(&group->group_lock);
-	if (vfio_group_has_iommu(group)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-	if (!group->iommu_group) {
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	container = vfio_container_from_file(f.file);
-	if (container) {
-		ret = vfio_container_attach_group(container, group);
-		goto out_unlock;
-	}
-
-	iommufd = iommufd_ctx_from_file(f.file);
-	if (!IS_ERR(iommufd)) {
-		u32 ioas_id;
-
-		ret = iommufd_vfio_compat_ioas_id(iommufd, &ioas_id);
-		if (ret) {
-			iommufd_ctx_put(group->iommufd);
-			goto out_unlock;
-		}
-
-		group->iommufd = iommufd;
-		goto out_unlock;
-	}
-
-	/* The FD passed is not recognized. */
-	ret = -EBADFD;
-
-out_unlock:
-	mutex_unlock(&group->group_lock);
-	fdput(f);
-	return ret;
-}
-
-static const struct file_operations vfio_device_fops;
-
 /* true if the vfio_device has open_device() called but not close_device() */
 static bool vfio_assert_device_open(struct vfio_device *device)
 {
 	return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
 }
 
-static int vfio_device_group_use_iommu(struct vfio_device *device)
-{
-	struct vfio_group *group = device->group;
-	int ret = 0;
-
-	lockdep_assert_held(&group->group_lock);
-
-	if (WARN_ON(!group->container))
-		return -EINVAL;
-
-	ret = vfio_group_use_container(group);
-	if (ret)
-		return ret;
-	vfio_device_container_register(device);
-	return 0;
-}
-
-static void vfio_device_group_unuse_iommu(struct vfio_device *device)
-{
-	struct vfio_group *group = device->group;
-
-	lockdep_assert_held(&group->group_lock);
-
-	if (WARN_ON(!group->container))
-		return;
-
-	vfio_device_container_unregister(device);
-	vfio_group_unuse_container(group);
-}
-
 static int vfio_device_first_open(struct vfio_device *device,
 				  struct iommufd_ctx *iommufd, struct kvm *kvm)
 {
@@ -864,8 +409,8 @@ static void vfio_device_last_close(struct vfio_device *device,
 	module_put(device->dev->driver->owner);
 }
 
-static int vfio_device_open(struct vfio_device *device,
-			    struct iommufd_ctx *iommufd, struct kvm *kvm)
+int vfio_device_open(struct vfio_device *device,
+		     struct iommufd_ctx *iommufd, struct kvm *kvm)
 {
 	int ret = 0;
 
@@ -881,8 +426,8 @@ static int vfio_device_open(struct vfio_device *device,
 	return ret;
 }
 
-static void vfio_device_close(struct vfio_device *device,
-			      struct iommufd_ctx *iommufd)
+void vfio_device_close(struct vfio_device *device,
+		       struct iommufd_ctx *iommufd)
 {
 	mutex_lock(&device->dev_set->lock);
 	vfio_assert_device_open(device);
@@ -892,245 +437,6 @@ static void vfio_device_close(struct vfio_device *device,
 	mutex_unlock(&device->dev_set->lock);
 }
 
-static int vfio_device_group_open(struct vfio_device *device)
-{
-	int ret;
-
-	mutex_lock(&device->group->group_lock);
-	if (!vfio_group_has_iommu(device->group)) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-
-	/*
-	 * Here we pass the KVM pointer with the group under the lock.  If the
-	 * device driver will use it, it must obtain a reference and release it
-	 * during close_device.
-	 */
-	ret = vfio_device_open(device, device->group->iommufd,
-			       device->group->kvm);
-
-out_unlock:
-	mutex_unlock(&device->group->group_lock);
-	return ret;
-}
-
-static void vfio_device_group_close(struct vfio_device *device)
-{
-	mutex_lock(&device->group->group_lock);
-	vfio_device_close(device, device->group->iommufd);
-	mutex_unlock(&device->group->group_lock);
-}
-
-static struct file *vfio_device_open_file(struct vfio_device *device)
-{
-	struct file *filep;
-	int ret;
-
-	ret = vfio_device_group_open(device);
-	if (ret)
-		goto err_out;
-
-	/*
-	 * We can't use anon_inode_getfd() because we need to modify
-	 * the f_mode flags directly to allow more than just ioctls
-	 */
-	filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops,
-				   device, O_RDWR);
-	if (IS_ERR(filep)) {
-		ret = PTR_ERR(filep);
-		goto err_close_device;
-	}
-
-	/*
-	 * TODO: add an anon_inode interface to do this.
-	 * Appears to be missing by lack of need rather than
-	 * explicitly prevented.  Now there's need.
-	 */
-	filep->f_mode |= (FMODE_PREAD | FMODE_PWRITE);
-
-	if (device->group->type == VFIO_NO_IOMMU)
-		dev_warn(device->dev, "vfio-noiommu device opened by user "
-			 "(%s:%d)\n", current->comm, task_pid_nr(current));
-	/*
-	 * On success the ref of device is moved to the file and
-	 * put in vfio_device_fops_release()
-	 */
-	return filep;
-
-err_close_device:
-	vfio_device_group_close(device);
-err_out:
-	return ERR_PTR(ret);
-}
-
-static int vfio_group_ioctl_get_device_fd(struct vfio_group *group,
-					  char __user *arg)
-{
-	struct vfio_device *device;
-	struct file *filep;
-	char *buf;
-	int fdno;
-	int ret;
-
-	buf = strndup_user(arg, PAGE_SIZE);
-	if (IS_ERR(buf))
-		return PTR_ERR(buf);
-
-	device = vfio_device_get_from_name(group, buf);
-	kfree(buf);
-	if (IS_ERR(device))
-		return PTR_ERR(device);
-
-	fdno = get_unused_fd_flags(O_CLOEXEC);
-	if (fdno < 0) {
-		ret = fdno;
-		goto err_put_device;
-	}
-
-	filep = vfio_device_open_file(device);
-	if (IS_ERR(filep)) {
-		ret = PTR_ERR(filep);
-		goto err_put_fdno;
-	}
-
-	fd_install(fdno, filep);
-	return fdno;
-
-err_put_fdno:
-	put_unused_fd(fdno);
-err_put_device:
-	vfio_device_put_registration(device);
-	return ret;
-}
-
-static int vfio_group_ioctl_get_status(struct vfio_group *group,
-				       struct vfio_group_status __user *arg)
-{
-	unsigned long minsz = offsetofend(struct vfio_group_status, flags);
-	struct vfio_group_status status;
-
-	if (copy_from_user(&status, arg, minsz))
-		return -EFAULT;
-
-	if (status.argsz < minsz)
-		return -EINVAL;
-
-	status.flags = 0;
-
-	mutex_lock(&group->group_lock);
-	if (!group->iommu_group) {
-		mutex_unlock(&group->group_lock);
-		return -ENODEV;
-	}
-
-	/*
-	 * With the container FD the iommu_group_claim_dma_owner() is done
-	 * during SET_CONTAINER but for IOMMFD this is done during
-	 * VFIO_GROUP_GET_DEVICE_FD. Meaning that with iommufd
-	 * VFIO_GROUP_FLAGS_VIABLE could be set but GET_DEVICE_FD will fail due
-	 * to viability.
-	 */
-	if (vfio_group_has_iommu(group))
-		status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
-				VFIO_GROUP_FLAGS_VIABLE;
-	else if (!iommu_group_dma_owner_claimed(group->iommu_group))
-		status.flags |= VFIO_GROUP_FLAGS_VIABLE;
-	mutex_unlock(&group->group_lock);
-
-	if (copy_to_user(arg, &status, minsz))
-		return -EFAULT;
-	return 0;
-}
-
-static long vfio_group_fops_unl_ioctl(struct file *filep,
-				      unsigned int cmd, unsigned long arg)
-{
-	struct vfio_group *group = filep->private_data;
-	void __user *uarg = (void __user *)arg;
-
-	switch (cmd) {
-	case VFIO_GROUP_GET_DEVICE_FD:
-		return vfio_group_ioctl_get_device_fd(group, uarg);
-	case VFIO_GROUP_GET_STATUS:
-		return vfio_group_ioctl_get_status(group, uarg);
-	case VFIO_GROUP_SET_CONTAINER:
-		return vfio_group_ioctl_set_container(group, uarg);
-	case VFIO_GROUP_UNSET_CONTAINER:
-		return vfio_group_ioctl_unset_container(group);
-	default:
-		return -ENOTTY;
-	}
-}
-
-static int vfio_group_fops_open(struct inode *inode, struct file *filep)
-{
-	struct vfio_group *group =
-		container_of(inode->i_cdev, struct vfio_group, cdev);
-	int ret;
-
-	mutex_lock(&group->group_lock);
-
-	/*
-	 * drivers can be zero if this races with vfio_device_remove_group(), it
-	 * will be stable at 0 under the group rwsem
-	 */
-	if (refcount_read(&group->drivers) == 0) {
-		ret = -ENODEV;
-		goto out_unlock;
-	}
-
-	if (group->type == VFIO_NO_IOMMU && !capable(CAP_SYS_RAWIO)) {
-		ret = -EPERM;
-		goto out_unlock;
-	}
-
-	/*
-	 * Do we need multiple instances of the group open?  Seems not.
-	 */
-	if (group->opened_file) {
-		ret = -EBUSY;
-		goto out_unlock;
-	}
-	group->opened_file = filep;
-	filep->private_data = group;
-	ret = 0;
-out_unlock:
-	mutex_unlock(&group->group_lock);
-	return ret;
-}
-
-static int vfio_group_fops_release(struct inode *inode, struct file *filep)
-{
-	struct vfio_group *group = filep->private_data;
-
-	filep->private_data = NULL;
-
-	mutex_lock(&group->group_lock);
-	/*
-	 * Device FDs hold a group file reference, therefore the group release
-	 * is only called when there are no open devices.
-	 */
-	WARN_ON(group->notifier.head);
-	if (group->container)
-		vfio_group_detach_container(group);
-	if (group->iommufd) {
-		iommufd_ctx_put(group->iommufd);
-		group->iommufd = NULL;
-	}
-	group->opened_file = NULL;
-	mutex_unlock(&group->group_lock);
-	return 0;
-}
-
-static const struct file_operations vfio_group_fops = {
-	.owner		= THIS_MODULE,
-	.unlocked_ioctl	= vfio_group_fops_unl_ioctl,
-	.compat_ioctl	= compat_ptr_ioctl,
-	.open		= vfio_group_fops_open,
-	.release	= vfio_group_fops_release,
-};
-
 /*
  * Wrapper around pm_runtime_resume_and_get().
  * Return error code on failure or 0 on success.
@@ -1694,7 +1000,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
 	return device->ops->mmap(device, vma);
 }
 
-static const struct file_operations vfio_device_fops = {
+const struct file_operations vfio_device_fops = {
 	.owner		= THIS_MODULE,
 	.release	= vfio_device_fops_release,
 	.read		= vfio_device_fops_read,
@@ -1704,121 +1010,6 @@ static const struct file_operations vfio_device_fops = {
 	.mmap		= vfio_device_fops_mmap,
 };
 
-/**
- * vfio_file_iommu_group - Return the struct iommu_group for the vfio group file
- * @file: VFIO group file
- *
- * The returned iommu_group is valid as long as a ref is held on the file. This
- * returns a reference on the group. This function is deprecated, only the SPAPR
- * path in kvm should call it.
- */
-struct iommu_group *vfio_file_iommu_group(struct file *file)
-{
-	struct vfio_group *group = file->private_data;
-	struct iommu_group *iommu_group = NULL;
-
-	if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
-		return NULL;
-
-	if (!vfio_file_is_group(file))
-		return NULL;
-
-	mutex_lock(&group->group_lock);
-	if (group->iommu_group) {
-		iommu_group = group->iommu_group;
-		iommu_group_ref_get(iommu_group);
-	}
-	mutex_unlock(&group->group_lock);
-	return iommu_group;
-}
-EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
-
-/**
- * vfio_file_is_group - True if the file is usable with VFIO aPIS
- * @file: VFIO group file
- */
-bool vfio_file_is_group(struct file *file)
-{
-	return file->f_op == &vfio_group_fops;
-}
-EXPORT_SYMBOL_GPL(vfio_file_is_group);
-
-/**
- * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
- *        is always CPU cache coherent
- * @file: VFIO group file
- *
- * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
- * bit in DMA transactions. A return of false indicates that the user has
- * rights to access additional instructions such as wbinvd on x86.
- */
-bool vfio_file_enforced_coherent(struct file *file)
-{
-	struct vfio_group *group = file->private_data;
-	struct vfio_device *device;
-	bool ret = true;
-
-	if (!vfio_file_is_group(file))
-		return true;
-
-	/*
-	 * If the device does not have IOMMU_CAP_ENFORCE_CACHE_COHERENCY then
-	 * any domain later attached to it will also not support it. If the cap
-	 * is set then the iommu_domain eventually attached to the device/group
-	 * must use a domain with enforce_cache_coherency().
-	 */
-	mutex_lock(&group->device_lock);
-	list_for_each_entry(device, &group->device_list, group_next) {
-		if (!device_iommu_capable(device->dev,
-					  IOMMU_CAP_ENFORCE_CACHE_COHERENCY)) {
-			ret = false;
-			break;
-		}
-	}
-	mutex_unlock(&group->device_lock);
-	return ret;
-}
-EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
-
-/**
- * vfio_file_set_kvm - Link a kvm with VFIO drivers
- * @file: VFIO group file
- * @kvm: KVM to link
- *
- * When a VFIO device is first opened the KVM will be available in
- * device->kvm if one was associated with the group.
- */
-void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
-{
-	struct vfio_group *group = file->private_data;
-
-	if (!vfio_file_is_group(file))
-		return;
-
-	mutex_lock(&group->group_lock);
-	group->kvm = kvm;
-	mutex_unlock(&group->group_lock);
-}
-EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
-
-/**
- * vfio_file_has_dev - True if the VFIO file is a handle for device
- * @file: VFIO file to check
- * @device: Device that must be part of the file
- *
- * Returns true if given file has permission to manipulate the given device.
- */
-bool vfio_file_has_dev(struct file *file, struct vfio_device *device)
-{
-	struct vfio_group *group = file->private_data;
-
-	if (!vfio_file_is_group(file))
-		return false;
-
-	return group == device->group;
-}
-EXPORT_SYMBOL_GPL(vfio_file_has_dev);
-
 /*
  * Sub-module support
  */
@@ -1938,11 +1129,6 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr, int num_irqs,
 }
 EXPORT_SYMBOL(vfio_set_irqs_validate_and_prepare);
 
-static bool vfio_device_has_container(struct vfio_device *device)
-{
-	return device->group->container;
-}
-
 /*
  * Pin contiguous user pages and return their associated host pages for local
  * domain only.
@@ -2064,55 +1250,6 @@ EXPORT_SYMBOL(vfio_dma_rw);
 /*
  * Module/class support
  */
-static char *vfio_devnode(struct device *dev, umode_t *mode)
-{
-	return kasprintf(GFP_KERNEL, "vfio/%s", dev_name(dev));
-}
-
-static int __init vfio_group_init(void)
-{
-	int ret;
-
-	ida_init(&vfio.group_ida);
-	mutex_init(&vfio.group_lock);
-	INIT_LIST_HEAD(&vfio.group_list);
-
-	ret = vfio_container_init();
-	if (ret)
-		return ret;
-
-	/* /dev/vfio/$GROUP */
-	vfio.class = class_create(THIS_MODULE, "vfio");
-	if (IS_ERR(vfio.class)) {
-		ret = PTR_ERR(vfio.class);
-		goto err_group_class;
-	}
-
-	vfio.class->devnode = vfio_devnode;
-
-	ret = alloc_chrdev_region(&vfio.group_devt, 0, MINORMASK + 1, "vfio");
-	if (ret)
-		goto err_alloc_chrdev;
-	return 0;
-
-err_alloc_chrdev:
-	class_destroy(vfio.class);
-	vfio.class = NULL;
-err_group_class:
-	vfio_container_cleanup();
-	return ret;
-}
-
-static void vfio_group_cleanup(void)
-{
-	WARN_ON(!list_empty(&vfio.group_list));
-	ida_destroy(&vfio.group_ida);
-	unregister_chrdev_region(vfio.group_devt, MINORMASK + 1);
-	class_destroy(vfio.class);
-	vfio.class = NULL;
-	vfio_container_cleanup();
-}
-
 static int __init vfio_init(void)
 {
 	int ret;
-- 
GitLab


From 219072c09abde0f1d0a6ce091be375e8eb7d08f0 Mon Sep 17 00:00:00 2001
From: Ryan Roberts <ryan.roberts@arm.com>
Date: Mon, 5 Dec 2022 11:40:31 +0000
Subject: [PATCH 3404/4122] KVM: arm64: Fix benign bug with incorrect use of
 VA_BITS

get_user_mapping_size() uses kvm's pgtable library to walk a user space
page table created by the kernel, and in doing so, passes metadata
that the library needs, including ia_bits, which defines the size of the
input address.

For the case where the kernel is compiled for 52 VA bits but runs on HW
that does not support LVA, it will fall back to 48 VA bits at runtime.
Therefore we must use vabits_actual rather than VA_BITS to get the true
address size.

This is benign in the current code base because the pgtable library only
uses it for error checking.

Fixes: 6011cf68c885 ("KVM: arm64: Walk userspace page tables to compute the THP mapping size")
Signed-off-by: Ryan Roberts <ryan.roberts@arm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221205114031.3972780-1-ryan.roberts@arm.com
---
 arch/arm64/kvm/mmu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 4efb983cff436..1ef0704420d96 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -641,7 +641,7 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
 {
 	struct kvm_pgtable pgt = {
 		.pgd		= (kvm_pte_t *)kvm->mm->pgd,
-		.ia_bits	= VA_BITS,
+		.ia_bits	= vabits_actual,
 		.start_level	= (KVM_PGTABLE_MAX_LEVELS -
 				   CONFIG_PGTABLE_LEVELS),
 		.mm_ops		= &kvm_user_mm_ops,
-- 
GitLab


From 96df59b1ae23f5c11698c3c2159aeb2ecd4944a4 Mon Sep 17 00:00:00 2001
From: Li Huafei <lihuafei1@huawei.com>
Date: Fri, 4 Nov 2022 17:56:57 +0800
Subject: [PATCH 3405/4122] RISC-V: kexec: Fix memory leak of fdt buffer

This is reported by kmemleak detector:

unreferenced object 0xff60000082864000 (size 9588):
  comm "kexec", pid 146, jiffies 4294900634 (age 64.788s)
  hex dump (first 32 bytes):
    d0 0d fe ed 00 00 12 ed 00 00 00 48 00 00 11 40  ...........H...@
    00 00 00 28 00 00 00 11 00 00 00 02 00 00 00 00  ...(............
  backtrace:
    [<00000000f95b17c4>] kmemleak_alloc+0x34/0x3e
    [<00000000b9ec8e3e>] kmalloc_order+0x9c/0xc4
    [<00000000a95cf02e>] kmalloc_order_trace+0x34/0xb6
    [<00000000f01e68b4>] __kmalloc+0x5c2/0x62a
    [<000000002bd497b2>] kvmalloc_node+0x66/0xd6
    [<00000000906542fa>] of_kexec_alloc_and_setup_fdt+0xa6/0x6ea
    [<00000000e1166bde>] elf_kexec_load+0x206/0x4ec
    [<0000000036548e09>] kexec_image_load_default+0x40/0x4c
    [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322
    [<0000000040c62c03>] ret_from_syscall+0x0/0x2

In elf_kexec_load(), a buffer is allocated via kvmalloc() to store fdt.
While it's not freed back to system when kexec kernel is reloaded or
unloaded.  Then memory leak is caused.  Fix it by introducing riscv
specific function arch_kimage_file_post_load_cleanup(), and freeing the
buffer there.

Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support")
Signed-off-by: Li Huafei <lihuafei1@huawei.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Liao Chang <liaochang1@huawei.com>
Link: https://lore.kernel.org/r/20221104095658.141222-1-lihuafei1@huawei.com
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/kexec.h |  5 +++++
 arch/riscv/kernel/elf_kexec.c  | 10 ++++++++++
 2 files changed, 15 insertions(+)

diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h
index eee260e8ab308..2b56769cb530c 100644
--- a/arch/riscv/include/asm/kexec.h
+++ b/arch/riscv/include/asm/kexec.h
@@ -39,6 +39,7 @@ crash_setup_regs(struct pt_regs *newregs,
 #define ARCH_HAS_KIMAGE_ARCH
 
 struct kimage_arch {
+	void *fdt; /* For CONFIG_KEXEC_FILE */
 	unsigned long fdt_addr;
 };
 
@@ -62,6 +63,10 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
 				     const Elf_Shdr *relsec,
 				     const Elf_Shdr *symtab);
 #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
+
+struct kimage;
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
 #endif
 
 #endif
diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
index 0cb94992c15b3..ff30fcb43f47a 100644
--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -21,6 +21,14 @@
 #include <linux/memblock.h>
 #include <asm/setup.h>
 
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	kvfree(image->arch.fdt);
+	image->arch.fdt = NULL;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
+
 static int riscv_kexec_elf_load(struct kimage *image, struct elfhdr *ehdr,
 				struct kexec_elf_info *elf_info, unsigned long old_pbase,
 				unsigned long new_pbase)
@@ -298,6 +306,8 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf,
 		pr_err("Error add DTB kbuf ret=%d\n", ret);
 		goto out_free_fdt;
 	}
+	/* Cache the fdt buffer address for memory cleanup */
+	image->arch.fdt = fdt;
 	pr_notice("Loaded device tree at 0x%lx\n", kbuf.mem);
 	goto out;
 
-- 
GitLab


From cbc32023ddbdf4baa3d9dc513a2184a84080a5a2 Mon Sep 17 00:00:00 2001
From: Li Huafei <lihuafei1@huawei.com>
Date: Fri, 4 Nov 2022 17:56:58 +0800
Subject: [PATCH 3406/4122] RISC-V: kexec: Fix memory leak of elf header buffer

This is reported by kmemleak detector:

unreferenced object 0xff2000000403d000 (size 4096):
  comm "kexec", pid 146, jiffies 4294900633 (age 64.792s)
  hex dump (first 32 bytes):
    7f 45 4c 46 02 01 01 00 00 00 00 00 00 00 00 00  .ELF............
    04 00 f3 00 01 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<00000000566ca97c>] kmemleak_vmalloc+0x3c/0xbe
    [<00000000979283d8>] __vmalloc_node_range+0x3ac/0x560
    [<00000000b4b3712a>] __vmalloc_node+0x56/0x62
    [<00000000854f75e2>] vzalloc+0x2c/0x34
    [<00000000e9a00db9>] crash_prepare_elf64_headers+0x80/0x30c
    [<0000000067e8bf48>] elf_kexec_load+0x3e8/0x4ec
    [<0000000036548e09>] kexec_image_load_default+0x40/0x4c
    [<0000000079fbe1b4>] sys_kexec_file_load+0x1c4/0x322
    [<0000000040c62c03>] ret_from_syscall+0x0/0x2

In elf_kexec_load(), a buffer is allocated via vzalloc() to store elf
headers.  While it's not freed back to system when kdump kernel is
reloaded or unloaded, or when image->elf_header is successfully set and
then fails to load kdump kernel for some reason. Fix it by freeing the
buffer in arch_kimage_file_post_load_cleanup().

Fixes: 8acea455fafa ("RISC-V: Support for kexec_file on panic")
Signed-off-by: Li Huafei <lihuafei1@huawei.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221104095658.141222-2-lihuafei1@huawei.com
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/elf_kexec.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c
index ff30fcb43f47a..5372b708fae21 100644
--- a/arch/riscv/kernel/elf_kexec.c
+++ b/arch/riscv/kernel/elf_kexec.c
@@ -26,6 +26,10 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image)
 	kvfree(image->arch.fdt);
 	image->arch.fdt = NULL;
 
+	vfree(image->elf_headers);
+	image->elf_headers = NULL;
+	image->elf_headers_sz = 0;
+
 	return kexec_image_post_load_cleanup_default(image);
 }
 
-- 
GitLab


From 8f8bcc8c720c360885639de66fe69756febed824 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 5 Dec 2022 11:29:16 -0400
Subject: [PATCH 3407/4122] vfio/pci: Move all the SPAPR PCI specific logic to
 vfio_pci_core.ko

The vfio_spapr_pci_eeh_open/release() functions are one line wrappers
around an arch function. Just call them directly. This eliminates some
weird exported symbols that don't need to exist.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Link: https://lore.kernel.org/r/1-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/vfio_pci_core.c | 11 +++++++++--
 drivers/vfio/vfio_spapr_eeh.c    | 13 -------------
 include/linux/vfio.h             | 11 -----------
 3 files changed, 9 insertions(+), 26 deletions(-)

diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 189d4930c276d..56501e7ef5645 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -27,6 +27,9 @@
 #include <linux/vgaarb.h>
 #include <linux/nospec.h>
 #include <linux/sched/mm.h>
+#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
+#include <asm/eeh.h>
+#endif
 
 #include "vfio_pci_priv.h"
 
@@ -686,7 +689,9 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev)
 		vdev->sriov_pf_core_dev->vf_token->users--;
 		mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
 	}
-	vfio_spapr_pci_eeh_release(vdev->pdev);
+#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
+	eeh_dev_release(vdev->pdev);
+#endif
 	vfio_pci_core_disable(vdev);
 
 	mutex_lock(&vdev->igate);
@@ -705,7 +710,9 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device);
 void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
 {
 	vfio_pci_probe_mmaps(vdev);
-	vfio_spapr_pci_eeh_open(vdev->pdev);
+#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
+	eeh_dev_open(vdev->pdev);
+#endif
 
 	if (vdev->sriov_pf_core_dev) {
 		mutex_lock(&vdev->sriov_pf_core_dev->vf_token->lock);
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 67f55ac1d459c..c9d102aafbcd1 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -15,19 +15,6 @@
 #define DRIVER_AUTHOR	"Gavin Shan, IBM Corporation"
 #define DRIVER_DESC	"VFIO IOMMU SPAPR EEH"
 
-/* We might build address mapping here for "fast" path later */
-void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
-{
-	eeh_dev_open(pdev);
-}
-EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_open);
-
-void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
-{
-	eeh_dev_release(pdev);
-}
-EXPORT_SYMBOL_GPL(vfio_spapr_pci_eeh_release);
-
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 				unsigned int cmd, unsigned long arg)
 {
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 43b67e46a2cb5..9378ca79d5480 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -233,21 +233,10 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
 				       int num_irqs, int max_irq_type,
 				       size_t *data_size);
 
-struct pci_dev;
 #if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
-void vfio_spapr_pci_eeh_open(struct pci_dev *pdev);
-void vfio_spapr_pci_eeh_release(struct pci_dev *pdev);
 long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd,
 				unsigned long arg);
 #else
-static inline void vfio_spapr_pci_eeh_open(struct pci_dev *pdev)
-{
-}
-
-static inline void vfio_spapr_pci_eeh_release(struct pci_dev *pdev)
-{
-}
-
 static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 					      unsigned int cmd,
 					      unsigned long arg)
-- 
GitLab


From e5c38a203eb4343993e889eb69f5386f085f25ef Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 5 Dec 2022 11:29:17 -0400
Subject: [PATCH 3408/4122] vfio/spapr: Move VFIO_CHECK_EXTENSION into
 tce_iommu_ioctl()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The PPC64 kconfig is a bit of a rats nest, but it turns out that if
CONFIG_SPAPR_TCE_IOMMU is on then EEH must be too:

config SPAPR_TCE_IOMMU
	bool "sPAPR TCE IOMMU Support"
	depends on PPC_POWERNV || PPC_PSERIES
	select IOMMU_API
	help
	  Enables bits of IOMMU API required by VFIO. The iommu_ops
	  is not implemented as it is not necessary for VFIO.

config PPC_POWERNV
	select FORCE_PCI

config PPC_PSERIES
	select FORCE_PCI

config EEH
	bool
	depends on (PPC_POWERNV || PPC_PSERIES) && PCI
	default y

So, just open code the call to eeh_enabled() into tce_iommu_ioctl().

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/2-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_iommu_spapr_tce.c | 10 ++++------
 drivers/vfio/vfio_spapr_eeh.c       |  6 ------
 2 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 169f07ac162d9..73cec2beae70b 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -785,14 +785,12 @@ static long tce_iommu_ioctl(void *iommu_data,
 		switch (arg) {
 		case VFIO_SPAPR_TCE_IOMMU:
 		case VFIO_SPAPR_TCE_v2_IOMMU:
-			ret = 1;
-			break;
+			return 1;
+		case VFIO_EEH:
+			return eeh_enabled();
 		default:
-			ret = vfio_spapr_iommu_eeh_ioctl(NULL, cmd, arg);
-			break;
+			return 0;
 		}
-
-		return (ret < 0) ? 0 : ret;
 	}
 
 	/*
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index c9d102aafbcd1..221b1b637e18b 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -24,12 +24,6 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 	long ret = -EINVAL;
 
 	switch (cmd) {
-	case VFIO_CHECK_EXTENSION:
-		if (arg == VFIO_EEH)
-			ret = eeh_enabled() ? 1 : 0;
-		else
-			ret = 0;
-		break;
 	case VFIO_EEH_PE_OP:
 		pe = eeh_iommu_group_to_pe(group);
 		if (!pe)
-- 
GitLab


From e276e25819b8a173a21947720bb0a548c0b724b7 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 5 Dec 2022 11:29:18 -0400
Subject: [PATCH 3409/4122] vfio: Move vfio_spapr_iommu_eeh_ioctl into
 vfio_iommu_spapr_tce.c

As with the previous patch EEH is always enabled if SPAPR_TCE_IOMMU, so
move this last bit of code into the main module.

Now that this function only processes VFIO_EEH_PE_OP remove a level of
indenting as well, it is only called by a case statement that already
checked VFIO_EEH_PE_OP.

This eliminates an unnecessary module and SPAPR code in a global header.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/3-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/Makefile               |  1 -
 drivers/vfio/vfio_iommu_spapr_tce.c | 55 +++++++++++++++++-
 drivers/vfio/vfio_spapr_eeh.c       | 88 -----------------------------
 include/linux/vfio.h                | 12 ----
 4 files changed, 53 insertions(+), 103 deletions(-)
 delete mode 100644 drivers/vfio/vfio_spapr_eeh.c

diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index b693a1169286f..50b8e8e3fb10d 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -10,7 +10,6 @@ vfio-y += vfio_main.o \
 obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
-obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o
 obj-$(CONFIG_VFIO_PCI) += pci/
 obj-$(CONFIG_VFIO_PLATFORM) += platform/
 obj-$(CONFIG_VFIO_MDEV) += mdev/
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 73cec2beae70b..60a50ce8701e5 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2013 IBM Corp.  All rights reserved.
  *     Author: Alexey Kardashevskiy <aik@ozlabs.ru>
+ * Copyright Gavin Shan, IBM Corporation 2014.
  *
  * Derived from original vfio_iommu_type1.c:
  * Copyright (C) 2012 Red Hat, Inc.  All rights reserved.
@@ -773,6 +774,57 @@ static long tce_iommu_create_default_window(struct tce_container *container)
 	return ret;
 }
 
+static long vfio_spapr_ioctl_eeh_pe_op(struct iommu_group *group,
+				       unsigned long arg)
+{
+	struct eeh_pe *pe;
+	struct vfio_eeh_pe_op op;
+	unsigned long minsz;
+
+	pe = eeh_iommu_group_to_pe(group);
+	if (!pe)
+		return -ENODEV;
+
+	minsz = offsetofend(struct vfio_eeh_pe_op, op);
+	if (copy_from_user(&op, (void __user *)arg, minsz))
+		return -EFAULT;
+	if (op.argsz < minsz || op.flags)
+		return -EINVAL;
+
+	switch (op.op) {
+	case VFIO_EEH_PE_DISABLE:
+		return eeh_pe_set_option(pe, EEH_OPT_DISABLE);
+	case VFIO_EEH_PE_ENABLE:
+		return eeh_pe_set_option(pe, EEH_OPT_ENABLE);
+	case VFIO_EEH_PE_UNFREEZE_IO:
+		return eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
+	case VFIO_EEH_PE_UNFREEZE_DMA:
+		return eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
+	case VFIO_EEH_PE_GET_STATE:
+		return eeh_pe_get_state(pe);
+		break;
+	case VFIO_EEH_PE_RESET_DEACTIVATE:
+		return eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true);
+	case VFIO_EEH_PE_RESET_HOT:
+		return eeh_pe_reset(pe, EEH_RESET_HOT, true);
+	case VFIO_EEH_PE_RESET_FUNDAMENTAL:
+		return eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true);
+	case VFIO_EEH_PE_CONFIGURE:
+		return eeh_pe_configure(pe);
+	case VFIO_EEH_PE_INJECT_ERR:
+		minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
+		if (op.argsz < minsz)
+			return -EINVAL;
+		if (copy_from_user(&op, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		return eeh_pe_inject_err(pe, op.err.type, op.err.func,
+					 op.err.addr, op.err.mask);
+	default:
+		return -EINVAL;
+	}
+}
+
 static long tce_iommu_ioctl(void *iommu_data,
 				 unsigned int cmd, unsigned long arg)
 {
@@ -1044,8 +1096,7 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 		ret = 0;
 		list_for_each_entry(tcegrp, &container->group_list, next) {
-			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
-					cmd, arg);
+			ret = vfio_spapr_ioctl_eeh_pe_op(tcegrp->grp, arg);
 			if (ret)
 				return ret;
 		}
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
deleted file mode 100644
index 221b1b637e18b..0000000000000
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * EEH functionality support for VFIO devices. The feature is only
- * available on sPAPR compatible platforms.
- *
- * Copyright Gavin Shan, IBM Corporation 2014.
- */
-
-#include <linux/module.h>
-#include <linux/uaccess.h>
-#include <linux/vfio.h>
-#include <asm/eeh.h>
-
-#define DRIVER_VERSION	"0.1"
-#define DRIVER_AUTHOR	"Gavin Shan, IBM Corporation"
-#define DRIVER_DESC	"VFIO IOMMU SPAPR EEH"
-
-long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
-				unsigned int cmd, unsigned long arg)
-{
-	struct eeh_pe *pe;
-	struct vfio_eeh_pe_op op;
-	unsigned long minsz;
-	long ret = -EINVAL;
-
-	switch (cmd) {
-	case VFIO_EEH_PE_OP:
-		pe = eeh_iommu_group_to_pe(group);
-		if (!pe)
-			return -ENODEV;
-
-		minsz = offsetofend(struct vfio_eeh_pe_op, op);
-		if (copy_from_user(&op, (void __user *)arg, minsz))
-			return -EFAULT;
-		if (op.argsz < minsz || op.flags)
-			return -EINVAL;
-
-		switch (op.op) {
-		case VFIO_EEH_PE_DISABLE:
-			ret = eeh_pe_set_option(pe, EEH_OPT_DISABLE);
-			break;
-		case VFIO_EEH_PE_ENABLE:
-			ret = eeh_pe_set_option(pe, EEH_OPT_ENABLE);
-			break;
-		case VFIO_EEH_PE_UNFREEZE_IO:
-			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
-			break;
-		case VFIO_EEH_PE_UNFREEZE_DMA:
-			ret = eeh_pe_set_option(pe, EEH_OPT_THAW_DMA);
-			break;
-		case VFIO_EEH_PE_GET_STATE:
-			ret = eeh_pe_get_state(pe);
-			break;
-		case VFIO_EEH_PE_RESET_DEACTIVATE:
-			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, true);
-			break;
-		case VFIO_EEH_PE_RESET_HOT:
-			ret = eeh_pe_reset(pe, EEH_RESET_HOT, true);
-			break;
-		case VFIO_EEH_PE_RESET_FUNDAMENTAL:
-			ret = eeh_pe_reset(pe, EEH_RESET_FUNDAMENTAL, true);
-			break;
-		case VFIO_EEH_PE_CONFIGURE:
-			ret = eeh_pe_configure(pe);
-			break;
-		case VFIO_EEH_PE_INJECT_ERR:
-			minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
-			if (op.argsz < minsz)
-				return -EINVAL;
-			if (copy_from_user(&op, (void __user *)arg, minsz))
-				return -EFAULT;
-
-			ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
-						op.err.addr, op.err.mask);
-			break;
-		default:
-			ret = -EINVAL;
-		}
-	}
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(vfio_spapr_iommu_eeh_ioctl);
-
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 9378ca79d5480..b4d5d4ca3d7d5 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -233,18 +233,6 @@ int vfio_set_irqs_validate_and_prepare(struct vfio_irq_set *hdr,
 				       int num_irqs, int max_irq_type,
 				       size_t *data_size);
 
-#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
-long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group, unsigned int cmd,
-				unsigned long arg);
-#else
-static inline long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
-					      unsigned int cmd,
-					      unsigned long arg)
-{
-	return -ENOTTY;
-}
-#endif /* CONFIG_VFIO_SPAPR_EEH */
-
 /*
  * IRQfd - generic
  */
-- 
GitLab


From 20601c45a0fa20bbb5545f4dd69f4f18448f4973 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 5 Dec 2022 11:29:19 -0400
Subject: [PATCH 3410/4122] vfio: Remove CONFIG_VFIO_SPAPR_EEH

We don't need a kconfig symbol for this, just directly test CONFIG_EEH in
the few places that need it.

Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/4-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/Kconfig             | 5 -----
 drivers/vfio/pci/vfio_pci_core.c | 6 +++---
 2 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index 86c381ceb9a1e..d25b91adfd64c 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -20,11 +20,6 @@ config VFIO_IOMMU_SPAPR_TCE
 	depends on SPAPR_TCE_IOMMU
 	default VFIO
 
-config VFIO_SPAPR_EEH
-	tristate
-	depends on EEH && VFIO_IOMMU_SPAPR_TCE
-	default VFIO
-
 config VFIO_VIRQFD
 	tristate
 	select EVENTFD
diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 56501e7ef5645..f9365a5bc9612 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -27,7 +27,7 @@
 #include <linux/vgaarb.h>
 #include <linux/nospec.h>
 #include <linux/sched/mm.h>
-#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
+#if IS_ENABLED(CONFIG_EEH)
 #include <asm/eeh.h>
 #endif
 
@@ -689,7 +689,7 @@ void vfio_pci_core_close_device(struct vfio_device *core_vdev)
 		vdev->sriov_pf_core_dev->vf_token->users--;
 		mutex_unlock(&vdev->sriov_pf_core_dev->vf_token->lock);
 	}
-#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
+#if IS_ENABLED(CONFIG_EEH)
 	eeh_dev_release(vdev->pdev);
 #endif
 	vfio_pci_core_disable(vdev);
@@ -710,7 +710,7 @@ EXPORT_SYMBOL_GPL(vfio_pci_core_close_device);
 void vfio_pci_core_finish_enable(struct vfio_pci_core_device *vdev)
 {
 	vfio_pci_probe_mmaps(vdev);
-#if IS_ENABLED(CONFIG_VFIO_SPAPR_EEH)
+#if IS_ENABLED(CONFIG_EEH)
 	eeh_dev_open(vdev->pdev);
 #endif
 
-- 
GitLab


From e2d55709398e62cf53e5c7df3758ae52cc62d63a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Mon, 5 Dec 2022 11:29:20 -0400
Subject: [PATCH 3411/4122] vfio: Fold vfio_virqfd.ko into vfio.ko

This is only 1.8k, putting it in its own module is not really
necessary. The kconfig infrastructure is still there to completely remove
it for systems that are trying for small footprint.

Put it in the main vfio.ko module now that kbuild can support multiple .c
files.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Cornelia Huck <cohuck@redhat.com>
Link: https://lore.kernel.org/r/5-v5-fc5346cacfd4+4c482-vfio_modules_jgg@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/Kconfig     |  2 +-
 drivers/vfio/Makefile    |  4 +---
 drivers/vfio/vfio.h      | 13 +++++++++++++
 drivers/vfio/vfio_main.c |  7 +++++++
 drivers/vfio/virqfd.c    | 17 +++--------------
 5 files changed, 25 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig
index d25b91adfd64c..0b8d53f63c7e5 100644
--- a/drivers/vfio/Kconfig
+++ b/drivers/vfio/Kconfig
@@ -21,7 +21,7 @@ config VFIO_IOMMU_SPAPR_TCE
 	default VFIO
 
 config VFIO_VIRQFD
-	tristate
+	bool
 	select EVENTFD
 	default n
 
diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile
index 50b8e8e3fb10d..0721ed4831c92 100644
--- a/drivers/vfio/Makefile
+++ b/drivers/vfio/Makefile
@@ -1,13 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
-vfio_virqfd-y := virqfd.o
-
 obj-$(CONFIG_VFIO) += vfio.o
 
 vfio-y += vfio_main.o \
 	  iova_bitmap.o \
 	  container.o
+vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o
 
-obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o
 obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o
 obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o
 obj-$(CONFIG_VFIO_PCI) += pci/
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index bcad54bbab08c..a7113b4baaa24 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -124,6 +124,19 @@ long vfio_container_ioctl_check_extension(struct vfio_container *container,
 int __init vfio_container_init(void);
 void vfio_container_cleanup(void);
 
+#if IS_ENABLED(CONFIG_VFIO_VIRQFD)
+int __init vfio_virqfd_init(void);
+void vfio_virqfd_exit(void);
+#else
+static inline int __init vfio_virqfd_init(void)
+{
+	return 0;
+}
+static inline void vfio_virqfd_exit(void)
+{
+}
+#endif
+
 #ifdef CONFIG_VFIO_NOIOMMU
 extern bool vfio_noiommu __read_mostly;
 #else
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 662e267a3e13d..7f88569c3ebaf 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1832,6 +1832,10 @@ static int __init vfio_init(void)
 	if (ret)
 		return ret;
 
+	ret = vfio_virqfd_init();
+	if (ret)
+		goto err_virqfd;
+
 	/* /dev/vfio/$GROUP */
 	vfio.class = class_create(THIS_MODULE, "vfio");
 	if (IS_ERR(vfio.class)) {
@@ -1862,6 +1866,8 @@ err_dev_class:
 	class_destroy(vfio.class);
 	vfio.class = NULL;
 err_group_class:
+	vfio_virqfd_exit();
+err_virqfd:
 	vfio_container_cleanup();
 	return ret;
 }
@@ -1876,6 +1882,7 @@ static void __exit vfio_cleanup(void)
 	class_destroy(vfio.device_class);
 	vfio.device_class = NULL;
 	class_destroy(vfio.class);
+	vfio_virqfd_exit();
 	vfio_container_cleanup();
 	vfio.class = NULL;
 	xa_destroy(&vfio_device_set_xa);
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 414e98d82b02e..497a17b378656 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -12,15 +12,12 @@
 #include <linux/file.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-
-#define DRIVER_VERSION  "0.1"
-#define DRIVER_AUTHOR   "Alex Williamson <alex.williamson@redhat.com>"
-#define DRIVER_DESC     "IRQFD support for VFIO bus drivers"
+#include "vfio.h"
 
 static struct workqueue_struct *vfio_irqfd_cleanup_wq;
 static DEFINE_SPINLOCK(virqfd_lock);
 
-static int __init vfio_virqfd_init(void)
+int __init vfio_virqfd_init(void)
 {
 	vfio_irqfd_cleanup_wq =
 		create_singlethread_workqueue("vfio-irqfd-cleanup");
@@ -30,7 +27,7 @@ static int __init vfio_virqfd_init(void)
 	return 0;
 }
 
-static void __exit vfio_virqfd_exit(void)
+void vfio_virqfd_exit(void)
 {
 	destroy_workqueue(vfio_irqfd_cleanup_wq);
 }
@@ -216,11 +213,3 @@ void vfio_virqfd_disable(struct virqfd **pvirqfd)
 	flush_workqueue(vfio_irqfd_cleanup_wq);
 }
 EXPORT_SYMBOL_GPL(vfio_virqfd_disable);
-
-module_init(vfio_virqfd_init);
-module_exit(vfio_virqfd_exit);
-
-MODULE_VERSION(DRIVER_VERSION);
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR(DRIVER_AUTHOR);
-MODULE_DESCRIPTION(DRIVER_DESC);
-- 
GitLab


From ce3895735cc26957dc6b2a8f5af07ddab09483ae Mon Sep 17 00:00:00 2001
From: Alex Williamson <alex.williamson@redhat.com>
Date: Fri, 2 Dec 2022 09:46:15 -0700
Subject: [PATCH 3412/4122] vfio/ap/ccw/samples: Fix device_register() unwind
 path

We always need to call put_device() if device_register() fails.
All vfio drivers calling device_register() include a similar unwind
stack via gotos, therefore split device_unregister() into its
device_del() and put_device() components in the unwind path, and
add a goto target to handle only the put_device() requirement.

Reported-by: Ruan Jinjie <ruanjinjie@huawei.com>
Link: https://lore.kernel.org/all/20221118032827.3725190-1-ruanjinjie@huawei.com
Fixes: d61fc96f47fd ("sample: vfio mdev display - host device")
Fixes: 9d1a546c53b4 ("docs: Sample driver to demonstrate how to use Mediated device framework.")
Fixes: a5e6e6505f38 ("sample: vfio bochs vbe display (host device for bochs-drm)")
Fixes: 9e6f07cd1eaa ("vfio/ccw: create a parent struct")
Fixes: 36360658eb5a ("s390: vfio_ap: link the vfio_ap devices to the vfio_ap bus subsystem")
Cc: Tony Krowiak <akrowiak@linux.ibm.com>
Cc: Halil Pasic <pasic@linux.ibm.com>
Cc: Jason Herne <jjherne@linux.ibm.com>
Cc: Kirti Wankhede <kwankhede@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Farman <farman@linux.ibm.com>
Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Jason J. Herne <jjherne@linux.ibm.com>
Link: https://lore.kernel.org/r/166999942139.645727.12439756512449846442.stgit@omen
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/s390/cio/vfio_ccw_drv.c   | 3 ++-
 drivers/s390/crypto/vfio_ap_drv.c | 2 +-
 samples/vfio-mdev/mbochs.c        | 7 ++++---
 samples/vfio-mdev/mdpy.c          | 7 ++++---
 samples/vfio-mdev/mtty.c          | 7 ++++---
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/s390/cio/vfio_ccw_drv.c b/drivers/s390/cio/vfio_ccw_drv.c
index c2a65808605af..54aba7cceb33f 100644
--- a/drivers/s390/cio/vfio_ccw_drv.c
+++ b/drivers/s390/cio/vfio_ccw_drv.c
@@ -199,8 +199,9 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
 	return 0;
 
 out_unreg:
-	device_unregister(&parent->dev);
+	device_del(&parent->dev);
 out_free:
+	put_device(&parent->dev);
 	dev_set_drvdata(&sch->dev, NULL);
 	return ret;
 }
diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c
index f43cfeabd2cc8..997b524bdd2b5 100644
--- a/drivers/s390/crypto/vfio_ap_drv.c
+++ b/drivers/s390/crypto/vfio_ap_drv.c
@@ -122,7 +122,7 @@ static int vfio_ap_matrix_dev_create(void)
 	return 0;
 
 matrix_drv_err:
-	device_unregister(&matrix_dev->device);
+	device_del(&matrix_dev->device);
 matrix_reg_err:
 	put_device(&matrix_dev->device);
 matrix_alloc_err:
diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c
index 8b5a3a778a257..e54eb752e1ba8 100644
--- a/samples/vfio-mdev/mbochs.c
+++ b/samples/vfio-mdev/mbochs.c
@@ -1430,7 +1430,7 @@ static int __init mbochs_dev_init(void)
 
 	ret = device_register(&mbochs_dev);
 	if (ret)
-		goto err_class;
+		goto err_put;
 
 	ret = mdev_register_parent(&mbochs_parent, &mbochs_dev, &mbochs_driver,
 				   mbochs_mdev_types,
@@ -1441,8 +1441,9 @@ static int __init mbochs_dev_init(void)
 	return 0;
 
 err_device:
-	device_unregister(&mbochs_dev);
-err_class:
+	device_del(&mbochs_dev);
+err_put:
+	put_device(&mbochs_dev);
 	class_destroy(mbochs_class);
 err_driver:
 	mdev_unregister_driver(&mbochs_driver);
diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c
index 721fb06c6413f..e8400fdab71da 100644
--- a/samples/vfio-mdev/mdpy.c
+++ b/samples/vfio-mdev/mdpy.c
@@ -717,7 +717,7 @@ static int __init mdpy_dev_init(void)
 
 	ret = device_register(&mdpy_dev);
 	if (ret)
-		goto err_class;
+		goto err_put;
 
 	ret = mdev_register_parent(&mdpy_parent, &mdpy_dev, &mdpy_driver,
 				   mdpy_mdev_types,
@@ -728,8 +728,9 @@ static int __init mdpy_dev_init(void)
 	return 0;
 
 err_device:
-	device_unregister(&mdpy_dev);
-err_class:
+	device_del(&mdpy_dev);
+err_put:
+	put_device(&mdpy_dev);
 	class_destroy(mdpy_class);
 err_driver:
 	mdev_unregister_driver(&mdpy_driver);
diff --git a/samples/vfio-mdev/mtty.c b/samples/vfio-mdev/mtty.c
index 3c2a421b9b696..e887de672c526 100644
--- a/samples/vfio-mdev/mtty.c
+++ b/samples/vfio-mdev/mtty.c
@@ -1330,7 +1330,7 @@ static int __init mtty_dev_init(void)
 
 	ret = device_register(&mtty_dev.dev);
 	if (ret)
-		goto err_class;
+		goto err_put;
 
 	ret = mdev_register_parent(&mtty_dev.parent, &mtty_dev.dev,
 				   &mtty_driver, mtty_mdev_types,
@@ -1340,8 +1340,9 @@ static int __init mtty_dev_init(void)
 	return 0;
 
 err_device:
-	device_unregister(&mtty_dev.dev);
-err_class:
+	device_del(&mtty_dev.dev);
+err_put:
+	put_device(&mtty_dev.dev);
 	class_destroy(mtty_dev.vd_class);
 err_driver:
 	mdev_unregister_driver(&mtty_driver);
-- 
GitLab


From 4ec3c19d058f7391ec631b8a1b0a690422b246a9 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sat, 5 Nov 2022 17:12:34 +0100
Subject: [PATCH 3413/4122] gfs2: Handle -EBUSY result of insert_inode_locked4

When creating a new inode, there is a small chance that an inode lookup
for a previous version of the same inode is still in progress.  In that
case, that previous lookup will eventually fail, but we may still need
to retry here.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/inode.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index c057f3bd475fb..9fbbc365a404c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -734,8 +734,12 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 		goto fail_free_inode;
 	gfs2_cancel_delete_work(io_gl);
 
+retry:
 	error = insert_inode_locked4(inode, ip->i_no_addr, iget_test, &ip->i_no_addr);
-	BUG_ON(error);
+	if (error == -EBUSY)
+		goto retry;
+	if (error)
+		goto fail_gunlock2;
 
 	error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT | GL_NOPID,
 				   &ip->i_iopen_gh);
-- 
GitLab


From 104bb8a663451404a26331263ce5b96c34504049 Mon Sep 17 00:00:00 2001
From: ruanjinjie <ruanjinjie@huawei.com>
Date: Mon, 5 Dec 2022 15:51:53 +0800
Subject: [PATCH 3414/4122] power: supply: fix null pointer dereferencing in
 power_supply_get_battery_info

when kmalloc() fail to allocate memory in kasprintf(), propname
will be NULL, strcmp() called by of_get_property() will cause
null pointer dereference.

So return ENOMEM if kasprintf() return NULL pointer.

Fixes: 3afb50d7125b ("power: supply: core: Add some helpers to use the battery OCV capacity table")
Signed-off-by: ruanjinjie <ruanjinjie@huawei.com>
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
---
 drivers/power/supply/power_supply_core.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c
index 00cb19b46001c..7c790c41e2fe3 100644
--- a/drivers/power/supply/power_supply_core.c
+++ b/drivers/power/supply/power_supply_core.c
@@ -750,6 +750,11 @@ int power_supply_get_battery_info(struct power_supply *psy,
 		int i, tab_len, size;
 
 		propname = kasprintf(GFP_KERNEL, "ocv-capacity-table-%d", index);
+		if (!propname) {
+			power_supply_put_battery_info(psy, info);
+			err = -ENOMEM;
+			goto out_put_node;
+		}
 		list = of_get_property(battery_np, propname, &size);
 		if (!list || !size) {
 			dev_err(&psy->dev, "failed to get %s\n", propname);
-- 
GitLab


From 5c3022e4a616d800cf5f4c3a981d7992179e44a1 Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Wed, 9 Nov 2022 01:49:36 -0500
Subject: [PATCH 3415/4122] riscv: stacktrace: Fixup ftrace_graph_ret_addr retp
 argument

The 'retp' is a pointer to the return address on the stack, so we
must pass the current return address pointer as the 'retp'
argument to ftrace_push_return_trace(). Not parent function's
return address on the stack.

Fixes: b785ec129bd9 ("riscv/ftrace: Add HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support")
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Link: https://lore.kernel.org/r/20221109064937.3643993-2-guoren@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/stacktrace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index 08d11a53f39e7..bcfe9eb55f80f 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -58,7 +58,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 		} else {
 			fp = frame->fp;
 			pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
-						   (unsigned long *)(fp - 8));
+						   &frame->ra);
 		}
 
 	}
-- 
GitLab


From 7ecdadf7f8c659524f6b2aebf6be7bf619764d90 Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Wed, 9 Nov 2022 01:49:37 -0500
Subject: [PATCH 3416/4122] riscv: stacktrace: Make walk_stackframe cross
 pt_regs frame

The current walk_stackframe with FRAME_POINTER would stop unwinding at
ret_from_exception:
  BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
  in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
  CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
  Call Trace:
  [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
  [<ffffffe000aecf48>] show_stack+0x32/0x4a
  [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
  [<ffffffe000af1648>] dump_stack+0x14/0x1c
  [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
  [<ffffffe000239aec>] __might_sleep+0x10/0x18
  [<ffffffe000afe3fe>] down_read+0x22/0xa4
  [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc

The optimization would help walk_stackframe cross the pt_regs frame and
get more backtrace of debug info:
  BUG: sleeping function called from invalid context at kernel/locking/rwsem.c:1518
  in_atomic(): 0, irqs_disabled(): 1, non_block: 0, pid: 1, name: init
  CPU: 0 PID: 1 Comm: init Not tainted 5.10.113-00021-g15c15974895c-dirty #192
  Call Trace:
  [<ffffffe0002038c8>] walk_stackframe+0x0/0xee
  [<ffffffe000aecf48>] show_stack+0x32/0x4a
  [<ffffffe000af1618>] dump_stack_lvl+0x72/0x8e
  [<ffffffe000af1648>] dump_stack+0x14/0x1c
  [<ffffffe000239ad2>] ___might_sleep+0x12e/0x138
  [<ffffffe000239aec>] __might_sleep+0x10/0x18
  [<ffffffe000afe3fe>] down_read+0x22/0xa4
  [<ffffffe000207588>] do_page_fault+0xb0/0x2fe
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc
  [<ffffffe000613c06>] riscv_intc_irq+0x1a/0x72
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc
  [<ffffffe00033f44a>] vma_link+0x54/0x160
  [<ffffffe000341d7a>] mmap_region+0x2cc/0x4d0
  [<ffffffe000342256>] do_mmap+0x2d8/0x3ac
  [<ffffffe000326318>] vm_mmap_pgoff+0x70/0xb8
  [<ffffffe00032638a>] vm_mmap+0x2a/0x36
  [<ffffffe0003cfdde>] elf_map+0x72/0x84
  [<ffffffe0003d05f8>] load_elf_binary+0x69a/0xec8
  [<ffffffe000376240>] bprm_execve+0x246/0x53a
  [<ffffffe00037786c>] kernel_execve+0xe8/0x124
  [<ffffffe000aecdf2>] run_init_process+0xfa/0x10c
  [<ffffffe000aece16>] try_to_run_init_process+0x12/0x3c
  [<ffffffe000afa920>] kernel_init+0xb4/0xf8
  [<ffffffe000201b80>] ret_from_exception+0x0/0xc

Here is the error injection test code for the above output:
 drivers/irqchip/irq-riscv-intc.c:
 static asmlinkage void riscv_intc_irq(struct pt_regs *regs)
 {
        unsigned long cause = regs->cause & ~CAUSE_IRQ_FLAG;
+       u32 tmp; __get_user(tmp, (u32 *)0);

Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Link: https://lore.kernel.org/r/20221109064937.3643993-3-guoren@kernel.org
[Palmer: use SYM_CODE_*]
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/entry.S      | 3 ++-
 arch/riscv/kernel/stacktrace.c | 9 +++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d7..da44fe2d0d821 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -248,7 +248,7 @@ ret_from_syscall_rejected:
 	andi t0, t0, _TIF_SYSCALL_WORK
 	bnez t0, handle_syscall_trace_exit
 
-ret_from_exception:
+SYM_CODE_START_NOALIGN(ret_from_exception)
 	REG_L s0, PT_STATUS(sp)
 	csrc CSR_STATUS, SR_IE
 #ifdef CONFIG_TRACE_IRQFLAGS
@@ -262,6 +262,7 @@ ret_from_exception:
 	andi s0, s0, SR_SPP
 #endif
 	bnez s0, resume_kernel
+SYM_CODE_END(ret_from_exception)
 
 resume_userspace:
 	/* Interrupts must be disabled here so flags are checked atomically */
diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c
index bcfe9eb55f80f..75c8dd64fc48e 100644
--- a/arch/riscv/kernel/stacktrace.c
+++ b/arch/riscv/kernel/stacktrace.c
@@ -16,6 +16,8 @@
 
 #ifdef CONFIG_FRAME_POINTER
 
+extern asmlinkage void ret_from_exception(void);
+
 void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 			     bool (*fn)(void *, unsigned long), void *arg)
 {
@@ -59,6 +61,13 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
 			fp = frame->fp;
 			pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
 						   &frame->ra);
+			if (pc == (unsigned long)ret_from_exception) {
+				if (unlikely(!__kernel_text_address(pc) || !fn(arg, pc)))
+					break;
+
+				pc = ((struct pt_regs *)sp)->epc;
+				fp = ((struct pt_regs *)sp)->s0;
+			}
 		}
 
 	}
-- 
GitLab


From 0a584655ef89541dae4d48d2c523b1480ae80284 Mon Sep 17 00:00:00 2001
From: Francisco Munoz <francisco.munoz.ruiz@linux.intel.com>
Date: Mon, 5 Dec 2022 17:16:37 -0700
Subject: [PATCH 3417/4122] PCI: vmd: Fix secondary bus reset for Intel bridges
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The reset was never applied in the current implementation because Intel
Bridges owned by VMD are parentless. Internally, pci_reset_bus() applies
a reset to the parent of the PCI device supplied as argument, but in this
case it failed because there wasn't a parent.

In more detail, this change allows the VMD driver to enumerate NVMe devices
in pass-through configurations when guest reboots are performed. There was
an attempted to fix this, but later we discovered that the code inside
pci_reset_bus() wasn’t triggering secondary bus resets. Therefore, we
updated the parameters passed to it, and now NVMe SSDs attached to VMD
bridges are properly enumerated in VT-d pass-through scenarios.

Link: https://lore.kernel.org/r/20221206001637.4744-1-francisco.munoz.ruiz@linux.intel.com
Fixes: 6aab5622296b ("PCI: vmd: Clean up domain before enumeration")
Signed-off-by: Francisco Munoz <francisco.munoz.ruiz@linux.intel.com>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Nirmal Patel <nirmal.patel@linux.intel.com>
Reviewed-by: Jonathan Derrick <jonathan.derrick@linux.dev>
---
 drivers/pci/controller/vmd.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c
index 98e0746e681c9..769eedeb8802a 100644
--- a/drivers/pci/controller/vmd.c
+++ b/drivers/pci/controller/vmd.c
@@ -719,6 +719,7 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 	resource_size_t offset[2] = {0};
 	resource_size_t membar2_offset = 0x2000;
 	struct pci_bus *child;
+	struct pci_dev *dev;
 	int ret;
 
 	/*
@@ -859,8 +860,25 @@ static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
 
 	pci_scan_child_bus(vmd->bus);
 	vmd_domain_reset(vmd);
-	list_for_each_entry(child, &vmd->bus->children, node)
-		pci_reset_bus(child->self);
+
+	/* When Intel VMD is enabled, the OS does not discover the Root Ports
+	 * owned by Intel VMD within the MMCFG space. pci_reset_bus() applies
+	 * a reset to the parent of the PCI device supplied as argument. This
+	 * is why we pass a child device, so the reset can be triggered at
+	 * the Intel bridge level and propagated to all the children in the
+	 * hierarchy.
+	 */
+	list_for_each_entry(child, &vmd->bus->children, node) {
+		if (!list_empty(&child->devices)) {
+			dev = list_first_entry(&child->devices,
+					       struct pci_dev, bus_list);
+			if (pci_reset_bus(dev))
+				pci_warn(dev, "can't reset device: %d\n", ret);
+
+			break;
+		}
+	}
+
 	pci_assign_unassigned_bus_resources(vmd->bus);
 
 	/*
-- 
GitLab


From 19098934f910b4d47cb30251dd39ffa57bef9523 Mon Sep 17 00:00:00 2001
From: John Thomson <git@johnthomson.fastmail.com.au>
Date: Tue, 6 Dec 2022 06:46:45 +1000
Subject: [PATCH 3418/4122] PCI: mt7621: Add sentinel to quirks table

Current driver is missing a sentinel in the struct soc_device_attribute
array, which causes an oops when assessed by the
soc_device_match(mt7621_pcie_quirks_match) call.

This was only exposed once the CONFIG_SOC_MT7621 mt7621 soc_dev_attr
was fixed to register the SOC as a device, in:

commit 7c18b64bba3b ("mips: ralink: mt7621: do not use kzalloc too early")

Fix it by adding the required sentinel.

Link: https://lore.kernel.org/lkml/26ebbed1-0fe9-4af9-8466-65f841d0b382@app.fastmail.com
Link: https://lore.kernel.org/r/20221205204645.301301-1-git@johnthomson.fastmail.com.au
Fixes: b483b4e4d3f6 ("staging: mt7621-pci: add quirks for 'E2' revision using 'soc_device_attribute'")
Signed-off-by: John Thomson <git@johnthomson.fastmail.com.au>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Sergio Paracuellos <sergio.paracuellos@gmail.com>
---
 drivers/pci/controller/pcie-mt7621.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/controller/pcie-mt7621.c b/drivers/pci/controller/pcie-mt7621.c
index 4bd1abf26008f..ee7aad09d6277 100644
--- a/drivers/pci/controller/pcie-mt7621.c
+++ b/drivers/pci/controller/pcie-mt7621.c
@@ -466,7 +466,8 @@ static int mt7621_pcie_register_host(struct pci_host_bridge *host)
 }
 
 static const struct soc_device_attribute mt7621_pcie_quirks_match[] = {
-	{ .soc_id = "mt7621", .revision = "E2" }
+	{ .soc_id = "mt7621", .revision = "E2" },
+	{ /* sentinel */ }
 };
 
 static int mt7621_pcie_probe(struct platform_device *pdev)
-- 
GitLab


From 6aecc0a59e07ba895b5473e0c916ba5f3d556c15 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Mon, 5 Dec 2022 16:32:31 -0600
Subject: [PATCH 3419/4122] cxl: Remove unnecessary cxl_pci_window_alignment()

cxl_pci_window_alignment() is referenced only via the struct
pci_controller_ops.window_alignment function pointer, and only in the
powerpc implementation of pcibios_window_alignment().

pcibios_window_alignment() defaults to returning 1 if the function pointer
is NULL, which is the same was what cxl_pci_window_alignment() does.

cxl_pci_window_alignment() is unnecessary, so remove it.  No functional
change intended.

Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Frederic Barrat <fbarrat@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221205223231.1268085-1-helgaas@kernel.org
---
 drivers/misc/cxl/vphb.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/drivers/misc/cxl/vphb.c b/drivers/misc/cxl/vphb.c
index 1264253cc07bc..6332db8044bd6 100644
--- a/drivers/misc/cxl/vphb.c
+++ b/drivers/misc/cxl/vphb.c
@@ -67,12 +67,6 @@ static void cxl_pci_disable_device(struct pci_dev *dev)
 	}
 }
 
-static resource_size_t cxl_pci_window_alignment(struct pci_bus *bus,
-						unsigned long type)
-{
-	return 1;
-}
-
 static void cxl_pci_reset_secondary_bus(struct pci_dev *dev)
 {
 	/* Should we do an AFU reset here ? */
@@ -200,7 +194,6 @@ static struct pci_controller_ops cxl_pci_controller_ops =
 	.enable_device_hook = cxl_pci_enable_device_hook,
 	.disable_device = cxl_pci_disable_device,
 	.release_device = cxl_pci_disable_device,
-	.window_alignment = cxl_pci_window_alignment,
 	.reset_secondary_bus = cxl_pci_reset_secondary_bus,
 	.setup_msi_irqs = cxl_setup_msi_irqs,
 	.teardown_msi_irqs = cxl_teardown_msi_irqs,
-- 
GitLab


From 3ae7c96dd51025550c8001c6f833337f11d00807 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 2 Dec 2022 17:49:26 +0100
Subject: [PATCH 3420/4122] powerpc/dts/fsl: Fix pca954x i2c-mux node names

"make dtbs_check":

    arch/powerpc/boot/dts/fsl/t1040rdb-rev-a.dtb: pca9546@77: $nodename:0: 'pca9546@77' does not match '^(i2c-?)?mux'
           From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml
    arch/powerpc/boot/dts/fsl/t1024qds.dtb: pca9547@77: Unevaluated properties are not allowed ('#address-cells', '#size-cells', 'i2c@0', 'i2c@2', 'i2c@3' were unexpected)
           From schema: Documentation/devicetree/bindings/i2c/i2c-mux-pca954x.yaml
    ...

Fix this by renaming pca954x nodes to "i2c-mux", to match the I2C bus
multiplexer/switch DT bindings and the Generic Names Recommendation in
the Devicetree Specification.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/6c5d86c49ac170e9d56ab121ea0602f3873849ca.1669999298.git.geert+renesas@glider.be
---
 arch/powerpc/boot/dts/fsl/t1024qds.dts  | 2 +-
 arch/powerpc/boot/dts/fsl/t1024rdb.dts  | 2 +-
 arch/powerpc/boot/dts/fsl/t104xqds.dtsi | 2 +-
 arch/powerpc/boot/dts/fsl/t104xrdb.dtsi | 2 +-
 arch/powerpc/boot/dts/fsl/t208xqds.dtsi | 2 +-
 arch/powerpc/boot/dts/fsl/t208xrdb.dtsi | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/boot/dts/fsl/t1024qds.dts b/arch/powerpc/boot/dts/fsl/t1024qds.dts
index d6858b7cd93fb..9ea7942f914e1 100644
--- a/arch/powerpc/boot/dts/fsl/t1024qds.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024qds.dts
@@ -151,7 +151,7 @@
 		};
 
 		i2c@118000 {
-			pca9547@77 {
+			i2c-mux@77 {
 				compatible = "nxp,pca9547";
 				reg = <0x77>;
 				#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t1024rdb.dts b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
index dbcd31cc35dcd..270aaf631f2ab 100644
--- a/arch/powerpc/boot/dts/fsl/t1024rdb.dts
+++ b/arch/powerpc/boot/dts/fsl/t1024rdb.dts
@@ -165,7 +165,7 @@
 		};
 
 		i2c@118100 {
-			pca9546@77 {
+			i2c-mux@77 {
 				compatible = "nxp,pca9546";
 				reg = <0x77>;
 				#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
index 6154797322524..1c329f076f64f 100644
--- a/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xqds.dtsi
@@ -268,7 +268,7 @@
 		};
 
 		i2c@118000 {
-			pca9547@77 {
+			i2c-mux@77 {
 				compatible = "nxp,pca9547";
 				reg = <0x77>;
 			};
diff --git a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
index bfe1ed5be3374..fc7bec5dcb90f 100644
--- a/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t104xrdb.dtsi
@@ -128,7 +128,7 @@
 		};
 
 		i2c@118100 {
-			pca9546@77 {
+			i2c-mux@77 {
 				compatible = "nxp,pca9546";
 				reg = <0x77>;
 				#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
index db4139999b28e..962c99941645b 100644
--- a/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xqds.dtsi
@@ -135,7 +135,7 @@
 		};
 
 		i2c@118000 {
-			pca9547@77 {
+			i2c-mux@77 {
 				compatible = "nxp,pca9547";
 				reg = <0x77>;
 				#address-cells = <1>;
diff --git a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
index ff87e67c70da4..ecc3e8c7394ca 100644
--- a/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
+++ b/arch/powerpc/boot/dts/fsl/t208xrdb.dtsi
@@ -138,7 +138,7 @@
 		};
 
 		i2c@118100 {
-			pca9546@77 {
+			i2c-mux@77 {
 				compatible = "nxp,pca9546";
 				reg = <0x77>;
 			};
-- 
GitLab


From 5ddcc03a07ae1ab5062f89a946d9495f1fd8eaa4 Mon Sep 17 00:00:00 2001
From: Aboorva Devarajan <aboorvad@linux.vnet.ibm.com>
Date: Mon, 14 Nov 2022 20:26:11 +0530
Subject: [PATCH 3421/4122] powerpc/cpuidle: Set CPUIDLE_FLAG_POLLING for
 snooze state

During the comparative study of cpuidle governors, it is noticed that the
menu governor does not select CEDE state in some scenarios even though when
the sleep duration of the CPU exceeds the target residency of the CEDE idle
state this is because the CPU exits the snooze "polling" state when snooze
time limit is reached in the snooze_loop(), which is not a real wake up
and it just means that the polling state selection was not adequate.

cpuidle governors rely on CPUIDLE_FLAG_POLLING flag to be set for the
polling states to handle the condition mentioned above.

Hence, set the CPUIDLE_FLAG_POLLING flag for snooze state (polling state)
in powerpc arch to make the cpuidle governor work as expected.

Reference Commits:

- Timeout enabled for snooze state:
  commit 78eaa10f027c
  ("cpuidle: powernv/pseries: Auto-promotion of snooze to deeper idle state")

- commit dc2251bf98c6
  ("cpuidle: Eliminate the CPUIDLE_DRIVER_STATE_START symbol")

- Fix wakeup stats in governor for polling states
  commit 5f26bdceb9c0
  ("cpuidle: menu: Fix wakeup statistics updates for polling state")

Signed-off-by: Aboorva Devarajan <aboorvad@linux.vnet.ibm.com>
Tested-by: Vishal Chourasia <vishalc@linux.vnet.ibm.com>
Reviewed-by: Vaidyanathan Srinivasan <svaidy@linux.ibm.com>
Reviewed-by: Vishal Chourasia <vishalc@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114145611.37669-1-aboorvad@linux.vnet.ibm.com
---
 drivers/cpuidle/cpuidle-powernv.c | 5 ++++-
 drivers/cpuidle/cpuidle-pseries.c | 8 ++++++--
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 0b5461b3d7dd4..9ebedd972df0b 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -76,6 +76,7 @@ static int snooze_loop(struct cpuidle_device *dev,
 	local_irq_enable();
 
 	snooze_exit_time = get_tb() + get_snooze_timeout(dev, drv, index);
+	dev->poll_time_limit = false;
 	ppc64_runlatch_off();
 	HMT_very_low();
 	while (!need_resched()) {
@@ -86,6 +87,7 @@ static int snooze_loop(struct cpuidle_device *dev,
 			 * cleared to order subsequent test of need_resched().
 			 */
 			clear_thread_flag(TIF_POLLING_NRFLAG);
+			dev->poll_time_limit = true;
 			smp_mb();
 			break;
 		}
@@ -155,7 +157,8 @@ static struct cpuidle_state powernv_states[CPUIDLE_STATE_MAX] = {
 		.desc = "snooze",
 		.exit_latency = 0,
 		.target_residency = 0,
-		.enter = snooze_loop },
+		.enter = snooze_loop,
+		.flags = CPUIDLE_FLAG_POLLING },
 };
 
 static int powernv_cpuidle_cpu_online(unsigned int cpu)
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 7e7ab5597d7ac..1bad4d2b7be33 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -44,6 +44,7 @@ static int snooze_loop(struct cpuidle_device *dev,
 	pseries_idle_prolog();
 	local_irq_enable();
 	snooze_exit_time = get_tb() + snooze_timeout;
+	dev->poll_time_limit = false;
 
 	while (!need_resched()) {
 		HMT_low();
@@ -54,6 +55,7 @@ static int snooze_loop(struct cpuidle_device *dev,
 			 * loop anyway. Require a barrier after polling is
 			 * cleared to order subsequent test of need_resched().
 			 */
+			dev->poll_time_limit = true;
 			clear_thread_flag(TIF_POLLING_NRFLAG);
 			smp_mb();
 			break;
@@ -268,7 +270,8 @@ static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = {
 		.desc = "snooze",
 		.exit_latency = 0,
 		.target_residency = 0,
-		.enter = &snooze_loop },
+		.enter = &snooze_loop,
+		.flags = CPUIDLE_FLAG_POLLING },
 	{ /* CEDE */
 		.name = "CEDE",
 		.desc = "CEDE",
@@ -286,7 +289,8 @@ static struct cpuidle_state shared_states[] = {
 		.desc = "snooze",
 		.exit_latency = 0,
 		.target_residency = 0,
-		.enter = &snooze_loop },
+		.enter = &snooze_loop,
+		.flags = CPUIDLE_FLAG_POLLING },
 	{ /* Shared Cede */
 		.name = "Shared Cede",
 		.desc = "Shared Cede",
-- 
GitLab


From 74eac50391ce42c5d0038d6f0e580576e53aec4e Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Mon, 5 Dec 2022 10:45:30 +0100
Subject: [PATCH 3422/4122] dt-bindings: PCI: qcom: Allow 'dma-coherent'
 property

Devices on some PCIe buses may be cache coherent and must be marked as
such in the devicetree to avoid data corruption.

This is specifically needed on recent Qualcomm platforms like SC8280XP.

Link: https://lore.kernel.org/r/20221205094530.12883-1-johan+linaro@kernel.org
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Acked-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/pci/qcom,pcie.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml
index 2f851c804bb07..a5859bb3dc28c 100644
--- a/Documentation/devicetree/bindings/pci/qcom,pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie.yaml
@@ -62,6 +62,8 @@ properties:
     minItems: 3
     maxItems: 13
 
+  dma-coherent: true
+
   interconnects:
     maxItems: 2
 
-- 
GitLab


From 7db354444ad8429e660b0f8145d425285d4f90ff Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 4 Dec 2022 16:50:41 +0100
Subject: [PATCH 3423/4122] gfs2: Cosmetic gfs2_dinode_{in,out} cleanup

In each of the two functions, add an inode variable that points to
&ip->i_inode and use that throughout the rest of the function.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glops.c | 41 +++++++++++++++++++++--------------------
 fs/gfs2/super.c | 27 ++++++++++++++-------------
 2 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index 49210a2e7ce75..af69a1bacd559 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -397,38 +397,39 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	struct timespec64 atime;
 	u16 height, depth;
 	umode_t mode = be32_to_cpu(str->di_mode);
-	bool is_new = ip->i_inode.i_state & I_NEW;
+	struct inode *inode = &ip->i_inode;
+	bool is_new = inode->i_state & I_NEW;
 
 	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
 		goto corrupt;
-	if (unlikely(!is_new && inode_wrong_type(&ip->i_inode, mode)))
+	if (unlikely(!is_new && inode_wrong_type(inode, mode)))
 		goto corrupt;
 	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
-	ip->i_inode.i_mode = mode;
+	inode->i_mode = mode;
 	if (is_new) {
-		ip->i_inode.i_rdev = 0;
+		inode->i_rdev = 0;
 		switch (mode & S_IFMT) {
 		case S_IFBLK:
 		case S_IFCHR:
-			ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
-						   be32_to_cpu(str->di_minor));
+			inode->i_rdev = MKDEV(be32_to_cpu(str->di_major),
+					      be32_to_cpu(str->di_minor));
 			break;
 		}
 	}
 
-	i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
-	i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
-	set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
-	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
-	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
+	i_uid_write(inode, be32_to_cpu(str->di_uid));
+	i_gid_write(inode, be32_to_cpu(str->di_gid));
+	set_nlink(inode, be32_to_cpu(str->di_nlink));
+	i_size_write(inode, be64_to_cpu(str->di_size));
+	gfs2_set_inode_blocks(inode, be64_to_cpu(str->di_blocks));
 	atime.tv_sec = be64_to_cpu(str->di_atime);
 	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
-	if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0)
-		ip->i_inode.i_atime = atime;
-	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
-	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
-	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
-	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
+	if (timespec64_compare(&inode->i_atime, &atime) < 0)
+		inode->i_atime = atime;
+	inode->i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
+	inode->i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
+	inode->i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
+	inode->i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);
 
 	ip->i_goal = be64_to_cpu(str->di_goal_meta);
 	ip->i_generation = be64_to_cpu(str->di_generation);
@@ -436,7 +437,7 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_diskflags = be32_to_cpu(str->di_flags);
 	ip->i_eattr = be64_to_cpu(str->di_eattr);
 	/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
-	gfs2_set_inode_flags(&ip->i_inode);
+	gfs2_set_inode_flags(inode);
 	height = be16_to_cpu(str->di_height);
 	if (unlikely(height > GFS2_MAX_META_HEIGHT))
 		goto corrupt;
@@ -448,8 +449,8 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_depth = (u8)depth;
 	ip->i_entries = be32_to_cpu(str->di_entries);
 
-	if (S_ISREG(ip->i_inode.i_mode))
-		gfs2_set_aops(&ip->i_inode);
+	if (S_ISREG(inode->i_mode))
+		gfs2_set_aops(inode);
 
 	return 0;
 corrupt:
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index eac9b0c34aac8..075fad8fb1d1e 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -379,6 +379,7 @@ out:
 
 void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 {
+	const struct inode *inode = &ip->i_inode;
 	struct gfs2_dinode *str = buf;
 
 	str->di_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
@@ -386,15 +387,15 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 	str->di_header.mh_format = cpu_to_be32(GFS2_FORMAT_DI);
 	str->di_num.no_addr = cpu_to_be64(ip->i_no_addr);
 	str->di_num.no_formal_ino = cpu_to_be64(ip->i_no_formal_ino);
-	str->di_mode = cpu_to_be32(ip->i_inode.i_mode);
-	str->di_uid = cpu_to_be32(i_uid_read(&ip->i_inode));
-	str->di_gid = cpu_to_be32(i_gid_read(&ip->i_inode));
-	str->di_nlink = cpu_to_be32(ip->i_inode.i_nlink);
-	str->di_size = cpu_to_be64(i_size_read(&ip->i_inode));
-	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(&ip->i_inode));
-	str->di_atime = cpu_to_be64(ip->i_inode.i_atime.tv_sec);
-	str->di_mtime = cpu_to_be64(ip->i_inode.i_mtime.tv_sec);
-	str->di_ctime = cpu_to_be64(ip->i_inode.i_ctime.tv_sec);
+	str->di_mode = cpu_to_be32(inode->i_mode);
+	str->di_uid = cpu_to_be32(i_uid_read(inode));
+	str->di_gid = cpu_to_be32(i_gid_read(inode));
+	str->di_nlink = cpu_to_be32(inode->i_nlink);
+	str->di_size = cpu_to_be64(i_size_read(inode));
+	str->di_blocks = cpu_to_be64(gfs2_get_inode_blocks(inode));
+	str->di_atime = cpu_to_be64(inode->i_atime.tv_sec);
+	str->di_mtime = cpu_to_be64(inode->i_mtime.tv_sec);
+	str->di_ctime = cpu_to_be64(inode->i_ctime.tv_sec);
 
 	str->di_goal_meta = cpu_to_be64(ip->i_goal);
 	str->di_goal_data = cpu_to_be64(ip->i_goal);
@@ -402,16 +403,16 @@ void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf)
 
 	str->di_flags = cpu_to_be32(ip->i_diskflags);
 	str->di_height = cpu_to_be16(ip->i_height);
-	str->di_payload_format = cpu_to_be32(S_ISDIR(ip->i_inode.i_mode) &&
+	str->di_payload_format = cpu_to_be32(S_ISDIR(inode->i_mode) &&
 					     !(ip->i_diskflags & GFS2_DIF_EXHASH) ?
 					     GFS2_FORMAT_DE : 0);
 	str->di_depth = cpu_to_be16(ip->i_depth);
 	str->di_entries = cpu_to_be32(ip->i_entries);
 
 	str->di_eattr = cpu_to_be64(ip->i_eattr);
-	str->di_atime_nsec = cpu_to_be32(ip->i_inode.i_atime.tv_nsec);
-	str->di_mtime_nsec = cpu_to_be32(ip->i_inode.i_mtime.tv_nsec);
-	str->di_ctime_nsec = cpu_to_be32(ip->i_inode.i_ctime.tv_nsec);
+	str->di_atime_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
+	str->di_mtime_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
+	str->di_ctime_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
 }
 
 /**
-- 
GitLab


From 70376c7ff31221f1d21db5611d8209e677781d3a Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 4 Dec 2022 17:00:04 +0100
Subject: [PATCH 3424/4122] gfs2: Always check inode size of inline inodes

Check if the inode size of stuffed (inline) inodes is within the allowed
range when reading inodes from disk (gfs2_dinode_in()).  This prevents
us from on-disk corruption.

The two checks in stuffed_readpage() and gfs2_unstuffer_page() that just
truncate inline data to the maximum allowed size don't actually make
sense, and they can be removed now as well.

Reported-by: syzbot+7bb81dfa9cda07d9cd9d@syzkaller.appspotmail.com
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/aops.c  | 2 --
 fs/gfs2/bmap.c  | 3 ---
 fs/gfs2/glops.c | 3 +++
 3 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 05bee80ac7dee..e782b4f1d1043 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -427,8 +427,6 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 		return error;
 
 	kaddr = kmap_atomic(page);
-	if (dsize > gfs2_max_stuffed_size(ip))
-		dsize = gfs2_max_stuffed_size(ip);
 	memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 	memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 	kunmap_atomic(kaddr);
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 3bdb2c668a71c..e7537fd305dd2 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -61,9 +61,6 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
 		void *kaddr = kmap(page);
 		u64 dsize = i_size_read(inode);
  
-		if (dsize > gfs2_max_stuffed_size(ip))
-			dsize = gfs2_max_stuffed_size(ip);
-
 		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
 		memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
 		kunmap(page);
diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c
index af69a1bacd559..d78b61ecc1cdf 100644
--- a/fs/gfs2/glops.c
+++ b/fs/gfs2/glops.c
@@ -449,6 +449,9 @@ static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
 	ip->i_depth = (u8)depth;
 	ip->i_entries = be32_to_cpu(str->di_entries);
 
+	if (gfs2_is_stuffed(ip) && inode->i_size > gfs2_max_stuffed_size(ip))
+		goto corrupt;
+
 	if (S_ISREG(inode->i_mode))
 		gfs2_set_aops(inode);
 
-- 
GitLab


From 4ad02083a092b497f35804de03eaa62cf81fada6 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 2 Dec 2022 18:00:15 +0100
Subject: [PATCH 3425/4122] gfs2: Make gfs2_glock_hold return its glock
 argument

This allows code like 'gl = gfs2_glock_hold(...)'.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/file.c  | 3 +--
 fs/gfs2/glock.c | 6 +++---
 fs/gfs2/glock.h | 2 +-
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 60c6fb91fb589..eea5be4fbf0ef 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -1445,14 +1445,13 @@ static int gfs2_lock(struct file *file, int cmd, struct file_lock *fl)
 
 static void __flock_holder_uninit(struct file *file, struct gfs2_holder *fl_gh)
 {
-	struct gfs2_glock *gl = fl_gh->gh_gl;
+	struct gfs2_glock *gl = gfs2_glock_hold(fl_gh->gh_gl);
 
 	/*
 	 * Make sure gfs2_glock_put() won't sleep under the file->f_lock
 	 * spinlock.
 	 */
 
-	gfs2_glock_hold(gl);
 	spin_lock(&file->f_lock);
 	gfs2_holder_uninit(fl_gh);
 	spin_unlock(&file->f_lock);
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index df335c258eb08..1a6c1eb7bd6b8 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -186,10 +186,11 @@ void gfs2_glock_free(struct gfs2_glock *gl)
  *
  */
 
-void gfs2_glock_hold(struct gfs2_glock *gl)
+struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl)
 {
 	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
 	lockref_get(&gl->gl_lockref);
+	return gl;
 }
 
 /**
@@ -1256,13 +1257,12 @@ void __gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
 			struct gfs2_holder *gh, unsigned long ip)
 {
 	INIT_LIST_HEAD(&gh->gh_list);
-	gh->gh_gl = gl;
+	gh->gh_gl = gfs2_glock_hold(gl);
 	gh->gh_ip = ip;
 	gh->gh_owner_pid = get_pid(task_pid(current));
 	gh->gh_state = state;
 	gh->gh_flags = flags;
 	gh->gh_iflags = 0;
-	gfs2_glock_hold(gl);
 }
 
 /**
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 0d068f4fd7d67..76cd2fabc668c 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -196,7 +196,7 @@ static inline struct address_space *gfs2_glock2aspace(struct gfs2_glock *gl)
 extern int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
 			  const struct gfs2_glock_operations *glops,
 			  int create, struct gfs2_glock **glp);
-extern void gfs2_glock_hold(struct gfs2_glock *gl);
+extern struct gfs2_glock *gfs2_glock_hold(struct gfs2_glock *gl);
 extern void gfs2_glock_put(struct gfs2_glock *gl);
 extern void gfs2_glock_queue_put(struct gfs2_glock *gl);
 
-- 
GitLab


From 97236ad5a68c6b7603cea2ad01c588887e5cb961 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 4 Dec 2022 13:02:39 +0100
Subject: [PATCH 3426/4122] gfs2: Avoid dequeuing GL_ASYNC glock holders twice

When a locking request fails, the associated glock holder is
automatically dequeued from the list of active and waiting holders.  For
GL_ASYNC locking requests, this will obviously happen asynchronously
and it can race with attempts to cancel that locking request via
gfs2_glock_dq().  Therefore, don't forget to check if a locking request
has already been dequeued in gfs2_glock_dq().

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 1a6c1eb7bd6b8..0f5c5c12d8c6e 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1707,6 +1707,13 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 	struct gfs2_glock *gl = gh->gh_gl;
 
 	spin_lock(&gl->gl_lockref.lock);
+	if (!gfs2_holder_queued(gh)) {
+		/*
+		 * May have already been dequeued because the locking request
+		 * was GL_ASYNC and it has failed in the meantime.
+		 */
+		goto out;
+	}
 	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
 	    !test_bit(HIF_HOLDER, &gh->gh_iflags)) {
 		spin_unlock(&gl->gl_lockref.lock);
@@ -1716,6 +1723,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 	}
 
 	__gfs2_glock_dq(gh);
+out:
 	spin_unlock(&gl->gl_lockref.lock);
 }
 
-- 
GitLab


From 764665c6775251d4569ba9f09981459bbb166359 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 4 Dec 2022 03:48:52 +0100
Subject: [PATCH 3427/4122] gfs2: Clean up after gfs2_create_inode rework

Since commit 3d36e57ff768 ("gfs2: gfs2_create_inode rework"),
gfs2_evict_inode() and gfs2_create_inode() / gfs2_inode_lookup() will
synchronize via the inode hash table and we can be certain that once a
new inode is inserted into the inode hash table(), gfs2_evict_inode()
has completely destroyed any previous versions.  We no longer need to
worry about overlapping inode object lifespans.  Update the code and
comments accordingly.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.h | 14 --------------
 fs/gfs2/super.c | 21 ++++++++++++++-------
 2 files changed, 14 insertions(+), 21 deletions(-)

diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 76cd2fabc668c..d561126cfb477 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -322,20 +322,6 @@ static inline void glock_set_object(struct gfs2_glock *gl, void *object)
 /**
  * glock_clear_object - clear the gl_object field of a glock
  * @gl: the glock
- * @object: the object
- *
- * I'd love to similarly add this:
- *	else if (gfs2_assert_warn(gl->gl_sbd, gl->gl_object == object))
- *		gfs2_dump_glock(NULL, gl, true);
- * Unfortunately, that's not possible because as soon as gfs2_delete_inode
- * frees the block in the rgrp, another process can reassign it for an I_NEW
- * inode in gfs2_create_inode because that calls new_inode, not gfs2_iget.
- * That means gfs2_delete_inode may subsequently try to call this function
- * for a glock that's already pointing to a brand new inode. If we clear the
- * new inode's gl_object, we'll introduce metadata corruption. Function
- * gfs2_delete_inode calls clear_inode which calls gfs2_clear_inode which also
- * tries to clear gl_object, so it's more than just gfs2_delete_inode.
- *
  */
 static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
 {
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 075fad8fb1d1e..02f1b5f2d7f22 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1304,14 +1304,21 @@ static int evict_unlinked_inode(struct inode *inode)
 			goto out;
 	}
 
-	/* We're about to clear the bitmap for the dinode, but as soon as we
-	   do, gfs2_create_inode can create another inode at the same block
-	   location and try to set gl_object again. We clear gl_object here so
-	   that subsequent inode creates don't see an old gl_object. */
-	if (ip->i_gl) {
-		glock_clear_object(ip->i_gl, ip);
+	if (ip->i_gl)
 		gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino);
-	}
+
+	/*
+	 * As soon as we clear the bitmap for the dinode, gfs2_create_inode()
+	 * can get called to recreate it, or even gfs2_inode_lookup() if the
+	 * inode was recreated on another node in the meantime.
+	 *
+	 * However, inserting the new inode into the inode hash table will not
+	 * succeed until the old inode is removed, and that only happens after
+	 * ->evict_inode() returns.  The new inode is attached to its inode and
+	 *  iopen glocks after inserting it into the inode hash table, so at
+	 *  that point we can be sure that both glocks are unused.
+	 */
+
 	ret = gfs2_dinode_dealloc(ip);
 out:
 	return ret;
-- 
GitLab


From fe1bff6517de789d491844f53e61e4ff02e8f8b1 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 4 Dec 2022 13:27:11 +0100
Subject: [PATCH 3428/4122] gfs2: Simply dequeue iopen glock in
 gfs2_evict_inode

With the previous change, to simplify things, we can always just dequeue
and uninitialize the iopen glock in gfs2_evict_inode() even if it isn't
queued anymore.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/super.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 02f1b5f2d7f22..999cc146d7083 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1419,12 +1419,9 @@ out:
 		struct gfs2_glock *gl = ip->i_iopen_gh.gh_gl;
 
 		glock_clear_object(gl, ip);
-		if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
-			ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
-			gfs2_glock_dq(&ip->i_iopen_gh);
-		}
 		gfs2_glock_hold(gl);
-		gfs2_holder_uninit(&ip->i_iopen_gh);
+		ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+		gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 		gfs2_glock_put_eventually(gl);
 	}
 	if (ip->i_gl) {
-- 
GitLab


From 3781ec9e09123d955b93fc8522ffb683a51f865d Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 5 Dec 2022 14:44:37 +0100
Subject: [PATCH 3429/4122] gfs2: Uninline and improve glock_{set,clear}_object

Those functions have reached a size at which having them inline isn't
useful anymore, so uninline them.  In addition, report the glock name on
assertion failures.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 fs/gfs2/glock.h | 29 +++--------------------------
 2 files changed, 45 insertions(+), 26 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 0f5c5c12d8c6e..76432efe6e025 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -928,6 +928,48 @@ out_unlock:
 	return;
 }
 
+/**
+ * glock_set_object - set the gl_object field of a glock
+ * @gl: the glock
+ * @object: the object
+ */
+void glock_set_object(struct gfs2_glock *gl, void *object)
+{
+	void *prev_object;
+
+	spin_lock(&gl->gl_lockref.lock);
+	prev_object = gl->gl_object;
+	gl->gl_object = object;
+	spin_unlock(&gl->gl_lockref.lock);
+	if (gfs2_assert_warn(gl->gl_name.ln_sbd, prev_object == NULL)) {
+		pr_warn("glock=%u/%llx\n",
+			gl->gl_name.ln_type,
+			(unsigned long long)gl->gl_name.ln_number);
+		gfs2_dump_glock(NULL, gl, true);
+	}
+}
+
+/**
+ * glock_clear_object - clear the gl_object field of a glock
+ * @gl: the glock
+ */
+void glock_clear_object(struct gfs2_glock *gl, void *object)
+{
+	void *prev_object;
+
+	spin_lock(&gl->gl_lockref.lock);
+	prev_object = gl->gl_object;
+	gl->gl_object = NULL;
+	spin_unlock(&gl->gl_lockref.lock);
+	if (gfs2_assert_warn(gl->gl_name.ln_sbd,
+			     prev_object == object || prev_object == NULL)) {
+		pr_warn("glock=%u/%llx\n",
+			gl->gl_name.ln_type,
+			(unsigned long long)gl->gl_name.ln_number);
+		gfs2_dump_glock(NULL, gl, true);
+	}
+}
+
 void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
 {
 	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index d561126cfb477..e4be9e4bc9799 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -288,6 +288,9 @@ extern void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp);
 extern void gfs2_register_debugfs(void);
 extern void gfs2_unregister_debugfs(void);
 
+extern void glock_set_object(struct gfs2_glock *gl, void *object);
+extern void glock_clear_object(struct gfs2_glock *gl, void *object);
+
 extern const struct lm_lockops gfs2_dlm_ops;
 
 static inline void gfs2_holder_mark_uninitialized(struct gfs2_holder *gh)
@@ -305,32 +308,6 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh)
 	return !list_empty(&gh->gh_list);
 }
 
-/**
- * glock_set_object - set the gl_object field of a glock
- * @gl: the glock
- * @object: the object
- */
-static inline void glock_set_object(struct gfs2_glock *gl, void *object)
-{
-	spin_lock(&gl->gl_lockref.lock);
-	if (gfs2_assert_warn(gl->gl_name.ln_sbd, gl->gl_object == NULL))
-		gfs2_dump_glock(NULL, gl, true);
-	gl->gl_object = object;
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
-/**
- * glock_clear_object - clear the gl_object field of a glock
- * @gl: the glock
- */
-static inline void glock_clear_object(struct gfs2_glock *gl, void *object)
-{
-	spin_lock(&gl->gl_lockref.lock);
-	if (gl->gl_object == object)
-		gl->gl_object = NULL;
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
 static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
-- 
GitLab


From 2ec750a01d189cf1872cd79490d0911a7bd519f8 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Sun, 4 Dec 2022 12:51:55 +0100
Subject: [PATCH 3430/4122] gfs2: Add gfs2_inode_lookup comment

Add comment on when and why gfs2_cancel_delete_work() needs to be
skipped in gfs2_inode_lookup().

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/inode.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 9fbbc365a404c..8d4c4b5c4c0da 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -142,6 +142,11 @@ struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
 		if (unlikely(error))
 			goto fail;
 
+		/*
+		 * The only caller that sets @blktype to GFS2_BLKST_UNLINKED is
+		 * delete_work_func().  Make sure not to cancel the delete work
+		 * from within itself here.
+		 */
 		if (blktype == GFS2_BLKST_UNLINKED)
 			extra_flags |= LM_FLAG_TRY;
 		else
-- 
GitLab


From 88f4a9f813c549f6b8a6fbf12030949b48a4d5a4 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 5 Dec 2022 22:27:28 +0100
Subject: [PATCH 3431/4122] gfs2: Partially revert gfs2_inode_lookup change

Commit c412a97cf6c5 changed delete_work_func() to always perform an
inode lookup when gfs2_try_evict() fails.  This doesn't make sense as a
gfs2_try_evict() failure indicates that the inode is likely still in
use.  Revert that change.

Fixes: c412a97cf6c5 ("gfs2: Use TRY lock in gfs2_inode_lookup for UNLINKED inodes")
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 76432efe6e025..6f2de8c0b2d01 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1082,6 +1082,7 @@ static void delete_work_func(struct work_struct *work)
 			if (gfs2_queue_delete_work(gl, 5 * HZ))
 				return;
 		}
+		goto out;
 	}
 
 	inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
@@ -1094,6 +1095,7 @@ static void delete_work_func(struct work_struct *work)
 		d_prune_aliases(inode);
 		iput(inode);
 	}
+out:
 	gfs2_glock_put(gl);
 }
 
-- 
GitLab


From ec9eaf68c1dcd1b0d4e0bad0630ddac49c20bbe8 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Sun, 27 Nov 2022 12:41:37 +0100
Subject: [PATCH 3432/4122] dt-bindings: PCI: mediatek-gen3: add SoC based
 clock config

The PCIe driver covers different SOC which needing different clock
configs. Define them based on compatible.

Link: https://lore.kernel.org/r/20221127114142.156573-4-linux@fw-web.de
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Acked-by: Jianjun Wang <jianjun.wang@mediatek.com>
---
 .../bindings/pci/mediatek-pcie-gen3.yaml      | 47 ++++++++++++++-----
 1 file changed, 35 insertions(+), 12 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
index bc90f0ec7bd99..ef5cc1fc4d107 100644
--- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
+++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
@@ -43,9 +43,6 @@ description: |+
   each set has its own address for MSI message, and supports 32 MSI vectors
   to generate interrupt.
 
-allOf:
-  - $ref: /schemas/pci/pci-bus.yaml#
-
 properties:
   compatible:
     oneOf:
@@ -90,15 +87,7 @@ properties:
     maxItems: 6
 
   clock-names:
-    items:
-      - const: pl_250m
-      - const: tl_26m
-      - const: tl_96m
-      - const: tl_32k
-      - const: peri_26m
-      - enum:
-          - top_133m        # for MT8192
-          - peri_mem        # for MT8188/MT8195
+    maxItems: 6
 
   assigned-clocks:
     maxItems: 1
@@ -147,6 +136,40 @@ required:
   - '#interrupt-cells'
   - interrupt-controller
 
+allOf:
+  - $ref: /schemas/pci/pci-bus.yaml#
+  - if:
+      properties:
+        compatible:
+          const: mediatek,mt8192-pcie
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: pl_250m
+            - const: tl_26m
+            - const: tl_96m
+            - const: tl_32k
+            - const: peri_26m
+            - const: top_133m
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - mediatek,mt8188-pcie
+              - mediatek,mt8195-pcie
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: pl_250m
+            - const: tl_26m
+            - const: tl_96m
+            - const: tl_32k
+            - const: peri_26m
+            - const: peri_mem
+
 unevaluatedProperties: false
 
 examples:
-- 
GitLab


From d3fd0ee7a4a1e796413fab7affc72eeec31bed13 Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Sun, 27 Nov 2022 12:41:38 +0100
Subject: [PATCH 3433/4122] dt-bindings: PCI: mediatek-gen3: add support for
 mt7986

Add compatible string and clock-definition for mt7986. It needs 4 clocks
for PCIe, define them in binding.

Link: https://lore.kernel.org/r/20221127114142.156573-5-linux@fw-web.de
Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Signed-off-by: Lorenzo Pieralisi <lpieralisi@kernel.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Acked-by: Jianjun Wang <jianjun.wang@mediatek.com>
---
 .../bindings/pci/mediatek-pcie-gen3.yaml        | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
index ef5cc1fc4d107..7e8c7a2a5f9b6 100644
--- a/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
+++ b/Documentation/devicetree/bindings/pci/mediatek-pcie-gen3.yaml
@@ -48,6 +48,7 @@ properties:
     oneOf:
       - items:
           - enum:
+              - mediatek,mt7986-pcie
               - mediatek,mt8188-pcie
               - mediatek,mt8195-pcie
           - const: mediatek,mt8192-pcie
@@ -84,9 +85,11 @@ properties:
       enum: [ phy, mac ]
 
   clocks:
+    minItems: 4
     maxItems: 6
 
   clock-names:
+    minItems: 4
     maxItems: 6
 
   assigned-clocks:
@@ -169,6 +172,20 @@ allOf:
             - const: tl_32k
             - const: peri_26m
             - const: peri_mem
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - mediatek,mt7986-pcie
+    then:
+      properties:
+        clock-names:
+          items:
+            - const: pl_250m
+            - const: tl_26m
+            - const: peri_26m
+            - const: top_133m
 
 unevaluatedProperties: false
 
-- 
GitLab


From 64f6a5d1922bf6d2b2d845de20d4563a6f328e2d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 5 Dec 2022 13:12:03 +0100
Subject: [PATCH 3434/4122] container_of: add container_of_const() that
 preserves const-ness of the pointer

container_of does not preserve the const-ness of a pointer that is
passed into it, which can cause C code that passes in a const pointer to
get a pointer back that is not const and then scribble all over the data
in it.  To prevent this, container_of_const() will preserve the const
status of the pointer passed into it using the newly available _Generic()
method.

Suggested-by: Jason Gunthorpe <jgg@ziepe.ca>
Suggested-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221205121206.166576-1-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/container_of.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/container_of.h b/include/linux/container_of.h
index 2008e9f4058c6..1d898f9158b45 100644
--- a/include/linux/container_of.h
+++ b/include/linux/container_of.h
@@ -22,4 +22,17 @@
 		      "pointer type mismatch in container_of()");	\
 	((type *)(__mptr - offsetof(type, member))); })
 
+/**
+ * container_of_const - cast a member of a structure out to the containing
+ *			structure and preserve the const-ness of the pointer
+ * @ptr:		the pointer to the member
+ * @type:		the type of the container struct this is embedded in.
+ * @member:		the name of the member within the struct.
+ */
+#define container_of_const(ptr, type, member)				\
+	_Generic(ptr,							\
+		const typeof(*(ptr)) *: ((const type *)container_of(ptr, type, member)),\
+		default: ((type *)container_of(ptr, type, member))	\
+	)
+
 #endif	/* _LINUX_CONTAINER_OF_H */
-- 
GitLab


From 6149f83b3165955e9519de483b30b26d1518ad0f Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 5 Dec 2022 13:12:04 +0100
Subject: [PATCH 3435/4122] device.h: move kobj_to_dev() to use
 container_of_const()

Instead of rolling our own const-checking logic, use the newly
introduced container_of_const() to handle it all for us automatically.

Cc: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221205121206.166576-2-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 21 +--------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/include/linux/device.h b/include/linux/device.h
index 84ae52de6746a..8d172d06b8c15 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -680,26 +680,7 @@ struct device_link {
 	bool supplier_preactivated; /* Owned by consumer probe. */
 };
 
-static inline struct device *__kobj_to_dev(struct kobject *kobj)
-{
-	return container_of(kobj, struct device, kobj);
-}
-
-static inline const struct device *__kobj_to_dev_const(const struct kobject *kobj)
-{
-	return container_of(kobj, const struct device, kobj);
-}
-
-/*
- * container_of() will happily take a const * and spit back a non-const * as it
- * is just doing pointer math.  But we want to be a bit more careful in the
- * driver code, so manually force any const * of a kobject to also be a const *
- * to a device.
- */
-#define kobj_to_dev(kobj)					\
-	_Generic((kobj),					\
-		 const struct kobject *: __kobj_to_dev_const,	\
-		 struct kobject *: __kobj_to_dev)(kobj)
+#define kobj_to_dev(__kobj)	container_of_const(__kobj, struct device, kobj)
 
 /**
  * device_iommu_mapped - Returns true when the device DMA is translated
-- 
GitLab


From c3da679286bee1d897bb24a804cca4ff58781bec Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 5 Dec 2022 13:12:05 +0100
Subject: [PATCH 3436/4122] usb.h: take advantage of container_of_const()

Instead of rolling our own const-checking logic in to_usb_interface()
and to_usb_device() use the newly added container_of_const() instead,
making the logic much simpler overall.

Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20221205121206.166576-3-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/usb.h | 42 ++----------------------------------------
 1 file changed, 2 insertions(+), 40 deletions(-)

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 4b463a5e4ba22..010c681b8822f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -259,26 +259,7 @@ struct usb_interface {
 	struct work_struct reset_ws;	/* for resets in atomic context */
 };
 
-static inline struct usb_interface *__to_usb_interface(struct device *d)
-{
-	return container_of(d, struct usb_interface, dev);
-}
-
-static inline const struct usb_interface *__to_usb_interface_const(const struct device *d)
-{
-	return container_of(d, struct usb_interface, dev);
-}
-
-/*
- * container_of() will happily take a const * and spit back a non-const * as it
- * is just doing pointer math.  But we want to be a bit more careful in the USB
- * driver code, so manually force any const * of a device to also be a const *
- * to a usb_device.
- */
-#define to_usb_interface(dev)						\
-	_Generic((dev),							\
-		 const struct device *: __to_usb_interface_const,	\
-		 struct device *: __to_usb_interface)(dev)
+#define to_usb_interface(__dev)	container_of_const(__dev, struct usb_interface, dev)
 
 static inline void *usb_get_intfdata(struct usb_interface *intf)
 {
@@ -730,26 +711,7 @@ struct usb_device {
 	unsigned use_generic_driver:1;
 };
 
-static inline struct usb_device *__to_usb_device(struct device *d)
-{
-	return container_of(d, struct usb_device, dev);
-}
-
-static inline const struct usb_device *__to_usb_device_const(const struct device *d)
-{
-	return container_of(d, struct usb_device, dev);
-}
-
-/*
- * container_of() will happily take a const * and spit back a non-const * as it
- * is just doing pointer math.  But we want to be a bit more careful in the USB
- * driver code, so manually force any const * of a device to also be a const *
- * to a usb_device.
- */
-#define to_usb_device(dev)					\
-	_Generic((dev),						\
-		 const struct device *: __to_usb_device_const,	\
-		 struct device *: __to_usb_device)(dev)
+#define to_usb_device(__dev)	container_of_const(__dev, struct usb_device, dev)
 
 static inline struct usb_device *__intf_to_usbdev(struct usb_interface *intf)
 {
-- 
GitLab


From 47446b50ad2549af4fcc93f82a148ab107e6ef6a Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 5 Dec 2022 13:12:06 +0100
Subject: [PATCH 3437/4122] firmware_loader: fix up to_fw_sysfs() to preserve
 const

to_fw_sysfs() was changed in commit 23680f0b7d7f ("driver core: make
struct class.dev_uevent() take a const *") to pass in a const pointer
but not pass it back out to handle some changes in the driver core.
That isn't the best idea as it could cause problems if used incorrectly,
so switch to use the container_of_const() macro instead which will
preserve the const status of the pointer and enforce it by the compiler.

Fixes: 23680f0b7d7f ("driver core: make struct class.dev_uevent() take a const *")
Cc: Luis Chamberlain <mcgrof@kernel.org>
Reviewed-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Russ Weight <russell.h.weight@intel.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org>
Link: https://lore.kernel.org/r/20221205121206.166576-4-gregkh@linuxfoundation.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/firmware_loader/sysfs.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/base/firmware_loader/sysfs.h b/drivers/base/firmware_loader/sysfs.h
index fd0b4ad9bdbb6..2060add8ef81b 100644
--- a/drivers/base/firmware_loader/sysfs.h
+++ b/drivers/base/firmware_loader/sysfs.h
@@ -80,11 +80,7 @@ struct fw_sysfs {
 	struct firmware *fw;
 	void *fw_upload_priv;
 };
-
-static inline struct fw_sysfs *to_fw_sysfs(const struct device *dev)
-{
-	return container_of(dev, struct fw_sysfs, dev);
-}
+#define to_fw_sysfs(__dev)	container_of_const(__dev, struct fw_sysfs, dev)
 
 void __fw_load_abort(struct fw_priv *fw_priv);
 
-- 
GitLab


From c943a9374d12bebca2d0de7e273b1c723b58c122 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:25 +0200
Subject: [PATCH 3438/4122] net/mlx5: Introduce ifc bits for pre_copy

Introduce ifc related stuff to enable PRE_COPY of VF during migration.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Acked-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-2-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 include/linux/mlx5/mlx5_ifc.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 5a4e914e2a6ff..230a96626a5f3 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1882,7 +1882,12 @@ struct mlx5_ifc_cmd_hca_cap_2_bits {
 	u8	   max_reformat_remove_size[0x8];
 	u8	   max_reformat_remove_offset[0x8];
 
-	u8	   reserved_at_c0[0xe0];
+	u8	   reserved_at_c0[0x8];
+	u8	   migration_multi_load[0x1];
+	u8	   migration_tracking_state[0x1];
+	u8	   reserved_at_ca[0x16];
+
+	u8	   reserved_at_e0[0xc0];
 
 	u8	   reserved_at_1a0[0xb];
 	u8	   log_min_mkey_entity_size[0x5];
@@ -11918,7 +11923,8 @@ struct mlx5_ifc_query_vhca_migration_state_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x10];
+	u8         incremental[0x1];
+	u8         reserved_at_41[0xf];
 	u8         vhca_id[0x10];
 
 	u8         reserved_at_60[0x20];
@@ -11944,7 +11950,9 @@ struct mlx5_ifc_save_vhca_state_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         reserved_at_40[0x10];
+	u8         incremental[0x1];
+	u8         set_track[0x1];
+	u8         reserved_at_42[0xe];
 	u8         vhca_id[0x10];
 
 	u8         reserved_at_60[0x20];
-- 
GitLab


From 4db52602a6074e9cc523500b8304600ff63e7b85 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:26 +0200
Subject: [PATCH 3439/4122] vfio: Extend the device migration protocol with
 PRE_COPY

The optional PRE_COPY states open the saving data transfer FD before
reaching STOP_COPY and allows the device to dirty track internal state
changes with the general idea to reduce the volume of data transferred
in the STOP_COPY stage.

While in PRE_COPY the device remains RUNNING, but the saving FD is open.

Only if the device also supports RUNNING_P2P can it support PRE_COPY_P2P,
which halts P2P transfers while continuing the saving FD.

PRE_COPY, with P2P support, requires the driver to implement 7 new arcs
and exists as an optional FSM branch between RUNNING and STOP_COPY:
    RUNNING -> PRE_COPY -> PRE_COPY_P2P -> STOP_COPY

A new ioctl VFIO_MIG_GET_PRECOPY_INFO is provided to allow userspace to
query the progress of the precopy operation in the driver with the idea it
will judge to move to STOP_COPY at least once the initial data set is
transferred, and possibly after the dirty size has shrunk appropriately.

This ioctl is valid only in PRE_COPY states and kernel driver should
return -EINVAL from any other migration state.

Compared to the v1 clarification, STOP_COPY -> PRE_COPY is blocked
and to be defined in future.
We also split the pending_bytes report into the initial and sustaining
values, e.g.: initial_bytes and dirty_bytes.
initial_bytes: Amount of initial precopy data.
dirty_bytes: Device state changes relative to data previously retrieved.
These fields are not required to have any bearing to STOP_COPY phase.

It is recommended to leave PRE_COPY for STOP_COPY only after the
initial_bytes field reaches zero. Leaving PRE_COPY earlier might make
things slower.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-3-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/vfio_main.c  |  74 ++++++++++++++++++++++-
 include/uapi/linux/vfio.h | 123 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 191 insertions(+), 6 deletions(-)

diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 7f88569c3ebaf..03dbcd3d96f0e 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -1042,7 +1042,7 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 			    enum vfio_device_mig_state new_fsm,
 			    enum vfio_device_mig_state *next_fsm)
 {
-	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_RUNNING_P2P + 1 };
+	enum { VFIO_DEVICE_NUM_STATES = VFIO_DEVICE_STATE_PRE_COPY_P2P + 1 };
 	/*
 	 * The coding in this table requires the driver to implement the
 	 * following FSM arcs:
@@ -1057,30 +1057,65 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 	 *         RUNNING_P2P -> RUNNING
 	 *         RUNNING_P2P -> STOP
 	 *         STOP -> RUNNING_P2P
-	 * Without P2P the driver must implement:
+	 *
+	 * If precopy is supported then the driver must support these additional
+	 * FSM arcs:
+	 *         RUNNING -> PRE_COPY
+	 *         PRE_COPY -> RUNNING
+	 *         PRE_COPY -> STOP_COPY
+	 * However, if precopy and P2P are supported together then the driver
+	 * must support these additional arcs beyond the P2P arcs above:
+	 *         PRE_COPY -> RUNNING
+	 *         PRE_COPY -> PRE_COPY_P2P
+	 *         PRE_COPY_P2P -> PRE_COPY
+	 *         PRE_COPY_P2P -> RUNNING_P2P
+	 *         PRE_COPY_P2P -> STOP_COPY
+	 *         RUNNING -> PRE_COPY
+	 *         RUNNING_P2P -> PRE_COPY_P2P
+	 *
+	 * Without P2P and precopy the driver must implement:
 	 *         RUNNING -> STOP
 	 *         STOP -> RUNNING
 	 *
 	 * The coding will step through multiple states for some combination
 	 * transitions; if all optional features are supported, this means the
 	 * following ones:
+	 *         PRE_COPY -> PRE_COPY_P2P -> STOP_COPY
+	 *         PRE_COPY -> RUNNING -> RUNNING_P2P
+	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP
+	 *         PRE_COPY -> RUNNING -> RUNNING_P2P -> STOP -> RESUMING
+	 *         PRE_COPY_P2P -> RUNNING_P2P -> RUNNING
+	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP
+	 *         PRE_COPY_P2P -> RUNNING_P2P -> STOP -> RESUMING
 	 *         RESUMING -> STOP -> RUNNING_P2P
+	 *         RESUMING -> STOP -> RUNNING_P2P -> PRE_COPY_P2P
 	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING
+	 *         RESUMING -> STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
 	 *         RESUMING -> STOP -> STOP_COPY
+	 *         RUNNING -> RUNNING_P2P -> PRE_COPY_P2P
 	 *         RUNNING -> RUNNING_P2P -> STOP
 	 *         RUNNING -> RUNNING_P2P -> STOP -> RESUMING
 	 *         RUNNING -> RUNNING_P2P -> STOP -> STOP_COPY
+	 *         RUNNING_P2P -> RUNNING -> PRE_COPY
 	 *         RUNNING_P2P -> STOP -> RESUMING
 	 *         RUNNING_P2P -> STOP -> STOP_COPY
+	 *         STOP -> RUNNING_P2P -> PRE_COPY_P2P
 	 *         STOP -> RUNNING_P2P -> RUNNING
+	 *         STOP -> RUNNING_P2P -> RUNNING -> PRE_COPY
 	 *         STOP_COPY -> STOP -> RESUMING
 	 *         STOP_COPY -> STOP -> RUNNING_P2P
 	 *         STOP_COPY -> STOP -> RUNNING_P2P -> RUNNING
+	 *
+	 *  The following transitions are blocked:
+	 *         STOP_COPY -> PRE_COPY
+	 *         STOP_COPY -> PRE_COPY_P2P
 	 */
 	static const u8 vfio_from_fsm_table[VFIO_DEVICE_NUM_STATES][VFIO_DEVICE_NUM_STATES] = {
 		[VFIO_DEVICE_STATE_STOP] = {
 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
@@ -1089,14 +1124,38 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 		[VFIO_DEVICE_STATE_RUNNING] = {
 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_RUNNING_P2P,
 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
 			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
 		},
+		[VFIO_DEVICE_STATE_PRE_COPY] = {
+			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING,
+			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
+			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
+			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING,
+			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING,
+			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+		},
+		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = {
+			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_RUNNING_P2P,
+			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_PRE_COPY,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
+			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
+			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RUNNING_P2P,
+			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
+			[VFIO_DEVICE_STATE_ERROR] = VFIO_DEVICE_STATE_ERROR,
+		},
 		[VFIO_DEVICE_STATE_STOP_COPY] = {
 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP_COPY,
 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
@@ -1105,6 +1164,8 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 		[VFIO_DEVICE_STATE_RESUMING] = {
 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_STOP,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_STOP,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_RESUMING,
 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_STOP,
@@ -1113,6 +1174,8 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 		[VFIO_DEVICE_STATE_RUNNING_P2P] = {
 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_RUNNING,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_RUNNING,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_PRE_COPY_P2P,
 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_STOP,
 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_RUNNING_P2P,
@@ -1121,6 +1184,8 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 		[VFIO_DEVICE_STATE_ERROR] = {
 			[VFIO_DEVICE_STATE_STOP] = VFIO_DEVICE_STATE_ERROR,
 			[VFIO_DEVICE_STATE_RUNNING] = VFIO_DEVICE_STATE_ERROR,
+			[VFIO_DEVICE_STATE_PRE_COPY] = VFIO_DEVICE_STATE_ERROR,
+			[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_DEVICE_STATE_ERROR,
 			[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_DEVICE_STATE_ERROR,
 			[VFIO_DEVICE_STATE_RESUMING] = VFIO_DEVICE_STATE_ERROR,
 			[VFIO_DEVICE_STATE_RUNNING_P2P] = VFIO_DEVICE_STATE_ERROR,
@@ -1131,6 +1196,11 @@ int vfio_mig_get_next_state(struct vfio_device *device,
 	static const unsigned int state_flags_table[VFIO_DEVICE_NUM_STATES] = {
 		[VFIO_DEVICE_STATE_STOP] = VFIO_MIGRATION_STOP_COPY,
 		[VFIO_DEVICE_STATE_RUNNING] = VFIO_MIGRATION_STOP_COPY,
+		[VFIO_DEVICE_STATE_PRE_COPY] =
+			VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY,
+		[VFIO_DEVICE_STATE_PRE_COPY_P2P] = VFIO_MIGRATION_STOP_COPY |
+						   VFIO_MIGRATION_P2P |
+						   VFIO_MIGRATION_PRE_COPY,
 		[VFIO_DEVICE_STATE_STOP_COPY] = VFIO_MIGRATION_STOP_COPY,
 		[VFIO_DEVICE_STATE_RESUMING] = VFIO_MIGRATION_STOP_COPY,
 		[VFIO_DEVICE_STATE_RUNNING_P2P] =
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 3e45dbaf190ef..23105eb036fa6 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -819,12 +819,20 @@ struct vfio_device_feature {
  * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P means that RUNNING_P2P
  * is supported in addition to the STOP_COPY states.
  *
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY means that
+ * PRE_COPY is supported in addition to the STOP_COPY states.
+ *
+ * VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY
+ * means that RUNNING_P2P, PRE_COPY and PRE_COPY_P2P are supported
+ * in addition to the STOP_COPY states.
+ *
  * Other combinations of flags have behavior to be defined in the future.
  */
 struct vfio_device_feature_migration {
 	__aligned_u64 flags;
 #define VFIO_MIGRATION_STOP_COPY	(1 << 0)
 #define VFIO_MIGRATION_P2P		(1 << 1)
+#define VFIO_MIGRATION_PRE_COPY		(1 << 2)
 };
 #define VFIO_DEVICE_FEATURE_MIGRATION 1
 
@@ -875,8 +883,13 @@ struct vfio_device_feature_mig_state {
  *  RESUMING - The device is stopped and is loading a new internal state
  *  ERROR - The device has failed and must be reset
  *
- * And 1 optional state to support VFIO_MIGRATION_P2P:
+ * And optional states to support VFIO_MIGRATION_P2P:
  *  RUNNING_P2P - RUNNING, except the device cannot do peer to peer DMA
+ * And VFIO_MIGRATION_PRE_COPY:
+ *  PRE_COPY - The device is running normally but tracking internal state
+ *             changes
+ * And VFIO_MIGRATION_P2P | VFIO_MIGRATION_PRE_COPY:
+ *  PRE_COPY_P2P - PRE_COPY, except the device cannot do peer to peer DMA
  *
  * The FSM takes actions on the arcs between FSM states. The driver implements
  * the following behavior for the FSM arcs:
@@ -908,20 +921,48 @@ struct vfio_device_feature_mig_state {
  *
  *   To abort a RESUMING session the device must be reset.
  *
+ * PRE_COPY -> RUNNING
  * RUNNING_P2P -> RUNNING
  *   While in RUNNING the device is fully operational, the device may generate
  *   interrupts, DMA, respond to MMIO, all vfio device regions are functional,
  *   and the device may advance its internal state.
  *
+ *   The PRE_COPY arc will terminate a data transfer session.
+ *
+ * PRE_COPY_P2P -> RUNNING_P2P
  * RUNNING -> RUNNING_P2P
  * STOP -> RUNNING_P2P
  *   While in RUNNING_P2P the device is partially running in the P2P quiescent
  *   state defined below.
  *
+ *   The PRE_COPY_P2P arc will terminate a data transfer session.
+ *
+ * RUNNING -> PRE_COPY
+ * RUNNING_P2P -> PRE_COPY_P2P
  * STOP -> STOP_COPY
- *   This arc begin the process of saving the device state and will return a
- *   new data_fd.
+ *   PRE_COPY, PRE_COPY_P2P and STOP_COPY form the "saving group" of states
+ *   which share a data transfer session. Moving between these states alters
+ *   what is streamed in session, but does not terminate or otherwise affect
+ *   the associated fd.
+ *
+ *   These arcs begin the process of saving the device state and will return a
+ *   new data_fd. The migration driver may perform actions such as enabling
+ *   dirty logging of device state when entering PRE_COPY or PER_COPY_P2P.
  *
+ *   Each arc does not change the device operation, the device remains
+ *   RUNNING, P2P quiesced or in STOP. The STOP_COPY state is described below
+ *   in PRE_COPY_P2P -> STOP_COPY.
+ *
+ * PRE_COPY -> PRE_COPY_P2P
+ *   Entering PRE_COPY_P2P continues all the behaviors of PRE_COPY above.
+ *   However, while in the PRE_COPY_P2P state, the device is partially running
+ *   in the P2P quiescent state defined below, like RUNNING_P2P.
+ *
+ * PRE_COPY_P2P -> PRE_COPY
+ *   This arc allows returning the device to a full RUNNING behavior while
+ *   continuing all the behaviors of PRE_COPY.
+ *
+ * PRE_COPY_P2P -> STOP_COPY
  *   While in the STOP_COPY state the device has the same behavior as STOP
  *   with the addition that the data transfers session continues to stream the
  *   migration state. End of stream on the FD indicates the entire device
@@ -939,6 +980,13 @@ struct vfio_device_feature_mig_state {
  *   device state for this arc if required to prepare the device to receive the
  *   migration data.
  *
+ * STOP_COPY -> PRE_COPY
+ * STOP_COPY -> PRE_COPY_P2P
+ *   These arcs are not permitted and return error if requested. Future
+ *   revisions of this API may define behaviors for these arcs, in this case
+ *   support will be discoverable by a new flag in
+ *   VFIO_DEVICE_FEATURE_MIGRATION.
+ *
  * any -> ERROR
  *   ERROR cannot be specified as a device state, however any transition request
  *   can be failed with an errno return and may then move the device_state into
@@ -950,7 +998,7 @@ struct vfio_device_feature_mig_state {
  * The optional peer to peer (P2P) quiescent state is intended to be a quiescent
  * state for the device for the purposes of managing multiple devices within a
  * user context where peer-to-peer DMA between devices may be active. The
- * RUNNING_P2P states must prevent the device from initiating
+ * RUNNING_P2P and PRE_COPY_P2P states must prevent the device from initiating
  * any new P2P DMA transactions. If the device can identify P2P transactions
  * then it can stop only P2P DMA, otherwise it must stop all DMA. The migration
  * driver must complete any such outstanding operations prior to completing the
@@ -963,6 +1011,8 @@ struct vfio_device_feature_mig_state {
  * above FSM arcs. As there are multiple paths through the FSM arcs the path
  * should be selected based on the following rules:
  *   - Select the shortest path.
+ *   - The path cannot have saving group states as interior arcs, only
+ *     starting/end states.
  * Refer to vfio_mig_get_next_state() for the result of the algorithm.
  *
  * The automatic transit through the FSM arcs that make up the combination
@@ -976,6 +1026,9 @@ struct vfio_device_feature_mig_state {
  * support them. The user can discover if these states are supported by using
  * VFIO_DEVICE_FEATURE_MIGRATION. By using combination transitions the user can
  * avoid knowing about these optional states if the kernel driver supports them.
+ *
+ * Arcs touching PRE_COPY and PRE_COPY_P2P are removed if support for PRE_COPY
+ * is not present.
  */
 enum vfio_device_mig_state {
 	VFIO_DEVICE_STATE_ERROR = 0,
@@ -984,8 +1037,70 @@ enum vfio_device_mig_state {
 	VFIO_DEVICE_STATE_STOP_COPY = 3,
 	VFIO_DEVICE_STATE_RESUMING = 4,
 	VFIO_DEVICE_STATE_RUNNING_P2P = 5,
+	VFIO_DEVICE_STATE_PRE_COPY = 6,
+	VFIO_DEVICE_STATE_PRE_COPY_P2P = 7,
+};
+
+/**
+ * VFIO_MIG_GET_PRECOPY_INFO - _IO(VFIO_TYPE, VFIO_BASE + 21)
+ *
+ * This ioctl is used on the migration data FD in the precopy phase of the
+ * migration data transfer. It returns an estimate of the current data sizes
+ * remaining to be transferred. It allows the user to judge when it is
+ * appropriate to leave PRE_COPY for STOP_COPY.
+ *
+ * This ioctl is valid only in PRE_COPY states and kernel driver should
+ * return -EINVAL from any other migration state.
+ *
+ * The vfio_precopy_info data structure returned by this ioctl provides
+ * estimates of data available from the device during the PRE_COPY states.
+ * This estimate is split into two categories, initial_bytes and
+ * dirty_bytes.
+ *
+ * The initial_bytes field indicates the amount of initial precopy
+ * data available from the device. This field should have a non-zero initial
+ * value and decrease as migration data is read from the device.
+ * It is recommended to leave PRE_COPY for STOP_COPY only after this field
+ * reaches zero. Leaving PRE_COPY earlier might make things slower.
+ *
+ * The dirty_bytes field tracks device state changes relative to data
+ * previously retrieved.  This field starts at zero and may increase as
+ * the internal device state is modified or decrease as that modified
+ * state is read from the device.
+ *
+ * Userspace may use the combination of these fields to estimate the
+ * potential data size available during the PRE_COPY phases, as well as
+ * trends relative to the rate the device is dirtying its internal
+ * state, but these fields are not required to have any bearing relative
+ * to the data size available during the STOP_COPY phase.
+ *
+ * Drivers have a lot of flexibility in when and what they transfer during the
+ * PRE_COPY phase, and how they report this from VFIO_MIG_GET_PRECOPY_INFO.
+ *
+ * During pre-copy the migration data FD has a temporary "end of stream" that is
+ * reached when both initial_bytes and dirty_byte are zero. For instance, this
+ * may indicate that the device is idle and not currently dirtying any internal
+ * state. When read() is done on this temporary end of stream the kernel driver
+ * should return ENOMSG from read(). Userspace can wait for more data (which may
+ * never come) by using poll.
+ *
+ * Once in STOP_COPY the migration data FD has a permanent end of stream
+ * signaled in the usual way by read() always returning 0 and poll always
+ * returning readable. ENOMSG may not be returned in STOP_COPY.
+ * Support for this ioctl is mandatory if a driver claims to support
+ * VFIO_MIGRATION_PRE_COPY.
+ *
+ * Return: 0 on success, -1 and errno set on failure.
+ */
+struct vfio_precopy_info {
+	__u32 argsz;
+	__u32 flags;
+	__aligned_u64 initial_bytes;
+	__aligned_u64 dirty_bytes;
 };
 
+#define VFIO_MIG_GET_PRECOPY_INFO _IO(VFIO_TYPE, VFIO_BASE + 21)
+
 /*
  * Upon VFIO_DEVICE_FEATURE_SET, allow the device to be moved into a low power
  * state with the platform-based power management.  Device use of lower power
-- 
GitLab


From 0e7caa65d707b93fbb4322c6313f739fa9103dfa Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:27 +0200
Subject: [PATCH 3440/4122] vfio/mlx5: Enforce a single SAVE command at a time

Enforce a single SAVE command at a time.

As the SAVE command is an asynchronous one, we must enforce running only
a single command at a time.

This will preserve ordering between multiple calls and protect from
races on the migration file data structure.

This is a must for the next patches from the series where as part of
PRE_COPY we may have multiple images to be saved and multiple SAVE
commands may be issued from different flows.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-4-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 6 ++++++
 drivers/vfio/pci/mlx5/cmd.h  | 1 +
 drivers/vfio/pci/mlx5/main.c | 7 +++++++
 3 files changed, 14 insertions(+)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 0848bc905d3ea..55ee8036f59cd 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -281,6 +281,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
 	mlx5_core_dealloc_pd(mdev, async_data->pdn);
 	kvfree(async_data->out);
+	complete(&migf->save_comp);
 	fput(migf->filp);
 }
 
@@ -321,6 +322,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 		return -ENOTCONN;
 
 	mdev = mvdev->mdev;
+	err = wait_for_completion_interruptible(&migf->save_comp);
+	if (err)
+		return err;
+
 	err = mlx5_core_alloc_pd(mdev, &pdn);
 	if (err)
 		return err;
@@ -371,6 +376,7 @@ err_create_mkey:
 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
 err_dma_map:
 	mlx5_core_dealloc_pd(mdev, pdn);
+	complete(&migf->save_comp);
 	return err;
 }
 
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 921d5720a1e57..8ffa7699872c4 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -37,6 +37,7 @@ struct mlx5_vf_migration_file {
 	unsigned long last_offset;
 	struct mlx5vf_pci_core_device *mvdev;
 	wait_queue_head_t poll_wait;
+	struct completion save_comp;
 	struct mlx5_async_ctx async_ctx;
 	struct mlx5vf_async_data async_data;
 };
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 6e9cf2aacc527..0d71ebb2a9727 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -245,6 +245,13 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	init_waitqueue_head(&migf->poll_wait);
+	init_completion(&migf->save_comp);
+	/*
+	 * save_comp is being used as a binary semaphore built from
+	 * a completion. A normal mutex cannot be used because the lock is
+	 * passed between kernel threads and lockdep can't model this.
+	 */
+	complete(&migf->save_comp);
 	mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
 	INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
 	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
-- 
GitLab


From 9945a67ea4b30657dd998c7fbbea1b3950747168 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:28 +0200
Subject: [PATCH 3441/4122] vfio/mlx5: Refactor PD usage

This patch refactors PD usage such as its life cycle will be as of the
migration file instead of allocating/destroying it upon each SAVE/LOAD
command.

This is a preparation step towards the PRE_COPY series where multiple
images will be SAVED/LOADED and a single PD can be simply reused.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-5-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 53 ++++++++++++++++++++++++------------
 drivers/vfio/pci/mlx5/cmd.h  |  5 +++-
 drivers/vfio/pci/mlx5/main.c | 44 ++++++++++++++++++++++--------
 3 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 55ee8036f59cd..a97eac49e3d68 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -279,7 +279,6 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 
 	mlx5_core_destroy_mkey(mdev, async_data->mkey);
 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
-	mlx5_core_dealloc_pd(mdev, async_data->pdn);
 	kvfree(async_data->out);
 	complete(&migf->save_comp);
 	fput(migf->filp);
@@ -314,7 +313,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
 	struct mlx5vf_async_data *async_data;
 	struct mlx5_core_dev *mdev;
-	u32 pdn, mkey;
+	u32 mkey;
 	int err;
 
 	lockdep_assert_held(&mvdev->state_mutex);
@@ -326,16 +325,12 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	if (err)
 		return err;
 
-	err = mlx5_core_alloc_pd(mdev, &pdn);
-	if (err)
-		return err;
-
 	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
 			      0);
 	if (err)
 		goto err_dma_map;
 
-	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
+	err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey);
 	if (err)
 		goto err_create_mkey;
 
@@ -357,7 +352,6 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	migf->total_length = 0;
 	get_file(migf->filp);
 	async_data->mkey = mkey;
-	async_data->pdn = pdn;
 	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
 			       async_data->out,
 			       out_size, mlx5vf_save_callback,
@@ -375,7 +369,6 @@ err_out:
 err_create_mkey:
 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
 err_dma_map:
-	mlx5_core_dealloc_pd(mdev, pdn);
 	complete(&migf->save_comp);
 	return err;
 }
@@ -386,7 +379,7 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	struct mlx5_core_dev *mdev;
 	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
-	u32 pdn, mkey;
+	u32 mkey;
 	int err;
 
 	lockdep_assert_held(&mvdev->state_mutex);
@@ -400,15 +393,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	}
 
 	mdev = mvdev->mdev;
-	err = mlx5_core_alloc_pd(mdev, &pdn);
-	if (err)
-		goto end;
-
 	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
 	if (err)
-		goto err_reg;
+		goto end;
 
-	err = _create_mkey(mdev, pdn, migf, NULL, &mkey);
+	err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey);
 	if (err)
 		goto err_mkey;
 
@@ -424,13 +413,41 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	mlx5_core_destroy_mkey(mdev, mkey);
 err_mkey:
 	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
-err_reg:
-	mlx5_core_dealloc_pd(mdev, pdn);
 end:
 	mutex_unlock(&migf->lock);
 	return err;
 }
 
+int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf)
+{
+	int err;
+
+	lockdep_assert_held(&migf->mvdev->state_mutex);
+	if (migf->mvdev->mdev_detach)
+		return -ENOTCONN;
+
+	err = mlx5_core_alloc_pd(migf->mvdev->mdev, &migf->pdn);
+	return err;
+}
+
+void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf)
+{
+	lockdep_assert_held(&migf->mvdev->state_mutex);
+	if (migf->mvdev->mdev_detach)
+		return;
+
+	mlx5_core_dealloc_pd(migf->mvdev->mdev, migf->pdn);
+}
+
+void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
+{
+	lockdep_assert_held(&migf->mvdev->state_mutex);
+
+	WARN_ON(migf->mvdev->mdev_detach);
+
+	mlx5vf_cmd_dealloc_pd(migf);
+}
+
 static void combine_ranges(struct rb_root_cached *root, u32 cur_nodes,
 			   u32 req_nodes)
 {
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 8ffa7699872c4..ba760f956d535 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -16,7 +16,6 @@ struct mlx5vf_async_data {
 	struct mlx5_async_work cb_work;
 	struct work_struct work;
 	int status;
-	u32 pdn;
 	u32 mkey;
 	void *out;
 };
@@ -27,6 +26,7 @@ struct mlx5_vf_migration_file {
 	u8 disabled:1;
 	u8 is_err:1;
 
+	u32 pdn;
 	struct sg_append_table table;
 	size_t total_length;
 	size_t allocated_length;
@@ -127,6 +127,9 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 			       struct mlx5_vf_migration_file *migf);
 int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 			       struct mlx5_vf_migration_file *migf);
+int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf);
+void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf);
+void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 0d71ebb2a9727..1916f7c1468c1 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -236,12 +236,15 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_save_fops, migf,
 					O_RDONLY);
 	if (IS_ERR(migf->filp)) {
-		int err = PTR_ERR(migf->filp);
-
-		kfree(migf);
-		return ERR_PTR(err);
+		ret = PTR_ERR(migf->filp);
+		goto end;
 	}
 
+	migf->mvdev = mvdev;
+	ret = mlx5vf_cmd_alloc_pd(migf);
+	if (ret)
+		goto out_free;
+
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	init_waitqueue_head(&migf->poll_wait);
@@ -257,20 +260,25 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
 						    &migf->total_length);
 	if (ret)
-		goto out_free;
+		goto out_pd;
 
 	ret = mlx5vf_add_migration_pages(
 		migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
 	if (ret)
-		goto out_free;
+		goto out_pd;
 
-	migf->mvdev = mvdev;
 	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
 	if (ret)
-		goto out_free;
+		goto out_save;
 	return migf;
+out_save:
+	mlx5vf_disable_fd(migf);
+out_pd:
+	mlx5vf_cmd_dealloc_pd(migf);
 out_free:
 	fput(migf->filp);
+end:
+	kfree(migf);
 	return ERR_PTR(ret);
 }
 
@@ -352,6 +360,7 @@ static struct mlx5_vf_migration_file *
 mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 {
 	struct mlx5_vf_migration_file *migf;
+	int ret;
 
 	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
 	if (!migf)
@@ -360,20 +369,30 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	migf->filp = anon_inode_getfile("mlx5vf_mig", &mlx5vf_resume_fops, migf,
 					O_WRONLY);
 	if (IS_ERR(migf->filp)) {
-		int err = PTR_ERR(migf->filp);
-
-		kfree(migf);
-		return ERR_PTR(err);
+		ret = PTR_ERR(migf->filp);
+		goto end;
 	}
+
+	migf->mvdev = mvdev;
+	ret = mlx5vf_cmd_alloc_pd(migf);
+	if (ret)
+		goto out_free;
+
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	return migf;
+out_free:
+	fput(migf->filp);
+end:
+	kfree(migf);
+	return ERR_PTR(ret);
 }
 
 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
 {
 	if (mvdev->resuming_migf) {
 		mlx5vf_disable_fd(mvdev->resuming_migf);
+		mlx5fv_cmd_clean_migf_resources(mvdev->resuming_migf);
 		fput(mvdev->resuming_migf->filp);
 		mvdev->resuming_migf = NULL;
 	}
@@ -381,6 +400,7 @@ void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev)
 		mlx5_cmd_cleanup_async_ctx(&mvdev->saving_migf->async_ctx);
 		cancel_work_sync(&mvdev->saving_migf->async_data.work);
 		mlx5vf_disable_fd(mvdev->saving_migf);
+		mlx5fv_cmd_clean_migf_resources(mvdev->saving_migf);
 		fput(mvdev->saving_migf->filp);
 		mvdev->saving_migf = NULL;
 	}
-- 
GitLab


From 91454f8b9bf4ce6be1d9a0b4de401bc3c6313a95 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:29 +0200
Subject: [PATCH 3442/4122] vfio/mlx5: Refactor MKEY usage

This patch refactors MKEY usage such as its life cycle will be as of the
migration file instead of allocating/destroying it upon each
SAVE/LOAD command.

This is a preparation step towards the PRE_COPY series where multiple
images will be SAVED/LOADED.

We achieve it by having a new struct named mlx5_vhca_data_buffer which
holds the mkey and its related stuff as of sg_append_table,
allocated_length, etc.

The above fields were taken out from the migration file main struct,
into mlx5_vhca_data_buffer dedicated struct with the proper helpers in
place.

For now we have a single mlx5_vhca_data_buffer per migration file.
However, in coming patches we'll have multiple of them to support
multiple images.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-6-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 162 ++++++++++++++++++++++-------------
 drivers/vfio/pci/mlx5/cmd.h  |  37 +++++---
 drivers/vfio/pci/mlx5/main.c |  92 +++++++++++---------
 3 files changed, 178 insertions(+), 113 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index a97eac49e3d68..ed4c472d2eae5 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -210,11 +210,11 @@ err_exec:
 }
 
 static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
-			struct mlx5_vf_migration_file *migf,
+			struct mlx5_vhca_data_buffer *buf,
 			struct mlx5_vhca_recv_buf *recv_buf,
 			u32 *mkey)
 {
-	size_t npages = migf ? DIV_ROUND_UP(migf->total_length, PAGE_SIZE) :
+	size_t npages = buf ? DIV_ROUND_UP(buf->allocated_length, PAGE_SIZE) :
 				recv_buf->npages;
 	int err = 0, inlen;
 	__be64 *mtt;
@@ -232,10 +232,10 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 		 DIV_ROUND_UP(npages, 2));
 	mtt = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
 
-	if (migf) {
+	if (buf) {
 		struct sg_dma_page_iter dma_iter;
 
-		for_each_sgtable_dma_page(&migf->table.sgt, &dma_iter, 0)
+		for_each_sgtable_dma_page(&buf->table.sgt, &dma_iter, 0)
 			*mtt++ = cpu_to_be64(sg_page_iter_dma_address(&dma_iter));
 	} else {
 		int i;
@@ -255,20 +255,99 @@ static int _create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
 	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
 	MLX5_SET(mkc, mkc, translations_octword_size, DIV_ROUND_UP(npages, 2));
-	MLX5_SET64(mkc, mkc, len,
-		   migf ? migf->total_length : (npages * PAGE_SIZE));
+	MLX5_SET64(mkc, mkc, len, npages * PAGE_SIZE);
 	err = mlx5_core_create_mkey(mdev, mkey, in, inlen);
 	kvfree(in);
 	return err;
 }
 
+static int mlx5vf_dma_data_buffer(struct mlx5_vhca_data_buffer *buf)
+{
+	struct mlx5vf_pci_core_device *mvdev = buf->migf->mvdev;
+	struct mlx5_core_dev *mdev = mvdev->mdev;
+	int ret;
+
+	lockdep_assert_held(&mvdev->state_mutex);
+	if (mvdev->mdev_detach)
+		return -ENOTCONN;
+
+	if (buf->dmaed || !buf->allocated_length)
+		return -EINVAL;
+
+	ret = dma_map_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
+	if (ret)
+		return ret;
+
+	ret = _create_mkey(mdev, buf->migf->pdn, buf, NULL, &buf->mkey);
+	if (ret)
+		goto err;
+
+	buf->dmaed = true;
+
+	return 0;
+err:
+	dma_unmap_sgtable(mdev->device, &buf->table.sgt, buf->dma_dir, 0);
+	return ret;
+}
+
+void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf)
+{
+	struct mlx5_vf_migration_file *migf = buf->migf;
+	struct sg_page_iter sg_iter;
+
+	lockdep_assert_held(&migf->mvdev->state_mutex);
+	WARN_ON(migf->mvdev->mdev_detach);
+
+	if (buf->dmaed) {
+		mlx5_core_destroy_mkey(migf->mvdev->mdev, buf->mkey);
+		dma_unmap_sgtable(migf->mvdev->mdev->device, &buf->table.sgt,
+				  buf->dma_dir, 0);
+	}
+
+	/* Undo alloc_pages_bulk_array() */
+	for_each_sgtable_page(&buf->table.sgt, &sg_iter, 0)
+		__free_page(sg_page_iter_page(&sg_iter));
+	sg_free_append_table(&buf->table);
+	kfree(buf);
+}
+
+struct mlx5_vhca_data_buffer *
+mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
+			 size_t length,
+			 enum dma_data_direction dma_dir)
+{
+	struct mlx5_vhca_data_buffer *buf;
+	int ret;
+
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (!buf)
+		return ERR_PTR(-ENOMEM);
+
+	buf->dma_dir = dma_dir;
+	buf->migf = migf;
+	if (length) {
+		ret = mlx5vf_add_migration_pages(buf,
+				DIV_ROUND_UP_ULL(length, PAGE_SIZE));
+		if (ret)
+			goto end;
+
+		ret = mlx5vf_dma_data_buffer(buf);
+		if (ret)
+			goto end;
+	}
+
+	return buf;
+end:
+	mlx5vf_free_data_buffer(buf);
+	return ERR_PTR(ret);
+}
+
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 {
 	struct mlx5vf_async_data *async_data = container_of(_work,
 		struct mlx5vf_async_data, work);
 	struct mlx5_vf_migration_file *migf = container_of(async_data,
 		struct mlx5_vf_migration_file, async_data);
-	struct mlx5_core_dev *mdev = migf->mvdev->mdev;
 
 	mutex_lock(&migf->lock);
 	if (async_data->status) {
@@ -276,9 +355,6 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 		wake_up_interruptible(&migf->poll_wait);
 	}
 	mutex_unlock(&migf->lock);
-
-	mlx5_core_destroy_mkey(mdev, async_data->mkey);
-	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
 	kvfree(async_data->out);
 	complete(&migf->save_comp);
 	fput(migf->filp);
@@ -292,7 +368,7 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 			struct mlx5_vf_migration_file, async_data);
 
 	if (!status) {
-		WRITE_ONCE(migf->total_length,
+		WRITE_ONCE(migf->buf->length,
 			   MLX5_GET(save_vhca_state_out, async_data->out,
 				    actual_image_size));
 		wake_up_interruptible(&migf->poll_wait);
@@ -307,39 +383,28 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 }
 
 int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
-			       struct mlx5_vf_migration_file *migf)
+			       struct mlx5_vf_migration_file *migf,
+			       struct mlx5_vhca_data_buffer *buf)
 {
 	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
 	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
 	struct mlx5vf_async_data *async_data;
-	struct mlx5_core_dev *mdev;
-	u32 mkey;
 	int err;
 
 	lockdep_assert_held(&mvdev->state_mutex);
 	if (mvdev->mdev_detach)
 		return -ENOTCONN;
 
-	mdev = mvdev->mdev;
 	err = wait_for_completion_interruptible(&migf->save_comp);
 	if (err)
 		return err;
 
-	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE,
-			      0);
-	if (err)
-		goto err_dma_map;
-
-	err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey);
-	if (err)
-		goto err_create_mkey;
-
 	MLX5_SET(save_vhca_state_in, in, opcode,
 		 MLX5_CMD_OP_SAVE_VHCA_STATE);
 	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
 	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
-	MLX5_SET(save_vhca_state_in, in, mkey, mkey);
-	MLX5_SET(save_vhca_state_in, in, size, migf->total_length);
+	MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
+	MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
 
 	async_data = &migf->async_data;
 	async_data->out = kvzalloc(out_size, GFP_KERNEL);
@@ -348,10 +413,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 		goto err_out;
 	}
 
-	/* no data exists till the callback comes back */
-	migf->total_length = 0;
 	get_file(migf->filp);
-	async_data->mkey = mkey;
 	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
 			       async_data->out,
 			       out_size, mlx5vf_save_callback,
@@ -365,57 +427,33 @@ err_exec:
 	fput(migf->filp);
 	kvfree(async_data->out);
 err_out:
-	mlx5_core_destroy_mkey(mdev, mkey);
-err_create_mkey:
-	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_FROM_DEVICE, 0);
-err_dma_map:
 	complete(&migf->save_comp);
 	return err;
 }
 
 int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
-			       struct mlx5_vf_migration_file *migf)
+			       struct mlx5_vf_migration_file *migf,
+			       struct mlx5_vhca_data_buffer *buf)
 {
-	struct mlx5_core_dev *mdev;
 	u32 out[MLX5_ST_SZ_DW(load_vhca_state_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(load_vhca_state_in)] = {};
-	u32 mkey;
 	int err;
 
 	lockdep_assert_held(&mvdev->state_mutex);
 	if (mvdev->mdev_detach)
 		return -ENOTCONN;
 
-	mutex_lock(&migf->lock);
-	if (!migf->total_length) {
-		err = -EINVAL;
-		goto end;
-	}
-
-	mdev = mvdev->mdev;
-	err = dma_map_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
+	err = mlx5vf_dma_data_buffer(buf);
 	if (err)
-		goto end;
-
-	err = _create_mkey(mdev, migf->pdn, migf, NULL, &mkey);
-	if (err)
-		goto err_mkey;
+		return err;
 
 	MLX5_SET(load_vhca_state_in, in, opcode,
 		 MLX5_CMD_OP_LOAD_VHCA_STATE);
 	MLX5_SET(load_vhca_state_in, in, op_mod, 0);
 	MLX5_SET(load_vhca_state_in, in, vhca_id, mvdev->vhca_id);
-	MLX5_SET(load_vhca_state_in, in, mkey, mkey);
-	MLX5_SET(load_vhca_state_in, in, size, migf->total_length);
-
-	err = mlx5_cmd_exec_inout(mdev, load_vhca_state, in, out);
-
-	mlx5_core_destroy_mkey(mdev, mkey);
-err_mkey:
-	dma_unmap_sgtable(mdev->device, &migf->table.sgt, DMA_TO_DEVICE, 0);
-end:
-	mutex_unlock(&migf->lock);
-	return err;
+	MLX5_SET(load_vhca_state_in, in, mkey, buf->mkey);
+	MLX5_SET(load_vhca_state_in, in, size, buf->length);
+	return mlx5_cmd_exec_inout(mvdev->mdev, load_vhca_state, in, out);
 }
 
 int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf)
@@ -445,6 +483,10 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
 
 	WARN_ON(migf->mvdev->mdev_detach);
 
+	if (migf->buf) {
+		mlx5vf_free_data_buffer(migf->buf);
+		migf->buf = NULL;
+	}
 	mlx5vf_cmd_dealloc_pd(migf);
 }
 
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index ba760f956d535..b0f08dfc81207 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -12,11 +12,25 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 
+struct mlx5_vhca_data_buffer {
+	struct sg_append_table table;
+	loff_t start_pos;
+	u64 length;
+	u64 allocated_length;
+	u32 mkey;
+	enum dma_data_direction dma_dir;
+	u8 dmaed:1;
+	struct mlx5_vf_migration_file *migf;
+	/* Optimize mlx5vf_get_migration_page() for sequential access */
+	struct scatterlist *last_offset_sg;
+	unsigned int sg_last_entry;
+	unsigned long last_offset;
+};
+
 struct mlx5vf_async_data {
 	struct mlx5_async_work cb_work;
 	struct work_struct work;
 	int status;
-	u32 mkey;
 	void *out;
 };
 
@@ -27,14 +41,7 @@ struct mlx5_vf_migration_file {
 	u8 is_err:1;
 
 	u32 pdn;
-	struct sg_append_table table;
-	size_t total_length;
-	size_t allocated_length;
-
-	/* Optimize mlx5vf_get_migration_page() for sequential access */
-	struct scatterlist *last_offset_sg;
-	unsigned int sg_last_entry;
-	unsigned long last_offset;
+	struct mlx5_vhca_data_buffer *buf;
 	struct mlx5vf_pci_core_device *mvdev;
 	wait_queue_head_t poll_wait;
 	struct completion save_comp;
@@ -124,12 +131,20 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev);
 int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
-			       struct mlx5_vf_migration_file *migf);
+			       struct mlx5_vf_migration_file *migf,
+			       struct mlx5_vhca_data_buffer *buf);
 int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
-			       struct mlx5_vf_migration_file *migf);
+			       struct mlx5_vf_migration_file *migf,
+			       struct mlx5_vhca_data_buffer *buf);
 int mlx5vf_cmd_alloc_pd(struct mlx5_vf_migration_file *migf);
 void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf);
 void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf);
+struct mlx5_vhca_data_buffer *
+mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
+			 size_t length, enum dma_data_direction dma_dir);
+void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf);
+int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+			       unsigned int npages);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 1916f7c1468c1..5f694fce854cf 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -33,7 +33,7 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
 }
 
 static struct page *
-mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
+mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
 			  unsigned long offset)
 {
 	unsigned long cur_offset = 0;
@@ -41,20 +41,20 @@ mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
 	unsigned int i;
 
 	/* All accesses are sequential */
-	if (offset < migf->last_offset || !migf->last_offset_sg) {
-		migf->last_offset = 0;
-		migf->last_offset_sg = migf->table.sgt.sgl;
-		migf->sg_last_entry = 0;
+	if (offset < buf->last_offset || !buf->last_offset_sg) {
+		buf->last_offset = 0;
+		buf->last_offset_sg = buf->table.sgt.sgl;
+		buf->sg_last_entry = 0;
 	}
 
-	cur_offset = migf->last_offset;
+	cur_offset = buf->last_offset;
 
-	for_each_sg(migf->last_offset_sg, sg,
-			migf->table.sgt.orig_nents - migf->sg_last_entry, i) {
+	for_each_sg(buf->last_offset_sg, sg,
+			buf->table.sgt.orig_nents - buf->sg_last_entry, i) {
 		if (offset < sg->length + cur_offset) {
-			migf->last_offset_sg = sg;
-			migf->sg_last_entry += i;
-			migf->last_offset = cur_offset;
+			buf->last_offset_sg = sg;
+			buf->sg_last_entry += i;
+			buf->last_offset = cur_offset;
 			return nth_page(sg_page(sg),
 					(offset - cur_offset) / PAGE_SIZE);
 		}
@@ -63,8 +63,8 @@ mlx5vf_get_migration_page(struct mlx5_vf_migration_file *migf,
 	return NULL;
 }
 
-static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf,
-				      unsigned int npages)
+int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
+			       unsigned int npages)
 {
 	unsigned int to_alloc = npages;
 	struct page **page_list;
@@ -85,13 +85,13 @@ static int mlx5vf_add_migration_pages(struct mlx5_vf_migration_file *migf,
 		}
 		to_alloc -= filled;
 		ret = sg_alloc_append_table_from_pages(
-			&migf->table, page_list, filled, 0,
+			&buf->table, page_list, filled, 0,
 			filled << PAGE_SHIFT, UINT_MAX, SG_MAX_SINGLE_ALLOC,
 			GFP_KERNEL);
 
 		if (ret)
 			goto err;
-		migf->allocated_length += filled * PAGE_SIZE;
+		buf->allocated_length += filled * PAGE_SIZE;
 		/* clean input for another bulk allocation */
 		memset(page_list, 0, filled * sizeof(*page_list));
 		to_fill = min_t(unsigned int, to_alloc,
@@ -108,16 +108,8 @@ err:
 
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
 {
-	struct sg_page_iter sg_iter;
-
 	mutex_lock(&migf->lock);
-	/* Undo alloc_pages_bulk_array() */
-	for_each_sgtable_page(&migf->table.sgt, &sg_iter, 0)
-		__free_page(sg_page_iter_page(&sg_iter));
-	sg_free_append_table(&migf->table);
 	migf->disabled = true;
-	migf->total_length = 0;
-	migf->allocated_length = 0;
 	migf->filp->f_pos = 0;
 	mutex_unlock(&migf->lock);
 }
@@ -136,6 +128,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 			       loff_t *pos)
 {
 	struct mlx5_vf_migration_file *migf = filp->private_data;
+	struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
 	ssize_t done = 0;
 
 	if (pos)
@@ -144,16 +137,16 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 
 	if (!(filp->f_flags & O_NONBLOCK)) {
 		if (wait_event_interruptible(migf->poll_wait,
-			     READ_ONCE(migf->total_length) || migf->is_err))
+			     READ_ONCE(vhca_buf->length) || migf->is_err))
 			return -ERESTARTSYS;
 	}
 
 	mutex_lock(&migf->lock);
-	if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(migf->total_length)) {
+	if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(vhca_buf->length)) {
 		done = -EAGAIN;
 		goto out_unlock;
 	}
-	if (*pos > migf->total_length) {
+	if (*pos > vhca_buf->length) {
 		done = -EINVAL;
 		goto out_unlock;
 	}
@@ -162,7 +155,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 		goto out_unlock;
 	}
 
-	len = min_t(size_t, migf->total_length - *pos, len);
+	len = min_t(size_t, vhca_buf->length - *pos, len);
 	while (len) {
 		size_t page_offset;
 		struct page *page;
@@ -171,7 +164,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 		int ret;
 
 		page_offset = (*pos) % PAGE_SIZE;
-		page = mlx5vf_get_migration_page(migf, *pos - page_offset);
+		page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset);
 		if (!page) {
 			if (done == 0)
 				done = -EINVAL;
@@ -208,7 +201,7 @@ static __poll_t mlx5vf_save_poll(struct file *filp,
 	mutex_lock(&migf->lock);
 	if (migf->disabled || migf->is_err)
 		pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
-	else if (READ_ONCE(migf->total_length))
+	else if (READ_ONCE(migf->buf->length))
 		pollflags = EPOLLIN | EPOLLRDNORM;
 	mutex_unlock(&migf->lock);
 
@@ -227,6 +220,8 @@ static struct mlx5_vf_migration_file *
 mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 {
 	struct mlx5_vf_migration_file *migf;
+	struct mlx5_vhca_data_buffer *buf;
+	size_t length;
 	int ret;
 
 	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
@@ -257,22 +252,23 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 	complete(&migf->save_comp);
 	mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
 	INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
-	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
-						    &migf->total_length);
+	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length);
 	if (ret)
 		goto out_pd;
 
-	ret = mlx5vf_add_migration_pages(
-		migf, DIV_ROUND_UP_ULL(migf->total_length, PAGE_SIZE));
-	if (ret)
+	buf = mlx5vf_alloc_data_buffer(migf, length, DMA_FROM_DEVICE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
 		goto out_pd;
+	}
 
-	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf);
+	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf);
 	if (ret)
 		goto out_save;
+	migf->buf = buf;
 	return migf;
 out_save:
-	mlx5vf_disable_fd(migf);
+	mlx5vf_free_data_buffer(buf);
 out_pd:
 	mlx5vf_cmd_dealloc_pd(migf);
 out_free:
@@ -286,6 +282,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 				   size_t len, loff_t *pos)
 {
 	struct mlx5_vf_migration_file *migf = filp->private_data;
+	struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
 	loff_t requested_length;
 	ssize_t done = 0;
 
@@ -306,10 +303,10 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		goto out_unlock;
 	}
 
-	if (migf->allocated_length < requested_length) {
+	if (vhca_buf->allocated_length < requested_length) {
 		done = mlx5vf_add_migration_pages(
-			migf,
-			DIV_ROUND_UP(requested_length - migf->allocated_length,
+			vhca_buf,
+			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
 				     PAGE_SIZE));
 		if (done)
 			goto out_unlock;
@@ -323,7 +320,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		int ret;
 
 		page_offset = (*pos) % PAGE_SIZE;
-		page = mlx5vf_get_migration_page(migf, *pos - page_offset);
+		page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset);
 		if (!page) {
 			if (done == 0)
 				done = -EINVAL;
@@ -342,7 +339,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		len -= page_len;
 		done += page_len;
 		buf += page_len;
-		migf->total_length += page_len;
+		vhca_buf->length += page_len;
 	}
 out_unlock:
 	mutex_unlock(&migf->lock);
@@ -360,6 +357,7 @@ static struct mlx5_vf_migration_file *
 mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 {
 	struct mlx5_vf_migration_file *migf;
+	struct mlx5_vhca_data_buffer *buf;
 	int ret;
 
 	migf = kzalloc(sizeof(*migf), GFP_KERNEL);
@@ -378,9 +376,18 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	if (ret)
 		goto out_free;
 
+	buf = mlx5vf_alloc_data_buffer(migf, 0, DMA_TO_DEVICE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto out_pd;
+	}
+
+	migf->buf = buf;
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	return migf;
+out_pd:
+	mlx5vf_cmd_dealloc_pd(migf);
 out_free:
 	fput(migf->filp);
 end:
@@ -474,7 +481,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 
 	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
 		ret = mlx5vf_cmd_load_vhca_state(mvdev,
-						 mvdev->resuming_migf);
+						 mvdev->resuming_migf,
+						 mvdev->resuming_migf->buf);
 		if (ret)
 			return ERR_PTR(ret);
 		mlx5vf_disable_fds(mvdev);
-- 
GitLab


From 8b599d143419669e57da3881d8293f17809688d7 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:30 +0200
Subject: [PATCH 3443/4122] vfio/mlx5: Refactor migration file state

Refactor migration file state to be an emum which is mutual exclusive.

As of that dropped the 'disabled' state as 'error' is the same from
functional point of view.

Next patches from the series will extend this enum for other relevant
states.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-7-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  |  2 +-
 drivers/vfio/pci/mlx5/cmd.h  |  7 +++++--
 drivers/vfio/pci/mlx5/main.c | 11 ++++++-----
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index ed4c472d2eae5..fcba12326185c 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -351,7 +351,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 
 	mutex_lock(&migf->lock);
 	if (async_data->status) {
-		migf->is_err = true;
+		migf->state = MLX5_MIGF_STATE_ERROR;
 		wake_up_interruptible(&migf->poll_wait);
 	}
 	mutex_unlock(&migf->lock);
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index b0f08dfc81207..14403e654e4e4 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -12,6 +12,10 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 
+enum mlx5_vf_migf_state {
+	MLX5_MIGF_STATE_ERROR = 1,
+};
+
 struct mlx5_vhca_data_buffer {
 	struct sg_append_table table;
 	loff_t start_pos;
@@ -37,8 +41,7 @@ struct mlx5vf_async_data {
 struct mlx5_vf_migration_file {
 	struct file *filp;
 	struct mutex lock;
-	u8 disabled:1;
-	u8 is_err:1;
+	enum mlx5_vf_migf_state state;
 
 	u32 pdn;
 	struct mlx5_vhca_data_buffer *buf;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 5f694fce854cf..d95646c2f010d 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -109,7 +109,7 @@ err:
 static void mlx5vf_disable_fd(struct mlx5_vf_migration_file *migf)
 {
 	mutex_lock(&migf->lock);
-	migf->disabled = true;
+	migf->state = MLX5_MIGF_STATE_ERROR;
 	migf->filp->f_pos = 0;
 	mutex_unlock(&migf->lock);
 }
@@ -137,7 +137,8 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 
 	if (!(filp->f_flags & O_NONBLOCK)) {
 		if (wait_event_interruptible(migf->poll_wait,
-			     READ_ONCE(vhca_buf->length) || migf->is_err))
+			     READ_ONCE(vhca_buf->length) ||
+			     migf->state == MLX5_MIGF_STATE_ERROR))
 			return -ERESTARTSYS;
 	}
 
@@ -150,7 +151,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 		done = -EINVAL;
 		goto out_unlock;
 	}
-	if (migf->disabled || migf->is_err) {
+	if (migf->state == MLX5_MIGF_STATE_ERROR) {
 		done = -ENODEV;
 		goto out_unlock;
 	}
@@ -199,7 +200,7 @@ static __poll_t mlx5vf_save_poll(struct file *filp,
 	poll_wait(filp, &migf->poll_wait, wait);
 
 	mutex_lock(&migf->lock);
-	if (migf->disabled || migf->is_err)
+	if (migf->state == MLX5_MIGF_STATE_ERROR)
 		pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
 	else if (READ_ONCE(migf->buf->length))
 		pollflags = EPOLLIN | EPOLLRDNORM;
@@ -298,7 +299,7 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 		return -ENOMEM;
 
 	mutex_lock(&migf->lock);
-	if (migf->disabled) {
+	if (migf->state == MLX5_MIGF_STATE_ERROR) {
 		done = -ENODEV;
 		goto out_unlock;
 	}
-- 
GitLab


From c668878381b5702f867ec7f43ee3b74259c6ea03 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:31 +0200
Subject: [PATCH 3444/4122] vfio/mlx5: Refactor to use queue based data chunks

Refactor to use queue based data chunks on the migration file.

The SAVE command adds a chunk to the tail of the queue while the read()
API finds the required chunk and returns its data.

In case the queue is empty but the state of the migration file is
MLX5_MIGF_STATE_COMPLETE, read() may not be blocked but will return 0 to
indicate end of file.

This is a step towards maintaining multiple images and their meta data
(i.e. headers) on the migration file as part of next patches from the
series.

Note:
At that point, we still use a single chunk on the migration file but
becomes ready to support multiple.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-8-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  |  24 +++++-
 drivers/vfio/pci/mlx5/cmd.h  |   5 ++
 drivers/vfio/pci/mlx5/main.c | 145 +++++++++++++++++++++++++++--------
 3 files changed, 136 insertions(+), 38 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index fcba12326185c..0e36b4c8c8169 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -351,6 +351,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 
 	mutex_lock(&migf->lock);
 	if (async_data->status) {
+		migf->buf = async_data->buf;
 		migf->state = MLX5_MIGF_STATE_ERROR;
 		wake_up_interruptible(&migf->poll_wait);
 	}
@@ -368,9 +369,15 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 			struct mlx5_vf_migration_file, async_data);
 
 	if (!status) {
-		WRITE_ONCE(migf->buf->length,
-			   MLX5_GET(save_vhca_state_out, async_data->out,
-				    actual_image_size));
+		unsigned long flags;
+
+		async_data->buf->length =
+			MLX5_GET(save_vhca_state_out, async_data->out,
+				 actual_image_size);
+		spin_lock_irqsave(&migf->list_lock, flags);
+		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
+		spin_unlock_irqrestore(&migf->list_lock, flags);
+		migf->state = MLX5_MIGF_STATE_COMPLETE;
 		wake_up_interruptible(&migf->poll_wait);
 	}
 
@@ -407,6 +414,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
 
 	async_data = &migf->async_data;
+	async_data->buf = buf;
 	async_data->out = kvzalloc(out_size, GFP_KERNEL);
 	if (!async_data->out) {
 		err = -ENOMEM;
@@ -479,14 +487,22 @@ void mlx5vf_cmd_dealloc_pd(struct mlx5_vf_migration_file *migf)
 
 void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
 {
-	lockdep_assert_held(&migf->mvdev->state_mutex);
+	struct mlx5_vhca_data_buffer *entry;
 
+	lockdep_assert_held(&migf->mvdev->state_mutex);
 	WARN_ON(migf->mvdev->mdev_detach);
 
 	if (migf->buf) {
 		mlx5vf_free_data_buffer(migf->buf);
 		migf->buf = NULL;
 	}
+
+	while ((entry = list_first_entry_or_null(&migf->buf_list,
+				struct mlx5_vhca_data_buffer, buf_elm))) {
+		list_del(&entry->buf_elm);
+		mlx5vf_free_data_buffer(entry);
+	}
+
 	mlx5vf_cmd_dealloc_pd(migf);
 }
 
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 14403e654e4e4..6e594689566e4 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -14,6 +14,7 @@
 
 enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_ERROR = 1,
+	MLX5_MIGF_STATE_COMPLETE,
 };
 
 struct mlx5_vhca_data_buffer {
@@ -24,6 +25,7 @@ struct mlx5_vhca_data_buffer {
 	u32 mkey;
 	enum dma_data_direction dma_dir;
 	u8 dmaed:1;
+	struct list_head buf_elm;
 	struct mlx5_vf_migration_file *migf;
 	/* Optimize mlx5vf_get_migration_page() for sequential access */
 	struct scatterlist *last_offset_sg;
@@ -34,6 +36,7 @@ struct mlx5_vhca_data_buffer {
 struct mlx5vf_async_data {
 	struct mlx5_async_work cb_work;
 	struct work_struct work;
+	struct mlx5_vhca_data_buffer *buf;
 	int status;
 	void *out;
 };
@@ -45,6 +48,8 @@ struct mlx5_vf_migration_file {
 
 	u32 pdn;
 	struct mlx5_vhca_data_buffer *buf;
+	spinlock_t list_lock;
+	struct list_head buf_list;
 	struct mlx5vf_pci_core_device *mvdev;
 	wait_queue_head_t poll_wait;
 	struct completion save_comp;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index d95646c2f010d..ca16425811c4b 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -124,11 +124,90 @@ static int mlx5vf_release_file(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+static struct mlx5_vhca_data_buffer *
+mlx5vf_get_data_buff_from_pos(struct mlx5_vf_migration_file *migf, loff_t pos,
+			      bool *end_of_data)
+{
+	struct mlx5_vhca_data_buffer *buf;
+	bool found = false;
+
+	*end_of_data = false;
+	spin_lock_irq(&migf->list_lock);
+	if (list_empty(&migf->buf_list)) {
+		*end_of_data = true;
+		goto end;
+	}
+
+	buf = list_first_entry(&migf->buf_list, struct mlx5_vhca_data_buffer,
+			       buf_elm);
+	if (pos >= buf->start_pos &&
+	    pos < buf->start_pos + buf->length) {
+		found = true;
+		goto end;
+	}
+
+	/*
+	 * As we use a stream based FD we may expect having the data always
+	 * on first chunk
+	 */
+	migf->state = MLX5_MIGF_STATE_ERROR;
+
+end:
+	spin_unlock_irq(&migf->list_lock);
+	return found ? buf : NULL;
+}
+
+static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf,
+			       char __user **buf, size_t *len, loff_t *pos)
+{
+	unsigned long offset;
+	ssize_t done = 0;
+	size_t copy_len;
+
+	copy_len = min_t(size_t,
+			 vhca_buf->start_pos + vhca_buf->length - *pos, *len);
+	while (copy_len) {
+		size_t page_offset;
+		struct page *page;
+		size_t page_len;
+		u8 *from_buff;
+		int ret;
+
+		offset = *pos - vhca_buf->start_pos;
+		page_offset = offset % PAGE_SIZE;
+		offset -= page_offset;
+		page = mlx5vf_get_migration_page(vhca_buf, offset);
+		if (!page)
+			return -EINVAL;
+		page_len = min_t(size_t, copy_len, PAGE_SIZE - page_offset);
+		from_buff = kmap_local_page(page);
+		ret = copy_to_user(*buf, from_buff + page_offset, page_len);
+		kunmap_local(from_buff);
+		if (ret)
+			return -EFAULT;
+		*pos += page_len;
+		*len -= page_len;
+		*buf += page_len;
+		done += page_len;
+		copy_len -= page_len;
+	}
+
+	if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
+		spin_lock_irq(&vhca_buf->migf->list_lock);
+		list_del_init(&vhca_buf->buf_elm);
+		spin_unlock_irq(&vhca_buf->migf->list_lock);
+	}
+
+	return done;
+}
+
 static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 			       loff_t *pos)
 {
 	struct mlx5_vf_migration_file *migf = filp->private_data;
-	struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
+	struct mlx5_vhca_data_buffer *vhca_buf;
+	bool first_loop_call = true;
+	bool end_of_data;
 	ssize_t done = 0;
 
 	if (pos)
@@ -137,53 +216,47 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 
 	if (!(filp->f_flags & O_NONBLOCK)) {
 		if (wait_event_interruptible(migf->poll_wait,
-			     READ_ONCE(vhca_buf->length) ||
-			     migf->state == MLX5_MIGF_STATE_ERROR))
+				!list_empty(&migf->buf_list) ||
+				migf->state == MLX5_MIGF_STATE_ERROR ||
+				migf->state == MLX5_MIGF_STATE_COMPLETE))
 			return -ERESTARTSYS;
 	}
 
 	mutex_lock(&migf->lock);
-	if ((filp->f_flags & O_NONBLOCK) && !READ_ONCE(vhca_buf->length)) {
-		done = -EAGAIN;
-		goto out_unlock;
-	}
-	if (*pos > vhca_buf->length) {
-		done = -EINVAL;
-		goto out_unlock;
-	}
 	if (migf->state == MLX5_MIGF_STATE_ERROR) {
 		done = -ENODEV;
 		goto out_unlock;
 	}
 
-	len = min_t(size_t, vhca_buf->length - *pos, len);
 	while (len) {
-		size_t page_offset;
-		struct page *page;
-		size_t page_len;
-		u8 *from_buff;
-		int ret;
+		ssize_t count;
+
+		vhca_buf = mlx5vf_get_data_buff_from_pos(migf, *pos,
+							 &end_of_data);
+		if (first_loop_call) {
+			first_loop_call = false;
+			if (end_of_data && migf->state != MLX5_MIGF_STATE_COMPLETE) {
+				if (filp->f_flags & O_NONBLOCK) {
+					done = -EAGAIN;
+					goto out_unlock;
+				}
+			}
+		}
 
-		page_offset = (*pos) % PAGE_SIZE;
-		page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset);
-		if (!page) {
-			if (done == 0)
-				done = -EINVAL;
+		if (end_of_data)
+			goto out_unlock;
+
+		if (!vhca_buf) {
+			done = -EINVAL;
 			goto out_unlock;
 		}
 
-		page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
-		from_buff = kmap_local_page(page);
-		ret = copy_to_user(buf, from_buff + page_offset, page_len);
-		kunmap_local(from_buff);
-		if (ret) {
-			done = -EFAULT;
+		count = mlx5vf_buf_read(vhca_buf, &buf, &len, pos);
+		if (count < 0) {
+			done = count;
 			goto out_unlock;
 		}
-		*pos += page_len;
-		len -= page_len;
-		done += page_len;
-		buf += page_len;
+		done += count;
 	}
 
 out_unlock:
@@ -202,7 +275,8 @@ static __poll_t mlx5vf_save_poll(struct file *filp,
 	mutex_lock(&migf->lock);
 	if (migf->state == MLX5_MIGF_STATE_ERROR)
 		pollflags = EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
-	else if (READ_ONCE(migf->buf->length))
+	else if (!list_empty(&migf->buf_list) ||
+		 migf->state == MLX5_MIGF_STATE_COMPLETE)
 		pollflags = EPOLLIN | EPOLLRDNORM;
 	mutex_unlock(&migf->lock);
 
@@ -253,6 +327,8 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 	complete(&migf->save_comp);
 	mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
 	INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
+	INIT_LIST_HEAD(&migf->buf_list);
+	spin_lock_init(&migf->list_lock);
 	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length);
 	if (ret)
 		goto out_pd;
@@ -266,7 +342,6 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf);
 	if (ret)
 		goto out_save;
-	migf->buf = buf;
 	return migf;
 out_save:
 	mlx5vf_free_data_buffer(buf);
@@ -386,6 +461,8 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	migf->buf = buf;
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
+	INIT_LIST_HEAD(&migf->buf_list);
+	spin_lock_init(&migf->list_lock);
 	return migf;
 out_pd:
 	mlx5vf_cmd_dealloc_pd(migf);
-- 
GitLab


From 3319d287f4c04b9deece8ea00e27a70bbe32941b Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:32 +0200
Subject: [PATCH 3445/4122] vfio/mlx5: Introduce device transitions of PRE_COPY

In order to support PRE_COPY, mlx5 driver is transferring multiple
states (images) of the device. e.g.: the source VF can save and transfer
multiple states, and the target VF will load them by that order.

The device is saving three kinds of states:
1) Initial state - when the device moves to PRE_COPY state.
2) Middle state - during PRE_COPY phase via VFIO_MIG_GET_PRECOPY_INFO.
   There can be multiple states of this type.
3) Final state - when the device moves to STOP_COPY state.

After moving to PRE_COPY state, user is holding the saving migf FD and
can use it. For example: user can start transferring data via read()
callback. Also, user can switch from PRE_COPY to STOP_COPY whenever he
sees it fits. This will invoke saving of final state.

This means that mlx5 VFIO device can be switched to STOP_COPY without
transferring any data in PRE_COPY state. Therefore, when the device
moves to STOP_COPY, mlx5 will store the final state on a dedicated queue
entry on the list.

Co-developed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-9-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 96 +++++++++++++++++++++++++++++++++---
 drivers/vfio/pci/mlx5/cmd.h  | 16 +++++-
 drivers/vfio/pci/mlx5/main.c | 90 ++++++++++++++++++++++++++++++---
 3 files changed, 184 insertions(+), 18 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 0e36b4c8c8169..5fcece201d4c0 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -14,18 +14,36 @@ _mlx5vf_free_page_tracker_resources(struct mlx5vf_pci_core_device *mvdev);
 
 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
 {
+	struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
 	u32 out[MLX5_ST_SZ_DW(suspend_vhca_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(suspend_vhca_in)] = {};
+	int err;
 
 	lockdep_assert_held(&mvdev->state_mutex);
 	if (mvdev->mdev_detach)
 		return -ENOTCONN;
 
+	/*
+	 * In case PRE_COPY is used, saving_migf is exposed while the device is
+	 * running. Make sure to run only once there is no active save command.
+	 * Running both in parallel, might end-up with a failure in the save
+	 * command once it will try to turn on 'tracking' on a suspended device.
+	 */
+	if (migf) {
+		err = wait_for_completion_interruptible(&migf->save_comp);
+		if (err)
+			return err;
+	}
+
 	MLX5_SET(suspend_vhca_in, in, opcode, MLX5_CMD_OP_SUSPEND_VHCA);
 	MLX5_SET(suspend_vhca_in, in, vhca_id, mvdev->vhca_id);
 	MLX5_SET(suspend_vhca_in, in, op_mod, op_mod);
 
-	return mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
+	err = mlx5_cmd_exec_inout(mvdev->mdev, suspend_vhca, in, out);
+	if (migf)
+		complete(&migf->save_comp);
+
+	return err;
 }
 
 int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
@@ -45,7 +63,7 @@ int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod)
 }
 
 int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
-					  size_t *state_size)
+					  size_t *state_size, u8 query_flags)
 {
 	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
@@ -59,6 +77,8 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
 	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
 	MLX5_SET(query_vhca_migration_state_in, in, op_mod, 0);
+	MLX5_SET(query_vhca_migration_state_in, in, incremental,
+		 query_flags & MLX5VF_QUERY_INC);
 
 	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
 				  out);
@@ -342,6 +362,56 @@ end:
 	return ERR_PTR(ret);
 }
 
+void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf)
+{
+	spin_lock_irq(&buf->migf->list_lock);
+	list_add_tail(&buf->buf_elm, &buf->migf->avail_list);
+	spin_unlock_irq(&buf->migf->list_lock);
+}
+
+struct mlx5_vhca_data_buffer *
+mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
+		       size_t length, enum dma_data_direction dma_dir)
+{
+	struct mlx5_vhca_data_buffer *buf, *temp_buf;
+	struct list_head free_list;
+
+	lockdep_assert_held(&migf->mvdev->state_mutex);
+	if (migf->mvdev->mdev_detach)
+		return ERR_PTR(-ENOTCONN);
+
+	INIT_LIST_HEAD(&free_list);
+
+	spin_lock_irq(&migf->list_lock);
+	list_for_each_entry_safe(buf, temp_buf, &migf->avail_list, buf_elm) {
+		if (buf->dma_dir == dma_dir) {
+			list_del_init(&buf->buf_elm);
+			if (buf->allocated_length >= length) {
+				spin_unlock_irq(&migf->list_lock);
+				goto found;
+			}
+			/*
+			 * Prevent holding redundant buffers. Put in a free
+			 * list and call at the end not under the spin lock
+			 * (&migf->list_lock) to mlx5vf_free_data_buffer which
+			 * might sleep.
+			 */
+			list_add(&buf->buf_elm, &free_list);
+		}
+	}
+	spin_unlock_irq(&migf->list_lock);
+	buf = mlx5vf_alloc_data_buffer(migf, length, dma_dir);
+
+found:
+	while ((temp_buf = list_first_entry_or_null(&free_list,
+				struct mlx5_vhca_data_buffer, buf_elm))) {
+		list_del(&temp_buf->buf_elm);
+		mlx5vf_free_data_buffer(temp_buf);
+	}
+
+	return buf;
+}
+
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 {
 	struct mlx5vf_async_data *async_data = container_of(_work,
@@ -351,7 +421,7 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 
 	mutex_lock(&migf->lock);
 	if (async_data->status) {
-		migf->buf = async_data->buf;
+		mlx5vf_put_data_buffer(async_data->buf);
 		migf->state = MLX5_MIGF_STATE_ERROR;
 		wake_up_interruptible(&migf->poll_wait);
 	}
@@ -369,15 +439,19 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 			struct mlx5_vf_migration_file, async_data);
 
 	if (!status) {
+		size_t image_size;
 		unsigned long flags;
 
-		async_data->buf->length =
-			MLX5_GET(save_vhca_state_out, async_data->out,
-				 actual_image_size);
+		image_size = MLX5_GET(save_vhca_state_out, async_data->out,
+				      actual_image_size);
+		async_data->buf->length = image_size;
+		async_data->buf->start_pos = migf->max_pos;
+		migf->max_pos += async_data->buf->length;
 		spin_lock_irqsave(&migf->list_lock, flags);
 		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
 		spin_unlock_irqrestore(&migf->list_lock, flags);
-		migf->state = MLX5_MIGF_STATE_COMPLETE;
+		if (async_data->last_chunk)
+			migf->state = MLX5_MIGF_STATE_COMPLETE;
 		wake_up_interruptible(&migf->poll_wait);
 	}
 
@@ -391,7 +465,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 
 int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 			       struct mlx5_vf_migration_file *migf,
-			       struct mlx5_vhca_data_buffer *buf)
+			       struct mlx5_vhca_data_buffer *buf, bool inc,
+			       bool track)
 {
 	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
 	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
@@ -412,9 +487,12 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	MLX5_SET(save_vhca_state_in, in, vhca_id, mvdev->vhca_id);
 	MLX5_SET(save_vhca_state_in, in, mkey, buf->mkey);
 	MLX5_SET(save_vhca_state_in, in, size, buf->allocated_length);
+	MLX5_SET(save_vhca_state_in, in, incremental, inc);
+	MLX5_SET(save_vhca_state_in, in, set_track, track);
 
 	async_data = &migf->async_data;
 	async_data->buf = buf;
+	async_data->last_chunk = !track;
 	async_data->out = kvzalloc(out_size, GFP_KERNEL);
 	if (!async_data->out) {
 		err = -ENOMEM;
@@ -497,6 +575,8 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
 		migf->buf = NULL;
 	}
 
+	list_splice(&migf->avail_list, &migf->buf_list);
+
 	while ((entry = list_first_entry_or_null(&migf->buf_list,
 				struct mlx5_vhca_data_buffer, buf_elm))) {
 		list_del(&entry->buf_elm);
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 6e594689566e4..34e61c7aa23d1 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -38,6 +38,7 @@ struct mlx5vf_async_data {
 	struct work_struct work;
 	struct mlx5_vhca_data_buffer *buf;
 	int status;
+	u8 last_chunk:1;
 	void *out;
 };
 
@@ -47,9 +48,11 @@ struct mlx5_vf_migration_file {
 	enum mlx5_vf_migf_state state;
 
 	u32 pdn;
+	loff_t max_pos;
 	struct mlx5_vhca_data_buffer *buf;
 	spinlock_t list_lock;
 	struct list_head buf_list;
+	struct list_head avail_list;
 	struct mlx5vf_pci_core_device *mvdev;
 	wait_queue_head_t poll_wait;
 	struct completion save_comp;
@@ -129,10 +132,14 @@ struct mlx5vf_pci_core_device {
 	struct mlx5_core_dev *mdev;
 };
 
+enum {
+	MLX5VF_QUERY_INC = (1UL << 0),
+};
+
 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
 int mlx5vf_cmd_resume_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
 int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
-					  size_t *state_size);
+					  size_t *state_size, u8 query_flags);
 void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 			       const struct vfio_migration_ops *mig_ops,
 			       const struct vfio_log_ops *log_ops);
@@ -140,7 +147,8 @@ void mlx5vf_cmd_remove_migratable(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_cmd_close_migratable(struct mlx5vf_pci_core_device *mvdev);
 int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 			       struct mlx5_vf_migration_file *migf,
-			       struct mlx5_vhca_data_buffer *buf);
+			       struct mlx5_vhca_data_buffer *buf, bool inc,
+			       bool track);
 int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 			       struct mlx5_vf_migration_file *migf,
 			       struct mlx5_vhca_data_buffer *buf);
@@ -151,6 +159,10 @@ struct mlx5_vhca_data_buffer *
 mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
 			 size_t length, enum dma_data_direction dma_dir);
 void mlx5vf_free_data_buffer(struct mlx5_vhca_data_buffer *buf);
+struct mlx5_vhca_data_buffer *
+mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
+		       size_t length, enum dma_data_direction dma_dir);
+void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
 int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
 			       unsigned int npages);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index ca16425811c4b..9cabba4560447 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -195,6 +195,7 @@ static ssize_t mlx5vf_buf_read(struct mlx5_vhca_data_buffer *vhca_buf,
 	if (*pos >= vhca_buf->start_pos + vhca_buf->length) {
 		spin_lock_irq(&vhca_buf->migf->list_lock);
 		list_del_init(&vhca_buf->buf_elm);
+		list_add_tail(&vhca_buf->buf_elm, &vhca_buf->migf->avail_list);
 		spin_unlock_irq(&vhca_buf->migf->list_lock);
 	}
 
@@ -283,6 +284,16 @@ static __poll_t mlx5vf_save_poll(struct file *filp,
 	return pollflags;
 }
 
+/*
+ * FD is exposed and user can use it after receiving an error.
+ * Mark migf in error, and wake the user.
+ */
+static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
+{
+	migf->state = MLX5_MIGF_STATE_ERROR;
+	wake_up_interruptible(&migf->poll_wait);
+}
+
 static const struct file_operations mlx5vf_save_fops = {
 	.owner = THIS_MODULE,
 	.read = mlx5vf_save_read,
@@ -291,8 +302,42 @@ static const struct file_operations mlx5vf_save_fops = {
 	.llseek = no_llseek,
 };
 
+static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
+{
+	struct mlx5_vf_migration_file *migf = mvdev->saving_migf;
+	struct mlx5_vhca_data_buffer *buf;
+	size_t length;
+	int ret;
+
+	if (migf->state == MLX5_MIGF_STATE_ERROR)
+		return -ENODEV;
+
+	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length,
+						    MLX5VF_QUERY_INC);
+	if (ret)
+		goto err;
+
+	buf = mlx5vf_get_data_buffer(migf, length, DMA_FROM_DEVICE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		goto err;
+	}
+
+	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, false);
+	if (ret)
+		goto err_save;
+
+	return 0;
+
+err_save:
+	mlx5vf_put_data_buffer(buf);
+err:
+	mlx5vf_mark_err(migf);
+	return ret;
+}
+
 static struct mlx5_vf_migration_file *
-mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
+mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev, bool track)
 {
 	struct mlx5_vf_migration_file *migf;
 	struct mlx5_vhca_data_buffer *buf;
@@ -328,8 +373,9 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 	mlx5_cmd_init_async_ctx(mvdev->mdev, &migf->async_ctx);
 	INIT_WORK(&migf->async_data.work, mlx5vf_mig_file_cleanup_cb);
 	INIT_LIST_HEAD(&migf->buf_list);
+	INIT_LIST_HEAD(&migf->avail_list);
 	spin_lock_init(&migf->list_lock);
-	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length);
+	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length, 0);
 	if (ret)
 		goto out_pd;
 
@@ -339,7 +385,7 @@ mlx5vf_pci_save_device_data(struct mlx5vf_pci_core_device *mvdev)
 		goto out_pd;
 	}
 
-	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf);
+	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, false, track);
 	if (ret)
 		goto out_save;
 	return migf;
@@ -462,6 +508,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	INIT_LIST_HEAD(&migf->buf_list);
+	INIT_LIST_HEAD(&migf->avail_list);
 	spin_lock_init(&migf->list_lock);
 	return migf;
 out_pd:
@@ -514,7 +561,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 		return NULL;
 	}
 
-	if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) {
+	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_RUNNING_P2P) ||
+	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
 		ret = mlx5vf_cmd_suspend_vhca(mvdev,
 			MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_INITIATOR);
 		if (ret)
@@ -522,7 +570,8 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 		return NULL;
 	}
 
-	if (cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) {
+	if ((cur == VFIO_DEVICE_STATE_RUNNING_P2P && new == VFIO_DEVICE_STATE_RUNNING) ||
+	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_PRE_COPY)) {
 		ret = mlx5vf_cmd_resume_vhca(mvdev,
 			MLX5_RESUME_VHCA_IN_OP_MOD_RESUME_INITIATOR);
 		if (ret)
@@ -533,7 +582,7 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
 		struct mlx5_vf_migration_file *migf;
 
-		migf = mlx5vf_pci_save_device_data(mvdev);
+		migf = mlx5vf_pci_save_device_data(mvdev, false);
 		if (IS_ERR(migf))
 			return ERR_CAST(migf);
 		get_file(migf->filp);
@@ -541,7 +590,10 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 		return migf->filp;
 	}
 
-	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP)) {
+	if ((cur == VFIO_DEVICE_STATE_STOP_COPY && new == VFIO_DEVICE_STATE_STOP) ||
+	    (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) ||
+	    (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P &&
+	     new == VFIO_DEVICE_STATE_RUNNING_P2P)) {
 		mlx5vf_disable_fds(mvdev);
 		return NULL;
 	}
@@ -567,6 +619,28 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 		return NULL;
 	}
 
+	if ((cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) ||
+	    (cur == VFIO_DEVICE_STATE_RUNNING_P2P &&
+	     new == VFIO_DEVICE_STATE_PRE_COPY_P2P)) {
+		struct mlx5_vf_migration_file *migf;
+
+		migf = mlx5vf_pci_save_device_data(mvdev, true);
+		if (IS_ERR(migf))
+			return ERR_CAST(migf);
+		get_file(migf->filp);
+		mvdev->saving_migf = migf;
+		return migf->filp;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_PRE_COPY_P2P && new == VFIO_DEVICE_STATE_STOP_COPY) {
+		ret = mlx5vf_cmd_suspend_vhca(mvdev,
+			MLX5_SUSPEND_VHCA_IN_OP_MOD_SUSPEND_RESPONDER);
+		if (ret)
+			return ERR_PTR(ret);
+		ret = mlx5vf_pci_save_device_inc_data(mvdev);
+		return ret ? ERR_PTR(ret) : NULL;
+	}
+
 	/*
 	 * vfio_mig_get_next_state() does not use arcs other than the above
 	 */
@@ -635,7 +709,7 @@ static int mlx5vf_pci_get_data_size(struct vfio_device *vdev,
 
 	mutex_lock(&mvdev->state_mutex);
 	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev,
-						    &state_size);
+						    &state_size, 0);
 	if (!ret)
 		*stop_copy_length = state_size;
 	mlx5vf_state_mutex_unlock(mvdev);
-- 
GitLab


From 0c9a38fee8b210a8dfd3f177526daac567ec9265 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:33 +0200
Subject: [PATCH 3446/4122] vfio/mlx5: Introduce SW headers for migration
 states

As mentioned in the previous patches, mlx5 is transferring multiple
states when the PRE_COPY protocol is used. This states mechanism
requires the target VM to know the states' size in order to execute
multiple loads.  Therefore, add SW header, with the needed information,
for each saved state the source VM is transferring to the target VM.

This patch implements the source VM handling of the headers, following
patch will implement the target VM handling of the headers.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-10-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 56 ++++++++++++++++++++++++++++++++++--
 drivers/vfio/pci/mlx5/cmd.h  | 13 +++++++++
 drivers/vfio/pci/mlx5/main.c |  2 +-
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 5fcece201d4c0..160fa38fc78dd 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -351,9 +351,11 @@ mlx5vf_alloc_data_buffer(struct mlx5_vf_migration_file *migf,
 		if (ret)
 			goto end;
 
-		ret = mlx5vf_dma_data_buffer(buf);
-		if (ret)
-			goto end;
+		if (dma_dir != DMA_NONE) {
+			ret = mlx5vf_dma_data_buffer(buf);
+			if (ret)
+				goto end;
+		}
 	}
 
 	return buf;
@@ -422,6 +424,8 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 	mutex_lock(&migf->lock);
 	if (async_data->status) {
 		mlx5vf_put_data_buffer(async_data->buf);
+		if (async_data->header_buf)
+			mlx5vf_put_data_buffer(async_data->header_buf);
 		migf->state = MLX5_MIGF_STATE_ERROR;
 		wake_up_interruptible(&migf->poll_wait);
 	}
@@ -431,6 +435,32 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 	fput(migf->filp);
 }
 
+static int add_buf_header(struct mlx5_vhca_data_buffer *header_buf,
+			  size_t image_size)
+{
+	struct mlx5_vf_migration_file *migf = header_buf->migf;
+	struct mlx5_vf_migration_header header = {};
+	unsigned long flags;
+	struct page *page;
+	u8 *to_buff;
+
+	header.image_size = cpu_to_le64(image_size);
+	page = mlx5vf_get_migration_page(header_buf, 0);
+	if (!page)
+		return -EINVAL;
+	to_buff = kmap_local_page(page);
+	memcpy(to_buff, &header, sizeof(header));
+	kunmap_local(to_buff);
+	header_buf->length = sizeof(header);
+	header_buf->header_image_size = image_size;
+	header_buf->start_pos = header_buf->migf->max_pos;
+	migf->max_pos += header_buf->length;
+	spin_lock_irqsave(&migf->list_lock, flags);
+	list_add_tail(&header_buf->buf_elm, &migf->buf_list);
+	spin_unlock_irqrestore(&migf->list_lock, flags);
+	return 0;
+}
+
 static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 {
 	struct mlx5vf_async_data *async_data = container_of(context,
@@ -444,6 +474,11 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 
 		image_size = MLX5_GET(save_vhca_state_out, async_data->out,
 				      actual_image_size);
+		if (async_data->header_buf) {
+			status = add_buf_header(async_data->header_buf, image_size);
+			if (status)
+				goto err;
+		}
 		async_data->buf->length = image_size;
 		async_data->buf->start_pos = migf->max_pos;
 		migf->max_pos += async_data->buf->length;
@@ -455,6 +490,7 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 		wake_up_interruptible(&migf->poll_wait);
 	}
 
+err:
 	/*
 	 * The error and the cleanup flows can't run from an
 	 * interrupt context
@@ -470,6 +506,7 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 {
 	u32 out_size = MLX5_ST_SZ_BYTES(save_vhca_state_out);
 	u32 in[MLX5_ST_SZ_DW(save_vhca_state_in)] = {};
+	struct mlx5_vhca_data_buffer *header_buf = NULL;
 	struct mlx5vf_async_data *async_data;
 	int err;
 
@@ -499,6 +536,16 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 		goto err_out;
 	}
 
+	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
+		header_buf = mlx5vf_get_data_buffer(migf,
+			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+		if (IS_ERR(header_buf)) {
+			err = PTR_ERR(header_buf);
+			goto err_free;
+		}
+	}
+
+	async_data->header_buf = header_buf;
 	get_file(migf->filp);
 	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
 			       async_data->out,
@@ -510,7 +557,10 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	return 0;
 
 err_exec:
+	if (header_buf)
+		mlx5vf_put_data_buffer(header_buf);
 	fput(migf->filp);
+err_free:
 	kvfree(async_data->out);
 err_out:
 	complete(&migf->save_comp);
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 34e61c7aa23d1..3e36ccca820ad 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -12,16 +12,26 @@
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/qp.h>
 
+#define MLX5VF_PRE_COPY_SUPP(mvdev) \
+	((mvdev)->core_device.vdev.migration_flags & VFIO_MIGRATION_PRE_COPY)
+
 enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_ERROR = 1,
 	MLX5_MIGF_STATE_COMPLETE,
 };
 
+struct mlx5_vf_migration_header {
+	__le64 image_size;
+	/* For future use in case we may need to change the kernel protocol */
+	__le64 flags;
+};
+
 struct mlx5_vhca_data_buffer {
 	struct sg_append_table table;
 	loff_t start_pos;
 	u64 length;
 	u64 allocated_length;
+	u64 header_image_size;
 	u32 mkey;
 	enum dma_data_direction dma_dir;
 	u8 dmaed:1;
@@ -37,6 +47,7 @@ struct mlx5vf_async_data {
 	struct mlx5_async_work cb_work;
 	struct work_struct work;
 	struct mlx5_vhca_data_buffer *buf;
+	struct mlx5_vhca_data_buffer *header_buf;
 	int status;
 	u8 last_chunk:1;
 	void *out;
@@ -165,6 +176,8 @@ mlx5vf_get_data_buffer(struct mlx5_vf_migration_file *migf,
 void mlx5vf_put_data_buffer(struct mlx5_vhca_data_buffer *buf);
 int mlx5vf_add_migration_pages(struct mlx5_vhca_data_buffer *buf,
 			       unsigned int npages);
+struct page *mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
+				       unsigned long offset);
 void mlx5vf_state_mutex_unlock(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_disable_fds(struct mlx5vf_pci_core_device *mvdev);
 void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 9cabba4560447..9a36e36ec33b7 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -32,7 +32,7 @@ static struct mlx5vf_pci_core_device *mlx5vf_drvdata(struct pci_dev *pdev)
 			    core_device);
 }
 
-static struct page *
+struct page *
 mlx5vf_get_migration_page(struct mlx5_vhca_data_buffer *buf,
 			  unsigned long offset)
 {
-- 
GitLab


From 0dce165b1adf8d7f67030bb257e00107db8022de Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:34 +0200
Subject: [PATCH 3447/4122] vfio/mlx5: Introduce vfio precopy ioctl
 implementation

vfio precopy ioctl returns an estimation of data available for
transferring from the device.

Whenever a user is using VFIO_MIG_GET_PRECOPY_INFO, track the current
state of the device, and if needed, append the dirty data to the
transfer FD data. This is done by saving a middle state.

As mlx5 runs the SAVE command asynchronously, make sure to query for
incremental data only once there is no active save command.
Running both in parallel, might end-up with a failure in the incremental
query command on un-tracked vhca.

Also, a middle state will be saved only after the previous state has
finished its SAVE command and has been fully transferred, this prevents
endless use resources.

Co-developed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-11-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  |  16 +++++
 drivers/vfio/pci/mlx5/main.c | 111 +++++++++++++++++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 160fa38fc78dd..12e74ecebe641 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -67,12 +67,25 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 {
 	u32 out[MLX5_ST_SZ_DW(query_vhca_migration_state_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(query_vhca_migration_state_in)] = {};
+	bool inc = query_flags & MLX5VF_QUERY_INC;
 	int ret;
 
 	lockdep_assert_held(&mvdev->state_mutex);
 	if (mvdev->mdev_detach)
 		return -ENOTCONN;
 
+	/*
+	 * In case PRE_COPY is used, saving_migf is exposed while device is
+	 * running. Make sure to run only once there is no active save command.
+	 * Running both in parallel, might end-up with a failure in the
+	 * incremental query command on un-tracked vhca.
+	 */
+	if (inc) {
+		ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
+		if (ret)
+			return ret;
+	}
+
 	MLX5_SET(query_vhca_migration_state_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VHCA_MIGRATION_STATE);
 	MLX5_SET(query_vhca_migration_state_in, in, vhca_id, mvdev->vhca_id);
@@ -82,6 +95,9 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 
 	ret = mlx5_cmd_exec_inout(mvdev->mdev, query_vhca_migration_state, in,
 				  out);
+	if (inc)
+		complete(&mvdev->saving_migf->save_comp);
+
 	if (ret)
 		return ret;
 
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 9a36e36ec33b7..2c8ac763057cd 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -294,10 +294,121 @@ static void mlx5vf_mark_err(struct mlx5_vf_migration_file *migf)
 	wake_up_interruptible(&migf->poll_wait);
 }
 
+static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
+				 unsigned long arg)
+{
+	struct mlx5_vf_migration_file *migf = filp->private_data;
+	struct mlx5vf_pci_core_device *mvdev = migf->mvdev;
+	struct mlx5_vhca_data_buffer *buf;
+	struct vfio_precopy_info info = {};
+	loff_t *pos = &filp->f_pos;
+	unsigned long minsz;
+	size_t inc_length = 0;
+	bool end_of_data;
+	int ret;
+
+	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+		return -ENOTTY;
+
+	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+
+	if (copy_from_user(&info, (void __user *)arg, minsz))
+		return -EFAULT;
+
+	if (info.argsz < minsz)
+		return -EINVAL;
+
+	mutex_lock(&mvdev->state_mutex);
+	if (mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY &&
+	    mvdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY_P2P) {
+		ret = -EINVAL;
+		goto err_state_unlock;
+	}
+
+	/*
+	 * We can't issue a SAVE command when the device is suspended, so as
+	 * part of VFIO_DEVICE_STATE_PRE_COPY_P2P no reason to query for extra
+	 * bytes that can't be read.
+	 */
+	if (mvdev->mig_state == VFIO_DEVICE_STATE_PRE_COPY) {
+		/*
+		 * Once the query returns it's guaranteed that there is no
+		 * active SAVE command.
+		 * As so, the other code below is safe with the proper locks.
+		 */
+		ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &inc_length,
+							    MLX5VF_QUERY_INC);
+		if (ret)
+			goto err_state_unlock;
+	}
+
+	mutex_lock(&migf->lock);
+	if (migf->state == MLX5_MIGF_STATE_ERROR) {
+		ret = -ENODEV;
+		goto err_migf_unlock;
+	}
+
+	buf = mlx5vf_get_data_buff_from_pos(migf, *pos, &end_of_data);
+	if (buf) {
+		if (buf->start_pos == 0) {
+			info.initial_bytes = buf->header_image_size - *pos;
+		} else if (buf->start_pos ==
+				sizeof(struct mlx5_vf_migration_header)) {
+			/* First data buffer following the header */
+			info.initial_bytes = buf->start_pos +
+						buf->length - *pos;
+		} else {
+			info.dirty_bytes = buf->start_pos + buf->length - *pos;
+		}
+	} else {
+		if (!end_of_data) {
+			ret = -EINVAL;
+			goto err_migf_unlock;
+		}
+
+		info.dirty_bytes = inc_length;
+	}
+
+	if (!end_of_data || !inc_length) {
+		mutex_unlock(&migf->lock);
+		goto done;
+	}
+
+	mutex_unlock(&migf->lock);
+	/*
+	 * We finished transferring the current state and the device has a
+	 * dirty state, save a new state to be ready for.
+	 */
+	buf = mlx5vf_get_data_buffer(migf, inc_length, DMA_FROM_DEVICE);
+	if (IS_ERR(buf)) {
+		ret = PTR_ERR(buf);
+		mlx5vf_mark_err(migf);
+		goto err_state_unlock;
+	}
+
+	ret = mlx5vf_cmd_save_vhca_state(mvdev, migf, buf, true, true);
+	if (ret) {
+		mlx5vf_mark_err(migf);
+		mlx5vf_put_data_buffer(buf);
+		goto err_state_unlock;
+	}
+
+done:
+	mlx5vf_state_mutex_unlock(mvdev);
+	return copy_to_user((void __user *)arg, &info, minsz);
+err_migf_unlock:
+	mutex_unlock(&migf->lock);
+err_state_unlock:
+	mlx5vf_state_mutex_unlock(mvdev);
+	return ret;
+}
+
 static const struct file_operations mlx5vf_save_fops = {
 	.owner = THIS_MODULE,
 	.read = mlx5vf_save_read,
 	.poll = mlx5vf_save_poll,
+	.unlocked_ioctl = mlx5vf_precopy_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 	.release = mlx5vf_release_file,
 	.llseek = no_llseek,
 };
-- 
GitLab


From 81156c27271c4a6c594e492c8d119fbacfc99f36 Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:35 +0200
Subject: [PATCH 3448/4122] vfio/mlx5: Consider temporary end of stream as part
 of PRE_COPY

During PRE_COPY the migration data FD may have a temporary "end of
stream" that is reached when the initial_bytes were read and no other
dirty data exists yet.

For instance, this may indicate that the device is idle and not
currently dirtying any internal state. When read() is done on this
temporary end of stream the kernel driver should return ENOMSG from
read(). Userspace can wait for more data or consider moving to
STOP_COPY.

To not block the user upon read() and let it get ENOMSG we add a new
state named MLX5_MIGF_STATE_PRE_COPY on the migration file.

In addition, we add the MLX5_MIGF_STATE_SAVE_LAST state to block the
read() once we call the last SAVE upon moving to STOP_COPY.

Any further error will be marked with MLX5_MIGF_STATE_ERROR and the user
won't be blocked.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-12-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 7 +++++--
 drivers/vfio/pci/mlx5/cmd.h  | 2 ++
 drivers/vfio/pci/mlx5/main.c | 7 +++++++
 3 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 12e74ecebe641..f6293da033cca 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -501,8 +501,8 @@ static void mlx5vf_save_callback(int status, struct mlx5_async_work *context)
 		spin_lock_irqsave(&migf->list_lock, flags);
 		list_add_tail(&async_data->buf->buf_elm, &migf->buf_list);
 		spin_unlock_irqrestore(&migf->list_lock, flags);
-		if (async_data->last_chunk)
-			migf->state = MLX5_MIGF_STATE_COMPLETE;
+		migf->state = async_data->last_chunk ?
+			MLX5_MIGF_STATE_COMPLETE : MLX5_MIGF_STATE_PRE_COPY;
 		wake_up_interruptible(&migf->poll_wait);
 	}
 
@@ -561,6 +561,9 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 		}
 	}
 
+	if (async_data->last_chunk)
+		migf->state = MLX5_MIGF_STATE_SAVE_LAST;
+
 	async_data->header_buf = header_buf;
 	get_file(migf->filp);
 	err = mlx5_cmd_exec_cb(&migf->async_ctx, in, sizeof(in),
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 3e36ccca820ad..d048f23977dd1 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -17,6 +17,8 @@
 
 enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_ERROR = 1,
+	MLX5_MIGF_STATE_PRE_COPY,
+	MLX5_MIGF_STATE_SAVE_LAST,
 	MLX5_MIGF_STATE_COMPLETE,
 };
 
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 2c8ac763057cd..44b1543c751c9 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -219,6 +219,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 		if (wait_event_interruptible(migf->poll_wait,
 				!list_empty(&migf->buf_list) ||
 				migf->state == MLX5_MIGF_STATE_ERROR ||
+				migf->state == MLX5_MIGF_STATE_PRE_COPY ||
 				migf->state == MLX5_MIGF_STATE_COMPLETE))
 			return -ERESTARTSYS;
 	}
@@ -236,6 +237,12 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 							 &end_of_data);
 		if (first_loop_call) {
 			first_loop_call = false;
+			/* Temporary end of file as part of PRE_COPY */
+			if (end_of_data && migf->state == MLX5_MIGF_STATE_PRE_COPY) {
+				done = -ENOMSG;
+				goto out_unlock;
+			}
+
 			if (end_of_data && migf->state != MLX5_MIGF_STATE_COMPLETE) {
 				if (filp->f_flags & O_NONBLOCK) {
 					done = -EAGAIN;
-- 
GitLab


From 34e2f27143d1b373f088e805f7e11cdf778f791d Mon Sep 17 00:00:00 2001
From: Yishai Hadas <yishaih@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:36 +0200
Subject: [PATCH 3449/4122] vfio/mlx5: Introduce multiple loads

In order to support PRE_COPY, mlx5 driver transfers multiple states
(images) of the device. e.g.: the source VF can save and transfer
multiple states, and the target VF will load them by that order.

This patch implements the changes for the target VF to decompose the
header for each state and to write and load multiple states.

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-13-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  |  13 +-
 drivers/vfio/pci/mlx5/cmd.h  |  10 ++
 drivers/vfio/pci/mlx5/main.c | 279 +++++++++++++++++++++++++++++------
 3 files changed, 257 insertions(+), 45 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index f6293da033cca..993749818d90f 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -598,9 +598,11 @@ int mlx5vf_cmd_load_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	if (mvdev->mdev_detach)
 		return -ENOTCONN;
 
-	err = mlx5vf_dma_data_buffer(buf);
-	if (err)
-		return err;
+	if (!buf->dmaed) {
+		err = mlx5vf_dma_data_buffer(buf);
+		if (err)
+			return err;
+	}
 
 	MLX5_SET(load_vhca_state_in, in, opcode,
 		 MLX5_CMD_OP_LOAD_VHCA_STATE);
@@ -644,6 +646,11 @@ void mlx5fv_cmd_clean_migf_resources(struct mlx5_vf_migration_file *migf)
 		migf->buf = NULL;
 	}
 
+	if (migf->buf_header) {
+		mlx5vf_free_data_buffer(migf->buf_header);
+		migf->buf_header = NULL;
+	}
+
 	list_splice(&migf->avail_list, &migf->buf_list);
 
 	while ((entry = list_first_entry_or_null(&migf->buf_list,
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index d048f23977dd1..7729eac8c78c1 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -22,6 +22,14 @@ enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_COMPLETE,
 };
 
+enum mlx5_vf_load_state {
+	MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER,
+	MLX5_VF_LOAD_STATE_READ_HEADER,
+	MLX5_VF_LOAD_STATE_PREP_IMAGE,
+	MLX5_VF_LOAD_STATE_READ_IMAGE,
+	MLX5_VF_LOAD_STATE_LOAD_IMAGE,
+};
+
 struct mlx5_vf_migration_header {
 	__le64 image_size;
 	/* For future use in case we may need to change the kernel protocol */
@@ -60,9 +68,11 @@ struct mlx5_vf_migration_file {
 	struct mutex lock;
 	enum mlx5_vf_migf_state state;
 
+	enum mlx5_vf_load_state load_state;
 	u32 pdn;
 	loff_t max_pos;
 	struct mlx5_vhca_data_buffer *buf;
+	struct mlx5_vhca_data_buffer *buf_header;
 	spinlock_t list_lock;
 	struct list_head buf_list;
 	struct list_head avail_list;
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 44b1543c751c9..5a669b73994a7 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -518,13 +518,162 @@ end:
 	return ERR_PTR(ret);
 }
 
+static int
+mlx5vf_append_page_to_mig_buf(struct mlx5_vhca_data_buffer *vhca_buf,
+			      const char __user **buf, size_t *len,
+			      loff_t *pos, ssize_t *done)
+{
+	unsigned long offset;
+	size_t page_offset;
+	struct page *page;
+	size_t page_len;
+	u8 *to_buff;
+	int ret;
+
+	offset = *pos - vhca_buf->start_pos;
+	page_offset = offset % PAGE_SIZE;
+
+	page = mlx5vf_get_migration_page(vhca_buf, offset - page_offset);
+	if (!page)
+		return -EINVAL;
+	page_len = min_t(size_t, *len, PAGE_SIZE - page_offset);
+	to_buff = kmap_local_page(page);
+	ret = copy_from_user(to_buff + page_offset, *buf, page_len);
+	kunmap_local(to_buff);
+	if (ret)
+		return -EFAULT;
+
+	*pos += page_len;
+	*done += page_len;
+	*buf += page_len;
+	*len -= page_len;
+	vhca_buf->length += page_len;
+	return 0;
+}
+
+static int
+mlx5vf_resume_read_image_no_header(struct mlx5_vhca_data_buffer *vhca_buf,
+				   loff_t requested_length,
+				   const char __user **buf, size_t *len,
+				   loff_t *pos, ssize_t *done)
+{
+	int ret;
+
+	if (requested_length > MAX_MIGRATION_SIZE)
+		return -ENOMEM;
+
+	if (vhca_buf->allocated_length < requested_length) {
+		ret = mlx5vf_add_migration_pages(
+			vhca_buf,
+			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
+				     PAGE_SIZE));
+		if (ret)
+			return ret;
+	}
+
+	while (*len) {
+		ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, len, pos,
+						    done);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static ssize_t
+mlx5vf_resume_read_image(struct mlx5_vf_migration_file *migf,
+			 struct mlx5_vhca_data_buffer *vhca_buf,
+			 size_t image_size, const char __user **buf,
+			 size_t *len, loff_t *pos, ssize_t *done,
+			 bool *has_work)
+{
+	size_t copy_len, to_copy;
+	int ret;
+
+	to_copy = min_t(size_t, *len, image_size - vhca_buf->length);
+	copy_len = to_copy;
+	while (to_copy) {
+		ret = mlx5vf_append_page_to_mig_buf(vhca_buf, buf, &to_copy, pos,
+						    done);
+		if (ret)
+			return ret;
+	}
+
+	*len -= copy_len;
+	if (vhca_buf->length == image_size) {
+		migf->load_state = MLX5_VF_LOAD_STATE_LOAD_IMAGE;
+		migf->max_pos += image_size;
+		*has_work = true;
+	}
+
+	return 0;
+}
+
+static int
+mlx5vf_resume_read_header(struct mlx5_vf_migration_file *migf,
+			  struct mlx5_vhca_data_buffer *vhca_buf,
+			  const char __user **buf,
+			  size_t *len, loff_t *pos,
+			  ssize_t *done, bool *has_work)
+{
+	struct page *page;
+	size_t copy_len;
+	u8 *to_buff;
+	int ret;
+
+	copy_len = min_t(size_t, *len,
+		sizeof(struct mlx5_vf_migration_header) - vhca_buf->length);
+	page = mlx5vf_get_migration_page(vhca_buf, 0);
+	if (!page)
+		return -EINVAL;
+	to_buff = kmap_local_page(page);
+	ret = copy_from_user(to_buff + vhca_buf->length, *buf, copy_len);
+	if (ret) {
+		ret = -EFAULT;
+		goto end;
+	}
+
+	*buf += copy_len;
+	*pos += copy_len;
+	*done += copy_len;
+	*len -= copy_len;
+	vhca_buf->length += copy_len;
+	if (vhca_buf->length == sizeof(struct mlx5_vf_migration_header)) {
+		u64 flags;
+
+		vhca_buf->header_image_size = le64_to_cpup((__le64 *)to_buff);
+		if (vhca_buf->header_image_size > MAX_MIGRATION_SIZE) {
+			ret = -ENOMEM;
+			goto end;
+		}
+
+		flags = le64_to_cpup((__le64 *)(to_buff +
+			    offsetof(struct mlx5_vf_migration_header, flags)));
+		if (flags) {
+			ret = -EOPNOTSUPP;
+			goto end;
+		}
+
+		migf->load_state = MLX5_VF_LOAD_STATE_PREP_IMAGE;
+		migf->max_pos += vhca_buf->length;
+		*has_work = true;
+	}
+end:
+	kunmap_local(to_buff);
+	return ret;
+}
+
 static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 				   size_t len, loff_t *pos)
 {
 	struct mlx5_vf_migration_file *migf = filp->private_data;
 	struct mlx5_vhca_data_buffer *vhca_buf = migf->buf;
+	struct mlx5_vhca_data_buffer *vhca_buf_header = migf->buf_header;
 	loff_t requested_length;
+	bool has_work = false;
 	ssize_t done = 0;
+	int ret = 0;
 
 	if (pos)
 		return -ESPIPE;
@@ -534,56 +683,83 @@ static ssize_t mlx5vf_resume_write(struct file *filp, const char __user *buf,
 	    check_add_overflow((loff_t)len, *pos, &requested_length))
 		return -EINVAL;
 
-	if (requested_length > MAX_MIGRATION_SIZE)
-		return -ENOMEM;
-
+	mutex_lock(&migf->mvdev->state_mutex);
 	mutex_lock(&migf->lock);
 	if (migf->state == MLX5_MIGF_STATE_ERROR) {
-		done = -ENODEV;
+		ret = -ENODEV;
 		goto out_unlock;
 	}
 
-	if (vhca_buf->allocated_length < requested_length) {
-		done = mlx5vf_add_migration_pages(
-			vhca_buf,
-			DIV_ROUND_UP(requested_length - vhca_buf->allocated_length,
-				     PAGE_SIZE));
-		if (done)
-			goto out_unlock;
-	}
+	while (len || has_work) {
+		has_work = false;
+		switch (migf->load_state) {
+		case MLX5_VF_LOAD_STATE_READ_HEADER:
+			ret = mlx5vf_resume_read_header(migf, vhca_buf_header,
+							&buf, &len, pos,
+							&done, &has_work);
+			if (ret)
+				goto out_unlock;
+			break;
+		case MLX5_VF_LOAD_STATE_PREP_IMAGE:
+		{
+			u64 size = vhca_buf_header->header_image_size;
+
+			if (vhca_buf->allocated_length < size) {
+				mlx5vf_free_data_buffer(vhca_buf);
+
+				migf->buf = mlx5vf_alloc_data_buffer(migf,
+							size, DMA_TO_DEVICE);
+				if (IS_ERR(migf->buf)) {
+					ret = PTR_ERR(migf->buf);
+					migf->buf = NULL;
+					goto out_unlock;
+				}
 
-	while (len) {
-		size_t page_offset;
-		struct page *page;
-		size_t page_len;
-		u8 *to_buff;
-		int ret;
+				vhca_buf = migf->buf;
+			}
 
-		page_offset = (*pos) % PAGE_SIZE;
-		page = mlx5vf_get_migration_page(vhca_buf, *pos - page_offset);
-		if (!page) {
-			if (done == 0)
-				done = -EINVAL;
-			goto out_unlock;
+			vhca_buf->start_pos = migf->max_pos;
+			migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE;
+			break;
 		}
+		case MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER:
+			ret = mlx5vf_resume_read_image_no_header(vhca_buf,
+						requested_length,
+						&buf, &len, pos, &done);
+			if (ret)
+				goto out_unlock;
+			break;
+		case MLX5_VF_LOAD_STATE_READ_IMAGE:
+			ret = mlx5vf_resume_read_image(migf, vhca_buf,
+						vhca_buf_header->header_image_size,
+						&buf, &len, pos, &done, &has_work);
+			if (ret)
+				goto out_unlock;
+			break;
+		case MLX5_VF_LOAD_STATE_LOAD_IMAGE:
+			ret = mlx5vf_cmd_load_vhca_state(migf->mvdev, migf, vhca_buf);
+			if (ret)
+				goto out_unlock;
+			migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
 
-		page_len = min_t(size_t, len, PAGE_SIZE - page_offset);
-		to_buff = kmap_local_page(page);
-		ret = copy_from_user(to_buff + page_offset, buf, page_len);
-		kunmap_local(to_buff);
-		if (ret) {
-			done = -EFAULT;
-			goto out_unlock;
+			/* prep header buf for next image */
+			vhca_buf_header->length = 0;
+			vhca_buf_header->header_image_size = 0;
+			/* prep data buf for next image */
+			vhca_buf->length = 0;
+
+			break;
+		default:
+			break;
 		}
-		*pos += page_len;
-		len -= page_len;
-		done += page_len;
-		buf += page_len;
-		vhca_buf->length += page_len;
 	}
+
 out_unlock:
+	if (ret)
+		migf->state = MLX5_MIGF_STATE_ERROR;
 	mutex_unlock(&migf->lock);
-	return done;
+	mlx5vf_state_mutex_unlock(migf->mvdev);
+	return ret ? ret : done;
 }
 
 static const struct file_operations mlx5vf_resume_fops = {
@@ -623,12 +799,29 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	}
 
 	migf->buf = buf;
+	if (MLX5VF_PRE_COPY_SUPP(mvdev)) {
+		buf = mlx5vf_alloc_data_buffer(migf,
+			sizeof(struct mlx5_vf_migration_header), DMA_NONE);
+		if (IS_ERR(buf)) {
+			ret = PTR_ERR(buf);
+			goto out_buf;
+		}
+
+		migf->buf_header = buf;
+		migf->load_state = MLX5_VF_LOAD_STATE_READ_HEADER;
+	} else {
+		/* Initial state will be to read the image */
+		migf->load_state = MLX5_VF_LOAD_STATE_READ_IMAGE_NO_HEADER;
+	}
+
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
 	INIT_LIST_HEAD(&migf->buf_list);
 	INIT_LIST_HEAD(&migf->avail_list);
 	spin_lock_init(&migf->list_lock);
 	return migf;
+out_buf:
+	mlx5vf_free_data_buffer(buf);
 out_pd:
 	mlx5vf_cmd_dealloc_pd(migf);
 out_free:
@@ -728,11 +921,13 @@ mlx5vf_pci_step_device_state_locked(struct mlx5vf_pci_core_device *mvdev,
 	}
 
 	if (cur == VFIO_DEVICE_STATE_RESUMING && new == VFIO_DEVICE_STATE_STOP) {
-		ret = mlx5vf_cmd_load_vhca_state(mvdev,
-						 mvdev->resuming_migf,
-						 mvdev->resuming_migf->buf);
-		if (ret)
-			return ERR_PTR(ret);
+		if (!MLX5VF_PRE_COPY_SUPP(mvdev)) {
+			ret = mlx5vf_cmd_load_vhca_state(mvdev,
+							 mvdev->resuming_migf,
+							 mvdev->resuming_migf->buf);
+			if (ret)
+				return ERR_PTR(ret);
+		}
 		mlx5vf_disable_fds(mvdev);
 		return NULL;
 	}
-- 
GitLab


From d6e18a4bec431c181a60d32876c6c89955b2a4f8 Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:37 +0200
Subject: [PATCH 3450/4122] vfio/mlx5: Fallback to STOP_COPY upon specific
 PRE_COPY error

Before a SAVE command is issued, a QUERY command is issued in order to
know the device data size.
In case PRE_COPY is used, the above commands are issued while the device
is running. Thus, it is possible that between the QUERY and the SAVE
commands the state of the device will be changed significantly and thus
the SAVE will fail.

Currently, if a SAVE command is failing, the driver will fail the
migration. In the above case, don't fail the migration, but don't allow
for new SAVEs to be executed while the device is in a RUNNING state.
Once the device will be moved to STOP_COPY, SAVE can be executed again
and the full device state will be read.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-14-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c  | 27 ++++++++++++++++++++++++++-
 drivers/vfio/pci/mlx5/cmd.h  |  2 ++
 drivers/vfio/pci/mlx5/main.c |  6 ++++--
 3 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 993749818d90f..01ef695e94418 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -84,6 +84,19 @@ int mlx5vf_cmd_query_vhca_migration_state(struct mlx5vf_pci_core_device *mvdev,
 		ret = wait_for_completion_interruptible(&mvdev->saving_migf->save_comp);
 		if (ret)
 			return ret;
+		if (mvdev->saving_migf->state ==
+		    MLX5_MIGF_STATE_PRE_COPY_ERROR) {
+			/*
+			 * In case we had a PRE_COPY error, only query full
+			 * image for final image
+			 */
+			if (!(query_flags & MLX5VF_QUERY_FINAL)) {
+				*state_size = 0;
+				complete(&mvdev->saving_migf->save_comp);
+				return 0;
+			}
+			query_flags &= ~MLX5VF_QUERY_INC;
+		}
 	}
 
 	MLX5_SET(query_vhca_migration_state_in, in, opcode,
@@ -442,7 +455,10 @@ void mlx5vf_mig_file_cleanup_cb(struct work_struct *_work)
 		mlx5vf_put_data_buffer(async_data->buf);
 		if (async_data->header_buf)
 			mlx5vf_put_data_buffer(async_data->header_buf);
-		migf->state = MLX5_MIGF_STATE_ERROR;
+		if (async_data->status == MLX5_CMD_STAT_BAD_RES_STATE_ERR)
+			migf->state = MLX5_MIGF_STATE_PRE_COPY_ERROR;
+		else
+			migf->state = MLX5_MIGF_STATE_ERROR;
 		wake_up_interruptible(&migf->poll_wait);
 	}
 	mutex_unlock(&migf->lock);
@@ -511,6 +527,8 @@ err:
 	 * The error and the cleanup flows can't run from an
 	 * interrupt context
 	 */
+	if (status == -EREMOTEIO)
+		status = MLX5_GET(save_vhca_state_out, async_data->out, status);
 	async_data->status = status;
 	queue_work(migf->mvdev->cb_wq, &async_data->work);
 }
@@ -534,6 +552,13 @@ int mlx5vf_cmd_save_vhca_state(struct mlx5vf_pci_core_device *mvdev,
 	if (err)
 		return err;
 
+	if (migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)
+		/*
+		 * In case we had a PRE_COPY error, SAVE is triggered only for
+		 * the final image, read device full image.
+		 */
+		inc = false;
+
 	MLX5_SET(save_vhca_state_in, in, opcode,
 		 MLX5_CMD_OP_SAVE_VHCA_STATE);
 	MLX5_SET(save_vhca_state_in, in, op_mod, 0);
diff --git a/drivers/vfio/pci/mlx5/cmd.h b/drivers/vfio/pci/mlx5/cmd.h
index 7729eac8c78c1..5483171d57ad3 100644
--- a/drivers/vfio/pci/mlx5/cmd.h
+++ b/drivers/vfio/pci/mlx5/cmd.h
@@ -17,6 +17,7 @@
 
 enum mlx5_vf_migf_state {
 	MLX5_MIGF_STATE_ERROR = 1,
+	MLX5_MIGF_STATE_PRE_COPY_ERROR,
 	MLX5_MIGF_STATE_PRE_COPY,
 	MLX5_MIGF_STATE_SAVE_LAST,
 	MLX5_MIGF_STATE_COMPLETE,
@@ -157,6 +158,7 @@ struct mlx5vf_pci_core_device {
 
 enum {
 	MLX5VF_QUERY_INC = (1UL << 0),
+	MLX5VF_QUERY_FINAL = (1UL << 1),
 };
 
 int mlx5vf_cmd_suspend_vhca(struct mlx5vf_pci_core_device *mvdev, u16 op_mod);
diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 5a669b73994a7..cd90eb86128c3 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -219,6 +219,7 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 		if (wait_event_interruptible(migf->poll_wait,
 				!list_empty(&migf->buf_list) ||
 				migf->state == MLX5_MIGF_STATE_ERROR ||
+				migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR ||
 				migf->state == MLX5_MIGF_STATE_PRE_COPY ||
 				migf->state == MLX5_MIGF_STATE_COMPLETE))
 			return -ERESTARTSYS;
@@ -238,7 +239,8 @@ static ssize_t mlx5vf_save_read(struct file *filp, char __user *buf, size_t len,
 		if (first_loop_call) {
 			first_loop_call = false;
 			/* Temporary end of file as part of PRE_COPY */
-			if (end_of_data && migf->state == MLX5_MIGF_STATE_PRE_COPY) {
+			if (end_of_data && (migf->state == MLX5_MIGF_STATE_PRE_COPY ||
+				migf->state == MLX5_MIGF_STATE_PRE_COPY_ERROR)) {
 				done = -ENOMSG;
 				goto out_unlock;
 			}
@@ -431,7 +433,7 @@ static int mlx5vf_pci_save_device_inc_data(struct mlx5vf_pci_core_device *mvdev)
 		return -ENODEV;
 
 	ret = mlx5vf_cmd_query_vhca_migration_state(mvdev, &length,
-						    MLX5VF_QUERY_INC);
+				MLX5VF_QUERY_INC | MLX5VF_QUERY_FINAL);
 	if (ret)
 		goto err;
 
-- 
GitLab


From ccc2a52e464d1c983efede1a6d44728c151cb2ed Mon Sep 17 00:00:00 2001
From: Shay Drory <shayd@nvidia.com>
Date: Tue, 6 Dec 2022 10:34:38 +0200
Subject: [PATCH 3451/4122] vfio/mlx5: Enable MIGRATION_PRE_COPY flag

Now that everything has been set up for MIGRATION_PRE_COPY, enable it.

Signed-off-by: Shay Drory <shayd@nvidia.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/20221206083438.37807-15-yishaih@nvidia.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/cmd.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/vfio/pci/mlx5/cmd.c b/drivers/vfio/pci/mlx5/cmd.c
index 01ef695e94418..64e68d13cb98f 100644
--- a/drivers/vfio/pci/mlx5/cmd.c
+++ b/drivers/vfio/pci/mlx5/cmd.c
@@ -222,6 +222,11 @@ void mlx5vf_cmd_set_migratable(struct mlx5vf_pci_core_device *mvdev,
 	if (MLX5_CAP_GEN(mvdev->mdev, adv_virtualization))
 		mvdev->core_device.vdev.log_ops = log_ops;
 
+	if (MLX5_CAP_GEN_2(mvdev->mdev, migration_multi_load) &&
+	    MLX5_CAP_GEN_2(mvdev->mdev, migration_tracking_state))
+		mvdev->core_device.vdev.migration_flags |=
+			VFIO_MIGRATION_PRE_COPY;
+
 end:
 	mlx5_vf_put_core_dev(mvdev->mdev);
 }
-- 
GitLab


From 64ffbbb1e948876dd9044c32a3ab8a790662b9bb Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 23 Nov 2022 11:32:33 +0000
Subject: [PATCH 3452/4122] hisi_acc_vfio_pci: Add support for precopy IOCTL

PRECOPY IOCTL in the case of HiSiIicon ACC driver can be used to
perform the device compatibility check earlier during migration.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Link: https://lore.kernel.org/r/20221123113236.896-2-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 52 +++++++++++++++++++
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h    |  1 +
 2 files changed, 53 insertions(+)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 0c0c0c7f05215..f3b74a06edb68 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -764,9 +764,58 @@ hisi_acc_vf_pci_resume(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
+	migf->hisi_acc_vdev = hisi_acc_vdev;
 	return migf;
 }
 
+static long hisi_acc_vf_precopy_ioctl(struct file *filp,
+				      unsigned int cmd, unsigned long arg)
+{
+	struct hisi_acc_vf_migration_file *migf = filp->private_data;
+	struct hisi_acc_vf_core_device *hisi_acc_vdev = migf->hisi_acc_vdev;
+	loff_t *pos = &filp->f_pos;
+	struct vfio_precopy_info info;
+	unsigned long minsz;
+	int ret;
+
+	if (cmd != VFIO_MIG_GET_PRECOPY_INFO)
+		return -ENOTTY;
+
+	minsz = offsetofend(struct vfio_precopy_info, dirty_bytes);
+
+	if (copy_from_user(&info, (void __user *)arg, minsz))
+		return -EFAULT;
+	if (info.argsz < minsz)
+		return -EINVAL;
+
+	mutex_lock(&hisi_acc_vdev->state_mutex);
+	if (hisi_acc_vdev->mig_state != VFIO_DEVICE_STATE_PRE_COPY) {
+		mutex_unlock(&hisi_acc_vdev->state_mutex);
+		return -EINVAL;
+	}
+
+	mutex_lock(&migf->lock);
+
+	if (migf->disabled) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (*pos > migf->total_length) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	info.dirty_bytes = 0;
+	info.initial_bytes = migf->total_length - *pos;
+
+	ret = copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
+out:
+	mutex_unlock(&migf->lock);
+	mutex_unlock(&hisi_acc_vdev->state_mutex);
+	return ret;
+}
+
 static ssize_t hisi_acc_vf_save_read(struct file *filp, char __user *buf, size_t len,
 				     loff_t *pos)
 {
@@ -807,6 +856,8 @@ out_unlock:
 static const struct file_operations hisi_acc_vf_save_fops = {
 	.owner = THIS_MODULE,
 	.read = hisi_acc_vf_save_read,
+	.unlocked_ioctl = hisi_acc_vf_precopy_ioctl,
+	.compat_ioctl = compat_ptr_ioctl,
 	.release = hisi_acc_vf_release_file,
 	.llseek = no_llseek,
 };
@@ -832,6 +883,7 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 
 	stream_open(migf->filp->f_inode, migf->filp);
 	mutex_init(&migf->lock);
+	migf->hisi_acc_vdev = hisi_acc_vdev;
 
 	ret = vf_qm_state_save(hisi_acc_vdev, migf);
 	if (ret) {
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 67343325b3201..11d51345f5b51 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -91,6 +91,7 @@ struct hisi_acc_vf_migration_file {
 	struct mutex lock;
 	bool disabled;
 
+	struct hisi_acc_vf_core_device *hisi_acc_vdev;
 	struct acc_vf_data vf_data;
 	size_t total_length;
 };
-- 
GitLab


From d9a871e4a143047d1d84a606772af319f11516f9 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 23 Nov 2022 11:32:34 +0000
Subject: [PATCH 3453/4122] hisi_acc_vfio_pci: Introduce support for PRE_COPY
 state transitions

The saving_migf is open in PRE_COPY state if it is supported and reads
initial device match data. hisi_acc_vf_stop_copy() is refactored to
make use of common code.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Link: https://lore.kernel.org/r/20221123113236.896-3-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 74 ++++++++++++++++++-
 1 file changed, 71 insertions(+), 3 deletions(-)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index f3b74a06edb68..c8658636a84c0 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -863,7 +863,7 @@ static const struct file_operations hisi_acc_vf_save_fops = {
 };
 
 static struct hisi_acc_vf_migration_file *
-hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+hisi_acc_open_saving_migf(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 {
 	struct hisi_acc_vf_migration_file *migf;
 	int ret;
@@ -885,7 +885,7 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 	mutex_init(&migf->lock);
 	migf->hisi_acc_vdev = hisi_acc_vdev;
 
-	ret = vf_qm_state_save(hisi_acc_vdev, migf);
+	ret = vf_qm_get_match_data(hisi_acc_vdev, &migf->vf_data);
 	if (ret) {
 		fput(migf->filp);
 		return ERR_PTR(ret);
@@ -894,6 +894,44 @@ hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 	return migf;
 }
 
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_pre_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev)
+{
+	struct hisi_acc_vf_migration_file *migf;
+
+	migf = hisi_acc_open_saving_migf(hisi_acc_vdev);
+	if (IS_ERR(migf))
+		return migf;
+
+	migf->total_length = QM_MATCH_SIZE;
+	return migf;
+}
+
+static struct hisi_acc_vf_migration_file *
+hisi_acc_vf_stop_copy(struct hisi_acc_vf_core_device *hisi_acc_vdev, bool open)
+{
+	int ret;
+	struct hisi_acc_vf_migration_file *migf = NULL;
+
+	if (open) {
+		/*
+		 * Userspace didn't use PRECOPY support. Hence saving_migf
+		 * is not opened yet.
+		 */
+		migf = hisi_acc_open_saving_migf(hisi_acc_vdev);
+		if (IS_ERR(migf))
+			return migf;
+	} else {
+		migf = hisi_acc_vdev->saving_migf;
+	}
+
+	ret = vf_qm_state_save(hisi_acc_vdev, migf);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return open ? migf : NULL;
+}
+
 static int hisi_acc_vf_stop_device(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 {
 	struct device *dev = &hisi_acc_vdev->vf_dev->dev;
@@ -921,6 +959,31 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	u32 cur = hisi_acc_vdev->mig_state;
 	int ret;
 
+	if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_PRE_COPY) {
+		struct hisi_acc_vf_migration_file *migf;
+
+		migf = hisi_acc_vf_pre_copy(hisi_acc_vdev);
+		if (IS_ERR(migf))
+			return ERR_CAST(migf);
+		get_file(migf->filp);
+		hisi_acc_vdev->saving_migf = migf;
+		return migf->filp;
+	}
+
+	if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_STOP_COPY) {
+		struct hisi_acc_vf_migration_file *migf;
+
+		ret = hisi_acc_vf_stop_device(hisi_acc_vdev);
+		if (ret)
+			return ERR_PTR(ret);
+
+		migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, false);
+		if (IS_ERR(migf))
+			return ERR_CAST(migf);
+
+		return NULL;
+	}
+
 	if (cur == VFIO_DEVICE_STATE_RUNNING && new == VFIO_DEVICE_STATE_STOP) {
 		ret = hisi_acc_vf_stop_device(hisi_acc_vdev);
 		if (ret)
@@ -931,7 +994,7 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_STOP_COPY) {
 		struct hisi_acc_vf_migration_file *migf;
 
-		migf = hisi_acc_vf_stop_copy(hisi_acc_vdev);
+		migf = hisi_acc_vf_stop_copy(hisi_acc_vdev, true);
 		if (IS_ERR(migf))
 			return ERR_CAST(migf);
 		get_file(migf->filp);
@@ -963,6 +1026,11 @@ hisi_acc_vf_set_device_state(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 		return NULL;
 	}
 
+	if (cur == VFIO_DEVICE_STATE_PRE_COPY && new == VFIO_DEVICE_STATE_RUNNING) {
+		hisi_acc_vf_disable_fds(hisi_acc_vdev);
+		return NULL;
+	}
+
 	if (cur == VFIO_DEVICE_STATE_STOP && new == VFIO_DEVICE_STATE_RUNNING) {
 		hisi_acc_vf_start_device(hisi_acc_vdev);
 		return NULL;
-- 
GitLab


From 190125adcad4c5850fd74ecd697e20a446b74ed8 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 23 Nov 2022 11:32:35 +0000
Subject: [PATCH 3454/4122] hisi_acc_vfio_pci: Move the dev compatibility tests
 for early check

Instead of waiting till data transfer is complete to perform dev
compatibility, do it as soon as we have enough data to perform the
check. This will be useful when we enable the support for PRE_COPY.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Link: https://lore.kernel.org/r/20221123113236.896-4-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.c    | 19 +++++++------------
 .../vfio/pci/hisilicon/hisi_acc_vfio_pci.h    |  1 +
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index c8658636a84c0..9a51f41e1d2aa 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -360,8 +360,8 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	u32 que_iso_state;
 	int ret;
 
-	if (migf->total_length < QM_MATCH_SIZE)
-		return -EINVAL;
+	if (migf->total_length < QM_MATCH_SIZE || hisi_acc_vdev->match_done)
+		return 0;
 
 	if (vf_data->acc_magic != ACC_DEV_MAGIC) {
 		dev_err(dev, "failed to match ACC_DEV_MAGIC\n");
@@ -406,6 +406,7 @@ static int vf_qm_check_match(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	}
 
 	hisi_acc_vdev->vf_qm_state = vf_data->vf_qm_state;
+	hisi_acc_vdev->match_done = true;
 	return 0;
 }
 
@@ -493,10 +494,6 @@ static int vf_qm_state_save(struct hisi_acc_vf_core_device *hisi_acc_vdev,
 	struct device *dev = &vf_qm->pdev->dev;
 	int ret;
 
-	ret = vf_qm_get_match_data(hisi_acc_vdev, vf_data);
-	if (ret)
-		return ret;
-
 	if (unlikely(qm_wait_dev_not_ready(vf_qm))) {
 		/* Update state and return with match data */
 		vf_data->vf_qm_state = QM_NOT_READY;
@@ -673,12 +670,6 @@ static int hisi_acc_vf_load_state(struct hisi_acc_vf_core_device *hisi_acc_vdev)
 	struct hisi_acc_vf_migration_file *migf = hisi_acc_vdev->resuming_migf;
 	int ret;
 
-	/* Check dev compatibility */
-	ret = vf_qm_check_match(hisi_acc_vdev, migf);
-	if (ret) {
-		dev_err(dev, "failed to match the VF!\n");
-		return ret;
-	}
 	/* Recover data to VF */
 	ret = vf_qm_load_data(hisi_acc_vdev, migf);
 	if (ret) {
@@ -732,6 +723,10 @@ static ssize_t hisi_acc_vf_resume_write(struct file *filp, const char __user *bu
 	*pos += len;
 	done = len;
 	migf->total_length += len;
+
+	ret = vf_qm_check_match(migf->hisi_acc_vdev, migf);
+	if (ret)
+		done = -EFAULT;
 out_unlock:
 	mutex_unlock(&migf->lock);
 	return done;
diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
index 11d51345f5b51..dcabfeec6ca19 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.h
@@ -98,6 +98,7 @@ struct hisi_acc_vf_migration_file {
 
 struct hisi_acc_vf_core_device {
 	struct vfio_pci_core_device core_device;
+	u8 match_done:1;
 	u8 deferred_reset:1;
 	/* For migration state */
 	struct mutex state_mutex;
-- 
GitLab


From f2240b4441cc5bd87820371ee79ad7c9125e76b6 Mon Sep 17 00:00:00 2001
From: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Date: Wed, 23 Nov 2022 11:32:36 +0000
Subject: [PATCH 3455/4122] hisi_acc_vfio_pci: Enable PRE_COPY flag

Now that we have everything to support the PRE_COPY state,
enable it.

Signed-off-by: Shameer Kolothum <shameerali.kolothum.thodi@huawei.com>
Link: https://lore.kernel.org/r/20221123113236.896-5-shameerali.kolothum.thodi@huawei.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
index 9a51f41e1d2aa..51941bb4f31f0 100644
--- a/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
+++ b/drivers/vfio/pci/hisilicon/hisi_acc_vfio_pci.c
@@ -1351,7 +1351,7 @@ static int hisi_acc_vfio_pci_migrn_init_dev(struct vfio_device *core_vdev)
 	hisi_acc_vdev->vf_dev = pdev;
 	mutex_init(&hisi_acc_vdev->state_mutex);
 
-	core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY;
+	core_vdev->migration_flags = VFIO_MIGRATION_STOP_COPY | VFIO_MIGRATION_PRE_COPY;
 	core_vdev->mig_ops = &hisi_acc_vfio_pci_migrn_state_ops;
 
 	return vfio_pci_core_init_dev(core_vdev);
-- 
GitLab


From 533aae7c94dbc2b14301cfd68ae7e0e90f0c8438 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 5 Dec 2022 13:39:02 +0100
Subject: [PATCH 3456/4122] gpiolib: cdev: fix NULL-pointer dereferences

There are several places where we can crash the kernel by requesting
lines, unbinding the GPIO device, then calling any of the system calls
relevant to the GPIO character device's annonymous file descriptors:
ioctl(), read(), poll().

While I observed it with the GPIO simulator, it will also happen for any
of the GPIO devices that can be hot-unplugged - for instance any HID GPIO
expander (e.g. CP2112).

This affects both v1 and v2 uAPI.

This fixes it partially by checking if gdev->chip is not NULL but it
doesn't entirely remedy the situation as we still have a race condition
in which another thread can remove the device after the check.

Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines")
Fixes: 3c0d9c635ae2 ("gpiolib: cdev: support GPIO_V2_GET_LINE_IOCTL and GPIO_V2_LINE_GET_VALUES_IOCTL")
Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL")
Fixes: a54756cb24ea ("gpiolib: cdev: support GPIO_V2_LINE_SET_CONFIG_IOCTL")
Fixes: 7b8e00d98168 ("gpiolib: cdev: support GPIO_V2_LINE_SET_VALUES_IOCTL")
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-cdev.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index 08606f32372ce..ac10c9494bf02 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -201,6 +201,9 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd,
 	unsigned int i;
 	int ret;
 
+	if (!lh->gdev->chip)
+		return -ENODEV;
+
 	switch (cmd) {
 	case GPIOHANDLE_GET_LINE_VALUES_IOCTL:
 		/* NOTE: It's okay to read values of output lines */
@@ -1384,6 +1387,9 @@ static long linereq_ioctl(struct file *file, unsigned int cmd,
 	struct linereq *lr = file->private_data;
 	void __user *ip = (void __user *)arg;
 
+	if (!lr->gdev->chip)
+		return -ENODEV;
+
 	switch (cmd) {
 	case GPIO_V2_LINE_GET_VALUES_IOCTL:
 		return linereq_get_values(lr, ip);
@@ -1410,6 +1416,9 @@ static __poll_t linereq_poll(struct file *file,
 	struct linereq *lr = file->private_data;
 	__poll_t events = 0;
 
+	if (!lr->gdev->chip)
+		return EPOLLHUP | EPOLLERR;
+
 	poll_wait(file, &lr->wait, wait);
 
 	if (!kfifo_is_empty_spinlocked_noirqsave(&lr->events,
@@ -1429,6 +1438,9 @@ static ssize_t linereq_read(struct file *file,
 	ssize_t bytes_read = 0;
 	int ret;
 
+	if (!lr->gdev->chip)
+		return -ENODEV;
+
 	if (count < sizeof(le))
 		return -EINVAL;
 
@@ -1716,6 +1728,9 @@ static __poll_t lineevent_poll(struct file *file,
 	struct lineevent_state *le = file->private_data;
 	__poll_t events = 0;
 
+	if (!le->gdev->chip)
+		return EPOLLHUP | EPOLLERR;
+
 	poll_wait(file, &le->wait, wait);
 
 	if (!kfifo_is_empty_spinlocked_noirqsave(&le->events, &le->wait.lock))
@@ -1740,6 +1755,9 @@ static ssize_t lineevent_read(struct file *file,
 	ssize_t ge_size;
 	int ret;
 
+	if (!le->gdev->chip)
+		return -ENODEV;
+
 	/*
 	 * When compatible system call is being used the struct gpioevent_data,
 	 * in case of at least ia32, has different size due to the alignment
@@ -1821,6 +1839,9 @@ static long lineevent_ioctl(struct file *file, unsigned int cmd,
 	void __user *ip = (void __user *)arg;
 	struct gpiohandle_data ghd;
 
+	if (!le->gdev->chip)
+		return -ENODEV;
+
 	/*
 	 * We can get the value for an event line but not set it,
 	 * because it is input by definition.
@@ -2407,6 +2428,9 @@ static __poll_t lineinfo_watch_poll(struct file *file,
 	struct gpio_chardev_data *cdev = file->private_data;
 	__poll_t events = 0;
 
+	if (!cdev->gdev->chip)
+		return EPOLLHUP | EPOLLERR;
+
 	poll_wait(file, &cdev->wait, pollt);
 
 	if (!kfifo_is_empty_spinlocked_noirqsave(&cdev->events,
@@ -2425,6 +2449,9 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
 	int ret;
 	size_t event_size;
 
+	if (!cdev->gdev->chip)
+		return -ENODEV;
+
 #ifndef CONFIG_GPIO_CDEV_V1
 	event_size = sizeof(struct gpio_v2_line_info_changed);
 	if (count < event_size)
-- 
GitLab


From bdbbae241a04f387ba910b8609f95fad5f1470c7 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 5 Dec 2022 13:39:03 +0100
Subject: [PATCH 3457/4122] gpiolib: protect the GPIO device against being
 dropped while in use by user-space

While any of the GPIO cdev syscalls is in progress, the kernel can call
gpiochip_remove() (for instance, when a USB GPIO expander is disconnected)
which will set gdev->chip to NULL after which any subsequent access will
cause a crash.

To avoid that: use an RW-semaphore in which the syscalls take it for
reading (so that we don't needlessly prohibit the user-space from calling
syscalls simultaneously) while gpiochip_remove() takes it for writing so
that it can only happen once all syscalls return.

Fixes: d7c51b47ac11 ("gpio: userspace ABI for reading/writing GPIO lines")
Fixes: 3c0d9c635ae2 ("gpiolib: cdev: support GPIO_V2_GET_LINE_IOCTL and GPIO_V2_LINE_GET_VALUES_IOCTL")
Fixes: aad955842d1c ("gpiolib: cdev: support GPIO_V2_GET_LINEINFO_IOCTL and GPIO_V2_GET_LINEINFO_WATCH_IOCTL")
Fixes: a54756cb24ea ("gpiolib: cdev: support GPIO_V2_LINE_SET_CONFIG_IOCTL")
Fixes: 7b8e00d98168 ("gpiolib: cdev: support GPIO_V2_LINE_SET_VALUES_IOCTL")
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
[Nick: fixed a build failure with CDEV_V1 disabled]
Co-authored-by: Nick Hainke <vincent@systemli.org>
Reviewed-by: Kent Gibson <warthog618@gmail.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
---
 drivers/gpio/gpiolib-cdev.c | 177 +++++++++++++++++++++++++++++++-----
 drivers/gpio/gpiolib.c      |   4 +
 drivers/gpio/gpiolib.h      |   5 +
 3 files changed, 161 insertions(+), 25 deletions(-)

diff --git a/drivers/gpio/gpiolib-cdev.c b/drivers/gpio/gpiolib-cdev.c
index ac10c9494bf02..f7289c2f3a3c3 100644
--- a/drivers/gpio/gpiolib-cdev.c
+++ b/drivers/gpio/gpiolib-cdev.c
@@ -55,6 +55,50 @@ static_assert(IS_ALIGNED(sizeof(struct gpio_v2_line_values), 8));
  * interface to gpiolib GPIOs via ioctl()s.
  */
 
+typedef __poll_t (*poll_fn)(struct file *, struct poll_table_struct *);
+typedef long (*ioctl_fn)(struct file *, unsigned int, unsigned long);
+typedef ssize_t (*read_fn)(struct file *, char __user *,
+			   size_t count, loff_t *);
+
+static __poll_t call_poll_locked(struct file *file,
+				 struct poll_table_struct *wait,
+				 struct gpio_device *gdev, poll_fn func)
+{
+	__poll_t ret;
+
+	down_read(&gdev->sem);
+	ret = func(file, wait);
+	up_read(&gdev->sem);
+
+	return ret;
+}
+
+static long call_ioctl_locked(struct file *file, unsigned int cmd,
+			      unsigned long arg, struct gpio_device *gdev,
+			      ioctl_fn func)
+{
+	long ret;
+
+	down_read(&gdev->sem);
+	ret = func(file, cmd, arg);
+	up_read(&gdev->sem);
+
+	return ret;
+}
+
+static ssize_t call_read_locked(struct file *file, char __user *buf,
+				size_t count, loff_t *f_ps,
+				struct gpio_device *gdev, read_fn func)
+{
+	ssize_t ret;
+
+	down_read(&gdev->sem);
+	ret = func(file, buf, count, f_ps);
+	up_read(&gdev->sem);
+
+	return ret;
+}
+
 /*
  * GPIO line handle management
  */
@@ -191,8 +235,8 @@ static long linehandle_set_config(struct linehandle_state *lh,
 	return 0;
 }
 
-static long linehandle_ioctl(struct file *file, unsigned int cmd,
-			     unsigned long arg)
+static long linehandle_ioctl_unlocked(struct file *file, unsigned int cmd,
+				      unsigned long arg)
 {
 	struct linehandle_state *lh = file->private_data;
 	void __user *ip = (void __user *)arg;
@@ -250,6 +294,15 @@ static long linehandle_ioctl(struct file *file, unsigned int cmd,
 	}
 }
 
+static long linehandle_ioctl(struct file *file, unsigned int cmd,
+			     unsigned long arg)
+{
+	struct linehandle_state *lh = file->private_data;
+
+	return call_ioctl_locked(file, cmd, arg, lh->gdev,
+				 linehandle_ioctl_unlocked);
+}
+
 #ifdef CONFIG_COMPAT
 static long linehandle_ioctl_compat(struct file *file, unsigned int cmd,
 				    unsigned long arg)
@@ -1381,8 +1434,8 @@ static long linereq_set_config(struct linereq *lr, void __user *ip)
 	return ret;
 }
 
-static long linereq_ioctl(struct file *file, unsigned int cmd,
-			  unsigned long arg)
+static long linereq_ioctl_unlocked(struct file *file, unsigned int cmd,
+				   unsigned long arg)
 {
 	struct linereq *lr = file->private_data;
 	void __user *ip = (void __user *)arg;
@@ -1402,6 +1455,15 @@ static long linereq_ioctl(struct file *file, unsigned int cmd,
 	}
 }
 
+static long linereq_ioctl(struct file *file, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct linereq *lr = file->private_data;
+
+	return call_ioctl_locked(file, cmd, arg, lr->gdev,
+				 linereq_ioctl_unlocked);
+}
+
 #ifdef CONFIG_COMPAT
 static long linereq_ioctl_compat(struct file *file, unsigned int cmd,
 				 unsigned long arg)
@@ -1410,8 +1472,8 @@ static long linereq_ioctl_compat(struct file *file, unsigned int cmd,
 }
 #endif
 
-static __poll_t linereq_poll(struct file *file,
-			    struct poll_table_struct *wait)
+static __poll_t linereq_poll_unlocked(struct file *file,
+				      struct poll_table_struct *wait)
 {
 	struct linereq *lr = file->private_data;
 	__poll_t events = 0;
@@ -1428,10 +1490,16 @@ static __poll_t linereq_poll(struct file *file,
 	return events;
 }
 
-static ssize_t linereq_read(struct file *file,
-			    char __user *buf,
-			    size_t count,
-			    loff_t *f_ps)
+static __poll_t linereq_poll(struct file *file,
+			     struct poll_table_struct *wait)
+{
+	struct linereq *lr = file->private_data;
+
+	return call_poll_locked(file, wait, lr->gdev, linereq_poll_unlocked);
+}
+
+static ssize_t linereq_read_unlocked(struct file *file, char __user *buf,
+				     size_t count, loff_t *f_ps)
 {
 	struct linereq *lr = file->private_data;
 	struct gpio_v2_line_event le;
@@ -1485,6 +1553,15 @@ static ssize_t linereq_read(struct file *file,
 	return bytes_read;
 }
 
+static ssize_t linereq_read(struct file *file, char __user *buf,
+			    size_t count, loff_t *f_ps)
+{
+	struct linereq *lr = file->private_data;
+
+	return call_read_locked(file, buf, count, f_ps, lr->gdev,
+				linereq_read_unlocked);
+}
+
 static void linereq_free(struct linereq *lr)
 {
 	unsigned int i;
@@ -1722,8 +1799,8 @@ struct lineevent_state {
 	(GPIOEVENT_REQUEST_RISING_EDGE | \
 	GPIOEVENT_REQUEST_FALLING_EDGE)
 
-static __poll_t lineevent_poll(struct file *file,
-			       struct poll_table_struct *wait)
+static __poll_t lineevent_poll_unlocked(struct file *file,
+					struct poll_table_struct *wait)
 {
 	struct lineevent_state *le = file->private_data;
 	__poll_t events = 0;
@@ -1739,15 +1816,21 @@ static __poll_t lineevent_poll(struct file *file,
 	return events;
 }
 
+static __poll_t lineevent_poll(struct file *file,
+			       struct poll_table_struct *wait)
+{
+	struct lineevent_state *le = file->private_data;
+
+	return call_poll_locked(file, wait, le->gdev, lineevent_poll_unlocked);
+}
+
 struct compat_gpioeevent_data {
 	compat_u64	timestamp;
 	u32		id;
 };
 
-static ssize_t lineevent_read(struct file *file,
-			      char __user *buf,
-			      size_t count,
-			      loff_t *f_ps)
+static ssize_t lineevent_read_unlocked(struct file *file, char __user *buf,
+				       size_t count, loff_t *f_ps)
 {
 	struct lineevent_state *le = file->private_data;
 	struct gpioevent_data ge;
@@ -1815,6 +1898,15 @@ static ssize_t lineevent_read(struct file *file,
 	return bytes_read;
 }
 
+static ssize_t lineevent_read(struct file *file, char __user *buf,
+			      size_t count, loff_t *f_ps)
+{
+	struct lineevent_state *le = file->private_data;
+
+	return call_read_locked(file, buf, count, f_ps, le->gdev,
+				lineevent_read_unlocked);
+}
+
 static void lineevent_free(struct lineevent_state *le)
 {
 	if (le->irq)
@@ -1832,8 +1924,8 @@ static int lineevent_release(struct inode *inode, struct file *file)
 	return 0;
 }
 
-static long lineevent_ioctl(struct file *file, unsigned int cmd,
-			    unsigned long arg)
+static long lineevent_ioctl_unlocked(struct file *file, unsigned int cmd,
+				     unsigned long arg)
 {
 	struct lineevent_state *le = file->private_data;
 	void __user *ip = (void __user *)arg;
@@ -1864,6 +1956,15 @@ static long lineevent_ioctl(struct file *file, unsigned int cmd,
 	return -EINVAL;
 }
 
+static long lineevent_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct lineevent_state *le = file->private_data;
+
+	return call_ioctl_locked(file, cmd, arg, le->gdev,
+				 lineevent_ioctl_unlocked);
+}
+
 #ifdef CONFIG_COMPAT
 static long lineevent_ioctl_compat(struct file *file, unsigned int cmd,
 				   unsigned long arg)
@@ -2422,8 +2523,8 @@ static int lineinfo_changed_notify(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static __poll_t lineinfo_watch_poll(struct file *file,
-				    struct poll_table_struct *pollt)
+static __poll_t lineinfo_watch_poll_unlocked(struct file *file,
+					     struct poll_table_struct *pollt)
 {
 	struct gpio_chardev_data *cdev = file->private_data;
 	__poll_t events = 0;
@@ -2440,8 +2541,17 @@ static __poll_t lineinfo_watch_poll(struct file *file,
 	return events;
 }
 
-static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
-				   size_t count, loff_t *off)
+static __poll_t lineinfo_watch_poll(struct file *file,
+				    struct poll_table_struct *pollt)
+{
+	struct gpio_chardev_data *cdev = file->private_data;
+
+	return call_poll_locked(file, pollt, cdev->gdev,
+				lineinfo_watch_poll_unlocked);
+}
+
+static ssize_t lineinfo_watch_read_unlocked(struct file *file, char __user *buf,
+					    size_t count, loff_t *off)
 {
 	struct gpio_chardev_data *cdev = file->private_data;
 	struct gpio_v2_line_info_changed event;
@@ -2519,6 +2629,15 @@ static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
 	return bytes_read;
 }
 
+static ssize_t lineinfo_watch_read(struct file *file, char __user *buf,
+				   size_t count, loff_t *off)
+{
+	struct gpio_chardev_data *cdev = file->private_data;
+
+	return call_read_locked(file, buf, count, off, cdev->gdev,
+				lineinfo_watch_read_unlocked);
+}
+
 /**
  * gpio_chrdev_open() - open the chardev for ioctl operations
  * @inode: inode for this chardev
@@ -2532,13 +2651,17 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
 	struct gpio_chardev_data *cdev;
 	int ret = -ENOMEM;
 
+	down_read(&gdev->sem);
+
 	/* Fail on open if the backing gpiochip is gone */
-	if (!gdev->chip)
-		return -ENODEV;
+	if (!gdev->chip) {
+		ret = -ENODEV;
+		goto out_unlock;
+	}
 
 	cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
 	if (!cdev)
-		return -ENOMEM;
+		goto out_unlock;
 
 	cdev->watched_lines = bitmap_zalloc(gdev->chip->ngpio, GFP_KERNEL);
 	if (!cdev->watched_lines)
@@ -2561,6 +2684,8 @@ static int gpio_chrdev_open(struct inode *inode, struct file *file)
 	if (ret)
 		goto out_unregister_notifier;
 
+	up_read(&gdev->sem);
+
 	return ret;
 
 out_unregister_notifier:
@@ -2570,6 +2695,8 @@ out_free_bitmap:
 	bitmap_free(cdev->watched_lines);
 out_free_cdev:
 	kfree(cdev);
+out_unlock:
+	up_read(&gdev->sem);
 	return ret;
 }
 
diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index 0058ee83989df..b8e17a4e38c51 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -790,6 +790,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
 	spin_unlock_irqrestore(&gpio_lock, flags);
 
 	BLOCKING_INIT_NOTIFIER_HEAD(&gdev->notifier);
+	init_rwsem(&gdev->sem);
 
 #ifdef CONFIG_PINCTRL
 	INIT_LIST_HEAD(&gdev->pin_ranges);
@@ -924,6 +925,8 @@ void gpiochip_remove(struct gpio_chip *gc)
 	unsigned long	flags;
 	unsigned int	i;
 
+	down_write(&gdev->sem);
+
 	/* FIXME: should the legacy sysfs handling be moved to gpio_device? */
 	gpiochip_sysfs_unregister(gdev);
 	gpiochip_free_hogs(gc);
@@ -958,6 +961,7 @@ void gpiochip_remove(struct gpio_chip *gc)
 	 * gone.
 	 */
 	gcdev_unregister(gdev);
+	up_write(&gdev->sem);
 	put_device(&gdev->dev);
 }
 EXPORT_SYMBOL_GPL(gpiochip_remove);
diff --git a/drivers/gpio/gpiolib.h b/drivers/gpio/gpiolib.h
index e443c1023a374..b3c2db6eba80c 100644
--- a/drivers/gpio/gpiolib.h
+++ b/drivers/gpio/gpiolib.h
@@ -15,6 +15,7 @@
 #include <linux/device.h>
 #include <linux/module.h>
 #include <linux/cdev.h>
+#include <linux/rwsem.h>
 
 #define GPIOCHIP_NAME	"gpiochip"
 
@@ -39,6 +40,9 @@
  * @list: links gpio_device:s together for traversal
  * @notifier: used to notify subscribers about lines being requested, released
  *            or reconfigured
+ * @sem: protects the structure from a NULL-pointer dereference of @chip by
+ *       user-space operations when the device gets unregistered during
+ *       a hot-unplug event
  * @pin_ranges: range of pins served by the GPIO driver
  *
  * This state container holds most of the runtime variable data
@@ -60,6 +64,7 @@ struct gpio_device {
 	void			*data;
 	struct list_head        list;
 	struct blocking_notifier_head notifier;
+	struct rw_semaphore	sem;
 
 #ifdef CONFIG_PINCTRL
 	/*
-- 
GitLab


From d074f0aebde5649f7a9f1807551efc019b8e81c4 Mon Sep 17 00:00:00 2001
From: ye xingchen <ye.xingchen@zte.com.cn>
Date: Wed, 7 Dec 2022 16:32:18 +0800
Subject: [PATCH 3458/4122] RDMA/hfi1: use sysfs_emit() to instead of
 scnprintf()

Follow the advice of the Documentation/filesystems/sysfs.rst and show()
should only use sysfs_emit() or sysfs_emit_at() when formatting the
value to be returned to user space.

Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Link: https://lore.kernel.org/r/202212071632188074249@zte.com.cn
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/hfi1/driver.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hfi1/driver.c b/drivers/infiniband/hw/hfi1/driver.c
index 8e71bef9d9826..bcc6bc0540f03 100644
--- a/drivers/infiniband/hw/hfi1/driver.c
+++ b/drivers/infiniband/hw/hfi1/driver.c
@@ -112,7 +112,7 @@ static int hfi1_caps_get(char *buffer, const struct kernel_param *kp)
 	cap_mask &= ~HFI1_CAP_LOCKED_SMASK;
 	cap_mask |= ((cap_mask & HFI1_CAP_K2U) << HFI1_CAP_USER_SHIFT);
 
-	return scnprintf(buffer, PAGE_SIZE, "0x%lx", cap_mask);
+	return sysfs_emit(buffer, "0x%lx\n", cap_mask);
 }
 
 struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi)
-- 
GitLab


From e13d23a404f2e6dfaf8b1ef7d161a0836fce4fa5 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Thu, 10 Nov 2022 19:06:18 +0100
Subject: [PATCH 3459/4122] powerpc: export the CPU node count

At boot time, the FDT is parsed to compute the number of CPUs.
In addition count the number of CPU nodes and export it.

This is useful when building the FDT for a kexeced kernel since we need to
take in account the CPU node added since the boot time during CPU hotplug
operations.

Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221110180619.15796-2-ldufour@linux.ibm.com
---
 arch/powerpc/include/asm/prom.h | 1 +
 arch/powerpc/kernel/prom.c      | 3 +++
 2 files changed, 4 insertions(+)

diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h
index 2e82820fbd64c..c0107d8ddd8c8 100644
--- a/arch/powerpc/include/asm/prom.h
+++ b/arch/powerpc/include/asm/prom.h
@@ -85,6 +85,7 @@ struct of_drc_info {
 extern int of_read_drc_info_cell(struct property **prop,
 			const __be32 **curval, struct of_drc_info *data);
 
+extern unsigned int boot_cpu_node_count;
 
 /*
  * There are two methods for telling firmware what our capabilities are.
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1eed87d954ba8..645f4450dfc38 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -72,6 +72,7 @@ int __initdata iommu_is_off;
 int __initdata iommu_force_on;
 unsigned long tce_alloc_start, tce_alloc_end;
 u64 ppc64_rma_size;
+unsigned int boot_cpu_node_count __ro_after_init;
 #endif
 static phys_addr_t first_memblock_size;
 static int __initdata boot_cpu_count;
@@ -335,6 +336,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
+	boot_cpu_node_count++;
+
 	/* Get physical cpuid */
 	intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
 	if (!intserv)
-- 
GitLab


From 340a4a9f8773e102cc5ef531665970a686dfa245 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Thu, 10 Nov 2022 19:06:19 +0100
Subject: [PATCH 3460/4122] powerpc: Take in account addition CPU node when
 building kexec FDT

On a system with a large number of CPUs, the creation of the FDT for a
kexec kernel may fail because the allocated FDT is not large enough.

When this happens, such a message is displayed on the console:

  Unable to add ibm,processor-vadd-size property: FDT_ERR_NOSPACE

The property's name may change depending when the buffer overwrite is
detected.

Obviously the created FDT is missing information, and it is expected
that system dump or kexec kernel failed to run properly.

When the FDT is allocated, the size of the FDT the kernel received at
boot time is used and an extra size can be applied. Currently, only
memory added after boot time is taken in account, not the CPU nodes.

The extra size should take in account these additional CPU nodes and
compute the required extra space. To achieve that, the size of a CPU
node, including its subnode is computed once and multiplied by the
number of additional CPU nodes.

The assumption is that the size of the CPU node is _same_ for all the
node, the only variable part should be the name "PowerPC,POWERxx@##"
where "##" may vary a little.

Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
[mpe: Don't shadow function name w/variable, minor coding style changes]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221110180619.15796-3-ldufour@linux.ibm.com
---
 arch/powerpc/kexec/file_load_64.c | 59 ++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
index 349a781cea0b3..2500c37c628c6 100644
--- a/arch/powerpc/kexec/file_load_64.c
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -26,6 +26,7 @@
 #include <asm/firmware.h>
 #include <asm/kexec_ranges.h>
 #include <asm/crashdump-ppc64.h>
+#include <asm/prom.h>
 
 struct umem_info {
 	u64 *buf;		/* data buffer for usable-memory property */
@@ -928,6 +929,45 @@ out:
 	return ret;
 }
 
+/**
+ * get_cpu_node_size - Compute the size of a CPU node in the FDT.
+ *                     This should be done only once and the value is stored in
+ *                     a static variable.
+ * Returns the max size of a CPU node in the FDT.
+ */
+static unsigned int cpu_node_size(void)
+{
+	static unsigned int size;
+	struct device_node *dn;
+	struct property *pp;
+
+	/*
+	 * Don't compute it twice, we are assuming that the per CPU node size
+	 * doesn't change during the system's life.
+	 */
+	if (size)
+		return size;
+
+	dn = of_find_node_by_type(NULL, "cpu");
+	if (WARN_ON_ONCE(!dn)) {
+		// Unlikely to happen
+		return 0;
+	}
+
+	/*
+	 * We compute the sub node size for a CPU node, assuming it
+	 * will be the same for all.
+	 */
+	size += strlen(dn->name) + 5;
+	for_each_property_of_node(dn, pp) {
+		size += strlen(pp->name);
+		size += pp->length;
+	}
+
+	of_node_put(dn);
+	return size;
+}
+
 /**
  * kexec_extra_fdt_size_ppc64 - Return the estimated additional size needed to
  *                              setup FDT for kexec/kdump kernel.
@@ -937,6 +977,8 @@ out:
  */
 unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
 {
+	unsigned int cpu_nodes, extra_size;
+	struct device_node *dn;
 	u64 usm_entries;
 
 	if (image->type != KEXEC_TYPE_CRASH)
@@ -949,7 +991,22 @@ unsigned int kexec_extra_fdt_size_ppc64(struct kimage *image)
 	 */
 	usm_entries = ((memblock_end_of_DRAM() / drmem_lmb_size()) +
 		       (2 * (resource_size(&crashk_res) / drmem_lmb_size())));
-	return (unsigned int)(usm_entries * sizeof(u64));
+
+	extra_size = (unsigned int)(usm_entries * sizeof(u64));
+
+	/*
+	 * Get the number of CPU nodes in the current DT. This allows to
+	 * reserve places for CPU nodes added since the boot time.
+	 */
+	cpu_nodes = 0;
+	for_each_node_by_type(dn, "cpu") {
+		cpu_nodes++;
+	}
+
+	if (cpu_nodes > boot_cpu_node_count)
+		extra_size += (cpu_nodes - boot_cpu_node_count) * cpu_node_size();
+
+	return extra_size;
 }
 
 /**
-- 
GitLab


From 9b574cfab7d4e68c67c4ee4fcde912ef54a25b88 Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Fri, 25 Nov 2022 18:32:04 +0100
Subject: [PATCH 3461/4122] powerpc/pseries: reset the RCU watchdogs after a
 LPM

The RCU watchdog timer should be reset when restarting the CPU after a
Live Partition Mobility operation.

Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Acked-by: Nicholas Piggin <npiggin@gmail.com>
[mpe: Combine comments into a single comment block]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221125173204.15329-1-ldufour@linux.ibm.com
---
 arch/powerpc/platforms/pseries/mobility.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 634fac5db3f98..4cea71aa0f41d 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -635,10 +635,13 @@ retry:
 		prod_others();
 	}
 	/*
-	 * Execution may have been suspended for several seconds, so
-	 * reset the watchdog.
+	 * Execution may have been suspended for several seconds, so reset
+	 * the watchdogs. touch_nmi_watchdog() also touches the soft lockup
+	 * watchdog.
 	 */
+	rcu_cpu_stall_reset();
 	touch_nmi_watchdog();
+
 	return ret;
 }
 
-- 
GitLab


From f6aa37c51ec0d053ee34c235bfe0e666618a3baf Mon Sep 17 00:00:00 2001
From: Laurent Dufour <ldufour@linux.ibm.com>
Date: Mon, 14 Nov 2022 17:01:50 +0100
Subject: [PATCH 3462/4122] powerpc/pseries: unregister VPA when hot unplugging
 a CPU

The VPA should unregister when offlining a CPU. Otherwise there could be
a short window where 2 CPUs could share the same VPA.

This happens because the hypervisor is still keeping the VPA attached to
the vCPU even if it became offline.

Here is a potential situation:
 1. remove proc A,
 2. add proc B. If proc B gets proc A's place in cpu_present_mask, then
    it registers proc A's VPAs.
 3. If proc B is then re-added to the LP, its threads are sharing VPAs
    with proc A briefly as they come online.

As the hypervisor may check for the VPA's yield_count field oddity, it
may detect an unexpected value and kill the LPAR.

Suggested-by: Nathan Lynch <nathanl@linux.ibm.com>
Signed-off-by: Laurent Dufour <ldufour@linux.ibm.com>
Reviewed-by: Nathan Lynch <nathanl@linux.ibm.com>
[mpe: s/cpu_present_map/cpu_present_mask/ in change log]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221114160150.13554-1-ldufour@linux.ibm.com
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index e0a7ac5db15d9..090ae5a1e0f5e 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -70,6 +70,7 @@ static void pseries_cpu_offline_self(void)
 		xics_teardown_cpu();
 
 	unregister_slb_shadow(hwcpu);
+	unregister_vpa(hwcpu);
 	rtas_stop_self();
 
 	/* Should never get here... */
-- 
GitLab


From 336e2554ec99eb97616004c791ee89abe96bdab2 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:39 -0600
Subject: [PATCH 3463/4122] powerpc/rtas: document rtas_call()

rtas_call() has a complex calling convention, non-standard return
values, and many users. Add kernel-doc for it and remove the less
structured commentary from rtas.h.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-2-nathanl@linux.ibm.com
---
 arch/powerpc/include/asm/rtas.h | 15 ---------
 arch/powerpc/kernel/rtas.c      | 58 +++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 56319aea646e6..479a95cb27701 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -33,21 +33,6 @@
 #define RTAS_THREADS_ACTIVE     -9005 /* Multiple processor threads active */
 #define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */
 
-/*
- * In general to call RTAS use rtas_token("string") to lookup
- * an RTAS token for the given string (e.g. "event-scan").
- * To actually perform the call use
- *    ret = rtas_call(token, n_in, n_out, ...)
- * Where n_in is the number of input parameters and
- *       n_out is the number of output parameters
- *
- * If the "string" is invalid on this system, RTAS_UNKNOWN_SERVICE
- * will be returned as a token.  rtas_call() does look for this
- * token and error out gracefully so rtas_call(rtas_token("str"), ...)
- * may be safely used for one-shot calls to RTAS.
- *
- */
-
 /* RTAS event classes */
 #define RTAS_INTERNAL_ERROR		0x80000000 /* set bit 0 */
 #define RTAS_EPOW_WARNING		0x40000000 /* set bit 1 */
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index e847f9b1c5b9f..c12dd5ed5e001 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -467,6 +467,64 @@ void rtas_call_unlocked(struct rtas_args *args, int token, int nargs, int nret,
 static int ibm_open_errinjct_token;
 static int ibm_errinjct_token;
 
+/**
+ * rtas_call() - Invoke an RTAS firmware function.
+ * @token: Identifies the function being invoked.
+ * @nargs: Number of input parameters. Does not include token.
+ * @nret: Number of output parameters, including the call status.
+ * @outputs: Array of @nret output words.
+ * @....: List of @nargs input parameters.
+ *
+ * Invokes the RTAS function indicated by @token, which the caller
+ * should obtain via rtas_token().
+ *
+ * The @nargs and @nret arguments must match the number of input and
+ * output parameters specified for the RTAS function.
+ *
+ * rtas_call() returns RTAS status codes, not conventional Linux errno
+ * values. Callers must translate any failure to an appropriate errno
+ * in syscall context. Most callers of RTAS functions that can return
+ * -2 or 990x should use rtas_busy_delay() to correctly handle those
+ * statuses before calling again.
+ *
+ * The return value descriptions are adapted from 7.2.8 [RTAS] Return
+ * Codes of the PAPR and CHRP specifications.
+ *
+ * Context: Process context preferably, interrupt context if
+ *          necessary.  Acquires an internal spinlock and may perform
+ *          GFP_ATOMIC slab allocation in error path. Unsafe for NMI
+ *          context.
+ * Return:
+ * *                          0 - RTAS function call succeeded.
+ * *                         -1 - RTAS function encountered a hardware or
+ *                                platform error, or the token is invalid,
+ *                                or the function is restricted by kernel policy.
+ * *                         -2 - Specs say "A necessary hardware device was busy,
+ *                                and the requested function could not be
+ *                                performed. The operation should be retried at
+ *                                a later time." This is misleading, at least with
+ *                                respect to current RTAS implementations. What it
+ *                                usually means in practice is that the function
+ *                                could not be completed while meeting RTAS's
+ *                                deadline for returning control to the OS (250us
+ *                                for PAPR/PowerVM, typically), but the call may be
+ *                                immediately reattempted to resume work on it.
+ * *                         -3 - Parameter error.
+ * *                         -7 - Unexpected state change.
+ * *                9000...9899 - Vendor-specific success codes.
+ * *                9900...9905 - Advisory extended delay. Caller should try
+ *                                again after ~10^x ms has elapsed, where x is
+ *                                the last digit of the status [0-5]. Again going
+ *                                beyond the PAPR text, 990x on PowerVM indicates
+ *                                contention for RTAS-internal resources. Other
+ *                                RTAS call sequences in progress should be
+ *                                allowed to complete before reattempting the
+ *                                call.
+ * *                      -9000 - Multi-level isolation error.
+ * *              -9999...-9004 - Vendor-specific error codes.
+ * * Additional negative values - Function-specific error.
+ * * Additional positive values - Function-specific success.
+ */
 int rtas_call(int token, int nargs, int nret, int *outputs, ...)
 {
 	va_list list;
-- 
GitLab


From b10af504a2015d12c566b6b0a4c7e3b602949eeb Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:40 -0600
Subject: [PATCH 3464/4122] powerpc/rtasd: use correct OF API for event scan
 rate

rtas_token() should be used only for properties that are RTAS function
tokens. "rtas-event-scan-rate" does not contain a function token, but it
has the same size/format as token properties so reading it with
rtas_token() happens to work.

Convert to of_property_read_u32().

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-3-nathanl@linux.ibm.com
---
 arch/powerpc/kernel/rtasd.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 5270b450bbde4..cc56ac6ba4b04 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -9,6 +9,7 @@
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/of.h>
 #include <linux/poll.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
@@ -499,6 +500,8 @@ EXPORT_SYMBOL_GPL(rtas_cancel_event_scan);
 
 static int __init rtas_event_scan_init(void)
 {
+	int err;
+
 	if (!machine_is(pseries) && !machine_is(chrp))
 		return 0;
 
@@ -509,8 +512,8 @@ static int __init rtas_event_scan_init(void)
 		return -ENODEV;
 	}
 
-	rtas_event_scan_rate = rtas_token("rtas-event-scan-rate");
-	if (rtas_event_scan_rate == RTAS_UNKNOWN_SERVICE) {
+	err = of_property_read_u32(rtas.dev, "rtas-event-scan-rate", &rtas_event_scan_rate);
+	if (err) {
 		printk(KERN_ERR "rtasd: no rtas-event-scan-rate on system\n");
 		return -ENODEV;
 	}
-- 
GitLab


From ed2213bfb192ab51f09f12e9b49b5d482c6493f3 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:41 -0600
Subject: [PATCH 3465/4122] powerpc/rtas: avoid device tree lookups in
 rtas_os_term()

rtas_os_term() is called during panic. Its behavior depends on a couple
of conditions in the /rtas node of the device tree, the traversal of
which entails locking and local IRQ state changes. If the kernel panics
while devtree_lock is held, rtas_os_term() as currently written could
hang.

Instead of discovering the relevant characteristics at panic time,
cache them in file-static variables at boot. Note the lookup for
"ibm,extended-os-term" is converted to of_property_read_bool() since it
is a boolean property, not an RTAS function token.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
[mpe: Incorporate suggested change from Nick]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-4-nathanl@linux.ibm.com
---
 arch/powerpc/kernel/rtas.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index c12dd5ed5e001..db43cbdcc74ca 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -947,6 +947,7 @@ void __noreturn rtas_halt(void)
 
 /* Must be in the RMO region, so we place it here */
 static char rtas_os_term_buf[2048];
+static s32 ibm_os_term_token = RTAS_UNKNOWN_SERVICE;
 
 void rtas_os_term(char *str)
 {
@@ -958,14 +959,13 @@ void rtas_os_term(char *str)
 	 * this property may terminate the partition which we want to avoid
 	 * since it interferes with panic_timeout.
 	 */
-	if (RTAS_UNKNOWN_SERVICE == rtas_token("ibm,os-term") ||
-	    RTAS_UNKNOWN_SERVICE == rtas_token("ibm,extended-os-term"))
+	if (ibm_os_term_token == RTAS_UNKNOWN_SERVICE)
 		return;
 
 	snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
 
 	do {
-		status = rtas_call(rtas_token("ibm,os-term"), 1, 1, NULL,
+		status = rtas_call(ibm_os_term_token, 1, 1, NULL,
 				   __pa(rtas_os_term_buf));
 	} while (rtas_busy_delay(status));
 
@@ -1335,6 +1335,13 @@ void __init rtas_initialize(void)
 	no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
 	rtas.entry = no_entry ? rtas.base : entry;
 
+	/*
+	 * Discover these now to avoid device tree lookups in the
+	 * panic path.
+	 */
+	if (of_property_read_bool(rtas.dev, "ibm,extended-os-term"))
+		ibm_os_term_token = rtas_token("ibm,os-term");
+
 	/* If RTAS was found, allocate the RMO buffer for it and look for
 	 * the stop-self token if any
 	 */
-- 
GitLab


From 6c606e57eecc37d6b36d732b1ff7e55b7dc32dd4 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:42 -0600
Subject: [PATCH 3466/4122] powerpc/rtas: avoid scheduling in rtas_os_term()

It's unsafe to use rtas_busy_delay() to handle a busy status from
the ibm,os-term RTAS function in rtas_os_term():

Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
BUG: sleeping function called from invalid context at arch/powerpc/kernel/rtas.c:618
in_atomic(): 1, irqs_disabled(): 1, non_block: 0, pid: 1, name: swapper/0
preempt_count: 2, expected: 0
CPU: 7 PID: 1 Comm: swapper/0 Tainted: G      D            6.0.0-rc5-02182-gf8553a572277-dirty #9
Call Trace:
[c000000007b8f000] [c000000001337110] dump_stack_lvl+0xb4/0x110 (unreliable)
[c000000007b8f040] [c0000000002440e4] __might_resched+0x394/0x3c0
[c000000007b8f0e0] [c00000000004f680] rtas_busy_delay+0x120/0x1b0
[c000000007b8f100] [c000000000052d04] rtas_os_term+0xb8/0xf4
[c000000007b8f180] [c0000000001150fc] pseries_panic+0x50/0x68
[c000000007b8f1f0] [c000000000036354] ppc_panic_platform_handler+0x34/0x50
[c000000007b8f210] [c0000000002303c4] notifier_call_chain+0xd4/0x1c0
[c000000007b8f2b0] [c0000000002306cc] atomic_notifier_call_chain+0xac/0x1c0
[c000000007b8f2f0] [c0000000001d62b8] panic+0x228/0x4d0
[c000000007b8f390] [c0000000001e573c] do_exit+0x140c/0x1420
[c000000007b8f480] [c0000000001e586c] make_task_dead+0xdc/0x200

Use rtas_busy_delay_time() instead, which signals without side effects
whether to attempt the ibm,os-term RTAS call again.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Nicholas Piggin <npiggin@gmail.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-5-nathanl@linux.ibm.com
---
 arch/powerpc/kernel/rtas.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index db43cbdcc74ca..f21b39fcaf996 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -964,10 +964,15 @@ void rtas_os_term(char *str)
 
 	snprintf(rtas_os_term_buf, 2048, "OS panic: %s", str);
 
+	/*
+	 * Keep calling as long as RTAS returns a "try again" status,
+	 * but don't use rtas_busy_delay(), which potentially
+	 * schedules.
+	 */
 	do {
 		status = rtas_call(ibm_os_term_token, 1, 1, NULL,
 				   __pa(rtas_os_term_buf));
-	} while (rtas_busy_delay(status));
+	} while (rtas_busy_delay_time(status));
 
 	if (status != 0)
 		printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
-- 
GitLab


From 9aafbfa5f57a4b75bafd3bed0191e8429c5fa618 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:43 -0600
Subject: [PATCH 3467/4122] powerpc/pseries/eeh: use correct API for error log
 size

rtas-error-log-max is not the name of an RTAS function, so rtas_token()
is not the appropriate API for retrieving its value. We already have
rtas_get_error_log_max() which returns a sensible value if the property
is absent for any reason, so use that instead.

Fixes: 8d633291b4fc ("powerpc/eeh: pseries platform EEH error log retrieval")
Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
[mpe: Drop no-longer possible error handling as noticed by ajd]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-6-nathanl@linux.ibm.com
---
 arch/powerpc/platforms/pseries/eeh_pseries.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index ea890037843c4..6b507b62ce8f1 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -848,16 +848,7 @@ static int __init eeh_pseries_init(void)
 	}
 
 	/* Initialize error log size */
-	eeh_error_buf_size = rtas_token("rtas-error-log-max");
-	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
-		pr_info("%s: unknown EEH error log size\n",
-			__func__);
-		eeh_error_buf_size = 1024;
-	} else if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
-		pr_info("%s: EEH error log size %d exceeds the maximal %d\n",
-			__func__, eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
-		eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
-	}
+	eeh_error_buf_size = rtas_get_error_log_max();
 
 	/* Set EEH probe mode */
 	eeh_add_flag(EEH_PROBE_MODE_DEVTREE | EEH_ENABLE_IO_FOR_LOG);
-- 
GitLab


From c67a0e411d0ffe0648fe84e25e9f899ce770feb3 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:44 -0600
Subject: [PATCH 3468/4122] powerpc/rtas: clean up rtas_error_log_max
 initialization

The code in rtas_get_error_log_max() doesn't cause problems in
practice, but there are no measures to ensure that the lazy
initialization of the static rtas_error_log_max variable is atomic,
and it's not worth adding them.

Initialize the static rtas_error_log_max variable at boot when we're
single-threaded instead of lazily on first use. Use the more
appropriate of_property_read_u32() API instead of rtas_token() to
consult the "rtas-error-log-max" property, which is not the name of an
RTAS function. Convert use of printk() to pr_warn() and distinguish
the possible error cases.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-7-nathanl@linux.ibm.com
---
 arch/powerpc/kernel/rtas.c | 37 ++++++++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index f21b39fcaf996..a1b637259e3db 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -353,6 +353,9 @@ int rtas_service_present(const char *service)
 EXPORT_SYMBOL(rtas_service_present);
 
 #ifdef CONFIG_RTAS_ERROR_LOGGING
+
+static u32 rtas_error_log_max __ro_after_init = RTAS_ERROR_LOG_MAX;
+
 /*
  * Return the firmware-specified size of the error log buffer
  *  for all rtas calls that require an error buffer argument.
@@ -360,21 +363,30 @@ EXPORT_SYMBOL(rtas_service_present);
  */
 int rtas_get_error_log_max(void)
 {
-	static int rtas_error_log_max;
-	if (rtas_error_log_max)
-		return rtas_error_log_max;
-
-	rtas_error_log_max = rtas_token ("rtas-error-log-max");
-	if ((rtas_error_log_max == RTAS_UNKNOWN_SERVICE) ||
-	    (rtas_error_log_max > RTAS_ERROR_LOG_MAX)) {
-		printk (KERN_WARNING "RTAS: bad log buffer size %d\n",
-			rtas_error_log_max);
-		rtas_error_log_max = RTAS_ERROR_LOG_MAX;
-	}
 	return rtas_error_log_max;
 }
 EXPORT_SYMBOL(rtas_get_error_log_max);
 
+static void __init init_error_log_max(void)
+{
+	static const char propname[] __initconst = "rtas-error-log-max";
+	u32 max;
+
+	if (of_property_read_u32(rtas.dev, propname, &max)) {
+		pr_warn("%s not found, using default of %u\n",
+			propname, RTAS_ERROR_LOG_MAX);
+		max = RTAS_ERROR_LOG_MAX;
+	}
+
+	if (max > RTAS_ERROR_LOG_MAX) {
+		pr_warn("%s = %u, clamping max error log size to %u\n",
+			propname, max, RTAS_ERROR_LOG_MAX);
+		max = RTAS_ERROR_LOG_MAX;
+	}
+
+	rtas_error_log_max = max;
+}
+
 
 static char rtas_err_buf[RTAS_ERROR_LOG_MAX];
 static int rtas_last_error_token;
@@ -432,6 +444,7 @@ static char *__fetch_rtas_last_error(char *altbuf)
 #else /* CONFIG_RTAS_ERROR_LOGGING */
 #define __fetch_rtas_last_error(x)	NULL
 #define get_errorlog_buffer()		NULL
+static void __init init_error_log_max(void) {}
 #endif
 
 
@@ -1340,6 +1353,8 @@ void __init rtas_initialize(void)
 	no_entry = of_property_read_u32(rtas.dev, "linux,rtas-entry", &entry);
 	rtas.entry = no_entry ? rtas.base : entry;
 
+	init_error_log_max();
+
 	/*
 	 * Discover these now to avoid device tree lookups in the
 	 * panic path.
-- 
GitLab


From 9581f8a00777a073fdd8146659a51ca007cae8d6 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:45 -0600
Subject: [PATCH 3469/4122] powerpc/rtas: clean up includes

rtas.c used to host complex code related to pseries-specific guest
migration and suspend, which used atomics, completions, hcalls, and
CPU hotplug APIs. That's all been deleted or moved, so remove the
include directives that have been rendered unnecessary. Sort the
remainder (with linux/ before asm/) to impose some order on where
future additions go.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-8-nathanl@linux.ibm.com
---
 arch/powerpc/kernel/rtas.c | 42 +++++++++++++++-----------------------
 1 file changed, 16 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index a1b637259e3db..5ce17b4bd7ebb 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -7,43 +7,33 @@
  * Copyright (C) 2001 IBM.
  */
 
-#include <linux/stdarg.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/export.h>
-#include <linux/init.h>
 #include <linux/capability.h>
 #include <linux/delay.h>
-#include <linux/cpu.h>
-#include <linux/sched.h>
-#include <linux/smp.h>
-#include <linux/completion.h>
-#include <linux/cpumask.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
 #include <linux/memblock.h>
-#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/of_fdt.h>
 #include <linux/reboot.h>
+#include <linux/sched.h>
 #include <linux/security.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/stdarg.h>
 #include <linux/syscalls.h>
-#include <linux/of.h>
-#include <linux/of_fdt.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
 
+#include <asm/delay.h>
+#include <asm/firmware.h>
 #include <asm/interrupt.h>
-#include <asm/rtas.h>
-#include <asm/hvcall.h>
 #include <asm/machdep.h>
-#include <asm/firmware.h>
+#include <asm/mmu.h>
 #include <asm/page.h>
-#include <asm/param.h>
-#include <asm/delay.h>
-#include <linux/uaccess.h>
-#include <asm/udbg.h>
-#include <asm/syscalls.h>
-#include <asm/smp.h>
-#include <linux/atomic.h>
+#include <asm/rtas.h>
 #include <asm/time.h>
-#include <asm/mmu.h>
-#include <asm/topology.h>
+#include <asm/udbg.h>
 
 /* This is here deliberately so it's only used in this file */
 void enter_rtas(unsigned long);
-- 
GitLab


From f975b6559bac510f1b1b39637997bb240f0a9969 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:46 -0600
Subject: [PATCH 3470/4122] powerpc/rtas: define pr_fmt and convert printk call
 sites

Set pr_fmt to "rtas: " and convert the handful of printk() uses in
rtas.c, adjusting the messages to remove now-redundant "RTAS"
strings.

Note that rtas_restart(), rtas_power_off(), and rtas_halt() all
currently use printk() without specifying a log level. These have been
changed to use pr_emerg(), which matches the behavior of
rtas_os_term().

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-9-nathanl@linux.ibm.com
---
 arch/powerpc/kernel/rtas.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 5ce17b4bd7ebb..10c19228aaa37 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -7,6 +7,8 @@
  * Copyright (C) 2001 IBM.
  */
 
+#define pr_fmt(fmt)	"rtas: " fmt
+
 #include <linux/capability.h>
 #include <linux/delay.h>
 #include <linux/export.h>
@@ -718,8 +720,7 @@ static int rtas_error_rc(int rtas_rc)
 			rc = -ENODEV;
 			break;
 		default:
-			printk(KERN_ERR "%s: unexpected RTAS error %d\n",
-					__func__, rtas_rc);
+			pr_err("%s: unexpected error %d\n", __func__, rtas_rc);
 			rc = -ERANGE;
 			break;
 	}
@@ -923,8 +924,8 @@ void __noreturn rtas_restart(char *cmd)
 {
 	if (rtas_flash_term_hook)
 		rtas_flash_term_hook(SYS_RESTART);
-	printk("RTAS system-reboot returned %d\n",
-	       rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
+	pr_emerg("system-reboot returned %d\n",
+		 rtas_call(rtas_token("system-reboot"), 0, 1, NULL));
 	for (;;);
 }
 
@@ -933,8 +934,8 @@ void rtas_power_off(void)
 	if (rtas_flash_term_hook)
 		rtas_flash_term_hook(SYS_POWER_OFF);
 	/* allow power on only with power button press */
-	printk("RTAS power-off returned %d\n",
-	       rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+	pr_emerg("power-off returned %d\n",
+		 rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
 	for (;;);
 }
 
@@ -943,8 +944,8 @@ void __noreturn rtas_halt(void)
 	if (rtas_flash_term_hook)
 		rtas_flash_term_hook(SYS_HALT);
 	/* allow power on only with power button press */
-	printk("RTAS power-off returned %d\n",
-	       rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
+	pr_emerg("power-off returned %d\n",
+		 rtas_call(rtas_token("power-off"), 2, 1, NULL, -1, -1));
 	for (;;);
 }
 
@@ -978,7 +979,7 @@ void rtas_os_term(char *str)
 	} while (rtas_busy_delay_time(status));
 
 	if (status != 0)
-		printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
+		pr_emerg("ibm,os-term call failed %d\n", status);
 }
 
 /**
-- 
GitLab


From 98c738c8cee6e5a58d4060862e2f8cf3cdc8a328 Mon Sep 17 00:00:00 2001
From: Nathan Lynch <nathanl@linux.ibm.com>
Date: Fri, 18 Nov 2022 09:07:47 -0600
Subject: [PATCH 3471/4122] powerpc/rtas: mandate RTAS syscall filtering

CONFIG_PPC_RTAS_FILTER has been optional but default-enabled since its
introduction. It's been enabled in enterprise distro kernels for a
while without causing ABI breakage that wasn't easily fixed, and it
prevents harmful abuses of the rtas syscall.

Let's make it unconditional.

Signed-off-by: Nathan Lynch <nathanl@linux.ibm.com>
Reviewed-by: Andrew Donnellan <ajd@linux.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221118150751.469393-10-nathanl@linux.ibm.com
---
 arch/powerpc/Kconfig       | 13 -------------
 arch/powerpc/kernel/rtas.c | 16 ----------------
 2 files changed, 29 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index e21d6de797d6e..65952f62ea4be 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1044,19 +1044,6 @@ config PPC_SECVAR_SYSFS
 	  read/write operations on these variables. Say Y if you have
 	  secure boot enabled and want to expose variables to userspace.
 
-config PPC_RTAS_FILTER
-	bool "Enable filtering of RTAS syscalls"
-	default y
-	depends on PPC_RTAS
-	help
-	  The RTAS syscall API has security issues that could be used to
-	  compromise system integrity. This option enforces restrictions on the
-	  RTAS calls and arguments passed by userspace programs to mitigate
-	  these issues.
-
-	  Say Y unless you know what you are doing and the filter is causing
-	  problems for you.
-
 endmenu
 
 config ISA_DMA_API
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 10c19228aaa37..deded51a79784 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1050,8 +1050,6 @@ noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log
 	return NULL;
 }
 
-#ifdef CONFIG_PPC_RTAS_FILTER
-
 /*
  * The sys_rtas syscall, as originally designed, allows root to pass
  * arbitrary physical addresses to RTAS calls. A number of RTAS calls
@@ -1200,20 +1198,6 @@ static void __init rtas_syscall_filter_init(void)
 		rtas_filters[i].token = rtas_token(rtas_filters[i].name);
 }
 
-#else
-
-static bool block_rtas_call(int token, int nargs,
-			    struct rtas_args *args)
-{
-	return false;
-}
-
-static void __init rtas_syscall_filter_init(void)
-{
-}
-
-#endif /* CONFIG_PPC_RTAS_FILTER */
-
 /* We assume to be passed big endian arguments */
 SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
 {
-- 
GitLab


From 7ccb966779645636679a723588b7bae4f0a8d7d5 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 14 Nov 2022 10:42:25 -0800
Subject: [PATCH 3472/4122] PCI: aardvark: Switch to using
 devm_gpiod_get_optional()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switch the driver to the generic version of gpiod API (and away from
OF-specific variant), so that we can stop exporting
devm_gpiod_get_from_of_node().

Link: https://lore.kernel.org/r/Y3KMEZFv6dpxA+Gv@google.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Pali Rohár <pali@kernel.org>
---
 drivers/pci/controller/pci-aardvark.c | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/controller/pci-aardvark.c b/drivers/pci/controller/pci-aardvark.c
index ba36bbc5897d0..513d8edf3a5cf 100644
--- a/drivers/pci/controller/pci-aardvark.c
+++ b/drivers/pci/controller/pci-aardvark.c
@@ -1859,20 +1859,18 @@ static int advk_pcie_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	pcie->reset_gpio = devm_gpiod_get_from_of_node(dev, dev->of_node,
-						       "reset-gpios", 0,
-						       GPIOD_OUT_LOW,
-						       "pcie1-reset");
+	pcie->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
 	ret = PTR_ERR_OR_ZERO(pcie->reset_gpio);
 	if (ret) {
-		if (ret == -ENOENT) {
-			pcie->reset_gpio = NULL;
-		} else {
-			if (ret != -EPROBE_DEFER)
-				dev_err(dev, "Failed to get reset-gpio: %i\n",
-					ret);
-			return ret;
-		}
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get reset-gpio: %i\n", ret);
+		return ret;
+	}
+
+	ret = gpiod_set_consumer_name(pcie->reset_gpio, "pcie1-reset");
+	if (ret) {
+		dev_err(dev, "Failed to set reset gpio name: %d\n", ret);
+		return ret;
 	}
 
 	ret = of_pci_get_max_link_speed(dev->of_node);
-- 
GitLab


From 6d4671b534f6c084e92ef167a52dc47e55f636c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Pali=20Roh=C3=A1r?= <pali@kernel.org>
Date: Tue, 27 Sep 2022 16:19:17 +0200
Subject: [PATCH 3473/4122] PCI: pciehp: Enable Command Completed Interrupt
 only if supported
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The No Command Completed Support bit in the Slot Capabilities register
indicates whether Command Completed Interrupt Enable is unsupported.

We already check whether No Command Completed Support bit is set in
pcie_wait_cmd(), and do not wait in this case.

Don't enable this Command Completed Interrupt at all if NCCS is set, so
that when users dump configuration space from userspace, the dump does not
confuse them by saying that Command Completed Interrupt is not supported,
but it is enabled.

Link: https://lore.kernel.org/r/20220927141926.8895-2-kabel@kernel.org
Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Marek Behún <kabel@kernel.org>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Lukas Wunner <lukas@wunner.de>
---
 drivers/pci/hotplug/pciehp_hpc.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c
index 040ae076ec0e9..10e9670eea0b0 100644
--- a/drivers/pci/hotplug/pciehp_hpc.c
+++ b/drivers/pci/hotplug/pciehp_hpc.c
@@ -811,7 +811,9 @@ static void pcie_enable_notification(struct controller *ctrl)
 	else
 		cmd |= PCI_EXP_SLTCTL_PDCE;
 	if (!pciehp_poll_mode)
-		cmd |= PCI_EXP_SLTCTL_HPIE | PCI_EXP_SLTCTL_CCIE;
+		cmd |= PCI_EXP_SLTCTL_HPIE;
+	if (!pciehp_poll_mode && !NO_CMD_CMPL(ctrl))
+		cmd |= PCI_EXP_SLTCTL_CCIE;
 
 	mask = (PCI_EXP_SLTCTL_PDCE | PCI_EXP_SLTCTL_ABPE |
 		PCI_EXP_SLTCTL_PFDE |
-- 
GitLab


From f18caf261398a7f2de4fa3f600deb87072fe7b8d Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Wed, 7 Dec 2022 15:22:18 +0400
Subject: [PATCH 3474/4122] device property: Fix documentation for
 fwnode_get_next_parent()

Use fwnode_handle_put() on the node pointer to release the refcount.
Change fwnode_handle_node() to fwnode_handle_put().

Fixes: 233872585de1 ("device property: Add fwnode_get_next_parent()")
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Reviewed-by: Daniel Scally <djrscally@gmail.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Link: https://lore.kernel.org/r/20221207112219.2652411-1-linmq006@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/property.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/property.c b/drivers/base/property.c
index ed74083c179d8..bbb3e499ff4a5 100644
--- a/drivers/base/property.c
+++ b/drivers/base/property.c
@@ -609,7 +609,7 @@ EXPORT_SYMBOL_GPL(fwnode_get_parent);
  * node's parents.
  *
  * Returns a node pointer with refcount incremented, use
- * fwnode_handle_node() on it when done.
+ * fwnode_handle_put() on it when done.
  */
 struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode)
 {
-- 
GitLab


From 447242e1292e8aa22e1371110867b9c543bf0373 Mon Sep 17 00:00:00 2001
From: Lee Jones <lee@kernel.org>
Date: Tue, 6 Dec 2022 10:45:10 +0000
Subject: [PATCH 3475/4122] MAINTAINERS: Add additional co-maintainer to LEDs

Add myself as co-maintainer for the LED subsystem,
in support of Pavel during busy times.

Suggested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Lee Jones <lee@kernel.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..e12755e75c820 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11555,6 +11555,7 @@ F:	scripts/leaking_addresses.pl
 
 LED SUBSYSTEM
 M:	Pavel Machek <pavel@ucw.cz>
+M:	Lee Jones <lee@kernel.org>
 L:	linux-leds@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
-- 
GitLab


From 135780f1048b3f956f5b10bb23dec9c2d2c4ef6d Mon Sep 17 00:00:00 2001
From: Luca Weiss <luca@z3ntu.xyz>
Date: Tue, 29 Nov 2022 22:29:01 +0100
Subject: [PATCH 3476/4122] leds: is31fl319x: Fix setting current limit for
 is31fl319{0,1,3}

The current setting lives in bits 4:2 (as also defined by the mask) but
the current limit defines in the driver use bits 2:0 which should be
shifted over so they don't get masked out completely (except for 17.5mA
which became 10mA).

Now checking /sys/kernel/debug/regmap/1-0068/registers shows that the
current limit is applied correctly and doesn't take the default b000 =
42mA.

Fixes: fa877cf1abb9 ("leds: is31fl319x: Add support for is31fl319{0,1,3} chips")
Signed-off-by: Luca Weiss <luca@z3ntu.xyz>
Reviewed-by: Vincent Knecht <vincent.knecht@mailoo.org>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/leds-is31fl319x.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/leds/leds-is31fl319x.c b/drivers/leds/leds-is31fl319x.c
index 52b59b62f437c..b2f4c4ec7c567 100644
--- a/drivers/leds/leds-is31fl319x.c
+++ b/drivers/leds/leds-is31fl319x.c
@@ -38,6 +38,7 @@
 #define IS31FL3190_CURRENT_uA_MIN	5000
 #define IS31FL3190_CURRENT_uA_DEFAULT	42000
 #define IS31FL3190_CURRENT_uA_MAX	42000
+#define IS31FL3190_CURRENT_SHIFT	2
 #define IS31FL3190_CURRENT_MASK		GENMASK(4, 2)
 #define IS31FL3190_CURRENT_5_mA		0x02
 #define IS31FL3190_CURRENT_10_mA	0x01
@@ -553,7 +554,7 @@ static int is31fl319x_probe(struct i2c_client *client)
 			     is31fl3196_db_to_gain(is31->audio_gain_db));
 	else
 		regmap_update_bits(is31->regmap, IS31FL3190_CURRENT, IS31FL3190_CURRENT_MASK,
-				   is31fl3190_microamp_to_cs(dev, aggregated_led_microamp));
+				   is31fl3190_microamp_to_cs(dev, aggregated_led_microamp) << IS31FL3190_CURRENT_SHIFT);
 
 	for (i = 0; i < is31->cdef->num_leds; i++) {
 		struct is31fl319x_led *led = &is31->leds[i];
-- 
GitLab


From 3256412fc57b6cabbc1cf1317d020e42f6d7aeab Mon Sep 17 00:00:00 2001
From: Weilong Chen <chenweilong@huawei.com>
Date: Tue, 8 Nov 2022 09:58:11 +0800
Subject: [PATCH 3477/4122] i2c: hisi: Add support to get clock frequency from
 clock

The clk_rate attribute is not generic device tree bindings for I2C
busses described in Documentation/devicetree/bindings/i2c/i2c.txt.
It can be managed by clock binding.

Support the driver to obtain clock information by clk_rate or
clock property. Find clock first, if not, fall back to clk_rate.

Signed-off-by: Weilong Chen <chenweilong@huawei.com>
Acked-by: Yicong Yang <yangyicong@hisilicon.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-hisi.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/i2c/busses/i2c-hisi.c b/drivers/i2c/busses/i2c-hisi.c
index bcc97e4fcb654..8c6c7075c765c 100644
--- a/drivers/i2c/busses/i2c-hisi.c
+++ b/drivers/i2c/busses/i2c-hisi.c
@@ -7,6 +7,7 @@
 
 #include <linux/bits.h>
 #include <linux/bitfield.h>
+#include <linux/clk.h>
 #include <linux/completion.h>
 #include <linux/i2c.h>
 #include <linux/interrupt.h>
@@ -88,6 +89,7 @@ struct hisi_i2c_controller {
 	struct i2c_adapter adapter;
 	void __iomem *iobase;
 	struct device *dev;
+	struct clk *clk;
 	int irq;
 
 	/* Intermediates for recording the transfer process */
@@ -454,10 +456,15 @@ static int hisi_i2c_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = device_property_read_u64(dev, "clk_rate", &clk_rate_hz);
-	if (ret) {
-		dev_err(dev, "failed to get clock frequency, ret = %d\n", ret);
-		return ret;
+	ctlr->clk = devm_clk_get_optional_enabled(&pdev->dev, NULL);
+	if (IS_ERR_OR_NULL(ctlr->clk)) {
+		ret = device_property_read_u64(dev, "clk_rate", &clk_rate_hz);
+		if (ret) {
+			dev_err(dev, "failed to get clock frequency, ret = %d\n", ret);
+			return ret;
+		}
+	} else {
+		clk_rate_hz = clk_get_rate(ctlr->clk);
 	}
 
 	ctlr->clk_rate_khz = DIV_ROUND_UP_ULL(clk_rate_hz, HZ_PER_KHZ);
-- 
GitLab


From 810199f7315604bd969409109f1c96b4ebe772ad Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Wed, 19 Oct 2022 22:28:08 +0200
Subject: [PATCH 3478/4122] i2c: xiic: Make sure to disable clock on .remove()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

If for whatever reasons pm_runtime_resume_and_get() failed, .remove() is
exited early, the clock isn't freed and runtime PM state isn't reset.

The right thing to do however is to free all resources that don't need
HW access after a problem with runtime PM. Also issue a warning in that
case and return 0 to suppress a less helpful warning by the driver core.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Acked-by: Michal Simek <michal.simek@amd.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-xiic.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/busses/i2c-xiic.c b/drivers/i2c/busses/i2c-xiic.c
index 277a02455cddd..bee5a2ef1f229 100644
--- a/drivers/i2c/busses/i2c-xiic.c
+++ b/drivers/i2c/busses/i2c-xiic.c
@@ -858,11 +858,14 @@ static int xiic_i2c_remove(struct platform_device *pdev)
 	/* remove adapter & data */
 	i2c_del_adapter(&i2c->adap);
 
-	ret = pm_runtime_resume_and_get(i2c->dev);
+	ret = pm_runtime_get_sync(i2c->dev);
+
 	if (ret < 0)
-		return ret;
+		dev_warn(&pdev->dev, "Failed to activate device for removal (%pe)\n",
+			 ERR_PTR(ret));
+	else
+		xiic_deinit(i2c);
 
-	xiic_deinit(i2c);
 	pm_runtime_put_sync(i2c->dev);
 	clk_disable_unprepare(i2c->clk);
 	pm_runtime_disable(&pdev->dev);
-- 
GitLab


From 3f6fb1cfaf30d0d701d877ffacfd88dd6bcc5841 Mon Sep 17 00:00:00 2001
From: ye xingchen <ye.xingchen@zte.com.cn>
Date: Thu, 1 Dec 2022 16:11:24 +0800
Subject: [PATCH 3479/4122] leds: use sysfs_emit() to instead of scnprintf()

Replace the open-code with sysfs_emit() to simplify the code.

Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 drivers/leds/leds-blinkm.c             |  8 ++++----
 drivers/leds/leds-lm3533.c             | 12 ++++++------
 drivers/leds/leds-lp5521.c             |  2 +-
 drivers/leds/leds-lp55xx-common.c      |  4 ++--
 drivers/leds/trigger/ledtrig-pattern.c |  2 +-
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/leds/leds-blinkm.c b/drivers/leds/leds-blinkm.c
index 3fb6a2fdaefa6..e19cc8a7b7cac 100644
--- a/drivers/leds/leds-blinkm.c
+++ b/drivers/leds/leds-blinkm.c
@@ -139,11 +139,11 @@ static ssize_t show_color_common(struct device *dev, char *buf, int color)
 		return ret;
 	switch (color) {
 	case RED:
-		return scnprintf(buf, PAGE_SIZE, "%02X\n", data->red);
+		return sysfs_emit(buf, "%02X\n", data->red);
 	case GREEN:
-		return scnprintf(buf, PAGE_SIZE, "%02X\n", data->green);
+		return sysfs_emit(buf, "%02X\n", data->green);
 	case BLUE:
-		return scnprintf(buf, PAGE_SIZE, "%02X\n", data->blue);
+		return sysfs_emit(buf, "%02X\n", data->blue);
 	default:
 		return -EINVAL;
 	}
@@ -253,7 +253,7 @@ static DEVICE_ATTR_RW(blue);
 static ssize_t test_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
-	return scnprintf(buf, PAGE_SIZE,
+	return sysfs_emit(buf,
 			 "#Write into test to start test sequence!#\n");
 }
 
diff --git a/drivers/leds/leds-lm3533.c b/drivers/leds/leds-lm3533.c
index 43d5970d96aa2..bcd414eb47246 100644
--- a/drivers/leds/leds-lm3533.c
+++ b/drivers/leds/leds-lm3533.c
@@ -314,7 +314,7 @@ static ssize_t show_id(struct device *dev,
 	struct led_classdev *led_cdev = dev_get_drvdata(dev);
 	struct lm3533_led *led = to_lm3533_led(led_cdev);
 
-	return scnprintf(buf, PAGE_SIZE, "%d\n", led->id);
+	return sysfs_emit(buf, "%d\n", led->id);
 }
 
 /*
@@ -344,7 +344,7 @@ static ssize_t show_risefalltime(struct device *dev,
 	if (ret)
 		return ret;
 
-	return scnprintf(buf, PAGE_SIZE, "%x\n", val);
+	return sysfs_emit(buf, "%x\n", val);
 }
 
 static ssize_t show_risetime(struct device *dev,
@@ -415,7 +415,7 @@ static ssize_t show_als_channel(struct device *dev,
 
 	channel = (val & LM3533_REG_CTRLBANK_BCONF_ALS_CHANNEL_MASK) + 1;
 
-	return scnprintf(buf, PAGE_SIZE, "%u\n", channel);
+	return sysfs_emit(buf, "%u\n", channel);
 }
 
 static ssize_t store_als_channel(struct device *dev,
@@ -465,7 +465,7 @@ static ssize_t show_als_en(struct device *dev,
 
 	enable = val & LM3533_REG_CTRLBANK_BCONF_ALS_EN_MASK;
 
-	return scnprintf(buf, PAGE_SIZE, "%d\n", enable);
+	return sysfs_emit(buf, "%d\n", enable);
 }
 
 static ssize_t store_als_en(struct device *dev,
@@ -518,7 +518,7 @@ static ssize_t show_linear(struct device *dev,
 	else
 		linear = 0;
 
-	return scnprintf(buf, PAGE_SIZE, "%x\n", linear);
+	return sysfs_emit(buf, "%x\n", linear);
 }
 
 static ssize_t store_linear(struct device *dev,
@@ -564,7 +564,7 @@ static ssize_t show_pwm(struct device *dev,
 	if (ret)
 		return ret;
 
-	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
+	return sysfs_emit(buf, "%u\n", val);
 }
 
 static ssize_t store_pwm(struct device *dev,
diff --git a/drivers/leds/leds-lp5521.c b/drivers/leds/leds-lp5521.c
index 7ff20c2605041..19478d9c19a70 100644
--- a/drivers/leds/leds-lp5521.c
+++ b/drivers/leds/leds-lp5521.c
@@ -469,7 +469,7 @@ static ssize_t lp5521_selftest(struct device *dev,
 	ret = lp5521_run_selftest(chip, buf);
 	mutex_unlock(&chip->lock);
 
-	return scnprintf(buf, PAGE_SIZE, "%s\n", ret ? "FAIL" : "OK");
+	return sysfs_emit(buf, "%s\n", ret ? "FAIL" : "OK");
 }
 
 /* device attributes */
diff --git a/drivers/leds/leds-lp55xx-common.c b/drivers/leds/leds-lp55xx-common.c
index ca2e28fb843f1..c1940964067af 100644
--- a/drivers/leds/leds-lp55xx-common.c
+++ b/drivers/leds/leds-lp55xx-common.c
@@ -88,7 +88,7 @@ static ssize_t led_current_show(struct device *dev,
 {
 	struct lp55xx_led *led = dev_to_lp55xx_led(dev);
 
-	return scnprintf(buf, PAGE_SIZE, "%d\n", led->led_current);
+	return sysfs_emit(buf, "%d\n", led->led_current);
 }
 
 static ssize_t led_current_store(struct device *dev,
@@ -121,7 +121,7 @@ static ssize_t max_current_show(struct device *dev,
 {
 	struct lp55xx_led *led = dev_to_lp55xx_led(dev);
 
-	return scnprintf(buf, PAGE_SIZE, "%d\n", led->max_current);
+	return sysfs_emit(buf, "%d\n", led->max_current);
 }
 
 static DEVICE_ATTR_RW(led_current);
diff --git a/drivers/leds/trigger/ledtrig-pattern.c b/drivers/leds/trigger/ledtrig-pattern.c
index 43a265dc4696e..885ca63f383fb 100644
--- a/drivers/leds/trigger/ledtrig-pattern.c
+++ b/drivers/leds/trigger/ledtrig-pattern.c
@@ -155,7 +155,7 @@ static ssize_t repeat_show(struct device *dev, struct device_attribute *attr,
 
 	mutex_unlock(&data->lock);
 
-	return scnprintf(buf, PAGE_SIZE, "%d\n", repeat);
+	return sysfs_emit(buf, "%d\n", repeat);
 }
 
 static ssize_t repeat_store(struct device *dev, struct device_attribute *attr,
-- 
GitLab


From 6afd8bd5db7c5f734f24a2df8c1093df9f2ec38f Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Thu, 1 Dec 2022 14:15:05 +0100
Subject: [PATCH 3480/4122] leds: qcom,pm8058-led: Convert to DT schema

Convert the Qualcomm PM8058 PMIC LED bindings to DT schema.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 .../devicetree/bindings/leds/leds-pm8058.txt  | 67 -------------------
 .../bindings/leds/qcom,pm8058-led.yaml        | 57 ++++++++++++++++
 .../devicetree/bindings/mfd/qcom-pm8xxx.yaml  |  4 ++
 3 files changed, 61 insertions(+), 67 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/leds/leds-pm8058.txt
 create mode 100644 Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml

diff --git a/Documentation/devicetree/bindings/leds/leds-pm8058.txt b/Documentation/devicetree/bindings/leds/leds-pm8058.txt
deleted file mode 100644
index 89584c49aab2b..0000000000000
--- a/Documentation/devicetree/bindings/leds/leds-pm8058.txt
+++ /dev/null
@@ -1,67 +0,0 @@
-Qualcomm PM8058 LED driver
-
-The Qualcomm PM8058 is a multi-functional device which contains
-an LED driver block for up to six LEDs: three normal LEDs, two
-"flash" LEDs and one "keypad backlight" LED. The names are
-quoted because sometimes these LED drivers are used for wildly
-different things than flash or keypad backlight: their names
-are more of a suggestion than a hard-wired usecase.
-
-Hardware-wise the different LEDs support slightly different
-output currents. The "flash" LEDs do not need to charge nor
-do they support external triggers. They are just powerful LED
-drivers.
-
-The LEDs appear as children to the PM8058 device, with the
-proper compatible string. For the PM8058 bindings see:
-mfd/qcom-pm8xxx.txt.
-
-Each LED is represented as a sub-node of the syscon device. Each
-node's name represents the name of the corresponding LED.
-
-LED sub-node properties:
-
-Required properties:
-- compatible: one of
-  "qcom,pm8058-led" (for the normal LEDs at 0x131, 0x132 and 0x133)
-  "qcom,pm8058-keypad-led" (for the "keypad" LED at 0x48)
-  "qcom,pm8058-flash-led" (for the "flash" LEDs at 0x49 and 0xFB)
-
-Optional properties:
-- label: see Documentation/devicetree/bindings/leds/common.txt
-- default-state: see Documentation/devicetree/bindings/leds/common.txt
-- linux,default-trigger: see Documentation/devicetree/bindings/leds/common.txt
-
-Example:
-
-qcom,ssbi@500000 {
-	pmicintc: pmic@0 {
-		compatible = "qcom,pm8058";
-		led@48 {
-			compatible = "qcom,pm8058-keypad-led";
-			reg = <0x48>;
-			label = "pm8050:white:keypad";
-			default-state = "off";
-		};
-		led@131 {
-			compatible = "qcom,pm8058-led";
-			reg = <0x131>;
-			label = "pm8058:red";
-			default-state = "off";
-		};
-		led@132 {
-			compatible = "qcom,pm8058-led";
-			reg = <0x132>;
-			label = "pm8058:yellow";
-			default-state = "off";
-			linux,default-trigger = "mmc0";
-		};
-		led@133 {
-			compatible = "qcom,pm8058-led";
-			reg = <0x133>;
-			label = "pm8058:green";
-			default-state = "on";
-			linux,default-trigger = "heartbeat";
-		};
-	};
-};
diff --git a/Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml b/Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml
new file mode 100644
index 0000000000000..fa03e73622d48
--- /dev/null
+++ b/Documentation/devicetree/bindings/leds/qcom,pm8058-led.yaml
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/qcom,pm8058-led.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PM8058 PMIC LED
+
+maintainers:
+  - Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
+
+description: |
+  The Qualcomm PM8058 contains an LED block for up to six LEDs:: three normal
+  LEDs, two "flash" LEDs and one "keypad backlight" LED. The names are quoted
+  because sometimes these LED drivers are used for wildly different things than
+  flash or keypad backlight:: their names are more of a suggestion than a
+  hard-wired usecase.
+
+  Hardware-wise the different LEDs support slightly different output currents.
+  The "flash" LEDs do not need to charge nor do they support external triggers.
+  They are just powerful LED drivers.
+
+allOf:
+  - $ref: common.yaml#
+
+properties:
+  compatible:
+    enum:
+      - qcom,pm8058-led
+      - qcom,pm8058-keypad-led
+      - qcom,pm8058-flash-led
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/leds/common.h>
+
+    pmic {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        led@131 {
+            compatible = "qcom,pm8058-led";
+            reg = <0x131>;
+            label = "pm8058:red";
+            color = <LED_COLOR_ID_RED>;
+            default-state = "off";
+        };
+    };
diff --git a/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml
index 61bd0b3ce02f8..bd6e4aecfe2b1 100644
--- a/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml
+++ b/Documentation/devicetree/bindings/mfd/qcom-pm8xxx.yaml
@@ -39,6 +39,10 @@ properties:
   interrupt-controller: true
 
 patternProperties:
+  "led@[0-9a-f]+$":
+    type: object
+    $ref: /schemas/leds/qcom,pm8058-led.yaml#
+
   "rtc@[0-9a-f]+$":
     type: object
     $ref: "../rtc/qcom-pm8xxx-rtc.yaml"
-- 
GitLab


From 7cb092a0336c5770656c6742e7a7ce3042c8c44e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 30 Aug 2022 11:36:25 +0300
Subject: [PATCH 3481/4122] leds: MAINTAINERS: include dt-bindings headers

Include the Devicetree binding headers in LED SUBSYSTEM entry.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexander Dahl <ada@thorsis.com>
Signed-off-by: Pavel Machek <pavel@ucw.cz>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index e12755e75c820..ce0f246d0544a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11561,6 +11561,7 @@ S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
 F:	Documentation/devicetree/bindings/leds/
 F:	drivers/leds/
+F:	include/dt-bindings/leds/
 F:	include/linux/leds.h
 
 LEGACY EEPROM DRIVER
-- 
GitLab


From b3525072835b523b397d459fadd0785d9c24bbd1 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 24 Oct 2022 14:29:39 +0100
Subject: [PATCH 3482/4122] orangefs: remove variable i

Variable i is just being incremented and it's never used
anywhere else. The variable and the increment are redundant so
remove it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/inode.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 7a8c0c6e698de..eaa35a9661151 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -530,7 +530,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
 	size_t count = iov_iter_count(iter);
 	ssize_t total_count = 0;
 	ssize_t ret = -EINVAL;
-	int i = 0;
 
 	gossip_debug(GOSSIP_FILE_DEBUG,
 		"%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
@@ -556,7 +555,6 @@ static ssize_t orangefs_direct_IO(struct kiocb *iocb,
 	while (iov_iter_count(iter)) {
 		size_t each_count = iov_iter_count(iter);
 		size_t amt_complete;
-		i++;
 
 		/* how much to transfer in this loop iteration */
 		if (each_count > orangefs_bufmap_size_query())
-- 
GitLab


From 610defdccff7b955fe899a825990c2202153a22e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Mon, 17 Oct 2022 22:49:37 +0100
Subject: [PATCH 3483/4122] orangefs: remove redundant assignment to variable
 buffer_index

The variable buffer_index is assigned a value that is never read,
it is assigned just before the function returns. The assignment is
redundant and can be removed.

Cleans up clang scan build warning:
fs/orangefs/file.c:276:3: warning: Value stored to 'buffer_index'
is never read [deadcode.DeadStores]

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/file.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 732661aa26804..167fa43b24f9a 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -273,7 +273,6 @@ out:
 		gossip_debug(GOSSIP_FILE_DEBUG,
 			"%s(%pU): PUT buffer_index %d\n",
 			__func__, handle, buffer_index);
-		buffer_index = -1;
 	}
 	op_release(new_op);
 	return ret;
-- 
GitLab


From ea60a4ad0cf88b411cde6888b8c890935686ecd7 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Tue, 18 Oct 2022 12:40:04 +0800
Subject: [PATCH 3484/4122] orangefs: Fix sysfs not cleanup when dev init
 failed

When the dev init failed, should cleanup the sysfs, otherwise, the
module will never be loaded since can not create duplicate sysfs
directory:

  sysfs: cannot create duplicate filename '/fs/orangefs'

  CPU: 1 PID: 6549 Comm: insmod Tainted: G        W          6.0.0+ #44
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
  Call Trace:
   <TASK>
   dump_stack_lvl+0x34/0x44
   sysfs_warn_dup.cold+0x17/0x24
   sysfs_create_dir_ns+0x16d/0x180
   kobject_add_internal+0x156/0x3a0
   kobject_init_and_add+0xcf/0x120
   orangefs_sysfs_init+0x7e/0x3a0 [orangefs]
   orangefs_init+0xfe/0x1000 [orangefs]
   do_one_initcall+0x87/0x2a0
   do_init_module+0xdf/0x320
   load_module+0x2f98/0x3330
   __do_sys_finit_module+0x113/0x1b0
   do_syscall_64+0x35/0x80
   entry_SYSCALL_64_after_hwframe+0x46/0xb0

  kobject_add_internal failed for orangefs with -EEXIST, don't try to register things with the same name in the same directory.

Fixes: 2f83ace37181 ("orangefs: put register_chrdev immediately before register_filesystem")
Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/orangefs-mod.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index cd7297815f91e..5ab741c60b7e2 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -141,7 +141,7 @@ static int __init orangefs_init(void)
 		gossip_err("%s: could not initialize device subsystem %d!\n",
 			   __func__,
 			   ret);
-		goto cleanup_device;
+		goto cleanup_sysfs;
 	}
 
 	ret = register_filesystem(&orangefs_fs_type);
@@ -152,11 +152,11 @@ static int __init orangefs_init(void)
 		goto out;
 	}
 
-	orangefs_sysfs_exit();
-
-cleanup_device:
 	orangefs_dev_cleanup();
 
+cleanup_sysfs:
+	orangefs_sysfs_exit();
+
 sysfs_init_failed:
 	orangefs_debugfs_cleanup();
 
-- 
GitLab


From d23417a5bf3a3afc55de5442eb46e1e60458b0a1 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Tue, 18 Oct 2022 12:40:05 +0800
Subject: [PATCH 3485/4122] orangefs: Fix kmemleak in
 orangefs_prepare_debugfs_help_string()

When insert and remove the orangefs module, then debug_help_string will
be leaked:

  unreferenced object 0xffff8881652ba000 (size 4096):
    comm "insmod", pid 1701, jiffies 4294893639 (age 13218.530s)
    hex dump (first 32 bytes):
      43 6c 69 65 6e 74 20 44 65 62 75 67 20 4b 65 79  Client Debug Key
      77 6f 72 64 73 20 61 72 65 20 75 6e 6b 6e 6f 77  words are unknow
    backtrace:
      [<0000000004e6f8e3>] kmalloc_trace+0x27/0xa0
      [<0000000006f75d85>] orangefs_prepare_debugfs_help_string+0x5e/0x480 [orangefs]
      [<0000000091270a2a>] _sub_I_65535_1+0x57/0xf70 [crc_itu_t]
      [<000000004b1ee1a3>] do_one_initcall+0x87/0x2a0
      [<000000001d0614ae>] do_init_module+0xdf/0x320
      [<00000000efef068c>] load_module+0x2f98/0x3330
      [<000000006533b44d>] __do_sys_finit_module+0x113/0x1b0
      [<00000000a0da6f99>] do_syscall_64+0x35/0x80
      [<000000007790b19b>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

When remove the module, should always free debug_help_string. Should
always free the allocated buffer when change the free_debug_help_string.

Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/orangefs-debugfs.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index 29eaa45443727..a848b6ef95990 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -222,6 +222,8 @@ out:
 void orangefs_debugfs_cleanup(void)
 {
 	debugfs_remove_recursive(debug_dir);
+	kfree(debug_help_string);
+	debug_help_string = NULL;
 }
 
 /* open ORANGEFS_KMOD_DEBUG_HELP_FILE */
@@ -671,6 +673,7 @@ int orangefs_prepare_debugfs_help_string(int at_boot)
 		memset(debug_help_string, 0, DEBUG_HELP_STRING_SIZE);
 		strlcat(debug_help_string, new, string_size);
 		mutex_unlock(&orangefs_help_file_lock);
+		kfree(new);
 	}
 
 	rc = 0;
-- 
GitLab


From 1f2c0e8a587bcafad85019a2d80f158d8d41a868 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Tue, 18 Oct 2022 12:40:06 +0800
Subject: [PATCH 3486/4122] orangefs: Fix kmemleak in orangefs_sysfs_init()

When insert and remove the orangefs module, there are kobjects memory
leaked as below:

unreferenced object 0xffff88810f95af00 (size 64):
  comm "insmod", pid 783, jiffies 4294813439 (age 65.512s)
  hex dump (first 32 bytes):
    a0 83 af 01 81 88 ff ff 08 af 95 0f 81 88 ff ff  ................
    08 af 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000031ab7788>] kmalloc_trace+0x27/0xa0
    [<000000005a6e4dfe>] orangefs_sysfs_init+0x42/0x3a0
    [<00000000722645ca>] 0xffffffffa02780fe
    [<000000004232d9f7>] do_one_initcall+0x87/0x2a0
    [<0000000054f22384>] do_init_module+0xdf/0x320
    [<000000003263bdea>] load_module+0x2f98/0x3330
    [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0
    [<00000000250ae02b>] do_syscall_64+0x35/0x80
    [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

unreferenced object 0xffff88810f95ae80 (size 64):
  comm "insmod", pid 783, jiffies 4294813439 (age 65.512s)
  hex dump (first 32 bytes):
    c8 90 0f 02 81 88 ff ff 88 ae 95 0f 81 88 ff ff  ................
    88 ae 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000031ab7788>] kmalloc_trace+0x27/0xa0
    [<000000001a4841fa>] orangefs_sysfs_init+0xc7/0x3a0
    [<00000000722645ca>] 0xffffffffa02780fe
    [<000000004232d9f7>] do_one_initcall+0x87/0x2a0
    [<0000000054f22384>] do_init_module+0xdf/0x320
    [<000000003263bdea>] load_module+0x2f98/0x3330
    [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0
    [<00000000250ae02b>] do_syscall_64+0x35/0x80
    [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

unreferenced object 0xffff88810f95ae00 (size 64):
  comm "insmod", pid 783, jiffies 4294813440 (age 65.511s)
  hex dump (first 32 bytes):
    60 87 a1 00 81 88 ff ff 08 ae 95 0f 81 88 ff ff  `...............
    08 ae 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000031ab7788>] kmalloc_trace+0x27/0xa0
    [<000000005915e797>] orangefs_sysfs_init+0x12b/0x3a0
    [<00000000722645ca>] 0xffffffffa02780fe
    [<000000004232d9f7>] do_one_initcall+0x87/0x2a0
    [<0000000054f22384>] do_init_module+0xdf/0x320
    [<000000003263bdea>] load_module+0x2f98/0x3330
    [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0
    [<00000000250ae02b>] do_syscall_64+0x35/0x80
    [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

unreferenced object 0xffff88810f95ad80 (size 64):
  comm "insmod", pid 783, jiffies 4294813440 (age 65.511s)
  hex dump (first 32 bytes):
    78 90 0f 02 81 88 ff ff 88 ad 95 0f 81 88 ff ff  x...............
    88 ad 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000031ab7788>] kmalloc_trace+0x27/0xa0
    [<000000007a14eb35>] orangefs_sysfs_init+0x1ac/0x3a0
    [<00000000722645ca>] 0xffffffffa02780fe
    [<000000004232d9f7>] do_one_initcall+0x87/0x2a0
    [<0000000054f22384>] do_init_module+0xdf/0x320
    [<000000003263bdea>] load_module+0x2f98/0x3330
    [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0
    [<00000000250ae02b>] do_syscall_64+0x35/0x80
    [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

unreferenced object 0xffff88810f95ac00 (size 64):
  comm "insmod", pid 783, jiffies 4294813440 (age 65.531s)
  hex dump (first 32 bytes):
    e0 ff 67 02 81 88 ff ff 08 ac 95 0f 81 88 ff ff  ..g.............
    08 ac 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000031ab7788>] kmalloc_trace+0x27/0xa0
    [<000000001f38adcb>] orangefs_sysfs_init+0x291/0x3a0
    [<00000000722645ca>] 0xffffffffa02780fe
    [<000000004232d9f7>] do_one_initcall+0x87/0x2a0
    [<0000000054f22384>] do_init_module+0xdf/0x320
    [<000000003263bdea>] load_module+0x2f98/0x3330
    [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0
    [<00000000250ae02b>] do_syscall_64+0x35/0x80
    [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

unreferenced object 0xffff88810f95ab80 (size 64):
  comm "insmod", pid 783, jiffies 4294813441 (age 65.530s)
  hex dump (first 32 bytes):
    50 bf 2f 02 81 88 ff ff 88 ab 95 0f 81 88 ff ff  P./.............
    88 ab 95 0f 81 88 ff ff 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000031ab7788>] kmalloc_trace+0x27/0xa0
    [<000000009cc7d95b>] orangefs_sysfs_init+0x2f5/0x3a0
    [<00000000722645ca>] 0xffffffffa02780fe
    [<000000004232d9f7>] do_one_initcall+0x87/0x2a0
    [<0000000054f22384>] do_init_module+0xdf/0x320
    [<000000003263bdea>] load_module+0x2f98/0x3330
    [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0
    [<00000000250ae02b>] do_syscall_64+0x35/0x80
    [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

Should add release function for each kobject_type to free the memory.

Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/orangefs-sysfs.c | 71 ++++++++++++++++++++++++++++++++----
 1 file changed, 63 insertions(+), 8 deletions(-)

diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
index de80b62553bb1..be4ba03a01a0f 100644
--- a/fs/orangefs/orangefs-sysfs.c
+++ b/fs/orangefs/orangefs-sysfs.c
@@ -896,9 +896,18 @@ static struct attribute *orangefs_default_attrs[] = {
 };
 ATTRIBUTE_GROUPS(orangefs_default);
 
+static struct kobject *orangefs_obj;
+
+static void orangefs_obj_release(struct kobject *kobj)
+{
+	kfree(orangefs_obj);
+	orangefs_obj = NULL;
+}
+
 static struct kobj_type orangefs_ktype = {
 	.sysfs_ops = &orangefs_sysfs_ops,
 	.default_groups = orangefs_default_groups,
+	.release = orangefs_obj_release,
 };
 
 static struct orangefs_attribute acache_hard_limit_attribute =
@@ -934,9 +943,18 @@ static struct attribute *acache_orangefs_default_attrs[] = {
 };
 ATTRIBUTE_GROUPS(acache_orangefs_default);
 
+static struct kobject *acache_orangefs_obj;
+
+static void acache_orangefs_obj_release(struct kobject *kobj)
+{
+	kfree(acache_orangefs_obj);
+	acache_orangefs_obj = NULL;
+}
+
 static struct kobj_type acache_orangefs_ktype = {
 	.sysfs_ops = &orangefs_sysfs_ops,
 	.default_groups = acache_orangefs_default_groups,
+	.release = acache_orangefs_obj_release,
 };
 
 static struct orangefs_attribute capcache_hard_limit_attribute =
@@ -972,9 +990,18 @@ static struct attribute *capcache_orangefs_default_attrs[] = {
 };
 ATTRIBUTE_GROUPS(capcache_orangefs_default);
 
+static struct kobject *capcache_orangefs_obj;
+
+static void capcache_orangefs_obj_release(struct kobject *kobj)
+{
+	kfree(capcache_orangefs_obj);
+	capcache_orangefs_obj = NULL;
+}
+
 static struct kobj_type capcache_orangefs_ktype = {
 	.sysfs_ops = &orangefs_sysfs_ops,
 	.default_groups = capcache_orangefs_default_groups,
+	.release = capcache_orangefs_obj_release,
 };
 
 static struct orangefs_attribute ccache_hard_limit_attribute =
@@ -1010,9 +1037,18 @@ static struct attribute *ccache_orangefs_default_attrs[] = {
 };
 ATTRIBUTE_GROUPS(ccache_orangefs_default);
 
+static struct kobject *ccache_orangefs_obj;
+
+static void ccache_orangefs_obj_release(struct kobject *kobj)
+{
+	kfree(ccache_orangefs_obj);
+	ccache_orangefs_obj = NULL;
+}
+
 static struct kobj_type ccache_orangefs_ktype = {
 	.sysfs_ops = &orangefs_sysfs_ops,
 	.default_groups = ccache_orangefs_default_groups,
+	.release = ccache_orangefs_obj_release,
 };
 
 static struct orangefs_attribute ncache_hard_limit_attribute =
@@ -1048,9 +1084,18 @@ static struct attribute *ncache_orangefs_default_attrs[] = {
 };
 ATTRIBUTE_GROUPS(ncache_orangefs_default);
 
+static struct kobject *ncache_orangefs_obj;
+
+static void ncache_orangefs_obj_release(struct kobject *kobj)
+{
+	kfree(ncache_orangefs_obj);
+	ncache_orangefs_obj = NULL;
+}
+
 static struct kobj_type ncache_orangefs_ktype = {
 	.sysfs_ops = &orangefs_sysfs_ops,
 	.default_groups = ncache_orangefs_default_groups,
+	.release = ncache_orangefs_obj_release,
 };
 
 static struct orangefs_attribute pc_acache_attribute =
@@ -1079,9 +1124,18 @@ static struct attribute *pc_orangefs_default_attrs[] = {
 };
 ATTRIBUTE_GROUPS(pc_orangefs_default);
 
+static struct kobject *pc_orangefs_obj;
+
+static void pc_orangefs_obj_release(struct kobject *kobj)
+{
+	kfree(pc_orangefs_obj);
+	pc_orangefs_obj = NULL;
+}
+
 static struct kobj_type pc_orangefs_ktype = {
 	.sysfs_ops = &orangefs_sysfs_ops,
 	.default_groups = pc_orangefs_default_groups,
+	.release = pc_orangefs_obj_release,
 };
 
 static struct orangefs_attribute stats_reads_attribute =
@@ -1103,19 +1157,20 @@ static struct attribute *stats_orangefs_default_attrs[] = {
 };
 ATTRIBUTE_GROUPS(stats_orangefs_default);
 
+static struct kobject *stats_orangefs_obj;
+
+static void stats_orangefs_obj_release(struct kobject *kobj)
+{
+	kfree(stats_orangefs_obj);
+	stats_orangefs_obj = NULL;
+}
+
 static struct kobj_type stats_orangefs_ktype = {
 	.sysfs_ops = &orangefs_sysfs_ops,
 	.default_groups = stats_orangefs_default_groups,
+	.release = stats_orangefs_obj_release,
 };
 
-static struct kobject *orangefs_obj;
-static struct kobject *acache_orangefs_obj;
-static struct kobject *capcache_orangefs_obj;
-static struct kobject *ccache_orangefs_obj;
-static struct kobject *ncache_orangefs_obj;
-static struct kobject *pc_orangefs_obj;
-static struct kobject *stats_orangefs_obj;
-
 int orangefs_sysfs_init(void)
 {
 	int rc = -EINVAL;
-- 
GitLab


From 31720a2b109b3080eb77e97b8f6f50a27b4ae599 Mon Sep 17 00:00:00 2001
From: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Date: Tue, 18 Oct 2022 12:40:07 +0800
Subject: [PATCH 3487/4122] orangefs: Fix kmemleak in
 orangefs_{kernel,client}_debug_init()

When insert and remove the orangefs module, there are memory leaked
as below:

unreferenced object 0xffff88816b0cc000 (size 2048):
  comm "insmod", pid 783, jiffies 4294813439 (age 65.512s)
  hex dump (first 32 bytes):
    6e 6f 6e 65 0a 00 00 00 00 00 00 00 00 00 00 00  none............
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<0000000031ab7788>] kmalloc_trace+0x27/0xa0
    [<000000005b405fee>] orangefs_debugfs_init.cold+0xaf/0x17f
    [<00000000e5a0085b>] 0xffffffffa02780f9
    [<000000004232d9f7>] do_one_initcall+0x87/0x2a0
    [<0000000054f22384>] do_init_module+0xdf/0x320
    [<000000003263bdea>] load_module+0x2f98/0x3330
    [<0000000052cd4153>] __do_sys_finit_module+0x113/0x1b0
    [<00000000250ae02b>] do_syscall_64+0x35/0x80
    [<00000000f11c03c7>] entry_SYSCALL_64_after_hwframe+0x46/0xb0

Use the golbal variable as the buffer rather than dynamic allocate to
slove the problem.

Signed-off-by: Zhang Xiaoxu <zhangxiaoxu5@huawei.com>
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
---
 fs/orangefs/orangefs-debugfs.c | 26 +++-----------------------
 1 file changed, 3 insertions(+), 23 deletions(-)

diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
index a848b6ef95990..1b508f5433846 100644
--- a/fs/orangefs/orangefs-debugfs.c
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -194,15 +194,10 @@ void orangefs_debugfs_init(int debug_mask)
  */
 static void orangefs_kernel_debug_init(void)
 {
-	int rc = -ENOMEM;
-	char *k_buffer = NULL;
+	static char k_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { };
 
 	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
 
-	k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
-	if (!k_buffer)
-		goto out;
-
 	if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
 		strcpy(k_buffer, kernel_debug_string);
 		strcat(k_buffer, "\n");
@@ -213,9 +208,6 @@ static void orangefs_kernel_debug_init(void)
 
 	debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE, 0444, debug_dir, k_buffer,
 			    &kernel_debug_fops);
-
-out:
-	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
 }
 
 
@@ -299,18 +291,13 @@ static int help_show(struct seq_file *m, void *v)
 /*
  * initialize the client-debug file.
  */
-static int orangefs_client_debug_init(void)
+static void orangefs_client_debug_init(void)
 {
 
-	int rc = -ENOMEM;
-	char *c_buffer = NULL;
+	static char c_buffer[ORANGEFS_MAX_DEBUG_STRING_LEN] = { };
 
 	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
 
-	c_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
-	if (!c_buffer)
-		goto out;
-
 	if (strlen(client_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
 		strcpy(c_buffer, client_debug_string);
 		strcat(c_buffer, "\n");
@@ -324,13 +311,6 @@ static int orangefs_client_debug_init(void)
 						  debug_dir,
 						  c_buffer,
 						  &kernel_debug_fops);
-
-	rc = 0;
-
-out:
-
-	gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
-	return rc;
 }
 
 /* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/
-- 
GitLab


From 2d47b79d2bd39cc6369eccf94a06568d84c906ae Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 29 Oct 2022 17:38:25 +0800
Subject: [PATCH 3488/4122] i2c: mux: reg: check return value after calling
 platform_get_resource()

It will cause null-ptr-deref in resource_size(), if platform_get_resource()
returns NULL, move calling resource_size() after devm_ioremap_resource() that
will check 'res' to avoid null-ptr-deref.
And use devm_platform_get_and_ioremap_resource() to simplify code.

Fixes: b3fdd32799d8 ("i2c: mux: Add register-based mux i2c-mux-reg")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/muxes/i2c-mux-reg.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/i2c/muxes/i2c-mux-reg.c b/drivers/i2c/muxes/i2c-mux-reg.c
index 0e0679f65cf77..30a6de1694e07 100644
--- a/drivers/i2c/muxes/i2c-mux-reg.c
+++ b/drivers/i2c/muxes/i2c-mux-reg.c
@@ -183,13 +183,12 @@ static int i2c_mux_reg_probe(struct platform_device *pdev)
 	if (!mux->data.reg) {
 		dev_info(&pdev->dev,
 			"Register not set, using platform resource\n");
-		res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-		mux->data.reg_size = resource_size(res);
-		mux->data.reg = devm_ioremap_resource(&pdev->dev, res);
+		mux->data.reg = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 		if (IS_ERR(mux->data.reg)) {
 			ret = PTR_ERR(mux->data.reg);
 			goto err_put_parent;
 		}
+		mux->data.reg_size = resource_size(res);
 	}
 
 	if (mux->data.reg_size != 4 && mux->data.reg_size != 2 &&
-- 
GitLab


From 39244cc754829bf707dccd12e2ce37510f5b1f8d Mon Sep 17 00:00:00 2001
From: Zheyu Ma <zheyuma97@gmail.com>
Date: Fri, 29 Jul 2022 19:02:16 +0800
Subject: [PATCH 3489/4122] i2c: ismt: Fix an out-of-bounds bug in
 ismt_access()

When the driver does not check the data from the user, the variable
'data->block[0]' may be very large to cause an out-of-bounds bug.

The following log can reveal it:

[   33.995542] i2c i2c-1: ioctl, cmd=0x720, arg=0x7ffcb3dc3a20
[   33.995978] ismt_smbus 0000:00:05.0: I2C_SMBUS_BLOCK_DATA:  WRITE
[   33.996475] ==================================================================
[   33.996995] BUG: KASAN: out-of-bounds in ismt_access.cold+0x374/0x214b
[   33.997473] Read of size 18446744073709551615 at addr ffff88810efcfdb1 by task ismt_poc/485
[   33.999450] Call Trace:
[   34.001849]  memcpy+0x20/0x60
[   34.002077]  ismt_access.cold+0x374/0x214b
[   34.003382]  __i2c_smbus_xfer+0x44f/0xfb0
[   34.004007]  i2c_smbus_xfer+0x10a/0x390
[   34.004291]  i2cdev_ioctl_smbus+0x2c8/0x710
[   34.005196]  i2cdev_ioctl+0x5ec/0x74c

Fix this bug by checking the size of 'data->block[0]' first.

Fixes: 13f35ac14cd0 ("i2c: Adding support for Intel iSMT SMBus 2.0 host controller")
Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
Signed-off-by: Wolfram Sang <wsa@kernel.org>
---
 drivers/i2c/busses/i2c-ismt.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/i2c/busses/i2c-ismt.c b/drivers/i2c/busses/i2c-ismt.c
index fe2349590f75e..c74985d77b0ec 100644
--- a/drivers/i2c/busses/i2c-ismt.c
+++ b/drivers/i2c/busses/i2c-ismt.c
@@ -509,6 +509,9 @@ static int ismt_access(struct i2c_adapter *adap, u16 addr,
 		if (read_write == I2C_SMBUS_WRITE) {
 			/* Block Write */
 			dev_dbg(dev, "I2C_SMBUS_BLOCK_DATA:  WRITE\n");
+			if (data->block[0] < 1 || data->block[0] > I2C_SMBUS_BLOCK_MAX)
+				return -EINVAL;
+
 			dma_size = data->block[0] + 1;
 			dma_direction = DMA_TO_DEVICE;
 			desc->wr_len_cmd = dma_size;
-- 
GitLab


From 76007ccc5727f86c105e96697e96dcf2df6b1634 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 7 Dec 2022 13:07:10 -0800
Subject: [PATCH 3490/4122] PCI: mvebu: Switch to using gpiod API

Switch the driver away from legacy gpio/of_gpio API to gpiod API, and
remove use of of_get_named_gpio_flags() which I want to make private to
gpiolib.

Link: https://lore.kernel.org/r/Y5EAft42YiT66mVj@google.com
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/controller/pci-mvebu.c | 51 ++++++++++--------------------
 1 file changed, 17 insertions(+), 34 deletions(-)

diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c
index 1ced73726a267..b12e128fc5d76 100644
--- a/drivers/pci/controller/pci-mvebu.c
+++ b/drivers/pci/controller/pci-mvebu.c
@@ -11,14 +11,14 @@
 #include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/init.h>
+#include <linux/irqchip/chained_irq.h>
 #include <linux/mbus.h>
 #include <linux/slab.h>
 #include <linux/platform_device.h>
 #include <linux/of_address.h>
 #include <linux/of_irq.h>
-#include <linux/of_gpio.h>
 #include <linux/of_pci.h>
 #include <linux/of_platform.h>
 
@@ -1261,9 +1261,8 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
 	struct mvebu_pcie_port *port, struct device_node *child)
 {
 	struct device *dev = &pcie->pdev->dev;
-	enum of_gpio_flags flags;
 	u32 slot_power_limit;
-	int reset_gpio, ret;
+	int ret;
 	u32 num_lanes;
 
 	port->pcie = pcie;
@@ -1327,40 +1326,24 @@ static int mvebu_pcie_parse_port(struct mvebu_pcie *pcie,
 			 port->name, child);
 	}
 
-	reset_gpio = of_get_named_gpio_flags(child, "reset-gpios", 0, &flags);
-	if (reset_gpio == -EPROBE_DEFER) {
-		ret = reset_gpio;
+	port->reset_name = devm_kasprintf(dev, GFP_KERNEL, "%s-reset",
+					  port->name);
+	if (!port->reset_name) {
+		ret = -ENOMEM;
 		goto err;
 	}
 
-	if (gpio_is_valid(reset_gpio)) {
-		unsigned long gpio_flags;
-
-		port->reset_name = devm_kasprintf(dev, GFP_KERNEL, "%s-reset",
-						  port->name);
-		if (!port->reset_name) {
-			ret = -ENOMEM;
+	port->reset_gpio = devm_fwnode_gpiod_get(dev, of_fwnode_handle(child),
+						 "reset", GPIOD_OUT_HIGH,
+						 port->name);
+	ret = PTR_ERR_OR_ZERO(port->reset_gpio);
+	if (ret) {
+		if (ret != -ENOENT)
 			goto err;
-		}
-
-		if (flags & OF_GPIO_ACTIVE_LOW) {
-			dev_info(dev, "%pOF: reset gpio is active low\n",
-				 child);
-			gpio_flags = GPIOF_ACTIVE_LOW |
-				     GPIOF_OUT_INIT_LOW;
-		} else {
-			gpio_flags = GPIOF_OUT_INIT_HIGH;
-		}
-
-		ret = devm_gpio_request_one(dev, reset_gpio, gpio_flags,
-					    port->reset_name);
-		if (ret) {
-			if (ret == -EPROBE_DEFER)
-				goto err;
-			goto skip;
-		}
-
-		port->reset_gpio = gpio_to_desc(reset_gpio);
+		/* reset gpio is optional */
+		port->reset_gpio = NULL;
+		devm_kfree(dev, port->reset_name);
+		port->reset_name = NULL;
 	}
 
 	slot_power_limit = of_pci_get_slot_power_limit(child,
-- 
GitLab


From 64fdcbcc064966bbf261bb455876dffa58858d32 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Thu, 8 Dec 2022 09:43:15 +1100
Subject: [PATCH 3491/4122] powerpc/prom: Fix 32-bit build

Add an IS_ENABLED() check to fix the build error:

arch/powerpc/kernel/prom.o: in function `early_init_dt_scan_cpus':
  prom.c:(.init.text+0x2ea): undefined reference to `boot_cpu_node_count'

Fixes: e13d23a404f2 ("powerpc: export the CPU node count")
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 arch/powerpc/kernel/prom.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 645f4450dfc38..4f1c920aa13ed 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -336,7 +336,8 @@ static int __init early_init_dt_scan_cpus(unsigned long node,
 	if (type == NULL || strcmp(type, "cpu") != 0)
 		return 0;
 
-	boot_cpu_node_count++;
+	if (IS_ENABLED(CONFIG_PPC64))
+		boot_cpu_node_count++;
 
 	/* Get physical cpuid */
 	intserv = of_get_flat_dt_prop(node, "ibm,ppc-interrupt-server#s", &len);
-- 
GitLab


From fb4907f487254375830f135dcfe5dd7e6f8b705f Mon Sep 17 00:00:00 2001
From: Chao Leng <lengchao@huawei.com>
Date: Fri, 25 Nov 2022 09:00:26 +0800
Subject: [PATCH 3492/4122] RDMA/cma: Change RoCE packet life time from 18 to
 16

The ack timeout retransmission time is affected by the following two
factors: one is packet life time, another is the HCA processing time.

Now the default packet lifetime(CMA_IBOE_PACKET_LIFETIME) is 18.

That means the minimum ack timeout is 2
seconds (2^(18+1)*4us=2.097seconds).  The packet lifetime means the
maximum transmission time of packets on the network, 2 seconds is too
long.

Assume the network is a clos topology with three layers, every packet will
pass through five hops of switches. Assume the buffer of every switch is
128MB and the port transmission rate is 25 Gbit/s, the maximum
transmission time of the packet is 200ms(128MB*5/25Gbit/s).  Add double
redundancy, it is less than 500ms.

So change the CMA_IBOE_PACKET_LIFETIME to 16, the maximum transmission
time of the packet will be about 500+ms, it is long enough.

Link: https://lore.kernel.org/r/20221125010026.755-1-lengchao@huawei.com
Signed-off-by: Chao Leng <lengchao@huawei.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/cma.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index cc2222b85c881..2f5b5e6f3d110 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -47,7 +47,7 @@ MODULE_LICENSE("Dual BSD/GPL");
 #define CMA_CM_RESPONSE_TIMEOUT 20
 #define CMA_MAX_CM_RETRIES 15
 #define CMA_CM_MRA_SETTING (IB_CM_MRA_FLAG_DELAY | 24)
-#define CMA_IBOE_PACKET_LIFETIME 18
+#define CMA_IBOE_PACKET_LIFETIME 16
 #define CMA_PREFERRED_ROCE_GID_TYPE IB_GID_TYPE_ROCE_UDP_ENCAP
 
 static const char * const cma_events[] = {
-- 
GitLab


From 487d65090a3dce1ae54946aded55d0f8ac87cbab Mon Sep 17 00:00:00 2001
From: Yixing Liu <liuyixing1@huawei.com>
Date: Sat, 26 Nov 2022 18:29:06 +0800
Subject: [PATCH 3493/4122] RDMA/hns: Fix the gid problem caused by free mr

After the hns roce driver is loaded, if you modify the mac address of the
network port, the following error will appear:

   __ib_cache_gid_add: unable to add gid fe80:0000:0000:0000:4600:4dff:fe22:abb5 error=-28
    hns3 0000:7d:00.0 hns_0: attr path_mtu(1) invalid while modify qp

The reason for the error is that the gid being occupied will cause the
failure to modify the gid. The gid is occupied by the loopback QP used by
free mr. When the mac address is modified, the gid will change. If there
is a busy QP at this time, the gid will not be released and the
modification will fail. The QP of free mr is created using the ib
interface. The ib interface will add a reference count to the gid,
resulting in this error scenario.

Considering that free mr is solving a bug in HIP08, not an actual
business, it is not necessary to use ib interfaces.

Fixes: 70f92521584f ("RDMA/hns: Use the reserved loopback QPs to free MR before destroying MPT")
Link: https://lore.kernel.org/r/20221126102911.2921820-2-xuhaoyue1@hisilicon.com
Signed-off-by: Yixing Liu <liuyixing1@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 175 ++++++++++++++++-----
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h |   8 +-
 2 files changed, 137 insertions(+), 46 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 939811867249f..93c6772396865 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2634,31 +2634,124 @@ static void free_dip_list(struct hns_roce_dev *hr_dev)
 	spin_unlock_irqrestore(&hr_dev->dip_list_lock, flags);
 }
 
-static void free_mr_exit(struct hns_roce_dev *hr_dev)
+static struct ib_pd *free_mr_init_pd(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_pd *hr_pd;
+	struct ib_pd *pd;
+
+	hr_pd = kzalloc(sizeof(*hr_pd), GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(hr_pd))
+		return NULL;
+	pd = &hr_pd->ibpd;
+	pd->device = ibdev;
+
+	if (hns_roce_alloc_pd(pd, NULL)) {
+		ibdev_err(ibdev, "failed to create pd for free mr.\n");
+		kfree(hr_pd);
+		return NULL;
+	}
+	free_mr->rsv_pd = to_hr_pd(pd);
+	free_mr->rsv_pd->ibpd.device = &hr_dev->ib_dev;
+	free_mr->rsv_pd->ibpd.uobject = NULL;
+	free_mr->rsv_pd->ibpd.__internal_mr = NULL;
+	atomic_set(&free_mr->rsv_pd->ibpd.usecnt, 0);
+
+	return pd;
+}
+
+static struct ib_cq *free_mr_init_cq(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct ib_cq_init_attr cq_init_attr = {};
+	struct hns_roce_cq *hr_cq;
+	struct ib_cq *cq;
+
+	cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
+
+	hr_cq = kzalloc(sizeof(*hr_cq), GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(hr_cq))
+		return NULL;
+
+	cq = &hr_cq->ib_cq;
+	cq->device = ibdev;
+
+	if (hns_roce_create_cq(cq, &cq_init_attr, NULL)) {
+		ibdev_err(ibdev, "failed to create cq for free mr.\n");
+		kfree(hr_cq);
+		return NULL;
+	}
+	free_mr->rsv_cq = to_hr_cq(cq);
+	free_mr->rsv_cq->ib_cq.device = &hr_dev->ib_dev;
+	free_mr->rsv_cq->ib_cq.uobject = NULL;
+	free_mr->rsv_cq->ib_cq.comp_handler = NULL;
+	free_mr->rsv_cq->ib_cq.event_handler = NULL;
+	free_mr->rsv_cq->ib_cq.cq_context = NULL;
+	atomic_set(&free_mr->rsv_cq->ib_cq.usecnt, 0);
+
+	return cq;
+}
+
+static int free_mr_init_qp(struct hns_roce_dev *hr_dev, struct ib_cq *cq,
+			   struct ib_qp_init_attr *init_attr, int i)
 {
 	struct hns_roce_v2_priv *priv = hr_dev->priv;
 	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+	struct ib_device *ibdev = &hr_dev->ib_dev;
+	struct hns_roce_qp *hr_qp;
+	struct ib_qp *qp;
 	int ret;
+
+	hr_qp = kzalloc(sizeof(*hr_qp), GFP_KERNEL);
+	if (ZERO_OR_NULL_PTR(hr_qp))
+		return -ENOMEM;
+
+	qp = &hr_qp->ibqp;
+	qp->device = ibdev;
+
+	ret = hns_roce_create_qp(qp, init_attr, NULL);
+	if (ret) {
+		ibdev_err(ibdev, "failed to create qp for free mr.\n");
+		kfree(hr_qp);
+		return ret;
+	}
+
+	free_mr->rsv_qp[i] = hr_qp;
+	free_mr->rsv_qp[i]->ibqp.recv_cq = cq;
+	free_mr->rsv_qp[i]->ibqp.send_cq = cq;
+
+	return 0;
+}
+
+static void free_mr_exit(struct hns_roce_dev *hr_dev)
+{
+	struct hns_roce_v2_priv *priv = hr_dev->priv;
+	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
+	struct ib_qp *qp;
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
 		if (free_mr->rsv_qp[i]) {
-			ret = ib_destroy_qp(free_mr->rsv_qp[i]);
-			if (ret)
-				ibdev_err(&hr_dev->ib_dev,
-					  "failed to destroy qp in free mr.\n");
-
+			qp = &free_mr->rsv_qp[i]->ibqp;
+			hns_roce_v2_destroy_qp(qp, NULL);
+			kfree(free_mr->rsv_qp[i]);
 			free_mr->rsv_qp[i] = NULL;
 		}
 	}
 
 	if (free_mr->rsv_cq) {
-		ib_destroy_cq(free_mr->rsv_cq);
+		hns_roce_destroy_cq(&free_mr->rsv_cq->ib_cq, NULL);
+		kfree(free_mr->rsv_cq);
 		free_mr->rsv_cq = NULL;
 	}
 
 	if (free_mr->rsv_pd) {
-		ib_dealloc_pd(free_mr->rsv_pd);
+		hns_roce_dealloc_pd(&free_mr->rsv_pd->ibpd, NULL);
+		kfree(free_mr->rsv_pd);
 		free_mr->rsv_pd = NULL;
 	}
 }
@@ -2667,55 +2760,46 @@ static int free_mr_alloc_res(struct hns_roce_dev *hr_dev)
 {
 	struct hns_roce_v2_priv *priv = hr_dev->priv;
 	struct hns_roce_v2_free_mr *free_mr = &priv->free_mr;
-	struct ib_device *ibdev = &hr_dev->ib_dev;
-	struct ib_cq_init_attr cq_init_attr = {};
 	struct ib_qp_init_attr qp_init_attr = {};
 	struct ib_pd *pd;
 	struct ib_cq *cq;
-	struct ib_qp *qp;
 	int ret;
 	int i;
 
-	pd = ib_alloc_pd(ibdev, 0);
-	if (IS_ERR(pd)) {
-		ibdev_err(ibdev, "failed to create pd for free mr.\n");
-		return PTR_ERR(pd);
-	}
-	free_mr->rsv_pd = pd;
+	pd = free_mr_init_pd(hr_dev);
+	if (!pd)
+		return -ENOMEM;
 
-	cq_init_attr.cqe = HNS_ROCE_FREE_MR_USED_CQE_NUM;
-	cq = ib_create_cq(ibdev, NULL, NULL, NULL, &cq_init_attr);
-	if (IS_ERR(cq)) {
-		ibdev_err(ibdev, "failed to create cq for free mr.\n");
-		ret = PTR_ERR(cq);
-		goto create_failed;
+	cq = free_mr_init_cq(hr_dev);
+	if (!cq) {
+		ret = -ENOMEM;
+		goto create_failed_cq;
 	}
-	free_mr->rsv_cq = cq;
 
 	qp_init_attr.qp_type = IB_QPT_RC;
 	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
-	qp_init_attr.send_cq = free_mr->rsv_cq;
-	qp_init_attr.recv_cq = free_mr->rsv_cq;
+	qp_init_attr.send_cq = cq;
+	qp_init_attr.recv_cq = cq;
 	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
 		qp_init_attr.cap.max_send_wr = HNS_ROCE_FREE_MR_USED_SQWQE_NUM;
 		qp_init_attr.cap.max_send_sge = HNS_ROCE_FREE_MR_USED_SQSGE_NUM;
 		qp_init_attr.cap.max_recv_wr = HNS_ROCE_FREE_MR_USED_RQWQE_NUM;
 		qp_init_attr.cap.max_recv_sge = HNS_ROCE_FREE_MR_USED_RQSGE_NUM;
 
-		qp = ib_create_qp(free_mr->rsv_pd, &qp_init_attr);
-		if (IS_ERR(qp)) {
-			ibdev_err(ibdev, "failed to create qp for free mr.\n");
-			ret = PTR_ERR(qp);
-			goto create_failed;
-		}
-
-		free_mr->rsv_qp[i] = qp;
+		ret = free_mr_init_qp(hr_dev, cq, &qp_init_attr, i);
+		if (ret)
+			goto create_failed_qp;
 	}
 
 	return 0;
 
-create_failed:
-	free_mr_exit(hr_dev);
+create_failed_qp:
+	hns_roce_destroy_cq(cq, NULL);
+	kfree(cq);
+
+create_failed_cq:
+	hns_roce_dealloc_pd(pd, NULL);
+	kfree(pd);
 
 	return ret;
 }
@@ -2731,14 +2815,17 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
 	int mask;
 	int ret;
 
-	hr_qp = to_hr_qp(free_mr->rsv_qp[sl_num]);
+	hr_qp = to_hr_qp(&free_mr->rsv_qp[sl_num]->ibqp);
 	hr_qp->free_mr_en = 1;
+	hr_qp->ibqp.device = ibdev;
+	hr_qp->ibqp.qp_type = IB_QPT_RC;
 
 	mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_ACCESS_FLAGS;
 	attr->qp_state = IB_QPS_INIT;
 	attr->port_num = 1;
 	attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
-	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+	ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+				    IB_QPS_INIT);
 	if (ret) {
 		ibdev_err(ibdev, "failed to modify qp to init, ret = %d.\n",
 			  ret);
@@ -2759,7 +2846,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
 
 	rdma_ah_set_sl(&attr->ah_attr, (u8)sl_num);
 
-	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+	ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_INIT,
+				    IB_QPS_RTR);
 	hr_dev->loop_idc = loopback;
 	if (ret) {
 		ibdev_err(ibdev, "failed to modify qp to rtr, ret = %d.\n",
@@ -2773,7 +2861,8 @@ static int free_mr_modify_rsv_qp(struct hns_roce_dev *hr_dev,
 	attr->sq_psn = HNS_ROCE_FREE_MR_USED_PSN;
 	attr->retry_cnt = HNS_ROCE_FREE_MR_USED_QP_RETRY_CNT;
 	attr->timeout = HNS_ROCE_FREE_MR_USED_QP_TIMEOUT;
-	ret = ib_modify_qp(&hr_qp->ibqp, attr, mask);
+	ret = hr_dev->hw->modify_qp(&hr_qp->ibqp, attr, mask, IB_QPS_RTR,
+				    IB_QPS_RTS);
 	if (ret)
 		ibdev_err(ibdev, "failed to modify qp to rts, ret = %d.\n",
 			  ret);
@@ -3416,7 +3505,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
 	mutex_lock(&free_mr->mutex);
 
 	for (i = 0; i < ARRAY_SIZE(free_mr->rsv_qp); i++) {
-		hr_qp = to_hr_qp(free_mr->rsv_qp[i]);
+		hr_qp = free_mr->rsv_qp[i];
 
 		ret = free_mr_post_send_lp_wqe(hr_qp);
 		if (ret) {
@@ -3431,7 +3520,7 @@ static void free_mr_send_cmd_to_hw(struct hns_roce_dev *hr_dev)
 
 	end = msecs_to_jiffies(HNS_ROCE_V2_FREE_MR_TIMEOUT) + jiffies;
 	while (cqe_cnt) {
-		npolled = hns_roce_v2_poll_cq(free_mr->rsv_cq, cqe_cnt, wc);
+		npolled = hns_roce_v2_poll_cq(&free_mr->rsv_cq->ib_cq, cqe_cnt, wc);
 		if (npolled < 0) {
 			ibdev_err(ibdev,
 				  "failed to poll cqe for free mr, remain %d cqe.\n",
@@ -5474,7 +5563,7 @@ static int hns_roce_v2_destroy_qp_common(struct hns_roce_dev *hr_dev,
 	return ret;
 }
 
-static int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
 {
 	struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
 	struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index b11579027e827..017462e52843c 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1329,9 +1329,9 @@ struct hns_roce_link_table {
 #define HNS_ROCE_EXT_LLM_MIN_PAGES(que_num) ((que_num) * 4 + 2)
 
 struct hns_roce_v2_free_mr {
-	struct ib_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
-	struct ib_cq *rsv_cq;
-	struct ib_pd *rsv_pd;
+	struct hns_roce_qp *rsv_qp[HNS_ROCE_FREE_MR_USED_QP_NUM];
+	struct hns_roce_cq *rsv_cq;
+	struct hns_roce_pd *rsv_pd;
 	struct mutex mutex;
 };
 
@@ -1461,6 +1461,8 @@ struct hns_roce_sccc_clr_done {
 	__le32 rsv[5];
 };
 
+int hns_roce_v2_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata);
+
 static inline void hns_roce_write64(struct hns_roce_dev *hr_dev, __le32 val[2],
 				    void __iomem *dest)
 {
-- 
GitLab


From bc34c04f7b97c3794dec5a6d6d27ffd5f0e4f5c8 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Sat, 26 Nov 2022 18:29:07 +0800
Subject: [PATCH 3494/4122] RDMA/hns: Fix AH attr queried by query_qp

The queried AH attr is invalid. This patch fix it.

This problem is found by rdma-core test test_mr_rereg_pd

ERROR: test_mr_rereg_pd (tests.test_mr.MRTest)
Test that cover rereg MR's PD with this flow:
----------------------------------------------------------------------
Traceback (most recent call last):
  File "./tests/test_mr.py", line 157, in test_mr_rereg_pd
    self.restate_qps()
  File "./tests/test_mr.py", line 113, in restate_qps
    self.server.qp.to_rts(self.server_qp_attr)
  File "qp.pyx", line 1137, in pyverbs.qp.QP.to_rts
  File "qp.pyx", line 1123, in pyverbs.qp.QP.to_rtr
pyverbs.pyverbs_error.PyverbsRDMAError: Failed to modify QP state to RTR.
Errno: 22, Invalid argument

Fixes: 926a01dc000d ("RDMA/hns: Add QP operations support for hip08 SoC")
Link: https://lore.kernel.org/r/20221126102911.2921820-3-xuhaoyue1@hisilicon.com
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 93c6772396865..a8f8c790d31dc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -5470,6 +5470,8 @@ static int hns_roce_v2_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
 
 		rdma_ah_set_sl(&qp_attr->ah_attr,
 			       hr_reg_read(&context, QPC_SL));
+		rdma_ah_set_port_num(&qp_attr->ah_attr, hr_qp->port + 1);
+		rdma_ah_set_ah_flags(&qp_attr->ah_attr, IB_AH_GRH);
 		grh->flow_label = hr_reg_read(&context, QPC_FL);
 		grh->sgid_index = hr_reg_read(&context, QPC_GMV_IDX);
 		grh->hop_limit = hr_reg_read(&context, QPC_HOPLIMIT);
-- 
GitLab


From 9fb39ef2ff3e18f1740625ba04093dfbef086d2b Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Sat, 26 Nov 2022 18:29:08 +0800
Subject: [PATCH 3495/4122] RDMA/hns: Fix PBL page MTR find

Now, The address of the first two pages in the MR will be searched, which
use to speed up the lookup of the pbl table for hardware.  An exception
will occur when there is only one page in this MR.  This patch fix the
number of page to search.

Fixes: 9b2cf76c9f05 ("RDMA/hns: Optimize PBL buffer allocation process")
Link: https://lore.kernel.org/r/20221126102911.2921820-4-xuhaoyue1@hisilicon.com
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index a8f8c790d31dc..41835cb05983a 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -3274,7 +3274,8 @@ static int set_mtpt_pbl(struct hns_roce_dev *hr_dev,
 	int i, count;
 
 	count = hns_roce_mtr_find(hr_dev, &mr->pbl_mtr, 0, pages,
-				  ARRAY_SIZE(pages), &pbl_ba);
+				  min_t(int, ARRAY_SIZE(pages), mr->npages),
+				  &pbl_ba);
 	if (count < 1) {
 		ibdev_err(ibdev, "failed to find PBL mtr, count = %d.\n",
 			  count);
-- 
GitLab


From 99dc5a0712883d5d13b620d25b3759d429577bc8 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Sat, 26 Nov 2022 18:29:09 +0800
Subject: [PATCH 3496/4122] RDMA/hns: Fix page size cap from firmware

Add verification to make sure the roce page size cap is supported by the
system page size.

Fixes: ba6bb7e97421 ("RDMA/hns: Add interfaces to get pf capabilities from firmware")
Link: https://lore.kernel.org/r/20221126102911.2921820-5-xuhaoyue1@hisilicon.com
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 41835cb05983a..6e74735bbcf87 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2345,6 +2345,9 @@ static int hns_roce_query_pf_caps(struct hns_roce_dev *hr_dev)
 	caps->wqe_sge_hop_num = hr_reg_read(resp_d, PF_CAPS_D_EX_SGE_HOP_NUM);
 	caps->wqe_rq_hop_num = hr_reg_read(resp_d, PF_CAPS_D_RQWQE_HOP_NUM);
 
+	if (!(caps->page_size_cap & PAGE_SIZE))
+		caps->page_size_cap = HNS_ROCE_V2_PAGE_SIZE_SUPPORTED;
+
 	return 0;
 }
 
-- 
GitLab


From 667d6164b84884c64de3fc18670cd5a98b0b10cf Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Sat, 26 Nov 2022 18:29:10 +0800
Subject: [PATCH 3497/4122] RDMA/hns: Fix error code of CMD

The error code is fixed to EIO when CMD fails to excute. This patch
converts the error status reported by firmware to linux errno.

Fixes: a04ff739f2a9 ("RDMA/hns: Add command queue support for hip08 RoCE driver")
Link: https://lore.kernel.org/r/20221126102911.2921820-6-xuhaoyue1@hisilicon.com
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 26 +++++++++++++++++++++-
 drivers/infiniband/hw/hns/hns_roce_hw_v2.h |  5 +++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 6e74735bbcf87..f32100c6f1d9e 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -1277,6 +1277,30 @@ static void update_cmdq_status(struct hns_roce_dev *hr_dev)
 		hr_dev->cmd.state = HNS_ROCE_CMDQ_STATE_FATAL_ERR;
 }
 
+static int hns_roce_cmd_err_convert_errno(u16 desc_ret)
+{
+	struct hns_roce_cmd_errcode errcode_table[] = {
+		{CMD_EXEC_SUCCESS, 0},
+		{CMD_NO_AUTH, -EPERM},
+		{CMD_NOT_EXIST, -EOPNOTSUPP},
+		{CMD_CRQ_FULL, -EXFULL},
+		{CMD_NEXT_ERR, -ENOSR},
+		{CMD_NOT_EXEC, -ENOTBLK},
+		{CMD_PARA_ERR, -EINVAL},
+		{CMD_RESULT_ERR, -ERANGE},
+		{CMD_TIMEOUT, -ETIME},
+		{CMD_HILINK_ERR, -ENOLINK},
+		{CMD_INFO_ILLEGAL, -ENXIO},
+		{CMD_INVALID, -EBADR},
+	};
+	u16 i;
+
+	for (i = 0; i < ARRAY_SIZE(errcode_table); i++)
+		if (desc_ret == errcode_table[i].return_status)
+			return errcode_table[i].errno;
+	return -EIO;
+}
+
 static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 			       struct hns_roce_cmq_desc *desc, int num)
 {
@@ -1322,7 +1346,7 @@ static int __hns_roce_cmq_send(struct hns_roce_dev *hr_dev,
 			dev_err_ratelimited(hr_dev->dev,
 					    "Cmdq IO error, opcode = 0x%x, return = 0x%x.\n",
 					    desc->opcode, desc_ret);
-			ret = -EIO;
+			ret = hns_roce_cmd_err_convert_errno(desc_ret);
 		}
 	} else {
 		/* FW/HW reset or incorrect number of desc */
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index 017462e52843c..47fad456839d1 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -273,6 +273,11 @@ enum hns_roce_cmd_return_status {
 	CMD_OTHER_ERR = 0xff
 };
 
+struct hns_roce_cmd_errcode {
+	enum hns_roce_cmd_return_status return_status;
+	int errno;
+};
+
 enum hns_roce_sgid_type {
 	GID_TYPE_FLAG_ROCE_V1 = 0,
 	GID_TYPE_FLAG_ROCE_V2_IPV4,
-- 
GitLab


From 682c0722addae4b4a1440c9db9d8c86cb8e09ce5 Mon Sep 17 00:00:00 2001
From: Chengchang Tang <tangchengchang@huawei.com>
Date: Sat, 26 Nov 2022 18:29:11 +0800
Subject: [PATCH 3498/4122] RDMA/hns: Fix XRC caps on HIP08

XRC caps has been set by default. But in fact, XRC is not supported in
HIP08.

Fixes: 32548870d438 ("RDMA/hns: Add support for XRC on HIP09")
Link: https://lore.kernel.org/r/20221126102911.2921820-7-xuhaoyue1@hisilicon.com
Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Haoyue Xu <xuhaoyue1@hisilicon.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/hw/hns/hns_roce_hw_v2.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index f32100c6f1d9e..2716852f5e929 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2051,13 +2051,14 @@ static void set_default_caps(struct hns_roce_dev *hr_dev)
 
 	caps->flags |= HNS_ROCE_CAP_FLAG_ATOMIC | HNS_ROCE_CAP_FLAG_MW |
 		       HNS_ROCE_CAP_FLAG_SRQ | HNS_ROCE_CAP_FLAG_FRMR |
-		       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL | HNS_ROCE_CAP_FLAG_XRC;
+		       HNS_ROCE_CAP_FLAG_QP_FLOW_CTRL;
 
 	caps->gid_table_len[0] = HNS_ROCE_V2_GID_INDEX_NUM;
 
 	if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09) {
 		caps->flags |= HNS_ROCE_CAP_FLAG_STASH |
-			       HNS_ROCE_CAP_FLAG_DIRECT_WQE;
+			       HNS_ROCE_CAP_FLAG_DIRECT_WQE |
+			       HNS_ROCE_CAP_FLAG_XRC;
 		caps->max_sq_inline = HNS_ROCE_V3_MAX_SQ_INLINE;
 	} else {
 		caps->max_sq_inline = HNS_ROCE_V2_MAX_SQ_INLINE;
-- 
GitLab


From 6cfe7bd0dfd33033683639039b5608d6534c19eb Mon Sep 17 00:00:00 2001
From: Zhu Yanjun <yanjun.zhu@linux.dev>
Date: Thu, 8 Dec 2022 05:19:54 -0500
Subject: [PATCH 3499/4122] RDMA/mlx5: Remove not-used IB_FLOW_SPEC_IB define

IB_FLOW_SPEC_IB is not used in mlx5 and can be deleted.

Signed-off-by: Zhu Yanjun <yanjun.zhu@linux.dev>
Link: https://lore.kernel.org/r/20221208101954.687960-1-yanjun.zhu@intel.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/hw/mlx5/fs.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c
index 490ec308e3098..3008632a6c206 100644
--- a/drivers/infiniband/hw/mlx5/fs.c
+++ b/drivers/infiniband/hw/mlx5/fs.c
@@ -127,7 +127,6 @@ static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
 }
 
 #define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
 #define LAST_IPV4_FIELD tos
 #define LAST_IPV6_FIELD traffic_class
 #define LAST_TCP_UDP_FIELD src_port
-- 
GitLab


From 74d58cd48a8f5848dfda8bc09d11c90f3ea42b0e Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Thu, 8 Dec 2022 10:07:46 +0100
Subject: [PATCH 3500/4122] USB: sisusbvga: remove console support

It was marked as BROKEN since commit 862ee699fefe (USB: sisusbvga: Make
console support depend on BROKEN) 2 years ago. Since noone stepped up to
fix it, remove it completely.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-sh@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221208090749.28056-1-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/configs/ppc6xx_defconfig    |    1 -
 arch/sh/configs/landisk_defconfig        |    1 -
 drivers/usb/misc/sisusbvga/Kconfig       |   34 -
 drivers/usb/misc/sisusbvga/Makefile      |    1 -
 drivers/usb/misc/sisusbvga/sisusb.c      |  276 +---
 drivers/usb/misc/sisusbvga/sisusb.h      |   21 -
 drivers/usb/misc/sisusbvga/sisusb_con.c  | 1496 ----------------------
 drivers/usb/misc/sisusbvga/sisusb_init.c |  955 --------------
 drivers/usb/misc/sisusbvga/sisusb_init.h |  180 ---
 9 files changed, 6 insertions(+), 2959 deletions(-)
 delete mode 100644 drivers/usb/misc/sisusbvga/sisusb_con.c
 delete mode 100644 drivers/usb/misc/sisusbvga/sisusb_init.c
 delete mode 100644 drivers/usb/misc/sisusbvga/sisusb_init.h

diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index d23deb94b36e7..f73c98be56c8f 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -912,7 +912,6 @@ CONFIG_USB_IDMOUSE=m
 CONFIG_USB_FTDI_ELAN=m
 CONFIG_USB_APPLEDISPLAY=m
 CONFIG_USB_SISUSBVGA=m
-CONFIG_USB_SISUSBVGA_CON=y
 CONFIG_USB_LD=m
 CONFIG_USB_TRANCEVIBRATOR=m
 CONFIG_USB_IOWARRIOR=m
diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig
index 492a0a2e0e367..7037320b654ad 100644
--- a/arch/sh/configs/landisk_defconfig
+++ b/arch/sh/configs/landisk_defconfig
@@ -92,7 +92,6 @@ CONFIG_USB_SERIAL_PL2303=m
 CONFIG_USB_EMI62=m
 CONFIG_USB_EMI26=m
 CONFIG_USB_SISUSBVGA=m
-CONFIG_USB_SISUSBVGA_CON=y
 CONFIG_EXT2_FS=y
 CONFIG_EXT3_FS=y
 # CONFIG_EXT3_DEFAULTS_TO_ORDERED is not set
diff --git a/drivers/usb/misc/sisusbvga/Kconfig b/drivers/usb/misc/sisusbvga/Kconfig
index c12cdd0154102..42f81c8eaa92c 100644
--- a/drivers/usb/misc/sisusbvga/Kconfig
+++ b/drivers/usb/misc/sisusbvga/Kconfig
@@ -3,7 +3,6 @@
 config USB_SISUSBVGA
 	tristate "USB 2.0 SVGA dongle support (Net2280/SiS315)"
 	depends on (USB_MUSB_HDRC || USB_EHCI_HCD)
-	select FONT_SUPPORT if USB_SISUSBVGA_CON
 	help
 	  Say Y here if you intend to attach a USB2VGA dongle based on a
 	  Net2280 and a SiS315 chip.
@@ -13,36 +12,3 @@ config USB_SISUSBVGA
 
 	  To compile this driver as a module, choose M here; the module will be
 	  called sisusbvga. If unsure, say N.
-
-config USB_SISUSBVGA_CON
-	bool "Text console and mode switching support" if USB_SISUSBVGA
-	depends on VT && BROKEN
-	select FONT_8x16
-	help
-	  Say Y here if you want a VGA text console via the USB dongle or
-	  want to support userland applications that utilize the driver's
-	  display mode switching capabilities.
-
-	  Note that this console supports VGA/EGA text mode only.
-
-	  By default, the console part of the driver will not kick in when
-	  the driver is initialized. If you want the driver to take over
-	  one or more of the consoles, you need to specify the number of
-	  the first and last consoles (starting at 1) as driver parameters.
-
-	  For example, if the driver is compiled as a module:
-
-	     modprobe sisusbvga first=1 last=5
-
-	  If you use hotplug, add this to your modutils config files with
-	  the "options" keyword, such as eg.
-
-	     options sisusbvga first=1 last=5
-
-	  If the driver is compiled into the kernel image, the parameters
-	  must be given in the kernel command like, such as
-
-	     sisusbvga.first=1 sisusbvga.last=5
-
-
-
diff --git a/drivers/usb/misc/sisusbvga/Makefile b/drivers/usb/misc/sisusbvga/Makefile
index 6551bce68ac5d..93265de80eb9f 100644
--- a/drivers/usb/misc/sisusbvga/Makefile
+++ b/drivers/usb/misc/sisusbvga/Makefile
@@ -6,4 +6,3 @@
 obj-$(CONFIG_USB_SISUSBVGA) += sisusbvga.o
 
 sisusbvga-y := sisusb.o
-sisusbvga-$(CONFIG_USB_SISUSBVGA_CON) += sisusb_con.o sisusb_init.o
diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index f08de33d9ff38..a0d5ba8058f8f 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -51,25 +51,11 @@
 #include <linux/vmalloc.h>
 
 #include "sisusb.h"
-#include "sisusb_init.h"
-
-#ifdef CONFIG_USB_SISUSBVGA_CON
-#include <linux/font.h>
-#endif
 
 #define SISUSB_DONTSYNC
 
 /* Forward declarations / clean-up routines */
 
-#ifdef CONFIG_USB_SISUSBVGA_CON
-static int sisusb_first_vc;
-static int sisusb_last_vc;
-module_param_named(first, sisusb_first_vc, int, 0);
-module_param_named(last, sisusb_last_vc, int, 0);
-MODULE_PARM_DESC(first, "Number of first console to take over (1 - MAX_NR_CONSOLES)");
-MODULE_PARM_DESC(last, "Number of last console to take over (1 - MAX_NR_CONSOLES)");
-#endif
-
 static struct usb_driver sisusb_driver;
 
 static void sisusb_free_buffers(struct sisusb_usb_data *sisusb)
@@ -1198,19 +1184,7 @@ static int sisusb_read_mem_bulk(struct sisusb_usb_data *sisusb, u32 addr,
 
 /* High level: Gfx (indexed) register access */
 
-#ifdef CONFIG_USB_SISUSBVGA_CON
-int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data)
-{
-	return sisusb_write_memio_byte(sisusb, SISUSB_TYPE_IO, port, data);
-}
-
-int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 *data)
-{
-	return sisusb_read_memio_byte(sisusb, SISUSB_TYPE_IO, port, data);
-}
-#endif
-
-int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port,
+static int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port,
 		u8 index, u8 data)
 {
 	int ret;
@@ -1220,7 +1194,7 @@ int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port,
 	return ret;
 }
 
-int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port,
+static int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port,
 		u8 index, u8 *data)
 {
 	int ret;
@@ -1230,7 +1204,7 @@ int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port,
 	return ret;
 }
 
-int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx,
+static int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port, u8 idx,
 		u8 myand, u8 myor)
 {
 	int ret;
@@ -1258,13 +1232,13 @@ static int sisusb_setidxregmask(struct sisusb_usb_data *sisusb,
 	return ret;
 }
 
-int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port,
+static int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port,
 		u8 index, u8 myor)
 {
 	return sisusb_setidxregandor(sisusb, port, index, 0xff, myor);
 }
 
-int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port,
+static int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port,
 		u8 idx, u8 myand)
 {
 	return sisusb_setidxregandor(sisusb, port, idx, myand, 0x00);
@@ -1272,38 +1246,6 @@ int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port,
 
 /* Write/read video ram */
 
-#ifdef CONFIG_USB_SISUSBVGA_CON
-int sisusb_writeb(struct sisusb_usb_data *sisusb, u32 adr, u8 data)
-{
-	return sisusb_write_memio_byte(sisusb, SISUSB_TYPE_MEM, adr, data);
-}
-
-int sisusb_readb(struct sisusb_usb_data *sisusb, u32 adr, u8 *data)
-{
-	return sisusb_read_memio_byte(sisusb, SISUSB_TYPE_MEM, adr, data);
-}
-
-int sisusb_copy_memory(struct sisusb_usb_data *sisusb, u8 *src,
-		u32 dest, int length)
-{
-	size_t dummy;
-
-	return sisusb_write_mem_bulk(sisusb, dest, src, length,
-			NULL, 0, &dummy);
-}
-
-#ifdef SISUSBENDIANTEST
-static int sisusb_read_memory(struct sisusb_usb_data *sisusb, char *dest,
-		u32 src, int length)
-{
-	size_t dummy;
-
-	return sisusb_read_mem_bulk(sisusb, src, dest, length,
-			NULL, &dummy);
-}
-#endif
-#endif
-
 #ifdef SISUSBENDIANTEST
 static void sisusb_testreadwrite(struct sisusb_usb_data *sisusb)
 {
@@ -2252,131 +2194,6 @@ static int sisusb_init_gfxdevice(struct sisusb_usb_data *sisusb, int initscreen)
 	return ret;
 }
 
-
-#ifdef CONFIG_USB_SISUSBVGA_CON
-
-/* Set up default text mode:
- * - Set text mode (0x03)
- * - Upload default font
- * - Upload user font (if available)
- */
-
-int sisusb_reset_text_mode(struct sisusb_usb_data *sisusb, int init)
-{
-	int ret = 0, slot = sisusb->font_slot, i;
-	const struct font_desc *myfont;
-	u8 *tempbuf;
-	u16 *tempbufb;
-	static const char bootstring[] =
-		"SiSUSB VGA text console, (C) 2005 Thomas Winischhofer.";
-	static const char bootlogo[] = "(o_ //\\ V_/_";
-
-	/* sisusb->lock is down */
-
-	if (!sisusb->SiS_Pr)
-		return 1;
-
-	sisusb->SiS_Pr->IOAddress = SISUSB_PCI_IOPORTBASE + 0x30;
-	sisusb->SiS_Pr->sisusb = (void *)sisusb;
-
-	/* Set mode 0x03 */
-	SiSUSBSetMode(sisusb->SiS_Pr, 0x03);
-
-	myfont = find_font("VGA8x16");
-	if (!myfont)
-		return 1;
-
-	tempbuf = vmalloc(8192);
-	if (!tempbuf)
-		return 1;
-
-	for (i = 0; i < 256; i++)
-		memcpy(tempbuf + (i * 32), myfont->data + (i * 16), 16);
-
-	/* Upload default font */
-	ret = sisusbcon_do_font_op(sisusb, 1, 0, tempbuf, 8192,
-			0, 1, NULL, 16, 0);
-
-	vfree(tempbuf);
-
-	/* Upload user font (and reset current slot) */
-	if (sisusb->font_backup) {
-		ret |= sisusbcon_do_font_op(sisusb, 1, 2, sisusb->font_backup,
-				8192, sisusb->font_backup_512, 1, NULL,
-				sisusb->font_backup_height, 0);
-		if (slot != 2)
-			sisusbcon_do_font_op(sisusb, 1, 0, NULL, 0, 0, 1,
-					NULL, 16, 0);
-	}
-
-	if (init && !sisusb->scrbuf) {
-
-		tempbuf = vmalloc(8192);
-		if (tempbuf) {
-
-			i = 4096;
-			tempbufb = (u16 *)tempbuf;
-			while (i--)
-				*(tempbufb++) = 0x0720;
-
-			i = 0;
-			tempbufb = (u16 *)tempbuf;
-			while (bootlogo[i]) {
-				*(tempbufb++) = 0x0700 | bootlogo[i++];
-				if (!(i % 4))
-					tempbufb += 76;
-			}
-
-			i = 0;
-			tempbufb = (u16 *)tempbuf + 6;
-			while (bootstring[i])
-				*(tempbufb++) = 0x0700 | bootstring[i++];
-
-			ret |= sisusb_copy_memory(sisusb, tempbuf,
-					sisusb->vrambase, 8192);
-
-			vfree(tempbuf);
-
-		}
-
-	} else if (sisusb->scrbuf) {
-		ret |= sisusb_copy_memory(sisusb, (u8 *)sisusb->scrbuf,
-				sisusb->vrambase, sisusb->scrbuf_size);
-	}
-
-	if (sisusb->sisusb_cursor_size_from >= 0 &&
-			sisusb->sisusb_cursor_size_to >= 0) {
-		sisusb_setidxreg(sisusb, SISCR, 0x0a,
-				sisusb->sisusb_cursor_size_from);
-		sisusb_setidxregandor(sisusb, SISCR, 0x0b, 0xe0,
-				sisusb->sisusb_cursor_size_to);
-	} else {
-		sisusb_setidxreg(sisusb, SISCR, 0x0a, 0x2d);
-		sisusb_setidxreg(sisusb, SISCR, 0x0b, 0x0e);
-		sisusb->sisusb_cursor_size_to = -1;
-	}
-
-	slot = sisusb->sisusb_cursor_loc;
-	if (slot < 0)
-		slot = 0;
-
-	sisusb->sisusb_cursor_loc = -1;
-	sisusb->bad_cursor_pos = 1;
-
-	sisusb_set_cursor(sisusb, slot);
-
-	sisusb_setidxreg(sisusb, SISCR, 0x0c, (sisusb->cur_start_addr >> 8));
-	sisusb_setidxreg(sisusb, SISCR, 0x0d, (sisusb->cur_start_addr & 0xff));
-
-	sisusb->textmodedestroyed = 0;
-
-	/* sisusb->lock is down */
-
-	return ret;
-}
-
-#endif
-
 /* fops */
 
 static int sisusb_open(struct inode *inode, struct file *file)
@@ -2434,7 +2251,7 @@ static int sisusb_open(struct inode *inode, struct file *file)
 	return 0;
 }
 
-void sisusb_delete(struct kref *kref)
+static void sisusb_delete(struct kref *kref)
 {
 	struct sisusb_usb_data *sisusb = to_sisusb_dev(kref);
 
@@ -2446,9 +2263,6 @@ void sisusb_delete(struct kref *kref)
 	sisusb->sisusb_dev = NULL;
 	sisusb_free_buffers(sisusb);
 	sisusb_free_urbs(sisusb);
-#ifdef CONFIG_USB_SISUSBVGA_CON
-	kfree(sisusb->SiS_Pr);
-#endif
 	kfree(sisusb);
 }
 
@@ -2842,53 +2656,7 @@ static int sisusb_handle_command(struct sisusb_usb_data *sisusb,
 
 	case SUCMD_HANDLETEXTMODE:
 		retval = 0;
-#ifdef CONFIG_USB_SISUSBVGA_CON
-		/* Gfx core must be initialized, SiS_Pr must exist */
-		if (!sisusb->gfxinit || !sisusb->SiS_Pr)
-			return -ENODEV;
-
-		switch (y->data0) {
-		case 0:
-			retval = sisusb_reset_text_mode(sisusb, 0);
-			break;
-		case 1:
-			sisusb->textmodedestroyed = 1;
-			break;
-		}
-#endif
-		break;
-
-#ifdef CONFIG_USB_SISUSBVGA_CON
-	case SUCMD_SETMODE:
-		/* Gfx core must be initialized, SiS_Pr must exist */
-		if (!sisusb->gfxinit || !sisusb->SiS_Pr)
-			return -ENODEV;
-
-		retval = 0;
-
-		sisusb->SiS_Pr->IOAddress = SISUSB_PCI_IOPORTBASE + 0x30;
-		sisusb->SiS_Pr->sisusb = (void *)sisusb;
-
-		if (SiSUSBSetMode(sisusb->SiS_Pr, y->data3))
-			retval = -EINVAL;
-
-		break;
-
-	case SUCMD_SETVESAMODE:
-		/* Gfx core must be initialized, SiS_Pr must exist */
-		if (!sisusb->gfxinit || !sisusb->SiS_Pr)
-			return -ENODEV;
-
-		retval = 0;
-
-		sisusb->SiS_Pr->IOAddress = SISUSB_PCI_IOPORTBASE + 0x30;
-		sisusb->SiS_Pr->sisusb = (void *)sisusb;
-
-		if (SiSUSBSetVESAMode(sisusb->SiS_Pr, y->data3))
-			retval = -EINVAL;
-
 		break;
-#endif
 
 	default:
 		retval = -EINVAL;
@@ -2942,11 +2710,7 @@ static long sisusb_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		x.sisusb_vramsize = sisusb->vramsize;
 		x.sisusb_minor = sisusb->minor;
 		x.sisusb_fbdevactive = 0;
-#ifdef CONFIG_USB_SISUSBVGA_CON
-		x.sisusb_conactive  = sisusb->haveconsole ? 1 : 0;
-#else
 		x.sisusb_conactive  = 0;
-#endif
 		memset(x.sisusb_reserved, 0, sizeof(x.sisusb_reserved));
 
 		if (copy_to_user((void __user *)arg, &x, sizeof(x)))
@@ -3090,15 +2854,6 @@ static int sisusb_probe(struct usb_interface *intf,
 	dev_info(&sisusb->sisusb_dev->dev, "Allocated %d output buffers\n",
 			sisusb->numobufs);
 
-#ifdef CONFIG_USB_SISUSBVGA_CON
-	/* Allocate our SiS_Pr */
-	sisusb->SiS_Pr = kmalloc(sizeof(struct SiS_Private), GFP_KERNEL);
-	if (!sisusb->SiS_Pr) {
-		retval = -ENOMEM;
-		goto error_4;
-	}
-#endif
-
 	/* Do remaining init stuff */
 
 	init_waitqueue_head(&sisusb->wait_q);
@@ -3111,12 +2866,6 @@ static int sisusb_probe(struct usb_interface *intf,
 
 	if (dev->speed == USB_SPEED_HIGH || dev->speed >= USB_SPEED_SUPER) {
 		int initscreen = 1;
-#ifdef CONFIG_USB_SISUSBVGA_CON
-		if (sisusb_first_vc > 0 && sisusb_last_vc > 0 &&
-				sisusb_first_vc <= sisusb_last_vc &&
-				sisusb_last_vc <= MAX_NR_CONSOLES)
-			initscreen = 0;
-#endif
 		if (sisusb_init_gfxdevice(sisusb, initscreen))
 			dev_err(&sisusb->sisusb_dev->dev,
 					"Failed to early initialize device\n");
@@ -3133,10 +2882,6 @@ static int sisusb_probe(struct usb_interface *intf,
 	dev_dbg(&sisusb->sisusb_dev->dev, "*** RWTEST END ***\n");
 #endif
 
-#ifdef CONFIG_USB_SISUSBVGA_CON
-	sisusb_console_init(sisusb, sisusb_first_vc, sisusb_last_vc);
-#endif
-
 	return 0;
 
 error_4:
@@ -3159,10 +2904,6 @@ static void sisusb_disconnect(struct usb_interface *intf)
 	if (!sisusb)
 		return;
 
-#ifdef CONFIG_USB_SISUSBVGA_CON
-	sisusb_console_exit(sisusb);
-#endif
-
 	usb_deregister_dev(intf, &usb_sisusb_class);
 
 	mutex_lock(&sisusb->lock);
@@ -3208,11 +2949,6 @@ static struct usb_driver sisusb_driver = {
 
 static int __init usb_sisusb_init(void)
 {
-
-#ifdef CONFIG_USB_SISUSBVGA_CON
-	sisusb_init_concode();
-#endif
-
 	return usb_register(&sisusb_driver);
 }
 
diff --git a/drivers/usb/misc/sisusbvga/sisusb.h b/drivers/usb/misc/sisusbvga/sisusb.h
index c0fb9e1c5361b..e5b1228655d08 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.h
+++ b/drivers/usb/misc/sisusbvga/sisusb.h
@@ -48,7 +48,6 @@
 
 /* Include console and mode switching code? */
 
-#include <linux/console.h>
 #include <linux/vt_kern.h>
 #include "sisusb_struct.h"
 
@@ -126,26 +125,6 @@ struct sisusb_usb_data {
 	unsigned char gfxinit;	/* graphics core initialized? */
 	unsigned short chipid, chipvendor;
 	unsigned short chiprevision;
-#ifdef CONFIG_USB_SISUSBVGA_CON
-	struct SiS_Private *SiS_Pr;
-	unsigned long scrbuf;
-	unsigned int scrbuf_size;
-	int haveconsole, con_first, con_last;
-	int havethisconsole[MAX_NR_CONSOLES];
-	int textmodedestroyed;
-	unsigned int sisusb_num_columns;	/* real number, not vt's idea */
-	int cur_start_addr, con_rolled_over;
-	int sisusb_cursor_loc, bad_cursor_pos;
-	int sisusb_cursor_size_from;
-	int sisusb_cursor_size_to;
-	int current_font_height, current_font_512;
-	int font_backup_size, font_backup_height, font_backup_512;
-	char *font_backup;
-	int font_slot;
-	struct vc_data *sisusb_display_fg;
-	int is_gfx;
-	int con_blanked;
-#endif
 };
 
 #define to_sisusb_dev(d) container_of(d, struct sisusb_usb_data, kref)
diff --git a/drivers/usb/misc/sisusbvga/sisusb_con.c b/drivers/usb/misc/sisusbvga/sisusb_con.c
deleted file mode 100644
index fcb95fb639e00..0000000000000
--- a/drivers/usb/misc/sisusbvga/sisusb_con.c
+++ /dev/null
@@ -1,1496 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
-/*
- * sisusb - usb kernel driver for SiS315(E) based USB2VGA dongles
- *
- * VGA text mode console part
- *
- * Copyright (C) 2005 by Thomas Winischhofer, Vienna, Austria
- *
- * If distributed as part of the Linux kernel, this code is licensed under the
- * terms of the GPL v2.
- *
- * Otherwise, the following license terms apply:
- *
- * * Redistribution and use in source and binary forms, with or without
- * * modification, are permitted provided that the following conditions
- * * are met:
- * * 1) Redistributions of source code must retain the above copyright
- * *    notice, this list of conditions and the following disclaimer.
- * * 2) Redistributions in binary form must reproduce the above copyright
- * *    notice, this list of conditions and the following disclaimer in the
- * *    documentation and/or other materials provided with the distribution.
- * * 3) The name of the author may not be used to endorse or promote products
- * *    derived from this software without specific psisusbr written permission.
- * *
- * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESSED OR
- * * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Author: Thomas Winischhofer <thomas@winischhofer.net>
- *
- * Portions based on vgacon.c which are
- *	Created 28 Sep 1997 by Geert Uytterhoeven
- *      Rewritten by Martin Mares <mj@ucw.cz>, July 1998
- *      based on code Copyright (C) 1991, 1992  Linus Torvalds
- *			    1995  Jay Estabrook
- *
- * A note on using in_atomic() in here: We can't handle console
- * calls from non-schedulable context due to our USB-dependend
- * nature. For now, this driver just ignores any calls if it
- * detects this state.
- *
- */
-
-#include <linux/mutex.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/signal.h>
-#include <linux/fs.h>
-#include <linux/usb.h>
-#include <linux/tty.h>
-#include <linux/console.h>
-#include <linux/string.h>
-#include <linux/kd.h>
-#include <linux/init.h>
-#include <linux/vt_kern.h>
-#include <linux/selection.h>
-#include <linux/spinlock.h>
-#include <linux/kref.h>
-#include <linux/ioport.h>
-#include <linux/interrupt.h>
-#include <linux/vmalloc.h>
-
-#include "sisusb.h"
-#include "sisusb_init.h"
-
-/* vc_data -> sisusb conversion table */
-static struct sisusb_usb_data *mysisusbs[MAX_NR_CONSOLES];
-
-/* Forward declaration */
-static const struct consw sisusb_con;
-
-static inline void
-sisusbcon_memsetw(u16 *s, u16 c, unsigned int count)
-{
-	memset16(s, c, count / 2);
-}
-
-static inline void
-sisusb_initialize(struct sisusb_usb_data *sisusb)
-{
-	/* Reset cursor and start address */
-	if (sisusb_setidxreg(sisusb, SISCR, 0x0c, 0x00))
-		return;
-	if (sisusb_setidxreg(sisusb, SISCR, 0x0d, 0x00))
-		return;
-	if (sisusb_setidxreg(sisusb, SISCR, 0x0e, 0x00))
-		return;
-	sisusb_setidxreg(sisusb, SISCR, 0x0f, 0x00);
-}
-
-static inline void
-sisusbcon_set_start_address(struct sisusb_usb_data *sisusb, struct vc_data *c)
-{
-	sisusb->cur_start_addr = (c->vc_visible_origin - sisusb->scrbuf) / 2;
-
-	sisusb_setidxreg(sisusb, SISCR, 0x0c, (sisusb->cur_start_addr >> 8));
-	sisusb_setidxreg(sisusb, SISCR, 0x0d, (sisusb->cur_start_addr & 0xff));
-}
-
-void
-sisusb_set_cursor(struct sisusb_usb_data *sisusb, unsigned int location)
-{
-	if (sisusb->sisusb_cursor_loc == location)
-		return;
-
-	sisusb->sisusb_cursor_loc = location;
-
-	/* Hardware bug: Text cursor appears twice or not at all
-	 * at some positions. Work around it with the cursor skew
-	 * bits.
-	 */
-
-	if ((location & 0x0007) == 0x0007) {
-		sisusb->bad_cursor_pos = 1;
-		location--;
-		if (sisusb_setidxregandor(sisusb, SISCR, 0x0b, 0x1f, 0x20))
-			return;
-	} else if (sisusb->bad_cursor_pos) {
-		if (sisusb_setidxregand(sisusb, SISCR, 0x0b, 0x1f))
-			return;
-		sisusb->bad_cursor_pos = 0;
-	}
-
-	if (sisusb_setidxreg(sisusb, SISCR, 0x0e, (location >> 8)))
-		return;
-	sisusb_setidxreg(sisusb, SISCR, 0x0f, (location & 0xff));
-}
-
-static inline struct sisusb_usb_data *
-sisusb_get_sisusb(unsigned short console)
-{
-	return mysisusbs[console];
-}
-
-static inline int
-sisusb_sisusb_valid(struct sisusb_usb_data *sisusb)
-{
-	if (!sisusb->present || !sisusb->ready || !sisusb->sisusb_dev)
-		return 0;
-
-	return 1;
-}
-
-static struct sisusb_usb_data *
-sisusb_get_sisusb_lock_and_check(unsigned short console)
-{
-	struct sisusb_usb_data *sisusb;
-
-	/* We can't handle console calls in non-schedulable
-	 * context due to our locks and the USB transport.
-	 * So we simply ignore them. This should only affect
-	 * some calls to printk.
-	 */
-	if (in_atomic())
-		return NULL;
-
-	sisusb = sisusb_get_sisusb(console);
-	if (!sisusb)
-		return NULL;
-
-	mutex_lock(&sisusb->lock);
-
-	if (!sisusb_sisusb_valid(sisusb) ||
-	    !sisusb->havethisconsole[console]) {
-		mutex_unlock(&sisusb->lock);
-		return NULL;
-	}
-
-	return sisusb;
-}
-
-static int
-sisusb_is_inactive(struct vc_data *c, struct sisusb_usb_data *sisusb)
-{
-	if (sisusb->is_gfx ||
-	    sisusb->textmodedestroyed ||
-	    c->vc_mode != KD_TEXT)
-		return 1;
-
-	return 0;
-}
-
-/* con_startup console interface routine */
-static const char *
-sisusbcon_startup(void)
-{
-	return "SISUSBCON";
-}
-
-/* con_init console interface routine */
-static void
-sisusbcon_init(struct vc_data *c, int init)
-{
-	struct sisusb_usb_data *sisusb;
-	int cols, rows;
-
-	/* This is called by do_take_over_console(),
-	 * ie by us/under our control. It is
-	 * only called after text mode and fonts
-	 * are set up/restored.
-	 */
-
-	sisusb = sisusb_get_sisusb(c->vc_num);
-	if (!sisusb)
-		return;
-
-	mutex_lock(&sisusb->lock);
-
-	if (!sisusb_sisusb_valid(sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	c->vc_can_do_color = 1;
-
-	c->vc_complement_mask = 0x7700;
-
-	c->vc_hi_font_mask = sisusb->current_font_512 ? 0x0800 : 0;
-
-	sisusb->haveconsole = 1;
-
-	sisusb->havethisconsole[c->vc_num] = 1;
-
-	/* We only support 640x400 */
-	c->vc_scan_lines = 400;
-
-	c->vc_font.height = sisusb->current_font_height;
-
-	/* We only support width = 8 */
-	cols = 80;
-	rows = c->vc_scan_lines / c->vc_font.height;
-
-	/* Increment usage count for our sisusb.
-	 * Doing so saves us from upping/downing
-	 * the disconnect semaphore; we can't
-	 * lose our sisusb until this is undone
-	 * in con_deinit. For all other console
-	 * interface functions, it suffices to
-	 * use sisusb->lock and do a quick check
-	 * of sisusb for device disconnection.
-	 */
-	kref_get(&sisusb->kref);
-
-	if (!*c->uni_pagedict_loc)
-		con_set_default_unimap(c);
-
-	mutex_unlock(&sisusb->lock);
-
-	if (init) {
-		c->vc_cols = cols;
-		c->vc_rows = rows;
-	} else
-		vc_resize(c, cols, rows);
-}
-
-/* con_deinit console interface routine */
-static void
-sisusbcon_deinit(struct vc_data *c)
-{
-	struct sisusb_usb_data *sisusb;
-	int i;
-
-	/* This is called by do_take_over_console()
-	 * and others, ie not under our control.
-	 */
-
-	sisusb = sisusb_get_sisusb(c->vc_num);
-	if (!sisusb)
-		return;
-
-	mutex_lock(&sisusb->lock);
-
-	/* Clear ourselves in mysisusbs */
-	mysisusbs[c->vc_num] = NULL;
-
-	sisusb->havethisconsole[c->vc_num] = 0;
-
-	/* Free our font buffer if all consoles are gone */
-	if (sisusb->font_backup) {
-		for(i = 0; i < MAX_NR_CONSOLES; i++) {
-			if (sisusb->havethisconsole[c->vc_num])
-				break;
-		}
-		if (i == MAX_NR_CONSOLES) {
-			vfree(sisusb->font_backup);
-			sisusb->font_backup = NULL;
-		}
-	}
-
-	mutex_unlock(&sisusb->lock);
-
-	/* decrement the usage count on our sisusb */
-	kref_put(&sisusb->kref, sisusb_delete);
-}
-
-/* interface routine */
-static u8
-sisusbcon_build_attr(struct vc_data *c, u8 color, enum vc_intensity intensity,
-			    bool blink, bool underline, bool reverse,
-			    bool unused)
-{
-	u8 attr = color;
-
-	if (underline)
-		attr = (attr & 0xf0) | c->vc_ulcolor;
-	else if (intensity == VCI_HALF_BRIGHT)
-		attr = (attr & 0xf0) | c->vc_halfcolor;
-
-	if (reverse)
-		attr = ((attr) & 0x88) |
-		       ((((attr) >> 4) |
-		       ((attr) << 4)) & 0x77);
-
-	if (blink)
-		attr ^= 0x80;
-
-	if (intensity == VCI_BOLD)
-		attr ^= 0x08;
-
-	return attr;
-}
-
-/* Interface routine */
-static void
-sisusbcon_invert_region(struct vc_data *vc, u16 *p, int count)
-{
-	/* Invert a region. This is called with a pointer
-	 * to the console's internal screen buffer. So we
-	 * simply do the inversion there and rely on
-	 * a call to putc(s) to update the real screen.
-	 */
-
-	while (count--) {
-		u16 a = *p;
-
-		*p++ = ((a) & 0x88ff)        |
-		       (((a) & 0x7000) >> 4) |
-		       (((a) & 0x0700) << 4);
-	}
-}
-
-static inline void *sisusb_vaddr(const struct sisusb_usb_data *sisusb,
-		const struct vc_data *c, unsigned int x, unsigned int y)
-{
-	return (u16 *)c->vc_origin + y * sisusb->sisusb_num_columns + x;
-}
-
-static inline unsigned long sisusb_haddr(const struct sisusb_usb_data *sisusb,
-	      const struct vc_data *c, unsigned int x, unsigned int y)
-{
-	unsigned long offset = c->vc_origin - sisusb->scrbuf;
-
-	/* 2 bytes per each character */
-	offset += 2 * (y * sisusb->sisusb_num_columns + x);
-
-	return sisusb->vrambase + offset;
-}
-
-/* Interface routine */
-static void
-sisusbcon_putc(struct vc_data *c, int ch, int y, int x)
-{
-	struct sisusb_usb_data *sisusb;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return;
-
-	/* sisusb->lock is down */
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, x, y),
-				sisusb_haddr(sisusb, c, x, y), 2);
-
-	mutex_unlock(&sisusb->lock);
-}
-
-/* Interface routine */
-static void
-sisusbcon_putcs(struct vc_data *c, const unsigned short *s,
-		         int count, int y, int x)
-{
-	struct sisusb_usb_data *sisusb;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return;
-
-	/* sisusb->lock is down */
-
-	/* Need to put the characters into the buffer ourselves,
-	 * because the vt does this AFTER calling us.
-	 */
-
-	memcpy(sisusb_vaddr(sisusb, c, x, y), s, count * 2);
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, x, y),
-			sisusb_haddr(sisusb, c, x, y), count * 2);
-
-	mutex_unlock(&sisusb->lock);
-}
-
-/* Interface routine */
-static void
-sisusbcon_clear(struct vc_data *c, int y, int x, int height, int width)
-{
-	struct sisusb_usb_data *sisusb;
-	u16 eattr = c->vc_video_erase_char;
-	int i, length, cols;
-	u16 *dest;
-
-	if (width <= 0 || height <= 0)
-		return;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return;
-
-	/* sisusb->lock is down */
-
-	/* Need to clear buffer ourselves, because the vt does
-	 * this AFTER calling us.
-	 */
-
-	dest = sisusb_vaddr(sisusb, c, x, y);
-
-	cols = sisusb->sisusb_num_columns;
-
-	if (width > cols)
-		width = cols;
-
-	if (x == 0 && width >= c->vc_cols) {
-
-		sisusbcon_memsetw(dest, eattr, height * cols * 2);
-
-	} else {
-
-		for (i = height; i > 0; i--, dest += cols)
-			sisusbcon_memsetw(dest, eattr, width * 2);
-
-	}
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	length = ((height * cols) - x - (cols - width - x)) * 2;
-
-
-	sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, x, y),
-			sisusb_haddr(sisusb, c, x, y), length);
-
-	mutex_unlock(&sisusb->lock);
-}
-
-/* interface routine */
-static int
-sisusbcon_switch(struct vc_data *c)
-{
-	struct sisusb_usb_data *sisusb;
-	int length;
-
-	/* Returnvalue 0 means we have fully restored screen,
-	 *	and vt doesn't need to call do_update_region().
-	 * Returnvalue != 0 naturally means the opposite.
-	 */
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return 0;
-
-	/* sisusb->lock is down */
-
-	/* Don't write to screen if in gfx mode */
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return 0;
-	}
-
-	/* That really should not happen. It would mean we are
-	 * being called while the vc is using its private buffer
-	 * as origin.
-	 */
-	if (c->vc_origin == (unsigned long)c->vc_screenbuf) {
-		mutex_unlock(&sisusb->lock);
-		dev_dbg(&sisusb->sisusb_dev->dev, "ASSERT ORIGIN != SCREENBUF!\n");
-		return 0;
-	}
-
-	/* Check that we don't copy too much */
-	length = min((int)c->vc_screenbuf_size,
-			(int)(sisusb->scrbuf + sisusb->scrbuf_size - c->vc_origin));
-
-	/* Restore the screen contents */
-	memcpy((u16 *)c->vc_origin, (u16 *)c->vc_screenbuf, length);
-
-	sisusb_copy_memory(sisusb, (u8 *)c->vc_origin,
-			sisusb_haddr(sisusb, c, 0, 0), length);
-
-	mutex_unlock(&sisusb->lock);
-
-	return 0;
-}
-
-/* interface routine */
-static void
-sisusbcon_save_screen(struct vc_data *c)
-{
-	struct sisusb_usb_data *sisusb;
-	int length;
-
-	/* Save the current screen contents to vc's private
-	 * buffer.
-	 */
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return;
-
-	/* sisusb->lock is down */
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	/* Check that we don't copy too much */
-	length = min((int)c->vc_screenbuf_size,
-			(int)(sisusb->scrbuf + sisusb->scrbuf_size - c->vc_origin));
-
-	/* Save the screen contents to vc's private buffer */
-	memcpy((u16 *)c->vc_screenbuf, (u16 *)c->vc_origin, length);
-
-	mutex_unlock(&sisusb->lock);
-}
-
-/* interface routine */
-static void
-sisusbcon_set_palette(struct vc_data *c, const unsigned char *table)
-{
-	struct sisusb_usb_data *sisusb;
-	int i, j;
-
-	/* Return value not used by vt */
-
-	if (!con_is_visible(c))
-		return;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return;
-
-	/* sisusb->lock is down */
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	for (i = j = 0; i < 16; i++) {
-		if (sisusb_setreg(sisusb, SISCOLIDX, table[i]))
-			break;
-		if (sisusb_setreg(sisusb, SISCOLDATA, c->vc_palette[j++] >> 2))
-			break;
-		if (sisusb_setreg(sisusb, SISCOLDATA, c->vc_palette[j++] >> 2))
-			break;
-		if (sisusb_setreg(sisusb, SISCOLDATA, c->vc_palette[j++] >> 2))
-			break;
-	}
-
-	mutex_unlock(&sisusb->lock);
-}
-
-/* interface routine */
-static int
-sisusbcon_blank(struct vc_data *c, int blank, int mode_switch)
-{
-	struct sisusb_usb_data *sisusb;
-	u8 sr1, cr17, pmreg, cr63;
-	int ret = 0;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return 0;
-
-	/* sisusb->lock is down */
-
-	if (mode_switch)
-		sisusb->is_gfx = blank ? 1 : 0;
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return 0;
-	}
-
-	switch (blank) {
-
-	case 1:		/* Normal blanking: Clear screen */
-	case -1:
-		sisusbcon_memsetw((u16 *)c->vc_origin,
-				c->vc_video_erase_char,
-				c->vc_screenbuf_size);
-		sisusb_copy_memory(sisusb, (u8 *)c->vc_origin,
-				sisusb_haddr(sisusb, c, 0, 0),
-				c->vc_screenbuf_size);
-		sisusb->con_blanked = 1;
-		ret = 1;
-		break;
-
-	default:	/* VESA blanking */
-		switch (blank) {
-		case 0: /* Unblank */
-			sr1   = 0x00;
-			cr17  = 0x80;
-			pmreg = 0x00;
-			cr63  = 0x00;
-			ret = 1;
-			sisusb->con_blanked = 0;
-			break;
-		case VESA_VSYNC_SUSPEND + 1:
-			sr1   = 0x20;
-			cr17  = 0x80;
-			pmreg = 0x80;
-			cr63  = 0x40;
-			break;
-		case VESA_HSYNC_SUSPEND + 1:
-			sr1   = 0x20;
-			cr17  = 0x80;
-			pmreg = 0x40;
-			cr63  = 0x40;
-			break;
-		case VESA_POWERDOWN + 1:
-			sr1   = 0x20;
-			cr17  = 0x00;
-			pmreg = 0xc0;
-			cr63  = 0x40;
-			break;
-		default:
-			mutex_unlock(&sisusb->lock);
-			return -EINVAL;
-		}
-
-		sisusb_setidxregandor(sisusb, SISSR, 0x01, ~0x20, sr1);
-		sisusb_setidxregandor(sisusb, SISCR, 0x17, 0x7f, cr17);
-		sisusb_setidxregandor(sisusb, SISSR, 0x1f, 0x3f, pmreg);
-		sisusb_setidxregandor(sisusb, SISCR, 0x63, 0xbf, cr63);
-
-	}
-
-	mutex_unlock(&sisusb->lock);
-
-	return ret;
-}
-
-/* interface routine */
-static void
-sisusbcon_scrolldelta(struct vc_data *c, int lines)
-{
-	struct sisusb_usb_data *sisusb;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return;
-
-	/* sisusb->lock is down */
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	vc_scrolldelta_helper(c, lines, sisusb->con_rolled_over,
-			(void *)sisusb->scrbuf, sisusb->scrbuf_size);
-
-	sisusbcon_set_start_address(sisusb, c);
-
-	mutex_unlock(&sisusb->lock);
-}
-
-/* Interface routine */
-static void
-sisusbcon_cursor(struct vc_data *c, int mode)
-{
-	struct sisusb_usb_data *sisusb;
-	int from, to, baseline;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return;
-
-	/* sisusb->lock is down */
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	if (c->vc_origin != c->vc_visible_origin) {
-		c->vc_visible_origin = c->vc_origin;
-		sisusbcon_set_start_address(sisusb, c);
-	}
-
-	if (mode == CM_ERASE) {
-		sisusb_setidxregor(sisusb, SISCR, 0x0a, 0x20);
-		sisusb->sisusb_cursor_size_to = -1;
-		mutex_unlock(&sisusb->lock);
-		return;
-	}
-
-	sisusb_set_cursor(sisusb, (c->vc_pos - sisusb->scrbuf) / 2);
-
-	baseline = c->vc_font.height - (c->vc_font.height < 10 ? 1 : 2);
-
-	switch (CUR_SIZE(c->vc_cursor_type)) {
-		case CUR_BLOCK:		from = 1;
-					to   = c->vc_font.height;
-					break;
-		case CUR_TWO_THIRDS:	from = c->vc_font.height / 3;
-					to   = baseline;
-					break;
-		case CUR_LOWER_HALF:	from = c->vc_font.height / 2;
-					to   = baseline;
-					break;
-		case CUR_LOWER_THIRD:	from = (c->vc_font.height * 2) / 3;
-					to   = baseline;
-					break;
-		case CUR_NONE:		from = 31;
-					to = 30;
-					break;
-		default:
-		case CUR_UNDERLINE:	from = baseline - 1;
-					to   = baseline;
-					break;
-	}
-
-	if (sisusb->sisusb_cursor_size_from != from ||
-	    sisusb->sisusb_cursor_size_to != to) {
-
-		sisusb_setidxreg(sisusb, SISCR, 0x0a, from);
-		sisusb_setidxregandor(sisusb, SISCR, 0x0b, 0xe0, to);
-
-		sisusb->sisusb_cursor_size_from = from;
-		sisusb->sisusb_cursor_size_to   = to;
-	}
-
-	mutex_unlock(&sisusb->lock);
-}
-
-static bool
-sisusbcon_scroll_area(struct vc_data *c, struct sisusb_usb_data *sisusb,
-		unsigned int t, unsigned int b, enum con_scroll dir,
-		unsigned int lines)
-{
-	int cols = sisusb->sisusb_num_columns;
-	int length = ((b - t) * cols) * 2;
-	u16 eattr = c->vc_video_erase_char;
-
-	/* sisusb->lock is down */
-
-	/* Scroll an area which does not match the
-	 * visible screen's dimensions. This needs
-	 * to be done separately, as it does not
-	 * use hardware panning.
-	 */
-
-	switch (dir) {
-
-		case SM_UP:
-			memmove(sisusb_vaddr(sisusb, c, 0, t),
-					   sisusb_vaddr(sisusb, c, 0, t + lines),
-					   (b - t - lines) * cols * 2);
-			sisusbcon_memsetw(sisusb_vaddr(sisusb, c, 0, b - lines),
-					eattr, lines * cols * 2);
-			break;
-
-		case SM_DOWN:
-			memmove(sisusb_vaddr(sisusb, c, 0, t + lines),
-					   sisusb_vaddr(sisusb, c, 0, t),
-					   (b - t - lines) * cols * 2);
-			sisusbcon_memsetw(sisusb_vaddr(sisusb, c, 0, t), eattr,
-					  lines * cols * 2);
-			break;
-	}
-
-	sisusb_copy_memory(sisusb, sisusb_vaddr(sisusb, c, 0, t),
-			sisusb_haddr(sisusb, c, 0, t), length);
-
-	mutex_unlock(&sisusb->lock);
-
-	return true;
-}
-
-/* Interface routine */
-static bool
-sisusbcon_scroll(struct vc_data *c, unsigned int t, unsigned int b,
-		enum con_scroll dir, unsigned int lines)
-{
-	struct sisusb_usb_data *sisusb;
-	u16 eattr = c->vc_video_erase_char;
-	int copyall = 0;
-	unsigned long oldorigin;
-	unsigned int delta = lines * c->vc_size_row;
-
-	/* Returning != 0 means we have done the scrolling successfully.
-	 * Returning 0 makes vt do the scrolling on its own.
-	 * Note that con_scroll is only called if the console is
-	 * visible. In that case, the origin should be our buffer,
-	 * not the vt's private one.
-	 */
-
-	if (!lines)
-		return true;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return false;
-
-	/* sisusb->lock is down */
-
-	if (sisusb_is_inactive(c, sisusb)) {
-		mutex_unlock(&sisusb->lock);
-		return false;
-	}
-
-	/* Special case */
-	if (t || b != c->vc_rows)
-		return sisusbcon_scroll_area(c, sisusb, t, b, dir, lines);
-
-	if (c->vc_origin != c->vc_visible_origin) {
-		c->vc_visible_origin = c->vc_origin;
-		sisusbcon_set_start_address(sisusb, c);
-	}
-
-	/* limit amount to maximum realistic size */
-	if (lines > c->vc_rows)
-		lines = c->vc_rows;
-
-	oldorigin = c->vc_origin;
-
-	switch (dir) {
-
-	case SM_UP:
-
-		if (c->vc_scr_end + delta >=
-				sisusb->scrbuf + sisusb->scrbuf_size) {
-			memcpy((u16 *)sisusb->scrbuf,
-					  (u16 *)(oldorigin + delta),
-					  c->vc_screenbuf_size - delta);
-			c->vc_origin = sisusb->scrbuf;
-			sisusb->con_rolled_over = oldorigin - sisusb->scrbuf;
-			copyall = 1;
-		} else
-			c->vc_origin += delta;
-
-		sisusbcon_memsetw(
-			(u16 *)(c->vc_origin + c->vc_screenbuf_size - delta),
-					eattr, delta);
-
-		break;
-
-	case SM_DOWN:
-
-		if (oldorigin - delta < sisusb->scrbuf) {
-			memmove((void *)sisusb->scrbuf + sisusb->scrbuf_size -
-					c->vc_screenbuf_size + delta,
-					(u16 *)oldorigin,
-					c->vc_screenbuf_size - delta);
-			c->vc_origin = sisusb->scrbuf +
-					sisusb->scrbuf_size -
-					c->vc_screenbuf_size;
-			sisusb->con_rolled_over = 0;
-			copyall = 1;
-		} else
-			c->vc_origin -= delta;
-
-		c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size;
-
-		scr_memsetw((u16 *)(c->vc_origin), eattr, delta);
-
-		break;
-	}
-
-	if (copyall)
-		sisusb_copy_memory(sisusb,
-			(u8 *)c->vc_origin,
-			sisusb_haddr(sisusb, c, 0, 0),
-			c->vc_screenbuf_size);
-	else if (dir == SM_UP)
-		sisusb_copy_memory(sisusb,
-			(u8 *)c->vc_origin + c->vc_screenbuf_size - delta,
-			sisusb_haddr(sisusb, c, 0, 0) +
-					c->vc_screenbuf_size - delta,
-			delta);
-	else
-		sisusb_copy_memory(sisusb,
-			(u8 *)c->vc_origin,
-			sisusb_haddr(sisusb, c, 0, 0),
-			delta);
-
-	c->vc_scr_end = c->vc_origin + c->vc_screenbuf_size;
-	c->vc_visible_origin = c->vc_origin;
-
-	sisusbcon_set_start_address(sisusb, c);
-
-	c->vc_pos = c->vc_pos - oldorigin + c->vc_origin;
-
-	mutex_unlock(&sisusb->lock);
-
-	return true;
-}
-
-/* Interface routine */
-static int
-sisusbcon_set_origin(struct vc_data *c)
-{
-	struct sisusb_usb_data *sisusb;
-
-	/* Returning != 0 means we were successful.
-	 * Returning 0 will vt make to use its own
-	 *	screenbuffer as the origin.
-	 */
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return 0;
-
-	/* sisusb->lock is down */
-
-	if (sisusb_is_inactive(c, sisusb) || sisusb->con_blanked) {
-		mutex_unlock(&sisusb->lock);
-		return 0;
-	}
-
-	c->vc_origin = c->vc_visible_origin = sisusb->scrbuf;
-
-	sisusbcon_set_start_address(sisusb, c);
-
-	sisusb->con_rolled_over = 0;
-
-	mutex_unlock(&sisusb->lock);
-
-	return true;
-}
-
-/* Interface routine */
-static int
-sisusbcon_resize(struct vc_data *c, unsigned int newcols, unsigned int newrows,
-		 unsigned int user)
-{
-	struct sisusb_usb_data *sisusb;
-	int fh;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return -ENODEV;
-
-	fh = sisusb->current_font_height;
-
-	mutex_unlock(&sisusb->lock);
-
-	/* We are quite unflexible as regards resizing. The vt code
-	 * handles sizes where the line length isn't equal the pitch
-	 * quite badly. As regards the rows, our panning tricks only
-	 * work well if the number of rows equals the visible number
-	 * of rows.
-	 */
-
-	if (newcols != 80 || c->vc_scan_lines / fh != newrows)
-		return -EINVAL;
-
-	return 0;
-}
-
-int
-sisusbcon_do_font_op(struct sisusb_usb_data *sisusb, int set, int slot,
-			u8 *arg, int cmapsz, int ch512, int dorecalc,
-			struct vc_data *c, int fh, int uplock)
-{
-	int font_select = 0x00, i, err = 0;
-	u32 offset = 0;
-	u8 dummy;
-
-	/* sisusb->lock is down */
-
-	/*
-	 * The default font is kept in slot 0.
-	 * A user font is loaded in slot 2 (256 ch)
-	 * or 2+3 (512 ch).
-	 */
-
-	if ((slot != 0 && slot != 2) || !fh) {
-		if (uplock)
-			mutex_unlock(&sisusb->lock);
-		return -EINVAL;
-	}
-
-	if (set)
-		sisusb->font_slot = slot;
-
-	/* Default font is always 256 */
-	if (slot == 0)
-		ch512 = 0;
-	else
-		offset = 4 * cmapsz;
-
-	font_select = (slot == 0) ? 0x00 : (ch512 ? 0x0e : 0x0a);
-
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x01); /* Reset */
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x02, 0x04); /* Write to plane 2 */
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x04, 0x07); /* Memory mode a0-bf */
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x03); /* Reset */
-
-	if (err)
-		goto font_op_error;
-
-	err |= sisusb_setidxreg(sisusb, SISGR, 0x04, 0x03); /* Select plane read 2 */
-	err |= sisusb_setidxreg(sisusb, SISGR, 0x05, 0x00); /* Disable odd/even */
-	err |= sisusb_setidxreg(sisusb, SISGR, 0x06, 0x00); /* Address range a0-bf */
-
-	if (err)
-		goto font_op_error;
-
-	if (arg) {
-		if (set)
-			for (i = 0; i < cmapsz; i++) {
-				err |= sisusb_writeb(sisusb,
-					sisusb->vrambase + offset + i,
-					arg[i]);
-				if (err)
-					break;
-			}
-		else
-			for (i = 0; i < cmapsz; i++) {
-				err |= sisusb_readb(sisusb,
-					sisusb->vrambase + offset + i,
-					&arg[i]);
-				if (err)
-					break;
-			}
-
-		/*
-		 * In 512-character mode, the character map is not contiguous if
-		 * we want to remain EGA compatible -- which we do
-		 */
-
-		if (ch512) {
-			if (set)
-				for (i = 0; i < cmapsz; i++) {
-					err |= sisusb_writeb(sisusb,
-						sisusb->vrambase + offset +
-							(2 * cmapsz) + i,
-						arg[cmapsz + i]);
-					if (err)
-						break;
-				}
-			else
-				for (i = 0; i < cmapsz; i++) {
-					err |= sisusb_readb(sisusb,
-						sisusb->vrambase + offset +
-							(2 * cmapsz) + i,
-						&arg[cmapsz + i]);
-					if (err)
-						break;
-				}
-		}
-	}
-
-	if (err)
-		goto font_op_error;
-
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x01); /* Reset */
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x02, 0x03); /* Write to planes 0+1 */
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x04, 0x03); /* Memory mode a0-bf */
-	if (set)
-		sisusb_setidxreg(sisusb, SISSR, 0x03, font_select);
-	err |= sisusb_setidxreg(sisusb, SISSR, 0x00, 0x03); /* Reset end */
-
-	if (err)
-		goto font_op_error;
-
-	err |= sisusb_setidxreg(sisusb, SISGR, 0x04, 0x00); /* Select plane read 0 */
-	err |= sisusb_setidxreg(sisusb, SISGR, 0x05, 0x10); /* Enable odd/even */
-	err |= sisusb_setidxreg(sisusb, SISGR, 0x06, 0x06); /* Address range b8-bf */
-
-	if (err)
-		goto font_op_error;
-
-	if ((set) && (ch512 != sisusb->current_font_512)) {
-
-		/* Font is shared among all our consoles.
-		 * And so is the hi_font_mask.
-		 */
-		for (i = 0; i < MAX_NR_CONSOLES; i++) {
-			struct vc_data *d = vc_cons[i].d;
-			if (d && d->vc_sw == &sisusb_con)
-				d->vc_hi_font_mask = ch512 ? 0x0800 : 0;
-		}
-
-		sisusb->current_font_512 = ch512;
-
-		/* color plane enable register:
-			256-char: enable intensity bit
-			512-char: disable intensity bit */
-		sisusb_getreg(sisusb, SISINPSTAT, &dummy);
-		sisusb_setreg(sisusb, SISAR, 0x12);
-		sisusb_setreg(sisusb, SISAR, ch512 ? 0x07 : 0x0f);
-
-		sisusb_getreg(sisusb, SISINPSTAT, &dummy);
-		sisusb_setreg(sisusb, SISAR, 0x20);
-		sisusb_getreg(sisusb, SISINPSTAT, &dummy);
-	}
-
-	if (dorecalc) {
-
-		/*
-		 * Adjust the screen to fit a font of a certain height
-		 */
-
-		unsigned char ovr, vde, fsr;
-		int rows = 0, maxscan = 0;
-
-		if (c) {
-
-			/* Number of video rows */
-			rows = c->vc_scan_lines / fh;
-			/* Scan lines to actually display-1 */
-			maxscan = rows * fh - 1;
-
-			/*printk(KERN_DEBUG "sisusb recalc rows %d maxscan %d fh %d sl %d\n",
-				rows, maxscan, fh, c->vc_scan_lines);*/
-
-			sisusb_getidxreg(sisusb, SISCR, 0x07, &ovr);
-			vde = maxscan & 0xff;
-			ovr = (ovr & 0xbd) |
-			      ((maxscan & 0x100) >> 7) |
-			      ((maxscan & 0x200) >> 3);
-			sisusb_setidxreg(sisusb, SISCR, 0x07, ovr);
-			sisusb_setidxreg(sisusb, SISCR, 0x12, vde);
-
-		}
-
-		sisusb_getidxreg(sisusb, SISCR, 0x09, &fsr);
-		fsr = (fsr & 0xe0) | (fh - 1);
-		sisusb_setidxreg(sisusb, SISCR, 0x09, fsr);
-		sisusb->current_font_height = fh;
-
-		sisusb->sisusb_cursor_size_from = -1;
-		sisusb->sisusb_cursor_size_to   = -1;
-
-	}
-
-	if (uplock)
-		mutex_unlock(&sisusb->lock);
-
-	if (dorecalc && c) {
-		int rows = c->vc_scan_lines / fh;
-
-		/* Now adjust our consoles' size */
-
-		for (i = 0; i < MAX_NR_CONSOLES; i++) {
-			struct vc_data *vc = vc_cons[i].d;
-
-			if (vc && vc->vc_sw == &sisusb_con) {
-				if (con_is_visible(vc)) {
-					vc->vc_sw->con_cursor(vc, CM_DRAW);
-				}
-				vc->vc_font.height = fh;
-				vc_resize(vc, 0, rows);
-			}
-		}
-	}
-
-	return 0;
-
-font_op_error:
-	if (uplock)
-		mutex_unlock(&sisusb->lock);
-
-	return -EIO;
-}
-
-/* Interface routine */
-static int
-sisusbcon_font_set(struct vc_data *c, struct console_font *font,
-		   unsigned int flags)
-{
-	struct sisusb_usb_data *sisusb;
-	unsigned charcount = font->charcount;
-
-	if (font->width != 8 || (charcount != 256 && charcount != 512))
-		return -EINVAL;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return -ENODEV;
-
-	/* sisusb->lock is down */
-
-	/* Save the user-provided font into a buffer. This
-	 * is used for restoring text mode after quitting
-	 * from X and for the con_getfont routine.
-	 */
-	if (sisusb->font_backup) {
-		if (sisusb->font_backup_size < charcount) {
-			vfree(sisusb->font_backup);
-			sisusb->font_backup = NULL;
-		}
-	}
-
-	if (!sisusb->font_backup)
-		sisusb->font_backup = vmalloc(array_size(charcount, 32));
-
-	if (sisusb->font_backup) {
-		memcpy(sisusb->font_backup, font->data, array_size(charcount, 32));
-		sisusb->font_backup_size = charcount;
-		sisusb->font_backup_height = font->height;
-		sisusb->font_backup_512 = (charcount == 512) ? 1 : 0;
-	}
-
-	/* do_font_op ups sisusb->lock */
-
-	return sisusbcon_do_font_op(sisusb, 1, 2, font->data,
-			8192, (charcount == 512),
-			(!(flags & KD_FONT_FLAG_DONT_RECALC)) ? 1 : 0,
-			c, font->height, 1);
-}
-
-/* Interface routine */
-static int
-sisusbcon_font_get(struct vc_data *c, struct console_font *font)
-{
-	struct sisusb_usb_data *sisusb;
-
-	sisusb = sisusb_get_sisusb_lock_and_check(c->vc_num);
-	if (!sisusb)
-		return -ENODEV;
-
-	/* sisusb->lock is down */
-
-	font->width = 8;
-	font->height = c->vc_font.height;
-	font->charcount = 256;
-
-	if (!font->data) {
-		mutex_unlock(&sisusb->lock);
-		return 0;
-	}
-
-	if (!sisusb->font_backup) {
-		mutex_unlock(&sisusb->lock);
-		return -ENODEV;
-	}
-
-	/* Copy 256 chars only, like vgacon */
-	memcpy(font->data, sisusb->font_backup, 256 * 32);
-
-	mutex_unlock(&sisusb->lock);
-
-	return 0;
-}
-
-/*
- *  The console `switch' structure for the sisusb console
- */
-
-static const struct consw sisusb_con = {
-	.owner =		THIS_MODULE,
-	.con_startup =		sisusbcon_startup,
-	.con_init =		sisusbcon_init,
-	.con_deinit =		sisusbcon_deinit,
-	.con_clear =		sisusbcon_clear,
-	.con_putc =		sisusbcon_putc,
-	.con_putcs =		sisusbcon_putcs,
-	.con_cursor =		sisusbcon_cursor,
-	.con_scroll =		sisusbcon_scroll,
-	.con_switch =		sisusbcon_switch,
-	.con_blank =		sisusbcon_blank,
-	.con_font_set =		sisusbcon_font_set,
-	.con_font_get =		sisusbcon_font_get,
-	.con_set_palette =	sisusbcon_set_palette,
-	.con_scrolldelta =	sisusbcon_scrolldelta,
-	.con_build_attr =	sisusbcon_build_attr,
-	.con_invert_region =	sisusbcon_invert_region,
-	.con_set_origin =	sisusbcon_set_origin,
-	.con_save_screen =	sisusbcon_save_screen,
-	.con_resize =		sisusbcon_resize,
-};
-
-/* Our very own dummy console driver */
-
-static const char *sisusbdummycon_startup(void)
-{
-    return "SISUSBVGADUMMY";
-}
-
-static void sisusbdummycon_init(struct vc_data *vc, int init)
-{
-    vc->vc_can_do_color = 1;
-    if (init) {
-	vc->vc_cols = 80;
-	vc->vc_rows = 25;
-    } else
-	vc_resize(vc, 80, 25);
-}
-
-static void sisusbdummycon_deinit(struct vc_data *vc) { }
-static void sisusbdummycon_clear(struct vc_data *vc, int sy, int sx,
-				 int height, int width) { }
-static void sisusbdummycon_putc(struct vc_data *vc, int c, int ypos,
-				int xpos) { }
-static void sisusbdummycon_putcs(struct vc_data *vc, const unsigned short *s,
-				 int count, int ypos, int xpos) { }
-static void sisusbdummycon_cursor(struct vc_data *vc, int mode) { }
-
-static bool sisusbdummycon_scroll(struct vc_data *vc, unsigned int top,
-				  unsigned int bottom, enum con_scroll dir,
-				  unsigned int lines)
-{
-	return false;
-}
-
-static int sisusbdummycon_switch(struct vc_data *vc)
-{
-	return 0;
-}
-
-static int sisusbdummycon_blank(struct vc_data *vc, int blank, int mode_switch)
-{
-	return 0;
-}
-
-static const struct consw sisusb_dummy_con = {
-	.owner =		THIS_MODULE,
-	.con_startup =		sisusbdummycon_startup,
-	.con_init =		sisusbdummycon_init,
-	.con_deinit =		sisusbdummycon_deinit,
-	.con_clear =		sisusbdummycon_clear,
-	.con_putc =		sisusbdummycon_putc,
-	.con_putcs =		sisusbdummycon_putcs,
-	.con_cursor =		sisusbdummycon_cursor,
-	.con_scroll =		sisusbdummycon_scroll,
-	.con_switch =		sisusbdummycon_switch,
-	.con_blank =		sisusbdummycon_blank,
-};
-
-int
-sisusb_console_init(struct sisusb_usb_data *sisusb, int first, int last)
-{
-	int i, ret;
-
-	mutex_lock(&sisusb->lock);
-
-	/* Erm.. that should not happen */
-	if (sisusb->haveconsole || !sisusb->SiS_Pr) {
-		mutex_unlock(&sisusb->lock);
-		return 1;
-	}
-
-	sisusb->con_first = first;
-	sisusb->con_last  = last;
-
-	if (first > last ||
-	    first > MAX_NR_CONSOLES ||
-	    last > MAX_NR_CONSOLES) {
-		mutex_unlock(&sisusb->lock);
-		return 1;
-	}
-
-	/* If gfxcore not initialized or no consoles given, quit graciously */
-	if (!sisusb->gfxinit || first < 1 || last < 1) {
-		mutex_unlock(&sisusb->lock);
-		return 0;
-	}
-
-	sisusb->sisusb_cursor_loc       = -1;
-	sisusb->sisusb_cursor_size_from = -1;
-	sisusb->sisusb_cursor_size_to   = -1;
-
-	/* Set up text mode (and upload  default font) */
-	if (sisusb_reset_text_mode(sisusb, 1)) {
-		mutex_unlock(&sisusb->lock);
-		dev_err(&sisusb->sisusb_dev->dev, "Failed to set up text mode\n");
-		return 1;
-	}
-
-	/* Initialize some gfx registers */
-	sisusb_initialize(sisusb);
-
-	for (i = first - 1; i <= last - 1; i++) {
-		/* Save sisusb for our interface routines */
-		mysisusbs[i] = sisusb;
-	}
-
-	/* Initial console setup */
-	sisusb->sisusb_num_columns = 80;
-
-	/* Use a 32K buffer (matches b8000-bffff area) */
-	sisusb->scrbuf_size = 32 * 1024;
-
-	/* Allocate screen buffer */
-	if (!(sisusb->scrbuf = (unsigned long)vmalloc(sisusb->scrbuf_size))) {
-		mutex_unlock(&sisusb->lock);
-		dev_err(&sisusb->sisusb_dev->dev, "Failed to allocate screen buffer\n");
-		return 1;
-	}
-
-	mutex_unlock(&sisusb->lock);
-
-	/* Now grab the desired console(s) */
-	console_lock();
-	ret = do_take_over_console(&sisusb_con, first - 1, last - 1, 0);
-	console_unlock();
-	if (!ret)
-		sisusb->haveconsole = 1;
-	else {
-		for (i = first - 1; i <= last - 1; i++)
-			mysisusbs[i] = NULL;
-	}
-
-	return ret;
-}
-
-void
-sisusb_console_exit(struct sisusb_usb_data *sisusb)
-{
-	int i;
-
-	/* This is called if the device is disconnected
-	 * and while disconnect and lock semaphores
-	 * are up. This should be save because we
-	 * can't lose our sisusb any other way but by
-	 * disconnection (and hence, the disconnect
-	 * sema is for protecting all other access
-	 * functions from disconnection, not the
-	 * other way round).
-	 */
-
-	/* Now what do we do in case of disconnection:
-	 * One alternative would be to simply call
-	 * give_up_console(). Nah, not a good idea.
-	 * give_up_console() is obviously buggy as it
-	 * only discards the consw pointer from the
-	 * driver_map, but doesn't adapt vc->vc_sw
-	 * of the affected consoles. Hence, the next
-	 * call to any of the console functions will
-	 * eventually take a trip to oops county.
-	 * Also, give_up_console for some reason
-	 * doesn't decrement our module refcount.
-	 * Instead, we switch our consoles to a private
-	 * dummy console. This, of course, keeps our
-	 * refcount up as well, but it works perfectly.
-	 */
-
-	if (sisusb->haveconsole) {
-		for (i = 0; i < MAX_NR_CONSOLES; i++)
-			if (sisusb->havethisconsole[i]) {
-				console_lock();
-				do_take_over_console(&sisusb_dummy_con, i, i, 0);
-				console_unlock();
-				/* At this point, con_deinit for all our
-				 * consoles is executed by do_take_over_console().
-				 */
-			}
-		sisusb->haveconsole = 0;
-	}
-
-	vfree((void *)sisusb->scrbuf);
-	sisusb->scrbuf = 0;
-
-	vfree(sisusb->font_backup);
-	sisusb->font_backup = NULL;
-}
-
-void __init sisusb_init_concode(void)
-{
-	int i;
-
-	for (i = 0; i < MAX_NR_CONSOLES; i++)
-		mysisusbs[i] = NULL;
-}
diff --git a/drivers/usb/misc/sisusbvga/sisusb_init.c b/drivers/usb/misc/sisusbvga/sisusb_init.c
deleted file mode 100644
index 7c11198d5dda0..0000000000000
--- a/drivers/usb/misc/sisusbvga/sisusb_init.c
+++ /dev/null
@@ -1,955 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
-/*
- * sisusb - usb kernel driver for SiS315(E) based USB2VGA dongles
- *
- * Display mode initializing code
- *
- * Copyright (C) 2001-2005 by Thomas Winischhofer, Vienna, Austria
- *
- * If distributed as part of the Linux kernel, this code is licensed under the
- * terms of the GPL v2.
- *
- * Otherwise, the following license terms apply:
- *
- * * Redistribution and use in source and binary forms, with or without
- * * modification, are permitted provided that the following conditions
- * * are met:
- * * 1) Redistributions of source code must retain the above copyright
- * *    notice, this list of conditions and the following disclaimer.
- * * 2) Redistributions in binary form must reproduce the above copyright
- * *    notice, this list of conditions and the following disclaimer in the
- * *    documentation and/or other materials provided with the distribution.
- * * 3) The name of the author may not be used to endorse or promote products
- * *    derived from this software without specific prior written permission.
- * *
- * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Author:	Thomas Winischhofer <thomas@winischhofer.net>
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/poll.h>
-#include <linux/spinlock.h>
-
-#include "sisusb.h"
-#include "sisusb_init.h"
-#include "sisusb_tables.h"
-
-/*********************************************/
-/*         POINTER INITIALIZATION            */
-/*********************************************/
-
-static void SiSUSB_InitPtr(struct SiS_Private *SiS_Pr)
-{
-	SiS_Pr->SiS_ModeResInfo = SiSUSB_ModeResInfo;
-	SiS_Pr->SiS_StandTable = SiSUSB_StandTable;
-
-	SiS_Pr->SiS_SModeIDTable = SiSUSB_SModeIDTable;
-	SiS_Pr->SiS_EModeIDTable = SiSUSB_EModeIDTable;
-	SiS_Pr->SiS_RefIndex = SiSUSB_RefIndex;
-	SiS_Pr->SiS_CRT1Table = SiSUSB_CRT1Table;
-
-	SiS_Pr->SiS_VCLKData = SiSUSB_VCLKData;
-}
-
-/*********************************************/
-/*          HELPER: SetReg, GetReg           */
-/*********************************************/
-
-static void
-SiS_SetReg(struct SiS_Private *SiS_Pr, unsigned long port,
-	   unsigned short index, unsigned short data)
-{
-	sisusb_setidxreg(SiS_Pr->sisusb, port, index, data);
-}
-
-static void
-SiS_SetRegByte(struct SiS_Private *SiS_Pr, unsigned long port,
-	       unsigned short data)
-{
-	sisusb_setreg(SiS_Pr->sisusb, port, data);
-}
-
-static unsigned char
-SiS_GetReg(struct SiS_Private *SiS_Pr, unsigned long port, unsigned short index)
-{
-	u8 data;
-
-	sisusb_getidxreg(SiS_Pr->sisusb, port, index, &data);
-
-	return data;
-}
-
-static unsigned char
-SiS_GetRegByte(struct SiS_Private *SiS_Pr, unsigned long port)
-{
-	u8 data;
-
-	sisusb_getreg(SiS_Pr->sisusb, port, &data);
-
-	return data;
-}
-
-static void
-SiS_SetRegANDOR(struct SiS_Private *SiS_Pr, unsigned long port,
-		unsigned short index, unsigned short DataAND,
-		unsigned short DataOR)
-{
-	sisusb_setidxregandor(SiS_Pr->sisusb, port, index, DataAND, DataOR);
-}
-
-static void
-SiS_SetRegAND(struct SiS_Private *SiS_Pr, unsigned long port,
-	      unsigned short index, unsigned short DataAND)
-{
-	sisusb_setidxregand(SiS_Pr->sisusb, port, index, DataAND);
-}
-
-static void
-SiS_SetRegOR(struct SiS_Private *SiS_Pr, unsigned long port,
-	     unsigned short index, unsigned short DataOR)
-{
-	sisusb_setidxregor(SiS_Pr->sisusb, port, index, DataOR);
-}
-
-/*********************************************/
-/*      HELPER: DisplayOn, DisplayOff        */
-/*********************************************/
-
-static void SiS_DisplayOn(struct SiS_Private *SiS_Pr)
-{
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x01, 0xDF);
-}
-
-/*********************************************/
-/*        HELPER: Init Port Addresses        */
-/*********************************************/
-
-static void SiSUSBRegInit(struct SiS_Private *SiS_Pr, unsigned long BaseAddr)
-{
-	SiS_Pr->SiS_P3c4 = BaseAddr + 0x14;
-	SiS_Pr->SiS_P3d4 = BaseAddr + 0x24;
-	SiS_Pr->SiS_P3c0 = BaseAddr + 0x10;
-	SiS_Pr->SiS_P3ce = BaseAddr + 0x1e;
-	SiS_Pr->SiS_P3c2 = BaseAddr + 0x12;
-	SiS_Pr->SiS_P3ca = BaseAddr + 0x1a;
-	SiS_Pr->SiS_P3c6 = BaseAddr + 0x16;
-	SiS_Pr->SiS_P3c7 = BaseAddr + 0x17;
-	SiS_Pr->SiS_P3c8 = BaseAddr + 0x18;
-	SiS_Pr->SiS_P3c9 = BaseAddr + 0x19;
-	SiS_Pr->SiS_P3cb = BaseAddr + 0x1b;
-	SiS_Pr->SiS_P3cc = BaseAddr + 0x1c;
-	SiS_Pr->SiS_P3cd = BaseAddr + 0x1d;
-	SiS_Pr->SiS_P3da = BaseAddr + 0x2a;
-	SiS_Pr->SiS_Part1Port = BaseAddr + SIS_CRT2_PORT_04;
-}
-
-/*********************************************/
-/*             HELPER: GetSysFlags           */
-/*********************************************/
-
-static void SiS_GetSysFlags(struct SiS_Private *SiS_Pr)
-{
-	SiS_Pr->SiS_MyCR63 = 0x63;
-}
-
-/*********************************************/
-/*         HELPER: Init PCI & Engines        */
-/*********************************************/
-
-static void SiSInitPCIetc(struct SiS_Private *SiS_Pr)
-{
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x20, 0xa1);
-	/*  - Enable 2D (0x40)
-	 *  - Enable 3D (0x02)
-	 *  - Enable 3D vertex command fetch (0x10)
-	 *  - Enable 3D command parser (0x08)
-	 *  - Enable 3D G/L transformation engine (0x80)
-	 */
-	SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1E, 0xDA);
-}
-
-/*********************************************/
-/*        HELPER: SET SEGMENT REGISTERS      */
-/*********************************************/
-
-static void SiS_SetSegRegLower(struct SiS_Private *SiS_Pr, unsigned short value)
-{
-	unsigned short temp;
-
-	value &= 0x00ff;
-	temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb) & 0xf0;
-	temp |= (value >> 4);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb, temp);
-	temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd) & 0xf0;
-	temp |= (value & 0x0f);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd, temp);
-}
-
-static void SiS_SetSegRegUpper(struct SiS_Private *SiS_Pr, unsigned short value)
-{
-	unsigned short temp;
-
-	value &= 0x00ff;
-	temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb) & 0x0f;
-	temp |= (value & 0xf0);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cb, temp);
-	temp = SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd) & 0x0f;
-	temp |= (value << 4);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3cd, temp);
-}
-
-static void SiS_SetSegmentReg(struct SiS_Private *SiS_Pr, unsigned short value)
-{
-	SiS_SetSegRegLower(SiS_Pr, value);
-	SiS_SetSegRegUpper(SiS_Pr, value);
-}
-
-static void SiS_ResetSegmentReg(struct SiS_Private *SiS_Pr)
-{
-	SiS_SetSegmentReg(SiS_Pr, 0);
-}
-
-static void
-SiS_SetSegmentRegOver(struct SiS_Private *SiS_Pr, unsigned short value)
-{
-	unsigned short temp = value >> 8;
-
-	temp &= 0x07;
-	temp |= (temp << 4);
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1d, temp);
-	SiS_SetSegmentReg(SiS_Pr, value);
-}
-
-static void SiS_ResetSegmentRegOver(struct SiS_Private *SiS_Pr)
-{
-	SiS_SetSegmentRegOver(SiS_Pr, 0);
-}
-
-static void SiS_ResetSegmentRegisters(struct SiS_Private *SiS_Pr)
-{
-	SiS_ResetSegmentReg(SiS_Pr);
-	SiS_ResetSegmentRegOver(SiS_Pr);
-}
-
-/*********************************************/
-/*           HELPER: SearchModeID            */
-/*********************************************/
-
-static int
-SiS_SearchModeID(struct SiS_Private *SiS_Pr, unsigned short *ModeNo,
-		 unsigned short *ModeIdIndex)
-{
-	if ((*ModeNo) <= 0x13) {
-
-		if ((*ModeNo) != 0x03)
-			return 0;
-
-		(*ModeIdIndex) = 0;
-
-	} else {
-
-		for (*ModeIdIndex = 0;; (*ModeIdIndex)++) {
-
-			if (SiS_Pr->SiS_EModeIDTable[*ModeIdIndex].Ext_ModeID ==
-			    (*ModeNo))
-				break;
-
-			if (SiS_Pr->SiS_EModeIDTable[*ModeIdIndex].Ext_ModeID ==
-			    0xFF)
-				return 0;
-		}
-
-	}
-
-	return 1;
-}
-
-/*********************************************/
-/*            HELPER: ENABLE CRT1            */
-/*********************************************/
-
-static void SiS_HandleCRT1(struct SiS_Private *SiS_Pr)
-{
-	/* Enable CRT1 gating */
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3d4, SiS_Pr->SiS_MyCR63, 0xbf);
-}
-
-/*********************************************/
-/*           HELPER: GetColorDepth           */
-/*********************************************/
-
-static unsigned short
-SiS_GetColorDepth(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		  unsigned short ModeIdIndex)
-{
-	static const unsigned short ColorDepth[6] = { 1, 2, 4, 4, 6, 8 };
-	unsigned short modeflag;
-	short index;
-
-	if (ModeNo <= 0x13) {
-		modeflag = SiS_Pr->SiS_SModeIDTable[ModeIdIndex].St_ModeFlag;
-	} else {
-		modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag;
-	}
-
-	index = (modeflag & ModeTypeMask) - ModeEGA;
-	if (index < 0)
-		index = 0;
-	return ColorDepth[index];
-}
-
-/*********************************************/
-/*             HELPER: GetOffset             */
-/*********************************************/
-
-static unsigned short
-SiS_GetOffset(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-	      unsigned short ModeIdIndex, unsigned short rrti)
-{
-	unsigned short xres, temp, colordepth, infoflag;
-
-	infoflag = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag;
-	xres = SiS_Pr->SiS_RefIndex[rrti].XRes;
-
-	colordepth = SiS_GetColorDepth(SiS_Pr, ModeNo, ModeIdIndex);
-
-	temp = xres / 16;
-
-	if (infoflag & InterlaceMode)
-		temp <<= 1;
-
-	temp *= colordepth;
-
-	if (xres % 16)
-		temp += (colordepth >> 1);
-
-	return temp;
-}
-
-/*********************************************/
-/*                   SEQ                     */
-/*********************************************/
-
-static void
-SiS_SetSeqRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex)
-{
-	unsigned char SRdata;
-	int i;
-
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x00, 0x03);
-
-	SRdata = SiS_Pr->SiS_StandTable[StandTableIndex].SR[0] | 0x20;
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x01, SRdata);
-
-	for (i = 2; i <= 4; i++) {
-		SRdata = SiS_Pr->SiS_StandTable[StandTableIndex].SR[i - 1];
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, i, SRdata);
-	}
-}
-
-/*********************************************/
-/*                  MISC                     */
-/*********************************************/
-
-static void
-SiS_SetMiscRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex)
-{
-	unsigned char Miscdata = SiS_Pr->SiS_StandTable[StandTableIndex].MISC;
-
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c2, Miscdata);
-}
-
-/*********************************************/
-/*                  CRTC                     */
-/*********************************************/
-
-static void
-SiS_SetCRTCRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex)
-{
-	unsigned char CRTCdata;
-	unsigned short i;
-
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3d4, 0x11, 0x7f);
-
-	for (i = 0; i <= 0x18; i++) {
-		CRTCdata = SiS_Pr->SiS_StandTable[StandTableIndex].CRTC[i];
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, i, CRTCdata);
-	}
-}
-
-/*********************************************/
-/*                   ATT                     */
-/*********************************************/
-
-static void
-SiS_SetATTRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex)
-{
-	unsigned char ARdata;
-	unsigned short i;
-
-	for (i = 0; i <= 0x13; i++) {
-		ARdata = SiS_Pr->SiS_StandTable[StandTableIndex].ATTR[i];
-		SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da);
-		SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, i);
-		SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, ARdata);
-	}
-	SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, 0x14);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, 0x00);
-
-	SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c0, 0x20);
-	SiS_GetRegByte(SiS_Pr, SiS_Pr->SiS_P3da);
-}
-
-/*********************************************/
-/*                   GRC                     */
-/*********************************************/
-
-static void
-SiS_SetGRCRegs(struct SiS_Private *SiS_Pr, unsigned short StandTableIndex)
-{
-	unsigned char GRdata;
-	unsigned short i;
-
-	for (i = 0; i <= 0x08; i++) {
-		GRdata = SiS_Pr->SiS_StandTable[StandTableIndex].GRC[i];
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3ce, i, GRdata);
-	}
-
-	if (SiS_Pr->SiS_ModeType > ModeVGA) {
-		/* 256 color disable */
-		SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3ce, 0x05, 0xBF);
-	}
-}
-
-/*********************************************/
-/*          CLEAR EXTENDED REGISTERS         */
-/*********************************************/
-
-static void SiS_ClearExt1Regs(struct SiS_Private *SiS_Pr, unsigned short ModeNo)
-{
-	int i;
-
-	for (i = 0x0A; i <= 0x0E; i++) {
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, i, 0x00);
-	}
-
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x37, 0xFE);
-}
-
-/*********************************************/
-/*              Get rate index               */
-/*********************************************/
-
-static unsigned short
-SiS_GetRatePtr(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-	       unsigned short ModeIdIndex)
-{
-	unsigned short rrti, i, index, temp;
-
-	if (ModeNo <= 0x13)
-		return 0xFFFF;
-
-	index = SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x33) & 0x0F;
-	if (index > 0)
-		index--;
-
-	rrti = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].REFindex;
-	ModeNo = SiS_Pr->SiS_RefIndex[rrti].ModeID;
-
-	i = 0;
-	do {
-		if (SiS_Pr->SiS_RefIndex[rrti + i].ModeID != ModeNo)
-			break;
-
-		temp =
-		    SiS_Pr->SiS_RefIndex[rrti + i].Ext_InfoFlag & ModeTypeMask;
-		if (temp < SiS_Pr->SiS_ModeType)
-			break;
-
-		i++;
-		index--;
-	} while (index != 0xFFFF);
-
-	i--;
-
-	return (rrti + i);
-}
-
-/*********************************************/
-/*                  SYNC                     */
-/*********************************************/
-
-static void SiS_SetCRT1Sync(struct SiS_Private *SiS_Pr, unsigned short rrti)
-{
-	unsigned short sync = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag >> 8;
-	sync &= 0xC0;
-	sync |= 0x2f;
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c2, sync);
-}
-
-/*********************************************/
-/*                  CRTC/2                   */
-/*********************************************/
-
-static void
-SiS_SetCRT1CRTC(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		unsigned short ModeIdIndex, unsigned short rrti)
-{
-	unsigned char index;
-	unsigned short temp, i, j, modeflag;
-
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3d4, 0x11, 0x7f);
-
-	modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag;
-
-	index = SiS_Pr->SiS_RefIndex[rrti].Ext_CRT1CRTC;
-
-	for (i = 0, j = 0; i <= 7; i++, j++) {
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, j,
-			   SiS_Pr->SiS_CRT1Table[index].CR[i]);
-	}
-	for (j = 0x10; i <= 10; i++, j++) {
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, j,
-			   SiS_Pr->SiS_CRT1Table[index].CR[i]);
-	}
-	for (j = 0x15; i <= 12; i++, j++) {
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, j,
-			   SiS_Pr->SiS_CRT1Table[index].CR[i]);
-	}
-	for (j = 0x0A; i <= 15; i++, j++) {
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, j,
-			   SiS_Pr->SiS_CRT1Table[index].CR[i]);
-	}
-
-	temp = SiS_Pr->SiS_CRT1Table[index].CR[16] & 0xE0;
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0E, temp);
-
-	temp = ((SiS_Pr->SiS_CRT1Table[index].CR[16]) & 0x01) << 5;
-	if (modeflag & DoubleScanMode)
-		temp |= 0x80;
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3d4, 0x09, 0x5F, temp);
-
-	if (SiS_Pr->SiS_ModeType > ModeVGA)
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x14, 0x4F);
-}
-
-/*********************************************/
-/*               OFFSET & PITCH              */
-/*********************************************/
-/*  (partly overruled by SetPitch() in XF86) */
-/*********************************************/
-
-static void
-SiS_SetCRT1Offset(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		  unsigned short ModeIdIndex, unsigned short rrti)
-{
-	unsigned short du = SiS_GetOffset(SiS_Pr, ModeNo, ModeIdIndex, rrti);
-	unsigned short infoflag = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag;
-	unsigned short temp;
-
-	temp = (du >> 8) & 0x0f;
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0E, 0xF0, temp);
-
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x13, (du & 0xFF));
-
-	if (infoflag & InterlaceMode)
-		du >>= 1;
-
-	du <<= 5;
-	temp = (du >> 8) & 0xff;
-	if (du & 0xff)
-		temp++;
-	temp++;
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x10, temp);
-}
-
-/*********************************************/
-/*                  VCLK                     */
-/*********************************************/
-
-static void
-SiS_SetCRT1VCLK(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		unsigned short rrti)
-{
-	unsigned short index = SiS_Pr->SiS_RefIndex[rrti].Ext_CRTVCLK;
-	unsigned short clka = SiS_Pr->SiS_VCLKData[index].SR2B;
-	unsigned short clkb = SiS_Pr->SiS_VCLKData[index].SR2C;
-
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x31, 0xCF);
-
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x2B, clka);
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x2C, clkb);
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x2D, 0x01);
-}
-
-/*********************************************/
-/*                  FIFO                     */
-/*********************************************/
-
-static void
-SiS_SetCRT1FIFO_310(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		    unsigned short mi)
-{
-	unsigned short modeflag = SiS_Pr->SiS_EModeIDTable[mi].Ext_ModeFlag;
-
-	/* disable auto-threshold */
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x3D, 0xFE);
-
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x08, 0xAE);
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x09, 0xF0);
-
-	if (ModeNo <= 0x13)
-		return;
-
-	if ((!(modeflag & DoubleScanMode)) || (!(modeflag & HalfDCLK))) {
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x08, 0x34);
-		SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x3D, 0x01);
-	}
-}
-
-/*********************************************/
-/*              MODE REGISTERS               */
-/*********************************************/
-
-static void
-SiS_SetVCLKState(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		 unsigned short rrti)
-{
-	unsigned short data = 0, VCLK = 0, index = 0;
-
-	if (ModeNo > 0x13) {
-		index = SiS_Pr->SiS_RefIndex[rrti].Ext_CRTVCLK;
-		VCLK = SiS_Pr->SiS_VCLKData[index].CLOCK;
-	}
-
-	if (VCLK >= 166)
-		data |= 0x0c;
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x32, 0xf3, data);
-
-	if (VCLK >= 166)
-		SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1f, 0xe7);
-
-	/* DAC speed */
-	data = 0x03;
-	if (VCLK >= 260)
-		data = 0x00;
-	else if (VCLK >= 160)
-		data = 0x01;
-	else if (VCLK >= 135)
-		data = 0x02;
-
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x07, 0xF8, data);
-}
-
-static void
-SiS_SetCRT1ModeRegs(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		    unsigned short ModeIdIndex, unsigned short rrti)
-{
-	unsigned short data, infoflag = 0, modeflag;
-
-	if (ModeNo <= 0x13)
-		modeflag = SiS_Pr->SiS_SModeIDTable[ModeIdIndex].St_ModeFlag;
-	else {
-		modeflag = SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag;
-		infoflag = SiS_Pr->SiS_RefIndex[rrti].Ext_InfoFlag;
-	}
-
-	/* Disable DPMS */
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x1F, 0x3F);
-
-	data = 0;
-	if (ModeNo > 0x13) {
-		if (SiS_Pr->SiS_ModeType > ModeEGA) {
-			data |= 0x02;
-			data |= ((SiS_Pr->SiS_ModeType - ModeVGA) << 2);
-		}
-		if (infoflag & InterlaceMode)
-			data |= 0x20;
-	}
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x06, 0xC0, data);
-
-	data = 0;
-	if (infoflag & InterlaceMode) {
-		/* data = (Hsync / 8) - ((Htotal / 8) / 2) + 3 */
-		unsigned short hrs =
-		    (SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x04) |
-		     ((SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0b) & 0xc0) << 2))
-		    - 3;
-		unsigned short hto =
-		    (SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x00) |
-		     ((SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0b) & 0x03) << 8))
-		    + 5;
-		data = hrs - (hto >> 1) + 3;
-	}
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x19, (data & 0xFF));
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3d4, 0x1a, 0xFC, (data >> 8));
-
-	if (modeflag & HalfDCLK)
-		SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x01, 0x08);
-
-	data = 0;
-	if (modeflag & LineCompareOff)
-		data = 0x08;
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0F, 0xB7, data);
-
-	if ((SiS_Pr->SiS_ModeType == ModeEGA) && (ModeNo > 0x13))
-		SiS_SetRegOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x0F, 0x40);
-
-	SiS_SetRegAND(SiS_Pr, SiS_Pr->SiS_P3c4, 0x31, 0xfb);
-
-	data = 0x60;
-	if (SiS_Pr->SiS_ModeType != ModeText) {
-		data ^= 0x60;
-		if (SiS_Pr->SiS_ModeType != ModeEGA)
-			data ^= 0xA0;
-	}
-	SiS_SetRegANDOR(SiS_Pr, SiS_Pr->SiS_P3c4, 0x21, 0x1F, data);
-
-	SiS_SetVCLKState(SiS_Pr, ModeNo, rrti);
-
-	if (SiS_GetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x31) & 0x40)
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x52, 0x2c);
-	else
-		SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x52, 0x6c);
-}
-
-/*********************************************/
-/*                 LOAD DAC                  */
-/*********************************************/
-
-static void
-SiS_WriteDAC(struct SiS_Private *SiS_Pr, unsigned long DACData,
-	     unsigned short shiftflag, unsigned short dl, unsigned short ah,
-	     unsigned short al, unsigned short dh)
-{
-	unsigned short d1, d2, d3;
-
-	switch (dl) {
-	case 0:
-		d1 = dh;
-		d2 = ah;
-		d3 = al;
-		break;
-	case 1:
-		d1 = ah;
-		d2 = al;
-		d3 = dh;
-		break;
-	default:
-		d1 = al;
-		d2 = dh;
-		d3 = ah;
-	}
-	SiS_SetRegByte(SiS_Pr, DACData, (d1 << shiftflag));
-	SiS_SetRegByte(SiS_Pr, DACData, (d2 << shiftflag));
-	SiS_SetRegByte(SiS_Pr, DACData, (d3 << shiftflag));
-}
-
-static void
-SiS_LoadDAC(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-	    unsigned short mi)
-{
-	unsigned short data, data2, time, i, j, k, m, n, o;
-	unsigned short si, di, bx, sf;
-	unsigned long DACAddr, DACData;
-	const unsigned char *table = NULL;
-
-	if (ModeNo < 0x13)
-		data = SiS_Pr->SiS_SModeIDTable[mi].St_ModeFlag;
-	else
-		data = SiS_Pr->SiS_EModeIDTable[mi].Ext_ModeFlag;
-
-	data &= DACInfoFlag;
-
-	j = time = 64;
-	if (data == 0x00)
-		table = SiS_MDA_DAC;
-	else if (data == 0x08)
-		table = SiS_CGA_DAC;
-	else if (data == 0x10)
-		table = SiS_EGA_DAC;
-	else {
-		j = 16;
-		time = 256;
-		table = SiS_VGA_DAC;
-	}
-
-	DACAddr = SiS_Pr->SiS_P3c8;
-	DACData = SiS_Pr->SiS_P3c9;
-	sf = 0;
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c6, 0xFF);
-
-	SiS_SetRegByte(SiS_Pr, DACAddr, 0x00);
-
-	for (i = 0; i < j; i++) {
-		data = table[i];
-		for (k = 0; k < 3; k++) {
-			data2 = 0;
-			if (data & 0x01)
-				data2 += 0x2A;
-			if (data & 0x02)
-				data2 += 0x15;
-			SiS_SetRegByte(SiS_Pr, DACData, (data2 << sf));
-			data >>= 2;
-		}
-	}
-
-	if (time == 256) {
-		for (i = 16; i < 32; i++) {
-			data = table[i] << sf;
-			for (k = 0; k < 3; k++)
-				SiS_SetRegByte(SiS_Pr, DACData, data);
-		}
-		si = 32;
-		for (m = 0; m < 9; m++) {
-			di = si;
-			bx = si + 4;
-			for (n = 0; n < 3; n++) {
-				for (o = 0; o < 5; o++) {
-					SiS_WriteDAC(SiS_Pr, DACData, sf, n,
-						     table[di], table[bx],
-						     table[si]);
-					si++;
-				}
-				si -= 2;
-				for (o = 0; o < 3; o++) {
-					SiS_WriteDAC(SiS_Pr, DACData, sf, n,
-						     table[di], table[si],
-						     table[bx]);
-					si--;
-				}
-			}
-			si += 5;
-		}
-	}
-}
-
-/*********************************************/
-/*         SET CRT1 REGISTER GROUP           */
-/*********************************************/
-
-static void
-SiS_SetCRT1Group(struct SiS_Private *SiS_Pr, unsigned short ModeNo,
-		 unsigned short ModeIdIndex)
-{
-	unsigned short StandTableIndex, rrti;
-
-	SiS_Pr->SiS_CRT1Mode = ModeNo;
-
-	if (ModeNo <= 0x13)
-		StandTableIndex = 0;
-	else
-		StandTableIndex = 1;
-
-	SiS_ResetSegmentRegisters(SiS_Pr);
-	SiS_SetSeqRegs(SiS_Pr, StandTableIndex);
-	SiS_SetMiscRegs(SiS_Pr, StandTableIndex);
-	SiS_SetCRTCRegs(SiS_Pr, StandTableIndex);
-	SiS_SetATTRegs(SiS_Pr, StandTableIndex);
-	SiS_SetGRCRegs(SiS_Pr, StandTableIndex);
-	SiS_ClearExt1Regs(SiS_Pr, ModeNo);
-
-	rrti = SiS_GetRatePtr(SiS_Pr, ModeNo, ModeIdIndex);
-
-	if (rrti != 0xFFFF) {
-		SiS_SetCRT1Sync(SiS_Pr, rrti);
-		SiS_SetCRT1CRTC(SiS_Pr, ModeNo, ModeIdIndex, rrti);
-		SiS_SetCRT1Offset(SiS_Pr, ModeNo, ModeIdIndex, rrti);
-		SiS_SetCRT1VCLK(SiS_Pr, ModeNo, rrti);
-	}
-
-	SiS_SetCRT1FIFO_310(SiS_Pr, ModeNo, ModeIdIndex);
-
-	SiS_SetCRT1ModeRegs(SiS_Pr, ModeNo, ModeIdIndex, rrti);
-
-	SiS_LoadDAC(SiS_Pr, ModeNo, ModeIdIndex);
-
-	SiS_DisplayOn(SiS_Pr);
-}
-
-/*********************************************/
-/*                 SiSSetMode()              */
-/*********************************************/
-
-int SiSUSBSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo)
-{
-	unsigned short ModeIdIndex;
-	unsigned long BaseAddr = SiS_Pr->IOAddress;
-
-	SiSUSB_InitPtr(SiS_Pr);
-	SiSUSBRegInit(SiS_Pr, BaseAddr);
-	SiS_GetSysFlags(SiS_Pr);
-
-	if (!(SiS_SearchModeID(SiS_Pr, &ModeNo, &ModeIdIndex)))
-		return 0;
-
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3c4, 0x05, 0x86);
-
-	SiSInitPCIetc(SiS_Pr);
-
-	ModeNo &= 0x7f;
-
-	SiS_Pr->SiS_ModeType =
-	    SiS_Pr->SiS_EModeIDTable[ModeIdIndex].Ext_ModeFlag & ModeTypeMask;
-
-	SiS_Pr->SiS_SetFlag = LowModeTests;
-
-	/* Set mode on CRT1 */
-	SiS_SetCRT1Group(SiS_Pr, ModeNo, ModeIdIndex);
-
-	SiS_HandleCRT1(SiS_Pr);
-
-	SiS_DisplayOn(SiS_Pr);
-	SiS_SetRegByte(SiS_Pr, SiS_Pr->SiS_P3c6, 0xFF);
-
-	/* Store mode number */
-	SiS_SetReg(SiS_Pr, SiS_Pr->SiS_P3d4, 0x34, ModeNo);
-
-	return 1;
-}
-
-int SiSUSBSetVESAMode(struct SiS_Private *SiS_Pr, unsigned short VModeNo)
-{
-	unsigned short ModeNo = 0;
-	int i;
-
-	SiSUSB_InitPtr(SiS_Pr);
-
-	if (VModeNo == 0x03) {
-
-		ModeNo = 0x03;
-
-	} else {
-
-		i = 0;
-		do {
-
-			if (SiS_Pr->SiS_EModeIDTable[i].Ext_VESAID == VModeNo) {
-				ModeNo = SiS_Pr->SiS_EModeIDTable[i].Ext_ModeID;
-				break;
-			}
-
-		} while (SiS_Pr->SiS_EModeIDTable[i++].Ext_ModeID != 0xff);
-
-	}
-
-	if (!ModeNo)
-		return 0;
-
-	return SiSUSBSetMode(SiS_Pr, ModeNo);
-}
diff --git a/drivers/usb/misc/sisusbvga/sisusb_init.h b/drivers/usb/misc/sisusbvga/sisusb_init.h
deleted file mode 100644
index b5cd77ae941d7..0000000000000
--- a/drivers/usb/misc/sisusbvga/sisusb_init.h
+++ /dev/null
@@ -1,180 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
-/* $XFree86$ */
-/* $XdotOrg$ */
-/*
- * Data and prototypes for init.c
- *
- * Copyright (C) 2001-2005 by Thomas Winischhofer, Vienna, Austria
- *
- * If distributed as part of the Linux kernel, the following license terms
- * apply:
- *
- * * This program is free software; you can redistribute it and/or modify
- * * it under the terms of the GNU General Public License as published by
- * * the Free Software Foundation; either version 2 of the named License,
- * * or any later version.
- * *
- * * This program is distributed in the hope that it will be useful,
- * * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * * GNU General Public License for more details.
- * *
- * * You should have received a copy of the GNU General Public License
- * * along with this program; if not, write to the Free Software
- * * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
- *
- * Otherwise, the following license terms apply:
- *
- * * Redistribution and use in source and binary forms, with or without
- * * modification, are permitted provided that the following conditions
- * * are met:
- * * 1) Redistributions of source code must retain the above copyright
- * *    notice, this list of conditions and the following disclaimer.
- * * 2) Redistributions in binary form must reproduce the above copyright
- * *    notice, this list of conditions and the following disclaimer in the
- * *    documentation and/or other materials provided with the distribution.
- * * 3) The name of the author may not be used to endorse or promote products
- * *    derived from this software without specific prior written permission.
- * *
- * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
- * * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
- * * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
- * * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * Author:	Thomas Winischhofer <thomas@winischhofer.net>
- *
- */
-
-#ifndef _SISUSB_INIT_H_
-#define _SISUSB_INIT_H_
-
-/* SiS_ModeType */
-#define ModeText		0x00
-#define ModeCGA			0x01
-#define ModeEGA			0x02
-#define ModeVGA			0x03
-#define Mode15Bpp		0x04
-#define Mode16Bpp		0x05
-#define Mode24Bpp		0x06
-#define Mode32Bpp		0x07
-
-#define ModeTypeMask		0x07
-#define IsTextMode		0x07
-
-#define DACInfoFlag		0x0018
-#define MemoryInfoFlag		0x01E0
-#define MemorySizeShift		5
-
-/* modeflag */
-#define Charx8Dot		0x0200
-#define LineCompareOff		0x0400
-#define CRT2Mode		0x0800
-#define HalfDCLK		0x1000
-#define NoSupportSimuTV		0x2000
-#define NoSupportLCDScale	0x4000	/* SiS bridge: No scaling possible (no matter what panel) */
-#define DoubleScanMode		0x8000
-
-/* Infoflag */
-#define SupportTV		0x0008
-#define SupportTV1024		0x0800
-#define SupportCHTV		0x0800
-#define Support64048060Hz	0x0800	/* Special for 640x480 LCD */
-#define SupportHiVision		0x0010
-#define SupportYPbPr750p	0x1000
-#define SupportLCD		0x0020
-#define SupportRAMDAC2		0x0040	/* All           (<= 100Mhz) */
-#define SupportRAMDAC2_135	0x0100	/* All except DH (<= 135Mhz) */
-#define SupportRAMDAC2_162	0x0200	/* B, C          (<= 162Mhz) */
-#define SupportRAMDAC2_202	0x0400	/* C             (<= 202Mhz) */
-#define InterlaceMode		0x0080
-#define SyncPP			0x0000
-#define SyncPN			0x4000
-#define SyncNP			0x8000
-#define SyncNN			0xc000
-
-/* SetFlag */
-#define ProgrammingCRT2		0x0001
-#define LowModeTests		0x0002
-#define LCDVESATiming		0x0008
-#define EnableLVDSDDA		0x0010
-#define SetDispDevSwitchFlag	0x0020
-#define CheckWinDos		0x0040
-#define SetDOSMode		0x0080
-
-/* Index in ModeResInfo table */
-#define SIS_RI_320x200		0
-#define SIS_RI_320x240		1
-#define SIS_RI_320x400		2
-#define SIS_RI_400x300		3
-#define SIS_RI_512x384		4
-#define SIS_RI_640x400		5
-#define SIS_RI_640x480		6
-#define SIS_RI_800x600		7
-#define SIS_RI_1024x768		8
-#define SIS_RI_1280x1024	9
-#define SIS_RI_1600x1200	10
-#define SIS_RI_1920x1440	11
-#define SIS_RI_2048x1536	12
-#define SIS_RI_720x480		13
-#define SIS_RI_720x576		14
-#define SIS_RI_1280x960		15
-#define SIS_RI_800x480		16
-#define SIS_RI_1024x576		17
-#define SIS_RI_1280x720		18
-#define SIS_RI_856x480		19
-#define SIS_RI_1280x768		20
-#define SIS_RI_1400x1050	21
-#define SIS_RI_1152x864		22	/* Up to here SiS conforming */
-#define SIS_RI_848x480		23
-#define SIS_RI_1360x768		24
-#define SIS_RI_1024x600		25
-#define SIS_RI_1152x768		26
-#define SIS_RI_768x576		27
-#define SIS_RI_1360x1024	28
-#define SIS_RI_1680x1050	29
-#define SIS_RI_1280x800		30
-#define SIS_RI_1920x1080	31
-#define SIS_RI_960x540		32
-#define SIS_RI_960x600		33
-
-#define SIS_VIDEO_CAPTURE	0x00 - 0x30
-#define SIS_VIDEO_PLAYBACK	0x02 - 0x30
-#define SIS_CRT2_PORT_04	0x04 - 0x30
-
-int SiSUSBSetMode(struct SiS_Private *SiS_Pr, unsigned short ModeNo);
-int SiSUSBSetVESAMode(struct SiS_Private *SiS_Pr, unsigned short VModeNo);
-
-extern int sisusb_setreg(struct sisusb_usb_data *sisusb, u32 port, u8 data);
-extern int sisusb_getreg(struct sisusb_usb_data *sisusb, u32 port, u8 * data);
-extern int sisusb_setidxreg(struct sisusb_usb_data *sisusb, u32 port,
-			    u8 index, u8 data);
-extern int sisusb_getidxreg(struct sisusb_usb_data *sisusb, u32 port,
-			    u8 index, u8 * data);
-extern int sisusb_setidxregandor(struct sisusb_usb_data *sisusb, u32 port,
-				 u8 idx, u8 myand, u8 myor);
-extern int sisusb_setidxregor(struct sisusb_usb_data *sisusb, u32 port,
-			      u8 index, u8 myor);
-extern int sisusb_setidxregand(struct sisusb_usb_data *sisusb, u32 port,
-			       u8 idx, u8 myand);
-
-void sisusb_delete(struct kref *kref);
-int sisusb_writeb(struct sisusb_usb_data *sisusb, u32 adr, u8 data);
-int sisusb_readb(struct sisusb_usb_data *sisusb, u32 adr, u8 * data);
-int sisusb_copy_memory(struct sisusb_usb_data *sisusb, u8 *src,
-		       u32 dest, int length);
-int sisusb_reset_text_mode(struct sisusb_usb_data *sisusb, int init);
-int sisusbcon_do_font_op(struct sisusb_usb_data *sisusb, int set, int slot,
-			 u8 * arg, int cmapsz, int ch512, int dorecalc,
-			 struct vc_data *c, int fh, int uplock);
-void sisusb_set_cursor(struct sisusb_usb_data *sisusb, unsigned int location);
-int sisusb_console_init(struct sisusb_usb_data *sisusb, int first, int last);
-void sisusb_console_exit(struct sisusb_usb_data *sisusb);
-void sisusb_init_concode(void);
-
-#endif
-- 
GitLab


From a2f3d83cd74eb7cfc69c92d086ec4509cd9c58fb Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Thu, 8 Dec 2022 10:07:47 +0100
Subject: [PATCH 3501/4122] USB: sisusbvga: rename sisusb.c to sisusbvga.c

As it's the only source for the sisusbvga module, there is no need for a
2-steps build.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-sh@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221208090749.28056-2-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/sisusbvga/Makefile                  | 2 --
 drivers/usb/misc/sisusbvga/{sisusb.c => sisusbvga.c} | 0
 2 files changed, 2 deletions(-)
 rename drivers/usb/misc/sisusbvga/{sisusb.c => sisusbvga.c} (100%)

diff --git a/drivers/usb/misc/sisusbvga/Makefile b/drivers/usb/misc/sisusbvga/Makefile
index 93265de80eb9f..28aa1e6ef823f 100644
--- a/drivers/usb/misc/sisusbvga/Makefile
+++ b/drivers/usb/misc/sisusbvga/Makefile
@@ -4,5 +4,3 @@
 #
 
 obj-$(CONFIG_USB_SISUSBVGA) += sisusbvga.o
-
-sisusbvga-y := sisusb.o
diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusbvga.c
similarity index 100%
rename from drivers/usb/misc/sisusbvga/sisusb.c
rename to drivers/usb/misc/sisusbvga/sisusbvga.c
-- 
GitLab


From 4b6be020bd6b126112c06648de17ead360919ab4 Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Thu, 8 Dec 2022 10:07:48 +0100
Subject: [PATCH 3502/4122] USB: sisusbvga: use module_usb_driver()

Now, that we only do usb_register() and usb_sisusb_exit() in
module_init() and module_exit() respectivelly, we can simply use
module_usb_driver().

Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Christophe Leroy <christophe.leroy@csgroup.eu>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: Rich Felker <dalias@libc.org>
Cc: Thomas Winischhofer <thomas@winischhofer.net>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-sh@vger.kernel.org
Cc: linux-usb@vger.kernel.org
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20221208090749.28056-3-jirislaby@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/sisusbvga/sisusbvga.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/usb/misc/sisusbvga/sisusbvga.c b/drivers/usb/misc/sisusbvga/sisusbvga.c
index a0d5ba8058f8f..654a79fd3231e 100644
--- a/drivers/usb/misc/sisusbvga/sisusbvga.c
+++ b/drivers/usb/misc/sisusbvga/sisusbvga.c
@@ -2947,18 +2947,7 @@ static struct usb_driver sisusb_driver = {
 	.id_table =	sisusb_table,
 };
 
-static int __init usb_sisusb_init(void)
-{
-	return usb_register(&sisusb_driver);
-}
-
-static void __exit usb_sisusb_exit(void)
-{
-	usb_deregister(&sisusb_driver);
-}
-
-module_init(usb_sisusb_init);
-module_exit(usb_sisusb_exit);
+module_usb_driver(sisusb_driver);
 
 MODULE_AUTHOR("Thomas Winischhofer <thomas@winischhofer.net>");
 MODULE_DESCRIPTION("sisusbvga - Driver for Net2280/SiS315-based USB2VGA dongles");
-- 
GitLab


From c35ca10f53c51eeb610d3f8fbc6dd6d511b58a58 Mon Sep 17 00:00:00 2001
From: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Date: Thu, 8 Dec 2022 19:00:58 +0800
Subject: [PATCH 3503/4122] usb: storage: Add check for kcalloc

As kcalloc may return NULL pointer, the return value should
be checked and return error if fails as same as the ones in
alauda_read_map.

Fixes: e80b0fade09e ("[PATCH] USB Storage: add alauda support")
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Jiasheng Jiang <jiasheng@iscas.ac.cn>
Link: https://lore.kernel.org/r/20221208110058.12983-1-jiasheng@iscas.ac.cn
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/storage/alauda.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c
index 747be69e5e699..5e912dd29b4c9 100644
--- a/drivers/usb/storage/alauda.c
+++ b/drivers/usb/storage/alauda.c
@@ -438,6 +438,8 @@ static int alauda_init_media(struct us_data *us)
 		+ MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift);
 	MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO);
 	MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO);
+	if (MEDIA_INFO(us).pba_to_lba == NULL || MEDIA_INFO(us).lba_to_pba == NULL)
+		return USB_STOR_TRANSPORT_ERROR;
 
 	if (alauda_reset_media(us) != USB_STOR_XFER_GOOD)
 		return USB_STOR_TRANSPORT_ERROR;
-- 
GitLab


From dc18a4c7b3bd447cef2395deeb1f6ac16dfaca0e Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 3 Dec 2022 15:10:27 +0800
Subject: [PATCH 3504/4122] usb: typec: wusb3801: fix fwnode refcount leak in
 wusb3801_probe()

I got the following report while doing fault injection test:

  OF: ERROR: memory leak, expected refcount 1 instead of 4,
  of_node_get()/of_node_put() unbalanced - destroy cset entry:
  attach overlay node /i2c/tcpc@60/connector

If wusb3801_hw_init() fails, fwnode_handle_put() needs be called to
avoid refcount leak.

Fixes: d016cbe4d7ac ("usb: typec: Support the WUSB3801 port controller")
Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Link: https://lore.kernel.org/r/20221203071027.3808308-1-yangyingliang@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/wusb3801.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/typec/wusb3801.c b/drivers/usb/typec/wusb3801.c
index 3cc7a15ecbd31..a43a18d4b02ed 100644
--- a/drivers/usb/typec/wusb3801.c
+++ b/drivers/usb/typec/wusb3801.c
@@ -364,7 +364,7 @@ static int wusb3801_probe(struct i2c_client *client)
 	/* Initialize the hardware with the devicetree settings. */
 	ret = wusb3801_hw_init(wusb3801);
 	if (ret)
-		return ret;
+		goto err_put_connector;
 
 	wusb3801->cap.revision		= USB_TYPEC_REV_1_2;
 	wusb3801->cap.accessory[0]	= TYPEC_ACCESSORY_AUDIO;
-- 
GitLab


From 97a48da1619ba6bd42a0e5da0a03aa490a9496b1 Mon Sep 17 00:00:00 2001
From: Miaoqian Lin <linmq006@gmail.com>
Date: Tue, 6 Dec 2022 12:17:31 +0400
Subject: [PATCH 3505/4122] usb: dwc3: qcom: Fix memory leak in
 dwc3_qcom_interconnect_init

of_icc_get() alloc resources for path handle, we should release it when not
need anymore. Like the release in dwc3_qcom_interconnect_exit() function.
Add icc_put() in error handling to fix this.

Fixes: bea46b981515 ("usb: dwc3: qcom: Add interconnect support in dwc3 driver")
Cc: stable <stable@kernel.org>
Acked-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Signed-off-by: Miaoqian Lin <linmq006@gmail.com>
Link: https://lore.kernel.org/r/20221206081731.818107-1-linmq006@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/dwc3-qcom.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc3/dwc3-qcom.c b/drivers/usb/dwc3/dwc3-qcom.c
index 7c40f3ffc0544..b0a0351d2d8b5 100644
--- a/drivers/usb/dwc3/dwc3-qcom.c
+++ b/drivers/usb/dwc3/dwc3-qcom.c
@@ -261,7 +261,8 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom)
 	if (IS_ERR(qcom->icc_path_apps)) {
 		dev_err(dev, "failed to get apps-usb path: %ld\n",
 				PTR_ERR(qcom->icc_path_apps));
-		return PTR_ERR(qcom->icc_path_apps);
+		ret = PTR_ERR(qcom->icc_path_apps);
+		goto put_path_ddr;
 	}
 
 	max_speed = usb_get_maximum_speed(&qcom->dwc3->dev);
@@ -274,16 +275,22 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom)
 	}
 	if (ret) {
 		dev_err(dev, "failed to set bandwidth for usb-ddr path: %d\n", ret);
-		return ret;
+		goto put_path_apps;
 	}
 
 	ret = icc_set_bw(qcom->icc_path_apps, APPS_USB_AVG_BW, APPS_USB_PEAK_BW);
 	if (ret) {
 		dev_err(dev, "failed to set bandwidth for apps-usb path: %d\n", ret);
-		return ret;
+		goto put_path_apps;
 	}
 
 	return 0;
+
+put_path_apps:
+	icc_put(qcom->icc_path_apps);
+put_path_ddr:
+	icc_put(qcom->icc_path_ddr);
+	return ret;
 }
 
 /**
-- 
GitLab


From 4c92670b16727365699fe4b19ed32013bab2c107 Mon Sep 17 00:00:00 2001
From: Szymon Heidrich <szymon.heidrich@gmail.com>
Date: Tue, 6 Dec 2022 15:13:01 +0100
Subject: [PATCH 3506/4122] usb: gadget: uvc: Prevent buffer overflow in setup
 handler

Setup function uvc_function_setup permits control transfer
requests with up to 64 bytes of payload (UVC_MAX_REQUEST_SIZE),
data stage handler for OUT transfer uses memcpy to copy req->actual
bytes to uvc_event->data.data array of size 60. This may result
in an overflow of 4 bytes.

Fixes: cdda479f15cd ("USB gadget: video class function driver")
Cc: stable <stable@kernel.org>
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Daniel Scally <dan.scally@ideasonboard.com>
Signed-off-by: Szymon Heidrich <szymon.heidrich@gmail.com>
Link: https://lore.kernel.org/r/20221206141301.51305-1-szymon.heidrich@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/function/f_uvc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/function/f_uvc.c b/drivers/usb/gadget/function/f_uvc.c
index 6e131624011a5..32f2c16454670 100644
--- a/drivers/usb/gadget/function/f_uvc.c
+++ b/drivers/usb/gadget/function/f_uvc.c
@@ -213,8 +213,9 @@ uvc_function_ep0_complete(struct usb_ep *ep, struct usb_request *req)
 
 		memset(&v4l2_event, 0, sizeof(v4l2_event));
 		v4l2_event.type = UVC_EVENT_DATA;
-		uvc_event->data.length = req->actual;
-		memcpy(&uvc_event->data.data, req->buf, req->actual);
+		uvc_event->data.length = min_t(unsigned int, req->actual,
+			sizeof(uvc_event->data.data));
+		memcpy(&uvc_event->data.data, req->buf, uvc_event->data.length);
 		v4l2_event_queue(&uvc->vdev, &v4l2_event);
 	}
 }
-- 
GitLab


From ecec4b20d29c3d6922dafe7d2555254a454272d2 Mon Sep 17 00:00:00 2001
From: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Date: Fri, 25 Nov 2022 20:21:15 +0200
Subject: [PATCH 3507/4122] usb: musb: remove extra check in
 musb_gadget_vbus_draw

The checks for musb->xceiv and musb->xceiv->set_power duplicate those in
usb_phy_set_power(), so there is no need of them. Moreover, not calling
usb_phy_set_power() results in usb_phy_set_charger_current() not being
called, so current USB config max current is not propagated through USB
charger framework and charger drivers may try to draw more current than
allowed or possible.

Fix that by removing those extra checks and calling usb_phy_set_power()
directly.

Tested on Motorola Droid4 and Nokia N900

Fixes: a9081a008f84 ("usb: phy: Add USB charger support")
Cc: stable <stable@kernel.org>
Signed-off-by: Ivaylo Dimitrov <ivo.g.dimitrov.75@gmail.com>
Link: https://lore.kernel.org/r/1669400475-4762-1-git-send-email-ivo.g.dimitrov.75@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/musb/musb_gadget.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/usb/musb/musb_gadget.c b/drivers/usb/musb/musb_gadget.c
index 6cb9514ef3405..31c44325e8287 100644
--- a/drivers/usb/musb/musb_gadget.c
+++ b/drivers/usb/musb/musb_gadget.c
@@ -1630,8 +1630,6 @@ static int musb_gadget_vbus_draw(struct usb_gadget *gadget, unsigned mA)
 {
 	struct musb	*musb = gadget_to_musb(gadget);
 
-	if (!musb->xceiv || !musb->xceiv->set_power)
-		return -EOPNOTSUPP;
 	return usb_phy_set_power(musb->xceiv, mA);
 }
 
-- 
GitLab


From 6f1f0ad910f73f5533b65e1748448d334e0ec697 Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Fri, 25 Nov 2022 17:04:44 +0100
Subject: [PATCH 3508/4122] usb: gadget: udc: drop obsolete dependencies on
 COMPILE_TEST

Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it
is possible to test-build any driver which depends on OF on any
architecture by explicitly selecting OF. Therefore depending on
COMPILE_TEST as an alternative is no longer needed.

It is actually better to always build such drivers with OF enabled,
so that the test builds are closer to how each driver will actually be
built on its intended target. Building them without OF may not test
much as the compiler will optimize out potentially large parts of the
code. In the worst case, this could even pop false positive warnings.
Dropping COMPILE_TEST here improves the quality of our testing and
avoids wasting time on non-existent issues.

Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Nicolas Ferre <nicolas.ferre@microchip.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Claudiu Beznea <claudiu.beznea@microchip.com>
Cc: Michal Simek <michal.simek@xilinx.com>
Acked-by: Nicolas Ferre <nicolas.ferre@microchip.com>
Signed-off-by: Jean Delvare <jdelvare@suse.de>
Link: https://lore.kernel.org/r/20221125170444.36620123@endymion.delvare
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/gadget/udc/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/usb/gadget/udc/Kconfig b/drivers/usb/gadget/udc/Kconfig
index 16243964b1cd1..b3006d8b04ab3 100644
--- a/drivers/usb/gadget/udc/Kconfig
+++ b/drivers/usb/gadget/udc/Kconfig
@@ -33,7 +33,7 @@ menu "USB Peripheral Controller"
 config USB_AT91
 	tristate "Atmel AT91 USB Device Port"
 	depends on ARCH_AT91
-	depends on OF || COMPILE_TEST
+	depends on OF
 	help
 	   Many Atmel AT91 processors (such as the AT91RM2000) have a
 	   full speed USB Device Port with support for five configurable
@@ -419,7 +419,7 @@ config USB_EG20T
 config USB_GADGET_XILINX
 	tristate "Xilinx USB Driver"
 	depends on HAS_DMA
-	depends on OF || COMPILE_TEST
+	depends on OF
 	help
 	  USB peripheral controller driver for Xilinx USB2 device.
 	  Xilinx USB2 device is a soft IP which supports both full
-- 
GitLab


From 59d54aa09020fe52061d4cda51d474f5bd5e6be1 Mon Sep 17 00:00:00 2001
From: Sven Peter <sven@svenpeter.dev>
Date: Mon, 28 Nov 2022 17:23:04 +0100
Subject: [PATCH 3509/4122] usb: typec: tipd: Set mode of operation for USB
 Type-C connector

Forward the mode of operation to the typec subsystem such that it can
configure the mux correctly.

Reviewed-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Sven Peter <sven@svenpeter.dev>
Link: https://lore.kernel.org/r/20221128162304.80125-1-sven@svenpeter.dev
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/typec/tipd/core.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/usb/typec/tipd/core.c b/drivers/usb/typec/tipd/core.c
index 982bd2cad931d..46a4d8b128f0e 100644
--- a/drivers/usb/typec/tipd/core.c
+++ b/drivers/usb/typec/tipd/core.c
@@ -14,6 +14,7 @@
 #include <linux/regmap.h>
 #include <linux/interrupt.h>
 #include <linux/usb/typec.h>
+#include <linux/usb/typec_altmode.h>
 #include <linux/usb/role.h>
 
 #include "tps6598x.h"
@@ -257,6 +258,7 @@ static int tps6598x_connect(struct tps6598x *tps, u32 status)
 		typec_set_orientation(tps->port, TYPEC_ORIENTATION_REVERSE);
 	else
 		typec_set_orientation(tps->port, TYPEC_ORIENTATION_NORMAL);
+	typec_set_mode(tps->port, TYPEC_STATE_USB);
 	tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), true);
 
 	tps->partner = typec_register_partner(tps->port, &desc);
@@ -280,6 +282,7 @@ static void tps6598x_disconnect(struct tps6598x *tps, u32 status)
 	typec_set_pwr_role(tps->port, TPS_STATUS_TO_TYPEC_PORTROLE(status));
 	typec_set_vconn_role(tps->port, TPS_STATUS_TO_TYPEC_VCONN(status));
 	typec_set_orientation(tps->port, TYPEC_ORIENTATION_NONE);
+	typec_set_mode(tps->port, TYPEC_STATE_SAFE);
 	tps6598x_set_data_role(tps, TPS_STATUS_TO_TYPEC_DATAROLE(status), false);
 
 	power_supply_changed(tps->psy);
-- 
GitLab


From 0cd142b4665ee3133cd80539b5a430be5fd326c6 Mon Sep 17 00:00:00 2001
From: Yi Yang <yiyang13@huawei.com>
Date: Fri, 2 Dec 2022 09:21:26 +0800
Subject: [PATCH 3510/4122] usb: fotg210-udc: fix potential memory leak in
 fotg210_udc_probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In fotg210_udc_probe(), if devm_clk_get() or clk_prepare_enable()
fails, 'fotg210' will not be freed, which will lead to a memory leak.
Fix it by moving kfree() to a proper location.

In addition，we can use "return -ENOMEM" instead of "goto err"
to simplify the code.

Fixes: 718a38d092ec ("fotg210-udc: Handle PCLK")
Reviewed-by: Andrzej Pietrasiewicz <andrzej.p@collabora.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Yi Yang <yiyang13@huawei.com>
Link: https://lore.kernel.org/r/20221202012126.246953-1-yiyang13@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/fotg210/fotg210-udc.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/usb/fotg210/fotg210-udc.c b/drivers/usb/fotg210/fotg210-udc.c
index b9ea6c6d931c6..66e1b7ee3346e 100644
--- a/drivers/usb/fotg210/fotg210-udc.c
+++ b/drivers/usb/fotg210/fotg210-udc.c
@@ -1163,12 +1163,10 @@ int fotg210_udc_probe(struct platform_device *pdev)
 		return -ENODEV;
 	}
 
-	ret = -ENOMEM;
-
 	/* initialize udc */
 	fotg210 = kzalloc(sizeof(struct fotg210_udc), GFP_KERNEL);
 	if (fotg210 == NULL)
-		goto err;
+		return -ENOMEM;
 
 	fotg210->dev = dev;
 
@@ -1178,7 +1176,7 @@ int fotg210_udc_probe(struct platform_device *pdev)
 		ret = clk_prepare_enable(fotg210->pclk);
 		if (ret) {
 			dev_err(dev, "failed to enable PCLK\n");
-			return ret;
+			goto err;
 		}
 	} else if (PTR_ERR(fotg210->pclk) == -EPROBE_DEFER) {
 		/*
@@ -1302,8 +1300,7 @@ err_pclk:
 	if (!IS_ERR(fotg210->pclk))
 		clk_disable_unprepare(fotg210->pclk);
 
-	kfree(fotg210);
-
 err:
+	kfree(fotg210);
 	return ret;
 }
-- 
GitLab


From 38cea8e31e9ef143187135d714aed4d7bd18463c Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <uwu@icenowy.me>
Date: Tue, 6 Dec 2022 13:52:23 +0800
Subject: [PATCH 3511/4122] dt-bindings: vendor-prefixes: add Genesys Logic

Genesys Logic, Inc. is a manufacturer for interface chips, especially
USB hubs.

https://www.genesyslogic.com.tw/

Signed-off-by: Icenowy Zheng <uwu@icenowy.me>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221206055228.306074-2-uwu@icenowy.me
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/vendor-prefixes.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 6e323a380294a..43359c0ccaf53 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -488,6 +488,8 @@ patternProperties:
     description: GE Fanuc Intelligent Platforms Embedded Systems, Inc.
   "^gemei,.*":
     description: Gemei Digital Technology Co., Ltd.
+  "^genesys,.*":
+    description: Genesys Logic, Inc.
   "^geniatech,.*":
     description: Geniatech, Inc.
   "^giantec,.*":
-- 
GitLab


From 4e3a4fcd871274c0233ea498c685b118a21ff3d0 Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <uwu@icenowy.me>
Date: Tue, 6 Dec 2022 13:52:24 +0800
Subject: [PATCH 3512/4122] dt-bindings: usb: Add binding for Genesys Logic
 GL850G hub controller

The Genesys Logic GL850G is a USB 2.0 Single TT hub controller that
features 4 downstream ports, an internal 5V-to-3.3V LDO regulator (can
be bypassed) and an external reset pin.

Add a device tree binding for its USB protocol part. The internal LDO is
not covered by this and can just be modelled as a fixed regulator.

Signed-off-by: Icenowy Zheng <uwu@icenowy.me>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221206055228.306074-3-uwu@icenowy.me
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 .../bindings/usb/genesys,gl850g.yaml          | 48 +++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/usb/genesys,gl850g.yaml

diff --git a/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml b/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml
new file mode 100644
index 0000000000000..a9f831448ccae
--- /dev/null
+++ b/Documentation/devicetree/bindings/usb/genesys,gl850g.yaml
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-2.0-only or BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/usb/genesys,gl850g.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Genesys Logic GL850G USB 2.0 hub controller
+
+maintainers:
+  - Icenowy Zheng <uwu@icenowy.me>
+
+allOf:
+  - $ref: usb-device.yaml#
+
+properties:
+  compatible:
+    enum:
+      - usb5e3,608
+
+  reg: true
+
+  reset-gpios:
+    description: GPIO controlling the RESET# pin.
+
+  vdd-supply:
+    description:
+      the regulator that provides 3.3V core power to the hub.
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    usb {
+        dr_mode = "host";
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        hub: hub@1 {
+            compatible = "usb5e3,608";
+            reg = <1>;
+            reset-gpios = <&pio 7 2 GPIO_ACTIVE_LOW>;
+        };
+    };
-- 
GitLab


From 9bae996ffa28ac03b6d95382a2a082eb219e745a Mon Sep 17 00:00:00 2001
From: Icenowy Zheng <uwu@icenowy.me>
Date: Tue, 6 Dec 2022 13:52:25 +0800
Subject: [PATCH 3513/4122] usb: misc: onboard_usb_hub: add Genesys Logic
 GL850G hub support

Genesys Logic GL850G is a 4-port USB 2.0 STT hub that has a reset pin to
toggle and a 3.3V core supply exported (although an integrated LDO is
available for powering it with 5V).

Add the support for this hub, for controlling the reset pin and the core
power supply.

Signed-off-by: Icenowy Zheng <uwu@icenowy.me>
Acked-by: Matthias Kaehlcke <mka@chromium.org>
Link: https://lore.kernel.org/r/20221206055228.306074-4-uwu@icenowy.me
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/misc/onboard_usb_hub.c | 2 ++
 drivers/usb/misc/onboard_usb_hub.h | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/usb/misc/onboard_usb_hub.c b/drivers/usb/misc/onboard_usb_hub.c
index d63c63942af1b..94e7966e199d1 100644
--- a/drivers/usb/misc/onboard_usb_hub.c
+++ b/drivers/usb/misc/onboard_usb_hub.c
@@ -331,6 +331,7 @@ static struct platform_driver onboard_hub_driver = {
 
 /************************** USB driver **************************/
 
+#define VENDOR_ID_GENESYS	0x05e3
 #define VENDOR_ID_MICROCHIP	0x0424
 #define VENDOR_ID_REALTEK	0x0bda
 #define VENDOR_ID_TI		0x0451
@@ -407,6 +408,7 @@ static void onboard_hub_usbdev_disconnect(struct usb_device *udev)
 }
 
 static const struct usb_device_id onboard_hub_id_table[] = {
+	{ USB_DEVICE(VENDOR_ID_GENESYS, 0x0608) }, /* Genesys Logic GL850G USB 2.0 */
 	{ USB_DEVICE(VENDOR_ID_MICROCHIP, 0x2514) }, /* USB2514B USB 2.0 */
 	{ USB_DEVICE(VENDOR_ID_REALTEK, 0x0411) }, /* RTS5411 USB 3.1 */
 	{ USB_DEVICE(VENDOR_ID_REALTEK, 0x5411) }, /* RTS5411 USB 2.1 */
diff --git a/drivers/usb/misc/onboard_usb_hub.h b/drivers/usb/misc/onboard_usb_hub.h
index 34beab8bce3d6..62129a6a1ba5a 100644
--- a/drivers/usb/misc/onboard_usb_hub.h
+++ b/drivers/usb/misc/onboard_usb_hub.h
@@ -22,10 +22,15 @@ static const struct onboard_hub_pdata ti_tusb8041_data = {
 	.reset_us = 3000,
 };
 
+static const struct onboard_hub_pdata genesys_gl850g_data = {
+	.reset_us = 3,
+};
+
 static const struct of_device_id onboard_hub_match[] = {
 	{ .compatible = "usb424,2514", .data = &microchip_usb424_data, },
 	{ .compatible = "usb451,8140", .data = &ti_tusb8041_data, },
 	{ .compatible = "usb451,8142", .data = &ti_tusb8041_data, },
+	{ .compatible = "usb5e3,608", .data = &genesys_gl850g_data, },
 	{ .compatible = "usbbda,411", .data = &realtek_rts5411_data, },
 	{ .compatible = "usbbda,5411", .data = &realtek_rts5411_data, },
 	{ .compatible = "usbbda,414", .data = &realtek_rts5411_data, },
-- 
GitLab


From 8a7b31d545d3a15f0e6f5984ae16f0ca4fd76aac Mon Sep 17 00:00:00 2001
From: Ferry Toth <ftoth@exalondelft.nl>
Date: Mon, 5 Dec 2022 21:15:26 +0100
Subject: [PATCH 3514/4122] usb: ulpi: defer ulpi_register on ulpi_read_id
 timeout

Since commit 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral
if extcon is present") Dual Role support on Intel Merrifield platform
broke due to rearranging the call to dwc3_get_extcon().

It appears to be caused by ulpi_read_id() on the first test write failing
with -ETIMEDOUT. Currently ulpi_read_id() expects to discover the phy via
DT when the test write fails and returns 0 in that case, even if DT does not
provide the phy. As a result usb probe completes without phy.

Make ulpi_read_id() return -ETIMEDOUT to its user if the first test write
fails. The user should then handle it appropriately. A follow up patch
will make dwc3_core_init() set -EPROBE_DEFER in this case and bail out.

Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT")
Cc: stable@vger.kernel.org
Acked-by: Heikki Krogerus <heikki.krogerus@linux.intel.com>
Signed-off-by: Ferry Toth <ftoth@exalondelft.nl>
Link: https://lore.kernel.org/r/20221205201527.13525-2-ftoth@exalondelft.nl
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/common/ulpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/usb/common/ulpi.c b/drivers/usb/common/ulpi.c
index d7c8461976ce0..60e8174686a17 100644
--- a/drivers/usb/common/ulpi.c
+++ b/drivers/usb/common/ulpi.c
@@ -207,7 +207,7 @@ static int ulpi_read_id(struct ulpi *ulpi)
 	/* Test the interface */
 	ret = ulpi_write(ulpi, ULPI_SCRATCH, 0xaa);
 	if (ret < 0)
-		goto err;
+		return ret;
 
 	ret = ulpi_read(ulpi, ULPI_SCRATCH);
 	if (ret < 0)
-- 
GitLab


From 63130462c919ece0ad0d9bb5a1f795ef8d79687e Mon Sep 17 00:00:00 2001
From: Ferry Toth <ftoth@exalondelft.nl>
Date: Mon, 5 Dec 2022 21:15:27 +0100
Subject: [PATCH 3515/4122] usb: dwc3: core: defer probe on ulpi_read_id
 timeout

Since commit 0f0101719138 ("usb: dwc3: Don't switch OTG -> peripheral
if extcon is present"), Dual Role support on Intel Merrifield platform
broke due to rearranging the call to dwc3_get_extcon().

It appears to be caused by ulpi_read_id() masking the timeout on the first
test write. In the past dwc3 probe continued by calling dwc3_core_soft_reset()
followed by dwc3_get_extcon() which happend to return -EPROBE_DEFER.
On deferred probe ulpi_read_id() finally succeeded. Due to above mentioned
rearranging -EPROBE_DEFER is not returned and probe completes without phy.

On Intel Merrifield the timeout on the first test write issue is reproducible
but it is difficult to find the root cause. Using a mainline kernel and
rootfs with buildroot ulpi_read_id() succeeds. As soon as adding
ftrace / bootconfig to find out why, ulpi_read_id() fails and we can't
analyze the flow. Using another rootfs ulpi_read_id() fails even without
adding ftrace. We suspect the issue is some kind of timing / race, but
merely retrying ulpi_read_id() does not resolve the issue.

As we now changed ulpi_read_id() to return -ETIMEDOUT in this case, we
need to handle the error by calling dwc3_core_soft_reset() and request
-EPROBE_DEFER. On deferred probe ulpi_read_id() is retried and succeeds.

Fixes: ef6a7bcfb01c ("usb: ulpi: Support device discovery via DT")
Cc: stable@vger.kernel.org
Acked-by: Thinh Nguyen <Thinh.Nguyen@synopsys.com>
Signed-off-by: Ferry Toth <ftoth@exalondelft.nl>
Link: https://lore.kernel.org/r/20221205201527.13525-3-ftoth@exalondelft.nl
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc3/core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/usb/dwc3/core.c b/drivers/usb/dwc3/core.c
index fc38a8b13efa6..476b636185116 100644
--- a/drivers/usb/dwc3/core.c
+++ b/drivers/usb/dwc3/core.c
@@ -1100,8 +1100,13 @@ static int dwc3_core_init(struct dwc3 *dwc)
 
 	if (!dwc->ulpi_ready) {
 		ret = dwc3_core_ulpi_init(dwc);
-		if (ret)
+		if (ret) {
+			if (ret == -ETIMEDOUT) {
+				dwc3_core_soft_reset(dwc);
+				ret = -EPROBE_DEFER;
+			}
 			goto err0;
+		}
 		dwc->ulpi_ready = true;
 	}
 
-- 
GitLab


From 2a81a7aa420b80865fdd82ec383fe365e18f922b Mon Sep 17 00:00:00 2001
From: Frank Wunderlich <frank-w@public-files.de>
Date: Sun, 27 Nov 2022 12:41:36 +0100
Subject: [PATCH 3516/4122] dt-bindings: usb: mtk-xhci: add support for mt7986

Add compatible string for mt7986.

Signed-off-by: Frank Wunderlich <frank-w@public-files.de>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Chunfeng Yun <chunfeng.yun@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Reviewed-by: Matthias Brugger <matthias.bgg@gmail.com>
Link: https://lore.kernel.org/r/20221127114142.156573-3-linux@fw-web.de
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml
index 939623867a646..a3c37944c6305 100644
--- a/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml
+++ b/Documentation/devicetree/bindings/usb/mediatek,mtk-xhci.yaml
@@ -28,6 +28,7 @@ properties:
           - mediatek,mt7622-xhci
           - mediatek,mt7623-xhci
           - mediatek,mt7629-xhci
+          - mediatek,mt7986-xhci
           - mediatek,mt8173-xhci
           - mediatek,mt8183-xhci
           - mediatek,mt8186-xhci
-- 
GitLab


From 42a317d076b58f08413219b1679d211783c2e5f3 Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@theobroma-systems.com>
Date: Wed, 7 Dec 2022 14:19:16 +0100
Subject: [PATCH 3517/4122] usb: dwc2: disable lpm feature on Rockchip SoCs

LPM feature of DWC2 module integrated in Rockchip SoCs doesn't work
properly or needs some additional handling, so disable it for now.
Without disabling LPM feature, the USB ADB communication fail with
the following error log:

dwc2 ff580000.usb: new address 27
dwc2 ff580000.usb: Failed to exit L1 sleep state in 200us.
dwc2 ff580000.usb: dwc2_hsotg_send_reply: cannot queue req
dwc2 ff580000.usb: dwc2_hsotg_process_req_status: failed to send reply
dwc2 ff580000.usb: dwc2_hsotg_enqueue_setup: failed queue (-11)
dwc2 ff580000.usb: Failed to exit L1 sleep state in 200us.

[diff vs vendor kernel: added lpm_clock_gating, besl and
 hird_threshold_en settings as seen in  commit 53febc956900 ("usb: dwc2:
 disable Link Power Management on STM32MP15 HS OTG")]

Signed-off-by: William Wu <william.wu@rock-chips.com>
Signed-off-by: Frank Wang <frank.wang@rock-chips.com>
Signed-off-by: Quentin Schulz <quentin.schulz@theobroma-systems.com>
Link: https://lore.kernel.org/r/20221206-dwc2-gadget-dual-role-v1-1-36515e1092cd@theobroma-systems.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/params.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/usb/dwc2/params.c b/drivers/usb/dwc2/params.c
index 8eab5f38b1101..9ed9fd9569404 100644
--- a/drivers/usb/dwc2/params.c
+++ b/drivers/usb/dwc2/params.c
@@ -113,6 +113,10 @@ static void dwc2_set_rk_params(struct dwc2_hsotg *hsotg)
 	p->ahbcfg = GAHBCFG_HBSTLEN_INCR16 <<
 		GAHBCFG_HBSTLEN_SHIFT;
 	p->power_down = DWC2_POWER_DOWN_PARAM_NONE;
+	p->lpm = false;
+	p->lpm_clock_gating = false;
+	p->besl = false;
+	p->hird_threshold_en = false;
 }
 
 static void dwc2_set_ltq_params(struct dwc2_hsotg *hsotg)
-- 
GitLab


From ade23d7b7ec5c38bd43ec44ccb753cb7ea8ac08a Mon Sep 17 00:00:00 2001
From: Quentin Schulz <quentin.schulz@theobroma-systems.com>
Date: Wed, 7 Dec 2022 14:19:17 +0100
Subject: [PATCH 3518/4122] usb: dwc2: power on/off phy for peripheral mode in
 dual-role mode

The PHY power is handled for peripheral mode but only when the device is
forced into this peripheral mode. It is missing when the device is
operating in peripheral mode when dual-role mode is enabled, so let's
update the condition to match this scenario.

Signed-off-by: Quentin Schulz <quentin.schulz@theobroma-systems.com>
Link: https://lore.kernel.org/r/20221206-dwc2-gadget-dual-role-v1-2-36515e1092cd@theobroma-systems.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/usb/dwc2/gadget.c   | 6 ++++--
 drivers/usb/dwc2/platform.c | 3 ++-
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/dwc2/gadget.c b/drivers/usb/dwc2/gadget.c
index 8b15742d9e8aa..62fa6378d2d73 100644
--- a/drivers/usb/dwc2/gadget.c
+++ b/drivers/usb/dwc2/gadget.c
@@ -4549,7 +4549,8 @@ static int dwc2_hsotg_udc_start(struct usb_gadget *gadget,
 	hsotg->gadget.dev.of_node = hsotg->dev->of_node;
 	hsotg->gadget.speed = USB_SPEED_UNKNOWN;
 
-	if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL) {
+	if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL ||
+	    (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg))) {
 		ret = dwc2_lowlevel_hw_enable(hsotg);
 		if (ret)
 			goto err;
@@ -4611,7 +4612,8 @@ static int dwc2_hsotg_udc_stop(struct usb_gadget *gadget)
 	if (!IS_ERR_OR_NULL(hsotg->uphy))
 		otg_set_peripheral(hsotg->uphy->otg, NULL);
 
-	if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL)
+	if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL ||
+	    (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg)))
 		dwc2_lowlevel_hw_disable(hsotg);
 
 	return 0;
diff --git a/drivers/usb/dwc2/platform.c b/drivers/usb/dwc2/platform.c
index 262c13b6362ac..23ef759968231 100644
--- a/drivers/usb/dwc2/platform.c
+++ b/drivers/usb/dwc2/platform.c
@@ -576,7 +576,8 @@ static int dwc2_driver_probe(struct platform_device *dev)
 	dwc2_debugfs_init(hsotg);
 
 	/* Gadget code manages lowlevel hw on its own */
-	if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL)
+	if (hsotg->dr_mode == USB_DR_MODE_PERIPHERAL ||
+	    (hsotg->dr_mode == USB_DR_MODE_OTG && dwc2_is_device_mode(hsotg)))
 		dwc2_lowlevel_hw_disable(hsotg);
 
 #if IS_ENABLED(CONFIG_USB_DWC2_PERIPHERAL) || \
-- 
GitLab


From 0bf588274f73b29d7058042a6b7cc2b764502cc1 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl@samba.org>
Date: Wed, 23 Nov 2022 17:51:21 +0100
Subject: [PATCH 3519/4122] Fix path in cifs/usage.rst

/sys/module/... not /proc/module/...

Signed-off-by: Volker Lendecke <vl@samba.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 Documentation/admin-guide/cifs/usage.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/cifs/usage.rst b/Documentation/admin-guide/cifs/usage.rst
index 3766bf8a1c20e..ed3b8dc854ecf 100644
--- a/Documentation/admin-guide/cifs/usage.rst
+++ b/Documentation/admin-guide/cifs/usage.rst
@@ -858,7 +858,7 @@ CIFS kernel module parameters
 These module parameters can be specified or modified either during the time of
 module loading or during the runtime by using the interface::
 
-	/proc/module/cifs/parameters/<param>
+	/sys/module/cifs/parameters/<param>
 
 i.e.::
 
-- 
GitLab


From 83fb8abec29383eb0cf35495d21669e38548771b Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl@samba.org>
Date: Fri, 25 Nov 2022 12:26:00 +0100
Subject: [PATCH 3520/4122] cifs: Add "extbuf" and "extbuflen" args to
 smb2_compound_op()

Will carry the variable-sized reply from SMB_FIND_FILE_POSIX_INFO

Signed-off-by: Volker Lendecke <vl@samba.org>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2inode.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 68e08c85fbb87..1be86ba950b35 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -59,6 +59,7 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
 			    struct cifs_sb_info *cifs_sb, const char *full_path,
 			    __u32 desired_access, __u32 create_disposition, __u32 create_options,
 			    umode_t mode, void *ptr, int command, struct cifsFileInfo *cfile,
+			    __u8 **extbuf, size_t *extbuflen,
 			    struct kvec *err_iov, int *err_buftype)
 {
 	struct cop_vars *vars = NULL;
@@ -539,7 +540,7 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 	cifs_get_readable_path(tcon, full_path, &cfile);
 	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN,
 			      create_options, ACL_NO_MODE, data, SMB2_OP_QUERY_INFO, cfile,
-			      err_iov, err_buftype);
+			      NULL, NULL, err_iov, err_buftype);
 	if (rc == -EOPNOTSUPP) {
 		if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER &&
 		    ((struct smb2_hdr *)err_iov[0].iov_base)->Command == SMB2_CREATE &&
@@ -555,7 +556,7 @@ int smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 		cifs_get_readable_path(tcon, full_path, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES,
 				      FILE_OPEN, create_options, ACL_NO_MODE, data,
-				      SMB2_OP_QUERY_INFO, cfile, NULL, NULL);
+				      SMB2_OP_QUERY_INFO, cfile, NULL, NULL, NULL, NULL);
 	}
 
 out:
@@ -589,7 +590,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 	cifs_get_readable_path(tcon, full_path, &cfile);
 	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN,
 			      create_options, ACL_NO_MODE, data, SMB2_OP_POSIX_QUERY_INFO, cfile,
-			      err_iov, err_buftype);
+			      NULL, NULL, err_iov, err_buftype);
 	if (rc == -EOPNOTSUPP) {
 		/* BB TODO: When support for special files added to Samba re-verify this path */
 		if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER &&
@@ -606,7 +607,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 		cifs_get_readable_path(tcon, full_path, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES,
 				      FILE_OPEN, create_options, ACL_NO_MODE, data,
-				      SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL);
+				      SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL, NULL, NULL);
 	}
 
 out:
@@ -624,7 +625,7 @@ smb2_mkdir(const unsigned int xid, struct inode *parent_inode, umode_t mode,
 	return smb2_compound_op(xid, tcon, cifs_sb, name,
 				FILE_WRITE_ATTRIBUTES, FILE_CREATE,
 				CREATE_NOT_FILE, mode, NULL, SMB2_OP_MKDIR,
-				NULL, NULL, NULL);
+				NULL, NULL, NULL, NULL, NULL);
 }
 
 void
@@ -646,7 +647,7 @@ smb2_mkdir_setinfo(struct inode *inode, const char *name,
 	tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,
 				 FILE_WRITE_ATTRIBUTES, FILE_CREATE,
 				 CREATE_NOT_FILE, ACL_NO_MODE,
-				 &data, SMB2_OP_SET_INFO, cfile, NULL, NULL);
+				 &data, SMB2_OP_SET_INFO, cfile, NULL, NULL, NULL, NULL);
 	if (tmprc == 0)
 		cifs_i->cifsAttrs = dosattrs;
 }
@@ -658,7 +659,7 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 	drop_cached_dir_by_name(xid, tcon, name, cifs_sb);
 	return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
 				CREATE_NOT_FILE, ACL_NO_MODE,
-				NULL, SMB2_OP_RMDIR, NULL, NULL, NULL);
+				NULL, SMB2_OP_RMDIR, NULL, NULL, NULL, NULL, NULL);
 }
 
 int
@@ -667,7 +668,7 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
 {
 	return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
 				CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
-				ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL);
+				ACL_NO_MODE, NULL, SMB2_OP_DELETE, NULL, NULL, NULL, NULL, NULL);
 }
 
 static int
@@ -686,7 +687,7 @@ smb2_set_path_attr(const unsigned int xid, struct cifs_tcon *tcon,
 	}
 	rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
 			      FILE_OPEN, 0, ACL_NO_MODE, smb2_to_name,
-			      command, cfile, NULL, NULL);
+			      command, cfile, NULL, NULL, NULL, NULL);
 smb2_rename_path:
 	kfree(smb2_to_name);
 	return rc;
@@ -727,7 +728,7 @@ smb2_set_path_size(const unsigned int xid, struct cifs_tcon *tcon,
 	cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile);
 	return smb2_compound_op(xid, tcon, cifs_sb, full_path,
 				FILE_WRITE_DATA, FILE_OPEN, 0, ACL_NO_MODE,
-				&eof, SMB2_OP_SET_EOF, cfile, NULL, NULL);
+				&eof, SMB2_OP_SET_EOF, cfile, NULL, NULL, NULL, NULL);
 }
 
 int
@@ -754,7 +755,7 @@ smb2_set_file_info(struct inode *inode, const char *full_path,
 	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
 			      FILE_WRITE_ATTRIBUTES, FILE_OPEN,
 			      0, ACL_NO_MODE, buf, SMB2_OP_SET_INFO, cfile,
-			      NULL, NULL);
+			      NULL, NULL, NULL, NULL);
 	cifs_put_tlink(tlink);
 	return rc;
 }
-- 
GitLab


From 64ce47cb1b29d7d6aab6dcc287ae1fddb4876bd5 Mon Sep 17 00:00:00 2001
From: Volker Lendecke <vl@samba.org>
Date: Fri, 25 Nov 2022 12:37:44 +0100
Subject: [PATCH 3521/4122] cifs: Parse owner/group for stat in smb311 posix
 extensions

stat was returning default owner and group (unlike readdir)
for SMB3.1.1 POSIX extensions

Signed-off-by: Volker Lendecke <vl@samba.org>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/inode.c     | 13 ++++++++----
 fs/cifs/smb2inode.c | 49 ++++++++++++++++++++++++++++++++++++++++++---
 fs/cifs/smb2proto.h |  5 ++++-
 3 files changed, 59 insertions(+), 8 deletions(-)

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 4e2ca3c6e5c00..286a5400b94e5 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -632,6 +632,8 @@ static int cifs_sfu_mode(struct cifs_fattr *fattr, const unsigned char *path,
 
 /* Fill a cifs_fattr struct with info from POSIX info struct */
 static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_open_info_data *data,
+				       struct cifs_sid *owner,
+				       struct cifs_sid *group,
 				       struct super_block *sb, bool adjust_tz, bool symlink)
 {
 	struct smb311_posix_qinfo *info = &data->posix_fi;
@@ -680,8 +682,8 @@ static void smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct cifs_ope
 	}
 	/* else if reparse point ... TODO: add support for FIFO and blk dev; special file types */
 
-	fattr->cf_uid = cifs_sb->ctx->linux_uid; /* TODO: map uid and gid from SID */
-	fattr->cf_gid = cifs_sb->ctx->linux_gid;
+	sid_to_id(cifs_sb, owner, fattr, SIDOWNER);
+	sid_to_id(cifs_sb, group, fattr, SIDGROUP);
 
 	cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n",
 		fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
@@ -1175,6 +1177,7 @@ smb311_posix_get_inode_info(struct inode **inode,
 	struct cifs_fattr fattr = {0};
 	bool symlink = false;
 	struct cifs_open_info_data data = {};
+	struct cifs_sid owner, group;
 	int rc = 0;
 	int tmprc = 0;
 
@@ -1192,7 +1195,8 @@ smb311_posix_get_inode_info(struct inode **inode,
 		goto out;
 	}
 
-	rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, full_path, &data, &adjust_tz,
+	rc = smb311_posix_query_path_info(xid, tcon, cifs_sb, full_path, &data,
+					  &owner, &group, &adjust_tz,
 					  &symlink);
 
 	/*
@@ -1201,7 +1205,8 @@ smb311_posix_get_inode_info(struct inode **inode,
 
 	switch (rc) {
 	case 0:
-		smb311_posix_info_to_fattr(&fattr, &data, sb, adjust_tz, symlink);
+		smb311_posix_info_to_fattr(&fattr, &data, &owner, &group,
+					   sb, adjust_tz, symlink);
 		break;
 	case -EREMOTE:
 		/* DFS link, no metadata available on this server */
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 1be86ba950b35..fbd46db1023ae 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -431,6 +431,21 @@ static int smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
 				&rsp_iov[1], sizeof(idata->posix_fi) /* add SIDs */,
 				(char *)&idata->posix_fi);
 		}
+		if (rc == 0) {
+			unsigned int length = le32_to_cpu(qi_rsp->OutputBufferLength);
+
+			if (length > sizeof(idata->posix_fi)) {
+				char *base = (char *)rsp_iov[1].iov_base +
+					le16_to_cpu(qi_rsp->OutputBufferOffset) +
+					sizeof(idata->posix_fi);
+				*extbuflen = length - sizeof(idata->posix_fi);
+				*extbuf = kmemdup(base, *extbuflen, GFP_KERNEL);
+				if (!*extbuf)
+					rc = -ENOMEM;
+			} else {
+				rc = -EINVAL;
+			}
+		}
 		if (rqst[1].rq_iov)
 			SMB2_query_info_free(&rqst[1]);
 		if (rqst[2].rq_iov)
@@ -569,13 +584,20 @@ out:
 
 int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 				 struct cifs_sb_info *cifs_sb, const char *full_path,
-				 struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse)
+				 struct cifs_open_info_data *data,
+				 struct cifs_sid *owner,
+				 struct cifs_sid *group,
+				 bool *adjust_tz, bool *reparse)
 {
 	int rc;
 	__u32 create_options = 0;
 	struct cifsFileInfo *cfile;
 	struct kvec err_iov[3] = {};
 	int err_buftype[3] = {};
+	__u8 *sidsbuf = NULL;
+	__u8 *sidsbuf_end = NULL;
+	size_t sidsbuflen = 0;
+	size_t owner_len, group_len;
 
 	*adjust_tz = false;
 	*reparse = false;
@@ -590,7 +612,7 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 	cifs_get_readable_path(tcon, full_path, &cfile);
 	rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES, FILE_OPEN,
 			      create_options, ACL_NO_MODE, data, SMB2_OP_POSIX_QUERY_INFO, cfile,
-			      NULL, NULL, err_iov, err_buftype);
+			      &sidsbuf, &sidsbuflen, err_iov, err_buftype);
 	if (rc == -EOPNOTSUPP) {
 		/* BB TODO: When support for special files added to Samba re-verify this path */
 		if (err_iov[0].iov_base && err_buftype[0] != CIFS_NO_BUFFER &&
@@ -607,10 +629,31 @@ int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 		cifs_get_readable_path(tcon, full_path, &cfile);
 		rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, FILE_READ_ATTRIBUTES,
 				      FILE_OPEN, create_options, ACL_NO_MODE, data,
-				      SMB2_OP_POSIX_QUERY_INFO, cfile, NULL, NULL, NULL, NULL);
+				      SMB2_OP_POSIX_QUERY_INFO, cfile,
+				      &sidsbuf, &sidsbuflen, NULL, NULL);
+	}
+
+	if (rc == 0) {
+		sidsbuf_end = sidsbuf + sidsbuflen;
+
+		owner_len = posix_info_sid_size(sidsbuf, sidsbuf_end);
+		if (owner_len == -1) {
+			rc = -EINVAL;
+			goto out;
+		}
+		memcpy(owner, sidsbuf, owner_len);
+
+		group_len = posix_info_sid_size(
+			sidsbuf + owner_len, sidsbuf_end);
+		if (group_len == -1) {
+			rc = -EINVAL;
+			goto out;
+		}
+		memcpy(group, sidsbuf + owner_len, group_len);
 	}
 
 out:
+	kfree(sidsbuf);
 	free_rsp_buf(err_buftype[0], err_iov[0].iov_base);
 	free_rsp_buf(err_buftype[1], err_iov[1].iov_base);
 	free_rsp_buf(err_buftype[2], err_iov[2].iov_base);
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index be21b5d26f67e..d5d7ffb7711c4 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -277,7 +277,10 @@ extern int smb2_query_info_compound(const unsigned int xid,
 /* query path info from the server using SMB311 POSIX extensions*/
 int smb311_posix_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
 				 struct cifs_sb_info *cifs_sb, const char *full_path,
-				 struct cifs_open_info_data *data, bool *adjust_tz, bool *reparse);
+				 struct cifs_open_info_data *data,
+				 struct cifs_sid *owner,
+				 struct cifs_sid *group,
+				 bool *adjust_tz, bool *reparse);
 int posix_info_parse(const void *beg, const void *end,
 		     struct smb2_posix_info_parsed *out);
 int posix_info_sid_size(const void *beg, const void *end);
-- 
GitLab


From 9381666e289852f93be9d7f4f7844017e04f6315 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Nov 2022 14:18:33 +0100
Subject: [PATCH 3522/4122] cifs: wire up >migrate_folio

CIFS does not use page private data that needs migration, so it can just
wire up filemap_migrate_folio.  This prepares for removing ->writepage,
which is used as a fallback if no migrate_folio method is set.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/file.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index cd96982099309..6be924caed393 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -5240,10 +5240,10 @@ const struct address_space_operations cifs_addr_ops = {
 	.direct_IO = cifs_direct_io,
 	.invalidate_folio = cifs_invalidate_folio,
 	.launder_folio = cifs_launder_folio,
+	.migrate_folio = filemap_migrate_folio,
 	/*
-	 * TODO: investigate and if useful we could add an cifs_migratePage
-	 * helper (under an CONFIG_MIGRATION) in the future, and also
-	 * investigate and add an is_dirty_writeback helper if needed
+	 * TODO: investigate and if useful we could add an is_dirty_writeback
+	 * helper if needed
 	 */
 	.swap_activate = cifs_swap_activate,
 	.swap_deactivate = cifs_swap_deactivate,
@@ -5264,4 +5264,5 @@ const struct address_space_operations cifs_addr_ops_smallbuf = {
 	.release_folio = cifs_release_folio,
 	.invalidate_folio = cifs_invalidate_folio,
 	.launder_folio = cifs_launder_folio,
+	.migrate_folio = filemap_migrate_folio,
 };
-- 
GitLab


From bff9018d3a52c45711bd63c446a2c80c0275e935 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Nov 2022 14:18:34 +0100
Subject: [PATCH 3523/4122] cifs: stop using generic_writepages

generic_writepages is just a wrapper that calls ->writepages on a range,
and thus in the way of eventually removing ->writepage.  Switch cifs
to just open code it in preparation of removing ->writepage.

[note: I suspect just integrating the small wsize case with the rest
 of the writeback code might be a better idea here, but that needs
 someone more familiar with the code]

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/file.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 6be924caed393..ec14e38411a13 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2646,6 +2646,21 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
 	return rc;
 }
 
+static int
+cifs_writepage_locked(struct page *page, struct writeback_control *wbc);
+
+static int cifs_write_one_page(struct page *page, struct writeback_control *wbc,
+		void *data)
+{
+	struct address_space *mapping = data;
+	int ret;
+
+	ret = cifs_writepage_locked(page, wbc);
+	unlock_page(page);
+	mapping_set_error(mapping, ret);
+	return ret;
+}
+
 static int cifs_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
@@ -2662,10 +2677,11 @@ static int cifs_writepages(struct address_space *mapping,
 
 	/*
 	 * If wsize is smaller than the page cache size, default to writing
-	 * one page at a time via cifs_writepage
+	 * one page at a time.
 	 */
 	if (cifs_sb->ctx->wsize < PAGE_SIZE)
-		return generic_writepages(mapping, wbc);
+		return write_cache_pages(mapping, wbc, cifs_write_one_page,
+				mapping);
 
 	xid = get_xid();
 	if (wbc->range_cyclic) {
-- 
GitLab


From ebaad77c89921c8237ca17791d5462bd289052d0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 16 Nov 2022 14:18:35 +0100
Subject: [PATCH 3524/4122] cifs: remove ->writepage

->writepage is a very inefficient method to write back data, and only
used through write_cache_pages or a a fallback when no ->migrate_folio
method is present.  Now that cifs implements ->migrate_folio and
doesn't call generic_writepages, the writepage method can be removed.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/file.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ec14e38411a13..6701257541ab2 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2868,13 +2868,6 @@ retry_write:
 	return rc;
 }
 
-static int cifs_writepage(struct page *page, struct writeback_control *wbc)
-{
-	int rc = cifs_writepage_locked(page, wbc);
-	unlock_page(page);
-	return rc;
-}
-
 static int cifs_write_end(struct file *file, struct address_space *mapping,
 			loff_t pos, unsigned len, unsigned copied,
 			struct page *page, void *fsdata)
@@ -5247,7 +5240,6 @@ static bool cifs_dirty_folio(struct address_space *mapping, struct folio *folio)
 const struct address_space_operations cifs_addr_ops = {
 	.read_folio = cifs_read_folio,
 	.readahead = cifs_readahead,
-	.writepage = cifs_writepage,
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
 	.write_end = cifs_write_end,
@@ -5272,7 +5264,6 @@ const struct address_space_operations cifs_addr_ops = {
  */
 const struct address_space_operations cifs_addr_ops_smallbuf = {
 	.read_folio = cifs_read_folio,
-	.writepage = cifs_writepage,
 	.writepages = cifs_writepages,
 	.write_begin = cifs_write_begin,
 	.write_end = cifs_write_end,
-- 
GitLab


From d406d26745aba3365ab9171b2d5cbea9c1757305 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Mon, 5 Dec 2022 23:31:53 -0300
Subject: [PATCH 3525/4122] cifs: skip alloc when request has no pages

When smb3_init_transform_rq() was being called with requests (@old_rq)
which had no pages, it was unnecessarily allocating a single page for
every request in @new_rq.

Fix this by skipping page array allocation when requests have no pages
(e.g. !smb_rqst::rq_npages).

Also get rid of deprecated kmap() and use kmap_local_page() instead
while we're at it.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/smb2ops.c | 36 +++++++++++++++++++++---------------
 1 file changed, 21 insertions(+), 15 deletions(-)

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index bfaafd02fb1f2..72b22d033ed56 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4445,21 +4445,27 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
 	int rc = -ENOMEM;
 
 	for (i = 1; i < num_rqst; i++) {
-		npages = old_rq[i - 1].rq_npages;
+		struct smb_rqst *old = &old_rq[i - 1];
+		struct smb_rqst *new = &new_rq[i];
+
+		orig_len += smb_rqst_len(server, old);
+		new->rq_iov = old->rq_iov;
+		new->rq_nvec = old->rq_nvec;
+
+		npages = old->rq_npages;
+		if (!npages)
+			continue;
+
 		pages = kmalloc_array(npages, sizeof(struct page *),
 				      GFP_KERNEL);
 		if (!pages)
 			goto err_free;
 
-		new_rq[i].rq_pages = pages;
-		new_rq[i].rq_npages = npages;
-		new_rq[i].rq_offset = old_rq[i - 1].rq_offset;
-		new_rq[i].rq_pagesz = old_rq[i - 1].rq_pagesz;
-		new_rq[i].rq_tailsz = old_rq[i - 1].rq_tailsz;
-		new_rq[i].rq_iov = old_rq[i - 1].rq_iov;
-		new_rq[i].rq_nvec = old_rq[i - 1].rq_nvec;
-
-		orig_len += smb_rqst_len(server, &old_rq[i - 1]);
+		new->rq_pages = pages;
+		new->rq_npages = npages;
+		new->rq_offset = old->rq_offset;
+		new->rq_pagesz = old->rq_pagesz;
+		new->rq_tailsz = old->rq_tailsz;
 
 		for (j = 0; j < npages; j++) {
 			pages[j] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
@@ -4472,14 +4478,14 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst,
 			char *dst, *src;
 			unsigned int offset, len;
 
-			rqst_page_get_length(&new_rq[i], j, &len, &offset);
+			rqst_page_get_length(new, j, &len, &offset);
 
-			dst = (char *) kmap(new_rq[i].rq_pages[j]) + offset;
-			src = (char *) kmap(old_rq[i - 1].rq_pages[j]) + offset;
+			dst = kmap_local_page(new->rq_pages[j]) + offset;
+			src = kmap_local_page(old->rq_pages[j]) + offset;
 
 			memcpy(dst, src, len);
-			kunmap(new_rq[i].rq_pages[j]);
-			kunmap(old_rq[i - 1].rq_pages[j]);
+			kunmap(new->rq_pages[j]);
+			kunmap(old->rq_pages[j]);
 		}
 	}
 
-- 
GitLab


From 81c25247a2a03a0f97e4805d7aff7541ccff6baa Mon Sep 17 00:00:00 2001
From: Daniel Scally <dan.scally@ideasonboard.com>
Date: Tue, 6 Dec 2022 16:12:03 +0000
Subject: [PATCH 3526/4122] usb: gadget: uvc: Rename bmInterfaceFlags ->
 bmInterlaceFlags

In the specification documents for the Uncompressed and MJPEG USB
Video Payloads, the field name is bmInterlaceFlags - it has been
misnamed within the kernel.

Although renaming the field does break the kernel's interface to
userspace it should be low-risk in this instance. The field is read
only and hardcoded to 0, so there was never any value in anyone
reading it. A search of the uvc-gadget application and all the
forks that I could find for it did not reveal any users either.

Fixes: cdda479f15cd ("USB gadget: video class function driver")
Reviewed-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Reviewed-by: Kieran Bingham <kieran.bingham@ideasonboard.com>
Signed-off-by: Daniel Scally <dan.scally@ideasonboard.com>
Link: https://lore.kernel.org/r/20221206161203.1562827-1-dan.scally@ideasonboard.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/ABI/testing/configfs-usb-gadget-uvc |  4 ++--
 drivers/usb/gadget/function/uvc_configfs.c        | 12 ++++++------
 drivers/usb/gadget/legacy/webcam.c                |  4 ++--
 include/uapi/linux/usb/video.h                    |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/Documentation/ABI/testing/configfs-usb-gadget-uvc b/Documentation/ABI/testing/configfs-usb-gadget-uvc
index 611b23e6488d9..f00cff6d8c5cb 100644
--- a/Documentation/ABI/testing/configfs-usb-gadget-uvc
+++ b/Documentation/ABI/testing/configfs-usb-gadget-uvc
@@ -197,7 +197,7 @@ Description:	Specific MJPEG format descriptors
 					read-only
 		bmaControls		this format's data for bmaControls in
 					the streaming header
-		bmInterfaceFlags	specifies interlace information,
+		bmInterlaceFlags	specifies interlace information,
 					read-only
 		bAspectRatioY		the X dimension of the picture aspect
 					ratio, read-only
@@ -253,7 +253,7 @@ Description:	Specific uncompressed format descriptors
 					read-only
 		bmaControls		this format's data for bmaControls in
 					the streaming header
-		bmInterfaceFlags	specifies interlace information,
+		bmInterlaceFlags	specifies interlace information,
 					read-only
 		bAspectRatioY		the X dimension of the picture aspect
 					ratio, read-only
diff --git a/drivers/usb/gadget/function/uvc_configfs.c b/drivers/usb/gadget/function/uvc_configfs.c
index 4303a3283ba0a..76cb60d13049f 100644
--- a/drivers/usb/gadget/function/uvc_configfs.c
+++ b/drivers/usb/gadget/function/uvc_configfs.c
@@ -1512,7 +1512,7 @@ UVCG_UNCOMPRESSED_ATTR(b_bits_per_pixel, bBitsPerPixel, 8);
 UVCG_UNCOMPRESSED_ATTR(b_default_frame_index, bDefaultFrameIndex, 8);
 UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, 8);
 UVCG_UNCOMPRESSED_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, 8);
-UVCG_UNCOMPRESSED_ATTR_RO(bm_interface_flags, bmInterfaceFlags, 8);
+UVCG_UNCOMPRESSED_ATTR_RO(bm_interlace_flags, bmInterlaceFlags, 8);
 
 #undef UVCG_UNCOMPRESSED_ATTR
 #undef UVCG_UNCOMPRESSED_ATTR_RO
@@ -1541,7 +1541,7 @@ static struct configfs_attribute *uvcg_uncompressed_attrs[] = {
 	&uvcg_uncompressed_attr_b_default_frame_index,
 	&uvcg_uncompressed_attr_b_aspect_ratio_x,
 	&uvcg_uncompressed_attr_b_aspect_ratio_y,
-	&uvcg_uncompressed_attr_bm_interface_flags,
+	&uvcg_uncompressed_attr_bm_interlace_flags,
 	&uvcg_uncompressed_attr_bma_controls,
 	NULL,
 };
@@ -1574,7 +1574,7 @@ static struct config_group *uvcg_uncompressed_make(struct config_group *group,
 	h->desc.bDefaultFrameIndex	= 1;
 	h->desc.bAspectRatioX		= 0;
 	h->desc.bAspectRatioY		= 0;
-	h->desc.bmInterfaceFlags	= 0;
+	h->desc.bmInterlaceFlags	= 0;
 	h->desc.bCopyProtect		= 0;
 
 	INIT_LIST_HEAD(&h->fmt.frames);
@@ -1700,7 +1700,7 @@ UVCG_MJPEG_ATTR(b_default_frame_index, bDefaultFrameIndex, 8);
 UVCG_MJPEG_ATTR_RO(bm_flags, bmFlags, 8);
 UVCG_MJPEG_ATTR_RO(b_aspect_ratio_x, bAspectRatioX, 8);
 UVCG_MJPEG_ATTR_RO(b_aspect_ratio_y, bAspectRatioY, 8);
-UVCG_MJPEG_ATTR_RO(bm_interface_flags, bmInterfaceFlags, 8);
+UVCG_MJPEG_ATTR_RO(bm_interlace_flags, bmInterlaceFlags, 8);
 
 #undef UVCG_MJPEG_ATTR
 #undef UVCG_MJPEG_ATTR_RO
@@ -1728,7 +1728,7 @@ static struct configfs_attribute *uvcg_mjpeg_attrs[] = {
 	&uvcg_mjpeg_attr_bm_flags,
 	&uvcg_mjpeg_attr_b_aspect_ratio_x,
 	&uvcg_mjpeg_attr_b_aspect_ratio_y,
-	&uvcg_mjpeg_attr_bm_interface_flags,
+	&uvcg_mjpeg_attr_bm_interlace_flags,
 	&uvcg_mjpeg_attr_bma_controls,
 	NULL,
 };
@@ -1755,7 +1755,7 @@ static struct config_group *uvcg_mjpeg_make(struct config_group *group,
 	h->desc.bDefaultFrameIndex	= 1;
 	h->desc.bAspectRatioX		= 0;
 	h->desc.bAspectRatioY		= 0;
-	h->desc.bmInterfaceFlags	= 0;
+	h->desc.bmInterlaceFlags	= 0;
 	h->desc.bCopyProtect		= 0;
 
 	INIT_LIST_HEAD(&h->fmt.frames);
diff --git a/drivers/usb/gadget/legacy/webcam.c b/drivers/usb/gadget/legacy/webcam.c
index 94e22867da1d0..53e38f87472b0 100644
--- a/drivers/usb/gadget/legacy/webcam.c
+++ b/drivers/usb/gadget/legacy/webcam.c
@@ -171,7 +171,7 @@ static const struct uvc_format_uncompressed uvc_format_yuv = {
 	.bDefaultFrameIndex	= 1,
 	.bAspectRatioX		= 0,
 	.bAspectRatioY		= 0,
-	.bmInterfaceFlags	= 0,
+	.bmInterlaceFlags	= 0,
 	.bCopyProtect		= 0,
 };
 
@@ -222,7 +222,7 @@ static const struct uvc_format_mjpeg uvc_format_mjpg = {
 	.bDefaultFrameIndex	= 1,
 	.bAspectRatioX		= 0,
 	.bAspectRatioY		= 0,
-	.bmInterfaceFlags	= 0,
+	.bmInterlaceFlags	= 0,
 	.bCopyProtect		= 0,
 };
 
diff --git a/include/uapi/linux/usb/video.h b/include/uapi/linux/usb/video.h
index bfdae12cdacf8..6e8e572c2980a 100644
--- a/include/uapi/linux/usb/video.h
+++ b/include/uapi/linux/usb/video.h
@@ -466,7 +466,7 @@ struct uvc_format_uncompressed {
 	__u8  bDefaultFrameIndex;
 	__u8  bAspectRatioX;
 	__u8  bAspectRatioY;
-	__u8  bmInterfaceFlags;
+	__u8  bmInterlaceFlags;
 	__u8  bCopyProtect;
 } __attribute__((__packed__));
 
@@ -522,7 +522,7 @@ struct uvc_format_mjpeg {
 	__u8  bDefaultFrameIndex;
 	__u8  bAspectRatioX;
 	__u8  bAspectRatioY;
-	__u8  bmInterfaceFlags;
+	__u8  bmInterlaceFlags;
 	__u8  bCopyProtect;
 } __attribute__((__packed__));
 
-- 
GitLab


From c7912f27dedd874d49eadf78b5b6fbfdec52c7c3 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 7 Dec 2022 09:33:31 +0000
Subject: [PATCH 3527/4122] staging: rtl8192e: Fix spelling mistake "ContryIE"
 -> "CountryIE"

There is a spelling mistake in a netdev_info message. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Link: https://lore.kernel.org/r/20221207093331.2280355-1-colin.i.king@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/rtl8192e/rtllib_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/rtl8192e/rtllib_rx.c b/drivers/staging/rtl8192e/rtllib_rx.c
index f8965afab7678..669e74a67190d 100644
--- a/drivers/staging/rtl8192e/rtllib_rx.c
+++ b/drivers/staging/rtl8192e/rtllib_rx.c
@@ -1776,7 +1776,7 @@ static inline void rtllib_extract_country_ie(
 				if (rtllib_act_scanning(ieee, false) &&
 				    ieee->FirstIe_InScan)
 					netdev_info(ieee->dev,
-						    "Received beacon ContryIE, SSID: <%s>\n",
+						    "Received beacon CountryIE, SSID: <%s>\n",
 						    network->ssid);
 				dot11d_update_country(ieee, addr2,
 						       info_element->len,
-- 
GitLab


From 37aa6b98237082a4bf882b4d986329b94ebd447d Mon Sep 17 00:00:00 2001
From: Chen Zhongjin <chenzhongjin@huawei.com>
Date: Tue, 6 Dec 2022 09:50:33 +0800
Subject: [PATCH 3528/4122] vme: Use root_device_register() not underlined
 version

root_device_register and __root_device_register have exactly same
effect. Use the not underlined version to keep usage consistency.

Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
Link: https://lore.kernel.org/r/20221206015033.125827-1-chenzhongjin@huawei.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/staging/vme_user/vme_fake.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/staging/vme_user/vme_fake.c b/drivers/staging/vme_user/vme_fake.c
index 6454ccf6e5b4b..f5d2c345978ac 100644
--- a/drivers/staging/vme_user/vme_fake.c
+++ b/drivers/staging/vme_user/vme_fake.c
@@ -1069,7 +1069,7 @@ static int __init fake_init(void)
 	struct vme_lm_resource *lm;
 
 	/* We need a fake parent device */
-	vme_root = __root_device_register("vme", THIS_MODULE);
+	vme_root = root_device_register("vme");
 	if (IS_ERR(vme_root))
 		return PTR_ERR(vme_root);
 
-- 
GitLab


From 52f31ed228212ba572c44e15e818a3a5c74122c0 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Thu, 8 Dec 2022 08:29:22 -0800
Subject: [PATCH 3529/4122] xfs: dquot shrinker doesn't check for
 XFS_DQFLAG_FREEING

Resulting in a UAF if the shrinker races with some other dquot
freeing mechanism that sets XFS_DQFLAG_FREEING before the dquot is
removed from the LRU. This can occur if a dquot purge races with
drop_caches.

Reported-by: syzbot+912776840162c13db1a3@syzkaller.appspotmail.com
Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_qm.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 18bb4ec4d7c9b..ff53d40a2dae3 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -422,6 +422,14 @@ xfs_qm_dquot_isolate(
 	if (!xfs_dqlock_nowait(dqp))
 		goto out_miss_busy;
 
+	/*
+	 * If something else is freeing this dquot and hasn't yet removed it
+	 * from the LRU, leave it for the freeing task to complete the freeing
+	 * process rather than risk it being free from under us here.
+	 */
+	if (dqp->q_flags & XFS_DQFLAG_FREEING)
+		goto out_miss_unlock;
+
 	/*
 	 * This dquot has acquired a reference in the meantime remove it from
 	 * the freelist and try again.
@@ -441,10 +449,8 @@ xfs_qm_dquot_isolate(
 	 * skip it so there is time for the IO to complete before we try to
 	 * reclaim it again on the next LRU pass.
 	 */
-	if (!xfs_dqflock_nowait(dqp)) {
-		xfs_dqunlock(dqp);
-		goto out_miss_busy;
-	}
+	if (!xfs_dqflock_nowait(dqp))
+		goto out_miss_unlock;
 
 	if (XFS_DQ_IS_DIRTY(dqp)) {
 		struct xfs_buf	*bp = NULL;
@@ -478,6 +484,8 @@ xfs_qm_dquot_isolate(
 	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaims);
 	return LRU_REMOVED;
 
+out_miss_unlock:
+	xfs_dqunlock(dqp);
 out_miss_busy:
 	trace_xfs_dqreclaim_busy(dqp);
 	XFS_STATS_INC(dqp->q_mount, xs_qm_dqreclaim_misses);
-- 
GitLab


From 7dfb216eda99bbfc2a8c3b03d2eec63314f52b3c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Amadeusz=20S=C5=82awi=C5=84ski?=
 <amadeuszx.slawinski@linux.intel.com>
Date: Thu, 8 Dec 2022 23:40:16 +0100
Subject: [PATCH 3530/4122] ACPICA: Fix operand resolution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In our tests we get UBSAN warning coming from ACPI parser. This is
caused by trying to resolve operands when there is none.

[    0.000000] Linux version 5.15.0-rc3chromeavsrel1.0.184+ (root@...) (gcc (Ubuntu 10.3.0-1ubuntu1~20.04) 10.3.0, GNU ld (GNU Binutils for Ubuntu) 2.34) #1 SMP PREEMPT Sat Oct 16 00:08:27 UTC 2021
...
[ 14.719508] ================================================================================
[ 14.719551] UBSAN: array-index-out-of-bounds in /.../linux/drivers/acpi/acpica/dswexec.c:401:12
[ 14.719594] index -1 is out of range for type 'acpi_operand_object *[9]'
[ 14.719621] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.15.0-rc3chromeavsrel1.0.184+ #1
[ 14.719657] Hardware name: Intel Corp. Geminilake/GLK RVP2 LP4SD (07), BIOS GELKRVPA.X64.0214.B50.2009111159 09/11/2020
[ 14.719694] Call Trace:
[ 14.719712] dump_stack_lvl+0x38/0x49
[ 14.719749] dump_stack+0x10/0x12
[ 14.719775] ubsan_epilogue+0x9/0x45
[ 14.719801] __ubsan_handle_out_of_bounds.cold+0x44/0x49
[ 14.719835] acpi_ds_exec_end_op+0x1d7/0x6b5
[ 14.719870] acpi_ps_parse_loop+0x942/0xb34
...

Problem happens because WalkState->NumOperands is 0 and it is used when
trying to access into operands table. Actual code is:
WalkState->Operands [WalkState->NumOperands -1]
which causes out of bound access. Improve the check before above access
to check if ACPI opcode should have any arguments (operands) at all.

Link: https://github.com/acpica/acpica/pull/745
Signed-off-by: Amadeusz Sławiński <amadeuszx.slawinski@linux.intel.com>
Reviewed-by: Cezary Rojewski <cezary.rojewski@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/dswexec.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/acpica/dswexec.c b/drivers/acpi/acpica/dswexec.c
index e8ad41387f84c..b082eb942a0f8 100644
--- a/drivers/acpi/acpica/dswexec.c
+++ b/drivers/acpi/acpica/dswexec.c
@@ -389,9 +389,11 @@ acpi_status acpi_ds_exec_end_op(struct acpi_walk_state *walk_state)
 
 		/*
 		 * All opcodes require operand resolution, with the only exceptions
-		 * being the object_type and size_of operators.
+		 * being the object_type and size_of operators as well as opcodes that
+		 * take no arguments.
 		 */
-		if (!(walk_state->op_info->flags & AML_NO_OPERAND_RESOLVE)) {
+		if (!(walk_state->op_info->flags & AML_NO_OPERAND_RESOLVE) &&
+		    (walk_state->op_info->flags & AML_HAS_ARGS)) {
 
 			/* Resolve all operands */
 
-- 
GitLab


From 7a9d74e7e403cb2e60d4d00c05f2f3ab2a33d0c3 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 8 Dec 2022 15:23:32 +0100
Subject: [PATCH 3531/4122] ACPICA: include/acpi/acpixf.h: Fix indentation

A bunch of the functions declared in include/acpi/acpixf.h have their
name aligned a space after the '(' of e.g. the
`ACPI_EXTERNAL_RETURN_STATUS(acpi_status` line above rather then being
directly aligned after the '('.

This breaks applying patches generated from the ACPICA upstream git,
remove the extra space before the function-names and all the arguments
to fix this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/acpi/acpixf.h | 120 +++++++++++++++++++++---------------------
 1 file changed, 60 insertions(+), 60 deletions(-)

diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index 9e49b37fc869f..d1329d6d526d9 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -589,82 +589,82 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_install_initialization_handler
 			    (acpi_init_handler handler, u32 function))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_install_sci_handler(acpi_sci_handler
-							  address,
-							  void *context))
-ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_remove_sci_handler(acpi_sci_handler
-							 address))
+				acpi_install_sci_handler(acpi_sci_handler
+							 address,
+							 void *context))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_install_global_event_handler
-				 (acpi_gbl_event_handler handler,
-				  void *context))
+				acpi_remove_sci_handler(acpi_sci_handler
+							address))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_install_fixed_event_handler(u32
-								  acpi_event,
-								  acpi_event_handler
-								  handler,
-								  void
-								  *context))
+				acpi_install_global_event_handler
+				(acpi_gbl_event_handler handler,
+				 void *context))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_remove_fixed_event_handler(u32 acpi_event,
+				acpi_install_fixed_event_handler(u32
+								 acpi_event,
 								 acpi_event_handler
-								 handler))
-ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_install_gpe_handler(acpi_handle
-							  gpe_device,
-							  u32 gpe_number,
-							  u32 type,
-							  acpi_gpe_handler
-							  address,
-							  void *context))
+								 handler,
+								 void
+								 *context))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_install_gpe_raw_handler(acpi_handle
-							      gpe_device,
-							      u32 gpe_number,
-							      u32 type,
-							      acpi_gpe_handler
-							      address,
-							      void *context))
+				acpi_remove_fixed_event_handler(u32 acpi_event,
+								acpi_event_handler
+								handler))
 ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
-				 acpi_remove_gpe_handler(acpi_handle gpe_device,
+				acpi_install_gpe_handler(acpi_handle
+							 gpe_device,
 							 u32 gpe_number,
+							 u32 type,
 							 acpi_gpe_handler
-							 address))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status
-			     acpi_install_notify_handler(acpi_handle device,
-							 u32 handler_type,
-							 acpi_notify_handler
-							 handler,
+							 address,
 							 void *context))
+ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
+				acpi_install_gpe_raw_handler(acpi_handle
+							     gpe_device,
+							     u32 gpe_number,
+							     u32 type,
+							     acpi_gpe_handler
+							     address,
+							     void *context))
+ACPI_HW_DEPENDENT_RETURN_STATUS(acpi_status
+				acpi_remove_gpe_handler(acpi_handle gpe_device,
+							u32 gpe_number,
+							acpi_gpe_handler
+							address))
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
-			     acpi_remove_notify_handler(acpi_handle device,
+			    acpi_install_notify_handler(acpi_handle device,
 							u32 handler_type,
 							acpi_notify_handler
-							handler))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status
-			     acpi_install_address_space_handler(acpi_handle
-								device,
-								acpi_adr_space_type
-								space_id,
-								acpi_adr_space_handler
-								handler,
-								acpi_adr_space_setup
-								setup,
-								void *context))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status
-			     acpi_remove_address_space_handler(acpi_handle
+							handler,
+							void *context))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_remove_notify_handler(acpi_handle device,
+						       u32 handler_type,
+						       acpi_notify_handler
+						       handler))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_install_address_space_handler(acpi_handle
 							       device,
 							       acpi_adr_space_type
 							       space_id,
 							       acpi_adr_space_handler
-							       handler))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status
-			     acpi_install_exception_handler
-			     (acpi_exception_handler handler))
-ACPI_EXTERNAL_RETURN_STATUS(acpi_status
-			     acpi_install_interface_handler
-			     (acpi_interface_handler handler))
+							       handler,
+							       acpi_adr_space_setup
+							       setup,
+							       void *context))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_remove_address_space_handler(acpi_handle
+							      device,
+							      acpi_adr_space_type
+							      space_id,
+							      acpi_adr_space_handler
+							      handler))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_install_exception_handler
+			    (acpi_exception_handler handler))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_install_interface_handler
+			    (acpi_interface_handler handler))
 
 /*
  * Global Lock interfaces
-- 
GitLab


From 54c516aeb8b39eeae6450b7d8076d381568dca46 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 8 Dec 2022 15:23:33 +0100
Subject: [PATCH 3532/4122] ACPICA: Allow address_space_handler Install and
 _REG execution as 2 separate steps
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ACPI-2.0 says that the EC op_region handler must be available immediately
(like the standard default op_region handlers):

Quoting from the ACPI spec version 6.3: "6.5.4 _REG (Region) ...
2. OSPM must make Embedded Controller operation regions, accessed via
the Embedded Controllers described in ECDT, available before executing
any control method. These operation regions may become inaccessible
after OSPM runs _REG(EmbeddedControl, 0)."

So the OS must probe the ECDT described EC and install the OpRegion handler
before calling acpi_enable_subsystem() and acpi_initialize_objects().

This is a problem because calling acpi_install_address_space_handler()
does not just install the op_region handler, it also runs the EC's _REG
method. This _REG method may rely on initialization done by the _INI
methods of one of the PCI / _SB root devices.

For the other early/default op_region handlers the op_region handler
install and the _REG execution is split into 2 separate steps:
1. acpi_ev_install_region_handlers(), called early from acpi_load_tables()
2. acpi_ev_initialize_op_regions(), called from acpi_initialize_objects()

To fix the EC op_region issue, add 2 bew functions:
1. acpi_install_address_space_handler_no_reg()
2. acpi_execute_reg_methods()
to allow doing things in 2 steps for other op_region handlers,
like the EC handler, too.

Note that the comment describing acpi_ev_install_region_handlers() even has
an alinea describing this problem. Using the new methods allows users
to avoid this problem.

Link: https://github.com/acpica/acpica/pull/786
Link: https://bugzilla.kernel.org/show_bug.cgi?id=214899
Reported-and-tested-by: Johannes Penßel <johannespenssel@posteo.net>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpica/evxfregn.c | 92 +++++++++++++++++++++++++++++++---
 include/acpi/acpixf.h          | 10 ++++
 2 files changed, 95 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/acpica/evxfregn.c b/drivers/acpi/acpica/evxfregn.c
index 0a8372bf6a77d..a5c19f46ec17d 100644
--- a/drivers/acpi/acpica/evxfregn.c
+++ b/drivers/acpi/acpica/evxfregn.c
@@ -20,13 +20,14 @@ ACPI_MODULE_NAME("evxfregn")
 
 /*******************************************************************************
  *
- * FUNCTION:    acpi_install_address_space_handler
+ * FUNCTION:    acpi_install_address_space_handler_internal
  *
  * PARAMETERS:  device          - Handle for the device
  *              space_id        - The address space ID
  *              handler         - Address of the handler
  *              setup           - Address of the setup function
  *              context         - Value passed to the handler on each access
+ *              Run_reg         - Run _REG methods for this address space?
  *
  * RETURN:      Status
  *
@@ -37,13 +38,16 @@ ACPI_MODULE_NAME("evxfregn")
  * are executed here, and these methods can only be safely executed after
  * the default handlers have been installed and the hardware has been
  * initialized (via acpi_enable_subsystem.)
+ * To avoid this problem pass FALSE for Run_Reg and later on call
+ * acpi_execute_reg_methods() to execute _REG.
  *
  ******************************************************************************/
-acpi_status
-acpi_install_address_space_handler(acpi_handle device,
-				   acpi_adr_space_type space_id,
-				   acpi_adr_space_handler handler,
-				   acpi_adr_space_setup setup, void *context)
+static acpi_status
+acpi_install_address_space_handler_internal(acpi_handle device,
+					    acpi_adr_space_type space_id,
+					    acpi_adr_space_handler handler,
+					    acpi_adr_space_setup setup,
+					    void *context, u8 run_reg)
 {
 	struct acpi_namespace_node *node;
 	acpi_status status;
@@ -80,14 +84,40 @@ acpi_install_address_space_handler(acpi_handle device,
 
 	/* Run all _REG methods for this address space */
 
-	acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT);
+	if (run_reg) {
+		acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT);
+	}
 
 unlock_and_exit:
 	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
 	return_ACPI_STATUS(status);
 }
 
+acpi_status
+acpi_install_address_space_handler(acpi_handle device,
+				   acpi_adr_space_type space_id,
+				   acpi_adr_space_handler handler,
+				   acpi_adr_space_setup setup, void *context)
+{
+	return acpi_install_address_space_handler_internal(device, space_id,
+							   handler, setup,
+							   context, TRUE);
+}
+
 ACPI_EXPORT_SYMBOL(acpi_install_address_space_handler)
+acpi_status
+acpi_install_address_space_handler_no_reg(acpi_handle device,
+					  acpi_adr_space_type space_id,
+					  acpi_adr_space_handler handler,
+					  acpi_adr_space_setup setup,
+					  void *context)
+{
+	return acpi_install_address_space_handler_internal(device, space_id,
+							   handler, setup,
+							   context, FALSE);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_install_address_space_handler_no_reg)
 
 /*******************************************************************************
  *
@@ -228,3 +258,51 @@ unlock_and_exit:
 }
 
 ACPI_EXPORT_SYMBOL(acpi_remove_address_space_handler)
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_execute_reg_methods
+ *
+ * PARAMETERS:  device          - Handle for the device
+ *              space_id        - The address space ID
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: Execute _REG for all op_regions of a given space_id.
+ *
+ ******************************************************************************/
+acpi_status
+acpi_execute_reg_methods(acpi_handle device, acpi_adr_space_type space_id)
+{
+	struct acpi_namespace_node *node;
+	acpi_status status;
+
+	ACPI_FUNCTION_TRACE(acpi_execute_reg_methods);
+
+	/* Parameter validation */
+
+	if (!device) {
+		return_ACPI_STATUS(AE_BAD_PARAMETER);
+	}
+
+	status = acpi_ut_acquire_mutex(ACPI_MTX_NAMESPACE);
+	if (ACPI_FAILURE(status)) {
+		return_ACPI_STATUS(status);
+	}
+
+	/* Convert and validate the device handle */
+
+	node = acpi_ns_validate_handle(device);
+	if (node) {
+
+		/* Run all _REG methods for this address space */
+
+		acpi_ev_execute_reg_methods(node, space_id, ACPI_REG_CONNECT);
+	} else {
+		status = AE_BAD_PARAMETER;
+	}
+
+	(void)acpi_ut_release_mutex(ACPI_MTX_NAMESPACE);
+	return_ACPI_STATUS(status);
+}
+
+ACPI_EXPORT_SYMBOL(acpi_execute_reg_methods)
diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h
index d1329d6d526d9..9778408f8db40 100644
--- a/include/acpi/acpixf.h
+++ b/include/acpi/acpixf.h
@@ -652,6 +652,16 @@ ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 							       acpi_adr_space_setup
 							       setup,
 							       void *context))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_install_address_space_handler_no_reg
+			    (acpi_handle device, acpi_adr_space_type space_id,
+			     acpi_adr_space_handler handler,
+			     acpi_adr_space_setup setup,
+			     void *context))
+ACPI_EXTERNAL_RETURN_STATUS(acpi_status
+			    acpi_execute_reg_methods(acpi_handle device,
+						     acpi_adr_space_type
+						     space_id))
 ACPI_EXTERNAL_RETURN_STATUS(acpi_status
 			    acpi_remove_address_space_handler(acpi_handle
 							      device,
-- 
GitLab


From a5072078dbfaa9d70130805766dfa34bbb7bf2a7 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 8 Dec 2022 15:23:34 +0100
Subject: [PATCH 3533/4122] ACPI: EC: Fix EC address space handler
 unregistration

When an ECDT table is present the EC address space handler gets registered
on the root node. So to unregister it properly the unregister call also
must be done on the root node.

Store the ACPI handle used for the acpi_install_address_space_handler()
call and use te same handle for the acpi_remove_address_space_handler()
call.

Reported-by: Rafael J. Wysocki <rafael@kernel.org>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c       | 4 +++-
 drivers/acpi/internal.h | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 9751b84c1b221..5a21e4d58322b 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -1475,6 +1475,7 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device)
 			return -ENODEV;
 		}
 		set_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
+		ec->address_space_handler_holder = ec->handle;
 	}
 
 	if (!device)
@@ -1526,7 +1527,8 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device)
 static void ec_remove_handlers(struct acpi_ec *ec)
 {
 	if (test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
-		if (ACPI_FAILURE(acpi_remove_address_space_handler(ec->handle,
+		if (ACPI_FAILURE(acpi_remove_address_space_handler(
+					ec->address_space_handler_holder,
 					ACPI_ADR_SPACE_EC, &acpi_ec_space_handler)))
 			pr_err("failed to remove space handler\n");
 		clear_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags);
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h
index 219c02df9a08c..ec584442fb298 100644
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -173,6 +173,7 @@ enum acpi_ec_event_state {
 
 struct acpi_ec {
 	acpi_handle handle;
+	acpi_handle address_space_handler_holder;
 	int gpe;
 	int irq;
 	unsigned long command_addr;
-- 
GitLab


From ab4620f58d38206687b9f99d9d2cc1d5a2640985 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Thu, 8 Dec 2022 15:23:35 +0100
Subject: [PATCH 3534/4122] ACPI: EC: Fix ECDT probe ordering issues
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ACPI-2.0 says that the EC OpRegion handler must be available immediately
(like the standard default OpRegion handlers):

Quoting from the ACPI spec version 6.3: "6.5.4 _REG (Region) ...
2. OSPM must make Embedded Controller operation regions, accessed via
the Embedded Controllers described in ECDT, available before executing
any control method. These operation regions may become inaccessible
after OSPM runs _REG(EmbeddedControl, 0)."

So acpi_bus_init() calls acpi_ec_ecdt_probe(), which calls
acpi_install_address_space_handler() to install the EC's OpRegion
handler, early on.

This not only installs the OpRegion handler, but also calls the EC's
_REG method. The _REG method call is a problem because it may rely on
initialization done by the _INI methods of one of the PCI / _SB root devs,
see for example: https://bugzilla.kernel.org/show_bug.cgi?id=214899 .

Generally speaking _REG methods are executed when the ACPI-device they
are part of has a driver bound to it. Where as _INI methods must be
executed at table load time (according to the spec). The problem here
is that the early acpi_install_address_space_handler() call causes
the _REG handler to run too early.

To allow fixing this the ACPICA code now allows to split the OpRegion
handler installation and the executing of _REG into 2 separate steps.

This commit uses this ACPICA functionality to fix the EC probe ordering
by delaying the executing of _REG for ECDT described ECs till the matching
EC device in the DSDT gets parsed and acpi_ec_add() for it gets called.
This moves the calling of _REG for the EC on devices with an ECDT to
the same point in time where it is called on devices without an ECDT table.

BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=214899
Reported-and-tested-by: Johannes Penßel <johannespenssel@posteo.net>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/ec.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c
index 5a21e4d58322b..73ac2f222897b 100644
--- a/drivers/acpi/ec.c
+++ b/drivers/acpi/ec.c
@@ -94,6 +94,7 @@ enum {
 	EC_FLAGS_QUERY_ENABLED,		/* Query is enabled */
 	EC_FLAGS_EVENT_HANDLER_INSTALLED,	/* Event handler installed */
 	EC_FLAGS_EC_HANDLER_INSTALLED,	/* OpReg handler installed */
+	EC_FLAGS_EC_REG_CALLED,		/* OpReg ACPI _REG method called */
 	EC_FLAGS_QUERY_METHODS_INSTALLED, /* _Qxx handlers installed */
 	EC_FLAGS_STARTED,		/* Driver is started */
 	EC_FLAGS_STOPPED,		/* Driver is stopped */
@@ -1446,6 +1447,7 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec)
  * ec_install_handlers - Install service callbacks and register query methods.
  * @ec: Target EC.
  * @device: ACPI device object corresponding to @ec.
+ * @call_reg: If _REG should be called to notify OpRegion availability
  *
  * Install a handler for the EC address space type unless it has been installed
  * already.  If @device is not NULL, also look for EC query methods in the
@@ -1458,7 +1460,8 @@ static bool install_gpio_irq_event_handler(struct acpi_ec *ec)
  * -EPROBE_DEFER if GPIO IRQ acquisition needs to be deferred,
  * or 0 (success) otherwise.
  */
-static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device)
+static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device,
+			       bool call_reg)
 {
 	acpi_status status;
 
@@ -1466,10 +1469,10 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device)
 
 	if (!test_bit(EC_FLAGS_EC_HANDLER_INSTALLED, &ec->flags)) {
 		acpi_ec_enter_noirq(ec);
-		status = acpi_install_address_space_handler(ec->handle,
-							    ACPI_ADR_SPACE_EC,
-							    &acpi_ec_space_handler,
-							    NULL, ec);
+		status = acpi_install_address_space_handler_no_reg(ec->handle,
+								   ACPI_ADR_SPACE_EC,
+								   &acpi_ec_space_handler,
+								   NULL, ec);
 		if (ACPI_FAILURE(status)) {
 			acpi_ec_stop(ec, false);
 			return -ENODEV;
@@ -1478,6 +1481,11 @@ static int ec_install_handlers(struct acpi_ec *ec, struct acpi_device *device)
 		ec->address_space_handler_holder = ec->handle;
 	}
 
+	if (call_reg && !test_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags)) {
+		acpi_execute_reg_methods(ec->handle, ACPI_ADR_SPACE_EC);
+		set_bit(EC_FLAGS_EC_REG_CALLED, &ec->flags);
+	}
+
 	if (!device)
 		return 0;
 
@@ -1564,11 +1572,11 @@ static void ec_remove_handlers(struct acpi_ec *ec)
 	}
 }
 
-static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device)
+static int acpi_ec_setup(struct acpi_ec *ec, struct acpi_device *device, bool call_reg)
 {
 	int ret;
 
-	ret = ec_install_handlers(ec, device);
+	ret = ec_install_handlers(ec, device, call_reg);
 	if (ret)
 		return ret;
 
@@ -1633,7 +1641,7 @@ static int acpi_ec_add(struct acpi_device *device)
 		}
 	}
 
-	ret = acpi_ec_setup(ec, device);
+	ret = acpi_ec_setup(ec, device, true);
 	if (ret)
 		goto err;
 
@@ -1753,7 +1761,7 @@ void __init acpi_ec_dsdt_probe(void)
 	 * At this point, the GPE is not fully initialized, so do not to
 	 * handle the events.
 	 */
-	ret = acpi_ec_setup(ec, NULL);
+	ret = acpi_ec_setup(ec, NULL, true);
 	if (ret) {
 		acpi_ec_free(ec);
 		return;
@@ -1947,7 +1955,7 @@ void __init acpi_ec_ecdt_probe(void)
 	 * At this point, the namespace is not initialized, so do not find
 	 * the namespace objects, or handle the events.
 	 */
-	ret = acpi_ec_setup(ec, NULL);
+	ret = acpi_ec_setup(ec, NULL, false);
 	if (ret) {
 		acpi_ec_free(ec);
 		goto out;
-- 
GitLab


From c1ddc3dad85dda4421e852c72f7596cdb10e9fc6 Mon Sep 17 00:00:00 2001
From: Michal Simek <michal.simek@amd.com>
Date: Thu, 8 Dec 2022 13:38:50 +0100
Subject: [PATCH 3535/4122] PCI: xilinx-nwl: Fix coding style violations

Fix code alignments and remove additional newline.

Link: https://lore.kernel.org/r/17c75e7003bb8c43a0f45ae3d7c45cac230ef852.1670503129.git.michal.simek@amd.com
Signed-off-by: Michal Simek <michal.simek@amd.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
---
 drivers/pci/controller/pcie-xilinx-nwl.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/pci/controller/pcie-xilinx-nwl.c b/drivers/pci/controller/pcie-xilinx-nwl.c
index 40d070e54ad2e..e10a58649bf57 100644
--- a/drivers/pci/controller/pcie-xilinx-nwl.c
+++ b/drivers/pci/controller/pcie-xilinx-nwl.c
@@ -474,15 +474,15 @@ static int nwl_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 
 	for (i = 0; i < nr_irqs; i++) {
 		irq_domain_set_info(domain, virq + i, bit + i, &nwl_irq_chip,
-				domain->host_data, handle_simple_irq,
-				NULL, NULL);
+				    domain->host_data, handle_simple_irq,
+				    NULL, NULL);
 	}
 	mutex_unlock(&msi->lock);
 	return 0;
 }
 
 static void nwl_irq_domain_free(struct irq_domain *domain, unsigned int virq,
-					unsigned int nr_irqs)
+				unsigned int nr_irqs)
 {
 	struct irq_data *data = irq_domain_get_irq_data(domain, virq);
 	struct nwl_pcie *pcie = irq_data_get_irq_chip_data(data);
@@ -722,7 +722,6 @@ static int nwl_pcie_bridge_init(struct nwl_pcie *pcie)
 	/* Enable all misc interrupts */
 	nwl_bridge_writel(pcie, MSGF_MISC_SR_MASKALL, MSGF_MISC_MASK);
 
-
 	/* Disable all legacy interrupts */
 	nwl_bridge_writel(pcie, (u32)~MSGF_LEG_SR_MASKALL, MSGF_LEG_MASK);
 
-- 
GitLab


From 1c8a8ec0a0e9a1176022a35c4daf04fe1594d270 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Nov 2022 10:43:44 +0100
Subject: [PATCH 3536/4122] f2fs: remove struct segment_allocation
 default_salloc_ops

There is only  single instance of these ops, so remove the indirection
and call allocate_segment_by_default directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 11 ++---------
 fs/f2fs/segment.h |  6 ------
 2 files changed, 2 insertions(+), 15 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 0ff451ea18f66..bbe6556799ce9 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2926,7 +2926,7 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
 		return;
 alloc:
 	old_segno = curseg->segno;
-	SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
+	allocate_segment_by_default(sbi, type, true);
 	locate_dirty_segment(sbi, old_segno);
 }
 
@@ -2957,10 +2957,6 @@ void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
 	f2fs_up_read(&SM_I(sbi)->curseg_lock);
 }
 
-static const struct segment_allocation default_salloc_ops = {
-	.allocate_segment = allocate_segment_by_default,
-};
-
 bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
 						struct cp_control *cpc)
 {
@@ -3284,7 +3280,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 			get_atssr_segment(sbi, type, se->type,
 						AT_SSR, se->mtime);
 		else
-			sit_i->s_ops->allocate_segment(sbi, type, false);
+			allocate_segment_by_default(sbi, type, false);
 	}
 	/*
 	 * segment dirty status should be updated after segment allocation,
@@ -4270,9 +4266,6 @@ static int build_sit_info(struct f2fs_sb_info *sbi)
 		return -ENOMEM;
 #endif
 
-	/* init SIT information */
-	sit_i->s_ops = &default_salloc_ops;
-
 	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
 	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
 	sit_i->written_valid_blocks = 0;
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index be8f2d7d007b9..3ad1b7b6fa946 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -222,10 +222,6 @@ struct sec_entry {
 	unsigned int valid_blocks;	/* # of valid blocks in a section */
 };
 
-struct segment_allocation {
-	void (*allocate_segment)(struct f2fs_sb_info *, int, bool);
-};
-
 #define MAX_SKIP_GC_COUNT			16
 
 struct revoke_entry {
@@ -235,8 +231,6 @@ struct revoke_entry {
 };
 
 struct sit_info {
-	const struct segment_allocation *s_ops;
-
 	block_t sit_base_addr;		/* start block address of SIT area */
 	block_t sit_blocks;		/* # of blocks used by SIT area */
 	block_t written_valid_blocks;	/* # of valid blocks in main area */
-- 
GitLab


From 8442d94b8ac8d5d8300725a9ffa9def526b71170 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Nov 2022 10:43:45 +0100
Subject: [PATCH 3537/4122] f2fs: open code allocate_segment_by_default

allocate_segment_by_default has just two callers, which use very
different code pathes inside it based on the force paramter.  Just
open code the logic in the two callers using a new helper to decided
if a new segment should be allocated.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 50 +++++++++++++++++++++++------------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index bbe6556799ce9..c4e118eb7d197 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2849,31 +2849,20 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
 	return 0;
 }
 
-/*
- * flush out current segment and replace it with new segment
- * This function should be returned with success, otherwise BUG
- */
-static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
-						int type, bool force)
+static bool need_new_seg(struct f2fs_sb_info *sbi, int type)
 {
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
 
-	if (force)
-		new_curseg(sbi, type, true);
-	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
-					curseg->seg_type == CURSEG_WARM_NODE)
-		new_curseg(sbi, type, false);
-	else if (curseg->alloc_type == LFS &&
-			is_next_segment_free(sbi, curseg, type) &&
-			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
-		new_curseg(sbi, type, false);
-	else if (f2fs_need_SSR(sbi) &&
-			get_ssr_segment(sbi, type, SSR, 0))
-		change_curseg(sbi, type, true);
-	else
-		new_curseg(sbi, type, false);
-
-	stat_inc_seg_type(sbi, curseg);
+	if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
+	    curseg->seg_type == CURSEG_WARM_NODE)
+		return true;
+	if (curseg->alloc_type == LFS &&
+	    is_next_segment_free(sbi, curseg, type) &&
+	    likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
+		return true;
+	if (!f2fs_need_SSR(sbi) || !get_ssr_segment(sbi, type, SSR, 0))
+		return true;
+	return false;
 }
 
 void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
@@ -2926,7 +2915,8 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
 		return;
 alloc:
 	old_segno = curseg->segno;
-	allocate_segment_by_default(sbi, type, true);
+	new_curseg(sbi, type, true);
+	stat_inc_seg_type(sbi, curseg);
 	locate_dirty_segment(sbi, old_segno);
 }
 
@@ -3276,11 +3266,19 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 		update_sit_entry(sbi, old_blkaddr, -1);
 
 	if (!__has_curseg_space(sbi, curseg)) {
-		if (from_gc)
+		/*
+		 * Flush out current segment and replace it with new segment.
+		 */
+		if (from_gc) {
 			get_atssr_segment(sbi, type, se->type,
 						AT_SSR, se->mtime);
-		else
-			allocate_segment_by_default(sbi, type, false);
+		} else {
+			if (need_new_seg(sbi, type))
+				new_curseg(sbi, type, false);
+			else
+				change_curseg(sbi, type, true);
+			stat_inc_seg_type(sbi, curseg);
+		}
 	}
 	/*
 	 * segment dirty status should be updated after segment allocation,
-- 
GitLab


From 5bcd655fffaec24e849bda1207446f5cc821713e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 28 Nov 2022 10:43:46 +0100
Subject: [PATCH 3538/4122] f2fs: remove the unused flush argument to
 change_curseg

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index c4e118eb7d197..9486ca49ecb15 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2656,7 +2656,7 @@ bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
  * This function always allocates a used segment(from dirty seglist) by SSR
  * manner, so it should recover the existing segment information of valid blocks
  */
-static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
+static void change_curseg(struct f2fs_sb_info *sbi, int type)
 {
 	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
 	struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2664,9 +2664,7 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
 	struct f2fs_summary_block *sum_node;
 	struct page *sum_page;
 
-	if (flush)
-		write_sum_page(sbi, curseg->sum_blk,
-					GET_SUM_BLOCK(sbi, curseg->segno));
+	write_sum_page(sbi, curseg->sum_blk, GET_SUM_BLOCK(sbi, curseg->segno));
 
 	__set_test_and_inuse(sbi, new_segno);
 
@@ -2705,7 +2703,7 @@ static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
 		struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
 
 		curseg->seg_type = se->type;
-		change_curseg(sbi, type, true);
+		change_curseg(sbi, type);
 	} else {
 		/* allocate cold segment by default */
 		curseg->seg_type = CURSEG_COLD_DATA;
@@ -2880,7 +2878,7 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
 		goto unlock;
 
 	if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
-		change_curseg(sbi, type, true);
+		change_curseg(sbi, type);
 	else
 		new_curseg(sbi, type, true);
 
@@ -3276,7 +3274,7 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 			if (need_new_seg(sbi, type))
 				new_curseg(sbi, type, false);
 			else
-				change_curseg(sbi, type, true);
+				change_curseg(sbi, type);
 			stat_inc_seg_type(sbi, curseg);
 		}
 	}
@@ -3539,7 +3537,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	/* change the current segment */
 	if (segno != curseg->segno) {
 		curseg->next_segno = segno;
-		change_curseg(sbi, type, true);
+		change_curseg(sbi, type);
 	}
 
 	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
@@ -3567,7 +3565,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
 	if (recover_curseg) {
 		if (old_cursegno != curseg->segno) {
 			curseg->next_segno = old_cursegno;
-			change_curseg(sbi, type, true);
+			change_curseg(sbi, type);
 		}
 		curseg->next_blkoff = old_blkoff;
 		curseg->alloc_type = old_alloc_type;
-- 
GitLab


From 398bb30d4f4e857ee1352130c6935f0fb16d7af2 Mon Sep 17 00:00:00 2001
From: Chao Yu <chao@kernel.org>
Date: Sat, 26 Nov 2022 10:38:07 +0800
Subject: [PATCH 3539/4122] MAINTAINERS: Add f2fs bug tracker link

As f2fs component in bugzilla.kernel.org was created and used since
2018-7.

Signed-off-by: Chao Yu <chao@kernel.org>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/filesystems/f2fs.rst | 6 +++++-
 MAINTAINERS                        | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 6e67c5e6c7c37..67e1f3e86f32f 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -25,10 +25,14 @@ a consistency checking tool (fsck.f2fs), and a debugging tool (dump.f2fs).
 
 - git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs-tools.git
 
-For reporting bugs and sending patches, please use the following mailing list:
+For sending patches, please use the following mailing list:
 
 - linux-f2fs-devel@lists.sourceforge.net
 
+For reporting bugs, please use the following f2fs bug tracker link:
+
+- https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
+
 Background and Design issues
 ============================
 
diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..01fdbb592ea70 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7828,6 +7828,7 @@ M:	Chao Yu <chao@kernel.org>
 L:	linux-f2fs-devel@lists.sourceforge.net
 S:	Maintained
 W:	https://f2fs.wiki.kernel.org/
+B:	https://bugzilla.kernel.org/enter_bug.cgi?product=File%20System&component=f2fs
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs.git
 F:	Documentation/ABI/testing/sysfs-fs-f2fs
 F:	Documentation/filesystems/f2fs.rst
-- 
GitLab


From 870af777da22505851174a34c0228042d7ed5f5f Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Fri, 25 Nov 2022 19:47:36 +0800
Subject: [PATCH 3540/4122] f2fs: do some cleanup for f2fs module init

Just for cleanup, no functional changes.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/compress.c | 46 ++++++----------------------------------------
 fs/f2fs/data.c     | 14 ++++----------
 fs/f2fs/gc.c       |  4 +---
 fs/f2fs/recovery.c |  4 +---
 fs/f2fs/super.c    |  8 ++------
 5 files changed, 14 insertions(+), 62 deletions(-)

diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 74d3f2d2271f3..9723f0bed9237 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -567,10 +567,7 @@ MODULE_PARM_DESC(num_compress_pages,
 int f2fs_init_compress_mempool(void)
 {
 	compress_page_pool = mempool_create_page_pool(num_compress_pages, 0);
-	if (!compress_page_pool)
-		return -ENOMEM;
-
-	return 0;
+	return compress_page_pool ? 0 : -ENOMEM;
 }
 
 void f2fs_destroy_compress_mempool(void)
@@ -1983,9 +1980,7 @@ int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
 
 	sbi->page_array_slab = f2fs_kmem_cache_create(slab_name,
 					sbi->page_array_slab_size);
-	if (!sbi->page_array_slab)
-		return -ENOMEM;
-	return 0;
+	return sbi->page_array_slab ? 0 : -ENOMEM;
 }
 
 void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi)
@@ -1993,53 +1988,24 @@ void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi)
 	kmem_cache_destroy(sbi->page_array_slab);
 }
 
-static int __init f2fs_init_cic_cache(void)
+int __init f2fs_init_compress_cache(void)
 {
 	cic_entry_slab = f2fs_kmem_cache_create("f2fs_cic_entry",
 					sizeof(struct compress_io_ctx));
 	if (!cic_entry_slab)
 		return -ENOMEM;
-	return 0;
-}
-
-static void f2fs_destroy_cic_cache(void)
-{
-	kmem_cache_destroy(cic_entry_slab);
-}
-
-static int __init f2fs_init_dic_cache(void)
-{
 	dic_entry_slab = f2fs_kmem_cache_create("f2fs_dic_entry",
 					sizeof(struct decompress_io_ctx));
 	if (!dic_entry_slab)
-		return -ENOMEM;
-	return 0;
-}
-
-static void f2fs_destroy_dic_cache(void)
-{
-	kmem_cache_destroy(dic_entry_slab);
-}
-
-int __init f2fs_init_compress_cache(void)
-{
-	int err;
-
-	err = f2fs_init_cic_cache();
-	if (err)
-		goto out;
-	err = f2fs_init_dic_cache();
-	if (err)
 		goto free_cic;
 	return 0;
 free_cic:
-	f2fs_destroy_cic_cache();
-out:
+	kmem_cache_destroy(cic_entry_slab);
 	return -ENOMEM;
 }
 
 void f2fs_destroy_compress_cache(void)
 {
-	f2fs_destroy_dic_cache();
-	f2fs_destroy_cic_cache();
+	kmem_cache_destroy(dic_entry_slab);
+	kmem_cache_destroy(cic_entry_slab);
 }
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 560fa80590e99..35c19248b1e2d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -39,10 +39,8 @@ static struct bio_set f2fs_bioset;
 
 int __init f2fs_init_bioset(void)
 {
-	if (bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
-					0, BIOSET_NEED_BVECS))
-		return -ENOMEM;
-	return 0;
+	return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
+					0, BIOSET_NEED_BVECS);
 }
 
 void f2fs_destroy_bioset(void)
@@ -4090,9 +4088,7 @@ int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
 	sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
 						 WQ_UNBOUND | WQ_HIGHPRI,
 						 num_online_cpus());
-	if (!sbi->post_read_wq)
-		return -ENOMEM;
-	return 0;
+	return sbi->post_read_wq ? 0 : -ENOMEM;
 }
 
 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
@@ -4105,9 +4101,7 @@ int __init f2fs_init_bio_entry_cache(void)
 {
 	bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
 			sizeof(struct bio_entry));
-	if (!bio_entry_slab)
-		return -ENOMEM;
-	return 0;
+	return bio_entry_slab ? 0 : -ENOMEM;
 }
 
 void f2fs_destroy_bio_entry_cache(void)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index f1b68eda2235d..d19e26b2e875e 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1904,9 +1904,7 @@ int __init f2fs_create_garbage_collection_cache(void)
 {
 	victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
 					sizeof(struct victim_entry));
-	if (!victim_entry_slab)
-		return -ENOMEM;
-	return 0;
+	return victim_entry_slab ? 0 : -ENOMEM;
 }
 
 void f2fs_destroy_garbage_collection_cache(void)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index dea95b48b647d..77fd453949b1b 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -923,9 +923,7 @@ int __init f2fs_create_recovery_cache(void)
 {
 	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
 					sizeof(struct fsync_inode_entry));
-	if (!fsync_entry_slab)
-		return -ENOMEM;
-	return 0;
+	return fsync_entry_slab ? 0 : -ENOMEM;
 }
 
 void f2fs_destroy_recovery_cache(void)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index daf14b55a9729..a5f6f632cf7cf 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -288,9 +288,7 @@ static int __init f2fs_create_casefold_cache(void)
 {
 	f2fs_cf_name_slab = f2fs_kmem_cache_create("f2fs_casefolded_name",
 							F2FS_NAME_LEN);
-	if (!f2fs_cf_name_slab)
-		return -ENOMEM;
-	return 0;
+	return f2fs_cf_name_slab ? 0 : -ENOMEM;
 }
 
 static void f2fs_destroy_casefold_cache(void)
@@ -4647,9 +4645,7 @@ static int __init init_inodecache(void)
 	f2fs_inode_cachep = kmem_cache_create("f2fs_inode_cache",
 			sizeof(struct f2fs_inode_info), 0,
 			SLAB_RECLAIM_ACCOUNT|SLAB_ACCOUNT, NULL);
-	if (!f2fs_inode_cachep)
-		return -ENOMEM;
-	return 0;
+	return f2fs_inode_cachep ? 0 : -ENOMEM;
 }
 
 static void destroy_inodecache(void)
-- 
GitLab


From 5fb45f95eec682621748b7cb012c6a8f0f981e6a Mon Sep 17 00:00:00 2001
From: Qingfang DENG <dqfext@gmail.com>
Date: Thu, 8 Dec 2022 20:35:29 +0800
Subject: [PATCH 3541/4122] netfilter: flowtable: really fix NAT IPv6 offload

The for-loop was broken from the start. It translates to:

	for (i = 0; i < 4; i += 4)

which means the loop statement is run only once, so only the highest
32-bit of the IPv6 address gets mangled.

Fix the loop increment.

Fixes: 0e07e25b481a ("netfilter: flowtable: fix NAT IPv6 offload mangling")
Fixes: 5c27d8d76ce8 ("netfilter: nf_flow_table_offload: add IPv6 support")
Signed-off-by: Qingfang DENG <dqfext@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_flow_table_offload.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 0fdcdb2c9ae43..4d9b99abe37d6 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -383,12 +383,12 @@ static void flow_offload_ipv6_mangle(struct nf_flow_rule *flow_rule,
 				     const __be32 *addr, const __be32 *mask)
 {
 	struct flow_action_entry *entry;
-	int i, j;
+	int i;
 
-	for (i = 0, j = 0; i < sizeof(struct in6_addr) / sizeof(u32); i += sizeof(u32), j++) {
+	for (i = 0; i < sizeof(struct in6_addr) / sizeof(u32); i++) {
 		entry = flow_action_entry_next(flow_rule);
 		flow_offload_mangle(entry, FLOW_ACT_MANGLE_HDR_TYPE_IP6,
-				    offset + i, &addr[j], mask);
+				    offset + i * sizeof(u32), &addr[i], mask);
 	}
 }
 
-- 
GitLab


From 4bd1d80efb5af640f99157f39b50fb11326ce641 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Date: Mon, 29 Aug 2022 23:52:19 +0300
Subject: [PATCH 3542/4122] riscv: mm: notify remote harts about mmu cache
 updates

Current implementation of update_mmu_cache function performs local TLB
flush. It does not take into account ASID information. Besides, it does
not take into account other harts currently running the same mm context
or possible migration of the running context to other harts. Meanwhile
TLB flush is not performed for every context switch if ASID support
is enabled.

Patch [1] proposed to add ASID support to update_mmu_cache to avoid
flushing local TLB entirely. This patch takes into account other
harts currently running the same mm context as well as possible
migration of this context to other harts.

For this purpose the approach from flush_icache_mm is reused. Remote
harts currently running the same mm context are informed via SBI calls
that they need to flush their local TLBs. All the other harts are marked
as needing a deferred TLB flush when this mm context runs on them.

[1] https://lore.kernel.org/linux-riscv/20220821013926.8968-1-tjytimi@163.com/

Signed-off-by: Sergey Matyukevich <sergey.matyukevich@syntacore.com>
Fixes: 65d4b9c53017 ("RISC-V: Implement ASID allocator")
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/linux-riscv/20220829205219.283543-1-geomatsi@gmail.com/#t
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/mmu.h      |  2 ++
 arch/riscv/include/asm/pgtable.h  |  2 +-
 arch/riscv/include/asm/tlbflush.h | 18 ++++++++++++++++++
 arch/riscv/mm/context.c           | 10 ++++++++++
 arch/riscv/mm/tlbflush.c          | 28 +++++++++++-----------------
 5 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/arch/riscv/include/asm/mmu.h b/arch/riscv/include/asm/mmu.h
index 0099dc1161683..5ff1f19fd45c2 100644
--- a/arch/riscv/include/asm/mmu.h
+++ b/arch/riscv/include/asm/mmu.h
@@ -19,6 +19,8 @@ typedef struct {
 #ifdef CONFIG_SMP
 	/* A local icache flush is needed before user execution can resume. */
 	cpumask_t icache_stale_mask;
+	/* A local tlb flush is needed before user execution can resume. */
+	cpumask_t tlb_stale_mask;
 #endif
 } mm_context_t;
 
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index c61ae83aadee8..2359f1f9bda9c 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -415,7 +415,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 	 * Relying on flush_tlb_fix_spurious_fault would suffice, but
 	 * the extra traps reduce performance.  So, eagerly SFENCE.VMA.
 	 */
-	local_flush_tlb_page(address);
+	flush_tlb_page(vma, address);
 }
 
 #define __HAVE_ARCH_UPDATE_MMU_TLB
diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h
index 801019381dea3..907b9efd39a87 100644
--- a/arch/riscv/include/asm/tlbflush.h
+++ b/arch/riscv/include/asm/tlbflush.h
@@ -22,6 +22,24 @@ static inline void local_flush_tlb_page(unsigned long addr)
 {
 	ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
 }
+
+static inline void local_flush_tlb_all_asid(unsigned long asid)
+{
+	__asm__ __volatile__ ("sfence.vma x0, %0"
+			:
+			: "r" (asid)
+			: "memory");
+}
+
+static inline void local_flush_tlb_page_asid(unsigned long addr,
+		unsigned long asid)
+{
+	__asm__ __volatile__ ("sfence.vma %0, %1"
+			:
+			: "r" (addr), "r" (asid)
+			: "memory");
+}
+
 #else /* CONFIG_MMU */
 #define local_flush_tlb_all()			do { } while (0)
 #define local_flush_tlb_page(addr)		do { } while (0)
diff --git a/arch/riscv/mm/context.c b/arch/riscv/mm/context.c
index 7acbfbd14557e..80ce9caba8d22 100644
--- a/arch/riscv/mm/context.c
+++ b/arch/riscv/mm/context.c
@@ -196,6 +196,16 @@ switch_mm_fast:
 
 	if (need_flush_tlb)
 		local_flush_tlb_all();
+#ifdef CONFIG_SMP
+	else {
+		cpumask_t *mask = &mm->context.tlb_stale_mask;
+
+		if (cpumask_test_cpu(cpu, mask)) {
+			cpumask_clear_cpu(cpu, mask);
+			local_flush_tlb_all_asid(cntx & asid_mask);
+		}
+	}
+#endif
 }
 
 static void set_mm_noasid(struct mm_struct *mm)
diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c
index 37ed760d007c3..ce7dfc81bb3fe 100644
--- a/arch/riscv/mm/tlbflush.c
+++ b/arch/riscv/mm/tlbflush.c
@@ -5,23 +5,7 @@
 #include <linux/sched.h>
 #include <asm/sbi.h>
 #include <asm/mmu_context.h>
-
-static inline void local_flush_tlb_all_asid(unsigned long asid)
-{
-	__asm__ __volatile__ ("sfence.vma x0, %0"
-			:
-			: "r" (asid)
-			: "memory");
-}
-
-static inline void local_flush_tlb_page_asid(unsigned long addr,
-		unsigned long asid)
-{
-	__asm__ __volatile__ ("sfence.vma %0, %1"
-			:
-			: "r" (addr), "r" (asid)
-			: "memory");
-}
+#include <asm/tlbflush.h>
 
 void flush_tlb_all(void)
 {
@@ -31,6 +15,7 @@ void flush_tlb_all(void)
 static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
 				  unsigned long size, unsigned long stride)
 {
+	struct cpumask *pmask = &mm->context.tlb_stale_mask;
 	struct cpumask *cmask = mm_cpumask(mm);
 	unsigned int cpuid;
 	bool broadcast;
@@ -44,6 +29,15 @@ static void __sbi_tlb_flush_range(struct mm_struct *mm, unsigned long start,
 	if (static_branch_unlikely(&use_asid_allocator)) {
 		unsigned long asid = atomic_long_read(&mm->context.id);
 
+		/*
+		 * TLB will be immediately flushed on harts concurrently
+		 * executing this MM context. TLB flush on other harts
+		 * is deferred until this MM context migrates there.
+		 */
+		cpumask_setall(pmask);
+		cpumask_clear_cpu(cpuid, pmask);
+		cpumask_andnot(pmask, pmask, cmask);
+
 		if (broadcast) {
 			sbi_remote_sfence_vma_asid(cmask, start, size, asid);
 		} else if (size <= stride) {
-- 
GitLab


From b0f4c74eadbf69a3298f38566bfaa2e202541f2f Mon Sep 17 00:00:00 2001
From: Andrew Bresticker <abrestic@rivosinc.com>
Date: Fri, 11 Nov 2022 17:31:08 -0500
Subject: [PATCH 3543/4122] RISC-V: Fix unannoted hardirqs-on in return to
 userspace slow-path

The return to userspace path in entry.S may enable interrupts without the
corresponding lockdep annotation, producing a splat[0] when DEBUG_LOCKDEP
is enabled. Simply calling __trace_hardirqs_on() here gets a bit messy
due to the use of RA to point back to ret_from_exception, so just move
the whole slow-path loop into C. It's more readable and it lets us use
local_irq_{enable,disable}(), avoiding the need for manual annotations
altogether.

[0]:
  ------------[ cut here ]------------
  DEBUG_LOCKS_WARN_ON(!lockdep_hardirqs_enabled())
  WARNING: CPU: 2 PID: 1 at kernel/locking/lockdep.c:5512 check_flags+0x10a/0x1e0
  Modules linked in:
  CPU: 2 PID: 1 Comm: init Not tainted 6.1.0-rc4-00160-gb56b6e2b4f31 #53
  Hardware name: riscv-virtio,qemu (DT)
  epc : check_flags+0x10a/0x1e0
  ra : check_flags+0x10a/0x1e0
  <snip>
   status: 0000000200000100 badaddr: 0000000000000000 cause: 0000000000000003
  [<ffffffff808edb90>] lock_is_held_type+0x78/0x14e
  [<ffffffff8003dae2>] __might_resched+0x26/0x22c
  [<ffffffff8003dd24>] __might_sleep+0x3c/0x66
  [<ffffffff80022c60>] get_signal+0x9e/0xa70
  [<ffffffff800054a2>] do_notify_resume+0x6e/0x422
  [<ffffffff80003c68>] ret_from_exception+0x0/0x10
  irq event stamp: 44512
  hardirqs last  enabled at (44511): [<ffffffff808f901c>] _raw_spin_unlock_irqrestore+0x54/0x62
  hardirqs last disabled at (44512): [<ffffffff80008200>] __trace_hardirqs_off+0xc/0x14
  softirqs last  enabled at (44472): [<ffffffff808f9fbe>] __do_softirq+0x3de/0x51e
  softirqs last disabled at (44467): [<ffffffff80017760>] irq_exit+0xd6/0x104
  ---[ end trace 0000000000000000 ]---
  possible reason: unannotated irqs-on.

Signed-off-by: Andrew Bresticker <abrestic@rivosinc.com>
Fixes: 3c4697982982 ("riscv: Enable LOCKDEP_SUPPORT & fixup TRACE_IRQFLAGS_SUPPORT")
Link: https://lore.kernel.org/r/20221111223108.1976562-1-abrestic@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/entry.S  | 18 +++++-------------
 arch/riscv/kernel/signal.c | 34 +++++++++++++++++++++-------------
 2 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index b9eda3fcbd6d7..58dfa8595e19d 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -263,12 +263,11 @@ ret_from_exception:
 #endif
 	bnez s0, resume_kernel
 
-resume_userspace:
 	/* Interrupts must be disabled here so flags are checked atomically */
 	REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */
 	andi s1, s0, _TIF_WORK_MASK
-	bnez s1, work_pending
-
+	bnez s1, resume_userspace_slow
+resume_userspace:
 #ifdef CONFIG_CONTEXT_TRACKING_USER
 	call user_enter_callable
 #endif
@@ -368,19 +367,12 @@ resume_kernel:
 	j restore_all
 #endif
 
-work_pending:
+resume_userspace_slow:
 	/* Enter slow path for supplementary processing */
-	la ra, ret_from_exception
-	andi s1, s0, _TIF_NEED_RESCHED
-	bnez s1, work_resched
-work_notifysig:
-	/* Handle pending signals and notify-resume requests */
-	csrs CSR_STATUS, SR_IE /* Enable interrupts for do_notify_resume() */
 	move a0, sp /* pt_regs */
 	move a1, s0 /* current_thread_info->flags */
-	tail do_notify_resume
-work_resched:
-	tail schedule
+	call do_work_pending
+	j resume_userspace
 
 /* Slow paths for ptrace. */
 handle_syscall_trace_enter:
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index 5c591123c4409..bfb2afa4135f8 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -313,19 +313,27 @@ static void do_signal(struct pt_regs *regs)
 }
 
 /*
- * notification of userspace execution resumption
- * - triggered by the _TIF_WORK_MASK flags
+ * Handle any pending work on the resume-to-userspace path, as indicated by
+ * _TIF_WORK_MASK. Entered from assembly with IRQs off.
  */
-asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
-					   unsigned long thread_info_flags)
+asmlinkage __visible void do_work_pending(struct pt_regs *regs,
+					  unsigned long thread_info_flags)
 {
-	if (thread_info_flags & _TIF_UPROBE)
-		uprobe_notify_resume(regs);
-
-	/* Handle pending signal delivery */
-	if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
-		do_signal(regs);
-
-	if (thread_info_flags & _TIF_NOTIFY_RESUME)
-		resume_user_mode_work(regs);
+	do {
+		if (thread_info_flags & _TIF_NEED_RESCHED) {
+			schedule();
+		} else {
+			local_irq_enable();
+			if (thread_info_flags & _TIF_UPROBE)
+				uprobe_notify_resume(regs);
+			/* Handle pending signal delivery */
+			if (thread_info_flags & (_TIF_SIGPENDING |
+						 _TIF_NOTIFY_SIGNAL))
+				do_signal(regs);
+			if (thread_info_flags & _TIF_NOTIFY_RESUME)
+				resume_user_mode_work(regs);
+		}
+		local_irq_disable();
+		thread_info_flags = read_thread_flags();
+	} while (thread_info_flags & _TIF_WORK_MASK);
 }
-- 
GitLab


From b91676fc16cd384a81e3af52c641aa61985cc231 Mon Sep 17 00:00:00 2001
From: Anup Patel <apatel@ventanamicro.com>
Date: Mon, 14 Nov 2022 14:35:34 +0530
Subject: [PATCH 3544/4122] RISC-V: Fix MEMREMAP_WB for systems with Svpbmt

Currently, the memremap() called with MEMREMAP_WB maps memory using
the generic ioremap() function which breaks on system with Svpbmt
because memory mapped using _PAGE_IOREMAP page attributes is treated
as strongly-ordered non-cacheable IO memory.

To address this, we implement RISC-V specific arch_memremap_wb()
which maps memory using _PAGE_KERNEL page attributes resulting in
write-back cacheable mapping on systems with Svpbmt.

Fixes: ff689fd21cb1 ("riscv: add RISC-V Svpbmt extension support")
Co-developed-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221114090536.1662624-2-apatel@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/io.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/riscv/include/asm/io.h b/arch/riscv/include/asm/io.h
index 92080a2279372..42497d487a174 100644
--- a/arch/riscv/include/asm/io.h
+++ b/arch/riscv/include/asm/io.h
@@ -135,4 +135,9 @@ __io_writes_outs(outs, u64, q, __io_pbr(), __io_paw())
 
 #include <asm-generic/io.h>
 
+#ifdef CONFIG_MMU
+#define arch_memremap_wb(addr, size)	\
+	((__force void *)ioremap_prot((addr), (size), _PAGE_KERNEL))
+#endif
+
 #endif /* _ASM_RISCV_IO_H */
-- 
GitLab


From a49ab905a1fc8630a94221f9a06ce0dafb266576 Mon Sep 17 00:00:00 2001
From: Anup Patel <apatel@ventanamicro.com>
Date: Mon, 14 Nov 2022 14:35:35 +0530
Subject: [PATCH 3545/4122] RISC-V: Implement arch specific PMEM APIs

The NVDIMM PMEM driver expects arch specific APIs for cache maintenance
and if arch does not provide these APIs then NVDIMM PMEM driver will
always use MEMREMAP_WT to map persistent memory which in-turn maps as
UC memory type defined by the RISC-V Svpbmt specification.

Now that the Svpbmt and Zicbom support is available in RISC-V kernel,
we implement PMEM APIs using ALT_CMO_OP() macros so that the NVDIMM
PMEM driver can use MEMREMAP_WB to map persistent memory.

Co-developed-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Signed-off-by: Mayuresh Chitale <mchitale@ventanamicro.com>
Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221114090536.1662624-3-apatel@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig     |  1 +
 arch/riscv/mm/Makefile |  1 +
 arch/riscv/mm/pmem.c   | 21 +++++++++++++++++++++
 3 files changed, 23 insertions(+)
 create mode 100644 arch/riscv/mm/pmem.c

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6b48a3ae98439..025e2a1b1c60d 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -25,6 +25,7 @@ config RISCV
 	select ARCH_HAS_GIGANTIC_PAGE
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_MMIOWB
+	select ARCH_HAS_PMEM_API
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_SET_DIRECT_MAP if MMU
 	select ARCH_HAS_SET_MEMORY if MMU
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index d76aabf4b94d6..b4f35da889bfb 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -13,6 +13,7 @@ obj-y += extable.o
 obj-$(CONFIG_MMU) += fault.o pageattr.o
 obj-y += cacheflush.o
 obj-y += context.o
+obj-y += pmem.o
 
 ifeq ($(CONFIG_MMU),y)
 obj-$(CONFIG_SMP) += tlbflush.o
diff --git a/arch/riscv/mm/pmem.c b/arch/riscv/mm/pmem.c
new file mode 100644
index 0000000000000..089df92ae8760
--- /dev/null
+++ b/arch/riscv/mm/pmem.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Ventana Micro Systems Inc.
+ */
+
+#include <linux/export.h>
+#include <linux/libnvdimm.h>
+
+#include <asm/cacheflush.h>
+
+void arch_wb_cache_pmem(void *addr, size_t size)
+{
+	ALT_CMO_OP(clean, addr, size, riscv_cbom_block_size);
+}
+EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
+
+void arch_invalidate_pmem(void *addr, size_t size)
+{
+	ALT_CMO_OP(inval, addr, size, riscv_cbom_block_size);
+}
+EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
-- 
GitLab


From 497bcbe3ce0466123a834f2777a8a762bd5d7aae Mon Sep 17 00:00:00 2001
From: Anup Patel <apatel@ventanamicro.com>
Date: Mon, 14 Nov 2022 14:35:36 +0530
Subject: [PATCH 3546/4122] RISC-V: Enable PMEM drivers

We now have PMEM arch support available in RISC-V kernel so let us
enable relevant drivers in defconfig.

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221114090536.1662624-4-apatel@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/configs/defconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig
index 05fd5fcf24f9b..462da9f7410d0 100644
--- a/arch/riscv/configs/defconfig
+++ b/arch/riscv/configs/defconfig
@@ -159,6 +159,7 @@ CONFIG_VIRTIO_MMIO=y
 CONFIG_RPMSG_CHAR=y
 CONFIG_RPMSG_CTRL=y
 CONFIG_RPMSG_VIRTIO=y
+CONFIG_LIBNVDIMM=y
 CONFIG_EXT4_FS=y
 CONFIG_EXT4_FS_POSIX_ACL=y
 CONFIG_EXT4_FS_SECURITY=y
-- 
GitLab


From fdb1742aff436399f5769a7559bbb71c7f37a85f Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Fri, 18 Nov 2022 10:42:59 +0000
Subject: [PATCH 3547/4122] irqchip/sifive-plic: remove user selectability of
 SIFIVE_PLIC

The SiFive PLIC driver is used by all current implementations, including
those that do not have a SiFive PLIC. The current driver supports more
than just SiFive PLICs at present and, where possible, future PLIC
implementations will also use this driver. As every supported RISC-V SoC
selects the driver directly in Kconfig.socs there's no point in exposing
this kconfig option to users.

The Kconfig help text, in its current form, is misleading. There's no
point doing anything about that though, as it will no longer be user
selectable. Remove it.

Suggested-by: Marc Zyngier <maz@kernel.org>
Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118104300.85016-2-conor@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 drivers/irqchip/Kconfig | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 7ef9f5e696d31..ecb3e3119d2e1 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -551,18 +551,10 @@ config RISCV_INTC
 	   If you don't know what to do here, say Y.
 
 config SIFIVE_PLIC
-	bool "SiFive Platform-Level Interrupt Controller"
+	bool
 	depends on RISCV
 	select IRQ_DOMAIN_HIERARCHY
 	select GENERIC_IRQ_EFFECTIVE_AFF_MASK if SMP
-	help
-	   This enables support for the PLIC chip found in SiFive (and
-	   potentially other) RISC-V systems.  The PLIC controls devices
-	   interrupts and connects them to each core's local interrupt
-	   controller.  Aside from timer and software interrupts, all other
-	   interrupt sources are subordinate to the PLIC.
-
-	   If you don't know what to do here, say Y.
 
 config EXYNOS_IRQ_COMBINER
 	bool "Samsung Exynos IRQ combiner support" if COMPILE_TEST
-- 
GitLab


From d8fb13070c3c99b6a17b75fda28943f9261e23e7 Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Fri, 18 Nov 2022 10:43:00 +0000
Subject: [PATCH 3548/4122] irqchip/riscv-intc: remove user selectability of
 RISCV_INTC

Since commit e71ee06e3ca3 ("RISC-V: Force select RISCV_INTC for
CONFIG_RISCV") the driver has been enabled at the arch level - and is
mandatory anyway. There's no point exposing this as a choice to users,
so stop bothering.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118104300.85016-3-conor@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 drivers/irqchip/Kconfig | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index ecb3e3119d2e1..4633a549ebbf6 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -538,17 +538,8 @@ config TI_PRUSS_INTC
 	  different processors within the SoC.
 
 config RISCV_INTC
-	bool "RISC-V Local Interrupt Controller"
+	bool
 	depends on RISCV
-	default y
-	help
-	   This enables support for the per-HART local interrupt controller
-	   found in standard RISC-V systems.  The per-HART local interrupt
-	   controller handles timer interrupts, software interrupts, and
-	   hardware interrupts. Without a per-HART local interrupt controller,
-	   a RISC-V system will be unable to handle any interrupts.
-
-	   If you don't know what to do here, say Y.
 
 config SIFIVE_PLIC
 	bool
-- 
GitLab


From bf3d7b1d8499ca46874c7373d2043ecbe252cccc Mon Sep 17 00:00:00 2001
From: Conor Dooley <conor.dooley@microchip.com>
Date: Fri, 18 Nov 2022 10:43:01 +0000
Subject: [PATCH 3549/4122] RISC-V: stop selecting SIFIVE_PLIC at the SoC level

The SIFIVE_PLIC driver is used by all current RISC-V SoCs & will be,
where possible, used for future implementations. Rather than having each
driver select the option on a case-by-case basis, do so at the arch
level.

Signed-off-by: Conor Dooley <conor.dooley@microchip.com>
Acked-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/20221118104300.85016-4-conor@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig      | 1 +
 arch/riscv/Kconfig.socs | 5 -----
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6b48a3ae98439..3ee67dc4e98bf 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -123,6 +123,7 @@ config RISCV
 	select PCI_MSI if PCI
 	select RISCV_INTC
 	select RISCV_TIMER if RISCV_SBI
+	select SIFIVE_PLIC
 	select SPARSE_IRQ
 	select SYSCTL_EXCEPTION_TRACE
 	select THREAD_INFO_IN_TASK
diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs
index 69774bb362d6a..15e391f38f758 100644
--- a/arch/riscv/Kconfig.socs
+++ b/arch/riscv/Kconfig.socs
@@ -3,7 +3,6 @@ menu "SoC selection"
 config SOC_MICROCHIP_POLARFIRE
 	bool "Microchip PolarFire SoCs"
 	select MCHP_CLK_MPFS
-	select SIFIVE_PLIC
 	help
 	  This enables support for Microchip PolarFire SoC platforms.
 
@@ -13,7 +12,6 @@ config SOC_SIFIVE
 	select SERIAL_SIFIVE_CONSOLE if TTY
 	select CLK_SIFIVE
 	select CLK_SIFIVE_PRCI
-	select SIFIVE_PLIC
 	select ERRATA_SIFIVE if !XIP_KERNEL
 	help
 	  This enables support for SiFive SoC platform hardware.
@@ -22,7 +20,6 @@ config SOC_STARFIVE
 	bool "StarFive SoCs"
 	select PINCTRL
 	select RESET_CONTROLLER
-	select SIFIVE_PLIC
 	help
 	  This enables support for StarFive SoC platform hardware.
 
@@ -34,7 +31,6 @@ config SOC_VIRT
 	select POWER_RESET_SYSCON_POWEROFF
 	select GOLDFISH
 	select RTC_DRV_GOLDFISH if RTC_CLASS
-	select SIFIVE_PLIC
 	select PM_GENERIC_DOMAINS if PM
 	select PM_GENERIC_DOMAINS_OF if PM && OF
 	select RISCV_SBI_CPUIDLE if CPU_IDLE && RISCV_SBI
@@ -47,7 +43,6 @@ config SOC_CANAAN
 	select CLINT_TIMER if RISCV_M_MODE
 	select SERIAL_SIFIVE if TTY
 	select SERIAL_SIFIVE_CONSOLE if TTY
-	select SIFIVE_PLIC
 	select ARCH_HAS_RESET_CONTROLLER
 	select PINCTRL
 	select COMMON_CLK
-- 
GitLab


From de59b6ed0618b909be78f6bc60874a57dd016063 Mon Sep 17 00:00:00 2001
From: Jisheng Zhang <jszhang@kernel.org>
Date: Wed, 23 Nov 2022 23:02:57 +0800
Subject: [PATCH 3550/4122] riscv: boot: add zstd support

Support build the zstd compressed Image.zst. Similar as other
compressed formats, the Image.zst is not self-decompressing and
the bootloader still needs to handle decompression before
launching the kernel image.

Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20221123150257.3108-1-jszhang@kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/boot/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index d1a49adcb1d76..c72de7232abb2 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -56,6 +56,9 @@ $(obj)/Image.lzma: $(obj)/Image FORCE
 $(obj)/Image.lzo: $(obj)/Image FORCE
 	$(call if_changed,lzo)
 
+$(obj)/Image.zst: $(obj)/Image FORCE
+	$(call if_changed,zstd)
+
 $(obj)/loader.bin: $(obj)/loader FORCE
 	$(call if_changed,objcopy)
 
-- 
GitLab


From 0c49688174f5347c3f8012e84c0ffa0d2b2890c8 Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sat, 26 Nov 2022 00:09:19 -0600
Subject: [PATCH 3551/4122] riscv: Fix crash during early errata patching

The patch function for the T-Head PBMT errata calls __pa_symbol() before
relocation. This crashes when CONFIG_DEBUG_VIRTUAL is enabled, because
__pa_symbol() forwards to __phys_addr_symbol(), and __phys_addr_symbol()
checks against the absolute kernel start/end address.

Fix this by checking against the kernel map instead of a symbol address.

Fixes: a35707c3d850 ("riscv: add memory-type errata for T-Head")
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Tested-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221126060920.65009-1-samuel@sholland.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/physaddr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index 19cf25a74ee29..9b18bda74154e 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -22,7 +22,7 @@ EXPORT_SYMBOL(__virt_to_phys);
 phys_addr_t __phys_addr_symbol(unsigned long x)
 {
 	unsigned long kernel_start = kernel_map.virt_addr;
-	unsigned long kernel_end = (unsigned long)_end;
+	unsigned long kernel_end = kernel_start + kernel_map.size;
 
 	/*
 	 * Boundary checking aginst the kernel image mapping.
-- 
GitLab


From 583286e2072ed25c31b7db14d69fdf03f1fae7ba Mon Sep 17 00:00:00 2001
From: Samuel Holland <samuel@sholland.org>
Date: Sat, 26 Nov 2022 00:09:20 -0600
Subject: [PATCH 3552/4122] riscv: Move cast inside
 kernel_mapping_[pv]a_to_[vp]a

Before commit 44c922572952 ("RISC-V: enable XIP"), these macros cast
their argument to unsigned long. That commit moved the cast after an
assignment to an unsigned long variable, rendering it ineffectual.
Move the cast back, so we can remove the cast at each call site.

Reviewed-by: Alexandre Ghiti <alexandre.ghiti@canonical.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Signed-off-by: Samuel Holland <samuel@sholland.org>
Link: https://lore.kernel.org/r/20221126060920.65009-2-samuel@sholland.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/page.h | 18 +++++++++---------
 arch/riscv/mm/init.c          | 16 ++++++++--------
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index ac70b0fd9a9a3..9f432c1b52899 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -123,20 +123,20 @@ extern phys_addr_t phys_ram_base;
 	((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
 
 #define linear_mapping_pa_to_va(x)	((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
-#define kernel_mapping_pa_to_va(y)	({						\
-	unsigned long _y = y;								\
-	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ?					\
-		(void *)((unsigned long)(_y) + kernel_map.va_kernel_xip_pa_offset) :		\
-		(void *)((unsigned long)(_y) + kernel_map.va_kernel_pa_offset + XIP_OFFSET);	\
+#define kernel_mapping_pa_to_va(y)	({					\
+	unsigned long _y = (unsigned long)(y);					\
+	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ?			\
+		(void *)(_y + kernel_map.va_kernel_xip_pa_offset) :		\
+		(void *)(_y + kernel_map.va_kernel_pa_offset + XIP_OFFSET);	\
 	})
 #define __pa_to_va_nodebug(x)		linear_mapping_pa_to_va(x)
 
 #define linear_mapping_va_to_pa(x)	((unsigned long)(x) - kernel_map.va_pa_offset)
 #define kernel_mapping_va_to_pa(y) ({						\
-	unsigned long _y = y;							\
-	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ?	\
-		((unsigned long)(_y) - kernel_map.va_kernel_xip_pa_offset) :		\
-		((unsigned long)(_y) - kernel_map.va_kernel_pa_offset - XIP_OFFSET);	\
+	unsigned long _y = (unsigned long)(y);					\
+	(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \
+		(_y - kernel_map.va_kernel_xip_pa_offset) :			\
+		(_y - kernel_map.va_kernel_pa_offset - XIP_OFFSET);		\
 	})
 
 #define __va_to_pa_nodebug(x)	({						\
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index b56a0a75533fe..7d59516ce6b39 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -927,15 +927,15 @@ static void __init pt_ops_set_early(void)
  */
 static void __init pt_ops_set_fixmap(void)
 {
-	pt_ops.alloc_pte = kernel_mapping_pa_to_va((uintptr_t)alloc_pte_fixmap);
-	pt_ops.get_pte_virt = kernel_mapping_pa_to_va((uintptr_t)get_pte_virt_fixmap);
+	pt_ops.alloc_pte = kernel_mapping_pa_to_va(alloc_pte_fixmap);
+	pt_ops.get_pte_virt = kernel_mapping_pa_to_va(get_pte_virt_fixmap);
 #ifndef __PAGETABLE_PMD_FOLDED
-	pt_ops.alloc_pmd = kernel_mapping_pa_to_va((uintptr_t)alloc_pmd_fixmap);
-	pt_ops.get_pmd_virt = kernel_mapping_pa_to_va((uintptr_t)get_pmd_virt_fixmap);
-	pt_ops.alloc_pud = kernel_mapping_pa_to_va((uintptr_t)alloc_pud_fixmap);
-	pt_ops.get_pud_virt = kernel_mapping_pa_to_va((uintptr_t)get_pud_virt_fixmap);
-	pt_ops.alloc_p4d = kernel_mapping_pa_to_va((uintptr_t)alloc_p4d_fixmap);
-	pt_ops.get_p4d_virt = kernel_mapping_pa_to_va((uintptr_t)get_p4d_virt_fixmap);
+	pt_ops.alloc_pmd = kernel_mapping_pa_to_va(alloc_pmd_fixmap);
+	pt_ops.get_pmd_virt = kernel_mapping_pa_to_va(get_pmd_virt_fixmap);
+	pt_ops.alloc_pud = kernel_mapping_pa_to_va(alloc_pud_fixmap);
+	pt_ops.get_pud_virt = kernel_mapping_pa_to_va(get_pud_virt_fixmap);
+	pt_ops.alloc_p4d = kernel_mapping_pa_to_va(alloc_p4d_fixmap);
+	pt_ops.get_p4d_virt = kernel_mapping_pa_to_va(get_p4d_virt_fixmap);
 #endif
 }
 
-- 
GitLab


From e8b9a055fa0481679132781db574ecb771960f16 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Wed, 7 Dec 2022 21:48:07 +0000
Subject: [PATCH 3553/4122] KVM: arm64: selftests: Align VA space allocator
 with TTBR0

An interesting feature of the Arm architecture is that the stage-1 MMU
supports two distinct VA regions, controlled by TTBR{0,1}_EL1. As KVM
selftests on arm64 only uses TTBR0_EL1, the VA space is constrained to
[0, 2^(va_bits-1)). This is different from other architectures that
allow for addressing low and high regions of the VA space from a single
page table.

KVM selftests' VA space allocator presumes the valid address range is
split between low and high memory based the MSB, which of course is a
poor match for arm64's TTBR0 region.

Allow architectures to override the default VA space layout. Make use of
the override to align vpages_valid with the behavior of TTBR0 on arm64.

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Message-Id: <20221207214809.489070-4-oliver.upton@linux.dev>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 .../testing/selftests/kvm/include/kvm_util_base.h |  1 +
 .../testing/selftests/kvm/lib/aarch64/processor.c | 10 ++++++++++
 tools/testing/selftests/kvm/lib/kvm_util.c        | 15 ++++++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
index 37500c92dd0a6..2e267cd692889 100644
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -408,6 +408,7 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
 void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
 void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
 struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
 vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
 vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 316de70db91da..5972a23b27654 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -541,3 +541,13 @@ void kvm_selftest_arch_init(void)
 	 */
 	guest_modes_append_default();
 }
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+	/*
+	 * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+	 * is [0, 2^(64 - TCR_EL1.T0SZ)).
+	 */
+	sparsebit_set_num(vm->vpages_valid, 0,
+			  (1ULL << vm->va_bits) >> vm->page_shift);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index e9607eb089bee..c88c3ace16d2f 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -186,6 +186,15 @@ const struct vm_guest_mode_params vm_guest_mode_params[] = {
 _Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
 	       "Missing new mode params?");
 
+__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+	sparsebit_set_num(vm->vpages_valid,
+		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	sparsebit_set_num(vm->vpages_valid,
+		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+}
+
 struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 {
 	struct kvm_vm *vm;
@@ -274,11 +283,7 @@ struct kvm_vm *____vm_create(enum vm_guest_mode mode)
 
 	/* Limit to VA-bit canonical virtual addresses. */
 	vm->vpages_valid = sparsebit_alloc();
-	sparsebit_set_num(vm->vpages_valid,
-		0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
-	sparsebit_set_num(vm->vpages_valid,
-		(~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
-		(1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+	vm_vaddr_populate_bitmap(vm);
 
 	/* Limit physical addresses to PA-bits. */
 	vm->max_gfn = vm_compute_max_gfn(vm);
-- 
GitLab


From 2afc1fbbdab2aee831561f09f859989dcd5ed648 Mon Sep 17 00:00:00 2001
From: Oliver Upton <oliver.upton@linux.dev>
Date: Wed, 7 Dec 2022 21:48:08 +0000
Subject: [PATCH 3554/4122] KVM: selftests: Allocate ucall pool from
 MEM_REGION_DATA

MEM_REGION_TEST_DATA is meant to hold data explicitly used by a
selftest, not implicit allocations due to the selftests infrastructure.
Allocate the ucall pool from MEM_REGION_DATA much like the rest of the
selftests library allocations.

Fixes: 426729b2cf2e ("KVM: selftests: Add ucall pool based implementation")
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
Message-Id: <20221207214809.489070-5-oliver.upton@linux.dev>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 tools/testing/selftests/kvm/lib/ucall_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index fcae96461e460..72420171c0d47 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -22,7 +22,7 @@ void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
 	vm_vaddr_t vaddr;
 	int i;
 
-	vaddr = vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR);
+	vaddr = __vm_vaddr_alloc(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR, MEM_REGION_DATA);
 	hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
 	memset(hdr, 0, sizeof(*hdr));
 
-- 
GitLab


From e9612987e437b7ada686f472c7596686fabecb2b Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:12 +0000
Subject: [PATCH 3555/4122] crypto: qat - relocate bufferlist logic

Move the logic that maps, unmaps and converts scatterlists into QAT
bufferlists from qat_algs.c to a new module, qat_bl.
This is to allow reuse of the logic by the data compression service.

This commit does not implement any functional change.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/Makefile   |   3 +-
 drivers/crypto/qat/qat_common/qat_algs.c | 184 +--------------------
 drivers/crypto/qat/qat_common/qat_bl.c   | 194 +++++++++++++++++++++++
 drivers/crypto/qat/qat_common/qat_bl.h   |  17 ++
 4 files changed, 214 insertions(+), 184 deletions(-)
 create mode 100644 drivers/crypto/qat/qat_common/qat_bl.c
 create mode 100644 drivers/crypto/qat/qat_common/qat_bl.h

diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index 80919cfcc29da..b0587d03eac29 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -19,7 +19,8 @@ intel_qat-objs := adf_cfg.o \
 	qat_asym_algs.o \
 	qat_algs_send.o \
 	qat_uclo.o \
-	qat_hal.o
+	qat_hal.o \
+	qat_bl.o
 
 intel_qat-$(CONFIG_DEBUG_FS) += adf_transport_debug.o
 intel_qat-$(CONFIG_PCI_IOV) += adf_sriov.o adf_vf_isr.o adf_pfvf_utils.o \
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index cad9c58caab13..2ee4fa64032fa 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -23,6 +23,7 @@
 #include "icp_qat_hw.h"
 #include "icp_qat_fw.h"
 #include "icp_qat_fw_la.h"
+#include "qat_bl.h"
 
 #define QAT_AES_HW_CONFIG_ENC(alg, mode) \
 	ICP_QAT_HW_CIPHER_CONFIG_BUILD(mode, alg, \
@@ -663,189 +664,6 @@ static int qat_alg_aead_setkey(struct crypto_aead *tfm, const u8 *key,
 		return qat_alg_aead_newkey(tfm, key, keylen);
 }
 
-static void qat_alg_free_bufl(struct qat_crypto_instance *inst,
-			      struct qat_crypto_request *qat_req)
-{
-	struct device *dev = &GET_DEV(inst->accel_dev);
-	struct qat_alg_buf_list *bl = qat_req->buf.bl;
-	struct qat_alg_buf_list *blout = qat_req->buf.blout;
-	dma_addr_t blp = qat_req->buf.blp;
-	dma_addr_t blpout = qat_req->buf.bloutp;
-	size_t sz = qat_req->buf.sz;
-	size_t sz_out = qat_req->buf.sz_out;
-	int bl_dma_dir;
-	int i;
-
-	bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
-
-	for (i = 0; i < bl->num_bufs; i++)
-		dma_unmap_single(dev, bl->bufers[i].addr,
-				 bl->bufers[i].len, bl_dma_dir);
-
-	dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
-
-	if (!qat_req->buf.sgl_src_valid)
-		kfree(bl);
-
-	if (blp != blpout) {
-		/* If out of place operation dma unmap only data */
-		int bufless = blout->num_bufs - blout->num_mapped_bufs;
-
-		for (i = bufless; i < blout->num_bufs; i++) {
-			dma_unmap_single(dev, blout->bufers[i].addr,
-					 blout->bufers[i].len,
-					 DMA_FROM_DEVICE);
-		}
-		dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE);
-
-		if (!qat_req->buf.sgl_dst_valid)
-			kfree(blout);
-	}
-}
-
-static int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
-			       struct scatterlist *sgl,
-			       struct scatterlist *sglout,
-			       struct qat_crypto_request *qat_req,
-			       gfp_t flags)
-{
-	struct device *dev = &GET_DEV(inst->accel_dev);
-	int i, sg_nctr = 0;
-	int n = sg_nents(sgl);
-	struct qat_alg_buf_list *bufl;
-	struct qat_alg_buf_list *buflout = NULL;
-	dma_addr_t blp = DMA_MAPPING_ERROR;
-	dma_addr_t bloutp = DMA_MAPPING_ERROR;
-	struct scatterlist *sg;
-	size_t sz_out, sz = struct_size(bufl, bufers, n);
-	int node = dev_to_node(&GET_DEV(inst->accel_dev));
-	int bufl_dma_dir;
-
-	if (unlikely(!n))
-		return -EINVAL;
-
-	qat_req->buf.sgl_src_valid = false;
-	qat_req->buf.sgl_dst_valid = false;
-
-	if (n > QAT_MAX_BUFF_DESC) {
-		bufl = kzalloc_node(sz, flags, node);
-		if (unlikely(!bufl))
-			return -ENOMEM;
-	} else {
-		bufl = &qat_req->buf.sgl_src.sgl_hdr;
-		memset(bufl, 0, sizeof(struct qat_alg_buf_list));
-		qat_req->buf.sgl_src_valid = true;
-	}
-
-	bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
-
-	for_each_sg(sgl, sg, n, i)
-		bufl->bufers[i].addr = DMA_MAPPING_ERROR;
-
-	for_each_sg(sgl, sg, n, i) {
-		int y = sg_nctr;
-
-		if (!sg->length)
-			continue;
-
-		bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg),
-						      sg->length,
-						      bufl_dma_dir);
-		bufl->bufers[y].len = sg->length;
-		if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr)))
-			goto err_in;
-		sg_nctr++;
-	}
-	bufl->num_bufs = sg_nctr;
-	blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(dev, blp)))
-		goto err_in;
-	qat_req->buf.bl = bufl;
-	qat_req->buf.blp = blp;
-	qat_req->buf.sz = sz;
-	/* Handle out of place operation */
-	if (sgl != sglout) {
-		struct qat_alg_buf *bufers;
-
-		n = sg_nents(sglout);
-		sz_out = struct_size(buflout, bufers, n);
-		sg_nctr = 0;
-
-		if (n > QAT_MAX_BUFF_DESC) {
-			buflout = kzalloc_node(sz_out, flags, node);
-			if (unlikely(!buflout))
-				goto err_in;
-		} else {
-			buflout = &qat_req->buf.sgl_dst.sgl_hdr;
-			memset(buflout, 0, sizeof(struct qat_alg_buf_list));
-			qat_req->buf.sgl_dst_valid = true;
-		}
-
-		bufers = buflout->bufers;
-		for_each_sg(sglout, sg, n, i)
-			bufers[i].addr = DMA_MAPPING_ERROR;
-
-		for_each_sg(sglout, sg, n, i) {
-			int y = sg_nctr;
-
-			if (!sg->length)
-				continue;
-
-			bufers[y].addr = dma_map_single(dev, sg_virt(sg),
-							sg->length,
-							DMA_FROM_DEVICE);
-			if (unlikely(dma_mapping_error(dev, bufers[y].addr)))
-				goto err_out;
-			bufers[y].len = sg->length;
-			sg_nctr++;
-		}
-		buflout->num_bufs = sg_nctr;
-		buflout->num_mapped_bufs = sg_nctr;
-		bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(dev, bloutp)))
-			goto err_out;
-		qat_req->buf.blout = buflout;
-		qat_req->buf.bloutp = bloutp;
-		qat_req->buf.sz_out = sz_out;
-	} else {
-		/* Otherwise set the src and dst to the same address */
-		qat_req->buf.bloutp = qat_req->buf.blp;
-		qat_req->buf.sz_out = 0;
-	}
-	return 0;
-
-err_out:
-	if (!dma_mapping_error(dev, bloutp))
-		dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE);
-
-	n = sg_nents(sglout);
-	for (i = 0; i < n; i++)
-		if (!dma_mapping_error(dev, buflout->bufers[i].addr))
-			dma_unmap_single(dev, buflout->bufers[i].addr,
-					 buflout->bufers[i].len,
-					 DMA_FROM_DEVICE);
-
-	if (!qat_req->buf.sgl_dst_valid)
-		kfree(buflout);
-
-err_in:
-	if (!dma_mapping_error(dev, blp))
-		dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
-
-	n = sg_nents(sgl);
-	for (i = 0; i < n; i++)
-		if (!dma_mapping_error(dev, bufl->bufers[i].addr))
-			dma_unmap_single(dev, bufl->bufers[i].addr,
-					 bufl->bufers[i].len,
-					 bufl_dma_dir);
-
-	if (!qat_req->buf.sgl_src_valid)
-		kfree(bufl);
-
-	dev_err(dev, "Failed to map buf for dma\n");
-	return -ENOMEM;
-}
-
 static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
 				  struct qat_crypto_request *qat_req)
 {
diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c
new file mode 100644
index 0000000000000..6d0a39f8ce109
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_bl.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2014 - 2022 Intel Corporation */
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include "adf_accel_devices.h"
+#include "qat_bl.h"
+#include "qat_crypto.h"
+
+void qat_alg_free_bufl(struct qat_crypto_instance *inst,
+		       struct qat_crypto_request *qat_req)
+{
+	struct device *dev = &GET_DEV(inst->accel_dev);
+	struct qat_alg_buf_list *bl = qat_req->buf.bl;
+	struct qat_alg_buf_list *blout = qat_req->buf.blout;
+	dma_addr_t blp = qat_req->buf.blp;
+	dma_addr_t blpout = qat_req->buf.bloutp;
+	size_t sz = qat_req->buf.sz;
+	size_t sz_out = qat_req->buf.sz_out;
+	int bl_dma_dir;
+	int i;
+
+	bl_dma_dir = blp != blpout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+
+	for (i = 0; i < bl->num_bufs; i++)
+		dma_unmap_single(dev, bl->bufers[i].addr,
+				 bl->bufers[i].len, bl_dma_dir);
+
+	dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
+
+	if (!qat_req->buf.sgl_src_valid)
+		kfree(bl);
+
+	if (blp != blpout) {
+		/* If out of place operation dma unmap only data */
+		int bufless = blout->num_bufs - blout->num_mapped_bufs;
+
+		for (i = bufless; i < blout->num_bufs; i++) {
+			dma_unmap_single(dev, blout->bufers[i].addr,
+					 blout->bufers[i].len,
+					 DMA_FROM_DEVICE);
+		}
+		dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE);
+
+		if (!qat_req->buf.sgl_dst_valid)
+			kfree(blout);
+	}
+}
+
+int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
+			struct scatterlist *sgl,
+			struct scatterlist *sglout,
+			struct qat_crypto_request *qat_req,
+			gfp_t flags)
+{
+	struct device *dev = &GET_DEV(inst->accel_dev);
+	int i, sg_nctr = 0;
+	int n = sg_nents(sgl);
+	struct qat_alg_buf_list *bufl;
+	struct qat_alg_buf_list *buflout = NULL;
+	dma_addr_t blp = DMA_MAPPING_ERROR;
+	dma_addr_t bloutp = DMA_MAPPING_ERROR;
+	struct scatterlist *sg;
+	size_t sz_out, sz = struct_size(bufl, bufers, n);
+	int node = dev_to_node(&GET_DEV(inst->accel_dev));
+	int bufl_dma_dir;
+
+	if (unlikely(!n))
+		return -EINVAL;
+
+	qat_req->buf.sgl_src_valid = false;
+	qat_req->buf.sgl_dst_valid = false;
+
+	if (n > QAT_MAX_BUFF_DESC) {
+		bufl = kzalloc_node(sz, flags, node);
+		if (unlikely(!bufl))
+			return -ENOMEM;
+	} else {
+		bufl = &qat_req->buf.sgl_src.sgl_hdr;
+		memset(bufl, 0, sizeof(struct qat_alg_buf_list));
+		qat_req->buf.sgl_src_valid = true;
+	}
+
+	bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+
+	for_each_sg(sgl, sg, n, i)
+		bufl->bufers[i].addr = DMA_MAPPING_ERROR;
+
+	for_each_sg(sgl, sg, n, i) {
+		int y = sg_nctr;
+
+		if (!sg->length)
+			continue;
+
+		bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg),
+						      sg->length,
+						      bufl_dma_dir);
+		bufl->bufers[y].len = sg->length;
+		if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr)))
+			goto err_in;
+		sg_nctr++;
+	}
+	bufl->num_bufs = sg_nctr;
+	blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, blp)))
+		goto err_in;
+	qat_req->buf.bl = bufl;
+	qat_req->buf.blp = blp;
+	qat_req->buf.sz = sz;
+	/* Handle out of place operation */
+	if (sgl != sglout) {
+		struct qat_alg_buf *bufers;
+
+		n = sg_nents(sglout);
+		sz_out = struct_size(buflout, bufers, n);
+		sg_nctr = 0;
+
+		if (n > QAT_MAX_BUFF_DESC) {
+			buflout = kzalloc_node(sz_out, flags, node);
+			if (unlikely(!buflout))
+				goto err_in;
+		} else {
+			buflout = &qat_req->buf.sgl_dst.sgl_hdr;
+			memset(buflout, 0, sizeof(struct qat_alg_buf_list));
+			qat_req->buf.sgl_dst_valid = true;
+		}
+
+		bufers = buflout->bufers;
+		for_each_sg(sglout, sg, n, i)
+			bufers[i].addr = DMA_MAPPING_ERROR;
+
+		for_each_sg(sglout, sg, n, i) {
+			int y = sg_nctr;
+
+			if (!sg->length)
+				continue;
+
+			bufers[y].addr = dma_map_single(dev, sg_virt(sg),
+							sg->length,
+							DMA_FROM_DEVICE);
+			if (unlikely(dma_mapping_error(dev, bufers[y].addr)))
+				goto err_out;
+			bufers[y].len = sg->length;
+			sg_nctr++;
+		}
+		buflout->num_bufs = sg_nctr;
+		buflout->num_mapped_bufs = sg_nctr;
+		bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(dev, bloutp)))
+			goto err_out;
+		qat_req->buf.blout = buflout;
+		qat_req->buf.bloutp = bloutp;
+		qat_req->buf.sz_out = sz_out;
+	} else {
+		/* Otherwise set the src and dst to the same address */
+		qat_req->buf.bloutp = qat_req->buf.blp;
+		qat_req->buf.sz_out = 0;
+	}
+	return 0;
+
+err_out:
+	if (!dma_mapping_error(dev, bloutp))
+		dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE);
+
+	n = sg_nents(sglout);
+	for (i = 0; i < n; i++)
+		if (!dma_mapping_error(dev, buflout->bufers[i].addr))
+			dma_unmap_single(dev, buflout->bufers[i].addr,
+					 buflout->bufers[i].len,
+					 DMA_FROM_DEVICE);
+
+	if (!qat_req->buf.sgl_dst_valid)
+		kfree(buflout);
+
+err_in:
+	if (!dma_mapping_error(dev, blp))
+		dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
+
+	n = sg_nents(sgl);
+	for (i = 0; i < n; i++)
+		if (!dma_mapping_error(dev, bufl->bufers[i].addr))
+			dma_unmap_single(dev, bufl->bufers[i].addr,
+					 bufl->bufers[i].len,
+					 bufl_dma_dir);
+
+	if (!qat_req->buf.sgl_src_valid)
+		kfree(bufl);
+
+	dev_err(dev, "Failed to map buf for dma\n");
+	return -ENOMEM;
+}
diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h
new file mode 100644
index 0000000000000..7a916f1ec645f
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_bl.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2014 - 2022 Intel Corporation */
+#ifndef QAT_BL_H
+#define QAT_BL_H
+#include <linux/scatterlist.h>
+#include <linux/types.h>
+#include "qat_crypto.h"
+
+void qat_alg_free_bufl(struct qat_crypto_instance *inst,
+		       struct qat_crypto_request *qat_req);
+int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
+			struct scatterlist *sgl,
+			struct scatterlist *sglout,
+			struct qat_crypto_request *qat_req,
+			gfp_t flags);
+
+#endif
-- 
GitLab


From b0cd997f35598c4fc01bf22061e1eb88fc10afad Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:13 +0000
Subject: [PATCH 3556/4122] crypto: qat - rename bufferlist functions

Rename the functions qat_alg_sgl_to_bufl() and qat_alg_free_bufl() as
qat_bl_sgl_to_bufl() and qat_bl_free_bufl() after their relocation into
the qat_bl module.

This commit does not implement any functional change.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_algs.c | 20 ++++++++++----------
 drivers/crypto/qat/qat_common/qat_bl.c   | 14 +++++++-------
 drivers/crypto/qat/qat_common/qat_bl.h   | 14 +++++++-------
 3 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 2ee4fa64032fa..3e7e9fffe28b3 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -673,7 +673,7 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
 	u8 stat_filed = qat_resp->comn_resp.comn_status;
 	int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
 
-	qat_alg_free_bufl(inst, qat_req);
+	qat_bl_free_bufl(inst, qat_req);
 	if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
 		res = -EBADMSG;
 	areq->base.complete(&areq->base, res);
@@ -743,7 +743,7 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
 	u8 stat_filed = qat_resp->comn_resp.comn_status;
 	int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
 
-	qat_alg_free_bufl(inst, qat_req);
+	qat_bl_free_bufl(inst, qat_req);
 	if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
 		res = -EINVAL;
 
@@ -799,7 +799,7 @@ static int qat_alg_aead_dec(struct aead_request *areq)
 	if (cipher_len % AES_BLOCK_SIZE != 0)
 		return -EINVAL;
 
-	ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -821,7 +821,7 @@ static int qat_alg_aead_dec(struct aead_request *areq)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
 	if (ret == -ENOSPC)
-		qat_alg_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst, qat_req);
 
 	return ret;
 }
@@ -842,7 +842,7 @@ static int qat_alg_aead_enc(struct aead_request *areq)
 	if (areq->cryptlen % AES_BLOCK_SIZE != 0)
 		return -EINVAL;
 
-	ret = qat_alg_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -866,7 +866,7 @@ static int qat_alg_aead_enc(struct aead_request *areq)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
 	if (ret == -ENOSPC)
-		qat_alg_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst, qat_req);
 
 	return ret;
 }
@@ -1027,7 +1027,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
 	if (req->cryptlen == 0)
 		return 0;
 
-	ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -1048,7 +1048,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
 	if (ret == -ENOSPC)
-		qat_alg_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst, qat_req);
 
 	return ret;
 }
@@ -1093,7 +1093,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
 	if (req->cryptlen == 0)
 		return 0;
 
-	ret = qat_alg_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -1115,7 +1115,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
 	if (ret == -ENOSPC)
-		qat_alg_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst, qat_req);
 
 	return ret;
 }
diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c
index 6d0a39f8ce109..8f7743f3c89b9 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.c
+++ b/drivers/crypto/qat/qat_common/qat_bl.c
@@ -10,8 +10,8 @@
 #include "qat_bl.h"
 #include "qat_crypto.h"
 
-void qat_alg_free_bufl(struct qat_crypto_instance *inst,
-		       struct qat_crypto_request *qat_req)
+void qat_bl_free_bufl(struct qat_crypto_instance *inst,
+		      struct qat_crypto_request *qat_req)
 {
 	struct device *dev = &GET_DEV(inst->accel_dev);
 	struct qat_alg_buf_list *bl = qat_req->buf.bl;
@@ -50,11 +50,11 @@ void qat_alg_free_bufl(struct qat_crypto_instance *inst,
 	}
 }
 
-int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
-			struct scatterlist *sgl,
-			struct scatterlist *sglout,
-			struct qat_crypto_request *qat_req,
-			gfp_t flags)
+int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
+		       struct scatterlist *sgl,
+		       struct scatterlist *sglout,
+		       struct qat_crypto_request *qat_req,
+		       gfp_t flags)
 {
 	struct device *dev = &GET_DEV(inst->accel_dev);
 	int i, sg_nctr = 0;
diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h
index 7a916f1ec645f..ed4c200ac6197 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.h
+++ b/drivers/crypto/qat/qat_common/qat_bl.h
@@ -6,12 +6,12 @@
 #include <linux/types.h>
 #include "qat_crypto.h"
 
-void qat_alg_free_bufl(struct qat_crypto_instance *inst,
-		       struct qat_crypto_request *qat_req);
-int qat_alg_sgl_to_bufl(struct qat_crypto_instance *inst,
-			struct scatterlist *sgl,
-			struct scatterlist *sglout,
-			struct qat_crypto_request *qat_req,
-			gfp_t flags);
+void qat_bl_free_bufl(struct qat_crypto_instance *inst,
+		      struct qat_crypto_request *qat_req);
+int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
+		       struct scatterlist *sgl,
+		       struct scatterlist *sglout,
+		       struct qat_crypto_request *qat_req,
+		       gfp_t flags);
 
 #endif
-- 
GitLab


From 3ed330d0dba61d2e08a0eed7aa3d5def3f0c749b Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:14 +0000
Subject: [PATCH 3557/4122] crypto: qat - change bufferlist logic interface

The functions qat_alg_sgl_to_bufl() and qat_alg_free_bufl() take as
argument a qat_crypto_instance and a qat_crypto_request structure.
These two structures are used only to get a reference to the
adf_accel_dev and qat_crypto_request_buffs.

In order to reuse these functions for the compression service, change
the signature so that they take adf_accel_dev and
qat_crypto_request_buffs.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_algs.c | 24 +++++----
 drivers/crypto/qat/qat_common/qat_bl.c   | 62 ++++++++++++------------
 drivers/crypto/qat/qat_common/qat_bl.h   |  8 +--
 3 files changed, 49 insertions(+), 45 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 3e7e9fffe28b3..dfa65e42db781 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -673,7 +673,7 @@ static void qat_aead_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
 	u8 stat_filed = qat_resp->comn_resp.comn_status;
 	int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
 
-	qat_bl_free_bufl(inst, qat_req);
+	qat_bl_free_bufl(inst->accel_dev, &qat_req->buf);
 	if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
 		res = -EBADMSG;
 	areq->base.complete(&areq->base, res);
@@ -743,7 +743,7 @@ static void qat_skcipher_alg_callback(struct icp_qat_fw_la_resp *qat_resp,
 	u8 stat_filed = qat_resp->comn_resp.comn_status;
 	int res = 0, qat_res = ICP_QAT_FW_COMN_RESP_CRYPTO_STAT_GET(stat_filed);
 
-	qat_bl_free_bufl(inst, qat_req);
+	qat_bl_free_bufl(inst->accel_dev, &qat_req->buf);
 	if (unlikely(qat_res != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
 		res = -EINVAL;
 
@@ -799,7 +799,8 @@ static int qat_alg_aead_dec(struct aead_request *areq)
 	if (cipher_len % AES_BLOCK_SIZE != 0)
 		return -EINVAL;
 
-	ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst,
+				 &qat_req->buf, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -821,7 +822,7 @@ static int qat_alg_aead_dec(struct aead_request *areq)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
 	if (ret == -ENOSPC)
-		qat_bl_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf);
 
 	return ret;
 }
@@ -842,7 +843,8 @@ static int qat_alg_aead_enc(struct aead_request *areq)
 	if (areq->cryptlen % AES_BLOCK_SIZE != 0)
 		return -EINVAL;
 
-	ret = qat_bl_sgl_to_bufl(ctx->inst, areq->src, areq->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst,
+				 &qat_req->buf, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -866,7 +868,7 @@ static int qat_alg_aead_enc(struct aead_request *areq)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &areq->base);
 	if (ret == -ENOSPC)
-		qat_bl_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf);
 
 	return ret;
 }
@@ -1027,7 +1029,8 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
 	if (req->cryptlen == 0)
 		return 0;
 
-	ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst,
+				 &qat_req->buf, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -1048,7 +1051,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
 	if (ret == -ENOSPC)
-		qat_bl_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf);
 
 	return ret;
 }
@@ -1093,7 +1096,8 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
 	if (req->cryptlen == 0)
 		return 0;
 
-	ret = qat_bl_sgl_to_bufl(ctx->inst, req->src, req->dst, qat_req, f);
+	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst,
+				 &qat_req->buf, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -1115,7 +1119,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
 
 	ret = qat_alg_send_sym_message(qat_req, ctx->inst, &req->base);
 	if (ret == -ENOSPC)
-		qat_bl_free_bufl(ctx->inst, qat_req);
+		qat_bl_free_bufl(ctx->inst->accel_dev, &qat_req->buf);
 
 	return ret;
 }
diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c
index 8f7743f3c89b9..5e319887f8d69 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.c
+++ b/drivers/crypto/qat/qat_common/qat_bl.c
@@ -10,16 +10,16 @@
 #include "qat_bl.h"
 #include "qat_crypto.h"
 
-void qat_bl_free_bufl(struct qat_crypto_instance *inst,
-		      struct qat_crypto_request *qat_req)
+void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
+		      struct qat_crypto_request_buffs *buf)
 {
-	struct device *dev = &GET_DEV(inst->accel_dev);
-	struct qat_alg_buf_list *bl = qat_req->buf.bl;
-	struct qat_alg_buf_list *blout = qat_req->buf.blout;
-	dma_addr_t blp = qat_req->buf.blp;
-	dma_addr_t blpout = qat_req->buf.bloutp;
-	size_t sz = qat_req->buf.sz;
-	size_t sz_out = qat_req->buf.sz_out;
+	struct device *dev = &GET_DEV(accel_dev);
+	struct qat_alg_buf_list *bl = buf->bl;
+	struct qat_alg_buf_list *blout = buf->blout;
+	dma_addr_t blp = buf->blp;
+	dma_addr_t blpout = buf->bloutp;
+	size_t sz = buf->sz;
+	size_t sz_out = buf->sz_out;
 	int bl_dma_dir;
 	int i;
 
@@ -31,7 +31,7 @@ void qat_bl_free_bufl(struct qat_crypto_instance *inst,
 
 	dma_unmap_single(dev, blp, sz, DMA_TO_DEVICE);
 
-	if (!qat_req->buf.sgl_src_valid)
+	if (!buf->sgl_src_valid)
 		kfree(bl);
 
 	if (blp != blpout) {
@@ -45,18 +45,18 @@ void qat_bl_free_bufl(struct qat_crypto_instance *inst,
 		}
 		dma_unmap_single(dev, blpout, sz_out, DMA_TO_DEVICE);
 
-		if (!qat_req->buf.sgl_dst_valid)
+		if (!buf->sgl_dst_valid)
 			kfree(blout);
 	}
 }
 
-int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
+int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 		       struct scatterlist *sgl,
 		       struct scatterlist *sglout,
-		       struct qat_crypto_request *qat_req,
+		       struct qat_crypto_request_buffs *buf,
 		       gfp_t flags)
 {
-	struct device *dev = &GET_DEV(inst->accel_dev);
+	struct device *dev = &GET_DEV(accel_dev);
 	int i, sg_nctr = 0;
 	int n = sg_nents(sgl);
 	struct qat_alg_buf_list *bufl;
@@ -65,23 +65,23 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
 	dma_addr_t bloutp = DMA_MAPPING_ERROR;
 	struct scatterlist *sg;
 	size_t sz_out, sz = struct_size(bufl, bufers, n);
-	int node = dev_to_node(&GET_DEV(inst->accel_dev));
+	int node = dev_to_node(&GET_DEV(accel_dev));
 	int bufl_dma_dir;
 
 	if (unlikely(!n))
 		return -EINVAL;
 
-	qat_req->buf.sgl_src_valid = false;
-	qat_req->buf.sgl_dst_valid = false;
+	buf->sgl_src_valid = false;
+	buf->sgl_dst_valid = false;
 
 	if (n > QAT_MAX_BUFF_DESC) {
 		bufl = kzalloc_node(sz, flags, node);
 		if (unlikely(!bufl))
 			return -ENOMEM;
 	} else {
-		bufl = &qat_req->buf.sgl_src.sgl_hdr;
+		bufl = &buf->sgl_src.sgl_hdr;
 		memset(bufl, 0, sizeof(struct qat_alg_buf_list));
-		qat_req->buf.sgl_src_valid = true;
+		buf->sgl_src_valid = true;
 	}
 
 	bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
@@ -107,9 +107,9 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
 	blp = dma_map_single(dev, bufl, sz, DMA_TO_DEVICE);
 	if (unlikely(dma_mapping_error(dev, blp)))
 		goto err_in;
-	qat_req->buf.bl = bufl;
-	qat_req->buf.blp = blp;
-	qat_req->buf.sz = sz;
+	buf->bl = bufl;
+	buf->blp = blp;
+	buf->sz = sz;
 	/* Handle out of place operation */
 	if (sgl != sglout) {
 		struct qat_alg_buf *bufers;
@@ -123,9 +123,9 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
 			if (unlikely(!buflout))
 				goto err_in;
 		} else {
-			buflout = &qat_req->buf.sgl_dst.sgl_hdr;
+			buflout = &buf->sgl_dst.sgl_hdr;
 			memset(buflout, 0, sizeof(struct qat_alg_buf_list));
-			qat_req->buf.sgl_dst_valid = true;
+			buf->sgl_dst_valid = true;
 		}
 
 		bufers = buflout->bufers;
@@ -151,13 +151,13 @@ int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
 		bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(dev, bloutp)))
 			goto err_out;
-		qat_req->buf.blout = buflout;
-		qat_req->buf.bloutp = bloutp;
-		qat_req->buf.sz_out = sz_out;
+		buf->blout = buflout;
+		buf->bloutp = bloutp;
+		buf->sz_out = sz_out;
 	} else {
 		/* Otherwise set the src and dst to the same address */
-		qat_req->buf.bloutp = qat_req->buf.blp;
-		qat_req->buf.sz_out = 0;
+		buf->bloutp = buf->blp;
+		buf->sz_out = 0;
 	}
 	return 0;
 
@@ -172,7 +172,7 @@ err_out:
 					 buflout->bufers[i].len,
 					 DMA_FROM_DEVICE);
 
-	if (!qat_req->buf.sgl_dst_valid)
+	if (!buf->sgl_dst_valid)
 		kfree(buflout);
 
 err_in:
@@ -186,7 +186,7 @@ err_in:
 					 bufl->bufers[i].len,
 					 bufl_dma_dir);
 
-	if (!qat_req->buf.sgl_src_valid)
+	if (!buf->sgl_src_valid)
 		kfree(bufl);
 
 	dev_err(dev, "Failed to map buf for dma\n");
diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h
index ed4c200ac6197..241299c219dd5 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.h
+++ b/drivers/crypto/qat/qat_common/qat_bl.h
@@ -6,12 +6,12 @@
 #include <linux/types.h>
 #include "qat_crypto.h"
 
-void qat_bl_free_bufl(struct qat_crypto_instance *inst,
-		      struct qat_crypto_request *qat_req);
-int qat_bl_sgl_to_bufl(struct qat_crypto_instance *inst,
+void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
+		      struct qat_crypto_request_buffs *buf);
+int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 		       struct scatterlist *sgl,
 		       struct scatterlist *sglout,
-		       struct qat_crypto_request *qat_req,
+		       struct qat_crypto_request_buffs *buf,
 		       gfp_t flags);
 
 #endif
-- 
GitLab


From 36ebc7472afeb58f1eb1d4c1f0546b9e98acea46 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:15 +0000
Subject: [PATCH 3558/4122] crypto: qat - generalize crypto request buffers

The structure qat_crypto_request_buffs which contains the source and
destination buffer lists and correspondent sizes and dma addresses is
also required for the compression service.
Rename it as qat_request_buffs and move it to qat_bl.h.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_bl.c     |  4 +--
 drivers/crypto/qat/qat_common/qat_bl.h     | 38 ++++++++++++++++++++--
 drivers/crypto/qat/qat_common/qat_crypto.h | 36 ++------------------
 3 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c
index 5e319887f8d69..c32b12d386f0a 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.c
+++ b/drivers/crypto/qat/qat_common/qat_bl.c
@@ -11,7 +11,7 @@
 #include "qat_crypto.h"
 
 void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
-		      struct qat_crypto_request_buffs *buf)
+		      struct qat_request_buffs *buf)
 {
 	struct device *dev = &GET_DEV(accel_dev);
 	struct qat_alg_buf_list *bl = buf->bl;
@@ -53,7 +53,7 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
 int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 		       struct scatterlist *sgl,
 		       struct scatterlist *sglout,
-		       struct qat_crypto_request_buffs *buf,
+		       struct qat_request_buffs *buf,
 		       gfp_t flags)
 {
 	struct device *dev = &GET_DEV(accel_dev);
diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h
index 241299c219dd5..1c534c57a36bc 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.h
+++ b/drivers/crypto/qat/qat_common/qat_bl.h
@@ -4,14 +4,46 @@
 #define QAT_BL_H
 #include <linux/scatterlist.h>
 #include <linux/types.h>
-#include "qat_crypto.h"
+
+#define QAT_MAX_BUFF_DESC	4
+
+struct qat_alg_buf {
+	u32 len;
+	u32 resrvd;
+	u64 addr;
+} __packed;
+
+struct qat_alg_buf_list {
+	u64 resrvd;
+	u32 num_bufs;
+	u32 num_mapped_bufs;
+	struct qat_alg_buf bufers[];
+} __packed;
+
+struct qat_alg_fixed_buf_list {
+	struct qat_alg_buf_list sgl_hdr;
+	struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC];
+} __packed __aligned(64);
+
+struct qat_request_buffs {
+	struct qat_alg_buf_list *bl;
+	dma_addr_t blp;
+	struct qat_alg_buf_list *blout;
+	dma_addr_t bloutp;
+	size_t sz;
+	size_t sz_out;
+	bool sgl_src_valid;
+	bool sgl_dst_valid;
+	struct qat_alg_fixed_buf_list sgl_src;
+	struct qat_alg_fixed_buf_list sgl_dst;
+};
 
 void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
-		      struct qat_crypto_request_buffs *buf);
+		      struct qat_request_buffs *buf);
 int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 		       struct scatterlist *sgl,
 		       struct scatterlist *sglout,
-		       struct qat_crypto_request_buffs *buf,
+		       struct qat_request_buffs *buf,
 		       gfp_t flags);
 
 #endif
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
index df3c738ce323a..bb116357a5684 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.h
+++ b/drivers/crypto/qat/qat_common/qat_crypto.h
@@ -8,6 +8,7 @@
 #include <linux/slab.h>
 #include "adf_accel_devices.h"
 #include "icp_qat_fw_la.h"
+#include "qat_bl.h"
 
 struct qat_instance_backlog {
 	struct list_head list;
@@ -35,39 +36,6 @@ struct qat_crypto_instance {
 	struct qat_instance_backlog backlog;
 };
 
-#define QAT_MAX_BUFF_DESC	4
-
-struct qat_alg_buf {
-	u32 len;
-	u32 resrvd;
-	u64 addr;
-} __packed;
-
-struct qat_alg_buf_list {
-	u64 resrvd;
-	u32 num_bufs;
-	u32 num_mapped_bufs;
-	struct qat_alg_buf bufers[];
-} __packed;
-
-struct qat_alg_fixed_buf_list {
-	struct qat_alg_buf_list sgl_hdr;
-	struct qat_alg_buf descriptors[QAT_MAX_BUFF_DESC];
-} __packed __aligned(64);
-
-struct qat_crypto_request_buffs {
-	struct qat_alg_buf_list *bl;
-	dma_addr_t blp;
-	struct qat_alg_buf_list *blout;
-	dma_addr_t bloutp;
-	size_t sz;
-	size_t sz_out;
-	bool sgl_src_valid;
-	bool sgl_dst_valid;
-	struct qat_alg_fixed_buf_list sgl_src;
-	struct qat_alg_fixed_buf_list sgl_dst;
-};
-
 struct qat_crypto_request;
 
 struct qat_crypto_request {
@@ -80,7 +48,7 @@ struct qat_crypto_request {
 		struct aead_request *aead_req;
 		struct skcipher_request *skcipher_req;
 	};
-	struct qat_crypto_request_buffs buf;
+	struct qat_request_buffs buf;
 	void (*cb)(struct icp_qat_fw_la_resp *resp,
 		   struct qat_crypto_request *req);
 	union {
-- 
GitLab


From cf692906bd61af2eec06a32a83d2a8ec3acf3548 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:16 +0000
Subject: [PATCH 3559/4122] crypto: qat - extend buffer list interface

The compression service requires an additional pre-allocated buffer for
each destination scatter list.
Extend the function qat_alg_sgl_to_bufl() to take an additional
structure that contains the dma address and the size of the extra
buffer which will be appended in the destination FW SGL.

The logic that unmaps buffers in qat_alg_free_bufl() has been changed to
start unmapping from buffer 0 instead of skipping the initial buffers
num_buff - num_mapped_bufs as that functionality was not used in the
code.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_algs.c |  8 ++--
 drivers/crypto/qat/qat_common/qat_bl.c   | 58 ++++++++++++++++++------
 drivers/crypto/qat/qat_common/qat_bl.h   |  6 +++
 3 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index dfa65e42db781..b4b9f0aa59b98 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -800,7 +800,7 @@ static int qat_alg_aead_dec(struct aead_request *areq)
 		return -EINVAL;
 
 	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst,
-				 &qat_req->buf, f);
+				 &qat_req->buf, NULL, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -844,7 +844,7 @@ static int qat_alg_aead_enc(struct aead_request *areq)
 		return -EINVAL;
 
 	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst,
-				 &qat_req->buf, f);
+				 &qat_req->buf, NULL, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -1030,7 +1030,7 @@ static int qat_alg_skcipher_encrypt(struct skcipher_request *req)
 		return 0;
 
 	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst,
-				 &qat_req->buf, f);
+				 &qat_req->buf, NULL, f);
 	if (unlikely(ret))
 		return ret;
 
@@ -1097,7 +1097,7 @@ static int qat_alg_skcipher_decrypt(struct skcipher_request *req)
 		return 0;
 
 	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, req->src, req->dst,
-				 &qat_req->buf, f);
+				 &qat_req->buf, NULL, f);
 	if (unlikely(ret))
 		return ret;
 
diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c
index c32b12d386f0a..221a4eb610a38 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.c
+++ b/drivers/crypto/qat/qat_common/qat_bl.c
@@ -35,10 +35,7 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
 		kfree(bl);
 
 	if (blp != blpout) {
-		/* If out of place operation dma unmap only data */
-		int bufless = blout->num_bufs - blout->num_mapped_bufs;
-
-		for (i = bufless; i < blout->num_bufs; i++) {
+		for (i = 0; i < blout->num_mapped_bufs; i++) {
 			dma_unmap_single(dev, blout->bufers[i].addr,
 					 blout->bufers[i].len,
 					 DMA_FROM_DEVICE);
@@ -50,11 +47,13 @@ void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
 	}
 }
 
-int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
-		       struct scatterlist *sgl,
-		       struct scatterlist *sglout,
-		       struct qat_request_buffs *buf,
-		       gfp_t flags)
+static int __qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
+				struct scatterlist *sgl,
+				struct scatterlist *sglout,
+				struct qat_request_buffs *buf,
+				dma_addr_t extra_dst_buff,
+				size_t sz_extra_dst_buff,
+				gfp_t flags)
 {
 	struct device *dev = &GET_DEV(accel_dev);
 	int i, sg_nctr = 0;
@@ -86,7 +85,7 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 
 	bufl_dma_dir = sgl != sglout ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
 
-	for_each_sg(sgl, sg, n, i)
+	for (i = 0; i < n; i++)
 		bufl->bufers[i].addr = DMA_MAPPING_ERROR;
 
 	for_each_sg(sgl, sg, n, i) {
@@ -113,8 +112,10 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 	/* Handle out of place operation */
 	if (sgl != sglout) {
 		struct qat_alg_buf *bufers;
+		int extra_buff = extra_dst_buff ? 1 : 0;
+		int n_sglout = sg_nents(sglout);
 
-		n = sg_nents(sglout);
+		n = n_sglout + extra_buff;
 		sz_out = struct_size(buflout, bufers, n);
 		sg_nctr = 0;
 
@@ -129,10 +130,10 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 		}
 
 		bufers = buflout->bufers;
-		for_each_sg(sglout, sg, n, i)
+		for (i = 0; i < n; i++)
 			bufers[i].addr = DMA_MAPPING_ERROR;
 
-		for_each_sg(sglout, sg, n, i) {
+		for_each_sg(sglout, sg, n_sglout, i) {
 			int y = sg_nctr;
 
 			if (!sg->length)
@@ -146,7 +147,13 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 			bufers[y].len = sg->length;
 			sg_nctr++;
 		}
+		if (extra_buff) {
+			bufers[sg_nctr].addr = extra_dst_buff;
+			bufers[sg_nctr].len = sz_extra_dst_buff;
+		}
+
 		buflout->num_bufs = sg_nctr;
+		buflout->num_bufs += extra_buff;
 		buflout->num_mapped_bufs = sg_nctr;
 		bloutp = dma_map_single(dev, buflout, sz_out, DMA_TO_DEVICE);
 		if (unlikely(dma_mapping_error(dev, bloutp)))
@@ -166,11 +173,14 @@ err_out:
 		dma_unmap_single(dev, bloutp, sz_out, DMA_TO_DEVICE);
 
 	n = sg_nents(sglout);
-	for (i = 0; i < n; i++)
+	for (i = 0; i < n; i++) {
+		if (buflout->bufers[i].addr == extra_dst_buff)
+			break;
 		if (!dma_mapping_error(dev, buflout->bufers[i].addr))
 			dma_unmap_single(dev, buflout->bufers[i].addr,
 					 buflout->bufers[i].len,
 					 DMA_FROM_DEVICE);
+	}
 
 	if (!buf->sgl_dst_valid)
 		kfree(buflout);
@@ -192,3 +202,23 @@ err_in:
 	dev_err(dev, "Failed to map buf for dma\n");
 	return -ENOMEM;
 }
+
+int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
+		       struct scatterlist *sgl,
+		       struct scatterlist *sglout,
+		       struct qat_request_buffs *buf,
+		       struct qat_sgl_to_bufl_params *params,
+		       gfp_t flags)
+{
+	dma_addr_t extra_dst_buff = 0;
+	size_t sz_extra_dst_buff = 0;
+
+	if (params) {
+		extra_dst_buff = params->extra_dst_buff;
+		sz_extra_dst_buff = params->sz_extra_dst_buff;
+	}
+
+	return __qat_bl_sgl_to_bufl(accel_dev, sgl, sglout, buf,
+				    extra_dst_buff, sz_extra_dst_buff,
+				    flags);
+}
diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h
index 1c534c57a36bc..0c174fee9e645 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.h
+++ b/drivers/crypto/qat/qat_common/qat_bl.h
@@ -38,12 +38,18 @@ struct qat_request_buffs {
 	struct qat_alg_fixed_buf_list sgl_dst;
 };
 
+struct qat_sgl_to_bufl_params {
+	dma_addr_t extra_dst_buff;
+	size_t sz_extra_dst_buff;
+};
+
 void qat_bl_free_bufl(struct adf_accel_dev *accel_dev,
 		      struct qat_request_buffs *buf);
 int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 		       struct scatterlist *sgl,
 		       struct scatterlist *sglout,
 		       struct qat_request_buffs *buf,
+		       struct qat_sgl_to_bufl_params *params,
 		       gfp_t flags);
 
 #endif
-- 
GitLab


From 4d76f3880987a00da79f455876488ac3c7343e83 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:17 +0000
Subject: [PATCH 3560/4122] crypto: qat - relocate backlog related structures

Move the structures qat_instance_backlog and qat_alg_req from
qat_crypto.h to qat_algs_send.h since they are not unique to crypto.
Both structures will be used by the compression service to support
requests with the CRYPTO_TFM_REQ_MAY_BACKLOG flag set.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_algs_send.h | 16 +++++++++++++++-
 drivers/crypto/qat/qat_common/qat_crypto.h    | 14 +-------------
 2 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_algs_send.h b/drivers/crypto/qat/qat_common/qat_algs_send.h
index 5ce9f4f69d8ff..0baca16e1eff0 100644
--- a/drivers/crypto/qat/qat_common/qat_algs_send.h
+++ b/drivers/crypto/qat/qat_common/qat_algs_send.h
@@ -3,7 +3,21 @@
 #ifndef QAT_ALGS_SEND_H
 #define QAT_ALGS_SEND_H
 
-#include "qat_crypto.h"
+#include <linux/list.h>
+#include "adf_transport_internal.h"
+
+struct qat_instance_backlog {
+	struct list_head list;
+	spinlock_t lock; /* protects backlog list */
+};
+
+struct qat_alg_req {
+	u32 *fw_req;
+	struct adf_etr_ring_data *tx_ring;
+	struct crypto_async_request *base;
+	struct list_head list;
+	struct qat_instance_backlog *backlog;
+};
 
 int qat_alg_send_message(struct qat_alg_req *req);
 void qat_alg_send_backlog(struct qat_instance_backlog *backlog);
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
index bb116357a5684..505e881022a7d 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.h
+++ b/drivers/crypto/qat/qat_common/qat_crypto.h
@@ -8,21 +8,9 @@
 #include <linux/slab.h>
 #include "adf_accel_devices.h"
 #include "icp_qat_fw_la.h"
+#include "qat_algs_send.h"
 #include "qat_bl.h"
 
-struct qat_instance_backlog {
-	struct list_head list;
-	spinlock_t lock; /* protects backlog list */
-};
-
-struct qat_alg_req {
-	u32 *fw_req;
-	struct adf_etr_ring_data *tx_ring;
-	struct crypto_async_request *base;
-	struct list_head list;
-	struct qat_instance_backlog *backlog;
-};
-
 struct qat_crypto_instance {
 	struct adf_etr_ring_data *sym_tx;
 	struct adf_etr_ring_data *sym_rx;
-- 
GitLab


From 79d8dbf155d4e670b6ac20acbb6b22f02c728da5 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:18 +0000
Subject: [PATCH 3561/4122] crypto: qat - relocate qat_algs_alloc_flags()

Move qat_algs_alloc_flags() from qat_crypto.h to qat_bl.h as this will
be used also by the compression logic.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_bl.h     | 6 ++++++
 drivers/crypto/qat/qat_common/qat_crypto.h | 5 -----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h
index 0c174fee9e645..5f2ea8f352f76 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.h
+++ b/drivers/crypto/qat/qat_common/qat_bl.h
@@ -2,6 +2,7 @@
 /* Copyright(c) 2014 - 2022 Intel Corporation */
 #ifndef QAT_BL_H
 #define QAT_BL_H
+#include <linux/crypto.h>
 #include <linux/scatterlist.h>
 #include <linux/types.h>
 
@@ -52,4 +53,9 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 		       struct qat_sgl_to_bufl_params *params,
 		       gfp_t flags);
 
+static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req)
+{
+	return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
+}
+
 #endif
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.h b/drivers/crypto/qat/qat_common/qat_crypto.h
index 505e881022a7d..6a0e961bb9dc7 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.h
+++ b/drivers/crypto/qat/qat_common/qat_crypto.h
@@ -65,9 +65,4 @@ static inline bool adf_hw_dev_has_crypto(struct adf_accel_dev *accel_dev)
 	return true;
 }
 
-static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req)
-{
-	return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
-}
-
 #endif
-- 
GitLab


From 93b2f5799cee57814a36882e61ef5f03d5dc5392 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:19 +0000
Subject: [PATCH 3562/4122] crypto: qat - rename and relocate GEN2 config
 function

Rename qat_crypto_dev_config() in adf_gen2_dev_config() and relocate it
to the newly created file adf_gen2_config.c.
This function is specific to QAT GEN2 devices and will be used also to
configure the compression service.

In addition change the drivers to use the dev_config() in the hardware
data structure (which for GEN2 devices now points to
adf_gen2_dev_config()), for consistency.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c  |   2 +
 drivers/crypto/qat/qat_c3xxx/adf_drv.c        |   2 +-
 .../qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c     |   2 +
 .../crypto/qat/qat_c62x/adf_c62x_hw_data.c    |   2 +
 drivers/crypto/qat/qat_c62x/adf_drv.c         |   2 +-
 .../qat/qat_c62xvf/adf_c62xvf_hw_data.c       |   2 +
 drivers/crypto/qat/qat_common/Makefile        |   1 +
 .../crypto/qat/qat_common/adf_common_drv.h    |   1 -
 .../crypto/qat/qat_common/adf_gen2_config.c   | 131 ++++++++++++++++++
 .../crypto/qat/qat_common/adf_gen2_config.h   |  10 ++
 drivers/crypto/qat/qat_common/qat_crypto.c    | 120 +---------------
 .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c   |   2 +
 drivers/crypto/qat/qat_dh895xcc/adf_drv.c     |   2 +-
 .../qat_dh895xccvf/adf_dh895xccvf_hw_data.c   |   2 +
 14 files changed, 158 insertions(+), 123 deletions(-)
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_config.c
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_config.h

diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index 50d5afa26a9be..c0519a79060a9 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2014 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
+#include <adf_gen2_config.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_c3xxx_hw_data.h"
@@ -124,6 +125,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
 	hw_data->reset_device = adf_reset_flr;
 	hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
 	hw_data->disable_iov = adf_disable_sriov;
+	hw_data->dev_config = adf_gen2_dev_config;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_drv.c b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
index 2aef0bb791dfa..1f4fbf4562b22 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_drv.c
@@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_err_disable_aer;
 	}
 
-	ret = qat_crypto_dev_config(accel_dev);
+	ret = hw_data->dev_config(accel_dev);
 	if (ret)
 		goto out_err_disable_aer;
 
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
index a9fbe57b32ae3..6c37dda6da2ee 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2015 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
+#include <adf_gen2_config.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include <adf_pfvf_vf_msg.h>
@@ -86,6 +87,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
 	hw_data->get_sku = get_sku;
 	hw_data->enable_ints = adf_vf_void_noop;
 	hw_data->dev_class->instances++;
+	hw_data->dev_config = adf_gen2_dev_config;
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
index c00386fe6587a..689358cb7bb08 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2014 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
+#include <adf_gen2_config.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_c62x_hw_data.h"
@@ -126,6 +127,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
 	hw_data->reset_device = adf_reset_flr;
 	hw_data->set_ssm_wdtimer = adf_gen2_set_ssm_wdtimer;
 	hw_data->disable_iov = adf_disable_sriov;
+	hw_data->dev_config = adf_gen2_dev_config;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_c62x/adf_drv.c b/drivers/crypto/qat/qat_c62x/adf_drv.c
index 56163083f1616..4ccaf298250c3 100644
--- a/drivers/crypto/qat/qat_c62x/adf_drv.c
+++ b/drivers/crypto/qat/qat_c62x/adf_drv.c
@@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_err_disable_aer;
 	}
 
-	ret = qat_crypto_dev_config(accel_dev);
+	ret = hw_data->dev_config(accel_dev);
 	if (ret)
 		goto out_err_disable_aer;
 
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
index 0282038fca548..521110ecd07fd 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2015 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
+#include <adf_gen2_config.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include <adf_pfvf_vf_msg.h>
@@ -86,6 +87,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
 	hw_data->get_sku = get_sku;
 	hw_data->enable_ints = adf_vf_void_noop;
 	hw_data->dev_class->instances++;
+	hw_data->dev_config = adf_gen2_dev_config;
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index b0587d03eac29..b59b6315134b6 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -12,6 +12,7 @@ intel_qat-objs := adf_cfg.o \
 	adf_hw_arbiter.o \
 	adf_sysfs.o \
 	adf_gen2_hw_data.o \
+	adf_gen2_config.o \
 	adf_gen4_hw_data.o \
 	adf_gen4_pm.o \
 	qat_crypto.o \
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 7bb477c3ce25f..b8ec0268d2d29 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -110,7 +110,6 @@ int adf_init_etr_data(struct adf_accel_dev *accel_dev);
 void adf_cleanup_etr_data(struct adf_accel_dev *accel_dev);
 int qat_crypto_register(void);
 int qat_crypto_unregister(void);
-int qat_crypto_dev_config(struct adf_accel_dev *accel_dev);
 int qat_crypto_vf_dev_config(struct adf_accel_dev *accel_dev);
 struct qat_crypto_instance *qat_crypto_get_instance_node(int node);
 void qat_crypto_put_instance(struct qat_crypto_instance *inst);
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.c b/drivers/crypto/qat/qat_common/adf_gen2_config.c
new file mode 100644
index 0000000000000..1c490e1859a76
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen2_config.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation */
+#include "adf_accel_devices.h"
+#include "adf_cfg.h"
+#include "adf_cfg_strings.h"
+#include "adf_gen2_config.h"
+#include "adf_common_drv.h"
+#include "qat_crypto.h"
+#include "adf_transport_access_macros.h"
+
+static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev)
+{
+	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
+	int banks = GET_MAX_BANKS(accel_dev);
+	int cpus = num_online_cpus();
+	unsigned long val;
+	int instances;
+	int ret;
+	int i;
+
+	if (adf_hw_dev_has_crypto(accel_dev))
+		instances = min(cpus, banks);
+	else
+		instances = 0;
+
+	ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC);
+	if (ret)
+		goto err;
+
+	ret = adf_cfg_section_add(accel_dev, "Accelerator0");
+	if (ret)
+		goto err;
+
+	for (i = 0; i < instances; i++) {
+		val = i;
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY,
+			 i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i);
+		val = 128;
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 512;
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 0;
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 2;
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 8;
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 10;
+		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = ADF_COALESCING_DEF_TIME;
+		snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0",
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+	}
+
+	val = i;
+	ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY,
+					  &val, ADF_DEC);
+	if (ret)
+		goto err;
+
+	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
+	return 0;
+err:
+	dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n");
+	return ret;
+}
+
+/**
+ * adf_gen2_dev_config() - create dev config required to create instances
+ *
+ * @accel_dev: Pointer to acceleration device.
+ *
+ * Function creates device configuration required to create instances
+ *
+ * Return: 0 on success, error code otherwise.
+ */
+int adf_gen2_dev_config(struct adf_accel_dev *accel_dev)
+{
+	return adf_gen2_crypto_dev_config(accel_dev);
+}
+EXPORT_SYMBOL_GPL(adf_gen2_dev_config);
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.h b/drivers/crypto/qat/qat_common/adf_gen2_config.h
new file mode 100644
index 0000000000000..4bf9da2de68aa
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen2_config.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef ADF_GEN2_CONFIG_H_
+#define ADF_GEN2_CONFIG_H_
+
+#include "adf_accel_devices.h"
+
+int adf_gen2_dev_config(struct adf_accel_dev *accel_dev);
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/qat_crypto.c b/drivers/crypto/qat/qat_common/qat_crypto.c
index 9341d892533a7..e31199eade5b6 100644
--- a/drivers/crypto/qat/qat_common/qat_crypto.c
+++ b/drivers/crypto/qat/qat_common/qat_crypto.c
@@ -5,7 +5,6 @@
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
 #include "adf_transport.h"
-#include "adf_transport_access_macros.h"
 #include "adf_cfg.h"
 #include "adf_cfg_strings.h"
 #include "adf_gen2_hw_data.h"
@@ -126,126 +125,9 @@ int qat_crypto_vf_dev_config(struct adf_accel_dev *accel_dev)
 		return -EFAULT;
 	}
 
-	return qat_crypto_dev_config(accel_dev);
+	return GET_HW_DATA(accel_dev)->dev_config(accel_dev);
 }
 
-/**
- * qat_crypto_dev_config() - create dev config required to create crypto inst.
- *
- * @accel_dev: Pointer to acceleration device.
- *
- * Function creates device configuration required to create crypto instances
- *
- * Return: 0 on success, error code otherwise.
- */
-int qat_crypto_dev_config(struct adf_accel_dev *accel_dev)
-{
-	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
-	int banks = GET_MAX_BANKS(accel_dev);
-	int cpus = num_online_cpus();
-	unsigned long val;
-	int instances;
-	int ret;
-	int i;
-
-	if (adf_hw_dev_has_crypto(accel_dev))
-		instances = min(cpus, banks);
-	else
-		instances = 0;
-
-	ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC);
-	if (ret)
-		goto err;
-
-	ret = adf_cfg_section_add(accel_dev, "Accelerator0");
-	if (ret)
-		goto err;
-
-	for (i = 0; i < instances; i++) {
-		val = i;
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_BANK_NUM, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_ETRMGR_CORE_AFFINITY,
-			 i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_SIZE, i);
-		val = 128;
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		val = 512;
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_SIZE, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		val = 0;
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_TX, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		val = 2;
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_TX, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		val = 8;
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_RX, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		val = 10;
-		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_SYM_RX, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-
-		val = ADF_COALESCING_DEF_TIME;
-		snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i);
-		ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0",
-						  key, &val, ADF_DEC);
-		if (ret)
-			goto err;
-	}
-
-	val = i;
-	ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY,
-					  &val, ADF_DEC);
-	if (ret)
-		goto err;
-
-	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
-	return 0;
-err:
-	dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n");
-	return ret;
-}
-EXPORT_SYMBOL_GPL(qat_crypto_dev_config);
-
 static int qat_crypto_create_instances(struct adf_accel_dev *accel_dev)
 {
 	unsigned long num_inst, num_msg_sym, num_msg_asym;
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index cb3bdd3618fb0..baacf817abf68 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2014 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
+#include <adf_gen2_config.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_dh895xcc_hw_data.h"
@@ -234,6 +235,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
 	hw_data->enable_ints = adf_gen2_enable_ints;
 	hw_data->reset_device = adf_reset_sbr;
 	hw_data->disable_iov = adf_disable_sriov;
+	hw_data->dev_config = adf_gen2_dev_config;
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	hw_data->pfvf_ops.enable_vf2pf_interrupts = enable_vf2pf_interrupts;
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
index acca56752aa02..ebeb17b67fcd5 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_drv.c
@@ -201,7 +201,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_err_disable_aer;
 	}
 
-	ret = qat_crypto_dev_config(accel_dev);
+	ret = hw_data->dev_config(accel_dev);
 	if (ret)
 		goto out_err_disable_aer;
 
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
index 31c14d7e1c115..b933a00fb91b8 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2015 - 2021 Intel Corporation */
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
+#include <adf_gen2_config.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include <adf_pfvf_vf_msg.h>
@@ -86,6 +87,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
 	hw_data->get_sku = get_sku;
 	hw_data->enable_ints = adf_vf_void_noop;
 	hw_data->dev_class->instances++;
+	hw_data->dev_config = adf_gen2_dev_config;
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
-- 
GitLab


From 1198ae56c9a520384dcf53f01cd9adecd73751d0 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:20 +0000
Subject: [PATCH 3563/4122] crypto: qat - expose deflate through acomp api for
 QAT GEN2

Add infrastructure for implementing the acomp APIs in the QAT driver and
expose the deflate algorithm for QAT GEN2 devices.
This adds
  (1) the compression service which includes logic to create, allocate
  and handle compression instances;
  (2) logic to create configuration entries at probe time for the
  compression instances;
  (3) updates to the firmware API for allowing the compression service;
  and;
  (4) a back-end for deflate that implements the acomp api for QAT GEN2
  devices.

The implementation configures the device to produce data compressed
statically, optimized for throughput over compression ratio.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_4xxx/adf_drv.c         |   6 +
 .../crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c  |   2 +
 .../qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c     |   2 +
 .../crypto/qat/qat_c62x/adf_c62x_hw_data.c    |   2 +
 .../qat/qat_c62xvf/adf_c62xvf_hw_data.c       |   2 +
 drivers/crypto/qat/qat_common/Makefile        |   3 +
 .../crypto/qat/qat_common/adf_accel_devices.h |  14 +
 .../crypto/qat/qat_common/adf_cfg_strings.h   |   1 +
 .../crypto/qat/qat_common/adf_common_drv.h    |   8 +
 drivers/crypto/qat/qat_common/adf_ctl_drv.c   |   6 +
 .../crypto/qat/qat_common/adf_gen2_config.c   |  99 ++++-
 drivers/crypto/qat/qat_common/adf_gen2_dc.c   |  70 +++
 drivers/crypto/qat/qat_common/adf_gen2_dc.h   |  10 +
 drivers/crypto/qat/qat_common/adf_init.c      |  11 +
 drivers/crypto/qat/qat_common/adf_sriov.c     |   4 +
 drivers/crypto/qat/qat_common/icp_qat_fw.h    |  24 ++
 .../crypto/qat/qat_common/icp_qat_fw_comp.h   | 404 ++++++++++++++++++
 drivers/crypto/qat/qat_common/icp_qat_hw.h    |  66 +++
 drivers/crypto/qat/qat_common/qat_comp_algs.c | 274 ++++++++++++
 drivers/crypto/qat/qat_common/qat_comp_req.h  | 113 +++++
 .../crypto/qat/qat_common/qat_compression.c   | 297 +++++++++++++
 .../crypto/qat/qat_common/qat_compression.h   |  37 ++
 .../qat/qat_dh895xcc/adf_dh895xcc_hw_data.c   |   2 +
 .../qat_dh895xccvf/adf_dh895xccvf_hw_data.c   |   2 +
 24 files changed, 1447 insertions(+), 12 deletions(-)
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_dc.c
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen2_dc.h
 create mode 100644 drivers/crypto/qat/qat_common/icp_qat_fw_comp.h
 create mode 100644 drivers/crypto/qat/qat_common/qat_comp_algs.c
 create mode 100644 drivers/crypto/qat/qat_common/qat_comp_req.h
 create mode 100644 drivers/crypto/qat/qat_common/qat_compression.c
 create mode 100644 drivers/crypto/qat/qat_common/qat_compression.h

diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index 670a58b25cb16..8496c451b48e1 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -155,6 +155,12 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
+	val = 0;
+	ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC,
+					  &val, ADF_DEC);
+	if (ret)
+		goto err;
+
 	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
 	return 0;
 err:
diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
index c0519a79060a9..c55c51a07677d 100644
--- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c
@@ -3,6 +3,7 @@
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
+#include <adf_gen2_dc.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_c3xxx_hw_data.h"
@@ -129,6 +130,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
+	adf_gen2_init_dc_ops(&hw_data->dc_ops);
 }
 
 void adf_clean_hw_data_c3xxx(struct adf_hw_device_data *hw_data)
diff --git a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
index 6c37dda6da2ee..84d9486e04de6 100644
--- a/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c3xxxvf/adf_c3xxxvf_hw_data.c
@@ -3,6 +3,7 @@
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
+#include <adf_gen2_dc.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include <adf_pfvf_vf_msg.h>
@@ -91,6 +92,7 @@ void adf_init_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
+	adf_gen2_init_dc_ops(&hw_data->dc_ops);
 }
 
 void adf_clean_hw_data_c3xxxiov(struct adf_hw_device_data *hw_data)
diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
index 689358cb7bb08..b7aa19d2fa804 100644
--- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
+++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c
@@ -3,6 +3,7 @@
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
+#include <adf_gen2_dc.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_c62x_hw_data.h"
@@ -131,6 +132,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data)
 
 	adf_gen2_init_pf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
+	adf_gen2_init_dc_ops(&hw_data->dc_ops);
 }
 
 void adf_clean_hw_data_c62x(struct adf_hw_device_data *hw_data)
diff --git a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
index 521110ecd07fd..751d7aa57fc7f 100644
--- a/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
+++ b/drivers/crypto/qat/qat_c62xvf/adf_c62xvf_hw_data.c
@@ -3,6 +3,7 @@
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
+#include <adf_gen2_dc.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include <adf_pfvf_vf_msg.h>
@@ -91,6 +92,7 @@ void adf_init_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
+	adf_gen2_init_dc_ops(&hw_data->dc_ops);
 }
 
 void adf_clean_hw_data_c62xiov(struct adf_hw_device_data *hw_data)
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index b59b6315134b6..e3db4786738f7 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -15,7 +15,10 @@ intel_qat-objs := adf_cfg.o \
 	adf_gen2_config.o \
 	adf_gen4_hw_data.o \
 	adf_gen4_pm.o \
+	adf_gen2_dc.o \
 	qat_crypto.o \
+	qat_compression.o \
+	qat_comp_algs.o \
 	qat_algs.o \
 	qat_asym_algs.o \
 	qat_algs_send.o \
diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h
index 0a55a4f34dcfd..284f5aad3ee0b 100644
--- a/drivers/crypto/qat/qat_common/adf_accel_devices.h
+++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h
@@ -163,6 +163,10 @@ struct adf_pfvf_ops {
 					u32 pfvf_offset, u8 compat_ver);
 };
 
+struct adf_dc_ops {
+	void (*build_deflate_ctx)(void *ctx);
+};
+
 struct adf_hw_device_data {
 	struct adf_hw_device_class *dev_class;
 	u32 (*get_accel_mask)(struct adf_hw_device_data *self);
@@ -202,6 +206,7 @@ struct adf_hw_device_data {
 	int (*dev_config)(struct adf_accel_dev *accel_dev);
 	struct adf_pfvf_ops pfvf_ops;
 	struct adf_hw_csr_ops csr_ops;
+	struct adf_dc_ops dc_ops;
 	const char *fw_name;
 	const char *fw_mmp_name;
 	u32 fuses;
@@ -247,6 +252,7 @@ struct adf_hw_device_data {
 #define GET_MAX_ACCELENGINES(accel_dev) (GET_HW_DATA(accel_dev)->num_engines)
 #define GET_CSR_OPS(accel_dev) (&(accel_dev)->hw_device->csr_ops)
 #define GET_PFVF_OPS(accel_dev) (&(accel_dev)->hw_device->pfvf_ops)
+#define GET_DC_OPS(accel_dev) (&(accel_dev)->hw_device->dc_ops)
 #define accel_to_pci_dev(accel_ptr) accel_ptr->accel_pci_dev.pci_dev
 
 struct adf_admin_comms;
@@ -266,13 +272,21 @@ struct adf_accel_vf_info {
 	u8 vf_compat_ver;
 };
 
+struct adf_dc_data {
+	u8 *ovf_buff;
+	size_t ovf_buff_sz;
+	dma_addr_t ovf_buff_p;
+};
+
 struct adf_accel_dev {
 	struct adf_etr_data *transport;
 	struct adf_hw_device_data *hw_device;
 	struct adf_cfg_device_data *cfg;
 	struct adf_fw_loader_data *fw_loader;
 	struct adf_admin_comms *admin;
+	struct adf_dc_data *dc_data;
 	struct list_head crypto_list;
+	struct list_head compression_list;
 	unsigned long status;
 	atomic_t ref_count;
 	struct dentry *debugfs_dir;
diff --git a/drivers/crypto/qat/qat_common/adf_cfg_strings.h b/drivers/crypto/qat/qat_common/adf_cfg_strings.h
index 655248dbf962d..5d8c3bdb258c1 100644
--- a/drivers/crypto/qat/qat_common/adf_cfg_strings.h
+++ b/drivers/crypto/qat/qat_common/adf_cfg_strings.h
@@ -20,6 +20,7 @@
 #define ADF_ETRMGR_BANK "Bank"
 #define ADF_RING_SYM_BANK_NUM "BankSymNumber"
 #define ADF_RING_ASYM_BANK_NUM "BankAsymNumber"
+#define ADF_RING_DC_BANK_NUM "BankDcNumber"
 #define ADF_CY "Cy"
 #define ADF_DC "Dc"
 #define ADF_CFG_DC "dc"
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index b8ec0268d2d29..7189265573c08 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -120,6 +120,14 @@ void qat_algs_unregister(void);
 int qat_asym_algs_register(void);
 void qat_asym_algs_unregister(void);
 
+struct qat_compression_instance *qat_compression_get_instance_node(int node);
+void qat_compression_put_instance(struct qat_compression_instance *inst);
+int qat_compression_register(void);
+int qat_compression_unregister(void);
+int qat_comp_algs_register(void);
+void qat_comp_algs_unregister(void);
+void qat_comp_alg_callback(void *resp);
+
 int adf_isr_resource_alloc(struct adf_accel_dev *accel_dev);
 void adf_isr_resource_free(struct adf_accel_dev *accel_dev);
 int adf_vf_isr_resource_alloc(struct adf_accel_dev *accel_dev);
diff --git a/drivers/crypto/qat/qat_common/adf_ctl_drv.c b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
index 82b69e1f725ba..9190532b27eb5 100644
--- a/drivers/crypto/qat/qat_common/adf_ctl_drv.c
+++ b/drivers/crypto/qat/qat_common/adf_ctl_drv.c
@@ -438,8 +438,13 @@ static int __init adf_register_ctl_device_driver(void)
 	if (qat_crypto_register())
 		goto err_crypto_register;
 
+	if (qat_compression_register())
+		goto err_compression_register;
+
 	return 0;
 
+err_compression_register:
+	qat_crypto_unregister();
 err_crypto_register:
 	adf_exit_vf_wq();
 err_vf_wq:
@@ -463,6 +468,7 @@ static void __exit adf_unregister_ctl_device_driver(void)
 	adf_exit_vf_wq();
 	adf_exit_pf_wq();
 	qat_crypto_unregister();
+	qat_compression_unregister();
 	adf_clean_vf_map(false);
 	mutex_destroy(&adf_ctl_lock);
 }
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_config.c b/drivers/crypto/qat/qat_common/adf_gen2_config.c
index 1c490e1859a76..eeb30da7587a5 100644
--- a/drivers/crypto/qat/qat_common/adf_gen2_config.c
+++ b/drivers/crypto/qat/qat_common/adf_gen2_config.c
@@ -6,6 +6,7 @@
 #include "adf_gen2_config.h"
 #include "adf_common_drv.h"
 #include "qat_crypto.h"
+#include "qat_compression.h"
 #include "adf_transport_access_macros.h"
 
 static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev)
@@ -23,14 +24,6 @@ static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	else
 		instances = 0;
 
-	ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC);
-	if (ret)
-		goto err;
-
-	ret = adf_cfg_section_add(accel_dev, "Accelerator0");
-	if (ret)
-		goto err;
-
 	for (i = 0; i < instances; i++) {
 		val = i;
 		snprintf(key, sizeof(key), ADF_CY "%d" ADF_RING_ASYM_BANK_NUM, i);
@@ -108,10 +101,68 @@ static int adf_gen2_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
-	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
-	return 0;
+	return ret;
+
+err:
+	dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n");
+	return ret;
+}
+
+static int adf_gen2_comp_dev_config(struct adf_accel_dev *accel_dev)
+{
+	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
+	int banks = GET_MAX_BANKS(accel_dev);
+	int cpus = num_online_cpus();
+	unsigned long val;
+	int instances;
+	int ret;
+	int i;
+
+	if (adf_hw_dev_has_compression(accel_dev))
+		instances = min(cpus, banks);
+	else
+		instances = 0;
+
+	for (i = 0; i < instances; i++) {
+		val = i;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 512;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 6;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 14;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+	}
+
+	val = i;
+	ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC,
+					  &val, ADF_DEC);
+	if (ret)
+		return ret;
+
+	return ret;
+
 err:
-	dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n");
+	dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n");
 	return ret;
 }
 
@@ -126,6 +177,30 @@ err:
  */
 int adf_gen2_dev_config(struct adf_accel_dev *accel_dev)
 {
-	return adf_gen2_crypto_dev_config(accel_dev);
+	int ret;
+
+	ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC);
+	if (ret)
+		goto err;
+
+	ret = adf_cfg_section_add(accel_dev, "Accelerator0");
+	if (ret)
+		goto err;
+
+	ret = adf_gen2_crypto_dev_config(accel_dev);
+	if (ret)
+		goto err;
+
+	ret = adf_gen2_comp_dev_config(accel_dev);
+	if (ret)
+		goto err;
+
+	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
+
+	return ret;
+
+err:
+	dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n");
+	return ret;
 }
 EXPORT_SYMBOL_GPL(adf_gen2_dev_config);
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_dc.c b/drivers/crypto/qat/qat_common/adf_gen2_dc.c
new file mode 100644
index 0000000000000..47261b1c1da69
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen2_dc.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation */
+#include "adf_accel_devices.h"
+#include "adf_gen2_dc.h"
+#include "icp_qat_fw_comp.h"
+
+static void qat_comp_build_deflate_ctx(void *ctx)
+{
+	struct icp_qat_fw_comp_req *req_tmpl = (struct icp_qat_fw_comp_req *)ctx;
+	struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+	struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+	struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars;
+	struct icp_qat_fw_comp_cd_hdr *comp_cd_ctrl = &req_tmpl->comp_cd_ctrl;
+
+	memset(req_tmpl, 0, sizeof(*req_tmpl));
+	header->hdr_flags =
+		ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
+	header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP;
+	header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC;
+	header->comn_req_flags =
+		ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA,
+					    QAT_COMN_PTR_TYPE_SGL);
+	header->serv_specif_flags =
+		ICP_QAT_FW_COMP_FLAGS_BUILD(ICP_QAT_FW_COMP_STATELESS_SESSION,
+					    ICP_QAT_FW_COMP_NOT_AUTO_SELECT_BEST,
+					    ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST,
+					    ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST,
+					    ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF);
+	cd_pars->u.sl.comp_slice_cfg_word[0] =
+		ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_COMPRESS,
+						    ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED,
+						    ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE,
+						    ICP_QAT_HW_COMPRESSION_DEPTH_1,
+						    ICP_QAT_HW_COMPRESSION_FILE_TYPE_0);
+	req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER;
+	req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC;
+	req_pars->req_par_flags =
+		ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(ICP_QAT_FW_COMP_SOP,
+						      ICP_QAT_FW_COMP_EOP,
+						      ICP_QAT_FW_COMP_BFINAL,
+						      ICP_QAT_FW_COMP_CNV,
+						      ICP_QAT_FW_COMP_CNV_RECOVERY,
+						      ICP_QAT_FW_COMP_NO_CNV_DFX,
+						      ICP_QAT_FW_COMP_CRC_MODE_LEGACY,
+						      ICP_QAT_FW_COMP_NO_XXHASH_ACC,
+						      ICP_QAT_FW_COMP_CNV_ERROR_NONE,
+						      ICP_QAT_FW_COMP_NO_APPEND_CRC,
+						      ICP_QAT_FW_COMP_NO_DROP_DATA);
+	ICP_QAT_FW_COMN_NEXT_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_DRAM_WR);
+	ICP_QAT_FW_COMN_CURR_ID_SET(comp_cd_ctrl, ICP_QAT_FW_SLICE_COMP);
+
+	/* Fill second half of the template for decompression */
+	memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl));
+	req_tmpl++;
+	header = &req_tmpl->comn_hdr;
+	header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
+	cd_pars = &req_tmpl->cd_pars;
+	cd_pars->u.sl.comp_slice_cfg_word[0] =
+		ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS,
+						    ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED,
+						    ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE,
+						    ICP_QAT_HW_COMPRESSION_DEPTH_1,
+						    ICP_QAT_HW_COMPRESSION_FILE_TYPE_0);
+}
+
+void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops)
+{
+	dc_ops->build_deflate_ctx = qat_comp_build_deflate_ctx;
+}
+EXPORT_SYMBOL_GPL(adf_gen2_init_dc_ops);
diff --git a/drivers/crypto/qat/qat_common/adf_gen2_dc.h b/drivers/crypto/qat/qat_common/adf_gen2_dc.h
new file mode 100644
index 0000000000000..6eae023354d76
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen2_dc.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef ADF_GEN2_DC_H
+#define ADF_GEN2_DC_H
+
+#include "adf_accel_devices.h"
+
+void adf_gen2_init_dc_ops(struct adf_dc_ops *dc_ops);
+
+#endif /* ADF_GEN2_DC_H */
diff --git a/drivers/crypto/qat/qat_common/adf_init.c b/drivers/crypto/qat/qat_common/adf_init.c
index 33a9a46d69494..cef7bb8ec0073 100644
--- a/drivers/crypto/qat/qat_common/adf_init.c
+++ b/drivers/crypto/qat/qat_common/adf_init.c
@@ -209,6 +209,14 @@ int adf_dev_start(struct adf_accel_dev *accel_dev)
 		clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
 		return -EFAULT;
 	}
+
+	if (!list_empty(&accel_dev->compression_list) && qat_comp_algs_register()) {
+		dev_err(&GET_DEV(accel_dev),
+			"Failed to register compression algs\n");
+		set_bit(ADF_STATUS_STARTING, &accel_dev->status);
+		clear_bit(ADF_STATUS_STARTED, &accel_dev->status);
+		return -EFAULT;
+	}
 	return 0;
 }
 EXPORT_SYMBOL_GPL(adf_dev_start);
@@ -242,6 +250,9 @@ void adf_dev_stop(struct adf_accel_dev *accel_dev)
 		qat_asym_algs_unregister();
 	}
 
+	if (!list_empty(&accel_dev->compression_list))
+		qat_comp_algs_unregister();
+
 	list_for_each(list_itr, &service_table) {
 		service = list_entry(list_itr, struct service_hndl, list);
 		if (!test_bit(accel_dev->accel_id, service->start_status))
diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c
index b2db1d70d71fb..d85a90cc387b4 100644
--- a/drivers/crypto/qat/qat_common/adf_sriov.c
+++ b/drivers/crypto/qat/qat_common/adf_sriov.c
@@ -170,6 +170,10 @@ int adf_sriov_configure(struct pci_dev *pdev, int numvfs)
 	if (adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
 					ADF_NUM_CY, (void *)&val, ADF_DEC))
 		return -EFAULT;
+	ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC,
+					  &val, ADF_DEC);
+	if (ret)
+		return ret;
 
 	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
 
diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw.h b/drivers/crypto/qat/qat_common/icp_qat_fw.h
index 6dc09d270082f..c141160421e19 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_fw.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_fw.h
@@ -116,6 +116,10 @@ struct icp_qat_fw_comn_resp {
 #define ICP_QAT_FW_COMN_VALID_FLAG_BITPOS 7
 #define ICP_QAT_FW_COMN_VALID_FLAG_MASK 0x1
 #define ICP_QAT_FW_COMN_HDR_RESRVD_FLD_MASK 0x7F
+#define ICP_QAT_FW_COMN_CNV_FLAG_BITPOS 6
+#define ICP_QAT_FW_COMN_CNV_FLAG_MASK 0x1
+#define ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS 5
+#define ICP_QAT_FW_COMN_CNVNR_FLAG_MASK 0x1
 
 #define ICP_QAT_FW_COMN_OV_SRV_TYPE_GET(icp_qat_fw_comn_req_hdr_t) \
 	icp_qat_fw_comn_req_hdr_t.service_type
@@ -132,6 +136,26 @@ struct icp_qat_fw_comn_resp {
 #define ICP_QAT_FW_COMN_HDR_VALID_FLAG_GET(hdr_t) \
 	ICP_QAT_FW_COMN_VALID_FLAG_GET(hdr_t.hdr_flags)
 
+#define ICP_QAT_FW_COMN_HDR_CNVNR_FLAG_GET(hdr_flags) \
+	QAT_FIELD_GET(hdr_flags, \
+	ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS, \
+	ICP_QAT_FW_COMN_CNVNR_FLAG_MASK)
+
+#define ICP_QAT_FW_COMN_HDR_CNVNR_FLAG_SET(hdr_t, val) \
+	QAT_FIELD_SET((hdr_t.hdr_flags), (val), \
+	ICP_QAT_FW_COMN_CNVNR_FLAG_BITPOS, \
+	ICP_QAT_FW_COMN_CNVNR_FLAG_MASK)
+
+#define ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(hdr_flags) \
+	QAT_FIELD_GET(hdr_flags, \
+	ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \
+	ICP_QAT_FW_COMN_CNV_FLAG_MASK)
+
+#define ICP_QAT_FW_COMN_HDR_CNV_FLAG_SET(hdr_t, val) \
+	QAT_FIELD_SET((hdr_t.hdr_flags), (val), \
+	ICP_QAT_FW_COMN_CNV_FLAG_BITPOS, \
+	ICP_QAT_FW_COMN_CNV_FLAG_MASK)
+
 #define ICP_QAT_FW_COMN_HDR_VALID_FLAG_SET(hdr_t, val) \
 	ICP_QAT_FW_COMN_VALID_FLAG_SET(hdr_t, val)
 
diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h b/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h
new file mode 100644
index 0000000000000..a03d43fef2b37
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/icp_qat_fw_comp.h
@@ -0,0 +1,404 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef _ICP_QAT_FW_COMP_H_
+#define _ICP_QAT_FW_COMP_H_
+#include "icp_qat_fw.h"
+
+enum icp_qat_fw_comp_cmd_id {
+	ICP_QAT_FW_COMP_CMD_STATIC = 0,
+	ICP_QAT_FW_COMP_CMD_DYNAMIC = 1,
+	ICP_QAT_FW_COMP_CMD_DECOMPRESS = 2,
+	ICP_QAT_FW_COMP_CMD_DELIMITER
+};
+
+enum icp_qat_fw_comp_20_cmd_id {
+	ICP_QAT_FW_COMP_20_CMD_LZ4_COMPRESS = 3,
+	ICP_QAT_FW_COMP_20_CMD_LZ4_DECOMPRESS = 4,
+	ICP_QAT_FW_COMP_20_CMD_LZ4S_COMPRESS = 5,
+	ICP_QAT_FW_COMP_20_CMD_LZ4S_DECOMPRESS = 6,
+	ICP_QAT_FW_COMP_20_CMD_XP10_COMPRESS = 7,
+	ICP_QAT_FW_COMP_20_CMD_XP10_DECOMPRESS = 8,
+	ICP_QAT_FW_COMP_20_CMD_RESERVED_9 = 9,
+	ICP_QAT_FW_COMP_23_CMD_ZSTD_COMPRESS = 10,
+	ICP_QAT_FW_COMP_23_CMD_ZSTD_DECOMPRESS = 11,
+	ICP_QAT_FW_COMP_20_CMD_DELIMITER
+};
+
+#define ICP_QAT_FW_COMP_STATELESS_SESSION 0
+#define ICP_QAT_FW_COMP_STATEFUL_SESSION 1
+#define ICP_QAT_FW_COMP_NOT_AUTO_SELECT_BEST 0
+#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST 1
+#define ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST 0
+#define ICP_QAT_FW_COMP_ENH_AUTO_SELECT_BEST 1
+#define ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST 0
+#define ICP_QAT_FW_COMP_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST 1
+#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_USED_AS_INTMD_BUF 1
+#define ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF 0
+#define ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS 2
+#define ICP_QAT_FW_COMP_SESSION_TYPE_MASK 0x1
+#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS 3
+#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK 0x1
+#define ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS 4
+#define ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK 0x1
+#define ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS 5
+#define ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK 0x1
+#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS 7
+#define ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK 0x1
+
+#define ICP_QAT_FW_COMP_FLAGS_BUILD(sesstype, autoselect, enhanced_asb, \
+	ret_uncomp, secure_ram) \
+	((((sesstype) & ICP_QAT_FW_COMP_SESSION_TYPE_MASK) << \
+	ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS) | \
+	(((autoselect) & ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK) << \
+	ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS) | \
+	(((enhanced_asb) & ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK) << \
+	ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS) | \
+	(((ret_uncomp) & ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK) << \
+	ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS) | \
+	(((secure_ram) & ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK) << \
+	ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS))
+
+#define ICP_QAT_FW_COMP_SESSION_TYPE_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS, \
+	ICP_QAT_FW_COMP_SESSION_TYPE_MASK)
+
+#define ICP_QAT_FW_COMP_SESSION_TYPE_SET(flags, val) \
+	QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_SESSION_TYPE_BITPOS, \
+	ICP_QAT_FW_COMP_SESSION_TYPE_MASK)
+
+#define ICP_QAT_FW_COMP_AUTO_SELECT_BEST_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_AUTO_SELECT_BEST_BITPOS, \
+	ICP_QAT_FW_COMP_AUTO_SELECT_BEST_MASK)
+
+#define ICP_QAT_FW_COMP_EN_ASB_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_BITPOS, \
+	ICP_QAT_FW_COMP_ENHANCED_AUTO_SELECT_BEST_MASK)
+
+#define ICP_QAT_FW_COMP_RET_UNCOMP_GET(flags) \
+	QAT_FIELD_GET(flags, \
+	ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_BITPOS, \
+	ICP_QAT_FW_COMP_RET_DISABLE_TYPE0_HEADER_DATA_MASK)
+
+#define ICP_QAT_FW_COMP_SECURE_RAM_USE_GET(flags) \
+	QAT_FIELD_GET(flags, \
+	ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_BITPOS, \
+	ICP_QAT_FW_COMP_DISABLE_SECURE_RAM_AS_INTMD_BUF_MASK)
+
+struct icp_qat_fw_comp_req_hdr_cd_pars {
+	union {
+		struct {
+			__u64 content_desc_addr;
+			__u16 content_desc_resrvd1;
+			__u8 content_desc_params_sz;
+			__u8 content_desc_hdr_resrvd2;
+			__u32 content_desc_resrvd3;
+		} s;
+		struct {
+			__u32 comp_slice_cfg_word[ICP_QAT_FW_NUM_LONGWORDS_2];
+			__u32 content_desc_resrvd4;
+		} sl;
+	} u;
+};
+
+struct icp_qat_fw_comp_req_params {
+	__u32 comp_len;
+	__u32 out_buffer_sz;
+	union {
+		struct {
+			__u32 initial_crc32;
+			__u32 initial_adler;
+		} legacy;
+		__u64 crc_data_addr;
+	} crc;
+	__u32 req_par_flags;
+	__u32 rsrvd;
+};
+
+#define ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(sop, eop, bfinal, cnv, cnvnr, \
+					      cnvdfx, crc, xxhash_acc, \
+					      cnv_error_type, append_crc, \
+					      drop_data) \
+	((((sop) & ICP_QAT_FW_COMP_SOP_MASK) << \
+	ICP_QAT_FW_COMP_SOP_BITPOS) | \
+	(((eop) & ICP_QAT_FW_COMP_EOP_MASK) << \
+	ICP_QAT_FW_COMP_EOP_BITPOS) | \
+	(((bfinal) & ICP_QAT_FW_COMP_BFINAL_MASK) \
+	<< ICP_QAT_FW_COMP_BFINAL_BITPOS) | \
+	(((cnv) & ICP_QAT_FW_COMP_CNV_MASK) << \
+	ICP_QAT_FW_COMP_CNV_BITPOS) | \
+	(((cnvnr) & ICP_QAT_FW_COMP_CNVNR_MASK) \
+	<< ICP_QAT_FW_COMP_CNVNR_BITPOS) | \
+	(((cnvdfx) & ICP_QAT_FW_COMP_CNV_DFX_MASK) \
+	<< ICP_QAT_FW_COMP_CNV_DFX_BITPOS) | \
+	(((crc) & ICP_QAT_FW_COMP_CRC_MODE_MASK) \
+	<< ICP_QAT_FW_COMP_CRC_MODE_BITPOS) | \
+	(((xxhash_acc) & ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK) \
+	<< ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS) | \
+	(((cnv_error_type) & ICP_QAT_FW_COMP_CNV_ERROR_MASK) \
+	<< ICP_QAT_FW_COMP_CNV_ERROR_BITPOS) | \
+	(((append_crc) & ICP_QAT_FW_COMP_APPEND_CRC_MASK) \
+	<< ICP_QAT_FW_COMP_APPEND_CRC_BITPOS) | \
+	(((drop_data) & ICP_QAT_FW_COMP_DROP_DATA_MASK) \
+	<< ICP_QAT_FW_COMP_DROP_DATA_BITPOS))
+
+#define ICP_QAT_FW_COMP_NOT_SOP 0
+#define ICP_QAT_FW_COMP_SOP 1
+#define ICP_QAT_FW_COMP_NOT_EOP 0
+#define ICP_QAT_FW_COMP_EOP 1
+#define ICP_QAT_FW_COMP_NOT_BFINAL 0
+#define ICP_QAT_FW_COMP_BFINAL 1
+#define ICP_QAT_FW_COMP_NO_CNV 0
+#define ICP_QAT_FW_COMP_CNV 1
+#define ICP_QAT_FW_COMP_NO_CNV_RECOVERY 0
+#define ICP_QAT_FW_COMP_CNV_RECOVERY 1
+#define ICP_QAT_FW_COMP_NO_CNV_DFX 0
+#define ICP_QAT_FW_COMP_CNV_DFX 1
+#define ICP_QAT_FW_COMP_CRC_MODE_LEGACY 0
+#define ICP_QAT_FW_COMP_CRC_MODE_E2E 1
+#define ICP_QAT_FW_COMP_NO_XXHASH_ACC 0
+#define ICP_QAT_FW_COMP_XXHASH_ACC 1
+#define ICP_QAT_FW_COMP_APPEND_CRC 1
+#define ICP_QAT_FW_COMP_NO_APPEND_CRC 0
+#define ICP_QAT_FW_COMP_DROP_DATA 1
+#define ICP_QAT_FW_COMP_NO_DROP_DATA 0
+#define ICP_QAT_FW_COMP_SOP_BITPOS 0
+#define ICP_QAT_FW_COMP_SOP_MASK 0x1
+#define ICP_QAT_FW_COMP_EOP_BITPOS 1
+#define ICP_QAT_FW_COMP_EOP_MASK 0x1
+#define ICP_QAT_FW_COMP_BFINAL_BITPOS 6
+#define ICP_QAT_FW_COMP_BFINAL_MASK 0x1
+#define ICP_QAT_FW_COMP_CNV_BITPOS 16
+#define ICP_QAT_FW_COMP_CNV_MASK 0x1
+#define ICP_QAT_FW_COMP_CNVNR_BITPOS 17
+#define ICP_QAT_FW_COMP_CNVNR_MASK 0x1
+#define ICP_QAT_FW_COMP_CNV_DFX_BITPOS 18
+#define ICP_QAT_FW_COMP_CNV_DFX_MASK 0x1
+#define ICP_QAT_FW_COMP_CRC_MODE_BITPOS 19
+#define ICP_QAT_FW_COMP_CRC_MODE_MASK 0x1
+#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS 20
+#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK 0x1
+#define ICP_QAT_FW_COMP_CNV_ERROR_BITPOS 21
+#define ICP_QAT_FW_COMP_CNV_ERROR_MASK 0b111
+#define ICP_QAT_FW_COMP_CNV_ERROR_NONE 0b000
+#define ICP_QAT_FW_COMP_CNV_ERROR_CHECKSUM 0b001
+#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR_OBC_DIFF 0b010
+#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR 0b011
+#define ICP_QAT_FW_COMP_CNV_ERROR_XLT 0b100
+#define ICP_QAT_FW_COMP_CNV_ERROR_DCPR_IBC_DIFF 0b101
+#define ICP_QAT_FW_COMP_APPEND_CRC_BITPOS 24
+#define ICP_QAT_FW_COMP_APPEND_CRC_MASK 0x1
+#define ICP_QAT_FW_COMP_DROP_DATA_BITPOS 25
+#define ICP_QAT_FW_COMP_DROP_DATA_MASK 0x1
+
+#define ICP_QAT_FW_COMP_SOP_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_SOP_BITPOS, \
+	ICP_QAT_FW_COMP_SOP_MASK)
+
+#define ICP_QAT_FW_COMP_SOP_SET(flags, val) \
+	QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_SOP_BITPOS, \
+	ICP_QAT_FW_COMP_SOP_MASK)
+
+#define ICP_QAT_FW_COMP_EOP_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_EOP_BITPOS, \
+	ICP_QAT_FW_COMP_EOP_MASK)
+
+#define ICP_QAT_FW_COMP_EOP_SET(flags, val) \
+	QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_EOP_BITPOS, \
+	ICP_QAT_FW_COMP_EOP_MASK)
+
+#define ICP_QAT_FW_COMP_BFINAL_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_BFINAL_BITPOS, \
+	ICP_QAT_FW_COMP_BFINAL_MASK)
+
+#define ICP_QAT_FW_COMP_BFINAL_SET(flags, val) \
+	QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_BFINAL_BITPOS, \
+	ICP_QAT_FW_COMP_BFINAL_MASK)
+
+#define ICP_QAT_FW_COMP_CNV_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_BITPOS, \
+	ICP_QAT_FW_COMP_CNV_MASK)
+
+#define ICP_QAT_FW_COMP_CNVNR_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNVNR_BITPOS, \
+	ICP_QAT_FW_COMP_CNVNR_MASK)
+
+#define ICP_QAT_FW_COMP_CNV_DFX_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_DFX_BITPOS, \
+	ICP_QAT_FW_COMP_CNV_DFX_MASK)
+
+#define ICP_QAT_FW_COMP_CNV_DFX_SET(flags, val) \
+	QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_CNV_DFX_BITPOS, \
+	ICP_QAT_FW_COMP_CNV_DFX_MASK)
+
+#define ICP_QAT_FW_COMP_CRC_MODE_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CRC_MODE_BITPOS, \
+	ICP_QAT_FW_COMP_CRC_MODE_MASK)
+
+#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS, \
+	ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK)
+
+#define ICP_QAT_FW_COMP_XXHASH_ACC_MODE_SET(flags, val) \
+	QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_XXHASH_ACC_MODE_BITPOS, \
+	ICP_QAT_FW_COMP_XXHASH_ACC_MODE_MASK)
+
+#define ICP_QAT_FW_COMP_CNV_ERROR_TYPE_GET(flags) \
+	QAT_FIELD_GET(flags, ICP_QAT_FW_COMP_CNV_ERROR_BITPOS, \
+	ICP_QAT_FW_COMP_CNV_ERROR_MASK)
+
+#define ICP_QAT_FW_COMP_CNV_ERROR_TYPE_SET(flags, val) \
+	QAT_FIELD_SET(flags, val, ICP_QAT_FW_COMP_CNV_ERROR_BITPOS, \
+	ICP_QAT_FW_COMP_CNV_ERROR_MASK)
+
+struct icp_qat_fw_xlt_req_params {
+	__u64 inter_buff_ptr;
+};
+
+struct icp_qat_fw_comp_cd_hdr {
+	__u16 ram_bank_flags;
+	__u8 comp_cfg_offset;
+	__u8 next_curr_id;
+	__u32 resrvd;
+	__u64 comp_state_addr;
+	__u64 ram_banks_addr;
+};
+
+#define COMP_CPR_INITIAL_CRC 0
+#define COMP_CPR_INITIAL_ADLER 1
+
+struct icp_qat_fw_xlt_cd_hdr {
+	__u16 resrvd1;
+	__u8 resrvd2;
+	__u8 next_curr_id;
+	__u32 resrvd3;
+};
+
+struct icp_qat_fw_comp_req {
+	struct icp_qat_fw_comn_req_hdr comn_hdr;
+	struct icp_qat_fw_comp_req_hdr_cd_pars cd_pars;
+	struct icp_qat_fw_comn_req_mid comn_mid;
+	struct icp_qat_fw_comp_req_params comp_pars;
+	union {
+		struct icp_qat_fw_xlt_req_params xlt_pars;
+		__u32 resrvd1[ICP_QAT_FW_NUM_LONGWORDS_2];
+	} u1;
+	__u32 resrvd2[ICP_QAT_FW_NUM_LONGWORDS_2];
+	struct icp_qat_fw_comp_cd_hdr comp_cd_ctrl;
+	union {
+		struct icp_qat_fw_xlt_cd_hdr xlt_cd_ctrl;
+		__u32 resrvd3[ICP_QAT_FW_NUM_LONGWORDS_2];
+	} u2;
+};
+
+struct icp_qat_fw_resp_comp_pars {
+	__u32 input_byte_counter;
+	__u32 output_byte_counter;
+	union {
+		struct {
+			__u32 curr_crc32;
+			__u32 curr_adler_32;
+		} legacy;
+		__u32 resrvd[ICP_QAT_FW_NUM_LONGWORDS_2];
+	} crc;
+};
+
+struct icp_qat_fw_comp_state {
+	__u32 rd8_counter;
+	__u32 status_flags;
+	__u32 in_counter;
+	__u32 out_counter;
+	__u64 intermediate_state;
+	__u32 lobc;
+	__u32 replaybc;
+	__u64 pcrc64_poly;
+	__u32 crc32;
+	__u32 adler_xxhash32;
+	__u64 pcrc64_xorout;
+	__u32 out_buf_size;
+	__u32 in_buf_size;
+	__u64 in_pcrc64;
+	__u64 out_pcrc64;
+	__u32 lobs;
+	__u32 libc;
+	__u64 reserved;
+	__u32 xxhash_state[4];
+	__u32 cleartext[4];
+};
+
+struct icp_qat_fw_comp_resp {
+	struct icp_qat_fw_comn_resp_hdr comn_resp;
+	__u64 opaque_data;
+	struct icp_qat_fw_resp_comp_pars comp_resp_pars;
+};
+
+#define QAT_FW_COMP_BANK_FLAG_MASK 0x1
+#define QAT_FW_COMP_BANK_I_BITPOS 8
+#define QAT_FW_COMP_BANK_H_BITPOS 7
+#define QAT_FW_COMP_BANK_G_BITPOS 6
+#define QAT_FW_COMP_BANK_F_BITPOS 5
+#define QAT_FW_COMP_BANK_E_BITPOS 4
+#define QAT_FW_COMP_BANK_D_BITPOS 3
+#define QAT_FW_COMP_BANK_C_BITPOS 2
+#define QAT_FW_COMP_BANK_B_BITPOS 1
+#define QAT_FW_COMP_BANK_A_BITPOS 0
+
+enum icp_qat_fw_comp_bank_enabled {
+	ICP_QAT_FW_COMP_BANK_DISABLED = 0,
+	ICP_QAT_FW_COMP_BANK_ENABLED = 1,
+	ICP_QAT_FW_COMP_BANK_DELIMITER = 2
+};
+
+#define ICP_QAT_FW_COMP_RAM_FLAGS_BUILD(bank_i_enable, bank_h_enable, \
+					bank_g_enable, bank_f_enable, \
+					bank_e_enable, bank_d_enable, \
+					bank_c_enable, bank_b_enable, \
+					bank_a_enable) \
+	((((bank_i_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_I_BITPOS) | \
+	(((bank_h_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_H_BITPOS) | \
+	(((bank_g_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_G_BITPOS) | \
+	(((bank_f_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_F_BITPOS) | \
+	(((bank_e_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_E_BITPOS) | \
+	(((bank_d_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_D_BITPOS) | \
+	(((bank_c_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_C_BITPOS) | \
+	(((bank_b_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_B_BITPOS) | \
+	(((bank_a_enable) & QAT_FW_COMP_BANK_FLAG_MASK) << \
+	QAT_FW_COMP_BANK_A_BITPOS))
+
+struct icp_qat_fw_comp_crc_data_struct {
+	__u32 crc32;
+	union {
+		__u32 adler;
+		__u32 xxhash;
+	} adler_xxhash_u;
+	__u32 cpr_in_crc_lo;
+	__u32 cpr_in_crc_hi;
+	__u32 cpr_out_crc_lo;
+	__u32 cpr_out_crc_hi;
+	__u32 xlt_in_crc_lo;
+	__u32 xlt_in_crc_hi;
+	__u32 xlt_out_crc_lo;
+	__u32 xlt_out_crc_hi;
+	__u32 prog_crc_poly_lo;
+	__u32 prog_crc_poly_hi;
+	__u32 xor_out_lo;
+	__u32 xor_out_hi;
+	__u32 append_crc_lo;
+	__u32 append_crc_hi;
+};
+
+struct xxhash_acc_state_buff {
+	__u32 in_counter;
+	__u32 out_counter;
+	__u32 xxhash_state[4];
+	__u32 clear_txt[4];
+};
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw.h b/drivers/crypto/qat/qat_common/icp_qat_hw.h
index 433304cad2edf..4042739bb6fa9 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_hw.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_hw.h
@@ -307,4 +307,70 @@ struct icp_qat_hw_cipher_algo_blk {
 		struct icp_qat_hw_ucs_cipher_aes256_f8 ucs_aes;
 	};
 } __aligned(64);
+
+enum icp_qat_hw_compression_direction {
+	ICP_QAT_HW_COMPRESSION_DIR_COMPRESS = 0,
+	ICP_QAT_HW_COMPRESSION_DIR_DECOMPRESS = 1,
+	ICP_QAT_HW_COMPRESSION_DIR_DELIMITER = 2
+};
+
+enum icp_qat_hw_compression_delayed_match {
+	ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DISABLED = 0,
+	ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_ENABLED = 1,
+	ICP_QAT_HW_COMPRESSION_DELAYED_MATCH_DELIMITER = 2
+};
+
+enum icp_qat_hw_compression_algo {
+	ICP_QAT_HW_COMPRESSION_ALGO_DEFLATE = 0,
+	ICP_QAT_HW_COMPRESSION_ALGO_LZS = 1,
+	ICP_QAT_HW_COMPRESSION_ALGO_DELIMITER = 2
+};
+
+enum icp_qat_hw_compression_depth {
+	ICP_QAT_HW_COMPRESSION_DEPTH_1 = 0,
+	ICP_QAT_HW_COMPRESSION_DEPTH_4 = 1,
+	ICP_QAT_HW_COMPRESSION_DEPTH_8 = 2,
+	ICP_QAT_HW_COMPRESSION_DEPTH_16 = 3,
+	ICP_QAT_HW_COMPRESSION_DEPTH_128 = 4,
+	ICP_QAT_HW_COMPRESSION_DEPTH_DELIMITER = 5
+};
+
+enum icp_qat_hw_compression_file_type {
+	ICP_QAT_HW_COMPRESSION_FILE_TYPE_0 = 0,
+	ICP_QAT_HW_COMPRESSION_FILE_TYPE_1 = 1,
+	ICP_QAT_HW_COMPRESSION_FILE_TYPE_2 = 2,
+	ICP_QAT_HW_COMPRESSION_FILE_TYPE_3 = 3,
+	ICP_QAT_HW_COMPRESSION_FILE_TYPE_4 = 4,
+	ICP_QAT_HW_COMPRESSION_FILE_TYPE_DELIMITER = 5
+};
+
+struct icp_qat_hw_compression_config {
+	__u32 lower_val;
+	__u32 upper_val;
+};
+
+#define QAT_COMPRESSION_DIR_BITPOS 4
+#define QAT_COMPRESSION_DIR_MASK 0x7
+#define QAT_COMPRESSION_DELAYED_MATCH_BITPOS 16
+#define QAT_COMPRESSION_DELAYED_MATCH_MASK 0x1
+#define QAT_COMPRESSION_ALGO_BITPOS 31
+#define QAT_COMPRESSION_ALGO_MASK 0x1
+#define QAT_COMPRESSION_DEPTH_BITPOS 28
+#define QAT_COMPRESSION_DEPTH_MASK 0x7
+#define QAT_COMPRESSION_FILE_TYPE_BITPOS 24
+#define QAT_COMPRESSION_FILE_TYPE_MASK 0xF
+
+#define ICP_QAT_HW_COMPRESSION_CONFIG_BUILD(dir, delayed, \
+	algo, depth, filetype) \
+	((((dir) & QAT_COMPRESSION_DIR_MASK) << \
+	QAT_COMPRESSION_DIR_BITPOS) | \
+	(((delayed) & QAT_COMPRESSION_DELAYED_MATCH_MASK) << \
+	QAT_COMPRESSION_DELAYED_MATCH_BITPOS) | \
+	(((algo) & QAT_COMPRESSION_ALGO_MASK) << \
+	QAT_COMPRESSION_ALGO_BITPOS) | \
+	(((depth) & QAT_COMPRESSION_DEPTH_MASK) << \
+	QAT_COMPRESSION_DEPTH_BITPOS) | \
+	(((filetype) & QAT_COMPRESSION_FILE_TYPE_MASK) << \
+	QAT_COMPRESSION_FILE_TYPE_BITPOS))
+
 #endif
diff --git a/drivers/crypto/qat/qat_common/qat_comp_algs.c b/drivers/crypto/qat/qat_common/qat_comp_algs.c
new file mode 100644
index 0000000000000..63fd4ac33dbf3
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_comp_algs.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation */
+#include <linux/crypto.h>
+#include <crypto/acompress.h>
+#include <crypto/internal/acompress.h>
+#include <crypto/scatterwalk.h>
+#include <linux/dma-mapping.h>
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "qat_bl.h"
+#include "qat_comp_req.h"
+#include "qat_compression.h"
+#include "qat_algs_send.h"
+
+static DEFINE_MUTEX(algs_lock);
+static unsigned int active_devs;
+
+enum direction {
+	DECOMPRESSION = 0,
+	COMPRESSION = 1,
+};
+
+struct qat_compression_ctx {
+	u8 comp_ctx[QAT_COMP_CTX_SIZE];
+	struct qat_compression_instance *inst;
+};
+
+struct qat_compression_req {
+	u8 req[QAT_COMP_REQ_SIZE];
+	struct qat_compression_ctx *qat_compression_ctx;
+	struct acomp_req *acompress_req;
+	struct qat_request_buffs buf;
+	enum direction dir;
+	int actual_dlen;
+	struct qat_alg_req alg_req;
+};
+
+static int qat_alg_send_dc_message(struct qat_compression_req *qat_req,
+				   struct qat_compression_instance *inst,
+				   struct crypto_async_request *base)
+{
+	struct qat_alg_req *alg_req = &qat_req->alg_req;
+
+	alg_req->fw_req = (u32 *)&qat_req->req;
+	alg_req->tx_ring = inst->dc_tx;
+	alg_req->base = base;
+	alg_req->backlog = &inst->backlog;
+
+	return qat_alg_send_message(alg_req);
+}
+
+static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
+				      void *resp)
+{
+	struct acomp_req *areq = qat_req->acompress_req;
+	struct qat_compression_ctx *ctx = qat_req->qat_compression_ctx;
+	struct adf_accel_dev *accel_dev = ctx->inst->accel_dev;
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq);
+	struct qat_compression_instance *inst = ctx->inst;
+	int consumed, produced;
+	s8 cmp_err, xlt_err;
+	int res = -EBADMSG;
+	int status;
+	u8 cnv;
+
+	status = qat_comp_get_cmp_status(resp);
+	status |= qat_comp_get_xlt_status(resp);
+	cmp_err = qat_comp_get_cmp_err(resp);
+	xlt_err = qat_comp_get_xlt_err(resp);
+
+	consumed = qat_comp_get_consumed_ctr(resp);
+	produced = qat_comp_get_produced_ctr(resp);
+
+	dev_dbg(&GET_DEV(accel_dev),
+		"[%s][%s][%s] slen = %8d dlen = %8d consumed = %8d produced = %8d cmp_err = %3d xlt_err = %3d",
+		crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)),
+		qat_req->dir == COMPRESSION ? "comp  " : "decomp",
+		status ? "ERR" : "OK ",
+		areq->slen, areq->dlen, consumed, produced, cmp_err, xlt_err);
+
+	areq->dlen = 0;
+
+	if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
+		goto end;
+
+	if (qat_req->dir == COMPRESSION) {
+		cnv = qat_comp_get_cmp_cnv_flag(resp);
+		if (unlikely(!cnv)) {
+			dev_err(&GET_DEV(accel_dev),
+				"Verified compression not supported\n");
+			goto end;
+		}
+
+		if (unlikely(produced > qat_req->actual_dlen)) {
+			memset(inst->dc_data->ovf_buff, 0,
+			       inst->dc_data->ovf_buff_sz);
+			dev_dbg(&GET_DEV(accel_dev),
+				"Actual buffer overflow: produced=%d, dlen=%d\n",
+				produced, qat_req->actual_dlen);
+			goto end;
+		}
+	}
+
+	res = 0;
+	areq->dlen = produced;
+
+end:
+	qat_bl_free_bufl(accel_dev, &qat_req->buf);
+	areq->base.complete(&areq->base, res);
+}
+
+void qat_comp_alg_callback(void *resp)
+{
+	struct qat_compression_req *qat_req =
+			(void *)(__force long)qat_comp_get_opaque(resp);
+	struct qat_instance_backlog *backlog = qat_req->alg_req.backlog;
+
+	qat_comp_generic_callback(qat_req, resp);
+
+	qat_alg_send_backlog(backlog);
+}
+
+static int qat_comp_alg_init_tfm(struct crypto_acomp *acomp_tfm)
+{
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
+	struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct qat_compression_instance *inst;
+	int node;
+
+	if (tfm->node == NUMA_NO_NODE)
+		node = numa_node_id();
+	else
+		node = tfm->node;
+
+	memset(ctx, 0, sizeof(*ctx));
+	inst = qat_compression_get_instance_node(node);
+	if (!inst)
+		return -EINVAL;
+	ctx->inst = inst;
+
+	ctx->inst->build_deflate_ctx(ctx->comp_ctx);
+
+	return 0;
+}
+
+static void qat_comp_alg_exit_tfm(struct crypto_acomp *acomp_tfm)
+{
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
+	struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	qat_compression_put_instance(ctx->inst);
+	memset(ctx, 0, sizeof(*ctx));
+}
+
+static int qat_comp_alg_compress_decompress(struct acomp_req *areq,
+					    enum direction dir)
+{
+	struct qat_compression_req *qat_req = acomp_request_ctx(areq);
+	struct crypto_acomp *acomp_tfm = crypto_acomp_reqtfm(areq);
+	struct crypto_tfm *tfm = crypto_acomp_tfm(acomp_tfm);
+	struct qat_compression_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct qat_compression_instance *inst = ctx->inst;
+	struct qat_sgl_to_bufl_params *p_params = NULL;
+	gfp_t f = qat_algs_alloc_flags(&areq->base);
+	struct qat_sgl_to_bufl_params params;
+	unsigned int slen = areq->slen;
+	unsigned int dlen = areq->dlen;
+	dma_addr_t sfbuf, dfbuf;
+	u8 *req = qat_req->req;
+	size_t ovf_buff_sz;
+	int ret;
+
+	if (!areq->src || !slen)
+		return -EINVAL;
+
+	if (areq->dst && !dlen)
+		return -EINVAL;
+
+	/* Handle acomp requests that require the allocation of a destination
+	 * buffer. The size of the destination buffer is double the source
+	 * buffer (rounded up to the size of a page) to fit the decompressed
+	 * output or an expansion on the data for compression.
+	 */
+	if (!areq->dst) {
+		dlen = round_up(2 * slen, PAGE_SIZE);
+		areq->dst = sgl_alloc(dlen, f, NULL);
+		if (!areq->dst)
+			return -ENOMEM;
+	}
+
+	if (dir == COMPRESSION) {
+		params.extra_dst_buff = inst->dc_data->ovf_buff_p;
+		ovf_buff_sz = inst->dc_data->ovf_buff_sz;
+		params.sz_extra_dst_buff = ovf_buff_sz;
+		p_params = &params;
+	}
+
+	ret = qat_bl_sgl_to_bufl(ctx->inst->accel_dev, areq->src, areq->dst,
+				 &qat_req->buf, p_params, f);
+	if (unlikely(ret))
+		return ret;
+
+	sfbuf = qat_req->buf.blp;
+	dfbuf = qat_req->buf.bloutp;
+	qat_req->qat_compression_ctx = ctx;
+	qat_req->acompress_req = areq;
+	qat_req->dir = dir;
+
+	if (dir == COMPRESSION) {
+		qat_req->actual_dlen = dlen;
+		dlen += ovf_buff_sz;
+		qat_comp_create_compression_req(ctx->comp_ctx, req,
+						(u64)(__force long)sfbuf, slen,
+						(u64)(__force long)dfbuf, dlen,
+						(u64)(__force long)qat_req);
+	} else {
+		qat_comp_create_decompression_req(ctx->comp_ctx, req,
+						  (u64)(__force long)sfbuf, slen,
+						  (u64)(__force long)dfbuf, dlen,
+						  (u64)(__force long)qat_req);
+	}
+
+	ret = qat_alg_send_dc_message(qat_req, inst, &areq->base);
+	if (ret == -ENOSPC)
+		qat_bl_free_bufl(inst->accel_dev, &qat_req->buf);
+
+	return ret;
+}
+
+static int qat_comp_alg_compress(struct acomp_req *req)
+{
+	return qat_comp_alg_compress_decompress(req, COMPRESSION);
+}
+
+static int qat_comp_alg_decompress(struct acomp_req *req)
+{
+	return qat_comp_alg_compress_decompress(req, DECOMPRESSION);
+}
+
+static struct acomp_alg qat_acomp[] = { {
+	.base = {
+		.cra_name = "deflate",
+		.cra_driver_name = "qat_deflate",
+		.cra_priority = 4001,
+		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
+		.cra_ctxsize = sizeof(struct qat_compression_ctx),
+		.cra_module = THIS_MODULE,
+	},
+	.init = qat_comp_alg_init_tfm,
+	.exit = qat_comp_alg_exit_tfm,
+	.compress = qat_comp_alg_compress,
+	.decompress = qat_comp_alg_decompress,
+	.dst_free = sgl_free,
+	.reqsize = sizeof(struct qat_compression_req),
+} };
+
+int qat_comp_algs_register(void)
+{
+	int ret = 0;
+
+	mutex_lock(&algs_lock);
+	if (++active_devs == 1)
+		ret = crypto_register_acomps(qat_acomp, ARRAY_SIZE(qat_acomp));
+	mutex_unlock(&algs_lock);
+	return ret;
+}
+
+void qat_comp_algs_unregister(void)
+{
+	mutex_lock(&algs_lock);
+	if (--active_devs == 0)
+		crypto_unregister_acomps(qat_acomp, ARRAY_SIZE(qat_acomp));
+	mutex_unlock(&algs_lock);
+}
diff --git a/drivers/crypto/qat/qat_common/qat_comp_req.h b/drivers/crypto/qat/qat_common/qat_comp_req.h
new file mode 100644
index 0000000000000..18a1f33a6db98
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_comp_req.h
@@ -0,0 +1,113 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef _QAT_COMP_REQ_H_
+#define _QAT_COMP_REQ_H_
+
+#include "icp_qat_fw_comp.h"
+
+#define QAT_COMP_REQ_SIZE (sizeof(struct icp_qat_fw_comp_req))
+#define QAT_COMP_CTX_SIZE (QAT_COMP_REQ_SIZE * 2)
+
+static inline void qat_comp_create_req(void *ctx, void *req, u64 src, u32 slen,
+				       u64 dst, u32 dlen, u64 opaque)
+{
+	struct icp_qat_fw_comp_req *fw_tmpl = ctx;
+	struct icp_qat_fw_comp_req *fw_req = req;
+	struct icp_qat_fw_comp_req_params *req_pars = &fw_req->comp_pars;
+
+	memcpy(fw_req, fw_tmpl, sizeof(*fw_req));
+	fw_req->comn_mid.src_data_addr = src;
+	fw_req->comn_mid.src_length = slen;
+	fw_req->comn_mid.dest_data_addr = dst;
+	fw_req->comn_mid.dst_length = dlen;
+	fw_req->comn_mid.opaque_data = opaque;
+	req_pars->comp_len = slen;
+	req_pars->out_buffer_sz = dlen;
+}
+
+static inline void qat_comp_create_compression_req(void *ctx, void *req,
+						   u64 src, u32 slen,
+						   u64 dst, u32 dlen,
+						   u64 opaque)
+{
+	qat_comp_create_req(ctx, req, src, slen, dst, dlen, opaque);
+}
+
+static inline void qat_comp_create_decompression_req(void *ctx, void *req,
+						     u64 src, u32 slen,
+						     u64 dst, u32 dlen,
+						     u64 opaque)
+{
+	struct icp_qat_fw_comp_req *fw_tmpl = ctx;
+
+	fw_tmpl++;
+	qat_comp_create_req(fw_tmpl, req, src, slen, dst, dlen, opaque);
+}
+
+static inline u32 qat_comp_get_consumed_ctr(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+
+	return qat_resp->comp_resp_pars.input_byte_counter;
+}
+
+static inline u32 qat_comp_get_produced_ctr(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+
+	return qat_resp->comp_resp_pars.output_byte_counter;
+}
+
+static inline u32 qat_comp_get_produced_adler32(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+
+	return qat_resp->comp_resp_pars.crc.legacy.curr_adler_32;
+}
+
+static inline u64 qat_comp_get_opaque(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+
+	return qat_resp->opaque_data;
+}
+
+static inline s8 qat_comp_get_cmp_err(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+
+	return qat_resp->comn_resp.comn_error.cmp_err_code;
+}
+
+static inline s8 qat_comp_get_xlt_err(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+
+	return qat_resp->comn_resp.comn_error.xlat_err_code;
+}
+
+static inline s8 qat_comp_get_cmp_status(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+	u8 stat_filed = qat_resp->comn_resp.comn_status;
+
+	return ICP_QAT_FW_COMN_RESP_CMP_STAT_GET(stat_filed);
+}
+
+static inline s8 qat_comp_get_xlt_status(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+	u8 stat_filed = qat_resp->comn_resp.comn_status;
+
+	return ICP_QAT_FW_COMN_RESP_XLAT_STAT_GET(stat_filed);
+}
+
+static inline u8 qat_comp_get_cmp_cnv_flag(void *resp)
+{
+	struct icp_qat_fw_comp_resp *qat_resp = resp;
+	u8 flags = qat_resp->comn_resp.hdr_flags;
+
+	return ICP_QAT_FW_COMN_HDR_CNV_FLAG_GET(flags);
+}
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/qat_compression.c b/drivers/crypto/qat/qat_common/qat_compression.c
new file mode 100644
index 0000000000000..9fd10f4242f84
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_compression.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation */
+#include <linux/module.h>
+#include <linux/slab.h>
+#include "adf_accel_devices.h"
+#include "adf_common_drv.h"
+#include "adf_transport.h"
+#include "adf_transport_access_macros.h"
+#include "adf_cfg.h"
+#include "adf_cfg_strings.h"
+#include "qat_compression.h"
+#include "icp_qat_fw.h"
+
+#define SEC ADF_KERNEL_SEC
+
+static struct service_hndl qat_compression;
+
+void qat_compression_put_instance(struct qat_compression_instance *inst)
+{
+	atomic_dec(&inst->refctr);
+	adf_dev_put(inst->accel_dev);
+}
+
+static int qat_compression_free_instances(struct adf_accel_dev *accel_dev)
+{
+	struct qat_compression_instance *inst;
+	struct list_head *list_ptr, *tmp;
+	int i;
+
+	list_for_each_safe(list_ptr, tmp, &accel_dev->compression_list) {
+		inst = list_entry(list_ptr,
+				  struct qat_compression_instance, list);
+
+		for (i = 0; i < atomic_read(&inst->refctr); i++)
+			qat_compression_put_instance(inst);
+
+		if (inst->dc_tx)
+			adf_remove_ring(inst->dc_tx);
+
+		if (inst->dc_rx)
+			adf_remove_ring(inst->dc_rx);
+
+		list_del(list_ptr);
+		kfree(inst);
+	}
+	return 0;
+}
+
+struct qat_compression_instance *qat_compression_get_instance_node(int node)
+{
+	struct qat_compression_instance *inst = NULL;
+	struct adf_accel_dev *accel_dev = NULL;
+	unsigned long best = ~0;
+	struct list_head *itr;
+
+	list_for_each(itr, adf_devmgr_get_head()) {
+		struct adf_accel_dev *tmp_dev;
+		unsigned long ctr;
+		int tmp_dev_node;
+
+		tmp_dev = list_entry(itr, struct adf_accel_dev, list);
+		tmp_dev_node = dev_to_node(&GET_DEV(tmp_dev));
+
+		if ((node == tmp_dev_node || tmp_dev_node < 0) &&
+		    adf_dev_started(tmp_dev) && !list_empty(&tmp_dev->compression_list)) {
+			ctr = atomic_read(&tmp_dev->ref_count);
+			if (best > ctr) {
+				accel_dev = tmp_dev;
+				best = ctr;
+			}
+		}
+	}
+
+	if (!accel_dev) {
+		pr_info("QAT: Could not find a device on node %d\n", node);
+		/* Get any started device */
+		list_for_each(itr, adf_devmgr_get_head()) {
+			struct adf_accel_dev *tmp_dev;
+
+			tmp_dev = list_entry(itr, struct adf_accel_dev, list);
+			if (adf_dev_started(tmp_dev) &&
+			    !list_empty(&tmp_dev->compression_list)) {
+				accel_dev = tmp_dev;
+				break;
+			}
+		}
+	}
+
+	if (!accel_dev)
+		return NULL;
+
+	best = ~0;
+	list_for_each(itr, &accel_dev->compression_list) {
+		struct qat_compression_instance *tmp_inst;
+		unsigned long ctr;
+
+		tmp_inst = list_entry(itr, struct qat_compression_instance, list);
+		ctr = atomic_read(&tmp_inst->refctr);
+		if (best > ctr) {
+			inst = tmp_inst;
+			best = ctr;
+		}
+	}
+	if (inst) {
+		if (adf_dev_get(accel_dev)) {
+			dev_err(&GET_DEV(accel_dev), "Could not increment dev refctr\n");
+			return NULL;
+		}
+		atomic_inc(&inst->refctr);
+	}
+	return inst;
+}
+
+static int qat_compression_create_instances(struct adf_accel_dev *accel_dev)
+{
+	struct qat_compression_instance *inst;
+	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
+	char val[ADF_CFG_MAX_VAL_LEN_IN_BYTES];
+	unsigned long num_inst, num_msg_dc;
+	unsigned long bank;
+	int msg_size;
+	int ret;
+	int i;
+
+	INIT_LIST_HEAD(&accel_dev->compression_list);
+	strscpy(key, ADF_NUM_DC, sizeof(key));
+	ret = adf_cfg_get_param_value(accel_dev, SEC, key, val);
+	if (ret)
+		return ret;
+
+	ret = kstrtoul(val, 10, &num_inst);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < num_inst; i++) {
+		inst = kzalloc_node(sizeof(*inst), GFP_KERNEL,
+				    dev_to_node(&GET_DEV(accel_dev)));
+		if (!inst) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		list_add_tail(&inst->list, &accel_dev->compression_list);
+		inst->id = i;
+		atomic_set(&inst->refctr, 0);
+		inst->accel_dev = accel_dev;
+		inst->build_deflate_ctx = GET_DC_OPS(accel_dev)->build_deflate_ctx;
+
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i);
+		ret = adf_cfg_get_param_value(accel_dev, SEC, key, val);
+		if (ret)
+			return ret;
+
+		ret = kstrtoul(val, 10, &bank);
+		if (ret)
+			return ret;
+
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i);
+		ret = adf_cfg_get_param_value(accel_dev, SEC, key, val);
+		if (ret)
+			return ret;
+
+		ret = kstrtoul(val, 10, &num_msg_dc);
+		if (ret)
+			return ret;
+
+		msg_size = ICP_QAT_FW_REQ_DEFAULT_SZ;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i);
+		ret = adf_create_ring(accel_dev, SEC, bank, num_msg_dc,
+				      msg_size, key, NULL, 0, &inst->dc_tx);
+		if (ret)
+			return ret;
+
+		msg_size = ICP_QAT_FW_RESP_DEFAULT_SZ;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i);
+		ret = adf_create_ring(accel_dev, SEC, bank, num_msg_dc,
+				      msg_size, key, qat_comp_alg_callback, 0,
+				      &inst->dc_rx);
+		if (ret)
+			return ret;
+
+		inst->dc_data = accel_dev->dc_data;
+		INIT_LIST_HEAD(&inst->backlog.list);
+		spin_lock_init(&inst->backlog.lock);
+	}
+	return 0;
+err:
+	qat_compression_free_instances(accel_dev);
+	return ret;
+}
+
+static int qat_compression_alloc_dc_data(struct adf_accel_dev *accel_dev)
+{
+	struct device *dev = &GET_DEV(accel_dev);
+	dma_addr_t obuff_p = DMA_MAPPING_ERROR;
+	size_t ovf_buff_sz = QAT_COMP_MAX_SKID;
+	struct adf_dc_data *dc_data = NULL;
+	u8 *obuff = NULL;
+
+	dc_data = devm_kzalloc(dev, sizeof(*dc_data), GFP_KERNEL);
+	if (!dc_data)
+		goto err;
+
+	obuff = kzalloc_node(ovf_buff_sz, GFP_KERNEL, dev_to_node(dev));
+	if (!obuff)
+		goto err;
+
+	obuff_p = dma_map_single(dev, obuff, ovf_buff_sz, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, obuff_p)))
+		goto err;
+
+	dc_data->ovf_buff = obuff;
+	dc_data->ovf_buff_p = obuff_p;
+	dc_data->ovf_buff_sz = ovf_buff_sz;
+
+	accel_dev->dc_data = dc_data;
+
+	return 0;
+
+err:
+	accel_dev->dc_data = NULL;
+	kfree(obuff);
+	devm_kfree(dev, dc_data);
+	return -ENOMEM;
+}
+
+static void qat_free_dc_data(struct adf_accel_dev *accel_dev)
+{
+	struct adf_dc_data *dc_data = accel_dev->dc_data;
+	struct device *dev = &GET_DEV(accel_dev);
+
+	if (!dc_data)
+		return;
+
+	dma_unmap_single(dev, dc_data->ovf_buff_p, dc_data->ovf_buff_sz,
+			 DMA_FROM_DEVICE);
+	memset(dc_data->ovf_buff, 0, dc_data->ovf_buff_sz);
+	kfree(dc_data->ovf_buff);
+	devm_kfree(dev, dc_data);
+	accel_dev->dc_data = NULL;
+}
+
+static int qat_compression_init(struct adf_accel_dev *accel_dev)
+{
+	int ret;
+
+	ret = qat_compression_alloc_dc_data(accel_dev);
+	if (ret)
+		return ret;
+
+	ret = qat_compression_create_instances(accel_dev);
+	if (ret)
+		qat_free_dc_data(accel_dev);
+
+	return ret;
+}
+
+static int qat_compression_shutdown(struct adf_accel_dev *accel_dev)
+{
+	qat_free_dc_data(accel_dev);
+	return qat_compression_free_instances(accel_dev);
+}
+
+static int qat_compression_event_handler(struct adf_accel_dev *accel_dev,
+					 enum adf_event event)
+{
+	int ret;
+
+	switch (event) {
+	case ADF_EVENT_INIT:
+		ret = qat_compression_init(accel_dev);
+		break;
+	case ADF_EVENT_SHUTDOWN:
+		ret = qat_compression_shutdown(accel_dev);
+		break;
+	case ADF_EVENT_RESTARTING:
+	case ADF_EVENT_RESTARTED:
+	case ADF_EVENT_START:
+	case ADF_EVENT_STOP:
+	default:
+		ret = 0;
+	}
+	return ret;
+}
+
+int qat_compression_register(void)
+{
+	memset(&qat_compression, 0, sizeof(qat_compression));
+	qat_compression.event_hld = qat_compression_event_handler;
+	qat_compression.name = "qat_compression";
+	return adf_service_register(&qat_compression);
+}
+
+int qat_compression_unregister(void)
+{
+	return adf_service_unregister(&qat_compression);
+}
diff --git a/drivers/crypto/qat/qat_common/qat_compression.h b/drivers/crypto/qat/qat_common/qat_compression.h
new file mode 100644
index 0000000000000..aebac2302dcf2
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/qat_compression.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef _QAT_COMPRESSION_H_
+#define _QAT_COMPRESSION_H_
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include "adf_accel_devices.h"
+#include "qat_algs_send.h"
+
+#define QAT_COMP_MAX_SKID 4096
+
+struct qat_compression_instance {
+	struct adf_etr_ring_data *dc_tx;
+	struct adf_etr_ring_data *dc_rx;
+	struct adf_accel_dev *accel_dev;
+	struct list_head list;
+	unsigned long state;
+	int id;
+	atomic_t refctr;
+	struct qat_instance_backlog backlog;
+	struct adf_dc_data *dc_data;
+	void (*build_deflate_ctx)(void *ctx);
+};
+
+static inline bool adf_hw_dev_has_compression(struct adf_accel_dev *accel_dev)
+{
+	struct adf_hw_device_data *hw_device = accel_dev->hw_device;
+	u32 mask = ~hw_device->accel_capabilities_mask;
+
+	if (mask & ADF_ACCEL_CAPABILITIES_COMPRESSION)
+		return false;
+
+	return true;
+}
+
+#endif
diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
index baacf817abf68..bc80bb475118d 100644
--- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c
@@ -3,6 +3,7 @@
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
+#include <adf_gen2_dc.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include "adf_dh895xcc_hw_data.h"
@@ -242,6 +243,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
 	hw_data->pfvf_ops.disable_all_vf2pf_interrupts = disable_all_vf2pf_interrupts;
 	hw_data->pfvf_ops.disable_pending_vf2pf_interrupts = disable_pending_vf2pf_interrupts;
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
+	adf_gen2_init_dc_ops(&hw_data->dc_ops);
 }
 
 void adf_clean_hw_data_dh895xcc(struct adf_hw_device_data *hw_data)
diff --git a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
index b933a00fb91b8..70e56cc16eceb 100644
--- a/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
+++ b/drivers/crypto/qat/qat_dh895xccvf/adf_dh895xccvf_hw_data.c
@@ -3,6 +3,7 @@
 #include <adf_accel_devices.h>
 #include <adf_common_drv.h>
 #include <adf_gen2_config.h>
+#include <adf_gen2_dc.h>
 #include <adf_gen2_hw_data.h>
 #include <adf_gen2_pfvf.h>
 #include <adf_pfvf_vf_msg.h>
@@ -91,6 +92,7 @@ void adf_init_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
 	adf_devmgr_update_class_index(hw_data);
 	adf_gen2_init_vf_pfvf_ops(&hw_data->pfvf_ops);
 	adf_gen2_init_hw_csr_ops(&hw_data->csr_ops);
+	adf_gen2_init_dc_ops(&hw_data->dc_ops);
 }
 
 void adf_clean_hw_data_dh895xcciov(struct adf_hw_device_data *hw_data)
-- 
GitLab


From 5b14b2b307e4045b38a4961718cbe9c17cef2bf4 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:21 +0000
Subject: [PATCH 3564/4122] crypto: qat - enable deflate for QAT GEN4

Enable deflate for QAT GEN4 devices.

This adds
  (1) logic to create configuration entries at probe time for the
  compression instances for QAT GEN4 devices;
  (2) the implementation of QAT GEN4 specific compression operations,
  required since the creation of the compression request template is
  different between GEN2 and GEN4; and
  (3) updates to the firmware API related to compression for GEN4.

The implementation configures the device to produce data compressed
dynamically, optimized for throughput over compression ratio.

Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Reviewed-by: Wojciech Ziemba <wojciech.ziemba@intel.com>
Reviewed-by: Adam Guerin <adam.guerin@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.c    |   4 +-
 .../crypto/qat/qat_4xxx/adf_4xxx_hw_data.h    |   2 +-
 drivers/crypto/qat/qat_4xxx/adf_drv.c         | 139 +++++++-
 drivers/crypto/qat/qat_common/Makefile        |   1 +
 drivers/crypto/qat/qat_common/adf_gen4_dc.c   |  83 +++++
 drivers/crypto/qat/qat_common/adf_gen4_dc.h   |  10 +
 .../qat/qat_common/icp_qat_hw_20_comp.h       | 164 ++++++++++
 .../qat/qat_common/icp_qat_hw_20_comp_defs.h  | 300 ++++++++++++++++++
 8 files changed, 689 insertions(+), 14 deletions(-)
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_dc.c
 create mode 100644 drivers/crypto/qat/qat_common/adf_gen4_dc.h
 create mode 100644 drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h
 create mode 100644 drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h

diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
index fda5f699ff575..834a705180c02 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.c
@@ -4,6 +4,7 @@
 #include <adf_accel_devices.h>
 #include <adf_cfg.h>
 #include <adf_common_drv.h>
+#include <adf_gen4_dc.h>
 #include <adf_gen4_hw_data.h>
 #include <adf_gen4_pfvf.h>
 #include <adf_gen4_pm.h>
@@ -357,10 +358,11 @@ void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data)
 	hw_data->ring_pair_reset = adf_gen4_ring_pair_reset;
 	hw_data->enable_pm = adf_gen4_enable_pm;
 	hw_data->handle_pm_interrupt = adf_gen4_handle_pm_interrupt;
-	hw_data->dev_config = adf_crypto_dev_config;
+	hw_data->dev_config = adf_gen4_dev_config;
 
 	adf_gen4_init_hw_csr_ops(&hw_data->csr_ops);
 	adf_gen4_init_pf_pfvf_ops(&hw_data->pfvf_ops);
+	adf_gen4_init_dc_ops(&hw_data->dc_ops);
 }
 
 void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data)
diff --git a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
index 9d49248931f6a..e98428ba78e29 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
+++ b/drivers/crypto/qat/qat_4xxx/adf_4xxx_hw_data.h
@@ -70,6 +70,6 @@ enum icp_qat_4xxx_slice_mask {
 
 void adf_init_hw_data_4xxx(struct adf_hw_device_data *hw_data);
 void adf_clean_hw_data_4xxx(struct adf_hw_device_data *hw_data);
-int adf_crypto_dev_config(struct adf_accel_dev *accel_dev);
+int adf_gen4_dev_config(struct adf_accel_dev *accel_dev);
 
 #endif
diff --git a/drivers/crypto/qat/qat_4xxx/adf_drv.c b/drivers/crypto/qat/qat_4xxx/adf_drv.c
index 8496c451b48e1..b3a4c7b238644 100644
--- a/drivers/crypto/qat/qat_4xxx/adf_drv.c
+++ b/drivers/crypto/qat/qat_4xxx/adf_drv.c
@@ -9,6 +9,7 @@
 #include <adf_common_drv.h>
 
 #include "adf_4xxx_hw_data.h"
+#include "qat_compression.h"
 #include "qat_crypto.h"
 #include "adf_transport_access_macros.h"
 
@@ -19,6 +20,16 @@ static const struct pci_device_id adf_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, adf_pci_tbl);
 
+enum configs {
+	DEV_CFG_CY = 0,
+	DEV_CFG_DC,
+};
+
+static const char * const services_operations[] = {
+	ADF_CFG_CY,
+	ADF_CFG_DC,
+};
+
 static void adf_cleanup_accel(struct adf_accel_dev *accel_dev)
 {
 	if (accel_dev->hw_device) {
@@ -53,7 +64,7 @@ static int adf_cfg_dev_init(struct adf_accel_dev *accel_dev)
 	return 0;
 }
 
-int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
+static int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
 {
 	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
 	int banks = GET_MAX_BANKS(accel_dev);
@@ -68,14 +79,6 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	else
 		instances = 0;
 
-	ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC);
-	if (ret)
-		goto err;
-
-	ret = adf_cfg_section_add(accel_dev, "Accelerator0");
-	if (ret)
-		goto err;
-
 	for (i = 0; i < instances; i++) {
 		val = i;
 		bank = i * 2;
@@ -161,10 +164,122 @@ int adf_crypto_dev_config(struct adf_accel_dev *accel_dev)
 	if (ret)
 		goto err;
 
-	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
 	return 0;
 err:
-	dev_err(&GET_DEV(accel_dev), "Failed to start QAT accel dev\n");
+	dev_err(&GET_DEV(accel_dev), "Failed to add configuration for crypto\n");
+	return ret;
+}
+
+static int adf_comp_dev_config(struct adf_accel_dev *accel_dev)
+{
+	char key[ADF_CFG_MAX_KEY_LEN_IN_BYTES];
+	int banks = GET_MAX_BANKS(accel_dev);
+	int cpus = num_online_cpus();
+	unsigned long val;
+	int instances;
+	int ret;
+	int i;
+
+	if (adf_hw_dev_has_compression(accel_dev))
+		instances = min(cpus, banks);
+	else
+		instances = 0;
+
+	for (i = 0; i < instances; i++) {
+		val = i;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_BANK_NUM, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 512;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_SIZE, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 0;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_TX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = 1;
+		snprintf(key, sizeof(key), ADF_DC "%d" ADF_RING_DC_RX, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC,
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+
+		val = ADF_COALESCING_DEF_TIME;
+		snprintf(key, sizeof(key), ADF_ETRMGR_COALESCE_TIMER_FORMAT, i);
+		ret = adf_cfg_add_key_value_param(accel_dev, "Accelerator0",
+						  key, &val, ADF_DEC);
+		if (ret)
+			goto err;
+	}
+
+	val = i;
+	ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_DC,
+					  &val, ADF_DEC);
+	if (ret)
+		goto err;
+
+	val = 0;
+	ret = adf_cfg_add_key_value_param(accel_dev, ADF_KERNEL_SEC, ADF_NUM_CY,
+					  &val, ADF_DEC);
+	if (ret)
+		goto err;
+
+	return 0;
+err:
+	dev_err(&GET_DEV(accel_dev), "Failed to add configuration for compression\n");
+	return ret;
+}
+
+int adf_gen4_dev_config(struct adf_accel_dev *accel_dev)
+{
+	char services[ADF_CFG_MAX_VAL_LEN_IN_BYTES] = {0};
+	int ret;
+
+	ret = adf_cfg_section_add(accel_dev, ADF_KERNEL_SEC);
+	if (ret)
+		goto err;
+
+	ret = adf_cfg_section_add(accel_dev, "Accelerator0");
+	if (ret)
+		goto err;
+
+	ret = adf_cfg_get_param_value(accel_dev, ADF_GENERAL_SEC,
+				      ADF_SERVICES_ENABLED, services);
+	if (ret)
+		goto err;
+
+	ret = sysfs_match_string(services_operations, services);
+	if (ret < 0)
+		goto err;
+
+	switch (ret) {
+	case DEV_CFG_CY:
+		ret = adf_crypto_dev_config(accel_dev);
+		break;
+	case DEV_CFG_DC:
+		ret = adf_comp_dev_config(accel_dev);
+		break;
+	}
+
+	if (ret)
+		goto err;
+
+	set_bit(ADF_STATUS_CONFIGURED, &accel_dev->status);
+
+	return ret;
+
+err:
+	dev_err(&GET_DEV(accel_dev), "Failed to configure QAT driver\n");
 	return ret;
 }
 
@@ -300,7 +415,7 @@ static int adf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto out_err_disable_aer;
 
-	ret = adf_crypto_dev_config(accel_dev);
+	ret = hw_data->dev_config(accel_dev);
 	if (ret)
 		goto out_err_disable_aer;
 
diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile
index e3db4786738f7..1fb8d50f509f8 100644
--- a/drivers/crypto/qat/qat_common/Makefile
+++ b/drivers/crypto/qat/qat_common/Makefile
@@ -16,6 +16,7 @@ intel_qat-objs := adf_cfg.o \
 	adf_gen4_hw_data.o \
 	adf_gen4_pm.o \
 	adf_gen2_dc.o \
+	adf_gen4_dc.o \
 	qat_crypto.o \
 	qat_compression.o \
 	qat_comp_algs.o \
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_dc.c b/drivers/crypto/qat/qat_common/adf_gen4_dc.c
new file mode 100644
index 0000000000000..5859238e37de5
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_dc.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation */
+#include "adf_accel_devices.h"
+#include "icp_qat_fw_comp.h"
+#include "icp_qat_hw_20_comp.h"
+#include "adf_gen4_dc.h"
+
+static void qat_comp_build_deflate(void *ctx)
+{
+	struct icp_qat_fw_comp_req *req_tmpl =
+				(struct icp_qat_fw_comp_req *)ctx;
+	struct icp_qat_fw_comn_req_hdr *header = &req_tmpl->comn_hdr;
+	struct icp_qat_fw_comp_req_hdr_cd_pars *cd_pars = &req_tmpl->cd_pars;
+	struct icp_qat_fw_comp_req_params *req_pars = &req_tmpl->comp_pars;
+	struct icp_qat_hw_comp_20_config_csr_upper hw_comp_upper_csr = {0};
+	struct icp_qat_hw_comp_20_config_csr_lower hw_comp_lower_csr = {0};
+	struct icp_qat_hw_decomp_20_config_csr_lower hw_decomp_lower_csr = {0};
+	u32 upper_val;
+	u32 lower_val;
+
+	memset(req_tmpl, 0, sizeof(*req_tmpl));
+	header->hdr_flags =
+		ICP_QAT_FW_COMN_HDR_FLAGS_BUILD(ICP_QAT_FW_COMN_REQ_FLAG_SET);
+	header->service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_COMP;
+	header->service_cmd_id = ICP_QAT_FW_COMP_CMD_STATIC;
+	header->comn_req_flags =
+		ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_CD_FLD_TYPE_16BYTE_DATA,
+					    QAT_COMN_PTR_TYPE_SGL);
+	header->serv_specif_flags =
+		ICP_QAT_FW_COMP_FLAGS_BUILD(ICP_QAT_FW_COMP_STATELESS_SESSION,
+					    ICP_QAT_FW_COMP_AUTO_SELECT_BEST,
+					    ICP_QAT_FW_COMP_NOT_ENH_AUTO_SELECT_BEST,
+					    ICP_QAT_FW_COMP_NOT_DISABLE_TYPE0_ENH_AUTO_SELECT_BEST,
+					    ICP_QAT_FW_COMP_ENABLE_SECURE_RAM_USED_AS_INTMD_BUF);
+	hw_comp_lower_csr.skip_ctrl = ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL;
+	hw_comp_lower_csr.algo = ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77;
+	hw_comp_lower_csr.lllbd = ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED;
+	hw_comp_lower_csr.sd = ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1;
+	hw_comp_lower_csr.hash_update = ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW;
+	hw_comp_lower_csr.edmm = ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED;
+	hw_comp_upper_csr.nice = ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL;
+	hw_comp_upper_csr.lazy = ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL;
+
+	upper_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(hw_comp_upper_csr);
+	lower_val = ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(hw_comp_lower_csr);
+
+	cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
+	cd_pars->u.sl.comp_slice_cfg_word[1] = upper_val;
+
+	req_pars->crc.legacy.initial_adler = COMP_CPR_INITIAL_ADLER;
+	req_pars->crc.legacy.initial_crc32 = COMP_CPR_INITIAL_CRC;
+	req_pars->req_par_flags =
+		ICP_QAT_FW_COMP_REQ_PARAM_FLAGS_BUILD(ICP_QAT_FW_COMP_SOP,
+						      ICP_QAT_FW_COMP_EOP,
+						      ICP_QAT_FW_COMP_BFINAL,
+						      ICP_QAT_FW_COMP_CNV,
+						      ICP_QAT_FW_COMP_CNV_RECOVERY,
+						      ICP_QAT_FW_COMP_NO_CNV_DFX,
+						      ICP_QAT_FW_COMP_CRC_MODE_LEGACY,
+						      ICP_QAT_FW_COMP_NO_XXHASH_ACC,
+						      ICP_QAT_FW_COMP_CNV_ERROR_NONE,
+						      ICP_QAT_FW_COMP_NO_APPEND_CRC,
+						      ICP_QAT_FW_COMP_NO_DROP_DATA);
+
+	/* Fill second half of the template for decompression */
+	memcpy(req_tmpl + 1, req_tmpl, sizeof(*req_tmpl));
+	req_tmpl++;
+	header = &req_tmpl->comn_hdr;
+	header->service_cmd_id = ICP_QAT_FW_COMP_CMD_DECOMPRESS;
+	cd_pars = &req_tmpl->cd_pars;
+
+	hw_decomp_lower_csr.algo = ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE;
+	lower_val = ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(hw_decomp_lower_csr);
+
+	cd_pars->u.sl.comp_slice_cfg_word[0] = lower_val;
+	cd_pars->u.sl.comp_slice_cfg_word[1] = 0;
+}
+
+void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops)
+{
+	dc_ops->build_deflate_ctx = qat_comp_build_deflate;
+}
+EXPORT_SYMBOL_GPL(adf_gen4_init_dc_ops);
diff --git a/drivers/crypto/qat/qat_common/adf_gen4_dc.h b/drivers/crypto/qat/qat_common/adf_gen4_dc.h
new file mode 100644
index 0000000000000..0b1a6774412eb
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/adf_gen4_dc.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef ADF_GEN4_DC_H
+#define ADF_GEN4_DC_H
+
+#include "adf_accel_devices.h"
+
+void adf_gen4_init_dc_ops(struct adf_dc_ops *dc_ops);
+
+#endif /* ADF_GEN4_DC_H */
diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h
new file mode 100644
index 0000000000000..7ea8962272f2f
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef _ICP_QAT_HW_20_COMP_H_
+#define _ICP_QAT_HW_20_COMP_H_
+
+#include "icp_qat_hw_20_comp_defs.h"
+#include "icp_qat_fw.h"
+
+struct icp_qat_hw_comp_20_config_csr_lower {
+	enum icp_qat_hw_comp_20_extended_delay_match_mode edmm;
+	enum icp_qat_hw_comp_20_hw_comp_format algo;
+	enum icp_qat_hw_comp_20_search_depth sd;
+	enum icp_qat_hw_comp_20_hbs_control hbs;
+	enum icp_qat_hw_comp_20_abd abd;
+	enum icp_qat_hw_comp_20_lllbd_ctrl lllbd;
+	enum icp_qat_hw_comp_20_min_match_control mmctrl;
+	enum icp_qat_hw_comp_20_skip_hash_collision hash_col;
+	enum icp_qat_hw_comp_20_skip_hash_update hash_update;
+	enum icp_qat_hw_comp_20_byte_skip skip_ctrl;
+};
+
+static inline __u32
+ICP_QAT_FW_COMP_20_BUILD_CONFIG_LOWER(struct icp_qat_hw_comp_20_config_csr_lower csr)
+{
+	u32 val32 = 0;
+
+	QAT_FIELD_SET(val32, csr.algo,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_MASK);
+	QAT_FIELD_SET(val32, csr.sd,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_MASK);
+	QAT_FIELD_SET(val32, csr.edmm,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_MASK);
+	QAT_FIELD_SET(val32, csr.hbs,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.lllbd,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_MASK);
+	QAT_FIELD_SET(val32, csr.mmctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.hash_col,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_MASK);
+	QAT_FIELD_SET(val32, csr.hash_update,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_MASK);
+	QAT_FIELD_SET(val32, csr.skip_ctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_MASK);
+	QAT_FIELD_SET(val32, csr.abd, ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_MASK);
+
+	return __builtin_bswap32(val32);
+}
+
+struct icp_qat_hw_comp_20_config_csr_upper {
+	enum icp_qat_hw_comp_20_scb_control scb_ctrl;
+	enum icp_qat_hw_comp_20_rmb_control rmb_ctrl;
+	enum icp_qat_hw_comp_20_som_control som_ctrl;
+	enum icp_qat_hw_comp_20_skip_hash_rd_control skip_hash_ctrl;
+	enum icp_qat_hw_comp_20_scb_unload_control scb_unload_ctrl;
+	enum icp_qat_hw_comp_20_disable_token_fusion_control disable_token_fusion_ctrl;
+	enum icp_qat_hw_comp_20_lbms lbms;
+	enum icp_qat_hw_comp_20_scb_mode_reset_mask scb_mode_reset;
+	__u16 lazy;
+	__u16 nice;
+};
+
+static inline __u32
+ICP_QAT_FW_COMP_20_BUILD_CONFIG_UPPER(struct icp_qat_hw_comp_20_config_csr_upper csr)
+{
+	u32 val32 = 0;
+
+	QAT_FIELD_SET(val32, csr.scb_ctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.rmb_ctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.som_ctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.skip_hash_ctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.scb_unload_ctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.disable_token_fusion_ctrl,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.lbms,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_MASK);
+	QAT_FIELD_SET(val32, csr.scb_mode_reset,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK);
+	QAT_FIELD_SET(val32, csr.lazy,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_MASK);
+	QAT_FIELD_SET(val32, csr.nice,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_BITPOS,
+		      ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_MASK);
+
+	return __builtin_bswap32(val32);
+}
+
+struct icp_qat_hw_decomp_20_config_csr_lower {
+	enum icp_qat_hw_decomp_20_hbs_control hbs;
+	enum icp_qat_hw_decomp_20_lbms lbms;
+	enum icp_qat_hw_decomp_20_hw_comp_format algo;
+	enum icp_qat_hw_decomp_20_min_match_control mmctrl;
+	enum icp_qat_hw_decomp_20_lz4_block_checksum_present lbc;
+};
+
+static inline __u32
+ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_LOWER(struct icp_qat_hw_decomp_20_config_csr_lower csr)
+{
+	u32 val32 = 0;
+
+	QAT_FIELD_SET(val32, csr.hbs,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.lbms,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_BITPOS,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_MASK);
+	QAT_FIELD_SET(val32, csr.algo,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_BITPOS,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_MASK);
+	QAT_FIELD_SET(val32, csr.mmctrl,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.lbc,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_BITPOS,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_MASK);
+
+	return __builtin_bswap32(val32);
+}
+
+struct icp_qat_hw_decomp_20_config_csr_upper {
+	enum icp_qat_hw_decomp_20_speculative_decoder_control sdc;
+	enum icp_qat_hw_decomp_20_mini_cam_control mcc;
+};
+
+static inline __u32
+ICP_QAT_FW_DECOMP_20_BUILD_CONFIG_UPPER(struct icp_qat_hw_decomp_20_config_csr_upper csr)
+{
+	u32 val32 = 0;
+
+	QAT_FIELD_SET(val32, csr.sdc,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_BITPOS,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_MASK);
+	QAT_FIELD_SET(val32, csr.mcc,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_BITPOS,
+		      ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_MASK);
+
+	return __builtin_bswap32(val32);
+}
+
+#endif
diff --git a/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h
new file mode 100644
index 0000000000000..208d4554283bf
--- /dev/null
+++ b/drivers/crypto/qat/qat_common/icp_qat_hw_20_comp_defs.h
@@ -0,0 +1,300 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2022 Intel Corporation */
+#ifndef _ICP_QAT_HW_20_COMP_DEFS_H
+#define _ICP_QAT_HW_20_COMP_DEFS_H
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_BITPOS 31
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_MASK 0x1
+
+enum icp_qat_hw_comp_20_scb_control {
+	ICP_QAT_HW_COMP_20_SCB_CONTROL_ENABLE = 0x0,
+	ICP_QAT_HW_COMP_20_SCB_CONTROL_DISABLE = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SCB_CONTROL_DISABLE
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_BITPOS 30
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_MASK 0x1
+
+enum icp_qat_hw_comp_20_rmb_control {
+	ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_ALL = 0x0,
+	ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_FC_ONLY = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_RMB_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_RMB_CONTROL_RESET_ALL
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_BITPOS 28
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_MASK 0x3
+
+enum icp_qat_hw_comp_20_som_control {
+	ICP_QAT_HW_COMP_20_SOM_CONTROL_NORMAL_MODE = 0x0,
+	ICP_QAT_HW_COMP_20_SOM_CONTROL_REPLAY_MODE = 0x1,
+	ICP_QAT_HW_COMP_20_SOM_CONTROL_INPUT_CRC = 0x2,
+	ICP_QAT_HW_COMP_20_SOM_CONTROL_RESERVED_MODE = 0x3,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SOM_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SOM_CONTROL_NORMAL_MODE
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_BITPOS 27
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_MASK 0x1
+
+enum icp_qat_hw_comp_20_skip_hash_rd_control {
+	ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_NO_SKIP = 0x0,
+	ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_SKIP_HASH_READS = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_RD_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SKIP_HASH_RD_CONTROL_NO_SKIP
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_BITPOS 26
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_MASK 0x1
+
+enum icp_qat_hw_comp_20_scb_unload_control {
+	ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_UNLOAD = 0x0,
+	ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_NO_UNLOAD = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_UNLOAD_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SCB_UNLOAD_CONTROL_UNLOAD
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_BITPOS 21
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_MASK 0x1
+
+enum icp_qat_hw_comp_20_disable_token_fusion_control {
+	ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_ENABLE = 0x0,
+	ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_DISABLE = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_DISABLE_TOKEN_FUSION_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_DISABLE_TOKEN_FUSION_CONTROL_ENABLE
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_BITPOS 19
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_MASK 0x3
+
+enum icp_qat_hw_comp_20_lbms {
+	ICP_QAT_HW_COMP_20_LBMS_LBMS_64KB = 0x0,
+	ICP_QAT_HW_COMP_20_LBMS_LBMS_256KB = 0x1,
+	ICP_QAT_HW_COMP_20_LBMS_LBMS_1MB = 0x2,
+	ICP_QAT_HW_COMP_20_LBMS_LBMS_4MB = 0x3,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LBMS_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_LBMS_LBMS_64KB
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_BITPOS 18
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_MASK 0x1
+
+enum icp_qat_hw_comp_20_scb_mode_reset_mask {
+	ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS = 0x0,
+	ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS_AND_HISTORY = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SCB_MODE_RESET_MASK_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SCB_MODE_RESET_MASK_RESET_COUNTERS
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_BITPOS 9
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_MASK 0x1ff
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LAZY_PARAM_DEFAULT_VAL 258
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_BITPOS 0
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_MASK 0x1ff
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_NICE_PARAM_DEFAULT_VAL 259
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS 14
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_MASK 0x7
+
+enum icp_qat_hw_comp_20_hbs_control {
+	ICP_QAT_HW_COMP_20_HBS_CONTROL_HBS_IS_32KB = 0x0,
+	ICP_QAT_HW_COMP_23_HBS_CONTROL_HBS_IS_64KB = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HBS_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_HBS_CONTROL_HBS_IS_32KB
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_BITPOS 13
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_MASK 0x1
+
+enum icp_qat_hw_comp_20_abd {
+	ICP_QAT_HW_COMP_20_ABD_ABD_ENABLED = 0x0,
+	ICP_QAT_HW_COMP_20_ABD_ABD_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_ABD_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_ABD_ABD_ENABLED
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_BITPOS 12
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_MASK 0x1
+
+enum icp_qat_hw_comp_20_lllbd_ctrl {
+	ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED = 0x0,
+	ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_DISABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_LLLBD_CTRL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_LLLBD_CTRL_LLLBD_ENABLED
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_BITPOS 8
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_MASK 0xf
+
+enum icp_qat_hw_comp_20_search_depth {
+	ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1 = 0x1,
+	ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_6 = 0x3,
+	ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_9 = 0x4,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SEARCH_DEPTH_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SEARCH_DEPTH_LEVEL_1
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_BITPOS 5
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_MASK 0x7
+
+enum icp_qat_hw_comp_20_hw_comp_format {
+	ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_ILZ77 = 0x0,
+	ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_DEFLATE = 0x1,
+	ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4 = 0x2,
+	ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_LZ4S = 0x3,
+	ICP_QAT_HW_COMP_23_HW_COMP_FORMAT_ZSTD = 0x4,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_HW_COMP_FORMAT_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_HW_COMP_FORMAT_DEFLATE
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS 4
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK 0x1
+
+enum icp_qat_hw_comp_20_min_match_control {
+	ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_3B = 0x0,
+	ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_4B = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_MIN_MATCH_CONTROL_MATCH_3B
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_BITPOS 3
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_MASK 0x1
+
+enum icp_qat_hw_comp_20_skip_hash_collision {
+	ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_ALLOW = 0x0,
+	ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_DONT_ALLOW = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_COLLISION_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SKIP_HASH_COLLISION_ALLOW
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_BITPOS 2
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_MASK 0x1
+
+enum icp_qat_hw_comp_20_skip_hash_update {
+	ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_ALLOW = 0x0,
+	ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_DONT_ALLOW = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_SKIP_HASH_UPDATE_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_SKIP_HASH_UPDATE_ALLOW
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_BITPOS 1
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_MASK 0x1
+
+enum icp_qat_hw_comp_20_byte_skip {
+	ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_TOKEN = 0x0,
+	ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_LITERAL = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_BYTE_SKIP_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_BYTE_SKIP_3BYTE_TOKEN
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_BITPOS 0
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_MASK 0x1
+
+enum icp_qat_hw_comp_20_extended_delay_match_mode {
+	ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_DISABLED = 0x0,
+	ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_ENABLED = 0x1,
+};
+
+#define ICP_QAT_HW_COMP_20_CONFIG_CSR_EXTENDED_DELAY_MATCH_MODE_DEFAULT_VAL \
+	ICP_QAT_HW_COMP_20_EXTENDED_DELAY_MATCH_MODE_EDMM_DISABLED
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_BITPOS 31
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_MASK 0x1
+
+enum icp_qat_hw_decomp_20_speculative_decoder_control {
+	ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_ENABLE = 0x0,
+	ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_DISABLE = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_SPECULATIVE_DECODER_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_DECOMP_20_SPECULATIVE_DECODER_CONTROL_ENABLE
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_BITPOS 30
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_MASK 0x1
+
+enum icp_qat_hw_decomp_20_mini_cam_control {
+	ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_ENABLE = 0x0,
+	ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_DISABLE = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MINI_CAM_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_DECOMP_20_MINI_CAM_CONTROL_ENABLE
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_BITPOS 14
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_MASK 0x7
+
+enum icp_qat_hw_decomp_20_hbs_control {
+	ICP_QAT_HW_DECOMP_20_HBS_CONTROL_HBS_IS_32KB = 0x0,
+};
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HBS_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_DECOMP_20_HBS_CONTROL_HBS_IS_32KB
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_BITPOS 8
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_MASK 0x3
+
+enum icp_qat_hw_decomp_20_lbms {
+	ICP_QAT_HW_DECOMP_20_LBMS_LBMS_64KB = 0x0,
+	ICP_QAT_HW_DECOMP_20_LBMS_LBMS_256KB = 0x1,
+	ICP_QAT_HW_DECOMP_20_LBMS_LBMS_1MB = 0x2,
+	ICP_QAT_HW_DECOMP_20_LBMS_LBMS_4MB = 0x3,
+};
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LBMS_DEFAULT_VAL \
+	ICP_QAT_HW_DECOMP_20_LBMS_LBMS_64KB
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_BITPOS 5
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_MASK 0x7
+
+enum icp_qat_hw_decomp_20_hw_comp_format {
+	ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE = 0x1,
+	ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4 = 0x2,
+	ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_LZ4S = 0x3,
+	ICP_QAT_HW_DECOMP_23_HW_DECOMP_FORMAT_ZSTD = 0x4,
+};
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_HW_DECOMP_FORMAT_DEFAULT_VAL \
+	ICP_QAT_HW_DECOMP_20_HW_DECOMP_FORMAT_DEFLATE
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_BITPOS 4
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_MASK 0x1
+
+enum icp_qat_hw_decomp_20_min_match_control {
+	ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_3B = 0x0,
+	ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_4B = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_MIN_MATCH_CONTROL_DEFAULT_VAL \
+	ICP_QAT_HW_DECOMP_20_MIN_MATCH_CONTROL_MATCH_3B
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_BITPOS 3
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_MASK 0x1
+
+enum icp_qat_hw_decomp_20_lz4_block_checksum_present {
+	ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_ABSENT = 0x0,
+	ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_PRESENT = 0x1,
+};
+
+#define ICP_QAT_HW_DECOMP_20_CONFIG_CSR_LZ4_BLOCK_CHECKSUM_PRESENT_DEFAULT_VAL \
+	ICP_QAT_HW_DECOMP_20_LZ4_BLOCK_CHKSUM_ABSENT
+
+#endif
-- 
GitLab


From 5fc8041e56782e4d44682f8c2e4d822817a4dae6 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:22 +0000
Subject: [PATCH 3565/4122] crypto: acomp - define max size for destination

The acomp API allows to send requests with a NULL destination buffer. In
this case, the algorithm implementation needs to allocate the
destination scatter list, perform the operation and return the buffer to
the user. For decompression, data is likely to expand and be bigger
than the allocated buffer.

Define the maximum size (128KB) that acomp implementations will allocate
for decompression operations as destination buffer when they receive a
request with a NULL destination buffer.

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/crypto/acompress.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/crypto/acompress.h b/include/crypto/acompress.h
index cb3d6b1c655de..e4bc96528902e 100644
--- a/include/crypto/acompress.h
+++ b/include/crypto/acompress.h
@@ -11,6 +11,7 @@
 #include <linux/crypto.h>
 
 #define CRYPTO_ACOMP_ALLOC_OUTPUT	0x00000001
+#define CRYPTO_ACOMP_DST_MAX		131072
 
 /**
  * struct acomp_req - asynchronous (de)compression request
-- 
GitLab


From 3112d0f1b0b32daac97d170dbc9d3cce69f7ff49 Mon Sep 17 00:00:00 2001
From: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Date: Mon, 28 Nov 2022 12:21:23 +0000
Subject: [PATCH 3566/4122] crypto: qat - add resubmit logic for decompression

The acomp API allows to send requests with a NULL destination buffer. In
this case, the algorithm implementation needs to allocate the
destination scatter list, perform the operation and return the buffer to
the user. For decompression, data is likely to expand and be bigger than
the allocated buffer.

This implements a re-submission mechanism for decompression requests
that is triggered if the destination buffer, allocated by the driver,
is not sufficiently big to store the output from decompression.

If an overflow is detected when processing the callback for a
decompression request with a NULL destination buffer, a workqueue is
scheduled. This allocates a new scatter list of size CRYPTO_ACOMP_DST_MAX,
now 128KB, creates a new firmware scatter list and resubmits the job to
the hardware accelerator.

Suggested-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Giovanni Cabiddu <giovanni.cabiddu@intel.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qat/qat_common/qat_bl.c        | 159 ++++++++++++++++++
 drivers/crypto/qat/qat_common/qat_bl.h        |   6 +
 drivers/crypto/qat/qat_common/qat_comp_algs.c |  70 ++++++++
 drivers/crypto/qat/qat_common/qat_comp_req.h  |  10 ++
 4 files changed, 245 insertions(+)

diff --git a/drivers/crypto/qat/qat_common/qat_bl.c b/drivers/crypto/qat/qat_common/qat_bl.c
index 221a4eb610a38..2e89ff08041bf 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.c
+++ b/drivers/crypto/qat/qat_common/qat_bl.c
@@ -222,3 +222,162 @@ int qat_bl_sgl_to_bufl(struct adf_accel_dev *accel_dev,
 				    extra_dst_buff, sz_extra_dst_buff,
 				    flags);
 }
+
+static void qat_bl_sgl_unmap(struct adf_accel_dev *accel_dev,
+			     struct qat_alg_buf_list *bl)
+{
+	struct device *dev = &GET_DEV(accel_dev);
+	int n = bl->num_bufs;
+	int i;
+
+	for (i = 0; i < n; i++)
+		if (!dma_mapping_error(dev, bl->bufers[i].addr))
+			dma_unmap_single(dev, bl->bufers[i].addr,
+					 bl->bufers[i].len, DMA_FROM_DEVICE);
+}
+
+static int qat_bl_sgl_map(struct adf_accel_dev *accel_dev,
+			  struct scatterlist *sgl,
+			  struct qat_alg_buf_list **bl)
+{
+	struct device *dev = &GET_DEV(accel_dev);
+	struct qat_alg_buf_list *bufl;
+	int node = dev_to_node(dev);
+	struct scatterlist *sg;
+	int n, i, sg_nctr;
+	size_t sz;
+
+	n = sg_nents(sgl);
+	sz = struct_size(bufl, bufers, n);
+	bufl = kzalloc_node(sz, GFP_KERNEL, node);
+	if (unlikely(!bufl))
+		return -ENOMEM;
+
+	for (i = 0; i < n; i++)
+		bufl->bufers[i].addr = DMA_MAPPING_ERROR;
+
+	sg_nctr = 0;
+	for_each_sg(sgl, sg, n, i) {
+		int y = sg_nctr;
+
+		if (!sg->length)
+			continue;
+
+		bufl->bufers[y].addr = dma_map_single(dev, sg_virt(sg),
+						      sg->length,
+						      DMA_FROM_DEVICE);
+		bufl->bufers[y].len = sg->length;
+		if (unlikely(dma_mapping_error(dev, bufl->bufers[y].addr)))
+			goto err_map;
+		sg_nctr++;
+	}
+	bufl->num_bufs = sg_nctr;
+	bufl->num_mapped_bufs = sg_nctr;
+
+	*bl = bufl;
+
+	return 0;
+
+err_map:
+	for (i = 0; i < n; i++)
+		if (!dma_mapping_error(dev, bufl->bufers[i].addr))
+			dma_unmap_single(dev, bufl->bufers[i].addr,
+					 bufl->bufers[i].len,
+					 DMA_FROM_DEVICE);
+	kfree(bufl);
+	*bl = NULL;
+
+	return -ENOMEM;
+}
+
+static void qat_bl_sgl_free_unmap(struct adf_accel_dev *accel_dev,
+				  struct scatterlist *sgl,
+				  struct qat_alg_buf_list *bl,
+				  bool free_bl)
+{
+	if (bl) {
+		qat_bl_sgl_unmap(accel_dev, bl);
+
+		if (free_bl)
+			kfree(bl);
+	}
+	if (sgl)
+		sgl_free(sgl);
+}
+
+static int qat_bl_sgl_alloc_map(struct adf_accel_dev *accel_dev,
+				struct scatterlist **sgl,
+				struct qat_alg_buf_list **bl,
+				unsigned int dlen,
+				gfp_t gfp)
+{
+	struct scatterlist *dst;
+	int ret;
+
+	dst = sgl_alloc(dlen, gfp, NULL);
+	if (!dst) {
+		dev_err(&GET_DEV(accel_dev), "sg_alloc failed\n");
+		return -ENOMEM;
+	}
+
+	ret = qat_bl_sgl_map(accel_dev, dst, bl);
+	if (ret)
+		goto err;
+
+	*sgl = dst;
+
+	return 0;
+
+err:
+	sgl_free(dst);
+	*sgl = NULL;
+	return ret;
+}
+
+int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev,
+			       struct scatterlist **sg,
+			       unsigned int dlen,
+			       struct qat_request_buffs *qat_bufs,
+			       gfp_t gfp)
+{
+	struct device *dev = &GET_DEV(accel_dev);
+	dma_addr_t new_blp = DMA_MAPPING_ERROR;
+	struct qat_alg_buf_list *new_bl;
+	struct scatterlist *new_sg;
+	size_t new_bl_size;
+	int ret;
+
+	ret = qat_bl_sgl_alloc_map(accel_dev, &new_sg, &new_bl, dlen, gfp);
+	if (ret)
+		return ret;
+
+	new_bl_size = struct_size(new_bl, bufers, new_bl->num_bufs);
+
+	/* Map new firmware SGL descriptor */
+	new_blp = dma_map_single(dev, new_bl, new_bl_size, DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, new_blp)))
+		goto err;
+
+	/* Unmap old firmware SGL descriptor */
+	dma_unmap_single(dev, qat_bufs->bloutp, qat_bufs->sz_out, DMA_TO_DEVICE);
+
+	/* Free and unmap old scatterlist */
+	qat_bl_sgl_free_unmap(accel_dev, *sg, qat_bufs->blout,
+			      !qat_bufs->sgl_dst_valid);
+
+	qat_bufs->sgl_dst_valid = false;
+	qat_bufs->blout = new_bl;
+	qat_bufs->bloutp = new_blp;
+	qat_bufs->sz_out = new_bl_size;
+
+	*sg = new_sg;
+
+	return 0;
+err:
+	qat_bl_sgl_free_unmap(accel_dev, new_sg, new_bl, true);
+
+	if (!dma_mapping_error(dev, new_blp))
+		dma_unmap_single(dev, new_blp, new_bl_size, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
diff --git a/drivers/crypto/qat/qat_common/qat_bl.h b/drivers/crypto/qat/qat_common/qat_bl.h
index 5f2ea8f352f76..8ca5e52ee9e21 100644
--- a/drivers/crypto/qat/qat_common/qat_bl.h
+++ b/drivers/crypto/qat/qat_common/qat_bl.h
@@ -58,4 +58,10 @@ static inline gfp_t qat_algs_alloc_flags(struct crypto_async_request *req)
 	return req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC;
 }
 
+int qat_bl_realloc_map_new_dst(struct adf_accel_dev *accel_dev,
+			       struct scatterlist **newd,
+			       unsigned int dlen,
+			       struct qat_request_buffs *qat_bufs,
+			       gfp_t gfp);
+
 #endif
diff --git a/drivers/crypto/qat/qat_common/qat_comp_algs.c b/drivers/crypto/qat/qat_common/qat_comp_algs.c
index 63fd4ac33dbf3..1480d36a8d2bb 100644
--- a/drivers/crypto/qat/qat_common/qat_comp_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_comp_algs.c
@@ -5,6 +5,7 @@
 #include <crypto/internal/acompress.h>
 #include <crypto/scatterwalk.h>
 #include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
 #include "adf_accel_devices.h"
 #include "adf_common_drv.h"
 #include "qat_bl.h"
@@ -25,6 +26,11 @@ struct qat_compression_ctx {
 	struct qat_compression_instance *inst;
 };
 
+struct qat_dst {
+	bool is_null;
+	int resubmitted;
+};
+
 struct qat_compression_req {
 	u8 req[QAT_COMP_REQ_SIZE];
 	struct qat_compression_ctx *qat_compression_ctx;
@@ -33,6 +39,8 @@ struct qat_compression_req {
 	enum direction dir;
 	int actual_dlen;
 	struct qat_alg_req alg_req;
+	struct work_struct resubmit;
+	struct qat_dst dst;
 };
 
 static int qat_alg_send_dc_message(struct qat_compression_req *qat_req,
@@ -49,6 +57,46 @@ static int qat_alg_send_dc_message(struct qat_compression_req *qat_req,
 	return qat_alg_send_message(alg_req);
 }
 
+static void qat_comp_resubmit(struct work_struct *work)
+{
+	struct qat_compression_req *qat_req =
+		container_of(work, struct qat_compression_req, resubmit);
+	struct qat_compression_ctx *ctx = qat_req->qat_compression_ctx;
+	struct adf_accel_dev *accel_dev = ctx->inst->accel_dev;
+	struct qat_request_buffs *qat_bufs = &qat_req->buf;
+	struct qat_compression_instance *inst = ctx->inst;
+	struct acomp_req *areq = qat_req->acompress_req;
+	struct crypto_acomp *tfm = crypto_acomp_reqtfm(areq);
+	unsigned int dlen = CRYPTO_ACOMP_DST_MAX;
+	u8 *req = qat_req->req;
+	dma_addr_t dfbuf;
+	int ret;
+
+	areq->dlen = dlen;
+
+	dev_dbg(&GET_DEV(accel_dev), "[%s][%s] retry NULL dst request - dlen = %d\n",
+		crypto_tfm_alg_driver_name(crypto_acomp_tfm(tfm)),
+		qat_req->dir == COMPRESSION ? "comp" : "decomp", dlen);
+
+	ret = qat_bl_realloc_map_new_dst(accel_dev, &areq->dst, dlen, qat_bufs,
+					 qat_algs_alloc_flags(&areq->base));
+	if (ret)
+		goto err;
+
+	qat_req->dst.resubmitted = true;
+
+	dfbuf = qat_req->buf.bloutp;
+	qat_comp_override_dst(req, dfbuf, dlen);
+
+	ret = qat_alg_send_dc_message(qat_req, inst, &areq->base);
+	if (ret != -ENOSPC)
+		return;
+
+err:
+	qat_bl_free_bufl(accel_dev, qat_bufs);
+	areq->base.complete(&areq->base, ret);
+}
+
 static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
 				      void *resp)
 {
@@ -80,6 +128,21 @@ static void qat_comp_generic_callback(struct qat_compression_req *qat_req,
 
 	areq->dlen = 0;
 
+	if (qat_req->dir == DECOMPRESSION && qat_req->dst.is_null) {
+		if (cmp_err == ERR_CODE_OVERFLOW_ERROR) {
+			if (qat_req->dst.resubmitted) {
+				dev_dbg(&GET_DEV(accel_dev),
+					"Output does not fit destination buffer\n");
+				res = -EOVERFLOW;
+				goto end;
+			}
+
+			INIT_WORK(&qat_req->resubmit, qat_comp_resubmit);
+			adf_misc_wq_queue_work(&qat_req->resubmit);
+			return;
+		}
+	}
+
 	if (unlikely(status != ICP_QAT_FW_COMN_STATUS_FLAG_OK))
 		goto end;
 
@@ -176,16 +239,23 @@ static int qat_comp_alg_compress_decompress(struct acomp_req *areq,
 	if (areq->dst && !dlen)
 		return -EINVAL;
 
+	qat_req->dst.is_null = false;
+
 	/* Handle acomp requests that require the allocation of a destination
 	 * buffer. The size of the destination buffer is double the source
 	 * buffer (rounded up to the size of a page) to fit the decompressed
 	 * output or an expansion on the data for compression.
 	 */
 	if (!areq->dst) {
+		qat_req->dst.is_null = true;
+
 		dlen = round_up(2 * slen, PAGE_SIZE);
 		areq->dst = sgl_alloc(dlen, f, NULL);
 		if (!areq->dst)
 			return -ENOMEM;
+
+		areq->dlen = dlen;
+		qat_req->dst.resubmitted = false;
 	}
 
 	if (dir == COMPRESSION) {
diff --git a/drivers/crypto/qat/qat_common/qat_comp_req.h b/drivers/crypto/qat/qat_common/qat_comp_req.h
index 18a1f33a6db98..404e32c5e7783 100644
--- a/drivers/crypto/qat/qat_common/qat_comp_req.h
+++ b/drivers/crypto/qat/qat_common/qat_comp_req.h
@@ -25,6 +25,16 @@ static inline void qat_comp_create_req(void *ctx, void *req, u64 src, u32 slen,
 	req_pars->out_buffer_sz = dlen;
 }
 
+static inline void qat_comp_override_dst(void *req, u64 dst, u32 dlen)
+{
+	struct icp_qat_fw_comp_req *fw_req = req;
+	struct icp_qat_fw_comp_req_params *req_pars = &fw_req->comp_pars;
+
+	fw_req->comn_mid.dest_data_addr = dst;
+	fw_req->comn_mid.dst_length = dlen;
+	req_pars->out_buffer_sz = dlen;
+}
+
 static inline void qat_comp_create_compression_req(void *ctx, void *req,
 						   u64 src, u32 slen,
 						   u64 dst, u32 dlen,
-- 
GitLab


From 3564f5a2144355cadb4f0c5c14d2bc7fcd2418b9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 29 Nov 2022 17:52:35 +0800
Subject: [PATCH 3567/4122] crypto: chelsio - Fix flexible struct array warning

This patch fixes the sparse warning about arrays of flexible
structures by removing an unnecessary use of them in struct
__crypto_ctx.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chcr_algo.c   | 6 +++---
 drivers/crypto/chelsio/chcr_crypto.h | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 6933546f87b1a..9fac1e7584066 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -98,17 +98,17 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req,
 
 static inline  struct chcr_aead_ctx *AEAD_CTX(struct chcr_context *ctx)
 {
-	return ctx->crypto_ctx->aeadctx;
+	return &ctx->crypto_ctx->aeadctx;
 }
 
 static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx)
 {
-	return ctx->crypto_ctx->ablkctx;
+	return &ctx->crypto_ctx->ablkctx;
 }
 
 static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx)
 {
-	return ctx->crypto_ctx->hmacctx;
+	return &ctx->crypto_ctx->hmacctx;
 }
 
 static inline struct chcr_gcm_ctx *GCM_CTX(struct chcr_aead_ctx *gctx)
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index c7816c83e3246..7f88ddb086313 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -248,9 +248,9 @@ struct hmac_ctx {
 
 struct __crypto_ctx {
 	union {
-		DECLARE_FLEX_ARRAY(struct hmac_ctx, hmacctx);
-		DECLARE_FLEX_ARRAY(struct ablk_ctx, ablkctx);
-		DECLARE_FLEX_ARRAY(struct chcr_aead_ctx, aeadctx);
+		struct hmac_ctx hmacctx;
+		struct ablk_ctx ablkctx;
+		struct chcr_aead_ctx aeadctx;
 	};
 };
 
-- 
GitLab


From 67ab02dce3adad3ea399e824b37f8e1c2453449f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 29 Nov 2022 17:48:49 +0100
Subject: [PATCH 3568/4122] crypto: arm64/aes-neonbs - use frame_push/pop
 consistently

Use the frame_push and frame_pop macros consistently to create the stack
frame, so that we will get PAC and/or shadow call stack handling as well
when enabled.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-neonbs-core.S | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index d427f4556b6eb..7278a37c2d5cd 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -760,7 +760,7 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
 	eor		v6.16b, v6.16b, v31.16b
 	eor		v7.16b, v7.16b, v16.16b
 
-	stp		q16, q17, [sp, #16]
+	stp		q16, q17, [x6]
 
 	mov		bskey, x2
 	mov		rounds, x3
@@ -768,8 +768,8 @@ SYM_FUNC_START_LOCAL(__xts_crypt8)
 SYM_FUNC_END(__xts_crypt8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
-	stp		x29, x30, [sp, #-48]!
-	mov		x29, sp
+	frame_push	0, 32
+	add		x6, sp, #.Lframe_local_offset
 
 	ld1		{v25.16b}, [x5]
 
@@ -781,7 +781,7 @@ SYM_FUNC_END(__xts_crypt8)
 	eor		v18.16b, \o2\().16b, v27.16b
 	eor		v19.16b, \o3\().16b, v28.16b
 
-	ldp		q24, q25, [sp, #16]
+	ldp		q24, q25, [x6]
 
 	eor		v20.16b, \o4\().16b, v29.16b
 	eor		v21.16b, \o5\().16b, v30.16b
@@ -795,7 +795,7 @@ SYM_FUNC_END(__xts_crypt8)
 	b.gt		0b
 
 	st1		{v25.16b}, [x5]
-	ldp		x29, x30, [sp], #48
+	frame_pop
 	ret
 	.endm
 
@@ -820,9 +820,7 @@ SYM_FUNC_END(aesbs_xts_decrypt)
 	 *		     int rounds, int blocks, u8 iv[])
 	 */
 SYM_FUNC_START(aesbs_ctr_encrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
-
+	frame_push	0
 	ldp		x7, x8, [x5]
 	ld1		{v0.16b}, [x5]
 CPU_LE(	rev		x7, x7		)
@@ -862,6 +860,6 @@ CPU_LE(	rev		x8, x8		)
 	b.gt		0b
 
 	st1		{v0.16b}, [x5]
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 SYM_FUNC_END(aesbs_ctr_encrypt)
-- 
GitLab


From 7d709af18054bc9e2043499bb35eb1809c2a316f Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 29 Nov 2022 17:48:50 +0100
Subject: [PATCH 3569/4122] crypto: arm64/aes-modes - use frame_push/pop macros
 consistently

Use the frame_push and frame_pop macros to create the stack frames in
the AES chaining mode wrappers so that they will get PAC and/or shadow
call stack protection when configured.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/aes-modes.S | 34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 5abc834271f4a..0e834a2c062cf 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -52,8 +52,7 @@ SYM_FUNC_END(aes_decrypt_block5x)
 	 */
 
 AES_FUNC_START(aes_ecb_encrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	0
 
 	enc_prepare	w3, x2, x5
 
@@ -77,14 +76,13 @@ ST5(	st1		{v4.16b}, [x0], #16		)
 	subs		w4, w4, #1
 	bne		.Lecbencloop
 .Lecbencout:
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 AES_FUNC_END(aes_ecb_encrypt)
 
 
 AES_FUNC_START(aes_ecb_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	0
 
 	dec_prepare	w3, x2, x5
 
@@ -108,7 +106,7 @@ ST5(	st1		{v4.16b}, [x0], #16		)
 	subs		w4, w4, #1
 	bne		.Lecbdecloop
 .Lecbdecout:
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 AES_FUNC_END(aes_ecb_decrypt)
 
@@ -171,9 +169,6 @@ AES_FUNC_END(aes_cbc_encrypt)
 AES_FUNC_END(aes_essiv_cbc_encrypt)
 
 AES_FUNC_START(aes_essiv_cbc_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
-
 	ld1		{cbciv.16b}, [x5]		/* get iv */
 
 	mov		w8, #14				/* AES-256: 14 rounds */
@@ -182,11 +177,9 @@ AES_FUNC_START(aes_essiv_cbc_decrypt)
 	b		.Lessivcbcdecstart
 
 AES_FUNC_START(aes_cbc_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
-
 	ld1		{cbciv.16b}, [x5]		/* get iv */
 .Lessivcbcdecstart:
+	frame_push	0
 	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
@@ -236,7 +229,7 @@ ST5(	st1		{v4.16b}, [x0], #16		)
 	bne		.Lcbcdecloop
 .Lcbcdecout:
 	st1		{cbciv.16b}, [x5]		/* return iv */
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 AES_FUNC_END(aes_cbc_decrypt)
 AES_FUNC_END(aes_essiv_cbc_decrypt)
@@ -337,8 +330,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt)
 	BLOCKS		.req x13
 	BLOCKS_W	.req w13
 
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	0
 
 	enc_prepare	ROUNDS_W, KEY, IV_PART
 	ld1		{vctr.16b}, [IV]
@@ -481,7 +473,7 @@ ST5(	st1		{v4.16b}, [OUT], #16		)
 	.if !\xctr
 		st1		{vctr.16b}, [IV] /* return next CTR value */
 	.endif
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 
 .Lctrtail\xctr:
@@ -645,8 +637,7 @@ AES_FUNC_END(aes_xctr_encrypt)
 	.endm
 
 AES_FUNC_START(aes_xts_encrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	0
 
 	ld1		{v4.16b}, [x6]
 	xts_load_mask	v8
@@ -704,7 +695,7 @@ AES_FUNC_START(aes_xts_encrypt)
 	st1		{v0.16b}, [x0]
 .Lxtsencret:
 	st1		{v4.16b}, [x6]
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 
 .LxtsencctsNx:
@@ -732,8 +723,7 @@ AES_FUNC_START(aes_xts_encrypt)
 AES_FUNC_END(aes_xts_encrypt)
 
 AES_FUNC_START(aes_xts_decrypt)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	0
 
 	/* subtract 16 bytes if we are doing CTS */
 	sub		w8, w4, #0x10
@@ -794,7 +784,7 @@ AES_FUNC_START(aes_xts_decrypt)
 	b		.Lxtsdecloop
 .Lxtsdecout:
 	st1		{v4.16b}, [x6]
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	ret
 
 .Lxtsdeccts:
-- 
GitLab


From 489a4a05fe6d544f1f1052d2c6cd5bffbd89ddb6 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 29 Nov 2022 17:48:51 +0100
Subject: [PATCH 3570/4122] crypto: arm64/crct10dif - use frame_push/pop macros
 consistently

Use the frame_push and frame_pop macros to set up the stack frame so
that return address protections will be enabled automically when
configured.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/crct10dif-ce-core.S | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S
index dce6dcebfca18..5604de61d06d0 100644
--- a/arch/arm64/crypto/crct10dif-ce-core.S
+++ b/arch/arm64/crypto/crct10dif-ce-core.S
@@ -429,7 +429,7 @@ CPU_LE(	ext		v0.16b, v0.16b, v0.16b, #8	)
 
 	umov		w0, v0.h[0]
 	.ifc		\p, p8
-	ldp		x29, x30, [sp], #16
+	frame_pop
 	.endif
 	ret
 
@@ -466,8 +466,7 @@ CPU_LE(	ext		v7.16b, v7.16b, v7.16b, #8	)
 // Assumes len >= 16.
 //
 SYM_FUNC_START(crc_t10dif_pmull_p8)
-	stp		x29, x30, [sp, #-16]!
-	mov		x29, sp
+	frame_push	1
 	crc_t10dif_pmull p8
 SYM_FUNC_END(crc_t10dif_pmull_p8)
 
-- 
GitLab


From a428636d4c827ebe967aa31a83684b4c8e742ed1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 29 Nov 2022 17:48:52 +0100
Subject: [PATCH 3571/4122] crypto: arm64/ghash-ce - use frame_push/pop macros
 consistently

Use the frame_push and frame_pop macros to set up the stack frame so
that return address protections will be enabled automically when
configured.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 arch/arm64/crypto/ghash-ce-core.S | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S
index ebe5558929b7b..23ee9a5eaf27c 100644
--- a/arch/arm64/crypto/ghash-ce-core.S
+++ b/arch/arm64/crypto/ghash-ce-core.S
@@ -436,9 +436,7 @@ SYM_FUNC_END(pmull_ghash_update_p8)
 
 	.align		6
 	.macro		pmull_gcm_do_crypt, enc
-	stp		x29, x30, [sp, #-32]!
-	mov		x29, sp
-	str		x19, [sp, #24]
+	frame_push	1
 
 	load_round_keys	x7, x6, x8
 
@@ -529,7 +527,7 @@ CPU_LE(	rev		w8, w8		)
 	.endif
 	bne		0b
 
-3:	ldp		x19, x10, [sp, #24]
+3:	ldr		x10, [sp, #.Lframe_local_offset]
 	cbz		x10, 5f				// output tag?
 
 	ld1		{INP3.16b}, [x10]		// load lengths[]
@@ -562,7 +560,7 @@ CPU_LE(	rev		w8, w8		)
 	smov		w0, v0.b[0]			// return b0
 	.endif
 
-4:	ldp		x29, x30, [sp], #32
+4:	frame_pop
 	ret
 
 5:
-- 
GitLab


From 04ba54e5af8f8f0137b08cb51a0b3a2e1ea46c94 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Thu, 1 Dec 2022 14:25:26 +0800
Subject: [PATCH 3572/4122] crypto: img-hash - Fix variable dereferenced before
 check 'hdev->req'

Smatch report warning as follows:

drivers/crypto/img-hash.c:366 img_hash_dma_task() warn: variable
dereferenced before check 'hdev->req'

Variable dereferenced should be done after check 'hdev->req',
fix it.

Fixes: d358f1abbf71 ("crypto: img-hash - Add Imagination Technologies hw hash accelerator")
Fixes: 10badea259fa ("crypto: img-hash - Fix null pointer exception")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/img-hash.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index d8e82d69745d8..9629e98bd68b7 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -358,12 +358,16 @@ static int img_hash_dma_init(struct img_hash_dev *hdev)
 static void img_hash_dma_task(unsigned long d)
 {
 	struct img_hash_dev *hdev = (struct img_hash_dev *)d;
-	struct img_hash_request_ctx *ctx = ahash_request_ctx(hdev->req);
+	struct img_hash_request_ctx *ctx;
 	u8 *addr;
 	size_t nbytes, bleft, wsend, len, tbc;
 	struct scatterlist tsg;
 
-	if (!hdev->req || !ctx->sg)
+	if (!hdev->req)
+		return;
+
+	ctx = ahash_request_ctx(hdev->req);
+	if (!ctx->sg)
 		return;
 
 	addr = sg_virt(ctx->sg);
-- 
GitLab


From 1c64a7e1f931821acadf964c5ddb0dc41abf9e20 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:20:47 +0800
Subject: [PATCH 3573/4122] crypto: cavium - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/cavium/cpt/cptvf_algs.c     | 10 +++++-----
 drivers/crypto/cavium/nitrox/nitrox_aead.c | 12 ++++++------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c
index ce3b91c612f0f..9eca0c3021866 100644
--- a/drivers/crypto/cavium/cpt/cptvf_algs.c
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.c
@@ -97,7 +97,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cvm_enc_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct fc_context *fctx = &rctx->fctx;
 	u32 enc_iv_len = crypto_skcipher_ivsize(tfm);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
@@ -151,7 +151,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
 static inline u32 create_input_list(struct skcipher_request  *req, u32 enc,
 				    u32 enc_iv_len)
 {
-	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	u32 argcnt =  0;
 
@@ -173,7 +173,7 @@ static inline void store_cb_info(struct skcipher_request *req,
 static inline void create_output_list(struct skcipher_request *req,
 				      u32 enc_iv_len)
 {
-	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct cpt_request_info *req_info = &rctx->cpt_req;
 	u32 argcnt = 0;
 
@@ -193,7 +193,7 @@ static inline void create_output_list(struct skcipher_request *req,
 static inline int cvm_enc_dec(struct skcipher_request *req, u32 enc)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct cvm_req_ctx *rctx = skcipher_request_ctx(req);
+	struct cvm_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	u32 enc_iv_len = crypto_skcipher_ivsize(tfm);
 	struct fc_context *fctx = &rctx->fctx;
 	struct cpt_request_info *req_info = &rctx->cpt_req;
@@ -335,7 +335,7 @@ static int cvm_ecb_des3_setkey(struct crypto_skcipher *cipher, const u8 *key,
 
 static int cvm_enc_dec_init(struct crypto_skcipher *tfm)
 {
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct cvm_req_ctx));
+	crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct cvm_req_ctx));
 
 	return 0;
 }
diff --git a/drivers/crypto/cavium/nitrox/nitrox_aead.c b/drivers/crypto/cavium/nitrox/nitrox_aead.c
index c93c4e41d2678..0653484df23f7 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_aead.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_aead.c
@@ -392,7 +392,7 @@ static int nitrox_rfc4106_setauthsize(struct crypto_aead *aead,
 
 static int nitrox_rfc4106_set_aead_rctx_sglist(struct aead_request *areq)
 {
-	struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq);
+	struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq);
 	struct nitrox_aead_rctx *aead_rctx = &rctx->base;
 	unsigned int assoclen = areq->assoclen - GCM_RFC4106_IV_SIZE;
 	struct scatterlist *sg;
@@ -424,7 +424,7 @@ static int nitrox_rfc4106_set_aead_rctx_sglist(struct aead_request *areq)
 static void nitrox_rfc4106_callback(void *arg, int err)
 {
 	struct aead_request *areq = arg;
-	struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq);
+	struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq);
 	struct nitrox_kcrypt_request *nkreq = &rctx->base.nkreq;
 
 	free_src_sglist(nkreq);
@@ -441,7 +441,7 @@ static int nitrox_rfc4106_enc(struct aead_request *areq)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
 	struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead);
-	struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq);
+	struct nitrox_rfc4106_rctx *rctx = aead_request_ctx_dma(areq);
 	struct nitrox_aead_rctx *aead_rctx = &rctx->base;
 	struct se_crypto_request *creq = &aead_rctx->nkreq.creq;
 	int ret;
@@ -472,7 +472,7 @@ static int nitrox_rfc4106_enc(struct aead_request *areq)
 static int nitrox_rfc4106_dec(struct aead_request *areq)
 {
 	struct crypto_aead *aead = crypto_aead_reqtfm(areq);
-	struct nitrox_crypto_ctx *nctx = crypto_aead_ctx(aead);
+	struct nitrox_crypto_ctx *nctx = crypto_aead_ctx_dma(aead);
 	struct nitrox_rfc4106_rctx *rctx = aead_request_ctx(areq);
 	struct nitrox_aead_rctx *aead_rctx = &rctx->base;
 	struct se_crypto_request *creq = &aead_rctx->nkreq.creq;
@@ -510,8 +510,8 @@ static int nitrox_rfc4106_init(struct crypto_aead *aead)
 	if (ret)
 		return ret;
 
-	crypto_aead_set_reqsize(aead, sizeof(struct aead_request) +
-				sizeof(struct nitrox_rfc4106_rctx));
+	crypto_aead_set_reqsize_dma(aead, sizeof(struct aead_request) +
+					  sizeof(struct nitrox_rfc4106_rctx));
 
 	return 0;
 }
-- 
GitLab


From 99c6b20edfc031610240afca97ba9be5ec6f5750 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:20:49 +0800
Subject: [PATCH 3574/4122] crypto: ccp - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccp/ccp-crypto-aes-cmac.c   | 21 ++++++++--------
 drivers/crypto/ccp/ccp-crypto-aes-galois.c | 12 ++++-----
 drivers/crypto/ccp/ccp-crypto-aes-xts.c    | 20 ++++++++-------
 drivers/crypto/ccp/ccp-crypto-aes.c        | 29 +++++++++++-----------
 drivers/crypto/ccp/ccp-crypto-des3.c       | 17 +++++++------
 drivers/crypto/ccp/ccp-crypto-main.c       |  4 +--
 drivers/crypto/ccp/ccp-crypto-rsa.c        | 18 +++++++-------
 drivers/crypto/ccp/ccp-crypto-sha.c        | 26 +++++++++----------
 8 files changed, 76 insertions(+), 71 deletions(-)

diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index 11a305fa19e67..d8426bdf31908 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -25,7 +25,7 @@ static int ccp_aes_cmac_complete(struct crypto_async_request *async_req,
 {
 	struct ahash_request *req = ahash_request_cast(async_req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req);
 	unsigned int digest_size = crypto_ahash_digestsize(tfm);
 
 	if (ret)
@@ -56,8 +56,8 @@ static int ccp_do_cmac_update(struct ahash_request *req, unsigned int nbytes,
 			      unsigned int final)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req);
 	struct scatterlist *sg, *cmac_key_sg = NULL;
 	unsigned int block_size =
 		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
@@ -182,7 +182,7 @@ e_free:
 
 static int ccp_aes_cmac_init(struct ahash_request *req)
 {
-	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req);
 
 	memset(rctx, 0, sizeof(*rctx));
 
@@ -219,7 +219,7 @@ static int ccp_aes_cmac_digest(struct ahash_request *req)
 
 static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 {
-	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req);
 	struct ccp_aes_cmac_exp_ctx state;
 
 	/* Don't let anything leak to 'out' */
@@ -238,7 +238,7 @@ static int ccp_aes_cmac_export(struct ahash_request *req, void *out)
 
 static int ccp_aes_cmac_import(struct ahash_request *req, const void *in)
 {
-	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_aes_cmac_req_ctx *rctx = ahash_request_ctx_dma(req);
 	struct ccp_aes_cmac_exp_ctx state;
 
 	/* 'in' may not be aligned so memcpy to local variable */
@@ -256,7 +256,7 @@ static int ccp_aes_cmac_import(struct ahash_request *req, const void *in)
 static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
 			       unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	struct ccp_crypto_ahash_alg *alg =
 		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
 	u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo;
@@ -334,13 +334,14 @@ static int ccp_aes_cmac_setkey(struct crypto_ahash *tfm, const u8 *key,
 
 static int ccp_aes_cmac_cra_init(struct crypto_tfm *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
 
 	ctx->complete = ccp_aes_cmac_complete;
 	ctx->u.aes.key_len = 0;
 
-	crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx));
+	crypto_ahash_set_reqsize_dma(ahash,
+				     sizeof(struct ccp_aes_cmac_req_ctx));
 
 	return 0;
 }
@@ -382,7 +383,7 @@ int ccp_register_aes_cmac_algs(struct list_head *head)
 			  CRYPTO_ALG_KERN_DRIVER_ONLY |
 			  CRYPTO_ALG_NEED_FALLBACK;
 	base->cra_blocksize = AES_BLOCK_SIZE;
-	base->cra_ctxsize = sizeof(struct ccp_ctx);
+	base->cra_ctxsize = sizeof(struct ccp_ctx) + crypto_dma_padding();
 	base->cra_priority = CCP_CRA_PRIORITY;
 	base->cra_init = ccp_aes_cmac_cra_init;
 	base->cra_module = THIS_MODULE;
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-galois.c b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
index 1c1c939f5c39b..b1dbb8cea559e 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-galois.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-galois.c
@@ -29,7 +29,7 @@ static int ccp_aes_gcm_complete(struct crypto_async_request *async_req, int ret)
 static int ccp_aes_gcm_setkey(struct crypto_aead *tfm, const u8 *key,
 			      unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	switch (key_len) {
 	case AES_KEYSIZE_128:
@@ -76,8 +76,8 @@ static int ccp_aes_gcm_setauthsize(struct crypto_aead *tfm,
 static int ccp_aes_gcm_crypt(struct aead_request *req, bool encrypt)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
-	struct ccp_aes_req_ctx *rctx = aead_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm);
+	struct ccp_aes_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct scatterlist *iv_sg = NULL;
 	unsigned int iv_len = 0;
 	int i;
@@ -148,12 +148,12 @@ static int ccp_aes_gcm_decrypt(struct aead_request *req)
 
 static int ccp_aes_gcm_cra_init(struct crypto_aead *tfm)
 {
-	struct ccp_ctx *ctx = crypto_aead_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	ctx->complete = ccp_aes_gcm_complete;
 	ctx->u.aes.key_len = 0;
 
-	crypto_aead_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
+	crypto_aead_set_reqsize_dma(tfm, sizeof(struct ccp_aes_req_ctx));
 
 	return 0;
 }
@@ -176,7 +176,7 @@ static struct aead_alg ccp_aes_gcm_defaults = {
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_NEED_FALLBACK,
 		.cra_blocksize	= AES_BLOCK_SIZE,
-		.cra_ctxsize	= sizeof(struct ccp_ctx),
+		.cra_ctxsize	= sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority	= CCP_CRA_PRIORITY,
 		.cra_exit	= ccp_aes_gcm_cra_exit,
 		.cra_module	= THIS_MODULE,
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 6849261ca47d0..93f735d6b02b2 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -62,7 +62,7 @@ static struct ccp_unit_size_map xts_unit_sizes[] = {
 static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
 {
 	struct skcipher_request *req = skcipher_request_cast(async_req);
-	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req);
 
 	if (ret)
 		return ret;
@@ -75,7 +75,7 @@ static int ccp_aes_xts_complete(struct crypto_async_request *async_req, int ret)
 static int ccp_aes_xts_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			      unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 	unsigned int ccpversion = ccp_version();
 	int ret;
 
@@ -105,8 +105,8 @@ static int ccp_aes_xts_crypt(struct skcipher_request *req,
 			     unsigned int encrypt)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	unsigned int ccpversion = ccp_version();
 	unsigned int fallback = 0;
 	unsigned int unit;
@@ -196,7 +196,7 @@ static int ccp_aes_xts_decrypt(struct skcipher_request *req)
 
 static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 	struct crypto_skcipher *fallback_tfm;
 
 	ctx->complete = ccp_aes_xts_complete;
@@ -210,15 +210,16 @@ static int ccp_aes_xts_init_tfm(struct crypto_skcipher *tfm)
 	}
 	ctx->u.aes.tfm_skcipher = fallback_tfm;
 
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx) +
-					 crypto_skcipher_reqsize(fallback_tfm));
+	crypto_skcipher_set_reqsize_dma(tfm,
+					sizeof(struct ccp_aes_req_ctx) +
+					crypto_skcipher_reqsize(fallback_tfm));
 
 	return 0;
 }
 
 static void ccp_aes_xts_exit_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	crypto_free_skcipher(ctx->u.aes.tfm_skcipher);
 }
@@ -246,7 +247,8 @@ static int ccp_register_aes_xts_alg(struct list_head *head,
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_NEED_FALLBACK;
 	alg->base.cra_blocksize	= AES_BLOCK_SIZE;
-	alg->base.cra_ctxsize	= sizeof(struct ccp_ctx);
+	alg->base.cra_ctxsize	= sizeof(struct ccp_ctx) +
+				  crypto_dma_padding();
 	alg->base.cra_priority	= CCP_CRA_PRIORITY;
 	alg->base.cra_module	= THIS_MODULE;
 
diff --git a/drivers/crypto/ccp/ccp-crypto-aes.c b/drivers/crypto/ccp/ccp-crypto-aes.c
index bed331953ff94..918e223f21b65 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes.c
@@ -22,8 +22,9 @@
 static int ccp_aes_complete(struct crypto_async_request *async_req, int ret)
 {
 	struct skcipher_request *req = skcipher_request_cast(async_req);
-	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(
+		crypto_skcipher_reqtfm(req));
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req);
 
 	if (ret)
 		return ret;
@@ -38,7 +39,7 @@ static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 			  unsigned int key_len)
 {
 	struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm);
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	switch (key_len) {
 	case AES_KEYSIZE_128:
@@ -65,8 +66,8 @@ static int ccp_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
 static int ccp_aes_crypt(struct skcipher_request *req, bool encrypt)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct scatterlist *iv_sg = NULL;
 	unsigned int iv_len = 0;
 
@@ -118,7 +119,7 @@ static int ccp_aes_decrypt(struct skcipher_request *req)
 
 static int ccp_aes_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	ctx->complete = ccp_aes_complete;
 	ctx->u.aes.key_len = 0;
@@ -132,7 +133,7 @@ static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req,
 				    int ret)
 {
 	struct skcipher_request *req = skcipher_request_cast(async_req);
-	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req);
 
 	/* Restore the original pointer */
 	req->iv = rctx->rfc3686_info;
@@ -143,7 +144,7 @@ static int ccp_aes_rfc3686_complete(struct crypto_async_request *async_req,
 static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
 				  unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	if (key_len < CTR_RFC3686_NONCE_SIZE)
 		return -EINVAL;
@@ -157,8 +158,8 @@ static int ccp_aes_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
 static int ccp_aes_rfc3686_crypt(struct skcipher_request *req, bool encrypt)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
+	struct ccp_aes_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	u8 *iv;
 
 	/* Initialize the CTR block */
@@ -190,12 +191,12 @@ static int ccp_aes_rfc3686_decrypt(struct skcipher_request *req)
 
 static int ccp_aes_rfc3686_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	ctx->complete = ccp_aes_rfc3686_complete;
 	ctx->u.aes.key_len = 0;
 
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_aes_req_ctx));
+	crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct ccp_aes_req_ctx));
 
 	return 0;
 }
@@ -213,7 +214,7 @@ static const struct skcipher_alg ccp_aes_defaults = {
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_NEED_FALLBACK,
 	.base.cra_blocksize	= AES_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.base.cra_ctxsize	= sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING,
 	.base.cra_priority	= CCP_CRA_PRIORITY,
 	.base.cra_module	= THIS_MODULE,
 };
@@ -231,7 +232,7 @@ static const struct skcipher_alg ccp_aes_rfc3686_defaults = {
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_NEED_FALLBACK,
 	.base.cra_blocksize	= CTR_RFC3686_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.base.cra_ctxsize	= sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING,
 	.base.cra_priority	= CCP_CRA_PRIORITY,
 	.base.cra_module	= THIS_MODULE,
 };
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
index 278636ed251a5..afae30adb703f 100644
--- a/drivers/crypto/ccp/ccp-crypto-des3.c
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -21,8 +21,9 @@
 static int ccp_des3_complete(struct crypto_async_request *async_req, int ret)
 {
 	struct skcipher_request *req = skcipher_request_cast(async_req);
-	struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm);
-	struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(
+		crypto_skcipher_reqtfm(req));
+	struct ccp_des3_req_ctx *rctx = skcipher_request_ctx_dma(req);
 
 	if (ret)
 		return ret;
@@ -37,7 +38,7 @@ static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 		unsigned int key_len)
 {
 	struct ccp_crypto_skcipher_alg *alg = ccp_crypto_skcipher_alg(tfm);
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 	int err;
 
 	err = verify_skcipher_des3_key(tfm, key);
@@ -60,8 +61,8 @@ static int ccp_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
 static int ccp_des3_crypt(struct skcipher_request *req, bool encrypt)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct ccp_des3_req_ctx *rctx = skcipher_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
+	struct ccp_des3_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct scatterlist *iv_sg = NULL;
 	unsigned int iv_len = 0;
 
@@ -114,12 +115,12 @@ static int ccp_des3_decrypt(struct skcipher_request *req)
 
 static int ccp_des3_init_tfm(struct crypto_skcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_skcipher_ctx_dma(tfm);
 
 	ctx->complete = ccp_des3_complete;
 	ctx->u.des3.key_len = 0;
 
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct ccp_des3_req_ctx));
+	crypto_skcipher_set_reqsize_dma(tfm, sizeof(struct ccp_des3_req_ctx));
 
 	return 0;
 }
@@ -137,7 +138,7 @@ static const struct skcipher_alg ccp_des3_defaults = {
 				  CRYPTO_ALG_KERN_DRIVER_ONLY |
 				  CRYPTO_ALG_NEED_FALLBACK,
 	.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-	.base.cra_ctxsize	= sizeof(struct ccp_ctx),
+	.base.cra_ctxsize	= sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING,
 	.base.cra_priority	= CCP_CRA_PRIORITY,
 	.base.cra_module	= THIS_MODULE,
 };
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index dd86d2650bea4..73442a382f683 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -139,7 +139,7 @@ static void ccp_crypto_complete(void *data, int err)
 	struct ccp_crypto_cmd *crypto_cmd = data;
 	struct ccp_crypto_cmd *held, *next, *backlog;
 	struct crypto_async_request *req = crypto_cmd->req;
-	struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx_dma(req->tfm);
 	int ret;
 
 	if (err == -EINPROGRESS) {
@@ -183,7 +183,7 @@ static void ccp_crypto_complete(void *data, int err)
 			break;
 
 		/* Error occurred, report it and get the next entry */
-		ctx = crypto_tfm_ctx(held->req->tfm);
+		ctx = crypto_tfm_ctx_dma(held->req->tfm);
 		if (ctx->complete)
 			ret = ctx->complete(held->req, ret);
 		held->req->complete(held->req, ret);
diff --git a/drivers/crypto/ccp/ccp-crypto-rsa.c b/drivers/crypto/ccp/ccp-crypto-rsa.c
index 1223ac70aea28..a14f85512cf4f 100644
--- a/drivers/crypto/ccp/ccp-crypto-rsa.c
+++ b/drivers/crypto/ccp/ccp-crypto-rsa.c
@@ -44,7 +44,7 @@ static inline int ccp_copy_and_save_keypart(u8 **kpbuf, unsigned int *kplen,
 static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
 {
 	struct akcipher_request *req = akcipher_request_cast(async_req);
-	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx_dma(req);
 
 	if (ret)
 		return ret;
@@ -56,7 +56,7 @@ static int ccp_rsa_complete(struct crypto_async_request *async_req, int ret)
 
 static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
 {
-	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 
 	return ctx->u.rsa.n_len;
 }
@@ -64,8 +64,8 @@ static unsigned int ccp_rsa_maxsize(struct crypto_akcipher *tfm)
 static int ccp_rsa_crypt(struct akcipher_request *req, bool encrypt)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
-	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
-	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx(req);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
+	struct ccp_rsa_req_ctx *rctx = akcipher_request_ctx_dma(req);
 	int ret = 0;
 
 	memset(&rctx->cmd, 0, sizeof(rctx->cmd));
@@ -126,7 +126,7 @@ static void ccp_rsa_free_key_bufs(struct ccp_ctx *ctx)
 static int ccp_rsa_setkey(struct crypto_akcipher *tfm, const void *key,
 			  unsigned int keylen, bool private)
 {
-	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 	struct rsa_key raw_key;
 	int ret;
 
@@ -192,9 +192,9 @@ static int ccp_rsa_setpubkey(struct crypto_akcipher *tfm, const void *key,
 
 static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm)
 {
-	struct ccp_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 
-	akcipher_set_reqsize(tfm, sizeof(struct ccp_rsa_req_ctx));
+	akcipher_set_reqsize_dma(tfm, sizeof(struct ccp_rsa_req_ctx));
 	ctx->complete = ccp_rsa_complete;
 
 	return 0;
@@ -202,7 +202,7 @@ static int ccp_rsa_init_tfm(struct crypto_akcipher *tfm)
 
 static void ccp_rsa_exit_tfm(struct crypto_akcipher *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+	struct ccp_ctx *ctx = akcipher_tfm_ctx_dma(tfm);
 
 	ccp_rsa_free_key_bufs(ctx);
 }
@@ -220,7 +220,7 @@ static struct akcipher_alg ccp_rsa_defaults = {
 		.cra_driver_name = "rsa-ccp",
 		.cra_priority = CCP_CRA_PRIORITY,
 		.cra_module = THIS_MODULE,
-		.cra_ctxsize = 2 * sizeof(struct ccp_ctx),
+		.cra_ctxsize = 2 * sizeof(struct ccp_ctx) + CRYPTO_DMA_PADDING,
 	},
 };
 
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 74fa5360e7226..fa3ae8e78f6f3 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -28,7 +28,7 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret)
 {
 	struct ahash_request *req = ahash_request_cast(async_req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req);
 	unsigned int digest_size = crypto_ahash_digestsize(tfm);
 
 	if (ret)
@@ -59,8 +59,8 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes,
 			     unsigned int final)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req);
 	struct scatterlist *sg;
 	unsigned int block_size =
 		crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm));
@@ -182,8 +182,8 @@ e_free:
 static int ccp_sha_init(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ccp_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req);
 	struct ccp_crypto_ahash_alg *alg =
 		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
 	unsigned int block_size =
@@ -231,7 +231,7 @@ static int ccp_sha_digest(struct ahash_request *req)
 
 static int ccp_sha_export(struct ahash_request *req, void *out)
 {
-	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req);
 	struct ccp_sha_exp_ctx state;
 
 	/* Don't let anything leak to 'out' */
@@ -252,7 +252,7 @@ static int ccp_sha_export(struct ahash_request *req, void *out)
 
 static int ccp_sha_import(struct ahash_request *req, const void *in)
 {
-	struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req);
+	struct ccp_sha_req_ctx *rctx = ahash_request_ctx_dma(req);
 	struct ccp_sha_exp_ctx state;
 
 	/* 'in' may not be aligned so memcpy to local variable */
@@ -272,7 +272,7 @@ static int ccp_sha_import(struct ahash_request *req, const void *in)
 static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 			  unsigned int key_len)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
+	struct ccp_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	struct crypto_shash *shash = ctx->u.sha.hmac_tfm;
 	unsigned int block_size = crypto_shash_blocksize(shash);
 	unsigned int digest_size = crypto_shash_digestsize(shash);
@@ -313,13 +313,13 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 
 static int ccp_sha_cra_init(struct crypto_tfm *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
+	struct ccp_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 
 	ctx->complete = ccp_sha_complete;
 	ctx->u.sha.key_len = 0;
 
-	crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_sha_req_ctx));
+	crypto_ahash_set_reqsize_dma(ahash, sizeof(struct ccp_sha_req_ctx));
 
 	return 0;
 }
@@ -330,7 +330,7 @@ static void ccp_sha_cra_exit(struct crypto_tfm *tfm)
 
 static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 	struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm);
 	struct crypto_shash *hmac_tfm;
 
@@ -348,7 +348,7 @@ static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm)
 
 static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm)
 {
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct ccp_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 
 	if (ctx->u.sha.hmac_tfm)
 		crypto_free_shash(ctx->u.sha.hmac_tfm);
@@ -492,7 +492,7 @@ static int ccp_register_sha_alg(struct list_head *head,
 			  CRYPTO_ALG_KERN_DRIVER_ONLY |
 			  CRYPTO_ALG_NEED_FALLBACK;
 	base->cra_blocksize = def->block_size;
-	base->cra_ctxsize = sizeof(struct ccp_ctx);
+	base->cra_ctxsize = sizeof(struct ccp_ctx) + crypto_dma_padding();
 	base->cra_priority = CCP_CRA_PRIORITY;
 	base->cra_init = ccp_sha_cra_init;
 	base->cra_exit = ccp_sha_cra_exit;
-- 
GitLab


From 07547fa73e4645363165e662f50427a7d302dcf1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:20:51 +0800
Subject: [PATCH 3575/4122] crypto: ccree - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ccree/cc_aead.c       | 62 ++++++++++----------
 drivers/crypto/ccree/cc_buffer_mgr.c | 18 +++---
 drivers/crypto/ccree/cc_hash.c       | 86 ++++++++++++++--------------
 3 files changed, 83 insertions(+), 83 deletions(-)

diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c
index 35794c7271fb6..109ffb375fc69 100644
--- a/drivers/crypto/ccree/cc_aead.c
+++ b/drivers/crypto/ccree/cc_aead.c
@@ -138,7 +138,7 @@ static int cc_aead_init(struct crypto_aead *tfm)
 	ctx->flow_mode = cc_alg->flow_mode;
 	ctx->auth_mode = cc_alg->auth_mode;
 	ctx->drvdata = cc_alg->drvdata;
-	crypto_aead_set_reqsize(tfm, sizeof(struct aead_req_ctx));
+	crypto_aead_set_reqsize_dma(tfm, sizeof(struct aead_req_ctx));
 
 	/* Allocate key buffer, cache line aligned */
 	ctx->enckey = dma_alloc_coherent(dev, AES_MAX_KEY_SIZE,
@@ -208,7 +208,7 @@ init_failed:
 static void cc_aead_complete(struct device *dev, void *cc_req, int err)
 {
 	struct aead_request *areq = (struct aead_request *)cc_req;
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(cc_req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
 
@@ -723,7 +723,7 @@ static void cc_set_assoc_desc(struct aead_request *areq, unsigned int flow_mode,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(areq);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq);
 	enum cc_req_dma_buf_type assoc_dma_type = areq_ctx->assoc_buff_type;
 	unsigned int idx = *seq_size;
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
@@ -762,7 +762,7 @@ static void cc_proc_authen_desc(struct aead_request *areq,
 				struct cc_hw_desc desc[],
 				unsigned int *seq_size, int direct)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq);
 	enum cc_req_dma_buf_type data_dma_type = areq_ctx->data_buff_type;
 	unsigned int idx = *seq_size;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(areq);
@@ -827,7 +827,7 @@ static void cc_proc_cipher_desc(struct aead_request *areq,
 				unsigned int *seq_size)
 {
 	unsigned int idx = *seq_size;
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(areq);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(areq);
 	enum cc_req_dma_buf_type data_dma_type = areq_ctx->data_buff_type;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(areq);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
@@ -873,7 +873,7 @@ static void cc_proc_digest_desc(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	unsigned int idx = *seq_size;
 	unsigned int hash_mode = (ctx->auth_mode == DRV_HASH_SHA1) ?
 				DRV_HASH_HW_SHA1 : DRV_HASH_HW_SHA256;
@@ -923,7 +923,7 @@ static void cc_set_cipher_desc(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	unsigned int hw_iv_size = req_ctx->hw_iv_size;
 	unsigned int idx = *seq_size;
 	int direct = req_ctx->gen_ctx.op_type;
@@ -965,7 +965,7 @@ static void cc_set_cipher_desc(struct aead_request *req,
 static void cc_proc_cipher(struct aead_request *req, struct cc_hw_desc desc[],
 			   unsigned int *seq_size, unsigned int data_flow_mode)
 {
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	int direct = req_ctx->gen_ctx.op_type;
 	unsigned int idx = *seq_size;
 
@@ -1082,7 +1082,7 @@ static void cc_proc_header_desc(struct aead_request *req,
 				struct cc_hw_desc desc[],
 				unsigned int *seq_size)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	unsigned int idx = *seq_size;
 
 	/* Hash associated data */
@@ -1158,7 +1158,7 @@ static void cc_proc_scheme_desc(struct aead_request *req,
 static void cc_mlli_to_sram(struct aead_request *req,
 			    struct cc_hw_desc desc[], unsigned int *seq_size)
 {
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
@@ -1212,7 +1212,7 @@ static void cc_hmac_authenc(struct aead_request *req, struct cc_hw_desc desc[],
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	int direct = req_ctx->gen_ctx.op_type;
 	unsigned int data_flow_mode =
 		cc_get_data_flow(direct, ctx->flow_mode,
@@ -1265,7 +1265,7 @@ cc_xcbc_authenc(struct aead_request *req, struct cc_hw_desc desc[],
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	int direct = req_ctx->gen_ctx.op_type;
 	unsigned int data_flow_mode =
 		cc_get_data_flow(direct, ctx->flow_mode,
@@ -1312,7 +1312,7 @@ static int validate_data_size(struct cc_aead_ctx *ctx,
 			      enum drv_crypto_direction direct,
 			      struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	unsigned int assoclen = areq_ctx->assoclen;
 	unsigned int cipherlen = (direct == DRV_CRYPTO_DIRECTION_DECRYPT) ?
@@ -1411,7 +1411,7 @@ static int cc_ccm(struct aead_request *req, struct cc_hw_desc desc[],
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	unsigned int idx = *seq_size;
 	unsigned int cipher_flow_mode;
 	dma_addr_t mac_result;
@@ -1533,7 +1533,7 @@ static int config_ccm_adata(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	//unsigned int size_of_a = 0, rem_a_size = 0;
 	unsigned int lp = req->iv[0];
 	/* Note: The code assume that req->iv[0] already contains the value
@@ -1591,7 +1591,7 @@ static void cc_proc_rfc4309_ccm(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 
 	/* L' */
 	memset(areq_ctx->ctr_iv, 0, AES_BLOCK_SIZE);
@@ -1615,7 +1615,7 @@ static void cc_set_ghash_desc(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	unsigned int idx = *seq_size;
 
 	/* load key to AES*/
@@ -1693,7 +1693,7 @@ static void cc_set_gctr_desc(struct aead_request *req, struct cc_hw_desc desc[],
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	unsigned int idx = *seq_size;
 
 	/* load key to AES*/
@@ -1730,7 +1730,7 @@ static void cc_proc_gcm_result(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	dma_addr_t mac_result;
 	unsigned int idx = *seq_size;
 
@@ -1792,7 +1792,7 @@ static void cc_proc_gcm_result(struct aead_request *req,
 static int cc_gcm(struct aead_request *req, struct cc_hw_desc desc[],
 		  unsigned int *seq_size)
 {
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	unsigned int cipher_flow_mode;
 
 	//in RFC4543 no data to encrypt. just copy data from src to dest.
@@ -1830,7 +1830,7 @@ static int config_gcm_context(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *req_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *req_ctx = aead_request_ctx_dma(req);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 
 	unsigned int cryptlen = (req_ctx->gen_ctx.op_type ==
@@ -1879,7 +1879,7 @@ static void cc_proc_rfc4_gcm(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 
 	memcpy(areq_ctx->ctr_iv + GCM_BLOCK_RFC4_NONCE_OFFSET,
 	       ctx->ctr_nonce, GCM_BLOCK_RFC4_NONCE_SIZE);
@@ -1896,7 +1896,7 @@ static int cc_proc_aead(struct aead_request *req,
 	struct cc_hw_desc desc[MAX_AEAD_PROCESS_SEQ];
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	struct cc_crypto_req cc_req = {};
 
@@ -2019,7 +2019,7 @@ exit:
 
 static int cc_aead_encrypt(struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	memset(areq_ctx, 0, sizeof(*areq_ctx));
@@ -2039,7 +2039,7 @@ static int cc_rfc4309_ccm_encrypt(struct aead_request *req)
 {
 	/* Very similar to cc_aead_encrypt() above. */
 
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	rc = crypto_ipsec_check_assoclen(req->assoclen);
@@ -2063,7 +2063,7 @@ out:
 
 static int cc_aead_decrypt(struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	memset(areq_ctx, 0, sizeof(*areq_ctx));
@@ -2081,7 +2081,7 @@ static int cc_aead_decrypt(struct aead_request *req)
 
 static int cc_rfc4309_ccm_decrypt(struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	rc = crypto_ipsec_check_assoclen(req->assoclen);
@@ -2193,7 +2193,7 @@ static int cc_rfc4543_gcm_setauthsize(struct crypto_aead *authenc,
 
 static int cc_rfc4106_gcm_encrypt(struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	rc = crypto_ipsec_check_assoclen(req->assoclen);
@@ -2217,7 +2217,7 @@ out:
 
 static int cc_rfc4543_gcm_encrypt(struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	rc = crypto_ipsec_check_assoclen(req->assoclen);
@@ -2244,7 +2244,7 @@ out:
 
 static int cc_rfc4106_gcm_decrypt(struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	rc = crypto_ipsec_check_assoclen(req->assoclen);
@@ -2268,7 +2268,7 @@ out:
 
 static int cc_rfc4543_gcm_decrypt(struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc;
 
 	rc = crypto_ipsec_check_assoclen(req->assoclen);
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index 9efd88f871d12..bcca55bff910e 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -52,7 +52,7 @@ static inline char *cc_dma_buf_type(enum cc_req_dma_buf_type type)
 static void cc_copy_mac(struct device *dev, struct aead_request *req,
 			enum cc_sg_cpy_direct dir)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	u32 skip = req->assoclen + req->cryptlen;
 
 	cc_copy_sg_portion(dev, areq_ctx->backup_mac, req->src,
@@ -456,7 +456,7 @@ cipher_exit:
 
 void cc_unmap_aead_request(struct device *dev, struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	unsigned int hw_iv_size = areq_ctx->hw_iv_size;
 	struct cc_drvdata *drvdata = dev_get_drvdata(dev);
 	int src_direction = (req->src != req->dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL);
@@ -546,7 +546,7 @@ static int cc_aead_chain_iv(struct cc_drvdata *drvdata,
 			    struct buffer_array *sg_data,
 			    bool is_last, bool do_chain)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	unsigned int hw_iv_size = areq_ctx->hw_iv_size;
 	struct device *dev = drvdata_to_dev(drvdata);
 	gfp_t flags = cc_gfp_flags(&req->base);
@@ -586,7 +586,7 @@ static int cc_aead_chain_assoc(struct cc_drvdata *drvdata,
 			       struct buffer_array *sg_data,
 			       bool is_last, bool do_chain)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	int rc = 0;
 	int mapped_nents = 0;
 	struct device *dev = drvdata_to_dev(drvdata);
@@ -652,7 +652,7 @@ chain_assoc_exit:
 static void cc_prepare_aead_data_dlli(struct aead_request *req,
 				      u32 *src_last_bytes, u32 *dst_last_bytes)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type;
 	unsigned int authsize = areq_ctx->req_authsize;
 	struct scatterlist *sg;
@@ -678,7 +678,7 @@ static void cc_prepare_aead_data_mlli(struct cc_drvdata *drvdata,
 				      u32 *src_last_bytes, u32 *dst_last_bytes,
 				      bool is_last_table)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type;
 	unsigned int authsize = areq_ctx->req_authsize;
 	struct device *dev = drvdata_to_dev(drvdata);
@@ -790,7 +790,7 @@ static int cc_aead_chain_data(struct cc_drvdata *drvdata,
 			      struct buffer_array *sg_data,
 			      bool is_last_table, bool do_chain)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	struct device *dev = drvdata_to_dev(drvdata);
 	enum drv_crypto_direction direct = areq_ctx->gen_ctx.op_type;
 	unsigned int authsize = areq_ctx->req_authsize;
@@ -895,7 +895,7 @@ chain_data_exit:
 static void cc_update_aead_mlli_nents(struct cc_drvdata *drvdata,
 				      struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	u32 curr_mlli_size = 0;
 
 	if (areq_ctx->assoc_buff_type == CC_DMA_BUF_MLLI) {
@@ -945,7 +945,7 @@ static void cc_update_aead_mlli_nents(struct cc_drvdata *drvdata,
 
 int cc_map_aead_request(struct cc_drvdata *drvdata, struct aead_request *req)
 {
-	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
+	struct aead_req_ctx *areq_ctx = aead_request_ctx_dma(req);
 	struct mlli_params *mlli_params = &areq_ctx->mlli_params;
 	struct device *dev = drvdata_to_dev(drvdata);
 	struct buffer_array sg_data;
diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c
index 683c9a430e11b..f418162932fe3 100644
--- a/drivers/crypto/ccree/cc_hash.c
+++ b/drivers/crypto/ccree/cc_hash.c
@@ -283,9 +283,9 @@ static void cc_unmap_result(struct device *dev, struct ahash_req_ctx *state,
 static void cc_update_complete(struct device *dev, void *cc_req, int err)
 {
 	struct ahash_request *req = (struct ahash_request *)cc_req;
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 
 	dev_dbg(dev, "req=%pK\n", req);
 
@@ -301,9 +301,9 @@ static void cc_update_complete(struct device *dev, void *cc_req, int err)
 static void cc_digest_complete(struct device *dev, void *cc_req, int err)
 {
 	struct ahash_request *req = (struct ahash_request *)cc_req;
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	u32 digestsize = crypto_ahash_digestsize(tfm);
 
 	dev_dbg(dev, "req=%pK\n", req);
@@ -321,9 +321,9 @@ static void cc_digest_complete(struct device *dev, void *cc_req, int err)
 static void cc_hash_complete(struct device *dev, void *cc_req, int err)
 {
 	struct ahash_request *req = (struct ahash_request *)cc_req;
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	u32 digestsize = crypto_ahash_digestsize(tfm);
 
 	dev_dbg(dev, "req=%pK\n", req);
@@ -341,9 +341,9 @@ static void cc_hash_complete(struct device *dev, void *cc_req, int err)
 static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req,
 			 int idx)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	u32 digestsize = crypto_ahash_digestsize(tfm);
 
 	/* Get final MAC result */
@@ -364,9 +364,9 @@ static int cc_fin_result(struct cc_hw_desc *desc, struct ahash_request *req,
 static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req,
 		       int idx)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	u32 digestsize = crypto_ahash_digestsize(tfm);
 
 	/* store the hash digest result in the context */
@@ -417,9 +417,9 @@ static int cc_fin_hmac(struct cc_hw_desc *desc, struct ahash_request *req,
 
 static int cc_hash_digest(struct ahash_request *req)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	u32 digestsize = crypto_ahash_digestsize(tfm);
 	struct scatterlist *src = req->src;
 	unsigned int nbytes = req->nbytes;
@@ -555,9 +555,9 @@ static int cc_restore_hash(struct cc_hw_desc *desc, struct cc_hash_ctx *ctx,
 
 static int cc_hash_update(struct ahash_request *req)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base);
 	struct scatterlist *src = req->src;
 	unsigned int nbytes = req->nbytes;
@@ -631,9 +631,9 @@ static int cc_hash_update(struct ahash_request *req)
 
 static int cc_do_finup(struct ahash_request *req, bool update)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	u32 digestsize = crypto_ahash_digestsize(tfm);
 	struct scatterlist *src = req->src;
 	unsigned int nbytes = req->nbytes;
@@ -711,9 +711,9 @@ static int cc_hash_final(struct ahash_request *req)
 
 static int cc_hash_init(struct ahash_request *req)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 
 	dev_dbg(dev, "===== init (%d) ====\n", req->nbytes);
@@ -736,7 +736,7 @@ static int cc_hash_setkey(struct crypto_ahash *ahash, const u8 *key,
 	u32 larval_addr;
 	struct device *dev;
 
-	ctx = crypto_ahash_ctx(ahash);
+	ctx = crypto_ahash_ctx_dma(ahash);
 	dev = drvdata_to_dev(ctx->drvdata);
 	dev_dbg(dev, "start keylen: %d", keylen);
 
@@ -922,7 +922,7 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash,
 			  const u8 *key, unsigned int keylen)
 {
 	struct cc_crypto_req cc_req = {};
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	int rc = 0;
 	unsigned int idx = 0;
@@ -1007,7 +1007,7 @@ static int cc_xcbc_setkey(struct crypto_ahash *ahash,
 static int cc_cmac_setkey(struct crypto_ahash *ahash,
 			  const u8 *key, unsigned int keylen)
 {
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 
 	dev_dbg(dev, "===== setkey (%d) ====\n", keylen);
@@ -1109,7 +1109,7 @@ fail:
 
 static int cc_get_hash_len(struct crypto_tfm *tfm)
 {
-	struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 
 	if (ctx->hash_mode == DRV_HASH_SM3)
 		return CC_SM3_HASH_LEN_SIZE;
@@ -1119,7 +1119,7 @@ static int cc_get_hash_len(struct crypto_tfm *tfm)
 
 static int cc_cra_init(struct crypto_tfm *tfm)
 {
-	struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 	struct hash_alg_common *hash_alg_common =
 		container_of(tfm->__crt_alg, struct hash_alg_common, base);
 	struct ahash_alg *ahash_alg =
@@ -1127,8 +1127,8 @@ static int cc_cra_init(struct crypto_tfm *tfm)
 	struct cc_hash_alg *cc_alg =
 			container_of(ahash_alg, struct cc_hash_alg, ahash_alg);
 
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct ahash_req_ctx));
+	crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm),
+				     sizeof(struct ahash_req_ctx));
 
 	ctx->hash_mode = cc_alg->hash_mode;
 	ctx->hw_mode = cc_alg->hw_mode;
@@ -1140,7 +1140,7 @@ static int cc_cra_init(struct crypto_tfm *tfm)
 
 static void cc_cra_exit(struct crypto_tfm *tfm)
 {
-	struct cc_hash_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_tfm_ctx_dma(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 
 	dev_dbg(dev, "cc_cra_exit");
@@ -1149,9 +1149,9 @@ static void cc_cra_exit(struct crypto_tfm *tfm)
 
 static int cc_mac_update(struct ahash_request *req)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	unsigned int block_size = crypto_tfm_alg_blocksize(&tfm->base);
 	struct cc_crypto_req cc_req = {};
@@ -1217,9 +1217,9 @@ static int cc_mac_update(struct ahash_request *req)
 
 static int cc_mac_final(struct ahash_request *req)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	struct cc_crypto_req cc_req = {};
 	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
@@ -1338,9 +1338,9 @@ static int cc_mac_final(struct ahash_request *req)
 
 static int cc_mac_finup(struct ahash_request *req)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	struct cc_crypto_req cc_req = {};
 	struct cc_hw_desc desc[CC_MAX_HASH_SEQ_LEN];
@@ -1419,9 +1419,9 @@ static int cc_mac_finup(struct ahash_request *req)
 
 static int cc_mac_digest(struct ahash_request *req)
 {
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	u32 digestsize = crypto_ahash_digestsize(tfm);
 	struct cc_crypto_req cc_req = {};
@@ -1499,8 +1499,8 @@ static int cc_mac_digest(struct ahash_request *req)
 static int cc_hash_export(struct ahash_request *req, void *out)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	u8 *curr_buff = cc_hash_buf(state);
 	u32 curr_buff_cnt = *cc_hash_buf_cnt(state);
 	const u32 tmp = CC_EXPORT_MAGIC;
@@ -1525,9 +1525,9 @@ static int cc_hash_export(struct ahash_request *req, void *out)
 static int cc_hash_import(struct ahash_request *req, const void *in)
 {
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(ahash);
 	struct device *dev = drvdata_to_dev(ctx->drvdata);
-	struct ahash_req_ctx *state = ahash_request_ctx(req);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(req);
 	u32 tmp;
 
 	memcpy(&tmp, in, sizeof(u32));
@@ -1846,7 +1846,7 @@ static struct cc_hash_alg *cc_alloc_hash_alg(struct cc_hash_template *template,
 			 template->driver_name);
 	}
 	alg->cra_module = THIS_MODULE;
-	alg->cra_ctxsize = sizeof(struct cc_hash_ctx);
+	alg->cra_ctxsize = sizeof(struct cc_hash_ctx) + crypto_dma_padding();
 	alg->cra_priority = CC_CRA_PRIO;
 	alg->cra_blocksize = template->blocksize;
 	alg->cra_alignmask = 0;
@@ -2073,9 +2073,9 @@ static void cc_setup_xcbc(struct ahash_request *areq, struct cc_hw_desc desc[],
 			  unsigned int *seq_size)
 {
 	unsigned int idx = *seq_size;
-	struct ahash_req_ctx *state = ahash_request_ctx(areq);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 
 	/* Setup XCBC MAC K1 */
 	hw_desc_init(&desc[idx]);
@@ -2130,9 +2130,9 @@ static void cc_setup_cmac(struct ahash_request *areq, struct cc_hw_desc desc[],
 			  unsigned int *seq_size)
 {
 	unsigned int idx = *seq_size;
-	struct ahash_req_ctx *state = ahash_request_ctx(areq);
+	struct ahash_req_ctx *state = ahash_request_ctx_dma(areq);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
-	struct cc_hash_ctx *ctx = crypto_ahash_ctx(tfm);
+	struct cc_hash_ctx *ctx = crypto_ahash_ctx_dma(tfm);
 
 	/* Setup CMAC Key */
 	hw_desc_init(&desc[idx]);
-- 
GitLab


From e055bffaa390042d73fed56a0ef9bfe71a675614 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:20:53 +0800
Subject: [PATCH 3576/4122] crypto: chelsio - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/chelsio/chcr_algo.c | 43 +++++++++++++++---------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 9fac1e7584066..68d65773ef2bd 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -210,7 +210,7 @@ static inline int chcr_handle_aead_resp(struct aead_request *req,
 					 unsigned char *input,
 					 int err)
 {
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_dev *dev = a_ctx(tfm)->dev;
 
@@ -718,7 +718,7 @@ static inline int get_qidxs(struct crypto_async_request *req,
 	{
 		struct aead_request *aead_req =
 			container_of(req, struct aead_request, base);
-		struct chcr_aead_reqctx *reqctx = aead_request_ctx(aead_req);
+		struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(aead_req);
 		*txqidx = reqctx->txqidx;
 		*rxqidx = reqctx->rxqidx;
 		break;
@@ -2362,7 +2362,7 @@ static void chcr_hmac_cra_exit(struct crypto_tfm *tfm)
 
 inline void chcr_aead_common_exit(struct aead_request *req)
 {
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct uld_ctx *u_ctx = ULD_CTX(a_ctx(tfm));
 
@@ -2373,7 +2373,7 @@ static int chcr_aead_common_init(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	int error = -EINVAL;
 
@@ -2417,7 +2417,7 @@ static int chcr_aead_fallback(struct aead_request *req, unsigned short op_type)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
-	struct aead_request *subreq = aead_request_ctx(req);
+	struct aead_request *subreq = aead_request_ctx_dma(req);
 
 	aead_request_set_tfm(subreq, aeadctx->sw_cipher);
 	aead_request_set_callback(subreq, req->base.flags,
@@ -2438,7 +2438,7 @@ static struct sk_buff *create_authenc_wr(struct aead_request *req,
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
 	struct chcr_authenc_ctx *actx = AUTHENC_CTX(aeadctx);
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct sk_buff *skb = NULL;
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
@@ -2576,7 +2576,7 @@ int chcr_aead_dma_map(struct device *dev,
 		      unsigned short op_type)
 {
 	int error;
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	int src_len, dst_len;
@@ -2637,7 +2637,7 @@ void chcr_aead_dma_unmap(struct device *dev,
 			 struct aead_request *req,
 			 unsigned short op_type)
 {
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	int src_len, dst_len;
@@ -2678,7 +2678,7 @@ void chcr_add_aead_src_ent(struct aead_request *req,
 			   struct ulptx_sgl *ulptx)
 {
 	struct ulptx_walk ulp_walk;
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 
 	if (reqctx->imm) {
 		u8 *buf = (u8 *)ulptx;
@@ -2704,7 +2704,7 @@ void chcr_add_aead_dst_ent(struct aead_request *req,
 			   struct cpl_rx_phys_dsgl *phys_cpl,
 			   unsigned short qid)
 {
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct dsgl_walk dsgl_walk;
 	unsigned int authsize = crypto_aead_authsize(tfm);
@@ -2894,7 +2894,7 @@ static int generate_b0(struct aead_request *req, u8 *ivptr,
 	unsigned int l, lp, m;
 	int rc;
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	u8 *b0 = reqctx->scratch_pad;
 
 	m = crypto_aead_authsize(aead);
@@ -2932,7 +2932,7 @@ static int ccm_format_packet(struct aead_request *req,
 			     unsigned short op_type,
 			     unsigned int assoclen)
 {
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
 	int rc = 0;
@@ -2963,7 +2963,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl,
 	struct chcr_context *ctx = a_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	unsigned int cipher_mode = CHCR_SCMD_CIPHER_MODE_AES_CCM;
 	unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC;
 	unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
@@ -3036,7 +3036,7 @@ static struct sk_buff *create_aead_ccm_wr(struct aead_request *req,
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(tfm));
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct sk_buff *skb = NULL;
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
@@ -3135,7 +3135,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	struct chcr_context *ctx = a_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct sk_buff *skb = NULL;
 	struct chcr_wr *chcr_req;
 	struct cpl_rx_phys_dsgl *phys_cpl;
@@ -3255,9 +3255,10 @@ static int chcr_aead_cra_init(struct crypto_aead *tfm)
 					       CRYPTO_ALG_ASYNC);
 	if  (IS_ERR(aeadctx->sw_cipher))
 		return PTR_ERR(aeadctx->sw_cipher);
-	crypto_aead_set_reqsize(tfm, max(sizeof(struct chcr_aead_reqctx),
-				 sizeof(struct aead_request) +
-				 crypto_aead_reqsize(aeadctx->sw_cipher)));
+	crypto_aead_set_reqsize_dma(
+		tfm, max(sizeof(struct chcr_aead_reqctx),
+			 sizeof(struct aead_request) +
+			 crypto_aead_reqsize(aeadctx->sw_cipher)));
 	return chcr_device_init(a_ctx(tfm));
 }
 
@@ -3735,7 +3736,7 @@ static int chcr_aead_op(struct aead_request *req,
 			create_wr_t create_wr_fn)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct chcr_aead_reqctx  *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct chcr_context *ctx = a_ctx(tfm);
 	struct uld_ctx *u_ctx = ULD_CTX(ctx);
 	struct sk_buff *skb;
@@ -3785,7 +3786,7 @@ static int chcr_aead_op(struct aead_request *req,
 static int chcr_aead_encrypt(struct aead_request *req)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	struct chcr_context *ctx = a_ctx(tfm);
 	unsigned int cpu;
 
@@ -3816,7 +3817,7 @@ static int chcr_aead_decrypt(struct aead_request *req)
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct chcr_context *ctx = a_ctx(tfm);
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(ctx);
-	struct chcr_aead_reqctx *reqctx = aead_request_ctx(req);
+	struct chcr_aead_reqctx *reqctx = aead_request_ctx_dma(req);
 	int size;
 	unsigned int cpu;
 
-- 
GitLab


From 80b61baca4c8698139881f41473e652bedc65a73 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:20:55 +0800
Subject: [PATCH 3577/4122] crypto: hisilicon/hpre - Set DMA alignment
 explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/hisilicon/hpre/hpre_crypto.c | 40 +++++++++++++--------
 1 file changed, 25 insertions(+), 15 deletions(-)

diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index 5f6d363c9435e..8ede77310dc52 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -147,6 +147,16 @@ struct hpre_asym_request {
 	struct timespec64 req_time;
 };
 
+static inline unsigned int hpre_align_sz(void)
+{
+	return ((crypto_dma_align() - 1) | (HPRE_ALIGN_SZ - 1)) + 1;
+}
+
+static inline unsigned int hpre_align_pd(void)
+{
+	return (hpre_align_sz() - 1) & ~(crypto_tfm_ctx_alignment() - 1);
+}
+
 static int hpre_alloc_req_id(struct hpre_ctx *ctx)
 {
 	unsigned long flags;
@@ -517,7 +527,7 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
 		}
 
 		tmp = akcipher_request_ctx(akreq);
-		h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+		h_req = PTR_ALIGN(tmp, hpre_align_sz());
 		h_req->cb = hpre_rsa_cb;
 		h_req->areq.rsa = akreq;
 		msg = &h_req->req;
@@ -531,7 +541,7 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
 		}
 
 		tmp = kpp_request_ctx(kreq);
-		h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+		h_req = PTR_ALIGN(tmp, hpre_align_sz());
 		h_req->cb = hpre_dh_cb;
 		h_req->areq.dh = kreq;
 		msg = &h_req->req;
@@ -582,7 +592,7 @@ static int hpre_dh_compute_value(struct kpp_request *req)
 	struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 	void *tmp = kpp_request_ctx(req);
-	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz());
 	struct hpre_sqe *msg = &hpre_req->req;
 	int ret;
 
@@ -740,7 +750,7 @@ static int hpre_dh_init_tfm(struct crypto_kpp *tfm)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 
-	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
 
 	return hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE);
 }
@@ -785,7 +795,7 @@ static int hpre_rsa_enc(struct akcipher_request *req)
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
 	void *tmp = akcipher_request_ctx(req);
-	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz());
 	struct hpre_sqe *msg = &hpre_req->req;
 	int ret;
 
@@ -833,7 +843,7 @@ static int hpre_rsa_dec(struct akcipher_request *req)
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct hpre_ctx *ctx = akcipher_tfm_ctx(tfm);
 	void *tmp = akcipher_request_ctx(req);
-	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz());
 	struct hpre_sqe *msg = &hpre_req->req;
 	int ret;
 
@@ -1168,7 +1178,7 @@ static int hpre_rsa_init_tfm(struct crypto_akcipher *tfm)
 	}
 
 	akcipher_set_reqsize(tfm, sizeof(struct hpre_asym_request) +
-				  HPRE_ALIGN_SZ);
+				  hpre_align_pd());
 
 	ret = hpre_ctx_init(ctx, HPRE_V2_ALG_TYPE);
 	if (ret)
@@ -1490,7 +1500,7 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx,
 	}
 
 	tmp = kpp_request_ctx(req);
-	h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	h_req = PTR_ALIGN(tmp, hpre_align_sz());
 	h_req->cb = hpre_ecdh_cb;
 	h_req->areq.ecdh = req;
 	msg = &h_req->req;
@@ -1571,7 +1581,7 @@ static int hpre_ecdh_compute_value(struct kpp_request *req)
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 	struct device *dev = ctx->dev;
 	void *tmp = kpp_request_ctx(req);
-	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz());
 	struct hpre_sqe *msg = &hpre_req->req;
 	int ret;
 
@@ -1622,7 +1632,7 @@ static int hpre_ecdh_nist_p192_init_tfm(struct crypto_kpp *tfm)
 
 	ctx->curve_id = ECC_CURVE_NIST_P192;
 
-	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
 
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
@@ -1633,7 +1643,7 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm)
 
 	ctx->curve_id = ECC_CURVE_NIST_P256;
 
-	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
 
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
@@ -1644,7 +1654,7 @@ static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)
 
 	ctx->curve_id = ECC_CURVE_NIST_P384;
 
-	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
 
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
@@ -1802,7 +1812,7 @@ static int hpre_curve25519_msg_request_set(struct hpre_ctx *ctx,
 	}
 
 	tmp = kpp_request_ctx(req);
-	h_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	h_req = PTR_ALIGN(tmp, hpre_align_sz());
 	h_req->cb = hpre_curve25519_cb;
 	h_req->areq.curve25519 = req;
 	msg = &h_req->req;
@@ -1923,7 +1933,7 @@ static int hpre_curve25519_compute_value(struct kpp_request *req)
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 	struct device *dev = ctx->dev;
 	void *tmp = kpp_request_ctx(req);
-	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
+	struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, hpre_align_sz());
 	struct hpre_sqe *msg = &hpre_req->req;
 	int ret;
 
@@ -1972,7 +1982,7 @@ static int hpre_curve25519_init_tfm(struct crypto_kpp *tfm)
 {
 	struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
 
-	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ);
+	kpp_set_reqsize(tfm, sizeof(struct hpre_asym_request) + hpre_align_pd());
 
 	return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
 }
-- 
GitLab


From b2e2e2da7b4f62c54ce0d6a66c54e9fb05a8d514 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:20:57 +0800
Subject: [PATCH 3578/4122] crypto: safexcel - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/inside-secure/safexcel_hash.c | 99 ++++++++++----------
 1 file changed, 50 insertions(+), 49 deletions(-)

diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index 103fc551d2af9..ca46328472d4b 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -231,7 +231,7 @@ static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv,
 	struct safexcel_result_desc *rdesc;
 	struct ahash_request *areq = ahash_request_cast(async);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
-	struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *sreq = ahash_request_ctx_dma(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(ahash);
 	u64 cache_len;
 
@@ -312,7 +312,7 @@ static int safexcel_ahash_send_req(struct crypto_async_request *async, int ring,
 				   int *commands, int *results)
 {
 	struct ahash_request *areq = ahash_request_cast(async);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_crypto_priv *priv = ctx->base.priv;
 	struct safexcel_command_desc *cdesc, *first_cdesc = NULL;
@@ -569,7 +569,7 @@ static int safexcel_handle_result(struct safexcel_crypto_priv *priv, int ring,
 				  bool *should_complete, int *ret)
 {
 	struct ahash_request *areq = ahash_request_cast(async);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	int err;
 
 	BUG_ON(!(priv->flags & EIP197_TRC_CACHE) && req->needs_inv);
@@ -608,7 +608,7 @@ static int safexcel_ahash_send(struct crypto_async_request *async,
 			       int ring, int *commands, int *results)
 {
 	struct ahash_request *areq = ahash_request_cast(async);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	int ret;
 
 	if (req->needs_inv)
@@ -624,7 +624,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_crypto_priv *priv = ctx->base.priv;
 	EIP197_REQUEST_ON_STACK(req, ahash, EIP197_AHASH_REQ_SIZE);
-	struct safexcel_ahash_req *rctx = ahash_request_ctx(req);
+	struct safexcel_ahash_req *rctx = ahash_request_ctx_dma(req);
 	struct safexcel_inv_result result = {};
 	int ring = ctx->base.ring;
 
@@ -663,7 +663,7 @@ static int safexcel_ahash_exit_inv(struct crypto_tfm *tfm)
  */
 static int safexcel_ahash_cache(struct ahash_request *areq)
 {
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	u64 cache_len;
 
 	/* cache_len: everything accepted by the driver but not sent yet,
@@ -689,7 +689,7 @@ static int safexcel_ahash_cache(struct ahash_request *areq)
 static int safexcel_ahash_enqueue(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	struct safexcel_crypto_priv *priv = ctx->base.priv;
 	int ret, ring;
 
@@ -741,7 +741,7 @@ static int safexcel_ahash_enqueue(struct ahash_request *areq)
 
 static int safexcel_ahash_update(struct ahash_request *areq)
 {
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	int ret;
 
 	/* If the request is 0 length, do nothing */
@@ -766,7 +766,7 @@ static int safexcel_ahash_update(struct ahash_request *areq)
 
 static int safexcel_ahash_final(struct ahash_request *areq)
 {
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 
 	req->finish = true;
@@ -870,7 +870,7 @@ static int safexcel_ahash_final(struct ahash_request *areq)
 
 static int safexcel_ahash_finup(struct ahash_request *areq)
 {
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	req->finish = true;
 
@@ -880,7 +880,7 @@ static int safexcel_ahash_finup(struct ahash_request *areq)
 
 static int safexcel_ahash_export(struct ahash_request *areq, void *out)
 {
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	struct safexcel_ahash_export_state *export = out;
 
 	export->len = req->len;
@@ -896,7 +896,7 @@ static int safexcel_ahash_export(struct ahash_request *areq, void *out)
 
 static int safexcel_ahash_import(struct ahash_request *areq, const void *in)
 {
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 	const struct safexcel_ahash_export_state *export = in;
 	int ret;
 
@@ -927,15 +927,15 @@ static int safexcel_ahash_cra_init(struct crypto_tfm *tfm)
 	ctx->base.handle_result = safexcel_handle_result;
 	ctx->fb_do_setkey = false;
 
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct safexcel_ahash_req));
+	crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm),
+				     sizeof(struct safexcel_ahash_req));
 	return 0;
 }
 
 static int safexcel_sha1_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1012,7 +1012,7 @@ struct safexcel_alg_template safexcel_alg_sha1 = {
 static int safexcel_hmac_sha1_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1124,7 +1124,7 @@ static int safexcel_hmac_init_iv(struct ahash_request *areq,
 	if (ret)
 		return ret;
 
-	req = ahash_request_ctx(areq);
+	req = ahash_request_ctx_dma(areq);
 	req->hmac = true;
 	req->last_req = true;
 
@@ -1264,7 +1264,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha1 = {
 static int safexcel_sha256_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1321,7 +1321,7 @@ struct safexcel_alg_template safexcel_alg_sha256 = {
 static int safexcel_sha224_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1385,7 +1385,7 @@ static int safexcel_hmac_sha224_setkey(struct crypto_ahash *tfm, const u8 *key,
 static int safexcel_hmac_sha224_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1457,7 +1457,7 @@ static int safexcel_hmac_sha256_setkey(struct crypto_ahash *tfm, const u8 *key,
 static int safexcel_hmac_sha256_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1522,7 +1522,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha256 = {
 static int safexcel_sha512_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1579,7 +1579,7 @@ struct safexcel_alg_template safexcel_alg_sha512 = {
 static int safexcel_sha384_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1643,7 +1643,7 @@ static int safexcel_hmac_sha512_setkey(struct crypto_ahash *tfm, const u8 *key,
 static int safexcel_hmac_sha512_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1715,7 +1715,7 @@ static int safexcel_hmac_sha384_setkey(struct crypto_ahash *tfm, const u8 *key,
 static int safexcel_hmac_sha384_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1780,7 +1780,7 @@ struct safexcel_alg_template safexcel_alg_hmac_sha384 = {
 static int safexcel_md5_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1837,7 +1837,7 @@ struct safexcel_alg_template safexcel_alg_md5 = {
 static int safexcel_hmac_md5_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1920,7 +1920,7 @@ static int safexcel_crc32_cra_init(struct crypto_tfm *tfm)
 static int safexcel_crc32_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -1992,7 +1992,7 @@ struct safexcel_alg_template safexcel_alg_crc32 = {
 static int safexcel_cbcmac_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2252,7 +2252,7 @@ struct safexcel_alg_template safexcel_alg_cmac = {
 static int safexcel_sm3_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2316,7 +2316,7 @@ static int safexcel_hmac_sm3_setkey(struct crypto_ahash *tfm, const u8 *key,
 static int safexcel_hmac_sm3_init(struct ahash_request *areq)
 {
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2382,7 +2382,7 @@ static int safexcel_sha3_224_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2400,7 +2400,7 @@ static int safexcel_sha3_fbcheck(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *subreq = ahash_request_ctx(req);
+	struct ahash_request *subreq = ahash_request_ctx_dma(req);
 	int ret = 0;
 
 	if (ctx->do_fallback) {
@@ -2437,7 +2437,7 @@ static int safexcel_sha3_update(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *subreq = ahash_request_ctx(req);
+	struct ahash_request *subreq = ahash_request_ctx_dma(req);
 
 	ctx->do_fallback = true;
 	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_update(subreq);
@@ -2447,7 +2447,7 @@ static int safexcel_sha3_final(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *subreq = ahash_request_ctx(req);
+	struct ahash_request *subreq = ahash_request_ctx_dma(req);
 
 	ctx->do_fallback = true;
 	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_final(subreq);
@@ -2457,7 +2457,7 @@ static int safexcel_sha3_finup(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *subreq = ahash_request_ctx(req);
+	struct ahash_request *subreq = ahash_request_ctx_dma(req);
 
 	ctx->do_fallback |= !req->nbytes;
 	if (ctx->do_fallback)
@@ -2472,7 +2472,7 @@ static int safexcel_sha3_digest_fallback(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *subreq = ahash_request_ctx(req);
+	struct ahash_request *subreq = ahash_request_ctx_dma(req);
 
 	ctx->do_fallback = true;
 	ctx->fb_init_done = false;
@@ -2492,7 +2492,7 @@ static int safexcel_sha3_export(struct ahash_request *req, void *out)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *subreq = ahash_request_ctx(req);
+	struct ahash_request *subreq = ahash_request_ctx_dma(req);
 
 	ctx->do_fallback = true;
 	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_export(subreq, out);
@@ -2502,7 +2502,7 @@ static int safexcel_sha3_import(struct ahash_request *req, const void *in)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct ahash_request *subreq = ahash_request_ctx(req);
+	struct ahash_request *subreq = ahash_request_ctx_dma(req);
 
 	ctx->do_fallback = true;
 	return safexcel_sha3_fbcheck(req) ?: crypto_ahash_import(subreq, in);
@@ -2526,9 +2526,10 @@ static int safexcel_sha3_cra_init(struct crypto_tfm *tfm)
 	/* Update statesize from fallback algorithm! */
 	crypto_hash_alg_common(ahash)->statesize =
 		crypto_ahash_statesize(ctx->fback);
-	crypto_ahash_set_reqsize(ahash, max(sizeof(struct safexcel_ahash_req),
-					    sizeof(struct ahash_request) +
-					    crypto_ahash_reqsize(ctx->fback)));
+	crypto_ahash_set_reqsize_dma(
+		ahash, max(sizeof(struct safexcel_ahash_req),
+			   sizeof(struct ahash_request) +
+			   crypto_ahash_reqsize(ctx->fback)));
 	return 0;
 }
 
@@ -2575,7 +2576,7 @@ static int safexcel_sha3_256_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2633,7 +2634,7 @@ static int safexcel_sha3_384_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2691,7 +2692,7 @@ static int safexcel_sha3_512_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2841,7 +2842,7 @@ static int safexcel_hmac_sha3_224_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2912,7 +2913,7 @@ static int safexcel_hmac_sha3_256_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -2983,7 +2984,7 @@ static int safexcel_hmac_sha3_384_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
@@ -3054,7 +3055,7 @@ static int safexcel_hmac_sha3_512_init(struct ahash_request *areq)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(tfm);
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
+	struct safexcel_ahash_req *req = ahash_request_ctx_dma(areq);
 
 	memset(req, 0, sizeof(*req));
 
-- 
GitLab


From be75969c81d9a6e13487e1c043e62ed5432d9fa1 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:20:59 +0800
Subject: [PATCH 3579/4122] crypto: keembay - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/keembay/keembay-ocs-hcu-core.c | 26 +++++++++----------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/crypto/keembay/keembay-ocs-hcu-core.c b/drivers/crypto/keembay/keembay-ocs-hcu-core.c
index 0379dbf32a4c4..d4bcbed1f5466 100644
--- a/drivers/crypto/keembay/keembay-ocs-hcu-core.c
+++ b/drivers/crypto/keembay/keembay-ocs-hcu-core.c
@@ -226,7 +226,7 @@ static void kmb_ocs_hcu_dma_cleanup(struct ahash_request *req,
  */
 static int kmb_ocs_dma_prepare(struct ahash_request *req)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 	struct device *dev = rctx->hcu_dev->dev;
 	unsigned int remainder = 0;
 	unsigned int total;
@@ -356,7 +356,7 @@ cleanup:
 
 static void kmb_ocs_hcu_secure_cleanup(struct ahash_request *req)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 
 	/* Clear buffer of any data. */
 	memzero_explicit(rctx->buffer, sizeof(rctx->buffer));
@@ -374,7 +374,7 @@ static int kmb_ocs_hcu_handle_queue(struct ahash_request *req)
 
 static int prepare_ipad(struct ahash_request *req)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm);
 	int i;
@@ -414,7 +414,7 @@ static int kmb_ocs_hcu_do_one_request(struct crypto_engine *engine, void *areq)
 						 base);
 	struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 	struct ocs_hcu_ctx *tctx = crypto_ahash_ctx(tfm);
 	int rc;
 	int i;
@@ -561,7 +561,7 @@ error:
 static int kmb_ocs_hcu_init(struct ahash_request *req)
 {
 	struct ocs_hcu_dev *hcu_dev = kmb_ocs_hcu_find_dev(req);
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm);
 
@@ -614,7 +614,7 @@ static int kmb_ocs_hcu_init(struct ahash_request *req)
 
 static int kmb_ocs_hcu_update(struct ahash_request *req)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 	int rc;
 
 	if (!req->nbytes)
@@ -650,7 +650,7 @@ static int kmb_ocs_hcu_update(struct ahash_request *req)
 /* Common logic for kmb_ocs_hcu_final() and kmb_ocs_hcu_finup(). */
 static int kmb_ocs_hcu_fin_common(struct ahash_request *req)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct ocs_hcu_ctx *ctx = crypto_ahash_ctx(tfm);
 	int rc;
@@ -687,7 +687,7 @@ static int kmb_ocs_hcu_fin_common(struct ahash_request *req)
 
 static int kmb_ocs_hcu_final(struct ahash_request *req)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 
 	rctx->sg_data_total = 0;
 	rctx->sg_data_offset = 0;
@@ -698,7 +698,7 @@ static int kmb_ocs_hcu_final(struct ahash_request *req)
 
 static int kmb_ocs_hcu_finup(struct ahash_request *req)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 
 	rctx->sg_data_total = req->nbytes;
 	rctx->sg_data_offset = 0;
@@ -726,7 +726,7 @@ static int kmb_ocs_hcu_digest(struct ahash_request *req)
 
 static int kmb_ocs_hcu_export(struct ahash_request *req, void *out)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 
 	/* Intermediate data is always stored and applied per request. */
 	memcpy(out, rctx, sizeof(*rctx));
@@ -736,7 +736,7 @@ static int kmb_ocs_hcu_export(struct ahash_request *req, void *out)
 
 static int kmb_ocs_hcu_import(struct ahash_request *req, const void *in)
 {
-	struct ocs_hcu_rctx *rctx = ahash_request_ctx(req);
+	struct ocs_hcu_rctx *rctx = ahash_request_ctx_dma(req);
 
 	/* Intermediate data is always stored and applied per request. */
 	memcpy(rctx, in, sizeof(*rctx));
@@ -822,8 +822,8 @@ err_free_ahash:
 /* Set request size and initialize tfm context. */
 static void __cra_init(struct crypto_tfm *tfm, struct ocs_hcu_ctx *ctx)
 {
-	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
-				 sizeof(struct ocs_hcu_rctx));
+	crypto_ahash_set_reqsize_dma(__crypto_ahash_cast(tfm),
+				     sizeof(struct ocs_hcu_rctx));
 
 	/* Init context to 0. */
 	memzero_explicit(ctx, sizeof(*ctx));
-- 
GitLab


From 0a55f4e38556f7e59b0f30fac0751e3a04be44c2 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:21:01 +0800
Subject: [PATCH 3580/4122] crypto: octeontx - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../crypto/marvell/octeontx/otx_cptvf_algs.c  | 69 ++++++++++---------
 1 file changed, 35 insertions(+), 34 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
index 01c48ddc4eeb4..80ba77c793a7c 100644
--- a/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx/otx_cptvf_algs.c
@@ -103,7 +103,7 @@ static inline int validate_hmac_cipher_null(struct otx_cpt_req_info *cpt_req)
 
 	req = container_of(cpt_req->areq, struct aead_request, base);
 	tfm = crypto_aead_reqtfm(req);
-	rctx = aead_request_ctx(req);
+	rctx = aead_request_ctx_dma(req);
 	if (memcmp(rctx->fctx.hmac.s.hmac_calc,
 		   rctx->fctx.hmac.s.hmac_recv,
 		   crypto_aead_authsize(tfm)) != 0)
@@ -155,7 +155,7 @@ static void output_iv_copyback(struct crypto_async_request *areq)
 	ctx = crypto_skcipher_ctx(stfm);
 	if (ctx->cipher_type == OTX_CPT_AES_CBC ||
 	    ctx->cipher_type == OTX_CPT_DES3_CBC) {
-		rctx = skcipher_request_ctx(sreq);
+		rctx = skcipher_request_ctx_dma(sreq);
 		req_info = &rctx->cpt_req;
 		ivsize = crypto_skcipher_ivsize(stfm);
 		start = sreq->cryptlen - ivsize;
@@ -233,7 +233,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
 				 u32 *argcnt)
 {
 	struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
-	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	struct crypto_tfm *tfm = crypto_skcipher_tfm(stfm);
 	struct otx_cpt_enc_ctx *ctx = crypto_tfm_ctx(tfm);
@@ -303,7 +303,7 @@ static inline u32 create_ctx_hdr(struct skcipher_request *req, u32 enc,
 static inline u32 create_input_list(struct skcipher_request *req, u32 enc,
 				    u32 enc_iv_len)
 {
-	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 argcnt =  0;
 	int ret;
@@ -321,7 +321,7 @@ static inline u32 create_input_list(struct skcipher_request *req, u32 enc,
 static inline void create_output_list(struct skcipher_request *req,
 				      u32 enc_iv_len)
 {
-	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 argcnt = 0;
 
@@ -340,7 +340,7 @@ static inline void create_output_list(struct skcipher_request *req,
 static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc)
 {
 	struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
-	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 enc_iv_len = crypto_skcipher_ivsize(stfm);
 	struct pci_dev *pdev;
@@ -501,15 +501,16 @@ static int otx_cpt_enc_dec_init(struct crypto_skcipher *tfm)
 	 * allocated since the cryptd daemon uses
 	 * this memory for request_ctx information
 	 */
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx) +
-					sizeof(struct skcipher_request));
+	crypto_skcipher_set_reqsize_dma(
+		tfm, sizeof(struct otx_cpt_req_ctx) +
+		     sizeof(struct skcipher_request));
 
 	return 0;
 }
 
 static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type)
 {
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	ctx->cipher_type = cipher_type;
 	ctx->mac_type = mac_type;
@@ -551,7 +552,7 @@ static int cpt_aead_init(struct crypto_aead *tfm, u8 cipher_type, u8 mac_type)
 		}
 	}
 
-	crypto_aead_set_reqsize(tfm, sizeof(struct otx_cpt_req_ctx));
+	crypto_aead_set_reqsize_dma(tfm, sizeof(struct otx_cpt_req_ctx));
 
 	return 0;
 }
@@ -603,7 +604,7 @@ static int otx_cpt_aead_gcm_aes_init(struct crypto_aead *tfm)
 
 static void otx_cpt_aead_exit(struct crypto_aead *tfm)
 {
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	kfree(ctx->ipad);
 	kfree(ctx->opad);
@@ -619,7 +620,7 @@ static void otx_cpt_aead_exit(struct crypto_aead *tfm)
 static int otx_cpt_aead_set_authsize(struct crypto_aead *tfm,
 				     unsigned int authsize)
 {
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	switch (ctx->mac_type) {
 	case OTX_CPT_SHA1:
@@ -739,7 +740,7 @@ static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad)
 
 static int aead_hmac_init(struct crypto_aead *cipher)
 {
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 	int state_size = crypto_shash_statesize(ctx->hashalg);
 	int ds = crypto_shash_digestsize(ctx->hashalg);
 	int bs = crypto_shash_blocksize(ctx->hashalg);
@@ -837,7 +838,7 @@ static int otx_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher,
 					   const unsigned char *key,
 					   unsigned int keylen)
 {
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 	struct crypto_authenc_key_param *param;
 	int enckeylen = 0, authkeylen = 0;
 	struct rtattr *rta = (void *)key;
@@ -896,7 +897,7 @@ static int otx_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher,
 					    const unsigned char *key,
 					    unsigned int keylen)
 {
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 	struct crypto_authenc_key_param *param;
 	struct rtattr *rta = (void *)key;
 	int enckeylen = 0;
@@ -932,7 +933,7 @@ static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher,
 				       const unsigned char *key,
 				       unsigned int keylen)
 {
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 
 	/*
 	 * For aes gcm we expect to get encryption key (16, 24, 32 bytes)
@@ -965,9 +966,9 @@ static int otx_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher,
 static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc,
 				      u32 *argcnt)
 {
-	struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	struct otx_cpt_fc_ctx *fctx = &rctx->fctx;
 	int mac_len = crypto_aead_authsize(tfm);
@@ -1050,9 +1051,9 @@ static inline u32 create_aead_ctx_hdr(struct aead_request *req, u32 enc,
 static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt,
 				      u32 enc)
 {
-	struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 
 	req_info->ctrl.s.dma_mode = OTX_CPT_DMA_GATHER_SCATTER;
@@ -1076,7 +1077,7 @@ static inline u32 create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt,
 
 static inline u32 create_aead_input_list(struct aead_request *req, u32 enc)
 {
-	struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 inputlen =  req->cryptlen + req->assoclen;
 	u32 status, argcnt = 0;
@@ -1093,7 +1094,7 @@ static inline u32 create_aead_input_list(struct aead_request *req, u32 enc)
 static inline u32 create_aead_output_list(struct aead_request *req, u32 enc,
 					  u32 mac_len)
 {
-	struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info =  &rctx->cpt_req;
 	u32 argcnt = 0, outputlen = 0;
 
@@ -1111,7 +1112,7 @@ static inline u32 create_aead_output_list(struct aead_request *req, u32 enc,
 static inline u32 create_aead_null_input_list(struct aead_request *req,
 					      u32 enc, u32 mac_len)
 {
-	struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 inputlen, argcnt = 0;
 
@@ -1130,7 +1131,7 @@ static inline u32 create_aead_null_input_list(struct aead_request *req,
 static inline u32 create_aead_null_output_list(struct aead_request *req,
 					       u32 enc, u32 mac_len)
 {
-	struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info =  &rctx->cpt_req;
 	struct scatterlist *dst;
 	u8 *ptr = NULL;
@@ -1217,7 +1218,7 @@ error:
 
 static u32 cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc)
 {
-	struct otx_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx_cpt_req_info *req_info = &rctx->cpt_req;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct pci_dev *pdev;
@@ -1409,7 +1410,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha1_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1428,7 +1429,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha256_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1447,7 +1448,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha384_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1466,7 +1467,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha512_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1485,7 +1486,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha1_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1504,7 +1505,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha256_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1523,7 +1524,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha384_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1542,7 +1543,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha512_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1561,7 +1562,7 @@ static struct aead_alg otx_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_rfc4106_gcm_aes",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,
-		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
-- 
GitLab


From d887dec105cdeda6b8da0e84d96c7a07d80269bc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:21:03 +0800
Subject: [PATCH 3581/4122] crypto: octeontx2 - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../marvell/octeontx2/otx2_cptvf_algs.c       | 79 ++++++++++---------
 1 file changed, 40 insertions(+), 39 deletions(-)

diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
index 67530e90bbfed..30b423605c9c1 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_algs.c
@@ -87,7 +87,7 @@ static inline int validate_hmac_cipher_null(struct otx2_cpt_req_info *cpt_req)
 
 	req = container_of(cpt_req->areq, struct aead_request, base);
 	tfm = crypto_aead_reqtfm(req);
-	rctx = aead_request_ctx(req);
+	rctx = aead_request_ctx_dma(req);
 	if (memcmp(rctx->fctx.hmac.s.hmac_calc,
 		   rctx->fctx.hmac.s.hmac_recv,
 		   crypto_aead_authsize(tfm)) != 0)
@@ -137,7 +137,7 @@ static void output_iv_copyback(struct crypto_async_request *areq)
 	ctx = crypto_skcipher_ctx(stfm);
 	if (ctx->cipher_type == OTX2_CPT_AES_CBC ||
 	    ctx->cipher_type == OTX2_CPT_DES3_CBC) {
-		rctx = skcipher_request_ctx(sreq);
+		rctx = skcipher_request_ctx_dma(sreq);
 		req_info = &rctx->cpt_req;
 		ivsize = crypto_skcipher_ivsize(stfm);
 		start = sreq->cryptlen - ivsize;
@@ -219,7 +219,7 @@ static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc,
 				 u32 *argcnt)
 {
 	struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
-	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	struct otx2_cpt_fc_ctx *fctx = &rctx->fctx;
@@ -288,7 +288,7 @@ static inline int create_ctx_hdr(struct skcipher_request *req, u32 enc,
 static inline int create_input_list(struct skcipher_request *req, u32 enc,
 				    u32 enc_iv_len)
 {
-	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 argcnt =  0;
 	int ret;
@@ -306,7 +306,7 @@ static inline int create_input_list(struct skcipher_request *req, u32 enc,
 static inline void create_output_list(struct skcipher_request *req,
 				      u32 enc_iv_len)
 {
-	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 argcnt = 0;
 
@@ -325,7 +325,7 @@ static inline void create_output_list(struct skcipher_request *req,
 static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc)
 {
 	struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
-	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm);
 	int ret;
 
@@ -348,7 +348,7 @@ static int skcipher_do_fallback(struct skcipher_request *req, bool is_enc)
 static inline int cpt_enc_dec(struct skcipher_request *req, u32 enc)
 {
 	struct crypto_skcipher *stfm = crypto_skcipher_reqtfm(req);
-	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = skcipher_request_ctx_dma(req);
 	struct otx2_cpt_enc_ctx *ctx = crypto_skcipher_ctx(stfm);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 enc_iv_len = crypto_skcipher_ivsize(stfm);
@@ -537,8 +537,9 @@ static int otx2_cpt_enc_dec_init(struct crypto_skcipher *stfm)
 	 * allocated since the cryptd daemon uses
 	 * this memory for request_ctx information
 	 */
-	crypto_skcipher_set_reqsize(stfm, sizeof(struct otx2_cpt_req_ctx) +
-					sizeof(struct skcipher_request));
+	crypto_skcipher_set_reqsize_dma(
+		stfm, sizeof(struct otx2_cpt_req_ctx) +
+		      sizeof(struct skcipher_request));
 
 	return cpt_skcipher_fallback_init(ctx, alg);
 }
@@ -572,7 +573,7 @@ static int cpt_aead_fallback_init(struct otx2_cpt_aead_ctx *ctx,
 
 static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(atfm);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(atfm);
 	struct crypto_tfm *tfm = crypto_aead_tfm(atfm);
 	struct crypto_alg *alg = tfm->__crt_alg;
 
@@ -629,7 +630,7 @@ static int cpt_aead_init(struct crypto_aead *atfm, u8 cipher_type, u8 mac_type)
 		ctx->enc_align_len = 1;
 		break;
 	}
-	crypto_aead_set_reqsize(atfm, sizeof(struct otx2_cpt_req_ctx));
+	crypto_aead_set_reqsize_dma(atfm, sizeof(struct otx2_cpt_req_ctx));
 
 	return cpt_aead_fallback_init(ctx, alg);
 }
@@ -681,7 +682,7 @@ static int otx2_cpt_aead_gcm_aes_init(struct crypto_aead *tfm)
 
 static void otx2_cpt_aead_exit(struct crypto_aead *tfm)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	kfree(ctx->ipad);
 	kfree(ctx->opad);
@@ -698,7 +699,7 @@ static void otx2_cpt_aead_exit(struct crypto_aead *tfm)
 static int otx2_cpt_aead_gcm_set_authsize(struct crypto_aead *tfm,
 					  unsigned int authsize)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	if (crypto_rfc4106_check_authsize(authsize))
 		return -EINVAL;
@@ -722,7 +723,7 @@ static int otx2_cpt_aead_set_authsize(struct crypto_aead *tfm,
 static int otx2_cpt_aead_null_set_authsize(struct crypto_aead *tfm,
 					   unsigned int authsize)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 
 	ctx->is_trunc_hmac = true;
 	tfm->authsize = authsize;
@@ -794,7 +795,7 @@ static int copy_pad(u8 mac_type, u8 *out_pad, u8 *in_pad)
 
 static int aead_hmac_init(struct crypto_aead *cipher)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 	int state_size = crypto_shash_statesize(ctx->hashalg);
 	int ds = crypto_shash_digestsize(ctx->hashalg);
 	int bs = crypto_shash_blocksize(ctx->hashalg);
@@ -892,7 +893,7 @@ static int otx2_cpt_aead_cbc_aes_sha_setkey(struct crypto_aead *cipher,
 					    const unsigned char *key,
 					    unsigned int keylen)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 	struct crypto_authenc_key_param *param;
 	int enckeylen = 0, authkeylen = 0;
 	struct rtattr *rta = (void *)key;
@@ -944,7 +945,7 @@ static int otx2_cpt_aead_ecb_null_sha_setkey(struct crypto_aead *cipher,
 					     const unsigned char *key,
 					     unsigned int keylen)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 	struct crypto_authenc_key_param *param;
 	struct rtattr *rta = (void *)key;
 	int enckeylen = 0;
@@ -979,7 +980,7 @@ static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher,
 					const unsigned char *key,
 					unsigned int keylen)
 {
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(cipher);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(cipher);
 
 	/*
 	 * For aes gcm we expect to get encryption key (16, 24, 32 bytes)
@@ -1012,9 +1013,9 @@ static int otx2_cpt_aead_gcm_aes_setkey(struct crypto_aead *cipher,
 static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc,
 				      u32 *argcnt)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	struct otx2_cpt_fc_ctx *fctx = &rctx->fctx;
 	int mac_len = crypto_aead_authsize(tfm);
@@ -1103,9 +1104,9 @@ static inline int create_aead_ctx_hdr(struct aead_request *req, u32 enc,
 static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt,
 				      u32 enc)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 
 	req_info->ctrl.s.dma_mode = OTX2_CPT_DMA_MODE_SG;
@@ -1127,7 +1128,7 @@ static inline void create_hmac_ctx_hdr(struct aead_request *req, u32 *argcnt,
 
 static inline int create_aead_input_list(struct aead_request *req, u32 enc)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 inputlen =  req->cryptlen + req->assoclen;
 	u32 status, argcnt = 0;
@@ -1144,7 +1145,7 @@ static inline int create_aead_input_list(struct aead_request *req, u32 enc)
 static inline void create_aead_output_list(struct aead_request *req, u32 enc,
 					   u32 mac_len)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx2_cpt_req_info *req_info =  &rctx->cpt_req;
 	u32 argcnt = 0, outputlen = 0;
 
@@ -1160,7 +1161,7 @@ static inline void create_aead_output_list(struct aead_request *req, u32 enc,
 static inline void create_aead_null_input_list(struct aead_request *req,
 					       u32 enc, u32 mac_len)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	u32 inputlen, argcnt = 0;
 
@@ -1177,7 +1178,7 @@ static inline void create_aead_null_input_list(struct aead_request *req,
 static inline int create_aead_null_output_list(struct aead_request *req,
 					       u32 enc, u32 mac_len)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx2_cpt_req_info *req_info =  &rctx->cpt_req;
 	struct scatterlist *dst;
 	u8 *ptr = NULL;
@@ -1257,9 +1258,9 @@ error_free:
 
 static int aead_do_fallback(struct aead_request *req, bool is_enc)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct crypto_aead *aead = crypto_aead_reqtfm(req);
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(aead);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(aead);
 	int ret;
 
 	if (ctx->fbk_cipher) {
@@ -1281,10 +1282,10 @@ static int aead_do_fallback(struct aead_request *req, bool is_enc)
 
 static int cpt_aead_enc_dec(struct aead_request *req, u8 reg_type, u8 enc)
 {
-	struct otx2_cpt_req_ctx *rctx = aead_request_ctx(req);
+	struct otx2_cpt_req_ctx *rctx = aead_request_ctx_dma(req);
 	struct otx2_cpt_req_info *req_info = &rctx->cpt_req;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct otx2_cpt_aead_ctx *ctx = crypto_aead_ctx_dma(tfm);
 	struct pci_dev *pdev;
 	int status, cpu_num;
 
@@ -1458,7 +1459,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha1_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1477,7 +1478,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha256_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1496,7 +1497,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha384_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1515,7 +1516,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha512_cbc_aes",
 		.cra_blocksize = AES_BLOCK_SIZE,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1534,7 +1535,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha1_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1553,7 +1554,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha256_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1572,7 +1573,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha384_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1591,7 +1592,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_hmac_sha512_ecb_null",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
@@ -1610,7 +1611,7 @@ static struct aead_alg otx2_cpt_aeads[] = { {
 		.cra_driver_name = "cpt_rfc4106_gcm_aes",
 		.cra_blocksize = 1,
 		.cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
-		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx),
+		.cra_ctxsize = sizeof(struct otx2_cpt_aead_ctx) + CRYPTO_DMA_PADDING,
 		.cra_priority = 4001,
 		.cra_alignmask = 0,
 		.cra_module = THIS_MODULE,
-- 
GitLab


From 18daae5b0c41bf54af5f162a3e205858c9771400 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 2 Dec 2022 17:21:05 +0800
Subject: [PATCH 3582/4122] crypto: qce - Set DMA alignment explicitly

This driver has been implicitly relying on kmalloc alignment
to be sufficient for DMA.  This may no longer be the case with
upcoming arm64 changes.

This patch changes it to explicitly request DMA alignment from
the Crypto API.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/qce/aead.c   | 22 +++++++++++-----------
 drivers/crypto/qce/common.c |  5 +++--
 drivers/crypto/qce/sha.c    | 18 +++++++++---------
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c
index 6eb4d2e356297..7d811728f0478 100644
--- a/drivers/crypto/qce/aead.c
+++ b/drivers/crypto/qce/aead.c
@@ -24,7 +24,7 @@ static void qce_aead_done(void *data)
 {
 	struct crypto_async_request *async_req = data;
 	struct aead_request *req = aead_request_cast(async_req);
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
 	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
 	struct qce_device *qce = tmpl->qce;
@@ -92,7 +92,7 @@ static void qce_aead_done(void *data)
 static struct scatterlist *
 qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req)
 {
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
 	struct qce_device *qce = tmpl->qce;
 
@@ -103,7 +103,7 @@ qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req)
 static struct scatterlist *
 qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req)
 {
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 
 	sg_init_one(&rctx->result_sg, rctx->ccmresult_buf, QCE_BAM_BURST_SIZE);
 	return qce_sgtable_add(tbl, &rctx->result_sg, QCE_BAM_BURST_SIZE);
@@ -112,7 +112,7 @@ qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req)
 static struct scatterlist *
 qce_aead_prepare_dst_buf(struct aead_request *req)
 {
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
 	struct qce_device *qce = tmpl->qce;
 	struct scatterlist *sg, *msg_sg, __sg[2];
@@ -186,7 +186,7 @@ qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req)
 {
 	struct scatterlist *sg, *msg_sg, __sg[2];
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	unsigned int assoclen = rctx->assoclen;
 	unsigned int adata_header_len, cryptlen, totallen;
@@ -300,7 +300,7 @@ err_free:
 
 static int qce_aead_prepare_buf(struct aead_request *req)
 {
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
 	struct qce_device *qce = tmpl->qce;
 	struct scatterlist *sg;
@@ -328,7 +328,7 @@ static int qce_aead_prepare_buf(struct aead_request *req)
 
 static int qce_aead_ccm_prepare_buf(struct aead_request *req)
 {
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct scatterlist *sg;
@@ -408,7 +408,7 @@ static int
 qce_aead_async_req_handle(struct crypto_async_request *async_req)
 {
 	struct aead_request *req = aead_request_cast(async_req);
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
 	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
 	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
@@ -502,7 +502,7 @@ error_free:
 static int qce_aead_crypt(struct aead_request *req, int encrypt)
 {
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
 	struct qce_alg_template *tmpl = to_aead_tmpl(tfm);
 	unsigned int blocksize = crypto_aead_blocksize(tfm);
@@ -675,8 +675,8 @@ static int qce_aead_init(struct crypto_aead *tfm)
 	if (IS_ERR(ctx->fallback))
 		return PTR_ERR(ctx->fallback);
 
-	crypto_aead_set_reqsize(tfm, sizeof(struct qce_aead_reqctx) +
-				crypto_aead_reqsize(ctx->fallback));
+	crypto_aead_set_reqsize_dma(tfm, sizeof(struct qce_aead_reqctx) +
+					 crypto_aead_reqsize(ctx->fallback));
 	return 0;
 }
 
diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index 7c612ba5068f7..04253a8d33409 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -3,6 +3,7 @@
  * Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
  */
 
+#include <crypto/internal/hash.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/types.h>
@@ -147,7 +148,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
 {
 	struct ahash_request *req = ahash_request_cast(async_req);
 	struct crypto_ahash *ahash = __crypto_ahash_cast(async_req->tfm);
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm);
 	struct qce_device *qce = tmpl->qce;
 	unsigned int digestsize = crypto_ahash_digestsize(ahash);
@@ -419,7 +420,7 @@ static unsigned int qce_be32_to_cpu_array(u32 *dst, const u8 *src, unsigned int
 static int qce_setup_regs_aead(struct crypto_async_request *async_req)
 {
 	struct aead_request *req = aead_request_cast(async_req);
-	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx_dma(req);
 	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
 	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
 	struct qce_device *qce = tmpl->qce;
diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c
index 37bafd7aeb79d..fc72af8aa9a72 100644
--- a/drivers/crypto/qce/sha.c
+++ b/drivers/crypto/qce/sha.c
@@ -38,7 +38,7 @@ static void qce_ahash_done(void *data)
 	struct crypto_async_request *async_req = data;
 	struct ahash_request *req = ahash_request_cast(async_req);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(req);
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm);
 	struct qce_device *qce = tmpl->qce;
 	struct qce_result_dump *result = qce->dma.result_buf;
@@ -75,7 +75,7 @@ static void qce_ahash_done(void *data)
 static int qce_ahash_async_req_handle(struct crypto_async_request *async_req)
 {
 	struct ahash_request *req = ahash_request_cast(async_req);
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_sha_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
 	struct qce_alg_template *tmpl = to_ahash_tmpl(async_req->tfm);
 	struct qce_device *qce = tmpl->qce;
@@ -132,7 +132,7 @@ error_unmap_src:
 
 static int qce_ahash_init(struct ahash_request *req)
 {
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm);
 	const u32 *std_iv = tmpl->std_iv;
 
@@ -147,7 +147,7 @@ static int qce_ahash_init(struct ahash_request *req)
 
 static int qce_ahash_export(struct ahash_request *req, void *out)
 {
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_sha_saved_state *export_state = out;
 
 	memcpy(export_state->pending_buf, rctx->buf, rctx->buflen);
@@ -164,7 +164,7 @@ static int qce_ahash_export(struct ahash_request *req, void *out)
 
 static int qce_ahash_import(struct ahash_request *req, const void *in)
 {
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	const struct qce_sha_saved_state *import_state = in;
 
 	memset(rctx, 0, sizeof(*rctx));
@@ -183,7 +183,7 @@ static int qce_ahash_import(struct ahash_request *req, const void *in)
 static int qce_ahash_update(struct ahash_request *req)
 {
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm);
 	struct qce_device *qce = tmpl->qce;
 	struct scatterlist *sg_last, *sg;
@@ -275,7 +275,7 @@ static int qce_ahash_update(struct ahash_request *req)
 
 static int qce_ahash_final(struct ahash_request *req)
 {
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm);
 	struct qce_device *qce = tmpl->qce;
 
@@ -302,7 +302,7 @@ static int qce_ahash_final(struct ahash_request *req)
 
 static int qce_ahash_digest(struct ahash_request *req)
 {
-	struct qce_sha_reqctx *rctx = ahash_request_ctx(req);
+	struct qce_sha_reqctx *rctx = ahash_request_ctx_dma(req);
 	struct qce_alg_template *tmpl = to_ahash_tmpl(req->base.tfm);
 	struct qce_device *qce = tmpl->qce;
 	int ret;
@@ -395,7 +395,7 @@ static int qce_ahash_cra_init(struct crypto_tfm *tfm)
 	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
 	struct qce_sha_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	crypto_ahash_set_reqsize(ahash, sizeof(struct qce_sha_reqctx));
+	crypto_ahash_set_reqsize_dma(ahash, sizeof(struct qce_sha_reqctx));
 	memset(ctx, 0, sizeof(*ctx));
 	return 0;
 }
-- 
GitLab


From ecadb5b0111ea19fc7c240bb25d424a94471eb7d Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Fri, 2 Dec 2022 21:22:33 +0800
Subject: [PATCH 3583/4122] hwrng: amd - Fix PCI device refcount leak

for_each_pci_dev() is implemented by pci_get_device(). The comment of
pci_get_device() says that it will increase the reference count for the
returned pci_dev and also decrease the reference count for the input
pci_dev @from if it is not NULL.

If we break for_each_pci_dev() loop with pdev not NULL, we need to call
pci_dev_put() to decrease the reference count. Add the missing
pci_dev_put() for the normal and error path.

Fixes: 96d63c0297cc ("[PATCH] Add AMD HW RNG driver")
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/amd-rng.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
index c22d4184bb612..0555e3838bce1 100644
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -143,15 +143,19 @@ static int __init amd_rng_mod_init(void)
 found:
 	err = pci_read_config_dword(pdev, 0x58, &pmbase);
 	if (err)
-		return err;
+		goto put_dev;
 
 	pmbase &= 0x0000FF00;
-	if (pmbase == 0)
-		return -EIO;
+	if (pmbase == 0) {
+		err = -EIO;
+		goto put_dev;
+	}
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
+	if (!priv) {
+		err = -ENOMEM;
+		goto put_dev;
+	}
 
 	if (!request_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE, DRV_NAME)) {
 		dev_err(&pdev->dev, DRV_NAME " region 0x%x already in use!\n",
@@ -185,6 +189,8 @@ err_iomap:
 	release_region(pmbase + PMBASE_OFFSET, PMBASE_SIZE);
 out:
 	kfree(priv);
+put_dev:
+	pci_dev_put(pdev);
 	return err;
 }
 
@@ -200,6 +206,8 @@ static void __exit amd_rng_mod_exit(void)
 
 	release_region(priv->pmbase + PMBASE_OFFSET, PMBASE_SIZE);
 
+	pci_dev_put(priv->pcidev);
+
 	kfree(priv);
 }
 
-- 
GitLab


From 9f6ec8dc574efb7f4f3d7ee9cd59ae307e78f445 Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Fri, 2 Dec 2022 21:22:34 +0800
Subject: [PATCH 3584/4122] hwrng: geode - Fix PCI device refcount leak

for_each_pci_dev() is implemented by pci_get_device(). The comment of
pci_get_device() says that it will increase the reference count for the
returned pci_dev and also decrease the reference count for the input
pci_dev @from if it is not NULL.

If we break for_each_pci_dev() loop with pdev not NULL, we need to call
pci_dev_put() to decrease the reference count. We add a new struct
'amd_geode_priv' to record pointer of the pci_dev and membase, and then
add missing pci_dev_put() for the normal and error path.

Fixes: ef5d862734b8 ("[PATCH] Add Geode HW RNG driver")
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/char/hw_random/geode-rng.c | 36 +++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/char/hw_random/geode-rng.c b/drivers/char/hw_random/geode-rng.c
index 138ce434f86b2..12fbe80918319 100644
--- a/drivers/char/hw_random/geode-rng.c
+++ b/drivers/char/hw_random/geode-rng.c
@@ -51,6 +51,10 @@ static const struct pci_device_id pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, pci_tbl);
 
+struct amd_geode_priv {
+	struct pci_dev *pcidev;
+	void __iomem *membase;
+};
 
 static int geode_rng_data_read(struct hwrng *rng, u32 *data)
 {
@@ -90,6 +94,7 @@ static int __init geode_rng_init(void)
 	const struct pci_device_id *ent;
 	void __iomem *mem;
 	unsigned long rng_base;
+	struct amd_geode_priv *priv;
 
 	for_each_pci_dev(pdev) {
 		ent = pci_match_id(pci_tbl, pdev);
@@ -97,17 +102,26 @@ static int __init geode_rng_init(void)
 			goto found;
 	}
 	/* Device not found. */
-	goto out;
+	return err;
 
 found:
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		err = -ENOMEM;
+		goto put_dev;
+	}
+
 	rng_base = pci_resource_start(pdev, 0);
 	if (rng_base == 0)
-		goto out;
+		goto free_priv;
 	err = -ENOMEM;
 	mem = ioremap(rng_base, 0x58);
 	if (!mem)
-		goto out;
-	geode_rng.priv = (unsigned long)mem;
+		goto free_priv;
+
+	geode_rng.priv = (unsigned long)priv;
+	priv->membase = mem;
+	priv->pcidev = pdev;
 
 	pr_info("AMD Geode RNG detected\n");
 	err = hwrng_register(&geode_rng);
@@ -116,20 +130,26 @@ found:
 		       err);
 		goto err_unmap;
 	}
-out:
 	return err;
 
 err_unmap:
 	iounmap(mem);
-	goto out;
+free_priv:
+	kfree(priv);
+put_dev:
+	pci_dev_put(pdev);
+	return err;
 }
 
 static void __exit geode_rng_exit(void)
 {
-	void __iomem *mem = (void __iomem *)geode_rng.priv;
+	struct amd_geode_priv *priv;
 
+	priv = (struct amd_geode_priv *)geode_rng.priv;
 	hwrng_unregister(&geode_rng);
-	iounmap(mem);
+	iounmap(priv->membase);
+	pci_dev_put(priv->pcidev);
+	kfree(priv);
 }
 
 module_init(geode_rng_init);
-- 
GitLab


From 6c013679eb5c7e0b09cbcb64276f6dd97b473d12 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 3 Dec 2022 10:15:15 +0100
Subject: [PATCH 3585/4122] dt-bindings: crypto: Let STM32 define Ux500 CRYP

This adds device tree bindings for the Ux500 CRYP block
as a compatible in the STM32 CRYP bindings.

The Ux500 CRYP binding has been used for ages in the kernel
device tree for Ux500 but was never documented, so fill in
the gap by making it a sibling of the STM32 CRYP block,
which is what it is.

The relationship to the existing STM32 CRYP block is pretty
obvious when looking at the register map, and I have written
patches to reuse the STM32 CRYP driver on the Ux500.

The two properties added are DMA channels and power domain.
Power domains are a generic SoC feature and the STM32 variant
also has DMA channels.

Cc: devicetree@vger.kernel.org
Cc: Rob Herring <robh+dt@kernel.org>
Cc: Krzysztof Kozlowski <krzysztof.kozlowski+dt@linaro.org>
Cc: Lionel Debieve <lionel.debieve@foss.st.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 .../bindings/crypto/st,stm32-cryp.yaml        | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
index ed23bf94a8e00..6759c5bf3e572 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
@@ -6,12 +6,18 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: STMicroelectronics STM32 CRYP bindings
 
+description: The STM32 CRYP block is built on the CRYP block found in
+  the STn8820 SoC introduced in 2007, and subsequently used in the U8500
+  SoC in 2010.
+
 maintainers:
   - Lionel Debieve <lionel.debieve@foss.st.com>
 
 properties:
   compatible:
     enum:
+      - st,stn8820-cryp
+      - stericsson,ux500-cryp
       - st,stm32f756-cryp
       - st,stm32mp1-cryp
 
@@ -27,6 +33,19 @@ properties:
   resets:
     maxItems: 1
 
+  dmas:
+    items:
+      - description: mem2cryp DMA channel
+      - description: cryp2mem DMA channel
+
+  dma-names:
+    items:
+      - const: mem2cryp
+      - const: cryp2mem
+
+  power-domains:
+    maxItems: 1
+
 required:
   - compatible
   - reg
-- 
GitLab


From fe867538c1620738bda5328a14179a3c2bc95ab1 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 3 Dec 2022 10:15:16 +0100
Subject: [PATCH 3586/4122] crypto: stm32 - enable drivers to be used on Ux500

The Ux500 cryp and hash drivers are older versions of the
hardware managed by the stm32 driver.

Instead of trying to improve the Ux500 cryp and hash drivers,
start to switch over to the modern and more well-maintained
STM32 drivers.

Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Acked-by: Lionel Debieve <lionel.debieve@foss.st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/Makefile      | 2 +-
 drivers/crypto/stm32/Kconfig | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index 116de173a66c5..fa8bf1be1a8cd 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -41,7 +41,7 @@ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
 obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o
 obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
 obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
-obj-$(CONFIG_ARCH_STM32) += stm32/
+obj-y += stm32/
 obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
 obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
 obj-$(CONFIG_CRYPTO_DEV_VIRTIO) += virtio/
diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index 4a4c3284ae1f3..4fc581e9e5956 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -10,7 +10,7 @@ config CRYPTO_DEV_STM32_CRC
 
 config CRYPTO_DEV_STM32_HASH
 	tristate "Support for STM32 hash accelerators"
-	depends on ARCH_STM32
+	depends on ARCH_STM32 || ARCH_U8500
 	depends on HAS_DMA
 	select CRYPTO_HASH
 	select CRYPTO_MD5
@@ -23,7 +23,7 @@ config CRYPTO_DEV_STM32_HASH
 
 config CRYPTO_DEV_STM32_CRYP
 	tristate "Support for STM32 cryp accelerators"
-	depends on ARCH_STM32
+	depends on ARCH_STM32 || ARCH_U8500
 	select CRYPTO_HASH
 	select CRYPTO_ENGINE
 	select CRYPTO_LIB_DES
-- 
GitLab


From 0b496efbd2d00f658dbf906882d935e7fa3dfd03 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 3 Dec 2022 10:15:17 +0100
Subject: [PATCH 3587/4122] crypto: stm32/cryp - enable for use with Ux500

This adds a few small quirks to handle the differences between
the STM32 and Ux500 cryp blocks. The following differences
are handled with special bool switch bits in the capabilities:

- The main difference is that some registers are removed, so we
  add register offsets for all registers in the
  per-variant data. Then we assign the right offsets for Ux500
  vs the STM32 variants.

- The Ux500 does not support the aeads algorithms; gcm(aes)
  and ccm(aes). Avoid registering them when running on Ux500.

- The Ux500 has a special "linear" key format and does some
  elaborare bit swizzling of the key bits before writing them
  into the key registers. This is written as an "application
  note" inside the DB8500 design specification, and seems to
  be the result of some mishap when assigning the data lines
  to register bits. (STM32 has clearly fixed this.)

- The Ux500 does not have the KP "key prepare" bit in the
  CR register. Instead, we need to set the KSE bit,
  "key schedule encryption" bit which does the same thing
  but is in bit 11 rather than being a special "algorithm
  type" as on STM32. The algorithm must however be specified
  as AES ECB while doing this.

- The Ux500 cannot just read out IV registers, we need to
  set the KEYRDEN "key read enable" bit, as this protects
  not just the key but also the IV from being read out.
  Enable this bit before reading out the IV and disable it
  afterwards.

Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Acked by: Lionel Debieve <lionel.debieve@foss.st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/stm32/stm32-cryp.c | 413 +++++++++++++++++++++++-------
 1 file changed, 322 insertions(+), 91 deletions(-)

diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index 59638dfce573b..4208338e72b60 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) STMicroelectronics SA 2017
  * Author: Fabien Dessenne <fabien.dessenne@st.com>
+ * Ux500 support taken from snippets in the old Ux500 cryp driver
  */
 
 #include <linux/clk.h>
@@ -62,6 +63,29 @@
 #define CRYP_CSGCMCCM0R         0x00000050
 #define CRYP_CSGCM0R            0x00000070
 
+#define UX500_CRYP_CR		0x00000000
+#define UX500_CRYP_SR		0x00000004
+#define UX500_CRYP_DIN		0x00000008
+#define UX500_CRYP_DINSIZE	0x0000000C
+#define UX500_CRYP_DOUT		0x00000010
+#define UX500_CRYP_DOUSIZE	0x00000014
+#define UX500_CRYP_DMACR	0x00000018
+#define UX500_CRYP_IMSC		0x0000001C
+#define UX500_CRYP_RIS		0x00000020
+#define UX500_CRYP_MIS		0x00000024
+#define UX500_CRYP_K1L		0x00000028
+#define UX500_CRYP_K1R		0x0000002C
+#define UX500_CRYP_K2L		0x00000030
+#define UX500_CRYP_K2R		0x00000034
+#define UX500_CRYP_K3L		0x00000038
+#define UX500_CRYP_K3R		0x0000003C
+#define UX500_CRYP_K4L		0x00000040
+#define UX500_CRYP_K4R		0x00000044
+#define UX500_CRYP_IV0L		0x00000048
+#define UX500_CRYP_IV0R		0x0000004C
+#define UX500_CRYP_IV1L		0x00000050
+#define UX500_CRYP_IV1R		0x00000054
+
 /* Registers values */
 #define CR_DEC_NOT_ENC          0x00000004
 #define CR_TDES_ECB             0x00000000
@@ -71,7 +95,8 @@
 #define CR_AES_ECB              0x00000020
 #define CR_AES_CBC              0x00000028
 #define CR_AES_CTR              0x00000030
-#define CR_AES_KP               0x00000038
+#define CR_AES_KP               0x00000038 /* Not on Ux500 */
+#define CR_AES_XTS              0x00000038 /* Only on Ux500 */
 #define CR_AES_GCM              0x00080000
 #define CR_AES_CCM              0x00080008
 #define CR_AES_UNKNOWN          0xFFFFFFFF
@@ -83,6 +108,8 @@
 #define CR_KEY128               0x00000000
 #define CR_KEY192               0x00000100
 #define CR_KEY256               0x00000200
+#define CR_KEYRDEN              0x00000400 /* Only on Ux500 */
+#define CR_KSE                  0x00000800 /* Only on Ux500 */
 #define CR_FFLUSH               0x00004000
 #define CR_CRYPEN               0x00008000
 #define CR_PH_INIT              0x00000000
@@ -107,8 +134,25 @@
 #define CRYP_AUTOSUSPEND_DELAY	50
 
 struct stm32_cryp_caps {
-	bool                    swap_final;
-	bool                    padding_wa;
+	bool			aeads_support;
+	bool			linear_aes_key;
+	bool			kp_mode;
+	bool			iv_protection;
+	bool			swap_final;
+	bool			padding_wa;
+	u32			cr;
+	u32			sr;
+	u32			din;
+	u32			dout;
+	u32			imsc;
+	u32			mis;
+	u32			k1l;
+	u32			k1r;
+	u32			k3r;
+	u32			iv0l;
+	u32			iv0r;
+	u32			iv1l;
+	u32			iv1r;
 };
 
 struct stm32_cryp_ctx {
@@ -228,20 +272,21 @@ static inline int stm32_cryp_wait_busy(struct stm32_cryp *cryp)
 {
 	u32 status;
 
-	return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status,
+	return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->sr, status,
 			!(status & SR_BUSY), 10, 100000);
 }
 
 static inline void stm32_cryp_enable(struct stm32_cryp *cryp)
 {
-	writel_relaxed(readl_relaxed(cryp->regs + CRYP_CR) | CR_CRYPEN, cryp->regs + CRYP_CR);
+	writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) | CR_CRYPEN,
+		       cryp->regs + cryp->caps->cr);
 }
 
 static inline int stm32_cryp_wait_enable(struct stm32_cryp *cryp)
 {
 	u32 status;
 
-	return readl_relaxed_poll_timeout(cryp->regs + CRYP_CR, status,
+	return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->cr, status,
 			!(status & CR_CRYPEN), 10, 100000);
 }
 
@@ -249,10 +294,22 @@ static inline int stm32_cryp_wait_output(struct stm32_cryp *cryp)
 {
 	u32 status;
 
-	return readl_relaxed_poll_timeout(cryp->regs + CRYP_SR, status,
+	return readl_relaxed_poll_timeout(cryp->regs + cryp->caps->sr, status,
 			status & SR_OFNE, 10, 100000);
 }
 
+static inline void stm32_cryp_key_read_enable(struct stm32_cryp *cryp)
+{
+	writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) | CR_KEYRDEN,
+		       cryp->regs + cryp->caps->cr);
+}
+
+static inline void stm32_cryp_key_read_disable(struct stm32_cryp *cryp)
+{
+	writel_relaxed(readl_relaxed(cryp->regs + cryp->caps->cr) & ~CR_KEYRDEN,
+		       cryp->regs + cryp->caps->cr);
+}
+
 static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp);
 static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err);
 
@@ -281,12 +338,12 @@ static void stm32_cryp_hw_write_iv(struct stm32_cryp *cryp, __be32 *iv)
 	if (!iv)
 		return;
 
-	stm32_cryp_write(cryp, CRYP_IV0LR, be32_to_cpu(*iv++));
-	stm32_cryp_write(cryp, CRYP_IV0RR, be32_to_cpu(*iv++));
+	stm32_cryp_write(cryp, cryp->caps->iv0l, be32_to_cpu(*iv++));
+	stm32_cryp_write(cryp, cryp->caps->iv0r, be32_to_cpu(*iv++));
 
 	if (is_aes(cryp)) {
-		stm32_cryp_write(cryp, CRYP_IV1LR, be32_to_cpu(*iv++));
-		stm32_cryp_write(cryp, CRYP_IV1RR, be32_to_cpu(*iv++));
+		stm32_cryp_write(cryp, cryp->caps->iv1l, be32_to_cpu(*iv++));
+		stm32_cryp_write(cryp, cryp->caps->iv1r, be32_to_cpu(*iv++));
 	}
 }
 
@@ -298,12 +355,102 @@ static void stm32_cryp_get_iv(struct stm32_cryp *cryp)
 	if (!tmp)
 		return;
 
-	*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR));
-	*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR));
+	if (cryp->caps->iv_protection)
+		stm32_cryp_key_read_enable(cryp);
+
+	*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0l));
+	*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0r));
 
 	if (is_aes(cryp)) {
-		*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR));
-		*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR));
+		*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1l));
+		*tmp++ = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1r));
+	}
+
+	if (cryp->caps->iv_protection)
+		stm32_cryp_key_read_disable(cryp);
+}
+
+/**
+ * ux500_swap_bits_in_byte() - mirror the bits in a byte
+ * @b: the byte to be mirrored
+ *
+ * The bits are swapped the following way:
+ *  Byte b include bits 0-7, nibble 1 (n1) include bits 0-3 and
+ *  nibble 2 (n2) bits 4-7.
+ *
+ *  Nibble 1 (n1):
+ *  (The "old" (moved) bit is replaced with a zero)
+ *  1. Move bit 6 and 7, 4 positions to the left.
+ *  2. Move bit 3 and 5, 2 positions to the left.
+ *  3. Move bit 1-4, 1 position to the left.
+ *
+ *  Nibble 2 (n2):
+ *  1. Move bit 0 and 1, 4 positions to the right.
+ *  2. Move bit 2 and 4, 2 positions to the right.
+ *  3. Move bit 3-6, 1 position to the right.
+ *
+ *  Combine the two nibbles to a complete and swapped byte.
+ */
+static inline u8 ux500_swap_bits_in_byte(u8 b)
+{
+#define R_SHIFT_4_MASK  0xc0 /* Bits 6 and 7, right shift 4 */
+#define R_SHIFT_2_MASK  0x28 /* (After right shift 4) Bits 3 and 5,
+				  right shift 2 */
+#define R_SHIFT_1_MASK  0x1e /* (After right shift 2) Bits 1-4,
+				  right shift 1 */
+#define L_SHIFT_4_MASK  0x03 /* Bits 0 and 1, left shift 4 */
+#define L_SHIFT_2_MASK  0x14 /* (After left shift 4) Bits 2 and 4,
+				  left shift 2 */
+#define L_SHIFT_1_MASK  0x78 /* (After left shift 1) Bits 3-6,
+				  left shift 1 */
+
+	u8 n1;
+	u8 n2;
+
+	/* Swap most significant nibble */
+	/* Right shift 4, bits 6 and 7 */
+	n1 = ((b  & R_SHIFT_4_MASK) >> 4) | (b  & ~(R_SHIFT_4_MASK >> 4));
+	/* Right shift 2, bits 3 and 5 */
+	n1 = ((n1 & R_SHIFT_2_MASK) >> 2) | (n1 & ~(R_SHIFT_2_MASK >> 2));
+	/* Right shift 1, bits 1-4 */
+	n1 = (n1  & R_SHIFT_1_MASK) >> 1;
+
+	/* Swap least significant nibble */
+	/* Left shift 4, bits 0 and 1 */
+	n2 = ((b  & L_SHIFT_4_MASK) << 4) | (b  & ~(L_SHIFT_4_MASK << 4));
+	/* Left shift 2, bits 2 and 4 */
+	n2 = ((n2 & L_SHIFT_2_MASK) << 2) | (n2 & ~(L_SHIFT_2_MASK << 2));
+	/* Left shift 1, bits 3-6 */
+	n2 = (n2  & L_SHIFT_1_MASK) << 1;
+
+	return n1 | n2;
+}
+
+/**
+ * ux500_swizzle_key() - Shuffle around words and bits in the AES key
+ * @in: key to swizzle
+ * @out: swizzled key
+ * @len: length of key, in bytes
+ *
+ * This "key swizzling procedure" is described in the examples in the
+ * DB8500 design specification. There is no real description of why
+ * the bits have been arranged like this in the hardware.
+ */
+static inline void ux500_swizzle_key(const u8 *in, u8 *out, u32 len)
+{
+	int i = 0;
+	int bpw = sizeof(u32);
+	int j;
+	int index = 0;
+
+	j = len - bpw;
+	while (j >= 0) {
+		for (i = 0; i < bpw; i++) {
+			index = len - j - bpw + i;
+			out[j + i] =
+				ux500_swap_bits_in_byte(in[index]);
+		}
+		j -= bpw;
 	}
 }
 
@@ -313,14 +460,33 @@ static void stm32_cryp_hw_write_key(struct stm32_cryp *c)
 	int r_id;
 
 	if (is_des(c)) {
-		stm32_cryp_write(c, CRYP_K1LR, be32_to_cpu(c->ctx->key[0]));
-		stm32_cryp_write(c, CRYP_K1RR, be32_to_cpu(c->ctx->key[1]));
-	} else {
-		r_id = CRYP_K3RR;
-		for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4)
-			stm32_cryp_write(c, r_id,
-					 be32_to_cpu(c->ctx->key[i - 1]));
+		stm32_cryp_write(c, c->caps->k1l, be32_to_cpu(c->ctx->key[0]));
+		stm32_cryp_write(c, c->caps->k1r, be32_to_cpu(c->ctx->key[1]));
+		return;
 	}
+
+	/*
+	 * On the Ux500 the AES key is considered as a single bit sequence
+	 * of 128, 192 or 256 bits length. It is written linearly into the
+	 * registers from K1L and down, and need to be processed to become
+	 * a proper big-endian bit sequence.
+	 */
+	if (is_aes(c) && c->caps->linear_aes_key) {
+		u32 tmpkey[8];
+
+		ux500_swizzle_key((u8 *)c->ctx->key,
+				  (u8 *)tmpkey, c->ctx->keylen);
+
+		r_id = c->caps->k1l;
+		for (i = 0; i < c->ctx->keylen / sizeof(u32); i++, r_id += 4)
+			stm32_cryp_write(c, r_id, tmpkey[i]);
+
+		return;
+	}
+
+	r_id = c->caps->k3r;
+	for (i = c->ctx->keylen / sizeof(u32); i > 0; i--, r_id -= 4)
+		stm32_cryp_write(c, r_id, be32_to_cpu(c->ctx->key[i - 1]));
 }
 
 static u32 stm32_cryp_get_hw_mode(struct stm32_cryp *cryp)
@@ -373,7 +539,7 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg)
 	cryp->gcm_ctr = GCM_CTR_INIT;
 	stm32_cryp_hw_write_iv(cryp, iv);
 
-	stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg | CR_PH_INIT | CR_CRYPEN);
 
 	/* Wait for end of processing */
 	ret = stm32_cryp_wait_enable(cryp);
@@ -385,10 +551,10 @@ static int stm32_cryp_gcm_init(struct stm32_cryp *cryp, u32 cfg)
 	/* Prepare next phase */
 	if (cryp->areq->assoclen) {
 		cfg |= CR_PH_HEADER;
-		stm32_cryp_write(cryp, CRYP_CR, cfg);
+		stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 	} else if (stm32_cryp_get_input_text_len(cryp)) {
 		cfg |= CR_PH_PAYLOAD;
-		stm32_cryp_write(cryp, CRYP_CR, cfg);
+		stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 	}
 
 	return 0;
@@ -405,20 +571,20 @@ static void stm32_crypt_gcmccm_end_header(struct stm32_cryp *cryp)
 		err = stm32_cryp_wait_busy(cryp);
 		if (err) {
 			dev_err(cryp->dev, "Timeout (gcm/ccm header)\n");
-			stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+			stm32_cryp_write(cryp, cryp->caps->imsc, 0);
 			stm32_cryp_finish_req(cryp, err);
 			return;
 		}
 
 		if (stm32_cryp_get_input_text_len(cryp)) {
 			/* Phase 3 : payload */
-			cfg = stm32_cryp_read(cryp, CRYP_CR);
+			cfg = stm32_cryp_read(cryp, cryp->caps->cr);
 			cfg &= ~CR_CRYPEN;
-			stm32_cryp_write(cryp, CRYP_CR, cfg);
+			stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 			cfg &= ~CR_PH_MASK;
 			cfg |= CR_PH_PAYLOAD | CR_CRYPEN;
-			stm32_cryp_write(cryp, CRYP_CR, cfg);
+			stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 		} else {
 			/*
 			 * Phase 4 : tag.
@@ -458,7 +624,7 @@ static void stm32_cryp_write_ccm_first_header(struct stm32_cryp *cryp)
 
 	scatterwalk_copychunks((char *)block + len, &cryp->in_walk, written, 0);
 	for (i = 0; i < AES_BLOCK_32; i++)
-		stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+		stm32_cryp_write(cryp, cryp->caps->din, block[i]);
 
 	cryp->header_in -= written;
 
@@ -494,7 +660,7 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg)
 	b0[AES_BLOCK_SIZE - 1] = textlen & 0xFF;
 
 	/* Enable HW */
-	stm32_cryp_write(cryp, CRYP_CR, cfg | CR_PH_INIT | CR_CRYPEN);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg | CR_PH_INIT | CR_CRYPEN);
 
 	/* Write B0 */
 	d = (u32 *)b0;
@@ -505,7 +671,7 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg)
 
 		if (!cryp->caps->padding_wa)
 			xd = be32_to_cpu(bd[i]);
-		stm32_cryp_write(cryp, CRYP_DIN, xd);
+		stm32_cryp_write(cryp, cryp->caps->din, xd);
 	}
 
 	/* Wait for end of processing */
@@ -518,13 +684,13 @@ static int stm32_cryp_ccm_init(struct stm32_cryp *cryp, u32 cfg)
 	/* Prepare next phase */
 	if (cryp->areq->assoclen) {
 		cfg |= CR_PH_HEADER | CR_CRYPEN;
-		stm32_cryp_write(cryp, CRYP_CR, cfg);
+		stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 		/* Write first (special) block (may move to next phase [payload]) */
 		stm32_cryp_write_ccm_first_header(cryp);
 	} else if (stm32_cryp_get_input_text_len(cryp)) {
 		cfg |= CR_PH_PAYLOAD;
-		stm32_cryp_write(cryp, CRYP_CR, cfg);
+		stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 	}
 
 	return 0;
@@ -538,7 +704,7 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
 	pm_runtime_get_sync(cryp->dev);
 
 	/* Disable interrupt */
-	stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+	stm32_cryp_write(cryp, cryp->caps->imsc, 0);
 
 	/* Set configuration */
 	cfg = CR_DATA8 | CR_FFLUSH;
@@ -566,7 +732,12 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
 	if (is_decrypt(cryp) &&
 	    ((hw_mode == CR_AES_ECB) || (hw_mode == CR_AES_CBC))) {
 		/* Configure in key preparation mode */
-		stm32_cryp_write(cryp, CRYP_CR, cfg | CR_AES_KP);
+		if (cryp->caps->kp_mode)
+			stm32_cryp_write(cryp, cryp->caps->cr,
+				cfg | CR_AES_KP);
+		else
+			stm32_cryp_write(cryp,
+				cryp->caps->cr, cfg | CR_AES_ECB | CR_KSE);
 
 		/* Set key only after full configuration done */
 		stm32_cryp_hw_write_key(cryp);
@@ -583,14 +754,14 @@ static int stm32_cryp_hw_init(struct stm32_cryp *cryp)
 		cfg |= hw_mode | CR_DEC_NOT_ENC;
 
 		/* Apply updated config (Decrypt + algo) and flush */
-		stm32_cryp_write(cryp, CRYP_CR, cfg);
+		stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 	} else {
 		cfg |= hw_mode;
 		if (is_decrypt(cryp))
 			cfg |= CR_DEC_NOT_ENC;
 
 		/* Apply config and flush */
-		stm32_cryp_write(cryp, CRYP_CR, cfg);
+		stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 		/* Set key only after configuration done */
 		stm32_cryp_hw_write_key(cryp);
@@ -649,7 +820,7 @@ static void stm32_cryp_finish_req(struct stm32_cryp *cryp, int err)
 static int stm32_cryp_cpu_start(struct stm32_cryp *cryp)
 {
 	/* Enable interrupt and let the IRQ handler do everything */
-	stm32_cryp_write(cryp, CRYP_IMSCR, IMSCR_IN | IMSCR_OUT);
+	stm32_cryp_write(cryp, cryp->caps->imsc, IMSCR_IN | IMSCR_OUT);
 
 	return 0;
 }
@@ -1137,14 +1308,14 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
 	int ret = 0;
 
 	/* Update Config */
-	cfg = stm32_cryp_read(cryp, CRYP_CR);
+	cfg = stm32_cryp_read(cryp, cryp->caps->cr);
 
 	cfg &= ~CR_PH_MASK;
 	cfg |= CR_PH_FINAL;
 	cfg &= ~CR_DEC_NOT_ENC;
 	cfg |= CR_CRYPEN;
 
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	if (is_gcm(cryp)) {
 		/* GCM: write aad and payload size (in bits) */
@@ -1152,8 +1323,8 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
 		if (cryp->caps->swap_final)
 			size_bit = (__force u32)cpu_to_be32(size_bit);
 
-		stm32_cryp_write(cryp, CRYP_DIN, 0);
-		stm32_cryp_write(cryp, CRYP_DIN, size_bit);
+		stm32_cryp_write(cryp, cryp->caps->din, 0);
+		stm32_cryp_write(cryp, cryp->caps->din, size_bit);
 
 		size_bit = is_encrypt(cryp) ? cryp->areq->cryptlen :
 				cryp->areq->cryptlen - cryp->authsize;
@@ -1161,8 +1332,8 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
 		if (cryp->caps->swap_final)
 			size_bit = (__force u32)cpu_to_be32(size_bit);
 
-		stm32_cryp_write(cryp, CRYP_DIN, 0);
-		stm32_cryp_write(cryp, CRYP_DIN, size_bit);
+		stm32_cryp_write(cryp, cryp->caps->din, 0);
+		stm32_cryp_write(cryp, cryp->caps->din, size_bit);
 	} else {
 		/* CCM: write CTR0 */
 		u32 iv32[AES_BLOCK_32];
@@ -1177,7 +1348,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
 
 			if (!cryp->caps->padding_wa)
 				xiv = be32_to_cpu(biv[i]);
-			stm32_cryp_write(cryp, CRYP_DIN, xiv);
+			stm32_cryp_write(cryp, cryp->caps->din, xiv);
 		}
 	}
 
@@ -1193,7 +1364,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
 
 		/* Get and write tag */
 		for (i = 0; i < AES_BLOCK_32; i++)
-			out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+			out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout);
 
 		scatterwalk_copychunks(out_tag, &cryp->out_walk, cryp->authsize, 1);
 	} else {
@@ -1203,7 +1374,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
 		scatterwalk_copychunks(in_tag, &cryp->in_walk, cryp->authsize, 0);
 
 		for (i = 0; i < AES_BLOCK_32; i++)
-			out_tag[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+			out_tag[i] = stm32_cryp_read(cryp, cryp->caps->dout);
 
 		if (crypto_memneq(in_tag, out_tag, cryp->authsize))
 			ret = -EBADMSG;
@@ -1211,7 +1382,7 @@ static int stm32_cryp_read_auth_tag(struct stm32_cryp *cryp)
 
 	/* Disable cryp */
 	cfg &= ~CR_CRYPEN;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	return ret;
 }
@@ -1227,19 +1398,19 @@ static void stm32_cryp_check_ctr_counter(struct stm32_cryp *cryp)
 		 */
 		crypto_inc((u8 *)cryp->last_ctr, sizeof(cryp->last_ctr));
 
-		cr = stm32_cryp_read(cryp, CRYP_CR);
-		stm32_cryp_write(cryp, CRYP_CR, cr & ~CR_CRYPEN);
+		cr = stm32_cryp_read(cryp, cryp->caps->cr);
+		stm32_cryp_write(cryp, cryp->caps->cr, cr & ~CR_CRYPEN);
 
 		stm32_cryp_hw_write_iv(cryp, cryp->last_ctr);
 
-		stm32_cryp_write(cryp, CRYP_CR, cr);
+		stm32_cryp_write(cryp, cryp->caps->cr, cr);
 	}
 
 	/* The IV registers are BE  */
-	cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0LR));
-	cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV0RR));
-	cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1LR));
-	cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, CRYP_IV1RR));
+	cryp->last_ctr[0] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0l));
+	cryp->last_ctr[1] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv0r));
+	cryp->last_ctr[2] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1l));
+	cryp->last_ctr[3] = cpu_to_be32(stm32_cryp_read(cryp, cryp->caps->iv1r));
 }
 
 static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
@@ -1248,7 +1419,7 @@ static void stm32_cryp_irq_read_data(struct stm32_cryp *cryp)
 	u32 block[AES_BLOCK_32];
 
 	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
-		block[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+		block[i] = stm32_cryp_read(cryp, cryp->caps->dout);
 
 	scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize,
 							     cryp->payload_out), 1);
@@ -1264,7 +1435,7 @@ static void stm32_cryp_irq_write_block(struct stm32_cryp *cryp)
 	scatterwalk_copychunks(block, &cryp->in_walk, min_t(size_t, cryp->hw_blocksize,
 							    cryp->payload_in), 0);
 	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
-		stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+		stm32_cryp_write(cryp, cryp->caps->din, block[i]);
 
 	cryp->payload_in -= min_t(size_t, cryp->hw_blocksize, cryp->payload_in);
 }
@@ -1278,22 +1449,22 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
 	/* 'Special workaround' procedure described in the datasheet */
 
 	/* a) disable ip */
-	stm32_cryp_write(cryp, CRYP_IMSCR, 0);
-	cfg = stm32_cryp_read(cryp, CRYP_CR);
+	stm32_cryp_write(cryp, cryp->caps->imsc, 0);
+	cfg = stm32_cryp_read(cryp, cryp->caps->cr);
 	cfg &= ~CR_CRYPEN;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* b) Update IV1R */
-	stm32_cryp_write(cryp, CRYP_IV1RR, cryp->gcm_ctr - 2);
+	stm32_cryp_write(cryp, cryp->caps->iv1r, cryp->gcm_ctr - 2);
 
 	/* c) change mode to CTR */
 	cfg &= ~CR_ALGO_MASK;
 	cfg |= CR_AES_CTR;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* a) enable IP */
 	cfg |= CR_CRYPEN;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* b) pad and write the last block */
 	stm32_cryp_irq_write_block(cryp);
@@ -1310,7 +1481,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
 	 * block value
 	 */
 	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
-		block[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+		block[i] = stm32_cryp_read(cryp, cryp->caps->dout);
 
 	scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize,
 							     cryp->payload_out), 1);
@@ -1320,16 +1491,16 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
 	/* d) change mode back to AES GCM */
 	cfg &= ~CR_ALGO_MASK;
 	cfg |= CR_AES_GCM;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* e) change phase to Final */
 	cfg &= ~CR_PH_MASK;
 	cfg |= CR_PH_FINAL;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* f) write padded data */
 	for (i = 0; i < AES_BLOCK_32; i++)
-		stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+		stm32_cryp_write(cryp, cryp->caps->din, block[i]);
 
 	/* g) Empty fifo out */
 	err = stm32_cryp_wait_output(cryp);
@@ -1339,7 +1510,7 @@ static void stm32_cryp_irq_write_gcm_padded_data(struct stm32_cryp *cryp)
 	}
 
 	for (i = 0; i < AES_BLOCK_32; i++)
-		stm32_cryp_read(cryp, CRYP_DOUT);
+		stm32_cryp_read(cryp, cryp->caps->dout);
 
 	/* h) run the he normal Final phase */
 	stm32_cryp_finish_req(cryp, 0);
@@ -1350,13 +1521,13 @@ static void stm32_cryp_irq_set_npblb(struct stm32_cryp *cryp)
 	u32 cfg;
 
 	/* disable ip, set NPBLB and reneable ip */
-	cfg = stm32_cryp_read(cryp, CRYP_CR);
+	cfg = stm32_cryp_read(cryp, cryp->caps->cr);
 	cfg &= ~CR_CRYPEN;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	cfg |= (cryp->hw_blocksize - cryp->payload_in) << CR_NBPBL_SHIFT;
 	cfg |= CR_CRYPEN;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 }
 
 static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
@@ -1370,11 +1541,11 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
 	/* 'Special workaround' procedure described in the datasheet */
 
 	/* a) disable ip */
-	stm32_cryp_write(cryp, CRYP_IMSCR, 0);
+	stm32_cryp_write(cryp, cryp->caps->imsc, 0);
 
-	cfg = stm32_cryp_read(cryp, CRYP_CR);
+	cfg = stm32_cryp_read(cryp, cryp->caps->cr);
 	cfg &= ~CR_CRYPEN;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* b) get IV1 from CRYP_CSGCMCCM7 */
 	iv1tmp = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + 7 * 4);
@@ -1384,16 +1555,16 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
 		cstmp1[i] = stm32_cryp_read(cryp, CRYP_CSGCMCCM0R + i * 4);
 
 	/* d) Write IV1R */
-	stm32_cryp_write(cryp, CRYP_IV1RR, iv1tmp);
+	stm32_cryp_write(cryp, cryp->caps->iv1r, iv1tmp);
 
 	/* e) change mode to CTR */
 	cfg &= ~CR_ALGO_MASK;
 	cfg |= CR_AES_CTR;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* a) enable IP */
 	cfg |= CR_CRYPEN;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* b) pad and write the last block */
 	stm32_cryp_irq_write_block(cryp);
@@ -1410,7 +1581,7 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
 	 * block value
 	 */
 	for (i = 0; i < cryp->hw_blocksize / sizeof(u32); i++)
-		block[i] = stm32_cryp_read(cryp, CRYP_DOUT);
+		block[i] = stm32_cryp_read(cryp, cryp->caps->dout);
 
 	scatterwalk_copychunks(block, &cryp->out_walk, min_t(size_t, cryp->hw_blocksize,
 							     cryp->payload_out), 1);
@@ -1423,18 +1594,18 @@ static void stm32_cryp_irq_write_ccm_padded_data(struct stm32_cryp *cryp)
 	/* e) change mode back to AES CCM */
 	cfg &= ~CR_ALGO_MASK;
 	cfg |= CR_AES_CCM;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* f) change phase to header */
 	cfg &= ~CR_PH_MASK;
 	cfg |= CR_PH_HEADER;
-	stm32_cryp_write(cryp, CRYP_CR, cfg);
+	stm32_cryp_write(cryp, cryp->caps->cr, cfg);
 
 	/* g) XOR and write padded data */
 	for (i = 0; i < ARRAY_SIZE(block); i++) {
 		block[i] ^= cstmp1[i];
 		block[i] ^= cstmp2[i];
-		stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+		stm32_cryp_write(cryp, cryp->caps->din, block[i]);
 	}
 
 	/* h) wait for completion */
@@ -1497,7 +1668,7 @@ static void stm32_cryp_irq_write_gcmccm_header(struct stm32_cryp *cryp)
 
 	scatterwalk_copychunks(block, &cryp->in_walk, written, 0);
 	for (i = 0; i < AES_BLOCK_32; i++)
-		stm32_cryp_write(cryp, CRYP_DIN, block[i]);
+		stm32_cryp_write(cryp, cryp->caps->din, block[i]);
 
 	cryp->header_in -= written;
 
@@ -1508,7 +1679,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
 {
 	struct stm32_cryp *cryp = arg;
 	u32 ph;
-	u32 it_mask = stm32_cryp_read(cryp, CRYP_IMSCR);
+	u32 it_mask = stm32_cryp_read(cryp, cryp->caps->imsc);
 
 	if (cryp->irq_status & MISR_OUT)
 		/* Output FIFO IRQ: read data */
@@ -1516,7 +1687,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
 
 	if (cryp->irq_status & MISR_IN) {
 		if (is_gcm(cryp) || is_ccm(cryp)) {
-			ph = stm32_cryp_read(cryp, CRYP_CR) & CR_PH_MASK;
+			ph = stm32_cryp_read(cryp, cryp->caps->cr) & CR_PH_MASK;
 			if (unlikely(ph == CR_PH_HEADER))
 				/* Write Header */
 				stm32_cryp_irq_write_gcmccm_header(cryp);
@@ -1536,7 +1707,7 @@ static irqreturn_t stm32_cryp_irq_thread(int irq, void *arg)
 		it_mask &= ~IMSCR_IN;
 	if (!cryp->payload_out)
 		it_mask &= ~IMSCR_OUT;
-	stm32_cryp_write(cryp, CRYP_IMSCR, it_mask);
+	stm32_cryp_write(cryp, cryp->caps->imsc, it_mask);
 
 	if (!cryp->payload_in && !cryp->header_in && !cryp->payload_out)
 		stm32_cryp_finish_req(cryp, 0);
@@ -1548,7 +1719,7 @@ static irqreturn_t stm32_cryp_irq(int irq, void *arg)
 {
 	struct stm32_cryp *cryp = arg;
 
-	cryp->irq_status = stm32_cryp_read(cryp, CRYP_MISR);
+	cryp->irq_status = stm32_cryp_read(cryp, cryp->caps->mis);
 
 	return IRQ_WAKE_THREAD;
 }
@@ -1722,17 +1893,74 @@ static struct aead_alg aead_algs[] = {
 },
 };
 
+static const struct stm32_cryp_caps ux500_data = {
+	.aeads_support = false,
+	.linear_aes_key = true,
+	.kp_mode = false,
+	.iv_protection = true,
+	.swap_final = true,
+	.padding_wa = true,
+	.cr = UX500_CRYP_CR,
+	.sr = UX500_CRYP_SR,
+	.din = UX500_CRYP_DIN,
+	.dout = UX500_CRYP_DOUT,
+	.imsc = UX500_CRYP_IMSC,
+	.mis = UX500_CRYP_MIS,
+	.k1l = UX500_CRYP_K1L,
+	.k1r = UX500_CRYP_K1R,
+	.k3r = UX500_CRYP_K3R,
+	.iv0l = UX500_CRYP_IV0L,
+	.iv0r = UX500_CRYP_IV0R,
+	.iv1l = UX500_CRYP_IV1L,
+	.iv1r = UX500_CRYP_IV1R,
+};
+
 static const struct stm32_cryp_caps f7_data = {
+	.aeads_support = true,
+	.linear_aes_key = false,
+	.kp_mode = true,
+	.iv_protection = false,
 	.swap_final = true,
 	.padding_wa = true,
+	.cr = CRYP_CR,
+	.sr = CRYP_SR,
+	.din = CRYP_DIN,
+	.dout = CRYP_DOUT,
+	.imsc = CRYP_IMSCR,
+	.mis = CRYP_MISR,
+	.k1l = CRYP_K1LR,
+	.k1r = CRYP_K1RR,
+	.k3r = CRYP_K3RR,
+	.iv0l = CRYP_IV0LR,
+	.iv0r = CRYP_IV0RR,
+	.iv1l = CRYP_IV1LR,
+	.iv1r = CRYP_IV1RR,
 };
 
 static const struct stm32_cryp_caps mp1_data = {
+	.aeads_support = true,
+	.linear_aes_key = false,
+	.kp_mode = true,
+	.iv_protection = false,
 	.swap_final = false,
 	.padding_wa = false,
+	.cr = CRYP_CR,
+	.sr = CRYP_SR,
+	.din = CRYP_DIN,
+	.dout = CRYP_DOUT,
+	.imsc = CRYP_IMSCR,
+	.mis = CRYP_MISR,
+	.k1l = CRYP_K1LR,
+	.k1r = CRYP_K1RR,
+	.k3r = CRYP_K3RR,
+	.iv0l = CRYP_IV0LR,
+	.iv0r = CRYP_IV0RR,
+	.iv1l = CRYP_IV1LR,
+	.iv1r = CRYP_IV1RR,
 };
 
 static const struct of_device_id stm32_dt_ids[] = {
+	{ .compatible = "stericsson,ux500-cryp", .data = &ux500_data},
 	{ .compatible = "st,stm32f756-cryp", .data = &f7_data},
 	{ .compatible = "st,stm32mp1-cryp", .data = &mp1_data},
 	{},
@@ -1829,9 +2057,11 @@ static int stm32_cryp_probe(struct platform_device *pdev)
 		goto err_algs;
 	}
 
-	ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
-	if (ret)
-		goto err_aead_algs;
+	if (cryp->caps->aeads_support) {
+		ret = crypto_register_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+		if (ret)
+			goto err_aead_algs;
+	}
 
 	dev_info(dev, "Initialized\n");
 
@@ -1869,7 +2099,8 @@ static int stm32_cryp_remove(struct platform_device *pdev)
 	if (ret < 0)
 		return ret;
 
-	crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
+	if (cryp->caps->aeads_support)
+		crypto_unregister_aeads(aead_algs, ARRAY_SIZE(aead_algs));
 	crypto_unregister_skciphers(crypto_algs, ARRAY_SIZE(crypto_algs));
 
 	crypto_engine_exit(cryp->engine);
-- 
GitLab


From 453de3eb08c4b7e31b3019a4b0cc3ebce51a6219 Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sat, 3 Dec 2022 10:15:18 +0100
Subject: [PATCH 3588/4122] crypto: ux500/cryp - delete driver

It turns out we can just modify the newer STM32 CRYP driver
to be used with Ux500 and now that we have done that, delete
the old and sparsely maintained Ux500 CRYP driver.

Cc: Lionel Debieve <lionel.debieve@foss.st.com>
Cc: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Cc: Alexandre Torgue <alexandre.torgue@foss.st.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 drivers/crypto/ux500/Kconfig          |   10 -
 drivers/crypto/ux500/Makefile         |    1 -
 drivers/crypto/ux500/cryp/Makefile    |   10 -
 drivers/crypto/ux500/cryp/cryp.c      |  394 ------
 drivers/crypto/ux500/cryp/cryp.h      |  315 -----
 drivers/crypto/ux500/cryp/cryp_core.c | 1600 -------------------------
 drivers/crypto/ux500/cryp/cryp_irq.c  |   45 -
 drivers/crypto/ux500/cryp/cryp_irq.h  |   31 -
 drivers/crypto/ux500/cryp/cryp_irqp.h |  125 --
 drivers/crypto/ux500/cryp/cryp_p.h    |  122 --
 10 files changed, 2653 deletions(-)
 delete mode 100644 drivers/crypto/ux500/cryp/Makefile
 delete mode 100644 drivers/crypto/ux500/cryp/cryp.c
 delete mode 100644 drivers/crypto/ux500/cryp/cryp.h
 delete mode 100644 drivers/crypto/ux500/cryp/cryp_core.c
 delete mode 100644 drivers/crypto/ux500/cryp/cryp_irq.c
 delete mode 100644 drivers/crypto/ux500/cryp/cryp_irq.h
 delete mode 100644 drivers/crypto/ux500/cryp/cryp_irqp.h
 delete mode 100644 drivers/crypto/ux500/cryp/cryp_p.h

diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig
index f56d65c56ccf7..dcbd7404768f1 100644
--- a/drivers/crypto/ux500/Kconfig
+++ b/drivers/crypto/ux500/Kconfig
@@ -4,16 +4,6 @@
 # Author: Shujuan Chen (shujuan.chen@stericsson.com)
 #
 
-config CRYPTO_DEV_UX500_CRYP
-	tristate "UX500 crypto driver for CRYP block"
-	depends on CRYPTO_DEV_UX500
-	select CRYPTO_ALGAPI
-	select CRYPTO_SKCIPHER
-	select CRYPTO_LIB_DES
-	help
-	This selects the crypto driver for the UX500_CRYP hardware. It supports
-	AES-ECB, CBC and CTR with keys sizes of 128, 192 and 256 bit sizes.
-
 config CRYPTO_DEV_UX500_HASH
 	tristate "UX500 crypto driver for HASH block"
 	depends on CRYPTO_DEV_UX500
diff --git a/drivers/crypto/ux500/Makefile b/drivers/crypto/ux500/Makefile
index f014eb01710a6..f1aa4edf66f4f 100644
--- a/drivers/crypto/ux500/Makefile
+++ b/drivers/crypto/ux500/Makefile
@@ -5,4 +5,3 @@
 #
 
 obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += hash/
-obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += cryp/
diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile
deleted file mode 100644
index 3e67531f484cd..0000000000000
--- a/drivers/crypto/ux500/cryp/Makefile
+++ /dev/null
@@ -1,10 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#/*
-# * Copyright (C) ST-Ericsson SA 2010
-# * Author: shujuan.chen@stericsson.com for ST-Ericsson.
-# */
-
-ccflags-$(CONFIG_CRYPTO_DEV_UX500_DEBUG) += -DDEBUG
-
-obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o
-ux500_cryp-objs :=  cryp.o cryp_irq.o cryp_core.o
diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c
deleted file mode 100644
index 759d0d9786fd1..0000000000000
--- a/drivers/crypto/ux500/cryp/cryp.c
+++ /dev/null
@@ -1,394 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson.
- * Author: Jonas Linde <jonas.linde@stericsson.com> for ST-Ericsson.
- * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson.
- * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson.
- * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson.
- */
-
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-
-#include "cryp_p.h"
-#include "cryp.h"
-
-/*
- * cryp_wait_until_done - wait until the device logic is not busy
- */
-void cryp_wait_until_done(struct cryp_device_data *device_data)
-{
-	while (cryp_is_logic_busy(device_data))
-		cpu_relax();
-}
-
-/**
- * cryp_check - This routine checks Peripheral and PCell Id
- * @device_data: Pointer to the device data struct for base address.
- */
-int cryp_check(struct cryp_device_data *device_data)
-{
-	int peripheralid2 = 0;
-
-	if (NULL == device_data)
-		return -EINVAL;
-
-	peripheralid2 = readl_relaxed(&device_data->base->periphId2);
-
-	if (peripheralid2 != CRYP_PERIPHERAL_ID2_DB8500)
-		return -EPERM;
-
-	/* Check Peripheral and Pcell Id Register for CRYP */
-	if ((CRYP_PERIPHERAL_ID0 ==
-		readl_relaxed(&device_data->base->periphId0))
-	    && (CRYP_PERIPHERAL_ID1 ==
-		    readl_relaxed(&device_data->base->periphId1))
-	    && (CRYP_PERIPHERAL_ID3 ==
-		    readl_relaxed(&device_data->base->periphId3))
-	    && (CRYP_PCELL_ID0 ==
-		    readl_relaxed(&device_data->base->pcellId0))
-	    && (CRYP_PCELL_ID1 ==
-		    readl_relaxed(&device_data->base->pcellId1))
-	    && (CRYP_PCELL_ID2 ==
-		    readl_relaxed(&device_data->base->pcellId2))
-	    && (CRYP_PCELL_ID3 ==
-		    readl_relaxed(&device_data->base->pcellId3))) {
-		return 0;
-	}
-
-	return -EPERM;
-}
-
-/**
- * cryp_activity - This routine enables/disable the cryptography function.
- * @device_data: Pointer to the device data struct for base address.
- * @cryp_crypen: Enable/Disable functionality
- */
-void cryp_activity(struct cryp_device_data *device_data,
-		   enum cryp_crypen cryp_crypen)
-{
-	CRYP_PUT_BITS(&device_data->base->cr,
-		      cryp_crypen,
-		      CRYP_CR_CRYPEN_POS,
-		      CRYP_CR_CRYPEN_MASK);
-}
-
-/**
- * cryp_flush_inoutfifo - Resets both the input and the output FIFOs
- * @device_data: Pointer to the device data struct for base address.
- */
-void cryp_flush_inoutfifo(struct cryp_device_data *device_data)
-{
-	/*
-	 * We always need to disable the hardware before trying to flush the
-	 * FIFO. This is something that isn't written in the design
-	 * specification, but we have been informed by the hardware designers
-	 * that this must be done.
-	 */
-	cryp_activity(device_data, CRYP_CRYPEN_DISABLE);
-	cryp_wait_until_done(device_data);
-
-	CRYP_SET_BITS(&device_data->base->cr, CRYP_CR_FFLUSH_MASK);
-	/*
-	 * CRYP_SR_INFIFO_READY_MASK is the expected value on the status
-	 * register when starting a new calculation, which means Input FIFO is
-	 * not full and input FIFO is empty.
-	 */
-	while (readl_relaxed(&device_data->base->sr) !=
-	       CRYP_SR_INFIFO_READY_MASK)
-		cpu_relax();
-}
-
-/**
- * cryp_set_configuration - This routine set the cr CRYP IP
- * @device_data: Pointer to the device data struct for base address.
- * @cryp_config: Pointer to the configuration parameter
- * @control_register: The control register to be written later on.
- */
-int cryp_set_configuration(struct cryp_device_data *device_data,
-			   struct cryp_config *cryp_config,
-			   u32 *control_register)
-{
-	u32 cr_for_kse;
-
-	if (NULL == device_data || NULL == cryp_config)
-		return -EINVAL;
-
-	*control_register |= (cryp_config->keysize << CRYP_CR_KEYSIZE_POS);
-
-	/* Prepare key for decryption in AES_ECB and AES_CBC mode. */
-	if ((CRYP_ALGORITHM_DECRYPT == cryp_config->algodir) &&
-	    ((CRYP_ALGO_AES_ECB == cryp_config->algomode) ||
-	     (CRYP_ALGO_AES_CBC == cryp_config->algomode))) {
-		cr_for_kse = *control_register;
-		/*
-		 * This seems a bit odd, but it is indeed needed to set this to
-		 * encrypt even though it is a decryption that we are doing. It
-		 * also mentioned in the design spec that you need to do this.
-		 * After the keyprepartion for decrypting is done you should set
-		 * algodir back to decryption, which is done outside this if
-		 * statement.
-		 *
-		 * According to design specification we should set mode ECB
-		 * during key preparation even though we might be running CBC
-		 * when enter this function.
-		 *
-		 * Writing to KSE_ENABLED will drop CRYPEN when key preparation
-		 * is done. Therefore we need to set CRYPEN again outside this
-		 * if statement when running decryption.
-		 */
-		cr_for_kse |= ((CRYP_ALGORITHM_ENCRYPT << CRYP_CR_ALGODIR_POS) |
-			       (CRYP_ALGO_AES_ECB << CRYP_CR_ALGOMODE_POS) |
-			       (CRYP_CRYPEN_ENABLE << CRYP_CR_CRYPEN_POS) |
-			       (KSE_ENABLED << CRYP_CR_KSE_POS));
-
-		writel_relaxed(cr_for_kse, &device_data->base->cr);
-		cryp_wait_until_done(device_data);
-	}
-
-	*control_register |=
-		((cryp_config->algomode << CRYP_CR_ALGOMODE_POS) |
-		 (cryp_config->algodir << CRYP_CR_ALGODIR_POS));
-
-	return 0;
-}
-
-/**
- * cryp_configure_protection - set the protection bits in the CRYP logic.
- * @device_data: Pointer to the device data struct for base address.
- * @p_protect_config:	Pointer to the protection mode and
- *			secure mode configuration
- */
-int cryp_configure_protection(struct cryp_device_data *device_data,
-			      struct cryp_protection_config *p_protect_config)
-{
-	if (NULL == p_protect_config)
-		return -EINVAL;
-
-	CRYP_WRITE_BIT(&device_data->base->cr,
-		       (u32) p_protect_config->secure_access,
-		       CRYP_CR_SECURE_MASK);
-	CRYP_PUT_BITS(&device_data->base->cr,
-		      p_protect_config->privilege_access,
-		      CRYP_CR_PRLG_POS,
-		      CRYP_CR_PRLG_MASK);
-
-	return 0;
-}
-
-/**
- * cryp_is_logic_busy - returns the busy status of the CRYP logic
- * @device_data: Pointer to the device data struct for base address.
- */
-int cryp_is_logic_busy(struct cryp_device_data *device_data)
-{
-	return CRYP_TEST_BITS(&device_data->base->sr,
-			      CRYP_SR_BUSY_MASK);
-}
-
-/**
- * cryp_configure_for_dma - configures the CRYP IP for DMA operation
- * @device_data: Pointer to the device data struct for base address.
- * @dma_req: Specifies the DMA request type value.
- */
-void cryp_configure_for_dma(struct cryp_device_data *device_data,
-			    enum cryp_dma_req_type dma_req)
-{
-	CRYP_SET_BITS(&device_data->base->dmacr,
-		      (u32) dma_req);
-}
-
-/**
- * cryp_configure_key_values - configures the key values for CRYP operations
- * @device_data: Pointer to the device data struct for base address.
- * @key_reg_index: Key value index register
- * @key_value: The key value struct
- */
-int cryp_configure_key_values(struct cryp_device_data *device_data,
-			      enum cryp_key_reg_index key_reg_index,
-			      struct cryp_key_value key_value)
-{
-	while (cryp_is_logic_busy(device_data))
-		cpu_relax();
-
-	switch (key_reg_index) {
-	case CRYP_KEY_REG_1:
-		writel_relaxed(key_value.key_value_left,
-				&device_data->base->key_1_l);
-		writel_relaxed(key_value.key_value_right,
-				&device_data->base->key_1_r);
-		break;
-	case CRYP_KEY_REG_2:
-		writel_relaxed(key_value.key_value_left,
-				&device_data->base->key_2_l);
-		writel_relaxed(key_value.key_value_right,
-				&device_data->base->key_2_r);
-		break;
-	case CRYP_KEY_REG_3:
-		writel_relaxed(key_value.key_value_left,
-				&device_data->base->key_3_l);
-		writel_relaxed(key_value.key_value_right,
-				&device_data->base->key_3_r);
-		break;
-	case CRYP_KEY_REG_4:
-		writel_relaxed(key_value.key_value_left,
-				&device_data->base->key_4_l);
-		writel_relaxed(key_value.key_value_right,
-				&device_data->base->key_4_r);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * cryp_configure_init_vector - configures the initialization vector register
- * @device_data: Pointer to the device data struct for base address.
- * @init_vector_index: Specifies the index of the init vector.
- * @init_vector_value: Specifies the value for the init vector.
- */
-int cryp_configure_init_vector(struct cryp_device_data *device_data,
-			       enum cryp_init_vector_index
-			       init_vector_index,
-			       struct cryp_init_vector_value
-			       init_vector_value)
-{
-	while (cryp_is_logic_busy(device_data))
-		cpu_relax();
-
-	switch (init_vector_index) {
-	case CRYP_INIT_VECTOR_INDEX_0:
-		writel_relaxed(init_vector_value.init_value_left,
-		       &device_data->base->init_vect_0_l);
-		writel_relaxed(init_vector_value.init_value_right,
-		       &device_data->base->init_vect_0_r);
-		break;
-	case CRYP_INIT_VECTOR_INDEX_1:
-		writel_relaxed(init_vector_value.init_value_left,
-		       &device_data->base->init_vect_1_l);
-		writel_relaxed(init_vector_value.init_value_right,
-		       &device_data->base->init_vect_1_r);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/**
- * cryp_save_device_context -	Store hardware registers and
- *				other device context parameter
- * @device_data: Pointer to the device data struct for base address.
- * @ctx: Crypto device context
- * @cryp_mode: Mode: Polling, Interrupt or DMA
- */
-void cryp_save_device_context(struct cryp_device_data *device_data,
-			      struct cryp_device_context *ctx,
-			      int cryp_mode)
-{
-	enum cryp_algo_mode algomode;
-	struct cryp_register __iomem *src_reg = device_data->base;
-	struct cryp_config *config =
-		(struct cryp_config *)device_data->current_ctx;
-
-	/*
-	 * Always start by disable the hardware and wait for it to finish the
-	 * ongoing calculations before trying to reprogram it.
-	 */
-	cryp_activity(device_data, CRYP_CRYPEN_DISABLE);
-	cryp_wait_until_done(device_data);
-
-	if (cryp_mode == CRYP_MODE_DMA)
-		cryp_configure_for_dma(device_data, CRYP_DMA_DISABLE_BOTH);
-
-	if (CRYP_TEST_BITS(&src_reg->sr, CRYP_SR_IFEM_MASK) == 0)
-		ctx->din = readl_relaxed(&src_reg->din);
-
-	ctx->cr = readl_relaxed(&src_reg->cr) & CRYP_CR_CONTEXT_SAVE_MASK;
-
-	switch (config->keysize) {
-	case CRYP_KEY_SIZE_256:
-		ctx->key_4_l = readl_relaxed(&src_reg->key_4_l);
-		ctx->key_4_r = readl_relaxed(&src_reg->key_4_r);
-		fallthrough;
-
-	case CRYP_KEY_SIZE_192:
-		ctx->key_3_l = readl_relaxed(&src_reg->key_3_l);
-		ctx->key_3_r = readl_relaxed(&src_reg->key_3_r);
-		fallthrough;
-
-	case CRYP_KEY_SIZE_128:
-		ctx->key_2_l = readl_relaxed(&src_reg->key_2_l);
-		ctx->key_2_r = readl_relaxed(&src_reg->key_2_r);
-		fallthrough;
-
-	default:
-		ctx->key_1_l = readl_relaxed(&src_reg->key_1_l);
-		ctx->key_1_r = readl_relaxed(&src_reg->key_1_r);
-	}
-
-	/* Save IV for CBC mode for both AES and DES. */
-	algomode = ((ctx->cr & CRYP_CR_ALGOMODE_MASK) >> CRYP_CR_ALGOMODE_POS);
-	if (algomode == CRYP_ALGO_TDES_CBC ||
-	    algomode == CRYP_ALGO_DES_CBC ||
-	    algomode == CRYP_ALGO_AES_CBC) {
-		ctx->init_vect_0_l = readl_relaxed(&src_reg->init_vect_0_l);
-		ctx->init_vect_0_r = readl_relaxed(&src_reg->init_vect_0_r);
-		ctx->init_vect_1_l = readl_relaxed(&src_reg->init_vect_1_l);
-		ctx->init_vect_1_r = readl_relaxed(&src_reg->init_vect_1_r);
-	}
-}
-
-/**
- * cryp_restore_device_context -	Restore hardware registers and
- *					other device context parameter
- * @device_data: Pointer to the device data struct for base address.
- * @ctx: Crypto device context
- */
-void cryp_restore_device_context(struct cryp_device_data *device_data,
-				 struct cryp_device_context *ctx)
-{
-	struct cryp_register __iomem *reg = device_data->base;
-	struct cryp_config *config =
-		(struct cryp_config *)device_data->current_ctx;
-
-	/*
-	 * Fall through for all items in switch statement. DES is captured in
-	 * the default.
-	 */
-	switch (config->keysize) {
-	case CRYP_KEY_SIZE_256:
-		writel_relaxed(ctx->key_4_l, &reg->key_4_l);
-		writel_relaxed(ctx->key_4_r, &reg->key_4_r);
-		fallthrough;
-
-	case CRYP_KEY_SIZE_192:
-		writel_relaxed(ctx->key_3_l, &reg->key_3_l);
-		writel_relaxed(ctx->key_3_r, &reg->key_3_r);
-		fallthrough;
-
-	case CRYP_KEY_SIZE_128:
-		writel_relaxed(ctx->key_2_l, &reg->key_2_l);
-		writel_relaxed(ctx->key_2_r, &reg->key_2_r);
-		fallthrough;
-
-	default:
-		writel_relaxed(ctx->key_1_l, &reg->key_1_l);
-		writel_relaxed(ctx->key_1_r, &reg->key_1_r);
-	}
-
-	/* Restore IV for CBC mode for AES and DES. */
-	if (config->algomode == CRYP_ALGO_TDES_CBC ||
-	    config->algomode == CRYP_ALGO_DES_CBC ||
-	    config->algomode == CRYP_ALGO_AES_CBC) {
-		writel_relaxed(ctx->init_vect_0_l, &reg->init_vect_0_l);
-		writel_relaxed(ctx->init_vect_0_r, &reg->init_vect_0_r);
-		writel_relaxed(ctx->init_vect_1_l, &reg->init_vect_1_l);
-		writel_relaxed(ctx->init_vect_1_r, &reg->init_vect_1_r);
-	}
-}
diff --git a/drivers/crypto/ux500/cryp/cryp.h b/drivers/crypto/ux500/cryp/cryp.h
deleted file mode 100644
index 59e1557a620a9..0000000000000
--- a/drivers/crypto/ux500/cryp/cryp.h
+++ /dev/null
@@ -1,315 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson.
- * Author: Jonas Linde <jonas.linde@stericsson.com> for ST-Ericsson.
- * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson.
- * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson.
- * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson.
- */
-
-#ifndef _CRYP_H_
-#define _CRYP_H_
-
-#include <linux/completion.h>
-#include <linux/dmaengine.h>
-#include <linux/klist.h>
-#include <linux/mutex.h>
-
-#define DEV_DBG_NAME "crypX crypX:"
-
-/* CRYP enable/disable */
-enum cryp_crypen {
-	CRYP_CRYPEN_DISABLE = 0,
-	CRYP_CRYPEN_ENABLE = 1
-};
-
-/* CRYP Start Computation enable/disable */
-enum cryp_start {
-	CRYP_START_DISABLE = 0,
-	CRYP_START_ENABLE = 1
-};
-
-/* CRYP Init Signal enable/disable */
-enum cryp_init {
-	CRYP_INIT_DISABLE = 0,
-	CRYP_INIT_ENABLE = 1
-};
-
-/* Cryp State enable/disable */
-enum cryp_state {
-	CRYP_STATE_DISABLE = 0,
-	CRYP_STATE_ENABLE = 1
-};
-
-/* Key preparation bit enable */
-enum cryp_key_prep {
-	KSE_DISABLED = 0,
-	KSE_ENABLED = 1
-};
-
-/* Key size for AES */
-#define	CRYP_KEY_SIZE_128 (0)
-#define	CRYP_KEY_SIZE_192 (1)
-#define	CRYP_KEY_SIZE_256 (2)
-
-/* AES modes */
-enum cryp_algo_mode {
-	CRYP_ALGO_TDES_ECB,
-	CRYP_ALGO_TDES_CBC,
-	CRYP_ALGO_DES_ECB,
-	CRYP_ALGO_DES_CBC,
-	CRYP_ALGO_AES_ECB,
-	CRYP_ALGO_AES_CBC,
-	CRYP_ALGO_AES_CTR,
-	CRYP_ALGO_AES_XTS
-};
-
-/* Cryp Encryption or Decryption */
-enum cryp_algorithm_dir {
-	CRYP_ALGORITHM_ENCRYPT,
-	CRYP_ALGORITHM_DECRYPT
-};
-
-/* Hardware access method */
-enum cryp_mode {
-	CRYP_MODE_POLLING,
-	CRYP_MODE_INTERRUPT,
-	CRYP_MODE_DMA
-};
-
-/**
- * struct cryp_config -
- * @keysize: Key size for AES
- * @algomode: AES modes
- * @algodir: Cryp Encryption or Decryption
- *
- * CRYP configuration structure to be passed to set configuration
- */
-struct cryp_config {
-	int keysize;
-	enum cryp_algo_mode algomode;
-	enum cryp_algorithm_dir algodir;
-};
-
-/**
- * struct cryp_protection_config -
- * @privilege_access: Privileged cryp state enable/disable
- * @secure_access: Secure cryp state enable/disable
- *
- * Protection configuration structure for setting privilage access
- */
-struct cryp_protection_config {
-	enum cryp_state privilege_access;
-	enum cryp_state secure_access;
-};
-
-/* Cryp status */
-enum cryp_status_id {
-	CRYP_STATUS_BUSY = 0x10,
-	CRYP_STATUS_OUTPUT_FIFO_FULL = 0x08,
-	CRYP_STATUS_OUTPUT_FIFO_NOT_EMPTY = 0x04,
-	CRYP_STATUS_INPUT_FIFO_NOT_FULL = 0x02,
-	CRYP_STATUS_INPUT_FIFO_EMPTY = 0x01
-};
-
-/* Cryp DMA interface */
-#define CRYP_DMA_TX_FIFO	0x08
-#define CRYP_DMA_RX_FIFO	0x10
-
-enum cryp_dma_req_type {
-	CRYP_DMA_DISABLE_BOTH,
-	CRYP_DMA_ENABLE_IN_DATA,
-	CRYP_DMA_ENABLE_OUT_DATA,
-	CRYP_DMA_ENABLE_BOTH_DIRECTIONS
-};
-
-enum cryp_dma_channel {
-	CRYP_DMA_RX = 0,
-	CRYP_DMA_TX
-};
-
-/* Key registers */
-enum cryp_key_reg_index {
-	CRYP_KEY_REG_1,
-	CRYP_KEY_REG_2,
-	CRYP_KEY_REG_3,
-	CRYP_KEY_REG_4
-};
-
-/* Key register left and right */
-struct cryp_key_value {
-	u32 key_value_left;
-	u32 key_value_right;
-};
-
-/* Cryp Initialization structure */
-enum cryp_init_vector_index {
-	CRYP_INIT_VECTOR_INDEX_0,
-	CRYP_INIT_VECTOR_INDEX_1
-};
-
-/* struct cryp_init_vector_value -
- * @init_value_left
- * @init_value_right
- * */
-struct cryp_init_vector_value {
-	u32 init_value_left;
-	u32 init_value_right;
-};
-
-/**
- * struct cryp_device_context - structure for a cryp context.
- * @cr: control register
- * @dmacr: DMA control register
- * @imsc: Interrupt mask set/clear register
- * @key_1_l: Key 1l register
- * @key_1_r: Key 1r register
- * @key_2_l: Key 2l register
- * @key_2_r: Key 2r register
- * @key_3_l: Key 3l register
- * @key_3_r: Key 3r register
- * @key_4_l: Key 4l register
- * @key_4_r: Key 4r register
- * @init_vect_0_l: Initialization vector 0l register
- * @init_vect_0_r: Initialization vector 0r register
- * @init_vect_1_l: Initialization vector 1l register
- * @init_vect_1_r: Initialization vector 0r register
- * @din: Data in register
- * @dout: Data out register
- *
- * CRYP power management specifc structure.
- */
-struct cryp_device_context {
-	u32 cr;
-	u32 dmacr;
-	u32 imsc;
-
-	u32 key_1_l;
-	u32 key_1_r;
-	u32 key_2_l;
-	u32 key_2_r;
-	u32 key_3_l;
-	u32 key_3_r;
-	u32 key_4_l;
-	u32 key_4_r;
-
-	u32 init_vect_0_l;
-	u32 init_vect_0_r;
-	u32 init_vect_1_l;
-	u32 init_vect_1_r;
-
-	u32 din;
-	u32 dout;
-};
-
-struct cryp_dma {
-	dma_cap_mask_t mask;
-	struct completion cryp_dma_complete;
-	struct dma_chan *chan_cryp2mem;
-	struct dma_chan *chan_mem2cryp;
-	struct stedma40_chan_cfg *cfg_cryp2mem;
-	struct stedma40_chan_cfg *cfg_mem2cryp;
-	int sg_src_len;
-	int sg_dst_len;
-	struct scatterlist *sg_src;
-	struct scatterlist *sg_dst;
-	int nents_src;
-	int nents_dst;
-};
-
-/**
- * struct cryp_device_data - structure for a cryp device.
- * @base: Pointer to virtual base address of the cryp device.
- * @phybase: Pointer to physical memory location of the cryp device.
- * @dev: Pointer to the devices dev structure.
- * @clk: Pointer to the device's clock control.
- * @irq: IRQ number
- * @pwr_regulator: Pointer to the device's power control.
- * @power_status: Current status of the power.
- * @ctx_lock: Lock for current_ctx.
- * @current_ctx: Pointer to the currently allocated context.
- * @list_node: For inclusion into a klist.
- * @dma: The dma structure holding channel configuration.
- * @power_state: TRUE = power state on, FALSE = power state off.
- * @power_state_spinlock: Spinlock for power_state.
- * @restore_dev_ctx: TRUE = saved ctx, FALSE = no saved ctx.
- */
-struct cryp_device_data {
-	struct cryp_register __iomem *base;
-	phys_addr_t phybase;
-	struct device *dev;
-	struct clk *clk;
-	int irq;
-	struct regulator *pwr_regulator;
-	int power_status;
-	spinlock_t ctx_lock;
-	struct cryp_ctx *current_ctx;
-	struct klist_node list_node;
-	struct cryp_dma dma;
-	bool power_state;
-	spinlock_t power_state_spinlock;
-	bool restore_dev_ctx;
-};
-
-void cryp_wait_until_done(struct cryp_device_data *device_data);
-
-/* Initialization functions */
-
-int cryp_check(struct cryp_device_data *device_data);
-
-void cryp_activity(struct cryp_device_data *device_data,
-		   enum cryp_crypen cryp_crypen);
-
-void cryp_flush_inoutfifo(struct cryp_device_data *device_data);
-
-int cryp_set_configuration(struct cryp_device_data *device_data,
-			   struct cryp_config *cryp_config,
-			   u32 *control_register);
-
-void cryp_configure_for_dma(struct cryp_device_data *device_data,
-			    enum cryp_dma_req_type dma_req);
-
-int cryp_configure_key_values(struct cryp_device_data *device_data,
-			      enum cryp_key_reg_index key_reg_index,
-			      struct cryp_key_value key_value);
-
-int cryp_configure_init_vector(struct cryp_device_data *device_data,
-			       enum cryp_init_vector_index
-			       init_vector_index,
-			       struct cryp_init_vector_value
-			       init_vector_value);
-
-int cryp_configure_protection(struct cryp_device_data *device_data,
-			      struct cryp_protection_config *p_protect_config);
-
-/* Power management funtions */
-void cryp_save_device_context(struct cryp_device_data *device_data,
-			      struct cryp_device_context *ctx,
-			      int cryp_mode);
-
-void cryp_restore_device_context(struct cryp_device_data *device_data,
-				 struct cryp_device_context *ctx);
-
-/* Data transfer and status bits. */
-int cryp_is_logic_busy(struct cryp_device_data *device_data);
-
-int cryp_get_status(struct cryp_device_data *device_data);
-
-/**
- * cryp_write_indata - This routine writes 32 bit data into the data input
- *		       register of the cryptography IP.
- * @device_data: Pointer to the device data struct for base address.
- * @write_data: Data to write.
- */
-int cryp_write_indata(struct cryp_device_data *device_data, u32 write_data);
-
-/**
- * cryp_read_outdata - This routine reads the data from the data output
- *		       register of the CRYP logic
- * @device_data: Pointer to the device data struct for base address.
- * @read_data: Read the data from the output FIFO.
- */
-int cryp_read_outdata(struct cryp_device_data *device_data, u32 *read_data);
-
-#endif /* _CRYP_H_ */
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
deleted file mode 100644
index 5a57c9afd8c88..0000000000000
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ /dev/null
@@ -1,1600 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson.
- * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson.
- * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson.
- * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson.
- * Author: Jonas Linde <jonas.linde@stericsson.com> for ST-Ericsson.
- * Author: Andreas Westin <andreas.westin@stericsson.com> for ST-Ericsson.
- */
-
-#include <linux/clk.h>
-#include <linux/completion.h>
-#include <linux/device.h>
-#include <linux/dma-mapping.h>
-#include <linux/dmaengine.h>
-#include <linux/err.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/irqreturn.h>
-#include <linux/kernel.h>
-#include <linux/klist.h>
-#include <linux/module.h>
-#include <linux/mod_devicetable.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/semaphore.h>
-#include <linux/platform_data/dma-ste-dma40.h>
-
-#include <crypto/aes.h>
-#include <crypto/ctr.h>
-#include <crypto/internal/des.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/scatterwalk.h>
-
-#include <linux/platform_data/crypto-ux500.h>
-
-#include "cryp_p.h"
-#include "cryp.h"
-
-#define CRYP_MAX_KEY_SIZE	32
-#define BYTES_PER_WORD		4
-
-static int cryp_mode;
-static atomic_t session_id;
-
-static struct stedma40_chan_cfg *mem_to_engine;
-static struct stedma40_chan_cfg *engine_to_mem;
-
-/**
- * struct cryp_driver_data - data specific to the driver.
- *
- * @device_list: A list of registered devices to choose from.
- * @device_allocation: A semaphore initialized with number of devices.
- */
-struct cryp_driver_data {
-	struct klist device_list;
-	struct semaphore device_allocation;
-};
-
-/**
- * struct cryp_ctx - Crypto context
- * @config: Crypto mode.
- * @key: Key array.
- * @keylen: Length of key.
- * @iv: Pointer to initialization vector.
- * @indata: Pointer to indata.
- * @outdata: Pointer to outdata.
- * @datalen: Length of indata.
- * @outlen: Length of outdata.
- * @blocksize: Size of blocks.
- * @updated: Updated flag.
- * @dev_ctx: Device dependent context.
- * @device: Pointer to the device.
- * @session_id: Atomic session ID.
- */
-struct cryp_ctx {
-	struct cryp_config config;
-	u8 key[CRYP_MAX_KEY_SIZE];
-	u32 keylen;
-	u8 *iv;
-	const u8 *indata;
-	u8 *outdata;
-	u32 datalen;
-	u32 outlen;
-	u32 blocksize;
-	u8 updated;
-	struct cryp_device_context dev_ctx;
-	struct cryp_device_data *device;
-	u32 session_id;
-};
-
-static struct cryp_driver_data driver_data;
-
-/**
- * swap_bits_in_byte - mirror the bits in a byte
- * @b: the byte to be mirrored
- *
- * The bits are swapped the following way:
- *  Byte b include bits 0-7, nibble 1 (n1) include bits 0-3 and
- *  nibble 2 (n2) bits 4-7.
- *
- *  Nibble 1 (n1):
- *  (The "old" (moved) bit is replaced with a zero)
- *  1. Move bit 6 and 7, 4 positions to the left.
- *  2. Move bit 3 and 5, 2 positions to the left.
- *  3. Move bit 1-4, 1 position to the left.
- *
- *  Nibble 2 (n2):
- *  1. Move bit 0 and 1, 4 positions to the right.
- *  2. Move bit 2 and 4, 2 positions to the right.
- *  3. Move bit 3-6, 1 position to the right.
- *
- *  Combine the two nibbles to a complete and swapped byte.
- */
-
-static inline u8 swap_bits_in_byte(u8 b)
-{
-#define R_SHIFT_4_MASK  0xc0 /* Bits 6 and 7, right shift 4 */
-#define R_SHIFT_2_MASK  0x28 /* (After right shift 4) Bits 3 and 5,
-				  right shift 2 */
-#define R_SHIFT_1_MASK  0x1e /* (After right shift 2) Bits 1-4,
-				  right shift 1 */
-#define L_SHIFT_4_MASK  0x03 /* Bits 0 and 1, left shift 4 */
-#define L_SHIFT_2_MASK  0x14 /* (After left shift 4) Bits 2 and 4,
-				  left shift 2 */
-#define L_SHIFT_1_MASK  0x78 /* (After left shift 1) Bits 3-6,
-				  left shift 1 */
-
-	u8 n1;
-	u8 n2;
-
-	/* Swap most significant nibble */
-	/* Right shift 4, bits 6 and 7 */
-	n1 = ((b  & R_SHIFT_4_MASK) >> 4) | (b  & ~(R_SHIFT_4_MASK >> 4));
-	/* Right shift 2, bits 3 and 5 */
-	n1 = ((n1 & R_SHIFT_2_MASK) >> 2) | (n1 & ~(R_SHIFT_2_MASK >> 2));
-	/* Right shift 1, bits 1-4 */
-	n1 = (n1  & R_SHIFT_1_MASK) >> 1;
-
-	/* Swap least significant nibble */
-	/* Left shift 4, bits 0 and 1 */
-	n2 = ((b  & L_SHIFT_4_MASK) << 4) | (b  & ~(L_SHIFT_4_MASK << 4));
-	/* Left shift 2, bits 2 and 4 */
-	n2 = ((n2 & L_SHIFT_2_MASK) << 2) | (n2 & ~(L_SHIFT_2_MASK << 2));
-	/* Left shift 1, bits 3-6 */
-	n2 = (n2  & L_SHIFT_1_MASK) << 1;
-
-	return n1 | n2;
-}
-
-static inline void swap_words_in_key_and_bits_in_byte(const u8 *in,
-						      u8 *out, u32 len)
-{
-	unsigned int i = 0;
-	int j;
-	int index = 0;
-
-	j = len - BYTES_PER_WORD;
-	while (j >= 0) {
-		for (i = 0; i < BYTES_PER_WORD; i++) {
-			index = len - j - BYTES_PER_WORD + i;
-			out[j + i] =
-				swap_bits_in_byte(in[index]);
-		}
-		j -= BYTES_PER_WORD;
-	}
-}
-
-static void add_session_id(struct cryp_ctx *ctx)
-{
-	/*
-	 * We never want 0 to be a valid value, since this is the default value
-	 * for the software context.
-	 */
-	if (unlikely(atomic_inc_and_test(&session_id)))
-		atomic_inc(&session_id);
-
-	ctx->session_id = atomic_read(&session_id);
-}
-
-static irqreturn_t cryp_interrupt_handler(int irq, void *param)
-{
-	struct cryp_ctx *ctx;
-	int count;
-	struct cryp_device_data *device_data;
-
-	if (param == NULL) {
-		BUG_ON(!param);
-		return IRQ_HANDLED;
-	}
-
-	/* The device is coming from the one found in hw_crypt_noxts. */
-	device_data = (struct cryp_device_data *)param;
-
-	ctx = device_data->current_ctx;
-
-	if (ctx == NULL) {
-		BUG_ON(!ctx);
-		return IRQ_HANDLED;
-	}
-
-	dev_dbg(ctx->device->dev, "[%s] (len: %d) %s, ", __func__, ctx->outlen,
-		cryp_pending_irq_src(device_data, CRYP_IRQ_SRC_OUTPUT_FIFO) ?
-		"out" : "in");
-
-	if (cryp_pending_irq_src(device_data,
-				 CRYP_IRQ_SRC_OUTPUT_FIFO)) {
-		if (ctx->outlen / ctx->blocksize > 0) {
-			count = ctx->blocksize / 4;
-
-			readsl(&device_data->base->dout, ctx->outdata, count);
-			ctx->outdata += count;
-			ctx->outlen -= count;
-
-			if (ctx->outlen == 0) {
-				cryp_disable_irq_src(device_data,
-						     CRYP_IRQ_SRC_OUTPUT_FIFO);
-			}
-		}
-	} else if (cryp_pending_irq_src(device_data,
-					CRYP_IRQ_SRC_INPUT_FIFO)) {
-		if (ctx->datalen / ctx->blocksize > 0) {
-			count = ctx->blocksize / 4;
-
-			writesl(&device_data->base->din, ctx->indata, count);
-
-			ctx->indata += count;
-			ctx->datalen -= count;
-
-			if (ctx->datalen == 0)
-				cryp_disable_irq_src(device_data,
-						   CRYP_IRQ_SRC_INPUT_FIFO);
-
-			if (ctx->config.algomode == CRYP_ALGO_AES_XTS) {
-				CRYP_PUT_BITS(&device_data->base->cr,
-					      CRYP_START_ENABLE,
-					      CRYP_CR_START_POS,
-					      CRYP_CR_START_MASK);
-
-				cryp_wait_until_done(device_data);
-			}
-		}
-	}
-
-	return IRQ_HANDLED;
-}
-
-static int mode_is_aes(enum cryp_algo_mode mode)
-{
-	return	CRYP_ALGO_AES_ECB == mode ||
-		CRYP_ALGO_AES_CBC == mode ||
-		CRYP_ALGO_AES_CTR == mode ||
-		CRYP_ALGO_AES_XTS == mode;
-}
-
-static int cfg_iv(struct cryp_device_data *device_data, u32 left, u32 right,
-		  enum cryp_init_vector_index index)
-{
-	struct cryp_init_vector_value vector_value;
-
-	dev_dbg(device_data->dev, "[%s]", __func__);
-
-	vector_value.init_value_left = left;
-	vector_value.init_value_right = right;
-
-	return cryp_configure_init_vector(device_data,
-					  index,
-					  vector_value);
-}
-
-static int cfg_ivs(struct cryp_device_data *device_data, struct cryp_ctx *ctx)
-{
-	int i;
-	int status = 0;
-	int num_of_regs = ctx->blocksize / 8;
-	__be32 *civ = (__be32 *)ctx->iv;
-	u32 iv[AES_BLOCK_SIZE / 4];
-
-	dev_dbg(device_data->dev, "[%s]", __func__);
-
-	/*
-	 * Since we loop on num_of_regs we need to have a check in case
-	 * someone provides an incorrect blocksize which would force calling
-	 * cfg_iv with i greater than 2 which is an error.
-	 */
-	if (num_of_regs > 2) {
-		dev_err(device_data->dev, "[%s] Incorrect blocksize %d",
-			__func__, ctx->blocksize);
-		return -EINVAL;
-	}
-
-	for (i = 0; i < ctx->blocksize / 4; i++)
-		iv[i] = be32_to_cpup(civ + i);
-
-	for (i = 0; i < num_of_regs; i++) {
-		status = cfg_iv(device_data, iv[i*2], iv[i*2+1],
-				(enum cryp_init_vector_index) i);
-		if (status != 0)
-			return status;
-	}
-	return status;
-}
-
-static int set_key(struct cryp_device_data *device_data,
-		   u32 left_key,
-		   u32 right_key,
-		   enum cryp_key_reg_index index)
-{
-	struct cryp_key_value key_value;
-	int cryp_error;
-
-	dev_dbg(device_data->dev, "[%s]", __func__);
-
-	key_value.key_value_left = left_key;
-	key_value.key_value_right = right_key;
-
-	cryp_error = cryp_configure_key_values(device_data,
-					       index,
-					       key_value);
-	if (cryp_error != 0)
-		dev_err(device_data->dev, "[%s]: "
-			"cryp_configure_key_values() failed!", __func__);
-
-	return cryp_error;
-}
-
-static int cfg_keys(struct cryp_ctx *ctx)
-{
-	int i;
-	int num_of_regs = ctx->keylen / 8;
-	u32 swapped_key[CRYP_MAX_KEY_SIZE / 4];
-	__be32 *ckey = (__be32 *)ctx->key;
-	int cryp_error = 0;
-
-	dev_dbg(ctx->device->dev, "[%s]", __func__);
-
-	if (mode_is_aes(ctx->config.algomode)) {
-		swap_words_in_key_and_bits_in_byte((u8 *)ckey,
-						   (u8 *)swapped_key,
-						   ctx->keylen);
-	} else {
-		for (i = 0; i < ctx->keylen / 4; i++)
-			swapped_key[i] = be32_to_cpup(ckey + i);
-	}
-
-	for (i = 0; i < num_of_regs; i++) {
-		cryp_error = set_key(ctx->device,
-				     swapped_key[i * 2],
-				     swapped_key[i * 2 + 1],
-				     (enum cryp_key_reg_index) i);
-
-		if (cryp_error != 0) {
-			dev_err(ctx->device->dev, "[%s]: set_key() failed!",
-					__func__);
-			return cryp_error;
-		}
-	}
-	return cryp_error;
-}
-
-static int cryp_setup_context(struct cryp_ctx *ctx,
-			      struct cryp_device_data *device_data)
-{
-	u32 control_register = CRYP_CR_DEFAULT;
-
-	switch (cryp_mode) {
-	case CRYP_MODE_INTERRUPT:
-		writel_relaxed(CRYP_IMSC_DEFAULT, &device_data->base->imsc);
-		break;
-
-	case CRYP_MODE_DMA:
-		writel_relaxed(CRYP_DMACR_DEFAULT, &device_data->base->dmacr);
-		break;
-
-	default:
-		break;
-	}
-
-	if (ctx->updated == 0) {
-		cryp_flush_inoutfifo(device_data);
-		if (cfg_keys(ctx) != 0) {
-			dev_err(ctx->device->dev, "[%s]: cfg_keys failed!",
-				__func__);
-			return -EINVAL;
-		}
-
-		if (ctx->iv &&
-		    CRYP_ALGO_AES_ECB != ctx->config.algomode &&
-		    CRYP_ALGO_DES_ECB != ctx->config.algomode &&
-		    CRYP_ALGO_TDES_ECB != ctx->config.algomode) {
-			if (cfg_ivs(device_data, ctx) != 0)
-				return -EPERM;
-		}
-
-		cryp_set_configuration(device_data, &ctx->config,
-				       &control_register);
-		add_session_id(ctx);
-	} else if (ctx->updated == 1 &&
-		   ctx->session_id != atomic_read(&session_id)) {
-		cryp_flush_inoutfifo(device_data);
-		cryp_restore_device_context(device_data, &ctx->dev_ctx);
-
-		add_session_id(ctx);
-		control_register = ctx->dev_ctx.cr;
-	} else
-		control_register = ctx->dev_ctx.cr;
-
-	writel(control_register |
-	       (CRYP_CRYPEN_ENABLE << CRYP_CR_CRYPEN_POS),
-	       &device_data->base->cr);
-
-	return 0;
-}
-
-static int cryp_get_device_data(struct cryp_ctx *ctx,
-				struct cryp_device_data **device_data)
-{
-	int ret;
-	struct klist_iter device_iterator;
-	struct klist_node *device_node;
-	struct cryp_device_data *local_device_data = NULL;
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	/* Wait until a device is available */
-	ret = down_interruptible(&driver_data.device_allocation);
-	if (ret)
-		return ret;  /* Interrupted */
-
-	/* Select a device */
-	klist_iter_init(&driver_data.device_list, &device_iterator);
-
-	device_node = klist_next(&device_iterator);
-	while (device_node) {
-		local_device_data = container_of(device_node,
-					   struct cryp_device_data, list_node);
-		spin_lock(&local_device_data->ctx_lock);
-		/* current_ctx allocates a device, NULL = unallocated */
-		if (local_device_data->current_ctx) {
-			device_node = klist_next(&device_iterator);
-		} else {
-			local_device_data->current_ctx = ctx;
-			ctx->device = local_device_data;
-			spin_unlock(&local_device_data->ctx_lock);
-			break;
-		}
-		spin_unlock(&local_device_data->ctx_lock);
-	}
-	klist_iter_exit(&device_iterator);
-
-	if (!device_node) {
-		/**
-		 * No free device found.
-		 * Since we allocated a device with down_interruptible, this
-		 * should not be able to happen.
-		 * Number of available devices, which are contained in
-		 * device_allocation, is therefore decremented by not doing
-		 * an up(device_allocation).
-		 */
-		return -EBUSY;
-	}
-
-	*device_data = local_device_data;
-
-	return 0;
-}
-
-static void cryp_dma_setup_channel(struct cryp_device_data *device_data,
-				   struct device *dev)
-{
-	struct dma_slave_config mem2cryp = {
-		.direction = DMA_MEM_TO_DEV,
-		.dst_addr = device_data->phybase + CRYP_DMA_TX_FIFO,
-		.dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES,
-		.dst_maxburst = 4,
-	};
-	struct dma_slave_config cryp2mem = {
-		.direction = DMA_DEV_TO_MEM,
-		.src_addr = device_data->phybase + CRYP_DMA_RX_FIFO,
-		.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES,
-		.src_maxburst = 4,
-	};
-
-	dma_cap_zero(device_data->dma.mask);
-	dma_cap_set(DMA_SLAVE, device_data->dma.mask);
-
-	device_data->dma.cfg_mem2cryp = mem_to_engine;
-	device_data->dma.chan_mem2cryp =
-		dma_request_channel(device_data->dma.mask,
-				    stedma40_filter,
-				    device_data->dma.cfg_mem2cryp);
-
-	device_data->dma.cfg_cryp2mem = engine_to_mem;
-	device_data->dma.chan_cryp2mem =
-		dma_request_channel(device_data->dma.mask,
-				    stedma40_filter,
-				    device_data->dma.cfg_cryp2mem);
-
-	dmaengine_slave_config(device_data->dma.chan_mem2cryp, &mem2cryp);
-	dmaengine_slave_config(device_data->dma.chan_cryp2mem, &cryp2mem);
-
-	init_completion(&device_data->dma.cryp_dma_complete);
-}
-
-static void cryp_dma_out_callback(void *data)
-{
-	struct cryp_ctx *ctx = (struct cryp_ctx *) data;
-	dev_dbg(ctx->device->dev, "[%s]: ", __func__);
-
-	complete(&ctx->device->dma.cryp_dma_complete);
-}
-
-static int cryp_set_dma_transfer(struct cryp_ctx *ctx,
-				 struct scatterlist *sg,
-				 int len,
-				 enum dma_data_direction direction)
-{
-	struct dma_async_tx_descriptor *desc;
-	struct dma_chan *channel = NULL;
-	dma_cookie_t cookie;
-
-	dev_dbg(ctx->device->dev, "[%s]: ", __func__);
-
-	if (unlikely(!IS_ALIGNED((unsigned long)sg, 4))) {
-		dev_err(ctx->device->dev, "[%s]: Data in sg list isn't "
-			"aligned! Addr: 0x%08lx", __func__, (unsigned long)sg);
-		return -EFAULT;
-	}
-
-	switch (direction) {
-	case DMA_TO_DEVICE:
-		channel = ctx->device->dma.chan_mem2cryp;
-		ctx->device->dma.sg_src = sg;
-		ctx->device->dma.sg_src_len = dma_map_sg(channel->device->dev,
-						 ctx->device->dma.sg_src,
-						 ctx->device->dma.nents_src,
-						 direction);
-
-		if (!ctx->device->dma.sg_src_len) {
-			dev_dbg(ctx->device->dev,
-				"[%s]: Could not map the sg list (TO_DEVICE)",
-				__func__);
-			return -EFAULT;
-		}
-
-		dev_dbg(ctx->device->dev, "[%s]: Setting up DMA for buffer "
-			"(TO_DEVICE)", __func__);
-
-		desc = dmaengine_prep_slave_sg(channel,
-				ctx->device->dma.sg_src,
-				ctx->device->dma.sg_src_len,
-				DMA_MEM_TO_DEV, DMA_CTRL_ACK);
-		break;
-
-	case DMA_FROM_DEVICE:
-		channel = ctx->device->dma.chan_cryp2mem;
-		ctx->device->dma.sg_dst = sg;
-		ctx->device->dma.sg_dst_len = dma_map_sg(channel->device->dev,
-						 ctx->device->dma.sg_dst,
-						 ctx->device->dma.nents_dst,
-						 direction);
-
-		if (!ctx->device->dma.sg_dst_len) {
-			dev_dbg(ctx->device->dev,
-				"[%s]: Could not map the sg list (FROM_DEVICE)",
-				__func__);
-			return -EFAULT;
-		}
-
-		dev_dbg(ctx->device->dev, "[%s]: Setting up DMA for buffer "
-			"(FROM_DEVICE)", __func__);
-
-		desc = dmaengine_prep_slave_sg(channel,
-				ctx->device->dma.sg_dst,
-				ctx->device->dma.sg_dst_len,
-				DMA_DEV_TO_MEM,
-				DMA_CTRL_ACK |
-				DMA_PREP_INTERRUPT);
-
-		desc->callback = cryp_dma_out_callback;
-		desc->callback_param = ctx;
-		break;
-
-	default:
-		dev_dbg(ctx->device->dev, "[%s]: Invalid DMA direction",
-			__func__);
-		return -EFAULT;
-	}
-
-	cookie = dmaengine_submit(desc);
-	if (dma_submit_error(cookie)) {
-		dev_dbg(ctx->device->dev, "[%s]: DMA submission failed\n",
-			__func__);
-		return cookie;
-	}
-
-	dma_async_issue_pending(channel);
-
-	return 0;
-}
-
-static void cryp_dma_done(struct cryp_ctx *ctx)
-{
-	struct dma_chan *chan;
-
-	dev_dbg(ctx->device->dev, "[%s]: ", __func__);
-
-	chan = ctx->device->dma.chan_mem2cryp;
-	dmaengine_terminate_all(chan);
-	dma_unmap_sg(chan->device->dev, ctx->device->dma.sg_src,
-		     ctx->device->dma.nents_src, DMA_TO_DEVICE);
-
-	chan = ctx->device->dma.chan_cryp2mem;
-	dmaengine_terminate_all(chan);
-	dma_unmap_sg(chan->device->dev, ctx->device->dma.sg_dst,
-		     ctx->device->dma.nents_dst, DMA_FROM_DEVICE);
-}
-
-static int cryp_dma_write(struct cryp_ctx *ctx, struct scatterlist *sg,
-			  int len)
-{
-	int error = cryp_set_dma_transfer(ctx, sg, len, DMA_TO_DEVICE);
-	dev_dbg(ctx->device->dev, "[%s]: ", __func__);
-
-	if (error) {
-		dev_dbg(ctx->device->dev, "[%s]: cryp_set_dma_transfer() "
-			"failed", __func__);
-		return error;
-	}
-
-	return len;
-}
-
-static int cryp_dma_read(struct cryp_ctx *ctx, struct scatterlist *sg, int len)
-{
-	int error = cryp_set_dma_transfer(ctx, sg, len, DMA_FROM_DEVICE);
-	if (error) {
-		dev_dbg(ctx->device->dev, "[%s]: cryp_set_dma_transfer() "
-			"failed", __func__);
-		return error;
-	}
-
-	return len;
-}
-
-static void cryp_polling_mode(struct cryp_ctx *ctx,
-			      struct cryp_device_data *device_data)
-{
-	int len = ctx->blocksize / BYTES_PER_WORD;
-	int remaining_length = ctx->datalen;
-	u32 *indata = (u32 *)ctx->indata;
-	u32 *outdata = (u32 *)ctx->outdata;
-
-	while (remaining_length > 0) {
-		writesl(&device_data->base->din, indata, len);
-		indata += len;
-		remaining_length -= (len * BYTES_PER_WORD);
-		cryp_wait_until_done(device_data);
-
-		readsl(&device_data->base->dout, outdata, len);
-		outdata += len;
-		cryp_wait_until_done(device_data);
-	}
-}
-
-static int cryp_disable_power(struct device *dev,
-			      struct cryp_device_data *device_data,
-			      bool save_device_context)
-{
-	int ret = 0;
-
-	dev_dbg(dev, "[%s]", __func__);
-
-	spin_lock(&device_data->power_state_spinlock);
-	if (!device_data->power_state)
-		goto out;
-
-	spin_lock(&device_data->ctx_lock);
-	if (save_device_context && device_data->current_ctx) {
-		cryp_save_device_context(device_data,
-				&device_data->current_ctx->dev_ctx,
-				cryp_mode);
-		device_data->restore_dev_ctx = true;
-	}
-	spin_unlock(&device_data->ctx_lock);
-
-	clk_disable(device_data->clk);
-	ret = regulator_disable(device_data->pwr_regulator);
-	if (ret)
-		dev_err(dev, "[%s]: "
-				"regulator_disable() failed!",
-				__func__);
-
-	device_data->power_state = false;
-
-out:
-	spin_unlock(&device_data->power_state_spinlock);
-
-	return ret;
-}
-
-static int cryp_enable_power(
-		struct device *dev,
-		struct cryp_device_data *device_data,
-		bool restore_device_context)
-{
-	int ret = 0;
-
-	dev_dbg(dev, "[%s]", __func__);
-
-	spin_lock(&device_data->power_state_spinlock);
-	if (!device_data->power_state) {
-		ret = regulator_enable(device_data->pwr_regulator);
-		if (ret) {
-			dev_err(dev, "[%s]: regulator_enable() failed!",
-					__func__);
-			goto out;
-		}
-
-		ret = clk_enable(device_data->clk);
-		if (ret) {
-			dev_err(dev, "[%s]: clk_enable() failed!",
-					__func__);
-			regulator_disable(device_data->pwr_regulator);
-			goto out;
-		}
-		device_data->power_state = true;
-	}
-
-	if (device_data->restore_dev_ctx) {
-		spin_lock(&device_data->ctx_lock);
-		if (restore_device_context && device_data->current_ctx) {
-			device_data->restore_dev_ctx = false;
-			cryp_restore_device_context(device_data,
-					&device_data->current_ctx->dev_ctx);
-		}
-		spin_unlock(&device_data->ctx_lock);
-	}
-out:
-	spin_unlock(&device_data->power_state_spinlock);
-
-	return ret;
-}
-
-static int hw_crypt_noxts(struct cryp_ctx *ctx,
-			  struct cryp_device_data *device_data)
-{
-	int ret = 0;
-
-	const u8 *indata = ctx->indata;
-	u8 *outdata = ctx->outdata;
-	u32 datalen = ctx->datalen;
-	u32 outlen = datalen;
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	ctx->outlen = ctx->datalen;
-
-	if (unlikely(!IS_ALIGNED((unsigned long)indata, 4))) {
-		pr_debug(DEV_DBG_NAME " [%s]: Data isn't aligned! Addr: "
-			 "0x%08lx", __func__, (unsigned long)indata);
-		return -EINVAL;
-	}
-
-	ret = cryp_setup_context(ctx, device_data);
-
-	if (ret)
-		goto out;
-
-	if (cryp_mode == CRYP_MODE_INTERRUPT) {
-		cryp_enable_irq_src(device_data, CRYP_IRQ_SRC_INPUT_FIFO |
-				    CRYP_IRQ_SRC_OUTPUT_FIFO);
-
-		/*
-		 * ctx->outlen is decremented in the cryp_interrupt_handler
-		 * function. We had to add cpu_relax() (barrier) to make sure
-		 * that gcc didn't optimze away this variable.
-		 */
-		while (ctx->outlen > 0)
-			cpu_relax();
-	} else if (cryp_mode == CRYP_MODE_POLLING ||
-		   cryp_mode == CRYP_MODE_DMA) {
-		/*
-		 * The reason for having DMA in this if case is that if we are
-		 * running cryp_mode = 2, then we separate DMA routines for
-		 * handling cipher/plaintext > blocksize, except when
-		 * running the normal CRYPTO_ALG_TYPE_CIPHER, then we still use
-		 * the polling mode. Overhead of doing DMA setup eats up the
-		 * benefits using it.
-		 */
-		cryp_polling_mode(ctx, device_data);
-	} else {
-		dev_err(ctx->device->dev, "[%s]: Invalid operation mode!",
-			__func__);
-		ret = -EPERM;
-		goto out;
-	}
-
-	cryp_save_device_context(device_data, &ctx->dev_ctx, cryp_mode);
-	ctx->updated = 1;
-
-out:
-	ctx->indata = indata;
-	ctx->outdata = outdata;
-	ctx->datalen = datalen;
-	ctx->outlen = outlen;
-
-	return ret;
-}
-
-static int get_nents(struct scatterlist *sg, int nbytes)
-{
-	int nents = 0;
-
-	while (nbytes > 0) {
-		nbytes -= sg->length;
-		sg = sg_next(sg);
-		nents++;
-	}
-
-	return nents;
-}
-
-static int ablk_dma_crypt(struct skcipher_request *areq)
-{
-	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
-	struct cryp_device_data *device_data;
-
-	int bytes_written = 0;
-	int bytes_read = 0;
-	int ret;
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	ctx->datalen = areq->cryptlen;
-	ctx->outlen = areq->cryptlen;
-
-	ret = cryp_get_device_data(ctx, &device_data);
-	if (ret)
-		return ret;
-
-	ret = cryp_setup_context(ctx, device_data);
-	if (ret)
-		goto out;
-
-	/* We have the device now, so store the nents in the dma struct. */
-	ctx->device->dma.nents_src = get_nents(areq->src, ctx->datalen);
-	ctx->device->dma.nents_dst = get_nents(areq->dst, ctx->outlen);
-
-	/* Enable DMA in- and output. */
-	cryp_configure_for_dma(device_data, CRYP_DMA_ENABLE_BOTH_DIRECTIONS);
-
-	bytes_written = cryp_dma_write(ctx, areq->src, ctx->datalen);
-	bytes_read = cryp_dma_read(ctx, areq->dst, bytes_written);
-
-	wait_for_completion(&ctx->device->dma.cryp_dma_complete);
-	cryp_dma_done(ctx);
-
-	cryp_save_device_context(device_data, &ctx->dev_ctx, cryp_mode);
-	ctx->updated = 1;
-
-out:
-	spin_lock(&device_data->ctx_lock);
-	device_data->current_ctx = NULL;
-	ctx->device = NULL;
-	spin_unlock(&device_data->ctx_lock);
-
-	/*
-	 * The down_interruptible part for this semaphore is called in
-	 * cryp_get_device_data.
-	 */
-	up(&driver_data.device_allocation);
-
-	if (unlikely(bytes_written != bytes_read))
-		return -EPERM;
-
-	return 0;
-}
-
-static int ablk_crypt(struct skcipher_request *areq)
-{
-	struct skcipher_walk walk;
-	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
-	struct cryp_device_data *device_data;
-	unsigned long src_paddr;
-	unsigned long dst_paddr;
-	int ret;
-	int nbytes;
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	ret = cryp_get_device_data(ctx, &device_data);
-	if (ret)
-		goto out;
-
-	ret = skcipher_walk_async(&walk, areq);
-
-	if (ret) {
-		pr_err(DEV_DBG_NAME "[%s]: skcipher_walk_async() failed!",
-			__func__);
-		goto out;
-	}
-
-	while ((nbytes = walk.nbytes) > 0) {
-		ctx->iv = walk.iv;
-		src_paddr = (page_to_phys(walk.src.phys.page) + walk.src.phys.offset);
-		ctx->indata = phys_to_virt(src_paddr);
-
-		dst_paddr = (page_to_phys(walk.dst.phys.page) + walk.dst.phys.offset);
-		ctx->outdata = phys_to_virt(dst_paddr);
-
-		ctx->datalen = nbytes - (nbytes % ctx->blocksize);
-
-		ret = hw_crypt_noxts(ctx, device_data);
-		if (ret)
-			goto out;
-
-		nbytes -= ctx->datalen;
-		ret = skcipher_walk_done(&walk, nbytes);
-		if (ret)
-			goto out;
-	}
-
-out:
-	/* Release the device */
-	spin_lock(&device_data->ctx_lock);
-	device_data->current_ctx = NULL;
-	ctx->device = NULL;
-	spin_unlock(&device_data->ctx_lock);
-
-	/*
-	 * The down_interruptible part for this semaphore is called in
-	 * cryp_get_device_data.
-	 */
-	up(&driver_data.device_allocation);
-
-	return ret;
-}
-
-static int aes_skcipher_setkey(struct crypto_skcipher *cipher,
-				 const u8 *key, unsigned int keylen)
-{
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	switch (keylen) {
-	case AES_KEYSIZE_128:
-		ctx->config.keysize = CRYP_KEY_SIZE_128;
-		break;
-
-	case AES_KEYSIZE_192:
-		ctx->config.keysize = CRYP_KEY_SIZE_192;
-		break;
-
-	case AES_KEYSIZE_256:
-		ctx->config.keysize = CRYP_KEY_SIZE_256;
-		break;
-
-	default:
-		pr_err(DEV_DBG_NAME "[%s]: Unknown keylen!", __func__);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->key, key, keylen);
-	ctx->keylen = keylen;
-
-	ctx->updated = 0;
-
-	return 0;
-}
-
-static int des_skcipher_setkey(struct crypto_skcipher *cipher,
-				 const u8 *key, unsigned int keylen)
-{
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
-	int err;
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	err = verify_skcipher_des_key(cipher, key);
-	if (err)
-		return err;
-
-	memcpy(ctx->key, key, keylen);
-	ctx->keylen = keylen;
-
-	ctx->updated = 0;
-	return 0;
-}
-
-static int des3_skcipher_setkey(struct crypto_skcipher *cipher,
-				  const u8 *key, unsigned int keylen)
-{
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
-	int err;
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	err = verify_skcipher_des3_key(cipher, key);
-	if (err)
-		return err;
-
-	memcpy(ctx->key, key, keylen);
-	ctx->keylen = keylen;
-
-	ctx->updated = 0;
-	return 0;
-}
-
-static int cryp_blk_encrypt(struct skcipher_request *areq)
-{
-	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	ctx->config.algodir = CRYP_ALGORITHM_ENCRYPT;
-
-	/*
-	 * DMA does not work for DES due to a hw bug */
-	if (cryp_mode == CRYP_MODE_DMA && mode_is_aes(ctx->config.algomode))
-		return ablk_dma_crypt(areq);
-
-	/* For everything except DMA, we run the non DMA version. */
-	return ablk_crypt(areq);
-}
-
-static int cryp_blk_decrypt(struct skcipher_request *areq)
-{
-	struct crypto_skcipher *cipher = crypto_skcipher_reqtfm(areq);
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(cipher);
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	ctx->config.algodir = CRYP_ALGORITHM_DECRYPT;
-
-	/* DMA does not work for DES due to a hw bug */
-	if (cryp_mode == CRYP_MODE_DMA && mode_is_aes(ctx->config.algomode))
-		return ablk_dma_crypt(areq);
-
-	/* For everything except DMA, we run the non DMA version. */
-	return ablk_crypt(areq);
-}
-
-struct cryp_algo_template {
-	enum cryp_algo_mode algomode;
-	struct skcipher_alg skcipher;
-};
-
-static int cryp_init_tfm(struct crypto_skcipher *tfm)
-{
-	struct cryp_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
-	struct cryp_algo_template *cryp_alg = container_of(alg,
-			struct cryp_algo_template,
-			skcipher);
-
-	ctx->config.algomode = cryp_alg->algomode;
-	ctx->blocksize = crypto_skcipher_blocksize(tfm);
-
-	return 0;
-}
-
-static struct cryp_algo_template cryp_algs[] = {
-	{
-		.algomode = CRYP_ALGO_AES_ECB,
-		.skcipher = {
-			.base.cra_name		= "ecb(aes)",
-			.base.cra_driver_name	= "ecb-aes-ux500",
-			.base.cra_priority	= 300,
-			.base.cra_flags		= CRYPTO_ALG_ASYNC,
-			.base.cra_blocksize	= AES_BLOCK_SIZE,
-			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
-			.base.cra_alignmask	= 3,
-			.base.cra_module	= THIS_MODULE,
-
-			.min_keysize		= AES_MIN_KEY_SIZE,
-			.max_keysize		= AES_MAX_KEY_SIZE,
-			.setkey			= aes_skcipher_setkey,
-			.encrypt		= cryp_blk_encrypt,
-			.decrypt		= cryp_blk_decrypt,
-			.init			= cryp_init_tfm,
-		}
-	},
-	{
-		.algomode = CRYP_ALGO_AES_CBC,
-		.skcipher = {
-			.base.cra_name		= "cbc(aes)",
-			.base.cra_driver_name	= "cbc-aes-ux500",
-			.base.cra_priority	= 300,
-			.base.cra_flags		= CRYPTO_ALG_ASYNC,
-			.base.cra_blocksize	= AES_BLOCK_SIZE,
-			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
-			.base.cra_alignmask	= 3,
-			.base.cra_module	= THIS_MODULE,
-
-			.min_keysize		= AES_MIN_KEY_SIZE,
-			.max_keysize		= AES_MAX_KEY_SIZE,
-			.setkey			= aes_skcipher_setkey,
-			.encrypt		= cryp_blk_encrypt,
-			.decrypt		= cryp_blk_decrypt,
-			.init			= cryp_init_tfm,
-			.ivsize			= AES_BLOCK_SIZE,
-		}
-	},
-	{
-		.algomode = CRYP_ALGO_AES_CTR,
-		.skcipher = {
-			.base.cra_name		= "ctr(aes)",
-			.base.cra_driver_name	= "ctr-aes-ux500",
-			.base.cra_priority	= 300,
-			.base.cra_flags		= CRYPTO_ALG_ASYNC,
-			.base.cra_blocksize	= 1,
-			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
-			.base.cra_alignmask	= 3,
-			.base.cra_module	= THIS_MODULE,
-
-			.min_keysize		= AES_MIN_KEY_SIZE,
-			.max_keysize		= AES_MAX_KEY_SIZE,
-			.setkey			= aes_skcipher_setkey,
-			.encrypt		= cryp_blk_encrypt,
-			.decrypt		= cryp_blk_decrypt,
-			.init			= cryp_init_tfm,
-			.ivsize			= AES_BLOCK_SIZE,
-			.chunksize		= AES_BLOCK_SIZE,
-		}
-	},
-	{
-		.algomode = CRYP_ALGO_DES_ECB,
-		.skcipher = {
-			.base.cra_name		= "ecb(des)",
-			.base.cra_driver_name	= "ecb-des-ux500",
-			.base.cra_priority	= 300,
-			.base.cra_flags		= CRYPTO_ALG_ASYNC,
-			.base.cra_blocksize	= DES_BLOCK_SIZE,
-			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
-			.base.cra_alignmask	= 3,
-			.base.cra_module	= THIS_MODULE,
-
-			.min_keysize		= DES_KEY_SIZE,
-			.max_keysize		= DES_KEY_SIZE,
-			.setkey			= des_skcipher_setkey,
-			.encrypt		= cryp_blk_encrypt,
-			.decrypt		= cryp_blk_decrypt,
-			.init			= cryp_init_tfm,
-		}
-	},
-	{
-		.algomode = CRYP_ALGO_TDES_ECB,
-		.skcipher = {
-			.base.cra_name		= "ecb(des3_ede)",
-			.base.cra_driver_name	= "ecb-des3_ede-ux500",
-			.base.cra_priority	= 300,
-			.base.cra_flags		= CRYPTO_ALG_ASYNC,
-			.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
-			.base.cra_alignmask	= 3,
-			.base.cra_module	= THIS_MODULE,
-
-			.min_keysize		= DES3_EDE_KEY_SIZE,
-			.max_keysize		= DES3_EDE_KEY_SIZE,
-			.setkey			= des3_skcipher_setkey,
-			.encrypt		= cryp_blk_encrypt,
-			.decrypt		= cryp_blk_decrypt,
-			.init			= cryp_init_tfm,
-		}
-	},
-	{
-		.algomode = CRYP_ALGO_DES_CBC,
-		.skcipher = {
-			.base.cra_name		= "cbc(des)",
-			.base.cra_driver_name	= "cbc-des-ux500",
-			.base.cra_priority	= 300,
-			.base.cra_flags		= CRYPTO_ALG_ASYNC,
-			.base.cra_blocksize	= DES_BLOCK_SIZE,
-			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
-			.base.cra_alignmask	= 3,
-			.base.cra_module	= THIS_MODULE,
-
-			.min_keysize		= DES_KEY_SIZE,
-			.max_keysize		= DES_KEY_SIZE,
-			.setkey			= des_skcipher_setkey,
-			.encrypt		= cryp_blk_encrypt,
-			.decrypt		= cryp_blk_decrypt,
-			.ivsize			= DES_BLOCK_SIZE,
-			.init			= cryp_init_tfm,
-		}
-	},
-	{
-		.algomode = CRYP_ALGO_TDES_CBC,
-		.skcipher = {
-			.base.cra_name		= "cbc(des3_ede)",
-			.base.cra_driver_name	= "cbc-des3_ede-ux500",
-			.base.cra_priority	= 300,
-			.base.cra_flags		= CRYPTO_ALG_ASYNC,
-			.base.cra_blocksize	= DES3_EDE_BLOCK_SIZE,
-			.base.cra_ctxsize	= sizeof(struct cryp_ctx),
-			.base.cra_alignmask	= 3,
-			.base.cra_module	= THIS_MODULE,
-
-			.min_keysize		= DES3_EDE_KEY_SIZE,
-			.max_keysize		= DES3_EDE_KEY_SIZE,
-			.setkey			= des3_skcipher_setkey,
-			.encrypt		= cryp_blk_encrypt,
-			.decrypt		= cryp_blk_decrypt,
-			.ivsize			= DES3_EDE_BLOCK_SIZE,
-			.init			= cryp_init_tfm,
-		}
-	}
-};
-
-/**
- * cryp_algs_register_all -
- */
-static int cryp_algs_register_all(void)
-{
-	int ret;
-	int i;
-	int count;
-
-	pr_debug("[%s]", __func__);
-
-	for (i = 0; i < ARRAY_SIZE(cryp_algs); i++) {
-		ret = crypto_register_skcipher(&cryp_algs[i].skcipher);
-		if (ret) {
-			count = i;
-			pr_err("[%s] alg registration failed",
-					cryp_algs[i].skcipher.base.cra_driver_name);
-			goto unreg;
-		}
-	}
-	return 0;
-unreg:
-	for (i = 0; i < count; i++)
-		crypto_unregister_skcipher(&cryp_algs[i].skcipher);
-	return ret;
-}
-
-/**
- * cryp_algs_unregister_all -
- */
-static void cryp_algs_unregister_all(void)
-{
-	int i;
-
-	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-
-	for (i = 0; i < ARRAY_SIZE(cryp_algs); i++)
-		crypto_unregister_skcipher(&cryp_algs[i].skcipher);
-}
-
-static int ux500_cryp_probe(struct platform_device *pdev)
-{
-	int ret;
-	struct resource *res;
-	struct cryp_device_data *device_data;
-	struct cryp_protection_config prot = {
-		.privilege_access = CRYP_STATE_ENABLE
-	};
-	struct device *dev = &pdev->dev;
-
-	dev_dbg(dev, "[%s]", __func__);
-	device_data = devm_kzalloc(dev, sizeof(*device_data), GFP_KERNEL);
-	if (!device_data) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	device_data->dev = dev;
-	device_data->current_ctx = NULL;
-
-	/* Grab the DMA configuration from platform data. */
-	mem_to_engine = &((struct cryp_platform_data *)
-			 dev->platform_data)->mem_to_engine;
-	engine_to_mem = &((struct cryp_platform_data *)
-			 dev->platform_data)->engine_to_mem;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(dev, "[%s]: platform_get_resource() failed",
-				__func__);
-		ret = -ENODEV;
-		goto out;
-	}
-
-	device_data->phybase = res->start;
-	device_data->base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(device_data->base)) {
-		ret = PTR_ERR(device_data->base);
-		goto out;
-	}
-
-	spin_lock_init(&device_data->ctx_lock);
-	spin_lock_init(&device_data->power_state_spinlock);
-
-	/* Enable power for CRYP hardware block */
-	device_data->pwr_regulator = regulator_get(&pdev->dev, "v-ape");
-	if (IS_ERR(device_data->pwr_regulator)) {
-		dev_err(dev, "[%s]: could not get cryp regulator", __func__);
-		ret = PTR_ERR(device_data->pwr_regulator);
-		device_data->pwr_regulator = NULL;
-		goto out;
-	}
-
-	/* Enable the clk for CRYP hardware block */
-	device_data->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(device_data->clk)) {
-		dev_err(dev, "[%s]: clk_get() failed!", __func__);
-		ret = PTR_ERR(device_data->clk);
-		goto out_regulator;
-	}
-
-	ret = clk_prepare(device_data->clk);
-	if (ret) {
-		dev_err(dev, "[%s]: clk_prepare() failed!", __func__);
-		goto out_regulator;
-	}
-
-	/* Enable device power (and clock) */
-	ret = cryp_enable_power(device_data->dev, device_data, false);
-	if (ret) {
-		dev_err(dev, "[%s]: cryp_enable_power() failed!", __func__);
-		goto out_clk_unprepare;
-	}
-
-	if (cryp_check(device_data)) {
-		dev_err(dev, "[%s]: cryp_check() failed!", __func__);
-		ret = -EINVAL;
-		goto out_power;
-	}
-
-	if (cryp_configure_protection(device_data, &prot)) {
-		dev_err(dev, "[%s]: cryp_configure_protection() failed!",
-			__func__);
-		ret = -EINVAL;
-		goto out_power;
-	}
-
-	device_data->irq = platform_get_irq(pdev, 0);
-	if (device_data->irq <= 0) {
-		ret = device_data->irq ? device_data->irq : -ENXIO;
-		goto out_power;
-	}
-
-	ret = devm_request_irq(&pdev->dev, device_data->irq,
-			       cryp_interrupt_handler, 0, "cryp1", device_data);
-	if (ret) {
-		dev_err(dev, "[%s]: Unable to request IRQ", __func__);
-		goto out_power;
-	}
-
-	if (cryp_mode == CRYP_MODE_DMA)
-		cryp_dma_setup_channel(device_data, dev);
-
-	platform_set_drvdata(pdev, device_data);
-
-	/* Put the new device into the device list... */
-	klist_add_tail(&device_data->list_node, &driver_data.device_list);
-
-	/* ... and signal that a new device is available. */
-	up(&driver_data.device_allocation);
-
-	atomic_set(&session_id, 1);
-
-	ret = cryp_algs_register_all();
-	if (ret) {
-		dev_err(dev, "[%s]: cryp_algs_register_all() failed!",
-			__func__);
-		goto out_power;
-	}
-
-	dev_info(dev, "successfully registered\n");
-
-	return 0;
-
-out_power:
-	cryp_disable_power(device_data->dev, device_data, false);
-
-out_clk_unprepare:
-	clk_unprepare(device_data->clk);
-
-out_regulator:
-	regulator_put(device_data->pwr_regulator);
-
-out:
-	return ret;
-}
-
-static int ux500_cryp_remove(struct platform_device *pdev)
-{
-	struct cryp_device_data *device_data;
-
-	dev_dbg(&pdev->dev, "[%s]", __func__);
-	device_data = platform_get_drvdata(pdev);
-	if (!device_data) {
-		dev_err(&pdev->dev, "[%s]: platform_get_drvdata() failed!",
-			__func__);
-		return -ENOMEM;
-	}
-
-	/* Try to decrease the number of available devices. */
-	if (down_trylock(&driver_data.device_allocation))
-		return -EBUSY;
-
-	/* Check that the device is free */
-	spin_lock(&device_data->ctx_lock);
-	/* current_ctx allocates a device, NULL = unallocated */
-	if (device_data->current_ctx) {
-		/* The device is busy */
-		spin_unlock(&device_data->ctx_lock);
-		/* Return the device to the pool. */
-		up(&driver_data.device_allocation);
-		return -EBUSY;
-	}
-
-	spin_unlock(&device_data->ctx_lock);
-
-	/* Remove the device from the list */
-	if (klist_node_attached(&device_data->list_node))
-		klist_remove(&device_data->list_node);
-
-	/* If this was the last device, remove the services */
-	if (list_empty(&driver_data.device_list.k_list))
-		cryp_algs_unregister_all();
-
-	if (cryp_disable_power(&pdev->dev, device_data, false))
-		dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed",
-			__func__);
-
-	clk_unprepare(device_data->clk);
-	regulator_put(device_data->pwr_regulator);
-
-	return 0;
-}
-
-static void ux500_cryp_shutdown(struct platform_device *pdev)
-{
-	struct cryp_device_data *device_data;
-
-	dev_dbg(&pdev->dev, "[%s]", __func__);
-
-	device_data = platform_get_drvdata(pdev);
-	if (!device_data) {
-		dev_err(&pdev->dev, "[%s]: platform_get_drvdata() failed!",
-			__func__);
-		return;
-	}
-
-	/* Check that the device is free */
-	spin_lock(&device_data->ctx_lock);
-	/* current_ctx allocates a device, NULL = unallocated */
-	if (!device_data->current_ctx) {
-		if (down_trylock(&driver_data.device_allocation))
-			dev_dbg(&pdev->dev, "[%s]: Cryp still in use!"
-				"Shutting down anyway...", __func__);
-		/**
-		 * (Allocate the device)
-		 * Need to set this to non-null (dummy) value,
-		 * to avoid usage if context switching.
-		 */
-		device_data->current_ctx++;
-	}
-	spin_unlock(&device_data->ctx_lock);
-
-	/* Remove the device from the list */
-	if (klist_node_attached(&device_data->list_node))
-		klist_remove(&device_data->list_node);
-
-	/* If this was the last device, remove the services */
-	if (list_empty(&driver_data.device_list.k_list))
-		cryp_algs_unregister_all();
-
-	if (cryp_disable_power(&pdev->dev, device_data, false))
-		dev_err(&pdev->dev, "[%s]: cryp_disable_power() failed",
-			__func__);
-
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int ux500_cryp_suspend(struct device *dev)
-{
-	int ret;
-	struct platform_device *pdev = to_platform_device(dev);
-	struct cryp_device_data *device_data;
-	struct cryp_ctx *temp_ctx = NULL;
-
-	dev_dbg(dev, "[%s]", __func__);
-
-	/* Handle state? */
-	device_data = platform_get_drvdata(pdev);
-	if (!device_data) {
-		dev_err(dev, "[%s]: platform_get_drvdata() failed!", __func__);
-		return -ENOMEM;
-	}
-
-	disable_irq(device_data->irq);
-
-	spin_lock(&device_data->ctx_lock);
-	if (!device_data->current_ctx)
-		device_data->current_ctx++;
-	spin_unlock(&device_data->ctx_lock);
-
-	if (device_data->current_ctx == ++temp_ctx) {
-		if (down_interruptible(&driver_data.device_allocation))
-			dev_dbg(dev, "[%s]: down_interruptible() failed",
-				__func__);
-		ret = cryp_disable_power(dev, device_data, false);
-
-	} else
-		ret = cryp_disable_power(dev, device_data, true);
-
-	if (ret)
-		dev_err(dev, "[%s]: cryp_disable_power()", __func__);
-
-	return ret;
-}
-
-static int ux500_cryp_resume(struct device *dev)
-{
-	int ret = 0;
-	struct platform_device *pdev = to_platform_device(dev);
-	struct cryp_device_data *device_data;
-	struct cryp_ctx *temp_ctx = NULL;
-
-	dev_dbg(dev, "[%s]", __func__);
-
-	device_data = platform_get_drvdata(pdev);
-	if (!device_data) {
-		dev_err(dev, "[%s]: platform_get_drvdata() failed!", __func__);
-		return -ENOMEM;
-	}
-
-	spin_lock(&device_data->ctx_lock);
-	if (device_data->current_ctx == ++temp_ctx)
-		device_data->current_ctx = NULL;
-	spin_unlock(&device_data->ctx_lock);
-
-
-	if (!device_data->current_ctx)
-		up(&driver_data.device_allocation);
-	else
-		ret = cryp_enable_power(dev, device_data, true);
-
-	if (ret)
-		dev_err(dev, "[%s]: cryp_enable_power() failed!", __func__);
-	else
-		enable_irq(device_data->irq);
-
-	return ret;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(ux500_cryp_pm, ux500_cryp_suspend, ux500_cryp_resume);
-
-static const struct of_device_id ux500_cryp_match[] = {
-	{ .compatible = "stericsson,ux500-cryp" },
-	{ },
-};
-MODULE_DEVICE_TABLE(of, ux500_cryp_match);
-
-static struct platform_driver cryp_driver = {
-	.probe  = ux500_cryp_probe,
-	.remove = ux500_cryp_remove,
-	.shutdown = ux500_cryp_shutdown,
-	.driver = {
-		.name  = "cryp1",
-		.of_match_table = ux500_cryp_match,
-		.pm    = &ux500_cryp_pm,
-	}
-};
-
-static int __init ux500_cryp_mod_init(void)
-{
-	pr_debug("[%s] is called!", __func__);
-	klist_init(&driver_data.device_list, NULL, NULL);
-	/* Initialize the semaphore to 0 devices (locked state) */
-	sema_init(&driver_data.device_allocation, 0);
-	return platform_driver_register(&cryp_driver);
-}
-
-static void __exit ux500_cryp_mod_fini(void)
-{
-	pr_debug("[%s] is called!", __func__);
-	platform_driver_unregister(&cryp_driver);
-}
-
-module_init(ux500_cryp_mod_init);
-module_exit(ux500_cryp_mod_fini);
-
-module_param(cryp_mode, int, 0);
-
-MODULE_DESCRIPTION("Driver for ST-Ericsson UX500 CRYP crypto engine.");
-MODULE_ALIAS_CRYPTO("aes-all");
-MODULE_ALIAS_CRYPTO("des-all");
-
-MODULE_LICENSE("GPL");
diff --git a/drivers/crypto/ux500/cryp/cryp_irq.c b/drivers/crypto/ux500/cryp/cryp_irq.c
deleted file mode 100644
index 6d2f07bec98a7..0000000000000
--- a/drivers/crypto/ux500/cryp/cryp_irq.c
+++ /dev/null
@@ -1,45 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson.
- * Author: Jonas Linde <jonas.linde@stericsson.com> for ST-Ericsson.
- * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson.
- * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson.
- * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson.
- */
-
-#include <linux/kernel.h>
-#include <linux/bitmap.h>
-#include <linux/device.h>
-
-#include "cryp.h"
-#include "cryp_p.h"
-#include "cryp_irq.h"
-#include "cryp_irqp.h"
-
-void cryp_enable_irq_src(struct cryp_device_data *device_data, u32 irq_src)
-{
-	u32 i;
-
-	dev_dbg(device_data->dev, "[%s]", __func__);
-
-	i = readl_relaxed(&device_data->base->imsc);
-	i = i | irq_src;
-	writel_relaxed(i, &device_data->base->imsc);
-}
-
-void cryp_disable_irq_src(struct cryp_device_data *device_data, u32 irq_src)
-{
-	u32 i;
-
-	dev_dbg(device_data->dev, "[%s]", __func__);
-
-	i = readl_relaxed(&device_data->base->imsc);
-	i = i & ~irq_src;
-	writel_relaxed(i, &device_data->base->imsc);
-}
-
-bool cryp_pending_irq_src(struct cryp_device_data *device_data, u32 irq_src)
-{
-	return (readl_relaxed(&device_data->base->mis) & irq_src) > 0;
-}
diff --git a/drivers/crypto/ux500/cryp/cryp_irq.h b/drivers/crypto/ux500/cryp/cryp_irq.h
deleted file mode 100644
index da90029ea1412..0000000000000
--- a/drivers/crypto/ux500/cryp/cryp_irq.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson.
- * Author: Jonas Linde <jonas.linde@stericsson.com> for ST-Ericsson.
- * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson.
- * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson.
- * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson.
- */
-
-#ifndef _CRYP_IRQ_H_
-#define _CRYP_IRQ_H_
-
-#include "cryp.h"
-
-enum cryp_irq_src_id {
-	CRYP_IRQ_SRC_INPUT_FIFO = 0x1,
-	CRYP_IRQ_SRC_OUTPUT_FIFO = 0x2,
-	CRYP_IRQ_SRC_ALL = 0x3
-};
-
-/*
- * M0 Funtions
- */
-void cryp_enable_irq_src(struct cryp_device_data *device_data, u32 irq_src);
-
-void cryp_disable_irq_src(struct cryp_device_data *device_data, u32 irq_src);
-
-bool cryp_pending_irq_src(struct cryp_device_data *device_data, u32 irq_src);
-
-#endif				/* _CRYP_IRQ_H_ */
diff --git a/drivers/crypto/ux500/cryp/cryp_irqp.h b/drivers/crypto/ux500/cryp/cryp_irqp.h
deleted file mode 100644
index 4981a3f461e5e..0000000000000
--- a/drivers/crypto/ux500/cryp/cryp_irqp.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson.
- * Author: Jonas Linde <jonas.linde@stericsson.com> for ST-Ericsson.
- * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson.
- * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson.
- * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson.
- */
-
-#ifndef __CRYP_IRQP_H_
-#define __CRYP_IRQP_H_
-
-#include "cryp_irq.h"
-
-/*
- *
- * CRYP Registers - Offset mapping
- *     +-----------------+
- * 00h | CRYP_CR         |  Configuration register
- *     +-----------------+
- * 04h | CRYP_SR         |  Status register
- *     +-----------------+
- * 08h | CRYP_DIN        |  Data In register
- *     +-----------------+
- * 0ch | CRYP_DOUT       |  Data out register
- *     +-----------------+
- * 10h | CRYP_DMACR      |  DMA control register
- *     +-----------------+
- * 14h | CRYP_IMSC       |  IMSC
- *     +-----------------+
- * 18h | CRYP_RIS        |  Raw interrupt status
- *     +-----------------+
- * 1ch | CRYP_MIS        |  Masked interrupt status.
- *     +-----------------+
- *       Key registers
- *       IVR registers
- *       Peripheral
- *       Cell IDs
- *
- *       Refer data structure for other register map
- */
-
-/**
- * struct cryp_register
- * @cr			- Configuration register
- * @status		- Status register
- * @din			- Data input register
- * @din_size		- Data input size register
- * @dout		- Data output register
- * @dout_size		- Data output size register
- * @dmacr		- Dma control register
- * @imsc		- Interrupt mask set/clear register
- * @ris			- Raw interrupt status
- * @mis			- Masked interrupt statu register
- * @key_1_l		- Key register 1 L
- * @key_1_r		- Key register 1 R
- * @key_2_l		- Key register 2 L
- * @key_2_r		- Key register 2 R
- * @key_3_l		- Key register 3 L
- * @key_3_r		- Key register 3 R
- * @key_4_l		- Key register 4 L
- * @key_4_r		- Key register 4 R
- * @init_vect_0_l	- init vector 0 L
- * @init_vect_0_r	- init vector 0 R
- * @init_vect_1_l	- init vector 1 L
- * @init_vect_1_r	- init vector 1 R
- * @cryp_unused1	- unused registers
- * @itcr		- Integration test control register
- * @itip		- Integration test input register
- * @itop		- Integration test output register
- * @cryp_unused2	- unused registers
- * @periphId0		- FE0 CRYP Peripheral Identication Register
- * @periphId1		- FE4
- * @periphId2		- FE8
- * @periphId3		- FEC
- * @pcellId0		- FF0  CRYP PCell Identication Register
- * @pcellId1		- FF4
- * @pcellId2		- FF8
- * @pcellId3		- FFC
- */
-struct cryp_register {
-	u32 cr;			/* Configuration register   */
-	u32 sr;			/* Status register          */
-	u32 din;		/* Data input register      */
-	u32 din_size;		/* Data input size register */
-	u32 dout;		/* Data output register     */
-	u32 dout_size;		/* Data output size register */
-	u32 dmacr;		/* Dma control register     */
-	u32 imsc;		/* Interrupt mask set/clear register */
-	u32 ris;		/* Raw interrupt status             */
-	u32 mis;		/* Masked interrupt statu register  */
-
-	u32 key_1_l;		/*Key register 1 L */
-	u32 key_1_r;		/*Key register 1 R */
-	u32 key_2_l;		/*Key register 2 L */
-	u32 key_2_r;		/*Key register 2 R */
-	u32 key_3_l;		/*Key register 3 L */
-	u32 key_3_r;		/*Key register 3 R */
-	u32 key_4_l;		/*Key register 4 L */
-	u32 key_4_r;		/*Key register 4 R */
-
-	u32 init_vect_0_l;	/*init vector 0 L */
-	u32 init_vect_0_r;	/*init vector 0 R */
-	u32 init_vect_1_l;	/*init vector 1 L */
-	u32 init_vect_1_r;	/*init vector 1 R */
-
-	u32 cryp_unused1[(0x80 - 0x58) / sizeof(u32)];	/* unused registers */
-	u32 itcr;		/*Integration test control register */
-	u32 itip;		/*Integration test input register */
-	u32 itop;		/*Integration test output register */
-	u32 cryp_unused2[(0xFE0 - 0x8C) / sizeof(u32)];	/* unused registers */
-
-	u32 periphId0;		/* FE0  CRYP Peripheral Identication Register */
-	u32 periphId1;		/* FE4 */
-	u32 periphId2;		/* FE8 */
-	u32 periphId3;		/* FEC */
-
-	u32 pcellId0;		/* FF0  CRYP PCell Identication Register */
-	u32 pcellId1;		/* FF4 */
-	u32 pcellId2;		/* FF8 */
-	u32 pcellId3;		/* FFC */
-};
-
-#endif
diff --git a/drivers/crypto/ux500/cryp/cryp_p.h b/drivers/crypto/ux500/cryp/cryp_p.h
deleted file mode 100644
index 60b47fe4de35d..0000000000000
--- a/drivers/crypto/ux500/cryp/cryp_p.h
+++ /dev/null
@@ -1,122 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) ST-Ericsson SA 2010
- * Author: Shujuan Chen <shujuan.chen@stericsson.com> for ST-Ericsson.
- * Author: Jonas Linde <jonas.linde@stericsson.com> for ST-Ericsson.
- * Author: Joakim Bech <joakim.xx.bech@stericsson.com> for ST-Ericsson.
- * Author: Berne Hebark <berne.herbark@stericsson.com> for ST-Ericsson.
- * Author: Niklas Hernaeus <niklas.hernaeus@stericsson.com> for ST-Ericsson.
- */
-
-#ifndef _CRYP_P_H_
-#define _CRYP_P_H_
-
-#include <linux/io.h>
-#include <linux/bitops.h>
-
-#include "cryp.h"
-#include "cryp_irqp.h"
-
-/*
- * Generic Macros
- */
-#define CRYP_SET_BITS(reg_name, mask) \
-	writel_relaxed((readl_relaxed(reg_name) | mask), reg_name)
-
-#define CRYP_WRITE_BIT(reg_name, val, mask) \
-	writel_relaxed(((readl_relaxed(reg_name) & ~(mask)) |\
-			((val) & (mask))), reg_name)
-
-#define CRYP_TEST_BITS(reg_name, val) \
-	(readl_relaxed(reg_name) & (val))
-
-#define CRYP_PUT_BITS(reg, val, shift, mask) \
-	writel_relaxed(((readl_relaxed(reg) & ~(mask)) | \
-		(((u32)val << shift) & (mask))), reg)
-
-/*
- * CRYP specific Macros
- */
-#define CRYP_PERIPHERAL_ID0		0xE3
-#define CRYP_PERIPHERAL_ID1		0x05
-
-#define CRYP_PERIPHERAL_ID2_DB8500	0x28
-#define CRYP_PERIPHERAL_ID3		0x00
-
-#define CRYP_PCELL_ID0			0x0D
-#define CRYP_PCELL_ID1			0xF0
-#define CRYP_PCELL_ID2			0x05
-#define CRYP_PCELL_ID3			0xB1
-
-/*
- * CRYP register default values
- */
-#define MAX_DEVICE_SUPPORT		2
-
-/* Priv set, keyrden set and datatype 8bits swapped set as default. */
-#define CRYP_CR_DEFAULT			0x0482
-#define CRYP_DMACR_DEFAULT		0x0
-#define CRYP_IMSC_DEFAULT		0x0
-#define CRYP_DIN_DEFAULT		0x0
-#define CRYP_DOUT_DEFAULT		0x0
-#define CRYP_KEY_DEFAULT		0x0
-#define CRYP_INIT_VECT_DEFAULT		0x0
-
-/*
- * CRYP Control register specific mask
- */
-#define CRYP_CR_SECURE_MASK		BIT(0)
-#define CRYP_CR_PRLG_MASK		BIT(1)
-#define CRYP_CR_ALGODIR_MASK		BIT(2)
-#define CRYP_CR_ALGOMODE_MASK		(BIT(5) | BIT(4) | BIT(3))
-#define CRYP_CR_DATATYPE_MASK		(BIT(7) | BIT(6))
-#define CRYP_CR_KEYSIZE_MASK		(BIT(9) | BIT(8))
-#define CRYP_CR_KEYRDEN_MASK		BIT(10)
-#define CRYP_CR_KSE_MASK		BIT(11)
-#define CRYP_CR_START_MASK		BIT(12)
-#define CRYP_CR_INIT_MASK		BIT(13)
-#define CRYP_CR_FFLUSH_MASK		BIT(14)
-#define CRYP_CR_CRYPEN_MASK		BIT(15)
-#define CRYP_CR_CONTEXT_SAVE_MASK	(CRYP_CR_SECURE_MASK |\
-					 CRYP_CR_PRLG_MASK |\
-					 CRYP_CR_ALGODIR_MASK |\
-					 CRYP_CR_ALGOMODE_MASK |\
-					 CRYP_CR_KEYSIZE_MASK |\
-					 CRYP_CR_KEYRDEN_MASK |\
-					 CRYP_CR_DATATYPE_MASK)
-
-
-#define CRYP_SR_INFIFO_READY_MASK	(BIT(0) | BIT(1))
-#define CRYP_SR_IFEM_MASK		BIT(0)
-#define CRYP_SR_BUSY_MASK		BIT(4)
-
-/*
- * Bit position used while setting bits in register
- */
-#define CRYP_CR_PRLG_POS		1
-#define CRYP_CR_ALGODIR_POS		2
-#define CRYP_CR_ALGOMODE_POS		3
-#define CRYP_CR_DATATYPE_POS		6
-#define CRYP_CR_KEYSIZE_POS		8
-#define CRYP_CR_KEYRDEN_POS		10
-#define CRYP_CR_KSE_POS			11
-#define CRYP_CR_START_POS		12
-#define CRYP_CR_INIT_POS		13
-#define CRYP_CR_CRYPEN_POS		15
-
-#define CRYP_SR_BUSY_POS		4
-
-/*
- * CRYP PCRs------PC_NAND control register
- * BIT_MASK
- */
-#define CRYP_DMA_REQ_MASK		(BIT(1) | BIT(0))
-#define CRYP_DMA_REQ_MASK_POS		0
-
-
-struct cryp_system_context {
-	/* CRYP Register structure */
-	struct cryp_register *p_cryp_reg[MAX_DEVICE_SUPPORT];
-};
-
-#endif
-- 
GitLab


From fd3dc56253acbe9c641a66d312d8393cd55eb04c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Fri, 9 Dec 2022 10:52:47 -0500
Subject: [PATCH 3589/4122] ftrace/x86: Add back ftrace_expected for ftrace bug
 reports

After someone reported a bug report with a failed modification due to the
expected value not matching what was found, it came to my attention that
the ftrace_expected is no longer set when that happens. This makes for
debugging the issue a bit more difficult.

Set ftrace_expected to the expected code before calling ftrace_bug, so
that it shows what was expected and why it failed.

Link: https://lore.kernel.org/all/CA+wXwBQ-VhK+hpBtYtyZP-NiX4g8fqRRWithFOHQW-0coQ3vLg@mail.gmail.com/
Link: https://lore.kernel.org/linux-trace-kernel/20221209105247.01d4e51d@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: "x86@kernel.org" <x86@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: stable@vger.kernel.org
Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 arch/x86/kernel/ftrace.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index bd165004776d9..e07234ec7e237 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -217,7 +217,9 @@ void ftrace_replace_code(int enable)
 
 		ret = ftrace_verify_code(rec->ip, old);
 		if (ret) {
+			ftrace_expected = old;
 			ftrace_bug(ret, rec);
+			ftrace_expected = NULL;
 			return;
 		}
 	}
-- 
GitLab


From c9b8a83a8f2dca9f82288a621595a6a5970cdc5e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 7 Dec 2022 16:44:41 -0400
Subject: [PATCH 3590/4122] iommufd: Fix comment typos

Repair some typos in comments that were noticed late in the review
cycle.

Fixes: f394576eb11d ("iommufd: PFN handling for iopt_pages")
Link: https://lore.kernel.org/r/1-v1-0362a1a1c034+98-iommufd_fixes1_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reported-by: Binbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/io_pagetable.h | 2 +-
 drivers/iommu/iommufd/pages.c        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/iommu/iommufd/io_pagetable.h b/drivers/iommu/iommufd/io_pagetable.h
index 83e7c175f2a27..0ec3509b7e339 100644
--- a/drivers/iommu/iommufd/io_pagetable.h
+++ b/drivers/iommu/iommufd/io_pagetable.h
@@ -33,7 +33,7 @@ struct iommu_domain;
  *
  * The io_pagetable::iova_rwsem protects node
  * The iopt_pages::mutex protects pages_node
- * iopt and immu_prot are immutable
+ * iopt and iommu_prot are immutable
  * The pages::mutex protects num_accesses
  */
 struct iopt_area {
diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index 429fa3b0a239c..fccdba782cb69 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -342,7 +342,7 @@ static void batch_destroy(struct pfn_batch *batch, void *backup)
 		kfree(batch->pfns);
 }
 
-/* true if the pfn could be added, false otherwise */
+/* true if the pfn was added, false otherwise */
 static bool batch_add_pfn(struct pfn_batch *batch, unsigned long pfn)
 {
 	const unsigned int MAX_NPFNS = type_max(typeof(*batch->npfns));
@@ -418,7 +418,7 @@ static struct page **raw_pages_from_domain(struct iommu_domain *domain,
 	return out_pages;
 }
 
-/* Continues reading a domain until we reach a discontiguity in the pfns. */
+/* Continues reading a domain until we reach a discontinuity in the pfns. */
 static void batch_from_domain_continue(struct pfn_batch *batch,
 				       struct iommu_domain *domain,
 				       struct iopt_area *area,
-- 
GitLab


From a26fa392068d1dcdf781397b7a7dd908dd68f030 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 7 Dec 2022 16:44:42 -0400
Subject: [PATCH 3591/4122] iommufd: Improve a few unclear bits of code

Correct a few items noticed late in review:

 - We should assert that the math in batch_clear_carry() doesn't underflow

 - user->locked should be -1 not 0 sicne we just did mmput

 - npages should not have been recalculated, it already has that value

No functional change.

Fixes: 8d160cd4d506 ("iommufd: Algorithms for PFN storage")
Link: https://lore.kernel.org/r/2-v1-0362a1a1c034+98-iommufd_fixes1_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Eric Auger <eric.auger@redhat.com>
Reported-by: Binbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/pages.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/iommu/iommufd/pages.c b/drivers/iommu/iommufd/pages.c
index fccdba782cb69..c771772296485 100644
--- a/drivers/iommu/iommufd/pages.c
+++ b/drivers/iommu/iommufd/pages.c
@@ -289,6 +289,10 @@ static void batch_clear_carry(struct pfn_batch *batch, unsigned int keep_pfns)
 	if (!keep_pfns)
 		return batch_clear(batch);
 
+	if (IS_ENABLED(CONFIG_IOMMUFD_TEST))
+		WARN_ON(!batch->end ||
+			batch->npfns[batch->end - 1] < keep_pfns);
+
 	batch->total_pfns = keep_pfns;
 	batch->npfns[0] = keep_pfns;
 	batch->pfns[0] = batch->pfns[batch->end - 1] +
@@ -723,7 +727,7 @@ static void pfn_reader_user_destroy(struct pfn_reader_user *user,
 			mmap_read_unlock(pages->source_mm);
 		if (pages->source_mm != current->mm)
 			mmput(pages->source_mm);
-		user->locked = 0;
+		user->locked = -1;
 	}
 
 	kfree(user->upages);
@@ -810,7 +814,6 @@ static int incr_user_locked_vm(struct iopt_pages *pages, unsigned long npages)
 
 	lock_limit = task_rlimit(pages->source_task, RLIMIT_MEMLOCK) >>
 		     PAGE_SHIFT;
-	npages = pages->npinned - pages->last_npinned;
 	do {
 		cur_pages = atomic_long_read(&pages->source_user->locked_vm);
 		new_pages = cur_pages + npages;
-- 
GitLab


From d6c55c0a20e5059abdde81713ddf6324a946eb3c Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Wed, 7 Dec 2022 16:44:43 -0400
Subject: [PATCH 3592/4122] iommufd: Change the order of MSI setup

Eric points out this is wrong for the rare case of someone using
allow_unsafe_interrupts on ARM. We always have to setup the MSI window in
the domain if the iommu driver asks for it.

Move the iommu_get_msi_cookie() setup to the top of the function and
always do it, regardless of the security mode. Add checks to
iommufd_device_setup_msi() to ensure the driver is not doing something
incomprehensible. No current driver will set both a HW and SW MSI window,
or have more than one SW MSI window.

Fixes: e8d57210035b ("iommufd: Add kAPI toward external drivers for physical devices")
Link: https://lore.kernel.org/r/3-v1-0362a1a1c034+98-iommufd_fixes1_jgg@nvidia.com
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reported-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/iommufd/device.c       | 56 +++++++++++++---------------
 drivers/iommu/iommufd/io_pagetable.c | 24 +++++++-----
 2 files changed, 39 insertions(+), 41 deletions(-)

diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index dd2a415b603e3..d81f93a321afc 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -139,19 +139,11 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
 	int rc;
 
 	/*
-	 * IOMMU_CAP_INTR_REMAP means that the platform is isolating MSI, and it
-	 * creates the MSI window by default in the iommu domain. Nothing
-	 * further to do.
-	 */
-	if (device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP))
-		return 0;
-
-	/*
-	 * On ARM systems that set the global IRQ_DOMAIN_FLAG_MSI_REMAP every
-	 * allocated iommu_domain will block interrupts by default and this
-	 * special flow is needed to turn them back on. iommu_dma_prepare_msi()
-	 * will install pages into our domain after request_irq() to make this
-	 * work.
+	 * If the IOMMU driver gives a IOMMU_RESV_SW_MSI then it is asking us to
+	 * call iommu_get_msi_cookie() on its behalf. This is necessary to setup
+	 * the MSI window so iommu_dma_prepare_msi() can install pages into our
+	 * domain after request_irq(). If it is not done interrupts will not
+	 * work on this domain.
 	 *
 	 * FIXME: This is conceptually broken for iommufd since we want to allow
 	 * userspace to change the domains, eg switch from an identity IOAS to a
@@ -159,33 +151,35 @@ static int iommufd_device_setup_msi(struct iommufd_device *idev,
 	 * matches what the IRQ layer actually expects in a newly created
 	 * domain.
 	 */
-	if (irq_domain_check_msi_remap()) {
-		if (WARN_ON(!sw_msi_start))
-			return -EPERM;
+	if (sw_msi_start != PHYS_ADDR_MAX && !hwpt->msi_cookie) {
+		rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
+		if (rc)
+			return rc;
+
 		/*
 		 * iommu_get_msi_cookie() can only be called once per domain,
 		 * it returns -EBUSY on later calls.
 		 */
-		if (hwpt->msi_cookie)
-			return 0;
-		rc = iommu_get_msi_cookie(hwpt->domain, sw_msi_start);
-		if (rc)
-			return rc;
 		hwpt->msi_cookie = true;
-		return 0;
 	}
 
 	/*
-	 * Otherwise the platform has a MSI window that is not isolated. For
-	 * historical compat with VFIO allow a module parameter to ignore the
-	 * insecurity.
+	 * For historical compat with VFIO the insecure interrupt path is
+	 * allowed if the module parameter is set. Insecure means that a MemWr
+	 * operation from the device (eg a simple DMA) cannot trigger an
+	 * interrupt outside this iommufd context.
 	 */
-	if (!allow_unsafe_interrupts)
-		return -EPERM;
+	if (!device_iommu_capable(idev->dev, IOMMU_CAP_INTR_REMAP) &&
+	    !irq_domain_check_msi_remap()) {
+		if (!allow_unsafe_interrupts)
+			return -EPERM;
 
-	dev_warn(
-		idev->dev,
-		"MSI interrupt window cannot be isolated by the IOMMU, this platform is insecure. Use the \"allow_unsafe_interrupts\" module parameter to override\n");
+		dev_warn(
+			idev->dev,
+			"MSI interrupts are not secure, they cannot be isolated by the platform. "
+			"Check that platform features like interrupt remapping are enabled. "
+			"Use the \"allow_unsafe_interrupts\" module parameter to override\n");
+	}
 	return 0;
 }
 
@@ -203,7 +197,7 @@ static bool iommufd_hw_pagetable_has_group(struct iommufd_hw_pagetable *hwpt,
 static int iommufd_device_do_attach(struct iommufd_device *idev,
 				    struct iommufd_hw_pagetable *hwpt)
 {
-	phys_addr_t sw_msi_start = 0;
+	phys_addr_t sw_msi_start = PHYS_ADDR_MAX;
 	int rc;
 
 	mutex_lock(&hwpt->devices_lock);
diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c
index 3467cea795684..e0ae72b9e67f8 100644
--- a/drivers/iommu/iommufd/io_pagetable.c
+++ b/drivers/iommu/iommufd/io_pagetable.c
@@ -1170,6 +1170,8 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
 	struct iommu_resv_region *resv;
 	struct iommu_resv_region *tmp;
 	LIST_HEAD(group_resv_regions);
+	unsigned int num_hw_msi = 0;
+	unsigned int num_sw_msi = 0;
 	int rc;
 
 	down_write(&iopt->iova_rwsem);
@@ -1181,23 +1183,25 @@ int iopt_table_enforce_group_resv_regions(struct io_pagetable *iopt,
 		if (resv->type == IOMMU_RESV_DIRECT_RELAXABLE)
 			continue;
 
-		/*
-		 * The presence of any 'real' MSI regions should take precedence
-		 * over the software-managed one if the IOMMU driver happens to
-		 * advertise both types.
-		 */
-		if (sw_msi_start && resv->type == IOMMU_RESV_MSI) {
-			*sw_msi_start = 0;
-			sw_msi_start = NULL;
-		}
-		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI)
+		if (sw_msi_start && resv->type == IOMMU_RESV_MSI)
+			num_hw_msi++;
+		if (sw_msi_start && resv->type == IOMMU_RESV_SW_MSI) {
 			*sw_msi_start = resv->start;
+			num_sw_msi++;
+		}
 
 		rc = iopt_reserve_iova(iopt, resv->start,
 				       resv->length - 1 + resv->start, device);
 		if (rc)
 			goto out_reserved;
 	}
+
+	/* Drivers must offer sane combinations of regions */
+	if (WARN_ON(num_sw_msi && num_hw_msi) || WARN_ON(num_sw_msi > 1)) {
+		rc = -EINVAL;
+		goto out_reserved;
+	}
+
 	rc = 0;
 	goto out_free_resv;
 
-- 
GitLab


From 3282a549cf9b300e2d1b007925ed007ab24e4131 Mon Sep 17 00:00:00 2001
From: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Date: Fri, 9 Dec 2022 13:59:26 +0900
Subject: [PATCH 3593/4122] RDMA/rxe: Fix oops with zero length reads

The commit 686d348476ee ("RDMA/rxe: Remove unnecessary mr testing") causes
a kernel crash. If responder get a zero-byte RDMA Read request,
qp->resp.mr is not set in check_rkey() (see IBA C9-88). The mr is NULL in
this case, and a NULL pointer dereference occurs as shown below.

 BUG: kernel NULL pointer dereference, address: 0000000000000010
 #PF: supervisor write access in kernel mode
 #PF: error_code(0x0002) - not-present page
 PGD 0 P4D 0
 Oops: 0002 [#1] PREEMPT SMP PTI
 CPU: 2 PID: 3622 Comm: python3 Kdump: loaded Not tainted 6.1.0-rc3+ #34
 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
 RIP: 0010:__rxe_put+0xc/0x60 [rdma_rxe]
 Code: cc cc cc 31 f6 e8 64 36 1b d3 41 b8 01 00 00 00 44 89 c0 c3 cc cc cc cc 41 89 c0 eb c1 90 0f 1f 44 00 00 41 54 b8 ff ff ff ff <f0> 0f c1 47 10 83 f8 01 74 11 45 31 e4 85 c0 7e 20 44 89 e0 41 5c
 RSP: 0018:ffffb27bc012ce78 EFLAGS: 00010246
 RAX: 00000000ffffffff RBX: ffff9790857b0580 RCX: 0000000000000000
 RDX: ffff979080fe145a RSI: 000055560e3e0000 RDI: 0000000000000000
 RBP: ffff97909c7dd800 R08: 0000000000000001 R09: e7ce43d97f7bed0f
 R10: ffff97908b29c300 R11: 0000000000000000 R12: 0000000000000000
 R13: 0000000000000000 R14: ffff97908b29c300 R15: 0000000000000000
 FS:  00007f276f7bd740(0000) GS:ffff9792b5c80000(0000) knlGS:0000000000000000
 CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
 CR2: 0000000000000010 CR3: 0000000114230002 CR4: 0000000000060ee0
 Call Trace:
  <IRQ>
  read_reply+0xda/0x310 [rdma_rxe]
  rxe_responder+0x82d/0xe50 [rdma_rxe]
  do_task+0x84/0x170 [rdma_rxe]
  tasklet_action_common.constprop.0+0xa7/0x120
  __do_softirq+0xcb/0x2ac
  do_softirq+0x63/0x90
  </IRQ>

Support a NULL mr during read_reply()

Fixes: 686d348476ee ("RDMA/rxe: Remove unnecessary mr testing")
Fixes: b5f9a01fae42 ("RDMA/rxe: Fix mr leak in RESPST_ERR_RNR")
Link: https://lore.kernel.org/r/20221209045926.531689-1-matsuda-daisuke@fujitsu.com
Link: https://lore.kernel.org/r/20221202145713.13152-1-lizhijian@fujitsu.com
Signed-off-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 12eb85e8d4154..0c8bec2819376 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -460,7 +460,7 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 		return RESPST_EXECUTE;
 	}
 
-	/* A zero-byte op is not required to set an addr or rkey. */
+	/* A zero-byte op is not required to set an addr or rkey. See C9-88 */
 	if ((pkt->mask & RXE_READ_OR_WRITE_MASK) &&
 	    (pkt->mask & RXE_RETH_MASK) &&
 	    reth_len(pkt) == 0) {
@@ -880,13 +880,15 @@ static enum resp_states read_reply(struct rxe_qp *qp,
 	skb = prepare_ack_packet(qp, &ack_pkt, opcode, payload,
 				 res->cur_psn, AETH_ACK_UNLIMITED);
 	if (!skb) {
-		rxe_put(mr);
+		if (mr)
+			rxe_put(mr);
 		return RESPST_ERR_RNR;
 	}
 
 	err = rxe_mr_copy(mr, res->read.va, payload_addr(&ack_pkt),
 			  payload, RXE_FROM_MR_OBJ);
-	rxe_put(mr);
+	if (mr)
+		rxe_put(mr);
 	if (err) {
 		kfree_skb(skb);
 		return RESPST_ERR_RKEY_VIOLATION;
-- 
GitLab


From 6ff8ca3f93d3cd2a77f051d2d971cf3638d39546 Mon Sep 17 00:00:00 2001
From: Qinglin Pan <panqinglin2020@iscas.ac.cn>
Date: Mon, 28 Nov 2022 10:36:43 +0800
Subject: [PATCH 3594/4122] riscv: mm: call best_map_size many times during
 linear-mapping

Modify the best_map_size function to give map_size many times instead
of only once, so a memory region can be mapped by both PMD_SIZE and
PAGE_SIZE.

Signed-off-by: Qinglin Pan <panqinglin2020@iscas.ac.cn>
Reviewed-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Link: https://lore.kernel.org/r/20221128023643.329091-1-panqinglin2020@iscas.ac.cn
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/mm/init.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 7d59516ce6b39..bb0028c07ef30 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -673,10 +673,11 @@ void __init create_pgd_mapping(pgd_t *pgdp,
 static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
 {
 	/* Upgrade to PMD_SIZE mappings whenever possible */
-	if ((base & (PMD_SIZE - 1)) || (size & (PMD_SIZE - 1)))
-		return PAGE_SIZE;
+	base &= PMD_SIZE - 1;
+	if (!base && size >= PMD_SIZE)
+		return PMD_SIZE;
 
-	return PMD_SIZE;
+	return PAGE_SIZE;
 }
 
 #ifdef CONFIG_XIP_KERNEL
@@ -1111,9 +1112,9 @@ static void __init setup_vm_final(void)
 		if (end >= __pa(PAGE_OFFSET) + memory_limit)
 			end = __pa(PAGE_OFFSET) + memory_limit;
 
-		map_size = best_map_size(start, end - start);
 		for (pa = start; pa < end; pa += map_size) {
 			va = (uintptr_t)__va(pa);
+			map_size = best_map_size(pa, end - pa);
 
 			create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
 					   pgprot_from_va(va));
-- 
GitLab


From 689c5421bfe0eac65526bd97a466b9590a6aad3c Mon Sep 17 00:00:00 2001
From: Bob Pearson <rpearsonhpe@gmail.com>
Date: Thu, 8 Dec 2022 15:09:46 -0600
Subject: [PATCH 3595/4122] RDMA/rxe: Fix incorrect responder length checking

The code in rxe_resp.c at check_length() is incorrect as it compares
pkt->opcode an 8 bit value against various mask bits which are all higher
than 256 so nothing is ever reported.

This patch rewrites this to compare against pkt->mask which is
correct. However this now exposes another error. For UD send packets the
value of the pmtu cannot be determined from qp->mtu.  All that is required
here is to later check if the payload fits into the posted receive buffer
in that case.

Fixes: 837a55847ead ("RDMA/rxe: Implement packet length validation on responder")
Link: https://lore.kernel.org/r/20221208210945.28607-1-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Reviewed-by: Daisuke Matsuda <matsuda-daisuke@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 62 ++++++++++++++++------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 0c8bec2819376..abbaa41017e87 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -392,36 +392,46 @@ static enum resp_states check_resource(struct rxe_qp *qp,
 	return RESPST_CHK_LENGTH;
 }
 
-static enum resp_states check_length(struct rxe_qp *qp,
-				     struct rxe_pkt_info *pkt)
+static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
+					      struct rxe_pkt_info *pkt)
 {
-	int mtu = qp->mtu;
-	u32 payload = payload_size(pkt);
-	u32 dmalen = reth_len(pkt);
-
-	/* RoCEv2 packets do not have LRH.
-	 * Let's skip checking it.
+	/*
+	 * See IBA C9-92
+	 * For UD QPs we only check if the packet will fit in the
+	 * receive buffer later. For rmda operations additional
+	 * length checks are performed in check_rkey.
 	 */
-
-	if ((pkt->opcode & RXE_START_MASK) &&
-	    (pkt->opcode & RXE_END_MASK)) {
-		/* "only" packets */
-		if (payload > mtu)
-			return RESPST_ERR_LENGTH;
-	} else if ((pkt->opcode & RXE_START_MASK) ||
-		   (pkt->opcode & RXE_MIDDLE_MASK)) {
-		/* "first" or "middle" packets */
-		if (payload != mtu)
-			return RESPST_ERR_LENGTH;
-	} else if (pkt->opcode & RXE_END_MASK) {
-		/* "last" packets */
-		if ((payload == 0) || (payload > mtu))
-			return RESPST_ERR_LENGTH;
+	if (pkt->mask & RXE_PAYLOAD_MASK && ((qp_type(qp) == IB_QPT_RC) ||
+					     (qp_type(qp) == IB_QPT_UC))) {
+		unsigned int mtu = qp->mtu;
+		unsigned int payload = payload_size(pkt);
+
+		if ((pkt->mask & RXE_START_MASK) &&
+		    (pkt->mask & RXE_END_MASK)) {
+			if (unlikely(payload > mtu)) {
+				rxe_dbg_qp(qp, "only packet too long");
+				return RESPST_ERR_LENGTH;
+			}
+		} else if ((pkt->mask & RXE_START_MASK) ||
+			   (pkt->mask & RXE_MIDDLE_MASK)) {
+			if (unlikely(payload != mtu)) {
+				rxe_dbg_qp(qp, "first or middle packet not mtu");
+				return RESPST_ERR_LENGTH;
+			}
+		} else if (pkt->mask & RXE_END_MASK) {
+			if (unlikely((payload == 0) || (payload > mtu))) {
+				rxe_dbg_qp(qp, "last packet zero or too long");
+				return RESPST_ERR_LENGTH;
+			}
+		}
 	}
 
-	if (pkt->opcode & (RXE_WRITE_MASK | RXE_READ_MASK)) {
-		if (dmalen > (1 << 31))
+	/* See IBA C9-94 */
+	if (pkt->mask & RXE_RETH_MASK) {
+		if (reth_len(pkt) > (1U << 31)) {
+			rxe_dbg_qp(qp, "dma length too long");
 			return RESPST_ERR_LENGTH;
+		}
 	}
 
 	return RESPST_CHK_RKEY;
@@ -1401,7 +1411,7 @@ int rxe_responder(void *arg)
 			state = check_resource(qp, pkt);
 			break;
 		case RESPST_CHK_LENGTH:
-			state = check_length(qp, pkt);
+			state = rxe_resp_check_length(qp, pkt);
 			break;
 		case RESPST_CHK_RKEY:
 			state = check_rkey(qp, pkt);
-- 
GitLab


From 0c17da492dc6c33cc5b99633adb4bd7b2587153c Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:52 +0800
Subject: [PATCH 3596/4122] RDMA: Extend RDMA user ABI to support flush

This commit extends the RDMA user ABI to support the flush
operation defined in IBA A19.4.1. These changes are
backward compatible with the existing RDMA user ABI.

Link: https://lore.kernel.org/r/20221206130201.30986-2-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/ib_user_ioctl_verbs.h |  2 ++
 include/uapi/rdma/ib_user_verbs.h       | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
index e0c25537fd2e9..d7c5aaa327445 100644
--- a/include/uapi/rdma/ib_user_ioctl_verbs.h
+++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
@@ -57,6 +57,8 @@ enum ib_uverbs_access_flags {
 	IB_UVERBS_ACCESS_ZERO_BASED = 1 << 5,
 	IB_UVERBS_ACCESS_ON_DEMAND = 1 << 6,
 	IB_UVERBS_ACCESS_HUGETLB = 1 << 7,
+	IB_UVERBS_ACCESS_FLUSH_GLOBAL = 1 << 8,
+	IB_UVERBS_ACCESS_FLUSH_PERSISTENT = 1 << 9,
 
 	IB_UVERBS_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_OPTIONAL_FIRST,
 	IB_UVERBS_ACCESS_OPTIONAL_RANGE =
diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
index 2378148155442..e16650f0c85dd 100644
--- a/include/uapi/rdma/ib_user_verbs.h
+++ b/include/uapi/rdma/ib_user_verbs.h
@@ -105,6 +105,18 @@ enum {
 	IB_USER_VERBS_EX_CMD_MODIFY_CQ
 };
 
+/* see IBA A19.4.1.1 Placement Types */
+enum ib_placement_type {
+	IB_FLUSH_GLOBAL = 1U << 0,
+	IB_FLUSH_PERSISTENT = 1U << 1,
+};
+
+/* see IBA A19.4.1.2 Selectivity Level */
+enum ib_selectivity_level {
+	IB_FLUSH_RANGE = 0,
+	IB_FLUSH_MR,
+};
+
 /*
  * Make sure that all structs defined in this file remain laid out so
  * that they pack the same way on 32-bit and 64-bit architectures (to
@@ -466,6 +478,7 @@ enum ib_uverbs_wc_opcode {
 	IB_UVERBS_WC_BIND_MW = 5,
 	IB_UVERBS_WC_LOCAL_INV = 6,
 	IB_UVERBS_WC_TSO = 7,
+	IB_UVERBS_WC_FLUSH = 8,
 	IB_UVERBS_WC_ATOMIC_WRITE = 9,
 };
 
@@ -785,6 +798,7 @@ enum ib_uverbs_wr_opcode {
 	IB_UVERBS_WR_RDMA_READ_WITH_INV = 11,
 	IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP = 12,
 	IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD = 13,
+	IB_UVERBS_WR_FLUSH = 14,
 	IB_UVERBS_WR_ATOMIC_WRITE = 15,
 	/* Review enum ib_wr_opcode before modifying this */
 };
@@ -1333,6 +1347,9 @@ enum ib_uverbs_device_cap_flags {
 	/* Deprecated. Please use IB_UVERBS_RAW_PACKET_CAP_SCATTER_FCS. */
 	IB_UVERBS_DEVICE_RAW_SCATTER_FCS = 1ULL << 34,
 	IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING = 1ULL << 36,
+	/* Flush placement types */
+	IB_UVERBS_DEVICE_FLUSH_GLOBAL = 1ULL << 38,
+	IB_UVERBS_DEVICE_FLUSH_PERSISTENT = 1ULL << 39,
 	/* Atomic write attributes */
 	IB_UVERBS_DEVICE_ATOMIC_WRITE = 1ULL << 40,
 };
-- 
GitLab


From 208e3a134b50d95ea3962d7a37b4d8a8f5368376 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:53 +0800
Subject: [PATCH 3597/4122] RDMA: Extend RDMA kernel verbs ABI to support flush

This commit extends the RDMA kernel verbs ABI to support the flush
operation defined in IBA A19.4.1. These changes are
backward compatible with the existing RDMA kernel verbs ABI.

It makes device/HCA support new FLUSH attributes/capabilities, and it
also makes memory region support new FLUSH access flags.

Users can use ibv_reg_mr(3) to register flush access flags. Only the
access flags also supported by device's capabilities can be registered
successfully.

Once registered successfully, it means the MR is flushable. Similarly,
A flushable MR should also have one or both of GLOBAL_VISIBILITY and
PERSISTENT attributes/capabilities like device/HCA.

Link: https://lore.kernel.org/r/20221206130201.30986-3-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/rdma/ib_pack.h  |  3 +++
 include/rdma/ib_verbs.h | 18 +++++++++++++++++-
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/include/rdma/ib_pack.h b/include/rdma/ib_pack.h
index f932d164af637..b8c56d7dc35da 100644
--- a/include/rdma/ib_pack.h
+++ b/include/rdma/ib_pack.h
@@ -84,6 +84,7 @@ enum {
 	/* opcode 0x15 is reserved */
 	IB_OPCODE_SEND_LAST_WITH_INVALIDATE         = 0x16,
 	IB_OPCODE_SEND_ONLY_WITH_INVALIDATE         = 0x17,
+	IB_OPCODE_FLUSH                             = 0x1C,
 	IB_OPCODE_ATOMIC_WRITE                      = 0x1D,
 
 	/* real constants follow -- see comment about above IB_OPCODE()
@@ -113,6 +114,7 @@ enum {
 	IB_OPCODE(RC, FETCH_ADD),
 	IB_OPCODE(RC, SEND_LAST_WITH_INVALIDATE),
 	IB_OPCODE(RC, SEND_ONLY_WITH_INVALIDATE),
+	IB_OPCODE(RC, FLUSH),
 	IB_OPCODE(RC, ATOMIC_WRITE),
 
 	/* UC */
@@ -151,6 +153,7 @@ enum {
 	IB_OPCODE(RD, ATOMIC_ACKNOWLEDGE),
 	IB_OPCODE(RD, COMPARE_SWAP),
 	IB_OPCODE(RD, FETCH_ADD),
+	IB_OPCODE(RD, FLUSH),
 
 	/* UD */
 	IB_OPCODE(UD, SEND_ONLY),
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index df6bb26ba0be6..a9a429172c0a1 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -270,6 +270,9 @@ enum ib_device_cap_flags {
 	/* The device supports padding incoming writes to cacheline. */
 	IB_DEVICE_PCI_WRITE_END_PADDING =
 		IB_UVERBS_DEVICE_PCI_WRITE_END_PADDING,
+	/* Placement type attributes */
+	IB_DEVICE_FLUSH_GLOBAL = IB_UVERBS_DEVICE_FLUSH_GLOBAL,
+	IB_DEVICE_FLUSH_PERSISTENT = IB_UVERBS_DEVICE_FLUSH_PERSISTENT,
 	IB_DEVICE_ATOMIC_WRITE = IB_UVERBS_DEVICE_ATOMIC_WRITE,
 };
 
@@ -987,6 +990,7 @@ enum ib_wc_opcode {
 	IB_WC_REG_MR,
 	IB_WC_MASKED_COMP_SWAP,
 	IB_WC_MASKED_FETCH_ADD,
+	IB_WC_FLUSH = IB_UVERBS_WC_FLUSH,
 /*
  * Set value of IB_WC_RECV so consumers can test if a completion is a
  * receive by testing (opcode & IB_WC_RECV).
@@ -1327,6 +1331,7 @@ enum ib_wr_opcode {
 		IB_UVERBS_WR_MASKED_ATOMIC_CMP_AND_SWP,
 	IB_WR_MASKED_ATOMIC_FETCH_AND_ADD =
 		IB_UVERBS_WR_MASKED_ATOMIC_FETCH_AND_ADD,
+	IB_WR_FLUSH = IB_UVERBS_WR_FLUSH,
 	IB_WR_ATOMIC_WRITE = IB_UVERBS_WR_ATOMIC_WRITE,
 
 	/* These are kernel only and can not be issued by userspace */
@@ -1461,10 +1466,12 @@ enum ib_access_flags {
 	IB_ACCESS_ON_DEMAND = IB_UVERBS_ACCESS_ON_DEMAND,
 	IB_ACCESS_HUGETLB = IB_UVERBS_ACCESS_HUGETLB,
 	IB_ACCESS_RELAXED_ORDERING = IB_UVERBS_ACCESS_RELAXED_ORDERING,
+	IB_ACCESS_FLUSH_GLOBAL = IB_UVERBS_ACCESS_FLUSH_GLOBAL,
+	IB_ACCESS_FLUSH_PERSISTENT = IB_UVERBS_ACCESS_FLUSH_PERSISTENT,
 
 	IB_ACCESS_OPTIONAL = IB_UVERBS_ACCESS_OPTIONAL_RANGE,
 	IB_ACCESS_SUPPORTED =
-		((IB_ACCESS_HUGETLB << 1) - 1) | IB_ACCESS_OPTIONAL,
+		((IB_ACCESS_FLUSH_PERSISTENT << 1) - 1) | IB_ACCESS_OPTIONAL,
 };
 
 /*
@@ -4325,6 +4332,8 @@ int ib_dealloc_xrcd_user(struct ib_xrcd *xrcd, struct ib_udata *udata);
 static inline int ib_check_mr_access(struct ib_device *ib_dev,
 				     unsigned int flags)
 {
+	u64 device_cap = ib_dev->attrs.device_cap_flags;
+
 	/*
 	 * Local write permission is required if remote write or
 	 * remote atomic permission is also requested.
@@ -4339,6 +4348,13 @@ static inline int ib_check_mr_access(struct ib_device *ib_dev,
 	if (flags & IB_ACCESS_ON_DEMAND &&
 	    !(ib_dev->attrs.kernel_cap_flags & IBK_ON_DEMAND_PAGING))
 		return -EOPNOTSUPP;
+
+	if ((flags & IB_ACCESS_FLUSH_GLOBAL &&
+	    !(device_cap & IB_DEVICE_FLUSH_GLOBAL)) ||
+	    (flags & IB_ACCESS_FLUSH_PERSISTENT &&
+	    !(device_cap & IB_DEVICE_FLUSH_PERSISTENT)))
+		return -EOPNOTSUPP;
+
 	return 0;
 }
 
-- 
GitLab


From 668ce52d5eef477c0def757610768a1a3ccc9785 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:54 +0800
Subject: [PATCH 3598/4122] RDMA/rxe: Extend rxe user ABI to support flush

This commit extends the rxe user ABI to support the flush
operation defined in IBA A19.4.1. These changes are
backward compatible with the existing rxe user ABI.

The user API request a flush by filling this structure.

Link: https://lore.kernel.org/r/20221206130201.30986-4-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 include/uapi/rdma/rdma_user_rxe.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h
index d20d1ecf046fd..bb092fccb813c 100644
--- a/include/uapi/rdma/rdma_user_rxe.h
+++ b/include/uapi/rdma/rdma_user_rxe.h
@@ -82,6 +82,13 @@ struct rxe_send_wr {
 		__u32		invalidate_rkey;
 	} ex;
 	union {
+		struct {
+			__aligned_u64 remote_addr;
+			__u32	length;
+			__u32	rkey;
+			__u8	type;
+			__u8	level;
+		} flush;
 		struct {
 			__aligned_u64 remote_addr;
 			__u32	rkey;
-- 
GitLab


From 02ea0a511558c907bde0e01fdebcd4536924d996 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:55 +0800
Subject: [PATCH 3599/4122] RDMA/rxe: Allow registering persistent flag for
 pmem MR only

Memory region could  support at most 2 flush access flags:
IB_ACCESS_FLUSH_PERSISTENT and IB_ACCESS_FLUSH_GLOBAL

But we only allow user to register persistent flush flags to the pmem MR
where it has the ability of persisting data across power cycles.

So registering a persistent access flag to a non-pmem MR will be rejected.

Link: https://lore.kernel.org/r/20221206130201.30986-5-lizhijian@fujitsu.com
CC: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_mr.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index b7c9ff1ddf0e1..81a438e5010aa 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -111,6 +111,15 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr)
 	mr->ibmr.type = IB_MR_TYPE_DMA;
 }
 
+static bool is_pmem_page(struct page *pg)
+{
+	unsigned long paddr = page_to_phys(pg);
+
+	return REGION_INTERSECTS ==
+	       region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM,
+				 IORES_DESC_PERSISTENT_MEMORY);
+}
+
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr)
 {
@@ -146,16 +155,25 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 	num_buf			= 0;
 	map = mr->map;
 	if (length > 0) {
-		buf = map[0]->buf;
+		bool persistent_access = access & IB_ACCESS_FLUSH_PERSISTENT;
 
+		buf = map[0]->buf;
 		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
+			struct page *pg = sg_page_iter_page(&sg_iter);
+
+			if (persistent_access && !is_pmem_page(pg)) {
+				rxe_dbg_mr(mr, "Unable to register persistent access to non-pmem device\n");
+				err = -EINVAL;
+				goto err_release_umem;
+			}
+
 			if (num_buf >= RXE_BUF_PER_MAP) {
 				map++;
 				buf = map[0]->buf;
 				num_buf = 0;
 			}
 
-			vaddr = page_address(sg_page_iter_page(&sg_iter));
+			vaddr = page_address(pg);
 			if (!vaddr) {
 				rxe_dbg_mr(mr, "Unable to get virtual address\n");
 				err = -ENOMEM;
-- 
GitLab


From 02e9a31c897d17981508ceaac4430b93ff56ffc7 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:56 +0800
Subject: [PATCH 3600/4122] RDMA/rxe: Extend rxe packet format to support flush

Extend rxe opcode tables, headers, helper and constants to support
flush operations.

Refer to the IBA A19.4.1 for more FETH definition details

Link: https://lore.kernel.org/r/20221206130201.30986-6-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_hdr.h    | 47 ++++++++++++++++++++++++++
 drivers/infiniband/sw/rxe/rxe_opcode.c | 17 ++++++++++
 drivers/infiniband/sw/rxe/rxe_opcode.h | 14 +++++---
 3 files changed, 73 insertions(+), 5 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_hdr.h b/drivers/infiniband/sw/rxe/rxe_hdr.h
index 804594b76040a..46f82b27fcd2f 100644
--- a/drivers/infiniband/sw/rxe/rxe_hdr.h
+++ b/drivers/infiniband/sw/rxe/rxe_hdr.h
@@ -607,6 +607,52 @@ static inline void reth_set_len(struct rxe_pkt_info *pkt, u32 len)
 		rxe_opcode[pkt->opcode].offset[RXE_RETH], len);
 }
 
+/******************************************************************************
+ * FLUSH Extended Transport Header
+ ******************************************************************************/
+
+struct rxe_feth {
+	__be32 bits;
+};
+
+#define FETH_PLT_MASK		(0x0000000f) /* bits 3-0 */
+#define FETH_SEL_MASK		(0x00000030) /* bits 5-4 */
+#define FETH_SEL_SHIFT		(4U)
+
+static inline u32 __feth_plt(void *arg)
+{
+	struct rxe_feth *feth = arg;
+
+	return be32_to_cpu(feth->bits) & FETH_PLT_MASK;
+}
+
+static inline u32 __feth_sel(void *arg)
+{
+	struct rxe_feth *feth = arg;
+
+	return (be32_to_cpu(feth->bits) & FETH_SEL_MASK) >> FETH_SEL_SHIFT;
+}
+
+static inline u32 feth_plt(struct rxe_pkt_info *pkt)
+{
+	return __feth_plt(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
+}
+
+static inline u32 feth_sel(struct rxe_pkt_info *pkt)
+{
+	return __feth_sel(pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
+}
+
+static inline void feth_init(struct rxe_pkt_info *pkt, u8 type, u8 level)
+{
+	struct rxe_feth *feth = (struct rxe_feth *)
+		    (pkt->hdr + rxe_opcode[pkt->opcode].offset[RXE_FETH]);
+	u32 bits = ((level << FETH_SEL_SHIFT) & FETH_SEL_MASK) |
+		   (type & FETH_PLT_MASK);
+
+	feth->bits = cpu_to_be32(bits);
+}
+
 /******************************************************************************
  * Atomic Extended Transport Header
  ******************************************************************************/
@@ -909,6 +955,7 @@ enum rxe_hdr_length {
 	RXE_ATMETH_BYTES	= sizeof(struct rxe_atmeth),
 	RXE_IETH_BYTES		= sizeof(struct rxe_ieth),
 	RXE_RDETH_BYTES		= sizeof(struct rxe_rdeth),
+	RXE_FETH_BYTES		= sizeof(struct rxe_feth),
 };
 
 static inline size_t header_size(struct rxe_pkt_info *pkt)
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.c b/drivers/infiniband/sw/rxe/rxe_opcode.c
index fb196029048e5..5c0d5c6ffda4f 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.c
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.c
@@ -101,6 +101,12 @@ struct rxe_wr_opcode_info rxe_wr_opcode_info[] = {
 			[IB_QPT_UC]	= WR_LOCAL_OP_MASK,
 		},
 	},
+	[IB_WR_FLUSH]					= {
+		.name   = "IB_WR_FLUSH",
+		.mask   = {
+			[IB_QPT_RC]	= WR_FLUSH_MASK,
+		},
+	},
 	[IB_WR_ATOMIC_WRITE]                       = {
 		.name   = "IB_WR_ATOMIC_WRITE",
 		.mask   = {
@@ -384,6 +390,17 @@ struct rxe_opcode_info rxe_opcode[RXE_NUM_OPCODE] = {
 					  RXE_IETH_BYTES,
 		}
 	},
+	[IB_OPCODE_RC_FLUSH]					= {
+		.name	= "IB_OPCODE_RC_FLUSH",
+		.mask	= RXE_FETH_MASK | RXE_RETH_MASK | RXE_FLUSH_MASK |
+			  RXE_START_MASK | RXE_END_MASK | RXE_REQ_MASK,
+		.length = RXE_BTH_BYTES + RXE_FETH_BYTES + RXE_RETH_BYTES,
+		.offset = {
+			[RXE_BTH]	= 0,
+			[RXE_FETH]	= RXE_BTH_BYTES,
+			[RXE_RETH]	= RXE_BTH_BYTES + RXE_FETH_BYTES,
+		}
+	},
 	[IB_OPCODE_RC_ATOMIC_WRITE]                        = {
 		.name   = "IB_OPCODE_RC_ATOMIC_WRITE",
 		.mask   = RXE_RETH_MASK | RXE_PAYLOAD_MASK | RXE_REQ_MASK |
diff --git a/drivers/infiniband/sw/rxe/rxe_opcode.h b/drivers/infiniband/sw/rxe/rxe_opcode.h
index a470e9b0b8842..cea4e0a639199 100644
--- a/drivers/infiniband/sw/rxe/rxe_opcode.h
+++ b/drivers/infiniband/sw/rxe/rxe_opcode.h
@@ -20,6 +20,7 @@ enum rxe_wr_mask {
 	WR_READ_MASK			= BIT(3),
 	WR_WRITE_MASK			= BIT(4),
 	WR_LOCAL_OP_MASK		= BIT(5),
+	WR_FLUSH_MASK			= BIT(6),
 	WR_ATOMIC_WRITE_MASK		= BIT(7),
 
 	WR_READ_OR_WRITE_MASK		= WR_READ_MASK | WR_WRITE_MASK,
@@ -48,6 +49,7 @@ enum rxe_hdr_type {
 	RXE_RDETH,
 	RXE_DETH,
 	RXE_IMMDT,
+	RXE_FETH,
 	RXE_PAYLOAD,
 	NUM_HDR_TYPES
 };
@@ -64,6 +66,7 @@ enum rxe_hdr_mask {
 	RXE_IETH_MASK		= BIT(RXE_IETH),
 	RXE_RDETH_MASK		= BIT(RXE_RDETH),
 	RXE_DETH_MASK		= BIT(RXE_DETH),
+	RXE_FETH_MASK		= BIT(RXE_FETH),
 	RXE_PAYLOAD_MASK	= BIT(RXE_PAYLOAD),
 
 	RXE_REQ_MASK		= BIT(NUM_HDR_TYPES + 0),
@@ -72,13 +75,14 @@ enum rxe_hdr_mask {
 	RXE_WRITE_MASK		= BIT(NUM_HDR_TYPES + 3),
 	RXE_READ_MASK		= BIT(NUM_HDR_TYPES + 4),
 	RXE_ATOMIC_MASK		= BIT(NUM_HDR_TYPES + 5),
+	RXE_FLUSH_MASK		= BIT(NUM_HDR_TYPES + 6),
 
-	RXE_RWR_MASK		= BIT(NUM_HDR_TYPES + 6),
-	RXE_COMP_MASK		= BIT(NUM_HDR_TYPES + 7),
+	RXE_RWR_MASK		= BIT(NUM_HDR_TYPES + 7),
+	RXE_COMP_MASK		= BIT(NUM_HDR_TYPES + 8),
 
-	RXE_START_MASK		= BIT(NUM_HDR_TYPES + 8),
-	RXE_MIDDLE_MASK		= BIT(NUM_HDR_TYPES + 9),
-	RXE_END_MASK		= BIT(NUM_HDR_TYPES + 10),
+	RXE_START_MASK		= BIT(NUM_HDR_TYPES + 9),
+	RXE_MIDDLE_MASK		= BIT(NUM_HDR_TYPES + 10),
+	RXE_END_MASK		= BIT(NUM_HDR_TYPES + 11),
 
 	RXE_LOOPBACK_MASK	= BIT(NUM_HDR_TYPES + 12),
 
-- 
GitLab


From fa1fd682ad3ef35b0e532c3bb14140786d17527c Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:57 +0800
Subject: [PATCH 3601/4122] RDMA/rxe: Implement RC RDMA FLUSH service in
 requester side

Implement FLUSH request operation in the requester.

Link: https://lore.kernel.org/r/20221206130201.30986-7-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_req.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_req.c b/drivers/infiniband/sw/rxe/rxe_req.c
index 2713e90589225..899c8779f8001 100644
--- a/drivers/infiniband/sw/rxe/rxe_req.c
+++ b/drivers/infiniband/sw/rxe/rxe_req.c
@@ -241,6 +241,9 @@ static int next_opcode_rc(struct rxe_qp *qp, u32 opcode, int fits)
 				IB_OPCODE_RC_SEND_ONLY_WITH_IMMEDIATE :
 				IB_OPCODE_RC_SEND_FIRST;
 
+	case IB_WR_FLUSH:
+		return IB_OPCODE_RC_FLUSH;
+
 	case IB_WR_RDMA_READ:
 		return IB_OPCODE_RC_RDMA_READ_REQUEST;
 
@@ -425,11 +428,18 @@ static struct sk_buff *init_req_packet(struct rxe_qp *qp,
 
 	/* init optional headers */
 	if (pkt->mask & RXE_RETH_MASK) {
-		reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
+		if (pkt->mask & RXE_FETH_MASK)
+			reth_set_rkey(pkt, ibwr->wr.flush.rkey);
+		else
+			reth_set_rkey(pkt, ibwr->wr.rdma.rkey);
 		reth_set_va(pkt, wqe->iova);
 		reth_set_len(pkt, wqe->dma.resid);
 	}
 
+	/* Fill Flush Extension Transport Header */
+	if (pkt->mask & RXE_FETH_MASK)
+		feth_init(pkt, ibwr->wr.flush.type, ibwr->wr.flush.level);
+
 	if (pkt->mask & RXE_IMMDT_MASK)
 		immdt_set_imm(pkt, ibwr->ex.imm_data);
 
@@ -488,6 +498,9 @@ static int finish_packet(struct rxe_qp *qp, struct rxe_av *av,
 
 			memset(pad, 0, bth_pad(pkt));
 		}
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		/* oA19-2: shall have no payload. */
+		wqe->dma.resid = 0;
 	}
 
 	if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
-- 
GitLab


From ea1bb00ee9a5527b032a6efebe4a879db4cb42bb Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:58 +0800
Subject: [PATCH 3602/4122] RDMA/rxe: Implement flush execution in responder
 side

Only the requested placement types that also registered in the destination
memory region are acceptable.
Otherwise, responder will also reply NAK "Remote Access Error" if it
found a placement type violation.

We will persist data via arch_wb_cache_pmem(), which could be
architecture specific.

This commit also adds 2 helpers to update qp.resp from the incoming packet.

Link: https://lore.kernel.org/r/20221206130201.30986-8-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_loc.h   |   1 +
 drivers/infiniband/sw/rxe/rxe_mr.c    |  36 ++++++
 drivers/infiniband/sw/rxe/rxe_resp.c  | 160 ++++++++++++++++++++++----
 drivers/infiniband/sw/rxe/rxe_verbs.h |   6 +
 4 files changed, 183 insertions(+), 20 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h
index a22476d27b384..948ce4902b10f 100644
--- a/drivers/infiniband/sw/rxe/rxe_loc.h
+++ b/drivers/infiniband/sw/rxe/rxe_loc.h
@@ -64,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_dir dir);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c
index 81a438e5010aa..072eac4b65d29 100644
--- a/drivers/infiniband/sw/rxe/rxe_mr.c
+++ b/drivers/infiniband/sw/rxe/rxe_mr.c
@@ -4,6 +4,8 @@
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
  */
 
+#include <linux/libnvdimm.h>
+
 #include "rxe.h"
 #include "rxe_loc.h"
 
@@ -192,6 +194,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 	mr->offset = ib_umem_offset(umem);
 	mr->state = RXE_MR_STATE_VALID;
 	mr->ibmr.type = IB_MR_TYPE_USER;
+	mr->ibmr.page_size = PAGE_SIZE;
 
 	return 0;
 
@@ -295,6 +298,39 @@ out:
 	return addr;
 }
 
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length)
+{
+	size_t offset;
+
+	if (length == 0)
+		return 0;
+
+	if (mr->ibmr.type == IB_MR_TYPE_DMA)
+		return -EFAULT;
+
+	offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask;
+	while (length > 0) {
+		u8 *va;
+		int bytes;
+
+		bytes = mr->ibmr.page_size - offset;
+		if (bytes > length)
+			bytes = length;
+
+		va = iova_to_vaddr(mr, iova, length);
+		if (!va)
+			return -EFAULT;
+
+		arch_wb_cache_pmem(va, bytes);
+
+		length -= bytes;
+		iova += bytes;
+		offset = 0;
+	}
+
+	return 0;
+}
+
 /* copy data from a range (vaddr, vaddr+length-1) to or from
  * a mr object starting at iova.
  */
diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index abbaa41017e87..7a60c7709da04 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -23,6 +23,7 @@ enum resp_states {
 	RESPST_READ_REPLY,
 	RESPST_ATOMIC_REPLY,
 	RESPST_ATOMIC_WRITE_REPLY,
+	RESPST_PROCESS_FLUSH,
 	RESPST_COMPLETE,
 	RESPST_ACKNOWLEDGE,
 	RESPST_CLEANUP,
@@ -59,6 +60,7 @@ static char *resp_state_name[] = {
 	[RESPST_READ_REPLY]			= "READ_REPLY",
 	[RESPST_ATOMIC_REPLY]			= "ATOMIC_REPLY",
 	[RESPST_ATOMIC_WRITE_REPLY]		= "ATOMIC_WRITE_REPLY",
+	[RESPST_PROCESS_FLUSH]			= "PROCESS_FLUSH",
 	[RESPST_COMPLETE]			= "COMPLETE",
 	[RESPST_ACKNOWLEDGE]			= "ACKNOWLEDGE",
 	[RESPST_CLEANUP]			= "CLEANUP",
@@ -258,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
 	}
 }
 
+static bool check_qp_attr_access(struct rxe_qp *qp,
+				 struct rxe_pkt_info *pkt)
+{
+	if (((pkt->mask & RXE_READ_MASK) &&
+	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
+	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
+	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
+	    ((pkt->mask & RXE_ATOMIC_MASK) &&
+	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+		return false;
+
+	if (pkt->mask & RXE_FLUSH_MASK) {
+		u32 flush_type = feth_plt(pkt);
+
+		if ((flush_type & IB_FLUSH_GLOBAL &&
+		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
+		    (flush_type & IB_FLUSH_PERSISTENT &&
+		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
+			return false;
+	}
+
+	return true;
+}
+
 static enum resp_states check_op_valid(struct rxe_qp *qp,
 				       struct rxe_pkt_info *pkt)
 {
 	switch (qp_type(qp)) {
 	case IB_QPT_RC:
-		if (((pkt->mask & RXE_READ_MASK) &&
-		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
-		    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
-		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
-		    ((pkt->mask & RXE_ATOMIC_MASK) &&
-		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
+		if (!check_qp_attr_access(qp, pkt))
 			return RESPST_ERR_UNSUPPORTED_OPCODE;
-		}
 
 		break;
 
@@ -437,6 +457,23 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
 	return RESPST_CHK_RKEY;
 }
 
+static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+	qp->resp.va = reth_va(pkt);
+	qp->resp.offset = 0;
+	qp->resp.rkey = reth_rkey(pkt);
+	qp->resp.resid = reth_len(pkt);
+	qp->resp.length = reth_len(pkt);
+}
+
+static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+	qp->resp.va = atmeth_va(pkt);
+	qp->resp.offset = 0;
+	qp->resp.rkey = atmeth_rkey(pkt);
+	qp->resp.resid = sizeof(u64);
+}
+
 static enum resp_states check_rkey(struct rxe_qp *qp,
 				   struct rxe_pkt_info *pkt)
 {
@@ -448,23 +485,26 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	u32 pktlen;
 	int mtu = qp->mtu;
 	enum resp_states state;
-	int access;
+	int access = 0;
 
 	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
-		if (pkt->mask & RXE_RETH_MASK) {
-			qp->resp.va = reth_va(pkt);
-			qp->resp.offset = 0;
-			qp->resp.rkey = reth_rkey(pkt);
-			qp->resp.resid = reth_len(pkt);
-			qp->resp.length = reth_len(pkt);
-		}
+		if (pkt->mask & RXE_RETH_MASK)
+			qp_resp_from_reth(qp, pkt);
+
 		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
 						     : IB_ACCESS_REMOTE_WRITE;
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		u32 flush_type = feth_plt(pkt);
+
+		if (pkt->mask & RXE_RETH_MASK)
+			qp_resp_from_reth(qp, pkt);
+
+		if (flush_type & IB_FLUSH_GLOBAL)
+			access |= IB_ACCESS_FLUSH_GLOBAL;
+		if (flush_type & IB_FLUSH_PERSISTENT)
+			access |= IB_ACCESS_FLUSH_PERSISTENT;
 	} else if (pkt->mask & RXE_ATOMIC_MASK) {
-		qp->resp.va = atmeth_va(pkt);
-		qp->resp.offset = 0;
-		qp->resp.rkey = atmeth_rkey(pkt);
-		qp->resp.resid = sizeof(u64);
+		qp_resp_from_atmeth(qp, pkt);
 		access = IB_ACCESS_REMOTE_ATOMIC;
 	} else {
 		return RESPST_EXECUTE;
@@ -511,11 +551,20 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 		}
 	}
 
+	if (pkt->mask & RXE_FLUSH_MASK) {
+		/* FLUSH MR may not set va or resid
+		 * no need to check range since we will flush whole mr
+		 */
+		if (feth_sel(pkt) == IB_FLUSH_MR)
+			goto skip_check_range;
+	}
+
 	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
 		state = RESPST_ERR_RKEY_VIOLATION;
 		goto err;
 	}
 
+skip_check_range:
 	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 		if (resid > mtu) {
 			if (pktlen != mtu || bth_pad(pkt)) {
@@ -621,11 +670,61 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
 		res->last_psn = pkt->psn;
 		res->cur_psn = pkt->psn;
 		break;
+	case RXE_FLUSH_MASK:
+		res->flush.va = qp->resp.va + qp->resp.offset;
+		res->flush.length = qp->resp.length;
+		res->flush.type = feth_plt(pkt);
+		res->flush.level = feth_sel(pkt);
 	}
 
 	return res;
 }
 
+static enum resp_states process_flush(struct rxe_qp *qp,
+				       struct rxe_pkt_info *pkt)
+{
+	u64 length, start;
+	struct rxe_mr *mr = qp->resp.mr;
+	struct resp_res *res = qp->resp.res;
+
+	/* oA19-14, oA19-15 */
+	if (res && res->replay)
+		return RESPST_ACKNOWLEDGE;
+	else if (!res) {
+		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
+		qp->resp.res = res;
+	}
+
+	if (res->flush.level == IB_FLUSH_RANGE) {
+		start = res->flush.va;
+		length = res->flush.length;
+	} else { /* level == IB_FLUSH_MR */
+		start = mr->ibmr.iova;
+		length = mr->ibmr.length;
+	}
+
+	if (res->flush.type & IB_FLUSH_PERSISTENT) {
+		if (rxe_flush_pmem_iova(mr, start, length))
+			return RESPST_ERR_RKEY_VIOLATION;
+		/* Make data persistent. */
+		wmb();
+	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
+		/* Make data global visibility. */
+		wmb();
+	}
+
+	qp->resp.msn++;
+
+	/* next expected psn, read handles this separately */
+	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+	qp->resp.ack_psn = qp->resp.psn;
+
+	qp->resp.opcode = pkt->opcode;
+	qp->resp.status = IB_WC_SUCCESS;
+
+	return RESPST_ACKNOWLEDGE;
+}
+
 /* Guarantee atomicity of atomic operations at the machine level. */
 static DEFINE_SPINLOCK(atomic_ops_lock);
 
@@ -980,6 +1079,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 		return RESPST_ATOMIC_REPLY;
 	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
 		return RESPST_ATOMIC_WRITE_REPLY;
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		return RESPST_PROCESS_FLUSH;
 	} else {
 		/* Unreachable */
 		WARN_ON_ONCE(1);
@@ -1176,7 +1277,7 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
 		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
 	else if (pkt->mask & RXE_ATOMIC_MASK)
 		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
-	else if (pkt->mask & RXE_ATOMIC_WRITE_MASK)
+	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
 		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
 	else if (bth_ack(pkt))
 		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
@@ -1234,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 		/* SEND. Ack again and cleanup. C9-105. */
 		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
 		return RESPST_CLEANUP;
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		struct resp_res *res;
+
+		/* Find the operation in our list of responder resources. */
+		res = find_resource(qp, pkt->psn);
+		if (res) {
+			res->replay = 1;
+			res->cur_psn = pkt->psn;
+			qp->resp.res = res;
+			rc = RESPST_PROCESS_FLUSH;
+			goto out;
+		}
+
+		/* Resource not found. Class D error. Drop the request. */
+		rc = RESPST_CLEANUP;
+		goto out;
 	} else if (pkt->mask & RXE_READ_MASK) {
 		struct resp_res *res;
 
@@ -1431,6 +1548,9 @@ int rxe_responder(void *arg)
 		case RESPST_ATOMIC_WRITE_REPLY:
 			state = atomic_write_reply(qp, pkt);
 			break;
+		case RESPST_PROCESS_FLUSH:
+			state = process_flush(qp, pkt);
+			break;
 		case RESPST_ACKNOWLEDGE:
 			state = acknowledge(qp, pkt);
 			break;
diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.h b/drivers/infiniband/sw/rxe/rxe_verbs.h
index 22a299b0a9f0a..19ddfa8904803 100644
--- a/drivers/infiniband/sw/rxe/rxe_verbs.h
+++ b/drivers/infiniband/sw/rxe/rxe_verbs.h
@@ -165,6 +165,12 @@ struct resp_res {
 			u64		va;
 			u32		resid;
 		} read;
+		struct {
+			u32		length;
+			u64		va;
+			u8		type;
+			u8		level;
+		} flush;
 	};
 };
 
-- 
GitLab


From 70aad902ce8aeb094dd3ef14988a652f24cce7c8 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:01:59 +0800
Subject: [PATCH 3603/4122] RDMA/rxe: Implement flush completion

Per IBA SPEC, FLUSH will ack in rdma read response with 0 length.

Use IB_WC_FLUSH (aka IB_UVERBS_WC_FLUSH) code to tell userspace a FLUSH
completion.

Link: https://lore.kernel.org/r/20221206130201.30986-9-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_comp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_comp.c b/drivers/infiniband/sw/rxe/rxe_comp.c
index 1c525325e2716..20737fec392bf 100644
--- a/drivers/infiniband/sw/rxe/rxe_comp.c
+++ b/drivers/infiniband/sw/rxe/rxe_comp.c
@@ -105,6 +105,7 @@ static enum ib_wc_opcode wr_to_wc_opcode(enum ib_wr_opcode opcode)
 	case IB_WR_REG_MR:			return IB_WC_REG_MR;
 	case IB_WR_BIND_MW:			return IB_WC_BIND_MW;
 	case IB_WR_ATOMIC_WRITE:		return IB_WC_ATOMIC_WRITE;
+	case IB_WR_FLUSH:			return IB_WC_FLUSH;
 
 	default:
 		return 0xff;
@@ -278,7 +279,8 @@ static inline enum comp_state check_ack(struct rxe_qp *qp,
 		 */
 	case IB_OPCODE_RC_RDMA_READ_RESPONSE_MIDDLE:
 		if (wqe->wr.opcode != IB_WR_RDMA_READ &&
-		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV) {
+		    wqe->wr.opcode != IB_WR_RDMA_READ_WITH_INV &&
+		    wqe->wr.opcode != IB_WR_FLUSH) {
 			wqe->status = IB_WC_FATAL_ERR;
 			return COMPST_ERROR;
 		}
-- 
GitLab


From 8b4d379b399d19f4c803e565bfe13f07b66b5ad7 Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:02:00 +0800
Subject: [PATCH 3604/4122] RDMA/cm: Make QP FLUSHABLE for supported device

Similar to RDMA and Atomic qp attributes enabled by default in CM, enable
FLUSH attribute for supported device. That makes applications that are
built with rdma_create_ep, rdma_accept APIs have FLUSH qp attribute
natively so that user is able to request FLUSH operation simpler.

Note that, a FLUSH operation requires FLUSH are supported by both
device(HCA) and memory region(MR) and QP at the same time, so it's safe
to enable FLUSH qp attribute by default here.

FLUSH attribute can be disable by modify_qp() interface.

Link: https://lore.kernel.org/r/20221206130201.30986-10-lizhijian@fujitsu.com
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/core/cm.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c
index 1f9938a2c4752..603c0aecc3614 100644
--- a/drivers/infiniband/core/cm.c
+++ b/drivers/infiniband/core/cm.c
@@ -4094,9 +4094,18 @@ static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
 		*qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
 				IB_QP_PKEY_INDEX | IB_QP_PORT;
 		qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
-		if (cm_id_priv->responder_resources)
+		if (cm_id_priv->responder_resources) {
+			struct ib_device *ib_dev = cm_id_priv->id.device;
+			u64 support_flush = ib_dev->attrs.device_cap_flags &
+			  (IB_DEVICE_FLUSH_GLOBAL | IB_DEVICE_FLUSH_PERSISTENT);
+			u32 flushable = support_flush ?
+					(IB_ACCESS_FLUSH_GLOBAL |
+					 IB_ACCESS_FLUSH_PERSISTENT) : 0;
+
 			qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
-						    IB_ACCESS_REMOTE_ATOMIC;
+						    IB_ACCESS_REMOTE_ATOMIC |
+						    flushable;
+		}
 		qp_attr->pkey_index = cm_id_priv->av.pkey_index;
 		if (cm_id_priv->av.port)
 			qp_attr->port_num = cm_id_priv->av.port->port_num;
-- 
GitLab


From 124011e6e933bead5852c3f69b32dec43919fe1a Mon Sep 17 00:00:00 2001
From: Li Zhijian <lizhijian@fujitsu.com>
Date: Tue, 6 Dec 2022 21:02:01 +0800
Subject: [PATCH 3605/4122] RDMA/rxe: Enable RDMA FLUSH capability for rxe
 device

Now we are ready to enable RDMA FLUSH capability for RXE.
It can support Global Visibility and Persistence placement types.

Link: https://lore.kernel.org/r/20221206130201.30986-11-lizhijian@fujitsu.com
Reviewed-by: Zhu Yanjun <zyjzyj2000@gmail.com>
Signed-off-by: Li Zhijian <lizhijian@fujitsu.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_param.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h
index bbc88cd71d950..a754fc902e3d1 100644
--- a/drivers/infiniband/sw/rxe/rxe_param.h
+++ b/drivers/infiniband/sw/rxe/rxe_param.h
@@ -51,6 +51,8 @@ enum rxe_device_param {
 					| IB_DEVICE_SRQ_RESIZE
 					| IB_DEVICE_MEM_MGT_EXTENSIONS
 					| IB_DEVICE_MEM_WINDOW
+					| IB_DEVICE_FLUSH_GLOBAL
+					| IB_DEVICE_FLUSH_PERSISTENT
 #ifdef CONFIG_64BIT
 					| IB_DEVICE_MEM_WINDOW_TYPE_2B
 					| IB_DEVICE_ATOMIC_WRITE,
-- 
GitLab


From bd328def2f987ebd4e20725a490f005556d737bf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 26 Nov 2022 06:09:59 +0100
Subject: [PATCH 3606/4122] firmware_loader: remove #include
 <generated/utsrelease.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

utsrelease.h is potentially generated on each build.
By removing this unused include we can get rid of some spurious
recompilations.

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Reviewed-by: Russ Weight <russell.h.weight@intel.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 drivers/base/firmware_loader/firmware.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/base/firmware_loader/firmware.h b/drivers/base/firmware_loader/firmware.h
index fe77e91c38a21..bf549d6500d7b 100644
--- a/drivers/base/firmware_loader/firmware.h
+++ b/drivers/base/firmware_loader/firmware.h
@@ -9,8 +9,6 @@
 #include <linux/list.h>
 #include <linux/completion.h>
 
-#include <generated/utsrelease.h>
-
 /**
  * enum fw_opt - options to control firmware loading behaviour
  *
-- 
GitLab


From 9edb4fd3d70a9dffd8ac6af6d060e97672b4a22f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <linux@weissschuh.net>
Date: Sat, 26 Nov 2022 06:10:01 +0100
Subject: [PATCH 3607/4122] init/version.c: remove #include
 <generated/utsrelease.h>
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 2df8220cc511 ("kbuild: build init/built-in.a just once") moved
the usage of the define UTS_RELEASE to the file version-timestamp.c.

version-timestamp.c in turn is included from version.c but already
includes utsrelease.h itself properly.

The unneeded include of utsrelease.h from version.c can be dropped.

Fixes: 2df8220cc511 ("kbuild: build init/built-in.a just once")
Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 init/version.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/init/version.c b/init/version.c
index 01d4ab05f0bac..f117921811b45 100644
--- a/init/version.c
+++ b/init/version.c
@@ -15,7 +15,6 @@
 #include <linux/printk.h>
 #include <linux/uts.h>
 #include <linux/utsname.h>
-#include <generated/utsrelease.h>
 #include <linux/proc_ns.h>
 
 static int __init early_hostname(char *arg)
-- 
GitLab


From 4bf73588165ba7d32131a043775557a54b6e1db5 Mon Sep 17 00:00:00 2001
From: Dmitry Goncharov <dgoncharov@users.sf.net>
Date: Mon, 5 Dec 2022 16:48:19 -0500
Subject: [PATCH 3608/4122] kbuild: Port silent mode detection to future gnu
 make.

Port silent mode detection to the future (post make-4.4) versions of gnu make.

Makefile contains the following piece of make code to detect if option -s is
specified on the command line.

ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)

This code is executed by make at parse time and assumes that MAKEFLAGS
does not contain command line variable definitions.
Currently if the user defines a=s on the command line, then at build only
time MAKEFLAGS contains " -- a=s".
However, starting with commit dc2d963989b96161472b2cd38cef5d1f4851ea34
MAKEFLAGS contains command line definitions at both parse time and
build time.

This '-s' detection code then confuses a command line variable
definition which contains letter 's' with option -s.

$ # old make
$ make net/wireless/ocb.o a=s
  CALL    scripts/checksyscalls.sh
  DESCEND objtool
$ # this a new make which defines makeflags at parse time
$ ~/src/gmake/make/l64/make net/wireless/ocb.o a=s
$

We can see here that the letter 's' from 'a=s' was confused with -s.

This patch checks for presence of -s using a method recommended by the
make manual here
https://www.gnu.org/software/make/manual/make.html#Testing-Flags.

Link: https://lists.gnu.org/archive/html/bug-make/2022-11/msg00190.html
Reported-by: Jan Palus <jpalus+gnu@fastmail.com>
Signed-off-by: Dmitry Goncharov <dgoncharov@users.sf.net>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Makefile | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index 6f846b1f2618f..fbd9ff4a61e70 100644
--- a/Makefile
+++ b/Makefile
@@ -93,10 +93,17 @@ endif
 
 # If the user is running make -s (silent mode), suppress echoing of
 # commands
+# make-4.0 (and later) keep single letter options in the 1st word of MAKEFLAGS.
 
-ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)
-  quiet=silent_
-  KBUILD_VERBOSE = 0
+ifeq ($(filter 3.%,$(MAKE_VERSION)),)
+silence:=$(findstring s,$(firstword -$(MAKEFLAGS)))
+else
+silence:=$(findstring s,$(filter-out --%,$(MAKEFLAGS)))
+endif
+
+ifeq ($(silence),s)
+quiet=silent_
+KBUILD_VERBOSE = 0
 endif
 
 export quiet Q KBUILD_VERBOSE
-- 
GitLab


From 2ba8c7dc71c098935977528747b82ffae43f3f18 Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Tue, 29 Nov 2022 16:00:50 +0100
Subject: [PATCH 3609/4122] riscv: Don't duplicate __ALTERNATIVE_CFG in
 __ALTERNATIVE_CFG_2

Build __ALTERNATIVE_CFG_2 by adding on to __ALTERNATIVE_CFG rather
than duplicating it.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221129150053.50464-2-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/alternative-macros.h | 18 ++----------------
 1 file changed, 2 insertions(+), 16 deletions(-)

diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index ec2f3f1b836f3..c7036166af2c6 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -49,14 +49,7 @@
 
 .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
 				  new_c_2, vendor_id_2, errata_id_2, enable_2
-886 :
-	.option push
-	.option norvc
-	.option norelax
-	\old_c
-	.option pop
-887 :
-	ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1
+	__ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1
 	ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
 .endm
 
@@ -116,14 +109,7 @@
 					enable_1,			\
 				   new_c_2, vendor_id_2, errata_id_2,	\
 					enable_2)			\
-	"886 :\n"							\
-	".option push\n"						\
-	".option norvc\n"						\
-	".option norelax\n"						\
-	old_c "\n"							\
-	".option pop\n"							\
-	"887 :\n"							\
-	ALT_NEW_CONTENT(vendor_id_1, errata_id_1, enable_1, new_c_1)	\
+	__ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \
 	ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2)
 
 #define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1,	\
-- 
GitLab


From 7d52eace1bf5c55704bb0ca5dc8f2489927683ff Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Tue, 29 Nov 2022 16:00:51 +0100
Subject: [PATCH 3610/4122] riscv: alternatives: Don't name unused macro
 parameters

Without CONFIG_RISCV_ALTERNATIVE only the first parameter of the
ALTERNATIVE macros is needed. Use ... for the rest to cut down on
clutter. While there, fix a couple space vs. tab issues.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221129150053.50464-3-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/alternative-macros.h | 20 +++++++-------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index c7036166af2c6..7cc2b587c5c4e 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -130,28 +130,22 @@
 	\old_c
 .endm
 
-#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+#define _ALTERNATIVE_CFG(old_c, ...)	\
 	__ALTERNATIVE_CFG old_c
 
-#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1,	\
-					CONFIG_k_1,			\
-				  new_c_2, vendor_id_2, errata_id_2,	\
-					CONFIG_k_2)			\
-       __ALTERNATIVE_CFG old_c
+#define _ALTERNATIVE_CFG_2(old_c, ...)	\
+	__ALTERNATIVE_CFG old_c
 
 #else /* !__ASSEMBLY__ */
 
-#define __ALTERNATIVE_CFG(old_c)  \
+#define __ALTERNATIVE_CFG(old_c)	\
 	old_c "\n"
 
-#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+#define _ALTERNATIVE_CFG(old_c, ...)	\
 	__ALTERNATIVE_CFG(old_c)
 
-#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1,	\
-					CONFIG_k_1,			\
-				  new_c_2, vendor_id_2, errata_id_2,	\
-					CONFIG_k_2) \
-       __ALTERNATIVE_CFG(old_c)
+#define _ALTERNATIVE_CFG_2(old_c, ...)	\
+	__ALTERNATIVE_CFG(old_c)
 
 #endif /* __ASSEMBLY__ */
 #endif /* CONFIG_RISCV_ALTERNATIVE */
-- 
GitLab


From bb2efcde594628ae08ee6e4be51b2047df9d2d06 Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Tue, 29 Nov 2022 16:00:52 +0100
Subject: [PATCH 3611/4122] riscv: alternatives: Drop the underscores from the
 assembly macro names

The underscores aren't needed because there isn't anything already
named without them and the _CFG extension. This is a bit of a cleanup
by itself, but the real motivation is for a coming patch which would
otherwise need to add two more underscores to these macro names,
i.e. ____ALTERNATIVE_CFG, and that'd be gross.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221129150053.50464-4-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/alternative-macros.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 7cc2b587c5c4e..9ea95331a2801 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -33,7 +33,7 @@
 	.endif
 .endm
 
-.macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
+.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
 886 :
 	.option push
 	.option norvc
@@ -45,11 +45,11 @@
 .endm
 
 #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
-	__ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)
+	ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)
 
-.macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
-				  new_c_2, vendor_id_2, errata_id_2, enable_2
-	__ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1
+.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1,	\
+				new_c_2, vendor_id_2, errata_id_2, enable_2
+	ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1
 	ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
 .endm
 
@@ -57,9 +57,9 @@
 					CONFIG_k_1,			\
 				  new_c_2, vendor_id_2, errata_id_2,	\
 					CONFIG_k_2)			\
-	__ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1,	\
+	ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1,	\
 					IS_ENABLED(CONFIG_k_1),		\
-				   new_c_2, vendor_id_2, errata_id_2,	\
+				 new_c_2, vendor_id_2, errata_id_2,	\
 					IS_ENABLED(CONFIG_k_2)
 
 #else /* !__ASSEMBLY__ */
@@ -126,15 +126,15 @@
 #else /* CONFIG_RISCV_ALTERNATIVE */
 #ifdef __ASSEMBLY__
 
-.macro __ALTERNATIVE_CFG old_c
+.macro ALTERNATIVE_CFG old_c
 	\old_c
 .endm
 
 #define _ALTERNATIVE_CFG(old_c, ...)	\
-	__ALTERNATIVE_CFG old_c
+	ALTERNATIVE_CFG old_c
 
 #define _ALTERNATIVE_CFG_2(old_c, ...)	\
-	__ALTERNATIVE_CFG old_c
+	ALTERNATIVE_CFG old_c
 
 #else /* !__ASSEMBLY__ */
 
-- 
GitLab


From 26fb4b90b745a808e94a81dc732d440c285fa74b Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Tue, 29 Nov 2022 16:00:53 +0100
Subject: [PATCH 3612/4122] riscv: Don't duplicate _ALTERNATIVE_CFG* macros

Reduce clutter by only defining the _ALTERNATIVE_CFG* macros once,
rather than once for assembly and once for C. To do that, we need to
add __ALTERNATIVE_CFG* macros to the assembly side, but those are
one-liners. Also take the opportunity to do a bit of reformatting,
taking full advantage of the fact checkpatch gives us 100 char lines.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Acked-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221129150053.50464-5-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/alternative-macros.h | 53 +++++++--------------
 1 file changed, 17 insertions(+), 36 deletions(-)

diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 9ea95331a2801..7226e2462584b 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -44,23 +44,14 @@
 	ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c
 .endm
 
-#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
-	ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)
-
 .macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1,	\
 				new_c_2, vendor_id_2, errata_id_2, enable_2
 	ALTERNATIVE_CFG \old_c, \new_c_1, \vendor_id_1, \errata_id_1, \enable_1
 	ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
 .endm
 
-#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1,	\
-					CONFIG_k_1,			\
-				  new_c_2, vendor_id_2, errata_id_2,	\
-					CONFIG_k_2)			\
-	ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1,	\
-					IS_ENABLED(CONFIG_k_1),		\
-				 new_c_2, vendor_id_2, errata_id_2,	\
-					IS_ENABLED(CONFIG_k_2)
+#define __ALTERNATIVE_CFG(...)		ALTERNATIVE_CFG __VA_ARGS__
+#define __ALTERNATIVE_CFG_2(...)	ALTERNATIVE_CFG_2 __VA_ARGS__
 
 #else /* !__ASSEMBLY__ */
 
@@ -102,27 +93,21 @@
 	"887 :\n"							\
 	ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c)
 
-#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k)	\
-	__ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k))
-
-#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1,	\
-					enable_1,			\
-				   new_c_2, vendor_id_2, errata_id_2,	\
-					enable_2)			\
-	__ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \
+#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1,	\
+				   new_c_2, vendor_id_2, errata_id_2, enable_2)	\
+	__ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1)	\
 	ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2)
 
-#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1,	\
-					CONFIG_k_1,			\
-				  new_c_2, vendor_id_2, errata_id_2,	\
-					CONFIG_k_2)			\
-	__ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1,	\
-					IS_ENABLED(CONFIG_k_1),		\
-				   new_c_2, vendor_id_2, errata_id_2,	\
-					IS_ENABLED(CONFIG_k_2))
-
 #endif /* __ASSEMBLY__ */
 
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k)	\
+	__ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k))
+
+#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1,		\
+				  new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2)		\
+	__ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1),	\
+				   new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2))
+
 #else /* CONFIG_RISCV_ALTERNATIVE */
 #ifdef __ASSEMBLY__
 
@@ -173,13 +158,9 @@
  * on the following sample code and then replace ALTERNATIVE() with
  * ALTERNATIVE_2() to append its customized content.
  */
-#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1,		\
-					errata_id_1, CONFIG_k_1,	\
-				   new_content_2, vendor_id_2,		\
-					errata_id_2, CONFIG_k_2)	\
-	_ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1,	\
-					    errata_id_1, CONFIG_k_1,	\
-					new_content_2, vendor_id_2,	\
-					    errata_id_2, CONFIG_k_2)
+#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1,		\
+				   new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2)		\
+	_ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1,	\
+					new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2)
 
 #endif
-- 
GitLab


From 726855549cf8d5c6b05795cf74a9c23584f45544 Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Tue, 29 Nov 2022 15:34:45 +0100
Subject: [PATCH 3613/4122] RISC-V: Improve use of isa2hwcap[]

Improve isa2hwcap[] by removing it from static storage, as
riscv_fill_hwcap() is only called once, and by reducing its size
from 256 bytes to 26. The latter improvement is possible because
isa2hwcap[] will never be indexed with capital letters and we can
precompute the offsets from 'a'.

No functional change intended.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20221129143447.49714-2-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/cpufeature.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 694267d1fe814..4677320d7e318 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -74,15 +74,15 @@ void __init riscv_fill_hwcap(void)
 	const char *isa;
 	char print_str[NUM_ALPHA_EXTS + 1];
 	int i, j, rc;
-	static unsigned long isa2hwcap[256] = {0};
+	unsigned long isa2hwcap[26] = {0};
 	unsigned long hartid;
 
-	isa2hwcap['i'] = isa2hwcap['I'] = COMPAT_HWCAP_ISA_I;
-	isa2hwcap['m'] = isa2hwcap['M'] = COMPAT_HWCAP_ISA_M;
-	isa2hwcap['a'] = isa2hwcap['A'] = COMPAT_HWCAP_ISA_A;
-	isa2hwcap['f'] = isa2hwcap['F'] = COMPAT_HWCAP_ISA_F;
-	isa2hwcap['d'] = isa2hwcap['D'] = COMPAT_HWCAP_ISA_D;
-	isa2hwcap['c'] = isa2hwcap['C'] = COMPAT_HWCAP_ISA_C;
+	isa2hwcap['i' - 'a'] = COMPAT_HWCAP_ISA_I;
+	isa2hwcap['m' - 'a'] = COMPAT_HWCAP_ISA_M;
+	isa2hwcap['a' - 'a'] = COMPAT_HWCAP_ISA_A;
+	isa2hwcap['f' - 'a'] = COMPAT_HWCAP_ISA_F;
+	isa2hwcap['d' - 'a'] = COMPAT_HWCAP_ISA_D;
+	isa2hwcap['c' - 'a'] = COMPAT_HWCAP_ISA_C;
 
 	elf_hwcap = 0;
 
@@ -196,8 +196,10 @@ void __init riscv_fill_hwcap(void)
 			if (unlikely(ext_err))
 				continue;
 			if (!ext_long) {
-				this_hwcap |= isa2hwcap[(unsigned char)(*ext)];
-				set_bit(*ext - 'a', this_isa);
+				int nr = *ext - 'a';
+
+				this_hwcap |= isa2hwcap[nr];
+				set_bit(nr, this_isa);
 			} else {
 				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
 				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
-- 
GitLab


From fb0ff0a95d61f69415cb8d8f2d921e1f7eed75af Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Tue, 29 Nov 2022 15:34:46 +0100
Subject: [PATCH 3614/4122] RISC-V: Introduce riscv_isa_extension_check

Currently any isa extension found in the isa string is set in the
isa bitmap. An isa extension set in the bitmap indicates that the
extension is present and may be used (a.k.a is enabled). However,
when an extension cannot be used due to missing dependencies or
errata it should not be added to the bitmap. Introduce a function
where additional checks may be placed in order to determine if an
extension should be enabled or not.

Note, the checks may simply indicate an issue with the DT, but,
since extensions may be used in early boot, it's not always possible
to simply produce an error at the point the issue is determined.
It's best to keep the extension disabled and produce an error.

No functional change intended, as the function is only introduced
and always returns true. A later patch will provide checks for an
isa extension.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20221129143447.49714-3-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/cpufeature.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 4677320d7e318..220be7222129b 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -68,6 +68,11 @@ bool __riscv_isa_extension_available(const unsigned long *isa_bitmap, int bit)
 }
 EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
 
+static bool riscv_isa_extension_check(int id)
+{
+	return true;
+}
+
 void __init riscv_fill_hwcap(void)
 {
 	struct device_node *node;
@@ -189,7 +194,8 @@ void __init riscv_fill_hwcap(void)
 #define SET_ISA_EXT_MAP(name, bit)						\
 			do {							\
 				if ((ext_end - ext == sizeof(name) - 1) &&	\
-				     !memcmp(ext, name, sizeof(name) - 1))	\
+				     !memcmp(ext, name, sizeof(name) - 1) &&	\
+				     riscv_isa_extension_check(bit))		\
 					set_bit(bit, this_isa);			\
 			} while (false)						\
 
@@ -198,8 +204,10 @@ void __init riscv_fill_hwcap(void)
 			if (!ext_long) {
 				int nr = *ext - 'a';
 
-				this_hwcap |= isa2hwcap[nr];
-				set_bit(nr, this_isa);
+				if (riscv_isa_extension_check(nr)) {
+					this_hwcap |= isa2hwcap[nr];
+					set_bit(nr, this_isa);
+				}
 			} else {
 				SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
 				SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
-- 
GitLab


From 9daaca4a44d6f0741060e67c54a0175c035edb1f Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Tue, 29 Nov 2022 15:34:47 +0100
Subject: [PATCH 3615/4122] RISC-V: Ensure Zicbom has a valid block size

When a DT puts zicbom in the isa string, but does not provide a block
size, ALT_CMO_OP() will attempt to do cache operations on address
zero since the start address will be ANDed with zero. We can't simply
BUG() in riscv_init_cbom_blocksize() when we fail to find a block
size because the failure will happen before logging works, leaving
users to scratch their heads as to why the boot hung. Instead, ensure
Zicbom is disabled and output an error which will hopefully alert
people that the DT needs to be fixed. While at it, add a check that
the block size is a power-of-2 too.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Heiko Stuebner <heiko@sntech.de>
Link: https://lore.kernel.org/r/20221129143447.49714-4-ajones@ventanamicro.com
[Palmer: base on 5c20a3a9df19 ("RISC-V: Fix compilation without RISCV_ISA_ZICBOM"]
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/cpufeature.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 220be7222129b..93e45560af307 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -9,6 +9,7 @@
 #include <linux/bitmap.h>
 #include <linux/ctype.h>
 #include <linux/libfdt.h>
+#include <linux/log2.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <asm/alternative.h>
@@ -70,6 +71,18 @@ EXPORT_SYMBOL_GPL(__riscv_isa_extension_available);
 
 static bool riscv_isa_extension_check(int id)
 {
+	switch (id) {
+	case RISCV_ISA_EXT_ZICBOM:
+		if (!riscv_cbom_block_size) {
+			pr_err("Zicbom detected in ISA string, but no cbom-block-size found\n");
+			return false;
+		} else if (!is_power_of_2(riscv_cbom_block_size)) {
+			pr_err("cbom-block-size present, but is not a power-of-2\n");
+			return false;
+		}
+		return true;
+	}
+
 	return true;
 }
 
-- 
GitLab


From 5f2e094ed2592abb3d1f49e263957188e121c2ac Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@kernel.org>
Date: Thu, 20 Oct 2022 00:31:55 +0900
Subject: [PATCH 3616/4122] tracing: Allow multiple hitcount values in
 histograms

The hitcount is treated specially in the histograms - since it's
always expected to be there regardless of whether the user specified
anything or not, it's always added as the first histogram value.

Currently the code doesn't allow it to be added more than once as a
value, which is inconsistent with all the other possible values.  It
would seem to be a pointless thing to want to do, but other features
being added such as percent and graph modifiers don't work properly
with the current hitcount restrictions.

Fix this by allowing multiple hitcounts to be added.

Link: https://lore.kernel.org/linux-trace-kernel/166610812248.56030.16754785928712505251.stgit@devnote2

Signed-off-by: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Tested-by: Tom Zanussi <zanussi@kernel.org>
---
 kernel/trace/trace_events_hist.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 1c82478e8dffe..31d58ddcc1d9e 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -1356,6 +1356,8 @@ static const char *hist_field_name(struct hist_field *field,
 			field_name = field->name;
 	} else if (field->flags & HIST_FIELD_FL_TIMESTAMP)
 		field_name = "common_timestamp";
+	else if (field->flags & HIST_FIELD_FL_HITCOUNT)
+		field_name = "hitcount";
 
 	if (field_name == NULL)
 		field_name = "";
@@ -2328,6 +2330,8 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
 			hist_data->attrs->ts_in_usecs = true;
 	} else if (strcmp(field_name, "common_cpu") == 0)
 		*flags |= HIST_FIELD_FL_CPU;
+	else if (strcmp(field_name, "hitcount") == 0)
+		*flags |= HIST_FIELD_FL_HITCOUNT;
 	else {
 		field = trace_find_event_field(file->event_call, field_name);
 		if (!field || !field->size) {
@@ -4328,8 +4332,8 @@ static int create_var_field(struct hist_trigger_data *hist_data,
 static int create_val_fields(struct hist_trigger_data *hist_data,
 			     struct trace_event_file *file)
 {
+	unsigned int i, j = 1, n_hitcount = 0;
 	char *fields_str, *field_str;
-	unsigned int i, j = 1;
 	int ret;
 
 	ret = create_hitcount_val(hist_data);
@@ -4346,8 +4350,10 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
 		if (!field_str)
 			break;
 
-		if (strcmp(field_str, "hitcount") == 0)
-			continue;
+		if (strcmp(field_str, "hitcount") == 0) {
+			if (!n_hitcount++)
+				continue;
+		}
 
 		ret = create_val_field(hist_data, j++, file, field_str);
 		if (ret)
-- 
GitLab


From abaa5258ce5e5887a9de049f50a85dc023391a1c Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Thu, 20 Oct 2022 00:31:55 +0900
Subject: [PATCH 3617/4122] tracing: Add .percent suffix option to histogram
 values

Add .percent suffix option to show the histogram values in percentage.
This feature is useful when we need yo undersntand the overall trend
for the histograms of large values.
E.g. this shows the runtime percentage for each tasks.

------
  # cd /sys/kernel/debug/tracing/
  # echo hist:keys=pid:vals=hitcount,runtime.percent:sort=pid > \
    events/sched/sched_stat_runtime/trigger
  # sleep 10
  # cat events/sched/sched_stat_runtime/hist
 # event histogram
 #
 # trigger info: hist:keys=pid:vals=hitcount,runtime.percent:sort=pid:size=2048 [active]
 #

 { pid:          8 } hitcount:          7  runtime (%):   4.14
 { pid:         14 } hitcount:          5  runtime (%):   3.69
 { pid:         16 } hitcount:         11  runtime (%):   3.41
 { pid:         61 } hitcount:         41  runtime (%):  19.75
 { pid:         65 } hitcount:          4  runtime (%):   1.48
 { pid:         70 } hitcount:          6  runtime (%):   3.60
 { pid:         72 } hitcount:          2  runtime (%):   1.10
 { pid:        144 } hitcount:         10  runtime (%):  32.01
 { pid:        151 } hitcount:          8  runtime (%):  22.66
 { pid:        152 } hitcount:          2  runtime (%):   8.10

 Totals:
     Hits: 96
     Entries: 10
     Dropped: 0
-----

Link: https://lore.kernel.org/linux-trace-kernel/166610813077.56030.4238090506973562347.stgit@devnote2

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Tom Zanussi <zanussi@kernel.org>
Tested-by: Tom Zanussi <zanussi@kernel.org>
---
 kernel/trace/trace.c             |  3 +-
 kernel/trace/trace_events_hist.c | 90 +++++++++++++++++++++++++++-----
 2 files changed, 78 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 93a75a97118f1..08e9568849b1a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5724,7 +5724,8 @@ static const char readme_msg[] =
 	"\t            .syscall    display a syscall id as a syscall name\n"
 	"\t            .log2       display log2 value rather than raw number\n"
 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
-	"\t            .usecs      display a common_timestamp in microseconds\n\n"
+	"\t            .usecs      display a common_timestamp in microseconds\n"
+	"\t            .percent    display a number of percentage value\n\n"
 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
 	"\t    trigger or to start a hist trigger but not log any events\n"
 	"\t    until told to do so.  'continue' can be used to start or\n"
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 31d58ddcc1d9e..35b0e956f06ef 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -506,6 +506,7 @@ enum hist_field_flags {
 	HIST_FIELD_FL_ALIAS		= 1 << 16,
 	HIST_FIELD_FL_BUCKET		= 1 << 17,
 	HIST_FIELD_FL_CONST		= 1 << 18,
+	HIST_FIELD_FL_PERCENT		= 1 << 19,
 };
 
 struct var_defs {
@@ -1707,6 +1708,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
 		flags_str = "buckets";
 	else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP_USECS)
 		flags_str = "usecs";
+	else if (hist_field->flags & HIST_FIELD_FL_PERCENT)
+		flags_str = "percent";
 
 	return flags_str;
 }
@@ -2315,6 +2318,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
 			if (ret || !(*buckets))
 				goto error;
 			*flags |= HIST_FIELD_FL_BUCKET;
+		} else if (strncmp(modifier, "percent", 7) == 0) {
+			if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY))
+				goto error;
+			*flags |= HIST_FIELD_FL_PERCENT;
 		} else {
  error:
 			hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
@@ -5291,33 +5298,69 @@ static void hist_trigger_print_key(struct seq_file *m,
 	seq_puts(m, "}");
 }
 
+/* Get the 100 times of the percentage of @val in @total */
+static inline unsigned int __get_percentage(u64 val, u64 total)
+{
+	if (!total)
+		goto div0;
+
+	if (val < (U64_MAX / 10000))
+		return (unsigned int)div64_ul(val * 10000, total);
+
+	total = div64_u64(total, 10000);
+	if (!total)
+		goto div0;
+
+	return (unsigned int)div64_ul(val, total);
+div0:
+	return val ? UINT_MAX : 0;
+}
+
+static void hist_trigger_print_val(struct seq_file *m, unsigned int idx,
+				   const char *field_name, unsigned long flags,
+				   u64 *totals, struct tracing_map_elt *elt)
+{
+	u64 val = tracing_map_read_sum(elt, idx);
+	unsigned int pc;
+
+	if (flags & HIST_FIELD_FL_PERCENT) {
+		pc = __get_percentage(val, totals[idx]);
+		if (pc == UINT_MAX)
+			seq_printf(m, " %s (%%):[ERROR]", field_name);
+		else
+			seq_printf(m, " %s (%%): %3u.%02u", field_name,
+					pc / 100, pc % 100);
+	} else if (flags & HIST_FIELD_FL_HEX) {
+		seq_printf(m, " %s: %10llx", field_name, val);
+	} else {
+		seq_printf(m, " %s: %10llu", field_name, val);
+	}
+}
+
 static void hist_trigger_entry_print(struct seq_file *m,
 				     struct hist_trigger_data *hist_data,
+				     u64 *totals,
 				     void *key,
 				     struct tracing_map_elt *elt)
 {
 	const char *field_name;
-	unsigned int i;
+	unsigned int i = HITCOUNT_IDX;
+	unsigned long flags;
 
 	hist_trigger_print_key(m, hist_data, key, elt);
 
-	seq_printf(m, " hitcount: %10llu",
-		   tracing_map_read_sum(elt, HITCOUNT_IDX));
+	/* At first, show the raw hitcount always */
+	hist_trigger_print_val(m, i, "hitcount", 0, totals, elt);
 
 	for (i = 1; i < hist_data->n_vals; i++) {
 		field_name = hist_field_name(hist_data->fields[i], 0);
+		flags = hist_data->fields[i]->flags;
 
-		if (hist_data->fields[i]->flags & HIST_FIELD_FL_VAR ||
-		    hist_data->fields[i]->flags & HIST_FIELD_FL_EXPR)
+		if (flags & HIST_FIELD_FL_VAR || flags & HIST_FIELD_FL_EXPR)
 			continue;
 
-		if (hist_data->fields[i]->flags & HIST_FIELD_FL_HEX) {
-			seq_printf(m, "  %s: %10llx", field_name,
-				   tracing_map_read_sum(elt, i));
-		} else {
-			seq_printf(m, "  %s: %10llu", field_name,
-				   tracing_map_read_sum(elt, i));
-		}
+		seq_puts(m, " ");
+		hist_trigger_print_val(m, i, field_name, flags, totals, elt);
 	}
 
 	print_actions(m, hist_data, elt);
@@ -5330,7 +5373,8 @@ static int print_entries(struct seq_file *m,
 {
 	struct tracing_map_sort_entry **sort_entries = NULL;
 	struct tracing_map *map = hist_data->map;
-	int i, n_entries;
+	int i, j, n_entries;
+	u64 *totals = NULL;
 
 	n_entries = tracing_map_sort_entries(map, hist_data->sort_keys,
 					     hist_data->n_sort_keys,
@@ -5338,11 +5382,29 @@ static int print_entries(struct seq_file *m,
 	if (n_entries < 0)
 		return n_entries;
 
+	for (j = 0; j < hist_data->n_vals; j++) {
+		if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT))
+			continue;
+		if (!totals) {
+			totals = kcalloc(hist_data->n_vals, sizeof(u64),
+					 GFP_KERNEL);
+			if (!totals) {
+				n_entries = -ENOMEM;
+				goto out;
+			}
+		}
+		for (i = 0; i < n_entries; i++)
+			totals[j] += tracing_map_read_sum(
+					sort_entries[i]->elt, j);
+	}
+
 	for (i = 0; i < n_entries; i++)
-		hist_trigger_entry_print(m, hist_data,
+		hist_trigger_entry_print(m, hist_data, totals,
 					 sort_entries[i]->key,
 					 sort_entries[i]->elt);
 
+	kfree(totals);
+out:
 	tracing_map_destroy_sort_entries(sort_entries, n_entries);
 
 	return n_entries;
-- 
GitLab


From a2c54256dec7510477e2b4f4db187e638f7cac37 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Thu, 20 Oct 2022 00:31:55 +0900
Subject: [PATCH 3618/4122] tracing: Add .graph suffix option to histogram
 value

Add the .graph suffix which shows the bar graph of the histogram value.

For example, the below example shows that the bar graph
of the histogram of the runtime for each tasks.

------
  # cd /sys/kernel/debug/tracing/
  # echo hist:keys=pid:vals=runtime.graph:sort=pid > \
   events/sched/sched_stat_runtime/trigger
  # sleep 10
  # cat events/sched/sched_stat_runtime/hist
 # event histogram
 #
 # trigger info: hist:keys=pid:vals=hitcount,runtime.graph:sort=pid:size=2048 [active]
 #

 { pid:         14 } hitcount:          2  runtime:
 { pid:         16 } hitcount:          8  runtime:
 { pid:         26 } hitcount:          1  runtime:
 { pid:         57 } hitcount:          3  runtime:
 { pid:         61 } hitcount:         20  runtime: ###
 { pid:         66 } hitcount:          2  runtime:
 { pid:         70 } hitcount:          3  runtime:
 { pid:         72 } hitcount:          2  runtime:
 { pid:        145 } hitcount:         14  runtime: ####################
 { pid:        152 } hitcount:          5  runtime: #######
 { pid:        153 } hitcount:          2  runtime: ####

 Totals:
     Hits: 62
     Entries: 11
     Dropped: 0
-------

Link: https://lore.kernel.org/linux-trace-kernel/166610813953.56030.10944148382315789485.stgit@devnote2

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Tom Zanussi <zanussi@kernel.org>
Tested-by: Tom Zanussi <zanussi@kernel.org>
---
 kernel/trace/trace.c             |  3 +-
 kernel/trace/trace_events_hist.c | 77 +++++++++++++++++++++++++-------
 2 files changed, 63 insertions(+), 17 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 08e9568849b1a..55aec4616d8b7 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5725,7 +5725,8 @@ static const char readme_msg[] =
 	"\t            .log2       display log2 value rather than raw number\n"
 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
 	"\t            .usecs      display a common_timestamp in microseconds\n"
-	"\t            .percent    display a number of percentage value\n\n"
+	"\t            .percent    display a number of percentage value\n"
+	"\t            .graph      display a bar-graph of a value\n\n"
 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
 	"\t    trigger or to start a hist trigger but not log any events\n"
 	"\t    until told to do so.  'continue' can be used to start or\n"
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 35b0e956f06ef..946b2b8f0f2ca 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -507,6 +507,7 @@ enum hist_field_flags {
 	HIST_FIELD_FL_BUCKET		= 1 << 17,
 	HIST_FIELD_FL_CONST		= 1 << 18,
 	HIST_FIELD_FL_PERCENT		= 1 << 19,
+	HIST_FIELD_FL_GRAPH		= 1 << 20,
 };
 
 struct var_defs {
@@ -1710,6 +1711,8 @@ static const char *get_hist_field_flags(struct hist_field *hist_field)
 		flags_str = "usecs";
 	else if (hist_field->flags & HIST_FIELD_FL_PERCENT)
 		flags_str = "percent";
+	else if (hist_field->flags & HIST_FIELD_FL_GRAPH)
+		flags_str = "graph";
 
 	return flags_str;
 }
@@ -2322,6 +2325,10 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,
 			if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY))
 				goto error;
 			*flags |= HIST_FIELD_FL_PERCENT;
+		} else if (strncmp(modifier, "graph", 5) == 0) {
+			if (*flags & (HIST_FIELD_FL_VAR | HIST_FIELD_FL_KEY))
+				goto error;
+			*flags |= HIST_FIELD_FL_GRAPH;
 		} else {
  error:
 			hist_err(tr, HIST_ERR_BAD_FIELD_MODIFIER, errpos(modifier));
@@ -5316,20 +5323,52 @@ div0:
 	return val ? UINT_MAX : 0;
 }
 
+#define BAR_CHAR '#'
+
+static inline const char *__fill_bar_str(char *buf, int size, u64 val, u64 max)
+{
+	unsigned int len = __get_percentage(val, max);
+	int i;
+
+	if (len == UINT_MAX) {
+		snprintf(buf, size, "[ERROR]");
+		return buf;
+	}
+
+	len = len * size / 10000;
+	for (i = 0; i < len && i < size; i++)
+		buf[i] = BAR_CHAR;
+	while (i < size)
+		buf[i++] = ' ';
+	buf[size] = '\0';
+
+	return buf;
+}
+
+struct hist_val_stat {
+	u64 max;
+	u64 total;
+};
+
 static void hist_trigger_print_val(struct seq_file *m, unsigned int idx,
 				   const char *field_name, unsigned long flags,
-				   u64 *totals, struct tracing_map_elt *elt)
+				   struct hist_val_stat *stats,
+				   struct tracing_map_elt *elt)
 {
 	u64 val = tracing_map_read_sum(elt, idx);
 	unsigned int pc;
+	char bar[21];
 
 	if (flags & HIST_FIELD_FL_PERCENT) {
-		pc = __get_percentage(val, totals[idx]);
+		pc = __get_percentage(val, stats[idx].total);
 		if (pc == UINT_MAX)
 			seq_printf(m, " %s (%%):[ERROR]", field_name);
 		else
 			seq_printf(m, " %s (%%): %3u.%02u", field_name,
 					pc / 100, pc % 100);
+	} else if (flags & HIST_FIELD_FL_GRAPH) {
+		seq_printf(m, " %s: %20s", field_name,
+			   __fill_bar_str(bar, 20, val, stats[idx].max));
 	} else if (flags & HIST_FIELD_FL_HEX) {
 		seq_printf(m, " %s: %10llx", field_name, val);
 	} else {
@@ -5339,7 +5378,7 @@ static void hist_trigger_print_val(struct seq_file *m, unsigned int idx,
 
 static void hist_trigger_entry_print(struct seq_file *m,
 				     struct hist_trigger_data *hist_data,
-				     u64 *totals,
+				     struct hist_val_stat *stats,
 				     void *key,
 				     struct tracing_map_elt *elt)
 {
@@ -5350,7 +5389,7 @@ static void hist_trigger_entry_print(struct seq_file *m,
 	hist_trigger_print_key(m, hist_data, key, elt);
 
 	/* At first, show the raw hitcount always */
-	hist_trigger_print_val(m, i, "hitcount", 0, totals, elt);
+	hist_trigger_print_val(m, i, "hitcount", 0, stats, elt);
 
 	for (i = 1; i < hist_data->n_vals; i++) {
 		field_name = hist_field_name(hist_data->fields[i], 0);
@@ -5360,7 +5399,7 @@ static void hist_trigger_entry_print(struct seq_file *m,
 			continue;
 
 		seq_puts(m, " ");
-		hist_trigger_print_val(m, i, field_name, flags, totals, elt);
+		hist_trigger_print_val(m, i, field_name, flags, stats, elt);
 	}
 
 	print_actions(m, hist_data, elt);
@@ -5374,7 +5413,8 @@ static int print_entries(struct seq_file *m,
 	struct tracing_map_sort_entry **sort_entries = NULL;
 	struct tracing_map *map = hist_data->map;
 	int i, j, n_entries;
-	u64 *totals = NULL;
+	struct hist_val_stat *stats = NULL;
+	u64 val;
 
 	n_entries = tracing_map_sort_entries(map, hist_data->sort_keys,
 					     hist_data->n_sort_keys,
@@ -5382,28 +5422,33 @@ static int print_entries(struct seq_file *m,
 	if (n_entries < 0)
 		return n_entries;
 
+	/* Calculate the max and the total for each field if needed. */
 	for (j = 0; j < hist_data->n_vals; j++) {
-		if (!(hist_data->fields[j]->flags & HIST_FIELD_FL_PERCENT))
+		if (!(hist_data->fields[j]->flags &
+			(HIST_FIELD_FL_PERCENT | HIST_FIELD_FL_GRAPH)))
 			continue;
-		if (!totals) {
-			totals = kcalloc(hist_data->n_vals, sizeof(u64),
-					 GFP_KERNEL);
-			if (!totals) {
+		if (!stats) {
+			stats = kcalloc(hist_data->n_vals, sizeof(*stats),
+				       GFP_KERNEL);
+			if (!stats) {
 				n_entries = -ENOMEM;
 				goto out;
 			}
 		}
-		for (i = 0; i < n_entries; i++)
-			totals[j] += tracing_map_read_sum(
-					sort_entries[i]->elt, j);
+		for (i = 0; i < n_entries; i++) {
+			val = tracing_map_read_sum(sort_entries[i]->elt, j);
+			stats[j].total += val;
+			if (stats[j].max < val)
+				stats[j].max = val;
+		}
 	}
 
 	for (i = 0; i < n_entries; i++)
-		hist_trigger_entry_print(m, hist_data, totals,
+		hist_trigger_entry_print(m, hist_data, stats,
 					 sort_entries[i]->key,
 					 sort_entries[i]->elt);
 
-	kfree(totals);
+	kfree(stats);
 out:
 	tracing_map_destroy_sort_entries(sort_entries, n_entries);
 
-- 
GitLab


From ccf47f5cc4cebdba7444cf1c04fc1b02d4b3cd15 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Thu, 20 Oct 2022 00:31:55 +0900
Subject: [PATCH 3619/4122] tracing: Add nohitcount option for suppressing
 display of raw hitcount

Add 'nohitcount' ('NOHC' for short) option for suppressing display of
the raw hitcount column in the histogram.
Note that you must specify at least one value except raw 'hitcount'
when you specify this nohitcount option.

  # cd /sys/kernel/debug/tracing/
  # echo hist:keys=pid:vals=runtime.percent,runtime.graph:sort=pid:NOHC > \
        events/sched/sched_stat_runtime/trigger
  # sleep 10
  # cat events/sched/sched_stat_runtime/hist
 # event histogram
 #
 # trigger info: hist:keys=pid:vals=runtime.percent,runtime.graph:sort=pid:size=2048:nohitcount  [active]
 #

 { pid:          8 }  runtime (%):   3.02  runtime: #
 { pid:         14 }  runtime (%):   2.25  runtime:
 { pid:         16 }  runtime (%):   2.25  runtime:
 { pid:         26 }  runtime (%):   0.17  runtime:
 { pid:         61 }  runtime (%):  11.52  runtime: ####
 { pid:         67 }  runtime (%):   1.56  runtime:
 { pid:         68 }  runtime (%):   0.84  runtime:
 { pid:         76 }  runtime (%):   0.92  runtime:
 { pid:        117 }  runtime (%):   2.50  runtime: #
 { pid:        146 }  runtime (%):  49.88  runtime: ####################
 { pid:        157 }  runtime (%):  16.63  runtime: ######
 { pid:        158 }  runtime (%):   8.38  runtime: ###

Link: https://lore.kernel.org/linux-trace-kernel/166610814787.56030.4980636083486339906.stgit@devnote2

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Tom Zanussi <zanussi@kernel.org>
Tested-by: Tom Zanussi <zanussi@kernel.org>
---
 kernel/trace/trace.c             |  3 +++
 kernel/trace/trace_events_hist.c | 34 ++++++++++++++++++++++++--------
 2 files changed, 29 insertions(+), 8 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 55aec4616d8b7..948f321b9df17 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5678,6 +5678,7 @@ static const char readme_msg[] =
 	"\t            [:size=#entries]\n"
 	"\t            [:pause][:continue][:clear]\n"
 	"\t            [:name=histname1]\n"
+	"\t            [:nohitcount]\n"
 	"\t            [:<handler>.<action>]\n"
 	"\t            [if <filter>]\n\n"
 	"\t    Note, special fields can be used as well:\n"
@@ -5734,6 +5735,8 @@ static const char readme_msg[] =
 	"\t    The 'clear' parameter will clear the contents of a running\n"
 	"\t    hist trigger and leave its current paused/active state\n"
 	"\t    unchanged.\n\n"
+	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
+	"\t    raw hitcount in the histogram.\n\n"
 	"\t    The enable_hist and disable_hist triggers can be used to\n"
 	"\t    have one event conditionally start and stop another event's\n"
 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 946b2b8f0f2ca..a0cd118af5272 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -69,7 +69,8 @@
 	C(INVALID_STR_OPERAND,	"String type can not be an operand in expression"), \
 	C(EXPECT_NUMBER,	"Expecting numeric literal"),		\
 	C(UNARY_MINUS_SUBEXPR,	"Unary minus not supported in sub-expressions"), \
-	C(DIVISION_BY_ZERO,	"Division by zero"),
+	C(DIVISION_BY_ZERO,	"Division by zero"),			\
+	C(NEED_NOHC_VAL,	"Non-hitcount value is required for 'nohitcount'"),
 
 #undef C
 #define C(a, b)		HIST_ERR_##a
@@ -526,6 +527,7 @@ struct hist_trigger_attrs {
 	bool		cont;
 	bool		clear;
 	bool		ts_in_usecs;
+	bool		no_hitcount;
 	unsigned int	map_bits;
 
 	char		*assignment_str[TRACING_MAP_VARS_MAX];
@@ -1550,7 +1552,10 @@ parse_hist_trigger_attrs(struct trace_array *tr, char *trigger_str)
 			ret = parse_assignment(tr, str, attrs);
 			if (ret)
 				goto free;
-		} else if (strcmp(str, "pause") == 0)
+		} else if (strcmp(str, "nohitcount") == 0 ||
+			   strcmp(str, "NOHC") == 0)
+			attrs->no_hitcount = true;
+		else if (strcmp(str, "pause") == 0)
 			attrs->pause = true;
 		else if ((strcmp(str, "cont") == 0) ||
 			 (strcmp(str, "continue") == 0))
@@ -4377,6 +4382,12 @@ static int create_val_fields(struct hist_trigger_data *hist_data,
 	if (fields_str && (strcmp(fields_str, "hitcount") != 0))
 		ret = -EINVAL;
  out:
+	/* There is only raw hitcount but nohitcount suppresses it. */
+	if (j == 1 && hist_data->attrs->no_hitcount) {
+		hist_err(hist_data->event_file->tr, HIST_ERR_NEED_NOHC_VAL, 0);
+		ret = -ENOENT;
+	}
+
 	return ret;
 }
 
@@ -5388,13 +5399,13 @@ static void hist_trigger_entry_print(struct seq_file *m,
 
 	hist_trigger_print_key(m, hist_data, key, elt);
 
-	/* At first, show the raw hitcount always */
-	hist_trigger_print_val(m, i, "hitcount", 0, stats, elt);
+	/* At first, show the raw hitcount if !nohitcount */
+	if (!hist_data->attrs->no_hitcount)
+		hist_trigger_print_val(m, i, "hitcount", 0, stats, elt);
 
 	for (i = 1; i < hist_data->n_vals; i++) {
 		field_name = hist_field_name(hist_data->fields[i], 0);
 		flags = hist_data->fields[i]->flags;
-
 		if (flags & HIST_FIELD_FL_VAR || flags & HIST_FIELD_FL_EXPR)
 			continue;
 
@@ -5839,6 +5850,7 @@ static int event_hist_trigger_print(struct seq_file *m,
 	struct hist_trigger_data *hist_data = data->private_data;
 	struct hist_field *field;
 	bool have_var = false;
+	bool show_val = false;
 	unsigned int i;
 
 	seq_puts(m, HIST_PREFIX);
@@ -5869,12 +5881,16 @@ static int event_hist_trigger_print(struct seq_file *m,
 			continue;
 		}
 
-		if (i == HITCOUNT_IDX)
+		if (i == HITCOUNT_IDX) {
+			if (hist_data->attrs->no_hitcount)
+				continue;
 			seq_puts(m, "hitcount");
-		else {
-			seq_puts(m, ",");
+		} else {
+			if (show_val)
+				seq_puts(m, ",");
 			hist_field_print(m, field);
 		}
+		show_val = true;
 	}
 
 	if (have_var) {
@@ -5925,6 +5941,8 @@ static int event_hist_trigger_print(struct seq_file *m,
 	seq_printf(m, ":size=%u", (1 << hist_data->map->map_bits));
 	if (hist_data->enable_timestamps)
 		seq_printf(m, ":clock=%s", hist_data->attrs->clock);
+	if (hist_data->attrs->no_hitcount)
+		seq_puts(m, ":nohitcount");
 
 	print_actions_spec(m, hist_data);
 
-- 
GitLab


From 8c2b99790196d34a58a2b00a4c9862d2de3af3e2 Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Thu, 20 Oct 2022 00:31:56 +0900
Subject: [PATCH 3620/4122] tracing: docs: Update histogram doc for
 .percent/.graph and 'nohitcount'

Update histogram document for .percent/.graph suffixes and 'nohitcount'
option.

Link: https://lore.kernel.org/linux-trace-kernel/166610815604.56030.4124933216911828519.stgit@devnote2

Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Reviewed-by: Tom Zanussi <zanussi@kernel.org>
Tested-by: Tom Zanussi <zanussi@kernel.org>
---
 Documentation/trace/histogram.rst | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/trace/histogram.rst b/Documentation/trace/histogram.rst
index 87bd772836c0c..f95459aa984f3 100644
--- a/Documentation/trace/histogram.rst
+++ b/Documentation/trace/histogram.rst
@@ -25,7 +25,7 @@ Documentation written by Tom Zanussi
 
         hist:keys=<field1[,field2,...]>[:values=<field1[,field2,...]>]
           [:sort=<field1[,field2,...]>][:size=#entries][:pause][:continue]
-          [:clear][:name=histname1][:<handler>.<action>] [if <filter>]
+          [:clear][:name=histname1][:nohitcount][:<handler>.<action>] [if <filter>]
 
   When a matching event is hit, an entry is added to a hash table
   using the key(s) and value(s) named.  Keys and values correspond to
@@ -79,6 +79,8 @@ Documentation written by Tom Zanussi
 	.log2          display log2 value rather than raw number
 	.buckets=size  display grouping of values rather than raw number
 	.usecs         display a common_timestamp in microseconds
+        .percent       display a number of percentage value
+        .graph         display a bar-graph of a value
 	=============  =================================================
 
   Note that in general the semantics of a given field aren't
@@ -137,6 +139,12 @@ Documentation written by Tom Zanussi
   existing trigger, rather than via the '>' operator, which will cause
   the trigger to be removed through truncation.
 
+  The 'nohitcount' (or NOHC) parameter will suppress display of
+  raw hitcount in the histogram. This option requires at least one
+  value field which is not a 'raw hitcount'. For example,
+  'hist:...:vals=hitcount:nohitcount' is rejected, but
+  'hist:...:vals=hitcount.percent:nohitcount' is OK.
+
 - enable_hist/disable_hist
 
   The enable_hist and disable_hist triggers can be used to have one
-- 
GitLab


From fff1787adaeebe66f27c01d5c40d8d2e4d79d5ee Mon Sep 17 00:00:00 2001
From: Song Chen <chensong_2000@189.cn>
Date: Wed, 19 Oct 2022 10:11:18 +0800
Subject: [PATCH 3621/4122] trace/kprobe: remove duplicated calls of
 ring_buffer_event_data

Function __kprobe_trace_func calls ring_buffer_event_data to
get a ring buffer, however, it has been done in above call
trace_event_buffer_reserve. So does __kretprobe_trace_func.

This patch removes those duplicated calls.

Link: https://lore.kernel.org/all/1666145478-4706-1-git-send-email-chensong_2000@189.cn/

Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Song Chen <chensong_2000@189.cn>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
---
 kernel/trace/trace_kprobe.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5a75b039e5860..ee77c8203bd58 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1344,7 +1344,6 @@ __kprobe_trace_func(struct trace_kprobe *tk, struct pt_regs *regs,
 		return;
 
 	fbuffer.regs = regs;
-	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 	entry->ip = (unsigned long)tk->rp.kp.addr;
 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
 
@@ -1385,7 +1384,6 @@ __kretprobe_trace_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		return;
 
 	fbuffer.regs = regs;
-	entry = fbuffer.entry = ring_buffer_event_data(fbuffer.event);
 	entry->func = (unsigned long)tk->rp.kp.addr;
 	entry->ret_ip = get_kretprobe_retaddr(ri);
 	store_trace_args(&entry[1], &tk->tp, regs, sizeof(*entry), dsize);
-- 
GitLab


From 575b76cb885532aae13a9d979fd476bb2b156cb9 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 22 Nov 2022 12:23:45 -0500
Subject: [PATCH 3622/4122] tracing/probes: Handle system names with hyphens

When creating probe names, a check is done to make sure it matches basic C
standard variable naming standards. Basically, starts with alphabetic or
underline, and then the rest of the characters have alpha-numeric or
underline in them.

But system names do not have any true naming conventions, as they are
created by the TRACE_SYSTEM macro and nothing tests to see what they are.
The "xhci-hcd" trace events has a '-' in the system name. When trying to
attach a eprobe to one of these trace points, it fails because the system
name does not follow the variable naming convention because of the
hyphen, and the eprobe checks fail on this.

Allow hyphens in the system name so that eprobes can attach to the
"xhci-hcd" trace events.

Link: https://lore.kernel.org/all/Y3eJ8GiGnEvVd8%2FN@macondo/
Link: https://lore.kernel.org/linux-trace-kernel/20221122122345.160f5077@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: stable@vger.kernel.org
Fixes: 5b7a96220900e ("tracing/probe: Check event/group naming rule at parsing")
Reported-by: Rafael Mendonca <rafaelmendsr@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.h       | 19 ++++++++++++++++---
 kernel/trace/trace_probe.c |  2 +-
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 48643f07bc018..8f37ff032b4f8 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -1954,17 +1954,30 @@ static __always_inline void trace_iterator_reset(struct trace_iterator *iter)
 }
 
 /* Check the name is good for event/group/fields */
-static inline bool is_good_name(const char *name)
+static inline bool __is_good_name(const char *name, bool hash_ok)
 {
-	if (!isalpha(*name) && *name != '_')
+	if (!isalpha(*name) && *name != '_' && (!hash_ok || *name != '-'))
 		return false;
 	while (*++name != '\0') {
-		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_' &&
+		    (!hash_ok || *name != '-'))
 			return false;
 	}
 	return true;
 }
 
+/* Check the name is good for event/group/fields */
+static inline bool is_good_name(const char *name)
+{
+	return __is_good_name(name, false);
+}
+
+/* Check the name is good for system */
+static inline bool is_good_system_name(const char *name)
+{
+	return __is_good_name(name, true);
+}
+
 /* Convert certain expected symbols into '_' when generating event names */
 static inline void sanitize_event_name(char *name)
 {
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 36dff277de464..bb2f95d7175c2 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -246,7 +246,7 @@ int traceprobe_parse_event_name(const char **pevent, const char **pgroup,
 			return -EINVAL;
 		}
 		strlcpy(buf, event, slash - event + 1);
-		if (!is_good_name(buf)) {
+		if (!is_good_system_name(buf)) {
 			trace_probe_log_err(offset, BAD_GROUP_NAME);
 			return -EINVAL;
 		}
-- 
GitLab


From e25e43a4e5d8cb2323553d8b6a7ba08d2ebab21f Mon Sep 17 00:00:00 2001
From: "Masami Hiramatsu (Google)" <mhiramat@kernel.org>
Date: Tue, 6 Dec 2022 23:18:01 +0900
Subject: [PATCH 3623/4122] tracing: Fix complicated dependency of
 CONFIG_TRACER_MAX_TRACE

Both CONFIG_OSNOISE_TRACER and CONFIG_HWLAT_TRACER partially enables the
CONFIG_TRACER_MAX_TRACE code, but that is complicated and has
introduced a bug; It declares tracing_max_lat_fops data structure outside
of #ifdefs, but since it is defined only when CONFIG_TRACER_MAX_TRACE=y
or CONFIG_HWLAT_TRACER=y, if only CONFIG_OSNOISE_TRACER=y, that
declaration comes to a definition(!).

To fix this issue, and do not repeat the similar problem, makes
CONFIG_OSNOISE_TRACER and CONFIG_HWLAT_TRACER enables the
CONFIG_TRACER_MAX_TRACE always. It has there benefits;
- Fix the tracing_max_lat_fops bug
- Simplify the #ifdefs
- CONFIG_TRACER_MAX_TRACE code is fully enabled, or not.

Link: https://lore.kernel.org/linux-trace-kernel/167033628155.4111793.12185405690820208159.stgit@devnote3

Fixes: 424b650f35c7 ("tracing: Fix missing osnoise tracer on max_latency")
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: stable@vger.kernel.org
Reported-by: David Howells <dhowells@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Link: https://lore.kernel.org/all/166992525941.1716618.13740663757583361463.stgit@warthog.procyon.org.uk/ (original thread and v1)
Link: https://lore.kernel.org/all/202212052253.VuhZ2ulJ-lkp@intel.com/T/#u (v1 error report)
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/Kconfig |  2 ++
 kernel/trace/trace.c | 23 +++++++++++++----------
 kernel/trace/trace.h |  8 +++-----
 3 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e9e95c790b8ee..93d7249962833 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -375,6 +375,7 @@ config SCHED_TRACER
 config HWLAT_TRACER
 	bool "Tracer to detect hardware latencies (like SMIs)"
 	select GENERIC_TRACER
+	select TRACER_MAX_TRACE
 	help
 	 This tracer, when enabled will create one or more kernel threads,
 	 depending on what the cpumask file is set to, which each thread
@@ -410,6 +411,7 @@ config HWLAT_TRACER
 config OSNOISE_TRACER
 	bool "OS Noise tracer"
 	select GENERIC_TRACER
+	select TRACER_MAX_TRACE
 	help
 	  In the context of high-performance computing (HPC), the Operating
 	  System Noise (osnoise) refers to the interference experienced by an
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 948f321b9df17..664619b3f1e17 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1421,6 +1421,7 @@ int tracing_snapshot_cond_disable(struct trace_array *tr)
 	return false;
 }
 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
+#define free_snapshot(tr)	do { } while (0)
 #endif /* CONFIG_TRACER_SNAPSHOT */
 
 void tracer_tracing_off(struct trace_array *tr)
@@ -1692,6 +1693,8 @@ static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
 }
 
 unsigned long __read_mostly	tracing_thresh;
+
+#ifdef CONFIG_TRACER_MAX_TRACE
 static const struct file_operations tracing_max_lat_fops;
 
 #ifdef LATENCY_FS_NOTIFY
@@ -1748,18 +1751,14 @@ void latency_fsnotify(struct trace_array *tr)
 	irq_work_queue(&tr->fsnotify_irqwork);
 }
 
-#elif defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)	\
-	|| defined(CONFIG_OSNOISE_TRACER)
+#else /* !LATENCY_FS_NOTIFY */
 
 #define trace_create_maxlat_file(tr, d_tracer)				\
 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
 			  d_tracer, &tr->max_latency, &tracing_max_lat_fops)
 
-#else
-#define trace_create_maxlat_file(tr, d_tracer)	 do { } while (0)
 #endif
 
-#ifdef CONFIG_TRACER_MAX_TRACE
 /*
  * Copy the new maximum trace into the separate maximum-trace
  * structure. (this way the maximum trace is permanently saved,
@@ -1834,14 +1833,15 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
 		ring_buffer_record_off(tr->max_buffer.buffer);
 
 #ifdef CONFIG_TRACER_SNAPSHOT
-	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data))
-		goto out_unlock;
+	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
+		arch_spin_unlock(&tr->max_lock);
+		return;
+	}
 #endif
 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
 
 	__update_max_tr(tr, tsk, cpu);
 
- out_unlock:
 	arch_spin_unlock(&tr->max_lock);
 }
 
@@ -1888,6 +1888,7 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
 	__update_max_tr(tr, tsk, cpu);
 	arch_spin_unlock(&tr->max_lock);
 }
+
 #endif /* CONFIG_TRACER_MAX_TRACE */
 
 static int wait_on_pipe(struct trace_iterator *iter, int full)
@@ -6577,7 +6578,7 @@ out:
 	return ret;
 }
 
-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
+#ifdef CONFIG_TRACER_MAX_TRACE
 
 static ssize_t
 tracing_max_lat_read(struct file *filp, char __user *ubuf,
@@ -7592,7 +7593,7 @@ static const struct file_operations tracing_thresh_fops = {
 	.llseek		= generic_file_llseek,
 };
 
-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER)
+#ifdef CONFIG_TRACER_MAX_TRACE
 static const struct file_operations tracing_max_lat_fops = {
 	.open		= tracing_open_generic,
 	.read		= tracing_max_lat_read,
@@ -9606,7 +9607,9 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
 
 	create_trace_options_dir(tr);
 
+#ifdef CONFIG_TRACER_MAX_TRACE
 	trace_create_maxlat_file(tr, d_tracer);
+#endif
 
 	if (ftrace_create_function_files(tr, d_tracer))
 		MEM_FAIL(1, "Could not allocate function filter files");
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 8f37ff032b4f8..9dc920b01c17f 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -308,8 +308,7 @@ struct trace_array {
 	struct array_buffer	max_buffer;
 	bool			allocated_snapshot;
 #endif
-#if defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
-	|| defined(CONFIG_OSNOISE_TRACER)
+#ifdef CONFIG_TRACER_MAX_TRACE
 	unsigned long		max_latency;
 #ifdef CONFIG_FSNOTIFY
 	struct dentry		*d_max_latency;
@@ -688,12 +687,11 @@ void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
 		   void *cond_data);
 void update_max_tr_single(struct trace_array *tr,
 			  struct task_struct *tsk, int cpu);
-#endif /* CONFIG_TRACER_MAX_TRACE */
 
-#if (defined(CONFIG_TRACER_MAX_TRACE) || defined(CONFIG_HWLAT_TRACER) \
-	|| defined(CONFIG_OSNOISE_TRACER)) && defined(CONFIG_FSNOTIFY)
+#ifdef CONFIG_FSNOTIFY
 #define LATENCY_FS_NOTIFY
 #endif
+#endif /* CONFIG_TRACER_MAX_TRACE */
 
 #ifdef LATENCY_FS_NOTIFY
 void latency_fsnotify(struct trace_array *tr);
-- 
GitLab


From 4994e387d7332f03cd4eab55f7896ddf04cab1c0 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 6 Dec 2022 14:12:02 -0500
Subject: [PATCH 3624/4122] x86/mm/kmmio: Switch to arch_spin_lock()

The mmiotrace tracer is "special". The purpose is to help reverse engineer
binary drivers by removing the memory allocated by the driver and when the
driver goes to access it, a fault occurs, the mmiotracer will record what
the driver was doing and then do the work on its behalf by single stepping
through the process.

But to achieve this ability, it must do some special things. One is it
needs to grab a lock while in the breakpoint handler. This is considered
an NMI state, and then lockdep warns that the lock is being held in both
an NMI state (really a breakpoint handler) and also in normal context.

As the breakpoint/NMI state only happens when the driver is accessing
memory, there's no concern of a race condition against the setup and
tear-down of mmiotracer.

To make lockdep and mmiotrace work together, convert the locks used in the
breakpoint handler into arch_spin_lock().

Link: https://lkml.kernel.org/r/20221206191229.656244029@goodmis.org
Link: https://lore.kernel.org/lkml/20221201213126.620b7dd3@gandalf.local.home/
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Karol Herbst <karolherbst@gmail.com>
Cc: Pekka Paalanen <ppaalanen@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Borislav Petkov <bp@alien8.de>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 arch/x86/mm/kmmio.c | 31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index d3efbc5b34498..edb4864501582 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -62,7 +62,13 @@ struct kmmio_context {
 	int active;
 };
 
-static DEFINE_SPINLOCK(kmmio_lock);
+/*
+ * The kmmio_lock is taken in int3 context, which is treated as NMI context.
+ * This causes lockdep to complain about it bein in both NMI and normal
+ * context. Hide it from lockdep, as it should not have any other locks
+ * taken under it, and this is only enabled for debugging mmio anyway.
+ */
+static arch_spinlock_t kmmio_lock = __ARCH_SPIN_LOCK_UNLOCKED;
 
 /* Protected by kmmio_lock */
 unsigned int kmmio_count;
@@ -346,10 +352,10 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 		ctx->probe->post_handler(ctx->probe, condition, regs);
 
 	/* Prevent racing against release_kmmio_fault_page(). */
-	spin_lock(&kmmio_lock);
+	arch_spin_lock(&kmmio_lock);
 	if (ctx->fpage->count)
 		arm_kmmio_fault_page(ctx->fpage);
-	spin_unlock(&kmmio_lock);
+	arch_spin_unlock(&kmmio_lock);
 
 	regs->flags &= ~X86_EFLAGS_TF;
 	regs->flags |= ctx->saved_flags;
@@ -440,7 +446,8 @@ int register_kmmio_probe(struct kmmio_probe *p)
 	unsigned int l;
 	pte_t *pte;
 
-	spin_lock_irqsave(&kmmio_lock, flags);
+	local_irq_save(flags);
+	arch_spin_lock(&kmmio_lock);
 	if (get_kmmio_probe(addr)) {
 		ret = -EEXIST;
 		goto out;
@@ -460,7 +467,9 @@ int register_kmmio_probe(struct kmmio_probe *p)
 		size += page_level_size(l);
 	}
 out:
-	spin_unlock_irqrestore(&kmmio_lock, flags);
+	arch_spin_unlock(&kmmio_lock);
+	local_irq_restore(flags);
+
 	/*
 	 * XXX: What should I do here?
 	 * Here was a call to global_flush_tlb(), but it does not exist
@@ -494,7 +503,8 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
 	struct kmmio_fault_page **prevp = &dr->release_list;
 	unsigned long flags;
 
-	spin_lock_irqsave(&kmmio_lock, flags);
+	local_irq_save(flags);
+	arch_spin_lock(&kmmio_lock);
 	while (f) {
 		if (!f->count) {
 			list_del_rcu(&f->list);
@@ -506,7 +516,8 @@ static void remove_kmmio_fault_pages(struct rcu_head *head)
 		}
 		f = *prevp;
 	}
-	spin_unlock_irqrestore(&kmmio_lock, flags);
+	arch_spin_unlock(&kmmio_lock);
+	local_irq_restore(flags);
 
 	/* This is the real RCU destroy call. */
 	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
@@ -540,14 +551,16 @@ void unregister_kmmio_probe(struct kmmio_probe *p)
 	if (!pte)
 		return;
 
-	spin_lock_irqsave(&kmmio_lock, flags);
+	local_irq_save(flags);
+	arch_spin_lock(&kmmio_lock);
 	while (size < size_lim) {
 		release_kmmio_fault_page(addr + size, &release_list);
 		size += page_level_size(l);
 	}
 	list_del_rcu(&p->list);
 	kmmio_count--;
-	spin_unlock_irqrestore(&kmmio_lock, flags);
+	arch_spin_unlock(&kmmio_lock);
+	local_irq_restore(flags);
 
 	if (!release_list)
 		return;
-- 
GitLab


From f0c0ade8d874fb127f9b451d415bee8cbb6bf7a6 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 5 Dec 2022 22:27:38 +0100
Subject: [PATCH 3625/4122] gfs2: Minor gfs2_try_evict cleanup

In gfs2_try_evict(), when an inode can't be evicted, we are grabbing a
temporary reference on the inode glock to poke that glock.  That should
be safe, but it's easier to just grab an inode reference as we already
do earlier in this function.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 6f2de8c0b2d01..c32c25b4c37c8 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1023,8 +1023,6 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
 		ip = NULL;
 	spin_unlock(&gl->gl_lockref.lock);
 	if (ip) {
-		struct gfs2_glock *inode_gl = NULL;
-
 		gl->gl_no_formal_ino = ip->i_no_formal_ino;
 		set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
 		d_prune_aliases(&ip->i_inode);
@@ -1034,14 +1032,14 @@ static bool gfs2_try_evict(struct gfs2_glock *gl)
 		spin_lock(&gl->gl_lockref.lock);
 		ip = gl->gl_object;
 		if (ip) {
-			inode_gl = ip->i_gl;
-			lockref_get(&inode_gl->gl_lockref);
 			clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
+			if (!igrab(&ip->i_inode))
+				ip = NULL;
 		}
 		spin_unlock(&gl->gl_lockref.lock);
-		if (inode_gl) {
-			gfs2_glock_poke(inode_gl);
-			gfs2_glock_put(inode_gl);
+		if (ip) {
+			gfs2_glock_poke(ip->i_gl);
+			iput(&ip->i_inode);
 		}
 		evicted = !ip;
 	}
-- 
GitLab


From d8d2b65a940bb497749d66bdab59b530901d3854 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 9 Dec 2022 11:01:00 -0600
Subject: [PATCH 3626/4122] PCI/portdrv: Allow AER service only for Root Ports
 & RCECs

Previously portdrv allowed the AER service for any device with an AER
capability (assuming Linux had control of AER) even though the AER service
driver only attaches to Root Port and RCECs.

Because get_port_device_capability() included AER for non-RP, non-RCEC
devices, we tried to initialize the AER IRQ even though these devices
don't generate AER interrupts.

Intel DG1 and DG2 discrete graphics cards contain a switch leading to a
GPU.  The switch supports AER but not MSI, so initializing an AER IRQ
failed, and portdrv failed to claim the switch port at all.  The GPU itself
could be suspended, but the switch could not be put in a low-power state
because it had no driver.

Don't allow the AER service on non-Root Port, non-Root Complex Event
Collector devices.  This means we won't enable Bus Mastering if the device
doesn't require MSI, the AER service will not appear in sysfs, and the AER
service driver will not bind to the device.

Link: https://lore.kernel.org/r/20221207084105.84947-1-mika.westerberg@linux.intel.com
Link: https://lore.kernel.org/r/20221210002922.1749403-1-helgaas@kernel.org
Based-on-patch-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
---
 drivers/pci/pcie/portdrv.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pcie/portdrv.c b/drivers/pci/pcie/portdrv.c
index a6c4225505d53..8b16e96ec15c6 100644
--- a/drivers/pci/pcie/portdrv.c
+++ b/drivers/pci/pcie/portdrv.c
@@ -232,7 +232,9 @@ static int get_port_device_capability(struct pci_dev *dev)
 	}
 
 #ifdef CONFIG_PCIEAER
-	if (dev->aer_cap && pci_aer_available() &&
+	if ((pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT ||
+             pci_pcie_type(dev) == PCI_EXP_TYPE_RC_EC) &&
+	    dev->aer_cap && pci_aer_available() &&
 	    (pcie_ports_native || host->native_aer))
 		services |= PCIE_PORT_SERVICE_AER;
 #endif
-- 
GitLab


From 07eab0901ede8b7540c52160663bd300cc238164 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 8 Dec 2022 13:03:38 -0600
Subject: [PATCH 3627/4122] efi/x86: Remove EfiMemoryMappedIO from E820 map

Firmware can use EfiMemoryMappedIO to request that MMIO regions be mapped
by the OS so they can be accessed by EFI runtime services, but should have
no other significance to the OS (UEFI r2.10, sec 7.2).  However, most
bootloaders and EFI stubs convert EfiMemoryMappedIO regions to
E820_TYPE_RESERVED entries, which prevent Linux from allocating space from
them (see remove_e820_regions()).

Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and PCI
host bridge windows, which means Linux can't allocate BAR space for
hot-added devices.

Remove large EfiMemoryMappedIO regions from the E820 map to avoid this
problem.

Leave small (< 256KB) EfiMemoryMappedIO regions alone because on some
platforms, these describe non-window space that's included in host bridge
_CRS.  If we assign that space to PCI devices, they don't work.  On the
Lenovo X1 Carbon, this leads to suspend/resume failures.

The previous solution to the problem of allocating BARs in these regions
was to add pci_crs_quirks[] entries to disable E820 checking for these
machines (see d341838d776a ("x86/PCI: Disable E820 reserved region clipping
via quirks")):

  Acer   DMI_PRODUCT_NAME    Spin SP513-54N
  Clevo  DMI_BOARD_NAME      X170KM-G
  Lenovo DMI_PRODUCT_VERSION *IIL*

Florent reported the BAR allocation issue on the Clevo NL4XLU.  We could
add another quirk for the NL4XLU, but I hope this generic change can solve
it for many machines without having to add quirks.

This change has been tested on Clevo X170KM-G (Konrad) and Lenovo Ideapad
Slim 3 (Matt) and solves the problem even when overriding the existing
quirks by booting with "pci=use_e820".

Link: https://bugzilla.kernel.org/show_bug.cgi?id=216565     Clevo NL4XLU
Link: https://bugzilla.kernel.org/show_bug.cgi?id=206459#c78 Clevo X170KM-G
Link: https://bugzilla.redhat.com/show_bug.cgi?id=1868899    Ideapad Slim 3
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2029207    X1 Carbon
Link: https://lore.kernel.org/r/20221208190341.1560157-2-helgaas@kernel.org
Reported-by: Florent DELAHAYE <kernelorg@undead.fr>
Tested-by: Konrad J Hambrick <kjhambrick@gmail.com>
Tested-by: Matt Hansen <2lprbe78@duck.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
---
 arch/x86/platform/efi/efi.c | 46 +++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index ebc98a68c4005..75bf0e56bb53d 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -303,6 +303,50 @@ static void __init efi_clean_memmap(void)
 	}
 }
 
+/*
+ * Firmware can use EfiMemoryMappedIO to request that MMIO regions be
+ * mapped by the OS so they can be accessed by EFI runtime services, but
+ * should have no other significance to the OS (UEFI r2.10, sec 7.2).
+ * However, most bootloaders and EFI stubs convert EfiMemoryMappedIO
+ * regions to E820_TYPE_RESERVED entries, which prevent Linux from
+ * allocating space from them (see remove_e820_regions()).
+ *
+ * Some platforms use EfiMemoryMappedIO entries for PCI MMCONFIG space and
+ * PCI host bridge windows, which means Linux can't allocate BAR space for
+ * hot-added devices.
+ *
+ * Remove large EfiMemoryMappedIO regions from the E820 map to avoid this
+ * problem.
+ *
+ * Retain small EfiMemoryMappedIO regions because on some platforms, these
+ * describe non-window space that's included in host bridge _CRS.  If we
+ * assign that space to PCI devices, they don't work.
+ */
+static void __init efi_remove_e820_mmio(void)
+{
+	efi_memory_desc_t *md;
+	u64 size, start, end;
+	int i = 0;
+
+	for_each_efi_memory_desc(md) {
+		if (md->type == EFI_MEMORY_MAPPED_IO) {
+			size = md->num_pages << EFI_PAGE_SHIFT;
+			start = md->phys_addr;
+			end = start + size - 1;
+			if (size >= 256*1024) {
+				pr_info("Remove mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluMB) from e820 map\n",
+					i, start, end, size >> 20);
+				e820__range_remove(start, size,
+						   E820_TYPE_RESERVED, 1);
+			} else {
+				pr_info("Not removing mem%02u: MMIO range=[0x%08llx-0x%08llx] (%lluKB) from e820 map\n",
+					i, start, end, size >> 10);
+			}
+		}
+		i++;
+	}
+}
+
 void __init efi_print_memmap(void)
 {
 	efi_memory_desc_t *md;
@@ -474,6 +518,8 @@ void __init efi_init(void)
 	set_bit(EFI_RUNTIME_SERVICES, &efi.flags);
 	efi_clean_memmap();
 
+	efi_remove_e820_mmio();
+
 	if (efi_enabled(EFI_DBG))
 		efi_print_memmap();
 }
-- 
GitLab


From 5c5fb3c3a793b34554e1d21f07cda34308b082cd Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 8 Dec 2022 13:03:39 -0600
Subject: [PATCH 3628/4122] PCI: Skip allocate_resource() if too little space
 available

pci_bus_alloc_from_region() allocates MMIO space by iterating through all
the resources available on the bus.  The available resource might be
reduced if the caller requires 32-bit space or we're avoiding BIOS or E820
areas.

Don't bother calling allocate_resource() if we need more space than is
available in this resource.  This prevents some pointless and annoying
messages about avoided areas.

Link: https://lore.kernel.org/r/20221208190341.1560157-3-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/pci/bus.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c
index 3cef835b375fd..83ae838ceb5f0 100644
--- a/drivers/pci/bus.c
+++ b/drivers/pci/bus.c
@@ -197,6 +197,10 @@ static int pci_bus_alloc_from_region(struct pci_bus *bus, struct resource *res,
 
 		max = avail.end;
 
+		/* Don't bother if available space isn't large enough */
+		if (size > max - min_used + 1)
+			continue;
+
 		/* Ok, try it out.. */
 		ret = allocate_resource(r, res, size, min_used, max,
 					align, alignf, alignf_data);
-- 
GitLab


From 00904bf64c2819d90a76407d79bdbc8918541320 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 8 Dec 2022 13:03:40 -0600
Subject: [PATCH 3629/4122] x86/PCI: Tidy E820 removal messages

These messages:

  clipped [mem size 0x00000000 64bit] to [mem size 0xfffffffffffa0000 64bit] for e820 entry [mem 0x0009f000-0x000fffff]

aren't as useful as they could be because (a) the resource is often
IORESOURCE_UNSET, so we print the size instead of the start/end and (b) we
print the available resource even if it is empty after removing the E820
entry.

Print the available space by hand to avoid the IORESOURCE_UNSET problem and
only if it's non-empty.  No functional change intended.

Link: https://lore.kernel.org/r/20221208190341.1560157-4-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
---
 arch/x86/kernel/resource.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/resource.c b/arch/x86/kernel/resource.c
index bba1abd05bfeb..79bc8a97a083c 100644
--- a/arch/x86/kernel/resource.c
+++ b/arch/x86/kernel/resource.c
@@ -42,8 +42,16 @@ static void remove_e820_regions(struct resource *avail)
 
 		resource_clip(avail, e820_start, e820_end);
 		if (orig.start != avail->start || orig.end != avail->end) {
-			pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n",
-				 &orig, avail, e820_start, e820_end);
+			pr_info("resource: avoiding allocation from e820 entry [mem %#010Lx-%#010Lx]\n",
+				e820_start, e820_end);
+			if (avail->end > avail->start)
+				/*
+				 * Use %pa instead of %pR because "avail"
+				 * is typically IORESOURCE_UNSET, so %pR
+				 * shows the size instead of addresses.
+				 */
+				pr_info("resource: remaining [mem %pa-%pa] available\n",
+					&avail->start, &avail->end);
 			orig = *avail;
 		}
 	}
-- 
GitLab


From 2bfa89fab5ff06703c034c4702e6ea4418194ffe Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Thu, 8 Dec 2022 13:03:41 -0600
Subject: [PATCH 3630/4122] x86/PCI: Fix log message typo

Add missing word in the log message:

  - ... so future kernels can this automatically
  + ... so future kernels can do this automatically

Suggested-by: Andy Shevchenko <andriy.shevchenko@intel.com>
Link: https://lore.kernel.org/r/20221208190341.1560157-5-helgaas@kernel.org
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Hans de Goede <hdegoede@redhat.com>
---
 arch/x86/pci/acpi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 2f82480fd4305..83dfea9e9894b 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -245,7 +245,7 @@ void __init pci_acpi_crs_quirks(void)
 	printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n",
 	       pci_use_e820 ? "Using" : "Ignoring");
 	if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
-		printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can this automatically\n");
+		printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n");
 }
 
 #ifdef	CONFIG_PCI_MMCONFIG
-- 
GitLab


From d91482bb212b36354b0e46d7a5c0adae807e7a12 Mon Sep 17 00:00:00 2001
From: Bjorn Helgaas <bhelgaas@google.com>
Date: Fri, 9 Dec 2022 14:41:27 -0600
Subject: [PATCH 3631/4122] x86/PCI: Use pr_info() when possible

Use pr_info() and similar when possible.  No functional change intended.

Link: https://lore.kernel.org/r/20221209205131.GA1726524@bhelgaas
Suggested-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 arch/x86/pci/acpi.c | 39 ++++++++++++++++++---------------------
 1 file changed, 18 insertions(+), 21 deletions(-)

diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 83dfea9e9894b..ea2eb2ec90e2b 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -1,4 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "PCI: " fmt
+
 #include <linux/pci.h>
 #include <linux/acpi.h>
 #include <linux/init.h>
@@ -37,15 +40,15 @@ static int __init set_nouse_crs(const struct dmi_system_id *id)
 
 static int __init set_ignore_seg(const struct dmi_system_id *id)
 {
-	printk(KERN_INFO "PCI: %s detected: ignoring ACPI _SEG\n", id->ident);
+	pr_info("%s detected: ignoring ACPI _SEG\n", id->ident);
 	pci_ignore_seg = true;
 	return 0;
 }
 
 static int __init set_no_e820(const struct dmi_system_id *id)
 {
-	printk(KERN_INFO "PCI: %s detected: not clipping E820 regions from _CRS\n",
-	       id->ident);
+	pr_info("%s detected: not clipping E820 regions from _CRS\n",
+	        id->ident);
 	pci_use_e820 = false;
 	return 0;
 }
@@ -231,10 +234,9 @@ void __init pci_acpi_crs_quirks(void)
 	else if (pci_probe & PCI_USE__CRS)
 		pci_use_crs = true;
 
-	printk(KERN_INFO "PCI: %s host bridge windows from ACPI; "
-	       "if necessary, use \"pci=%s\" and report a bug\n",
-	       pci_use_crs ? "Using" : "Ignoring",
-	       pci_use_crs ? "nocrs" : "use_crs");
+	pr_info("%s host bridge windows from ACPI; if necessary, use \"pci=%s\" and report a bug\n",
+	        pci_use_crs ? "Using" : "Ignoring",
+	        pci_use_crs ? "nocrs" : "use_crs");
 
 	/* "pci=use_e820"/"pci=no_e820" on the kernel cmdline takes precedence */
 	if (pci_probe & PCI_NO_E820)
@@ -242,19 +244,17 @@ void __init pci_acpi_crs_quirks(void)
 	else if (pci_probe & PCI_USE_E820)
 		pci_use_e820 = true;
 
-	printk(KERN_INFO "PCI: %s E820 reservations for host bridge windows\n",
-	       pci_use_e820 ? "Using" : "Ignoring");
+	pr_info("%s E820 reservations for host bridge windows\n",
+	        pci_use_e820 ? "Using" : "Ignoring");
 	if (pci_probe & (PCI_NO_E820 | PCI_USE_E820))
-		printk(KERN_INFO "PCI: Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n");
+		pr_info("Please notify linux-pci@vger.kernel.org so future kernels can do this automatically\n");
 }
 
 #ifdef	CONFIG_PCI_MMCONFIG
 static int check_segment(u16 seg, struct device *dev, char *estr)
 {
 	if (seg) {
-		dev_err(dev,
-			"%s can't access PCI configuration "
-			"space under this host bridge.\n",
+		dev_err(dev, "%s can't access configuration space under this host bridge\n",
 			estr);
 		return -EIO;
 	}
@@ -264,9 +264,7 @@ static int check_segment(u16 seg, struct device *dev, char *estr)
 	 * just can't access extended configuration space of
 	 * devices under this host bridge.
 	 */
-	dev_warn(dev,
-		 "%s can't access extended PCI configuration "
-		 "space under this bridge.\n",
+	dev_warn(dev, "%s can't access extended configuration space under this bridge\n",
 		 estr);
 
 	return 0;
@@ -421,9 +419,8 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
 		root->segment = domain = 0;
 
 	if (domain && !pci_domains_supported) {
-		printk(KERN_WARNING "pci_bus %04x:%02x: "
-		       "ignored (multiple domains not supported)\n",
-		       domain, busnum);
+		pr_warn("pci_bus %04x:%02x: ignored (multiple domains not supported)\n",
+		        domain, busnum);
 		return NULL;
 	}
 
@@ -491,7 +488,7 @@ int __init pci_acpi_init(void)
 	if (acpi_noirq)
 		return -ENODEV;
 
-	printk(KERN_INFO "PCI: Using ACPI for IRQ routing\n");
+	pr_info("Using ACPI for IRQ routing\n");
 	acpi_irq_penalty_init();
 	pcibios_enable_irq = acpi_pci_irq_enable;
 	pcibios_disable_irq = acpi_pci_irq_disable;
@@ -503,7 +500,7 @@ int __init pci_acpi_init(void)
 		 * also do it here in case there are still broken drivers that
 		 * don't use pci_enable_device().
 		 */
-		printk(KERN_INFO "PCI: Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
+		pr_info("Routing PCI interrupts for all devices because \"pci=routeirq\" specified\n");
 		for_each_pci_dev(dev)
 			acpi_pci_irq_enable(dev);
 	}
-- 
GitLab


From 20fb6c997600012209699564a79911584dd5876b Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 9 Dec 2022 13:41:44 -0500
Subject: [PATCH 3632/4122] x86/mm/kmmio: Use rcu_read_lock_sched_notrace()

The mmiotrace tracer is "special". The purpose is to help reverse engineer
binary drivers by removing the memory allocated by the driver and when the
driver goes to access it, a fault occurs, the mmiotracer will record what
the driver was doing and then do the work on its behalf by single stepping
through the process.

But to achieve this ability, it must do some special things. One is to
take the rcu_read_lock() when the fault occurs, and then release it in the
breakpoint that is single stepping. This makes lockdep unhappy, as it
changes the state of RCU from within an exception that is not contained in
that exception, and we get a nasty splat from lockdep.

Instead, switch to rcu_read_lock_sched_notrace() as the RCU sched variant
has the same grace period as normal RCU. This is basically the same as
rcu_read_lock() but does not make lockdep complain about it.

Note, the preempt_disable() is still needed as it uses preempt_enable_no_resched().

Link: https://lore.kernel.org/linux-trace-kernel/20221209134144.04f33626@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Karol Herbst <karolherbst@gmail.com>
Cc: Pekka Paalanen <ppaalanen@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 arch/x86/mm/kmmio.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index edb4864501582..853c49877c161 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -254,7 +254,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	 * again.
 	 */
 	preempt_disable();
-	rcu_read_lock();
+	rcu_read_lock_sched_notrace();
 
 	faultpage = get_kmmio_fault_page(page_base);
 	if (!faultpage) {
@@ -323,7 +323,7 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	return 1; /* fault handled */
 
 no_kmmio:
-	rcu_read_unlock();
+	rcu_read_unlock_sched_notrace();
 	preempt_enable_no_resched();
 	return ret;
 }
@@ -363,7 +363,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	/* These were acquired in kmmio_handler(). */
 	ctx->active--;
 	BUG_ON(ctx->active);
-	rcu_read_unlock();
+	rcu_read_unlock_sched_notrace();
 	preempt_enable_no_resched();
 
 	/*
-- 
GitLab


From 2cc6a528882d0e0ccbc1bca5f95b8c963cedac54 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Wed, 7 Dec 2022 11:46:35 +0800
Subject: [PATCH 3633/4122] tracing/hist: Fix wrong return value in
 parse_action_params()

When number of synth fields is more than SYNTH_FIELDS_MAX,
parse_action_params() should return -EINVAL.

Link: https://lore.kernel.org/linux-trace-kernel/20221207034635.2253990-1-zhengyejian1@huawei.com

Cc: <mhiramat@kernel.org>
Cc: <zanussi@kernel.org>
Cc: stable@vger.kernel.org
Fixes: c282a386a397 ("tracing: Add 'onmatch' hist trigger action support")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index a0cd118af5272..b4ad86c22b43a 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -3609,6 +3609,7 @@ static int parse_action_params(struct trace_array *tr, char *params,
 	while (params) {
 		if (data->n_params >= SYNTH_FIELDS_MAX) {
 			hist_err(tr, HIST_ERR_TOO_MANY_PARAMS, 0);
+			ret = -EINVAL;
 			goto out;
 		}
 
-- 
GitLab


From 82470f7d9044842618c847a7166de2b7458157a7 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Wed, 7 Dec 2022 11:51:43 +0800
Subject: [PATCH 3634/4122] tracing/hist: Fix out-of-bound write on
 'action_data.var_ref_idx'

When generate a synthetic event with many params and then create a trace
action for it [1], kernel panic happened [2].

It is because that in trace_action_create() 'data->n_params' is up to
SYNTH_FIELDS_MAX (current value is 64), and array 'data->var_ref_idx'
keeps indices into array 'hist_data->var_refs' for each synthetic event
param, but the length of 'data->var_ref_idx' is TRACING_MAP_VARS_MAX
(current value is 16), so out-of-bound write happened when 'data->n_params'
more than 16. In this case, 'data->match_data.event' is overwritten and
eventually cause the panic.

To solve the issue, adjust the length of 'data->var_ref_idx' to be
SYNTH_FIELDS_MAX and add sanity checks to avoid out-of-bound write.

[1]
 # cd /sys/kernel/tracing/
 # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\
int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\
int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\
int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\
int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\
int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\
int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\
int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\
int v63" >> synthetic_events
 # echo 'hist:keys=pid:ts0=common_timestamp.usecs if comm=="bash"' >> \
events/sched/sched_waking/trigger
 # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\
pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,pid,\
pid,pid,pid,pid,pid,pid,pid,pid,pid)" >> events/sched/sched_switch/trigger

[2]
BUG: unable to handle page fault for address: ffff91c900000000
PGD 61001067 P4D 61001067 PUD 0
Oops: 0000 [#1] PREEMPT SMP NOPTI
CPU: 2 PID: 322 Comm: bash Tainted: G        W          6.1.0-rc8+ #229
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
rel-1.15.0-0-g2dd4b9b3f840-prebuilt.qemu.org 04/01/2014
RIP: 0010:strcmp+0xc/0x30
Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
FS:  00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0
Call Trace:
 <TASK>
 __find_event_file+0x55/0x90
 action_create+0x76c/0x1060
 event_hist_trigger_parse+0x146d/0x2060
 ? event_trigger_write+0x31/0xd0
 trigger_process_regex+0xbb/0x110
 event_trigger_write+0x6b/0xd0
 vfs_write+0xc8/0x3e0
 ? alloc_fd+0xc0/0x160
 ? preempt_count_add+0x4d/0xa0
 ? preempt_count_add+0x70/0xa0
 ksys_write+0x5f/0xe0
 do_syscall_64+0x3b/0x90
 entry_SYSCALL_64_after_hwframe+0x63/0xcd
RIP: 0033:0x7f1d1d0cf077
Code: 64 89 02 48 c7 c0 ff ff ff ff eb bb 0f 1f 80 00 00 00 00 f3 0f 1e
fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00
f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74
RSP: 002b:00007ffcebb0e568 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 0000000000000143 RCX: 00007f1d1d0cf077
RDX: 0000000000000143 RSI: 00005639265aa7e0 RDI: 0000000000000001
RBP: 00005639265aa7e0 R08: 000000000000000a R09: 0000000000000142
R10: 000056392639c017 R11: 0000000000000246 R12: 0000000000000143
R13: 00007f1d1d1ae6a0 R14: 00007f1d1d1aa4a0 R15: 00007f1d1d1a98a0
 </TASK>
Modules linked in:
CR2: ffff91c900000000
---[ end trace 0000000000000000 ]---
RIP: 0010:strcmp+0xc/0x30
Code: 75 f7 31 d2 44 0f b6 04 16 44 88 04 11 48 83 c2 01 45 84 c0 75 ee
c3 cc cc cc cc 0f 1f 00 31 c0 eb 08 48 83 c0 01 84 d2 74 13 <0f> b6 14
07 3a 14 06 74 ef 19 c0 83 c8 01 c3 cc cc cc cc 31 c3
RSP: 0018:ffff9b3b00f53c48 EFLAGS: 00000246
RAX: 0000000000000000 RBX: ffffffffba958a68 RCX: 0000000000000000
RDX: 0000000000000010 RSI: ffff91c943d33a90 RDI: ffff91c900000000
RBP: ffff91c900000000 R08: 00000018d604b529 R09: 0000000000000000
R10: ffff91c9483eddb1 R11: ffff91ca483eddab R12: ffff91c946171580
R13: ffff91c9479f0538 R14: ffff91c9457c2848 R15: ffff91c9479f0538
FS:  00007f1d1cfbe740(0000) GS:ffff91c9bdc80000(0000)
knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: ffff91c900000000 CR3: 0000000006316000 CR4: 00000000000006e0

Link: https://lore.kernel.org/linux-trace-kernel/20221207035143.2278781-1-zhengyejian1@huawei.com

Cc: <mhiramat@kernel.org>
Cc: <zanussi@kernel.org>
Cc: stable@vger.kernel.org
Fixes: d380dcde9a07 ("tracing: Fix now invalid var_ref_vals assumption in trace action")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index b4ad86c22b43a..8264b28d5a57e 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -621,7 +621,7 @@ struct action_data {
 	 * event param, and is passed to the synthetic event
 	 * invocation.
 	 */
-	unsigned int		var_ref_idx[TRACING_MAP_VARS_MAX];
+	unsigned int		var_ref_idx[SYNTH_FIELDS_MAX];
 	struct synth_event	*synth_event;
 	bool			use_trace_keyword;
 	char			*synth_event_name;
@@ -2186,7 +2186,9 @@ static struct hist_field *create_var_ref(struct hist_trigger_data *hist_data,
 			return ref_field;
 		}
 	}
-
+	/* Sanity check to avoid out-of-bound write on 'hist_data->var_refs' */
+	if (hist_data->n_var_refs >= TRACING_MAP_VARS_MAX)
+		return NULL;
 	ref_field = create_hist_field(var_field->hist_data, NULL, flags, NULL);
 	if (ref_field) {
 		if (init_var_ref(ref_field, var_field, system, event_name)) {
@@ -3946,6 +3948,10 @@ static int trace_action_create(struct hist_trigger_data *hist_data,
 
 	lockdep_assert_held(&event_mutex);
 
+	/* Sanity check to avoid out-of-bound write on 'data->var_ref_idx' */
+	if (data->n_params > SYNTH_FIELDS_MAX)
+		return -EINVAL;
+
 	if (data->use_trace_keyword)
 		synth_event_name = data->synth_event_name;
 	else
-- 
GitLab


From ff4837f7fe59ff018eca4705a70eca5e0b486b97 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Wed, 7 Dec 2022 17:15:57 +0800
Subject: [PATCH 3635/4122] tracing: Fix issue of missing one synthetic field

The maximum number of synthetic fields supported is defined as
SYNTH_FIELDS_MAX which value currently is 64, but it actually fails
when try to generate a synthetic event with 64 fields by executing like:

  # echo "my_synth_event int v1; int v2; int v3; int v4; int v5; int v6;\
   int v7; int v8; int v9; int v10; int v11; int v12; int v13; int v14;\
   int v15; int v16; int v17; int v18; int v19; int v20; int v21; int v22;\
   int v23; int v24; int v25; int v26; int v27; int v28; int v29; int v30;\
   int v31; int v32; int v33; int v34; int v35; int v36; int v37; int v38;\
   int v39; int v40; int v41; int v42; int v43; int v44; int v45; int v46;\
   int v47; int v48; int v49; int v50; int v51; int v52; int v53; int v54;\
   int v55; int v56; int v57; int v58; int v59; int v60; int v61; int v62;\
   int v63; int v64" >> /sys/kernel/tracing/synthetic_events

Correct the field counting to fix it.

Link: https://lore.kernel.org/linux-trace-kernel/20221207091557.3137904-1-zhengyejian1@huawei.com

Cc: <mhiramat@kernel.org>
Cc: <zanussi@kernel.org>
Cc: stable@vger.kernel.org
Fixes: c9e759b1e845 ("tracing: Rework synthetic event command parsing")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_synth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_synth.c b/kernel/trace/trace_events_synth.c
index c3b582d19b620..67592eed0be8d 100644
--- a/kernel/trace/trace_events_synth.c
+++ b/kernel/trace/trace_events_synth.c
@@ -1282,12 +1282,12 @@ static int __create_synth_event(const char *name, const char *raw_fields)
 				goto err_free_arg;
 			}
 
-			fields[n_fields++] = field;
 			if (n_fields == SYNTH_FIELDS_MAX) {
 				synth_err(SYNTH_ERR_TOO_MANY_FIELDS, 0);
 				ret = -EINVAL;
 				goto err_free_arg;
 			}
+			fields[n_fields++] = field;
 
 			n_fields_this_loop++;
 		}
-- 
GitLab


From 608c6ed3337850c767ab0dd6c583477922233e29 Mon Sep 17 00:00:00 2001
From: Zheng Yejian <zhengyejian1@huawei.com>
Date: Wed, 7 Dec 2022 21:53:26 +0800
Subject: [PATCH 3636/4122] tracing/hist: Fix issue of losting command info in
 error_log

When input some constructed invalid 'trigger' command, command info
in 'error_log' are lost [1].

The root cause is that there is a path that event_hist_trigger_parse()
is recursely called once and 'last_cmd' which save origin command is
cleared, then later calling of hist_err() will no longer record origin
command info:

  event_hist_trigger_parse() {
    last_cmd_set()  // <1> 'last_cmd' save origin command here at first
    create_actions() {
      onmatch_create() {
        action_create() {
          trace_action_create() {
            trace_action_create_field_var() {
              create_field_var_hist() {
                event_hist_trigger_parse() {  // <2> recursely called once
                  hist_err_clear()  // <3> 'last_cmd' is cleared here
                }
                hist_err()  // <4> No longer find origin command!!!

Since 'glob' is empty string while running into the recurse call, we
can trickly check it and bypass the call of hist_err_clear() to solve it.

[1]
 # cd /sys/kernel/tracing
 # echo "my_synth_event int v1; int v2; int v3;" >> synthetic_events
 # echo 'hist:keys=pid' >> events/sched/sched_waking/trigger
 # echo "hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(\
pid,pid1)" >> events/sched/sched_switch/trigger
 # cat error_log
[  8.405018] hist:sched:sched_switch: error: Couldn't find synthetic event
  Command:
hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(pid,pid1)
                                                          ^
[  8.816902] hist:sched:sched_switch: error: Couldn't find field
  Command:
hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(pid,pid1)
                          ^
[  8.816902] hist:sched:sched_switch: error: Couldn't parse field variable
  Command:
hist:keys=next_pid:onmatch(sched.sched_waking).my_synth_event(pid,pid1)
                          ^
[  8.999880] : error: Couldn't find field
  Command:
           ^
[  8.999880] : error: Couldn't parse field variable
  Command:
           ^
[  8.999880] : error: Couldn't find field
  Command:
           ^
[  8.999880] : error: Couldn't create histogram for field
  Command:
           ^

Link: https://lore.kernel.org/linux-trace-kernel/20221207135326.3483216-1-zhengyejian1@huawei.com

Cc: <mhiramat@kernel.org>
Cc: <zanussi@kernel.org>
Fixes: f404da6e1d46 ("tracing: Add 'last error' error facility for hist triggers")
Signed-off-by: Zheng Yejian <zhengyejian1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_hist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index 8264b28d5a57e..fcaf226b77446 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -6576,7 +6576,7 @@ enable:
 	if (se)
 		se->ref++;
  out:
-	if (ret == 0)
+	if (ret == 0 && glob[0])
 		hist_err_clear();
 
 	return ret;
-- 
GitLab


From 88ca6a71dcab4a4ba6e6e2ff66415a5c4f86e874 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Fri, 9 Dec 2022 10:11:51 -0500
Subject: [PATCH 3637/4122] ring-buffer: Handle resize in early boot up

With the new command line option that allows trace event triggers to be
added at boot, the "snapshot" trigger will allocate the snapshot buffer
very early, when interrupts can not be enabled. Allocating the ring buffer
is not the problem, but it also resizes it, which is, as the resize code
does synchronization that can not be preformed at early boot.

To handle this, first change the raw_spin_lock_irq() in rb_insert_pages()
to raw_spin_lock_irqsave(), such that the unlocking of that spin lock will
not enable interrupts.

Next, where it calls schedule_work_on(), disable migration and check if
the CPU to update is the current CPU, and if so, perform the work
directly, otherwise re-enable migration and call the schedule_work_on() to
the CPU that is being updated. The rb_insert_pages() just needs to be run
on the CPU that it is updating, and does not need preemption nor
interrupts disabled when calling it.

Link: https://lore.kernel.org/lkml/Y5J%2FCajlNh1gexvo@google.com/
Link: https://lore.kernel.org/linux-trace-kernel/20221209101151.1fec1167@gandalf.local.home
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>

Fixes: a01fdc897fa5 ("tracing: Add trace_trigger kernel command line option")
Reported-by: Ross Zwisler <zwisler@google.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Tested-by: Ross Zwisler <zwisler@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ring_buffer.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 843818ee4814e..c366a0a9ddba4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2062,8 +2062,10 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct list_head *pages = &cpu_buffer->new_pages;
 	int retries, success;
+	unsigned long flags;
 
-	raw_spin_lock_irq(&cpu_buffer->reader_lock);
+	/* Can be called at early boot up, where interrupts must not been enabled */
+	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	/*
 	 * We are holding the reader lock, so the reader page won't be swapped
 	 * in the ring buffer. Now we are racing with the writer trying to
@@ -2120,7 +2122,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer)
 	 * tracing
 	 */
 	RB_WARN_ON(cpu_buffer, !success);
-	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	/* free pages if they weren't inserted */
 	if (!success) {
@@ -2248,8 +2250,16 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 				rb_update_pages(cpu_buffer);
 				cpu_buffer->nr_pages_to_update = 0;
 			} else {
-				schedule_work_on(cpu,
-						&cpu_buffer->update_pages_work);
+				/* Run directly if possible. */
+				migrate_disable();
+				if (cpu != smp_processor_id()) {
+					migrate_enable();
+					schedule_work_on(cpu,
+							 &cpu_buffer->update_pages_work);
+				} else {
+					update_pages_handler(&cpu_buffer->update_pages_work);
+					migrate_enable();
+				}
 			}
 		}
 
@@ -2298,9 +2308,17 @@ int ring_buffer_resize(struct trace_buffer *buffer, unsigned long size,
 		if (!cpu_online(cpu_id))
 			rb_update_pages(cpu_buffer);
 		else {
-			schedule_work_on(cpu_id,
-					 &cpu_buffer->update_pages_work);
-			wait_for_completion(&cpu_buffer->update_done);
+			/* Run directly if possible. */
+			migrate_disable();
+			if (cpu_id == smp_processor_id()) {
+				rb_update_pages(cpu_buffer);
+				migrate_enable();
+			} else {
+				migrate_enable();
+				schedule_work_on(cpu_id,
+						 &cpu_buffer->update_pages_work);
+				wait_for_completion(&cpu_buffer->update_done);
+			}
 		}
 
 		cpu_buffer->nr_pages_to_update = 0;
-- 
GitLab


From 98629dadcd020cba690ce0c49a564a1166444646 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <zwisler@chromium.org>
Date: Thu, 8 Dec 2022 17:33:10 -0700
Subject: [PATCH 3638/4122] tracing: remove unnecessary trace_trigger ifdef

The trace_trigger command line option introduced by
commit a01fdc897fa5 ("tracing: Add trace_trigger kernel command line option")
doesn't need to depend on the CONFIG_HIST_TRIGGERS kernel config option.

This code doesn't depend on the histogram code, and the run-time
selection of triggers is usable without CONFIG_HIST_TRIGGERS.

Link: https://lore.kernel.org/linux-trace-kernel/20221209003310.1737039-1-zwisler@google.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Joel Fernandes <joel@joelfernandes.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Fixes: a01fdc897fa5 ("tracing: Add trace_trigger kernel command line option")
Signed-off-by: Ross Zwisler <zwisler@google.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3bfaf560ecc46..33e0b4f8ebe62 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -2796,7 +2796,6 @@ trace_create_new_event(struct trace_event_call *call,
 	return file;
 }
 
-#ifdef CONFIG_HIST_TRIGGERS
 #define MAX_BOOT_TRIGGERS 32
 
 static struct boot_triggers {
@@ -2832,7 +2831,6 @@ static __init int setup_trace_triggers(char *str)
 	return 1;
 }
 __setup("trace_trigger=", setup_trace_triggers);
-#endif
 
 /* Add an event to a trace directory */
 static int
@@ -2850,7 +2848,6 @@ __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
 		return event_define_fields(call);
 }
 
-#ifdef CONFIG_HIST_TRIGGERS
 static void trace_early_triggers(struct trace_event_file *file, const char *name)
 {
 	int ret;
@@ -2868,9 +2865,6 @@ static void trace_early_triggers(struct trace_event_file *file, const char *name
 			       bootup_triggers[i].event);
 	}
 }
-#else
-static inline void trace_early_triggers(struct trace_event_file *file, const char *name) { }
-#endif
 
 /*
  * Just create a descriptor for early init. A descriptor is required
-- 
GitLab


From ec370890f92ba8ad5476a34068655b06ba48def7 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Mon, 5 Dec 2022 09:27:38 +0100
Subject: [PATCH 3639/4122] tracing/osnoise: Make osnoise_options static

Make osnoise_options static, as reported by the kernel test robot.

Link: https://lkml.kernel.org/r/63255826485400d7a2270e9c5e66111079671e7a.1670228712.git.bristot@kernel.org

Reported-by: kernel test robot <lkp@intel.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 3f10dd1f2f1ce..8ba82c71268f7 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -59,8 +59,8 @@ enum osnoise_options_index {
 
 static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD" };
 
-#define OSN_DEFAULT_OPTIONS	0x2
-unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
+#define OSN_DEFAULT_OPTIONS		0x2
+static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
 
 /*
  * trace_array of the enabled osnoise/timerlat instances.
-- 
GitLab


From bfd5a5e82d22da43afa0e2bb9fb72339aa79c6cc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Mon, 5 Dec 2022 10:21:52 +0000
Subject: [PATCH 3640/4122] tracing: Fix some checker warnings

Fix some checker warnings in the trace code by adding __printf attributes
to a number of trace functions and their declarations.

Changes:
========
ver #2)
 - Dropped the fix for the unconditional tracing_max_lat_fops decl[1].

Link: https://lore.kernel.org/r/20221205180617.9b9d3971cbe06ee536603523@kernel.org/ [1]
Link: https://lore.kernel.org/r/166992525941.1716618.13740663757583361463.stgit@warthog.procyon.org.uk/ # v1
Link: https://lkml.kernel.org/r/167023571258.382307.15314866482834835192.stgit@warthog.procyon.org.uk

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/linux/trace_events.h | 3 ++-
 include/linux/trace_seq.h    | 3 ++-
 kernel/trace/trace.h         | 2 +-
 kernel/trace/trace_output.c  | 5 +++--
 4 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index bb2053246d6a2..4342e996bcdb7 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -234,7 +234,8 @@ void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
 void tracing_record_cmdline(struct task_struct *task);
 void tracing_record_tgid(struct task_struct *task);
 
-int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...);
+int trace_output_call(struct trace_iterator *iter, char *name, char *fmt, ...)
+	 __printf(3, 4);
 
 struct event_filter;
 
diff --git a/include/linux/trace_seq.h b/include/linux/trace_seq.h
index 5a2c650d9e1c1..0c4c7587d6c36 100644
--- a/include/linux/trace_seq.h
+++ b/include/linux/trace_seq.h
@@ -97,7 +97,8 @@ extern int trace_seq_hex_dump(struct trace_seq *s, const char *prefix_str,
 			      const void *buf, size_t len, bool ascii);
 
 #else /* CONFIG_TRACING */
-static inline void trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+static inline __printf(2, 3)
+void trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
 {
 }
 static inline void
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9dc920b01c17f..e46a49269be20 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -614,7 +614,7 @@ void trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
 bool trace_is_tracepoint_string(const char *str);
 const char *trace_event_format(struct trace_iterator *iter, const char *fmt);
 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
-			 va_list ap);
+			 va_list ap) __printf(2, 0);
 
 int trace_empty(struct trace_iterator *iter);
 
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index f0ba97121345b..57a13b61f1865 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -322,8 +322,9 @@ void trace_event_printf(struct trace_iterator *iter, const char *fmt, ...)
 }
 EXPORT_SYMBOL(trace_event_printf);
 
-static int trace_output_raw(struct trace_iterator *iter, char *name,
-			    char *fmt, va_list ap)
+static __printf(3, 0)
+int trace_output_raw(struct trace_iterator *iter, char *name,
+		     char *fmt, va_list ap)
 {
 	struct trace_seq *s = &iter->seq;
 
-- 
GitLab


From 0e162c6f1c8f7f502b3978f5a37ed6fa2e4d21f9 Mon Sep 17 00:00:00 2001
From: Bagas Sanjaya <bagasdotme@gmail.com>
Date: Fri, 25 Nov 2022 10:43:00 +0700
Subject: [PATCH 3641/4122] Documentation/osnoise: Escape underscore of NO_
 prefix

kernel test robot reported unknown target name warning:

Documentation/trace/osnoise-tracer.rst:112: WARNING: Unknown target name: "no".

The warning causes NO_ prefix to be rendered as link text instead, which
points to non-existent link target.

Escape the prefix underscore to fix the warning.

Link: https://lkml.kernel.org/r/20221125034300.24168-1-bagasdotme@gmail.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Cc: GNU/Weeb Mailing List <gwml@vger.gnuweeb.org>
Link: https://lore.kernel.org/linux-doc/202211240447.HxRNftE5-lkp@intel.com/
Fixes: 67543cd6b8eee5 ("Documentation/osnoise: Add osnoise/options documentation")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
Acked-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 Documentation/trace/osnoise-tracer.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst
index 3c675ed82b27a..fdd562d7c22d1 100644
--- a/Documentation/trace/osnoise-tracer.rst
+++ b/Documentation/trace/osnoise-tracer.rst
@@ -111,7 +111,7 @@ The tracer has a set of options inside the osnoise directory, they are:
    be used, which is currently 5 us.
  - osnoise/options: a set of on/off options that can be enabled by
    writing the option name to the file or disabled by writing the option
-   name preceded with the 'NO_' prefix. For example, writing
+   name preceded with the 'NO\_' prefix. For example, writing
    NO_OSNOISE_WORKLOAD disables the OSNOISE_WORKLOAD option. The
    special DEAFAULTS option resets all options to the default value.
 
-- 
GitLab


From 1603dda47714cebe8a29b2154407da7a929d13f4 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Fri, 9 Dec 2022 23:05:52 +0100
Subject: [PATCH 3642/4122] tracing/osnoise: Add PANIC_ON_STOP option

Often the latency observed in a CPU is not caused by the work being done
in the CPU itself, but by work done on another CPU that causes the
hardware to stall all CPUs. In this case, it is interesting to know
what is happening on ALL CPUs, and the best way to do this is via
crash dump analysis.

Add the PANIC_ON_STOP option to osnoise/timerlat tracers. The default
behavior is having this option off. When enabled by the user, the system
will panic after hitting a stop tracing condition.

This option was motivated by a real scenario that Juri Lelli and I
were debugging.

Link: https://lkml.kernel.org/r/249ce4287c6725543e6db845a6e0df621dc67db5.1670623111.git.bristot@kernel.org

Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 8ba82c71268f7..5a7613942223f 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -54,10 +54,11 @@
 enum osnoise_options_index {
 	OSN_DEFAULTS = 0,
 	OSN_WORKLOAD,
+	OSN_PANIC_ON_STOP,
 	OSN_MAX
 };
 
-static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD" };
+static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD", "PANIC_ON_STOP" };
 
 #define OSN_DEFAULT_OPTIONS		0x2
 static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
@@ -1270,6 +1271,9 @@ static __always_inline void osnoise_stop_tracing(void)
 		trace_array_printk_buf(tr->array_buffer.buffer, _THIS_IP_,
 				"stop tracing hit on cpu %d\n", smp_processor_id());
 
+		if (test_bit(OSN_PANIC_ON_STOP, &osnoise_options))
+			panic("tracer hit stop condition on CPU %d\n", smp_processor_id());
+
 		tracer_tracing_off(tr);
 	}
 	rcu_read_unlock();
-- 
GitLab


From b5dce2002567a9b1a83ef3e3a8678d8c32be2a78 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Fri, 9 Dec 2022 23:05:53 +0100
Subject: [PATCH 3643/4122] tracing/osnoise: Add preempt and/or irq disabled
 options

The osnoise workload runs with preemption and IRQs enabled in such
a way as to allow all sorts of noise to disturb osnoise's execution.
hwlat tracer has a similar workload but works with irq disabled,
allowing only NMIs and the hardware to generate noise.

While thinking about adding an options file to hwlat tracer to
allow the system to panic, and other features I was thinking
to add, like having a tracepoint at each noise detection, it
came to my mind that is easier to make osnoise and also do
hardware latency detection than making hwlat "feature compatible"
with osnoise.

Other points are:
 - osnoise already has an independent cpu file.
 - osnoise has a more intuitive interface, e.g., runtime/period vs.
   window/width (and people often need help remembering what it is).
 - osnoise: tracepoints
 - osnoise stop options
 - osnoise options file itself

Moreover, the user-space side (in rtla) is simplified by reusing the
existing osnoise code.

Finally, people have been asking me about using osnoise for hw latency
detection, and I have to explain that it was sufficient but not
necessary. These options make it sufficient and necessary.

Adding a Suggested-by Clark, as he often asked me about this
possibility.

Link: https://lkml.kernel.org/r/d9c6c19135497054986900f94c8e47410b15316a.1670623111.git.bristot@kernel.org

Cc: Suggested-by: Clark Williams <williams@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_osnoise.c | 48 ++++++++++++++++++++++++++++++++----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 5a7613942223f..94c1b5eb1dc02 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -55,10 +55,17 @@ enum osnoise_options_index {
 	OSN_DEFAULTS = 0,
 	OSN_WORKLOAD,
 	OSN_PANIC_ON_STOP,
+	OSN_PREEMPT_DISABLE,
+	OSN_IRQ_DISABLE,
 	OSN_MAX
 };
 
-static const char * const osnoise_options_str[OSN_MAX] = { "DEFAULTS", "OSNOISE_WORKLOAD", "PANIC_ON_STOP" };
+static const char * const osnoise_options_str[OSN_MAX] = {
+							"DEFAULTS",
+							"OSNOISE_WORKLOAD",
+							"PANIC_ON_STOP",
+							"OSNOISE_PREEMPT_DISABLE",
+							"OSNOISE_IRQ_DISABLE" };
 
 #define OSN_DEFAULT_OPTIONS		0x2
 static unsigned long osnoise_options	= OSN_DEFAULT_OPTIONS;
@@ -1308,18 +1315,26 @@ static void notify_new_max_latency(u64 latency)
  */
 static int run_osnoise(void)
 {
+	bool disable_irq = test_bit(OSN_IRQ_DISABLE, &osnoise_options);
 	struct osnoise_variables *osn_var = this_cpu_osn_var();
 	u64 start, sample, last_sample;
 	u64 last_int_count, int_count;
 	s64 noise = 0, max_noise = 0;
 	s64 total, last_total = 0;
 	struct osnoise_sample s;
+	bool disable_preemption;
 	unsigned int threshold;
 	u64 runtime, stop_in;
 	u64 sum_noise = 0;
 	int hw_count = 0;
 	int ret = -1;
 
+	/*
+	 * Disabling preemption is only required if IRQs are enabled,
+	 * and the options is set on.
+	 */
+	disable_preemption = !disable_irq && test_bit(OSN_PREEMPT_DISABLE, &osnoise_options);
+
 	/*
 	 * Considers the current thread as the workload.
 	 */
@@ -1335,6 +1350,15 @@ static int run_osnoise(void)
 	 */
 	threshold = tracing_thresh ? : 5000;
 
+	/*
+	 * Apply PREEMPT and IRQ disabled options.
+	 */
+	if (disable_irq)
+		local_irq_disable();
+
+	if (disable_preemption)
+		preempt_disable();
+
 	/*
 	 * Make sure NMIs see sampling first
 	 */
@@ -1422,16 +1446,21 @@ static int run_osnoise(void)
 		 * cond_resched()
 		 */
 		if (IS_ENABLED(CONFIG_PREEMPT_RCU)) {
-			local_irq_disable();
+			if (!disable_irq)
+				local_irq_disable();
+
 			rcu_momentary_dyntick_idle();
-			local_irq_enable();
+
+			if (!disable_irq)
+				local_irq_enable();
 		}
 
 		/*
 		 * For the non-preemptive kernel config: let threads runs, if
-		 * they so wish.
+		 * they so wish, unless set not do to so.
 		 */
-		cond_resched();
+		if (!disable_irq && !disable_preemption)
+			cond_resched();
 
 		last_sample = sample;
 		last_int_count = int_count;
@@ -1450,6 +1479,15 @@ static int run_osnoise(void)
 	 */
 	barrier();
 
+	/*
+	 * Return to the preemptive state.
+	 */
+	if (disable_preemption)
+		preempt_enable();
+
+	if (disable_irq)
+		local_irq_enable();
+
 	/*
 	 * Save noise info.
 	 */
-- 
GitLab


From d358dfe60b7724ad0acb8cf8375a608b983e2b59 Mon Sep 17 00:00:00 2001
From: Daniel Bristot de Oliveira <bristot@kernel.org>
Date: Fri, 9 Dec 2022 23:05:54 +0100
Subject: [PATCH 3644/4122] Documentation/osnoise: Add osnoise/options
 documentation

Add the documentation about the osnoise/options file, the options,
and some additional explanation about the OSNOISE_WORKLOAD option.

Link: https://lkml.kernel.org/r/fde5567a4bae364f67fd1e9a644d1d62862618a6.1670623111.git.bristot@kernel.org

Cc: Daniel Bristot de Oliveira <bristot@kernel.org>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Clark Williams <williams@redhat.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Daniel Bristot de Oliveira <bristot@kernel.org>
Reviewed-by: Bagas Sanjaya <bagasdotme@gmail.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 Documentation/trace/osnoise-tracer.rst | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/Documentation/trace/osnoise-tracer.rst b/Documentation/trace/osnoise-tracer.rst
index fdd562d7c22d1..140ef2533d26a 100644
--- a/Documentation/trace/osnoise-tracer.rst
+++ b/Documentation/trace/osnoise-tracer.rst
@@ -92,8 +92,8 @@ Note that the example above shows a high number of HW noise samples.
 The reason being is that this sample was taken on a virtual machine,
 and the host interference is detected as a hardware interference.
 
-Tracer options
----------------------
+Tracer Configuration
+--------------------
 
 The tracer has a set of options inside the osnoise directory, they are:
 
@@ -115,6 +115,22 @@ The tracer has a set of options inside the osnoise directory, they are:
    NO_OSNOISE_WORKLOAD disables the OSNOISE_WORKLOAD option. The
    special DEAFAULTS option resets all options to the default value.
 
+Tracer Options
+--------------
+
+The osnoise/options file exposes a set of on/off configuration options for
+the osnoise tracer. These options are:
+
+ - DEFAULTS: reset the options to the default value.
+ - OSNOISE_WORKLOAD: do not dispatch osnoise workload (see dedicated
+   section below).
+ - PANIC_ON_STOP: call panic() if the tracer stops. This option serves to
+   capture a vmcore.
+ - OSNOISE_PREEMPT_DISABLE: disable preemption while running the osnoise
+   workload, allowing only IRQ and hardware-related noise.
+ - OSNOISE_IRQ_DISABLE: disable IRQs while running the osnoise workload,
+   allowing only NMIs and hardware-related noise, like hwlat tracer.
+
 Additional Tracing
 ------------------
 
-- 
GitLab


From 5fb733d7bd6949e90028efdce8bd528c6ab7cf1e Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Wed, 23 Nov 2022 09:48:05 +0800
Subject: [PATCH 3645/4122] rtc: st-lpc: Add missing clk_disable_unprepare in
 st_rtc_probe()

The clk_disable_unprepare() should be called in the error handling
of clk_get_rate(), fix it.

Fixes: b5b2bdfc2893 ("rtc: st: Add new driver for ST's LPC RTC")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Link: https://lore.kernel.org/r/20221123014805.1993052-1-cuigaosheng1@huawei.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-st-lpc.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/rtc/rtc-st-lpc.c b/drivers/rtc/rtc-st-lpc.c
index bdb20f63254e2..0f8e4231098ef 100644
--- a/drivers/rtc/rtc-st-lpc.c
+++ b/drivers/rtc/rtc-st-lpc.c
@@ -238,6 +238,7 @@ static int st_rtc_probe(struct platform_device *pdev)
 
 	rtc->clkrate = clk_get_rate(rtc->clk);
 	if (!rtc->clkrate) {
+		clk_disable_unprepare(rtc->clk);
 		dev_err(&pdev->dev, "Unable to fetch clock rate\n");
 		return -EINVAL;
 	}
-- 
GitLab


From 90cd5c88830140c9fade92a8027e0fb2c6e4cc49 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Wed, 23 Nov 2022 09:59:53 +0800
Subject: [PATCH 3646/4122] rtc: pic32: Move devm_rtc_allocate_device earlier
 in pic32_rtc_probe()

The pic32_rtc_enable(pdata, 0) and clk_disable_unprepare(pdata->clk)
should be called in the error handling of devm_rtc_allocate_device(),
so we should move devm_rtc_allocate_device earlier in pic32_rtc_probe()
to fix it.

Fixes: 6515e23b9fde ("rtc: pic32: convert to devm_rtc_allocate_device")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Link: https://lore.kernel.org/r/20221123015953.1998521-1-cuigaosheng1@huawei.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pic32.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/rtc/rtc-pic32.c b/drivers/rtc/rtc-pic32.c
index 7fb9145c43bd5..fa351ac201587 100644
--- a/drivers/rtc/rtc-pic32.c
+++ b/drivers/rtc/rtc-pic32.c
@@ -324,16 +324,16 @@ static int pic32_rtc_probe(struct platform_device *pdev)
 
 	spin_lock_init(&pdata->alarm_lock);
 
+	pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
+	if (IS_ERR(pdata->rtc))
+		return PTR_ERR(pdata->rtc);
+
 	clk_prepare_enable(pdata->clk);
 
 	pic32_rtc_enable(pdata, 1);
 
 	device_init_wakeup(&pdev->dev, 1);
 
-	pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
-	if (IS_ERR(pdata->rtc))
-		return PTR_ERR(pdata->rtc);
-
 	pdata->rtc->ops = &pic32_rtcops;
 	pdata->rtc->range_min = RTC_TIMESTAMP_BEGIN_2000;
 	pdata->rtc->range_max = RTC_TIMESTAMP_END_2099;
-- 
GitLab


From 800b55b4dc62c4348fbc1f7570a8ac8be3f0eb66 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Fri, 18 Nov 2022 15:33:32 +0100
Subject: [PATCH 3647/4122] dt-bindings: rtc: convert rtc-meson.txt to
 dt-schema

Convert the Amlogic Meson6 RTC bindings to dt-schema.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221117-b4-amlogic-bindings-convert-v1-6-3f025599b968@linaro.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../bindings/rtc/amlogic,meson6-rtc.yaml      | 62 +++++++++++++++++++
 .../devicetree/bindings/rtc/rtc-meson.txt     | 35 -----------
 2 files changed, 62 insertions(+), 35 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml
 delete mode 100644 Documentation/devicetree/bindings/rtc/rtc-meson.txt

diff --git a/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml
new file mode 100644
index 0000000000000..8bf7d3a9be98c
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/amlogic,meson6-rtc.yaml
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/amlogic,meson6-rtc.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Amlogic Meson6, Meson8, Meson8b and Meson8m2 RTC
+
+maintainers:
+  - Neil Armstrong <neil.armstrong@linaro.org>
+  - Martin Blumenstingl <martin.blumenstingl@googlemail.com>
+
+allOf:
+  - $ref: rtc.yaml#
+  - $ref: /schemas/nvmem/nvmem.yaml#
+
+properties:
+  compatible:
+    enum:
+      - amlogic,meson6-rtc
+      - amlogic,meson8-rtc
+      - amlogic,meson8b-rtc
+      - amlogic,meson8m2-rtc
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  vdd-supply: true
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    rtc: rtc@740 {
+        compatible = "amlogic,meson6-rtc";
+        reg = <0x740 0x14>;
+        interrupts = <GIC_SPI 72 IRQ_TYPE_EDGE_RISING>;
+        clocks = <&rtc32k_xtal>;
+        vdd-supply = <&rtc_vdd>;
+        resets = <&reset_rtc>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+
+        mac@0 {
+            reg = <0 6>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/rtc/rtc-meson.txt b/Documentation/devicetree/bindings/rtc/rtc-meson.txt
deleted file mode 100644
index e921fe66a3622..0000000000000
--- a/Documentation/devicetree/bindings/rtc/rtc-meson.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-* Amlogic Meson6, Meson8, Meson8b and Meson8m2 RTC
-
-Required properties:
-- compatible: should be one of the following describing the hardware:
-	* "amlogic,meson6-rtc"
-	* "amlogic,meson8-rtc"
-	* "amlogic,meson8b-rtc"
-	* "amlogic,meson8m2-rtc"
-
-- reg: physical register space for the controller's memory mapped registers.
-- interrupts: the interrupt line of the RTC block.
-- clocks: reference to the external 32.768kHz crystal oscillator.
-- vdd-supply: reference to the power supply of the RTC block.
-- resets: reset controller reference to allow reset of the controller
-
-Optional properties for the battery-backed non-volatile memory:
-- #address-cells: should be 1 to address the battery-backed non-volatile memory
-- #size-cells: should be 1 to reference the battery-backed non-volatile memory
-
-Optional child nodes:
-- see ../nvmem/nvmem.txt
-
-Example:
-
-	rtc: rtc@740 {
-		compatible = "amlogic,meson6-rtc";
-		reg = <0x740 0x14>;
-		interrupts = <GIC_SPI 72 IRQ_TYPE_EDGE_RISING>;
-		clocks = <&rtc32k_xtal>;
-		vdd-supply = <&rtc_vdd>;
-		resets = <&reset RESET_RTC>;
-
-		#address-cells = <1>;
-		#size-cells = <1>;
-	};
-- 
GitLab


From e42f9c2e6aad583986e91979bf2fce47aaced1c2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 9 Dec 2022 10:21:56 -0400
Subject: [PATCH 3648/4122] RDMA: Add missed netdev_put() for the
 netdevice_tracker

The netdev core will detect if any untracked puts are done on tracked
pointers and throw refcount warnings:

  refcount_t: decrement hit 0; leaking memory.
  WARNING: CPU: 1 PID: 33 at lib/refcount.c:31 refcount_warn_saturate+0x1d7/0x1f0 lib/refcount.c:31
  Modules linked in:
  CPU: 1 PID: 33 Comm: kworker/u4:2 Not tainted 6.1.0-rc8-next-20221207-syzkaller #0
  Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022
  Workqueue: ib-unreg-wq ib_unregister_work
  RIP: 0010:refcount_warn_saturate+0x1d7/0x1f0 lib/refcount.c:31
  Code: 05 5a 60 51 0a 01 e8 35 0a b5 05 0f 0b e9 d3 fe ff ff e8 6c 9b 75 fd 48 c7 c7 c0 6d a6 8a c6 05 37 60 51 0a 01 e8 16 0a b5 05 <0f> 0b e9 b4 fe
  +ff ff 48 89 ef e8 5a b5 c3 fd e9 5c fe ff ff 0f 1f
  RSP: 0018:ffffc90000aa7b30 EFLAGS: 00010082
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
  RDX: ffff8880172f9d40 RSI: ffffffff8166b1dc RDI: fffff52000154f58
  RBP: ffff88807906c600 R08: 0000000000000005 R09: 0000000000000000
  R10: 0000000080000001 R11: 0000000000000000 R12: 1ffff92000154f6b
  R13: 0000000000000000 R14: ffff88807906c600 R15: ffff888046894000
  FS:  0000000000000000(0000) GS:ffff8880b9900000(0000) knlGS:0000000000000000
  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
  CR2: 00007ffe350a8ff8 CR3: 000000007a9e7000 CR4: 00000000003526e0
  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
  Call Trace:
   <TASK>
   __refcount_dec include/linux/refcount.h:344 [inline]
   refcount_dec include/linux/refcount.h:359 [inline]
   ref_tracker_free+0x539/0x6b0 lib/ref_tracker.c:118
   netdev_tracker_free include/linux/netdevice.h:4039 [inline]
   netdev_put include/linux/netdevice.h:4056 [inline]
   dev_put include/linux/netdevice.h:4082 [inline]
   free_netdevs+0x1f8/0x470 drivers/infiniband/core/device.c:2204
   __ib_unregister_device+0xa0/0x1a0 drivers/infiniband/core/device.c:1478
   ib_unregister_work+0x19/0x30 drivers/infiniband/core/device.c:1586
   process_one_work+0x9bf/0x1710 kernel/workqueue.c:2289
   worker_thread+0x669/0x1090 kernel/workqueue.c:2436
   kthread+0x2e8/0x3a0 kernel/kthread.c:376
   ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308

So change the missed dev_put for pdata->netdev to also follow the tracker.

Fixes: 09f530f0c6d6 ("RDMA: Add netdevice_tracker to ib_device_set_netdev()")
Reported-by: syzbot+3fd8326d9a0812d19218@syzkaller.appspotmail.com
Reported-by: syzbot+a1ed8ffe3121380cd5dd@syzkaller.appspotmail.com
Reported-by: syzbot+8d0a099c8a6d1e4e601c@syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/0-v1-e99919867b8d+1e2-netdev_tracker2_jgg@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/core/device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c
index 9211d2794aac7..894c06846224e 100644
--- a/drivers/infiniband/core/device.c
+++ b/drivers/infiniband/core/device.c
@@ -2201,7 +2201,7 @@ static void free_netdevs(struct ib_device *ib_dev)
 			 * comparisons after the put
 			 */
 			rcu_assign_pointer(pdata->netdev, NULL);
-			dev_put(ndev);
+			netdev_put(ndev, &pdata->netdev_tracker);
 		}
 		spin_unlock_irqrestore(&pdata->netdev_lock, flags);
 	}
-- 
GitLab


From efa80b028c7a9c74fd875517aa0fc9fd8d610ed0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 6 Dec 2022 13:07:30 +0900
Subject: [PATCH 3649/4122] kbuild: move -Werror from KBUILD_CFLAGS to
 KBUILD_CPPFLAGS

CONFIG_WERROR turns warnings into errors, which happens only for *.c
files because -Werror is added to KBUILD_CFLAGS.

Adding it to KBUILD_CPPFLAGS makes more sense because preprocessors
understand the -Werror option.

For example, you can put a #warning directive in any preprocessed code.

    warning: #warning "this is a warning message" [-Wcpp]

If -Werror is added, it is promoted to an error.

    error: #warning "this is a warning message" [-Werror=cpp]

This commit moves -Werror to KBUILD_CPPFLAGS so it works in the same way
for *.c, *.S, *.lds.S or whatever needs preprocessing.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
---
 Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index fbd9ff4a61e70..6b047daa46cc1 100644
--- a/Makefile
+++ b/Makefile
@@ -866,7 +866,8 @@ stackp-flags-$(CONFIG_STACKPROTECTOR_STRONG)      := -fstack-protector-strong
 
 KBUILD_CFLAGS += $(stackp-flags-y)
 
-KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CPPFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CPPFLAGS += $(KBUILD_CPPFLAGS-y)
 KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
 
 KBUILD_RUSTFLAGS-$(CONFIG_WERROR) += -Dwarnings
-- 
GitLab


From 80b6093b55e31c2c40ff082fb32523d4e852954f Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 6 Dec 2022 13:07:31 +0900
Subject: [PATCH 3650/4122] kbuild: add -Wundef to KBUILD_CPPFLAGS for W=1
 builds

The use of an undefined macro in an #if directive is warned, but only
in *.c files. No warning from other files such as *.S, *.lds.S.

Since -Wundef is a preprocessor-related warning, it should be added to
KBUILD_CPPFLAGS instead of KBUILD_CFLAGS.

My previous attempt [1] uncovered several issues. I could not finish
fixing them all.

This commit adds -Wundef to KBUILD_CPPFLAGS for W=1 builds in order to
block new breakages. (The kbuild test robot tests with W=1)

We can fix the warnings one by one. After fixing all of them, we can
make it default in the top Makefile, and remove -Wundef from
KBUILD_CFLAGS.

[1]: https://lore.kernel.org/all/20221012180118.331005-2-masahiroy@kernel.org/

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
---
 scripts/Makefile.extrawarn | 1 +
 1 file changed, 1 insertion(+)

diff --git a/scripts/Makefile.extrawarn b/scripts/Makefile.extrawarn
index 6bbba36c59695..40cd13eca82e8 100644
--- a/scripts/Makefile.extrawarn
+++ b/scripts/Makefile.extrawarn
@@ -38,6 +38,7 @@ KBUILD_CFLAGS += -Wno-sign-compare
 KBUILD_CFLAGS += -Wno-type-limits
 KBUILD_CFLAGS += -Wno-shift-negative-value
 
+KBUILD_CPPFLAGS += -Wundef
 KBUILD_CPPFLAGS += -DKBUILD_EXTRA_WARN1
 
 else
-- 
GitLab


From dbc94a0fb81771a38733c0e8f2ea8c4fa6934dc1 Mon Sep 17 00:00:00 2001
From: Dragos Tatulea <dtatulea@nvidia.com>
Date: Thu, 8 Dec 2022 09:52:54 +0200
Subject: [PATCH 3651/4122] IB/IPoIB: Fix queue count inconsistency for PKEY
 child interfaces

There are 2 ways to create IPoIB PKEY child interfaces:
1) Writing a PKEY to /sys/class/net/<ib parent interface>/create_child.
2) Using netlink with iproute.

While with sysfs the child interface has the same number of tx and
rx queues as the parent, with netlink there will always be 1 tx
and 1 rx queue for the child interface. That's because the
get_num_tx/rx_queues() netlink ops are missing and the default value
of 1 is taken for the number of queues (in rtnl_create_link()).

This change adds the get_num_tx/rx_queues() ops which allows for
interfaces with multiple queues to be created over netlink. This
constant only represents the max number of tx and rx queues on that
net device.

Fixes: 9baa0b036410 ("IB/ipoib: Add rtnl_link_ops support")
Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com>
Link: https://lore.kernel.org/r/f4a42c8aa43c02d5ae5559a60c3e5e0f18c82531.1670485816.git.leonro@nvidia.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
---
 drivers/infiniband/ulp/ipoib/ipoib_netlink.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
index ea16ba5d8da6c..9ad8d98562752 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_netlink.c
@@ -41,6 +41,11 @@ static const struct nla_policy ipoib_policy[IFLA_IPOIB_MAX + 1] = {
 	[IFLA_IPOIB_UMCAST]	= { .type = NLA_U16 },
 };
 
+static unsigned int ipoib_get_max_num_queues(void)
+{
+	return min_t(unsigned int, num_possible_cpus(), 128);
+}
+
 static int ipoib_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
 	struct ipoib_dev_priv *priv = ipoib_priv(dev);
@@ -172,6 +177,8 @@ static struct rtnl_link_ops ipoib_link_ops __read_mostly = {
 	.changelink	= ipoib_changelink,
 	.get_size	= ipoib_get_size,
 	.fill_info	= ipoib_fill_info,
+	.get_num_rx_queues = ipoib_get_max_num_queues,
+	.get_num_tx_queues = ipoib_get_max_num_queues,
 };
 
 struct rtnl_link_ops *ipoib_get_link_ops(void)
-- 
GitLab


From 37ba7b005a7a4454046bd8659c7a9c5330552396 Mon Sep 17 00:00:00 2001
From: Namjae Jeon <linkinjeon@kernel.org>
Date: Sat, 29 Oct 2022 00:01:38 +0900
Subject: [PATCH 3652/4122] ksmbd: set SMB2_SESSION_FLAG_ENCRYPT_DATA when
 enforcing data encryption for this share

Currently, SMB2_SESSION_FLAG_ENCRYPT_DATA is always set session setup
response. Since this forces data encryption from the client, there is a
problem that data is always encrypted regardless of the use of the cifs
seal mount option. SMB2_SESSION_FLAG_ENCRYPT_DATA should be set according
to KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION flags, and in case of
KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF, encryption mode is turned off for
all connections.

Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/ksmbd/ksmbd_netlink.h |  1 +
 fs/ksmbd/smb2ops.c       | 10 ++++++++--
 fs/ksmbd/smb2pdu.c       |  8 +++++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/fs/ksmbd/ksmbd_netlink.h b/fs/ksmbd/ksmbd_netlink.h
index ff07c67f4565e..b6bd8311e6b4d 100644
--- a/fs/ksmbd/ksmbd_netlink.h
+++ b/fs/ksmbd/ksmbd_netlink.h
@@ -74,6 +74,7 @@ struct ksmbd_heartbeat {
 #define KSMBD_GLOBAL_FLAG_SMB2_LEASES		BIT(0)
 #define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION	BIT(1)
 #define KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL	BIT(2)
+#define KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF	BIT(3)
 
 /*
  * IPC request for ksmbd server startup
diff --git a/fs/ksmbd/smb2ops.c b/fs/ksmbd/smb2ops.c
index ab23da2120b94..e401302478c36 100644
--- a/fs/ksmbd/smb2ops.c
+++ b/fs/ksmbd/smb2ops.c
@@ -247,8 +247,9 @@ void init_smb3_02_server(struct ksmbd_conn *conn)
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
 
-	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION &&
-	    conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION)
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+	    (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+	     conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
 
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
@@ -271,6 +272,11 @@ int init_smb3_11_server(struct ksmbd_conn *conn)
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_LEASES)
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_LEASING;
 
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION ||
+	    (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF) &&
+	     conn->cli_cap & SMB2_GLOBAL_CAP_ENCRYPTION))
+		conn->vals->capabilities |= SMB2_GLOBAL_CAP_ENCRYPTION;
+
 	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB3_MULTICHANNEL)
 		conn->vals->capabilities |= SMB2_GLOBAL_CAP_MULTI_CHANNEL;
 
diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index b2fc85d440d03..56d68ddc409cd 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -903,7 +903,7 @@ static void decode_encrypt_ctxt(struct ksmbd_conn *conn,
 		return;
 	}
 
-	if (!(server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION))
+	if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION_OFF)
 		return;
 
 	for (i = 0; i < cph_cnt; i++) {
@@ -1508,7 +1508,8 @@ static int ntlm_authenticate(struct ksmbd_work *work)
 			return -EINVAL;
 		}
 		sess->enc = true;
-		rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+		if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION)
+			rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
 		/*
 		 * signing is disable if encryption is enable
 		 * on this session
@@ -1599,7 +1600,8 @@ static int krb5_authenticate(struct ksmbd_work *work)
 			return -EINVAL;
 		}
 		sess->enc = true;
-		rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
+		if (server_conf.flags & KSMBD_GLOBAL_FLAG_SMB2_ENCRYPTION)
+			rsp->SessionFlags = SMB2_SESSION_FLAG_ENCRYPT_DATA_LE;
 		sess->sign = false;
 	}
 
-- 
GitLab


From 7ecbe92696bb7fe32c80b6cf64736a0d157717a9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 11 Nov 2022 08:11:53 -0500
Subject: [PATCH 3653/4122] ksmbd: use F_SETLK when unlocking a file

ksmbd seems to be trying to use a cmd value of 0 when unlocking a file.
That activity requires a type of F_UNLCK with a cmd of F_SETLK. For
local POSIX locking, it doesn't matter much since vfs_lock_file ignores
@cmd, but filesystems that define their own ->lock operation expect to
see it set sanely.

Cc: David Howells <dhowells@redhat.com>
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: David Howells <dhowells@redhat.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/ksmbd/smb2pdu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 56d68ddc409cd..de8e367095c9d 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -6753,7 +6753,7 @@ static int smb2_set_flock_flags(struct file_lock *flock, int flags)
 	case SMB2_LOCKFLAG_UNLOCK:
 		ksmbd_debug(SMB, "received unlock request\n");
 		flock->fl_type = F_UNLCK;
-		cmd = 0;
+		cmd = F_SETLK;
 		break;
 	}
 
@@ -7131,7 +7131,7 @@ out:
 		rlock->fl_start = smb_lock->start;
 		rlock->fl_end = smb_lock->end;
 
-		rc = vfs_lock_file(filp, 0, rlock, NULL);
+		rc = vfs_lock_file(filp, F_SETLK, rlock, NULL);
 		if (rc)
 			pr_err("rollback unlock fail : %d\n", rc);
 
-- 
GitLab


From 30429388531b120902126a39cf64f877fc5c7773 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Tue, 15 Nov 2022 09:35:10 -0600
Subject: [PATCH 3654/4122] ksmbd: replace one-element arrays with
 flexible-array members

One-element arrays are deprecated, and we are replacing them with flexible
array members instead. So, replace one-element arrays with flexible-array
members in multiple structs in fs/ksmbd/smb_common.h and one in
fs/ksmbd/smb2pdu.h.

Important to mention is that doing a build before/after this patch results
in no binary output differences.

This helps with the ongoing efforts to tighten the FORTIFY_SOURCE routines
on memcpy() and help us make progress towards globally enabling
-fstrict-flex-arrays=3 [1].

Link: https://github.com/KSPP/linux/issues/242
Link: https://github.com/KSPP/linux/issues/79
Link: https://gcc.gnu.org/pipermail/gcc-patches/2022-October/602902.html [1]
Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/ksmbd/smb2pdu.c    |  4 ++--
 fs/ksmbd/smb2pdu.h    |  2 +-
 fs/ksmbd/smb_common.h | 12 ++++++------
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index de8e367095c9d..79261493a2128 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -3440,7 +3440,7 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
 		goto free_conv_name;
 	}
 
-	struct_sz = readdir_info_level_struct_sz(info_level) - 1 + conv_len;
+	struct_sz = readdir_info_level_struct_sz(info_level) + conv_len;
 	next_entry_offset = ALIGN(struct_sz, KSMBD_DIR_INFO_ALIGNMENT);
 	d_info->last_entry_off_align = next_entry_offset - struct_sz;
 
@@ -3692,7 +3692,7 @@ static int reserve_populate_dentry(struct ksmbd_dir_info *d_info,
 		return -EOPNOTSUPP;
 
 	conv_len = (d_info->name_len + 1) * 2;
-	next_entry_offset = ALIGN(struct_sz - 1 + conv_len,
+	next_entry_offset = ALIGN(struct_sz + conv_len,
 				  KSMBD_DIR_INFO_ALIGNMENT);
 
 	if (next_entry_offset > d_info->out_buf_len) {
diff --git a/fs/ksmbd/smb2pdu.h b/fs/ksmbd/smb2pdu.h
index 092fdd3f87505..aa5dbe54f5a18 100644
--- a/fs/ksmbd/smb2pdu.h
+++ b/fs/ksmbd/smb2pdu.h
@@ -443,7 +443,7 @@ struct smb2_posix_info {
 	/* SidBuffer contain two sids (UNIX user sid(16), UNIX group sid(16)) */
 	u8 SidBuffer[32];
 	__le32 name_len;
-	u8 name[1];
+	u8 name[];
 	/*
 	 * var sized owner SID
 	 * var sized group SID
diff --git a/fs/ksmbd/smb_common.h b/fs/ksmbd/smb_common.h
index 318c16fa81da3..e663ab9ea7590 100644
--- a/fs/ksmbd/smb_common.h
+++ b/fs/ksmbd/smb_common.h
@@ -277,14 +277,14 @@ struct file_directory_info {
 	__le64 AllocationSize;
 	__le32 ExtFileAttributes;
 	__le32 FileNameLength;
-	char FileName[1];
+	char FileName[];
 } __packed;   /* level 0x101 FF resp data */
 
 struct file_names_info {
 	__le32 NextEntryOffset;
 	__u32 FileIndex;
 	__le32 FileNameLength;
-	char FileName[1];
+	char FileName[];
 } __packed;   /* level 0xc FF resp data */
 
 struct file_full_directory_info {
@@ -299,7 +299,7 @@ struct file_full_directory_info {
 	__le32 ExtFileAttributes;
 	__le32 FileNameLength;
 	__le32 EaSize;
-	char FileName[1];
+	char FileName[];
 } __packed; /* level 0x102 FF resp */
 
 struct file_both_directory_info {
@@ -317,7 +317,7 @@ struct file_both_directory_info {
 	__u8   ShortNameLength;
 	__u8   Reserved;
 	__u8   ShortName[24];
-	char FileName[1];
+	char FileName[];
 } __packed; /* level 0x104 FFrsp data */
 
 struct file_id_both_directory_info {
@@ -337,7 +337,7 @@ struct file_id_both_directory_info {
 	__u8   ShortName[24];
 	__le16 Reserved2;
 	__le64 UniqueId;
-	char FileName[1];
+	char FileName[];
 } __packed;
 
 struct file_id_full_dir_info {
@@ -354,7 +354,7 @@ struct file_id_full_dir_info {
 	__le32 EaSize; /* EA size */
 	__le32 Reserved;
 	__le64 UniqueId; /* inode num - le since Samba puts ino in low 32 bit*/
-	char FileName[1];
+	char FileName[];
 } __packed; /* level 0x105 FF rsp data */
 
 struct smb_version_values {
-- 
GitLab


From bc044414fa0326a4e5c3c509c00b1fcaf621b5f4 Mon Sep 17 00:00:00 2001
From: Xiu Jianfeng <xiujianfeng@huawei.com>
Date: Wed, 16 Nov 2022 20:22:37 +0800
Subject: [PATCH 3655/4122] ksmbd: Fix resource leak in
 ksmbd_session_rpc_open()

When ksmbd_rpc_open() fails then it must call ksmbd_rpc_id_free() to
undo the result of ksmbd_ipc_id_alloc().

Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
Signed-off-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/ksmbd/mgmt/user_session.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/ksmbd/mgmt/user_session.c b/fs/ksmbd/mgmt/user_session.c
index 3fa2139a0b309..92b1603b5abeb 100644
--- a/fs/ksmbd/mgmt/user_session.c
+++ b/fs/ksmbd/mgmt/user_session.c
@@ -108,15 +108,17 @@ int ksmbd_session_rpc_open(struct ksmbd_session *sess, char *rpc_name)
 	entry->method = method;
 	entry->id = ksmbd_ipc_id_alloc();
 	if (entry->id < 0)
-		goto error;
+		goto free_entry;
 
 	resp = ksmbd_rpc_open(sess, entry->id);
 	if (!resp)
-		goto error;
+		goto free_id;
 
 	kvfree(resp);
 	return entry->id;
-error:
+free_id:
+	ksmbd_rpc_id_free(entry->id);
+free_entry:
 	list_del(&entry->list);
 	kfree(entry);
 	return -EINVAL;
-- 
GitLab


From 01f6c61bae3d658058ee6322af77acea26a5ee3a Mon Sep 17 00:00:00 2001
From: Marios Makassikis <mmakassikis@freebox.fr>
Date: Tue, 29 Nov 2022 12:19:33 +0100
Subject: [PATCH 3656/4122] ksmbd: Fix resource leak in smb2_lock()

"flock" is leaked if an error happens before smb2_lock_init(), as the
lock is not added to the lock_list to be cleaned up.

Signed-off-by: Marios Makassikis <mmakassikis@freebox.fr>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/ksmbd/smb2pdu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
index 79261493a2128..88d2c23369fe8 100644
--- a/fs/ksmbd/smb2pdu.c
+++ b/fs/ksmbd/smb2pdu.c
@@ -6857,6 +6857,7 @@ int smb2_lock(struct ksmbd_work *work)
 		if (lock_start > U64_MAX - lock_length) {
 			pr_err("Invalid lock range requested\n");
 			rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+			locks_free_lock(flock);
 			goto out;
 		}
 
@@ -6876,6 +6877,7 @@ int smb2_lock(struct ksmbd_work *work)
 				    "the end offset(%llx) is smaller than the start offset(%llx)\n",
 				    flock->fl_end, flock->fl_start);
 			rsp->hdr.Status = STATUS_INVALID_LOCK_RANGE;
+			locks_free_lock(flock);
 			goto out;
 		}
 
@@ -6887,6 +6889,7 @@ int smb2_lock(struct ksmbd_work *work)
 				    flock->fl_type != F_UNLCK) {
 					pr_err("conflict two locks in one request\n");
 					err = -EINVAL;
+					locks_free_lock(flock);
 					goto out;
 				}
 			}
@@ -6895,6 +6898,7 @@ int smb2_lock(struct ksmbd_work *work)
 		smb_lock = smb2_lock_init(flock, cmd, flags, &lock_list);
 		if (!smb_lock) {
 			err = -EINVAL;
+			locks_free_lock(flock);
 			goto out;
 		}
 	}
-- 
GitLab


From 72ee45fd46d0d3578c4e6046f66fae3218543ce3 Mon Sep 17 00:00:00 2001
From: ye xingchen <ye.xingchen@zte.com.cn>
Date: Wed, 7 Dec 2022 09:29:27 +0800
Subject: [PATCH 3657/4122] ksmbd: Convert to use sysfs_emit()/sysfs_emit_at()
 APIs

Follow the advice of the Documentation/filesystems/sysfs.rst and show()
should only use sysfs_emit() or sysfs_emit_at() when formatting the
value to be returned to user space.

Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/ksmbd/server.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/fs/ksmbd/server.c b/fs/ksmbd/server.c
index a0d635304754a..394b6ceac4312 100644
--- a/fs/ksmbd/server.c
+++ b/fs/ksmbd/server.c
@@ -432,11 +432,9 @@ static ssize_t stats_show(struct class *class, struct class_attribute *attr,
 		"reset",
 		"shutdown"
 	};
-
-	ssize_t sz = scnprintf(buf, PAGE_SIZE, "%d %s %d %lu\n", stats_version,
-			       state[server_conf.state], server_conf.tcp_port,
-			       server_conf.ipc_last_active / HZ);
-	return sz;
+	return sysfs_emit(buf, "%d %s %d %lu\n", stats_version,
+			  state[server_conf.state], server_conf.tcp_port,
+			  server_conf.ipc_last_active / HZ);
 }
 
 static ssize_t kill_server_store(struct class *class,
@@ -468,19 +466,13 @@ static ssize_t debug_show(struct class *class, struct class_attribute *attr,
 
 	for (i = 0; i < ARRAY_SIZE(debug_type_strings); i++) {
 		if ((ksmbd_debug_types >> i) & 1) {
-			pos = scnprintf(buf + sz,
-					PAGE_SIZE - sz,
-					"[%s] ",
-					debug_type_strings[i]);
+			pos = sysfs_emit_at(buf, sz, "[%s] ", debug_type_strings[i]);
 		} else {
-			pos = scnprintf(buf + sz,
-					PAGE_SIZE - sz,
-					"%s ",
-					debug_type_strings[i]);
+			pos = sysfs_emit_at(buf, sz, "%s ", debug_type_strings[i]);
 		}
 		sz += pos;
 	}
-	sz += scnprintf(buf + sz, PAGE_SIZE - sz, "\n");
+	sz += sysfs_emit_at(buf, sz, "\n");
 	return sz;
 }
 
-- 
GitLab


From ac3a2585f018f10039b4a856dcb122da88c1c1c9 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Sun, 11 Dec 2022 06:19:33 -0500
Subject: [PATCH 3658/4122] nfsd: rework refcounting in filecache

The filecache refcounting is a bit non-standard for something searchable
by RCU, in that we maintain a sentinel reference while it's hashed. This
in turn requires that we have to do things differently in the "put"
depending on whether its hashed, which we believe to have led to races.

There are other problems in here too. nfsd_file_close_inode_sync can end
up freeing an nfsd_file while there are still outstanding references to
it, and there are a number of subtle ToC/ToU races.

Rework the code so that the refcount is what drives the lifecycle. When
the refcount goes to zero, then unhash and rcu free the object. A task
searching for a nfsd_file is allowed to bump its refcount, but only if
it's not already 0. Ensure that we don't make any other changes to it
until a reference is held.

With this change, the LRU carries a reference. Take special care to deal
with it when removing an entry from the list, and ensure that we only
repurpose the nf_lru list_head when the refcount is 0 to ensure
exclusive access to it.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/filecache.c | 318 +++++++++++++++++++++++---------------------
 fs/nfsd/trace.h     |  51 +++----
 2 files changed, 189 insertions(+), 180 deletions(-)

diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 1998b4d5f692a..45b2c9e3f6360 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -324,8 +324,7 @@ nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
 		if (key->gc)
 			__set_bit(NFSD_FILE_GC, &nf->nf_flags);
 		nf->nf_inode = key->inode;
-		/* nf_ref is pre-incremented for hash table */
-		refcount_set(&nf->nf_ref, 2);
+		refcount_set(&nf->nf_ref, 1);
 		nf->nf_may = key->need;
 		nf->nf_mark = NULL;
 	}
@@ -377,24 +376,35 @@ nfsd_file_unhash(struct nfsd_file *nf)
 	return false;
 }
 
-static bool
+static void
 nfsd_file_free(struct nfsd_file *nf)
 {
 	s64 age = ktime_to_ms(ktime_sub(ktime_get(), nf->nf_birthtime));
-	bool flush = false;
 
 	trace_nfsd_file_free(nf);
 
 	this_cpu_inc(nfsd_file_releases);
 	this_cpu_add(nfsd_file_total_age, age);
 
+	nfsd_file_unhash(nf);
+
+	/*
+	 * We call fsync here in order to catch writeback errors. It's not
+	 * strictly required by the protocol, but an nfsd_file could get
+	 * evicted from the cache before a COMMIT comes in. If another
+	 * task were to open that file in the interim and scrape the error,
+	 * then the client may never see it. By calling fsync here, we ensure
+	 * that writeback happens before the entry is freed, and that any
+	 * errors reported result in the write verifier changing.
+	 */
+	nfsd_file_fsync(nf);
+
 	if (nf->nf_mark)
 		nfsd_file_mark_put(nf->nf_mark);
 	if (nf->nf_file) {
 		get_file(nf->nf_file);
 		filp_close(nf->nf_file, NULL);
 		fput(nf->nf_file);
-		flush = true;
 	}
 
 	/*
@@ -402,10 +412,9 @@ nfsd_file_free(struct nfsd_file *nf)
 	 * WARN and leak it to preserve system stability.
 	 */
 	if (WARN_ON_ONCE(!list_empty(&nf->nf_lru)))
-		return flush;
+		return;
 
 	call_rcu(&nf->nf_rcu, nfsd_file_slab_free);
-	return flush;
 }
 
 static bool
@@ -421,17 +430,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
 		mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
 }
 
-static void nfsd_file_lru_add(struct nfsd_file *nf)
+static bool nfsd_file_lru_add(struct nfsd_file *nf)
 {
 	set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
-	if (list_lru_add(&nfsd_file_lru, &nf->nf_lru))
+	if (list_lru_add(&nfsd_file_lru, &nf->nf_lru)) {
 		trace_nfsd_file_lru_add(nf);
+		return true;
+	}
+	return false;
 }
 
-static void nfsd_file_lru_remove(struct nfsd_file *nf)
+static bool nfsd_file_lru_remove(struct nfsd_file *nf)
 {
-	if (list_lru_del(&nfsd_file_lru, &nf->nf_lru))
+	if (list_lru_del(&nfsd_file_lru, &nf->nf_lru)) {
 		trace_nfsd_file_lru_del(nf);
+		return true;
+	}
+	return false;
 }
 
 struct nfsd_file *
@@ -442,86 +457,60 @@ nfsd_file_get(struct nfsd_file *nf)
 	return NULL;
 }
 
-static void
-nfsd_file_unhash_and_queue(struct nfsd_file *nf, struct list_head *dispose)
-{
-	trace_nfsd_file_unhash_and_queue(nf);
-	if (nfsd_file_unhash(nf)) {
-		/* caller must call nfsd_file_dispose_list() later */
-		nfsd_file_lru_remove(nf);
-		list_add(&nf->nf_lru, dispose);
-	}
-}
-
-static void
-nfsd_file_put_noref(struct nfsd_file *nf)
-{
-	trace_nfsd_file_put(nf);
-
-	if (refcount_dec_and_test(&nf->nf_ref)) {
-		WARN_ON(test_bit(NFSD_FILE_HASHED, &nf->nf_flags));
-		nfsd_file_lru_remove(nf);
-		nfsd_file_free(nf);
-	}
-}
-
-static void
-nfsd_file_unhash_and_put(struct nfsd_file *nf)
-{
-	if (nfsd_file_unhash(nf))
-		nfsd_file_put_noref(nf);
-}
-
+/**
+ * nfsd_file_put - put the reference to a nfsd_file
+ * @nf: nfsd_file of which to put the reference
+ *
+ * Put a reference to a nfsd_file. In the non-GC case, we just put the
+ * reference immediately. In the GC case, if the reference would be
+ * the last one, the put it on the LRU instead to be cleaned up later.
+ */
 void
 nfsd_file_put(struct nfsd_file *nf)
 {
 	might_sleep();
+	trace_nfsd_file_put(nf);
 
-	if (test_bit(NFSD_FILE_GC, &nf->nf_flags))
-		nfsd_file_lru_add(nf);
-	else if (refcount_read(&nf->nf_ref) == 2)
-		nfsd_file_unhash_and_put(nf);
-
-	if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
-		nfsd_file_fsync(nf);
-		nfsd_file_put_noref(nf);
-	} else if (nf->nf_file && test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
-		nfsd_file_put_noref(nf);
-		nfsd_file_schedule_laundrette();
-	} else
-		nfsd_file_put_noref(nf);
-}
-
-static void
-nfsd_file_dispose_list(struct list_head *dispose)
-{
-	struct nfsd_file *nf;
+	if (test_bit(NFSD_FILE_GC, &nf->nf_flags) &&
+	    test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+		/*
+		 * If this is the last reference (nf_ref == 1), then try to
+		 * transfer it to the LRU.
+		 */
+		if (refcount_dec_not_one(&nf->nf_ref))
+			return;
+
+		/* Try to add it to the LRU.  If that fails, decrement. */
+		if (nfsd_file_lru_add(nf)) {
+			/* If it's still hashed, we're done */
+			if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
+				nfsd_file_schedule_laundrette();
+				return;
+			}
 
-	while(!list_empty(dispose)) {
-		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
-		list_del_init(&nf->nf_lru);
-		nfsd_file_fsync(nf);
-		nfsd_file_put_noref(nf);
+			/*
+			 * We're racing with unhashing, so try to remove it from
+			 * the LRU. If removal fails, then someone else already
+			 * has our reference.
+			 */
+			if (!nfsd_file_lru_remove(nf))
+				return;
+		}
 	}
+	if (refcount_dec_and_test(&nf->nf_ref))
+		nfsd_file_free(nf);
 }
 
 static void
-nfsd_file_dispose_list_sync(struct list_head *dispose)
+nfsd_file_dispose_list(struct list_head *dispose)
 {
-	bool flush = false;
 	struct nfsd_file *nf;
 
-	while(!list_empty(dispose)) {
+	while (!list_empty(dispose)) {
 		nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
 		list_del_init(&nf->nf_lru);
-		nfsd_file_fsync(nf);
-		if (!refcount_dec_and_test(&nf->nf_ref))
-			continue;
-		if (nfsd_file_free(nf))
-			flush = true;
+		nfsd_file_free(nf);
 	}
-	if (flush)
-		flush_delayed_fput();
 }
 
 static void
@@ -591,21 +580,8 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
 	struct list_head *head = arg;
 	struct nfsd_file *nf = list_entry(item, struct nfsd_file, nf_lru);
 
-	/*
-	 * Do a lockless refcount check. The hashtable holds one reference, so
-	 * we look to see if anything else has a reference, or if any have
-	 * been put since the shrinker last ran. Those don't get unhashed and
-	 * released.
-	 *
-	 * Note that in the put path, we set the flag and then decrement the
-	 * counter. Here we check the counter and then test and clear the flag.
-	 * That order is deliberate to ensure that we can do this locklessly.
-	 */
-	if (refcount_read(&nf->nf_ref) > 1) {
-		list_lru_isolate(lru, &nf->nf_lru);
-		trace_nfsd_file_gc_in_use(nf);
-		return LRU_REMOVED;
-	}
+	/* We should only be dealing with GC entries here */
+	WARN_ON_ONCE(!test_bit(NFSD_FILE_GC, &nf->nf_flags));
 
 	/*
 	 * Don't throw out files that are still undergoing I/O or
@@ -616,40 +592,30 @@ nfsd_file_lru_cb(struct list_head *item, struct list_lru_one *lru,
 		return LRU_SKIP;
 	}
 
+	/* If it was recently added to the list, skip it */
 	if (test_and_clear_bit(NFSD_FILE_REFERENCED, &nf->nf_flags)) {
 		trace_nfsd_file_gc_referenced(nf);
 		return LRU_ROTATE;
 	}
 
-	if (!test_and_clear_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
-		trace_nfsd_file_gc_hashed(nf);
-		return LRU_SKIP;
+	/*
+	 * Put the reference held on behalf of the LRU. If it wasn't the last
+	 * one, then just remove it from the LRU and ignore it.
+	 */
+	if (!refcount_dec_and_test(&nf->nf_ref)) {
+		trace_nfsd_file_gc_in_use(nf);
+		list_lru_isolate(lru, &nf->nf_lru);
+		return LRU_REMOVED;
 	}
 
+	/* Refcount went to zero. Unhash it and queue it to the dispose list */
+	nfsd_file_unhash(nf);
 	list_lru_isolate_move(lru, &nf->nf_lru, head);
 	this_cpu_inc(nfsd_file_evictions);
 	trace_nfsd_file_gc_disposed(nf);
 	return LRU_REMOVED;
 }
 
-/*
- * Unhash items on @dispose immediately, then queue them on the
- * disposal workqueue to finish releasing them in the background.
- *
- * cel: Note that between the time list_lru_shrink_walk runs and
- * now, these items are in the hash table but marked unhashed.
- * Why release these outside of lru_cb ? There's no lock ordering
- * problem since lru_cb currently takes no lock.
- */
-static void nfsd_file_gc_dispose_list(struct list_head *dispose)
-{
-	struct nfsd_file *nf;
-
-	list_for_each_entry(nf, dispose, nf_lru)
-		nfsd_file_hash_remove(nf);
-	nfsd_file_dispose_list_delayed(dispose);
-}
-
 static void
 nfsd_file_gc(void)
 {
@@ -659,7 +625,7 @@ nfsd_file_gc(void)
 	ret = list_lru_walk(&nfsd_file_lru, nfsd_file_lru_cb,
 			    &dispose, list_lru_count(&nfsd_file_lru));
 	trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
-	nfsd_file_gc_dispose_list(&dispose);
+	nfsd_file_dispose_list_delayed(&dispose);
 }
 
 static void
@@ -685,7 +651,7 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
 	ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
 				   nfsd_file_lru_cb, &dispose);
 	trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
-	nfsd_file_gc_dispose_list(&dispose);
+	nfsd_file_dispose_list_delayed(&dispose);
 	return ret;
 }
 
@@ -695,72 +661,111 @@ static struct shrinker	nfsd_file_shrinker = {
 	.seeks = 1,
 };
 
-/*
- * Find all cache items across all net namespaces that match @inode and
- * move them to @dispose. The lookup is atomic wrt nfsd_file_acquire().
+/**
+ * nfsd_file_queue_for_close: try to close out any open nfsd_files for an inode
+ * @inode:   inode on which to close out nfsd_files
+ * @dispose: list on which to gather nfsd_files to close out
+ *
+ * An nfsd_file represents a struct file being held open on behalf of nfsd. An
+ * open file however can block other activity (such as leases), or cause
+ * undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
+ *
+ * This function is intended to find open nfsd_files when this sort of
+ * conflicting access occurs and then attempt to close those files out.
+ *
+ * Populates the dispose list with entries that have already had their
+ * refcounts go to zero. The actual free of an nfsd_file can be expensive,
+ * so we leave it up to the caller whether it wants to wait or not.
  */
-static unsigned int
-__nfsd_file_close_inode(struct inode *inode, struct list_head *dispose)
+static void
+nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
 {
 	struct nfsd_file_lookup_key key = {
 		.type	= NFSD_FILE_KEY_INODE,
 		.inode	= inode,
 	};
-	unsigned int count = 0;
 	struct nfsd_file *nf;
 
 	rcu_read_lock();
 	do {
+		int decrement = 1;
+
 		nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
 				       nfsd_file_rhash_params);
 		if (!nf)
 			break;
-		nfsd_file_unhash_and_queue(nf, dispose);
-		count++;
+
+		/* If we raced with someone else unhashing, ignore it */
+		if (!nfsd_file_unhash(nf))
+			continue;
+
+		/* If we can't get a reference, ignore it */
+		if (!nfsd_file_get(nf))
+			continue;
+
+		/* Extra decrement if we remove from the LRU */
+		if (nfsd_file_lru_remove(nf))
+			++decrement;
+
+		/* If refcount goes to 0, then put on the dispose list */
+		if (refcount_sub_and_test(decrement, &nf->nf_ref)) {
+			list_add(&nf->nf_lru, dispose);
+			trace_nfsd_file_closing(nf);
+		}
 	} while (1);
 	rcu_read_unlock();
-	return count;
 }
 
 /**
- * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
+ * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
  * @inode: inode of the file to attempt to remove
  *
- * Unhash and put, then flush and fput all cache items associated with @inode.
+ * Close out any open nfsd_files that can be reaped for @inode. The
+ * actual freeing is deferred to the dispose_list_delayed infrastructure.
+ *
+ * This is used by the fsnotify callbacks and setlease notifier.
  */
-void
-nfsd_file_close_inode_sync(struct inode *inode)
+static void
+nfsd_file_close_inode(struct inode *inode)
 {
 	LIST_HEAD(dispose);
-	unsigned int count;
 
-	count = __nfsd_file_close_inode(inode, &dispose);
-	trace_nfsd_file_close_inode_sync(inode, count);
-	nfsd_file_dispose_list_sync(&dispose);
+	nfsd_file_queue_for_close(inode, &dispose);
+	nfsd_file_dispose_list_delayed(&dispose);
 }
 
 /**
- * nfsd_file_close_inode - attempt a delayed close of a nfsd_file
+ * nfsd_file_close_inode_sync - attempt to forcibly close a nfsd_file
  * @inode: inode of the file to attempt to remove
  *
- * Unhash and put all cache item associated with @inode.
+ * Close out any open nfsd_files that can be reaped for @inode. The
+ * nfsd_files are closed out synchronously.
+ *
+ * This is called from nfsd_rename and nfsd_unlink to avoid silly-renames
+ * when reexporting NFS.
  */
-static void
-nfsd_file_close_inode(struct inode *inode)
+void
+nfsd_file_close_inode_sync(struct inode *inode)
 {
+	struct nfsd_file *nf;
 	LIST_HEAD(dispose);
-	unsigned int count;
 
-	count = __nfsd_file_close_inode(inode, &dispose);
-	trace_nfsd_file_close_inode(inode, count);
-	nfsd_file_dispose_list_delayed(&dispose);
+	trace_nfsd_file_close(inode);
+
+	nfsd_file_queue_for_close(inode, &dispose);
+	while (!list_empty(&dispose)) {
+		nf = list_first_entry(&dispose, struct nfsd_file, nf_lru);
+		list_del_init(&nf->nf_lru);
+		nfsd_file_free(nf);
+	}
+	flush_delayed_fput();
 }
 
 /**
  * nfsd_file_delayed_close - close unused nfsd_files
  * @work: dummy
  *
- * Walk the LRU list and close any entries that have not been used since
+ * Walk the LRU list and destroy any entries that have not been used since
  * the last scan.
  */
 static void
@@ -782,7 +787,7 @@ nfsd_file_lease_notifier_call(struct notifier_block *nb, unsigned long arg,
 
 	/* Only close files for F_SETLEASE leases */
 	if (fl->fl_flags & FL_LEASE)
-		nfsd_file_close_inode_sync(file_inode(fl->fl_file));
+		nfsd_file_close_inode(file_inode(fl->fl_file));
 	return 0;
 }
 
@@ -903,6 +908,13 @@ out_err:
 	goto out;
 }
 
+/**
+ * __nfsd_file_cache_purge: clean out the cache for shutdown
+ * @net: net-namespace to shut down the cache (may be NULL)
+ *
+ * Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
+ * then close out everything. Called when an nfsd instance is being shut down.
+ */
 static void
 __nfsd_file_cache_purge(struct net *net)
 {
@@ -916,8 +928,11 @@ __nfsd_file_cache_purge(struct net *net)
 
 		nf = rhashtable_walk_next(&iter);
 		while (!IS_ERR_OR_NULL(nf)) {
-			if (!net || nf->nf_net == net)
-				nfsd_file_unhash_and_queue(nf, &dispose);
+			if (!net || nf->nf_net == net) {
+				nfsd_file_unhash(nf);
+				nfsd_file_lru_remove(nf);
+				list_add(&nf->nf_lru, &dispose);
+			}
 			nf = rhashtable_walk_next(&iter);
 		}
 
@@ -1084,8 +1099,12 @@ retry:
 	if (nf)
 		nf = nfsd_file_get(nf);
 	rcu_read_unlock();
-	if (nf)
+
+	if (nf) {
+		if (nfsd_file_lru_remove(nf))
+			WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
 		goto wait_for_construction;
+	}
 
 	nf = nfsd_file_alloc(&key, may_flags);
 	if (!nf) {
@@ -1118,11 +1137,11 @@ wait_for_construction:
 			goto out;
 		}
 		open_retry = false;
-		nfsd_file_put_noref(nf);
+		if (refcount_dec_and_test(&nf->nf_ref))
+			nfsd_file_free(nf);
 		goto retry;
 	}
 
-	nfsd_file_lru_remove(nf);
 	this_cpu_inc(nfsd_file_cache_hits);
 
 	status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
@@ -1132,7 +1151,8 @@ out:
 			this_cpu_inc(nfsd_file_acquisitions);
 		*pnf = nf;
 	} else {
-		nfsd_file_put(nf);
+		if (refcount_dec_and_test(&nf->nf_ref))
+			nfsd_file_free(nf);
 		nf = NULL;
 	}
 
@@ -1158,8 +1178,10 @@ open_file:
 	 * If construction failed, or we raced with a call to unlink()
 	 * then unhash.
 	 */
-	if (status != nfs_ok || key.inode->i_nlink == 0)
-		nfsd_file_unhash_and_put(nf);
+	if (status == nfs_ok && key.inode->i_nlink == 0)
+		status = nfserr_jukebox;
+	if (status != nfs_ok)
+		nfsd_file_unhash(nf);
 	clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
 	smp_mb__after_atomic();
 	wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 46b8f68a24974..c852ae8eaf371 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -876,8 +876,8 @@ DEFINE_CLID_EVENT(confirmed_r);
 	__print_flags(val, "|",						\
 		{ 1 << NFSD_FILE_HASHED,	"HASHED" },		\
 		{ 1 << NFSD_FILE_PENDING,	"PENDING" },		\
-		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED"},		\
-		{ 1 << NFSD_FILE_GC,		"GC"})
+		{ 1 << NFSD_FILE_REFERENCED,	"REFERENCED" },		\
+		{ 1 << NFSD_FILE_GC,		"GC" })
 
 DECLARE_EVENT_CLASS(nfsd_file_class,
 	TP_PROTO(struct nfsd_file *nf),
@@ -912,6 +912,7 @@ DEFINE_EVENT(nfsd_file_class, name, \
 DEFINE_NFSD_FILE_EVENT(nfsd_file_free);
 DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash);
 DEFINE_NFSD_FILE_EVENT(nfsd_file_put);
+DEFINE_NFSD_FILE_EVENT(nfsd_file_closing);
 DEFINE_NFSD_FILE_EVENT(nfsd_file_unhash_and_queue);
 
 TRACE_EVENT(nfsd_file_alloc,
@@ -1103,35 +1104,6 @@ TRACE_EVENT(nfsd_file_open,
 		__entry->nf_file)
 )
 
-DECLARE_EVENT_CLASS(nfsd_file_search_class,
-	TP_PROTO(
-		const struct inode *inode,
-		unsigned int count
-	),
-	TP_ARGS(inode, count),
-	TP_STRUCT__entry(
-		__field(const struct inode *, inode)
-		__field(unsigned int, count)
-	),
-	TP_fast_assign(
-		__entry->inode = inode;
-		__entry->count = count;
-	),
-	TP_printk("inode=%p count=%u",
-		__entry->inode, __entry->count)
-);
-
-#define DEFINE_NFSD_FILE_SEARCH_EVENT(name)				\
-DEFINE_EVENT(nfsd_file_search_class, name,				\
-	TP_PROTO(							\
-		const struct inode *inode,				\
-		unsigned int count					\
-	),								\
-	TP_ARGS(inode, count))
-
-DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode_sync);
-DEFINE_NFSD_FILE_SEARCH_EVENT(nfsd_file_close_inode);
-
 TRACE_EVENT(nfsd_file_is_cached,
 	TP_PROTO(
 		const struct inode *inode,
@@ -1209,7 +1181,6 @@ DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_lru_del_disposed);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_in_use);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_writeback);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_referenced);
-DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_hashed);
 DEFINE_NFSD_FILE_GC_EVENT(nfsd_file_gc_disposed);
 
 DECLARE_EVENT_CLASS(nfsd_file_lruwalk_class,
@@ -1241,6 +1212,22 @@ DEFINE_EVENT(nfsd_file_lruwalk_class, name,				\
 DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_gc_removed);
 DEFINE_NFSD_FILE_LRUWALK_EVENT(nfsd_file_shrinker_removed);
 
+TRACE_EVENT(nfsd_file_close,
+	TP_PROTO(
+		const struct inode *inode
+	),
+	TP_ARGS(inode),
+	TP_STRUCT__entry(
+		__field(const void *, inode)
+	),
+	TP_fast_assign(
+		__entry->inode = inode;
+	),
+	TP_printk("inode=%p",
+		__entry->inode
+	)
+);
+
 TRACE_EVENT(nfsd_file_fsync,
 	TP_PROTO(
 		const struct nfsd_file *nf,
-- 
GitLab


From 21b8a1dd56a163825e5749b303858fb902ebf198 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 2 Dec 2022 10:45:30 -0800
Subject: [PATCH 3659/4122] rtc: msc313: Fix function prototype mismatch in
 msc313_rtc_probe()

With clang's kernel control flow integrity (kCFI, CONFIG_CFI_CLANG),
indirect call targets are validated against the expected function
pointer prototype to make sure the call target is valid to help mitigate
ROP attacks. If they are not identical, there is a failure at run time,
which manifests as either a kernel panic or thread getting killed.

msc313_rtc_probe() was passing clk_disable_unprepare() directly, which
did not have matching prototypes for devm_add_action_or_reset()'s
callback argument. Refactor to use devm_clk_get_enabled() instead.

This was found as a result of Clang's new -Wcast-function-type-strict
flag, which is more sensitive than the simpler -Wcast-function-type,
which only checks for type width mismatches.

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/lkml/202211041527.HD8TLSE1-lkp@intel.com
Suggested-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Cc: Daniel Palmer <daniel@thingy.jp>
Cc: Romain Perier <romain.perier@gmail.com>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-rtc@vger.kernel.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Daniel Palmer <daniel@thingy.jp>
Tested-by: Daniel Palmer <daniel@thingy.jp>
Link: https://lore.kernel.org/r/20221202184525.gonna.423-kees@kernel.org
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-msc313.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/rtc/rtc-msc313.c b/drivers/rtc/rtc-msc313.c
index f3fde013c4b8b..8d7737e0e2e02 100644
--- a/drivers/rtc/rtc-msc313.c
+++ b/drivers/rtc/rtc-msc313.c
@@ -212,22 +212,12 @@ static int msc313_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	clk = devm_clk_get(dev, NULL);
+	clk = devm_clk_get_enabled(dev, NULL);
 	if (IS_ERR(clk)) {
 		dev_err(dev, "No input reference clock\n");
 		return PTR_ERR(clk);
 	}
 
-	ret = clk_prepare_enable(clk);
-	if (ret) {
-		dev_err(dev, "Failed to enable the reference clock, %d\n", ret);
-		return ret;
-	}
-
-	ret = devm_add_action_or_reset(dev, (void (*) (void *))clk_disable_unprepare, clk);
-	if (ret)
-		return ret;
-
 	rate = clk_get_rate(clk);
 	writew(rate & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_L);
 	writew((rate >> 16) & 0xFFFF, priv->rtc_base + REG_RTC_FREQ_CW_H);
-- 
GitLab


From 2e830ccc21eb67a4c2490279d907e5e9199e5156 Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sebastian.reichel@collabora.com>
Date: Thu, 20 Oct 2022 22:42:41 +0200
Subject: [PATCH 3660/4122] rtc: rk808: reduce 'struct rk808' usage

Reduce usage of 'struct rk808' (driver data of the parent MFD), so
that only the chip variant field is still being accessed directly.
This allows restructuring the MFD driver to support SPI based
PMICs.

Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Signed-off-by: Sebastian Reichel <sebastian.reichel@collabora.com>
Link: https://lore.kernel.org/r/20221020204251.108565-4-sebastian.reichel@collabora.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rk808.c | 47 ++++++++++++++++++-----------------------
 1 file changed, 20 insertions(+), 27 deletions(-)

diff --git a/drivers/rtc/rtc-rk808.c b/drivers/rtc/rtc-rk808.c
index e920da8c08da1..2d9bcb3ce1e3b 100644
--- a/drivers/rtc/rtc-rk808.c
+++ b/drivers/rtc/rtc-rk808.c
@@ -14,7 +14,6 @@
 #include <linux/bcd.h>
 #include <linux/mfd/rk808.h>
 #include <linux/platform_device.h>
-#include <linux/i2c.h>
 
 /* RTC_CTRL_REG bitfields */
 #define BIT_RTC_CTRL_REG_STOP_RTC_M		BIT(0)
@@ -51,7 +50,7 @@ struct rk_rtc_compat_reg {
 };
 
 struct rk808_rtc {
-	struct rk808 *rk808;
+	struct regmap *regmap;
 	struct rtc_device *rtc;
 	struct rk_rtc_compat_reg *creg;
 	int irq;
@@ -97,12 +96,11 @@ static void gregorian_to_rockchip(struct rtc_time *tm)
 static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm)
 {
 	struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev);
-	struct rk808 *rk808 = rk808_rtc->rk808;
 	u8 rtc_data[NUM_TIME_REGS];
 	int ret;
 
 	/* Force an update of the shadowed registers right now */
-	ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg,
+	ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg,
 				 BIT_RTC_CTRL_REG_RTC_GET_TIME,
 				 BIT_RTC_CTRL_REG_RTC_GET_TIME);
 	if (ret) {
@@ -116,7 +114,7 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm)
 	 * 32khz. If we clear the GET_TIME bit here, the time of i2c transfer
 	 * certainly more than 31.25us: 16 * 2.5us at 400kHz bus frequency.
 	 */
-	ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg,
+	ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg,
 				 BIT_RTC_CTRL_REG_RTC_GET_TIME,
 				 0);
 	if (ret) {
@@ -124,7 +122,7 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm)
 		return ret;
 	}
 
-	ret = regmap_bulk_read(rk808->regmap, rk808_rtc->creg->seconds_reg,
+	ret = regmap_bulk_read(rk808_rtc->regmap, rk808_rtc->creg->seconds_reg,
 			       rtc_data, NUM_TIME_REGS);
 	if (ret) {
 		dev_err(dev, "Failed to bulk read rtc_data: %d\n", ret);
@@ -148,7 +146,6 @@ static int rk808_rtc_readtime(struct device *dev, struct rtc_time *tm)
 static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm)
 {
 	struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev);
-	struct rk808 *rk808 = rk808_rtc->rk808;
 	u8 rtc_data[NUM_TIME_REGS];
 	int ret;
 
@@ -163,7 +160,7 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	rtc_data[6] = bin2bcd(tm->tm_wday);
 
 	/* Stop RTC while updating the RTC registers */
-	ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg,
+	ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg,
 				 BIT_RTC_CTRL_REG_STOP_RTC_M,
 				 BIT_RTC_CTRL_REG_STOP_RTC_M);
 	if (ret) {
@@ -171,14 +168,14 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm)
 		return ret;
 	}
 
-	ret = regmap_bulk_write(rk808->regmap, rk808_rtc->creg->seconds_reg,
+	ret = regmap_bulk_write(rk808_rtc->regmap, rk808_rtc->creg->seconds_reg,
 				rtc_data, NUM_TIME_REGS);
 	if (ret) {
 		dev_err(dev, "Failed to bull write rtc_data: %d\n", ret);
 		return ret;
 	}
 	/* Start RTC again */
-	ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg,
+	ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg,
 				 BIT_RTC_CTRL_REG_STOP_RTC_M, 0);
 	if (ret) {
 		dev_err(dev, "Failed to update RTC control: %d\n", ret);
@@ -191,12 +188,11 @@ static int rk808_rtc_set_time(struct device *dev, struct rtc_time *tm)
 static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev);
-	struct rk808 *rk808 = rk808_rtc->rk808;
 	u8 alrm_data[NUM_ALARM_REGS];
 	uint32_t int_reg;
 	int ret;
 
-	ret = regmap_bulk_read(rk808->regmap,
+	ret = regmap_bulk_read(rk808_rtc->regmap,
 			       rk808_rtc->creg->alarm_seconds_reg,
 			       alrm_data, NUM_ALARM_REGS);
 	if (ret) {
@@ -212,7 +208,7 @@ static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	alrm->time.tm_year = (bcd2bin(alrm_data[5] & YEARS_REG_MSK)) + 100;
 	rockchip_to_gregorian(&alrm->time);
 
-	ret = regmap_read(rk808->regmap, rk808_rtc->creg->int_reg, &int_reg);
+	ret = regmap_read(rk808_rtc->regmap, rk808_rtc->creg->int_reg, &int_reg);
 	if (ret) {
 		dev_err(dev, "Failed to read RTC INT REG: %d\n", ret);
 		return ret;
@@ -228,10 +224,9 @@ static int rk808_rtc_readalarm(struct device *dev, struct rtc_wkalrm *alrm)
 
 static int rk808_rtc_stop_alarm(struct rk808_rtc *rk808_rtc)
 {
-	struct rk808 *rk808 = rk808_rtc->rk808;
 	int ret;
 
-	ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->int_reg,
+	ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->int_reg,
 				 BIT_RTC_INTERRUPTS_REG_IT_ALARM_M, 0);
 
 	return ret;
@@ -239,10 +234,9 @@ static int rk808_rtc_stop_alarm(struct rk808_rtc *rk808_rtc)
 
 static int rk808_rtc_start_alarm(struct rk808_rtc *rk808_rtc)
 {
-	struct rk808 *rk808 = rk808_rtc->rk808;
 	int ret;
 
-	ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->int_reg,
+	ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->int_reg,
 				 BIT_RTC_INTERRUPTS_REG_IT_ALARM_M,
 				 BIT_RTC_INTERRUPTS_REG_IT_ALARM_M);
 
@@ -252,7 +246,6 @@ static int rk808_rtc_start_alarm(struct rk808_rtc *rk808_rtc)
 static int rk808_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 {
 	struct rk808_rtc *rk808_rtc = dev_get_drvdata(dev);
-	struct rk808 *rk808 = rk808_rtc->rk808;
 	u8 alrm_data[NUM_ALARM_REGS];
 	int ret;
 
@@ -272,7 +265,7 @@ static int rk808_rtc_setalarm(struct device *dev, struct rtc_wkalrm *alrm)
 	alrm_data[4] = bin2bcd(alrm->time.tm_mon + 1);
 	alrm_data[5] = bin2bcd(alrm->time.tm_year - 100);
 
-	ret = regmap_bulk_write(rk808->regmap,
+	ret = regmap_bulk_write(rk808_rtc->regmap,
 				rk808_rtc->creg->alarm_seconds_reg,
 				alrm_data, NUM_ALARM_REGS);
 	if (ret) {
@@ -313,20 +306,18 @@ static int rk808_rtc_alarm_irq_enable(struct device *dev,
 static irqreturn_t rk808_alarm_irq(int irq, void *data)
 {
 	struct rk808_rtc *rk808_rtc = data;
-	struct rk808 *rk808 = rk808_rtc->rk808;
-	struct i2c_client *client = rk808->i2c;
 	int ret;
 
-	ret = regmap_write(rk808->regmap, rk808_rtc->creg->status_reg,
+	ret = regmap_write(rk808_rtc->regmap, rk808_rtc->creg->status_reg,
 			   RTC_STATUS_MASK);
 	if (ret) {
-		dev_err(&client->dev,
+		dev_err(&rk808_rtc->rtc->dev,
 			"%s:Failed to update RTC status: %d\n", __func__, ret);
 		return ret;
 	}
 
 	rtc_update_irq(rk808_rtc->rtc, 1, RTC_IRQF | RTC_AF);
-	dev_dbg(&client->dev,
+	dev_dbg(&rk808_rtc->rtc->dev,
 		 "%s:irq=%d\n", __func__, irq);
 	return IRQ_HANDLED;
 }
@@ -404,10 +395,12 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 		break;
 	}
 	platform_set_drvdata(pdev, rk808_rtc);
-	rk808_rtc->rk808 = rk808;
+	rk808_rtc->regmap = dev_get_regmap(pdev->dev.parent, NULL);
+	if (!rk808_rtc->regmap)
+		return -ENODEV;
 
 	/* start rtc running by default, and use shadowed timer. */
-	ret = regmap_update_bits(rk808->regmap, rk808_rtc->creg->ctrl_reg,
+	ret = regmap_update_bits(rk808_rtc->regmap, rk808_rtc->creg->ctrl_reg,
 				 BIT_RTC_CTRL_REG_STOP_RTC_M |
 				 BIT_RTC_CTRL_REG_RTC_READSEL_M,
 				 BIT_RTC_CTRL_REG_RTC_READSEL_M);
@@ -417,7 +410,7 @@ static int rk808_rtc_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = regmap_write(rk808->regmap, rk808_rtc->creg->status_reg,
+	ret = regmap_write(rk808_rtc->regmap, rk808_rtc->creg->status_reg,
 			   RTC_STATUS_MASK);
 	if (ret) {
 		dev_err(&pdev->dev,
-- 
GitLab


From 01744ce9f07f0b76b0b2d30adba2a7c104f1ff2a Mon Sep 17 00:00:00 2001
From: Naveen Krishna Chatradhi <nchatrad@amd.com>
Date: Mon, 5 Dec 2022 10:54:13 +0000
Subject: [PATCH 3661/4122] i3c: Correct the macro module_i3c_i2c_driver

Present definition for module_i3c_i2c_driver uses only the
1st argument i.e., struct i3c_driver.
Irrespective of CONFIG_I3C being enabled/disabled,
struct i2c_driver is never passed to module_driver()

Passing struct i2c_driver as the 4th argument works.

Signed-off-by: Akshay Gupta <Akshay.Gupta@amd.com>
Signed-off-by: Naveen Krishna Chatradhi <nchatrad@amd.com>
Link: https://lore.kernel.org/r/20221205105413.937704-1-naveenkrishna.chatradhi@amd.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 include/linux/i3c/device.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h
index 8242e13e7b0bc..419192b5cc4d3 100644
--- a/include/linux/i3c/device.h
+++ b/include/linux/i3c/device.h
@@ -287,7 +287,8 @@ static inline void i3c_i2c_driver_unregister(struct i3c_driver *i3cdrv,
 #define module_i3c_i2c_driver(__i3cdrv, __i2cdrv)	\
 	module_driver(__i3cdrv,				\
 		      i3c_i2c_driver_register,		\
-		      i3c_i2c_driver_unregister)
+		      i3c_i2c_driver_unregister,	\
+		      __i2cdrv)
 
 int i3c_device_do_priv_xfers(struct i3c_device *dev,
 			     struct i3c_priv_xfer *xfers,
-- 
GitLab


From 672825cd2823a0cee4687ce80fef5b702ff3caa3 Mon Sep 17 00:00:00 2001
From: Jack Chen <zenghuchen@google.com>
Date: Wed, 7 Dec 2022 15:50:59 -0500
Subject: [PATCH 3662/4122] i3c: export SETDASA method

Because not all I3C drivers have the hot-join feature ready, and
especially not all I3C devices support hot-join feature, exporting
SETDASA method could be useful. With this function, the I3C controller
could perform a DAA to I3C devices when users decide to turn these I3C
devices off and on again during run-time.

Tested: This change has been tested with turnning off an I3C device and
turning on it again during run-time. The device driver calls SETDASA
method to perform DAA to the device. And communication between I3C
controller and device is set up again correctly.

Signed-off-by: Jack Chen <zenghuchen@google.com>
Link: https://lore.kernel.org/r/20221207205059.3848851-1-zenghuchen@google.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/i3c/device.c       | 20 ++++++++++++++++++++
 drivers/i3c/internals.h    |  1 +
 drivers/i3c/master.c       | 19 +++++++++++++++++++
 include/linux/i3c/device.h |  2 ++
 4 files changed, 42 insertions(+)

diff --git a/drivers/i3c/device.c b/drivers/i3c/device.c
index e92d3e9a52bd5..9762630b917ea 100644
--- a/drivers/i3c/device.c
+++ b/drivers/i3c/device.c
@@ -50,6 +50,26 @@ int i3c_device_do_priv_xfers(struct i3c_device *dev,
 }
 EXPORT_SYMBOL_GPL(i3c_device_do_priv_xfers);
 
+/**
+ * i3c_device_do_setdasa() - do I3C dynamic address assignement with
+ *                           static address
+ *
+ * @dev: device with which the DAA should be done
+ *
+ * Return: 0 in case of success, a negative error core otherwise.
+ */
+int i3c_device_do_setdasa(struct i3c_device *dev)
+{
+	int ret;
+
+	i3c_bus_normaluse_lock(dev->bus);
+	ret = i3c_dev_setdasa_locked(dev->desc);
+	i3c_bus_normaluse_unlock(dev->bus);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(i3c_device_do_setdasa);
+
 /**
  * i3c_device_get_info() - get I3C device information
  *
diff --git a/drivers/i3c/internals.h b/drivers/i3c/internals.h
index 86b7b44cfca28..908a807badaf9 100644
--- a/drivers/i3c/internals.h
+++ b/drivers/i3c/internals.h
@@ -15,6 +15,7 @@ extern struct bus_type i3c_bus_type;
 void i3c_bus_normaluse_lock(struct i3c_bus *bus);
 void i3c_bus_normaluse_unlock(struct i3c_bus *bus);
 
+int i3c_dev_setdasa_locked(struct i3c_dev_desc *dev);
 int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev,
 				 struct i3c_priv_xfer *xfers,
 				 int nxfers);
diff --git a/drivers/i3c/master.c b/drivers/i3c/master.c
index 351c81a929a6c..d7e6f6c99aea5 100644
--- a/drivers/i3c/master.c
+++ b/drivers/i3c/master.c
@@ -2708,6 +2708,25 @@ int i3c_master_unregister(struct i3c_master_controller *master)
 }
 EXPORT_SYMBOL_GPL(i3c_master_unregister);
 
+int i3c_dev_setdasa_locked(struct i3c_dev_desc *dev)
+{
+	struct i3c_master_controller *master;
+
+	if (!dev)
+		return -ENOENT;
+
+	master = i3c_dev_get_master(dev);
+	if (!master)
+		return -EINVAL;
+
+	if (!dev->boardinfo || !dev->boardinfo->init_dyn_addr ||
+		!dev->boardinfo->static_addr)
+		return -EINVAL;
+
+	return i3c_master_setdasa_locked(master, dev->info.static_addr,
+						dev->boardinfo->init_dyn_addr);
+}
+
 int i3c_dev_do_priv_xfers_locked(struct i3c_dev_desc *dev,
 				 struct i3c_priv_xfer *xfers,
 				 int nxfers)
diff --git a/include/linux/i3c/device.h b/include/linux/i3c/device.h
index 419192b5cc4d3..1c997abe868c6 100644
--- a/include/linux/i3c/device.h
+++ b/include/linux/i3c/device.h
@@ -294,6 +294,8 @@ int i3c_device_do_priv_xfers(struct i3c_device *dev,
 			     struct i3c_priv_xfer *xfers,
 			     int nxfers);
 
+int i3c_device_do_setdasa(struct i3c_device *dev);
+
 void i3c_device_get_info(struct i3c_device *dev, struct i3c_device_info *info);
 
 struct i3c_ibi_payload {
-- 
GitLab


From 08dcf0732cb4d97b85493d9f60470e48eebf87fe Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Sun, 11 Dec 2022 21:55:38 +0100
Subject: [PATCH 3663/4122] MAINTAINERS: mark I3C DRIVER FOR SYNOPSYS
 DESIGNWARE orphan

Vitor left Synopsys and the email address is now bouncing.

Link: https://lore.kernel.org/r/20221211205539.19353-1-alexandre.belloni@bootlin.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 MAINTAINERS | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cf0f185023724..ebd7f7c957ea7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9697,8 +9697,7 @@ F:	Documentation/devicetree/bindings/i3c/cdns,i3c-master.yaml
 F:	drivers/i3c/master/i3c-master-cdns.c
 
 I3C DRIVER FOR SYNOPSYS DESIGNWARE
-M:	Vitor Soares <vitor.soares@synopsys.com>
-S:	Maintained
+S:	Orphan
 F:	Documentation/devicetree/bindings/i3c/snps,dw-i3c-master.yaml
 F:	drivers/i3c/master/dw*
 
-- 
GitLab


From 103c14db61a24cc0cd344dc5d93d264a36687c35 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Sun, 11 Dec 2022 22:57:55 +0100
Subject: [PATCH 3664/4122] rtc: rx6110: fix warning with !OF

rx6110_spi_of_match is not used when !OF, leading to a warning:

>> drivers/rtc/rtc-rx6110.c:384:34: warning: 'rx6110_spi_of_match' defined but not used [-Wunused-const-variable=]
     384 | static const struct of_device_id rx6110_spi_of_match[] = {
         |                                  ^~~~~~~~~~~~~~~~~~~

Reported-by: kernel test robot <lkp@intel.com>
Link: https://lore.kernel.org/r/20221211215756.54002-1-alexandre.belloni@bootlin.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rx6110.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-rx6110.c b/drivers/rtc/rtc-rx6110.c
index cc634558b9280..76a49838014ba 100644
--- a/drivers/rtc/rtc-rx6110.c
+++ b/drivers/rtc/rtc-rx6110.c
@@ -376,7 +376,7 @@ static const struct spi_device_id rx6110_spi_id[] = {
 };
 MODULE_DEVICE_TABLE(spi, rx6110_spi_id);
 
-static const struct of_device_id rx6110_spi_of_match[] = {
+static const __maybe_unused struct of_device_id rx6110_spi_of_match[] = {
 	{ .compatible = "epson,rx6110" },
 	{ },
 };
-- 
GitLab


From c2d12e85336f6d4172fb2bab5935027c446d7343 Mon Sep 17 00:00:00 2001
From: Alexandre Belloni <alexandre.belloni@bootlin.com>
Date: Sun, 11 Dec 2022 23:35:53 +0100
Subject: [PATCH 3665/4122] rtc: pcf85063: fix pcf85063_clkout_control

pcf85063_clkout_control reads the wrong register but then update the
correct one.

Reported-by: Janne Terho <janne.terho@ouman.fi>
Fixes: 8c229ab6048b ("rtc: pcf85063: Add pcf85063 clkout control to common clock framework")
Link: https://lore.kernel.org/r/20221211223553.59955-1-alexandre.belloni@bootlin.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-pcf85063.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-pcf85063.c b/drivers/rtc/rtc-pcf85063.c
index 99f9cc57c7b30..754e03984f986 100644
--- a/drivers/rtc/rtc-pcf85063.c
+++ b/drivers/rtc/rtc-pcf85063.c
@@ -424,7 +424,7 @@ static int pcf85063_clkout_control(struct clk_hw *hw, bool enable)
 	unsigned int buf;
 	int ret;
 
-	ret = regmap_read(pcf85063->regmap, PCF85063_REG_OFFSET, &buf);
+	ret = regmap_read(pcf85063->regmap, PCF85063_REG_CTRL2, &buf);
 	if (ret < 0)
 		return ret;
 	buf &= PCF85063_REG_CLKO_F_MASK;
-- 
GitLab


From 13959373e9c9021cc80730c7bd1242e07b10b328 Mon Sep 17 00:00:00 2001
From: Nicholas Piggin <npiggin@gmail.com>
Date: Thu, 8 Dec 2022 22:32:25 +1000
Subject: [PATCH 3666/4122] powerpc/qspinlock: Fix 32-bit build

Some 32-bit configurations don't pull in the spin_begin/end/relax
definitions. Fix is to restore a lost include.

Reported-by: kernel test robot <lkp@intel.com>
Fixes: 84990b169557 ("powerpc/qspinlock: add mcs queueing for contended waiters")
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/oe-kbuild-all/202212050224.i7uh9fOh-lkp@intel.com
Link: https://lore.kernel.org/r/20221208123225.1566113-1-npiggin@gmail.com
---
 arch/powerpc/lib/qspinlock.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 1cf5d3e752501..e4bd145255d00 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -3,6 +3,7 @@
 #include <linux/compiler.h>
 #include <linux/export.h>
 #include <linux/percpu.h>
+#include <linux/processor.h>
 #include <linux/smp.h>
 #include <linux/topology.h>
 #include <linux/sched/clock.h>
-- 
GitLab


From ff39899be80b9d90d5e13775eb9fd150338b6e15 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Thu, 21 Jul 2022 09:59:32 +0800
Subject: [PATCH 3667/4122] exfat: simplify empty entry hint

This commit adds exfat_set_empty_hint()/exfat_reset_empty_hint()
to reduce code complexity and make code more readable.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c | 58 +++++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 27 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 0fc08fdcba733..c0e60ff9ec7d4 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -897,6 +897,29 @@ free_es:
 	return NULL;
 }
 
+static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp)
+{
+	hint_femp->eidx = EXFAT_HINT_NONE;
+	hint_femp->count = 0;
+}
+
+static inline void exfat_set_empty_hint(struct exfat_inode_info *ei,
+		struct exfat_hint_femp *candi_empty, struct exfat_chain *clu,
+		int dentry, int num_entries)
+{
+	if (ei->hint_femp.eidx == EXFAT_HINT_NONE ||
+	    ei->hint_femp.eidx > dentry) {
+		if (candi_empty->count == 0) {
+			candi_empty->cur = *clu;
+			candi_empty->eidx = dentry;
+		}
+
+		candi_empty->count++;
+		if (candi_empty->count == num_entries)
+			ei->hint_femp = *candi_empty;
+	}
+}
+
 enum {
 	DIRENT_STEP_FILE,
 	DIRENT_STEP_STRM,
@@ -921,7 +944,7 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
 {
 	int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len;
 	int order, step, name_len = 0;
-	int dentries_per_clu, num_empty = 0;
+	int dentries_per_clu;
 	unsigned int entry_type;
 	unsigned short *uniname = NULL;
 	struct exfat_chain clu;
@@ -939,10 +962,13 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
 		end_eidx = dentry;
 	}
 
-	candi_empty.eidx = EXFAT_HINT_NONE;
+	exfat_reset_empty_hint(&ei->hint_femp);
+
 rewind:
 	order = 0;
 	step = DIRENT_STEP_FILE;
+	exfat_reset_empty_hint(&candi_empty);
+
 	while (clu.dir != EXFAT_EOF_CLUSTER) {
 		i = dentry & (dentries_per_clu - 1);
 		for (; i < dentries_per_clu; i++, dentry++) {
@@ -962,26 +988,8 @@ rewind:
 			    entry_type == TYPE_DELETED) {
 				step = DIRENT_STEP_FILE;
 
-				num_empty++;
-				if (candi_empty.eidx == EXFAT_HINT_NONE &&
-						num_empty == 1) {
-					exfat_chain_set(&candi_empty.cur,
-						clu.dir, clu.size, clu.flags);
-				}
-
-				if (candi_empty.eidx == EXFAT_HINT_NONE &&
-						num_empty >= num_entries) {
-					candi_empty.eidx =
-						dentry - (num_empty - 1);
-					WARN_ON(candi_empty.eidx < 0);
-					candi_empty.count = num_empty;
-
-					if (ei->hint_femp.eidx ==
-							EXFAT_HINT_NONE ||
-						candi_empty.eidx <=
-							 ei->hint_femp.eidx)
-						ei->hint_femp = candi_empty;
-				}
+				exfat_set_empty_hint(ei, &candi_empty, &clu,
+						dentry, num_entries);
 
 				brelse(bh);
 				if (entry_type == TYPE_UNUSED)
@@ -989,8 +997,7 @@ rewind:
 				continue;
 			}
 
-			num_empty = 0;
-			candi_empty.eidx = EXFAT_HINT_NONE;
+			exfat_reset_empty_hint(&candi_empty);
 
 			if (entry_type == TYPE_FILE || entry_type == TYPE_DIR) {
 				step = DIRENT_STEP_FILE;
@@ -1090,9 +1097,6 @@ not_found:
 		rewind = 1;
 		dentry = 0;
 		clu.dir = p_dir->dir;
-		/* reset empty hint */
-		num_empty = 0;
-		candi_empty.eidx = EXFAT_HINT_NONE;
 		goto rewind;
 	}
 
-- 
GitLab


From e298c8a818a3e517582e60c412f4a41b3a1647c5 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Mon, 7 Nov 2022 17:22:13 +0900
Subject: [PATCH 3668/4122] exfat: hint the empty entry which at the end of
 cluster chain

After traversing all directory entries, hint the empty directory
entry no matter whether or not there are enough empty directory
entries.

After this commit, hint the empty directory entries like this:

1. Hint the deleted directory entries if enough;
2. Hint the deleted and unused directory entries which at the
   end of the cluster chain no matter whether enough or not(Add
   by this commit);
3. If no any empty directory entries, hint the empty directory
   entries in the new cluster(Add by this commit).

This avoids repeated traversal of directory entries, reduces CPU
usage, and improves the performance of creating files and
directories(especially on low-performance CPUs).

Test create 5000 files in a class 4 SD card on imx6q-sabrelite
with:

for ((i=0;i<5;i++)); do
   sync
   time (for ((j=1;j<=1000;j++)); do touch file$((i*1000+j)); done)
done

The more files, the more performance improvements.

            Before   After    Improvement
   1~1000   25.360s  22.168s  14.40%
1001~2000   38.242s  28.72ss  33.15%
2001~3000   49.134s  35.037s  40.23%
3001~4000   62.042s  41.624s  49.05%
4001~5000   73.629s  46.772s  57.42%

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c   | 26 ++++++++++++++++++++++----
 fs/exfat/namei.c | 33 +++++++++++++++++++++------------
 2 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index c0e60ff9ec7d4..30d0ac43b66c1 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -905,17 +905,24 @@ static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp)
 
 static inline void exfat_set_empty_hint(struct exfat_inode_info *ei,
 		struct exfat_hint_femp *candi_empty, struct exfat_chain *clu,
-		int dentry, int num_entries)
+		int dentry, int num_entries, int entry_type)
 {
 	if (ei->hint_femp.eidx == EXFAT_HINT_NONE ||
 	    ei->hint_femp.eidx > dentry) {
+		int total_entries = EXFAT_B_TO_DEN(i_size_read(&ei->vfs_inode));
+
 		if (candi_empty->count == 0) {
 			candi_empty->cur = *clu;
 			candi_empty->eidx = dentry;
 		}
 
-		candi_empty->count++;
-		if (candi_empty->count == num_entries)
+		if (entry_type == TYPE_UNUSED)
+			candi_empty->count += total_entries - dentry;
+		else
+			candi_empty->count++;
+
+		if (candi_empty->count == num_entries ||
+		    candi_empty->count + candi_empty->eidx == total_entries)
 			ei->hint_femp = *candi_empty;
 	}
 }
@@ -989,7 +996,8 @@ rewind:
 				step = DIRENT_STEP_FILE;
 
 				exfat_set_empty_hint(ei, &candi_empty, &clu,
-						dentry, num_entries);
+						dentry, num_entries,
+						entry_type);
 
 				brelse(bh);
 				if (entry_type == TYPE_UNUSED)
@@ -1100,6 +1108,16 @@ not_found:
 		goto rewind;
 	}
 
+	/*
+	 * set the EXFAT_EOF_CLUSTER flag to avoid search
+	 * from the beginning again when allocated a new cluster
+	 */
+	if (ei->hint_femp.eidx == EXFAT_HINT_NONE) {
+		ei->hint_femp.cur.dir = EXFAT_EOF_CLUSTER;
+		ei->hint_femp.eidx = p_dir->size * dentries_per_clu;
+		ei->hint_femp.count = 0;
+	}
+
 	/* initialized hint_stat */
 	hint_stat->clu = p_dir->dir;
 	hint_stat->eidx = 0;
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index b617bebc3d0f3..99e00a36c0290 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -224,11 +224,18 @@ static int exfat_search_empty_slot(struct super_block *sb,
 
 	if (hint_femp->eidx != EXFAT_HINT_NONE) {
 		dentry = hint_femp->eidx;
-		if (num_entries <= hint_femp->count) {
-			hint_femp->eidx = EXFAT_HINT_NONE;
-			return dentry;
-		}
 
+		/*
+		 * If hint_femp->count is enough, it is needed to check if
+		 * there are actual empty entries.
+		 * Otherwise, and if "dentry + hint_famp->count" is also equal
+		 * to "p_dir->size * dentries_per_clu", it means ENOSPC.
+		 */
+		if (dentry + hint_femp->count == p_dir->size * dentries_per_clu &&
+		    num_entries > hint_femp->count)
+			return -ENOSPC;
+
+		hint_femp->eidx = EXFAT_HINT_NONE;
 		exfat_chain_dup(&clu, &hint_femp->cur);
 	} else {
 		exfat_chain_dup(&clu, p_dir);
@@ -293,6 +300,12 @@ static int exfat_search_empty_slot(struct super_block *sb,
 		}
 	}
 
+	hint_femp->eidx = p_dir->size * dentries_per_clu - num_empty;
+	hint_femp->count = num_empty;
+	if (num_empty == 0)
+		exfat_chain_set(&hint_femp->cur, EXFAT_EOF_CLUSTER, 0,
+				clu.flags);
+
 	return -ENOSPC;
 }
 
@@ -369,15 +382,11 @@ static int exfat_find_empty_entry(struct inode *inode,
 			if (exfat_ent_set(sb, last_clu, clu.dir))
 				return -EIO;
 
-		if (hint_femp.eidx == EXFAT_HINT_NONE) {
-			/* the special case that new dentry
-			 * should be allocated from the start of new cluster
-			 */
-			hint_femp.eidx = EXFAT_B_TO_DEN_IDX(p_dir->size, sbi);
-			hint_femp.count = sbi->dentries_per_clu;
-
+		if (hint_femp.cur.dir == EXFAT_EOF_CLUSTER)
 			exfat_chain_set(&hint_femp.cur, clu.dir, 0, clu.flags);
-		}
+
+		hint_femp.count += sbi->dentries_per_clu;
+
 		hint_femp.cur.size++;
 		p_dir->size++;
 		size = EXFAT_CLU_TO_B(p_dir->size, sbi);
-- 
GitLab


From f83d8a3b532097276266b5e81073ea46e27b17ab Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Thu, 7 Apr 2022 15:55:56 +0800
Subject: [PATCH 3669/4122] exfat: reduce the size of exfat_entry_set_cache

In normal, there are 19 directory entries at most for a file or
a directory.
  - A file directory entry
  - A stream extension directory entry
  - 1~17 file name directory entry

So the directory entries are in 3 sectors at most, it is enough
for struct exfat_entry_set_cache to pre-allocate 3 bh.

This commit changes the size of struct exfat_entry_set_cache as:

                   Before   After
32-bit system      88       32    bytes
64-bit system      168      48    bytes

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/exfat_fs.h | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index a8f8eee4937cb..af55018ff22e5 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -9,6 +9,7 @@
 #include <linux/fs.h>
 #include <linux/ratelimit.h>
 #include <linux/nls.h>
+#include <linux/blkdev.h>
 
 #define EXFAT_ROOT_INO		1
 
@@ -41,6 +42,14 @@ enum {
 #define ES_2_ENTRIES		2
 #define ES_ALL_ENTRIES		0
 
+#define ES_IDX_FILE		0
+#define ES_IDX_STREAM		1
+#define ES_IDX_FIRST_FILENAME	2
+#define EXFAT_FILENAME_ENTRY_NUM(name_len) \
+	DIV_ROUND_UP(name_len, EXFAT_FILE_NAME_LEN)
+#define ES_IDX_LAST_FILENAME(name_len)	\
+	(ES_IDX_FIRST_FILENAME + EXFAT_FILENAME_ENTRY_NUM(name_len) - 1)
+
 #define DIR_DELETED		0xFFFF0321
 
 /* type values */
@@ -68,9 +77,6 @@ enum {
 #define MAX_NAME_LENGTH		255 /* max len of file name excluding NULL */
 #define MAX_VFSNAME_BUF_SIZE	((MAX_NAME_LENGTH + 1) * MAX_CHARSET_SIZE)
 
-/* Enough size to hold 256 dentry (even 512 Byte sector) */
-#define DIR_CACHE_SIZE		(256*sizeof(struct exfat_dentry)/512+1)
-
 #define EXFAT_HINT_NONE		-1
 #define EXFAT_MIN_SUBDIR	2
 
@@ -125,6 +131,17 @@ enum {
 #define BITS_PER_BYTE_MASK	0x7
 #define IGNORED_BITS_REMAINED(clu, clu_base) ((1 << ((clu) - (clu_base))) - 1)
 
+#define ES_ENTRY_NUM(name_len)	(ES_IDX_LAST_FILENAME(name_len) + 1)
+/* 19 entries = 1 file entry + 1 stream entry + 17 filename entries */
+#define ES_MAX_ENTRY_NUM	ES_ENTRY_NUM(MAX_NAME_LENGTH)
+
+/*
+ * 19 entries x 32 bytes/entry = 608 bytes.
+ * The 608 bytes are in 3 sectors at most (even 512 Byte sector).
+ */
+#define DIR_CACHE_SIZE		\
+	(DIV_ROUND_UP(EXFAT_DEN_TO_B(ES_MAX_ENTRY_NUM), SECTOR_SIZE) + 1)
+
 struct exfat_dentry_namebuf {
 	char *lfn;
 	int lfnbuf_len; /* usually MAX_UNINAME_BUF_SIZE */
@@ -166,11 +183,11 @@ struct exfat_hint {
 
 struct exfat_entry_set_cache {
 	struct super_block *sb;
-	bool modified;
 	unsigned int start_off;
 	int num_bh;
 	struct buffer_head *bh[DIR_CACHE_SIZE];
 	unsigned int num_entries;
+	bool modified;
 };
 
 struct exfat_dir_entry {
-- 
GitLab


From a3ff29a95fde16906304455aa8c0bd84eb770258 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Wed, 9 Nov 2022 13:50:22 +0800
Subject: [PATCH 3670/4122] exfat: support dynamic allocate bh for
 exfat_entry_set_cache

In special cases, a file or a directory may occupied more than 19
directory entries, pre-allocating 3 bh is not enough. Such as
  - Support vendor secondary directory entry in the future.
  - Since file directory entry is damaged, the SecondaryCount
    field is bigger than 18.

So this commit supports dynamic allocation of bh.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c      | 15 +++++++++++++++
 fs/exfat/exfat_fs.h |  5 ++++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 30d0ac43b66c1..03e9c9e3966e3 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -615,6 +615,10 @@ int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync)
 			bforget(es->bh[i]);
 		else
 			brelse(es->bh[i]);
+
+	if (IS_DYNAMIC_ES(es))
+		kfree(es->bh);
+
 	kfree(es);
 	return err;
 }
@@ -847,6 +851,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
 	/* byte offset in sector */
 	off = EXFAT_BLK_OFFSET(byte_offset, sb);
 	es->start_off = off;
+	es->bh = es->__bh;
 
 	/* sector offset in cluster */
 	sec = EXFAT_B_TO_BLK(byte_offset, sb);
@@ -866,6 +871,16 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
 	es->num_entries = num_entries;
 
 	num_bh = EXFAT_B_TO_BLK_ROUND_UP(off + num_entries * DENTRY_SIZE, sb);
+	if (num_bh > ARRAY_SIZE(es->__bh)) {
+		es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_KERNEL);
+		if (!es->bh) {
+			brelse(bh);
+			kfree(es);
+			return NULL;
+		}
+		es->bh[0] = bh;
+	}
+
 	for (i = 1; i < num_bh; i++) {
 		/* get the next sector */
 		if (exfat_is_last_sector_in_cluster(sbi, sec)) {
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index af55018ff22e5..82395ae80dba1 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -185,11 +185,14 @@ struct exfat_entry_set_cache {
 	struct super_block *sb;
 	unsigned int start_off;
 	int num_bh;
-	struct buffer_head *bh[DIR_CACHE_SIZE];
+	struct buffer_head *__bh[DIR_CACHE_SIZE];
+	struct buffer_head **bh;
 	unsigned int num_entries;
 	bool modified;
 };
 
+#define IS_DYNAMIC_ES(es)	((es)->__bh != (es)->bh)
+
 struct exfat_dir_entry {
 	struct exfat_chain dir;
 	int entry;
-- 
GitLab


From 20914ff6dd56dd6b548bf5dd90bff09ef89999e4 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Thu, 17 Nov 2022 11:37:13 +0800
Subject: [PATCH 3671/4122] exfat: move exfat_entry_set_cache from heap to
 stack

The size of struct exfat_entry_set_cache is only 56 bytes on
64-bit system, and allocating from stack is more efficient than
allocating from heap.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c      | 35 +++++++++++++++--------------------
 fs/exfat/exfat_fs.h |  5 +++--
 fs/exfat/inode.c    | 13 ++++++-------
 fs/exfat/namei.c    | 11 +++++------
 4 files changed, 29 insertions(+), 35 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 03e9c9e3966e3..a3fb609dd1299 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -33,10 +33,9 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
 		struct exfat_chain *p_dir, int entry, unsigned short *uniname)
 {
 	int i;
-	struct exfat_entry_set_cache *es;
+	struct exfat_entry_set_cache es;
 
-	es = exfat_get_dentry_set(sb, p_dir, entry, ES_ALL_ENTRIES);
-	if (!es)
+	if (exfat_get_dentry_set(&es, sb, p_dir, entry, ES_ALL_ENTRIES))
 		return;
 
 	/*
@@ -45,8 +44,8 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
 	 * Third entry  : first file-name entry
 	 * So, the index of first file-name dentry should start from 2.
 	 */
-	for (i = 2; i < es->num_entries; i++) {
-		struct exfat_dentry *ep = exfat_get_dentry_cached(es, i);
+	for (i = 2; i < es.num_entries; i++) {
+		struct exfat_dentry *ep = exfat_get_dentry_cached(&es, i);
 
 		/* end of name entry */
 		if (exfat_get_entry_type(ep) != TYPE_EXTEND)
@@ -56,7 +55,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
 		uniname += EXFAT_FILE_NAME_LEN;
 	}
 
-	exfat_free_dentry_set(es, false);
+	exfat_free_dentry_set(&es, false);
 }
 
 /* read a directory entry from the opened directory */
@@ -619,7 +618,6 @@ int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync)
 	if (IS_DYNAMIC_ES(es))
 		kfree(es->bh);
 
-	kfree(es);
 	return err;
 }
 
@@ -816,14 +814,14 @@ struct exfat_dentry *exfat_get_dentry_cached(
  *   pointer of entry set on success,
  *   NULL on failure.
  */
-struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
-		struct exfat_chain *p_dir, int entry, unsigned int type)
+int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
+		struct super_block *sb, struct exfat_chain *p_dir, int entry,
+		unsigned int type)
 {
 	int ret, i, num_bh;
 	unsigned int off, byte_offset, clu = 0;
 	sector_t sec;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
-	struct exfat_entry_set_cache *es;
 	struct exfat_dentry *ep;
 	int num_entries;
 	enum exfat_validate_dentry_mode mode = ES_MODE_STARTED;
@@ -831,17 +829,15 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
 
 	if (p_dir->dir == DIR_DELETED) {
 		exfat_err(sb, "access to deleted dentry");
-		return NULL;
+		return -EIO;
 	}
 
 	byte_offset = EXFAT_DEN_TO_B(entry);
 	ret = exfat_walk_fat_chain(sb, p_dir, byte_offset, &clu);
 	if (ret)
-		return NULL;
+		return ret;
 
-	es = kzalloc(sizeof(*es), GFP_KERNEL);
-	if (!es)
-		return NULL;
+	memset(es, 0, sizeof(*es));
 	es->sb = sb;
 	es->modified = false;
 
@@ -859,7 +855,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
 
 	bh = sb_bread(sb, sec);
 	if (!bh)
-		goto free_es;
+		return -EIO;
 	es->bh[es->num_bh++] = bh;
 
 	ep = exfat_get_dentry_cached(es, 0);
@@ -875,8 +871,7 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
 		es->bh = kmalloc_array(num_bh, sizeof(*es->bh), GFP_KERNEL);
 		if (!es->bh) {
 			brelse(bh);
-			kfree(es);
-			return NULL;
+			return -ENOMEM;
 		}
 		es->bh[0] = bh;
 	}
@@ -905,11 +900,11 @@ struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
 		if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
 			goto free_es;
 	}
-	return es;
+	return 0;
 
 free_es:
 	exfat_free_dentry_set(es, false);
-	return NULL;
+	return -EIO;
 }
 
 static inline void exfat_reset_empty_hint(struct exfat_hint_femp *hint_femp)
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 82395ae80dba1..2ffe5792b1a99 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -490,8 +490,9 @@ struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
 		struct exfat_chain *p_dir, int entry, struct buffer_head **bh);
 struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es,
 		int num);
-struct exfat_entry_set_cache *exfat_get_dentry_set(struct super_block *sb,
-		struct exfat_chain *p_dir, int entry, unsigned int type);
+int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
+		struct super_block *sb, struct exfat_chain *p_dir, int entry,
+		unsigned int type);
 int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync);
 int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir);
 
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 5590a1e83126c..cdcf037a304f5 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -21,7 +21,7 @@ int __exfat_write_inode(struct inode *inode, int sync)
 {
 	unsigned long long on_disk_size;
 	struct exfat_dentry *ep, *ep2;
-	struct exfat_entry_set_cache *es = NULL;
+	struct exfat_entry_set_cache es;
 	struct super_block *sb = inode->i_sb;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_inode_info *ei = EXFAT_I(inode);
@@ -42,11 +42,10 @@ int __exfat_write_inode(struct inode *inode, int sync)
 	exfat_set_volume_dirty(sb);
 
 	/* get the directory entry of given file or directory */
-	es = exfat_get_dentry_set(sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES);
-	if (!es)
+	if (exfat_get_dentry_set(&es, sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES))
 		return -EIO;
-	ep = exfat_get_dentry_cached(es, 0);
-	ep2 = exfat_get_dentry_cached(es, 1);
+	ep = exfat_get_dentry_cached(&es, 0);
+	ep2 = exfat_get_dentry_cached(&es, 1);
 
 	ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode));
 
@@ -83,8 +82,8 @@ int __exfat_write_inode(struct inode *inode, int sync)
 		ep2->dentry.stream.start_clu = EXFAT_FREE_CLUSTER;
 	}
 
-	exfat_update_dir_chksum_with_entry_set(es);
-	return exfat_free_dentry_set(es, sync);
+	exfat_update_dir_chksum_with_entry_set(&es);
+	return exfat_free_dentry_set(&es, sync);
 }
 
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc)
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 99e00a36c0290..8d72527dfb78f 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -604,7 +604,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_inode_info *ei = EXFAT_I(dir);
 	struct exfat_dentry *ep, *ep2;
-	struct exfat_entry_set_cache *es;
+	struct exfat_entry_set_cache es;
 	/* for optimized dir & entry to prevent long traverse of cluster chain */
 	struct exfat_hint hint_opt;
 
@@ -644,11 +644,10 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
 	if (cdir.flags & ALLOC_NO_FAT_CHAIN)
 		cdir.size -= dentry / sbi->dentries_per_clu;
 	dentry = hint_opt.eidx;
-	es = exfat_get_dentry_set(sb, &cdir, dentry, ES_2_ENTRIES);
-	if (!es)
+	if (exfat_get_dentry_set(&es, sb, &cdir, dentry, ES_2_ENTRIES))
 		return -EIO;
-	ep = exfat_get_dentry_cached(es, 0);
-	ep2 = exfat_get_dentry_cached(es, 1);
+	ep = exfat_get_dentry_cached(&es, 0);
+	ep2 = exfat_get_dentry_cached(&es, 1);
 
 	info->type = exfat_get_entry_type(ep);
 	info->attr = le16_to_cpu(ep->dentry.file.attr);
@@ -677,7 +676,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
 			     ep->dentry.file.access_time,
 			     ep->dentry.file.access_date,
 			     0);
-	exfat_free_dentry_set(es, false);
+	exfat_free_dentry_set(&es, false);
 
 	if (ei->start_clu == EXFAT_FREE_CLUSTER) {
 		exfat_fs_error(sb,
-- 
GitLab


From 3b9681acb0ef739343d8cfd35e054aab9597f1dc Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Thu, 17 Mar 2022 18:12:40 +0800
Subject: [PATCH 3672/4122] exfat: rename exfat_free_dentry_set() to
 exfat_put_dentry_set()

Since struct exfat_entry_set_cache is allocated from stack,
no need to free, so rename exfat_free_dentry_set() to
exfat_put_dentry_set(). After renaming, the new function pair
is exfat_get_dentry_set()/exfat_put_dentry_set().

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c      | 16 ++++++++--------
 fs/exfat/exfat_fs.h |  2 +-
 fs/exfat/inode.c    |  2 +-
 fs/exfat/namei.c    |  2 +-
 4 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index a3fb609dd1299..a9a0b3e46af2d 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -55,7 +55,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
 		uniname += EXFAT_FILE_NAME_LEN;
 	}
 
-	exfat_free_dentry_set(&es, false);
+	exfat_put_dentry_set(&es, false);
 }
 
 /* read a directory entry from the opened directory */
@@ -602,7 +602,7 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es)
 	es->modified = true;
 }
 
-int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync)
+int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync)
 {
 	int i, err = 0;
 
@@ -860,7 +860,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 
 	ep = exfat_get_dentry_cached(es, 0);
 	if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
-		goto free_es;
+		goto put_es;
 
 	num_entries = type == ES_ALL_ENTRIES ?
 		ep->dentry.file.num_ext + 1 : type;
@@ -882,7 +882,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 			if (p_dir->flags == ALLOC_NO_FAT_CHAIN)
 				clu++;
 			else if (exfat_get_next_cluster(sb, &clu))
-				goto free_es;
+				goto put_es;
 			sec = exfat_cluster_to_sector(sbi, clu);
 		} else {
 			sec++;
@@ -890,7 +890,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 
 		bh = sb_bread(sb, sec);
 		if (!bh)
-			goto free_es;
+			goto put_es;
 		es->bh[es->num_bh++] = bh;
 	}
 
@@ -898,12 +898,12 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 	for (i = 1; i < num_entries; i++) {
 		ep = exfat_get_dentry_cached(es, i);
 		if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
-			goto free_es;
+			goto put_es;
 	}
 	return 0;
 
-free_es:
-	exfat_free_dentry_set(es, false);
+put_es:
+	exfat_put_dentry_set(es, false);
 	return -EIO;
 }
 
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 2ffe5792b1a99..324acc57d0292 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -493,7 +493,7 @@ struct exfat_dentry *exfat_get_dentry_cached(struct exfat_entry_set_cache *es,
 int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 		struct super_block *sb, struct exfat_chain *p_dir, int entry,
 		unsigned int type);
-int exfat_free_dentry_set(struct exfat_entry_set_cache *es, int sync);
+int exfat_put_dentry_set(struct exfat_entry_set_cache *es, int sync);
 int exfat_count_dir_entries(struct super_block *sb, struct exfat_chain *p_dir);
 
 /* inode.c */
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index cdcf037a304f5..a84eae72556d7 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -83,7 +83,7 @@ int __exfat_write_inode(struct inode *inode, int sync)
 	}
 
 	exfat_update_dir_chksum_with_entry_set(&es);
-	return exfat_free_dentry_set(&es, sync);
+	return exfat_put_dentry_set(&es, sync);
 }
 
 int exfat_write_inode(struct inode *inode, struct writeback_control *wbc)
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 8d72527dfb78f..57510d7f58cfe 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -676,7 +676,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
 			     ep->dentry.file.access_time,
 			     ep->dentry.file.access_date,
 			     0);
-	exfat_free_dentry_set(&es, false);
+	exfat_put_dentry_set(&es, false);
 
 	if (ei->start_clu == EXFAT_FREE_CLUSTER) {
 		exfat_fs_error(sb,
-- 
GitLab


From f3fe3954c09f97d8227d9d2edc807796a8b228ab Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Thu, 17 Mar 2022 19:39:20 +0800
Subject: [PATCH 3673/4122] exfat: replace magic numbers with Macros

Code refinement, no functional changes.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c   | 12 ++++++------
 fs/exfat/inode.c |  4 ++--
 fs/exfat/namei.c |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index a9a0b3e46af2d..c05493fc91241 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -44,7 +44,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
 	 * Third entry  : first file-name entry
 	 * So, the index of first file-name dentry should start from 2.
 	 */
-	for (i = 2; i < es.num_entries; i++) {
+	for (i = ES_IDX_FIRST_FILENAME; i < es.num_entries; i++) {
 		struct exfat_dentry *ep = exfat_get_dentry_cached(&es, i);
 
 		/* end of name entry */
@@ -336,7 +336,7 @@ int exfat_calc_num_entries(struct exfat_uni_name *p_uniname)
 		return -EINVAL;
 
 	/* 1 file entry + 1 stream entry + name entries */
-	return ((len - 1) / EXFAT_FILE_NAME_LEN + 3);
+	return ES_ENTRY_NUM(len);
 }
 
 unsigned int exfat_get_entry_type(struct exfat_dentry *ep)
@@ -591,13 +591,13 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es)
 	unsigned short chksum = 0;
 	struct exfat_dentry *ep;
 
-	for (i = 0; i < es->num_entries; i++) {
+	for (i = ES_IDX_FILE; i < es->num_entries; i++) {
 		ep = exfat_get_dentry_cached(es, i);
 		chksum = exfat_calc_chksum16(ep, DENTRY_SIZE, chksum,
 					     chksum_type);
 		chksum_type = CS_DEFAULT;
 	}
-	ep = exfat_get_dentry_cached(es, 0);
+	ep = exfat_get_dentry_cached(es, ES_IDX_FILE);
 	ep->dentry.file.checksum = cpu_to_le16(chksum);
 	es->modified = true;
 }
@@ -858,7 +858,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 		return -EIO;
 	es->bh[es->num_bh++] = bh;
 
-	ep = exfat_get_dentry_cached(es, 0);
+	ep = exfat_get_dentry_cached(es, ES_IDX_FILE);
 	if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
 		goto put_es;
 
@@ -895,7 +895,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 	}
 
 	/* validate cached dentries */
-	for (i = 1; i < num_entries; i++) {
+	for (i = ES_IDX_STREAM; i < num_entries; i++) {
 		ep = exfat_get_dentry_cached(es, i);
 		if (!exfat_validate_entry(exfat_get_entry_type(ep), &mode))
 			goto put_es;
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index a84eae72556d7..dac5001bae9eb 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -44,8 +44,8 @@ int __exfat_write_inode(struct inode *inode, int sync)
 	/* get the directory entry of given file or directory */
 	if (exfat_get_dentry_set(&es, sb, &(ei->dir), ei->entry, ES_ALL_ENTRIES))
 		return -EIO;
-	ep = exfat_get_dentry_cached(&es, 0);
-	ep2 = exfat_get_dentry_cached(&es, 1);
+	ep = exfat_get_dentry_cached(&es, ES_IDX_FILE);
+	ep2 = exfat_get_dentry_cached(&es, ES_IDX_STREAM);
 
 	ep->dentry.file.attr = cpu_to_le16(exfat_make_attr(inode));
 
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 57510d7f58cfe..01e4e8c60bbea 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -646,8 +646,8 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
 	dentry = hint_opt.eidx;
 	if (exfat_get_dentry_set(&es, sb, &cdir, dentry, ES_2_ENTRIES))
 		return -EIO;
-	ep = exfat_get_dentry_cached(&es, 0);
-	ep2 = exfat_get_dentry_cached(&es, 1);
+	ep = exfat_get_dentry_cached(&es, ES_IDX_FILE);
+	ep2 = exfat_get_dentry_cached(&es, ES_IDX_STREAM);
 
 	info->type = exfat_get_entry_type(ep);
 	info->attr = le16_to_cpu(ep->dentry.file.attr);
-- 
GitLab


From 088f1343d9108c16fca064951d85e6de9f5cab42 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Tue, 16 Aug 2022 16:55:06 +0800
Subject: [PATCH 3674/4122] exfat: remove call ilog2() from exfat_readdir()

There is no need to call ilog2() for the conversions between
cluster and dentry in exfat_readdir(), because these conversions
can be replaced with EXFAT_DEN_TO_CLU()/EXFAT_CLU_TO_DEN().

Code refinement, no functional changes.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c      |  9 ++++-----
 fs/exfat/exfat_fs.h | 10 ++++++++--
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index c05493fc91241..397ea2d98848f 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -61,7 +61,7 @@ static void exfat_get_uniname_from_ext_entry(struct super_block *sb,
 /* read a directory entry from the opened directory */
 static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_entry *dir_entry)
 {
-	int i, dentries_per_clu, dentries_per_clu_bits = 0, num_ext;
+	int i, dentries_per_clu, num_ext;
 	unsigned int type, clu_offset, max_dentries;
 	struct exfat_chain dir, clu;
 	struct exfat_uni_name uni_name;
@@ -83,11 +83,10 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
 			EXFAT_B_TO_CLU(i_size_read(inode), sbi), ei->flags);
 
 	dentries_per_clu = sbi->dentries_per_clu;
-	dentries_per_clu_bits = ilog2(dentries_per_clu);
 	max_dentries = (unsigned int)min_t(u64, MAX_EXFAT_DENTRIES,
-					   (u64)sbi->num_clusters << dentries_per_clu_bits);
+				(u64)EXFAT_CLU_TO_DEN(sbi->num_clusters, sbi));
 
-	clu_offset = dentry >> dentries_per_clu_bits;
+	clu_offset = EXFAT_DEN_TO_CLU(dentry, sbi);
 	exfat_chain_dup(&clu, &dir);
 
 	if (clu.flags == ALLOC_NO_FAT_CHAIN) {
@@ -162,7 +161,7 @@ static int exfat_readdir(struct inode *inode, loff_t *cpos, struct exfat_dir_ent
 			dir_entry->entry = dentry;
 			brelse(bh);
 
-			ei->hint_bmap.off = dentry >> dentries_per_clu_bits;
+			ei->hint_bmap.off = EXFAT_DEN_TO_CLU(dentry, sbi);
 			ei->hint_bmap.clu = clu.dir;
 
 			*cpos = EXFAT_DEN_TO_B(dentry + 1 + num_ext);
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 324acc57d0292..37e8af8042aa9 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -101,11 +101,17 @@ enum {
 /*
  * helpers for block size to dentry size conversion.
  */
-#define EXFAT_B_TO_DEN_IDX(b, sbi)	\
-	((b) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS))
 #define EXFAT_B_TO_DEN(b)		((b) >> DENTRY_SIZE_BITS)
 #define EXFAT_DEN_TO_B(b)		((b) << DENTRY_SIZE_BITS)
 
+/*
+ * helpers for cluster size to dentry size conversion.
+ */
+#define EXFAT_CLU_TO_DEN(clu, sbi)	\
+	((clu) << ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS))
+#define EXFAT_DEN_TO_CLU(dentry, sbi)	\
+	((dentry) >> ((sbi)->cluster_size_bits - DENTRY_SIZE_BITS))
+
 /*
  * helpers for fat entry.
  */
-- 
GitLab


From 015c0d4f6b1e65857de88279f07d7ecc5e305137 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Mon, 15 Aug 2022 10:15:16 +0800
Subject: [PATCH 3675/4122] exfat: remove unneeded codes from __exfat_rename()

The code gets the dentry, but the dentry is not used, remove the
code.

Code refinement, no functional changes.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/namei.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 01e4e8c60bbea..347c8df45bd00 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -1175,7 +1175,7 @@ static int __exfat_rename(struct inode *old_parent_inode,
 	struct exfat_inode_info *new_ei = NULL;
 	unsigned int new_entry_type = TYPE_UNUSED;
 	int new_entry = 0;
-	struct buffer_head *old_bh, *new_bh = NULL;
+	struct buffer_head *new_bh = NULL;
 
 	/* check the validity of pointer parameters */
 	if (new_path == NULL || strlen(new_path) == 0)
@@ -1191,13 +1191,6 @@ static int __exfat_rename(struct inode *old_parent_inode,
 		EXFAT_I(old_parent_inode)->flags);
 	dentry = ei->entry;
 
-	ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh);
-	if (!ep) {
-		ret = -EIO;
-		goto out;
-	}
-	brelse(old_bh);
-
 	/* check whether new dir is existing directory and empty */
 	if (new_inode) {
 		ret = -EIO;
-- 
GitLab


From 72880cb5f157514d797d5f6ab3184bbde671a18a Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Sun, 10 Apr 2022 16:12:14 +0800
Subject: [PATCH 3676/4122] exfat: remove unnecessary arguments from
 exfat_find_dir_entry()

This commit removes argument 'num_entries' and 'type' from
exfat_find_dir_entry().

Code refinement, no functional changes.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c      | 12 +++++++-----
 fs/exfat/exfat_fs.h |  3 +--
 fs/exfat/namei.c    | 10 ++--------
 3 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 397ea2d98848f..8121a7e073bc7 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -956,7 +956,7 @@ enum {
  */
 int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
 		struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname,
-		int num_entries, unsigned int type, struct exfat_hint *hint_opt)
+		struct exfat_hint *hint_opt)
 {
 	int i, rewind = 0, dentry = 0, end_eidx = 0, num_ext = 0, len;
 	int order, step, name_len = 0;
@@ -967,6 +967,10 @@ int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
 	struct exfat_hint *hint_stat = &ei->hint_stat;
 	struct exfat_hint_femp candi_empty;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
+	int num_entries = exfat_calc_num_entries(p_uniname);
+
+	if (num_entries < 0)
+		return num_entries;
 
 	dentries_per_clu = sbi->dentries_per_clu;
 
@@ -1020,10 +1024,8 @@ rewind:
 				step = DIRENT_STEP_FILE;
 				hint_opt->clu = clu.dir;
 				hint_opt->eidx = i;
-				if (type == TYPE_ALL || type == entry_type) {
-					num_ext = ep->dentry.file.num_ext;
-					step = DIRENT_STEP_STRM;
-				}
+				num_ext = ep->dentry.file.num_ext;
+				step = DIRENT_STEP_STRM;
 				brelse(bh);
 				continue;
 			}
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 37e8af8042aa9..21fec01d68ff0 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -71,7 +71,6 @@ enum {
 #define TYPE_PADDING		0x0402
 #define TYPE_ACLTAB		0x0403
 #define TYPE_BENIGN_SEC		0x0800
-#define TYPE_ALL		0x0FFF
 
 #define MAX_CHARSET_SIZE	6 /* max size of multi-byte character */
 #define MAX_NAME_LENGTH		255 /* max len of file name excluding NULL */
@@ -490,7 +489,7 @@ void exfat_update_dir_chksum_with_entry_set(struct exfat_entry_set_cache *es);
 int exfat_calc_num_entries(struct exfat_uni_name *p_uniname);
 int exfat_find_dir_entry(struct super_block *sb, struct exfat_inode_info *ei,
 		struct exfat_chain *p_dir, struct exfat_uni_name *p_uniname,
-		int num_entries, unsigned int type, struct exfat_hint *hint_opt);
+		struct exfat_hint *hint_opt);
 int exfat_alloc_new_dir(struct inode *inode, struct exfat_chain *clu);
 struct exfat_dentry *exfat_get_dentry(struct super_block *sb,
 		struct exfat_chain *p_dir, int entry, struct buffer_head **bh);
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 347c8df45bd00..5f995eba5dbbe 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -597,7 +597,7 @@ unlock:
 static int exfat_find(struct inode *dir, struct qstr *qname,
 		struct exfat_dir_entry *info)
 {
-	int ret, dentry, num_entries, count;
+	int ret, dentry, count;
 	struct exfat_chain cdir;
 	struct exfat_uni_name uni_name;
 	struct super_block *sb = dir->i_sb;
@@ -616,10 +616,6 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
 	if (ret)
 		return ret;
 
-	num_entries = exfat_calc_num_entries(&uni_name);
-	if (num_entries < 0)
-		return num_entries;
-
 	/* check the validation of hint_stat and initialize it if required */
 	if (ei->version != (inode_peek_iversion_raw(dir) & 0xffffffff)) {
 		ei->hint_stat.clu = cdir.dir;
@@ -629,9 +625,7 @@ static int exfat_find(struct inode *dir, struct qstr *qname,
 	}
 
 	/* search the file name for directories */
-	dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name,
-			num_entries, TYPE_ALL, &hint_opt);
-
+	dentry = exfat_find_dir_entry(sb, ei, &cdir, &uni_name, &hint_opt);
 	if (dentry < 0)
 		return dentry; /* -error value */
 
-- 
GitLab


From e981917b3fae689e9372647a38746444205bb905 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Thu, 17 Nov 2022 10:36:21 +0800
Subject: [PATCH 3677/4122] exfat: remove argument 'size' from exfat_truncate()

argument 'size' is not used in exfat_truncate(), remove it.

Code refinement, no functional changes.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/exfat_fs.h | 2 +-
 fs/exfat/file.c     | 4 ++--
 fs/exfat/inode.c    | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 21fec01d68ff0..ae048802f9db1 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -449,7 +449,7 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range);
 /* file.c */
 extern const struct file_operations exfat_file_operations;
 int __exfat_truncate(struct inode *inode, loff_t new_size);
-void exfat_truncate(struct inode *inode, loff_t size);
+void exfat_truncate(struct inode *inode);
 int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		  struct iattr *attr);
 int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path,
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 4e0793f35e8fb..7c97c1df1305a 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -189,7 +189,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
 	return 0;
 }
 
-void exfat_truncate(struct inode *inode, loff_t size)
+void exfat_truncate(struct inode *inode)
 {
 	struct super_block *sb = inode->i_sb;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
@@ -310,7 +310,7 @@ int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		 * __exfat_write_inode() is called from exfat_truncate(), inode
 		 * is already written by it, so mark_inode_dirty() is unneeded.
 		 */
-		exfat_truncate(inode, attr->ia_size);
+		exfat_truncate(inode);
 		up_write(&EXFAT_I(inode)->truncate_lock);
 	} else
 		mark_inode_dirty(inode);
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index dac5001bae9eb..0d147f8a1f7c6 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -362,7 +362,7 @@ static void exfat_write_failed(struct address_space *mapping, loff_t to)
 	if (to > i_size_read(inode)) {
 		truncate_pagecache(inode, i_size_read(inode));
 		inode->i_mtime = inode->i_ctime = current_time(inode);
-		exfat_truncate(inode, EXFAT_I(inode)->i_size_aligned);
+		exfat_truncate(inode);
 	}
 }
 
-- 
GitLab


From f7cde96710a4362dca199458d3de04f631178453 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Mon, 28 Mar 2022 16:37:58 +0800
Subject: [PATCH 3678/4122] exfat: remove i_size_write() from
 __exfat_truncate()

The file/directory size is updated into inode by i_size_write()
before __exfat_truncate() is called, so it is redundant to
re-update by i_size_write() in __exfat_truncate().

Code refinement, no functional changes.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/exfat_fs.h | 2 +-
 fs/exfat/file.c     | 8 +++-----
 fs/exfat/inode.c    | 2 +-
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index ae048802f9db1..a1e7feb220797 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -448,7 +448,7 @@ int exfat_trim_fs(struct inode *inode, struct fstrim_range *range);
 
 /* file.c */
 extern const struct file_operations exfat_file_operations;
-int __exfat_truncate(struct inode *inode, loff_t new_size);
+int __exfat_truncate(struct inode *inode);
 void exfat_truncate(struct inode *inode);
 int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
 		  struct iattr *attr);
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index 7c97c1df1305a..f5b29072775de 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -93,7 +93,7 @@ static int exfat_sanitize_mode(const struct exfat_sb_info *sbi,
 }
 
 /* resize the file length */
-int __exfat_truncate(struct inode *inode, loff_t new_size)
+int __exfat_truncate(struct inode *inode)
 {
 	unsigned int num_clusters_new, num_clusters_phys;
 	unsigned int last_clu = EXFAT_FREE_CLUSTER;
@@ -113,7 +113,7 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
 
 	exfat_chain_set(&clu, ei->start_clu, num_clusters_phys, ei->flags);
 
-	if (new_size > 0) {
+	if (i_size_read(inode) > 0) {
 		/*
 		 * Truncate FAT chain num_clusters after the first cluster
 		 * num_clusters = min(new, phys);
@@ -143,8 +143,6 @@ int __exfat_truncate(struct inode *inode, loff_t new_size)
 		ei->start_clu = EXFAT_EOF_CLUSTER;
 	}
 
-	i_size_write(inode, new_size);
-
 	if (ei->type == TYPE_FILE)
 		ei->attr |= ATTR_ARCHIVE;
 
@@ -207,7 +205,7 @@ void exfat_truncate(struct inode *inode)
 		goto write_size;
 	}
 
-	err = __exfat_truncate(inode, i_size_read(inode));
+	err = __exfat_truncate(inode);
 	if (err)
 		goto write_size;
 
diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 0d147f8a1f7c6..95adc4b2e436b 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -626,7 +626,7 @@ void exfat_evict_inode(struct inode *inode)
 	if (!inode->i_nlink) {
 		i_size_write(inode, 0);
 		mutex_lock(&EXFAT_SB(inode->i_sb)->s_lock);
-		__exfat_truncate(inode, 0);
+		__exfat_truncate(inode);
 		mutex_unlock(&EXFAT_SB(inode->i_sb)->s_lock);
 	}
 
-- 
GitLab


From 85463321e726fe59873bbc21f2f480747810aef8 Mon Sep 17 00:00:00 2001
From: Joel Savitz <jsavitz@redhat.com>
Date: Tue, 18 Oct 2022 19:12:22 -0400
Subject: [PATCH 3679/4122] selftests/vm: enable running select groups of tests

Our memory management kernel CI testing at Red Hat uses the VM
selftests and we have run into two problems:

First, our LTP tests overlap with the VM selftests.

We want to avoid unhelpful redundancy in our testing practices.

Second, we have observed the current run_vmtests.sh to report overall
failure/ambiguous results in the case that a machine lacks the necessary
hardware to perform one or more of the tests. E.g. ksm tests that
require more than one numa node.

We want to be able to run the vm selftests suitable to particular hardware.

Add the ability to run one or more groups of vm tests via run_vmtests.sh
instead of simply all-or-none in order to solve these problems.

Preserve existing default behavior of running all tests when the script
is invoked with no arguments.

Documentation of test groups is included in the patch as follows:

    # ./run_vmtests.sh [ -h || --help ]

    usage: ./tools/testing/selftests/vm/run_vmtests.sh [ -h | -t "<categories>"]
      -t: specify specific categories to tests to run
      -h: display this message

    The default behavior is to run all tests.

    Alternatively, specific groups tests can be run by passing a string
    to the -t argument containing one or more of the following categories
    separated by spaces:
    - mmap
	    tests for mmap(2)
    - gup_test
	    tests for gup using gup_test interface
    - userfaultfd
	    tests for  userfaultfd(2)
    - compaction
	    a test for the patch "Allow compaction of unevictable pages"
    - mlock
	    tests for mlock(2)
    - mremap
	    tests for mremap(2)
    - hugevm
	    tests for very large virtual address space
    - vmalloc
	    vmalloc smoke tests
    - hmm
	    hmm smoke tests
    - madv_populate
	    test memadvise(2) MADV_POPULATE_{READ,WRITE} options
    - memfd_secret
	    test memfd_secret(2)
    - process_mrelease
	    test process_mrelease(2)
    - ksm
	    ksm tests that do not require >=2 NUMA nodes
    - ksm_numa
	    ksm tests that require >=2 NUMA nodes
    - pkey
	    memory protection key tests
    - soft_dirty
    	    test soft dirty page bit semantics
    - anon_cow
            test anonymous copy-on-write semantics
    example: ./run_vmtests.sh -t "hmm mmap ksm"

Link: https://lkml.kernel.org/r/20221018231222.1884715-1-jsavitz@redhat.com
Signed-off-by: Joel Savitz <jsavitz@redhat.com>
Cc: Joel Savitz <jsavitz@redhat.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/run_vmtests.sh | 207 +++++++++++++++-------
 1 file changed, 144 insertions(+), 63 deletions(-)

diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index 54d7a822c2cee..e26661feacf5c 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -1,13 +1,88 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
-#please run as root
+# Please run as root
 
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
 exitcode=0
 
-#get huge pagesize and freepages from /proc/meminfo
+usage() {
+	cat <<EOF
+usage: ${BASH_SOURCE[0]:-$0} [ -h | -t "<categories>"]
+  -t: specify specific categories to tests to run
+  -h: display this message
+
+The default behavior is to run all tests.
+
+Alternatively, specific groups tests can be run by passing a string
+to the -t argument containing one or more of the following categories
+separated by spaces:
+- mmap
+	tests for mmap(2)
+- gup_test
+	tests for gup using gup_test interface
+- userfaultfd
+	tests for  userfaultfd(2)
+- compaction
+	a test for the patch "Allow compaction of unevictable pages"
+- mlock
+	tests for mlock(2)
+- mremap
+	tests for mremap(2)
+- hugevm
+	tests for very large virtual address space
+- vmalloc
+	vmalloc smoke tests
+- hmm
+	hmm smoke tests
+- madv_populate
+	test memadvise(2) MADV_POPULATE_{READ,WRITE} options
+- memfd_secret
+	test memfd_secret(2)
+- process_mrelease
+	test process_mrelease(2)
+- ksm
+	ksm tests that do not require >=2 NUMA nodes
+- ksm_numa
+	ksm tests that require >=2 NUMA nodes
+- pkey
+	memory protection key tests
+- soft_dirty
+	test soft dirty page bit semantics
+- cow
+	test copy-on-write semantics
+example: ./run_vmtests.sh -t "hmm mmap ksm"
+EOF
+	exit 0
+}
+
+
+while getopts "ht:" OPT; do
+	case ${OPT} in
+		"h") usage ;;
+		"t") VM_SELFTEST_ITEMS=${OPTARG} ;;
+	esac
+done
+shift $((OPTIND -1))
+
+# default behavior: run all tests
+VM_SELFTEST_ITEMS=${VM_SELFTEST_ITEMS:-default}
+
+test_selected() {
+	if [ "$VM_SELFTEST_ITEMS" == "default" ]; then
+		# If no VM_SELFTEST_ITEMS are specified, run all tests
+		return 0
+	fi
+	# If test selected argument is one of the test items
+	if [[ " ${VM_SELFTEST_ITEMS[*]} " =~ " ${1} " ]]; then
+	        return 0
+	else
+	        return 1
+	fi
+}
+
+# get huge pagesize and freepages from /proc/meminfo
 while read -r name size unit; do
 	if [ "$name" = "HugePages_Free:" ]; then
 		freepgs="$size"
@@ -27,7 +102,7 @@ hpgsize_MB=$((hpgsize_KB / 1024))
 half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
 needmem_KB=$((half_ufd_size_MB * 2 * 1024))
 
-#set proper nr_hugepages
+# set proper nr_hugepages
 if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
 	nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
 	needpgs=$((needmem_KB / hpgsize_KB))
@@ -56,136 +131,142 @@ else
 	exit 1
 fi
 
-#filter 64bit architectures
+# filter 64bit architectures
 ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64"
 if [ -z "$ARCH" ]; then
 	ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/')
 fi
 VADDR64=0
-echo "$ARCH64STR" | grep "$ARCH" && VADDR64=1
+echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1
 
 # Usage: run_test [test binary] [arbitrary test arguments...]
 run_test() {
-	local title="running $*"
-	local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
-	printf "%s\n%s\n%s\n" "$sep" "$title" "$sep"
-
-	"$@"
-	local ret=$?
-	if [ $ret -eq 0 ]; then
-		echo "[PASS]"
-	elif [ $ret -eq $ksft_skip ]; then
-		echo "[SKIP]"
-		exitcode=$ksft_skip
-	else
-		echo "[FAIL]"
-		exitcode=1
-	fi
+	if test_selected ${CATEGORY}; then
+		local title="running $*"
+		local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
+		printf "%s\n%s\n%s\n" "$sep" "$title" "$sep"
+
+		"$@"
+		local ret=$?
+		if [ $ret -eq 0 ]; then
+			echo "[PASS]"
+		elif [ $ret -eq $ksft_skip ]; then
+			echo "[SKIP]"
+			exitcode=$ksft_skip
+		else
+			echo "[FAIL]"
+			exitcode=1
+		fi
+	fi # test_selected
 }
 
-run_test ./hugepage-mmap
+CATEGORY="hugetlb" run_test ./hugepage-mmap
 
 shmmax=$(cat /proc/sys/kernel/shmmax)
 shmall=$(cat /proc/sys/kernel/shmall)
 echo 268435456 > /proc/sys/kernel/shmmax
 echo 4194304 > /proc/sys/kernel/shmall
-run_test ./hugepage-shm
+CATEGORY="hugetlb" run_test ./hugepage-shm
 echo "$shmmax" > /proc/sys/kernel/shmmax
 echo "$shmall" > /proc/sys/kernel/shmall
 
-run_test ./map_hugetlb
-run_test ./hugepage-mremap
-run_test ./hugepage-vmemmap
-run_test ./hugetlb-madvise
+CATEGORY="hugetlb" run_test ./map_hugetlb
+CATEGORY="hugetlb" run_test ./hugepage-mremap
+CATEGORY="hugetlb" run_test ./hugepage-vmemmap
+CATEGORY="hugetlb" run_test ./hugetlb-madvise
 
-echo "NOTE: The above hugetlb tests provide minimal coverage.  Use"
-echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
-echo "      hugetlb regression testing."
+if test_selected "hugetlb"; then
+	echo "NOTE: These hugetlb tests provide minimal coverage.  Use"
+	echo "      https://github.com/libhugetlbfs/libhugetlbfs.git for"
+	echo "      hugetlb regression testing."
+fi
 
-run_test ./map_fixed_noreplace
+CATEGORY="mmap" run_test ./map_fixed_noreplace
 
 # get_user_pages_fast() benchmark
-run_test ./gup_test -u
+CATEGORY="gup_test" run_test ./gup_test -u
 # pin_user_pages_fast() benchmark
-run_test ./gup_test -a
+CATEGORY="gup_test" run_test ./gup_test -a
 # Dump pages 0, 19, and 4096, using pin_user_pages:
-run_test ./gup_test -ct -F 0x1 0 19 0x1000
+CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000
 
 uffd_mods=("" ":dev")
 for mod in "${uffd_mods[@]}"; do
-	run_test ./userfaultfd anon${mod} 20 16
+	CATEGORY="userfaultfd" run_test ./userfaultfd anon${mod} 20 16
 	# Hugetlb tests require source and destination huge pages. Pass in half
 	# the size ($half_ufd_size_MB), which is used for *each*.
-	run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32
-	run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32
-	run_test ./userfaultfd shmem${mod} 20 16
+	CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb${mod} "$half_ufd_size_MB" 32
+	CATEGORY="userfaultfd" run_test ./userfaultfd hugetlb_shared${mod} "$half_ufd_size_MB" 32
+	CATEGORY="userfaultfd" run_test ./userfaultfd shmem${mod} 20 16
 done
 
 #cleanup
 echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
 
-run_test ./compaction_test
+CATEGORY="compaction" run_test ./compaction_test
 
-run_test sudo -u nobody ./on-fault-limit
+CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit
 
-run_test ./map_populate
+CATEGORY="mmap" run_test ./map_populate
 
-run_test ./mlock-random-test
+CATEGORY="mlock" run_test ./mlock-random-test
 
-run_test ./mlock2-tests
+CATEGORY="mlock" run_test ./mlock2-tests
 
-run_test ./mrelease_test
+CATEGORY="process_mrelease" run_test ./mrelease_test
 
-run_test ./mremap_test
+CATEGORY="mremap" run_test ./mremap_test
 
-run_test ./thuge-gen
+CATEGORY="hugetlb" run_test ./thuge-gen
 
 if [ $VADDR64 -ne 0 ]; then
-	run_test ./virtual_address_range
+	CATEGORY="hugevm" run_test ./virtual_address_range
 
 	# virtual address 128TB switch test
-	run_test ./va_128TBswitch.sh
+	CATEGORY="hugevm" run_test ./va_128TBswitch.sh
 fi # VADDR64
 
 # vmalloc stability smoke test
-run_test ./test_vmalloc.sh smoke
+CATEGORY="vmalloc" run_test ./test_vmalloc.sh smoke
 
-run_test ./mremap_dontunmap
+CATEGORY="mremap" run_test ./mremap_dontunmap
 
-run_test ./test_hmm.sh smoke
+CATEGORY="hmm" run_test ./test_hmm.sh smoke
 
 # MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
-run_test ./madv_populate
+CATEGORY="madv_populate" run_test ./madv_populate
 
-run_test ./memfd_secret
+CATEGORY="memfd_secret" run_test ./memfd_secret
 
 # KSM MADV_MERGEABLE test with 10 identical pages
-run_test ./ksm_tests -M -p 10
+CATEGORY="ksm" run_test ./ksm_tests -M -p 10
 # KSM unmerge test
-run_test ./ksm_tests -U
+CATEGORY="ksm" run_test ./ksm_tests -U
 # KSM test with 10 zero pages and use_zero_pages = 0
-run_test ./ksm_tests -Z -p 10 -z 0
+CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 0
 # KSM test with 10 zero pages and use_zero_pages = 1
-run_test ./ksm_tests -Z -p 10 -z 1
+CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 1
 # KSM test with 2 NUMA nodes and merge_across_nodes = 1
-run_test ./ksm_tests -N -m 1
+CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 1
 # KSM test with 2 NUMA nodes and merge_across_nodes = 0
-run_test ./ksm_tests -N -m 0
+CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
+
+CATEGORY="ksm" run_test ./ksm_functional_tests
 
 # protection_keys tests
 if [ -x ./protection_keys_32 ]
 then
-	run_test ./protection_keys_32
+	CATEGORY="pkey" run_test ./protection_keys_32
 fi
 
 if [ -x ./protection_keys_64 ]
 then
-	run_test ./protection_keys_64
+	CATEGORY="pkey" run_test ./protection_keys_64
 fi
 
-run_test ./soft-dirty
+CATEGORY="soft_dirty" run_test ./soft-dirty
 
-# COW tests for anonymous memory
-run_test ./cow
+# COW tests
+CATEGORY="cow" run_test ./cow
 
 exit $exitcode
-- 
GitLab


From 93fb70aa5904c2577fab8100fa990ecfa4f5b4c7 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:36 +0200
Subject: [PATCH 3680/4122] selftests/vm: add KSM unmerge tests

Patch series "mm/ksm: break_ksm() cleanups and fixes", v2.

This series cleans up and fixes break_ksm().  In summary, we no longer use
fake write faults to break COW but instead FAULT_FLAG_UNSHARE.  Further,
we move away from using follow_page() --- that we can hopefully remove
completely at one point --- and use new walk_page_range_vma() instead.

Fortunately, we can get rid of VM_FAULT_WRITE and FOLL_MIGRATION in common
code now.

Extend the existing ksm tests by an unmerge benchmark, and a some new
unmerge tests.

Also, add a selftest to measure MADV_UNMERGEABLE performance.  In my setup
(AMD Ryzen 9 3900X), running the KSM selftest to test unmerge performance
on 2 GiB (taskset 0x8 ./ksm_tests -D -s 2048), this results in a
performance degradation of ~6% -- 7% (old: ~5250 MiB/s, new: ~4900 MiB/s).
I don't think we particularly care for now, but it's good to be aware of
the implication.


This patch (of 9):

Let's add three unmerge tests (MADV_UNMERGEABLE unmerging all pages in the
range).

test_unmerge(): basic unmerge tests
test_unmerge_discarded(): have some pte_none() entries in the range
test_unmerge_uffd_wp(): protect the merged pages using uffd-wp

ksm_tests.c currently contains a mixture of benchmarks and tests, whereby
each test is carried out by executing the ksm_tests binary with specific
parameters.  Let's add new ksm_functional_tests.c that performs multiple,
smaller functional tests all at once.

Link: https://lkml.kernel.org/r/20221021101141.84170-1-david@redhat.com
Link: https://lkml.kernel.org/r/20221021101141.84170-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/Makefile           |   2 +
 .../selftests/vm/ksm_functional_tests.c       | 279 ++++++++++++++++++
 tools/testing/selftests/vm/run_vmtests.sh     |   2 +
 tools/testing/selftests/vm/vm_util.c          |  10 +
 tools/testing/selftests/vm/vm_util.h          |   1 +
 5 files changed, 294 insertions(+)
 create mode 100644 tools/testing/selftests/vm/ksm_functional_tests.c

diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a4d764efd6e3a..89c14e41bd437 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -55,6 +55,7 @@ TEST_GEN_FILES += userfaultfd
 TEST_GEN_PROGS += soft-dirty
 TEST_GEN_PROGS += split_huge_page_test
 TEST_GEN_FILES += ksm_tests
+TEST_GEN_PROGS += ksm_functional_tests
 
 ifeq ($(MACHINE),x86_64)
 CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
@@ -100,6 +101,7 @@ include ../lib.mk
 
 $(OUTPUT)/cow: vm_util.c
 $(OUTPUT)/khugepaged: vm_util.c
+$(OUTPUT)/ksm_functional_tests: vm_util.c
 $(OUTPUT)/madv_populate: vm_util.c
 $(OUTPUT)/soft-dirty: vm_util.c
 $(OUTPUT)/split_huge_page_test: vm_util.c
diff --git a/tools/testing/selftests/vm/ksm_functional_tests.c b/tools/testing/selftests/vm/ksm_functional_tests.c
new file mode 100644
index 0000000000000..96644be689626
--- /dev/null
+++ b/tools/testing/selftests/vm/ksm_functional_tests.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KSM functional tests
+ *
+ * Copyright 2022, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define KiB 1024u
+#define MiB (1024 * KiB)
+
+static int ksm_fd;
+static int ksm_full_scans_fd;
+static int pagemap_fd;
+static size_t pagesize;
+
+static bool range_maps_duplicates(char *addr, unsigned long size)
+{
+	unsigned long offs_a, offs_b, pfn_a, pfn_b;
+
+	/*
+	 * There is no easy way to check if there are KSM pages mapped into
+	 * this range. We only check that the range does not map the same PFN
+	 * twice by comaring each pair of mapped pages.
+	 */
+	for (offs_a = 0; offs_a < size; offs_a += pagesize) {
+		pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a);
+		/* Page not present or PFN not exposed by the kernel. */
+		if (pfn_a == -1ull || !pfn_a)
+			continue;
+
+		for (offs_b = offs_a + pagesize; offs_b < size;
+		     offs_b += pagesize) {
+			pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b);
+			if (pfn_b == -1ull || !pfn_b)
+				continue;
+			if (pfn_a == pfn_b)
+				return true;
+		}
+	}
+	return false;
+}
+
+static long ksm_get_full_scans(void)
+{
+	char buf[10];
+	ssize_t ret;
+
+	ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
+	if (ret <= 0)
+		return -errno;
+	buf[ret] = 0;
+
+	return strtol(buf, NULL, 10);
+}
+
+static int ksm_merge(void)
+{
+	long start_scans, end_scans;
+
+	/* Wait for two full scans such that any possible merging happened. */
+	start_scans = ksm_get_full_scans();
+	if (start_scans < 0)
+		return start_scans;
+	if (write(ksm_fd, "1", 1) != 1)
+		return -errno;
+	do {
+		end_scans = ksm_get_full_scans();
+		if (end_scans < 0)
+			return end_scans;
+	} while (end_scans < start_scans + 2);
+
+	return 0;
+}
+
+static char *mmap_and_merge_range(char val, unsigned long size)
+{
+	char *map;
+
+	map = mmap(NULL, size, PROT_READ|PROT_WRITE,
+		   MAP_PRIVATE|MAP_ANON, -1, 0);
+	if (map == MAP_FAILED) {
+		ksft_test_result_fail("mmap() failed\n");
+		return MAP_FAILED;
+	}
+
+	/* Don't use THP. Ignore if THP are not around on a kernel. */
+	if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
+		ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+		goto unmap;
+	}
+
+	/* Make sure each page contains the same values to merge them. */
+	memset(map, val, size);
+	if (madvise(map, size, MADV_MERGEABLE)) {
+		ksft_test_result_fail("MADV_MERGEABLE failed\n");
+		goto unmap;
+	}
+
+	/* Run KSM to trigger merging and wait. */
+	if (ksm_merge()) {
+		ksft_test_result_fail("Running KSM failed\n");
+		goto unmap;
+	}
+	return map;
+unmap:
+	munmap(map, size);
+	return MAP_FAILED;
+}
+
+static void test_unmerge(void)
+{
+	const unsigned int size = 2 * MiB;
+	char *map;
+
+	ksft_print_msg("[RUN] %s\n", __func__);
+
+	map = mmap_and_merge_range(0xcf, size);
+	if (map == MAP_FAILED)
+		return;
+
+	if (madvise(map, size, MADV_UNMERGEABLE)) {
+		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+		goto unmap;
+	}
+
+	ksft_test_result(!range_maps_duplicates(map, size),
+			 "Pages were unmerged\n");
+unmap:
+	munmap(map, size);
+}
+
+static void test_unmerge_discarded(void)
+{
+	const unsigned int size = 2 * MiB;
+	char *map;
+
+	ksft_print_msg("[RUN] %s\n", __func__);
+
+	map = mmap_and_merge_range(0xcf, size);
+	if (map == MAP_FAILED)
+		return;
+
+	/* Discard half of all mapped pages so we have pte_none() entries. */
+	if (madvise(map, size / 2, MADV_DONTNEED)) {
+		ksft_test_result_fail("MADV_DONTNEED failed\n");
+		goto unmap;
+	}
+
+	if (madvise(map, size, MADV_UNMERGEABLE)) {
+		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+		goto unmap;
+	}
+
+	ksft_test_result(!range_maps_duplicates(map, size),
+			 "Pages were unmerged\n");
+unmap:
+	munmap(map, size);
+}
+
+#ifdef __NR_userfaultfd
+static void test_unmerge_uffd_wp(void)
+{
+	struct uffdio_writeprotect uffd_writeprotect;
+	struct uffdio_register uffdio_register;
+	const unsigned int size = 2 * MiB;
+	struct uffdio_api uffdio_api;
+	char *map;
+	int uffd;
+
+	ksft_print_msg("[RUN] %s\n", __func__);
+
+	map = mmap_and_merge_range(0xcf, size);
+	if (map == MAP_FAILED)
+		return;
+
+	/* See if UFFD is around. */
+	uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+	if (uffd < 0) {
+		ksft_test_result_skip("__NR_userfaultfd failed\n");
+		goto unmap;
+	}
+
+	/* See if UFFD-WP is around. */
+	uffdio_api.api = UFFD_API;
+	uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+	if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
+		ksft_test_result_fail("UFFDIO_API failed\n");
+		goto close_uffd;
+	}
+	if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
+		ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n");
+		goto close_uffd;
+	}
+
+	/* Register UFFD-WP, no need for an actual handler. */
+	uffdio_register.range.start = (unsigned long) map;
+	uffdio_register.range.len = size;
+	uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) < 0) {
+		ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n");
+		goto close_uffd;
+	}
+
+	/* Write-protect the range using UFFD-WP. */
+	uffd_writeprotect.range.start = (unsigned long) map;
+	uffd_writeprotect.range.len = size;
+	uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+	if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
+		ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n");
+		goto close_uffd;
+	}
+
+	if (madvise(map, size, MADV_UNMERGEABLE)) {
+		ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+		goto close_uffd;
+	}
+
+	ksft_test_result(!range_maps_duplicates(map, size),
+			 "Pages were unmerged\n");
+close_uffd:
+	close(uffd);
+unmap:
+	munmap(map, size);
+}
+#endif
+
+int main(int argc, char **argv)
+{
+	unsigned int tests = 2;
+	int err;
+
+#ifdef __NR_userfaultfd
+	tests++;
+#endif
+
+	ksft_print_header();
+	ksft_set_plan(tests);
+
+	pagesize = getpagesize();
+
+	ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+	if (ksm_fd < 0)
+		ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
+	ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
+	if (ksm_full_scans_fd < 0)
+		ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
+	pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+	if (pagemap_fd < 0)
+		ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
+
+	test_unmerge();
+	test_unmerge_discarded();
+#ifdef __NR_userfaultfd
+	test_unmerge_uffd_wp();
+#endif
+
+	err = ksft_get_fail_cnt();
+	if (err)
+		ksft_exit_fail_msg("%d out of %d tests failed\n",
+				   err, ksft_test_num());
+	return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/run_vmtests.sh b/tools/testing/selftests/vm/run_vmtests.sh
index e26661feacf5c..8984e0bb58c7a 100755
--- a/tools/testing/selftests/vm/run_vmtests.sh
+++ b/tools/testing/selftests/vm/run_vmtests.sh
@@ -253,6 +253,8 @@ CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
 
 CATEGORY="ksm" run_test ./ksm_functional_tests
 
+run_test ./ksm_functional_tests
+
 # protection_keys tests
 if [ -x ./protection_keys_32 ]
 then
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
index 5bbf7641a0f07..7105719027433 100644
--- a/tools/testing/selftests/vm/vm_util.c
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -43,6 +43,16 @@ bool pagemap_is_populated(int fd, char *start)
 	return entry & 0xc000000000000000ull;
 }
 
+unsigned long pagemap_get_pfn(int fd, char *start)
+{
+	uint64_t entry = pagemap_get_entry(fd, start);
+
+	/* If present (63th bit), PFN is at bit 0 -- 54. */
+	if (entry & 0x8000000000000000ull)
+		return entry & 0x007fffffffffffffull;
+	return -1ull;
+}
+
 void clear_softdirty(void)
 {
 	int ret;
diff --git a/tools/testing/selftests/vm/vm_util.h b/tools/testing/selftests/vm/vm_util.h
index 80d5a6ad413b1..1995ee911ef20 100644
--- a/tools/testing/selftests/vm/vm_util.h
+++ b/tools/testing/selftests/vm/vm_util.h
@@ -6,6 +6,7 @@ uint64_t pagemap_get_entry(int fd, char *start);
 bool pagemap_is_softdirty(int fd, char *start);
 bool pagemap_is_swapped(int fd, char *start);
 bool pagemap_is_populated(int fd, char *start);
+unsigned long pagemap_get_pfn(int fd, char *start);
 void clear_softdirty(void);
 bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
 uint64_t read_pmd_pagesize(void);
-- 
GitLab


From c31783eeae7b22dc3f6edde7339de6112959225d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:38 +0200
Subject: [PATCH 3681/4122] mm/pagewalk: don't trigger test_walk() in
 walk_page_vma()

As Peter points out, the caller passes a single VMA and can just do that
check itself.

And in fact, no existing users rely on test_walk() getting called.  So
let's just remove it and make the implementation slightly more efficient.

Link: https://lkml.kernel.org/r/20221021101141.84170-7-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagewalk.h | 2 ++
 mm/pagewalk.c            | 7 -------
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index f3fafb731ffd8..37dc0208862d7 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -27,6 +27,8 @@ struct mm_walk;
  *			"do page table walk over the current vma", returning
  *			a negative value means "abort current page table walk
  *			right now" and returning 1 means "skip the current vma"
+ *			Note that this callback is not called when the caller
+ *			passes in a single VMA as for walk_page_vma().
  * @pre_vma:            if set, called before starting walk on a non-null vma.
  * @post_vma:           if set, called after a walk on a non-null vma, provided
  *                      that @pre_vma and the vma walk succeeded.
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 2ff3a5bebcebb..0a5d71aaf9c7f 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -526,18 +526,11 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
 		.vma		= vma,
 		.private	= private,
 	};
-	int err;
 
 	if (!walk.mm)
 		return -EINVAL;
 
 	mmap_assert_locked(walk.mm);
-
-	err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
-	if (err > 0)
-		return 0;
-	if (err < 0)
-		return err;
 	return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
 }
 
-- 
GitLab


From 5036880efdad976165c817dcb6a1c8c24fb16caa Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:33 +0200
Subject: [PATCH 3682/4122] selftests/vm: add test to measure MADV_UNMERGEABLE
 performance

Let's add a test to measure performance of KSM breaking not triggered via
COW, but triggered by disabling KSM on an area filled with KSM pages via
MADV_UNMERGEABLE.

Link: https://lkml.kernel.org/r/20221021101141.84170-2-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/ksm_tests.c | 76 +++++++++++++++++++++++++-
 1 file changed, 74 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/vm/ksm_tests.c b/tools/testing/selftests/vm/ksm_tests.c
index 0d85be2350fa3..f9eb4d67e0dd6 100644
--- a/tools/testing/selftests/vm/ksm_tests.c
+++ b/tools/testing/selftests/vm/ksm_tests.c
@@ -40,6 +40,7 @@ enum ksm_test_name {
 	CHECK_KSM_NUMA_MERGE,
 	KSM_MERGE_TIME,
 	KSM_MERGE_TIME_HUGE_PAGES,
+	KSM_UNMERGE_TIME,
 	KSM_COW_TIME
 };
 
@@ -108,7 +109,10 @@ static void print_help(void)
 	       " -P evaluate merging time and speed.\n"
 	       "    For this test, the size of duplicated memory area (in MiB)\n"
 	       "    must be provided using -s option\n"
-				 " -H evaluate merging time and speed of area allocated mostly with huge pages\n"
+	       " -H evaluate merging time and speed of area allocated mostly with huge pages\n"
+	       "    For this test, the size of duplicated memory area (in MiB)\n"
+	       "    must be provided using -s option\n"
+	       " -D evaluate unmerging time and speed when disabling KSM.\n"
 	       "    For this test, the size of duplicated memory area (in MiB)\n"
 	       "    must be provided using -s option\n"
 	       " -C evaluate the time required to break COW of merged pages.\n\n");
@@ -188,6 +192,16 @@ static int ksm_merge_pages(void *addr, size_t size, struct timespec start_time,
 	return 0;
 }
 
+static int ksm_unmerge_pages(void *addr, size_t size,
+			     struct timespec start_time, int timeout)
+{
+	if (madvise(addr, size, MADV_UNMERGEABLE)) {
+		perror("madvise");
+		return 1;
+	}
+	return 0;
+}
+
 static bool assert_ksm_pages_count(long dupl_page_count)
 {
 	unsigned long max_page_sharing, pages_sharing, pages_shared;
@@ -560,6 +574,53 @@ err_out:
 	return KSFT_FAIL;
 }
 
+static int ksm_unmerge_time(int mapping, int prot, int timeout, size_t map_size)
+{
+	void *map_ptr;
+	struct timespec start_time, end_time;
+	unsigned long scan_time_ns;
+
+	map_size *= MB;
+
+	map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+	if (!map_ptr)
+		return KSFT_FAIL;
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		goto err_out;
+	}
+	if (ksm_merge_pages(map_ptr, map_size, start_time, timeout))
+		goto err_out;
+
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+		perror("clock_gettime");
+		goto err_out;
+	}
+	if (ksm_unmerge_pages(map_ptr, map_size, start_time, timeout))
+		goto err_out;
+	if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+		perror("clock_gettime");
+		goto err_out;
+	}
+
+	scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+		       (end_time.tv_nsec - start_time.tv_nsec);
+
+	printf("Total size:    %lu MiB\n", map_size / MB);
+	printf("Total time:    %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+	       scan_time_ns % NSEC_PER_SEC);
+	printf("Average speed:  %.3f MiB/s\n", (map_size / MB) /
+					       ((double)scan_time_ns / NSEC_PER_SEC));
+
+	munmap(map_ptr, map_size);
+	return KSFT_PASS;
+
+err_out:
+	printf("Not OK\n");
+	munmap(map_ptr, map_size);
+	return KSFT_FAIL;
+}
+
 static int ksm_cow_time(int mapping, int prot, int timeout, size_t page_size)
 {
 	void *map_ptr;
@@ -644,7 +705,7 @@ int main(int argc, char *argv[])
 	bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
 	long size_MB = 0;
 
-	while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCH")) != -1) {
+	while ((opt = getopt(argc, argv, "ha:p:l:z:m:s:MUZNPCHD")) != -1) {
 		switch (opt) {
 		case 'a':
 			prot = str_to_prot(optarg);
@@ -701,6 +762,9 @@ int main(int argc, char *argv[])
 		case 'H':
 			test_name = KSM_MERGE_TIME_HUGE_PAGES;
 			break;
+		case 'D':
+			test_name = KSM_UNMERGE_TIME;
+			break;
 		case 'C':
 			test_name = KSM_COW_TIME;
 			break;
@@ -762,6 +826,14 @@ int main(int argc, char *argv[])
 		ret = ksm_merge_hugepages_time(MAP_PRIVATE | MAP_ANONYMOUS, prot,
 				ksm_scan_limit_sec, size_MB);
 		break;
+	case KSM_UNMERGE_TIME:
+		if (size_MB == 0) {
+			printf("Option '-s' is required.\n");
+			return KSFT_FAIL;
+		}
+		ret = ksm_unmerge_time(MAP_PRIVATE | MAP_ANONYMOUS, prot,
+				       ksm_scan_limit_sec, size_MB);
+		break;
 	case KSM_COW_TIME:
 		ret = ksm_cow_time(MAP_PRIVATE | MAP_ANONYMOUS, prot, ksm_scan_limit_sec,
 				   page_size);
-- 
GitLab


From 58f595c6659198e1ad0ed431a408ddd79b21e579 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:34 +0200
Subject: [PATCH 3683/4122] mm/ksm: simplify break_ksm() to not rely on
 VM_FAULT_WRITE

Now that GUP no longer requires VM_FAULT_WRITE, break_ksm() is the sole
remaining user of VM_FAULT_WRITE.  As we also want to stop triggering a
fake write fault and instead use FAULT_FLAG_UNSHARE -- similar to
GUP-triggered unsharing when taking a R/O pin on a shared anonymous page
(including KSM pages), let's stop relying on VM_FAULT_WRITE.

Let's rework break_ksm() to not rely on the return value of
handle_mm_fault() anymore to figure out whether COW-breaking was
successful.  Simply perform another follow_page() lookup to verify the
result.

While this makes break_ksm() slightly less efficient, we can simplify
handle_mm_fault() a little and easily switch to FAULT_FLAG_UNSHARE without
introducing similar KSM-specific behavior for FAULT_FLAG_UNSHARE.

In my setup (AMD Ryzen 9 3900X), running the KSM selftest to test unmerge
performance on 2 GiB (taskset 0x8 ./ksm_tests -D -s 2048), this results in
a performance degradation of ~4% -- 5% (old: ~5250 MiB/s, new: ~5010
MiB/s).

I don't think that we particularly care about that performance drop when
unmerging.  If it ever turns out to be an actual performance issue, we can
think about a better alternative for FAULT_FLAG_UNSHARE -- let's just keep
it simple for now.

Link: https://lkml.kernel.org/r/20221021101141.84170-3-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/ksm.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index a71245241d220..4efdc424a3fce 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -440,26 +440,27 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 	vm_fault_t ret = 0;
 
 	do {
+		bool ksm_page = false;
+
 		cond_resched();
 		page = follow_page(vma, addr,
 				FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
 		if (IS_ERR_OR_NULL(page))
 			break;
 		if (PageKsm(page))
-			ret = handle_mm_fault(vma, addr,
-					      FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
-					      NULL);
-		else
-			ret = VM_FAULT_WRITE;
+			ksm_page = true;
 		put_page(page);
-	} while (!(ret & (VM_FAULT_WRITE | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
+
+		if (!ksm_page)
+			return 0;
+		ret = handle_mm_fault(vma, addr,
+				      FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
+				      NULL);
+	} while (!(ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
 	/*
-	 * We must loop because handle_mm_fault() may back out if there's
-	 * any difficulty e.g. if pte accessed bit gets updated concurrently.
-	 *
-	 * VM_FAULT_WRITE is what we have been hoping for: it indicates that
-	 * COW has been broken, even if the vma does not permit VM_WRITE;
-	 * but note that a concurrent fault might break PageKsm for us.
+	 * We must loop until we no longer find a KSM page because
+	 * handle_mm_fault() may back out if there's any difficulty e.g. if
+	 * pte accessed bit gets updated concurrently.
 	 *
 	 * VM_FAULT_SIGBUS could occur if we race with truncation of the
 	 * backing file, which also invalidates anonymous pages: that's
-- 
GitLab


From cb8d863313436339fb60f7dd5131af2e5854621e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:35 +0200
Subject: [PATCH 3684/4122] mm: remove VM_FAULT_WRITE

All users -- GUP and KSM -- are gone, let's just remove it.

Link: https://lkml.kernel.org/r/20221021101141.84170-4-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm_types.h | 3 ---
 mm/huge_memory.c         | 2 +-
 mm/memory.c              | 9 ++++-----
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 018b1c0981736..199f98be6f9cd 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -929,7 +929,6 @@ typedef __bitwise unsigned int vm_fault_t;
  * @VM_FAULT_OOM:		Out Of Memory
  * @VM_FAULT_SIGBUS:		Bad access
  * @VM_FAULT_MAJOR:		Page read from storage
- * @VM_FAULT_WRITE:		Special case for get_user_pages
  * @VM_FAULT_HWPOISON:		Hit poisoned small page
  * @VM_FAULT_HWPOISON_LARGE:	Hit poisoned large page. Index encoded
  *				in upper bits
@@ -950,7 +949,6 @@ enum vm_fault_reason {
 	VM_FAULT_OOM            = (__force vm_fault_t)0x000001,
 	VM_FAULT_SIGBUS         = (__force vm_fault_t)0x000002,
 	VM_FAULT_MAJOR          = (__force vm_fault_t)0x000004,
-	VM_FAULT_WRITE          = (__force vm_fault_t)0x000008,
 	VM_FAULT_HWPOISON       = (__force vm_fault_t)0x000010,
 	VM_FAULT_HWPOISON_LARGE = (__force vm_fault_t)0x000020,
 	VM_FAULT_SIGSEGV        = (__force vm_fault_t)0x000040,
@@ -976,7 +974,6 @@ enum vm_fault_reason {
 	{ VM_FAULT_OOM,                 "OOM" },	\
 	{ VM_FAULT_SIGBUS,              "SIGBUS" },	\
 	{ VM_FAULT_MAJOR,               "MAJOR" },	\
-	{ VM_FAULT_WRITE,               "WRITE" },	\
 	{ VM_FAULT_HWPOISON,            "HWPOISON" },	\
 	{ VM_FAULT_HWPOISON_LARGE,      "HWPOISON_LARGE" },	\
 	{ VM_FAULT_SIGSEGV,             "SIGSEGV" },	\
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8f10afba17a6f..1d9ad909c87c3 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1376,7 +1376,7 @@ reuse:
 		if (pmdp_set_access_flags(vma, haddr, vmf->pmd, entry, 1))
 			update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
 		spin_unlock(vmf->ptl);
-		return VM_FAULT_WRITE;
+		return 0;
 	}
 
 unlock_fallback:
diff --git a/mm/memory.c b/mm/memory.c
index 815d2ff05c62b..aad226daf41bd 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3213,7 +3213,7 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
 	}
 
 	delayacct_wpcopy_end();
-	return (page_copied && !unshare) ? VM_FAULT_WRITE : 0;
+	return 0;
 oom_free_new:
 	put_page(new_page);
 oom:
@@ -3277,14 +3277,14 @@ static vm_fault_t wp_pfn_shared(struct vm_fault *vmf)
 		return finish_mkwrite_fault(vmf);
 	}
 	wp_page_reuse(vmf);
-	return VM_FAULT_WRITE;
+	return 0;
 }
 
 static vm_fault_t wp_page_shared(struct vm_fault *vmf)
 	__releases(vmf->ptl)
 {
 	struct vm_area_struct *vma = vmf->vma;
-	vm_fault_t ret = VM_FAULT_WRITE;
+	vm_fault_t ret = 0;
 
 	get_page(vmf->page);
 
@@ -3430,7 +3430,7 @@ reuse:
 			return 0;
 		}
 		wp_page_reuse(vmf);
-		return VM_FAULT_WRITE;
+		return 0;
 	}
 copy:
 	/*
@@ -3944,7 +3944,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf)
 		if (vmf->flags & FAULT_FLAG_WRITE) {
 			pte = maybe_mkwrite(pte_mkdirty(pte), vma);
 			vmf->flags &= ~FAULT_FLAG_WRITE;
-			ret |= VM_FAULT_WRITE;
 		}
 		rmap_flags |= RMAP_EXCLUSIVE;
 	}
-- 
GitLab


From 6cce3314b928b2db7d5f48171e18314226551c3f Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:37 +0200
Subject: [PATCH 3685/4122] mm/ksm: fix KSM COW breaking with userfaultfd-wp
 via FAULT_FLAG_UNSHARE

Let's stop breaking COW via a fake write fault and let's use
FAULT_FLAG_UNSHARE instead.  This avoids any wrong side effects of the
fake write fault, such as mapping the PTE writable and marking the pte
dirty/softdirty.

Consequently, we will no longer trigger a fake write fault and break COW
without any such side-effects.

Also, this fixes KSM interaction with userfaultfd-wp: when we have a KSM
page that's write-protected by userfaultfd, break_ksm()->handle_mm_fault()
will fail with VM_FAULT_SIGBUS and will simply return in break_ksm() with
0 instead of actually breaking COW.

For now, the KSM unmerge tests can trigger that:
    $ sudo ./ksm_functional_tests
    TAP version 13
    1..3
    # [RUN] test_unmerge
    ok 1 Pages were unmerged
    # [RUN] test_unmerge_discarded
    ok 2 Pages were unmerged
    # [RUN] test_unmerge_uffd_wp
    not ok 3 Pages were unmerged
    Bail out! 1 out of 3 tests failed
    # Planned tests != run tests (2 != 3)
    # Totals: pass:2 fail:1 xfail:0 xpass:0 skip:0 error:0

The warning in dmesg also indicates this wrong handling:
    [  230.096368] FAULT_FLAG_ALLOW_RETRY missing 881
    [  230.100822] CPU: 1 PID: 1643 Comm: ksm-uffd-wp [...]
    [  230.110124] Hardware name: [...]
    [  230.117775] Call Trace:
    [  230.120227]  <TASK>
    [  230.122334]  dump_stack_lvl+0x44/0x5c
    [  230.126010]  handle_userfault.cold+0x14/0x19
    [  230.130281]  ? tlb_finish_mmu+0x65/0x170
    [  230.134207]  ? uffd_wp_range+0x65/0xa0
    [  230.137959]  ? _raw_spin_unlock+0x15/0x30
    [  230.141972]  ? do_wp_page+0x50/0x590
    [  230.145551]  __handle_mm_fault+0x9f5/0xf50
    [  230.149652]  ? mmput+0x1f/0x40
    [  230.152712]  handle_mm_fault+0xb9/0x2a0
    [  230.156550]  break_ksm+0x141/0x180
    [  230.159964]  unmerge_ksm_pages+0x60/0x90
    [  230.163890]  ksm_madvise+0x3c/0xb0
    [  230.167295]  do_madvise.part.0+0x10c/0xeb0
    [  230.171396]  ? do_syscall_64+0x67/0x80
    [  230.175157]  __x64_sys_madvise+0x5a/0x70
    [  230.179082]  do_syscall_64+0x58/0x80
    [  230.182661]  ? do_syscall_64+0x67/0x80
    [  230.186413]  entry_SYSCALL_64_after_hwframe+0x63/0xcd

This is primarily a fix for KSM+userfaultfd-wp, however, the fake write
fault was always questionable.  As this fix is not easy to backport and
it's not very critical, let's not cc stable.

Link: https://lkml.kernel.org/r/20221021101141.84170-6-david@redhat.com
Fixes: 529b930b87d9 ("userfaultfd: wp: hook userfault handler to write protection fault")
Signed-off-by: David Hildenbrand <david@redhat.com>
Acked-by: Peter Xu <peterx@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/ksm.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 4efdc424a3fce..0805221e1d4c0 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -420,17 +420,15 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
 }
 
 /*
- * We use break_ksm to break COW on a ksm page: it's a stripped down
+ * We use break_ksm to break COW on a ksm page by triggering unsharing,
+ * such that the ksm page will get replaced by an exclusive anonymous page.
  *
- *	if (get_user_pages(addr, 1, FOLL_WRITE, &page, NULL) == 1)
- *		put_page(page);
- *
- * but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
+ * We take great care only to touch a ksm page, in a VM_MERGEABLE vma,
  * in case the application has unmapped and remapped mm,addr meanwhile.
  * Could a ksm page appear anywhere else?  Actually yes, in a VM_PFNMAP
  * mmap of /dev/mem, where we would not want to touch it.
  *
- * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
+ * FAULT_FLAG_REMOTE/FOLL_REMOTE are because we do this outside the context
  * of the process that owns 'vma'.  We also do not want to enforce
  * protection keys here anyway.
  */
@@ -454,7 +452,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 		if (!ksm_page)
 			return 0;
 		ret = handle_mm_fault(vma, addr,
-				      FAULT_FLAG_WRITE | FAULT_FLAG_REMOTE,
+				      FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
 				      NULL);
 	} while (!(ret & (VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | VM_FAULT_OOM)));
 	/*
-- 
GitLab


From e07cda5f232fac4de0925d8a4c92e51e41fa2f6e Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:39 +0200
Subject: [PATCH 3686/4122] mm/pagewalk: add walk_page_range_vma()

Let's add walk_page_range_vma(), which is similar to walk_page_vma(),
however, is only interested in a subset of the VMA range.

To be used in KSM code to stop using follow_page() next.

Link: https://lkml.kernel.org/r/20221021101141.84170-8-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pagewalk.h |  3 +++
 mm/pagewalk.c            | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index 37dc0208862d7..959f52e5867df 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -101,6 +101,9 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
 			  unsigned long end, const struct mm_walk_ops *ops,
 			  pgd_t *pgd,
 			  void *private);
+int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, const struct mm_walk_ops *ops,
+			void *private);
 int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
 		void *private);
 int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 0a5d71aaf9c7f..7f1c9b2749060 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -517,6 +517,26 @@ int walk_page_range_novma(struct mm_struct *mm, unsigned long start,
 	return walk_pgd_range(start, end, &walk);
 }
 
+int walk_page_range_vma(struct vm_area_struct *vma, unsigned long start,
+			unsigned long end, const struct mm_walk_ops *ops,
+			void *private)
+{
+	struct mm_walk walk = {
+		.ops		= ops,
+		.mm		= vma->vm_mm,
+		.vma		= vma,
+		.private	= private,
+	};
+
+	if (start >= end || !walk.mm)
+		return -EINVAL;
+	if (start < vma->vm_start || end > vma->vm_end)
+		return -EINVAL;
+
+	mmap_assert_locked(walk.mm);
+	return __walk_page_range(start, end, &walk);
+}
+
 int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
 		void *private)
 {
-- 
GitLab


From d7c0e68dab98f0f5a2af501eaefeb90cc855fc80 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:40 +0200
Subject: [PATCH 3687/4122] mm/ksm: convert break_ksm() to use
 walk_page_range_vma()

FOLL_MIGRATION exists only for the purpose of break_ksm(), and actually,
there is not even the need to wait for the migration to finish, we only
want to know if we're dealing with a KSM page.

Using follow_page() just to identify a KSM page overcomplicates GUP code.
Let's use walk_page_range_vma() instead, because we don't actually care
about the page itself, we only need to know a single property -- no need
to even grab a reference.

So, get rid of follow_page() usage such that we can get rid of
FOLL_MIGRATION now and eventually be able to get rid of follow_page() in
the future.

In my setup (AMD Ryzen 9 3900X), running the KSM selftest to test unmerge
performance on 2 GiB (taskset 0x8 ./ksm_tests -D -s 2048), this results in
a performance degradation of ~2% (old: ~5010 MiB/s, new: ~4900 MiB/s).  I
don't think we particularly care for now.

Interestingly, the benchmark reduction is due to the single callback.
Adding a second callback (e.g., pud_entry()) reduces the benchmark by
another 100-200 MiB/s.

Link: https://lkml.kernel.org/r/20221021101141.84170-9-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/ksm.c | 49 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/mm/ksm.c b/mm/ksm.c
index 0805221e1d4c0..dd02780c387f0 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -39,6 +39,7 @@
 #include <linux/freezer.h>
 #include <linux/oom.h>
 #include <linux/numa.h>
+#include <linux/pagewalk.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -419,6 +420,39 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
 	return atomic_read(&mm->mm_users) == 0;
 }
 
+static int break_ksm_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next,
+			struct mm_walk *walk)
+{
+	struct page *page = NULL;
+	spinlock_t *ptl;
+	pte_t *pte;
+	int ret;
+
+	if (pmd_leaf(*pmd) || !pmd_present(*pmd))
+		return 0;
+
+	pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+	if (pte_present(*pte)) {
+		page = vm_normal_page(walk->vma, addr, *pte);
+	} else if (!pte_none(*pte)) {
+		swp_entry_t entry = pte_to_swp_entry(*pte);
+
+		/*
+		 * As KSM pages remain KSM pages until freed, no need to wait
+		 * here for migration to end.
+		 */
+		if (is_migration_entry(entry))
+			page = pfn_swap_entry_to_page(entry);
+	}
+	ret = page && PageKsm(page);
+	pte_unmap_unlock(pte, ptl);
+	return ret;
+}
+
+static const struct mm_walk_ops break_ksm_ops = {
+	.pmd_entry = break_ksm_pmd_entry,
+};
+
 /*
  * We use break_ksm to break COW on a ksm page by triggering unsharing,
  * such that the ksm page will get replaced by an exclusive anonymous page.
@@ -434,21 +468,16 @@ static inline bool ksm_test_exit(struct mm_struct *mm)
  */
 static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 {
-	struct page *page;
 	vm_fault_t ret = 0;
 
 	do {
-		bool ksm_page = false;
+		int ksm_page;
 
 		cond_resched();
-		page = follow_page(vma, addr,
-				FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE);
-		if (IS_ERR_OR_NULL(page))
-			break;
-		if (PageKsm(page))
-			ksm_page = true;
-		put_page(page);
-
+		ksm_page = walk_page_range_vma(vma, addr, addr + 1,
+					       &break_ksm_ops, NULL);
+		if (WARN_ON_ONCE(ksm_page < 0))
+			return ksm_page;
 		if (!ksm_page)
 			return 0;
 		ret = handle_mm_fault(vma, addr,
-- 
GitLab


From f7355e99d9f71fcde093193fd4b569a648ba5ce3 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Fri, 21 Oct 2022 12:11:41 +0200
Subject: [PATCH 3688/4122] mm/gup: remove FOLL_MIGRATION

Fortunately, the last user (KSM) is gone, so let's just remove this rather
special code from generic GUP handling -- especially because KSM never
required the PMD handling as KSM only deals with individual base pages.

[akpm@linux-foundation.org: fix merge snafu]Link: https://lkml.kernel.org/r/20221021101141.84170-10-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Peter Xu <peterx@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h |  1 -
 mm/gup.c           | 55 +++++-----------------------------------------
 2 files changed, 5 insertions(+), 51 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8df5cae69c804..767c8c522e709 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3057,7 +3057,6 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 				 * and return without waiting upon it */
 #define FOLL_NOFAULT	0x80	/* do not fault in pages */
 #define FOLL_HWPOISON	0x100	/* check page is hwpoisoned */
-#define FOLL_MIGRATION	0x400	/* wait for page to replace migration entry */
 #define FOLL_TRIED	0x800	/* a retry, previous pass started an IO */
 #define FOLL_REMOTE	0x2000	/* we are working on non-current tsk/mm */
 #define FOLL_ANON	0x8000	/* don't do file mappings */
diff --git a/mm/gup.c b/mm/gup.c
index 2860cf4a85e1f..82b275bbaad54 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -537,30 +537,13 @@ static struct page *follow_page_pte(struct vm_area_struct *vma,
 	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
 			 (FOLL_PIN | FOLL_GET)))
 		return ERR_PTR(-EINVAL);
-retry:
 	if (unlikely(pmd_bad(*pmd)))
 		return no_page_table(vma, flags);
 
 	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
 	pte = *ptep;
-	if (!pte_present(pte)) {
-		swp_entry_t entry;
-		/*
-		 * KSM's break_ksm() relies upon recognizing a ksm page
-		 * even while it is being migrated, so for that case we
-		 * need migration_entry_wait().
-		 */
-		if (likely(!(flags & FOLL_MIGRATION)))
-			goto no_page;
-		if (pte_none(pte))
-			goto no_page;
-		entry = pte_to_swp_entry(pte);
-		if (!is_migration_entry(entry))
-			goto no_page;
-		pte_unmap_unlock(ptep, ptl);
-		migration_entry_wait(mm, pmd, address);
-		goto retry;
-	}
+	if (!pte_present(pte))
+		goto no_page;
 	if (pte_protnone(pte) && !gup_can_follow_protnone(flags))
 		goto no_page;
 
@@ -668,28 +651,8 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma,
 	pmdval = READ_ONCE(*pmd);
 	if (pmd_none(pmdval))
 		return no_page_table(vma, flags);
-retry:
-	if (!pmd_present(pmdval)) {
-		/*
-		 * Should never reach here, if thp migration is not supported;
-		 * Otherwise, it must be a thp migration entry.
-		 */
-		VM_BUG_ON(!thp_migration_supported() ||
-				  !is_pmd_migration_entry(pmdval));
-
-		if (likely(!(flags & FOLL_MIGRATION)))
-			return no_page_table(vma, flags);
-
-		pmd_migration_entry_wait(mm, pmd);
-		pmdval = READ_ONCE(*pmd);
-		/*
-		 * MADV_DONTNEED may convert the pmd to null because
-		 * mmap_lock is held in read mode
-		 */
-		if (pmd_none(pmdval))
-			return no_page_table(vma, flags);
-		goto retry;
-	}
+	if (!pmd_present(pmdval))
+		return no_page_table(vma, flags);
 	if (pmd_devmap(pmdval)) {
 		ptl = pmd_lock(mm, pmd);
 		page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
@@ -703,18 +666,10 @@ retry:
 	if (pmd_protnone(pmdval) && !gup_can_follow_protnone(flags))
 		return no_page_table(vma, flags);
 
-retry_locked:
 	ptl = pmd_lock(mm, pmd);
-	if (unlikely(pmd_none(*pmd))) {
-		spin_unlock(ptl);
-		return no_page_table(vma, flags);
-	}
 	if (unlikely(!pmd_present(*pmd))) {
 		spin_unlock(ptl);
-		if (likely(!(flags & FOLL_MIGRATION)))
-			return no_page_table(vma, flags);
-		pmd_migration_entry_wait(mm, pmd);
-		goto retry_locked;
+		return no_page_table(vma, flags);
 	}
 	if (unlikely(!pmd_trans_huge(*pmd))) {
 		spin_unlock(ptl);
-- 
GitLab


From 4c9473e87e75a2a77ccd02e55c91ffe6a52b5df6 Mon Sep 17 00:00:00 2001
From: Gautam Menghani <gautammenghani201@gmail.com>
Date: Wed, 26 Oct 2022 10:52:18 +0530
Subject: [PATCH 3689/4122] mm/khugepaged: add tracepoint to collapse_file()

"mm_khugepaged_collapse_file" for capturing is_shmem.
Currently, is_shmem is not being captured. Capturing is_shmem is useful
as it can indicate if tmpfs is being used as a backing store instead of
persistent storage. Add the tracepoint in collapse_file() named
"mm_khugepaged_collapse_file" for capturing is_shmem.

[gautammenghani201@gmail.com: swap is_shmem and addr to save space, per Steven Rostedt]
  Link: https://lkml.kernel.org/r/20221202201807.182829-1-gautammenghani201@gmail.com
Link: https://lkml.kernel.org/r/20221026052218.148234-1-gautammenghani201@gmail.com
Signed-off-by: Gautam Menghani <gautammenghani201@gmail.com>
Reviewed-by: Steven Rostedt (Google) <rostedt@goodmis.org>	[tracing]
Cc: David Hildenbrand <david@redhat.com>
Cc: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/trace/events/huge_memory.h | 38 ++++++++++++++++++++++++++++++
 mm/khugepaged.c                    |  7 +++---
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/huge_memory.h b/include/trace/events/huge_memory.h
index 760455dfa8600..3e6fb05852f9a 100644
--- a/include/trace/events/huge_memory.h
+++ b/include/trace/events/huge_memory.h
@@ -203,5 +203,43 @@ TRACE_EVENT(mm_khugepaged_scan_file,
 		__print_symbolic(__entry->result, SCAN_STATUS))
 );
 
+TRACE_EVENT(mm_khugepaged_collapse_file,
+	TP_PROTO(struct mm_struct *mm, struct page *hpage, pgoff_t index,
+			bool is_shmem, unsigned long addr, struct file *file,
+			int nr, int result),
+	TP_ARGS(mm, hpage, index, addr, is_shmem, file, nr, result),
+	TP_STRUCT__entry(
+		__field(struct mm_struct *, mm)
+		__field(unsigned long, hpfn)
+		__field(pgoff_t, index)
+		__field(unsigned long, addr)
+		__field(bool, is_shmem)
+		__string(filename, file->f_path.dentry->d_iname)
+		__field(int, nr)
+		__field(int, result)
+	),
+
+	TP_fast_assign(
+		__entry->mm = mm;
+		__entry->hpfn = hpage ? page_to_pfn(hpage) : -1;
+		__entry->index = index;
+		__entry->addr = addr;
+		__entry->is_shmem = is_shmem;
+		__assign_str(filename, file->f_path.dentry->d_iname);
+		__entry->nr = nr;
+		__entry->result = result;
+	),
+
+	TP_printk("mm=%p, hpage_pfn=0x%lx, index=%ld, addr=%ld, is_shmem=%d, filename=%s, nr=%d, result=%s",
+		__entry->mm,
+		__entry->hpfn,
+		__entry->index,
+		__entry->addr,
+		__entry->is_shmem,
+		__get_str(filename),
+		__entry->nr,
+		__print_symbolic(__entry->result, SCAN_STATUS))
+);
+
 #endif /* __HUGE_MEMORY_H */
 #include <trace/define_trace.h>
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 913b0f4893524..78ec2771cc652 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1744,12 +1744,12 @@ static int collapse_file(struct mm_struct *mm, unsigned long addr,
 {
 	struct address_space *mapping = file->f_mapping;
 	struct page *hpage;
-	pgoff_t index, end = start + HPAGE_PMD_NR;
+	pgoff_t index = 0, end = start + HPAGE_PMD_NR;
 	LIST_HEAD(pagelist);
 	XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
 	int nr_none = 0, result = SCAN_SUCCEED;
 	bool is_shmem = shmem_file(file);
-	int nr;
+	int nr = 0;
 
 	VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
 	VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
@@ -2102,7 +2102,8 @@ out:
 		mem_cgroup_uncharge(page_folio(hpage));
 		put_page(hpage);
 	}
-	/* TODO: tracepoints */
+
+	trace_mm_khugepaged_collapse_file(mm, hpage, index, is_shmem, addr, file, nr, result);
 	return result;
 }
 
-- 
GitLab


From fd3b1bc3c86ee11ba77421b00c70280605b521c6 Mon Sep 17 00:00:00 2001
From: Pavankumar Kondeti <quic_pkondeti@quicinc.com>
Date: Wed, 9 Nov 2022 10:48:36 +0530
Subject: [PATCH 3690/4122] mm/madvise: fix madvise_pageout for private file
 mappings

When MADV_PAGEOUT is called on a private file mapping VMA region, we bail
out early if the process is neither owner nor write capable of the file.
However, this VMA may have both private/shared clean pages and private
dirty pages.  The opportunity of paging out the private dirty pages (Anon
pages) is missed.  Fix this behavior by allowing private file mappings
pageout further and perform the file access check along with PageAnon()
during page walk.

We observe ~10% improvement in zram usage, thus leaving more available
memory on a 4GB RAM system running Android.

[quic_pkondeti@quicinc.com: v2]
  Link: https://lkml.kernel.org/r/1669962597-27724-1-git-send-email-quic_pkondeti@quicinc.com
Link: https://lkml.kernel.org/r/1667971116-12900-1-git-send-email-quic_pkondeti@quicinc.com
Signed-off-by: Pavankumar Kondeti <quic_pkondeti@quicinc.com>
Cc: Charan Teja Kalla <quic_charante@quicinc.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Suren Baghdasaryan <surenb@google.com>
Cc: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/madvise.c | 53 ++++++++++++++++++++++++++++++++++------------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/mm/madvise.c b/mm/madvise.c
index 83b0c91a126b7..2573ea3ed6843 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -318,6 +318,21 @@ static long madvise_willneed(struct vm_area_struct *vma,
 	return 0;
 }
 
+static inline bool can_do_file_pageout(struct vm_area_struct *vma)
+{
+	if (!vma->vm_file)
+		return false;
+	/*
+	 * paging out pagecache only for non-anonymous mappings that correspond
+	 * to the files the calling process could (if tried) open for writing;
+	 * otherwise we'd be including shared non-exclusive mappings, which
+	 * opens a side channel.
+	 */
+	return inode_owner_or_capable(&init_user_ns,
+				      file_inode(vma->vm_file)) ||
+	       file_permission(vma->vm_file, MAY_WRITE) == 0;
+}
+
 static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 				unsigned long addr, unsigned long end,
 				struct mm_walk *walk)
@@ -331,10 +346,14 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 	spinlock_t *ptl;
 	struct page *page = NULL;
 	LIST_HEAD(page_list);
+	bool pageout_anon_only_filter;
 
 	if (fatal_signal_pending(current))
 		return -EINTR;
 
+	pageout_anon_only_filter = pageout && !vma_is_anonymous(vma) &&
+					!can_do_file_pageout(vma);
+
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	if (pmd_trans_huge(*pmd)) {
 		pmd_t orig_pmd;
@@ -361,6 +380,9 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd,
 		if (page_mapcount(page) != 1)
 			goto huge_unlock;
 
+		if (pageout_anon_only_filter && !PageAnon(page))
+			goto huge_unlock;
+
 		if (next - addr != HPAGE_PMD_SIZE) {
 			int err;
 
@@ -429,6 +451,8 @@ regular_page:
 		if (PageTransCompound(page)) {
 			if (page_mapcount(page) != 1)
 				break;
+			if (pageout_anon_only_filter && !PageAnon(page))
+				break;
 			get_page(page);
 			if (!trylock_page(page)) {
 				put_page(page);
@@ -456,6 +480,9 @@ regular_page:
 		if (!PageLRU(page) || page_mapcount(page) != 1)
 			continue;
 
+		if (pageout_anon_only_filter && !PageAnon(page))
+			continue;
+
 		VM_BUG_ON_PAGE(PageTransCompound(page), page);
 
 		if (pte_young(ptent)) {
@@ -550,23 +577,6 @@ static void madvise_pageout_page_range(struct mmu_gather *tlb,
 	tlb_end_vma(tlb, vma);
 }
 
-static inline bool can_do_pageout(struct vm_area_struct *vma)
-{
-	if (vma_is_anonymous(vma))
-		return true;
-	if (!vma->vm_file)
-		return false;
-	/*
-	 * paging out pagecache only for non-anonymous mappings that correspond
-	 * to the files the calling process could (if tried) open for writing;
-	 * otherwise we'd be including shared non-exclusive mappings, which
-	 * opens a side channel.
-	 */
-	return inode_owner_or_capable(&init_user_ns,
-				      file_inode(vma->vm_file)) ||
-	       file_permission(vma->vm_file, MAY_WRITE) == 0;
-}
-
 static long madvise_pageout(struct vm_area_struct *vma,
 			struct vm_area_struct **prev,
 			unsigned long start_addr, unsigned long end_addr)
@@ -578,7 +588,14 @@ static long madvise_pageout(struct vm_area_struct *vma,
 	if (!can_madv_lru_vma(vma))
 		return -EINVAL;
 
-	if (!can_do_pageout(vma))
+	/*
+	 * If the VMA belongs to a private file mapping, there can be private
+	 * dirty pages which can be paged out if even this process is neither
+	 * owner nor write capable of the file. We allow private file mappings
+	 * further to pageout dirty anon pages.
+	 */
+	if (!vma_is_anonymous(vma) && (!can_do_file_pageout(vma) &&
+				(vma->vm_flags & VM_MAYSHARE)))
 		return 0;
 
 	lru_add_drain();
-- 
GitLab


From 6b3379e8dcbea09b7e27bf0eea2f53fd15a164ac Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 28 Nov 2022 11:16:10 -0800
Subject: [PATCH 3691/4122] zswap: fix writeback lock ordering for zsmalloc

Patch series "Implement writeback for zsmalloc", v7.

Unlike other zswap allocators such as zbud or z3fold, zsmalloc currently
lacks the writeback mechanism.  This means that when the zswap pool is
full, it will simply reject further allocations, and the pages will be
written directly to swap.

This series of patches implements writeback for zsmalloc. When the zswap
pool becomes full, zsmalloc will attempt to evict all the compressed
objects in the least-recently used zspages.


This patch (of 6):

zswap's customary lock order is tree->lock before pool->lock, because the
tree->lock protects the entries' refcount, and the free callbacks in the
backends acquire their respective pool locks to dispatch the backing
object.  zsmalloc's map callback takes the pool lock, so zswap must not
grab the tree->lock while a handle is mapped.  This currently only happens
during writeback, which isn't implemented for zsmalloc.  In preparation
for it, move the tree->lock section out of the mapped entry section

Link: https://lkml.kernel.org/r/20221128191616.1261026-1-nphamcs@gmail.com
Link: https://lkml.kernel.org/r/20221128191616.1261026-2-nphamcs@gmail.com
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zswap.c | 35 +++++++++++++++++++----------------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/mm/zswap.c b/mm/zswap.c
index 3019f0bde194d..f6c89049cf700 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -968,6 +968,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 	swpentry = zhdr->swpentry; /* here */
 	tree = zswap_trees[swp_type(swpentry)];
 	offset = swp_offset(swpentry);
+	zpool_unmap_handle(pool, handle);
 
 	/* find and ref zswap entry */
 	spin_lock(&tree->lock);
@@ -975,20 +976,12 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 	if (!entry) {
 		/* entry was invalidated */
 		spin_unlock(&tree->lock);
-		zpool_unmap_handle(pool, handle);
 		kfree(tmp);
 		return 0;
 	}
 	spin_unlock(&tree->lock);
 	BUG_ON(offset != entry->offset);
 
-	src = (u8 *)zhdr + sizeof(struct zswap_header);
-	if (!zpool_can_sleep_mapped(pool)) {
-		memcpy(tmp, src, entry->length);
-		src = tmp;
-		zpool_unmap_handle(pool, handle);
-	}
-
 	/* try to allocate swap cache page */
 	switch (zswap_get_swap_cache_page(swpentry, &page)) {
 	case ZSWAP_SWAPCACHE_FAIL: /* no memory or invalidate happened */
@@ -1006,6 +999,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 		acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
 		dlen = PAGE_SIZE;
 
+		zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
+		src = (u8 *)zhdr + sizeof(struct zswap_header);
+		if (!zpool_can_sleep_mapped(pool)) {
+			memcpy(tmp, src, entry->length);
+			src = tmp;
+			zpool_unmap_handle(pool, handle);
+		}
+
 		mutex_lock(acomp_ctx->mutex);
 		sg_init_one(&input, src, entry->length);
 		sg_init_table(&output, 1);
@@ -1015,6 +1016,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 		dlen = acomp_ctx->req->dlen;
 		mutex_unlock(acomp_ctx->mutex);
 
+		if (!zpool_can_sleep_mapped(pool))
+			kfree(tmp);
+		else
+			zpool_unmap_handle(pool, handle);
+
 		BUG_ON(ret);
 		BUG_ON(dlen != PAGE_SIZE);
 
@@ -1045,7 +1051,11 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 		zswap_entry_put(tree, entry);
 	spin_unlock(&tree->lock);
 
-	goto end;
+	return ret;
+
+fail:
+	if (!zpool_can_sleep_mapped(pool))
+		kfree(tmp);
 
 	/*
 	* if we get here due to ZSWAP_SWAPCACHE_EXIST
@@ -1054,17 +1064,10 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 	* if we free the entry in the following put
 	* it is also okay to return !0
 	*/
-fail:
 	spin_lock(&tree->lock);
 	zswap_entry_put(tree, entry);
 	spin_unlock(&tree->lock);
 
-end:
-	if (zpool_can_sleep_mapped(pool))
-		zpool_unmap_handle(pool, handle);
-	else
-		kfree(tmp);
-
 	return ret;
 }
 
-- 
GitLab


From 6a05aa30109d5cd4bebfb89415c58fa4599ef875 Mon Sep 17 00:00:00 2001
From: Johannes Weiner <hannes@cmpxchg.org>
Date: Mon, 28 Nov 2022 11:16:11 -0800
Subject: [PATCH 3692/4122] zpool: clean out dead code

There is a lot of provision for flexibility that isn't actually needed or
used.  Zswap (the only zpool user) always passes zpool_ops with an .evict
method set.  The backends who reclaim only do so for zswap, so they can
also directly call zpool_ops without indirection or checks.

Finally, there is no need to check the retries parameters and bail with
-EINVAL in the reclaim function, when that's called just a few lines below
with a hard-coded 8.  There is no need to duplicate the evictable and
sleep_mapped attrs from the driver in zpool_ops.

Link: https://lkml.kernel.org/r/20221128191616.1261026-3-nphamcs@gmail.com
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/z3fold.c | 36 +++++-------------------------------
 mm/zbud.c   | 32 +++++---------------------------
 mm/zpool.c  | 10 ++--------
 3 files changed, 12 insertions(+), 66 deletions(-)

diff --git a/mm/z3fold.c b/mm/z3fold.c
index cf71da10d04e7..a4de0c317ac7c 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -68,9 +68,6 @@
  * Structures
 *****************/
 struct z3fold_pool;
-struct z3fold_ops {
-	int (*evict)(struct z3fold_pool *pool, unsigned long handle);
-};
 
 enum buddy {
 	HEADLESS = 0,
@@ -138,8 +135,6 @@ struct z3fold_header {
  * @stale:	list of pages marked for freeing
  * @pages_nr:	number of z3fold pages in the pool.
  * @c_handle:	cache for z3fold_buddy_slots allocation
- * @ops:	pointer to a structure of user defined operations specified at
- *		pool creation time.
  * @zpool:	zpool driver
  * @zpool_ops:	zpool operations structure with an evict callback
  * @compact_wq:	workqueue for page layout background optimization
@@ -158,7 +153,6 @@ struct z3fold_pool {
 	struct list_head stale;
 	atomic64_t pages_nr;
 	struct kmem_cache *c_handle;
-	const struct z3fold_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
 	struct workqueue_struct *compact_wq;
@@ -907,13 +901,11 @@ out_fail:
  * z3fold_create_pool() - create a new z3fold pool
  * @name:	pool name
  * @gfp:	gfp flags when allocating the z3fold pool structure
- * @ops:	user-defined operations for the z3fold pool
  *
  * Return: pointer to the new z3fold pool or NULL if the metadata allocation
  * failed.
  */
-static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
-		const struct z3fold_ops *ops)
+static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp)
 {
 	struct z3fold_pool *pool = NULL;
 	int i, cpu;
@@ -949,7 +941,6 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
 	if (!pool->release_wq)
 		goto out_wq;
 	INIT_WORK(&pool->work, free_pages_work);
-	pool->ops = ops;
 	return pool;
 
 out_wq:
@@ -1230,10 +1221,6 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 	slots.pool = (unsigned long)pool | (1 << HANDLES_NOFREE);
 
 	spin_lock(&pool->lock);
-	if (!pool->ops || !pool->ops->evict || retries == 0) {
-		spin_unlock(&pool->lock);
-		return -EINVAL;
-	}
 	for (i = 0; i < retries; i++) {
 		if (list_empty(&pool->lru)) {
 			spin_unlock(&pool->lock);
@@ -1319,17 +1306,17 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
 		}
 		/* Issue the eviction callback(s) */
 		if (middle_handle) {
-			ret = pool->ops->evict(pool, middle_handle);
+			ret = pool->zpool_ops->evict(pool->zpool, middle_handle);
 			if (ret)
 				goto next;
 		}
 		if (first_handle) {
-			ret = pool->ops->evict(pool, first_handle);
+			ret = pool->zpool_ops->evict(pool->zpool, first_handle);
 			if (ret)
 				goto next;
 		}
 		if (last_handle) {
-			ret = pool->ops->evict(pool, last_handle);
+			ret = pool->zpool_ops->evict(pool->zpool, last_handle);
 			if (ret)
 				goto next;
 		}
@@ -1593,26 +1580,13 @@ static const struct movable_operations z3fold_mops = {
  * zpool
  ****************/
 
-static int z3fold_zpool_evict(struct z3fold_pool *pool, unsigned long handle)
-{
-	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
-		return pool->zpool_ops->evict(pool->zpool, handle);
-	else
-		return -ENOENT;
-}
-
-static const struct z3fold_ops z3fold_zpool_ops = {
-	.evict =	z3fold_zpool_evict
-};
-
 static void *z3fold_zpool_create(const char *name, gfp_t gfp,
 			       const struct zpool_ops *zpool_ops,
 			       struct zpool *zpool)
 {
 	struct z3fold_pool *pool;
 
-	pool = z3fold_create_pool(name, gfp,
-				zpool_ops ? &z3fold_zpool_ops : NULL);
+	pool = z3fold_create_pool(name, gfp);
 	if (pool) {
 		pool->zpool = zpool;
 		pool->zpool_ops = zpool_ops;
diff --git a/mm/zbud.c b/mm/zbud.c
index 6348932430b85..3acd261939204 100644
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -74,10 +74,6 @@
 
 struct zbud_pool;
 
-struct zbud_ops {
-	int (*evict)(struct zbud_pool *pool, unsigned long handle);
-};
-
 /**
  * struct zbud_pool - stores metadata for each zbud pool
  * @lock:	protects all pool fields and first|last_chunk fields of any
@@ -90,8 +86,6 @@ struct zbud_ops {
  * @lru:	list tracking the zbud pages in LRU order by most recently
  *		added buddy.
  * @pages_nr:	number of zbud pages in the pool.
- * @ops:	pointer to a structure of user defined operations specified at
- *		pool creation time.
  * @zpool:	zpool driver
  * @zpool_ops:	zpool operations structure with an evict callback
  *
@@ -110,7 +104,6 @@ struct zbud_pool {
 	};
 	struct list_head lru;
 	u64 pages_nr;
-	const struct zbud_ops *ops;
 	struct zpool *zpool;
 	const struct zpool_ops *zpool_ops;
 };
@@ -212,12 +205,11 @@ static int num_free_chunks(struct zbud_header *zhdr)
 /**
  * zbud_create_pool() - create a new zbud pool
  * @gfp:	gfp flags when allocating the zbud pool structure
- * @ops:	user-defined operations for the zbud pool
  *
  * Return: pointer to the new zbud pool or NULL if the metadata allocation
  * failed.
  */
-static struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops)
+static struct zbud_pool *zbud_create_pool(gfp_t gfp)
 {
 	struct zbud_pool *pool;
 	int i;
@@ -231,7 +223,6 @@ static struct zbud_pool *zbud_create_pool(gfp_t gfp, const struct zbud_ops *ops)
 	INIT_LIST_HEAD(&pool->buddied);
 	INIT_LIST_HEAD(&pool->lru);
 	pool->pages_nr = 0;
-	pool->ops = ops;
 	return pool;
 }
 
@@ -419,8 +410,7 @@ static int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 	unsigned long first_handle = 0, last_handle = 0;
 
 	spin_lock(&pool->lock);
-	if (!pool->ops || !pool->ops->evict || list_empty(&pool->lru) ||
-			retries == 0) {
+	if (list_empty(&pool->lru)) {
 		spin_unlock(&pool->lock);
 		return -EINVAL;
 	}
@@ -444,12 +434,12 @@ static int zbud_reclaim_page(struct zbud_pool *pool, unsigned int retries)
 
 		/* Issue the eviction callback(s) */
 		if (first_handle) {
-			ret = pool->ops->evict(pool, first_handle);
+			ret = pool->zpool_ops->evict(pool->zpool, first_handle);
 			if (ret)
 				goto next;
 		}
 		if (last_handle) {
-			ret = pool->ops->evict(pool, last_handle);
+			ret = pool->zpool_ops->evict(pool->zpool, last_handle);
 			if (ret)
 				goto next;
 		}
@@ -524,25 +514,13 @@ static u64 zbud_get_pool_size(struct zbud_pool *pool)
  * zpool
  ****************/
 
-static int zbud_zpool_evict(struct zbud_pool *pool, unsigned long handle)
-{
-	if (pool->zpool && pool->zpool_ops && pool->zpool_ops->evict)
-		return pool->zpool_ops->evict(pool->zpool, handle);
-	else
-		return -ENOENT;
-}
-
-static const struct zbud_ops zbud_zpool_ops = {
-	.evict =	zbud_zpool_evict
-};
-
 static void *zbud_zpool_create(const char *name, gfp_t gfp,
 			       const struct zpool_ops *zpool_ops,
 			       struct zpool *zpool)
 {
 	struct zbud_pool *pool;
 
-	pool = zbud_create_pool(gfp, zpool_ops ? &zbud_zpool_ops : NULL);
+	pool = zbud_create_pool(gfp);
 	if (pool) {
 		pool->zpool = zpool;
 		pool->zpool_ops = zpool_ops;
diff --git a/mm/zpool.c b/mm/zpool.c
index f46c0d5e766cd..571f5c5031dda 100644
--- a/mm/zpool.c
+++ b/mm/zpool.c
@@ -21,9 +21,6 @@
 struct zpool {
 	struct zpool_driver *driver;
 	void *pool;
-	const struct zpool_ops *ops;
-	bool evictable;
-	bool can_sleep_mapped;
 };
 
 static LIST_HEAD(drivers_head);
@@ -177,9 +174,6 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp,
 
 	zpool->driver = driver;
 	zpool->pool = driver->create(name, gfp, ops, zpool);
-	zpool->ops = ops;
-	zpool->evictable = driver->shrink && ops && ops->evict;
-	zpool->can_sleep_mapped = driver->sleep_mapped;
 
 	if (!zpool->pool) {
 		pr_err("couldn't create %s pool\n", type);
@@ -380,7 +374,7 @@ u64 zpool_get_total_size(struct zpool *zpool)
  */
 bool zpool_evictable(struct zpool *zpool)
 {
-	return zpool->evictable;
+	return zpool->driver->shrink;
 }
 
 /**
@@ -398,7 +392,7 @@ bool zpool_evictable(struct zpool *zpool)
  */
 bool zpool_can_sleep_mapped(struct zpool *zpool)
 {
-	return zpool->can_sleep_mapped;
+	return zpool->driver->sleep_mapped;
 }
 
 MODULE_LICENSE("GPL");
-- 
GitLab


From c0547d0b6a4b637db05406b90ba82e1b2e71de56 Mon Sep 17 00:00:00 2001
From: Nhat Pham <nphamcs@gmail.com>
Date: Mon, 28 Nov 2022 11:16:12 -0800
Subject: [PATCH 3693/4122] zsmalloc: consolidate zs_pool's migrate_lock and
 size_class's locks

Currently, zsmalloc has a hierarchy of locks, which includes a pool-level
migrate_lock, and a lock for each size class.  We have to obtain both
locks in the hotpath in most cases anyway, except for zs_malloc.  This
exception will no longer exist when we introduce a LRU into the zs_pool
for the new writeback functionality - we will need to obtain a pool-level
lock to synchronize LRU handling even in zs_malloc.

In preparation for zsmalloc writeback, consolidate these locks into a
single pool-level lock, which drastically reduces the complexity of
synchronization in zsmalloc.

We have also benchmarked the lock consolidation to see the performance
effect of this change on zram.

First, we ran a synthetic FS workload on a server machine with 36 cores
(same machine for all runs), using

fs_mark  -d  ../zram1mnt  -s  100000  -n  2500  -t  32  -k

before and after for btrfs and ext4 on zram (FS usage is 80%).

Here is the result (unit is file/second):

With lock consolidation (btrfs):
Average: 13520.2, Median: 13531.0, Stddev: 137.5961482019028

Without lock consolidation (btrfs):
Average: 13487.2, Median: 13575.0, Stddev: 309.08283679298665

With lock consolidation (ext4):
Average: 16824.4, Median: 16839.0, Stddev: 89.97388510006668

Without lock consolidation (ext4)
Average: 16958.0, Median: 16986.0, Stddev: 194.7370021336469

As you can see, we observe a 0.3% regression for btrfs, and a 0.9%
regression for ext4. This is a small, barely measurable difference in my
opinion.

For a more realistic scenario, we also tries building the kernel on zram.
Here is the time it takes (in seconds):

With lock consolidation (btrfs):
real
Average: 319.6, Median: 320.0, Stddev: 0.8944271909999159
user
Average: 6894.2, Median: 6895.0, Stddev: 25.528415540334656
sys
Average: 521.4, Median: 522.0, Stddev: 1.51657508881031

Without lock consolidation (btrfs):
real
Average: 319.8, Median: 320.0, Stddev: 0.8366600265340756
user
Average: 6896.6, Median: 6899.0, Stddev: 16.04057355583023
sys
Average: 520.6, Median: 521.0, Stddev: 1.140175425099138

With lock consolidation (ext4):
real
Average: 320.0, Median: 319.0, Stddev: 1.4142135623730951
user
Average: 6896.8, Median: 6878.0, Stddev: 28.621670111997307
sys
Average: 521.2, Median: 521.0, Stddev: 1.7888543819998317

Without lock consolidation (ext4)
real
Average: 319.6, Median: 319.0, Stddev: 0.8944271909999159
user
Average: 6886.2, Median: 6887.0, Stddev: 16.93221781102523
sys
Average: 520.4, Median: 520.0, Stddev: 1.140175425099138

The difference is entirely within the noise of a typical run on zram.
This hardly justifies the complexity of maintaining both the pool lock and
the class lock.  In fact, for writeback, we would need to introduce yet
another lock to prevent data races on the pool's LRU, further complicating
the lock handling logic.  IMHO, it is just better to collapse all of these
into a single pool-level lock.

Link: https://lkml.kernel.org/r/20221128191616.1261026-4-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zsmalloc.c | 87 ++++++++++++++++++++++-----------------------------
 1 file changed, 37 insertions(+), 50 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 78feda34ad9a8..5427a00a0518d 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -33,8 +33,7 @@
 /*
  * lock ordering:
  *	page_lock
- *	pool->migrate_lock
- *	class->lock
+ *	pool->lock
  *	zspage->lock
  */
 
@@ -192,7 +191,6 @@ static const int fullness_threshold_frac = 4;
 static size_t huge_class_size;
 
 struct size_class {
-	spinlock_t lock;
 	struct list_head fullness_list[NR_ZS_FULLNESS];
 	/*
 	 * Size of objects stored in this class. Must be multiple
@@ -247,8 +245,7 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
 	struct work_struct free_work;
 #endif
-	/* protect page/zspage migration */
-	rwlock_t migrate_lock;
+	spinlock_t lock;
 };
 
 struct zspage {
@@ -355,7 +352,7 @@ static void cache_free_zspage(struct zs_pool *pool, struct zspage *zspage)
 	kmem_cache_free(pool->zspage_cachep, zspage);
 }
 
-/* class->lock(which owns the handle) synchronizes races */
+/* pool->lock(which owns the handle) synchronizes races */
 static void record_obj(unsigned long handle, unsigned long obj)
 {
 	*(unsigned long *)handle = obj;
@@ -452,7 +449,7 @@ static __maybe_unused int is_first_page(struct page *page)
 	return PagePrivate(page);
 }
 
-/* Protected by class->lock */
+/* Protected by pool->lock */
 static inline int get_zspage_inuse(struct zspage *zspage)
 {
 	return zspage->inuse;
@@ -597,13 +594,13 @@ static int zs_stats_size_show(struct seq_file *s, void *v)
 		if (class->index != i)
 			continue;
 
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 		class_almost_full = zs_stat_get(class, CLASS_ALMOST_FULL);
 		class_almost_empty = zs_stat_get(class, CLASS_ALMOST_EMPTY);
 		obj_allocated = zs_stat_get(class, OBJ_ALLOCATED);
 		obj_used = zs_stat_get(class, OBJ_USED);
 		freeable = zs_can_compact(class);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 
 		objs_per_zspage = class->objs_per_zspage;
 		pages_used = obj_allocated / objs_per_zspage *
@@ -916,7 +913,7 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 
 	get_zspage_mapping(zspage, &class_idx, &fg);
 
-	assert_spin_locked(&class->lock);
+	assert_spin_locked(&pool->lock);
 
 	VM_BUG_ON(get_zspage_inuse(zspage));
 	VM_BUG_ON(fg != ZS_EMPTY);
@@ -1268,19 +1265,19 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	BUG_ON(in_interrupt());
 
 	/* It guarantees it can get zspage from handle safely */
-	read_lock(&pool->migrate_lock);
+	spin_lock(&pool->lock);
 	obj = handle_to_obj(handle);
 	obj_to_location(obj, &page, &obj_idx);
 	zspage = get_zspage(page);
 
 	/*
-	 * migration cannot move any zpages in this zspage. Here, class->lock
+	 * migration cannot move any zpages in this zspage. Here, pool->lock
 	 * is too heavy since callers would take some time until they calls
 	 * zs_unmap_object API so delegate the locking from class to zspage
 	 * which is smaller granularity.
 	 */
 	migrate_read_lock(zspage);
-	read_unlock(&pool->migrate_lock);
+	spin_unlock(&pool->lock);
 
 	class = zspage_class(pool, zspage);
 	off = (class->size * obj_idx) & ~PAGE_MASK;
@@ -1433,8 +1430,8 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 	size += ZS_HANDLE_SIZE;
 	class = pool->size_class[get_size_class_index(size)];
 
-	/* class->lock effectively protects the zpage migration */
-	spin_lock(&class->lock);
+	/* pool->lock effectively protects the zpage migration */
+	spin_lock(&pool->lock);
 	zspage = find_get_zspage(class);
 	if (likely(zspage)) {
 		obj = obj_malloc(pool, zspage, handle);
@@ -1442,12 +1439,12 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 		fix_fullness_group(class, zspage);
 		record_obj(handle, obj);
 		class_stat_inc(class, OBJ_USED, 1);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 
 		return handle;
 	}
 
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 
 	zspage = alloc_zspage(pool, class, gfp);
 	if (!zspage) {
@@ -1455,7 +1452,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 		return (unsigned long)ERR_PTR(-ENOMEM);
 	}
 
-	spin_lock(&class->lock);
+	spin_lock(&pool->lock);
 	obj = obj_malloc(pool, zspage, handle);
 	newfg = get_fullness_group(class, zspage);
 	insert_zspage(class, zspage, newfg);
@@ -1468,7 +1465,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size, gfp_t gfp)
 
 	/* We completely set up zspage so mark them as movable */
 	SetZsPageMovable(pool, zspage);
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 
 	return handle;
 }
@@ -1512,16 +1509,14 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 		return;
 
 	/*
-	 * The pool->migrate_lock protects the race with zpage's migration
+	 * The pool->lock protects the race with zpage's migration
 	 * so it's safe to get the page from handle.
 	 */
-	read_lock(&pool->migrate_lock);
+	spin_lock(&pool->lock);
 	obj = handle_to_obj(handle);
 	obj_to_page(obj, &f_page);
 	zspage = get_zspage(f_page);
 	class = zspage_class(pool, zspage);
-	spin_lock(&class->lock);
-	read_unlock(&pool->migrate_lock);
 
 	obj_free(class->size, obj);
 	class_stat_dec(class, OBJ_USED, 1);
@@ -1531,7 +1526,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 
 	free_zspage(pool, class, zspage);
 out:
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 	cache_free_handle(pool, handle);
 }
 EXPORT_SYMBOL_GPL(zs_free);
@@ -1888,16 +1883,12 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	pool = zspage->pool;
 
 	/*
-	 * The pool migrate_lock protects the race between zpage migration
+	 * The pool's lock protects the race between zpage migration
 	 * and zs_free.
 	 */
-	write_lock(&pool->migrate_lock);
+	spin_lock(&pool->lock);
 	class = zspage_class(pool, zspage);
 
-	/*
-	 * the class lock protects zpage alloc/free in the zspage.
-	 */
-	spin_lock(&class->lock);
 	/* the migrate_write_lock protects zpage access via zs_map_object */
 	migrate_write_lock(zspage);
 
@@ -1927,10 +1918,9 @@ static int zs_page_migrate(struct page *newpage, struct page *page,
 	replace_sub_page(class, zspage, newpage, page);
 	/*
 	 * Since we complete the data copy and set up new zspage structure,
-	 * it's okay to release migration_lock.
+	 * it's okay to release the pool's lock.
 	 */
-	write_unlock(&pool->migrate_lock);
-	spin_unlock(&class->lock);
+	spin_unlock(&pool->lock);
 	dec_zspage_isolation(zspage);
 	migrate_write_unlock(zspage);
 
@@ -1985,9 +1975,9 @@ static void async_free_zspage(struct work_struct *work)
 		if (class->index != i)
 			continue;
 
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 		list_splice_init(&class->fullness_list[ZS_EMPTY], &free_pages);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 	}
 
 	list_for_each_entry_safe(zspage, tmp, &free_pages, list) {
@@ -1997,9 +1987,9 @@ static void async_free_zspage(struct work_struct *work)
 		get_zspage_mapping(zspage, &class_idx, &fullness);
 		VM_BUG_ON(fullness != ZS_EMPTY);
 		class = pool->size_class[class_idx];
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 		__free_zspage(pool, class, zspage);
-		spin_unlock(&class->lock);
+		spin_unlock(&pool->lock);
 	}
 };
 
@@ -2060,10 +2050,11 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 	struct zspage *dst_zspage = NULL;
 	unsigned long pages_freed = 0;
 
-	/* protect the race between zpage migration and zs_free */
-	write_lock(&pool->migrate_lock);
-	/* protect zpage allocation/free */
-	spin_lock(&class->lock);
+	/*
+	 * protect the race between zpage migration and zs_free
+	 * as well as zpage allocation/free
+	 */
+	spin_lock(&pool->lock);
 	while ((src_zspage = isolate_zspage(class, true))) {
 		/* protect someone accessing the zspage(i.e., zs_map_object) */
 		migrate_write_lock(src_zspage);
@@ -2088,7 +2079,7 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 			putback_zspage(class, dst_zspage);
 			migrate_write_unlock(dst_zspage);
 			dst_zspage = NULL;
-			if (rwlock_is_contended(&pool->migrate_lock))
+			if (spin_is_contended(&pool->lock))
 				break;
 		}
 
@@ -2105,11 +2096,9 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 			pages_freed += class->pages_per_zspage;
 		} else
 			migrate_write_unlock(src_zspage);
-		spin_unlock(&class->lock);
-		write_unlock(&pool->migrate_lock);
+		spin_unlock(&pool->lock);
 		cond_resched();
-		write_lock(&pool->migrate_lock);
-		spin_lock(&class->lock);
+		spin_lock(&pool->lock);
 	}
 
 	if (src_zspage) {
@@ -2117,8 +2106,7 @@ static unsigned long __zs_compact(struct zs_pool *pool,
 		migrate_write_unlock(src_zspage);
 	}
 
-	spin_unlock(&class->lock);
-	write_unlock(&pool->migrate_lock);
+	spin_unlock(&pool->lock);
 
 	return pages_freed;
 }
@@ -2221,7 +2209,7 @@ struct zs_pool *zs_create_pool(const char *name)
 		return NULL;
 
 	init_deferred_free(pool);
-	rwlock_init(&pool->migrate_lock);
+	spin_lock_init(&pool->lock);
 
 	pool->name = kstrdup(name, GFP_KERNEL);
 	if (!pool->name)
@@ -2292,7 +2280,6 @@ struct zs_pool *zs_create_pool(const char *name)
 		class->index = i;
 		class->pages_per_zspage = pages_per_zspage;
 		class->objs_per_zspage = objs_per_zspage;
-		spin_lock_init(&class->lock);
 		pool->size_class[i] = class;
 		for (fullness = ZS_EMPTY; fullness < NR_ZS_FULLNESS;
 							fullness++)
-- 
GitLab


From 64f768c6b32e1957e2b65b70e97cb4cb62344bc4 Mon Sep 17 00:00:00 2001
From: Nhat Pham <nphamcs@gmail.com>
Date: Mon, 28 Nov 2022 11:16:13 -0800
Subject: [PATCH 3694/4122] zsmalloc: add a LRU to zs_pool to keep track of
 zspages in LRU order

This helps determines the coldest zspages as candidates for writeback.

Link: https://lkml.kernel.org/r/20221128191616.1261026-5-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zsmalloc.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 5427a00a0518d..b1bc231d94a36 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -239,6 +239,11 @@ struct zs_pool {
 	/* Compact classes */
 	struct shrinker shrinker;
 
+#ifdef CONFIG_ZPOOL
+	/* List tracking the zspages in LRU order by most recently added object */
+	struct list_head lru;
+#endif
+
 #ifdef CONFIG_ZSMALLOC_STAT
 	struct dentry *stat_dentry;
 #endif
@@ -260,6 +265,12 @@ struct zspage {
 	unsigned int freeobj;
 	struct page *first_page;
 	struct list_head list; /* fullness list */
+
+#ifdef CONFIG_ZPOOL
+	/* links the zspage to the lru list in the pool */
+	struct list_head lru;
+#endif
+
 	struct zs_pool *pool;
 #ifdef CONFIG_COMPACTION
 	rwlock_t lock;
@@ -953,6 +964,9 @@ static void free_zspage(struct zs_pool *pool, struct size_class *class,
 	}
 
 	remove_zspage(class, zspage, ZS_EMPTY);
+#ifdef CONFIG_ZPOOL
+	list_del(&zspage->lru);
+#endif
 	__free_zspage(pool, class, zspage);
 }
 
@@ -998,6 +1012,10 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
 		off %= PAGE_SIZE;
 	}
 
+#ifdef CONFIG_ZPOOL
+	INIT_LIST_HEAD(&zspage->lru);
+#endif
+
 	set_freeobj(zspage, 0);
 }
 
@@ -1270,6 +1288,31 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	obj_to_location(obj, &page, &obj_idx);
 	zspage = get_zspage(page);
 
+#ifdef CONFIG_ZPOOL
+	/*
+	 * Move the zspage to front of pool's LRU.
+	 *
+	 * Note that this is swap-specific, so by definition there are no ongoing
+	 * accesses to the memory while the page is swapped out that would make
+	 * it "hot". A new entry is hot, then ages to the tail until it gets either
+	 * written back or swaps back in.
+	 *
+	 * Furthermore, map is also called during writeback. We must not put an
+	 * isolated page on the LRU mid-reclaim.
+	 *
+	 * As a result, only update the LRU when the page is mapped for write
+	 * when it's first instantiated.
+	 *
+	 * This is a deviation from the other backends, which perform this update
+	 * in the allocation function (zbud_alloc, z3fold_alloc).
+	 */
+	if (mm == ZS_MM_WO) {
+		if (!list_empty(&zspage->lru))
+			list_del(&zspage->lru);
+		list_add(&zspage->lru, &pool->lru);
+	}
+#endif
+
 	/*
 	 * migration cannot move any zpages in this zspage. Here, pool->lock
 	 * is too heavy since callers would take some time until they calls
@@ -1988,6 +2031,9 @@ static void async_free_zspage(struct work_struct *work)
 		VM_BUG_ON(fullness != ZS_EMPTY);
 		class = pool->size_class[class_idx];
 		spin_lock(&pool->lock);
+#ifdef CONFIG_ZPOOL
+		list_del(&zspage->lru);
+#endif
 		__free_zspage(pool, class, zspage);
 		spin_unlock(&pool->lock);
 	}
@@ -2299,6 +2345,10 @@ struct zs_pool *zs_create_pool(const char *name)
 	 */
 	zs_register_shrinker(pool);
 
+#ifdef CONFIG_ZPOOL
+	INIT_LIST_HEAD(&pool->lru);
+#endif
+
 	return pool;
 
 err:
-- 
GitLab


From bd0fded29689a762e6a749a1258a59cc2b99a18a Mon Sep 17 00:00:00 2001
From: Nhat Pham <nphamcs@gmail.com>
Date: Mon, 28 Nov 2022 11:16:14 -0800
Subject: [PATCH 3695/4122] zsmalloc: add zpool_ops field to zs_pool to store
 evict handlers

This adds a new field to zs_pool to store evict handlers for writeback,
analogous to the zbud allocator.

Link: https://lkml.kernel.org/r/20221128191616.1261026-6-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zsmalloc.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index b1bc231d94a36..d06f9150b9da0 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -242,6 +242,8 @@ struct zs_pool {
 #ifdef CONFIG_ZPOOL
 	/* List tracking the zspages in LRU order by most recently added object */
 	struct list_head lru;
+	struct zpool *zpool;
+	const struct zpool_ops *zpool_ops;
 #endif
 
 #ifdef CONFIG_ZSMALLOC_STAT
@@ -382,7 +384,14 @@ static void *zs_zpool_create(const char *name, gfp_t gfp,
 	 * different contexts and its caller must provide a valid
 	 * gfp mask.
 	 */
-	return zs_create_pool(name);
+	struct zs_pool *pool = zs_create_pool(name);
+
+	if (pool) {
+		pool->zpool = zpool;
+		pool->zpool_ops = zpool_ops;
+	}
+
+	return pool;
 }
 
 static void zs_zpool_destroy(void *pool)
-- 
GitLab


From 9997bc017549acd6425e32300eff28424ffeeb6b Mon Sep 17 00:00:00 2001
From: Nhat Pham <nphamcs@gmail.com>
Date: Mon, 28 Nov 2022 11:16:15 -0800
Subject: [PATCH 3696/4122] zsmalloc: implement writeback mechanism for
 zsmalloc

This commit adds the writeback mechanism for zsmalloc, analogous to the
zbud allocator.  Zsmalloc will attempt to determine the coldest zspage
(i.e least recently used) in the pool, and attempt to write back all the
stored compressed objects via the pool's evict handler.

Link: https://lkml.kernel.org/r/20221128191616.1261026-7-nphamcs@gmail.com
Signed-off-by: Nhat Pham <nphamcs@gmail.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Dan Streetman <ddstreet@ieee.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Seth Jennings <sjenning@redhat.com>
Cc: Vitaly Wool <vitaly.wool@konsulko.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/zsmalloc.c | 194 +++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 183 insertions(+), 11 deletions(-)

diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index d06f9150b9da0..9445bee6b0140 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -271,12 +271,13 @@ struct zspage {
 #ifdef CONFIG_ZPOOL
 	/* links the zspage to the lru list in the pool */
 	struct list_head lru;
+	bool under_reclaim;
+	/* list of unfreed handles whose objects have been reclaimed */
+	unsigned long *deferred_handles;
 #endif
 
 	struct zs_pool *pool;
-#ifdef CONFIG_COMPACTION
 	rwlock_t lock;
-#endif
 };
 
 struct mapping_area {
@@ -297,10 +298,11 @@ static bool ZsHugePage(struct zspage *zspage)
 	return zspage->huge;
 }
 
-#ifdef CONFIG_COMPACTION
 static void migrate_lock_init(struct zspage *zspage);
 static void migrate_read_lock(struct zspage *zspage);
 static void migrate_read_unlock(struct zspage *zspage);
+
+#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage);
 static void migrate_write_lock_nested(struct zspage *zspage);
 static void migrate_write_unlock(struct zspage *zspage);
@@ -308,9 +310,6 @@ static void kick_deferred_free(struct zs_pool *pool);
 static void init_deferred_free(struct zs_pool *pool);
 static void SetZsPageMovable(struct zs_pool *pool, struct zspage *zspage);
 #else
-static void migrate_lock_init(struct zspage *zspage) {}
-static void migrate_read_lock(struct zspage *zspage) {}
-static void migrate_read_unlock(struct zspage *zspage) {}
 static void migrate_write_lock(struct zspage *zspage) {}
 static void migrate_write_lock_nested(struct zspage *zspage) {}
 static void migrate_write_unlock(struct zspage *zspage) {}
@@ -413,6 +412,27 @@ static void zs_zpool_free(void *pool, unsigned long handle)
 	zs_free(pool, handle);
 }
 
+static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries);
+
+static int zs_zpool_shrink(void *pool, unsigned int pages,
+			unsigned int *reclaimed)
+{
+	unsigned int total = 0;
+	int ret = -EINVAL;
+
+	while (total < pages) {
+		ret = zs_reclaim_page(pool, 8);
+		if (ret < 0)
+			break;
+		total++;
+	}
+
+	if (reclaimed)
+		*reclaimed = total;
+
+	return ret;
+}
+
 static void *zs_zpool_map(void *pool, unsigned long handle,
 			enum zpool_mapmode mm)
 {
@@ -451,6 +471,7 @@ static struct zpool_driver zs_zpool_driver = {
 	.malloc_support_movable = true,
 	.malloc =		  zs_zpool_malloc,
 	.free =			  zs_zpool_free,
+	.shrink =		  zs_zpool_shrink,
 	.map =			  zs_zpool_map,
 	.unmap =		  zs_zpool_unmap,
 	.total_size =		  zs_zpool_total_size,
@@ -924,6 +945,25 @@ unlock:
 	return 0;
 }
 
+#ifdef CONFIG_ZPOOL
+/*
+ * Free all the deferred handles whose objects are freed in zs_free.
+ */
+static void free_handles(struct zs_pool *pool, struct zspage *zspage)
+{
+	unsigned long handle = (unsigned long)zspage->deferred_handles;
+
+	while (handle) {
+		unsigned long nxt_handle = handle_to_obj(handle);
+
+		cache_free_handle(pool, handle);
+		handle = nxt_handle;
+	}
+}
+#else
+static inline void free_handles(struct zs_pool *pool, struct zspage *zspage) {}
+#endif
+
 static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 				struct zspage *zspage)
 {
@@ -938,6 +978,9 @@ static void __free_zspage(struct zs_pool *pool, struct size_class *class,
 	VM_BUG_ON(get_zspage_inuse(zspage));
 	VM_BUG_ON(fg != ZS_EMPTY);
 
+	/* Free all deferred handles from zs_free */
+	free_handles(pool, zspage);
+
 	next = page = get_first_page(zspage);
 	do {
 		VM_BUG_ON_PAGE(!PageLocked(page), page);
@@ -1023,6 +1066,8 @@ static void init_zspage(struct size_class *class, struct zspage *zspage)
 
 #ifdef CONFIG_ZPOOL
 	INIT_LIST_HEAD(&zspage->lru);
+	zspage->under_reclaim = false;
+	zspage->deferred_handles = NULL;
 #endif
 
 	set_freeobj(zspage, 0);
@@ -1572,12 +1617,26 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 
 	obj_free(class->size, obj);
 	class_stat_dec(class, OBJ_USED, 1);
+
+#ifdef CONFIG_ZPOOL
+	if (zspage->under_reclaim) {
+		/*
+		 * Reclaim needs the handles during writeback. It'll free
+		 * them along with the zspage when it's done with them.
+		 *
+		 * Record current deferred handle at the memory location
+		 * whose address is given by handle.
+		 */
+		record_obj(handle, (unsigned long)zspage->deferred_handles);
+		zspage->deferred_handles = (unsigned long *)handle;
+		spin_unlock(&pool->lock);
+		return;
+	}
+#endif
 	fullness = fix_fullness_group(class, zspage);
-	if (fullness != ZS_EMPTY)
-		goto out;
+	if (fullness == ZS_EMPTY)
+		free_zspage(pool, class, zspage);
 
-	free_zspage(pool, class, zspage);
-out:
 	spin_unlock(&pool->lock);
 	cache_free_handle(pool, handle);
 }
@@ -1777,7 +1836,7 @@ static enum fullness_group putback_zspage(struct size_class *class,
 	return fullness;
 }
 
-#ifdef CONFIG_COMPACTION
+#if defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION)
 /*
  * To prevent zspage destroy during migration, zspage freeing should
  * hold locks of all pages in the zspage.
@@ -1819,6 +1878,24 @@ static void lock_zspage(struct zspage *zspage)
 	}
 	migrate_read_unlock(zspage);
 }
+#endif /* defined(CONFIG_ZPOOL) || defined(CONFIG_COMPACTION) */
+
+#ifdef CONFIG_ZPOOL
+/*
+ * Unlocks all the pages of the zspage.
+ *
+ * pool->lock must be held before this function is called
+ * to prevent the underlying pages from migrating.
+ */
+static void unlock_zspage(struct zspage *zspage)
+{
+	struct page *page = get_first_page(zspage);
+
+	do {
+		unlock_page(page);
+	} while ((page = get_next_page(page)) != NULL);
+}
+#endif /* CONFIG_ZPOOL */
 
 static void migrate_lock_init(struct zspage *zspage)
 {
@@ -1835,6 +1912,7 @@ static void migrate_read_unlock(struct zspage *zspage) __releases(&zspage->lock)
 	read_unlock(&zspage->lock);
 }
 
+#ifdef CONFIG_COMPACTION
 static void migrate_write_lock(struct zspage *zspage)
 {
 	write_lock(&zspage->lock);
@@ -2399,6 +2477,100 @@ void zs_destroy_pool(struct zs_pool *pool)
 }
 EXPORT_SYMBOL_GPL(zs_destroy_pool);
 
+#ifdef CONFIG_ZPOOL
+static int zs_reclaim_page(struct zs_pool *pool, unsigned int retries)
+{
+	int i, obj_idx, ret = 0;
+	unsigned long handle;
+	struct zspage *zspage;
+	struct page *page;
+	enum fullness_group fullness;
+
+	/* Lock LRU and fullness list */
+	spin_lock(&pool->lock);
+	if (list_empty(&pool->lru)) {
+		spin_unlock(&pool->lock);
+		return -EINVAL;
+	}
+
+	for (i = 0; i < retries; i++) {
+		struct size_class *class;
+
+		zspage = list_last_entry(&pool->lru, struct zspage, lru);
+		list_del(&zspage->lru);
+
+		/* zs_free may free objects, but not the zspage and handles */
+		zspage->under_reclaim = true;
+
+		class = zspage_class(pool, zspage);
+		fullness = get_fullness_group(class, zspage);
+
+		/* Lock out object allocations and object compaction */
+		remove_zspage(class, zspage, fullness);
+
+		spin_unlock(&pool->lock);
+		cond_resched();
+
+		/* Lock backing pages into place */
+		lock_zspage(zspage);
+
+		obj_idx = 0;
+		page = get_first_page(zspage);
+		while (1) {
+			handle = find_alloced_obj(class, page, &obj_idx);
+			if (!handle) {
+				page = get_next_page(page);
+				if (!page)
+					break;
+				obj_idx = 0;
+				continue;
+			}
+
+			/*
+			 * This will write the object and call zs_free.
+			 *
+			 * zs_free will free the object, but the
+			 * under_reclaim flag prevents it from freeing
+			 * the zspage altogether. This is necessary so
+			 * that we can continue working with the
+			 * zspage potentially after the last object
+			 * has been freed.
+			 */
+			ret = pool->zpool_ops->evict(pool->zpool, handle);
+			if (ret)
+				goto next;
+
+			obj_idx++;
+		}
+
+next:
+		/* For freeing the zspage, or putting it back in the pool and LRU list. */
+		spin_lock(&pool->lock);
+		zspage->under_reclaim = false;
+
+		if (!get_zspage_inuse(zspage)) {
+			/*
+			 * Fullness went stale as zs_free() won't touch it
+			 * while the page is removed from the pool. Fix it
+			 * up for the check in __free_zspage().
+			 */
+			zspage->fullness = ZS_EMPTY;
+
+			__free_zspage(pool, class, zspage);
+			spin_unlock(&pool->lock);
+			return 0;
+		}
+
+		putback_zspage(class, zspage);
+		list_add(&zspage->lru, &pool->lru);
+		unlock_zspage(zspage);
+	}
+
+	spin_unlock(&pool->lock);
+	return -EAGAIN;
+}
+#endif /* CONFIG_ZPOOL */
+
 static int __init zs_init(void)
 {
 	int ret;
-- 
GitLab


From feeb9b26952367bc1171592ee476f95aa81ee588 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Mon, 28 Nov 2022 10:56:31 -0500
Subject: [PATCH 3697/4122] filemap: skip write and wait if end offset precedes
 start

Patch series "filemap: skip write and wait if end offset precedes start",
v2.

A fix for the odd write and wait behavior described in the patch 1 commit
log.  Technically patch 1 could simply remove the check rather than lift
it into the callers, but this seemed a bit more user friendly to me.
Patch 2 is appended after observation that fadvise() interacted poorly
with the v1 patch.  This is no longer a problem with v2, making patch 2
purely a cleanup.

This series survived both fstests and ltp regression runs without
observable problems.  I had (end < start) warning checks in each relevant
function, with fadvise() being the only caller that triggered them.  That
said, I dropped the warnings after testing because there seemed to much
potential for noise from the various other callers.


This patch (of 2):

A call to file[map]_write_and_wait_range() with an end offset that
precedes the start offset but happens to land in the same page can trigger
writeback submission but fails to wait on the submitted page.  Writeback
submission occurs because __filemap_fdatawrite_range() passes both offsets
down into write_cache_pages(), which rounds down to page indexes before it
starts processing writeback.  However, __filemap_fdatawait_range()
immediately returns if the byte-granular end offset precedes the start
offset.

This behavior was observed in the form of unpredictable latency from a
frequent write and wait call with incorrect parameters.  The behavior gave
the impression that the fdatawait path might occasionally fail to wait on
writeback, but further investigation showed the latency was from
write_cache_pages() waiting on writeback state to clear for a page already
under writeback.  Therefore, this indicated that fdatawait actually never
waits on writeback in this particular situation.

The byte granular check in __filemap_fdatawait_range() goes all the way
back to the old wait_on_page_writeback() helper.  It originally used page
offsets and so would have waited in this problematic case.  That changed
to byte granularity file offsets in commit 94004ed726f3 ("kill
wait_on_page_writeback_range"), which subtly changed this behavior.  The
check itself has become somewhat redundant since the error checking code
that used to follow the wait loop (at the time of the aforementioned
commit) has now been removed and lifted into the higher level callers.

Therefore, we can restore historical fdatawait behavior by simply removing
the check.  Since the current fdatawait behavior has been in place for
quite some time and is consistent with other interfaces that use file
offsets, instead lift the check into the file[map]_write_and_wait_range()
helpers to provide consistent behavior between the write and wait.

Link: https://lkml.kernel.org/r/20221128155632.3950447-1-bfoster@redhat.com
Link: https://lkml.kernel.org/r/20221128155632.3950447-2-bfoster@redhat.com
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/filemap.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/mm/filemap.c b/mm/filemap.c
index 65eee6ec10667..242cd8bd83304 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -506,9 +506,6 @@ static void __filemap_fdatawait_range(struct address_space *mapping,
 	struct pagevec pvec;
 	int nr_pages;
 
-	if (end_byte < start_byte)
-		return;
-
 	pagevec_init(&pvec);
 	while (index <= end) {
 		unsigned i;
@@ -670,6 +667,9 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 {
 	int err = 0, err2;
 
+	if (lend < lstart)
+		return 0;
+
 	if (mapping_needs_writeback(mapping)) {
 		err = __filemap_fdatawrite_range(mapping, lstart, lend,
 						 WB_SYNC_ALL);
@@ -770,6 +770,9 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
 	int err = 0, err2;
 	struct address_space *mapping = file->f_mapping;
 
+	if (lend < lstart)
+		return 0;
+
 	if (mapping_needs_writeback(mapping)) {
 		err = __filemap_fdatawrite_range(mapping, lstart, lend,
 						 WB_SYNC_ALL);
-- 
GitLab


From 3cd629e5775397103e0428f62ce64747741dbfe5 Mon Sep 17 00:00:00 2001
From: Brian Foster <bfoster@redhat.com>
Date: Mon, 28 Nov 2022 10:56:32 -0500
Subject: [PATCH 3698/4122] mm/fadvise: use LLONG_MAX instead of -1 for eof

generic_fadvise() sets endbyte = -1 to specify end of file (i.e.  if
length == 0 is passed from userspace).  Most other callers to
filemap_fdatawrite_range() use LLONG_MAX for this purpose, particularly if
they also call fdatawait_range() (which requires end >= start).  For
example, sync_file_range(), vfs_fsync() (where the range is passed down
through per-fs ->fsync() callbacks), filemap_flush(), etc.
generic_fadvise() does not currently wait on writeback, but fix the call
up to be consistent with other callers.

Link: https://lkml.kernel.org/r/20221128155632.3950447-3-bfoster@redhat.com
Signed-off-by: Brian Foster <bfoster@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/fadvise.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/fadvise.c b/mm/fadvise.c
index c76ee665355a4..bf04fec87f356 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -72,7 +72,7 @@ int generic_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
 	 */
 	endbyte = (u64)offset + (u64)len;
 	if (!len || endbyte < len)
-		endbyte = -1;
+		endbyte = LLONG_MAX;
 	else
 		endbyte--;		/* inclusive */
 
-- 
GitLab


From d3a89233583bf8edab18ac09732759c71dbe0173 Mon Sep 17 00:00:00 2001
From: zhang songyi <zhang.songyi@zte.com.cn>
Date: Mon, 28 Nov 2022 21:07:43 +0800
Subject: [PATCH 3699/4122] include/linux/pgtable.h: : remove redundant pte
 variable

Return value from ptep_get_and_clear_full() directly instead of taking
this in another redundant variable.

Link: https://lkml.kernel.org/r/202211282107437343474@zte.com.cn
Signed-off-by: zhang songyi <zhang.songyi@zte.com.cn>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/pgtable.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index c74cce67eec85..dfabd549d2e71 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -425,9 +425,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
 					    unsigned long address, pte_t *ptep,
 					    int full)
 {
-	pte_t pte;
-	pte = ptep_get_and_clear(mm, address, ptep);
-	return pte;
+	return ptep_get_and_clear(mm, address, ptep);
 }
 #endif
 
-- 
GitLab


From 1e8e4a7cc2fa3017b1daf02612e095d51924ce1e Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Mon, 28 Nov 2022 10:45:40 +0100
Subject: [PATCH 3700/4122] lockdep: allow instrumenting lockdep.c with KMSAN

Lockdep and KMSAN used to play badly together, causing deadlocks when
KMSAN instrumentation of lockdep.c called lockdep functions recursively.

Looks like this is no more the case, and a kernel can run (yet slower)
with both KMSAN and lockdep enabled.  This patch should fix false
positives on wq_head->lock->dep_map, which KMSAN used to consider
uninitialized because of lockdep.c not being instrumented.

Link: https://lore.kernel.org/lkml/Y3b9AAEKp2Vr3e6O@sol.localdomain/
Link: https://lkml.kernel.org/r/20221128094541.2645890-1-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Reported-by: Eric Biggers <ebiggers@kernel.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Marco Elver <elver@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 kernel/locking/Makefile | 1 -
 1 file changed, 1 deletion(-)

diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index ea925731fa40f..0db4093d17b8a 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -7,7 +7,6 @@ obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o
 
 # Avoid recursion lockdep -> sanitizer -> ... -> lockdep.
 KCSAN_SANITIZE_lockdep.o := n
-KMSAN_SANITIZE_lockdep.o := n
 
 ifdef CONFIG_FUNCTION_TRACER
 CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
-- 
GitLab


From 85716a80c16dd6b6d1aaed87cd4b91c9b1d9b9b2 Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Mon, 28 Nov 2022 10:45:41 +0100
Subject: [PATCH 3701/4122] kmsan: allow using __msan_instrument_asm_store()
 inside runtime

In certain cases (e.g.  when handling a softirq)
__msan_instrument_asm_store(&var, sizeof(var)) may be called with from
within KMSAN runtime, but later the value of @var is used with
!kmsan_in_runtime(), leading to false positives.

Because kmsan_internal_unpoison_memory() doesn't take locks, it should be
fine to call it without kmsan_in_runtime() checks, which fixes the
mentioned false positives.

Link: https://lkml.kernel.org/r/20221128094541.2645890-2-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Eric Biggers <ebiggers@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Marco Elver <elver@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kmsan/instrumentation.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/mm/kmsan/instrumentation.c b/mm/kmsan/instrumentation.c
index 271f135f97a16..770fe02904f36 100644
--- a/mm/kmsan/instrumentation.c
+++ b/mm/kmsan/instrumentation.c
@@ -81,12 +81,16 @@ DECLARE_METADATA_PTR_GETTER(8);
  * Handle a memory store performed by inline assembly. KMSAN conservatively
  * attempts to unpoison the outputs of asm() directives to prevent false
  * positives caused by missed stores.
+ *
+ * __msan_instrument_asm_store() may be called for inline assembly code when
+ * entering or leaving IRQ. We omit the check for kmsan_in_runtime() to ensure
+ * the memory written to in these cases is also marked as initialized.
  */
 void __msan_instrument_asm_store(void *addr, uintptr_t size)
 {
 	unsigned long ua_flags;
 
-	if (!kmsan_enabled || kmsan_in_runtime())
+	if (!kmsan_enabled)
 		return;
 
 	ua_flags = user_access_save();
@@ -103,10 +107,8 @@ void __msan_instrument_asm_store(void *addr, uintptr_t size)
 		user_access_restore(ua_flags);
 		return;
 	}
-	kmsan_enter_runtime();
 	/* Unpoisoning the memory on best effort. */
 	kmsan_internal_unpoison_memory(addr, size, /*checked*/ false);
-	kmsan_leave_runtime();
 	user_access_restore(ua_flags);
 }
 EXPORT_SYMBOL(__msan_instrument_asm_store);
-- 
GitLab


From 22c4e80466eb88cff283ed50a5d0b0ff1654d0c3 Mon Sep 17 00:00:00 2001
From: Feiyang Chen <chenfeiyang@loongson.cn>
Date: Thu, 27 Oct 2022 20:52:50 +0800
Subject: [PATCH 3702/4122] MIPS&LoongArch&NIOS2: adjust prototypes of
 p?d_init()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Patch series "mm/sparse-vmemmap: Generalise helpers and enable for
LoongArch", v14.

This series is in order to enable sparse-vmemmap for LoongArch.  But
LoongArch cannot use generic helpers directly because MIPS&LoongArch need
to call pgd_init()/pud_init()/pmd_init() when populating page tables.  So
we adjust the prototypes of p?d_init() to make generic helpers can call
them, then enable sparse-vmemmap with generic helpers, and to be further,
generalise vmemmap_populate_hugepages() for ARM64, X86 and LoongArch.


This patch (of 4):

We are preparing to add sparse vmemmap support to LoongArch.  MIPS and
LoongArch need to call pgd_init()/pud_init()/pmd_init() when populating
page tables, so adjust their prototypes to make generic helpers can call
them.

NIOS2 declares pmd_init() but doesn't use, just remove it to avoid build
errors.

Link: https://lkml.kernel.org/r/20221027125253.3458989-1-chenhuacai@loongson.cn
Link: https://lkml.kernel.org/r/20221027125253.3458989-2-chenhuacai@loongson.cn
Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Reviewed-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Will Deacon <will@kernel.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Cc: Xuerui Wang <kernel@xen0n.name>
Cc: Min Zhou <zhoumin@loongson.cn>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/loongarch/include/asm/pgalloc.h | 13 ++-----------
 arch/loongarch/include/asm/pgtable.h |  8 ++++----
 arch/loongarch/kernel/numa.c         |  4 ++--
 arch/loongarch/mm/pgtable.c          | 23 +++++++++++++----------
 arch/mips/include/asm/pgalloc.h      | 10 +++++-----
 arch/mips/include/asm/pgtable-64.h   |  8 ++++----
 arch/mips/kvm/mmu.c                  |  3 +--
 arch/mips/mm/pgtable-32.c            |  9 ++++-----
 arch/mips/mm/pgtable-64.c            | 18 ++++++++++--------
 arch/mips/mm/pgtable.c               |  2 +-
 arch/nios2/include/asm/pgalloc.h     |  5 -----
 11 files changed, 46 insertions(+), 57 deletions(-)

diff --git a/arch/loongarch/include/asm/pgalloc.h b/arch/loongarch/include/asm/pgalloc.h
index 4bfeb3c9c9acc..af1d1e4a69659 100644
--- a/arch/loongarch/include/asm/pgalloc.h
+++ b/arch/loongarch/include/asm/pgalloc.h
@@ -42,15 +42,6 @@ static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 
 extern void pagetable_init(void);
 
-/*
- * Initialize a new pmd table with invalid pointers.
- */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
-
-/*
- * Initialize a new pgd / pmd table with invalid pointers.
- */
-extern void pgd_init(unsigned long page);
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 #define __pte_free_tlb(tlb, pte, address)			\
@@ -76,7 +67,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 	}
 
 	pmd = (pmd_t *)page_address(pg);
-	pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+	pmd_init(pmd);
 	return pmd;
 }
 
@@ -92,7 +83,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 	pud = (pud_t *) __get_free_page(GFP_KERNEL);
 	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+		pud_init(pud);
 	return pud;
 }
 
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index b8d837ee69101..9e6651846db95 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -237,11 +237,11 @@ extern void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pm
 #define pfn_pmd(pfn, prot)	__pmd(((pfn) << _PFN_SHIFT) | pgprot_val(prot))
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd / pud / pmd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
-extern void pud_init(unsigned long page, unsigned long pagetable);
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pgd_init(void *addr);
+extern void pud_init(void *addr);
+extern void pmd_init(void *addr);
 
 /*
  * Non-present pages:  high 40 bits are offset, next 8 bits type,
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index a13f92593cfda..eb5d3a4c8a7ad 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -78,7 +78,7 @@ void __init pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		pgd_populate(&init_mm, pgd, new);
 #ifndef __PAGETABLE_PUD_FOLDED
-		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+		pud_init(new);
 #endif
 	}
 
@@ -89,7 +89,7 @@ void __init pcpu_populate_pte(unsigned long addr)
 		new = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
 		pud_populate(&init_mm, pud, new);
 #ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+		pmd_init(new);
 #endif
 	}
 
diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c
index ee179ccd3e3f2..36a6dc0148aef 100644
--- a/arch/loongarch/mm/pgtable.c
+++ b/arch/loongarch/mm/pgtable.c
@@ -16,7 +16,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	ret = (pgd_t *) __get_free_page(GFP_KERNEL);
 	if (ret) {
 		init = pgd_offset(&init_mm, 0UL);
-		pgd_init((unsigned long)ret);
+		pgd_init(ret);
 		memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
@@ -25,7 +25,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(pgd_alloc);
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
 	unsigned long *p, *end;
 	unsigned long entry;
@@ -38,7 +38,7 @@ void pgd_init(unsigned long page)
 	entry = (unsigned long)invalid_pte_table;
 #endif
 
-	p = (unsigned long *) page;
+	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PGD;
 
 	do {
@@ -56,11 +56,12 @@ void pgd_init(unsigned long page)
 EXPORT_SYMBOL_GPL(pgd_init);
 
 #ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pte_table;
 
-	p = (unsigned long *) addr;
+	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PMD;
 
 	do {
@@ -79,9 +80,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
 #ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pmd_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PUD;
@@ -98,6 +100,7 @@ void pud_init(unsigned long addr, unsigned long pagetable)
 		p[-1] = pagetable;
 	} while (p != end);
 }
+EXPORT_SYMBOL_GPL(pud_init);
 #endif
 
 pmd_t mk_pmd(struct page *page, pgprot_t prot)
@@ -119,12 +122,12 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
 void __init pagetable_init(void)
 {
 	/* Initialize the entire pgd.  */
-	pgd_init((unsigned long)swapper_pg_dir);
-	pgd_init((unsigned long)invalid_pg_dir);
+	pgd_init(swapper_pg_dir);
+	pgd_init(invalid_pg_dir);
 #ifndef __PAGETABLE_PUD_FOLDED
-	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+	pud_init(invalid_pud_table);
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
-	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+	pmd_init(invalid_pmd_table);
 #endif
 }
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index 796035784c733..f72e737dda214 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -33,7 +33,7 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 /*
  * Initialize a new pmd table with invalid pointers.
  */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pmd_init(void *addr);
 
 #ifndef __PAGETABLE_PMD_FOLDED
 
@@ -44,9 +44,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 #endif
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
+extern void pgd_init(void *addr);
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
@@ -77,7 +77,7 @@ static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long address)
 	}
 
 	pmd = (pmd_t *)page_address(pg);
-	pmd_init((unsigned long)pmd, (unsigned long)invalid_pte_table);
+	pmd_init(pmd);
 	return pmd;
 }
 
@@ -93,7 +93,7 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address)
 
 	pud = (pud_t *) __get_free_pages(GFP_KERNEL, PUD_TABLE_ORDER);
 	if (pud)
-		pud_init((unsigned long)pud, (unsigned long)invalid_pmd_table);
+		pud_init(pud);
 	return pud;
 }
 
diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h
index 436c29d698fa3..c6310192b654d 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -313,11 +313,11 @@ static inline pmd_t *pud_pgtable(pud_t pud)
 #endif
 
 /*
- * Initialize a new pgd / pmd table with invalid pointers.
+ * Initialize a new pgd / pud / pmd table with invalid pointers.
  */
-extern void pgd_init(unsigned long page);
-extern void pud_init(unsigned long page, unsigned long pagetable);
-extern void pmd_init(unsigned long page, unsigned long pagetable);
+extern void pgd_init(void *addr);
+extern void pud_init(void *addr);
+extern void pmd_init(void *addr);
 
 /*
  * Non-present pages:  high 40 bits are offset, next 8 bits type,
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index 74cd64a24d059..e8c08988ed377 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -122,8 +122,7 @@ static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache,
 		if (!cache)
 			return NULL;
 		new_pmd = kvm_mmu_memory_cache_alloc(cache);
-		pmd_init((unsigned long)new_pmd,
-			 (unsigned long)invalid_pte_table);
+		pmd_init(new_pmd);
 		pud_populate(NULL, pud, new_pmd);
 	}
 	pmd = pmd_offset(pud, addr);
diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c
index 61891af250197..f57fb69472f84 100644
--- a/arch/mips/mm/pgtable-32.c
+++ b/arch/mips/mm/pgtable-32.c
@@ -13,9 +13,9 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
-	unsigned long *p = (unsigned long *) page;
+	unsigned long *p = (unsigned long *)addr;
 	int i;
 
 	for (i = 0; i < USER_PTRS_PER_PGD; i+=8) {
@@ -61,9 +61,8 @@ void __init pagetable_init(void)
 #endif
 
 	/* Initialize the entire pgd.  */
-	pgd_init((unsigned long)swapper_pg_dir);
-	pgd_init((unsigned long)swapper_pg_dir
-		 + sizeof(pgd_t) * USER_PTRS_PER_PGD);
+	pgd_init(swapper_pg_dir);
+	pgd_init(&swapper_pg_dir[USER_PTRS_PER_PGD]);
 
 	pgd_base = swapper_pg_dir;
 
diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c
index 7536f7804c440..b4386a0e2ef87 100644
--- a/arch/mips/mm/pgtable-64.c
+++ b/arch/mips/mm/pgtable-64.c
@@ -13,7 +13,7 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
-void pgd_init(unsigned long page)
+void pgd_init(void *addr)
 {
 	unsigned long *p, *end;
 	unsigned long entry;
@@ -26,7 +26,7 @@ void pgd_init(unsigned long page)
 	entry = (unsigned long)invalid_pte_table;
 #endif
 
-	p = (unsigned long *) page;
+	p = (unsigned long *) addr;
 	end = p + PTRS_PER_PGD;
 
 	do {
@@ -43,11 +43,12 @@ void pgd_init(unsigned long page)
 }
 
 #ifndef __PAGETABLE_PMD_FOLDED
-void pmd_init(unsigned long addr, unsigned long pagetable)
+void pmd_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pte_table;
 
-	p = (unsigned long *) addr;
+	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PMD;
 
 	do {
@@ -66,9 +67,10 @@ EXPORT_SYMBOL_GPL(pmd_init);
 #endif
 
 #ifndef __PAGETABLE_PUD_FOLDED
-void pud_init(unsigned long addr, unsigned long pagetable)
+void pud_init(void *addr)
 {
 	unsigned long *p, *end;
+	unsigned long pagetable = (unsigned long)invalid_pmd_table;
 
 	p = (unsigned long *)addr;
 	end = p + PTRS_PER_PUD;
@@ -108,12 +110,12 @@ void __init pagetable_init(void)
 	pgd_t *pgd_base;
 
 	/* Initialize the entire pgd.  */
-	pgd_init((unsigned long)swapper_pg_dir);
+	pgd_init(swapper_pg_dir);
 #ifndef __PAGETABLE_PUD_FOLDED
-	pud_init((unsigned long)invalid_pud_table, (unsigned long)invalid_pmd_table);
+	pud_init(invalid_pud_table);
 #endif
 #ifndef __PAGETABLE_PMD_FOLDED
-	pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table);
+	pmd_init(invalid_pmd_table);
 #endif
 	pgd_base = swapper_pg_dir;
 	/*
diff --git a/arch/mips/mm/pgtable.c b/arch/mips/mm/pgtable.c
index 3b7590660a04b..b13314be5d0e5 100644
--- a/arch/mips/mm/pgtable.c
+++ b/arch/mips/mm/pgtable.c
@@ -15,7 +15,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
 	ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_TABLE_ORDER);
 	if (ret) {
 		init = pgd_offset(&init_mm, 0UL);
-		pgd_init((unsigned long)ret);
+		pgd_init(ret);
 		memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
 		       (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
 	}
diff --git a/arch/nios2/include/asm/pgalloc.h b/arch/nios2/include/asm/pgalloc.h
index 3c4ae74d57985..ecd1657bb2ced 100644
--- a/arch/nios2/include/asm/pgalloc.h
+++ b/arch/nios2/include/asm/pgalloc.h
@@ -26,11 +26,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 	set_pmd(pmd, __pmd((unsigned long)page_address(pte)));
 }
 
-/*
- * Initialize a new pmd table with invalid pointers.
- */
-extern void pmd_init(unsigned long page, unsigned long pagetable);
-
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
 #define __pte_free_tlb(tlb, pte, addr)				\
-- 
GitLab


From 7b09f5af01ede480cbe7abcb281cf17550a46ff5 Mon Sep 17 00:00:00 2001
From: Feiyang Chen <chenfeiyang@loongson.cn>
Date: Thu, 27 Oct 2022 20:52:51 +0800
Subject: [PATCH 3703/4122] LoongArch: add sparse memory vmemmap support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add sparse memory vmemmap support for LoongArch.  SPARSEMEM_VMEMMAP uses a
virtually mapped memmap to optimise pfn_to_page and page_to_pfn
operations.  This is the most efficient option when sufficient kernel
resources are available.

Link: https://lkml.kernel.org/r/20221027125253.3458989-3-chenhuacai@loongson.cn
Signed-off-by: Min Zhou <zhoumin@loongson.cn>
Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Will Deacon <will@kernel.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Cc: Xuerui Wang <kernel@xen0n.name>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/loongarch/Kconfig                 |  1 +
 arch/loongarch/include/asm/pgtable.h   |  7 ++-
 arch/loongarch/include/asm/sparsemem.h |  8 +++
 arch/loongarch/mm/init.c               | 72 ++++++++++++++++++++++++--
 include/linux/mm.h                     |  2 +
 mm/sparse-vmemmap.c                    | 10 ++++
 6 files changed, 96 insertions(+), 4 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 903096bd87f88..6f7fa0c0ca08a 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -487,6 +487,7 @@ config ARCH_FLATMEM_ENABLE
 
 config ARCH_SPARSEMEM_ENABLE
 	def_bool y
+	select SPARSEMEM_VMEMMAP_ENABLE
 	help
 	  Say Y to support efficient handling of sparse physical memory,
 	  for architectures which are either NUMA (Non-Uniform Memory Access)
diff --git a/arch/loongarch/include/asm/pgtable.h b/arch/loongarch/include/asm/pgtable.h
index 9e6651846db95..022ec6be3602f 100644
--- a/arch/loongarch/include/asm/pgtable.h
+++ b/arch/loongarch/include/asm/pgtable.h
@@ -11,6 +11,7 @@
 
 #include <linux/compiler.h>
 #include <asm/addrspace.h>
+#include <asm/page.h>
 #include <asm/pgtable-bits.h>
 
 #if CONFIG_PGTABLE_LEVELS == 2
@@ -59,6 +60,7 @@
 #include <linux/mm_types.h>
 #include <linux/mmzone.h>
 #include <asm/fixmap.h>
+#include <asm/sparsemem.h>
 
 struct mm_struct;
 struct vm_area_struct;
@@ -86,7 +88,10 @@ extern unsigned long zero_page_mask;
 #define VMALLOC_START	MODULES_END
 #define VMALLOC_END	\
 	(vm_map_base +	\
-	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE)
+	 min(PTRS_PER_PGD * PTRS_PER_PUD * PTRS_PER_PMD * PTRS_PER_PTE * PAGE_SIZE, (1UL << cpu_vabits)) - PMD_SIZE - VMEMMAP_SIZE)
+
+#define vmemmap		((struct page *)((VMALLOC_END + PMD_SIZE) & PMD_MASK))
+#define VMEMMAP_END	((unsigned long)vmemmap + VMEMMAP_SIZE - 1)
 
 #define pte_ERROR(e) \
 	pr_err("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e))
diff --git a/arch/loongarch/include/asm/sparsemem.h b/arch/loongarch/include/asm/sparsemem.h
index 3d18cdf1b0693..8d4af6aff8a8f 100644
--- a/arch/loongarch/include/asm/sparsemem.h
+++ b/arch/loongarch/include/asm/sparsemem.h
@@ -11,8 +11,16 @@
 #define SECTION_SIZE_BITS	29 /* 2^29 = Largest Huge Page Size */
 #define MAX_PHYSMEM_BITS	48
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+#define VMEMMAP_SIZE	(sizeof(struct page) * (1UL << (cpu_pabits + 1 - PAGE_SHIFT)))
+#endif
+
 #endif /* CONFIG_SPARSEMEM */
 
+#ifndef VMEMMAP_SIZE
+#define VMEMMAP_SIZE	0	/* 1, For FLATMEM; 2, For SPARSEMEM without VMEMMAP. */
+#endif
+
 #ifdef CONFIG_MEMORY_HOTPLUG
 int memory_add_physaddr_to_nid(u64 addr);
 #define memory_add_physaddr_to_nid memory_add_physaddr_to_nid
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 080061793c859..451d93667bcce 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -22,7 +22,7 @@
 #include <linux/pfn.h>
 #include <linux/hardirq.h>
 #include <linux/gfp.h>
-#include <linux/initrd.h>
+#include <linux/hugetlb.h>
 #include <linux/mmzone.h>
 
 #include <asm/asm-offsets.h>
@@ -152,6 +152,72 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 #endif
 
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+static int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+						int node, struct vmem_altmap *altmap)
+{
+	unsigned long addr = start;
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	for (addr = start; addr < end; addr = next) {
+		next = pmd_addr_end(addr, end);
+
+		pgd = vmemmap_pgd_populate(addr, node);
+		if (!pgd)
+			return -ENOMEM;
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+		pud = vmemmap_pud_populate(p4d, addr, node);
+		if (!pud)
+			return -ENOMEM;
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(*pmd)) {
+			void *p = NULL;
+
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
+			if (p) {
+				pmd_t entry;
+
+				entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+				pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+				set_pmd_at(&init_mm, addr, pmd, entry);
+
+				continue;
+			}
+		} else if (pmd_val(*pmd) & _PAGE_HUGE) {
+			vmemmap_verify((pte_t *)pmd, node, addr, next);
+			continue;
+		}
+		if (vmemmap_populate_basepages(addr, next, node, NULL))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+int __meminit vmemmap_populate(unsigned long start, unsigned long end,
+			       int node, struct vmem_altmap *altmap)
+{
+#if CONFIG_PGTABLE_LEVELS == 2
+	return vmemmap_populate_basepages(start, end, node, NULL);
+#else
+	return vmemmap_populate_hugepages(start, end, node, NULL);
+#endif
+}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+void vmemmap_free(unsigned long start, unsigned long end, struct vmem_altmap *altmap)
+{
+}
+#endif
+#endif
+
 static pte_t *fixmap_pte(unsigned long addr)
 {
 	pgd_t *pgd;
@@ -168,7 +234,7 @@ static pte_t *fixmap_pte(unsigned long addr)
 		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 		pgd_populate(&init_mm, pgd, new);
 #ifndef __PAGETABLE_PUD_FOLDED
-		pud_init((unsigned long)new, (unsigned long)invalid_pmd_table);
+		pud_init(new);
 #endif
 	}
 
@@ -179,7 +245,7 @@ static pte_t *fixmap_pte(unsigned long addr)
 		new = memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
 		pud_populate(&init_mm, pud, new);
 #ifndef __PAGETABLE_PMD_FOLDED
-		pmd_init((unsigned long)new, (unsigned long)invalid_pte_table);
+		pmd_init(new);
 #endif
 	}
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 767c8c522e709..7c31f898337ce 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3360,6 +3360,8 @@ void *sparse_buffer_alloc(unsigned long size);
 struct page * __populate_section_memmap(unsigned long pfn,
 		unsigned long nr_pages, int nid, struct vmem_altmap *altmap,
 		struct dev_pagemap *pgmap);
+void pmd_init(void *addr);
+void pud_init(void *addr);
 pgd_t *vmemmap_pgd_populate(unsigned long addr, int node);
 p4d_t *vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node);
 pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 46ae542118c09..797b30e9050c5 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -196,6 +196,10 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
 	return pmd;
 }
 
+void __weak __meminit pmd_init(void *addr)
+{
+}
+
 pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
 {
 	pud_t *pud = pud_offset(p4d, addr);
@@ -203,11 +207,16 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
 		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
+		pmd_init(p);
 		pud_populate(&init_mm, pud, p);
 	}
 	return pud;
 }
 
+void __weak __meminit pud_init(void *addr)
+{
+}
+
 p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
 {
 	p4d_t *p4d = p4d_offset(pgd, addr);
@@ -215,6 +224,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
 		void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
 		if (!p)
 			return NULL;
+		pud_init(p);
 		p4d_populate(&init_mm, p4d, p);
 	}
 	return p4d;
-- 
GitLab


From 2045a3b8911b6ee64dd9b522d61abc468ecdcdb5 Mon Sep 17 00:00:00 2001
From: Feiyang Chen <chenfeiyang@loongson.cn>
Date: Thu, 27 Oct 2022 20:52:52 +0800
Subject: [PATCH 3704/4122] mm/sparse-vmemmap: generalise
 vmemmap_populate_hugepages()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Generalise vmemmap_populate_hugepages() so ARM64 & X86 & LoongArch can
share its implementation.

Link: https://lkml.kernel.org/r/20221027125253.3458989-4-chenhuacai@loongson.cn
Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Acked-by: Will Deacon <will@kernel.org>
Acked-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Min Zhou <zhoumin@loongson.cn>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Philippe Mathieu-Daudé <philmd@linaro.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Cc: Xuerui Wang <kernel@xen0n.name>
Cc: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/arm64/mm/mmu.c      | 55 +++++++-----------------
 arch/loongarch/mm/init.c | 59 +++++++-------------------
 arch/x86/mm/init_64.c    | 92 ++++++++++++++--------------------------
 include/linux/mm.h       |  6 +++
 mm/sparse-vmemmap.c      | 63 +++++++++++++++++++++++++++
 5 files changed, 132 insertions(+), 143 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 556154d821bf4..27217ba12e576 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1137,53 +1137,28 @@ static void free_empty_tables(unsigned long addr, unsigned long end,
 }
 #endif
 
+void __meminit vmemmap_set_pmd(pmd_t *pmdp, void *p, int node,
+			       unsigned long addr, unsigned long next)
+{
+	pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmdp, int node,
+				unsigned long addr, unsigned long next)
+{
+	vmemmap_verify((pte_t *)pmdp, node, addr, next);
+	return 1;
+}
+
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap)
 {
-	unsigned long addr = start;
-	unsigned long next;
-	pgd_t *pgdp;
-	p4d_t *p4dp;
-	pud_t *pudp;
-	pmd_t *pmdp;
-
 	WARN_ON((start < VMEMMAP_START) || (end > VMEMMAP_END));
 
 	if (!ARM64_KERNEL_USES_PMD_MAPS)
 		return vmemmap_populate_basepages(start, end, node, altmap);
-
-	do {
-		next = pmd_addr_end(addr, end);
-
-		pgdp = vmemmap_pgd_populate(addr, node);
-		if (!pgdp)
-			return -ENOMEM;
-
-		p4dp = vmemmap_p4d_populate(pgdp, addr, node);
-		if (!p4dp)
-			return -ENOMEM;
-
-		pudp = vmemmap_pud_populate(p4dp, addr, node);
-		if (!pudp)
-			return -ENOMEM;
-
-		pmdp = pmd_offset(pudp, addr);
-		if (pmd_none(READ_ONCE(*pmdp))) {
-			void *p = NULL;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-			if (!p) {
-				if (vmemmap_populate_basepages(addr, next, node, altmap))
-					return -ENOMEM;
-				continue;
-			}
-
-			pmd_set_huge(pmdp, __pa(p), __pgprot(PROT_SECT_NORMAL));
-		} else
-			vmemmap_verify((pte_t *)pmdp, node, addr, next);
-	} while (addr = next, addr != end);
-
-	return 0;
+	else
+		return vmemmap_populate_hugepages(start, end, node, altmap);
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/arch/loongarch/mm/init.c b/arch/loongarch/mm/init.c
index 451d93667bcce..e018aed345866 100644
--- a/arch/loongarch/mm/init.c
+++ b/arch/loongarch/mm/init.c
@@ -153,52 +153,25 @@ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
 #endif
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-static int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
-						int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
 {
-	unsigned long addr = start;
-	unsigned long next;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
+	pmd_t entry;
 
-	for (addr = start; addr < end; addr = next) {
-		next = pmd_addr_end(addr, end);
-
-		pgd = vmemmap_pgd_populate(addr, node);
-		if (!pgd)
-			return -ENOMEM;
-		p4d = vmemmap_p4d_populate(pgd, addr, node);
-		if (!p4d)
-			return -ENOMEM;
-		pud = vmemmap_pud_populate(p4d, addr, node);
-		if (!pud)
-			return -ENOMEM;
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			void *p = NULL;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node, NULL);
-			if (p) {
-				pmd_t entry;
-
-				entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
-				pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
-				set_pmd_at(&init_mm, addr, pmd, entry);
-
-				continue;
-			}
-		} else if (pmd_val(*pmd) & _PAGE_HUGE) {
-			vmemmap_verify((pte_t *)pmd, node, addr, next);
-			continue;
-		}
-		if (vmemmap_populate_basepages(addr, next, node, NULL))
-			return -ENOMEM;
-	}
+	entry = pfn_pmd(virt_to_pfn(p), PAGE_KERNEL);
+	pmd_val(entry) |= _PAGE_HUGE | _PAGE_HGLOBAL;
+	set_pmd_at(&init_mm, addr, pmd, entry);
+}
+
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
+				unsigned long addr, unsigned long next)
+{
+	int huge = pmd_val(*pmd) & _PAGE_HUGE;
+
+	if (huge)
+		vmemmap_verify((pte_t *)pmd, node, addr, next);
 
-	return 0;
+	return huge;
 }
 
 int __meminit vmemmap_populate(unsigned long start, unsigned long end,
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e8db4edd7cc9b..a190aae8ceaf7 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1492,72 +1492,44 @@ static long __meminitdata addr_start, addr_end;
 static void __meminitdata *p_start, *p_end;
 static int __meminitdata node_start;
 
-static int __meminit vmemmap_populate_hugepages(unsigned long start,
-		unsigned long end, int node, struct vmem_altmap *altmap)
+void __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+			       unsigned long addr, unsigned long next)
 {
-	unsigned long addr;
-	unsigned long next;
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-
-	for (addr = start; addr < end; addr = next) {
-		next = pmd_addr_end(addr, end);
-
-		pgd = vmemmap_pgd_populate(addr, node);
-		if (!pgd)
-			return -ENOMEM;
-
-		p4d = vmemmap_p4d_populate(pgd, addr, node);
-		if (!p4d)
-			return -ENOMEM;
-
-		pud = vmemmap_pud_populate(p4d, addr, node);
-		if (!pud)
-			return -ENOMEM;
-
-		pmd = pmd_offset(pud, addr);
-		if (pmd_none(*pmd)) {
-			void *p;
-
-			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
-			if (p) {
-				pte_t entry;
-
-				entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
-						PAGE_KERNEL_LARGE);
-				set_pmd(pmd, __pmd(pte_val(entry)));
+	pte_t entry;
+
+	entry = pfn_pte(__pa(p) >> PAGE_SHIFT,
+			PAGE_KERNEL_LARGE);
+	set_pmd(pmd, __pmd(pte_val(entry)));
+
+	/* check to see if we have contiguous blocks */
+	if (p_end != p || node_start != node) {
+		if (p_start)
+			pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+				addr_start, addr_end-1, p_start, p_end-1, node_start);
+		addr_start = addr;
+		node_start = node;
+		p_start = p;
+	}
 
-				/* check to see if we have contiguous blocks */
-				if (p_end != p || node_start != node) {
-					if (p_start)
-						pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
-						       addr_start, addr_end-1, p_start, p_end-1, node_start);
-					addr_start = addr;
-					node_start = node;
-					p_start = p;
-				}
+	addr_end = addr + PMD_SIZE;
+	p_end = p + PMD_SIZE;
 
-				addr_end = addr + PMD_SIZE;
-				p_end = p + PMD_SIZE;
+	if (!IS_ALIGNED(addr, PMD_SIZE) ||
+		!IS_ALIGNED(next, PMD_SIZE))
+		vmemmap_use_new_sub_pmd(addr, next);
+}
 
-				if (!IS_ALIGNED(addr, PMD_SIZE) ||
-				    !IS_ALIGNED(next, PMD_SIZE))
-					vmemmap_use_new_sub_pmd(addr, next);
+int __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
+				unsigned long addr, unsigned long next)
+{
+	int large = pmd_large(*pmd);
 
-				continue;
-			} else if (altmap)
-				return -ENOMEM; /* no fallback */
-		} else if (pmd_large(*pmd)) {
-			vmemmap_verify((pte_t *)pmd, node, addr, next);
-			vmemmap_use_sub_pmd(addr, next);
-			continue;
-		}
-		if (vmemmap_populate_basepages(addr, next, node, NULL))
-			return -ENOMEM;
+	if (pmd_large(*pmd)) {
+		vmemmap_verify((pte_t *)pmd, node, addr, next);
+		vmemmap_use_sub_pmd(addr, next);
 	}
-	return 0;
+
+	return large;
 }
 
 int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7c31f898337ce..472cb60ace075 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3373,8 +3373,14 @@ struct vmem_altmap;
 void *vmemmap_alloc_block_buf(unsigned long size, int node,
 			      struct vmem_altmap *altmap);
 void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
+void vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+		     unsigned long addr, unsigned long next);
+int vmemmap_check_pmd(pmd_t *pmd, int node,
+		      unsigned long addr, unsigned long next);
 int vmemmap_populate_basepages(unsigned long start, unsigned long end,
 			       int node, struct vmem_altmap *altmap);
+int vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+			       int node, struct vmem_altmap *altmap);
 int vmemmap_populate(unsigned long start, unsigned long end, int node,
 		struct vmem_altmap *altmap);
 void vmemmap_populate_print_last(void);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 797b30e9050c5..c5398a5960d05 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -295,6 +295,69 @@ int __meminit vmemmap_populate_basepages(unsigned long start, unsigned long end,
 	return vmemmap_populate_range(start, end, node, altmap, NULL);
 }
 
+void __weak __meminit vmemmap_set_pmd(pmd_t *pmd, void *p, int node,
+				      unsigned long addr, unsigned long next)
+{
+}
+
+int __weak __meminit vmemmap_check_pmd(pmd_t *pmd, int node,
+				       unsigned long addr, unsigned long next)
+{
+	return 0;
+}
+
+int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end,
+					 int node, struct vmem_altmap *altmap)
+{
+	unsigned long addr;
+	unsigned long next;
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	for (addr = start; addr < end; addr = next) {
+		next = pmd_addr_end(addr, end);
+
+		pgd = vmemmap_pgd_populate(addr, node);
+		if (!pgd)
+			return -ENOMEM;
+
+		p4d = vmemmap_p4d_populate(pgd, addr, node);
+		if (!p4d)
+			return -ENOMEM;
+
+		pud = vmemmap_pud_populate(p4d, addr, node);
+		if (!pud)
+			return -ENOMEM;
+
+		pmd = pmd_offset(pud, addr);
+		if (pmd_none(READ_ONCE(*pmd))) {
+			void *p;
+
+			p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+			if (p) {
+				vmemmap_set_pmd(pmd, p, node, addr, next);
+				continue;
+			} else if (altmap) {
+				/*
+				 * No fallback: In any case we care about, the
+				 * altmap should be reasonably sized and aligned
+				 * such that vmemmap_alloc_block_buf() will always
+				 * succeed. For consistency with the PTE case,
+				 * return an error here as failure could indicate
+				 * a configuration issue with the size of the altmap.
+				 */
+				return -ENOMEM;
+			}
+		} else if (vmemmap_check_pmd(pmd, node, addr, next))
+			continue;
+		if (vmemmap_populate_basepages(addr, next, node, altmap))
+			return -ENOMEM;
+	}
+	return 0;
+}
+
 /*
  * For compound pages bigger than section size (e.g. x86 1G compound
  * pages with 2M subsection size) fill the rest of sections as tail
-- 
GitLab


From c5a303a51b9ca85b52250fd8d92bf4918fbbdf0d Mon Sep 17 00:00:00 2001
From: Feiyang Chen <chenfeiyang@loongson.cn>
Date: Thu, 27 Oct 2022 20:52:53 +0800
Subject: [PATCH 3705/4122] LoongArch: enable
 ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The feature of minimizing overhead of struct page associated with each
HugeTLB page is implemented on x86_64.  However, the infrastructure of
this feature is already there, so just select ARCH_WANT_HUGETLB_PAGE_
OPTIMIZE_VMEMMAP is enough to enable this feature for LoongArch.

Link: https://lkml.kernel.org/r/20221027125253.3458989-5-chenhuacai@loongson.cn
Signed-off-by: Feiyang Chen <chenfeiyang@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Acked-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dinh Nguyen <dinguyen@kernel.org>
Cc: Guo Ren <guoren@kernel.org>
Cc: Jiaxun Yang <jiaxun.yang@flygoat.com>
Cc: Min Zhou <zhoumin@loongson.cn>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
Cc: Will Deacon <will@kernel.org>
Cc: Xuefeng Li <lixuefeng@loongson.cn>
Cc: Xuerui Wang <kernel@xen0n.name>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 arch/loongarch/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 6f7fa0c0ca08a..0a6ef613124c6 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -52,6 +52,7 @@ config LOONGARCH
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
+	select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
 	select ARCH_WANT_LD_ORPHAN_WARN
 	select ARCH_WANTS_NO_INSTR
 	select BUILDTIME_TABLE_SORT
-- 
GitLab


From 3720dd6dcac38d03424d6ba38107f39af5318bcf Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Tue, 1 Nov 2022 10:53:22 -0700
Subject: [PATCH 3706/4122] filemap: convert replace_page_cache_page() to
 replace_page_cache_folio()

Patch series "Removing the lru_cache_add() wrapper".

This patchset replaces all calls of lru_cache_add() with the folio
equivalent: folio_add_lru().  This is allows us to get rid of the wrapper
The series passes xfstests and the userfaultfd selftests.


This patch (of 5):

Eliminates 7 calls to compound_head().

Link: https://lkml.kernel.org/r/20221101175326.13265-1-vishal.moola@gmail.com
Link: https://lkml.kernel.org/r/20221101175326.13265-2-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/fuse/dev.c           |  2 +-
 include/linux/pagemap.h |  2 +-
 mm/filemap.c            | 52 ++++++++++++++++++++---------------------
 3 files changed, 27 insertions(+), 29 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index b4a6e0a1b945a..26817a2db4639 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -837,7 +837,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 	if (WARN_ON(PageMlocked(oldpage)))
 		goto out_fallback_unlock;
 
-	replace_page_cache_page(oldpage, newpage);
+	replace_page_cache_folio(page_folio(oldpage), page_folio(newpage));
 
 	get_page(newpage);
 
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2ec0ca1f3d389..29e1f9e76eb6d 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -1102,7 +1102,7 @@ int filemap_add_folio(struct address_space *mapping, struct folio *folio,
 		pgoff_t index, gfp_t gfp);
 void filemap_remove_folio(struct folio *folio);
 void __filemap_remove_folio(struct folio *folio, void *shadow);
-void replace_page_cache_page(struct page *old, struct page *new);
+void replace_page_cache_folio(struct folio *old, struct folio *new);
 void delete_from_page_cache_batch(struct address_space *mapping,
 				  struct folio_batch *fbatch);
 bool filemap_release_folio(struct folio *folio, gfp_t gfp);
diff --git a/mm/filemap.c b/mm/filemap.c
index 242cd8bd83304..c4d4ace9cc700 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -788,56 +788,54 @@ int file_write_and_wait_range(struct file *file, loff_t lstart, loff_t lend)
 EXPORT_SYMBOL(file_write_and_wait_range);
 
 /**
- * replace_page_cache_page - replace a pagecache page with a new one
- * @old:	page to be replaced
- * @new:	page to replace with
- *
- * This function replaces a page in the pagecache with a new one.  On
- * success it acquires the pagecache reference for the new page and
- * drops it for the old page.  Both the old and new pages must be
- * locked.  This function does not add the new page to the LRU, the
+ * replace_page_cache_folio - replace a pagecache folio with a new one
+ * @old:	folio to be replaced
+ * @new:	folio to replace with
+ *
+ * This function replaces a folio in the pagecache with a new one.  On
+ * success it acquires the pagecache reference for the new folio and
+ * drops it for the old folio.  Both the old and new folios must be
+ * locked.  This function does not add the new folio to the LRU, the
  * caller must do that.
  *
  * The remove + add is atomic.  This function cannot fail.
  */
-void replace_page_cache_page(struct page *old, struct page *new)
+void replace_page_cache_folio(struct folio *old, struct folio *new)
 {
-	struct folio *fold = page_folio(old);
-	struct folio *fnew = page_folio(new);
 	struct address_space *mapping = old->mapping;
 	void (*free_folio)(struct folio *) = mapping->a_ops->free_folio;
 	pgoff_t offset = old->index;
 	XA_STATE(xas, &mapping->i_pages, offset);
 
-	VM_BUG_ON_PAGE(!PageLocked(old), old);
-	VM_BUG_ON_PAGE(!PageLocked(new), new);
-	VM_BUG_ON_PAGE(new->mapping, new);
+	VM_BUG_ON_FOLIO(!folio_test_locked(old), old);
+	VM_BUG_ON_FOLIO(!folio_test_locked(new), new);
+	VM_BUG_ON_FOLIO(new->mapping, new);
 
-	get_page(new);
+	folio_get(new);
 	new->mapping = mapping;
 	new->index = offset;
 
-	mem_cgroup_migrate(fold, fnew);
+	mem_cgroup_migrate(old, new);
 
 	xas_lock_irq(&xas);
 	xas_store(&xas, new);
 
 	old->mapping = NULL;
 	/* hugetlb pages do not participate in page cache accounting. */
-	if (!PageHuge(old))
-		__dec_lruvec_page_state(old, NR_FILE_PAGES);
-	if (!PageHuge(new))
-		__inc_lruvec_page_state(new, NR_FILE_PAGES);
-	if (PageSwapBacked(old))
-		__dec_lruvec_page_state(old, NR_SHMEM);
-	if (PageSwapBacked(new))
-		__inc_lruvec_page_state(new, NR_SHMEM);
+	if (!folio_test_hugetlb(old))
+		__lruvec_stat_sub_folio(old, NR_FILE_PAGES);
+	if (!folio_test_hugetlb(new))
+		__lruvec_stat_add_folio(new, NR_FILE_PAGES);
+	if (folio_test_swapbacked(old))
+		__lruvec_stat_sub_folio(old, NR_SHMEM);
+	if (folio_test_swapbacked(new))
+		__lruvec_stat_add_folio(new, NR_SHMEM);
 	xas_unlock_irq(&xas);
 	if (free_folio)
-		free_folio(fold);
-	folio_put(fold);
+		free_folio(old);
+	folio_put(old);
 }
-EXPORT_SYMBOL_GPL(replace_page_cache_page);
+EXPORT_SYMBOL_GPL(replace_page_cache_folio);
 
 noinline int __filemap_add_folio(struct address_space *mapping,
 		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
-- 
GitLab


From 063aaad792eef49a11d7575dc9914b43c0fa3792 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Tue, 1 Nov 2022 10:53:23 -0700
Subject: [PATCH 3707/4122] fuse: convert fuse_try_move_page() to use folios

Converts the function to try to move folios instead of pages. Also
converts fuse_check_page() to fuse_get_folio() since this is its only
caller. This change removes 15 calls to compound_head().

Link: https://lkml.kernel.org/r/20221101175326.13265-3-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Acked-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/fuse/dev.c | 55 ++++++++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 27 deletions(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 26817a2db4639..204c332cd3433 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -764,11 +764,11 @@ static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
 	return ncpy;
 }
 
-static int fuse_check_page(struct page *page)
+static int fuse_check_folio(struct folio *folio)
 {
-	if (page_mapcount(page) ||
-	    page->mapping != NULL ||
-	    (page->flags & PAGE_FLAGS_CHECK_AT_PREP &
+	if (folio_mapped(folio) ||
+	    folio->mapping != NULL ||
+	    (folio->flags & PAGE_FLAGS_CHECK_AT_PREP &
 	     ~(1 << PG_locked |
 	       1 << PG_referenced |
 	       1 << PG_uptodate |
@@ -778,7 +778,7 @@ static int fuse_check_page(struct page *page)
 	       1 << PG_reclaim |
 	       1 << PG_waiters |
 	       LRU_GEN_MASK | LRU_REFS_MASK))) {
-		dump_page(page, "fuse: trying to steal weird page");
+		dump_page(&folio->page, "fuse: trying to steal weird page");
 		return 1;
 	}
 	return 0;
@@ -787,11 +787,11 @@ static int fuse_check_page(struct page *page)
 static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 {
 	int err;
-	struct page *oldpage = *pagep;
-	struct page *newpage;
+	struct folio *oldfolio = page_folio(*pagep);
+	struct folio *newfolio;
 	struct pipe_buffer *buf = cs->pipebufs;
 
-	get_page(oldpage);
+	folio_get(oldfolio);
 	err = unlock_request(cs->req);
 	if (err)
 		goto out_put_old;
@@ -814,35 +814,36 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 	if (!pipe_buf_try_steal(cs->pipe, buf))
 		goto out_fallback;
 
-	newpage = buf->page;
+	newfolio = page_folio(buf->page);
 
-	if (!PageUptodate(newpage))
-		SetPageUptodate(newpage);
+	if (!folio_test_uptodate(newfolio))
+		folio_mark_uptodate(newfolio);
 
-	ClearPageMappedToDisk(newpage);
+	folio_clear_mappedtodisk(newfolio);
 
-	if (fuse_check_page(newpage) != 0)
+	if (fuse_check_folio(newfolio) != 0)
 		goto out_fallback_unlock;
 
 	/*
 	 * This is a new and locked page, it shouldn't be mapped or
 	 * have any special flags on it
 	 */
-	if (WARN_ON(page_mapped(oldpage)))
+	if (WARN_ON(folio_mapped(oldfolio)))
 		goto out_fallback_unlock;
-	if (WARN_ON(page_has_private(oldpage)))
+	if (WARN_ON(folio_has_private(oldfolio)))
 		goto out_fallback_unlock;
-	if (WARN_ON(PageDirty(oldpage) || PageWriteback(oldpage)))
+	if (WARN_ON(folio_test_dirty(oldfolio) ||
+				folio_test_writeback(oldfolio)))
 		goto out_fallback_unlock;
-	if (WARN_ON(PageMlocked(oldpage)))
+	if (WARN_ON(folio_test_mlocked(oldfolio)))
 		goto out_fallback_unlock;
 
-	replace_page_cache_folio(page_folio(oldpage), page_folio(newpage));
+	replace_page_cache_folio(oldfolio, newfolio);
 
-	get_page(newpage);
+	folio_get(newfolio);
 
 	if (!(buf->flags & PIPE_BUF_FLAG_LRU))
-		lru_cache_add(newpage);
+		folio_add_lru(newfolio);
 
 	/*
 	 * Release while we have extra ref on stolen page.  Otherwise
@@ -855,28 +856,28 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
 	if (test_bit(FR_ABORTED, &cs->req->flags))
 		err = -ENOENT;
 	else
-		*pagep = newpage;
+		*pagep = &newfolio->page;
 	spin_unlock(&cs->req->waitq.lock);
 
 	if (err) {
-		unlock_page(newpage);
-		put_page(newpage);
+		folio_unlock(newfolio);
+		folio_put(newfolio);
 		goto out_put_old;
 	}
 
-	unlock_page(oldpage);
+	folio_unlock(oldfolio);
 	/* Drop ref for ap->pages[] array */
-	put_page(oldpage);
+	folio_put(oldfolio);
 	cs->len = 0;
 
 	err = 0;
 out_put_old:
 	/* Drop ref obtained in this function */
-	put_page(oldpage);
+	folio_put(oldfolio);
 	return err;
 
 out_fallback_unlock:
-	unlock_page(newpage);
+	folio_unlock(newfolio);
 out_fallback:
 	cs->pg = buf->page;
 	cs->offset = buf->offset;
-- 
GitLab


From 28965f0f8be62e1ed8296fe0240b5d5dc064b681 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Tue, 1 Nov 2022 10:53:24 -0700
Subject: [PATCH 3708/4122] userfaultfd: replace lru_cache functions with
 folio_add functions

Replaces lru_cache_add() and lru_cache_add_inactive_or_unevictable() with
folio_add_lru() and folio_add_lru_vma().  This is in preparation for the
removal of lru_cache_add().

Link: https://lkml.kernel.org/r/20221101175326.13265-4-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/userfaultfd.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 650ab6cfd5f49..b7a9479bece21 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -66,6 +66,7 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 	bool vm_shared = dst_vma->vm_flags & VM_SHARED;
 	bool page_in_cache = page_mapping(page);
 	spinlock_t *ptl;
+	struct folio *folio;
 	struct inode *inode;
 	pgoff_t offset, max_off;
 
@@ -113,14 +114,15 @@ int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 	if (!pte_none_mostly(*dst_pte))
 		goto out_unlock;
 
+	folio = page_folio(page);
 	if (page_in_cache) {
 		/* Usually, cache pages are already added to LRU */
 		if (newly_allocated)
-			lru_cache_add(page);
+			folio_add_lru(folio);
 		page_add_file_rmap(page, dst_vma, false);
 	} else {
 		page_add_new_anon_rmap(page, dst_vma, dst_addr);
-		lru_cache_add_inactive_or_unevictable(page, dst_vma);
+		folio_add_lru_vma(folio, dst_vma);
 	}
 
 	/*
-- 
GitLab


From 284a344ed19dc92526024d062b30f90774fea50f Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Tue, 1 Nov 2022 10:53:25 -0700
Subject: [PATCH 3709/4122] khugepage: replace lru_cache_add() with
 folio_add_lru()

Replaces some calls with their folio equivalents.  This is in preparation
for the removal of lru_cache_add().  This replaces 3 calls to
compound_head() with 1.

Link: https://lkml.kernel.org/r/20221101175326.13265-5-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/khugepaged.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 78ec2771cc652..5a7d2d5093f9c 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -2013,6 +2013,7 @@ xa_unlocked:
 
 	if (result == SCAN_SUCCEED) {
 		struct page *page, *tmp;
+		struct folio *folio;
 
 		/*
 		 * Replacing old pages with new one has succeeded, now we
@@ -2040,11 +2041,13 @@ xa_unlocked:
 			index++;
 		}
 
-		SetPageUptodate(hpage);
-		page_ref_add(hpage, HPAGE_PMD_NR - 1);
+		folio = page_folio(hpage);
+		folio_mark_uptodate(folio);
+		folio_ref_add(folio, HPAGE_PMD_NR - 1);
+
 		if (is_shmem)
-			set_page_dirty(hpage);
-		lru_cache_add(hpage);
+			folio_mark_dirty(folio);
+		folio_add_lru(folio);
 
 		/*
 		 * Remove pte page tables, so we can re-fault the page as huge.
-- 
GitLab


From 6e1ca48d0669b0f5efcbaa051b23cd8e651a1614 Mon Sep 17 00:00:00 2001
From: "Vishal Moola (Oracle)" <vishal.moola@gmail.com>
Date: Tue, 1 Nov 2022 10:53:26 -0700
Subject: [PATCH 3710/4122] folio-compat: remove lru_cache_add()

There are no longer any callers of lru_cache_add(), so remove it.  This
saves 79 bytes of kernel text.  Also cleanup some comments such that
they reference the new folio_add_lru() instead.

Link: https://lkml.kernel.org/r/20221101175326.13265-6-vishal.moola@gmail.com
Signed-off-by: Vishal Moola (Oracle) <vishal.moola@gmail.com>
Reviewed-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/swap.h | 1 -
 mm/folio-compat.c    | 6 ------
 mm/truncate.c        | 2 +-
 mm/workingset.c      | 5 ++++-
 4 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/include/linux/swap.h b/include/linux/swap.h
index b61e2007d1568..0ceed49516adc 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -381,7 +381,6 @@ void lru_note_cost(struct lruvec *lruvec, bool file,
 void lru_note_cost_refault(struct folio *);
 void folio_add_lru(struct folio *);
 void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
-void lru_cache_add(struct page *);
 void mark_page_accessed(struct page *);
 void folio_mark_accessed(struct folio *);
 
diff --git a/mm/folio-compat.c b/mm/folio-compat.c
index 86933fa8f3e11..69ed25790c68d 100644
--- a/mm/folio-compat.c
+++ b/mm/folio-compat.c
@@ -76,12 +76,6 @@ bool redirty_page_for_writepage(struct writeback_control *wbc,
 }
 EXPORT_SYMBOL(redirty_page_for_writepage);
 
-void lru_cache_add(struct page *page)
-{
-	folio_add_lru(page_folio(page));
-}
-EXPORT_SYMBOL(lru_cache_add);
-
 void lru_cache_add_inactive_or_unevictable(struct page *page,
 		struct vm_area_struct *vma)
 {
diff --git a/mm/truncate.c b/mm/truncate.c
index c7bfd247a651c..7b4ea4c4a46b2 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -565,7 +565,7 @@ EXPORT_SYMBOL(invalidate_mapping_pages);
  * refcount.  We do this because invalidate_inode_pages2() needs stronger
  * invalidation guarantees, and cannot afford to leave pages behind because
  * shrink_page_list() has a temp ref on them, or because they're transiently
- * sitting in the lru_cache_add() pagevecs.
+ * sitting in the folio_add_lru() pagevecs.
  */
 static int invalidate_complete_folio2(struct address_space *mapping,
 					struct folio *folio)
diff --git a/mm/workingset.c b/mm/workingset.c
index d2d02978588cf..1a86645b7b3c6 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -492,7 +492,10 @@ void workingset_refault(struct folio *folio, void *shadow)
 	/* Folio was active prior to eviction */
 	if (workingset) {
 		folio_set_workingset(folio);
-		/* XXX: Move to lru_cache_add() when it supports new vs putback */
+		/*
+		 * XXX: Move to folio_add_lru() when it supports new vs
+		 * putback
+		 */
 		lru_note_cost_refault(folio);
 		mod_lruvec_state(lruvec, WORKINGSET_RESTORE_BASE + file, nr);
 	}
-- 
GitLab


From 9fd330582b2fe43c49ebcd02b2480f051f85aad4 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:30 -0800
Subject: [PATCH 3711/4122] mm: add folio dtor and order setter functions

Patch series "convert core hugetlb functions to folios", v5.

============== OVERVIEW ===========================
Now that many hugetlb helper functions that deal with hugetlb specific
flags[1] and hugetlb cgroups[2] are converted to folios, higher level
allocation, prep, and freeing functions within hugetlb can also be
converted to operate in folios.

Patch 1 of this series implements the wrapper functions around setting the
compound destructor and compound order for a folio.  Besides the user
added in patch 1, patch 2 and patch 9 also use these helper functions.

Patches 2-10 convert the higher level hugetlb functions to folios.

============== TESTING ===========================
LTP:
	Ran 10 back to back rounds of the LTP hugetlb test suite.

Gigantic Huge Pages:
	Test allocation and freeing via hugeadm commands:
		hugeadm --pool-pages-min 1GB:10
		hugeadm --pool-pages-min 1GB:0

Demote:
	Demote 1 1GB hugepages to 512 2MB hugepages
		echo 1 > /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
		echo 1 > /sys/kernel/mm/hugepages/hugepages-1048576kB/demote
		cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
			# 512
		cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
			# 0

[1] https://lore.kernel.org/lkml/20220922154207.1575343-1-sidhartha.kumar@oracle.com/
[2] https://lore.kernel.org/linux-mm/20221101223059.460937-1-sidhartha.kumar@oracle.com/


This patch (of 10):

Add folio equivalents for set_compound_order() and
set_compound_page_dtor().

Also remove extra new-lines introduced by mm/hugetlb: convert
move_hugetlb_state() to folios and mm/hugetlb_cgroup: convert
hugetlb_cgroup_uncharge_page() to folios.

[sidhartha.kumar@oracle.com: clarify folio_set_compound_order() zero support]
  Link: https://lkml.kernel.org/r/20221207223731.32784-1-sidhartha.kumar@oracle.com
Link: https://lkml.kernel.org/r/20221129225039.82257-1-sidhartha.kumar@oracle.com
Link: https://lkml.kernel.org/r/20221129225039.82257-2-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Suggested-by: Mike Kravetz <mike.kravetz@oracle.com>
Suggested-by: Muchun Song <songmuchun@bytedance.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mm.h | 23 +++++++++++++++++++++++
 mm/hugetlb.c       |  4 +---
 2 files changed, 24 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 472cb60ace075..7dc376052d400 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -997,6 +997,13 @@ static inline void set_compound_page_dtor(struct page *page,
 	page[1].compound_dtor = compound_dtor;
 }
 
+static inline void folio_set_compound_dtor(struct folio *folio,
+		enum compound_dtor_id compound_dtor)
+{
+	VM_BUG_ON_FOLIO(compound_dtor >= NR_COMPOUND_DTORS, folio);
+	folio->_folio_dtor = compound_dtor;
+}
+
 void destroy_large_folio(struct folio *folio);
 
 static inline int head_compound_pincount(struct page *head)
@@ -1012,6 +1019,22 @@ static inline void set_compound_order(struct page *page, unsigned int order)
 #endif
 }
 
+/*
+ * folio_set_compound_order is generally passed a non-zero order to
+ * initialize a large folio.  However, hugetlb code abuses this by
+ * passing in zero when 'dissolving' a large folio.
+ */
+static inline void folio_set_compound_order(struct folio *folio,
+		unsigned int order)
+{
+	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
+
+	folio->_folio_order = order;
+#ifdef CONFIG_64BIT
+	folio->_folio_nr_pages = order ? 1U << order : 0;
+#endif
+}
+
 /* Returns the number of pages in this potentially compound page. */
 static inline unsigned long compound_nr(struct page *page)
 {
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9d97c9a2a15df..22512f7b0237c 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1780,7 +1780,7 @@ static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
 	hugetlb_vmemmap_optimize(h, &folio->page);
 	INIT_LIST_HEAD(&folio->lru);
-	folio->_folio_dtor = HUGETLB_PAGE_DTOR;
+	folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
 	hugetlb_set_folio_subpool(folio, NULL);
 	set_hugetlb_cgroup(folio, NULL);
 	set_hugetlb_cgroup_rsvd(folio, NULL);
@@ -2938,7 +2938,6 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 	 * a reservation exists for the allocation.
 	 */
 	page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
-
 	if (!page) {
 		spin_unlock_irq(&hugetlb_lock);
 		page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
@@ -7343,7 +7342,6 @@ void move_hugetlb_state(struct folio *old_folio, struct folio *new_folio, int re
 		int old_nid = folio_nid(old_folio);
 		int new_nid = folio_nid(new_folio);
 
-
 		folio_set_hugetlb_temporary(old_folio);
 		folio_clear_hugetlb_temporary(new_folio);
 
-- 
GitLab


From 911565b8285381e62d3bfd0cae2889a022737c37 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:31 -0800
Subject: [PATCH 3712/4122] mm/hugetlb: convert
 destroy_compound_gigantic_page() to folios

Convert page operations within __destroy_compound_gigantic_page() to the
corresponding folio operations.

Link: https://lkml.kernel.org/r/20221129225039.82257-3-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 43 +++++++++++++++++++++----------------------
 1 file changed, 21 insertions(+), 22 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 22512f7b0237c..5960a05cb3703 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1325,43 +1325,40 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 		nr_nodes--)
 
 /* used to demote non-gigantic_huge pages as well */
-static void __destroy_compound_gigantic_page(struct page *page,
+static void __destroy_compound_gigantic_folio(struct folio *folio,
 					unsigned int order, bool demote)
 {
 	int i;
 	int nr_pages = 1 << order;
 	struct page *p;
 
-	atomic_set(compound_mapcount_ptr(page), 0);
-	atomic_set(subpages_mapcount_ptr(page), 0);
-	atomic_set(compound_pincount_ptr(page), 0);
+	atomic_set(folio_mapcount_ptr(folio), 0);
+	atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+	atomic_set(folio_pincount_ptr(folio), 0);
 
 	for (i = 1; i < nr_pages; i++) {
-		p = nth_page(page, i);
+		p = folio_page(folio, i);
 		p->mapping = NULL;
 		clear_compound_head(p);
 		if (!demote)
 			set_page_refcounted(p);
 	}
 
-	set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
-	page[1].compound_nr = 0;
-#endif
-	__ClearPageHead(page);
+	folio_set_compound_order(folio, 0);
+	__folio_clear_head(folio);
 }
 
-static void destroy_compound_hugetlb_page_for_demote(struct page *page,
+static void destroy_compound_hugetlb_folio_for_demote(struct folio *folio,
 					unsigned int order)
 {
-	__destroy_compound_gigantic_page(page, order, true);
+	__destroy_compound_gigantic_folio(folio, order, true);
 }
 
 #ifdef CONFIG_ARCH_HAS_GIGANTIC_PAGE
-static void destroy_compound_gigantic_page(struct page *page,
+static void destroy_compound_gigantic_folio(struct folio *folio,
 					unsigned int order)
 {
-	__destroy_compound_gigantic_page(page, order, false);
+	__destroy_compound_gigantic_folio(folio, order, false);
 }
 
 static void free_gigantic_page(struct page *page, unsigned int order)
@@ -1430,7 +1427,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 	return NULL;
 }
 static inline void free_gigantic_page(struct page *page, unsigned int order) { }
-static inline void destroy_compound_gigantic_page(struct page *page,
+static inline void destroy_compound_gigantic_folio(struct folio *folio,
 						unsigned int order) { }
 #endif
 
@@ -1477,8 +1474,8 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page,
 	 *
 	 * For gigantic pages set the destructor to the null dtor.  This
 	 * destructor will never be called.  Before freeing the gigantic
-	 * page destroy_compound_gigantic_page will turn the compound page
-	 * into a simple group of pages.  After this the destructor does not
+	 * page destroy_compound_gigantic_folio will turn the folio into a
+	 * simple group of pages.  After this the destructor does not
 	 * apply.
 	 *
 	 * This handles the case where more than one ref is held when and
@@ -1559,6 +1556,7 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
 static void __update_and_free_page(struct hstate *h, struct page *page)
 {
 	int i;
+	struct folio *folio = page_folio(page);
 	struct page *subpage;
 
 	if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
@@ -1587,8 +1585,8 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 	 * Move PageHWPoison flag from head page to the raw error pages,
 	 * which makes any healthy subpages reusable.
 	 */
-	if (unlikely(PageHWPoison(page)))
-		hugetlb_clear_page_hwpoison(page);
+	if (unlikely(folio_test_hwpoison(folio)))
+		hugetlb_clear_page_hwpoison(&folio->page);
 
 	for (i = 0; i < pages_per_huge_page(h); i++) {
 		subpage = nth_page(page, i);
@@ -1604,7 +1602,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 	 */
 	if (hstate_is_gigantic(h) ||
 	    hugetlb_cma_page(page, huge_page_order(h))) {
-		destroy_compound_gigantic_page(page, huge_page_order(h));
+		destroy_compound_gigantic_folio(folio, huge_page_order(h));
 		free_gigantic_page(page, huge_page_order(h));
 	} else {
 		__free_pages(page, huge_page_order(h));
@@ -3437,6 +3435,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
 {
 	int i, nid = page_to_nid(page);
 	struct hstate *target_hstate;
+	struct folio *folio = page_folio(page);
 	struct page *subpage;
 	int rc = 0;
 
@@ -3455,10 +3454,10 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
 	}
 
 	/*
-	 * Use destroy_compound_hugetlb_page_for_demote for all huge page
+	 * Use destroy_compound_hugetlb_folio_for_demote for all huge page
 	 * sizes as it will not ref count pages.
 	 */
-	destroy_compound_hugetlb_page_for_demote(page, huge_page_order(h));
+	destroy_compound_hugetlb_folio_for_demote(folio, huge_page_order(h));
 
 	/*
 	 * Taking target hstate mutex synchronizes with set_max_huge_pages.
-- 
GitLab


From 1a7cdab59b22465b850501e3897a3f3aa01670d8 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:32 -0800
Subject: [PATCH 3713/4122] mm/hugetlb: convert dissolve_free_huge_page() to
 folios

Removes compound_head() call by using a folio rather than a head page.

Link: https://lkml.kernel.org/r/20221129225039.82257-4-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5960a05cb3703..d02293fd2e644 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2128,21 +2128,21 @@ static struct page *remove_pool_huge_page(struct hstate *h,
 int dissolve_free_huge_page(struct page *page)
 {
 	int rc = -EBUSY;
+	struct folio *folio = page_folio(page);
 
 retry:
 	/* Not to disrupt normal path by vainly holding hugetlb_lock */
-	if (!PageHuge(page))
+	if (!folio_test_hugetlb(folio))
 		return 0;
 
 	spin_lock_irq(&hugetlb_lock);
-	if (!PageHuge(page)) {
+	if (!folio_test_hugetlb(folio)) {
 		rc = 0;
 		goto out;
 	}
 
-	if (!page_count(page)) {
-		struct page *head = compound_head(page);
-		struct hstate *h = page_hstate(head);
+	if (!folio_ref_count(folio)) {
+		struct hstate *h = folio_hstate(folio);
 		if (!available_huge_pages(h))
 			goto out;
 
@@ -2150,7 +2150,7 @@ retry:
 		 * We should make sure that the page is already on the free list
 		 * when it is dissolved.
 		 */
-		if (unlikely(!HPageFreed(head))) {
+		if (unlikely(!folio_test_hugetlb_freed(folio))) {
 			spin_unlock_irq(&hugetlb_lock);
 			cond_resched();
 
@@ -2165,7 +2165,7 @@ retry:
 			goto retry;
 		}
 
-		remove_hugetlb_page(h, head, false);
+		remove_hugetlb_page(h, &folio->page, false);
 		h->max_huge_pages--;
 		spin_unlock_irq(&hugetlb_lock);
 
@@ -2177,12 +2177,12 @@ retry:
 		 * Attempt to allocate vmemmmap here so that we can take
 		 * appropriate action on failure.
 		 */
-		rc = hugetlb_vmemmap_restore(h, head);
+		rc = hugetlb_vmemmap_restore(h, &folio->page);
 		if (!rc) {
-			update_and_free_page(h, head, false);
+			update_and_free_page(h, &folio->page, false);
 		} else {
 			spin_lock_irq(&hugetlb_lock);
-			add_hugetlb_page(h, head, false);
+			add_hugetlb_page(h, &folio->page, false);
 			h->max_huge_pages++;
 			spin_unlock_irq(&hugetlb_lock);
 		}
-- 
GitLab


From cfd5082b514765f873504cc60a50cce30738bfd3 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:33 -0800
Subject: [PATCH 3714/4122] mm/hugetlb: convert remove_hugetlb_page() to folios

Removes page_folio() call by converting callers to directly pass a folio
into __remove_hugetlb_page().

Link: https://lkml.kernel.org/r/20221129225039.82257-5-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 48 +++++++++++++++++++++++++-----------------------
 1 file changed, 25 insertions(+), 23 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d02293fd2e644..9b1c9d05ba343 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1432,19 +1432,18 @@ static inline void destroy_compound_gigantic_folio(struct folio *folio,
 #endif
 
 /*
- * Remove hugetlb page from lists, and update dtor so that page appears
+ * Remove hugetlb folio from lists, and update dtor so that the folio appears
  * as just a compound page.
  *
- * A reference is held on the page, except in the case of demote.
+ * A reference is held on the folio, except in the case of demote.
  *
  * Must be called with hugetlb lock held.
  */
-static void __remove_hugetlb_page(struct hstate *h, struct page *page,
+static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
 							bool adjust_surplus,
 							bool demote)
 {
-	int nid = page_to_nid(page);
-	struct folio *folio = page_folio(page);
+	int nid = folio_nid(folio);
 
 	VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio(folio), folio);
 	VM_BUG_ON_FOLIO(hugetlb_cgroup_from_folio_rsvd(folio), folio);
@@ -1453,9 +1452,9 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page,
 	if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
 		return;
 
-	list_del(&page->lru);
+	list_del(&folio->lru);
 
-	if (HPageFreed(page)) {
+	if (folio_test_hugetlb_freed(folio)) {
 		h->free_huge_pages--;
 		h->free_huge_pages_node[nid]--;
 	}
@@ -1485,26 +1484,26 @@ static void __remove_hugetlb_page(struct hstate *h, struct page *page,
 	 * be turned into a page of smaller size.
 	 */
 	if (!demote)
-		set_page_refcounted(page);
+		folio_ref_unfreeze(folio, 1);
 	if (hstate_is_gigantic(h))
-		set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+		folio_set_compound_dtor(folio, NULL_COMPOUND_DTOR);
 	else
-		set_compound_page_dtor(page, COMPOUND_PAGE_DTOR);
+		folio_set_compound_dtor(folio, COMPOUND_PAGE_DTOR);
 
 	h->nr_huge_pages--;
 	h->nr_huge_pages_node[nid]--;
 }
 
-static void remove_hugetlb_page(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio(struct hstate *h, struct folio *folio,
 							bool adjust_surplus)
 {
-	__remove_hugetlb_page(h, page, adjust_surplus, false);
+	__remove_hugetlb_folio(h, folio, adjust_surplus, false);
 }
 
-static void remove_hugetlb_page_for_demote(struct hstate *h, struct page *page,
+static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *folio,
 							bool adjust_surplus)
 {
-	__remove_hugetlb_page(h, page, adjust_surplus, true);
+	__remove_hugetlb_folio(h, folio, adjust_surplus, true);
 }
 
 static void add_hugetlb_page(struct hstate *h, struct page *page,
@@ -1639,8 +1638,9 @@ static void free_hpage_workfn(struct work_struct *work)
 		/*
 		 * The VM_BUG_ON_PAGE(!PageHuge(page), page) in page_hstate()
 		 * is going to trigger because a previous call to
-		 * remove_hugetlb_page() will set_compound_page_dtor(page,
-		 * NULL_COMPOUND_DTOR), so do not use page_hstate() directly.
+		 * remove_hugetlb_folio() will call folio_set_compound_dtor
+		 * (folio, NULL_COMPOUND_DTOR), so do not use page_hstate()
+		 * directly.
 		 */
 		h = size_to_hstate(page_size(page));
 
@@ -1749,12 +1749,12 @@ void free_huge_page(struct page *page)
 		h->resv_huge_pages++;
 
 	if (folio_test_hugetlb_temporary(folio)) {
-		remove_hugetlb_page(h, page, false);
+		remove_hugetlb_folio(h, folio, false);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
 		update_and_free_page(h, page, true);
 	} else if (h->surplus_huge_pages_node[nid]) {
 		/* remove the page from active list */
-		remove_hugetlb_page(h, page, true);
+		remove_hugetlb_folio(h, folio, true);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
 		update_and_free_page(h, page, true);
 	} else {
@@ -2092,6 +2092,7 @@ static struct page *remove_pool_huge_page(struct hstate *h,
 {
 	int nr_nodes, node;
 	struct page *page = NULL;
+	struct folio *folio;
 
 	lockdep_assert_held(&hugetlb_lock);
 	for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
@@ -2103,7 +2104,8 @@ static struct page *remove_pool_huge_page(struct hstate *h,
 		    !list_empty(&h->hugepage_freelists[node])) {
 			page = list_entry(h->hugepage_freelists[node].next,
 					  struct page, lru);
-			remove_hugetlb_page(h, page, acct_surplus);
+			folio = page_folio(page);
+			remove_hugetlb_folio(h, folio, acct_surplus);
 			break;
 		}
 	}
@@ -2165,7 +2167,7 @@ retry:
 			goto retry;
 		}
 
-		remove_hugetlb_page(h, &folio->page, false);
+		remove_hugetlb_folio(h, folio, false);
 		h->max_huge_pages--;
 		spin_unlock_irq(&hugetlb_lock);
 
@@ -2803,7 +2805,7 @@ retry:
 		 * and enqueue_huge_page() for new_page. The counters will remain
 		 * stable since this happens under the lock.
 		 */
-		remove_hugetlb_page(h, old_page, false);
+		remove_hugetlb_folio(h, old_folio, false);
 
 		/*
 		 * Ref count on new page is already zero as it was dropped
@@ -3230,7 +3232,7 @@ static void try_to_free_low(struct hstate *h, unsigned long count,
 				goto out;
 			if (PageHighMem(page))
 				continue;
-			remove_hugetlb_page(h, page, false);
+			remove_hugetlb_folio(h, page_folio(page), false);
 			list_add(&page->lru, &page_list);
 		}
 	}
@@ -3441,7 +3443,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
 
 	target_hstate = size_to_hstate(PAGE_SIZE << h->demote_order);
 
-	remove_hugetlb_page_for_demote(h, page, false);
+	remove_hugetlb_folio_for_demote(h, folio, false);
 	spin_unlock_irq(&hugetlb_lock);
 
 	rc = hugetlb_vmemmap_restore(h, page);
-- 
GitLab


From d6ef19e25df2aa50f932a78c368d7bb710eaaa1b Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:34 -0800
Subject: [PATCH 3715/4122] mm/hugetlb: convert update_and_free_page() to
 folios

Make more progress on converting the free_huge_page() destructor to
operate on folios by converting update_and_free_page() to folios.

Link: https://lkml.kernel.org/r/20221129225039.82257-6-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>\
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 30 ++++++++++++++++--------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 9b1c9d05ba343..5f3622ce791f2 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1478,7 +1478,7 @@ static void __remove_hugetlb_folio(struct hstate *h, struct folio *folio,
 	 * apply.
 	 *
 	 * This handles the case where more than one ref is held when and
-	 * after update_and_free_page is called.
+	 * after update_and_free_hugetlb_folio is called.
 	 *
 	 * In the case of demote we do not ref count the page as it will soon
 	 * be turned into a page of smaller size.
@@ -1609,7 +1609,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 }
 
 /*
- * As update_and_free_page() can be called under any context, so we cannot
+ * As update_and_free_hugetlb_folio() can be called under any context, so we cannot
  * use GFP_KERNEL to allocate vmemmap pages. However, we can defer the
  * actual freeing in a workqueue to prevent from using GFP_ATOMIC to allocate
  * the vmemmap pages.
@@ -1657,11 +1657,11 @@ static inline void flush_free_hpage_work(struct hstate *h)
 		flush_work(&free_hpage_work);
 }
 
-static void update_and_free_page(struct hstate *h, struct page *page,
+static void update_and_free_hugetlb_folio(struct hstate *h, struct folio *folio,
 				 bool atomic)
 {
-	if (!HPageVmemmapOptimized(page) || !atomic) {
-		__update_and_free_page(h, page);
+	if (!folio_test_hugetlb_vmemmap_optimized(folio) || !atomic) {
+		__update_and_free_page(h, &folio->page);
 		return;
 	}
 
@@ -1672,16 +1672,18 @@ static void update_and_free_page(struct hstate *h, struct page *page,
 	 * empty. Otherwise, schedule_work() had been called but the workfn
 	 * hasn't retrieved the list yet.
 	 */
-	if (llist_add((struct llist_node *)&page->mapping, &hpage_freelist))
+	if (llist_add((struct llist_node *)&folio->mapping, &hpage_freelist))
 		schedule_work(&free_hpage_work);
 }
 
 static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
 {
 	struct page *page, *t_page;
+	struct folio *folio;
 
 	list_for_each_entry_safe(page, t_page, list, lru) {
-		update_and_free_page(h, page, false);
+		folio = page_folio(page);
+		update_and_free_hugetlb_folio(h, folio, false);
 		cond_resched();
 	}
 }
@@ -1751,12 +1753,12 @@ void free_huge_page(struct page *page)
 	if (folio_test_hugetlb_temporary(folio)) {
 		remove_hugetlb_folio(h, folio, false);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
-		update_and_free_page(h, page, true);
+		update_and_free_hugetlb_folio(h, folio, true);
 	} else if (h->surplus_huge_pages_node[nid]) {
 		/* remove the page from active list */
 		remove_hugetlb_folio(h, folio, true);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
-		update_and_free_page(h, page, true);
+		update_and_free_hugetlb_folio(h, folio, true);
 	} else {
 		arch_clear_hugepage_flags(page);
 		enqueue_huge_page(h, page);
@@ -2172,8 +2174,8 @@ retry:
 		spin_unlock_irq(&hugetlb_lock);
 
 		/*
-		 * Normally update_and_free_page will allocate required vmemmmap
-		 * before freeing the page.  update_and_free_page will fail to
+		 * Normally update_and_free_hugtlb_folio will allocate required vmemmmap
+		 * before freeing the page.  update_and_free_hugtlb_folio will fail to
 		 * free the page if it can not allocate required vmemmap.  We
 		 * need to adjust max_huge_pages if the page is not freed.
 		 * Attempt to allocate vmemmmap here so that we can take
@@ -2181,7 +2183,7 @@ retry:
 		 */
 		rc = hugetlb_vmemmap_restore(h, &folio->page);
 		if (!rc) {
-			update_and_free_page(h, &folio->page, false);
+			update_and_free_hugetlb_folio(h, folio, false);
 		} else {
 			spin_lock_irq(&hugetlb_lock);
 			add_hugetlb_page(h, &folio->page, false);
@@ -2818,7 +2820,7 @@ retry:
 		 * Pages have been replaced, we can safely free the old one.
 		 */
 		spin_unlock_irq(&hugetlb_lock);
-		update_and_free_page(h, old_page, false);
+		update_and_free_hugetlb_folio(h, old_folio, false);
 	}
 
 	return ret;
@@ -2827,7 +2829,7 @@ free_new:
 	spin_unlock_irq(&hugetlb_lock);
 	/* Page has a zero ref count, but needs a ref to be freed */
 	folio_ref_unfreeze(new_folio, 1);
-	update_and_free_page(h, new_page, false);
+	update_and_free_hugetlb_folio(h, new_folio, false);
 
 	return ret;
 }
-- 
GitLab


From 2f6c57d696abcd2d27d07b8506d5e6bcc060e77a Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:35 -0800
Subject: [PATCH 3716/4122] mm/hugetlb: convert add_hugetlb_page() to folios
 and add hugetlb_cma_folio()

Convert add_hugetlb_page() to take in a folio, also convert
hugetlb_cma_page() to take in a folio.

Link: https://lkml.kernel.org/r/20221129225039.82257-7-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 5f3622ce791f2..d80a83490b9ae 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -54,13 +54,13 @@ struct hstate hstates[HUGE_MAX_HSTATE];
 #ifdef CONFIG_CMA
 static struct cma *hugetlb_cma[MAX_NUMNODES];
 static unsigned long hugetlb_cma_size_in_node[MAX_NUMNODES] __initdata;
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
 {
-	return cma_pages_valid(hugetlb_cma[page_to_nid(page)], page,
+	return cma_pages_valid(hugetlb_cma[folio_nid(folio)], &folio->page,
 				1 << order);
 }
 #else
-static bool hugetlb_cma_page(struct page *page, unsigned int order)
+static bool hugetlb_cma_folio(struct folio *folio, unsigned int order)
 {
 	return false;
 }
@@ -1506,17 +1506,17 @@ static void remove_hugetlb_folio_for_demote(struct hstate *h, struct folio *foli
 	__remove_hugetlb_folio(h, folio, adjust_surplus, true);
 }
 
-static void add_hugetlb_page(struct hstate *h, struct page *page,
+static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
 			     bool adjust_surplus)
 {
 	int zeroed;
-	int nid = page_to_nid(page);
+	int nid = folio_nid(folio);
 
-	VM_BUG_ON_PAGE(!HPageVmemmapOptimized(page), page);
+	VM_BUG_ON_FOLIO(!folio_test_hugetlb_vmemmap_optimized(folio), folio);
 
 	lockdep_assert_held(&hugetlb_lock);
 
-	INIT_LIST_HEAD(&page->lru);
+	INIT_LIST_HEAD(&folio->lru);
 	h->nr_huge_pages++;
 	h->nr_huge_pages_node[nid]++;
 
@@ -1525,21 +1525,21 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
 		h->surplus_huge_pages_node[nid]++;
 	}
 
-	set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
-	set_page_private(page, 0);
+	folio_set_compound_dtor(folio, HUGETLB_PAGE_DTOR);
+	folio_change_private(folio, NULL);
 	/*
-	 * We have to set HPageVmemmapOptimized again as above
-	 * set_page_private(page, 0) cleared it.
+	 * We have to set hugetlb_vmemmap_optimized again as above
+	 * folio_change_private(folio, NULL) cleared it.
 	 */
-	SetHPageVmemmapOptimized(page);
+	folio_set_hugetlb_vmemmap_optimized(folio);
 
 	/*
-	 * This page is about to be managed by the hugetlb allocator and
+	 * This folio is about to be managed by the hugetlb allocator and
 	 * should have no users.  Drop our reference, and check for others
 	 * just in case.
 	 */
-	zeroed = put_page_testzero(page);
-	if (!zeroed)
+	zeroed = folio_put_testzero(folio);
+	if (unlikely(!zeroed))
 		/*
 		 * It is VERY unlikely soneone else has taken a ref on
 		 * the page.  In this case, we simply return as the
@@ -1548,8 +1548,8 @@ static void add_hugetlb_page(struct hstate *h, struct page *page,
 		 */
 		return;
 
-	arch_clear_hugepage_flags(page);
-	enqueue_huge_page(h, page);
+	arch_clear_hugepage_flags(&folio->page);
+	enqueue_huge_page(h, &folio->page);
 }
 
 static void __update_and_free_page(struct hstate *h, struct page *page)
@@ -1575,7 +1575,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 		 * page and put the page back on the hugetlb free list and treat
 		 * as a surplus page.
 		 */
-		add_hugetlb_page(h, page, true);
+		add_hugetlb_folio(h, page_folio(page), true);
 		spin_unlock_irq(&hugetlb_lock);
 		return;
 	}
@@ -1600,7 +1600,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 	 * need to be given back to CMA in free_gigantic_page.
 	 */
 	if (hstate_is_gigantic(h) ||
-	    hugetlb_cma_page(page, huge_page_order(h))) {
+	    hugetlb_cma_folio(folio, huge_page_order(h))) {
 		destroy_compound_gigantic_folio(folio, huge_page_order(h));
 		free_gigantic_page(page, huge_page_order(h));
 	} else {
@@ -2186,7 +2186,7 @@ retry:
 			update_and_free_hugetlb_folio(h, folio, false);
 		} else {
 			spin_lock_irq(&hugetlb_lock);
-			add_hugetlb_page(h, &folio->page, false);
+			add_hugetlb_folio(h, folio, false);
 			h->max_huge_pages++;
 			spin_unlock_irq(&hugetlb_lock);
 		}
@@ -3453,7 +3453,7 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
 		/* Allocation of vmemmmap failed, we can not demote page */
 		spin_lock_irq(&hugetlb_lock);
 		set_page_refcounted(page);
-		add_hugetlb_page(h, page, false);
+		add_hugetlb_folio(h, page_folio(page), false);
 		return rc;
 	}
 
-- 
GitLab


From 240d67a86ecb0fa18863821a0cb55783ad50ef30 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:36 -0800
Subject: [PATCH 3717/4122] mm/hugetlb: convert enqueue_huge_page() to folios

Convert callers of enqueue_huge_page() to pass in a folio, function is
renamed to enqueue_hugetlb_folio().

Link: https://lkml.kernel.org/r/20221129225039.82257-8-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d80a83490b9ae..6da673f1d8305 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1127,17 +1127,17 @@ static bool vma_has_reserves(struct vm_area_struct *vma, long chg)
 	return false;
 }
 
-static void enqueue_huge_page(struct hstate *h, struct page *page)
+static void enqueue_hugetlb_folio(struct hstate *h, struct folio *folio)
 {
-	int nid = page_to_nid(page);
+	int nid = folio_nid(folio);
 
 	lockdep_assert_held(&hugetlb_lock);
-	VM_BUG_ON_PAGE(page_count(page), page);
+	VM_BUG_ON_FOLIO(folio_ref_count(folio), folio);
 
-	list_move(&page->lru, &h->hugepage_freelists[nid]);
+	list_move(&folio->lru, &h->hugepage_freelists[nid]);
 	h->free_huge_pages++;
 	h->free_huge_pages_node[nid]++;
-	SetHPageFreed(page);
+	folio_set_hugetlb_freed(folio);
 }
 
 static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@@ -1549,7 +1549,7 @@ static void add_hugetlb_folio(struct hstate *h, struct folio *folio,
 		return;
 
 	arch_clear_hugepage_flags(&folio->page);
-	enqueue_huge_page(h, &folio->page);
+	enqueue_hugetlb_folio(h, folio);
 }
 
 static void __update_and_free_page(struct hstate *h, struct page *page)
@@ -1761,7 +1761,7 @@ void free_huge_page(struct page *page)
 		update_and_free_hugetlb_folio(h, folio, true);
 	} else {
 		arch_clear_hugepage_flags(page);
-		enqueue_huge_page(h, page);
+		enqueue_hugetlb_folio(h, folio);
 		spin_unlock_irqrestore(&hugetlb_lock, flags);
 	}
 }
@@ -2438,7 +2438,7 @@ retry:
 		if ((--needed) < 0)
 			break;
 		/* Add the page to the hugetlb allocator */
-		enqueue_huge_page(h, page);
+		enqueue_hugetlb_folio(h, page_folio(page));
 	}
 free:
 	spin_unlock_irq(&hugetlb_lock);
@@ -2804,8 +2804,8 @@ retry:
 		 * Ok, old_page is still a genuine free hugepage. Remove it from
 		 * the freelist and decrease the counters. These will be
 		 * incremented again when calling __prep_account_new_huge_page()
-		 * and enqueue_huge_page() for new_page. The counters will remain
-		 * stable since this happens under the lock.
+		 * and enqueue_hugetlb_folio() for new_folio. The counters will
+		 * remain stable since this happens under the lock.
 		 */
 		remove_hugetlb_folio(h, old_folio, false);
 
@@ -2814,7 +2814,7 @@ retry:
 		 * earlier.  It can be directly added to the pool free list.
 		 */
 		__prep_account_new_huge_page(h, nid);
-		enqueue_huge_page(h, new_page);
+		enqueue_hugetlb_folio(h, new_folio);
 
 		/*
 		 * Pages have been replaced, we can safely free the old one.
-- 
GitLab


From 7f325a8d25631e68cd75afaeaf330187e45e0eb5 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:37 -0800
Subject: [PATCH 3718/4122] mm/hugetlb: convert free_gigantic_page() to folios

Convert callers of free_gigantic_page() to use folios, function is then
renamed to free_gigantic_folio().

Link: https://lkml.kernel.org/r/20221129225039.82257-9-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 29 +++++++++++++++++------------
 1 file changed, 17 insertions(+), 12 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6da673f1d8305..eb58b0f382229 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1361,18 +1361,20 @@ static void destroy_compound_gigantic_folio(struct folio *folio,
 	__destroy_compound_gigantic_folio(folio, order, false);
 }
 
-static void free_gigantic_page(struct page *page, unsigned int order)
+static void free_gigantic_folio(struct folio *folio, unsigned int order)
 {
 	/*
 	 * If the page isn't allocated using the cma allocator,
 	 * cma_release() returns false.
 	 */
 #ifdef CONFIG_CMA
-	if (cma_release(hugetlb_cma[page_to_nid(page)], page, 1 << order))
+	int nid = folio_nid(folio);
+
+	if (cma_release(hugetlb_cma[nid], &folio->page, 1 << order))
 		return;
 #endif
 
-	free_contig_range(page_to_pfn(page), 1 << order);
+	free_contig_range(folio_pfn(folio), 1 << order);
 }
 
 #ifdef CONFIG_CONTIG_ALLOC
@@ -1426,7 +1428,8 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 {
 	return NULL;
 }
-static inline void free_gigantic_page(struct page *page, unsigned int order) { }
+static inline void free_gigantic_folio(struct folio *folio,
+						unsigned int order) { }
 static inline void destroy_compound_gigantic_folio(struct folio *folio,
 						unsigned int order) { }
 #endif
@@ -1565,7 +1568,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 	 * If we don't know which subpages are hwpoisoned, we can't free
 	 * the hugepage, so it's leaked intentionally.
 	 */
-	if (HPageRawHwpUnreliable(page))
+	if (folio_test_hugetlb_raw_hwp_unreliable(folio))
 		return;
 
 	if (hugetlb_vmemmap_restore(h, page)) {
@@ -1575,7 +1578,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 		 * page and put the page back on the hugetlb free list and treat
 		 * as a surplus page.
 		 */
-		add_hugetlb_folio(h, page_folio(page), true);
+		add_hugetlb_folio(h, folio, true);
 		spin_unlock_irq(&hugetlb_lock);
 		return;
 	}
@@ -1588,7 +1591,7 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 		hugetlb_clear_page_hwpoison(&folio->page);
 
 	for (i = 0; i < pages_per_huge_page(h); i++) {
-		subpage = nth_page(page, i);
+		subpage = folio_page(folio, i);
 		subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
 				1 << PG_referenced | 1 << PG_dirty |
 				1 << PG_active | 1 << PG_private |
@@ -1597,12 +1600,12 @@ static void __update_and_free_page(struct hstate *h, struct page *page)
 
 	/*
 	 * Non-gigantic pages demoted from CMA allocated gigantic pages
-	 * need to be given back to CMA in free_gigantic_page.
+	 * need to be given back to CMA in free_gigantic_folio.
 	 */
 	if (hstate_is_gigantic(h) ||
 	    hugetlb_cma_folio(folio, huge_page_order(h))) {
 		destroy_compound_gigantic_folio(folio, huge_page_order(h));
-		free_gigantic_page(page, huge_page_order(h));
+		free_gigantic_folio(folio, huge_page_order(h));
 	} else {
 		__free_pages(page, huge_page_order(h));
 	}
@@ -2025,6 +2028,7 @@ static struct page *alloc_fresh_huge_page(struct hstate *h,
 		nodemask_t *node_alloc_noretry)
 {
 	struct page *page;
+	struct folio *folio;
 	bool retry = false;
 
 retry:
@@ -2035,14 +2039,14 @@ retry:
 				nid, nmask, node_alloc_noretry);
 	if (!page)
 		return NULL;
-
+	folio = page_folio(page);
 	if (hstate_is_gigantic(h)) {
 		if (!prep_compound_gigantic_page(page, huge_page_order(h))) {
 			/*
 			 * Rare failure to convert pages to compound page.
 			 * Free pages and try again - ONCE!
 			 */
-			free_gigantic_page(page, huge_page_order(h));
+			free_gigantic_folio(folio, huge_page_order(h));
 			if (!retry) {
 				retry = true;
 				goto retry;
@@ -3050,6 +3054,7 @@ static void __init gather_bootmem_prealloc(void)
 
 	list_for_each_entry(m, &huge_boot_pages, list) {
 		struct page *page = virt_to_page(m);
+		struct folio *folio = page_folio(page);
 		struct hstate *h = m->hstate;
 
 		VM_BUG_ON(!hstate_is_gigantic(h));
@@ -3060,7 +3065,7 @@ static void __init gather_bootmem_prealloc(void)
 			free_huge_page(page); /* add to the hugepage allocator */
 		} else {
 			/* VERY unlikely inflated ref count on a tail page */
-			free_gigantic_page(page, huge_page_order(h));
+			free_gigantic_folio(folio, huge_page_order(h));
 		}
 
 		/*
-- 
GitLab


From d1c6095572d0cf00c0cd30378639ff9387b34edd Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:38 -0800
Subject: [PATCH 3719/4122] mm/hugetlb: convert hugetlb prep functions to
 folios

Convert prep_new_huge_page() and __prep_compound_gigantic_page() to
folios.

Link: https://lkml.kernel.org/r/20221129225039.82257-10-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Cc: Wei Chen <harperchen1110@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 63 +++++++++++++++++++++++++---------------------------
 1 file changed, 30 insertions(+), 33 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index eb58b0f382229..903ee75cccd6f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1789,29 +1789,27 @@ static void __prep_new_hugetlb_folio(struct hstate *h, struct folio *folio)
 	set_hugetlb_cgroup_rsvd(folio, NULL);
 }
 
-static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+static void prep_new_hugetlb_folio(struct hstate *h, struct folio *folio, int nid)
 {
-	struct folio *folio = page_folio(page);
-
 	__prep_new_hugetlb_folio(h, folio);
 	spin_lock_irq(&hugetlb_lock);
 	__prep_account_new_huge_page(h, nid);
 	spin_unlock_irq(&hugetlb_lock);
 }
 
-static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
-								bool demote)
+static bool __prep_compound_gigantic_folio(struct folio *folio,
+					unsigned int order, bool demote)
 {
 	int i, j;
 	int nr_pages = 1 << order;
 	struct page *p;
 
-	/* we rely on prep_new_huge_page to set the destructor */
-	set_compound_order(page, order);
-	__ClearPageReserved(page);
-	__SetPageHead(page);
+	/* we rely on prep_new_hugetlb_folio to set the destructor */
+	folio_set_compound_order(folio, order);
+	__folio_clear_reserved(folio);
+	__folio_set_head(folio);
 	for (i = 0; i < nr_pages; i++) {
-		p = nth_page(page, i);
+		p = folio_page(folio, i);
 
 		/*
 		 * For gigantic hugepages allocated through bootmem at
@@ -1853,43 +1851,41 @@ static bool __prep_compound_gigantic_page(struct page *page, unsigned int order,
 			VM_BUG_ON_PAGE(page_count(p), p);
 		}
 		if (i != 0)
-			set_compound_head(p, page);
+			set_compound_head(p, &folio->page);
 	}
-	atomic_set(compound_mapcount_ptr(page), -1);
-	atomic_set(subpages_mapcount_ptr(page), 0);
-	atomic_set(compound_pincount_ptr(page), 0);
+	atomic_set(folio_mapcount_ptr(folio), -1);
+	atomic_set(folio_subpages_mapcount_ptr(folio), 0);
+	atomic_set(folio_pincount_ptr(folio), 0);
 	return true;
 
 out_error:
 	/* undo page modifications made above */
 	for (j = 0; j < i; j++) {
-		p = nth_page(page, j);
+		p = folio_page(folio, j);
 		if (j != 0)
 			clear_compound_head(p);
 		set_page_refcounted(p);
 	}
 	/* need to clear PG_reserved on remaining tail pages  */
 	for (; j < nr_pages; j++) {
-		p = nth_page(page, j);
+		p = folio_page(folio, j);
 		__ClearPageReserved(p);
 	}
-	set_compound_order(page, 0);
-#ifdef CONFIG_64BIT
-	page[1].compound_nr = 0;
-#endif
-	__ClearPageHead(page);
+	folio_set_compound_order(folio, 0);
+	__folio_clear_head(folio);
 	return false;
 }
 
-static bool prep_compound_gigantic_page(struct page *page, unsigned int order)
+static bool prep_compound_gigantic_folio(struct folio *folio,
+							unsigned int order)
 {
-	return __prep_compound_gigantic_page(page, order, false);
+	return __prep_compound_gigantic_folio(folio, order, false);
 }
 
-static bool prep_compound_gigantic_page_for_demote(struct page *page,
+static bool prep_compound_gigantic_folio_for_demote(struct folio *folio,
 							unsigned int order)
 {
-	return __prep_compound_gigantic_page(page, order, true);
+	return __prep_compound_gigantic_folio(folio, order, true);
 }
 
 /*
@@ -2041,7 +2037,7 @@ retry:
 		return NULL;
 	folio = page_folio(page);
 	if (hstate_is_gigantic(h)) {
-		if (!prep_compound_gigantic_page(page, huge_page_order(h))) {
+		if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
 			/*
 			 * Rare failure to convert pages to compound page.
 			 * Free pages and try again - ONCE!
@@ -2054,7 +2050,7 @@ retry:
 			return NULL;
 		}
 	}
-	prep_new_huge_page(h, page, page_to_nid(page));
+	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
 
 	return page;
 }
@@ -3058,10 +3054,10 @@ static void __init gather_bootmem_prealloc(void)
 		struct hstate *h = m->hstate;
 
 		VM_BUG_ON(!hstate_is_gigantic(h));
-		WARN_ON(page_count(page) != 1);
-		if (prep_compound_gigantic_page(page, huge_page_order(h))) {
-			WARN_ON(PageReserved(page));
-			prep_new_huge_page(h, page, page_to_nid(page));
+		WARN_ON(folio_ref_count(folio) != 1);
+		if (prep_compound_gigantic_folio(folio, huge_page_order(h))) {
+			WARN_ON(folio_test_reserved(folio));
+			prep_new_hugetlb_folio(h, folio, folio_nid(folio));
 			free_huge_page(page); /* add to the hugepage allocator */
 		} else {
 			/* VERY unlikely inflated ref count on a tail page */
@@ -3480,13 +3476,14 @@ static int demote_free_huge_page(struct hstate *h, struct page *page)
 	for (i = 0; i < pages_per_huge_page(h);
 				i += pages_per_huge_page(target_hstate)) {
 		subpage = nth_page(page, i);
+		folio = page_folio(subpage);
 		if (hstate_is_gigantic(target_hstate))
-			prep_compound_gigantic_page_for_demote(subpage,
+			prep_compound_gigantic_folio_for_demote(folio,
 							target_hstate->order);
 		else
 			prep_compound_page(subpage, target_hstate->order);
 		set_page_private(subpage, 0);
-		prep_new_huge_page(target_hstate, subpage, nid);
+		prep_new_hugetlb_folio(target_hstate, folio, nid);
 		free_huge_page(subpage);
 	}
 	mutex_unlock(&target_hstate->resize_lock);
-- 
GitLab


From 19fc1a7e8b2b3b0e18fbea84ee26517e1b0f1a6e Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Tue, 29 Nov 2022 14:50:39 -0800
Subject: [PATCH 3720/4122] mm/hugetlb: change hugetlb allocation functions to
 return a folio

Many hugetlb allocation helper functions have now been converting to
folios, update their higher level callers to be compatible with folios.
alloc_pool_huge_page is reorganized to avoid a smatch warning reporting
the folio variable is uninitialized.

[sidhartha.kumar@oracle.com: update alloc_and_dissolve_hugetlb_folio comments]
  Link: https://lkml.kernel.org/r/20221206233512.146535-1-sidhartha.kumar@oracle.com
Link: https://lkml.kernel.org/r/20221129225039.82257-11-sidhartha.kumar@oracle.com
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reported-by: Wei Chen <harperchen1110@gmail.com>
Suggested-by: John Hubbard <jhubbard@nvidia.com>
Suggested-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Tarun Sahu <tsahu@linux.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 134 ++++++++++++++++++++++++---------------------------
 1 file changed, 64 insertions(+), 70 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 903ee75cccd6f..8c6fe22868144 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1378,23 +1378,23 @@ static void free_gigantic_folio(struct folio *folio, unsigned int order)
 }
 
 #ifdef CONFIG_CONTIG_ALLOC
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 		int nid, nodemask_t *nodemask)
 {
+	struct page *page;
 	unsigned long nr_pages = pages_per_huge_page(h);
 	if (nid == NUMA_NO_NODE)
 		nid = numa_mem_id();
 
 #ifdef CONFIG_CMA
 	{
-		struct page *page;
 		int node;
 
 		if (hugetlb_cma[nid]) {
 			page = cma_alloc(hugetlb_cma[nid], nr_pages,
 					huge_page_order(h), true);
 			if (page)
-				return page;
+				return page_folio(page);
 		}
 
 		if (!(gfp_mask & __GFP_THISNODE)) {
@@ -1405,17 +1405,18 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 				page = cma_alloc(hugetlb_cma[node], nr_pages,
 						huge_page_order(h), true);
 				if (page)
-					return page;
+					return page_folio(page);
 			}
 		}
 	}
 #endif
 
-	return alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+	page = alloc_contig_pages(nr_pages, gfp_mask, nid, nodemask);
+	return page ? page_folio(page) : NULL;
 }
 
 #else /* !CONFIG_CONTIG_ALLOC */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 					int nid, nodemask_t *nodemask)
 {
 	return NULL;
@@ -1423,7 +1424,7 @@ static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
 #endif /* CONFIG_CONTIG_ALLOC */
 
 #else /* !CONFIG_ARCH_HAS_GIGANTIC_PAGE */
-static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
+static struct folio *alloc_gigantic_folio(struct hstate *h, gfp_t gfp_mask,
 					int nid, nodemask_t *nodemask)
 {
 	return NULL;
@@ -1950,7 +1951,7 @@ pgoff_t hugetlb_basepage_index(struct page *page)
 	return (index << compound_order(page_head)) + compound_idx;
 }
 
-static struct page *alloc_buddy_huge_page(struct hstate *h,
+static struct folio *alloc_buddy_hugetlb_folio(struct hstate *h,
 		gfp_t gfp_mask, int nid, nodemask_t *nmask,
 		nodemask_t *node_alloc_noretry)
 {
@@ -1988,11 +1989,6 @@ retry:
 		page = NULL;
 	}
 
-	if (page)
-		__count_vm_event(HTLB_BUDDY_PGALLOC);
-	else
-		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
 	/*
 	 * If we did not specify __GFP_RETRY_MAYFAIL, but still got a page this
 	 * indicates an overall state change.  Clear bit so that we resume
@@ -2009,7 +2005,13 @@ retry:
 	if (node_alloc_noretry && !page && alloc_try_hard)
 		node_set(nid, *node_alloc_noretry);
 
-	return page;
+	if (!page) {
+		__count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+		return NULL;
+	}
+
+	__count_vm_event(HTLB_BUDDY_PGALLOC);
+	return page_folio(page);
 }
 
 /*
@@ -2019,23 +2021,21 @@ retry:
  * Note that returned page is 'frozen':  ref count of head page and all tail
  * pages is zero.
  */
-static struct page *alloc_fresh_huge_page(struct hstate *h,
+static struct folio *alloc_fresh_hugetlb_folio(struct hstate *h,
 		gfp_t gfp_mask, int nid, nodemask_t *nmask,
 		nodemask_t *node_alloc_noretry)
 {
-	struct page *page;
 	struct folio *folio;
 	bool retry = false;
 
 retry:
 	if (hstate_is_gigantic(h))
-		page = alloc_gigantic_page(h, gfp_mask, nid, nmask);
+		folio = alloc_gigantic_folio(h, gfp_mask, nid, nmask);
 	else
-		page = alloc_buddy_huge_page(h, gfp_mask,
+		folio = alloc_buddy_hugetlb_folio(h, gfp_mask,
 				nid, nmask, node_alloc_noretry);
-	if (!page)
+	if (!folio)
 		return NULL;
-	folio = page_folio(page);
 	if (hstate_is_gigantic(h)) {
 		if (!prep_compound_gigantic_folio(folio, huge_page_order(h))) {
 			/*
@@ -2052,7 +2052,7 @@ retry:
 	}
 	prep_new_hugetlb_folio(h, folio, folio_nid(folio));
 
-	return page;
+	return folio;
 }
 
 /*
@@ -2062,23 +2062,20 @@ retry:
 static int alloc_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
 				nodemask_t *node_alloc_noretry)
 {
-	struct page *page;
+	struct folio *folio;
 	int nr_nodes, node;
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
 	for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
-		page = alloc_fresh_huge_page(h, gfp_mask, node, nodes_allowed,
-						node_alloc_noretry);
-		if (page)
-			break;
+		folio = alloc_fresh_hugetlb_folio(h, gfp_mask, node,
+					nodes_allowed, node_alloc_noretry);
+		if (folio) {
+			free_huge_page(&folio->page); /* free it into the hugepage allocator */
+			return 1;
+		}
 	}
 
-	if (!page)
-		return 0;
-
-	free_huge_page(page); /* free it into the hugepage allocator */
-
-	return 1;
+	return 0;
 }
 
 /*
@@ -2237,7 +2234,7 @@ int dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
 static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 						int nid, nodemask_t *nmask)
 {
-	struct page *page = NULL;
+	struct folio *folio = NULL;
 
 	if (hstate_is_gigantic(h))
 		return NULL;
@@ -2247,8 +2244,8 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 		goto out_unlock;
 	spin_unlock_irq(&hugetlb_lock);
 
-	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-	if (!page)
+	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (!folio)
 		return NULL;
 
 	spin_lock_irq(&hugetlb_lock);
@@ -2260,43 +2257,42 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
 	 * codeflow
 	 */
 	if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-		SetHPageTemporary(page);
+		folio_set_hugetlb_temporary(folio);
 		spin_unlock_irq(&hugetlb_lock);
-		free_huge_page(page);
+		free_huge_page(&folio->page);
 		return NULL;
 	}
 
 	h->surplus_huge_pages++;
-	h->surplus_huge_pages_node[page_to_nid(page)]++;
+	h->surplus_huge_pages_node[folio_nid(folio)]++;
 
 out_unlock:
 	spin_unlock_irq(&hugetlb_lock);
 
-	return page;
+	return &folio->page;
 }
 
 static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
 				     int nid, nodemask_t *nmask)
 {
-	struct page *page;
+	struct folio *folio;
 
 	if (hstate_is_gigantic(h))
 		return NULL;
 
-	page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
-	if (!page)
+	folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid, nmask, NULL);
+	if (!folio)
 		return NULL;
 
 	/* fresh huge pages are frozen */
-	set_page_refcounted(page);
-
+	folio_ref_unfreeze(folio, 1);
 	/*
 	 * We do not account these pages as surplus because they are only
 	 * temporary and will be released properly on the last reference
 	 */
-	SetHPageTemporary(page);
+	folio_set_hugetlb_temporary(folio);
 
-	return page;
+	return &folio->page;
 }
 
 /*
@@ -2745,54 +2741,52 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
 }
 
 /*
- * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one
+ * alloc_and_dissolve_hugetlb_folio - Allocate a new folio and dissolve
+ * the old one
  * @h: struct hstate old page belongs to
- * @old_page: Old page to dissolve
+ * @old_folio: Old folio to dissolve
  * @list: List to isolate the page in case we need to
  * Returns 0 on success, otherwise negated error.
  */
-static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
-					struct list_head *list)
+static int alloc_and_dissolve_hugetlb_folio(struct hstate *h,
+			struct folio *old_folio, struct list_head *list)
 {
 	gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
-	struct folio *old_folio = page_folio(old_page);
 	int nid = folio_nid(old_folio);
-	struct page *new_page;
 	struct folio *new_folio;
 	int ret = 0;
 
 	/*
-	 * Before dissolving the page, we need to allocate a new one for the
-	 * pool to remain stable.  Here, we allocate the page and 'prep' it
+	 * Before dissolving the folio, we need to allocate a new one for the
+	 * pool to remain stable.  Here, we allocate the folio and 'prep' it
 	 * by doing everything but actually updating counters and adding to
 	 * the pool.  This simplifies and let us do most of the processing
 	 * under the lock.
 	 */
-	new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
-	if (!new_page)
+	new_folio = alloc_buddy_hugetlb_folio(h, gfp_mask, nid, NULL, NULL);
+	if (!new_folio)
 		return -ENOMEM;
-	new_folio = page_folio(new_page);
 	__prep_new_hugetlb_folio(h, new_folio);
 
 retry:
 	spin_lock_irq(&hugetlb_lock);
 	if (!folio_test_hugetlb(old_folio)) {
 		/*
-		 * Freed from under us. Drop new_page too.
+		 * Freed from under us. Drop new_folio too.
 		 */
 		goto free_new;
 	} else if (folio_ref_count(old_folio)) {
 		/*
-		 * Someone has grabbed the page, try to isolate it here.
+		 * Someone has grabbed the folio, try to isolate it here.
 		 * Fail with -EBUSY if not possible.
 		 */
 		spin_unlock_irq(&hugetlb_lock);
-		ret = isolate_hugetlb(old_page, list);
+		ret = isolate_hugetlb(&old_folio->page, list);
 		spin_lock_irq(&hugetlb_lock);
 		goto free_new;
 	} else if (!folio_test_hugetlb_freed(old_folio)) {
 		/*
-		 * Page's refcount is 0 but it has not been enqueued in the
+		 * Folio's refcount is 0 but it has not been enqueued in the
 		 * freelist yet. Race window is small, so we can succeed here if
 		 * we retry.
 		 */
@@ -2801,7 +2795,7 @@ retry:
 		goto retry;
 	} else {
 		/*
-		 * Ok, old_page is still a genuine free hugepage. Remove it from
+		 * Ok, old_folio is still a genuine free hugepage. Remove it from
 		 * the freelist and decrease the counters. These will be
 		 * incremented again when calling __prep_account_new_huge_page()
 		 * and enqueue_hugetlb_folio() for new_folio. The counters will
@@ -2810,14 +2804,14 @@ retry:
 		remove_hugetlb_folio(h, old_folio, false);
 
 		/*
-		 * Ref count on new page is already zero as it was dropped
+		 * Ref count on new_folio is already zero as it was dropped
 		 * earlier.  It can be directly added to the pool free list.
 		 */
 		__prep_account_new_huge_page(h, nid);
 		enqueue_hugetlb_folio(h, new_folio);
 
 		/*
-		 * Pages have been replaced, we can safely free the old one.
+		 * Folio has been replaced, we can safely free the old one.
 		 */
 		spin_unlock_irq(&hugetlb_lock);
 		update_and_free_hugetlb_folio(h, old_folio, false);
@@ -2827,7 +2821,7 @@ retry:
 
 free_new:
 	spin_unlock_irq(&hugetlb_lock);
-	/* Page has a zero ref count, but needs a ref to be freed */
+	/* Folio has a zero ref count, but needs a ref to be freed */
 	folio_ref_unfreeze(new_folio, 1);
 	update_and_free_hugetlb_folio(h, new_folio, false);
 
@@ -2865,7 +2859,7 @@ int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
 	if (folio_ref_count(folio) && !isolate_hugetlb(&folio->page, list))
 		ret = 0;
 	else if (!folio_ref_count(folio))
-		ret = alloc_and_dissolve_huge_page(h, &folio->page, list);
+		ret = alloc_and_dissolve_hugetlb_folio(h, folio, list);
 
 	return ret;
 }
@@ -3083,14 +3077,14 @@ static void __init hugetlb_hstate_alloc_pages_onenode(struct hstate *h, int nid)
 			if (!alloc_bootmem_huge_page(h, nid))
 				break;
 		} else {
-			struct page *page;
+			struct folio *folio;
 			gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
 
-			page = alloc_fresh_huge_page(h, gfp_mask, nid,
+			folio = alloc_fresh_hugetlb_folio(h, gfp_mask, nid,
 					&node_states[N_MEMORY], NULL);
-			if (!page)
+			if (!folio)
 				break;
-			free_huge_page(page); /* free it into the hugepage allocator */
+			free_huge_page(&folio->page); /* free it into the hugepage allocator */
 		}
 		cond_resched();
 	}
-- 
GitLab


From c8c7016f50c85688d71feea2dba1bd955d5f5358 Mon Sep 17 00:00:00 2001
From: Andrey Konovalov <andreyknvl@google.com>
Date: Wed, 30 Nov 2022 16:02:03 +0100
Subject: [PATCH 3721/4122] kasan: fail non-kasan KUnit tests on KASAN reports

After the recent changes done to KUnit-enabled KASAN tests, non-KASAN
KUnit tests stopped being failed when KASAN report is detected.

Recover that property by failing the currently running non-KASAN KUnit
test when KASAN detects and prints a report for a bad memory access.

Note that if the bad accesses happened in a kernel thread that doesn't
have a reference to the currently running KUnit-test available via
current->kunit_test, the test won't be failed.  This is a limitation of
KUnit, which doesn't yet provide a thread-agnostic way to find the
reference to the currenly running test.

Link: https://lkml.kernel.org/r/7be29a8ea967cee6b7e48d3d5a242d1d0bd96851.1669820505.git.andreyknvl@google.com
Fixes: 49d9977ac909 ("kasan: check CONFIG_KASAN_KUNIT_TEST instead of CONFIG_KUNIT")
Fixes: 7ce0ea19d50e ("kasan: switch kunit tests to console tracepoints")
Signed-off-by: Andrey Konovalov <andreyknvl@google.com>
Cc: Alexander Potapenko <glider@google.com>
Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Cc: David Gow <davidgow@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Marco Elver <elver@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kasan/kasan.h      | 12 ++++++++++
 mm/kasan/kasan_test.c |  4 ++++
 mm/kasan/report.c     | 53 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index a84491bc4867e..ea8cf1310b1e8 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -541,6 +541,18 @@ static inline bool kasan_arch_is_ready(void)	{ return true; }
 #error kasan_arch_is_ready only works in KASAN generic outline mode!
 #endif
 
+#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
+
+void kasan_kunit_test_suite_start(void);
+void kasan_kunit_test_suite_end(void);
+
+#else /* CONFIG_KASAN_KUNIT_TEST */
+
+static inline void kasan_kunit_test_suite_start(void) { }
+static inline void kasan_kunit_test_suite_end(void) { }
+
+#endif /* CONFIG_KASAN_KUNIT_TEST */
+
 #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST) || IS_ENABLED(CONFIG_KASAN_MODULE_TEST)
 
 bool kasan_save_enable_multi_shot(void);
diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
index e27591ef2777f..9aa892e7b76c3 100644
--- a/mm/kasan/kasan_test.c
+++ b/mm/kasan/kasan_test.c
@@ -76,6 +76,9 @@ static int kasan_suite_init(struct kunit_suite *suite)
 		return -1;
 	}
 
+	/* Stop failing KUnit tests on KASAN reports. */
+	kasan_kunit_test_suite_start();
+
 	/*
 	 * Temporarily enable multi-shot mode. Otherwise, KASAN would only
 	 * report the first detected bug and panic the kernel if panic_on_warn
@@ -94,6 +97,7 @@ static int kasan_suite_init(struct kunit_suite *suite)
 
 static void kasan_suite_exit(struct kunit_suite *suite)
 {
+	kasan_kunit_test_suite_end();
 	kasan_restore_multi_shot(multishot);
 	for_each_kernel_tracepoint(unregister_tracepoints, NULL);
 	tracepoint_synchronize_unregister();
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 31355851a5ec3..f2db8605ee0ff 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -9,6 +9,7 @@
  *        Andrey Konovalov <andreyknvl@gmail.com>
  */
 
+#include <kunit/test.h>
 #include <linux/bitops.h>
 #include <linux/ftrace.h>
 #include <linux/init.h>
@@ -112,10 +113,62 @@ EXPORT_SYMBOL_GPL(kasan_restore_multi_shot);
 
 #endif
 
+#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
+
+/*
+ * Whether the KASAN KUnit test suite is currently being executed.
+ * Updated in kasan_test.c.
+ */
+bool kasan_kunit_executing;
+
+void kasan_kunit_test_suite_start(void)
+{
+	WRITE_ONCE(kasan_kunit_executing, true);
+}
+EXPORT_SYMBOL_GPL(kasan_kunit_test_suite_start);
+
+void kasan_kunit_test_suite_end(void)
+{
+	WRITE_ONCE(kasan_kunit_executing, false);
+}
+EXPORT_SYMBOL_GPL(kasan_kunit_test_suite_end);
+
+static bool kasan_kunit_test_suite_executing(void)
+{
+	return READ_ONCE(kasan_kunit_executing);
+}
+
+#else /* CONFIG_KASAN_KUNIT_TEST */
+
+static inline bool kasan_kunit_test_suite_executing(void) { return false; }
+
+#endif /* CONFIG_KASAN_KUNIT_TEST */
+
+#if IS_ENABLED(CONFIG_KUNIT)
+
+static void fail_non_kasan_kunit_test(void)
+{
+	struct kunit *test;
+
+	if (kasan_kunit_test_suite_executing())
+		return;
+
+	test = current->kunit_test;
+	if (test)
+		kunit_set_failure(test);
+}
+
+#else /* CONFIG_KUNIT */
+
+static inline void fail_non_kasan_kunit_test(void) { }
+
+#endif /* CONFIG_KUNIT */
+
 static DEFINE_SPINLOCK(report_lock);
 
 static void start_report(unsigned long *flags, bool sync)
 {
+	fail_non_kasan_kunit_test();
 	/* Respect the /proc/sys/kernel/traceoff_on_warning interface. */
 	disable_trace_on_warning();
 	/* Do not allow LOCKDEP mangling KASAN reports. */
-- 
GitLab


From 0b7623bdf89b9f6d320784e929acb78291aaf5f6 Mon Sep 17 00:00:00 2001
From: SeongJae Park <sj@kernel.org>
Date: Thu, 1 Dec 2022 17:08:34 +0000
Subject: [PATCH 3722/4122] selftests/damon: test removed scheme sysfs dir
 access bug

A DAMON sysfs user could start DAMON with a scheme, remove the sysfs
directory for the scheme, and then ask stats or schemes tried regions
update.  The related logic were not aware of the already removed directory
situation, so it was able to results in invalid memory accesses.  The fix
has made with commit 8468b486612c ("mm/damon/sysfs-schemes: skip stats
update if the scheme directory is removed"), though.  Add a selftest to
prevent such kinds of bugs from being introduced again.

Link: https://lkml.kernel.org/r/20221201170834.62823-1-sj@kernel.org
Signed-off-by: SeongJae Park <sj@kernel.org>
Cc: Shuah Khan <shuah@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/damon/Makefile        |  2 +-
 .../damon/sysfs_update_removed_scheme_dir.sh  | 58 +++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh

diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 838a8e49f77b9..b71247ba71969 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -8,7 +8,7 @@ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
 TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
 TEST_PROGS += debugfs_duplicate_context_creation.sh
 TEST_PROGS += debugfs_rm_non_contexts.sh
-TEST_PROGS += sysfs.sh
+TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh
 TEST_PROGS += reclaim.sh lru_sort.sh
 
 include ../lib.mk
diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
new file mode 100644
index 0000000000000..ade35576e7487
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ $EUID -ne 0 ]
+then
+	echo "Run as root"
+	exit $ksft_skip
+fi
+
+damon_sysfs="/sys/kernel/mm/damon/admin"
+if [ ! -d "$damon_sysfs" ]
+then
+	echo "damon sysfs not found"
+	exit $ksft_skip
+fi
+
+# clear log
+dmesg -C
+
+# start DAMON with a scheme
+echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts"
+echo "vaddr" > "$damon_sysfs/kdamonds/0/contexts/0/operations"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/targets/nr_targets"
+echo $$ > "$damon_sysfs/kdamonds/0/contexts/0/targets/0/pid_target"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+scheme_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0"
+echo 4096000 > "$scheme_dir/access_pattern/sz/max"
+echo 20 > "$scheme_dir/access_pattern/nr_accesses/max"
+echo 1024 > "$scheme_dir/access_pattern/age/max"
+echo "on" > "$damon_sysfs/kdamonds/0/state"
+sleep 0.3
+
+# remove scheme sysfs dir
+echo 0 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+
+# try to update stat of already removed scheme sysfs dir
+echo "update_schemes_stats" > "$damon_sysfs/kdamonds/0/state"
+if dmesg | grep -q BUG
+then
+	echo "update_schemes_stats triggers a kernel bug"
+	dmesg
+	exit 1
+fi
+
+# try to update tried regions of already removed scheme sysfs dir
+echo "update_schemes_tried_regions" > "$damon_sysfs/kdamonds/0/state"
+if dmesg | grep -q BUG
+then
+	echo "update_schemes_tried_regions triggers a kernel bug"
+	dmesg
+	exit 1
+fi
+
+echo "off" > "$damon_sysfs/kdamonds/0/state"
-- 
GitLab


From 169004265860327182ecf92297b25b6271e81e96 Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:28:51 +0000
Subject: [PATCH 3723/4122] fsdax: introduce page->share for fsdax in reflink
 mode

Patch series "fsdax,xfs: fix warning messages", v2.

Many testcases failed in dax+reflink mode with warning message in dmesg.
Such as generic/051,075,127.  The warning message is like this:
[  775.509337] ------------[ cut here ]------------
[  775.509636] WARNING: CPU: 1 PID: 16815 at fs/dax.c:386 dax_insert_entry.cold+0x2e/0x69
[  775.510151] Modules linked in: auth_rpcgss oid_registry nfsv4 algif_hash af_alg af_packet nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat iptable_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set nf_tables nfnetlink ip6table_filter ip6_tables iptable_filter ip_tables x_tables dax_pmem nd_pmem nd_btt sch_fq_codel configfs xfs libcrc32c fuse
[  775.524288] CPU: 1 PID: 16815 Comm: fsx Kdump: loaded Tainted: G        W          6.1.0-rc4+ #164 eb34e4ee4200c7cbbb47de2b1892c5a3e027fd6d
[  775.524904] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Arch Linux 1.16.0-3-3 04/01/2014
[  775.525460] RIP: 0010:dax_insert_entry.cold+0x2e/0x69
[  775.525797] Code: c7 c7 18 eb e0 81 48 89 4c 24 20 48 89 54 24 10 e8 73 6d ff ff 48 83 7d 18 00 48 8b 54 24 10 48 8b 4c 24 20 0f 84 e3 e9 b9 ff <0f> 0b e9 dc e9 b9 ff 48 c7 c6 a0 20 c3 81 48 c7 c7 f0 ea e0 81 48
[  775.526708] RSP: 0000:ffffc90001d57b30 EFLAGS: 00010082
[  775.527042] RAX: 000000000000002a RBX: 0000000000000000 RCX: 0000000000000042
[  775.527396] RDX: ffffea000a0f6c80 RSI: ffffffff81dfab1b RDI: 00000000ffffffff
[  775.527819] RBP: ffffea000a0f6c40 R08: 0000000000000000 R09: ffffffff820625e0
[  775.528241] R10: ffffc90001d579d8 R11: ffffffff820d2628 R12: ffff88815fc98320
[  775.528598] R13: ffffc90001d57c18 R14: 0000000000000000 R15: 0000000000000001
[  775.528997] FS:  00007f39fc75d740(0000) GS:ffff88817bc80000(0000) knlGS:0000000000000000
[  775.529474] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  775.529800] CR2: 00007f39fc772040 CR3: 0000000107eb6001 CR4: 00000000003706e0
[  775.530214] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  775.530592] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  775.531002] Call Trace:
[  775.531230]  <TASK>
[  775.531444]  dax_fault_iter+0x267/0x6c0
[  775.531719]  dax_iomap_pte_fault+0x198/0x3d0
[  775.532002]  __xfs_filemap_fault+0x24a/0x2d0 [xfs aa8d25411432b306d9554da38096f4ebb86bdfe7]
[  775.532603]  __do_fault+0x30/0x1e0
[  775.532903]  do_fault+0x314/0x6c0
[  775.533166]  __handle_mm_fault+0x646/0x1250
[  775.533480]  handle_mm_fault+0xc1/0x230
[  775.533810]  do_user_addr_fault+0x1ac/0x610
[  775.534110]  exc_page_fault+0x63/0x140
[  775.534389]  asm_exc_page_fault+0x22/0x30
[  775.534678] RIP: 0033:0x7f39fc55820a
[  775.534950] Code: 00 01 00 00 00 74 99 83 f9 c0 0f 87 7b fe ff ff c5 fe 6f 4e 20 48 29 fe 48 83 c7 3f 49 8d 0c 10 48 83 e7 c0 48 01 fe 48 29 f9 <f3> a4 c4 c1 7e 7f 00 c4 c1 7e 7f 48 20 c5 f8 77 c3 0f 1f 44 00 00
[  775.535839] RSP: 002b:00007ffc66a08118 EFLAGS: 00010202
[  775.536157] RAX: 00007f39fc772001 RBX: 0000000000042001 RCX: 00000000000063c1
[  775.536537] RDX: 0000000000006400 RSI: 00007f39fac42050 RDI: 00007f39fc772040
[  775.536919] RBP: 0000000000006400 R08: 00007f39fc772001 R09: 0000000000042000
[  775.537304] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000001
[  775.537694] R13: 00007f39fc772000 R14: 0000000000006401 R15: 0000000000000003
[  775.538086]  </TASK>
[  775.538333] ---[ end trace 0000000000000000 ]---

This also affects dax+noreflink mode if we run the test after a
dax+reflink test.  So, the most urgent thing is solving the warning
messages.

With these fixes, most warning messages in dax_associate_entry() are gone.
But honestly, generic/388 will randomly failed with the warning.  The
case shutdown the xfs when fsstress is running, and do it for many times.
I think the reason is that dax pages in use are not able to be invalidated
in time when fs is shutdown.  The next time dax page to be associated, it
still remains the mapping value set last time.  I'll keep on solving it.

The warning message in dax_writeback_one() can also be fixed because of
the dax unshare.


This patch (of 8):

fsdax page is used not only when CoW, but also mapread.  To make the it
easily understood, use 'share' to indicate that the dax page is shared by
more than one extent.  And add helper functions to use it.

Also, the flag needs to be renamed to PAGE_MAPPING_DAX_SHARED.

[ruansy.fnst@fujitsu.com: rename several functions]
  Link: https://lkml.kernel.org/r/1669972991-246-1-git-send-email-ruansy.fnst@fujitsu.com
[ruansy.fnst@fujitsu.com: v2.2]
  Link: https://lkml.kernel.org/r/1670381359-53-1-git-send-email-ruansy.fnst@fujitsu.com
Link: https://lkml.kernel.org/r/1669908538-55-1-git-send-email-ruansy.fnst@fujitsu.com
Link: https://lkml.kernel.org/r/1669908538-55-2-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/dax.c                   | 38 ++++++++++++++++++++++----------------
 include/linux/mm_types.h   |  5 ++++-
 include/linux/page-flags.h |  2 +-
 3 files changed, 27 insertions(+), 18 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 1c6867810cbd6..84fadea087051 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -334,35 +334,41 @@ static unsigned long dax_end_pfn(void *entry)
 	for (pfn = dax_to_pfn(entry); \
 			pfn < dax_end_pfn(entry); pfn++)
 
-static inline bool dax_mapping_is_cow(struct address_space *mapping)
+static inline bool dax_page_is_shared(struct page *page)
 {
-	return (unsigned long)mapping == PAGE_MAPPING_DAX_COW;
+	return page->mapping == PAGE_MAPPING_DAX_SHARED;
 }
 
 /*
- * Set the page->mapping with FS_DAX_MAPPING_COW flag, increase the refcount.
+ * Set the page->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the
+ * refcount.
  */
-static inline void dax_mapping_set_cow(struct page *page)
+static inline void dax_page_share_get(struct page *page)
 {
-	if ((uintptr_t)page->mapping != PAGE_MAPPING_DAX_COW) {
+	if (page->mapping != PAGE_MAPPING_DAX_SHARED) {
 		/*
 		 * Reset the index if the page was already mapped
 		 * regularly before.
 		 */
 		if (page->mapping)
-			page->index = 1;
-		page->mapping = (void *)PAGE_MAPPING_DAX_COW;
+			page->share = 1;
+		page->mapping = PAGE_MAPPING_DAX_SHARED;
 	}
-	page->index++;
+	page->share++;
+}
+
+static inline unsigned long dax_page_share_put(struct page *page)
+{
+	return --page->share;
 }
 
 /*
- * When it is called in dax_insert_entry(), the cow flag will indicate that
+ * When it is called in dax_insert_entry(), the shared flag will indicate that
  * whether this entry is shared by multiple files.  If so, set the page->mapping
- * FS_DAX_MAPPING_COW, and use page->index as refcount.
+ * PAGE_MAPPING_DAX_SHARED, and use page->share as refcount.
  */
 static void dax_associate_entry(void *entry, struct address_space *mapping,
-		struct vm_area_struct *vma, unsigned long address, bool cow)
+		struct vm_area_struct *vma, unsigned long address, bool shared)
 {
 	unsigned long size = dax_entry_size(entry), pfn, index;
 	int i = 0;
@@ -374,8 +380,8 @@ static void dax_associate_entry(void *entry, struct address_space *mapping,
 	for_each_mapped_pfn(entry, pfn) {
 		struct page *page = pfn_to_page(pfn);
 
-		if (cow) {
-			dax_mapping_set_cow(page);
+		if (shared) {
+			dax_page_share_get(page);
 		} else {
 			WARN_ON_ONCE(page->mapping);
 			page->mapping = mapping;
@@ -396,9 +402,9 @@ static void dax_disassociate_entry(void *entry, struct address_space *mapping,
 		struct page *page = pfn_to_page(pfn);
 
 		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
-		if (dax_mapping_is_cow(page->mapping)) {
-			/* keep the CoW flag if this page is still shared */
-			if (page->index-- > 0)
+		if (dax_page_is_shared(page)) {
+			/* keep the shared flag if this page is still shared */
+			if (dax_page_share_put(page) > 0)
 				continue;
 		} else
 			WARN_ON_ONCE(page->mapping && page->mapping != mapping);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 199f98be6f9cd..3b8475007734d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -104,7 +104,10 @@ struct page {
 			};
 			/* See page-flags.h for PAGE_MAPPING_FLAGS */
 			struct address_space *mapping;
-			pgoff_t index;		/* Our offset within mapping. */
+			union {
+				pgoff_t index;		/* Our offset within mapping. */
+				unsigned long share;	/* share count for fsdax */
+			};
 			/**
 			 * @private: Mapping-private opaque data.
 			 * Usually used for buffer_heads if PagePrivate.
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index e42c55a7e0129..9aec9fd8c50bd 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -638,7 +638,7 @@ PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
  * Different with flags above, this flag is used only for fsdax mode.  It
  * indicates that this page->mapping is now under reflink case.
  */
-#define PAGE_MAPPING_DAX_COW	0x1
+#define PAGE_MAPPING_DAX_SHARED	((void *)0x1)
 
 static __always_inline bool folio_mapping_flags(struct folio *folio)
 {
-- 
GitLab


From f80e1668888f34c0764822e74953c997daf2ccdb Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:28:52 +0000
Subject: [PATCH 3724/4122] fsdax: invalidate pages when CoW

CoW changes the share state of a dax page, but the share count of the page
isn't updated.  The next time access this page, it should have been a
newly accessed, but old association exists.  So, we need to clear the
share state when CoW happens, in both dax_iomap_rw() and dax_zero_iter().

Link: https://lkml.kernel.org/r/1669908538-55-3-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/dax.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 84fadea087051..c975d075e77b0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1264,6 +1264,15 @@ static s64 dax_zero_iter(struct iomap_iter *iter, bool *did_zero)
 	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
 		return length;
 
+	/*
+	 * invalidate the pages whose sharing state is to be changed
+	 * because of CoW.
+	 */
+	if (iomap->flags & IOMAP_F_SHARED)
+		invalidate_inode_pages2_range(iter->inode->i_mapping,
+					      pos >> PAGE_SHIFT,
+					      (pos + length - 1) >> PAGE_SHIFT);
+
 	do {
 		unsigned offset = offset_in_page(pos);
 		unsigned size = min_t(u64, PAGE_SIZE - offset, length);
@@ -1324,12 +1333,13 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		struct iov_iter *iter)
 {
 	const struct iomap *iomap = &iomi->iomap;
-	const struct iomap *srcmap = &iomi->srcmap;
+	const struct iomap *srcmap = iomap_iter_srcmap(iomi);
 	loff_t length = iomap_length(iomi);
 	loff_t pos = iomi->pos;
 	struct dax_device *dax_dev = iomap->dax_dev;
 	loff_t end = pos + length, done = 0;
 	bool write = iov_iter_rw(iter) == WRITE;
+	bool cow = write && iomap->flags & IOMAP_F_SHARED;
 	ssize_t ret = 0;
 	size_t xfer;
 	int id;
@@ -1356,7 +1366,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 	 * into page tables. We have to tear down these mappings so that data
 	 * written by write(2) is visible in mmap.
 	 */
-	if (iomap->flags & IOMAP_F_NEW) {
+	if (iomap->flags & IOMAP_F_NEW || cow) {
 		invalidate_inode_pages2_range(iomi->inode->i_mapping,
 					      pos >> PAGE_SHIFT,
 					      (end - 1) >> PAGE_SHIFT);
@@ -1390,8 +1400,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 			break;
 		}
 
-		if (write &&
-		    srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
+		if (cow) {
 			ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap,
 						 kaddr);
 			if (ret)
-- 
GitLab


From 708dfad2eb4169324189782edd6d3763237e0489 Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:28:53 +0000
Subject: [PATCH 3725/4122] fsdax: zero the edges if source is HOLE or
 UNWRITTEN

If srcmap contains invalid data, such as HOLE and UNWRITTEN, the dest page
should be zeroed.  Otherwise, since it's a pmem, old data may remains on
the dest page, the result of CoW will be incorrect.

The function name is also not easy to understand, rename it to
"dax_iomap_copy_around()", which means it copies data around the range.

[akpm@linux-foundation.org: update dax_iomap_copy_around() kerneldoc, per Darrick]
Link: https://lkml.kernel.org/r/1669973145-318-1-git-send-email-ruansy.fnst@fujitsu.com
Link: https://lkml.kernel.org/r/1669908538-55-4-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Allison Henderson <allison.henderson@oracle.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/dax.c | 79 +++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 49 insertions(+), 30 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index c975d075e77b0..359b958eb835d 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1092,7 +1092,8 @@ out:
 }
 
 /**
- * dax_iomap_cow_copy - Copy the data from source to destination before write
+ * dax_iomap_copy_around - Prepare for an unaligned write to a shared/cow page
+ * by copying the data before and after the range to be written.
  * @pos:	address to do copy from.
  * @length:	size of copy operation.
  * @align_size:	aligned w.r.t align_size (either PMD_SIZE or PAGE_SIZE)
@@ -1101,35 +1102,50 @@ out:
  *
  * This can be called from two places. Either during DAX write fault (page
  * aligned), to copy the length size data to daddr. Or, while doing normal DAX
- * write operation, dax_iomap_actor() might call this to do the copy of either
+ * write operation, dax_iomap_iter() might call this to do the copy of either
  * start or end unaligned address. In the latter case the rest of the copy of
- * aligned ranges is taken care by dax_iomap_actor() itself.
+ * aligned ranges is taken care by dax_iomap_iter() itself.
+ * If the srcmap contains invalid data, such as HOLE and UNWRITTEN, zero the
+ * area to make sure no old data remains.
  */
-static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
+static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
 		const struct iomap *srcmap, void *daddr)
 {
 	loff_t head_off = pos & (align_size - 1);
 	size_t size = ALIGN(head_off + length, align_size);
 	loff_t end = pos + length;
 	loff_t pg_end = round_up(end, align_size);
+	/* copy_all is usually in page fault case */
 	bool copy_all = head_off == 0 && end == pg_end;
+	/* zero the edges if srcmap is a HOLE or IOMAP_UNWRITTEN */
+	bool zero_edge = srcmap->flags & IOMAP_F_SHARED ||
+			 srcmap->type == IOMAP_UNWRITTEN;
 	void *saddr = 0;
 	int ret = 0;
 
-	ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
-	if (ret)
-		return ret;
+	if (!zero_edge) {
+		ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
+		if (ret)
+			return ret;
+	}
 
 	if (copy_all) {
-		ret = copy_mc_to_kernel(daddr, saddr, length);
-		return ret ? -EIO : 0;
+		if (zero_edge)
+			memset(daddr, 0, size);
+		else
+			ret = copy_mc_to_kernel(daddr, saddr, length);
+		goto out;
 	}
 
 	/* Copy the head part of the range */
 	if (head_off) {
-		ret = copy_mc_to_kernel(daddr, saddr, head_off);
-		if (ret)
-			return -EIO;
+		if (zero_edge)
+			memset(daddr, 0, head_off);
+		else {
+			ret = copy_mc_to_kernel(daddr, saddr, head_off);
+			if (ret)
+				return -EIO;
+		}
 	}
 
 	/* Copy the tail part of the range */
@@ -1137,12 +1153,19 @@ static int dax_iomap_cow_copy(loff_t pos, uint64_t length, size_t align_size,
 		loff_t tail_off = head_off + length;
 		loff_t tail_len = pg_end - end;
 
-		ret = copy_mc_to_kernel(daddr + tail_off, saddr + tail_off,
-					tail_len);
-		if (ret)
-			return -EIO;
+		if (zero_edge)
+			memset(daddr + tail_off, 0, tail_len);
+		else {
+			ret = copy_mc_to_kernel(daddr + tail_off,
+						saddr + tail_off, tail_len);
+			if (ret)
+				return -EIO;
+		}
 	}
-	return 0;
+out:
+	if (zero_edge)
+		dax_flush(srcmap->dax_dev, daddr, size);
+	return ret ? -EIO : 0;
 }
 
 /*
@@ -1241,13 +1264,10 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
 	if (ret < 0)
 		return ret;
 	memset(kaddr + offset, 0, size);
-	if (srcmap->addr != iomap->addr) {
-		ret = dax_iomap_cow_copy(pos, size, PAGE_SIZE, srcmap,
-					 kaddr);
-		if (ret < 0)
-			return ret;
-		dax_flush(iomap->dax_dev, kaddr, PAGE_SIZE);
-	} else
+	if (iomap->flags & IOMAP_F_SHARED)
+		ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
+					    kaddr);
+	else
 		dax_flush(iomap->dax_dev, kaddr + offset, size);
 	return ret;
 }
@@ -1401,8 +1421,8 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
 		}
 
 		if (cow) {
-			ret = dax_iomap_cow_copy(pos, length, PAGE_SIZE, srcmap,
-						 kaddr);
+			ret = dax_iomap_copy_around(pos, length, PAGE_SIZE,
+						    srcmap, kaddr);
 			if (ret)
 				break;
 		}
@@ -1547,7 +1567,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 		struct xa_state *xas, void **entry, bool pmd)
 {
 	const struct iomap *iomap = &iter->iomap;
-	const struct iomap *srcmap = &iter->srcmap;
+	const struct iomap *srcmap = iomap_iter_srcmap(iter);
 	size_t size = pmd ? PMD_SIZE : PAGE_SIZE;
 	loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
 	bool write = iter->flags & IOMAP_WRITE;
@@ -1578,9 +1598,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
 
 	*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, entry_flags);
 
-	if (write &&
-	    srcmap->type != IOMAP_HOLE && srcmap->addr != iomap->addr) {
-		err = dax_iomap_cow_copy(pos, size, size, srcmap, kaddr);
+	if (write && iomap->flags & IOMAP_F_SHARED) {
+		err = dax_iomap_copy_around(pos, size, size, srcmap, kaddr);
 		if (err)
 			return dax_fault_return(err);
 	}
-- 
GitLab


From c6f0b395b2110aa26a134a9a395875b1ec0a5aae Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:28:54 +0000
Subject: [PATCH 3726/4122] fsdax,xfs: set the shared flag when file extent is
 shared

If a dax page is shared, mapread at different offsets can also trigger
page fault on same dax page.  So, change the flag from "cow" to "shared".
And get the shared flag from filesystem when read.

Link: https://lkml.kernel.org/r/1669908538-55-5-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/dax.c           | 19 +++++++------------
 fs/xfs/xfs_iomap.c |  2 +-
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 359b958eb835d..fa547ce41add4 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -846,12 +846,6 @@ static bool dax_fault_is_synchronous(const struct iomap_iter *iter,
 		(iter->iomap.flags & IOMAP_F_DIRTY);
 }
 
-static bool dax_fault_is_cow(const struct iomap_iter *iter)
-{
-	return (iter->flags & IOMAP_WRITE) &&
-		(iter->iomap.flags & IOMAP_F_SHARED);
-}
-
 /*
  * By this point grab_mapping_entry() has ensured that we have a locked entry
  * of the appropriate size so we don't have to worry about downgrading PMDs to
@@ -865,13 +859,14 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
 {
 	struct address_space *mapping = vmf->vma->vm_file->f_mapping;
 	void *new_entry = dax_make_entry(pfn, flags);
-	bool dirty = !dax_fault_is_synchronous(iter, vmf->vma);
-	bool cow = dax_fault_is_cow(iter);
+	bool write = iter->flags & IOMAP_WRITE;
+	bool dirty = write && !dax_fault_is_synchronous(iter, vmf->vma);
+	bool shared = iter->iomap.flags & IOMAP_F_SHARED;
 
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
 
-	if (cow || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
+	if (shared || (dax_is_zero_entry(entry) && !(flags & DAX_ZERO_PAGE))) {
 		unsigned long index = xas->xa_index;
 		/* we are replacing a zero page with block mapping */
 		if (dax_is_pmd_entry(entry))
@@ -883,12 +878,12 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
 
 	xas_reset(xas);
 	xas_lock_irq(xas);
-	if (cow || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
+	if (shared || dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		void *old;
 
 		dax_disassociate_entry(entry, mapping, false);
 		dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
-				cow);
+				shared);
 		/*
 		 * Only swap our new entry into the page cache if the current
 		 * entry is a zero page or an empty entry.  If a normal PTE or
@@ -908,7 +903,7 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
 	if (dirty)
 		xas_set_mark(xas, PAGECACHE_TAG_DIRTY);
 
-	if (cow)
+	if (write && shared)
 		xas_set_mark(xas, PAGECACHE_TAG_TOWRITE);
 
 	xas_unlock_irq(xas);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 07da03976ec12..881de99766cad 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1215,7 +1215,7 @@ xfs_read_iomap_begin(
 		return error;
 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
 			       &nimaps, 0);
-	if (!error && (flags & IOMAP_REPORT))
+	if (!error && ((flags & IOMAP_REPORT) || IS_DAX(inode)))
 		error = xfs_reflink_trim_around_shared(ip, &imap, &shared);
 	xfs_iunlock(ip, lockmode);
 
-- 
GitLab


From 0e79e3736d54bb8efbc9fb29cc3b54a132783565 Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:31:41 +0000
Subject: [PATCH 3727/4122] fsdax: dedupe: iter two files at the same time

The iomap_iter() on a range of one file may loop more than once.  In this
case, the inner dst_iter can update its iomap but the outer src_iter
can't.  This may cause the wrong remapping in filesystem.  Let them called
at the same time.

Link: https://lkml.kernel.org/r/1669908701-93-1-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/dax.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index fa547ce41add4..8fb928cd9dce5 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1965,15 +1965,15 @@ int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 		.len		= len,
 		.flags		= IOMAP_DAX,
 	};
-	int ret;
+	int ret, compared = 0;
 
-	while ((ret = iomap_iter(&src_iter, ops)) > 0) {
-		while ((ret = iomap_iter(&dst_iter, ops)) > 0) {
-			dst_iter.processed = dax_range_compare_iter(&src_iter,
-						&dst_iter, len, same);
-		}
-		if (ret <= 0)
-			src_iter.processed = ret;
+	while ((ret = iomap_iter(&src_iter, ops)) > 0 &&
+	       (ret = iomap_iter(&dst_iter, ops)) > 0) {
+		compared = dax_range_compare_iter(&src_iter, &dst_iter, len,
+						  same);
+		if (compared < 0)
+			return ret;
+		src_iter.processed = dst_iter.processed = compared;
 	}
 	return ret;
 }
-- 
GitLab


From 64e6edc185da7e101e867c4732c097fedb1da08e Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:32:10 +0000
Subject: [PATCH 3728/4122] xfs: use dax ops for zero and truncate in fsdax
 mode

Zero and truncate on a dax file may execute CoW.  So use dax ops which
contains end work for CoW.

Link: https://lkml.kernel.org/r/1669908730-131-1-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/xfs/xfs_iomap.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 881de99766cad..d9401d0300ad1 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1370,7 +1370,7 @@ xfs_zero_range(
 
 	if (IS_DAX(inode))
 		return dax_zero_range(inode, pos, len, did_zero,
-				      &xfs_direct_write_iomap_ops);
+				      &xfs_dax_write_iomap_ops);
 	return iomap_zero_range(inode, pos, len, did_zero,
 				&xfs_buffered_write_iomap_ops);
 }
@@ -1385,7 +1385,7 @@ xfs_truncate_page(
 
 	if (IS_DAX(inode))
 		return dax_truncate_page(inode, pos, did_zero,
-					&xfs_direct_write_iomap_ops);
+					&xfs_dax_write_iomap_ops);
 	return iomap_truncate_page(inode, pos, did_zero,
 				   &xfs_buffered_write_iomap_ops);
 }
-- 
GitLab


From d984648e428bf88cbd94ebe346c73632cb92fffb Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:32:33 +0000
Subject: [PATCH 3729/4122] fsdax,xfs: port unshare to fsdax

Implement unshare in fsdax mode: copy data from srcmap to iomap.

Link: https://lkml.kernel.org/r/1669908753-169-1-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/dax.c             | 52 ++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.c |  8 +++++--
 include/linux/dax.h  |  2 ++
 3 files changed, 60 insertions(+), 2 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index 8fb928cd9dce5..c48a3a93ab297 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1245,6 +1245,58 @@ static vm_fault_t dax_pmd_load_hole(struct xa_state *xas, struct vm_fault *vmf,
 }
 #endif /* CONFIG_FS_DAX_PMD */
 
+static s64 dax_unshare_iter(struct iomap_iter *iter)
+{
+	struct iomap *iomap = &iter->iomap;
+	const struct iomap *srcmap = iomap_iter_srcmap(iter);
+	loff_t pos = iter->pos;
+	loff_t length = iomap_length(iter);
+	int id = 0;
+	s64 ret = 0;
+	void *daddr = NULL, *saddr = NULL;
+
+	/* don't bother with blocks that are not shared to start with */
+	if (!(iomap->flags & IOMAP_F_SHARED))
+		return length;
+	/* don't bother with holes or unwritten extents */
+	if (srcmap->type == IOMAP_HOLE || srcmap->type == IOMAP_UNWRITTEN)
+		return length;
+
+	id = dax_read_lock();
+	ret = dax_iomap_direct_access(iomap, pos, length, &daddr, NULL);
+	if (ret < 0)
+		goto out_unlock;
+
+	ret = dax_iomap_direct_access(srcmap, pos, length, &saddr, NULL);
+	if (ret < 0)
+		goto out_unlock;
+
+	ret = copy_mc_to_kernel(daddr, saddr, length);
+	if (ret)
+		ret = -EIO;
+
+out_unlock:
+	dax_read_unlock(id);
+	return ret;
+}
+
+int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
+		const struct iomap_ops *ops)
+{
+	struct iomap_iter iter = {
+		.inode		= inode,
+		.pos		= pos,
+		.len		= len,
+		.flags		= IOMAP_WRITE | IOMAP_UNSHARE | IOMAP_DAX,
+	};
+	int ret;
+
+	while ((ret = iomap_iter(&iter, ops)) > 0)
+		iter.processed = dax_unshare_iter(&iter);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(dax_file_unshare);
+
 static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
 {
 	const struct iomap *iomap = &iter->iomap;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 93bdd25680bc9..fe46bce8cae63 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -1693,8 +1693,12 @@ xfs_reflink_unshare(
 
 	inode_dio_wait(inode);
 
-	error = iomap_file_unshare(inode, offset, len,
-			&xfs_buffered_write_iomap_ops);
+	if (IS_DAX(inode))
+		error = dax_file_unshare(inode, offset, len,
+				&xfs_dax_write_iomap_ops);
+	else
+		error = iomap_file_unshare(inode, offset, len,
+				&xfs_buffered_write_iomap_ops);
 	if (error)
 		goto out;
 
diff --git a/include/linux/dax.h b/include/linux/dax.h
index ba985333e26bf..2b5ecb5910591 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -205,6 +205,8 @@ static inline void dax_unlock_mapping_entry(struct address_space *mapping,
 }
 #endif
 
+int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
+		const struct iomap_ops *ops);
 int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 		const struct iomap_ops *ops);
 int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
-- 
GitLab


From 480017957d6380d3336a8e80ad90f70415bb86f7 Mon Sep 17 00:00:00 2001
From: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Date: Thu, 1 Dec 2022 15:32:53 +0000
Subject: [PATCH 3730/4122] xfs: remove restrictions for fsdax and reflink

Since the basic function for fsdax and reflink has been implemented,
remove the restrictions of them for widly test.

Link: https://lkml.kernel.org/r/1669908773-207-1-git-send-email-ruansy.fnst@fujitsu.com
Signed-off-by: Shiyang Ruan <ruansy.fnst@fujitsu.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/xfs/xfs_ioctl.c | 4 ----
 fs/xfs/xfs_iops.c  | 4 ----
 2 files changed, 8 deletions(-)

diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 1f783e9796296..13f1b2add3904 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1138,10 +1138,6 @@ xfs_ioctl_setattr_xflags(
 	if ((fa->fsx_xflags & FS_XFLAG_REALTIME) && xfs_is_reflink_inode(ip))
 		ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
 
-	/* Don't allow us to set DAX mode for a reflinked file for now. */
-	if ((fa->fsx_xflags & FS_XFLAG_DAX) && xfs_is_reflink_inode(ip))
-		return -EINVAL;
-
 	/* diflags2 only valid for v3 inodes. */
 	i_flags2 = xfs_flags2diflags2(ip, fa->fsx_xflags);
 	if (i_flags2 && !xfs_has_v3inodes(mp))
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 2e10e1c66ad60..bf0495f7a5e1c 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1185,10 +1185,6 @@ xfs_inode_supports_dax(
 	if (!S_ISREG(VFS_I(ip)->i_mode))
 		return false;
 
-	/* Only supported on non-reflinked files. */
-	if (xfs_is_reflink_inode(ip))
-		return false;
-
 	/* Block size must match page size */
 	if (mp->m_sb.sb_blocksize != PAGE_SIZE)
 		return false;
-- 
GitLab


From ac4b2901a112e4dcee1455c96d89ef83cc7aa545 Mon Sep 17 00:00:00 2001
From: Deyan Wang <wonder_rock@126.com>
Date: Thu, 1 Dec 2022 21:50:45 +0800
Subject: [PATCH 3731/4122] mm/page_alloc: update comments in __free_pages_ok()

Add a comment to explain why we call get_pfnblock_migratetype() twice in
__free_pages_ok().

Link: https://lkml.kernel.org/r/20221201135045.31663-1-wonder_rock@126.com
Signed-off-by: Deyan Wang <wonder_rock@126.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/page_alloc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5ab9dd29ef7ec..0745aedebb371 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1702,6 +1702,11 @@ static void __free_pages_ok(struct page *page, unsigned int order,
 	if (!free_pages_prepare(page, order, true, fpi_flags))
 		return;
 
+	/*
+	 * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here
+	 * is used to avoid calling get_pfnblock_migratetype() under the lock.
+	 * This will reduce the lock holding time.
+	 */
 	migratetype = get_pfnblock_migratetype(page, pfn);
 
 	spin_lock_irqsave(&zone->lock, flags);
-- 
GitLab


From a11774122180a782b327b0a9a5091d99c91a4db7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2022 11:26:38 +0100
Subject: [PATCH 3732/4122] extfat: remove ->writepage

Patch series "start removing writepage instances v2".

The VM doesn't need or want ->writepage for writeback and is fine with
just having ->writepages as long as ->migrate_folio is implemented.

This series removes all ->writepage instances that use
block_write_full_page directly and also have a plain mpage_writepages
based ->writepages.


This patch (of 7):

->writepage is a very inefficient method to write back data, and only used
through write_cache_pages or a a fallback when no ->migrate_folio method
is present.

Set ->migrate_folio to the generic buffer_head based helper, and remove
the ->writepage implementation.

Link: https://lkml.kernel.org/r/20221202102644.770505-1-hch@lst.de
Link: https://lkml.kernel.org/r/20221202102644.770505-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Namjae Jeon <linkinjeon@kernel.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Bob Copeland <me@bobcopeland.com>
Cc: Dave Kleikamp <shaggy@kernel.org>
Cc: Jan Kara <jack@suse.com>
Cc: Mikulas Patocka <mikulas@artax.karlin.mff.cuni.cz>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/exfat/inode.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c
index 5590a1e83126c..eac95bcd9a8aa 100644
--- a/fs/exfat/inode.c
+++ b/fs/exfat/inode.c
@@ -345,11 +345,6 @@ static void exfat_readahead(struct readahead_control *rac)
 	mpage_readahead(rac, exfat_get_block);
 }
 
-static int exfat_writepage(struct page *page, struct writeback_control *wbc)
-{
-	return block_write_full_page(page, exfat_get_block, wbc);
-}
-
 static int exfat_writepages(struct address_space *mapping,
 		struct writeback_control *wbc)
 {
@@ -473,12 +468,12 @@ static const struct address_space_operations exfat_aops = {
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio	= exfat_read_folio,
 	.readahead	= exfat_readahead,
-	.writepage	= exfat_writepage,
 	.writepages	= exfat_writepages,
 	.write_begin	= exfat_write_begin,
 	.write_end	= exfat_write_end,
 	.direct_IO	= exfat_direct_IO,
-	.bmap		= exfat_aop_bmap
+	.bmap		= exfat_aop_bmap,
+	.migrate_folio	= buffer_migrate_folio,
 };
 
 static inline unsigned long exfat_hash(loff_t i_pos)
-- 
GitLab


From ee649af0d9a60ea61a5dad99ef5d6b4aa346f0a0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2022 11:26:39 +0100
Subject: [PATCH 3733/4122] fat: remove ->writepage

->writepage is a very inefficient method to write back data, and only
used through write_cache_pages or a a fallback when no ->migrate_folio
method is present.

Set ->migrate_folio to the generic buffer_head based helper, and remove
the ->writepage implementation.

Link: https://lkml.kernel.org/r/20221202102644.770505-3-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/fat/inode.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 1cbcc4608dc78..d99b8549ec8f9 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -194,11 +194,6 @@ static int fat_get_block(struct inode *inode, sector_t iblock,
 	return 0;
 }
 
-static int fat_writepage(struct page *page, struct writeback_control *wbc)
-{
-	return block_write_full_page(page, fat_get_block, wbc);
-}
-
 static int fat_writepages(struct address_space *mapping,
 			  struct writeback_control *wbc)
 {
@@ -346,12 +341,12 @@ static const struct address_space_operations fat_aops = {
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio	= fat_read_folio,
 	.readahead	= fat_readahead,
-	.writepage	= fat_writepage,
 	.writepages	= fat_writepages,
 	.write_begin	= fat_write_begin,
 	.write_end	= fat_write_end,
 	.direct_IO	= fat_direct_IO,
-	.bmap		= _fat_bmap
+	.bmap		= _fat_bmap,
+	.migrate_folio	= buffer_migrate_folio,
 };
 
 /*
-- 
GitLab


From ba195d9f14829690b8e4f67549960d83169a314e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2022 11:26:40 +0100
Subject: [PATCH 3734/4122] hfs: remove ->writepage

->writepage is a very inefficient method to write back data, and only
used through write_cache_pages or a a fallback when no ->migrate_folio
method is present.

Set ->migrate_folio to the generic buffer_head based helper, and stop
wiring up ->writepage for hfs_aops.

Link: https://lkml.kernel.org/r/20221202102644.770505-4-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/hfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index c4526f16355d5..16466a5e88b44 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -173,12 +173,12 @@ const struct address_space_operations hfs_aops = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio	= hfs_read_folio,
-	.writepage	= hfs_writepage,
 	.write_begin	= hfs_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= hfs_bmap,
 	.direct_IO	= hfs_direct_IO,
 	.writepages	= hfs_writepages,
+	.migrate_folio	= buffer_migrate_folio,
 };
 
 /*
-- 
GitLab


From 12f9b9a73dc603e658bf24eed2777cecdaf4103e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2022 11:26:41 +0100
Subject: [PATCH 3735/4122] hfsplus: remove ->writepage

->writepage is a very inefficient method to write back data, and only
used through write_cache_pages or a a fallback when no ->migrate_folio
method is present.

Set ->migrate_folio to the generic buffer_head based helper, and stop
wiring up ->writepage for hfsplus_aops.

Link: https://lkml.kernel.org/r/20221202102644.770505-5-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/hfsplus/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index aeab83ed1c9c6..d6572ad2407a7 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -170,12 +170,12 @@ const struct address_space_operations hfsplus_aops = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio	= hfsplus_read_folio,
-	.writepage	= hfsplus_writepage,
 	.write_begin	= hfsplus_write_begin,
 	.write_end	= generic_write_end,
 	.bmap		= hfsplus_bmap,
 	.direct_IO	= hfsplus_direct_IO,
 	.writepages	= hfsplus_writepages,
+	.migrate_folio	= buffer_migrate_folio,
 };
 
 const struct dentry_operations hfsplus_dentry_operations = {
-- 
GitLab


From cd2e6024260de27a523e0af6ee47a20a6b8b8aa8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2022 11:26:42 +0100
Subject: [PATCH 3736/4122] hpfs: remove ->writepage

->writepage is a very inefficient method to write back data, and only
used through write_cache_pages or a a fallback when no ->migrate_folio
method is present.

Set ->migrate_folio to the generic buffer_head based helper, and remove
the ->writepage implementation.

Link: https://lkml.kernel.org/r/20221202102644.770505-6-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/hpfs/file.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c
index f7547a62c81f6..88952d4a631e6 100644
--- a/fs/hpfs/file.c
+++ b/fs/hpfs/file.c
@@ -163,11 +163,6 @@ static int hpfs_read_folio(struct file *file, struct folio *folio)
 	return mpage_read_folio(folio, hpfs_get_block);
 }
 
-static int hpfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-	return block_write_full_page(page, hpfs_get_block, wbc);
-}
-
 static void hpfs_readahead(struct readahead_control *rac)
 {
 	mpage_readahead(rac, hpfs_get_block);
@@ -248,12 +243,12 @@ const struct address_space_operations hpfs_aops = {
 	.dirty_folio	= block_dirty_folio,
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio = hpfs_read_folio,
-	.writepage = hpfs_writepage,
 	.readahead = hpfs_readahead,
 	.writepages = hpfs_writepages,
 	.write_begin = hpfs_write_begin,
 	.write_end = hpfs_write_end,
-	.bmap = _hpfs_bmap
+	.bmap = _hpfs_bmap,
+	.migrate_folio = buffer_migrate_folio,
 };
 
 const struct file_operations hpfs_file_ops =
-- 
GitLab


From 2274c3b281bb47e6980ae42fb8dc93b7a38192d5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2022 11:26:43 +0100
Subject: [PATCH 3737/4122] jfs: remove ->writepage

->writepage is a very inefficient method to write back data, and only
used through write_cache_pages or a a fallback when no ->migrate_folio
method is present.

Set ->migrate_folio to the generic buffer_head based helper, and remove
the ->writepage implementation.

Link: https://lkml.kernel.org/r/20221202102644.770505-7-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/jfs/inode.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index d1ec920aa030a..8ac10e3960508 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -264,11 +264,6 @@ int jfs_get_block(struct inode *ip, sector_t lblock,
 	return rc;
 }
 
-static int jfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-	return block_write_full_page(page, jfs_get_block, wbc);
-}
-
 static int jfs_writepages(struct address_space *mapping,
 			struct writeback_control *wbc)
 {
@@ -355,12 +350,12 @@ const struct address_space_operations jfs_aops = {
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio	= jfs_read_folio,
 	.readahead	= jfs_readahead,
-	.writepage	= jfs_writepage,
 	.writepages	= jfs_writepages,
 	.write_begin	= jfs_write_begin,
 	.write_end	= jfs_write_end,
 	.bmap		= jfs_bmap,
 	.direct_IO	= jfs_direct_IO,
+	.migrate_folio	= buffer_migrate_folio,
 };
 
 /*
-- 
GitLab


From 1bda9dad5aa0199c8592bac32b91afbf8ea236ff Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 2 Dec 2022 11:26:44 +0100
Subject: [PATCH 3738/4122] omfs: remove ->writepage

->writepage is a very inefficient method to write back data, and only
used through write_cache_pages or a a fallback when no ->migrate_folio
method is present.

Set ->migrate_folio to the generic buffer_head based helper, and remove
the ->writepage implementation.

Link: https://lkml.kernel.org/r/20221202102644.770505-8-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Bob Copeland <me@bobcopeland.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/omfs/file.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/fs/omfs/file.c b/fs/omfs/file.c
index fa7fe2393ff68..3a5b4b88a5838 100644
--- a/fs/omfs/file.c
+++ b/fs/omfs/file.c
@@ -294,11 +294,6 @@ static void omfs_readahead(struct readahead_control *rac)
 	mpage_readahead(rac, omfs_get_block);
 }
 
-static int omfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-	return block_write_full_page(page, omfs_get_block, wbc);
-}
-
 static int
 omfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
@@ -375,10 +370,10 @@ const struct address_space_operations omfs_aops = {
 	.invalidate_folio = block_invalidate_folio,
 	.read_folio = omfs_read_folio,
 	.readahead = omfs_readahead,
-	.writepage = omfs_writepage,
 	.writepages = omfs_writepages,
 	.write_begin = omfs_write_begin,
 	.write_end = generic_write_end,
 	.bmap = omfs_bmap,
+	.migrate_folio = buffer_migrate_folio,
 };
 
-- 
GitLab


From 675eaca1f441acd4f0d403d71036b100cd49036a Mon Sep 17 00:00:00 2001
From: Alistair Popple <apopple@nvidia.com>
Date: Fri, 2 Dec 2022 15:53:39 +1100
Subject: [PATCH 3739/4122] mm/mmap: properly unaccount memory on
 mas_preallocate() failure

security_vm_enough_memory_mm() accounts memory via a call to
vm_acct_memory().  Therefore any subsequent failures should unaccount for
this memory prior to returning the error.

Link: https://lkml.kernel.org/r/20221202045339.2999017-1-apopple@nvidia.com
Fixes: 28c5609fb236 ("mm/mmap: preallocate maple nodes for brk vma expansion")
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mmap.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/mm/mmap.c b/mm/mmap.c
index 1741273ac34c7..7d24fc478ffa5 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2953,7 +2953,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 				addr >> PAGE_SHIFT, NULL_VM_UFFD_CTX, NULL)) {
 		mas_set_range(mas, vma->vm_start, addr + len - 1);
 		if (mas_preallocate(mas, vma, GFP_KERNEL))
-			return -ENOMEM;
+			goto unacct_fail;
 
 		vma_adjust_trans_huge(vma, vma->vm_start, addr + len, 0);
 		if (vma->anon_vma) {
@@ -2975,7 +2975,7 @@ static int do_brk_flags(struct ma_state *mas, struct vm_area_struct *vma,
 	/* create a vma struct for an anonymous mapping */
 	vma = vm_area_alloc(mm);
 	if (!vma)
-		goto vma_alloc_fail;
+		goto unacct_fail;
 
 	vma_set_anonymous(vma);
 	vma->vm_start = addr;
@@ -3000,7 +3000,7 @@ out:
 
 mas_store_fail:
 	vm_area_free(vma);
-vma_alloc_fail:
+unacct_fail:
 	vm_unacct_memory(len >> PAGE_SHIFT);
 	return -ENOMEM;
 }
-- 
GitLab


From adb8213014b25c7f1d75d5b219becaadcd695efb Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Fri, 2 Dec 2022 03:15:10 +0000
Subject: [PATCH 3740/4122] mm: memcg: fix stale protection of reclaim target
 memcg

Patch series "mm: memcg: fix protection of reclaim target memcg", v3.

This series fixes a bug in calculating the protection of the reclaim
target memcg where we end up using stale effective protection values from
the last reclaim operation, instead of completely ignoring the protection
of the reclaim target as intended.  More detailed explanation and examples
in patch 1, which includes the fix.  Patches 2 & 3 introduce a selftest
case that catches the bug.


This patch (of 3):

When we are doing memcg reclaim, the intended behavior is that we
ignore any protection (memory.min, memory.low) of the target memcg (but
not its children).  Ever since the patch pointed to by the "Fixes" tag,
we actually read a stale value for the target memcg protection when
deciding whether to skip the memcg or not because it is protected.  If
the stale value happens to be high enough, we don't reclaim from the
target memcg.

Essentially, in some cases we may falsely skip reclaiming from the
target memcg of reclaim because we read a stale protection value from
last time we reclaimed from it.


During reclaim, mem_cgroup_calculate_protection() is used to determine the
effective protection (emin and elow) values of a memcg.  The protection of
the reclaim target is ignored, but we cannot set their effective
protection to 0 due to a limitation of the current implementation (see
comment in mem_cgroup_protection()).  Instead, we leave their effective
protection values unchaged, and later ignore it in
mem_cgroup_protection().

However, mem_cgroup_protection() is called later in
shrink_lruvec()->get_scan_count(), which is after the
mem_cgroup_below_{min/low}() checks in shrink_node_memcgs().  As a result,
the stale effective protection values of the target memcg may lead us to
skip reclaiming from the target memcg entirely, before calling
shrink_lruvec().  This can be even worse with recursive protection, where
the stale target memcg protection can be higher than its standalone
protection.  See two examples below (a similar version of example (a) is
added to test_memcontrol in a later patch).

(a) A simple example with proactive reclaim is as follows. Consider the
following hierarchy:
ROOT
 |
 A
 |
 B (memory.min = 10M)

Consider the following scenario:
- B has memory.current = 10M.
- The system undergoes global reclaim (or memcg reclaim in A).
- In shrink_node_memcgs():
  - mem_cgroup_calculate_protection() calculates the effective min (emin)
    of B as 10M.
  - mem_cgroup_below_min() returns true for B, we do not reclaim from B.
- Now if we want to reclaim 5M from B using proactive reclaim
  (memory.reclaim), we should be able to, as the protection of the
  target memcg should be ignored.
- In shrink_node_memcgs():
  - mem_cgroup_calculate_protection() immediately returns for B without
    doing anything, as B is the target memcg, relying on
    mem_cgroup_protection() to ignore B's stale effective min (still 10M).
  - mem_cgroup_below_min() reads the stale effective min for B and we
    skip it instead of ignoring its protection as intended, as we never
    reach mem_cgroup_protection().

(b) An more complex example with recursive protection is as follows.
Consider the following hierarchy with memory_recursiveprot:
ROOT
 |
 A (memory.min = 50M)
 |
 B (memory.min = 10M, memory.high = 40M)

Consider the following scenario:
- B has memory.current = 35M.
- The system undergoes global reclaim (target memcg is NULL).
- B will have an effective min of 50M (all of A's unclaimed protection).
- B will not be reclaimed from.
- Now allocate 10M more memory in B, pushing it above it's high limit.
- The system undergoes memcg reclaim from B (target memcg is B).
- Like example (a), we do nothing in mem_cgroup_calculate_protection(),
  then call mem_cgroup_below_min(), which will read the stale effective
  min for B (50M) and skip it. In this case, it's even worse because we
  are not just considering B's standalone protection (10M), but we are
  reading a much higher stale protection (50M) which will cause us to not
  reclaim from B at all.

This is an artifact of commit 45c7f7e1ef17 ("mm, memcg: decouple
e{low,min} state mutations from protection checks") which made
mem_cgroup_calculate_protection() only change the state without returning
any value.  Before that commit, we used to return MEMCG_PROT_NONE for the
target memcg, which would cause us to skip the
mem_cgroup_below_{min/low}() checks.  After that commit we do not return
anything and we end up checking the min & low effective protections for
the target memcg, which are stale.

Update mem_cgroup_supports_protection() to also check if we are reclaiming
from the target, and rename it to mem_cgroup_unprotected() (now returns
true if we should not protect the memcg, much simpler logic).

Link: https://lkml.kernel.org/r/20221202031512.1365483-1-yosryahmed@google.com
Link: https://lkml.kernel.org/r/20221202031512.1365483-2-yosryahmed@google.com
Fixes: 45c7f7e1ef17 ("mm, memcg: decouple e{low,min} state mutations from protection checks")
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Chris Down <chris@chrisdown.name>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasily Averin <vasily.averin@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/memcontrol.h | 31 +++++++++++++++++++++----------
 mm/vmscan.c                | 11 ++++++-----
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index e1644a24009c8..d3c8203cab6ce 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -615,28 +615,32 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root,
 void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 				     struct mem_cgroup *memcg);
 
-static inline bool mem_cgroup_supports_protection(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
+					  struct mem_cgroup *memcg)
 {
 	/*
 	 * The root memcg doesn't account charges, and doesn't support
-	 * protection.
+	 * protection. The target memcg's protection is ignored, see
+	 * mem_cgroup_calculate_protection() and mem_cgroup_protection()
 	 */
-	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg);
-
+	return mem_cgroup_disabled() || mem_cgroup_is_root(memcg) ||
+		memcg == target;
 }
 
-static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
+					struct mem_cgroup *memcg)
 {
-	if (!mem_cgroup_supports_protection(memcg))
+	if (mem_cgroup_unprotected(target, memcg))
 		return false;
 
 	return READ_ONCE(memcg->memory.elow) >=
 		page_counter_read(&memcg->memory);
 }
 
-static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
+					struct mem_cgroup *memcg)
 {
-	if (!mem_cgroup_supports_protection(memcg))
+	if (mem_cgroup_unprotected(target, memcg))
 		return false;
 
 	return READ_ONCE(memcg->memory.emin) >=
@@ -1209,12 +1213,19 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 {
 }
 
-static inline bool mem_cgroup_below_low(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_unprotected(struct mem_cgroup *target,
+					  struct mem_cgroup *memcg)
+{
+	return true;
+}
+static inline bool mem_cgroup_below_low(struct mem_cgroup *target,
+					struct mem_cgroup *memcg)
 {
 	return false;
 }
 
-static inline bool mem_cgroup_below_min(struct mem_cgroup *memcg)
+static inline bool mem_cgroup_below_min(struct mem_cgroup *target,
+					struct mem_cgroup *memcg)
 {
 	return false;
 }
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 9356a3ee639ce..dcd476a66a596 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4513,7 +4513,7 @@ static bool age_lruvec(struct lruvec *lruvec, struct scan_control *sc, unsigned
 
 	mem_cgroup_calculate_protection(NULL, memcg);
 
-	if (mem_cgroup_below_min(memcg))
+	if (mem_cgroup_below_min(NULL, memcg))
 		return false;
 
 	need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, swappiness, &nr_to_scan);
@@ -5100,8 +5100,9 @@ static unsigned long get_nr_to_scan(struct lruvec *lruvec, struct scan_control *
 	DEFINE_MAX_SEQ(lruvec);
 	DEFINE_MIN_SEQ(lruvec);
 
-	if (mem_cgroup_below_min(memcg) ||
-	    (mem_cgroup_below_low(memcg) && !sc->memcg_low_reclaim))
+	if (mem_cgroup_below_min(sc->target_mem_cgroup, memcg) ||
+	    (mem_cgroup_below_low(sc->target_mem_cgroup, memcg) &&
+	     !sc->memcg_low_reclaim))
 		return 0;
 
 	*need_aging = should_run_aging(lruvec, max_seq, min_seq, sc, can_swap, &nr_to_scan);
@@ -6096,13 +6097,13 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 
 		mem_cgroup_calculate_protection(target_memcg, memcg);
 
-		if (mem_cgroup_below_min(memcg)) {
+		if (mem_cgroup_below_min(target_memcg, memcg)) {
 			/*
 			 * Hard protection.
 			 * If there is no reclaimable memory, OOM.
 			 */
 			continue;
-		} else if (mem_cgroup_below_low(memcg)) {
+		} else if (mem_cgroup_below_low(target_memcg, memcg)) {
 			/*
 			 * Soft protection.
 			 * Respect the protection only as long as
-- 
GitLab


From e5d64edac64531375716fabe35c9e0a502ca2894 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Fri, 2 Dec 2022 03:15:11 +0000
Subject: [PATCH 3741/4122] selftests: cgroup: refactor proactive reclaim code
 to reclaim_until()

Refactor the code that drives writing to memory.reclaim (retrying, error
handling, etc) from test_memcg_reclaim() to a helper called
reclaim_until(), which proactively reclaims from a memcg until its usage
reaches a certain value.

While we are at it, refactor and simplify the reclaim loop.

This will be used in a following patch in another test.

Link: https://lkml.kernel.org/r/20221202031512.1365483-3-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Suggested-by: Roman Gushchin <roman.gushchin@linux.dev>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Chris Down <chris@chrisdown.name>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasily Averin <vasily.averin@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 .../selftests/cgroup/test_memcontrol.c        | 80 ++++++++++---------
 1 file changed, 44 insertions(+), 36 deletions(-)

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index 8833359556f38..a8f4700353a4d 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -645,6 +645,48 @@ cleanup:
 	return ret;
 }
 
+/*
+ * Reclaim from @memcg until usage reaches @goal by writing to
+ * memory.reclaim.
+ *
+ * This function will return false if the usage is already below the
+ * goal.
+ *
+ * This function assumes that writing to memory.reclaim is the only
+ * source of change in memory.current (no concurrent allocations or
+ * reclaim).
+ *
+ * This function makes sure memory.reclaim is sane. It will return
+ * false if memory.reclaim's error codes do not make sense, even if
+ * the usage goal was satisfied.
+ */
+static bool reclaim_until(const char *memcg, long goal)
+{
+	char buf[64];
+	int retries, err;
+	long current, to_reclaim;
+	bool reclaimed = false;
+
+	for (retries = 5; retries > 0; retries--) {
+		current = cg_read_long(memcg, "memory.current");
+
+		if (current < goal || values_close(current, goal, 3))
+			break;
+		/* Did memory.reclaim return 0 incorrectly? */
+		else if (reclaimed)
+			return false;
+
+		to_reclaim = current - goal;
+		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
+		err = cg_write(memcg, "memory.reclaim", buf);
+		if (!err)
+			reclaimed = true;
+		else if (err != -EAGAIN)
+			return false;
+	}
+	return reclaimed;
+}
+
 /*
  * This test checks that memory.reclaim reclaims the given
  * amount of memory (from both anon and file, if possible).
@@ -653,8 +695,7 @@ static int test_memcg_reclaim(const char *root)
 {
 	int ret = KSFT_FAIL, fd, retries;
 	char *memcg;
-	long current, expected_usage, to_reclaim;
-	char buf[64];
+	long current, expected_usage;
 
 	memcg = cg_name(root, "memcg_test");
 	if (!memcg)
@@ -705,41 +746,8 @@ static int test_memcg_reclaim(const char *root)
 	 * Reclaim until current reaches 30M, this makes sure we hit both anon
 	 * and file if swap is enabled.
 	 */
-	retries = 5;
-	while (true) {
-		int err;
-
-		current = cg_read_long(memcg, "memory.current");
-		to_reclaim = current - MB(30);
-
-		/*
-		 * We only keep looping if we get EAGAIN, which means we could
-		 * not reclaim the full amount.
-		 */
-		if (to_reclaim <= 0)
-			goto cleanup;
-
-
-		snprintf(buf, sizeof(buf), "%ld", to_reclaim);
-		err = cg_write(memcg, "memory.reclaim", buf);
-		if (!err) {
-			/*
-			 * If writing succeeds, then the written amount should have been
-			 * fully reclaimed (and maybe more).
-			 */
-			current = cg_read_long(memcg, "memory.current");
-			if (!values_close(current, MB(30), 3) && current > MB(30))
-				goto cleanup;
-			break;
-		}
-
-		/* The kernel could not reclaim the full amount, try again. */
-		if (err == -EAGAIN && retries--)
-			continue;
-
-		/* We got an unexpected error or ran out of retries. */
+	if (!reclaim_until(memcg, MB(30)))
 		goto cleanup;
-	}
 
 	ret = KSFT_PASS;
 cleanup:
-- 
GitLab


From 1c74697776e17619e485a40cf8cfdb4bf18fd18e Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Fri, 2 Dec 2022 03:15:12 +0000
Subject: [PATCH 3742/4122] selftests: cgroup: make sure reclaim target memcg
 is unprotected

Make sure that we ignore protection of a memcg that is the target of memcg
reclaim.

Link: https://lkml.kernel.org/r/20221202031512.1365483-4-yosryahmed@google.com
Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Reviewed-by: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Chris Down <chris@chrisdown.name>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vasily Averin <vasily.averin@linux.dev>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/cgroup/test_memcontrol.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index a8f4700353a4d..1e616a8c6a9cf 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -238,6 +238,8 @@ static int cg_test_proc_killed(const char *cgroup)
 	return -1;
 }
 
+static bool reclaim_until(const char *memcg, long goal);
+
 /*
  * First, this test creates the following hierarchy:
  * A       memory.min = 0,    memory.max = 200M
@@ -266,6 +268,12 @@ static int cg_test_proc_killed(const char *cgroup)
  * unprotected memory in A available, and checks that:
  * a) memory.min protects pagecache even in this case,
  * b) memory.low allows reclaiming page cache with low events.
+ *
+ * Then we try to reclaim from A/B/C using memory.reclaim until its
+ * usage reaches 10M.
+ * This makes sure that:
+ * (a) We ignore the protection of the reclaim target memcg.
+ * (b) The previously calculated emin value (~29M) should be dismissed.
  */
 static int test_memcg_protection(const char *root, bool min)
 {
@@ -385,6 +393,9 @@ static int test_memcg_protection(const char *root, bool min)
 	if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
 		goto cleanup;
 
+	if (!reclaim_until(children[0], MB(10)))
+		goto cleanup;
+
 	if (min) {
 		ret = KSFT_PASS;
 		goto cleanup;
-- 
GitLab


From 6b426d071419a40f61fe41fe1bd9e1b4fa5aeb37 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Thu, 1 Dec 2022 15:33:17 -0800
Subject: [PATCH 3743/4122] mm: disable top-tier fallback to reclaim on
 proactive reclaim

Reclaiming directly from top tier nodes breaks the aging pipeline of
memory tiers.  If we have a RAM -> CXL -> storage hierarchy, we should
demote from RAM to CXL and from CXL to storage.  If we reclaim a page from
RAM, it means we 'demote' it directly from RAM to storage, bypassing
potentially a huge amount of pages colder than it in CXL.

However disabling reclaim from top tier nodes entirely would cause ooms in
edge scenarios where lower tier memory is unreclaimable for whatever
reason, e.g.  memory being mlocked() or too hot to reclaim.  In these
cases we would rather the job run with a performance regression rather
than it oom altogether.

However, we can disable reclaim from top tier nodes for proactive reclaim.
That reclaim is not real memory pressure, and we don't have any cause to
be breaking the aging pipeline.

[akpm@linux-foundation.org: restore comment layout, per Ying Huang]
Link: https://lkml.kernel.org/r/20221201233317.1394958-1-almasrymina@google.com
Signed-off-by: Mina Almasry <almasrymina@google.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Reviewed-by: Yang Shi <shy828301@gmail.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Shakeel Butt <shakeelb@google.com>
Cc: Tim Chen <tim.c.chen@linux.intel.com>
Cc: Wei Xu <weixugc@google.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/vmscan.c | 25 ++++++++++++++++++++++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index dcd476a66a596..1a59171c6695c 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2088,10 +2088,29 @@ keep:
 	nr_reclaimed += demote_folio_list(&demote_folios, pgdat);
 	/* Folios that could not be demoted are still in @demote_folios */
 	if (!list_empty(&demote_folios)) {
-		/* Folios which weren't demoted go back on @folio_list for retry: */
+		/* Folios which weren't demoted go back on @folio_list */
 		list_splice_init(&demote_folios, folio_list);
-		do_demote_pass = false;
-		goto retry;
+
+		/*
+		 * goto retry to reclaim the undemoted folios in folio_list if
+		 * desired.
+		 *
+		 * Reclaiming directly from top tier nodes is not often desired
+		 * due to it breaking the LRU ordering: in general memory
+		 * should be reclaimed from lower tier nodes and demoted from
+		 * top tier nodes.
+		 *
+		 * However, disabling reclaim from top tier nodes entirely
+		 * would cause ooms in edge scenarios where lower tier memory
+		 * is unreclaimable for whatever reason, eg memory being
+		 * mlocked or too hot to reclaim. We can disable reclaim
+		 * from top tier nodes in proactive reclaim though as that is
+		 * not real memory pressure.
+		 */
+		if (!sc->proactive) {
+			do_demote_pass = false;
+			goto retry;
+		}
 	}
 
 	pgactivate = stat->nr_activate[0] + stat->nr_activate[1];
-- 
GitLab


From 12a5d3955227b0d7e04fb793ccceeb2a1dd275c5 Mon Sep 17 00:00:00 2001
From: Mina Almasry <almasrymina@google.com>
Date: Fri, 2 Dec 2022 14:35:31 -0800
Subject: [PATCH 3744/4122] mm: add nodes= arg to memory.reclaim

The nodes= arg instructs the kernel to only scan the given nodes for
proactive reclaim.  For example use cases, consider a 2 tier memory
system:

nodes 0,1 -> top tier
nodes 2,3 -> second tier

$ echo "1m nodes=0" > memory.reclaim

This instructs the kernel to attempt to reclaim 1m memory from node 0.
Since node 0 is a top tier node, demotion will be attempted first.  This
is useful to direct proactive reclaim to specific nodes that are under
pressure.

$ echo "1m nodes=2,3" > memory.reclaim

This instructs the kernel to attempt to reclaim 1m memory in the second
tier, since this tier of memory has no demotion targets the memory will be
reclaimed.

$ echo "1m nodes=0,1" > memory.reclaim

Instructs the kernel to reclaim memory from the top tier nodes, which can
be desirable according to the userspace policy if there is pressure on the
top tiers.  Since these nodes have demotion targets, the kernel will
attempt demotion first.

Since commit 3f1509c57b1b ("Revert "mm/vmscan: never demote for memcg
reclaim""), the proactive reclaim interface memory.reclaim does both
reclaim and demotion.  Reclaim and demotion incur different latency costs
to the jobs in the cgroup.  Demoted memory would still be addressable by
the userspace at a higher latency, but reclaimed memory would need to
incur a pagefault.

The 'nodes' arg is useful to allow the userspace to control demotion and
reclaim independently according to its policy: if the memory.reclaim is
called on a node with demotion targets, it will attempt demotion first; if
it is called on a node without demotion targets, it will only attempt
reclaim.

Link: https://lkml.kernel.org/r/20221202223533.1785418-1-almasrymina@google.com
Signed-off-by: Mina Almasry <almasrymina@google.com>
Acked-by: Michal Hocko <mhocko@suse.com>
Acked-by: Shakeel Butt <shakeelb@google.com>
Acked-by: Muchun Song <songmuchun@bytedance.com>
Cc: Bagas Sanjaya <bagasdotme@gmail.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Tejun Heo <tj@kernel.org>
Cc: Wei Xu <weixugc@google.com>
Cc: Yang Shi <yang.shi@linux.alibaba.com>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: zefan li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 Documentation/admin-guide/cgroup-v2.rst | 15 +++---
 include/linux/swap.h                    |  3 +-
 mm/memcontrol.c                         | 67 ++++++++++++++++++++-----
 mm/vmscan.c                             |  4 +-
 4 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst
index 74cec76be9f2c..c8ae7c897f142 100644
--- a/Documentation/admin-guide/cgroup-v2.rst
+++ b/Documentation/admin-guide/cgroup-v2.rst
@@ -1245,17 +1245,13 @@ PAGE_SIZE multiple when read back.
 	This is a simple interface to trigger memory reclaim in the
 	target cgroup.
 
-	This file accepts a single key, the number of bytes to reclaim.
-	No nested keys are currently supported.
+	This file accepts a string which contains the number of bytes to
+	reclaim.
 
 	Example::
 
 	  echo "1G" > memory.reclaim
 
-	The interface can be later extended with nested keys to
-	configure the reclaim behavior. For example, specify the
-	type of memory to reclaim from (anon, file, ..).
-
 	Please note that the kernel can over or under reclaim from
 	the target cgroup. If less bytes are reclaimed than the
 	specified amount, -EAGAIN is returned.
@@ -1267,6 +1263,13 @@ PAGE_SIZE multiple when read back.
 	This means that the networking layer will not adapt based on
 	reclaim induced by memory.reclaim.
 
+	This file also allows the user to specify the nodes to reclaim from,
+	via the 'nodes=' key, for example::
+
+	  echo "1G nodes=0,1" > memory.reclaim
+
+	The above instructs the kernel to reclaim memory from nodes 0,1.
+
   memory.peak
 	A read-only single value file which exists on non-root
 	cgroups.
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 0ceed49516adc..2787b84eaf12f 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -418,7 +418,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 						  unsigned long nr_pages,
 						  gfp_t gfp_mask,
-						  unsigned int reclaim_options);
+						  unsigned int reclaim_options,
+						  nodemask_t *nodemask);
 extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
 						gfp_t gfp_mask, bool noswap,
 						pg_data_t *pgdat,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2c7a91689fef6..ff65bc23be13c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -63,6 +63,7 @@
 #include <linux/resume_user_mode.h>
 #include <linux/psi.h>
 #include <linux/seq_buf.h>
+#include <linux/parser.h>
 #include "internal.h"
 #include <net/sock.h>
 #include <net/ip.h>
@@ -2392,7 +2393,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
 		psi_memstall_enter(&pflags);
 		nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
 							gfp_mask,
-							MEMCG_RECLAIM_MAY_SWAP);
+							MEMCG_RECLAIM_MAY_SWAP,
+							NULL);
 		psi_memstall_leave(&pflags);
 	} while ((memcg = parent_mem_cgroup(memcg)) &&
 		 !mem_cgroup_is_root(memcg));
@@ -2683,7 +2685,8 @@ retry:
 
 	psi_memstall_enter(&pflags);
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
-						    gfp_mask, reclaim_options);
+						    gfp_mask, reclaim_options,
+						    NULL);
 	psi_memstall_leave(&pflags);
 
 	if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
@@ -3503,7 +3506,8 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
 		}
 
 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
-					memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
+					memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP,
+					NULL)) {
 			ret = -EBUSY;
 			break;
 		}
@@ -3614,7 +3618,8 @@ static int mem_cgroup_force_empty(struct mem_cgroup *memcg)
 			return -EINTR;
 
 		if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
-						  MEMCG_RECLAIM_MAY_SWAP))
+						  MEMCG_RECLAIM_MAY_SWAP,
+						  NULL))
 			nr_retries--;
 	}
 
@@ -6418,7 +6423,8 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
 		}
 
 		reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
-					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
+					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
+					NULL);
 
 		if (!reclaimed && !nr_retries--)
 			break;
@@ -6467,7 +6473,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 
 		if (nr_reclaims) {
 			if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
-					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
+					GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP,
+					NULL))
 				nr_reclaims--;
 			continue;
 		}
@@ -6590,21 +6597,54 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of,
 	return nbytes;
 }
 
+enum {
+	MEMORY_RECLAIM_NODES = 0,
+	MEMORY_RECLAIM_NULL,
+};
+
+static const match_table_t if_tokens = {
+	{ MEMORY_RECLAIM_NODES, "nodes=%s" },
+	{ MEMORY_RECLAIM_NULL, NULL },
+};
+
 static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 			      size_t nbytes, loff_t off)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
 	unsigned int nr_retries = MAX_RECLAIM_RETRIES;
 	unsigned long nr_to_reclaim, nr_reclaimed = 0;
-	unsigned int reclaim_options;
-	int err;
+	unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP |
+				       MEMCG_RECLAIM_PROACTIVE;
+	char *old_buf, *start;
+	substring_t args[MAX_OPT_ARGS];
+	int token;
+	char value[256];
+	nodemask_t nodemask = NODE_MASK_ALL;
 
 	buf = strstrip(buf);
-	err = page_counter_memparse(buf, "", &nr_to_reclaim);
-	if (err)
-		return err;
 
-	reclaim_options	= MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
+	old_buf = buf;
+	nr_to_reclaim = memparse(buf, &buf) / PAGE_SIZE;
+	if (buf == old_buf)
+		return -EINVAL;
+
+	buf = strstrip(buf);
+
+	while ((start = strsep(&buf, " ")) != NULL) {
+		if (!strlen(start))
+			continue;
+		token = match_token(start, if_tokens, args);
+		match_strlcpy(value, args, sizeof(value));
+		switch (token) {
+		case MEMORY_RECLAIM_NODES:
+			if (nodelist_parse(value, nodemask) < 0)
+				return -EINVAL;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
 	while (nr_reclaimed < nr_to_reclaim) {
 		unsigned long reclaimed;
 
@@ -6621,7 +6661,8 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
 
 		reclaimed = try_to_free_mem_cgroup_pages(memcg,
 						nr_to_reclaim - nr_reclaimed,
-						GFP_KERNEL, reclaim_options);
+						GFP_KERNEL, reclaim_options,
+						&nodemask);
 
 		if (!reclaimed && !nr_retries--)
 			return -EAGAIN;
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 1a59171c6695c..2b42ac9ad7554 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6758,7 +6758,8 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
 unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 					   unsigned long nr_pages,
 					   gfp_t gfp_mask,
-					   unsigned int reclaim_options)
+					   unsigned int reclaim_options,
+					   nodemask_t *nodemask)
 {
 	unsigned long nr_reclaimed;
 	unsigned int noreclaim_flag;
@@ -6773,6 +6774,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
 		.may_unmap = 1,
 		.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
 		.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
+		.nodemask = nodemask,
 	};
 	/*
 	 * Traverse the ZONELIST_FALLBACK zonelist of the current node to put
-- 
GitLab


From c449deb2b99ff2458214ed4a3526277bc9e40757 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Sun, 4 Dec 2022 17:01:03 -0800
Subject: [PATCH 3745/4122] mm: memcg: fix swapcached stat accounting

I'd been worried by high "swapcached" counts in memcg OOM reports, thought
we had a problem freeing swapcache, but it was just the accounting that
was wrong.

Two issues:

1.  When __remove_mapping() removes swapcache,
   __delete_from_swap_cache() relies on memcg_data for the right counts to
   be updated; but that had already been reset by mem_cgroup_swapout().
   Swap those calls around - mem_cgroup_swapout() does not require the
   swapcached flag to be set.

   6.1 commit ac35a4902374 ("mm: multi-gen LRU: minimal
   implementation") already made a similar swap for workingset_eviction(),
   but not for this.

2.  memcg's "swapcached" count was added for memcg v2 stats, but
   displayed on OOM even for memcg v1: so mem_cgroup_move_account() ought
   to move it.

Link: https://lkml.kernel.org/r/b8b96ee0-1e1e-85f8-df97-c82a11d7cd14@google.com
Fixes: b6038942480e ("mm: memcg: add swapcache stat for memcg v2")
Signed-off-by: Hugh Dickins <hughd@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Shakeel Butt <shakeelb@google.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Yu Zhao <yuzhao@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/memcontrol.c | 6 ++++++
 mm/vmscan.c     | 3 +--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ff65bc23be13c..ab457f0394ab6 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5767,6 +5767,12 @@ static int mem_cgroup_move_account(struct page *page,
 		}
 	}
 
+#ifdef CONFIG_SWAP
+	if (folio_test_swapcache(folio)) {
+		__mod_lruvec_state(from_vec, NR_SWAPCACHE, -nr_pages);
+		__mod_lruvec_state(to_vec, NR_SWAPCACHE, nr_pages);
+	}
+#endif
 	if (folio_test_writeback(folio)) {
 		__mod_lruvec_state(from_vec, NR_WRITEBACK, -nr_pages);
 		__mod_lruvec_state(to_vec, NR_WRITEBACK, nr_pages);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 2b42ac9ad7554..aba991c505f19 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1368,11 +1368,10 @@ static int __remove_mapping(struct address_space *mapping, struct folio *folio,
 	if (folio_test_swapcache(folio)) {
 		swp_entry_t swap = folio_swap_entry(folio);
 
-		/* get a shadow entry before mem_cgroup_swapout() clears folio_memcg() */
 		if (reclaimed && !mapping_exiting(mapping))
 			shadow = workingset_eviction(folio, target_memcg);
-		mem_cgroup_swapout(folio, swap);
 		__delete_from_swap_cache(folio, swap, shadow);
+		mem_cgroup_swapout(folio, swap);
 		xa_unlock_irq(&mapping->i_pages);
 		put_swap_folio(folio, swap);
 	} else {
-- 
GitLab


From 6287b7dae80944bfa37784a8f9d6861a4facaa6e Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Sun, 4 Dec 2022 17:57:07 -0800
Subject: [PATCH 3746/4122] mm,thp,rmap: fix races between updates of
 subpages_mapcount

Commit 4b51634cd16a, introducing the COMPOUND_MAPPED bit, paid attention
to the impossibility of subpages_mapcount ever appearing negative; but did
not attend to those races in which it can momentarily appear larger than
thought possible.

These arise from how page_remove_rmap() first decrements page->_mapcount
or compound_mapcount, then, if that transition goes negative (logical 0),
decrements subpages_mapcount.  The initial decrement lets a racing
page_add_*_rmap() reincrement _mapcount or compound_mapcount immediately,
and then in rare cases its corresponding increment of subpages_mapcount
may be completed before page_remove_rmap()'s decrement.  There could even
(with increasing unlikelihood) be a series of increments intermixed with
the decrements.

In practice, checking subpages_mapcount with a temporary WARN on range,
has caught values of 0x1000000 (2*COMPOUND_MAPPED, when move_pages() was
using remove_migration_pmd()) and 0x800201 (do_huge_pmd_wp_page() using
__split_huge_pmd()): page_add_anon_rmap() racing page_remove_rmap(), as
predicted.

I certainly found it harder to reason about than when bit_spin_locked, but
the easy case gives a clue to how to handle the harder case.  The easy
case being the three !(nr & COMPOUND_MAPPED) checks, which should
obviously be replaced by (nr < COMPOUND_MAPPED) checks - to count a page
as compound mapped, even while the bit in that position is 0.

The harder case is when trying to decide how many subpages are newly
covered or uncovered, when compound map is first added or last removed:
not knowing all that racily happened between first and second atomic ops.

But the easy way to handle that, is again to count the page as compound
mapped all the while that its subpages_mapcount indicates so - ignoring
the _mapcount or compound_mapcount transition while it is on the way to
being reversed.

Link: https://lkml.kernel.org/r/4388158-3092-a960-ff2d-55f2b0fe4ef8@google.com
Fixes: 4b51634cd16a ("mm,thp,rmap: subpages_mapcount COMPOUND_MAPPED if PMD-mapped")
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: James Houghton <jthoughton@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Miaohe Lin <linmiaohe@huawei.com>
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Mina Almasry <almasrymina@google.com>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Naoya Horiguchi <naoya.horiguchi@linux.dev>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zach O'Keefe <zokeefe@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/rmap.c | 42 +++++++++++++++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/mm/rmap.c b/mm/rmap.c
index 459dc1c44d8ae..b616870a09be8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1232,7 +1232,7 @@ void page_add_anon_rmap(struct page *page,
 		if (first && PageCompound(page)) {
 			mapped = subpages_mapcount_ptr(compound_head(page));
 			nr = atomic_inc_return_relaxed(mapped);
-			nr = !(nr & COMPOUND_MAPPED);
+			nr = (nr < COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
@@ -1241,8 +1241,16 @@ void page_add_anon_rmap(struct page *page,
 		if (first) {
 			mapped = subpages_mapcount_ptr(page);
 			nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
-			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
+			if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
+				nr_pmdmapped = thp_nr_pages(page);
+				nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
+				/* Raced ahead of a remove and another add? */
+				if (unlikely(nr < 0))
+					nr = 0;
+			} else {
+				/* Raced ahead of a remove of COMPOUND_MAPPED */
+				nr = 0;
+			}
 		}
 	}
 
@@ -1330,7 +1338,7 @@ void page_add_file_rmap(struct page *page,
 		if (first && PageCompound(page)) {
 			mapped = subpages_mapcount_ptr(compound_head(page));
 			nr = atomic_inc_return_relaxed(mapped);
-			nr = !(nr & COMPOUND_MAPPED);
+			nr = (nr < COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
@@ -1339,8 +1347,16 @@ void page_add_file_rmap(struct page *page,
 		if (first) {
 			mapped = subpages_mapcount_ptr(page);
 			nr = atomic_add_return_relaxed(COMPOUND_MAPPED, mapped);
-			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
+			if (likely(nr < COMPOUND_MAPPED + COMPOUND_MAPPED)) {
+				nr_pmdmapped = thp_nr_pages(page);
+				nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
+				/* Raced ahead of a remove and another add? */
+				if (unlikely(nr < 0))
+					nr = 0;
+			} else {
+				/* Raced ahead of a remove of COMPOUND_MAPPED */
+				nr = 0;
+			}
 		}
 	}
 
@@ -1387,7 +1403,7 @@ void page_remove_rmap(struct page *page,
 		if (last && PageCompound(page)) {
 			mapped = subpages_mapcount_ptr(compound_head(page));
 			nr = atomic_dec_return_relaxed(mapped);
-			nr = !(nr & COMPOUND_MAPPED);
+			nr = (nr < COMPOUND_MAPPED);
 		}
 	} else if (PageTransHuge(page)) {
 		/* That test is redundant: it's for safety or to optimize out */
@@ -1396,8 +1412,16 @@ void page_remove_rmap(struct page *page,
 		if (last) {
 			mapped = subpages_mapcount_ptr(page);
 			nr = atomic_sub_return_relaxed(COMPOUND_MAPPED, mapped);
-			nr_pmdmapped = thp_nr_pages(page);
-			nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
+			if (likely(nr < COMPOUND_MAPPED)) {
+				nr_pmdmapped = thp_nr_pages(page);
+				nr = nr_pmdmapped - (nr & SUBPAGES_MAPPED);
+				/* Raced ahead of another remove and an add? */
+				if (unlikely(nr < 0))
+					nr = 0;
+			} else {
+				/* An add of COMPOUND_MAPPED raced ahead */
+				nr = 0;
+			}
 		}
 	}
 
-- 
GitLab


From a0ac9b3598fac36907bd1baec28c99656d2ed0b6 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 5 Dec 2022 20:37:13 +0100
Subject: [PATCH 3747/4122] mm/gup_test: fix PIN_LONGTERM_TEST_READ with
 highmem

Patch series "selftests/vm: fix some tests on 32bit".

I finally had the time to run some of the selftests written by me
(especially "cow") on x86 PAE.  I found some unexpected "surprises" :)

With these changes, and with [1] on top of mm-unstable, the "cow" tests
and the "ksm_functional_tests" compile and pass as expected (expected
failures with hugetlb in the "cow" tests).  "madv_populate" has one
expected test failure -- x86 does not support softdirty tracking.

#1-#3 fix commits with stable commit ids.  #4 fixes a test that is not in
mm-stable yet.

A note that there are many other compile errors/warnings when compiling on
32bit and with older Linux headers ...  something for another day.

[1] https://lkml.kernel.org/r/20221205150857.167583-1-david@redhat.com


This patch (of 4):

...  we have to kmap()/kunmap(), otherwise this won't work as expected
with highmem.

Link: https://lkml.kernel.org/r/20221205193716.276024-1-david@redhat.com
Link: https://lkml.kernel.org/r/20221205193716.276024-2-david@redhat.com
Fixes: c77369b437f9 ("mm/gup_test: start/stop/read functionality for PIN LONGTERM test")
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>,
Cc: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/gup_test.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/mm/gup_test.c b/mm/gup_test.c
index 0d76d9b4bb5a1..33f431e0da60b 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -4,6 +4,7 @@
 #include <linux/uaccess.h>
 #include <linux/ktime.h>
 #include <linux/debugfs.h>
+#include <linux/highmem.h>
 #include "gup_test.h"
 
 static void put_back_pages(unsigned int cmd, struct page **pages,
@@ -297,10 +298,13 @@ static inline int pin_longterm_test_read(unsigned long arg)
 		return -EFAULT;
 
 	for (i = 0; i < pin_longterm_test_nr_pages; i++) {
-		void *addr = page_to_virt(pin_longterm_test_pages[i]);
+		void *addr = kmap_local_page(pin_longterm_test_pages[i]);
+		unsigned long ret;
 
-		if (copy_to_user((void __user *)(unsigned long)user_addr, addr,
-				 PAGE_SIZE))
+		ret = copy_to_user((void __user *)(unsigned long)user_addr, addr,
+				   PAGE_SIZE);
+		kunmap_local(addr);
+		if (ret)
 			return -EFAULT;
 		user_addr += PAGE_SIZE;
 	}
-- 
GitLab


From d88825f22b8f1cad8acb429b6bf0a54c85d7b93f Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 5 Dec 2022 20:37:14 +0100
Subject: [PATCH 3748/4122] selftests/vm: madv_populate: fix missing
 MADV_POPULATE_(READ|WRITE) definitions

The tests fail to compile in some environments (e.g., Debian 11.5 on x86).
Let's simply conditionally define MADV_POPULATE_(READ|WRITE) if not
already defined, similar to how the khugepaged.c test handles it.

Link: https://lkml.kernel.org/r/20221205193716.276024-3-david@redhat.com
Fixes: 39b2e5cae43d ("selftests/vm: make MADV_POPULATE_(READ|WRITE) use in-tree headers")
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/madv_populate.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/testing/selftests/vm/madv_populate.c b/tools/testing/selftests/vm/madv_populate.c
index 60547245e479b..262eae6b58f23 100644
--- a/tools/testing/selftests/vm/madv_populate.c
+++ b/tools/testing/selftests/vm/madv_populate.c
@@ -20,6 +20,13 @@
 #include "../kselftest.h"
 #include "vm_util.h"
 
+#ifndef MADV_POPULATE_READ
+#define MADV_POPULATE_READ	22
+#endif /* MADV_POPULATE_READ */
+#ifndef MADV_POPULATE_WRITE
+#define MADV_POPULATE_WRITE	23
+#endif /* MADV_POPULATE_WRITE */
+
 /*
  * For now, we're using 2 MiB of private anonymous memory for all tests.
  */
-- 
GitLab


From 380969fe5aacdea661d3f9ab32e84327e8624ae2 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 5 Dec 2022 20:37:15 +0100
Subject: [PATCH 3749/4122] selftests/vm: cow: fix compile warning on 32bit

The compiler complains about the conversion of a pointer to an int of
different width.

Link: https://lkml.kernel.org/r/20221205193716.276024-4-david@redhat.com
Fixes: 6f1405efc61b ("selftests/vm: anon_cow: add R/O longterm tests via gup_test")
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/cow.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/vm/cow.c b/tools/testing/selftests/vm/cow.c
index 73e05b52c49ed..26f6ea3079e2f 100644
--- a/tools/testing/selftests/vm/cow.c
+++ b/tools/testing/selftests/vm/cow.c
@@ -650,7 +650,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
 	}
 
 	/* Take a R/O pin. This should trigger unsharing. */
-	args.addr = (__u64)mem;
+	args.addr = (__u64)(uintptr_t)mem;
 	args.size = size;
 	args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
@@ -669,7 +669,7 @@ static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
 	 * Read back the content via the pin to the temporary buffer and
 	 * test if we observed the modification.
 	 */
-	tmp_val = (__u64)tmp;
+	tmp_val = (__u64)(uintptr_t)tmp;
 	ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
 	if (ret)
 		ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
-- 
GitLab


From 9d789c3b4170574baa4242dd8cae5988cf97d48d Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 5 Dec 2022 20:37:16 +0100
Subject: [PATCH 3750/4122] selftests/vm: ksm_functional_tests: fixes for 32bit

The test currently fails on 32bit.  Fixing the "-1ull" vs.  "-1ul" seems
to make the test pass and the compiler happy.

Note: This test is not in mm-stable yet. This fix should be squashed into
      "selftests/vm: add KSM unmerge tests".

Link: https://lkml.kernel.org/r/20221205193716.276024-5-david@redhat.com
Signed-off-by: David Hildenbrand <david@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/selftests/vm/ksm_functional_tests.c | 4 ++--
 tools/testing/selftests/vm/vm_util.c              | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/vm/ksm_functional_tests.c b/tools/testing/selftests/vm/ksm_functional_tests.c
index 96644be689626..b11b7e5115dc5 100644
--- a/tools/testing/selftests/vm/ksm_functional_tests.c
+++ b/tools/testing/selftests/vm/ksm_functional_tests.c
@@ -42,13 +42,13 @@ static bool range_maps_duplicates(char *addr, unsigned long size)
 	for (offs_a = 0; offs_a < size; offs_a += pagesize) {
 		pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a);
 		/* Page not present or PFN not exposed by the kernel. */
-		if (pfn_a == -1ull || !pfn_a)
+		if (pfn_a == -1ul || !pfn_a)
 			continue;
 
 		for (offs_b = offs_a + pagesize; offs_b < size;
 		     offs_b += pagesize) {
 			pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b);
-			if (pfn_b == -1ull || !pfn_b)
+			if (pfn_b == -1ul || !pfn_b)
 				continue;
 			if (pfn_a == pfn_b)
 				return true;
diff --git a/tools/testing/selftests/vm/vm_util.c b/tools/testing/selftests/vm/vm_util.c
index 7105719027433..40e795624ff33 100644
--- a/tools/testing/selftests/vm/vm_util.c
+++ b/tools/testing/selftests/vm/vm_util.c
@@ -50,7 +50,7 @@ unsigned long pagemap_get_pfn(int fd, char *start)
 	/* If present (63th bit), PFN is at bit 0 -- 54. */
 	if (entry & 0x8000000000000000ull)
 		return entry & 0x007fffffffffffffull;
-	return -1ull;
+	return -1ul;
 }
 
 void clear_softdirty(void)
-- 
GitLab


From 8614d6c5eda005ad72b37afeaae2879d7c101b18 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Mon, 5 Dec 2022 18:30:07 +0100
Subject: [PATCH 3751/4122] mm: do not show fs mm pc for VM_LOCKONFAULT pages

When VM_LOCKONFAULT was added, /proc/PID/smaps wasn't hooked up to it, so
looking at /proc/PID/smaps, it shows '??' instead of something
intelligable.  This can be reached by userspace by simply calling
`mlock2(..., MLOCK_ONFAULT);`.

Fix this by adding "lf" to denote VM_LOCKONFAULT.

Link: https://lkml.kernel.org/r/20221205173007.580210-1-Jason@zx2c4.com
Fixes: de60f5f10c58 ("mm: introduce VM_LOCKONFAULT")
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Eric B Munson <emunson@akamai.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 fs/proc/task_mmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 89338950afd35..e35a0398db63f 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -674,6 +674,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_RAND_READ)]	= "rr",
 		[ilog2(VM_DONTCOPY)]	= "dc",
 		[ilog2(VM_DONTEXPAND)]	= "de",
+		[ilog2(VM_LOCKONFAULT)]	= "lf",
 		[ilog2(VM_ACCOUNT)]	= "ac",
 		[ilog2(VM_NORESERVE)]	= "nr",
 		[ilog2(VM_HUGETLB)]	= "ht",
-- 
GitLab


From de2e5171433126d340573cb7d0d4fcac084ab2a0 Mon Sep 17 00:00:00 2001
From: Kefeng Wang <wangkefeng.wang@huawei.com>
Date: Mon, 5 Dec 2022 22:03:27 +0800
Subject: [PATCH 3752/4122] mm: add cond_resched() in swapin_walk_pmd_entry()

When handling MADV_WILLNEED in madvise(), a soflockup may occurr in
swapin_walk_pmd_entry() if swapping in lots of memory on a slow device.
Add a cond_resched() to avoid the possible softlockup.

Link: https://lkml.kernel.org/r/20221205140327.72304-1-wangkefeng.wang@huawei.com
Fixes: 1998cc048901 ("mm: make madvise(MADV_WILLNEED) support swap file prefetch")
Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Shaohua Li <shli@fusionio.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Rik van Riel <riel@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/madvise.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mm/madvise.c b/mm/madvise.c
index 2573ea3ed6843..b7d9b1a1c135c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -223,6 +223,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start,
 			put_page(page);
 	}
 	swap_read_unplug(splug);
+	cond_resched();
 
 	return 0;
 }
-- 
GitLab


From 5478afc55a2104caaef5b78c7c1f9acb9ec1f92a Mon Sep 17 00:00:00 2001
From: Alexander Potapenko <glider@google.com>
Date: Mon, 5 Dec 2022 15:57:40 +0100
Subject: [PATCH 3753/4122] kmsan: fix memcpy tests

Recent Clang changes may cause it to delete calls of memcpy(), if the
source is an uninitialized volatile local.  This happens because passing a
pointer to a volatile local into memcpy() discards the volatile qualifier,
giving the compiler a free hand to optimize the memcpy() call away.

Use OPTIMIZER_HIDE_VAR() to hide the uninitialized var from the too-smart
compiler.

Link: https://lkml.kernel.org/r/20221205145740.694038-1-glider@google.com
Signed-off-by: Alexander Potapenko <glider@google.com>
Suggested-by: Marco Elver <elver@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kmsan/kmsan_test.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/mm/kmsan/kmsan_test.c b/mm/kmsan/kmsan_test.c
index 9a29ea2dbfb9b..eb44ef3c5f290 100644
--- a/mm/kmsan/kmsan_test.c
+++ b/mm/kmsan/kmsan_test.c
@@ -419,6 +419,7 @@ static void test_memcpy_aligned_to_aligned(struct kunit *test)
 	kunit_info(
 		test,
 		"memcpy()ing aligned uninit src to aligned dst (UMR report)\n");
+	OPTIMIZER_HIDE_VAR(uninit_src);
 	memcpy((void *)&dst, (void *)&uninit_src, sizeof(uninit_src));
 	kmsan_check_memory((void *)&dst, sizeof(dst));
 	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
@@ -441,6 +442,7 @@ static void test_memcpy_aligned_to_unaligned(struct kunit *test)
 	kunit_info(
 		test,
 		"memcpy()ing aligned uninit src to unaligned dst (UMR report)\n");
+	OPTIMIZER_HIDE_VAR(uninit_src);
 	memcpy((void *)&dst[1], (void *)&uninit_src, sizeof(uninit_src));
 	kmsan_check_memory((void *)dst, 4);
 	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
@@ -464,6 +466,7 @@ static void test_memcpy_aligned_to_unaligned2(struct kunit *test)
 	kunit_info(
 		test,
 		"memcpy()ing aligned uninit src to unaligned dst - part 2 (UMR report)\n");
+	OPTIMIZER_HIDE_VAR(uninit_src);
 	memcpy((void *)&dst[1], (void *)&uninit_src, sizeof(uninit_src));
 	kmsan_check_memory((void *)&dst[4], sizeof(uninit_src));
 	KUNIT_EXPECT_TRUE(test, report_matches(&expect));
-- 
GitLab


From c7cdf94e9cd7a03549e61b0f85949959191b8a10 Mon Sep 17 00:00:00 2001
From: Wang Yong <yongw.kernel@gmail.com>
Date: Wed, 7 Dec 2022 07:40:11 +0000
Subject: [PATCH 3754/4122] mm: fix typo in struct pglist_data code comment

change "stat" to "start".

Link: https://lkml.kernel.org/r/20221207074011.GA151242@cloud
Fixes: c959924b0dc5 ("memory tiering: adjust hot threshold automatically")
Signed-off-by: Wang Yong <yongw.kernel@gmail.com>
Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/mmzone.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5f74891556f33..128f3cde800cb 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1200,7 +1200,7 @@ typedef struct pglist_data {
 	/* start time in ms of current promote threshold adjustment period */
 	unsigned int nbp_th_start;
 	/*
-	 * number of promote candidate pages at stat time of current promote
+	 * number of promote candidate pages at start time of current promote
 	 * threshold adjustment period
 	 */
 	unsigned long nbp_th_nr_cand;
-- 
GitLab


From c47454823bd4e3ab34ed3f795afd4479ab938a3f Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Tue, 6 Dec 2022 11:15:09 -0800
Subject: [PATCH 3755/4122] mm: mmu_gather: allow more than one batch of
 delayed rmaps

Commit 5df397dec7c4 ("mm: delay page_remove_rmap() until after the TLB has
been flushed") limited the page batching for the mmu gather operation when
a dirty shared page needed to delay rmap removal until after the TLB had
been flushed.

It did so because it needs to walk that array of pages while still holding
the page table lock, and our mmu_gather infrastructure allows for batching
quite a lot of pages.  We may have thousands on pages queued up for
freeing, and we wanted to walk only the last batch if we then added a
dirty page to the queue.

However, when I limited it to one batch, I didn't think of the degenerate
case of the special first batch that is embedded on-stack in the
mmu_gather structure (called "local") and that only has eight entries.

So with the right pattern, that "limit delayed rmap to just one batch"
will trigger over and over in that first small batch, and we'll waste a
lot of time flushing TLB's every eight pages.

And those right patterns are trivially triggered by just having a shared
mappings with lots of adjacent dirty pages.  Like the 'page_fault3'
subtest of the 'will-it-scale' benchmark, that just maps a shared area,
dirties all pages, and unmaps it.  Rinse and repeat.

We still want to limit the batching, but to fix this (easily triggered)
degenerate case, just expand the "only one batch" logic to instead be
"only one batch that isn't the special first on-stack ('local') batch".

That way, when we need to flush the delayed rmaps, we can still limit our
walk to just the last batch - and that first small one.

Link: https://lkml.kernel.org/r/CAHk-=whkL5aM1fR7kYUmhHQHBcMUc-bDoFP7EwYjTxy64DGtvw@mail.gmail.com
Fixes: 5df397dec7c4 ("mm: delay page_remove_rmap() until after the TLB has been flushed")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Reported-by: kernel test robot <yujie.liu@intel.com>
  Link: https://lore.kernel.org/oe-lkp/202212051534.852804af-yujie.liu@intel.com
Tested-by: Huang, Ying <ying.huang@intel.com>
Tested-by: Hugh Dickins <hughd@google.com>
Cc: Feng Tang <feng.tang@intel.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nadav Amit <nadav.amit@gmail.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: "Yin, Fengwei" <fengwei.yin@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/mmu_gather.c | 36 ++++++++++++++++++++----------------
 1 file changed, 20 insertions(+), 16 deletions(-)

diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c
index 8247553a69c2e..2b93cf6ac9ae9 100644
--- a/mm/mmu_gather.c
+++ b/mm/mmu_gather.c
@@ -19,8 +19,8 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 {
 	struct mmu_gather_batch *batch;
 
-	/* No more batching if we have delayed rmaps pending */
-	if (tlb->delayed_rmap)
+	/* Limit batching if we have delayed rmaps pending */
+	if (tlb->delayed_rmap && tlb->active != &tlb->local)
 		return false;
 
 	batch = tlb->active;
@@ -48,31 +48,35 @@ static bool tlb_next_batch(struct mmu_gather *tlb)
 }
 
 #ifdef CONFIG_SMP
+static void tlb_flush_rmap_batch(struct mmu_gather_batch *batch, struct vm_area_struct *vma)
+{
+	for (int i = 0; i < batch->nr; i++) {
+		struct encoded_page *enc = batch->encoded_pages[i];
+
+		if (encoded_page_flags(enc)) {
+			struct page *page = encoded_page_ptr(enc);
+			page_remove_rmap(page, vma, false);
+		}
+	}
+}
+
 /**
  * tlb_flush_rmaps - do pending rmap removals after we have flushed the TLB
  * @tlb: the current mmu_gather
  *
  * Note that because of how tlb_next_batch() above works, we will
- * never start new batches with pending delayed rmaps, so we only
- * need to walk through the current active batch.
+ * never start multiple new batches with pending delayed rmaps, so
+ * we only need to walk through the current active batch and the
+ * original local one.
  */
 void tlb_flush_rmaps(struct mmu_gather *tlb, struct vm_area_struct *vma)
 {
-	struct mmu_gather_batch *batch;
-
 	if (!tlb->delayed_rmap)
 		return;
 
-	batch = tlb->active;
-	for (int i = 0; i < batch->nr; i++) {
-		struct encoded_page *enc = batch->encoded_pages[i];
-
-		if (encoded_page_flags(enc)) {
-			struct page *page = encoded_page_ptr(enc);
-			page_remove_rmap(page, vma, false);
-		}
-	}
-
+	tlb_flush_rmap_batch(&tlb->local, vma);
+	if (tlb->active != &tlb->local)
+		tlb_flush_rmap_batch(tlb->active, vma);
 	tlb->delayed_rmap = 0;
 }
 #endif
-- 
GitLab


From 3e39f7971d75cafe1c90dec60526ad45484657c0 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 11 Dec 2022 21:51:23 +0100
Subject: [PATCH 3756/4122] dt-bindings: rtc: m41t80: Convert text schema to
 YAML one

Convert the m41t80 text schema to YAML schema.

Add "#clock-cells" requirement, which is required by clock-output-names.

Signed-off-by: Marek Vasut <marex@denx.de>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221211205124.23823-1-marex@denx.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 .../devicetree/bindings/rtc/rtc-m41t80.txt    | 39 ----------
 .../devicetree/bindings/rtc/st,m41t80.yaml    | 73 +++++++++++++++++++
 2 files changed, 73 insertions(+), 39 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/rtc/rtc-m41t80.txt
 create mode 100644 Documentation/devicetree/bindings/rtc/st,m41t80.yaml

diff --git a/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt b/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt
deleted file mode 100644
index cdd196b1e9bdb..0000000000000
--- a/Documentation/devicetree/bindings/rtc/rtc-m41t80.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-ST M41T80 family of RTC and compatible
-
-Required properties:
-- compatible: should be one of:
-	"st,m41t62",
-	"st,m41t65",
-	"st,m41t80",
-	"st,m41t81",
-	"st,m41t81s",
-	"st,m41t82",
-	"st,m41t83",
-	"st,m41t84",
-	"st,m41t85",
-	"st,m41t87",
-	"microcrystal,rv4162",
-- reg: I2C bus address of the device
-
-Optional properties:
-- interrupts: rtc alarm interrupt.
-- clock-output-names: From common clock binding to override the default output
-                      clock name
-- wakeup-source: Enables wake up of host system on alarm
-
-Optional child node:
-- clock: Provide this if the square wave pin is used as boot-enabled fixed clock.
-
-Example:
-	rtc@68 {
-		compatible = "st,m41t80";
-		reg = <0x68>;
-		interrupt-parent = <&UIC0>;
-		interrupts = <0x9 0x8>;
-
-		clock {
-			compatible = "fixed-clock";
-			#clock-cells = <0>;
-			clock-frequency = <32768>;
-		};
-	};
diff --git a/Documentation/devicetree/bindings/rtc/st,m41t80.yaml b/Documentation/devicetree/bindings/rtc/st,m41t80.yaml
new file mode 100644
index 0000000000000..fc9c6da6483f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/rtc/st,m41t80.yaml
@@ -0,0 +1,73 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/rtc/st,m41t80.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ST M41T80 family of RTC and compatible
+
+maintainers:
+  - Alexandre Belloni <alexandre.belloni@bootlin.com>
+
+properties:
+  compatible:
+    enum:
+      - st,m41t62
+      - st,m41t65
+      - st,m41t80
+      - st,m41t81
+      - st,m41t81s
+      - st,m41t82
+      - st,m41t83
+      - st,m41t84
+      - st,m41t85
+      - st,m41t87
+      - microcrystal,rv4162
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  "#clock-cells":
+    const: 1
+
+  clock-output-names:
+    maxItems: 1
+    description: From common clock binding to override the default output clock name.
+
+  clock:
+    type: object
+    $ref: /schemas/clock/fixed-clock.yaml#
+    properties:
+      clock-frequency:
+        const: 32768
+
+allOf:
+  - $ref: rtc.yaml
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      rtc@68 {
+        compatible = "st,m41t80";
+        reg = <0x68>;
+        interrupt-parent = <&UIC0>;
+        interrupts = <0x9 0x8>;
+
+        clock {
+          compatible = "fixed-clock";
+          #clock-cells = <0>;
+          clock-frequency = <32768>;
+        };
+      };
+    };
-- 
GitLab


From 462e768b55a2331324ff72e74706261134369826 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Wed, 7 Dec 2022 18:56:09 +0300
Subject: [PATCH 3757/4122] iommu/mediatek: Fix forever loop in error handling

There is a typo so this loop does i++ where i-- was intended.  It will
result in looping until the kernel crashes.

Fixes: 26593928564c ("iommu/mediatek: Add error path for loop of mm_dts_parse")
Signed-off-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: Yong Wu <yong.wu@mediatek.com>
Link: https://lore.kernel.org/r/Y5C3mTam2nkbaz6o@kili
Signed-off-by: Joerg Roedel <jroedel@suse.de>
---
 drivers/iommu/mtk_iommu.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 392b8c167c44c..e9b3b794811dc 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -1159,8 +1159,7 @@ static int mtk_iommu_mm_dts_parse(struct device *dev, struct component_match **m
 	return 0;
 
 err_larbdev_put:
-	/* id may be not linear mapping, loop whole the array */
-	for (i = MTK_LARB_NR_MAX - 1; i >= 0; i++) {
+	for (i = MTK_LARB_NR_MAX - 1; i >= 0; i--) {
 		if (!data->larb_imu[i].dev)
 			continue;
 		put_device(data->larb_imu[i].dev);
-- 
GitLab


From 3bc8edc98bd43540dbe648e4ef91f443d6d20a24 Mon Sep 17 00:00:00 2001
From: Dan Aloni <dan.aloni@vastdata.com>
Date: Mon, 12 Dec 2022 13:11:06 +0200
Subject: [PATCH 3758/4122] nfsd: under NFSv4.1, fix double svc_xprt_put on
 rpc_create failure

On error situation `clp->cl_cb_conn.cb_xprt` should not be given
a reference to the xprt otherwise both client cleanup and the
error handling path of the caller call to put it. Better to
delay handing over the reference to a later branch.

[   72.530665] refcount_t: underflow; use-after-free.
[   72.531933] WARNING: CPU: 0 PID: 173 at lib/refcount.c:28 refcount_warn_saturate+0xcf/0x120
[   72.533075] Modules linked in: nfsd(OE) nfsv4(OE) nfsv3(OE) nfs(OE) lockd(OE) compat_nfs_ssc(OE) nfs_acl(OE) rpcsec_gss_krb5(OE) auth_rpcgss(OE) rpcrdma(OE) dns_resolver fscache netfs grace rdma_cm iw_cm ib_cm sunrpc(OE) mlx5_ib mlx5_core mlxfw pci_hyperv_intf ib_uverbs ib_core xt_MASQUERADE nf_conntrack_netlink nft_counter xt_addrtype nft_compat br_netfilter bridge stp llc nft_reject_inet nf_reject_ipv4 nf_reject_ipv6 nft_reject nft_ct nft_chain_nat nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 ip_set overlay nf_tables nfnetlink crct10dif_pclmul crc32_pclmul ghash_clmulni_intel xfs serio_raw virtio_net virtio_blk net_failover failover fuse [last unloaded: sunrpc]
[   72.540389] CPU: 0 PID: 173 Comm: kworker/u16:5 Tainted: G           OE     5.15.82-dan #1
[   72.541511] Hardware name: Red Hat KVM/RHEL-AV, BIOS 1.16.0-3.module+el8.7.0+1084+97b81f61 04/01/2014
[   72.542717] Workqueue: nfsd4_callbacks nfsd4_run_cb_work [nfsd]
[   72.543575] RIP: 0010:refcount_warn_saturate+0xcf/0x120
[   72.544299] Code: 55 00 0f 0b 5d e9 01 50 98 00 80 3d 75 9e 39 08 00 0f 85 74 ff ff ff 48 c7 c7 e8 d1 60 8e c6 05 61 9e 39 08 01 e8 f6 51 55 00 <0f> 0b 5d e9 d9 4f 98 00 80 3d 4b 9e 39 08 00 0f 85 4c ff ff ff 48
[   72.546666] RSP: 0018:ffffb3f841157cf0 EFLAGS: 00010286
[   72.547393] RAX: 0000000000000026 RBX: ffff89ac6231d478 RCX: 0000000000000000
[   72.548324] RDX: ffff89adb7c2c2c0 RSI: ffff89adb7c205c0 RDI: ffff89adb7c205c0
[   72.549271] RBP: ffffb3f841157cf0 R08: 0000000000000000 R09: c0000000ffefffff
[   72.550209] R10: 0000000000000001 R11: ffffb3f841157ad0 R12: ffff89ac6231d180
[   72.551142] R13: ffff89ac6231d478 R14: ffff89ac40c06180 R15: ffff89ac6231d4b0
[   72.552089] FS:  0000000000000000(0000) GS:ffff89adb7c00000(0000) knlGS:0000000000000000
[   72.553175] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   72.553934] CR2: 0000563a310506a8 CR3: 0000000109a66000 CR4: 0000000000350ef0
[   72.554874] Call Trace:
[   72.555278]  <TASK>
[   72.555614]  svc_xprt_put+0xaf/0xe0 [sunrpc]
[   72.556276]  nfsd4_process_cb_update.isra.11+0xb7/0x410 [nfsd]
[   72.557087]  ? update_load_avg+0x82/0x610
[   72.557652]  ? cpuacct_charge+0x60/0x70
[   72.558212]  ? dequeue_entity+0xdb/0x3e0
[   72.558765]  ? queued_spin_unlock+0x9/0x20
[   72.559358]  nfsd4_run_cb_work+0xfc/0x270 [nfsd]
[   72.560031]  process_one_work+0x1df/0x390
[   72.560600]  worker_thread+0x37/0x3b0
[   72.561644]  ? process_one_work+0x390/0x390
[   72.562247]  kthread+0x12f/0x150
[   72.562710]  ? set_kthread_struct+0x50/0x50
[   72.563309]  ret_from_fork+0x22/0x30
[   72.563818]  </TASK>
[   72.564189] ---[ end trace 031117b1c72ec616 ]---
[   72.566019] list_add corruption. next->prev should be prev (ffff89ac4977e538), but was ffff89ac4763e018. (next=ffff89ac4763e018).
[   72.567647] ------------[ cut here ]------------

Fixes: a4abc6b12eb1 ("nfsd: Fix svc_xprt refcnt leak when setup callback client failed")
Cc: Xiyu Yang <xiyuyang19@fudan.edu.cn>
Cc: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Dan Aloni <dan.aloni@vastdata.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4callback.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 1b57f2c2f0bb1..905d66acf6ab3 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -988,7 +988,6 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 	} else {
 		if (!conn->cb_xprt)
 			return -EINVAL;
-		clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
 		clp->cl_cb_session = ses;
 		args.bc_xprt = conn->cb_xprt;
 		args.prognumber = clp->cl_cb_session->se_cb_prog;
@@ -1008,6 +1007,9 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 		rpc_shutdown_client(client);
 		return -ENOMEM;
 	}
+
+	if (clp->cl_minorversion != 0)
+		clp->cl_cb_conn.cb_xprt = conn->cb_xprt;
 	clp->cl_cb_client = client;
 	clp->cl_cb_cred = cred;
 	rcu_read_lock();
-- 
GitLab


From c1ac03af6ed45d05786c219d102f37eb44880f28 Mon Sep 17 00:00:00 2001
From: Yang Jihong <yangjihong1@huawei.com>
Date: Tue, 29 Nov 2022 19:30:09 +0800
Subject: [PATCH 3759/4122] tracing: Fix infinite loop in tracing_read_pipe on
 overflowed print_trace_line

print_trace_line may overflow seq_file buffer. If the event is not
consumed, the while loop keeps peeking this event, causing a infinite loop.

Link: https://lkml.kernel.org/r/20221129113009.182425-1-yangjihong1@huawei.com

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: stable@vger.kernel.org
Fixes: 088b1e427dbba ("ftrace: pipe fixes")
Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 664619b3f1e17..548890c7c0f5c 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6802,7 +6802,20 @@ waitagain:
 
 		ret = print_trace_line(iter);
 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
-			/* don't print partial lines */
+			/*
+			 * If one print_trace_line() fills entire trace_seq in one shot,
+			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
+			 * In this case, we need to consume it, otherwise, loop will peek
+			 * this event next time, resulting in an infinite loop.
+			 */
+			if (save_len == 0) {
+				iter->seq.full = 0;
+				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
+				trace_consume(iter);
+				break;
+			}
+
+			/* In other cases, don't print partial lines */
 			iter->seq.seq.len = save_len;
 			break;
 		}
-- 
GitLab


From 3e12758392bee50135301b0189c064ab80980aca Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 12 Dec 2022 10:37:03 -0500
Subject: [PATCH 3760/4122] x86/mm/kmmio: Remove redundant preempt_disable()

Now that kmmio uses rcu_read_lock_sched_notrace() there's no reason to
call preempt_disable() as the read_lock_sched_notrace() already does that
and is redundant.

This also removes the preempt_enable_no_resched() as the "no_resched()"
portion was bogus as there's no reason to do that.

Link: https://lkml.kernel.org/r/20221212103703.7129cc5d@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: "x86@kernel.org" <x86@kernel.org>
Cc: Karol Herbst <karolherbst@gmail.com>
Cc: Pekka Paalanen <ppaalanen@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 arch/x86/mm/kmmio.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c
index 853c49877c161..9f82019179e11 100644
--- a/arch/x86/mm/kmmio.c
+++ b/arch/x86/mm/kmmio.c
@@ -246,14 +246,13 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 	page_base &= page_level_mask(l);
 
 	/*
-	 * Preemption is now disabled to prevent process switch during
-	 * single stepping. We can only handle one active kmmio trace
+	 * Hold the RCU read lock over single stepping to avoid looking
+	 * up the probe and kmmio_fault_page again. The rcu_read_lock_sched()
+	 * also disables preemption and prevents process switch during
+	 * the single stepping. We can only handle one active kmmio trace
 	 * per cpu, so ensure that we finish it before something else
-	 * gets to run. We also hold the RCU read lock over single
-	 * stepping to avoid looking up the probe and kmmio_fault_page
-	 * again.
+	 * gets to run.
 	 */
-	preempt_disable();
 	rcu_read_lock_sched_notrace();
 
 	faultpage = get_kmmio_fault_page(page_base);
@@ -324,7 +323,6 @@ int kmmio_handler(struct pt_regs *regs, unsigned long addr)
 
 no_kmmio:
 	rcu_read_unlock_sched_notrace();
-	preempt_enable_no_resched();
 	return ret;
 }
 
@@ -364,7 +362,6 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
 	ctx->active--;
 	BUG_ON(ctx->active);
 	rcu_read_unlock_sched_notrace();
-	preempt_enable_no_resched();
 
 	/*
 	 * if somebody else is singlestepping across a probe point, flags
-- 
GitLab


From b003b3b77d65133a0011ae3b7b255347438c12f6 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Tue, 29 Nov 2022 18:35:14 -0800
Subject: [PATCH 3761/4122] RISC-V: Align the shadow stack

The standard RISC-V ABIs all require 16-byte stack alignment.  We're
only calling that one function on the shadow stack so I doubt it'd
result in a real issue, but might as well keep this lined up.

Fixes: 31da94c25aea ("riscv: add VMAP_STACK overflow detection")
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20221130023515.20217-1-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/traps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index be54ccea8c475..acdfcacd7e578 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -206,7 +206,7 @@ static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
  * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used
  * to get per-cpu overflow stack(get_overflow_stack).
  */
-long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)];
+long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16);
 asmlinkage unsigned long get_overflow_stack(void)
 {
 	return (unsigned long)this_cpu_ptr(overflow_stack) +
-- 
GitLab


From de57ecc476103179e93fd85091770921f76a19af Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Tue, 29 Nov 2022 18:35:15 -0800
Subject: [PATCH 3762/4122] RISC-V: Add some comments about the shadow and
 overflow stacks

It took me a while to page all this back in when trying to review the
recent spin_shadow_stack, so I figured I'd just write up some comments.

Reviewed-by: Guo Ren <guoren@kernel.org>
Reviewed-by: Jisheng Zhang <jszhang@kernel.org>
Link: https://lore.kernel.org/r/20221130023515.20217-2-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/kernel/traps.c | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index acdfcacd7e578..336d4aadadb1c 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -200,18 +200,18 @@ void __init trap_init(void)
 }
 
 #ifdef CONFIG_VMAP_STACK
+/*
+ * Extra stack space that allows us to provide panic messages when the kernel
+ * has overflowed its stack.
+ */
 static DEFINE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)],
 		overflow_stack)__aligned(16);
 /*
- * shadow stack, handled_ kernel_ stack_ overflow(in kernel/entry.S) is used
- * to get per-cpu overflow stack(get_overflow_stack).
+ * A temporary stack for use by handle_kernel_stack_overflow.  This is used so
+ * we can call into C code to get the per-hart overflow stack.  Usage of this
+ * stack must be protected by spin_shadow_stack.
  */
 long shadow_stack[SHADOW_OVERFLOW_STACK_SIZE/sizeof(long)] __aligned(16);
-asmlinkage unsigned long get_overflow_stack(void)
-{
-	return (unsigned long)this_cpu_ptr(overflow_stack) +
-		OVERFLOW_STACK_SIZE;
-}
 
 /*
  * A pseudo spinlock to protect the shadow stack from being used by multiple
@@ -222,6 +222,12 @@ asmlinkage unsigned long get_overflow_stack(void)
  */
 unsigned long spin_shadow_stack;
 
+asmlinkage unsigned long get_overflow_stack(void)
+{
+	return (unsigned long)this_cpu_ptr(overflow_stack) +
+		OVERFLOW_STACK_SIZE;
+}
+
 asmlinkage void handle_bad_stack(struct pt_regs *regs)
 {
 	unsigned long tsk_stk = (unsigned long)current->stack;
-- 
GitLab


From e4b731ccb0975fd97283e0c0d9841a89063ec31a Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Tue, 18 Oct 2022 09:03:29 +0800
Subject: [PATCH 3763/4122] ceph: remove useless session parameter for
 check_caps()

The session parameter makes no sense any more.

Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/addr.c  |  2 +-
 fs/ceph/caps.c  | 23 +++++++++--------------
 fs/ceph/file.c  | 17 +++++++----------
 fs/ceph/inode.c |  6 +++---
 fs/ceph/ioctl.c |  2 +-
 fs/ceph/super.h |  3 +--
 6 files changed, 22 insertions(+), 31 deletions(-)

diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index dcf701b05cc1c..ff6e3c279a798 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -1367,7 +1367,7 @@ out:
 	folio_put(folio);
 
 	if (check_cap)
-		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
+		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY);
 
 	return copied;
 }
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index e54814d0c2f7b..1323fa28ab017 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1898,8 +1898,7 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
  *  CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
  *    further delay.
  */
-void ceph_check_caps(struct ceph_inode_info *ci, int flags,
-		     struct ceph_mds_session *session)
+void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 {
 	struct inode *inode = &ci->netfs.inode;
 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
@@ -1913,15 +1912,12 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
 	bool queue_invalidate = false;
 	bool tried_invalidate = false;
 	bool queue_writeback = false;
-
-	if (session)
-		ceph_get_mds_session(session);
+	struct ceph_mds_session *session = NULL;
 
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
 		/* Don't send messages until we get async create reply */
 		spin_unlock(&ci->i_ceph_lock);
-		ceph_put_mds_session(session);
 		return;
 	}
 
@@ -2851,7 +2847,7 @@ static void check_max_size(struct inode *inode, loff_t endoff)
 		check = 1;
 	spin_unlock(&ci->i_ceph_lock);
 	if (check)
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY);
 }
 
 static inline int get_used_fmode(int caps)
@@ -3140,7 +3136,7 @@ static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
 	switch (mode) {
 	case PUT_CAP_REFS_SYNC:
 		if (last)
-			ceph_check_caps(ci, 0, NULL);
+			ceph_check_caps(ci, 0);
 		else if (flushsnaps)
 			ceph_flush_snaps(ci, NULL);
 		break;
@@ -3255,7 +3251,7 @@ unlock:
 	spin_unlock(&ci->i_ceph_lock);
 
 	if (last) {
-		ceph_check_caps(ci, 0, NULL);
+		ceph_check_caps(ci, 0);
 	} else if (flush_snaps) {
 		ceph_flush_snaps(ci, NULL);
 	}
@@ -3604,10 +3600,9 @@ static void handle_cap_grant(struct inode *inode,
 
 	mutex_unlock(&session->s_mutex);
 	if (check_caps == 1)
-		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL,
-				session);
+		ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
 	else if (check_caps == 2)
-		ceph_check_caps(ci, CHECK_CAPS_NOINVAL, session);
+		ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
 }
 
 /*
@@ -4333,7 +4328,7 @@ unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
 		if (inode) {
 			spin_unlock(&mdsc->cap_delay_lock);
 			dout("check_delayed_caps on %p\n", inode);
-			ceph_check_caps(ci, 0, NULL);
+			ceph_check_caps(ci, 0);
 			iput(inode);
 			spin_lock(&mdsc->cap_delay_lock);
 		}
@@ -4362,7 +4357,7 @@ static void flush_dirty_session_caps(struct ceph_mds_session *s)
 		dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
 		spin_unlock(&mdsc->cap_dirty_lock);
 		ceph_wait_on_async_create(inode);
-		ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
+		ceph_check_caps(ci, CHECK_CAPS_FLUSH);
 		iput(inode);
 		spin_lock(&mdsc->cap_dirty_lock);
 	}
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 04fd34557de84..4e68220bc06d8 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -313,7 +313,7 @@ int ceph_renew_caps(struct inode *inode, int fmode)
 		spin_unlock(&ci->i_ceph_lock);
 		dout("renew caps %p want %s issued %s updating mds_wanted\n",
 		     inode, ceph_cap_string(wanted), ceph_cap_string(issued));
-		ceph_check_caps(ci, 0, NULL);
+		ceph_check_caps(ci, 0);
 		return 0;
 	}
 	spin_unlock(&ci->i_ceph_lock);
@@ -408,7 +408,7 @@ int ceph_open(struct inode *inode, struct file *file)
 		if ((issued & wanted) != wanted &&
 		    (mds_wanted & wanted) != wanted &&
 		    ceph_snap(inode) != CEPH_SNAPDIR)
-			ceph_check_caps(ci, 0, NULL);
+			ceph_check_caps(ci, 0);
 
 		return ceph_init_file(inode, file, fmode);
 	} else if (ceph_snap(inode) != CEPH_NOSNAP &&
@@ -1092,7 +1092,7 @@ static void ceph_aio_complete(struct inode *inode,
 		loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
 		if (endoff > i_size_read(inode)) {
 			if (ceph_inode_set_size(inode, endoff))
-				ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
+				ceph_check_caps(ci, CHECK_CAPS_AUTHONLY);
 		}
 
 		spin_lock(&ci->i_ceph_lock);
@@ -1421,8 +1421,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
 		if (write && pos > size) {
 			if (ceph_inode_set_size(inode, pos))
 				ceph_check_caps(ceph_inode(inode),
-						CHECK_CAPS_AUTHONLY,
-						NULL);
+						CHECK_CAPS_AUTHONLY);
 		}
 	}
 
@@ -1577,8 +1576,7 @@ out:
 			check_caps = ceph_inode_set_size(inode, pos);
 			if (check_caps)
 				ceph_check_caps(ceph_inode(inode),
-						CHECK_CAPS_AUTHONLY,
-						NULL);
+						CHECK_CAPS_AUTHONLY);
 		}
 
 	}
@@ -1906,7 +1904,7 @@ retry_snap:
 		if (dirty)
 			__mark_inode_dirty(inode, dirty);
 		if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
-			ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
+			ceph_check_caps(ci, CHECK_CAPS_FLUSH);
 	}
 
 	dout("aio_write %p %llx.%llx %llu~%u  dropping cap refs on %s\n",
@@ -2521,8 +2519,7 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
 		/* Let the MDS know about dst file size change */
 		if (ceph_inode_set_size(dst_inode, dst_off) ||
 		    ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
-			ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_FLUSH,
-					NULL);
+			ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_FLUSH);
 	}
 	/* Mark Fw dirty */
 	spin_lock(&dst_ci->i_ceph_lock);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index bad9eeb6a1a59..12173c00129f2 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1909,7 +1909,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 	mutex_unlock(&ci->i_truncate_mutex);
 out:
 	if (check)
-		ceph_check_caps(ci, 0, NULL);
+		ceph_check_caps(ci, 0);
 }
 
 /*
@@ -1969,7 +1969,7 @@ retry:
 	mutex_unlock(&ci->i_truncate_mutex);
 
 	if (wrbuffer_refs == 0)
-		ceph_check_caps(ci, 0, NULL);
+		ceph_check_caps(ci, 0);
 
 	wake_up_all(&ci->i_cap_wq);
 }
@@ -1991,7 +1991,7 @@ static void ceph_inode_work(struct work_struct *work)
 		__ceph_do_pending_vmtruncate(inode);
 
 	if (test_and_clear_bit(CEPH_I_WORK_CHECK_CAPS, &ci->i_work_mask))
-		ceph_check_caps(ci, 0, NULL);
+		ceph_check_caps(ci, 0);
 
 	if (test_and_clear_bit(CEPH_I_WORK_FLUSH_SNAPS, &ci->i_work_mask))
 		ceph_flush_snaps(ci, NULL);
diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c
index 6e061bf62ad47..deac817647eb0 100644
--- a/fs/ceph/ioctl.c
+++ b/fs/ceph/ioctl.c
@@ -253,7 +253,7 @@ static long ceph_ioctl_lazyio(struct file *file)
 		spin_unlock(&ci->i_ceph_lock);
 		dout("ioctl_layzio: file %p marked lazy\n", file);
 
-		ceph_check_caps(ci, 0, NULL);
+		ceph_check_caps(ci, 0);
 	} else {
 		dout("ioctl_layzio: file %p already lazy\n", file);
 	}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 40630e6f691c7..e8bc1d0d2614b 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -1200,8 +1200,7 @@ extern void ceph_remove_capsnap(struct inode *inode,
 extern void ceph_flush_snaps(struct ceph_inode_info *ci,
 			     struct ceph_mds_session **psession);
 extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
-extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
-			    struct ceph_mds_session *session);
+extern void ceph_check_caps(struct ceph_inode_info *ci, int flags);
 extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
 extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
 extern int  ceph_drop_caps_for_unlink(struct inode *inode);
-- 
GitLab


From 68c62bee9d081cf815310b3a96e38d94fc16007d Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Mon, 17 Oct 2022 22:17:35 +0800
Subject: [PATCH 3764/4122] ceph: try to check caps immediately after async
 creating finishes

We should call the check_caps() again immediately after the async
creating finishes in case the MDS is waiting for caps revocation
to finish.

Link: https://tracker.ceph.com/issues/46904
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/caps.c  | 2 ++
 fs/ceph/file.c  | 9 +++++++++
 fs/ceph/super.h | 2 ++
 3 files changed, 13 insertions(+)

diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 1323fa28ab017..4b159f97fe7b5 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1916,6 +1916,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
 
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
+		ci->i_ceph_flags |= CEPH_I_ASYNC_CHECK_CAPS;
+
 		/* Don't send messages until we get async create reply */
 		spin_unlock(&ci->i_ceph_lock);
 		return;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 4e68220bc06d8..a6681b12e2806 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -534,14 +534,23 @@ static void wake_async_create_waiters(struct inode *inode,
 				      struct ceph_mds_session *session)
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
+	bool check_cap = false;
 
 	spin_lock(&ci->i_ceph_lock);
 	if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
 		ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE;
 		wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT);
+
+		if (ci->i_ceph_flags & CEPH_I_ASYNC_CHECK_CAPS) {
+			ci->i_ceph_flags &= ~CEPH_I_ASYNC_CHECK_CAPS;
+			check_cap = true;
+		}
 	}
 	ceph_kick_flushing_inode_caps(session, ci);
 	spin_unlock(&ci->i_ceph_lock);
+
+	if (check_cap)
+		ceph_check_caps(ci, CHECK_CAPS_FLUSH);
 }
 
 static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index e8bc1d0d2614b..e1bd6f487226e 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -593,6 +593,8 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
 #define CEPH_ASYNC_CREATE_BIT	(12)	  /* async create in flight for this */
 #define CEPH_I_ASYNC_CREATE	(1 << CEPH_ASYNC_CREATE_BIT)
 #define CEPH_I_SHUTDOWN		(1 << 13) /* inode is no longer usable */
+#define CEPH_I_ASYNC_CHECK_CAPS	(1 << 14) /* check caps immediately after async
+					     creating finishes */
 
 /*
  * Masks of ceph inode work.
-- 
GitLab


From c19204cbd65c12fdcd34fb8f5d645007238ed5cd Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Thu, 8 Dec 2022 16:11:00 -0600
Subject: [PATCH 3765/4122] cifs: minor cleanup of some headers

checkpatch showed formatting problems with extra spaces,
and extra semicolon and some missing blank lines in some
cifs headers.

Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Reviewed-by: Germano Percossi <germano.percossi@gmail.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifs_ioctl.h | 2 +-
 fs/cifs/cifsfs.h     | 4 ++--
 fs/cifs/cifsglob.h   | 7 +++++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/fs/cifs/cifs_ioctl.h b/fs/cifs/cifs_ioctl.h
index d86d78d5bfdc1..332588e77c311 100644
--- a/fs/cifs/cifs_ioctl.h
+++ b/fs/cifs/cifs_ioctl.h
@@ -108,7 +108,7 @@ struct smb3_notify_info {
 #define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify)
 #define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info)
 #define CIFS_IOC_NOTIFY_INFO _IOWR(CIFS_IOCTL_MAGIC, 11, struct smb3_notify_info)
-#define CIFS_IOC_SHUTDOWN _IOR ('X', 125, __u32)
+#define CIFS_IOC_SHUTDOWN _IOR('X', 125, __u32)
 
 /*
  * Flags for going down operation
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 388b745a978e2..00a573e0ad0e1 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -105,8 +105,8 @@ extern int cifs_lock(struct file *, int, struct file_lock *);
 extern int cifs_fsync(struct file *, loff_t, loff_t, int);
 extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int);
 extern int cifs_flush(struct file *, fl_owner_t id);
-extern int cifs_file_mmap(struct file * , struct vm_area_struct *);
-extern int cifs_file_strict_mmap(struct file * , struct vm_area_struct *);
+extern int cifs_file_mmap(struct file *file, struct vm_area_struct *vma);
+extern int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma);
 extern const struct file_operations cifs_dir_ops;
 extern int cifs_dir_open(struct inode *inode, struct file *file);
 extern int cifs_readdir(struct file *file, struct dir_context *ctx);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 1420acf987f03..cd3a173e65b1c 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -785,6 +785,7 @@ static inline unsigned int
 in_flight(struct TCP_Server_Info *server)
 {
 	unsigned int num;
+
 	spin_lock(&server->req_lock);
 	num = server->in_flight;
 	spin_unlock(&server->req_lock);
@@ -795,6 +796,7 @@ static inline bool
 has_credits(struct TCP_Server_Info *server, int *credits, int num_credits)
 {
 	int num;
+
 	spin_lock(&server->req_lock);
 	num = *credits;
 	spin_unlock(&server->req_lock);
@@ -1025,7 +1027,7 @@ struct cifs_ses {
 	struct TCP_Server_Info *server;	/* pointer to server info */
 	int ses_count;		/* reference counter */
 	enum ses_status_enum ses_status;  /* updates protected by cifs_tcp_ses_lock */
-	unsigned overrideSecFlg;  /* if non-zero override global sec flags */
+	unsigned int overrideSecFlg; /* if non-zero override global sec flags */
 	char *serverOS;		/* name of operating system underlying server */
 	char *serverNOS;	/* name of network operating system of server */
 	char *serverDomain;	/* security realm of server */
@@ -1381,7 +1383,7 @@ struct cifsFileInfo {
 	__u32 pid;		/* process id who opened file */
 	struct cifs_fid fid;	/* file id from remote */
 	struct list_head rlist; /* reconnect list */
-	/* BB add lock scope info here if needed */ ;
+	/* BB add lock scope info here if needed */
 	/* lock scope id (0 if none) */
 	struct dentry *dentry;
 	struct tcon_link *tlink;
@@ -1769,6 +1771,7 @@ static inline void free_dfs_info_array(struct dfs_info3_param *param,
 				       int number_of_items)
 {
 	int i;
+
 	if ((number_of_items == 0) || (param == NULL))
 		return;
 	for (i = 0; i < number_of_items; i++) {
-- 
GitLab


From 9544597b5b63ff1674d60e069f93555ab924b62b Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 9 Dec 2022 16:55:51 -0600
Subject: [PATCH 3766/4122] cifs: fix various whitespace errors in headers

Fix some extra spaces and a few comments that were unnecessarily split over
two lines. These were some trivial issues pointed out by checkpatch)

Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifspdu.h   | 50 +++++++++++++++++++--------------------------
 fs/cifs/cifsproto.h |  2 +-
 2 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index d1abaeea974a9..623caece2b10c 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -1429,7 +1429,7 @@ typedef struct smb_com_transaction_change_notify_req {
 	__u8 WatchTree;  /* 1 = Monitor subdirectories */
 	__u8 Reserved2;
 	__le16 ByteCount;
-/* 	__u8 Pad[3];*/
+/*	__u8 Pad[3];*/
 /*	__u8 Data[1];*/
 } __attribute__((packed)) TRANSACT_CHANGE_NOTIFY_REQ;
 
@@ -1752,8 +1752,7 @@ struct smb_com_transaction2_sfi_rsp {
 	struct smb_hdr hdr;	/* wct = 10 + SetupCount */
 	struct trans2_resp t2;
 	__u16 ByteCount;
-	__u16 Reserved2;	/* parameter word reserved -
-					present for infolevels > 100 */
+	__u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */
 } __attribute__((packed));
 
 struct smb_t2_qfi_req {
@@ -1768,8 +1767,7 @@ struct smb_t2_qfi_rsp {
 	struct smb_hdr hdr;     /* wct = 10 + SetupCount */
 	struct trans2_resp t2;
 	__u16 ByteCount;
-	__u16 Reserved2;        /* parameter word reserved -
-				   present for infolevels > 100 */
+	__u16 Reserved2; /* parameter word reserved - present for infolevels > 100 */
 } __attribute__((packed));
 
 /*
@@ -2146,13 +2144,11 @@ typedef struct {
 #define CIFS_UNIX_POSIX_PATH_OPS_CAP    0x00000020 /* Allow new POSIX path based
 						      calls including posix open
 						      and posix unlink */
-#define CIFS_UNIX_LARGE_READ_CAP        0x00000040 /* support reads >128K (up
-						      to 0xFFFF00 */
+#define CIFS_UNIX_LARGE_READ_CAP        0x00000040 /* support reads >128K (up to 0xFFFF00 */
 #define CIFS_UNIX_LARGE_WRITE_CAP       0x00000080
 #define CIFS_UNIX_TRANSPORT_ENCRYPTION_CAP 0x00000100 /* can do SPNEGO crypt */
 #define CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP  0x00000200 /* must do  */
-#define CIFS_UNIX_PROXY_CAP             0x00000400 /* Proxy cap: 0xACE ioctl and
-						      QFS PROXY call */
+#define CIFS_UNIX_PROXY_CAP             0x00000400 /* Proxy cap: 0xACE ioctl and QFS PROXY call */
 #ifdef CONFIG_CIFS_POSIX
 /* presumably don't need the 0x20 POSIX_PATH_OPS_CAP since we never send
    LockingX instead of posix locking call on unix sess (and we do not expect
@@ -2368,8 +2364,7 @@ typedef struct {
 
 struct file_allocation_info {
 	__le64 AllocationSize; /* Note old Samba srvr rounds this up too much */
-} __attribute__((packed));	/* size used on disk, for level 0x103 for set,
-				   0x105 for query */
+} __packed; /* size used on disk, for level 0x103 for set, 0x105 for query */
 
 struct file_end_of_file_info {
 	__le64 FileSize;		/* offset to end of file */
@@ -2409,8 +2404,7 @@ struct cifs_posix_acl { /* access conrol list  (ACL) */
 	__le16	access_entry_count;  /* access ACL - count of entries */
 	__le16	default_entry_count; /* default ACL - count of entries */
 	struct cifs_posix_ace ace_array[];
-	/* followed by
-	struct cifs_posix_ace default_ace_arraay[] */
+	/* followed by struct cifs_posix_ace default_ace_array[] */
 } __attribute__((packed));  /* level 0x204 */
 
 /* types of access control entries already defined in posix_acl.h */
@@ -2429,17 +2423,17 @@ struct cifs_posix_acl { /* access conrol list  (ACL) */
 /* end of POSIX ACL definitions */
 
 /* POSIX Open Flags */
-#define SMB_O_RDONLY 	 0x1
-#define SMB_O_WRONLY 	0x2
-#define SMB_O_RDWR 	0x4
-#define SMB_O_CREAT 	0x10
-#define SMB_O_EXCL 	0x20
-#define SMB_O_TRUNC 	0x40
-#define SMB_O_APPEND 	0x80
-#define SMB_O_SYNC 	0x100
-#define SMB_O_DIRECTORY 0x200
-#define SMB_O_NOFOLLOW 	0x400
-#define SMB_O_DIRECT 	0x800
+#define SMB_O_RDONLY	0x1
+#define SMB_O_WRONLY	0x2
+#define SMB_O_RDWR	0x4
+#define SMB_O_CREAT	0x10
+#define SMB_O_EXCL	0x20
+#define SMB_O_TRUNC	0x40
+#define SMB_O_APPEND	0x80
+#define SMB_O_SYNC	0x100
+#define SMB_O_DIRECTORY	0x200
+#define SMB_O_NOFOLLOW	0x400
+#define SMB_O_DIRECT	0x800
 
 typedef struct {
 	__le32 OpenFlags; /* same as NT CreateX */
@@ -2716,15 +2710,13 @@ typedef struct file_xattr_info {
 	__u32 xattr_value_len;
 	char  xattr_name[];
 	/* followed by xattr_value[xattr_value_len], no pad */
-} __attribute__((packed)) FILE_XATTR_INFO; /* extended attribute info
-					      level 0x205 */
+} __packed FILE_XATTR_INFO; /* extended attribute info level 0x205 */
 
 /* flags for lsattr and chflags commands removed arein uapi/linux/fs.h */
 
 typedef struct file_chattr_info {
 	__le64	mask; /* list of all possible attribute bits */
 	__le64	mode; /* list of actual attribute bits on this inode */
-} __attribute__((packed)) FILE_CHATTR_INFO;  /* ext attributes
-						(chattr, chflags) level 0x206 */
-#endif 				/* POSIX */
+} __packed FILE_CHATTR_INFO;  /* ext attributes (chattr, chflags) level 0x206 */
+#endif				/* POSIX */
 #endif				/* _CIFSPDU_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 83e83d8beabba..f216fa269c850 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -124,7 +124,7 @@ extern int SendReceive2(const unsigned int /* xid */ , struct cifs_ses *,
 			struct kvec * /* resp vec */);
 extern int SendReceiveBlockingLock(const unsigned int xid,
 			struct cifs_tcon *ptcon,
-			struct smb_hdr *in_buf ,
+			struct smb_hdr *in_buf,
 			struct smb_hdr *out_buf,
 			int *bytes_returned);
 void
-- 
GitLab


From 2bfd81043e944af0e52835ef6d9b41795af22341 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 11 Dec 2022 13:54:21 -0600
Subject: [PATCH 3767/4122] cifs: fix missing display of three mount options

Three mount options: "tcpnodelay" and "noautotune" and "noblocksend"
were not displayed when passed in on cifs/smb3 mounts (e.g. displayed
in /proc/mounts e.g.).  No change to defaults so these are not
displayed if not specified on mount.

Cc: stable@vger.kernel.org
Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsfs.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 712a431614480..6094cb2ff099b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -678,9 +678,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	seq_printf(s, ",echo_interval=%lu",
 			tcon->ses->server->echo_interval / HZ);
 
-	/* Only display max_credits if it was overridden on mount */
+	/* Only display the following if overridden on mount */
 	if (tcon->ses->server->max_credits != SMB2_MAX_CREDITS_AVAILABLE)
 		seq_printf(s, ",max_credits=%u", tcon->ses->server->max_credits);
+	if (tcon->ses->server->tcp_nodelay)
+		seq_puts(s, ",tcpnodelay");
+	if (tcon->ses->server->noautotune)
+		seq_puts(s, ",noautotune");
+	if (tcon->ses->server->noblocksnd)
+		seq_puts(s, ",noblocksend");
 
 	if (tcon->snapshot_time)
 		seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
-- 
GitLab


From 9d91f8108ebfed54284332e04d2073107df18794 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 11 Dec 2022 14:44:31 -0600
Subject: [PATCH 3768/4122] cifs: print warning when conflicting soft vs. hard
 mount options specified

If the user specifies conflicting hard vs. soft mount options
(or nosoft vs. nohard) print a warning to dmesg

We were missing a warning when a user e.g. mounted with both
"hard,soft" mount options.

Reviewed-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/fs_context.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index 45119597c7655..2c92a821e0284 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -884,16 +884,21 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
 		ctx->nodfs = 1;
 		break;
 	case Opt_hard:
-		if (result.negated)
+		if (result.negated) {
+			if (ctx->retry == 1)
+				cifs_dbg(VFS, "conflicting hard vs. soft mount options\n");
 			ctx->retry = 0;
-		else
+		} else
 			ctx->retry = 1;
 		break;
 	case Opt_soft:
 		if (result.negated)
 			ctx->retry = 1;
-		else
+		else {
+			if (ctx->retry == 1)
+				cifs_dbg(VFS, "conflicting hard vs soft mount options\n");
 			ctx->retry = 0;
+		}
 		break;
 	case Opt_mapposix:
 		if (result.negated)
-- 
GitLab


From f7f291e14dde32a07b1f0aa06921d28f875a7b54 Mon Sep 17 00:00:00 2001
From: Paulo Alcantara <pc@cjr.nz>
Date: Sun, 11 Dec 2022 18:18:55 -0300
Subject: [PATCH 3769/4122] cifs: fix oops during encryption

When running xfstests against Azure the following oops occurred on an
arm64 system

  Unable to handle kernel write to read-only memory at virtual address
  ffff0001221cf000
  Mem abort info:
    ESR = 0x9600004f
    EC = 0x25: DABT (current EL), IL = 32 bits
    SET = 0, FnV = 0
    EA = 0, S1PTW = 0
    FSC = 0x0f: level 3 permission fault
  Data abort info:
    ISV = 0, ISS = 0x0000004f
    CM = 0, WnR = 1
  swapper pgtable: 4k pages, 48-bit VAs, pgdp=00000000294f3000
  [ffff0001221cf000] pgd=18000001ffff8003, p4d=18000001ffff8003,
  pud=18000001ff82e003, pmd=18000001ff71d003, pte=00600001221cf787
  Internal error: Oops: 9600004f [#1] PREEMPT SMP
  ...
  pstate: 80000005 (Nzcv daif -PAN -UAO -TCO BTYPE=--)
  pc : __memcpy+0x40/0x230
  lr : scatterwalk_copychunks+0xe0/0x200
  sp : ffff800014e92de0
  x29: ffff800014e92de0 x28: ffff000114f9de80 x27: 0000000000000008
  x26: 0000000000000008 x25: ffff800014e92e78 x24: 0000000000000008
  x23: 0000000000000001 x22: 0000040000000000 x21: ffff000000000000
  x20: 0000000000000001 x19: ffff0001037c4488 x18: 0000000000000014
  x17: 235e1c0d6efa9661 x16: a435f9576b6edd6c x15: 0000000000000058
  x14: 0000000000000001 x13: 0000000000000008 x12: ffff000114f2e590
  x11: ffffffffffffffff x10: 0000040000000000 x9 : ffff8000105c3580
  x8 : 2e9413b10000001a x7 : 534b4410fb86b005 x6 : 534b4410fb86b005
  x5 : ffff0001221cf008 x4 : ffff0001037c4490 x3 : 0000000000000001
  x2 : 0000000000000008 x1 : ffff0001037c4488 x0 : ffff0001221cf000
  Call trace:
   __memcpy+0x40/0x230
   scatterwalk_map_and_copy+0x98/0x100
   crypto_ccm_encrypt+0x150/0x180
   crypto_aead_encrypt+0x2c/0x40
   crypt_message+0x750/0x880
   smb3_init_transform_rq+0x298/0x340
   smb_send_rqst.part.11+0xd8/0x180
   smb_send_rqst+0x3c/0x100
   compound_send_recv+0x534/0xbc0
   smb2_query_info_compound+0x32c/0x440
   smb2_set_ea+0x438/0x4c0
   cifs_xattr_set+0x5d4/0x7c0

This is because in scatterwalk_copychunks(), we attempted to write to
a buffer (@sign) that was allocated in the stack (vmalloc area) by
crypt_message() and thus accessing its remaining 8 (x2) bytes ended up
crossing a page boundary.

To simply fix it, we could just pass @sign kmalloc'd from
crypt_message() and then we're done.  Luckily, we don't seem to pass
any other vmalloc'd buffers in smb_rqst::rq_iov...

Instead, let's map the correct pages and offsets from vmalloc buffers
as well in cifs_sg_set_buf() and then avoiding such oopses.

Signed-off-by: Paulo Alcantara (SUSE) <pc@cjr.nz>
Cc: stable@vger.kernel.org
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h  |  68 +++++++++++++++++++++
 fs/cifs/cifsproto.h |   4 +-
 fs/cifs/misc.c      |   4 +-
 fs/cifs/smb2ops.c   | 143 +++++++++++++++++++++-----------------------
 4 files changed, 140 insertions(+), 79 deletions(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index cd3a173e65b1c..703685e2db5e7 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -13,6 +13,8 @@
 #include <linux/in6.h>
 #include <linux/inet.h>
 #include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/mm.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
 #include <linux/utsname.h>
@@ -2140,4 +2142,70 @@ static inline void move_cifs_info_to_smb2(struct smb2_file_all_info *dst, const
 	dst->FileNameLength = src->FileNameLength;
 }
 
+static inline unsigned int cifs_get_num_sgs(const struct smb_rqst *rqst,
+					    int num_rqst,
+					    const u8 *sig)
+{
+	unsigned int len, skip;
+	unsigned int nents = 0;
+	unsigned long addr;
+	int i, j;
+
+	/* Assumes the first rqst has a transform header as the first iov.
+	 * I.e.
+	 * rqst[0].rq_iov[0]  is transform header
+	 * rqst[0].rq_iov[1+] data to be encrypted/decrypted
+	 * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
+	 */
+	for (i = 0; i < num_rqst; i++) {
+		/*
+		 * The first rqst has a transform header where the
+		 * first 20 bytes are not part of the encrypted blob.
+		 */
+		for (j = 0; j < rqst[i].rq_nvec; j++) {
+			struct kvec *iov = &rqst[i].rq_iov[j];
+
+			skip = (i == 0) && (j == 0) ? 20 : 0;
+			addr = (unsigned long)iov->iov_base + skip;
+			if (unlikely(is_vmalloc_addr((void *)addr))) {
+				len = iov->iov_len - skip;
+				nents += DIV_ROUND_UP(offset_in_page(addr) + len,
+						      PAGE_SIZE);
+			} else {
+				nents++;
+			}
+		}
+		nents += rqst[i].rq_npages;
+	}
+	nents += DIV_ROUND_UP(offset_in_page(sig) + SMB2_SIGNATURE_SIZE, PAGE_SIZE);
+	return nents;
+}
+
+/* We can not use the normal sg_set_buf() as we will sometimes pass a
+ * stack object as buf.
+ */
+static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg,
+						  const void *buf,
+						  unsigned int buflen)
+{
+	unsigned long addr = (unsigned long)buf;
+	unsigned int off = offset_in_page(addr);
+
+	addr &= PAGE_MASK;
+	if (unlikely(is_vmalloc_addr((void *)addr))) {
+		do {
+			unsigned int len = min_t(unsigned int, buflen, PAGE_SIZE - off);
+
+			sg_set_page(sg++, vmalloc_to_page((void *)addr), len, off);
+
+			off = 0;
+			addr += PAGE_SIZE;
+			buflen -= len;
+		} while (buflen);
+	} else {
+		sg_set_page(sg++, virt_to_page(addr), buflen, off);
+	}
+	return sg;
+}
+
 #endif	/* _CIFS_GLOB_H */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index f216fa269c850..9c6147ca029d2 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -600,8 +600,8 @@ int setup_aio_ctx_iter(struct cifs_aio_ctx *ctx, struct iov_iter *iter, int rw);
 int cifs_alloc_hash(const char *name, struct shash_desc **sdesc);
 void cifs_free_hash(struct shash_desc **sdesc);
 
-extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
-				unsigned int *len, unsigned int *offset);
+void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page,
+			  unsigned int *len, unsigned int *offset);
 struct cifs_chan *
 cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server);
 int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses);
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 3e68d8208cf5e..1cbecd64d697f 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1136,8 +1136,8 @@ cifs_free_hash(struct shash_desc **sdesc)
  * @len: Where to store the length for this page:
  * @offset: Where to store the offset for this page
  */
-void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
-				unsigned int *len, unsigned int *offset)
+void rqst_page_get_length(const struct smb_rqst *rqst, unsigned int page,
+			  unsigned int *len, unsigned int *offset)
 {
 	*len = rqst->rq_pagesz;
 	*offset = (page == 0) ? rqst->rq_offset : 0;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 72b22d033ed56..6e772b31e02a3 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -4204,69 +4204,82 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
 	memcpy(&tr_hdr->SessionId, &shdr->SessionId, 8);
 }
 
-/* We can not use the normal sg_set_buf() as we will sometimes pass a
- * stack object as buf.
- */
-static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
-				   unsigned int buflen)
+static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst,
+				 int num_rqst, const u8 *sig, u8 **iv,
+				 struct aead_request **req, struct scatterlist **sgl,
+				 unsigned int *num_sgs)
 {
-	void *addr;
-	/*
-	 * VMAP_STACK (at least) puts stack into the vmalloc address space
-	 */
-	if (is_vmalloc_addr(buf))
-		addr = vmalloc_to_page(buf);
-	else
-		addr = virt_to_page(buf);
-	sg_set_page(sg, addr, buflen, offset_in_page(buf));
+	unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm);
+	unsigned int iv_size = crypto_aead_ivsize(tfm);
+	unsigned int len;
+	u8 *p;
+
+	*num_sgs = cifs_get_num_sgs(rqst, num_rqst, sig);
+
+	len = iv_size;
+	len += crypto_aead_alignmask(tfm) & ~(crypto_tfm_ctx_alignment() - 1);
+	len = ALIGN(len, crypto_tfm_ctx_alignment());
+	len += req_size;
+	len = ALIGN(len, __alignof__(struct scatterlist));
+	len += *num_sgs * sizeof(**sgl);
+
+	p = kmalloc(len, GFP_ATOMIC);
+	if (!p)
+		return NULL;
+
+	*iv = (u8 *)PTR_ALIGN(p, crypto_aead_alignmask(tfm) + 1);
+	*req = (struct aead_request *)PTR_ALIGN(*iv + iv_size,
+						crypto_tfm_ctx_alignment());
+	*sgl = (struct scatterlist *)PTR_ALIGN((u8 *)*req + req_size,
+					       __alignof__(struct scatterlist));
+	return p;
 }
 
-/* Assumes the first rqst has a transform header as the first iov.
- * I.e.
- * rqst[0].rq_iov[0]  is transform header
- * rqst[0].rq_iov[1+] data to be encrypted/decrypted
- * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
- */
-static struct scatterlist *
-init_sg(int num_rqst, struct smb_rqst *rqst, u8 *sign)
+static void *smb2_get_aead_req(struct crypto_aead *tfm, const struct smb_rqst *rqst,
+			       int num_rqst, const u8 *sig, u8 **iv,
+			       struct aead_request **req, struct scatterlist **sgl)
 {
-	unsigned int sg_len;
+	unsigned int off, len, skip;
 	struct scatterlist *sg;
-	unsigned int i;
-	unsigned int j;
-	unsigned int idx = 0;
-	int skip;
-
-	sg_len = 1;
-	for (i = 0; i < num_rqst; i++)
-		sg_len += rqst[i].rq_nvec + rqst[i].rq_npages;
+	unsigned int num_sgs;
+	unsigned long addr;
+	int i, j;
+	void *p;
 
-	sg = kmalloc_array(sg_len, sizeof(struct scatterlist), GFP_KERNEL);
-	if (!sg)
+	p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, sgl, &num_sgs);
+	if (!p)
 		return NULL;
 
-	sg_init_table(sg, sg_len);
+	sg_init_table(*sgl, num_sgs);
+	sg = *sgl;
+
+	/* Assumes the first rqst has a transform header as the first iov.
+	 * I.e.
+	 * rqst[0].rq_iov[0]  is transform header
+	 * rqst[0].rq_iov[1+] data to be encrypted/decrypted
+	 * rqst[1+].rq_iov[0+] data to be encrypted/decrypted
+	 */
 	for (i = 0; i < num_rqst; i++) {
+		/*
+		 * The first rqst has a transform header where the
+		 * first 20 bytes are not part of the encrypted blob.
+		 */
 		for (j = 0; j < rqst[i].rq_nvec; j++) {
-			/*
-			 * The first rqst has a transform header where the
-			 * first 20 bytes are not part of the encrypted blob
-			 */
-			skip = (i == 0) && (j == 0) ? 20 : 0;
-			smb2_sg_set_buf(&sg[idx++],
-					rqst[i].rq_iov[j].iov_base + skip,
-					rqst[i].rq_iov[j].iov_len - skip);
-			}
+			struct kvec *iov = &rqst[i].rq_iov[j];
 
+			skip = (i == 0) && (j == 0) ? 20 : 0;
+			addr = (unsigned long)iov->iov_base + skip;
+			len = iov->iov_len - skip;
+			sg = cifs_sg_set_buf(sg, (void *)addr, len);
+		}
 		for (j = 0; j < rqst[i].rq_npages; j++) {
-			unsigned int len, offset;
-
-			rqst_page_get_length(&rqst[i], j, &len, &offset);
-			sg_set_page(&sg[idx++], rqst[i].rq_pages[j], len, offset);
+			rqst_page_get_length(&rqst[i], j, &len, &off);
+			sg_set_page(sg++, rqst[i].rq_pages[j], len, off);
 		}
 	}
-	smb2_sg_set_buf(&sg[idx], sign, SMB2_SIGNATURE_SIZE);
-	return sg;
+	cifs_sg_set_buf(sg, sig, SMB2_SIGNATURE_SIZE);
+
+	return p;
 }
 
 static int
@@ -4314,11 +4327,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
 	u8 sign[SMB2_SIGNATURE_SIZE] = {};
 	u8 key[SMB3_ENC_DEC_KEY_SIZE];
 	struct aead_request *req;
-	char *iv;
-	unsigned int iv_len;
+	u8 *iv;
 	DECLARE_CRYPTO_WAIT(wait);
 	struct crypto_aead *tfm;
 	unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize);
+	void *creq;
 
 	rc = smb2_get_enc_key(server, le64_to_cpu(tr_hdr->SessionId), enc, key);
 	if (rc) {
@@ -4352,32 +4365,15 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
 		return rc;
 	}
 
-	req = aead_request_alloc(tfm, GFP_KERNEL);
-	if (!req) {
-		cifs_server_dbg(VFS, "%s: Failed to alloc aead request\n", __func__);
+	creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg);
+	if (unlikely(!creq))
 		return -ENOMEM;
-	}
 
 	if (!enc) {
 		memcpy(sign, &tr_hdr->Signature, SMB2_SIGNATURE_SIZE);
 		crypt_len += SMB2_SIGNATURE_SIZE;
 	}
 
-	sg = init_sg(num_rqst, rqst, sign);
-	if (!sg) {
-		cifs_server_dbg(VFS, "%s: Failed to init sg\n", __func__);
-		rc = -ENOMEM;
-		goto free_req;
-	}
-
-	iv_len = crypto_aead_ivsize(tfm);
-	iv = kzalloc(iv_len, GFP_KERNEL);
-	if (!iv) {
-		cifs_server_dbg(VFS, "%s: Failed to alloc iv\n", __func__);
-		rc = -ENOMEM;
-		goto free_sg;
-	}
-
 	if ((server->cipher_type == SMB2_ENCRYPTION_AES128_GCM) ||
 	    (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
 		memcpy(iv, (char *)tr_hdr->Nonce, SMB3_AES_GCM_NONCE);
@@ -4386,6 +4382,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
 		memcpy(iv + 1, (char *)tr_hdr->Nonce, SMB3_AES_CCM_NONCE);
 	}
 
+	aead_request_set_tfm(req, tfm);
 	aead_request_set_crypt(req, sg, sg, crypt_len, iv);
 	aead_request_set_ad(req, assoc_data_len);
 
@@ -4398,11 +4395,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
 	if (!rc && enc)
 		memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE);
 
-	kfree_sensitive(iv);
-free_sg:
-	kfree_sensitive(sg);
-free_req:
-	kfree_sensitive(req);
+	kfree_sensitive(creq);
 	return rc;
 }
 
-- 
GitLab


From a9438b44bc7015b18931e312bbd249a25bb59a65 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Mon, 12 Dec 2022 12:36:33 +0100
Subject: [PATCH 3770/4122] writeback: Add asserts for adding freed inode to
 lists

In the past we had several use-after-free issues with inodes getting
added to writeback lists after evict() removed them. These are painful
to debug so add some asserts to catch the problem earlier. The only
non-obvious change in the commit is that we need to tweak
redirty_tail_locked() to avoid triggering assertion in
inode_io_list_move_locked().

Signed-off-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20221212113633.29181-1-jack@suse.cz
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 443f83382b9bd..6cd172c4cb3e2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -121,6 +121,7 @@ static bool inode_io_list_move_locked(struct inode *inode,
 {
 	assert_spin_locked(&wb->list_lock);
 	assert_spin_locked(&inode->i_lock);
+	WARN_ON_ONCE(inode->i_state & I_FREEING);
 
 	list_move(&inode->i_io_list, head);
 
@@ -280,6 +281,7 @@ static void inode_cgwb_move_to_attached(struct inode *inode,
 {
 	assert_spin_locked(&wb->list_lock);
 	assert_spin_locked(&inode->i_lock);
+	WARN_ON_ONCE(inode->i_state & I_FREEING);
 
 	inode->i_state &= ~I_SYNC_QUEUED;
 	if (wb != &wb->bdi->wb)
@@ -1129,6 +1131,7 @@ static void inode_cgwb_move_to_attached(struct inode *inode,
 {
 	assert_spin_locked(&wb->list_lock);
 	assert_spin_locked(&inode->i_lock);
+	WARN_ON_ONCE(inode->i_state & I_FREEING);
 
 	inode->i_state &= ~I_SYNC_QUEUED;
 	list_del_init(&inode->i_io_list);
@@ -1294,6 +1297,17 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
 {
 	assert_spin_locked(&inode->i_lock);
 
+	inode->i_state &= ~I_SYNC_QUEUED;
+	/*
+	 * When the inode is being freed just don't bother with dirty list
+	 * tracking. Flush worker will ignore this inode anyway and it will
+	 * trigger assertions in inode_io_list_move_locked().
+	 */
+	if (inode->i_state & I_FREEING) {
+		list_del_init(&inode->i_io_list);
+		wb_io_lists_depopulated(wb);
+		return;
+	}
 	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
@@ -1302,7 +1316,6 @@ static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
 			inode->dirtied_when = jiffies;
 	}
 	inode_io_list_move_locked(inode, wb, &wb->b_dirty);
-	inode->i_state &= ~I_SYNC_QUEUED;
 }
 
 static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
-- 
GitLab


From 23e188a16423a6e65290abf39dd427ff047e6843 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Sat, 10 Dec 2022 18:10:42 +0800
Subject: [PATCH 3771/4122] writeback: remove obsolete macro EXPIRE_DIRTY_ATIME

EXPIRE_DIRTY_ATIME is not used anymore. Remove it.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://lore.kernel.org/r/20221210101042.2012931-1-linmiaohe@huawei.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 fs/fs-writeback.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 6cd172c4cb3e2..6c113585e7824 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1358,8 +1358,6 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
 	return ret;
 }
 
-#define EXPIRE_DIRTY_ATIME 0x0001
-
 /*
  * Move expired (dirtied before dirtied_before) dirty inodes from
  * @delaying_queue to @dispatch_queue.
-- 
GitLab


From d1f0f50fbbbbca1e3e8157e51934613bf88f6d44 Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Thu, 8 Dec 2022 09:33:41 +0800
Subject: [PATCH 3772/4122] samples: vfio-mdev: Fix missing
 pci_disable_device() in mdpy_fb_probe()

Add missing pci_disable_device() in fail path of mdpy_fb_probe().
Besides, fix missing release functions in mdpy_fb_remove().

Fixes: cacade1946a4 ("sample: vfio mdev display - guest driver")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Link: https://lore.kernel.org/r/20221208013341.3999-1-shangxiaojing@huawei.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 samples/vfio-mdev/mdpy-fb.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c
index 9ec93d90e8a5a..4eb7aa11cfbb2 100644
--- a/samples/vfio-mdev/mdpy-fb.c
+++ b/samples/vfio-mdev/mdpy-fb.c
@@ -109,7 +109,7 @@ static int mdpy_fb_probe(struct pci_dev *pdev,
 
 	ret = pci_request_regions(pdev, "mdpy-fb");
 	if (ret < 0)
-		return ret;
+		goto err_disable_dev;
 
 	pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format);
 	pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET,	&width);
@@ -191,6 +191,9 @@ err_release_fb:
 err_release_regions:
 	pci_release_regions(pdev);
 
+err_disable_dev:
+	pci_disable_device(pdev);
+
 	return ret;
 }
 
@@ -199,7 +202,10 @@ static void mdpy_fb_remove(struct pci_dev *pdev)
 	struct fb_info *info = pci_get_drvdata(pdev);
 
 	unregister_framebuffer(info);
+	iounmap(info->screen_base);
 	framebuffer_release(info);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
 }
 
 static struct pci_device_id mdpy_fb_pci_table[] = {
-- 
GitLab


From fe3dd71db2b81c202bc80532bbe0e07238a45ed9 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 8 Dec 2022 19:01:26 +0300
Subject: [PATCH 3773/4122] vfio/mlx5: fix error code in mlx5vf_precopy_ioctl()

The copy_to_user() function returns the number of bytes remaining to
be copied but we want to return a negative error code here.

Fixes: 0dce165b1adf ("vfio/mlx5: Introduce vfio precopy ioctl implementation")
Signed-off-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/Y5IKVknlf5Z5NPtU@kili
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index cd90eb86128c3..94f7a0fd10e85 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -404,7 +404,10 @@ static long mlx5vf_precopy_ioctl(struct file *filp, unsigned int cmd,
 
 done:
 	mlx5vf_state_mutex_unlock(mvdev);
-	return copy_to_user((void __user *)arg, &info, minsz);
+	if (copy_to_user((void __user *)arg, &info, minsz))
+		return -EFAULT;
+	return 0;
+
 err_migf_unlock:
 	mutex_unlock(&migf->lock);
 err_state_unlock:
-- 
GitLab


From 70be6f322860d322ebcd120cf0c05402ead5c6de Mon Sep 17 00:00:00 2001
From: Dan Carpenter <error27@gmail.com>
Date: Thu, 8 Dec 2022 19:02:17 +0300
Subject: [PATCH 3774/4122] vfio/mlx5: error pointer dereference in error
 handling

This code frees the wrong "buf" variable and results in an error pointer
dereference.

Fixes: 34e2f27143d1 ("vfio/mlx5: Introduce multiple loads")
Signed-off-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: Yishai Hadas <yishaih@nvidia.com>
Link: https://lore.kernel.org/r/Y5IKia5SaiVxYmG5@kili
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/pci/mlx5/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/vfio/pci/mlx5/main.c b/drivers/vfio/pci/mlx5/main.c
index 94f7a0fd10e85..031ac8cc215d4 100644
--- a/drivers/vfio/pci/mlx5/main.c
+++ b/drivers/vfio/pci/mlx5/main.c
@@ -826,7 +826,7 @@ mlx5vf_pci_resume_device_data(struct mlx5vf_pci_core_device *mvdev)
 	spin_lock_init(&migf->list_lock);
 	return migf;
 out_buf:
-	mlx5vf_free_data_buffer(buf);
+	mlx5vf_free_data_buffer(migf->buf);
 out_pd:
 	mlx5vf_cmd_dealloc_pd(migf);
 out_free:
-- 
GitLab


From e480751970e84bc13ab5c288dbbe16b0638cc088 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 24 Nov 2022 11:37:08 +0800
Subject: [PATCH 3775/4122] f2fs: remove F2FS_SET_FEATURE() and
 F2FS_CLEAR_FEATURE() macro

F2FS_SET_FEATURE() and F2FS_CLEAR_FEATURE() have never
been used since they were introduced by this commit
76f105a2dbcd("f2fs: add feature facility in superblock").

So let's remove them. BTW, convert f2fs_sb_has_##name to return bool.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 296683648d4fa..cf738f1275b2d 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -203,10 +203,6 @@ struct f2fs_mount_info {
 #define __F2FS_HAS_FEATURE(raw_super, mask)				\
 	((raw_super->feature & cpu_to_le32(mask)) != 0)
 #define F2FS_HAS_FEATURE(sbi, mask)	__F2FS_HAS_FEATURE(sbi->raw_super, mask)
-#define F2FS_SET_FEATURE(sbi, mask)					\
-	(sbi->raw_super->feature |= cpu_to_le32(mask))
-#define F2FS_CLEAR_FEATURE(sbi, mask)					\
-	(sbi->raw_super->feature &= ~cpu_to_le32(mask))
 
 /*
  * Default values for user and/or group using reserved blocks
@@ -4387,7 +4383,7 @@ static inline bool f2fs_disable_compressed_file(struct inode *inode)
 }
 
 #define F2FS_FEATURE_FUNCS(name, flagname) \
-static inline int f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \
+static inline bool f2fs_sb_has_##name(struct f2fs_sb_info *sbi) \
 { \
 	return F2FS_HAS_FEATURE(sbi, F2FS_FEATURE_##flagname); \
 }
-- 
GitLab


From ed8ac22b6b75804743f1dae6563d75f85cfd1483 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Thu, 24 Nov 2022 10:48:42 +0800
Subject: [PATCH 3776/4122] f2fs: introduce f2fs_is_readonly() for readability

Introduce f2fs_is_readonly() and use it to simplify code.

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/f2fs.h  | 5 +++++
 fs/f2fs/super.c | 5 ++---
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cf738f1275b2d..eb8c27c4e5fc2 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4575,6 +4575,11 @@ static inline void f2fs_handle_page_eio(struct f2fs_sb_info *sbi, pgoff_t ofs,
 	}
 }
 
+static inline bool f2fs_is_readonly(struct f2fs_sb_info *sbi)
+{
+	return f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb);
+}
+
 #define EFSBADCRC	EBADMSG		/* Bad CRC detected */
 #define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */
 
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index a5f6f632cf7cf..79bf1faf41616 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1351,8 +1351,7 @@ default_check:
 		return -EINVAL;
 	}
 
-	if ((f2fs_sb_has_readonly(sbi) || f2fs_readonly(sbi->sb)) &&
-		test_opt(sbi, FLUSH_MERGE)) {
+	if (f2fs_is_readonly(sbi) && test_opt(sbi, FLUSH_MERGE)) {
 		f2fs_err(sbi, "FLUSH_MERGE not compatible with readonly mode");
 		return -EINVAL;
 	}
@@ -2083,7 +2082,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, MERGE_CHECKPOINT);
 	F2FS_OPTION(sbi).unusable_cap = 0;
 	sbi->sb->s_flags |= SB_LAZYTIME;
-	if (!f2fs_sb_has_readonly(sbi) && !f2fs_readonly(sbi->sb))
+	if (!f2fs_is_readonly(sbi))
 		set_opt(sbi, FLUSH_MERGE);
 	if (f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi))
 		set_opt(sbi, DISCARD);
-- 
GitLab


From 12607c1ba7637e750402f555b6695c50fce77a2b Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 30 Nov 2022 09:36:43 -0800
Subject: [PATCH 3777/4122] f2fs: specify extent cache for read explicitly

Let's descrbie it's read extent cache.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/extent_cache.c |  4 ++--
 fs/f2fs/f2fs.h         | 10 +++++-----
 fs/f2fs/inode.c        |  2 +-
 fs/f2fs/node.c         |  2 +-
 fs/f2fs/node.h         |  2 +-
 fs/f2fs/segment.c      |  4 ++--
 fs/f2fs/super.c        | 12 ++++++------
 7 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 932c070173b97..8cd87aee0292b 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -383,7 +383,7 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
 	if (!i_ext || !i_ext->len)
 		return;
 
-	get_extent_info(&ei, i_ext);
+	get_read_extent_info(&ei, i_ext);
 
 	write_lock(&et->lock);
 	if (atomic_read(&et->node_cnt))
@@ -710,7 +710,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 	unsigned int node_cnt = 0, tree_cnt = 0;
 	int remained;
 
-	if (!test_opt(sbi, EXTENT_CACHE))
+	if (!test_opt(sbi, READ_EXTENT_CACHE))
 		return 0;
 
 	if (!atomic_read(&sbi->total_zombie_tree))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index eb8c27c4e5fc2..1c39f8145b61b 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -92,7 +92,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_FLUSH_MERGE		0x00000400
 #define F2FS_MOUNT_NOBARRIER		0x00000800
 #define F2FS_MOUNT_FASTBOOT		0x00001000
-#define F2FS_MOUNT_EXTENT_CACHE		0x00002000
+#define F2FS_MOUNT_READ_EXTENT_CACHE	0x00002000
 #define F2FS_MOUNT_DATA_FLUSH		0x00008000
 #define F2FS_MOUNT_FAULT_INJECTION	0x00010000
 #define F2FS_MOUNT_USRQUOTA		0x00080000
@@ -600,7 +600,7 @@ enum {
 #define F2FS_MIN_EXTENT_LEN	64	/* minimum extent length */
 
 /* number of extent info in extent cache we try to shrink */
-#define EXTENT_CACHE_SHRINK_NUMBER	128
+#define READ_EXTENT_CACHE_SHRINK_NUMBER	128
 
 #define RECOVERY_MAX_RA_BLOCKS		BIO_MAX_VECS
 #define RECOVERY_MIN_RA_BLOCKS		1
@@ -830,7 +830,7 @@ struct f2fs_inode_info {
 	loff_t original_i_size;		/* original i_size before atomic write */
 };
 
-static inline void get_extent_info(struct extent_info *ext,
+static inline void get_read_extent_info(struct extent_info *ext,
 					struct f2fs_extent *i_ext)
 {
 	ext->fofs = le32_to_cpu(i_ext->fofs);
@@ -838,7 +838,7 @@ static inline void get_extent_info(struct extent_info *ext,
 	ext->len = le32_to_cpu(i_ext->len);
 }
 
-static inline void set_raw_extent(struct extent_info *ext,
+static inline void set_raw_read_extent(struct extent_info *ext,
 					struct f2fs_extent *i_ext)
 {
 	i_ext->fofs = cpu_to_le32(ext->fofs);
@@ -4407,7 +4407,7 @@ static inline bool f2fs_may_extent_tree(struct inode *inode)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
-	if (!test_opt(sbi, EXTENT_CACHE) ||
+	if (!test_opt(sbi, READ_EXTENT_CACHE) ||
 			is_inode_flag_set(inode, FI_NO_EXTENT) ||
 			(is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
 			 !f2fs_sb_has_readonly(sbi)))
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 577f109b4e1d9..2c705c60019b2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -629,7 +629,7 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
 
 	if (et) {
 		read_lock(&et->lock);
-		set_raw_extent(&et->largest, &ri->i_ext);
+		set_raw_read_extent(&et->largest, &ri->i_ext);
 		read_unlock(&et->lock);
 	} else {
 		memset(&ri->i_ext, 0, sizeof(ri->i_ext));
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index b9ee5a1176a07..84b147966080e 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -85,7 +85,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
 						sizeof(struct ino_entry);
 		mem_size >>= PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
-	} else if (type == EXTENT_CACHE) {
+	} else if (type == READ_EXTENT_CACHE) {
 		mem_size = (atomic_read(&sbi->total_ext_tree) *
 				sizeof(struct extent_tree) +
 				atomic_read(&sbi->total_ext_node) *
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 3c09cae058b0a..0aa48704c77a0 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -146,7 +146,7 @@ enum mem_type {
 	NAT_ENTRIES,	/* indicates the cached nat entry */
 	DIRTY_DENTS,	/* indicates dirty dentry pages */
 	INO_ENTRIES,	/* indicates inode entries */
-	EXTENT_CACHE,	/* indicates extent cache */
+	READ_EXTENT_CACHE,	/* indicates read extent cache */
 	DISCARD_CACHE,	/* indicates memory of cached discard cmds */
 	COMPRESS_PAGE,	/* indicates memory of cached compressed pages */
 	BASE_CHECK,	/* check kernel status */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 9486ca49ecb15..51de358bc4520 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -449,8 +449,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
 		return;
 
 	/* try to shrink extent cache when there is no enough memory */
-	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
-		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
+	if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
+		f2fs_shrink_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER);
 
 	/* check the # of cached NAT entries */
 	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 79bf1faf41616..412c2e7352c04 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -814,10 +814,10 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 			set_opt(sbi, FASTBOOT);
 			break;
 		case Opt_extent_cache:
-			set_opt(sbi, EXTENT_CACHE);
+			set_opt(sbi, READ_EXTENT_CACHE);
 			break;
 		case Opt_noextent_cache:
-			clear_opt(sbi, EXTENT_CACHE);
+			clear_opt(sbi, READ_EXTENT_CACHE);
 			break;
 		case Opt_noinline_data:
 			clear_opt(sbi, INLINE_DATA);
@@ -1954,7 +1954,7 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_puts(seq, ",barrier");
 	if (test_opt(sbi, FASTBOOT))
 		seq_puts(seq, ",fastboot");
-	if (test_opt(sbi, EXTENT_CACHE))
+	if (test_opt(sbi, READ_EXTENT_CACHE))
 		seq_puts(seq, ",extent_cache");
 	else
 		seq_puts(seq, ",noextent_cache");
@@ -2076,7 +2076,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	set_opt(sbi, INLINE_XATTR);
 	set_opt(sbi, INLINE_DATA);
 	set_opt(sbi, INLINE_DENTRY);
-	set_opt(sbi, EXTENT_CACHE);
+	set_opt(sbi, READ_EXTENT_CACHE);
 	set_opt(sbi, NOHEAP);
 	clear_opt(sbi, DISABLE_CHECKPOINT);
 	set_opt(sbi, MERGE_CHECKPOINT);
@@ -2218,7 +2218,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_ckpt = false, need_stop_ckpt = false;
 	bool need_restart_flush = false, need_stop_flush = false;
 	bool need_restart_discard = false, need_stop_discard = false;
-	bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
+	bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
 	bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
 	bool no_io_align = !F2FS_IO_ALIGNED(sbi);
 	bool no_atgc = !test_opt(sbi, ATGC);
@@ -2308,7 +2308,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	}
 
 	/* disallow enable/disable extent_cache dynamically */
-	if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
+	if (no_read_extent_cache == !!test_opt(sbi, READ_EXTENT_CACHE)) {
 		err = -EINVAL;
 		f2fs_warn(sbi, "switch extent_cache option is not allowed");
 		goto restore_opts;
-- 
GitLab


From 3bac20a8f011b8ed4012b43f4f33010432b3c647 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 30 Nov 2022 09:44:58 -0800
Subject: [PATCH 3778/4122] f2fs: move internal functions into extent_cache.c

No functional change.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/extent_cache.c | 88 +++++++++++++++++++++++++++++++++++++-----
 fs/f2fs/f2fs.h         | 69 +--------------------------------
 2 files changed, 81 insertions(+), 76 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 8cd87aee0292b..2a8e31e6d518f 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -15,6 +15,77 @@
 #include "node.h"
 #include <trace/events/f2fs.h>
 
+static void __set_extent_info(struct extent_info *ei,
+				unsigned int fofs, unsigned int len,
+				block_t blk, bool keep_clen)
+{
+	ei->fofs = fofs;
+	ei->blk = blk;
+	ei->len = len;
+
+	if (keep_clen)
+		return;
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	ei->c_len = 0;
+#endif
+}
+
+static bool f2fs_may_extent_tree(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+	/*
+	 * for recovered files during mount do not create extents
+	 * if shrinker is not registered.
+	 */
+	if (list_empty(&sbi->s_list))
+		return false;
+
+	if (!test_opt(sbi, READ_EXTENT_CACHE) ||
+			is_inode_flag_set(inode, FI_NO_EXTENT) ||
+			(is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
+			 !f2fs_sb_has_readonly(sbi)))
+		return false;
+
+	return S_ISREG(inode->i_mode);
+}
+
+static void __try_update_largest_extent(struct extent_tree *et,
+						struct extent_node *en)
+{
+	if (en->ei.len <= et->largest.len)
+		return;
+
+	et->largest = en->ei;
+	et->largest_updated = true;
+}
+
+static bool __is_extent_mergeable(struct extent_info *back,
+				struct extent_info *front)
+{
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+	if (back->c_len && back->len != back->c_len)
+		return false;
+	if (front->c_len && front->len != front->c_len)
+		return false;
+#endif
+	return (back->fofs + back->len == front->fofs &&
+			back->blk + back->len == front->blk);
+}
+
+static bool __is_back_mergeable(struct extent_info *cur,
+				struct extent_info *back)
+{
+	return __is_extent_mergeable(back, cur);
+}
+
+static bool __is_front_mergeable(struct extent_info *cur,
+				struct extent_info *front)
+{
+	return __is_extent_mergeable(cur, front);
+}
+
 static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
 							unsigned int ofs)
 {
@@ -591,16 +662,16 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
 
 		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
 			if (parts) {
-				set_extent_info(&ei, end,
-						end - dei.fofs + dei.blk,
-						org_end - end);
+				__set_extent_info(&ei,
+					end, org_end - end,
+					end - dei.fofs + dei.blk, false);
 				en1 = __insert_extent_tree(sbi, et, &ei,
 							NULL, NULL, true);
 				next_en = en1;
 			} else {
-				en->ei.fofs = end;
-				en->ei.blk += end - dei.fofs;
-				en->ei.len -= end - dei.fofs;
+				__set_extent_info(&en->ei,
+					end, en->ei.len - (end - dei.fofs),
+					en->ei.blk + (end - dei.fofs), true);
 				next_en = en;
 			}
 			parts++;
@@ -632,8 +703,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
 
 	/* 3. update extent in extent cache */
 	if (blkaddr) {
-
-		set_extent_info(&ei, fofs, blkaddr, len);
+		__set_extent_info(&ei, fofs, len, blkaddr, false);
 		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 			__insert_extent_tree(sbi, et, &ei,
 					insert_p, insert_parent, leftmost);
@@ -692,7 +762,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode,
 	if (en)
 		goto unlock_out;
 
-	set_extent_info(&ei, fofs, blkaddr, llen);
+	__set_extent_info(&ei, fofs, llen, blkaddr, true);
 	ei.c_len = c_len;
 
 	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 1c39f8145b61b..04fdf010bb771 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -621,7 +621,7 @@ struct rb_entry {
 struct extent_info {
 	unsigned int fofs;		/* start offset in a file */
 	unsigned int len;		/* length of the extent */
-	u32 blk;			/* start block address of the extent */
+	block_t blk;			/* start block address of the extent */
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	unsigned int c_len;		/* physical extent length of compressed blocks */
 #endif
@@ -846,17 +846,6 @@ static inline void set_raw_read_extent(struct extent_info *ext,
 	i_ext->len = cpu_to_le32(ext->len);
 }
 
-static inline void set_extent_info(struct extent_info *ei, unsigned int fofs,
-						u32 blk, unsigned int len)
-{
-	ei->fofs = fofs;
-	ei->blk = blk;
-	ei->len = len;
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	ei->c_len = 0;
-#endif
-}
-
 static inline bool __is_discard_mergeable(struct discard_info *back,
 			struct discard_info *front, unsigned int max_len)
 {
@@ -876,41 +865,6 @@ static inline bool __is_discard_front_mergeable(struct discard_info *cur,
 	return __is_discard_mergeable(cur, front, max_len);
 }
 
-static inline bool __is_extent_mergeable(struct extent_info *back,
-						struct extent_info *front)
-{
-#ifdef CONFIG_F2FS_FS_COMPRESSION
-	if (back->c_len && back->len != back->c_len)
-		return false;
-	if (front->c_len && front->len != front->c_len)
-		return false;
-#endif
-	return (back->fofs + back->len == front->fofs &&
-			back->blk + back->len == front->blk);
-}
-
-static inline bool __is_back_mergeable(struct extent_info *cur,
-						struct extent_info *back)
-{
-	return __is_extent_mergeable(back, cur);
-}
-
-static inline bool __is_front_mergeable(struct extent_info *cur,
-						struct extent_info *front)
-{
-	return __is_extent_mergeable(cur, front);
-}
-
-extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
-static inline void __try_update_largest_extent(struct extent_tree *et,
-						struct extent_node *en)
-{
-	if (en->ei.len > et->largest.len) {
-		et->largest = en->ei;
-		et->largest_updated = true;
-	}
-}
-
 /*
  * For free nid management
  */
@@ -2581,6 +2535,7 @@ static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi)
 	return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum);
 }
 
+extern void f2fs_mark_inode_dirty_sync(struct inode *inode, bool sync);
 static inline int inc_valid_node_count(struct f2fs_sb_info *sbi,
 					struct inode *inode, bool is_inode)
 {
@@ -4403,26 +4358,6 @@ F2FS_FEATURE_FUNCS(casefold, CASEFOLD);
 F2FS_FEATURE_FUNCS(compression, COMPRESSION);
 F2FS_FEATURE_FUNCS(readonly, RO);
 
-static inline bool f2fs_may_extent_tree(struct inode *inode)
-{
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-
-	if (!test_opt(sbi, READ_EXTENT_CACHE) ||
-			is_inode_flag_set(inode, FI_NO_EXTENT) ||
-			(is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
-			 !f2fs_sb_has_readonly(sbi)))
-		return false;
-
-	/*
-	 * for recovered files during mount do not create extents
-	 * if shrinker is not registered.
-	 */
-	if (list_empty(&sbi->s_list))
-		return false;
-
-	return S_ISREG(inode->i_mode);
-}
-
 #ifdef CONFIG_BLK_DEV_ZONED
 static inline bool f2fs_blkz_is_seq(struct f2fs_sb_info *sbi, int devi,
 				    block_t blkaddr)
-- 
GitLab


From 749d543c0d451fff31e8f7a3e0a031ffcbf1ebb1 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 30 Nov 2022 10:01:18 -0800
Subject: [PATCH 3779/4122] f2fs: remove unnecessary __init_extent_tree

Added into the caller.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/extent_cache.c | 21 +++++----------------
 1 file changed, 5 insertions(+), 16 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 2a8e31e6d518f..c6810347e2054 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -386,21 +386,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode)
 	return et;
 }
 
-static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
-				struct extent_tree *et, struct extent_info *ei)
-{
-	struct rb_node **p = &et->root.rb_root.rb_node;
-	struct extent_node *en;
-
-	en = __attach_extent_node(sbi, et, ei, NULL, p, true);
-	if (!en)
-		return NULL;
-
-	et->largest = en->ei;
-	et->cached_en = en;
-	return en;
-}
-
 static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
 					struct extent_tree *et)
 {
@@ -460,8 +445,12 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
 	if (atomic_read(&et->node_cnt))
 		goto out;
 
-	en = __init_extent_tree(sbi, et, &ei);
+	en = __attach_extent_node(sbi, et, &ei, NULL,
+				&et->root.rb_root.rb_node, true);
 	if (en) {
+		et->largest = en->ei;
+		et->cached_en = en;
+
 		spin_lock(&sbi->extent_lock);
 		list_add_tail(&en->list, &sbi->extent_list);
 		spin_unlock(&sbi->extent_lock);
-- 
GitLab


From e7547daccd6a37522f0af74ec4b5a3036f3dd328 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Wed, 30 Nov 2022 09:26:29 -0800
Subject: [PATCH 3780/4122] f2fs: refactor extent_cache to support for read and
 more

This patch prepares extent_cache to be ready for addition.

Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/data.c              |  20 +-
 fs/f2fs/debug.c             |  65 +++--
 fs/f2fs/extent_cache.c      | 463 +++++++++++++++++++++---------------
 fs/f2fs/f2fs.h              | 119 +++++----
 fs/f2fs/file.c              |   8 +-
 fs/f2fs/gc.c                |   4 +-
 fs/f2fs/inode.c             |   6 +-
 fs/f2fs/node.c              |   8 +-
 fs/f2fs/segment.c           |   3 +-
 fs/f2fs/shrinker.c          |  19 +-
 include/trace/events/f2fs.h |  62 +++--
 11 files changed, 470 insertions(+), 307 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 35c19248b1e2d..75abd450730b3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1126,7 +1126,7 @@ void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
 {
 	dn->data_blkaddr = blkaddr;
 	f2fs_set_data_blkaddr(dn);
-	f2fs_update_extent_cache(dn);
+	f2fs_update_read_extent_cache(dn);
 }
 
 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
@@ -1195,7 +1195,7 @@ int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
 	struct extent_info ei = {0, };
 	struct inode *inode = dn->inode;
 
-	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+	if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
 		dn->data_blkaddr = ei.blk + index - ei.fofs;
 		return 0;
 	}
@@ -1217,7 +1217,7 @@ struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
 	if (!page)
 		return ERR_PTR(-ENOMEM);
 
-	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+	if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
 		dn.data_blkaddr = ei.blk + index - ei.fofs;
 		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
 						DATA_GENERIC_ENHANCE_READ)) {
@@ -1485,7 +1485,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	pgofs =	(pgoff_t)map->m_lblk;
 	end = pgofs + maxblocks;
 
-	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+	if (!create && f2fs_lookup_read_extent_cache(inode, pgofs, &ei)) {
 		if (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO &&
 							map->m_may_create)
 			goto next_dnode;
@@ -1695,7 +1695,7 @@ skip:
 		if (map->m_flags & F2FS_MAP_MAPPED) {
 			unsigned int ofs = start_pgofs - map->m_lblk;
 
-			f2fs_update_extent_cache_range(&dn,
+			f2fs_update_read_extent_cache_range(&dn,
 				start_pgofs, map->m_pblk + ofs,
 				map->m_len - ofs);
 		}
@@ -1740,7 +1740,7 @@ sync_out:
 		if (map->m_flags & F2FS_MAP_MAPPED) {
 			unsigned int ofs = start_pgofs - map->m_lblk;
 
-			f2fs_update_extent_cache_range(&dn,
+			f2fs_update_read_extent_cache_range(&dn,
 				start_pgofs, map->m_pblk + ofs,
 				map->m_len - ofs);
 		}
@@ -2201,7 +2201,7 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 	if (f2fs_cluster_is_empty(cc))
 		goto out;
 
-	if (f2fs_lookup_extent_cache(inode, start_idx, &ei))
+	if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
 		from_dnode = false;
 
 	if (!from_dnode)
@@ -2635,7 +2635,7 @@ int f2fs_do_write_data_page(struct f2fs_io_info *fio)
 		set_new_dnode(&dn, inode, NULL, NULL, 0);
 
 	if (need_inplace_update(fio) &&
-			f2fs_lookup_extent_cache(inode, page->index, &ei)) {
+	    f2fs_lookup_read_extent_cache(inode, page->index, &ei)) {
 		fio->old_blkaddr = ei.blk + page->index - ei.fofs;
 
 		if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
@@ -3359,7 +3359,7 @@ restart:
 	} else if (locked) {
 		err = f2fs_get_block(&dn, index);
 	} else {
-		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+		if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
 			dn.data_blkaddr = ei.blk + index - ei.fofs;
 		} else {
 			/* hole case */
@@ -3400,7 +3400,7 @@ static int __find_data_block(struct inode *inode, pgoff_t index,
 
 	set_new_dnode(&dn, inode, ipage, ipage, 0);
 
-	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+	if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
 		dn.data_blkaddr = ei.blk + index - ei.fofs;
 	} else {
 		/* hole case */
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a216dcdf69418..a9baa121d829f 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -72,15 +72,23 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->main_area_zones = si->main_area_sections /
 				le32_to_cpu(raw_super->secs_per_zone);
 
-	/* validation check of the segment numbers */
+	/* general extent cache stats */
+	for (i = 0; i < NR_EXTENT_CACHES; i++) {
+		struct extent_tree_info *eti = &sbi->extent_tree[i];
+
+		si->hit_cached[i] = atomic64_read(&sbi->read_hit_cached[i]);
+		si->hit_rbtree[i] = atomic64_read(&sbi->read_hit_rbtree[i]);
+		si->total_ext[i] = atomic64_read(&sbi->total_hit_ext[i]);
+		si->hit_total[i] = si->hit_cached[i] + si->hit_rbtree[i];
+		si->ext_tree[i] = atomic_read(&eti->total_ext_tree);
+		si->zombie_tree[i] = atomic_read(&eti->total_zombie_tree);
+		si->ext_node[i] = atomic_read(&eti->total_ext_node);
+	}
+	/* read extent_cache only */
 	si->hit_largest = atomic64_read(&sbi->read_hit_largest);
-	si->hit_cached = atomic64_read(&sbi->read_hit_cached);
-	si->hit_rbtree = atomic64_read(&sbi->read_hit_rbtree);
-	si->hit_total = si->hit_largest + si->hit_cached + si->hit_rbtree;
-	si->total_ext = atomic64_read(&sbi->total_hit_ext);
-	si->ext_tree = atomic_read(&sbi->total_ext_tree);
-	si->zombie_tree = atomic_read(&sbi->total_zombie_tree);
-	si->ext_node = atomic_read(&sbi->total_ext_node);
+	si->hit_total[EX_READ] += si->hit_largest;
+
+	/* validation check of the segment numbers */
 	si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
 	si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
 	si->ndirty_meta = get_pages(sbi, F2FS_DIRTY_META);
@@ -294,10 +302,16 @@ get_cache:
 				sizeof(struct nat_entry_set);
 	for (i = 0; i < MAX_INO_ENTRY; i++)
 		si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
-	si->cache_mem += atomic_read(&sbi->total_ext_tree) *
+
+	for (i = 0; i < NR_EXTENT_CACHES; i++) {
+		struct extent_tree_info *eti = &sbi->extent_tree[i];
+
+		si->ext_mem[i] = atomic_read(&eti->total_ext_tree) *
 						sizeof(struct extent_tree);
-	si->cache_mem += atomic_read(&sbi->total_ext_node) *
+		si->ext_mem[i] += atomic_read(&eti->total_ext_node) *
 						sizeof(struct extent_node);
+		si->cache_mem += si->ext_mem[i];
+	}
 
 	si->page_mem = 0;
 	if (sbi->node_inode) {
@@ -490,16 +504,18 @@ static int stat_show(struct seq_file *s, void *v)
 				si->bg_node_blks);
 		seq_printf(s, "BG skip : IO: %u, Other: %u\n",
 				si->io_skip_bggc, si->other_skip_bggc);
-		seq_puts(s, "\nExtent Cache:\n");
+		seq_puts(s, "\nExtent Cache (Read):\n");
 		seq_printf(s, "  - Hit Count: L1-1:%llu L1-2:%llu L2:%llu\n",
-				si->hit_largest, si->hit_cached,
-				si->hit_rbtree);
+				si->hit_largest, si->hit_cached[EX_READ],
+				si->hit_rbtree[EX_READ]);
 		seq_printf(s, "  - Hit Ratio: %llu%% (%llu / %llu)\n",
-				!si->total_ext ? 0 :
-				div64_u64(si->hit_total * 100, si->total_ext),
-				si->hit_total, si->total_ext);
+				!si->total_ext[EX_READ] ? 0 :
+				div64_u64(si->hit_total[EX_READ] * 100,
+				si->total_ext[EX_READ]),
+				si->hit_total[EX_READ], si->total_ext[EX_READ]);
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
-				si->ext_tree, si->zombie_tree, si->ext_node);
+				si->ext_tree[EX_READ], si->zombie_tree[EX_READ],
+				si->ext_node[EX_READ]);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
 		seq_printf(s, "  - DIO (R: %4d, W: %4d)\n",
 			   si->nr_dio_read, si->nr_dio_write);
@@ -566,8 +582,10 @@ static int stat_show(struct seq_file *s, void *v)
 			(si->base_mem + si->cache_mem + si->page_mem) >> 10);
 		seq_printf(s, "  - static: %llu KB\n",
 				si->base_mem >> 10);
-		seq_printf(s, "  - cached: %llu KB\n",
+		seq_printf(s, "  - cached all: %llu KB\n",
 				si->cache_mem >> 10);
+		seq_printf(s, "  - read extent cache: %llu KB\n",
+				si->ext_mem[EX_READ] >> 10);
 		seq_printf(s, "  - paged : %llu KB\n",
 				si->page_mem >> 10);
 	}
@@ -600,10 +618,15 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
 	si->sbi = sbi;
 	sbi->stat_info = si;
 
-	atomic64_set(&sbi->total_hit_ext, 0);
-	atomic64_set(&sbi->read_hit_rbtree, 0);
+	/* general extent cache stats */
+	for (i = 0; i < NR_EXTENT_CACHES; i++) {
+		atomic64_set(&sbi->total_hit_ext[i], 0);
+		atomic64_set(&sbi->read_hit_rbtree[i], 0);
+		atomic64_set(&sbi->read_hit_cached[i], 0);
+	}
+
+	/* read extent_cache only */
 	atomic64_set(&sbi->read_hit_largest, 0);
-	atomic64_set(&sbi->read_hit_cached, 0);
 
 	atomic_set(&sbi->inline_xattr, 0);
 	atomic_set(&sbi->inline_inode, 0);
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index c6810347e2054..654a14ab8977c 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -17,21 +17,37 @@
 
 static void __set_extent_info(struct extent_info *ei,
 				unsigned int fofs, unsigned int len,
-				block_t blk, bool keep_clen)
+				block_t blk, bool keep_clen,
+				enum extent_type type)
 {
 	ei->fofs = fofs;
-	ei->blk = blk;
 	ei->len = len;
 
-	if (keep_clen)
-		return;
-
+	if (type == EX_READ) {
+		ei->blk = blk;
+		if (keep_clen)
+			return;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
-	ei->c_len = 0;
+		ei->c_len = 0;
 #endif
+	}
+}
+
+static bool __may_read_extent_tree(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+	if (!test_opt(sbi, READ_EXTENT_CACHE))
+		return false;
+	if (is_inode_flag_set(inode, FI_NO_EXTENT))
+		return false;
+	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
+			 !f2fs_sb_has_readonly(sbi))
+		return false;
+	return S_ISREG(inode->i_mode);
 }
 
-static bool f2fs_may_extent_tree(struct inode *inode)
+static bool __may_extent_tree(struct inode *inode, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 
@@ -42,18 +58,16 @@ static bool f2fs_may_extent_tree(struct inode *inode)
 	if (list_empty(&sbi->s_list))
 		return false;
 
-	if (!test_opt(sbi, READ_EXTENT_CACHE) ||
-			is_inode_flag_set(inode, FI_NO_EXTENT) ||
-			(is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
-			 !f2fs_sb_has_readonly(sbi)))
-		return false;
-
-	return S_ISREG(inode->i_mode);
+	if (type == EX_READ)
+		return __may_read_extent_tree(inode);
+	return false;
 }
 
 static void __try_update_largest_extent(struct extent_tree *et,
 						struct extent_node *en)
 {
+	if (et->type != EX_READ)
+		return;
 	if (en->ei.len <= et->largest.len)
 		return;
 
@@ -62,28 +76,31 @@ static void __try_update_largest_extent(struct extent_tree *et,
 }
 
 static bool __is_extent_mergeable(struct extent_info *back,
-				struct extent_info *front)
+		struct extent_info *front, enum extent_type type)
 {
+	if (type == EX_READ) {
 #ifdef CONFIG_F2FS_FS_COMPRESSION
-	if (back->c_len && back->len != back->c_len)
-		return false;
-	if (front->c_len && front->len != front->c_len)
-		return false;
+		if (back->c_len && back->len != back->c_len)
+			return false;
+		if (front->c_len && front->len != front->c_len)
+			return false;
 #endif
-	return (back->fofs + back->len == front->fofs &&
-			back->blk + back->len == front->blk);
+		return (back->fofs + back->len == front->fofs &&
+				back->blk + back->len == front->blk);
+	}
+	return false;
 }
 
 static bool __is_back_mergeable(struct extent_info *cur,
-				struct extent_info *back)
+		struct extent_info *back, enum extent_type type)
 {
-	return __is_extent_mergeable(back, cur);
+	return __is_extent_mergeable(back, cur, type);
 }
 
 static bool __is_front_mergeable(struct extent_info *cur,
-				struct extent_info *front)
+		struct extent_info *front, enum extent_type type)
 {
-	return __is_extent_mergeable(cur, front);
+	return __is_extent_mergeable(cur, front, type);
 }
 
 static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re,
@@ -308,6 +325,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 				struct rb_node *parent, struct rb_node **p,
 				bool leftmost)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 	struct extent_node *en;
 
 	en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
@@ -321,16 +339,18 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
 	rb_link_node(&en->rb_node, parent, p);
 	rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
 	atomic_inc(&et->node_cnt);
-	atomic_inc(&sbi->total_ext_node);
+	atomic_inc(&eti->total_ext_node);
 	return en;
 }
 
 static void __detach_extent_node(struct f2fs_sb_info *sbi,
 				struct extent_tree *et, struct extent_node *en)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
+
 	rb_erase_cached(&en->rb_node, &et->root);
 	atomic_dec(&et->node_cnt);
-	atomic_dec(&sbi->total_ext_node);
+	atomic_dec(&eti->total_ext_node);
 
 	if (et->cached_en == en)
 		et->cached_en = NULL;
@@ -346,42 +366,47 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
 static void __release_extent_node(struct f2fs_sb_info *sbi,
 			struct extent_tree *et, struct extent_node *en)
 {
-	spin_lock(&sbi->extent_lock);
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
+
+	spin_lock(&eti->extent_lock);
 	f2fs_bug_on(sbi, list_empty(&en->list));
 	list_del_init(&en->list);
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 
 	__detach_extent_node(sbi, et, en);
 }
 
-static struct extent_tree *__grab_extent_tree(struct inode *inode)
+static struct extent_tree *__grab_extent_tree(struct inode *inode,
+						enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
 	struct extent_tree *et;
 	nid_t ino = inode->i_ino;
 
-	mutex_lock(&sbi->extent_tree_lock);
-	et = radix_tree_lookup(&sbi->extent_tree_root, ino);
+	mutex_lock(&eti->extent_tree_lock);
+	et = radix_tree_lookup(&eti->extent_tree_root, ino);
 	if (!et) {
 		et = f2fs_kmem_cache_alloc(extent_tree_slab,
 					GFP_NOFS, true, NULL);
-		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et);
+		f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
 		memset(et, 0, sizeof(struct extent_tree));
 		et->ino = ino;
+		et->type = type;
 		et->root = RB_ROOT_CACHED;
 		et->cached_en = NULL;
 		rwlock_init(&et->lock);
 		INIT_LIST_HEAD(&et->list);
 		atomic_set(&et->node_cnt, 0);
-		atomic_inc(&sbi->total_ext_tree);
+		atomic_inc(&eti->total_ext_tree);
 	} else {
-		atomic_dec(&sbi->total_zombie_tree);
+		atomic_dec(&eti->total_zombie_tree);
 		list_del_init(&et->list);
 	}
-	mutex_unlock(&sbi->extent_tree_lock);
+	mutex_unlock(&eti->extent_tree_lock);
 
 	/* never died until evict_inode */
-	F2FS_I(inode)->extent_tree = et;
+	F2FS_I(inode)->extent_tree[type] = et;
 
 	return et;
 }
@@ -415,35 +440,38 @@ static void __drop_largest_extent(struct extent_tree *et,
 }
 
 /* return true, if inode page is changed */
-static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
+static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage,
+							enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
 	struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
 	struct extent_tree *et;
 	struct extent_node *en;
 	struct extent_info ei;
 
-	if (!f2fs_may_extent_tree(inode)) {
-		/* drop largest extent */
-		if (i_ext && i_ext->len) {
+	if (!__may_extent_tree(inode, type)) {
+		/* drop largest read extent */
+		if (type == EX_READ && i_ext && i_ext->len) {
 			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 			i_ext->len = 0;
 			set_page_dirty(ipage);
-			return;
 		}
-		return;
+		goto out;
 	}
 
-	et = __grab_extent_tree(inode);
+	et = __grab_extent_tree(inode, type);
 
 	if (!i_ext || !i_ext->len)
-		return;
+		goto out;
+
+	BUG_ON(type != EX_READ);
 
 	get_read_extent_info(&ei, i_ext);
 
 	write_lock(&et->lock);
 	if (atomic_read(&et->node_cnt))
-		goto out;
+		goto unlock_out;
 
 	en = __attach_extent_node(sbi, et, &ei, NULL,
 				&et->root.rb_root.rb_node, true);
@@ -451,37 +479,40 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
 		et->largest = en->ei;
 		et->cached_en = en;
 
-		spin_lock(&sbi->extent_lock);
-		list_add_tail(&en->list, &sbi->extent_list);
-		spin_unlock(&sbi->extent_lock);
+		spin_lock(&eti->extent_lock);
+		list_add_tail(&en->list, &eti->extent_list);
+		spin_unlock(&eti->extent_lock);
 	}
-out:
+unlock_out:
 	write_unlock(&et->lock);
+out:
+	if (type == EX_READ && !F2FS_I(inode)->extent_tree[EX_READ])
+		set_inode_flag(inode, FI_NO_EXTENT);
 }
 
 void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
 {
-	__f2fs_init_extent_tree(inode, ipage);
-
-	if (!F2FS_I(inode)->extent_tree)
-		set_inode_flag(inode, FI_NO_EXTENT);
+	/* initialize read cache */
+	__f2fs_init_extent_tree(inode, ipage, EX_READ);
 }
 
-static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
-							struct extent_info *ei)
+static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
+			struct extent_info *ei, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	struct extent_node *en;
 	bool ret = false;
 
 	f2fs_bug_on(sbi, !et);
 
-	trace_f2fs_lookup_extent_tree_start(inode, pgofs);
+	trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
 
 	read_lock(&et->lock);
 
-	if (et->largest.fofs <= pgofs &&
+	if (type == EX_READ &&
+			et->largest.fofs <= pgofs &&
 			et->largest.fofs + et->largest.len > pgofs) {
 		*ei = et->largest;
 		ret = true;
@@ -495,23 +526,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
 		goto out;
 
 	if (en == et->cached_en)
-		stat_inc_cached_node_hit(sbi);
+		stat_inc_cached_node_hit(sbi, type);
 	else
-		stat_inc_rbtree_node_hit(sbi);
+		stat_inc_rbtree_node_hit(sbi, type);
 
 	*ei = en->ei;
-	spin_lock(&sbi->extent_lock);
+	spin_lock(&eti->extent_lock);
 	if (!list_empty(&en->list)) {
-		list_move_tail(&en->list, &sbi->extent_list);
+		list_move_tail(&en->list, &eti->extent_list);
 		et->cached_en = en;
 	}
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 	ret = true;
 out:
-	stat_inc_total_hit(sbi);
+	stat_inc_total_hit(sbi, type);
 	read_unlock(&et->lock);
 
-	trace_f2fs_lookup_extent_tree_end(inode, pgofs, ei);
+	if (type == EX_READ)
+		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
 	return ret;
 }
 
@@ -520,18 +552,20 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
 				struct extent_node *prev_ex,
 				struct extent_node *next_ex)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 	struct extent_node *en = NULL;
 
-	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei)) {
+	if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
 		prev_ex->ei.len += ei->len;
 		ei = &prev_ex->ei;
 		en = prev_ex;
 	}
 
-	if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
+	if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
 		next_ex->ei.fofs = ei->fofs;
-		next_ex->ei.blk = ei->blk;
 		next_ex->ei.len += ei->len;
+		if (et->type == EX_READ)
+			next_ex->ei.blk = ei->blk;
 		if (en)
 			__release_extent_node(sbi, et, prev_ex);
 
@@ -543,12 +577,12 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
 
 	__try_update_largest_extent(et, en);
 
-	spin_lock(&sbi->extent_lock);
+	spin_lock(&eti->extent_lock);
 	if (!list_empty(&en->list)) {
-		list_move_tail(&en->list, &sbi->extent_list);
+		list_move_tail(&en->list, &eti->extent_list);
 		et->cached_en = en;
 	}
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 	return en;
 }
 
@@ -558,6 +592,7 @@ static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
 				struct rb_node *insert_parent,
 				bool leftmost)
 {
+	struct extent_tree_info *eti = &sbi->extent_tree[et->type];
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
 	struct extent_node *en = NULL;
@@ -580,47 +615,50 @@ do_insert:
 	__try_update_largest_extent(et, en);
 
 	/* update in global extent list */
-	spin_lock(&sbi->extent_lock);
-	list_add_tail(&en->list, &sbi->extent_list);
+	spin_lock(&eti->extent_lock);
+	list_add_tail(&en->list, &eti->extent_list);
 	et->cached_en = en;
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 	return en;
 }
 
-static void f2fs_update_extent_tree_range(struct inode *inode,
-				pgoff_t fofs, block_t blkaddr, unsigned int len)
+static void __update_extent_tree_range(struct inode *inode,
+			struct extent_info *tei, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	struct extent_node *en = NULL, *en1 = NULL;
 	struct extent_node *prev_en = NULL, *next_en = NULL;
 	struct extent_info ei, dei, prev;
 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
+	unsigned int fofs = tei->fofs, len = tei->len;
 	unsigned int end = fofs + len;
-	unsigned int pos = (unsigned int)fofs;
 	bool updated = false;
 	bool leftmost = false;
 
 	if (!et)
 		return;
 
-	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, len, 0);
-
+	if (type == EX_READ)
+		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
+						tei->blk, 0);
 	write_lock(&et->lock);
 
-	if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
-		write_unlock(&et->lock);
-		return;
-	}
+	if (type == EX_READ) {
+		if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
+			write_unlock(&et->lock);
+			return;
+		}
 
-	prev = et->largest;
-	dei.len = 0;
+		prev = et->largest;
+		dei.len = 0;
 
-	/*
-	 * drop largest extent before lookup, in case it's already
-	 * been shrunk from extent tree
-	 */
-	__drop_largest_extent(et, fofs, len);
+		/*
+		 * drop largest extent before lookup, in case it's already
+		 * been shrunk from extent tree
+		 */
+		__drop_largest_extent(et, fofs, len);
+	}
 
 	/* 1. lookup first extent node in range [fofs, fofs + len - 1] */
 	en = (struct extent_node *)f2fs_lookup_rb_tree_ret(&et->root,
@@ -641,26 +679,30 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
 
 		dei = en->ei;
 		org_end = dei.fofs + dei.len;
-		f2fs_bug_on(sbi, pos >= org_end);
+		f2fs_bug_on(sbi, fofs >= org_end);
 
-		if (pos > dei.fofs && pos - dei.fofs >= F2FS_MIN_EXTENT_LEN) {
-			en->ei.len = pos - en->ei.fofs;
+		if (fofs > dei.fofs && (type != EX_READ ||
+				fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
+			en->ei.len = fofs - en->ei.fofs;
 			prev_en = en;
 			parts = 1;
 		}
 
-		if (end < org_end && org_end - end >= F2FS_MIN_EXTENT_LEN) {
+		if (end < org_end && (type != EX_READ ||
+				org_end - end >= F2FS_MIN_EXTENT_LEN)) {
 			if (parts) {
 				__set_extent_info(&ei,
 					end, org_end - end,
-					end - dei.fofs + dei.blk, false);
+					end - dei.fofs + dei.blk, false,
+					type);
 				en1 = __insert_extent_tree(sbi, et, &ei,
 							NULL, NULL, true);
 				next_en = en1;
 			} else {
 				__set_extent_info(&en->ei,
 					end, en->ei.len - (end - dei.fofs),
-					en->ei.blk + (end - dei.fofs), true);
+					en->ei.blk + (end - dei.fofs), true,
+					type);
 				next_en = en;
 			}
 			parts++;
@@ -690,9 +732,11 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
 		en = next_en;
 	}
 
-	/* 3. update extent in extent cache */
-	if (blkaddr) {
-		__set_extent_info(&ei, fofs, len, blkaddr, false);
+	/* 3. update extent in read extent cache */
+	BUG_ON(type != EX_READ);
+
+	if (tei->blk) {
+		__set_extent_info(&ei, fofs, len, tei->blk, false, EX_READ);
 		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 			__insert_extent_tree(sbi, et, &ei,
 					insert_p, insert_parent, leftmost);
@@ -722,19 +766,20 @@ static void f2fs_update_extent_tree_range(struct inode *inode,
 }
 
 #ifdef CONFIG_F2FS_FS_COMPRESSION
-void f2fs_update_extent_tree_range_compressed(struct inode *inode,
+void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
 				pgoff_t fofs, block_t blkaddr, unsigned int llen,
 				unsigned int c_len)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
 	struct extent_node *en = NULL;
 	struct extent_node *prev_en = NULL, *next_en = NULL;
 	struct extent_info ei;
 	struct rb_node **insert_p = NULL, *insert_parent = NULL;
 	bool leftmost = false;
 
-	trace_f2fs_update_extent_tree_range(inode, fofs, blkaddr, llen, c_len);
+	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
+						blkaddr, c_len);
 
 	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
 	if (is_inode_flag_set(inode, FI_NO_EXTENT))
@@ -751,7 +796,7 @@ void f2fs_update_extent_tree_range_compressed(struct inode *inode,
 	if (en)
 		goto unlock_out;
 
-	__set_extent_info(&ei, fofs, llen, blkaddr, true);
+	__set_extent_info(&ei, fofs, llen, blkaddr, true, EX_READ);
 	ei.c_len = c_len;
 
 	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
@@ -762,24 +807,43 @@ unlock_out:
 }
 #endif
 
-unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
 {
+	struct extent_info ei;
+
+	if (!__may_extent_tree(dn->inode, type))
+		return;
+
+	ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+								dn->ofs_in_node;
+	ei.len = 1;
+
+	if (type == EX_READ) {
+		if (dn->data_blkaddr == NEW_ADDR)
+			ei.blk = NULL_ADDR;
+		else
+			ei.blk = dn->data_blkaddr;
+	}
+	__update_extent_tree_range(dn->inode, &ei, type);
+}
+
+static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
+					enum extent_type type)
+{
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
 	struct extent_tree *et, *next;
 	struct extent_node *en;
 	unsigned int node_cnt = 0, tree_cnt = 0;
 	int remained;
 
-	if (!test_opt(sbi, READ_EXTENT_CACHE))
-		return 0;
-
-	if (!atomic_read(&sbi->total_zombie_tree))
+	if (!atomic_read(&eti->total_zombie_tree))
 		goto free_node;
 
-	if (!mutex_trylock(&sbi->extent_tree_lock))
+	if (!mutex_trylock(&eti->extent_tree_lock))
 		goto out;
 
 	/* 1. remove unreferenced extent tree */
-	list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
+	list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
 		if (atomic_read(&et->node_cnt)) {
 			write_lock(&et->lock);
 			node_cnt += __free_extent_tree(sbi, et);
@@ -787,61 +851,100 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
 		}
 		f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
 		list_del_init(&et->list);
-		radix_tree_delete(&sbi->extent_tree_root, et->ino);
+		radix_tree_delete(&eti->extent_tree_root, et->ino);
 		kmem_cache_free(extent_tree_slab, et);
-		atomic_dec(&sbi->total_ext_tree);
-		atomic_dec(&sbi->total_zombie_tree);
+		atomic_dec(&eti->total_ext_tree);
+		atomic_dec(&eti->total_zombie_tree);
 		tree_cnt++;
 
 		if (node_cnt + tree_cnt >= nr_shrink)
 			goto unlock_out;
 		cond_resched();
 	}
-	mutex_unlock(&sbi->extent_tree_lock);
+	mutex_unlock(&eti->extent_tree_lock);
 
 free_node:
 	/* 2. remove LRU extent entries */
-	if (!mutex_trylock(&sbi->extent_tree_lock))
+	if (!mutex_trylock(&eti->extent_tree_lock))
 		goto out;
 
 	remained = nr_shrink - (node_cnt + tree_cnt);
 
-	spin_lock(&sbi->extent_lock);
+	spin_lock(&eti->extent_lock);
 	for (; remained > 0; remained--) {
-		if (list_empty(&sbi->extent_list))
+		if (list_empty(&eti->extent_list))
 			break;
-		en = list_first_entry(&sbi->extent_list,
+		en = list_first_entry(&eti->extent_list,
 					struct extent_node, list);
 		et = en->et;
 		if (!write_trylock(&et->lock)) {
 			/* refresh this extent node's position in extent list */
-			list_move_tail(&en->list, &sbi->extent_list);
+			list_move_tail(&en->list, &eti->extent_list);
 			continue;
 		}
 
 		list_del_init(&en->list);
-		spin_unlock(&sbi->extent_lock);
+		spin_unlock(&eti->extent_lock);
 
 		__detach_extent_node(sbi, et, en);
 
 		write_unlock(&et->lock);
 		node_cnt++;
-		spin_lock(&sbi->extent_lock);
+		spin_lock(&eti->extent_lock);
 	}
-	spin_unlock(&sbi->extent_lock);
+	spin_unlock(&eti->extent_lock);
 
 unlock_out:
-	mutex_unlock(&sbi->extent_tree_lock);
+	mutex_unlock(&eti->extent_tree_lock);
 out:
-	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt);
+	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
 
 	return node_cnt + tree_cnt;
 }
 
-unsigned int f2fs_destroy_extent_node(struct inode *inode)
+/* read extent cache operations */
+bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
+				struct extent_info *ei)
+{
+	if (!__may_extent_tree(inode, EX_READ))
+		return false;
+
+	return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
+}
+
+void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
+{
+	return __update_extent_cache(dn, EX_READ);
+}
+
+void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
+				pgoff_t fofs, block_t blkaddr, unsigned int len)
+{
+	struct extent_info ei = {
+		.fofs = fofs,
+		.len = len,
+		.blk = blkaddr,
+	};
+
+	if (!__may_extent_tree(dn->inode, EX_READ))
+		return;
+
+	__update_extent_tree_range(dn->inode, &ei, EX_READ);
+}
+
+unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+	if (!test_opt(sbi, READ_EXTENT_CACHE))
+		return 0;
+
+	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
+}
+
+static unsigned int __destroy_extent_node(struct inode *inode,
+					enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	unsigned int node_cnt = 0;
 
 	if (!et || !atomic_read(&et->node_cnt))
@@ -854,31 +957,44 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
 	return node_cnt;
 }
 
-void f2fs_drop_extent_tree(struct inode *inode)
+void f2fs_destroy_extent_node(struct inode *inode)
+{
+	__destroy_extent_node(inode, EX_READ);
+}
+
+static void __drop_extent_tree(struct inode *inode, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	bool updated = false;
 
-	if (!f2fs_may_extent_tree(inode))
+	if (!__may_extent_tree(inode, type))
 		return;
 
 	write_lock(&et->lock);
-	set_inode_flag(inode, FI_NO_EXTENT);
 	__free_extent_tree(sbi, et);
-	if (et->largest.len) {
-		et->largest.len = 0;
-		updated = true;
+	if (type == EX_READ) {
+		set_inode_flag(inode, FI_NO_EXTENT);
+		if (et->largest.len) {
+			et->largest.len = 0;
+			updated = true;
+		}
 	}
 	write_unlock(&et->lock);
 	if (updated)
 		f2fs_mark_inode_dirty_sync(inode, true);
 }
 
-void f2fs_destroy_extent_tree(struct inode *inode)
+void f2fs_drop_extent_tree(struct inode *inode)
+{
+	__drop_extent_tree(inode, EX_READ);
+}
+
+static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
 	unsigned int node_cnt = 0;
 
 	if (!et)
@@ -886,76 +1002,49 @@ void f2fs_destroy_extent_tree(struct inode *inode)
 
 	if (inode->i_nlink && !is_bad_inode(inode) &&
 					atomic_read(&et->node_cnt)) {
-		mutex_lock(&sbi->extent_tree_lock);
-		list_add_tail(&et->list, &sbi->zombie_list);
-		atomic_inc(&sbi->total_zombie_tree);
-		mutex_unlock(&sbi->extent_tree_lock);
+		mutex_lock(&eti->extent_tree_lock);
+		list_add_tail(&et->list, &eti->zombie_list);
+		atomic_inc(&eti->total_zombie_tree);
+		mutex_unlock(&eti->extent_tree_lock);
 		return;
 	}
 
 	/* free all extent info belong to this extent tree */
-	node_cnt = f2fs_destroy_extent_node(inode);
+	node_cnt = __destroy_extent_node(inode, type);
 
 	/* delete extent tree entry in radix tree */
-	mutex_lock(&sbi->extent_tree_lock);
+	mutex_lock(&eti->extent_tree_lock);
 	f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
-	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino);
+	radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
 	kmem_cache_free(extent_tree_slab, et);
-	atomic_dec(&sbi->total_ext_tree);
-	mutex_unlock(&sbi->extent_tree_lock);
+	atomic_dec(&eti->total_ext_tree);
+	mutex_unlock(&eti->extent_tree_lock);
 
-	F2FS_I(inode)->extent_tree = NULL;
+	F2FS_I(inode)->extent_tree[type] = NULL;
 
-	trace_f2fs_destroy_extent_tree(inode, node_cnt);
+	trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
 }
 
-bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
-					struct extent_info *ei)
-{
-	if (!f2fs_may_extent_tree(inode))
-		return false;
-
-	return f2fs_lookup_extent_tree(inode, pgofs, ei);
-}
-
-void f2fs_update_extent_cache(struct dnode_of_data *dn)
+void f2fs_destroy_extent_tree(struct inode *inode)
 {
-	pgoff_t fofs;
-	block_t blkaddr;
-
-	if (!f2fs_may_extent_tree(dn->inode))
-		return;
-
-	if (dn->data_blkaddr == NEW_ADDR)
-		blkaddr = NULL_ADDR;
-	else
-		blkaddr = dn->data_blkaddr;
-
-	fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
-								dn->ofs_in_node;
-	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1);
+	__destroy_extent_tree(inode, EX_READ);
 }
 
-void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
-				pgoff_t fofs, block_t blkaddr, unsigned int len)
-
+static void __init_extent_tree_info(struct extent_tree_info *eti)
 {
-	if (!f2fs_may_extent_tree(dn->inode))
-		return;
-
-	f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, len);
+	INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
+	mutex_init(&eti->extent_tree_lock);
+	INIT_LIST_HEAD(&eti->extent_list);
+	spin_lock_init(&eti->extent_lock);
+	atomic_set(&eti->total_ext_tree, 0);
+	INIT_LIST_HEAD(&eti->zombie_list);
+	atomic_set(&eti->total_zombie_tree, 0);
+	atomic_set(&eti->total_ext_node, 0);
 }
 
 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
 {
-	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO);
-	mutex_init(&sbi->extent_tree_lock);
-	INIT_LIST_HEAD(&sbi->extent_list);
-	spin_lock_init(&sbi->extent_lock);
-	atomic_set(&sbi->total_ext_tree, 0);
-	INIT_LIST_HEAD(&sbi->zombie_list);
-	atomic_set(&sbi->total_zombie_tree, 0);
-	atomic_set(&sbi->total_ext_node, 0);
+	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
 }
 
 int __init f2fs_create_extent_cache(void)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 04fdf010bb771..7c68bedee6492 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -596,16 +596,22 @@ enum {
 /* dirty segments threshold for triggering CP */
 #define DEFAULT_DIRTY_THRESHOLD		4
 
+#define RECOVERY_MAX_RA_BLOCKS		BIO_MAX_VECS
+#define RECOVERY_MIN_RA_BLOCKS		1
+
+#define F2FS_ONSTACK_PAGES	16	/* nr of onstack pages */
+
 /* for in-memory extent cache entry */
 #define F2FS_MIN_EXTENT_LEN	64	/* minimum extent length */
 
 /* number of extent info in extent cache we try to shrink */
 #define READ_EXTENT_CACHE_SHRINK_NUMBER	128
 
-#define RECOVERY_MAX_RA_BLOCKS		BIO_MAX_VECS
-#define RECOVERY_MIN_RA_BLOCKS		1
-
-#define F2FS_ONSTACK_PAGES	16	/* nr of onstack pages */
+/* extent cache type */
+enum extent_type {
+	EX_READ,
+	NR_EXTENT_CACHES,
+};
 
 struct rb_entry {
 	struct rb_node rb_node;		/* rb node located in rb-tree */
@@ -621,10 +627,17 @@ struct rb_entry {
 struct extent_info {
 	unsigned int fofs;		/* start offset in a file */
 	unsigned int len;		/* length of the extent */
-	block_t blk;			/* start block address of the extent */
+	union {
+		/* read extent_cache */
+		struct {
+			/* start block address of the extent */
+			block_t blk;
 #ifdef CONFIG_F2FS_FS_COMPRESSION
-	unsigned int c_len;		/* physical extent length of compressed blocks */
+			/* physical extent length of compressed blocks */
+			unsigned int c_len;
 #endif
+		};
+	};
 };
 
 struct extent_node {
@@ -636,13 +649,25 @@ struct extent_node {
 
 struct extent_tree {
 	nid_t ino;			/* inode number */
+	enum extent_type type;		/* keep the extent tree type */
 	struct rb_root_cached root;	/* root of extent info rb-tree */
 	struct extent_node *cached_en;	/* recently accessed extent node */
-	struct extent_info largest;	/* largested extent info */
 	struct list_head list;		/* to be used by sbi->zombie_list */
 	rwlock_t lock;			/* protect extent info rb-tree */
 	atomic_t node_cnt;		/* # of extent node in rb-tree*/
 	bool largest_updated;		/* largest extent updated */
+	struct extent_info largest;	/* largest cached extent for EX_READ */
+};
+
+struct extent_tree_info {
+	struct radix_tree_root extent_tree_root;/* cache extent cache entries */
+	struct mutex extent_tree_lock;	/* locking extent radix tree */
+	struct list_head extent_list;		/* lru list for shrinker */
+	spinlock_t extent_lock;			/* locking extent lru list */
+	atomic_t total_ext_tree;		/* extent tree count */
+	struct list_head zombie_list;		/* extent zombie tree list */
+	atomic_t total_zombie_tree;		/* extent zombie tree count */
+	atomic_t total_ext_node;		/* extent info count */
 };
 
 /*
@@ -805,7 +830,8 @@ struct f2fs_inode_info {
 	struct list_head dirty_list;	/* dirty list for dirs and files */
 	struct list_head gdirty_list;	/* linked in global dirty list */
 	struct task_struct *atomic_write_task;	/* store atomic write task */
-	struct extent_tree *extent_tree;	/* cached extent_tree entry */
+	struct extent_tree *extent_tree[NR_EXTENT_CACHES];
+					/* cached extent_tree entry */
 	struct inode *cow_inode;	/* copy-on-write inode for atomic write */
 
 	/* avoid racing between foreground op and gc */
@@ -1626,14 +1652,7 @@ struct f2fs_sb_info {
 	struct mutex flush_lock;		/* for flush exclusion */
 
 	/* for extent tree cache */
-	struct radix_tree_root extent_tree_root;/* cache extent cache entries */
-	struct mutex extent_tree_lock;	/* locking extent radix tree */
-	struct list_head extent_list;		/* lru list for shrinker */
-	spinlock_t extent_lock;			/* locking extent lru list */
-	atomic_t total_ext_tree;		/* extent tree count */
-	struct list_head zombie_list;		/* extent zombie tree list */
-	atomic_t total_zombie_tree;		/* extent zombie tree count */
-	atomic_t total_ext_node;		/* extent info count */
+	struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
 
 	/* basic filesystem units */
 	unsigned int log_sectors_per_block;	/* log2 sectors per block */
@@ -1718,10 +1737,14 @@ struct f2fs_sb_info {
 	unsigned int segment_count[2];		/* # of allocated segments */
 	unsigned int block_count[2];		/* # of allocated blocks */
 	atomic_t inplace_count;		/* # of inplace update */
-	atomic64_t total_hit_ext;		/* # of lookup extent cache */
-	atomic64_t read_hit_rbtree;		/* # of hit rbtree extent node */
-	atomic64_t read_hit_largest;		/* # of hit largest extent node */
-	atomic64_t read_hit_cached;		/* # of hit cached extent node */
+	/* # of lookup extent cache */
+	atomic64_t total_hit_ext[NR_EXTENT_CACHES];
+	/* # of hit rbtree extent node */
+	atomic64_t read_hit_rbtree[NR_EXTENT_CACHES];
+	/* # of hit cached extent node */
+	atomic64_t read_hit_cached[NR_EXTENT_CACHES];
+	/* # of hit largest extent node in read extent cache */
+	atomic64_t read_hit_largest;
 	atomic_t inline_xattr;			/* # of inline_xattr inodes */
 	atomic_t inline_inode;			/* # of inline_data inodes */
 	atomic_t inline_dir;			/* # of inline_dentry inodes */
@@ -3823,9 +3846,17 @@ struct f2fs_stat_info {
 	struct f2fs_sb_info *sbi;
 	int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs;
 	int main_area_segs, main_area_sections, main_area_zones;
-	unsigned long long hit_largest, hit_cached, hit_rbtree;
-	unsigned long long hit_total, total_ext;
-	int ext_tree, zombie_tree, ext_node;
+	unsigned long long hit_cached[NR_EXTENT_CACHES];
+	unsigned long long hit_rbtree[NR_EXTENT_CACHES];
+	unsigned long long total_ext[NR_EXTENT_CACHES];
+	unsigned long long hit_total[NR_EXTENT_CACHES];
+	int ext_tree[NR_EXTENT_CACHES];
+	int zombie_tree[NR_EXTENT_CACHES];
+	int ext_node[NR_EXTENT_CACHES];
+	/* to count memory footprint */
+	unsigned long long ext_mem[NR_EXTENT_CACHES];
+	/* for read extent cache */
+	unsigned long long hit_largest;
 	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
 	int ndirty_data, ndirty_qdata;
 	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
@@ -3884,10 +3915,10 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
 #define stat_other_skip_bggc_count(sbi)	((sbi)->other_skip_bggc++)
 #define stat_inc_dirty_inode(sbi, type)	((sbi)->ndirty_inode[type]++)
 #define stat_dec_dirty_inode(sbi, type)	((sbi)->ndirty_inode[type]--)
-#define stat_inc_total_hit(sbi)		(atomic64_inc(&(sbi)->total_hit_ext))
-#define stat_inc_rbtree_node_hit(sbi)	(atomic64_inc(&(sbi)->read_hit_rbtree))
+#define stat_inc_total_hit(sbi, type)		(atomic64_inc(&(sbi)->total_hit_ext[type]))
+#define stat_inc_rbtree_node_hit(sbi, type)	(atomic64_inc(&(sbi)->read_hit_rbtree[type]))
 #define stat_inc_largest_node_hit(sbi)	(atomic64_inc(&(sbi)->read_hit_largest))
-#define stat_inc_cached_node_hit(sbi)	(atomic64_inc(&(sbi)->read_hit_cached))
+#define stat_inc_cached_node_hit(sbi, type)	(atomic64_inc(&(sbi)->read_hit_cached[type]))
 #define stat_inc_inline_xattr(inode)					\
 	do {								\
 		if (f2fs_has_inline_xattr(inode))			\
@@ -4010,10 +4041,10 @@ void f2fs_update_sit_info(struct f2fs_sb_info *sbi);
 #define stat_other_skip_bggc_count(sbi)			do { } while (0)
 #define stat_inc_dirty_inode(sbi, type)			do { } while (0)
 #define stat_dec_dirty_inode(sbi, type)			do { } while (0)
-#define stat_inc_total_hit(sbi)				do { } while (0)
-#define stat_inc_rbtree_node_hit(sbi)			do { } while (0)
+#define stat_inc_total_hit(sbi, type)			do { } while (0)
+#define stat_inc_rbtree_node_hit(sbi, type)		do { } while (0)
 #define stat_inc_largest_node_hit(sbi)			do { } while (0)
-#define stat_inc_cached_node_hit(sbi)			do { } while (0)
+#define stat_inc_cached_node_hit(sbi, type)		do { } while (0)
 #define stat_inc_inline_xattr(inode)			do { } while (0)
 #define stat_dec_inline_xattr(inode)			do { } while (0)
 #define stat_inc_inline_inode(inode)			do { } while (0)
@@ -4119,20 +4150,23 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 		bool force, bool *leftmost);
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
 				struct rb_root_cached *root, bool check_key);
-unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
 void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
 void f2fs_drop_extent_tree(struct inode *inode);
-unsigned int f2fs_destroy_extent_node(struct inode *inode);
+void f2fs_destroy_extent_node(struct inode *inode);
 void f2fs_destroy_extent_tree(struct inode *inode);
-bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
-			struct extent_info *ei);
-void f2fs_update_extent_cache(struct dnode_of_data *dn);
-void f2fs_update_extent_cache_range(struct dnode_of_data *dn,
-			pgoff_t fofs, block_t blkaddr, unsigned int len);
 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi);
 int __init f2fs_create_extent_cache(void);
 void f2fs_destroy_extent_cache(void);
 
+/* read extent cache ops */
+bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
+			struct extent_info *ei);
+void f2fs_update_read_extent_cache(struct dnode_of_data *dn);
+void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
+			pgoff_t fofs, block_t blkaddr, unsigned int len);
+unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi,
+			int nr_shrink);
+
 /*
  * sysfs.c
  */
@@ -4202,9 +4236,9 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
 						struct writeback_control *wbc,
 						enum iostat_type io_type);
 int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index);
-void f2fs_update_extent_tree_range_compressed(struct inode *inode,
-				pgoff_t fofs, block_t blkaddr, unsigned int llen,
-				unsigned int c_len);
+void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
+				pgoff_t fofs, block_t blkaddr,
+				unsigned int llen, unsigned int c_len);
 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 				unsigned nr_pages, sector_t *last_block_in_bio,
 				bool is_readahead, bool for_write);
@@ -4285,9 +4319,10 @@ static inline bool f2fs_load_compressed_page(struct f2fs_sb_info *sbi,
 static inline void f2fs_invalidate_compress_pages(struct f2fs_sb_info *sbi,
 							nid_t ino) { }
 #define inc_compr_inode_stat(inode)		do { } while (0)
-static inline void f2fs_update_extent_tree_range_compressed(struct inode *inode,
-				pgoff_t fofs, block_t blkaddr, unsigned int llen,
-				unsigned int c_len) { }
+static inline void f2fs_update_read_extent_tree_range_compressed(
+				struct inode *inode,
+				pgoff_t fofs, block_t blkaddr,
+				unsigned int llen, unsigned int c_len) { }
 #endif
 
 static inline int set_compress_context(struct inode *inode)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index ab0a0d3730f6e..cbe7c24065c7b 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -618,7 +618,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 		 */
 		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
 							dn->inode) + ofs;
-		f2fs_update_extent_cache_range(dn, fofs, 0, len);
+		f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
 		dec_valid_block_count(sbi, dn->inode, nr_free);
 	}
 	dn->ofs_in_node = ofs;
@@ -1496,7 +1496,7 @@ static int f2fs_do_zero_range(struct dnode_of_data *dn, pgoff_t start,
 		f2fs_set_data_blkaddr(dn);
 	}
 
-	f2fs_update_extent_cache_range(dn, start, 0, index - start);
+	f2fs_update_read_extent_cache_range(dn, start, 0, index - start);
 
 	return ret;
 }
@@ -2558,7 +2558,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 	struct f2fs_map_blocks map = { .m_next_extent = NULL,
 					.m_seg_type = NO_CHECK_TYPE,
 					.m_may_create = false };
-	struct extent_info ei = {0, 0, 0};
+	struct extent_info ei = {0, };
 	pgoff_t pg_start, pg_end, next_pgofs;
 	unsigned int blk_per_seg = sbi->blocks_per_seg;
 	unsigned int total = 0, sec_num;
@@ -2590,7 +2590,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
 	 * lookup mapping info in extent cache, skip defragmenting if physical
 	 * block addresses are continuous.
 	 */
-	if (f2fs_lookup_extent_cache(inode, pg_start, &ei)) {
+	if (f2fs_lookup_read_extent_cache(inode, pg_start, &ei)) {
 		if (ei.fofs + ei.len >= pg_end)
 			goto out;
 	}
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index d19e26b2e875e..f0c6506d89758 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1146,7 +1146,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
 	struct address_space *mapping = inode->i_mapping;
 	struct dnode_of_data dn;
 	struct page *page;
-	struct extent_info ei = {0, 0, 0};
+	struct extent_info ei = {0, };
 	struct f2fs_io_info fio = {
 		.sbi = sbi,
 		.ino = inode->i_ino,
@@ -1164,7 +1164,7 @@ static int ra_data_block(struct inode *inode, pgoff_t index)
 	if (!page)
 		return -ENOMEM;
 
-	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
+	if (f2fs_lookup_read_extent_cache(inode, index, &ei)) {
 		dn.data_blkaddr = ei.blk + index - ei.fofs;
 		if (unlikely(!f2fs_is_valid_blkaddr(sbi, dn.data_blkaddr,
 						DATA_GENERIC_ENHANCE_READ))) {
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2c705c60019b2..086f201f15a02 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -262,8 +262,8 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
 		return false;
 	}
 
-	if (fi->extent_tree) {
-		struct extent_info *ei = &fi->extent_tree->largest;
+	if (fi->extent_tree[EX_READ]) {
+		struct extent_info *ei = &fi->extent_tree[EX_READ]->largest;
 
 		if (ei->len &&
 			(!f2fs_is_valid_blkaddr(sbi, ei->blk,
@@ -607,7 +607,7 @@ retry:
 void f2fs_update_inode(struct inode *inode, struct page *node_page)
 {
 	struct f2fs_inode *ri;
-	struct extent_tree *et = F2FS_I(inode)->extent_tree;
+	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
 
 	f2fs_wait_on_page_writeback(node_page, NODE, true, true);
 	set_page_dirty(node_page);
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 84b147966080e..07419c3e42a52 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -86,9 +86,11 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
 		mem_size >>= PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
 	} else if (type == READ_EXTENT_CACHE) {
-		mem_size = (atomic_read(&sbi->total_ext_tree) *
+		struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
+
+		mem_size = (atomic_read(&eti->total_ext_tree) *
 				sizeof(struct extent_tree) +
-				atomic_read(&sbi->total_ext_node) *
+				atomic_read(&eti->total_ext_node) *
 				sizeof(struct extent_node)) >> PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
 	} else if (type == DISCARD_CACHE) {
@@ -859,7 +861,7 @@ int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
 			blkaddr = data_blkaddr(dn->inode, dn->node_page,
 						dn->ofs_in_node + 1);
 
-		f2fs_update_extent_tree_range_compressed(dn->inode,
+		f2fs_update_read_extent_tree_range_compressed(dn->inode,
 					index, blkaddr,
 					F2FS_I(dn->inode)->i_cluster_size,
 					c_len);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 51de358bc4520..8722d1a13c170 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -450,7 +450,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
 
 	/* try to shrink extent cache when there is no enough memory */
 	if (!f2fs_available_free_memory(sbi, READ_EXTENT_CACHE))
-		f2fs_shrink_extent_tree(sbi, READ_EXTENT_CACHE_SHRINK_NUMBER);
+		f2fs_shrink_read_extent_tree(sbi,
+				READ_EXTENT_CACHE_SHRINK_NUMBER);
 
 	/* check the # of cached NAT entries */
 	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index dd3c3c7a90ec8..33c490e69ae30 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -28,10 +28,13 @@ static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
 	return count > 0 ? count : 0;
 }
 
-static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi)
+static unsigned long __count_extent_cache(struct f2fs_sb_info *sbi,
+					enum extent_type type)
 {
-	return atomic_read(&sbi->total_zombie_tree) +
-				atomic_read(&sbi->total_ext_node);
+	struct extent_tree_info *eti = &sbi->extent_tree[type];
+
+	return atomic_read(&eti->total_zombie_tree) +
+				atomic_read(&eti->total_ext_node);
 }
 
 unsigned long f2fs_shrink_count(struct shrinker *shrink,
@@ -53,8 +56,8 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
 		}
 		spin_unlock(&f2fs_list_lock);
 
-		/* count extent cache entries */
-		count += __count_extent_cache(sbi);
+		/* count read extent cache entries */
+		count += __count_extent_cache(sbi, EX_READ);
 
 		/* count clean nat cache entries */
 		count += __count_nat_entries(sbi);
@@ -99,8 +102,8 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 
 		sbi->shrinker_run_no = run_no;
 
-		/* shrink extent cache entries */
-		freed += f2fs_shrink_extent_tree(sbi, nr >> 1);
+		/* shrink read extent cache entries */
+		freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1);
 
 		/* shrink clean nat cache entries */
 		if (freed < nr)
@@ -130,7 +133,7 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
 
 void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
 {
-	f2fs_shrink_extent_tree(sbi, __count_extent_cache(sbi));
+	f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ));
 
 	spin_lock(&f2fs_list_lock);
 	list_del_init(&sbi->s_list);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 7fbfce498472f..2bb37892d2ba1 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -48,6 +48,7 @@ TRACE_DEFINE_ENUM(CP_DISCARD);
 TRACE_DEFINE_ENUM(CP_TRIMMED);
 TRACE_DEFINE_ENUM(CP_PAUSE);
 TRACE_DEFINE_ENUM(CP_RESIZE);
+TRACE_DEFINE_ENUM(EX_READ);
 
 #define show_block_type(type)						\
 	__print_symbolic(type,						\
@@ -1522,28 +1523,31 @@ TRACE_EVENT(f2fs_issue_flush,
 
 TRACE_EVENT(f2fs_lookup_extent_tree_start,
 
-	TP_PROTO(struct inode *inode, unsigned int pgofs),
+	TP_PROTO(struct inode *inode, unsigned int pgofs, enum extent_type type),
 
-	TP_ARGS(inode, pgofs),
+	TP_ARGS(inode, pgofs, type),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(ino_t,	ino)
 		__field(unsigned int, pgofs)
+		__field(enum extent_type, type)
 	),
 
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
 		__entry->pgofs = pgofs;
+		__entry->type = type;
 	),
 
-	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u",
+	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s",
 		show_dev_ino(__entry),
-		__entry->pgofs)
+		__entry->pgofs,
+		__entry->type == EX_READ ? "Read" : "N/A")
 );
 
-TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
+TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end,
 
 	TP_PROTO(struct inode *inode, unsigned int pgofs,
 						struct extent_info *ei),
@@ -1557,8 +1561,8 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
 		__field(ino_t,	ino)
 		__field(unsigned int, pgofs)
 		__field(unsigned int, fofs)
-		__field(u32, blk)
 		__field(unsigned int, len)
+		__field(u32, blk)
 	),
 
 	TP_fast_assign(
@@ -1566,26 +1570,26 @@ TRACE_EVENT_CONDITION(f2fs_lookup_extent_tree_end,
 		__entry->ino = inode->i_ino;
 		__entry->pgofs = pgofs;
 		__entry->fofs = ei->fofs;
-		__entry->blk = ei->blk;
 		__entry->len = ei->len;
+		__entry->blk = ei->blk;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
-		"ext_info(fofs: %u, blk: %u, len: %u)",
+		"read_ext_info(fofs: %u, len: %u, blk: %u)",
 		show_dev_ino(__entry),
 		__entry->pgofs,
 		__entry->fofs,
-		__entry->blk,
-		__entry->len)
+		__entry->len,
+		__entry->blk)
 );
 
-TRACE_EVENT(f2fs_update_extent_tree_range,
+TRACE_EVENT(f2fs_update_read_extent_tree_range,
 
-	TP_PROTO(struct inode *inode, unsigned int pgofs, block_t blkaddr,
-						unsigned int len,
+	TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len,
+						block_t blkaddr,
 						unsigned int c_len),
 
-	TP_ARGS(inode, pgofs, blkaddr, len, c_len),
+	TP_ARGS(inode, pgofs, len, blkaddr, c_len),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
@@ -1600,67 +1604,73 @@ TRACE_EVENT(f2fs_update_extent_tree_range,
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
 		__entry->pgofs = pgofs;
-		__entry->blk = blkaddr;
 		__entry->len = len;
+		__entry->blk = blkaddr;
 		__entry->c_len = c_len;
 	),
 
 	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
-					"blkaddr = %u, len = %u, "
-					"c_len = %u",
+				"len = %u, blkaddr = %u, c_len = %u",
 		show_dev_ino(__entry),
 		__entry->pgofs,
-		__entry->blk,
 		__entry->len,
+		__entry->blk,
 		__entry->c_len)
 );
 
 TRACE_EVENT(f2fs_shrink_extent_tree,
 
 	TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt,
-						unsigned int tree_cnt),
+			unsigned int tree_cnt, enum extent_type type),
 
-	TP_ARGS(sbi, node_cnt, tree_cnt),
+	TP_ARGS(sbi, node_cnt, tree_cnt, type),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(unsigned int, node_cnt)
 		__field(unsigned int, tree_cnt)
+		__field(enum extent_type, type)
 	),
 
 	TP_fast_assign(
 		__entry->dev = sbi->sb->s_dev;
 		__entry->node_cnt = node_cnt;
 		__entry->tree_cnt = tree_cnt;
+		__entry->type = type;
 	),
 
-	TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u",
+	TP_printk("dev = (%d,%d), shrunk: node_cnt = %u, tree_cnt = %u, type = %s",
 		show_dev(__entry->dev),
 		__entry->node_cnt,
-		__entry->tree_cnt)
+		__entry->tree_cnt,
+		__entry->type == EX_READ ? "Read" : "N/A")
 );
 
 TRACE_EVENT(f2fs_destroy_extent_tree,
 
-	TP_PROTO(struct inode *inode, unsigned int node_cnt),
+	TP_PROTO(struct inode *inode, unsigned int node_cnt,
+				enum extent_type type),
 
-	TP_ARGS(inode, node_cnt),
+	TP_ARGS(inode, node_cnt, type),
 
 	TP_STRUCT__entry(
 		__field(dev_t,	dev)
 		__field(ino_t,	ino)
 		__field(unsigned int, node_cnt)
+		__field(enum extent_type, type)
 	),
 
 	TP_fast_assign(
 		__entry->dev = inode->i_sb->s_dev;
 		__entry->ino = inode->i_ino;
 		__entry->node_cnt = node_cnt;
+		__entry->type = type;
 	),
 
-	TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u",
+	TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s",
 		show_dev_ino(__entry),
-		__entry->node_cnt)
+		__entry->node_cnt,
+		__entry->type == EX_READ ? "Read" : "N/A")
 );
 
 DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
-- 
GitLab


From 72840cccc0a1a0a0dc1bb27b669a9111be6d0f6a Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Fri, 2 Dec 2022 13:51:09 -0800
Subject: [PATCH 3781/4122] f2fs: allocate the extent_cache by default

Let's allocate it to remove the runtime complexity.

Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/extent_cache.c | 38 +++++++++++++++++++-------------------
 fs/f2fs/f2fs.h         |  3 ++-
 fs/f2fs/inode.c        |  6 ++++--
 fs/f2fs/namei.c        |  4 ++--
 4 files changed, 27 insertions(+), 24 deletions(-)

diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 654a14ab8977c..305f969e3ad12 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -47,20 +47,23 @@ static bool __may_read_extent_tree(struct inode *inode)
 	return S_ISREG(inode->i_mode);
 }
 
-static bool __may_extent_tree(struct inode *inode, enum extent_type type)
+static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
 {
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	if (type == EX_READ)
+		return __may_read_extent_tree(inode);
+	return false;
+}
 
+static bool __may_extent_tree(struct inode *inode, enum extent_type type)
+{
 	/*
 	 * for recovered files during mount do not create extents
 	 * if shrinker is not registered.
 	 */
-	if (list_empty(&sbi->s_list))
+	if (list_empty(&F2FS_I_SB(inode)->s_list))
 		return false;
 
-	if (type == EX_READ)
-		return __may_read_extent_tree(inode);
-	return false;
+	return __init_may_extent_tree(inode, type);
 }
 
 static void __try_update_largest_extent(struct extent_tree *et,
@@ -439,20 +442,18 @@ static void __drop_largest_extent(struct extent_tree *et,
 	}
 }
 
-/* return true, if inode page is changed */
-static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage,
-							enum extent_type type)
+void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct extent_tree_info *eti = &sbi->extent_tree[type];
-	struct f2fs_extent *i_ext = ipage ? &F2FS_INODE(ipage)->i_ext : NULL;
+	struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
+	struct f2fs_extent *i_ext = &F2FS_INODE(ipage)->i_ext;
 	struct extent_tree *et;
 	struct extent_node *en;
 	struct extent_info ei;
 
-	if (!__may_extent_tree(inode, type)) {
+	if (!__may_extent_tree(inode, EX_READ)) {
 		/* drop largest read extent */
-		if (type == EX_READ && i_ext && i_ext->len) {
+		if (i_ext && i_ext->len) {
 			f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 			i_ext->len = 0;
 			set_page_dirty(ipage);
@@ -460,13 +461,11 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage,
 		goto out;
 	}
 
-	et = __grab_extent_tree(inode, type);
+	et = __grab_extent_tree(inode, EX_READ);
 
 	if (!i_ext || !i_ext->len)
 		goto out;
 
-	BUG_ON(type != EX_READ);
-
 	get_read_extent_info(&ei, i_ext);
 
 	write_lock(&et->lock);
@@ -486,14 +485,15 @@ static void __f2fs_init_extent_tree(struct inode *inode, struct page *ipage,
 unlock_out:
 	write_unlock(&et->lock);
 out:
-	if (type == EX_READ && !F2FS_I(inode)->extent_tree[EX_READ])
+	if (!F2FS_I(inode)->extent_tree[EX_READ])
 		set_inode_flag(inode, FI_NO_EXTENT);
 }
 
-void f2fs_init_extent_tree(struct inode *inode, struct page *ipage)
+void f2fs_init_extent_tree(struct inode *inode)
 {
 	/* initialize read cache */
-	__f2fs_init_extent_tree(inode, ipage, EX_READ);
+	if (__init_may_extent_tree(inode, EX_READ))
+		__grab_extent_tree(inode, EX_READ);
 }
 
 static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7c68bedee6492..ec52e06f8e61c 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4150,7 +4150,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
 		bool force, bool *leftmost);
 bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
 				struct rb_root_cached *root, bool check_key);
-void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
+void f2fs_init_extent_tree(struct inode *inode);
 void f2fs_drop_extent_tree(struct inode *inode);
 void f2fs_destroy_extent_node(struct inode *inode);
 void f2fs_destroy_extent_tree(struct inode *inode);
@@ -4159,6 +4159,7 @@ int __init f2fs_create_extent_cache(void);
 void f2fs_destroy_extent_cache(void);
 
 /* read extent cache ops */
+void f2fs_init_read_extent_tree(struct inode *inode, struct page *ipage);
 bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
 			struct extent_info *ei);
 void f2fs_update_read_extent_cache(struct dnode_of_data *dn);
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 086f201f15a02..c845c16f97d05 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -392,8 +392,6 @@ static int do_read_inode(struct inode *inode)
 	fi->i_pino = le32_to_cpu(ri->i_pino);
 	fi->i_dir_level = ri->i_dir_level;
 
-	f2fs_init_extent_tree(inode, node_page);
-
 	get_inline_info(inode, ri);
 
 	fi->i_extra_isize = f2fs_has_extra_attr(inode) ?
@@ -479,6 +477,10 @@ static int do_read_inode(struct inode *inode)
 	}
 
 	init_idisk_time(inode);
+
+	/* Need all the flag bits */
+	f2fs_init_read_extent_tree(inode, node_page);
+
 	f2fs_put_page(node_page, 1);
 
 	stat_inc_inline_xattr(inode);
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 58a91ce8fe083..46de782c2baaa 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -284,8 +284,6 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
 	}
 	F2FS_I(inode)->i_inline_xattr_size = xattr_size;
 
-	f2fs_init_extent_tree(inode, NULL);
-
 	F2FS_I(inode)->i_flags =
 		f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
 
@@ -311,6 +309,8 @@ static struct inode *f2fs_new_inode(struct user_namespace *mnt_userns,
 
 	f2fs_set_inode_flags(inode);
 
+	f2fs_init_extent_tree(inode);
+
 	trace_f2fs_new_inode(inode, 0);
 	return inode;
 
-- 
GitLab


From 71644dff481180ba024ac4f5cb1f068756357adf Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim <jaegeuk@kernel.org>
Date: Thu, 1 Dec 2022 17:37:15 -0800
Subject: [PATCH 3782/4122] f2fs: add block_age-based extent cache

This patch introduces a runtime hot/cold data separation method
for f2fs, in order to improve the accuracy for data temperature
classification, reduce the garbage collection overhead after
long-term data updates.

Enhanced hot/cold data separation can record data block update
frequency as "age" of the extent per inode, and take use of the age
info to indicate better temperature type for data block allocation:
 - It records total data blocks allocated since mount;
 - When file extent has been updated, it calculate the count of data
blocks allocated since last update as the age of the extent;
 - Before the data block allocated, it searches for the age info and
chooses the suitable segment for allocation.

Test and result:
 - Prepare: create about 30000 files
  * 3% for cold files (with cold file extension like .apk, from 3M to 10M)
  * 50% for warm files (with random file extension like .FcDxq, from 1K
to 4M)
  * 47% for hot files (with hot file extension like .db, from 1K to 256K)
 - create(5%)/random update(90%)/delete(5%) the files
  * total write amount is about 70G
  * fsync will be called for .db files, and buffered write will be used
for other files

The storage of test device is large enough(128G) so that it will not
switch to SSR mode during the test.

Benefit: dirty segment count increment reduce about 14%
 - before: Dirty +21110
 - after:  Dirty +18286

Signed-off-by: qixiaoyu1 <qixiaoyu1@xiaomi.com>
Signed-off-by: xiongping1 <xiongping1@xiaomi.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 Documentation/ABI/testing/sysfs-fs-f2fs |  14 ++
 Documentation/filesystems/f2fs.rst      |   4 +
 fs/f2fs/debug.c                         |  21 +++
 fs/f2fs/extent_cache.c                  | 183 +++++++++++++++++++++++-
 fs/f2fs/f2fs.h                          |  38 +++++
 fs/f2fs/file.c                          |   1 +
 fs/f2fs/inode.c                         |   1 +
 fs/f2fs/node.c                          |  10 +-
 fs/f2fs/node.h                          |   1 +
 fs/f2fs/segment.c                       |  33 +++++
 fs/f2fs/shrinker.c                      |  10 +-
 fs/f2fs/super.c                         |  14 ++
 fs/f2fs/sysfs.c                         |  24 ++++
 include/trace/events/f2fs.h             |  86 ++++++++++-
 14 files changed, 430 insertions(+), 10 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-fs-f2fs b/Documentation/ABI/testing/sysfs-fs-f2fs
index 84a009aab1a13..9e3756625a819 100644
--- a/Documentation/ABI/testing/sysfs-fs-f2fs
+++ b/Documentation/ABI/testing/sysfs-fs-f2fs
@@ -655,3 +655,17 @@ Description:	When space utilization exceeds this, do background DISCARD aggressi
 		Does DISCARD forcibly in a period of given min_discard_issue_time when the number
 		of discards is not 0 and set discard granularity to 1.
 		Default: 80
+
+What:		/sys/fs/f2fs/<disk>/hot_data_age_threshold
+Date:		November 2022
+Contact:	"Ping Xiong" <xiongping1@xiaomi.com>
+Description:	When DATA SEPARATION is on, it controls the age threshold to indicate
+		the data blocks as hot. By default it was initialized as 262144 blocks
+		(equals to 1GB).
+
+What:		/sys/fs/f2fs/<disk>/warm_data_age_threshold
+Date:		November 2022
+Contact:	"Ping Xiong" <xiongping1@xiaomi.com>
+Description:	When DATA SEPARATION is on, it controls the age threshold to indicate
+		the data blocks as warm. By default it was initialized as 2621440 blocks
+		(equals to 10GB).
diff --git a/Documentation/filesystems/f2fs.rst b/Documentation/filesystems/f2fs.rst
index 67e1f3e86f32f..220f3e0d3f559 100644
--- a/Documentation/filesystems/f2fs.rst
+++ b/Documentation/filesystems/f2fs.rst
@@ -347,6 +347,10 @@ memory=%s		 Control memory mode. This supports "normal" and "low" modes.
 			 Because of the nature of low memory devices, in this mode, f2fs
 			 will try to save memory sometimes by sacrificing performance.
 			 "normal" mode is the default mode and same as before.
+age_extent_cache	 Enable an age extent cache based on rb-tree. It records
+			 data block update frequency of the extent per inode, in
+			 order to provide better temperature hints for data block
+			 allocation.
 ======================== ============================================================
 
 Debugfs Entries
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a9baa121d829f..8f1ef742551fc 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -88,6 +88,9 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 	si->hit_largest = atomic64_read(&sbi->read_hit_largest);
 	si->hit_total[EX_READ] += si->hit_largest;
 
+	/* block age extent_cache only */
+	si->allocated_data_blocks = atomic64_read(&sbi->allocated_data_blocks);
+
 	/* validation check of the segment numbers */
 	si->ndirty_node = get_pages(sbi, F2FS_DIRTY_NODES);
 	si->ndirty_dent = get_pages(sbi, F2FS_DIRTY_DENTS);
@@ -516,6 +519,22 @@ static int stat_show(struct seq_file *s, void *v)
 		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
 				si->ext_tree[EX_READ], si->zombie_tree[EX_READ],
 				si->ext_node[EX_READ]);
+		seq_puts(s, "\nExtent Cache (Block Age):\n");
+		seq_printf(s, "  - Allocated Data Blocks: %llu\n",
+				si->allocated_data_blocks);
+		seq_printf(s, "  - Hit Count: L1:%llu L2:%llu\n",
+				si->hit_cached[EX_BLOCK_AGE],
+				si->hit_rbtree[EX_BLOCK_AGE]);
+		seq_printf(s, "  - Hit Ratio: %llu%% (%llu / %llu)\n",
+				!si->total_ext[EX_BLOCK_AGE] ? 0 :
+				div64_u64(si->hit_total[EX_BLOCK_AGE] * 100,
+				si->total_ext[EX_BLOCK_AGE]),
+				si->hit_total[EX_BLOCK_AGE],
+				si->total_ext[EX_BLOCK_AGE]);
+		seq_printf(s, "  - Inner Struct Count: tree: %d(%d), node: %d\n",
+				si->ext_tree[EX_BLOCK_AGE],
+				si->zombie_tree[EX_BLOCK_AGE],
+				si->ext_node[EX_BLOCK_AGE]);
 		seq_puts(s, "\nBalancing F2FS Async:\n");
 		seq_printf(s, "  - DIO (R: %4d, W: %4d)\n",
 			   si->nr_dio_read, si->nr_dio_write);
@@ -586,6 +605,8 @@ static int stat_show(struct seq_file *s, void *v)
 				si->cache_mem >> 10);
 		seq_printf(s, "  - read extent cache: %llu KB\n",
 				si->ext_mem[EX_READ] >> 10);
+		seq_printf(s, "  - block age extent cache: %llu KB\n",
+				si->ext_mem[EX_BLOCK_AGE] >> 10);
 		seq_printf(s, "  - paged : %llu KB\n",
 				si->page_mem >> 10);
 	}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 305f969e3ad12..1bd38a78ebba9 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -6,6 +6,10 @@
  * Copyright (c) 2015 Samsung Electronics
  * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
  *          Chao Yu <chao2.yu@samsung.com>
+ *
+ * block_age-based extent cache added by:
+ * Copyright (c) 2022 xiaomi Co., Ltd.
+ *             http://www.xiaomi.com/
  */
 
 #include <linux/fs.h>
@@ -18,6 +22,7 @@
 static void __set_extent_info(struct extent_info *ei,
 				unsigned int fofs, unsigned int len,
 				block_t blk, bool keep_clen,
+				unsigned long age, unsigned long last_blocks,
 				enum extent_type type)
 {
 	ei->fofs = fofs;
@@ -30,6 +35,9 @@ static void __set_extent_info(struct extent_info *ei,
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 		ei->c_len = 0;
 #endif
+	} else if (type == EX_BLOCK_AGE) {
+		ei->age = age;
+		ei->last_blocks = last_blocks;
 	}
 }
 
@@ -47,10 +55,27 @@ static bool __may_read_extent_tree(struct inode *inode)
 	return S_ISREG(inode->i_mode);
 }
 
+static bool __may_age_extent_tree(struct inode *inode)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+
+	if (!test_opt(sbi, AGE_EXTENT_CACHE))
+		return false;
+	/* don't cache block age info for cold file */
+	if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
+		return false;
+	if (file_is_cold(inode))
+		return false;
+
+	return S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode);
+}
+
 static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
 {
 	if (type == EX_READ)
 		return __may_read_extent_tree(inode);
+	else if (type == EX_BLOCK_AGE)
+		return __may_age_extent_tree(inode);
 	return false;
 }
 
@@ -90,6 +115,11 @@ static bool __is_extent_mergeable(struct extent_info *back,
 #endif
 		return (back->fofs + back->len == front->fofs &&
 				back->blk + back->len == front->blk);
+	} else if (type == EX_BLOCK_AGE) {
+		return (back->fofs + back->len == front->fofs &&
+			abs(back->age - front->age) <= SAME_AGE_REGION &&
+			abs(back->last_blocks - front->last_blocks) <=
+							SAME_AGE_REGION);
 	}
 	return false;
 }
@@ -489,11 +519,22 @@ out:
 		set_inode_flag(inode, FI_NO_EXTENT);
 }
 
+void f2fs_init_age_extent_tree(struct inode *inode)
+{
+	if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
+		return;
+	__grab_extent_tree(inode, EX_BLOCK_AGE);
+}
+
 void f2fs_init_extent_tree(struct inode *inode)
 {
 	/* initialize read cache */
 	if (__init_may_extent_tree(inode, EX_READ))
 		__grab_extent_tree(inode, EX_READ);
+
+	/* initialize block age cache */
+	if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
+		__grab_extent_tree(inode, EX_BLOCK_AGE);
 }
 
 static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
@@ -544,6 +585,8 @@ out:
 
 	if (type == EX_READ)
 		trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
+	else if (type == EX_BLOCK_AGE)
+		trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
 	return ret;
 }
 
@@ -642,6 +685,10 @@ static void __update_extent_tree_range(struct inode *inode,
 	if (type == EX_READ)
 		trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
 						tei->blk, 0);
+	else if (type == EX_BLOCK_AGE)
+		trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
+						tei->age, tei->last_blocks);
+
 	write_lock(&et->lock);
 
 	if (type == EX_READ) {
@@ -694,6 +741,7 @@ static void __update_extent_tree_range(struct inode *inode,
 				__set_extent_info(&ei,
 					end, org_end - end,
 					end - dei.fofs + dei.blk, false,
+					dei.age, dei.last_blocks,
 					type);
 				en1 = __insert_extent_tree(sbi, et, &ei,
 							NULL, NULL, true);
@@ -702,6 +750,7 @@ static void __update_extent_tree_range(struct inode *inode,
 				__set_extent_info(&en->ei,
 					end, en->ei.len - (end - dei.fofs),
 					en->ei.blk + (end - dei.fofs), true,
+					dei.age, dei.last_blocks,
 					type);
 				next_en = en;
 			}
@@ -732,11 +781,15 @@ static void __update_extent_tree_range(struct inode *inode,
 		en = next_en;
 	}
 
+	if (type == EX_BLOCK_AGE)
+		goto update_age_extent_cache;
+
 	/* 3. update extent in read extent cache */
 	BUG_ON(type != EX_READ);
 
 	if (tei->blk) {
-		__set_extent_info(&ei, fofs, len, tei->blk, false, EX_READ);
+		__set_extent_info(&ei, fofs, len, tei->blk, false,
+				  0, 0, EX_READ);
 		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
 			__insert_extent_tree(sbi, et, &ei,
 					insert_p, insert_parent, leftmost);
@@ -758,7 +811,17 @@ static void __update_extent_tree_range(struct inode *inode,
 		et->largest_updated = false;
 		updated = true;
 	}
+	goto out_read_extent_cache;
+update_age_extent_cache:
+	if (!tei->last_blocks)
+		goto out_read_extent_cache;
 
+	__set_extent_info(&ei, fofs, len, 0, false,
+			tei->age, tei->last_blocks, EX_BLOCK_AGE);
+	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+		__insert_extent_tree(sbi, et, &ei,
+					insert_p, insert_parent, leftmost);
+out_read_extent_cache:
 	write_unlock(&et->lock);
 
 	if (updated)
@@ -796,7 +859,7 @@ void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
 	if (en)
 		goto unlock_out;
 
-	__set_extent_info(&ei, fofs, llen, blkaddr, true, EX_READ);
+	__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0, EX_READ);
 	ei.c_len = c_len;
 
 	if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
@@ -807,6 +870,72 @@ unlock_out:
 }
 #endif
 
+static unsigned long long __calculate_block_age(unsigned long long new,
+						unsigned long long old)
+{
+	unsigned long long diff;
+
+	diff = (new >= old) ? new - (new - old) : new + (old - new);
+
+	return div_u64(diff * LAST_AGE_WEIGHT, 100);
+}
+
+/* This returns a new age and allocated blocks in ei */
+static int __get_new_block_age(struct inode *inode, struct extent_info *ei)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	loff_t f_size = i_size_read(inode);
+	unsigned long long cur_blocks =
+				atomic64_read(&sbi->allocated_data_blocks);
+
+	/*
+	 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
+	 * file block even in seq write. So don't record age for newly last file
+	 * block here.
+	 */
+	if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
+			ei->blk == NEW_ADDR)
+		return -EINVAL;
+
+	if (__lookup_extent_tree(inode, ei->fofs, ei, EX_BLOCK_AGE)) {
+		unsigned long long cur_age;
+
+		if (cur_blocks >= ei->last_blocks)
+			cur_age = cur_blocks - ei->last_blocks;
+		else
+			/* allocated_data_blocks overflow */
+			cur_age = ULLONG_MAX - ei->last_blocks + cur_blocks;
+
+		if (ei->age)
+			ei->age = __calculate_block_age(cur_age, ei->age);
+		else
+			ei->age = cur_age;
+		ei->last_blocks = cur_blocks;
+		WARN_ON(ei->age > cur_blocks);
+		return 0;
+	}
+
+	f2fs_bug_on(sbi, ei->blk == NULL_ADDR);
+
+	/* the data block was allocated for the first time */
+	if (ei->blk == NEW_ADDR)
+		goto out;
+
+	if (__is_valid_data_blkaddr(ei->blk) &&
+			!f2fs_is_valid_blkaddr(sbi, ei->blk, DATA_GENERIC_ENHANCE)) {
+		f2fs_bug_on(sbi, 1);
+		return -EINVAL;
+	}
+out:
+	/*
+	 * init block age with zero, this can happen when the block age extent
+	 * was reclaimed due to memory constraint or system reboot
+	 */
+	ei->age = 0;
+	ei->last_blocks = cur_blocks;
+	return 0;
+}
+
 static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
 {
 	struct extent_info ei;
@@ -823,6 +952,10 @@ static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type typ
 			ei.blk = NULL_ADDR;
 		else
 			ei.blk = dn->data_blkaddr;
+	} else if (type == EX_BLOCK_AGE) {
+		ei.blk = dn->data_blkaddr;
+		if (__get_new_block_age(dn->inode, &ei))
+			return;
 	}
 	__update_extent_tree_range(dn->inode, &ei, type);
 }
@@ -940,6 +1073,43 @@ unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrin
 	return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
 }
 
+/* block age extent cache operations */
+bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
+				struct extent_info *ei)
+{
+	if (!__may_extent_tree(inode, EX_BLOCK_AGE))
+		return false;
+
+	return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
+}
+
+void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
+{
+	return __update_extent_cache(dn, EX_BLOCK_AGE);
+}
+
+void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
+				pgoff_t fofs, unsigned int len)
+{
+	struct extent_info ei = {
+		.fofs = fofs,
+		.len = len,
+	};
+
+	if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
+		return;
+
+	__update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
+}
+
+unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
+{
+	if (!test_opt(sbi, AGE_EXTENT_CACHE))
+		return 0;
+
+	return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
+}
+
 static unsigned int __destroy_extent_node(struct inode *inode,
 					enum extent_type type)
 {
@@ -960,6 +1130,7 @@ static unsigned int __destroy_extent_node(struct inode *inode,
 void f2fs_destroy_extent_node(struct inode *inode)
 {
 	__destroy_extent_node(inode, EX_READ);
+	__destroy_extent_node(inode, EX_BLOCK_AGE);
 }
 
 static void __drop_extent_tree(struct inode *inode, enum extent_type type)
@@ -988,6 +1159,7 @@ static void __drop_extent_tree(struct inode *inode, enum extent_type type)
 void f2fs_drop_extent_tree(struct inode *inode)
 {
 	__drop_extent_tree(inode, EX_READ);
+	__drop_extent_tree(inode, EX_BLOCK_AGE);
 }
 
 static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
@@ -1028,6 +1200,7 @@ static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
 void f2fs_destroy_extent_tree(struct inode *inode)
 {
 	__destroy_extent_tree(inode, EX_READ);
+	__destroy_extent_tree(inode, EX_BLOCK_AGE);
 }
 
 static void __init_extent_tree_info(struct extent_tree_info *eti)
@@ -1045,6 +1218,12 @@ static void __init_extent_tree_info(struct extent_tree_info *eti)
 void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
 {
 	__init_extent_tree_info(&sbi->extent_tree[EX_READ]);
+	__init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
+
+	/* initialize for block age extents */
+	atomic64_set(&sbi->allocated_data_blocks, 0);
+	sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
+	sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
 }
 
 int __init f2fs_create_extent_cache(void)
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ec52e06f8e61c..e8953c3dc81ab 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -107,6 +107,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
 #define F2FS_MOUNT_MERGE_CHECKPOINT	0x10000000
 #define	F2FS_MOUNT_GC_MERGE		0x20000000
 #define F2FS_MOUNT_COMPRESS_CACHE	0x40000000
+#define F2FS_MOUNT_AGE_EXTENT_CACHE	0x80000000
 
 #define F2FS_OPTION(sbi)	((sbi)->mount_opt)
 #define clear_opt(sbi, option)	(F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -607,9 +608,22 @@ enum {
 /* number of extent info in extent cache we try to shrink */
 #define READ_EXTENT_CACHE_SHRINK_NUMBER	128
 
+/* number of age extent info in extent cache we try to shrink */
+#define AGE_EXTENT_CACHE_SHRINK_NUMBER	128
+#define LAST_AGE_WEIGHT			30
+#define SAME_AGE_REGION			1024
+
+/*
+ * Define data block with age less than 1GB as hot data
+ * define data block with age less than 10GB but more than 1GB as warm data
+ */
+#define DEF_HOT_DATA_AGE_THRESHOLD	262144
+#define DEF_WARM_DATA_AGE_THRESHOLD	2621440
+
 /* extent cache type */
 enum extent_type {
 	EX_READ,
+	EX_BLOCK_AGE,
 	NR_EXTENT_CACHES,
 };
 
@@ -637,6 +651,13 @@ struct extent_info {
 			unsigned int c_len;
 #endif
 		};
+		/* block age extent_cache */
+		struct {
+			/* block age of the extent */
+			unsigned long long age;
+			/* last total blocks allocated */
+			unsigned long long last_blocks;
+		};
 	};
 };
 
@@ -1653,6 +1674,11 @@ struct f2fs_sb_info {
 
 	/* for extent tree cache */
 	struct extent_tree_info extent_tree[NR_EXTENT_CACHES];
+	atomic64_t allocated_data_blocks;	/* for block age extent_cache */
+
+	/* The threshold used for hot and warm data seperation*/
+	unsigned int hot_data_age_threshold;
+	unsigned int warm_data_age_threshold;
 
 	/* basic filesystem units */
 	unsigned int log_sectors_per_block;	/* log2 sectors per block */
@@ -3857,6 +3883,8 @@ struct f2fs_stat_info {
 	unsigned long long ext_mem[NR_EXTENT_CACHES];
 	/* for read extent cache */
 	unsigned long long hit_largest;
+	/* for block age extent cache */
+	unsigned long long allocated_data_blocks;
 	int ndirty_node, ndirty_dent, ndirty_meta, ndirty_imeta;
 	int ndirty_data, ndirty_qdata;
 	unsigned int ndirty_dirs, ndirty_files, nquota_files, ndirty_all;
@@ -4168,6 +4196,16 @@ void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
 unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi,
 			int nr_shrink);
 
+/* block age extent cache ops */
+void f2fs_init_age_extent_tree(struct inode *inode);
+bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
+			struct extent_info *ei);
+void f2fs_update_age_extent_cache(struct dnode_of_data *dn);
+void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
+			pgoff_t fofs, unsigned int len);
+unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi,
+			int nr_shrink);
+
 /*
  * sysfs.c
  */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index cbe7c24065c7b..56c23b5e9d658 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -619,6 +619,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
 		fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_page),
 							dn->inode) + ofs;
 		f2fs_update_read_extent_cache_range(dn, fofs, 0, len);
+		f2fs_update_age_extent_cache_range(dn, fofs, nr_free);
 		dec_valid_block_count(sbi, dn->inode, nr_free);
 	}
 	dn->ofs_in_node = ofs;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index c845c16f97d05..ff6cf66ed46b2 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -480,6 +480,7 @@ static int do_read_inode(struct inode *inode)
 
 	/* Need all the flag bits */
 	f2fs_init_read_extent_tree(inode, node_page);
+	f2fs_init_age_extent_tree(inode);
 
 	f2fs_put_page(node_page, 1);
 
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 07419c3e42a52..dde4c04587049 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -60,7 +60,7 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
 	avail_ram = val.totalram - val.totalhigh;
 
 	/*
-	 * give 25%, 25%, 50%, 50%, 50% memory for each components respectively
+	 * give 25%, 25%, 50%, 50%, 25%, 25% memory for each components respectively
 	 */
 	if (type == FREE_NIDS) {
 		mem_size = (nm_i->nid_cnt[FREE_NID] *
@@ -85,14 +85,16 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
 						sizeof(struct ino_entry);
 		mem_size >>= PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
-	} else if (type == READ_EXTENT_CACHE) {
-		struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
+	} else if (type == READ_EXTENT_CACHE || type == AGE_EXTENT_CACHE) {
+		enum extent_type etype = type == READ_EXTENT_CACHE ?
+						EX_READ : EX_BLOCK_AGE;
+		struct extent_tree_info *eti = &sbi->extent_tree[etype];
 
 		mem_size = (atomic_read(&eti->total_ext_tree) *
 				sizeof(struct extent_tree) +
 				atomic_read(&eti->total_ext_node) *
 				sizeof(struct extent_node)) >> PAGE_SHIFT;
-		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
+		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 	} else if (type == DISCARD_CACHE) {
 		mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
 				sizeof(struct discard_cmd)) >> PAGE_SHIFT;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 0aa48704c77a0..99454d46a9396 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -147,6 +147,7 @@ enum mem_type {
 	DIRTY_DENTS,	/* indicates dirty dentry pages */
 	INO_ENTRIES,	/* indicates inode entries */
 	READ_EXTENT_CACHE,	/* indicates read extent cache */
+	AGE_EXTENT_CACHE,	/* indicates age extent cache */
 	DISCARD_CACHE,	/* indicates memory of cached discard cmds */
 	COMPRESS_PAGE,	/* indicates memory of cached compressed pages */
 	BASE_CHECK,	/* check kernel status */
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 8722d1a13c170..dee712f7225f9 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -453,6 +453,11 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
 		f2fs_shrink_read_extent_tree(sbi,
 				READ_EXTENT_CACHE_SHRINK_NUMBER);
 
+	/* try to shrink age extent cache when there is no enough memory */
+	if (!f2fs_available_free_memory(sbi, AGE_EXTENT_CACHE))
+		f2fs_shrink_age_extent_tree(sbi,
+				AGE_EXTENT_CACHE_SHRINK_NUMBER);
+
 	/* check the # of cached NAT entries */
 	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
 		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
@@ -3151,10 +3156,28 @@ static int __get_segment_type_4(struct f2fs_io_info *fio)
 	}
 }
 
+static int __get_age_segment_type(struct inode *inode, pgoff_t pgofs)
+{
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct extent_info ei;
+
+	if (f2fs_lookup_age_extent_cache(inode, pgofs, &ei)) {
+		if (!ei.age)
+			return NO_CHECK_TYPE;
+		if (ei.age <= sbi->hot_data_age_threshold)
+			return CURSEG_HOT_DATA;
+		if (ei.age <= sbi->warm_data_age_threshold)
+			return CURSEG_WARM_DATA;
+		return CURSEG_COLD_DATA;
+	}
+	return NO_CHECK_TYPE;
+}
+
 static int __get_segment_type_6(struct f2fs_io_info *fio)
 {
 	if (fio->type == DATA) {
 		struct inode *inode = fio->page->mapping->host;
+		int type;
 
 		if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
 			return CURSEG_COLD_DATA_PINNED;
@@ -3169,6 +3192,11 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
 		}
 		if (file_is_cold(inode) || f2fs_need_compress_data(inode))
 			return CURSEG_COLD_DATA;
+
+		type = __get_age_segment_type(inode, fio->page->index);
+		if (type != NO_CHECK_TYPE)
+			return type;
+
 		if (file_is_hot(inode) ||
 				is_inode_flag_set(inode, FI_HOT_DATA) ||
 				f2fs_is_cow_file(inode))
@@ -3287,6 +3315,9 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
 	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
 	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
 
+	if (IS_DATASEG(type))
+		atomic64_inc(&sbi->allocated_data_blocks);
+
 	up_write(&sit_i->sentry_lock);
 
 	if (page && IS_NODESEG(type)) {
@@ -3414,6 +3445,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
 	struct f2fs_summary sum;
 
 	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
+	if (fio->io_type == FS_DATA_IO || fio->io_type == FS_CP_DATA_IO)
+		f2fs_update_age_extent_cache(dn);
 	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
 	do_write_page(&sum, fio);
 	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index 33c490e69ae30..83d6fb97dcae0 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -59,6 +59,9 @@ unsigned long f2fs_shrink_count(struct shrinker *shrink,
 		/* count read extent cache entries */
 		count += __count_extent_cache(sbi, EX_READ);
 
+		/* count block age extent cache entries */
+		count += __count_extent_cache(sbi, EX_BLOCK_AGE);
+
 		/* count clean nat cache entries */
 		count += __count_nat_entries(sbi);
 
@@ -102,8 +105,11 @@ unsigned long f2fs_shrink_scan(struct shrinker *shrink,
 
 		sbi->shrinker_run_no = run_no;
 
+		/* shrink extent cache entries */
+		freed += f2fs_shrink_age_extent_tree(sbi, nr >> 2);
+
 		/* shrink read extent cache entries */
-		freed += f2fs_shrink_read_extent_tree(sbi, nr >> 1);
+		freed += f2fs_shrink_read_extent_tree(sbi, nr >> 2);
 
 		/* shrink clean nat cache entries */
 		if (freed < nr)
@@ -134,6 +140,8 @@ void f2fs_join_shrinker(struct f2fs_sb_info *sbi)
 void f2fs_leave_shrinker(struct f2fs_sb_info *sbi)
 {
 	f2fs_shrink_read_extent_tree(sbi, __count_extent_cache(sbi, EX_READ));
+	f2fs_shrink_age_extent_tree(sbi,
+				__count_extent_cache(sbi, EX_BLOCK_AGE));
 
 	spin_lock(&f2fs_list_lock);
 	list_del_init(&sbi->s_list);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 412c2e7352c04..180d8b804d139 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -163,6 +163,7 @@ enum {
 	Opt_nogc_merge,
 	Opt_discard_unit,
 	Opt_memory_mode,
+	Opt_age_extent_cache,
 	Opt_err,
 };
 
@@ -241,6 +242,7 @@ static match_table_t f2fs_tokens = {
 	{Opt_nogc_merge, "nogc_merge"},
 	{Opt_discard_unit, "discard_unit=%s"},
 	{Opt_memory_mode, "memory=%s"},
+	{Opt_age_extent_cache, "age_extent_cache"},
 	{Opt_err, NULL},
 };
 
@@ -1257,6 +1259,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 			}
 			kfree(name);
 			break;
+		case Opt_age_extent_cache:
+			set_opt(sbi, AGE_EXTENT_CACHE);
+			break;
 		default:
 			f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
 				 p);
@@ -1958,6 +1963,8 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
 		seq_puts(seq, ",extent_cache");
 	else
 		seq_puts(seq, ",noextent_cache");
+	if (test_opt(sbi, AGE_EXTENT_CACHE))
+		seq_puts(seq, ",age_extent_cache");
 	if (test_opt(sbi, DATA_FLUSH))
 		seq_puts(seq, ",data_flush");
 
@@ -2219,6 +2226,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool need_restart_flush = false, need_stop_flush = false;
 	bool need_restart_discard = false, need_stop_discard = false;
 	bool no_read_extent_cache = !test_opt(sbi, READ_EXTENT_CACHE);
+	bool no_age_extent_cache = !test_opt(sbi, AGE_EXTENT_CACHE);
 	bool enable_checkpoint = !test_opt(sbi, DISABLE_CHECKPOINT);
 	bool no_io_align = !F2FS_IO_ALIGNED(sbi);
 	bool no_atgc = !test_opt(sbi, ATGC);
@@ -2313,6 +2321,12 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 		f2fs_warn(sbi, "switch extent_cache option is not allowed");
 		goto restore_opts;
 	}
+	/* disallow enable/disable age extent_cache dynamically */
+	if (no_age_extent_cache == !!test_opt(sbi, AGE_EXTENT_CACHE)) {
+		err = -EINVAL;
+		f2fs_warn(sbi, "switch age_extent_cache option is not allowed");
+		goto restore_opts;
+	}
 
 	if (no_io_align == !!F2FS_IO_ALIGNED(sbi)) {
 		err = -EINVAL;
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index a4745d596310f..2ab2151105966 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -668,6 +668,24 @@ out:
 		return count;
 	}
 
+	if (!strcmp(a->attr.name, "hot_data_age_threshold")) {
+		if (t == 0 || t >= sbi->warm_data_age_threshold)
+			return -EINVAL;
+		if (t == *ui)
+			return count;
+		*ui = (unsigned int)t;
+		return count;
+	}
+
+	if (!strcmp(a->attr.name, "warm_data_age_threshold")) {
+		if (t == 0 || t <= sbi->hot_data_age_threshold)
+			return -EINVAL;
+		if (t == *ui)
+			return count;
+		*ui = (unsigned int)t;
+		return count;
+	}
+
 	*ui = (unsigned int)t;
 
 	return count;
@@ -923,6 +941,10 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, peak_atomic_write, peak_atomic_write);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, committed_atomic_block, committed_atomic_block);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, revoked_atomic_block, revoked_atomic_block);
 
+/* For block age extent cache */
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, hot_data_age_threshold, hot_data_age_threshold);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, warm_data_age_threshold, warm_data_age_threshold);
+
 #define ATTR_LIST(name) (&f2fs_attr_##name.attr)
 static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(gc_urgent_sleep_time),
@@ -1018,6 +1040,8 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(peak_atomic_write),
 	ATTR_LIST(committed_atomic_block),
 	ATTR_LIST(revoked_atomic_block),
+	ATTR_LIST(hot_data_age_threshold),
+	ATTR_LIST(warm_data_age_threshold),
 	NULL,
 };
 ATTRIBUTE_GROUPS(f2fs);
diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h
index 2bb37892d2ba1..31d994e6b4ca9 100644
--- a/include/trace/events/f2fs.h
+++ b/include/trace/events/f2fs.h
@@ -49,6 +49,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED);
 TRACE_DEFINE_ENUM(CP_PAUSE);
 TRACE_DEFINE_ENUM(CP_RESIZE);
 TRACE_DEFINE_ENUM(EX_READ);
+TRACE_DEFINE_ENUM(EX_BLOCK_AGE);
 
 #define show_block_type(type)						\
 	__print_symbolic(type,						\
@@ -155,6 +156,11 @@ TRACE_DEFINE_ENUM(EX_READ);
 		{ COMPRESS_ZSTD,	"ZSTD" },			\
 		{ COMPRESS_LZORLE,	"LZO-RLE" })
 
+#define show_extent_type(type)						\
+	__print_symbolic(type,						\
+		{ EX_READ,	"Read" },				\
+		{ EX_BLOCK_AGE,	"Block Age" })
+
 struct f2fs_sb_info;
 struct f2fs_io_info;
 struct extent_info;
@@ -1544,7 +1550,7 @@ TRACE_EVENT(f2fs_lookup_extent_tree_start,
 	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, type = %s",
 		show_dev_ino(__entry),
 		__entry->pgofs,
-		__entry->type == EX_READ ? "Read" : "N/A")
+		show_extent_type(__entry->type))
 );
 
 TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end,
@@ -1583,6 +1589,45 @@ TRACE_EVENT_CONDITION(f2fs_lookup_read_extent_tree_end,
 		__entry->blk)
 );
 
+TRACE_EVENT_CONDITION(f2fs_lookup_age_extent_tree_end,
+
+	TP_PROTO(struct inode *inode, unsigned int pgofs,
+						struct extent_info *ei),
+
+	TP_ARGS(inode, pgofs, ei),
+
+	TP_CONDITION(ei),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(unsigned int, pgofs)
+		__field(unsigned int, fofs)
+		__field(unsigned int, len)
+		__field(unsigned long long, age)
+		__field(unsigned long long, blocks)
+	),
+
+	TP_fast_assign(
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->ino = inode->i_ino;
+		__entry->pgofs = pgofs;
+		__entry->fofs = ei->fofs;
+		__entry->len = ei->len;
+		__entry->age = ei->age;
+		__entry->blocks = ei->last_blocks;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
+		"age_ext_info(fofs: %u, len: %u, age: %llu, blocks: %llu)",
+		show_dev_ino(__entry),
+		__entry->pgofs,
+		__entry->fofs,
+		__entry->len,
+		__entry->age,
+		__entry->blocks)
+);
+
 TRACE_EVENT(f2fs_update_read_extent_tree_range,
 
 	TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len,
@@ -1618,6 +1663,41 @@ TRACE_EVENT(f2fs_update_read_extent_tree_range,
 		__entry->c_len)
 );
 
+TRACE_EVENT(f2fs_update_age_extent_tree_range,
+
+	TP_PROTO(struct inode *inode, unsigned int pgofs, unsigned int len,
+					unsigned long long age,
+					unsigned long long last_blks),
+
+	TP_ARGS(inode, pgofs, len, age, last_blks),
+
+	TP_STRUCT__entry(
+		__field(dev_t,	dev)
+		__field(ino_t,	ino)
+		__field(unsigned int, pgofs)
+		__field(unsigned int, len)
+		__field(unsigned long long, age)
+		__field(unsigned long long, blocks)
+	),
+
+	TP_fast_assign(
+		__entry->dev = inode->i_sb->s_dev;
+		__entry->ino = inode->i_ino;
+		__entry->pgofs = pgofs;
+		__entry->len = len;
+		__entry->age = age;
+		__entry->blocks = last_blks;
+	),
+
+	TP_printk("dev = (%d,%d), ino = %lu, pgofs = %u, "
+				"len = %u, age = %llu, blocks = %llu",
+		show_dev_ino(__entry),
+		__entry->pgofs,
+		__entry->len,
+		__entry->age,
+		__entry->blocks)
+);
+
 TRACE_EVENT(f2fs_shrink_extent_tree,
 
 	TP_PROTO(struct f2fs_sb_info *sbi, unsigned int node_cnt,
@@ -1643,7 +1723,7 @@ TRACE_EVENT(f2fs_shrink_extent_tree,
 		show_dev(__entry->dev),
 		__entry->node_cnt,
 		__entry->tree_cnt,
-		__entry->type == EX_READ ? "Read" : "N/A")
+		show_extent_type(__entry->type))
 );
 
 TRACE_EVENT(f2fs_destroy_extent_tree,
@@ -1670,7 +1750,7 @@ TRACE_EVENT(f2fs_destroy_extent_tree,
 	TP_printk("dev = (%d,%d), ino = %lu, destroyed: node_cnt = %u, type = %s",
 		show_dev_ino(__entry),
 		__entry->node_cnt,
-		__entry->type == EX_READ ? "Read" : "N/A")
+		show_extent_type(__entry->type))
 );
 
 DECLARE_EVENT_CLASS(f2fs_sync_dirty_inodes,
-- 
GitLab


From db8dcd25ec84120d4e57a7f17a566825cec17ae8 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 7 Dec 2022 13:42:17 +0000
Subject: [PATCH 3783/4122] f2fs: Fix spelling mistake in label:
 free_bio_enrty_cache -> free_bio_entry_cache

There is a spelling mistake in a label name. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/super.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 180d8b804d139..c02a717cf880b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -4723,7 +4723,7 @@ static int __init init_f2fs_fs(void)
 		goto free_iostat;
 	err = f2fs_init_bioset();
 	if (err)
-		goto free_bio_enrty_cache;
+		goto free_bio_entry_cache;
 	err = f2fs_init_compress_mempool();
 	if (err)
 		goto free_bioset;
@@ -4740,7 +4740,7 @@ free_compress_mempool:
 	f2fs_destroy_compress_mempool();
 free_bioset:
 	f2fs_destroy_bioset();
-free_bio_enrty_cache:
+free_bio_entry_cache:
 	f2fs_destroy_bio_entry_cache();
 free_iostat:
 	f2fs_destroy_iostat_processing();
-- 
GitLab


From 15e38ee44d50cad264da80ef75626b9224ddc4a3 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Mon, 5 Dec 2022 22:56:03 +0800
Subject: [PATCH 3784/4122] f2fs: fix iostat parameter for discard

Just like other data we count uses the number of bytes as the basic unit,
but discard uses the number of cmds as the statistical unit. In fact the
discard command contains the number of blocks, so let's change to the
number of bytes as the base unit.

Fixes: b0af6d491a6b ("f2fs: add app/fs io stat")
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index dee712f7225f9..f1845a032885d 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1187,7 +1187,7 @@ static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
 
 		atomic_inc(&dcc->issued_discard);
 
-		f2fs_update_iostat(sbi, NULL, FS_DISCARD, 1);
+		f2fs_update_iostat(sbi, NULL, FS_DISCARD, len * F2FS_BLKSIZE);
 
 		lstart += len;
 		start += len;
-- 
GitLab


From 25547439f1dcc3def6062bd3e69165cd806a594e Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Fri, 2 Dec 2022 12:58:41 +0800
Subject: [PATCH 3785/4122] f2fs: don't call f2fs_issue_discard_timeout() when
 discard_cmd_cnt is 0 in f2fs_put_super()

No need to call f2fs_issue_discard_timeout() in f2fs_put_super,
when no discard command requires issue. Since the caller of
f2fs_issue_discard_timeout() usually judges the number of discard
commands before using it. Let's move this logic to
f2fs_issue_discard_timeout().

By the way, use f2fs_realtime_discard_enable to simplify the code.

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/segment.c | 6 ++++--
 fs/f2fs/super.c   | 8 ++------
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index f1845a032885d..a9099a754dd2e 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1661,6 +1661,9 @@ bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
 	struct discard_policy dpolicy;
 	bool dropped;
 
+	if (!atomic_read(&dcc->discard_cmd_cnt))
+		return false;
+
 	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
 					dcc->discard_granularity);
 	__issue_discard_cmd(sbi, &dpolicy);
@@ -2116,8 +2119,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
 	 * Recovery can cache discard commands, so in error path of
 	 * fill_super(), it needs to give a chance to handle them.
 	 */
-	if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
-		f2fs_issue_discard_timeout(sbi);
+	f2fs_issue_discard_timeout(sbi);
 
 	kfree(dcc);
 	SM_I(sbi)->dcc_info = NULL;
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index c02a717cf880b..1f812b9ce985b 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1581,8 +1581,7 @@ static void f2fs_put_super(struct super_block *sb)
 	/* be sure to wait for any on-going discard commands */
 	dropped = f2fs_issue_discard_timeout(sbi);
 
-	if ((f2fs_hw_support_discard(sbi) || f2fs_hw_should_discard(sbi)) &&
-					!sbi->discard_blks && !dropped) {
+	if (f2fs_realtime_discard_enable(sbi) && !sbi->discard_blks && !dropped) {
 		struct cp_control cpc = {
 			.reason = CP_UMOUNT | CP_TRIMMED,
 		};
@@ -2233,7 +2232,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 	bool no_discard = !test_opt(sbi, DISCARD);
 	bool no_compress_cache = !test_opt(sbi, COMPRESS_CACHE);
 	bool block_unit_discard = f2fs_block_unit_discard(sbi);
-	struct discard_cmd_control *dcc;
 #ifdef CONFIG_QUOTA
 	int i, j;
 #endif
@@ -2420,10 +2418,8 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 				goto restore_flush;
 			need_stop_discard = true;
 		} else {
-			dcc = SM_I(sbi)->dcc_info;
 			f2fs_stop_discard_thread(sbi);
-			if (atomic_read(&dcc->discard_cmd_cnt))
-				f2fs_issue_discard_timeout(sbi);
+			f2fs_issue_discard_timeout(sbi);
 			need_restart_discard = true;
 		}
 	}
-- 
GitLab


From 7411143f2021530d7641fbb40daaada4ee63f7e6 Mon Sep 17 00:00:00 2001
From: Yangtao Li <frank.li@vivo.com>
Date: Tue, 29 Nov 2022 12:15:23 +0800
Subject: [PATCH 3786/4122] f2fs: fix some format WARNING in debug.c and
 sysfs.c

To fix:

WARNING: function definition argument 'struct f2fs_attr *' should also have an identifier name
+       ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);

WARNING: return sysfs_emit(...) formats should include a terminating newline
+       return sysfs_emit(buf, "(none)");

WARNING: Prefer 'unsigned int' to bare use of 'unsigned'
+               unsigned npages = NODE_MAPPING(sbi)->nrpages;

WARNING: Missing a blank line after declarations
+               unsigned npages = COMPRESS_MAPPING(sbi)->nrpages;
+               si->page_mem += (unsigned long long)npages << PAGE_SHIFT;

WARNING: quoted string split across lines
+               seq_printf(s, "CP merge (Queued: %4d, Issued: %4d, Total: %4d, "
+                               "Cur time: %4d(ms), Peak time: %4d(ms))\n",

Signed-off-by: Yangtao Li <frank.li@vivo.com>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/debug.c | 45 +++++++++++++++++++++++----------------------
 fs/f2fs/sysfs.c | 10 +++++-----
 2 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 8f1ef742551fc..32af4f0c57357 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -318,18 +318,19 @@ get_cache:
 
 	si->page_mem = 0;
 	if (sbi->node_inode) {
-		unsigned npages = NODE_MAPPING(sbi)->nrpages;
+		unsigned long npages = NODE_MAPPING(sbi)->nrpages;
 
 		si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
 	}
 	if (sbi->meta_inode) {
-		unsigned npages = META_MAPPING(sbi)->nrpages;
+		unsigned long npages = META_MAPPING(sbi)->nrpages;
 
 		si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
 	}
 #ifdef CONFIG_F2FS_FS_COMPRESSION
 	if (sbi->compress_inode) {
-		unsigned npages = COMPRESS_MAPPING(sbi)->nrpages;
+		unsigned long npages = COMPRESS_MAPPING(sbi)->nrpages;
+
 		si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
 	}
 #endif
@@ -477,28 +478,28 @@ static int stat_show(struct seq_file *s, void *v)
 				si->meta_count[META_NAT]);
 		seq_printf(s, "  - ssa blocks : %u\n",
 				si->meta_count[META_SSA]);
-		seq_printf(s, "CP merge (Queued: %4d, Issued: %4d, Total: %4d, "
-				"Cur time: %4d(ms), Peak time: %4d(ms))\n",
-				si->nr_queued_ckpt, si->nr_issued_ckpt,
-				si->nr_total_ckpt, si->cur_ckpt_time,
-				si->peak_ckpt_time);
+		seq_puts(s, "CP merge:\n");
+		seq_printf(s, "  - Queued : %4d\n", si->nr_queued_ckpt);
+		seq_printf(s, "  - Issued : %4d\n", si->nr_issued_ckpt);
+		seq_printf(s, "  - Total : %4d\n", si->nr_total_ckpt);
+		seq_printf(s, "  - Cur time : %4d(ms)\n", si->cur_ckpt_time);
+		seq_printf(s, "  - Peak time : %4d(ms)\n", si->peak_ckpt_time);
 		seq_printf(s, "GC calls: %d (BG: %d)\n",
 			   si->call_count, si->bg_gc);
 		seq_printf(s, "  - data segments : %d (%d)\n",
 				si->data_segs, si->bg_data_segs);
 		seq_printf(s, "  - node segments : %d (%d)\n",
 				si->node_segs, si->bg_node_segs);
-		seq_printf(s, "  - Reclaimed segs : Normal (%d), Idle CB (%d), "
-				"Idle Greedy (%d), Idle AT (%d), "
-				"Urgent High (%d), Urgent Mid (%d), "
-				"Urgent Low (%d)\n",
-				si->sbi->gc_reclaimed_segs[GC_NORMAL],
-				si->sbi->gc_reclaimed_segs[GC_IDLE_CB],
-				si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY],
-				si->sbi->gc_reclaimed_segs[GC_IDLE_AT],
-				si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH],
-				si->sbi->gc_reclaimed_segs[GC_URGENT_MID],
-				si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
+		seq_puts(s, "  - Reclaimed segs :\n");
+		seq_printf(s, "    - Normal : %d\n", si->sbi->gc_reclaimed_segs[GC_NORMAL]);
+		seq_printf(s, "    - Idle CB : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_CB]);
+		seq_printf(s, "    - Idle Greedy : %d\n",
+				si->sbi->gc_reclaimed_segs[GC_IDLE_GREEDY]);
+		seq_printf(s, "    - Idle AT : %d\n", si->sbi->gc_reclaimed_segs[GC_IDLE_AT]);
+		seq_printf(s, "    - Urgent High : %d\n",
+				si->sbi->gc_reclaimed_segs[GC_URGENT_HIGH]);
+		seq_printf(s, "    - Urgent Mid : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_MID]);
+		seq_printf(s, "    - Urgent Low : %d\n", si->sbi->gc_reclaimed_segs[GC_URGENT_LOW]);
 		seq_printf(s, "Try to move %d blocks (BG: %d)\n", si->tot_blks,
 				si->bg_data_blks + si->bg_node_blks);
 		seq_printf(s, "  - data blocks : %d (%d)\n", si->data_blks,
@@ -540,11 +541,11 @@ static int stat_show(struct seq_file *s, void *v)
 			   si->nr_dio_read, si->nr_dio_write);
 		seq_printf(s, "  - IO_R (Data: %4d, Node: %4d, Meta: %4d\n",
 			   si->nr_rd_data, si->nr_rd_node, si->nr_rd_meta);
-		seq_printf(s, "  - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), "
-			"Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
+		seq_printf(s, "  - IO_W (CP: %4d, Data: %4d, Flush: (%4d %4d %4d), ",
 			   si->nr_wb_cp_data, si->nr_wb_data,
 			   si->nr_flushing, si->nr_flushed,
-			   si->flush_list_empty,
+			   si->flush_list_empty);
+		seq_printf(s, "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n",
 			   si->nr_discarding, si->nr_discarded,
 			   si->nr_discard_cmd, si->undiscard_blks);
 		seq_printf(s, "  - atomic IO: %4d (Max. %4d)\n",
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 2ab2151105966..83a366f3ee80e 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -53,9 +53,9 @@ static const char *gc_mode_names[MAX_GC_MODE] = {
 
 struct f2fs_attr {
 	struct attribute attr;
-	ssize_t (*show)(struct f2fs_attr *, struct f2fs_sb_info *, char *);
-	ssize_t (*store)(struct f2fs_attr *, struct f2fs_sb_info *,
-			 const char *, size_t);
+	ssize_t (*show)(struct f2fs_attr *a, struct f2fs_sb_info *sbi, char *buf);
+	ssize_t (*store)(struct f2fs_attr *a, struct f2fs_sb_info *sbi,
+			 const char *buf, size_t len);
 	int struct_type;
 	int offset;
 	int id;
@@ -232,13 +232,13 @@ static ssize_t encoding_show(struct f2fs_attr *a,
 			(sb->s_encoding->version >> 8) & 0xff,
 			sb->s_encoding->version & 0xff);
 #endif
-	return sysfs_emit(buf, "(none)");
+	return sysfs_emit(buf, "(none)\n");
 }
 
 static ssize_t mounted_time_sec_show(struct f2fs_attr *a,
 		struct f2fs_sb_info *sbi, char *buf)
 {
-	return sysfs_emit(buf, "%llu", SIT_I(sbi)->mounted_time);
+	return sysfs_emit(buf, "%llu\n", SIT_I(sbi)->mounted_time);
 }
 
 #ifdef CONFIG_F2FS_STAT_FS
-- 
GitLab


From 26a8057a1ada97b528b93fdf3ac4fd03170f1900 Mon Sep 17 00:00:00 2001
From: Yuwei Guan <ssawgyw@gmail.com>
Date: Sun, 11 Dec 2022 21:08:41 +0800
Subject: [PATCH 3787/4122] f2fs: reset wait_ms to default if any of the
 victims have been selected

In non-foreground gc mode, if no victim is selected, the gc process
will wait for no_gc_sleep_time before waking up again. In this
subsequent time, even though a victim will be selected, the gc process
still waits for no_gc_sleep_time before waking up. The configuration
of wait_ms is not reasonable.

After any of the victims have been selected, we need to reset wait_ms to
default sleep time from no_gc_sleep_time.

Signed-off-by: Yuwei Guan <Yuwei.Guan@zeekrlife.com>
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
---
 fs/f2fs/gc.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index f0c6506d89758..d7a9d84ba57c1 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -141,6 +141,10 @@ do_gc:
 			/* don't bother wait_ms by foreground gc */
 			if (!foreground)
 				wait_ms = gc_th->no_gc_sleep_time;
+		} else {
+			/* reset wait_ms to default sleep time */
+			if (wait_ms == gc_th->no_gc_sleep_time)
+				wait_ms = gc_th->min_sleep_time;
 		}
 
 		if (foreground)
-- 
GitLab


From c45bc55a99957b20e4e0333bcd42e12d1833a7f5 Mon Sep 17 00:00:00 2001
From: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Date: Mon, 12 Dec 2022 14:55:29 -0800
Subject: [PATCH 3788/4122] mm/hugetlb: set head flag before setting
 compound_order in __prep_compound_gigantic_folio

folio_set_compound_order() checks if the passed in folio is a large folio.
A large folio is indicated by the PG_head flag.  Call __folio_set_head()
before setting the order.

Link: https://lkml.kernel.org/r/20221212225529.22493-1-sidhartha.kumar@oracle.com
Fixes: d1c6095572d0 ("mm/hugetlb: convert hugetlb prep functions to folios")
Signed-off-by: Sidhartha Kumar <sidhartha.kumar@oracle.com>
Reported-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/hugetlb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 8c6fe22868144..7cdbcc22587bf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -1805,10 +1805,10 @@ static bool __prep_compound_gigantic_folio(struct folio *folio,
 	int nr_pages = 1 << order;
 	struct page *p;
 
-	/* we rely on prep_new_hugetlb_folio to set the destructor */
-	folio_set_compound_order(folio, order);
 	__folio_clear_reserved(folio);
 	__folio_set_head(folio);
+	/* we rely on prep_new_hugetlb_folio to set the destructor */
+	folio_set_compound_order(folio, order);
 	for (i = 0; i < nr_pages; i++) {
 		p = folio_page(folio, i);
 
-- 
GitLab


From e923f4625ed3ad7656c3f9f086c898798bafbbc5 Mon Sep 17 00:00:00 2001
From: Andrew Jones <ajones@ventanamicro.com>
Date: Thu, 1 Dec 2022 12:37:50 +0100
Subject: [PATCH 3789/4122] riscv: Apply a static assert to riscv_isa_ext_id

Add a static assert to ensure a RISCV_ISA_EXT_* enum is never
created with a value >= RISCV_ISA_EXT_MAX. We can do this by
putting RISCV_ISA_EXT_ID_MAX to more work. Before it was
redundant with RISCV_ISA_EXT_MAX and hence only used to
document the limit. Now it grows with the enum and is used to
check the limit.

Signed-off-by: Andrew Jones <ajones@ventanamicro.com>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221201113750.18021-1-ajones@ventanamicro.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/hwcap.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index b225252900730..86328e3acb02e 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -59,8 +59,9 @@ enum riscv_isa_ext_id {
 	RISCV_ISA_EXT_ZIHINTPAUSE,
 	RISCV_ISA_EXT_SSTC,
 	RISCV_ISA_EXT_SVINVAL,
-	RISCV_ISA_EXT_ID_MAX = RISCV_ISA_EXT_MAX,
+	RISCV_ISA_EXT_ID_MAX
 };
+static_assert(RISCV_ISA_EXT_ID_MAX <= RISCV_ISA_EXT_MAX);
 
 /*
  * This enum represents the logical ID for each RISC-V ISA extension static
-- 
GitLab


From 71fc3621efc38ace9640ee6a0db3300900689592 Mon Sep 17 00:00:00 2001
From: Alexandre Ghiti <alexghiti@rivosinc.com>
Date: Thu, 1 Dec 2022 14:51:28 +0100
Subject: [PATCH 3790/4122] riscv: Fix P4D_SHIFT definition for 3-level page
 table mode

RISC-V kernels support 3,4,5-level page tables at runtime by folding
upper levels.

In case of a 3-level page table, PGDIR is folded into P4D which in turn
is folded into PUD: PGDIR_SHIFT value is correctly set to the same value
as PUD_SHIFT, but P4D_SHIFT is not, then any use of P4D_SHIFT will access
invalid address bits (all set to 1).

Fix this by dynamically defining P4D_SHIFT value, like we already do for
PGDIR_SHIFT.

Fixes: d10efa21a937 ("riscv: mm: Control p4d's folding by pgtable_l5_enabled")
Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
Reviewed-by: Palmer Dabbelt <palmer@rivosinc.com>
Link: https://lore.kernel.org/r/20221201135128.1482189-2-alexghiti@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/include/asm/pgtable-64.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index dc42375c23571..42a042c0e13ed 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -25,7 +25,11 @@ extern bool pgtable_l5_enabled;
 #define PGDIR_MASK      (~(PGDIR_SIZE - 1))
 
 /* p4d is folded into pgd in case of 4-level page table */
-#define P4D_SHIFT      39
+#define P4D_SHIFT_L3   30
+#define P4D_SHIFT_L4   39
+#define P4D_SHIFT_L5   39
+#define P4D_SHIFT      (pgtable_l5_enabled ? P4D_SHIFT_L5 : \
+		(pgtable_l4_enabled ? P4D_SHIFT_L4 : P4D_SHIFT_L3))
 #define P4D_SIZE       (_AC(1, UL) << P4D_SHIFT)
 #define P4D_MASK       (~(P4D_SIZE - 1))
 
-- 
GitLab


From 40306b4d1ba25970dafd53432e8daa5d591ebd99 Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Tue, 13 Dec 2022 09:40:32 +0800
Subject: [PATCH 3791/4122] exfat: fix overflow in sector and cluster
 conversion

According to the exFAT specification, there are at most 2^32-11
clusters in a volume. so using 'int' is not enough for cluster
index, the return value type of exfat_sector_to_cluster() should
be 'unsigned int'.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/exfat_fs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index a1e7feb220797..bc6d21d7c5adf 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -400,7 +400,7 @@ static inline sector_t exfat_cluster_to_sector(struct exfat_sb_info *sbi,
 		sbi->data_start_sector;
 }
 
-static inline int exfat_sector_to_cluster(struct exfat_sb_info *sbi,
+static inline unsigned int exfat_sector_to_cluster(struct exfat_sb_info *sbi,
 		sector_t sec)
 {
 	return ((sec - sbi->data_start_sector) >> sbi->sect_per_clus_bits) +
-- 
GitLab


From 36955d368dc101be885ad2c71618e3c3a93cd8ee Mon Sep 17 00:00:00 2001
From: Yuezhang Mo <Yuezhang.Mo@sony.com>
Date: Thu, 17 Nov 2022 11:31:30 +0800
Subject: [PATCH 3792/4122] exfat: reuse exfat_find_location() to simplify
 exfat_get_dentry_set()

In exfat_get_dentry_set(), part of the code is the same as
exfat_find_location(), reuse exfat_find_location() to simplify
exfat_get_dentry_set().

Code refinement, no functional changes.

Signed-off-by: Yuezhang Mo <Yuezhang.Mo@sony.com>
Reviewed-by: Andy Wu <Andy.Wu@sony.com>
Reviewed-by: Aoyama Wataru <wataru.aoyama@sony.com>
Reviewed-by: Sungjong Seo <sj1557.seo@samsung.com>
Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
---
 fs/exfat/dir.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/fs/exfat/dir.c b/fs/exfat/dir.c
index 8121a7e073bc7..1dfa67f307f17 100644
--- a/fs/exfat/dir.c
+++ b/fs/exfat/dir.c
@@ -818,7 +818,7 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 		unsigned int type)
 {
 	int ret, i, num_bh;
-	unsigned int off, byte_offset, clu = 0;
+	unsigned int off;
 	sector_t sec;
 	struct exfat_sb_info *sbi = EXFAT_SB(sb);
 	struct exfat_dentry *ep;
@@ -831,27 +831,16 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 		return -EIO;
 	}
 
-	byte_offset = EXFAT_DEN_TO_B(entry);
-	ret = exfat_walk_fat_chain(sb, p_dir, byte_offset, &clu);
+	ret = exfat_find_location(sb, p_dir, entry, &sec, &off);
 	if (ret)
 		return ret;
 
 	memset(es, 0, sizeof(*es));
 	es->sb = sb;
 	es->modified = false;
-
-	/* byte offset in cluster */
-	byte_offset = EXFAT_CLU_OFFSET(byte_offset, sbi);
-
-	/* byte offset in sector */
-	off = EXFAT_BLK_OFFSET(byte_offset, sb);
 	es->start_off = off;
 	es->bh = es->__bh;
 
-	/* sector offset in cluster */
-	sec = EXFAT_B_TO_BLK(byte_offset, sb);
-	sec += exfat_cluster_to_sector(sbi, clu);
-
 	bh = sb_bread(sb, sec);
 	if (!bh)
 		return -EIO;
@@ -878,6 +867,8 @@ int exfat_get_dentry_set(struct exfat_entry_set_cache *es,
 	for (i = 1; i < num_bh; i++) {
 		/* get the next sector */
 		if (exfat_is_last_sector_in_cluster(sbi, sec)) {
+			unsigned int clu = exfat_sector_to_cluster(sbi, sec);
+
 			if (p_dir->flags == ALLOC_NO_FAT_CHAIN)
 				clu++;
 			else if (exfat_get_next_cluster(sb, &clu))
-- 
GitLab


From ba57ee0944ff0085652cf8df91f9c571883debe6 Mon Sep 17 00:00:00 2001
From: Li Qiong <liqiong@nfschina.com>
Date: Mon, 12 Dec 2022 15:43:51 +0800
Subject: [PATCH 3793/4122] ipvs: add a 'default' case in do_ip_vs_set_ctl()

It is better to return the default switch case with
'-EINVAL', in case new commands are added. otherwise,
return a uninitialized value of ret.

Signed-off-by: Li Qiong <liqiong@nfschina.com>
Reviewed-by: Simon Horman <horms@verge.net.au>
Acked-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/ipvs/ip_vs_ctl.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 988222fff9f02..97f6a1c8933ac 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -2590,6 +2590,11 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len)
 		break;
 	case IP_VS_SO_SET_DELDEST:
 		ret = ip_vs_del_dest(svc, &udest);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		ret = -EINVAL;
+		break;
 	}
 
   out_unlock:
-- 
GitLab


From f9645abe4255bd79e4c63799634c996dd53db321 Mon Sep 17 00:00:00 2001
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Date: Mon, 12 Dec 2022 11:07:05 +0100
Subject: [PATCH 3794/4122] netfilter: conntrack: document sctp timeouts

Exposed through sysctl, update documentation to describe sctp states and
their default timeouts.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 .../networking/nf_conntrack-sysctl.rst        | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
index 1120d71f28d7b..49db1d11d7c4a 100644
--- a/Documentation/networking/nf_conntrack-sysctl.rst
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -163,6 +163,39 @@ nf_conntrack_timestamp - BOOLEAN
 
 	Enable connection tracking flow timestamping.
 
+nf_conntrack_sctp_timeout_closed - INTEGER (seconds)
+	default 10
+
+nf_conntrack_sctp_timeout_cookie_wait - INTEGER (seconds)
+	default 3
+
+nf_conntrack_sctp_timeout_cookie_echoed - INTEGER (seconds)
+	default 3
+
+nf_conntrack_sctp_timeout_established - INTEGER (seconds)
+	default 432000 (5 days)
+
+nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds)
+	default 0.3
+
+nf_conntrack_sctp_timeout_shutdown_recd - INTEGER (seconds)
+	default 0.3
+
+nf_conntrack_sctp_timeout_shutdown_ack_sent - INTEGER (seconds)
+	default 3
+
+nf_conntrack_sctp_timeout_heartbeat_sent - INTEGER (seconds)
+	default 30
+
+	This timeout is used to setup conntrack entry on secondary paths.
+	Default is set to hb_interval.
+
+nf_conntrack_sctp_timeout_heartbeat_acked - INTEGER (seconds)
+	default 210
+
+	This timeout is used to setup conntrack entry on secondary paths.
+	Default is set to (hb_interval * path_max_retrans + rto_max)
+
 nf_conntrack_udp_timeout - INTEGER (seconds)
 	default 30
 
-- 
GitLab


From 2c05bf3aa0741f4f3c72432db7801371dbbcf289 Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 13 Dec 2022 12:28:51 +0100
Subject: [PATCH 3795/4122] mnt_idmapping: move ima-only helpers to ima

The vfs{g,u}id_{gt,lt}_* helpers are currently not needed outside of
ima and we shouldn't incentivize people to use them by placing them into
the header. Let's just define them locally in the one file in ima where
they are used.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Christian Brauner (Microsoft) <brauner@kernel.org>
---
 include/linux/mnt_idmapping.h       | 20 --------------------
 security/integrity/ima/ima_policy.c | 24 ++++++++++++++++++++++++
 2 files changed, 24 insertions(+), 20 deletions(-)

diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
index 092c52aa6c2c2..0ccca33a7a6df 100644
--- a/include/linux/mnt_idmapping.h
+++ b/include/linux/mnt_idmapping.h
@@ -96,26 +96,6 @@ static inline bool vfsgid_eq_kgid(vfsgid_t vfsgid, kgid_t kgid)
 	return vfsgid_valid(vfsgid) && __vfsgid_val(vfsgid) == __kgid_val(kgid);
 }
 
-static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid)
-{
-	return __vfsuid_val(vfsuid) > __kuid_val(kuid);
-}
-
-static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid)
-{
-	return __vfsgid_val(vfsgid) > __kgid_val(kgid);
-}
-
-static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid)
-{
-	return __vfsuid_val(vfsuid) < __kuid_val(kuid);
-}
-
-static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid)
-{
-	return __vfsgid_val(vfsgid) < __kgid_val(kgid);
-}
-
 /*
  * vfs{g,u}ids are created from k{g,u}ids.
  * We don't allow them to be created from regular {u,g}id.
diff --git a/security/integrity/ima/ima_policy.c b/security/integrity/ima/ima_policy.c
index 54c475f98ce1f..edd95ba02c117 100644
--- a/security/integrity/ima/ima_policy.c
+++ b/security/integrity/ima/ima_policy.c
@@ -71,6 +71,30 @@ struct ima_rule_opt_list {
 	char *items[];
 };
 
+/*
+ * These comparators are needed nowhere outside of ima so just define them here.
+ * This pattern should hopefully never be needed outside of ima.
+ */
+static inline bool vfsuid_gt_kuid(vfsuid_t vfsuid, kuid_t kuid)
+{
+	return __vfsuid_val(vfsuid) > __kuid_val(kuid);
+}
+
+static inline bool vfsgid_gt_kgid(vfsgid_t vfsgid, kgid_t kgid)
+{
+	return __vfsgid_val(vfsgid) > __kgid_val(kgid);
+}
+
+static inline bool vfsuid_lt_kuid(vfsuid_t vfsuid, kuid_t kuid)
+{
+	return __vfsuid_val(vfsuid) < __kuid_val(kuid);
+}
+
+static inline bool vfsgid_lt_kgid(vfsgid_t vfsgid, kgid_t kgid)
+{
+	return __vfsgid_val(vfsgid) < __kgid_val(kgid);
+}
+
 struct ima_rule_entry {
 	struct list_head list;
 	int action;
-- 
GitLab


From e4412739472b743e18860ad8d979a7ceb3071652 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Thu, 13 Oct 2022 03:18:41 +0900
Subject: [PATCH 3796/4122] Documentation: raise minimum supported version of
 binutils to 2.25

Binutils 2.23 was released in 2012. Almost 10 years old.

We already require GCC 5.1, released in 2015.

Bump the binutils version to 2.25, which was released some months
before GCC 5.1.

With this applied, some subsystems can start to clean up code.
Examples:
  arch/arm/Kconfig.assembler
  arch/mips/vdso/Kconfig
  arch/powerpc/Makefile
  arch/x86/Kconfig.assembler

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Linus Torvalds <torvalds@linux-foundation.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
---
 Documentation/process/changes.rst | 4 ++--
 scripts/min-tool-version.sh       | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst
index 9844ca3a71a61..ef540865ad22e 100644
--- a/Documentation/process/changes.rst
+++ b/Documentation/process/changes.rst
@@ -35,7 +35,7 @@ Rust (optional)        1.62.0           rustc --version
 bindgen (optional)     0.56.0           bindgen --version
 GNU make               3.82             make --version
 bash                   4.2              bash --version
-binutils               2.23             ld -v
+binutils               2.25             ld -v
 flex                   2.5.35           flex --version
 bison                  2.0              bison --version
 pahole                 1.16             pahole --version
@@ -119,7 +119,7 @@ Bash 4.2 or newer is needed.
 Binutils
 --------
 
-Binutils 2.23 or newer is needed to build the kernel.
+Binutils 2.25 or newer is needed to build the kernel.
 
 pkg-config
 ----------
diff --git a/scripts/min-tool-version.sh b/scripts/min-tool-version.sh
index 201bccfbc6788..a814f1efb39d5 100755
--- a/scripts/min-tool-version.sh
+++ b/scripts/min-tool-version.sh
@@ -14,7 +14,7 @@ fi
 
 case "$1" in
 binutils)
-	echo 2.23.0
+	echo 2.25.0
 	;;
 gcc)
 	echo 5.1.0
-- 
GitLab


From fccb3d3eda8d19b893e1fd18e8c70b78784b2a72 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 11 Dec 2022 11:46:47 +0900
Subject: [PATCH 3797/4122] kbuild: add test-{ge,gt,le,lt} macros

GNU Make 4.4 introduced $(intcmp ...), which is useful to compare two
integers without forking a new process.

Add test-{ge,gt,le,lt} macros, which work more efficiently with GNU
Make >= 4.4. For older Make versions, they fall back to the 'test'
shell command.

The first two parameters to $(intcmp ...) must not be empty. To avoid
the syntax error, I appended '0' to them. Fortunately, '00' is treated
as '0'. This is needed because CONFIG options may expand to an empty
string when the kernel configuration is not included.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com> # RISC-V
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
---
 Makefile                  |  2 +-
 arch/riscv/Makefile       |  2 +-
 arch/x86/Makefile         |  2 +-
 scripts/Kbuild.include    | 16 ++++++++++++++++
 scripts/Makefile.compiler |  4 ++--
 5 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/Makefile b/Makefile
index 6b047daa46cc1..ff36288ae671f 100644
--- a/Makefile
+++ b/Makefile
@@ -994,7 +994,7 @@ KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
 # Check for frame size exceeding threshold during prolog/epilog insertion
 # when using lld < 13.0.0.
 ifneq ($(CONFIG_FRAME_WARN),0)
-ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y)
 KBUILD_LDFLAGS	+= -plugin-opt=-warn-stack-size=$(CONFIG_FRAME_WARN)
 endif
 endif
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index 0d13b597cb55f..faf2c2177094b 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -37,7 +37,7 @@ else
 endif
 
 ifeq ($(CONFIG_LD_IS_LLD),y)
-ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 150000; echo $$?),0)
+ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 150000),y)
 	KBUILD_CFLAGS += -mno-relax
 	KBUILD_AFLAGS += -mno-relax
 ifndef CONFIG_AS_IS_LLVM
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 415a5d138de47..e72c7a49cd59f 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -211,7 +211,7 @@ endif
 KBUILD_LDFLAGS += -m elf_$(UTS_MACHINE)
 
 ifdef CONFIG_LTO_CLANG
-ifeq ($(shell test $(CONFIG_LLD_VERSION) -lt 130000; echo $$?),0)
+ifeq ($(call test-lt, $(CONFIG_LLD_VERSION), 130000),y)
 KBUILD_LDFLAGS	+= -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
 endif
 endif
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index cbe28744637b7..3be7c2d756679 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -11,6 +11,22 @@ space   := $(empty) $(empty)
 space_escape := _-_SPACE_-_
 pound := \#
 
+###
+# Comparison macros.
+# Usage: $(call test-lt, $(CONFIG_LLD_VERSION), 150000)
+#
+# Use $(intcmp ...) if supported. (Make >= 4.4)
+# Otherwise, fall back to the 'test' shell command.
+ifeq ($(intcmp 1,0,,,y),y)
+test-ge = $(intcmp $(strip $1)0, $(strip $2)0,,y,y)
+test-gt = $(intcmp $(strip $1)0, $(strip $2)0,,,y)
+else
+test-ge = $(shell test $(strip $1)0 -ge $(strip $2)0 && echo y)
+test-gt = $(shell test $(strip $1)0 -gt $(strip $2)0 && echo y)
+endif
+test-le = $(call test-ge, $2, $1)
+test-lt = $(call test-gt, $2, $1)
+
 ###
 # Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o
 dot-target = $(dir $@).$(notdir $@)
diff --git a/scripts/Makefile.compiler b/scripts/Makefile.compiler
index 20d353dcabfbc..3d8adfd34af1b 100644
--- a/scripts/Makefile.compiler
+++ b/scripts/Makefile.compiler
@@ -63,11 +63,11 @@ cc-disable-warning = $(call try-run,\
 
 # gcc-min-version
 # Usage: cflags-$(call gcc-min-version, 70100) += -foo
-gcc-min-version = $(shell [ $(CONFIG_GCC_VERSION)0 -ge $(1)0 ] && echo y)
+gcc-min-version = $(call test-ge, $(CONFIG_GCC_VERSION), $1)
 
 # clang-min-version
 # Usage: cflags-$(call clang-min-version, 110000) += -foo
-clang-min-version = $(shell [ $(CONFIG_CLANG_VERSION)0 -ge $(1)0 ] && echo y)
+clang-min-version = $(call test-ge, $(CONFIG_CLANG_VERSION), $1)
 
 # ld-option
 # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
-- 
GitLab


From a5db80c65dbf9144de155f8a0f08becc9c307db0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 11 Dec 2022 18:49:18 +0900
Subject: [PATCH 3798/4122] kbuild: do not sort after reading modules.order

modules.order lists modules in the deterministic order (that is why
"modules order"), and there is no duplication in the list.

$(sort ) is pointless.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
---
 scripts/Makefile.modfinal | 2 +-
 scripts/Makefile.modinst  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 25bedd83644b0..4705d32388f36 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -13,7 +13,7 @@ include $(srctree)/scripts/Kbuild.include
 include $(srctree)/scripts/Makefile.lib
 
 # find all modules listed in modules.order
-modules := $(sort $(shell cat $(MODORDER)))
+modules := $(shell cat $(MODORDER))
 
 __modfinal: $(modules)
 	@:
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index a4c987c23750f..f4cff42069adf 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -9,7 +9,7 @@ __modinst:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
-modules := $(sort $(shell cat $(MODORDER)))
+modules := $(shell cat $(MODORDER))
 
 ifeq ($(KBUILD_EXTMOD),)
 dst := $(MODLIB)/kernel
-- 
GitLab


From 6768fa4bcb6c1618248f135d04b9287ba2724ae0 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 11 Dec 2022 11:54:47 +0900
Subject: [PATCH 3799/4122] kbuild: add read-file macro

Since GNU Make 4.2, $(file ...) supports the read operater '<', which
is useful to read a file without forking a new process. No warning is
shown even if the input file is missing.

For older Make versions, it falls back to the cat command.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Tested-by: Alexander Lobakin <alexandr.lobakin@intel.com>
---
 Makefile                  |  2 +-
 scripts/Kbuild.include    | 14 ++++++++++++++
 scripts/Makefile.modfinal |  2 +-
 scripts/Makefile.modinst  |  2 +-
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/Makefile b/Makefile
index ff36288ae671f..591485152a95c 100644
--- a/Makefile
+++ b/Makefile
@@ -376,7 +376,7 @@ else # !mixed-build
 include $(srctree)/scripts/Kbuild.include
 
 # Read KERNELRELEASE from include/config/kernel.release (if it exists)
-KERNELRELEASE = $(shell cat include/config/kernel.release 2> /dev/null)
+KERNELRELEASE = $(call read-file, include/config/kernel.release)
 KERNELVERSION = $(VERSION)$(if $(PATCHLEVEL),.$(PATCHLEVEL)$(if $(SUBLEVEL),.$(SUBLEVEL)))$(EXTRAVERSION)
 export VERSION PATCHLEVEL SUBLEVEL KERNELRELEASE KERNELVERSION
 
diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 3be7c2d756679..21e76ba0de170 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -10,6 +10,10 @@ empty   :=
 space   := $(empty) $(empty)
 space_escape := _-_SPACE_-_
 pound := \#
+define newline
+
+
+endef
 
 ###
 # Comparison macros.
@@ -61,6 +65,16 @@ stringify = $(squote)$(quote)$1$(quote)$(squote)
 kbuild-dir = $(if $(filter /%,$(src)),$(src),$(srctree)/$(src))
 kbuild-file = $(or $(wildcard $(kbuild-dir)/Kbuild),$(kbuild-dir)/Makefile)
 
+###
+# Read a file, replacing newlines with spaces
+#
+# Make 4.2 or later can read a file by using its builtin function.
+ifneq ($(filter-out 3.% 4.0 4.1, $(MAKE_VERSION)),)
+read-file = $(subst $(newline),$(space),$(file < $1))
+else
+read-file = $(shell cat $1 2>/dev/null)
+endif
+
 ###
 # Easy method for doing a status message
        kecho := :
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 4705d32388f36..83f2797e530c5 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -13,7 +13,7 @@ include $(srctree)/scripts/Kbuild.include
 include $(srctree)/scripts/Makefile.lib
 
 # find all modules listed in modules.order
-modules := $(shell cat $(MODORDER))
+modules := $(call read-file, $(MODORDER))
 
 __modfinal: $(modules)
 	@:
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index f4cff42069adf..65aac6be78ec3 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -9,7 +9,7 @@ __modinst:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
-modules := $(shell cat $(MODORDER))
+modules := $(call read-file, $(MODORDER))
 
 ifeq ($(KBUILD_EXTMOD),)
 dst := $(MODLIB)/kernel
-- 
GitLab


From 3122c84409d578a5df8bcb1953547e0b871ac4c2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 11 Dec 2022 11:54:48 +0900
Subject: [PATCH 3800/4122] kconfig: refactor Makefile to reduce process forks

Refactor Makefile and use read-file macro. For Make >= 4.2, it can read
out a file by using the built-in function.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
---
 scripts/kconfig/.gitignore   |  4 +++-
 scripts/kconfig/Makefile     | 45 +++++++++++++++++++-----------------
 scripts/kconfig/gconf-cfg.sh |  7 ++++--
 scripts/kconfig/mconf-cfg.sh | 25 +++++++++++---------
 scripts/kconfig/nconf-cfg.sh | 23 ++++++++++--------
 scripts/kconfig/qconf-cfg.sh | 10 +++++---
 scripts/remove-stale-files   |  2 ++
 7 files changed, 68 insertions(+), 48 deletions(-)

diff --git a/scripts/kconfig/.gitignore b/scripts/kconfig/.gitignore
index 500e7424b3ef9..c8a3f9cd52f02 100644
--- a/scripts/kconfig/.gitignore
+++ b/scripts/kconfig/.gitignore
@@ -1,5 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 /conf
 /[gmnq]conf
-/[gmnq]conf-cfg
+/[gmnq]conf-cflags
+/[gmnq]conf-libs
+/qconf-bin
 /qconf-moc.cc
diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index b8ef0fb4bbef7..0b1d15efaeb0c 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -159,11 +159,12 @@ conf-objs	:= conf.o $(common-objs)
 hostprogs	+= nconf
 nconf-objs	:= nconf.o nconf.gui.o $(common-objs)
 
-HOSTLDLIBS_nconf	= $(shell . $(obj)/nconf-cfg && echo $$libs)
-HOSTCFLAGS_nconf.o	= $(shell . $(obj)/nconf-cfg && echo $$cflags)
-HOSTCFLAGS_nconf.gui.o	= $(shell . $(obj)/nconf-cfg && echo $$cflags)
+HOSTLDLIBS_nconf       = $(call read-file, $(obj)/nconf-libs)
+HOSTCFLAGS_nconf.o     = $(call read-file, $(obj)/nconf-cflags)
+HOSTCFLAGS_nconf.gui.o = $(call read-file, $(obj)/nconf-cflags)
 
-$(obj)/nconf.o $(obj)/nconf.gui.o: $(obj)/nconf-cfg
+$(obj)/nconf: | $(obj)/nconf-libs
+$(obj)/nconf.o $(obj)/nconf.gui.o: | $(obj)/nconf-cflags
 
 # mconf: Used for the menuconfig target based on lxdialog
 hostprogs	+= mconf
@@ -171,27 +172,28 @@ lxdialog	:= $(addprefix lxdialog/, \
 		     checklist.o inputbox.o menubox.o textbox.o util.o yesno.o)
 mconf-objs	:= mconf.o $(lxdialog) $(common-objs)
 
-HOSTLDLIBS_mconf = $(shell . $(obj)/mconf-cfg && echo $$libs)
+HOSTLDLIBS_mconf = $(call read-file, $(obj)/mconf-libs)
 $(foreach f, mconf.o $(lxdialog), \
-  $(eval HOSTCFLAGS_$f = $$(shell . $(obj)/mconf-cfg && echo $$$$cflags)))
+  $(eval HOSTCFLAGS_$f = $$(call read-file, $(obj)/mconf-cflags)))
 
-$(addprefix $(obj)/, mconf.o $(lxdialog)): $(obj)/mconf-cfg
+$(obj)/mconf: | $(obj)/mconf-libs
+$(addprefix $(obj)/, mconf.o $(lxdialog)): | $(obj)/mconf-cflags
 
 # qconf: Used for the xconfig target based on Qt
 hostprogs	+= qconf
 qconf-cxxobjs	:= qconf.o qconf-moc.o
 qconf-objs	:= images.o $(common-objs)
 
-HOSTLDLIBS_qconf	= $(shell . $(obj)/qconf-cfg && echo $$libs)
-HOSTCXXFLAGS_qconf.o	= $(shell . $(obj)/qconf-cfg && echo $$cflags)
-HOSTCXXFLAGS_qconf-moc.o = $(shell . $(obj)/qconf-cfg && echo $$cflags)
-
-$(obj)/qconf.o: $(obj)/qconf-cfg
+HOSTLDLIBS_qconf         = $(call read-file, $(obj)/qconf-libs)
+HOSTCXXFLAGS_qconf.o     = -std=c++11 -fPIC $(call read-file, $(obj)/qconf-cflags)
+HOSTCXXFLAGS_qconf-moc.o = -std=c++11 -fPIC $(call read-file, $(obj)/qconf-cflags)
+$(obj)/qconf: | $(obj)/qconf-libs
+$(obj)/qconf.o $(obj)/qconf-moc.o: | $(obj)/qconf-cflags
 
 quiet_cmd_moc = MOC     $@
-      cmd_moc = $(shell . $(obj)/qconf-cfg && echo $$moc) $< -o $@
+      cmd_moc = $(call read-file, $(obj)/qconf-bin)/moc $< -o $@
 
-$(obj)/qconf-moc.cc: $(src)/qconf.h $(obj)/qconf-cfg FORCE
+$(obj)/qconf-moc.cc: $(src)/qconf.h FORCE | $(obj)/qconf-bin
 	$(call if_changed,moc)
 
 targets += qconf-moc.cc
@@ -200,15 +202,16 @@ targets += qconf-moc.cc
 hostprogs	+= gconf
 gconf-objs	:= gconf.o images.o $(common-objs)
 
-HOSTLDLIBS_gconf    = $(shell . $(obj)/gconf-cfg && echo $$libs)
-HOSTCFLAGS_gconf.o  = $(shell . $(obj)/gconf-cfg && echo $$cflags)
+HOSTLDLIBS_gconf   = $(call read-file, $(obj)/gconf-libs)
+HOSTCFLAGS_gconf.o = $(call read-file, $(obj)/gconf-cflags)
 
-$(obj)/gconf.o: $(obj)/gconf-cfg
+$(obj)/gconf: | $(obj)/gconf-libs
+$(obj)/gconf.o: | $(obj)/gconf-cflags
 
 # check if necessary packages are available, and configure build flags
-filechk_conf_cfg = $(CONFIG_SHELL) $<
+cmd_conf_cfg = $< $(addprefix $(obj)/$*conf-, cflags libs bin)
 
-$(obj)/%conf-cfg: $(src)/%conf-cfg.sh FORCE
-	$(call filechk,conf_cfg)
+$(obj)/%conf-cflags $(obj)/%conf-libs $(obj)/%conf-bin: $(src)/%conf-cfg.sh
+	$(call cmd,conf_cfg)
 
-clean-files += *conf-cfg
+clean-files += *conf-cflags *conf-libs *conf-bin
diff --git a/scripts/kconfig/gconf-cfg.sh b/scripts/kconfig/gconf-cfg.sh
index cbd90c28c05f2..040d8f3388202 100755
--- a/scripts/kconfig/gconf-cfg.sh
+++ b/scripts/kconfig/gconf-cfg.sh
@@ -1,6 +1,9 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+cflags=$1
+libs=$2
+
 PKG="gtk+-2.0 gmodule-2.0 libglade-2.0"
 
 if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
@@ -26,5 +29,5 @@ if ! ${HOSTPKG_CONFIG} --atleast-version=2.0.0 gtk+-2.0; then
 	exit 1
 fi
 
-echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\"
-echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
+${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags}
+${HOSTPKG_CONFIG} --libs ${PKG} > ${libs}
diff --git a/scripts/kconfig/mconf-cfg.sh b/scripts/kconfig/mconf-cfg.sh
index 025b565e0b7cd..1e61f50a59050 100755
--- a/scripts/kconfig/mconf-cfg.sh
+++ b/scripts/kconfig/mconf-cfg.sh
@@ -1,19 +1,22 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+cflags=$1
+libs=$2
+
 PKG="ncursesw"
 PKG2="ncurses"
 
 if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then
 	if ${HOSTPKG_CONFIG} --exists $PKG; then
-		echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\"
-		echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
+		${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags}
+		${HOSTPKG_CONFIG} --libs ${PKG} > ${libs}
 		exit 0
 	fi
 
-	if ${HOSTPKG_CONFIG} --exists $PKG2; then
-		echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\"
-		echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\"
+	if ${HOSTPKG_CONFIG} --exists ${PKG2}; then
+		${HOSTPKG_CONFIG} --cflags ${PKG2} > ${cflags}
+		${HOSTPKG_CONFIG} --libs ${PKG2} > ${libs}
 		exit 0
 	fi
 fi
@@ -22,22 +25,22 @@ fi
 # (Even if it is installed, some distributions such as openSUSE cannot
 # find ncurses by pkg-config.)
 if [ -f /usr/include/ncursesw/ncurses.h ]; then
-	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\"
-	echo libs=\"-lncursesw\"
+	echo -D_GNU_SOURCE -I/usr/include/ncursesw > ${cflags}
+	echo -lncursesw > ${libs}
 	exit 0
 fi
 
 if [ -f /usr/include/ncurses/ncurses.h ]; then
-	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\"
-	echo libs=\"-lncurses\"
+	echo -D_GNU_SOURCE -I/usr/include/ncurses > ${cflags}
+	echo -lncurses > ${libs}
 	exit 0
 fi
 
 # As a final fallback before giving up, check if $HOSTCC knows of a default
 # ncurses installation (e.g. from a vendor-specific sysroot).
 if echo '#include <ncurses.h>' | ${HOSTCC} -E - >/dev/null 2>&1; then
-	echo cflags=\"-D_GNU_SOURCE\"
-	echo libs=\"-lncurses\"
+	echo -D_GNU_SOURCE > ${cflags}
+	echo -lncurses > ${libs}
 	exit 0
 fi
 
diff --git a/scripts/kconfig/nconf-cfg.sh b/scripts/kconfig/nconf-cfg.sh
index 3a10bac2adb3a..f871a2160e363 100755
--- a/scripts/kconfig/nconf-cfg.sh
+++ b/scripts/kconfig/nconf-cfg.sh
@@ -1,19 +1,22 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+cflags=$1
+libs=$2
+
 PKG="ncursesw menuw panelw"
 PKG2="ncurses menu panel"
 
 if [ -n "$(command -v ${HOSTPKG_CONFIG})" ]; then
 	if ${HOSTPKG_CONFIG} --exists $PKG; then
-		echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG)\"
-		echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
+		${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags}
+		${HOSTPKG_CONFIG} --libs ${PKG} > ${libs}
 		exit 0
 	fi
 
 	if ${HOSTPKG_CONFIG} --exists $PKG2; then
-		echo cflags=\"$(${HOSTPKG_CONFIG} --cflags $PKG2)\"
-		echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG2)\"
+		${HOSTPKG_CONFIG} --cflags ${PKG2} > ${cflags}
+		${HOSTPKG_CONFIG} --libs ${PKG2} > ${libs}
 		exit 0
 	fi
 fi
@@ -22,20 +25,20 @@ fi
 # (Even if it is installed, some distributions such as openSUSE cannot
 # find ncurses by pkg-config.)
 if [ -f /usr/include/ncursesw/ncurses.h ]; then
-	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncursesw\"
-	echo libs=\"-lncursesw -lmenuw -lpanelw\"
+	echo -D_GNU_SOURCE -I/usr/include/ncursesw > ${cflags}
+	echo -lncursesw -lmenuw -lpanelw > ${libs}
 	exit 0
 fi
 
 if [ -f /usr/include/ncurses/ncurses.h ]; then
-	echo cflags=\"-D_GNU_SOURCE -I/usr/include/ncurses\"
-	echo libs=\"-lncurses -lmenu -lpanel\"
+	echo -D_GNU_SOURCE -I/usr/include/ncurses > ${cflags}
+	echo -lncurses -lmenu -lpanel > ${libs}
 	exit 0
 fi
 
 if [ -f /usr/include/ncurses.h ]; then
-	echo cflags=\"-D_GNU_SOURCE\"
-	echo libs=\"-lncurses -lmenu -lpanel\"
+	echo -D_GNU_SOURCE > ${cflags}
+	echo -lncurses -lmenu -lpanel > ${libs}
 	exit 0
 fi
 
diff --git a/scripts/kconfig/qconf-cfg.sh b/scripts/kconfig/qconf-cfg.sh
index ad652cb539474..117f36e568fc5 100755
--- a/scripts/kconfig/qconf-cfg.sh
+++ b/scripts/kconfig/qconf-cfg.sh
@@ -1,6 +1,10 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+cflags=$1
+libs=$2
+bin=$3
+
 PKG="Qt5Core Qt5Gui Qt5Widgets"
 
 if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
@@ -11,9 +15,9 @@ if [ -z "$(command -v ${HOSTPKG_CONFIG})" ]; then
 fi
 
 if ${HOSTPKG_CONFIG} --exists $PKG; then
-	echo cflags=\"-std=c++11 -fPIC $(${HOSTPKG_CONFIG} --cflags $PKG)\"
-	echo libs=\"$(${HOSTPKG_CONFIG} --libs $PKG)\"
-	echo moc=\"$(${HOSTPKG_CONFIG} --variable=host_bins Qt5Core)/moc\"
+	${HOSTPKG_CONFIG} --cflags ${PKG} > ${cflags}
+	${HOSTPKG_CONFIG} --libs ${PKG} > ${libs}
+	${HOSTPKG_CONFIG} --variable=host_bins Qt5Core > ${bin}
 	exit 0
 fi
 
diff --git a/scripts/remove-stale-files b/scripts/remove-stale-files
index ccadfa3afb2b8..64b14aa5aebf4 100755
--- a/scripts/remove-stale-files
+++ b/scripts/remove-stale-files
@@ -47,3 +47,5 @@ rm -f arch/riscv/purgatory/kexec-purgatory.c
 rm -f scripts/extract-cert
 
 rm -f arch/x86/purgatory/kexec-purgatory.c
+
+rm -f scripts/kconfig/[gmnq]conf-cfg
-- 
GitLab


From 875ef1a57f32fcb91010dc9bc8bd1166956a579e Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 11 Dec 2022 12:10:59 +0900
Subject: [PATCH 3801/4122] kbuild: use .NOTINTERMEDIATE for future GNU Make
 versions

In Kbuild, some files are generated by chains of pattern/implicit rules.
For example, *.dtb.o files in drivers/of/unittest-data/Makefile are
generated by the chain of 3 pattern rules, like this:

  %.dts  ->  %.dtb  ->  %.dtb.S  ->  %.dtb.o

Here, %.dts is the real source, %.dtb.o is the final target.
%.dtb and %.dtb.S are called "intermediate files".

As GNU Make manual [1] says, intermediate files are treated differently
in two ways:

 (a) The first difference is what happens if the intermediate file does
   not exist. If an ordinary file 'b' does not exist, and make considers
   a target that depends on 'b', it invariably creates 'b' and then
   updates the target from 'b'. But if 'b' is an intermediate file, then
   make can leave well enough alone: it won't create 'b' unless one of
   its prerequisites is out of date. This means the target depending
   on 'b' won't be rebuilt either, unless there is some other reason
   to update that target: for example the target doesn't exist or a
   different prerequisite is newer than the target.

 (b) The second difference is that if make does create 'b' in order to
   update something else, it deletes 'b' later on after it is no longer
   needed. Therefore, an intermediate file which did not exist before
   make also does not exist after make. make reports the deletion to
   you by printing a 'rm' command showing which file it is deleting.

The combination of these is problematic for Kbuild because most of the
build rules depend on FORCE and the if_changed* macros really determine
if the target should be updated. So, all missing files, whether they
are intermediate or not, are always rebuilt.

To see the problem, delete ".SECONDARY:" from scripts/Kbuild.include,
and repeat this command:

  $ make allmodconfig drivers/of/unittest-data/

The intermediate files will be deleted, which results in rebuilding
intermediate and final objects in the next run of make.

In the old days, people suppressed (b) in inconsistent ways.
As commit 54a702f70589 ("kbuild: mark $(targets) as .SECONDARY and
remove .PRECIOUS markers") noted, you should not use .PRECIOUS because
.PRECIOUS has the following behavior (c), which is not likely what you
want.

 (c) If make is killed or interrupted during the execution of their
   recipes, the target is not deleted. Also, the target is not deleted
   on error even if .DELETE_ON_ERROR is specified.

.SECONDARY is a much better way to disable (b), but a small problem
is that .SECONDARY enables (a), which gives a side-effect to $?;
prerequisites marked as .SECONDARY do not appear in $?. This is a
drawback for Kbuild.

I thought it was a bug and opened a bug report. As Paul, the GNU Make
maintainer, concluded in [2], this is not a bug.

A good news is that, GNU Make 4.4 added the perfect solution,
.NOTINTERMEDIATE, which cancels both (a) and (b).

For clarificaton, my understanding of .INTERMEDIATE, .SECONDARY,
.PRECIOUS and .NOTINTERMEDIATE are as follows:

                        (a)         (b)         (c)
  .INTERMEDIATE        enable      enable      disable
  .SECONDARY           enable      disable     disable
  .PRECIOUS            disable     disable     enable
  .NOTINTERMEDIATE     disable     disable     disable

However, GNU Make 4.4 has a bug for the global .NOTINTERMEDIATE. [3]
It was fixed by commit 6164608900ad ("[SV 63417] Ensure global
.NOTINTERMEDIATE disables all intermediates"), and will be available
in the next release of GNU Make.

The following is the gain for .NOTINTERMEDIATE:

  [Current Make]

      $ make allnoconfig vmlinux
          [ full build ]
      $ rm include/linux/device.h
      $ make vmlinux
        CALL    scripts/checksyscalls.sh

  Make does not notice the removal of <linux/device.h>.

  [Future Make]

      $ make-latest allnoconfig vmlinux
          [ full build ]
      $ rm include/linux/device.h
      $ make-latest vmlinux
        CC      arch/x86/kernel/asm-offsets.s
      In file included from ./include/linux/writeback.h:13,
                       from ./include/linux/memcontrol.h:22,
                       from ./include/linux/swap.h:9,
                       from ./include/linux/suspend.h:5,
                       from arch/x86/kernel/asm-offsets.c:13:
      ./include/linux/blk_types.h:11:10: fatal error: linux/device.h: No such file or directory
         11 | #include <linux/device.h>
            |          ^~~~~~~~~~~~~~~~
      compilation terminated.
      make-latest[1]: *** [scripts/Makefile.build:114: arch/x86/kernel/asm-offsets.s] Error 1
      make-latest: *** [Makefile:1282: prepare0] Error 2

  Make notices the removal of <linux/device.h>, and rebuilds objects
  that depended on <linux/device.h>. There exists a source file that
  includes <linux/device.h>, and it raises an error.

To see detailed background information, refer to commit 2d3b1b8f0da7
("kbuild: drop $(wildcard $^) check in if_changed* for faster rebuild").

[1]: https://www.gnu.org/software/make/manual/make.html#Chained-Rules
[2]: https://savannah.gnu.org/bugs/?55532
[3]: https://savannah.gnu.org/bugs/?63417

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/Kbuild.include | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include
index 21e76ba0de170..2f7356b2990bb 100644
--- a/scripts/Kbuild.include
+++ b/scripts/Kbuild.include
@@ -185,9 +185,6 @@ endif
 make-cmd = $(call escsq,$(subst $(pound),$$(pound),$(subst $$,$$$$,$(cmd_$(1)))))
 
 # Find any prerequisites that are newer than target or that do not exist.
-# (This is not true for now; $? should contain any non-existent prerequisites,
-# but it does not work as expected when .SECONDARY is present. This seems a bug
-# of GNU Make.)
 # PHONY targets skipped in both cases.
 newer-prereqs = $(filter-out $(PHONY),$?)
 
@@ -263,4 +260,14 @@ endif
 .DELETE_ON_ERROR:
 
 # do not delete intermediate files automatically
+#
+# .NOTINTERMEDIATE is more correct, but only available on newer Make versions.
+# Make 4.4 introduced .NOTINTERMEDIATE, and it appears in .FEATURES, but the
+# global .NOTINTERMEDIATE does not work. We can use it on Make > 4.4.
+# Use .SECONDARY for older Make versions, but "newer-prereq" cannot detect
+# deleted files.
+ifneq ($(and $(filter notintermediate, $(.FEATURES)),$(filter-out 4.4,$(MAKE_VERSION))),)
+.NOTINTERMEDIATE:
+else
 .SECONDARY:
+endif
-- 
GitLab


From c528ef0888b75f673f7d48022de8d31d5b451e8c Mon Sep 17 00:00:00 2001
From: Guo Ren <guoren@linux.alibaba.com>
Date: Wed, 7 Dec 2022 04:11:12 -0500
Subject: [PATCH 3802/4122] riscv: Fixup compile error with !MMU

Current nommu_virt_defconfig can't compile:

In file included from
arch/riscv/kernel/crash_core.c:3:
arch/riscv/kernel/crash_core.c:
In function 'arch_crash_save_vmcoreinfo':
arch/riscv/kernel/crash_core.c:8:27:
error: 'VA_BITS' undeclared (first use in this function)
    8 |         VMCOREINFO_NUMBER(VA_BITS);
      |                           ^~~~~~~

Add MMU dependency for KEXEC_FILE.

Fixes: 6261586e0c91 ("RISC-V: Add kexec_file support")
Reported-by: Conor Dooley <conor.dooley@microchip.com>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Guo Ren <guoren@kernel.org>
Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
Tested-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221207091112.2258674-1-guoren@kernel.org
Cc: stable@vger.kernel.org
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 arch/riscv/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 67ef08d33d3ad..e1a9fa47f0123 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -498,7 +498,7 @@ config KEXEC_FILE
 	select KEXEC_CORE
 	select KEXEC_ELF
 	select HAVE_IMA_KEXEC if IMA
-	depends on 64BIT
+	depends on 64BIT && MMU
 	help
 	  This is new version of kexec system call. This system call is
 	  file based and takes file descriptors as system call argument
-- 
GitLab


From 37f0ab1477994a0d0dc3c1e0de030fae07d37965 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Tue, 6 Dec 2022 18:08:12 -0800
Subject: [PATCH 3803/4122] Documentation: RISC-V: Fix a typo in
 patch-acceptance

I just stumbled on this when modifying the docs.

Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20221207020815.16214-2-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 Documentation/riscv/patch-acceptance.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
index dfe0ac5624fb2..5da6f9b273d6c 100644
--- a/Documentation/riscv/patch-acceptance.rst
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -29,7 +29,7 @@ their own custom extensions.  These custom extensions aren't required
 to go through any review or ratification process by the RISC-V
 Foundation.  To avoid the maintenance complexity and potential
 performance impact of adding kernel code for implementor-specific
-RISC-V extensions, we'll only to accept patches for extensions that
+RISC-V extensions, we'll only accept patches for extensions that
 have been officially frozen or ratified by the RISC-V Foundation.
 (Implementors, may, of course, maintain their own Linux kernel trees
 containing code for any custom extensions that they wish.)
-- 
GitLab


From 936100d4507f2e9f0be4621b0c698180d65e8264 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Tue, 6 Dec 2022 18:08:13 -0800
Subject: [PATCH 3804/4122] Documentation: RISC-V: Allow patches for
 non-standard behavior

The patch acceptance policy forbids accepting support for non-standard
behavior.  This policy was written in order to both steer implementers
towards the standards and to avoid coupling the upstream kernel too
tightly to vendor-specific features.  Those were good goals, but in
practice the policy just isn't working: every RISC-V system we have
needs vendor-specific behavior in the kernel and we end up taking that
support which violates the policy.  That's confusing for contributors,
which is the main reason we have a written policy in the first place.

So let's just start taking code for vendor-defined behavior.

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Signed-off-by: Paul Walmsley <paul.walmsley@sifive.com>
Link: https://lore.kernel.org/all/alpine.DEB.2.21.999.2211181027590.4480@utopia.booyaka.com/
[Palmer: merge in Paul's suggestions]
Link: https://lore.kernel.org/r/20221207020815.16214-3-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 Documentation/riscv/patch-acceptance.rst | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
index 5da6f9b273d6c..d9d628505cd84 100644
--- a/Documentation/riscv/patch-acceptance.rst
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -29,7 +29,11 @@ their own custom extensions.  These custom extensions aren't required
 to go through any review or ratification process by the RISC-V
 Foundation.  To avoid the maintenance complexity and potential
 performance impact of adding kernel code for implementor-specific
-RISC-V extensions, we'll only accept patches for extensions that
-have been officially frozen or ratified by the RISC-V Foundation.
-(Implementors, may, of course, maintain their own Linux kernel trees
-containing code for any custom extensions that they wish.)
+RISC-V extensions, we'll only consider patches for extensions that either:
+
+- Have been officially frozen or ratified by the RISC-V Foundation, or
+- Have been implemented in hardware that is widely available, per standard
+  Linux practice.
+
+(Implementors, may, of course, maintain their own Linux kernel trees containing
+code for any custom extensions that they wish.)
-- 
GitLab


From 68eabc72023f2c1cdbf7932fde57a7811c65b414 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Tue, 6 Dec 2022 18:08:14 -0800
Subject: [PATCH 3805/4122] Documentation: RISC-V: Mention the UEFI Standards

The current patch acceptance policy requires that specifications are
approved by the RISC-V foundation, but we rely on external
specifications as well.  This explicitly calls out the UEFI
specifications that we're starting to depend on.

Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Atish Patra <atishp@rivosinc.com>
Reviewed-by: Anup Patel <anup@brainfault.org>
Link: https://lore.kernel.org/r/20221207020815.16214-4-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 Documentation/riscv/patch-acceptance.rst | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
index d9d628505cd84..389a455843865 100644
--- a/Documentation/riscv/patch-acceptance.rst
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -20,9 +20,11 @@ Submit Checklist Addendum
 -------------------------
 We'll only accept patches for new modules or extensions if the
 specifications for those modules or extensions are listed as being
-"Frozen" or "Ratified" by the RISC-V Foundation.  (Developers may, of
-course, maintain their own Linux kernel trees that contain code for
-any draft extensions that they wish.)
+unlikely to be incompatibly changed in the future.  For
+specifications from the RISC-V foundation this means "Frozen" or
+"Ratified", for the UEFI forum specifications this means a published
+ECR.  (Developers may, of course, maintain their own Linux kernel trees
+that contain code for any draft extensions that they wish.)
 
 Additionally, the RISC-V specification allows implementors to create
 their own custom extensions.  These custom extensions aren't required
-- 
GitLab


From a39c636506cb90b9ba25cbb0a78bbcc3725ea227 Mon Sep 17 00:00:00 2001
From: Palmer Dabbelt <palmer@rivosinc.com>
Date: Tue, 6 Dec 2022 18:08:15 -0800
Subject: [PATCH 3806/4122] Documentation: RISC-V: patch-acceptance:
 s/implementor/implementer

Implementor does appear to be a word, but it's not very common.

Suggested-by: Conor Dooley <conor@kernel.org>
Reviewed-by: Anup Patel <anup@brainfault.org>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Link: https://lore.kernel.org/r/20221207020815.16214-5-palmer@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 Documentation/riscv/patch-acceptance.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/riscv/patch-acceptance.rst b/Documentation/riscv/patch-acceptance.rst
index 389a455843865..07d5a5623e2a3 100644
--- a/Documentation/riscv/patch-acceptance.rst
+++ b/Documentation/riscv/patch-acceptance.rst
@@ -26,7 +26,7 @@ specifications from the RISC-V foundation this means "Frozen" or
 ECR.  (Developers may, of course, maintain their own Linux kernel trees
 that contain code for any draft extensions that they wish.)
 
-Additionally, the RISC-V specification allows implementors to create
+Additionally, the RISC-V specification allows implementers to create
 their own custom extensions.  These custom extensions aren't required
 to go through any review or ratification process by the RISC-V
 Foundation.  To avoid the maintenance complexity and potential
@@ -37,5 +37,5 @@ RISC-V extensions, we'll only consider patches for extensions that either:
 - Have been implemented in hardware that is widely available, per standard
   Linux practice.
 
-(Implementors, may, of course, maintain their own Linux kernel trees containing
+(Implementers, may, of course, maintain their own Linux kernel trees containing
 code for any custom extensions that they wish.)
-- 
GitLab


From a785736d7e587abbfd59df44e9a815d8d109c28c Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 13 Dec 2022 09:56:02 -0500
Subject: [PATCH 3807/4122] tracing: Have trigger filter parsing errors show up
 in error_log

It is annoying that the filter parsing of triggers do not show up in the
error_log. Trying to figure out what is incorrect in the input is
difficult when it fails for a typo.

Have the errors of filter parsing show up in error_log as well.

Link: https://lore.kernel.org/linux-trace-kernel/20221213095602.083fa9fd@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Tom Zanussi <zanussi@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_trigger.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 918730d749325..19ce9d22bfd7c 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1067,7 +1067,14 @@ int set_trigger_filter(char *filter_str,
 
 	/* The filter is for the 'trigger' event, not the triggered event */
 	ret = create_event_filter(file->tr, file->event_call,
-				  filter_str, false, &filter);
+				  filter_str, true, &filter);
+
+	/* Only enabled set_str for error handling */
+	if (filter) {
+		kfree(filter->filter_string);
+		filter->filter_string = NULL;
+	}
+
 	/*
 	 * If create_event_filter() fails, filter still needs to be freed.
 	 * Which the calling code will do with data->filter.
-- 
GitLab


From fab89a09c86f948adfc7e20a7d608bd9f323bbe1 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Mon, 12 Dec 2022 19:38:14 -0500
Subject: [PATCH 3808/4122] tracing: Remove pointer (asterisk) and brackets
 from cpumask_t field

To differentiate between long arrays and cpumasks, the __cpumask() field
was created. Part of the TRACE_EVENT() macros test if the type is signed
or not by using the is_signed_type() macro. The __cpumask() field used the
__dynamic_array() helper but because cpumask_t is a structure, it could
not be used in the is_signed_type() macro as that would fail to build, so
instead it passed in the pointer to cpumask_t.

Unfortunately, that creates in the format file:

  field:__data_loc cpumask_t *[] mask;    offset:36;      size:4; signed:0;

Which looks like an array of pointers to cpumask_t and not a cpumask_t
type, which is misleading to user space parsers.

Douglas Raillard pointed out that the "[]" are also misleading, as
cpumask_t is not an array.

Since cpumask() hasn't been created yet, and the parsers currently fail on
it (but will still produce the raw output), make it be:

  field:__data_loc cpumask_t mask;    offset:36;      size:4; signed:0;

Which is the correct type of the field.

Then the parsers can be updated to handle this.

Link: https://lore.kernel.org/lkml/6dda5e1d-9416-b55e-88f3-31d148bc925f@arm.com/
Link: https://lore.kernel.org/linux-trace-kernel/20221212193814.0e3f1e43@gandalf.local.home

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Valentin Schneider <vschneid@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Fixes: 8230f27b1ccc ("tracing: Add __cpumask to denote a trace event field that is a cpumask_t")
Reported-by: Douglas Raillard <douglas.raillard@arm.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 include/trace/stages/stage4_event_fields.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
index f2990d22313cf..affd541fd25ea 100644
--- a/include/trace/stages/stage4_event_fields.h
+++ b/include/trace/stages/stage4_event_fields.h
@@ -47,7 +47,10 @@
 #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
 
 #undef __cpumask
-#define __cpumask(item) __dynamic_array(cpumask_t *, item, -1)
+#define __cpumask(item) {						\
+	.type = "__data_loc cpumask_t", .name = #item,			\
+	.size = 4, .align = 4,						\
+	.is_signed = 0, .filter_type = FILTER_OTHER },
 
 #undef __sockaddr
 #define __sockaddr(field, len) __dynamic_array(u8, field, len)
@@ -68,7 +71,10 @@
 #define __rel_bitmask(item, nr_bits) __rel_dynamic_array(unsigned long, item, -1)
 
 #undef __rel_cpumask
-#define __rel_cpumask(item) __rel_dynamic_array(cpumask_t *, item, -1)
+#define __rel_cpumask(item) {						\
+	.type = "__rel_loc cpumask_t", .name = #item,			\
+	.size = 4, .align = 4,						\
+	.is_signed = 0, .filter_type = FILTER_OTHER },
 
 #undef __rel_sockaddr
 #define __rel_sockaddr(field, len) __rel_dynamic_array(u8, field, len)
-- 
GitLab


From 508f28c67171e276356650f407dd87d42b6913ef Mon Sep 17 00:00:00 2001
From: Youling Tang <tangyouling@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:48 +0800
Subject: [PATCH 3809/4122] LoongArch: Consolidate __ex_table construction

Consolidate all the __ex_table constuction code with a _ASM_EXTABLE or
_asm_extable helper.

There should be no functional change as a result of this patch.

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/asm-extable.h | 35 ++++++++++++++++++++++++
 arch/loongarch/include/asm/futex.h       | 13 ++++-----
 arch/loongarch/include/asm/uaccess.h     |  9 ++----
 arch/loongarch/kernel/fpu.S              |  5 ++--
 arch/loongarch/lib/clear_user.S          |  5 ++--
 arch/loongarch/lib/copy_user.S           |  5 ++--
 6 files changed, 49 insertions(+), 23 deletions(-)
 create mode 100644 arch/loongarch/include/asm/asm-extable.h

diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h
new file mode 100644
index 0000000000000..4f615bf56727e
--- /dev/null
+++ b/arch/loongarch/include/asm/asm-extable.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_ASM_EXTABLE_H
+#define __ASM_ASM_EXTABLE_H
+
+#ifdef __ASSEMBLY__
+
+#define __ASM_EXTABLE_RAW(insn, fixup)			\
+	.pushsection	__ex_table, "a";		\
+	.balign		8;				\
+	.quad		(insn);				\
+	.quad		(fixup);			\
+	.popsection;
+
+	.macro		_asm_extable, insn, fixup
+	__ASM_EXTABLE_RAW(\insn, \fixup)
+	.endm
+
+#else /* __ASSEMBLY__ */
+
+#include <linux/bits.h>
+#include <linux/stringify.h>
+
+#define __ASM_EXTABLE_RAW(insn, fixup)			\
+	".pushsection	__ex_table, \"a\"\n"		\
+	".balign	8\n"				\
+	".quad		((" insn "))\n"			\
+	".quad		((" fixup "))\n"		\
+	".popsection\n"
+
+#define _ASM_EXTABLE(insn, fixup)	\
+	__ASM_EXTABLE_RAW(#insn, #fixup)
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h
index feb6658c84ff8..bdcd1c6132998 100644
--- a/arch/loongarch/include/asm/futex.h
+++ b/arch/loongarch/include/asm/futex.h
@@ -7,6 +7,7 @@
 
 #include <linux/futex.h>
 #include <linux/uaccess.h>
+#include <asm/asm-extable.h>
 #include <asm/barrier.h>
 #include <asm/errno.h>
 
@@ -22,10 +23,8 @@
 	"4:	li.w	%0, %6				\n"	\
 	"	b	3b				\n"	\
 	"	.previous				\n"	\
-	"	.section __ex_table,\"a\"		\n"	\
-	"	"__UA_ADDR "\t1b, 4b			\n"	\
-	"	"__UA_ADDR "\t2b, 4b			\n"	\
-	"	.previous				\n"	\
+	_ASM_EXTABLE(1b, 4b)					\
+	_ASM_EXTABLE(2b, 4b)					\
 	: "=r" (ret), "=&r" (oldval),				\
 	  "=ZC" (*uaddr)					\
 	: "0" (0), "ZC" (*uaddr), "Jr" (oparg),			\
@@ -90,10 +89,8 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
 	"4:	li.d	%0, %6					\n"
 	"	b	3b					\n"
 	"	.previous					\n"
-	"	.section __ex_table,\"a\"			\n"
-	"	"__UA_ADDR "\t1b, 4b				\n"
-	"	"__UA_ADDR "\t2b, 4b				\n"
-	"	.previous					\n"
+	_ASM_EXTABLE(1b, 4b)
+	_ASM_EXTABLE(2b, 4b)
 	: "+r" (ret), "=&r" (val), "=ZC" (*uaddr)
 	: "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval),
 	  "i" (-EFAULT)
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index a8ae2af4025ab..bf9a4e218ac0e 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -15,6 +15,7 @@
 #include <linux/string.h>
 #include <linux/extable.h>
 #include <asm/pgtable.h>
+#include <asm/asm-extable.h>
 #include <asm-generic/extable.h>
 #include <asm-generic/access_ok.h>
 
@@ -165,9 +166,7 @@ do {									\
 	"	move	%1, $zero				\n"	\
 	"	b	2b					\n"	\
 	"	.previous					\n"	\
-	"	.section __ex_table,\"a\"			\n"	\
-	"	"__UA_ADDR "\t1b, 3b				\n"	\
-	"	.previous					\n"	\
+	_ASM_EXTABLE(1b, 3b)						\
 	: "+r" (__gu_err), "=r" (__gu_tmp)				\
 	: "m" (__m(ptr)), "i" (-EFAULT));				\
 									\
@@ -196,9 +195,7 @@ do {									\
 	"3:	li.w	%0, %3					\n"	\
 	"	b	2b					\n"	\
 	"	.previous					\n"	\
-	"	.section	__ex_table,\"a\"		\n"	\
-	"	" __UA_ADDR "	1b, 3b				\n"	\
-	"	.previous					\n"	\
+	_ASM_EXTABLE(1b, 3b)						\
 	: "+r" (__pu_err), "=m" (__m(ptr))				\
 	: "Jr" (__pu_val), "i" (-EFAULT));				\
 }
diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S
index 576b3370a296d..ccde94140c896 100644
--- a/arch/loongarch/kernel/fpu.S
+++ b/arch/loongarch/kernel/fpu.S
@@ -8,6 +8,7 @@
  */
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
 #include <asm/asm-offsets.h>
 #include <asm/errno.h>
 #include <asm/export.h>
@@ -21,9 +22,7 @@
 
 	.macro	EX insn, reg, src, offs
 .ex\@:	\insn	\reg, \src, \offs
-	.section __ex_table,"a"
-	PTR	.ex\@, fault
-	.previous
+	_asm_extable .ex\@, fault
 	.endm
 
 	.macro sc_save_fp base
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 16ba2b8dd68ad..7a066d6a41b80 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -5,6 +5,7 @@
 
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
 #include <asm/export.h>
 #include <asm/regdef.h>
 
@@ -15,9 +16,7 @@
 	jr	ra
 	.previous
 .endif
-	.section __ex_table, "a"
-	PTR	\from\()b, \to\()b
-	.previous
+	_asm_extable \from\()b, \to\()b
 .endm
 
 /*
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 97d20327a69ee..f8ace04586c26 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -5,6 +5,7 @@
 
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
 #include <asm/export.h>
 #include <asm/regdef.h>
 
@@ -15,9 +16,7 @@
 	jr	ra
 	.previous
 .endif
-	.section __ex_table, "a"
-	PTR	\from\()b, \to\()b
-	.previous
+	_asm_extable \from\()b, \to\()b
 .endm
 
 /*
-- 
GitLab


From 3d36f4298ba91fbdec6bc56aa7bb0663cba6ab0c Mon Sep 17 00:00:00 2001
From: Youling Tang <tangyouling@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:48 +0800
Subject: [PATCH 3810/4122] LoongArch: Switch to relative exception tables

Similar to other architectures such as arm64, x86, riscv and so on, use
offsets relative to the exception table entry values rather than their
absolute addresses for both the exception location and the fixup.

However, LoongArch label difference because it will actually produce two
relocations, a pair of R_LARCH_ADD32 and R_LARCH_SUB32. Take simple code
below for example:

$ cat test_ex_table.S
.section .text
1:
        nop
.section __ex_table,"a"
        .balign 4
        .long (1b - .)
.previous

$ loongarch64-unknown-linux-gnu-gcc -c test_ex_table.S
$ loongarch64-unknown-linux-gnu-readelf -Wr test_ex_table.o

Relocation section '.rela__ex_table' at offset 0x100 contains 2 entries:
    Offset            Info             Type         Symbol's Value   Symbol's Name + Addend
0000000000000000 0000000600000032 R_LARCH_ADD32    0000000000000000  .L1^B1 + 0
0000000000000000 0000000500000037 R_LARCH_SUB32    0000000000000000  L0^A + 0

The modpost will complain the R_LARCH_SUB32 relocation, so we need to
patch modpost.c to skip this relocation for .rela__ex_table section.

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/asm-extable.h | 12 ++++-----
 arch/loongarch/include/asm/extable.h     | 26 +++++++++++++++++++
 arch/loongarch/include/asm/uaccess.h     |  2 +-
 arch/loongarch/mm/extable.c              | 32 ++++++++++++++++--------
 scripts/mod/modpost.c                    | 13 ++++++++++
 scripts/sorttable.c                      |  2 +-
 6 files changed, 69 insertions(+), 18 deletions(-)
 create mode 100644 arch/loongarch/include/asm/extable.h

diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h
index 4f615bf56727e..74f8bc75472ac 100644
--- a/arch/loongarch/include/asm/asm-extable.h
+++ b/arch/loongarch/include/asm/asm-extable.h
@@ -6,9 +6,9 @@
 
 #define __ASM_EXTABLE_RAW(insn, fixup)			\
 	.pushsection	__ex_table, "a";		\
-	.balign		8;				\
-	.quad		(insn);				\
-	.quad		(fixup);			\
+	.balign		4;				\
+	.long		((insn) - .);			\
+	.long		((fixup) - .);			\
 	.popsection;
 
 	.macro		_asm_extable, insn, fixup
@@ -22,9 +22,9 @@
 
 #define __ASM_EXTABLE_RAW(insn, fixup)			\
 	".pushsection	__ex_table, \"a\"\n"		\
-	".balign	8\n"				\
-	".quad		((" insn "))\n"			\
-	".quad		((" fixup "))\n"		\
+	".balign	4\n"				\
+	".long		((" insn ") - .)\n"		\
+	".long		((" fixup ") - .)\n"		\
 	".popsection\n"
 
 #define _ASM_EXTABLE(insn, fixup)	\
diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h
new file mode 100644
index 0000000000000..b571c89705d1e
--- /dev/null
+++ b/arch/loongarch/include/asm/extable.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_LOONGARCH_EXTABLE_H
+#define _ASM_LOONGARCH_EXTABLE_H
+
+/*
+ * The exception table consists of pairs of relative offsets: the first
+ * is the relative offset to an instruction that is allowed to fault,
+ * and the second is the relative offset at which the program should
+ * continue. No registers are modified, so it is entirely up to the
+ * continuation code to figure out what to do.
+ *
+ * All the routines below use bits of fixup code that are out of line
+ * with the main instruction path.  This means when everything is well,
+ * we don't even have to jump over them.  Further, they do not intrude
+ * on our cache or tlb entries.
+ */
+
+struct exception_table_entry {
+	int insn, fixup;
+};
+
+#define ARCH_HAS_RELATIVE_EXTABLE
+
+bool fixup_exception(struct pt_regs *regs);
+
+#endif
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index bf9a4e218ac0e..e33282e0bdef2 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -15,8 +15,8 @@
 #include <linux/string.h>
 #include <linux/extable.h>
 #include <asm/pgtable.h>
+#include <asm/extable.h>
 #include <asm/asm-extable.h>
-#include <asm-generic/extable.h>
 #include <asm-generic/access_ok.h>
 
 extern u64 __ua_limit;
diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c
index bc20988f2b87c..08a9a7d6357a7 100644
--- a/arch/loongarch/mm/extable.c
+++ b/arch/loongarch/mm/extable.c
@@ -3,20 +3,32 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 #include <linux/extable.h>
-#include <linux/spinlock.h>
-#include <asm/branch.h>
 #include <linux/uaccess.h>
+#include <asm/asm-extable.h>
+#include <asm/branch.h>
+
+static inline unsigned long
+get_ex_fixup(const struct exception_table_entry *ex)
+{
+	return ((unsigned long)&ex->fixup + ex->fixup);
+}
 
-int fixup_exception(struct pt_regs *regs)
+static bool ex_handler_fixup(const struct exception_table_entry *ex,
+			     struct pt_regs *regs)
 {
-	const struct exception_table_entry *fixup;
+	regs->csr_era = get_ex_fixup(ex);
 
-	fixup = search_exception_tables(exception_era(regs));
-	if (fixup) {
-		regs->csr_era = fixup->fixup;
+	return true;
+}
+
+
+bool fixup_exception(struct pt_regs *regs)
+{
+	const struct exception_table_entry *ex;
 
-		return 1;
-	}
+	ex = search_exception_tables(exception_era(regs));
+	if (!ex)
+		return false;
 
-	return 0;
+	return ex_handler_fixup(ex, regs);
 }
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 2c80da0220c32..9321c0a05ffdc 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1523,6 +1523,14 @@ static int addend_mips_rel(struct elf_info *elf, Elf_Shdr *sechdr, Elf_Rela *r)
 #define R_RISCV_SUB32		39
 #endif
 
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH		258
+#endif
+
+#ifndef R_LARCH_SUB32
+#define R_LARCH_SUB32		55
+#endif
+
 static void section_rela(const char *modname, struct elf_info *elf,
 			 Elf_Shdr *sechdr)
 {
@@ -1564,6 +1572,11 @@ static void section_rela(const char *modname, struct elf_info *elf,
 			    ELF_R_TYPE(r.r_info) == R_RISCV_SUB32)
 				continue;
 			break;
+		case EM_LOONGARCH:
+			if (!strcmp("__ex_table", fromsec) &&
+			    ELF_R_TYPE(r.r_info) == R_LARCH_SUB32)
+				continue;
+			break;
 		}
 		sym = elf->symtab_start + r_sym;
 		/* Skip special sections */
diff --git a/scripts/sorttable.c b/scripts/sorttable.c
index fba40e99f3541..0f2beda804789 100644
--- a/scripts/sorttable.c
+++ b/scripts/sorttable.c
@@ -312,12 +312,12 @@ static int do_file(char const *const fname, void *addr)
 	case EM_PARISC:
 	case EM_PPC:
 	case EM_PPC64:
+	case EM_LOONGARCH:
 		custom_sort = sort_relative_table;
 		break;
 	case EM_ARCOMPACT:
 	case EM_ARCV2:
 	case EM_ARM:
-	case EM_LOONGARCH:
 	case EM_MICROBLAZE:
 	case EM_MIPS:
 	case EM_XTENSA:
-- 
GitLab


From 26bc82441250f2e01621f5b26606a4f6926ee3ad Mon Sep 17 00:00:00 2001
From: Youling Tang <tangyouling@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 3811/4122] LoongArch: extable: Add `type` and `data` fields

This is a LoongArch port of commit d6e2cc564775 ("arm64: extable: add
`type` and `data` fields").

Subsequent patches will add specialized handlers for fixups, in addition
to the simple PC fixup we have today. In preparation, this patch adds a
new `type` field to struct exception_table_entry, and uses this to
distinguish the fixup and other cases. A `data` field is also added so
that subsequent patches can associate data specific to each exception
site (e.g. register numbers).

Handlers are named ex_handler_*() for consistency, following the example
of x86. At the same time, get_ex_fixup() is split out into a helper so
that it can be used by other ex_handler_*() functions in the subsequent
patches.

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/asm-extable.h | 15 +++++++++++----
 arch/loongarch/include/asm/extable.h     | 11 +++++++++++
 arch/loongarch/kernel/vmlinux.lds.S      |  3 +--
 arch/loongarch/mm/extable.c              |  7 ++++++-
 scripts/sorttable.c                      |  2 +-
 5 files changed, 30 insertions(+), 8 deletions(-)

diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h
index 74f8bc75472ac..634bd770e3c40 100644
--- a/arch/loongarch/include/asm/asm-extable.h
+++ b/arch/loongarch/include/asm/asm-extable.h
@@ -2,17 +2,22 @@
 #ifndef __ASM_ASM_EXTABLE_H
 #define __ASM_ASM_EXTABLE_H
 
+#define EX_TYPE_NONE			0
+#define EX_TYPE_FIXUP			1
+
 #ifdef __ASSEMBLY__
 
-#define __ASM_EXTABLE_RAW(insn, fixup)			\
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data)	\
 	.pushsection	__ex_table, "a";		\
 	.balign		4;				\
 	.long		((insn) - .);			\
 	.long		((fixup) - .);			\
+	.short		(type);				\
+	.short		(data);				\
 	.popsection;
 
 	.macro		_asm_extable, insn, fixup
-	__ASM_EXTABLE_RAW(\insn, \fixup)
+	__ASM_EXTABLE_RAW(\insn, \fixup, EX_TYPE_FIXUP, 0)
 	.endm
 
 #else /* __ASSEMBLY__ */
@@ -20,15 +25,17 @@
 #include <linux/bits.h>
 #include <linux/stringify.h>
 
-#define __ASM_EXTABLE_RAW(insn, fixup)			\
+#define __ASM_EXTABLE_RAW(insn, fixup, type, data)	\
 	".pushsection	__ex_table, \"a\"\n"		\
 	".balign	4\n"				\
 	".long		((" insn ") - .)\n"		\
 	".long		((" fixup ") - .)\n"		\
+	".short		(" type ")\n"			\
+	".short		(" data ")\n"			\
 	".popsection\n"
 
 #define _ASM_EXTABLE(insn, fixup)	\
-	__ASM_EXTABLE_RAW(#insn, #fixup)
+	__ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h
index b571c89705d1e..92612b4364a1d 100644
--- a/arch/loongarch/include/asm/extable.h
+++ b/arch/loongarch/include/asm/extable.h
@@ -17,10 +17,21 @@
 
 struct exception_table_entry {
 	int insn, fixup;
+	short type, data;
 };
 
 #define ARCH_HAS_RELATIVE_EXTABLE
 
+#define swap_ex_entry_fixup(a, b, tmp, delta)		\
+do {							\
+	(a)->fixup = (b)->fixup + (delta);		\
+	(b)->fixup = (tmp).fixup - (delta);		\
+	(a)->type = (b)->type;				\
+	(b)->type = (tmp).type;				\
+	(a)->data = (b)->data;				\
+	(b)->data = (tmp).data;				\
+} while (0)
+
 bool fixup_exception(struct pt_regs *regs);
 
 #endif
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index b3309a5e695b2..efecda0c23616 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -4,6 +4,7 @@
 #include <asm/thread_info.h>
 
 #define PAGE_SIZE _PAGE_SIZE
+#define RO_EXCEPTION_TABLE_ALIGN	4
 
 /*
  * Put .bss..swapper_pg_dir as the first thing in .bss. This will
@@ -53,8 +54,6 @@ SECTIONS
 	. = ALIGN(PECOFF_SEGMENT_ALIGN);
 	_etext = .;
 
-	EXCEPTION_TABLE(16)
-
 	.got : ALIGN(16) { *(.got) }
 	.plt : ALIGN(16) { *(.plt) }
 	.got.plt : ALIGN(16) { *(.got.plt) }
diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c
index 08a9a7d6357a7..fd2395221cffb 100644
--- a/arch/loongarch/mm/extable.c
+++ b/arch/loongarch/mm/extable.c
@@ -30,5 +30,10 @@ bool fixup_exception(struct pt_regs *regs)
 	if (!ex)
 		return false;
 
-	return ex_handler_fixup(ex, regs);
+	switch (ex->type) {
+	case EX_TYPE_FIXUP:
+		return ex_handler_fixup(ex, regs);
+	}
+
+	BUG();
 }
diff --git a/scripts/sorttable.c b/scripts/sorttable.c
index 0f2beda804789..83cdb843d92f0 100644
--- a/scripts/sorttable.c
+++ b/scripts/sorttable.c
@@ -304,6 +304,7 @@ static int do_file(char const *const fname, void *addr)
 	switch (r2(&ehdr->e_machine)) {
 	case EM_386:
 	case EM_AARCH64:
+	case EM_LOONGARCH:
 	case EM_RISCV:
 	case EM_S390:
 	case EM_X86_64:
@@ -312,7 +313,6 @@ static int do_file(char const *const fname, void *addr)
 	case EM_PARISC:
 	case EM_PPC:
 	case EM_PPC64:
-	case EM_LOONGARCH:
 		custom_sort = sort_relative_table;
 		break;
 	case EM_ARCOMPACT:
-- 
GitLab


From 672999cfae3e830a64c4996362a26934fd555ff9 Mon Sep 17 00:00:00 2001
From: Youling Tang <tangyouling@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 3812/4122] LoongArch: extable: Add a dedicated uaccess handler

Inspired by commit 2e77a62cb3a6("arm64: extable: add a dedicated uaccess
handler"), do similar to LoongArch to add a dedicated uaccess exception
handler to update registers in exception context and subsequently return
back into the function which faulted, so we remove the need for fixups
specialized to each faulting instruction.

Add gpr-num.h here because we need to map the same GPR names to integer
constants, so that we can use this to build meta-data for the exception
fixups.

The compiler treats gpr 0 as zero rather than $r0, so set it separately
to .L__gpr_num_zero, otherwise the following assembly error will occurs:

{standard input}: Assembler messages:
{standard input}:1074: Error: invalid operands (*UND* and *ABS* sections) for `<<'
{standard input}:1160: Error: invalid operands (*UND* and *ABS* sections) for `<<'
make[1]: *** [scripts/Makefile.build:249: fs/fcntl.o] Error 1

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/asm-extable.h | 22 ++++++++++++++++++++++
 arch/loongarch/include/asm/futex.h       | 22 ++++++----------------
 arch/loongarch/include/asm/gpr-num.h     | 22 ++++++++++++++++++++++
 arch/loongarch/include/asm/uaccess.h     | 17 ++++-------------
 arch/loongarch/mm/extable.c              | 22 ++++++++++++++++++++++
 5 files changed, 76 insertions(+), 29 deletions(-)
 create mode 100644 arch/loongarch/include/asm/gpr-num.h

diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h
index 634bd770e3c40..f5502cb50c6ef 100644
--- a/arch/loongarch/include/asm/asm-extable.h
+++ b/arch/loongarch/include/asm/asm-extable.h
@@ -4,6 +4,7 @@
 
 #define EX_TYPE_NONE			0
 #define EX_TYPE_FIXUP			1
+#define EX_TYPE_UACCESS_ERR_ZERO	2
 
 #ifdef __ASSEMBLY__
 
@@ -24,6 +25,7 @@
 
 #include <linux/bits.h>
 #include <linux/stringify.h>
+#include <asm/gpr-num.h>
 
 #define __ASM_EXTABLE_RAW(insn, fixup, type, data)	\
 	".pushsection	__ex_table, \"a\"\n"		\
@@ -37,6 +39,26 @@
 #define _ASM_EXTABLE(insn, fixup)	\
 	__ASM_EXTABLE_RAW(#insn, #fixup, __stringify(EX_TYPE_FIXUP), "0")
 
+#define EX_DATA_REG_ERR_SHIFT	0
+#define EX_DATA_REG_ERR		GENMASK(4, 0)
+#define EX_DATA_REG_ZERO_SHIFT	5
+#define EX_DATA_REG_ZERO	GENMASK(9, 5)
+
+#define EX_DATA_REG(reg, gpr)						\
+	"((.L__gpr_num_" #gpr ") << " __stringify(EX_DATA_REG_##reg##_SHIFT) ")"
+
+#define _ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)		\
+	__DEFINE_ASM_GPR_NUMS						\
+	__ASM_EXTABLE_RAW(#insn, #fixup, 				\
+			  __stringify(EX_TYPE_UACCESS_ERR_ZERO),	\
+			  "("						\
+			    EX_DATA_REG(ERR, err) " | "			\
+			    EX_DATA_REG(ZERO, zero)			\
+			  ")")
+
+#define _ASM_EXTABLE_UACCESS_ERR(insn, fixup, err)			\
+	_ASM_EXTABLE_UACCESS_ERR_ZERO(insn, fixup, err, zero)
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* __ASM_ASM_EXTABLE_H */
diff --git a/arch/loongarch/include/asm/futex.h b/arch/loongarch/include/asm/futex.h
index bdcd1c6132998..042ca4448e4d3 100644
--- a/arch/loongarch/include/asm/futex.h
+++ b/arch/loongarch/include/asm/futex.h
@@ -19,16 +19,11 @@
 	"2:	sc.w	$t0, %2				\n"	\
 	"	beqz	$t0, 1b				\n"	\
 	"3:						\n"	\
-	"	.section .fixup,\"ax\"			\n"	\
-	"4:	li.w	%0, %6				\n"	\
-	"	b	3b				\n"	\
-	"	.previous				\n"	\
-	_ASM_EXTABLE(1b, 4b)					\
-	_ASM_EXTABLE(2b, 4b)					\
+	_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0)			\
+	_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0)			\
 	: "=r" (ret), "=&r" (oldval),				\
 	  "=ZC" (*uaddr)					\
-	: "0" (0), "ZC" (*uaddr), "Jr" (oparg),			\
-	  "i" (-EFAULT)						\
+	: "0" (0), "ZC" (*uaddr), "Jr" (oparg)			\
 	: "memory", "t0");					\
 }
 
@@ -85,15 +80,10 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 oldval, u32 newv
 	"	beqz	$t0, 1b					\n"
 	"3:							\n"
 	__WEAK_LLSC_MB
-	"	.section .fixup,\"ax\"				\n"
-	"4:	li.d	%0, %6					\n"
-	"	b	3b					\n"
-	"	.previous					\n"
-	_ASM_EXTABLE(1b, 4b)
-	_ASM_EXTABLE(2b, 4b)
+	_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %0)
+	_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %0)
 	: "+r" (ret), "=&r" (val), "=ZC" (*uaddr)
-	: "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval),
-	  "i" (-EFAULT)
+	: "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval)
 	: "memory", "t0");
 
 	*uval = val;
diff --git a/arch/loongarch/include/asm/gpr-num.h b/arch/loongarch/include/asm/gpr-num.h
new file mode 100644
index 0000000000000..e0941af20c7e7
--- /dev/null
+++ b/arch/loongarch/include/asm/gpr-num.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef __ASM_GPR_NUM_H
+#define __ASM_GPR_NUM_H
+
+#ifdef __ASSEMBLY__
+
+	.equ	.L__gpr_num_zero, 0
+	.irp	num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31
+	.equ	.L__gpr_num_$r\num, \num
+	.endr
+
+#else /* __ASSEMBLY__ */
+
+#define __DEFINE_ASM_GPR_NUMS					\
+"	.equ	.L__gpr_num_zero, 0\n"				\
+"	.irp	num,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31\n" \
+"	.equ	.L__gpr_num_$r\\num, \\num\n"			\
+"	.endr\n"						\
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_GPR_NUM_H */
diff --git a/arch/loongarch/include/asm/uaccess.h b/arch/loongarch/include/asm/uaccess.h
index e33282e0bdef2..255899d4a7c36 100644
--- a/arch/loongarch/include/asm/uaccess.h
+++ b/arch/loongarch/include/asm/uaccess.h
@@ -161,14 +161,9 @@ do {									\
 	__asm__ __volatile__(						\
 	"1:	" insn "	%1, %2				\n"	\
 	"2:							\n"	\
-	"	.section .fixup,\"ax\"				\n"	\
-	"3:	li.w	%0, %3					\n"	\
-	"	move	%1, $zero				\n"	\
-	"	b	2b					\n"	\
-	"	.previous					\n"	\
-	_ASM_EXTABLE(1b, 3b)						\
+	_ASM_EXTABLE_UACCESS_ERR_ZERO(1b, 2b, %0, %1)			\
 	: "+r" (__gu_err), "=r" (__gu_tmp)				\
-	: "m" (__m(ptr)), "i" (-EFAULT));				\
+	: "m" (__m(ptr)));						\
 									\
 	(val) = (__typeof__(*(ptr))) __gu_tmp;				\
 }
@@ -191,13 +186,9 @@ do {									\
 	__asm__ __volatile__(						\
 	"1:	" insn "	%z2, %1		# __put_user_asm\n"	\
 	"2:							\n"	\
-	"	.section	.fixup,\"ax\"			\n"	\
-	"3:	li.w	%0, %3					\n"	\
-	"	b	2b					\n"	\
-	"	.previous					\n"	\
-	_ASM_EXTABLE(1b, 3b)						\
+	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %0)				\
 	: "+r" (__pu_err), "=m" (__m(ptr))				\
-	: "Jr" (__pu_val), "i" (-EFAULT));				\
+	: "Jr" (__pu_val));						\
 }
 
 #define __get_kernel_nofault(dst, src, type, err_label)			\
diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c
index fd2395221cffb..9b0cfd8989408 100644
--- a/arch/loongarch/mm/extable.c
+++ b/arch/loongarch/mm/extable.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/bitfield.h>
 #include <linux/extable.h>
 #include <linux/uaccess.h>
 #include <asm/asm-extable.h>
@@ -13,6 +14,13 @@ get_ex_fixup(const struct exception_table_entry *ex)
 	return ((unsigned long)&ex->fixup + ex->fixup);
 }
 
+static inline void regs_set_gpr(struct pt_regs *regs,
+				unsigned int offset, unsigned long val)
+{
+	if (offset && offset <= MAX_REG_OFFSET)
+		*(unsigned long *)((unsigned long)regs + offset) = val;
+}
+
 static bool ex_handler_fixup(const struct exception_table_entry *ex,
 			     struct pt_regs *regs)
 {
@@ -21,6 +29,18 @@ static bool ex_handler_fixup(const struct exception_table_entry *ex,
 	return true;
 }
 
+static bool ex_handler_uaccess_err_zero(const struct exception_table_entry *ex,
+					struct pt_regs *regs)
+{
+	int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
+	int reg_zero = FIELD_GET(EX_DATA_REG_ZERO, ex->data);
+
+	regs_set_gpr(regs, reg_err * sizeof(unsigned long), -EFAULT);
+	regs_set_gpr(regs, reg_zero * sizeof(unsigned long), 0);
+	regs->csr_era = get_ex_fixup(ex);
+
+	return true;
+}
 
 bool fixup_exception(struct pt_regs *regs)
 {
@@ -33,6 +53,8 @@ bool fixup_exception(struct pt_regs *regs)
 	switch (ex->type) {
 	case EX_TYPE_FIXUP:
 		return ex_handler_fixup(ex, regs);
+	case EX_TYPE_UACCESS_ERR_ZERO:
+		return ex_handler_uaccess_err_zero(ex, regs);
 	}
 
 	BUG();
-- 
GitLab


From 912bcfaf36771a2bf7a83799ce5454850d1c3f40 Mon Sep 17 00:00:00 2001
From: Youling Tang <tangyouling@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 3813/4122] LoongArch: Remove the .fixup section usage

Use the `.L_xxx` label to improve fixup code and then remove the .fixup
section usage.

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/lib/clear_user.S | 14 +++++---------
 arch/loongarch/lib/copy_user.S  | 16 ++++++----------
 2 files changed, 11 insertions(+), 19 deletions(-)

diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index 7a066d6a41b80..d5c9e44ac8c4f 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -9,15 +9,11 @@
 #include <asm/export.h>
 #include <asm/regdef.h>
 
-.macro fixup_ex from, to, offset, fix
-.if \fix
-	.section .fixup, "ax"
-\to:	addi.d	a0, a1, \offset
+.irp to, 0
+.L_fixup_handle_\to\():
+	addi.d	a0, a1, (\to) * (-8)
 	jr	ra
-	.previous
-.endif
-	_asm_extable \from\()b, \to\()b
-.endm
+.endr
 
 /*
  * unsigned long __clear_user(void *addr, size_t size)
@@ -36,7 +32,7 @@ SYM_FUNC_START(__clear_user)
 2:	move	a0, a1
 	jr	ra
 
-	fixup_ex 1, 3, 0, 1
+	_asm_extable 1b, .L_fixup_handle_0
 SYM_FUNC_END(__clear_user)
 
 EXPORT_SYMBOL(__clear_user)
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index f8ace04586c26..61933d964da03 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -9,15 +9,11 @@
 #include <asm/export.h>
 #include <asm/regdef.h>
 
-.macro fixup_ex from, to, offset, fix
-.if \fix
-	.section .fixup, "ax"
-\to:	addi.d	a0, a2, \offset
+.irp to, 0
+.L_fixup_handle_\to\():
+	addi.d	a0, a2, (\to) * (-8)
 	jr	ra
-	.previous
-.endif
-	_asm_extable \from\()b, \to\()b
-.endm
+.endr
 
 /*
  * unsigned long __copy_user(void *to, const void *from, size_t n)
@@ -39,8 +35,8 @@ SYM_FUNC_START(__copy_user)
 3:	move	a0, a2
 	jr	ra
 
-	fixup_ex 1, 4, 0, 1
-	fixup_ex 2, 4, 0, 0
+	_asm_extable 1b, .L_fixup_handle_0
+	_asm_extable 2b, .L_fixup_handle_0
 SYM_FUNC_END(__copy_user)
 
 EXPORT_SYMBOL(__copy_user)
-- 
GitLab


From dbcd7f5fafea64dbe588c4ec18bc309fde5d1e1c Mon Sep 17 00:00:00 2001
From: Youling Tang <tangyouling@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 3814/4122] LoongArch: BPF: Add BPF exception tables

Inspired by commit 800834285361("bpf, arm64: Add BPF exception tables"),
do similar to LoongArch to add BPF exception tables.

When a tracing BPF program attempts to read memory without using the
bpf_probe_read() helper, the verifier marks the load instruction with
the BPF_PROBE_MEM flag. Since the LoongArch JIT does not currently
recognize this flag it falls back to the interpreter.

Add support for BPF_PROBE_MEM, by appending an exception table to the
BPF program. If the load instruction causes a data abort, the fixup
infrastructure finds the exception table and fixes up the fault, by
clearing the destination register and jumping over the faulting
instruction.

To keep the compact exception table entry format, inspect the pc in
fixup_exception(). A more generic solution would add a "handler" field
to the table entry, like on x86, s390 and arm64, etc.

Signed-off-by: Youling Tang <tangyouling@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/asm-extable.h |  1 +
 arch/loongarch/include/asm/extable.h     | 10 +++
 arch/loongarch/mm/extable.c              |  2 +
 arch/loongarch/net/bpf_jit.c             | 86 ++++++++++++++++++++++--
 arch/loongarch/net/bpf_jit.h             |  2 +
 5 files changed, 96 insertions(+), 5 deletions(-)

diff --git a/arch/loongarch/include/asm/asm-extable.h b/arch/loongarch/include/asm/asm-extable.h
index f5502cb50c6ef..df05005f2b80a 100644
--- a/arch/loongarch/include/asm/asm-extable.h
+++ b/arch/loongarch/include/asm/asm-extable.h
@@ -5,6 +5,7 @@
 #define EX_TYPE_NONE			0
 #define EX_TYPE_FIXUP			1
 #define EX_TYPE_UACCESS_ERR_ZERO	2
+#define EX_TYPE_BPF			3
 
 #ifdef __ASSEMBLY__
 
diff --git a/arch/loongarch/include/asm/extable.h b/arch/loongarch/include/asm/extable.h
index 92612b4364a1d..5abf29f1bc919 100644
--- a/arch/loongarch/include/asm/extable.h
+++ b/arch/loongarch/include/asm/extable.h
@@ -32,6 +32,16 @@ do {							\
 	(b)->data = (tmp).data;				\
 } while (0)
 
+#ifdef CONFIG_BPF_JIT
+bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs);
+#else
+static inline
+bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs)
+{
+	return false;
+}
+#endif /* !CONFIG_BPF_JIT */
+
 bool fixup_exception(struct pt_regs *regs);
 
 #endif
diff --git a/arch/loongarch/mm/extable.c b/arch/loongarch/mm/extable.c
index 9b0cfd8989408..9ab69872dcffe 100644
--- a/arch/loongarch/mm/extable.c
+++ b/arch/loongarch/mm/extable.c
@@ -55,6 +55,8 @@ bool fixup_exception(struct pt_regs *regs)
 		return ex_handler_fixup(ex, regs);
 	case EX_TYPE_UACCESS_ERR_ZERO:
 		return ex_handler_uaccess_err_zero(ex, regs);
+	case EX_TYPE_BPF:
+		return ex_handler_bpf(ex, regs);
 	}
 
 	BUG();
diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c
index bdcd0c7719a9e..c4b1947ebf768 100644
--- a/arch/loongarch/net/bpf_jit.c
+++ b/arch/loongarch/net/bpf_jit.c
@@ -387,6 +387,65 @@ static bool is_signed_bpf_cond(u8 cond)
 	       cond == BPF_JSGE || cond == BPF_JSLE;
 }
 
+#define BPF_FIXUP_REG_MASK	GENMASK(31, 27)
+#define BPF_FIXUP_OFFSET_MASK	GENMASK(26, 0)
+
+bool ex_handler_bpf(const struct exception_table_entry *ex,
+		    struct pt_regs *regs)
+{
+	int dst_reg = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup);
+	off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup);
+
+	regs->regs[dst_reg] = 0;
+	regs->csr_era = (unsigned long)&ex->fixup - offset;
+
+	return true;
+}
+
+/* For accesses to BTF pointers, add an entry to the exception table */
+static int add_exception_handler(const struct bpf_insn *insn,
+				 struct jit_ctx *ctx,
+				 int dst_reg)
+{
+	unsigned long pc;
+	off_t offset;
+	struct exception_table_entry *ex;
+
+	if (!ctx->image || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM)
+		return 0;
+
+	if (WARN_ON_ONCE(ctx->num_exentries >= ctx->prog->aux->num_exentries))
+		return -EINVAL;
+
+	ex = &ctx->prog->aux->extable[ctx->num_exentries];
+	pc = (unsigned long)&ctx->image[ctx->idx - 1];
+
+	offset = pc - (long)&ex->insn;
+	if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
+		return -ERANGE;
+
+	ex->insn = offset;
+
+	/*
+	 * Since the extable follows the program, the fixup offset is always
+	 * negative and limited to BPF_JIT_REGION_SIZE. Store a positive value
+	 * to keep things simple, and put the destination register in the upper
+	 * bits. We don't need to worry about buildtime or runtime sort
+	 * modifying the upper bits because the table is already sorted, and
+	 * isn't part of the main exception table.
+	 */
+	offset = (long)&ex->fixup - (pc + LOONGARCH_INSN_SIZE);
+	if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
+		return -ERANGE;
+
+	ex->type = EX_TYPE_BPF;
+	ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) | FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
+
+	ctx->num_exentries++;
+
+	return 0;
+}
+
 static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool extra_pass)
 {
 	u8 tm = -1;
@@ -816,6 +875,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 	case BPF_LDX | BPF_MEM | BPF_H:
 	case BPF_LDX | BPF_MEM | BPF_W:
 	case BPF_LDX | BPF_MEM | BPF_DW:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
+	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
 		switch (BPF_SIZE(code)) {
 		case BPF_B:
 			if (is_signed_imm12(off)) {
@@ -854,6 +917,10 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext
 			}
 			break;
 		}
+
+		ret = add_exception_handler(insn, ctx, dst);
+		if (ret)
+			return ret;
 		break;
 
 	/* *(size *)(dst + off) = imm */
@@ -1018,6 +1085,9 @@ static int validate_code(struct jit_ctx *ctx)
 			return -1;
 	}
 
+	if (WARN_ON_ONCE(ctx->num_exentries != ctx->prog->aux->num_exentries))
+		return -1;
+
 	return 0;
 }
 
@@ -1025,7 +1095,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	bool tmp_blinded = false, extra_pass = false;
 	u8 *image_ptr;
-	int image_size;
+	int image_size, prog_size, extable_size;
 	struct jit_ctx ctx;
 	struct jit_data *jit_data;
 	struct bpf_binary_header *header;
@@ -1066,7 +1136,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		image_ptr = jit_data->image;
 		header = jit_data->header;
 		extra_pass = true;
-		image_size = sizeof(u32) * ctx.idx;
+		prog_size = sizeof(u32) * ctx.idx;
 		goto skip_init_ctx;
 	}
 
@@ -1088,12 +1158,15 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 	ctx.epilogue_offset = ctx.idx;
 	build_epilogue(&ctx);
 
+	extable_size = prog->aux->num_exentries * sizeof(struct exception_table_entry);
+
 	/* Now we know the actual image size.
 	 * As each LoongArch instruction is of length 32bit,
 	 * we are translating number of JITed intructions into
 	 * the size required to store these JITed code.
 	 */
-	image_size = sizeof(u32) * ctx.idx;
+	prog_size = sizeof(u32) * ctx.idx;
+	image_size = prog_size + extable_size;
 	/* Now we know the size of the structure to make */
 	header = bpf_jit_binary_alloc(image_size, &image_ptr,
 				      sizeof(u32), jit_fill_hole);
@@ -1104,9 +1177,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 
 	/* 2. Now, the actual pass to generate final JIT code */
 	ctx.image = (union loongarch_instruction *)image_ptr;
+	if (extable_size)
+		prog->aux->extable = (void *)image_ptr + prog_size;
 
 skip_init_ctx:
 	ctx.idx = 0;
+	ctx.num_exentries = 0;
 
 	build_prologue(&ctx);
 	if (build_body(&ctx, extra_pass)) {
@@ -1125,7 +1201,7 @@ skip_init_ctx:
 
 	/* And we're done */
 	if (bpf_jit_enable > 1)
-		bpf_jit_dump(prog->len, image_size, 2, ctx.image);
+		bpf_jit_dump(prog->len, prog_size, 2, ctx.image);
 
 	/* Update the icache */
 	flush_icache_range((unsigned long)header, (unsigned long)(ctx.image + ctx.idx));
@@ -1147,7 +1223,7 @@ skip_init_ctx:
 		jit_data->header = header;
 	}
 	prog->jited = 1;
-	prog->jited_len = image_size;
+	prog->jited_len = prog_size;
 	prog->bpf_func = (void *)ctx.image;
 
 	if (!prog->is_func || extra_pass) {
diff --git a/arch/loongarch/net/bpf_jit.h b/arch/loongarch/net/bpf_jit.h
index e665ddb0aeb85..ca708024fdd3e 100644
--- a/arch/loongarch/net/bpf_jit.h
+++ b/arch/loongarch/net/bpf_jit.h
@@ -4,6 +4,7 @@
  *
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
+#include <linux/bitfield.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <asm/cacheflush.h>
@@ -15,6 +16,7 @@ struct jit_ctx {
 	unsigned int flags;
 	unsigned int epilogue_offset;
 	u32 *offset;
+	int num_exentries;
 	union loongarch_instruction *image;
 	u32 stack_size;
 };
-- 
GitLab


From 61a6fccc0bd2e8030b2672a52ef3f6706b2b2ee4 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 3815/4122] LoongArch: Add unaligned access support

Loongson-2 series (Loongson-2K500, Loongson-2K1000) don't support
unaligned access in hardware, while Loongson-3 series (Loongson-3A5000,
Loongson-3C5000) are configurable whether support unaligned access in
hardware. This patch add unaligned access emulation for those LoongArch
processors without hardware support.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 Documentation/admin-guide/sysctl/kernel.rst |   8 +-
 arch/loongarch/Kconfig                      |   2 +
 arch/loongarch/include/asm/inst.h           |  14 +
 arch/loongarch/include/asm/thread_info.h    |   2 +-
 arch/loongarch/kernel/Makefile              |   3 +-
 arch/loongarch/kernel/traps.c               |  27 ++
 arch/loongarch/kernel/unaligned.c           | 499 ++++++++++++++++++++
 arch/loongarch/lib/Makefile                 |   2 +-
 arch/loongarch/lib/unaligned.S              |  84 ++++
 9 files changed, 634 insertions(+), 7 deletions(-)
 create mode 100644 arch/loongarch/kernel/unaligned.c
 create mode 100644 arch/loongarch/lib/unaligned.S

diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst
index 98d1b198b2b4c..f2b802cd6208b 100644
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -433,8 +433,8 @@ ignore-unaligned-usertrap
 
 On architectures where unaligned accesses cause traps, and where this
 feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``;
-currently, ``arc`` and ``ia64``), controls whether all unaligned traps
-are logged.
+currently, ``arc``, ``ia64`` and ``loongarch``), controls whether all
+unaligned traps are logged.
 
 = =============================================================
 0 Log all unaligned accesses.
@@ -1457,8 +1457,8 @@ unaligned-trap
 
 On architectures where unaligned accesses cause traps, and where this
 feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW``; currently,
-``arc`` and ``parisc``), controls whether unaligned traps are caught
-and emulated (instead of failing).
+``arc``, ``parisc`` and ``loongarch``), controls whether unaligned traps
+are caught and emulated (instead of failing).
 
 = ========================================================
 0 Do not emulate unaligned accesses.
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 903096bd87f88..0daf6263655b3 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -121,6 +121,8 @@ config LOONGARCH
 	select RTC_LIB
 	select SMP
 	select SPARSE_IRQ
+	select SYSCTL_ARCH_UNALIGN_ALLOW
+	select SYSCTL_ARCH_UNALIGN_NO_WARN
 	select SYSCTL_EXCEPTION_TRACE
 	select SWIOTLB
 	select TRACE_IRQFLAGS_SUPPORT
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index fce1843ceebb3..889d6c9fc2b6a 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -76,6 +76,10 @@ enum reg2i12_op {
 	ldbu_op		= 0xa8,
 	ldhu_op		= 0xa9,
 	ldwu_op		= 0xaa,
+	flds_op		= 0xac,
+	fsts_op		= 0xad,
+	fldd_op		= 0xae,
+	fstd_op		= 0xaf,
 };
 
 enum reg2i14_op {
@@ -146,6 +150,10 @@ enum reg3_op {
 	ldxbu_op	= 0x7040,
 	ldxhu_op	= 0x7048,
 	ldxwu_op	= 0x7050,
+	fldxs_op	= 0x7060,
+	fldxd_op	= 0x7068,
+	fstxs_op	= 0x7070,
+	fstxd_op	= 0x7078,
 	amswapw_op	= 0x70c0,
 	amswapd_op	= 0x70c1,
 	amaddw_op	= 0x70c2,
@@ -566,4 +574,10 @@ static inline void emit_##NAME(union loongarch_instruction *insn,	\
 
 DEF_EMIT_REG3SA2_FORMAT(alsld, alsld_op)
 
+struct pt_regs;
+
+void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc);
+unsigned long unaligned_read(void __user *addr, void *value, unsigned long n, bool sign);
+unsigned long unaligned_write(void __user *addr, unsigned long value, unsigned long n);
+
 #endif /* _ASM_INST_H */
diff --git a/arch/loongarch/include/asm/thread_info.h b/arch/loongarch/include/asm/thread_info.h
index b7dd9f19a5a9c..1a3354ca056e9 100644
--- a/arch/loongarch/include/asm/thread_info.h
+++ b/arch/loongarch/include/asm/thread_info.h
@@ -38,7 +38,7 @@ struct thread_info {
 #define INIT_THREAD_INFO(tsk)			\
 {						\
 	.task		= &tsk,			\
-	.flags		= 0,			\
+	.flags		= _TIF_FIXADE,		\
 	.cpu		= 0,			\
 	.preempt_count	= INIT_PREEMPT_COUNT,	\
 }
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 42be564278fa1..2ad2555b53eaf 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -7,7 +7,8 @@ extra-y		:= vmlinux.lds
 
 obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
 		   traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
-		   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o
+		   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
+		   unaligned.o
 
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_EFI) 		+= efi.o
diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c
index 1a4dce84ebc60..7ea62faeeadb5 100644
--- a/arch/loongarch/kernel/traps.c
+++ b/arch/loongarch/kernel/traps.c
@@ -368,13 +368,40 @@ asmlinkage void noinstr do_ade(struct pt_regs *regs)
 	irqentry_exit(regs, state);
 }
 
+/* sysctl hooks */
+int unaligned_enabled __read_mostly = 1;	/* Enabled by default */
+int no_unaligned_warning __read_mostly = 1;	/* Only 1 warning by default */
+
 asmlinkage void noinstr do_ale(struct pt_regs *regs)
 {
+	unsigned int *pc;
 	irqentry_state_t state = irqentry_enter(regs);
 
+	perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, regs->csr_badvaddr);
+
+	/*
+	 * Did we catch a fault trying to load an instruction?
+	 */
+	if (regs->csr_badvaddr == regs->csr_era)
+		goto sigbus;
+	if (user_mode(regs) && !test_thread_flag(TIF_FIXADE))
+		goto sigbus;
+	if (!unaligned_enabled)
+		goto sigbus;
+	if (!no_unaligned_warning)
+		show_registers(regs);
+
+	pc = (unsigned int *)exception_era(regs);
+
+	emulate_load_store_insn(regs, (void __user *)regs->csr_badvaddr, pc);
+
+	goto out;
+
+sigbus:
 	die_if_kernel("Kernel ale access", regs);
 	force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)regs->csr_badvaddr);
 
+out:
 	irqentry_exit(regs, state);
 }
 
diff --git a/arch/loongarch/kernel/unaligned.c b/arch/loongarch/kernel/unaligned.c
new file mode 100644
index 0000000000000..bdff825d29ef4
--- /dev/null
+++ b/arch/loongarch/kernel/unaligned.c
@@ -0,0 +1,499 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Handle unaligned accesses by emulation.
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ *
+ * Derived from MIPS:
+ * Copyright (C) 1996, 1998, 1999, 2002 by Ralf Baechle
+ * Copyright (C) 1999 Silicon Graphics, Inc.
+ * Copyright (C) 2014 Imagination Technologies Ltd.
+ */
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/signal.h>
+#include <linux/debugfs.h>
+#include <linux/perf_event.h>
+
+#include <asm/asm.h>
+#include <asm/branch.h>
+#include <asm/fpu.h>
+#include <asm/inst.h>
+
+#include "access-helper.h"
+
+#ifdef CONFIG_DEBUG_FS
+static u32 unaligned_instructions_user;
+static u32 unaligned_instructions_kernel;
+#endif
+
+static inline unsigned long read_fpr(unsigned int idx)
+{
+#define READ_FPR(idx, __value)		\
+	__asm__ __volatile__("movfr2gr.d %0, $f"#idx"\n\t" : "=r"(__value));
+
+	unsigned long __value;
+
+	switch (idx) {
+	case 0:
+		READ_FPR(0, __value);
+		break;
+	case 1:
+		READ_FPR(1, __value);
+		break;
+	case 2:
+		READ_FPR(2, __value);
+		break;
+	case 3:
+		READ_FPR(3, __value);
+		break;
+	case 4:
+		READ_FPR(4, __value);
+		break;
+	case 5:
+		READ_FPR(5, __value);
+		break;
+	case 6:
+		READ_FPR(6, __value);
+		break;
+	case 7:
+		READ_FPR(7, __value);
+		break;
+	case 8:
+		READ_FPR(8, __value);
+		break;
+	case 9:
+		READ_FPR(9, __value);
+		break;
+	case 10:
+		READ_FPR(10, __value);
+		break;
+	case 11:
+		READ_FPR(11, __value);
+		break;
+	case 12:
+		READ_FPR(12, __value);
+		break;
+	case 13:
+		READ_FPR(13, __value);
+		break;
+	case 14:
+		READ_FPR(14, __value);
+		break;
+	case 15:
+		READ_FPR(15, __value);
+		break;
+	case 16:
+		READ_FPR(16, __value);
+		break;
+	case 17:
+		READ_FPR(17, __value);
+		break;
+	case 18:
+		READ_FPR(18, __value);
+		break;
+	case 19:
+		READ_FPR(19, __value);
+		break;
+	case 20:
+		READ_FPR(20, __value);
+		break;
+	case 21:
+		READ_FPR(21, __value);
+		break;
+	case 22:
+		READ_FPR(22, __value);
+		break;
+	case 23:
+		READ_FPR(23, __value);
+		break;
+	case 24:
+		READ_FPR(24, __value);
+		break;
+	case 25:
+		READ_FPR(25, __value);
+		break;
+	case 26:
+		READ_FPR(26, __value);
+		break;
+	case 27:
+		READ_FPR(27, __value);
+		break;
+	case 28:
+		READ_FPR(28, __value);
+		break;
+	case 29:
+		READ_FPR(29, __value);
+		break;
+	case 30:
+		READ_FPR(30, __value);
+		break;
+	case 31:
+		READ_FPR(31, __value);
+		break;
+	default:
+		panic("unexpected idx '%d'", idx);
+	}
+#undef READ_FPR
+	return __value;
+}
+
+static inline void write_fpr(unsigned int idx, unsigned long value)
+{
+#define WRITE_FPR(idx, value)		\
+	__asm__ __volatile__("movgr2fr.d $f"#idx", %0\n\t" :: "r"(value));
+
+	switch (idx) {
+	case 0:
+		WRITE_FPR(0, value);
+		break;
+	case 1:
+		WRITE_FPR(1, value);
+		break;
+	case 2:
+		WRITE_FPR(2, value);
+		break;
+	case 3:
+		WRITE_FPR(3, value);
+		break;
+	case 4:
+		WRITE_FPR(4, value);
+		break;
+	case 5:
+		WRITE_FPR(5, value);
+		break;
+	case 6:
+		WRITE_FPR(6, value);
+		break;
+	case 7:
+		WRITE_FPR(7, value);
+		break;
+	case 8:
+		WRITE_FPR(8, value);
+		break;
+	case 9:
+		WRITE_FPR(9, value);
+		break;
+	case 10:
+		WRITE_FPR(10, value);
+		break;
+	case 11:
+		WRITE_FPR(11, value);
+		break;
+	case 12:
+		WRITE_FPR(12, value);
+		break;
+	case 13:
+		WRITE_FPR(13, value);
+		break;
+	case 14:
+		WRITE_FPR(14, value);
+		break;
+	case 15:
+		WRITE_FPR(15, value);
+		break;
+	case 16:
+		WRITE_FPR(16, value);
+		break;
+	case 17:
+		WRITE_FPR(17, value);
+		break;
+	case 18:
+		WRITE_FPR(18, value);
+		break;
+	case 19:
+		WRITE_FPR(19, value);
+		break;
+	case 20:
+		WRITE_FPR(20, value);
+		break;
+	case 21:
+		WRITE_FPR(21, value);
+		break;
+	case 22:
+		WRITE_FPR(22, value);
+		break;
+	case 23:
+		WRITE_FPR(23, value);
+		break;
+	case 24:
+		WRITE_FPR(24, value);
+		break;
+	case 25:
+		WRITE_FPR(25, value);
+		break;
+	case 26:
+		WRITE_FPR(26, value);
+		break;
+	case 27:
+		WRITE_FPR(27, value);
+		break;
+	case 28:
+		WRITE_FPR(28, value);
+		break;
+	case 29:
+		WRITE_FPR(29, value);
+		break;
+	case 30:
+		WRITE_FPR(30, value);
+		break;
+	case 31:
+		WRITE_FPR(31, value);
+		break;
+	default:
+		panic("unexpected idx '%d'", idx);
+	}
+#undef WRITE_FPR
+}
+
+void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int *pc)
+{
+	bool fp = false;
+	bool sign, write;
+	bool user = user_mode(regs);
+	unsigned int res, size = 0;
+	unsigned long value = 0;
+	union loongarch_instruction insn;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0);
+
+	__get_inst(&insn.word, pc, user);
+
+	switch (insn.reg2i12_format.opcode) {
+	case ldh_op:
+		size = 2;
+		sign = true;
+		write = false;
+		break;
+	case ldhu_op:
+		size = 2;
+		sign = false;
+		write = false;
+		break;
+	case sth_op:
+		size = 2;
+		sign = true;
+		write = true;
+		break;
+	case ldw_op:
+		size = 4;
+		sign = true;
+		write = false;
+		break;
+	case ldwu_op:
+		size = 4;
+		sign = false;
+		write = false;
+		break;
+	case stw_op:
+		size = 4;
+		sign = true;
+		write = true;
+		break;
+	case ldd_op:
+		size = 8;
+		sign = true;
+		write = false;
+		break;
+	case std_op:
+		size = 8;
+		sign = true;
+		write = true;
+		break;
+	case flds_op:
+		size = 4;
+		fp = true;
+		sign = true;
+		write = false;
+		break;
+	case fsts_op:
+		size = 4;
+		fp = true;
+		sign = true;
+		write = true;
+		break;
+	case fldd_op:
+		size = 8;
+		fp = true;
+		sign = true;
+		write = false;
+		break;
+	case fstd_op:
+		size = 8;
+		fp = true;
+		sign = true;
+		write = true;
+		break;
+	}
+
+	switch (insn.reg2i14_format.opcode) {
+	case ldptrw_op:
+		size = 4;
+		sign = true;
+		write = false;
+		break;
+	case stptrw_op:
+		size = 4;
+		sign = true;
+		write = true;
+		break;
+	case ldptrd_op:
+		size = 8;
+		sign = true;
+		write = false;
+		break;
+	case stptrd_op:
+		size = 8;
+		sign = true;
+		write = true;
+		break;
+	}
+
+	switch (insn.reg3_format.opcode) {
+	case ldxh_op:
+		size = 2;
+		sign = true;
+		write = false;
+		break;
+	case ldxhu_op:
+		size = 2;
+		sign = false;
+		write = false;
+		break;
+	case stxh_op:
+		size = 2;
+		sign = true;
+		write = true;
+		break;
+	case ldxw_op:
+		size = 4;
+		sign = true;
+		write = false;
+		break;
+	case ldxwu_op:
+		size = 4;
+		sign = false;
+		write = false;
+		break;
+	case stxw_op:
+		size = 4;
+		sign = true;
+		write = true;
+		break;
+	case ldxd_op:
+		size = 8;
+		sign = true;
+		write = false;
+		break;
+	case stxd_op:
+		size = 8;
+		sign = true;
+		write = true;
+		break;
+	case fldxs_op:
+		size = 4;
+		fp = true;
+		sign = true;
+		write = false;
+		break;
+	case fstxs_op:
+		size = 4;
+		fp = true;
+		sign = true;
+		write = true;
+		break;
+	case fldxd_op:
+		size = 8;
+		fp = true;
+		sign = true;
+		write = false;
+		break;
+	case fstxd_op:
+		size = 8;
+		fp = true;
+		sign = true;
+		write = true;
+		break;
+	}
+
+	if (!size)
+		goto sigbus;
+	if (user && !access_ok(addr, size))
+		goto sigbus;
+
+	if (!write) {
+		res = unaligned_read(addr, &value, size, sign);
+		if (res)
+			goto fault;
+
+		/* Rd is the same field in any formats */
+		if (!fp)
+			regs->regs[insn.reg3_format.rd] = value;
+		else {
+			if (is_fpu_owner())
+				write_fpr(insn.reg3_format.rd, value);
+			else
+				set_fpr64(&current->thread.fpu.fpr[insn.reg3_format.rd], 0, value);
+		}
+	} else {
+		/* Rd is the same field in any formats */
+		if (!fp)
+			value = regs->regs[insn.reg3_format.rd];
+		else {
+			if (is_fpu_owner())
+				value = read_fpr(insn.reg3_format.rd);
+			else
+				value = get_fpr64(&current->thread.fpu.fpr[insn.reg3_format.rd], 0);
+		}
+
+		res = unaligned_write(addr, value, size);
+		if (res)
+			goto fault;
+	}
+
+#ifdef CONFIG_DEBUG_FS
+	if (user)
+		unaligned_instructions_user++;
+	else
+		unaligned_instructions_kernel++;
+#endif
+
+	compute_return_era(regs);
+
+	return;
+
+fault:
+	/* Did we have an exception handler installed? */
+	if (fixup_exception(regs))
+		return;
+
+	die_if_kernel("Unhandled kernel unaligned access", regs);
+	force_sig(SIGSEGV);
+
+	return;
+
+sigbus:
+	die_if_kernel("Unhandled kernel unaligned access", regs);
+	force_sig(SIGBUS);
+
+	return;
+}
+
+#ifdef CONFIG_DEBUG_FS
+static int __init debugfs_unaligned(void)
+{
+	struct dentry *d;
+
+	d = debugfs_create_dir("loongarch", NULL);
+	if (!d)
+		return -ENOMEM;
+
+	debugfs_create_u32("unaligned_instructions_user",
+				S_IRUGO, d, &unaligned_instructions_user);
+	debugfs_create_u32("unaligned_instructions_kernel",
+				S_IRUGO, d, &unaligned_instructions_kernel);
+
+	return 0;
+}
+arch_initcall(debugfs_unaligned);
+#endif
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index e36635fccb695..8678955303405 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -3,4 +3,4 @@
 # Makefile for LoongArch-specific library files.
 #
 
-lib-y	+= delay.o clear_user.o copy_user.o dump_tlb.o
+lib-y	+= delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S
new file mode 100644
index 0000000000000..9177fd638f072
--- /dev/null
+++ b/arch/loongarch/lib/unaligned.S
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/asm-extable.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.L_fixup_handle_unaligned:
+	li.w	a0, -EFAULT
+	jr	ra
+
+/*
+ * unsigned long unaligned_read(void *addr, void *value, unsigned long n, bool sign)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ * a3: sign
+ */
+SYM_FUNC_START(unaligned_read)
+	beqz	a2, 5f
+
+	li.w	t2, 0
+	addi.d	t0, a2, -1
+	slli.d	t1, t0, 3
+	add.d 	a0, a0, t0
+
+	beqz	a3, 2f
+1:	ld.b	t3, a0, 0
+	b	3f
+
+2:	ld.bu	t3, a0, 0
+3:	sll.d	t3, t3, t1
+	or	t2, t2, t3
+	addi.d	t1, t1, -8
+	addi.d	a0, a0, -1
+	addi.d	a2, a2, -1
+	bgtz	a2, 2b
+4:	st.d	t2, a1, 0
+
+	move	a0, a2
+	jr	ra
+
+5:	li.w    a0, -EFAULT
+	jr	ra
+
+	_asm_extable 1b, .L_fixup_handle_unaligned
+	_asm_extable 2b, .L_fixup_handle_unaligned
+	_asm_extable 4b, .L_fixup_handle_unaligned
+SYM_FUNC_END(unaligned_read)
+
+/*
+ * unsigned long unaligned_write(void *addr, unsigned long value, unsigned long n)
+ *
+ * a0: addr
+ * a1: value
+ * a2: n
+ */
+SYM_FUNC_START(unaligned_write)
+	beqz	a2, 3f
+
+	li.w	t0, 0
+1:	srl.d	t1, a1, t0
+2:	st.b	t1, a0, 0
+	addi.d	t0, t0, 8
+	addi.d	a2, a2, -1
+	addi.d	a0, a0, 1
+	bgtz	a2, 1b
+
+	move	a0, a2
+	jr	ra
+
+3:	li.w    a0, -EFAULT
+	jr	ra
+
+	_asm_extable 2b, .L_fixup_handle_unaligned
+SYM_FUNC_END(unaligned_write)
-- 
GitLab


From 19e5eb15b00c5841b4b9bd9777af2865a40d2f39 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 3816/4122] LoongArch: Add alternative runtime patching
 mechanism

Introduce the "alternative" mechanism from ARM64 and x86 for LoongArch
to apply runtime patching. The main purpose of this patch is to provide
a framework. In future we can use this mechanism (i.e., the ALTERNATIVE
and ALTERNATIVE_2 macros) to optimize hotspot functions according to cpu
features.

Signed-off-by: Jun Yi <yijun@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/alternative-asm.h |  82 +++++++
 arch/loongarch/include/asm/alternative.h     | 111 +++++++++
 arch/loongarch/include/asm/bugs.h            |  15 ++
 arch/loongarch/include/asm/inst.h            |  18 ++
 arch/loongarch/kernel/Makefile               |   2 +-
 arch/loongarch/kernel/alternative.c          | 246 +++++++++++++++++++
 arch/loongarch/kernel/module.c               |  15 ++
 arch/loongarch/kernel/setup.c                |   7 +
 arch/loongarch/kernel/vmlinux.lds.S          |  12 +
 9 files changed, 507 insertions(+), 1 deletion(-)
 create mode 100644 arch/loongarch/include/asm/alternative-asm.h
 create mode 100644 arch/loongarch/include/asm/alternative.h
 create mode 100644 arch/loongarch/include/asm/bugs.h
 create mode 100644 arch/loongarch/kernel/alternative.c

diff --git a/arch/loongarch/include/asm/alternative-asm.h b/arch/loongarch/include/asm/alternative-asm.h
new file mode 100644
index 0000000000000..ff3d10ac393f2
--- /dev/null
+++ b/arch/loongarch/include/asm/alternative-asm.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ALTERNATIVE_ASM_H
+#define _ASM_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+#include <asm/asm.h>
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro altinstruction_entry orig alt feature orig_len alt_len
+	.long \orig - .
+	.long \alt - .
+	.short \feature
+	.byte \orig_len
+	.byte \alt_len
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".fill" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+140 :
+	\oldinstr
+141 :
+	.fill - (((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)) / 4, 4, 0x03400000
+142 :
+
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 140b, 143f, \feature, 142b-140b, 144f-143f
+	.popsection
+
+	.subsection 1
+143 :
+	\newinstr
+144 :
+	.previous
+.endm
+
+#define old_len			(141b-140b)
+#define new_len1		(144f-143f)
+#define new_len2		(145f-144f)
+
+#define alt_max_short(a, b)	((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
+
+/*
+ * Same as ALTERNATIVE macro above but for two alternatives. If CPU
+ * has @feature1, it replaces @oldinstr with @newinstr1. If CPU has
+ * @feature2, it replaces @oldinstr with @feature2.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+140 :
+	\oldinstr
+141 :
+	.fill - ((alt_max_short(new_len1, new_len2) - (old_len)) > 0) * \
+		(alt_max_short(new_len1, new_len2) - (old_len)) / 4, 4, 0x03400000
+142 :
+
+	.pushsection .altinstructions, "a"
+	altinstruction_entry 140b, 143f, \feature1, 142b-140b, 144f-143f, 142b-141b
+	altinstruction_entry 140b, 144f, \feature2, 142b-140b, 145f-144f, 142b-141b
+	.popsection
+
+	.subsection 1
+143 :
+	\newinstr1
+144 :
+	\newinstr2
+145 :
+	.previous
+.endm
+
+#endif  /*  __ASSEMBLY__  */
+
+#endif /* _ASM_ALTERNATIVE_ASM_H */
diff --git a/arch/loongarch/include/asm/alternative.h b/arch/loongarch/include/asm/alternative.h
new file mode 100644
index 0000000000000..cee7b29785ab2
--- /dev/null
+++ b/arch/loongarch/include/asm/alternative.h
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ALTERNATIVE_H
+#define _ASM_ALTERNATIVE_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/stringify.h>
+#include <asm/asm.h>
+
+struct alt_instr {
+	s32 instr_offset;	/* offset to original instruction */
+	s32 replace_offset;	/* offset to replacement instruction */
+	u16 feature;		/* feature bit set for replacement */
+	u8  instrlen;		/* length of original instruction */
+	u8  replacementlen;	/* length of new instruction */
+} __packed;
+
+/*
+ * Debug flag that can be tested to see whether alternative
+ * instructions were patched in already:
+ */
+extern int alternatives_patched;
+extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
+
+extern void alternative_instructions(void);
+extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+
+#define b_replacement(num)	"664"#num
+#define e_replacement(num)	"665"#num
+
+#define alt_end_marker		"663"
+#define alt_slen		"662b-661b"
+#define alt_total_slen		alt_end_marker"b-661b"
+#define alt_rlen(num)		e_replacement(num)"f-"b_replacement(num)"f"
+
+#define __OLDINSTR(oldinstr, num)					\
+	"661:\n\t" oldinstr "\n662:\n"					\
+	".fill -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * "		\
+		"((" alt_rlen(num) ")-(" alt_slen ")) / 4, 4, 0x03400000\n"
+
+#define OLDINSTR(oldinstr, num)						\
+	__OLDINSTR(oldinstr, num)					\
+	alt_end_marker ":\n"
+
+#define alt_max_short(a, b)	"((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
+
+/*
+ * Pad the second replacement alternative with additional NOPs if it is
+ * additionally longer than the first replacement alternative.
+ */
+#define OLDINSTR_2(oldinstr, num1, num2) \
+	"661:\n\t" oldinstr "\n662:\n"								\
+	".fill -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * "	\
+		"(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) / 4, "	\
+		"4, 0x03400000\n"	\
+	alt_end_marker ":\n"
+
+#define ALTINSTR_ENTRY(feature, num)					      \
+	" .long 661b - .\n"				/* label           */ \
+	" .long " b_replacement(num)"f - .\n"		/* new instruction */ \
+	" .short " __stringify(feature) "\n"		/* feature bit     */ \
+	" .byte " alt_total_slen "\n"			/* source len      */ \
+	" .byte " alt_rlen(num) "\n"			/* replacement len */
+
+#define ALTINSTR_REPLACEMENT(newinstr, feature, num)	/* replacement */     \
+	b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
+
+/* alternative assembly primitive: */
+#define ALTERNATIVE(oldinstr, newinstr, feature)			\
+	OLDINSTR(oldinstr, 1)						\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature, 1)					\
+	".popsection\n"							\
+	".subsection 1\n" \
+	ALTINSTR_REPLACEMENT(newinstr, feature, 1)			\
+	".previous\n"
+
+#define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+	OLDINSTR_2(oldinstr, 1, 2)					\
+	".pushsection .altinstructions,\"a\"\n"				\
+	ALTINSTR_ENTRY(feature1, 1)					\
+	ALTINSTR_ENTRY(feature2, 2)					\
+	".popsection\n"							\
+	".subsection 1\n" \
+	ALTINSTR_REPLACEMENT(newinstr1, feature1, 1)			\
+	ALTINSTR_REPLACEMENT(newinstr2, feature2, 2)			\
+	".previous\n"
+
+/*
+ * Alternative instructions for different CPU types or capabilities.
+ *
+ * This allows to use optimized instructions even on generic binary
+ * kernels.
+ *
+ * length of oldinstr must be longer or equal the length of newinstr
+ * It can be padded with nops as needed.
+ *
+ * For non barrier like inlines please define new variants
+ * without volatile and memory clobber.
+ */
+#define alternative(oldinstr, newinstr, feature)			\
+	(asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory"))
+
+#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
+	(asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory"))
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_ALTERNATIVE_H */
diff --git a/arch/loongarch/include/asm/bugs.h b/arch/loongarch/include/asm/bugs.h
new file mode 100644
index 0000000000000..98396535163b3
--- /dev/null
+++ b/arch/loongarch/include/asm/bugs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This is included by init/main.c to check for architecture-dependent bugs.
+ *
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#ifndef _ASM_BUGS_H
+#define _ASM_BUGS_H
+
+#include <asm/cpu.h>
+#include <asm/cpu-info.h>
+
+extern void check_bugs(void);
+
+#endif /* _ASM_BUGS_H */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 889d6c9fc2b6a..67215af47b3d5 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -8,6 +8,7 @@
 #include <linux/types.h>
 #include <asm/asm.h>
 
+#define INSN_NOP		0x03400000
 #define INSN_BREAK		0x002a0000
 
 #define ADDR_IMMMASK_LU52ID	0xFFF0000000000000
@@ -28,6 +29,7 @@ enum reg0i26_op {
 enum reg1i20_op {
 	lu12iw_op	= 0x0a,
 	lu32id_op	= 0x0b,
+	pcaddi_op	= 0x0c,
 	pcaddu12i_op	= 0x0e,
 	pcaddu18i_op	= 0x0f,
 };
@@ -35,6 +37,8 @@ enum reg1i20_op {
 enum reg1i21_op {
 	beqz_op		= 0x10,
 	bnez_op		= 0x11,
+	bceqz_op	= 0x12, /* bits[9:8] = 0x00 */
+	bcnez_op	= 0x12, /* bits[9:8] = 0x01 */
 };
 
 enum reg2_op {
@@ -315,6 +319,12 @@ static inline bool is_imm_negative(unsigned long val, unsigned int bit)
 	return val & (1UL << (bit - 1));
 }
 
+static inline bool is_pc_ins(union loongarch_instruction *ip)
+{
+	return ip->reg1i20_format.opcode >= pcaddi_op &&
+			ip->reg1i20_format.opcode <= pcaddu18i_op;
+}
+
 static inline bool is_branch_ins(union loongarch_instruction *ip)
 {
 	return ip->reg1i21_format.opcode >= beqz_op &&
@@ -353,6 +363,14 @@ static inline bool unsigned_imm_check(unsigned long val, unsigned int bit)
 	return val < (1UL << bit);
 }
 
+static inline unsigned long sign_extend(unsigned long val, unsigned int idx)
+{
+	if (!is_imm_negative(val, idx + 1))
+		return ((1UL << idx) - 1) & val;
+	else
+		return ~((1UL << idx) - 1) | val;
+}
+
 #define DEF_EMIT_REG0I26_FORMAT(NAME, OP)				\
 static inline void emit_##NAME(union loongarch_instruction *insn,	\
 			       int offset)				\
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 2ad2555b53eaf..86744531b100a 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -8,7 +8,7 @@ extra-y		:= vmlinux.lds
 obj-y		+= head.o cpu-probe.o cacheinfo.o env.o setup.o entry.o genex.o \
 		   traps.o irq.o idle.o process.o dma.o mem.o io.o reset.o switch.o \
 		   elf.o syscall.o signal.o time.o topology.o inst.o ptrace.o vdso.o \
-		   unaligned.o
+		   alternative.o unaligned.o
 
 obj-$(CONFIG_ACPI)		+= acpi.o
 obj-$(CONFIG_EFI) 		+= efi.o
diff --git a/arch/loongarch/kernel/alternative.c b/arch/loongarch/kernel/alternative.c
new file mode 100644
index 0000000000000..c5aebeac960b6
--- /dev/null
+++ b/arch/loongarch/kernel/alternative.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <asm/alternative.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+#include <asm/sections.h>
+
+int __read_mostly alternatives_patched;
+
+EXPORT_SYMBOL_GPL(alternatives_patched);
+
+#define MAX_PATCH_SIZE (((u8)(-1)) / LOONGARCH_INSN_SIZE)
+
+static int __initdata_or_module debug_alternative;
+
+static int __init debug_alt(char *str)
+{
+	debug_alternative = 1;
+	return 1;
+}
+__setup("debug-alternative", debug_alt);
+
+#define DPRINTK(fmt, args...)						\
+do {									\
+	if (debug_alternative)						\
+		printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args);	\
+} while (0)
+
+#define DUMP_WORDS(buf, count, fmt, args...)				\
+do {									\
+	if (unlikely(debug_alternative)) {				\
+		int _j;							\
+		union loongarch_instruction *_buf = buf;		\
+									\
+		if (!(count))						\
+			break;						\
+									\
+		printk(KERN_DEBUG fmt, ##args);				\
+		for (_j = 0; _j < count - 1; _j++)			\
+			printk(KERN_CONT "<%08x> ", _buf[_j].word);	\
+		printk(KERN_CONT "<%08x>\n", _buf[_j].word);		\
+	}								\
+} while (0)
+
+/* Use this to add nops to a buffer, then text_poke the whole buffer. */
+static void __init_or_module add_nops(union loongarch_instruction *insn, int count)
+{
+	while (count--) {
+		insn->word = INSN_NOP;
+		insn++;
+	}
+}
+
+/* Is the jump addr in local .altinstructions */
+static inline bool in_alt_jump(unsigned long jump, void *start, void *end)
+{
+	return jump >= (unsigned long)start && jump < (unsigned long)end;
+}
+
+static void __init_or_module recompute_jump(union loongarch_instruction *buf,
+		union loongarch_instruction *dest, union loongarch_instruction *src,
+		void *start, void *end)
+{
+	unsigned int si, si_l, si_h;
+	unsigned long cur_pc, jump_addr, pc;
+	long offset;
+
+	cur_pc = (unsigned long)src;
+	pc = (unsigned long)dest;
+
+	si_l = src->reg0i26_format.immediate_l;
+	si_h = src->reg0i26_format.immediate_h;
+	switch (src->reg0i26_format.opcode) {
+	case b_op:
+	case bl_op:
+		jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 27);
+		if (in_alt_jump(jump_addr, start, end))
+			return;
+		offset = jump_addr - pc;
+		BUG_ON(offset < -SZ_128M || offset >= SZ_128M);
+		offset >>= 2;
+		buf->reg0i26_format.immediate_h = offset >> 16;
+		buf->reg0i26_format.immediate_l = offset;
+		return;
+	}
+
+	si_l = src->reg1i21_format.immediate_l;
+	si_h = src->reg1i21_format.immediate_h;
+	switch (src->reg1i21_format.opcode) {
+	case bceqz_op: /* bceqz_op = bcnez_op */
+		BUG_ON(buf->reg1i21_format.rj & BIT(4));
+		fallthrough;
+	case beqz_op:
+	case bnez_op:
+		jump_addr = cur_pc + sign_extend((si_h << 16 | si_l) << 2, 22);
+		if (in_alt_jump(jump_addr, start, end))
+			return;
+		offset = jump_addr - pc;
+		BUG_ON(offset < -SZ_4M || offset >= SZ_4M);
+		offset >>= 2;
+		buf->reg1i21_format.immediate_h = offset >> 16;
+		buf->reg1i21_format.immediate_l = offset;
+		return;
+	}
+
+	si = src->reg2i16_format.immediate;
+	switch (src->reg2i16_format.opcode) {
+	case beq_op:
+	case bne_op:
+	case blt_op:
+	case bge_op:
+	case bltu_op:
+	case bgeu_op:
+		jump_addr = cur_pc + sign_extend(si << 2, 17);
+		if (in_alt_jump(jump_addr, start, end))
+			return;
+		offset = jump_addr - pc;
+		BUG_ON(offset < -SZ_128K || offset >= SZ_128K);
+		offset >>= 2;
+		buf->reg2i16_format.immediate = offset;
+		return;
+	}
+}
+
+static int __init_or_module copy_alt_insns(union loongarch_instruction *buf,
+	union loongarch_instruction *dest, union loongarch_instruction *src, int nr)
+{
+	int i;
+
+	for (i = 0; i < nr; i++) {
+		buf[i].word = src[i].word;
+
+		if (is_pc_ins(&src[i])) {
+			pr_err("Not support pcrel instruction at present!");
+			return -EINVAL;
+		}
+
+		if (is_branch_ins(&src[i]) &&
+		    src[i].reg2i16_format.opcode != jirl_op) {
+			recompute_jump(&buf[i], &dest[i], &src[i], src, src + nr);
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * text_poke_early - Update instructions on a live kernel at boot time
+ *
+ * When you use this code to patch more than one byte of an instruction
+ * you need to make sure that other CPUs cannot execute this code in parallel.
+ * Also no thread must be currently preempted in the middle of these
+ * instructions. And on the local CPU you need to be protected again NMI or MCE
+ * handlers seeing an inconsistent instruction while you patch.
+ */
+static void *__init_or_module text_poke_early(union loongarch_instruction *insn,
+			      union loongarch_instruction *buf, unsigned int nr)
+{
+	int i;
+	unsigned long flags;
+
+	local_irq_save(flags);
+
+	for (i = 0; i < nr; i++)
+		insn[i].word = buf[i].word;
+
+	local_irq_restore(flags);
+
+	wbflush();
+	flush_icache_range((unsigned long)insn, (unsigned long)(insn + nr));
+
+	return insn;
+}
+
+/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have less capabilities than
+ * the boot processor are not handled. Tough. Make sure you disable such
+ * features by hand.
+ */
+void __init_or_module apply_alternatives(struct alt_instr *start, struct alt_instr *end)
+{
+	struct alt_instr *a;
+	unsigned int nr_instr, nr_repl, nr_insnbuf;
+	union loongarch_instruction *instr, *replacement;
+	union loongarch_instruction insnbuf[MAX_PATCH_SIZE];
+
+	DPRINTK("alt table %px, -> %px", start, end);
+	/*
+	 * The scan order should be from start to end. A later scanned
+	 * alternative code can overwrite previously scanned alternative code.
+	 * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+	 * patch code.
+	 *
+	 * So be careful if you want to change the scan order to any other
+	 * order.
+	 */
+	for (a = start; a < end; a++) {
+		nr_insnbuf = 0;
+
+		instr = (void *)&a->instr_offset + a->instr_offset;
+		replacement = (void *)&a->replace_offset + a->replace_offset;
+
+		BUG_ON(a->instrlen > sizeof(insnbuf));
+		BUG_ON(a->instrlen & 0x3);
+		BUG_ON(a->replacementlen & 0x3);
+
+		nr_instr = a->instrlen / LOONGARCH_INSN_SIZE;
+		nr_repl = a->replacementlen / LOONGARCH_INSN_SIZE;
+
+		if (!cpu_has(a->feature)) {
+			DPRINTK("feat not exist: %d, old: (%px len: %d), repl: (%px, len: %d)",
+				a->feature, instr, a->instrlen,
+				replacement, a->replacementlen);
+
+			continue;
+		}
+
+		DPRINTK("feat: %d, old: (%px len: %d), repl: (%px, len: %d)",
+			a->feature, instr, a->instrlen,
+			replacement, a->replacementlen);
+
+		DUMP_WORDS(instr, nr_instr, "%px: old_insn: ", instr);
+		DUMP_WORDS(replacement, nr_repl, "%px: rpl_insn: ", replacement);
+
+		copy_alt_insns(insnbuf, instr, replacement, nr_repl);
+		nr_insnbuf = nr_repl;
+
+		if (nr_instr > nr_repl) {
+			add_nops(insnbuf + nr_repl, nr_instr - nr_repl);
+			nr_insnbuf += nr_instr - nr_repl;
+		}
+		DUMP_WORDS(insnbuf, nr_insnbuf, "%px: final_insn: ", instr);
+
+		text_poke_early(instr, insnbuf, nr_insnbuf);
+	}
+}
+
+void __init alternative_instructions(void)
+{
+	apply_alternatives(__alt_instructions, __alt_instructions_end);
+
+	alternatives_patched = 1;
+}
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 097595b2fc14b..825fcf77f9e73 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -17,6 +17,7 @@
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
+#include <asm/alternative.h>
 
 static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
 {
@@ -456,3 +457,17 @@ void *module_alloc(unsigned long size)
 	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END,
 			GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0));
 }
+
+int module_finalize(const Elf_Ehdr *hdr,
+		    const Elf_Shdr *sechdrs, struct module *mod)
+{
+	const Elf_Shdr *s, *se;
+	const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
+
+	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
+		if (!strcmp(".altinstructions", secstrs + s->sh_name))
+			apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
+	}
+
+	return 0;
+}
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index ae436def7ee98..53831bcb11ca3 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -31,7 +31,9 @@
 #include <linux/swiotlb.h>
 
 #include <asm/addrspace.h>
+#include <asm/alternative.h>
 #include <asm/bootinfo.h>
+#include <asm/bugs.h>
 #include <asm/cache.h>
 #include <asm/cpu.h>
 #include <asm/dma.h>
@@ -80,6 +82,11 @@ const char *get_system_type(void)
 	return "generic-loongson-machine";
 }
 
+void __init check_bugs(void)
+{
+	alternative_instructions();
+}
+
 static const char *dmi_string_parse(const struct dmi_header *dm, u8 s)
 {
 	const u8 *bp = ((u8 *) dm) + dm->length;
diff --git a/arch/loongarch/kernel/vmlinux.lds.S b/arch/loongarch/kernel/vmlinux.lds.S
index efecda0c23616..733b16e8d55dd 100644
--- a/arch/loongarch/kernel/vmlinux.lds.S
+++ b/arch/loongarch/kernel/vmlinux.lds.S
@@ -54,6 +54,18 @@ SECTIONS
 	. = ALIGN(PECOFF_SEGMENT_ALIGN);
 	_etext = .;
 
+	/*
+	 * struct alt_inst entries. From the header (alternative.h):
+	 * "Alternative instructions for different CPU types or capabilities"
+	 * Think locking instructions on spinlocks.
+	 */
+	. = ALIGN(4);
+	.altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {
+		__alt_instructions = .;
+		*(.altinstructions)
+		__alt_instructions_end = .;
+	}
+
 	.got : ALIGN(16) { *(.got) }
 	.plt : ALIGN(16) { *(.plt) }
 	.got.plt : ALIGN(16) { *(.got.plt) }
-- 
GitLab


From a275a82dcd4024c75337db15d59ed039c31e21da Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:39:59 +0800
Subject: [PATCH 3817/4122] LoongArch: Use alternative to optimize libraries

Use the alternative to optimize common libraries according whether CPU
has UAL (hardware unaligned access support) feature, including memset(),
memcopy(), memmove(), copy_user() and clear_user().

We have tested UnixBench on a Loongson-3A5000 quad-core machine (1.6GHz):

1, One copy, before patch:

System Benchmarks Index Values               BASELINE       RESULT    INDEX
Dhrystone 2 using register variables         116700.0    9566582.0    819.8
Double-Precision Whetstone                       55.0       2805.3    510.1
Execl Throughput                                 43.0       2120.0    493.0
File Copy 1024 bufsize 2000 maxblocks          3960.0     209833.0    529.9
File Copy 256 bufsize 500 maxblocks            1655.0      89400.0    540.2
File Copy 4096 bufsize 8000 maxblocks          5800.0     320036.0    551.8
Pipe Throughput                               12440.0     340624.0    273.8
Pipe-based Context Switching                   4000.0     109939.1    274.8
Process Creation                                126.0       4728.7    375.3
Shell Scripts (1 concurrent)                     42.4       2223.1    524.3
Shell Scripts (8 concurrent)                      6.0        883.1   1471.9
System Call Overhead                          15000.0     518639.1    345.8
                                                                   ========
System Benchmarks Index Score                                         500.2

2, One copy, after patch:

System Benchmarks Index Values               BASELINE       RESULT    INDEX
Dhrystone 2 using register variables         116700.0    9567674.7    819.9
Double-Precision Whetstone                       55.0       2805.5    510.1
Execl Throughput                                 43.0       2392.7    556.4
File Copy 1024 bufsize 2000 maxblocks          3960.0     417804.0   1055.1
File Copy 256 bufsize 500 maxblocks            1655.0     112909.5    682.2
File Copy 4096 bufsize 8000 maxblocks          5800.0    1255207.4   2164.2
Pipe Throughput                               12440.0     555712.0    446.7
Pipe-based Context Switching                   4000.0      99964.5    249.9
Process Creation                                126.0       5192.5    412.1
Shell Scripts (1 concurrent)                     42.4       2302.4    543.0
Shell Scripts (8 concurrent)                      6.0        919.6   1532.6
System Call Overhead                          15000.0     511159.3    340.8
                                                                   ========
System Benchmarks Index Score                                         640.1

3, Four copies, before patch:

System Benchmarks Index Values               BASELINE       RESULT    INDEX
Dhrystone 2 using register variables         116700.0   38268610.5   3279.2
Double-Precision Whetstone                       55.0      11222.2   2040.4
Execl Throughput                                 43.0       7892.0   1835.3
File Copy 1024 bufsize 2000 maxblocks          3960.0     235149.6    593.8
File Copy 256 bufsize 500 maxblocks            1655.0      74959.6    452.9
File Copy 4096 bufsize 8000 maxblocks          5800.0     545048.5    939.7
Pipe Throughput                               12440.0    1337359.0   1075.0
Pipe-based Context Switching                   4000.0     473663.9   1184.2
Process Creation                                126.0      17491.2   1388.2
Shell Scripts (1 concurrent)                     42.4       6865.7   1619.3
Shell Scripts (8 concurrent)                      6.0       1015.9   1693.1
System Call Overhead                          15000.0    1899535.2   1266.4
                                                                   ========
System Benchmarks Index Score                                        1278.3

4, Four copies, after patch:

System Benchmarks Index Values               BASELINE       RESULT    INDEX
Dhrystone 2 using register variables         116700.0   38272815.5   3279.6
Double-Precision Whetstone                       55.0      11222.8   2040.5
Execl Throughput                                 43.0       8839.2   2055.6
File Copy 1024 bufsize 2000 maxblocks          3960.0     313912.9    792.7
File Copy 256 bufsize 500 maxblocks            1655.0      80976.1    489.3
File Copy 4096 bufsize 8000 maxblocks          5800.0    1176594.3   2028.6
Pipe Throughput                               12440.0    2100941.9   1688.9
Pipe-based Context Switching                   4000.0     476696.4   1191.7
Process Creation                                126.0      18394.7   1459.9
Shell Scripts (1 concurrent)                     42.4       7172.2   1691.6
Shell Scripts (8 concurrent)                      6.0       1058.3   1763.9
System Call Overhead                          15000.0    1874714.7   1249.8
                                                                   ========
System Benchmarks Index Score                                        1488.8

Signed-off-by: Jun Yi <yijun@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/string.h |   5 ++
 arch/loongarch/lib/Makefile         |   3 +-
 arch/loongarch/lib/clear_user.S     |  70 ++++++++++++++--
 arch/loongarch/lib/copy_user.S      |  91 +++++++++++++++++++--
 arch/loongarch/lib/memcpy.S         |  95 ++++++++++++++++++++++
 arch/loongarch/lib/memmove.S        | 121 ++++++++++++++++++++++++++++
 arch/loongarch/lib/memset.S         |  91 +++++++++++++++++++++
 7 files changed, 465 insertions(+), 11 deletions(-)
 create mode 100644 arch/loongarch/lib/memcpy.S
 create mode 100644 arch/loongarch/lib/memmove.S
 create mode 100644 arch/loongarch/lib/memset.S

diff --git a/arch/loongarch/include/asm/string.h b/arch/loongarch/include/asm/string.h
index b07e60ded9577..7b29cc9c70aa6 100644
--- a/arch/loongarch/include/asm/string.h
+++ b/arch/loongarch/include/asm/string.h
@@ -5,8 +5,13 @@
 #ifndef _ASM_STRING_H
 #define _ASM_STRING_H
 
+#define __HAVE_ARCH_MEMSET
 extern void *memset(void *__s, int __c, size_t __count);
+
+#define __HAVE_ARCH_MEMCPY
 extern void *memcpy(void *__to, __const__ void *__from, size_t __n);
+
+#define __HAVE_ARCH_MEMMOVE
 extern void *memmove(void *__dest, __const__ void *__src, size_t __n);
 
 #endif /* _ASM_STRING_H */
diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile
index 8678955303405..40bde632900fc 100644
--- a/arch/loongarch/lib/Makefile
+++ b/arch/loongarch/lib/Makefile
@@ -3,4 +3,5 @@
 # Makefile for LoongArch-specific library files.
 #
 
-lib-y	+= delay.o clear_user.o copy_user.o dump_tlb.o unaligned.o
+lib-y	+= delay.o memset.o memcpy.o memmove.o \
+	   clear_user.o copy_user.o dump_tlb.o unaligned.o
diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S
index d5c9e44ac8c4f..2dc48e61a2c8c 100644
--- a/arch/loongarch/lib/clear_user.S
+++ b/arch/loongarch/lib/clear_user.S
@@ -3,25 +3,37 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
+#include <asm/cpu.h>
 #include <asm/export.h>
 #include <asm/regdef.h>
 
-.irp to, 0
+.irp to, 0, 1, 2, 3, 4, 5, 6, 7
 .L_fixup_handle_\to\():
 	addi.d	a0, a1, (\to) * (-8)
 	jr	ra
 .endr
 
+SYM_FUNC_START(__clear_user)
+	/*
+	 * Some CPUs support hardware unaligned access
+	 */
+	ALTERNATIVE	"b __clear_user_generic",	\
+			"b __clear_user_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(__clear_user)
+
+EXPORT_SYMBOL(__clear_user)
+
 /*
- * unsigned long __clear_user(void *addr, size_t size)
+ * unsigned long __clear_user_generic(void *addr, size_t size)
  *
  * a0: addr
  * a1: size
  */
-SYM_FUNC_START(__clear_user)
+SYM_FUNC_START(__clear_user_generic)
 	beqz	a1, 2f
 
 1:	st.b	zero, a0, 0
@@ -33,6 +45,54 @@ SYM_FUNC_START(__clear_user)
 	jr	ra
 
 	_asm_extable 1b, .L_fixup_handle_0
-SYM_FUNC_END(__clear_user)
+SYM_FUNC_END(__clear_user_generic)
 
-EXPORT_SYMBOL(__clear_user)
+/*
+ * unsigned long __clear_user_fast(void *addr, unsigned long size)
+ *
+ * a0: addr
+ * a1: size
+ */
+SYM_FUNC_START(__clear_user_fast)
+	beqz	a1, 10f
+
+	ori	a2, zero, 64
+	blt	a1, a2, 9f
+
+	/* set 64 bytes at a time */
+1:	st.d	zero, a0, 0
+2:	st.d	zero, a0, 8
+3:	st.d	zero, a0, 16
+4:	st.d	zero, a0, 24
+5:	st.d	zero, a0, 32
+6:	st.d	zero, a0, 40
+7:	st.d	zero, a0, 48
+8:	st.d	zero, a0, 56
+
+	addi.d	a0, a0, 64
+	addi.d	a1, a1, -64
+	bge	a1, a2, 1b
+
+	beqz	a1, 10f
+
+	/* set the remaining bytes */
+9:	st.b	zero, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, -1
+	bgt	a1, zero, 9b
+
+	/* return */
+10:	move	a0, a1
+	jr	ra
+
+	/* fixup and ex_table */
+	_asm_extable 1b, .L_fixup_handle_0
+	_asm_extable 2b, .L_fixup_handle_1
+	_asm_extable 3b, .L_fixup_handle_2
+	_asm_extable 4b, .L_fixup_handle_3
+	_asm_extable 5b, .L_fixup_handle_4
+	_asm_extable 6b, .L_fixup_handle_5
+	_asm_extable 7b, .L_fixup_handle_6
+	_asm_extable 8b, .L_fixup_handle_7
+	_asm_extable 9b, .L_fixup_handle_0
+SYM_FUNC_END(__clear_user_fast)
diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S
index 61933d964da03..55ac6020a1ad1 100644
--- a/arch/loongarch/lib/copy_user.S
+++ b/arch/loongarch/lib/copy_user.S
@@ -3,26 +3,38 @@
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
 
+#include <asm/alternative-asm.h>
 #include <asm/asm.h>
 #include <asm/asmmacro.h>
 #include <asm/asm-extable.h>
+#include <asm/cpu.h>
 #include <asm/export.h>
 #include <asm/regdef.h>
 
-.irp to, 0
+.irp to, 0, 1, 2, 3, 4, 5, 6, 7
 .L_fixup_handle_\to\():
 	addi.d	a0, a2, (\to) * (-8)
 	jr	ra
 .endr
 
+SYM_FUNC_START(__copy_user)
+	/*
+	 * Some CPUs support hardware unaligned access
+	 */
+	ALTERNATIVE	"b __copy_user_generic",	\
+			"b __copy_user_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(__copy_user)
+
+EXPORT_SYMBOL(__copy_user)
+
 /*
- * unsigned long __copy_user(void *to, const void *from, size_t n)
+ * unsigned long __copy_user_generic(void *to, const void *from, size_t n)
  *
  * a0: to
  * a1: from
  * a2: n
  */
-SYM_FUNC_START(__copy_user)
+SYM_FUNC_START(__copy_user_generic)
 	beqz	a2, 3f
 
 1:	ld.b	t0, a1, 0
@@ -37,6 +49,75 @@ SYM_FUNC_START(__copy_user)
 
 	_asm_extable 1b, .L_fixup_handle_0
 	_asm_extable 2b, .L_fixup_handle_0
-SYM_FUNC_END(__copy_user)
+SYM_FUNC_END(__copy_user_generic)
 
-EXPORT_SYMBOL(__copy_user)
+/*
+ * unsigned long __copy_user_fast(void *to, const void *from, unsigned long n)
+ *
+ * a0: to
+ * a1: from
+ * a2: n
+ */
+SYM_FUNC_START(__copy_user_fast)
+	beqz	a2, 19f
+
+	ori	a3, zero, 64
+	blt	a2, a3, 17f
+
+	/* copy 64 bytes at a time */
+1:	ld.d	t0, a1, 0
+2:	ld.d	t1, a1, 8
+3:	ld.d	t2, a1, 16
+4:	ld.d	t3, a1, 24
+5:	ld.d	t4, a1, 32
+6:	ld.d	t5, a1, 40
+7:	ld.d	t6, a1, 48
+8:	ld.d	t7, a1, 56
+9:	st.d	t0, a0, 0
+10:	st.d	t1, a0, 8
+11:	st.d	t2, a0, 16
+12:	st.d	t3, a0, 24
+13:	st.d	t4, a0, 32
+14:	st.d	t5, a0, 40
+15:	st.d	t6, a0, 48
+16:	st.d	t7, a0, 56
+
+	addi.d	a0, a0, 64
+	addi.d	a1, a1, 64
+	addi.d	a2, a2, -64
+	bge	a2, a3, 1b
+
+	beqz	a2, 19f
+
+	/* copy the remaining bytes */
+17:	ld.b	t0, a1, 0
+18:	st.b	t0, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, 1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 17b
+
+	/* return */
+19:	move	a0, a2
+	jr	ra
+
+	/* fixup and ex_table */
+	_asm_extable 1b, .L_fixup_handle_0
+	_asm_extable 2b, .L_fixup_handle_1
+	_asm_extable 3b, .L_fixup_handle_2
+	_asm_extable 4b, .L_fixup_handle_3
+	_asm_extable 5b, .L_fixup_handle_4
+	_asm_extable 6b, .L_fixup_handle_5
+	_asm_extable 7b, .L_fixup_handle_6
+	_asm_extable 8b, .L_fixup_handle_7
+	_asm_extable 9b, .L_fixup_handle_0
+	_asm_extable 10b, .L_fixup_handle_1
+	_asm_extable 11b, .L_fixup_handle_2
+	_asm_extable 12b, .L_fixup_handle_3
+	_asm_extable 13b, .L_fixup_handle_4
+	_asm_extable 14b, .L_fixup_handle_5
+	_asm_extable 15b, .L_fixup_handle_6
+	_asm_extable 16b, .L_fixup_handle_7
+	_asm_extable 17b, .L_fixup_handle_0
+	_asm_extable 18b, .L_fixup_handle_0
+SYM_FUNC_END(__copy_user_fast)
diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S
new file mode 100644
index 0000000000000..7c07d595ee89a
--- /dev/null
+++ b/arch/loongarch/lib/memcpy.S
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+SYM_FUNC_START(memcpy)
+	/*
+	 * Some CPUs support hardware unaligned access
+	 */
+	ALTERNATIVE	"b __memcpy_generic", \
+			"b __memcpy_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(memcpy)
+
+EXPORT_SYMBOL(memcpy)
+
+/*
+ * void *__memcpy_generic(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__memcpy_generic)
+	move	a3, a0
+	beqz	a2, 2f
+
+1:	ld.b	t0, a1, 0
+	st.b	t0, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, 1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 1b
+
+2:	move	a0, a3
+	jr	ra
+SYM_FUNC_END(__memcpy_generic)
+
+/*
+ * void *__memcpy_fast(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__memcpy_fast)
+	move	a3, a0
+	beqz	a2, 3f
+
+	ori	a4, zero, 64
+	blt	a2, a4, 2f
+
+	/* copy 64 bytes at a time */
+1:	ld.d	t0, a1, 0
+	ld.d	t1, a1, 8
+	ld.d	t2, a1, 16
+	ld.d	t3, a1, 24
+	ld.d	t4, a1, 32
+	ld.d	t5, a1, 40
+	ld.d	t6, a1, 48
+	ld.d	t7, a1, 56
+	st.d	t0, a0, 0
+	st.d	t1, a0, 8
+	st.d	t2, a0, 16
+	st.d	t3, a0, 24
+	st.d	t4, a0, 32
+	st.d	t5, a0, 40
+	st.d	t6, a0, 48
+	st.d	t7, a0, 56
+
+	addi.d	a0, a0, 64
+	addi.d	a1, a1, 64
+	addi.d	a2, a2, -64
+	bge	a2, a4, 1b
+
+	beqz	a2, 3f
+
+	/* copy the remaining bytes */
+2:	ld.b	t0, a1, 0
+	st.b	t0, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a1, a1, 1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 2b
+
+	/* return */
+3:	move	a0, a3
+	jr	ra
+SYM_FUNC_END(__memcpy_fast)
diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S
new file mode 100644
index 0000000000000..6ffdb46da78fd
--- /dev/null
+++ b/arch/loongarch/lib/memmove.S
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+SYM_FUNC_START(memmove)
+	blt	a0, a1, 1f	/* dst < src, memcpy */
+	blt	a1, a0, 3f	/* src < dst, rmemcpy */
+	jr	ra		/* dst == src, return */
+
+	/* if (src - dst) < 64, copy 1 byte at a time */
+1:	ori	a3, zero, 64
+	sub.d	t0, a1, a0
+	blt	t0, a3, 2f
+	b	memcpy
+2:	b	__memcpy_generic
+
+	/* if (dst - src) < 64, copy 1 byte at a time */
+3:	ori	a3, zero, 64
+	sub.d	t0, a0, a1
+	blt	t0, a3, 4f
+	b	rmemcpy
+4:	b	__rmemcpy_generic
+SYM_FUNC_END(memmove)
+
+EXPORT_SYMBOL(memmove)
+
+SYM_FUNC_START(rmemcpy)
+	/*
+	 * Some CPUs support hardware unaligned access
+	 */
+	ALTERNATIVE	"b __rmemcpy_generic", \
+			"b __rmemcpy_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(rmemcpy)
+
+/*
+ * void *__rmemcpy_generic(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__rmemcpy_generic)
+	move	a3, a0
+	beqz	a2, 2f
+
+	add.d	a0, a0, a2
+	add.d	a1, a1, a2
+
+1:	ld.b	t0, a1, -1
+	st.b	t0, a0, -1
+	addi.d	a0, a0, -1
+	addi.d	a1, a1, -1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 1b
+
+2:	move	a0, a3
+	jr	ra
+SYM_FUNC_END(__rmemcpy_generic)
+
+/*
+ * void *__rmemcpy_fast(void *dst, const void *src, size_t n)
+ *
+ * a0: dst
+ * a1: src
+ * a2: n
+ */
+SYM_FUNC_START(__rmemcpy_fast)
+	move	a3, a0
+	beqz	a2, 3f
+
+	add.d	a0, a0, a2
+	add.d	a1, a1, a2
+
+	ori	a4, zero, 64
+	blt	a2, a4, 2f
+
+	/* copy 64 bytes at a time */
+1:	ld.d	t0, a1, -8
+	ld.d	t1, a1, -16
+	ld.d	t2, a1, -24
+	ld.d	t3, a1, -32
+	ld.d	t4, a1, -40
+	ld.d	t5, a1, -48
+	ld.d	t6, a1, -56
+	ld.d	t7, a1, -64
+	st.d	t0, a0, -8
+	st.d	t1, a0, -16
+	st.d	t2, a0, -24
+	st.d	t3, a0, -32
+	st.d	t4, a0, -40
+	st.d	t5, a0, -48
+	st.d	t6, a0, -56
+	st.d	t7, a0, -64
+
+	addi.d	a0, a0, -64
+	addi.d	a1, a1, -64
+	addi.d	a2, a2, -64
+	bge	a2, a4, 1b
+
+	beqz	a2, 3f
+
+	/* copy the remaining bytes */
+2:	ld.b	t0, a1, -1
+	st.b	t0, a0, -1
+	addi.d	a0, a0, -1
+	addi.d	a1, a1, -1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 2b
+
+	/* return */
+3:	move	a0, a3
+	jr	ra
+SYM_FUNC_END(__rmemcpy_fast)
diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S
new file mode 100644
index 0000000000000..e7cb4ea3747d7
--- /dev/null
+++ b/arch/loongarch/lib/memset.S
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/alternative-asm.h>
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/cpu.h>
+#include <asm/export.h>
+#include <asm/regdef.h>
+
+.macro fill_to_64 r0
+	bstrins.d \r0, \r0, 15, 8
+	bstrins.d \r0, \r0, 31, 16
+	bstrins.d \r0, \r0, 63, 32
+.endm
+
+SYM_FUNC_START(memset)
+	/*
+	 * Some CPUs support hardware unaligned access
+	 */
+	ALTERNATIVE	"b __memset_generic", \
+			"b __memset_fast", CPU_FEATURE_UAL
+SYM_FUNC_END(memset)
+
+EXPORT_SYMBOL(memset)
+
+/*
+ * void *__memset_generic(void *s, int c, size_t n)
+ *
+ * a0: s
+ * a1: c
+ * a2: n
+ */
+SYM_FUNC_START(__memset_generic)
+	move	a3, a0
+	beqz	a2, 2f
+
+1:	st.b	a1, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 1b
+
+2:	move	a0, a3
+	jr	ra
+SYM_FUNC_END(__memset_generic)
+
+/*
+ * void *__memset_fast(void *s, int c, size_t n)
+ *
+ * a0: s
+ * a1: c
+ * a2: n
+ */
+SYM_FUNC_START(__memset_fast)
+	move	a3, a0
+	beqz	a2, 3f
+
+	ori	a4, zero, 64
+	blt	a2, a4, 2f
+
+	/* fill a1 to 64 bits */
+	fill_to_64 a1
+
+	/* set 64 bytes at a time */
+1:	st.d	a1, a0, 0
+	st.d	a1, a0, 8
+	st.d	a1, a0, 16
+	st.d	a1, a0, 24
+	st.d	a1, a0, 32
+	st.d	a1, a0, 40
+	st.d	a1, a0, 48
+	st.d	a1, a0, 56
+
+	addi.d	a0, a0, 64
+	addi.d	a2, a2, -64
+	bge	a2, a4, 1b
+
+	beqz	a2, 3f
+
+	/* set the remaining bytes */
+2:	st.b	a1, a0, 0
+	addi.d	a0, a0, 1
+	addi.d	a2, a2, -1
+	bgt	a2, zero, 2b
+
+	/* return */
+3:	move	a0, a3
+	jr	ra
+SYM_FUNC_END(__memset_fast)
-- 
GitLab


From 88d4d957edc707e037449ef71a58c6530a39d01e Mon Sep 17 00:00:00 2001
From: Binbin Zhou <zhoubinbin@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:05 +0800
Subject: [PATCH 3818/4122] LoongArch: Add FDT booting support from efi system
 table

Since commit 40cd01a9c324("efi/loongarch: libstub: remove dependency on
flattened DT"), we can parse the FDT from efi system table.

And now, LoongArch is coming to support booting with FDT, so we add the
relevant booting support as well as parameter parsing.

Signed-off-by: Binbin Zhou <zhoubinbin@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig             |  2 +
 arch/loongarch/include/asm/efi.h   |  1 +
 arch/loongarch/include/asm/setup.h |  1 +
 arch/loongarch/kernel/acpi.c       | 11 +++++-
 arch/loongarch/kernel/efi.c        | 15 +++++++-
 arch/loongarch/kernel/env.c        |  2 +
 arch/loongarch/kernel/numa.c       | 17 +++++++-
 arch/loongarch/kernel/setup.c      | 62 +++++++++++++++++++++++++++++-
 arch/loongarch/kernel/smp.c        | 34 ++++++++++++++++
 arch/loongarch/pci/acpi.c          |  7 +++-
 10 files changed, 145 insertions(+), 7 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0daf6263655b3..48db4b27b9afa 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -111,6 +111,8 @@ config LOONGARCH
 	select MODULES_USE_ELF_RELA if MODULES
 	select NEED_PER_CPU_EMBED_FIRST_CHUNK
 	select NEED_PER_CPU_PAGE_FIRST_CHUNK
+	select OF
+	select OF_EARLY_FLATTREE
 	select PCI
 	select PCI_DOMAINS_GENERIC
 	select PCI_ECAM if ACPI
diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h
index 174567b00ddb9..81e5d33718684 100644
--- a/arch/loongarch/include/asm/efi.h
+++ b/arch/loongarch/include/asm/efi.h
@@ -9,6 +9,7 @@
 
 void __init efi_init(void);
 void __init efi_runtime_init(void);
+void __init *efi_fdt_pointer(void);
 void efifb_setup_from_dmi(struct screen_info *si, const char *opt);
 
 #define ARCH_EFI_IRQ_FLAGS_MASK  0x00000004  /* Bit 2: CSR.CRMD.IE */
diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h
index ca373f8e3c4db..72ead58039f3e 100644
--- a/arch/loongarch/include/asm/setup.h
+++ b/arch/loongarch/include/asm/setup.h
@@ -13,6 +13,7 @@
 
 extern unsigned long eentry;
 extern unsigned long tlbrentry;
+extern char init_command_line[COMMAND_LINE_SIZE];
 extern void tlb_init(int cpu);
 extern void cpu_cache_init(void);
 extern void cache_error_setup(void);
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 8319cc4090090..5a63f85f57984 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -12,6 +12,7 @@
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
 #include <linux/memblock.h>
+#include <linux/of_fdt.h>
 #include <linux/serial_core.h>
 #include <asm/io.h>
 #include <asm/numa.h>
@@ -145,14 +146,14 @@ void __init acpi_boot_table_init(void)
 	 * If acpi_disabled, bail out
 	 */
 	if (acpi_disabled)
-		return;
+		goto fdt_earlycon;
 
 	/*
 	 * Initialize the ACPI boot-time table parser.
 	 */
 	if (acpi_table_init()) {
 		disable_acpi();
-		return;
+		goto fdt_earlycon;
 	}
 
 	loongson_sysconf.boot_cpu_id = read_csr_cpuid();
@@ -164,6 +165,12 @@ void __init acpi_boot_table_init(void)
 
 	/* Do not enable ACPI SPCR console by default */
 	acpi_parse_spcr(earlycon_acpi_spcr_enable, false);
+
+	return;
+
+fdt_earlycon:
+	if (earlycon_acpi_spcr_enable)
+		early_init_dt_scan_chosen_stdout();
 }
 
 #ifdef CONFIG_ACPI_NUMA
diff --git a/arch/loongarch/kernel/efi.c b/arch/loongarch/kernel/efi.c
index a313299711339..ea485b0e1e7f5 100644
--- a/arch/loongarch/kernel/efi.c
+++ b/arch/loongarch/kernel/efi.c
@@ -28,16 +28,29 @@ static unsigned long efi_nr_tables;
 static unsigned long efi_config_table;
 
 static unsigned long __initdata boot_memmap = EFI_INVALID_TABLE_ADDR;
+static unsigned long __initdata fdt_pointer = EFI_INVALID_TABLE_ADDR;
 
 static efi_system_table_t *efi_systab;
 static efi_config_table_type_t arch_tables[] __initdata = {
 	{LINUX_EFI_BOOT_MEMMAP_GUID,	&boot_memmap,	"MEMMAP" },
+	{DEVICE_TREE_GUID,		&fdt_pointer,	"FDTPTR" },
 	{},
 };
 
+void __init *efi_fdt_pointer(void)
+{
+	if (!efi_systab)
+		return NULL;
+
+	if (fdt_pointer == EFI_INVALID_TABLE_ADDR)
+		return NULL;
+
+	return early_memremap_ro(fdt_pointer, SZ_64K);
+}
+
 void __init efi_runtime_init(void)
 {
-	if (!efi_enabled(EFI_BOOT))
+	if (!efi_enabled(EFI_BOOT) || !efi_systab->runtime)
 		return;
 
 	if (efi_runtime_disabled()) {
diff --git a/arch/loongarch/kernel/env.c b/arch/loongarch/kernel/env.c
index 6d56a463b091c..6b3bfb0092e60 100644
--- a/arch/loongarch/kernel/env.c
+++ b/arch/loongarch/kernel/env.c
@@ -11,6 +11,7 @@
 #include <asm/early_ioremap.h>
 #include <asm/bootinfo.h>
 #include <asm/loongson.h>
+#include <asm/setup.h>
 
 u64 efi_system_table;
 struct loongson_system_configuration loongson_sysconf;
@@ -27,6 +28,7 @@ void __init init_environ(void)
 		clear_bit(EFI_BOOT, &efi.flags);
 
 	strscpy(boot_command_line, cmdline, COMMAND_LINE_SIZE);
+	strscpy(init_command_line, cmdline, COMMAND_LINE_SIZE);
 	early_memunmap(cmdline, COMMAND_LINE_SIZE);
 
 	efi_system_table = fw_arg2;
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index a13f92593cfda..3019ca14c760b 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -388,6 +388,21 @@ static void __init numa_default_distance(void)
 	}
 }
 
+/*
+ * fake_numa_init() - For Non-ACPI systems
+ * Return: 0 on success, -errno on failure.
+ */
+static int __init fake_numa_init(void)
+{
+	phys_addr_t start = memblock_start_of_DRAM();
+	phys_addr_t end = memblock_end_of_DRAM() - 1;
+
+	node_set(0, numa_nodes_parsed);
+	pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end);
+
+	return numa_add_memblk(0, start, end + 1);
+}
+
 int __init init_numa_memory(void)
 {
 	int i;
@@ -404,7 +419,7 @@ int __init init_numa_memory(void)
 	memset(&numa_meminfo, 0, sizeof(numa_meminfo));
 
 	/* Parse SRAT and SLIT if provided by firmware. */
-	ret = acpi_numa_init();
+	ret = acpi_disabled ? fake_numa_init() : acpi_numa_init();
 	if (ret < 0)
 		return ret;
 
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index 53831bcb11ca3..f9f5a130710ca 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -28,6 +28,9 @@
 #include <linux/sizes.h>
 #include <linux/device.h>
 #include <linux/dma-map-ops.h>
+#include <linux/libfdt.h>
+#include <linux/of_fdt.h>
+#include <linux/of_address.h>
 #include <linux/swiotlb.h>
 
 #include <asm/addrspace.h>
@@ -69,6 +72,7 @@ static const char dmi_empty_string[] = "        ";
  *
  * These are initialized so they are in the .data section
  */
+char init_command_line[COMMAND_LINE_SIZE] __initdata;
 
 static int num_standard_resources;
 static struct resource *standard_resources;
@@ -253,6 +257,58 @@ static void __init arch_parse_crashkernel(void)
 #endif
 }
 
+static void __init fdt_setup(void)
+{
+#ifdef CONFIG_OF_EARLY_FLATTREE
+	void *fdt_pointer;
+
+	/* ACPI-based systems do not require parsing fdt */
+	if (acpi_os_get_root_pointer())
+		return;
+
+	/* Look for a device tree configuration table entry */
+	fdt_pointer = efi_fdt_pointer();
+	if (!fdt_pointer || fdt_check_header(fdt_pointer))
+		return;
+
+	early_init_dt_scan(fdt_pointer);
+	early_init_fdt_reserve_self();
+
+	max_low_pfn = PFN_PHYS(memblock_end_of_DRAM());
+#endif
+}
+
+static void __init bootcmdline_init(char **cmdline_p)
+{
+	/*
+	 * If CONFIG_CMDLINE_FORCE is enabled then initializing the command line
+	 * is trivial - we simply use the built-in command line unconditionally &
+	 * unmodified.
+	 */
+	if (IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+		strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+		goto out;
+	}
+
+#ifdef CONFIG_OF_FLATTREE
+	/*
+	 * If CONFIG_CMDLINE_BOOTLOADER is enabled and we are in FDT-based system,
+	 * the boot_command_line will be overwritten by early_init_dt_scan_chosen().
+	 * So we need to append init_command_line (the original copy of boot_command_line)
+	 * to boot_command_line.
+	 */
+	if (initial_boot_params) {
+		if (boot_command_line[0])
+			strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+
+		strlcat(boot_command_line, init_command_line, COMMAND_LINE_SIZE);
+	}
+#endif
+
+out:
+	*cmdline_p = boot_command_line;
+}
+
 void __init platform_init(void)
 {
 	arch_reserve_vmcore();
@@ -265,6 +321,7 @@ void __init platform_init(void)
 	acpi_gbl_use_default_register_widths = false;
 	acpi_boot_table_init();
 #endif
+	unflatten_and_copy_device_tree();
 
 #ifdef CONFIG_NUMA
 	init_numa_memory();
@@ -297,6 +354,8 @@ static void __init arch_mem_init(char **cmdline_p)
 
 	check_kernel_sections_mem();
 
+	early_init_fdt_scan_reserved_mem();
+
 	/*
 	 * In order to reduce the possibility of kernel panic when failed to
 	 * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate
@@ -422,12 +481,13 @@ static void __init prefill_possible_map(void)
 void __init setup_arch(char **cmdline_p)
 {
 	cpu_probe();
-	*cmdline_p = boot_command_line;
 
 	init_environ();
 	efi_init();
+	fdt_setup();
 	memblock_init();
 	pagetable_init();
+	bootcmdline_init(cmdline_p);
 	parse_early_param();
 	reserve_initrd_mem();
 
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 14508d429ffa3..b78816cf74ae5 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -180,8 +180,42 @@ irqreturn_t loongson_ipi_interrupt(int irq, void *dev)
 	return IRQ_HANDLED;
 }
 
+static void __init fdt_smp_setup(void)
+{
+#ifdef CONFIG_OF
+	unsigned int cpu, cpuid;
+	struct device_node *node = NULL;
+
+	for_each_of_cpu_node(node) {
+		if (!of_device_is_available(node))
+			continue;
+
+		cpuid = of_get_cpu_hwid(node, 0);
+		if (cpuid >= nr_cpu_ids)
+			continue;
+
+		if (cpuid == loongson_sysconf.boot_cpu_id) {
+			cpu = 0;
+			numa_add_cpu(cpu);
+		} else {
+			cpu = cpumask_next_zero(-1, cpu_present_mask);
+		}
+
+		num_processors++;
+		set_cpu_possible(cpu, true);
+		set_cpu_present(cpu, true);
+		__cpu_number_map[cpuid] = cpu;
+		__cpu_logical_map[cpu] = cpuid;
+	}
+
+	loongson_sysconf.nr_cpus = num_processors;
+#endif
+}
+
 void __init loongson_smp_setup(void)
 {
+	fdt_smp_setup();
+
 	cpu_data[0].core = cpu_logical_map(0) % loongson_sysconf.cores_per_package;
 	cpu_data[0].package = cpu_logical_map(0) / loongson_sysconf.cores_per_package;
 
diff --git a/arch/loongarch/pci/acpi.c b/arch/loongarch/pci/acpi.c
index 8235ec92b41fe..365f7de771cbb 100644
--- a/arch/loongarch/pci/acpi.c
+++ b/arch/loongarch/pci/acpi.c
@@ -26,9 +26,12 @@ void pcibios_add_bus(struct pci_bus *bus)
 
 int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge)
 {
-	struct pci_config_window *cfg = bridge->bus->sysdata;
-	struct acpi_device *adev = to_acpi_device(cfg->parent);
+	struct acpi_device *adev = NULL;
 	struct device *bus_dev = &bridge->bus->dev;
+	struct pci_config_window *cfg = bridge->bus->sysdata;
+
+	if (!acpi_disabled)
+		adev = to_acpi_device(cfg->parent);
 
 	ACPI_COMPANION_SET(&bridge->dev, adev);
 	set_dev_node(bus_dev, pa_to_nid(cfg->res.start));
-- 
GitLab


From 27cab431564edba9919d1a82c2d9636d622a2493 Mon Sep 17 00:00:00 2001
From: Binbin Zhou <zhoubinbin@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:05 +0800
Subject: [PATCH 3819/4122] LoongArch: Add processing ISA Node in DeviceTree

Similar to commit 6d0068ad15e4f771b3 ("MIPS: Loongson64: Process ISA
Node in DeviceTree"), we process ISA node in DeviceTree for FDT-based
systems.

Previously, we are hardcoding reserved ISA I/O Space in, now we are
processing it I/O via DeviceTree directly. The ranges property of ISA
node is used to determine the size and address of reserved I/O space.

Signed-off-by: Binbin Zhou <zhoubinbin@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/setup.c | 75 +++++++++++++++++++++++++++++++++++
 1 file changed, 75 insertions(+)

diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index f9f5a130710ca..fdabf2ac1927d 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -429,6 +429,81 @@ static void __init resource_init(void)
 #endif
 }
 
+static int __init add_legacy_isa_io(struct fwnode_handle *fwnode,
+				resource_size_t hw_start, resource_size_t size)
+{
+	int ret = 0;
+	unsigned long vaddr;
+	struct logic_pio_hwaddr *range;
+
+	range = kzalloc(sizeof(*range), GFP_ATOMIC);
+	if (!range)
+		return -ENOMEM;
+
+	range->fwnode = fwnode;
+	range->size = size = round_up(size, PAGE_SIZE);
+	range->hw_start = hw_start;
+	range->flags = LOGIC_PIO_CPU_MMIO;
+
+	ret = logic_pio_register_range(range);
+	if (ret) {
+		kfree(range);
+		return ret;
+	}
+
+	/* Legacy ISA must placed at the start of PCI_IOBASE */
+	if (range->io_start != 0) {
+		logic_pio_unregister_range(range);
+		kfree(range);
+		return -EINVAL;
+	}
+
+	vaddr = (unsigned long)(PCI_IOBASE + range->io_start);
+	ioremap_page_range(vaddr, vaddr + size, hw_start, pgprot_device(PAGE_KERNEL));
+
+	return 0;
+}
+
+static __init int arch_reserve_pio_range(void)
+{
+	struct device_node *np;
+
+	for_each_node_by_name(np, "isa") {
+		struct of_range range;
+		struct of_range_parser parser;
+
+		pr_info("ISA Bridge: %pOF\n", np);
+
+		if (of_range_parser_init(&parser, np)) {
+			pr_info("Failed to parse resources.\n");
+			of_node_put(np);
+			break;
+		}
+
+		for_each_of_range(&parser, &range) {
+			switch (range.flags & IORESOURCE_TYPE_BITS) {
+			case IORESOURCE_IO:
+				pr_info(" IO 0x%016llx..0x%016llx  ->  0x%016llx\n",
+					range.cpu_addr,
+					range.cpu_addr + range.size - 1,
+					range.bus_addr);
+				if (add_legacy_isa_io(&np->fwnode, range.cpu_addr, range.size))
+					pr_warn("Failed to reserve legacy IO in Logic PIO\n");
+				break;
+			case IORESOURCE_MEM:
+				pr_info(" MEM 0x%016llx..0x%016llx  ->  0x%016llx\n",
+					range.cpu_addr,
+					range.cpu_addr + range.size - 1,
+					range.bus_addr);
+				break;
+			}
+		}
+	}
+
+	return 0;
+}
+arch_initcall(arch_reserve_pio_range);
+
 static int __init reserve_memblock_reserved_regions(void)
 {
 	u64 i, j;
-- 
GitLab


From 366bb35a8e48198cefcd3484ac6b2374d1347873 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3820/4122] LoongArch: Add suspend (ACPI S3) support

Add suspend (Suspend To RAM, aka ACPI S3) support for LoongArch.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig                |  5 ++
 arch/loongarch/Makefile               |  3 +
 arch/loongarch/include/asm/acpi.h     | 10 +++
 arch/loongarch/include/asm/bootinfo.h |  1 +
 arch/loongarch/include/asm/loongson.h |  3 +
 arch/loongarch/include/asm/time.h     |  1 +
 arch/loongarch/kernel/acpi.c          |  6 ++
 arch/loongarch/kernel/smp.c           |  1 +
 arch/loongarch/kernel/time.c          | 11 +++-
 arch/loongarch/power/Makefile         |  3 +
 arch/loongarch/power/platform.c       | 57 +++++++++++++++++
 arch/loongarch/power/suspend.c        | 73 ++++++++++++++++++++++
 arch/loongarch/power/suspend_asm.S    | 89 +++++++++++++++++++++++++++
 13 files changed, 260 insertions(+), 3 deletions(-)
 create mode 100644 arch/loongarch/power/Makefile
 create mode 100644 arch/loongarch/power/platform.c
 create mode 100644 arch/loongarch/power/suspend.c
 create mode 100644 arch/loongarch/power/suspend_asm.S

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 48db4b27b9afa..0c8b2b1a96267 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -56,6 +56,7 @@ config LOONGARCH
 	select ARCH_WANTS_NO_INSTR
 	select BUILDTIME_TABLE_SORT
 	select COMMON_CLK
+	select CPU_PM
 	select EFI
 	select GENERIC_CLOCKEVENTS
 	select GENERIC_CMOS_UPDATE
@@ -517,6 +518,10 @@ config ARCH_MMAP_RND_BITS_MAX
 
 menu "Power management options"
 
+config ARCH_SUSPEND_POSSIBLE
+	def_bool y
+
+source "kernel/power/Kconfig"
 source "drivers/acpi/Kconfig"
 
 endmenu
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 01b57b7263225..5232d8c0f9caf 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -104,6 +104,9 @@ endif
 libs-y += arch/loongarch/lib/
 libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
 
+# suspend and hibernation support
+drivers-$(CONFIG_PM)	+= arch/loongarch/power/
+
 ifeq ($(KBUILD_EXTMOD),)
 prepare: vdso_prepare
 vdso_prepare: prepare0
diff --git a/arch/loongarch/include/asm/acpi.h b/arch/loongarch/include/asm/acpi.h
index 825c2519b9d1f..4198753aa1d0f 100644
--- a/arch/loongarch/include/asm/acpi.h
+++ b/arch/loongarch/include/asm/acpi.h
@@ -35,4 +35,14 @@ extern struct list_head acpi_wakeup_device_list;
 
 #define ACPI_TABLE_UPGRADE_MAX_PHYS ARCH_LOW_ADDRESS_LIMIT
 
+extern int loongarch_acpi_suspend(void);
+extern int (*acpi_suspend_lowlevel)(void);
+extern void loongarch_suspend_enter(void);
+
+static inline unsigned long acpi_get_wakeup_address(void)
+{
+	extern void loongarch_wakeup_start(void);
+	return (unsigned long)loongarch_wakeup_start;
+}
+
 #endif /* _ASM_LOONGARCH_ACPI_H */
diff --git a/arch/loongarch/include/asm/bootinfo.h b/arch/loongarch/include/asm/bootinfo.h
index ed0910e8b856b..0051b526ac6d3 100644
--- a/arch/loongarch/include/asm/bootinfo.h
+++ b/arch/loongarch/include/asm/bootinfo.h
@@ -32,6 +32,7 @@ struct loongson_system_configuration {
 	int cores_per_node;
 	int cores_per_package;
 	unsigned long cores_io_master;
+	unsigned long suspend_addr;
 	const char *cpuname;
 };
 
diff --git a/arch/loongarch/include/asm/loongson.h b/arch/loongarch/include/asm/loongson.h
index 00db93edae1ba..12494cffffd17 100644
--- a/arch/loongarch/include/asm/loongson.h
+++ b/arch/loongarch/include/asm/loongson.h
@@ -136,4 +136,7 @@ typedef enum {
 #define ls7a_writel(val, addr)	*(volatile unsigned int   *)TO_UNCACHE(addr) = (val)
 #define ls7a_writeq(val, addr)	*(volatile unsigned long  *)TO_UNCACHE(addr) = (val)
 
+void enable_gpe_wakeup(void);
+void enable_pci_wakeup(void);
+
 #endif /* __ASM_LOONGSON_H */
diff --git a/arch/loongarch/include/asm/time.h b/arch/loongarch/include/asm/time.h
index 2eae219301d0a..037a2d1b8ff4c 100644
--- a/arch/loongarch/include/asm/time.h
+++ b/arch/loongarch/include/asm/time.h
@@ -12,6 +12,7 @@
 extern u64 cpu_clock_freq;
 extern u64 const_clock_freq;
 
+extern void save_counter(void);
 extern void sync_counter(void);
 
 static inline unsigned int calc_const_freq(void)
diff --git a/arch/loongarch/kernel/acpi.c b/arch/loongarch/kernel/acpi.c
index 5a63f85f57984..98f431157e4c1 100644
--- a/arch/loongarch/kernel/acpi.c
+++ b/arch/loongarch/kernel/acpi.c
@@ -140,6 +140,12 @@ static void __init acpi_process_madt(void)
 	loongson_sysconf.nr_cpus = num_processors;
 }
 
+#ifndef CONFIG_SUSPEND
+int (*acpi_suspend_lowlevel)(void);
+#else
+int (*acpi_suspend_lowlevel)(void) = loongarch_acpi_suspend;
+#endif
+
 void __init acpi_boot_table_init(void)
 {
 	/*
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index b78816cf74ae5..8c6e227cb29df 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -16,6 +16,7 @@
 #include <linux/smp.h>
 #include <linux/threads.h>
 #include <linux/export.h>
+#include <linux/syscore_ops.h>
 #include <linux/time.h>
 #include <linux/tracepoint.h>
 #include <linux/sched/hotplug.h>
diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c
index 786735dcc8d67..a6576dea590c0 100644
--- a/arch/loongarch/kernel/time.c
+++ b/arch/loongarch/kernel/time.c
@@ -115,12 +115,17 @@ static unsigned long __init get_loops_per_jiffy(void)
 	return lpj;
 }
 
-static long init_timeval;
+static long init_offset __nosavedata;
+
+void save_counter(void)
+{
+	init_offset = drdtime();
+}
 
 void sync_counter(void)
 {
 	/* Ensure counter begin at 0 */
-	csr_write64(-init_timeval, LOONGARCH_CSR_CNTC);
+	csr_write64(init_offset, LOONGARCH_CSR_CNTC);
 }
 
 static int get_timer_irq(void)
@@ -219,7 +224,7 @@ void __init time_init(void)
 	else
 		const_clock_freq = calc_const_freq();
 
-	init_timeval = drdtime() - csr_read64(LOONGARCH_CSR_CNTC);
+	init_offset = -(drdtime() - csr_read64(LOONGARCH_CSR_CNTC));
 
 	constant_clockevent_init();
 	constant_clocksource_init();
diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile
new file mode 100644
index 0000000000000..6740117decaa2
--- /dev/null
+++ b/arch/loongarch/power/Makefile
@@ -0,0 +1,3 @@
+obj-y	+= platform.o
+
+obj-$(CONFIG_SUSPEND)		+= suspend.o suspend_asm.o
diff --git a/arch/loongarch/power/platform.c b/arch/loongarch/power/platform.c
new file mode 100644
index 0000000000000..3ea8e07aa225f
--- /dev/null
+++ b/arch/loongarch/power/platform.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#include <linux/acpi.h>
+#include <linux/platform_device.h>
+
+#include <asm/bootinfo.h>
+#include <asm/loongson.h>
+
+void enable_gpe_wakeup(void)
+{
+	if (acpi_disabled)
+	       return;
+
+	if (acpi_gbl_reduced_hardware)
+	       return;
+
+	acpi_enable_all_wakeup_gpes();
+}
+
+void enable_pci_wakeup(void)
+{
+	if (acpi_disabled)
+	       return;
+
+	if (acpi_gbl_reduced_hardware)
+	       return;
+
+	acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_STATUS, 1);
+
+	if (acpi_gbl_FADT.flags & ACPI_FADT_PCI_EXPRESS_WAKE)
+		acpi_write_bit_register(ACPI_BITREG_PCIEXP_WAKE_DISABLE, 0);
+}
+
+static int __init loongson3_acpi_suspend_init(void)
+{
+#ifdef CONFIG_ACPI
+	acpi_status status;
+	uint64_t suspend_addr = 0;
+
+	if (acpi_disabled || acpi_gbl_reduced_hardware)
+		return 0;
+
+	acpi_write_bit_register(ACPI_BITREG_SCI_ENABLE, 1);
+	status = acpi_evaluate_integer(NULL, "\\SADR", NULL, &suspend_addr);
+	if (ACPI_FAILURE(status) || !suspend_addr) {
+		pr_err("ACPI S3 is not support!\n");
+		return -1;
+	}
+	loongson_sysconf.suspend_addr = (u64)phys_to_virt(PHYSADDR(suspend_addr));
+#endif
+	return 0;
+}
+
+device_initcall(loongson3_acpi_suspend_init);
diff --git a/arch/loongarch/power/suspend.c b/arch/loongarch/power/suspend.c
new file mode 100644
index 0000000000000..5e19733e5e05f
--- /dev/null
+++ b/arch/loongarch/power/suspend.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * loongson-specific suspend support
+ *
+ * Author: Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#include <linux/acpi.h>
+#include <linux/pm.h>
+#include <linux/suspend.h>
+
+#include <asm/loongarch.h>
+#include <asm/loongson.h>
+#include <asm/setup.h>
+#include <asm/time.h>
+#include <asm/tlbflush.h>
+
+u64 loongarch_suspend_addr;
+
+struct saved_registers {
+	u32 ecfg;
+	u32 euen;
+	u64 pgd;
+	u64 kpgd;
+	u32 pwctl0;
+	u32 pwctl1;
+};
+static struct saved_registers saved_regs;
+
+static void arch_common_suspend(void)
+{
+	save_counter();
+	saved_regs.pgd = csr_read64(LOONGARCH_CSR_PGDL);
+	saved_regs.kpgd = csr_read64(LOONGARCH_CSR_PGDH);
+	saved_regs.pwctl0 = csr_read32(LOONGARCH_CSR_PWCTL0);
+	saved_regs.pwctl1 = csr_read32(LOONGARCH_CSR_PWCTL1);
+	saved_regs.ecfg = csr_read32(LOONGARCH_CSR_ECFG);
+	saved_regs.euen = csr_read32(LOONGARCH_CSR_EUEN);
+
+	loongarch_suspend_addr = loongson_sysconf.suspend_addr;
+}
+
+static void arch_common_resume(void)
+{
+	sync_counter();
+	local_flush_tlb_all();
+	csr_write64(per_cpu_offset(0), PERCPU_BASE_KS);
+	csr_write64(eentry, LOONGARCH_CSR_EENTRY);
+	csr_write64(eentry, LOONGARCH_CSR_MERRENTRY);
+	csr_write64(tlbrentry, LOONGARCH_CSR_TLBRENTRY);
+
+	csr_write64(saved_regs.pgd, LOONGARCH_CSR_PGDL);
+	csr_write64(saved_regs.kpgd, LOONGARCH_CSR_PGDH);
+	csr_write32(saved_regs.pwctl0, LOONGARCH_CSR_PWCTL0);
+	csr_write32(saved_regs.pwctl1, LOONGARCH_CSR_PWCTL1);
+	csr_write32(saved_regs.ecfg, LOONGARCH_CSR_ECFG);
+	csr_write32(saved_regs.euen, LOONGARCH_CSR_EUEN);
+}
+
+int loongarch_acpi_suspend(void)
+{
+	enable_gpe_wakeup();
+	enable_pci_wakeup();
+
+	arch_common_suspend();
+
+	/* processor specific suspend */
+	loongarch_suspend_enter();
+
+	arch_common_resume();
+
+	return 0;
+}
diff --git a/arch/loongarch/power/suspend_asm.S b/arch/loongarch/power/suspend_asm.S
new file mode 100644
index 0000000000000..eb2675642f9f4
--- /dev/null
+++ b/arch/loongarch/power/suspend_asm.S
@@ -0,0 +1,89 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Sleep helper for Loongson-3 sleep mode.
+ *
+ * Author: Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/asm.h>
+#include <asm/asmmacro.h>
+#include <asm/addrspace.h>
+#include <asm/loongarch.h>
+#include <asm/stackframe.h>
+
+/* preparatory stuff */
+.macro	SETUP_SLEEP
+	addi.d		sp, sp, -PT_SIZE
+	st.d		$r1, sp, PT_R1
+	st.d		$r2, sp, PT_R2
+	st.d		$r3, sp, PT_R3
+	st.d		$r4, sp, PT_R4
+	st.d		$r21, sp, PT_R21
+	st.d		$r22, sp, PT_R22
+	st.d		$r23, sp, PT_R23
+	st.d		$r24, sp, PT_R24
+	st.d		$r25, sp, PT_R25
+	st.d		$r26, sp, PT_R26
+	st.d		$r27, sp, PT_R27
+	st.d		$r28, sp, PT_R28
+	st.d		$r29, sp, PT_R29
+	st.d		$r30, sp, PT_R30
+	st.d		$r31, sp, PT_R31
+
+	la.pcrel	t0, acpi_saved_sp
+	st.d		sp, t0, 0
+.endm
+
+.macro SETUP_WAKEUP
+	ld.d		$r1, sp, PT_R1
+	ld.d		$r2, sp, PT_R2
+	ld.d		$r3, sp, PT_R3
+	ld.d		$r4, sp, PT_R4
+	ld.d		$r21, sp, PT_R21
+	ld.d		$r22, sp, PT_R22
+	ld.d		$r23, sp, PT_R23
+	ld.d		$r24, sp, PT_R24
+	ld.d		$r25, sp, PT_R25
+	ld.d		$r26, sp, PT_R26
+	ld.d		$r27, sp, PT_R27
+	ld.d		$r28, sp, PT_R28
+	ld.d		$r29, sp, PT_R29
+	ld.d		$r30, sp, PT_R30
+	ld.d		$r31, sp, PT_R31
+.endm
+
+	.text
+	.align 12
+
+/* Sleep/wakeup code for Loongson-3 */
+SYM_FUNC_START(loongarch_suspend_enter)
+	SETUP_SLEEP
+	bl		__flush_cache_all
+
+	/* Pass RA and SP to BIOS */
+	addi.d		a1, sp, 0
+	la.pcrel	a0, loongarch_wakeup_start
+	la.pcrel	t0, loongarch_suspend_addr
+	ld.d		t0, t0, 0
+	jirl		a0, t0, 0 /* Call BIOS's STR sleep routine */
+
+	/*
+	 * This is where we return upon wakeup.
+	 * Reload all of the registers and return.
+	 */
+SYM_INNER_LABEL(loongarch_wakeup_start, SYM_L_GLOBAL)
+	li.d		t0, CSR_DMW0_INIT	# UC, PLV0
+	csrwr		t0, LOONGARCH_CSR_DMWIN0
+	li.d		t0, CSR_DMW1_INIT	# CA, PLV0
+	csrwr		t0, LOONGARCH_CSR_DMWIN1
+
+	la.abs		t0, 0f
+	jr		t0
+0:
+	la.pcrel	t0, acpi_saved_sp
+	ld.d		sp, t0, 0
+	SETUP_WAKEUP
+	addi.d		sp, sp, PT_SIZE
+	jr		ra
+SYM_FUNC_END(loongarch_suspend_enter)
-- 
GitLab


From 7db54bfe44a662c8f2c10277bccfa02c2f4c719c Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3821/4122] LoongArch: Add hibernation (ACPI S4) support

Add hibernation (Suspend to Disk, aka ACPI S4) support for LoongArch.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig               |  3 ++
 arch/loongarch/kernel/asm-offsets.c  | 12 +++++
 arch/loongarch/kernel/reset.c        |  5 +++
 arch/loongarch/kernel/setup.c        |  5 +++
 arch/loongarch/power/Makefile        |  1 +
 arch/loongarch/power/hibernate.c     | 62 ++++++++++++++++++++++++++
 arch/loongarch/power/hibernate_asm.S | 66 ++++++++++++++++++++++++++++
 7 files changed, 154 insertions(+)
 create mode 100644 arch/loongarch/power/hibernate.c
 create mode 100644 arch/loongarch/power/hibernate_asm.S

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 0c8b2b1a96267..576a649ac13c7 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -521,6 +521,9 @@ menu "Power management options"
 config ARCH_SUSPEND_POSSIBLE
 	def_bool y
 
+config ARCH_HIBERNATION_POSSIBLE
+	def_bool y
+
 source "kernel/power/Kconfig"
 source "drivers/acpi/Kconfig"
 
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index bdd88eda9513f..4ef494577813e 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -257,3 +257,15 @@ void output_smpboot_defines(void)
 	BLANK();
 }
 #endif
+
+#ifdef CONFIG_HIBERNATION
+void output_pbe_defines(void)
+{
+	COMMENT(" Linux struct pbe offsets. ");
+	OFFSET(PBE_ADDRESS, pbe, address);
+	OFFSET(PBE_ORIG_ADDRESS, pbe, orig_address);
+	OFFSET(PBE_NEXT, pbe, next);
+	DEFINE(PBE_SIZE, sizeof(struct pbe));
+	BLANK();
+}
+#endif
diff --git a/arch/loongarch/kernel/reset.c b/arch/loongarch/kernel/reset.c
index 8c82021eb2f44..1ef8c63835351 100644
--- a/arch/loongarch/kernel/reset.c
+++ b/arch/loongarch/kernel/reset.c
@@ -15,6 +15,7 @@
 #include <acpi/reboot.h>
 #include <asm/idle.h>
 #include <asm/loongarch.h>
+#include <asm/loongson.h>
 
 void (*pm_power_off)(void);
 EXPORT_SYMBOL(pm_power_off);
@@ -41,6 +42,10 @@ void machine_power_off(void)
 #ifdef CONFIG_SMP
 	preempt_disable();
 	smp_send_stop();
+#endif
+#ifdef CONFIG_PM
+	if (!acpi_disabled)
+		enable_pci_wakeup();
 #endif
 	do_kernel_power_off();
 #ifdef CONFIG_EFI
diff --git a/arch/loongarch/kernel/setup.c b/arch/loongarch/kernel/setup.c
index fdabf2ac1927d..4344502c0b317 100644
--- a/arch/loongarch/kernel/setup.c
+++ b/arch/loongarch/kernel/setup.c
@@ -31,6 +31,7 @@
 #include <linux/libfdt.h>
 #include <linux/of_fdt.h>
 #include <linux/of_address.h>
+#include <linux/suspend.h>
 #include <linux/swiotlb.h>
 
 #include <asm/addrspace.h>
@@ -370,6 +371,10 @@ static void __init arch_mem_init(char **cmdline_p)
 
 	dma_contiguous_reserve(PFN_PHYS(max_low_pfn));
 
+	/* Reserve for hibernation. */
+	register_nosave_region(PFN_DOWN(__pa_symbol(&__nosave_begin)),
+				   PFN_UP(__pa_symbol(&__nosave_end)));
+
 	memblock_dump_all();
 
 	early_memtest(PFN_PHYS(ARCH_PFN_OFFSET), PFN_PHYS(max_low_pfn));
diff --git a/arch/loongarch/power/Makefile b/arch/loongarch/power/Makefile
index 6740117decaa2..58151d003e40e 100644
--- a/arch/loongarch/power/Makefile
+++ b/arch/loongarch/power/Makefile
@@ -1,3 +1,4 @@
 obj-y	+= platform.o
 
 obj-$(CONFIG_SUSPEND)		+= suspend.o suspend_asm.o
+obj-$(CONFIG_HIBERNATION)	+= hibernate.o hibernate_asm.o
diff --git a/arch/loongarch/power/hibernate.c b/arch/loongarch/power/hibernate.c
new file mode 100644
index 0000000000000..1e0590542f987
--- /dev/null
+++ b/arch/loongarch/power/hibernate.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <asm/fpu.h>
+#include <asm/loongson.h>
+#include <asm/sections.h>
+#include <asm/tlbflush.h>
+#include <linux/suspend.h>
+
+static u32 saved_crmd;
+static u32 saved_prmd;
+static u32 saved_euen;
+static u32 saved_ecfg;
+static u64 saved_pcpu_base;
+struct pt_regs saved_regs;
+
+void save_processor_state(void)
+{
+	saved_crmd = csr_read32(LOONGARCH_CSR_CRMD);
+	saved_prmd = csr_read32(LOONGARCH_CSR_PRMD);
+	saved_euen = csr_read32(LOONGARCH_CSR_EUEN);
+	saved_ecfg = csr_read32(LOONGARCH_CSR_ECFG);
+	saved_pcpu_base = csr_read64(PERCPU_BASE_KS);
+
+	if (is_fpu_owner())
+		save_fp(current);
+}
+
+void restore_processor_state(void)
+{
+	csr_write32(saved_crmd, LOONGARCH_CSR_CRMD);
+	csr_write32(saved_prmd, LOONGARCH_CSR_PRMD);
+	csr_write32(saved_euen, LOONGARCH_CSR_EUEN);
+	csr_write32(saved_ecfg, LOONGARCH_CSR_ECFG);
+	csr_write64(saved_pcpu_base, PERCPU_BASE_KS);
+
+	if (is_fpu_owner())
+		restore_fp(current);
+}
+
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn = PFN_DOWN(__pa(&__nosave_begin));
+	unsigned long nosave_end_pfn = PFN_UP(__pa(&__nosave_end));
+
+	return	(pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
+}
+
+extern int swsusp_asm_suspend(void);
+
+int swsusp_arch_suspend(void)
+{
+	enable_pci_wakeup();
+	return swsusp_asm_suspend();
+}
+
+extern int swsusp_asm_resume(void);
+
+int swsusp_arch_resume(void)
+{
+	/* Avoid TLB mismatch during and after kernel resume */
+	local_flush_tlb_all();
+	return swsusp_asm_resume();
+}
diff --git a/arch/loongarch/power/hibernate_asm.S b/arch/loongarch/power/hibernate_asm.S
new file mode 100644
index 0000000000000..3c747c08d65dc
--- /dev/null
+++ b/arch/loongarch/power/hibernate_asm.S
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Hibernation support specific for LoongArch
+ *
+ * Author: Huacai Chen <chenhuacai@loongson.cn>
+ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
+ */
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/regdef.h>
+
+.text
+SYM_FUNC_START(swsusp_asm_suspend)
+	la.pcrel	t0, saved_regs
+	PTR_S		ra, t0, PT_R1
+	PTR_S		tp, t0, PT_R2
+	PTR_S		sp, t0, PT_R3
+	PTR_S		u0, t0, PT_R21
+	PTR_S		fp, t0, PT_R22
+	PTR_S		s0, t0, PT_R23
+	PTR_S		s1, t0, PT_R24
+	PTR_S		s2, t0, PT_R25
+	PTR_S		s3, t0, PT_R26
+	PTR_S		s4, t0, PT_R27
+	PTR_S		s5, t0, PT_R28
+	PTR_S		s6, t0, PT_R29
+	PTR_S		s7, t0, PT_R30
+	PTR_S		s8, t0, PT_R31
+	b		swsusp_save
+SYM_FUNC_END(swsusp_asm_suspend)
+
+SYM_FUNC_START(swsusp_asm_resume)
+	la.pcrel	t0, restore_pblist
+	PTR_L		t0, t0, 0
+0:
+	PTR_L		t1, t0, PBE_ADDRESS  /* source */
+	PTR_L		t2, t0, PBE_ORIG_ADDRESS /* destination */
+	PTR_LI		t3, _PAGE_SIZE
+	PTR_ADD		t3, t3, t1
+1:
+	REG_L		t8, t1, 0
+	REG_S		t8, t2, 0
+	PTR_ADDI	t1, t1, SZREG
+	PTR_ADDI	t2, t2, SZREG
+	bne		t1, t3, 1b
+	PTR_L		t0, t0, PBE_NEXT
+	bnez		t0, 0b
+	la.pcrel	t0, saved_regs
+	PTR_L		ra, t0, PT_R1
+	PTR_L		tp, t0, PT_R2
+	PTR_L		sp, t0, PT_R3
+	PTR_L		u0, t0, PT_R21
+	PTR_L		fp, t0, PT_R22
+	PTR_L		s0, t0, PT_R23
+	PTR_L		s1, t0, PT_R24
+	PTR_L		s2, t0, PT_R25
+	PTR_L		s3, t0, PT_R26
+	PTR_L		s4, t0, PT_R27
+	PTR_L		s5, t0, PT_R28
+	PTR_L		s6, t0, PT_R29
+	PTR_L		s7, t0, PT_R30
+	PTR_L		s8, t0, PT_R31
+	PTR_LI		a0, 0x0
+	jirl		zero, ra, 0
+SYM_FUNC_END(swsusp_asm_resume)
-- 
GitLab


From 09f33601bf940f955c10a6e75a1c1b7bcadee5e2 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3822/4122] LoongArch: Add basic STACKPROTECTOR support

Add basic stack protector support similar to other architectures. A
constant canary value is set at boot time, and with help of compiler's
-fstack-protector we can detect stack corruption.

Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig                      |  1 +
 arch/loongarch/include/asm/stackprotector.h | 38 +++++++++++++++++++++
 arch/loongarch/kernel/asm-offsets.c         |  3 ++
 arch/loongarch/kernel/process.c             |  6 ++++
 arch/loongarch/kernel/switch.S              |  5 +++
 5 files changed, 53 insertions(+)
 create mode 100644 arch/loongarch/include/asm/stackprotector.h

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 576a649ac13c7..28d827c6abb3b 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -103,6 +103,7 @@ config LOONGARCH
 	select HAVE_REGS_AND_STACK_ACCESS_API
 	select HAVE_RSEQ
 	select HAVE_SETUP_PER_CPU_AREA if NUMA
+	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
 	select HAVE_TIF_NOHZ
 	select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP
diff --git a/arch/loongarch/include/asm/stackprotector.h b/arch/loongarch/include/asm/stackprotector.h
new file mode 100644
index 0000000000000..a1a965751a7b9
--- /dev/null
+++ b/arch/loongarch/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCC stack protector support.
+ *
+ * Stack protector works by putting predefined pattern at the start of
+ * the stack frame and verifying that it hasn't been overwritten when
+ * returning from the function. The pattern is called stack canary and
+ * on LoongArch gcc expects it to be defined by a global variable called
+ * "__stack_chk_guard".
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+
+extern unsigned long __stack_chk_guard;
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+	unsigned long canary;
+
+	/* Try to get a semi random initial value. */
+	get_random_bytes(&canary, sizeof(canary));
+	canary ^= LINUX_VERSION_CODE;
+
+	current->stack_canary = canary;
+	__stack_chk_guard = current->stack_canary;
+}
+
+#endif	/* _ASM_STACKPROTECTOR_H */
diff --git a/arch/loongarch/kernel/asm-offsets.c b/arch/loongarch/kernel/asm-offsets.c
index 4ef494577813e..4bdb203fc66e1 100644
--- a/arch/loongarch/kernel/asm-offsets.c
+++ b/arch/loongarch/kernel/asm-offsets.c
@@ -68,6 +68,9 @@ void output_task_defines(void)
 	OFFSET(TASK_FLAGS, task_struct, flags);
 	OFFSET(TASK_MM, task_struct, mm);
 	OFFSET(TASK_PID, task_struct, pid);
+#if defined(CONFIG_STACKPROTECTOR)
+	OFFSET(TASK_STACK_CANARY, task_struct, stack_canary);
+#endif
 	DEFINE(TASK_STRUCT_SIZE, sizeof(struct task_struct));
 	BLANK();
 }
diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c
index ddb8ba4eb399d..790cc14c5f06a 100644
--- a/arch/loongarch/kernel/process.c
+++ b/arch/loongarch/kernel/process.c
@@ -47,6 +47,12 @@
 #include <asm/unwind.h>
 #include <asm/vdso.h>
 
+#ifdef CONFIG_STACKPROTECTOR
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
 /*
  * Idle related variables and functions
  */
diff --git a/arch/loongarch/kernel/switch.S b/arch/loongarch/kernel/switch.S
index 202a163cb32f6..31dd8199b2453 100644
--- a/arch/loongarch/kernel/switch.S
+++ b/arch/loongarch/kernel/switch.S
@@ -23,6 +23,11 @@ SYM_FUNC_START(__switch_to)
 	stptr.d	ra, a0, THREAD_REG01
 	stptr.d a3, a0, THREAD_SCHED_RA
 	stptr.d a4, a0, THREAD_SCHED_CFA
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_SMP)
+	la	t7, __stack_chk_guard
+	LONG_L	t8, a1, TASK_STACK_CANARY
+	LONG_S	t8, t7, 0
+#endif
 	move	tp, a2
 	cpu_restore_nonscratch a1
 
-- 
GitLab


From 9151dde40356880bb445f719f5ebbb1319054d5f Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3823/4122] LoongArch: module: Use got/plt section indices for
 relocations

Instead of saving a pointer to the .got, .plt and .plt_idx sections to
apply {got,plt}-based relocations, save and use their section indices
instead.

The mod->arch.{core,init}.{got,plt} pointers were problematic for live-
patch because they pointed within temporary section headers (provided by
the module loader via info->sechdrs) that would be freed after module
load. Since livepatch modules may need to apply relocations post-module-
load (for example, to patch a module that is loaded later), using section
indices to offset into the section headers (instead of accessing them
through a saved pointer) allows livepatch modules on LoongArch to pass
in their own copy of the section headers to apply_relocate_add() to
apply delayed relocations.

The method used is same as commit c8ebf64eab743 ("arm64/module: use plt
section indices for relocations").

Signed-off-by: Hongchen Zhang <zhanghongchen@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/module.h     | 22 +++++-----
 arch/loongarch/kernel/module-sections.c | 54 +++++++++++++------------
 arch/loongarch/kernel/module.c          | 39 ++++++++++++------
 3 files changed, 68 insertions(+), 47 deletions(-)

diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index b29b19a46f427..60dc62a1146ee 100644
--- a/arch/loongarch/include/asm/module.h
+++ b/arch/loongarch/include/asm/module.h
@@ -11,7 +11,7 @@
 #define RELA_STACK_DEPTH 16
 
 struct mod_section {
-	Elf_Shdr *shdr;
+	int shndx;
 	int num_entries;
 	int max_entries;
 };
@@ -37,8 +37,8 @@ struct plt_idx_entry {
 	Elf_Addr symbol_addr;
 };
 
-Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val);
-Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val);
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val);
+Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val);
 
 static inline struct got_entry emit_got_entry(Elf_Addr val)
 {
@@ -62,10 +62,10 @@ static inline struct plt_idx_entry emit_plt_idx_entry(unsigned long val)
 	return (struct plt_idx_entry) { val };
 }
 
-static inline int get_plt_idx(unsigned long val, const struct mod_section *sec)
+static inline int get_plt_idx(unsigned long val, Elf_Shdr *sechdrs, const struct mod_section *sec)
 {
 	int i;
-	struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sec->shdr->sh_addr;
+	struct plt_idx_entry *plt_idx = (struct plt_idx_entry *)sechdrs[sec->shndx].sh_addr;
 
 	for (i = 0; i < sec->num_entries; i++) {
 		if (plt_idx[i].symbol_addr == val)
@@ -76,11 +76,12 @@ static inline int get_plt_idx(unsigned long val, const struct mod_section *sec)
 }
 
 static inline struct plt_entry *get_plt_entry(unsigned long val,
-				      const struct mod_section *sec_plt,
-				      const struct mod_section *sec_plt_idx)
+					      Elf_Shdr *sechdrs,
+					      const struct mod_section *sec_plt,
+					      const struct mod_section *sec_plt_idx)
 {
-	int plt_idx = get_plt_idx(val, sec_plt_idx);
-	struct plt_entry *plt = (struct plt_entry *)sec_plt->shdr->sh_addr;
+	int plt_idx = get_plt_idx(val, sechdrs, sec_plt_idx);
+	struct plt_entry *plt = (struct plt_entry *)sechdrs[sec_plt->shndx].sh_addr;
 
 	if (plt_idx < 0)
 		return NULL;
@@ -89,10 +90,11 @@ static inline struct plt_entry *get_plt_entry(unsigned long val,
 }
 
 static inline struct got_entry *get_got_entry(Elf_Addr val,
+					      Elf_Shdr *sechdrs,
 					      const struct mod_section *sec)
 {
-	struct got_entry *got = (struct got_entry *)sec->shdr->sh_addr;
 	int i;
+	struct got_entry *got = (struct got_entry *)sechdrs[sec->shndx].sh_addr;
 
 	for (i = 0; i < sec->num_entries; i++)
 		if (got[i].symbol_addr == val)
diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
index d296a70b758fd..13d9a427325aa 100644
--- a/arch/loongarch/kernel/module-sections.c
+++ b/arch/loongarch/kernel/module-sections.c
@@ -7,17 +7,17 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 
-Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val)
+Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val)
 {
 	struct mod_section *got_sec = &mod->arch.got;
 	int i = got_sec->num_entries;
-	struct got_entry *got = get_got_entry(val, got_sec);
+	struct got_entry *got = get_got_entry(val, sechdrs, got_sec);
 
 	if (got)
 		return (Elf_Addr)got;
 
 	/* There is no GOT entry for val yet, create a new one. */
-	got = (struct got_entry *)got_sec->shdr->sh_addr;
+	got = (struct got_entry *)sechdrs[got_sec->shndx].sh_addr;
 	got[i] = emit_got_entry(val);
 
 	got_sec->num_entries++;
@@ -33,12 +33,12 @@ Elf_Addr module_emit_got_entry(struct module *mod, Elf_Addr val)
 	return (Elf_Addr)&got[i];
 }
 
-Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val)
+Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val)
 {
 	int nr;
 	struct mod_section *plt_sec = &mod->arch.plt;
 	struct mod_section *plt_idx_sec = &mod->arch.plt_idx;
-	struct plt_entry *plt = get_plt_entry(val, plt_sec, plt_idx_sec);
+	struct plt_entry *plt = get_plt_entry(val, sechdrs, plt_sec, plt_idx_sec);
 	struct plt_idx_entry *plt_idx;
 
 	if (plt)
@@ -47,9 +47,9 @@ Elf_Addr module_emit_plt_entry(struct module *mod, Elf_Addr val)
 	nr = plt_sec->num_entries;
 
 	/* There is no duplicate entry, create a new one */
-	plt = (struct plt_entry *)plt_sec->shdr->sh_addr;
+	plt = (struct plt_entry *)sechdrs[plt_sec->shndx].sh_addr;
 	plt[nr] = emit_plt_entry(val);
-	plt_idx = (struct plt_idx_entry *)plt_idx_sec->shdr->sh_addr;
+	plt_idx = (struct plt_idx_entry *)sechdrs[plt_idx_sec->shndx].sh_addr;
 	plt_idx[nr] = emit_plt_idx_entry(val);
 
 	plt_sec->num_entries++;
@@ -103,28 +103,29 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
 	unsigned int i, num_plts = 0, num_gots = 0;
+	Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec;
 
 	/*
 	 * Find the empty .plt sections.
 	 */
 	for (i = 0; i < ehdr->e_shnum; i++) {
 		if (!strcmp(secstrings + sechdrs[i].sh_name, ".got"))
-			mod->arch.got.shdr = sechdrs + i;
+			mod->arch.got.shndx = i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt"))
-			mod->arch.plt.shdr = sechdrs + i;
+			mod->arch.plt.shndx = i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx"))
-			mod->arch.plt_idx.shdr = sechdrs + i;
+			mod->arch.plt_idx.shndx = i;
 	}
 
-	if (!mod->arch.got.shdr) {
+	if (!mod->arch.got.shndx) {
 		pr_err("%s: module GOT section(s) missing\n", mod->name);
 		return -ENOEXEC;
 	}
-	if (!mod->arch.plt.shdr) {
+	if (!mod->arch.plt.shndx) {
 		pr_err("%s: module PLT section(s) missing\n", mod->name);
 		return -ENOEXEC;
 	}
-	if (!mod->arch.plt_idx.shdr) {
+	if (!mod->arch.plt_idx.shndx) {
 		pr_err("%s: module PLT.IDX section(s) missing\n", mod->name);
 		return -ENOEXEC;
 	}
@@ -145,24 +146,27 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 		count_max_entries(relas, num_rela, &num_plts, &num_gots);
 	}
 
-	mod->arch.got.shdr->sh_type = SHT_NOBITS;
-	mod->arch.got.shdr->sh_flags = SHF_ALLOC;
-	mod->arch.got.shdr->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.got.shdr->sh_size = (num_gots + 1) * sizeof(struct got_entry);
+	got_sec = sechdrs + mod->arch.got.shndx;
+	got_sec->sh_type = SHT_NOBITS;
+	got_sec->sh_flags = SHF_ALLOC;
+	got_sec->sh_addralign = L1_CACHE_BYTES;
+	got_sec->sh_size = (num_gots + 1) * sizeof(struct got_entry);
 	mod->arch.got.num_entries = 0;
 	mod->arch.got.max_entries = num_gots;
 
-	mod->arch.plt.shdr->sh_type = SHT_NOBITS;
-	mod->arch.plt.shdr->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
-	mod->arch.plt.shdr->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.plt.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_entry);
+	plt_sec = sechdrs + mod->arch.plt.shndx;
+	plt_sec->sh_type = SHT_NOBITS;
+	plt_sec->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+	plt_sec->sh_addralign = L1_CACHE_BYTES;
+	plt_sec->sh_size = (num_plts + 1) * sizeof(struct plt_entry);
 	mod->arch.plt.num_entries = 0;
 	mod->arch.plt.max_entries = num_plts;
 
-	mod->arch.plt_idx.shdr->sh_type = SHT_NOBITS;
-	mod->arch.plt_idx.shdr->sh_flags = SHF_ALLOC;
-	mod->arch.plt_idx.shdr->sh_addralign = L1_CACHE_BYTES;
-	mod->arch.plt_idx.shdr->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry);
+	plt_idx_sec = sechdrs + mod->arch.plt_idx.shndx;
+	plt_idx_sec->sh_type = SHT_NOBITS;
+	plt_idx_sec->sh_flags = SHF_ALLOC;
+	plt_idx_sec->sh_addralign = L1_CACHE_BYTES;
+	plt_idx_sec->sh_size = (num_plts + 1) * sizeof(struct plt_idx_entry);
 	mod->arch.plt_idx.num_entries = 0;
 	mod->arch.plt_idx.max_entries = num_plts;
 
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 825fcf77f9e73..899dc677cec3e 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -99,16 +99,17 @@ static int apply_r_larch_sop_push_dup(struct module *mod, u32 *location, Elf_Add
 	return 0;
 }
 
-static int apply_r_larch_sop_push_plt_pcrel(struct module *mod, u32 *location, Elf_Addr v,
+static int apply_r_larch_sop_push_plt_pcrel(struct module *mod,
+			Elf_Shdr *sechdrs, u32 *location, Elf_Addr v,
 			s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
 {
 	ptrdiff_t offset = (void *)v - (void *)location;
 
 	if (offset >= SZ_128M)
-		v = module_emit_plt_entry(mod, v);
+		v = module_emit_plt_entry(mod, sechdrs, v);
 
 	if (offset < -SZ_128M)
-		v = module_emit_plt_entry(mod, v);
+		v = module_emit_plt_entry(mod, sechdrs, v);
 
 	return apply_r_larch_sop_push_pcrel(mod, location, v, rela_stack, rela_stack_top, type);
 }
@@ -272,17 +273,18 @@ static int apply_r_larch_add_sub(struct module *mod, u32 *location, Elf_Addr v,
 	}
 }
 
-static int apply_r_larch_b26(struct module *mod, u32 *location, Elf_Addr v,
+static int apply_r_larch_b26(struct module *mod,
+			Elf_Shdr *sechdrs, u32 *location, Elf_Addr v,
 			s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
 {
 	ptrdiff_t offset = (void *)v - (void *)location;
 	union loongarch_instruction *insn = (union loongarch_instruction *)location;
 
 	if (offset >= SZ_128M)
-		v = module_emit_plt_entry(mod, v);
+		v = module_emit_plt_entry(mod, sechdrs, v);
 
 	if (offset < -SZ_128M)
-		v = module_emit_plt_entry(mod, v);
+		v = module_emit_plt_entry(mod, sechdrs, v);
 
 	offset = (void *)v - (void *)location;
 
@@ -339,10 +341,11 @@ static int apply_r_larch_pcala(struct module *mod, u32 *location, Elf_Addr v,
 	return 0;
 }
 
-static int apply_r_larch_got_pc(struct module *mod, u32 *location, Elf_Addr v,
+static int apply_r_larch_got_pc(struct module *mod,
+			Elf_Shdr *sechdrs, u32 *location, Elf_Addr v,
 			s64 *rela_stack, size_t *rela_stack_top, unsigned int type)
 {
-	Elf_Addr got = module_emit_got_entry(mod, v);
+	Elf_Addr got = module_emit_got_entry(mod, sechdrs, v);
 
 	if (!got)
 		return -EINVAL;
@@ -387,13 +390,10 @@ static reloc_rela_handler reloc_rela_handlers[] = {
 	[R_LARCH_SOP_PUSH_PCREL]			     = apply_r_larch_sop_push_pcrel,
 	[R_LARCH_SOP_PUSH_ABSOLUTE]			     = apply_r_larch_sop_push_absolute,
 	[R_LARCH_SOP_PUSH_DUP]				     = apply_r_larch_sop_push_dup,
-	[R_LARCH_SOP_PUSH_PLT_PCREL]			     = apply_r_larch_sop_push_plt_pcrel,
 	[R_LARCH_SOP_SUB ... R_LARCH_SOP_IF_ELSE] 	     = apply_r_larch_sop,
 	[R_LARCH_SOP_POP_32_S_10_5 ... R_LARCH_SOP_POP_32_U] = apply_r_larch_sop_imm_field,
 	[R_LARCH_ADD32 ... R_LARCH_SUB64]		     = apply_r_larch_add_sub,
-	[R_LARCH_B26]					     = apply_r_larch_b26,
 	[R_LARCH_PCALA_HI20...R_LARCH_PCALA64_HI12]	     = apply_r_larch_pcala,
-	[R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12]	     = apply_r_larch_got_pc,
 };
 
 int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
@@ -444,7 +444,22 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab,
 		       sym->st_value, rel[i].r_addend, (u64)location);
 
 		v = sym->st_value + rel[i].r_addend;
-		err = handler(mod, location, v, rela_stack, &rela_stack_top, type);
+		switch (type) {
+		case R_LARCH_B26:
+			err = apply_r_larch_b26(mod, sechdrs, location,
+						     v, rela_stack, &rela_stack_top, type);
+			break;
+		case R_LARCH_GOT_PC_HI20...R_LARCH_GOT_PC_LO12:
+			err = apply_r_larch_got_pc(mod, sechdrs, location,
+						     v, rela_stack, &rela_stack_top, type);
+			break;
+		case R_LARCH_SOP_PUSH_PLT_PCREL:
+			err = apply_r_larch_sop_push_plt_pcrel(mod, sechdrs, location,
+						     v, rela_stack, &rela_stack_top, type);
+			break;
+		default:
+			err = handler(mod, location, v, rela_stack, &rela_stack_top, type);
+		}
 		if (err)
 			return err;
 	}
-- 
GitLab


From dbe3ba3018ec1fc53ea0d0adf0f687f5d438039d Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3824/4122] LoongArch/ftrace: Add basic support

This patch contains basic ftrace support for LoongArch. Specifically,
function tracer (HAVE_FUNCTION_TRACER), function graph tracer (HAVE_
FUNCTION_GRAPH_TRACER) are implemented following the instructions in
Documentation/trace/ftrace-design.txt.

Use `-pg` makes stub like a child function `void _mcount(void *ra)`.
Thus, it can be seen store RA and alloc stack before `call _mcount`.
Find `alloc stack` at first, and then find `store RA`.

Note that the functions in both inst.c and time.c should not be hooked
with the compiler's -pg option: to prevent infinite self-referencing for
the former, and to ignore early setup stuff for the latter.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig              |  2 +
 arch/loongarch/include/asm/ftrace.h | 21 +++++++
 arch/loongarch/kernel/Makefile      |  8 +++
 arch/loongarch/kernel/ftrace.c      | 73 ++++++++++++++++++++++
 arch/loongarch/kernel/mcount.S      | 96 +++++++++++++++++++++++++++++
 5 files changed, 200 insertions(+)
 create mode 100644 arch/loongarch/include/asm/ftrace.h
 create mode 100644 arch/loongarch/kernel/ftrace.c
 create mode 100644 arch/loongarch/kernel/mcount.S

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 28d827c6abb3b..72a41e08f9f46 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -90,6 +90,8 @@ config LOONGARCH
 	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
+	select HAVE_FUNCTION_GRAPH_TRACER
+	select HAVE_FUNCTION_TRACER
 	select HAVE_GENERIC_VDSO
 	select HAVE_IOREMAP_PROT
 	select HAVE_IRQ_EXIT_ON_IRQ_STACK
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
new file mode 100644
index 0000000000000..c3f5cde404646
--- /dev/null
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#ifndef _ASM_LOONGARCH_FTRACE_H
+#define _ASM_LOONGARCH_FTRACE_H
+
+#ifdef CONFIG_FUNCTION_TRACER
+
+#define MCOUNT_INSN_SIZE 4		/* sizeof mcount call */
+
+#ifndef __ASSEMBLY__
+#define mcount _mcount
+extern void _mcount(void);
+extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old);
+#endif /* __ASSEMBLY__ */
+
+#endif /* CONFIG_FUNCTION_TRACER */
+
+#endif /* _ASM_LOONGARCH_FTRACE_H */
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 86744531b100a..3f71bce1c7ce0 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -15,6 +15,14 @@ obj-$(CONFIG_EFI) 		+= efi.o
 
 obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 
+ifdef CONFIG_FUNCTION_TRACER
+obj-y += mcount.o ftrace.o
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
+endif
+
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
 obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 
diff --git a/arch/loongarch/kernel/ftrace.c b/arch/loongarch/kernel/ftrace.c
new file mode 100644
index 0000000000000..8c3ec1bc7aad3
--- /dev/null
+++ b/arch/loongarch/kernel/ftrace.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/init.h>
+#include <linux/ftrace.h>
+#include <linux/syscalls.h>
+#include <linux/uaccess.h>
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/cacheflush.h>
+#include <asm/inst.h>
+#include <asm/loongarch.h>
+#include <asm/syscall.h>
+
+#include <asm-generic/sections.h>
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+/*
+ * As `call _mcount` follows LoongArch psABI, ra-saved operation and
+ * stack operation can be found before this insn.
+ */
+
+static int ftrace_get_parent_ra_addr(unsigned long insn_addr, int *ra_off)
+{
+	int limit = 32;
+	union loongarch_instruction *insn;
+
+	insn = (union loongarch_instruction *)insn_addr;
+
+	do {
+		insn--;
+		limit--;
+
+		if (is_ra_save_ins(insn))
+			*ra_off = -((1 << 12) - insn->reg2i12_format.immediate);
+
+	} while (!is_stack_alloc_ins(insn) && limit);
+
+	if (!limit)
+		return -EINVAL;
+
+	return 0;
+}
+
+void prepare_ftrace_return(unsigned long self_addr,
+		unsigned long callsite_sp, unsigned long old)
+{
+	int ra_off;
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+	if (unlikely(ftrace_graph_is_dead()))
+		return;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	if (ftrace_get_parent_ra_addr(self_addr, &ra_off))
+		goto out;
+
+	if (!function_graph_enter(old, self_addr, 0, NULL))
+		*(unsigned long *)(callsite_sp + ra_off) = return_hooker;
+
+	return;
+
+out:
+	ftrace_graph_stop();
+	WARN_ON(1);
+}
+#endif	/* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S
new file mode 100644
index 0000000000000..8cdc1563cd33f
--- /dev/null
+++ b/arch/loongarch/kernel/mcount.S
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * LoongArch specific _mcount support
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/export.h>
+#include <asm/ftrace.h>
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+
+	.text
+
+#define MCOUNT_S0_OFFSET	(0)
+#define MCOUNT_RA_OFFSET	(SZREG)
+#define MCOUNT_STACK_SIZE	(2 * SZREG)
+
+	.macro MCOUNT_SAVE_REGS
+	PTR_ADDI	sp, sp, -MCOUNT_STACK_SIZE
+	PTR_S		s0, sp, MCOUNT_S0_OFFSET
+	PTR_S		ra, sp, MCOUNT_RA_OFFSET
+	move		s0, a0
+	.endm
+
+	.macro MCOUNT_RESTORE_REGS
+	move		a0, s0
+	PTR_L		ra, sp, MCOUNT_RA_OFFSET
+	PTR_L		s0, sp, MCOUNT_S0_OFFSET
+	PTR_ADDI	sp, sp, MCOUNT_STACK_SIZE
+	.endm
+
+SYM_FUNC_START(_mcount)
+	la.pcrel	t1, ftrace_stub
+	la.pcrel	t2, ftrace_trace_function	/* Prepare t2 for (1) */
+	PTR_L		t2, t2, 0
+	beq		t1, t2, fgraph_trace
+
+	MCOUNT_SAVE_REGS
+
+	move		a0, ra				/* arg0: self return address */
+	move		a1, s0				/* arg1: parent's return address */
+	jirl		ra, t2, 0			/* (1) call *ftrace_trace_function */
+
+	MCOUNT_RESTORE_REGS
+
+fgraph_trace:
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	la.pcrel	t1, ftrace_stub
+	la.pcrel	t3, ftrace_graph_return
+	PTR_L		t3, t3, 0
+	bne		t1, t3, ftrace_graph_caller
+	la.pcrel	t1, ftrace_graph_entry_stub
+	la.pcrel	t3, ftrace_graph_entry
+	PTR_L		t3, t3, 0
+	bne		t1, t3, ftrace_graph_caller
+#endif
+
+SYM_INNER_LABEL(ftrace_stub, SYM_L_GLOBAL)
+	jr		ra
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_INNER_LABEL(ftrace_graph_func, SYM_L_GLOBAL)
+	bl		ftrace_stub
+#endif
+SYM_FUNC_END(_mcount)
+EXPORT_SYMBOL(_mcount)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_FUNC_START(ftrace_graph_caller)
+	MCOUNT_SAVE_REGS
+
+	PTR_ADDI	a0, ra, -4			/* arg0: Callsite self return addr */
+	PTR_ADDI	a1, sp, MCOUNT_STACK_SIZE	/* arg1: Callsite sp */
+	move		a2, s0				/* arg2: Callsite parent ra */
+	bl		prepare_ftrace_return
+
+	MCOUNT_RESTORE_REGS
+	jr		ra
+SYM_FUNC_END(ftrace_graph_caller)
+
+SYM_FUNC_START(return_to_handler)
+	PTR_ADDI	sp, sp, -2 * SZREG
+	PTR_S		a0, sp, 0
+	PTR_S		a1, sp, SZREG
+
+	bl		ftrace_return_to_handler
+
+	/* Restore the real parent address: a0 -> ra */
+	move		ra, a0
+
+	PTR_L		a0, sp, 0
+	PTR_L		a1, sp, SZREG
+	PTR_ADDI	sp, sp, 2 * SZREG
+	jr		ra
+SYM_FUNC_END(return_to_handler)
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-- 
GitLab


From a0a458fbd6f2317832e2d74acdbfa2451c3f4b8f Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3825/4122] LoongArch/ftrace: Add recordmcount support

Recordmcount utility under scripts is run, after compiling each object,
to find out all the locations of calling _mcount() and put them into
specific seciton named __mcount_loc.

Then the linker collects all such information into a table in the kernel
image (between __start_mcount_loc and __stop_mcount_loc) for later use
by ftrace.

This patch adds LoongArch specific definitions to identify such locations.
And on LoongArch, only the C version is used to build the kernel now that
CONFIG_HAVE_C_RECORDMCOUNT is on.

Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig |  2 ++
 scripts/recordmcount.c | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 72a41e08f9f46..8b81cf9a65031 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -85,11 +85,13 @@ config LOONGARCH
 	select HAVE_ARCH_TRANSPARENT_HUGEPAGE
 	select HAVE_ASM_MODVERSIONS
 	select HAVE_CONTEXT_TRACKING_USER
+	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
+	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_FUNCTION_TRACER
 	select HAVE_GENERIC_VDSO
diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c
index cce12e1971d85..e30216525325b 100644
--- a/scripts/recordmcount.c
+++ b/scripts/recordmcount.c
@@ -38,6 +38,14 @@
 #define R_AARCH64_ABS64	257
 #endif
 
+#ifndef EM_LOONGARCH
+#define EM_LOONGARCH		258
+#define R_LARCH_32			1
+#define R_LARCH_64			2
+#define R_LARCH_MARK_LA			20
+#define R_LARCH_SOP_PUSH_PLT_PCREL	29
+#endif
+
 #define R_ARM_PC24		1
 #define R_ARM_THM_CALL		10
 #define R_ARM_CALL		28
@@ -441,6 +449,28 @@ static int arm64_is_fake_mcount(Elf64_Rel const *rp)
 	return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26;
 }
 
+static int LARCH32_is_fake_mcount(Elf32_Rel const *rp)
+{
+	switch (ELF64_R_TYPE(w(rp->r_info))) {
+	case R_LARCH_MARK_LA:
+	case R_LARCH_SOP_PUSH_PLT_PCREL:
+		return 0;
+	}
+
+	return 1;
+}
+
+static int LARCH64_is_fake_mcount(Elf64_Rel const *rp)
+{
+	switch (ELF64_R_TYPE(w(rp->r_info))) {
+	case R_LARCH_MARK_LA:
+	case R_LARCH_SOP_PUSH_PLT_PCREL:
+		return 0;
+	}
+
+	return 1;
+}
+
 /* 64-bit EM_MIPS has weird ELF64_Rela.r_info.
  * http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
  * We interpret Table 29 Relocation Operation (Elf64_Rel, Elf64_Rela) [p.40]
@@ -558,6 +588,7 @@ static int do_file(char const *const fname)
 		break;
 	case EM_IA_64:	reltype = R_IA64_IMM64; break;
 	case EM_MIPS:	/* reltype: e_class    */ break;
+	case EM_LOONGARCH:	/* reltype: e_class    */ break;
 	case EM_PPC:	reltype = R_PPC_ADDR32; break;
 	case EM_PPC64:	reltype = R_PPC64_ADDR64; break;
 	case EM_S390:	/* reltype: e_class    */ break;
@@ -589,6 +620,10 @@ static int do_file(char const *const fname)
 			reltype = R_MIPS_32;
 			is_fake_mcount32 = MIPS32_is_fake_mcount;
 		}
+		if (w2(ehdr->e_machine) == EM_LOONGARCH) {
+			reltype = R_LARCH_32;
+			is_fake_mcount32 = LARCH32_is_fake_mcount;
+		}
 		if (do32(ehdr, fname, reltype) < 0)
 			goto out;
 		break;
@@ -610,6 +645,10 @@ static int do_file(char const *const fname)
 			Elf64_r_info = MIPS64_r_info;
 			is_fake_mcount64 = MIPS64_is_fake_mcount;
 		}
+		if (w2(ghdr->e_machine) == EM_LOONGARCH) {
+			reltype = R_LARCH_64;
+			is_fake_mcount64 = LARCH64_is_fake_mcount;
+		}
 		if (do64(ghdr, fname, reltype) < 0)
 			goto out;
 		break;
-- 
GitLab


From 4733f09d880745953b88c3358b49ad495aecd8e9 Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3826/4122] LoongArch/ftrace: Add dynamic function tracer
 support

The compiler has inserted 2 NOPs before the regular function prologue.
T series registers are available and safe because of LoongArch's psABI.

At runtime, we can replace nop with bl to enable ftrace call and replace
bl with nop to disable ftrace call. The bl instruction requires us to
save the original RA value, so it saves RA at t0 here.

Details are:

| Compiled   |       Disabled         |        Enabled         |
+------------+------------------------+------------------------+
| nop        | move     t0, ra        | move    t0, ra         |
| nop        | nop                    | bl      ftrace_caller  |
| func_body  | func_body              | func_body              |

The RA value will be recovered by ftrace_regs_entry, and restored into
RA before returning to the regular function prologue. When a function is
not being traced, the "move t0, ra" is not harmful.

1) ftrace_make_call, ftrace_make_nop (in kernel/ftrace.c)
   The two functions turn each recorded call site of filtered functions
   into a call to ftrace_caller or nops.

2) ftracce_update_ftrace_func (in kernel/ftrace.c)
   turns the nops at ftrace_call into a call to a generic entry for
   function tracers.

3) ftrace_caller (in kernel/mcount_dyn.S)
   The entry where each _mcount call sites calls to once they are
   filtered to be traced.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig                  |   1 +
 arch/loongarch/Makefile                 |   5 ++
 arch/loongarch/include/asm/ftrace.h     |  21 +++++
 arch/loongarch/include/asm/inst.h       |  11 +++
 arch/loongarch/include/asm/unwind.h     |   2 +-
 arch/loongarch/kernel/Makefile          |  15 ++--
 arch/loongarch/kernel/ftrace_dyn.c      | 110 ++++++++++++++++++++++++
 arch/loongarch/kernel/inst.c            |  92 ++++++++++++++++++++
 arch/loongarch/kernel/mcount_dyn.S      |  85 ++++++++++++++++++
 arch/loongarch/kernel/unwind_prologue.c |  35 +++++++-
 10 files changed, 367 insertions(+), 10 deletions(-)
 create mode 100644 arch/loongarch/kernel/ftrace_dyn.c
 create mode 100644 arch/loongarch/kernel/mcount_dyn.S

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 8b81cf9a65031..6e9aaa747ef77 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -88,6 +88,7 @@ config LOONGARCH
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile
index 5232d8c0f9caf..4402387d27551 100644
--- a/arch/loongarch/Makefile
+++ b/arch/loongarch/Makefile
@@ -25,6 +25,11 @@ endif
 32bit-emul		= elf32loongarch
 64bit-emul		= elf64loongarch
 
+ifdef CONFIG_DYNAMIC_FTRACE
+KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
 ifdef CONFIG_64BIT
 tool-archpref		= $(64bit-tool-archpref)
 UTS_MACHINE		:= loongarch64
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index c3f5cde404646..09ff0f84663d8 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -11,9 +11,30 @@
 #define MCOUNT_INSN_SIZE 4		/* sizeof mcount call */
 
 #ifndef __ASSEMBLY__
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+
 #define mcount _mcount
 extern void _mcount(void);
 extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old);
+
+#else
+
+struct dyn_ftrace;
+struct dyn_arch_ftrace { };
+
+#define ftrace_init_nop ftrace_init_nop
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent);
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 67215af47b3d5..88e1673524e1d 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -349,6 +349,17 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip)
 		is_imm12_negative(ip->reg2i12_format.immediate);
 }
 
+int larch_insn_read(void *addr, u32 *insnp);
+int larch_insn_write(void *addr, u32 insn);
+int larch_insn_patch_text(void *addr, u32 insn);
+
+u32 larch_insn_gen_nop(void);
+u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
+u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest);
+
+u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk);
+u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj);
+
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index 6af4718bdf015..a51eec00efb85 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -20,7 +20,7 @@ struct unwind_state {
 	char type; /* UNWINDER_XXX */
 	struct stack_info stack_info;
 	struct task_struct *task;
-	bool first, error;
+	bool first, error, is_ftrace;
 	unsigned long sp, pc, ra;
 };
 
diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile
index 3f71bce1c7ce0..fcaa024a685ec 100644
--- a/arch/loongarch/kernel/Makefile
+++ b/arch/loongarch/kernel/Makefile
@@ -16,11 +16,16 @@ obj-$(CONFIG_EFI) 		+= efi.o
 obj-$(CONFIG_CPU_HAS_FPU)	+= fpu.o
 
 ifdef CONFIG_FUNCTION_TRACER
-obj-y += mcount.o ftrace.o
-CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
+  ifndef CONFIG_DYNAMIC_FTRACE
+    obj-y += mcount.o ftrace.o
+    CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+  else
+    obj-y += mcount_dyn.o ftrace_dyn.o
+    CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE)
+  endif
+  CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
 endif
 
 obj-$(CONFIG_MODULES)		+= module.o module-sections.o
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
new file mode 100644
index 0000000000000..3b82bface840b
--- /dev/null
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Based on arch/arm64/kernel/ftrace.c
+ *
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <linux/ftrace.h>
+#include <linux/uaccess.h>
+
+#include <asm/inst.h>
+
+static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
+{
+	u32 replaced;
+
+	if (validate) {
+		if (larch_insn_read((void *)pc, &replaced))
+			return -EFAULT;
+
+		if (replaced != old)
+			return -EINVAL;
+	}
+
+	if (larch_insn_patch_text((void *)pc, new))
+		return -EPERM;
+
+	return 0;
+}
+
+int ftrace_update_ftrace_func(ftrace_func_t func)
+{
+	u32 new;
+	unsigned long pc;
+
+	pc = (unsigned long)&ftrace_call;
+	new = larch_insn_gen_bl(pc, (unsigned long)func);
+
+	return ftrace_modify_code(pc, 0, new, false);
+}
+
+/*
+ * The compiler has inserted 2 NOPs before the regular function prologue.
+ * T series registers are available and safe because of LoongArch's psABI.
+ *
+ * At runtime, we can replace nop with bl to enable ftrace call and replace bl
+ * with nop to disable ftrace call. The bl requires us to save the original RA
+ * value, so it saves RA at t0 here.
+ *
+ * Details are:
+ *
+ * | Compiled   |       Disabled         |        Enabled         |
+ * +------------+------------------------+------------------------+
+ * | nop        | move     t0, ra        | move     t0, ra        |
+ * | nop        | nop                    | bl       ftrace_caller |
+ * | func_body  | func_body              | func_body              |
+ *
+ * The RA value will be recovered by ftrace_regs_entry, and restored into RA
+ * before returning to the regular function prologue. When a function is not
+ * being traced, the "move t0, ra" is not harmful.
+ */
+
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
+{
+	u32 old, new;
+	unsigned long pc;
+
+	pc = rec->ip;
+	old = larch_insn_gen_nop();
+	new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+	u32 old, new;
+	unsigned long pc;
+
+	pc = rec->ip + LOONGARCH_INSN_SIZE;
+
+	old = larch_insn_gen_nop();
+	new = larch_insn_gen_bl(pc, addr);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
+{
+	u32 old, new;
+	unsigned long pc;
+
+	pc = rec->ip + LOONGARCH_INSN_SIZE;
+
+	new = larch_insn_gen_nop();
+	old = larch_insn_gen_bl(pc, addr);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+
+void arch_ftrace_update_code(int command)
+{
+	command |= FTRACE_MAY_SLEEP;
+	ftrace_modify_all_code(command);
+}
+
+int __init ftrace_dyn_arch_init(void)
+{
+	return 0;
+}
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index b1df0ec34bd1b..4fd22b4413d06 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -2,8 +2,100 @@
 /*
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/sizes.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
 #include <asm/inst.h>
 
+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+int larch_insn_read(void *addr, u32 *insnp)
+{
+	int ret;
+	u32 val;
+
+	ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE);
+	if (!ret)
+		*insnp = val;
+
+	return ret;
+}
+
+int larch_insn_write(void *addr, u32 insn)
+{
+	int ret;
+	unsigned long flags = 0;
+
+	raw_spin_lock_irqsave(&patch_lock, flags);
+	ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE);
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+
+int larch_insn_patch_text(void *addr, u32 insn)
+{
+	int ret;
+	u32 *tp = addr;
+
+	if ((unsigned long)tp & 3)
+		return -EINVAL;
+
+	ret = larch_insn_write(tp, insn);
+	if (!ret)
+		flush_icache_range((unsigned long)tp,
+				   (unsigned long)tp + LOONGARCH_INSN_SIZE);
+
+	return ret;
+}
+
+u32 larch_insn_gen_nop(void)
+{
+	return INSN_NOP;
+}
+
+u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
+{
+	long offset = dest - pc;
+	unsigned int immediate_l, immediate_h;
+	union loongarch_instruction insn;
+
+	if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
+		pr_warn("The generated bl instruction is out of range.\n");
+		return INSN_BREAK;
+	}
+
+	offset >>= 2;
+
+	immediate_l = offset & 0xffff;
+	offset >>= 16;
+	immediate_h = offset & 0x3ff;
+
+	insn.reg0i26_format.opcode = bl_op;
+	insn.reg0i26_format.immediate_l = immediate_l;
+	insn.reg0i26_format.immediate_h = immediate_h;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	union loongarch_instruction insn;
+
+	insn.reg3_format.opcode = or_op;
+	insn.reg3_format.rd = rd;
+	insn.reg3_format.rj = rj;
+	insn.reg3_format.rk = rk;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj)
+{
+	return larch_insn_gen_or(rd, rj, 0);
+}
+
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
 {
 	union loongarch_instruction insn;
diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
new file mode 100644
index 0000000000000..45ba88d2aacc7
--- /dev/null
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2022 Loongson Technology Corporation Limited
+ */
+
+#include <asm/export.h>
+#include <asm/ftrace.h>
+#include <asm/regdef.h>
+#include <asm/stackframe.h>
+
+	.text
+/*
+ * Due to -fpatchable-function-entry=2: the compiler inserted 2 NOPs before the
+ * regular C function prologue. When PC arrived here, the last 2 instructions
+ * are as follows:
+ * 	move		t0, ra
+ * 	bl		callsite (for modules, callsite is a tramplione)
+ *
+ * modules trampoline is as follows:
+ * 	lu12i.w		t1, callsite[31:12]
+ * 	lu32i.d		t1, callsite[51:32]
+ * 	lu52i.d		t1, t1, callsite[63:52]
+ * 	jirl		zero, t1, callsite[11:0] >> 2
+ *
+ * See arch/loongarch/kernel/ftrace_dyn.c for details. Here, pay attention to
+ * that the T series regs are available and safe because each C functions
+ * follows the LoongArch's psABI as well.
+ */
+
+	.macro  ftrace_regs_entry
+	PTR_ADDI	sp, sp, -PT_SIZE
+	PTR_S		t0, sp, PT_R1  /* Save parent ra at PT_R1(RA) */
+	PTR_S		a0, sp, PT_R4
+	PTR_S		a1, sp, PT_R5
+	PTR_S		a2, sp, PT_R6
+	PTR_S		a3, sp, PT_R7
+	PTR_S		a4, sp, PT_R8
+	PTR_S		a5, sp, PT_R9
+	PTR_S		a6, sp, PT_R10
+	PTR_S		a7, sp, PT_R11
+	PTR_S		fp, sp, PT_R22
+	PTR_S		ra, sp, PT_ERA /* Save trace function ra at PT_ERA */
+	PTR_ADDI	t8, sp, PT_SIZE
+	PTR_S		t8, sp, PT_R3
+	.endm
+
+SYM_FUNC_START(ftrace_stub)
+	jr		ra
+SYM_FUNC_END(ftrace_stub)
+
+SYM_CODE_START(ftrace_common)
+	PTR_ADDI	a0, ra, -8	/* arg0: ip */
+	move		a1, t0		/* arg1: parent_ip */
+	la.pcrel	t1, function_trace_op
+	PTR_L		a2, t1, 0	/* arg2: op */
+	move		a3, sp		/* arg3: regs */
+
+SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
+	bl		ftrace_stub
+/*
+ * As we didn't use S series regs in this assmembly code and all calls
+ * are C function which will save S series regs by themselves, there is
+ * no need to restore S series regs. The T series is available and safe
+ * at the callsite, so there is no need to restore the T series regs.
+ */
+ftrace_common_return:
+	PTR_L		ra, sp, PT_R1
+	PTR_L		a0, sp, PT_R4
+	PTR_L		a1, sp, PT_R5
+	PTR_L		a2, sp, PT_R6
+	PTR_L		a3, sp, PT_R7
+	PTR_L		a4, sp, PT_R8
+	PTR_L		a5, sp, PT_R9
+	PTR_L		a6, sp, PT_R10
+	PTR_L		a7, sp, PT_R11
+	PTR_L		fp, sp, PT_R22
+	PTR_L		t0, sp, PT_ERA
+	PTR_ADDI	sp, sp, PT_SIZE
+	jr		t0
+SYM_CODE_END(ftrace_common)
+
+SYM_CODE_START(ftrace_caller)
+	ftrace_regs_entry
+	b		ftrace_common
+SYM_CODE_END(ftrace_caller)
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 4571c3c87cd4c..46fe344d7fba8 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -8,6 +8,16 @@
 #include <asm/ptrace.h>
 #include <asm/unwind.h>
 
+static inline void unwind_state_fixup(struct unwind_state *state)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE
+	static unsigned long ftrace = (unsigned long)ftrace_call + 4;
+
+	if (state->pc == ftrace)
+		state->is_ftrace = true;
+#endif
+}
+
 unsigned long unwind_get_return_address(struct unwind_state *state)
 {
 
@@ -41,15 +51,30 @@ static bool unwind_by_guess(struct unwind_state *state)
 
 static bool unwind_by_prologue(struct unwind_state *state)
 {
-	struct stack_info *info = &state->stack_info;
-	union loongarch_instruction *ip, *ip_end;
 	long frame_ra = -1;
 	unsigned long frame_size = 0;
 	unsigned long size, offset, pc = state->pc;
+	struct pt_regs *regs;
+	struct stack_info *info = &state->stack_info;
+	union loongarch_instruction *ip, *ip_end;
 
 	if (state->sp >= info->end || state->sp < info->begin)
 		return false;
 
+	if (state->is_ftrace) {
+		/*
+		 * As we meet ftrace_regs_entry, reset first flag like first doing
+		 * tracing. Prologue analysis will stop soon because PC is at entry.
+		 */
+		regs = (struct pt_regs *)state->sp;
+		state->first = true;
+		state->is_ftrace = false;
+		state->pc = regs->csr_era;
+		state->ra = regs->regs[1];
+		state->sp = regs->regs[3];
+		return true;
+	}
+
 	if (!kallsyms_lookup_size_offset(pc, &size, &offset))
 		return false;
 
@@ -95,7 +120,7 @@ static bool unwind_by_prologue(struct unwind_state *state)
 
 	state->pc = *(unsigned long *)(state->sp + frame_ra);
 	state->sp = state->sp + frame_size;
-	return !!__kernel_text_address(state->pc);
+	goto out;
 
 first:
 	state->first = false;
@@ -104,7 +129,9 @@ first:
 
 	state->pc = state->ra;
 
-	return !!__kernel_text_address(state->ra);
+out:
+	unwind_state_fixup(state);
+	return !!__kernel_text_address(state->pc);
 }
 
 void unwind_start(struct unwind_state *state, struct task_struct *task,
-- 
GitLab


From 5fcfad3d41cc70f39fb31e7ee314989cc4c5f02c Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3827/4122] LoongArch/ftrace: Add dynamic function graph tracer
 support

Once the function_graph tracer is enabled, a filtered function has the
following call sequence:

1) ftracer_caller     ==> on/off by ftrace_make_call/ftrace_make_nop
2) ftrace_graph_caller
3) ftrace_graph_call  ==> on/off by ftrace_en/disable_ftrace_graph_caller
4) prepare_ftrace_return

Considering the following DYNAMIC_FTRACE_WITH_REGS feature, it would be
more extendable to have a ftrace_graph_caller function, instead of
calling prepare_ftrace_return directly in ftrace_caller.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/kernel/ftrace_dyn.c | 44 ++++++++++++++++++++++++++++++
 arch/loongarch/kernel/inst.c       | 24 ++++++++++++++++
 arch/loongarch/kernel/mcount_dyn.S | 33 ++++++++++++++++++++++
 3 files changed, 101 insertions(+)

diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 3b82bface840b..5a801c328e2a0 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -108,3 +108,47 @@ int __init ftrace_dyn_arch_init(void)
 {
 	return 0;
 }
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent)
+{
+	unsigned long old;
+	unsigned long return_hooker = (unsigned long)&return_to_handler;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+
+	if (!function_graph_enter(old, self_addr, 0, NULL))
+		*parent = return_hooker;
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+	u32 branch, nop;
+	unsigned long pc, func;
+	extern void ftrace_graph_call(void);
+
+	pc = (unsigned long)&ftrace_graph_call;
+	func = (unsigned long)&ftrace_graph_caller;
+
+	nop = larch_insn_gen_nop();
+	branch = larch_insn_gen_b(pc, func);
+
+	if (enable)
+		return ftrace_modify_code(pc, nop, branch, true);
+	else
+		return ftrace_modify_code(pc, branch, nop, true);
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+	return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 4fd22b4413d06..39671e87e31c9 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -55,6 +55,30 @@ u32 larch_insn_gen_nop(void)
 	return INSN_NOP;
 }
 
+u32 larch_insn_gen_b(unsigned long pc, unsigned long dest)
+{
+	long offset = dest - pc;
+	unsigned int immediate_l, immediate_h;
+	union loongarch_instruction insn;
+
+	if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
+		pr_warn("The generated b instruction is out of range.\n");
+		return INSN_BREAK;
+	}
+
+	offset >>= 2;
+
+	immediate_l = offset & 0xffff;
+	offset >>= 16;
+	immediate_h = offset & 0x3ff;
+
+	insn.reg0i26_format.opcode = b_op;
+	insn.reg0i26_format.immediate_l = immediate_l;
+	insn.reg0i26_format.immediate_h = immediate_h;
+
+	return insn.word;
+}
+
 u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
 {
 	long offset = dest - pc;
diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index 45ba88d2aacc7..cce3daa2eb1e3 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -57,6 +57,11 @@ SYM_CODE_START(ftrace_common)
 
 SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
 	bl		ftrace_stub
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
+	nop				/* b ftrace_graph_caller */
+#endif
+
 /*
  * As we didn't use S series regs in this assmembly code and all calls
  * are C function which will save S series regs by themselves, there is
@@ -83,3 +88,31 @@ SYM_CODE_START(ftrace_caller)
 	ftrace_regs_entry
 	b		ftrace_common
 SYM_CODE_END(ftrace_caller)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+SYM_CODE_START(ftrace_graph_caller)
+	PTR_L		a0, sp, PT_ERA
+	PTR_ADDI	a0, a0, -8	/* arg0: self_addr */
+	PTR_ADDI	a1, sp, PT_R1	/* arg1: parent */
+	bl		prepare_ftrace_return
+	b		ftrace_common_return
+SYM_CODE_END(ftrace_graph_caller)
+
+SYM_CODE_START(return_to_handler)
+	/* Save return value regs */
+	PTR_ADDI 	sp, sp, -2 * SZREG
+	PTR_S		a0, sp, 0
+	PTR_S		a1, sp, SZREG
+
+	move		a0, zero
+	bl		ftrace_return_to_handler
+	move		ra, a0
+
+	/* Restore return value regs */
+	PTR_L		a0, sp, 0
+	PTR_L		a1, sp, SZREG
+	PTR_ADDI 	sp, sp, 2 * SZREG
+
+	jr		ra
+SYM_CODE_END(return_to_handler)
+#endif
-- 
GitLab


From 8778ba2c8a5df11859dc6f2b2205700388b63fd3 Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:15 +0800
Subject: [PATCH 3828/4122] LoongArch/ftrace: Add HAVE_DYNAMIC_FTRACE_WITH_REGS
 support

This patch implements CONFIG_DYNAMIC_FTRACE_WITH_REGS on LoongArch,
which allows a traced function's arguments (and some other registers)
to be captured into a struct pt_regs, allowing these to be inspected
and modified.

Co-developed-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Jinyang He <hejinyang@loongson.cn>
Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig              |  1 +
 arch/loongarch/include/asm/ftrace.h |  2 ++
 arch/loongarch/kernel/ftrace_dyn.c  | 15 +++++++++++++
 arch/loongarch/kernel/mcount_dyn.S  | 35 +++++++++++++++++++++++++++--
 4 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 6e9aaa747ef77..f2d1b2aef2d40 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -89,6 +89,7 @@ config LOONGARCH
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 09ff0f84663d8..dd4a0c8efd246 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -23,6 +23,8 @@ extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsit
 struct dyn_ftrace;
 struct dyn_arch_ftrace { };
 
+#define ARCH_SUPPORTS_FTRACE_OPS 1
+
 #define ftrace_init_nop ftrace_init_nop
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
 
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 5a801c328e2a0..d6f30918f94f9 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -28,6 +28,21 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
 	return 0;
 }
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
+{
+	u32 old, new;
+	unsigned long pc;
+
+	pc = rec->ip + LOONGARCH_INSN_SIZE;
+
+	new = larch_insn_gen_bl(pc, addr);
+	old = larch_insn_gen_bl(pc, old_addr);
+
+	return ftrace_modify_code(pc, old, new, true);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
+
 int ftrace_update_ftrace_func(ftrace_func_t func)
 {
 	u32 new;
diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S
index cce3daa2eb1e3..bbabf06244c26 100644
--- a/arch/loongarch/kernel/mcount_dyn.S
+++ b/arch/loongarch/kernel/mcount_dyn.S
@@ -27,7 +27,7 @@
  * follows the LoongArch's psABI as well.
  */
 
-	.macro  ftrace_regs_entry
+	.macro  ftrace_regs_entry allregs=0
 	PTR_ADDI	sp, sp, -PT_SIZE
 	PTR_S		t0, sp, PT_R1  /* Save parent ra at PT_R1(RA) */
 	PTR_S		a0, sp, PT_R4
@@ -39,6 +39,30 @@
 	PTR_S		a6, sp, PT_R10
 	PTR_S		a7, sp, PT_R11
 	PTR_S		fp, sp, PT_R22
+	.if \allregs
+	PTR_S		tp, sp, PT_R2
+	PTR_S		t0, sp, PT_R12
+	PTR_S		t1, sp, PT_R13
+	PTR_S		t2, sp, PT_R14
+	PTR_S		t3, sp, PT_R15
+	PTR_S		t4, sp, PT_R16
+	PTR_S		t5, sp, PT_R17
+	PTR_S		t6, sp, PT_R18
+	PTR_S		t7, sp, PT_R19
+	PTR_S		t8, sp, PT_R20
+	PTR_S		u0, sp, PT_R21
+	PTR_S		s0, sp, PT_R23
+	PTR_S		s1, sp, PT_R24
+	PTR_S		s2, sp, PT_R25
+	PTR_S		s3, sp, PT_R26
+	PTR_S		s4, sp, PT_R27
+	PTR_S		s5, sp, PT_R28
+	PTR_S		s6, sp, PT_R29
+	PTR_S		s7, sp, PT_R30
+	PTR_S		s8, sp, PT_R31
+	/* Clear it for later use as a flag sometimes. */
+	PTR_S		zero, sp, PT_R0
+	.endif
 	PTR_S		ra, sp, PT_ERA /* Save trace function ra at PT_ERA */
 	PTR_ADDI	t8, sp, PT_SIZE
 	PTR_S		t8, sp, PT_R3
@@ -85,10 +109,17 @@ ftrace_common_return:
 SYM_CODE_END(ftrace_common)
 
 SYM_CODE_START(ftrace_caller)
-	ftrace_regs_entry
+	ftrace_regs_entry allregs=0
 	b		ftrace_common
 SYM_CODE_END(ftrace_caller)
 
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+SYM_CODE_START(ftrace_regs_caller)
+	ftrace_regs_entry allregs=1
+	b		ftrace_common
+SYM_CODE_END(ftrace_regs_caller)
+#endif
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 SYM_CODE_START(ftrace_graph_caller)
 	PTR_L		a0, sp, PT_ERA
-- 
GitLab


From ac7127e1cc65aeb578998c992a05dbc80fa18f0f Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:16 +0800
Subject: [PATCH 3829/4122] LoongArch/ftrace: Add HAVE_DYNAMIC_FTRACE_WITH_ARGS
 support

Allow for arguments to be passed in to ftrace_regs by default. If this
is set, then arguments and stack can be found from the pt_regs.

1. HAVE_DYNAMIC_FTRACE_WITH_ARGS don't need special hook for graph
tracer entry point, but instead we can use graph_ops::func function to
install the return_hooker.

2. Livepatch requires this option in the future.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/Kconfig              |  1 +
 arch/loongarch/include/asm/ftrace.h | 17 +++++++++++++++++
 arch/loongarch/kernel/ftrace_dyn.c  | 11 +++++++++++
 3 files changed, 29 insertions(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index f2d1b2aef2d40..a36bb0fe19775 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -89,6 +89,7 @@ config LOONGARCH
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
 	select HAVE_DYNAMIC_FTRACE
+	select HAVE_DYNAMIC_FTRACE_WITH_ARGS
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
 	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index dd4a0c8efd246..ee7feface27a6 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -37,6 +37,23 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent);
 
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+struct ftrace_ops;
+
+struct ftrace_regs {
+	struct pt_regs regs;
+};
+
+static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs *fregs)
+{
+	return &fregs->regs;
+}
+
+#define ftrace_graph_func ftrace_graph_func
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs);
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* CONFIG_FUNCTION_TRACER */
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index d6f30918f94f9..439ba829b9fd9 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -139,6 +139,16 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent)
 		*parent = return_hooker;
 }
 
+#ifdef CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS
+void ftrace_graph_func(unsigned long ip, unsigned long parent_ip,
+		       struct ftrace_ops *op, struct ftrace_regs *fregs)
+{
+	struct pt_regs *regs = &fregs->regs;
+	unsigned long *parent = (unsigned long *)&regs->regs[1];
+
+	prepare_ftrace_return(ip, (unsigned long *)parent);
+}
+#else
 static int ftrace_modify_graph_caller(bool enable)
 {
 	u32 branch, nop;
@@ -166,4 +176,5 @@ int ftrace_disable_ftrace_graph_caller(void)
 {
 	return ftrace_modify_graph_caller(false);
 }
+#endif /* CONFIG_HAVE_DYNAMIC_FTRACE_WITH_ARGS */
 #endif /* CONFIG_FUNCTION_GRAPH_TRACER */
-- 
GitLab


From a51ac5246d2505b58229242959d2bc73d113ca50 Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:21 +0800
Subject: [PATCH 3830/4122] LoongArch/ftrace: Add
 HAVE_FUNCTION_GRAPH_RET_ADDR_PTR support

ftrace_graph_ret_addr() can be called by stack unwinding code to convert
a found stack return address ('ret') to its original value, in case the
function graph tracer has modified it to be 'return_to_handler'. If the
hasn't been modified, the unchanged value of 'ret' is returned.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/ftrace.h     |  3 +++
 arch/loongarch/include/asm/unwind.h     |  1 +
 arch/loongarch/kernel/ftrace_dyn.c      |  2 +-
 arch/loongarch/kernel/unwind_guess.c    |  4 +++-
 arch/loongarch/kernel/unwind_prologue.c | 15 +++++++++++----
 5 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index ee7feface27a6..8c7d137e4871b 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -6,6 +6,8 @@
 #ifndef _ASM_LOONGARCH_FTRACE_H
 #define _ASM_LOONGARCH_FTRACE_H
 
+#define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1]))
+
 #ifdef CONFIG_FUNCTION_TRACER
 
 #define MCOUNT_INSN_SIZE 4		/* sizeof mcount call */
@@ -24,6 +26,7 @@ struct dyn_ftrace;
 struct dyn_arch_ftrace { };
 
 #define ARCH_SUPPORTS_FTRACE_OPS 1
+#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
 
 #define ftrace_init_nop ftrace_init_nop
 int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
diff --git a/arch/loongarch/include/asm/unwind.h b/arch/loongarch/include/asm/unwind.h
index a51eec00efb85..f2b52b9ea93d2 100644
--- a/arch/loongarch/include/asm/unwind.h
+++ b/arch/loongarch/include/asm/unwind.h
@@ -21,6 +21,7 @@ struct unwind_state {
 	struct stack_info stack_info;
 	struct task_struct *task;
 	bool first, error, is_ftrace;
+	int graph_idx;
 	unsigned long sp, pc, ra;
 };
 
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index 439ba829b9fd9..e23c3be29baab 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -135,7 +135,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent)
 
 	old = *parent;
 
-	if (!function_graph_enter(old, self_addr, 0, NULL))
+	if (!function_graph_enter(old, self_addr, 0, parent))
 		*parent = return_hooker;
 }
 
diff --git a/arch/loongarch/kernel/unwind_guess.c b/arch/loongarch/kernel/unwind_guess.c
index 5afa6064d73e4..e2d2e4f3001f4 100644
--- a/arch/loongarch/kernel/unwind_guess.c
+++ b/arch/loongarch/kernel/unwind_guess.c
@@ -3,6 +3,7 @@
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
 #include <linux/kernel.h>
+#include <linux/ftrace.h>
 
 #include <asm/unwind.h>
 
@@ -53,7 +54,8 @@ bool unwind_next_frame(struct unwind_state *state)
 		     state->sp < info->end;
 		     state->sp += sizeof(unsigned long)) {
 			addr = *(unsigned long *)(state->sp);
-
+			state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+					addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
 			if (__kernel_text_address(addr))
 				return true;
 		}
diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c
index 46fe344d7fba8..0f8d1451ebb84 100644
--- a/arch/loongarch/kernel/unwind_prologue.c
+++ b/arch/loongarch/kernel/unwind_prologue.c
@@ -2,6 +2,7 @@
 /*
  * Copyright (C) 2022 Loongson Technology Corporation Limited
  */
+#include <linux/ftrace.h>
 #include <linux/kallsyms.h>
 
 #include <asm/inst.h>
@@ -42,6 +43,8 @@ static bool unwind_by_guess(struct unwind_state *state)
 	     state->sp < info->end;
 	     state->sp += sizeof(unsigned long)) {
 		addr = *(unsigned long *)(state->sp);
+		state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+				addr, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
 		if (__kernel_text_address(addr))
 			return true;
 	}
@@ -174,8 +177,11 @@ bool unwind_next_frame(struct unwind_state *state)
 			break;
 
 		case UNWINDER_PROLOGUE:
-			if (unwind_by_prologue(state))
+			if (unwind_by_prologue(state)) {
+				state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+						state->pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
 				return true;
+			}
 
 			if (info->type == STACK_TYPE_IRQ &&
 				info->end == state->sp) {
@@ -185,10 +191,11 @@ bool unwind_next_frame(struct unwind_state *state)
 				if (user_mode(regs) || !__kernel_text_address(pc))
 					return false;
 
-				state->pc = pc;
-				state->sp = regs->regs[3];
-				state->ra = regs->regs[1];
 				state->first = true;
+				state->ra = regs->regs[1];
+				state->sp = regs->regs[3];
+				state->pc = ftrace_graph_ret_addr(state->task, &state->graph_idx,
+						pc, (unsigned long *)(state->sp - GRAPH_FAKE_OFFSET));
 				get_stack_info(state->sp, state->task, info);
 
 				return true;
-- 
GitLab


From 28ac0a9e04d7dfb42220dc9d221164d93f20fb3a Mon Sep 17 00:00:00 2001
From: Qing Zhang <zhangqing@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:21 +0800
Subject: [PATCH 3831/4122] LoongArch: modules/ftrace: Initialize PLT at load
 time

This patch implements ftrace trampolines through plt entry.

Tested by forcing ftrace_make_call() to use the module PLT, and then
loading up a module after setting up ftrace with:

| echo ":mod:<module-name>" > set_ftrace_filter;
| echo function > current_tracer;
| modprobe <module-name>

Since FTRACE_ADDR/FTRACE_REGS_ADDR is only defined when CONFIG_DYNAMIC_
FTRACE is selected, we wrap their usage in module_init_ftrace_plt() with
ifdeffery rather than using IS_ENABLED().

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/include/asm/ftrace.h     |  4 ++
 arch/loongarch/include/asm/inst.h       |  3 +
 arch/loongarch/include/asm/module.h     |  5 +-
 arch/loongarch/include/asm/module.lds.h |  1 +
 arch/loongarch/kernel/ftrace_dyn.c      | 93 +++++++++++++++++++++++++
 arch/loongarch/kernel/inst.c            | 11 +++
 arch/loongarch/kernel/module-sections.c | 12 +++-
 arch/loongarch/kernel/module.c          | 21 ++++++
 8 files changed, 148 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/ftrace.h b/arch/loongarch/include/asm/ftrace.h
index 8c7d137e4871b..90f9d3399b2aa 100644
--- a/arch/loongarch/include/asm/ftrace.h
+++ b/arch/loongarch/include/asm/ftrace.h
@@ -6,6 +6,10 @@
 #ifndef _ASM_LOONGARCH_FTRACE_H
 #define _ASM_LOONGARCH_FTRACE_H
 
+#define FTRACE_PLT_IDX		0
+#define FTRACE_REGS_PLT_IDX	1
+#define NR_FTRACE_PLTS		2
+
 #define GRAPH_FAKE_OFFSET (sizeof(struct pt_regs) - offsetof(struct pt_regs, regs[1]))
 
 #ifdef CONFIG_FUNCTION_TRACER
diff --git a/arch/loongarch/include/asm/inst.h b/arch/loongarch/include/asm/inst.h
index 88e1673524e1d..c00e1512d4fa3 100644
--- a/arch/loongarch/include/asm/inst.h
+++ b/arch/loongarch/include/asm/inst.h
@@ -13,10 +13,12 @@
 
 #define ADDR_IMMMASK_LU52ID	0xFFF0000000000000
 #define ADDR_IMMMASK_LU32ID	0x000FFFFF00000000
+#define ADDR_IMMMASK_LU12IW	0x00000000FFFFF000
 #define ADDR_IMMMASK_ADDU16ID	0x00000000FFFF0000
 
 #define ADDR_IMMSHIFT_LU52ID	52
 #define ADDR_IMMSHIFT_LU32ID	32
+#define ADDR_IMMSHIFT_LU12IW	12
 #define ADDR_IMMSHIFT_ADDU16ID	16
 
 #define ADDR_IMM(addr, INSN)	((addr & ADDR_IMMMASK_##INSN) >> ADDR_IMMSHIFT_##INSN)
@@ -360,6 +362,7 @@ u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest);
 u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk);
 u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj);
 
+u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);
diff --git a/arch/loongarch/include/asm/module.h b/arch/loongarch/include/asm/module.h
index 60dc62a1146ee..12a0f1e66916d 100644
--- a/arch/loongarch/include/asm/module.h
+++ b/arch/loongarch/include/asm/module.h
@@ -20,6 +20,9 @@ struct mod_arch_specific {
 	struct mod_section got;
 	struct mod_section plt;
 	struct mod_section plt_idx;
+
+	/* For CONFIG_DYNAMIC_FTRACE */
+	struct plt_entry *ftrace_trampolines;
 };
 
 struct got_entry {
@@ -49,7 +52,7 @@ static inline struct plt_entry emit_plt_entry(unsigned long val)
 {
 	u32 lu12iw, lu32id, lu52id, jirl;
 
-	lu12iw = (lu12iw_op << 25 | (((val >> 12) & 0xfffff) << 5) | LOONGARCH_GPR_T1);
+	lu12iw = larch_insn_gen_lu12iw(LOONGARCH_GPR_T1, ADDR_IMM(val, LU12IW));
 	lu32id = larch_insn_gen_lu32id(LOONGARCH_GPR_T1, ADDR_IMM(val, LU32ID));
 	lu52id = larch_insn_gen_lu52id(LOONGARCH_GPR_T1, LOONGARCH_GPR_T1, ADDR_IMM(val, LU52ID));
 	jirl = larch_insn_gen_jirl(0, LOONGARCH_GPR_T1, 0, (val & 0xfff));
diff --git a/arch/loongarch/include/asm/module.lds.h b/arch/loongarch/include/asm/module.lds.h
index a3d1bc0fcc72e..438f09d4ccf41 100644
--- a/arch/loongarch/include/asm/module.lds.h
+++ b/arch/loongarch/include/asm/module.lds.h
@@ -5,4 +5,5 @@ SECTIONS {
 	.got : { BYTE(0) }
 	.plt : { BYTE(0) }
 	.plt.idx : { BYTE(0) }
+	.ftrace_trampoline : { BYTE(0) }
 }
diff --git a/arch/loongarch/kernel/ftrace_dyn.c b/arch/loongarch/kernel/ftrace_dyn.c
index e23c3be29baab..0f07591cab309 100644
--- a/arch/loongarch/kernel/ftrace_dyn.c
+++ b/arch/loongarch/kernel/ftrace_dyn.c
@@ -9,6 +9,7 @@
 #include <linux/uaccess.h>
 
 #include <asm/inst.h>
+#include <asm/module.h>
 
 static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
 {
@@ -29,18 +30,78 @@ static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
 }
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+
+#ifdef CONFIG_MODULES
+static inline int __get_mod(struct module **mod, unsigned long addr)
+{
+	preempt_disable();
+	*mod = __module_text_address(addr);
+	preempt_enable();
+
+	if (WARN_ON(!(*mod)))
+		return -EINVAL;
+
+	return 0;
+}
+
+static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr)
+{
+	struct plt_entry *plt = mod->arch.ftrace_trampolines;
+
+	if (addr == FTRACE_ADDR)
+		return &plt[FTRACE_PLT_IDX];
+	if (addr == FTRACE_REGS_ADDR &&
+			IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		return &plt[FTRACE_REGS_PLT_IDX];
+
+	return NULL;
+}
+
+static unsigned long get_plt_addr(struct module *mod, unsigned long addr)
+{
+	struct plt_entry *plt;
+
+	plt = get_ftrace_plt(mod, addr);
+	if (!plt) {
+		pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
+		return -EINVAL;
+	}
+
+	return (unsigned long)plt;
+}
+#endif
+
 int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr)
 {
 	u32 old, new;
 	unsigned long pc;
+	long offset __maybe_unused;
 
 	pc = rec->ip + LOONGARCH_INSN_SIZE;
 
+#ifdef CONFIG_MODULES
+	offset = (long)pc - (long)addr;
+
+	if (offset < -SZ_128M || offset >= SZ_128M) {
+		int ret;
+		struct module *mod;
+
+		ret = __get_mod(&mod, pc);
+		if (ret)
+			return ret;
+
+		addr = get_plt_addr(mod, addr);
+
+		old_addr = get_plt_addr(mod, old_addr);
+	}
+#endif
+
 	new = larch_insn_gen_bl(pc, addr);
 	old = larch_insn_gen_bl(pc, old_addr);
 
 	return ftrace_modify_code(pc, old, new, true);
 }
+
 #endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
 int ftrace_update_ftrace_func(ftrace_func_t func)
@@ -91,9 +152,25 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
 	u32 old, new;
 	unsigned long pc;
+	long offset __maybe_unused;
 
 	pc = rec->ip + LOONGARCH_INSN_SIZE;
 
+#ifdef CONFIG_MODULES
+	offset = (long)pc - (long)addr;
+
+	if (offset < -SZ_128M || offset >= SZ_128M) {
+		int ret;
+		struct module *mod;
+
+		ret = __get_mod(&mod, pc);
+		if (ret)
+			return ret;
+
+		addr = get_plt_addr(mod, addr);
+	}
+#endif
+
 	old = larch_insn_gen_nop();
 	new = larch_insn_gen_bl(pc, addr);
 
@@ -104,9 +181,25 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long ad
 {
 	u32 old, new;
 	unsigned long pc;
+	long offset __maybe_unused;
 
 	pc = rec->ip + LOONGARCH_INSN_SIZE;
 
+#ifdef CONFIG_MODULES
+	offset = (long)pc - (long)addr;
+
+	if (offset < -SZ_128M || offset >= SZ_128M) {
+		int ret;
+		struct module *mod;
+
+		ret = __get_mod(&mod, pc);
+		if (ret)
+			return ret;
+
+		addr = get_plt_addr(mod, addr);
+	}
+#endif
+
 	new = larch_insn_gen_nop();
 	old = larch_insn_gen_bl(pc, addr);
 
diff --git a/arch/loongarch/kernel/inst.c b/arch/loongarch/kernel/inst.c
index 39671e87e31c9..512579d79b221 100644
--- a/arch/loongarch/kernel/inst.c
+++ b/arch/loongarch/kernel/inst.c
@@ -120,6 +120,17 @@ u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj)
 	return larch_insn_gen_or(rd, rj, 0);
 }
 
+u32 larch_insn_gen_lu12iw(enum loongarch_gpr rd, int imm)
+{
+	union loongarch_instruction insn;
+
+	insn.reg1i20_format.opcode = lu12iw_op;
+	insn.reg1i20_format.rd = rd;
+	insn.reg1i20_format.immediate = imm;
+
+	return insn.word;
+}
+
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
 {
 	union loongarch_instruction insn;
diff --git a/arch/loongarch/kernel/module-sections.c b/arch/loongarch/kernel/module-sections.c
index 13d9a427325aa..d4dbcda1c4b0a 100644
--- a/arch/loongarch/kernel/module-sections.c
+++ b/arch/loongarch/kernel/module-sections.c
@@ -6,6 +6,7 @@
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/ftrace.h>
 
 Elf_Addr module_emit_got_entry(struct module *mod, Elf_Shdr *sechdrs, Elf_Addr val)
 {
@@ -103,7 +104,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			      char *secstrings, struct module *mod)
 {
 	unsigned int i, num_plts = 0, num_gots = 0;
-	Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec;
+	Elf_Shdr *got_sec, *plt_sec, *plt_idx_sec, *tramp = NULL;
 
 	/*
 	 * Find the empty .plt sections.
@@ -115,6 +116,8 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 			mod->arch.plt.shndx = i;
 		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".plt.idx"))
 			mod->arch.plt_idx.shndx = i;
+		else if (!strcmp(secstrings + sechdrs[i].sh_name, ".ftrace_trampoline"))
+			tramp = sechdrs + i;
 	}
 
 	if (!mod->arch.got.shndx) {
@@ -170,5 +173,12 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs,
 	mod->arch.plt_idx.num_entries = 0;
 	mod->arch.plt_idx.max_entries = num_plts;
 
+	if (tramp) {
+		tramp->sh_type = SHT_NOBITS;
+		tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+		tramp->sh_addralign = __alignof__(struct plt_entry);
+		tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry);
+	}
+
 	return 0;
 }
diff --git a/arch/loongarch/kernel/module.c b/arch/loongarch/kernel/module.c
index 899dc677cec3e..b8b86088b2dd2 100644
--- a/arch/loongarch/kernel/module.c
+++ b/arch/loongarch/kernel/module.c
@@ -15,9 +15,11 @@
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
+#include <linux/ftrace.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <asm/alternative.h>
+#include <asm/inst.h>
 
 static int rela_stack_push(s64 stack_value, s64 *rela_stack, size_t *rela_stack_top)
 {
@@ -473,6 +475,23 @@ void *module_alloc(unsigned long size)
 			GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0));
 }
 
+static void module_init_ftrace_plt(const Elf_Ehdr *hdr,
+				   const Elf_Shdr *sechdrs, struct module *mod)
+{
+#ifdef CONFIG_DYNAMIC_FTRACE
+	struct plt_entry *ftrace_plts;
+
+	ftrace_plts = (void *)sechdrs->sh_addr;
+
+	ftrace_plts[FTRACE_PLT_IDX] = emit_plt_entry(FTRACE_ADDR);
+
+	if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
+		ftrace_plts[FTRACE_REGS_PLT_IDX] = emit_plt_entry(FTRACE_REGS_ADDR);
+
+	mod->arch.ftrace_trampolines = ftrace_plts;
+#endif
+}
+
 int module_finalize(const Elf_Ehdr *hdr,
 		    const Elf_Shdr *sechdrs, struct module *mod)
 {
@@ -482,6 +501,8 @@ int module_finalize(const Elf_Ehdr *hdr,
 	for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) {
 		if (!strcmp(".altinstructions", secstrs + s->sh_name))
 			apply_alternatives((void *)s->sh_addr, (void *)s->sh_addr + s->sh_size);
+		if (!strcmp(".ftrace_trampoline", secstrs + s->sh_name))
+			module_init_ftrace_plt(hdr, s, mod);
 	}
 
 	return 0;
-- 
GitLab


From 5535f4f70cfc15ef55b6ea7c7e17337b17337cb6 Mon Sep 17 00:00:00 2001
From: Huacai Chen <chenhuacai@loongson.cn>
Date: Sat, 10 Dec 2022 22:40:21 +0800
Subject: [PATCH 3832/4122] LoongArch: Update Loongson-3 default config file

1, Enable suspend (ACPI S3) and hibernation (ACPI S4).
2, Enable some options for FDT-based systems (e.g., SERIAL_OF_PLATFORM).
3, Enable CONFIG_KALLSYMS_ALL and CONFIG_DEBUG_FS to convenient ftrace.
4, Regenerate the whole file to keep the order of options be the same as
   the latest source code.

Signed-off-by: Qing Zhang <zhangqing@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
---
 arch/loongarch/configs/loongson3_defconfig | 56 ++++++++++++----------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/configs/loongson3_defconfig
index 3540e9c0a6310..eb84cae642e58 100644
--- a/arch/loongarch/configs/loongson3_defconfig
+++ b/arch/loongarch/configs/loongson3_defconfig
@@ -34,12 +34,13 @@ CONFIG_SYSFS_DEPRECATED=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
 CONFIG_EXPERT=y
-CONFIG_USERFAULTFD=y
+CONFIG_KALLSYMS_ALL=y
 CONFIG_PERF_EVENTS=y
-# CONFIG_COMPAT_BRK is not set
 CONFIG_LOONGARCH=y
 CONFIG_64BIT=y
 CONFIG_MACH_LOONGSON64=y
+CONFIG_PAGE_SIZE_16KB=y
+CONFIG_HZ_250=y
 CONFIG_DMI=y
 CONFIG_EFI=y
 CONFIG_SMP=y
@@ -47,14 +48,14 @@ CONFIG_HOTPLUG_CPU=y
 CONFIG_NR_CPUS=64
 CONFIG_NUMA=y
 CONFIG_KEXEC=y
-CONFIG_PAGE_SIZE_16KB=y
-CONFIG_HZ_250=y
+CONFIG_SUSPEND=y
+CONFIG_HIBERNATION=y
 CONFIG_ACPI=y
 CONFIG_ACPI_SPCR_TABLE=y
-CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_TAD=y
 CONFIG_ACPI_DOCK=y
 CONFIG_ACPI_IPMI=m
+CONFIG_ACPI_HOTPLUG_CPU=y
 CONFIG_ACPI_PCI_SLOT=y
 CONFIG_ACPI_HOTPLUG_MEMORY=y
 CONFIG_EFI_ZBOOT=y
@@ -73,17 +74,19 @@ CONFIG_UNIXWARE_DISKLABEL=y
 CONFIG_IOSCHED_BFQ=y
 CONFIG_BFQ_GROUP_IOSCHED=y
 CONFIG_BINFMT_MISC=m
-CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
-CONFIG_MEMORY_HOTREMOVE=y
-CONFIG_KSM=y
-CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_ZPOOL=y
 CONFIG_ZSWAP=y
 CONFIG_ZSWAP_COMPRESSOR_DEFAULT_ZSTD=y
-CONFIG_ZPOOL=y
 CONFIG_ZBUD=y
 CONFIG_Z3FOLD=y
 CONFIG_ZSMALLOC=m
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_KSM=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_USERFAULTFD=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -118,7 +121,6 @@ CONFIG_NETFILTER=y
 CONFIG_BRIDGE_NETFILTER=m
 CONFIG_NETFILTER_NETLINK_LOG=m
 CONFIG_NF_CONNTRACK=m
-CONFIG_NF_LOG_NETDEV=m
 CONFIG_NF_CONNTRACK_AMANDA=m
 CONFIG_NF_CONNTRACK_FTP=m
 CONFIG_NF_CONNTRACK_NETBIOS_NS=m
@@ -416,6 +418,7 @@ CONFIG_SCSI_VIRTIO=m
 CONFIG_ATA=y
 CONFIG_SATA_AHCI=y
 CONFIG_SATA_AHCI_PLATFORM=y
+CONFIG_AHCI_DWC=y
 CONFIG_PATA_ATIIXP=y
 CONFIG_PATA_PCMCIA=m
 CONFIG_MD=y
@@ -469,13 +472,11 @@ CONFIG_VIRTIO_NET=m
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
 CONFIG_BNX2=y
-# CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_CAVIUM is not set
 CONFIG_CHELSIO_T1=m
 CONFIG_CHELSIO_T1_1G=y
 CONFIG_CHELSIO_T3=m
 CONFIG_CHELSIO_T4=m
-# CONFIG_NET_VENDOR_CIRRUS is not set
 # CONFIG_NET_VENDOR_CISCO is not set
 # CONFIG_NET_VENDOR_DEC is not set
 # CONFIG_NET_VENDOR_DLINK is not set
@@ -496,6 +497,7 @@ CONFIG_IXGBE=y
 # CONFIG_NET_VENDOR_NVIDIA is not set
 # CONFIG_NET_VENDOR_OKI is not set
 # CONFIG_NET_VENDOR_QLOGIC is not set
+# CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_QUALCOMM is not set
 # CONFIG_NET_VENDOR_RDC is not set
 CONFIG_8139CP=m
@@ -505,9 +507,9 @@ CONFIG_R8169=y
 # CONFIG_NET_VENDOR_ROCKER is not set
 # CONFIG_NET_VENDOR_SAMSUNG is not set
 # CONFIG_NET_VENDOR_SEEQ is not set
-# CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SILAN is not set
 # CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SOLARFLARE is not set
 # CONFIG_NET_VENDOR_SMSC is not set
 CONFIG_STMMAC_ETH=y
 # CONFIG_NET_VENDOR_SUN is not set
@@ -588,6 +590,7 @@ CONFIG_SERIAL_8250_EXTENDED=y
 CONFIG_SERIAL_8250_MANY_PORTS=y
 CONFIG_SERIAL_8250_SHARE_IRQ=y
 CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_OF_PLATFORM=y
 CONFIG_SERIAL_NONSTANDARD=y
 CONFIG_PRINTER=m
 CONFIG_VIRTIO_CONSOLE=y
@@ -602,6 +605,11 @@ CONFIG_I2C_GPIO=y
 CONFIG_SPI=y
 CONFIG_GPIO_SYSFS=y
 CONFIG_GPIO_LOONGSON=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_RESTART=y
+CONFIG_POWER_RESET_SYSCON=y
+CONFIG_POWER_RESET_SYSCON_POWEROFF=y
+CONFIG_SYSCON_REBOOT_MODE=y
 CONFIG_SENSORS_LM75=m
 CONFIG_SENSORS_LM93=m
 CONFIG_SENSORS_W83795=m
@@ -609,16 +617,16 @@ CONFIG_SENSORS_W83627HF=m
 CONFIG_RC_CORE=m
 CONFIG_LIRC=y
 CONFIG_RC_DECODERS=y
+CONFIG_IR_IMON_DECODER=m
+CONFIG_IR_JVC_DECODER=m
+CONFIG_IR_MCE_KBD_DECODER=m
 CONFIG_IR_NEC_DECODER=m
 CONFIG_IR_RC5_DECODER=m
 CONFIG_IR_RC6_DECODER=m
-CONFIG_IR_JVC_DECODER=m
-CONFIG_IR_SONY_DECODER=m
 CONFIG_IR_SANYO_DECODER=m
 CONFIG_IR_SHARP_DECODER=m
-CONFIG_IR_MCE_KBD_DECODER=m
+CONFIG_IR_SONY_DECODER=m
 CONFIG_IR_XMP_DECODER=m
-CONFIG_IR_IMON_DECODER=m
 CONFIG_MEDIA_SUPPORT=m
 CONFIG_MEDIA_USB_SUPPORT=y
 CONFIG_USB_VIDEO_CLASS=m
@@ -638,6 +646,7 @@ CONFIG_DRM_VIRTIO_GPU=m
 CONFIG_FB=y
 CONFIG_FB_EFI=y
 CONFIG_FB_RADEON=y
+CONFIG_LCD_CLASS_DEVICE=y
 CONFIG_LCD_PLATFORM=m
 # CONFIG_VGA_CONSOLE is not set
 CONFIG_FRAMEBUFFER_CONSOLE=y
@@ -647,7 +656,6 @@ CONFIG_SOUND=y
 CONFIG_SND=y
 CONFIG_SND_SEQUENCER=m
 CONFIG_SND_SEQ_DUMMY=m
-# CONFIG_SND_ISA is not set
 CONFIG_SND_BT87X=m
 CONFIG_SND_BT87X_OVERCLOCK=y
 CONFIG_SND_HDA_INTEL=y
@@ -818,10 +826,6 @@ CONFIG_CRYPTO_USER=m
 # CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
 CONFIG_CRYPTO_PCRYPT=m
 CONFIG_CRYPTO_CRYPTD=m
-CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_VMAC=m
-CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_ANUBIS=m
 CONFIG_CRYPTO_BLOWFISH=m
 CONFIG_CRYPTO_CAST5=m
@@ -831,6 +835,9 @@ CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_TEA=m
 CONFIG_CRYPTO_TWOFISH=m
+CONFIG_CRYPTO_CHACHA20POLY1305=m
+CONFIG_CRYPTO_VMAC=m
+CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_DEFLATE=m
 CONFIG_CRYPTO_LZO=m
 CONFIG_CRYPTO_842=m
@@ -844,6 +851,7 @@ CONFIG_CRYPTO_DEV_VIRTIO=m
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_MAGIC_SYSRQ=y
+CONFIG_DEBUG_FS=y
 # CONFIG_SCHED_DEBUG is not set
 CONFIG_SCHEDSTATS=y
 # CONFIG_DEBUG_PREEMPT is not set
-- 
GitLab


From f3b4a00f0f62da252c598310698dfc82ef2f2e2e Mon Sep 17 00:00:00 2001
From: Emeel Hakim <ehakim@nvidia.com>
Date: Sun, 11 Dec 2022 09:55:32 +0200
Subject: [PATCH 3833/4122] net: macsec: fix net device access prior to holding
 a lock

Currently macsec offload selection update routine accesses
the net device prior to holding the relevant lock.
Fix by holding the lock prior to the device access.

Fixes: dcb780fb2795 ("net: macsec: add nla support for changing the offloading selection")
Reviewed-by: Raed Salem <raeds@nvidia.com>
Signed-off-by: Emeel Hakim <ehakim@nvidia.com>
Link: https://lore.kernel.org/r/20221211075532.28099-1-ehakim@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/macsec.c | 34 +++++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 13 deletions(-)

diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 937f5b1f04ffc..bf8ac7a3ded75 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -2593,7 +2593,7 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
 	const struct macsec_ops *ops;
 	struct macsec_context ctx;
 	struct macsec_dev *macsec;
-	int ret;
+	int ret = 0;
 
 	if (!attrs[MACSEC_ATTR_IFINDEX])
 		return -EINVAL;
@@ -2606,28 +2606,36 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
 					macsec_genl_offload_policy, NULL))
 		return -EINVAL;
 
+	rtnl_lock();
+
 	dev = get_dev_from_nl(genl_info_net(info), attrs);
-	if (IS_ERR(dev))
-		return PTR_ERR(dev);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto out;
+	}
 	macsec = macsec_priv(dev);
 
-	if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE])
-		return -EINVAL;
+	if (!tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]) {
+		ret = -EINVAL;
+		goto out;
+	}
 
 	offload = nla_get_u8(tb_offload[MACSEC_OFFLOAD_ATTR_TYPE]);
 	if (macsec->offload == offload)
-		return 0;
+		goto out;
 
 	/* Check if the offloading mode is supported by the underlying layers */
 	if (offload != MACSEC_OFFLOAD_OFF &&
-	    !macsec_check_offload(offload, macsec))
-		return -EOPNOTSUPP;
+	    !macsec_check_offload(offload, macsec)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
 
 	/* Check if the net device is busy. */
-	if (netif_running(dev))
-		return -EBUSY;
-
-	rtnl_lock();
+	if (netif_running(dev)) {
+		ret = -EBUSY;
+		goto out;
+	}
 
 	prev_offload = macsec->offload;
 	macsec->offload = offload;
@@ -2662,7 +2670,7 @@ static int macsec_upd_offload(struct sk_buff *skb, struct genl_info *info)
 
 rollback:
 	macsec->offload = prev_offload;
-
+out:
 	rtnl_unlock();
 	return ret;
 }
-- 
GitLab


From 3d0b738fc5adf9f380702ac1424672e4b32c3781 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 12 Dec 2022 11:56:45 +0800
Subject: [PATCH 3834/4122] bonding: add missed __rcu annotation for
 curr_active_slave

There is one direct accesses to bond->curr_active_slave in
bond_miimon_commit(). Protected it by rcu_access_pointer()
since the later of this function also use this one.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/bonding/bond_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f7767afe116b3..6a4bbd5aa3e09 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2699,7 +2699,7 @@ static void bond_miimon_commit(struct bonding *bond)
 
 			bond_miimon_link_change(bond, slave, BOND_LINK_UP);
 
-			if (!bond->curr_active_slave || slave == primary)
+			if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary)
 				goto do_failover;
 
 			continue;
-- 
GitLab


From e95cc44763a41d5c715ef16742bcb1d8e6524a62 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 12 Dec 2022 11:56:46 +0800
Subject: [PATCH 3835/4122] bonding: do failover when high prio link up

Currently, when a high prio link enslaved, or when current link down,
the high prio port could be selected. But when high prio link up, the
new active slave reselection is not triggered. Fix it by checking link's
prio when getting up. Making the do_failover after looping all slaves as
there may be multi high prio slaves up.

Reported-by: Liang Li <liali@redhat.com>
Fixes: 0a2ff7cc8ad4 ("Bonding: add per-port priority for failover re-selection")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/bonding/bond_main.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 6a4bbd5aa3e09..b4c65783960a5 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -2654,8 +2654,9 @@ static void bond_miimon_link_change(struct bonding *bond,
 
 static void bond_miimon_commit(struct bonding *bond)
 {
-	struct list_head *iter;
 	struct slave *slave, *primary;
+	bool do_failover = false;
+	struct list_head *iter;
 
 	bond_for_each_slave(bond, slave, iter) {
 		switch (slave->link_new_state) {
@@ -2699,8 +2700,9 @@ static void bond_miimon_commit(struct bonding *bond)
 
 			bond_miimon_link_change(bond, slave, BOND_LINK_UP);
 
-			if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary)
-				goto do_failover;
+			if (!rcu_access_pointer(bond->curr_active_slave) || slave == primary ||
+			    slave->prio > rcu_dereference(bond->curr_active_slave)->prio)
+				do_failover = true;
 
 			continue;
 
@@ -2721,7 +2723,7 @@ static void bond_miimon_commit(struct bonding *bond)
 			bond_miimon_link_change(bond, slave, BOND_LINK_DOWN);
 
 			if (slave == rcu_access_pointer(bond->curr_active_slave))
-				goto do_failover;
+				do_failover = true;
 
 			continue;
 
@@ -2732,8 +2734,9 @@ static void bond_miimon_commit(struct bonding *bond)
 
 			continue;
 		}
+	}
 
-do_failover:
+	if (do_failover) {
 		block_netpoll_tx();
 		bond_select_active_slave(bond);
 		unblock_netpoll_tx();
@@ -3531,6 +3534,7 @@ static int bond_ab_arp_inspect(struct bonding *bond)
  */
 static void bond_ab_arp_commit(struct bonding *bond)
 {
+	bool do_failover = false;
 	struct list_head *iter;
 	unsigned long last_tx;
 	struct slave *slave;
@@ -3560,8 +3564,9 @@ static void bond_ab_arp_commit(struct bonding *bond)
 				slave_info(bond->dev, slave->dev, "link status definitely up\n");
 
 				if (!rtnl_dereference(bond->curr_active_slave) ||
-				    slave == rtnl_dereference(bond->primary_slave))
-					goto do_failover;
+				    slave == rtnl_dereference(bond->primary_slave) ||
+				    slave->prio > rtnl_dereference(bond->curr_active_slave)->prio)
+					do_failover = true;
 
 			}
 
@@ -3580,7 +3585,7 @@ static void bond_ab_arp_commit(struct bonding *bond)
 
 			if (slave == rtnl_dereference(bond->curr_active_slave)) {
 				RCU_INIT_POINTER(bond->current_arp_slave, NULL);
-				goto do_failover;
+				do_failover = true;
 			}
 
 			continue;
@@ -3604,8 +3609,9 @@ static void bond_ab_arp_commit(struct bonding *bond)
 				  slave->link_new_state);
 			continue;
 		}
+	}
 
-do_failover:
+	if (do_failover) {
 		block_netpoll_tx();
 		bond_select_active_slave(bond);
 		unblock_netpoll_tx();
-- 
GitLab


From 42a8d4aaea8414f60eb2ed2d92df89a6e2db4615 Mon Sep 17 00:00:00 2001
From: Liang Li <liali@redhat.com>
Date: Mon, 12 Dec 2022 11:56:47 +0800
Subject: [PATCH 3836/4122] selftests: bonding: add bonding prio option test

Add a test for bonding prio option. Here is the test result:

]# ./option_prio.sh
TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=arp_ip_target and primary_reselect=0)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=arp_ip_target and primary_reselect=1)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=arp_ip_target and primary_reselect=2)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=miimon and primary_reselect=0)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=miimon and primary_reselect=1)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=1 monitor=miimon and primary_reselect=2)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=5 monitor=miimon and primary_reselect=0)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=5 monitor=miimon and primary_reselect=1)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=5 monitor=miimon and primary_reselect=2)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=6 monitor=miimon and primary_reselect=0)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=6 monitor=miimon and primary_reselect=1)  [ OK ]
TEST: prio_test (Test bonding option 'prio' with mode=6 monitor=miimon and primary_reselect=2)  [ OK ]

Signed-off-by: Liang Li <liali@redhat.com>
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/bonding/Makefile    |   3 +-
 .../drivers/net/bonding/option_prio.sh        | 245 ++++++++++++++++++
 2 files changed, 247 insertions(+), 1 deletion(-)
 create mode 100755 tools/testing/selftests/drivers/net/bonding/option_prio.sh

diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 0f3921908b078..8e3b786a748f9 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -7,7 +7,8 @@ TEST_PROGS := \
 	bond-lladdr-target.sh \
 	dev_addr_lists.sh \
 	mode-1-recovery-updelay.sh \
-	mode-2-recovery-updelay.sh
+	mode-2-recovery-updelay.sh \
+	option_prio.sh
 
 TEST_FILES := \
 	lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/bonding/option_prio.sh b/tools/testing/selftests/drivers/net/bonding/option_prio.sh
new file mode 100755
index 0000000000000..c32eebff5005d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/option_prio.sh
@@ -0,0 +1,245 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bonding option prio
+#
+
+ALL_TESTS="
+	prio_arp_ip_target_test
+	prio_miimon_test
+"
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/net_forwarding_lib.sh
+
+destroy()
+{
+	ip link del bond0 &>/dev/null
+	ip link del br0 &>/dev/null
+	ip link del veth0 &>/dev/null
+	ip link del veth1 &>/dev/null
+	ip link del veth2 &>/dev/null
+	ip netns del ns1 &>/dev/null
+	ip link del veth3 &>/dev/null
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	destroy
+}
+
+skip()
+{
+        local skip=1
+	ip link add name bond0 type bond mode 1 miimon 100 &>/dev/null
+	ip link add name veth0 type veth peer name veth0_p
+	ip link set veth0 master bond0
+
+	# check if iproute support prio option
+	ip link set dev veth0 type bond_slave prio 10
+	[[ $? -ne 0 ]] && skip=0
+
+	# check if bonding support prio option
+	ip -d link show veth0 | grep -q "prio 10"
+	[[ $? -ne 0 ]] && skip=0
+
+	ip link del bond0 &>/dev/null
+	ip link del veth0
+
+	return $skip
+}
+
+active_slave=""
+check_active_slave()
+{
+	local target_active_slave=$1
+	active_slave="$(cat /sys/class/net/bond0/bonding/active_slave)"
+	test "$active_slave" = "$target_active_slave"
+	check_err $? "Current active slave is $active_slave but not $target_active_slave"
+}
+
+
+# Test bonding prio option with mode=$mode monitor=$monitor
+# and primary_reselect=$primary_reselect
+prio_test()
+{
+	RET=0
+
+	local monitor=$1
+	local mode=$2
+	local primary_reselect=$3
+
+	local bond_ip4="192.169.1.2"
+	local peer_ip4="192.169.1.1"
+	local bond_ip6="2009:0a:0b::02"
+	local peer_ip6="2009:0a:0b::01"
+
+
+	# create veths
+	ip link add name veth0 type veth peer name veth0_p
+	ip link add name veth1 type veth peer name veth1_p
+	ip link add name veth2 type veth peer name veth2_p
+
+	# create bond
+	if [[ "$monitor" == "miimon" ]];then
+		ip link add name bond0 type bond mode $mode miimon 100 primary veth1 primary_reselect $primary_reselect
+	elif [[ "$monitor" == "arp_ip_target" ]];then
+		ip link add name bond0 type bond mode $mode arp_interval 1000 arp_ip_target $peer_ip4 primary veth1 primary_reselect $primary_reselect
+	elif [[ "$monitor" == "ns_ip6_target" ]];then
+		ip link add name bond0 type bond mode $mode arp_interval 1000 ns_ip6_target $peer_ip6 primary veth1 primary_reselect $primary_reselect
+	fi
+	ip link set bond0 up
+	ip link set veth0 master bond0
+	ip link set veth1 master bond0
+	ip link set veth2 master bond0
+	# check bonding member prio value
+	ip link set dev veth0 type bond_slave prio 0
+	ip link set dev veth1 type bond_slave prio 10
+	ip link set dev veth2 type bond_slave prio 11
+	ip -d link show veth0 | grep -q 'prio 0'
+	check_err $? "veth0 prio is not 0"
+	ip -d link show veth1 | grep -q 'prio 10'
+	check_err $? "veth0 prio is not 10"
+	ip -d link show veth2 | grep -q 'prio 11'
+	check_err $? "veth0 prio is not 11"
+
+	ip link set veth0 up
+	ip link set veth1 up
+	ip link set veth2 up
+	ip link set veth0_p up
+	ip link set veth1_p up
+	ip link set veth2_p up
+
+	# prepare ping target
+	ip link add name br0 type bridge
+	ip link set br0 up
+	ip link set veth0_p master br0
+	ip link set veth1_p master br0
+	ip link set veth2_p master br0
+	ip link add name veth3 type veth peer name veth3_p
+	ip netns add ns1
+	ip link set veth3_p master br0 up
+	ip link set veth3 netns ns1 up
+	ip netns exec ns1 ip addr add $peer_ip4/24 dev veth3
+	ip netns exec ns1 ip addr add $peer_ip6/64 dev veth3
+	ip addr add $bond_ip4/24 dev bond0
+	ip addr add $bond_ip6/64 dev bond0
+	sleep 5
+
+	ping $peer_ip4 -c5 -I bond0 &>/dev/null
+	check_err $? "ping failed 1."
+	ping6 $peer_ip6 -c5 -I bond0 &>/dev/null
+	check_err $? "ping6 failed 1."
+
+	# active salve should be the primary slave
+	check_active_slave veth1
+
+	# active slave should be the higher prio slave
+	ip link set $active_slave down
+	ping $peer_ip4 -c5 -I bond0 &>/dev/null
+	check_err $? "ping failed 2."
+	check_active_slave veth2
+
+	# when only 1 slave is up
+	ip link set $active_slave down
+	ping $peer_ip4 -c5 -I bond0 &>/dev/null
+	check_err $? "ping failed 3."
+	check_active_slave veth0
+
+	# when a higher prio slave change to up
+	ip link set veth2 up
+	ping $peer_ip4 -c5 -I bond0 &>/dev/null
+	check_err $? "ping failed 4."
+	case $primary_reselect in
+		"0")
+			check_active_slave "veth2"
+			;;
+		"1")
+			check_active_slave "veth0"
+			;;
+		"2")
+			check_active_slave "veth0"
+			;;
+	esac
+	local pre_active_slave=$active_slave
+
+	# when the primary slave change to up
+	ip link set veth1 up
+	ping $peer_ip4 -c5 -I bond0 &>/dev/null
+	check_err $? "ping failed 5."
+	case $primary_reselect in
+		"0")
+			check_active_slave "veth1"
+			;;
+		"1")
+			check_active_slave "$pre_active_slave"
+			;;
+		"2")
+			check_active_slave "$pre_active_slave"
+			ip link set $active_slave down
+			ping $peer_ip4 -c5 -I bond0 &>/dev/null
+			check_err $? "ping failed 6."
+			check_active_slave "veth1"
+			;;
+	esac
+
+	# Test changing bond salve prio
+	if [[ "$primary_reselect" == "0" ]];then
+		ip link set dev veth0 type bond_slave prio 1000000
+		ip link set dev veth1 type bond_slave prio 0
+		ip link set dev veth2 type bond_slave prio -50
+		ip -d link show veth0 | grep -q 'prio 1000000'
+		check_err $? "veth0 prio is not 1000000"
+		ip -d link show veth1 | grep -q 'prio 0'
+		check_err $? "veth1 prio is not 0"
+		ip -d link show veth2 | grep -q 'prio -50'
+		check_err $? "veth3 prio is not -50"
+		check_active_slave "veth1"
+
+		ip link set $active_slave down
+		ping $peer_ip4 -c5 -I bond0 &>/dev/null
+		check_err $? "ping failed 7."
+		check_active_slave "veth0"
+	fi
+
+	cleanup
+
+	log_test "prio_test" "Test bonding option 'prio' with mode=$mode monitor=$monitor and primary_reselect=$primary_reselect"
+}
+
+prio_miimon_test()
+{
+	local mode
+	local primary_reselect
+
+	for mode in 1 5 6; do
+		for primary_reselect in 0 1 2; do
+			prio_test "miimon" $mode $primary_reselect
+		done
+	done
+}
+
+prio_arp_ip_target_test()
+{
+	local primary_reselect
+
+	for primary_reselect in 0 1 2; do
+		prio_test "arp_ip_target" 1 $primary_reselect
+	done
+}
+
+if skip;then
+	log_test_skip "option_prio.sh" "Current iproute doesn't support 'prio'."
+	exit 0
+fi
+
+trap cleanup EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
-- 
GitLab


From ddc9648db162eee556edd5222d2808fe33730203 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 12 Dec 2022 16:41:37 +0800
Subject: [PATCH 3837/4122] mISDN: hfcsusb: don't call
 dev_kfree_skb/kfree_skb() under spin_lock_irqsave()

It is not allowed to call kfree_skb() or consume_skb() from hardware
interrupt context or with hardware interrupts being disabled.

It should use dev_kfree_skb_irq() or dev_consume_skb_irq() instead.
The difference between them is free reason, dev_kfree_skb_irq() means
the SKB is dropped in error and dev_consume_skb_irq() means the SKB
is consumed in normal.

skb_queue_purge() is called under spin_lock_irqsave() in hfcusb_l2l1D(),
kfree_skb() is called in it, to fix this, use skb_queue_splice_init()
to move the dch->squeue to a free queue, also enqueue the tx_skb and
rx_skb, at last calling __skb_queue_purge() to free the SKBs afer unlock.

In tx_iso_complete(), dev_kfree_skb() is called to consume the transmitted
SKB, so replace it with dev_consume_skb_irq().

Fixes: 69f52adb2d53 ("mISDN: Add HFC USB driver")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/isdn/hardware/mISDN/hfcsusb.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcsusb.c b/drivers/isdn/hardware/mISDN/hfcsusb.c
index 651f2f8f685b7..1efd17979f240 100644
--- a/drivers/isdn/hardware/mISDN/hfcsusb.c
+++ b/drivers/isdn/hardware/mISDN/hfcsusb.c
@@ -326,20 +326,24 @@ hfcusb_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
 		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
 
 		if (hw->protocol == ISDN_P_NT_S0) {
+			struct sk_buff_head free_queue;
+
+			__skb_queue_head_init(&free_queue);
 			hfcsusb_ph_command(hw, HFC_L1_DEACTIVATE_NT);
 			spin_lock_irqsave(&hw->lock, flags);
-			skb_queue_purge(&dch->squeue);
+			skb_queue_splice_init(&dch->squeue, &free_queue);
 			if (dch->tx_skb) {
-				dev_kfree_skb(dch->tx_skb);
+				__skb_queue_tail(&free_queue, dch->tx_skb);
 				dch->tx_skb = NULL;
 			}
 			dch->tx_idx = 0;
 			if (dch->rx_skb) {
-				dev_kfree_skb(dch->rx_skb);
+				__skb_queue_tail(&free_queue, dch->rx_skb);
 				dch->rx_skb = NULL;
 			}
 			test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
 			spin_unlock_irqrestore(&hw->lock, flags);
+			__skb_queue_purge(&free_queue);
 #ifdef FIXME
 			if (test_and_clear_bit(FLG_L1_BUSY, &dch->Flags))
 				dchannel_sched_event(&hc->dch, D_CLEARBUSY);
@@ -1330,7 +1334,7 @@ tx_iso_complete(struct urb *urb)
 					printk("\n");
 				}
 
-				dev_kfree_skb(tx_skb);
+				dev_consume_skb_irq(tx_skb);
 				tx_skb = NULL;
 				if (fifo->dch && get_next_dframe(fifo->dch))
 					tx_skb = fifo->dch->tx_skb;
-- 
GitLab


From f0f596bd75a9d573ca9b587abb39cee0b916bb82 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 12 Dec 2022 16:41:38 +0800
Subject: [PATCH 3838/4122] mISDN: hfcpci: don't call dev_kfree_skb/kfree_skb()
 under spin_lock_irqsave()

It is not allowed to call kfree_skb() or consume_skb() from hardware
interrupt context or with hardware interrupts being disabled.

skb_queue_purge() is called under spin_lock_irqsave() in hfcpci_l2l1D(),
kfree_skb() is called in it, to fix this, use skb_queue_splice_init()
to move the dch->squeue to a free queue, also enqueue the tx_skb and
rx_skb, at last calling __skb_queue_purge() to free the SKBs afer unlock.

Fixes: 1700fe1a10dc ("Add mISDN HFC PCI driver")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/isdn/hardware/mISDN/hfcpci.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c
index e964a8dd8512a..c0331b2680108 100644
--- a/drivers/isdn/hardware/mISDN/hfcpci.c
+++ b/drivers/isdn/hardware/mISDN/hfcpci.c
@@ -1617,16 +1617,19 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
 		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
 		spin_lock_irqsave(&hc->lock, flags);
 		if (hc->hw.protocol == ISDN_P_NT_S0) {
+			struct sk_buff_head free_queue;
+
+			__skb_queue_head_init(&free_queue);
 			/* prepare deactivation */
 			Write_hfc(hc, HFCPCI_STATES, 0x40);
-			skb_queue_purge(&dch->squeue);
+			skb_queue_splice_init(&dch->squeue, &free_queue);
 			if (dch->tx_skb) {
-				dev_kfree_skb(dch->tx_skb);
+				__skb_queue_tail(&free_queue, dch->tx_skb);
 				dch->tx_skb = NULL;
 			}
 			dch->tx_idx = 0;
 			if (dch->rx_skb) {
-				dev_kfree_skb(dch->rx_skb);
+				__skb_queue_tail(&free_queue, dch->rx_skb);
 				dch->rx_skb = NULL;
 			}
 			test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
@@ -1639,10 +1642,12 @@ hfcpci_l2l1D(struct mISDNchannel *ch, struct sk_buff *skb)
 			hc->hw.mst_m &= ~HFCPCI_MASTER;
 			Write_hfc(hc, HFCPCI_MST_MODE, hc->hw.mst_m);
 			ret = 0;
+			spin_unlock_irqrestore(&hc->lock, flags);
+			__skb_queue_purge(&free_queue);
 		} else {
 			ret = l1_event(dch->l1, hh->prim);
+			spin_unlock_irqrestore(&hc->lock, flags);
 		}
-		spin_unlock_irqrestore(&hc->lock, flags);
 		break;
 	}
 	if (!ret)
-- 
GitLab


From 1232946cf522b8de9e398828bde325d7c41f29dd Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Mon, 12 Dec 2022 16:41:39 +0800
Subject: [PATCH 3839/4122] mISDN: hfcmulti: don't call
 dev_kfree_skb/kfree_skb() under spin_lock_irqsave()

It is not allowed to call kfree_skb() or consume_skb() from hardware
interrupt context or with hardware interrupts being disabled.

skb_queue_purge() is called under spin_lock_irqsave() in handle_dmsg()
and hfcm_l1callback(), kfree_skb() is called in them, to fix this, use
skb_queue_splice_init() to move the dch->squeue to a free queue, also
enqueue the tx_skb and rx_skb, at last calling __skb_queue_purge() to
free the SKBs afer unlock.

Fixes: af69fb3a8ffa ("Add mISDN HFC multiport driver")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/isdn/hardware/mISDN/hfcmulti.c | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c
index 4f7eaa17fb274..e840609c50eb7 100644
--- a/drivers/isdn/hardware/mISDN/hfcmulti.c
+++ b/drivers/isdn/hardware/mISDN/hfcmulti.c
@@ -3217,6 +3217,7 @@ static int
 hfcm_l1callback(struct dchannel *dch, u_int cmd)
 {
 	struct hfc_multi	*hc = dch->hw;
+	struct sk_buff_head	free_queue;
 	u_long	flags;
 
 	switch (cmd) {
@@ -3245,6 +3246,7 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd)
 		l1_event(dch->l1, HW_POWERUP_IND);
 		break;
 	case HW_DEACT_REQ:
+		__skb_queue_head_init(&free_queue);
 		/* start deactivation */
 		spin_lock_irqsave(&hc->lock, flags);
 		if (hc->ctype == HFC_TYPE_E1) {
@@ -3264,20 +3266,21 @@ hfcm_l1callback(struct dchannel *dch, u_int cmd)
 				plxsd_checksync(hc, 0);
 			}
 		}
-		skb_queue_purge(&dch->squeue);
+		skb_queue_splice_init(&dch->squeue, &free_queue);
 		if (dch->tx_skb) {
-			dev_kfree_skb(dch->tx_skb);
+			__skb_queue_tail(&free_queue, dch->tx_skb);
 			dch->tx_skb = NULL;
 		}
 		dch->tx_idx = 0;
 		if (dch->rx_skb) {
-			dev_kfree_skb(dch->rx_skb);
+			__skb_queue_tail(&free_queue, dch->rx_skb);
 			dch->rx_skb = NULL;
 		}
 		test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
 		if (test_and_clear_bit(FLG_BUSY_TIMER, &dch->Flags))
 			del_timer(&dch->timer);
 		spin_unlock_irqrestore(&hc->lock, flags);
+		__skb_queue_purge(&free_queue);
 		break;
 	case HW_POWERUP_REQ:
 		spin_lock_irqsave(&hc->lock, flags);
@@ -3384,6 +3387,9 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 	case PH_DEACTIVATE_REQ:
 		test_and_clear_bit(FLG_L2_ACTIVATED, &dch->Flags);
 		if (dch->dev.D.protocol != ISDN_P_TE_S0) {
+			struct sk_buff_head free_queue;
+
+			__skb_queue_head_init(&free_queue);
 			spin_lock_irqsave(&hc->lock, flags);
 			if (debug & DEBUG_HFCMULTI_MSG)
 				printk(KERN_DEBUG
@@ -3405,14 +3411,14 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 				/* deactivate */
 				dch->state = 1;
 			}
-			skb_queue_purge(&dch->squeue);
+			skb_queue_splice_init(&dch->squeue, &free_queue);
 			if (dch->tx_skb) {
-				dev_kfree_skb(dch->tx_skb);
+				__skb_queue_tail(&free_queue, dch->tx_skb);
 				dch->tx_skb = NULL;
 			}
 			dch->tx_idx = 0;
 			if (dch->rx_skb) {
-				dev_kfree_skb(dch->rx_skb);
+				__skb_queue_tail(&free_queue, dch->rx_skb);
 				dch->rx_skb = NULL;
 			}
 			test_and_clear_bit(FLG_TX_BUSY, &dch->Flags);
@@ -3424,6 +3430,7 @@ handle_dmsg(struct mISDNchannel *ch, struct sk_buff *skb)
 #endif
 			ret = 0;
 			spin_unlock_irqrestore(&hc->lock, flags);
+			__skb_queue_purge(&free_queue);
 		} else
 			ret = l1_event(dch->l1, hh->prim);
 		break;
-- 
GitLab


From de5dc44370fbd6b46bd7f1a1e00369be54a041c8 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Mon, 12 Dec 2022 11:00:31 -0800
Subject: [PATCH 3840/4122] igb: Initialize mailbox message for VF reset

When a MAC address is not assigned to the VF, that portion of the message
sent to the VF is not set. The memory, however, is allocated from the
stack meaning that information may be leaked to the VM. Initialize the
message buffer to 0 so that no information is passed to the VM in this
case.

Fixes: 6ddbc4cf1f4d ("igb: Indicate failure on vf reset for empty mac address")
Reported-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Reviewed-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20221212190031.3983342-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 97290fc0fddd8..3c0c35ecea10c 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -7525,7 +7525,7 @@ static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
 {
 	struct e1000_hw *hw = &adapter->hw;
 	unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
-	u32 reg, msgbuf[3];
+	u32 reg, msgbuf[3] = {};
 	u8 *addr = (u8 *)(&msgbuf[1]);
 
 	/* process all the same items cleared in a function level reset */
-- 
GitLab


From 2d4ee16d969c97996e80e4c9cb6de0acaff22c9f Mon Sep 17 00:00:00 2001
From: "Jiri Slaby (SUSE)" <jirislaby@kernel.org>
Date: Tue, 13 Dec 2022 15:52:08 -0700
Subject: [PATCH 3841/4122] wireguard: timers: cast enum limits members to int
 in prints

Since gcc13, each member of an enum has the same type as the enum. And
that is inherited from its members. Provided "REKEY_AFTER_MESSAGES =
1ULL << 60", the named type is unsigned long.

This generates warnings with gcc-13:
  error: format '%d' expects argument of type 'int', but argument 6 has type 'long unsigned int'

Cast those particular enum members to int when printing them.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36113
Cc: Martin Liska <mliska@suse.cz>
Signed-off-by: Jiri Slaby (SUSE) <jirislaby@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
Link: https://lore.kernel.org/all/20221213225208.3343692-2-Jason@zx2c4.com/
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/wireguard/timers.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireguard/timers.c b/drivers/net/wireguard/timers.c
index b5706b6718b19..53d8a57a0dfa3 100644
--- a/drivers/net/wireguard/timers.c
+++ b/drivers/net/wireguard/timers.c
@@ -46,7 +46,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer)
 	if (peer->timer_handshake_attempts > MAX_TIMER_HANDSHAKES) {
 		pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d attempts, giving up\n",
 			 peer->device->dev->name, peer->internal_id,
-			 &peer->endpoint.addr, MAX_TIMER_HANDSHAKES + 2);
+			 &peer->endpoint.addr, (int)MAX_TIMER_HANDSHAKES + 2);
 
 		del_timer(&peer->timer_send_keepalive);
 		/* We drop all packets without a keypair and don't try again,
@@ -64,7 +64,7 @@ static void wg_expired_retransmit_handshake(struct timer_list *timer)
 		++peer->timer_handshake_attempts;
 		pr_debug("%s: Handshake for peer %llu (%pISpfsc) did not complete after %d seconds, retrying (try %d)\n",
 			 peer->device->dev->name, peer->internal_id,
-			 &peer->endpoint.addr, REKEY_TIMEOUT,
+			 &peer->endpoint.addr, (int)REKEY_TIMEOUT,
 			 peer->timer_handshake_attempts + 1);
 
 		/* We clear the endpoint address src address, in case this is
@@ -94,7 +94,7 @@ static void wg_expired_new_handshake(struct timer_list *timer)
 
 	pr_debug("%s: Retrying handshake with peer %llu (%pISpfsc) because we stopped hearing back after %d seconds\n",
 		 peer->device->dev->name, peer->internal_id,
-		 &peer->endpoint.addr, KEEPALIVE_TIMEOUT + REKEY_TIMEOUT);
+		 &peer->endpoint.addr, (int)(KEEPALIVE_TIMEOUT + REKEY_TIMEOUT));
 	/* We clear the endpoint address src address, in case this is the cause
 	 * of trouble.
 	 */
@@ -126,7 +126,7 @@ static void wg_queued_expired_zero_key_material(struct work_struct *work)
 
 	pr_debug("%s: Zeroing out all keys for peer %llu (%pISpfsc), since we haven't received a new one in %d seconds\n",
 		 peer->device->dev->name, peer->internal_id,
-		 &peer->endpoint.addr, REJECT_AFTER_TIME * 3);
+		 &peer->endpoint.addr, (int)REJECT_AFTER_TIME * 3);
 	wg_noise_handshake_clear(&peer->handshake);
 	wg_noise_keypairs_clear(&peer->keypairs);
 	wg_peer_put(peer);
-- 
GitLab


From d74f4a3f6d88a2416564bc6bf937e423a4ae8f8e Mon Sep 17 00:00:00 2001
From: Yang Li <yang.lee@linux.alibaba.com>
Date: Wed, 14 Dec 2022 10:39:11 +0800
Subject: [PATCH 3842/4122] cifs: Remove duplicated include in cifsglob.h

./fs/cifs/cifsglob.h: linux/scatterlist.h is included more than once.

Link: https://bugzilla.openanolis.cn/show_bug.cgi?id=3459
Fixes: f7f291e14dde ("cifs: fix oops during encryption")
Reported-by: Abaci Robot <abaci@linux.alibaba.com>
Signed-off-by: Yang Li <yang.lee@linux.alibaba.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
---
 fs/cifs/cifsglob.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 703685e2db5e7..82f2d3070c266 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -23,7 +23,6 @@
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
 #include <crypto/internal/hash.h>
-#include <linux/scatterlist.h>
 #include <uapi/linux/cifs/cifs_mount.h>
 #include "../smbfs_common/smb2pdu.h"
 #include "smb2pdu.h"
-- 
GitLab


From f65a486821cfd363833079b2a7b0769250ee21c9 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 11 Dec 2022 22:04:07 +0900
Subject: [PATCH 3843/4122] kbuild: change module.order to list *.o instead of
 *.ko

scripts/Makefile.build replaces the suffix .o with .ko, then
scripts/Makefile.modpost calls the sed command to change .ko back
to the original .o suffix.

Instead of converting the suffixes back-and-forth, store the .o paths
in modules.order, and replace it with .ko in 'make modules_install'.

This avoids the unneeded sed command.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
---
 Makefile                                    |  2 +-
 scripts/Makefile.build                      |  2 +-
 scripts/Makefile.modfinal                   |  6 +++---
 scripts/Makefile.modinst                    |  2 +-
 scripts/Makefile.modpost                    |  7 +++++--
 scripts/clang-tools/gen_compile_commands.py |  8 ++++----
 scripts/gen_autoksyms.sh                    |  2 +-
 scripts/mod/modpost.c                       | 11 ++++-------
 scripts/modules-check.sh                    |  2 +-
 9 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/Makefile b/Makefile
index 591485152a95c..f506879e74526 100644
--- a/Makefile
+++ b/Makefile
@@ -1564,7 +1564,7 @@ __modinst_pre:
 		rm -f $(MODLIB)/build ; \
 		ln -s $(CURDIR) $(MODLIB)/build ; \
 	fi
-	@sed 's:^:kernel/:' modules.order > $(MODLIB)/modules.order
+	@sed 's:^\(.*\)\.o$$:kernel/\1.ko:' modules.order > $(MODLIB)/modules.order
 	@cp -f modules.builtin $(MODLIB)/
 	@cp -f $(objtree)/modules.builtin.modinfo $(MODLIB)/
 
diff --git a/scripts/Makefile.build b/scripts/Makefile.build
index 799df12b53f39..267eb7aac5b22 100644
--- a/scripts/Makefile.build
+++ b/scripts/Makefile.build
@@ -435,7 +435,7 @@ $(obj)/built-in.a: $(real-obj-y) FORCE
 # modules.order unless contained modules are updated.
 
 cmd_modules_order = { $(foreach m, $(real-prereqs), \
-	$(if $(filter %/modules.order, $m), cat $m, echo $(patsubst %.o,%.ko,$m));) :; } \
+	$(if $(filter %/modules.order, $m), cat $m, echo $m);) :; } \
 	> $@
 
 $(obj)/modules.order: $(obj-m) FORCE
diff --git a/scripts/Makefile.modfinal b/scripts/Makefile.modfinal
index 83f2797e530c5..a30d5b08eee94 100644
--- a/scripts/Makefile.modfinal
+++ b/scripts/Makefile.modfinal
@@ -15,7 +15,7 @@ include $(srctree)/scripts/Makefile.lib
 # find all modules listed in modules.order
 modules := $(call read-file, $(MODORDER))
 
-__modfinal: $(modules)
+__modfinal: $(modules:%.o=%.ko)
 	@:
 
 # modname and part-of-module are set to make c_flags define proper module flags
@@ -57,13 +57,13 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check),      \
 	printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
 
 # Re-generate module BTFs if either module's .ko or vmlinux changed
-$(modules): %.ko: %.o %.mod.o scripts/module.lds $(and $(CONFIG_DEBUG_INFO_BTF_MODULES),$(KBUILD_BUILTIN),vmlinux) FORCE
+%.ko: %.o %.mod.o scripts/module.lds $(and $(CONFIG_DEBUG_INFO_BTF_MODULES),$(KBUILD_BUILTIN),vmlinux) FORCE
 	+$(call if_changed_except,ld_ko_o,vmlinux)
 ifdef CONFIG_DEBUG_INFO_BTF_MODULES
 	+$(if $(newer-prereqs),$(call cmd,btf_ko))
 endif
 
-targets += $(modules) $(modules:.ko=.mod.o)
+targets += $(modules:%.o=%.ko) $(modules:%.o=%.mod.o)
 
 # Add FORCE to the prequisites of a target to force it to be always rebuilt.
 # ---------------------------------------------------------------------------
diff --git a/scripts/Makefile.modinst b/scripts/Makefile.modinst
index 65aac6be78ec3..836391e5d2097 100644
--- a/scripts/Makefile.modinst
+++ b/scripts/Makefile.modinst
@@ -26,7 +26,7 @@ suffix-$(CONFIG_MODULE_COMPRESS_GZIP)	:= .gz
 suffix-$(CONFIG_MODULE_COMPRESS_XZ)	:= .xz
 suffix-$(CONFIG_MODULE_COMPRESS_ZSTD)	:= .zst
 
-modules := $(patsubst $(extmod_prefix)%, $(dst)/%$(suffix-y), $(modules))
+modules := $(patsubst $(extmod_prefix)%.o, $(dst)/%.ko$(suffix-y), $(modules))
 
 __modinst: $(modules)
 	@:
diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index 55a72f5eb76dd..f814a6acd200c 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -107,7 +107,10 @@ ifneq ($(KBUILD_MODPOST_WARN)$(missing-input),)
 modpost-args += -w
 endif
 
-modorder-if-needed := $(if $(KBUILD_MODULES), $(MODORDER))
+ifdef KBUILD_MODULES
+modorder-if-needed := $(MODORDER)
+modpost-args += -T $(MODORDER)
+endif
 
 MODPOST = scripts/mod/modpost
 
@@ -119,7 +122,7 @@ quiet_cmd_modpost = MODPOST $@
 		echo >&2 "WARNING: $(missing-input) is missing."; \
 		echo >&2 "         Modules may not have dependencies or modversions."; \
 		echo >&2 "         You may get many unresolved symbol warnings.";) \
-	sed 's/ko$$/o/' $(or $(modorder-if-needed), /dev/null) | $(MODPOST) $(modpost-args) -T - $(vmlinux.o-if-present)
+	$(MODPOST) $(modpost-args) $(vmlinux.o-if-present)
 
 targets += $(output-symdump)
 $(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE
diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py
index d800b2c0af977..0227522959a45 100755
--- a/scripts/clang-tools/gen_compile_commands.py
+++ b/scripts/clang-tools/gen_compile_commands.py
@@ -138,10 +138,10 @@ def cmdfiles_for_modorder(modorder):
     """
     with open(modorder) as f:
         for line in f:
-            ko = line.rstrip()
-            base, ext = os.path.splitext(ko)
-            if ext != '.ko':
-                sys.exit('{}: module path must end with .ko'.format(ko))
+            obj = line.rstrip()
+            base, ext = os.path.splitext(obj)
+            if ext != '.o':
+                sys.exit('{}: module path must end with .o'.format(obj))
             mod = base + '.mod'
             # Read from *.mod, to get a list of objects that compose the module.
             with open(mod) as m:
diff --git a/scripts/gen_autoksyms.sh b/scripts/gen_autoksyms.sh
index 653fadbad302f..12bcfae940ee9 100755
--- a/scripts/gen_autoksyms.sh
+++ b/scripts/gen_autoksyms.sh
@@ -48,7 +48,7 @@ cat > "$output_file" << EOT
 EOT
 
 {
-	[ -n "${read_modorder}" ] && sed 's/ko$/usyms/' modules.order | xargs cat
+	[ -n "${read_modorder}" ] && sed 's/o$/usyms/' modules.order | xargs cat
 	echo "$needed_symbols"
 	[ -n "$ksym_wl" ] && cat "$ksym_wl"
 } | sed -e 's/ /\n/g' | sed -n -e '/^$/!p' |
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 56d856f2e5115..b48838a71bf6a 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -1856,11 +1856,9 @@ static void read_symbols_from_files(const char *filename)
 	FILE *in = stdin;
 	char fname[PATH_MAX];
 
-	if (strcmp(filename, "-") != 0) {
-		in = fopen(filename, "r");
-		if (!in)
-			fatal("Can't open filenames file %s: %m", filename);
-	}
+	in = fopen(filename, "r");
+	if (!in)
+		fatal("Can't open filenames file %s: %m", filename);
 
 	while (fgets(fname, PATH_MAX, in) != NULL) {
 		if (strends(fname, "\n"))
@@ -1868,8 +1866,7 @@ static void read_symbols_from_files(const char *filename)
 		read_symbols(fname);
 	}
 
-	if (in != stdin)
-		fclose(in);
+	fclose(in);
 }
 
 #define SZ 500
diff --git a/scripts/modules-check.sh b/scripts/modules-check.sh
index e06327722263e..4c8da90de78ea 100755
--- a/scripts/modules-check.sh
+++ b/scripts/modules-check.sh
@@ -16,7 +16,7 @@ check_same_name_modules()
 	for m in $(sed 's:.*/::' "$1" | sort | uniq -d)
 	do
 		echo "error: the following would cause module name conflict:" >&2
-		sed -n "/\/$m/s:^:  :p" "$1" >&2
+		sed -n "/\/$m/s:^\(.*\)\.o\$:  \1.ko:p" "$1" >&2
 		exit_code=1
 	done
 }
-- 
GitLab


From 3d57e1b7b1d42d4040f0d993b66ff06beda02c54 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Sun, 11 Dec 2022 22:04:08 +0900
Subject: [PATCH 3844/4122] kbuild: refactor the prerequisites of the modpost
 rule

The prerequisites of modpost are cluttered. The variables *-if-present
and *-if-needed are unreadable.

It is cleaner to append them into modpost-deps.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/Makefile.modpost | 36 ++++++++++++++++++++++--------------
 1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/scripts/Makefile.modpost b/scripts/Makefile.modpost
index f814a6acd200c..5eb5e8280379a 100644
--- a/scripts/Makefile.modpost
+++ b/scripts/Makefile.modpost
@@ -38,6 +38,8 @@ __modpost:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
+MODPOST = scripts/mod/modpost
+
 modpost-args =										\
 	$(if $(CONFIG_MODVERSIONS),-m)							\
 	$(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)					\
@@ -46,6 +48,8 @@ modpost-args =										\
 	$(if $(CONFIG_MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS)$(KBUILD_NSDEPS),-N)	\
 	-o $@
 
+modpost-deps := $(MODPOST)
+
 # 'make -i -k' ignores compile errors, and builds as many modules as possible.
 ifneq ($(findstring i,$(filter-out --%,$(MAKEFLAGS))),)
 modpost-args += -n
@@ -78,12 +82,13 @@ targets += .vmlinux.objs
 .vmlinux.objs: vmlinux.a $(KBUILD_VMLINUX_LIBS) FORCE
 	$(call if_changed,vmlinux_objs)
 
-vmlinux.o-if-present := $(wildcard vmlinux.o)
-output-symdump := vmlinux.symvers
-
-ifdef KBUILD_MODULES
-output-symdump := $(if $(vmlinux.o-if-present), Module.symvers, modules-only.symvers)
-missing-input := $(filter-out $(vmlinux.o-if-present),vmlinux.o)
+ifeq ($(wildcard vmlinux.o),)
+missing-input := vmlinux.o
+output-symdump := modules-only.symvers
+else
+modpost-args += vmlinux.o
+modpost-deps += vmlinux.o
+output-symdump := $(if $(KBUILD_MODULES), Module.symvers, vmlinux.symvers)
 endif
 
 else
@@ -95,11 +100,16 @@ src := $(obj)
 # Include the module's Makefile to find KBUILD_EXTRA_SYMBOLS
 include $(kbuild-file)
 
-module.symvers-if-present := $(wildcard Module.symvers)
 output-symdump := $(KBUILD_EXTMOD)/Module.symvers
-missing-input := $(filter-out $(module.symvers-if-present), Module.symvers)
 
-modpost-args += -e $(addprefix -i ,$(module.symvers-if-present) $(KBUILD_EXTRA_SYMBOLS))
+ifeq ($(wildcard Module.symvers),)
+missing-input := Module.symvers
+else
+modpost-args += -i Module.symvers
+modpost-deps += Module.symvers
+endif
+
+modpost-args += -e $(addprefix -i , $(KBUILD_EXTRA_SYMBOLS))
 
 endif # ($(KBUILD_EXTMOD),)
 
@@ -108,12 +118,10 @@ modpost-args += -w
 endif
 
 ifdef KBUILD_MODULES
-modorder-if-needed := $(MODORDER)
 modpost-args += -T $(MODORDER)
+modpost-deps += $(MODORDER)
 endif
 
-MODPOST = scripts/mod/modpost
-
 # Read out modules.order to pass in modpost.
 # Otherwise, allmodconfig would fail with "Argument list too long".
 quiet_cmd_modpost = MODPOST $@
@@ -122,10 +130,10 @@ quiet_cmd_modpost = MODPOST $@
 		echo >&2 "WARNING: $(missing-input) is missing."; \
 		echo >&2 "         Modules may not have dependencies or modversions."; \
 		echo >&2 "         You may get many unresolved symbol warnings.";) \
-	$(MODPOST) $(modpost-args) $(vmlinux.o-if-present)
+	$(MODPOST) $(modpost-args)
 
 targets += $(output-symdump)
-$(output-symdump): $(modorder-if-needed) $(vmlinux.o-if-present) $(module.symvers-if-present) $(MODPOST) FORCE
+$(output-symdump): $(modpost-deps) FORCE
 	$(call if_changed,modpost)
 
 __modpost: $(output-symdump)
-- 
GitLab


From 87d599fc3955e59b1ed30f350321a4be5353f945 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 13 Dec 2022 20:24:20 +0900
Subject: [PATCH 3845/4122] kbuild: ensure Make >= 3.82 is used

Documentation/process/changes.rst notes the minimal GNU Make version,
but it is not checked anywhere.

We could check $(MAKE_VERSION), but another simple way is to check
$(.FEATURES) since the feature list always grows.

GNU Make 3.81 expands $(.FEATURES) to:
  target-specific order-only second-expansion else-if archives jobserver check-symlink

GNU Make 3.82 expands $(.FEATURES) to:
  target-specific order-only second-expansion else-if shortest-stem undefine archives jobserver check-symlink

To ensure Make >= 3.82, you can check either 'shortest-stem' or
'undefine'.

This way is not always possible. For example, Make 4.0 through 4.2 have
the same set of $(.FEATURES). At that point, we will need to come up
with a different approach.

Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Reviewed-by: Nicolas Schier <nicolas@fjasle.eu>
---
 Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Makefile b/Makefile
index f506879e74526..5f015f206e12c 100644
--- a/Makefile
+++ b/Makefile
@@ -11,6 +11,10 @@ NAME = Hurr durr I'ma ninja sloth
 # Comments in this file are targeted only to the developer, do not
 # expect to learn how to build the kernel reading this file.
 
+ifeq ($(filter undefine,$(.FEATURES)),)
+$(error GNU Make >= 3.82 is required. Your Make version is $(MAKE_VERSION))
+endif
+
 $(if $(filter __%, $(MAKECMDGOALS)), \
 	$(error targets prefixed with '__' are only for internal use))
 
-- 
GitLab


From 0d24f1b7cc65ee73ea8d04e0d10f77a7cb7a83f3 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 13 Dec 2022 11:35:28 -0700
Subject: [PATCH 3846/4122] padata: Mark padata_work_init() as __ref

When building arm64 allmodconfig + ThinLTO with clang and a proposed
modpost update to account for -ffuncton-sections, the following warning
appears:

  WARNING: modpost: vmlinux.o: section mismatch in reference: padata_work_init (section: .text.padata_work_init) -> padata_mt_helper (section: .init.text)
  WARNING: modpost: vmlinux.o: section mismatch in reference: padata_work_init (section: .text.padata_work_init) -> padata_mt_helper (section: .init.text)

LLVM has optimized padata_work_init() to include the address of
padata_mt_helper() directly because it inlined the other call to
padata_work_init() with padata_parallel_worker(), meaning the remaining
uses of padata_work_init() use padata_mt_helper() as the work_fn
argument. This optimization causes modpost to complain since
padata_work_init() is not __init, whereas padata_mt_helper() is.

Since padata_work_init() is only called from __init code when
padata_mt_helper() is passed as the work_fn argument, mark
padata_work_init() as __ref, which makes it clear to modpost that this
scenario is okay.

Suggested-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Acked-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 kernel/padata.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/kernel/padata.c b/kernel/padata.c
index e5819bb8bd1dc..d175cc0004538 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -83,8 +83,16 @@ static struct padata_work *padata_work_alloc(void)
 	return pw;
 }
 
-static void padata_work_init(struct padata_work *pw, work_func_t work_fn,
-			     void *data, int flags)
+/*
+ * This function is marked __ref because this function may be optimized in such
+ * a way that it directly refers to work_fn's address, which causes modpost to
+ * complain when work_fn is marked __init. This scenario was observed with clang
+ * LTO, where padata_work_init() was optimized to refer directly to
+ * padata_mt_helper() because the calls to padata_work_init() with other work_fn
+ * values were eliminated or inlined.
+ */
+static void __ref padata_work_init(struct padata_work *pw, work_func_t work_fn,
+				   void *data, int flags)
 {
 	if (flags & PADATA_WORK_ONSTACK)
 		INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
-- 
GitLab


From 19331e84c3873256537d446afec1f6c507f8c4ef Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <nathan@kernel.org>
Date: Tue, 13 Dec 2022 11:35:29 -0700
Subject: [PATCH 3847/4122] modpost: Include '.text.*' in TEXT_SECTIONS

Commit 6c730bfc894f ("modpost: handle -ffunction-sections") added
".text.*" to the OTHER_TEXT_SECTIONS macro to fix certain section
mismatch warnings. Unfortunately, this makes it impossible for modpost
to warn about section mismatches with LTO, which implies
'-ffunction-sections', as all functions are put in their own
'.text.<func_name>' sections, which may still reference functions in
sections they are not supposed to, such as __init.

Fix this by moving ".text.*" into TEXT_SECTIONS, so that configurations
with '-ffunction-sections' will see warnings about mismatched sections.

Link: https://lore.kernel.org/Y39kI3MOtVI5BAnV@google.com/
Reported-by: Vincent Donnefort <vdonnefort@google.com>
Reviewed-and-tested-by: Alexander Lobakin <alexandr.lobakin@intel.com>
Reviewed-by: Sami Tolvanen <samitolvanen@google.com>
Tested-by: Vincent Donnefort <vdonnefort@google.com>
Signed-off-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/mod/modpost.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index b48838a71bf6a..640e1a244ba90 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -822,10 +822,10 @@ static void check_section(const char *modname, struct elf_info *elf,
 #define ALL_EXIT_SECTIONS EXIT_SECTIONS, ALL_XXXEXIT_SECTIONS
 
 #define DATA_SECTIONS ".data", ".data.rel"
-#define TEXT_SECTIONS ".text", ".text.unlikely", ".sched.text", \
+#define TEXT_SECTIONS ".text", ".text.*", ".sched.text", \
 		".kprobes.text", ".cpuidle.text", ".noinstr.text"
 #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
-		".fixup", ".entry.text", ".exception.text", ".text.*", \
+		".fixup", ".entry.text", ".exception.text", \
 		".coldtext", ".softirqentry.text"
 
 #define INIT_SECTIONS      ".init.*"
-- 
GitLab


From 11e47bbd700f31bd1ee9f8863381bc9e741c0e97 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
Date: Mon, 12 Dec 2022 14:07:48 +0100
Subject: [PATCH 3848/4122] gpio: sim: set a limit on the number of GPIOs

With the removal of ARCH_NR_GPIOS in commit 7b61212f2a07 ("gpiolib: Get
rid of ARCH_NR_GPIOS") the gpiolib core no longer sanitizes the number
of GPIOs for us. This causes the gpio-sim selftests to now fail when
setting the number of GPIOs to 99999 and expecting the probe() to fail.

Set a sane limit of 1024 on the number of simulated GPIOs and bail out
of probe if it's exceeded.

Reported-by: kernel test robot <oliver.sang@intel.com>
Link: https://lore.kernel.org/oe-lkp/202212112236.756f5db9-oliver.sang@intel.com
Fixes: 7b61212f2a07 ("gpiolib: Get rid of ARCH_NR_GPIOS")
Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
---
 drivers/gpio/gpio-sim.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/gpio/gpio-sim.c b/drivers/gpio/gpio-sim.c
index 1020c2feb2496..60514bc5454f8 100644
--- a/drivers/gpio/gpio-sim.c
+++ b/drivers/gpio/gpio-sim.c
@@ -31,6 +31,7 @@
 
 #include "gpiolib.h"
 
+#define GPIO_SIM_NGPIO_MAX	1024
 #define GPIO_SIM_PROP_MAX	4 /* Max 3 properties + sentinel. */
 #define GPIO_SIM_NUM_ATTRS	3 /* value, pull and sentinel */
 
@@ -371,6 +372,9 @@ static int gpio_sim_add_bank(struct fwnode_handle *swnode, struct device *dev)
 	if (ret)
 		return ret;
 
+	if (num_lines > GPIO_SIM_NGPIO_MAX)
+		return -ERANGE;
+
 	ret = fwnode_property_read_string(swnode, "gpio-sim,label", &label);
 	if (ret) {
 		label = devm_kasprintf(dev, GFP_KERNEL, "%s-%s",
-- 
GitLab


From 904f309ae7edaadc9fd0ee04be8281d7781d97e4 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 13 Dec 2022 18:06:51 -0800
Subject: [PATCH 3849/4122] thermal: intel: Don't set HFI status bit to 1

When CPU doesn't support HFI (Hardware Feedback Interface), don't include
BIT 26 in the mask to prevent clearing. otherwise this results in:
    unchecked MSR access error: WRMSR to 0x1b1
      (tried to write 0x0000000004000aa8)
      at rIP: 0xffffffff8b8559fe (throttle_active_work+0xbe/0x1b0)

Fixes: 6fe1e64b6026 ("thermal: intel: Prevent accidental clearing of HFI status")
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Tested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/thermal/intel/therm_throt.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/intel/therm_throt.c b/drivers/thermal/intel/therm_throt.c
index 4bb7fddaa1437..2e22bb82b7389 100644
--- a/drivers/thermal/intel/therm_throt.c
+++ b/drivers/thermal/intel/therm_throt.c
@@ -194,7 +194,7 @@ static const struct attribute_group thermal_attr_group = {
 #define THERM_STATUS_PROCHOT_LOG	BIT(1)
 
 #define THERM_STATUS_CLEAR_CORE_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(13) | BIT(15))
-#define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11) | BIT(26))
+#define THERM_STATUS_CLEAR_PKG_MASK  (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11))
 
 /*
  * Clear the bits in package thermal status register for bit = 1
@@ -211,6 +211,9 @@ void thermal_clear_package_intr_status(int level, u64 bit_mask)
 	} else {
 		msr  = MSR_IA32_PACKAGE_THERM_STATUS;
 		msr_val = THERM_STATUS_CLEAR_PKG_MASK;
+		if (boot_cpu_has(X86_FEATURE_HFI))
+			msr_val |= BIT(26);
+
 	}
 
 	msr_val &= ~bit_mask;
-- 
GitLab


From fb9f5ee9bf5cf7ebc8731a7033e57d98832dc650 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 13 Dec 2022 17:24:29 -0500
Subject: [PATCH 3850/4122] tracing: Do not synchronize freeing of trigger
 filter on boot up

If a trigger filter on the kernel command line fails to apply (due to
syntax error), it will be freed. The freeing will call
tracepoint_synchronize_unregister(), but this is not needed during early
boot up, and will even trigger a lockdep splat.

Avoid calling the synchronization function when system_state is
SYSTEM_BOOTING.

Link: https://lore.kernel.org/linux-trace-kernel/20221213172429.7774f4ba@gandalf.local.home

Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace_events_trigger.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c
index 19ce9d22bfd7c..e535959939d3f 100644
--- a/kernel/trace/trace_events_trigger.c
+++ b/kernel/trace/trace_events_trigger.c
@@ -1085,8 +1085,14 @@ int set_trigger_filter(char *filter_str,
 	rcu_assign_pointer(data->filter, filter);
 
 	if (tmp) {
-		/* Make sure the call is done with the filter */
-		tracepoint_synchronize_unregister();
+		/*
+		 * Make sure the call is done with the filter.
+		 * It is possible that a filter could fail at boot up,
+		 * and then this path will be called. Avoid the synchronization
+		 * in that case.
+		 */
+		if (system_state != SYSTEM_BOOTING)
+			tracepoint_synchronize_unregister();
 		free_event_filter(tmp);
 	}
 
-- 
GitLab


From 3c97d25ceb75fd3e660ba9fcd4c630d0b057a5a2 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 5 Dec 2022 09:34:04 -0300
Subject: [PATCH 3851/4122] perf stat: Check existence of os->prefix, fixing a
 segfault

We need to check if we have a OS prefix, otherwise we stumble on a
metric segv that I'm now seeing in Arnaldo's tree:

  $ gdb --args perf stat -M Backend true
  ...
  Performance counter stats for 'true':

          4,712,355      TOPDOWN.SLOTS                    #     17.3 % tma_core_bound

  Program received signal SIGSEGV, Segmentation fault.
  __strlen_evex () at ../sysdeps/x86_64/multiarch/strlen-evex.S:77
  77      ../sysdeps/x86_64/multiarch/strlen-evex.S: No such file or directory.
  (gdb) bt
  #0  __strlen_evex () at ../sysdeps/x86_64/multiarch/strlen-evex.S:77
  #1  0x00007ffff74749a5 in __GI__IO_fputs (str=0x0, fp=0x7ffff75f5680 <_IO_2_1_stderr_>)
  #2  0x0000555555779f28 in do_new_line_std (config=0x555555e077c0 <stat_config>, os=0x7fffffffbf10) at util/stat-display.c:356
  #3  0x000055555577a081 in print_metric_std (config=0x555555e077c0 <stat_config>, ctx=0x7fffffffbf10, color=0x0, fmt=0x5555558b77b5 "%8.1f", unit=0x7fffffffbb10 "%  tma_memory_bound", val=13.165355724442199) at util/stat-display.c:380
  #4  0x00005555557768b6 in generic_metric (config=0x555555e077c0 <stat_config>, metric_expr=0x55555593d5b7 "((CYCLE_ACTIVITY.STALLS_MEM_ANY + EXE_ACTIVITY.BOUND_ON_STORES) / (CYCLE_ACTIVITY.STALLS_TOTAL + (EXE_ACTIVITY.1_PORTS_UTIL + tma_retiring * EXE_ACTIVITY.2_PORTS_UTIL) + EXE_ACTIVITY.BOUND_ON_STORES))"..., metric_events=0x555555f334e0, metric_refs=0x555555ec81d0, name=0x555555f32e80 "TOPDOWN.SLOTS", metric_name=0x555555f26c80 "tma_memory_bound", metric_unit=0x55555593d5b1 "100%", runtime=0, map_idx=0, out=0x7fffffffbd90, st=0x555555e9e620 <rt_stat>) at util/stat-shadow.c:934
  #5  0x0000555555778cac in perf_stat__print_shadow_stats (config=0x555555e077c0 <stat_config>, evsel=0x555555f289d0, avg=4712355, map_idx=0, out=0x7fffffffbd90, metric_events=0x555555e078e8 <stat_config+296>, st=0x555555e9e620 <rt_stat>) at util/stat-shadow.c:1329
  #6  0x000055555577b6a0 in printout (config=0x555555e077c0 <stat_config>, os=0x7fffffffbf10, uval=4712355, run=325322, ena=325322, noise=4712355, map_idx=0) at util/stat-display.c:741
  #7  0x000055555577bc74 in print_counter_aggrdata (config=0x555555e077c0 <stat_config>, counter=0x555555f289d0, s=0, os=0x7fffffffbf10) at util/stat-display.c:838
  #8  0x000055555577c1d8 in print_counter (config=0x555555e077c0 <stat_config>, counter=0x555555f289d0, os=0x7fffffffbf10) at util/stat-display.c:957
  #9  0x000055555577dba0 in evlist__print_counters (evlist=0x555555ec3610, config=0x555555e077c0 <stat_config>, _target=0x555555e01c80 <target>, ts=0x0, argc=1, argv=0x7fffffffe450) at util/stat-display.c:1413
  #10 0x00005555555fc821 in print_counters (ts=0x0, argc=1, argv=0x7fffffffe450) at builtin-stat.c:1040
  #11 0x000055555560091a in cmd_stat (argc=1, argv=0x7fffffffe450) at builtin-stat.c:2665
  #12 0x00005555556b1eea in run_builtin (p=0x555555e11f70 <commands+336>, argc=4, argv=0x7fffffffe450) at perf.c:322
  #13 0x00005555556b2181 in handle_internal_command (argc=4, argv=0x7fffffffe450) at perf.c:376
  #14 0x00005555556b22d7 in run_argv (argcp=0x7fffffffe27c, argv=0x7fffffffe270) at perf.c:420
  #15 0x00005555556b26ef in main (argc=4, argv=0x7fffffffe450) at perf.c:550
  (gdb)

Fixes: f123b2d84ecec9a3 ("perf stat: Remove prefix argument in print_metric_headers()")
Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: http://lore.kernel.org/lkml/CAP-5=fUOjSM5HajU9TCD6prY39LbX4OQbkEbtKPPGRBPBN=_VQ@mail.gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index f1ee4b0521986..9b7772e6abf65 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -353,7 +353,8 @@ static void do_new_line_std(struct perf_stat_config *config,
 			    struct outstate *os)
 {
 	fputc('\n', os->fh);
-	fputs(os->prefix, os->fh);
+	if (os->prefix)
+		fputs(os->prefix, os->fh);
 	aggr_printout(config, os->evsel, os->id, os->nr);
 	if (config->aggr_mode == AGGR_NONE)
 		fprintf(os->fh, "        ");
-- 
GitLab


From 955f6def5590ce6ca11a1c1ced0d2d1c95421059 Mon Sep 17 00:00:00 2001
From: Anshuman Khandual <anshuman.khandual@arm.com>
Date: Mon, 5 Dec 2022 12:14:43 +0530
Subject: [PATCH 3852/4122] perf record: Add remaining branch filters:
 "no_cycles", "no_flags" & "hw_index"

This adds all remaining branch filters i.e "no_cycles", "no_flags" and
"hw_index". While here, also updates the documentation.

Signed-off-by: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: James Clark <james.clark@arm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/20221205064443.533587-1-anshuman.khandual@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt | 5 +++++
 tools/perf/util/parse-branch-options.c   | 3 +++
 2 files changed, 8 insertions(+)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 9ea6d44aca58c..5f9fa07b3ddec 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -388,6 +388,7 @@ following filters are defined:
         - any_call: any function call or system call
         - any_ret: any function return or system call return
         - ind_call: any indirect branch
+        - ind_jmp: any indirect jump
         - call: direct calls, including far (to/from kernel) calls
         - u:  only when the branch target is at the user level
         - k: only when the branch target is in the kernel
@@ -396,6 +397,10 @@ following filters are defined:
 	- no_tx: only when the target is not in a hardware transaction
 	- abort_tx: only when the target is a hardware transaction abort
 	- cond: conditional branches
+	- call_stack: save call stack
+	- no_flags: don't save branch flags e.g prediction, misprediction etc
+	- no_cycles: don't save branch cycles
+	- hw_index: save branch hardware index
 	- save_type: save branch type during sampling in case binary is not available later
 		     For the platforms with Intel Arch LBR support (12th-Gen+ client or
 		     4th-Gen Xeon+ server), the save branch type is unconditionally enabled
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index 31faf2bb49ff1..fd67d204d720d 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -30,8 +30,11 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
 	BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
 	BRANCH_OPT("call", PERF_SAMPLE_BRANCH_CALL),
+	BRANCH_OPT("no_flags", PERF_SAMPLE_BRANCH_NO_FLAGS),
+	BRANCH_OPT("no_cycles", PERF_SAMPLE_BRANCH_NO_CYCLES),
 	BRANCH_OPT("save_type", PERF_SAMPLE_BRANCH_TYPE_SAVE),
 	BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
+	BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
 	BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
 	BRANCH_END
 };
-- 
GitLab


From 8f4b1e3cebce5d12048409393de751e4d663ce42 Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Mon, 5 Dec 2022 09:58:52 +0530
Subject: [PATCH 3853/4122] perf stat: Fix printing field separator in CSV
 metrics output

In 'perf stat' with CSV output option, number of fields in metrics
output is not matching with number of fields in other event output
lines.

Sample output below after applying patch to fix printing os->prefix.

	# ./perf stat -x, --per-socket -a -C 1 ls
	S0,1,82.11,msec,cpu-clock,82111626,100.00,1.000,CPUs utilized
	S0,1,2,,context-switches,82109314,100.00,24.358,/sec
	------
====>	S0,1,,,,,,,1.71,stalled cycles per insn

The above command line uses field separator as "," via "-x," option and
per-socket option displays socket value as first field. But here the
last line for "stalled cycles per insn" has more separators.  Each csv
output line is expected to have 8 field separators (for the 9 fields),
where as last line has 9 "," in the result. Patch fixes this issue.

The counter stats are displayed by function
"perf_stat__print_shadow_stats" in code "util/stat-shadow.c". While
printing the stats info for "stalled cycles per insn", function
"new_line_csv" is used as new_line callback.

The fields printed in each line contains: "Socket_id,aggr
nr,Avg,unit,event_name,run,enable_percent,ratio,unit"

The metric output prints Socket_id, aggr nr, ratio and unit. It has to
skip through remaining five fields ie,
Avg,unit,event_name,run,enable_percent. The csv line callback uses
"os->nfields" to know the number of fields to skip to match with other
lines.

Currently it is set as:

	os.nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0);

But in case of aggregation modes, csv_sep already gets printed along
with each field (Function "aggr_printout" in util/stat-display.c). So
aggr_fields can be removed from nfields. And fixed number of fields to
skip has to be "4". This is to skip fields for: "avg, unit, event name,
run, enable_percent"

This needs 4 csv separators. Patch removes aggr_fields
and uses 4 as fixed number of os->nfields to skip.

After the patch:

	# ./perf stat -x, --per-socket -a -C 1 ls
	S0,1,79.08,msec,cpu-clock,79085956,100.00,1.000,CPUs utilized
	S0,1,7,,context-switches,79084176,100.00,88.514,/sec
	------
====>	S0,1,,,,,,0.81,stalled cycles per insn

Fixes: 92a61f6412d3a09d ("perf stat: Implement CSV metrics output")
Reported-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Reviewed-by: Kajol Jain <kjain@linux.ibm.com>
Signed-off-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Disha Goel <disgoel@linux.vnet.ibm.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nageswara R Sastry <rnsastry@linux.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: linuxppc-dev@lists.ozlabs.org
Link: https://lore.kernel.org/r/20221205042852.83382-1-atrajeev@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 9b7772e6abf65..660e4f6616f55 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -687,20 +687,9 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
 	struct evsel *counter = os->evsel;
 
 	if (config->csv_output) {
-		static const int aggr_fields[AGGR_MAX] = {
-			[AGGR_NONE] = 1,
-			[AGGR_GLOBAL] = 0,
-			[AGGR_SOCKET] = 2,
-			[AGGR_DIE] = 2,
-			[AGGR_CORE] = 2,
-			[AGGR_THREAD] = 1,
-			[AGGR_UNSET] = 0,
-			[AGGR_NODE] = 1,
-		};
-
 		pm = config->metric_only ? print_metric_only_csv : print_metric_csv;
 		nl = config->metric_only ? new_line_metric : new_line_csv;
-		os->nfields = 3 + aggr_fields[config->aggr_mode] + (counter->cgrp ? 1 : 0);
+		os->nfields = 4 + (counter->cgrp ? 1 : 0);
 	} else if (config->json_output) {
 		pm = config->metric_only ? print_metric_only_json : print_metric_json;
 		nl = config->metric_only ? new_line_metric : new_line_json;
-- 
GitLab


From 1849f9f00926c54fa284be3b7f801de8b010572b Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 1 Dec 2022 20:57:39 -0800
Subject: [PATCH 3854/4122] tools lib api: Add dependency test to
 install_headers

Compute the headers to be installed from their source headers and make
each have its own build target to install it. Using dependencies
avoids headers being reinstalled and getting a new timestamp which
then causes files that depend on the header to be rebuilt.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Rix <trix@redhat.com>
Cc: bpf@vger.kernel.org
Cc: llvm@lists.linux.dev
Link: https://lore.kernel.org/r/20221202045743.2639466-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/api/Makefile | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index 3649c7f7ea651..044860ac1ed1c 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -88,10 +88,10 @@ define do_install_mkdir
 endef
 
 define do_install
-	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
-	fi;                                             \
-	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+	if [ ! -d '$2' ]; then             \
+		$(INSTALL) -d -m 755 '$2'; \
+	fi;                                \
+	$(INSTALL) $1 $(if $3,-m $3,) '$2'
 endef
 
 install_lib: $(LIBFILE)
@@ -99,14 +99,28 @@ install_lib: $(LIBFILE)
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
-install_headers:
-	$(call QUIET_INSTALL, libapi_headers) \
-		$(call do_install,cpu.h,$(prefix)/include/api,644); \
-		$(call do_install,debug.h,$(prefix)/include/api,644); \
-		$(call do_install,io.h,$(prefix)/include/api,644); \
-		$(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \
-		$(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \
-		$(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644);
+HDRS := cpu.h debug.h io.h
+FD_HDRS := fd/array.h
+FS_HDRS := fs/fs.h fs/tracing_path.h
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/api
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+INSTALL_FD_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FD_HDRS))
+INSTALL_FS_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(FS_HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h
+	$(call QUIET_INSTALL, $@) \
+		$(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+$(INSTALL_FD_HDRS): $(INSTALL_HDRS_PFX)/fd/%.h: fd/%.h
+	$(call QUIET_INSTALL, $@) \
+		$(call do_install,$<,$(INSTALL_HDRS_PFX)/fd/,644)
+
+$(INSTALL_FS_HDRS): $(INSTALL_HDRS_PFX)/fs/%.h: fs/%.h
+	$(call QUIET_INSTALL, $@) \
+		$(call do_install,$<,$(INSTALL_HDRS_PFX)/fs/,644)
+
+install_headers: $(INSTALL_HDRS) $(INSTALL_FD_HDRS) $(INSTALL_FS_HDRS)
+	$(call QUIET_INSTALL, libapi_headers)
 
 install: install_lib install_headers
 
-- 
GitLab


From 47e02b94a4c98dcc8072e56efaae5057174050fa Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 1 Dec 2022 20:57:40 -0800
Subject: [PATCH 3855/4122] tools lib perf: Add dependency test to
 install_headers

Compute the headers to be installed from their source headers and make
each have its own build target to install it. Using dependencies
avoids headers being reinstalled and getting a new timestamp which
then causes files that depend on the header to be rebuilt.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Rix <trix@redhat.com>
Cc: bpf@vger.kernel.org
Cc: llvm@lists.linux.dev
Link: https://lore.kernel.org/r/20221202045743.2639466-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/perf/Makefile | 43 +++++++++++++++++++++--------------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index a90fb8c6bed4d..30b7f91e71472 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -176,10 +176,10 @@ define do_install_mkdir
 endef
 
 define do_install
-	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
-	fi;                                             \
-	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+	if [ ! -d '$2' ]; then             \
+		$(INSTALL) -d -m 755 '$2'; \
+	fi;                                \
+	$(INSTALL) $1 $(if $3,-m $3,) '$2'
 endef
 
 install_lib: libs
@@ -187,23 +187,24 @@ install_lib: libs
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
 
-install_headers:
-	$(call QUIET_INSTALL, libperf_headers) \
-		$(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \
-		$(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \
-		$(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \
-		$(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \
-		$(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \
-		$(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \
-		$(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \
-		$(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644);
+HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
+INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h threadmap.h xyarray.h
+
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+INSTALL_INTERNAL_HDRS_PFX := $(DESTDIR)$(prefix)/include/internal
+INSTALL_INTERNAL_HDRS := $(addprefix $(INSTALL_INTERNAL_HDRS_PFX)/, $(INTERNAL_HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: include/perf/%.h
+	$(call QUIET_INSTALL, $@) \
+		$(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+$(INSTALL_INTERNAL_HDRS): $(INSTALL_INTERNAL_HDRS_PFX)/%.h: include/internal/%.h
+	$(call QUIET_INSTALL, $@) \
+		$(call do_install,$<,$(INSTALL_INTERNAL_HDRS_PFX)/,644)
+
+install_headers: $(INSTALL_HDRS) $(INSTALL_INTERNAL_HDRS)
+	$(call QUIET_INSTALL, libperf_headers)
 
 install_pkgconfig: $(LIBPERF_PC)
 	$(call QUIET_INSTALL, $(LIBPERF_PC)) \
-- 
GitLab


From 5d890591db6bed8ca69bd4bfe0cdaca372973033 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 1 Dec 2022 20:57:41 -0800
Subject: [PATCH 3856/4122] tools lib subcmd: Add dependency test to
 install_headers

Compute the headers to be installed from their source headers and make
each have its own build target to install it. Using dependencies
avoids headers being reinstalled and getting a new timestamp which
then causes files that depend on the header to be rebuilt.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Rix <trix@redhat.com>
Cc: bpf@vger.kernel.org
Cc: llvm@lists.linux.dev
Link: https://lore.kernel.org/r/20221202045743.2639466-4-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/subcmd/Makefile | 23 +++++++++++++----------
 1 file changed, 13 insertions(+), 10 deletions(-)

diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index 9a316d8b89df5..b87213263a5e0 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -89,10 +89,10 @@ define do_install_mkdir
 endef
 
 define do_install
-	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+	if [ ! -d '$2' ]; then             \
+		$(INSTALL) -d -m 755 '$2'; \
 	fi;                                             \
-	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+	$(INSTALL) $1 $(if $3,-m $3,) '$2'
 endef
 
 install_lib: $(LIBFILE)
@@ -100,13 +100,16 @@ install_lib: $(LIBFILE)
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
-install_headers:
-	$(call QUIET_INSTALL, libsubcmd_headers) \
-		$(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \
-		$(call do_install,help.h,$(prefix)/include/subcmd,644); \
-		$(call do_install,pager.h,$(prefix)/include/subcmd,644); \
-		$(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \
-		$(call do_install,run-command.h,$(prefix)/include/subcmd,644);
+HDRS := exec-cmd.h help.h pager.h parse-options.h run-command.h
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/subcmd
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h
+	$(call QUIET_INSTALL, $@) \
+		$(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+install_headers: $(INSTALL_HDRS)
+	$(call QUIET_INSTALL, libsubcmd_headers)
 
 install: install_lib install_headers
 
-- 
GitLab


From 113bb3964297467baeb1fd2c4f86d0a4142e4259 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 1 Dec 2022 20:57:42 -0800
Subject: [PATCH 3857/4122] tools lib symbol: Add dependency test to
 install_headers

Compute the headers to be installed from their source headers and make
each have its own build target to install it. Using dependencies
avoids headers being reinstalled and getting a new timestamp which
then causes files that depend on the header to be rebuilt.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Rix <trix@redhat.com>
Cc: bpf@vger.kernel.org
Cc: llvm@lists.linux.dev
Link: https://lore.kernel.org/r/20221202045743.2639466-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/symbol/Makefile | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile
index ea8707b3442a7..13d43c6f92b4a 100644
--- a/tools/lib/symbol/Makefile
+++ b/tools/lib/symbol/Makefile
@@ -89,10 +89,10 @@ define do_install_mkdir
 endef
 
 define do_install
-	if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
-	fi;                                             \
-	$(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+	if [ ! -d '$2' ]; then             \
+		$(INSTALL) -d -m 755 '$2'; \
+	fi;                                \
+	$(INSTALL) $1 $(if $3,-m $3,) '$2'
 endef
 
 install_lib: $(LIBFILE)
@@ -100,9 +100,16 @@ install_lib: $(LIBFILE)
 		$(call do_install_mkdir,$(libdir_SQ)); \
 		cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ)
 
-install_headers:
-	$(call QUIET_INSTALL, libsymbol_headers) \
-		$(call do_install,kallsyms.h,$(prefix)/include/symbol,644);
+HDRS := kallsyms.h
+INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/symbol
+INSTALL_HDRS := $(addprefix $(INSTALL_HDRS_PFX)/, $(HDRS))
+
+$(INSTALL_HDRS): $(INSTALL_HDRS_PFX)/%.h: %.h
+	$(call QUIET_INSTALL, $@) \
+		$(call do_install,$<,$(INSTALL_HDRS_PFX)/,644)
+
+install_headers: $(INSTALL_HDRS)
+	$(call QUIET_INSTALL, libsymbol_headers)
 
 install: install_lib install_headers
 
-- 
GitLab


From 117195d9f8af74c65bf57d9b56f496b5b3655bcb Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 2 Dec 2022 11:04:47 -0800
Subject: [PATCH 3858/4122] perf stat: Fix multi-line metric output in JSON

When a metric produces more than one values, it missed to print the opening
bracket.

Fixes: ab6baaae27357290 ("perf stat: Fix JSON output in metric-only mode")
Reported-by: Weilin Wang <weilin.wang@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Weilin Wang <weilin.wang@intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221202190447.1588680-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 660e4f6616f55..7f885b04c23fb 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -441,7 +441,7 @@ static void new_line_json(struct perf_stat_config *config, void *ctx)
 {
 	struct outstate *os = ctx;
 
-	fputc('\n', os->fh);
+	fputs("\n{", os->fh);
 	if (os->prefix)
 		fprintf(os->fh, "%s", os->prefix);
 	aggr_printout(config, os->evsel, os->id, os->nr);
-- 
GitLab


From 3f81f72d30b46efb614d93f430684c0deb8439b7 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Wed, 30 Nov 2022 11:15:21 +0000
Subject: [PATCH 3859/4122] perf stat: Fix invalid output handle

In this context, 'os' is already a pointer so the extra dereference
isn't required. This fixes the following test failure on aarch64:

  $ ./perf test "json output" -vvv
  92: perf stat JSON output linter                                    :
  --- start ---
  Checking json output: no args Test failed for input:
  ...
  Fatal error: glibc detected an invalid stdio handle
  ---- end ----
  perf stat JSON output linter: FAILED!

Fixes: e7f4da312259e618 ("perf stat: Pass struct outstate to printout()")
Signed-off-by: James Clark <james.clark@arm.com>
Tested-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20221130111521.334152-2-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 7f885b04c23fb..ead4915c4a033 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -731,7 +731,7 @@ static void printout(struct perf_stat_config *config, struct outstate *os,
 		perf_stat__print_shadow_stats(config, counter, uval, map_idx,
 					      &out, &config->metric_events, &rt_stat);
 	} else {
-		pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
+		pm(config, os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0);
 	}
 
 	if (!config->metric_only) {
-- 
GitLab


From fce9a619145181ca6a41253f7de3df56b1e4ad59 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 29 Nov 2022 22:29:32 -0800
Subject: [PATCH 3860/4122] perf util: Make header guard consistent with tool

Remove git reference by changing GIT_COMPAT_UTIL_H to __PERF_UTIL_H.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20221130062935.2219247-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/util.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c1f2d423a9ecb..63cdab0e5314c 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef GIT_COMPAT_UTIL_H
-#define GIT_COMPAT_UTIL_H
+#ifndef __PERF_UTIL_H
+#define __PERF_UTIL_H
 
 #define _BSD_SOURCE 1
 /* glibc 2.20 deprecates _BSD_SOURCE in favour of _DEFAULT_SOURCE */
@@ -94,4 +94,4 @@ int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x,
 		0;						\
 	})
 
-#endif /* GIT_COMPAT_UTIL_H */
+#endif /* __PERF_UTIL_H */
-- 
GitLab


From 5b7a29fb0b7d67e5d40cd6557e073afb6a7466ab Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 29 Nov 2022 22:29:33 -0800
Subject: [PATCH 3861/4122] perf util: Add host_is_bigendian to util.h

Avoid libtraceevent dependency for tep_is_bigendian or trace-event.h
dependency for bigendian. Add a new host_is_bigendian to util.h, using
the compiler defined __BYTE_ORDER__ when available.

Committer notes:

Added:

 #else  /* !__BYTE_ORDER__ */

On that nested #ifdef block, as per Namhyung's suggestion.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Steven Rostedt (VMware) <rostedt@goodmis.org>
Link: https://lore.kernel.org/r/20221130062935.2219247-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/code-reading.c    |  4 ++--
 tools/perf/tests/sample-parsing.c  |  4 ++--
 tools/perf/util/evsel.c            |  5 +----
 tools/perf/util/trace-event-info.c | 14 +++-----------
 tools/perf/util/trace-event-read.c |  3 ++-
 tools/perf/util/trace-event.h      |  2 --
 tools/perf/util/util.h             | 19 +++++++++++++++++++
 7 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 95feb6ef34a0a..cb8cd09938d5f 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -16,7 +16,6 @@
 #include "dso.h"
 #include "env.h"
 #include "parse-events.h"
-#include "trace-event.h"
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
@@ -28,6 +27,7 @@
 #include "util/mmap.h"
 #include "util/string2.h"
 #include "util/synthetic-events.h"
+#include "util/util.h"
 #include "thread.h"
 
 #include "tests.h"
@@ -79,7 +79,7 @@ static size_t read_objdump_chunk(const char **line, unsigned char **buf,
 	 * see disassemble_bytes() at binutils/objdump.c for details
 	 * how objdump chooses display endian)
 	 */
-	if (bytes_read > 1 && !bigendian()) {
+	if (bytes_read > 1 && !host_is_bigendian()) {
 		unsigned char *chunk_end = chunk_start + bytes_read - 1;
 		unsigned char tmp;
 
diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c
index 20930dd48ee03..927c7f0cc4ccc 100644
--- a/tools/perf/tests/sample-parsing.c
+++ b/tools/perf/tests/sample-parsing.c
@@ -13,7 +13,7 @@
 #include "evsel.h"
 #include "debug.h"
 #include "util/synthetic-events.h"
-#include "util/trace-event.h"
+#include "util/util.h"
 
 #include "tests.h"
 
@@ -117,7 +117,7 @@ static bool samples_same(const struct perf_sample *s1,
 		COMP(branch_stack->hw_idx);
 		for (i = 0; i < s1->branch_stack->nr; i++) {
 			if (needs_swap)
-				return ((tep_is_bigendian()) ?
+				return ((host_is_bigendian()) ?
 					(FLAG(s2).value == BS_EXPECTED_BE) :
 					(FLAG(s2).value == BS_EXPECTED_LE));
 			else
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 45f4f08399ae1..0f617359a82f2 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2320,11 +2320,8 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
 	 * as it has variable bit-field sizes. Instead the
 	 * macro takes the bit-field position/size,
 	 * swaps it based on the host endianness.
-	 *
-	 * tep_is_bigendian() is used here instead of
-	 * bigendian() to avoid python test fails.
 	 */
-	if (tep_is_bigendian()) {
+	if (host_is_bigendian()) {
 		new_val = bitfield_swap(value, 0, 1);
 		new_val |= bitfield_swap(value, 1, 1);
 		new_val |= bitfield_swap(value, 2, 1);
diff --git a/tools/perf/util/trace-event-info.c b/tools/perf/util/trace-event-info.c
index 892c323b4ac9f..c24b3a15e3196 100644
--- a/tools/perf/util/trace-event-info.c
+++ b/tools/perf/util/trace-event-info.c
@@ -26,6 +26,7 @@
 #include <api/fs/tracing_path.h>
 #include "evsel.h"
 #include "debug.h"
+#include "util.h"
 
 #define VERSION "0.6"
 #define MAX_EVENT_LENGTH 512
@@ -38,15 +39,6 @@ struct tracepoint_path {
 	struct tracepoint_path *next;
 };
 
-int bigendian(void)
-{
-	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
-	unsigned int *ptr;
-
-	ptr = (unsigned int *)(void *)str;
-	return *ptr == 0x01020304;
-}
-
 /* unfortunately, you can not stat debugfs or proc files for size */
 static int record_file(const char *file, ssize_t hdr_sz)
 {
@@ -79,7 +71,7 @@ static int record_file(const char *file, ssize_t hdr_sz)
 
 	/* ugh, handle big-endian hdr_size == 4 */
 	sizep = (char*)&size;
-	if (bigendian())
+	if (host_is_bigendian())
 		sizep += sizeof(u64) - hdr_sz;
 
 	if (hdr_sz && pwrite(output_fd, sizep, hdr_sz, hdr_pos) < 0) {
@@ -564,7 +556,7 @@ static int tracing_data_header(void)
 		return -1;
 
 	/* save endian */
-	if (bigendian())
+	if (host_is_bigendian())
 		buf[0] = 1;
 	else
 		buf[0] = 0;
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 8a01af783310a..43146a4ce2fb2 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -17,6 +17,7 @@
 
 #include "trace-event.h"
 #include "debug.h"
+#include "util.h"
 
 static int input_fd;
 
@@ -414,7 +415,7 @@ ssize_t trace_report(int fd, struct trace_event *tevent, bool __repipe)
 		return -1;
 	}
 	file_bigendian = buf[0];
-	host_bigendian = bigendian();
+	host_bigendian = host_is_bigendian() ? 1 : 0;
 
 	if (trace_event__init(tevent)) {
 		pr_debug("trace_event__init failed");
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 6409811057888..8f39f5bcb2c26 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -27,8 +27,6 @@ trace_event__tp_format(const char *sys, const char *name);
 
 struct tep_event *trace_event__tp_format_id(int id);
 
-int bigendian(void);
-
 void event_format__fprintf(struct tep_event *event,
 			   int cpu, void *data, int size, FILE *fp);
 
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 63cdab0e5314c..1d3b300af5a14 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -94,4 +94,23 @@ int do_realloc_array_as_needed(void **arr, size_t *arr_sz, size_t x,
 		0;						\
 	})
 
+static inline bool host_is_bigendian(void)
+{
+#ifdef __BYTE_ORDER__
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	return false;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+	return true;
+#else
+#error "Unrecognized __BYTE_ORDER__"
+#endif
+#else /* !__BYTE_ORDER__ */
+	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4, 0x0, 0x0, 0x0, 0x0};
+	unsigned int *ptr;
+
+	ptr = (unsigned int *)(void *)str;
+	return *ptr == 0x01020304;
+#endif
+}
+
 #endif /* __PERF_UTIL_H */
-- 
GitLab


From 336b92da1aa4228a664f27972f61e6186f369e79 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Tue, 6 Dec 2022 10:02:36 +0530
Subject: [PATCH 3862/4122] perf tool: Move pmus list variable to a new file

The 'pmus' list variable is defined as static variable under pmu.c file.

Introduce a new pmus.c file and migrate this variable to it. Also make
it non static so that it can be accessed from outside.

Suggested-by: Ian Rogers <irogers@google.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: carsten.haitzler@arm.com
Link: https://lore.kernel.org/r/20221206043237.12159-2-ravi.bangoria@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/Build  | 1 +
 tools/perf/util/pmu.c  | 2 +-
 tools/perf/util/pmus.c | 5 +++++
 tools/perf/util/pmus.h | 9 +++++++++
 4 files changed, 16 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/util/pmus.c
 create mode 100644 tools/perf/util/pmus.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index ab37f588ee8b3..d04802bfa23fd 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -73,6 +73,7 @@ perf-y += trace-event-parse.o
 perf-y += parse-events-flex.o
 perf-y += parse-events-bison.o
 perf-y += pmu.o
+perf-y += pmus.o
 perf-y += pmu-flex.o
 perf-y += pmu-bison.o
 perf-y += pmu-hybrid.o
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 8ff6462f051e1..2bdeb89352e7a 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -22,6 +22,7 @@
 #include "debug.h"
 #include "evsel.h"
 #include "pmu.h"
+#include "pmus.h"
 #include "parse-events.h"
 #include "print-events.h"
 #include "header.h"
@@ -58,7 +59,6 @@ struct perf_pmu_format {
 int perf_pmu_parse(struct list_head *list, char *name);
 extern FILE *perf_pmu_in;
 
-static LIST_HEAD(pmus);
 static bool hybrid_scanned;
 
 /*
diff --git a/tools/perf/util/pmus.c b/tools/perf/util/pmus.c
new file mode 100644
index 0000000000000..7f3b93c4d2297
--- /dev/null
+++ b/tools/perf/util/pmus.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/list.h>
+#include <pmus.h>
+
+LIST_HEAD(pmus);
diff --git a/tools/perf/util/pmus.h b/tools/perf/util/pmus.h
new file mode 100644
index 0000000000000..5ec12007eb5cd
--- /dev/null
+++ b/tools/perf/util/pmus.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PMUS_H
+#define __PMUS_H
+
+extern struct list_head pmus;
+
+#define perf_pmus__for_each_pmu(pmu) list_for_each_entry(pmu, &pmus, list)
+
+#endif /* __PMUS_H */
-- 
GitLab


From 9d9b22bedad13d96b34fe005ef44b4523c4eb786 Mon Sep 17 00:00:00 2001
From: Ravi Bangoria <ravi.bangoria@amd.com>
Date: Tue, 6 Dec 2022 10:02:37 +0530
Subject: [PATCH 3863/4122] perf test: Add event group test for events in
 multiple PMUs

Multiple events in a group can belong to one or more PMUs, however
there are some limitations.

One of the limitations is that perf doesn't allow creating a group of
events from different hw PMUs.

Write a simple test to create various combinations of hw, sw and uncore
PMU events and verify group creation succeeds or fails as expected.

Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Acked-by: Ian Rogers <irogers@google.com>
Acked-by: Kan Liang <kan.liang@linux.intel.com>
Acked-by: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ananth Narayan <ananth.narayan@amd.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Carsten Haitzler <carsten.haitzler@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Santosh Shukla <santosh.shukla@amd.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/20221206043237.12159-3-ravi.bangoria@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/Build          |   1 +
 tools/perf/tests/builtin-test.c |   1 +
 tools/perf/tests/event_groups.c | 127 ++++++++++++++++++++++++++++++++
 tools/perf/tests/tests.h        |   1 +
 4 files changed, 130 insertions(+)
 create mode 100644 tools/perf/tests/event_groups.c

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 11b69023011b0..658b5052c24d2 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -67,6 +67,7 @@ perf-y += expand-cgroup.o
 perf-y += perf-time-to-tsc.o
 perf-y += dlfilter-test.o
 perf-y += sigtrap.o
+perf-y += event_groups.o
 
 $(OUTPUT)tests/llvm-src-base.c: tests/bpf-script-example.c tests/Build
 	$(call rule_mkdir)
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4c6ae59a4dfd7..ddd8262bfa264 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -110,6 +110,7 @@ static struct test_suite *generic_tests[] = {
 	&suite__perf_time_to_tsc,
 	&suite__dlfilter,
 	&suite__sigtrap,
+	&suite__event_groups,
 	NULL,
 };
 
diff --git a/tools/perf/tests/event_groups.c b/tools/perf/tests/event_groups.c
new file mode 100644
index 0000000000000..612c0444aaa83
--- /dev/null
+++ b/tools/perf/tests/event_groups.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include "linux/perf_event.h"
+#include "tests.h"
+#include "debug.h"
+#include "pmu.h"
+#include "pmus.h"
+#include "header.h"
+#include "../perf-sys.h"
+
+/* hw: cycles, sw: context-switch, uncore: [arch dependent] */
+static int types[] = {0, 1, -1};
+static unsigned long configs[] = {0, 3, 0};
+
+#define NR_UNCORE_PMUS 5
+
+/* Uncore pmus that support more than 3 counters */
+static struct uncore_pmus {
+	const char *name;
+	__u64 config;
+} uncore_pmus[NR_UNCORE_PMUS] = {
+	{ "amd_l3", 0x0 },
+	{ "amd_df", 0x0 },
+	{ "uncore_imc_0", 0x1 },         /* Intel */
+	{ "core_imc", 0x318 },           /* PowerPC: core_imc/CPM_STCX_FIN/ */
+	{ "hv_24x7", 0x22000000003 },    /* PowerPC: hv_24x7/CPM_STCX_FIN/ */
+};
+
+static int event_open(int type, unsigned long config, int group_fd)
+{
+	struct perf_event_attr attr;
+
+	memset(&attr, 0, sizeof(struct perf_event_attr));
+	attr.type = type;
+	attr.size = sizeof(struct perf_event_attr);
+	attr.config = config;
+	/*
+	 * When creating an event group, typically the group leader is
+	 * initialized with disabled set to 1 and any child events are
+	 * initialized with disabled set to 0. Despite disabled being 0,
+	 * the child events will not start until the group leader is
+	 * enabled.
+	 */
+	attr.disabled = group_fd == -1 ? 1 : 0;
+
+	return sys_perf_event_open(&attr, -1, 0, group_fd, 0);
+}
+
+static int setup_uncore_event(void)
+{
+	struct perf_pmu *pmu;
+	int i;
+
+	if (list_empty(&pmus))
+		perf_pmu__scan(NULL);
+
+	perf_pmus__for_each_pmu(pmu) {
+		for (i = 0; i < NR_UNCORE_PMUS; i++) {
+			if (!strcmp(uncore_pmus[i].name, pmu->name)) {
+				pr_debug("Using %s for uncore pmu event\n", pmu->name);
+				types[2] = pmu->type;
+				configs[2] = uncore_pmus[i].config;
+				return 0;
+			}
+		}
+	}
+	return -1;
+}
+
+static int run_test(int i, int j, int k)
+{
+	int erroneous = ((((1 << i) | (1 << j) | (1 << k)) & 5) == 5);
+	int group_fd, sibling_fd1, sibling_fd2;
+
+	group_fd = event_open(types[i], configs[i], -1);
+	if (group_fd == -1)
+		return -1;
+
+	sibling_fd1 = event_open(types[j], configs[j], group_fd);
+	if (sibling_fd1 == -1) {
+		close(group_fd);
+		return erroneous ? 0 : -1;
+	}
+
+	sibling_fd2 = event_open(types[k], configs[k], group_fd);
+	if (sibling_fd2 == -1) {
+		close(sibling_fd1);
+		close(group_fd);
+		return erroneous ? 0 : -1;
+	}
+
+	close(sibling_fd2);
+	close(sibling_fd1);
+	close(group_fd);
+	return erroneous ? -1 : 0;
+}
+
+static int test__event_groups(struct test_suite *text __maybe_unused, int subtest __maybe_unused)
+{
+	int i, j, k;
+	int ret;
+	int r;
+
+	ret = setup_uncore_event();
+	if (ret || types[2] == -1)
+		return TEST_SKIP;
+
+	ret = TEST_OK;
+	for (i = 0; i < 3; i++) {
+		for (j = 0; j < 3; j++) {
+			for (k = 0; k < 3; k++) {
+				r = run_test(i, j, k);
+				if (r)
+					ret = TEST_FAIL;
+
+				pr_debug("0x%x 0x%lx, 0x%x 0x%lx, 0x%x 0x%lx: %s\n",
+					 types[i], configs[i], types[j], configs[j],
+					 types[k], configs[k], r ? "Fail" : "Pass");
+			}
+		}
+	}
+	return ret;
+}
+
+DEFINE_SUITE("Event groups", event_groups);
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index e15f24cfc9094..fb4b5ad4dd0fe 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -147,6 +147,7 @@ DECLARE_SUITE(expand_cgroup_events);
 DECLARE_SUITE(perf_time_to_tsc);
 DECLARE_SUITE(dlfilter);
 DECLARE_SUITE(sigtrap);
+DECLARE_SUITE(event_groups);
 
 /*
  * PowerPC and S390 do not support creation of instruction breakpoints using the
-- 
GitLab


From cc2367eebb0c3c5501cddd5823e5feda7b57f706 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 6 Dec 2022 13:49:04 -0300
Subject: [PATCH 3864/4122] machine: Adopt is_lock_function() from
 builtin-lock.c

It is used in bpf_lock_contention.c and builtin-lock.c will be made
CONFIG_LIBTRACEEVENT=y conditional, so move it to machine.c, that is
always available.

This makes those 4 global variables for sched and lock text start and
end to move to 'struct machine' too, as conceivably we can have that
info for several machine instances, say some 'perf diff' like tool.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-lock.c             | 58 +--------------------------
 tools/perf/util/bpf_lock_contention.c |  2 +-
 tools/perf/util/lock-contention.h     |  2 -
 tools/perf/util/machine.c             | 40 ++++++++++++++++++
 tools/perf/util/machine.h             |  5 +++
 5 files changed, 48 insertions(+), 59 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 0d280093b19a5..15ce6358f1279 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -67,11 +67,6 @@ static enum {
 	LOCK_AGGR_CALLER,
 } aggr_mode = LOCK_AGGR_ADDR;
 
-static u64 sched_text_start;
-static u64 sched_text_end;
-static u64 lock_text_start;
-static u64 lock_text_end;
-
 static struct thread_stat *thread_stat_find(u32 tid)
 {
 	struct rb_node *node;
@@ -854,55 +849,6 @@ end:
 	return 0;
 }
 
-bool is_lock_function(struct machine *machine, u64 addr)
-{
-	if (!sched_text_start) {
-		struct map *kmap;
-		struct symbol *sym;
-
-		sym = machine__find_kernel_symbol_by_name(machine,
-							  "__sched_text_start",
-							  &kmap);
-		if (!sym) {
-			/* to avoid retry */
-			sched_text_start = 1;
-			return false;
-		}
-
-		sched_text_start = kmap->unmap_ip(kmap, sym->start);
-
-		/* should not fail from here */
-		sym = machine__find_kernel_symbol_by_name(machine,
-							  "__sched_text_end",
-							  &kmap);
-		sched_text_end = kmap->unmap_ip(kmap, sym->start);
-
-		sym = machine__find_kernel_symbol_by_name(machine,
-							  "__lock_text_start",
-							  &kmap);
-		lock_text_start = kmap->unmap_ip(kmap, sym->start);
-
-		sym = machine__find_kernel_symbol_by_name(machine,
-							  "__lock_text_end",
-							  &kmap);
-		lock_text_end = kmap->unmap_ip(kmap, sym->start);
-	}
-
-	/* failed to get kernel symbols */
-	if (sched_text_start == 1)
-		return false;
-
-	/* mutex and rwsem functions are in sched text section */
-	if (sched_text_start <= addr && addr < sched_text_end)
-		return true;
-
-	/* spinlock functions are in lock text section */
-	if (lock_text_start <= addr && addr < lock_text_end)
-		return true;
-
-	return false;
-}
-
 static int get_symbol_name_offset(struct map *map, struct symbol *sym, u64 ip,
 				  char *buf, int size)
 {
@@ -961,7 +907,7 @@ static int lock_contention_caller(struct evsel *evsel, struct perf_sample *sampl
 			goto next;
 
 		sym = node->ms.sym;
-		if (sym && !is_lock_function(machine, node->ip)) {
+		if (sym && !machine__is_lock_function(machine, node->ip)) {
 			get_symbol_name_offset(node->ms.map, sym, node->ip,
 					       buf, size);
 			return 0;
@@ -1007,7 +953,7 @@ static u64 callchain_id(struct evsel *evsel, struct perf_sample *sample)
 		if (++skip <= stack_skip)
 			goto next;
 
-		if (node->ms.sym && is_lock_function(machine, node->ip))
+		if (node->ms.sym && machine__is_lock_function(machine, node->ip))
 			goto next;
 
 		hash ^= hash_long((unsigned long)node->ip, 64);
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 4db9ad3d50c41..f4ebb9a2e3809 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -153,7 +153,7 @@ int lock_contention_read(struct lock_contention *con)
 		bpf_map_lookup_elem(stack, &key, stack_trace);
 
 		/* skip lock internal functions */
-		while (is_lock_function(machine, stack_trace[idx]) &&
+		while (machine__is_lock_function(machine, stack_trace[idx]) &&
 		       idx < con->max_stack - 1)
 			idx++;
 
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index e3c061b1795ba..a2346875098dc 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -145,6 +145,4 @@ static inline int lock_contention_read(struct lock_contention *con __maybe_unuse
 
 #endif  /* HAVE_BPF_SKEL */
 
-bool is_lock_function(struct machine *machine, u64 addr);
-
 #endif  /* PERF_LOCK_CONTENTION_H */
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 76316e459c3de..803c9d1803dd2 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -3336,3 +3336,43 @@ int machine__for_each_kernel_map(struct machine *machine, machine__map_t fn, voi
 	}
 	return err;
 }
+
+bool machine__is_lock_function(struct machine *machine, u64 addr)
+{
+	if (!machine->sched.text_start) {
+		struct map *kmap;
+		struct symbol *sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_start", &kmap);
+
+		if (!sym) {
+			/* to avoid retry */
+			machine->sched.text_start = 1;
+			return false;
+		}
+
+		machine->sched.text_start = kmap->unmap_ip(kmap, sym->start);
+
+		/* should not fail from here */
+		sym = machine__find_kernel_symbol_by_name(machine, "__sched_text_end", &kmap);
+		machine->sched.text_end = kmap->unmap_ip(kmap, sym->start);
+
+		sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_start", &kmap);
+		machine->lock.text_start = kmap->unmap_ip(kmap, sym->start);
+
+		sym = machine__find_kernel_symbol_by_name(machine, "__lock_text_end", &kmap);
+		machine->lock.text_end = kmap->unmap_ip(kmap, sym->start);
+	}
+
+	/* failed to get kernel symbols */
+	if (machine->sched.text_start == 1)
+		return false;
+
+	/* mutex and rwsem functions are in sched text section */
+	if (machine->sched.text_start <= addr && addr < machine->sched.text_end)
+		return true;
+
+	/* spinlock functions are in lock text section */
+	if (machine->lock.text_start <= addr && addr < machine->lock.text_end)
+		return true;
+
+	return false;
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 6267c1d6f2321..d034ecaf89c19 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -56,6 +56,10 @@ struct machine {
 	struct maps	  *kmaps;
 	struct map	  *vmlinux_map;
 	u64		  kernel_start;
+	struct {
+		u64	  text_start;
+		u64	  text_end;
+	} sched, lock;
 	pid_t		  *current_tid;
 	size_t		  current_tid_sz;
 	union { /* Tool specific area */
@@ -212,6 +216,7 @@ static inline bool machine__is_host(struct machine *machine)
 	return machine ? machine->pid == HOST_KERNEL_ID : false;
 }
 
+bool machine__is_lock_function(struct machine *machine, u64 addr);
 bool machine__is(struct machine *machine, const char *arch);
 bool machine__normalized_is(struct machine *machine, const char *arch);
 int machine__nr_cpus_avail(struct machine *machine);
-- 
GitLab


From 616aa32d6f221faa0235d2586ec1706dca70a439 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 5 Dec 2022 14:59:38 -0800
Subject: [PATCH 3865/4122] perf build: Fixes for LIBTRACEEVENT_DYNAMIC

If LIBTRACEEVENT_DYNAMIC is enabled then avoid the install step for
the plugins. If disabled correct DESTDIR so that the plugins are
installed under <lib>/traceevent/plugins.

Fixes: ef019df01e207971 ("perf build: Install libtraceevent locally when building")
Reported-by: Alexander Gordeev <agordeev@linux.ibm.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20221205225940.3079667-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index a17a6ea85e817..6689f644782f5 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -884,7 +884,7 @@ $(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
 
 install-traceevent-plugins: libtraceevent_plugins
 	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
-		DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
+		DESTDIR=$(DESTDIR_SQ)$(prefix) prefix= \
 		$(LIBTRACEEVENT_FLAGS) install
 endif
 
@@ -1093,7 +1093,11 @@ install-tests: all install-gtk
 		$(INSTALL) tests/shell/coresight/*.sh '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/shell/coresight'
 	$(Q)$(MAKE) -C tests/shell/coresight install-tests
 
-install-bin: install-tools install-tests install-traceevent-plugins
+install-bin: install-tools install-tests
+
+ifndef LIBTRACEEVENT_DYNAMIC
+install-bin: install-traceevent-plugins
+endif
 
 install: install-bin try-install-man
 
-- 
GitLab


From b897613510890d6e92b6a276a20f6c3d96fe90e8 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 6 Dec 2022 09:58:04 -0800
Subject: [PATCH 3866/4122] perf stat: Update event skip condition for
 system-wide per-thread mode and merged uncore and hybrid events

In print_counter_aggrdata(), it skips some events that has no aggregate
count.  It's actually for system-wide per-thread mode and merged uncore
and hybrid events.

Let's update the condition to check them explicitly.

Fixes: 91f85f98da7ab8c3 ("perf stat: Display event stats using aggr counts")
Reported-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20221206175804.391387-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/stat-display.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index ead4915c4a033..8bd8b0142630c 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -804,7 +804,8 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	os->nr = aggr->nr;
 	os->evsel = counter;
 
-	if (counter->supported && aggr->nr == 0)
+	/* Skip already merged uncore/hybrid events */
+	if (counter->merged_stat)
 		return;
 
 	uniquify_counter(config, counter);
@@ -813,6 +814,13 @@ static void print_counter_aggrdata(struct perf_stat_config *config,
 	ena = aggr->counts.ena;
 	run = aggr->counts.run;
 
+	/*
+	 * Skip value 0 when enabling --per-thread globally, otherwise it will
+	 * have too many 0 output.
+	 */
+	if (val == 0 && config->aggr_mode == AGGR_THREAD && config->system_wide)
+		return;
+
 	if (!metric_only) {
 		if (config->json_output)
 			fputc('{', output);
@@ -889,9 +897,6 @@ static void print_aggr(struct perf_stat_config *config,
 		print_metric_begin(config, evlist, os, s);
 
 		evlist__for_each_entry(evlist, counter) {
-			if (counter->merged_stat)
-				continue;
-
 			print_counter_aggrdata(config, counter, s, os);
 		}
 		print_metric_end(config, os);
@@ -918,9 +923,6 @@ static void print_aggr_cgroup(struct perf_stat_config *config,
 			print_metric_begin(config, evlist, os, s);
 
 			evlist__for_each_entry(evlist, counter) {
-				if (counter->merged_stat)
-					continue;
-
 				if (counter->cgrp != os->cgrp)
 					continue;
 
@@ -940,9 +942,6 @@ static void print_counter(struct perf_stat_config *config,
 	if (!config->aggr_map)
 		return;
 
-	if (counter->merged_stat)
-		return;
-
 	for (s = 0; s < config->aggr_map->nr; s++) {
 		print_counter_aggrdata(config, counter, s, os);
 	}
-- 
GitLab


From 40769665b63d8c84b5b1c63fee404d4c20cff751 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 6 Dec 2022 21:59:08 -0800
Subject: [PATCH 3867/4122] perf jevents: Parse metrics during conversion

Currently the 'MetricExpr' json value is passed from the json
file to the pmu-events.c. This change introduces an expression
tree that is parsed into. The parsing is done largely by using
operator overloading and python's 'eval' function. Two advantages
in doing this are:

1) Broken metrics fail at compile time rather than relying on
   `perf test` to detect. `perf test` remains relevant for checking
   event encoding and actual metric use.

2) The conversion to a string from the tree can minimize the metric's
   string size, for example, preferring 1e6 over 1000000, avoiding
   multiplication by 1 and removing unnecessary whitespace. On x86
   this reduces the string size by 2,930bytes (0.07%).

In future changes it would be possible to programmatically
generate the json expressions (a single line of text and so a
pain to write manually) for an architecture using the expression
tree. This could avoid copy-pasting metrics for all architecture
variants.

v4. Doesn't simplify "0*SLOTS" to 0, as the pattern is used to fix
    Intel metrics with topdown events.
v3. Avoids generic types on standard types like set that aren't
    supported until Python 3.9, fixing an issue with Python 3.6
    reported-by John Garry. v3 also fixes minor pylint issues and adds
    a call to Simplify on the read expression tree.
v2. Improvements to type information.

Committer notes:

Added one-line fixer from Ian, see first Link: tag below.

Signed-off-by: Ian Rogers <irogers@google.com>
Reviewed-by: John Garry <john.g.garry@oracle.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Link: https://lore.kernel.org/r/CAP-5=fWa=zNK_ecpWGoGggHCQx7z-oW0eGMQf19Maywg0QK=4g@mail.gmail.com
Link: https://lore.kernel.org/r/20221207055908.1385448-1-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/pmu-events/Build          |   2 +-
 tools/perf/pmu-events/jevents.py     |  12 +-
 tools/perf/pmu-events/metric.py      | 502 +++++++++++++++++++++++++++
 tools/perf/pmu-events/metric_test.py | 157 +++++++++
 4 files changed, 669 insertions(+), 4 deletions(-)
 create mode 100644 tools/perf/pmu-events/metric.py
 create mode 100644 tools/perf/pmu-events/metric_test.py

diff --git a/tools/perf/pmu-events/Build b/tools/perf/pmu-events/Build
index 04ef95174660b..15b9e8fdbffa7 100644
--- a/tools/perf/pmu-events/Build
+++ b/tools/perf/pmu-events/Build
@@ -21,7 +21,7 @@ $(OUTPUT)pmu-events/pmu-events.c: pmu-events/empty-pmu-events.c
 	$(call rule_mkdir)
 	$(Q)$(call echo-cmd,gen)cp $< $@
 else
-$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY)
+$(OUTPUT)pmu-events/pmu-events.c: $(JSON) $(JSON_TEST) $(JEVENTS_PY) pmu-events/metric.py
 	$(call rule_mkdir)
 	$(Q)$(call echo-cmd,gen)$(PYTHON) $(JEVENTS_PY) $(JEVENTS_ARCH) pmu-events/arch $@
 endif
diff --git a/tools/perf/pmu-events/jevents.py b/tools/perf/pmu-events/jevents.py
index 0daa3e007528f..4c398e0eeb2f8 100755
--- a/tools/perf/pmu-events/jevents.py
+++ b/tools/perf/pmu-events/jevents.py
@@ -4,6 +4,7 @@
 import argparse
 import csv
 import json
+import metric
 import os
 import sys
 from typing import (Callable, Dict, Optional, Sequence, Set, Tuple)
@@ -268,9 +269,10 @@ class JsonEvent:
     self.metric_name = jd.get('MetricName')
     self.metric_group = jd.get('MetricGroup')
     self.metric_constraint = jd.get('MetricConstraint')
-    self.metric_expr = jd.get('MetricExpr')
-    if self.metric_expr:
-      self.metric_expr = self.metric_expr.replace('\\', '\\\\')
+    self.metric_expr = None
+    if 'MetricExpr' in jd:
+       self.metric_expr = metric.ParsePerfJson(jd['MetricExpr']).Simplify()
+
     arch_std = jd.get('ArchStdEvent')
     if precise and self.desc and '(Precise Event)' not in self.desc:
       extra_desc += ' (Must be precise)' if precise == '2' else (' (Precise '
@@ -322,6 +324,10 @@ class JsonEvent:
     s = ''
     for attr in _json_event_attributes:
       x = getattr(self, attr)
+      if x and attr == 'metric_expr':
+        # Convert parsed metric expressions into a string. Slashes
+        # must be doubled in the file.
+        x = x.ToPerfJson().replace('\\', '\\\\')
       s += f'{x}\\000' if x else '\\000'
     return s
 
diff --git a/tools/perf/pmu-events/metric.py b/tools/perf/pmu-events/metric.py
new file mode 100644
index 0000000000000..4797ed4fd817b
--- /dev/null
+++ b/tools/perf/pmu-events/metric.py
@@ -0,0 +1,502 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+"""Parse or generate representations of perf metrics."""
+import ast
+import decimal
+import json
+import re
+from typing import Dict, List, Optional, Set, Union
+
+
+class Expression:
+  """Abstract base class of elements in a metric expression."""
+
+  def ToPerfJson(self) -> str:
+    """Returns a perf json file encoded representation."""
+    raise NotImplementedError()
+
+  def ToPython(self) -> str:
+    """Returns a python expr parseable representation."""
+    raise NotImplementedError()
+
+  def Simplify(self):
+    """Returns a simplified version of self."""
+    raise NotImplementedError()
+
+  def Equals(self, other) -> bool:
+    """Returns true when two expressions are the same."""
+    raise NotImplementedError()
+
+  def __str__(self) -> str:
+    return self.ToPerfJson()
+
+  def __or__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('|', self, other)
+
+  def __ror__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('|', other, self)
+
+  def __xor__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('^', self, other)
+
+  def __and__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('&', self, other)
+
+  def __lt__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('<', self, other)
+
+  def __gt__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('>', self, other)
+
+  def __add__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('+', self, other)
+
+  def __radd__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('+', other, self)
+
+  def __sub__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('-', self, other)
+
+  def __rsub__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('-', other, self)
+
+  def __mul__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('*', self, other)
+
+  def __rmul__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('*', other, self)
+
+  def __truediv__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('/', self, other)
+
+  def __rtruediv__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('/', other, self)
+
+  def __mod__(self, other: Union[int, float, 'Expression']) -> 'Operator':
+    return Operator('%', self, other)
+
+
+def _Constify(val: Union[bool, int, float, Expression]) -> Expression:
+  """Used to ensure that the nodes in the expression tree are all Expression."""
+  if isinstance(val, bool):
+    return Constant(1 if val else 0)
+  if isinstance(val, (int, float)):
+    return Constant(val)
+  return val
+
+
+# Simple lookup for operator precedence, used to avoid unnecessary
+# brackets. Precedence matches that of python and the simple expression parser.
+_PRECEDENCE = {
+    '|': 0,
+    '^': 1,
+    '&': 2,
+    '<': 3,
+    '>': 3,
+    '+': 4,
+    '-': 4,
+    '*': 5,
+    '/': 5,
+    '%': 5,
+}
+
+
+class Operator(Expression):
+  """Represents a binary operator in the parse tree."""
+
+  def __init__(self, operator: str, lhs: Union[int, float, Expression],
+               rhs: Union[int, float, Expression]):
+    self.operator = operator
+    self.lhs = _Constify(lhs)
+    self.rhs = _Constify(rhs)
+
+  def Bracket(self,
+              other: Expression,
+              other_str: str,
+              rhs: bool = False) -> str:
+    """If necessary brackets the given other value.
+
+    If ``other`` is an operator then a bracket is necessary when
+    this/self operator has higher precedence. Consider: '(a + b) * c',
+    ``other_str`` will be 'a + b'. A bracket is necessary as without
+    the bracket 'a + b * c' will evaluate 'b * c' first. However, '(a
+    * b) + c' doesn't need a bracket as 'a * b' will always be
+    evaluated first. For 'a / (b * c)' (ie the same precedence level
+    operations) then we add the bracket to best match the original
+    input, but not for '(a / b) * c' where the bracket is unnecessary.
+
+    Args:
+      other (Expression): is a lhs or rhs operator
+      other_str (str): ``other`` in the appropriate string form
+      rhs (bool):  is ``other`` on the RHS
+
+    Returns:
+      str: possibly bracketed other_str
+    """
+    if isinstance(other, Operator):
+      if _PRECEDENCE.get(self.operator, -1) > _PRECEDENCE.get(
+          other.operator, -1):
+        return f'({other_str})'
+      if rhs and _PRECEDENCE.get(self.operator, -1) == _PRECEDENCE.get(
+          other.operator, -1):
+        return f'({other_str})'
+    return other_str
+
+  def ToPerfJson(self):
+    return (f'{self.Bracket(self.lhs, self.lhs.ToPerfJson())} {self.operator} '
+            f'{self.Bracket(self.rhs, self.rhs.ToPerfJson(), True)}')
+
+  def ToPython(self):
+    return (f'{self.Bracket(self.lhs, self.lhs.ToPython())} {self.operator} '
+            f'{self.Bracket(self.rhs, self.rhs.ToPython(), True)}')
+
+  def Simplify(self) -> Expression:
+    lhs = self.lhs.Simplify()
+    rhs = self.rhs.Simplify()
+    if isinstance(lhs, Constant) and isinstance(rhs, Constant):
+      return Constant(ast.literal_eval(lhs + self.operator + rhs))
+
+    if isinstance(self.lhs, Constant):
+      if self.operator in ('+', '|') and lhs.value == '0':
+        return rhs
+
+      # Simplify multiplication by 0 except for the slot event which
+      # is deliberately introduced using this pattern.
+      if self.operator == '*' and lhs.value == '0' and (
+          not isinstance(rhs, Event) or 'slots' not in rhs.name.lower()):
+        return Constant(0)
+
+      if self.operator == '*' and lhs.value == '1':
+        return rhs
+
+    if isinstance(rhs, Constant):
+      if self.operator in ('+', '|') and rhs.value == '0':
+        return lhs
+
+      if self.operator == '*' and rhs.value == '0':
+        return Constant(0)
+
+      if self.operator == '*' and self.rhs.value == '1':
+        return lhs
+
+    return Operator(self.operator, lhs, rhs)
+
+  def Equals(self, other: Expression) -> bool:
+    if isinstance(other, Operator):
+      return self.operator == other.operator and self.lhs.Equals(
+          other.lhs) and self.rhs.Equals(other.rhs)
+    return False
+
+
+class Select(Expression):
+  """Represents a select ternary in the parse tree."""
+
+  def __init__(self, true_val: Union[int, float, Expression],
+               cond: Union[int, float, Expression],
+               false_val: Union[int, float, Expression]):
+    self.true_val = _Constify(true_val)
+    self.cond = _Constify(cond)
+    self.false_val = _Constify(false_val)
+
+  def ToPerfJson(self):
+    true_str = self.true_val.ToPerfJson()
+    cond_str = self.cond.ToPerfJson()
+    false_str = self.false_val.ToPerfJson()
+    return f'({true_str} if {cond_str} else {false_str})'
+
+  def ToPython(self):
+    return (f'Select({self.true_val.ToPython()}, {self.cond.ToPython()}, '
+            f'{self.false_val.ToPython()})')
+
+  def Simplify(self) -> Expression:
+    cond = self.cond.Simplify()
+    true_val = self.true_val.Simplify()
+    false_val = self.false_val.Simplify()
+    if isinstance(cond, Constant):
+      return false_val if cond.value == '0' else true_val
+
+    if true_val.Equals(false_val):
+      return true_val
+
+    return Select(true_val, cond, false_val)
+
+  def Equals(self, other: Expression) -> bool:
+    if isinstance(other, Select):
+      return self.cond.Equals(other.cond) and self.false_val.Equals(
+          other.false_val) and self.true_val.Equals(other.true_val)
+    return False
+
+
+class Function(Expression):
+  """A function in an expression like min, max, d_ratio."""
+
+  def __init__(self,
+               fn: str,
+               lhs: Union[int, float, Expression],
+               rhs: Optional[Union[int, float, Expression]] = None):
+    self.fn = fn
+    self.lhs = _Constify(lhs)
+    self.rhs = _Constify(rhs)
+
+  def ToPerfJson(self):
+    if self.rhs:
+      return f'{self.fn}({self.lhs.ToPerfJson()}, {self.rhs.ToPerfJson()})'
+    return f'{self.fn}({self.lhs.ToPerfJson()})'
+
+  def ToPython(self):
+    if self.rhs:
+      return f'{self.fn}({self.lhs.ToPython()}, {self.rhs.ToPython()})'
+    return f'{self.fn}({self.lhs.ToPython()})'
+
+  def Simplify(self) -> Expression:
+    lhs = self.lhs.Simplify()
+    rhs = self.rhs.Simplify() if self.rhs else None
+    if isinstance(lhs, Constant) and isinstance(rhs, Constant):
+      if self.fn == 'd_ratio':
+        if rhs.value == '0':
+          return Constant(0)
+        Constant(ast.literal_eval(f'{lhs} / {rhs}'))
+      return Constant(ast.literal_eval(f'{self.fn}({lhs}, {rhs})'))
+
+    return Function(self.fn, lhs, rhs)
+
+  def Equals(self, other: Expression) -> bool:
+    if isinstance(other, Function):
+      return self.fn == other.fn and self.lhs.Equals(
+          other.lhs) and self.rhs.Equals(other.rhs)
+    return False
+
+
+def _FixEscapes(s: str) -> str:
+  s = re.sub(r'([^\\]),', r'\1\\,', s)
+  return re.sub(r'([^\\])=', r'\1\\=', s)
+
+
+class Event(Expression):
+  """An event in an expression."""
+
+  def __init__(self, name: str, legacy_name: str = ''):
+    self.name = _FixEscapes(name)
+    self.legacy_name = _FixEscapes(legacy_name)
+
+  def ToPerfJson(self):
+    result = re.sub('/', '@', self.name)
+    return result
+
+  def ToPython(self):
+    return f'Event(r"{self.name}")'
+
+  def Simplify(self) -> Expression:
+    return self
+
+  def Equals(self, other: Expression) -> bool:
+    return isinstance(other, Event) and self.name == other.name
+
+
+class Constant(Expression):
+  """A constant within the expression tree."""
+
+  def __init__(self, value: Union[float, str]):
+    ctx = decimal.Context()
+    ctx.prec = 20
+    dec = ctx.create_decimal(repr(value) if isinstance(value, float) else value)
+    self.value = dec.normalize().to_eng_string()
+    self.value = self.value.replace('+', '')
+    self.value = self.value.replace('E', 'e')
+
+  def ToPerfJson(self):
+    return self.value
+
+  def ToPython(self):
+    return f'Constant({self.value})'
+
+  def Simplify(self) -> Expression:
+    return self
+
+  def Equals(self, other: Expression) -> bool:
+    return isinstance(other, Constant) and self.value == other.value
+
+
+class Literal(Expression):
+  """A runtime literal within the expression tree."""
+
+  def __init__(self, value: str):
+    self.value = value
+
+  def ToPerfJson(self):
+    return self.value
+
+  def ToPython(self):
+    return f'Literal({self.value})'
+
+  def Simplify(self) -> Expression:
+    return self
+
+  def Equals(self, other: Expression) -> bool:
+    return isinstance(other, Literal) and self.value == other.value
+
+
+def min(lhs: Union[int, float, Expression], rhs: Union[int, float,
+                                                       Expression]) -> Function:
+  # pylint: disable=redefined-builtin
+  # pylint: disable=invalid-name
+  return Function('min', lhs, rhs)
+
+
+def max(lhs: Union[int, float, Expression], rhs: Union[int, float,
+                                                       Expression]) -> Function:
+  # pylint: disable=redefined-builtin
+  # pylint: disable=invalid-name
+  return Function('max', lhs, rhs)
+
+
+def d_ratio(lhs: Union[int, float, Expression],
+            rhs: Union[int, float, Expression]) -> Function:
+  # pylint: disable=redefined-builtin
+  # pylint: disable=invalid-name
+  return Function('d_ratio', lhs, rhs)
+
+
+def source_count(event: Event) -> Function:
+  # pylint: disable=redefined-builtin
+  # pylint: disable=invalid-name
+  return Function('source_count', event)
+
+
+class Metric:
+  """An individual metric that will specifiable on the perf command line."""
+  groups: Set[str]
+  expr: Expression
+  scale_unit: str
+  constraint: bool
+
+  def __init__(self,
+               name: str,
+               description: str,
+               expr: Expression,
+               scale_unit: str,
+               constraint: bool = False):
+    self.name = name
+    self.description = description
+    self.expr = expr.Simplify()
+    # Workraound valid_only_metric hiding certain metrics based on unit.
+    scale_unit = scale_unit.replace('/sec', ' per sec')
+    if scale_unit[0].isdigit():
+      self.scale_unit = scale_unit
+    else:
+      self.scale_unit = f'1{scale_unit}'
+    self.constraint = constraint
+    self.groups = set()
+
+  def __lt__(self, other):
+    """Sort order."""
+    return self.name < other.name
+
+  def AddToMetricGroup(self, group):
+    """Callback used when being added to a MetricGroup."""
+    self.groups.add(group.name)
+
+  def Flatten(self) -> Set['Metric']:
+    """Return a leaf metric."""
+    return set([self])
+
+  def ToPerfJson(self) -> Dict[str, str]:
+    """Return as dictionary for Json generation."""
+    result = {
+        'MetricName': self.name,
+        'MetricGroup': ';'.join(sorted(self.groups)),
+        'BriefDescription': self.description,
+        'MetricExpr': self.expr.ToPerfJson(),
+        'ScaleUnit': self.scale_unit
+    }
+    if self.constraint:
+      result['MetricConstraint'] = 'NO_NMI_WATCHDOG'
+
+    return result
+
+
+class _MetricJsonEncoder(json.JSONEncoder):
+  """Special handling for Metric objects."""
+
+  def default(self, o):
+    if isinstance(o, Metric):
+      return o.ToPerfJson()
+    return json.JSONEncoder.default(self, o)
+
+
+class MetricGroup:
+  """A group of metrics.
+
+  Metric groups may be specificd on the perf command line, but within
+  the json they aren't encoded. Metrics may be in multiple groups
+  which can facilitate arrangements similar to trees.
+  """
+
+  def __init__(self, name: str, metric_list: List[Union[Metric,
+                                                        'MetricGroup']]):
+    self.name = name
+    self.metric_list = metric_list
+    for metric in metric_list:
+      metric.AddToMetricGroup(self)
+
+  def AddToMetricGroup(self, group):
+    """Callback used when a MetricGroup is added into another."""
+    for metric in self.metric_list:
+      metric.AddToMetricGroup(group)
+
+  def Flatten(self) -> Set[Metric]:
+    """Returns a set of all leaf metrics."""
+    result = set()
+    for x in self.metric_list:
+      result = result.union(x.Flatten())
+
+    return result
+
+  def ToPerfJson(self) -> str:
+    return json.dumps(sorted(self.Flatten()), indent=2, cls=_MetricJsonEncoder)
+
+  def __str__(self) -> str:
+    return self.ToPerfJson()
+
+
+class _RewriteIfExpToSelect(ast.NodeTransformer):
+
+  def visit_IfExp(self, node):
+    # pylint: disable=invalid-name
+    self.generic_visit(node)
+    call = ast.Call(
+        func=ast.Name(id='Select', ctx=ast.Load()),
+        args=[node.body, node.test, node.orelse],
+        keywords=[])
+    ast.copy_location(call, node.test)
+    return call
+
+
+def ParsePerfJson(orig: str) -> Expression:
+  """A simple json metric expression decoder.
+
+  Converts a json encoded metric expression by way of python's ast and
+  eval routine. First tokens are mapped to Event calls, then
+  accidentally converted keywords or literals are mapped to their
+  appropriate calls. Python's ast is used to match if-else that can't
+  be handled via operator overloading. Finally the ast is evaluated.
+
+  Args:
+    orig (str): String to parse.
+
+  Returns:
+    Expression: The parsed string.
+  """
+  # pylint: disable=eval-used
+  py = orig.strip()
+  py = re.sub(r'([a-zA-Z][^-+/\* \\\(\),]*(?:\\.[^-+/\* \\\(\),]*)*)',
+              r'Event(r"\1")', py)
+  py = re.sub(r'#Event\(r"([^"]*)"\)', r'Literal("#\1")', py)
+  py = re.sub(r'([0-9]+)Event\(r"(e[0-9]+)"\)', r'\1\2', py)
+  keywords = ['if', 'else', 'min', 'max', 'd_ratio', 'source_count']
+  for kw in keywords:
+    py = re.sub(rf'Event\(r"{kw}"\)', kw, py)
+
+  parsed = ast.parse(py, mode='eval')
+  _RewriteIfExpToSelect().visit(parsed)
+  parsed = ast.fix_missing_locations(parsed)
+  return _Constify(eval(compile(parsed, orig, 'eval')))
diff --git a/tools/perf/pmu-events/metric_test.py b/tools/perf/pmu-events/metric_test.py
new file mode 100644
index 0000000000000..15315d0f716ca
--- /dev/null
+++ b/tools/perf/pmu-events/metric_test.py
@@ -0,0 +1,157 @@
+# SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+import unittest
+from metric import Constant
+from metric import Event
+from metric import ParsePerfJson
+
+
+class TestMetricExpressions(unittest.TestCase):
+
+  def test_Operators(self):
+    a = Event('a')
+    b = Event('b')
+    self.assertEqual((a | b).ToPerfJson(), 'a | b')
+    self.assertEqual((a ^ b).ToPerfJson(), 'a ^ b')
+    self.assertEqual((a & b).ToPerfJson(), 'a & b')
+    self.assertEqual((a < b).ToPerfJson(), 'a < b')
+    self.assertEqual((a > b).ToPerfJson(), 'a > b')
+    self.assertEqual((a + b).ToPerfJson(), 'a + b')
+    self.assertEqual((a - b).ToPerfJson(), 'a - b')
+    self.assertEqual((a * b).ToPerfJson(), 'a * b')
+    self.assertEqual((a / b).ToPerfJson(), 'a / b')
+    self.assertEqual((a % b).ToPerfJson(), 'a % b')
+    one = Constant(1)
+    self.assertEqual((a + one).ToPerfJson(), 'a + 1')
+
+  def test_Brackets(self):
+    a = Event('a')
+    b = Event('b')
+    c = Event('c')
+    self.assertEqual((a * b + c).ToPerfJson(), 'a * b + c')
+    self.assertEqual((a + b * c).ToPerfJson(), 'a + b * c')
+    self.assertEqual(((a + a) + a).ToPerfJson(), 'a + a + a')
+    self.assertEqual(((a + b) * c).ToPerfJson(), '(a + b) * c')
+    self.assertEqual((a + (b * c)).ToPerfJson(), 'a + b * c')
+    self.assertEqual(((a / b) * c).ToPerfJson(), 'a / b * c')
+    self.assertEqual((a / (b * c)).ToPerfJson(), 'a / (b * c)')
+
+  def test_ParsePerfJson(self):
+    # Based on an example of a real metric.
+    before = '(a + b + c + d) / (2 * e)'
+    after = before
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+    # Parsing should handle events with '-' in their name. Note, in
+    # the json file the '\' are doubled to '\\'.
+    before = r'topdown\-fe\-bound / topdown\-slots - 1'
+    after = before
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+    # Parsing should handle escaped modifiers. Note, in the json file
+    # the '\' are doubled to '\\'.
+    before = r'arb@event\=0x81\,umask\=0x1@ + arb@event\=0x84\,umask\=0x1@'
+    after = before
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+    # Parsing should handle exponents in numbers.
+    before = r'a + 1e12 + b'
+    after = before
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+  def test_IfElseTests(self):
+    # if-else needs rewriting to Select and back.
+    before = r'Event1 if #smt_on else Event2'
+    after = f'({before})'
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+    before = r'Event1 if 0 else Event2'
+    after = f'({before})'
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+    before = r'Event1 if 1 else Event2'
+    after = f'({before})'
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+    # Ensure the select is evaluate last.
+    before = r'Event1 + 1 if Event2 < 2 else Event3 + 3'
+    after = (r'Select(Event(r"Event1") + Constant(1), Event(r"Event2") < '
+             r'Constant(2), Event(r"Event3") + Constant(3))')
+    self.assertEqual(ParsePerfJson(before).ToPython(), after)
+
+    before = r'Event1 > 1 if Event2 < 2 else Event3 > 3'
+    after = (r'Select(Event(r"Event1") > Constant(1), Event(r"Event2") < '
+             r'Constant(2), Event(r"Event3") > Constant(3))')
+    self.assertEqual(ParsePerfJson(before).ToPython(), after)
+
+    before = r'min(a + b if c > 1 else c + d, e + f)'
+    after = r'min((a + b if c > 1 else c + d), e + f)'
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+    before =3D r'a if b else c if d else e'
+    after =3D r'(a if b else (c if d else e))'
+    self.assertEqual(ParsePerfJson(before).ToPerfJson(), after)
+
+  def test_ToPython(self):
+    # pylint: disable=eval-used
+    # Based on an example of a real metric.
+    before = '(a + b + c + d) / (2 * e)'
+    py = ParsePerfJson(before).ToPython()
+    after = eval(py).ToPerfJson()
+    self.assertEqual(before, after)
+
+  def test_Simplify(self):
+    before = '1 + 2 + 3'
+    after = '6'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = 'a + 0'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = '0 + a'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = 'a | 0'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = '0 | a'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = 'a * 0'
+    after = '0'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = '0 * a'
+    after = '0'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = 'a * 1'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = '1 * a'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = 'a if 0 else b'
+    after = 'b'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = 'a if 1 else b'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    before = 'a if b else a'
+    after = 'a'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+    # Pattern used to add a slots event to metrics that require it.
+    before = '0 * SLOTS'
+    after = '0 * SLOTS'
+    self.assertEqual(ParsePerfJson(before).Simplify().ToPerfJson(), after)
+
+if __name__ == '__main__':
+  unittest.main()
-- 
GitLab


From 378ef0f5d9d7f4652d7a40e0711e8b845ada1cbd Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Mon, 5 Dec 2022 14:59:39 -0800
Subject: [PATCH 3868/4122] perf build: Use libtraceevent from the system
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the LIBTRACEEVENT_DYNAMIC and LIBTRACEFS_DYNAMIC make command
line variables.

If libtraceevent isn't installed or NO_LIBTRACEEVENT=1 is passed to the
build, don't compile in libtraceevent and libtracefs support.

This also disables CONFIG_TRACE that controls "perf trace".

CONFIG_LIBTRACEEVENT is used to control enablement in Build/Makefiles,
HAVE_LIBTRACEEVENT is used in C code.

Without HAVE_LIBTRACEEVENT tracepoints are disabled and as such the
commands kmem, kwork, lock, sched and timechart are removed.  The
majority of commands continue to work including "perf test".

Committer notes:

Fixed up a tools/perf/util/Build reject and added:

  #include <traceevent/event-parse.h>

to tools/perf/util/scripting-engines/trace-event-perl.c.

Committer testing:

  $ rpm -qi libtraceevent-devel
  Name        : libtraceevent-devel
  Version     : 1.5.3
  Release     : 2.fc36
  Architecture: x86_64
  Install Date: Mon 25 Jul 2022 03:20:19 PM -03
  Group       : Unspecified
  Size        : 27728
  License     : LGPLv2+ and GPLv2+
  Signature   : RSA/SHA256, Fri 15 Apr 2022 02:11:58 PM -03, Key ID 999f7cbf38ab71f4
  Source RPM  : libtraceevent-1.5.3-2.fc36.src.rpm
  Build Date  : Fri 15 Apr 2022 10:57:01 AM -03
  Build Host  : buildvm-x86-05.iad2.fedoraproject.org
  Packager    : Fedora Project
  Vendor      : Fedora Project
  URL         : https://git.kernel.org/pub/scm/libs/libtrace/libtraceevent.git/
  Bug URL     : https://bugz.fedoraproject.org/libtraceevent
  Summary     : Development headers of libtraceevent
  Description :
  Development headers of libtraceevent-libs
  $

Default build:

  $ ldd ~/bin/perf | grep tracee
  	libtraceevent.so.1 => /lib64/libtraceevent.so.1 (0x00007f1dcaf8f000)
  $

  # perf trace -e sched:* --max-events 10
       0.000 migration/0/17 sched:sched_migrate_task(comm: "", pid: 1603763 (perf), prio: 120, dest_cpu: 1)
       0.005 migration/0/17 sched:sched_wake_idle_without_ipi(cpu: 1)
       0.011 migration/0/17 sched:sched_switch(prev_comm: "", prev_pid: 17 (migration/0), prev_state: 1, next_comm: "", next_prio: 120)
       1.173 :0/0 sched:sched_wakeup(comm: "", pid: 3138 (gnome-terminal-), prio: 120)
       1.180 :0/0 sched:sched_switch(prev_comm: "", prev_prio: 120, next_comm: "", next_pid: 3138 (gnome-terminal-), next_prio: 120)
       0.156 migration/1/21 sched:sched_migrate_task(comm: "", pid: 1603763 (perf), prio: 120, orig_cpu: 1, dest_cpu: 2)
       0.160 migration/1/21 sched:sched_wake_idle_without_ipi(cpu: 2)
       0.166 migration/1/21 sched:sched_switch(prev_comm: "", prev_pid: 21 (migration/1), prev_state: 1, next_comm: "", next_prio: 120)
       1.183 :0/0 sched:sched_wakeup(comm: "", pid: 1602985 (kworker/u16:0-f), prio: 120, target_cpu: 1)
       1.186 :0/0 sched:sched_switch(prev_comm: "", prev_prio: 120, next_comm: "", next_pid: 1602985 (kworker/u16:0-f), next_prio: 120)
  #

Had to tweak tools/perf/util/setup.py to make sure the python binding
shared object links with libtraceevent if -DHAVE_LIBTRACEEVENT is
present in CFLAGS.

Building with NO_LIBTRACEEVENT=1 uncovered some more build failures:

- Make building of data-convert-bt.c to CONFIG_LIBTRACEEVENT=y

- perf-$(CONFIG_LIBTRACEEVENT) += scripts/

- bpf_kwork.o needs also to be dependent on CONFIG_LIBTRACEEVENT=y

- The python binding needed some fixups and util/trace-event.c can't be
  built and linked with the python binding shared object, so remove it
  in tools/perf/util/setup.py and exclude it from the list of
  dependencies in the python/perf.so Makefile.perf target.

Building without libtraceevent-devel installed uncovered more build
failures:

- The python binding tools/perf/util/python.c was assuming that
  traceevent/parse-events.h was always available, which was the case
  when we defaulted to using the in-kernel tools/lib/traceevent/ files,
  now we need to enclose it under ifdef HAVE_LIBTRACEEVENT, just like
  the other parts of it that deal with tracepoints.

- We have to ifdef the rules in the Build files with
  CONFIG_LIBTRACEEVENT=y to build builtin-trace.c and
  tools/perf/trace/beauty/ as we only ifdef setting CONFIG_TRACE=y when
  setting NO_LIBTRACEEVENT=1 in the make command line, not when we don't
  detect libtraceevent-devel installed in the system. Simplification here
  to avoid these two ways of disabling builtin-trace.c and not having
  CONFIG_TRACE=y when libtraceevent-devel isn't installed is the clean
  way.

From Athira:

<quote>
tools/perf/arch/powerpc/util/Build
-perf-y += kvm-stat.o
+perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
</quote>

Then, ditto for arm64 and s390, detected by container cross build tests.

- s/390 uses test__checkevent_tracepoint() that is now only available if
  HAVE_LIBTRACEEVENT is defined, enclose the callsite with ifder HAVE_LIBTRACEEVENT.

Also from Athira:

<quote>
With this change, I could successfully compile in these environment:
- Without libtraceevent-devel installed
- With libtraceevent-devel installed
- With “make NO_LIBTRACEEVENT=1”
</quote>

Then, finally rename CONFIG_TRACEEVENT to CONFIG_LIBTRACEEVENT for
consistency with other libraries detected in tools/perf/.

Signed-off-by: Ian Rogers <irogers@google.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Tested-by: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: bpf@vger.kernel.org
Link: http://lore.kernel.org/lkml/20221205225940.3079667-3-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Build                              |  20 ++--
 tools/perf/Makefile.config                    |  37 ++++---
 tools/perf/Makefile.perf                      | 104 ++----------------
 tools/perf/arch/arm64/util/Build              |   2 +-
 tools/perf/arch/powerpc/util/Build            |   2 +-
 tools/perf/arch/s390/util/Build               |   2 +-
 tools/perf/arch/x86/util/Build                |   2 +-
 tools/perf/arch/x86/util/intel-pt.c           |   4 +
 tools/perf/builtin-annotate.c                 |   2 +
 tools/perf/builtin-data.c                     |   5 +-
 tools/perf/builtin-inject.c                   |   8 ++
 tools/perf/builtin-kmem.c                     |   1 +
 tools/perf/builtin-kvm.c                      |  12 +-
 tools/perf/builtin-kwork.c                    |   1 +
 tools/perf/builtin-record.c                   |   2 +
 tools/perf/builtin-report.c                   |   9 +-
 tools/perf/builtin-script.c                   |  19 +++-
 tools/perf/builtin-timechart.c                |   1 +
 tools/perf/builtin-trace.c                    |   5 +-
 tools/perf/builtin-version.c                  |   1 +
 tools/perf/perf.c                             |  24 +++-
 .../perf/scripts/python/Perf-Trace-Util/Build |   2 +-
 tools/perf/tests/Build                        |  12 +-
 tools/perf/tests/builtin-test.c               |   6 +
 tools/perf/tests/parse-events.c               |  23 +++-
 tools/perf/util/Build                         |  21 ++--
 tools/perf/util/data-convert-bt.c             |   5 +-
 tools/perf/util/data-convert-json.c           |   9 +-
 tools/perf/util/evlist.c                      |   6 +-
 tools/perf/util/evlist.h                      |   4 +
 tools/perf/util/evsel.c                       |  11 +-
 tools/perf/util/evsel.h                       |  12 +-
 tools/perf/util/evsel_fprintf.c               |   7 +-
 tools/perf/util/header.c                      |  19 ++++
 tools/perf/util/header.h                      |   2 +
 tools/perf/util/intel-pt.c                    |   7 +-
 tools/perf/util/parse-events.c                |  15 +++
 tools/perf/util/parse-events.h                |   1 -
 tools/perf/util/python.c                      |  10 ++
 tools/perf/util/scripting-engines/Build       |   6 +-
 .../util/scripting-engines/trace-event-perl.c |   1 +
 .../scripting-engines/trace-event-python.c    |   1 +
 tools/perf/util/session.c                     |   2 +
 tools/perf/util/session.h                     |   2 +
 tools/perf/util/setup.py                      |  10 +-
 tools/perf/util/sort.c                        |  60 ++++++++--
 tools/perf/util/synthetic-events.c            |   6 +
 tools/perf/util/trace-event-parse.c           |   2 +
 tools/perf/util/trace-event-read.c            |   1 +
 tools/perf/util/trace-event-scripting.c       |   1 +
 tools/perf/util/trace-event.c                 |   1 -
 tools/perf/util/trace-event.h                 |  11 +-
 52 files changed, 355 insertions(+), 184 deletions(-)

diff --git a/tools/perf/Build b/tools/perf/Build
index 496b096153bb1..6dd67e5022955 100644
--- a/tools/perf/Build
+++ b/tools/perf/Build
@@ -5,7 +5,6 @@ perf-y += builtin-diff.o
 perf-y += builtin-evlist.o
 perf-y += builtin-ftrace.o
 perf-y += builtin-help.o
-perf-y += builtin-sched.o
 perf-y += builtin-buildid-list.o
 perf-y += builtin-buildid-cache.o
 perf-y += builtin-kallsyms.o
@@ -13,11 +12,8 @@ perf-y += builtin-list.o
 perf-y += builtin-record.o
 perf-y += builtin-report.o
 perf-y += builtin-stat.o
-perf-y += builtin-timechart.o
 perf-y += builtin-top.o
 perf-y += builtin-script.o
-perf-y += builtin-kmem.o
-perf-y += builtin-lock.o
 perf-y += builtin-kvm.o
 perf-y += builtin-inject.o
 perf-y += builtin-mem.o
@@ -25,9 +21,18 @@ perf-y += builtin-data.o
 perf-y += builtin-version.o
 perf-y += builtin-c2c.o
 perf-y += builtin-daemon.o
-perf-y += builtin-kwork.o
 
-perf-$(CONFIG_TRACE) += builtin-trace.o
+perf-$(CONFIG_LIBTRACEEVENT) += builtin-kmem.o
+perf-$(CONFIG_LIBTRACEEVENT) += builtin-kwork.o
+perf-$(CONFIG_LIBTRACEEVENT) += builtin-lock.o
+perf-$(CONFIG_LIBTRACEEVENT) += builtin-sched.o
+perf-$(CONFIG_LIBTRACEEVENT) += builtin-timechart.o
+
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+  perf-$(CONFIG_TRACE) += builtin-trace.o
+  perf-$(CONFIG_TRACE) += trace/beauty/
+endif
+
 perf-$(CONFIG_LIBELF) += builtin-probe.o
 
 perf-y += bench/
@@ -51,7 +56,6 @@ CFLAGS_builtin-report.o	   += -DDOCDIR="BUILD_STR($(srcdir_SQ)/Documentation)"
 perf-y += util/
 perf-y += arch/
 perf-y += ui/
-perf-y += scripts/
-perf-$(CONFIG_TRACE) += trace/beauty/
+perf-$(CONFIG_LIBTRACEEVENT) += scripts/
 
 gtk-y += ui/gtk/
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 9cc3c48f32881..680228e19c1aa 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -763,18 +763,20 @@ ifndef NO_LIBUNWIND
   EXTLIBS += $(EXTLIBS_LIBUNWIND)
 endif
 
-ifeq ($(NO_SYSCALL_TABLE),0)
-  $(call detected,CONFIG_TRACE)
-else
-  ifndef NO_LIBAUDIT
-    $(call feature_check,libaudit)
-    ifneq ($(feature-libaudit), 1)
-      msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
-      NO_LIBAUDIT := 1
-    else
-      CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
-      EXTLIBS += -laudit
-      $(call detected,CONFIG_TRACE)
+ifneq ($(NO_LIBTRACEEVENT),1)
+  ifeq ($(NO_SYSCALL_TABLE),0)
+    $(call detected,CONFIG_TRACE)
+  else
+    ifndef NO_LIBAUDIT
+      $(call feature_check,libaudit)
+      ifneq ($(feature-libaudit), 1)
+        msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
+        NO_LIBAUDIT := 1
+      else
+        CFLAGS += -DHAVE_LIBAUDIT_SUPPORT
+        EXTLIBS += -laudit
+        $(call detected,CONFIG_TRACE)
+      endif
     endif
   endif
 endif
@@ -1182,9 +1184,11 @@ ifdef LIBPFM4
   endif
 endif
 
-ifdef LIBTRACEEVENT_DYNAMIC
+# libtraceevent is a recommended dependency picked up from the system.
+ifneq ($(NO_LIBTRACEEVENT),1)
   $(call feature_check,libtraceevent)
   ifeq ($(feature-libtraceevent), 1)
+    CFLAGS += -DHAVE_LIBTRACEEVENT
     EXTLIBS += -ltraceevent
     LIBTRACEEVENT_VERSION := $(shell $(PKG_CONFIG) --modversion libtraceevent)
     LIBTRACEEVENT_VERSION_1 := $(word 1, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
@@ -1192,12 +1196,11 @@ ifdef LIBTRACEEVENT_DYNAMIC
     LIBTRACEEVENT_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEEVENT_VERSION)))
     LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3))
     CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP)
+    $(call detected,CONFIG_LIBTRACEEVENT)
   else
-    dummy := $(error Error: No libtraceevent devel library found, please install libtraceevent-devel);
+    dummy := $(warning Warning: libtraceevent is missing limiting functionality, please install libtraceevent-dev/libtraceevent-devel)
   endif
-endif
 
-ifdef LIBTRACEFS_DYNAMIC
   $(call feature_check,libtracefs)
   ifeq ($(feature-libtracefs), 1)
     EXTLIBS += -ltracefs
@@ -1207,8 +1210,6 @@ ifdef LIBTRACEFS_DYNAMIC
     LIBTRACEFS_VERSION_3 := $(word 3, $(subst ., ,$(LIBTRACEFS_VERSION)))
     LIBTRACEFS_VERSION_CPP := $(shell expr $(LIBTRACEFS_VERSION_1) \* 255 \* 255 + $(LIBTRACEFS_VERSION_2) \* 255 + $(LIBTRACEFS_VERSION_3))
     CFLAGS += -DLIBTRACEFS_VERSION=$(LIBTRACEFS_VERSION_CPP)
-  else
-    dummy := $(error Error: No libtracefs devel library found, please install libtracefs-dev);
   endif
 endif
 
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 6689f644782f5..98f629bbd1aae 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -128,10 +128,6 @@ include ../scripts/utilities.mak
 #
 # Define BUILD_BPF_SKEL to enable BPF skeletons
 #
-# Define LIBTRACEEVENT_DYNAMIC to enable libtraceevent dynamic linking
-#
-# Define LIBTRACEFS_DYNAMIC to enable libtracefs dynamic linking
-#
 
 # As per kernel Makefile, avoid funny character set dependencies
 unexport LC_ALL
@@ -242,10 +238,6 @@ sub-make: fixdep
 else # force_fixdep
 
 LIBAPI_DIR      = $(srctree)/tools/lib/api/
-ifndef LIBTRACEEVENT_DYNAMIC
-LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/
-LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins
-endif
 LIBBPF_DIR      = $(srctree)/tools/lib/bpf/
 LIBSUBCMD_DIR   = $(srctree)/tools/lib/subcmd/
 LIBSYMBOL_DIR   = $(srctree)/tools/lib/symbol/
@@ -295,31 +287,6 @@ SCRIPT_SH += perf-iostat.sh
 grep-libs = $(filter -l%,$(1))
 strip-libs = $(filter-out -l%,$(1))
 
-ifndef LIBTRACEEVENT_DYNAMIC
-ifneq ($(OUTPUT),)
-  LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent
-else
-  LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent
-endif
-LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins
-LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT)
-LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT)
-LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include
-LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a
-export LIBTRACEEVENT
-LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list
-CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include
-#
-# The static build has no dynsym table, so this does not work for
-# static build. Looks like linker starts to scream about that now
-# (in Fedora 26) so we need to switch it off for static build.
-DYNAMIC_LIST_LDFLAGS               = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
-LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS))
-else
-LIBTRACEEVENT_DYNAMIC_LIST =
-LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS =
-endif
-
 ifneq ($(OUTPUT),)
   LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi
 else
@@ -380,13 +347,14 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
 
 python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf*.so
 
-PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
-ifndef LIBTRACEEVENT_DYNAMIC
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+  PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
 else
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI)
+  PYTHON_EXT_SRCS := $(shell grep -v '^\#\|util/trace-event.c' util/python-ext-sources)
 endif
 
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBAPI)
+
 SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
 
 PROGRAMS += $(OUTPUT)perf
@@ -430,9 +398,6 @@ ifndef NO_LIBBPF
     PERFLIBS += $(LIBBPF)
   endif
 endif
-ifndef LIBTRACEEVENT_DYNAMIC
-  PERFLIBS += $(LIBTRACEEVENT)
-endif
 
 # We choose to avoid "if .. else if .. else .. endif endif"
 # because maintaining the nesting to match is a pain.  If
@@ -682,9 +647,9 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
 # Create python binding output directory if not already present
 _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
 
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) $(LIBPERF)
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBPERF)
 	$(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
-        CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+        CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS)' \
 	  $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
 	cp $(PYTHON_EXTBUILD_LIB)perf*.so $(OUTPUT)python/
@@ -710,8 +675,8 @@ $(PERF_IN): prepare FORCE
 $(PMU_EVENTS_IN): FORCE prepare
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events
 
-$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN)
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \
 		$(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@
 
 $(GTK_IN): FORCE prepare
@@ -797,10 +762,6 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc
 	$(LIBSYMBOL) \
 	bpf-skel
 
-ifndef LIBTRACEEVENT_DYNAMIC
-prepare: $(LIBTRACEEVENT)
-endif
-
 $(OUTPUT)%.o: %.c prepare FORCE
 	$(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@
 
@@ -856,38 +817,6 @@ endif
 
 $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h)
 
-ifndef LIBTRACEEVENT_DYNAMIC
-LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))'
-
-$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT)
-	$(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \
-		DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \
-		$@ install_headers
-
-$(LIBTRACEEVENT)-clean:
-	$(call QUIET_CLEAN, libtraceevent)
-	$(Q)$(RM) -r -- $(LIBTRACEEVENT_OUTPUT)
-
-libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT)
-	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
-		DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
-		plugins
-
-libtraceevent_plugins-clean:
-	$(call QUIET_CLEAN, libtraceevent_plugins)
-	$(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT)
-
-$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
-	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
-		DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \
-		$(LIBTRACEEVENT_FLAGS) $@
-
-install-traceevent-plugins: libtraceevent_plugins
-	$(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \
-		DESTDIR=$(DESTDIR_SQ)$(prefix) prefix= \
-		$(LIBTRACEEVENT_FLAGS) install
-endif
-
 $(LIBAPI): FORCE | $(LIBAPI_OUTPUT)
 	$(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \
 		DESTDIR=$(LIBAPI_DESTDIR) prefix= \
@@ -1095,10 +1024,6 @@ install-tests: all install-gtk
 
 install-bin: install-tools install-tests
 
-ifndef LIBTRACEEVENT_DYNAMIC
-install-bin: install-traceevent-plugins
-endif
-
 install: install-bin try-install-man
 
 install-python_ext:
@@ -1124,11 +1049,6 @@ SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
 $(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT):
 	$(Q)$(MKDIR) -p $@
 
-ifndef LIBTRACEEVENT_DYNAMIC
-$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT):
-	$(Q)$(MKDIR) -p $@
-endif
-
 ifdef BUILD_BPF_SKEL
 BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool
 BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. -I$(LIBBPF_INCLUDE)
@@ -1211,10 +1131,6 @@ clean:: $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(
 	$(call QUIET_CLEAN, Documentation) \
 	$(MAKE) -C $(DOC_DIR) O=$(OUTPUT) clean >/dev/null
 
-ifndef LIBTRACEEVENT_DYNAMIC
-clean:: $(LIBTRACEEVENT)-clean libtraceevent_plugins-clean
-endif
-
 #
 # To provide FEATURE-DUMP into $(FEATURE_DUMP_COPY)
 # file if defined, with no further action.
@@ -1232,6 +1148,6 @@ FORCE:
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope FORCE prepare
-.PHONY: libtraceevent_plugins archheaders
+.PHONY: archheaders
 
 endif # force_fixdep
diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build
index 337aa9bdf905d..78ef7115be3d9 100644
--- a/tools/perf/arch/arm64/util/Build
+++ b/tools/perf/arch/arm64/util/Build
@@ -3,7 +3,7 @@ perf-y += machine.o
 perf-y += perf_regs.o
 perf-y += tsc.o
 perf-y += pmu.o
-perf-y += kvm-stat.o
+perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
 perf-$(CONFIG_DWARF)     += dwarf-regs.o
 perf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o
 perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 0115f31665684..9889245c555c4 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,5 +1,5 @@
 perf-y += header.o
-perf-y += kvm-stat.o
+perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += mem-events.o
 perf-y += sym-handling.o
diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build
index 3d9d0f4f72ca1..db68840869979 100644
--- a/tools/perf/arch/s390/util/Build
+++ b/tools/perf/arch/s390/util/Build
@@ -1,5 +1,5 @@
 perf-y += header.o
-perf-y += kvm-stat.o
+perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
 perf-y += perf_regs.o
 
 perf-$(CONFIG_DWARF) += dwarf-regs.o
diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
index dbeb04cb336e0..195ccfdef7aa1 100644
--- a/tools/perf/arch/x86/util/Build
+++ b/tools/perf/arch/x86/util/Build
@@ -1,7 +1,7 @@
 perf-y += header.o
 perf-y += tsc.o
 perf-y += pmu.o
-perf-y += kvm-stat.o
+perf-$(CONFIG_LIBTRACEEVENT) += kvm-stat.o
 perf-y += perf_regs.o
 perf-y += topdown.o
 perf-y += machine.o
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index af102f471e9f4..1e39a034cee98 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -418,6 +418,7 @@ static int intel_pt_info_fill(struct auxtrace_record *itr,
 	return 0;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int intel_pt_track_switches(struct evlist *evlist)
 {
 	const char *sched_switch = "sched:sched_switch";
@@ -439,6 +440,7 @@ static int intel_pt_track_switches(struct evlist *evlist)
 
 	return 0;
 }
+#endif
 
 static void intel_pt_valid_str(char *str, size_t len, u64 valid)
 {
@@ -829,6 +831,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 					ptr->have_sched_switch = 2;
 			}
 		} else {
+#ifdef HAVE_LIBTRACEEVENT
 			err = intel_pt_track_switches(evlist);
 			if (err == -EPERM)
 				pr_debug2("Unable to select sched:sched_switch\n");
@@ -836,6 +839,7 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
 				return err;
 			else
 				ptr->have_sched_switch = 1;
+#endif
 		}
 	}
 
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 517d928c00e3f..90458ca6933fd 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -499,7 +499,9 @@ int cmd_annotate(int argc, const char **argv)
 			.namespaces = perf_event__process_namespaces,
 			.attr	= perf_event__process_attr,
 			.build_id = perf_event__process_build_id,
+#ifdef HAVE_LIBTRACEEVENT
 			.tracing_data   = perf_event__process_tracing_data,
+#endif
 			.id_index	= perf_event__process_id_index,
 			.auxtrace_info	= perf_event__process_auxtrace_info,
 			.auxtrace	= perf_event__process_auxtrace,
diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c
index c22d82d2a73c4..b2a9a3b7f68d7 100644
--- a/tools/perf/builtin-data.c
+++ b/tools/perf/builtin-data.c
@@ -78,12 +78,13 @@ static int cmd_data_convert(int argc, const char **argv)
 		return bt_convert__perf2json(input_name, to_json, &opts);
 
 	if (to_ctf) {
-#ifdef HAVE_LIBBABELTRACE_SUPPORT
+#if defined(HAVE_LIBBABELTRACE_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 		return bt_convert__perf2ctf(input_name, to_ctf, &opts);
 #else
 		pr_err("The libbabeltrace support is not compiled in. perf should be "
 		       "compiled with environment variables LIBBABELTRACE=1 and "
-		       "LIBBABELTRACE_DIR=/path/to/libbabeltrace/\n");
+		       "LIBBABELTRACE_DIR=/path/to/libbabeltrace/.\n"
+		       "Check also if libbtraceevent devel files are available.\n");
 		return -1;
 #endif
 	}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index e254f18986f7c..3f4e4dd5abf31 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -607,6 +607,7 @@ static int perf_event__repipe_exit(struct perf_tool *tool,
 	return err;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int perf_event__repipe_tracing_data(struct perf_session *session,
 					   union perf_event *event)
 {
@@ -614,6 +615,7 @@ static int perf_event__repipe_tracing_data(struct perf_session *session,
 
 	return perf_event__process_tracing_data(session, event);
 }
+#endif
 
 static int dso__read_build_id(struct dso *dso)
 {
@@ -807,6 +809,7 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
 	return 0;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int perf_inject__sched_stat(struct perf_tool *tool,
 				   union perf_event *event __maybe_unused,
 				   struct perf_sample *sample,
@@ -836,6 +839,7 @@ found:
 	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
 	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
 }
+#endif
 
 static struct guest_vcpu *guest_session__vcpu(struct guest_session *gs, u32 vcpu)
 {
@@ -1961,7 +1965,9 @@ static int __cmd_inject(struct perf_inject *inject)
 		inject->tool.mmap	  = perf_event__repipe_mmap;
 		inject->tool.mmap2	  = perf_event__repipe_mmap2;
 		inject->tool.fork	  = perf_event__repipe_fork;
+#ifdef HAVE_LIBTRACEEVENT
 		inject->tool.tracing_data = perf_event__repipe_tracing_data;
+#endif
 	}
 
 	output_data_offset = perf_session__data_offset(session->evlist);
@@ -1984,8 +1990,10 @@ static int __cmd_inject(struct perf_inject *inject)
 				evsel->handler = perf_inject__sched_switch;
 			} else if (!strcmp(name, "sched:sched_process_exit"))
 				evsel->handler = perf_inject__sched_process_exit;
+#ifdef HAVE_LIBTRACEEVENT
 			else if (!strncmp(name, "sched:sched_stat_", 17))
 				evsel->handler = perf_inject__sched_stat;
+#endif
 		}
 	} else if (inject->itrace_synth_opts.vm_time_correlation) {
 		session->itrace_synth_opts = &inject->itrace_synth_opts;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index ebfab2ca17024..e20656c431a4b 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -35,6 +35,7 @@
 #include <regex.h>
 
 #include <linux/ctype.h>
+#include <traceevent/event-parse.h>
 
 static int	kmem_slab;
 static int	kmem_page;
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 7d9ec1bac1a25..641e739c717ce 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -63,7 +63,7 @@ static const char *get_filename_for_perf_kvm(void)
 	return filename;
 }
 
-#ifdef HAVE_KVM_STAT_SUPPORT
+#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 
 void exit_event_get_key(struct evsel *evsel,
 			struct perf_sample *sample,
@@ -654,7 +654,7 @@ static void print_result(struct perf_kvm_stat *kvm)
 		pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
 }
 
-#ifdef HAVE_TIMERFD_SUPPORT
+#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 static int process_lost_event(struct perf_tool *tool,
 			      union perf_event *event __maybe_unused,
 			      struct perf_sample *sample __maybe_unused,
@@ -742,7 +742,7 @@ static bool verify_vcpu(int vcpu)
 	return true;
 }
 
-#ifdef HAVE_TIMERFD_SUPPORT
+#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 /* keeping the max events to a modest level to keep
  * the processing of samples per mmap smooth.
  */
@@ -1290,7 +1290,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
 	return kvm_events_report_vcpu(kvm);
 }
 
-#ifdef HAVE_TIMERFD_SUPPORT
+#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 static struct evlist *kvm_live_event_list(void)
 {
 	struct evlist *evlist;
@@ -1507,7 +1507,7 @@ static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
 	if (strlen(argv[1]) > 2 && strstarts("report", argv[1]))
 		return kvm_events_report(&kvm, argc - 1 , argv + 1);
 
-#ifdef HAVE_TIMERFD_SUPPORT
+#if defined(HAVE_TIMERFD_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 	if (!strncmp(argv[1], "live", 4))
 		return kvm_events_live(&kvm, argc - 1 , argv + 1);
 #endif
@@ -1644,7 +1644,7 @@ int cmd_kvm(int argc, const char **argv)
 		return cmd_top(argc, argv);
 	else if (strlen(argv[0]) > 2 && strstarts("buildid-list", argv[0]))
 		return __cmd_buildid_list(file_name, argc, argv);
-#ifdef HAVE_KVM_STAT_SUPPORT
+#if defined(HAVE_KVM_STAT_SUPPORT) && defined(HAVE_LIBTRACEEVENT)
 	else if (strlen(argv[0]) > 2 && strstarts("stat", argv[0]))
 		return kvm_cmd_stat(file_name, argc, argv);
 #endif
diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c
index 0e02b80986440..dc59d75180d14 100644
--- a/tools/perf/builtin-kwork.c
+++ b/tools/perf/builtin-kwork.c
@@ -23,6 +23,7 @@
 
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
+#include <traceevent/event-parse.h>
 
 #include <errno.h>
 #include <inttypes.h>
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index b7fd7ec586fbc..7e17374f6c1a2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1701,8 +1701,10 @@ static void record__init_features(struct record *rec)
 	if (rec->no_buildid)
 		perf_header__clear_feat(&session->header, HEADER_BUILD_ID);
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (!have_tracepoints(&rec->evlist->core.entries))
 		perf_header__clear_feat(&session->header, HEADER_TRACING_DATA);
+#endif
 
 	if (!rec->opts.branch_stack)
 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index b6d77d3da64f6..2ee2ecca208e1 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -67,6 +67,10 @@
 #include <unistd.h>
 #include <linux/mman.h>
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 struct report {
 	struct perf_tool	tool;
 	struct perf_session	*session;
@@ -1199,7 +1203,9 @@ int cmd_report(int argc, const char **argv)
 			.lost		 = perf_event__process_lost,
 			.read		 = process_read_event,
 			.attr		 = process_attr,
+#ifdef HAVE_LIBTRACEEVENT
 			.tracing_data	 = perf_event__process_tracing_data,
+#endif
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
 			.auxtrace_info	 = perf_event__process_auxtrace_info,
@@ -1660,6 +1666,7 @@ repeat:
 						  report.range_num);
 	}
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (session->tevent.pevent &&
 	    tep_set_function_resolver(session->tevent.pevent,
 				      machine__resolve_kernel_addr,
@@ -1668,7 +1675,7 @@ repeat:
 		       __func__);
 		return -1;
 	}
-
+#endif
 	sort__setup_elide(stdout);
 
 	ret = __cmd_report(&report);
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index d7ec8c1af2936..88888fb885c82 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -62,6 +62,9 @@
 #include "perf.h"
 
 #include <linux/ctype.h>
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
 
 static char const		*script_name;
 static char const		*generate_script_lang;
@@ -2154,12 +2157,12 @@ static void process_event(struct perf_script *script,
 		perf_sample__fprintf_bts(sample, evsel, thread, al, addr_al, machine, fp);
 		return;
 	}
-
+#ifdef HAVE_LIBTRACEEVENT
 	if (PRINT_FIELD(TRACE) && sample->raw_data) {
 		event_format__fprintf(evsel->tp_format, sample->cpu,
 				      sample->raw_data, sample->raw_size, fp);
 	}
-
+#endif
 	if (attr->type == PERF_TYPE_SYNTH && PRINT_FIELD(SYNTH))
 		perf_sample__fprintf_synth(sample, evsel, fp);
 
@@ -2283,8 +2286,10 @@ static void process_stat_interval(u64 tstamp)
 
 static void setup_scripting(void)
 {
+#ifdef HAVE_LIBTRACEEVENT
 	setup_perl_scripting();
 	setup_python_scripting();
+#endif
 }
 
 static int flush_scripting(void)
@@ -3784,7 +3789,9 @@ int cmd_script(int argc, const char **argv)
 			.fork		 = perf_event__process_fork,
 			.attr		 = process_attr,
 			.event_update   = perf_event__process_event_update,
+#ifdef HAVE_LIBTRACEEVENT
 			.tracing_data	 = perf_event__process_tracing_data,
+#endif
 			.feature	 = process_feature_event,
 			.build_id	 = perf_event__process_build_id,
 			.id_index	 = perf_event__process_id_index,
@@ -4215,6 +4222,7 @@ script_found:
 	else
 		symbol_conf.use_callchain = false;
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (session->tevent.pevent &&
 	    tep_set_function_resolver(session->tevent.pevent,
 				      machine__resolve_kernel_addr,
@@ -4223,7 +4231,7 @@ script_found:
 		err = -1;
 		goto out_delete;
 	}
-
+#endif
 	if (generate_script_lang) {
 		struct stat perf_stat;
 		int input;
@@ -4259,9 +4267,12 @@ script_found:
 			err = -ENOENT;
 			goto out_delete;
 		}
-
+#ifdef HAVE_LIBTRACEEVENT
 		err = scripting_ops->generate_script(session->tevent.pevent,
 						     "perf-script");
+#else
+		err = scripting_ops->generate_script(NULL, "perf-script");
+#endif
 		goto out_delete;
 	}
 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index c36296bb7637e..6c629e7d370ad 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -38,6 +38,7 @@
 #include "util/string2.h"
 #include "util/tracepoint.h"
 #include <linux/err.h>
+#include <traceevent/event-parse.h>
 
 #ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE
 FILE *open_memstream(char **ptr, size_t *sizeloc);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 543c379d2a57a..6909cd9f48d11 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -15,7 +15,6 @@
  */
 
 #include "util/record.h"
-#include <traceevent/event-parse.h>
 #include <api/fs/tracing_path.h>
 #include <bpf/bpf.h>
 #include "util/bpf_map.h"
@@ -80,6 +79,10 @@
 #include <linux/ctype.h>
 #include <perf/mmap.h>
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 #ifndef O_CLOEXEC
 # define O_CLOEXEC		02000000
 #endif
diff --git a/tools/perf/builtin-version.c b/tools/perf/builtin-version.c
index a71f491224da1..a886929ec6e5f 100644
--- a/tools/perf/builtin-version.c
+++ b/tools/perf/builtin-version.c
@@ -82,6 +82,7 @@ static void library_status(void)
 	STATUS(HAVE_AIO_SUPPORT, aio);
 	STATUS(HAVE_ZSTD_SUPPORT, zstd);
 	STATUS(HAVE_LIBPFM, libpfm4);
+	STATUS(HAVE_LIBTRACEEVENT, libtraceevent);
 }
 
 int cmd_version(int argc, const char **argv)
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 7af135dea1cd8..82bbe0ca858bd 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -70,20 +70,26 @@ static struct cmd_struct commands[] = {
 	{ "report",	cmd_report,	0 },
 	{ "bench",	cmd_bench,	0 },
 	{ "stat",	cmd_stat,	0 },
+#ifdef HAVE_LIBTRACEEVENT
 	{ "timechart",	cmd_timechart,	0 },
+#endif
 	{ "top",	cmd_top,	0 },
 	{ "annotate",	cmd_annotate,	0 },
 	{ "version",	cmd_version,	0 },
 	{ "script",	cmd_script,	0 },
+#ifdef HAVE_LIBTRACEEVENT
 	{ "sched",	cmd_sched,	0 },
+#endif
 #ifdef HAVE_LIBELF_SUPPORT
 	{ "probe",	cmd_probe,	0 },
 #endif
+#ifdef HAVE_LIBTRACEEVENT
 	{ "kmem",	cmd_kmem,	0 },
 	{ "lock",	cmd_lock,	0 },
+#endif
 	{ "kvm",	cmd_kvm,	0 },
 	{ "test",	cmd_test,	0 },
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
+#if defined(HAVE_LIBTRACEEVENT) && (defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT))
 	{ "trace",	cmd_trace,	0 },
 #endif
 	{ "inject",	cmd_inject,	0 },
@@ -91,7 +97,9 @@ static struct cmd_struct commands[] = {
 	{ "data",	cmd_data,	0 },
 	{ "ftrace",	cmd_ftrace,	0 },
 	{ "daemon",	cmd_daemon,	0 },
+#ifdef HAVE_LIBTRACEEVENT
 	{ "kwork",	cmd_kwork,	0 },
+#endif
 };
 
 struct pager_config {
@@ -500,14 +508,18 @@ int main(int argc, const char **argv)
 		argv[0] = cmd;
 	}
 	if (strstarts(cmd, "trace")) {
-#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE_SUPPORT)
-		setup_path();
-		argv[0] = "trace";
-		return cmd_trace(argc, argv);
-#else
+#ifndef HAVE_LIBTRACEEVENT
+		fprintf(stderr,
+			"trace command not available: missing libtraceevent devel package at build time.\n");
+		goto out;
+#elif !defined(HAVE_LIBAUDIT_SUPPORT) && !defined(HAVE_SYSCALL_TABLE_SUPPORT)
 		fprintf(stderr,
 			"trace command not available: missing audit-libs devel package at build time.\n");
 		goto out;
+#else
+		setup_path();
+		argv[0] = "trace";
+		return cmd_trace(argc, argv);
 #endif
 	}
 	/* Look for flags.. */
diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build
index 7d0e33ce6aba4..d5fed4e426179 100644
--- a/tools/perf/scripts/python/Perf-Trace-Util/Build
+++ b/tools/perf/scripts/python/Perf-Trace-Util/Build
@@ -1,3 +1,3 @@
-perf-y += Context.o
+perf-$(CONFIG_LIBTRACEEVENT) += Context.o
 
 CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 658b5052c24d2..90fd1eb317bb4 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -6,13 +6,13 @@ perf-y += parse-events.o
 perf-y += dso-data.o
 perf-y += attr.o
 perf-y += vmlinux-kallsyms.o
-perf-y += openat-syscall.o
-perf-y += openat-syscall-all-cpus.o
-perf-y += openat-syscall-tp-fields.o
-perf-y += mmap-basic.o
+perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall.o
+perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall-all-cpus.o
+perf-$(CONFIG_LIBTRACEEVENT) += openat-syscall-tp-fields.o
+perf-$(CONFIG_LIBTRACEEVENT) += mmap-basic.o
 perf-y += perf-record.o
 perf-y += evsel-roundtrip-name.o
-perf-y += evsel-tp-sched.o
+perf-$(CONFIG_LIBTRACEEVENT) += evsel-tp-sched.o
 perf-y += fdarray.o
 perf-y += pmu.o
 perf-y += pmu-events.o
@@ -30,7 +30,7 @@ perf-y += task-exit.o
 perf-y += sw-clock.o
 perf-y += mmap-thread-lookup.o
 perf-y += thread-maps-share.o
-perf-y += switch-tracking.o
+perf-$(CONFIG_LIBTRACEEVENT) += switch-tracking.o
 perf-y += keep-tracking.o
 perf-y += code-reading.o
 perf-y += sample-parsing.o
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index ddd8262bfa264..f6c16ad8ed506 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -38,9 +38,11 @@ struct test_suite *__weak arch_tests[] = {
 
 static struct test_suite *generic_tests[] = {
 	&suite__vmlinux_matches_kallsyms,
+#ifdef HAVE_LIBTRACEEVENT
 	&suite__openat_syscall_event,
 	&suite__openat_syscall_event_on_all_cpus,
 	&suite__basic_mmap,
+#endif
 	&suite__mem,
 	&suite__parse_events,
 	&suite__expr,
@@ -51,8 +53,10 @@ static struct test_suite *generic_tests[] = {
 	&suite__dso_data_cache,
 	&suite__dso_data_reopen,
 	&suite__perf_evsel__roundtrip_name_test,
+#ifdef HAVE_LIBTRACEEVENT
 	&suite__perf_evsel__tp_sched_test,
 	&suite__syscall_openat_tp_fields,
+#endif
 	&suite__attr,
 	&suite__hists_link,
 	&suite__python_use,
@@ -71,7 +75,9 @@ static struct test_suite *generic_tests[] = {
 	&suite__thread_maps_share,
 	&suite__hists_output,
 	&suite__hists_cumulate,
+#ifdef HAVE_LIBTRACEEVENT
 	&suite__switch_tracking,
+#endif
 	&suite__fdarray__filter,
 	&suite__fdarray__add,
 	&suite__kmod_path__parse,
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 3440dd2616b06..71a5cb3433117 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -20,6 +20,8 @@
 #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \
 			     PERF_SAMPLE_CPU | PERF_SAMPLE_PERIOD)
 
+#ifdef HAVE_LIBTRACEEVENT
+
 #if defined(__s390x__)
 /* Return true if kvm module is available and loaded. Test this
  * and return success when trace point kvm_s390_create_vm
@@ -76,6 +78,7 @@ static int test__checkevent_tracepoint_multi(struct evlist *evlist)
 	}
 	return TEST_OK;
 }
+#endif /* HAVE_LIBTRACEEVENT */
 
 static int test__checkevent_raw(struct evlist *evlist)
 {
@@ -222,6 +225,7 @@ static int test__checkevent_breakpoint_rw(struct evlist *evlist)
 	return TEST_OK;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int test__checkevent_tracepoint_modifier(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
@@ -252,6 +256,7 @@ test__checkevent_tracepoint_multi_modifier(struct evlist *evlist)
 
 	return test__checkevent_tracepoint_multi(evlist);
 }
+#endif /* HAVE_LIBTRACEEVENT */
 
 static int test__checkevent_raw_modifier(struct evlist *evlist)
 {
@@ -453,6 +458,7 @@ static int test__checkevent_pmu(struct evlist *evlist)
 	return TEST_OK;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int test__checkevent_list(struct evlist *evlist)
 {
 	struct evsel *evsel = evlist__first(evlist);
@@ -491,6 +497,7 @@ static int test__checkevent_list(struct evlist *evlist)
 
 	return TEST_OK;
 }
+#endif
 
 static int test__checkevent_pmu_name(struct evlist *evlist)
 {
@@ -762,6 +769,7 @@ static int test__group2(struct evlist *evlist)
 	return TEST_OK;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int test__group3(struct evlist *evlist __maybe_unused)
 {
 	struct evsel *evsel, *leader;
@@ -853,6 +861,7 @@ static int test__group3(struct evlist *evlist __maybe_unused)
 
 	return TEST_OK;
 }
+#endif
 
 static int test__group4(struct evlist *evlist __maybe_unused)
 {
@@ -1460,6 +1469,7 @@ static int test__sym_event_dc(struct evlist *evlist)
 	return TEST_OK;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int count_tracepoints(void)
 {
 	struct dirent *events_ent;
@@ -1513,6 +1523,7 @@ static int test__all_tracepoints(struct evlist *evlist)
 
 	return test__checkevent_tracepoint_multi(evlist);
 }
+#endif /* HAVE_LIBTRACEVENT */
 
 static int test__hybrid_hw_event_with_pmu(struct evlist *evlist)
 {
@@ -1642,6 +1653,7 @@ struct evlist_test {
 };
 
 static const struct evlist_test test__events[] = {
+#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "syscalls:sys_enter_openat",
 		.check = test__checkevent_tracepoint,
@@ -1652,6 +1664,7 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_tracepoint_multi,
 		/* 1 */
 	},
+#endif
 	{
 		.name  = "r1a",
 		.check = test__checkevent_raw,
@@ -1702,6 +1715,7 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_breakpoint_w,
 		/* 1 */
 	},
+#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "syscalls:sys_enter_openat:k",
 		.check = test__checkevent_tracepoint_modifier,
@@ -1712,6 +1726,7 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_tracepoint_multi_modifier,
 		/* 3 */
 	},
+#endif
 	{
 		.name  = "r1a:kp",
 		.check = test__checkevent_raw_modifier,
@@ -1757,11 +1772,13 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_breakpoint_w_modifier,
 		/* 2 */
 	},
+#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "r1,syscalls:sys_enter_openat:k,1:1:hp",
 		.check = test__checkevent_list,
 		/* 3 */
 	},
+#endif
 	{
 		.name  = "instructions:G",
 		.check = test__checkevent_exclude_host_modifier,
@@ -1792,11 +1809,13 @@ static const struct evlist_test test__events[] = {
 		.check = test__group2,
 		/* 9 */
 	},
+#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
 		.check = test__group3,
 		/* 0 */
 	},
+#endif
 	{
 		.name  = "{cycles:u,instructions:kp}:p",
 		.check = test__group4,
@@ -1807,11 +1826,13 @@ static const struct evlist_test test__events[] = {
 		.check = test__group5,
 		/* 2 */
 	},
+#ifdef HAVE_LIBTRACEEVENT
 	{
 		.name  = "*:*",
 		.check = test__all_tracepoints,
 		/* 3 */
 	},
+#endif
 	{
 		.name  = "{cycles,cache-misses:G}:H",
 		.check = test__group_gh1,
@@ -1867,7 +1888,7 @@ static const struct evlist_test test__events[] = {
 		.check = test__checkevent_breakpoint_len_rw_modifier,
 		/* 4 */
 	},
-#if defined(__s390x__)
+#if defined(__s390x__) && defined(HAVE_LIBTRACEEVENT)
 	{
 		.name  = "kvm-s390:kvm_s390_create_vm",
 		.check = test__checkevent_tracepoint,
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index d04802bfa23fd..8345464d01303 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -69,7 +69,6 @@ perf-y += namespaces.o
 perf-y += comm.o
 perf-y += thread.o
 perf-y += thread_map.o
-perf-y += trace-event-parse.o
 perf-y += parse-events-flex.o
 perf-y += parse-events-bison.o
 perf-y += pmu.o
@@ -77,11 +76,12 @@ perf-y += pmus.o
 perf-y += pmu-flex.o
 perf-y += pmu-bison.o
 perf-y += pmu-hybrid.o
-perf-y += trace-event-read.o
-perf-y += trace-event-info.o
-perf-y += trace-event-scripting.o
-perf-y += trace-event.o
 perf-y += svghelper.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event-info.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event-scripting.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event-parse.o
+perf-$(CONFIG_LIBTRACEEVENT) += trace-event-read.o
 perf-y += sort.o
 perf-y += hist.o
 perf-y += util.o
@@ -153,8 +153,12 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
-perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_lock_contention.o
+
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+  perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
+endif
+
 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
 perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
@@ -189,7 +193,10 @@ perf-$(CONFIG_LIBUNWIND)          += unwind-libunwind.o
 perf-$(CONFIG_LIBUNWIND_X86)      += libunwind/x86_32.o
 perf-$(CONFIG_LIBUNWIND_AARCH64)  += libunwind/arm64.o
 
-perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+  perf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o
+endif
+
 perf-y += data-convert-json.o
 
 perf-y += scripting-engines/
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index c65cdaf6975ed..8031b586e8138 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -19,7 +19,6 @@
 #include <babeltrace/ctf-writer/event-fields.h>
 #include <babeltrace/ctf-ir/utils.h>
 #include <babeltrace/ctf/events.h>
-#include <traceevent/event-parse.h>
 #include "asm/bug.h"
 #include "data-convert.h"
 #include "session.h"
@@ -36,6 +35,10 @@
 #include "clockid.h"
 #include "util/sample.h"
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 #define pr_N(n, fmt, ...) \
 	eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__)
 
diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c
index 57db59068cb68..ba9d93ce94638 100644
--- a/tools/perf/util/data-convert-json.c
+++ b/tools/perf/util/data-convert-json.c
@@ -27,6 +27,10 @@
 #include "util/thread.h"
 #include "util/tool.h"
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 struct convert_json {
 	struct perf_tool tool;
 	FILE *out;
@@ -217,6 +221,7 @@ static int process_sample_event(struct perf_tool *tool,
 	}
 	output_json_format(out, false, 3, "]");
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (sample->raw_data) {
 		int i;
 		struct tep_format_field **fields;
@@ -236,7 +241,7 @@ static int process_sample_event(struct perf_tool *tool,
 			free(fields);
 		}
 	}
-
+#endif
 	output_json_format(out, false, 2, "}");
 	return 0;
 }
@@ -313,7 +318,9 @@ int bt_convert__perf2json(const char *input_name, const char *output_name,
 			.exit           = perf_event__process_exit,
 			.fork           = perf_event__process_fork,
 			.lost           = perf_event__process_lost,
+#ifdef HAVE_LIBTRACEEVENT
 			.tracing_data   = perf_event__process_tracing_data,
+#endif
 			.build_id       = perf_event__process_build_id,
 			.id_index       = perf_event__process_id_index,
 			.auxtrace_info  = perf_event__process_auxtrace_info,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index fbf3192bced97..590d4e77effcf 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -290,6 +290,7 @@ struct evsel *evlist__add_aux_dummy(struct evlist *evlist, bool system_wide)
 	return evsel;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
 {
 	struct evsel *evsel = evsel__newtp_idx("sched", "sched_switch", 0);
@@ -305,7 +306,8 @@ struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide)
 
 	evlist__add(evlist, evsel);
 	return evsel;
-};
+}
+#endif
 
 int evlist__add_attrs(struct evlist *evlist, struct perf_event_attr *attrs, size_t nr_attrs)
 {
@@ -376,6 +378,7 @@ struct evsel *evlist__find_tracepoint_by_name(struct evlist *evlist, const char
 	return NULL;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler)
 {
 	struct evsel *evsel = evsel__newtp(sys, name);
@@ -387,6 +390,7 @@ int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name,
 	evlist__add(evlist, evsel);
 	return 0;
 }
+#endif
 
 struct evlist_cpu_iterator evlist__cpu_begin(struct evlist *evlist, struct affinity *affinity)
 {
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 16734c6756b3c..e5b84ead566c9 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -127,7 +127,9 @@ static inline struct evsel *evlist__add_dummy_on_all_cpus(struct evlist *evlist)
 {
 	return evlist__add_aux_dummy(evlist, true);
 }
+#ifdef HAVE_LIBTRACEEVENT
 struct evsel *evlist__add_sched_switch(struct evlist *evlist, bool system_wide);
+#endif
 
 int evlist__add_sb_event(struct evlist *evlist, struct perf_event_attr *attr,
 			 evsel__sb_cb_t cb, void *data);
@@ -135,7 +137,9 @@ void evlist__set_cb(struct evlist *evlist, evsel__sb_cb_t cb, void *data);
 int evlist__start_sb_thread(struct evlist *evlist, struct target *target);
 void evlist__stop_sb_thread(struct evlist *evlist);
 
+#ifdef HAVE_LIBTRACEEVENT
 int evlist__add_newtp(struct evlist *evlist, const char *sys, const char *name, void *handler);
+#endif
 
 int __evlist__set_tracepoints_handlers(struct evlist *evlist,
 				       const struct evsel_str_handler *assocs,
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0f617359a82f2..ca911856c4b1d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -12,7 +12,6 @@
 #include <linux/bitops.h>
 #include <api/fs/fs.h>
 #include <api/fs/tracing_path.h>
-#include <traceevent/event-parse.h>
 #include <linux/hw_breakpoint.h>
 #include <linux/perf_event.h>
 #include <linux/compiler.h>
@@ -57,6 +56,10 @@
 
 #include <linux/ctype.h>
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 struct perf_missing_features perf_missing_features;
 
 static clockid_t clockid;
@@ -439,7 +442,9 @@ struct evsel *evsel__clone(struct evsel *orig)
 			goto out_err;
 	}
 	evsel->cgrp = cgroup__get(orig->cgrp);
+#ifdef HAVE_LIBTRACEEVENT
 	evsel->tp_format = orig->tp_format;
+#endif
 	evsel->handler = orig->handler;
 	evsel->core.leader = orig->core.leader;
 
@@ -479,6 +484,7 @@ out_err:
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
+#ifdef HAVE_LIBTRACEEVENT
 struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx)
 {
 	struct evsel *evsel = zalloc(perf_evsel__object.size);
@@ -516,6 +522,7 @@ out_free:
 out_err:
 	return ERR_PTR(err);
 }
+#endif
 
 const char *const evsel__hw_names[PERF_COUNT_HW_MAX] = {
 	"cycles",
@@ -2758,6 +2765,7 @@ u16 evsel__id_hdr_size(struct evsel *evsel)
 	return size;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 struct tep_format_field *evsel__field(struct evsel *evsel, const char *name)
 {
 	return tep_find_field(evsel->tp_format, name);
@@ -2831,6 +2839,7 @@ u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *n
 
 	return field ? format_field__intval(field, sample, evsel->needs_swap) : 0;
 }
+#endif
 
 bool evsel__fallback(struct evsel *evsel, int err, char *msg, size_t msgsize)
 {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index f3485799ddf9e..d572be41b9608 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -72,7 +72,9 @@ struct evsel {
 		char			*name;
 		char			*group_name;
 		const char		*pmu_name;
+#ifdef HAVE_LIBTRACEEVENT
 		struct tep_event	*tp_format;
+#endif
 		char			*filter;
 		unsigned long		max_events;
 		double			scale;
@@ -223,11 +225,14 @@ static inline struct evsel *evsel__new(struct perf_event_attr *attr)
 }
 
 struct evsel *evsel__clone(struct evsel *orig);
-struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
 
 int copy_config_terms(struct list_head *dst, struct list_head *src);
 void free_config_terms(struct list_head *config_terms);
 
+
+#ifdef HAVE_LIBTRACEEVENT
+struct evsel *evsel__newtp_idx(const char *sys, const char *name, int idx);
+
 /*
  * Returns pointer with encoded error via <linux/err.h> interface.
  */
@@ -235,10 +240,13 @@ static inline struct evsel *evsel__newtp(const char *sys, const char *name)
 {
 	return evsel__newtp_idx(sys, name, 0);
 }
+#endif
 
 struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config);
 
+#ifdef HAVE_LIBTRACEEVENT
 struct tep_event *event_format__new(const char *sys, const char *name);
+#endif
 
 void evsel__init(struct evsel *evsel, struct perf_event_attr *attr, int idx);
 void evsel__exit(struct evsel *evsel);
@@ -323,6 +331,7 @@ bool evsel__precise_ip_fallback(struct evsel *evsel);
 
 struct perf_sample;
 
+#ifdef HAVE_LIBTRACEEVENT
 void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char *name);
 u64 evsel__intval(struct evsel *evsel, struct perf_sample *sample, const char *name);
 
@@ -330,6 +339,7 @@ static inline char *evsel__strval(struct evsel *evsel, struct perf_sample *sampl
 {
 	return evsel__rawptr(evsel, sample, name);
 }
+#endif
 
 struct tep_format_field;
 
diff --git a/tools/perf/util/evsel_fprintf.c b/tools/perf/util/evsel_fprintf.c
index 8c2ea80013291..bd22c4932d10e 100644
--- a/tools/perf/util/evsel_fprintf.c
+++ b/tools/perf/util/evsel_fprintf.c
@@ -2,7 +2,6 @@
 #include <inttypes.h>
 #include <stdio.h>
 #include <stdbool.h>
-#include <traceevent/event-parse.h>
 #include "evsel.h"
 #include "util/evsel_fprintf.h"
 #include "util/event.h"
@@ -13,6 +12,10 @@
 #include "srcline.h"
 #include "dso.h"
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...)
 {
 	va_list args;
@@ -74,6 +77,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE
 					 term, (u64)evsel->core.attr.sample_freq);
 	}
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (details->trace_fields) {
 		struct tep_format_field *field;
 
@@ -96,6 +100,7 @@ int evsel__fprintf(struct evsel *evsel, struct perf_attr_details *details, FILE
 			field = field->next;
 		}
 	}
+#endif
 out:
 	fputc('\n', fp);
 	return ++printed;
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index dc2ae397d400e..404d816ca1242 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -6,6 +6,7 @@
 #include <sys/types.h>
 #include <byteswap.h>
 #include <unistd.h>
+#include <regex.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <linux/compiler.h>
@@ -55,6 +56,10 @@
 #include <linux/ctype.h>
 #include <internal/lib.h>
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 /*
  * magic2 = "PERFILE2"
  * must be a numerical value to let the endianness
@@ -298,6 +303,7 @@ static int do_read_bitmap(struct feat_fd *ff, unsigned long **pset, u64 *psize)
 	return 0;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int write_tracing_data(struct feat_fd *ff,
 			      struct evlist *evlist)
 {
@@ -306,6 +312,7 @@ static int write_tracing_data(struct feat_fd *ff,
 
 	return read_tracing_data(ff->fd, &evlist->core.entries);
 }
+#endif
 
 static int write_build_id(struct feat_fd *ff,
 			  struct evlist *evlist __maybe_unused)
@@ -2394,12 +2401,14 @@ FEAT_PROCESS_STR_FUN(arch, arch);
 FEAT_PROCESS_STR_FUN(cpudesc, cpu_desc);
 FEAT_PROCESS_STR_FUN(cpuid, cpuid);
 
+#ifdef HAVE_LIBTRACEEVENT
 static int process_tracing_data(struct feat_fd *ff, void *data)
 {
 	ssize_t ret = trace_report(ff->fd, data, false);
 
 	return ret < 0 ? -1 : 0;
 }
+#endif
 
 static int process_build_id(struct feat_fd *ff, void *data __maybe_unused)
 {
@@ -3366,7 +3375,9 @@ err:
 const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE];
 
 const struct perf_header_feature_ops feat_ops[HEADER_LAST_FEATURE] = {
+#ifdef HAVE_LIBTRACEEVENT
 	FEAT_OPN(TRACING_DATA,	tracing_data,	false),
+#endif
 	FEAT_OPN(BUILD_ID,	build_id,	false),
 	FEAT_OPR(HOSTNAME,	hostname,	false),
 	FEAT_OPR(OSRELEASE,	osrelease,	false),
@@ -4082,6 +4093,7 @@ static int read_attr(int fd, struct perf_header *ph,
 	return ret <= 0 ? -1 : 0;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int evsel__prepare_tracepoint_event(struct evsel *evsel, struct tep_handle *pevent)
 {
 	struct tep_event *event;
@@ -4125,6 +4137,7 @@ static int evlist__prepare_tracepoint_events(struct evlist *evlist, struct tep_h
 
 	return 0;
 }
+#endif
 
 int perf_session__read_header(struct perf_session *session, int repipe_fd)
 {
@@ -4230,11 +4243,15 @@ int perf_session__read_header(struct perf_session *session, int repipe_fd)
 		lseek(fd, tmp, SEEK_SET);
 	}
 
+#ifdef HAVE_LIBTRACEEVENT
 	perf_header__process_sections(header, fd, &session->tevent,
 				      perf_file_section__process);
 
 	if (evlist__prepare_tracepoint_events(session->evlist, session->tevent.pevent))
 		goto out_delete_evlist;
+#else
+	perf_header__process_sections(header, fd, NULL, perf_file_section__process);
+#endif
 
 	return 0;
 out_errno:
@@ -4412,6 +4429,7 @@ int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 int perf_event__process_tracing_data(struct perf_session *session,
 				     union perf_event *event)
 {
@@ -4459,6 +4477,7 @@ int perf_event__process_tracing_data(struct perf_session *session,
 
 	return size_read + padding;
 }
+#endif
 
 int perf_event__process_build_id(struct perf_session *session,
 				 union perf_event *event)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 2d5e601ba60f0..e3861ae621723 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -160,8 +160,10 @@ int perf_event__process_event_update(struct perf_tool *tool,
 				     union perf_event *event,
 				     struct evlist **pevlist);
 size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
+#ifdef HAVE_LIBTRACEEVENT
 int perf_event__process_tracing_data(struct perf_session *session,
 				     union perf_event *event);
+#endif
 int perf_event__process_build_id(struct perf_session *session,
 				 union perf_event *event);
 bool is_perf_magic(u64 magic);
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index e3548ddef2545..6d3921627e332 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -3142,6 +3142,7 @@ static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
 	return 1;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int intel_pt_process_switch(struct intel_pt *pt,
 				   struct perf_sample *sample)
 {
@@ -3165,6 +3166,7 @@ static int intel_pt_process_switch(struct intel_pt *pt,
 
 	return machine__set_current_tid(pt->machine, cpu, -1, tid);
 }
+#endif /* HAVE_LIBTRACEEVENT */
 
 static int intel_pt_context_switch_in(struct intel_pt *pt,
 				      struct perf_sample *sample)
@@ -3433,9 +3435,12 @@ static int intel_pt_process_event(struct perf_session *session,
 			return err;
 	}
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (pt->switch_evsel && event->header.type == PERF_RECORD_SAMPLE)
 		err = intel_pt_process_switch(pt, sample);
-	else if (event->header.type == PERF_RECORD_ITRACE_START)
+	else
+#endif
+	if (event->header.type == PERF_RECORD_ITRACE_START)
 		err = intel_pt_process_itrace_start(pt, event, sample);
 	else if (event->header.type == PERF_RECORD_AUX_OUTPUT_HW_ID)
 		err = intel_pt_process_aux_output_hw_id(pt, event, sample);
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6502cd679f577..21cce83462b35 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -445,6 +445,7 @@ out_free_terms:
 	return ret;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static void tracepoint_error(struct parse_events_error *e, int err,
 			     const char *sys, const char *name)
 {
@@ -593,6 +594,7 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx,
 	closedir(events_dir);
 	return ret;
 }
+#endif /* HAVE_LIBTRACEEVENT */
 
 #ifdef HAVE_LIBBPF_SUPPORT
 struct __add_bpf_event_param {
@@ -1143,6 +1145,7 @@ static int config_term_pmu(struct perf_event_attr *attr,
 		return config_term_common(attr, term, err);
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int config_term_tracepoint(struct perf_event_attr *attr,
 				  struct parse_events_term *term,
 				  struct parse_events_error *err)
@@ -1170,6 +1173,7 @@ static int config_term_tracepoint(struct perf_event_attr *attr,
 
 	return 0;
 }
+#endif
 
 static int config_attr(struct perf_event_attr *attr,
 		       struct list_head *head,
@@ -1325,6 +1329,7 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
 				struct parse_events_error *err,
 				struct list_head *head_config)
 {
+#ifdef HAVE_LIBTRACEEVENT
 	if (head_config) {
 		struct perf_event_attr attr;
 
@@ -1339,6 +1344,16 @@ int parse_events_add_tracepoint(struct list_head *list, int *idx,
 	else
 		return add_tracepoint_event(list, idx, sys, event,
 					    err, head_config);
+#else
+	(void)list;
+	(void)idx;
+	(void)sys;
+	(void)event;
+	(void)head_config;
+	parse_events_error__handle(err, 0, strdup("unsupported tracepoint"),
+				strdup("libtraceevent is necessary for tracepoint support"));
+	return -1;
+#endif
 }
 
 int parse_events_add_numeric(struct parse_events_state *parse_state,
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 07df7bb7b0420..428e72eaafcc7 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -18,7 +18,6 @@ struct parse_events_error;
 struct option;
 struct perf_pmu;
 
-bool have_tracepoints(struct list_head *evlist);
 bool is_event_supported(u8 type, u64 config);
 
 const char *event_type(int type);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index b5941c74a0d64..6fb84b7455b83 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -5,7 +5,9 @@
 #include <poll.h>
 #include <linux/err.h>
 #include <perf/cpumap.h>
+#ifdef HAVE_LIBTRACEEVENT
 #include <traceevent/event-parse.h>
+#endif
 #include <perf/mmap.h>
 #include "evlist.h"
 #include "callchain.h"
@@ -417,6 +419,7 @@ static PyObject *pyrf_sample_event__repr(struct pyrf_event *pevent)
 	return ret;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static bool is_tracepoint(struct pyrf_event *pevent)
 {
 	return pevent->evsel->core.attr.type == PERF_TYPE_TRACEPOINT;
@@ -486,14 +489,17 @@ get_tracepoint_field(struct pyrf_event *pevent, PyObject *attr_name)
 
 	return tracepoint_field(pevent, field);
 }
+#endif /* HAVE_LIBTRACEEVENT */
 
 static PyObject*
 pyrf_sample_event__getattro(struct pyrf_event *pevent, PyObject *attr_name)
 {
 	PyObject *obj = NULL;
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (is_tracepoint(pevent))
 		obj = get_tracepoint_field(pevent, attr_name);
+#endif
 
 	return obj ?: PyObject_GenericGetAttr((PyObject *) pevent, attr_name);
 }
@@ -1326,6 +1332,9 @@ static struct {
 static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
 				  PyObject *args, PyObject *kwargs)
 {
+#ifndef HAVE_LIBTRACEEVENT
+	return NULL;
+#else
 	struct tep_event *tp_format;
 	static char *kwlist[] = { "sys", "name", NULL };
 	char *sys  = NULL;
@@ -1340,6 +1349,7 @@ static PyObject *pyrf__tracepoint(struct pyrf_evsel *pevsel,
 		return _PyLong_FromLong(-1);
 
 	return _PyLong_FromLong(tp_format->id);
+#endif // HAVE_LIBTRACEEVENT
 }
 
 static PyMethodDef perf__methods[] = {
diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build
index 0f5ba28339cf9..d47820c0b4d47 100644
--- a/tools/perf/util/scripting-engines/Build
+++ b/tools/perf/util/scripting-engines/Build
@@ -1,5 +1,7 @@
-perf-$(CONFIG_LIBPERL)   += trace-event-perl.o
-perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+ifeq ($(CONFIG_LIBTRACEEVENT),y)
+  perf-$(CONFIG_LIBPERL)   += trace-event-perl.o
+  perf-$(CONFIG_LIBPYTHON) += trace-event-python.o
+endif
 
 CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-nested-externs -Wno-undef -Wno-switch-default -Wno-bad-function-cast -Wno-declaration-after-statement -Wno-switch-enum
 
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 5b602b6d46854..0bacb49408f84 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -27,6 +27,7 @@
 #include <errno.h>
 #include <linux/bitmap.h>
 #include <linux/time64.h>
+#include <traceevent/event-parse.h>
 
 #include <stdbool.h>
 /* perl needs the following define, right after including stdbool.h */
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index d685a7399ee2e..fabba21919b8f 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -30,6 +30,7 @@
 #include <linux/bitmap.h>
 #include <linux/compiler.h>
 #include <linux/time64.h>
+#include <traceevent/event-parse.h>
 
 #include "../build-id.h"
 #include "../counts.h"
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1facd4616317d..7c021c6cedb95 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -314,7 +314,9 @@ void perf_session__delete(struct perf_session *session)
 			evlist__delete(session->evlist);
 		perf_data__close(session->data);
 	}
+#ifdef HAVE_LIBTRACEEVENT
 	trace_event__cleanup(&session->tevent);
+#endif
 	free(session);
 }
 
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index be5871ea558fe..ee3715e8563bf 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -33,7 +33,9 @@ struct perf_session {
 	struct auxtrace		*auxtrace;
 	struct itrace_synth_opts *itrace_synth_opts;
 	struct list_head	auxtrace_index;
+#ifdef HAVE_LIBTRACEEVENT
 	struct trace_event	tevent;
+#endif
 	struct perf_record_time_conv	time_conv;
 	bool			repipe;
 	bool			one_mmap;
diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index 43e7ca40b2ec4..e80ffbbfacfb8 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -63,12 +63,18 @@ libperf = getenv('LIBPERF')
 ext_sources = [f.strip() for f in open('util/python-ext-sources')
 				if len(f.strip()) > 0 and f[0] != '#']
 
+extra_libraries = []
+
+if '-DHAVE_LIBTRACEEVENT' in cflags:
+    extra_libraries += [ 'traceevent' ]
+else:
+    ext_sources.remove('util/trace-event.c')
+
 # use full paths with source files
 ext_sources = list(map(lambda x: '%s/%s' % (src_perf, x) , ext_sources))
 
-extra_libraries = []
 if '-DHAVE_LIBNUMA_SUPPORT' in cflags:
-    extra_libraries = [ 'numa' ]
+    extra_libraries += [ 'numa' ]
 if '-DHAVE_LIBCAP_SUPPORT' in cflags:
     extra_libraries += [ 'cap' ]
 
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 2e7330867e2ef..c7a97b33e1347 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -22,7 +22,6 @@
 #include "srcline.h"
 #include "strlist.h"
 #include "strbuf.h"
-#include <traceevent/event-parse.h>
 #include "mem-events.h"
 #include "annotate.h"
 #include "event.h"
@@ -32,6 +31,10 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 
+#ifdef HAVE_LIBTRACEEVENT
+#include <traceevent/event-parse.h>
+#endif
+
 regex_t		parent_regex;
 const char	default_parent_pattern[] = "^sys_|^do_page_fault";
 const char	*parent_pattern = default_parent_pattern;
@@ -743,6 +746,7 @@ struct sort_entry sort_time = {
 
 /* --sort trace */
 
+#ifdef HAVE_LIBTRACEEVENT
 static char *get_trace_output(struct hist_entry *he)
 {
 	struct trace_seq seq;
@@ -806,6 +810,7 @@ struct sort_entry sort_trace = {
 	.se_snprintf    = hist_entry__trace_snprintf,
 	.se_width_idx	= HISTC_TRACE,
 };
+#endif /* HAVE_LIBTRACEEVENT */
 
 /* sort keys for branch stacks */
 
@@ -2022,7 +2027,9 @@ static struct sort_dimension common_sort_dimensions[] = {
 	DIM(SORT_LOCAL_WEIGHT, "local_weight", sort_local_weight),
 	DIM(SORT_GLOBAL_WEIGHT, "weight", sort_global_weight),
 	DIM(SORT_TRANSACTION, "transaction", sort_transaction),
+#ifdef HAVE_LIBTRACEEVENT
 	DIM(SORT_TRACE, "trace", sort_trace),
+#endif
 	DIM(SORT_SYM_SIZE, "symbol_size", sort_sym_size),
 	DIM(SORT_DSO_SIZE, "dso_size", sort_dso_size),
 	DIM(SORT_CGROUP, "cgroup", sort_cgroup),
@@ -2206,7 +2213,14 @@ bool perf_hpp__is_ ## key ## _entry(struct perf_hpp_fmt *fmt)	\
 	return hse->se == &sort_ ## key ;			\
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 MK_SORT_ENTRY_CHK(trace)
+#else
+bool perf_hpp__is_trace_entry(struct perf_hpp_fmt *fmt __maybe_unused)
+{
+	return false;
+}
+#endif
 MK_SORT_ENTRY_CHK(srcline)
 MK_SORT_ENTRY_CHK(srcfile)
 MK_SORT_ENTRY_CHK(thread)
@@ -2347,6 +2361,17 @@ static int __sort_dimension__add_hpp_output(struct sort_dimension *sd,
 	return 0;
 }
 
+#ifndef HAVE_LIBTRACEEVENT
+bool perf_hpp__is_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused)
+{
+	return false;
+}
+bool perf_hpp__defined_dynamic_entry(struct perf_hpp_fmt *fmt __maybe_unused,
+				     struct hists *hists __maybe_unused)
+{
+	return false;
+}
+#else
 struct hpp_dynamic_entry {
 	struct perf_hpp_fmt hpp;
 	struct evsel *evsel;
@@ -2621,6 +2646,7 @@ __alloc_dynamic_entry(struct evsel *evsel, struct tep_format_field *field,
 
 	return hde;
 }
+#endif /* HAVE_LIBTRACEEVENT */
 
 struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
 {
@@ -2633,6 +2659,7 @@ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
 		new_hse = memdup(hse, sizeof(*hse));
 		if (new_hse)
 			new_fmt = &new_hse->hpp;
+#ifdef HAVE_LIBTRACEEVENT
 	} else if (perf_hpp__is_dynamic_entry(fmt)) {
 		struct hpp_dynamic_entry *hde, *new_hde;
 
@@ -2640,6 +2667,7 @@ struct perf_hpp_fmt *perf_hpp_fmt__dup(struct perf_hpp_fmt *fmt)
 		new_hde = memdup(hde, sizeof(*hde));
 		if (new_hde)
 			new_fmt = &new_hde->hpp;
+#endif
 	} else {
 		new_fmt = memdup(fmt, sizeof(*fmt));
 	}
@@ -2719,6 +2747,7 @@ static struct evsel *find_evsel(struct evlist *evlist, char *event_name)
 	return evsel;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 static int __dynamic_dimension__add(struct evsel *evsel,
 				    struct tep_format_field *field,
 				    bool raw_trace, int level)
@@ -2789,13 +2818,13 @@ static int add_all_matching_fields(struct evlist *evlist,
 	}
 	return ret;
 }
+#endif /* HAVE_LIBTRACEEVENT */
 
 static int add_dynamic_entry(struct evlist *evlist, const char *tok,
 			     int level)
 {
 	char *str, *event_name, *field_name, *opt_name;
 	struct evsel *evsel;
-	struct tep_format_field *field;
 	bool raw_trace = symbol_conf.raw_trace;
 	int ret = 0;
 
@@ -2820,6 +2849,7 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
 		raw_trace = true;
 	}
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (!strcmp(field_name, "trace_fields")) {
 		ret = add_all_dynamic_fields(evlist, raw_trace, level);
 		goto out;
@@ -2829,6 +2859,7 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
 		ret = add_all_matching_fields(evlist, field_name, raw_trace, level);
 		goto out;
 	}
+#endif
 
 	evsel = find_evsel(evlist, event_name);
 	if (evsel == NULL) {
@@ -2843,10 +2874,12 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
 		goto out;
 	}
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (!strcmp(field_name, "*")) {
 		ret = add_evsel_fields(evsel, raw_trace, level);
 	} else {
-		field = tep_find_any_field(evsel->tp_format, field_name);
+		struct tep_format_field *field = tep_find_any_field(evsel->tp_format, field_name);
+
 		if (field == NULL) {
 			pr_debug("Cannot find event field for %s.%s\n",
 				 event_name, field_name);
@@ -2855,6 +2888,10 @@ static int add_dynamic_entry(struct evlist *evlist, const char *tok,
 
 		ret = __dynamic_dimension__add(evsel, field, raw_trace, level);
 	}
+#else
+	(void)level;
+	(void)raw_trace;
+#endif /* HAVE_LIBTRACEEVENT */
 
 out:
 	free(str);
@@ -2955,11 +2992,11 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
 		struct sort_dimension *sd = &common_sort_dimensions[i];
 
-		if (strncasecmp(tok, sd->name, strlen(tok)))
+		if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
 		for (j = 0; j < ARRAY_SIZE(dynamic_headers); j++) {
-			if (!strcmp(dynamic_headers[j], sd->name))
+			if (sd->name && !strcmp(dynamic_headers[j], sd->name))
 				sort_dimension_add_dynamic_header(sd);
 		}
 
@@ -3009,7 +3046,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
 		struct sort_dimension *sd = &bstack_sort_dimensions[i];
 
-		if (strncasecmp(tok, sd->name, strlen(tok)))
+		if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
 		if (sort__mode != SORT_MODE__BRANCH)
@@ -3025,7 +3062,7 @@ int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
 	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
 		struct sort_dimension *sd = &memory_sort_dimensions[i];
 
-		if (strncasecmp(tok, sd->name, strlen(tok)))
+		if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
 		if (sort__mode != SORT_MODE__MEMORY)
@@ -3339,7 +3376,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
 	for (i = 0; i < ARRAY_SIZE(common_sort_dimensions); i++) {
 		struct sort_dimension *sd = &common_sort_dimensions[i];
 
-		if (strncasecmp(tok, sd->name, strlen(tok)))
+		if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
 		return __sort_dimension__add_output(list, sd);
@@ -3357,7 +3394,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
 	for (i = 0; i < ARRAY_SIZE(bstack_sort_dimensions); i++) {
 		struct sort_dimension *sd = &bstack_sort_dimensions[i];
 
-		if (strncasecmp(tok, sd->name, strlen(tok)))
+		if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
 		if (sort__mode != SORT_MODE__BRANCH)
@@ -3369,7 +3406,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
 	for (i = 0; i < ARRAY_SIZE(memory_sort_dimensions); i++) {
 		struct sort_dimension *sd = &memory_sort_dimensions[i];
 
-		if (strncasecmp(tok, sd->name, strlen(tok)))
+		if (!sd->name || strncasecmp(tok, sd->name, strlen(tok)))
 			continue;
 
 		if (sort__mode != SORT_MODE__MEMORY)
@@ -3508,6 +3545,9 @@ void reset_output_field(void)
 
 static void add_key(struct strbuf *sb, const char *str, int *llen)
 {
+	if (!str)
+		return;
+
 	if (*llen >= 75) {
 		strbuf_addstr(sb, "\n\t\t\t ");
 		*llen = INDENT;
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index 0645795ff0806..3ab6a92b1a6dc 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -2157,6 +2157,7 @@ int perf_event__synthesize_attr(struct perf_tool *tool, struct perf_event_attr *
 	return err;
 }
 
+#ifdef HAVE_LIBTRACEEVENT
 int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct evlist *evlist,
 					perf_event__handler_t process)
 {
@@ -2203,6 +2204,7 @@ int perf_event__synthesize_tracing_data(struct perf_tool *tool, int fd, struct e
 
 	return aligned_size;
 }
+#endif
 
 int perf_event__synthesize_build_id(struct perf_tool *tool, struct dso *pos, u16 misc,
 				    perf_event__handler_t process, struct machine *machine)
@@ -2355,6 +2357,7 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool,
 	}
 	ret += err;
 
+#ifdef HAVE_LIBTRACEEVENT
 	if (have_tracepoints(&evlist->core.entries)) {
 		int fd = perf_data__fd(data);
 
@@ -2374,6 +2377,9 @@ int perf_event__synthesize_for_pipe(struct perf_tool *tool,
 		}
 		ret += err;
 	}
+#else
+	(void)data;
+#endif
 
 	return ret;
 }
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index c9c83a40647c7..2d3c2576bab7f 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -11,6 +11,8 @@
 #include "trace-event.h"
 
 #include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <traceevent/event-parse.h>
 
 static int get_common_field(struct scripting_context *context,
 			    int *offset, int *size, const char *type)
diff --git a/tools/perf/util/trace-event-read.c b/tools/perf/util/trace-event-read.c
index 43146a4ce2fb2..1162c49b80825 100644
--- a/tools/perf/util/trace-event-read.c
+++ b/tools/perf/util/trace-event-read.c
@@ -11,6 +11,7 @@
 #include <sys/stat.h>
 #include <sys/wait.h>
 #include <sys/mman.h>
+#include <traceevent/event-parse.h>
 #include <fcntl.h>
 #include <unistd.h>
 #include <errno.h>
diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c
index 636a010d929b9..56175c53f9af7 100644
--- a/tools/perf/util/trace-event-scripting.c
+++ b/tools/perf/util/trace-event-scripting.c
@@ -9,6 +9,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <errno.h>
+#include <traceevent/event-parse.h>
 
 #include "debug.h"
 #include "trace-event.h"
diff --git a/tools/perf/util/trace-event.c b/tools/perf/util/trace-event.c
index b3ee651e3d91c..8ad75b31e09b8 100644
--- a/tools/perf/util/trace-event.c
+++ b/tools/perf/util/trace-event.c
@@ -1,5 +1,4 @@
 // SPDX-License-Identifier: GPL-2.0
-
 #include <stdio.h>
 #include <unistd.h>
 #include <stdlib.h>
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index 8f39f5bcb2c26..add6c5d9531cd 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -2,9 +2,11 @@
 #ifndef _PERF_UTIL_TRACE_EVENT_H
 #define _PERF_UTIL_TRACE_EVENT_H
 
-#include <traceevent/event-parse.h>
-#include "parse-events.h"
+#include <stdbool.h>
+#include <stdio.h>
+#include <linux/types.h>
 
+struct evlist;
 struct machine;
 struct perf_sample;
 union perf_event;
@@ -18,6 +20,11 @@ struct trace_event {
 	struct tep_plugin_list	*plugin_list;
 };
 
+typedef char *(tep_func_resolver_t)(void *priv,
+				    unsigned long long *addrp, char **modp);
+
+bool have_tracepoints(struct list_head *evlist);
+
 int trace_event__init(struct trace_event *t);
 void trace_event__cleanup(struct trace_event *t);
 int trace_event__register_resolver(struct machine *machine,
-- 
GitLab


From 4171925aa9f3f7bf57b100238f148b50c45c3b1b Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 29 Nov 2022 22:29:35 -0800
Subject: [PATCH 3869/4122] tools lib traceevent: Remove libtraceevent

libtraceevent is now out-of-date and it is better to depend on the
system version. Remove this code that is no longer depended upon by
any builds.

Committer notes:

Removed the removed tools/lib/traceevent/ from tools/perf/MANIFEST, so
that 'make perf-tar-src-pkg' works.

Signed-off-by: Ian Rogers <irogers@google.com>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20221130062935.2219247-5-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/.gitignore               |    4 -
 tools/lib/traceevent/Build                    |    8 -
 tools/lib/traceevent/Documentation/Makefile   |  207 -
 .../traceevent/Documentation/asciidoc.conf    |  120 -
 .../Documentation/libtraceevent-commands.txt  |  153 -
 .../Documentation/libtraceevent-cpus.txt      |   77 -
 .../libtraceevent-endian_read.txt             |   78 -
 .../libtraceevent-event_find.txt              |  103 -
 .../Documentation/libtraceevent-event_get.txt |   99 -
 .../libtraceevent-event_list.txt              |  122 -
 .../libtraceevent-event_print.txt             |  130 -
 .../libtraceevent-field_find.txt              |  118 -
 .../libtraceevent-field_get_val.txt           |  122 -
 .../libtraceevent-field_print.txt             |  126 -
 .../libtraceevent-field_read.txt              |   81 -
 .../Documentation/libtraceevent-fields.txt    |  105 -
 .../libtraceevent-file_endian.txt             |   91 -
 .../Documentation/libtraceevent-filter.txt    |  209 -
 .../Documentation/libtraceevent-func_apis.txt |  183 -
 .../Documentation/libtraceevent-func_find.txt |   88 -
 .../Documentation/libtraceevent-handle.txt    |  101 -
 .../libtraceevent-header_page.txt             |  102 -
 .../libtraceevent-host_endian.txt             |  104 -
 .../Documentation/libtraceevent-long_size.txt |   78 -
 .../Documentation/libtraceevent-page_size.txt |   82 -
 .../libtraceevent-parse_event.txt             |   90 -
 .../libtraceevent-parse_head.txt              |   82 -
 .../Documentation/libtraceevent-plugins.txt   |  122 -
 .../libtraceevent-record_parse.txt            |  137 -
 .../libtraceevent-reg_event_handler.txt       |  156 -
 .../libtraceevent-reg_print_func.txt          |  155 -
 .../Documentation/libtraceevent-set_flag.txt  |  104 -
 .../Documentation/libtraceevent-strerror.txt  |   85 -
 .../Documentation/libtraceevent-tseq.txt      |  158 -
 .../Documentation/libtraceevent.txt           |  192 -
 .../traceevent/Documentation/manpage-1.72.xsl |   14 -
 .../traceevent/Documentation/manpage-base.xsl |   35 -
 .../Documentation/manpage-bold-literal.xsl    |   17 -
 .../Documentation/manpage-normal.xsl          |   13 -
 .../Documentation/manpage-suppress-sp.xsl     |   21 -
 tools/lib/traceevent/Makefile                 |  300 -
 tools/lib/traceevent/event-parse-api.c        |  333 -
 tools/lib/traceevent/event-parse-local.h      |  123 -
 tools/lib/traceevent/event-parse.c            | 7624 -----------------
 tools/lib/traceevent/event-parse.h            |  750 --
 tools/lib/traceevent/event-plugin.c           |  711 --
 tools/lib/traceevent/event-utils.h            |   67 -
 tools/lib/traceevent/kbuffer-parse.c          |  809 --
 tools/lib/traceevent/kbuffer.h                |   68 -
 .../lib/traceevent/libtraceevent.pc.template  |   10 -
 tools/lib/traceevent/parse-filter.c           | 2281 -----
 tools/lib/traceevent/parse-utils.c            |   71 -
 tools/lib/traceevent/plugins/Build            |   12 -
 tools/lib/traceevent/plugins/Makefile         |  225 -
 .../lib/traceevent/plugins/plugin_cfg80211.c  |   43 -
 .../lib/traceevent/plugins/plugin_function.c  |  282 -
 tools/lib/traceevent/plugins/plugin_futex.c   |  123 -
 tools/lib/traceevent/plugins/plugin_hrtimer.c |   74 -
 tools/lib/traceevent/plugins/plugin_jbd2.c    |   61 -
 tools/lib/traceevent/plugins/plugin_kmem.c    |   80 -
 tools/lib/traceevent/plugins/plugin_kvm.c     |  527 --
 .../lib/traceevent/plugins/plugin_mac80211.c  |   88 -
 .../traceevent/plugins/plugin_sched_switch.c  |  146 -
 tools/lib/traceevent/plugins/plugin_scsi.c    |  434 -
 tools/lib/traceevent/plugins/plugin_tlb.c     |   66 -
 tools/lib/traceevent/plugins/plugin_xen.c     |  138 -
 tools/lib/traceevent/tep_strerror.c           |   53 -
 tools/lib/traceevent/trace-seq.c              |  249 -
 tools/lib/traceevent/trace-seq.h              |   55 -
 tools/perf/MANIFEST                           |    1 -
 70 files changed, 19876 deletions(-)
 delete mode 100644 tools/lib/traceevent/.gitignore
 delete mode 100644 tools/lib/traceevent/Build
 delete mode 100644 tools/lib/traceevent/Documentation/Makefile
 delete mode 100644 tools/lib/traceevent/Documentation/asciidoc.conf
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-commands.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-cpus.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_find.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_get.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_list.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_find.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_print.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-field_read.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-fields.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-filter.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-func_find.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-handle.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-header_page.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-long_size.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-page_size.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-strerror.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent-tseq.txt
 delete mode 100644 tools/lib/traceevent/Documentation/libtraceevent.txt
 delete mode 100644 tools/lib/traceevent/Documentation/manpage-1.72.xsl
 delete mode 100644 tools/lib/traceevent/Documentation/manpage-base.xsl
 delete mode 100644 tools/lib/traceevent/Documentation/manpage-bold-literal.xsl
 delete mode 100644 tools/lib/traceevent/Documentation/manpage-normal.xsl
 delete mode 100644 tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl
 delete mode 100644 tools/lib/traceevent/Makefile
 delete mode 100644 tools/lib/traceevent/event-parse-api.c
 delete mode 100644 tools/lib/traceevent/event-parse-local.h
 delete mode 100644 tools/lib/traceevent/event-parse.c
 delete mode 100644 tools/lib/traceevent/event-parse.h
 delete mode 100644 tools/lib/traceevent/event-plugin.c
 delete mode 100644 tools/lib/traceevent/event-utils.h
 delete mode 100644 tools/lib/traceevent/kbuffer-parse.c
 delete mode 100644 tools/lib/traceevent/kbuffer.h
 delete mode 100644 tools/lib/traceevent/libtraceevent.pc.template
 delete mode 100644 tools/lib/traceevent/parse-filter.c
 delete mode 100644 tools/lib/traceevent/parse-utils.c
 delete mode 100644 tools/lib/traceevent/plugins/Build
 delete mode 100644 tools/lib/traceevent/plugins/Makefile
 delete mode 100644 tools/lib/traceevent/plugins/plugin_cfg80211.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_function.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_futex.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_hrtimer.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_jbd2.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_kmem.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_kvm.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_mac80211.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_sched_switch.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_scsi.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_tlb.c
 delete mode 100644 tools/lib/traceevent/plugins/plugin_xen.c
 delete mode 100644 tools/lib/traceevent/tep_strerror.c
 delete mode 100644 tools/lib/traceevent/trace-seq.c
 delete mode 100644 tools/lib/traceevent/trace-seq.h

diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
deleted file mode 100644
index 7123c70b9ebc8..0000000000000
--- a/tools/lib/traceevent/.gitignore
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-TRACEEVENT-CFLAGS
-libtraceevent-dynamic-list
-libtraceevent.so.*
diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build
deleted file mode 100644
index f9a5d79578f51..0000000000000
--- a/tools/lib/traceevent/Build
+++ /dev/null
@@ -1,8 +0,0 @@
-libtraceevent-y += event-parse.o
-libtraceevent-y += event-plugin.o
-libtraceevent-y += trace-seq.o
-libtraceevent-y += parse-filter.o
-libtraceevent-y += parse-utils.o
-libtraceevent-y += kbuffer-parse.o
-libtraceevent-y += tep_strerror.o
-libtraceevent-y += event-parse-api.o
diff --git a/tools/lib/traceevent/Documentation/Makefile b/tools/lib/traceevent/Documentation/Makefile
deleted file mode 100644
index aa72ab96c3c1d..0000000000000
--- a/tools/lib/traceevent/Documentation/Makefile
+++ /dev/null
@@ -1,207 +0,0 @@
-include ../../../scripts/Makefile.include
-include ../../../scripts/utilities.mak
-
-# This Makefile and manpage XSL files were taken from tools/perf/Documentation
-# and modified for libtraceevent.
-
-MAN3_TXT= \
-	$(wildcard libtraceevent-*.txt) \
-	libtraceevent.txt
-
-MAN_TXT = $(MAN3_TXT)
-_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
-_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
-_DOC_MAN3=$(patsubst %.txt,%.3,$(MAN3_TXT))
-
-MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
-MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
-DOC_MAN3=$(addprefix $(OUTPUT),$(_DOC_MAN3))
-
-# Make the path relative to DESTDIR, not prefix
-ifndef DESTDIR
-prefix?=$(HOME)
-endif
-bindir?=$(prefix)/bin
-htmldir?=$(prefix)/share/doc/libtraceevent-doc
-pdfdir?=$(prefix)/share/doc/libtraceevent-doc
-mandir?=$(prefix)/share/man
-man3dir=$(mandir)/man3
-
-ASCIIDOC=asciidoc
-ASCIIDOC_EXTRA = --unsafe -f asciidoc.conf
-ASCIIDOC_HTML = xhtml11
-MANPAGE_XSL = manpage-normal.xsl
-XMLTO_EXTRA =
-INSTALL?=install
-RM ?= rm -f
-
-ifdef USE_ASCIIDOCTOR
-ASCIIDOC = asciidoctor
-ASCIIDOC_EXTRA = -a compat-mode
-ASCIIDOC_EXTRA += -I. -rasciidoctor-extensions
-ASCIIDOC_EXTRA += -a mansource="libtraceevent" -a manmanual="libtraceevent Manual"
-ASCIIDOC_HTML = xhtml5
-endif
-
-XMLTO=xmlto
-
-_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
-ifeq ($(_tmp_tool_path),)
-	missing_tools = $(ASCIIDOC)
-endif
-
-ifndef USE_ASCIIDOCTOR
-_tmp_tool_path := $(call get-executable,$(XMLTO))
-ifeq ($(_tmp_tool_path),)
-	missing_tools += $(XMLTO)
-endif
-endif
-
-#
-# For asciidoc ...
-#	-7.1.2,	no extra settings are needed.
-#	8.0-,	set ASCIIDOC8.
-#
-
-#
-# For docbook-xsl ...
-#	-1.68.1,	set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
-#	1.69.0,		no extra settings are needed?
-#	1.69.1-1.71.0,	set DOCBOOK_SUPPRESS_SP?
-#	1.71.1,		no extra settings are needed?
-#	1.72.0,		set DOCBOOK_XSL_172.
-#	1.73.0-,	set ASCIIDOC_NO_ROFF
-#
-
-#
-# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
-# of 'the ".ft C" problem' in your generated manpages, and you
-# instead ended up with weird characters around callouts, try
-# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
-#
-
-ifdef ASCIIDOC8
-ASCIIDOC_EXTRA += -a asciidoc7compatible
-endif
-ifdef DOCBOOK_XSL_172
-ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff
-MANPAGE_XSL = manpage-1.72.xsl
-else
-	ifdef ASCIIDOC_NO_ROFF
-	# docbook-xsl after 1.72 needs the regular XSL, but will not
-	# pass-thru raw roff codes from asciidoc.conf, so turn them off.
-	ASCIIDOC_EXTRA += -a libtraceevent-asciidoc-no-roff
-	endif
-endif
-ifdef MAN_BOLD_LITERAL
-XMLTO_EXTRA += -m manpage-bold-literal.xsl
-endif
-ifdef DOCBOOK_SUPPRESS_SP
-XMLTO_EXTRA += -m manpage-suppress-sp.xsl
-endif
-
-SHELL_PATH ?= $(SHELL)
-# Shell quote;
-SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
-
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-export DESTDIR DESTDIR_SQ
-
-#
-# Please note that there is a minor bug in asciidoc.
-# The version after 6.0.3 _will_ include the patch found here:
-#   http://marc.theaimsgroup.com/?l=libtraceevent&m=111558757202243&w=2
-#
-# Until that version is released you may have to apply the patch
-# yourself - yes, all 6 characters of it!
-#
-QUIET_SUBDIR0  = +$(MAKE) -C # space to separate -C and subdir
-QUIET_SUBDIR1  =
-
-ifneq ($(findstring $(MAKEFLAGS),w),w)
-PRINT_DIR = --no-print-directory
-else # "make -w"
-NO_SUBDIR = :
-endif
-
-ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifneq ($(V),1)
-	QUIET_ASCIIDOC	= @echo '  ASCIIDOC '$@;
-	QUIET_XMLTO	= @echo '  XMLTO    '$@;
-	QUIET_SUBDIR0	= +@subdir=
-	QUIET_SUBDIR1	= ;$(NO_SUBDIR) \
-			   echo '  SUBDIR   ' $$subdir; \
-			  $(MAKE) $(PRINT_DIR) -C $$subdir
-	export V
-endif
-endif
-
-all: html man
-
-man: man3
-man3: $(DOC_MAN3)
-
-html: $(MAN_HTML)
-
-$(MAN_HTML) $(DOC_MAN3): asciidoc.conf
-
-install: install-man
-
-check-man-tools:
-ifdef missing_tools
-	$(error "You need to install $(missing_tools) for man pages")
-endif
-
-do-install-man: man
-	$(call QUIET_INSTALL, Documentation-man) \
-		$(INSTALL) -d -m 755 $(DESTDIR)$(man3dir); \
-		$(INSTALL) -m 644 $(DOC_MAN3) $(DESTDIR)$(man3dir);
-
-install-man: check-man-tools man do-install-man
-
-uninstall: uninstall-man
-
-uninstall-man:
-	$(call QUIET_UNINST, Documentation-man) \
-		$(Q)$(RM) $(addprefix $(DESTDIR)$(man3dir)/,$(DOC_MAN3))
-
-
-ifdef missing_tools
-  DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
-else
-  DO_INSTALL_MAN = do-install-man
-endif
-
-CLEAN_FILES =					\
-	$(MAN_XML) $(addsuffix +,$(MAN_XML))	\
-	$(MAN_HTML) $(addsuffix +,$(MAN_HTML))	\
-	$(DOC_MAN3) *.3
-
-clean:
-	$(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
-
-ifdef USE_ASCIIDOCTOR
-$(OUTPUT)%.3 : $(OUTPUT)%.txt
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b manpage -d manpage \
-		$(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
-	mv $@+ $@
-endif
-
-$(OUTPUT)%.3 : $(OUTPUT)%.xml
-	$(QUIET_XMLTO)$(RM) $@ && \
-	$(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
-
-$(OUTPUT)%.xml : %.txt
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b docbook -d manpage \
-		$(ASCIIDOC_EXTRA) -alibtraceevent_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
-	mv $@+ $@
-
-$(MAN_HTML): $(OUTPUT)%.html : %.txt
-	$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
-	$(ASCIIDOC) -b $(ASCIIDOC_HTML) -d manpage \
-		$(ASCIIDOC_EXTRA) -aperf_version=$(EVENT_PARSE_VERSION) -o $@+ $< && \
-	mv $@+ $@
diff --git a/tools/lib/traceevent/Documentation/asciidoc.conf b/tools/lib/traceevent/Documentation/asciidoc.conf
deleted file mode 100644
index 07595717f06e3..0000000000000
--- a/tools/lib/traceevent/Documentation/asciidoc.conf
+++ /dev/null
@@ -1,120 +0,0 @@
-## linktep: macro
-#
-# Usage: linktep:command[manpage-section]
-#
-# Note, {0} is the manpage section, while {target} is the command.
-#
-# Show TEP link as: <command>(<section>); if section is defined, else just show
-# the command.
-
-[macros]
-(?su)[\\]?(?P<name>linktep):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
-
-[attributes]
-asterisk=&#42;
-plus=&#43;
-caret=&#94;
-startsb=&#91;
-endsb=&#93;
-tilde=&#126;
-
-ifdef::backend-docbook[]
-[linktep-inlinemacro]
-{0%{target}}
-{0#<citerefentry>}
-{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
-{0#</citerefentry>}
-endif::backend-docbook[]
-
-ifdef::backend-docbook[]
-ifndef::tep-asciidoc-no-roff[]
-# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
-# v1.72 breaks with this because it replaces dots not in roff requests.
-[listingblock]
-<example><title>{title}</title>
-<literallayout>
-ifdef::doctype-manpage[]
-&#10;.ft C&#10;
-endif::doctype-manpage[]
-|
-ifdef::doctype-manpage[]
-&#10;.ft&#10;
-endif::doctype-manpage[]
-</literallayout>
-{title#}</example>
-endif::tep-asciidoc-no-roff[]
-
-ifdef::tep-asciidoc-no-roff[]
-ifdef::doctype-manpage[]
-# The following two small workarounds insert a simple paragraph after screen
-[listingblock]
-<example><title>{title}</title>
-<literallayout>
-|
-</literallayout><simpara></simpara>
-{title#}</example>
-
-[verseblock]
-<formalpara{id? id="{id}"}><title>{title}</title><para>
-{title%}<literallayout{id? id="{id}"}>
-{title#}<literallayout>
-|
-</literallayout>
-{title#}</para></formalpara>
-{title%}<simpara></simpara>
-endif::doctype-manpage[]
-endif::tep-asciidoc-no-roff[]
-endif::backend-docbook[]
-
-ifdef::doctype-manpage[]
-ifdef::backend-docbook[]
-[header]
-template::[header-declarations]
-<refentry>
-<refmeta>
-<refentrytitle>{mantitle}</refentrytitle>
-<manvolnum>{manvolnum}</manvolnum>
-<refmiscinfo class="source">libtraceevent</refmiscinfo>
-<refmiscinfo class="version">{libtraceevent_version}</refmiscinfo>
-<refmiscinfo class="manual">libtraceevent Manual</refmiscinfo>
-</refmeta>
-<refnamediv>
-  <refname>{manname1}</refname>
-  <refname>{manname2}</refname>
-  <refname>{manname3}</refname>
-  <refname>{manname4}</refname>
-  <refname>{manname5}</refname>
-  <refname>{manname6}</refname>
-  <refname>{manname7}</refname>
-  <refname>{manname8}</refname>
-  <refname>{manname9}</refname>
-  <refname>{manname10}</refname>
-  <refname>{manname11}</refname>
-  <refname>{manname12}</refname>
-  <refname>{manname13}</refname>
-  <refname>{manname14}</refname>
-  <refname>{manname15}</refname>
-  <refname>{manname16}</refname>
-  <refname>{manname17}</refname>
-  <refname>{manname18}</refname>
-  <refname>{manname19}</refname>
-  <refname>{manname20}</refname>
-  <refname>{manname21}</refname>
-  <refname>{manname22}</refname>
-  <refname>{manname23}</refname>
-  <refname>{manname24}</refname>
-  <refname>{manname25}</refname>
-  <refname>{manname26}</refname>
-  <refname>{manname27}</refname>
-  <refname>{manname28}</refname>
-  <refname>{manname29}</refname>
-  <refname>{manname30}</refname>
-  <refpurpose>{manpurpose}</refpurpose>
-</refnamediv>
-endif::backend-docbook[]
-endif::doctype-manpage[]
-
-ifdef::backend-xhtml11[]
-[linktep-inlinemacro]
-<a href="{target}.html">{target}{0?({0})}</a>
-endif::backend-xhtml11[]
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt b/tools/lib/traceevent/Documentation/libtraceevent-commands.txt
deleted file mode 100644
index bec552001f8e3..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-commands.txt
+++ /dev/null
@@ -1,153 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_register_comm, tep_override_comm, tep_pid_is_registered,
-tep_data_comm_from_pid, tep_data_pid_from_comm, tep_cmdline_pid -
-Manage pid to process name mappings.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
-int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
-bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_);
-const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_pevent_, int _pid_);
-struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_pevent_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_);
-int *tep_cmdline_pid*(struct tep_handle pass:[*]_pevent_, struct cmdline pass:[*]_cmdline_);
---
-
-DESCRIPTION
------------
-These functions can be used to handle the mapping between pid and process name.
-The library builds a cache of these mappings, which is used to display the name
-of the process, instead of its pid. This information can be retrieved from
-tracefs/saved_cmdlines file.
-
-The _tep_register_comm()_ function registers a _pid_ / process name mapping.
-If a command with the same _pid_ is already registered, an error is returned.
-The _pid_ argument is the process ID, the _comm_ argument is the process name,
-_tep_ is the event context. The _comm_ is duplicated internally.
-
-The _tep_override_comm()_ function registers a _pid_ / process name mapping.
-If a process with the same pid is already registered, the process name string is
-udapted with the new one. The _pid_ argument is the process ID, the _comm_
-argument is the process name, _tep_ is the event context. The _comm_ is
-duplicated internally.
-
-The _tep_is_pid_registered()_ function checks if a pid has a process name
-mapping registered. The _pid_ argument is the process ID, _tep_ is the event
-context.
-
-The _tep_data_comm_from_pid()_ function returns the process name for a given
-pid. The _pid_ argument is the process ID, _tep_ is the event context.
-The returned string should not be freed, but will be freed when the _tep_
-handler is closed.
-
-The _tep_data_pid_from_comm()_ function returns a pid for a given process name.
-The _comm_ argument is the process name, _tep_ is the event context.
-The argument _next_ is the cmdline structure to search for the next pid.
-As there may be more than one pid for a given process, the result of this call
-can be passed back into a recurring call in the _next_ parameter, to search for
-the next pid. If _next_ is NULL, it will return the first pid associated with
-the _comm_. The function performs a linear search, so it may be slow.
-
-The _tep_cmdline_pid()_ function returns the pid associated with a given
-_cmdline_. The _tep_ argument is the event context.
-
-RETURN VALUE
-------------
-_tep_register_comm()_ function returns 0 on success. In case of an error -1 is
-returned and errno is set to indicate the cause of the problem: ENOMEM, if there
-is not enough memory to duplicate the _comm_ or EEXIST if a mapping for this
-_pid_ is already registered.
-
-_tep_override_comm()_ function returns 0 on success. In case of an error -1 is
-returned and errno is set to indicate the cause of the problem: ENOMEM, if there
-is not enough memory to duplicate the _comm_.
-
-_tep_is_pid_registered()_ function returns true if the _pid_ has a process name
-mapped to it, false otherwise.
-
-_tep_data_comm_from_pid()_ function returns the process name as string, or the
-string "<...>" if there is no mapping for the given pid.
-
-_tep_data_pid_from_comm()_ function returns a pointer to a struct cmdline, that
-holds a pid for a given process, or NULL if none is found. This result can be
-passed back into a recurring call as the _next_ parameter of the function.
-
-_tep_cmdline_pid()_ functions returns the pid for the give cmdline. If _cmdline_
- is NULL, then -1 is returned.
-
-EXAMPLE
--------
-The following example registers pid for command "ls", in context of event _tep_
-and performs various searches for pid / process name mappings:
-[source,c]
---
-#include <event-parse.h>
-...
-int ret;
-int ls_pid = 1021;
-struct tep_handle *tep = tep_alloc();
-...
-	ret = tep_register_comm(tep, "ls", ls_pid);
-	if (ret != 0 && errno == EEXIST)
-		ret = tep_override_comm(tep, "ls", ls_pid);
-	if (ret != 0) {
-		/* Failed to register pid / command mapping */
-	}
-...
-	if (tep_is_pid_registered(tep, ls_pid) == 0) {
-		/* Command mapping for ls_pid is not registered */
-	}
-...
-	const char *comm = tep_data_comm_from_pid(tep, ls_pid);
-	if (comm) {
-		/* Found process name for ls_pid */
-	}
-...
-	int pid;
-	struct cmdline *cmd = tep_data_pid_from_comm(tep, "ls", NULL);
-	while (cmd) {
-		pid = tep_cmdline_pid(tep, cmd);
-		/* Found pid for process "ls" */
-		cmd = tep_data_pid_from_comm(tep, "ls", cmd);
-	}
---
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt b/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt
deleted file mode 100644
index 5ad70e43b7529..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-cpus.txt
+++ /dev/null
@@ -1,77 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_cpus, tep_set_cpus - Get / set the number of CPUs, which have a tracing
-buffer representing it. Note, the buffer may be empty.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_cpus*(struct tep_handle pass:[*]_tep_);
-void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_);
---
-
-DESCRIPTION
------------
-The _tep_get_cpus()_ function gets the number of CPUs, which have a tracing
-buffer representing it. The _tep_ argument is trace event parser context.
-
-The _tep_set_cpus()_ function sets the number of CPUs, which have a tracing
-buffer representing it. The _tep_ argument is trace event parser context.
-The _cpu_ argument is the number of CPUs with tracing data.
-
-RETURN VALUE
-------------
-The _tep_get_cpus()_ functions returns the number of CPUs, which have tracing
-data recorded.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-	tep_set_cpus(tep, 5);
-...
-	printf("We have tracing data for %d CPUs", tep_get_cpus(tep));
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt
deleted file mode 100644
index e64851b6e189e..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-endian_read.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_read_number - Reads a number from raw data.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_);
---
-
-DESCRIPTION
------------
-The _tep_read_number()_ function reads an integer from raw data, taking into
-account the endianness of the raw data and the current host. The _tep_ argument
-is the trace event parser context. The _ptr_ is a pointer to the raw data, where
-the integer is, and the _size_ is the size of the integer.
-
-RETURN VALUE
-------------
-The _tep_read_number()_ function returns the integer in the byte order of
-the current host. In case of an error, 0 is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-void process_record(struct tep_record *record)
-{
-	int offset = 24;
-	int data = tep_read_number(tep, record->data + offset, 4);
-
-	/* Read the 4 bytes at the offset 24 of data as an integer */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt
deleted file mode 100644
index 7bc062c9f76f5..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_find.txt
+++ /dev/null
@@ -1,103 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_event,tep_find_event_by_name,tep_find_event_by_record -
-Find events by given key.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
-struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_);
-struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_);
---
-
-DESCRIPTION
------------
-This set of functions can be used to search for an event, based on a given
-criteria. All functions require a pointer to a _tep_, trace event parser
-context.
-
-The _tep_find_event()_ function searches for an event by given event _id_. The
-event ID is assigned dynamically and can be viewed in event's format file,
-"ID" field.
-
-The tep_find_event_by_name()_ function searches for an event by given
-event _name_, under the system _sys_. If the _sys_ is NULL (not specified),
-the first event with _name_ is returned.
-
-The tep_find_event_by_record()_ function searches for an event from a given
-_record_.
-
-RETURN VALUE
-------------
-All these functions return a pointer to the found event, or NULL if there is no
-such event.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_event *event;
-
-event = tep_find_event(tep, 1857);
-if (event == NULL) {
-	/* There is no event with ID 1857 */
-}
-
-event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
-if (event == NULL) {
-	/* There is no kvm_exit event, from kvm system */
-}
-
-void event_from_record(struct tep_record *record)
-{
- struct tep_event *event = tep_find_event_by_record(tep, record);
-	if (event == NULL) {
-		/* There is no event from given record */
-	}
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt
deleted file mode 100644
index 6525092fc417a..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_get.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_event, tep_get_first_event, tep_get_events_count - Access events.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_);
-struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_);
-int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
---
-
-DESCRIPTION
------------
-The _tep_get_event()_ function returns a pointer to event at the given _index_.
-The _tep_ argument is trace event parser context, the _index_ is the index of
-the requested event.
-
-The _tep_get_first_event()_ function returns a pointer to the first event.
-As events are stored in an array, this function returns the pointer to the
-beginning of the array. The _tep_ argument is trace event parser context.
-
-The _tep_get_events_count()_ function returns the number of the events
-in the array. The _tep_ argument is trace event parser context.
-
-RETURN VALUE
-------------
-The _tep_get_event()_ returns a pointer to the event located at _index_.
-NULL is returned in case of error, in case there are no events or _index_ is
-out of range.
-
-The _tep_get_first_event()_ returns a pointer to the first event. NULL is
-returned in case of error, or in case there are no events.
-
-The _tep_get_events_count()_ returns the number of the events. 0 is
-returned in case of error, or in case there are no events.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int i,count = tep_get_events_count(tep);
-struct tep_event *event, *events = tep_get_first_event(tep);
-
-if (events == NULL) {
-	/* There are no events */
-} else {
-	for (i = 0; i < count; i++) {
-		event = (events+i);
-		/* process events[i] */
-	}
-
-	/* Get the last event */
-	event = tep_get_event(tep, count-1);
-}
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt
deleted file mode 100644
index fba350e5a4cb9..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_list.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_list_events, tep_list_events_copy -
-Get list of events, sorted by given criteria.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_event_sort_type* {
-	_TEP_EVENT_SORT_ID_,
-	_TEP_EVENT_SORT_NAME_,
-	_TEP_EVENT_SORT_SYSTEM_,
-};
-
-struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
-struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
---
-
-DESCRIPTION
------------
-The _tep_list_events()_ function returns an array of pointers to the events,
-sorted by the _sort_type_ criteria. The last element of the array is NULL.
-The returned memory must not be freed, it is managed by the library.
-The function is not thread safe. The _tep_ argument is trace event parser
-context. The _sort_type_ argument is the required sort criteria:
-[verse]
---
-	_TEP_EVENT_SORT_ID_	- sort by the event ID.
-	_TEP_EVENT_SORT_NAME_	- sort by the event (name, system, id) triplet.
-	_TEP_EVENT_SORT_SYSTEM_	- sort by the event (system, name, id) triplet.
---
-
-The _tep_list_events_copy()_ is a thread safe version of _tep_list_events()_.
-It has the same behavior, but the returned array is allocated internally and
-must be freed by the caller. Note that the content of the array must not be
-freed (see the EXAMPLE below).
-
-RETURN VALUE
-------------
-The _tep_list_events()_ function returns an array of pointers to events.
-In case of an error, NULL is returned. The returned array must not be freed,
-it is managed by the library.
-
-The _tep_list_events_copy()_ function returns an array of pointers to events.
-In case of an error, NULL is returned. The returned array must be freed by
-the caller.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int i;
-struct tep_event_format **events;
-
-i=0;
-events = tep_list_events(tep, TEP_EVENT_SORT_ID);
-if (events == NULL) {
-	/* Failed to get the events, sorted by ID */
-} else {
-	while(events[i]) {
-		/* walk through the list of the events, sorted by ID */
-		i++;
-	}
-}
-
-i=0;
-events = tep_list_events_copy(tep, TEP_EVENT_SORT_NAME);
-if (events == NULL) {
-	/* Failed to get the events, sorted by name */
-} else {
-	while(events[i]) {
-		/* walk through the list of the events, sorted by name */
-		i++;
-	}
-	free(events);
-}
-
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
deleted file mode 100644
index 2c6a61811118f..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-event_print.txt
+++ /dev/null
@@ -1,130 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_print_event - Writes event information into a trace sequence.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seqpass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._)
---
-
-DESCRIPTION
------------
-
-The _tep_print_event()_ function parses the event information of the given
-_record_ and writes it into the trace sequence _s_, according to the format
-string _fmt_. The desired information is specified after the format string.
-The _fmt_ is printf-like format string, following arguments are supported:
-[verse]
---
-	TEP_PRINT_PID, "%d"  - PID of the event.
-	TEP_PRINT_CPU, "%d"  - Event CPU.
-	TEP_PRINT_COMM, "%s" - Event command string.
-	TEP_PRINT_NAME, "%s" - Event name.
-	TEP_PRINT_LATENCY, "%s" - Latency of the event. It prints 4 or more
-			fields - interrupt state, scheduling state,
-			current context, and preemption count.
-			Field 1 is the interrupt enabled state:
-				d : Interrupts are disabled
-				. : Interrupts are enabled
-				X : The architecture does not support this
-				    information
-			Field 2 is the "need resched" state.
-				N : The task is set to call the scheduler when
-				    possible, as another higher priority task
-				    may need to be scheduled in.
-				. : The task is not set to call the scheduler.
-			Field 3 is the context state.
-				. : Normal context
-				s : Soft interrupt context
-				h : Hard interrupt context
-				H : Hard interrupt context which triggered
-				    during soft interrupt context.
-				z : NMI context
-				Z : NMI context which triggered during hard
-				    interrupt context
-			Field 4 is the preemption count.
-				. : The preempt count is zero.
-			On preemptible kernels (where the task can be scheduled
-			out in arbitrary locations while in kernel context), the
-			preempt count, when non zero, will prevent the kernel
-			from scheduling out the current task. The preempt count
-			number is displayed when it is not zero.
-			Depending on the kernel, it may show other fields
-			(lock depth, or migration disabled, which are unique to
-			specialized kernels).
-	TEP_PRINT_TIME, %d - event time stamp. A divisor and precision can be
-			specified as part of this format string:
-			"%precision.divisord". Example:
-			"%3.1000d" - divide the time by 1000 and print the first
-			3 digits before the dot. Thus, the time stamp
-			"123456000" will be printed as "123.456"
-	TEP_PRINT_INFO, "%s" - event information.
-	TEP_PRINT_INFO_RAW, "%s" - event information, in raw format.
-
---
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct trace_seq seq;
-trace_seq_init(&seq);
-struct tep_handle *tep = tep_alloc();
-...
-void print_my_event(struct tep_record *record)
-{
-	trace_seq_reset(&seq);
-	tep_print_event(tep, s, record, "%16s-%-5d [%03d] %s %6.1000d %s %s",
-			TEP_PRINT_COMM, TEP_PRINT_PID, TEP_PRINT_CPU,
-			TEP_PRINT_LATENCY, TEP_PRINT_TIME, TEP_PRINT_NAME,
-			TEP_PRINT_INFO);
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*trace-seq.h*
-	Header file to include in order to have access to trace sequences related APIs.
-	Trace sequences are used to allow a function to call several other functions
-	to create a string of data to use.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt
deleted file mode 100644
index 0896af5b9eff5..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_find.txt
+++ /dev/null
@@ -1,118 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_common_field, tep_find_field, tep_find_any_field -
-Search for a field in an event.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
-struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_);
-struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
---
-
-DESCRIPTION
------------
-These functions search for a field with given name in an event. The field
-returned can be used to find the field content from within a data record.
-
-The _tep_find_common_field()_ function searches for a common field with _name_
-in the _event_.
-
-The _tep_find_field()_ function searches for an event specific field with
-_name_ in the _event_.
-
-The _tep_find_any_field()_ function searches for any field with _name_ in the
-_event_.
-
-RETURN VALUE
-------------
-The _tep_find_common_field(), _tep_find_field()_ and _tep_find_any_field()_
-functions return a pointer to the found field, or NULL in case there is no field
-with the requested name.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-void get_htimer_info(struct tep_handle *tep, struct tep_record *record)
-{
-	struct tep_format_field *field;
-	struct tep_event *event;
-	long long softexpires;
-	int mode;
-	int pid;
-
-	event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
-
-	field = tep_find_common_field(event, "common_pid");
-	if (field == NULL) {
-		/* Cannot find "common_pid" field in the event */
-	} else {
-		/* Get pid from the data record */
-		pid = tep_read_number(tep, record->data + field->offset,
-				      field->size);
-	}
-
-	field = tep_find_field(event, "softexpires");
-	if (field == NULL) {
-		/* Cannot find "softexpires" event specific field in the event */
-	} else {
-		/* Get softexpires parameter from the data record */
-		softexpires = tep_read_number(tep, record->data + field->offset,
-					      field->size);
-	}
-
-	field = tep_find_any_field(event, "mode");
-	if (field == NULL) {
-		/* Cannot find "mode" field in the event */
-	} else
-	{
-		/* Get mode parameter from the data record */
-		mode = tep_read_number(tep, record->data + field->offset,
-				       field->size);
-	}
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt
deleted file mode 100644
index 6324f0d48aebe..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_get_val.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_any_field_val, tep_get_common_field_val, tep_get_field_val,
-tep_get_field_raw - Get value of a field.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_);
---
-
-DESCRIPTION
------------
-These functions can be used to find a field and retrieve its value.
-
-The _tep_get_any_field_val()_ function searches in the _record_ for a field
-with _name_, part of the _event_. If the field is found, its value is stored in
-_val_. If there is an error and _err_ is not zero, then an error string is
-written into _s_.
-
-The _tep_get_common_field_val()_ function does the same as
-_tep_get_any_field_val()_, but searches only in the common fields. This works
-for any event as all events include the common fields.
-
-The _tep_get_field_val()_ function does the same as _tep_get_any_field_val()_,
-but searches only in the event specific fields.
-
-The _tep_get_field_raw()_ function searches in the _record_ for a field with
-_name_, part of the _event_. If the field is found, a pointer to where the field
-exists in the record's raw data is returned. The size of the data is stored in
-_len_. If there is an error and _err_ is not zero, then an error string is
-written into _s_.
-
-RETURN VALUE
-------------
-The _tep_get_any_field_val()_, _tep_get_common_field_val()_ and
-_tep_get_field_val()_ functions return 0 on success, or -1 in case of an error.
-
-The _tep_get_field_raw()_ function returns a pointer to field's raw data, and
-places the length of this data in _len_. In case of an error NULL is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
-...
-void process_record(struct tep_record *record)
-{
-	int len;
-	char *comm;
-	struct tep_event_format *event;
-	unsigned long long val;
-
-	event = tep_find_event_by_record(pevent, record);
-	if (event != NULL) {
-		if (tep_get_common_field_val(NULL, event, "common_type",
-					     record, &val, 0) == 0) {
-			/* Got the value of common type field */
-		}
-		if (tep_get_field_val(NULL, event, "pid", record, &val, 0) == 0) {
-			/* Got the value of pid specific field */
-		}
-		comm = tep_get_field_raw(NULL, event, "comm", record, &len, 0);
-		if (comm != NULL) {
-			/* Got a pointer to the comm event specific field */
-		}
-	}
-}
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*trace-seq.h*
-	Header file to include in order to have access to trace sequences
-	related APIs. Trace sequences are used to allow a function to call
-	several other functions to create a string of data to use.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt
deleted file mode 100644
index 9a9df98ac44d4..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_print.txt
+++ /dev/null
@@ -1,126 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_print_field, tep_print_fields, tep_print_num_field, tep_print_func_field -
-Print the field content.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_);
-void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_);
-int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
-int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
---
-
-DESCRIPTION
------------
-These functions print recorded field's data, according to the field's type.
-
-The _tep_print_field()_ function extracts from the recorded raw _data_ value of
-the _field_ and prints it into _s_, according to the field type.
-
-The _tep_print_fields()_ prints each field name followed by the record's field
-value according to the field's type:
-[verse]
---
-"field1_name=field1_value field2_name=field2_value ..."
---
-It iterates all fields of the _event_, and calls _tep_print_field()_ for each of
-them.
-
-The _tep_print_num_field()_ function prints a numeric field with given format
-string. A search is performed in the _event_ for a field with _name_. If such
-field is found, its value is extracted from the _record_ and is printed in the
-_s_, according to the given format string _fmt_. If the argument _err_ is
-non-zero, and an error occures - it is printed in the _s_.
-
-The _tep_print_func_field()_ function prints a function field with given format
-string.  A search is performed in the _event_ for a field with _name_. If such
-field is found, its value is extracted from the _record_. The value is assumed
-to be a function address, and a search is perform to find the name of this
-function. The function name (if found) and its address are printed in the _s_,
-according to the given format string _fmt_. If the argument _err_ is non-zero,
-and an error occures - it is printed in _s_.
-
-RETURN VALUE
-------------
-The _tep_print_num_field()_ and _tep_print_func_field()_ functions return 1
-on success, -1 in case of an error or 0 if the print buffer _s_ is full.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct trace_seq seq;
-trace_seq_init(&seq);
-struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
-...
-void process_record(struct tep_record *record)
-{
-	struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid");
-
-	trace_seq_reset(&seq);
-
-	/* Print the value of "common_pid" */
-	tep_print_field(&seq, record->data, field_pid);
-
-	/* Print all fields of the "hrtimer_start" event */
-	tep_print_fields(&seq, record->data, record->size, event);
-
-	/* Print the value of "expires" field with custom format string */
-	tep_print_num_field(&seq, " timer expires in %llu ", event, "expires", record, 0);
-
-	/* Print the address and the name of "function" field with custom format string */
-	tep_print_func_field(&seq, " timer function is %s ", event, "function", record, 0);
- }
- ...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*trace-seq.h*
-	Header file to include in order to have access to trace sequences related APIs.
-	Trace sequences are used to allow a function to call several other functions
-	to create a string of data to use.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt b/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt
deleted file mode 100644
index 64e9e25d3fd9a..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-field_read.txt
+++ /dev/null
@@ -1,81 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_read_number_field - Reads a number from raw data.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_);
---
-
-DESCRIPTION
------------
-The _tep_read_number_field()_ function reads the value of the _field_ from the
-raw _data_ and stores it in the _value_. The function sets the _value_ according
-to the endianness of the raw data and the current machine and stores it in
-_value_.
-
-RETURN VALUE
-------------
-The _tep_read_number_field()_ function retunrs 0 in case of success, or -1 in
-case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_event *event = tep_find_event_by_name(tep, "timer", "hrtimer_start");
-...
-void process_record(struct tep_record *record)
-{
-	unsigned long long pid;
-	struct tep_format_field *field_pid = tep_find_common_field(event, "common_pid");
-
-	if (tep_read_number_field(field_pid, record->data, &pid) != 0) {
-		/* Failed to get "common_pid" value */
-	}
-}
-...
---
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt b/tools/lib/traceevent/Documentation/libtraceevent-fields.txt
deleted file mode 100644
index 1ccb531d5114f..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-fields.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_event_common_fields, tep_event_fields - Get a list of fields for an event.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_);
-struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_);
---
-
-DESCRIPTION
------------
-The _tep_event_common_fields()_ function returns an array of pointers to common
-fields for the _event_. The array is allocated in the function and must be freed
-by free(). The last element of the array is NULL.
-
-The _tep_event_fields()_ function returns an array of pointers to event specific
-fields for the _event_. The array is allocated in the function and must be freed
-by free(). The last element of the array is NULL.
-
-RETURN VALUE
-------------
-Both _tep_event_common_fields()_ and _tep_event_fields()_ functions return
-an array of pointers to tep_format_field structures in case of success, or
-NULL in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int i;
-struct tep_format_field **fields;
-struct tep_event *event = tep_find_event_by_name(tep, "kvm", "kvm_exit");
-if (event != NULL) {
-	fields = tep_event_common_fields(event);
-	if (fields != NULL) {
-		i = 0;
-		while (fields[i]) {
-			/*
-			  walk through the list of the common fields
-			  of the kvm_exit event
-			*/
-			i++;
-		}
-		free(fields);
-	}
-	fields = tep_event_fields(event);
-	if (fields != NULL) {
-		i = 0;
-		while (fields[i]) {
-			/*
-			  walk through the list of the event specific
-			  fields of the kvm_exit event
-			*/
-			i++;
-		}
-		free(fields);
-	}
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt
deleted file mode 100644
index f401ad311047b..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-file_endian.txt
+++ /dev/null
@@ -1,91 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_is_file_bigendian, tep_set_file_bigendian - Get / set the endianness of the
-raw data being accessed by the tep handler.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_endian* {
-	TEP_LITTLE_ENDIAN = 0,
-	TEP_BIG_ENDIAN
-};
-
-bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_);
-void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
-
---
-DESCRIPTION
------------
-The _tep_is_file_bigendian()_ function gets the endianness of the raw data,
-being accessed by the tep handler. The _tep_ argument is trace event parser
-context.
-
-The _tep_set_file_bigendian()_ function sets the endianness of raw data being
-accessed by the tep handler. The _tep_ argument is trace event parser context.
-[verse]
---
-The _endian_ argument is the endianness:
-	_TEP_LITTLE_ENDIAN_ - the raw data is in little endian format,
-	_TEP_BIG_ENDIAN_ - the raw data is in big endian format.
---
-RETURN VALUE
-------------
-The _tep_is_file_bigendian()_ function returns true if the data is in bigendian
-format, false otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-	tep_set_file_bigendian(tep, TEP_LITTLE_ENDIAN);
-...
-	if (tep_is_file_bigendian(tep)) {
-		/* The raw data is in big endian */
-	} else {
-		/* The raw data is in little endian */
-	}
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt b/tools/lib/traceevent/Documentation/libtraceevent-filter.txt
deleted file mode 100644
index 4a9962d8cb594..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-filter.txt
+++ /dev/null
@@ -1,209 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_filter_alloc, tep_filter_free, tep_filter_reset, tep_filter_make_string,
-tep_filter_copy, tep_filter_compare, tep_filter_match, tep_event_filtered,
-tep_filter_remove_event, tep_filter_strerror, tep_filter_add_filter_str -
-Event filter related APIs.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_);
-void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_);
-void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_);
-enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_);
-int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_);
-int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_);
-int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
-char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_);
---
-
-DESCRIPTION
------------
-Filters can be attached to traced events. They can be used to filter out various
-events when outputting them. Each event can be filtered based on its parameters,
-described in the event's format file. This set of functions can be used to
-create, delete, modify and attach event filters.
-
-The _tep_filter_alloc()_ function creates a new event filter. The _tep_ argument
-is the trace event parser context.
-
-The _tep_filter_free()_ function frees an event filter and all resources that it
-had used.
-
-The _tep_filter_reset()_ function removes all rules from an event filter and
-resets it.
-
-The _tep_filter_add_filter_str()_ function adds a new rule to the _filter_. The
-_filter_str_ argument is the filter string, that contains the rule.
-
-The _tep_event_filtered()_ function checks if the event with _event_id_ has
-_filter_.
-
-The _tep_filter_remove_event()_ function removes a _filter_ for an event with
-_event_id_.
-
-The _tep_filter_match()_ function tests if a _record_ matches given _filter_.
-
-The _tep_filter_copy()_ function copies a _source_ filter into a _dest_ filter.
-
-The _tep_filter_compare()_ function compares two filers - _filter1_ and _filter2_.
-
-The _tep_filter_make_string()_ function constructs a string, displaying
-the _filter_ contents for given _event_id_.
-
-The _tep_filter_strerror()_ function copies the _filter_ error buffer into the
-given _buf_ with the size _buflen_. If the error buffer is empty, in the _buf_
-is copied a string, describing the error _err_.
-
-RETURN VALUE
-------------
-The _tep_filter_alloc()_ function returns a pointer to the newly created event
-filter, or NULL in case of an error.
-
-The _tep_filter_add_filter_str()_ function returns 0 if the rule was
-successfully added or a negative error code.  Use _tep_filter_strerror()_ to see
-actual error message in case of an error.
-
-The _tep_event_filtered()_ function returns 1 if the filter is found for given
-event, or 0 otherwise.
-
-The _tep_filter_remove_event()_ function returns 1 if the vent was removed, or
-0 if the event was not found.
-
-The _tep_filter_match()_ function returns _tep_errno_, according to the result:
-[verse]
---
-_pass:[TEP_ERRNO__FILTER_MATCH]_	- filter found for event, the record matches.
-_pass:[TEP_ERRNO__FILTER_MISS]_		- filter found for event, the record does not match.
-_pass:[TEP_ERRNO__FILTER_NOT_FOUND]_	- no filter found for record's event.
-_pass:[TEP_ERRNO__NO_FILTER]_		- no rules in the filter.
---
-or any other _tep_errno_, if an error occurred during the test.
-
-The _tep_filter_copy()_ function returns 0 on success or -1 if not all rules
- were copied.
-
-The _tep_filter_compare()_ function returns 1 if the two filters hold the same
-content, or 0 if they do not.
-
-The _tep_filter_make_string()_ function returns a string, which must be freed
-with free(), or NULL in case of an error.
-
-The _tep_filter_strerror()_ function returns 0 if message was filled
-successfully, or -1 in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char errstr[200];
-int ret;
-
-struct tep_event_filter *filter = tep_filter_alloc(tep);
-struct tep_event_filter *filter1 = tep_filter_alloc(tep);
-ret = tep_filter_add_filter_str(filter, "sched/sched_wakeup:target_cpu==1");
-if(ret < 0) {
-	tep_filter_strerror(filter, ret, errstr, sizeof(errstr));
-	/* Failed to add a new rule to the filter, the error string is in errstr */
-}
-if (tep_filter_copy(filter1, filter) != 0) {
-	/* Failed to copy filter in filter1 */
-}
-...
-if (tep_filter_compare(filter, filter1) != 1) {
-	/* Both filters are different */
-}
-...
-void process_record(struct tep_handle *tep, struct tep_record *record)
-{
-	struct tep_event *event;
-	char *fstring;
-
-	event = tep_find_event_by_record(tep, record);
-
-	if (tep_event_filtered(filter, event->id) == 1) {
-		/* The event has filter */
-		fstring = tep_filter_make_string(filter, event->id);
-		if (fstring != NULL) {
-			/* The filter for the event is in fstring */
-			free(fstring);
-		}
-	}
-
-	switch (tep_filter_match(filter, record)) {
-	case TEP_ERRNO__FILTER_MATCH:
-		/* The filter matches the record */
-		break;
-	case TEP_ERRNO__FILTER_MISS:
-		/* The filter does not match the record */
-		break;
-	case TEP_ERRNO__FILTER_NOT_FOUND:
-		/* No filter found for record's event */
-		break;
-	case TEP_ERRNO__NO_FILTER:
-		/* There are no rules in the filter */
-		break
-	default:
-		/* An error occurred during the test */
-		break;
-	}
-
-	if (tep_filter_remove_event(filter, event->id) == 1) {
-		/* The event was removed from the filter */
-	}
-}
-
-...
-tep_filter_reset(filter);
-...
-tep_filter_free(filter);
-tep_filter_free(filter1);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
deleted file mode 100644
index f6aca0df2151a..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-func_apis.txt
+++ /dev/null
@@ -1,183 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_function, tep_find_function_address, tep_set_function_resolver,
-tep_reset_function_resolver, tep_register_function, tep_register_print_string -
-function related tep APIs
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-typedef char pass:[*](*tep_func_resolver_t*)(void pass:[*]_priv_, unsigned long long pass:[*]_addrp_, char pass:[**]_modp_);
-int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_);
-void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_);
-const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
-int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_);
---
-
-DESCRIPTION
------------
-Some tools may have already a way to resolve the kernel functions. These APIs
-allow them to keep using it instead of duplicating all the entries inside.
-
-The _tep_func_resolver_t_ type is the prototype of the alternative kernel
-functions resolver. This function receives a pointer to its custom context
-(set with the _tep_set_function_resolver()_ call ) and the address of a kernel
-function, which has to be resolved. In case of success, it should return
-the name of the function and its module (if any) in _modp_.
-
-The _tep_set_function_resolver()_ function registers _func_ as an alternative
-kernel functions resolver. The _tep_ argument is trace event parser context.
-The _priv_ argument is a custom context of the _func_ function. The function
-resolver is used by the APIs _tep_find_function()_,
-_tep_find_function_address()_, and _tep_print_func_field()_ to resolve
-a function address to a function name.
-
-The _tep_reset_function_resolver()_ function resets the kernel functions
-resolver to the default function.  The _tep_ argument is trace event parser
-context.
-
-
-These APIs can be used to find function name and start address, by given
-address. The given address does not have to be exact, it will select
-the function that would contain it.
-
-The _tep_find_function()_ function returns the function name, which contains the
-given address _addr_. The _tep_ argument is the trace event parser context.
-
-The _tep_find_function_address()_ function returns the function start address,
-by given address _addr_. The _addr_ does not have to be exact, it will select
-the function that would contain it. The _tep_ argument is the trace event
-parser context.
-
-The _tep_register_function()_ function registers a function name mapped to an
-address and (optional) module. This mapping is used in case the function tracer
-or events have "%pS" parameter in its format string. It is common to pass in
-the kallsyms function names with their corresponding addresses with this
-function. The _tep_ argument is the trace event parser context. The _name_ is
-the name of the function, the string is copied internally. The _addr_ is the
-start address of the function. The _mod_ is the kernel module the function may
-be in (NULL for none).
-
-The _tep_register_print_string()_ function  registers a string by the address
-it was stored in the kernel. Some strings internal to the kernel with static
-address are passed to certain events. The "%s" in the event's format field
-which has an address needs to know what string would be at that address. The
-tep_register_print_string() supplies the parsing with the mapping between kernel
-addresses and those strings. The _tep_ argument is the trace event parser
-context. The _fmt_ is the string to register, it is copied internally.
-The _addr_ is the address the string was located at.
-
-
-RETURN VALUE
-------------
-The _tep_set_function_resolver()_ function returns 0 in case of success, or -1
-in case of an error.
-
-The _tep_find_function()_ function returns the function name, or NULL in case
-it cannot be found.
-
-The _tep_find_function_address()_ function returns the function start address,
-or 0 in case it cannot be found.
-
-The _tep_register_function()_ function returns 0 in case of success. In case of
-an error -1 is returned, and errno is set to the appropriate error number.
-
-The _tep_register_print_string()_ function returns 0 in case of success. In case
-of an error -1 is returned, and errno is set to the appropriate error number.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char *my_resolve_kernel_addr(void *context,
-			     unsigned long long *addrp, char **modp)
-{
-	struct db *function_database = context;
-	struct symbol *sym = sql_lookup(function_database, *addrp);
-
-	if (!sym)
-		return NULL;
-
-	*modp = sym->module_name;
-	return sym->name;
-}
-
-void show_function( unsigned long long addr)
-{
-	unsigned long long fstart;
-	const char *fname;
-
-	if (tep_set_function_resolver(tep, my_resolve_kernel_addr,
-				      function_database) != 0) {
-		/* failed to register my_resolve_kernel_addr */
-	}
-
-	/* These APIs use my_resolve_kernel_addr() to resolve the addr */
-	fname = tep_find_function(tep, addr);
-	fstart = tep_find_function_address(tep, addr);
-
-	/*
-	   addr is in function named fname, starting at fstart address,
-	   at offset (addr - fstart)
-	*/
-
-	tep_reset_function_resolver(tep);
-
-}
-...
-	if (tep_register_function(tep, "kvm_exit",
-				(unsigned long long) 0x12345678, "kvm") != 0) {
-		/* Failed to register kvm_exit address mapping */
-	}
-...
-	if (tep_register_print_string(tep, "print string",
-				(unsigned long long) 0x87654321, NULL) != 0) {
-		/* Failed to register "print string" address mapping */
-	}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt b/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt
deleted file mode 100644
index 04840e244445d..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-func_find.txt
+++ /dev/null
@@ -1,88 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_find_function,tep_find_function_address - Find function name / start address.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
---
-
-DESCRIPTION
------------
-These functions can be used to find function name and start address, by given
-address. The given address does not have to be exact, it will select the function
-that would contain it.
-
-The _tep_find_function()_ function returns the function name, which contains the
-given address _addr_. The _tep_ argument is the trace event parser context.
-
-The _tep_find_function_address()_ function returns the function start address,
-by given address _addr_. The _addr_ does not have to be exact, it will select the
-function that would contain it. The _tep_ argument is the trace event parser context.
-
-RETURN VALUE
-------------
-The _tep_find_function()_ function returns the function name, or NULL in case
-it cannot be found.
-
-The _tep_find_function_address()_ function returns the function start address,
-or 0 in case it cannot be found.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-void show_function( unsigned long long addr)
-{
-	const char *fname = tep_find_function(tep, addr);
-	unsigned long long fstart = tep_find_function_address(tep, addr);
-
-	/* addr is in function named fname, starting at fstart address, at offset (addr - fstart) */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt b/tools/lib/traceevent/Documentation/libtraceevent-handle.txt
deleted file mode 100644
index 45b20172e2622..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-handle.txt
+++ /dev/null
@@ -1,101 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_alloc, tep_free,tep_ref, tep_unref,tep_get_ref - Create, destroy, manage
-references of trace event parser context.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_handle pass:[*]*tep_alloc*(void);
-void *tep_free*(struct tep_handle pass:[*]_tep_);
-void *tep_ref*(struct tep_handle pass:[*]_tep_);
-void *tep_unref*(struct tep_handle pass:[*]_tep_);
-int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
---
-
-DESCRIPTION
------------
-These are the main functions to create and destroy tep_handle - the main
-structure, representing the trace event parser context. This context is used as
-the input parameter of most library APIs.
-
-The _tep_alloc()_ function allocates and initializes the tep context.
-
-The _tep_free()_ function will decrement the reference of the _tep_ handler.
-When there is no more references, then it will free the handler, as well
-as clean up all its resources that it had used. The argument _tep_ is
-the pointer to the trace event parser context.
-
-The _tep_ref()_ function adds a reference to the _tep_ handler.
-
-The _tep_unref()_ function removes a reference from the _tep_ handler. When
-the last reference is removed, the _tep_ is destroyed, and all resources that
-it had used are cleaned up.
-
-The _tep_ref_get()_ functions gets the current references of the _tep_ handler.
-
-RETURN VALUE
-------------
-_tep_alloc()_ returns a pointer to a newly created tep_handle structure.
-NULL is returned in case there is not enough free memory to allocate it.
-
-_tep_ref_get()_ returns the current references of _tep_.
-If _tep_ is NULL, 0 is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-
-...
-struct tep_handle *tep = tep_alloc();
-...
-int ref = tep_get_ref(tep);
-tep_ref(tep);
-if ( (ref+1) != tep_get_ref(tep)) {
-	/* Something wrong happened, the counter is not incremented by 1 */
-}
-tep_unref(tep);
-...
-tep_free(tep);
-...
---
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt b/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt
deleted file mode 100644
index 615d117dc39fd..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-header_page.txt
+++ /dev/null
@@ -1,102 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_header_page_size, tep_get_header_timestamp_size, tep_is_old_format -
-Get the data stored in the header page, in kernel context.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
-int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
-bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
---
-DESCRIPTION
------------
-These functions retrieve information from kernel context, stored in tracefs
-events/header_page. Old kernels do not have header page info, so default values
-from user space context are used.
-
-The _tep_get_header_page_size()_ function returns the size of a long integer,
-in kernel context. The _tep_ argument is trace event parser context.
-This information is retrieved from tracefs events/header_page, "commit" field.
-
-The _tep_get_header_timestamp_size()_ function returns the size of timestamps,
-in kernel context. The _tep_ argument is trace event parser context. This
-information is retrieved from tracefs events/header_page, "timestamp" field.
-
-The _tep_is_old_format()_ function returns true if the kernel predates
-the addition of events/header_page, otherwise it returns false.
-
-RETURN VALUE
-------------
-The _tep_get_header_page_size()_ function returns the size of a long integer,
-in bytes.
-
-The _tep_get_header_timestamp_size()_ function returns the size of timestamps,
-in bytes.
-
-The _tep_is_old_format()_ function returns true, if an old kernel is used to
-generate the tracing data, which has no event/header_page. If the kernel is new,
-or _tep_ is NULL, false is returned.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-	int longsize;
-	int timesize;
-	bool old;
-
-	longsize = tep_get_header_page_size(tep);
-	timesize = tep_get_header_timestamp_size(tep);
-	old = tep_is_old_format(tep);
-
-	printf ("%s kernel is used to generate the tracing data.\n",
-		old?"Old":"New");
-	printf("The size of a long integer is %d bytes.\n", longsize);
-	printf("The timestamps size is %d bytes.\n", timesize);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt b/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt
deleted file mode 100644
index d5d375eb8d1e9..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-host_endian.txt
+++ /dev/null
@@ -1,104 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_is_bigendian, tep_is_local_bigendian, tep_set_local_bigendian - Get / set
-the endianness of the local machine.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_endian* {
-	TEP_LITTLE_ENDIAN = 0,
-	TEP_BIG_ENDIAN
-};
-
-int *tep_is_bigendian*(void);
-bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_);
-void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
---
-
-DESCRIPTION
------------
-
-The _tep_is_bigendian()_ gets the endianness of the machine, executing
-the function.
-
-The _tep_is_local_bigendian()_ function gets the endianness of the local
-machine, saved in the _tep_ handler. The _tep_ argument is the trace event
-parser context. This API is a bit faster than _tep_is_bigendian()_, as it
-returns cached endianness of the local machine instead of checking it each time.
-
-The _tep_set_local_bigendian()_ function sets the endianness of the local
-machine in the _tep_ handler. The _tep_ argument is trace event parser context.
-The _endian_ argument is the endianness:
-[verse]
---
-	_TEP_LITTLE_ENDIAN_ - the machine is little endian,
-	_TEP_BIG_ENDIAN_ - the machine is big endian.
---
-
-RETURN VALUE
-------------
-The _tep_is_bigendian()_ function returns non zero if the endianness of the
-machine, executing the code, is big endian and zero otherwise.
-
-The _tep_is_local_bigendian()_ function returns true, if the endianness of the
-local machine, saved in the _tep_ handler, is big endian, or false otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-	if (tep_is_bigendian())
-		tep_set_local_bigendian(tep, TEP_BIG_ENDIAN);
-	else
-		tep_set_local_bigendian(tep, TEP_LITTLE_ENDIAN);
-...
-	if (tep_is_local_bigendian(tep))
-		printf("This machine you are running on is bigendian\n");
-	else
-		printf("This machine you are running on is little endian\n");
-
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt
deleted file mode 100644
index 01d78ea2519aa..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-long_size.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_long_size, tep_set_long_size - Get / set the size of a long integer on
-the machine, where the trace is generated, in bytes
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_);
-void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
---
-
-DESCRIPTION
------------
-The _tep_get_long_size()_ function returns the size of a long integer on the machine,
-where the trace is generated. The _tep_ argument is trace event parser context.
-
-The _tep_set_long_size()_ function sets the size of a long integer on the machine,
-where the trace is generated. The _tep_ argument is trace event parser context.
-The _long_size_ is the size of a long integer, in bytes.
-
-RETURN VALUE
-------------
-The _tep_get_long_size()_ function returns the size of a long integer on the machine,
-where the trace is generated, in bytes.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-tep_set_long_size(tep, 4);
-...
-int long_size = tep_get_long_size(tep);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt b/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt
deleted file mode 100644
index 452c0cfa1822c..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-page_size.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_get_page_size, tep_set_page_size - Get / set the size of a memory page on
-the machine, where the trace is generated
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
-void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
---
-
-DESCRIPTION
------------
-The _tep_get_page_size()_ function returns the size of a memory page on
-the machine, where the trace is generated. The _tep_ argument is trace
-event parser context.
-
-The _tep_set_page_size()_ function stores in the _tep_ context the size of a
-memory page on the machine, where the trace is generated.
-The _tep_ argument is trace event parser context.
-The _page_size_ argument is the size of a memory page, in bytes.
-
-RETURN VALUE
-------------
-The _tep_get_page_size()_ function returns size of the memory page, in bytes.
-
-EXAMPLE
--------
-[source,c]
---
-#include <unistd.h>
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-	int page_size = getpagesize();
-
-	tep_set_page_size(tep, page_size);
-
-	printf("The page size for this machine is %d\n", tep_get_page_size(tep));
-
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt
deleted file mode 100644
index f248114ca1ff2..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-parse_event.txt
+++ /dev/null
@@ -1,90 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_parse_event, tep_parse_format - Parse the event format information
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
-enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
---
-
-DESCRIPTION
------------
-The _tep_parse_event()_ function parses the event format and creates an event
-structure to quickly parse raw data for a given event. The _tep_ argument is
-the trace event parser context. The created event structure is stored in the
-_tep_ context. The _buf_ argument is a buffer with _size_, where the event
-format data is. The event format data can be taken from
-tracefs/events/.../.../format files. The _sys_ argument is the system of
-the event.
-
-The _tep_parse_format()_ function does the same as _tep_parse_event()_. The only
-difference is in the extra _eventp_ argument, where the newly created event
-structure is returned.
-
-RETURN VALUE
-------------
-Both _tep_parse_event()_ and _tep_parse_format()_ functions return 0 on success,
-or TEP_ERRNO__... in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char *buf;
-int size;
-struct tep_event *event = NULL;
-buf = read_file("/sys/kernel/tracing/events/ftrace/print/format", &size);
-if (tep_parse_event(tep, buf, size, "ftrace") != 0) {
-	/* Failed to parse the ftrace print format */
-}
-
-if (tep_parse_format(tep, &event, buf, size, "ftrace") != 0) {
-	/* Failed to parse the ftrace print format */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt b/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt
deleted file mode 100644
index c90f16c7d8e6b..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-parse_head.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_parse_header_page - Parses the data stored in the header page.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_);
---
-
-DESCRIPTION
------------
-The _tep_parse_header_page()_ function parses the header page data from _buf_,
-and initializes the _tep_, trace event parser context, with it. The buffer
-_buf_ is with _size_, and is supposed to be copied from
-tracefs/events/header_page.
-
-Some old kernels do not have header page info, in this case the
-_tep_parse_header_page()_ function  can be called with _size_ equal to 0. The
-_tep_ context is initialized with default values. The _long_size_ can be used in
-this use case, to set the size of a long integer to be used.
-
-RETURN VALUE
-------------
-The _tep_parse_header_page()_ function returns 0 in case of success, or -1
-in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char *buf;
-int size;
-buf = read_file("/sys/kernel/tracing/events/header_page", &size);
-if (tep_parse_header_page(tep, buf, size, sizeof(unsigned long)) != 0) {
-	/* Failed to parse the header page */
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt b/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
deleted file mode 100644
index 4d6394397d92e..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-plugins.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_load_plugins, tep_unload_plugins, tep_load_plugins_hook - Load / unload traceevent plugins.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
-void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
-void *tep_load_plugins_hook*(struct tep_handle pass:[*]_tep_, const char pass:[*]_suffix_,
-			   void (pass:[*]_load_plugin_)(struct tep_handle pass:[*]tep,
-					       const char pass:[*]path,
-					       const char pass:[*]name,
-					       void pass:[*]data),
-			   void pass:[*]_data_);
---
-
-DESCRIPTION
------------
-The _tep_load_plugins()_ function loads all plugins, located in the plugin
-directories. The _tep_ argument is trace event parser context.
-The plugin directories are :
-[verse]
---
-	- Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_FIRST
-	- System's plugin directory, defined at the library compile time. It
-	  depends on the library installation prefix and usually is
-	  _(install_preffix)/lib/traceevent/plugins_
-	- Directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
-	- User's plugin directory, located at _~/.local/lib/traceevent/plugins_
-	- Directories, specified in _tep_->plugins_dir with priority TEP_PLUGIN_LAST
---
-Loading of plugins can be controlled by the _tep_flags_, using the
-_tep_set_flag()_ API:
-[verse]
---
-	_TEP_DISABLE_SYS_PLUGINS_	- do not load plugins, located in
-					the system's plugin directory.
-	_TEP_DISABLE_PLUGINS_		- do not load any plugins.
---
-The _tep_set_flag()_ API needs to be called before _tep_load_plugins()_, if
-loading of all plugins is not the desired case.
-
-The _tep_unload_plugins()_ function unloads the plugins, previously loaded by
-_tep_load_plugins()_. The _tep_ argument is trace event parser context. The
-_plugin_list_ is the list of loaded plugins, returned by
-the _tep_load_plugins()_ function.
-
-The _tep_load_plugins_hook_ function walks through all directories with plugins
-and calls user specified _load_plugin()_ hook for each plugin file. Only files
-with given _suffix_ are considered to be plugins. The _data_ is a user specified
-context, passed to _load_plugin()_. Directories and the walk order are the same
-as in _tep_load_plugins()_ API.
-
-RETURN VALUE
-------------
-The _tep_load_plugins()_ function returns a list of successfully loaded plugins,
-or NULL in case no plugins are loaded.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-struct tep_plugin_list *plugins = tep_load_plugins(tep);
-if (plugins == NULL) {
-	/* no plugins are loaded */
-}
-...
-tep_unload_plugins(plugins, tep);
-...
-void print_plugin(struct tep_handle *tep, const char *path,
-		  const char *name, void *data)
-{
-	pritnf("Found libtraceevent plugin %s/%s\n", path, name);
-}
-...
-tep_load_plugins_hook(tep, ".so", print_plugin, NULL);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_, _tep_set_flag(3)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt b/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt
deleted file mode 100644
index e9a69116c78b7..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-record_parse.txt
+++ /dev/null
@@ -1,137 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_data_type, tep_data_pid,tep_data_preempt_count, tep_data_flags -
-Extract common fields from a record.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *trace_flag_type* {
-	_TRACE_FLAG_IRQS_OFF_,
-	_TRACE_FLAG_IRQS_NOSUPPORT_,
-	_TRACE_FLAG_NEED_RESCHED_,
-	_TRACE_FLAG_HARDIRQ_,
-	_TRACE_FLAG_SOFTIRQ_,
-};
-
-int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
---
-
-DESCRIPTION
------------
-This set of functions can be used to extract common fields from a record.
-
-The _tep_data_type()_ function gets the event id from the record _rec_.
-It reads the "common_type" field. The _tep_ argument is the trace event parser
-context.
-
-The _tep_data_pid()_ function gets the process id from the record _rec_.
-It reads the "common_pid" field. The _tep_ argument is the trace event parser
-context.
-
-The _tep_data_preempt_count()_ function gets the preemption count from the
-record _rec_. It reads the "common_preempt_count" field. The _tep_ argument is
-the trace event parser context.
-
-The _tep_data_flags()_ function gets the latency flags from the record _rec_.
-It reads the "common_flags" field. The _tep_ argument is the trace event parser
-context. Supported latency flags are:
-[verse]
---
-	_TRACE_FLAG_IRQS_OFF_,		Interrupts are disabled.
-	_TRACE_FLAG_IRQS_NOSUPPORT_,	Reading IRQ flag is not supported by the architecture.
-	_TRACE_FLAG_NEED_RESCHED_,	Task needs rescheduling.
-	_TRACE_FLAG_HARDIRQ_,		Hard IRQ is running.
-	_TRACE_FLAG_SOFTIRQ_,		Soft IRQ is running.
---
-
-RETURN VALUE
-------------
-The _tep_data_type()_ function returns an integer, representing the event id.
-
-The _tep_data_pid()_ function returns an integer, representing the process id
-
-The _tep_data_preempt_count()_ function returns an integer, representing the
-preemption count.
-
-The _tep_data_flags()_ function returns an integer, representing the latency
-flags. Look at the _trace_flag_type_ enum for supported flags.
-
-All these functions in case of an error return a negative integer.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-void process_record(struct tep_record *record)
-{
-	int data;
-
-	data = tep_data_type(tep, record);
-	if (data >= 0) {
-		/* Got the ID of the event */
-	}
-
-	data = tep_data_pid(tep, record);
-	if (data >= 0) {
-		/* Got the process ID */
-	}
-
-	data = tep_data_preempt_count(tep, record);
-	if (data >= 0) {
-		/* Got the preemption count */
-	}
-
-	data = tep_data_flags(tep, record);
-	if (data >= 0) {
-		/* Got the latency flags */
-	}
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt
deleted file mode 100644
index 53d37d72a1c1f..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-reg_event_handler.txt
+++ /dev/null
@@ -1,156 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_register_event_handler, tep_unregister_event_handler -  Register /
-unregisters a callback function to parse an event information.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_reg_handler* {
-	_TEP_REGISTER_SUCCESS_,
-	_TEP_REGISTER_SUCCESS_OVERWRITE_,
-};
-
-int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
-int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
-
-typedef int (*pass:[*]tep_event_handler_func*)(struct trace_seq pass:[*]s, struct tep_record pass:[*]record, struct tep_event pass:[*]event, void pass:[*]context);
---
-
-DESCRIPTION
------------
-The _tep_register_event_handler()_ function registers a handler function,
-which is going to be called to parse the information for a given event.
-The _tep_ argument is the trace event parser context. The _id_ argument is
-the id of the event. The _sys_name_ argument is the name of the system,
-the event belongs to. The _event_name_ argument is the name of the event.
-If _id_ is >= 0, it is used to find the event, otherwise _sys_name_ and
-_event_name_ are used. The _func_ is a pointer to the function, which is going
-to be called to parse the event information. The _context_ argument is a pointer
-to the context data, which will be passed to the _func_. If a handler function
-for the same event is already registered, it will be overridden with the new
-one. This mechanism allows a developer to override the parsing of a given event.
-If for some reason the default print format is not sufficient, the developer
-can register a function for an event to be used to parse the data instead.
-
-The _tep_unregister_event_handler()_ function unregisters the handler function,
-previously registered with _tep_register_event_handler()_. The _tep_ argument
-is the trace event parser context. The _id_, _sys_name_, _event_name_, _func_,
-and _context_ are the same arguments, as when the callback function _func_ was
-registered.
-
-The _tep_event_handler_func_ is the type of the custom event handler
-function. The _s_ argument is the trace sequence, it can be used to create a
-custom string, describing the event. A _record_  to get the event from is passed
-as input parameter and also the _event_ - the handle to the record's event. The
-_context_ is custom context, set when the custom event handler is registered.
-
-RETURN VALUE
-------------
-The _tep_register_event_handler()_ function returns _TEP_REGISTER_SUCCESS_
-if the new handler is registered successfully or
-_TEP_REGISTER_SUCCESS_OVERWRITE_ if an existing handler is overwritten.
-If there is not  enough memory to complete the registration,
-TEP_ERRNO__MEM_ALLOC_FAILED is returned.
-
-The _tep_unregister_event_handler()_ function returns 0 if _func_ was removed
-successful or, -1 if the event was not found.
-
-The _tep_event_handler_func_ should return -1 in case of an error,
-or 0 otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-int timer_expire_handler(struct trace_seq *s, struct tep_record *record,
-			 struct tep_event *event, void *context)
-{
-	trace_seq_printf(s, "hrtimer=");
-
-	if (tep_print_num_field(s, "0x%llx", event, "timer", record, 0) == -1)
-		tep_print_num_field(s, "0x%llx", event, "hrtimer", record, 1);
-
-	trace_seq_printf(s, " now=");
-
-	tep_print_num_field(s, "%llu", event, "now", record, 1);
-
-	tep_print_func_field(s, " function=%s", event, "function", record, 0);
-
-	return 0;
-}
-...
-	int ret;
-
-	ret = tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry",
-					 timer_expire_handler, NULL);
-	if (ret < 0) {
-		char buf[32];
-
-		tep_strerror(tep, ret, buf, 32)
-		printf("Failed to register handler for hrtimer_expire_entry: %s\n", buf);
-	} else {
-		switch (ret) {
-		case TEP_REGISTER_SUCCESS:
-			printf ("Registered handler for hrtimer_expire_entry\n");
-			break;
-		case TEP_REGISTER_SUCCESS_OVERWRITE:
-			printf ("Overwrote handler for hrtimer_expire_entry\n");
-			break;
-		}
-	}
-...
-	ret = tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry",
-					   timer_expire_handler, NULL);
-	if ( ret )
-		printf ("Failed to unregister handler for hrtimer_expire_entry\n");
-
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*trace-seq.h*
-	Header file to include in order to have access to trace sequences
-	related APIs. Trace sequences are used to allow a function to call
-	several other functions to create a string of data to use.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt b/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt
deleted file mode 100644
index 708dce91ebd86..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-reg_print_func.txt
+++ /dev/null
@@ -1,155 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_register_print_function,tep_unregister_print_function -
-Registers / Unregisters a helper function.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_func_arg_type* {
-	TEP_FUNC_ARG_VOID,
-	TEP_FUNC_ARG_INT,
-	TEP_FUNC_ARG_LONG,
-	TEP_FUNC_ARG_STRING,
-	TEP_FUNC_ARG_PTR,
-	TEP_FUNC_ARG_MAX_TYPES
-};
-
-typedef unsigned long long (*pass:[*]tep_func_handler*)(struct trace_seq pass:[*]s, unsigned long long pass:[*]args);
-
-int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._);
-int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_);
---
-
-DESCRIPTION
------------
-Some events may have helper functions in the print format arguments.
-This allows a plugin to dynamically create a way to process one of
-these functions.
-
-The _tep_register_print_function()_ registers such helper function. The _tep_
-argument is the trace event parser context. The _func_ argument  is a pointer
-to the helper function. The _ret_type_ argument is  the return type of the
-helper function, value from the _tep_func_arg_type_ enum. The _name_ is the name
-of the helper function, as seen in the print format arguments. The _..._ is a
-variable list of _tep_func_arg_type_ enums, the _func_ function arguments.
-This list must end with _TEP_FUNC_ARG_VOID_. See 'EXAMPLE' section.
-
-The _tep_unregister_print_function()_ unregisters a helper function, previously
-registered with _tep_register_print_function()_. The _tep_ argument is the
-trace event parser context. The _func_ and _name_ arguments are the same, used
-when the helper function was registered.
-
-The _tep_func_handler_ is the type of the helper function. The _s_ argument is
-the trace sequence, it can be used to create a custom string.
-The _args_  is a list of arguments, defined when the helper function was
-registered.
-
-RETURN VALUE
-------------
-The _tep_register_print_function()_ function returns 0 in case of success.
-In case of an error, TEP_ERRNO_... code is returned.
-
-The _tep_unregister_print_function()_ returns 0 in case of success, or -1 in
-case of an error.
-
-EXAMPLE
--------
-Some events have internal functions calls, that appear in the print format
-output. For example "tracefs/events/i915/g4x_wm/format" has:
-[source,c]
---
-print fmt: "pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
-	    ((REC->pipe) + 'A'), REC->frame, REC->scanline, REC->primary,
-	    REC->sprite, REC->cursor, yesno(REC->cxsr), REC->sr_plane,
-	    REC->sr_cursor, REC->sr_fbc, yesno(REC->hpll), REC->hpll_plane,
-	    REC->hpll_cursor, REC->hpll_fbc, yesno(REC->fbc)
---
-Notice the call to function _yesno()_ in the print arguments. In the kernel
-context, this function has the following implementation:
-[source,c]
---
-static const char *yesno(int x)
-{
-	static const char *yes = "yes";
-	static const char *no = "no";
-
-	return x ? yes : no;
-}
---
-The user space event parser has no idea how to handle this _yesno()_ function.
-The _tep_register_print_function()_ API can be used to register a user space
-helper function, mapped to the kernel's _yesno()_:
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-static const char *yes_no_helper(int x)
-{
-	return x ? "yes" : "no";
-}
-...
-	if ( tep_register_print_function(tep,
-				    yes_no_helper,
-				    TEP_FUNC_ARG_STRING,
-				    "yesno",
-				    TEP_FUNC_ARG_INT,
-				    TEP_FUNC_ARG_VOID) != 0) {
-		/* Failed to register yes_no_helper function */
-	}
-
-/*
-   Now, when the event parser encounters this yesno() function, it will know
-   how to handle it.
-*/
-...
-	if (tep_unregister_print_function(tep, yes_no_helper, "yesno") != 0) {
-		/* Failed to unregister yes_no_helper function */
-	}
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*trace-seq.h*
-	Header file to include in order to have access to trace sequences
-	related APIs. Trace sequences are used to allow a function to call
-	several other functions to create a string of data to use.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt b/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt
deleted file mode 100644
index b0599780b9a61..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-set_flag.txt
+++ /dev/null
@@ -1,104 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_set_flag, tep_clear_flag, tep_test_flag -
-Manage flags of trace event parser context.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-enum *tep_flag* {
-	_TEP_NSEC_OUTPUT_,
-	_TEP_DISABLE_SYS_PLUGINS_,
-	_TEP_DISABLE_PLUGINS_
-};
-void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
-void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
-bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
---
-
-DESCRIPTION
------------
-Trace event parser context flags are defined in *enum tep_flag*:
-[verse]
---
-_TEP_NSEC_OUTPUT_ - print event's timestamp in nano seconds, instead of micro seconds.
-_TEP_DISABLE_SYS_PLUGINS_ - disable plugins, located in system's plugin
-			directory. This directory is defined at library compile
-			time, and usually depends on library installation
-			prefix: (install_preffix)/lib/traceevent/plugins
-_TEP_DISABLE_PLUGINS_ - disable all library plugins:
-			- in system's plugin directory
-			- in directory, defined by the environment variable _TRACEEVENT_PLUGIN_DIR_
-			- in user's home directory, _~/.traceevent/plugins_
---
-Note: plugin related flags must me set before calling _tep_load_plugins()_ API.
-
-The _tep_set_flag()_ function sets _flag_ to _tep_ context.
-
-The _tep_clear_flag()_ function clears _flag_ from _tep_ context.
-
-The _tep_test_flag()_ function tests if _flag_ is set to _tep_ context.
-
-RETURN VALUE
-------------
-_tep_test_flag()_ function returns true if _flag_ is set, false otherwise.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-/* Print timestamps in nanoseconds */
-tep_set_flag(tep,  TEP_NSEC_OUTPUT);
-...
-if (tep_test_flag(tep, TEP_NSEC_OUTPUT)) {
-	/* print timestamps in nanoseconds */
-} else {
-	/* print timestamps in microseconds */
-}
-...
-/* Print timestamps in microseconds */
-tep_clear_flag(tep, TEP_NSEC_OUTPUT);
-...
---
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt b/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt
deleted file mode 100644
index ee4062a00c9f9..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-strerror.txt
+++ /dev/null
@@ -1,85 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-tep_strerror - Returns a string describing regular errno and tep error number.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
-
---
-DESCRIPTION
------------
-The _tep_strerror()_ function converts tep error number into a human
-readable string.
-The _tep_ argument is trace event parser context. The _errnum_ is a regular
-errno, defined in errno.h, or a tep error number. The string, describing this
-error number is copied in the _buf_ argument. The _buflen_ argument is
-the size of the _buf_.
-
-It as a thread safe wrapper around strerror_r(). The library function has two
-different behaviors - POSIX and GNU specific. The _tep_strerror()_ API always
-behaves as the POSIX version - the error string is copied in the user supplied
-buffer.
-
-RETURN VALUE
-------------
-The _tep_strerror()_ function returns 0, if a valid _errnum_ is passed and the
-string is copied into _buf_. If _errnum_ is not a valid error number,
--1 is returned and _buf_ is not modified.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-...
-struct tep_handle *tep = tep_alloc();
-...
-char buf[32];
-char *pool = calloc(1, 128);
-if (tep == NULL) {
-	tep_strerror(tep, TEP_ERRNO__MEM_ALLOC_FAILED, buf, 32);
-	printf ("The pool is not initialized, %s", buf);
-}
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt b/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt
deleted file mode 100644
index 8ac6aa174e12a..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent-tseq.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-trace_seq_init, trace_seq_destroy, trace_seq_reset, trace_seq_terminate,
-trace_seq_putc, trace_seq_puts, trace_seq_printf, trace_seq_vprintf,
-trace_seq_do_fprintf, trace_seq_do_printf -
-Initialize / destroy a trace sequence.
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-*#include <trace-seq.h>*
-
-void *trace_seq_init*(struct trace_seq pass:[*]_s_);
-void *trace_seq_destroy*(struct trace_seq pass:[*]_s_);
-void *trace_seq_reset*(struct trace_seq pass:[*]_s_);
-void *trace_seq_terminate*(struct trace_seq pass:[*]_s_);
-int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_);
-int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_);
-int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, _..._);
-int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_);
-int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_);
-int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_);
---
-
-DESCRIPTION
------------
-Trace sequences are used to allow a function to call several other functions
-to create a string of data to use.
-
-The _trace_seq_init()_ function initializes the trace sequence _s_.
-
-The _trace_seq_destroy()_ function destroys the trace sequence _s_ and frees
-all its resources that it had used.
-
-The _trace_seq_reset()_ function re-initializes the trace sequence _s_. All
-characters already written in _s_ will be deleted.
-
-The _trace_seq_terminate()_ function terminates the trace sequence _s_. It puts
-the null character pass:['\0'] at the end of the buffer.
-
-The _trace_seq_putc()_ function puts a single character _c_ in the trace
-sequence _s_.
-
-The _trace_seq_puts()_ function puts a NULL terminated string _str_ in the
-trace sequence _s_.
-
-The _trace_seq_printf()_ function puts a formated string _fmt _with
-variable arguments _..._ in the trace sequence _s_.
-
-The _trace_seq_vprintf()_ function puts a formated string _fmt _with
-list of arguments _args_ in the trace sequence _s_.
-
-The _trace_seq_do_printf()_ function prints the buffer of trace sequence _s_ to
-the standard output stdout.
-
-The _trace_seq_do_fprintf()_ function prints the buffer of trace sequence _s_
-to the given file _fp_.
-
-RETURN VALUE
-------------
-Both _trace_seq_putc()_ and _trace_seq_puts()_ functions return the number of
-characters put in the trace sequence, or 0 in case of an error
-
-Both _trace_seq_printf()_ and _trace_seq_vprintf()_ functions return 0 if the
-trace oversizes the buffer's free space, the number of characters printed, or
-a negative value in case of an error.
-
-Both _trace_seq_do_printf()_ and _trace_seq_do_fprintf()_ functions return the
-number of printed characters, or -1 in case of an error.
-
-EXAMPLE
--------
-[source,c]
---
-#include <event-parse.h>
-#include <trace-seq.h>
-...
-struct trace_seq seq;
-trace_seq_init(&seq);
-...
-void foo_seq_print(struct trace_seq *tseq, char *format, ...)
-{
-	va_list ap;
-	va_start(ap, format);
-	if (trace_seq_vprintf(tseq, format, ap) <= 0) {
-		/* Failed to print in the trace sequence */
-	}
-	va_end(ap);
-}
-
-trace_seq_reset(&seq);
-
-char *str = " MAN page example";
-if (trace_seq_puts(&seq, str) != strlen(str)) {
-	/* Failed to put str in the trace sequence */
-}
-if (trace_seq_putc(&seq, ':') != 1) {
-	/* Failed to put ':' in the trace sequence */
-}
-if (trace_seq_printf(&seq, " trace sequence: %d", 1) <= 0) {
-	/* Failed to print in the trace sequence */
-}
-foo_seq_print( &seq, "  %d\n", 2);
-
-trace_seq_terminate(&seq);
-...
-
-if (trace_seq_do_printf(&seq) < 0 ) {
-	/* Failed to print the sequence buffer to the standard output */
-}
-FILE *fp = fopen("trace.txt", "w");
-if (trace_seq_do_fprintf(&seq, fp) < 0 ) [
-	/* Failed to print the sequence buffer to the trace.txt file */
-}
-
-trace_seq_destroy(&seq);
-...
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*trace-seq.h*
-	Header file to include in order to have access to trace sequences related APIs.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_libtraceevent(3)_, _trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/libtraceevent.txt b/tools/lib/traceevent/Documentation/libtraceevent.txt
deleted file mode 100644
index d530a7ce8fb23..0000000000000
--- a/tools/lib/traceevent/Documentation/libtraceevent.txt
+++ /dev/null
@@ -1,192 +0,0 @@
-libtraceevent(3)
-================
-
-NAME
-----
-libtraceevent - Linux kernel trace event library
-
-SYNOPSIS
---------
-[verse]
---
-*#include <event-parse.h>*
-
-Management of tep handler data structure and access of its members:
-	struct tep_handle pass:[*]*tep_alloc*(void);
-	void *tep_free*(struct tep_handle pass:[*]_tep_);
-	void *tep_ref*(struct tep_handle pass:[*]_tep_);
-	void *tep_unref*(struct tep_handle pass:[*]_tep_);
-	int *tep_get_ref*(struct tep_handle pass:[*]_tep_);
-	void *tep_set_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
-	void *tep_clear_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flag_);
-	bool *tep_test_flag*(struct tep_handle pass:[*]_tep_, enum tep_flag _flags_);
-	int *tep_get_cpus*(struct tep_handle pass:[*]_tep_);
-	void *tep_set_cpus*(struct tep_handle pass:[*]_tep_, int _cpus_);
-	int *tep_get_long_size*(strucqt tep_handle pass:[*]_tep_);
-	void *tep_set_long_size*(struct tep_handle pass:[*]_tep_, int _long_size_);
-	int *tep_get_page_size*(struct tep_handle pass:[*]_tep_);
-	void *tep_set_page_size*(struct tep_handle pass:[*]_tep_, int _page_size_);
-	int *tep_get_header_page_size*(struct tep_handle pass:[*]_tep_);
-	int *tep_get_header_timestamp_size*(struct tep_handle pass:[*]_tep_);
-	bool *tep_is_old_format*(struct tep_handle pass:[*]_tep_);
-	int *tep_strerror*(struct tep_handle pass:[*]_tep_, enum tep_errno _errnum_, char pass:[*]_buf_, size_t _buflen_);
-
-Register / unregister APIs:
-	int *tep_register_function*(struct tep_handle pass:[*]_tep_, char pass:[*]_name_, unsigned long long _addr_, char pass:[*]_mod_);
-	int *tep_register_event_handler*(struct tep_handle pass:[*]_tep_, int _id_, const char pass:[*]_sys_name_, const char pass:[*]_event_name_, tep_event_handler_func _func_, void pass:[*]_context_);
-	int *tep_unregister_event_handler*(struct tep_handle pass:[*]tep, int id, const char pass:[*]sys_name, const char pass:[*]event_name, tep_event_handler_func func, void pass:[*]_context_);
-	int *tep_register_print_string*(struct tep_handle pass:[*]_tep_, const char pass:[*]_fmt_, unsigned long long _addr_);
-	int *tep_register_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, enum tep_func_arg_type _ret_type_, char pass:[*]_name_, _..._);
-	int *tep_unregister_print_function*(struct tep_handle pass:[*]_tep_, tep_func_handler _func_, char pass:[*]_name_);
-
-Plugins management:
-	struct tep_plugin_list pass:[*]*tep_load_plugins*(struct tep_handle pass:[*]_tep_);
-	void *tep_unload_plugins*(struct tep_plugin_list pass:[*]_plugin_list_, struct tep_handle pass:[*]_tep_);
-	char pass:[*]pass:[*]*tep_plugin_list_options*(void);
-	void *tep_plugin_free_options_list*(char pass:[*]pass:[*]_list_);
-	int *tep_plugin_add_options*(const char pass:[*]_name_, struct tep_plugin_option pass:[*]_options_);
-	void *tep_plugin_remove_options*(struct tep_plugin_option pass:[*]_options_);
-	void *tep_print_plugins*(struct trace_seq pass:[*]_s_, const char pass:[*]_prefix_, const char pass:[*]_suffix_, const struct tep_plugin_list pass:[*]_list_);
-
-Event related APIs:
-	struct tep_event pass:[*]*tep_get_event*(struct tep_handle pass:[*]_tep_, int _index_);
-	struct tep_event pass:[*]*tep_get_first_event*(struct tep_handle pass:[*]_tep_);
-	int *tep_get_events_count*(struct tep_handle pass:[*]_tep_);
-	struct tep_event pass:[*]pass:[*]*tep_list_events*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
-	struct tep_event pass:[*]pass:[*]*tep_list_events_copy*(struct tep_handle pass:[*]_tep_, enum tep_event_sort_type _sort_type_);
-	void *tep_print_event*(struct tep_handle pass:[*]_tep_, struct trace_seq pass:[*]_s_, struct tep_record pass:[*]_record_, const char pass:[*]_fmt_, _..._);
-
-Event finding:
-	struct tep_event pass:[*]*tep_find_event*(struct tep_handle pass:[*]_tep_, int _id_);
-	struct tep_event pass:[*]*tep_find_event_by_name*(struct tep_handle pass:[*]_tep_, const char pass:[*]_sys_, const char pass:[*]_name_);
-	struct tep_event pass:[*]*tep_find_event_by_record*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_record_);
-
-Parsing of event files:
-	int *tep_parse_header_page*(struct tep_handle pass:[*]_tep_, char pass:[*]_buf_, unsigned long _size_, int _long_size_);
-	enum tep_errno *tep_parse_event*(struct tep_handle pass:[*]_tep_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
-	enum tep_errno *tep_parse_format*(struct tep_handle pass:[*]_tep_, struct tep_event pass:[*]pass:[*]_eventp_, const char pass:[*]_buf_, unsigned long _size_, const char pass:[*]_sys_);
-
-APIs related to fields from event's format files:
-	struct tep_format_field pass:[*]pass:[*]*tep_event_common_fields*(struct tep_event pass:[*]_event_);
-	struct tep_format_field pass:[*]pass:[*]*tep_event_fields*(struct tep_event pass:[*]_event_);
-	void pass:[*]*tep_get_field_raw*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int pass:[*]_len_, int _err_);
-	int *tep_get_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-	int *tep_get_common_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-	int *tep_get_any_field_val*(struct trace_seq pass:[*]_s_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, unsigned long long pass:[*]_val_, int _err_);
-	int *tep_read_number_field*(struct tep_format_field pass:[*]_field_, const void pass:[*]_data_, unsigned long long pass:[*]_value_);
-
-Event fields printing:
-	void *tep_print_field*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, struct tep_format_field pass:[*]_field_);
-	void *tep_print_fields*(struct trace_seq pass:[*]_s_, void pass:[*]_data_, int _size_, struct tep_event pass:[*]_event_);
-	int *tep_print_num_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
-	int *tep_print_func_field*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, struct tep_event pass:[*]_event_, const char pass:[*]_name_, struct tep_record pass:[*]_record_, int _err_);
-
-Event fields finding:
-	struct tep_format_field pass:[*]*tep_find_common_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
-	struct tep_format_field pass:[*]*tep_find_field*(struct tep_event_ormat pass:[*]_event_, const char pass:[*]_name_);
-	struct tep_format_field pass:[*]*tep_find_any_field*(struct tep_event pass:[*]_event_, const char pass:[*]_name_);
-
-Functions resolver:
-	int *tep_set_function_resolver*(struct tep_handle pass:[*]_tep_, tep_func_resolver_t pass:[*]_func_, void pass:[*]_priv_);
-	void *tep_reset_function_resolver*(struct tep_handle pass:[*]_tep_);
-	const char pass:[*]*tep_find_function*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-	unsigned long long *tep_find_function_address*(struct tep_handle pass:[*]_tep_, unsigned long long _addr_);
-
-Filter management:
-	struct tep_event_filter pass:[*]*tep_filter_alloc*(struct tep_handle pass:[*]_tep_);
-	enum tep_errno *tep_filter_add_filter_str*(struct tep_event_filter pass:[*]_filter_, const char pass:[*]_filter_str_);
-	enum tep_errno *tep_filter_match*(struct tep_event_filter pass:[*]_filter_, struct tep_record pass:[*]_record_);
-	int *tep_filter_strerror*(struct tep_event_filter pass:[*]_filter_, enum tep_errno _err_, char pass:[*]buf, size_t _buflen_);
-	int *tep_event_filtered*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-	void *tep_filter_reset*(struct tep_event_filter pass:[*]_filter_);
-	void *tep_filter_free*(struct tep_event_filter pass:[*]_filter_);
-	char pass:[*]*tep_filter_make_string*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-	int *tep_filter_remove_event*(struct tep_event_filter pass:[*]_filter_, int _event_id_);
-	int *tep_filter_copy*(struct tep_event_filter pass:[*]_dest_, struct tep_event_filter pass:[*]_source_);
-	int *tep_filter_compare*(struct tep_event_filter pass:[*]_filter1_, struct tep_event_filter pass:[*]_filter2_);
-
-Parsing various data from the records:
-	int *tep_data_type*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-	int *tep_data_pid*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-	int *tep_data_preempt_count*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-	int *tep_data_flags*(struct tep_handle pass:[*]_tep_, struct tep_record pass:[*]_rec_);
-
-Command and task related APIs:
-	const char pass:[*]*tep_data_comm_from_pid*(struct tep_handle pass:[*]_tep_, int _pid_);
-	struct cmdline pass:[*]*tep_data_pid_from_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, struct cmdline pass:[*]_next_);
-	int *tep_register_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
-	int *tep_override_comm*(struct tep_handle pass:[*]_tep_, const char pass:[*]_comm_, int _pid_);
-	bool *tep_is_pid_registered*(struct tep_handle pass:[*]_tep_, int _pid_);
-	int *tep_cmdline_pid*(struct tep_handle pass:[*]_tep_, struct cmdline pass:[*]_cmdline_);
-
-Endian related APIs:
-	int *tep_is_bigendian*(void);
-	unsigned long long *tep_read_number*(struct tep_handle pass:[*]_tep_, const void pass:[*]_ptr_, int _size_);
-	bool *tep_is_file_bigendian*(struct tep_handle pass:[*]_tep_);
-	void *tep_set_file_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
-	bool *tep_is_local_bigendian*(struct tep_handle pass:[*]_tep_);
-	void *tep_set_local_bigendian*(struct tep_handle pass:[*]_tep_, enum tep_endian _endian_);
-
-Trace sequences:
-*#include <trace-seq.h>*
-	void *trace_seq_init*(struct trace_seq pass:[*]_s_);
-	void *trace_seq_reset*(struct trace_seq pass:[*]_s_);
-	void *trace_seq_destroy*(struct trace_seq pass:[*]_s_);
-	int *trace_seq_printf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, ...);
-	int *trace_seq_vprintf*(struct trace_seq pass:[*]_s_, const char pass:[*]_fmt_, va_list _args_);
-	int *trace_seq_puts*(struct trace_seq pass:[*]_s_, const char pass:[*]_str_);
-	int *trace_seq_putc*(struct trace_seq pass:[*]_s_, unsigned char _c_);
-	void *trace_seq_terminate*(struct trace_seq pass:[*]_s_);
-	int *trace_seq_do_fprintf*(struct trace_seq pass:[*]_s_, FILE pass:[*]_fp_);
-	int *trace_seq_do_printf*(struct trace_seq pass:[*]_s_);
---
-
-DESCRIPTION
------------
-The libtraceevent(3) library provides APIs to access kernel tracepoint events,
-located in the tracefs file system under the events directory.
-
-ENVIRONMENT
------------
-[verse]
---
-TRACEEVENT_PLUGIN_DIR
-	Additional plugin directory. All shared object files, located in this directory will be loaded as traceevent plugins.
---
-
-FILES
------
-[verse]
---
-*event-parse.h*
-	Header file to include in order to have access to the library APIs.
-*trace-seq.h*
-	Header file to include in order to have access to trace sequences related APIs.
-	Trace sequences are used to allow a function to call several other functions
-	to create a string of data to use.
-*-ltraceevent*
-	Linker switch to add when building a program that uses the library.
---
-
-SEE ALSO
---------
-_trace-cmd(1)_
-
-AUTHOR
-------
-[verse]
---
-*Steven Rostedt* <rostedt@goodmis.org>, author of *libtraceevent*.
-*Tzvetomir Stoyanov* <tz.stoyanov@gmail.com>, author of this man page.
---
-REPORTING BUGS
---------------
-Report bugs to  <linux-trace-devel@vger.kernel.org>
-
-LICENSE
--------
-libtraceevent is Free Software licensed under the GNU LGPL 2.1
-
-RESOURCES
----------
-https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
diff --git a/tools/lib/traceevent/Documentation/manpage-1.72.xsl b/tools/lib/traceevent/Documentation/manpage-1.72.xsl
deleted file mode 100644
index b4d315cb8c479..0000000000000
--- a/tools/lib/traceevent/Documentation/manpage-1.72.xsl
+++ /dev/null
@@ -1,14 +0,0 @@
-<!-- manpage-1.72.xsl:
-     special settings for manpages rendered from asciidoc+docbook
-     handles peculiarities in docbook-xsl 1.72.0 -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<xsl:import href="manpage-base.xsl"/>
-
-<!-- these are the special values for the roff control characters
-     needed for docbook-xsl 1.72.0 -->
-<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
-<xsl:param name="git.docbook.dot"      >&#x2302;</xsl:param>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-base.xsl b/tools/lib/traceevent/Documentation/manpage-base.xsl
deleted file mode 100644
index a264fa616093c..0000000000000
--- a/tools/lib/traceevent/Documentation/manpage-base.xsl
+++ /dev/null
@@ -1,35 +0,0 @@
-<!-- manpage-base.xsl:
-     special formatting for manpages rendered from asciidoc+docbook -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<!-- these params silence some output from xmlto -->
-<xsl:param name="man.output.quietly" select="1"/>
-<xsl:param name="refentry.meta.get.quietly" select="1"/>
-
-<!-- convert asciidoc callouts to man page format;
-     git.docbook.backslash and git.docbook.dot params
-     must be supplied by another XSL file or other means -->
-<xsl:template match="co">
-	<xsl:value-of select="concat(
-			      $git.docbook.backslash,'fB(',
-			      substring-after(@id,'-'),')',
-			      $git.docbook.backslash,'fR')"/>
-</xsl:template>
-<xsl:template match="calloutlist">
-	<xsl:value-of select="$git.docbook.dot"/>
-	<xsl:text>sp&#10;</xsl:text>
-	<xsl:apply-templates/>
-	<xsl:text>&#10;</xsl:text>
-</xsl:template>
-<xsl:template match="callout">
-	<xsl:value-of select="concat(
-			      $git.docbook.backslash,'fB',
-			      substring-after(@arearefs,'-'),
-			      '. ',$git.docbook.backslash,'fR')"/>
-	<xsl:apply-templates/>
-	<xsl:value-of select="$git.docbook.dot"/>
-	<xsl:text>br&#10;</xsl:text>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl b/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl
deleted file mode 100644
index 608eb5df62814..0000000000000
--- a/tools/lib/traceevent/Documentation/manpage-bold-literal.xsl
+++ /dev/null
@@ -1,17 +0,0 @@
-<!-- manpage-bold-literal.xsl:
-     special formatting for manpages rendered from asciidoc+docbook -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<!-- render literal text as bold (instead of plain or monospace);
-     this makes literal text easier to distinguish in manpages
-     viewed on a tty -->
-<xsl:template match="literal">
-	<xsl:value-of select="$git.docbook.backslash"/>
-	<xsl:text>fB</xsl:text>
-	<xsl:apply-templates/>
-	<xsl:value-of select="$git.docbook.backslash"/>
-	<xsl:text>fR</xsl:text>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-normal.xsl b/tools/lib/traceevent/Documentation/manpage-normal.xsl
deleted file mode 100644
index a48f5b11f3dcc..0000000000000
--- a/tools/lib/traceevent/Documentation/manpage-normal.xsl
+++ /dev/null
@@ -1,13 +0,0 @@
-<!-- manpage-normal.xsl:
-     special settings for manpages rendered from asciidoc+docbook
-     handles anything we want to keep away from docbook-xsl 1.72.0 -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<xsl:import href="manpage-base.xsl"/>
-
-<!-- these are the normal values for the roff control characters -->
-<xsl:param name="git.docbook.backslash">\</xsl:param>
-<xsl:param name="git.docbook.dot"	>.</xsl:param>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl b/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl
deleted file mode 100644
index a63c7632a87d4..0000000000000
--- a/tools/lib/traceevent/Documentation/manpage-suppress-sp.xsl
+++ /dev/null
@@ -1,21 +0,0 @@
-<!-- manpage-suppress-sp.xsl:
-     special settings for manpages rendered from asciidoc+docbook
-     handles erroneous, inline .sp in manpage output of some
-     versions of docbook-xsl -->
-<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
-		version="1.0">
-
-<!-- attempt to work around spurious .sp at the tail of the line
-     that some versions of docbook stylesheets seem to add -->
-<xsl:template match="simpara">
-  <xsl:variable name="content">
-    <xsl:apply-templates/>
-  </xsl:variable>
-  <xsl:value-of select="normalize-space($content)"/>
-  <xsl:if test="not(ancestor::authorblurb) and
-                not(ancestor::personblurb)">
-    <xsl:text>&#10;&#10;</xsl:text>
-  </xsl:if>
-</xsl:template>
-
-</xsl:stylesheet>
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
deleted file mode 100644
index 98dfd4badea3b..0000000000000
--- a/tools/lib/traceevent/Makefile
+++ /dev/null
@@ -1,300 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# trace-cmd version
-EP_VERSION = 1
-EP_PATCHLEVEL = 1
-EP_EXTRAVERSION = 0
-
-# file format version
-FILE_VERSION = 6
-
-MAKEFLAGS += --no-print-directory
-
-
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
-  $(if $(or $(findstring environment,$(origin $(1))),\
-            $(findstring command line,$(origin $(1)))),,\
-    $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,NM,$(CROSS_COMPILE)nm)
-$(call allow-override,PKG_CONFIG,pkg-config)
-
-EXT = -std=gnu99
-INSTALL = install
-
-# Use DESTDIR for installing into a different root directory.
-# This is useful for building a package. The program will be
-# installed in this directory as if it was the root directory.
-# Then the build tool can move it later.
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
-ifeq ($(LP64), 1)
-  libdir_relative_temp = lib64
-else
-  libdir_relative_temp = lib
-endif
-
-libdir_relative ?= $(libdir_relative_temp)
-prefix ?= /usr/local
-libdir = $(prefix)/$(libdir_relative)
-man_dir = $(prefix)/share/man
-man_dir_SQ = '$(subst ','\'',$(man_dir))'
-pkgconfig_dir ?= $(word 1,$(shell $(PKG_CONFIG) 		\
-			--variable pc_path pkg-config | tr ":" " "))
-includedir_relative = traceevent
-includedir = $(prefix)/include/$(includedir_relative)
-includedir_SQ = '$(subst ','\'',$(includedir))'
-
-export man_dir man_dir_SQ INSTALL
-export DESTDIR DESTDIR_SQ
-export EVENT_PARSE_VERSION
-
-include ../../scripts/Makefile.include
-
-# copy a bit from Linux kbuild
-
-ifeq ("$(origin V)", "command line")
-  VERBOSE = $(V)
-endif
-ifndef VERBOSE
-  VERBOSE = 0
-endif
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-export prefix libdir src obj
-
-# Shell quotes
-libdir_SQ = $(subst ','\'',$(libdir))
-libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
-
-CONFIG_INCLUDES = 
-CONFIG_LIBS	=
-CONFIG_FLAGS	=
-
-VERSION		= $(EP_VERSION)
-PATCHLEVEL	= $(EP_PATCHLEVEL)
-EXTRAVERSION	= $(EP_EXTRAVERSION)
-
-OBJ		= $@
-N		=
-
-EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
-
-LIB_TARGET  = libtraceevent.a libtraceevent.so.$(EVENT_PARSE_VERSION)
-LIB_INSTALL = libtraceevent.a libtraceevent.so*
-LIB_INSTALL := $(addprefix $(OUTPUT),$(LIB_INSTALL))
-
-INCLUDES = -I. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
-
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
-  CFLAGS := $(EXTRA_CFLAGS)
-else
-  CFLAGS := -g -Wall
-endif
-
-# Append required CFLAGS
-override CFLAGS += -fPIC
-override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
-override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
-
-ifeq ($(VERBOSE),1)
-  Q =
-else
-  Q = @
-endif
-
-# Disable command line variables (CFLAGS) override from top
-# level Makefile (perf), otherwise build Makefile will get
-# the same command line setup.
-MAKEOVERRIDES=
-
-export srctree OUTPUT CC LD CFLAGS V
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
-
-TE_IN      := $(OUTPUT)libtraceevent-in.o
-LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET))
-
-CMD_TARGETS = $(LIB_TARGET)
-
-TARGETS = $(CMD_TARGETS)
-
-all: all_cmd plugins
-
-all_cmd: $(CMD_TARGETS)
-
-$(TE_IN): force
-	$(Q)$(MAKE) $(build)=libtraceevent
-
-$(OUTPUT)libtraceevent.so.$(EVENT_PARSE_VERSION): $(TE_IN)
-	$(QUIET_LINK)$(CC) --shared $(LDFLAGS) $^ -Wl,-soname,libtraceevent.so.$(EP_VERSION) -o $@
-	@ln -sf $(@F) $(OUTPUT)libtraceevent.so
-	@ln -sf $(@F) $(OUTPUT)libtraceevent.so.$(EP_VERSION)
-
-$(OUTPUT)libtraceevent.a: $(TE_IN)
-	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
-
-$(OUTPUT)%.so: $(OUTPUT)%-in.o
-	$(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
-
-define make_version.h
-  (echo '/* This file is automatically generated. Do not modify. */';		\
-   echo \#define VERSION_CODE $(shell						\
-   expr $(VERSION) \* 256 + $(PATCHLEVEL));					\
-   echo '#define EXTRAVERSION ' $(EXTRAVERSION);				\
-   echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"';	\
-   echo '#define FILE_VERSION '$(FILE_VERSION);					\
-  ) > $1
-endef
-
-define update_version.h
-  ($(call make_version.h, $@.tmp);		\
-    if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
-      rm -f $@.tmp;				\
-    else					\
-      echo '  UPDATE                 $@';	\
-      mv -f $@.tmp $@;				\
-    fi);
-endef
-
-ep_version.h: force
-	$(Q)$(N)$(call update_version.h)
-
-VERSION_FILES = ep_version.h
-
-define update_dir
-  (echo $1 > $@.tmp;				\
-   if [ -r $@ ] && cmp -s $@ $@.tmp; then	\
-     rm -f $@.tmp;				\
-   else						\
-     echo '  UPDATE                 $@';	\
-     mv -f $@.tmp $@;				\
-   fi);
-endef
-
-tags:	force
-	$(RM) tags
-	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
-	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
-
-TAGS:	force
-	$(RM) TAGS
-	find . -name '*.[ch]' | xargs etags \
-	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
-
-define do_install_mkdir
-	if [ ! -d '$(DESTDIR_SQ)$1' ]; then		\
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1';	\
-	fi
-endef
-
-define do_install
-	$(call do_install_mkdir,$2);			\
-	$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
-endef
-
-PKG_CONFIG_SOURCE_FILE = libtraceevent.pc
-PKG_CONFIG_FILE := $(addprefix $(OUTPUT),$(PKG_CONFIG_SOURCE_FILE))
-define do_install_pkgconfig_file
-	if [ -n "${pkgconfig_dir}" ]; then 					\
-		cp -f ${PKG_CONFIG_SOURCE_FILE}.template ${PKG_CONFIG_FILE};	\
-		sed -i "s|INSTALL_PREFIX|${1}|g" ${PKG_CONFIG_FILE}; 		\
-		sed -i "s|LIB_VERSION|${EVENT_PARSE_VERSION}|g" ${PKG_CONFIG_FILE}; \
-		sed -i "s|LIB_DIR|${libdir}|g" ${PKG_CONFIG_FILE}; \
-		sed -i "s|HEADER_DIR|$(includedir)|g" ${PKG_CONFIG_FILE}; \
-		$(call do_install,$(PKG_CONFIG_FILE),$(pkgconfig_dir),644); 	\
-	else 									\
-		(echo Failed to locate pkg-config directory) 1>&2;		\
-	fi
-endef
-
-install_lib: all_cmd install_plugins install_headers install_pkgconfig
-	$(call QUIET_INSTALL, $(LIB_TARGET)) \
-		$(call do_install_mkdir,$(libdir_SQ)); \
-		cp -fpR $(LIB_INSTALL) $(DESTDIR)$(libdir_SQ)
-
-install_pkgconfig:
-	$(call QUIET_INSTALL, $(PKG_CONFIG_FILE)) \
-		$(call do_install_pkgconfig_file,$(prefix))
-
-install_headers:
-	$(call QUIET_INSTALL, traceevent_headers) \
-		$(call do_install,event-parse.h,$(includedir_SQ),644); \
-		$(call do_install,event-utils.h,$(includedir_SQ),644); \
-		$(call do_install,trace-seq.h,$(includedir_SQ),644); \
-		$(call do_install,kbuffer.h,$(includedir_SQ),644);
-
-install: install_lib
-
-clean: clean_plugins
-	$(call QUIET_CLEAN, libtraceevent) \
-		$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
-		$(RM) TRACEEVENT-CFLAGS tags TAGS; \
-		$(RM) $(PKG_CONFIG_FILE)
-
-PHONY += doc
-doc:
-	$(call descend,Documentation)
-
-PHONY += doc-clean
-doc-clean:
-	$(call descend,Documentation,clean)
-
-PHONY += doc-install
-doc-install:
-	$(call descend,Documentation,install)
-
-PHONY += doc-uninstall
-doc-uninstall:
-	$(call descend,Documentation,uninstall)
-
-PHONY += help
-help:
-	@echo 'Possible targets:'
-	@echo''
-	@echo '  all                 - default, compile the library and the'\
-				      'plugins'
-	@echo '  plugins             - compile the plugins'
-	@echo '  install             - install the library, the plugins,'\
-					'the header and pkgconfig files'
-	@echo '  clean               - clean the library and the plugins object files'
-	@echo '  doc                 - compile the documentation files - man'\
-					'and html pages, in the Documentation directory'
-	@echo '  doc-clean           - clean the documentation files'
-	@echo '  doc-install         - install the man pages'
-	@echo '  doc-uninstall       - uninstall the man pages'
-	@echo''
-
-PHONY += plugins
-plugins:
-	$(call descend,plugins)
-
-PHONY += install_plugins
-install_plugins:
-	$(call descend,plugins,install)
-
-PHONY += clean_plugins
-clean_plugins:
-	$(call descend,plugins,clean)
-
-force:
-
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable so we can use it in if_changed and friends.
-.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c
deleted file mode 100644
index f8361e45d446f..0000000000000
--- a/tools/lib/traceevent/event-parse-api.c
+++ /dev/null
@@ -1,333 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#include "event-parse.h"
-#include "event-parse-local.h"
-#include "event-utils.h"
-
-/**
- * tep_get_event - returns the event with the given index
- * @tep: a handle to the tep_handle
- * @index: index of the requested event, in the range 0 .. nr_events
- *
- * This returns pointer to the element of the events array with the given index
- * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned.
- */
-struct tep_event *tep_get_event(struct tep_handle *tep, int index)
-{
-	if (tep && tep->events && index < tep->nr_events)
-		return tep->events[index];
-
-	return NULL;
-}
-
-/**
- * tep_get_first_event - returns the first event in the events array
- * @tep: a handle to the tep_handle
- *
- * This returns pointer to the first element of the events array
- * If @tep is NULL, NULL is returned.
- */
-struct tep_event *tep_get_first_event(struct tep_handle *tep)
-{
-	return tep_get_event(tep, 0);
-}
-
-/**
- * tep_get_events_count - get the number of defined events
- * @tep: a handle to the tep_handle
- *
- * This returns number of elements in event array
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_events_count(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->nr_events;
-	return 0;
-}
-
-/**
- * tep_set_flag - set event parser flag
- * @tep: a handle to the tep_handle
- * @flag: flag, or combination of flags to be set
- * can be any combination from enum tep_flag
- *
- * This sets a flag or combination of flags from enum tep_flag
- */
-void tep_set_flag(struct tep_handle *tep, int flag)
-{
-	if (tep)
-		tep->flags |= flag;
-}
-
-/**
- * tep_clear_flag - clear event parser flag
- * @tep: a handle to the tep_handle
- * @flag: flag to be cleared
- *
- * This clears a tep flag
- */
-void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag)
-{
-	if (tep)
-		tep->flags &= ~flag;
-}
-
-/**
- * tep_test_flag - check the state of event parser flag
- * @tep: a handle to the tep_handle
- * @flag: flag to be checked
- *
- * This returns the state of the requested tep flag.
- * Returns: true if the flag is set, false otherwise.
- */
-bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag)
-{
-	if (tep)
-		return tep->flags & flag;
-	return false;
-}
-
-__hidden unsigned short data2host2(struct tep_handle *tep, unsigned short data)
-{
-	unsigned short swap;
-
-	if (!tep || tep->host_bigendian == tep->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 8) |
-		((data & (0xffULL << 8)) >> 8);
-
-	return swap;
-}
-
-__hidden unsigned int data2host4(struct tep_handle *tep, unsigned int data)
-{
-	unsigned int swap;
-
-	if (!tep || tep->host_bigendian == tep->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 24) |
-		((data & (0xffULL << 8)) << 8) |
-		((data & (0xffULL << 16)) >> 8) |
-		((data & (0xffULL << 24)) >> 24);
-
-	return swap;
-}
-
-__hidden  unsigned long long
-data2host8(struct tep_handle *tep, unsigned long long data)
-{
-	unsigned long long swap;
-
-	if (!tep || tep->host_bigendian == tep->file_bigendian)
-		return data;
-
-	swap = ((data & 0xffULL) << 56) |
-		((data & (0xffULL << 8)) << 40) |
-		((data & (0xffULL << 16)) << 24) |
-		((data & (0xffULL << 24)) << 8) |
-		((data & (0xffULL << 32)) >> 8) |
-		((data & (0xffULL << 40)) >> 24) |
-		((data & (0xffULL << 48)) >> 40) |
-		((data & (0xffULL << 56)) >> 56);
-
-	return swap;
-}
-
-/**
- * tep_get_header_page_size - get size of the header page
- * @tep: a handle to the tep_handle
- *
- * This returns size of the header page
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_header_page_size(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->header_page_size_size;
-	return 0;
-}
-
-/**
- * tep_get_header_timestamp_size - get size of the timestamp in the header page
- * @tep: a handle to the tep_handle
- *
- * This returns size of the timestamp in the header page
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_header_timestamp_size(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->header_page_ts_size;
-	return 0;
-}
-
-/**
- * tep_get_cpus - get the number of CPUs
- * @tep: a handle to the tep_handle
- *
- * This returns the number of CPUs
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_cpus(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->cpus;
-	return 0;
-}
-
-/**
- * tep_set_cpus - set the number of CPUs
- * @tep: a handle to the tep_handle
- *
- * This sets the number of CPUs
- */
-void tep_set_cpus(struct tep_handle *tep, int cpus)
-{
-	if (tep)
-		tep->cpus = cpus;
-}
-
-/**
- * tep_get_long_size - get the size of a long integer on the traced machine
- * @tep: a handle to the tep_handle
- *
- * This returns the size of a long integer on the traced machine
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_long_size(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->long_size;
-	return 0;
-}
-
-/**
- * tep_set_long_size - set the size of a long integer on the traced machine
- * @tep: a handle to the tep_handle
- * @size: size, in bytes, of a long integer
- *
- * This sets the size of a long integer on the traced machine
- */
-void tep_set_long_size(struct tep_handle *tep, int long_size)
-{
-	if (tep)
-		tep->long_size = long_size;
-}
-
-/**
- * tep_get_page_size - get the size of a memory page on the traced machine
- * @tep: a handle to the tep_handle
- *
- * This returns the size of a memory page on the traced machine
- * If @tep is NULL, 0 is returned.
- */
-int tep_get_page_size(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->page_size;
-	return 0;
-}
-
-/**
- * tep_set_page_size - set the size of a memory page on the traced machine
- * @tep: a handle to the tep_handle
- * @_page_size: size of a memory page, in bytes
- *
- * This sets the size of a memory page on the traced machine
- */
-void tep_set_page_size(struct tep_handle *tep, int _page_size)
-{
-	if (tep)
-		tep->page_size = _page_size;
-}
-
-/**
- * tep_is_file_bigendian - return the endian of the file
- * @tep: a handle to the tep_handle
- *
- * This returns true if the file is in big endian order
- * If @tep is NULL, false is returned.
- */
-bool tep_is_file_bigendian(struct tep_handle *tep)
-{
-	if (tep)
-		return (tep->file_bigendian == TEP_BIG_ENDIAN);
-	return false;
-}
-
-/**
- * tep_set_file_bigendian - set if the file is in big endian order
- * @tep: a handle to the tep_handle
- * @endian: non zero, if the file is in big endian order
- *
- * This sets if the file is in big endian order
- */
-void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian)
-{
-	if (tep)
-		tep->file_bigendian = endian;
-}
-
-/**
- * tep_is_local_bigendian - return the endian of the saved local machine
- * @tep: a handle to the tep_handle
- *
- * This returns true if the saved local machine in @tep is big endian.
- * If @tep is NULL, false is returned.
- */
-bool tep_is_local_bigendian(struct tep_handle *tep)
-{
-	if (tep)
-		return (tep->host_bigendian == TEP_BIG_ENDIAN);
-	return 0;
-}
-
-/**
- * tep_set_local_bigendian - set the stored local machine endian order
- * @tep: a handle to the tep_handle
- * @endian: non zero, if the local host has big endian order
- *
- * This sets the endian order for the local machine.
- */
-void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian)
-{
-	if (tep)
-		tep->host_bigendian = endian;
-}
-
-/**
- * tep_is_old_format - get if an old kernel is used
- * @tep: a handle to the tep_handle
- *
- * This returns true, if an old kernel is used to generate the tracing events or
- * false if a new kernel is used. Old kernels did not have header page info.
- * If @tep is NULL, false is returned.
- */
-bool tep_is_old_format(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->old_format;
-	return false;
-}
-
-/**
- * tep_set_test_filters - set a flag to test a filter string
- * @tep: a handle to the tep_handle
- * @test_filters: the new value of the test_filters flag
- *
- * This sets a flag to test a filter string. If this flag is set, when
- * tep_filter_add_filter_str() API as called,it will print the filter string
- * instead of adding it.
- */
-void tep_set_test_filters(struct tep_handle *tep, int test_filters)
-{
-	if (tep)
-		tep->test_filters = test_filters;
-}
diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h
deleted file mode 100644
index fd4bbcfbb8493..0000000000000
--- a/tools/lib/traceevent/event-parse-local.h
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#ifndef _PARSE_EVENTS_INT_H
-#define _PARSE_EVENTS_INT_H
-
-struct tep_cmdline;
-struct cmdline_list;
-struct func_map;
-struct func_list;
-struct event_handler;
-struct func_resolver;
-struct tep_plugins_dir;
-
-#define __hidden __attribute__((visibility ("hidden")))
-
-struct tep_handle {
-	int ref_count;
-
-	int header_page_ts_offset;
-	int header_page_ts_size;
-	int header_page_size_offset;
-	int header_page_size_size;
-	int header_page_data_offset;
-	int header_page_data_size;
-	int header_page_overwrite;
-
-	enum tep_endian file_bigendian;
-	enum tep_endian host_bigendian;
-
-	int old_format;
-
-	int cpus;
-	int long_size;
-	int page_size;
-
-	struct tep_cmdline *cmdlines;
-	struct cmdline_list *cmdlist;
-	int cmdline_count;
-
-	struct func_map *func_map;
-	struct func_resolver *func_resolver;
-	struct func_list *funclist;
-	unsigned int func_count;
-
-	struct printk_map *printk_map;
-	struct printk_list *printklist;
-	unsigned int printk_count;
-
-	struct tep_event **events;
-	int nr_events;
-	struct tep_event **sort_events;
-	enum tep_event_sort_type last_type;
-
-	int type_offset;
-	int type_size;
-
-	int pid_offset;
-	int pid_size;
-
-	int pc_offset;
-	int pc_size;
-
-	int flags_offset;
-	int flags_size;
-
-	int ld_offset;
-	int ld_size;
-
-	int test_filters;
-
-	int flags;
-
-	struct tep_format_field *bprint_ip_field;
-	struct tep_format_field *bprint_fmt_field;
-	struct tep_format_field *bprint_buf_field;
-
-	struct event_handler *handlers;
-	struct tep_function_handler *func_handlers;
-
-	/* cache */
-	struct tep_event *last_event;
-
-	struct tep_plugins_dir *plugins_dir;
-};
-
-enum tep_print_parse_type {
-	PRINT_FMT_STRING,
-	PRINT_FMT_ARG_DIGIT,
-	PRINT_FMT_ARG_POINTER,
-	PRINT_FMT_ARG_STRING,
-};
-
-struct tep_print_parse {
-	struct tep_print_parse	*next;
-
-	char				*format;
-	int				ls;
-	enum tep_print_parse_type	type;
-	struct tep_print_arg		*arg;
-	struct tep_print_arg		*len_as_arg;
-};
-
-void free_tep_event(struct tep_event *event);
-void free_tep_format_field(struct tep_format_field *field);
-void free_tep_plugin_paths(struct tep_handle *tep);
-
-unsigned short data2host2(struct tep_handle *tep, unsigned short data);
-unsigned int data2host4(struct tep_handle *tep, unsigned int data);
-unsigned long long data2host8(struct tep_handle *tep, unsigned long long data);
-
-/* access to the internal parser */
-int peek_char(void);
-void init_input_buf(const char *buf, unsigned long long size);
-unsigned long long get_input_buf_ptr(void);
-const char *get_input_buf(void);
-enum tep_event_type read_token(char **tok);
-void free_token(char *tok);
-
-#endif /* _PARSE_EVENTS_INT_H */
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
deleted file mode 100644
index 8e24c4c78c7f0..0000000000000
--- a/tools/lib/traceevent/event-parse.c
+++ /dev/null
@@ -1,7624 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- *
- *  The parts for function graph printing was taken and modified from the
- *  Linux Kernel that were written by
- *    - Copyright (C) 2009  Frederic Weisbecker,
- *  Frederic Weisbecker gave his permission to relicense the code to
- *  the Lesser General Public License.
- */
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <ctype.h>
-#include <errno.h>
-#include <stdint.h>
-#include <limits.h>
-#include <linux/time64.h>
-
-#include <netinet/in.h>
-#include "event-parse.h"
-
-#include "event-parse-local.h"
-#include "event-utils.h"
-#include "trace-seq.h"
-
-static const char *input_buf;
-static unsigned long long input_buf_ptr;
-static unsigned long long input_buf_siz;
-
-static int is_flag_field;
-static int is_symbolic_field;
-
-static int show_warning = 1;
-
-#define do_warning(fmt, ...)				\
-	do {						\
-		if (show_warning)			\
-			warning(fmt, ##__VA_ARGS__);	\
-	} while (0)
-
-#define do_warning_event(event, fmt, ...)			\
-	do {							\
-		if (!show_warning)				\
-			continue;				\
-								\
-		if (event)					\
-			warning("[%s:%s] " fmt, event->system,	\
-				event->name, ##__VA_ARGS__);	\
-		else						\
-			warning(fmt, ##__VA_ARGS__);		\
-	} while (0)
-
-/**
- * init_input_buf - init buffer for parsing
- * @buf: buffer to parse
- * @size: the size of the buffer
- *
- * Initializes the internal buffer that tep_read_token() will parse.
- */
-__hidden void init_input_buf(const char *buf, unsigned long long size)
-{
-	input_buf = buf;
-	input_buf_siz = size;
-	input_buf_ptr = 0;
-}
-
-__hidden const char *get_input_buf(void)
-{
-	return input_buf;
-}
-
-__hidden unsigned long long get_input_buf_ptr(void)
-{
-	return input_buf_ptr;
-}
-
-struct event_handler {
-	struct event_handler		*next;
-	int				id;
-	const char			*sys_name;
-	const char			*event_name;
-	tep_event_handler_func		func;
-	void				*context;
-};
-
-struct func_params {
-	struct func_params	*next;
-	enum tep_func_arg_type	type;
-};
-
-struct tep_function_handler {
-	struct tep_function_handler	*next;
-	enum tep_func_arg_type		ret_type;
-	char				*name;
-	tep_func_handler		func;
-	struct func_params		*params;
-	int				nr_args;
-};
-
-static unsigned long long
-process_defined_func(struct trace_seq *s, void *data, int size,
-		     struct tep_event *event, struct tep_print_arg *arg);
-
-static void free_func_handle(struct tep_function_handler *func);
-
-void breakpoint(void)
-{
-	static int x;
-	x++;
-}
-
-static struct tep_print_arg *alloc_arg(void)
-{
-	return calloc(1, sizeof(struct tep_print_arg));
-}
-
-struct tep_cmdline {
-	char *comm;
-	int pid;
-};
-
-static int cmdline_cmp(const void *a, const void *b)
-{
-	const struct tep_cmdline *ca = a;
-	const struct tep_cmdline *cb = b;
-
-	if (ca->pid < cb->pid)
-		return -1;
-	if (ca->pid > cb->pid)
-		return 1;
-
-	return 0;
-}
-
-/* Looking for where to place the key */
-static int cmdline_slot_cmp(const void *a, const void *b)
-{
-	const struct tep_cmdline *ca = a;
-	const struct tep_cmdline *cb = b;
-	const struct tep_cmdline *cb1 = cb + 1;
-
-	if (ca->pid < cb->pid)
-		return -1;
-
-	if (ca->pid > cb->pid) {
-		if (ca->pid <= cb1->pid)
-			return 0;
-		return 1;
-	}
-
-	return 0;
-}
-
-struct cmdline_list {
-	struct cmdline_list	*next;
-	char			*comm;
-	int			pid;
-};
-
-static int cmdline_init(struct tep_handle *tep)
-{
-	struct cmdline_list *cmdlist = tep->cmdlist;
-	struct cmdline_list *item;
-	struct tep_cmdline *cmdlines;
-	int i;
-
-	cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count);
-	if (!cmdlines)
-		return -1;
-
-	i = 0;
-	while (cmdlist) {
-		cmdlines[i].pid = cmdlist->pid;
-		cmdlines[i].comm = cmdlist->comm;
-		i++;
-		item = cmdlist;
-		cmdlist = cmdlist->next;
-		free(item);
-	}
-
-	qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
-
-	tep->cmdlines = cmdlines;
-	tep->cmdlist = NULL;
-
-	return 0;
-}
-
-static const char *find_cmdline(struct tep_handle *tep, int pid)
-{
-	const struct tep_cmdline *comm;
-	struct tep_cmdline key;
-
-	if (!pid)
-		return "<idle>";
-
-	if (!tep->cmdlines && cmdline_init(tep))
-		return "<not enough memory for cmdlines!>";
-
-	key.pid = pid;
-
-	comm = bsearch(&key, tep->cmdlines, tep->cmdline_count,
-		       sizeof(*tep->cmdlines), cmdline_cmp);
-
-	if (comm)
-		return comm->comm;
-	return "<...>";
-}
-
-/**
- * tep_is_pid_registered - return if a pid has a cmdline registered
- * @tep: a handle to the trace event parser context
- * @pid: The pid to check if it has a cmdline registered with.
- *
- * Returns true if the pid has a cmdline mapped to it
- * false otherwise.
- */
-bool tep_is_pid_registered(struct tep_handle *tep, int pid)
-{
-	const struct tep_cmdline *comm;
-	struct tep_cmdline key;
-
-	if (!pid)
-		return true;
-
-	if (!tep->cmdlines && cmdline_init(tep))
-		return false;
-
-	key.pid = pid;
-
-	comm = bsearch(&key, tep->cmdlines, tep->cmdline_count,
-		       sizeof(*tep->cmdlines), cmdline_cmp);
-
-	if (comm)
-		return true;
-	return false;
-}
-
-/*
- * If the command lines have been converted to an array, then
- * we must add this pid. This is much slower than when cmdlines
- * are added before the array is initialized.
- */
-static int add_new_comm(struct tep_handle *tep,
-			const char *comm, int pid, bool override)
-{
-	struct tep_cmdline *cmdlines = tep->cmdlines;
-	struct tep_cmdline *cmdline;
-	struct tep_cmdline key;
-	char *new_comm;
-	int cnt;
-
-	if (!pid)
-		return 0;
-
-	/* avoid duplicates */
-	key.pid = pid;
-
-	cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count,
-			  sizeof(*tep->cmdlines), cmdline_cmp);
-	if (cmdline) {
-		if (!override) {
-			errno = EEXIST;
-			return -1;
-		}
-		new_comm = strdup(comm);
-		if (!new_comm) {
-			errno = ENOMEM;
-			return -1;
-		}
-		free(cmdline->comm);
-		cmdline->comm = new_comm;
-
-		return 0;
-	}
-
-	cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1));
-	if (!cmdlines) {
-		errno = ENOMEM;
-		return -1;
-	}
-	tep->cmdlines = cmdlines;
-
-	key.comm = strdup(comm);
-	if (!key.comm) {
-		errno = ENOMEM;
-		return -1;
-	}
-
-	if (!tep->cmdline_count) {
-		/* no entries yet */
-		tep->cmdlines[0] = key;
-		tep->cmdline_count++;
-		return 0;
-	}
-
-	/* Now find where we want to store the new cmdline */
-	cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count - 1,
-			  sizeof(*tep->cmdlines), cmdline_slot_cmp);
-
-	cnt = tep->cmdline_count;
-	if (cmdline) {
-		/* cmdline points to the one before the spot we want */
-		cmdline++;
-		cnt -= cmdline - tep->cmdlines;
-
-	} else {
-		/* The new entry is either before or after the list */
-		if (key.pid > tep->cmdlines[tep->cmdline_count - 1].pid) {
-			tep->cmdlines[tep->cmdline_count++] = key;
-			return 0;
-		}
-		cmdline = &tep->cmdlines[0];
-	}
-	memmove(cmdline + 1, cmdline, (cnt * sizeof(*cmdline)));
-	*cmdline = key;
-
-	tep->cmdline_count++;
-
-	return 0;
-}
-
-static int _tep_register_comm(struct tep_handle *tep,
-			      const char *comm, int pid, bool override)
-{
-	struct cmdline_list *item;
-
-	if (tep->cmdlines)
-		return add_new_comm(tep, comm, pid, override);
-
-	item = malloc(sizeof(*item));
-	if (!item)
-		return -1;
-
-	if (comm)
-		item->comm = strdup(comm);
-	else
-		item->comm = strdup("<...>");
-	if (!item->comm) {
-		free(item);
-		return -1;
-	}
-	item->pid = pid;
-	item->next = tep->cmdlist;
-
-	tep->cmdlist = item;
-	tep->cmdline_count++;
-
-	return 0;
-}
-
-/**
- * tep_register_comm - register a pid / comm mapping
- * @tep: a handle to the trace event parser context
- * @comm: the command line to register
- * @pid: the pid to map the command line to
- *
- * This adds a mapping to search for command line names with
- * a given pid. The comm is duplicated. If a command with the same pid
- * already exist, -1 is returned and errno is set to EEXIST
- */
-int tep_register_comm(struct tep_handle *tep, const char *comm, int pid)
-{
-	return _tep_register_comm(tep, comm, pid, false);
-}
-
-/**
- * tep_override_comm - register a pid / comm mapping
- * @tep: a handle to the trace event parser context
- * @comm: the command line to register
- * @pid: the pid to map the command line to
- *
- * This adds a mapping to search for command line names with
- * a given pid. The comm is duplicated. If a command with the same pid
- * already exist, the command string is udapted with the new one
- */
-int tep_override_comm(struct tep_handle *tep, const char *comm, int pid)
-{
-	if (!tep->cmdlines && cmdline_init(tep)) {
-		errno = ENOMEM;
-		return -1;
-	}
-	return _tep_register_comm(tep, comm, pid, true);
-}
-
-struct func_map {
-	unsigned long long		addr;
-	char				*func;
-	char				*mod;
-};
-
-struct func_list {
-	struct func_list	*next;
-	unsigned long long	addr;
-	char			*func;
-	char			*mod;
-};
-
-static int func_cmp(const void *a, const void *b)
-{
-	const struct func_map *fa = a;
-	const struct func_map *fb = b;
-
-	if (fa->addr < fb->addr)
-		return -1;
-	if (fa->addr > fb->addr)
-		return 1;
-
-	return 0;
-}
-
-/*
- * We are searching for a record in between, not an exact
- * match.
- */
-static int func_bcmp(const void *a, const void *b)
-{
-	const struct func_map *fa = a;
-	const struct func_map *fb = b;
-
-	if ((fa->addr == fb->addr) ||
-
-	    (fa->addr > fb->addr &&
-	     fa->addr < (fb+1)->addr))
-		return 0;
-
-	if (fa->addr < fb->addr)
-		return -1;
-
-	return 1;
-}
-
-static int func_map_init(struct tep_handle *tep)
-{
-	struct func_list *funclist;
-	struct func_list *item;
-	struct func_map *func_map;
-	int i;
-
-	func_map = malloc(sizeof(*func_map) * (tep->func_count + 1));
-	if (!func_map)
-		return -1;
-
-	funclist = tep->funclist;
-
-	i = 0;
-	while (funclist) {
-		func_map[i].func = funclist->func;
-		func_map[i].addr = funclist->addr;
-		func_map[i].mod = funclist->mod;
-		i++;
-		item = funclist;
-		funclist = funclist->next;
-		free(item);
-	}
-
-	qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp);
-
-	/*
-	 * Add a special record at the end.
-	 */
-	func_map[tep->func_count].func = NULL;
-	func_map[tep->func_count].addr = 0;
-	func_map[tep->func_count].mod = NULL;
-
-	tep->func_map = func_map;
-	tep->funclist = NULL;
-
-	return 0;
-}
-
-static struct func_map *
-__find_func(struct tep_handle *tep, unsigned long long addr)
-{
-	struct func_map *func;
-	struct func_map key;
-
-	if (!tep->func_map)
-		func_map_init(tep);
-
-	key.addr = addr;
-
-	func = bsearch(&key, tep->func_map, tep->func_count,
-		       sizeof(*tep->func_map), func_bcmp);
-
-	return func;
-}
-
-struct func_resolver {
-	tep_func_resolver_t	*func;
-	void			*priv;
-	struct func_map		map;
-};
-
-/**
- * tep_set_function_resolver - set an alternative function resolver
- * @tep: a handle to the trace event parser context
- * @resolver: function to be used
- * @priv: resolver function private state.
- *
- * Some tools may have already a way to resolve kernel functions, allow them to
- * keep using it instead of duplicating all the entries inside tep->funclist.
- */
-int tep_set_function_resolver(struct tep_handle *tep,
-			      tep_func_resolver_t *func, void *priv)
-{
-	struct func_resolver *resolver = malloc(sizeof(*resolver));
-
-	if (resolver == NULL)
-		return -1;
-
-	resolver->func = func;
-	resolver->priv = priv;
-
-	free(tep->func_resolver);
-	tep->func_resolver = resolver;
-
-	return 0;
-}
-
-/**
- * tep_reset_function_resolver - reset alternative function resolver
- * @tep: a handle to the trace event parser context
- *
- * Stop using whatever alternative resolver was set, use the default
- * one instead.
- */
-void tep_reset_function_resolver(struct tep_handle *tep)
-{
-	free(tep->func_resolver);
-	tep->func_resolver = NULL;
-}
-
-static struct func_map *
-find_func(struct tep_handle *tep, unsigned long long addr)
-{
-	struct func_map *map;
-
-	if (!tep->func_resolver)
-		return __find_func(tep, addr);
-
-	map = &tep->func_resolver->map;
-	map->mod  = NULL;
-	map->addr = addr;
-	map->func = tep->func_resolver->func(tep->func_resolver->priv,
-					     &map->addr, &map->mod);
-	if (map->func == NULL)
-		return NULL;
-
-	return map;
-}
-
-/**
- * tep_find_function - find a function by a given address
- * @tep: a handle to the trace event parser context
- * @addr: the address to find the function with
- *
- * Returns a pointer to the function stored that has the given
- * address. Note, the address does not have to be exact, it
- * will select the function that would contain the address.
- */
-const char *tep_find_function(struct tep_handle *tep, unsigned long long addr)
-{
-	struct func_map *map;
-
-	map = find_func(tep, addr);
-	if (!map)
-		return NULL;
-
-	return map->func;
-}
-
-/**
- * tep_find_function_address - find a function address by a given address
- * @tep: a handle to the trace event parser context
- * @addr: the address to find the function with
- *
- * Returns the address the function starts at. This can be used in
- * conjunction with tep_find_function to print both the function
- * name and the function offset.
- */
-unsigned long long
-tep_find_function_address(struct tep_handle *tep, unsigned long long addr)
-{
-	struct func_map *map;
-
-	map = find_func(tep, addr);
-	if (!map)
-		return 0;
-
-	return map->addr;
-}
-
-/**
- * tep_register_function - register a function with a given address
- * @tep: a handle to the trace event parser context
- * @function: the function name to register
- * @addr: the address the function starts at
- * @mod: the kernel module the function may be in (NULL for none)
- *
- * This registers a function name with an address and module.
- * The @func passed in is duplicated.
- */
-int tep_register_function(struct tep_handle *tep, char *func,
-			  unsigned long long addr, char *mod)
-{
-	struct func_list *item = malloc(sizeof(*item));
-
-	if (!item)
-		return -1;
-
-	item->next = tep->funclist;
-	item->func = strdup(func);
-	if (!item->func)
-		goto out_free;
-
-	if (mod) {
-		item->mod = strdup(mod);
-		if (!item->mod)
-			goto out_free_func;
-	} else
-		item->mod = NULL;
-	item->addr = addr;
-
-	tep->funclist = item;
-	tep->func_count++;
-
-	return 0;
-
-out_free_func:
-	free(item->func);
-	item->func = NULL;
-out_free:
-	free(item);
-	errno = ENOMEM;
-	return -1;
-}
-
-/**
- * tep_print_funcs - print out the stored functions
- * @tep: a handle to the trace event parser context
- *
- * This prints out the stored functions.
- */
-void tep_print_funcs(struct tep_handle *tep)
-{
-	int i;
-
-	if (!tep->func_map)
-		func_map_init(tep);
-
-	for (i = 0; i < (int)tep->func_count; i++) {
-		printf("%016llx %s",
-		       tep->func_map[i].addr,
-		       tep->func_map[i].func);
-		if (tep->func_map[i].mod)
-			printf(" [%s]\n", tep->func_map[i].mod);
-		else
-			printf("\n");
-	}
-}
-
-struct printk_map {
-	unsigned long long		addr;
-	char				*printk;
-};
-
-struct printk_list {
-	struct printk_list	*next;
-	unsigned long long	addr;
-	char			*printk;
-};
-
-static int printk_cmp(const void *a, const void *b)
-{
-	const struct printk_map *pa = a;
-	const struct printk_map *pb = b;
-
-	if (pa->addr < pb->addr)
-		return -1;
-	if (pa->addr > pb->addr)
-		return 1;
-
-	return 0;
-}
-
-static int printk_map_init(struct tep_handle *tep)
-{
-	struct printk_list *printklist;
-	struct printk_list *item;
-	struct printk_map *printk_map;
-	int i;
-
-	printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1));
-	if (!printk_map)
-		return -1;
-
-	printklist = tep->printklist;
-
-	i = 0;
-	while (printklist) {
-		printk_map[i].printk = printklist->printk;
-		printk_map[i].addr = printklist->addr;
-		i++;
-		item = printklist;
-		printklist = printklist->next;
-		free(item);
-	}
-
-	qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp);
-
-	tep->printk_map = printk_map;
-	tep->printklist = NULL;
-
-	return 0;
-}
-
-static struct printk_map *
-find_printk(struct tep_handle *tep, unsigned long long addr)
-{
-	struct printk_map *printk;
-	struct printk_map key;
-
-	if (!tep->printk_map && printk_map_init(tep))
-		return NULL;
-
-	key.addr = addr;
-
-	printk = bsearch(&key, tep->printk_map, tep->printk_count,
-			 sizeof(*tep->printk_map), printk_cmp);
-
-	return printk;
-}
-
-/**
- * tep_register_print_string - register a string by its address
- * @tep: a handle to the trace event parser context
- * @fmt: the string format to register
- * @addr: the address the string was located at
- *
- * This registers a string by the address it was stored in the kernel.
- * The @fmt passed in is duplicated.
- */
-int tep_register_print_string(struct tep_handle *tep, const char *fmt,
-			      unsigned long long addr)
-{
-	struct printk_list *item = malloc(sizeof(*item));
-	char *p;
-
-	if (!item)
-		return -1;
-
-	item->next = tep->printklist;
-	item->addr = addr;
-
-	/* Strip off quotes and '\n' from the end */
-	if (fmt[0] == '"')
-		fmt++;
-	item->printk = strdup(fmt);
-	if (!item->printk)
-		goto out_free;
-
-	p = item->printk + strlen(item->printk) - 1;
-	if (*p == '"')
-		*p = 0;
-
-	p -= 2;
-	if (strcmp(p, "\\n") == 0)
-		*p = 0;
-
-	tep->printklist = item;
-	tep->printk_count++;
-
-	return 0;
-
-out_free:
-	free(item);
-	errno = ENOMEM;
-	return -1;
-}
-
-/**
- * tep_print_printk - print out the stored strings
- * @tep: a handle to the trace event parser context
- *
- * This prints the string formats that were stored.
- */
-void tep_print_printk(struct tep_handle *tep)
-{
-	int i;
-
-	if (!tep->printk_map)
-		printk_map_init(tep);
-
-	for (i = 0; i < (int)tep->printk_count; i++) {
-		printf("%016llx %s\n",
-		       tep->printk_map[i].addr,
-		       tep->printk_map[i].printk);
-	}
-}
-
-static struct tep_event *alloc_event(void)
-{
-	return calloc(1, sizeof(struct tep_event));
-}
-
-static int add_event(struct tep_handle *tep, struct tep_event *event)
-{
-	int i;
-	struct tep_event **events = realloc(tep->events, sizeof(event) *
-					    (tep->nr_events + 1));
-	if (!events)
-		return -1;
-
-	tep->events = events;
-
-	for (i = 0; i < tep->nr_events; i++) {
-		if (tep->events[i]->id > event->id)
-			break;
-	}
-	if (i < tep->nr_events)
-		memmove(&tep->events[i + 1],
-			&tep->events[i],
-			sizeof(event) * (tep->nr_events - i));
-
-	tep->events[i] = event;
-	tep->nr_events++;
-
-	event->tep = tep;
-
-	return 0;
-}
-
-static int event_item_type(enum tep_event_type type)
-{
-	switch (type) {
-	case TEP_EVENT_ITEM ... TEP_EVENT_SQUOTE:
-		return 1;
-	case TEP_EVENT_ERROR ... TEP_EVENT_DELIM:
-	default:
-		return 0;
-	}
-}
-
-static void free_flag_sym(struct tep_print_flag_sym *fsym)
-{
-	struct tep_print_flag_sym *next;
-
-	while (fsym) {
-		next = fsym->next;
-		free(fsym->value);
-		free(fsym->str);
-		free(fsym);
-		fsym = next;
-	}
-}
-
-static void free_arg(struct tep_print_arg *arg)
-{
-	struct tep_print_arg *farg;
-
-	if (!arg)
-		return;
-
-	switch (arg->type) {
-	case TEP_PRINT_ATOM:
-		free(arg->atom.atom);
-		break;
-	case TEP_PRINT_FIELD:
-		free(arg->field.name);
-		break;
-	case TEP_PRINT_FLAGS:
-		free_arg(arg->flags.field);
-		free(arg->flags.delim);
-		free_flag_sym(arg->flags.flags);
-		break;
-	case TEP_PRINT_SYMBOL:
-		free_arg(arg->symbol.field);
-		free_flag_sym(arg->symbol.symbols);
-		break;
-	case TEP_PRINT_HEX:
-	case TEP_PRINT_HEX_STR:
-		free_arg(arg->hex.field);
-		free_arg(arg->hex.size);
-		break;
-	case TEP_PRINT_INT_ARRAY:
-		free_arg(arg->int_array.field);
-		free_arg(arg->int_array.count);
-		free_arg(arg->int_array.el_size);
-		break;
-	case TEP_PRINT_TYPE:
-		free(arg->typecast.type);
-		free_arg(arg->typecast.item);
-		break;
-	case TEP_PRINT_STRING:
-	case TEP_PRINT_BSTRING:
-		free(arg->string.string);
-		break;
-	case TEP_PRINT_BITMASK:
-		free(arg->bitmask.bitmask);
-		break;
-	case TEP_PRINT_DYNAMIC_ARRAY:
-	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
-		free(arg->dynarray.index);
-		break;
-	case TEP_PRINT_OP:
-		free(arg->op.op);
-		free_arg(arg->op.left);
-		free_arg(arg->op.right);
-		break;
-	case TEP_PRINT_FUNC:
-		while (arg->func.args) {
-			farg = arg->func.args;
-			arg->func.args = farg->next;
-			free_arg(farg);
-		}
-		break;
-
-	case TEP_PRINT_NULL:
-	default:
-		break;
-	}
-
-	free(arg);
-}
-
-static enum tep_event_type get_type(int ch)
-{
-	if (ch == '\n')
-		return TEP_EVENT_NEWLINE;
-	if (isspace(ch))
-		return TEP_EVENT_SPACE;
-	if (isalnum(ch) || ch == '_')
-		return TEP_EVENT_ITEM;
-	if (ch == '\'')
-		return TEP_EVENT_SQUOTE;
-	if (ch == '"')
-		return TEP_EVENT_DQUOTE;
-	if (!isprint(ch))
-		return TEP_EVENT_NONE;
-	if (ch == '(' || ch == ')' || ch == ',')
-		return TEP_EVENT_DELIM;
-
-	return TEP_EVENT_OP;
-}
-
-static int __read_char(void)
-{
-	if (input_buf_ptr >= input_buf_siz)
-		return -1;
-
-	return input_buf[input_buf_ptr++];
-}
-
-/**
- * peek_char - peek at the next character that will be read
- *
- * Returns the next character read, or -1 if end of buffer.
- */
-__hidden int peek_char(void)
-{
-	if (input_buf_ptr >= input_buf_siz)
-		return -1;
-
-	return input_buf[input_buf_ptr];
-}
-
-static int extend_token(char **tok, char *buf, int size)
-{
-	char *newtok = realloc(*tok, size);
-
-	if (!newtok) {
-		free(*tok);
-		*tok = NULL;
-		return -1;
-	}
-
-	if (!*tok)
-		strcpy(newtok, buf);
-	else
-		strcat(newtok, buf);
-	*tok = newtok;
-
-	return 0;
-}
-
-static enum tep_event_type force_token(const char *str, char **tok);
-
-static enum tep_event_type __read_token(char **tok)
-{
-	char buf[BUFSIZ];
-	int ch, last_ch, quote_ch, next_ch;
-	int i = 0;
-	int tok_size = 0;
-	enum tep_event_type type;
-
-	*tok = NULL;
-
-
-	ch = __read_char();
-	if (ch < 0)
-		return TEP_EVENT_NONE;
-
-	type = get_type(ch);
-	if (type == TEP_EVENT_NONE)
-		return type;
-
-	buf[i++] = ch;
-
-	switch (type) {
-	case TEP_EVENT_NEWLINE:
-	case TEP_EVENT_DELIM:
-		if (asprintf(tok, "%c", ch) < 0)
-			return TEP_EVENT_ERROR;
-
-		return type;
-
-	case TEP_EVENT_OP:
-		switch (ch) {
-		case '-':
-			next_ch = peek_char();
-			if (next_ch == '>') {
-				buf[i++] = __read_char();
-				break;
-			}
-			/* fall through */
-		case '+':
-		case '|':
-		case '&':
-		case '>':
-		case '<':
-			last_ch = ch;
-			ch = peek_char();
-			if (ch != last_ch)
-				goto test_equal;
-			buf[i++] = __read_char();
-			switch (last_ch) {
-			case '>':
-			case '<':
-				goto test_equal;
-			default:
-				break;
-			}
-			break;
-		case '!':
-		case '=':
-			goto test_equal;
-		default: /* what should we do instead? */
-			break;
-		}
-		buf[i] = 0;
-		*tok = strdup(buf);
-		return type;
-
- test_equal:
-		ch = peek_char();
-		if (ch == '=')
-			buf[i++] = __read_char();
-		goto out;
-
-	case TEP_EVENT_DQUOTE:
-	case TEP_EVENT_SQUOTE:
-		/* don't keep quotes */
-		i--;
-		quote_ch = ch;
-		last_ch = 0;
- concat:
-		do {
-			if (i == (BUFSIZ - 1)) {
-				buf[i] = 0;
-				tok_size += BUFSIZ;
-
-				if (extend_token(tok, buf, tok_size) < 0)
-					return TEP_EVENT_NONE;
-				i = 0;
-			}
-			last_ch = ch;
-			ch = __read_char();
-			buf[i++] = ch;
-			/* the '\' '\' will cancel itself */
-			if (ch == '\\' && last_ch == '\\')
-				last_ch = 0;
-		} while (ch != quote_ch || last_ch == '\\');
-		/* remove the last quote */
-		i--;
-
-		/*
-		 * For strings (double quotes) check the next token.
-		 * If it is another string, concatinate the two.
-		 */
-		if (type == TEP_EVENT_DQUOTE) {
-			unsigned long long save_input_buf_ptr = input_buf_ptr;
-
-			do {
-				ch = __read_char();
-			} while (isspace(ch));
-			if (ch == '"')
-				goto concat;
-			input_buf_ptr = save_input_buf_ptr;
-		}
-
-		goto out;
-
-	case TEP_EVENT_ERROR ... TEP_EVENT_SPACE:
-	case TEP_EVENT_ITEM:
-	default:
-		break;
-	}
-
-	while (get_type(peek_char()) == type) {
-		if (i == (BUFSIZ - 1)) {
-			buf[i] = 0;
-			tok_size += BUFSIZ;
-
-			if (extend_token(tok, buf, tok_size) < 0)
-				return TEP_EVENT_NONE;
-			i = 0;
-		}
-		ch = __read_char();
-		buf[i++] = ch;
-	}
-
- out:
-	buf[i] = 0;
-	if (extend_token(tok, buf, tok_size + i + 1) < 0)
-		return TEP_EVENT_NONE;
-
-	if (type == TEP_EVENT_ITEM) {
-		/*
-		 * Older versions of the kernel has a bug that
-		 * creates invalid symbols and will break the mac80211
-		 * parsing. This is a work around to that bug.
-		 *
-		 * See Linux kernel commit:
-		 *  811cb50baf63461ce0bdb234927046131fc7fa8b
-		 */
-		if (strcmp(*tok, "LOCAL_PR_FMT") == 0) {
-			free(*tok);
-			*tok = NULL;
-			return force_token("\"%s\" ", tok);
-		} else if (strcmp(*tok, "STA_PR_FMT") == 0) {
-			free(*tok);
-			*tok = NULL;
-			return force_token("\" sta:%pM\" ", tok);
-		} else if (strcmp(*tok, "VIF_PR_FMT") == 0) {
-			free(*tok);
-			*tok = NULL;
-			return force_token("\" vif:%p(%d)\" ", tok);
-		}
-	}
-
-	return type;
-}
-
-static enum tep_event_type force_token(const char *str, char **tok)
-{
-	const char *save_input_buf;
-	unsigned long long save_input_buf_ptr;
-	unsigned long long save_input_buf_siz;
-	enum tep_event_type type;
-	
-	/* save off the current input pointers */
-	save_input_buf = input_buf;
-	save_input_buf_ptr = input_buf_ptr;
-	save_input_buf_siz = input_buf_siz;
-
-	init_input_buf(str, strlen(str));
-
-	type = __read_token(tok);
-
-	/* reset back to original token */
-	input_buf = save_input_buf;
-	input_buf_ptr = save_input_buf_ptr;
-	input_buf_siz = save_input_buf_siz;
-
-	return type;
-}
-
-/**
- * free_token - free a token returned by tep_read_token
- * @token: the token to free
- */
-__hidden void free_token(char *tok)
-{
-	if (tok)
-		free(tok);
-}
-
-/**
- * read_token - access to utilities to use the tep parser
- * @tok: The token to return
- *
- * This will parse tokens from the string given by
- * tep_init_data().
- *
- * Returns the token type.
- */
-__hidden enum tep_event_type read_token(char **tok)
-{
-	enum tep_event_type type;
-
-	for (;;) {
-		type = __read_token(tok);
-		if (type != TEP_EVENT_SPACE)
-			return type;
-
-		free_token(*tok);
-	}
-
-	/* not reached */
-	*tok = NULL;
-	return TEP_EVENT_NONE;
-}
-
-/* no newline */
-static enum tep_event_type read_token_item(char **tok)
-{
-	enum tep_event_type type;
-
-	for (;;) {
-		type = __read_token(tok);
-		if (type != TEP_EVENT_SPACE && type != TEP_EVENT_NEWLINE)
-			return type;
-		free_token(*tok);
-		*tok = NULL;
-	}
-
-	/* not reached */
-	*tok = NULL;
-	return TEP_EVENT_NONE;
-}
-
-static int test_type(enum tep_event_type type, enum tep_event_type expect)
-{
-	if (type != expect) {
-		do_warning("Error: expected type %d but read %d",
-		    expect, type);
-		return -1;
-	}
-	return 0;
-}
-
-static int test_type_token(enum tep_event_type type, const char *token,
-		    enum tep_event_type expect, const char *expect_tok)
-{
-	if (type != expect) {
-		do_warning("Error: expected type %d but read %d",
-		    expect, type);
-		return -1;
-	}
-
-	if (strcmp(token, expect_tok) != 0) {
-		do_warning("Error: expected '%s' but read '%s'",
-		    expect_tok, token);
-		return -1;
-	}
-	return 0;
-}
-
-static int __read_expect_type(enum tep_event_type expect, char **tok, int newline_ok)
-{
-	enum tep_event_type type;
-
-	if (newline_ok)
-		type = read_token(tok);
-	else
-		type = read_token_item(tok);
-	return test_type(type, expect);
-}
-
-static int read_expect_type(enum tep_event_type expect, char **tok)
-{
-	return __read_expect_type(expect, tok, 1);
-}
-
-static int __read_expected(enum tep_event_type expect, const char *str,
-			   int newline_ok)
-{
-	enum tep_event_type type;
-	char *token;
-	int ret;
-
-	if (newline_ok)
-		type = read_token(&token);
-	else
-		type = read_token_item(&token);
-
-	ret = test_type_token(type, token, expect, str);
-
-	free_token(token);
-
-	return ret;
-}
-
-static int read_expected(enum tep_event_type expect, const char *str)
-{
-	return __read_expected(expect, str, 1);
-}
-
-static int read_expected_item(enum tep_event_type expect, const char *str)
-{
-	return __read_expected(expect, str, 0);
-}
-
-static char *event_read_name(void)
-{
-	char *token;
-
-	if (read_expected(TEP_EVENT_ITEM, "name") < 0)
-		return NULL;
-
-	if (read_expected(TEP_EVENT_OP, ":") < 0)
-		return NULL;
-
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto fail;
-
-	return token;
-
- fail:
-	free_token(token);
-	return NULL;
-}
-
-static int event_read_id(void)
-{
-	char *token;
-	int id;
-
-	if (read_expected_item(TEP_EVENT_ITEM, "ID") < 0)
-		return -1;
-
-	if (read_expected(TEP_EVENT_OP, ":") < 0)
-		return -1;
-
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto fail;
-
-	id = strtoul(token, NULL, 0);
-	free_token(token);
-	return id;
-
- fail:
-	free_token(token);
-	return -1;
-}
-
-static int field_is_string(struct tep_format_field *field)
-{
-	if ((field->flags & TEP_FIELD_IS_ARRAY) &&
-	    (strstr(field->type, "char") || strstr(field->type, "u8") ||
-	     strstr(field->type, "s8")))
-		return 1;
-
-	return 0;
-}
-
-static int field_is_dynamic(struct tep_format_field *field)
-{
-	if (strncmp(field->type, "__data_loc", 10) == 0)
-		return 1;
-
-	return 0;
-}
-
-static int field_is_relative_dynamic(struct tep_format_field *field)
-{
-	if (strncmp(field->type, "__rel_loc", 9) == 0)
-		return 1;
-
-	return 0;
-}
-
-static int field_is_long(struct tep_format_field *field)
-{
-	/* includes long long */
-	if (strstr(field->type, "long"))
-		return 1;
-
-	return 0;
-}
-
-static unsigned int type_size(const char *name)
-{
-	/* This covers all TEP_FIELD_IS_STRING types. */
-	static struct {
-		const char *type;
-		unsigned int size;
-	} table[] = {
-		{ "u8",   1 },
-		{ "u16",  2 },
-		{ "u32",  4 },
-		{ "u64",  8 },
-		{ "s8",   1 },
-		{ "s16",  2 },
-		{ "s32",  4 },
-		{ "s64",  8 },
-		{ "char", 1 },
-		{ },
-	};
-	int i;
-
-	for (i = 0; table[i].type; i++) {
-		if (!strcmp(table[i].type, name))
-			return table[i].size;
-	}
-
-	return 0;
-}
-
-static int append(char **buf, const char *delim, const char *str)
-{
-	char *new_buf;
-
-	new_buf = realloc(*buf, strlen(*buf) + strlen(delim) + strlen(str) + 1);
-	if (!new_buf)
-		return -1;
-	strcat(new_buf, delim);
-	strcat(new_buf, str);
-	*buf = new_buf;
-	return 0;
-}
-
-static int event_read_fields(struct tep_event *event, struct tep_format_field **fields)
-{
-	struct tep_format_field *field = NULL;
-	enum tep_event_type type;
-	char *token;
-	char *last_token;
-	char *delim = " ";
-	int count = 0;
-	int ret;
-
-	do {
-		unsigned int size_dynamic = 0;
-
-		type = read_token(&token);
-		if (type == TEP_EVENT_NEWLINE) {
-			free_token(token);
-			return count;
-		}
-
-		count++;
-
-		if (test_type_token(type, token, TEP_EVENT_ITEM, "field"))
-			goto fail;
-		free_token(token);
-
-		type = read_token(&token);
-		/*
-		 * The ftrace fields may still use the "special" name.
-		 * Just ignore it.
-		 */
-		if (event->flags & TEP_EVENT_FL_ISFTRACE &&
-		    type == TEP_EVENT_ITEM && strcmp(token, "special") == 0) {
-			free_token(token);
-			type = read_token(&token);
-		}
-
-		if (test_type_token(type, token, TEP_EVENT_OP, ":") < 0)
-			goto fail;
-
-		free_token(token);
-		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-			goto fail;
-
-		last_token = token;
-
-		field = calloc(1, sizeof(*field));
-		if (!field)
-			goto fail;
-
-		field->event = event;
-
-		/* read the rest of the type */
-		for (;;) {
-			type = read_token(&token);
-			if (type == TEP_EVENT_ITEM ||
-			    (type == TEP_EVENT_OP && strcmp(token, "*") == 0) ||
-			    /*
-			     * Some of the ftrace fields are broken and have
-			     * an illegal "." in them.
-			     */
-			    (event->flags & TEP_EVENT_FL_ISFTRACE &&
-			     type == TEP_EVENT_OP && strcmp(token, ".") == 0)) {
-
-				if (strcmp(token, "*") == 0)
-					field->flags |= TEP_FIELD_IS_POINTER;
-
-				if (field->type) {
-					ret = append(&field->type, delim, last_token);
-					free(last_token);
-					if (ret < 0)
-						goto fail;
-				} else
-					field->type = last_token;
-				last_token = token;
-				delim = " ";
-				continue;
-			}
-
-			/* Handle __attribute__((user)) */
-			if ((type == TEP_EVENT_DELIM) &&
-			    strcmp("__attribute__", last_token) == 0 &&
-			    token[0] == '(') {
-				int depth = 1;
-				int ret;
-
-				ret = append(&field->type, " ", last_token);
-				ret |= append(&field->type, "", "(");
-				if (ret < 0)
-					goto fail;
-
-				delim = " ";
-				while ((type = read_token(&token)) != TEP_EVENT_NONE) {
-					if (type == TEP_EVENT_DELIM) {
-						if (token[0] == '(')
-							depth++;
-						else if (token[0] == ')')
-							depth--;
-						if (!depth)
-							break;
-						ret = append(&field->type, "", token);
-						delim = "";
-					} else {
-						ret = append(&field->type, delim, token);
-						delim = " ";
-					}
-					if (ret < 0)
-						goto fail;
-					free(last_token);
-					last_token = token;
-				}
-				continue;
-			}
-			break;
-		}
-
-		if (!field->type) {
-			do_warning_event(event, "%s: no type found", __func__);
-			goto fail;
-		}
-		field->name = field->alias = last_token;
-
-		if (test_type(type, TEP_EVENT_OP))
-			goto fail;
-
-		if (strcmp(token, "[") == 0) {
-			enum tep_event_type last_type = type;
-			char *brackets = token;
-
-			field->flags |= TEP_FIELD_IS_ARRAY;
-
-			type = read_token(&token);
-
-			if (type == TEP_EVENT_ITEM)
-				field->arraylen = strtoul(token, NULL, 0);
-			else
-				field->arraylen = 0;
-
-		        while (strcmp(token, "]") != 0) {
-				const char *delim;
-
-				if (last_type == TEP_EVENT_ITEM &&
-				    type == TEP_EVENT_ITEM)
-					delim = " ";
-				else
-					delim = "";
-
-				last_type = type;
-
-				ret = append(&brackets, delim, token);
-				if (ret < 0) {
-					free(brackets);
-					goto fail;
-				}
-				/* We only care about the last token */
-				field->arraylen = strtoul(token, NULL, 0);
-				free_token(token);
-				type = read_token(&token);
-				if (type == TEP_EVENT_NONE) {
-					free(brackets);
-					do_warning_event(event, "failed to find token");
-					goto fail;
-				}
-			}
-
-			free_token(token);
-
-			ret = append(&brackets, "", "]");
-			if (ret < 0) {
-				free(brackets);
-				goto fail;
-			}
-
-			/* add brackets to type */
-
-			type = read_token(&token);
-			/*
-			 * If the next token is not an OP, then it is of
-			 * the format: type [] item;
-			 */
-			if (type == TEP_EVENT_ITEM) {
-				ret = append(&field->type, " ", field->name);
-				if (ret < 0) {
-					free(brackets);
-					goto fail;
-				}
-				ret = append(&field->type, "", brackets);
-
-				size_dynamic = type_size(field->name);
-				free_token(field->name);
-				field->name = field->alias = token;
-				type = read_token(&token);
-			} else {
-				ret = append(&field->type, "", brackets);
-				if (ret < 0) {
-					free(brackets);
-					goto fail;
-				}
-			}
-			free(brackets);
-		}
-
-		if (field_is_string(field))
-			field->flags |= TEP_FIELD_IS_STRING;
-		if (field_is_dynamic(field))
-			field->flags |= TEP_FIELD_IS_DYNAMIC;
-		if (field_is_relative_dynamic(field))
-			field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE;
-		if (field_is_long(field))
-			field->flags |= TEP_FIELD_IS_LONG;
-
-		if (test_type_token(type, token,  TEP_EVENT_OP, ";"))
-			goto fail;
-		free_token(token);
-
-		if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
-			goto fail_expect;
-
-		if (read_expected(TEP_EVENT_OP, ":") < 0)
-			goto fail_expect;
-
-		if (read_expect_type(TEP_EVENT_ITEM, &token))
-			goto fail;
-		field->offset = strtoul(token, NULL, 0);
-		free_token(token);
-
-		if (read_expected(TEP_EVENT_OP, ";") < 0)
-			goto fail_expect;
-
-		if (read_expected(TEP_EVENT_ITEM, "size") < 0)
-			goto fail_expect;
-
-		if (read_expected(TEP_EVENT_OP, ":") < 0)
-			goto fail_expect;
-
-		if (read_expect_type(TEP_EVENT_ITEM, &token))
-			goto fail;
-		field->size = strtoul(token, NULL, 0);
-		free_token(token);
-
-		if (read_expected(TEP_EVENT_OP, ";") < 0)
-			goto fail_expect;
-
-		type = read_token(&token);
-		if (type != TEP_EVENT_NEWLINE) {
-			/* newer versions of the kernel have a "signed" type */
-			if (test_type_token(type, token, TEP_EVENT_ITEM, "signed"))
-				goto fail;
-
-			free_token(token);
-
-			if (read_expected(TEP_EVENT_OP, ":") < 0)
-				goto fail_expect;
-
-			if (read_expect_type(TEP_EVENT_ITEM, &token))
-				goto fail;
-
-			if (strtoul(token, NULL, 0))
-				field->flags |= TEP_FIELD_IS_SIGNED;
-
-			free_token(token);
-			if (read_expected(TEP_EVENT_OP, ";") < 0)
-				goto fail_expect;
-
-			if (read_expect_type(TEP_EVENT_NEWLINE, &token))
-				goto fail;
-		}
-
-		free_token(token);
-
-		if (field->flags & TEP_FIELD_IS_ARRAY) {
-			if (field->arraylen)
-				field->elementsize = field->size / field->arraylen;
-			else if (field->flags & TEP_FIELD_IS_DYNAMIC)
-				field->elementsize = size_dynamic;
-			else if (field->flags & TEP_FIELD_IS_STRING)
-				field->elementsize = 1;
-			else if (field->flags & TEP_FIELD_IS_LONG)
-				field->elementsize = event->tep ?
-						     event->tep->long_size :
-						     sizeof(long);
-		} else
-			field->elementsize = field->size;
-
-		*fields = field;
-		fields = &field->next;
-
-	} while (1);
-
-	return 0;
-
-fail:
-	free_token(token);
-fail_expect:
-	if (field) {
-		free(field->type);
-		free(field->name);
-		free(field);
-	}
-	return -1;
-}
-
-static int event_read_format(struct tep_event *event)
-{
-	char *token;
-	int ret;
-
-	if (read_expected_item(TEP_EVENT_ITEM, "format") < 0)
-		return -1;
-
-	if (read_expected(TEP_EVENT_OP, ":") < 0)
-		return -1;
-
-	if (read_expect_type(TEP_EVENT_NEWLINE, &token))
-		goto fail;
-	free_token(token);
-
-	ret = event_read_fields(event, &event->format.common_fields);
-	if (ret < 0)
-		return ret;
-	event->format.nr_common = ret;
-
-	ret = event_read_fields(event, &event->format.fields);
-	if (ret < 0)
-		return ret;
-	event->format.nr_fields = ret;
-
-	return 0;
-
- fail:
-	free_token(token);
-	return -1;
-}
-
-static enum tep_event_type
-process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
-		  char **tok, enum tep_event_type type);
-
-static enum tep_event_type
-process_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	enum tep_event_type type;
-	char *token;
-
-	type = read_token(&token);
-	*tok = token;
-
-	return process_arg_token(event, arg, tok, type);
-}
-
-static enum tep_event_type
-process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok);
-
-/*
- * For __print_symbolic() and __print_flags, we need to completely
- * evaluate the first argument, which defines what to print next.
- */
-static enum tep_event_type
-process_field_arg(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	enum tep_event_type type;
-
-	type = process_arg(event, arg, tok);
-
-	while (type == TEP_EVENT_OP) {
-		type = process_op(event, arg, tok);
-	}
-
-	return type;
-}
-
-static enum tep_event_type
-process_cond(struct tep_event *event, struct tep_print_arg *top, char **tok)
-{
-	struct tep_print_arg *arg, *left, *right;
-	enum tep_event_type type;
-	char *token = NULL;
-
-	arg = alloc_arg();
-	left = alloc_arg();
-	right = alloc_arg();
-
-	if (!arg || !left || !right) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		/* arg will be freed at out_free */
-		free_arg(left);
-		free_arg(right);
-		goto out_free;
-	}
-
-	arg->type = TEP_PRINT_OP;
-	arg->op.left = left;
-	arg->op.right = right;
-
-	*tok = NULL;
-	type = process_arg(event, left, &token);
-
- again:
-	if (type == TEP_EVENT_ERROR)
-		goto out_free;
-
-	/* Handle other operations in the arguments */
-	if (type == TEP_EVENT_OP && strcmp(token, ":") != 0) {
-		type = process_op(event, left, &token);
-		goto again;
-	}
-
-	if (test_type_token(type, token, TEP_EVENT_OP, ":"))
-		goto out_free;
-
-	arg->op.op = token;
-
-	type = process_arg(event, right, &token);
-
-	top->op.right = arg;
-
-	*tok = token;
-	return type;
-
-out_free:
-	/* Top may point to itself */
-	top->op.right = NULL;
-	free_token(token);
-	free_arg(arg);
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_array(struct tep_event *event, struct tep_print_arg *top, char **tok)
-{
-	struct tep_print_arg *arg;
-	enum tep_event_type type;
-	char *token = NULL;
-
-	arg = alloc_arg();
-	if (!arg) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		/* '*tok' is set to top->op.op.  No need to free. */
-		*tok = NULL;
-		return TEP_EVENT_ERROR;
-	}
-
-	*tok = NULL;
-	type = process_arg(event, arg, &token);
-	if (test_type_token(type, token, TEP_EVENT_OP, "]"))
-		goto out_free;
-
-	top->op.right = arg;
-
-	free_token(token);
-	type = read_token_item(&token);
-	*tok = token;
-
-	return type;
-
-out_free:
-	free_token(token);
-	free_arg(arg);
-	return TEP_EVENT_ERROR;
-}
-
-static int get_op_prio(char *op)
-{
-	if (!op[1]) {
-		switch (op[0]) {
-		case '~':
-		case '!':
-			return 4;
-		case '*':
-		case '/':
-		case '%':
-			return 6;
-		case '+':
-		case '-':
-			return 7;
-			/* '>>' and '<<' are 8 */
-		case '<':
-		case '>':
-			return 9;
-			/* '==' and '!=' are 10 */
-		case '&':
-			return 11;
-		case '^':
-			return 12;
-		case '|':
-			return 13;
-		case '?':
-			return 16;
-		default:
-			do_warning("unknown op '%c'", op[0]);
-			return -1;
-		}
-	} else {
-		if (strcmp(op, "++") == 0 ||
-		    strcmp(op, "--") == 0) {
-			return 3;
-		} else if (strcmp(op, ">>") == 0 ||
-			   strcmp(op, "<<") == 0) {
-			return 8;
-		} else if (strcmp(op, ">=") == 0 ||
-			   strcmp(op, "<=") == 0) {
-			return 9;
-		} else if (strcmp(op, "==") == 0 ||
-			   strcmp(op, "!=") == 0) {
-			return 10;
-		} else if (strcmp(op, "&&") == 0) {
-			return 14;
-		} else if (strcmp(op, "||") == 0) {
-			return 15;
-		} else {
-			do_warning("unknown op '%s'", op);
-			return -1;
-		}
-	}
-}
-
-static int set_op_prio(struct tep_print_arg *arg)
-{
-
-	/* single ops are the greatest */
-	if (!arg->op.left || arg->op.left->type == TEP_PRINT_NULL)
-		arg->op.prio = 0;
-	else
-		arg->op.prio = get_op_prio(arg->op.op);
-
-	return arg->op.prio;
-}
-
-/* Note, *tok does not get freed, but will most likely be saved */
-static enum tep_event_type
-process_op(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	struct tep_print_arg *left, *right = NULL;
-	enum tep_event_type type;
-	char *token;
-
-	/* the op is passed in via tok */
-	token = *tok;
-
-	if (arg->type == TEP_PRINT_OP && !arg->op.left) {
-		/* handle single op */
-		if (token[1]) {
-			do_warning_event(event, "bad op token %s", token);
-			goto out_free;
-		}
-		switch (token[0]) {
-		case '~':
-		case '!':
-		case '+':
-		case '-':
-			break;
-		default:
-			do_warning_event(event, "bad op token %s", token);
-			goto out_free;
-
-		}
-
-		/* make an empty left */
-		left = alloc_arg();
-		if (!left)
-			goto out_warn_free;
-
-		left->type = TEP_PRINT_NULL;
-		arg->op.left = left;
-
-		right = alloc_arg();
-		if (!right)
-			goto out_warn_free;
-
-		arg->op.right = right;
-
-		/* do not free the token, it belongs to an op */
-		*tok = NULL;
-		type = process_arg(event, right, tok);
-
-	} else if (strcmp(token, "?") == 0) {
-
-		left = alloc_arg();
-		if (!left)
-			goto out_warn_free;
-
-		/* copy the top arg to the left */
-		*left = *arg;
-
-		arg->type = TEP_PRINT_OP;
-		arg->op.op = token;
-		arg->op.left = left;
-		arg->op.prio = 0;
-
-		/* it will set arg->op.right */
-		type = process_cond(event, arg, tok);
-
-	} else if (strcmp(token, ">>") == 0 ||
-		   strcmp(token, "<<") == 0 ||
-		   strcmp(token, "&") == 0 ||
-		   strcmp(token, "|") == 0 ||
-		   strcmp(token, "&&") == 0 ||
-		   strcmp(token, "||") == 0 ||
-		   strcmp(token, "-") == 0 ||
-		   strcmp(token, "+") == 0 ||
-		   strcmp(token, "*") == 0 ||
-		   strcmp(token, "^") == 0 ||
-		   strcmp(token, "/") == 0 ||
-		   strcmp(token, "%") == 0 ||
-		   strcmp(token, "<") == 0 ||
-		   strcmp(token, ">") == 0 ||
-		   strcmp(token, "<=") == 0 ||
-		   strcmp(token, ">=") == 0 ||
-		   strcmp(token, "==") == 0 ||
-		   strcmp(token, "!=") == 0) {
-
-		left = alloc_arg();
-		if (!left)
-			goto out_warn_free;
-
-		/* copy the top arg to the left */
-		*left = *arg;
-
-		arg->type = TEP_PRINT_OP;
-		arg->op.op = token;
-		arg->op.left = left;
-		arg->op.right = NULL;
-
-		if (set_op_prio(arg) == -1) {
-			event->flags |= TEP_EVENT_FL_FAILED;
-			/* arg->op.op (= token) will be freed at out_free */
-			arg->op.op = NULL;
-			goto out_free;
-		}
-
-		type = read_token_item(&token);
-		*tok = token;
-
-		/* could just be a type pointer */
-		if ((strcmp(arg->op.op, "*") == 0) &&
-		    type == TEP_EVENT_DELIM && (strcmp(token, ")") == 0)) {
-			int ret;
-
-			if (left->type != TEP_PRINT_ATOM) {
-				do_warning_event(event, "bad pointer type");
-				goto out_free;
-			}
-			ret = append(&left->atom.atom, " ", "*");
-			if (ret < 0)
-				goto out_warn_free;
-
-			free(arg->op.op);
-			*arg = *left;
-			free(left);
-
-			return type;
-		}
-
-		right = alloc_arg();
-		if (!right)
-			goto out_warn_free;
-
-		type = process_arg_token(event, right, tok, type);
-		if (type == TEP_EVENT_ERROR) {
-			free_arg(right);
-			/* token was freed in process_arg_token() via *tok */
-			token = NULL;
-			goto out_free;
-		}
-
-		if (right->type == TEP_PRINT_OP &&
-		    get_op_prio(arg->op.op) < get_op_prio(right->op.op)) {
-			struct tep_print_arg tmp;
-
-			/* rotate ops according to the priority */
-			arg->op.right = right->op.left;
-
-			tmp = *arg;
-			*arg = *right;
-			*right = tmp;
-
-			arg->op.left = right;
-		} else {
-			arg->op.right = right;
-		}
-
-	} else if (strcmp(token, "[") == 0) {
-
-		left = alloc_arg();
-		if (!left)
-			goto out_warn_free;
-
-		*left = *arg;
-
-		arg->type = TEP_PRINT_OP;
-		arg->op.op = token;
-		arg->op.left = left;
-
-		arg->op.prio = 0;
-
-		/* it will set arg->op.right */
-		type = process_array(event, arg, tok);
-
-	} else {
-		do_warning_event(event, "unknown op '%s'", token);
-		event->flags |= TEP_EVENT_FL_FAILED;
-		/* the arg is now the left side */
-		goto out_free;
-	}
-
-	if (type == TEP_EVENT_OP && strcmp(*tok, ":") != 0) {
-		int prio;
-
-		/* higher prios need to be closer to the root */
-		prio = get_op_prio(*tok);
-
-		if (prio > arg->op.prio)
-			return process_op(event, arg, tok);
-
-		return process_op(event, right, tok);
-	}
-
-	return type;
-
-out_warn_free:
-	do_warning_event(event, "%s: not enough memory!", __func__);
-out_free:
-	free_token(token);
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_entry(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
-	      char **tok)
-{
-	enum tep_event_type type;
-	char *field;
-	char *token;
-
-	if (read_expected(TEP_EVENT_OP, "->") < 0)
-		goto out_err;
-
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto out_free;
-	field = token;
-
-	arg->type = TEP_PRINT_FIELD;
-	arg->field.name = field;
-
-	if (is_flag_field) {
-		arg->field.field = tep_find_any_field(event, arg->field.name);
-		arg->field.field->flags |= TEP_FIELD_IS_FLAG;
-		is_flag_field = 0;
-	} else if (is_symbolic_field) {
-		arg->field.field = tep_find_any_field(event, arg->field.name);
-		arg->field.field->flags |= TEP_FIELD_IS_SYMBOLIC;
-		is_symbolic_field = 0;
-	}
-
-	type = read_token(&token);
-	*tok = token;
-
-	return type;
-
- out_free:
-	free_token(token);
- out_err:
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static int alloc_and_process_delim(struct tep_event *event, char *next_token,
-				   struct tep_print_arg **print_arg)
-{
-	struct tep_print_arg *field;
-	enum tep_event_type type;
-	char *token;
-	int ret = 0;
-
-	field = alloc_arg();
-	if (!field) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		errno = ENOMEM;
-		return -1;
-	}
-
-	type = process_arg(event, field, &token);
-
-	if (test_type_token(type, token, TEP_EVENT_DELIM, next_token)) {
-		errno = EINVAL;
-		ret = -1;
-		free_arg(field);
-		goto out_free_token;
-	}
-
-	*print_arg = field;
-
-out_free_token:
-	free_token(token);
-
-	return ret;
-}
-
-static char *arg_eval (struct tep_print_arg *arg);
-
-static unsigned long long
-eval_type_str(unsigned long long val, const char *type, int pointer)
-{
-	int sign = 0;
-	char *ref;
-	int len;
-
-	len = strlen(type);
-
-	if (pointer) {
-
-		if (type[len-1] != '*') {
-			do_warning("pointer expected with non pointer type");
-			return val;
-		}
-
-		ref = malloc(len);
-		if (!ref) {
-			do_warning("%s: not enough memory!", __func__);
-			return val;
-		}
-		memcpy(ref, type, len);
-
-		/* chop off the " *" */
-		ref[len - 2] = 0;
-
-		val = eval_type_str(val, ref, 0);
-		free(ref);
-		return val;
-	}
-
-	/* check if this is a pointer */
-	if (type[len - 1] == '*')
-		return val;
-
-	/* Try to figure out the arg size*/
-	if (strncmp(type, "struct", 6) == 0)
-		/* all bets off */
-		return val;
-
-	if (strcmp(type, "u8") == 0)
-		return val & 0xff;
-
-	if (strcmp(type, "u16") == 0)
-		return val & 0xffff;
-
-	if (strcmp(type, "u32") == 0)
-		return val & 0xffffffff;
-
-	if (strcmp(type, "u64") == 0 ||
-	    strcmp(type, "s64") == 0)
-		return val;
-
-	if (strcmp(type, "s8") == 0)
-		return (unsigned long long)(char)val & 0xff;
-
-	if (strcmp(type, "s16") == 0)
-		return (unsigned long long)(short)val & 0xffff;
-
-	if (strcmp(type, "s32") == 0)
-		return (unsigned long long)(int)val & 0xffffffff;
-
-	if (strncmp(type, "unsigned ", 9) == 0) {
-		sign = 0;
-		type += 9;
-	}
-
-	if (strcmp(type, "char") == 0) {
-		if (sign)
-			return (unsigned long long)(char)val & 0xff;
-		else
-			return val & 0xff;
-	}
-
-	if (strcmp(type, "short") == 0) {
-		if (sign)
-			return (unsigned long long)(short)val & 0xffff;
-		else
-			return val & 0xffff;
-	}
-
-	if (strcmp(type, "int") == 0) {
-		if (sign)
-			return (unsigned long long)(int)val & 0xffffffff;
-		else
-			return val & 0xffffffff;
-	}
-
-	return val;
-}
-
-/*
- * Try to figure out the type.
- */
-static unsigned long long
-eval_type(unsigned long long val, struct tep_print_arg *arg, int pointer)
-{
-	if (arg->type != TEP_PRINT_TYPE) {
-		do_warning("expected type argument");
-		return 0;
-	}
-
-	return eval_type_str(val, arg->typecast.type, pointer);
-}
-
-static int arg_num_eval(struct tep_print_arg *arg, long long *val)
-{
-	long long left, right;
-	int ret = 1;
-
-	switch (arg->type) {
-	case TEP_PRINT_ATOM:
-		*val = strtoll(arg->atom.atom, NULL, 0);
-		break;
-	case TEP_PRINT_TYPE:
-		ret = arg_num_eval(arg->typecast.item, val);
-		if (!ret)
-			break;
-		*val = eval_type(*val, arg, 0);
-		break;
-	case TEP_PRINT_OP:
-		switch (arg->op.op[0]) {
-		case '|':
-			ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-			if (arg->op.op[1])
-				*val = left || right;
-			else
-				*val = left | right;
-			break;
-		case '&':
-			ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-			if (arg->op.op[1])
-				*val = left && right;
-			else
-				*val = left & right;
-			break;
-		case '<':
-			ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-			switch (arg->op.op[1]) {
-			case 0:
-				*val = left < right;
-				break;
-			case '<':
-				*val = left << right;
-				break;
-			case '=':
-				*val = left <= right;
-				break;
-			default:
-				do_warning("unknown op '%s'", arg->op.op);
-				ret = 0;
-			}
-			break;
-		case '>':
-			ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-			switch (arg->op.op[1]) {
-			case 0:
-				*val = left > right;
-				break;
-			case '>':
-				*val = left >> right;
-				break;
-			case '=':
-				*val = left >= right;
-				break;
-			default:
-				do_warning("unknown op '%s'", arg->op.op);
-				ret = 0;
-			}
-			break;
-		case '=':
-			ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-
-			if (arg->op.op[1] != '=') {
-				do_warning("unknown op '%s'", arg->op.op);
-				ret = 0;
-			} else
-				*val = left == right;
-			break;
-		case '!':
-			ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-
-			switch (arg->op.op[1]) {
-			case '=':
-				*val = left != right;
-				break;
-			default:
-				do_warning("unknown op '%s'", arg->op.op);
-				ret = 0;
-			}
-			break;
-		case '-':
-			/* check for negative */
-			if (arg->op.left->type == TEP_PRINT_NULL)
-				left = 0;
-			else
-				ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-			*val = left - right;
-			break;
-		case '+':
-			if (arg->op.left->type == TEP_PRINT_NULL)
-				left = 0;
-			else
-				ret = arg_num_eval(arg->op.left, &left);
-			if (!ret)
-				break;
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-			*val = left + right;
-			break;
-		case '~':
-			ret = arg_num_eval(arg->op.right, &right);
-			if (!ret)
-				break;
-			*val = ~right;
-			break;
-		default:
-			do_warning("unknown op '%s'", arg->op.op);
-			ret = 0;
-		}
-		break;
-
-	case TEP_PRINT_NULL:
-	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
-	case TEP_PRINT_STRING:
-	case TEP_PRINT_BSTRING:
-	case TEP_PRINT_BITMASK:
-	default:
-		do_warning("invalid eval type %d", arg->type);
-		ret = 0;
-
-	}
-	return ret;
-}
-
-static char *arg_eval (struct tep_print_arg *arg)
-{
-	long long val;
-	static char buf[24];
-
-	switch (arg->type) {
-	case TEP_PRINT_ATOM:
-		return arg->atom.atom;
-	case TEP_PRINT_TYPE:
-		return arg_eval(arg->typecast.item);
-	case TEP_PRINT_OP:
-		if (!arg_num_eval(arg, &val))
-			break;
-		sprintf(buf, "%lld", val);
-		return buf;
-
-	case TEP_PRINT_NULL:
-	case TEP_PRINT_FIELD ... TEP_PRINT_SYMBOL:
-	case TEP_PRINT_STRING:
-	case TEP_PRINT_BSTRING:
-	case TEP_PRINT_BITMASK:
-	default:
-		do_warning("invalid eval type %d", arg->type);
-		break;
-	}
-
-	return NULL;
-}
-
-static enum tep_event_type
-process_fields(struct tep_event *event, struct tep_print_flag_sym **list, char **tok)
-{
-	enum tep_event_type type;
-	struct tep_print_arg *arg = NULL;
-	struct tep_print_flag_sym *field;
-	char *token = *tok;
-	char *value;
-
-	do {
-		free_token(token);
-		type = read_token_item(&token);
-		if (test_type_token(type, token, TEP_EVENT_OP, "{"))
-			break;
-
-		arg = alloc_arg();
-		if (!arg)
-			goto out_free;
-
-		free_token(token);
-		type = process_arg(event, arg, &token);
-
-		if (type == TEP_EVENT_OP)
-			type = process_op(event, arg, &token);
-
-		if (type == TEP_EVENT_ERROR)
-			goto out_free;
-
-		if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
-			goto out_free;
-
-		field = calloc(1, sizeof(*field));
-		if (!field)
-			goto out_free;
-
-		value = arg_eval(arg);
-		if (value == NULL)
-			goto out_free_field;
-		field->value = strdup(value);
-		if (field->value == NULL)
-			goto out_free_field;
-
-		free_arg(arg);
-		arg = alloc_arg();
-		if (!arg)
-			goto out_free;
-
-		free_token(token);
-		type = process_arg(event, arg, &token);
-		if (test_type_token(type, token, TEP_EVENT_OP, "}"))
-			goto out_free_field;
-
-		value = arg_eval(arg);
-		if (value == NULL)
-			goto out_free_field;
-		field->str = strdup(value);
-		if (field->str == NULL)
-			goto out_free_field;
-		free_arg(arg);
-		arg = NULL;
-
-		*list = field;
-		list = &field->next;
-
-		free_token(token);
-		type = read_token_item(&token);
-	} while (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0);
-
-	*tok = token;
-	return type;
-
-out_free_field:
-	free_flag_sym(field);
-out_free:
-	free_arg(arg);
-	free_token(token);
-	*tok = NULL;
-
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_flags(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	struct tep_print_arg *field;
-	enum tep_event_type type;
-	char *token = NULL;
-
-	memset(arg, 0, sizeof(*arg));
-	arg->type = TEP_PRINT_FLAGS;
-
-	field = alloc_arg();
-	if (!field) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		goto out_free;
-	}
-
-	type = process_field_arg(event, field, &token);
-
-	/* Handle operations in the first argument */
-	while (type == TEP_EVENT_OP)
-		type = process_op(event, field, &token);
-
-	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
-		goto out_free_field;
-	free_token(token);
-
-	arg->flags.field = field;
-
-	type = read_token_item(&token);
-	if (event_item_type(type)) {
-		arg->flags.delim = token;
-		type = read_token_item(&token);
-	}
-
-	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
-		goto out_free;
-
-	type = process_fields(event, &arg->flags.flags, &token);
-	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
-		goto out_free;
-
-	free_token(token);
-	type = read_token_item(tok);
-	return type;
-
-out_free_field:
-	free_arg(field);
-out_free:
-	free_token(token);
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_symbols(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	struct tep_print_arg *field;
-	enum tep_event_type type;
-	char *token = NULL;
-
-	memset(arg, 0, sizeof(*arg));
-	arg->type = TEP_PRINT_SYMBOL;
-
-	field = alloc_arg();
-	if (!field) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		goto out_free;
-	}
-
-	type = process_field_arg(event, field, &token);
-
-	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
-		goto out_free_field;
-
-	arg->symbol.field = field;
-
-	type = process_fields(event, &arg->symbol.symbols, &token);
-	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
-		goto out_free;
-
-	free_token(token);
-	type = read_token_item(tok);
-	return type;
-
-out_free_field:
-	free_arg(field);
-out_free:
-	free_token(token);
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_hex_common(struct tep_event *event, struct tep_print_arg *arg,
-		   char **tok, enum tep_print_arg_type type)
-{
-	memset(arg, 0, sizeof(*arg));
-	arg->type = type;
-
-	if (alloc_and_process_delim(event, ",", &arg->hex.field))
-		goto out;
-
-	if (alloc_and_process_delim(event, ")", &arg->hex.size))
-		goto free_field;
-
-	return read_token_item(tok);
-
-free_field:
-	free_arg(arg->hex.field);
-	arg->hex.field = NULL;
-out:
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_hex(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	return process_hex_common(event, arg, tok, TEP_PRINT_HEX);
-}
-
-static enum tep_event_type
-process_hex_str(struct tep_event *event, struct tep_print_arg *arg,
-		char **tok)
-{
-	return process_hex_common(event, arg, tok, TEP_PRINT_HEX_STR);
-}
-
-static enum tep_event_type
-process_int_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	memset(arg, 0, sizeof(*arg));
-	arg->type = TEP_PRINT_INT_ARRAY;
-
-	if (alloc_and_process_delim(event, ",", &arg->int_array.field))
-		goto out;
-
-	if (alloc_and_process_delim(event, ",", &arg->int_array.count))
-		goto free_field;
-
-	if (alloc_and_process_delim(event, ")", &arg->int_array.el_size))
-		goto free_size;
-
-	return read_token_item(tok);
-
-free_size:
-	free_arg(arg->int_array.count);
-	arg->int_array.count = NULL;
-free_field:
-	free_arg(arg->int_array.field);
-	arg->int_array.field = NULL;
-out:
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_dynamic_array(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	struct tep_format_field *field;
-	enum tep_event_type type;
-	char *token;
-
-	memset(arg, 0, sizeof(*arg));
-	arg->type = TEP_PRINT_DYNAMIC_ARRAY;
-
-	/*
-	 * The item within the parenthesis is another field that holds
-	 * the index into where the array starts.
-	 */
-	type = read_token(&token);
-	*tok = token;
-	if (type != TEP_EVENT_ITEM)
-		goto out_free;
-
-	/* Find the field */
-
-	field = tep_find_field(event, token);
-	if (!field)
-		goto out_free;
-
-	arg->dynarray.field = field;
-	arg->dynarray.index = 0;
-
-	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
-		goto out_free;
-
-	free_token(token);
-	type = read_token_item(&token);
-	*tok = token;
-	if (type != TEP_EVENT_OP || strcmp(token, "[") != 0)
-		return type;
-
-	free_token(token);
-	arg = alloc_arg();
-	if (!arg) {
-		do_warning_event(event, "%s: not enough memory!", __func__);
-		*tok = NULL;
-		return TEP_EVENT_ERROR;
-	}
-
-	type = process_arg(event, arg, &token);
-	if (type == TEP_EVENT_ERROR)
-		goto out_free_arg;
-
-	if (!test_type_token(type, token, TEP_EVENT_OP, "]"))
-		goto out_free_arg;
-
-	free_token(token);
-	type = read_token_item(tok);
-	return type;
-
- out_free_arg:
-	free_arg(arg);
- out_free:
-	free_token(token);
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_dynamic_array_len(struct tep_event *event, struct tep_print_arg *arg,
-			  char **tok)
-{
-	struct tep_format_field *field;
-	enum tep_event_type type;
-	char *token;
-
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto out_free;
-
-	arg->type = TEP_PRINT_DYNAMIC_ARRAY_LEN;
-
-	/* Find the field */
-	field = tep_find_field(event, token);
-	if (!field)
-		goto out_free;
-
-	arg->dynarray.field = field;
-	arg->dynarray.index = 0;
-
-	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
-		goto out_err;
-
-	free_token(token);
-	type = read_token(&token);
-	*tok = token;
-
-	return type;
-
- out_free:
-	free_token(token);
- out_err:
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_paren(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	struct tep_print_arg *item_arg;
-	enum tep_event_type type;
-	char *token;
-
-	type = process_arg(event, arg, &token);
-
-	if (type == TEP_EVENT_ERROR)
-		goto out_free;
-
-	if (type == TEP_EVENT_OP)
-		type = process_op(event, arg, &token);
-
-	if (type == TEP_EVENT_ERROR)
-		goto out_free;
-
-	if (test_type_token(type, token, TEP_EVENT_DELIM, ")"))
-		goto out_free;
-
-	free_token(token);
-	type = read_token_item(&token);
-
-	/*
-	 * If the next token is an item or another open paren, then
-	 * this was a typecast.
-	 */
-	if (event_item_type(type) ||
-	    (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0)) {
-
-		/* make this a typecast and contine */
-
-		/* prevous must be an atom */
-		if (arg->type != TEP_PRINT_ATOM) {
-			do_warning_event(event, "previous needed to be TEP_PRINT_ATOM");
-			goto out_free;
-		}
-
-		item_arg = alloc_arg();
-		if (!item_arg) {
-			do_warning_event(event, "%s: not enough memory!",
-					 __func__);
-			goto out_free;
-		}
-
-		arg->type = TEP_PRINT_TYPE;
-		arg->typecast.type = arg->atom.atom;
-		arg->typecast.item = item_arg;
-		type = process_arg_token(event, item_arg, &token, type);
-
-	}
-
-	*tok = token;
-	return type;
-
- out_free:
-	free_token(token);
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-
-static enum tep_event_type
-process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
-	    char **tok)
-{
-	enum tep_event_type type;
-	char *token;
-
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto out_free;
-
-	arg->type = TEP_PRINT_STRING;
-	arg->string.string = token;
-	arg->string.field = NULL;
-
-	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
-		goto out_err;
-
-	type = read_token(&token);
-	*tok = token;
-
-	return type;
-
- out_free:
-	free_token(token);
- out_err:
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,
-		char **tok)
-{
-	enum tep_event_type type;
-	char *token;
-
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto out_free;
-
-	arg->type = TEP_PRINT_BITMASK;
-	arg->bitmask.bitmask = token;
-	arg->bitmask.field = NULL;
-
-	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
-		goto out_err;
-
-	type = read_token(&token);
-	*tok = token;
-
-	return type;
-
- out_free:
-	free_token(token);
- out_err:
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static struct tep_function_handler *
-find_func_handler(struct tep_handle *tep, char *func_name)
-{
-	struct tep_function_handler *func;
-
-	if (!tep)
-		return NULL;
-
-	for (func = tep->func_handlers; func; func = func->next) {
-		if (strcmp(func->name, func_name) == 0)
-			break;
-	}
-
-	return func;
-}
-
-static void remove_func_handler(struct tep_handle *tep, char *func_name)
-{
-	struct tep_function_handler *func;
-	struct tep_function_handler **next;
-
-	next = &tep->func_handlers;
-	while ((func = *next)) {
-		if (strcmp(func->name, func_name) == 0) {
-			*next = func->next;
-			free_func_handle(func);
-			break;
-		}
-		next = &func->next;
-	}
-}
-
-static enum tep_event_type
-process_func_handler(struct tep_event *event, struct tep_function_handler *func,
-		     struct tep_print_arg *arg, char **tok)
-{
-	struct tep_print_arg **next_arg;
-	struct tep_print_arg *farg;
-	enum tep_event_type type;
-	char *token;
-	int i;
-
-	arg->type = TEP_PRINT_FUNC;
-	arg->func.func = func;
-
-	*tok = NULL;
-
-	next_arg = &(arg->func.args);
-	for (i = 0; i < func->nr_args; i++) {
-		farg = alloc_arg();
-		if (!farg) {
-			do_warning_event(event, "%s: not enough memory!",
-					 __func__);
-			return TEP_EVENT_ERROR;
-		}
-
-		type = process_arg(event, farg, &token);
-		if (i < (func->nr_args - 1)) {
-			if (type != TEP_EVENT_DELIM || strcmp(token, ",") != 0) {
-				do_warning_event(event,
-					"Error: function '%s()' expects %d arguments but event %s only uses %d",
-					func->name, func->nr_args,
-					event->name, i + 1);
-				goto err;
-			}
-		} else {
-			if (type != TEP_EVENT_DELIM || strcmp(token, ")") != 0) {
-				do_warning_event(event,
-					"Error: function '%s()' only expects %d arguments but event %s has more",
-					func->name, func->nr_args, event->name);
-				goto err;
-			}
-		}
-
-		*next_arg = farg;
-		next_arg = &(farg->next);
-		free_token(token);
-	}
-
-	type = read_token(&token);
-	*tok = token;
-
-	return type;
-
-err:
-	free_arg(farg);
-	free_token(token);
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_builtin_expect(struct tep_event *event, struct tep_print_arg *arg, char **tok)
-{
-	enum tep_event_type type;
-	char *token = NULL;
-
-	/* Handle __builtin_expect( cond, #) */
-	type = process_arg(event, arg, &token);
-
-	if (type != TEP_EVENT_DELIM || token[0] != ',')
-		goto out_free;
-
-	free_token(token);
-
-	/* We don't care what the second parameter is of the __builtin_expect() */
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto out_free;
-
-	if (read_expected(TEP_EVENT_DELIM, ")") < 0)
-		goto out_free;
-
-	free_token(token);
-	type = read_token_item(tok);
-	return type;
-
-out_free:
-	free_token(token);
-	*tok = NULL;
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_function(struct tep_event *event, struct tep_print_arg *arg,
-		 char *token, char **tok)
-{
-	struct tep_function_handler *func;
-
-	if (strcmp(token, "__print_flags") == 0) {
-		free_token(token);
-		is_flag_field = 1;
-		return process_flags(event, arg, tok);
-	}
-	if (strcmp(token, "__print_symbolic") == 0) {
-		free_token(token);
-		is_symbolic_field = 1;
-		return process_symbols(event, arg, tok);
-	}
-	if (strcmp(token, "__print_hex") == 0) {
-		free_token(token);
-		return process_hex(event, arg, tok);
-	}
-	if (strcmp(token, "__print_hex_str") == 0) {
-		free_token(token);
-		return process_hex_str(event, arg, tok);
-	}
-	if (strcmp(token, "__print_array") == 0) {
-		free_token(token);
-		return process_int_array(event, arg, tok);
-	}
-	if (strcmp(token, "__get_str") == 0 ||
-	    strcmp(token, "__get_rel_str") == 0) {
-		free_token(token);
-		return process_str(event, arg, tok);
-	}
-	if (strcmp(token, "__get_bitmask") == 0 ||
-	    strcmp(token, "__get_rel_bitmask") == 0) {
-		free_token(token);
-		return process_bitmask(event, arg, tok);
-	}
-	if (strcmp(token, "__get_dynamic_array") == 0 ||
-	    strcmp(token, "__get_rel_dynamic_array") == 0) {
-		free_token(token);
-		return process_dynamic_array(event, arg, tok);
-	}
-	if (strcmp(token, "__get_dynamic_array_len") == 0 ||
-	    strcmp(token, "__get_rel_dynamic_array_len") == 0) {
-		free_token(token);
-		return process_dynamic_array_len(event, arg, tok);
-	}
-	if (strcmp(token, "__builtin_expect") == 0) {
-		free_token(token);
-		return process_builtin_expect(event, arg, tok);
-	}
-
-	func = find_func_handler(event->tep, token);
-	if (func) {
-		free_token(token);
-		return process_func_handler(event, func, arg, tok);
-	}
-
-	do_warning_event(event, "function %s not defined", token);
-	free_token(token);
-	return TEP_EVENT_ERROR;
-}
-
-static enum tep_event_type
-process_arg_token(struct tep_event *event, struct tep_print_arg *arg,
-		  char **tok, enum tep_event_type type)
-{
-	char *token;
-	char *atom;
-
-	token = *tok;
-
-	switch (type) {
-	case TEP_EVENT_ITEM:
-		if (strcmp(token, "REC") == 0) {
-			free_token(token);
-			type = process_entry(event, arg, &token);
-			break;
-		}
-		atom = token;
-		/* test the next token */
-		type = read_token_item(&token);
-
-		/*
-		 * If the next token is a parenthesis, then this
-		 * is a function.
-		 */
-		if (type == TEP_EVENT_DELIM && strcmp(token, "(") == 0) {
-			free_token(token);
-			token = NULL;
-			/* this will free atom. */
-			type = process_function(event, arg, atom, &token);
-			break;
-		}
-		/* atoms can be more than one token long */
-		while (type == TEP_EVENT_ITEM) {
-			int ret;
-
-			ret = append(&atom, " ", token);
-			if (ret < 0) {
-				free(atom);
-				*tok = NULL;
-				free_token(token);
-				return TEP_EVENT_ERROR;
-			}
-			free_token(token);
-			type = read_token_item(&token);
-		}
-
-		arg->type = TEP_PRINT_ATOM;
-		arg->atom.atom = atom;
-		break;
-
-	case TEP_EVENT_DQUOTE:
-	case TEP_EVENT_SQUOTE:
-		arg->type = TEP_PRINT_ATOM;
-		arg->atom.atom = token;
-		type = read_token_item(&token);
-		break;
-	case TEP_EVENT_DELIM:
-		if (strcmp(token, "(") == 0) {
-			free_token(token);
-			type = process_paren(event, arg, &token);
-			break;
-		}
-	case TEP_EVENT_OP:
-		/* handle single ops */
-		arg->type = TEP_PRINT_OP;
-		arg->op.op = token;
-		arg->op.left = NULL;
-		type = process_op(event, arg, &token);
-
-		/* On error, the op is freed */
-		if (type == TEP_EVENT_ERROR)
-			arg->op.op = NULL;
-
-		/* return error type if errored */
-		break;
-
-	case TEP_EVENT_ERROR ... TEP_EVENT_NEWLINE:
-	default:
-		do_warning_event(event, "unexpected type %d", type);
-		return TEP_EVENT_ERROR;
-	}
-	*tok = token;
-
-	return type;
-}
-
-static int event_read_print_args(struct tep_event *event, struct tep_print_arg **list)
-{
-	enum tep_event_type type = TEP_EVENT_ERROR;
-	struct tep_print_arg *arg;
-	char *token;
-	int args = 0;
-
-	do {
-		if (type == TEP_EVENT_NEWLINE) {
-			type = read_token_item(&token);
-			continue;
-		}
-
-		arg = alloc_arg();
-		if (!arg) {
-			do_warning_event(event, "%s: not enough memory!",
-					 __func__);
-			return -1;
-		}
-
-		type = process_arg(event, arg, &token);
-
-		if (type == TEP_EVENT_ERROR) {
-			free_token(token);
-			free_arg(arg);
-			return -1;
-		}
-
-		*list = arg;
-		args++;
-
-		if (type == TEP_EVENT_OP) {
-			type = process_op(event, arg, &token);
-			free_token(token);
-			if (type == TEP_EVENT_ERROR) {
-				*list = NULL;
-				free_arg(arg);
-				return -1;
-			}
-			list = &arg->next;
-			continue;
-		}
-
-		if (type == TEP_EVENT_DELIM && strcmp(token, ",") == 0) {
-			free_token(token);
-			*list = arg;
-			list = &arg->next;
-			continue;
-		}
-		break;
-	} while (type != TEP_EVENT_NONE);
-
-	if (type != TEP_EVENT_NONE && type != TEP_EVENT_ERROR)
-		free_token(token);
-
-	return args;
-}
-
-static int event_read_print(struct tep_event *event)
-{
-	enum tep_event_type type;
-	char *token;
-	int ret;
-
-	if (read_expected_item(TEP_EVENT_ITEM, "print") < 0)
-		return -1;
-
-	if (read_expected(TEP_EVENT_ITEM, "fmt") < 0)
-		return -1;
-
-	if (read_expected(TEP_EVENT_OP, ":") < 0)
-		return -1;
-
-	if (read_expect_type(TEP_EVENT_DQUOTE, &token) < 0)
-		goto fail;
-
- concat:
-	event->print_fmt.format = token;
-	event->print_fmt.args = NULL;
-
-	/* ok to have no arg */
-	type = read_token_item(&token);
-
-	if (type == TEP_EVENT_NONE)
-		return 0;
-
-	/* Handle concatenation of print lines */
-	if (type == TEP_EVENT_DQUOTE) {
-		char *cat;
-
-		if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0)
-			goto fail;
-		free_token(token);
-		free_token(event->print_fmt.format);
-		event->print_fmt.format = NULL;
-		token = cat;
-		goto concat;
-	}
-			     
-	if (test_type_token(type, token, TEP_EVENT_DELIM, ","))
-		goto fail;
-
-	free_token(token);
-
-	ret = event_read_print_args(event, &event->print_fmt.args);
-	if (ret < 0)
-		return -1;
-
-	return ret;
-
- fail:
-	free_token(token);
-	return -1;
-}
-
-/**
- * tep_find_common_field - return a common field by event
- * @event: handle for the event
- * @name: the name of the common field to return
- *
- * Returns a common field from the event by the given @name.
- * This only searches the common fields and not all field.
- */
-struct tep_format_field *
-tep_find_common_field(struct tep_event *event, const char *name)
-{
-	struct tep_format_field *format;
-
-	for (format = event->format.common_fields;
-	     format; format = format->next) {
-		if (strcmp(format->name, name) == 0)
-			break;
-	}
-
-	return format;
-}
-
-/**
- * tep_find_field - find a non-common field
- * @event: handle for the event
- * @name: the name of the non-common field
- *
- * Returns a non-common field by the given @name.
- * This does not search common fields.
- */
-struct tep_format_field *
-tep_find_field(struct tep_event *event, const char *name)
-{
-	struct tep_format_field *format;
-
-	for (format = event->format.fields;
-	     format; format = format->next) {
-		if (strcmp(format->name, name) == 0)
-			break;
-	}
-
-	return format;
-}
-
-/**
- * tep_find_any_field - find any field by name
- * @event: handle for the event
- * @name: the name of the field
- *
- * Returns a field by the given @name.
- * This searches the common field names first, then
- * the non-common ones if a common one was not found.
- */
-struct tep_format_field *
-tep_find_any_field(struct tep_event *event, const char *name)
-{
-	struct tep_format_field *format;
-
-	format = tep_find_common_field(event, name);
-	if (format)
-		return format;
-	return tep_find_field(event, name);
-}
-
-/**
- * tep_read_number - read a number from data
- * @tep: a handle to the trace event parser context
- * @ptr: the raw data
- * @size: the size of the data that holds the number
- *
- * Returns the number (converted to host) from the
- * raw data.
- */
-unsigned long long tep_read_number(struct tep_handle *tep,
-				   const void *ptr, int size)
-{
-	unsigned long long val;
-
-	switch (size) {
-	case 1:
-		return *(unsigned char *)ptr;
-	case 2:
-		return data2host2(tep, *(unsigned short *)ptr);
-	case 4:
-		return data2host4(tep, *(unsigned int *)ptr);
-	case 8:
-		memcpy(&val, (ptr), sizeof(unsigned long long));
-		return data2host8(tep, val);
-	default:
-		/* BUG! */
-		return 0;
-	}
-}
-
-/**
- * tep_read_number_field - read a number from data
- * @field: a handle to the field
- * @data: the raw data to read
- * @value: the value to place the number in
- *
- * Reads raw data according to a field offset and size,
- * and translates it into @value.
- *
- * Returns 0 on success, -1 otherwise.
- */
-int tep_read_number_field(struct tep_format_field *field, const void *data,
-			  unsigned long long *value)
-{
-	if (!field)
-		return -1;
-	switch (field->size) {
-	case 1:
-	case 2:
-	case 4:
-	case 8:
-		*value = tep_read_number(field->event->tep,
-					 data + field->offset, field->size);
-		return 0;
-	default:
-		return -1;
-	}
-}
-
-static int get_common_info(struct tep_handle *tep,
-			   const char *type, int *offset, int *size)
-{
-	struct tep_event *event;
-	struct tep_format_field *field;
-
-	/*
-	 * All events should have the same common elements.
-	 * Pick any event to find where the type is;
-	 */
-	if (!tep->events) {
-		do_warning("no event_list!");
-		return -1;
-	}
-
-	event = tep->events[0];
-	field = tep_find_common_field(event, type);
-	if (!field)
-		return -1;
-
-	*offset = field->offset;
-	*size = field->size;
-
-	return 0;
-}
-
-static int __parse_common(struct tep_handle *tep, void *data,
-			  int *size, int *offset, const char *name)
-{
-	int ret;
-
-	if (!*size) {
-		ret = get_common_info(tep, name, offset, size);
-		if (ret < 0)
-			return ret;
-	}
-	return tep_read_number(tep, data + *offset, *size);
-}
-
-static int trace_parse_common_type(struct tep_handle *tep, void *data)
-{
-	return __parse_common(tep, data,
-			      &tep->type_size, &tep->type_offset,
-			      "common_type");
-}
-
-static int parse_common_pid(struct tep_handle *tep, void *data)
-{
-	return __parse_common(tep, data,
-			      &tep->pid_size, &tep->pid_offset,
-			      "common_pid");
-}
-
-static int parse_common_pc(struct tep_handle *tep, void *data)
-{
-	return __parse_common(tep, data,
-			      &tep->pc_size, &tep->pc_offset,
-			      "common_preempt_count");
-}
-
-static int parse_common_flags(struct tep_handle *tep, void *data)
-{
-	return __parse_common(tep, data,
-			      &tep->flags_size, &tep->flags_offset,
-			      "common_flags");
-}
-
-static int parse_common_lock_depth(struct tep_handle *tep, void *data)
-{
-	return __parse_common(tep, data,
-			      &tep->ld_size, &tep->ld_offset,
-			      "common_lock_depth");
-}
-
-static int parse_common_migrate_disable(struct tep_handle *tep, void *data)
-{
-	return __parse_common(tep, data,
-			      &tep->ld_size, &tep->ld_offset,
-			      "common_migrate_disable");
-}
-
-static int events_id_cmp(const void *a, const void *b);
-
-/**
- * tep_find_event - find an event by given id
- * @tep: a handle to the trace event parser context
- * @id: the id of the event
- *
- * Returns an event that has a given @id.
- */
-struct tep_event *tep_find_event(struct tep_handle *tep, int id)
-{
-	struct tep_event **eventptr;
-	struct tep_event key;
-	struct tep_event *pkey = &key;
-
-	/* Check cache first */
-	if (tep->last_event && tep->last_event->id == id)
-		return tep->last_event;
-
-	key.id = id;
-
-	eventptr = bsearch(&pkey, tep->events, tep->nr_events,
-			   sizeof(*tep->events), events_id_cmp);
-
-	if (eventptr) {
-		tep->last_event = *eventptr;
-		return *eventptr;
-	}
-
-	return NULL;
-}
-
-/**
- * tep_find_event_by_name - find an event by given name
- * @tep: a handle to the trace event parser context
- * @sys: the system name to search for
- * @name: the name of the event to search for
- *
- * This returns an event with a given @name and under the system
- * @sys. If @sys is NULL the first event with @name is returned.
- */
-struct tep_event *
-tep_find_event_by_name(struct tep_handle *tep,
-		       const char *sys, const char *name)
-{
-	struct tep_event *event = NULL;
-	int i;
-
-	if (tep->last_event &&
-	    strcmp(tep->last_event->name, name) == 0 &&
-	    (!sys || strcmp(tep->last_event->system, sys) == 0))
-		return tep->last_event;
-
-	for (i = 0; i < tep->nr_events; i++) {
-		event = tep->events[i];
-		if (strcmp(event->name, name) == 0) {
-			if (!sys)
-				break;
-			if (strcmp(event->system, sys) == 0)
-				break;
-		}
-	}
-	if (i == tep->nr_events)
-		event = NULL;
-
-	tep->last_event = event;
-	return event;
-}
-
-static unsigned long long
-eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg)
-{
-	struct tep_handle *tep = event->tep;
-	unsigned long long val = 0;
-	unsigned long long left, right;
-	struct tep_print_arg *typearg = NULL;
-	struct tep_print_arg *larg;
-	unsigned long offset;
-	unsigned int field_size;
-
-	switch (arg->type) {
-	case TEP_PRINT_NULL:
-		/* ?? */
-		return 0;
-	case TEP_PRINT_ATOM:
-		return strtoull(arg->atom.atom, NULL, 0);
-	case TEP_PRINT_FIELD:
-		if (!arg->field.field) {
-			arg->field.field = tep_find_any_field(event, arg->field.name);
-			if (!arg->field.field)
-				goto out_warning_field;
-			
-		}
-		/* must be a number */
-		val = tep_read_number(tep, data + arg->field.field->offset,
-				      arg->field.field->size);
-		break;
-	case TEP_PRINT_FLAGS:
-	case TEP_PRINT_SYMBOL:
-	case TEP_PRINT_INT_ARRAY:
-	case TEP_PRINT_HEX:
-	case TEP_PRINT_HEX_STR:
-		break;
-	case TEP_PRINT_TYPE:
-		val = eval_num_arg(data, size, event, arg->typecast.item);
-		return eval_type(val, arg, 0);
-	case TEP_PRINT_STRING:
-	case TEP_PRINT_BSTRING:
-	case TEP_PRINT_BITMASK:
-		return 0;
-	case TEP_PRINT_FUNC: {
-		struct trace_seq s;
-		trace_seq_init(&s);
-		val = process_defined_func(&s, data, size, event, arg);
-		trace_seq_destroy(&s);
-		return val;
-	}
-	case TEP_PRINT_OP:
-		if (strcmp(arg->op.op, "[") == 0) {
-			/*
-			 * Arrays are special, since we don't want
-			 * to read the arg as is.
-			 */
-			right = eval_num_arg(data, size, event, arg->op.right);
-
-			/* handle typecasts */
-			larg = arg->op.left;
-			while (larg->type == TEP_PRINT_TYPE) {
-				if (!typearg)
-					typearg = larg;
-				larg = larg->typecast.item;
-			}
-
-			/* Default to long size */
-			field_size = tep->long_size;
-
-			switch (larg->type) {
-			case TEP_PRINT_DYNAMIC_ARRAY:
-				offset = tep_read_number(tep,
-						   data + larg->dynarray.field->offset,
-						   larg->dynarray.field->size);
-				if (larg->dynarray.field->elementsize)
-					field_size = larg->dynarray.field->elementsize;
-				/*
-				 * The actual length of the dynamic array is stored
-				 * in the top half of the field, and the offset
-				 * is in the bottom half of the 32 bit field.
-				 */
-				offset &= 0xffff;
-				offset += right;
-				break;
-			case TEP_PRINT_FIELD:
-				if (!larg->field.field) {
-					larg->field.field =
-						tep_find_any_field(event, larg->field.name);
-					if (!larg->field.field) {
-						arg = larg;
-						goto out_warning_field;
-					}
-				}
-				field_size = larg->field.field->elementsize;
-				offset = larg->field.field->offset +
-					right * larg->field.field->elementsize;
-				break;
-			default:
-				goto default_op; /* oops, all bets off */
-			}
-			val = tep_read_number(tep,
-					      data + offset, field_size);
-			if (typearg)
-				val = eval_type(val, typearg, 1);
-			break;
-		} else if (strcmp(arg->op.op, "?") == 0) {
-			left = eval_num_arg(data, size, event, arg->op.left);
-			arg = arg->op.right;
-			if (left)
-				val = eval_num_arg(data, size, event, arg->op.left);
-			else
-				val = eval_num_arg(data, size, event, arg->op.right);
-			break;
-		}
- default_op:
-		left = eval_num_arg(data, size, event, arg->op.left);
-		right = eval_num_arg(data, size, event, arg->op.right);
-		switch (arg->op.op[0]) {
-		case '!':
-			switch (arg->op.op[1]) {
-			case 0:
-				val = !right;
-				break;
-			case '=':
-				val = left != right;
-				break;
-			default:
-				goto out_warning_op;
-			}
-			break;
-		case '~':
-			val = ~right;
-			break;
-		case '|':
-			if (arg->op.op[1])
-				val = left || right;
-			else
-				val = left | right;
-			break;
-		case '&':
-			if (arg->op.op[1])
-				val = left && right;
-			else
-				val = left & right;
-			break;
-		case '<':
-			switch (arg->op.op[1]) {
-			case 0:
-				val = left < right;
-				break;
-			case '<':
-				val = left << right;
-				break;
-			case '=':
-				val = left <= right;
-				break;
-			default:
-				goto out_warning_op;
-			}
-			break;
-		case '>':
-			switch (arg->op.op[1]) {
-			case 0:
-				val = left > right;
-				break;
-			case '>':
-				val = left >> right;
-				break;
-			case '=':
-				val = left >= right;
-				break;
-			default:
-				goto out_warning_op;
-			}
-			break;
-		case '=':
-			if (arg->op.op[1] != '=')
-				goto out_warning_op;
-
-			val = left == right;
-			break;
-		case '-':
-			val = left - right;
-			break;
-		case '+':
-			val = left + right;
-			break;
-		case '/':
-			val = left / right;
-			break;
-		case '%':
-			val = left % right;
-			break;
-		case '*':
-			val = left * right;
-			break;
-		default:
-			goto out_warning_op;
-		}
-		break;
-	case TEP_PRINT_DYNAMIC_ARRAY_LEN:
-		offset = tep_read_number(tep,
-					 data + arg->dynarray.field->offset,
-					 arg->dynarray.field->size);
-		/*
-		 * The total allocated length of the dynamic array is
-		 * stored in the top half of the field, and the offset
-		 * is in the bottom half of the 32 bit field.
-		 */
-		val = (unsigned long long)(offset >> 16);
-		break;
-	case TEP_PRINT_DYNAMIC_ARRAY:
-		/* Without [], we pass the address to the dynamic data */
-		offset = tep_read_number(tep,
-					 data + arg->dynarray.field->offset,
-					 arg->dynarray.field->size);
-		/*
-		 * The total allocated length of the dynamic array is
-		 * stored in the top half of the field, and the offset
-		 * is in the bottom half of the 32 bit field.
-		 */
-		offset &= 0xffff;
-		val = (unsigned long long)((unsigned long)data + offset);
-		break;
-	default: /* not sure what to do there */
-		return 0;
-	}
-	return val;
-
-out_warning_op:
-	do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op);
-	return 0;
-
-out_warning_field:
-	do_warning_event(event, "%s: field %s not found",
-			 __func__, arg->field.name);
-	return 0;
-}
-
-struct flag {
-	const char *name;
-	unsigned long long value;
-};
-
-static const struct flag flags[] = {
-	{ "HI_SOFTIRQ", 0 },
-	{ "TIMER_SOFTIRQ", 1 },
-	{ "NET_TX_SOFTIRQ", 2 },
-	{ "NET_RX_SOFTIRQ", 3 },
-	{ "BLOCK_SOFTIRQ", 4 },
-	{ "IRQ_POLL_SOFTIRQ", 5 },
-	{ "TASKLET_SOFTIRQ", 6 },
-	{ "SCHED_SOFTIRQ", 7 },
-	{ "HRTIMER_SOFTIRQ", 8 },
-	{ "RCU_SOFTIRQ", 9 },
-
-	{ "HRTIMER_NORESTART", 0 },
-	{ "HRTIMER_RESTART", 1 },
-};
-
-static long long eval_flag(const char *flag)
-{
-	int i;
-
-	/*
-	 * Some flags in the format files do not get converted.
-	 * If the flag is not numeric, see if it is something that
-	 * we already know about.
-	 */
-	if (isdigit(flag[0]))
-		return strtoull(flag, NULL, 0);
-
-	for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
-		if (strcmp(flags[i].name, flag) == 0)
-			return flags[i].value;
-
-	return -1LL;
-}
-
-static void print_str_to_seq(struct trace_seq *s, const char *format,
-			     int len_arg, const char *str)
-{
-	if (len_arg >= 0)
-		trace_seq_printf(s, format, len_arg, str);
-	else
-		trace_seq_printf(s, format, str);
-}
-
-static void print_bitmask_to_seq(struct tep_handle *tep,
-				 struct trace_seq *s, const char *format,
-				 int len_arg, const void *data, int size)
-{
-	int nr_bits = size * 8;
-	int str_size = (nr_bits + 3) / 4;
-	int len = 0;
-	char buf[3];
-	char *str;
-	int index;
-	int i;
-
-	/*
-	 * The kernel likes to put in commas every 32 bits, we
-	 * can do the same.
-	 */
-	str_size += (nr_bits - 1) / 32;
-
-	str = malloc(str_size + 1);
-	if (!str) {
-		do_warning("%s: not enough memory!", __func__);
-		return;
-	}
-	str[str_size] = 0;
-
-	/* Start out with -2 for the two chars per byte */
-	for (i = str_size - 2; i >= 0; i -= 2) {
-		/*
-		 * data points to a bit mask of size bytes.
-		 * In the kernel, this is an array of long words, thus
-		 * endianness is very important.
-		 */
-		if (tep->file_bigendian)
-			index = size - (len + 1);
-		else
-			index = len;
-
-		snprintf(buf, 3, "%02x", *((unsigned char *)data + index));
-		memcpy(str + i, buf, 2);
-		len++;
-		if (!(len & 3) && i > 0) {
-			i--;
-			str[i] = ',';
-		}
-	}
-
-	if (len_arg >= 0)
-		trace_seq_printf(s, format, len_arg, str);
-	else
-		trace_seq_printf(s, format, str);
-
-	free(str);
-}
-
-static void print_str_arg(struct trace_seq *s, void *data, int size,
-			  struct tep_event *event, const char *format,
-			  int len_arg, struct tep_print_arg *arg)
-{
-	struct tep_handle *tep = event->tep;
-	struct tep_print_flag_sym *flag;
-	struct tep_format_field *field;
-	struct printk_map *printk;
-	long long val, fval;
-	unsigned long long addr;
-	char *str;
-	unsigned char *hex;
-	int print;
-	int i, len;
-
-	switch (arg->type) {
-	case TEP_PRINT_NULL:
-		/* ?? */
-		return;
-	case TEP_PRINT_ATOM:
-		print_str_to_seq(s, format, len_arg, arg->atom.atom);
-		return;
-	case TEP_PRINT_FIELD:
-		field = arg->field.field;
-		if (!field) {
-			field = tep_find_any_field(event, arg->field.name);
-			if (!field) {
-				str = arg->field.name;
-				goto out_warning_field;
-			}
-			arg->field.field = field;
-		}
-		/* Zero sized fields, mean the rest of the data */
-		len = field->size ? : size - field->offset;
-
-		/*
-		 * Some events pass in pointers. If this is not an array
-		 * and the size is the same as long_size, assume that it
-		 * is a pointer.
-		 */
-		if (!(field->flags & TEP_FIELD_IS_ARRAY) &&
-		    field->size == tep->long_size) {
-
-			/* Handle heterogeneous recording and processing
-			 * architectures
-			 *
-			 * CASE I:
-			 * Traces recorded on 32-bit devices (32-bit
-			 * addressing) and processed on 64-bit devices:
-			 * In this case, only 32 bits should be read.
-			 *
-			 * CASE II:
-			 * Traces recorded on 64 bit devices and processed
-			 * on 32-bit devices:
-			 * In this case, 64 bits must be read.
-			 */
-			addr = (tep->long_size == 8) ?
-				*(unsigned long long *)(data + field->offset) :
-				(unsigned long long)*(unsigned int *)(data + field->offset);
-
-			/* Check if it matches a print format */
-			printk = find_printk(tep, addr);
-			if (printk)
-				trace_seq_puts(s, printk->printk);
-			else
-				trace_seq_printf(s, "%llx", addr);
-			break;
-		}
-		str = malloc(len + 1);
-		if (!str) {
-			do_warning_event(event, "%s: not enough memory!",
-					 __func__);
-			return;
-		}
-		memcpy(str, data + field->offset, len);
-		str[len] = 0;
-		print_str_to_seq(s, format, len_arg, str);
-		free(str);
-		break;
-	case TEP_PRINT_FLAGS:
-		val = eval_num_arg(data, size, event, arg->flags.field);
-		print = 0;
-		for (flag = arg->flags.flags; flag; flag = flag->next) {
-			fval = eval_flag(flag->value);
-			if (!val && fval < 0) {
-				print_str_to_seq(s, format, len_arg, flag->str);
-				break;
-			}
-			if (fval > 0 && (val & fval) == fval) {
-				if (print && arg->flags.delim)
-					trace_seq_puts(s, arg->flags.delim);
-				print_str_to_seq(s, format, len_arg, flag->str);
-				print = 1;
-				val &= ~fval;
-			}
-		}
-		if (val) {
-			if (print && arg->flags.delim)
-				trace_seq_puts(s, arg->flags.delim);
-			trace_seq_printf(s, "0x%llx", val);
-		}
-		break;
-	case TEP_PRINT_SYMBOL:
-		val = eval_num_arg(data, size, event, arg->symbol.field);
-		for (flag = arg->symbol.symbols; flag; flag = flag->next) {
-			fval = eval_flag(flag->value);
-			if (val == fval) {
-				print_str_to_seq(s, format, len_arg, flag->str);
-				break;
-			}
-		}
-		if (!flag)
-			trace_seq_printf(s, "0x%llx", val);
-		break;
-	case TEP_PRINT_HEX:
-	case TEP_PRINT_HEX_STR:
-		if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
-			unsigned long offset;
-			offset = tep_read_number(tep,
-				data + arg->hex.field->dynarray.field->offset,
-				arg->hex.field->dynarray.field->size);
-			hex = data + (offset & 0xffff);
-		} else {
-			field = arg->hex.field->field.field;
-			if (!field) {
-				str = arg->hex.field->field.name;
-				field = tep_find_any_field(event, str);
-				if (!field)
-					goto out_warning_field;
-				arg->hex.field->field.field = field;
-			}
-			hex = data + field->offset;
-		}
-		len = eval_num_arg(data, size, event, arg->hex.size);
-		for (i = 0; i < len; i++) {
-			if (i && arg->type == TEP_PRINT_HEX)
-				trace_seq_putc(s, ' ');
-			trace_seq_printf(s, "%02x", hex[i]);
-		}
-		break;
-
-	case TEP_PRINT_INT_ARRAY: {
-		void *num;
-		int el_size;
-
-		if (arg->int_array.field->type == TEP_PRINT_DYNAMIC_ARRAY) {
-			unsigned long offset;
-			struct tep_format_field *field =
-				arg->int_array.field->dynarray.field;
-			offset = tep_read_number(tep,
-						 data + field->offset,
-						 field->size);
-			num = data + (offset & 0xffff);
-		} else {
-			field = arg->int_array.field->field.field;
-			if (!field) {
-				str = arg->int_array.field->field.name;
-				field = tep_find_any_field(event, str);
-				if (!field)
-					goto out_warning_field;
-				arg->int_array.field->field.field = field;
-			}
-			num = data + field->offset;
-		}
-		len = eval_num_arg(data, size, event, arg->int_array.count);
-		el_size = eval_num_arg(data, size, event,
-				       arg->int_array.el_size);
-		for (i = 0; i < len; i++) {
-			if (i)
-				trace_seq_putc(s, ' ');
-
-			if (el_size == 1) {
-				trace_seq_printf(s, "%u", *(uint8_t *)num);
-			} else if (el_size == 2) {
-				trace_seq_printf(s, "%u", *(uint16_t *)num);
-			} else if (el_size == 4) {
-				trace_seq_printf(s, "%u", *(uint32_t *)num);
-			} else if (el_size == 8) {
-				trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num);
-			} else {
-				trace_seq_printf(s, "BAD SIZE:%d 0x%x",
-						 el_size, *(uint8_t *)num);
-				el_size = 1;
-			}
-
-			num += el_size;
-		}
-		break;
-	}
-	case TEP_PRINT_TYPE:
-		break;
-	case TEP_PRINT_STRING: {
-		int str_offset;
-
-		if (!arg->string.field)
-			arg->string.field = tep_find_any_field(event, arg->string.string);
-		if (!arg->string.field)
-			break;
-
-		str_offset = data2host4(tep,
-				*(unsigned int *)(data + arg->string.field->offset));
-		str_offset &= 0xffff;
-		if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE)
-			str_offset += arg->string.field->offset + arg->string.field->size;
-		print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
-		break;
-	}
-	case TEP_PRINT_BSTRING:
-		print_str_to_seq(s, format, len_arg, arg->string.string);
-		break;
-	case TEP_PRINT_BITMASK: {
-		int bitmask_offset;
-		int bitmask_size;
-
-		if (!arg->bitmask.field)
-			arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask);
-		if (!arg->bitmask.field)
-			break;
-		bitmask_offset = data2host4(tep,
-				*(unsigned int *)(data + arg->bitmask.field->offset));
-		bitmask_size = bitmask_offset >> 16;
-		bitmask_offset &= 0xffff;
-		if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE)
-			bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size;
-		print_bitmask_to_seq(tep, s, format, len_arg,
-				     data + bitmask_offset, bitmask_size);
-		break;
-	}
-	case TEP_PRINT_OP:
-		/*
-		 * The only op for string should be ? :
-		 */
-		if (arg->op.op[0] != '?')
-			return;
-		val = eval_num_arg(data, size, event, arg->op.left);
-		if (val)
-			print_str_arg(s, data, size, event,
-				      format, len_arg, arg->op.right->op.left);
-		else
-			print_str_arg(s, data, size, event,
-				      format, len_arg, arg->op.right->op.right);
-		break;
-	case TEP_PRINT_FUNC:
-		process_defined_func(s, data, size, event, arg);
-		break;
-	default:
-		/* well... */
-		break;
-	}
-
-	return;
-
-out_warning_field:
-	do_warning_event(event, "%s: field %s not found",
-			 __func__, arg->field.name);
-}
-
-static unsigned long long
-process_defined_func(struct trace_seq *s, void *data, int size,
-		     struct tep_event *event, struct tep_print_arg *arg)
-{
-	struct tep_function_handler *func_handle = arg->func.func;
-	struct func_params *param;
-	unsigned long long *args;
-	unsigned long long ret;
-	struct tep_print_arg *farg;
-	struct trace_seq str;
-	struct save_str {
-		struct save_str *next;
-		char *str;
-	} *strings = NULL, *string;
-	int i;
-
-	if (!func_handle->nr_args) {
-		ret = (*func_handle->func)(s, NULL);
-		goto out;
-	}
-
-	farg = arg->func.args;
-	param = func_handle->params;
-
-	ret = ULLONG_MAX;
-	args = malloc(sizeof(*args) * func_handle->nr_args);
-	if (!args)
-		goto out;
-
-	for (i = 0; i < func_handle->nr_args; i++) {
-		switch (param->type) {
-		case TEP_FUNC_ARG_INT:
-		case TEP_FUNC_ARG_LONG:
-		case TEP_FUNC_ARG_PTR:
-			args[i] = eval_num_arg(data, size, event, farg);
-			break;
-		case TEP_FUNC_ARG_STRING:
-			trace_seq_init(&str);
-			print_str_arg(&str, data, size, event, "%s", -1, farg);
-			trace_seq_terminate(&str);
-			string = malloc(sizeof(*string));
-			if (!string) {
-				do_warning_event(event, "%s(%d): malloc str",
-						 __func__, __LINE__);
-				goto out_free;
-			}
-			string->next = strings;
-			string->str = strdup(str.buffer);
-			if (!string->str) {
-				free(string);
-				do_warning_event(event, "%s(%d): malloc str",
-						 __func__, __LINE__);
-				goto out_free;
-			}
-			args[i] = (uintptr_t)string->str;
-			strings = string;
-			trace_seq_destroy(&str);
-			break;
-		default:
-			/*
-			 * Something went totally wrong, this is not
-			 * an input error, something in this code broke.
-			 */
-			do_warning_event(event, "Unexpected end of arguments\n");
-			goto out_free;
-		}
-		farg = farg->next;
-		param = param->next;
-	}
-
-	ret = (*func_handle->func)(s, args);
-out_free:
-	free(args);
-	while (strings) {
-		string = strings;
-		strings = string->next;
-		free(string->str);
-		free(string);
-	}
-
- out:
-	/* TBD : handle return type here */
-	return ret;
-}
-
-static void free_args(struct tep_print_arg *args)
-{
-	struct tep_print_arg *next;
-
-	while (args) {
-		next = args->next;
-
-		free_arg(args);
-		args = next;
-	}
-}
-
-static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event)
-{
-	struct tep_handle *tep = event->tep;
-	struct tep_format_field *field, *ip_field;
-	struct tep_print_arg *args, *arg, **next;
-	unsigned long long ip, val;
-	char *ptr;
-	void *bptr;
-	int vsize = 0;
-
-	field = tep->bprint_buf_field;
-	ip_field = tep->bprint_ip_field;
-
-	if (!field) {
-		field = tep_find_field(event, "buf");
-		if (!field) {
-			do_warning_event(event, "can't find buffer field for binary printk");
-			return NULL;
-		}
-		ip_field = tep_find_field(event, "ip");
-		if (!ip_field) {
-			do_warning_event(event, "can't find ip field for binary printk");
-			return NULL;
-		}
-		tep->bprint_buf_field = field;
-		tep->bprint_ip_field = ip_field;
-	}
-
-	ip = tep_read_number(tep, data + ip_field->offset, ip_field->size);
-
-	/*
-	 * The first arg is the IP pointer.
-	 */
-	args = alloc_arg();
-	if (!args) {
-		do_warning_event(event, "%s(%d): not enough memory!",
-				 __func__, __LINE__);
-		return NULL;
-	}
-	arg = args;
-	arg->next = NULL;
-	next = &arg->next;
-
-	arg->type = TEP_PRINT_ATOM;
-		
-	if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
-		goto out_free;
-
-	/* skip the first "%ps: " */
-	for (ptr = fmt + 5, bptr = data + field->offset;
-	     bptr < data + size && *ptr; ptr++) {
-		int ls = 0;
-
-		if (*ptr == '%') {
- process_again:
-			ptr++;
-			switch (*ptr) {
-			case '%':
-				break;
-			case 'l':
-				ls++;
-				goto process_again;
-			case 'L':
-				ls = 2;
-				goto process_again;
-			case '0' ... '9':
-				goto process_again;
-			case '.':
-				goto process_again;
-			case 'z':
-			case 'Z':
-				ls = 1;
-				goto process_again;
-			case 'p':
-				ls = 1;
-				if (isalnum(ptr[1])) {
-					ptr++;
-					/* Check for special pointers */
-					switch (*ptr) {
-					case 's':
-					case 'S':
-					case 'x':
-						break;
-					case 'f':
-					case 'F':
-						/*
-						 * Pre-5.5 kernels use %pf and
-						 * %pF for printing symbols
-						 * while kernels since 5.5 use
-						 * %pfw for fwnodes. So check
-						 * %p[fF] isn't followed by 'w'.
-						 */
-						if (ptr[1] != 'w')
-							break;
-						/* fall through */
-					default:
-						/*
-						 * Older kernels do not process
-						 * dereferenced pointers.
-						 * Only process if the pointer
-						 * value is a printable.
-						 */
-						if (isprint(*(char *)bptr))
-							goto process_string;
-					}
-				}
-				/* fall through */
-			case 'd':
-			case 'u':
-			case 'i':
-			case 'x':
-			case 'X':
-			case 'o':
-				switch (ls) {
-				case 0:
-					vsize = 4;
-					break;
-				case 1:
-					vsize = tep->long_size;
-					break;
-				case 2:
-					vsize = 8;
-					break;
-				default:
-					vsize = ls; /* ? */
-					break;
-				}
-			/* fall through */
-			case '*':
-				if (*ptr == '*')
-					vsize = 4;
-
-				/* the pointers are always 4 bytes aligned */
-				bptr = (void *)(((unsigned long)bptr + 3) &
-						~3);
-				val = tep_read_number(tep, bptr, vsize);
-				bptr += vsize;
-				arg = alloc_arg();
-				if (!arg) {
-					do_warning_event(event, "%s(%d): not enough memory!",
-						   __func__, __LINE__);
-					goto out_free;
-				}
-				arg->next = NULL;
-				arg->type = TEP_PRINT_ATOM;
-				if (asprintf(&arg->atom.atom, "%lld", val) < 0) {
-					free(arg);
-					goto out_free;
-				}
-				*next = arg;
-				next = &arg->next;
-				/*
-				 * The '*' case means that an arg is used as the length.
-				 * We need to continue to figure out for what.
-				 */
-				if (*ptr == '*')
-					goto process_again;
-
-				break;
-			case 's':
- process_string:
-				arg = alloc_arg();
-				if (!arg) {
-					do_warning_event(event, "%s(%d): not enough memory!",
-						   __func__, __LINE__);
-					goto out_free;
-				}
-				arg->next = NULL;
-				arg->type = TEP_PRINT_BSTRING;
-				arg->string.string = strdup(bptr);
-				if (!arg->string.string)
-					goto out_free;
-				bptr += strlen(bptr) + 1;
-				*next = arg;
-				next = &arg->next;
-			default:
-				break;
-			}
-		}
-	}
-
-	return args;
-
-out_free:
-	free_args(args);
-	return NULL;
-}
-
-static char *
-get_bprint_format(void *data, int size __maybe_unused,
-		  struct tep_event *event)
-{
-	struct tep_handle *tep = event->tep;
-	unsigned long long addr;
-	struct tep_format_field *field;
-	struct printk_map *printk;
-	char *format;
-
-	field = tep->bprint_fmt_field;
-
-	if (!field) {
-		field = tep_find_field(event, "fmt");
-		if (!field) {
-			do_warning_event(event, "can't find format field for binary printk");
-			return NULL;
-		}
-		tep->bprint_fmt_field = field;
-	}
-
-	addr = tep_read_number(tep, data + field->offset, field->size);
-
-	printk = find_printk(tep, addr);
-	if (!printk) {
-		if (asprintf(&format, "%%ps: (NO FORMAT FOUND at %llx)\n", addr) < 0)
-			return NULL;
-		return format;
-	}
-
-	if (asprintf(&format, "%s: %s", "%ps", printk->printk) < 0)
-		return NULL;
-
-	return format;
-}
-
-static int print_mac_arg(struct trace_seq *s, const char *format,
-			 void *data, int size, struct tep_event *event,
-			 struct tep_print_arg *arg)
-{
-	const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
-	bool reverse = false;
-	unsigned char *buf;
-	int ret = 0;
-
-	if (arg->type == TEP_PRINT_FUNC) {
-		process_defined_func(s, data, size, event, arg);
-		return 0;
-	}
-
-	if (arg->type != TEP_PRINT_FIELD) {
-		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
-				 arg->type);
-		return 0;
-	}
-
-	if (format[0] == 'm') {
-		fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
-	} else if (format[0] == 'M' && format[1] == 'F') {
-		fmt = "%.2x-%.2x-%.2x-%.2x-%.2x-%.2x";
-		ret++;
-	}
-	if (format[1] == 'R') {
-		reverse = true;
-		ret++;
-	}
-
-	if (!arg->field.field) {
-		arg->field.field =
-			tep_find_any_field(event, arg->field.name);
-		if (!arg->field.field) {
-			do_warning_event(event, "%s: field %s not found",
-					 __func__, arg->field.name);
-			return ret;
-		}
-	}
-	if (arg->field.field->size != 6) {
-		trace_seq_printf(s, "INVALIDMAC");
-		return ret;
-	}
-
-	buf = data + arg->field.field->offset;
-	if (reverse)
-		trace_seq_printf(s, fmt, buf[5], buf[4], buf[3], buf[2], buf[1], buf[0]);
-	else
-		trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
-
-	return ret;
-}
-
-static int parse_ip4_print_args(struct tep_handle *tep,
-				const char *ptr, bool *reverse)
-{
-	int ret = 0;
-
-	*reverse = false;
-
-	/* hnbl */
-	switch (*ptr) {
-	case 'h':
-		if (tep->file_bigendian)
-			*reverse = false;
-		else
-			*reverse = true;
-		ret++;
-		break;
-	case 'l':
-		*reverse = true;
-		ret++;
-		break;
-	case 'n':
-	case 'b':
-		ret++;
-		/* fall through */
-	default:
-		*reverse = false;
-		break;
-	}
-
-	return ret;
-}
-
-static void print_ip4_addr(struct trace_seq *s, char i, bool reverse, unsigned char *buf)
-{
-	const char *fmt;
-
-	if (i == 'i')
-		fmt = "%03d.%03d.%03d.%03d";
-	else
-		fmt = "%d.%d.%d.%d";
-
-	if (reverse)
-		trace_seq_printf(s, fmt, buf[3], buf[2], buf[1], buf[0]);
-	else
-		trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3]);
-
-}
-
-static inline bool ipv6_addr_v4mapped(const struct in6_addr *a)
-{
-	return ((unsigned long)(a->s6_addr32[0] | a->s6_addr32[1]) |
-		(unsigned long)(a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0UL;
-}
-
-static inline bool ipv6_addr_is_isatap(const struct in6_addr *addr)
-{
-	return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE);
-}
-
-static void print_ip6c_addr(struct trace_seq *s, unsigned char *addr)
-{
-	int i, j, range;
-	unsigned char zerolength[8];
-	int longest = 1;
-	int colonpos = -1;
-	uint16_t word;
-	uint8_t hi, lo;
-	bool needcolon = false;
-	bool useIPv4;
-	struct in6_addr in6;
-
-	memcpy(&in6, addr, sizeof(struct in6_addr));
-
-	useIPv4 = ipv6_addr_v4mapped(&in6) || ipv6_addr_is_isatap(&in6);
-
-	memset(zerolength, 0, sizeof(zerolength));
-
-	if (useIPv4)
-		range = 6;
-	else
-		range = 8;
-
-	/* find position of longest 0 run */
-	for (i = 0; i < range; i++) {
-		for (j = i; j < range; j++) {
-			if (in6.s6_addr16[j] != 0)
-				break;
-			zerolength[i]++;
-		}
-	}
-	for (i = 0; i < range; i++) {
-		if (zerolength[i] > longest) {
-			longest = zerolength[i];
-			colonpos = i;
-		}
-	}
-	if (longest == 1)		/* don't compress a single 0 */
-		colonpos = -1;
-
-	/* emit address */
-	for (i = 0; i < range; i++) {
-		if (i == colonpos) {
-			if (needcolon || i == 0)
-				trace_seq_printf(s, ":");
-			trace_seq_printf(s, ":");
-			needcolon = false;
-			i += longest - 1;
-			continue;
-		}
-		if (needcolon) {
-			trace_seq_printf(s, ":");
-			needcolon = false;
-		}
-		/* hex u16 without leading 0s */
-		word = ntohs(in6.s6_addr16[i]);
-		hi = word >> 8;
-		lo = word & 0xff;
-		if (hi)
-			trace_seq_printf(s, "%x%02x", hi, lo);
-		else
-			trace_seq_printf(s, "%x", lo);
-
-		needcolon = true;
-	}
-
-	if (useIPv4) {
-		if (needcolon)
-			trace_seq_printf(s, ":");
-		print_ip4_addr(s, 'I', false, &in6.s6_addr[12]);
-	}
-
-	return;
-}
-
-static void print_ip6_addr(struct trace_seq *s, char i, unsigned char *buf)
-{
-	int j;
-
-	for (j = 0; j < 16; j += 2) {
-		trace_seq_printf(s, "%02x%02x", buf[j], buf[j+1]);
-		if (i == 'I' && j < 14)
-			trace_seq_printf(s, ":");
-	}
-}
-
-/*
- * %pi4   print an IPv4 address with leading zeros
- * %pI4   print an IPv4 address without leading zeros
- * %pi6   print an IPv6 address without colons
- * %pI6   print an IPv6 address with colons
- * %pI6c  print an IPv6 address in compressed form with colons
- * %pISpc print an IP address based on sockaddr; p adds port.
- */
-static int print_ipv4_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct tep_event *event,
-			  struct tep_print_arg *arg)
-{
-	bool reverse = false;
-	unsigned char *buf;
-	int ret;
-
-	ret = parse_ip4_print_args(event->tep, ptr, &reverse);
-
-	if (arg->type == TEP_PRINT_FUNC) {
-		process_defined_func(s, data, size, event, arg);
-		return ret;
-	}
-
-	if (arg->type != TEP_PRINT_FIELD) {
-		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
-		return ret;
-	}
-
-	if (!arg->field.field) {
-		arg->field.field =
-			tep_find_any_field(event, arg->field.name);
-		if (!arg->field.field) {
-			do_warning("%s: field %s not found",
-				   __func__, arg->field.name);
-			return ret;
-		}
-	}
-
-	buf = data + arg->field.field->offset;
-
-	if (arg->field.field->size != 4) {
-		trace_seq_printf(s, "INVALIDIPv4");
-		return ret;
-	}
-
-	print_ip4_addr(s, i, reverse, buf);
-	return ret;
-
-}
-
-static int print_ipv6_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct tep_event *event,
-			  struct tep_print_arg *arg)
-{
-	char have_c = 0;
-	unsigned char *buf;
-	int rc = 0;
-
-	/* pI6c */
-	if (i == 'I' && *ptr == 'c') {
-		have_c = 1;
-		ptr++;
-		rc++;
-	}
-
-	if (arg->type == TEP_PRINT_FUNC) {
-		process_defined_func(s, data, size, event, arg);
-		return rc;
-	}
-
-	if (arg->type != TEP_PRINT_FIELD) {
-		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
-		return rc;
-	}
-
-	if (!arg->field.field) {
-		arg->field.field =
-			tep_find_any_field(event, arg->field.name);
-		if (!arg->field.field) {
-			do_warning("%s: field %s not found",
-				   __func__, arg->field.name);
-			return rc;
-		}
-	}
-
-	buf = data + arg->field.field->offset;
-
-	if (arg->field.field->size != 16) {
-		trace_seq_printf(s, "INVALIDIPv6");
-		return rc;
-	}
-
-	if (have_c)
-		print_ip6c_addr(s, buf);
-	else
-		print_ip6_addr(s, i, buf);
-
-	return rc;
-}
-
-static int print_ipsa_arg(struct trace_seq *s, const char *ptr, char i,
-			  void *data, int size, struct tep_event *event,
-			  struct tep_print_arg *arg)
-{
-	char have_c = 0, have_p = 0;
-	unsigned char *buf;
-	struct sockaddr_storage *sa;
-	bool reverse = false;
-	int rc = 0;
-	int ret;
-
-	/* pISpc */
-	if (i == 'I') {
-		if (*ptr == 'p') {
-			have_p = 1;
-			ptr++;
-			rc++;
-		}
-		if (*ptr == 'c') {
-			have_c = 1;
-			ptr++;
-			rc++;
-		}
-	}
-	ret = parse_ip4_print_args(event->tep, ptr, &reverse);
-	ptr += ret;
-	rc += ret;
-
-	if (arg->type == TEP_PRINT_FUNC) {
-		process_defined_func(s, data, size, event, arg);
-		return rc;
-	}
-
-	if (arg->type != TEP_PRINT_FIELD) {
-		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
-		return rc;
-	}
-
-	if (!arg->field.field) {
-		arg->field.field =
-			tep_find_any_field(event, arg->field.name);
-		if (!arg->field.field) {
-			do_warning("%s: field %s not found",
-				   __func__, arg->field.name);
-			return rc;
-		}
-	}
-
-	sa = (struct sockaddr_storage *) (data + arg->field.field->offset);
-
-	if (sa->ss_family == AF_INET) {
-		struct sockaddr_in *sa4 = (struct sockaddr_in *) sa;
-
-		if (arg->field.field->size < sizeof(struct sockaddr_in)) {
-			trace_seq_printf(s, "INVALIDIPv4");
-			return rc;
-		}
-
-		print_ip4_addr(s, i, reverse, (unsigned char *) &sa4->sin_addr);
-		if (have_p)
-			trace_seq_printf(s, ":%d", ntohs(sa4->sin_port));
-
-
-	} else if (sa->ss_family == AF_INET6) {
-		struct sockaddr_in6 *sa6 = (struct sockaddr_in6 *) sa;
-
-		if (arg->field.field->size < sizeof(struct sockaddr_in6)) {
-			trace_seq_printf(s, "INVALIDIPv6");
-			return rc;
-		}
-
-		if (have_p)
-			trace_seq_printf(s, "[");
-
-		buf = (unsigned char *) &sa6->sin6_addr;
-		if (have_c)
-			print_ip6c_addr(s, buf);
-		else
-			print_ip6_addr(s, i, buf);
-
-		if (have_p)
-			trace_seq_printf(s, "]:%d", ntohs(sa6->sin6_port));
-	}
-
-	return rc;
-}
-
-static int print_ip_arg(struct trace_seq *s, const char *ptr,
-			void *data, int size, struct tep_event *event,
-			struct tep_print_arg *arg)
-{
-	char i = *ptr;  /* 'i' or 'I' */
-	int rc = 1;
-
-	/* IP version */
-	ptr++;
-
-	switch (*ptr) {
-	case '4':
-		rc += print_ipv4_arg(s, ptr + 1, i, data, size, event, arg);
-		break;
-	case '6':
-		rc += print_ipv6_arg(s, ptr + 1, i, data, size, event, arg);
-		break;
-	case 'S':
-		rc += print_ipsa_arg(s, ptr + 1, i, data, size, event, arg);
-		break;
-	default:
-		return 0;
-	}
-
-	return rc;
-}
-
-static const int guid_index[16] = {3, 2, 1, 0, 5, 4, 7, 6, 8, 9, 10, 11, 12, 13, 14, 15};
-static const int uuid_index[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
-
-static int print_uuid_arg(struct trace_seq *s, const char *ptr,
-			void *data, int size, struct tep_event *event,
-			struct tep_print_arg *arg)
-{
-	const int *index = uuid_index;
-	char *format = "%02x";
-	int ret = 0;
-	char *buf;
-	int i;
-
-	switch (*(ptr + 1)) {
-	case 'L':
-		format = "%02X";
-		/* fall through */
-	case 'l':
-		index = guid_index;
-		ret++;
-		break;
-	case 'B':
-		format = "%02X";
-		/* fall through */
-	case 'b':
-		ret++;
-		break;
-	}
-
-	if (arg->type == TEP_PRINT_FUNC) {
-		process_defined_func(s, data, size, event, arg);
-		return ret;
-	}
-
-	if (arg->type != TEP_PRINT_FIELD) {
-		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
-		return ret;
-	}
-
-	if (!arg->field.field) {
-		arg->field.field =
-			tep_find_any_field(event, arg->field.name);
-		if (!arg->field.field) {
-			do_warning("%s: field %s not found",
-				   __func__, arg->field.name);
-			return ret;
-		}
-	}
-
-	if (arg->field.field->size != 16) {
-		trace_seq_printf(s, "INVALIDUUID");
-		return ret;
-	}
-
-	buf = data + arg->field.field->offset;
-
-	for (i = 0; i < 16; i++) {
-		trace_seq_printf(s, format, buf[index[i]] & 0xff);
-		switch (i) {
-		case 3:
-		case 5:
-		case 7:
-		case 9:
-			trace_seq_printf(s, "-");
-			break;
-		}
-	}
-
-	return ret;
-}
-
-static int print_raw_buff_arg(struct trace_seq *s, const char *ptr,
-			      void *data, int size, struct tep_event *event,
-			      struct tep_print_arg *arg, int print_len)
-{
-	int plen = print_len;
-	char *delim = " ";
-	int ret = 0;
-	char *buf;
-	int i;
-	unsigned long offset;
-	int arr_len;
-
-	switch (*(ptr + 1)) {
-	case 'C':
-		delim = ":";
-		ret++;
-		break;
-	case 'D':
-		delim = "-";
-		ret++;
-		break;
-	case 'N':
-		delim = "";
-		ret++;
-		break;
-	}
-
-	if (arg->type == TEP_PRINT_FUNC) {
-		process_defined_func(s, data, size, event, arg);
-		return ret;
-	}
-
-	if (arg->type != TEP_PRINT_DYNAMIC_ARRAY) {
-		trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d", arg->type);
-		return ret;
-	}
-
-	offset = tep_read_number(event->tep,
-				 data + arg->dynarray.field->offset,
-				 arg->dynarray.field->size);
-	arr_len = (unsigned long long)(offset >> 16);
-	buf = data + (offset & 0xffff);
-
-	if (arr_len < plen)
-		plen = arr_len;
-
-	if (plen < 1)
-		return ret;
-
-	trace_seq_printf(s, "%02x", buf[0] & 0xff);
-	for (i = 1; i < plen; i++)
-		trace_seq_printf(s, "%s%02x", delim, buf[i] & 0xff);
-
-	return ret;
-}
-
-static int is_printable_array(char *p, unsigned int len)
-{
-	unsigned int i;
-
-	for (i = 0; i < len && p[i]; i++)
-		if (!isprint(p[i]) && !isspace(p[i]))
-		    return 0;
-	return 1;
-}
-
-void tep_print_field(struct trace_seq *s, void *data,
-		     struct tep_format_field *field)
-{
-	unsigned long long val;
-	unsigned int offset, len, i;
-	struct tep_handle *tep = field->event->tep;
-
-	if (field->flags & TEP_FIELD_IS_ARRAY) {
-		offset = field->offset;
-		len = field->size;
-		if (field->flags & TEP_FIELD_IS_DYNAMIC) {
-			val = tep_read_number(tep, data + offset, len);
-			offset = val;
-			len = offset >> 16;
-			offset &= 0xffff;
-			if (field->flags & TEP_FIELD_IS_RELATIVE)
-				offset += field->offset + field->size;
-		}
-		if (field->flags & TEP_FIELD_IS_STRING &&
-		    is_printable_array(data + offset, len)) {
-			trace_seq_printf(s, "%s", (char *)data + offset);
-		} else {
-			trace_seq_puts(s, "ARRAY[");
-			for (i = 0; i < len; i++) {
-				if (i)
-					trace_seq_puts(s, ", ");
-				trace_seq_printf(s, "%02x",
-						 *((unsigned char *)data + offset + i));
-			}
-			trace_seq_putc(s, ']');
-			field->flags &= ~TEP_FIELD_IS_STRING;
-		}
-	} else {
-		val = tep_read_number(tep, data + field->offset,
-				      field->size);
-		if (field->flags & TEP_FIELD_IS_POINTER) {
-			trace_seq_printf(s, "0x%llx", val);
-		} else if (field->flags & TEP_FIELD_IS_SIGNED) {
-			switch (field->size) {
-			case 4:
-				/*
-				 * If field is long then print it in hex.
-				 * A long usually stores pointers.
-				 */
-				if (field->flags & TEP_FIELD_IS_LONG)
-					trace_seq_printf(s, "0x%x", (int)val);
-				else
-					trace_seq_printf(s, "%d", (int)val);
-				break;
-			case 2:
-				trace_seq_printf(s, "%2d", (short)val);
-				break;
-			case 1:
-				trace_seq_printf(s, "%1d", (char)val);
-				break;
-			default:
-				trace_seq_printf(s, "%lld", val);
-			}
-		} else {
-			if (field->flags & TEP_FIELD_IS_LONG)
-				trace_seq_printf(s, "0x%llx", val);
-			else
-				trace_seq_printf(s, "%llu", val);
-		}
-	}
-}
-
-void tep_print_fields(struct trace_seq *s, void *data,
-		      int size __maybe_unused, struct tep_event *event)
-{
-	struct tep_format_field *field;
-
-	field = event->format.fields;
-	while (field) {
-		trace_seq_printf(s, " %s=", field->name);
-		tep_print_field(s, data, field);
-		field = field->next;
-	}
-}
-
-static int print_function(struct trace_seq *s, const char *format,
-			  void *data, int size, struct tep_event *event,
-			  struct tep_print_arg *arg)
-{
-	struct func_map *func;
-	unsigned long long val;
-
-	val = eval_num_arg(data, size, event, arg);
-	func = find_func(event->tep, val);
-	if (func) {
-		trace_seq_puts(s, func->func);
-		if (*format == 'F' || *format == 'S')
-			trace_seq_printf(s, "+0x%llx", val - func->addr);
-	} else {
-		if (event->tep->long_size == 4)
-			trace_seq_printf(s, "0x%lx", (long)val);
-		else
-			trace_seq_printf(s, "0x%llx", (long long)val);
-	}
-
-	return 0;
-}
-
-static int print_arg_pointer(struct trace_seq *s, const char *format, int plen,
-			     void *data, int size,
-			     struct tep_event *event, struct tep_print_arg *arg)
-{
-	unsigned long long val;
-	int ret = 1;
-
-	if (arg->type == TEP_PRINT_BSTRING) {
-		trace_seq_puts(s, arg->string.string);
-		return 0;
-	}
-	while (*format) {
-		if (*format == 'p') {
-			format++;
-			break;
-		}
-		format++;
-	}
-
-	switch (*format) {
-	case 'F':
-	case 'f':
-	case 'S':
-	case 's':
-		ret += print_function(s, format, data, size, event, arg);
-		break;
-	case 'M':
-	case 'm':
-		ret += print_mac_arg(s, format, data, size, event, arg);
-		break;
-	case 'I':
-	case 'i':
-		ret += print_ip_arg(s, format, data, size, event, arg);
-		break;
-	case 'U':
-		ret += print_uuid_arg(s, format, data, size, event, arg);
-		break;
-	case 'h':
-		ret += print_raw_buff_arg(s, format, data, size, event, arg, plen);
-		break;
-	default:
-		ret = 0;
-		val = eval_num_arg(data, size, event, arg);
-		trace_seq_printf(s, "%p", (void *)(intptr_t)val);
-		break;
-	}
-
-	return ret;
-
-}
-
-static int print_arg_number(struct trace_seq *s, const char *format, int plen,
-			    void *data, int size, int ls,
-			    struct tep_event *event, struct tep_print_arg *arg)
-{
-	unsigned long long val;
-
-	val = eval_num_arg(data, size, event, arg);
-
-	switch (ls) {
-	case -2:
-		if (plen >= 0)
-			trace_seq_printf(s, format, plen, (char)val);
-		else
-			trace_seq_printf(s, format, (char)val);
-		break;
-	case -1:
-		if (plen >= 0)
-			trace_seq_printf(s, format, plen, (short)val);
-		else
-			trace_seq_printf(s, format, (short)val);
-		break;
-	case 0:
-		if (plen >= 0)
-			trace_seq_printf(s, format, plen, (int)val);
-		else
-			trace_seq_printf(s, format, (int)val);
-		break;
-	case 1:
-		if (plen >= 0)
-			trace_seq_printf(s, format, plen, (long)val);
-		else
-			trace_seq_printf(s, format, (long)val);
-		break;
-	case 2:
-		if (plen >= 0)
-			trace_seq_printf(s, format, plen, (long long)val);
-		else
-			trace_seq_printf(s, format, (long long)val);
-		break;
-	default:
-		do_warning_event(event, "bad count (%d)", ls);
-		event->flags |= TEP_EVENT_FL_FAILED;
-	}
-	return 0;
-}
-
-
-static void print_arg_string(struct trace_seq *s, const char *format, int plen,
-			     void *data, int size,
-			     struct tep_event *event, struct tep_print_arg *arg)
-{
-	struct trace_seq p;
-
-	/* Use helper trace_seq */
-	trace_seq_init(&p);
-	print_str_arg(&p, data, size, event,
-		      format, plen, arg);
-	trace_seq_terminate(&p);
-	trace_seq_puts(s, p.buffer);
-	trace_seq_destroy(&p);
-}
-
-static int parse_arg_format_pointer(const char *format)
-{
-	int ret = 0;
-	int index;
-	int loop;
-
-	switch (*format) {
-	case 'F':
-	case 'S':
-	case 'f':
-	case 's':
-		ret++;
-		break;
-	case 'M':
-	case 'm':
-		/* [mM]R , [mM]F */
-		switch (format[1]) {
-		case 'R':
-		case 'F':
-			ret++;
-			break;
-		}
-		ret++;
-		break;
-	case 'I':
-	case 'i':
-		index = 2;
-		loop = 1;
-		switch (format[1]) {
-		case 'S':
-			/*[S][pfs]*/
-			while (loop) {
-				switch (format[index]) {
-				case 'p':
-				case 'f':
-				case 's':
-					ret++;
-					index++;
-					break;
-				default:
-					loop = 0;
-					break;
-				}
-			}
-			/* fall through */
-		case '4':
-			/* [4S][hnbl] */
-			switch (format[index]) {
-			case 'h':
-			case 'n':
-			case 'l':
-			case 'b':
-				ret++;
-				index++;
-				break;
-			}
-			if (format[1] == '4') {
-				ret++;
-				break;
-			}
-			/* fall through */
-		case '6':
-			/* [6S]c */
-			if (format[index] == 'c')
-				ret++;
-			ret++;
-			break;
-		}
-		ret++;
-		break;
-	case 'U':
-		switch (format[1]) {
-		case 'L':
-		case 'l':
-		case 'B':
-		case 'b':
-			ret++;
-			break;
-		}
-		ret++;
-		break;
-	case 'h':
-		switch (format[1]) {
-		case 'C':
-		case 'D':
-		case 'N':
-			ret++;
-			break;
-		}
-		ret++;
-		break;
-	default:
-		break;
-	}
-
-	return ret;
-}
-
-static void free_parse_args(struct tep_print_parse *arg)
-{
-	struct tep_print_parse *del;
-
-	while (arg) {
-		del = arg;
-		arg = del->next;
-		free(del->format);
-		free(del);
-	}
-}
-
-static int parse_arg_add(struct tep_print_parse **parse, char *format,
-			 enum tep_print_parse_type type,
-			 struct tep_print_arg *arg,
-			 struct tep_print_arg *len_as_arg,
-			 int ls)
-{
-	struct tep_print_parse *parg = NULL;
-
-	parg = calloc(1, sizeof(*parg));
-	if (!parg)
-		goto error;
-	parg->format = strdup(format);
-	if (!parg->format)
-		goto error;
-	parg->type = type;
-	parg->arg = arg;
-	parg->len_as_arg = len_as_arg;
-	parg->ls = ls;
-	*parse = parg;
-	return 0;
-error:
-	if (parg) {
-		free(parg->format);
-		free(parg);
-	}
-	return -1;
-}
-
-static int parse_arg_format(struct tep_print_parse **parse,
-			    struct tep_event *event,
-			    const char *format, struct tep_print_arg **arg)
-{
-	struct tep_print_arg *len_arg = NULL;
-	char print_format[32];
-	const char *start = format;
-	int ret = 0;
-	int ls = 0;
-	int res;
-	int len;
-
-	format++;
-	ret++;
-	for (; *format; format++) {
-		switch (*format) {
-		case '#':
-			/* FIXME: need to handle properly */
-			break;
-		case 'h':
-			ls--;
-			break;
-		case 'l':
-			ls++;
-			break;
-		case 'L':
-			ls = 2;
-			break;
-		case '.':
-		case 'z':
-		case 'Z':
-		case '0' ... '9':
-		case '-':
-			break;
-		case '*':
-			/* The argument is the length. */
-			if (!*arg) {
-				do_warning_event(event, "no argument match");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				goto out_failed;
-			}
-			if (len_arg) {
-				do_warning_event(event, "argument already matched");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				goto out_failed;
-			}
-			len_arg = *arg;
-			*arg = (*arg)->next;
-			break;
-		case 'p':
-			if (!*arg) {
-				do_warning_event(event, "no argument match");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				goto out_failed;
-			}
-			res = parse_arg_format_pointer(format + 1);
-			if (res > 0) {
-				format += res;
-				ret += res;
-			}
-			len = ((unsigned long)format + 1) -
-				(unsigned long)start;
-			/* should never happen */
-			if (len > 31) {
-				do_warning_event(event, "bad format!");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				len = 31;
-			}
-			memcpy(print_format, start, len);
-			print_format[len] = 0;
-
-			parse_arg_add(parse, print_format,
-				      PRINT_FMT_ARG_POINTER, *arg, len_arg, ls);
-			*arg = (*arg)->next;
-			ret++;
-			return ret;
-		case 'd':
-		case 'u':
-		case 'i':
-		case 'x':
-		case 'X':
-		case 'o':
-			if (!*arg) {
-				do_warning_event(event, "no argument match");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				goto out_failed;
-			}
-
-			len = ((unsigned long)format + 1) -
-				(unsigned long)start;
-
-			/* should never happen */
-			if (len > 30) {
-				do_warning_event(event, "bad format!");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				len = 31;
-			}
-			memcpy(print_format, start, len);
-			print_format[len] = 0;
-
-			if (event->tep->long_size == 8 && ls == 1 &&
-			    sizeof(long) != 8) {
-				char *p;
-
-				/* make %l into %ll */
-				if (ls == 1 && (p = strchr(print_format, 'l')))
-					memmove(p+1, p, strlen(p)+1);
-				ls = 2;
-			}
-			if (ls < -2 || ls > 2) {
-				do_warning_event(event, "bad count (%d)", ls);
-				event->flags |= TEP_EVENT_FL_FAILED;
-			}
-			parse_arg_add(parse, print_format,
-				      PRINT_FMT_ARG_DIGIT, *arg, len_arg, ls);
-			*arg = (*arg)->next;
-			ret++;
-			return ret;
-		case 's':
-			if (!*arg) {
-				do_warning_event(event, "no matching argument");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				goto out_failed;
-			}
-
-			len = ((unsigned long)format + 1) -
-				(unsigned long)start;
-
-			/* should never happen */
-			if (len > 31) {
-				do_warning_event(event, "bad format!");
-				event->flags |= TEP_EVENT_FL_FAILED;
-				len = 31;
-			}
-
-			memcpy(print_format, start, len);
-			print_format[len] = 0;
-
-			parse_arg_add(parse, print_format,
-					PRINT_FMT_ARG_STRING, *arg, len_arg, 0);
-			*arg = (*arg)->next;
-			ret++;
-			return ret;
-		default:
-			snprintf(print_format, 32, ">%c<", *format);
-			parse_arg_add(parse, print_format,
-					PRINT_FMT_STRING, NULL, NULL, 0);
-			ret++;
-			return ret;
-		}
-		ret++;
-	}
-
-out_failed:
-	return ret;
-
-}
-
-static int parse_arg_string(struct tep_print_parse **parse, const char *format)
-{
-	struct trace_seq s;
-	int ret = 0;
-
-	trace_seq_init(&s);
-	for (; *format; format++) {
-		if (*format == '\\') {
-			format++;
-			ret++;
-			switch (*format) {
-			case 'n':
-				trace_seq_putc(&s, '\n');
-				break;
-			case 't':
-				trace_seq_putc(&s, '\t');
-				break;
-			case 'r':
-				trace_seq_putc(&s, '\r');
-				break;
-			case '\\':
-				trace_seq_putc(&s, '\\');
-				break;
-			default:
-				trace_seq_putc(&s, *format);
-				break;
-			}
-		} else if (*format == '%') {
-			if (*(format + 1) == '%') {
-				trace_seq_putc(&s, '%');
-				format++;
-				ret++;
-			} else
-				break;
-		} else
-			trace_seq_putc(&s, *format);
-
-		ret++;
-	}
-	trace_seq_terminate(&s);
-	parse_arg_add(parse, s.buffer, PRINT_FMT_STRING, NULL, NULL, 0);
-	trace_seq_destroy(&s);
-
-	return ret;
-}
-
-static struct tep_print_parse *
-parse_args(struct tep_event *event, const char *format, struct tep_print_arg *arg)
-{
-	struct tep_print_parse *parse_ret = NULL;
-	struct tep_print_parse **parse = NULL;
-	int ret;
-	int len;
-
-	len = strlen(format);
-	while (*format) {
-		if (!parse_ret)
-			parse = &parse_ret;
-		if (*format == '%' && *(format + 1) != '%')
-			ret = parse_arg_format(parse, event, format, &arg);
-		else
-			ret = parse_arg_string(parse, format);
-		if (*parse)
-			parse = &((*parse)->next);
-
-		len -= ret;
-		if (len > 0)
-			format += ret;
-		else
-			break;
-	}
-	return parse_ret;
-}
-
-static void print_event_cache(struct tep_print_parse *parse, struct trace_seq *s,
-			      void *data, int size, struct tep_event *event)
-{
-	int len_arg;
-
-	while (parse) {
-		if (parse->len_as_arg)
-			len_arg = eval_num_arg(data, size, event, parse->len_as_arg);
-		switch (parse->type) {
-		case PRINT_FMT_ARG_DIGIT:
-			print_arg_number(s, parse->format,
-					parse->len_as_arg ? len_arg : -1, data,
-					 size, parse->ls, event, parse->arg);
-			break;
-		case PRINT_FMT_ARG_POINTER:
-			print_arg_pointer(s, parse->format,
-					  parse->len_as_arg ? len_arg : 1,
-					  data, size, event, parse->arg);
-			break;
-		case PRINT_FMT_ARG_STRING:
-			print_arg_string(s, parse->format,
-					 parse->len_as_arg ? len_arg : -1,
-					 data, size, event, parse->arg);
-			break;
-		case PRINT_FMT_STRING:
-		default:
-			trace_seq_printf(s, "%s", parse->format);
-			break;
-		}
-		parse = parse->next;
-	}
-}
-
-static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event)
-{
-	struct tep_print_parse *parse = event->print_fmt.print_cache;
-	struct tep_print_arg *args = NULL;
-	char *bprint_fmt = NULL;
-
-	if (event->flags & TEP_EVENT_FL_FAILED) {
-		trace_seq_printf(s, "[FAILED TO PARSE]");
-		tep_print_fields(s, data, size, event);
-		return;
-	}
-
-	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
-		bprint_fmt = get_bprint_format(data, size, event);
-		args = make_bprint_args(bprint_fmt, data, size, event);
-		parse = parse_args(event, bprint_fmt, args);
-	}
-
-	print_event_cache(parse, s, data, size, event);
-
-	if (event->flags & TEP_EVENT_FL_ISBPRINT) {
-		free_parse_args(parse);
-		free_args(args);
-		free(bprint_fmt);
-	}
-}
-
-/*
- * This parses out the Latency format (interrupts disabled,
- * need rescheduling, in hard/soft interrupt, preempt count
- * and lock depth) and places it into the trace_seq.
- */
-static void data_latency_format(struct tep_handle *tep, struct trace_seq *s,
-				char *format, struct tep_record *record)
-{
-	static int check_lock_depth = 1;
-	static int check_migrate_disable = 1;
-	static int lock_depth_exists;
-	static int migrate_disable_exists;
-	unsigned int lat_flags;
-	struct trace_seq sq;
-	unsigned int pc;
-	int lock_depth = 0;
-	int migrate_disable = 0;
-	int hardirq;
-	int softirq;
-	void *data = record->data;
-
-	trace_seq_init(&sq);
-	lat_flags = parse_common_flags(tep, data);
-	pc = parse_common_pc(tep, data);
-	/* lock_depth may not always exist */
-	if (lock_depth_exists)
-		lock_depth = parse_common_lock_depth(tep, data);
-	else if (check_lock_depth) {
-		lock_depth = parse_common_lock_depth(tep, data);
-		if (lock_depth < 0)
-			check_lock_depth = 0;
-		else
-			lock_depth_exists = 1;
-	}
-
-	/* migrate_disable may not always exist */
-	if (migrate_disable_exists)
-		migrate_disable = parse_common_migrate_disable(tep, data);
-	else if (check_migrate_disable) {
-		migrate_disable = parse_common_migrate_disable(tep, data);
-		if (migrate_disable < 0)
-			check_migrate_disable = 0;
-		else
-			migrate_disable_exists = 1;
-	}
-
-	hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
-	softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
-
-	trace_seq_printf(&sq, "%c%c%c",
-	       (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
-	       (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
-	       'X' : '.',
-	       (lat_flags & TRACE_FLAG_NEED_RESCHED) ?
-	       'N' : '.',
-	       (hardirq && softirq) ? 'H' :
-	       hardirq ? 'h' : softirq ? 's' : '.');
-
-	if (pc)
-		trace_seq_printf(&sq, "%x", pc);
-	else
-		trace_seq_printf(&sq, ".");
-
-	if (migrate_disable_exists) {
-		if (migrate_disable < 0)
-			trace_seq_printf(&sq, ".");
-		else
-			trace_seq_printf(&sq, "%d", migrate_disable);
-	}
-
-	if (lock_depth_exists) {
-		if (lock_depth < 0)
-			trace_seq_printf(&sq, ".");
-		else
-			trace_seq_printf(&sq, "%d", lock_depth);
-	}
-
-	if (sq.state == TRACE_SEQ__MEM_ALLOC_FAILED) {
-		s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
-		return;
-	}
-
-	trace_seq_terminate(&sq);
-	trace_seq_puts(s, sq.buffer);
-	trace_seq_destroy(&sq);
-	trace_seq_terminate(s);
-}
-
-/**
- * tep_data_type - parse out the given event type
- * @tep: a handle to the trace event parser context
- * @rec: the record to read from
- *
- * This returns the event id from the @rec.
- */
-int tep_data_type(struct tep_handle *tep, struct tep_record *rec)
-{
-	return trace_parse_common_type(tep, rec->data);
-}
-
-/**
- * tep_data_pid - parse the PID from record
- * @tep: a handle to the trace event parser context
- * @rec: the record to parse
- *
- * This returns the PID from a record.
- */
-int tep_data_pid(struct tep_handle *tep, struct tep_record *rec)
-{
-	return parse_common_pid(tep, rec->data);
-}
-
-/**
- * tep_data_preempt_count - parse the preempt count from the record
- * @tep: a handle to the trace event parser context
- * @rec: the record to parse
- *
- * This returns the preempt count from a record.
- */
-int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec)
-{
-	return parse_common_pc(tep, rec->data);
-}
-
-/**
- * tep_data_flags - parse the latency flags from the record
- * @tep: a handle to the trace event parser context
- * @rec: the record to parse
- *
- * This returns the latency flags from a record.
- *
- *  Use trace_flag_type enum for the flags (see event-parse.h).
- */
-int tep_data_flags(struct tep_handle *tep, struct tep_record *rec)
-{
-	return parse_common_flags(tep, rec->data);
-}
-
-/**
- * tep_data_comm_from_pid - return the command line from PID
- * @tep: a handle to the trace event parser context
- * @pid: the PID of the task to search for
- *
- * This returns a pointer to the command line that has the given
- * @pid.
- */
-const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid)
-{
-	const char *comm;
-
-	comm = find_cmdline(tep, pid);
-	return comm;
-}
-
-static struct tep_cmdline *
-pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next)
-{
-	struct cmdline_list *cmdlist = (struct cmdline_list *)next;
-
-	if (cmdlist)
-		cmdlist = cmdlist->next;
-	else
-		cmdlist = tep->cmdlist;
-
-	while (cmdlist && strcmp(cmdlist->comm, comm) != 0)
-		cmdlist = cmdlist->next;
-
-	return (struct tep_cmdline *)cmdlist;
-}
-
-/**
- * tep_data_pid_from_comm - return the pid from a given comm
- * @tep: a handle to the trace event parser context
- * @comm: the cmdline to find the pid from
- * @next: the cmdline structure to find the next comm
- *
- * This returns the cmdline structure that holds a pid for a given
- * comm, or NULL if none found. As there may be more than one pid for
- * a given comm, the result of this call can be passed back into
- * a recurring call in the @next parameter, and then it will find the
- * next pid.
- * Also, it does a linear search, so it may be slow.
- */
-struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm,
-					   struct tep_cmdline *next)
-{
-	struct tep_cmdline *cmdline;
-
-	/*
-	 * If the cmdlines have not been converted yet, then use
-	 * the list.
-	 */
-	if (!tep->cmdlines)
-		return pid_from_cmdlist(tep, comm, next);
-
-	if (next) {
-		/*
-		 * The next pointer could have been still from
-		 * a previous call before cmdlines were created
-		 */
-		if (next < tep->cmdlines ||
-		    next >= tep->cmdlines + tep->cmdline_count)
-			next = NULL;
-		else
-			cmdline  = next++;
-	}
-
-	if (!next)
-		cmdline = tep->cmdlines;
-
-	while (cmdline < tep->cmdlines + tep->cmdline_count) {
-		if (strcmp(cmdline->comm, comm) == 0)
-			return cmdline;
-		cmdline++;
-	}
-	return NULL;
-}
-
-/**
- * tep_cmdline_pid - return the pid associated to a given cmdline
- * @tep: a handle to the trace event parser context
- * @cmdline: The cmdline structure to get the pid from
- *
- * Returns the pid for a give cmdline. If @cmdline is NULL, then
- * -1 is returned.
- */
-int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline)
-{
-	struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline;
-
-	if (!cmdline)
-		return -1;
-
-	/*
-	 * If cmdlines have not been created yet, or cmdline is
-	 * not part of the array, then treat it as a cmdlist instead.
-	 */
-	if (!tep->cmdlines ||
-	    cmdline < tep->cmdlines ||
-	    cmdline >= tep->cmdlines + tep->cmdline_count)
-		return cmdlist->pid;
-
-	return cmdline->pid;
-}
-
-/*
- * This parses the raw @data using the given @event information and
- * writes the print format into the trace_seq.
- */
-static void print_event_info(struct trace_seq *s, char *format, bool raw,
-			     struct tep_event *event, struct tep_record *record)
-{
-	int print_pretty = 1;
-
-	if (raw || (event->flags & TEP_EVENT_FL_PRINTRAW))
-		tep_print_fields(s, record->data, record->size, event);
-	else {
-
-		if (event->handler && !(event->flags & TEP_EVENT_FL_NOHANDLE))
-			print_pretty = event->handler(s, record, event,
-						      event->context);
-
-		if (print_pretty)
-			pretty_print(s, record->data, record->size, event);
-	}
-
-	trace_seq_terminate(s);
-}
-
-/**
- * tep_find_event_by_record - return the event from a given record
- * @tep: a handle to the trace event parser context
- * @record: The record to get the event from
- *
- * Returns the associated event for a given record, or NULL if non is
- * is found.
- */
-struct tep_event *
-tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record)
-{
-	int type;
-
-	if (record->size < 0) {
-		do_warning("ug! negative record size %d", record->size);
-		return NULL;
-	}
-
-	type = trace_parse_common_type(tep, record->data);
-
-	return tep_find_event(tep, type);
-}
-
-/*
- * Writes the timestamp of the record into @s. Time divisor and precision can be
- * specified as part of printf @format string. Example:
- *	"%3.1000d" - divide the time by 1000 and print the first 3 digits
- *	before the dot. Thus, the timestamp "123456000" will be printed as
- *	"123.456"
- */
-static void print_event_time(struct tep_handle *tep, struct trace_seq *s,
-				 char *format, struct tep_event *event,
-				 struct tep_record *record)
-{
-	unsigned long long time;
-	char *divstr;
-	int prec = 0, pr;
-	int div = 0;
-	int p10 = 1;
-
-	if (isdigit(*(format + 1)))
-		prec = atoi(format + 1);
-	divstr = strchr(format, '.');
-	if (divstr && isdigit(*(divstr + 1)))
-		div = atoi(divstr + 1);
-	time = record->ts;
-	if (div) {
-		time += div / 2;
-		time /= div;
-	}
-	pr = prec;
-	while (pr--)
-		p10 *= 10;
-
-	if (p10 > 1 && p10 < time)
-		trace_seq_printf(s, "%5llu.%0*llu", time / p10, prec, time % p10);
-	else
-		trace_seq_printf(s, "%12llu", time);
-}
-
-struct print_event_type {
-	enum {
-		EVENT_TYPE_INT = 1,
-		EVENT_TYPE_STRING,
-		EVENT_TYPE_UNKNOWN,
-	} type;
-	char format[32];
-};
-
-static void print_string(struct tep_handle *tep, struct trace_seq *s,
-			 struct tep_record *record, struct tep_event *event,
-			 const char *arg, struct print_event_type *type)
-{
-	const char *comm;
-	int pid;
-
-	if (strncmp(arg, TEP_PRINT_LATENCY, strlen(TEP_PRINT_LATENCY)) == 0) {
-		data_latency_format(tep, s, type->format, record);
-	} else if (strncmp(arg, TEP_PRINT_COMM, strlen(TEP_PRINT_COMM)) == 0) {
-		pid = parse_common_pid(tep, record->data);
-		comm = find_cmdline(tep, pid);
-		trace_seq_printf(s, type->format, comm);
-	} else if (strncmp(arg, TEP_PRINT_INFO_RAW, strlen(TEP_PRINT_INFO_RAW)) == 0) {
-		print_event_info(s, type->format, true, event, record);
-	} else if (strncmp(arg, TEP_PRINT_INFO, strlen(TEP_PRINT_INFO)) == 0) {
-		print_event_info(s, type->format, false, event, record);
-	} else if  (strncmp(arg, TEP_PRINT_NAME, strlen(TEP_PRINT_NAME)) == 0) {
-		trace_seq_printf(s, type->format, event->name);
-	} else {
-		trace_seq_printf(s, "[UNKNOWN TEP TYPE %s]", arg);
-	}
-
-}
-
-static void print_int(struct tep_handle *tep, struct trace_seq *s,
-		      struct tep_record *record, struct tep_event *event,
-		      int arg, struct print_event_type *type)
-{
-	int param;
-
-	switch (arg) {
-	case TEP_PRINT_CPU:
-		param = record->cpu;
-		break;
-	case TEP_PRINT_PID:
-		param = parse_common_pid(tep, record->data);
-		break;
-	case TEP_PRINT_TIME:
-		return print_event_time(tep, s, type->format, event, record);
-	default:
-		return;
-	}
-	trace_seq_printf(s, type->format, param);
-}
-
-static int tep_print_event_param_type(char *format,
-				      struct print_event_type *type)
-{
-	char *str = format + 1;
-	int i = 1;
-
-	type->type = EVENT_TYPE_UNKNOWN;
-	while (*str) {
-		switch (*str) {
-		case 'd':
-		case 'u':
-		case 'i':
-		case 'x':
-		case 'X':
-		case 'o':
-			type->type = EVENT_TYPE_INT;
-			break;
-		case 's':
-			type->type = EVENT_TYPE_STRING;
-			break;
-		}
-		str++;
-		i++;
-		if (type->type != EVENT_TYPE_UNKNOWN)
-			break;
-	}
-	memset(type->format, 0, 32);
-	memcpy(type->format, format, i < 32 ? i : 31);
-	return i;
-}
-
-/**
- * tep_print_event - Write various event information
- * @tep: a handle to the trace event parser context
- * @s: the trace_seq to write to
- * @record: The record to get the event from
- * @format: a printf format string. Supported event fileds:
- *	TEP_PRINT_PID, "%d" - event PID
- *	TEP_PRINT_CPU, "%d" - event CPU
- *	TEP_PRINT_COMM, "%s" - event command string
- *	TEP_PRINT_NAME, "%s" - event name
- *	TEP_PRINT_LATENCY, "%s" - event latency
- *	TEP_PRINT_TIME, %d - event time stamp. A divisor and precision
- *			can be specified as part of this format string:
- *			"%precision.divisord". Example:
- *			"%3.1000d" - divide the time by 1000 and print the first
- *			3 digits before the dot. Thus, the time stamp
- *			"123456000" will be printed as "123.456"
- *	TEP_PRINT_INFO, "%s" - event information. If any width is specified in
- *			the format string, the event information will be printed
- *			in raw format.
- * Writes the specified event information into @s.
- */
-void tep_print_event(struct tep_handle *tep, struct trace_seq *s,
-		     struct tep_record *record, const char *fmt, ...)
-{
-	struct print_event_type type;
-	char *format = strdup(fmt);
-	char *current = format;
-	char *str = format;
-	int offset;
-	va_list args;
-	struct tep_event *event;
-
-	if (!format)
-		return;
-
-	event = tep_find_event_by_record(tep, record);
-	va_start(args, fmt);
-	while (*current) {
-		current = strchr(str, '%');
-		if (!current) {
-			trace_seq_puts(s, str);
-			break;
-		}
-		memset(&type, 0, sizeof(type));
-		offset = tep_print_event_param_type(current, &type);
-		*current = '\0';
-		trace_seq_puts(s, str);
-		current += offset;
-		switch (type.type) {
-		case EVENT_TYPE_STRING:
-			print_string(tep, s, record, event,
-				     va_arg(args, char*), &type);
-			break;
-		case EVENT_TYPE_INT:
-			print_int(tep, s, record, event,
-				  va_arg(args, int), &type);
-			break;
-		case EVENT_TYPE_UNKNOWN:
-		default:
-			trace_seq_printf(s, "[UNKNOWN TYPE]");
-			break;
-		}
-		str = current;
-
-	}
-	va_end(args);
-	free(format);
-}
-
-static int events_id_cmp(const void *a, const void *b)
-{
-	struct tep_event * const * ea = a;
-	struct tep_event * const * eb = b;
-
-	if ((*ea)->id < (*eb)->id)
-		return -1;
-
-	if ((*ea)->id > (*eb)->id)
-		return 1;
-
-	return 0;
-}
-
-static int events_name_cmp(const void *a, const void *b)
-{
-	struct tep_event * const * ea = a;
-	struct tep_event * const * eb = b;
-	int res;
-
-	res = strcmp((*ea)->name, (*eb)->name);
-	if (res)
-		return res;
-
-	res = strcmp((*ea)->system, (*eb)->system);
-	if (res)
-		return res;
-
-	return events_id_cmp(a, b);
-}
-
-static int events_system_cmp(const void *a, const void *b)
-{
-	struct tep_event * const * ea = a;
-	struct tep_event * const * eb = b;
-	int res;
-
-	res = strcmp((*ea)->system, (*eb)->system);
-	if (res)
-		return res;
-
-	res = strcmp((*ea)->name, (*eb)->name);
-	if (res)
-		return res;
-
-	return events_id_cmp(a, b);
-}
-
-static struct tep_event **list_events_copy(struct tep_handle *tep)
-{
-	struct tep_event **events;
-
-	if (!tep)
-		return NULL;
-
-	events = malloc(sizeof(*events) * (tep->nr_events + 1));
-	if (!events)
-		return NULL;
-
-	memcpy(events, tep->events, sizeof(*events) * tep->nr_events);
-	events[tep->nr_events] = NULL;
-	return events;
-}
-
-static void list_events_sort(struct tep_event **events, int nr_events,
-			     enum tep_event_sort_type sort_type)
-{
-	int (*sort)(const void *a, const void *b);
-
-	switch (sort_type) {
-	case TEP_EVENT_SORT_ID:
-		sort = events_id_cmp;
-		break;
-	case TEP_EVENT_SORT_NAME:
-		sort = events_name_cmp;
-		break;
-	case TEP_EVENT_SORT_SYSTEM:
-		sort = events_system_cmp;
-		break;
-	default:
-		sort = NULL;
-	}
-
-	if (sort)
-		qsort(events, nr_events, sizeof(*events), sort);
-}
-
-/**
- * tep_list_events - Get events, sorted by given criteria.
- * @tep: a handle to the tep context
- * @sort_type: desired sort order of the events in the array
- *
- * Returns an array of pointers to all events, sorted by the given
- * @sort_type criteria. The last element of the array is NULL. The returned
- * memory must not be freed, it is managed by the library.
- * The function is not thread safe.
- */
-struct tep_event **tep_list_events(struct tep_handle *tep,
-				   enum tep_event_sort_type sort_type)
-{
-	struct tep_event **events;
-
-	if (!tep)
-		return NULL;
-
-	events = tep->sort_events;
-	if (events && tep->last_type == sort_type)
-		return events;
-
-	if (!events) {
-		events = list_events_copy(tep);
-		if (!events)
-			return NULL;
-
-		tep->sort_events = events;
-
-		/* the internal events are sorted by id */
-		if (sort_type == TEP_EVENT_SORT_ID) {
-			tep->last_type = sort_type;
-			return events;
-		}
-	}
-
-	list_events_sort(events, tep->nr_events, sort_type);
-	tep->last_type = sort_type;
-
-	return events;
-}
-
-
-/**
- * tep_list_events_copy - Thread safe version of tep_list_events()
- * @tep: a handle to the tep context
- * @sort_type: desired sort order of the events in the array
- *
- * Returns an array of pointers to all events, sorted by the given
- * @sort_type criteria. The last element of the array is NULL. The returned
- * array is newly allocated inside the function and must be freed by the caller
- */
-struct tep_event **tep_list_events_copy(struct tep_handle *tep,
-					enum tep_event_sort_type sort_type)
-{
-	struct tep_event **events;
-
-	if (!tep)
-		return NULL;
-
-	events = list_events_copy(tep);
-	if (!events)
-		return NULL;
-
-	/* the internal events are sorted by id */
-	if (sort_type == TEP_EVENT_SORT_ID)
-		return events;
-
-	list_events_sort(events, tep->nr_events, sort_type);
-
-	return events;
-}
-
-static struct tep_format_field **
-get_event_fields(const char *type, const char *name,
-		 int count, struct tep_format_field *list)
-{
-	struct tep_format_field **fields;
-	struct tep_format_field *field;
-	int i = 0;
-
-	fields = malloc(sizeof(*fields) * (count + 1));
-	if (!fields)
-		return NULL;
-
-	for (field = list; field; field = field->next) {
-		fields[i++] = field;
-		if (i == count + 1) {
-			do_warning("event %s has more %s fields than specified",
-				name, type);
-			i--;
-			break;
-		}
-	}
-
-	if (i != count)
-		do_warning("event %s has less %s fields than specified",
-			name, type);
-
-	fields[i] = NULL;
-
-	return fields;
-}
-
-/**
- * tep_event_common_fields - return a list of common fields for an event
- * @event: the event to return the common fields of.
- *
- * Returns an allocated array of fields. The last item in the array is NULL.
- * The array must be freed with free().
- */
-struct tep_format_field **tep_event_common_fields(struct tep_event *event)
-{
-	return get_event_fields("common", event->name,
-				event->format.nr_common,
-				event->format.common_fields);
-}
-
-/**
- * tep_event_fields - return a list of event specific fields for an event
- * @event: the event to return the fields of.
- *
- * Returns an allocated array of fields. The last item in the array is NULL.
- * The array must be freed with free().
- */
-struct tep_format_field **tep_event_fields(struct tep_event *event)
-{
-	return get_event_fields("event", event->name,
-				event->format.nr_fields,
-				event->format.fields);
-}
-
-static void print_fields(struct trace_seq *s, struct tep_print_flag_sym *field)
-{
-	trace_seq_printf(s, "{ %s, %s }", field->value, field->str);
-	if (field->next) {
-		trace_seq_puts(s, ", ");
-		print_fields(s, field->next);
-	}
-}
-
-/* for debugging */
-static void print_args(struct tep_print_arg *args)
-{
-	int print_paren = 1;
-	struct trace_seq s;
-
-	switch (args->type) {
-	case TEP_PRINT_NULL:
-		printf("null");
-		break;
-	case TEP_PRINT_ATOM:
-		printf("%s", args->atom.atom);
-		break;
-	case TEP_PRINT_FIELD:
-		printf("REC->%s", args->field.name);
-		break;
-	case TEP_PRINT_FLAGS:
-		printf("__print_flags(");
-		print_args(args->flags.field);
-		printf(", %s, ", args->flags.delim);
-		trace_seq_init(&s);
-		print_fields(&s, args->flags.flags);
-		trace_seq_do_printf(&s);
-		trace_seq_destroy(&s);
-		printf(")");
-		break;
-	case TEP_PRINT_SYMBOL:
-		printf("__print_symbolic(");
-		print_args(args->symbol.field);
-		printf(", ");
-		trace_seq_init(&s);
-		print_fields(&s, args->symbol.symbols);
-		trace_seq_do_printf(&s);
-		trace_seq_destroy(&s);
-		printf(")");
-		break;
-	case TEP_PRINT_HEX:
-		printf("__print_hex(");
-		print_args(args->hex.field);
-		printf(", ");
-		print_args(args->hex.size);
-		printf(")");
-		break;
-	case TEP_PRINT_HEX_STR:
-		printf("__print_hex_str(");
-		print_args(args->hex.field);
-		printf(", ");
-		print_args(args->hex.size);
-		printf(")");
-		break;
-	case TEP_PRINT_INT_ARRAY:
-		printf("__print_array(");
-		print_args(args->int_array.field);
-		printf(", ");
-		print_args(args->int_array.count);
-		printf(", ");
-		print_args(args->int_array.el_size);
-		printf(")");
-		break;
-	case TEP_PRINT_STRING:
-	case TEP_PRINT_BSTRING:
-		printf("__get_str(%s)", args->string.string);
-		break;
-	case TEP_PRINT_BITMASK:
-		printf("__get_bitmask(%s)", args->bitmask.bitmask);
-		break;
-	case TEP_PRINT_TYPE:
-		printf("(%s)", args->typecast.type);
-		print_args(args->typecast.item);
-		break;
-	case TEP_PRINT_OP:
-		if (strcmp(args->op.op, ":") == 0)
-			print_paren = 0;
-		if (print_paren)
-			printf("(");
-		print_args(args->op.left);
-		printf(" %s ", args->op.op);
-		print_args(args->op.right);
-		if (print_paren)
-			printf(")");
-		break;
-	default:
-		/* we should warn... */
-		return;
-	}
-	if (args->next) {
-		printf("\n");
-		print_args(args->next);
-	}
-}
-
-static void parse_header_field(const char *field,
-			       int *offset, int *size, int mandatory)
-{
-	unsigned long long save_input_buf_ptr;
-	unsigned long long save_input_buf_siz;
-	char *token;
-	int type;
-
-	save_input_buf_ptr = input_buf_ptr;
-	save_input_buf_siz = input_buf_siz;
-
-	if (read_expected(TEP_EVENT_ITEM, "field") < 0)
-		return;
-	if (read_expected(TEP_EVENT_OP, ":") < 0)
-		return;
-
-	/* type */
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto fail;
-	free_token(token);
-
-	/*
-	 * If this is not a mandatory field, then test it first.
-	 */
-	if (mandatory) {
-		if (read_expected(TEP_EVENT_ITEM, field) < 0)
-			return;
-	} else {
-		if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-			goto fail;
-		if (strcmp(token, field) != 0)
-			goto discard;
-		free_token(token);
-	}
-
-	if (read_expected(TEP_EVENT_OP, ";") < 0)
-		return;
-	if (read_expected(TEP_EVENT_ITEM, "offset") < 0)
-		return;
-	if (read_expected(TEP_EVENT_OP, ":") < 0)
-		return;
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto fail;
-	*offset = atoi(token);
-	free_token(token);
-	if (read_expected(TEP_EVENT_OP, ";") < 0)
-		return;
-	if (read_expected(TEP_EVENT_ITEM, "size") < 0)
-		return;
-	if (read_expected(TEP_EVENT_OP, ":") < 0)
-		return;
-	if (read_expect_type(TEP_EVENT_ITEM, &token) < 0)
-		goto fail;
-	*size = atoi(token);
-	free_token(token);
-	if (read_expected(TEP_EVENT_OP, ";") < 0)
-		return;
-	type = read_token(&token);
-	if (type != TEP_EVENT_NEWLINE) {
-		/* newer versions of the kernel have a "signed" type */
-		if (type != TEP_EVENT_ITEM)
-			goto fail;
-
-		if (strcmp(token, "signed") != 0)
-			goto fail;
-
-		free_token(token);
-
-		if (read_expected(TEP_EVENT_OP, ":") < 0)
-			return;
-
-		if (read_expect_type(TEP_EVENT_ITEM, &token))
-			goto fail;
-
-		free_token(token);
-		if (read_expected(TEP_EVENT_OP, ";") < 0)
-			return;
-
-		if (read_expect_type(TEP_EVENT_NEWLINE, &token))
-			goto fail;
-	}
- fail:
-	free_token(token);
-	return;
-
- discard:
-	input_buf_ptr = save_input_buf_ptr;
-	input_buf_siz = save_input_buf_siz;
-	*offset = 0;
-	*size = 0;
-	free_token(token);
-}
-
-/**
- * tep_parse_header_page - parse the data stored in the header page
- * @tep: a handle to the trace event parser context
- * @buf: the buffer storing the header page format string
- * @size: the size of @buf
- * @long_size: the long size to use if there is no header
- *
- * This parses the header page format for information on the
- * ring buffer used. The @buf should be copied from
- *
- * /sys/kernel/debug/tracing/events/header_page
- */
-int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size,
-			  int long_size)
-{
-	int ignore;
-
-	if (!size) {
-		/*
-		 * Old kernels did not have header page info.
-		 * Sorry but we just use what we find here in user space.
-		 */
-		tep->header_page_ts_size = sizeof(long long);
-		tep->header_page_size_size = long_size;
-		tep->header_page_data_offset = sizeof(long long) + long_size;
-		tep->old_format = 1;
-		return -1;
-	}
-	init_input_buf(buf, size);
-
-	parse_header_field("timestamp", &tep->header_page_ts_offset,
-			   &tep->header_page_ts_size, 1);
-	parse_header_field("commit", &tep->header_page_size_offset,
-			   &tep->header_page_size_size, 1);
-	parse_header_field("overwrite", &tep->header_page_overwrite,
-			   &ignore, 0);
-	parse_header_field("data", &tep->header_page_data_offset,
-			   &tep->header_page_data_size, 1);
-
-	return 0;
-}
-
-static int event_matches(struct tep_event *event,
-			 int id, const char *sys_name,
-			 const char *event_name)
-{
-	if (id >= 0 && id != event->id)
-		return 0;
-
-	if (event_name && (strcmp(event_name, event->name) != 0))
-		return 0;
-
-	if (sys_name && (strcmp(sys_name, event->system) != 0))
-		return 0;
-
-	return 1;
-}
-
-static void free_handler(struct event_handler *handle)
-{
-	free((void *)handle->sys_name);
-	free((void *)handle->event_name);
-	free(handle);
-}
-
-static int find_event_handle(struct tep_handle *tep, struct tep_event *event)
-{
-	struct event_handler *handle, **next;
-
-	for (next = &tep->handlers; *next;
-	     next = &(*next)->next) {
-		handle = *next;
-		if (event_matches(event, handle->id,
-				  handle->sys_name,
-				  handle->event_name))
-			break;
-	}
-
-	if (!(*next))
-		return 0;
-
-	pr_stat("overriding event (%d) %s:%s with new print handler",
-		event->id, event->system, event->name);
-
-	event->handler = handle->func;
-	event->context = handle->context;
-
-	*next = handle->next;
-	free_handler(handle);
-
-	return 1;
-}
-
-/**
- * parse_format - parse the event format
- * @buf: the buffer storing the event format string
- * @size: the size of @buf
- * @sys: the system the event belongs to
- *
- * This parses the event format and creates an event structure
- * to quickly parse raw data for a given event.
- *
- * These files currently come from:
- *
- * /sys/kernel/debug/tracing/events/.../.../format
- */
-static enum tep_errno parse_format(struct tep_event **eventp,
-				   struct tep_handle *tep, const char *buf,
-				   unsigned long size, const char *sys)
-{
-	struct tep_event *event;
-	int ret;
-
-	init_input_buf(buf, size);
-
-	*eventp = event = alloc_event();
-	if (!event)
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-
-	event->name = event_read_name();
-	if (!event->name) {
-		/* Bad event? */
-		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
-		goto event_alloc_failed;
-	}
-
-	if (strcmp(sys, "ftrace") == 0) {
-		event->flags |= TEP_EVENT_FL_ISFTRACE;
-
-		if (strcmp(event->name, "bprint") == 0)
-			event->flags |= TEP_EVENT_FL_ISBPRINT;
-	}
-		
-	event->id = event_read_id();
-	if (event->id < 0) {
-		ret = TEP_ERRNO__READ_ID_FAILED;
-		/*
-		 * This isn't an allocation error actually.
-		 * But as the ID is critical, just bail out.
-		 */
-		goto event_alloc_failed;
-	}
-
-	event->system = strdup(sys);
-	if (!event->system) {
-		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
-		goto event_alloc_failed;
-	}
-
-	/* Add tep to event so that it can be referenced */
-	event->tep = tep;
-
-	ret = event_read_format(event);
-	if (ret < 0) {
-		ret = TEP_ERRNO__READ_FORMAT_FAILED;
-		goto event_parse_failed;
-	}
-
-	/*
-	 * If the event has an override, don't print warnings if the event
-	 * print format fails to parse.
-	 */
-	if (tep && find_event_handle(tep, event))
-		show_warning = 0;
-
-	ret = event_read_print(event);
-	show_warning = 1;
-
-	if (ret < 0) {
-		ret = TEP_ERRNO__READ_PRINT_FAILED;
-		goto event_parse_failed;
-	}
-
-	if (!ret && (event->flags & TEP_EVENT_FL_ISFTRACE)) {
-		struct tep_format_field *field;
-		struct tep_print_arg *arg, **list;
-
-		/* old ftrace had no args */
-		list = &event->print_fmt.args;
-		for (field = event->format.fields; field; field = field->next) {
-			arg = alloc_arg();
-			if (!arg) {
-				event->flags |= TEP_EVENT_FL_FAILED;
-				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
-			}
-			arg->type = TEP_PRINT_FIELD;
-			arg->field.name = strdup(field->name);
-			if (!arg->field.name) {
-				event->flags |= TEP_EVENT_FL_FAILED;
-				free_arg(arg);
-				return TEP_ERRNO__OLD_FTRACE_ARG_FAILED;
-			}
-			arg->field.field = field;
-			*list = arg;
-			list = &arg->next;
-		}
-	}
-
-	if (!(event->flags & TEP_EVENT_FL_ISBPRINT))
-		event->print_fmt.print_cache = parse_args(event,
-							  event->print_fmt.format,
-							  event->print_fmt.args);
-
-	return 0;
-
- event_parse_failed:
-	event->flags |= TEP_EVENT_FL_FAILED;
-	return ret;
-
- event_alloc_failed:
-	free(event->system);
-	free(event->name);
-	free(event);
-	*eventp = NULL;
-	return ret;
-}
-
-static enum tep_errno
-__parse_event(struct tep_handle *tep,
-	      struct tep_event **eventp,
-	      const char *buf, unsigned long size,
-	      const char *sys)
-{
-	int ret = parse_format(eventp, tep, buf, size, sys);
-	struct tep_event *event = *eventp;
-
-	if (event == NULL)
-		return ret;
-
-	if (tep && add_event(tep, event)) {
-		ret = TEP_ERRNO__MEM_ALLOC_FAILED;
-		goto event_add_failed;
-	}
-
-#define PRINT_ARGS 0
-	if (PRINT_ARGS && event->print_fmt.args)
-		print_args(event->print_fmt.args);
-
-	return 0;
-
-event_add_failed:
-	free_tep_event(event);
-	return ret;
-}
-
-/**
- * tep_parse_format - parse the event format
- * @tep: a handle to the trace event parser context
- * @eventp: returned format
- * @buf: the buffer storing the event format string
- * @size: the size of @buf
- * @sys: the system the event belongs to
- *
- * This parses the event format and creates an event structure
- * to quickly parse raw data for a given event.
- *
- * These files currently come from:
- *
- * /sys/kernel/debug/tracing/events/.../.../format
- */
-enum tep_errno tep_parse_format(struct tep_handle *tep,
-				struct tep_event **eventp,
-				const char *buf,
-				unsigned long size, const char *sys)
-{
-	return __parse_event(tep, eventp, buf, size, sys);
-}
-
-/**
- * tep_parse_event - parse the event format
- * @tep: a handle to the trace event parser context
- * @buf: the buffer storing the event format string
- * @size: the size of @buf
- * @sys: the system the event belongs to
- *
- * This parses the event format and creates an event structure
- * to quickly parse raw data for a given event.
- *
- * These files currently come from:
- *
- * /sys/kernel/debug/tracing/events/.../.../format
- */
-enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf,
-			       unsigned long size, const char *sys)
-{
-	struct tep_event *event = NULL;
-	return __parse_event(tep, &event, buf, size, sys);
-}
-
-int get_field_val(struct trace_seq *s, struct tep_format_field *field,
-		  const char *name, struct tep_record *record,
-		  unsigned long long *val, int err)
-{
-	if (!field) {
-		if (err)
-			trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
-		return -1;
-	}
-
-	if (tep_read_number_field(field, record->data, val)) {
-		if (err)
-			trace_seq_printf(s, " %s=INVALID", name);
-		return -1;
-	}
-
-	return 0;
-}
-
-/**
- * tep_get_field_raw - return the raw pointer into the data field
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @len: place to store the field length.
- * @err: print default error if failed.
- *
- * Returns a pointer into record->data of the field and places
- * the length of the field in @len.
- *
- * On failure, it returns NULL.
- */
-void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
-			const char *name, struct tep_record *record,
-			int *len, int err)
-{
-	struct tep_format_field *field;
-	void *data = record->data;
-	unsigned offset;
-	int dummy;
-
-	if (!event)
-		return NULL;
-
-	field = tep_find_field(event, name);
-
-	if (!field) {
-		if (err)
-			trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
-		return NULL;
-	}
-
-	/* Allow @len to be NULL */
-	if (!len)
-		len = &dummy;
-
-	offset = field->offset;
-	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
-		offset = tep_read_number(event->tep,
-					 data + offset, field->size);
-		*len = offset >> 16;
-		offset &= 0xffff;
-		if (field->flags & TEP_FIELD_IS_RELATIVE)
-			offset += field->offset + field->size;
-	} else
-		*len = field->size;
-
-	return data + offset;
-}
-
-/**
- * tep_get_field_val - find a field and return its value
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @val: place to store the value of the field.
- * @err: print default error if failed.
- *
- * Returns 0 on success -1 on field not found.
- */
-int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
-		      const char *name, struct tep_record *record,
-		      unsigned long long *val, int err)
-{
-	struct tep_format_field *field;
-
-	if (!event)
-		return -1;
-
-	field = tep_find_field(event, name);
-
-	return get_field_val(s, field, name, record, val, err);
-}
-
-/**
- * tep_get_common_field_val - find a common field and return its value
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @val: place to store the value of the field.
- * @err: print default error if failed.
- *
- * Returns 0 on success -1 on field not found.
- */
-int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
-			     const char *name, struct tep_record *record,
-			     unsigned long long *val, int err)
-{
-	struct tep_format_field *field;
-
-	if (!event)
-		return -1;
-
-	field = tep_find_common_field(event, name);
-
-	return get_field_val(s, field, name, record, val, err);
-}
-
-/**
- * tep_get_any_field_val - find a any field and return its value
- * @s: The seq to print to on error
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @val: place to store the value of the field.
- * @err: print default error if failed.
- *
- * Returns 0 on success -1 on field not found.
- */
-int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
-			  const char *name, struct tep_record *record,
-			  unsigned long long *val, int err)
-{
-	struct tep_format_field *field;
-
-	if (!event)
-		return -1;
-
-	field = tep_find_any_field(event, name);
-
-	return get_field_val(s, field, name, record, val, err);
-}
-
-/**
- * tep_print_num_field - print a field and a format
- * @s: The seq to print to
- * @fmt: The printf format to print the field with.
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @err: print default error if failed.
- *
- * Returns positive value on success, negative in case of an error,
- * or 0 if buffer is full.
- */
-int tep_print_num_field(struct trace_seq *s, const char *fmt,
-			struct tep_event *event, const char *name,
-			struct tep_record *record, int err)
-{
-	struct tep_format_field *field = tep_find_field(event, name);
-	unsigned long long val;
-
-	if (!field)
-		goto failed;
-
-	if (tep_read_number_field(field, record->data, &val))
-		goto failed;
-
-	return trace_seq_printf(s, fmt, val);
-
- failed:
-	if (err)
-		trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
-	return -1;
-}
-
-/**
- * tep_print_func_field - print a field and a format for function pointers
- * @s: The seq to print to
- * @fmt: The printf format to print the field with.
- * @event: the event that the field is for
- * @name: The name of the field
- * @record: The record with the field name.
- * @err: print default error if failed.
- *
- * Returns positive value on success, negative in case of an error,
- * or 0 if buffer is full.
- */
-int tep_print_func_field(struct trace_seq *s, const char *fmt,
-			 struct tep_event *event, const char *name,
-			 struct tep_record *record, int err)
-{
-	struct tep_format_field *field = tep_find_field(event, name);
-	struct tep_handle *tep = event->tep;
-	unsigned long long val;
-	struct func_map *func;
-	char tmp[128];
-
-	if (!field)
-		goto failed;
-
-	if (tep_read_number_field(field, record->data, &val))
-		goto failed;
-
-	func = find_func(tep, val);
-
-	if (func)
-		snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val);
-	else
-		sprintf(tmp, "0x%08llx", val);
-
-	return trace_seq_printf(s, fmt, tmp);
-
- failed:
-	if (err)
-		trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
-	return -1;
-}
-
-static void free_func_handle(struct tep_function_handler *func)
-{
-	struct func_params *params;
-
-	free(func->name);
-
-	while (func->params) {
-		params = func->params;
-		func->params = params->next;
-		free(params);
-	}
-
-	free(func);
-}
-
-/**
- * tep_register_print_function - register a helper function
- * @tep: a handle to the trace event parser context
- * @func: the function to process the helper function
- * @ret_type: the return type of the helper function
- * @name: the name of the helper function
- * @parameters: A list of enum tep_func_arg_type
- *
- * Some events may have helper functions in the print format arguments.
- * This allows a plugin to dynamically create a way to process one
- * of these functions.
- *
- * The @parameters is a variable list of tep_func_arg_type enums that
- * must end with TEP_FUNC_ARG_VOID.
- */
-int tep_register_print_function(struct tep_handle *tep,
-				tep_func_handler func,
-				enum tep_func_arg_type ret_type,
-				char *name, ...)
-{
-	struct tep_function_handler *func_handle;
-	struct func_params **next_param;
-	struct func_params *param;
-	enum tep_func_arg_type type;
-	va_list ap;
-	int ret;
-
-	func_handle = find_func_handler(tep, name);
-	if (func_handle) {
-		/*
-		 * This is most like caused by the users own
-		 * plugins updating the function. This overrides the
-		 * system defaults.
-		 */
-		pr_stat("override of function helper '%s'", name);
-		remove_func_handler(tep, name);
-	}
-
-	func_handle = calloc(1, sizeof(*func_handle));
-	if (!func_handle) {
-		do_warning("Failed to allocate function handler");
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-	}
-
-	func_handle->ret_type = ret_type;
-	func_handle->name = strdup(name);
-	func_handle->func = func;
-	if (!func_handle->name) {
-		do_warning("Failed to allocate function name");
-		free(func_handle);
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-	}
-
-	next_param = &(func_handle->params);
-	va_start(ap, name);
-	for (;;) {
-		type = va_arg(ap, enum tep_func_arg_type);
-		if (type == TEP_FUNC_ARG_VOID)
-			break;
-
-		if (type >= TEP_FUNC_ARG_MAX_TYPES) {
-			do_warning("Invalid argument type %d", type);
-			ret = TEP_ERRNO__INVALID_ARG_TYPE;
-			goto out_free;
-		}
-
-		param = malloc(sizeof(*param));
-		if (!param) {
-			do_warning("Failed to allocate function param");
-			ret = TEP_ERRNO__MEM_ALLOC_FAILED;
-			goto out_free;
-		}
-		param->type = type;
-		param->next = NULL;
-
-		*next_param = param;
-		next_param = &(param->next);
-
-		func_handle->nr_args++;
-	}
-	va_end(ap);
-
-	func_handle->next = tep->func_handlers;
-	tep->func_handlers = func_handle;
-
-	return 0;
- out_free:
-	va_end(ap);
-	free_func_handle(func_handle);
-	return ret;
-}
-
-/**
- * tep_unregister_print_function - unregister a helper function
- * @tep: a handle to the trace event parser context
- * @func: the function to process the helper function
- * @name: the name of the helper function
- *
- * This function removes existing print handler for function @name.
- *
- * Returns 0 if the handler was removed successully, -1 otherwise.
- */
-int tep_unregister_print_function(struct tep_handle *tep,
-				  tep_func_handler func, char *name)
-{
-	struct tep_function_handler *func_handle;
-
-	func_handle = find_func_handler(tep, name);
-	if (func_handle && func_handle->func == func) {
-		remove_func_handler(tep, name);
-		return 0;
-	}
-	return -1;
-}
-
-static struct tep_event *search_event(struct tep_handle *tep, int id,
-				      const char *sys_name,
-				      const char *event_name)
-{
-	struct tep_event *event;
-
-	if (id >= 0) {
-		/* search by id */
-		event = tep_find_event(tep, id);
-		if (!event)
-			return NULL;
-		if (event_name && (strcmp(event_name, event->name) != 0))
-			return NULL;
-		if (sys_name && (strcmp(sys_name, event->system) != 0))
-			return NULL;
-	} else {
-		event = tep_find_event_by_name(tep, sys_name, event_name);
-		if (!event)
-			return NULL;
-	}
-	return event;
-}
-
-/**
- * tep_register_event_handler - register a way to parse an event
- * @tep: a handle to the trace event parser context
- * @id: the id of the event to register
- * @sys_name: the system name the event belongs to
- * @event_name: the name of the event
- * @func: the function to call to parse the event information
- * @context: the data to be passed to @func
- *
- * This function allows a developer to override the parsing of
- * a given event. If for some reason the default print format
- * is not sufficient, this function will register a function
- * for an event to be used to parse the data instead.
- *
- * If @id is >= 0, then it is used to find the event.
- * else @sys_name and @event_name are used.
- *
- * Returns:
- *  TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten
- *  TEP_REGISTER_SUCCESS if a new handler is registered successfully
- *  negative TEP_ERRNO_... in case of an error
- *
- */
-int tep_register_event_handler(struct tep_handle *tep, int id,
-			       const char *sys_name, const char *event_name,
-			       tep_event_handler_func func, void *context)
-{
-	struct tep_event *event;
-	struct event_handler *handle;
-
-	event = search_event(tep, id, sys_name, event_name);
-	if (event == NULL)
-		goto not_found;
-
-	pr_stat("overriding event (%d) %s:%s with new print handler",
-		event->id, event->system, event->name);
-
-	event->handler = func;
-	event->context = context;
-	return TEP_REGISTER_SUCCESS_OVERWRITE;
-
- not_found:
-	/* Save for later use. */
-	handle = calloc(1, sizeof(*handle));
-	if (!handle) {
-		do_warning("Failed to allocate event handler");
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-	}
-
-	handle->id = id;
-	if (event_name)
-		handle->event_name = strdup(event_name);
-	if (sys_name)
-		handle->sys_name = strdup(sys_name);
-
-	if ((event_name && !handle->event_name) ||
-	    (sys_name && !handle->sys_name)) {
-		do_warning("Failed to allocate event/sys name");
-		free((void *)handle->event_name);
-		free((void *)handle->sys_name);
-		free(handle);
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-	}
-
-	handle->func = func;
-	handle->next = tep->handlers;
-	tep->handlers = handle;
-	handle->context = context;
-
-	return TEP_REGISTER_SUCCESS;
-}
-
-static int handle_matches(struct event_handler *handler, int id,
-			  const char *sys_name, const char *event_name,
-			  tep_event_handler_func func, void *context)
-{
-	if (id >= 0 && id != handler->id)
-		return 0;
-
-	if (event_name && (strcmp(event_name, handler->event_name) != 0))
-		return 0;
-
-	if (sys_name && (strcmp(sys_name, handler->sys_name) != 0))
-		return 0;
-
-	if (func != handler->func || context != handler->context)
-		return 0;
-
-	return 1;
-}
-
-/**
- * tep_unregister_event_handler - unregister an existing event handler
- * @tep: a handle to the trace event parser context
- * @id: the id of the event to unregister
- * @sys_name: the system name the handler belongs to
- * @event_name: the name of the event handler
- * @func: the function to call to parse the event information
- * @context: the data to be passed to @func
- *
- * This function removes existing event handler (parser).
- *
- * If @id is >= 0, then it is used to find the event.
- * else @sys_name and @event_name are used.
- *
- * Returns 0 if handler was removed successfully, -1 if event was not found.
- */
-int tep_unregister_event_handler(struct tep_handle *tep, int id,
-				 const char *sys_name, const char *event_name,
-				 tep_event_handler_func func, void *context)
-{
-	struct tep_event *event;
-	struct event_handler *handle;
-	struct event_handler **next;
-
-	event = search_event(tep, id, sys_name, event_name);
-	if (event == NULL)
-		goto not_found;
-
-	if (event->handler == func && event->context == context) {
-		pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.",
-			event->id, event->system, event->name);
-
-		event->handler = NULL;
-		event->context = NULL;
-		return 0;
-	}
-
-not_found:
-	for (next = &tep->handlers; *next; next = &(*next)->next) {
-		handle = *next;
-		if (handle_matches(handle, id, sys_name, event_name,
-				   func, context))
-			break;
-	}
-
-	if (!(*next))
-		return -1;
-
-	*next = handle->next;
-	free_handler(handle);
-
-	return 0;
-}
-
-/**
- * tep_alloc - create a tep handle
- */
-struct tep_handle *tep_alloc(void)
-{
-	struct tep_handle *tep = calloc(1, sizeof(*tep));
-
-	if (tep) {
-		tep->ref_count = 1;
-		tep->host_bigendian = tep_is_bigendian();
-	}
-
-	return tep;
-}
-
-void tep_ref(struct tep_handle *tep)
-{
-	tep->ref_count++;
-}
-
-int tep_get_ref(struct tep_handle *tep)
-{
-	if (tep)
-		return tep->ref_count;
-	return 0;
-}
-
-__hidden void free_tep_format_field(struct tep_format_field *field)
-{
-	free(field->type);
-	if (field->alias != field->name)
-		free(field->alias);
-	free(field->name);
-	free(field);
-}
-
-static void free_format_fields(struct tep_format_field *field)
-{
-	struct tep_format_field *next;
-
-	while (field) {
-		next = field->next;
-		free_tep_format_field(field);
-		field = next;
-	}
-}
-
-static void free_formats(struct tep_format *format)
-{
-	free_format_fields(format->common_fields);
-	free_format_fields(format->fields);
-}
-
-__hidden void free_tep_event(struct tep_event *event)
-{
-	free(event->name);
-	free(event->system);
-
-	free_formats(&event->format);
-
-	free(event->print_fmt.format);
-	free_args(event->print_fmt.args);
-	free_parse_args(event->print_fmt.print_cache);
-	free(event);
-}
-
-/**
- * tep_free - free a tep handle
- * @tep: the tep handle to free
- */
-void tep_free(struct tep_handle *tep)
-{
-	struct cmdline_list *cmdlist, *cmdnext;
-	struct func_list *funclist, *funcnext;
-	struct printk_list *printklist, *printknext;
-	struct tep_function_handler *func_handler;
-	struct event_handler *handle;
-	int i;
-
-	if (!tep)
-		return;
-
-	cmdlist = tep->cmdlist;
-	funclist = tep->funclist;
-	printklist = tep->printklist;
-
-	tep->ref_count--;
-	if (tep->ref_count)
-		return;
-
-	if (tep->cmdlines) {
-		for (i = 0; i < tep->cmdline_count; i++)
-			free(tep->cmdlines[i].comm);
-		free(tep->cmdlines);
-	}
-
-	while (cmdlist) {
-		cmdnext = cmdlist->next;
-		free(cmdlist->comm);
-		free(cmdlist);
-		cmdlist = cmdnext;
-	}
-
-	if (tep->func_map) {
-		for (i = 0; i < (int)tep->func_count; i++) {
-			free(tep->func_map[i].func);
-			free(tep->func_map[i].mod);
-		}
-		free(tep->func_map);
-	}
-
-	while (funclist) {
-		funcnext = funclist->next;
-		free(funclist->func);
-		free(funclist->mod);
-		free(funclist);
-		funclist = funcnext;
-	}
-
-	while (tep->func_handlers) {
-		func_handler = tep->func_handlers;
-		tep->func_handlers = func_handler->next;
-		free_func_handle(func_handler);
-	}
-
-	if (tep->printk_map) {
-		for (i = 0; i < (int)tep->printk_count; i++)
-			free(tep->printk_map[i].printk);
-		free(tep->printk_map);
-	}
-
-	while (printklist) {
-		printknext = printklist->next;
-		free(printklist->printk);
-		free(printklist);
-		printklist = printknext;
-	}
-
-	for (i = 0; i < tep->nr_events; i++)
-		free_tep_event(tep->events[i]);
-
-	while (tep->handlers) {
-		handle = tep->handlers;
-		tep->handlers = handle->next;
-		free_handler(handle);
-	}
-
-	free(tep->events);
-	free(tep->sort_events);
-	free(tep->func_resolver);
-	free_tep_plugin_paths(tep);
-
-	free(tep);
-}
-
-void tep_unref(struct tep_handle *tep)
-{
-	tep_free(tep);
-}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
deleted file mode 100644
index 41d4f9f6a8436..0000000000000
--- a/tools/lib/traceevent/event-parse.h
+++ /dev/null
@@ -1,750 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#ifndef _PARSE_EVENTS_H
-#define _PARSE_EVENTS_H
-
-#include <stdbool.h>
-#include <stdarg.h>
-#include <stdio.h>
-#include <regex.h>
-#include <string.h>
-
-#include "trace-seq.h"
-
-#ifndef __maybe_unused
-#define __maybe_unused __attribute__((unused))
-#endif
-
-#ifndef DEBUG_RECORD
-#define DEBUG_RECORD 0
-#endif
-
-struct tep_record {
-	unsigned long long	ts;
-	unsigned long long	offset;
-	long long		missed_events;	/* buffer dropped events before */
-	int			record_size;	/* size of binary record */
-	int			size;		/* size of data */
-	void			*data;
-	int			cpu;
-	int			ref_count;
-	int			locked;		/* Do not free, even if ref_count is zero */
-	void			*priv;
-#if DEBUG_RECORD
-	struct tep_record	*prev;
-	struct tep_record	*next;
-	long			alloc_addr;
-#endif
-};
-
-/* ----------------------- tep ----------------------- */
-
-struct tep_handle;
-struct tep_event;
-
-typedef int (*tep_event_handler_func)(struct trace_seq *s,
-				      struct tep_record *record,
-				      struct tep_event *event,
-				      void *context);
-
-typedef int (*tep_plugin_load_func)(struct tep_handle *tep);
-typedef int (*tep_plugin_unload_func)(struct tep_handle *tep);
-
-struct tep_plugin_option {
-	struct tep_plugin_option	*next;
-	void				*handle;
-	char				*file;
-	char				*name;
-	char				*plugin_alias;
-	char				*description;
-	const char			*value;
-	void				*priv;
-	int				set;
-};
-
-/*
- * Plugin hooks that can be called:
- *
- * TEP_PLUGIN_LOADER:  (required)
- *   The function name to initialized the plugin.
- *
- *   int TEP_PLUGIN_LOADER(struct tep_handle *tep)
- *
- * TEP_PLUGIN_UNLOADER:  (optional)
- *   The function called just before unloading
- *
- *   int TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
- *
- * TEP_PLUGIN_OPTIONS:  (optional)
- *   Plugin options that can be set before loading
- *
- *   struct tep_plugin_option TEP_PLUGIN_OPTIONS[] = {
- *	{
- *		.name = "option-name",
- *		.plugin_alias = "override-file-name", (optional)
- *		.description = "description of option to show users",
- *	},
- *	{
- *		.name = NULL,
- *	},
- *   };
- *
- *   Array must end with .name = NULL;
- *
- *
- *   .plugin_alias is used to give a shorter name to access
- *   the vairable. Useful if a plugin handles more than one event.
- *
- *   If .value is not set, then it is considered a boolean and only
- *   .set will be processed. If .value is defined, then it is considered
- *   a string option and .set will be ignored.
- *
- * TEP_PLUGIN_ALIAS: (optional)
- *   The name to use for finding options (uses filename if not defined)
- */
-#define TEP_PLUGIN_LOADER tep_plugin_loader
-#define TEP_PLUGIN_UNLOADER tep_plugin_unloader
-#define TEP_PLUGIN_OPTIONS tep_plugin_options
-#define TEP_PLUGIN_ALIAS tep_plugin_alias
-#define _MAKE_STR(x)	#x
-#define MAKE_STR(x)	_MAKE_STR(x)
-#define TEP_PLUGIN_LOADER_NAME MAKE_STR(TEP_PLUGIN_LOADER)
-#define TEP_PLUGIN_UNLOADER_NAME MAKE_STR(TEP_PLUGIN_UNLOADER)
-#define TEP_PLUGIN_OPTIONS_NAME MAKE_STR(TEP_PLUGIN_OPTIONS)
-#define TEP_PLUGIN_ALIAS_NAME MAKE_STR(TEP_PLUGIN_ALIAS)
-
-enum tep_format_flags {
-	TEP_FIELD_IS_ARRAY	= 1,
-	TEP_FIELD_IS_POINTER	= 2,
-	TEP_FIELD_IS_SIGNED	= 4,
-	TEP_FIELD_IS_STRING	= 8,
-	TEP_FIELD_IS_DYNAMIC	= 16,
-	TEP_FIELD_IS_LONG	= 32,
-	TEP_FIELD_IS_FLAG	= 64,
-	TEP_FIELD_IS_SYMBOLIC	= 128,
-	TEP_FIELD_IS_RELATIVE	= 256,
-};
-
-struct tep_format_field {
-	struct tep_format_field	*next;
-	struct tep_event	*event;
-	char			*type;
-	char			*name;
-	char			*alias;
-	int			offset;
-	int			size;
-	unsigned int		arraylen;
-	unsigned int		elementsize;
-	unsigned long		flags;
-};
-
-struct tep_format {
-	int			nr_common;
-	int			nr_fields;
-	struct tep_format_field	*common_fields;
-	struct tep_format_field	*fields;
-};
-
-struct tep_print_arg_atom {
-	char			*atom;
-};
-
-struct tep_print_arg_string {
-	char			*string;
-	struct tep_format_field *field;
-};
-
-struct tep_print_arg_bitmask {
-	char			*bitmask;
-	struct tep_format_field *field;
-};
-
-struct tep_print_arg_field {
-	char			*name;
-	struct tep_format_field	*field;
-};
-
-struct tep_print_flag_sym {
-	struct tep_print_flag_sym	*next;
-	char				*value;
-	char				*str;
-};
-
-struct tep_print_arg_typecast {
-	char 			*type;
-	struct tep_print_arg	*item;
-};
-
-struct tep_print_arg_flags {
-	struct tep_print_arg		*field;
-	char				*delim;
-	struct tep_print_flag_sym	*flags;
-};
-
-struct tep_print_arg_symbol {
-	struct tep_print_arg		*field;
-	struct tep_print_flag_sym	*symbols;
-};
-
-struct tep_print_arg_hex {
-	struct tep_print_arg	*field;
-	struct tep_print_arg	*size;
-};
-
-struct tep_print_arg_int_array {
-	struct tep_print_arg	*field;
-	struct tep_print_arg	*count;
-	struct tep_print_arg	*el_size;
-};
-
-struct tep_print_arg_dynarray {
-	struct tep_format_field	*field;
-	struct tep_print_arg	*index;
-};
-
-struct tep_print_arg;
-
-struct tep_print_arg_op {
-	char			*op;
-	int			prio;
-	struct tep_print_arg	*left;
-	struct tep_print_arg	*right;
-};
-
-struct tep_function_handler;
-
-struct tep_print_arg_func {
-	struct tep_function_handler	*func;
-	struct tep_print_arg		*args;
-};
-
-enum tep_print_arg_type {
-	TEP_PRINT_NULL,
-	TEP_PRINT_ATOM,
-	TEP_PRINT_FIELD,
-	TEP_PRINT_FLAGS,
-	TEP_PRINT_SYMBOL,
-	TEP_PRINT_HEX,
-	TEP_PRINT_INT_ARRAY,
-	TEP_PRINT_TYPE,
-	TEP_PRINT_STRING,
-	TEP_PRINT_BSTRING,
-	TEP_PRINT_DYNAMIC_ARRAY,
-	TEP_PRINT_OP,
-	TEP_PRINT_FUNC,
-	TEP_PRINT_BITMASK,
-	TEP_PRINT_DYNAMIC_ARRAY_LEN,
-	TEP_PRINT_HEX_STR,
-};
-
-struct tep_print_arg {
-	struct tep_print_arg		*next;
-	enum tep_print_arg_type		type;
-	union {
-		struct tep_print_arg_atom	atom;
-		struct tep_print_arg_field	field;
-		struct tep_print_arg_typecast	typecast;
-		struct tep_print_arg_flags	flags;
-		struct tep_print_arg_symbol	symbol;
-		struct tep_print_arg_hex	hex;
-		struct tep_print_arg_int_array	int_array;
-		struct tep_print_arg_func	func;
-		struct tep_print_arg_string	string;
-		struct tep_print_arg_bitmask	bitmask;
-		struct tep_print_arg_op		op;
-		struct tep_print_arg_dynarray	dynarray;
-	};
-};
-
-struct tep_print_parse;
-
-struct tep_print_fmt {
-	char			*format;
-	struct tep_print_arg	*args;
-	struct tep_print_parse	*print_cache;
-};
-
-struct tep_event {
-	struct tep_handle	*tep;
-	char			*name;
-	int			id;
-	int			flags;
-	struct tep_format	format;
-	struct tep_print_fmt	print_fmt;
-	char			*system;
-	tep_event_handler_func	handler;
-	void			*context;
-};
-
-enum {
-	TEP_EVENT_FL_ISFTRACE	= 0x01,
-	TEP_EVENT_FL_ISPRINT	= 0x02,
-	TEP_EVENT_FL_ISBPRINT	= 0x04,
-	TEP_EVENT_FL_ISFUNCENT	= 0x10,
-	TEP_EVENT_FL_ISFUNCRET	= 0x20,
-	TEP_EVENT_FL_NOHANDLE	= 0x40,
-	TEP_EVENT_FL_PRINTRAW	= 0x80,
-
-	TEP_EVENT_FL_FAILED	= 0x80000000
-};
-
-enum tep_event_sort_type {
-	TEP_EVENT_SORT_ID,
-	TEP_EVENT_SORT_NAME,
-	TEP_EVENT_SORT_SYSTEM,
-};
-
-enum tep_event_type {
-	TEP_EVENT_ERROR,
-	TEP_EVENT_NONE,
-	TEP_EVENT_SPACE,
-	TEP_EVENT_NEWLINE,
-	TEP_EVENT_OP,
-	TEP_EVENT_DELIM,
-	TEP_EVENT_ITEM,
-	TEP_EVENT_DQUOTE,
-	TEP_EVENT_SQUOTE,
-};
-
-typedef unsigned long long (*tep_func_handler)(struct trace_seq *s,
-					       unsigned long long *args);
-
-enum tep_func_arg_type {
-	TEP_FUNC_ARG_VOID,
-	TEP_FUNC_ARG_INT,
-	TEP_FUNC_ARG_LONG,
-	TEP_FUNC_ARG_STRING,
-	TEP_FUNC_ARG_PTR,
-	TEP_FUNC_ARG_MAX_TYPES
-};
-
-enum tep_flag {
-	TEP_NSEC_OUTPUT		= 1,	/* output in NSECS */
-	TEP_DISABLE_SYS_PLUGINS	= 1 << 1,
-	TEP_DISABLE_PLUGINS	= 1 << 2,
-};
-
-#define TEP_ERRORS 							      \
-	_PE(MEM_ALLOC_FAILED,	"failed to allocate memory"),		      \
-	_PE(PARSE_EVENT_FAILED,	"failed to parse event"),		      \
-	_PE(READ_ID_FAILED,	"failed to read event id"),		      \
-	_PE(READ_FORMAT_FAILED,	"failed to read event format"),		      \
-	_PE(READ_PRINT_FAILED,	"failed to read event print fmt"), 	      \
-	_PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\
-	_PE(INVALID_ARG_TYPE,	"invalid argument type"),		      \
-	_PE(INVALID_EXP_TYPE,	"invalid expression type"),		      \
-	_PE(INVALID_OP_TYPE,	"invalid operator type"),		      \
-	_PE(INVALID_EVENT_NAME,	"invalid event name"),			      \
-	_PE(EVENT_NOT_FOUND,	"no event found"),			      \
-	_PE(SYNTAX_ERROR,	"syntax error"),			      \
-	_PE(ILLEGAL_RVALUE,	"illegal rvalue"),			      \
-	_PE(ILLEGAL_LVALUE,	"illegal lvalue for string comparison"),      \
-	_PE(INVALID_REGEX,	"regex did not compute"),		      \
-	_PE(ILLEGAL_STRING_CMP,	"illegal comparison for string"), 	      \
-	_PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), 	      \
-	_PE(REPARENT_NOT_OP,	"cannot reparent other than OP"),	      \
-	_PE(REPARENT_FAILED,	"failed to reparent filter OP"),	      \
-	_PE(BAD_FILTER_ARG,	"bad arg in filter tree"),		      \
-	_PE(UNEXPECTED_TYPE,	"unexpected type (not a value)"),	      \
-	_PE(ILLEGAL_TOKEN,	"illegal token"),			      \
-	_PE(INVALID_PAREN,	"open parenthesis cannot come here"), 	      \
-	_PE(UNBALANCED_PAREN,	"unbalanced number of parenthesis"),	      \
-	_PE(UNKNOWN_TOKEN,	"unknown token"),			      \
-	_PE(FILTER_NOT_FOUND,	"no filter found"),			      \
-	_PE(NOT_A_NUMBER,	"must have number field"),		      \
-	_PE(NO_FILTER,		"no filters exists"),			      \
-	_PE(FILTER_MISS,	"record does not match to filter")
-
-#undef _PE
-#define _PE(__code, __str) TEP_ERRNO__ ## __code
-enum tep_errno {
-	TEP_ERRNO__SUCCESS			= 0,
-	TEP_ERRNO__FILTER_MATCH			= TEP_ERRNO__SUCCESS,
-
-	/*
-	 * Choose an arbitrary negative big number not to clash with standard
-	 * errno since SUS requires the errno has distinct positive values.
-	 * See 'Issue 6' in the link below.
-	 *
-	 * https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
-	 */
-	__TEP_ERRNO__START			= -100000,
-
-	TEP_ERRORS,
-
-	__TEP_ERRNO__END,
-};
-#undef _PE
-
-struct tep_plugin_list;
-
-#define INVALID_PLUGIN_LIST_OPTION	((char **)((unsigned long)-1))
-
-enum tep_plugin_load_priority {
-	TEP_PLUGIN_FIRST,
-	TEP_PLUGIN_LAST,
-};
-
-int tep_add_plugin_path(struct tep_handle *tep, char *path,
-			enum tep_plugin_load_priority prio);
-struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep);
-void tep_unload_plugins(struct tep_plugin_list *plugin_list,
-			struct tep_handle *tep);
-void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
-			   void (*load_plugin)(struct tep_handle *tep,
-					       const char *path,
-					       const char *name,
-					       void *data),
-			   void *data);
-char **tep_plugin_list_options(void);
-void tep_plugin_free_options_list(char **list);
-int tep_plugin_add_options(const char *name,
-			   struct tep_plugin_option *options);
-int tep_plugin_add_option(const char *name, const char *val);
-void tep_plugin_remove_options(struct tep_plugin_option *options);
-void tep_plugin_print_options(struct trace_seq *s);
-void tep_print_plugins(struct trace_seq *s,
-			const char *prefix, const char *suffix,
-			const struct tep_plugin_list *list);
-
-/* tep_handle */
-typedef char *(tep_func_resolver_t)(void *priv,
-				    unsigned long long *addrp, char **modp);
-void tep_set_flag(struct tep_handle *tep, int flag);
-void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag);
-bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags);
-
-static inline int tep_is_bigendian(void)
-{
-	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
-	unsigned int val;
-
-	memcpy(&val, str, 4);
-	return val == 0x01020304;
-}
-
-/* taken from kernel/trace/trace.h */
-enum trace_flag_type {
-	TRACE_FLAG_IRQS_OFF		= 0x01,
-	TRACE_FLAG_IRQS_NOSUPPORT	= 0x02,
-	TRACE_FLAG_NEED_RESCHED		= 0x04,
-	TRACE_FLAG_HARDIRQ		= 0x08,
-	TRACE_FLAG_SOFTIRQ		= 0x10,
-};
-
-int tep_set_function_resolver(struct tep_handle *tep,
-			      tep_func_resolver_t *func, void *priv);
-void tep_reset_function_resolver(struct tep_handle *tep);
-int tep_register_comm(struct tep_handle *tep, const char *comm, int pid);
-int tep_override_comm(struct tep_handle *tep, const char *comm, int pid);
-int tep_register_function(struct tep_handle *tep, char *name,
-			  unsigned long long addr, char *mod);
-int tep_register_print_string(struct tep_handle *tep, const char *fmt,
-			      unsigned long long addr);
-bool tep_is_pid_registered(struct tep_handle *tep, int pid);
-
-struct tep_event *tep_get_event(struct tep_handle *tep, int index);
-
-#define TEP_PRINT_INFO		"INFO"
-#define TEP_PRINT_INFO_RAW	"INFO_RAW"
-#define TEP_PRINT_COMM		"COMM"
-#define TEP_PRINT_LATENCY	"LATENCY"
-#define TEP_PRINT_NAME		"NAME"
-#define TEP_PRINT_PID		1U
-#define TEP_PRINT_TIME		2U
-#define TEP_PRINT_CPU		3U
-
-void tep_print_event(struct tep_handle *tep, struct trace_seq *s,
-		     struct tep_record *record, const char *fmt, ...)
-	__attribute__ ((format (printf, 4, 5)));
-
-int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size,
-			  int long_size);
-
-enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf,
-			       unsigned long size, const char *sys);
-enum tep_errno tep_parse_format(struct tep_handle *tep,
-				struct tep_event **eventp,
-				const char *buf,
-				unsigned long size, const char *sys);
-
-void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,
-			const char *name, struct tep_record *record,
-			int *len, int err);
-
-int tep_get_field_val(struct trace_seq *s, struct tep_event *event,
-		      const char *name, struct tep_record *record,
-		      unsigned long long *val, int err);
-int tep_get_common_field_val(struct trace_seq *s, struct tep_event *event,
-			     const char *name, struct tep_record *record,
-			     unsigned long long *val, int err);
-int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event,
-			  const char *name, struct tep_record *record,
-			  unsigned long long *val, int err);
-
-int tep_print_num_field(struct trace_seq *s, const char *fmt,
-			struct tep_event *event, const char *name,
-			struct tep_record *record, int err);
-
-int tep_print_func_field(struct trace_seq *s, const char *fmt,
-			 struct tep_event *event, const char *name,
-			 struct tep_record *record, int err);
-
-enum tep_reg_handler {
-	TEP_REGISTER_SUCCESS = 0,
-	TEP_REGISTER_SUCCESS_OVERWRITE,
-};
-
-int tep_register_event_handler(struct tep_handle *tep, int id,
-			       const char *sys_name, const char *event_name,
-			       tep_event_handler_func func, void *context);
-int tep_unregister_event_handler(struct tep_handle *tep, int id,
-				 const char *sys_name, const char *event_name,
-				 tep_event_handler_func func, void *context);
-int tep_register_print_function(struct tep_handle *tep,
-				tep_func_handler func,
-				enum tep_func_arg_type ret_type,
-				char *name, ...);
-int tep_unregister_print_function(struct tep_handle *tep,
-				  tep_func_handler func, char *name);
-
-struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name);
-struct tep_format_field *tep_find_field(struct tep_event *event, const char *name);
-struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name);
-
-const char *tep_find_function(struct tep_handle *tep, unsigned long long addr);
-unsigned long long
-tep_find_function_address(struct tep_handle *tep, unsigned long long addr);
-unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size);
-int tep_read_number_field(struct tep_format_field *field, const void *data,
-			  unsigned long long *value);
-
-struct tep_event *tep_get_first_event(struct tep_handle *tep);
-int tep_get_events_count(struct tep_handle *tep);
-struct tep_event *tep_find_event(struct tep_handle *tep, int id);
-
-struct tep_event *
-tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name);
-struct tep_event *
-tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record);
-
-int tep_data_type(struct tep_handle *tep, struct tep_record *rec);
-int tep_data_pid(struct tep_handle *tep, struct tep_record *rec);
-int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec);
-int tep_data_flags(struct tep_handle *tep, struct tep_record *rec);
-const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid);
-struct tep_cmdline;
-struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm,
-					   struct tep_cmdline *next);
-int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline);
-
-void tep_print_field(struct trace_seq *s, void *data,
-		     struct tep_format_field *field);
-void tep_print_fields(struct trace_seq *s, void *data,
-		      int size __maybe_unused, struct tep_event *event);
-int tep_strerror(struct tep_handle *tep, enum tep_errno errnum,
-		 char *buf, size_t buflen);
-
-struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type);
-struct tep_event **tep_list_events_copy(struct tep_handle *tep,
-					enum tep_event_sort_type);
-struct tep_format_field **tep_event_common_fields(struct tep_event *event);
-struct tep_format_field **tep_event_fields(struct tep_event *event);
-
-enum tep_endian {
-        TEP_LITTLE_ENDIAN = 0,
-        TEP_BIG_ENDIAN
-};
-int tep_get_cpus(struct tep_handle *tep);
-void tep_set_cpus(struct tep_handle *tep, int cpus);
-int tep_get_long_size(struct tep_handle *tep);
-void tep_set_long_size(struct tep_handle *tep, int long_size);
-int tep_get_page_size(struct tep_handle *tep);
-void tep_set_page_size(struct tep_handle *tep, int _page_size);
-bool tep_is_file_bigendian(struct tep_handle *tep);
-void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian);
-bool tep_is_local_bigendian(struct tep_handle *tep);
-void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian);
-int tep_get_header_page_size(struct tep_handle *tep);
-int tep_get_header_timestamp_size(struct tep_handle *tep);
-bool tep_is_old_format(struct tep_handle *tep);
-void tep_set_test_filters(struct tep_handle *tep, int test_filters);
-
-struct tep_handle *tep_alloc(void);
-void tep_free(struct tep_handle *tep);
-void tep_ref(struct tep_handle *tep);
-void tep_unref(struct tep_handle *tep);
-int tep_get_ref(struct tep_handle *tep);
-
-/* for debugging */
-void tep_print_funcs(struct tep_handle *tep);
-void tep_print_printk(struct tep_handle *tep);
-
-/* ----------------------- filtering ----------------------- */
-
-enum tep_filter_boolean_type {
-	TEP_FILTER_FALSE,
-	TEP_FILTER_TRUE,
-};
-
-enum tep_filter_op_type {
-	TEP_FILTER_OP_AND = 1,
-	TEP_FILTER_OP_OR,
-	TEP_FILTER_OP_NOT,
-};
-
-enum tep_filter_cmp_type {
-	TEP_FILTER_CMP_NONE,
-	TEP_FILTER_CMP_EQ,
-	TEP_FILTER_CMP_NE,
-	TEP_FILTER_CMP_GT,
-	TEP_FILTER_CMP_LT,
-	TEP_FILTER_CMP_GE,
-	TEP_FILTER_CMP_LE,
-	TEP_FILTER_CMP_MATCH,
-	TEP_FILTER_CMP_NOT_MATCH,
-	TEP_FILTER_CMP_REGEX,
-	TEP_FILTER_CMP_NOT_REGEX,
-};
-
-enum tep_filter_exp_type {
-	TEP_FILTER_EXP_NONE,
-	TEP_FILTER_EXP_ADD,
-	TEP_FILTER_EXP_SUB,
-	TEP_FILTER_EXP_MUL,
-	TEP_FILTER_EXP_DIV,
-	TEP_FILTER_EXP_MOD,
-	TEP_FILTER_EXP_RSHIFT,
-	TEP_FILTER_EXP_LSHIFT,
-	TEP_FILTER_EXP_AND,
-	TEP_FILTER_EXP_OR,
-	TEP_FILTER_EXP_XOR,
-	TEP_FILTER_EXP_NOT,
-};
-
-enum tep_filter_arg_type {
-	TEP_FILTER_ARG_NONE,
-	TEP_FILTER_ARG_BOOLEAN,
-	TEP_FILTER_ARG_VALUE,
-	TEP_FILTER_ARG_FIELD,
-	TEP_FILTER_ARG_EXP,
-	TEP_FILTER_ARG_OP,
-	TEP_FILTER_ARG_NUM,
-	TEP_FILTER_ARG_STR,
-};
-
-enum tep_filter_value_type {
-	TEP_FILTER_NUMBER,
-	TEP_FILTER_STRING,
-	TEP_FILTER_CHAR
-};
-
-struct tep_filter_arg;
-
-struct tep_filter_arg_boolean {
-	enum tep_filter_boolean_type	value;
-};
-
-struct tep_filter_arg_field {
-	struct tep_format_field		*field;
-};
-
-struct tep_filter_arg_value {
-	enum tep_filter_value_type	type;
-	union {
-		char			*str;
-		unsigned long long	val;
-	};
-};
-
-struct tep_filter_arg_op {
-	enum tep_filter_op_type		type;
-	struct tep_filter_arg		*left;
-	struct tep_filter_arg		*right;
-};
-
-struct tep_filter_arg_exp {
-	enum tep_filter_exp_type	type;
-	struct tep_filter_arg		*left;
-	struct tep_filter_arg		*right;
-};
-
-struct tep_filter_arg_num {
-	enum tep_filter_cmp_type	type;
-	struct tep_filter_arg		*left;
-	struct tep_filter_arg		*right;
-};
-
-struct tep_filter_arg_str {
-	enum tep_filter_cmp_type	type;
-	struct tep_format_field		*field;
-	char				*val;
-	char				*buffer;
-	regex_t				reg;
-};
-
-struct tep_filter_arg {
-	enum tep_filter_arg_type		type;
-	union {
-		struct tep_filter_arg_boolean	boolean;
-		struct tep_filter_arg_field	field;
-		struct tep_filter_arg_value	value;
-		struct tep_filter_arg_op	op;
-		struct tep_filter_arg_exp	exp;
-		struct tep_filter_arg_num	num;
-		struct tep_filter_arg_str	str;
-	};
-};
-
-struct tep_filter_type {
-	int			event_id;
-	struct tep_event	*event;
-	struct tep_filter_arg	*filter;
-};
-
-#define TEP_FILTER_ERROR_BUFSZ  1024
-
-struct tep_event_filter {
-	struct tep_handle	*tep;
-	int			filters;
-	struct tep_filter_type	*event_filters;
-	char			error_buffer[TEP_FILTER_ERROR_BUFSZ];
-};
-
-struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep);
-
-/* for backward compatibility */
-#define FILTER_NONE		TEP_ERRNO__NO_FILTER
-#define FILTER_NOEXIST		TEP_ERRNO__FILTER_NOT_FOUND
-#define FILTER_MISS		TEP_ERRNO__FILTER_MISS
-#define FILTER_MATCH		TEP_ERRNO__FILTER_MATCH
-
-enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
-					 const char *filter_str);
-
-enum tep_errno tep_filter_match(struct tep_event_filter *filter,
-				struct tep_record *record);
-
-int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
-			char *buf, size_t buflen);
-
-int tep_event_filtered(struct tep_event_filter *filter,
-		       int event_id);
-
-void tep_filter_reset(struct tep_event_filter *filter);
-
-void tep_filter_free(struct tep_event_filter *filter);
-
-char *tep_filter_make_string(struct tep_event_filter *filter, int event_id);
-
-int tep_filter_remove_event(struct tep_event_filter *filter,
-			    int event_id);
-
-int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source);
-
-int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2);
-
-#endif /* _PARSE_EVENTS_H */
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
deleted file mode 100644
index e7f93d5fe4fd0..0000000000000
--- a/tools/lib/traceevent/event-plugin.c
+++ /dev/null
@@ -1,711 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#include <ctype.h>
-#include <stdio.h>
-#include <string.h>
-#include <dlfcn.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <errno.h>
-#include "event-parse.h"
-#include "event-parse-local.h"
-#include "event-utils.h"
-#include "trace-seq.h"
-
-#define LOCAL_PLUGIN_DIR ".local/lib/traceevent/plugins/"
-
-static struct registered_plugin_options {
-	struct registered_plugin_options	*next;
-	struct tep_plugin_option		*options;
-} *registered_options;
-
-static struct trace_plugin_options {
-	struct trace_plugin_options	*next;
-	char				*plugin;
-	char				*option;
-	char				*value;
-} *trace_plugin_options;
-
-struct tep_plugin_list {
-	struct tep_plugin_list	*next;
-	char			*name;
-	void			*handle;
-};
-
-struct tep_plugins_dir {
-	struct tep_plugins_dir		*next;
-	char				*path;
-	enum tep_plugin_load_priority	prio;
-};
-
-static void lower_case(char *str)
-{
-	if (!str)
-		return;
-	for (; *str; str++)
-		*str = tolower(*str);
-}
-
-static int update_option_value(struct tep_plugin_option *op, const char *val)
-{
-	char *op_val;
-
-	if (!val) {
-		/* toggle, only if option is boolean */
-		if (op->value)
-			/* Warn? */
-			return 0;
-		op->set ^= 1;
-		return 0;
-	}
-
-	/*
-	 * If the option has a value then it takes a string
-	 * otherwise the option is a boolean.
-	 */
-	if (op->value) {
-		op->value = val;
-		return 0;
-	}
-
-	/* Option is boolean, must be either "1", "0", "true" or "false" */
-
-	op_val = strdup(val);
-	if (!op_val)
-		return -1;
-	lower_case(op_val);
-
-	if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0)
-		op->set = 1;
-	else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0)
-		op->set = 0;
-	free(op_val);
-
-	return 0;
-}
-
-/**
- * tep_plugin_list_options - get list of plugin options
- *
- * Returns an array of char strings that list the currently registered
- * plugin options in the format of <plugin>:<option>. This list can be
- * used by toggling the option.
- *
- * Returns NULL if there's no options registered. On error it returns
- * INVALID_PLUGIN_LIST_OPTION
- *
- * Must be freed with tep_plugin_free_options_list().
- */
-char **tep_plugin_list_options(void)
-{
-	struct registered_plugin_options *reg;
-	struct tep_plugin_option *op;
-	char **list = NULL;
-	char *name;
-	int count = 0;
-
-	for (reg = registered_options; reg; reg = reg->next) {
-		for (op = reg->options; op->name; op++) {
-			char *alias = op->plugin_alias ? op->plugin_alias : op->file;
-			char **temp = list;
-			int ret;
-
-			ret = asprintf(&name, "%s:%s", alias, op->name);
-			if (ret < 0)
-				goto err;
-
-			list = realloc(list, count + 2);
-			if (!list) {
-				list = temp;
-				free(name);
-				goto err;
-			}
-			list[count++] = name;
-			list[count] = NULL;
-		}
-	}
-	return list;
-
- err:
-	while (--count >= 0)
-		free(list[count]);
-	free(list);
-
-	return INVALID_PLUGIN_LIST_OPTION;
-}
-
-void tep_plugin_free_options_list(char **list)
-{
-	int i;
-
-	if (!list)
-		return;
-
-	if (list == INVALID_PLUGIN_LIST_OPTION)
-		return;
-
-	for (i = 0; list[i]; i++)
-		free(list[i]);
-
-	free(list);
-}
-
-static int
-update_option(const char *file, struct tep_plugin_option *option)
-{
-	struct trace_plugin_options *op;
-	char *plugin;
-	int ret = 0;
-
-	if (option->plugin_alias) {
-		plugin = strdup(option->plugin_alias);
-		if (!plugin)
-			return -1;
-	} else {
-		char *p;
-		plugin = strdup(file);
-		if (!plugin)
-			return -1;
-		p = strstr(plugin, ".");
-		if (p)
-			*p = '\0';
-	}
-
-	/* first look for named options */
-	for (op = trace_plugin_options; op; op = op->next) {
-		if (!op->plugin)
-			continue;
-		if (strcmp(op->plugin, plugin) != 0)
-			continue;
-		if (strcmp(op->option, option->name) != 0)
-			continue;
-
-		ret = update_option_value(option, op->value);
-		if (ret)
-			goto out;
-		break;
-	}
-
-	/* first look for unnamed options */
-	for (op = trace_plugin_options; op; op = op->next) {
-		if (op->plugin)
-			continue;
-		if (strcmp(op->option, option->name) != 0)
-			continue;
-
-		ret = update_option_value(option, op->value);
-		break;
-	}
-
- out:
-	free(plugin);
-	return ret;
-}
-
-/**
- * tep_plugin_add_options - Add a set of options by a plugin
- * @name: The name of the plugin adding the options
- * @options: The set of options being loaded
- *
- * Sets the options with the values that have been added by user.
- */
-int tep_plugin_add_options(const char *name,
-			   struct tep_plugin_option *options)
-{
-	struct registered_plugin_options *reg;
-
-	reg = malloc(sizeof(*reg));
-	if (!reg)
-		return -1;
-	reg->next = registered_options;
-	reg->options = options;
-	registered_options = reg;
-
-	while (options->name) {
-		update_option(name, options);
-		options++;
-	}
-	return 0;
-}
-
-/**
- * tep_plugin_remove_options - remove plugin options that were registered
- * @options: Options to removed that were registered with tep_plugin_add_options
- */
-void tep_plugin_remove_options(struct tep_plugin_option *options)
-{
-	struct registered_plugin_options **last;
-	struct registered_plugin_options *reg;
-
-	for (last = &registered_options; *last; last = &(*last)->next) {
-		if ((*last)->options == options) {
-			reg = *last;
-			*last = reg->next;
-			free(reg);
-			return;
-		}
-	}
-}
-
-static int parse_option_name(char **option, char **plugin)
-{
-	char *p;
-
-	*plugin = NULL;
-
-	if ((p = strstr(*option, ":"))) {
-		*plugin = *option;
-		*p = '\0';
-		*option = strdup(p + 1);
-		if (!*option)
-			return -1;
-	}
-	return 0;
-}
-
-static struct tep_plugin_option *
-find_registered_option(const char *plugin, const char *option)
-{
-	struct registered_plugin_options *reg;
-	struct tep_plugin_option *op;
-	const char *op_plugin;
-
-	for (reg = registered_options; reg; reg = reg->next) {
-		for (op = reg->options; op->name; op++) {
-			if (op->plugin_alias)
-				op_plugin = op->plugin_alias;
-			else
-				op_plugin = op->file;
-
-			if (plugin && strcmp(plugin, op_plugin) != 0)
-				continue;
-			if (strcmp(option, op->name) != 0)
-				continue;
-
-			return op;
-		}
-	}
-
-	return NULL;
-}
-
-static int process_option(const char *plugin, const char *option, const char *val)
-{
-	struct tep_plugin_option *op;
-
-	op = find_registered_option(plugin, option);
-	if (!op)
-		return 0;
-
-	return update_option_value(op, val);
-}
-
-/**
- * tep_plugin_add_option - add an option/val pair to set plugin options
- * @name: The name of the option (format: <plugin>:<option> or just <option>)
- * @val: (optional) the value for the option
- *
- * Modify a plugin option. If @val is given than the value of the option
- * is set (note, some options just take a boolean, so @val must be either
- * "1" or "0" or "true" or "false").
- */
-int tep_plugin_add_option(const char *name, const char *val)
-{
-	struct trace_plugin_options *op;
-	char *option_str;
-	char *plugin;
-
-	option_str = strdup(name);
-	if (!option_str)
-		return -ENOMEM;
-
-	if (parse_option_name(&option_str, &plugin) < 0)
-		return -ENOMEM;
-
-	/* If the option exists, update the val */
-	for (op = trace_plugin_options; op; op = op->next) {
-		/* Both must be NULL or not NULL */
-		if ((!plugin || !op->plugin) && plugin != op->plugin)
-			continue;
-		if (plugin && strcmp(plugin, op->plugin) != 0)
-			continue;
-		if (strcmp(op->option, option_str) != 0)
-			continue;
-
-		/* update option */
-		free(op->value);
-		if (val) {
-			op->value = strdup(val);
-			if (!op->value)
-				goto out_free;
-		} else
-			op->value = NULL;
-
-		/* plugin and option_str don't get freed at the end */
-		free(plugin);
-		free(option_str);
-
-		plugin = op->plugin;
-		option_str = op->option;
-		break;
-	}
-
-	/* If not found, create */
-	if (!op) {
-		op = malloc(sizeof(*op));
-		if (!op)
-			goto out_free;
-		memset(op, 0, sizeof(*op));
-		op->plugin = plugin;
-		op->option = option_str;
-		if (val) {
-			op->value = strdup(val);
-			if (!op->value) {
-				free(op);
-				goto out_free;
-			}
-		}
-		op->next = trace_plugin_options;
-		trace_plugin_options = op;
-	}
-
-	return process_option(plugin, option_str, val);
-
-out_free:
-	free(plugin);
-	free(option_str);
-	return -ENOMEM;
-}
-
-static void print_op_data(struct trace_seq *s, const char *name,
-			  const char *op)
-{
-	if (op)
-		trace_seq_printf(s, "%8s:\t%s\n", name, op);
-}
-
-/**
- * tep_plugin_print_options - print out the registered plugin options
- * @s: The trace_seq descriptor to write the plugin options into
- *
- * Writes a list of options into trace_seq @s.
- */
-void tep_plugin_print_options(struct trace_seq *s)
-{
-	struct registered_plugin_options *reg;
-	struct tep_plugin_option *op;
-
-	for (reg = registered_options; reg; reg = reg->next) {
-		if (reg != registered_options)
-			trace_seq_printf(s, "============\n");
-		for (op = reg->options; op->name; op++) {
-			if (op != reg->options)
-				trace_seq_printf(s, "------------\n");
-			print_op_data(s, "file", op->file);
-			print_op_data(s, "plugin", op->plugin_alias);
-			print_op_data(s, "option", op->name);
-			print_op_data(s, "desc", op->description);
-			print_op_data(s, "value", op->value);
-			trace_seq_printf(s, "%8s:\t%d\n", "set", op->set);
-		}
-	}
-}
-
-/**
- * tep_print_plugins - print out the list of plugins loaded
- * @s: the trace_seq descripter to write to
- * @prefix: The prefix string to add before listing the option name
- * @suffix: The suffix string ot append after the option name
- * @list: The list of plugins (usually returned by tep_load_plugins()
- *
- * Writes to the trace_seq @s the list of plugins (files) that is
- * returned by tep_load_plugins(). Use @prefix and @suffix for formating:
- * @prefix = "  ", @suffix = "\n".
- */
-void tep_print_plugins(struct trace_seq *s,
-		       const char *prefix, const char *suffix,
-		       const struct tep_plugin_list *list)
-{
-	while (list) {
-		trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix);
-		list = list->next;
-	}
-}
-
-static void
-load_plugin(struct tep_handle *tep, const char *path,
-	    const char *file, void *data)
-{
-	struct tep_plugin_list **plugin_list = data;
-	struct tep_plugin_option *options;
-	tep_plugin_load_func func;
-	struct tep_plugin_list *list;
-	const char *alias;
-	char *plugin;
-	void *handle;
-	int ret;
-
-	ret = asprintf(&plugin, "%s/%s", path, file);
-	if (ret < 0) {
-		warning("could not allocate plugin memory\n");
-		return;
-	}
-
-	handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
-	if (!handle) {
-		warning("could not load plugin '%s'\n%s\n",
-			plugin, dlerror());
-		goto out_free;
-	}
-
-	alias = dlsym(handle, TEP_PLUGIN_ALIAS_NAME);
-	if (!alias)
-		alias = file;
-
-	options = dlsym(handle, TEP_PLUGIN_OPTIONS_NAME);
-	if (options) {
-		while (options->name) {
-			ret = update_option(alias, options);
-			if (ret < 0)
-				goto out_free;
-			options++;
-		}
-	}
-
-	func = dlsym(handle, TEP_PLUGIN_LOADER_NAME);
-	if (!func) {
-		warning("could not find func '%s' in plugin '%s'\n%s\n",
-			TEP_PLUGIN_LOADER_NAME, plugin, dlerror());
-		goto out_free;
-	}
-
-	list = malloc(sizeof(*list));
-	if (!list) {
-		warning("could not allocate plugin memory\n");
-		goto out_free;
-	}
-
-	list->next = *plugin_list;
-	list->handle = handle;
-	list->name = plugin;
-	*plugin_list = list;
-
-	pr_stat("registering plugin: %s", plugin);
-	func(tep);
-	return;
-
- out_free:
-	free(plugin);
-}
-
-static void
-load_plugins_dir(struct tep_handle *tep, const char *suffix,
-		 const char *path,
-		 void (*load_plugin)(struct tep_handle *tep,
-				     const char *path,
-				     const char *name,
-				     void *data),
-		 void *data)
-{
-	struct dirent *dent;
-	struct stat st;
-	DIR *dir;
-	int ret;
-
-	ret = stat(path, &st);
-	if (ret < 0)
-		return;
-
-	if (!S_ISDIR(st.st_mode))
-		return;
-
-	dir = opendir(path);
-	if (!dir)
-		return;
-
-	while ((dent = readdir(dir))) {
-		const char *name = dent->d_name;
-
-		if (strcmp(name, ".") == 0 ||
-		    strcmp(name, "..") == 0)
-			continue;
-
-		/* Only load plugins that end in suffix */
-		if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0)
-			continue;
-
-		load_plugin(tep, path, name, data);
-	}
-
-	closedir(dir);
-}
-
-/**
- * tep_load_plugins_hook - call a user specified callback to load a plugin
- * @tep: handler to traceevent context
- * @suffix: filter only plugin files with given suffix
- * @load_plugin: user specified callback, called for each plugin file
- * @data: custom context, passed to @load_plugin
- *
- * Searches for traceevent plugin files and calls @load_plugin for each
- * The order of plugins search is:
- *  - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_FIRST
- *  - Directory, specified at compile time with PLUGIN_TRACEEVENT_DIR
- *  - Directory, specified by environment variable TRACEEVENT_PLUGIN_DIR
- *  - In user's home: ~/.local/lib/traceevent/plugins/
- *  - Directories, specified in @tep->plugins_dir and priority TEP_PLUGIN_LAST
- *
- */
-void tep_load_plugins_hook(struct tep_handle *tep, const char *suffix,
-			   void (*load_plugin)(struct tep_handle *tep,
-					       const char *path,
-					       const char *name,
-					       void *data),
-			   void *data)
-{
-	struct tep_plugins_dir *dir = NULL;
-	char *home;
-	char *path;
-	char *envdir;
-	int ret;
-
-	if (tep && tep->flags & TEP_DISABLE_PLUGINS)
-		return;
-
-	if (tep)
-		dir = tep->plugins_dir;
-	while (dir) {
-		if (dir->prio == TEP_PLUGIN_FIRST)
-			load_plugins_dir(tep, suffix, dir->path,
-					 load_plugin, data);
-		dir = dir->next;
-	}
-
-	/*
-	 * If a system plugin directory was defined,
-	 * check that first.
-	 */
-#ifdef PLUGIN_DIR
-	if (!tep || !(tep->flags & TEP_DISABLE_SYS_PLUGINS))
-		load_plugins_dir(tep, suffix, PLUGIN_DIR,
-				 load_plugin, data);
-#endif
-
-	/*
-	 * Next let the environment-set plugin directory
-	 * override the system defaults.
-	 */
-	envdir = getenv("TRACEEVENT_PLUGIN_DIR");
-	if (envdir)
-		load_plugins_dir(tep, suffix, envdir, load_plugin, data);
-
-	/*
-	 * Now let the home directory override the environment
-	 * or system defaults.
-	 */
-	home = getenv("HOME");
-	if (!home)
-		return;
-
-	ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR);
-	if (ret < 0) {
-		warning("could not allocate plugin memory\n");
-		return;
-	}
-
-	load_plugins_dir(tep, suffix, path, load_plugin, data);
-
-	if (tep)
-		dir = tep->plugins_dir;
-	while (dir) {
-		if (dir->prio == TEP_PLUGIN_LAST)
-			load_plugins_dir(tep, suffix, dir->path,
-					 load_plugin, data);
-		dir = dir->next;
-	}
-
-	free(path);
-}
-
-struct tep_plugin_list*
-tep_load_plugins(struct tep_handle *tep)
-{
-	struct tep_plugin_list *list = NULL;
-
-	tep_load_plugins_hook(tep, ".so", load_plugin, &list);
-	return list;
-}
-
-/**
- * tep_add_plugin_path - Add a new plugin directory.
- * @tep: Trace event handler.
- * @path: Path to a directory. All plugin files in that
- *	  directory will be loaded.
- *@prio: Load priority of the plugins in that directory.
- *
- * Returns -1 in case of an error, 0 otherwise.
- */
-int tep_add_plugin_path(struct tep_handle *tep, char *path,
-			enum tep_plugin_load_priority prio)
-{
-	struct tep_plugins_dir *dir;
-
-	if (!tep || !path)
-		return -1;
-
-	dir = calloc(1, sizeof(*dir));
-	if (!dir)
-		return -1;
-
-	dir->path = strdup(path);
-	if (!dir->path) {
-		free(dir);
-		return -1;
-	}
-	dir->prio = prio;
-	dir->next = tep->plugins_dir;
-	tep->plugins_dir = dir;
-
-	return 0;
-}
-
-__hidden void free_tep_plugin_paths(struct tep_handle *tep)
-{
-	struct tep_plugins_dir *dir;
-
-	if (!tep)
-		return;
-
-	dir = tep->plugins_dir;
-	while (dir) {
-		tep->plugins_dir = tep->plugins_dir->next;
-		free(dir->path);
-		free(dir);
-		dir = tep->plugins_dir;
-	}
-}
-
-void
-tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep)
-{
-	tep_plugin_unload_func func;
-	struct tep_plugin_list *list;
-
-	while (plugin_list) {
-		list = plugin_list;
-		plugin_list = list->next;
-		func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME);
-		if (func)
-			func(tep);
-		dlclose(list->handle);
-		free(list->name);
-		free(list);
-	}
-}
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
deleted file mode 100644
index 0560b96a31d1b..0000000000000
--- a/tools/lib/traceevent/event-utils.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#ifndef __UTIL_H
-#define __UTIL_H
-
-#include <ctype.h>
-
-/* Can be overridden */
-void warning(const char *fmt, ...);
-void pr_stat(const char *fmt, ...);
-void vpr_stat(const char *fmt, va_list ap);
-
-/* Always available */
-void __warning(const char *fmt, ...);
-void __pr_stat(const char *fmt, ...);
-
-void __vwarning(const char *fmt, ...);
-void __vpr_stat(const char *fmt, ...);
-
-#define min(x, y) ({				\
-	typeof(x) _min1 = (x);			\
-	typeof(y) _min2 = (y);			\
-	(void) (&_min1 == &_min2);		\
-	_min1 < _min2 ? _min1 : _min2; })
-
-static inline char *strim(char *string)
-{
-	char *ret;
-
-	if (!string)
-		return NULL;
-	while (*string) {
-		if (!isspace(*string))
-			break;
-		string++;
-	}
-	ret = string;
-
-	string = ret + strlen(ret) - 1;
-	while (string > ret) {
-		if (!isspace(*string))
-			break;
-		string--;
-	}
-	string[1] = 0;
-
-	return ret;
-}
-
-static inline int has_text(const char *text)
-{
-	if (!text)
-		return 0;
-
-	while (*text) {
-		if (!isspace(*text))
-			return 1;
-		text++;
-	}
-
-	return 0;
-}
-
-#endif
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
deleted file mode 100644
index f1640d651c8a8..0000000000000
--- a/tools/lib/traceevent/kbuffer-parse.c
+++ /dev/null
@@ -1,809 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "kbuffer.h"
-
-#define MISSING_EVENTS (1UL << 31)
-#define MISSING_STORED (1UL << 30)
-
-#define COMMIT_MASK ((1 << 27) - 1)
-
-enum {
-	KBUFFER_FL_HOST_BIG_ENDIAN	= (1<<0),
-	KBUFFER_FL_BIG_ENDIAN		= (1<<1),
-	KBUFFER_FL_LONG_8		= (1<<2),
-	KBUFFER_FL_OLD_FORMAT		= (1<<3),
-};
-
-#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN)
-
-/** kbuffer
- * @timestamp		- timestamp of current event
- * @lost_events		- # of lost events between this subbuffer and previous
- * @flags		- special flags of the kbuffer
- * @subbuffer		- pointer to the sub-buffer page
- * @data		- pointer to the start of data on the sub-buffer page
- * @index		- index from @data to the @curr event data
- * @curr		- offset from @data to the start of current event
- *			   (includes metadata)
- * @next		- offset from @data to the start of next event
- * @size		- The size of data on @data
- * @start		- The offset from @subbuffer where @data lives
- *
- * @read_4		- Function to read 4 raw bytes (may swap)
- * @read_8		- Function to read 8 raw bytes (may swap)
- * @read_long		- Function to read a long word (4 or 8 bytes with needed swap)
- */
-struct kbuffer {
-	unsigned long long 	timestamp;
-	long long		lost_events;
-	unsigned long		flags;
-	void			*subbuffer;
-	void			*data;
-	unsigned int		index;
-	unsigned int		curr;
-	unsigned int		next;
-	unsigned int		size;
-	unsigned int		start;
-
-	unsigned int (*read_4)(void *ptr);
-	unsigned long long (*read_8)(void *ptr);
-	unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr);
-	int (*next_event)(struct kbuffer *kbuf);
-};
-
-static void *zmalloc(size_t size)
-{
-	return calloc(1, size);
-}
-
-static int host_is_bigendian(void)
-{
-	unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
-	unsigned int *ptr;
-
-	ptr = (unsigned int *)str;
-	return *ptr == 0x01020304;
-}
-
-static int do_swap(struct kbuffer *kbuf)
-{
-	return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) &
-		ENDIAN_MASK;
-}
-
-static unsigned long long __read_8(void *ptr)
-{
-	unsigned long long data = *(unsigned long long *)ptr;
-
-	return data;
-}
-
-static unsigned long long __read_8_sw(void *ptr)
-{
-	unsigned long long data = *(unsigned long long *)ptr;
-	unsigned long long swap;
-
-	swap = ((data & 0xffULL) << 56) |
-		((data & (0xffULL << 8)) << 40) |
-		((data & (0xffULL << 16)) << 24) |
-		((data & (0xffULL << 24)) << 8) |
-		((data & (0xffULL << 32)) >> 8) |
-		((data & (0xffULL << 40)) >> 24) |
-		((data & (0xffULL << 48)) >> 40) |
-		((data & (0xffULL << 56)) >> 56);
-
-	return swap;
-}
-
-static unsigned int __read_4(void *ptr)
-{
-	unsigned int data = *(unsigned int *)ptr;
-
-	return data;
-}
-
-static unsigned int __read_4_sw(void *ptr)
-{
-	unsigned int data = *(unsigned int *)ptr;
-	unsigned int swap;
-
-	swap = ((data & 0xffULL) << 24) |
-		((data & (0xffULL << 8)) << 8) |
-		((data & (0xffULL << 16)) >> 8) |
-		((data & (0xffULL << 24)) >> 24);
-
-	return swap;
-}
-
-static unsigned long long read_8(struct kbuffer *kbuf, void *ptr)
-{
-	return kbuf->read_8(ptr);
-}
-
-static unsigned int read_4(struct kbuffer *kbuf, void *ptr)
-{
-	return kbuf->read_4(ptr);
-}
-
-static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr)
-{
-	return kbuf->read_8(ptr);
-}
-
-static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr)
-{
-	return kbuf->read_4(ptr);
-}
-
-static unsigned long long read_long(struct kbuffer *kbuf, void *ptr)
-{
-	return kbuf->read_long(kbuf, ptr);
-}
-
-static int calc_index(struct kbuffer *kbuf, void *ptr)
-{
-	return (unsigned long)ptr - (unsigned long)kbuf->data;
-}
-
-static int __next_event(struct kbuffer *kbuf);
-
-/**
- * kbuffer_alloc - allocat a new kbuffer
- * @size;	enum to denote size of word
- * @endian:	enum to denote endianness
- *
- * Allocates and returns a new kbuffer.
- */
-struct kbuffer *
-kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian)
-{
-	struct kbuffer *kbuf;
-	int flags = 0;
-
-	switch (size) {
-	case KBUFFER_LSIZE_4:
-		break;
-	case KBUFFER_LSIZE_8:
-		flags |= KBUFFER_FL_LONG_8;
-		break;
-	default:
-		return NULL;
-	}
-
-	switch (endian) {
-	case KBUFFER_ENDIAN_LITTLE:
-		break;
-	case KBUFFER_ENDIAN_BIG:
-		flags |= KBUFFER_FL_BIG_ENDIAN;
-		break;
-	default:
-		return NULL;
-	}
-
-	kbuf = zmalloc(sizeof(*kbuf));
-	if (!kbuf)
-		return NULL;
-
-	kbuf->flags = flags;
-
-	if (host_is_bigendian())
-		kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN;
-
-	if (do_swap(kbuf)) {
-		kbuf->read_8 = __read_8_sw;
-		kbuf->read_4 = __read_4_sw;
-	} else {
-		kbuf->read_8 = __read_8;
-		kbuf->read_4 = __read_4;
-	}
-
-	if (kbuf->flags & KBUFFER_FL_LONG_8)
-		kbuf->read_long = __read_long_8;
-	else
-		kbuf->read_long = __read_long_4;
-
-	/* May be changed by kbuffer_set_old_format() */
-	kbuf->next_event = __next_event;
-
-	return kbuf;
-}
-
-/** kbuffer_free - free an allocated kbuffer
- * @kbuf:	The kbuffer to free
- *
- * Can take NULL as a parameter.
- */
-void kbuffer_free(struct kbuffer *kbuf)
-{
-	free(kbuf);
-}
-
-static unsigned int type4host(struct kbuffer *kbuf,
-			      unsigned int type_len_ts)
-{
-	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
-		return (type_len_ts >> 29) & 3;
-	else
-		return type_len_ts & 3;
-}
-
-static unsigned int len4host(struct kbuffer *kbuf,
-			     unsigned int type_len_ts)
-{
-	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
-		return (type_len_ts >> 27) & 7;
-	else
-		return (type_len_ts >> 2) & 7;
-}
-
-static unsigned int type_len4host(struct kbuffer *kbuf,
-				  unsigned int type_len_ts)
-{
-	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
-		return (type_len_ts >> 27) & ((1 << 5) - 1);
-	else
-		return type_len_ts & ((1 << 5) - 1);
-}
-
-static unsigned int ts4host(struct kbuffer *kbuf,
-			    unsigned int type_len_ts)
-{
-	if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
-		return type_len_ts & ((1 << 27) - 1);
-	else
-		return type_len_ts >> 5;
-}
-
-/*
- * Linux 2.6.30 and earlier (not much ealier) had a different
- * ring buffer format. It should be obsolete, but we handle it anyway.
- */
-enum old_ring_buffer_type {
-	OLD_RINGBUF_TYPE_PADDING,
-	OLD_RINGBUF_TYPE_TIME_EXTEND,
-	OLD_RINGBUF_TYPE_TIME_STAMP,
-	OLD_RINGBUF_TYPE_DATA,
-};
-
-static unsigned int old_update_pointers(struct kbuffer *kbuf)
-{
-	unsigned long long extend;
-	unsigned int type_len_ts;
-	unsigned int type;
-	unsigned int len;
-	unsigned int delta;
-	unsigned int length;
-	void *ptr = kbuf->data + kbuf->curr;
-
-	type_len_ts = read_4(kbuf, ptr);
-	ptr += 4;
-
-	type = type4host(kbuf, type_len_ts);
-	len = len4host(kbuf, type_len_ts);
-	delta = ts4host(kbuf, type_len_ts);
-
-	switch (type) {
-	case OLD_RINGBUF_TYPE_PADDING:
-		kbuf->next = kbuf->size;
-		return 0;
-
-	case OLD_RINGBUF_TYPE_TIME_EXTEND:
-		extend = read_4(kbuf, ptr);
-		extend <<= TS_SHIFT;
-		extend += delta;
-		delta = extend;
-		ptr += 4;
-		length = 0;
-		break;
-
-	case OLD_RINGBUF_TYPE_TIME_STAMP:
-		/* should never happen! */
-		kbuf->curr = kbuf->size;
-		kbuf->next = kbuf->size;
-		kbuf->index = kbuf->size;
-		return -1;
-	default:
-		if (len)
-			length = len * 4;
-		else {
-			length = read_4(kbuf, ptr);
-			length -= 4;
-			ptr += 4;
-		}
-		break;
-	}
-
-	kbuf->timestamp += delta;
-	kbuf->index = calc_index(kbuf, ptr);
-	kbuf->next = kbuf->index + length;
-
-	return type;
-}
-
-static int __old_next_event(struct kbuffer *kbuf)
-{
-	int type;
-
-	do {
-		kbuf->curr = kbuf->next;
-		if (kbuf->next >= kbuf->size)
-			return -1;
-		type = old_update_pointers(kbuf);
-	} while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING);
-
-	return 0;
-}
-
-static unsigned int
-translate_data(struct kbuffer *kbuf, void *data, void **rptr,
-	       unsigned long long *delta, int *length)
-{
-	unsigned long long extend;
-	unsigned int type_len_ts;
-	unsigned int type_len;
-
-	type_len_ts = read_4(kbuf, data);
-	data += 4;
-
-	type_len = type_len4host(kbuf, type_len_ts);
-	*delta = ts4host(kbuf, type_len_ts);
-
-	switch (type_len) {
-	case KBUFFER_TYPE_PADDING:
-		*length = read_4(kbuf, data);
-		break;
-
-	case KBUFFER_TYPE_TIME_EXTEND:
-	case KBUFFER_TYPE_TIME_STAMP:
-		extend = read_4(kbuf, data);
-		data += 4;
-		extend <<= TS_SHIFT;
-		extend += *delta;
-		*delta = extend;
-		*length = 0;
-		break;
-
-	case 0:
-		*length = read_4(kbuf, data) - 4;
-		*length = (*length + 3) & ~3;
-		data += 4;
-		break;
-	default:
-		*length = type_len * 4;
-		break;
-	}
-
-	*rptr = data;
-
-	return type_len;
-}
-
-static unsigned int update_pointers(struct kbuffer *kbuf)
-{
-	unsigned long long delta;
-	unsigned int type_len;
-	int length;
-	void *ptr = kbuf->data + kbuf->curr;
-
-	type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
-
-	if (type_len == KBUFFER_TYPE_TIME_STAMP)
-		kbuf->timestamp = delta;
-	else
-		kbuf->timestamp += delta;
-
-	kbuf->index = calc_index(kbuf, ptr);
-	kbuf->next = kbuf->index + length;
-
-	return type_len;
-}
-
-/**
- * kbuffer_translate_data - read raw data to get a record
- * @swap:	Set to 1 if bytes in words need to be swapped when read
- * @data:	The raw data to read
- * @size:	Address to store the size of the event data.
- *
- * Returns a pointer to the event data. To determine the entire
- * record size (record metadata + data) just add the difference between
- * @data and the returned value to @size.
- */
-void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
-{
-	unsigned long long delta;
-	struct kbuffer kbuf;
-	int type_len;
-	int length;
-	void *ptr;
-
-	if (swap) {
-		kbuf.read_8 = __read_8_sw;
-		kbuf.read_4 = __read_4_sw;
-		kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN;
-	} else {
-		kbuf.read_8 = __read_8;
-		kbuf.read_4 = __read_4;
-		kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0;
-	}
-
-	type_len = translate_data(&kbuf, data, &ptr, &delta, &length);
-	switch (type_len) {
-	case KBUFFER_TYPE_PADDING:
-	case KBUFFER_TYPE_TIME_EXTEND:
-	case KBUFFER_TYPE_TIME_STAMP:
-		return NULL;
-	}
-
-	*size = length;
-
-	return ptr;
-}
-
-static int __next_event(struct kbuffer *kbuf)
-{
-	int type;
-
-	do {
-		kbuf->curr = kbuf->next;
-		if (kbuf->next >= kbuf->size)
-			return -1;
-		type = update_pointers(kbuf);
-	} while (type == KBUFFER_TYPE_TIME_EXTEND ||
-		 type == KBUFFER_TYPE_TIME_STAMP ||
-		 type == KBUFFER_TYPE_PADDING);
-
-	return 0;
-}
-
-static int next_event(struct kbuffer *kbuf)
-{
-	return kbuf->next_event(kbuf);
-}
-
-/**
- * kbuffer_next_event - increment the current pointer
- * @kbuf:	The kbuffer to read
- * @ts:		Address to store the next record's timestamp (may be NULL to ignore)
- *
- * Increments the pointers into the subbuffer of the kbuffer to point to the
- * next event so that the next kbuffer_read_event() will return a
- * new event.
- *
- * Returns the data of the next event if a new event exists on the subbuffer,
- * NULL otherwise.
- */
-void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts)
-{
-	int ret;
-
-	if (!kbuf || !kbuf->subbuffer)
-		return NULL;
-
-	ret = next_event(kbuf);
-	if (ret < 0)
-		return NULL;
-
-	if (ts)
-		*ts = kbuf->timestamp;
-
-	return kbuf->data + kbuf->index;
-}
-
-/**
- * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer
- * @kbuf:	The kbuffer to load
- * @subbuffer:	The subbuffer to load into @kbuf.
- *
- * Load a new subbuffer (page) into @kbuf. This will reset all
- * the pointers and update the @kbuf timestamp. The next read will
- * return the first event on @subbuffer.
- *
- * Returns 0 on succes, -1 otherwise.
- */
-int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
-{
-	unsigned long long flags;
-	void *ptr = subbuffer;
-
-	if (!kbuf || !subbuffer)
-		return -1;
-
-	kbuf->subbuffer = subbuffer;
-
-	kbuf->timestamp = read_8(kbuf, ptr);
-	ptr += 8;
-
-	kbuf->curr = 0;
-
-	if (kbuf->flags & KBUFFER_FL_LONG_8)
-		kbuf->start = 16;
-	else
-		kbuf->start = 12;
-
-	kbuf->data = subbuffer + kbuf->start;
-
-	flags = read_long(kbuf, ptr);
-	kbuf->size = (unsigned int)flags & COMMIT_MASK;
-
-	if (flags & MISSING_EVENTS) {
-		if (flags & MISSING_STORED) {
-			ptr = kbuf->data + kbuf->size;
-			kbuf->lost_events = read_long(kbuf, ptr);
-		} else
-			kbuf->lost_events = -1;
-	} else
-		kbuf->lost_events = 0;
-
-	kbuf->index = 0;
-	kbuf->next = 0;
-
-	next_event(kbuf);
-
-	return 0;
-}
-
-/**
- * kbuffer_subbuf_timestamp - read the timestamp from a sub buffer
- * @kbuf:      The kbuffer to load
- * @subbuf:    The subbuffer to read from.
- *
- * Return the timestamp from a subbuffer.
- */
-unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf)
-{
-	return kbuf->read_8(subbuf);
-}
-
-/**
- * kbuffer_ptr_delta - read the delta field from a record
- * @kbuf:      The kbuffer to load
- * @ptr:       The record in the buffe.
- *
- * Return the timestamp delta from a record
- */
-unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr)
-{
-	unsigned int type_len_ts;
-
-	type_len_ts = read_4(kbuf, ptr);
-	return ts4host(kbuf, type_len_ts);
-}
-
-
-/**
- * kbuffer_read_event - read the next event in the kbuffer subbuffer
- * @kbuf:	The kbuffer to read from
- * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
- *
- * Returns a pointer to the data part of the current event.
- * NULL if no event is left on the subbuffer.
- */
-void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts)
-{
-	if (!kbuf || !kbuf->subbuffer)
-		return NULL;
-
-	if (kbuf->curr >= kbuf->size)
-		return NULL;
-
-	if (ts)
-		*ts = kbuf->timestamp;
-	return kbuf->data + kbuf->index;
-}
-
-/**
- * kbuffer_timestamp - Return the timestamp of the current event
- * @kbuf:	The kbuffer to read from
- *
- * Returns the timestamp of the current (next) event.
- */
-unsigned long long kbuffer_timestamp(struct kbuffer *kbuf)
-{
-	return kbuf->timestamp;
-}
-
-/**
- * kbuffer_read_at_offset - read the event that is at offset
- * @kbuf:	The kbuffer to read from
- * @offset:	The offset into the subbuffer
- * @ts:		The address to store the timestamp of the event (may be NULL to ignore)
- *
- * The @offset must be an index from the @kbuf subbuffer beginning.
- * If @offset is bigger than the stored subbuffer, NULL will be returned.
- *
- * Returns the data of the record that is at @offset. Note, @offset does
- * not need to be the start of the record, the offset just needs to be
- * in the record (or beginning of it).
- *
- * Note, the kbuf timestamp and pointers are updated to the
- * returned record. That is, kbuffer_read_event() will return the same
- * data and timestamp, and kbuffer_next_event() will increment from
- * this record.
- */
-void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
-			     unsigned long long *ts)
-{
-	void *data;
-
-	if (offset < kbuf->start)
-		offset = 0;
-	else
-		offset -= kbuf->start;
-
-	/* Reset the buffer */
-	kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
-	data = kbuffer_read_event(kbuf, ts);
-
-	while (kbuf->curr < offset) {
-		data = kbuffer_next_event(kbuf, ts);
-		if (!data)
-			break;
-	}
-
-	return data;
-}
-
-/**
- * kbuffer_subbuffer_size - the size of the loaded subbuffer
- * @kbuf:	The kbuffer to read from
- *
- * Returns the size of the subbuffer. Note, this size is
- * where the last event resides. The stored subbuffer may actually be
- * bigger due to padding and such.
- */
-int kbuffer_subbuffer_size(struct kbuffer *kbuf)
-{
-	return kbuf->size;
-}
-
-/**
- * kbuffer_curr_index - Return the index of the record
- * @kbuf:	The kbuffer to read from
- *
- * Returns the index from the start of the data part of
- * the subbuffer to the current location. Note this is not
- * from the start of the subbuffer. An index of zero will
- * point to the first record. Use kbuffer_curr_offset() for
- * the actually offset (that can be used by kbuffer_read_at_offset())
- */
-int kbuffer_curr_index(struct kbuffer *kbuf)
-{
-	return kbuf->curr;
-}
-
-/**
- * kbuffer_curr_offset - Return the offset of the record
- * @kbuf:	The kbuffer to read from
- *
- * Returns the offset from the start of the subbuffer to the
- * current location.
- */
-int kbuffer_curr_offset(struct kbuffer *kbuf)
-{
-	return kbuf->curr + kbuf->start;
-}
-
-/**
- * kbuffer_event_size - return the size of the event data
- * @kbuf:	The kbuffer to read
- *
- * Returns the size of the event data (the payload not counting
- * the meta data of the record) of the current event.
- */
-int kbuffer_event_size(struct kbuffer *kbuf)
-{
-	return kbuf->next - kbuf->index;
-}
-
-/**
- * kbuffer_curr_size - return the size of the entire record
- * @kbuf:	The kbuffer to read
- *
- * Returns the size of the entire record (meta data and payload)
- * of the current event.
- */
-int kbuffer_curr_size(struct kbuffer *kbuf)
-{
-	return kbuf->next - kbuf->curr;
-}
-
-/**
- * kbuffer_missed_events - return the # of missed events from last event.
- * @kbuf: 	The kbuffer to read from
- *
- * Returns the # of missed events (if recorded) before the current
- * event. Note, only events on the beginning of a subbuffer can
- * have missed events, all other events within the buffer will be
- * zero.
- */
-int kbuffer_missed_events(struct kbuffer *kbuf)
-{
-	/* Only the first event can have missed events */
-	if (kbuf->curr)
-		return 0;
-
-	return kbuf->lost_events;
-}
-
-/**
- * kbuffer_set_old_forma - set the kbuffer to use the old format parsing
- * @kbuf:	The kbuffer to set
- *
- * This is obsolete (or should be). The first kernels to use the
- * new ring buffer had a slightly different ring buffer format
- * (2.6.30 and earlier). It is still somewhat supported by kbuffer,
- * but should not be counted on in the future.
- */
-void kbuffer_set_old_format(struct kbuffer *kbuf)
-{
-	kbuf->flags |= KBUFFER_FL_OLD_FORMAT;
-
-	kbuf->next_event = __old_next_event;
-}
-
-/**
- * kbuffer_start_of_data - return offset of where data starts on subbuffer
- * @kbuf:	The kbuffer
- *
- * Returns the location on the subbuffer where the data starts.
- */
-int kbuffer_start_of_data(struct kbuffer *kbuf)
-{
-	return kbuf->start;
-}
-
-/**
- * kbuffer_raw_get - get raw buffer info
- * @kbuf:	The kbuffer
- * @subbuf:	Start of mapped subbuffer
- * @info:	Info descriptor to fill in
- *
- * For debugging. This can return internals of the ring buffer.
- * Expects to have info->next set to what it will read.
- * The type, length and timestamp delta will be filled in, and
- * @info->next will be updated to the next element.
- * The @subbuf is used to know if the info is passed the end of
- * data and NULL will be returned if it is.
- */
-struct kbuffer_raw_info *
-kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, struct kbuffer_raw_info *info)
-{
-	unsigned long long flags;
-	unsigned long long delta;
-	unsigned int type_len;
-	unsigned int size;
-	int start;
-	int length;
-	void *ptr = info->next;
-
-	if (!kbuf || !subbuf)
-		return NULL;
-
-	if (kbuf->flags & KBUFFER_FL_LONG_8)
-		start = 16;
-	else
-		start = 12;
-
-	flags = read_long(kbuf, subbuf + 8);
-	size = (unsigned int)flags & COMMIT_MASK;
-
-	if (ptr < subbuf || ptr >= subbuf + start + size)
-		return NULL;
-
-	type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
-
-	info->next = ptr + length;
-
-	info->type = type_len;
-	info->delta = delta;
-	info->length = length;
-
-	return info;
-}
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
deleted file mode 100644
index a2b522093cfdd..0000000000000
--- a/tools/lib/traceevent/kbuffer.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 */
-/*
- * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#ifndef _KBUFFER_H
-#define _KBUFFER_H
-
-#ifndef TS_SHIFT
-#define TS_SHIFT		27
-#endif
-
-enum kbuffer_endian {
-	KBUFFER_ENDIAN_BIG,
-	KBUFFER_ENDIAN_LITTLE,
-};
-
-enum kbuffer_long_size {
-	KBUFFER_LSIZE_4,
-	KBUFFER_LSIZE_8,
-};
-
-enum {
-	KBUFFER_TYPE_PADDING		= 29,
-	KBUFFER_TYPE_TIME_EXTEND	= 30,
-	KBUFFER_TYPE_TIME_STAMP		= 31,
-};
-
-struct kbuffer;
-
-struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian);
-void kbuffer_free(struct kbuffer *kbuf);
-int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer);
-void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts);
-void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts);
-unsigned long long kbuffer_timestamp(struct kbuffer *kbuf);
-unsigned long long kbuffer_subbuf_timestamp(struct kbuffer *kbuf, void *subbuf);
-unsigned int kbuffer_ptr_delta(struct kbuffer *kbuf, void *ptr);
-
-void *kbuffer_translate_data(int swap, void *data, unsigned int *size);
-
-void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts);
-
-int kbuffer_curr_index(struct kbuffer *kbuf);
-
-int kbuffer_curr_offset(struct kbuffer *kbuf);
-int kbuffer_curr_size(struct kbuffer *kbuf);
-int kbuffer_event_size(struct kbuffer *kbuf);
-int kbuffer_missed_events(struct kbuffer *kbuf);
-int kbuffer_subbuffer_size(struct kbuffer *kbuf);
-
-void kbuffer_set_old_format(struct kbuffer *kbuf);
-int kbuffer_start_of_data(struct kbuffer *kbuf);
-
-/* Debugging */
-
-struct kbuffer_raw_info {
-	int			type;
-	int			length;
-	unsigned long long	delta;
-	void			*next;
-};
-
-/* Read raw data */
-struct kbuffer_raw_info *kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf,
-					 struct kbuffer_raw_info *info);
-
-#endif /* _K_BUFFER_H */
diff --git a/tools/lib/traceevent/libtraceevent.pc.template b/tools/lib/traceevent/libtraceevent.pc.template
deleted file mode 100644
index 86384fcd57f14..0000000000000
--- a/tools/lib/traceevent/libtraceevent.pc.template
+++ /dev/null
@@ -1,10 +0,0 @@
-prefix=INSTALL_PREFIX
-libdir=LIB_DIR
-includedir=HEADER_DIR
-
-Name: libtraceevent
-URL: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
-Description: Linux kernel trace event library
-Version: LIB_VERSION
-Cflags: -I${includedir}
-Libs: -L${libdir} -ltraceevent
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
deleted file mode 100644
index 5df177070d53a..0000000000000
--- a/tools/lib/traceevent/parse-filter.c
+++ /dev/null
@@ -1,2281 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <errno.h>
-#include <sys/types.h>
-
-#include "event-parse.h"
-#include "event-parse-local.h"
-#include "event-utils.h"
-
-#define COMM "COMM"
-#define CPU "CPU"
-
-static struct tep_format_field comm = {
-	.name = "COMM",
-};
-
-static struct tep_format_field cpu = {
-	.name = "CPU",
-};
-
-struct event_list {
-	struct event_list	*next;
-	struct tep_event	*event;
-};
-
-static void show_error(char *error_buf, const char *fmt, ...)
-{
-	unsigned long long index;
-	const char *input;
-	va_list ap;
-	int len;
-	int i;
-
-	input = get_input_buf();
-	index = get_input_buf_ptr();
-	len = input ? strlen(input) : 0;
-
-	if (len) {
-		strcpy(error_buf, input);
-		error_buf[len] = '\n';
-		for (i = 1; i < len && i < index; i++)
-			error_buf[len+i] = ' ';
-		error_buf[len + i] = '^';
-		error_buf[len + i + 1] = '\n';
-		len += i+2;
-	}
-
-	va_start(ap, fmt);
-	vsnprintf(error_buf + len, TEP_FILTER_ERROR_BUFSZ - len, fmt, ap);
-	va_end(ap);
-}
-
-static enum tep_event_type filter_read_token(char **tok)
-{
-	enum tep_event_type type;
-	char *token = NULL;
-
-	do {
-		free_token(token);
-		type = read_token(&token);
-	} while (type == TEP_EVENT_NEWLINE || type == TEP_EVENT_SPACE);
-
-	/* If token is = or ! check to see if the next char is ~ */
-	if (token &&
-	    (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
-	    peek_char() == '~') {
-		/* append it */
-		*tok = malloc(3);
-		if (*tok == NULL) {
-			free_token(token);
-			return TEP_EVENT_ERROR;
-		}
-		sprintf(*tok, "%c%c", *token, '~');
-		free_token(token);
-		/* Now remove the '~' from the buffer */
-		read_token(&token);
-		free_token(token);
-	} else
-		*tok = token;
-
-	return type;
-}
-
-static int filter_cmp(const void *a, const void *b)
-{
-	const struct tep_filter_type *ea = a;
-	const struct tep_filter_type *eb = b;
-
-	if (ea->event_id < eb->event_id)
-		return -1;
-
-	if (ea->event_id > eb->event_id)
-		return 1;
-
-	return 0;
-}
-
-static struct tep_filter_type *
-find_filter_type(struct tep_event_filter *filter, int id)
-{
-	struct tep_filter_type *filter_type;
-	struct tep_filter_type key;
-
-	key.event_id = id;
-
-	filter_type = bsearch(&key, filter->event_filters,
-			      filter->filters,
-			      sizeof(*filter->event_filters),
-			      filter_cmp);
-
-	return filter_type;
-}
-
-static struct tep_filter_type *
-add_filter_type(struct tep_event_filter *filter, int id)
-{
-	struct tep_filter_type *filter_type;
-	int i;
-
-	filter_type = find_filter_type(filter, id);
-	if (filter_type)
-		return filter_type;
-
-	filter_type = realloc(filter->event_filters,
-			      sizeof(*filter->event_filters) *
-			      (filter->filters + 1));
-	if (!filter_type)
-		return NULL;
-
-	filter->event_filters = filter_type;
-
-	for (i = 0; i < filter->filters; i++) {
-		if (filter->event_filters[i].event_id > id)
-			break;
-	}
-
-	if (i < filter->filters)
-		memmove(&filter->event_filters[i+1],
-			&filter->event_filters[i],
-			sizeof(*filter->event_filters) *
-			(filter->filters - i));
-
-	filter_type = &filter->event_filters[i];
-	filter_type->event_id = id;
-	filter_type->event = tep_find_event(filter->tep, id);
-	filter_type->filter = NULL;
-
-	filter->filters++;
-
-	return filter_type;
-}
-
-/**
- * tep_filter_alloc - create a new event filter
- * @tep: The tep that this filter is associated with
- */
-struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep)
-{
-	struct tep_event_filter *filter;
-
-	filter = malloc(sizeof(*filter));
-	if (filter == NULL)
-		return NULL;
-
-	memset(filter, 0, sizeof(*filter));
-	filter->tep = tep;
-	tep_ref(tep);
-
-	return filter;
-}
-
-static struct tep_filter_arg *allocate_arg(void)
-{
-	return calloc(1, sizeof(struct tep_filter_arg));
-}
-
-static void free_arg(struct tep_filter_arg *arg)
-{
-	if (!arg)
-		return;
-
-	switch (arg->type) {
-	case TEP_FILTER_ARG_NONE:
-	case TEP_FILTER_ARG_BOOLEAN:
-		break;
-
-	case TEP_FILTER_ARG_NUM:
-		free_arg(arg->num.left);
-		free_arg(arg->num.right);
-		break;
-
-	case TEP_FILTER_ARG_EXP:
-		free_arg(arg->exp.left);
-		free_arg(arg->exp.right);
-		break;
-
-	case TEP_FILTER_ARG_STR:
-		free(arg->str.val);
-		regfree(&arg->str.reg);
-		free(arg->str.buffer);
-		break;
-
-	case TEP_FILTER_ARG_VALUE:
-		if (arg->value.type == TEP_FILTER_STRING ||
-		    arg->value.type == TEP_FILTER_CHAR)
-			free(arg->value.str);
-		break;
-
-	case TEP_FILTER_ARG_OP:
-		free_arg(arg->op.left);
-		free_arg(arg->op.right);
-	default:
-		break;
-	}
-
-	free(arg);
-}
-
-static int add_event(struct event_list **events,
-		     struct tep_event *event)
-{
-	struct event_list *list;
-
-	list = malloc(sizeof(*list));
-	if (list == NULL)
-		return -1;
-
-	list->next = *events;
-	*events = list;
-	list->event = event;
-	return 0;
-}
-
-static int event_match(struct tep_event *event,
-		       regex_t *sreg, regex_t *ereg)
-{
-	if (sreg) {
-		return !regexec(sreg, event->system, 0, NULL, 0) &&
-			!regexec(ereg, event->name, 0, NULL, 0);
-	}
-
-	return !regexec(ereg, event->system, 0, NULL, 0) ||
-		!regexec(ereg, event->name, 0, NULL, 0);
-}
-
-static enum tep_errno
-find_event(struct tep_handle *tep, struct event_list **events,
-	   char *sys_name, char *event_name)
-{
-	struct tep_event *event;
-	regex_t ereg;
-	regex_t sreg;
-	int match = 0;
-	int fail = 0;
-	char *reg;
-	int ret;
-	int i;
-
-	if (!event_name) {
-		/* if no name is given, then swap sys and name */
-		event_name = sys_name;
-		sys_name = NULL;
-	}
-
-	ret = asprintf(&reg, "^%s$", event_name);
-	if (ret < 0)
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-
-	ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
-	free(reg);
-
-	if (ret)
-		return TEP_ERRNO__INVALID_EVENT_NAME;
-
-	if (sys_name) {
-		ret = asprintf(&reg, "^%s$", sys_name);
-		if (ret < 0) {
-			regfree(&ereg);
-			return TEP_ERRNO__MEM_ALLOC_FAILED;
-		}
-
-		ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
-		free(reg);
-		if (ret) {
-			regfree(&ereg);
-			return TEP_ERRNO__INVALID_EVENT_NAME;
-		}
-	}
-
-	for (i = 0; i < tep->nr_events; i++) {
-		event = tep->events[i];
-		if (event_match(event, sys_name ? &sreg : NULL, &ereg)) {
-			match = 1;
-			if (add_event(events, event) < 0) {
-				fail = 1;
-				break;
-			}
-		}
-	}
-
-	regfree(&ereg);
-	if (sys_name)
-		regfree(&sreg);
-
-	if (!match)
-		return TEP_ERRNO__EVENT_NOT_FOUND;
-	if (fail)
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-
-	return 0;
-}
-
-static void free_events(struct event_list *events)
-{
-	struct event_list *event;
-
-	while (events) {
-		event = events;
-		events = events->next;
-		free(event);
-	}
-}
-
-static enum tep_errno
-create_arg_item(struct tep_event *event, const char *token,
-		enum tep_event_type type, struct tep_filter_arg **parg, char *error_str)
-{
-	struct tep_format_field *field;
-	struct tep_filter_arg *arg;
-
-	arg = allocate_arg();
-	if (arg == NULL) {
-		show_error(error_str, "failed to allocate filter arg");
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-	}
-
-	switch (type) {
-
-	case TEP_EVENT_SQUOTE:
-	case TEP_EVENT_DQUOTE:
-		arg->type = TEP_FILTER_ARG_VALUE;
-		arg->value.type =
-			type == TEP_EVENT_DQUOTE ? TEP_FILTER_STRING : TEP_FILTER_CHAR;
-		arg->value.str = strdup(token);
-		if (!arg->value.str) {
-			free_arg(arg);
-			show_error(error_str, "failed to allocate string filter arg");
-			return TEP_ERRNO__MEM_ALLOC_FAILED;
-		}
-		break;
-	case TEP_EVENT_ITEM:
-		/* if it is a number, then convert it */
-		if (isdigit(token[0])) {
-			arg->type = TEP_FILTER_ARG_VALUE;
-			arg->value.type = TEP_FILTER_NUMBER;
-			arg->value.val = strtoull(token, NULL, 0);
-			break;
-		}
-		/* Consider this a field */
-		field = tep_find_any_field(event, token);
-		if (!field) {
-			/* If token is 'COMM' or 'CPU' then it is special */
-			if (strcmp(token, COMM) == 0) {
-				field = &comm;
-			} else if (strcmp(token, CPU) == 0) {
-				field = &cpu;
-			} else {
-				/* not a field, Make it false */
-				arg->type = TEP_FILTER_ARG_BOOLEAN;
-				arg->boolean.value = TEP_FILTER_FALSE;
-				break;
-			}
-		}
-		arg->type = TEP_FILTER_ARG_FIELD;
-		arg->field.field = field;
-		break;
-	default:
-		free_arg(arg);
-		show_error(error_str, "expected a value but found %s", token);
-		return TEP_ERRNO__UNEXPECTED_TYPE;
-	}
-	*parg = arg;
-	return 0;
-}
-
-static struct tep_filter_arg *
-create_arg_op(enum tep_filter_op_type btype)
-{
-	struct tep_filter_arg *arg;
-
-	arg = allocate_arg();
-	if (!arg)
-		return NULL;
-
-	arg->type = TEP_FILTER_ARG_OP;
-	arg->op.type = btype;
-
-	return arg;
-}
-
-static struct tep_filter_arg *
-create_arg_exp(enum tep_filter_exp_type etype)
-{
-	struct tep_filter_arg *arg;
-
-	arg = allocate_arg();
-	if (!arg)
-		return NULL;
-
-	arg->type = TEP_FILTER_ARG_EXP;
-	arg->exp.type = etype;
-
-	return arg;
-}
-
-static struct tep_filter_arg *
-create_arg_cmp(enum tep_filter_cmp_type ctype)
-{
-	struct tep_filter_arg *arg;
-
-	arg = allocate_arg();
-	if (!arg)
-		return NULL;
-
-	/* Use NUM and change if necessary */
-	arg->type = TEP_FILTER_ARG_NUM;
-	arg->num.type = ctype;
-
-	return arg;
-}
-
-static enum tep_errno
-add_right(struct tep_filter_arg *op, struct tep_filter_arg *arg, char *error_str)
-{
-	struct tep_filter_arg *left;
-	char *str;
-	int op_type;
-	int ret;
-
-	switch (op->type) {
-	case TEP_FILTER_ARG_EXP:
-		if (op->exp.right)
-			goto out_fail;
-		op->exp.right = arg;
-		break;
-
-	case TEP_FILTER_ARG_OP:
-		if (op->op.right)
-			goto out_fail;
-		op->op.right = arg;
-		break;
-
-	case TEP_FILTER_ARG_NUM:
-		if (op->op.right)
-			goto out_fail;
-		/*
-		 * The arg must be num, str, or field
-		 */
-		switch (arg->type) {
-		case TEP_FILTER_ARG_VALUE:
-		case TEP_FILTER_ARG_FIELD:
-			break;
-		default:
-			show_error(error_str, "Illegal rvalue");
-			return TEP_ERRNO__ILLEGAL_RVALUE;
-		}
-
-		/*
-		 * Depending on the type, we may need to
-		 * convert this to a string or regex.
-		 */
-		switch (arg->value.type) {
-		case TEP_FILTER_CHAR:
-			/*
-			 * A char should be converted to number if
-			 * the string is 1 byte, and the compare
-			 * is not a REGEX.
-			 */
-			if (strlen(arg->value.str) == 1 &&
-			    op->num.type != TEP_FILTER_CMP_REGEX &&
-			    op->num.type != TEP_FILTER_CMP_NOT_REGEX) {
-				arg->value.type = TEP_FILTER_NUMBER;
-				goto do_int;
-			}
-			/* fall through */
-		case TEP_FILTER_STRING:
-
-			/* convert op to a string arg */
-			op_type = op->num.type;
-			left = op->num.left;
-			str = arg->value.str;
-
-			/* reset the op for the new field */
-			memset(op, 0, sizeof(*op));
-
-			/*
-			 * If left arg was a field not found then
-			 * NULL the entire op.
-			 */
-			if (left->type == TEP_FILTER_ARG_BOOLEAN) {
-				free_arg(left);
-				free_arg(arg);
-				op->type = TEP_FILTER_ARG_BOOLEAN;
-				op->boolean.value = TEP_FILTER_FALSE;
-				break;
-			}
-
-			/* Left arg must be a field */
-			if (left->type != TEP_FILTER_ARG_FIELD) {
-				show_error(error_str,
-					   "Illegal lvalue for string comparison");
-				return TEP_ERRNO__ILLEGAL_LVALUE;
-			}
-
-			/* Make sure this is a valid string compare */
-			switch (op_type) {
-			case TEP_FILTER_CMP_EQ:
-				op_type = TEP_FILTER_CMP_MATCH;
-				break;
-			case TEP_FILTER_CMP_NE:
-				op_type = TEP_FILTER_CMP_NOT_MATCH;
-				break;
-
-			case TEP_FILTER_CMP_REGEX:
-			case TEP_FILTER_CMP_NOT_REGEX:
-				ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB);
-				if (ret) {
-					show_error(error_str,
-						   "RegEx '%s' did not compute",
-						   str);
-					return TEP_ERRNO__INVALID_REGEX;
-				}
-				break;
-			default:
-				show_error(error_str,
-					   "Illegal comparison for string");
-				return TEP_ERRNO__ILLEGAL_STRING_CMP;
-			}
-
-			op->type = TEP_FILTER_ARG_STR;
-			op->str.type = op_type;
-			op->str.field = left->field.field;
-			op->str.val = strdup(str);
-			if (!op->str.val) {
-				show_error(error_str, "Failed to allocate string filter");
-				return TEP_ERRNO__MEM_ALLOC_FAILED;
-			}
-			/*
-			 * Need a buffer to copy data for tests
-			 */
-			op->str.buffer = malloc(op->str.field->size + 1);
-			if (!op->str.buffer) {
-				show_error(error_str, "Failed to allocate string filter");
-				return TEP_ERRNO__MEM_ALLOC_FAILED;
-			}
-			/* Null terminate this buffer */
-			op->str.buffer[op->str.field->size] = 0;
-
-			/* We no longer have left or right args */
-			free_arg(arg);
-			free_arg(left);
-
-			break;
-
-		case TEP_FILTER_NUMBER:
-
- do_int:
-			switch (op->num.type) {
-			case TEP_FILTER_CMP_REGEX:
-			case TEP_FILTER_CMP_NOT_REGEX:
-				show_error(error_str,
-					   "Op not allowed with integers");
-				return TEP_ERRNO__ILLEGAL_INTEGER_CMP;
-
-			default:
-				break;
-			}
-
-			/* numeric compare */
-			op->num.right = arg;
-			break;
-		default:
-			goto out_fail;
-		}
-		break;
-	default:
-		goto out_fail;
-	}
-
-	return 0;
-
- out_fail:
-	show_error(error_str, "Syntax error");
-	return TEP_ERRNO__SYNTAX_ERROR;
-}
-
-static struct tep_filter_arg *
-rotate_op_right(struct tep_filter_arg *a, struct tep_filter_arg *b)
-{
-	struct tep_filter_arg *arg;
-
-	arg = a->op.right;
-	a->op.right = b;
-	return arg;
-}
-
-static enum tep_errno add_left(struct tep_filter_arg *op, struct tep_filter_arg *arg)
-{
-	switch (op->type) {
-	case TEP_FILTER_ARG_EXP:
-		if (arg->type == TEP_FILTER_ARG_OP)
-			arg = rotate_op_right(arg, op);
-		op->exp.left = arg;
-		break;
-
-	case TEP_FILTER_ARG_OP:
-		op->op.left = arg;
-		break;
-	case TEP_FILTER_ARG_NUM:
-		if (arg->type == TEP_FILTER_ARG_OP)
-			arg = rotate_op_right(arg, op);
-
-		/* left arg of compares must be a field */
-		if (arg->type != TEP_FILTER_ARG_FIELD &&
-		    arg->type != TEP_FILTER_ARG_BOOLEAN)
-			return TEP_ERRNO__INVALID_ARG_TYPE;
-		op->num.left = arg;
-		break;
-	default:
-		return TEP_ERRNO__INVALID_ARG_TYPE;
-	}
-	return 0;
-}
-
-enum op_type {
-	OP_NONE,
-	OP_BOOL,
-	OP_NOT,
-	OP_EXP,
-	OP_CMP,
-};
-
-static enum op_type process_op(const char *token,
-			       enum tep_filter_op_type *btype,
-			       enum tep_filter_cmp_type *ctype,
-			       enum tep_filter_exp_type *etype)
-{
-	*btype = TEP_FILTER_OP_NOT;
-	*etype = TEP_FILTER_EXP_NONE;
-	*ctype = TEP_FILTER_CMP_NONE;
-
-	if (strcmp(token, "&&") == 0)
-		*btype = TEP_FILTER_OP_AND;
-	else if (strcmp(token, "||") == 0)
-		*btype = TEP_FILTER_OP_OR;
-	else if (strcmp(token, "!") == 0)
-		return OP_NOT;
-
-	if (*btype != TEP_FILTER_OP_NOT)
-		return OP_BOOL;
-
-	/* Check for value expressions */
-	if (strcmp(token, "+") == 0) {
-		*etype = TEP_FILTER_EXP_ADD;
-	} else if (strcmp(token, "-") == 0) {
-		*etype = TEP_FILTER_EXP_SUB;
-	} else if (strcmp(token, "*") == 0) {
-		*etype = TEP_FILTER_EXP_MUL;
-	} else if (strcmp(token, "/") == 0) {
-		*etype = TEP_FILTER_EXP_DIV;
-	} else if (strcmp(token, "%") == 0) {
-		*etype = TEP_FILTER_EXP_MOD;
-	} else if (strcmp(token, ">>") == 0) {
-		*etype = TEP_FILTER_EXP_RSHIFT;
-	} else if (strcmp(token, "<<") == 0) {
-		*etype = TEP_FILTER_EXP_LSHIFT;
-	} else if (strcmp(token, "&") == 0) {
-		*etype = TEP_FILTER_EXP_AND;
-	} else if (strcmp(token, "|") == 0) {
-		*etype = TEP_FILTER_EXP_OR;
-	} else if (strcmp(token, "^") == 0) {
-		*etype = TEP_FILTER_EXP_XOR;
-	} else if (strcmp(token, "~") == 0)
-		*etype = TEP_FILTER_EXP_NOT;
-
-	if (*etype != TEP_FILTER_EXP_NONE)
-		return OP_EXP;
-
-	/* Check for compares */
-	if (strcmp(token, "==") == 0)
-		*ctype = TEP_FILTER_CMP_EQ;
-	else if (strcmp(token, "!=") == 0)
-		*ctype = TEP_FILTER_CMP_NE;
-	else if (strcmp(token, "<") == 0)
-		*ctype = TEP_FILTER_CMP_LT;
-	else if (strcmp(token, ">") == 0)
-		*ctype = TEP_FILTER_CMP_GT;
-	else if (strcmp(token, "<=") == 0)
-		*ctype = TEP_FILTER_CMP_LE;
-	else if (strcmp(token, ">=") == 0)
-		*ctype = TEP_FILTER_CMP_GE;
-	else if (strcmp(token, "=~") == 0)
-		*ctype = TEP_FILTER_CMP_REGEX;
-	else if (strcmp(token, "!~") == 0)
-		*ctype = TEP_FILTER_CMP_NOT_REGEX;
-	else
-		return OP_NONE;
-
-	return OP_CMP;
-}
-
-static int check_op_done(struct tep_filter_arg *arg)
-{
-	switch (arg->type) {
-	case TEP_FILTER_ARG_EXP:
-		return arg->exp.right != NULL;
-
-	case TEP_FILTER_ARG_OP:
-		return arg->op.right != NULL;
-
-	case TEP_FILTER_ARG_NUM:
-		return arg->num.right != NULL;
-
-	case TEP_FILTER_ARG_STR:
-		/* A string conversion is always done */
-		return 1;
-
-	case TEP_FILTER_ARG_BOOLEAN:
-		/* field not found, is ok */
-		return 1;
-
-	default:
-		return 0;
-	}
-}
-
-enum filter_vals {
-	FILTER_VAL_NORM,
-	FILTER_VAL_FALSE,
-	FILTER_VAL_TRUE,
-};
-
-static enum tep_errno
-reparent_op_arg(struct tep_filter_arg *parent, struct tep_filter_arg *old_child,
-		struct tep_filter_arg *arg, char *error_str)
-{
-	struct tep_filter_arg *other_child;
-	struct tep_filter_arg **ptr;
-
-	if (parent->type != TEP_FILTER_ARG_OP &&
-	    arg->type != TEP_FILTER_ARG_OP) {
-		show_error(error_str, "can not reparent other than OP");
-		return TEP_ERRNO__REPARENT_NOT_OP;
-	}
-
-	/* Get the sibling */
-	if (old_child->op.right == arg) {
-		ptr = &old_child->op.right;
-		other_child = old_child->op.left;
-	} else if (old_child->op.left == arg) {
-		ptr = &old_child->op.left;
-		other_child = old_child->op.right;
-	} else {
-		show_error(error_str, "Error in reparent op, find other child");
-		return TEP_ERRNO__REPARENT_FAILED;
-	}
-
-	/* Detach arg from old_child */
-	*ptr = NULL;
-
-	/* Check for root */
-	if (parent == old_child) {
-		free_arg(other_child);
-		*parent = *arg;
-		/* Free arg without recussion */
-		free(arg);
-		return 0;
-	}
-
-	if (parent->op.right == old_child)
-		ptr = &parent->op.right;
-	else if (parent->op.left == old_child)
-		ptr = &parent->op.left;
-	else {
-		show_error(error_str, "Error in reparent op");
-		return TEP_ERRNO__REPARENT_FAILED;
-	}
-
-	*ptr = arg;
-
-	free_arg(old_child);
-	return 0;
-}
-
-/* Returns either filter_vals (success) or tep_errno (failfure) */
-static int test_arg(struct tep_filter_arg *parent, struct tep_filter_arg *arg,
-		    char *error_str)
-{
-	int lval, rval;
-
-	switch (arg->type) {
-
-		/* bad case */
-	case TEP_FILTER_ARG_BOOLEAN:
-		return FILTER_VAL_FALSE + arg->boolean.value;
-
-		/* good cases: */
-	case TEP_FILTER_ARG_STR:
-	case TEP_FILTER_ARG_VALUE:
-	case TEP_FILTER_ARG_FIELD:
-		return FILTER_VAL_NORM;
-
-	case TEP_FILTER_ARG_EXP:
-		lval = test_arg(arg, arg->exp.left, error_str);
-		if (lval != FILTER_VAL_NORM)
-			return lval;
-		rval = test_arg(arg, arg->exp.right, error_str);
-		if (rval != FILTER_VAL_NORM)
-			return rval;
-		return FILTER_VAL_NORM;
-
-	case TEP_FILTER_ARG_NUM:
-		lval = test_arg(arg, arg->num.left, error_str);
-		if (lval != FILTER_VAL_NORM)
-			return lval;
-		rval = test_arg(arg, arg->num.right, error_str);
-		if (rval != FILTER_VAL_NORM)
-			return rval;
-		return FILTER_VAL_NORM;
-
-	case TEP_FILTER_ARG_OP:
-		if (arg->op.type != TEP_FILTER_OP_NOT) {
-			lval = test_arg(arg, arg->op.left, error_str);
-			switch (lval) {
-			case FILTER_VAL_NORM:
-				break;
-			case FILTER_VAL_TRUE:
-				if (arg->op.type == TEP_FILTER_OP_OR)
-					return FILTER_VAL_TRUE;
-				rval = test_arg(arg, arg->op.right, error_str);
-				if (rval != FILTER_VAL_NORM)
-					return rval;
-
-				return reparent_op_arg(parent, arg, arg->op.right,
-						       error_str);
-
-			case FILTER_VAL_FALSE:
-				if (arg->op.type == TEP_FILTER_OP_AND)
-					return FILTER_VAL_FALSE;
-				rval = test_arg(arg, arg->op.right, error_str);
-				if (rval != FILTER_VAL_NORM)
-					return rval;
-
-				return reparent_op_arg(parent, arg, arg->op.right,
-						       error_str);
-
-			default:
-				return lval;
-			}
-		}
-
-		rval = test_arg(arg, arg->op.right, error_str);
-		switch (rval) {
-		case FILTER_VAL_NORM:
-		default:
-			break;
-
-		case FILTER_VAL_TRUE:
-			if (arg->op.type == TEP_FILTER_OP_OR)
-				return FILTER_VAL_TRUE;
-			if (arg->op.type == TEP_FILTER_OP_NOT)
-				return FILTER_VAL_FALSE;
-
-			return reparent_op_arg(parent, arg, arg->op.left,
-					       error_str);
-
-		case FILTER_VAL_FALSE:
-			if (arg->op.type == TEP_FILTER_OP_AND)
-				return FILTER_VAL_FALSE;
-			if (arg->op.type == TEP_FILTER_OP_NOT)
-				return FILTER_VAL_TRUE;
-
-			return reparent_op_arg(parent, arg, arg->op.left,
-					       error_str);
-		}
-
-		return rval;
-	default:
-		show_error(error_str, "bad arg in filter tree");
-		return TEP_ERRNO__BAD_FILTER_ARG;
-	}
-	return FILTER_VAL_NORM;
-}
-
-/* Remove any unknown event fields */
-static int collapse_tree(struct tep_filter_arg *arg,
-			 struct tep_filter_arg **arg_collapsed, char *error_str)
-{
-	int ret;
-
-	ret = test_arg(arg, arg, error_str);
-	switch (ret) {
-	case FILTER_VAL_NORM:
-		break;
-
-	case FILTER_VAL_TRUE:
-	case FILTER_VAL_FALSE:
-		free_arg(arg);
-		arg = allocate_arg();
-		if (arg) {
-			arg->type = TEP_FILTER_ARG_BOOLEAN;
-			arg->boolean.value = ret == FILTER_VAL_TRUE;
-		} else {
-			show_error(error_str, "Failed to allocate filter arg");
-			ret = TEP_ERRNO__MEM_ALLOC_FAILED;
-		}
-		break;
-
-	default:
-		/* test_arg() already set the error_str */
-		free_arg(arg);
-		arg = NULL;
-		break;
-	}
-
-	*arg_collapsed = arg;
-	return ret;
-}
-
-static enum tep_errno
-process_filter(struct tep_event *event, struct tep_filter_arg **parg,
-	       char *error_str, int not)
-{
-	enum tep_event_type type;
-	char *token = NULL;
-	struct tep_filter_arg *current_op = NULL;
-	struct tep_filter_arg *current_exp = NULL;
-	struct tep_filter_arg *left_item = NULL;
-	struct tep_filter_arg *arg = NULL;
-	enum op_type op_type;
-	enum tep_filter_op_type btype;
-	enum tep_filter_exp_type etype;
-	enum tep_filter_cmp_type ctype;
-	enum tep_errno ret;
-
-	*parg = NULL;
-
-	do {
-		free(token);
-		type = filter_read_token(&token);
-		switch (type) {
-		case TEP_EVENT_SQUOTE:
-		case TEP_EVENT_DQUOTE:
-		case TEP_EVENT_ITEM:
-			ret = create_arg_item(event, token, type, &arg, error_str);
-			if (ret < 0)
-				goto fail;
-			if (!left_item)
-				left_item = arg;
-			else if (current_exp) {
-				ret = add_right(current_exp, arg, error_str);
-				if (ret < 0)
-					goto fail;
-				left_item = NULL;
-				/* Not's only one one expression */
-				if (not) {
-					arg = NULL;
-					if (current_op)
-						goto fail_syntax;
-					free(token);
-					*parg = current_exp;
-					return 0;
-				}
-			} else
-				goto fail_syntax;
-			arg = NULL;
-			break;
-
-		case TEP_EVENT_DELIM:
-			if (*token == ',') {
-				show_error(error_str, "Illegal token ','");
-				ret = TEP_ERRNO__ILLEGAL_TOKEN;
-				goto fail;
-			}
-
-			if (*token == '(') {
-				if (left_item) {
-					show_error(error_str,
-						   "Open paren can not come after item");
-					ret = TEP_ERRNO__INVALID_PAREN;
-					goto fail;
-				}
-				if (current_exp) {
-					show_error(error_str,
-						   "Open paren can not come after expression");
-					ret = TEP_ERRNO__INVALID_PAREN;
-					goto fail;
-				}
-
-				ret = process_filter(event, &arg, error_str, 0);
-				if (ret != TEP_ERRNO__UNBALANCED_PAREN) {
-					if (ret == 0) {
-						show_error(error_str,
-							   "Unbalanced number of '('");
-						ret = TEP_ERRNO__UNBALANCED_PAREN;
-					}
-					goto fail;
-				}
-				ret = 0;
-
-				/* A not wants just one expression */
-				if (not) {
-					if (current_op)
-						goto fail_syntax;
-					*parg = arg;
-					return 0;
-				}
-
-				if (current_op)
-					ret = add_right(current_op, arg, error_str);
-				else
-					current_exp = arg;
-
-				if (ret < 0)
-					goto fail;
-
-			} else { /* ')' */
-				if (!current_op && !current_exp)
-					goto fail_syntax;
-
-				/* Make sure everything is finished at this level */
-				if (current_exp && !check_op_done(current_exp))
-					goto fail_syntax;
-				if (current_op && !check_op_done(current_op))
-					goto fail_syntax;
-
-				if (current_op)
-					*parg = current_op;
-				else
-					*parg = current_exp;
-				free(token);
-				return TEP_ERRNO__UNBALANCED_PAREN;
-			}
-			break;
-
-		case TEP_EVENT_OP:
-			op_type = process_op(token, &btype, &ctype, &etype);
-
-			/* All expect a left arg except for NOT */
-			switch (op_type) {
-			case OP_BOOL:
-				/* Logic ops need a left expression */
-				if (!current_exp && !current_op)
-					goto fail_syntax;
-				/* fall through */
-			case OP_NOT:
-				/* logic only processes ops and exp */
-				if (left_item)
-					goto fail_syntax;
-				break;
-			case OP_EXP:
-			case OP_CMP:
-				if (!left_item)
-					goto fail_syntax;
-				break;
-			case OP_NONE:
-				show_error(error_str,
-					   "Unknown op token %s", token);
-				ret = TEP_ERRNO__UNKNOWN_TOKEN;
-				goto fail;
-			}
-
-			ret = 0;
-			switch (op_type) {
-			case OP_BOOL:
-				arg = create_arg_op(btype);
-				if (arg == NULL)
-					goto fail_alloc;
-				if (current_op)
-					ret = add_left(arg, current_op);
-				else
-					ret = add_left(arg, current_exp);
-				current_op = arg;
-				current_exp = NULL;
-				break;
-
-			case OP_NOT:
-				arg = create_arg_op(btype);
-				if (arg == NULL)
-					goto fail_alloc;
-				if (current_op)
-					ret = add_right(current_op, arg, error_str);
-				if (ret < 0)
-					goto fail;
-				current_exp = arg;
-				ret = process_filter(event, &arg, error_str, 1);
-				if (ret < 0)
-					goto fail;
-				ret = add_right(current_exp, arg, error_str);
-				if (ret < 0)
-					goto fail;
-				break;
-
-			case OP_EXP:
-			case OP_CMP:
-				if (op_type == OP_EXP)
-					arg = create_arg_exp(etype);
-				else
-					arg = create_arg_cmp(ctype);
-				if (arg == NULL)
-					goto fail_alloc;
-
-				if (current_op)
-					ret = add_right(current_op, arg, error_str);
-				if (ret < 0)
-					goto fail;
-				ret = add_left(arg, left_item);
-				if (ret < 0) {
-					arg = NULL;
-					goto fail_syntax;
-				}
-				current_exp = arg;
-				break;
-			default:
-				break;
-			}
-			arg = NULL;
-			if (ret < 0)
-				goto fail_syntax;
-			break;
-		case TEP_EVENT_NONE:
-			break;
-		case TEP_EVENT_ERROR:
-			goto fail_alloc;
-		default:
-			goto fail_syntax;
-		}
-	} while (type != TEP_EVENT_NONE);
-
-	if (!current_op && !current_exp)
-		goto fail_syntax;
-
-	if (!current_op)
-		current_op = current_exp;
-
-	ret = collapse_tree(current_op, parg, error_str);
-	/* collapse_tree() may free current_op, and updates parg accordingly */
-	current_op = NULL;
-	if (ret < 0)
-		goto fail;
-
-	free(token);
-	return 0;
-
- fail_alloc:
-	show_error(error_str, "failed to allocate filter arg");
-	ret = TEP_ERRNO__MEM_ALLOC_FAILED;
-	goto fail;
- fail_syntax:
-	show_error(error_str, "Syntax error");
-	ret = TEP_ERRNO__SYNTAX_ERROR;
- fail:
-	free_arg(current_op);
-	free_arg(current_exp);
-	free_arg(arg);
-	free(token);
-	return ret;
-}
-
-static enum tep_errno
-process_event(struct tep_event *event, const char *filter_str,
-	      struct tep_filter_arg **parg, char *error_str)
-{
-	int ret;
-
-	init_input_buf(filter_str, strlen(filter_str));
-
-	ret = process_filter(event, parg, error_str, 0);
-	if (ret < 0)
-		return ret;
-
-	/* If parg is NULL, then make it into FALSE */
-	if (!*parg) {
-		*parg = allocate_arg();
-		if (*parg == NULL)
-			return TEP_ERRNO__MEM_ALLOC_FAILED;
-
-		(*parg)->type = TEP_FILTER_ARG_BOOLEAN;
-		(*parg)->boolean.value = TEP_FILTER_FALSE;
-	}
-
-	return 0;
-}
-
-static enum tep_errno
-filter_event(struct tep_event_filter *filter, struct tep_event *event,
-	     const char *filter_str, char *error_str)
-{
-	struct tep_filter_type *filter_type;
-	struct tep_filter_arg *arg;
-	enum tep_errno ret;
-
-	if (filter_str) {
-		ret = process_event(event, filter_str, &arg, error_str);
-		if (ret < 0)
-			return ret;
-
-	} else {
-		/* just add a TRUE arg */
-		arg = allocate_arg();
-		if (arg == NULL)
-			return TEP_ERRNO__MEM_ALLOC_FAILED;
-
-		arg->type = TEP_FILTER_ARG_BOOLEAN;
-		arg->boolean.value = TEP_FILTER_TRUE;
-	}
-
-	filter_type = add_filter_type(filter, event->id);
-	if (filter_type == NULL) {
-		free_arg(arg);
-		return TEP_ERRNO__MEM_ALLOC_FAILED;
-	}
-
-	if (filter_type->filter)
-		free_arg(filter_type->filter);
-	filter_type->filter = arg;
-
-	return 0;
-}
-
-static void filter_init_error_buf(struct tep_event_filter *filter)
-{
-	/* clear buffer to reset show error */
-	init_input_buf("", 0);
-	filter->error_buffer[0] = '\0';
-}
-
-/**
- * tep_filter_add_filter_str - add a new filter
- * @filter: the event filter to add to
- * @filter_str: the filter string that contains the filter
- *
- * Returns 0 if the filter was successfully added or a
- * negative error code.  Use tep_filter_strerror() to see
- * actual error message in case of error.
- */
-enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter,
-					 const char *filter_str)
-{
-	struct tep_handle *tep = filter->tep;
-	struct event_list *event;
-	struct event_list *events = NULL;
-	const char *filter_start;
-	const char *next_event;
-	char *this_event;
-	char *event_name = NULL;
-	char *sys_name = NULL;
-	char *sp;
-	enum tep_errno rtn = 0; /* TEP_ERRNO__SUCCESS */
-	int len;
-	int ret;
-
-	filter_init_error_buf(filter);
-
-	filter_start = strchr(filter_str, ':');
-	if (filter_start)
-		len = filter_start - filter_str;
-	else
-		len = strlen(filter_str);
-
-	do {
-		next_event = strchr(filter_str, ',');
-		if (next_event &&
-		    (!filter_start || next_event < filter_start))
-			len = next_event - filter_str;
-		else if (filter_start)
-			len = filter_start - filter_str;
-		else
-			len = strlen(filter_str);
-
-		this_event = malloc(len + 1);
-		if (this_event == NULL) {
-			/* This can only happen when events is NULL, but still */
-			free_events(events);
-			return TEP_ERRNO__MEM_ALLOC_FAILED;
-		}
-		memcpy(this_event, filter_str, len);
-		this_event[len] = 0;
-
-		if (next_event)
-			next_event++;
-
-		filter_str = next_event;
-
-		sys_name = strtok_r(this_event, "/", &sp);
-		event_name = strtok_r(NULL, "/", &sp);
-
-		if (!sys_name) {
-			/* This can only happen when events is NULL, but still */
-			free_events(events);
-			free(this_event);
-			return TEP_ERRNO__FILTER_NOT_FOUND;
-		}
-
-		/* Find this event */
-		ret = find_event(tep, &events, strim(sys_name), strim(event_name));
-		if (ret < 0) {
-			free_events(events);
-			free(this_event);
-			return ret;
-		}
-		free(this_event);
-	} while (filter_str);
-
-	/* Skip the ':' */
-	if (filter_start)
-		filter_start++;
-
-	/* filter starts here */
-	for (event = events; event; event = event->next) {
-		ret = filter_event(filter, event->event, filter_start,
-				   filter->error_buffer);
-		/* Failures are returned if a parse error happened */
-		if (ret < 0)
-			rtn = ret;
-
-		if (ret >= 0 && tep->test_filters) {
-			char *test;
-			test = tep_filter_make_string(filter, event->event->id);
-			if (test) {
-				printf(" '%s: %s'\n", event->event->name, test);
-				free(test);
-			}
-		}
-	}
-
-	free_events(events);
-
-	return rtn;
-}
-
-static void free_filter_type(struct tep_filter_type *filter_type)
-{
-	free_arg(filter_type->filter);
-}
-
-/**
- * tep_filter_strerror - fill error message in a buffer
- * @filter: the event filter contains error
- * @err: the error code
- * @buf: the buffer to be filled in
- * @buflen: the size of the buffer
- *
- * Returns 0 if message was filled successfully, -1 if error
- */
-int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err,
-			char *buf, size_t buflen)
-{
-	if (err <= __TEP_ERRNO__START || err >= __TEP_ERRNO__END)
-		return -1;
-
-	if (strlen(filter->error_buffer) > 0) {
-		size_t len = snprintf(buf, buflen, "%s", filter->error_buffer);
-
-		if (len > buflen)
-			return -1;
-		return 0;
-	}
-
-	return tep_strerror(filter->tep, err, buf, buflen);
-}
-
-/**
- * tep_filter_remove_event - remove a filter for an event
- * @filter: the event filter to remove from
- * @event_id: the event to remove a filter for
- *
- * Removes the filter saved for an event defined by @event_id
- * from the @filter.
- *
- * Returns 1: if an event was removed
- *   0: if the event was not found
- */
-int tep_filter_remove_event(struct tep_event_filter *filter,
-			    int event_id)
-{
-	struct tep_filter_type *filter_type;
-	unsigned long len;
-
-	if (!filter->filters)
-		return 0;
-
-	filter_type = find_filter_type(filter, event_id);
-
-	if (!filter_type)
-		return 0;
-
-	free_filter_type(filter_type);
-
-	/* The filter_type points into the event_filters array */
-	len = (unsigned long)(filter->event_filters + filter->filters) -
-		(unsigned long)(filter_type + 1);
-
-	memmove(filter_type, filter_type + 1, len);
-	filter->filters--;
-
-	memset(&filter->event_filters[filter->filters], 0,
-	       sizeof(*filter_type));
-
-	return 1;
-}
-
-/**
- * tep_filter_reset - clear all filters in a filter
- * @filter: the event filter to reset
- *
- * Removes all filters from a filter and resets it.
- */
-void tep_filter_reset(struct tep_event_filter *filter)
-{
-	int i;
-
-	for (i = 0; i < filter->filters; i++)
-		free_filter_type(&filter->event_filters[i]);
-
-	free(filter->event_filters);
-	filter->filters = 0;
-	filter->event_filters = NULL;
-}
-
-void tep_filter_free(struct tep_event_filter *filter)
-{
-	tep_unref(filter->tep);
-
-	tep_filter_reset(filter);
-
-	free(filter);
-}
-
-static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg);
-
-static int copy_filter_type(struct tep_event_filter *filter,
-			    struct tep_event_filter *source,
-			    struct tep_filter_type *filter_type)
-{
-	struct tep_filter_arg *arg;
-	struct tep_event *event;
-	const char *sys;
-	const char *name;
-	char *str;
-
-	/* Can't assume that the tep's are the same */
-	sys = filter_type->event->system;
-	name = filter_type->event->name;
-	event = tep_find_event_by_name(filter->tep, sys, name);
-	if (!event)
-		return -1;
-
-	str = arg_to_str(source, filter_type->filter);
-	if (!str)
-		return -1;
-
-	if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
-		/* Add trivial event */
-		arg = allocate_arg();
-		if (arg == NULL) {
-			free(str);
-			return -1;
-		}
-
-		arg->type = TEP_FILTER_ARG_BOOLEAN;
-		if (strcmp(str, "TRUE") == 0)
-			arg->boolean.value = 1;
-		else
-			arg->boolean.value = 0;
-
-		filter_type = add_filter_type(filter, event->id);
-		if (filter_type == NULL) {
-			free(str);
-			free_arg(arg);
-			return -1;
-		}
-
-		filter_type->filter = arg;
-
-		free(str);
-		return 0;
-	}
-
-	filter_event(filter, event, str, NULL);
-	free(str);
-
-	return 0;
-}
-
-/**
- * tep_filter_copy - copy a filter using another filter
- * @dest - the filter to copy to
- * @source - the filter to copy from
- *
- * Returns 0 on success and -1 if not all filters were copied
- */
-int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source)
-{
-	int ret = 0;
-	int i;
-
-	tep_filter_reset(dest);
-
-	for (i = 0; i < source->filters; i++) {
-		if (copy_filter_type(dest, source, &source->event_filters[i]))
-			ret = -1;
-	}
-	return ret;
-}
-
-static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
-		       struct tep_record *record, enum tep_errno *err);
-
-static const char *
-get_comm(struct tep_event *event, struct tep_record *record)
-{
-	const char *comm;
-	int pid;
-
-	pid = tep_data_pid(event->tep, record);
-	comm = tep_data_comm_from_pid(event->tep, pid);
-	return comm;
-}
-
-static unsigned long long
-get_value(struct tep_event *event,
-	  struct tep_format_field *field, struct tep_record *record)
-{
-	unsigned long long val;
-
-	/* Handle our dummy "comm" field */
-	if (field == &comm) {
-		const char *name;
-
-		name = get_comm(event, record);
-		return (unsigned long)name;
-	}
-
-	/* Handle our dummy "cpu" field */
-	if (field == &cpu)
-		return record->cpu;
-
-	tep_read_number_field(field, record->data, &val);
-
-	if (!(field->flags & TEP_FIELD_IS_SIGNED))
-		return val;
-
-	switch (field->size) {
-	case 1:
-		return (char)val;
-	case 2:
-		return (short)val;
-	case 4:
-		return (int)val;
-	case 8:
-		return (long long)val;
-	}
-	return val;
-}
-
-static unsigned long long
-get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
-	      struct tep_record *record, enum tep_errno *err);
-
-static unsigned long long
-get_exp_value(struct tep_event *event, struct tep_filter_arg *arg,
-	      struct tep_record *record, enum tep_errno *err)
-{
-	unsigned long long lval, rval;
-
-	lval = get_arg_value(event, arg->exp.left, record, err);
-	rval = get_arg_value(event, arg->exp.right, record, err);
-
-	if (*err) {
-		/*
-		 * There was an error, no need to process anymore.
-		 */
-		return 0;
-	}
-
-	switch (arg->exp.type) {
-	case TEP_FILTER_EXP_ADD:
-		return lval + rval;
-
-	case TEP_FILTER_EXP_SUB:
-		return lval - rval;
-
-	case TEP_FILTER_EXP_MUL:
-		return lval * rval;
-
-	case TEP_FILTER_EXP_DIV:
-		return lval / rval;
-
-	case TEP_FILTER_EXP_MOD:
-		return lval % rval;
-
-	case TEP_FILTER_EXP_RSHIFT:
-		return lval >> rval;
-
-	case TEP_FILTER_EXP_LSHIFT:
-		return lval << rval;
-
-	case TEP_FILTER_EXP_AND:
-		return lval & rval;
-
-	case TEP_FILTER_EXP_OR:
-		return lval | rval;
-
-	case TEP_FILTER_EXP_XOR:
-		return lval ^ rval;
-
-	case TEP_FILTER_EXP_NOT:
-	default:
-		if (!*err)
-			*err = TEP_ERRNO__INVALID_EXP_TYPE;
-	}
-	return 0;
-}
-
-static unsigned long long
-get_arg_value(struct tep_event *event, struct tep_filter_arg *arg,
-	      struct tep_record *record, enum tep_errno *err)
-{
-	switch (arg->type) {
-	case TEP_FILTER_ARG_FIELD:
-		return get_value(event, arg->field.field, record);
-
-	case TEP_FILTER_ARG_VALUE:
-		if (arg->value.type != TEP_FILTER_NUMBER) {
-			if (!*err)
-				*err = TEP_ERRNO__NOT_A_NUMBER;
-		}
-		return arg->value.val;
-
-	case TEP_FILTER_ARG_EXP:
-		return get_exp_value(event, arg, record, err);
-
-	default:
-		if (!*err)
-			*err = TEP_ERRNO__INVALID_ARG_TYPE;
-	}
-	return 0;
-}
-
-static int test_num(struct tep_event *event, struct tep_filter_arg *arg,
-		    struct tep_record *record, enum tep_errno *err)
-{
-	unsigned long long lval, rval;
-
-	lval = get_arg_value(event, arg->num.left, record, err);
-	rval = get_arg_value(event, arg->num.right, record, err);
-
-	if (*err) {
-		/*
-		 * There was an error, no need to process anymore.
-		 */
-		return 0;
-	}
-
-	switch (arg->num.type) {
-	case TEP_FILTER_CMP_EQ:
-		return lval == rval;
-
-	case TEP_FILTER_CMP_NE:
-		return lval != rval;
-
-	case TEP_FILTER_CMP_GT:
-		return lval > rval;
-
-	case TEP_FILTER_CMP_LT:
-		return lval < rval;
-
-	case TEP_FILTER_CMP_GE:
-		return lval >= rval;
-
-	case TEP_FILTER_CMP_LE:
-		return lval <= rval;
-
-	default:
-		if (!*err)
-			*err = TEP_ERRNO__ILLEGAL_INTEGER_CMP;
-		return 0;
-	}
-}
-
-static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record)
-{
-	struct tep_event *event;
-	struct tep_handle *tep;
-	unsigned long long addr;
-	const char *val = NULL;
-	unsigned int size;
-	char hex[64];
-
-	/* If the field is not a string convert it */
-	if (arg->str.field->flags & TEP_FIELD_IS_STRING) {
-		val = record->data + arg->str.field->offset;
-		size = arg->str.field->size;
-
-		if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {
-			addr = *(unsigned int *)val;
-			size = addr >> 16;
-			addr &= 0xffff;
-			if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE)
-				addr += arg->str.field->offset + arg->str.field->size;
-			val = record->data + addr;
-		}
-
-		/*
-		 * We need to copy the data since we can't be sure the field
-		 * is null terminated.
-		 */
-		if (*(val + size - 1)) {
-			/* copy it */
-			memcpy(arg->str.buffer, val, arg->str.field->size);
-			/* the buffer is already NULL terminated */
-			val = arg->str.buffer;
-		}
-
-	} else {
-		event = arg->str.field->event;
-		tep = event->tep;
-		addr = get_value(event, arg->str.field, record);
-
-		if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG))
-			/* convert to a kernel symbol */
-			val = tep_find_function(tep, addr);
-
-		if (val == NULL) {
-			/* just use the hex of the string name */
-			snprintf(hex, 64, "0x%llx", addr);
-			val = hex;
-		}
-	}
-
-	return val;
-}
-
-static int test_str(struct tep_event *event, struct tep_filter_arg *arg,
-		    struct tep_record *record, enum tep_errno *err)
-{
-	const char *val;
-
-	if (arg->str.field == &comm)
-		val = get_comm(event, record);
-	else
-		val = get_field_str(arg, record);
-
-	switch (arg->str.type) {
-	case TEP_FILTER_CMP_MATCH:
-		return strcmp(val, arg->str.val) == 0;
-
-	case TEP_FILTER_CMP_NOT_MATCH:
-		return strcmp(val, arg->str.val) != 0;
-
-	case TEP_FILTER_CMP_REGEX:
-		/* Returns zero on match */
-		return !regexec(&arg->str.reg, val, 0, NULL, 0);
-
-	case TEP_FILTER_CMP_NOT_REGEX:
-		return regexec(&arg->str.reg, val, 0, NULL, 0);
-
-	default:
-		if (!*err)
-			*err = TEP_ERRNO__ILLEGAL_STRING_CMP;
-		return 0;
-	}
-}
-
-static int test_op(struct tep_event *event, struct tep_filter_arg *arg,
-		   struct tep_record *record, enum tep_errno *err)
-{
-	switch (arg->op.type) {
-	case TEP_FILTER_OP_AND:
-		return test_filter(event, arg->op.left, record, err) &&
-			test_filter(event, arg->op.right, record, err);
-
-	case TEP_FILTER_OP_OR:
-		return test_filter(event, arg->op.left, record, err) ||
-			test_filter(event, arg->op.right, record, err);
-
-	case TEP_FILTER_OP_NOT:
-		return !test_filter(event, arg->op.right, record, err);
-
-	default:
-		if (!*err)
-			*err = TEP_ERRNO__INVALID_OP_TYPE;
-		return 0;
-	}
-}
-
-static int test_filter(struct tep_event *event, struct tep_filter_arg *arg,
-		       struct tep_record *record, enum tep_errno *err)
-{
-	if (*err) {
-		/*
-		 * There was an error, no need to process anymore.
-		 */
-		return 0;
-	}
-
-	switch (arg->type) {
-	case TEP_FILTER_ARG_BOOLEAN:
-		/* easy case */
-		return arg->boolean.value;
-
-	case TEP_FILTER_ARG_OP:
-		return test_op(event, arg, record, err);
-
-	case TEP_FILTER_ARG_NUM:
-		return test_num(event, arg, record, err);
-
-	case TEP_FILTER_ARG_STR:
-		return test_str(event, arg, record, err);
-
-	case TEP_FILTER_ARG_EXP:
-	case TEP_FILTER_ARG_VALUE:
-	case TEP_FILTER_ARG_FIELD:
-		/*
-		 * Expressions, fields and values evaluate
-		 * to true if they return non zero
-		 */
-		return !!get_arg_value(event, arg, record, err);
-
-	default:
-		if (!*err)
-			*err = TEP_ERRNO__INVALID_ARG_TYPE;
-		return 0;
-	}
-}
-
-/**
- * tep_event_filtered - return true if event has filter
- * @filter: filter struct with filter information
- * @event_id: event id to test if filter exists
- *
- * Returns 1 if filter found for @event_id
- *   otherwise 0;
- */
-int tep_event_filtered(struct tep_event_filter *filter, int event_id)
-{
-	struct tep_filter_type *filter_type;
-
-	if (!filter->filters)
-		return 0;
-
-	filter_type = find_filter_type(filter, event_id);
-
-	return filter_type ? 1 : 0;
-}
-
-/**
- * tep_filter_match - test if a record matches a filter
- * @filter: filter struct with filter information
- * @record: the record to test against the filter
- *
- * Returns: match result or error code (prefixed with TEP_ERRNO__)
- * FILTER_MATCH - filter found for event and @record matches
- * FILTER_MISS  - filter found for event and @record does not match
- * FILTER_NOT_FOUND - no filter found for @record's event
- * NO_FILTER - if no filters exist
- * otherwise - error occurred during test
- */
-enum tep_errno tep_filter_match(struct tep_event_filter *filter,
-				struct tep_record *record)
-{
-	struct tep_handle *tep = filter->tep;
-	struct tep_filter_type *filter_type;
-	int event_id;
-	int ret;
-	enum tep_errno err = 0;
-
-	filter_init_error_buf(filter);
-
-	if (!filter->filters)
-		return TEP_ERRNO__NO_FILTER;
-
-	event_id = tep_data_type(tep, record);
-
-	filter_type = find_filter_type(filter, event_id);
-	if (!filter_type)
-		return TEP_ERRNO__FILTER_NOT_FOUND;
-
-	ret = test_filter(filter_type->event, filter_type->filter, record, &err);
-	if (err)
-		return err;
-
-	return ret ? TEP_ERRNO__FILTER_MATCH : TEP_ERRNO__FILTER_MISS;
-}
-
-static char *op_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
-	char *str = NULL;
-	char *left = NULL;
-	char *right = NULL;
-	char *op = NULL;
-	int left_val = -1;
-	int right_val = -1;
-	int val;
-
-	switch (arg->op.type) {
-	case TEP_FILTER_OP_AND:
-		op = "&&";
-		/* fall through */
-	case TEP_FILTER_OP_OR:
-		if (!op)
-			op = "||";
-
-		left = arg_to_str(filter, arg->op.left);
-		right = arg_to_str(filter, arg->op.right);
-		if (!left || !right)
-			break;
-
-		/* Try to consolidate boolean values */
-		if (strcmp(left, "TRUE") == 0)
-			left_val = 1;
-		else if (strcmp(left, "FALSE") == 0)
-			left_val = 0;
-
-		if (strcmp(right, "TRUE") == 0)
-			right_val = 1;
-		else if (strcmp(right, "FALSE") == 0)
-			right_val = 0;
-
-		if (left_val >= 0) {
-			if ((arg->op.type == TEP_FILTER_OP_AND && !left_val) ||
-			    (arg->op.type == TEP_FILTER_OP_OR && left_val)) {
-				/* Just return left value */
-				str = left;
-				left = NULL;
-				break;
-			}
-			if (right_val >= 0) {
-				/* just evaluate this. */
-				val = 0;
-				switch (arg->op.type) {
-				case TEP_FILTER_OP_AND:
-					val = left_val && right_val;
-					break;
-				case TEP_FILTER_OP_OR:
-					val = left_val || right_val;
-					break;
-				default:
-					break;
-				}
-				if (asprintf(&str, val ? "TRUE" : "FALSE") < 0)
-					str = NULL;
-				break;
-			}
-		}
-		if (right_val >= 0) {
-			if ((arg->op.type == TEP_FILTER_OP_AND && !right_val) ||
-			    (arg->op.type == TEP_FILTER_OP_OR && right_val)) {
-				/* Just return right value */
-				str = right;
-				right = NULL;
-				break;
-			}
-			/* The right value is meaningless */
-			str = left;
-			left = NULL;
-			break;
-		}
-
-		if (asprintf(&str, "(%s) %s (%s)", left, op, right) < 0)
-			str = NULL;
-		break;
-
-	case TEP_FILTER_OP_NOT:
-		op = "!";
-		right = arg_to_str(filter, arg->op.right);
-		if (!right)
-			break;
-
-		/* See if we can consolidate */
-		if (strcmp(right, "TRUE") == 0)
-			right_val = 1;
-		else if (strcmp(right, "FALSE") == 0)
-			right_val = 0;
-		if (right_val >= 0) {
-			/* just return the opposite */
-			if (asprintf(&str, right_val ? "FALSE" : "TRUE") < 0)
-				str = NULL;
-			break;
-		}
-		if (asprintf(&str, "%s(%s)", op, right) < 0)
-			str = NULL;
-		break;
-
-	default:
-		/* ?? */
-		break;
-	}
-	free(left);
-	free(right);
-	return str;
-}
-
-static char *val_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
-	char *str = NULL;
-
-	if (asprintf(&str, "%lld", arg->value.val) < 0)
-		str = NULL;
-
-	return str;
-}
-
-static char *field_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
-	return strdup(arg->field.field->name);
-}
-
-static char *exp_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
-	char *lstr;
-	char *rstr;
-	char *op;
-	char *str = NULL;
-
-	lstr = arg_to_str(filter, arg->exp.left);
-	rstr = arg_to_str(filter, arg->exp.right);
-	if (!lstr || !rstr)
-		goto out;
-
-	switch (arg->exp.type) {
-	case TEP_FILTER_EXP_ADD:
-		op = "+";
-		break;
-	case TEP_FILTER_EXP_SUB:
-		op = "-";
-		break;
-	case TEP_FILTER_EXP_MUL:
-		op = "*";
-		break;
-	case TEP_FILTER_EXP_DIV:
-		op = "/";
-		break;
-	case TEP_FILTER_EXP_MOD:
-		op = "%";
-		break;
-	case TEP_FILTER_EXP_RSHIFT:
-		op = ">>";
-		break;
-	case TEP_FILTER_EXP_LSHIFT:
-		op = "<<";
-		break;
-	case TEP_FILTER_EXP_AND:
-		op = "&";
-		break;
-	case TEP_FILTER_EXP_OR:
-		op = "|";
-		break;
-	case TEP_FILTER_EXP_XOR:
-		op = "^";
-		break;
-	default:
-		op = "[ERROR IN EXPRESSION TYPE]";
-		break;
-	}
-
-	if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
-		str = NULL;
-out:
-	free(lstr);
-	free(rstr);
-
-	return str;
-}
-
-static char *num_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
-	char *lstr;
-	char *rstr;
-	char *str = NULL;
-	char *op = NULL;
-
-	lstr = arg_to_str(filter, arg->num.left);
-	rstr = arg_to_str(filter, arg->num.right);
-	if (!lstr || !rstr)
-		goto out;
-
-	switch (arg->num.type) {
-	case TEP_FILTER_CMP_EQ:
-		op = "==";
-		/* fall through */
-	case TEP_FILTER_CMP_NE:
-		if (!op)
-			op = "!=";
-		/* fall through */
-	case TEP_FILTER_CMP_GT:
-		if (!op)
-			op = ">";
-		/* fall through */
-	case TEP_FILTER_CMP_LT:
-		if (!op)
-			op = "<";
-		/* fall through */
-	case TEP_FILTER_CMP_GE:
-		if (!op)
-			op = ">=";
-		/* fall through */
-	case TEP_FILTER_CMP_LE:
-		if (!op)
-			op = "<=";
-
-		if (asprintf(&str, "%s %s %s", lstr, op, rstr) < 0)
-			str = NULL;
-		break;
-
-	default:
-		/* ?? */
-		break;
-	}
-
-out:
-	free(lstr);
-	free(rstr);
-	return str;
-}
-
-static char *str_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
-	char *str = NULL;
-	char *op = NULL;
-
-	switch (arg->str.type) {
-	case TEP_FILTER_CMP_MATCH:
-		op = "==";
-		/* fall through */
-	case TEP_FILTER_CMP_NOT_MATCH:
-		if (!op)
-			op = "!=";
-		/* fall through */
-	case TEP_FILTER_CMP_REGEX:
-		if (!op)
-			op = "=~";
-		/* fall through */
-	case TEP_FILTER_CMP_NOT_REGEX:
-		if (!op)
-			op = "!~";
-
-		if (asprintf(&str, "%s %s \"%s\"",
-			 arg->str.field->name, op, arg->str.val) < 0)
-			str = NULL;
-		break;
-
-	default:
-		/* ?? */
-		break;
-	}
-	return str;
-}
-
-static char *arg_to_str(struct tep_event_filter *filter, struct tep_filter_arg *arg)
-{
-	char *str = NULL;
-
-	switch (arg->type) {
-	case TEP_FILTER_ARG_BOOLEAN:
-		if (asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE") < 0)
-			str = NULL;
-		return str;
-
-	case TEP_FILTER_ARG_OP:
-		return op_to_str(filter, arg);
-
-	case TEP_FILTER_ARG_NUM:
-		return num_to_str(filter, arg);
-
-	case TEP_FILTER_ARG_STR:
-		return str_to_str(filter, arg);
-
-	case TEP_FILTER_ARG_VALUE:
-		return val_to_str(filter, arg);
-
-	case TEP_FILTER_ARG_FIELD:
-		return field_to_str(filter, arg);
-
-	case TEP_FILTER_ARG_EXP:
-		return exp_to_str(filter, arg);
-
-	default:
-		/* ?? */
-		return NULL;
-	}
-
-}
-
-/**
- * tep_filter_make_string - return a string showing the filter
- * @filter: filter struct with filter information
- * @event_id: the event id to return the filter string with
- *
- * Returns a string that displays the filter contents.
- *  This string must be freed with free(str).
- *  NULL is returned if no filter is found or allocation failed.
- */
-char *
-tep_filter_make_string(struct tep_event_filter *filter, int event_id)
-{
-	struct tep_filter_type *filter_type;
-
-	if (!filter->filters)
-		return NULL;
-
-	filter_type = find_filter_type(filter, event_id);
-
-	if (!filter_type)
-		return NULL;
-
-	return arg_to_str(filter, filter_type->filter);
-}
-
-/**
- * tep_filter_compare - compare two filters and return if they are the same
- * @filter1: Filter to compare with @filter2
- * @filter2: Filter to compare with @filter1
- *
- * Returns:
- *  1 if the two filters hold the same content.
- *  0 if they do not.
- */
-int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2)
-{
-	struct tep_filter_type *filter_type1;
-	struct tep_filter_type *filter_type2;
-	char *str1, *str2;
-	int result;
-	int i;
-
-	/* Do the easy checks first */
-	if (filter1->filters != filter2->filters)
-		return 0;
-	if (!filter1->filters && !filter2->filters)
-		return 1;
-
-	/*
-	 * Now take a look at each of the events to see if they have the same
-	 * filters to them.
-	 */
-	for (i = 0; i < filter1->filters; i++) {
-		filter_type1 = &filter1->event_filters[i];
-		filter_type2 = find_filter_type(filter2, filter_type1->event_id);
-		if (!filter_type2)
-			break;
-		if (filter_type1->filter->type != filter_type2->filter->type)
-			break;
-		/* The best way to compare complex filters is with strings */
-		str1 = arg_to_str(filter1, filter_type1->filter);
-		str2 = arg_to_str(filter2, filter_type2->filter);
-		if (str1 && str2)
-			result = strcmp(str1, str2) != 0;
-		else
-			/* bail out if allocation fails */
-			result = 1;
-
-		free(str1);
-		free(str2);
-		if (result)
-			break;
-	}
-
-	if (i < filter1->filters)
-		return 0;
-	return 1;
-}
-
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
deleted file mode 100644
index e99867111387f..0000000000000
--- a/tools/lib/traceevent/parse-utils.c
+++ /dev/null
@@ -1,71 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#include <errno.h>
-
-#define __weak __attribute__((weak))
-
-void __vwarning(const char *fmt, va_list ap)
-{
-	if (errno)
-		perror("libtraceevent");
-	errno = 0;
-
-	fprintf(stderr, "  ");
-	vfprintf(stderr, fmt, ap);
-
-	fprintf(stderr, "\n");
-}
-
-void __warning(const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	__vwarning(fmt, ap);
-	va_end(ap);
-}
-
-void __weak warning(const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	__vwarning(fmt, ap);
-	va_end(ap);
-}
-
-void __vpr_stat(const char *fmt, va_list ap)
-{
-	vprintf(fmt, ap);
-	printf("\n");
-}
-
-void __pr_stat(const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	__vpr_stat(fmt, ap);
-	va_end(ap);
-}
-
-void __weak vpr_stat(const char *fmt, va_list ap)
-{
-	__vpr_stat(fmt, ap);
-}
-
-void __weak pr_stat(const char *fmt, ...)
-{
-	va_list ap;
-
-	va_start(ap, fmt);
-	__vpr_stat(fmt, ap);
-	va_end(ap);
-}
diff --git a/tools/lib/traceevent/plugins/Build b/tools/lib/traceevent/plugins/Build
deleted file mode 100644
index dd4da823c38fb..0000000000000
--- a/tools/lib/traceevent/plugins/Build
+++ /dev/null
@@ -1,12 +0,0 @@
-plugin_jbd2-y         += plugin_jbd2.o
-plugin_hrtimer-y      += plugin_hrtimer.o
-plugin_kmem-y         += plugin_kmem.o
-plugin_kvm-y          += plugin_kvm.o
-plugin_mac80211-y     += plugin_mac80211.o
-plugin_sched_switch-y += plugin_sched_switch.o
-plugin_function-y     += plugin_function.o
-plugin_futex-y        += plugin_futex.o
-plugin_xen-y          += plugin_xen.o
-plugin_scsi-y         += plugin_scsi.o
-plugin_cfg80211-y     += plugin_cfg80211.o
-plugin_tlb-y          += plugin_tlb.o
\ No newline at end of file
diff --git a/tools/lib/traceevent/plugins/Makefile b/tools/lib/traceevent/plugins/Makefile
deleted file mode 100644
index 47e8025532502..0000000000000
--- a/tools/lib/traceevent/plugins/Makefile
+++ /dev/null
@@ -1,225 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-#MAKEFLAGS += --no-print-directory
-
-
-# Makefiles suck: This macro sets a default value of $(2) for the
-# variable named by $(1), unless the variable has been set by
-# environment or command line. This is necessary for CC and AR
-# because make sets default values, so the simpler ?= approach
-# won't work as expected.
-define allow-override
-  $(if $(or $(findstring environment,$(origin $(1))),\
-            $(findstring command line,$(origin $(1)))),,\
-    $(eval $(1) = $(2)))
-endef
-
-# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
-$(call allow-override,CC,$(CROSS_COMPILE)gcc)
-$(call allow-override,AR,$(CROSS_COMPILE)ar)
-$(call allow-override,NM,$(CROSS_COMPILE)nm)
-$(call allow-override,PKG_CONFIG,pkg-config)
-
-EXT = -std=gnu99
-INSTALL = install
-
-# Use DESTDIR for installing into a different root directory.
-# This is useful for building a package. The program will be
-# installed in this directory as if it was the root directory.
-# Then the build tool can move it later.
-DESTDIR ?=
-DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
-
-LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
-ifeq ($(LP64), 1)
-  libdir_relative_tmp = lib64
-else
-  libdir_relative_tmp = lib
-endif
-
-libdir_relative ?= $(libdir_relative_tmp)
-prefix ?= /usr/local
-libdir = $(prefix)/$(libdir_relative)
-
-set_plugin_dir := 1
-
-# Set plugin_dir to preffered global plugin location
-# If we install under $HOME directory we go under
-# $(HOME)/.local/lib/traceevent/plugins
-#
-# We dont set PLUGIN_DIR in case we install under $HOME
-# directory, because by default the code looks under:
-# $(HOME)/.local/lib/traceevent/plugins by default.
-#
-ifeq ($(plugin_dir),)
-ifeq ($(prefix),$(HOME))
-override plugin_dir = $(HOME)/.local/lib/traceevent/plugins
-set_plugin_dir := 0
-else
-override plugin_dir = $(libdir)/traceevent/plugins
-endif
-endif
-
-ifeq ($(set_plugin_dir),1)
-PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
-PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
-endif
-
-include ../../../scripts/Makefile.include
-
-# copy a bit from Linux kbuild
-
-ifeq ("$(origin V)", "command line")
-  VERBOSE = $(V)
-endif
-ifndef VERBOSE
-  VERBOSE = 0
-endif
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(CURDIR)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-export prefix libdir src obj
-
-# Shell quotes
-plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
-
-CONFIG_INCLUDES =
-CONFIG_LIBS    =
-CONFIG_FLAGS   =
-
-OBJ            = $@
-N              =
-
-INCLUDES = -I. -I.. -I $(srctree)/tools/include $(CONFIG_INCLUDES)
-
-# Set compile option CFLAGS
-ifdef EXTRA_CFLAGS
-  CFLAGS := $(EXTRA_CFLAGS)
-else
-  CFLAGS := -g -Wall
-endif
-
-# Append required CFLAGS
-override CFLAGS += -fPIC
-override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
-override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
-
-ifeq ($(VERBOSE),1)
-  Q =
-else
-  Q = @
-endif
-
-# Disable command line variables (CFLAGS) override from top
-# level Makefile (perf), otherwise build Makefile will get
-# the same command line setup.
-MAKEOVERRIDES=
-
-export srctree OUTPUT CC LD CFLAGS V
-
-build := -f $(srctree)/tools/build/Makefile.build dir=. obj
-
-DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
-
-PLUGINS  = plugin_jbd2.so
-PLUGINS += plugin_hrtimer.so
-PLUGINS += plugin_kmem.so
-PLUGINS += plugin_kvm.so
-PLUGINS += plugin_mac80211.so
-PLUGINS += plugin_sched_switch.so
-PLUGINS += plugin_function.so
-PLUGINS += plugin_futex.so
-PLUGINS += plugin_xen.so
-PLUGINS += plugin_scsi.so
-PLUGINS += plugin_cfg80211.so
-PLUGINS += plugin_tlb.so
-
-PLUGINS    := $(addprefix $(OUTPUT),$(PLUGINS))
-PLUGINS_IN := $(PLUGINS:.so=-in.o)
-
-plugins: $(PLUGINS) $(DYNAMIC_LIST_FILE)
-
-__plugin_obj = $(notdir $@)
-  plugin_obj = $(__plugin_obj:-in.o=)
-
-$(PLUGINS_IN): force
-	$(Q)$(MAKE) $(build)=$(plugin_obj)
-
-$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
-	$(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
-
-$(OUTPUT)%.so: $(OUTPUT)%-in.o
-	$(QUIET_LINK)$(CC) $(CFLAGS) -shared $(LDFLAGS) -nostartfiles -o $@ $^
-
-define update_dir
-  (echo $1 > $@.tmp;                           \
-   if [ -r $@ ] && cmp -s $@ $@.tmp; then      \
-     rm -f $@.tmp;                             \
-   else                                                \
-     echo '  UPDATE                 $@';       \
-     mv -f $@.tmp $@;                          \
-   fi);
-endef
-
-tags:	force
-	$(RM) tags
-	find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
-	--regex-c++='/_PE\(([^,)]*).*/TEP_ERRNO__\1/'
-
-TAGS:	force
-	$(RM) TAGS
-	find . -name '*.[ch]' | xargs etags \
-	--regex='/_PE(\([^,)]*\).*/TEP_ERRNO__\1/'
-
-define do_install_mkdir
-	if [ ! -d '$(DESTDIR_SQ)$1' ]; then             \
-		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \
-	fi
-endef
-
-define do_install
-	$(call do_install_mkdir,$2);                    \
-	$(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
-endef
-
-define do_install_plugins
-       for plugin in $1; do                            \
-         $(call do_install,$$plugin,$(plugin_dir_SQ)); \
-       done
-endef
-
-define do_generate_dynamic_list_file
-	symbol_type=`$(NM) -u -D $1 | awk 'NF>1 {print $$1}' | \
-	xargs echo "U w W" | tr 'w ' 'W\n' | sort -u | xargs echo`;\
-	if [ "$$symbol_type" = "U W" ];then				\
-		(echo '{';                                              \
-		$(NM) -u -D $1 | awk 'NF>1 {sub("@.*", "", $$2); print "\t"$$2";"}' | sort -u;\
-		echo '};';                                              \
-		) > $2;                                                 \
-	else                                                            \
-		(echo Either missing one of [$1] or bad version of $(NM)) 1>&2;\
-		fi
-endef
-
-install: $(PLUGINS)
-	$(call QUIET_INSTALL, trace_plugins) \
-	$(call do_install_plugins, $(PLUGINS))
-
-clean:
-	$(call QUIET_CLEAN, trace_plugins) \
-		$(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd; \
-		$(RM) $(OUTPUT)libtraceevent-dynamic-list \
-		$(RM) TRACEEVENT-CFLAGS tags TAGS;
-
-PHONY += force plugins
-force:
-
-# Declare the contents of the .PHONY variable as phony.  We keep that
-# information in a variable so we can use it in if_changed and friends.
-.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/plugins/plugin_cfg80211.c b/tools/lib/traceevent/plugins/plugin_cfg80211.c
deleted file mode 100644
index 3d43b56a6c984..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_cfg80211.c
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include <endian.h>
-#include "event-parse.h"
-
-/*
- * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
- * Fedora6.
- */
-#ifndef le16toh
-# if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define le16toh(x) (x)
-# else
-#  define le16toh(x) __bswap_16 (x)
-# endif
-#endif
-
-
-static unsigned long long
-process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
-{
-	uint16_t *val = (uint16_t *) (unsigned long) args[0];
-	return val ? (long long) le16toh(*val) : 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_print_function(tep,
-				    process___le16_to_cpup,
-				    TEP_FUNC_ARG_INT,
-				    "__le16_to_cpup",
-				    TEP_FUNC_ARG_PTR,
-				    TEP_FUNC_ARG_VOID);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_print_function(tep, process___le16_to_cpup,
-				      "__le16_to_cpup");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_function.c b/tools/lib/traceevent/plugins/plugin_function.c
deleted file mode 100644
index 807b16e1bf0f9..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_function.c
+++ /dev/null
@@ -1,282 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "event-utils.h"
-#include "trace-seq.h"
-
-static struct func_stack {
-	int size;
-	char **stack;
-} *fstack;
-
-static int cpus = -1;
-
-#define STK_BLK 10
-
-struct tep_plugin_option plugin_options[] =
-{
-	{
-		.name = "parent",
-		.plugin_alias = "ftrace",
-		.description =
-		"Print parent of functions for function events",
-	},
-	{
-		.name = "indent",
-		.plugin_alias = "ftrace",
-		.description =
-		"Try to show function call indents, based on parents",
-		.set = 1,
-	},
-	{
-		.name = "offset",
-		.plugin_alias = "ftrace",
-		.description =
-		"Show function names as well as their offsets",
-		.set = 0,
-	},
-	{
-		.name = NULL,
-	}
-};
-
-static struct tep_plugin_option *ftrace_parent = &plugin_options[0];
-static struct tep_plugin_option *ftrace_indent = &plugin_options[1];
-static struct tep_plugin_option *ftrace_offset = &plugin_options[2];
-
-static void add_child(struct func_stack *stack, const char *child, int pos)
-{
-	int i;
-
-	if (!child)
-		return;
-
-	if (pos < stack->size)
-		free(stack->stack[pos]);
-	else {
-		char **ptr;
-
-		ptr = realloc(stack->stack, sizeof(char *) *
-			      (stack->size + STK_BLK));
-		if (!ptr) {
-			warning("could not allocate plugin memory\n");
-			return;
-		}
-
-		stack->stack = ptr;
-
-		for (i = stack->size; i < stack->size + STK_BLK; i++)
-			stack->stack[i] = NULL;
-		stack->size += STK_BLK;
-	}
-
-	stack->stack[pos] = strdup(child);
-}
-
-static int add_and_get_index(const char *parent, const char *child, int cpu)
-{
-	int i;
-
-	if (cpu < 0)
-		return 0;
-
-	if (cpu > cpus) {
-		struct func_stack *ptr;
-
-		ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1));
-		if (!ptr) {
-			warning("could not allocate plugin memory\n");
-			return 0;
-		}
-
-		fstack = ptr;
-
-		/* Account for holes in the cpu count */
-		for (i = cpus + 1; i <= cpu; i++)
-			memset(&fstack[i], 0, sizeof(fstack[i]));
-		cpus = cpu;
-	}
-
-	for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) {
-		if (strcmp(parent, fstack[cpu].stack[i]) == 0) {
-			add_child(&fstack[cpu], child, i+1);
-			return i;
-		}
-	}
-
-	/* Not found */
-	add_child(&fstack[cpu], parent, 0);
-	add_child(&fstack[cpu], child, 1);
-	return 0;
-}
-
-static void show_function(struct trace_seq *s, struct tep_handle *tep,
-			  const char *func, unsigned long long function)
-{
-	unsigned long long offset;
-
-	trace_seq_printf(s, "%s", func);
-	if (ftrace_offset->set) {
-		offset = tep_find_function_address(tep, function);
-		trace_seq_printf(s, "+0x%x ", (int)(function - offset));
-	}
-}
-
-static int function_handler(struct trace_seq *s, struct tep_record *record,
-			    struct tep_event *event, void *context)
-{
-	struct tep_handle *tep = event->tep;
-	unsigned long long function;
-	unsigned long long pfunction;
-	const char *func;
-	const char *parent;
-	int index = 0;
-
-	if (tep_get_field_val(s, event, "ip", record, &function, 1))
-		return trace_seq_putc(s, '!');
-
-	func = tep_find_function(tep, function);
-
-	if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
-		return trace_seq_putc(s, '!');
-
-	parent = tep_find_function(tep, pfunction);
-
-	if (parent && ftrace_indent->set)
-		index = add_and_get_index(parent, func, record->cpu);
-
-	trace_seq_printf(s, "%*s", index*3, "");
-
-	if (func)
-		show_function(s, tep, func, function);
-	else
-		trace_seq_printf(s, "0x%llx", function);
-
-	if (ftrace_parent->set) {
-		trace_seq_printf(s, " <-- ");
-		if (parent)
-			show_function(s, tep, parent, pfunction);
-		else
-			trace_seq_printf(s, "0x%llx", pfunction);
-	}
-
-	return 0;
-}
-
-static int
-trace_stack_handler(struct trace_seq *s, struct tep_record *record,
-		    struct tep_event *event, void *context)
-{
-	struct tep_format_field *field;
-	unsigned long long addr;
-	const char *func;
-	int long_size;
-	void *data = record->data;
-
-	field = tep_find_any_field(event, "caller");
-	if (!field) {
-		trace_seq_printf(s, "<CANT FIND FIELD %s>", "caller");
-		return 0;
-	}
-
-	trace_seq_puts(s, "<stack trace >\n");
-
-	long_size = tep_get_long_size(event->tep);
-
-	for (data += field->offset; data < record->data + record->size;
-	     data += long_size) {
-		addr = tep_read_number(event->tep, data, long_size);
-
-		if ((long_size == 8 && addr == (unsigned long long)-1) ||
-		    ((int)addr == -1))
-			break;
-
-		func = tep_find_function(event->tep, addr);
-		if (func)
-			trace_seq_printf(s, "=> %s (%llx)\n", func, addr);
-		else
-			trace_seq_printf(s, "=> %llx\n", addr);
-	}
-
-	return 0;
-}
-
-static int
-trace_raw_data_handler(struct trace_seq *s, struct tep_record *record,
-		    struct tep_event *event, void *context)
-{
-	struct tep_format_field *field;
-	unsigned long long id;
-	int long_size;
-	void *data = record->data;
-
-	if (tep_get_field_val(s, event, "id", record, &id, 1))
-		return trace_seq_putc(s, '!');
-
-	trace_seq_printf(s, "# %llx", id);
-
-	field = tep_find_any_field(event, "buf");
-	if (!field) {
-		trace_seq_printf(s, "<CANT FIND FIELD %s>", "buf");
-		return 0;
-	}
-
-	long_size = tep_get_long_size(event->tep);
-
-	for (data += field->offset; data < record->data + record->size;
-	     data += long_size) {
-		int size = sizeof(long);
-		int left = (record->data + record->size) - data;
-		int i;
-
-		if (size > left)
-			size = left;
-
-		for (i = 0; i < size; i++)
-			trace_seq_printf(s, " %02x", *(unsigned char *)(data + i));
-	}
-
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_event_handler(tep, -1, "ftrace", "function",
-				   function_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "ftrace", "kernel_stack",
-				      trace_stack_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "ftrace", "raw_data",
-				      trace_raw_data_handler, NULL);
-
-	tep_plugin_add_options("ftrace", plugin_options);
-
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	int i, x;
-
-	tep_unregister_event_handler(tep, -1, "ftrace", "function",
-				     function_handler, NULL);
-
-	for (i = 0; i <= cpus; i++) {
-		for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
-			free(fstack[i].stack[x]);
-		free(fstack[i].stack);
-	}
-
-	tep_plugin_remove_options(plugin_options);
-
-	free(fstack);
-	fstack = NULL;
-	cpus = -1;
-}
diff --git a/tools/lib/traceevent/plugins/plugin_futex.c b/tools/lib/traceevent/plugins/plugin_futex.c
deleted file mode 100644
index eb7c9f8a850ab..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_futex.c
+++ /dev/null
@@ -1,123 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2017 National Instruments Corp.
- *
- * Author: Julia Cartwright <julia@ni.com>
- *
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <linux/futex.h>
-
-#include "event-parse.h"
-
-#define ARRAY_SIZE(_a) (sizeof(_a) / sizeof((_a)[0]))
-
-struct futex_args {
-	unsigned long long	uaddr;
-	unsigned long long	op;
-	unsigned long long	val;
-	unsigned long long	utime; /* or val2 */
-	unsigned long long	uaddr2;
-	unsigned long long	val3;
-};
-
-struct futex_op {
-	const char	*name;
-	const char	*fmt_val;
-	const char	*fmt_utime;
-	const char	*fmt_uaddr2;
-	const char	*fmt_val3;
-};
-
-static const struct futex_op futex_op_tbl[] = {
-	{            "FUTEX_WAIT", " val=0x%08llx", " utime=0x%08llx",               NULL,             NULL },
-	{            "FUTEX_WAKE",     " val=%llu",              NULL,               NULL,             NULL },
-	{              "FUTEX_FD",     " val=%llu",              NULL,               NULL,             NULL },
-	{         "FUTEX_REQUEUE",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx",             NULL },
-	{     "FUTEX_CMP_REQUEUE",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
-	{         "FUTEX_WAKE_OP",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
-	{         "FUTEX_LOCK_PI",            NULL, " utime=0x%08llx",               NULL,             NULL },
-	{       "FUTEX_UNLOCK_PI",            NULL,              NULL,               NULL,             NULL },
-	{      "FUTEX_TRYLOCK_PI",            NULL,              NULL,               NULL,             NULL },
-	{     "FUTEX_WAIT_BITSET", " val=0x%08llx", " utime=0x%08llx",               NULL, " val3=0x%08llx" },
-	{     "FUTEX_WAKE_BITSET",     " val=%llu",              NULL,               NULL, " val3=0x%08llx" },
-	{ "FUTEX_WAIT_REQUEUE_PI", " val=0x%08llx", " utime=0x%08llx", " uaddr2=0x%08llx", " val3=0x%08llx" },
-	{  "FUTEX_CMP_REQUEUE_PI",     " val=%llu",      " val2=%llu", " uaddr2=0x%08llx", " val3=0x%08llx" },
-};
-
-
-static void futex_print(struct trace_seq *s, const struct futex_args *args,
-			const struct futex_op *fop)
-{
-	trace_seq_printf(s, " uaddr=0x%08llx", args->uaddr);
-
-	if (fop->fmt_val)
-		trace_seq_printf(s, fop->fmt_val, args->val);
-
-	if (fop->fmt_utime)
-		trace_seq_printf(s,fop->fmt_utime, args->utime);
-
-	if (fop->fmt_uaddr2)
-		trace_seq_printf(s, fop->fmt_uaddr2, args->uaddr2);
-
-	if (fop->fmt_val3)
-		trace_seq_printf(s, fop->fmt_val3, args->val3);
-}
-
-static int futex_handler(struct trace_seq *s, struct tep_record *record,
-			 struct tep_event *event, void *context)
-{
-	const struct futex_op *fop;
-	struct futex_args args;
-	unsigned long long cmd;
-
-	if (tep_get_field_val(s, event, "uaddr", record, &args.uaddr, 1))
-		return 1;
-
-	if (tep_get_field_val(s, event, "op", record, &args.op, 1))
-		return 1;
-
-	if (tep_get_field_val(s, event, "val", record, &args.val, 1))
-		return 1;
-
-	if (tep_get_field_val(s, event, "utime", record, &args.utime, 1))
-		return 1;
-
-	if (tep_get_field_val(s, event, "uaddr2", record, &args.uaddr2, 1))
-		return 1;
-
-	if (tep_get_field_val(s, event, "val3", record, &args.val3, 1))
-		return 1;
-
-	cmd = args.op & FUTEX_CMD_MASK;
-	if (cmd >= ARRAY_SIZE(futex_op_tbl))
-		return 1;
-
-	fop = &futex_op_tbl[cmd];
-
-	trace_seq_printf(s, "op=%s", fop->name);
-
-	if (args.op & FUTEX_PRIVATE_FLAG)
-		trace_seq_puts(s, "|FUTEX_PRIVATE_FLAG");
-
-	if (args.op & FUTEX_CLOCK_REALTIME)
-		trace_seq_puts(s, "|FUTEX_CLOCK_REALTIME");
-
-	futex_print(s, &args, fop);
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_event_handler(tep, -1, "syscalls", "sys_enter_futex",
-				   futex_handler, NULL);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_event_handler(tep, -1, "syscalls", "sys_enter_futex",
-				     futex_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_hrtimer.c b/tools/lib/traceevent/plugins/plugin_hrtimer.c
deleted file mode 100644
index d98466788f14c..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_hrtimer.c
+++ /dev/null
@@ -1,74 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static int timer_expire_handler(struct trace_seq *s,
-				struct tep_record *record,
-				struct tep_event *event, void *context)
-{
-	trace_seq_printf(s, "hrtimer=");
-
-	if (tep_print_num_field(s, "0x%llx", event, "timer",
-				record, 0) == -1)
-		tep_print_num_field(s, "0x%llx", event, "hrtimer",
-				    record, 1);
-
-	trace_seq_printf(s, " now=");
-
-	tep_print_num_field(s, "%llu", event, "now", record, 1);
-
-	tep_print_func_field(s, " function=%s", event, "function",
-				record, 0);
-	return 0;
-}
-
-static int timer_start_handler(struct trace_seq *s,
-			       struct tep_record *record,
-			       struct tep_event *event, void *context)
-{
-	trace_seq_printf(s, "hrtimer=");
-
-	if (tep_print_num_field(s, "0x%llx", event, "timer",
-				record, 0) == -1)
-		tep_print_num_field(s, "0x%llx", event, "hrtimer",
-				    record, 1);
-
-	tep_print_func_field(s, " function=%s", event, "function",
-			     record, 0);
-
-	trace_seq_printf(s, " expires=");
-	tep_print_num_field(s, "%llu", event, "expires", record, 1);
-
-	trace_seq_printf(s, " softexpires=");
-	tep_print_num_field(s, "%llu", event, "softexpires", record, 1);
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_event_handler(tep, -1,
-				   "timer", "hrtimer_expire_entry",
-				   timer_expire_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "timer", "hrtimer_start",
-				   timer_start_handler, NULL);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_event_handler(tep, -1,
-				     "timer", "hrtimer_expire_entry",
-				     timer_expire_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start",
-				     timer_start_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_jbd2.c b/tools/lib/traceevent/plugins/plugin_jbd2.c
deleted file mode 100644
index 69111a68d3cf1..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_jbd2.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define MINORBITS	20
-#define MINORMASK	((1U << MINORBITS) - 1)
-
-#define MAJOR(dev)	((unsigned int) ((dev) >> MINORBITS))
-#define MINOR(dev)	((unsigned int) ((dev) & MINORMASK))
-
-static unsigned long long
-process_jbd2_dev_to_name(struct trace_seq *s, unsigned long long *args)
-{
-	unsigned int dev = args[0];
-
-	trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev));
-	return 0;
-}
-
-static unsigned long long
-process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args)
-{
-	unsigned long long jiffies = args[0];
-
-	trace_seq_printf(s, "%lld", jiffies);
-	return jiffies;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_print_function(tep,
-				    process_jbd2_dev_to_name,
-				    TEP_FUNC_ARG_STRING,
-				    "jbd2_dev_to_name",
-				    TEP_FUNC_ARG_INT,
-				    TEP_FUNC_ARG_VOID);
-
-	tep_register_print_function(tep,
-				    process_jiffies_to_msecs,
-				    TEP_FUNC_ARG_LONG,
-				    "jiffies_to_msecs",
-				    TEP_FUNC_ARG_LONG,
-				    TEP_FUNC_ARG_VOID);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_print_function(tep, process_jbd2_dev_to_name,
-				      "jbd2_dev_to_name");
-
-	tep_unregister_print_function(tep, process_jiffies_to_msecs,
-				      "jiffies_to_msecs");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_kmem.c b/tools/lib/traceevent/plugins/plugin_kmem.c
deleted file mode 100644
index 4b4f7f9616e31..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_kmem.c
+++ /dev/null
@@ -1,80 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static int call_site_handler(struct trace_seq *s, struct tep_record *record,
-			     struct tep_event *event, void *context)
-{
-	struct tep_format_field *field;
-	unsigned long long val, addr;
-	void *data = record->data;
-	const char *func;
-
-	field = tep_find_field(event, "call_site");
-	if (!field)
-		return 1;
-
-	if (tep_read_number_field(field, data, &val))
-		return 1;
-
-	func = tep_find_function(event->tep, val);
-	if (!func)
-		return 1;
-
-	addr = tep_find_function_address(event->tep, val);
-
-	trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
-	return 1;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_event_handler(tep, -1, "kmem", "kfree",
-				   call_site_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kmem", "kmalloc",
-				   call_site_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kmem", "kmalloc_node",
-				   call_site_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
-				   call_site_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kmem",
-				   "kmem_cache_alloc_node",
-				   call_site_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free",
-				   call_site_handler, NULL);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_event_handler(tep, -1, "kmem", "kfree",
-				     call_site_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kmem", "kmalloc",
-				     call_site_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node",
-				     call_site_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc",
-				     call_site_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kmem",
-				     "kmem_cache_alloc_node",
-				     call_site_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_free",
-				     call_site_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_kvm.c b/tools/lib/traceevent/plugins/plugin_kvm.c
deleted file mode 100644
index 9ce7b4b68e3fe..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_kvm.c
+++ /dev/null
@@ -1,527 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdint.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#ifdef HAVE_UDIS86
-
-#include <udis86.h>
-
-static ud_t ud;
-
-static void init_disassembler(void)
-{
-	ud_init(&ud);
-	ud_set_syntax(&ud, UD_SYN_ATT);
-}
-
-static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
-			       int cr0_pe, int eflags_vm,
-			       int cs_d, int cs_l)
-{
-	int mode;
-
-	if (!cr0_pe)
-		mode = 16;
-	else if (eflags_vm)
-		mode = 16;
-	else if (cs_l)
-		mode = 64;
-	else if (cs_d)
-		mode = 32;
-	else
-		mode = 16;
-
-	ud_set_pc(&ud, rip);
-	ud_set_mode(&ud, mode);
-	ud_set_input_buffer(&ud, insn, len);
-	ud_disassemble(&ud);
-	return ud_insn_asm(&ud);
-}
-
-#else
-
-static void init_disassembler(void)
-{
-}
-
-static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
-			       int cr0_pe, int eflags_vm,
-			       int cs_d, int cs_l)
-{
-	static char out[15*3+1];
-	int i;
-
-	for (i = 0; i < len; ++i)
-		sprintf(out + i * 3, "%02x ", insn[i]);
-	out[len*3-1] = '\0';
-	return out;
-}
-
-#endif
-
-
-#define VMX_EXIT_REASONS			\
-	_ER(EXCEPTION_NMI,	 0)		\
-	_ER(EXTERNAL_INTERRUPT,	 1)		\
-	_ER(TRIPLE_FAULT,	 2)		\
-	_ER(PENDING_INTERRUPT,	 7)		\
-	_ER(NMI_WINDOW,		 8)		\
-	_ER(TASK_SWITCH,	 9)		\
-	_ER(CPUID,		 10)		\
-	_ER(HLT,		 12)		\
-	_ER(INVD,		 13)		\
-	_ER(INVLPG,		 14)		\
-	_ER(RDPMC,		 15)		\
-	_ER(RDTSC,		 16)		\
-	_ER(VMCALL,		 18)		\
-	_ER(VMCLEAR,		 19)		\
-	_ER(VMLAUNCH,		 20)		\
-	_ER(VMPTRLD,		 21)		\
-	_ER(VMPTRST,		 22)		\
-	_ER(VMREAD,		 23)		\
-	_ER(VMRESUME,		 24)		\
-	_ER(VMWRITE,		 25)		\
-	_ER(VMOFF,		 26)		\
-	_ER(VMON,		 27)		\
-	_ER(CR_ACCESS,		 28)		\
-	_ER(DR_ACCESS,		 29)		\
-	_ER(IO_INSTRUCTION,	 30)		\
-	_ER(MSR_READ,		 31)		\
-	_ER(MSR_WRITE,		 32)		\
-	_ER(MWAIT_INSTRUCTION,	 36)		\
-	_ER(MONITOR_INSTRUCTION, 39)		\
-	_ER(PAUSE_INSTRUCTION,	 40)		\
-	_ER(MCE_DURING_VMENTRY,	 41)		\
-	_ER(TPR_BELOW_THRESHOLD, 43)		\
-	_ER(APIC_ACCESS,	 44)		\
-	_ER(EOI_INDUCED,	 45)		\
-	_ER(EPT_VIOLATION,	 48)		\
-	_ER(EPT_MISCONFIG,	 49)		\
-	_ER(INVEPT,		 50)		\
-	_ER(PREEMPTION_TIMER,	 52)		\
-	_ER(WBINVD,		 54)		\
-	_ER(XSETBV,		 55)		\
-	_ER(APIC_WRITE,		 56)		\
-	_ER(INVPCID,		 58)		\
-	_ER(PML_FULL,		 62)		\
-	_ER(XSAVES,		 63)		\
-	_ER(XRSTORS,		 64)
-
-#define SVM_EXIT_REASONS \
-	_ER(EXIT_READ_CR0,	0x000)		\
-	_ER(EXIT_READ_CR3,	0x003)		\
-	_ER(EXIT_READ_CR4,	0x004)		\
-	_ER(EXIT_READ_CR8,	0x008)		\
-	_ER(EXIT_WRITE_CR0,	0x010)		\
-	_ER(EXIT_WRITE_CR3,	0x013)		\
-	_ER(EXIT_WRITE_CR4,	0x014)		\
-	_ER(EXIT_WRITE_CR8,	0x018)		\
-	_ER(EXIT_READ_DR0,	0x020)		\
-	_ER(EXIT_READ_DR1,	0x021)		\
-	_ER(EXIT_READ_DR2,	0x022)		\
-	_ER(EXIT_READ_DR3,	0x023)		\
-	_ER(EXIT_READ_DR4,	0x024)		\
-	_ER(EXIT_READ_DR5,	0x025)		\
-	_ER(EXIT_READ_DR6,	0x026)		\
-	_ER(EXIT_READ_DR7,	0x027)		\
-	_ER(EXIT_WRITE_DR0,	0x030)		\
-	_ER(EXIT_WRITE_DR1,	0x031)		\
-	_ER(EXIT_WRITE_DR2,	0x032)		\
-	_ER(EXIT_WRITE_DR3,	0x033)		\
-	_ER(EXIT_WRITE_DR4,	0x034)		\
-	_ER(EXIT_WRITE_DR5,	0x035)		\
-	_ER(EXIT_WRITE_DR6,	0x036)		\
-	_ER(EXIT_WRITE_DR7,	0x037)		\
-	_ER(EXIT_EXCP_DE,	0x040)		\
-	_ER(EXIT_EXCP_DB,	0x041)		\
-	_ER(EXIT_EXCP_BP,	0x043)		\
-	_ER(EXIT_EXCP_OF,	0x044)		\
-	_ER(EXIT_EXCP_BR,	0x045)		\
-	_ER(EXIT_EXCP_UD,	0x046)		\
-	_ER(EXIT_EXCP_NM,	0x047)		\
-	_ER(EXIT_EXCP_DF,	0x048)		\
-	_ER(EXIT_EXCP_TS,	0x04a)		\
-	_ER(EXIT_EXCP_NP,	0x04b)		\
-	_ER(EXIT_EXCP_SS,	0x04c)		\
-	_ER(EXIT_EXCP_GP,	0x04d)		\
-	_ER(EXIT_EXCP_PF,	0x04e)		\
-	_ER(EXIT_EXCP_MF,	0x050)		\
-	_ER(EXIT_EXCP_AC,	0x051)		\
-	_ER(EXIT_EXCP_MC,	0x052)		\
-	_ER(EXIT_EXCP_XF,	0x053)		\
-	_ER(EXIT_INTR,		0x060)		\
-	_ER(EXIT_NMI,		0x061)		\
-	_ER(EXIT_SMI,		0x062)		\
-	_ER(EXIT_INIT,		0x063)		\
-	_ER(EXIT_VINTR,		0x064)		\
-	_ER(EXIT_CR0_SEL_WRITE,	0x065)		\
-	_ER(EXIT_IDTR_READ,	0x066)		\
-	_ER(EXIT_GDTR_READ,	0x067)		\
-	_ER(EXIT_LDTR_READ,	0x068)		\
-	_ER(EXIT_TR_READ,	0x069)		\
-	_ER(EXIT_IDTR_WRITE,	0x06a)		\
-	_ER(EXIT_GDTR_WRITE,	0x06b)		\
-	_ER(EXIT_LDTR_WRITE,	0x06c)		\
-	_ER(EXIT_TR_WRITE,	0x06d)		\
-	_ER(EXIT_RDTSC,		0x06e)		\
-	_ER(EXIT_RDPMC,		0x06f)		\
-	_ER(EXIT_PUSHF,		0x070)		\
-	_ER(EXIT_POPF,		0x071)		\
-	_ER(EXIT_CPUID,		0x072)		\
-	_ER(EXIT_RSM,		0x073)		\
-	_ER(EXIT_IRET,		0x074)		\
-	_ER(EXIT_SWINT,		0x075)		\
-	_ER(EXIT_INVD,		0x076)		\
-	_ER(EXIT_PAUSE,		0x077)		\
-	_ER(EXIT_HLT,		0x078)		\
-	_ER(EXIT_INVLPG,	0x079)		\
-	_ER(EXIT_INVLPGA,	0x07a)		\
-	_ER(EXIT_IOIO,		0x07b)		\
-	_ER(EXIT_MSR,		0x07c)		\
-	_ER(EXIT_TASK_SWITCH,	0x07d)		\
-	_ER(EXIT_FERR_FREEZE,	0x07e)		\
-	_ER(EXIT_SHUTDOWN,	0x07f)		\
-	_ER(EXIT_VMRUN,		0x080)		\
-	_ER(EXIT_VMMCALL,	0x081)		\
-	_ER(EXIT_VMLOAD,	0x082)		\
-	_ER(EXIT_VMSAVE,	0x083)		\
-	_ER(EXIT_STGI,		0x084)		\
-	_ER(EXIT_CLGI,		0x085)		\
-	_ER(EXIT_SKINIT,	0x086)		\
-	_ER(EXIT_RDTSCP,	0x087)		\
-	_ER(EXIT_ICEBP,		0x088)		\
-	_ER(EXIT_WBINVD,	0x089)		\
-	_ER(EXIT_MONITOR,	0x08a)		\
-	_ER(EXIT_MWAIT,		0x08b)		\
-	_ER(EXIT_MWAIT_COND,	0x08c)		\
-	_ER(EXIT_XSETBV,	0x08d)		\
-	_ER(EXIT_NPF, 		0x400)		\
-	_ER(EXIT_AVIC_INCOMPLETE_IPI,		0x401)	\
-	_ER(EXIT_AVIC_UNACCELERATED_ACCESS,	0x402)	\
-	_ER(EXIT_ERR,		-1)
-
-#define _ER(reason, val)	{ #reason, val },
-struct str_values {
-	const char	*str;
-	int		val;
-};
-
-static struct str_values vmx_exit_reasons[] = {
-	VMX_EXIT_REASONS
-	{ NULL, -1}
-};
-
-static struct str_values svm_exit_reasons[] = {
-	SVM_EXIT_REASONS
-	{ NULL, -1}
-};
-
-static struct isa_exit_reasons {
-	unsigned isa;
-	struct str_values *strings;
-} isa_exit_reasons[] = {
-	{ .isa = 1, .strings = vmx_exit_reasons },
-	{ .isa = 2, .strings = svm_exit_reasons },
-	{ }
-};
-
-static const char *find_exit_reason(unsigned isa, int val)
-{
-	struct str_values *strings = NULL;
-	int i;
-
-	for (i = 0; isa_exit_reasons[i].strings; ++i)
-		if (isa_exit_reasons[i].isa == isa) {
-			strings = isa_exit_reasons[i].strings;
-			break;
-		}
-	if (!strings)
-		return "UNKNOWN-ISA";
-	for (i = 0; strings[i].str; i++)
-		if (strings[i].val == val)
-			break;
-
-	return strings[i].str;
-}
-
-static int print_exit_reason(struct trace_seq *s, struct tep_record *record,
-			     struct tep_event *event, const char *field)
-{
-	unsigned long long isa;
-	unsigned long long val;
-	const char *reason;
-
-	if (tep_get_field_val(s, event, field, record, &val, 1) < 0)
-		return -1;
-
-	if (tep_get_field_val(s, event, "isa", record, &isa, 0) < 0)
-		isa = 1;
-
-	reason = find_exit_reason(isa, val);
-	if (reason)
-		trace_seq_printf(s, "reason %s", reason);
-	else
-		trace_seq_printf(s, "reason UNKNOWN (%llu)", val);
-	return 0;
-}
-
-static int kvm_exit_handler(struct trace_seq *s, struct tep_record *record,
-			    struct tep_event *event, void *context)
-{
-	unsigned long long info1 = 0, info2 = 0;
-
-	if (print_exit_reason(s, record, event, "exit_reason") < 0)
-		return -1;
-
-	tep_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
-
-	if (tep_get_field_val(s, event, "info1", record, &info1, 0) >= 0
-	    && tep_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
-		trace_seq_printf(s, " info %llx %llx", info1, info2);
-
-	return 0;
-}
-
-#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
-#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
-#define KVM_EMUL_INSN_F_CS_D   (1 << 2)
-#define KVM_EMUL_INSN_F_CS_L   (1 << 3)
-
-static int kvm_emulate_insn_handler(struct trace_seq *s,
-				    struct tep_record *record,
-				    struct tep_event *event, void *context)
-{
-	unsigned long long rip, csbase, len, flags, failed;
-	int llen;
-	uint8_t *insn;
-	const char *disasm;
-
-	if (tep_get_field_val(s, event, "rip", record, &rip, 1) < 0)
-		return -1;
-
-	if (tep_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
-		return -1;
-
-	if (tep_get_field_val(s, event, "len", record, &len, 1) < 0)
-		return -1;
-
-	if (tep_get_field_val(s, event, "flags", record, &flags, 1) < 0)
-		return -1;
-
-	if (tep_get_field_val(s, event, "failed", record, &failed, 1) < 0)
-		return -1;
-
-	insn = tep_get_field_raw(s, event, "insn", record, &llen, 1);
-	if (!insn)
-		return -1;
-
-	disasm = disassemble(insn, len, rip,
-			     flags & KVM_EMUL_INSN_F_CR0_PE,
-			     flags & KVM_EMUL_INSN_F_EFL_VM,
-			     flags & KVM_EMUL_INSN_F_CS_D,
-			     flags & KVM_EMUL_INSN_F_CS_L);
-
-	trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm,
-			 failed ? " FAIL" : "");
-	return 0;
-}
-
-
-static int kvm_nested_vmexit_inject_handler(struct trace_seq *s, struct tep_record *record,
-					    struct tep_event *event, void *context)
-{
-	if (print_exit_reason(s, record, event, "exit_code") < 0)
-		return -1;
-
-	tep_print_num_field(s, " info1 %llx", event, "exit_info1", record, 1);
-	tep_print_num_field(s, " info2 %llx", event, "exit_info2", record, 1);
-	tep_print_num_field(s, " int_info %llx", event, "exit_int_info", record, 1);
-	tep_print_num_field(s, " int_info_err %llx", event, "exit_int_info_err", record, 1);
-
-	return 0;
-}
-
-static int kvm_nested_vmexit_handler(struct trace_seq *s, struct tep_record *record,
-				     struct tep_event *event, void *context)
-{
-	tep_print_num_field(s, "rip %llx ", event, "rip", record, 1);
-
-	return kvm_nested_vmexit_inject_handler(s, record, event, context);
-}
-
-union kvm_mmu_page_role {
-	unsigned word;
-	struct {
-		unsigned level:4;
-		unsigned cr4_pae:1;
-		unsigned quadrant:2;
-		unsigned direct:1;
-		unsigned access:3;
-		unsigned invalid:1;
-		unsigned efer_nx:1;
-		unsigned cr0_wp:1;
-		unsigned smep_and_not_wp:1;
-		unsigned smap_and_not_wp:1;
-		unsigned pad_for_nice_hex_output:8;
-		unsigned smm:8;
-	};
-};
-
-static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record,
-			      struct tep_event *event, void *context)
-{
-	unsigned long long val;
-	static const char *access_str[] = {
-		"---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
-	};
-	union kvm_mmu_page_role role;
-
-	if (tep_get_field_val(s, event, "role", record, &val, 1) < 0)
-		return -1;
-
-	role.word = (int)val;
-
-	/*
-	 * We can only use the structure if file is of the same
-	 * endianness.
-	 */
-	if (tep_is_file_bigendian(event->tep) ==
-	    tep_is_local_bigendian(event->tep)) {
-
-		trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s",
-				 role.level,
-				 role.quadrant,
-				 role.direct ? " direct" : "",
-				 access_str[role.access],
-				 role.invalid ? " invalid" : "",
-				 role.cr4_pae ? "" : "!",
-				 role.efer_nx ? "" : "!",
-				 role.cr0_wp ? "" : "!",
-				 role.smep_and_not_wp ? " smep" : "",
-				 role.smap_and_not_wp ? " smap" : "",
-				 role.smm ? " smm" : "");
-	} else
-		trace_seq_printf(s, "WORD: %08x", role.word);
-
-	tep_print_num_field(s, " root %u ",  event,
-			    "root_count", record, 1);
-
-	if (tep_get_field_val(s, event, "unsync", record, &val, 1) < 0)
-		return -1;
-
-	trace_seq_printf(s, "%s%c",  val ? "unsync" : "sync", 0);
-	return 0;
-}
-
-static int kvm_mmu_get_page_handler(struct trace_seq *s,
-				    struct tep_record *record,
-				    struct tep_event *event, void *context)
-{
-	unsigned long long val;
-
-	if (tep_get_field_val(s, event, "created", record, &val, 1) < 0)
-		return -1;
-
-	trace_seq_printf(s, "%s ", val ? "new" : "existing");
-
-	if (tep_get_field_val(s, event, "gfn", record, &val, 1) < 0)
-		return -1;
-
-	trace_seq_printf(s, "sp gfn %llx ", val);
-	return kvm_mmu_print_role(s, record, event, context);
-}
-
-#define PT_WRITABLE_SHIFT 1
-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
-
-static unsigned long long
-process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
-{
-	unsigned long pte = args[0];
-	return pte & PT_WRITABLE_MASK;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	init_disassembler();
-
-	tep_register_event_handler(tep, -1, "kvm", "kvm_exit",
-				   kvm_exit_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
-				   kvm_emulate_insn_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
-				   kvm_nested_vmexit_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
-				   kvm_nested_vmexit_inject_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
-				   kvm_mmu_get_page_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
-				   kvm_mmu_print_role, NULL);
-
-	tep_register_event_handler(tep, -1,
-				   "kvmmmu", "kvm_mmu_unsync_page",
-				   kvm_mmu_print_role, NULL);
-
-	tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
-				   kvm_mmu_print_role, NULL);
-
-	tep_register_event_handler(tep, -1, "kvmmmu",
-			"kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
-			NULL);
-
-	tep_register_print_function(tep,
-				    process_is_writable_pte,
-				    TEP_FUNC_ARG_INT,
-				    "is_writable_pte",
-				    TEP_FUNC_ARG_LONG,
-				    TEP_FUNC_ARG_VOID);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit",
-				     kvm_exit_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn",
-				     kvm_emulate_insn_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit",
-				     kvm_nested_vmexit_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject",
-				     kvm_nested_vmexit_inject_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page",
-				     kvm_mmu_get_page_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page",
-				     kvm_mmu_print_role, NULL);
-
-	tep_unregister_event_handler(tep, -1,
-				     "kvmmmu", "kvm_mmu_unsync_page",
-				     kvm_mmu_print_role, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page",
-				     kvm_mmu_print_role, NULL);
-
-	tep_unregister_event_handler(tep, -1, "kvmmmu",
-			"kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
-			NULL);
-
-	tep_unregister_print_function(tep, process_is_writable_pte,
-				      "is_writable_pte");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_mac80211.c b/tools/lib/traceevent/plugins/plugin_mac80211.c
deleted file mode 100644
index f48071e3cfb83..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_mac80211.c
+++ /dev/null
@@ -1,88 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define INDENT 65
-
-static void print_string(struct trace_seq *s, struct tep_event *event,
-			 const char *name, const void *data)
-{
-	struct tep_format_field *f = tep_find_field(event, name);
-	int offset;
-	int length;
-
-	if (!f) {
-		trace_seq_printf(s, "NOTFOUND:%s", name);
-		return;
-	}
-
-	offset = f->offset;
-	length = f->size;
-
-	if (!strncmp(f->type, "__data_loc", 10)) {
-		unsigned long long v;
-		if (tep_read_number_field(f, data, &v)) {
-			trace_seq_printf(s, "invalid_data_loc");
-			return;
-		}
-		offset = v & 0xffff;
-		length = v >> 16;
-	}
-
-	trace_seq_printf(s, "%.*s", length, (char *)data + offset);
-}
-
-#define SF(fn)	tep_print_num_field(s, fn ":%d", event, fn, record, 0)
-#define SFX(fn)	tep_print_num_field(s, fn ":%#x", event, fn, record, 0)
-#define SP()	trace_seq_putc(s, ' ')
-
-static int drv_bss_info_changed(struct trace_seq *s,
-				struct tep_record *record,
-				struct tep_event *event, void *context)
-{
-	void *data = record->data;
-
-	print_string(s, event, "wiphy_name", data);
-	trace_seq_printf(s, " vif:");
-	print_string(s, event, "vif_name", data);
-	tep_print_num_field(s, "(%d)", event, "vif_type", record, 1);
-
-	trace_seq_printf(s, "\n%*s", INDENT, "");
-	SF("assoc"); SP();
-	SF("aid"); SP();
-	SF("cts"); SP();
-	SF("shortpre"); SP();
-	SF("shortslot"); SP();
-	SF("dtimper"); SP();
-	trace_seq_printf(s, "\n%*s", INDENT, "");
-	SF("bcnint"); SP();
-	SFX("assoc_cap"); SP();
-	SFX("basic_rates"); SP();
-	SF("enable_beacon");
-	trace_seq_printf(s, "\n%*s", INDENT, "");
-	SF("ht_operation_mode");
-
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_event_handler(tep, -1, "mac80211",
-				   "drv_bss_info_changed",
-				   drv_bss_info_changed, NULL);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_event_handler(tep, -1, "mac80211",
-				     "drv_bss_info_changed",
-				     drv_bss_info_changed, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_sched_switch.c b/tools/lib/traceevent/plugins/plugin_sched_switch.c
deleted file mode 100644
index e12fa103820ab..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_sched_switch.c
+++ /dev/null
@@ -1,146 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-#include "trace-seq.h"
-
-static void write_state(struct trace_seq *s, int val)
-{
-	const char states[] = "SDTtZXxW";
-	int found = 0;
-	int i;
-
-	for (i = 0; i < (sizeof(states) - 1); i++) {
-		if (!(val & (1 << i)))
-			continue;
-
-		if (found)
-			trace_seq_putc(s, '|');
-
-		found = 1;
-		trace_seq_putc(s, states[i]);
-	}
-
-	if (!found)
-		trace_seq_putc(s, 'R');
-}
-
-static void write_and_save_comm(struct tep_format_field *field,
-				struct tep_record *record,
-				struct trace_seq *s, int pid)
-{
-	const char *comm;
-	int len;
-
-	comm = (char *)(record->data + field->offset);
-	len = s->len;
-	trace_seq_printf(s, "%.*s",
-			 field->size, comm);
-
-	/* make sure the comm has a \0 at the end. */
-	trace_seq_terminate(s);
-	comm = &s->buffer[len];
-
-	/* Help out the comm to ids. This will handle dups */
-	tep_register_comm(field->event->tep, comm, pid);
-}
-
-static int sched_wakeup_handler(struct trace_seq *s,
-				struct tep_record *record,
-				struct tep_event *event, void *context)
-{
-	struct tep_format_field *field;
-	unsigned long long val;
-
-	if (tep_get_field_val(s, event, "pid", record, &val, 1))
-		return trace_seq_putc(s, '!');
-
-	field = tep_find_any_field(event, "comm");
-	if (field) {
-		write_and_save_comm(field, record, s, val);
-		trace_seq_putc(s, ':');
-	}
-	trace_seq_printf(s, "%lld", val);
-
-	if (tep_get_field_val(s, event, "prio", record, &val, 0) == 0)
-		trace_seq_printf(s, " [%lld]", val);
-
-	if (tep_get_field_val(s, event, "success", record, &val, 1) == 0)
-		trace_seq_printf(s, " success=%lld", val);
-
-	if (tep_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
-		trace_seq_printf(s, " CPU:%03llu", val);
-
-	return 0;
-}
-
-static int sched_switch_handler(struct trace_seq *s,
-				struct tep_record *record,
-				struct tep_event *event, void *context)
-{
-	struct tep_format_field *field;
-	unsigned long long val;
-
-	if (tep_get_field_val(s, event, "prev_pid", record, &val, 1))
-		return trace_seq_putc(s, '!');
-
-	field = tep_find_any_field(event, "prev_comm");
-	if (field) {
-		write_and_save_comm(field, record, s, val);
-		trace_seq_putc(s, ':');
-	}
-	trace_seq_printf(s, "%lld ", val);
-
-	if (tep_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, "[%d] ", (int) val);
-
-	if (tep_get_field_val(s,  event, "prev_state", record, &val, 0) == 0)
-		write_state(s, val);
-
-	trace_seq_puts(s, " ==> ");
-
-	if (tep_get_field_val(s, event, "next_pid", record, &val, 1))
-		return trace_seq_putc(s, '!');
-
-	field = tep_find_any_field(event, "next_comm");
-	if (field) {
-		write_and_save_comm(field, record, s, val);
-		trace_seq_putc(s, ':');
-	}
-	trace_seq_printf(s, "%lld", val);
-
-	if (tep_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
-		trace_seq_printf(s, " [%d]", (int) val);
-
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_event_handler(tep, -1, "sched", "sched_switch",
-				   sched_switch_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "sched", "sched_wakeup",
-				   sched_wakeup_handler, NULL);
-
-	tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new",
-				   sched_wakeup_handler, NULL);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_event_handler(tep, -1, "sched", "sched_switch",
-				     sched_switch_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup",
-				     sched_wakeup_handler, NULL);
-
-	tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new",
-				     sched_wakeup_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_scsi.c b/tools/lib/traceevent/plugins/plugin_scsi.c
deleted file mode 100644
index 5d0387a4b65a0..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_scsi.c
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <string.h>
-#include <inttypes.h>
-#include "event-parse.h"
-#include "trace-seq.h"
-
-typedef unsigned long sector_t;
-typedef uint64_t u64;
-typedef unsigned int u32;
-
-/*
- *      SCSI opcodes
- */
-#define TEST_UNIT_READY			0x00
-#define REZERO_UNIT			0x01
-#define REQUEST_SENSE			0x03
-#define FORMAT_UNIT			0x04
-#define READ_BLOCK_LIMITS		0x05
-#define REASSIGN_BLOCKS			0x07
-#define INITIALIZE_ELEMENT_STATUS	0x07
-#define READ_6				0x08
-#define WRITE_6				0x0a
-#define SEEK_6				0x0b
-#define READ_REVERSE			0x0f
-#define WRITE_FILEMARKS			0x10
-#define SPACE				0x11
-#define INQUIRY				0x12
-#define RECOVER_BUFFERED_DATA		0x14
-#define MODE_SELECT			0x15
-#define RESERVE				0x16
-#define RELEASE				0x17
-#define COPY				0x18
-#define ERASE				0x19
-#define MODE_SENSE			0x1a
-#define START_STOP			0x1b
-#define RECEIVE_DIAGNOSTIC		0x1c
-#define SEND_DIAGNOSTIC			0x1d
-#define ALLOW_MEDIUM_REMOVAL		0x1e
-
-#define READ_FORMAT_CAPACITIES		0x23
-#define SET_WINDOW			0x24
-#define READ_CAPACITY			0x25
-#define READ_10				0x28
-#define WRITE_10			0x2a
-#define SEEK_10				0x2b
-#define POSITION_TO_ELEMENT		0x2b
-#define WRITE_VERIFY			0x2e
-#define VERIFY				0x2f
-#define SEARCH_HIGH			0x30
-#define SEARCH_EQUAL			0x31
-#define SEARCH_LOW			0x32
-#define SET_LIMITS			0x33
-#define PRE_FETCH			0x34
-#define READ_POSITION			0x34
-#define SYNCHRONIZE_CACHE		0x35
-#define LOCK_UNLOCK_CACHE		0x36
-#define READ_DEFECT_DATA		0x37
-#define MEDIUM_SCAN			0x38
-#define COMPARE				0x39
-#define COPY_VERIFY			0x3a
-#define WRITE_BUFFER			0x3b
-#define READ_BUFFER			0x3c
-#define UPDATE_BLOCK			0x3d
-#define READ_LONG			0x3e
-#define WRITE_LONG			0x3f
-#define CHANGE_DEFINITION		0x40
-#define WRITE_SAME			0x41
-#define UNMAP				0x42
-#define READ_TOC			0x43
-#define READ_HEADER			0x44
-#define GET_EVENT_STATUS_NOTIFICATION	0x4a
-#define LOG_SELECT			0x4c
-#define LOG_SENSE			0x4d
-#define XDWRITEREAD_10			0x53
-#define MODE_SELECT_10			0x55
-#define RESERVE_10			0x56
-#define RELEASE_10			0x57
-#define MODE_SENSE_10			0x5a
-#define PERSISTENT_RESERVE_IN		0x5e
-#define PERSISTENT_RESERVE_OUT		0x5f
-#define VARIABLE_LENGTH_CMD		0x7f
-#define REPORT_LUNS			0xa0
-#define SECURITY_PROTOCOL_IN		0xa2
-#define MAINTENANCE_IN			0xa3
-#define MAINTENANCE_OUT			0xa4
-#define MOVE_MEDIUM			0xa5
-#define EXCHANGE_MEDIUM			0xa6
-#define READ_12				0xa8
-#define SERVICE_ACTION_OUT_12		0xa9
-#define WRITE_12			0xaa
-#define SERVICE_ACTION_IN_12		0xab
-#define WRITE_VERIFY_12			0xae
-#define VERIFY_12			0xaf
-#define SEARCH_HIGH_12			0xb0
-#define SEARCH_EQUAL_12			0xb1
-#define SEARCH_LOW_12			0xb2
-#define SECURITY_PROTOCOL_OUT		0xb5
-#define READ_ELEMENT_STATUS		0xb8
-#define SEND_VOLUME_TAG			0xb6
-#define WRITE_LONG_2			0xea
-#define EXTENDED_COPY			0x83
-#define RECEIVE_COPY_RESULTS		0x84
-#define ACCESS_CONTROL_IN		0x86
-#define ACCESS_CONTROL_OUT		0x87
-#define READ_16				0x88
-#define WRITE_16			0x8a
-#define READ_ATTRIBUTE			0x8c
-#define WRITE_ATTRIBUTE			0x8d
-#define VERIFY_16			0x8f
-#define SYNCHRONIZE_CACHE_16		0x91
-#define WRITE_SAME_16			0x93
-#define SERVICE_ACTION_BIDIRECTIONAL	0x9d
-#define SERVICE_ACTION_IN_16		0x9e
-#define SERVICE_ACTION_OUT_16		0x9f
-/* values for service action in */
-#define	SAI_READ_CAPACITY_16		0x10
-#define SAI_GET_LBA_STATUS		0x12
-/* values for VARIABLE_LENGTH_CMD service action codes
- * see spc4r17 Section D.3.5, table D.7 and D.8 */
-#define VLC_SA_RECEIVE_CREDENTIAL	0x1800
-/* values for maintenance in */
-#define MI_REPORT_IDENTIFYING_INFORMATION		0x05
-#define MI_REPORT_TARGET_PGS				0x0a
-#define MI_REPORT_ALIASES				0x0b
-#define MI_REPORT_SUPPORTED_OPERATION_CODES		0x0c
-#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS	0x0d
-#define MI_REPORT_PRIORITY				0x0e
-#define MI_REPORT_TIMESTAMP				0x0f
-#define MI_MANAGEMENT_PROTOCOL_IN			0x10
-/* value for MI_REPORT_TARGET_PGS ext header */
-#define MI_EXT_HDR_PARAM_FMT		0x20
-/* values for maintenance out */
-#define MO_SET_IDENTIFYING_INFORMATION	0x06
-#define MO_SET_TARGET_PGS		0x0a
-#define MO_CHANGE_ALIASES		0x0b
-#define MO_SET_PRIORITY			0x0e
-#define MO_SET_TIMESTAMP		0x0f
-#define MO_MANAGEMENT_PROTOCOL_OUT	0x10
-/* values for variable length command */
-#define XDREAD_32			0x03
-#define XDWRITE_32			0x04
-#define XPWRITE_32			0x06
-#define XDWRITEREAD_32			0x07
-#define READ_32				0x09
-#define VERIFY_32			0x0a
-#define WRITE_32			0x0b
-#define WRITE_SAME_32			0x0d
-
-#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f)
-#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9])
-
-static const char *
-scsi_trace_misc(struct trace_seq *, unsigned char *, int);
-
-static const char *
-scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len;
-	sector_t lba = 0, txlen = 0;
-
-	lba |= ((cdb[1] & 0x1F) << 16);
-	lba |=  (cdb[2] << 8);
-	lba |=   cdb[3];
-	txlen = cdb[4];
-
-	trace_seq_printf(p, "lba=%llu txlen=%llu",
-			 (unsigned long long)lba, (unsigned long long)txlen);
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-static const char *
-scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len;
-	sector_t lba = 0, txlen = 0;
-
-	lba |= (cdb[2] << 24);
-	lba |= (cdb[3] << 16);
-	lba |= (cdb[4] << 8);
-	lba |=  cdb[5];
-	txlen |= (cdb[7] << 8);
-	txlen |=  cdb[8];
-
-	trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
-			 (unsigned long long)lba, (unsigned long long)txlen,
-			 cdb[1] >> 5);
-
-	if (cdb[0] == WRITE_SAME)
-		trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
-
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-static const char *
-scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len;
-	sector_t lba = 0, txlen = 0;
-
-	lba |= (cdb[2] << 24);
-	lba |= (cdb[3] << 16);
-	lba |= (cdb[4] << 8);
-	lba |=  cdb[5];
-	txlen |= (cdb[6] << 24);
-	txlen |= (cdb[7] << 16);
-	txlen |= (cdb[8] << 8);
-	txlen |=  cdb[9];
-
-	trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
-			 (unsigned long long)lba, (unsigned long long)txlen,
-			 cdb[1] >> 5);
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-static const char *
-scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len;
-	sector_t lba = 0, txlen = 0;
-
-	lba |= ((u64)cdb[2] << 56);
-	lba |= ((u64)cdb[3] << 48);
-	lba |= ((u64)cdb[4] << 40);
-	lba |= ((u64)cdb[5] << 32);
-	lba |= (cdb[6] << 24);
-	lba |= (cdb[7] << 16);
-	lba |= (cdb[8] << 8);
-	lba |=  cdb[9];
-	txlen |= (cdb[10] << 24);
-	txlen |= (cdb[11] << 16);
-	txlen |= (cdb[12] << 8);
-	txlen |=  cdb[13];
-
-	trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
-			 (unsigned long long)lba, (unsigned long long)txlen,
-			 cdb[1] >> 5);
-
-	if (cdb[0] == WRITE_SAME_16)
-		trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
-
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-static const char *
-scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len, *cmd;
-	sector_t lba = 0, txlen = 0;
-	u32 ei_lbrt = 0;
-
-	switch (SERVICE_ACTION32(cdb)) {
-	case READ_32:
-		cmd = "READ";
-		break;
-	case VERIFY_32:
-		cmd = "VERIFY";
-		break;
-	case WRITE_32:
-		cmd = "WRITE";
-		break;
-	case WRITE_SAME_32:
-		cmd = "WRITE_SAME";
-		break;
-	default:
-		trace_seq_printf(p, "UNKNOWN");
-		goto out;
-	}
-
-	lba |= ((u64)cdb[12] << 56);
-	lba |= ((u64)cdb[13] << 48);
-	lba |= ((u64)cdb[14] << 40);
-	lba |= ((u64)cdb[15] << 32);
-	lba |= (cdb[16] << 24);
-	lba |= (cdb[17] << 16);
-	lba |= (cdb[18] << 8);
-	lba |=  cdb[19];
-	ei_lbrt |= (cdb[20] << 24);
-	ei_lbrt |= (cdb[21] << 16);
-	ei_lbrt |= (cdb[22] << 8);
-	ei_lbrt |=  cdb[23];
-	txlen |= (cdb[28] << 24);
-	txlen |= (cdb[29] << 16);
-	txlen |= (cdb[30] << 8);
-	txlen |=  cdb[31];
-
-	trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u",
-			 cmd, (unsigned long long)lba,
-			 (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt);
-
-	if (SERVICE_ACTION32(cdb) == WRITE_SAME_32)
-		trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1);
-
-out:
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-static const char *
-scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len;
-	unsigned int regions = cdb[7] << 8 | cdb[8];
-
-	trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-static const char *
-scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len, *cmd;
-	sector_t lba = 0;
-	u32 alloc_len = 0;
-
-	switch (SERVICE_ACTION16(cdb)) {
-	case SAI_READ_CAPACITY_16:
-		cmd = "READ_CAPACITY_16";
-		break;
-	case SAI_GET_LBA_STATUS:
-		cmd = "GET_LBA_STATUS";
-		break;
-	default:
-		trace_seq_printf(p, "UNKNOWN");
-		goto out;
-	}
-
-	lba |= ((u64)cdb[2] << 56);
-	lba |= ((u64)cdb[3] << 48);
-	lba |= ((u64)cdb[4] << 40);
-	lba |= ((u64)cdb[5] << 32);
-	lba |= (cdb[6] << 24);
-	lba |= (cdb[7] << 16);
-	lba |= (cdb[8] << 8);
-	lba |=  cdb[9];
-	alloc_len |= (cdb[10] << 24);
-	alloc_len |= (cdb[11] << 16);
-	alloc_len |= (cdb[12] << 8);
-	alloc_len |=  cdb[13];
-
-	trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd,
-			 (unsigned long long)lba, alloc_len);
-
-out:
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-static const char *
-scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	switch (SERVICE_ACTION32(cdb)) {
-	case READ_32:
-	case VERIFY_32:
-	case WRITE_32:
-	case WRITE_SAME_32:
-		return scsi_trace_rw32(p, cdb, len);
-	default:
-		return scsi_trace_misc(p, cdb, len);
-	}
-}
-
-static const char *
-scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	const char *ret = p->buffer + p->len;
-
-	trace_seq_printf(p, "-");
-	trace_seq_putc(p, 0);
-	return ret;
-}
-
-const char *
-scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
-{
-	switch (cdb[0]) {
-	case READ_6:
-	case WRITE_6:
-		return scsi_trace_rw6(p, cdb, len);
-	case READ_10:
-	case VERIFY:
-	case WRITE_10:
-	case WRITE_SAME:
-		return scsi_trace_rw10(p, cdb, len);
-	case READ_12:
-	case VERIFY_12:
-	case WRITE_12:
-		return scsi_trace_rw12(p, cdb, len);
-	case READ_16:
-	case VERIFY_16:
-	case WRITE_16:
-	case WRITE_SAME_16:
-		return scsi_trace_rw16(p, cdb, len);
-	case UNMAP:
-		return scsi_trace_unmap(p, cdb, len);
-	case SERVICE_ACTION_IN_16:
-		return scsi_trace_service_action_in(p, cdb, len);
-	case VARIABLE_LENGTH_CMD:
-		return scsi_trace_varlen(p, cdb, len);
-	default:
-		return scsi_trace_misc(p, cdb, len);
-	}
-}
-
-unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
-						unsigned long long *args)
-{
-	scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]);
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_print_function(tep,
-				    process_scsi_trace_parse_cdb,
-				    TEP_FUNC_ARG_STRING,
-				    "scsi_trace_parse_cdb",
-				    TEP_FUNC_ARG_PTR,
-				    TEP_FUNC_ARG_PTR,
-				    TEP_FUNC_ARG_INT,
-				    TEP_FUNC_ARG_VOID);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_print_function(tep, process_scsi_trace_parse_cdb,
-				      "scsi_trace_parse_cdb");
-}
diff --git a/tools/lib/traceevent/plugins/plugin_tlb.c b/tools/lib/traceevent/plugins/plugin_tlb.c
deleted file mode 100644
index 43657fb60504f..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_tlb.c
+++ /dev/null
@@ -1,66 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2015 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "event-parse.h"
-
-enum tlb_flush_reason {
-	TLB_FLUSH_ON_TASK_SWITCH,
-	TLB_REMOTE_SHOOTDOWN,
-	TLB_LOCAL_SHOOTDOWN,
-	TLB_LOCAL_MM_SHOOTDOWN,
-	NR_TLB_FLUSH_REASONS,
-};
-
-static int tlb_flush_handler(struct trace_seq *s, struct tep_record *record,
-			     struct tep_event *event, void *context)
-{
-	unsigned long long val;
-
-	trace_seq_printf(s, "pages=");
-
-	tep_print_num_field(s, "%ld", event, "pages", record, 1);
-
-	if (tep_get_field_val(s, event, "reason", record, &val, 1) < 0)
-		return -1;
-
-	trace_seq_puts(s, " reason=");
-
-	switch (val) {
-	case TLB_FLUSH_ON_TASK_SWITCH:
-		trace_seq_puts(s, "flush on task switch");
-		break;
-	case TLB_REMOTE_SHOOTDOWN:
-		trace_seq_puts(s, "remote shootdown");
-		break;
-	case TLB_LOCAL_SHOOTDOWN:
-		trace_seq_puts(s, "local shootdown");
-		break;
-	case TLB_LOCAL_MM_SHOOTDOWN:
-		trace_seq_puts(s, "local mm shootdown");
-		break;
-	}
-
-	trace_seq_printf(s, " (%lld)", val);
-
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_event_handler(tep, -1, "tlb", "tlb_flush",
-				   tlb_flush_handler, NULL);
-
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_event_handler(tep, -1,
-				     "tlb", "tlb_flush",
-				     tlb_flush_handler, NULL);
-}
diff --git a/tools/lib/traceevent/plugins/plugin_xen.c b/tools/lib/traceevent/plugins/plugin_xen.c
deleted file mode 100644
index 993b208d0323f..0000000000000
--- a/tools/lib/traceevent/plugins/plugin_xen.c
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include "event-parse.h"
-#include "trace-seq.h"
-
-#define __HYPERVISOR_set_trap_table			0
-#define __HYPERVISOR_mmu_update				1
-#define __HYPERVISOR_set_gdt				2
-#define __HYPERVISOR_stack_switch			3
-#define __HYPERVISOR_set_callbacks			4
-#define __HYPERVISOR_fpu_taskswitch			5
-#define __HYPERVISOR_sched_op_compat			6
-#define __HYPERVISOR_dom0_op				7
-#define __HYPERVISOR_set_debugreg			8
-#define __HYPERVISOR_get_debugreg			9
-#define __HYPERVISOR_update_descriptor			10
-#define __HYPERVISOR_memory_op				12
-#define __HYPERVISOR_multicall				13
-#define __HYPERVISOR_update_va_mapping			14
-#define __HYPERVISOR_set_timer_op			15
-#define __HYPERVISOR_event_channel_op_compat		16
-#define __HYPERVISOR_xen_version			17
-#define __HYPERVISOR_console_io				18
-#define __HYPERVISOR_physdev_op_compat			19
-#define __HYPERVISOR_grant_table_op			20
-#define __HYPERVISOR_vm_assist				21
-#define __HYPERVISOR_update_va_mapping_otherdomain	22
-#define __HYPERVISOR_iret				23 /* x86 only */
-#define __HYPERVISOR_vcpu_op				24
-#define __HYPERVISOR_set_segment_base			25 /* x86/64 only */
-#define __HYPERVISOR_mmuext_op				26
-#define __HYPERVISOR_acm_op				27
-#define __HYPERVISOR_nmi_op				28
-#define __HYPERVISOR_sched_op				29
-#define __HYPERVISOR_callback_op			30
-#define __HYPERVISOR_xenoprof_op			31
-#define __HYPERVISOR_event_channel_op			32
-#define __HYPERVISOR_physdev_op				33
-#define __HYPERVISOR_hvm_op				34
-#define __HYPERVISOR_tmem_op				38
-
-/* Architecture-specific hypercall definitions. */
-#define __HYPERVISOR_arch_0				48
-#define __HYPERVISOR_arch_1				49
-#define __HYPERVISOR_arch_2				50
-#define __HYPERVISOR_arch_3				51
-#define __HYPERVISOR_arch_4				52
-#define __HYPERVISOR_arch_5				53
-#define __HYPERVISOR_arch_6				54
-#define __HYPERVISOR_arch_7				55
-
-#define N(x)	[__HYPERVISOR_##x] = "("#x")"
-static const char *xen_hypercall_names[] = {
-	N(set_trap_table),
-	N(mmu_update),
-	N(set_gdt),
-	N(stack_switch),
-	N(set_callbacks),
-	N(fpu_taskswitch),
-	N(sched_op_compat),
-	N(dom0_op),
-	N(set_debugreg),
-	N(get_debugreg),
-	N(update_descriptor),
-	N(memory_op),
-	N(multicall),
-	N(update_va_mapping),
-	N(set_timer_op),
-	N(event_channel_op_compat),
-	N(xen_version),
-	N(console_io),
-	N(physdev_op_compat),
-	N(grant_table_op),
-	N(vm_assist),
-	N(update_va_mapping_otherdomain),
-	N(iret),
-	N(vcpu_op),
-	N(set_segment_base),
-	N(mmuext_op),
-	N(acm_op),
-	N(nmi_op),
-	N(sched_op),
-	N(callback_op),
-	N(xenoprof_op),
-	N(event_channel_op),
-	N(physdev_op),
-	N(hvm_op),
-
-/* Architecture-specific hypercall definitions. */
-	N(arch_0),
-	N(arch_1),
-	N(arch_2),
-	N(arch_3),
-	N(arch_4),
-	N(arch_5),
-	N(arch_6),
-	N(arch_7),
-};
-#undef N
-
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
-static const char *xen_hypercall_name(unsigned op)
-{
-	if (op < ARRAY_SIZE(xen_hypercall_names) &&
-	    xen_hypercall_names[op] != NULL)
-		return xen_hypercall_names[op];
-
-	return "";
-}
-
-unsigned long long process_xen_hypercall_name(struct trace_seq *s,
-					      unsigned long long *args)
-{
-	unsigned int op = args[0];
-
-	trace_seq_printf(s, "%s", xen_hypercall_name(op));
-	return 0;
-}
-
-int TEP_PLUGIN_LOADER(struct tep_handle *tep)
-{
-	tep_register_print_function(tep,
-				    process_xen_hypercall_name,
-				    TEP_FUNC_ARG_STRING,
-				    "xen_hypercall_name",
-				    TEP_FUNC_ARG_INT,
-				    TEP_FUNC_ARG_VOID);
-	return 0;
-}
-
-void TEP_PLUGIN_UNLOADER(struct tep_handle *tep)
-{
-	tep_unregister_print_function(tep, process_xen_hypercall_name,
-				      "xen_hypercall_name");
-}
diff --git a/tools/lib/traceevent/tep_strerror.c b/tools/lib/traceevent/tep_strerror.c
deleted file mode 100644
index 4ac26445b2f66..0000000000000
--- a/tools/lib/traceevent/tep_strerror.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-#undef _GNU_SOURCE
-#include <string.h>
-#include <stdio.h>
-
-#include "event-parse.h"
-
-#undef _PE
-#define _PE(code, str) str
-static const char * const tep_error_str[] = {
-	TEP_ERRORS
-};
-#undef _PE
-
-/*
- * The tools so far have been using the strerror_r() GNU variant, that returns
- * a string, be it the buffer passed or something else.
- *
- * But that, besides being tricky in cases where we expect that the function
- * using strerror_r() returns the error formatted in a provided buffer (we have
- * to check if it returned something else and copy that instead), breaks the
- * build on systems not using glibc, like Alpine Linux, where musl libc is
- * used.
- *
- * So, introduce yet another wrapper, str_error_r(), that has the GNU
- * interface, but uses the portable XSI variant of strerror_r(), so that users
- * rest asured that the provided buffer is used and it is what is returned.
- */
-int tep_strerror(struct tep_handle *tep __maybe_unused,
-		 enum tep_errno errnum, char *buf, size_t buflen)
-{
-	const char *msg;
-	int idx;
-
-	if (!buflen)
-		return 0;
-
-	if (errnum >= 0) {
-		int err = strerror_r(errnum, buf, buflen);
-		buf[buflen - 1] = 0;
-		return err;
-	}
-
-	if (errnum <= __TEP_ERRNO__START ||
-	    errnum >= __TEP_ERRNO__END)
-		return -1;
-
-	idx = errnum - __TEP_ERRNO__START - 1;
-	msg = tep_error_str[idx];
-	snprintf(buf, buflen, "%s", msg);
-
-	return 0;
-}
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
deleted file mode 100644
index 8d5ecd2bf877e..0000000000000
--- a/tools/lib/traceevent/trace-seq.c
+++ /dev/null
@@ -1,249 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-#include "trace-seq.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-
-#include <asm/bug.h>
-#include "event-parse.h"
-#include "event-utils.h"
-
-/*
- * The TRACE_SEQ_POISON is to catch the use of using
- * a trace_seq structure after it was destroyed.
- */
-#define TRACE_SEQ_POISON	((void *)0xdeadbeef)
-#define TRACE_SEQ_CHECK(s)						\
-do {									\
-	if (WARN_ONCE((s)->buffer == TRACE_SEQ_POISON,			\
-		      "Usage of trace_seq after it was destroyed"))	\
-		(s)->state = TRACE_SEQ__BUFFER_POISONED;		\
-} while (0)
-
-#define TRACE_SEQ_CHECK_RET_N(s, n)		\
-do {						\
-	TRACE_SEQ_CHECK(s);			\
-	if ((s)->state != TRACE_SEQ__GOOD)	\
-		return n; 			\
-} while (0)
-
-#define TRACE_SEQ_CHECK_RET(s)   TRACE_SEQ_CHECK_RET_N(s, )
-#define TRACE_SEQ_CHECK_RET0(s)  TRACE_SEQ_CHECK_RET_N(s, 0)
-
-/**
- * trace_seq_init - initialize the trace_seq structure
- * @s: a pointer to the trace_seq structure to initialize
- */
-void trace_seq_init(struct trace_seq *s)
-{
-	s->len = 0;
-	s->readpos = 0;
-	s->buffer_size = TRACE_SEQ_BUF_SIZE;
-	s->buffer = malloc(s->buffer_size);
-	if (s->buffer != NULL)
-		s->state = TRACE_SEQ__GOOD;
-	else
-		s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
-}
-
-/**
- * trace_seq_reset - re-initialize the trace_seq structure
- * @s: a pointer to the trace_seq structure to reset
- */
-void trace_seq_reset(struct trace_seq *s)
-{
-	if (!s)
-		return;
-	TRACE_SEQ_CHECK(s);
-	s->len = 0;
-	s->readpos = 0;
-}
-
-/**
- * trace_seq_destroy - free up memory of a trace_seq
- * @s: a pointer to the trace_seq to free the buffer
- *
- * Only frees the buffer, not the trace_seq struct itself.
- */
-void trace_seq_destroy(struct trace_seq *s)
-{
-	if (!s)
-		return;
-	TRACE_SEQ_CHECK_RET(s);
-	free(s->buffer);
-	s->buffer = TRACE_SEQ_POISON;
-}
-
-static void expand_buffer(struct trace_seq *s)
-{
-	char *buf;
-
-	buf = realloc(s->buffer, s->buffer_size + TRACE_SEQ_BUF_SIZE);
-	if (WARN_ONCE(!buf, "Can't allocate trace_seq buffer memory")) {
-		s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
-		return;
-	}
-
-	s->buffer = buf;
-	s->buffer_size += TRACE_SEQ_BUF_SIZE;
-}
-
-/**
- * trace_seq_printf - sequence printing of trace information
- * @s: trace sequence descriptor
- * @fmt: printf format string
- *
- * It returns 0 if the trace oversizes the buffer's free
- * space, the number of characters printed, or a negative
- * value in case of an error.
- *
- * The tracer may use either sequence operations or its own
- * copy to user routines. To simplify formating of a trace
- * trace_seq_printf is used to store strings into a special
- * buffer (@s). Then the output may be either used by
- * the sequencer or pulled into another buffer.
- */
-int
-trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-{
-	va_list ap;
-	int len;
-	int ret;
-
- try_again:
-	TRACE_SEQ_CHECK_RET0(s);
-
-	len = (s->buffer_size - 1) - s->len;
-
-	va_start(ap, fmt);
-	ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
-	va_end(ap);
-
-	if (ret >= len) {
-		expand_buffer(s);
-		goto try_again;
-	}
-
-	if (ret > 0)
-		s->len += ret;
-
-	return ret;
-}
-
-/**
- * trace_seq_vprintf - sequence printing of trace information
- * @s: trace sequence descriptor
- * @fmt: printf format string
- *
- * It returns 0 if the trace oversizes the buffer's free
- * space, the number of characters printed, or a negative
- * value in case of an error.
- * *
- * The tracer may use either sequence operations or its own
- * copy to user routines. To simplify formating of a trace
- * trace_seq_printf is used to store strings into a special
- * buffer (@s). Then the output may be either used by
- * the sequencer or pulled into another buffer.
- */
-int
-trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
-{
-	int len;
-	int ret;
-
- try_again:
-	TRACE_SEQ_CHECK_RET0(s);
-
-	len = (s->buffer_size - 1) - s->len;
-
-	ret = vsnprintf(s->buffer + s->len, len, fmt, args);
-
-	if (ret >= len) {
-		expand_buffer(s);
-		goto try_again;
-	}
-
-	if (ret > 0)
-		s->len += ret;
-
-	return ret;
-}
-
-/**
- * trace_seq_puts - trace sequence printing of simple string
- * @s: trace sequence descriptor
- * @str: simple string to record
- *
- * The tracer may use either the sequence operations or its own
- * copy to user routines. This function records a simple string
- * into a special buffer (@s) for later retrieval by a sequencer
- * or other mechanism.
- */
-int trace_seq_puts(struct trace_seq *s, const char *str)
-{
-	int len;
-
-	TRACE_SEQ_CHECK_RET0(s);
-
-	len = strlen(str);
-
-	while (len > ((s->buffer_size - 1) - s->len))
-		expand_buffer(s);
-
-	TRACE_SEQ_CHECK_RET0(s);
-
-	memcpy(s->buffer + s->len, str, len);
-	s->len += len;
-
-	return len;
-}
-
-int trace_seq_putc(struct trace_seq *s, unsigned char c)
-{
-	TRACE_SEQ_CHECK_RET0(s);
-
-	while (s->len >= (s->buffer_size - 1))
-		expand_buffer(s);
-
-	TRACE_SEQ_CHECK_RET0(s);
-
-	s->buffer[s->len++] = c;
-
-	return 1;
-}
-
-void trace_seq_terminate(struct trace_seq *s)
-{
-	TRACE_SEQ_CHECK_RET(s);
-
-	/* There's always one character left on the buffer */
-	s->buffer[s->len] = 0;
-}
-
-int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp)
-{
-	TRACE_SEQ_CHECK(s);
-
-	switch (s->state) {
-	case TRACE_SEQ__GOOD:
-		return fprintf(fp, "%.*s", s->len, s->buffer);
-	case TRACE_SEQ__BUFFER_POISONED:
-		fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed");
-		break;
-	case TRACE_SEQ__MEM_ALLOC_FAILED:
-		fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory");
-		break;
-	}
-	return -1;
-}
-
-int trace_seq_do_printf(struct trace_seq *s)
-{
-	return trace_seq_do_fprintf(s, stdout);
-}
diff --git a/tools/lib/traceevent/trace-seq.h b/tools/lib/traceevent/trace-seq.h
deleted file mode 100644
index d68ec69f8d1a8..0000000000000
--- a/tools/lib/traceevent/trace-seq.h
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: LGPL-2.1
-/*
- * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
- *
- */
-
-#ifndef _TRACE_SEQ_H
-#define _TRACE_SEQ_H
-
-#include <stdarg.h>
-#include <stdio.h>
-
-/* ----------------------- trace_seq ----------------------- */
-
-#ifndef TRACE_SEQ_BUF_SIZE
-#define TRACE_SEQ_BUF_SIZE 4096
-#endif
-
-enum trace_seq_fail {
-	TRACE_SEQ__GOOD,
-	TRACE_SEQ__BUFFER_POISONED,
-	TRACE_SEQ__MEM_ALLOC_FAILED,
-};
-
-/*
- * Trace sequences are used to allow a function to call several other functions
- * to create a string of data to use (up to a max of PAGE_SIZE).
- */
-
-struct trace_seq {
-	char			*buffer;
-	unsigned int		buffer_size;
-	unsigned int		len;
-	unsigned int		readpos;
-	enum trace_seq_fail	state;
-};
-
-void trace_seq_init(struct trace_seq *s);
-void trace_seq_reset(struct trace_seq *s);
-void trace_seq_destroy(struct trace_seq *s);
-
-extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
-	__attribute__ ((format (printf, 2, 3)));
-extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
-	__attribute__ ((format (printf, 2, 0)));
-
-extern int trace_seq_puts(struct trace_seq *s, const char *str);
-extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
-
-extern void trace_seq_terminate(struct trace_seq *s);
-
-extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp);
-extern int trace_seq_do_printf(struct trace_seq *s);
-
-#endif /* _TRACE_SEQ_H */
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index c8e8e05b4ff1f..1da7f4b91b4f8 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -3,7 +3,6 @@ tools/arch
 tools/scripts
 tools/build
 tools/include
-tools/lib/traceevent
 tools/lib/api
 tools/lib/bpf
 tools/lib/subcmd
-- 
GitLab


From b9a49f8cb02f08592054b953adcacecd59b7ae4b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 8 Dec 2022 19:28:58 -0300
Subject: [PATCH 3870/4122] perf tools: Check if libtracevent has
 TEP_FIELD_IS_RELATIVE

Some distros have older versions of libtraceevent where
TEP_FIELD_IS_RELATIVE and its associated semantics are not present, so
we need to check if the version has it, it was introduced in
libtraceevent 1.5.0.

Reported-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Tested-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>,
Cc: Stephane Eranian <eranian@google.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config                             | 4 ++++
 tools/perf/builtin-trace.c                             | 2 ++
 tools/perf/util/data-convert-bt.c                      | 2 ++
 tools/perf/util/evsel.c                                | 2 ++
 tools/perf/util/python.c                               | 2 ++
 tools/perf/util/scripting-engines/trace-event-perl.c   | 2 ++
 tools/perf/util/scripting-engines/trace-event-python.c | 2 ++
 tools/perf/util/sort.c                                 | 3 ++-
 8 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 680228e19c1aa..52a4dc029111c 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -1197,6 +1197,10 @@ ifneq ($(NO_LIBTRACEEVENT),1)
     LIBTRACEEVENT_VERSION_CPP := $(shell expr $(LIBTRACEEVENT_VERSION_1) \* 255 \* 255 + $(LIBTRACEEVENT_VERSION_2) \* 255 + $(LIBTRACEEVENT_VERSION_3))
     CFLAGS += -DLIBTRACEEVENT_VERSION=$(LIBTRACEEVENT_VERSION_CPP)
     $(call detected,CONFIG_LIBTRACEEVENT)
+    LIBTRACEEVENT_VERSION_WITH_TEP_FIELD_IS_RELATIVE := $(shell expr 1 \* 255 \* 255 + 5 \* 255 + 0) # 1.5.0
+    ifeq ($(shell test $(LIBTRACEEVENT_VERSION_CPP) -gt $(LIBTRACEEVENT_VERSION_WITH_TEP_FIELD_IS_RELATIVE); echo $$?),0)
+      CFLAGS += -DHAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
+    endif
   else
     dummy := $(warning Warning: libtraceevent is missing limiting functionality, please install libtraceevent-dev/libtraceevent-devel)
   endif
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 6909cd9f48d11..c9802ab549d77 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2729,8 +2729,10 @@ static size_t trace__fprintf_tp_fields(struct trace *trace, struct evsel *evsel,
 				offset = format_field__intval(field, sample, evsel->needs_swap);
 				syscall_arg.len = offset >> 16;
 				offset &= 0xffff;
+#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
 				if (field->flags & TEP_FIELD_IS_RELATIVE)
 					offset += field->offset + field->size;
+#endif
 			}
 
 			val = (uintptr_t)(sample->raw_data + offset);
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 8031b586e8138..b842273458b86 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -322,8 +322,10 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 		offset = tmp_val;
 		len = offset >> 16;
 		offset &= 0xffff;
+#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
 		if (flags & TEP_FIELD_IS_RELATIVE)
 			offset += fmtf->offset + fmtf->size;
+#endif
 	}
 
 	if (flags & TEP_FIELD_IS_ARRAY) {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index ca911856c4b1d..77b2cf5a214ec 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2784,8 +2784,10 @@ void *evsel__rawptr(struct evsel *evsel, struct perf_sample *sample, const char
 	if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 		offset = *(int *)(sample->raw_data + field->offset);
 		offset &= 0xffff;
+#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
 		if (field->flags & TEP_FIELD_IS_RELATIVE)
 			offset += field->offset + field->size;
+#endif
 	}
 
 	return sample->raw_data + offset;
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 6fb84b7455b83..7320f7f777fe0 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -442,8 +442,10 @@ tracepoint_field(struct pyrf_event *pe, struct tep_format_field *field)
 			offset  = val;
 			len     = offset >> 16;
 			offset &= 0xffff;
+#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
 			if (field->flags & TEP_FIELD_IS_RELATIVE)
 				offset += field->offset + field->size;
+#endif
 		}
 		if (field->flags & TEP_FIELD_IS_STRING &&
 		    is_printable_array(data + offset, len)) {
diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c
index 0bacb49408f84..c097b7934fd4e 100644
--- a/tools/perf/util/scripting-engines/trace-event-perl.c
+++ b/tools/perf/util/scripting-engines/trace-event-perl.c
@@ -393,8 +393,10 @@ static void perl_process_tracepoint(struct perf_sample *sample,
 			if (field->flags & TEP_FIELD_IS_DYNAMIC) {
 				offset = *(int *)(data + field->offset);
 				offset &= 0xffff;
+#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
 				if (field->flags & TEP_FIELD_IS_RELATIVE)
 					offset += field->offset + field->size;
+#endif
 			} else
 				offset = field->offset;
 			XPUSHs(sv_2mortal(newSVpv((char *)data + offset, 0)));
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index fabba21919b8f..e930f5f1f36d2 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -994,8 +994,10 @@ static void python_process_tracepoint(struct perf_sample *sample,
 				offset  = val;
 				len     = offset >> 16;
 				offset &= 0xffff;
+#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
 				if (field->flags & TEP_FIELD_IS_RELATIVE)
 					offset += field->offset + field->size;
+#endif
 			}
 			if (field->flags & TEP_FIELD_IS_STRING &&
 			    is_printable_array(data + offset, len)) {
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index c7a97b33e1347..0ecc2cb137920 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2568,9 +2568,10 @@ static int64_t __sort__hde_cmp(struct perf_hpp_fmt *fmt,
 		tep_read_number_field(field, a->raw_data, &dyn);
 		offset = dyn & 0xffff;
 		size = (dyn >> 16) & 0xffff;
+#ifdef HAVE_LIBTRACEEVENT_TEP_FIELD_IS_RELATIVE
 		if (field->flags & TEP_FIELD_IS_RELATIVE)
 			offset += field->offset + field->size;
-
+#endif
 		/* record max width for output */
 		if (size > hde->dynamic_len)
 			hde->dynamic_len = size;
-- 
GitLab


From 232b82d201cbade4f15e243bc0472ac241239d8c Mon Sep 17 00:00:00 2001
From: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Date: Wed, 7 Dec 2022 22:28:15 +0530
Subject: [PATCH 3871/4122] perf test: Update event group check for support of
 uncore event

The event group test checks group creation for combinations of hw, sw
and uncore PMU events. Some of the uncore pmus may require additional
permission to access the counters.

For example, in case of hv_24x7, partition need to have permissions to
access hv_24x7 pmu counters. If not, event_open will fail. Hence add a
sanity check to see if event_open succeeds before proceeding with the
test.

Fixes: 9d9b22bedad13d96 ("perf test: Add event group test for events in multiple PMUs")
Signed-off-by: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Acked-by: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Disha Goel <disgoel@linux.ibm.com>
Cc: Ian Rogers <irogers@google.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nageswara R Sastry <rnsastry@linux.ibm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Link: https://lore.kernel.org/r/20221207165815.774-1-atrajeev@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/event_groups.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/perf/tests/event_groups.c b/tools/perf/tests/event_groups.c
index 612c0444aaa83..029442b4e9c65 100644
--- a/tools/perf/tests/event_groups.c
+++ b/tools/perf/tests/event_groups.c
@@ -51,7 +51,7 @@ static int event_open(int type, unsigned long config, int group_fd)
 static int setup_uncore_event(void)
 {
 	struct perf_pmu *pmu;
-	int i;
+	int i, fd;
 
 	if (list_empty(&pmus))
 		perf_pmu__scan(NULL);
@@ -62,6 +62,18 @@ static int setup_uncore_event(void)
 				pr_debug("Using %s for uncore pmu event\n", pmu->name);
 				types[2] = pmu->type;
 				configs[2] = uncore_pmus[i].config;
+				/*
+				 * Check if the chosen uncore pmu event can be
+				 * used in the test. For example, incase of accessing
+				 * hv_24x7 pmu counters, partition should have
+				 * additional permissions. If not, event open will
+				 * fail. So check if the event open succeeds
+				 * before proceeding.
+				 */
+				fd = event_open(types[2], configs[2], -1);
+				if (fd < 0)
+					return -1;
+				close(fd);
 				return 0;
 			}
 		}
-- 
GitLab


From 167b266bf66c5b93171011ef9d1f09b070c2c537 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 8 Dec 2022 10:26:36 -0800
Subject: [PATCH 3872/4122] perf off_cpu: Fix a typo in BTF tracepoint name, it
 should be 'btf_trace_sched_switch'

In BTF, tracepoint definitions have the "btf_trace_" prefix.  The
off-cpu profiler needs to check the signature of the sched_switch event
using that definition.  But there's a typo (s/bpf/btf/) so it failed
always.

Fixes: b36888f71c8542cd ("perf record: Handle argument change in sched_switch")
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: bpf@vger.kernel.org
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20221208182636.524139-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_off_cpu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index c257813e674ef..01f70b8e705a8 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -102,7 +102,7 @@ static void check_sched_switch_args(void)
 	const struct btf_type *t1, *t2, *t3;
 	u32 type_id;
 
-	type_id = btf__find_by_name_kind(btf, "bpf_trace_sched_switch",
+	type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
 					 BTF_KIND_TYPEDEF);
 	if ((s32)type_id < 0)
 		return;
-- 
GitLab


From 3cad53a6f9cdbafa32ea8413809f23a28734c7c5 Mon Sep 17 00:00:00 2001
From: Khem Raj <raj.khem@gmail.com>
Date: Sun, 4 Dec 2022 18:55:34 -0800
Subject: [PATCH 3873/4122] perf python: Account for multiple words in CC

Sometimes build systems may append options e.g. --sysroot etc. to CC
variable especially in cross-compile environments like yocto project
where CC varable is composed of cross-compiler name and some needed
options for it to work in a relocatable environment.

Therefore separate out the compiler name from rest of the options in CC,
then add the options via second argument to Popen() API

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Khem Raj <raj.khem@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Fangrui Song <maskray@google.com>
Cc: Florian Fainelli <f.fainelli@gmail.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Keeping <john@metanate.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Link: https://lore.kernel.org/r/20221205025534.150006-1-raj.khem@gmail.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py
index e80ffbbfacfb8..4f265d0222c45 100644
--- a/tools/perf/util/setup.py
+++ b/tools/perf/util/setup.py
@@ -7,7 +7,7 @@ cc_is_clang = b"clang version" in Popen([cc.split()[0], "-v"], stderr=PIPE).stde
 src_feature_tests  = getenv('srctree') + '/tools/build/feature'
 
 def clang_has_option(option):
-    cc_output = Popen([cc, option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
+    cc_output = Popen([cc.split()[0], str(cc.split()[1:]) + option, path.join(src_feature_tests, "test-hello.c") ], stderr=PIPE).stderr.readlines()
     return [o for o in cc_output if ((b"unknown argument" in o) or (b"is not supported" in o))] == [ ]
 
 if cc_is_clang:
-- 
GitLab


From fd507d3e359c7e06d74321cd3d8a5ec8769d05a9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 9 Dec 2022 11:07:24 -0800
Subject: [PATCH 3874/4122] perf lock contention: Add lock_data.h for common
 data

Accessing BPF maps should use the same data types.  Add bpf_skel/lock_data.h
to define the common data structures.  No functional changes.

Committer notes:

Fixed contention_key.stack_id missing rename to contention_key.stack_or_task_id.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221209190727.759804-2-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/bpf_lock_contention.c         | 19 ++++--------
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 19 ++----------
 tools/perf/util/bpf_skel/lock_data.h          | 30 +++++++++++++++++++
 3 files changed, 39 insertions(+), 29 deletions(-)
 create mode 100644 tools/perf/util/bpf_skel/lock_data.h

diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index f4ebb9a2e3809..b6a8eb7164b30 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -12,17 +12,10 @@
 #include <bpf/bpf.h>
 
 #include "bpf_skel/lock_contention.skel.h"
+#include "bpf_skel/lock_data.h"
 
 static struct lock_contention_bpf *skel;
 
-struct lock_contention_data {
-	u64 total_time;
-	u64 min_time;
-	u64 max_time;
-	u32 count;
-	u32 flags;
-};
-
 int lock_contention_prepare(struct lock_contention *con)
 {
 	int i, fd;
@@ -110,8 +103,8 @@ int lock_contention_stop(void)
 int lock_contention_read(struct lock_contention *con)
 {
 	int fd, stack, err = 0;
-	s32 prev_key, key;
-	struct lock_contention_data data = {};
+	struct contention_key *prev_key, key;
+	struct contention_data data = {};
 	struct lock_stat *st = NULL;
 	struct machine *machine = con->machine;
 	u64 *stack_trace;
@@ -126,8 +119,8 @@ int lock_contention_read(struct lock_contention *con)
 	if (stack_trace == NULL)
 		return -1;
 
-	prev_key = 0;
-	while (!bpf_map_get_next_key(fd, &prev_key, &key)) {
+	prev_key = NULL;
+	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
 		struct map *kmap;
 		struct symbol *sym;
 		int idx = 0;
@@ -184,7 +177,7 @@ int lock_contention_read(struct lock_contention *con)
 		}
 
 		hlist_add_head(&st->hash_entry, con->result);
-		prev_key = key;
+		prev_key = &key;
 
 		/* we're fine now, reset the values */
 		st = NULL;
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 9681cb59b0df4..64fd1e040ac86 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -5,24 +5,11 @@
 #include <bpf/bpf_tracing.h>
 #include <bpf/bpf_core_read.h>
 
-/* maximum stack trace depth */
-#define MAX_STACKS   8
+#include "lock_data.h"
 
 /* default buffer size */
 #define MAX_ENTRIES  10240
 
-struct contention_key {
-	__s32 stack_id;
-};
-
-struct contention_data {
-	__u64 total_time;
-	__u64 min_time;
-	__u64 max_time;
-	__u32 count;
-	__u32 flags;
-};
-
 struct tstamp_data {
 	__u64 timestamp;
 	__u64 lock;
@@ -34,7 +21,7 @@ struct tstamp_data {
 struct {
 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 	__uint(key_size, sizeof(__u32));
-	__uint(value_size, MAX_STACKS * sizeof(__u64));
+	__uint(value_size, sizeof(__u64));
 	__uint(max_entries, MAX_ENTRIES);
 } stacks SEC(".maps");
 
@@ -154,7 +141,7 @@ int contention_end(u64 *ctx)
 
 	duration = bpf_ktime_get_ns() - pelem->timestamp;
 
-	key.stack_id = pelem->stack_id;
+	key.stack_or_task_id = pelem->stack_id;
 	data = bpf_map_lookup_elem(&lock_stat, &key);
 	if (!data) {
 		struct contention_data first = {
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
new file mode 100644
index 0000000000000..dbdf4caedc4a3
--- /dev/null
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Data structures shared between BPF and tools. */
+#ifndef UTIL_BPF_SKEL_LOCK_DATA_H
+#define UTIL_BPF_SKEL_LOCK_DATA_H
+
+struct contention_key {
+	s32 stack_or_task_id;
+};
+
+#define TASK_COMM_LEN  16
+
+struct contention_task_data {
+	char comm[TASK_COMM_LEN];
+};
+
+struct contention_data {
+	u64 total_time;
+	u64 min_time;
+	u64 max_time;
+	u32 count;
+	u32 flags;
+};
+
+enum lock_aggr_mode {
+	LOCK_AGGR_ADDR = 0,
+	LOCK_AGGR_TASK,
+	LOCK_AGGR_CALLER,
+};
+
+#endif /* UTIL_BPF_SKEL_LOCK_DATA_H */
-- 
GitLab


From eca949b2b4addd941d369d4c2014b87f3c3e203b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 9 Dec 2022 11:07:25 -0800
Subject: [PATCH 3875/4122] perf lock contention: Implement -t/--threads option
 for BPF

The BPF didn't show the per-thread stat properly.  Use task's thread id (PID)
as a key instead of stack_id and add a task_data map to save task comm names.

  $ sudo ./perf lock con -abt -E 5 sleep 1
   contended   total wait     max wait     avg wait          pid   comm

           1    740.66 ms    740.66 ms    740.66 ms         1950   nv_queue
           3    305.50 ms    298.19 ms    101.83 ms         1884   nvidia-modeset/
           1     25.14 us     25.14 us     25.14 us      2725038   EventManager_De
          12     23.09 us      9.30 us      1.92 us            0   swapper
           1     20.18 us     20.18 us     20.18 us      2725033   EventManager_De

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221209190727.759804-3-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-lock.c                     | 13 ++----
 tools/perf/util/bpf_lock_contention.c         | 40 ++++++++++++++++--
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 41 +++++++++++++++++--
 tools/perf/util/lock-contention.h             |  1 +
 4 files changed, 78 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 15ce6358f1279..6fa3cdfec5cbd 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -12,6 +12,7 @@
 #include "util/target.h"
 #include "util/callchain.h"
 #include "util/lock-contention.h"
+#include "util/bpf_skel/lock_data.h"
 
 #include <subcmd/pager.h>
 #include <subcmd/parse-options.h>
@@ -61,11 +62,7 @@ static int max_stack_depth = CONTENTION_STACK_DEPTH;
 static int stack_skip = CONTENTION_STACK_SKIP;
 static int print_nr_entries = INT_MAX / 2;
 
-static enum {
-	LOCK_AGGR_ADDR,
-	LOCK_AGGR_TASK,
-	LOCK_AGGR_CALLER,
-} aggr_mode = LOCK_AGGR_ADDR;
+static enum lock_aggr_mode aggr_mode = LOCK_AGGR_ADDR;
 
 static struct thread_stat *thread_stat_find(u32 tid)
 {
@@ -1619,6 +1616,7 @@ static int __cmd_contention(int argc, const char **argv)
 		.map_nr_entries = bpf_map_entries,
 		.max_stack = max_stack_depth,
 		.stack_skip = stack_skip,
+		.aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : LOCK_AGGR_CALLER,
 	};
 
 	session = perf_session__new(use_bpf ? NULL : &data, &eops);
@@ -1691,11 +1689,6 @@ static int __cmd_contention(int argc, const char **argv)
 	if (select_key(true))
 		goto out_delete;
 
-	if (show_thread_stats)
-		aggr_mode = LOCK_AGGR_TASK;
-	else
-		aggr_mode = LOCK_AGGR_CALLER;
-
 	if (use_bpf) {
 		lock_contention_start();
 		if (argc)
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index b6a8eb7164b30..1590a9f051453 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -5,6 +5,7 @@
 #include "util/map.h"
 #include "util/symbol.h"
 #include "util/target.h"
+#include "util/thread.h"
 #include "util/thread_map.h"
 #include "util/lock-contention.h"
 #include <linux/zalloc.h>
@@ -30,10 +31,17 @@ int lock_contention_prepare(struct lock_contention *con)
 	}
 
 	bpf_map__set_value_size(skel->maps.stacks, con->max_stack * sizeof(u64));
-	bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
 	bpf_map__set_max_entries(skel->maps.lock_stat, con->map_nr_entries);
 	bpf_map__set_max_entries(skel->maps.tstamp, con->map_nr_entries);
 
+	if (con->aggr_mode == LOCK_AGGR_TASK) {
+		bpf_map__set_max_entries(skel->maps.task_data, con->map_nr_entries);
+		bpf_map__set_max_entries(skel->maps.stacks, 1);
+	} else {
+		bpf_map__set_max_entries(skel->maps.task_data, 1);
+		bpf_map__set_max_entries(skel->maps.stacks, con->map_nr_entries);
+	}
+
 	if (target__has_cpu(target))
 		ncpus = perf_cpu_map__nr(evlist->core.user_requested_cpus);
 	if (target__has_task(target))
@@ -82,7 +90,9 @@ int lock_contention_prepare(struct lock_contention *con)
 		bpf_map_update_elem(fd, &pid, &val, BPF_ANY);
 	}
 
+	/* these don't work well if in the rodata section */
 	skel->bss->stack_skip = con->stack_skip;
+	skel->bss->aggr_mode = con->aggr_mode;
 
 	lock_contention_bpf__attach(skel);
 	return 0;
@@ -102,7 +112,7 @@ int lock_contention_stop(void)
 
 int lock_contention_read(struct lock_contention *con)
 {
-	int fd, stack, err = 0;
+	int fd, stack, task_fd, err = 0;
 	struct contention_key *prev_key, key;
 	struct contention_data data = {};
 	struct lock_stat *st = NULL;
@@ -112,6 +122,7 @@ int lock_contention_read(struct lock_contention *con)
 
 	fd = bpf_map__fd(skel->maps.lock_stat);
 	stack = bpf_map__fd(skel->maps.stacks);
+	task_fd = bpf_map__fd(skel->maps.task_data);
 
 	con->lost = skel->bss->lost;
 
@@ -119,6 +130,13 @@ int lock_contention_read(struct lock_contention *con)
 	if (stack_trace == NULL)
 		return -1;
 
+	if (con->aggr_mode == LOCK_AGGR_TASK) {
+		struct thread *idle = __machine__findnew_thread(machine,
+								/*pid=*/0,
+								/*tid=*/0);
+		thread__set_comm(idle, "swapper", /*timestamp=*/0);
+	}
+
 	prev_key = NULL;
 	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
 		struct map *kmap;
@@ -143,6 +161,22 @@ int lock_contention_read(struct lock_contention *con)
 
 		st->flags = data.flags;
 
+		if (con->aggr_mode == LOCK_AGGR_TASK) {
+			struct contention_task_data task;
+			struct thread *t;
+
+			st->addr = key.stack_or_task_id;
+
+			/* do not update idle comm which contains CPU number */
+			if (st->addr) {
+				bpf_map_lookup_elem(task_fd, &key, &task);
+				t = __machine__findnew_thread(machine, /*pid=*/-1,
+							      key.stack_or_task_id);
+				thread__set_comm(t, task.comm, /*timestamp=*/0);
+			}
+			goto next;
+		}
+
 		bpf_map_lookup_elem(stack, &key, stack_trace);
 
 		/* skip lock internal functions */
@@ -175,7 +209,7 @@ int lock_contention_read(struct lock_contention *con)
 			if (st->callstack == NULL)
 				break;
 		}
-
+next:
 		hlist_add_head(&st->hash_entry, con->result);
 		prev_key = &key;
 
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index 64fd1e040ac86..cd405adcd252b 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -41,6 +41,13 @@ struct {
 	__uint(max_entries, MAX_ENTRIES);
 } lock_stat SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct contention_task_data));
+	__uint(max_entries, MAX_ENTRIES);
+} task_data SEC(".maps");
+
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
 	__uint(key_size, sizeof(__u32));
@@ -61,6 +68,9 @@ int has_cpu;
 int has_task;
 int stack_skip;
 
+/* determine the key of lock stat */
+int aggr_mode;
+
 /* error stat */
 int lost;
 
@@ -87,6 +97,19 @@ static inline int can_record(void)
 	return 1;
 }
 
+static inline void update_task_data(__u32 pid)
+{
+	struct contention_task_data *p;
+
+	p = bpf_map_lookup_elem(&task_data, &pid);
+	if (p == NULL) {
+		struct contention_task_data data;
+
+		bpf_get_current_comm(data.comm, sizeof(data.comm));
+		bpf_map_update_elem(&task_data, &pid, &data, BPF_NOEXIST);
+	}
+}
+
 SEC("tp_btf/contention_begin")
 int contention_begin(u64 *ctx)
 {
@@ -115,10 +138,14 @@ int contention_begin(u64 *ctx)
 	pelem->timestamp = bpf_ktime_get_ns();
 	pelem->lock = (__u64)ctx[0];
 	pelem->flags = (__u32)ctx[1];
-	pelem->stack_id = bpf_get_stackid(ctx, &stacks, BPF_F_FAST_STACK_CMP | stack_skip);
 
-	if (pelem->stack_id < 0)
-		lost++;
+	if (aggr_mode == LOCK_AGGR_CALLER) {
+		pelem->stack_id = bpf_get_stackid(ctx, &stacks,
+						  BPF_F_FAST_STACK_CMP | stack_skip);
+		if (pelem->stack_id < 0)
+			lost++;
+	}
+
 	return 0;
 }
 
@@ -141,7 +168,13 @@ int contention_end(u64 *ctx)
 
 	duration = bpf_ktime_get_ns() - pelem->timestamp;
 
-	key.stack_or_task_id = pelem->stack_id;
+	if (aggr_mode == LOCK_AGGR_CALLER) {
+		key.stack_or_task_id = pelem->stack_id;
+	} else {
+		key.stack_or_task_id = pid;
+		update_task_data(pid);
+	}
+
 	data = bpf_map_lookup_elem(&lock_stat, &key);
 	if (!data) {
 		struct contention_data first = {
diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h
index a2346875098dc..47fd47fb56c1d 100644
--- a/tools/perf/util/lock-contention.h
+++ b/tools/perf/util/lock-contention.h
@@ -117,6 +117,7 @@ struct lock_contention {
 	int lost;
 	int max_stack;
 	int stack_skip;
+	int aggr_mode;
 };
 
 #ifdef HAVE_BPF_SKEL
-- 
GitLab


From 688d2e8de231c54e289b640547af246343732348 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 9 Dec 2022 11:07:26 -0800
Subject: [PATCH 3876/4122] perf lock contention: Add -l/--lock-addr option

The -l/--lock-addr option is to implement per-lock-instance contention
stat using LOCK_AGGR_ADDR.  It displays lock address and optionally
symbol name if exists.

  $ sudo ./perf lock con -abl sleep 1
   contended   total wait     max wait     avg wait            address   symbol

           1     36.28 us     36.28 us     36.28 us   ffff92615d6448b8
           9     10.91 us      1.84 us      1.21 us   ffffffffbaed50c0   rcu_state
           1     10.49 us     10.49 us     10.49 us   ffff9262ac4f0c80
           8      4.68 us      1.67 us       585 ns   ffffffffbae07a40   jiffies_lock
           3      3.03 us      1.45 us      1.01 us   ffff9262277861e0
           1       924 ns       924 ns       924 ns   ffff926095ba9d20
           1       436 ns       436 ns       436 ns   ffff9260bfda4f60

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221209190727.759804-4-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-lock.txt        |  4 +
 tools/perf/builtin-lock.c                     | 84 +++++++++++++++----
 tools/perf/util/bpf_lock_contention.c         | 23 +++--
 .../perf/util/bpf_skel/lock_contention.bpf.c  | 17 +++-
 tools/perf/util/bpf_skel/lock_data.h          |  2 +-
 5 files changed, 102 insertions(+), 28 deletions(-)

diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index 4958a1ffa1cca..38e79d45e4265 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -168,6 +168,10 @@ CONTENTION OPTIONS
 --entries=<value>::
 	Display this many entries.
 
+-l::
+--lock-addr::
+	Show lock contention stat by address
+
 
 SEE ALSO
 --------
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6fa3cdfec5cbd..25c0a5e5051f2 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -56,6 +56,7 @@ static struct rb_root		thread_stats;
 
 static bool combine_locks;
 static bool show_thread_stats;
+static bool show_lock_addrs;
 static bool use_bpf;
 static unsigned long bpf_map_entries = 10240;
 static int max_stack_depth = CONTENTION_STACK_DEPTH;
@@ -999,13 +1000,32 @@ static int report_lock_contention_begin_event(struct evsel *evsel,
 	ls = lock_stat_find(key);
 	if (!ls) {
 		char buf[128];
-		const char *caller = buf;
+		const char *name = "";
 		unsigned int flags = evsel__intval(evsel, sample, "flags");
+		struct machine *machine = &session->machines.host;
+		struct map *kmap;
+		struct symbol *sym;
+
+		switch (aggr_mode) {
+		case LOCK_AGGR_ADDR:
+			/* make sure it loads the kernel map to find lock symbols */
+			map__load(machine__kernel_map(machine));
+
+			sym = machine__find_kernel_symbol(machine, key, &kmap);
+			if (sym)
+				name = sym->name;
+			break;
+		case LOCK_AGGR_CALLER:
+			name = buf;
+			if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
+				name = "Unknown";
+			break;
+		case LOCK_AGGR_TASK:
+		default:
+			break;
+		}
 
-		if (lock_contention_caller(evsel, sample, buf, sizeof(buf)) < 0)
-			caller = "Unknown";
-
-		ls = lock_stat_findnew(key, caller, flags);
+		ls = lock_stat_findnew(key, name, flags);
 		if (!ls)
 			return -ENOMEM;
 
@@ -1460,10 +1480,19 @@ static void print_contention_result(struct lock_contention *con)
 		list_for_each_entry(key, &lock_keys, list)
 			pr_info("%*s ", key->len, key->header);
 
-		if (show_thread_stats)
+		switch (aggr_mode) {
+		case LOCK_AGGR_TASK:
 			pr_info("  %10s   %s\n\n", "pid", "comm");
-		else
+			break;
+		case LOCK_AGGR_CALLER:
 			pr_info("  %10s   %s\n\n", "type", "caller");
+			break;
+		case LOCK_AGGR_ADDR:
+			pr_info("  %16s   %s\n\n", "address", "symbol");
+			break;
+		default:
+			break;
+		}
 	}
 
 	bad = total = printed = 0;
@@ -1471,6 +1500,9 @@ static void print_contention_result(struct lock_contention *con)
 		bad = bad_hist[BROKEN_CONTENDED];
 
 	while ((st = pop_from_result())) {
+		struct thread *t;
+		int pid;
+
 		total += use_bpf ? st->nr_contended : 1;
 		if (st->broken)
 			bad++;
@@ -1480,18 +1512,24 @@ static void print_contention_result(struct lock_contention *con)
 			pr_info(" ");
 		}
 
-		if (show_thread_stats) {
-			struct thread *t;
-			int pid = st->addr;
-
-			/* st->addr contains tid of thread */
+		switch (aggr_mode) {
+		case LOCK_AGGR_CALLER:
+			pr_info("  %10s   %s\n", get_type_str(st), st->name);
+			break;
+		case LOCK_AGGR_TASK:
+			pid = st->addr;
 			t = perf_session__findnew(session, pid);
 			pr_info("  %10d   %s\n", pid, thread__comm_str(t));
-			goto next;
+			break;
+		case LOCK_AGGR_ADDR:
+			pr_info("  %016llx   %s\n", (unsigned long long)st->addr,
+				st->name ? : "");
+			break;
+		default:
+			break;
 		}
 
-		pr_info("  %10s   %s\n", get_type_str(st), st->name);
-		if (verbose) {
+		if (aggr_mode == LOCK_AGGR_CALLER && verbose) {
 			struct map *kmap;
 			struct symbol *sym;
 			char buf[128];
@@ -1508,7 +1546,6 @@ static void print_contention_result(struct lock_contention *con)
 			}
 		}
 
-next:
 		if (++printed >= print_nr_entries)
 			break;
 	}
@@ -1616,7 +1653,6 @@ static int __cmd_contention(int argc, const char **argv)
 		.map_nr_entries = bpf_map_entries,
 		.max_stack = max_stack_depth,
 		.stack_skip = stack_skip,
-		.aggr_mode = show_thread_stats ? LOCK_AGGR_TASK : LOCK_AGGR_CALLER,
 	};
 
 	session = perf_session__new(use_bpf ? NULL : &data, &eops);
@@ -1627,6 +1663,9 @@ static int __cmd_contention(int argc, const char **argv)
 
 	con.machine = &session->machines.host;
 
+	con.aggr_mode = aggr_mode = show_thread_stats ? LOCK_AGGR_TASK :
+		show_lock_addrs ? LOCK_AGGR_ADDR : LOCK_AGGR_CALLER;
+
 	/* for lock function check */
 	symbol_conf.sort_by_name = true;
 	symbol__init(&session->header.env);
@@ -1907,6 +1946,7 @@ int cmd_lock(int argc, const char **argv)
 		    "Set the number of stack depth to skip when finding a lock caller, "
 		    "Default: " __stringify(CONTENTION_STACK_SKIP)),
 	OPT_INTEGER('E', "entries", &print_nr_entries, "display this many functions"),
+	OPT_BOOLEAN('l', "lock-addr", &show_lock_addrs, "show lock stats by address"),
 	OPT_PARENT(lock_options)
 	};
 
@@ -1976,6 +2016,16 @@ int cmd_lock(int argc, const char **argv)
 			argc = parse_options(argc, argv, contention_options,
 					     contention_usage, 0);
 		}
+
+		if (show_thread_stats && show_lock_addrs) {
+			pr_err("Cannot use thread and addr mode together\n");
+			parse_options_usage(contention_usage, contention_options,
+					    "threads", 0);
+			parse_options_usage(NULL, contention_options,
+					    "lock-addr", 0);
+			return -1;
+		}
+
 		rc = __cmd_contention(argc, argv);
 	} else {
 		usage_with_options(lock_usage, lock_options);
diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c
index 1590a9f051453..8e1b791dc58f1 100644
--- a/tools/perf/util/bpf_lock_contention.c
+++ b/tools/perf/util/bpf_lock_contention.c
@@ -137,11 +137,15 @@ int lock_contention_read(struct lock_contention *con)
 		thread__set_comm(idle, "swapper", /*timestamp=*/0);
 	}
 
+	/* make sure it loads the kernel map */
+	map__load(maps__first(machine->kmaps));
+
 	prev_key = NULL;
 	while (!bpf_map_get_next_key(fd, prev_key, &key)) {
 		struct map *kmap;
 		struct symbol *sym;
 		int idx = 0;
+		s32 stack_id;
 
 		/* to handle errors in the loop body */
 		err = -1;
@@ -160,24 +164,31 @@ int lock_contention_read(struct lock_contention *con)
 			st->avg_wait_time = data.total_time / data.count;
 
 		st->flags = data.flags;
+		st->addr = key.aggr_key;
 
 		if (con->aggr_mode == LOCK_AGGR_TASK) {
 			struct contention_task_data task;
 			struct thread *t;
-
-			st->addr = key.stack_or_task_id;
+			int pid = key.aggr_key;
 
 			/* do not update idle comm which contains CPU number */
 			if (st->addr) {
-				bpf_map_lookup_elem(task_fd, &key, &task);
-				t = __machine__findnew_thread(machine, /*pid=*/-1,
-							      key.stack_or_task_id);
+				bpf_map_lookup_elem(task_fd, &pid, &task);
+				t = __machine__findnew_thread(machine, /*pid=*/-1, pid);
 				thread__set_comm(t, task.comm, /*timestamp=*/0);
 			}
 			goto next;
 		}
 
-		bpf_map_lookup_elem(stack, &key, stack_trace);
+		if (con->aggr_mode == LOCK_AGGR_ADDR) {
+			sym = machine__find_kernel_symbol(machine, st->addr, &kmap);
+			if (sym)
+				st->name = strdup(sym->name);
+			goto next;
+		}
+
+		stack_id = key.aggr_key;
+		bpf_map_lookup_elem(stack, &stack_id, stack_trace);
 
 		/* skip lock internal functions */
 		while (machine__is_lock_function(machine, stack_trace[idx]) &&
diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
index cd405adcd252b..11b0fc7ee53b0 100644
--- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
+++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
@@ -168,11 +168,20 @@ int contention_end(u64 *ctx)
 
 	duration = bpf_ktime_get_ns() - pelem->timestamp;
 
-	if (aggr_mode == LOCK_AGGR_CALLER) {
-		key.stack_or_task_id = pelem->stack_id;
-	} else {
-		key.stack_or_task_id = pid;
+	switch (aggr_mode) {
+	case LOCK_AGGR_CALLER:
+		key.aggr_key = pelem->stack_id;
+		break;
+	case LOCK_AGGR_TASK:
+		key.aggr_key = pid;
 		update_task_data(pid);
+		break;
+	case LOCK_AGGR_ADDR:
+		key.aggr_key = pelem->lock;
+		break;
+	default:
+		/* should not happen */
+		return 0;
 	}
 
 	data = bpf_map_lookup_elem(&lock_stat, &key);
diff --git a/tools/perf/util/bpf_skel/lock_data.h b/tools/perf/util/bpf_skel/lock_data.h
index dbdf4caedc4a3..ce71cf1a7e1e4 100644
--- a/tools/perf/util/bpf_skel/lock_data.h
+++ b/tools/perf/util/bpf_skel/lock_data.h
@@ -4,7 +4,7 @@
 #define UTIL_BPF_SKEL_LOCK_DATA_H
 
 struct contention_key {
-	s32 stack_or_task_id;
+	u64 aggr_key;  /* can be stack_id, pid or lock addr */
 };
 
 #define TASK_COMM_LEN  16
-- 
GitLab


From 22ddcb6b4a62eb97f7d988ac59032b74c64afb20 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 9 Dec 2022 11:07:27 -0800
Subject: [PATCH 3877/4122] perf test: Update perf lock contention test

Add test cases for the task and addr aggregation modes.

  $ sudo ./perf test -v contention
   86: kernel lock contention analysis test                            :
  --- start ---
  test child forked, pid 680006
  Testing perf lock record and perf lock contention
  Testing perf lock contention --use-bpf
  Testing perf lock record and perf lock contention at the same time
  Testing perf lock contention --threads
  Testing perf lock contention --lock-addr
  test child finished with 0
  ---- end ----
  kernel lock contention analysis test: Ok

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <song@kernel.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221209190727.759804-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/lock_contention.sh | 48 +++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh
index f7bd0d8eb5c38..cc9ceb9e19caf 100755
--- a/tools/perf/tests/shell/lock_contention.sh
+++ b/tools/perf/tests/shell/lock_contention.sh
@@ -77,10 +77,58 @@ test_record_concurrent()
 	fi
 }
 
+test_aggr_task()
+{
+	echo "Testing perf lock contention --threads"
+	perf lock contention -i ${perfdata} -t -E 1 -q 2> ${result}
+	if [ $(cat "${result}" | wc -l) != "1" ]; then
+		echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+		err=1
+		exit
+	fi
+
+	if ! perf lock con -b true > /dev/null 2>&1 ; then
+		return
+	fi
+
+	# the perf lock contention output goes to the stderr
+	perf lock con -a -b -t -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
+	if [ $(cat "${result}" | wc -l) != "1" ]; then
+		echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l)
+		err=1
+		exit
+	fi
+}
+
+test_aggr_addr()
+{
+	echo "Testing perf lock contention --lock-addr"
+	perf lock contention -i ${perfdata} -l -E 1 -q 2> ${result}
+	if [ $(cat "${result}" | wc -l) != "1" ]; then
+		echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l)
+		err=1
+		exit
+	fi
+
+	if ! perf lock con -b true > /dev/null 2>&1 ; then
+		return
+	fi
+
+	# the perf lock contention output goes to the stderr
+	perf lock con -a -b -t -E 1 -q -- perf bench sched messaging > /dev/null 2> ${result}
+	if [ $(cat "${result}" | wc -l) != "1" ]; then
+		echo "[Fail] BPF result count is not 1:" $(cat "${result}" | wc -l)
+		err=1
+		exit
+	fi
+}
+
 check
 
 test_record
 test_bpf
 test_record_concurrent
+test_aggr_task
+test_aggr_addr
 
 exit ${err}
-- 
GitLab


From ab6bd55e9968ff063c0ee28f8941903811dee09a Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 12 Dec 2022 15:55:09 +0000
Subject: [PATCH 3878/4122] perf cs-etm: Print unknown header version as an
 error

This is an error rather than just for the raw trace dump so always print
it as an error. Also remove the duplicate header version check.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20221212155513.2259623-2-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cs-etm.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 16db965ac995e..aeb1e30888dbf 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2623,14 +2623,7 @@ static void cs_etm__print_auxtrace_info(__u64 *val, int num)
 {
 	int i, cpu = 0, version, err;
 
-	/* bail out early on bad header version */
 	version = val[0];
-	if (version > CS_HEADER_CURRENT_VERSION) {
-		/* failure.. return */
-		fprintf(stdout, "	Unknown Header Version = %x, ", version);
-		fprintf(stdout, "Version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
-		return;
-	}
 
 	for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
 		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
@@ -2916,9 +2909,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	/* Look for version of the header */
 	hdr_version = ptr[0];
 	if (hdr_version > CS_HEADER_CURRENT_VERSION) {
-		/* print routine will print an error on bad version */
-		if (dump_trace)
-			cs_etm__print_auxtrace_info(auxtrace_info->priv, 0);
+		pr_err("\nCS ETM Trace: Unknown Header Version = %#" PRIx64, hdr_version);
+		pr_err(", version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
 		return -EINVAL;
 	}
 
-- 
GitLab


From fe55ba183264d40d44bfbe71deb148ff64f2ea2d Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 12 Dec 2022 15:55:10 +0000
Subject: [PATCH 3879/4122] perf cs-etm: Remove unused stub methods

These aren't used outside of cs-etm so don't need stubs. Leave
cs_etm__process_auxtrace_info() which is used externally, and add an
error message so that it's obvious to users why it causes errors.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20221212155513.2259623-3-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cs-etm.h | 34 ++--------------------------------
 1 file changed, 2 insertions(+), 32 deletions(-)

diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 90c83f932d9a4..401f74be98d60 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -7,6 +7,7 @@
 #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__
 #define INCLUDE__UTIL_PERF_CS_ETM_H__
 
+#include "debug.h"
 #include "util/event.h"
 #include <linux/bits.h>
 
@@ -218,40 +219,9 @@ static inline int
 cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
 			      struct perf_session *session __maybe_unused)
 {
+	pr_err("\nCS ETM Trace: OpenCSD is not linked in, please recompile with CORESIGHT=1\n");
 	return -1;
 }
-
-static inline int cs_etm__get_cpu(u8 trace_chan_id __maybe_unused,
-				  int *cpu __maybe_unused)
-{
-	return -1;
-}
-
-static inline int cs_etm__etmq_set_tid(
-				struct cs_etm_queue *etmq __maybe_unused,
-				pid_t tid __maybe_unused,
-				u8 trace_chan_id __maybe_unused)
-{
-	return -1;
-}
-
-static inline bool cs_etm__etmq_is_timeless(
-				struct cs_etm_queue *etmq __maybe_unused)
-{
-	/* What else to return? */
-	return true;
-}
-
-static inline void cs_etm__etmq_set_traceid_queue_timestamp(
-				struct cs_etm_queue *etmq __maybe_unused,
-				u8 trace_chan_id __maybe_unused) {}
-
-static inline struct cs_etm_packet_queue *cs_etm__etmq_get_packet_queue(
-				struct cs_etm_queue *etmq __maybe_unused,
-				u8 trace_chan_id __maybe_unused)
-{
-	return NULL;
-}
 #endif
 
 #endif
-- 
GitLab


From b00204f5c21ca1bd38b8cb45d12a605b4e60a884 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 12 Dec 2022 15:55:11 +0000
Subject: [PATCH 3880/4122] perf cs-etm: Tidy up auxtrace info header printing

cs_etm__print_auxtrace_info() is called twice in case there is an error
somewhere in cs_etm__process_auxtrace_info(), but all the info is
already available at the beginning so just print it there instead.

Also use u64 and the already cast ptr variable to make it more
consistent with the rest of the etm code.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20221212155513.2259623-4-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cs-etm.c | 28 ++++++++++------------------
 1 file changed, 10 insertions(+), 18 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index aeb1e30888dbf..bf8fbcec2d69a 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2541,14 +2541,14 @@ static const char * const cs_etmv4_priv_fmts[] = {
 };
 
 static const char * const param_unk_fmt =
-	"	Unknown parameter [%d]	       %llx\n";
+	"	Unknown parameter [%d]	       %"PRIx64"\n";
 static const char * const magic_unk_fmt =
-	"	Magic number Unknown	       %llx\n";
+	"	Magic number Unknown	       %"PRIx64"\n";
 
-static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset)
+static int cs_etm__print_cpu_metadata_v0(u64 *val, int *offset)
 {
 	int i = *offset, j, nr_params = 0, fmt_offset;
-	__u64 magic;
+	u64 magic;
 
 	/* check magic value */
 	magic = val[i + CS_ETM_MAGIC];
@@ -2580,10 +2580,10 @@ static int cs_etm__print_cpu_metadata_v0(__u64 *val, int *offset)
 	return 0;
 }
 
-static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset)
+static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset)
 {
 	int i = *offset, j, total_params = 0;
-	__u64 magic;
+	u64 magic;
 
 	magic = val[i + CS_ETM_MAGIC];
 	/* total params to print is NR_PARAMS + common block size for v1 */
@@ -2619,7 +2619,7 @@ static int cs_etm__print_cpu_metadata_v1(__u64 *val, int *offset)
 	return 0;
 }
 
-static void cs_etm__print_auxtrace_info(__u64 *val, int num)
+static void cs_etm__print_auxtrace_info(u64 *val, int num)
 {
 	int i, cpu = 0, version, err;
 
@@ -2925,6 +2925,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
 				    0xffffffff);
 
+	if (dump_trace)
+		cs_etm__print_auxtrace_info(ptr, num_cpu);
+
 	/*
 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
 	 * has to be made for each packet that gets decoded, optimizing access
@@ -3074,10 +3077,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 		goto err_delete_thread;
 	}
 
-	if (dump_trace) {
-		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
-	}
-
 	err = cs_etm__synth_events(etm, session);
 	if (err)
 		goto err_delete_thread;
@@ -3113,12 +3112,5 @@ err_free_traceid_list:
 	intlist__delete(traceid_list);
 err_free_hdr:
 	zfree(&hdr);
-	/*
-	 * At this point, as a minimum we have valid header. Dump the rest of
-	 * the info section - the print routines will error out on structural
-	 * issues.
-	 */
-	if (dump_trace)
-		cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu);
 	return err;
 }
-- 
GitLab


From fd63091f2a0317f8943c816acf3ac5f4002adf5c Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 12 Dec 2022 15:55:12 +0000
Subject: [PATCH 3881/4122] perf cs-etm: Cleanup
 cs_etm__process_auxtrace_info()

hdr is a copy of 3 values of ptr and doesn't need to be long lived. So
just use ptr instead which means the malloc and the extra error path can
be removed to simplify things.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20221212155513.2259623-5-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cs-etm.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index bf8fbcec2d69a..ab30591a6c6a8 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2888,7 +2888,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	int num_cpu, trcidr_idx;
 	int err = 0;
 	int i, j;
-	u64 *ptr, *hdr = NULL;
+	u64 *ptr = NULL;
 	u64 **metadata = NULL;
 	u64 hdr_version;
 
@@ -2914,15 +2914,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 		return -EINVAL;
 	}
 
-	hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_MAX);
-	if (!hdr)
-		return -ENOMEM;
-
-	/* Extract header information - see cs-etm.h for format */
-	for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
-		hdr[i] = ptr[i];
-	num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff;
-	pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) &
+	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
+	pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) &
 				    0xffffffff);
 
 	if (dump_trace)
@@ -2934,10 +2927,8 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	 * in anything other than a sequential array is worth doing.
 	 */
 	traceid_list = intlist__new(NULL);
-	if (!traceid_list) {
-		err = -ENOMEM;
-		goto err_free_hdr;
-	}
+	if (!traceid_list)
+		return -ENOMEM;
 
 	metadata = zalloc(sizeof(*metadata) * num_cpu);
 	if (!metadata) {
@@ -2945,6 +2936,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 		goto err_free_traceid_list;
 	}
 
+	/* Start parsing after the common part of the header */
+	i = CS_HEADER_VERSION_MAX;
+
 	/*
 	 * The metadata is stored in the auxtrace_info section and encodes
 	 * the configuration of the ARM embedded trace macrocell which is
@@ -3043,7 +3037,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 
 	etm->num_cpu = num_cpu;
 	etm->pmu_type = pmu_type;
-	etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0);
+	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
 	etm->metadata = metadata;
 	etm->auxtrace_type = auxtrace_info->type;
 	etm->timeless_decoding = cs_etm__is_timeless_decoding(etm);
@@ -3110,7 +3104,5 @@ err_free_metadata:
 	zfree(&metadata);
 err_free_traceid_list:
 	intlist__delete(traceid_list);
-err_free_hdr:
-	zfree(&hdr);
 	return err;
 }
-- 
GitLab


From 55c1de9973d665161c1c0c146ff15c6884060e19 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Mon, 12 Dec 2022 15:55:13 +0000
Subject: [PATCH 3882/4122] perf cs-etm: Print auxtrace info even if OpenCSD
 isn't linked

Printing the info doesn't have any dependency on OpenCSD, and neither
does recording Coresight data. Because it's sometimes useful to look at
the info for debugging, it makes sense to be able to see it on the same
platform that the recording was made on.

So pull the auxtrace info printing parts into a new file that is always
compiled into Perf.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Al Grant <Al.Grant@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20221212155513.2259623-6-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/Build         |   1 +
 tools/perf/util/cs-etm-base.c | 174 ++++++++++++++++++++++++++++++++++
 tools/perf/util/cs-etm.c      | 170 ++-------------------------------
 tools/perf/util/cs-etm.h      |  12 ++-
 4 files changed, 191 insertions(+), 166 deletions(-)
 create mode 100644 tools/perf/util/cs-etm-base.c

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 8345464d01303..79b9498886a20 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -126,6 +126,7 @@ ifdef CONFIG_LIBOPENCSD
 perf-$(CONFIG_AUXTRACE) += cs-etm.o
 perf-$(CONFIG_AUXTRACE) += cs-etm-decoder/
 endif
+perf-$(CONFIG_AUXTRACE) += cs-etm-base.o
 
 perf-y += parse-branch-options.o
 perf-y += dump-insn.o
diff --git a/tools/perf/util/cs-etm-base.c b/tools/perf/util/cs-etm-base.c
new file mode 100644
index 0000000000000..597542410854d
--- /dev/null
+++ b/tools/perf/util/cs-etm-base.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * File for any parts of the Coresight decoding that don't require
+ * OpenCSD.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+
+#include "cs-etm.h"
+
+static const char * const cs_etm_global_header_fmts[] = {
+	[CS_HEADER_VERSION]	= "	Header version		       %llx\n",
+	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
+	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
+};
+
+static const char * const cs_etm_priv_fmts[] = {
+	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
+	[CS_ETM_CPU]		= "	CPU			       %lld\n",
+	[CS_ETM_NR_TRC_PARAMS]	= "	NR_TRC_PARAMS		       %llx\n",
+	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
+	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
+	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
+	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
+};
+
+static const char * const cs_etmv4_priv_fmts[] = {
+	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
+	[CS_ETM_CPU]		= "	CPU			       %lld\n",
+	[CS_ETM_NR_TRC_PARAMS]	= "	NR_TRC_PARAMS		       %llx\n",
+	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
+	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
+	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
+	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
+	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
+	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
+	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
+	[CS_ETE_TRCDEVARCH]	= "	TRCDEVARCH                     %llx\n"
+};
+
+static const char * const param_unk_fmt =
+	"	Unknown parameter [%d]	       %"PRIx64"\n";
+static const char * const magic_unk_fmt =
+	"	Magic number Unknown	       %"PRIx64"\n";
+
+static int cs_etm__print_cpu_metadata_v0(u64 *val, int *offset)
+{
+	int i = *offset, j, nr_params = 0, fmt_offset;
+	u64 magic;
+
+	/* check magic value */
+	magic = val[i + CS_ETM_MAGIC];
+	if ((magic != __perf_cs_etmv3_magic) &&
+	    (magic != __perf_cs_etmv4_magic)) {
+		/* failure - note bad magic value */
+		fprintf(stdout, magic_unk_fmt, magic);
+		return -EINVAL;
+	}
+
+	/* print common header block */
+	fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]);
+	fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]);
+
+	if (magic == __perf_cs_etmv3_magic) {
+		nr_params = CS_ETM_NR_TRC_PARAMS_V0;
+		fmt_offset = CS_ETM_ETMCR;
+		/* after common block, offset format index past NR_PARAMS */
+		for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+			fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+	} else if (magic == __perf_cs_etmv4_magic) {
+		nr_params = CS_ETMV4_NR_TRC_PARAMS_V0;
+		fmt_offset = CS_ETMV4_TRCCONFIGR;
+		/* after common block, offset format index past NR_PARAMS */
+		for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
+			fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+	}
+	*offset = i;
+	return 0;
+}
+
+static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset)
+{
+	int i = *offset, j, total_params = 0;
+	u64 magic;
+
+	magic = val[i + CS_ETM_MAGIC];
+	/* total params to print is NR_PARAMS + common block size for v1 */
+	total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1;
+
+	if (magic == __perf_cs_etmv3_magic) {
+		for (j = 0; j < total_params; j++, i++) {
+			/* if newer record - could be excess params */
+			if (j >= CS_ETM_PRIV_MAX)
+				fprintf(stdout, param_unk_fmt, j, val[i]);
+			else
+				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
+		}
+	} else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) {
+		/*
+		 * ETE and ETMv4 can be printed in the same block because the number of parameters
+		 * is saved and they share the list of parameter names. ETE is also only supported
+		 * in V1 files.
+		 */
+		for (j = 0; j < total_params; j++, i++) {
+			/* if newer record - could be excess params */
+			if (j >= CS_ETE_PRIV_MAX)
+				fprintf(stdout, param_unk_fmt, j, val[i]);
+			else
+				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
+		}
+	} else {
+		/* failure - note bad magic value and error out */
+		fprintf(stdout, magic_unk_fmt, magic);
+		return -EINVAL;
+	}
+	*offset = i;
+	return 0;
+}
+
+static void cs_etm__print_auxtrace_info(u64 *val, int num)
+{
+	int i, cpu = 0, version, err;
+
+	version = val[0];
+
+	for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
+		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
+
+	for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) {
+		if (version == 0)
+			err = cs_etm__print_cpu_metadata_v0(val, &i);
+		else if (version == 1)
+			err = cs_etm__print_cpu_metadata_v1(val, &i);
+		if (err)
+			return;
+	}
+}
+
+/*
+ * Do some basic checks and print the auxtrace info header before calling
+ * into cs_etm__process_auxtrace_info_full() which requires OpenCSD to be
+ * linked in. This allows some basic debugging if OpenCSD is missing.
+ */
+int cs_etm__process_auxtrace_info(union perf_event *event,
+				  struct perf_session *session)
+{
+	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
+	int event_header_size = sizeof(struct perf_event_header);
+	int num_cpu;
+	u64 *ptr = NULL;
+	u64 hdr_version;
+
+	if (auxtrace_info->header.size < (event_header_size + INFO_HEADER_SIZE))
+		return -EINVAL;
+
+	/* First the global part */
+	ptr = (u64 *) auxtrace_info->priv;
+
+	/* Look for version of the header */
+	hdr_version = ptr[0];
+	if (hdr_version > CS_HEADER_CURRENT_VERSION) {
+		pr_err("\nCS ETM Trace: Unknown Header Version = %#" PRIx64, hdr_version);
+		pr_err(", version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
+		return -EINVAL;
+	}
+
+	if (dump_trace) {
+		num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
+		cs_etm__print_auxtrace_info(ptr, num_cpu);
+	}
+
+	return cs_etm__process_auxtrace_info_full(event, session);
+}
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index ab30591a6c6a8..33303d03c2fa4 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -2510,134 +2510,6 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm)
 	return timeless_decoding;
 }
 
-static const char * const cs_etm_global_header_fmts[] = {
-	[CS_HEADER_VERSION]	= "	Header version		       %llx\n",
-	[CS_PMU_TYPE_CPUS]	= "	PMU type/num cpus	       %llx\n",
-	[CS_ETM_SNAPSHOT]	= "	Snapshot		       %llx\n",
-};
-
-static const char * const cs_etm_priv_fmts[] = {
-	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
-	[CS_ETM_CPU]		= "	CPU			       %lld\n",
-	[CS_ETM_NR_TRC_PARAMS]	= "	NR_TRC_PARAMS		       %llx\n",
-	[CS_ETM_ETMCR]		= "	ETMCR			       %llx\n",
-	[CS_ETM_ETMTRACEIDR]	= "	ETMTRACEIDR		       %llx\n",
-	[CS_ETM_ETMCCER]	= "	ETMCCER			       %llx\n",
-	[CS_ETM_ETMIDR]		= "	ETMIDR			       %llx\n",
-};
-
-static const char * const cs_etmv4_priv_fmts[] = {
-	[CS_ETM_MAGIC]		= "	Magic number		       %llx\n",
-	[CS_ETM_CPU]		= "	CPU			       %lld\n",
-	[CS_ETM_NR_TRC_PARAMS]	= "	NR_TRC_PARAMS		       %llx\n",
-	[CS_ETMV4_TRCCONFIGR]	= "	TRCCONFIGR		       %llx\n",
-	[CS_ETMV4_TRCTRACEIDR]	= "	TRCTRACEIDR		       %llx\n",
-	[CS_ETMV4_TRCIDR0]	= "	TRCIDR0			       %llx\n",
-	[CS_ETMV4_TRCIDR1]	= "	TRCIDR1			       %llx\n",
-	[CS_ETMV4_TRCIDR2]	= "	TRCIDR2			       %llx\n",
-	[CS_ETMV4_TRCIDR8]	= "	TRCIDR8			       %llx\n",
-	[CS_ETMV4_TRCAUTHSTATUS] = "	TRCAUTHSTATUS		       %llx\n",
-	[CS_ETE_TRCDEVARCH]	= "	TRCDEVARCH                     %llx\n"
-};
-
-static const char * const param_unk_fmt =
-	"	Unknown parameter [%d]	       %"PRIx64"\n";
-static const char * const magic_unk_fmt =
-	"	Magic number Unknown	       %"PRIx64"\n";
-
-static int cs_etm__print_cpu_metadata_v0(u64 *val, int *offset)
-{
-	int i = *offset, j, nr_params = 0, fmt_offset;
-	u64 magic;
-
-	/* check magic value */
-	magic = val[i + CS_ETM_MAGIC];
-	if ((magic != __perf_cs_etmv3_magic) &&
-	    (magic != __perf_cs_etmv4_magic)) {
-		/* failure - note bad magic value */
-		fprintf(stdout, magic_unk_fmt, magic);
-		return -EINVAL;
-	}
-
-	/* print common header block */
-	fprintf(stdout, cs_etm_priv_fmts[CS_ETM_MAGIC], val[i++]);
-	fprintf(stdout, cs_etm_priv_fmts[CS_ETM_CPU], val[i++]);
-
-	if (magic == __perf_cs_etmv3_magic) {
-		nr_params = CS_ETM_NR_TRC_PARAMS_V0;
-		fmt_offset = CS_ETM_ETMCR;
-		/* after common block, offset format index past NR_PARAMS */
-		for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
-			fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
-	} else if (magic == __perf_cs_etmv4_magic) {
-		nr_params = CS_ETMV4_NR_TRC_PARAMS_V0;
-		fmt_offset = CS_ETMV4_TRCCONFIGR;
-		/* after common block, offset format index past NR_PARAMS */
-		for (j = fmt_offset; j < nr_params + fmt_offset; j++, i++)
-			fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
-	}
-	*offset = i;
-	return 0;
-}
-
-static int cs_etm__print_cpu_metadata_v1(u64 *val, int *offset)
-{
-	int i = *offset, j, total_params = 0;
-	u64 magic;
-
-	magic = val[i + CS_ETM_MAGIC];
-	/* total params to print is NR_PARAMS + common block size for v1 */
-	total_params = val[i + CS_ETM_NR_TRC_PARAMS] + CS_ETM_COMMON_BLK_MAX_V1;
-
-	if (magic == __perf_cs_etmv3_magic) {
-		for (j = 0; j < total_params; j++, i++) {
-			/* if newer record - could be excess params */
-			if (j >= CS_ETM_PRIV_MAX)
-				fprintf(stdout, param_unk_fmt, j, val[i]);
-			else
-				fprintf(stdout, cs_etm_priv_fmts[j], val[i]);
-		}
-	} else if (magic == __perf_cs_etmv4_magic || magic == __perf_cs_ete_magic) {
-		/*
-		 * ETE and ETMv4 can be printed in the same block because the number of parameters
-		 * is saved and they share the list of parameter names. ETE is also only supported
-		 * in V1 files.
-		 */
-		for (j = 0; j < total_params; j++, i++) {
-			/* if newer record - could be excess params */
-			if (j >= CS_ETE_PRIV_MAX)
-				fprintf(stdout, param_unk_fmt, j, val[i]);
-			else
-				fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]);
-		}
-	} else {
-		/* failure - note bad magic value and error out */
-		fprintf(stdout, magic_unk_fmt, magic);
-		return -EINVAL;
-	}
-	*offset = i;
-	return 0;
-}
-
-static void cs_etm__print_auxtrace_info(u64 *val, int num)
-{
-	int i, cpu = 0, version, err;
-
-	version = val[0];
-
-	for (i = 0; i < CS_HEADER_VERSION_MAX; i++)
-		fprintf(stdout, cs_etm_global_header_fmts[i], val[i]);
-
-	for (i = CS_HEADER_VERSION_MAX; cpu < num; cpu++) {
-		if (version == 0)
-			err = cs_etm__print_cpu_metadata_v0(val, &i);
-		else if (version == 1)
-			err = cs_etm__print_cpu_metadata_v1(val, &i);
-		if (err)
-			return;
-	}
-}
-
 /*
  * Read a single cpu parameter block from the auxtrace_info priv block.
  *
@@ -2874,15 +2746,13 @@ static int cs_etm__queue_aux_records(struct perf_session *session)
 	return 0;
 }
 
-int cs_etm__process_auxtrace_info(union perf_event *event,
-				  struct perf_session *session)
+int cs_etm__process_auxtrace_info_full(union perf_event *event,
+				       struct perf_session *session)
 {
 	struct perf_record_auxtrace_info *auxtrace_info = &event->auxtrace_info;
 	struct cs_etm_auxtrace *etm = NULL;
 	struct int_node *inode;
-	unsigned int pmu_type;
 	int event_header_size = sizeof(struct perf_event_header);
-	int info_header_size;
 	int total_size = auxtrace_info->header.size;
 	int priv_size = 0;
 	int num_cpu, trcidr_idx;
@@ -2890,36 +2760,6 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	int i, j;
 	u64 *ptr = NULL;
 	u64 **metadata = NULL;
-	u64 hdr_version;
-
-	/*
-	 * sizeof(auxtrace_info_event::type) +
-	 * sizeof(auxtrace_info_event::reserved) == 8
-	 */
-	info_header_size = 8;
-
-	if (total_size < (event_header_size + info_header_size))
-		return -EINVAL;
-
-	priv_size = total_size - event_header_size - info_header_size;
-
-	/* First the global part */
-	ptr = (u64 *) auxtrace_info->priv;
-
-	/* Look for version of the header */
-	hdr_version = ptr[0];
-	if (hdr_version > CS_HEADER_CURRENT_VERSION) {
-		pr_err("\nCS ETM Trace: Unknown Header Version = %#" PRIx64, hdr_version);
-		pr_err(", version supported <= %x\n", CS_HEADER_CURRENT_VERSION);
-		return -EINVAL;
-	}
-
-	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
-	pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) &
-				    0xffffffff);
-
-	if (dump_trace)
-		cs_etm__print_auxtrace_info(ptr, num_cpu);
 
 	/*
 	 * Create an RB tree for traceID-metadata tuple.  Since the conversion
@@ -2930,6 +2770,9 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	if (!traceid_list)
 		return -ENOMEM;
 
+	/* First the global part */
+	ptr = (u64 *) auxtrace_info->priv;
+	num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
 	metadata = zalloc(sizeof(*metadata) * num_cpu);
 	if (!metadata) {
 		err = -ENOMEM;
@@ -3008,6 +2851,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	 * The following tests if the correct number of double words was
 	 * present in the auxtrace info section.
 	 */
+	priv_size = total_size - event_header_size - INFO_HEADER_SIZE;
 	if (i * 8 != priv_size) {
 		err = -EINVAL;
 		goto err_free_metadata;
@@ -3036,7 +2880,7 @@ int cs_etm__process_auxtrace_info(union perf_event *event,
 	etm->machine = &session->machines.host;
 
 	etm->num_cpu = num_cpu;
-	etm->pmu_type = pmu_type;
+	etm->pmu_type = (unsigned int) ((ptr[CS_PMU_TYPE_CPUS] >> 32) & 0xffffffff);
 	etm->snapshot_mode = (ptr[CS_ETM_SNAPSHOT] != 0);
 	etm->metadata = metadata;
 	etm->auxtrace_type = auxtrace_info->type;
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 401f74be98d60..5da50d5dae6bd 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -202,9 +202,13 @@ struct cs_etm_packet_queue {
 #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64))
 #define CS_ETE_PRIV_SIZE (CS_ETE_PRIV_MAX * sizeof(u64))
 
-#ifdef HAVE_CSTRACE_SUPPORT
+#define INFO_HEADER_SIZE (sizeof(((struct perf_record_auxtrace_info *)0)->type) + \
+			  sizeof(((struct perf_record_auxtrace_info *)0)->reserved__))
+
 int cs_etm__process_auxtrace_info(union perf_event *event,
 				  struct perf_session *session);
+
+#ifdef HAVE_CSTRACE_SUPPORT
 int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
 int cs_etm__get_pid_fmt(u8 trace_chan_id, u64 *pid_fmt);
 int cs_etm__etmq_set_tid(struct cs_etm_queue *etmq,
@@ -214,10 +218,12 @@ void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
 					      u8 trace_chan_id);
 struct cs_etm_packet_queue
 *cs_etm__etmq_get_packet_queue(struct cs_etm_queue *etmq, u8 trace_chan_id);
+int cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
+				       struct perf_session *session __maybe_unused);
 #else
 static inline int
-cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused,
-			      struct perf_session *session __maybe_unused)
+cs_etm__process_auxtrace_info_full(union perf_event *event __maybe_unused,
+				   struct perf_session *session __maybe_unused)
 {
 	pr_err("\nCS ETM Trace: OpenCSD is not linked in, please recompile with CORESIGHT=1\n");
 	return -1;
-- 
GitLab


From e76aff0523f7d3393f967bc13b10bb04b759abfa Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Tue, 6 Dec 2022 10:44:04 -0500
Subject: [PATCH 3883/4122] perf script: Introduce task analyzer python script

Introduce a new 'perf script' to analyze task scheduling behavior.

During the task analysis, some data is always needed - which goes beyond
the simple time of switching on and off a task (process/thread). This
concerns for example the runtime of a process or the frequency with
which the process was called. This script serves to simplify this
recurring analyze process. It immediately provides the user with helpful
task characteristic information about the tasks runtimes.

Usage:

Recorded can be in two ways:

  $ perf script record tasks-analyzer -- sleep 10
  $ perf record -e sched:sched_switch -a -- sleep 10

The script can parse all perf.data files, most important: sched:sched_switch
events are mandatory, other events will be ignored.

Most simple report use case is to just call the script without arguments:

  $ perf script report tasks-analyzer
      Switched-In      Switched-Out CPU      PID      TID             Comm    Runtime     Time Out-In
  15576.658891407   15576.659156086   4     2412     2428            gdbus        265            1949
  15576.659111320   15576.659455410   0     2412     2412      gnome-shell        344            2267
  15576.659491326   15576.659506173   2       74       74      kworker/2:1         15           13145
  15576.659506173   15576.659825748   2     2858     2858  gnome-terminal-        320           63263
  15576.659871270   15576.659902872   6    20932    20932    kworker/u16:0         32         2314582
  15576.659909951   15576.659945501   3    27264    27264               sh         36              -1
  15576.659853285   15576.659971052   7    27265    27265             perf        118         5050741
  [...]

What is not shown here are the ASCII color sequences. For example, if
the task consists of only one thread, the TID is grayed out.

Runtime is the time the task was running on the CPU, Time Out-In is the
time between the process being scheduled *out* and scheduled back *in*.
So the last time span between two executions. If -1 is printed, then the
task simply ran the first time in the measurements - a Out-In delta
could not be calculated.

In addition to the chronological representation, there is a summary on
task level. This output can be additionally switched on via the
--summary option and provides information such as max, min & average
runtime per process. The maximum runtime is often important for
debugging. The call looks like this:

  $ perf script report tasks-analyzer --summary
  Summary
       Task Information                       Runtime Information
    PID   TID            Comm Runs Accumulated    Mean  Median  Min   Max          Max At
     14    14     ksoftirqd/0   13         334      26      15    9   127 15571.621211956
     15    15     rcu_preempt  133        1778      13      13    2    33 15572.581176024
     16    16     migration/0    3          49      16      13   12    24 15571.608915425
     20    20     migration/1    3          34      11      13    8    13 15571.639101555
     25    25     migration/2    3          32      11      12    9    12 15575.639239896
  [...]

Besides these two options, there are a number of other options that change the
output and behavior. This can be queried via --help. Options worth mentioning include:

- filter-tasks         - filter out unneeded tasks, --filter-task 1337,/sbin/init
- highlight-tasks      - more pleasant focusing, --highlight-tasks 1:red,mutt:yellow
- extended-times       - show combinations of elapsed times between schedule in/schedule out
- summary-extended     - summary with additional information, like maximum delta time statistics
- rename-comms-by-tids - handy for inexpressive processnames like python, --rename 1337:my-python-app
- ms                   - show timestamps in milliseconds, nanoseconds is also possible (--ns)
- time-limit           - limit the analyzer to a time range, --time-limit 15576.0:15576.1

Script is tested and prime time ready for python2 & python3:

- make PYTHON=python3 prefix=/usr/local install
- make PYTHON=python2 prefix=/usr/local install

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20221206154406.41941-2-petar.gligor@gmail.com
Signed-off-by: Petar Gligoric <petar.gligoric@rohde-schwarz.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../scripts/python/bin/task-analyzer-record   |   2 +
 .../scripts/python/bin/task-analyzer-report   |   3 +
 tools/perf/scripts/python/task-analyzer.py    | 838 ++++++++++++++++++
 3 files changed, 843 insertions(+)
 create mode 100755 tools/perf/scripts/python/bin/task-analyzer-record
 create mode 100755 tools/perf/scripts/python/bin/task-analyzer-report
 create mode 100755 tools/perf/scripts/python/task-analyzer.py

diff --git a/tools/perf/scripts/python/bin/task-analyzer-record b/tools/perf/scripts/python/bin/task-analyzer-record
new file mode 100755
index 0000000000000..0f6b51bb2767f
--- /dev/null
+++ b/tools/perf/scripts/python/bin/task-analyzer-record
@@ -0,0 +1,2 @@
+#!/bin/bash
+perf record -e sched:sched_switch -e sched:sched_migrate_task "$@"
diff --git a/tools/perf/scripts/python/bin/task-analyzer-report b/tools/perf/scripts/python/bin/task-analyzer-report
new file mode 100755
index 0000000000000..4b16a8cc40a0e
--- /dev/null
+++ b/tools/perf/scripts/python/bin/task-analyzer-report
@@ -0,0 +1,3 @@
+#!/bin/bash
+# description: analyze timings of tasks
+perf script -s "$PERF_EXEC_PATH"/scripts/python/task-analyzer.py -- "$@"
diff --git a/tools/perf/scripts/python/task-analyzer.py b/tools/perf/scripts/python/task-analyzer.py
new file mode 100755
index 0000000000000..f74abe50f3b27
--- /dev/null
+++ b/tools/perf/scripts/python/task-analyzer.py
@@ -0,0 +1,838 @@
+# task-analyzer.py - comprehensive perf tasks analysis
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2022, Hagen Paul Pfeifer <hagen@jauu.net>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# Usage:
+#
+#     perf record -e sched:sched_switch -a -- sleep 10
+#     perf script report task-analyzer
+#
+
+from __future__ import print_function
+import sys
+import os
+import string
+import argparse
+import decimal
+
+
+sys.path.append(
+    os.environ["PERF_EXEC_PATH"] + "/scripts/python/Perf-Trace-Util/lib/Perf/Trace"
+)
+from perf_trace_context import *
+from Core import *
+
+# Definition of possible ASCII color codes
+_COLORS = {
+    "grey": "\033[90m",
+    "red": "\033[91m",
+    "green": "\033[92m",
+    "yellow": "\033[93m",
+    "blue": "\033[94m",
+    "violet": "\033[95m",
+    "reset": "\033[0m",
+}
+
+# Columns will have a static size to align everything properly
+# Support of 116 days of active update with nano precision
+LEN_SWITCHED_IN = len("9999999.999999999")  # 17
+LEN_SWITCHED_OUT = len("9999999.999999999")  # 17
+LEN_CPU = len("000")
+LEN_PID = len("maxvalue")  # 8
+LEN_TID = len("maxvalue")  # 8
+LEN_COMM = len("max-comms-length")  # 16
+LEN_RUNTIME = len("999999.999")  # 10
+# Support of 3.45 hours of timespans
+LEN_OUT_IN = len("99999999999.999")  # 15
+LEN_OUT_OUT = len("99999999999.999")  # 15
+LEN_IN_IN = len("99999999999.999")  # 15
+LEN_IN_OUT = len("99999999999.999")  # 15
+
+
+# py2/py3 compatibility layer, see PEP469
+try:
+    dict.iteritems
+except AttributeError:
+    # py3
+    def itervalues(d):
+        return iter(d.values())
+
+    def iteritems(d):
+        return iter(d.items())
+
+else:
+    # py2
+    def itervalues(d):
+        return d.itervalues()
+
+    def iteritems(d):
+        return d.iteritems()
+
+
+def _check_color():
+    global _COLORS
+    """user enforced no-color or if stdout is no tty we disable colors"""
+    if sys.stdout.isatty() and args.stdio_color != "never":
+        return
+    _COLORS = {
+        "grey": "",
+        "red": "",
+        "green": "",
+        "yellow": "",
+        "blue": "",
+        "violet": "",
+        "reset": "",
+    }
+
+
+def _parse_args():
+    global args
+    parser = argparse.ArgumentParser(description="Analyze tasks behavior")
+    parser.add_argument(
+        "--time-limit",
+        default=[],
+        help=
+            "print tasks only in time[s] window e.g"
+        " --time-limit 123.111:789.222(print all between 123.111 and 789.222)"
+        " --time-limit 123: (print all from 123)"
+        " --time-limit :456 (print all until incl. 456)",
+    )
+    parser.add_argument(
+        "--summary", action="store_true", help="print addtional runtime information"
+    )
+    parser.add_argument(
+        "--summary-only", action="store_true", help="print only summary without traces"
+    )
+    parser.add_argument(
+        "--summary-extended",
+        action="store_true",
+        help="print the summary with additional information of max inter task times"
+            " relative to the prev task",
+    )
+    parser.add_argument(
+        "--ns", action="store_true", help="show timestamps in nanoseconds"
+    )
+    parser.add_argument(
+        "--ms", action="store_true", help="show timestamps in miliseconds"
+    )
+    parser.add_argument(
+        "--extended-times",
+        action="store_true",
+        help="Show the elapsed times between schedule in/schedule out"
+            " of this task and the schedule in/schedule out of previous occurrence"
+            " of the same task",
+    )
+    parser.add_argument(
+        "--filter-tasks",
+        default=[],
+        help="filter out unneeded tasks by tid, pid or processname."
+        " E.g --filter-task 1337,/sbin/init ",
+    )
+    parser.add_argument(
+        "--limit-to-tasks",
+        default=[],
+        help="limit output to selected task by tid, pid, processname."
+        " E.g --limit-to-tasks 1337,/sbin/init",
+    )
+    parser.add_argument(
+        "--highlight-tasks",
+        default="",
+        help="colorize special tasks by their pid/tid/comm."
+        " E.g. --highlight-tasks 1:red,mutt:yellow"
+        " Colors available: red,grey,yellow,blue,violet,green",
+    )
+    parser.add_argument(
+        "--rename-comms-by-tids",
+        default="",
+        help="rename task names by using tid (<tid>:<newname>,<tid>:<newname>)"
+            " This option is handy for inexpressive processnames like python interpreted"
+            " process. E.g --rename 1337:my-python-app",
+    )
+    parser.add_argument(
+        "--stdio-color",
+        default="auto",
+        choices=["always", "never", "auto"],
+        help="always, never or auto, allowing configuring color output"
+            " via the command line",
+    )
+    args = parser.parse_args()
+    args.tid_renames = dict()
+
+    _argument_filter_sanity_check()
+    _argument_prepare_check()
+
+
+def time_uniter(unit):
+    picker = {
+        "s": 1,
+        "ms": 1e3,
+        "us": 1e6,
+        "ns": 1e9,
+    }
+    return picker[unit]
+
+
+def _init_db():
+    global db
+    db = dict()
+    db["running"] = dict()
+    db["cpu"] = dict()
+    db["tid"] = dict()
+    db["global"] = []
+    if args.summary or args.summary_extended or args.summary_only:
+        db["task_info"] = dict()
+        db["runtime_info"] = dict()
+        # min values for summary depending on the header
+        db["task_info"]["pid"] = len("PID")
+        db["task_info"]["tid"] = len("TID")
+        db["task_info"]["comm"] = len("Comm")
+        db["runtime_info"]["runs"] = len("Runs")
+        db["runtime_info"]["acc"] = len("Accumulated")
+        db["runtime_info"]["max"] = len("Max")
+        db["runtime_info"]["max_at"] = len("Max At")
+        db["runtime_info"]["min"] = len("Min")
+        db["runtime_info"]["mean"] = len("Mean")
+        db["runtime_info"]["median"] = len("Median")
+        if args.summary_extended:
+            db["inter_times"] = dict()
+            db["inter_times"]["out_in"] = len("Out-In")
+            db["inter_times"]["inter_at"] = len("At")
+            db["inter_times"]["out_out"] = len("Out-Out")
+            db["inter_times"]["in_in"] = len("In-In")
+            db["inter_times"]["in_out"] = len("In-Out")
+
+
+def _median(numbers):
+    """phython3 hat statistics module - we have nothing"""
+    n = len(numbers)
+    index = n // 2
+    if n % 2:
+        return sorted(numbers)[index]
+    return sum(sorted(numbers)[index - 1 : index + 1]) / 2
+
+
+def _mean(numbers):
+    return sum(numbers) / len(numbers)
+
+
+class Timespans(object):
+    """
+    The elapsed time between two occurrences of the same task is being tracked with the
+    help of this class. There are 4 of those Timespans Out-Out, In-Out, Out-In and
+    In-In.
+    The first half of the name signals the first time point of the
+    first task. The second half of the name represents the second
+    timepoint of the second task.
+    """
+
+    def __init__(self):
+        self._last_start = None
+        self._last_finish = None
+        self.out_out = -1
+        self.in_out = -1
+        self.out_in = -1
+        self.in_in = -1
+        if args.summary_extended:
+            self._time_in = -1
+            self.max_out_in = -1
+            self.max_at = -1
+            self.max_in_out = -1
+            self.max_in_in = -1
+            self.max_out_out = -1
+
+    def feed(self, task):
+        """
+        Called for every recorded trace event to find process pair and calculate the
+        task timespans. Chronological ordering, feed does not do reordering
+        """
+        if not self._last_finish:
+            self._last_start = task.time_in(time_unit)
+            self._last_finish = task.time_out(time_unit)
+            return
+        self._time_in = task.time_in()
+        time_in = task.time_in(time_unit)
+        time_out = task.time_out(time_unit)
+        self.in_in = time_in - self._last_start
+        self.out_in = time_in - self._last_finish
+        self.in_out = time_out - self._last_start
+        self.out_out = time_out - self._last_finish
+        if args.summary_extended:
+            self._update_max_entries()
+        self._last_finish = task.time_out(time_unit)
+        self._last_start = task.time_in(time_unit)
+
+    def _update_max_entries(self):
+        if self.in_in > self.max_in_in:
+            self.max_in_in = self.in_in
+        if self.out_out > self.max_out_out:
+            self.max_out_out = self.out_out
+        if self.in_out > self.max_in_out:
+            self.max_in_out = self.in_out
+        if self.out_in > self.max_out_in:
+            self.max_out_in = self.out_in
+            self.max_at = self._time_in
+
+
+
+
+class Summary(object):
+    """
+    Primary instance for calculating the summary output. Processes the whole trace to
+    find and memorize relevant data such as mean, max et cetera. This instance handles
+    dynamic alignment aspects for summary output.
+    """
+
+    def __init__(self):
+        self._body = []
+
+    class AlignmentHelper:
+        """
+        Used to calculated the alignment for the output of the summary.
+        """
+        def __init__(self, pid, tid, comm, runs, acc, mean,
+                    median, min, max, max_at):
+            self.pid = pid
+            self.tid = tid
+            self.comm = comm
+            self.runs = runs
+            self.acc = acc
+            self.mean = mean
+            self.median = median
+            self.min = min
+            self.max = max
+            self.max_at = max_at
+            if args.summary_extended:
+                self.out_in = None
+                self.inter_at = None
+                self.out_out = None
+                self.in_in = None
+                self.in_out = None
+
+    def _print_header(self):
+        '''
+        Output is trimmed in _format_stats thus additional adjustment in the header
+        is needed, depending on the choice of timeunit. The adjustment corresponds
+        to the amount of column titles being adjusted in _column_titles.
+        '''
+        decimal_precision = 6 if not args.ns else 9
+        fmt = " {{:^{}}}".format(sum(db["task_info"].values()))
+        fmt += " {{:^{}}}".format(
+            sum(db["runtime_info"].values()) - 2 * decimal_precision
+            )
+        _header = ("Task Information", "Runtime Information")
+
+        if args.summary_extended:
+            fmt += " {{:^{}}}".format(
+                sum(db["inter_times"].values()) - 4 * decimal_precision
+                )
+            _header += ("Max Inter Task Times",)
+        print(fmt.format(*_header))
+
+    def _column_titles(self):
+        """
+        Cells are being processed and displayed in different way so an alignment adjust
+        is implemented depeding on the choice of the timeunit. The positions of the max
+        values are being displayed in grey. Thus in their format two additional {},
+        are placed for color set and reset.
+        """
+        decimal_precision, time_precision = _prepare_fmt_precision()
+        fmt = " {{:>{}}}".format(db["task_info"]["pid"])
+        fmt += " {{:>{}}}".format(db["task_info"]["tid"])
+        fmt += " {{:>{}}}".format(db["task_info"]["comm"])
+        fmt += " {{:>{}}}".format(db["runtime_info"]["runs"])
+        fmt += " {{:>{}}}".format(db["runtime_info"]["acc"])
+        fmt += " {{:>{}}}".format(db["runtime_info"]["mean"])
+        fmt += " {{:>{}}}".format(db["runtime_info"]["median"])
+        fmt += " {{:>{}}}".format(db["runtime_info"]["min"] - decimal_precision)
+        fmt += " {{:>{}}}".format(db["runtime_info"]["max"] - decimal_precision)
+        fmt += " {{}}{{:>{}}}{{}}".format(db["runtime_info"]["max_at"] - time_precision)
+
+        column_titles = ("PID", "TID", "Comm")
+        column_titles += ("Runs", "Accumulated", "Mean", "Median", "Min", "Max")
+        column_titles += (_COLORS["grey"], "At", _COLORS["reset"])
+
+        if args.summary_extended:
+            fmt += " {{:>{}}}".format(db["inter_times"]["out_in"] - decimal_precision)
+            fmt += " {{}}{{:>{}}}{{}}".format(
+                db["inter_times"]["inter_at"] - time_precision
+            )
+            fmt += " {{:>{}}}".format(db["inter_times"]["out_out"] - decimal_precision)
+            fmt += " {{:>{}}}".format(db["inter_times"]["in_in"] - decimal_precision)
+            fmt += " {{:>{}}}".format(db["inter_times"]["in_out"] - decimal_precision)
+
+            column_titles += ("Out-In", _COLORS["grey"], "Max At", _COLORS["reset"],
+                        "Out-Out", "In-In", "In-Out")
+        print(fmt.format(*column_titles))
+
+    def _task_stats(self):
+        """calculates the stats of every task and constructs the printable summary"""
+        for tid in sorted(db["tid"]):
+            color_one_sample = _COLORS["grey"]
+            color_reset = _COLORS["reset"]
+            no_executed = 0
+            runtimes = []
+            time_in = []
+            timespans = Timespans()
+            for task in db["tid"][tid]:
+                pid = task.pid
+                comm = task.comm
+                no_executed += 1
+                runtimes.append(task.runtime(time_unit))
+                time_in.append(task.time_in())
+                timespans.feed(task)
+            if len(runtimes) > 1:
+                color_one_sample = ""
+                color_reset = ""
+            time_max = max(runtimes)
+            time_min = min(runtimes)
+            max_at = time_in[runtimes.index(max(runtimes))]
+
+            # The size of the decimal after sum,mean and median varies, thus we cut
+            # the decimal number, by rounding it. It has no impact on the output,
+            # because we have a precision of the decimal points at the output.
+            time_sum = round(sum(runtimes), 3)
+            time_mean = round(_mean(runtimes), 3)
+            time_median = round(_median(runtimes), 3)
+
+            align_helper = self.AlignmentHelper(pid, tid, comm, no_executed, time_sum,
+                                    time_mean, time_median, time_min, time_max, max_at)
+            self._body.append([pid, tid, comm, no_executed, time_sum, color_one_sample,
+                                time_mean, time_median, time_min, time_max,
+                                _COLORS["grey"], max_at, _COLORS["reset"], color_reset])
+            if args.summary_extended:
+                self._body[-1].extend([timespans.max_out_in,
+                                _COLORS["grey"], timespans.max_at,
+                                _COLORS["reset"], timespans.max_out_out,
+                                timespans.max_in_in,
+                                timespans.max_in_out])
+                align_helper.out_in = timespans.max_out_in
+                align_helper.inter_at = timespans.max_at
+                align_helper.out_out = timespans.max_out_out
+                align_helper.in_in = timespans.max_in_in
+                align_helper.in_out = timespans.max_in_out
+            self._calc_alignments_summary(align_helper)
+
+    def _format_stats(self):
+        decimal_precision, time_precision = _prepare_fmt_precision()
+        fmt = " {{:>{}d}}".format(db["task_info"]["pid"])
+        fmt += " {{:>{}d}}".format(db["task_info"]["tid"])
+        fmt += " {{:>{}}}".format(db["task_info"]["comm"])
+        fmt += " {{:>{}d}}".format(db["runtime_info"]["runs"])
+        fmt += " {{:>{}.{}f}}".format(db["runtime_info"]["acc"], time_precision)
+        fmt += " {{}}{{:>{}.{}f}}".format(db["runtime_info"]["mean"], time_precision)
+        fmt += " {{:>{}.{}f}}".format(db["runtime_info"]["median"], time_precision)
+        fmt += " {{:>{}.{}f}}".format(
+            db["runtime_info"]["min"] - decimal_precision, time_precision
+            )
+        fmt += " {{:>{}.{}f}}".format(
+            db["runtime_info"]["max"] - decimal_precision, time_precision
+            )
+        fmt += " {{}}{{:>{}.{}f}}{{}}{{}}".format(
+            db["runtime_info"]["max_at"] - time_precision, decimal_precision
+        )
+        if args.summary_extended:
+            fmt += " {{:>{}.{}f}}".format(
+                db["inter_times"]["out_in"] - decimal_precision, time_precision
+            )
+            fmt += " {{}}{{:>{}.{}f}}{{}}".format(
+                db["inter_times"]["inter_at"] - time_precision, decimal_precision
+            )
+            fmt += " {{:>{}.{}f}}".format(
+                db["inter_times"]["out_out"] - decimal_precision, time_precision
+            )
+            fmt += " {{:>{}.{}f}}".format(
+                db["inter_times"]["in_in"] - decimal_precision, time_precision
+            )
+            fmt += " {{:>{}.{}f}}".format(
+                db["inter_times"]["in_out"] - decimal_precision, time_precision
+            )
+        return fmt
+
+
+    def _calc_alignments_summary(self, align_helper):
+        # Length is being cut in 3 groups so that further addition is easier to handle.
+        # The length of every argument from the alignment helper is being checked if it
+        # is longer than the longest until now. In that case the length is being saved.
+        for key in db["task_info"]:
+            if len(str(getattr(align_helper, key))) > db["task_info"][key]:
+                db["task_info"][key] = len(str(getattr(align_helper, key)))
+        for key in db["runtime_info"]:
+            if len(str(getattr(align_helper, key))) > db["runtime_info"][key]:
+                db["runtime_info"][key] = len(str(getattr(align_helper, key)))
+        if args.summary_extended:
+            for key in db["inter_times"]:
+                if len(str(getattr(align_helper, key))) > db["inter_times"][key]:
+                    db["inter_times"][key] = len(str(getattr(align_helper, key)))
+
+
+    def print(self):
+        print("\nSummary")
+        self._task_stats()
+        self._print_header()
+        self._column_titles()
+        fmt = self._format_stats()
+        for i in range(len(self._body)):
+            print(fmt.format(*tuple(self._body[i])))
+
+
+
+class Task(object):
+    """ The class is used to handle the information of a given task."""
+
+    def __init__(self, id, tid, cpu, comm):
+        self.id = id
+        self.tid = tid
+        self.cpu = cpu
+        self.comm = comm
+        self.pid = None
+        self._time_in = None
+        self._time_out = None
+
+    def schedule_in_at(self, time):
+        """set the time where the task was scheduled in"""
+        self._time_in = time
+
+    def schedule_out_at(self, time):
+        """set the time where the task was scheduled out"""
+        self._time_out = time
+
+    def time_out(self, unit="s"):
+        """return time where a given task was scheduled out"""
+        factor = time_uniter(unit)
+        return self._time_out * decimal.Decimal(factor)
+
+    def time_in(self, unit="s"):
+        """return time where a given task was scheduled in"""
+        factor = time_uniter(unit)
+        return self._time_in * decimal.Decimal(factor)
+
+    def runtime(self, unit="us"):
+        factor = time_uniter(unit)
+        return (self._time_out - self._time_in) * decimal.Decimal(factor)
+
+    def update_pid(self, pid):
+        self.pid = pid
+
+
+def _task_id(pid, cpu):
+    """returns a "unique-enough" identifier, please do not change"""
+    return "{}-{}".format(pid, cpu)
+
+
+def _filter_non_printable(unfiltered):
+    """comm names may contain loony chars like '\x00000'"""
+    filtered = ""
+    for char in unfiltered:
+        if char not in string.printable:
+            continue
+        filtered += char
+    return filtered
+
+
+def _fmt_header():
+    fmt = "{{:>{}}}".format(LEN_SWITCHED_IN)
+    fmt += " {{:>{}}}".format(LEN_SWITCHED_OUT)
+    fmt += " {{:>{}}}".format(LEN_CPU)
+    fmt += " {{:>{}}}".format(LEN_PID)
+    fmt += "  {{:>{}}}".format(LEN_TID)
+    fmt += "  {{:>{}}}".format(LEN_COMM)
+    fmt += " {{:>{}}}".format(LEN_RUNTIME)
+    fmt += " {{:>{}}}".format(LEN_OUT_IN)
+    if args.extended_times:
+        fmt += " {{:>{}}}".format(LEN_OUT_OUT)
+        fmt += " {{:>{}}}".format(LEN_IN_IN)
+        fmt += " {{:>{}}}".format(LEN_IN_OUT)
+    return fmt
+
+
+def _fmt_body():
+    decimal_precision, time_precision = _prepare_fmt_precision()
+    fmt = "{{}}{{:{}.{}f}}".format(LEN_SWITCHED_IN, decimal_precision)
+    fmt += " {{:{}.{}f}}".format(LEN_SWITCHED_OUT, decimal_precision)
+    fmt += " {{:{}d}}".format(LEN_CPU)
+    fmt += " {{:{}d}}".format(LEN_PID)
+    fmt += "  {{}}{{:{}d}}{{}}".format(LEN_TID)
+    fmt += "  {{}}{{:>{}}}".format(LEN_COMM)
+    fmt += " {{:{}.{}f}}".format(LEN_RUNTIME, time_precision)
+    if args.extended_times:
+        fmt += " {{:{}.{}f}}".format(LEN_OUT_IN, time_precision)
+        fmt += " {{:{}.{}f}}".format(LEN_OUT_OUT, time_precision)
+        fmt += " {{:{}.{}f}}".format(LEN_IN_IN, time_precision)
+        fmt += " {{:{}.{}f}}{{}}".format(LEN_IN_OUT, time_precision)
+    else:
+        fmt += " {{:{}.{}f}}{{}}".format(LEN_OUT_IN, time_precision)
+    return fmt
+
+
+def _print_header():
+    fmt = _fmt_header()
+    header = ("Switched-In", "Switched-Out", "CPU", "PID", "TID", "Comm", "Runtime",
+            "Time Out-In")
+    if args.extended_times:
+        header += ("Time Out-Out", "Time In-In", "Time In-Out")
+    print(fmt.format(*header))
+
+
+def _print_task_finish(task):
+    """calculating every entry of a row and printing it immediately"""
+    c_row_set = ""
+    c_row_reset = ""
+    out_in = -1
+    out_out = -1
+    in_in = -1
+    in_out = -1
+    fmt = _fmt_body()
+
+    # depending on user provided highlight option we change the color
+    # for particular tasks
+    if str(task.tid) in args.highlight_tasks_map:
+        c_row_set = _COLORS[args.highlight_tasks_map[str(task.tid)]]
+        c_row_reset = _COLORS["reset"]
+    if task.comm in args.highlight_tasks_map:
+        c_row_set = _COLORS[args.highlight_tasks_map[task.comm]]
+        c_row_reset = _COLORS["reset"]
+    # grey-out entries if PID == TID, they
+    # are identical, no threaded model so the
+    # thread id (tid) do not matter
+    c_tid_set = ""
+    c_tid_reset = ""
+    if task.pid == task.tid:
+        c_tid_set = _COLORS["grey"]
+        c_tid_reset = _COLORS["reset"]
+    if task.tid in db["tid"]:
+        # get last task of tid
+        last_tid_task = db["tid"][task.tid][-1]
+        # feed the timespan calculate, last in tid db
+        # and second the current one
+        timespan_gap_tid = Timespans()
+        timespan_gap_tid.feed(last_tid_task)
+        timespan_gap_tid.feed(task)
+        out_in = timespan_gap_tid.out_in
+        out_out = timespan_gap_tid.out_out
+        in_in = timespan_gap_tid.in_in
+        in_out = timespan_gap_tid.in_out
+    if args.extended_times:
+        print(fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu, task.pid,
+                        c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
+                        task.runtime(time_unit), out_in, out_out, in_in, in_out,
+                        c_row_reset))
+    else:
+        print(fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu, task.pid,
+                        c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
+                        task.runtime(time_unit), out_in, c_row_reset))
+
+
+def _record_cleanup(_list):
+    """
+    no need to store more then one element if --summarize
+    is not enabled
+    """
+    if not args.summary and len(_list) > 1:
+        _list = _list[len(_list) - 1 :]
+
+
+def _record_by_tid(task):
+    tid = task.tid
+    if tid not in db["tid"]:
+        db["tid"][tid] = []
+    db["tid"][tid].append(task)
+    _record_cleanup(db["tid"][tid])
+
+
+def _record_by_cpu(task):
+    cpu = task.cpu
+    if cpu not in db["cpu"]:
+        db["cpu"][cpu] = []
+    db["cpu"][cpu].append(task)
+    _record_cleanup(db["cpu"][cpu])
+
+
+def _record_global(task):
+    """record all executed task, ordered by finish chronological"""
+    db["global"].append(task)
+    _record_cleanup(db["global"])
+
+
+def _handle_task_finish(tid, cpu, time, perf_sample_dict):
+    if tid == 0:
+        return
+    _id = _task_id(tid, cpu)
+    if _id not in db["running"]:
+        # may happen, if we missed the switch to
+        # event. Seen in combination with --exclude-perf
+        # where the start is filtered out, but not the
+        # switched in. Probably a bug in exclude-perf
+        # option.
+        return
+    task = db["running"][_id]
+    task.schedule_out_at(time)
+
+    # record tid, during schedule in the tid
+    # is not available, update now
+    pid = int(perf_sample_dict["sample"]["pid"])
+
+    task.update_pid(pid)
+    del db["running"][_id]
+
+    # print only tasks which are not being filtered and no print of trace
+    # for summary only, but record every task.
+    if not _limit_filtered(tid, pid, task.comm) and not args.summary_only:
+        _print_task_finish(task)
+    _record_by_tid(task)
+    _record_by_cpu(task)
+    _record_global(task)
+
+
+def _handle_task_start(tid, cpu, comm, time):
+    if tid == 0:
+        return
+    if tid in args.tid_renames:
+        comm = args.tid_renames[tid]
+    _id = _task_id(tid, cpu)
+    if _id in db["running"]:
+        # handle corner cases where already running tasks
+        # are switched-to again - saw this via --exclude-perf
+        # recorded traces. We simple ignore this "second start"
+        # event.
+        return
+    assert _id not in db["running"]
+    task = Task(_id, tid, cpu, comm)
+    task.schedule_in_at(time)
+    db["running"][_id] = task
+
+
+def _time_to_internal(time_ns):
+    """
+    To prevent float rounding errors we use Decimal internally
+    """
+    return decimal.Decimal(time_ns) / decimal.Decimal(1e9)
+
+
+def _limit_filtered(tid, pid, comm):
+    if args.filter_tasks:
+        if str(tid) in args.filter_tasks or comm in args.filter_tasks:
+            return True
+        else:
+            return False
+    if args.limit_to_tasks:
+        if str(tid) in args.limit_to_tasks or comm in args.limit_to_tasks:
+            return False
+        else:
+            return True
+
+
+def _argument_filter_sanity_check():
+    if args.limit_to_tasks and args.filter_tasks:
+        sys.exit("Error: Filter and Limit at the same time active.")
+    if args.extended_times and args.summary_only:
+        sys.exit("Error: Summary only and extended times active.")
+    if args.time_limit and ":" not in args.time_limit:
+        sys.exit(
+            "Error: No bound set for time limit. Please set bound by ':' e.g :123."
+        )
+    if args.time_limit and (args.summary or args.summary_only or args.summary_extended):
+        sys.exit("Error: Cannot set time limit and print summary")
+
+
+def _argument_prepare_check():
+    global time_unit
+    if args.filter_tasks:
+        args.filter_tasks = args.filter_tasks.split(",")
+    if args.limit_to_tasks:
+        args.limit_to_tasks = args.limit_to_tasks.split(",")
+    if args.time_limit:
+        args.time_limit = args.time_limit.split(":")
+    for rename_tuple in args.rename_comms_by_tids.split(","):
+        tid_name = rename_tuple.split(":")
+        if len(tid_name) != 2:
+            continue
+        args.tid_renames[int(tid_name[0])] = tid_name[1]
+    args.highlight_tasks_map = dict()
+    for highlight_tasks_tuple in args.highlight_tasks.split(","):
+        tasks_color_map = highlight_tasks_tuple.split(":")
+        # default highlight color to red if no color set by user
+        if len(tasks_color_map) == 1:
+            tasks_color_map.append("red")
+        if args.highlight_tasks and tasks_color_map[1].lower() not in _COLORS:
+            sys.exit(
+                "Error: Color not defined, please choose from grey,red,green,yellow,blue,"
+                "violet"
+            )
+        if len(tasks_color_map) != 2:
+            continue
+        args.highlight_tasks_map[tasks_color_map[0]] = tasks_color_map[1]
+    time_unit = "us"
+    if args.ns:
+        time_unit = "ns"
+    elif args.ms:
+        time_unit = "ms"
+
+
+def _is_within_timelimit(time):
+    """
+    Check if a time limit was given by parameter, if so ignore the rest. If not,
+    process the recorded trace in its entirety.
+    """
+    if not args.time_limit:
+        return True
+    lower_time_limit = args.time_limit[0]
+    upper_time_limit = args.time_limit[1]
+    # check for upper limit
+    if upper_time_limit == "":
+        if time >= decimal.Decimal(lower_time_limit):
+            return True
+    # check for lower limit
+    if lower_time_limit == "":
+        if time <= decimal.Decimal(upper_time_limit):
+            return True
+        # quit if time exceeds upper limit. Good for big datasets
+        else:
+            quit()
+    if lower_time_limit != "" and upper_time_limit != "":
+        if (time >= decimal.Decimal(lower_time_limit) and
+            time <= decimal.Decimal(upper_time_limit)):
+            return True
+        # quit if time exceeds upper limit. Good for big datasets
+        elif time > decimal.Decimal(upper_time_limit):
+            quit()
+
+def _prepare_fmt_precision():
+    decimal_precision = 6
+    time_precision = 3
+    if args.ns:
+       decimal_precision = 9
+       time_precision = 0
+    return decimal_precision, time_precision
+
+
+def trace_unhandled(event_name, context, event_fields_dict, perf_sample_dict):
+    pass
+
+
+def trace_begin():
+    _parse_args()
+    _check_color()
+    _init_db()
+    if not args.summary_only:
+        _print_header()
+
+def trace_end():
+    if args.summary or args.summary_extended or args.summary_only:
+        Summary().print()
+
+def sched__sched_switch(event_name, context, common_cpu, common_secs, common_nsecs,
+                        common_pid, common_comm, common_callchain, prev_comm,
+                        prev_pid, prev_prio, prev_state, next_comm, next_pid,
+                        next_prio, perf_sample_dict):
+    # ignore common_secs & common_nsecs cause we need
+    # high res timestamp anyway, using the raw value is
+    # faster
+    time = _time_to_internal(perf_sample_dict["sample"]["time"])
+    if not _is_within_timelimit(time):
+        # user specific --time-limit a:b set
+        return
+
+    next_comm = _filter_non_printable(next_comm)
+    _handle_task_finish(prev_pid, common_cpu, time, perf_sample_dict)
+    _handle_task_start(next_pid, common_cpu, next_comm, time)
-- 
GitLab


From fdd0f81f0528d55d4362d072c6d6ecc7ecd61def Mon Sep 17 00:00:00 2001
From: Petar Gligoric <petar.gligoric@rohde-schwarz.com>
Date: Tue, 6 Dec 2022 10:44:05 -0500
Subject: [PATCH 3884/4122] perf script: task-analyzer add csv support

This patch adds the possibility to write the trace and the summary as csv files
to a user specified file. A format as such simplifies further data processing.
This is achieved by having ";" as separators instead of spaces and solely one
header per file.

Additional parameters are being considered, like in the normal usage of the
script. Colors are turned off in the case of a csv output, thus the highlight
option is also being ignored.

Usage:

Write standard task to csv file:

  $ perf script report tasks-analyzer --csv <file>

write limited output to csv file in nanoseconds:

  $ perf script report tasks-analyzer --csv <file> --ns --limit-to-tasks 1337

Write summary to a csv file:

  $ perf script report tasks-analyzer --csv-summary <file>

Write summary to csv file with additional schedule information:

  $ perf script report tasks-analyzer --csv-summary <file> --summary-extended

Write both summary and standard task to a csv file:

  $ perf script report tasks-analyzer --csv --csv-summary

The following examples illustrate what is possible with the CSV output.  The
first command sequence will record all scheduler switch events for 10 seconds,
the task-analyzer calculates task information like runtimes as CSV.  A small
python snippet using pandas and matplotlib will visualize the most frequent
task (e.g. kworker/1:1) runtimes - each runtime as a bar in a bar chart:

  $ perf record -e sched:sched_switch -a -- sleep 10
  $ perf script report tasks-analyzer --ns --csv tasks.csv
  $ cat << EOF > /tmp/freq-comm-runtimes-bar.py
    import pandas as pd
    import matplotlib.pyplot as plt

    df = pd.read_csv("tasks.csv", sep=';')
    most_freq_comm = df["COMM"].value_counts().idxmax()
    most_freq_runtimes = df[df["COMM"]==most_freq_comm]["Runtime"]
    plt.title(f"Runtimes for Task {most_freq_comm} in Nanoseconds")
    plt.bar(range(len(most_freq_runtimes)), most_freq_runtimes)
    plt.show()
  $ python3 /tmp/freq-comm-runtimes-bar.py

As a seconds example, the subsequent script generates a pie chart of all
accumulated tasks runtimes for 10 seconds of system recordings:

  $ perf record -e sched:sched_switch -a -- sleep 10
  $ perf script report tasks-analyzer --csv-summary task-summary.csv
  $ cat << EOF > /tmp/accumulated-task-pie.py
    import pandas as pd
    from matplotlib.pyplot import pie, axis, show

    df = pd.read_csv("task-summary.csv", sep=';')
    sums = df.groupby(df["Comm"])["Accumulated"].sum()
    axis("equal")
    pie(sums, labels=sums.index);
    show()
  EOF
  $ python3 /tmp/accumulated-task-pie.py

A variety of other visualizations are possible in matplotlib and other
environments. Of course, pandas, numpy and co. also allow easy
statistical analysis of the data!

Signed-off-by: Petar Gligoric <petar.gligoric@rohde-schwarz.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20221206154406.41941-3-petar.gligor@gmail.com
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/scripts/python/task-analyzer.py | 274 ++++++++++++++-------
 1 file changed, 185 insertions(+), 89 deletions(-)

diff --git a/tools/perf/scripts/python/task-analyzer.py b/tools/perf/scripts/python/task-analyzer.py
index f74abe50f3b27..52e8dae9b1f0d 100755
--- a/tools/perf/scripts/python/task-analyzer.py
+++ b/tools/perf/scripts/python/task-analyzer.py
@@ -156,6 +156,18 @@ def _parse_args():
         help="always, never or auto, allowing configuring color output"
             " via the command line",
     )
+    parser.add_argument(
+        "--csv",
+        default="",
+        help="Write trace to file selected by user. Options, like --ns or --extended"
+            "-times are used.",
+    )
+    parser.add_argument(
+        "--csv-summary",
+        default="",
+        help="Write summary to file selected by user. Options, like --ns or"
+            " --summary-extended are used.",
+    )
     args = parser.parse_args()
     args.tid_renames = dict()
 
@@ -275,7 +287,6 @@ class Timespans(object):
 
 
-
 class Summary(object):
     """
     Primary instance for calculating the summary output. Processes the whole trace to
@@ -327,7 +338,7 @@ class Summary(object):
                 sum(db["inter_times"].values()) - 4 * decimal_precision
                 )
             _header += ("Max Inter Task Times",)
-        print(fmt.format(*_header))
+        fd_sum.write(fmt.format(*_header) +  "\n")
 
     def _column_titles(self):
         """
@@ -336,34 +347,58 @@ class Summary(object):
         values are being displayed in grey. Thus in their format two additional {},
         are placed for color set and reset.
         """
+        separator, fix_csv_align = _prepare_fmt_sep()
         decimal_precision, time_precision = _prepare_fmt_precision()
-        fmt = " {{:>{}}}".format(db["task_info"]["pid"])
-        fmt += " {{:>{}}}".format(db["task_info"]["tid"])
-        fmt += " {{:>{}}}".format(db["task_info"]["comm"])
-        fmt += " {{:>{}}}".format(db["runtime_info"]["runs"])
-        fmt += " {{:>{}}}".format(db["runtime_info"]["acc"])
-        fmt += " {{:>{}}}".format(db["runtime_info"]["mean"])
-        fmt += " {{:>{}}}".format(db["runtime_info"]["median"])
-        fmt += " {{:>{}}}".format(db["runtime_info"]["min"] - decimal_precision)
-        fmt += " {{:>{}}}".format(db["runtime_info"]["max"] - decimal_precision)
-        fmt += " {{}}{{:>{}}}{{}}".format(db["runtime_info"]["max_at"] - time_precision)
+        fmt = "{{:>{}}}".format(db["task_info"]["pid"] * fix_csv_align)
+        fmt += "{}{{:>{}}}".format(separator, db["task_info"]["tid"] * fix_csv_align)
+        fmt += "{}{{:>{}}}".format(separator, db["task_info"]["comm"] * fix_csv_align)
+        fmt += "{}{{:>{}}}".format(separator, db["runtime_info"]["runs"] * fix_csv_align)
+        fmt += "{}{{:>{}}}".format(separator, db["runtime_info"]["acc"] * fix_csv_align)
+        fmt += "{}{{:>{}}}".format(separator, db["runtime_info"]["mean"] * fix_csv_align)
+        fmt += "{}{{:>{}}}".format(
+            separator, db["runtime_info"]["median"] * fix_csv_align
+        )
+        fmt += "{}{{:>{}}}".format(
+            separator, (db["runtime_info"]["min"] - decimal_precision) * fix_csv_align
+        )
+        fmt += "{}{{:>{}}}".format(
+            separator, (db["runtime_info"]["max"] - decimal_precision) * fix_csv_align
+        )
+        fmt += "{}{{}}{{:>{}}}{{}}".format(
+            separator, (db["runtime_info"]["max_at"] - time_precision) * fix_csv_align
+        )
 
         column_titles = ("PID", "TID", "Comm")
         column_titles += ("Runs", "Accumulated", "Mean", "Median", "Min", "Max")
-        column_titles += (_COLORS["grey"], "At", _COLORS["reset"])
+        column_titles += (_COLORS["grey"], "Max At", _COLORS["reset"])
 
         if args.summary_extended:
-            fmt += " {{:>{}}}".format(db["inter_times"]["out_in"] - decimal_precision)
-            fmt += " {{}}{{:>{}}}{{}}".format(
-                db["inter_times"]["inter_at"] - time_precision
+            fmt += "{}{{:>{}}}".format(
+                separator,
+                (db["inter_times"]["out_in"] - decimal_precision) * fix_csv_align
+            )
+            fmt += "{}{{}}{{:>{}}}{{}}".format(
+                separator,
+                (db["inter_times"]["inter_at"] - time_precision) * fix_csv_align
+            )
+            fmt += "{}{{:>{}}}".format(
+                separator,
+                (db["inter_times"]["out_out"] - decimal_precision) * fix_csv_align
+            )
+            fmt += "{}{{:>{}}}".format(
+                separator,
+                (db["inter_times"]["in_in"] - decimal_precision) * fix_csv_align
+            )
+            fmt += "{}{{:>{}}}".format(
+                separator,
+                (db["inter_times"]["in_out"] - decimal_precision) * fix_csv_align
             )
-            fmt += " {{:>{}}}".format(db["inter_times"]["out_out"] - decimal_precision)
-            fmt += " {{:>{}}}".format(db["inter_times"]["in_in"] - decimal_precision)
-            fmt += " {{:>{}}}".format(db["inter_times"]["in_out"] - decimal_precision)
 
             column_titles += ("Out-In", _COLORS["grey"], "Max At", _COLORS["reset"],
                         "Out-Out", "In-In", "In-Out")
-        print(fmt.format(*column_titles))
+
+        fd_sum.write(fmt.format(*column_titles) + "\n")
+
 
     def _task_stats(self):
         """calculates the stats of every task and constructs the printable summary"""
@@ -414,39 +449,53 @@ class Summary(object):
             self._calc_alignments_summary(align_helper)
 
     def _format_stats(self):
+        separator, fix_csv_align = _prepare_fmt_sep()
         decimal_precision, time_precision = _prepare_fmt_precision()
-        fmt = " {{:>{}d}}".format(db["task_info"]["pid"])
-        fmt += " {{:>{}d}}".format(db["task_info"]["tid"])
-        fmt += " {{:>{}}}".format(db["task_info"]["comm"])
-        fmt += " {{:>{}d}}".format(db["runtime_info"]["runs"])
-        fmt += " {{:>{}.{}f}}".format(db["runtime_info"]["acc"], time_precision)
-        fmt += " {{}}{{:>{}.{}f}}".format(db["runtime_info"]["mean"], time_precision)
-        fmt += " {{:>{}.{}f}}".format(db["runtime_info"]["median"], time_precision)
-        fmt += " {{:>{}.{}f}}".format(
-            db["runtime_info"]["min"] - decimal_precision, time_precision
-            )
-        fmt += " {{:>{}.{}f}}".format(
-            db["runtime_info"]["max"] - decimal_precision, time_precision
-            )
-        fmt += " {{}}{{:>{}.{}f}}{{}}{{}}".format(
-            db["runtime_info"]["max_at"] - time_precision, decimal_precision
+        len_pid = db["task_info"]["pid"] * fix_csv_align
+        len_tid = db["task_info"]["tid"] * fix_csv_align
+        len_comm = db["task_info"]["comm"] * fix_csv_align
+        len_runs = db["runtime_info"]["runs"] * fix_csv_align
+        len_acc = db["runtime_info"]["acc"] * fix_csv_align
+        len_mean = db["runtime_info"]["mean"] * fix_csv_align
+        len_median = db["runtime_info"]["median"] * fix_csv_align
+        len_min = (db["runtime_info"]["min"] - decimal_precision) * fix_csv_align
+        len_max = (db["runtime_info"]["max"] - decimal_precision) * fix_csv_align
+        len_max_at = (db["runtime_info"]["max_at"] - time_precision) * fix_csv_align
+        if args.summary_extended:
+            len_out_in = (
+                db["inter_times"]["out_in"] - decimal_precision
+            ) * fix_csv_align
+            len_inter_at = (
+                db["inter_times"]["inter_at"] - time_precision
+            ) * fix_csv_align
+            len_out_out = (
+                db["inter_times"]["out_out"] - decimal_precision
+            ) * fix_csv_align
+            len_in_in = (db["inter_times"]["in_in"] - decimal_precision) * fix_csv_align
+            len_in_out = (
+                db["inter_times"]["in_out"] - decimal_precision
+            ) * fix_csv_align
+
+        fmt = "{{:{}d}}".format(len_pid)
+        fmt += "{}{{:{}d}}".format(separator, len_tid)
+        fmt += "{}{{:>{}}}".format(separator, len_comm)
+        fmt += "{}{{:{}d}}".format(separator, len_runs)
+        fmt += "{}{{:{}.{}f}}".format(separator, len_acc, time_precision)
+        fmt += "{}{{}}{{:{}.{}f}}".format(separator, len_mean, time_precision)
+        fmt += "{}{{:{}.{}f}}".format(separator, len_median, time_precision)
+        fmt += "{}{{:{}.{}f}}".format(separator, len_min, time_precision)
+        fmt += "{}{{:{}.{}f}}".format(separator, len_max, time_precision)
+        fmt += "{}{{}}{{:{}.{}f}}{{}}{{}}".format(
+            separator, len_max_at, decimal_precision
         )
         if args.summary_extended:
-            fmt += " {{:>{}.{}f}}".format(
-                db["inter_times"]["out_in"] - decimal_precision, time_precision
-            )
-            fmt += " {{}}{{:>{}.{}f}}{{}}".format(
-                db["inter_times"]["inter_at"] - time_precision, decimal_precision
-            )
-            fmt += " {{:>{}.{}f}}".format(
-                db["inter_times"]["out_out"] - decimal_precision, time_precision
-            )
-            fmt += " {{:>{}.{}f}}".format(
-                db["inter_times"]["in_in"] - decimal_precision, time_precision
-            )
-            fmt += " {{:>{}.{}f}}".format(
-                db["inter_times"]["in_out"] - decimal_precision, time_precision
+            fmt += "{}{{:{}.{}f}}".format(separator, len_out_in, time_precision)
+            fmt += "{}{{}}{{:{}.{}f}}{{}}".format(
+                separator, len_inter_at, decimal_precision
             )
+            fmt += "{}{{:{}.{}f}}".format(separator, len_out_out, time_precision)
+            fmt += "{}{{:{}.{}f}}".format(separator, len_in_in, time_precision)
+            fmt += "{}{{:{}.{}f}}".format(separator, len_in_out, time_precision)
         return fmt
 
 
@@ -467,13 +516,15 @@ class Summary(object):
 
 
     def print(self):
-        print("\nSummary")
         self._task_stats()
-        self._print_header()
-        self._column_titles()
         fmt = self._format_stats()
+
+        if not args.csv_summary:
+            print("\nSummary")
+            self._print_header()
+        self._column_titles()
         for i in range(len(self._body)):
-            print(fmt.format(*tuple(self._body[i])))
+            fd_sum.write(fmt.format(*tuple(self._body[i])) + "\n")
 
 
@@ -531,37 +582,45 @@ def _filter_non_printable(unfiltered):
 
 
 def _fmt_header():
-    fmt = "{{:>{}}}".format(LEN_SWITCHED_IN)
-    fmt += " {{:>{}}}".format(LEN_SWITCHED_OUT)
-    fmt += " {{:>{}}}".format(LEN_CPU)
-    fmt += " {{:>{}}}".format(LEN_PID)
-    fmt += "  {{:>{}}}".format(LEN_TID)
-    fmt += "  {{:>{}}}".format(LEN_COMM)
-    fmt += " {{:>{}}}".format(LEN_RUNTIME)
-    fmt += " {{:>{}}}".format(LEN_OUT_IN)
+    separator, fix_csv_align = _prepare_fmt_sep()
+    fmt = "{{:>{}}}".format(LEN_SWITCHED_IN*fix_csv_align)
+    fmt += "{}{{:>{}}}".format(separator, LEN_SWITCHED_OUT*fix_csv_align)
+    fmt += "{}{{:>{}}}".format(separator, LEN_CPU*fix_csv_align)
+    fmt += "{}{{:>{}}}".format(separator, LEN_PID*fix_csv_align)
+    fmt += "{}{{:>{}}}".format(separator, LEN_TID*fix_csv_align)
+    fmt += "{}{{:>{}}}".format(separator, LEN_COMM*fix_csv_align)
+    fmt += "{}{{:>{}}}".format(separator, LEN_RUNTIME*fix_csv_align)
+    fmt += "{}{{:>{}}}".format(separator, LEN_OUT_IN*fix_csv_align)
     if args.extended_times:
-        fmt += " {{:>{}}}".format(LEN_OUT_OUT)
-        fmt += " {{:>{}}}".format(LEN_IN_IN)
-        fmt += " {{:>{}}}".format(LEN_IN_OUT)
+        fmt += "{}{{:>{}}}".format(separator, LEN_OUT_OUT*fix_csv_align)
+        fmt += "{}{{:>{}}}".format(separator, LEN_IN_IN*fix_csv_align)
+        fmt += "{}{{:>{}}}".format(separator, LEN_IN_OUT*fix_csv_align)
     return fmt
 
 
 def _fmt_body():
+    separator, fix_csv_align = _prepare_fmt_sep()
     decimal_precision, time_precision = _prepare_fmt_precision()
-    fmt = "{{}}{{:{}.{}f}}".format(LEN_SWITCHED_IN, decimal_precision)
-    fmt += " {{:{}.{}f}}".format(LEN_SWITCHED_OUT, decimal_precision)
-    fmt += " {{:{}d}}".format(LEN_CPU)
-    fmt += " {{:{}d}}".format(LEN_PID)
-    fmt += "  {{}}{{:{}d}}{{}}".format(LEN_TID)
-    fmt += "  {{}}{{:>{}}}".format(LEN_COMM)
-    fmt += " {{:{}.{}f}}".format(LEN_RUNTIME, time_precision)
+    fmt = "{{}}{{:{}.{}f}}".format(LEN_SWITCHED_IN*fix_csv_align, decimal_precision)
+    fmt += "{}{{:{}.{}f}}".format(
+        separator, LEN_SWITCHED_OUT*fix_csv_align, decimal_precision
+    )
+    fmt += "{}{{:{}d}}".format(separator, LEN_CPU*fix_csv_align)
+    fmt += "{}{{:{}d}}".format(separator, LEN_PID*fix_csv_align)
+    fmt += "{}{{}}{{:{}d}}{{}}".format(separator, LEN_TID*fix_csv_align)
+    fmt += "{}{{}}{{:>{}}}".format(separator, LEN_COMM*fix_csv_align)
+    fmt += "{}{{:{}.{}f}}".format(separator, LEN_RUNTIME*fix_csv_align, time_precision)
     if args.extended_times:
-        fmt += " {{:{}.{}f}}".format(LEN_OUT_IN, time_precision)
-        fmt += " {{:{}.{}f}}".format(LEN_OUT_OUT, time_precision)
-        fmt += " {{:{}.{}f}}".format(LEN_IN_IN, time_precision)
-        fmt += " {{:{}.{}f}}{{}}".format(LEN_IN_OUT, time_precision)
+        fmt += "{}{{:{}.{}f}}".format(separator, LEN_OUT_IN*fix_csv_align, time_precision)
+        fmt += "{}{{:{}.{}f}}".format(separator, LEN_OUT_OUT*fix_csv_align, time_precision)
+        fmt += "{}{{:{}.{}f}}".format(separator, LEN_IN_IN*fix_csv_align, time_precision)
+        fmt += "{}{{:{}.{}f}}{{}}".format(
+            separator, LEN_IN_OUT*fix_csv_align, time_precision
+        )
     else:
-        fmt += " {{:{}.{}f}}{{}}".format(LEN_OUT_IN, time_precision)
+        fmt += "{}{{:{}.{}f}}{{}}".format(
+            separator, LEN_OUT_IN*fix_csv_align, time_precision
+        )
     return fmt
 
 
@@ -571,7 +630,8 @@ def _print_header():
             "Time Out-In")
     if args.extended_times:
         header += ("Time Out-Out", "Time In-In", "Time In-Out")
-    print(fmt.format(*header))
+    fd_task.write(fmt.format(*header) + "\n")
+
 
 
 def _print_task_finish(task):
@@ -583,7 +643,6 @@ def _print_task_finish(task):
     in_in = -1
     in_out = -1
     fmt = _fmt_body()
-
     # depending on user provided highlight option we change the color
     # for particular tasks
     if str(task.tid) in args.highlight_tasks_map:
@@ -612,16 +671,22 @@ def _print_task_finish(task):
         out_out = timespan_gap_tid.out_out
         in_in = timespan_gap_tid.in_in
         in_out = timespan_gap_tid.in_out
+
+
     if args.extended_times:
-        print(fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu, task.pid,
-                        c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
+        line_out = fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu,
+                        task.pid, c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
                         task.runtime(time_unit), out_in, out_out, in_in, in_out,
-                        c_row_reset))
+                        c_row_reset) + "\n"
     else:
-        print(fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu, task.pid,
-                        c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
-                        task.runtime(time_unit), out_in, c_row_reset))
-
+        line_out = fmt.format(c_row_set, task.time_in(), task.time_out(), task.cpu,
+                        task.pid, c_tid_set, task.tid, c_tid_reset, c_row_set, task.comm,
+                        task.runtime(time_unit), out_in, c_row_reset) + "\n"
+    try:
+        fd_task.write(line_out)
+    except(IOError):
+        # don't mangle the output if user SIGINT this script
+        sys.exit()
 
 def _record_cleanup(_list):
     """
@@ -733,10 +798,19 @@ def _argument_filter_sanity_check():
         )
     if args.time_limit and (args.summary or args.summary_only or args.summary_extended):
         sys.exit("Error: Cannot set time limit and print summary")
-
+    if args.csv_summary:
+        args.summary = True
+        if args.csv == args.csv_summary:
+            sys.exit("Error: Chosen files for csv and csv summary are the same")
+    if args.csv and (args.summary_extended or args.summary) and not args.csv_summary:
+        sys.exit("Error: No file chosen to write summary to. Choose with --csv-summary "
+        "<file>")
+    if args.csv and args.summary_only:
+        sys.exit("Error: --csv chosen and --summary-only. Standard task would not be"
+            "written to csv file.")
 
 def _argument_prepare_check():
-    global time_unit
+    global time_unit, fd_task, fd_sum
     if args.filter_tasks:
         args.filter_tasks = args.filter_tasks.split(",")
     if args.limit_to_tasks:
@@ -769,6 +843,21 @@ def _argument_prepare_check():
         time_unit = "ms"
 
 
+    fd_task = sys.stdout
+    if args.csv:
+        args.stdio_color = "never"
+        fd_task = open(args.csv, "w")
+        print("generating csv at",args.csv,)
+
+    fd_sum = sys.stdout
+    if args.csv_summary:
+        args.stdio_color = "never"
+        fd_sum = open(args.csv_summary, "w")
+        print("generating csv summary at",args.csv_summary)
+        if not args.csv:
+            args.summary_only = True
+
+
 def _is_within_timelimit(time):
     """
     Check if a time limit was given by parameter, if so ignore the rest. If not,
@@ -801,10 +890,17 @@ def _prepare_fmt_precision():
     decimal_precision = 6
     time_precision = 3
     if args.ns:
-       decimal_precision = 9
-       time_precision = 0
+     decimal_precision = 9
+     time_precision = 0
     return decimal_precision, time_precision
 
+def _prepare_fmt_sep():
+    separator = " "
+    fix_csv_align = 1
+    if args.csv or args.csv_summary:
+        separator = ";"
+        fix_csv_align = 0
+    return separator, fix_csv_align
 
 def trace_unhandled(event_name, context, event_fields_dict, perf_sample_dict):
     pass
-- 
GitLab


From e8478b84d6ba9ccfac15dfce103062c4fa7ded2f Mon Sep 17 00:00:00 2001
From: Petar Gligoric <petar.gligoric@rohde-schwarz.com>
Date: Tue, 6 Dec 2022 10:44:06 -0500
Subject: [PATCH 3885/4122] perf test: add new task-analyzer tests

Provide task-analyzer test cases for all possible arguments and a subset of possible
combinations.

12 Tests in total.

test_basic:
 - cmd:"perf script report task-analyzer"
 - Fundamental test of script without arguments.
 - Check for standard output.

test_ns_rename:
 - cmd:"perf script report task-analyzer --ns --rename-comms-by-tids 0:random"
 - Standard task with timestamps in nanoseconds and comm renamed.
 - Check for standard output.

test_ms_filtertasks_highlight:
 - cmd:"perf script report task-analyzer --ms --filter-tasks perf --highlight-tasks perf"
 - Standard task with timestamps in milliseconds, task filtered out and highlighted.
 - Check for standard output.

test_extended_times_timelimit_limittasks:
 - cmd "perf script report task-analyzer --extended-times --time-limit :99999"
 - Standard task with additional schedule out/in info and timlimit active at 99999.
 - Check for extended table output.

test_summary:
 - cmd:"perf script report task-analyzer --summary"
 - Standard task with additional summary output.
 - Check for summary print.

test_summary_extended:
 - cmd:"perf script report task-analyzer --summary-extended"
 - Standard task with summary and additional schedule in/out info.
 - Chceck for extended table print.

test_summaryonly:
 - cmd:"perf script report task-analyzer --summary-only"
 - Only summary should be printed.
 - Check for summary print.

test_extended_times_summary_ns:
 - cmd:"perf script report task-analyzer --extended-times --summary --ns"
 - Standard task with extended schedule in/out information and summary in ns.
 - Check for extended table and summary.

test_csv:
 - cmd:"perf script report task-analyzer --csv csv"
 - Print standard task to csv file in csv format.
 - Check for csv format.

test_csv_extended_times:
 - cmd:"perf script report task-analyzer --csv csv --extended-times"
 - Print standard task to csv file in csv format with additional schedule in/out
   information.
 - Check for additional information and csv format.

test_csvsummary:
 - cmd:"perf script report task-analyzer --csv-summary csvsummary"
 - Print summary to csvsummary file in csv format.
 - Check for csv format.

test_csvsummary_extended:
 - cmd:"perf script report task-analyzer --csv-summary csvsummary --summary-extended"
 - Print summary to csvsummary file in csv format with additional schedule in/out
   information.
 - Check for additional information and csv format.

Suggested-by: Ian Rogers <irogers@google.com>
Signed-off-by: Petar Gligoric <petar.gligoric@rohde-schwarz.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: https://lore.kernel.org/r/20221206154406.41941-4-petar.gligor@gmail.com
Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/shell/test_task_analyzer.sh | 151 +++++++++++++++++++
 1 file changed, 151 insertions(+)
 create mode 100755 tools/perf/tests/shell/test_task_analyzer.sh

diff --git a/tools/perf/tests/shell/test_task_analyzer.sh b/tools/perf/tests/shell/test_task_analyzer.sh
new file mode 100755
index 0000000000000..a98e4ab66040e
--- /dev/null
+++ b/tools/perf/tests/shell/test_task_analyzer.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# perf script task-analyzer tests
+# SPDX-License-Identifier: GPL-2.0
+
+tmpdir=$(mktemp -d /tmp/perf-script-task-analyzer-XXXXX)
+err=0
+
+cleanup() {
+  rm -f perf.data
+  rm -f perf.data.old
+  rm -f csv
+  rm -f csvsummary
+  rm -rf $tmpdir
+  trap - exit term int
+}
+
+trap_cleanup() {
+  cleanup
+  exit 1
+}
+trap trap_cleanup exit term int
+
+report() {
+	if [ $1 = 0 ]; then
+		echo "PASS: \"$2\""
+	else
+		echo "FAIL: \"$2\" Error message: \"$3\""
+		err=1
+	fi
+}
+
+check_exec_0() {
+	if [ $? != 0 ]; then
+		report 1 "invokation of ${$1} command failed"
+	fi
+}
+
+find_str_or_fail() {
+	grep -q "$1" $2
+	if [ $? != 0 ]; then
+		report 1 $3 "Failed to find required string:'${1}'."
+	else
+		report 0 $3
+	fi
+}
+
+prepare_perf_data() {
+	# 1s should be sufficient to catch at least some switches
+	perf record -e sched:sched_switch -a -- sleep 1 > /dev/null 2>&1
+}
+
+# check standard inkvokation with no arguments
+test_basic() {
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer > $out
+	check_exec_0 "perf"
+	find_str_or_fail "Comm" $out ${FUNCNAME[0]}
+}
+
+test_ns_rename(){
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer --ns --rename-comms-by-tids 0:random > $out
+	check_exec_0 "perf"
+	find_str_or_fail "Comm" $out ${FUNCNAME[0]}
+}
+
+test_ms_filtertasks_highlight(){
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer --ms --filter-tasks perf --highlight-tasks perf \
+	> $out
+	check_exec_0 "perf"
+	find_str_or_fail "Comm" $out ${FUNCNAME[0]}
+}
+
+test_extended_times_timelimit_limittasks() {
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer --extended-times --time-limit :99999 \
+	--limit-to-tasks perf > $out
+	check_exec_0 "perf"
+	find_str_or_fail "Out-Out" $out ${FUNCNAME[0]}
+}
+
+test_summary() {
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer --summary > $out
+	check_exec_0 "perf"
+	find_str_or_fail "Summary" $out ${FUNCNAME[0]}
+}
+
+test_summaryextended() {
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer --summary-extended > $out
+	check_exec_0 "perf"
+	find_str_or_fail "Inter Task Times" $out ${FUNCNAME[0]}
+}
+
+test_summaryonly() {
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer --summary-only > $out
+	check_exec_0 "perf"
+	find_str_or_fail "Summary" $out ${FUNCNAME[0]}
+}
+
+test_extended_times_summary_ns() {
+	out="$tmpdir/perf.out"
+	perf script report task-analyzer --extended-times --summary --ns > $out
+	check_exec_0 "perf"
+	find_str_or_fail "Out-Out" $out ${FUNCNAME[0]}
+	find_str_or_fail "Summary" $out ${FUNCNAME[0]}
+}
+
+test_csv() {
+	perf script report task-analyzer --csv csv > /dev/null
+	check_exec_0 "perf"
+	find_str_or_fail "Comm;" csv ${FUNCNAME[0]}
+}
+
+test_csv_extended_times() {
+	perf script report task-analyzer --csv csv --extended-times > /dev/null
+	check_exec_0 "perf"
+	find_str_or_fail "Out-Out;" csv ${FUNCNAME[0]}
+}
+
+test_csvsummary() {
+	perf script report task-analyzer --csv-summary csvsummary > /dev/null
+	check_exec_0 "perf"
+	find_str_or_fail "Comm;" csvsummary ${FUNCNAME[0]}
+}
+
+test_csvsummary_extended() {
+	perf script report task-analyzer --csv-summary csvsummary --summary-extended \
+	>/dev/null
+	check_exec_0 "perf"
+	find_str_or_fail "Out-Out;" csvsummary ${FUNCNAME[0]}
+}
+
+prepare_perf_data
+test_basic
+test_ns_rename
+test_ms_filtertasks_highlight
+test_extended_times_timelimit_limittasks
+test_summary
+test_summaryextended
+test_summaryonly
+test_extended_times_summary_ns
+test_csv
+test_csvsummary
+test_csv_extended_times
+test_csvsummary_extended
+cleanup
+exit $err
-- 
GitLab


From a8f26192ca769c6b37a04b3ee188f28069fd5d53 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Tue, 13 Dec 2022 11:47:36 +0000
Subject: [PATCH 3886/4122] perf test: Add ability to test exit code for attr
 tests

Currently the return value is used to skip the test, but sometimes it
can be useful to test if a certain command should return a certain exit
code.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221213114739.2312862-2-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/attr.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index cb39ac46bc73c..cf40df4729181 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -129,7 +129,8 @@ class Event(dict):
 #   - needs to specify:
 #     'command' - perf command name
 #     'args'    - special command arguments
-#     'ret'     - expected command return value (0 by default)
+#     'ret'     - Skip test if Perf doesn't exit with this value (0 by default)
+#     'test_ret'- If set to 'true', fail test instead of skipping for 'ret' argument
 #     'arch'    - architecture specific test (optional)
 #                 comma separated list, ! at the beginning
 #                 negates it.
@@ -155,6 +156,8 @@ class Test(object):
         except:
             self.ret  = 0
 
+        self.test_ret = parser.getboolean('config', 'test_ret', fallback=False)
+
         try:
             self.arch  = parser.get('config', 'arch')
             log.warning("test limitation '%s'" % self.arch)
@@ -232,7 +235,10 @@ class Test(object):
         log.info("  '%s' ret '%s', expected '%s'" % (cmd, str(ret), str(self.ret)))
 
         if not data_equal(str(ret), str(self.ret)):
-            raise Unsup(self)
+            if self.test_ret:
+                raise Fail(self, "Perf exit code failure")
+            else:
+                raise Unsup(self)
 
     def compare(self, expect, result):
         match = {}
-- 
GitLab


From c3a8f85351b15167d6c8d178656df82ba3c34712 Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Tue, 13 Dec 2022 11:47:37 +0000
Subject: [PATCH 3887/4122] perf test: Add mechanism for skipping attr tests on
 auxiliary vector values

This can be used to skip tests or provide different test values on
different platforms. For example to run a test only where Arm SVE is
present add this to the config section:

  auxv    = auxv["AT_HWCAP"] & 0x200000 == 0x200000

The value is a freeform Python expression that is evaled in the context
of a map called "auxv" that contains the decoded auxiliary vector.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221213114739.2312862-3-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/attr.py | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index cf40df4729181..eceb6d022141c 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -8,7 +8,9 @@ import glob
 import optparse
 import tempfile
 import logging
+import re
 import shutil
+import subprocess
 
 try:
     import configparser
@@ -134,6 +136,8 @@ class Event(dict):
 #     'arch'    - architecture specific test (optional)
 #                 comma separated list, ! at the beginning
 #                 negates it.
+#     'auxv'    - Truthy statement that is evaled in the scope of the auxv map. When false,
+#                 the test is skipped. For example 'auxv["AT_HWCAP"] == 10'. (optional)
 #
 # [eventX:base]
 #   - one or multiple instances in file
@@ -164,6 +168,7 @@ class Test(object):
         except:
             self.arch  = ''
 
+        self.auxv = parser.get('config', 'auxv', fallback=None)
         self.expect   = {}
         self.result   = {}
         log.debug("  loading expected events");
@@ -175,7 +180,28 @@ class Test(object):
         else:
             return True
 
-    def skip_test(self, myarch):
+    def skip_test_auxv(self):
+        def new_auxv(a, pattern):
+            items = list(filter(None, pattern.split(a)))
+            # AT_HWCAP is hex but doesn't have a prefix, so special case it
+            if items[0] == "AT_HWCAP":
+                value = int(items[-1], 16)
+            else:
+                try:
+                    value = int(items[-1], 0)
+                except:
+                    value = items[-1]
+            return (items[0], value)
+
+        if not self.auxv:
+            return False
+        auxv = subprocess.check_output("LD_SHOW_AUXV=1 sleep 0", shell=True) \
+               .decode(sys.stdout.encoding)
+        pattern = re.compile(r"[: ]+")
+        auxv = dict([new_auxv(a, pattern) for a in auxv.splitlines()])
+        return not eval(self.auxv)
+
+    def skip_test_arch(self, myarch):
         # If architecture not set always run test
         if self.arch == '':
             # log.warning("test for arch %s is ok" % myarch)
@@ -225,9 +251,12 @@ class Test(object):
     def run_cmd(self, tempdir):
         junk1, junk2, junk3, junk4, myarch = (os.uname())
 
-        if self.skip_test(myarch):
+        if self.skip_test_arch(myarch):
             raise Notest(self, myarch)
 
+        if self.skip_test_auxv():
+            raise Notest(self, "auxv skip")
+
         cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
               self.perf, self.command, tempdir, self.args)
         ret = os.WEXITSTATUS(os.system(cmd))
-- 
GitLab


From ee26adf627e65c8e26b13aa35d5de1875973114f Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Tue, 13 Dec 2022 11:47:38 +0000
Subject: [PATCH 3888/4122] perf test: Add mechanism for skipping attr tests on
 kernel versions

The first two version numbers are used since that is where the ABI
changes happen, so seems to be the most useful for now.

'Until' is exclusive and 'since' is inclusive so that the same version
number can be used to mark a point where the change comes into effect.

This allows keeping the tests in a state where new tests will also pass
on older kernels if the existence of a new feature isn't explicitly
broadcast by the kernel. For example extended user regs are currently
discovered by trial and error calls to perf_event_open.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221213114739.2312862-4-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/attr.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/tools/perf/tests/attr.py b/tools/perf/tests/attr.py
index eceb6d022141c..ccfef861e9315 100644
--- a/tools/perf/tests/attr.py
+++ b/tools/perf/tests/attr.py
@@ -6,6 +6,7 @@ import os
 import sys
 import glob
 import optparse
+import platform
 import tempfile
 import logging
 import re
@@ -125,6 +126,11 @@ class Event(dict):
             if not data_equal(self[t], other[t]):
                 log.warning("expected %s=%s, got %s" % (t, self[t], other[t]))
 
+def parse_version(version):
+    if not version:
+        return None
+    return [int(v) for v in version.split(".")[0:2]]
+
 # Test file description needs to have following sections:
 # [config]
 #   - just single instance in file
@@ -138,7 +144,9 @@ class Event(dict):
 #                 negates it.
 #     'auxv'    - Truthy statement that is evaled in the scope of the auxv map. When false,
 #                 the test is skipped. For example 'auxv["AT_HWCAP"] == 10'. (optional)
-#
+#     'kernel_since' - Inclusive kernel version from which the test will start running. Only the
+#                      first two values are supported, for example "6.1" (optional)
+#     'kernel_until' - Exclusive kernel version from which the test will stop running. (optional)
 # [eventX:base]
 #   - one or multiple instances in file
 #   - expected values assignments
@@ -169,6 +177,8 @@ class Test(object):
             self.arch  = ''
 
         self.auxv = parser.get('config', 'auxv', fallback=None)
+        self.kernel_since = parse_version(parser.get('config', 'kernel_since', fallback=None))
+        self.kernel_until = parse_version(parser.get('config', 'kernel_until', fallback=None))
         self.expect   = {}
         self.result   = {}
         log.debug("  loading expected events");
@@ -180,6 +190,16 @@ class Test(object):
         else:
             return True
 
+    def skip_test_kernel_since(self):
+        if not self.kernel_since:
+            return False
+        return not self.kernel_since <= parse_version(platform.release())
+
+    def skip_test_kernel_until(self):
+        if not self.kernel_until:
+            return False
+        return not parse_version(platform.release()) < self.kernel_until
+
     def skip_test_auxv(self):
         def new_auxv(a, pattern):
             items = list(filter(None, pattern.split(a)))
@@ -257,6 +277,12 @@ class Test(object):
         if self.skip_test_auxv():
             raise Notest(self, "auxv skip")
 
+        if self.skip_test_kernel_since():
+            raise Notest(self, "old kernel skip")
+
+        if self.skip_test_kernel_until():
+            raise Notest(self, "new kernel skip")
+
         cmd = "PERF_TEST_ATTR=%s %s %s -o %s/perf.data %s" % (tempdir,
               self.perf, self.command, tempdir, self.args)
         ret = os.WEXITSTATUS(os.system(cmd))
-- 
GitLab


From 9440ebdc333dd12e2344aec8fd3fb7d440f9463c Mon Sep 17 00:00:00 2001
From: James Clark <james.clark@arm.com>
Date: Tue, 13 Dec 2022 11:47:39 +0000
Subject: [PATCH 3889/4122] perf test arm64: Add attr tests for new VG register

Ensure that the availability of the VG register behaves as expected
depending on the kernel version and SVE support.

Signed-off-by: James Clark <james.clark@arm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221213114739.2312862-5-james.clark@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../attr/test-record-user-regs-no-sve-aarch64      |  9 +++++++++
 .../attr/test-record-user-regs-old-sve-aarch64     | 10 ++++++++++
 .../tests/attr/test-record-user-regs-sve-aarch64   | 14 ++++++++++++++
 3 files changed, 33 insertions(+)
 create mode 100644 tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64
 create mode 100644 tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64
 create mode 100644 tools/perf/tests/attr/test-record-user-regs-sve-aarch64

diff --git a/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64
new file mode 100644
index 0000000000000..fbb065842880f
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-user-regs-no-sve-aarch64
@@ -0,0 +1,9 @@
+# Test that asking for VG fails if the system doesn't support SVE. This
+# applies both before and after the feature was added in 6.1
+[config]
+command = record
+args    = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1
+ret     = 129
+test_ret = true
+arch    = aarch64
+auxv    = auxv["AT_HWCAP"] & 0x200000 == 0
diff --git a/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64
new file mode 100644
index 0000000000000..15ebfc3418e3f
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-user-regs-old-sve-aarch64
@@ -0,0 +1,10 @@
+# Test that asking for VG always fails on old kernels because it was
+# added in 6.1. This applies to systems that either support or don't
+# support SVE.
+[config]
+command = record
+args    = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1
+ret     = 129
+test_ret = true
+arch    = aarch64
+kernel_until = 6.1
diff --git a/tools/perf/tests/attr/test-record-user-regs-sve-aarch64 b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64
new file mode 100644
index 0000000000000..c598c803221da
--- /dev/null
+++ b/tools/perf/tests/attr/test-record-user-regs-sve-aarch64
@@ -0,0 +1,14 @@
+# Test that asking for VG works if the system has SVE and after the
+# feature was added in 6.1
+[config]
+command = record
+args    = --no-bpf-event --user-regs=vg kill >/dev/null 2>&1
+ret     = 1
+test_ret = true
+arch    = aarch64
+auxv    = auxv["AT_HWCAP"] & 0x200000 == 0x200000
+kernel_since = 6.1
+
+[event:base-record]
+sample_type=4359
+sample_regs_user=70368744177664
-- 
GitLab


From caec54705adb73b0015bce4dc781e0d19a91ff18 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 13 Dec 2022 15:26:47 -0800
Subject: [PATCH 3890/4122] perf build: Fix python/perf.so library's name

Since Python 3.3 extensions have a suffix encoding platform and
version information. For example, the perf extension was previously
perf.so but now maybe perf.cpython-310-x86_64-linux-gnu.so. Compute
the extension using Python and then use this in the target name. Doing
this avoids the "perf.so" target always being rebuilt.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Eelco Chaudron <echaudro@redhat.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Riccardo Mancini <rickyman7@gmail.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shaomin Deng <dengshaomin@cdjrlc.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Timothy Hayes <timothy.hayes@arm.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221213232651.1269909-2-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.config | 4 +++-
 tools/perf/Makefile.perf   | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 52a4dc029111c..83ed969b95b4a 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -871,6 +871,7 @@ define disable-python_code
   NO_LIBPYTHON := 1
 endef
 
+PYTHON_EXTENSION_SUFFIX := '.so'
 ifdef NO_LIBPYTHON
   $(call disable-python,Python support disabled by user)
 else
@@ -889,7 +890,8 @@ else
       else
          LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
          EXTLIBS += $(PYTHON_EMBED_LIBADD)
-         LANG_BINDINGS += $(obj-perf)python/perf.so
+         PYTHON_EXTENSION_SUFFIX := $(shell $(PYTHON) -c 'from importlib import machinery; print(machinery.EXTENSION_SUFFIXES[0])')
+         LANG_BINDINGS += $(obj-perf)python/perf$(PYTHON_EXTENSION_SUFFIX)
          CFLAGS += -DHAVE_LIBPYTHON_SUPPORT
          $(call detected,CONFIG_LIBPYTHON)
       endif
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 98f629bbd1aae..9b7886ce0674e 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -647,7 +647,7 @@ all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
 # Create python binding output directory if not already present
 _dummy := $(shell [ -d '$(OUTPUT)python' ] || mkdir -p '$(OUTPUT)python')
 
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBPERF)
+$(OUTPUT)python/perf$(PYTHON_EXTENSION_SUFFIX): $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBPERF)
 	$(QUIET_GEN)LDSHARED="$(CC) -pthread -shared" \
         CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS)' \
 	  $(PYTHON_WORD) util/setup.py \
-- 
GitLab


From f0f4c3adcfe6d93508a7fbe241b115aa5b24fe36 Mon Sep 17 00:00:00 2001
From: Luca Weiss <luca@z3ntu.xyz>
Date: Sun, 16 Oct 2022 11:00:34 +0200
Subject: [PATCH 3891/4122] dt-bindings: thermal: tsens: Add sm8450 compatible

Document the tsens-v2 compatible for sm8450 SoC.

Signed-off-by: Luca Weiss <luca@z3ntu.xyz>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221016090035.565350-5-luca@z3ntu.xyz
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 Documentation/devicetree/bindings/thermal/qcom-tsens.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index 038d81338fcf1..7db905c1d5558 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -57,6 +57,7 @@ properties:
               - qcom,sm8150-tsens
               - qcom,sm8250-tsens
               - qcom,sm8350-tsens
+              - qcom,sm8450-tsens
           - const: qcom,tsens-v2
 
   reg:
-- 
GitLab


From 1f455f144fb05e53ae47f84eb109a8021fe168d1 Mon Sep 17 00:00:00 2001
From: Marcus Folkesson <marcus.folkesson@gmail.com>
Date: Fri, 14 Oct 2022 10:16:20 +0200
Subject: [PATCH 3892/4122] thermal/drivers/imx8mm_thermal: Use GENMASK() when
 appropriate

GENMASK() is preferred to use for bitmasks.

Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Link: https://lore.kernel.org/r/20221014081620.1599511-1-marcus.folkesson@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/imx8mm_thermal.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
index e2c2673025a7a..68608d8e5e29f 100644
--- a/drivers/thermal/imx8mm_thermal.c
+++ b/drivers/thermal/imx8mm_thermal.c
@@ -23,8 +23,8 @@
 
 #define TER_ADC_PD		BIT(30)
 #define TER_EN			BIT(31)
-#define TRITSR_TEMP0_VAL_MASK	0xff
-#define TRITSR_TEMP1_VAL_MASK	0xff0000
+#define TRITSR_TEMP0_VAL_MASK	GENMASK(7, 0)
+#define TRITSR_TEMP1_VAL_MASK	GENMASK(23, 16)
 
 #define PROBE_SEL_ALL		GENMASK(31, 30)
 
-- 
GitLab


From d37edc7370273306d8747097fafa62436c1cfe16 Mon Sep 17 00:00:00 2001
From: Marcus Folkesson <marcus.folkesson@gmail.com>
Date: Fri, 14 Oct 2022 09:35:07 +0200
Subject: [PATCH 3893/4122] thermal/drivers/imx8mm_thermal: Validate
 temperature range

Check against the upper temperature limit (125 degrees C) before
consider the temperature valid.

Fixes: 5eed800a6811 ("thermal: imx8mm: Add support for i.MX8MM thermal monitoring unit")
Signed-off-by: Marcus Folkesson <marcus.folkesson@gmail.com>
Reviewed-by: Jacky Bai <ping.bai@nxp.com>
Link: https://lore.kernel.org/r/20221014073507.1594844-1-marcus.folkesson@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/imx8mm_thermal.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
index 68608d8e5e29f..182cc207afe90 100644
--- a/drivers/thermal/imx8mm_thermal.c
+++ b/drivers/thermal/imx8mm_thermal.c
@@ -65,8 +65,14 @@ static int imx8mm_tmu_get_temp(void *data, int *temp)
 	u32 val;
 
 	val = readl_relaxed(tmu->base + TRITSR) & TRITSR_TEMP0_VAL_MASK;
+
+	/*
+	 * Do not validate against the V bit (bit 31) due to errata
+	 * ERR051272: TMU: Bit 31 of registers TMU_TSCR/TMU_TRITSR/TMU_TRATSR invalid
+	 */
+
 	*temp = val * 1000;
-	if (*temp < VER1_TEMP_LOW_LIMIT)
+	if (*temp < VER1_TEMP_LOW_LIMIT || *temp > VER2_TEMP_HIGH_LIMIT)
 		return -EAGAIN;
 
 	return 0;
-- 
GitLab


From 87f9fe8c4b603bd18f025d8c5fc9bb5f17b9a4cc Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Tue, 11 Oct 2022 12:52:35 -0500
Subject: [PATCH 3894/4122] dt-bindings: thermal: Convert generic-adc-thermal
 to DT schema

Convert the 'generic-adc-thermal' binding to DT schema format.

The binding said '#thermal-sensor-cells' should be 1, but all in tree
users are 0 and 1 doesn't make sense for a single channel.

Drop the example's related providers and consumers of the
'generic-adc-thermal' node as the convention is to not have those in
the examples.

Signed-off-by: Rob Herring <robh@kernel.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221011175235.3191509-1-robh@kernel.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../bindings/thermal/generic-adc-thermal.yaml | 84 ++++++++++++++++
 .../bindings/thermal/thermal-generic-adc.txt  | 95 -------------------
 2 files changed, 84 insertions(+), 95 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml
 delete mode 100644 Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt

diff --git a/Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml b/Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml
new file mode 100644
index 0000000000000..f1fc3b0d86085
--- /dev/null
+++ b/Documentation/devicetree/bindings/thermal/generic-adc-thermal.yaml
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/generic-adc-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: General Purpose Analog To Digital Converter (ADC) based thermal sensor
+
+maintainers:
+  - Laxman Dewangan <ldewangan@nvidia.com>
+
+description:
+  On some of platforms, thermal sensor like thermistors are connected to
+  one of ADC channel and sensor resistance is read via voltage across the
+  sensor resistor. The voltage read across the sensor is mapped to
+  temperature using voltage-temperature lookup table.
+
+properties:
+  compatible:
+    const: generic-adc-thermal
+
+  '#thermal-sensor-cells':
+    const: 0
+
+  io-channels:
+    maxItems: 1
+
+  io-channel-names:
+    const: sensor-channel
+
+  temperature-lookup-table:
+    description: |
+      Lookup table to map the relation between ADC value and temperature.
+      When ADC is read, the value is looked up on the table to get the
+      equivalent temperature.
+
+      If not specified, driver assumes the ADC channel gives milliCelsius
+      directly.
+    $ref: /schemas/types.yaml#/definitions/int32-matrix
+    items:
+      items:
+        - description: Temperature in milliCelsius
+        - description: ADC read value
+
+required:
+  - compatible
+  - '#thermal-sensor-cells'
+  - io-channels
+  - io-channel-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/thermal/thermal.h>
+
+    thermal-sensor {
+        compatible = "generic-adc-thermal";
+        #thermal-sensor-cells = <0>;
+        io-channels = <&ads1015 1>;
+        io-channel-names = "sensor-channel";
+        temperature-lookup-table = <
+              (-40000) 2578
+              (-39000) 2577
+              (-38000) 2576
+              (-37000) 2575
+              (-36000) 2574
+              (-35000) 2573
+              (-34000) 2572
+              (-33000) 2571
+              (-32000) 2569
+              (-31000) 2568
+              (-30000) 2567
+              /* skip */
+              118000 254
+              119000 247
+              120000 240
+              121000 233
+              122000 226
+              123000 220
+              124000 214
+              125000 208>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt b/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt
deleted file mode 100644
index e136946a2f4fd..0000000000000
--- a/Documentation/devicetree/bindings/thermal/thermal-generic-adc.txt
+++ /dev/null
@@ -1,95 +0,0 @@
-General Purpose Analog To Digital Converter (ADC) based thermal sensor.
-
-On some of platforms, thermal sensor like thermistors are connected to
-one of ADC channel and sensor resistance is read via voltage across the
-sensor resistor. The voltage read across the sensor is mapped to
-temperature using voltage-temperature lookup table.
-
-Required properties:
-===================
-- compatible:		     Must be "generic-adc-thermal".
-- #thermal-sensor-cells:     Should be 1. See Documentation/devicetree/bindings/thermal/thermal-sensor.yaml for a description
-		             of this property.
-Optional properties:
-===================
-- temperature-lookup-table:  Two dimensional array of Integer; lookup table
-			     to map the relation between ADC value and
-			     temperature. When ADC is read, the value is
-			     looked up on the table to get the equivalent
-			     temperature.
-
-			     The first value of the each row of array is the
-			     temperature in milliCelsius and second value of
-			     the each row of array is the ADC read value.
-
-			     If not specified, driver assumes the ADC channel
-			     gives milliCelsius directly.
-
-Example :
-#include <dt-bindings/thermal/thermal.h>
-
-i2c@7000c400 {
-	ads1015: ads1015@4a {
-		reg = <0x4a>;
-		compatible = "ads1015";
-		sampling-frequency = <3300>;
-		#io-channel-cells = <1>;
-	};
-};
-
-tboard_thermistor: thermal-sensor {
-	compatible = "generic-adc-thermal";
-	#thermal-sensor-cells = <0>;
-	io-channels = <&ads1015 1>;
-	io-channel-names = "sensor-channel";
-	temperature-lookup-table = <    (-40000) 2578
-					(-39000) 2577
-					(-38000) 2576
-					(-37000) 2575
-					(-36000) 2574
-					(-35000) 2573
-					(-34000) 2572
-					(-33000) 2571
-					(-32000) 2569
-					(-31000) 2568
-					(-30000) 2567
-					::::::::::
-					118000 254
-					119000 247
-					120000 240
-					121000 233
-					122000 226
-					123000 220
-					124000 214
-					125000 208>;
-};
-
-dummy_cool_dev: dummy-cool-dev {
-	compatible = "dummy-cooling-dev";
-	#cooling-cells = <2>; /* min followed by max */
-};
-
-thermal-zones {
-	Tboard {
-		polling-delay = <15000>; /* milliseconds */
-		polling-delay-passive = <0>; /* milliseconds */
-		thermal-sensors = <&tboard_thermistor>;
-
-		trips {
-			therm_est_trip: therm_est_trip {
-				temperature = <40000>;
-				type = "active";
-				hysteresis = <1000>;
-			};
-		};
-
-		cooling-maps {
-			map0 {
-				trip = <&therm_est_trip>;
-				cooling-device = <&dummy_cool_dev THERMAL_NO_LIMIT THERMAL_NO_LIMIT>;
-				contribution = <100>;
-			};
-
-		};
-	};
-};
-- 
GitLab


From a7c42af78b19a11e98a5555a664c343e3a672632 Mon Sep 17 00:00:00 2001
From: Keerthy <j-keerthy@ti.com>
Date: Mon, 10 Oct 2022 09:11:26 +0530
Subject: [PATCH 3895/4122] thermal/drivers/k3_j72xx_bandgap: Fix the debug
 print message

The debug print message to check the workaround applicability is inverted.
Fix the same.

Fixes: ffcb2fc86eb7 ("thermal: k3_j72xx_bandgap: Add the bandgap driver support")
Reported-by: Bryan Brattlof <bb@ti.com>
Signed-off-by: Keerthy <j-keerthy@ti.com>
Link: https://lore.kernel.org/r/20221010034126.3550-1-j-keerthy@ti.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/k3_j72xx_bandgap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c
index 16b6bcf1bf4fa..c073b1023bbe7 100644
--- a/drivers/thermal/k3_j72xx_bandgap.c
+++ b/drivers/thermal/k3_j72xx_bandgap.c
@@ -439,7 +439,7 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 		workaround_needed = false;
 
 	dev_dbg(bgp->dev, "Work around %sneeded\n",
-		workaround_needed ? "not " : "");
+		workaround_needed ? "" : "not ");
 
 	if (!workaround_needed)
 		init_table(5, ref_table, golden_factors);
-- 
GitLab


From 7ef2f023c2c77a452ba5d0c9b1ad04a5a895d553 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@nvidia.com>
Date: Thu, 20 Oct 2022 13:36:58 +0300
Subject: [PATCH 3896/4122] thermal/of: Fix memory leak on
 thermal_of_zone_register() failure

The function does not free 'of_ops' upon failure, leading to a memory
leak [1].

Fix by freeing 'of_ops' in the error path.

[1]
unreferenced object 0xffff8ee846198c80 (size 128):
  comm "swapper/0", pid 1, jiffies 4294699704 (age 70.076s)
  hex dump (first 32 bytes):
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
    d0 3f 6e 8c ff ff ff ff 00 00 00 00 00 00 00 00  .?n.............
  backtrace:
    [<00000000d136f562>] __kmalloc_node_track_caller+0x42/0x120
    [<0000000063f31678>] kmemdup+0x1d/0x40
    [<00000000e6d24096>] thermal_of_zone_register+0x49/0x520
    [<000000005e78c755>] devm_thermal_of_zone_register+0x54/0x90
    [<00000000ee6b209e>] pmbus_add_sensor+0x1b4/0x1d0
    [<00000000896105e3>] pmbus_add_sensor_attrs_one+0x123/0x440
    [<0000000049e990a6>] pmbus_add_sensor_attrs+0xfe/0x1d0
    [<00000000466b5440>] pmbus_do_probe+0x66b/0x14e0
    [<0000000084d42285>] i2c_device_probe+0x13b/0x2f0
    [<0000000029e2ae74>] really_probe+0xce/0x2c0
    [<00000000692df15c>] driver_probe_device+0x19/0xd0
    [<00000000547d9cce>] __device_attach_driver+0x6f/0x100
    [<0000000020abd24b>] bus_for_each_drv+0x76/0xc0
    [<00000000665d9563>] __device_attach+0xfc/0x180
    [<000000008ddd4d6a>] bus_probe_device+0x82/0xa0
    [<000000009e61132b>] device_add+0x3fe/0x920

Fixes: 3fd6d6e2b4e8 ("thermal/of: Rework the thermal device tree initialization")
Signed-off-by: Ido Schimmel <idosch@nvidia.com>
Reviewed-by: Vadim Pasternak <vadimp@nvidia.com>
Link: https://lore.kernel.org/r/20221020103658.802457-1-idosch@nvidia.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/thermal_of.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/thermal_of.c b/drivers/thermal/thermal_of.c
index d4b6335ace15f..aacba30bc10c1 100644
--- a/drivers/thermal/thermal_of.c
+++ b/drivers/thermal/thermal_of.c
@@ -604,13 +604,15 @@ struct thermal_zone_device *thermal_of_zone_register(struct device_node *sensor,
 	if (IS_ERR(np)) {
 		if (PTR_ERR(np) != -ENODEV)
 			pr_err("Failed to find thermal zone for %pOFn id=%d\n", sensor, id);
-		return ERR_CAST(np);
+		ret = PTR_ERR(np);
+		goto out_kfree_of_ops;
 	}
 
 	trips = thermal_of_trips_init(np, &ntrips);
 	if (IS_ERR(trips)) {
 		pr_err("Failed to find trip points for %pOFn id=%d\n", sensor, id);
-		return ERR_CAST(trips);
+		ret = PTR_ERR(trips);
+		goto out_kfree_of_ops;
 	}
 
 	ret = thermal_of_monitor_init(np, &delay, &pdelay);
@@ -659,6 +661,8 @@ out_kfree_tzp:
 	kfree(tzp);
 out_kfree_trips:
 	kfree(trips);
+out_kfree_of_ops:
+	kfree(of_ops);
 
 	return ERR_PTR(ret);
 }
-- 
GitLab


From c6db32ec7c600ae7ab92eb6c56a9fb2918fcd780 Mon Sep 17 00:00:00 2001
From: Robert Marko <robimarko@gmail.com>
Date: Fri, 19 Aug 2022 00:02:41 +0200
Subject: [PATCH 3897/4122] dt-bindings: thermal: tsens: Add ipq8074 compatible

Qualcomm IPQ8074 has tsens v2.3.0 block, though unlike existing v2 IP it
only uses one IRQ, so tsens v2 compatible cannot be used as the fallback.

We also have to make sure that correct interrupts are set according to
compatibles, so populate interrupt information per compatibles.

Signed-off-by: Robert Marko <robimarko@gmail.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20220818220245.338396-1-robimarko@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 .../bindings/thermal/qcom-tsens.yaml          | 76 ++++++++++++++++---
 1 file changed, 65 insertions(+), 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index 7db905c1d5558..f0bd4b979e28a 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -60,6 +60,10 @@ properties:
               - qcom,sm8450-tsens
           - const: qcom,tsens-v2
 
+      - description: v2 of TSENS with combined interrupt
+        enum:
+          - qcom,ipq8074-tsens
+
   reg:
     items:
       - description: TM registers
@@ -67,15 +71,11 @@ properties:
 
   interrupts:
     minItems: 1
-    items:
-      - description: Combined interrupt if upper or lower threshold crossed
-      - description: Interrupt if critical threshold crossed
+    maxItems: 2
 
   interrupt-names:
     minItems: 1
-    items:
-      - const: uplow
-      - const: critical
+    maxItems: 2
 
   nvmem-cells:
     minItems: 1
@@ -129,22 +129,61 @@ allOf:
     then:
       properties:
         interrupts:
-          maxItems: 1
+          items:
+            - description: Combined interrupt if upper or lower threshold crossed
         interrupt-names:
-          maxItems: 1
+          items:
+            - const: uplow
 
-    else:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,msm8953-tsens
+              - qcom,msm8996-tsens
+              - qcom,msm8998-tsens
+              - qcom,sc7180-tsens
+              - qcom,sc7280-tsens
+              - qcom,sc8180x-tsens
+              - qcom,sdm630-tsens
+              - qcom,sdm845-tsens
+              - qcom,sm8150-tsens
+              - qcom,sm8250-tsens
+              - qcom,sm8350-tsens
+              - qcom,tsens-v2
+    then:
+      properties:
+        interrupts:
+          items:
+            - description: Combined interrupt if upper or lower threshold crossed
+            - description: Interrupt if critical threshold crossed
+        interrupt-names:
+          items:
+            - const: uplow
+            - const: critical
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,ipq8074-tsens
+    then:
       properties:
         interrupts:
-          minItems: 2
+          items:
+            - description: Combined interrupt if upper, lower or critical thresholds crossed
         interrupt-names:
-          minItems: 2
+          items:
+            - const: combined
 
   - if:
       properties:
         compatible:
           contains:
             enum:
+              - qcom,ipq8074-tsens
               - qcom,tsens-v0_1
               - qcom,tsens-v1
               - qcom,tsens-v2
@@ -227,4 +266,19 @@ examples:
            #qcom,sensors = <13>;
            #thermal-sensor-cells = <1>;
     };
+
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    // Example 4 (for any IPQ8074 based SoC-s):
+    tsens4: thermal-sensor@4a9000 {
+           compatible = "qcom,ipq8074-tsens";
+           reg = <0x4a9000 0x1000>,
+                 <0x4a8000 0x1000>;
+
+           interrupts = <GIC_SPI 184 IRQ_TYPE_LEVEL_HIGH>;
+           interrupt-names = "combined";
+
+           #qcom,sensors = <16>;
+           #thermal-sensor-cells = <1>;
+    };
 ...
-- 
GitLab


From 4360af35273b742ffc6605ce44f37ac654272443 Mon Sep 17 00:00:00 2001
From: Robert Marko <robimarko@gmail.com>
Date: Fri, 19 Aug 2022 00:02:42 +0200
Subject: [PATCH 3898/4122] thermal/drivers/tsens: Add support for combined
 interrupt

Despite using tsens v2.3 IP, IPQ8074 and IPQ6018 only have one IRQ for
signaling both up/low and critical trips.

Signed-off-by: Robert Marko <robimarko@gmail.com>
Reviewed-by: Bjorn Andersson <andersson@kernel.org>
Link: https://lore.kernel.org/r/20220818220245.338396-2-robimarko@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/qcom/tsens-8960.c |  1 +
 drivers/thermal/qcom/tsens-v0_1.c |  1 +
 drivers/thermal/qcom/tsens-v1.c   |  1 +
 drivers/thermal/qcom/tsens-v2.c   |  1 +
 drivers/thermal/qcom/tsens.c      | 38 ++++++++++++++++++++++++++-----
 drivers/thermal/qcom/tsens.h      |  2 ++
 6 files changed, 38 insertions(+), 6 deletions(-)

diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c
index 67c1748cdf734..ee584e5b07e56 100644
--- a/drivers/thermal/qcom/tsens-8960.c
+++ b/drivers/thermal/qcom/tsens-8960.c
@@ -269,6 +269,7 @@ static const struct tsens_ops ops_8960 = {
 static struct tsens_features tsens_8960_feat = {
 	.ver_major	= VER_0,
 	.crit_int	= 0,
+	.combo_int	= 0,
 	.adc		= 1,
 	.srot_split	= 0,
 	.max_sensors	= 11,
diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c
index 327f37202c69f..8cdc3d135d6a8 100644
--- a/drivers/thermal/qcom/tsens-v0_1.c
+++ b/drivers/thermal/qcom/tsens-v0_1.c
@@ -539,6 +539,7 @@ static int calibrate_9607(struct tsens_priv *priv)
 static struct tsens_features tsens_v0_1_feat = {
 	.ver_major	= VER_0_1,
 	.crit_int	= 0,
+	.combo_int	= 0,
 	.adc		= 1,
 	.srot_split	= 1,
 	.max_sensors	= 11,
diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c
index 573e261ccca74..a4f561a6e5821 100644
--- a/drivers/thermal/qcom/tsens-v1.c
+++ b/drivers/thermal/qcom/tsens-v1.c
@@ -302,6 +302,7 @@ static int calibrate_8976(struct tsens_priv *priv)
 static struct tsens_features tsens_v1_feat = {
 	.ver_major	= VER_1_X,
 	.crit_int	= 0,
+	.combo_int	= 0,
 	.adc		= 1,
 	.srot_split	= 1,
 	.max_sensors	= 11,
diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c
index b293ed32174b5..129cdb247381c 100644
--- a/drivers/thermal/qcom/tsens-v2.c
+++ b/drivers/thermal/qcom/tsens-v2.c
@@ -31,6 +31,7 @@
 static struct tsens_features tsens_v2_feat = {
 	.ver_major	= VER_2_X,
 	.crit_int	= 1,
+	.combo_int	= 0,
 	.adc		= 0,
 	.srot_split	= 1,
 	.max_sensors	= 16,
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index b1b10005fb286..e2511e76f2969 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -532,6 +532,27 @@ static irqreturn_t tsens_irq_thread(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+/**
+ * tsens_combined_irq_thread() - Threaded interrupt handler for combined interrupts
+ * @irq: irq number
+ * @data: tsens controller private data
+ *
+ * Handle the combined interrupt as if it were 2 separate interrupts, so call the
+ * critical handler first and then the up/low one.
+ *
+ * Return: IRQ_HANDLED
+ */
+static irqreturn_t tsens_combined_irq_thread(int irq, void *data)
+{
+	irqreturn_t ret;
+
+	ret = tsens_critical_irq_thread(irq, data);
+	if (ret != IRQ_HANDLED)
+		return ret;
+
+	return tsens_irq_thread(irq, data);
+}
+
 static int tsens_set_trips(struct thermal_zone_device *tz, int low, int high)
 {
 	struct tsens_sensor *s = tz->devdata;
@@ -1071,13 +1092,18 @@ static int tsens_register(struct tsens_priv *priv)
 				   tsens_mC_to_hw(priv->sensor, 0));
 	}
 
-	ret = tsens_register_irq(priv, "uplow", tsens_irq_thread);
-	if (ret < 0)
-		return ret;
+	if (priv->feat->combo_int) {
+		ret = tsens_register_irq(priv, "combined",
+					 tsens_combined_irq_thread);
+	} else {
+		ret = tsens_register_irq(priv, "uplow", tsens_irq_thread);
+		if (ret < 0)
+			return ret;
 
-	if (priv->feat->crit_int)
-		ret = tsens_register_irq(priv, "critical",
-					 tsens_critical_irq_thread);
+		if (priv->feat->crit_int)
+			ret = tsens_register_irq(priv, "critical",
+						 tsens_critical_irq_thread);
+	}
 
 	return ret;
 }
diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h
index ba05c82333565..1678c5e9e60bd 100644
--- a/drivers/thermal/qcom/tsens.h
+++ b/drivers/thermal/qcom/tsens.h
@@ -493,6 +493,7 @@ enum regfield_ids {
  * struct tsens_features - Features supported by the IP
  * @ver_major: Major number of IP version
  * @crit_int: does the IP support critical interrupts?
+ * @combo_int: does the IP use one IRQ for up, low and critical thresholds?
  * @adc:      do the sensors only output adc code (instead of temperature)?
  * @srot_split: does the IP neatly splits the register space into SROT and TM,
  *              with SROT only being available to secure boot firmware?
@@ -502,6 +503,7 @@ enum regfield_ids {
 struct tsens_features {
 	unsigned int ver_major;
 	unsigned int crit_int:1;
+	unsigned int combo_int:1;
 	unsigned int adc:1;
 	unsigned int srot_split:1;
 	unsigned int has_watchdog:1;
-- 
GitLab


From f63baced3839f591db76c227e09d47001373d6db Mon Sep 17 00:00:00 2001
From: Robert Marko <robimarko@gmail.com>
Date: Fri, 19 Aug 2022 00:02:43 +0200
Subject: [PATCH 3899/4122] thermal/drivers/tsens: Allow configuring min and
 max trips

IPQ8074 and IPQ6018 dont support negative trip temperatures and support
up to 204 degrees C as the max trip temperature.

So, instead of always setting the -40 as min and 120 degrees C as max
allow it to be configured as part of the features.

Signed-off-by: Robert Marko <robimarko@gmail.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20220818220245.338396-3-robimarko@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/qcom/tsens-8960.c | 2 ++
 drivers/thermal/qcom/tsens-v0_1.c | 2 ++
 drivers/thermal/qcom/tsens-v1.c   | 2 ++
 drivers/thermal/qcom/tsens-v2.c   | 2 ++
 drivers/thermal/qcom/tsens.c      | 4 ++--
 drivers/thermal/qcom/tsens.h      | 4 ++++
 6 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/qcom/tsens-8960.c b/drivers/thermal/qcom/tsens-8960.c
index ee584e5b07e56..4585904fb3800 100644
--- a/drivers/thermal/qcom/tsens-8960.c
+++ b/drivers/thermal/qcom/tsens-8960.c
@@ -273,6 +273,8 @@ static struct tsens_features tsens_8960_feat = {
 	.adc		= 1,
 	.srot_split	= 0,
 	.max_sensors	= 11,
+	.trip_min_temp	= -40000,
+	.trip_max_temp	= 120000,
 };
 
 struct tsens_plat_data data_8960 = {
diff --git a/drivers/thermal/qcom/tsens-v0_1.c b/drivers/thermal/qcom/tsens-v0_1.c
index 8cdc3d135d6a8..04d012e4f7288 100644
--- a/drivers/thermal/qcom/tsens-v0_1.c
+++ b/drivers/thermal/qcom/tsens-v0_1.c
@@ -543,6 +543,8 @@ static struct tsens_features tsens_v0_1_feat = {
 	.adc		= 1,
 	.srot_split	= 1,
 	.max_sensors	= 11,
+	.trip_min_temp	= -40000,
+	.trip_max_temp	= 120000,
 };
 
 static const struct reg_field tsens_v0_1_regfields[MAX_REGFIELDS] = {
diff --git a/drivers/thermal/qcom/tsens-v1.c b/drivers/thermal/qcom/tsens-v1.c
index a4f561a6e5821..1d7f8a80bd13a 100644
--- a/drivers/thermal/qcom/tsens-v1.c
+++ b/drivers/thermal/qcom/tsens-v1.c
@@ -306,6 +306,8 @@ static struct tsens_features tsens_v1_feat = {
 	.adc		= 1,
 	.srot_split	= 1,
 	.max_sensors	= 11,
+	.trip_min_temp	= -40000,
+	.trip_max_temp	= 120000,
 };
 
 static const struct reg_field tsens_v1_regfields[MAX_REGFIELDS] = {
diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c
index 129cdb247381c..9babc69bfd22e 100644
--- a/drivers/thermal/qcom/tsens-v2.c
+++ b/drivers/thermal/qcom/tsens-v2.c
@@ -35,6 +35,8 @@ static struct tsens_features tsens_v2_feat = {
 	.adc		= 0,
 	.srot_split	= 1,
 	.max_sensors	= 16,
+	.trip_min_temp	= -40000,
+	.trip_max_temp	= 120000,
 };
 
 static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = {
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index e2511e76f2969..f31510489a9ab 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -573,8 +573,8 @@ static int tsens_set_trips(struct thermal_zone_device *tz, int low, int high)
 	dev_dbg(dev, "[%u] %s: proposed thresholds: (%d:%d)\n",
 		hw_id, __func__, low, high);
 
-	cl_high = clamp_val(high, -40000, 120000);
-	cl_low  = clamp_val(low, -40000, 120000);
+	cl_high = clamp_val(high, priv->feat->trip_min_temp, priv->feat->trip_max_temp);
+	cl_low  = clamp_val(low, priv->feat->trip_min_temp, priv->feat->trip_max_temp);
 
 	high_val = tsens_mC_to_hw(s, cl_high);
 	low_val  = tsens_mC_to_hw(s, cl_low);
diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h
index 1678c5e9e60bd..8dc21ca0f2a3f 100644
--- a/drivers/thermal/qcom/tsens.h
+++ b/drivers/thermal/qcom/tsens.h
@@ -499,6 +499,8 @@ enum regfield_ids {
  *              with SROT only being available to secure boot firmware?
  * @has_watchdog: does this IP support watchdog functionality?
  * @max_sensors: maximum sensors supported by this version of the IP
+ * @trip_min_temp: minimum trip temperature supported by this version of the IP
+ * @trip_max_temp: maximum trip temperature supported by this version of the IP
  */
 struct tsens_features {
 	unsigned int ver_major;
@@ -508,6 +510,8 @@ struct tsens_features {
 	unsigned int srot_split:1;
 	unsigned int has_watchdog:1;
 	unsigned int max_sensors;
+	int trip_min_temp;
+	int trip_max_temp;
 };
 
 /**
-- 
GitLab


From 6840455debd38ea82b1f9fbbf8019944da36f7eb Mon Sep 17 00:00:00 2001
From: Robert Marko <robimarko@gmail.com>
Date: Fri, 19 Aug 2022 00:02:44 +0200
Subject: [PATCH 3900/4122] thermal/drivers/tsens: Add IPQ8074 support

Qualcomm IPQ8074 uses tsens v2.3 IP, however unlike other tsens v2 IP
it only has one IRQ, that is used for up/low as well as critical.
It also does not support negative trip temperatures.

Signed-off-by: Robert Marko <robimarko@gmail.com>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Link: https://lore.kernel.org/r/20220818220245.338396-4-robimarko@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/thermal/qcom/tsens-v2.c | 17 +++++++++++++++++
 drivers/thermal/qcom/tsens.c    |  3 +++
 drivers/thermal/qcom/tsens.h    |  2 +-
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/qcom/tsens-v2.c b/drivers/thermal/qcom/tsens-v2.c
index 9babc69bfd22e..29a61d2d6ca31 100644
--- a/drivers/thermal/qcom/tsens-v2.c
+++ b/drivers/thermal/qcom/tsens-v2.c
@@ -39,6 +39,17 @@ static struct tsens_features tsens_v2_feat = {
 	.trip_max_temp	= 120000,
 };
 
+static struct tsens_features ipq8074_feat = {
+	.ver_major	= VER_2_X,
+	.crit_int	= 1,
+	.combo_int	= 1,
+	.adc		= 0,
+	.srot_split	= 1,
+	.max_sensors	= 16,
+	.trip_min_temp	= 0,
+	.trip_max_temp	= 204000,
+};
+
 static const struct reg_field tsens_v2_regfields[MAX_REGFIELDS] = {
 	/* ----- SROT ------ */
 	/* VERSION */
@@ -104,6 +115,12 @@ struct tsens_plat_data data_tsens_v2 = {
 	.fields	= tsens_v2_regfields,
 };
 
+struct tsens_plat_data data_ipq8074 = {
+	.ops		= &ops_generic_v2,
+	.feat		= &ipq8074_feat,
+	.fields	= tsens_v2_regfields,
+};
+
 /* Kept around for backward compatibility with old msm8996.dtsi */
 struct tsens_plat_data data_8996 = {
 	.num_sensors	= 13,
diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index f31510489a9ab..b7d978295b2f9 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -980,6 +980,9 @@ static const struct of_device_id tsens_table[] = {
 	{
 		.compatible = "qcom,ipq8064-tsens",
 		.data = &data_8960,
+	}, {
+		.compatible = "qcom,ipq8074-tsens",
+		.data = &data_ipq8074,
 	}, {
 		.compatible = "qcom,mdm9607-tsens",
 		.data = &data_9607,
diff --git a/drivers/thermal/qcom/tsens.h b/drivers/thermal/qcom/tsens.h
index 8dc21ca0f2a3f..899af128855f7 100644
--- a/drivers/thermal/qcom/tsens.h
+++ b/drivers/thermal/qcom/tsens.h
@@ -597,6 +597,6 @@ extern struct tsens_plat_data data_8916, data_8939, data_8974, data_9607;
 extern struct tsens_plat_data data_tsens_v1, data_8976;
 
 /* TSENS v2 targets */
-extern struct tsens_plat_data data_8996, data_tsens_v2;
+extern struct tsens_plat_data data_8996, data_ipq8074, data_tsens_v2;
 
 #endif /* __QCOM_TSENS_H__ */
-- 
GitLab


From de48d8766afcd97d147699aaff78a338081c9973 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Sat, 22 Oct 2022 14:56:55 +0200
Subject: [PATCH 3901/4122] thermal/drivers/qcom/tsens: Init debugfs only with
 successful probe

Calibrate and tsens_register can fail or PROBE_DEFER. This will cause a
double or a wrong init of the debugfs information. Init debugfs only
with successful probe fixing warning about directory already present.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Acked-by: Thara Gopinath <thara.gopinath@linaro.org>
Link: https://lore.kernel.org/r/20221022125657.22530-2-ansuelsmth@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/tsens.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index b7d978295b2f9..3ac780404c72c 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -939,8 +939,6 @@ int __init init_common(struct tsens_priv *priv)
 	if (tsens_version(priv) >= VER_0_1)
 		tsens_enable_irq(priv);
 
-	tsens_debug_init(op);
-
 err_put_device:
 	put_device(&op->dev);
 	return ret;
@@ -1182,7 +1180,11 @@ static int tsens_probe(struct platform_device *pdev)
 		}
 	}
 
-	return tsens_register(priv);
+	ret = tsens_register(priv);
+	if (!ret)
+		tsens_debug_init(pdev);
+
+	return ret;
 }
 
 static int tsens_remove(struct platform_device *pdev)
-- 
GitLab


From c7e077e921fa94e0c06c8d14af6c0504c8a5f4bd Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Sat, 22 Oct 2022 14:56:56 +0200
Subject: [PATCH 3902/4122] thermal/drivers/qcom/tsens: Fix wrong version id
 dbg_version_show

For VER_0 the version was incorrectly reported as 0.1.0.

Fix that and correctly report the major version for this old tsens
revision.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Link: https://lore.kernel.org/r/20221022125657.22530-3-ansuelsmth@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/tsens.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 3ac780404c72c..5f8d8f04351eb 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -713,7 +713,7 @@ static int dbg_version_show(struct seq_file *s, void *data)
 			return ret;
 		seq_printf(s, "%d.%d.%d\n", maj_ver, min_ver, step_ver);
 	} else {
-		seq_puts(s, "0.1.0\n");
+		seq_printf(s, "0.%d.0\n", priv->feat->ver_major);
 	}
 
 	return 0;
-- 
GitLab


From 89992d95ed1046338c7866ef7bbe6de543a2af91 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Sat, 22 Oct 2022 14:56:57 +0200
Subject: [PATCH 3903/4122] thermal/drivers/qcom/tsens: Rework debugfs file
 structure

The current tsens debugfs structure is composed by:
- a tsens dir in debugfs with a version file
- a directory for each tsens istance with sensors file to dump all the
  sensors value.

This works on the assumption that we have the same version for each
istance but this assumption seems fragile and with more than one tsens
istance results in the version file not tracking each of them.

A better approach is to just create a subdirectory for each tsens
istance and put there version and sensors debugfs file.

Using this new implementation results in less code since debugfs entry
are created only on successful tsens probe.

Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Link: https://lore.kernel.org/r/20221022125657.22530-4-ansuelsmth@gmail.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/tsens.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/thermal/qcom/tsens.c b/drivers/thermal/qcom/tsens.c
index 5f8d8f04351eb..b5b136ff323f9 100644
--- a/drivers/thermal/qcom/tsens.c
+++ b/drivers/thermal/qcom/tsens.c
@@ -725,21 +725,14 @@ DEFINE_SHOW_ATTRIBUTE(dbg_sensors);
 static void tsens_debug_init(struct platform_device *pdev)
 {
 	struct tsens_priv *priv = platform_get_drvdata(pdev);
-	struct dentry *root, *file;
 
-	root = debugfs_lookup("tsens", NULL);
-	if (!root)
+	priv->debug_root = debugfs_lookup("tsens", NULL);
+	if (!priv->debug_root)
 		priv->debug_root = debugfs_create_dir("tsens", NULL);
-	else
-		priv->debug_root = root;
-
-	file = debugfs_lookup("version", priv->debug_root);
-	if (!file)
-		debugfs_create_file("version", 0444, priv->debug_root,
-				    pdev, &dbg_version_fops);
 
 	/* A directory for each instance of the TSENS IP */
 	priv->debug = debugfs_create_dir(dev_name(&pdev->dev), priv->debug_root);
+	debugfs_create_file("version", 0444, priv->debug, pdev, &dbg_version_fops);
 	debugfs_create_file("sensors", 0444, priv->debug, pdev, &dbg_sensors_fops);
 }
 #else
-- 
GitLab


From 8848c0d7a0782c263a7827697d5acfcc09a19a5f Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 2 Dec 2022 17:23:49 +0100
Subject: [PATCH 3904/4122] dt-bindings: thermal: imx8mm-thermal: Document
 optional nvmem-cells

The TMU TASR, TCALIVn, TRIM registers must be explicitly programmed with
calibration values from OCOTP. Document optional phandle to OCOTP nvmem
provider.

Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 .../devicetree/bindings/thermal/imx8mm-thermal.yaml        | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml
index 89c54e08ee61b..b90726229ac9c 100644
--- a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml
@@ -32,6 +32,13 @@ properties:
   clocks:
     maxItems: 1
 
+  nvmem-cells:
+    maxItems: 1
+    description: Phandle to the calibration data provided by ocotp
+
+  nvmem-cell-names:
+    const: calib
+
   "#thermal-sensor-cells":
     description: |
       Number of cells required to uniquely identify the thermal
-- 
GitLab


From 403291648823f819f7baaccd47589fcf87187c32 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 2 Dec 2022 17:23:53 +0100
Subject: [PATCH 3905/4122] thermal/drivers/imx: Add support for loading
 calibration data from OCOTP

The TMU TASR, TCALIVn, TRIM registers must be explicitly programmed with
calibration values in OCOTP. Add support for reading the OCOTP calibration
data and programming those into the TMU hardware.

The MX8MM/MX8MN TMUv1 uses only one OCOTP cell, while MX8MP TMUv2 uses 4,
the programming differs in each case.

Based on U-Boot commits:
70487ff386c ("imx8mm: Load fuse for TMU TCALIV and TASR")
ebb9aab318b ("imx: load calibration parameters from fuse for i.MX8MP")

Reviewed-by: Peng Fan <peng.fan@nxp.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/imx8mm_thermal.c | 164 +++++++++++++++++++++++++++++++
 1 file changed, 164 insertions(+)

diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
index 182cc207afe90..e709c03bc4b9f 100644
--- a/drivers/thermal/imx8mm_thermal.c
+++ b/drivers/thermal/imx8mm_thermal.c
@@ -10,9 +10,11 @@
 #include <linux/err.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/nvmem-consumer.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/slab.h>
 #include <linux/thermal.h>
 
 #include "thermal_core.h"
@@ -20,6 +22,22 @@
 #define TER			0x0	/* TMU enable */
 #define TPS			0x4
 #define TRITSR			0x20	/* TMU immediate temp */
+/* TMU calibration data registers */
+#define TASR			0x28
+#define TASR_BUF_SLOPE_MASK	GENMASK(19, 16)
+#define TASR_BUF_VREF_MASK	GENMASK(4, 0)	/* TMU_V1 */
+#define TASR_BUF_VERF_SEL_MASK	GENMASK(1, 0)	/* TMU_V2 */
+#define TCALIV(n)		(0x30 + ((n) * 4))
+#define TCALIV_EN		BIT(31)
+#define TCALIV_HR_MASK		GENMASK(23, 16)	/* TMU_V1 */
+#define TCALIV_RT_MASK		GENMASK(7, 0)	/* TMU_V1 */
+#define TCALIV_SNSR105C_MASK	GENMASK(27, 16)	/* TMU_V2 */
+#define TCALIV_SNSR25C_MASK	GENMASK(11, 0)	/* TMU_V2 */
+#define TRIM			0x3c
+#define TRIM_BJT_CUR_MASK	GENMASK(23, 20)
+#define TRIM_BGR_MASK		GENMASK(31, 28)
+#define TRIM_VLSB_MASK		GENMASK(15, 12)
+#define TRIM_EN_CH		BIT(7)
 
 #define TER_ADC_PD		BIT(30)
 #define TER_EN			BIT(31)
@@ -32,6 +50,25 @@
 #define SIGN_BIT		BIT(7)
 #define TEMP_VAL_MASK		GENMASK(6, 0)
 
+/* TMU OCOTP calibration data bitfields */
+#define ANA0_EN			BIT(25)
+#define ANA0_BUF_VREF_MASK	GENMASK(24, 20)
+#define ANA0_BUF_SLOPE_MASK	GENMASK(19, 16)
+#define ANA0_HR_MASK		GENMASK(15, 8)
+#define ANA0_RT_MASK		GENMASK(7, 0)
+#define TRIM2_VLSB_MASK		GENMASK(23, 20)
+#define TRIM2_BGR_MASK		GENMASK(19, 16)
+#define TRIM2_BJT_CUR_MASK	GENMASK(15, 12)
+#define TRIM2_BUF_SLOP_SEL_MASK	GENMASK(11, 8)
+#define TRIM2_BUF_VERF_SEL_MASK	GENMASK(7, 6)
+#define TRIM3_TCA25_0_LSB_MASK	GENMASK(31, 28)
+#define TRIM3_TCA40_0_MASK	GENMASK(27, 16)
+#define TRIM4_TCA40_1_MASK	GENMASK(31, 20)
+#define TRIM4_TCA105_0_MASK	GENMASK(19, 8)
+#define TRIM4_TCA25_0_MSB_MASK	GENMASK(7, 0)
+#define TRIM5_TCA105_1_MASK	GENMASK(23, 12)
+#define TRIM5_TCA25_1_MASK	GENMASK(11, 0)
+
 #define VER1_TEMP_LOW_LIMIT	10000
 #define VER2_TEMP_LOW_LIMIT	-40000
 #define VER2_TEMP_HIGH_LIMIT	125000
@@ -134,6 +171,129 @@ static void imx8mm_tmu_probe_sel_all(struct imx8mm_tmu *tmu)
 	writel_relaxed(val, tmu->base + TPS);
 }
 
+static int imx8mm_tmu_probe_set_calib_v1(struct platform_device *pdev,
+					 struct imx8mm_tmu *tmu)
+{
+	struct device *dev = &pdev->dev;
+	u32 ana0;
+	int ret;
+
+	ret = nvmem_cell_read_u32(&pdev->dev, "calib", &ana0);
+	if (ret) {
+		dev_warn(dev, "Failed to read OCOTP nvmem cell (%d).\n", ret);
+		return ret;
+	}
+
+	writel(FIELD_PREP(TASR_BUF_VREF_MASK,
+			  FIELD_GET(ANA0_BUF_VREF_MASK, ana0)) |
+	       FIELD_PREP(TASR_BUF_SLOPE_MASK,
+			  FIELD_GET(ANA0_BUF_SLOPE_MASK, ana0)),
+	       tmu->base + TASR);
+
+	writel(FIELD_PREP(TCALIV_RT_MASK, FIELD_GET(ANA0_RT_MASK, ana0)) |
+	       FIELD_PREP(TCALIV_HR_MASK, FIELD_GET(ANA0_HR_MASK, ana0)) |
+	       ((ana0 & ANA0_EN) ? TCALIV_EN : 0),
+	       tmu->base + TCALIV(0));
+
+	return 0;
+}
+
+static int imx8mm_tmu_probe_set_calib_v2(struct platform_device *pdev,
+					 struct imx8mm_tmu *tmu)
+{
+	struct device *dev = &pdev->dev;
+	struct nvmem_cell *cell;
+	u32 trim[4] = { 0 };
+	size_t len;
+	void *buf;
+
+	cell = nvmem_cell_get(dev, "calib");
+	if (IS_ERR(cell))
+		return PTR_ERR(cell);
+
+	buf = nvmem_cell_read(cell, &len);
+	nvmem_cell_put(cell);
+
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	memcpy(trim, buf, min(len, sizeof(trim)));
+	kfree(buf);
+
+	if (len != 16) {
+		dev_err(dev,
+			"OCOTP nvmem cell length is %zu, must be 16.\n", len);
+		return -EINVAL;
+	}
+
+	/* Blank sample hardware */
+	if (!trim[0] && !trim[1] && !trim[2] && !trim[3]) {
+		/* Use a default 25C binary codes */
+		writel(FIELD_PREP(TCALIV_SNSR25C_MASK, 0x63c),
+		       tmu->base + TCALIV(0));
+		writel(FIELD_PREP(TCALIV_SNSR25C_MASK, 0x63c),
+		       tmu->base + TCALIV(1));
+		return 0;
+	}
+
+	writel(FIELD_PREP(TASR_BUF_VERF_SEL_MASK,
+			  FIELD_GET(TRIM2_BUF_VERF_SEL_MASK, trim[0])) |
+	       FIELD_PREP(TASR_BUF_SLOPE_MASK,
+			  FIELD_GET(TRIM2_BUF_SLOP_SEL_MASK, trim[0])),
+	       tmu->base + TASR);
+
+	writel(FIELD_PREP(TRIM_BJT_CUR_MASK,
+			  FIELD_GET(TRIM2_BJT_CUR_MASK, trim[0])) |
+	       FIELD_PREP(TRIM_BGR_MASK, FIELD_GET(TRIM2_BGR_MASK, trim[0])) |
+	       FIELD_PREP(TRIM_VLSB_MASK, FIELD_GET(TRIM2_VLSB_MASK, trim[0])) |
+	       TRIM_EN_CH,
+	       tmu->base + TRIM);
+
+	writel(FIELD_PREP(TCALIV_SNSR25C_MASK,
+			  FIELD_GET(TRIM3_TCA25_0_LSB_MASK, trim[1]) |
+			  (FIELD_GET(TRIM4_TCA25_0_MSB_MASK, trim[2]) << 4)) |
+	       FIELD_PREP(TCALIV_SNSR105C_MASK,
+			  FIELD_GET(TRIM4_TCA105_0_MASK, trim[2])),
+	       tmu->base + TCALIV(0));
+
+	writel(FIELD_PREP(TCALIV_SNSR25C_MASK,
+			  FIELD_GET(TRIM5_TCA25_1_MASK, trim[3])) |
+	       FIELD_PREP(TCALIV_SNSR105C_MASK,
+			  FIELD_GET(TRIM5_TCA105_1_MASK, trim[3])),
+	       tmu->base + TCALIV(1));
+
+	writel(FIELD_PREP(TCALIV_SNSR25C_MASK,
+			  FIELD_GET(TRIM3_TCA40_0_MASK, trim[1])) |
+	       FIELD_PREP(TCALIV_SNSR105C_MASK,
+			  FIELD_GET(TRIM4_TCA40_1_MASK, trim[2])),
+	       tmu->base + TCALIV(2));
+
+	return 0;
+}
+
+static int imx8mm_tmu_probe_set_calib(struct platform_device *pdev,
+				      struct imx8mm_tmu *tmu)
+{
+	struct device *dev = &pdev->dev;
+
+	/*
+	 * Lack of calibration data OCOTP reference is not considered
+	 * fatal to retain compatibility with old DTs. It is however
+	 * strongly recommended to update such old DTs to get correct
+	 * temperature compensation values for each SoC.
+	 */
+	if (!of_find_property(pdev->dev.of_node, "nvmem-cells", NULL)) {
+		dev_warn(dev,
+			 "No OCOTP nvmem reference found, SoC-specific calibration not loaded. Please update your DT.\n");
+		return 0;
+	}
+
+	if (tmu->socdata->version == TMU_VER1)
+		return imx8mm_tmu_probe_set_calib_v1(pdev, tmu);
+
+	return imx8mm_tmu_probe_set_calib_v2(pdev, tmu);
+}
+
 static int imx8mm_tmu_probe(struct platform_device *pdev)
 {
 	const struct thermal_soc_data *data;
@@ -186,6 +346,10 @@ static int imx8mm_tmu_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, tmu);
 
+	ret = imx8mm_tmu_probe_set_calib(pdev, tmu);
+	if (ret)
+		goto disable_clk;
+
 	/* enable all the probes for V2 TMU */
 	if (tmu->socdata->version == TMU_VER2)
 		imx8mm_tmu_probe_sel_all(tmu);
-- 
GitLab


From 3f9cb57962bcfad88bcac750f85ef7425475b102 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Mon, 21 Nov 2022 16:47:10 +0100
Subject: [PATCH 3906/4122] thermal: ti-soc-thermal: Drop comma after SoC match
 table sentinel

It does not make sense to have a comma after a sentinel, as any new
elements must be added before the sentinel.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Keerthy <j-keerthy@ti.com>
Link: https://lore.kernel.org/r/1d6de2a80b919cb11199e56ac06ad21c273ebe57.1669045586.git.geert+renesas@glider.be
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/ti-soc-thermal/ti-bandgap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
index 67050a1a5b07f..a1c9a15301832 100644
--- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c
+++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c
@@ -878,7 +878,7 @@ static struct ti_bandgap *ti_bandgap_build(struct platform_device *pdev)
  */
 static const struct soc_device_attribute soc_no_cpu_notifier[] = {
 	{ .machine = "OMAP4430" },
-	{ /* sentinel */ },
+	{ /* sentinel */ }
 };
 
 /***   Device driver call backs   ***/
-- 
GitLab


From c464856e63a4c9631add6a27340bd94beb9eb27f Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Wed, 30 Nov 2022 13:20:38 +0000
Subject: [PATCH 3907/4122] dt-bindings: thermal: mediatek: add compatible
 string for MT7986 and MT7981 SoC

Document compatible string 'mediatek,mt7986-thermal' for V3 thermal
unit found in MT7986 SoCs.
'mediatek,mt7981-thermal' is also added as it is identical with the
thermal unit of MT7986.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 Documentation/devicetree/bindings/thermal/mediatek-thermal.txt | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
index 5c7e7bdd029ab..38b32bb447e39 100644
--- a/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
+++ b/Documentation/devicetree/bindings/thermal/mediatek-thermal.txt
@@ -13,6 +13,8 @@ Required properties:
   - "mediatek,mt2701-thermal" : For MT2701 family of SoCs
   - "mediatek,mt2712-thermal" : For MT2712 family of SoCs
   - "mediatek,mt7622-thermal" : For MT7622 SoC
+  - "mediatek,mt7981-thermal", "mediatek,mt7986-thermal" : For MT7981 SoC
+  - "mediatek,mt7986-thermal" : For MT7986 SoC
   - "mediatek,mt8183-thermal" : For MT8183 family of SoCs
   - "mediatek,mt8516-thermal", "mediatek,mt2701-thermal : For MT8516 family of SoCs
 - reg: Address range of the thermal controller
-- 
GitLab


From 6f8941646234e2d6e589377aea0dd1ba52365623 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 2 Nov 2022 16:26:30 +0100
Subject: [PATCH 3908/4122] thermal: qcom-spmi-adc-tm5: suppress probe-deferral
 error message

Drivers should not be logging errors on probe deferral. Switch to using
dev_err_probe() to log failures when parsing the devicetree to avoid
errors like:

	qcom-spmi-adc-tm5 c440000.spmi:pmic@0:adc-tm@3400: get dt data failed: -517

when a channel is not yet available.

Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Reviewed-by: Manivannan Sadhasivam <mani@kernel.org>
Reviewed-by: Andrew Halaney <ahalaney@redhat.com>
Link: https://lore.kernel.org/r/20221102152630.696-1-johan+linaro@kernel.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/qcom-spmi-adc-tm5.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
index 1b2c43eab27d1..88462a4628fb5 100644
--- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
+++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
@@ -1030,10 +1030,8 @@ static int adc_tm5_probe(struct platform_device *pdev)
 		return irq;
 
 	ret = adc_tm5_get_dt_data(adc_tm, node);
-	if (ret) {
-		dev_err(dev, "get dt data failed: %d\n", ret);
-		return ret;
-	}
+	if (ret)
+		return dev_err_probe(dev, ret, "get dt data failed\n");
 
 	ret = adc_tm->data->init(adc_tm);
 	if (ret) {
-- 
GitLab


From de95d1341a3e682b95ad759bd58b86224819fbec Mon Sep 17 00:00:00 2001
From: Alexander Stein <alexander.stein@ew.tq-group.com>
Date: Tue, 26 Jul 2022 14:23:31 +0200
Subject: [PATCH 3909/4122] thermal/drivers/imx8mm: Add hwmon support

Expose thermal sensors as HWMON devices.

Signed-off-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Link: https://lore.kernel.org/r/20220726122331.323093-1-alexander.stein@ew.tq-group.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/imx8mm_thermal.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/thermal/imx8mm_thermal.c b/drivers/thermal/imx8mm_thermal.c
index e709c03bc4b9f..d247b48696cb6 100644
--- a/drivers/thermal/imx8mm_thermal.c
+++ b/drivers/thermal/imx8mm_thermal.c
@@ -18,6 +18,7 @@
 #include <linux/thermal.h>
 
 #include "thermal_core.h"
+#include "thermal_hwmon.h"
 
 #define TER			0x0	/* TMU enable */
 #define TPS			0x4
@@ -342,6 +343,9 @@ static int imx8mm_tmu_probe(struct platform_device *pdev)
 			goto disable_clk;
 		}
 		tmu->sensors[i].hw_id = i;
+
+		if (devm_thermal_add_hwmon_sysfs(tmu->sensors[i].tzd))
+			dev_warn(&pdev->dev, "failed to add hwmon sysfs attributes\n");
 	}
 
 	platform_set_drvdata(pdev, tmu);
-- 
GitLab


From de04f680b0ab71d2b9085903fe608a15541f66cb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 26 Nov 2022 11:42:25 +0100
Subject: [PATCH 3910/4122] thermal/core/power allocator: Remove a useless
 include

This file does not use rcu, so there is no point in including
<linux/rculist.h>.

Remove it.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Link: https://lore.kernel.org/r/9adeec47cb5a8193016272d5c8bf936235c1711d.1669459337.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/gov_power_allocator.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/thermal/gov_power_allocator.c b/drivers/thermal/gov_power_allocator.c
index 2d1aeaba38a88..d5d4eae16771b 100644
--- a/drivers/thermal/gov_power_allocator.c
+++ b/drivers/thermal/gov_power_allocator.c
@@ -8,7 +8,6 @@
 
 #define pr_fmt(fmt) "Power allocator: " fmt
 
-#include <linux/rculist.h>
 #include <linux/slab.h>
 #include <linux/thermal.h>
 
-- 
GitLab


From fa17c4136db0bc91e698bd227cb478c648863d11 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Wed, 16 Nov 2022 12:31:40 +0100
Subject: [PATCH 3911/4122] dt-bindings: thermal: qcom-tsens: narrow interrupts
 for SC8280XP, SM6350 and SM8450

Narrow number of interrupts per variants: SC8280XP, SM6350 and SM8450.
The compatibles are already used and described.  They only missed the
constraints of number of interrupts.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221116113140.69587-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 Documentation/devicetree/bindings/thermal/qcom-tsens.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index f0bd4b979e28a..5bcfddc877d36 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -146,11 +146,14 @@ allOf:
               - qcom,sc7180-tsens
               - qcom,sc7280-tsens
               - qcom,sc8180x-tsens
+              - qcom,sc8280xp-tsens
               - qcom,sdm630-tsens
               - qcom,sdm845-tsens
+              - qcom,sm6350-tsens
               - qcom,sm8150-tsens
               - qcom,sm8250-tsens
               - qcom,sm8350-tsens
+              - qcom,sm8450-tsens
               - qcom,tsens-v2
     then:
       properties:
-- 
GitLab


From 8763f8acbf8aef22a2321d4c978cd078aa3b8f64 Mon Sep 17 00:00:00 2001
From: Luca Weiss <luca.weiss@fairphone.com>
Date: Thu, 20 Oct 2022 16:52:37 +0200
Subject: [PATCH 3912/4122] thermal/drivers/qcom/temp-alarm: Fix inaccurate
 warning for gen2

On gen2 chips the stage2 threshold is not 140 degC but 125 degC.

Make the warning message clearer by using this variable and also by
including the temperature that was checked for.

Fixes: aa92b3310c55 ("thermal/drivers/qcom-spmi-temp-alarm: Add support for GEN2 rev 1 PMIC peripherals")
Signed-off-by: Luca Weiss <luca.weiss@fairphone.com>
Reviewed-by: Amit Kucheria <amitk@kernel.org>
Link: https://lore.kernel.org/r/20221020145237.942146-1-luca.weiss@fairphone.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/qcom-spmi-temp-alarm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
index be785ab37e53d..ad84978109e6f 100644
--- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
+++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
@@ -252,7 +252,8 @@ static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip,
 			disable_s2_shutdown = true;
 		else
 			dev_warn(chip->dev,
-				 "No ADC is configured and critical temperature is above the maximum stage 2 threshold of 140 C! Configuring stage 2 shutdown at 140 C.\n");
+				 "No ADC is configured and critical temperature %d mC is above the maximum stage 2 threshold of %ld mC! Configuring stage 2 shutdown at %ld mC.\n",
+				 temp, stage2_threshold_max, stage2_threshold_max);
 	}
 
 skip:
-- 
GitLab


From 2baad2496383c6036bdb0945ef8aa6b1a5d18c19 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Sat, 29 Oct 2022 10:59:33 +0530
Subject: [PATCH 3913/4122] thermal/drivers/qcom: Demote error log of thermal
 zone register to debug

devm_thermal_of_zone_register() can fail with -ENODEV if thermal zone for
the channel is not represented in DT. This is perfectly fine since not all
sensors needs to be used for thermal zones but only a few in real world.

So demote the error log to debug to avoid spamming users.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Link: https://lore.kernel.org/r/20221029052933.32421-1-manivannan.sadhasivam@linaro.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/qcom-spmi-adc-tm5.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
index 88462a4628fb5..ff47fc9ac9c5e 100644
--- a/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
+++ b/drivers/thermal/qcom/qcom-spmi-adc-tm5.c
@@ -678,7 +678,7 @@ static int adc_tm5_register_tzd(struct adc_tm5_chip *adc_tm)
 						    &adc_tm5_thermal_ops);
 		if (IS_ERR(tzd)) {
 			if (PTR_ERR(tzd) == -ENODEV) {
-				dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n",
+				dev_dbg(adc_tm->dev, "thermal sensor on channel %d is not used\n",
 					 adc_tm->channels[i].channel);
 				continue;
 			}
-- 
GitLab


From 46cab93ab49f303a7abbcf920cf9ef95d02a113c Mon Sep 17 00:00:00 2001
From: Bryan Brattlof <bb@ti.com>
Date: Mon, 31 Oct 2022 18:26:52 -0500
Subject: [PATCH 3914/4122] thermal/drivers/k3_j72xx_bandgap: Simplify
 k3_thermal_get_temp() function

The k3_thermal_get_temp() function can be simplified to return only
the result of k3_bgp_read_temp() without needing the 'ret' variable

Signed-off-by: Bryan Brattlof <bb@ti.com>
Link: https://lore.kernel.org/r/20221031232702.10339-2-bb@ti.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/k3_j72xx_bandgap.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c
index c073b1023bbe7..a9f99a190cb61 100644
--- a/drivers/thermal/k3_j72xx_bandgap.c
+++ b/drivers/thermal/k3_j72xx_bandgap.c
@@ -249,14 +249,7 @@ static inline int k3_bgp_read_temp(struct k3_thermal_data *devdata,
 /* Get temperature callback function for thermal zone */
 static int k3_thermal_get_temp(struct thermal_zone_device *tz, int *temp)
 {
-	struct k3_thermal_data *data = tz->devdata;
-	int ret = 0;
-
-	ret = k3_bgp_read_temp(data, temp);
-	if (ret)
-		return ret;
-
-	return ret;
+	return k3_bgp_read_temp(tz->devdata, temp);
 }
 
 static const struct thermal_zone_device_ops k3_of_thermal_ops = {
-- 
GitLab


From 311f328ffc7572219bee65db77645e5fedd4e8e6 Mon Sep 17 00:00:00 2001
From: Bryan Brattlof <bb@ti.com>
Date: Mon, 31 Oct 2022 18:26:53 -0500
Subject: [PATCH 3915/4122] thermal/drivers/k3_j72xx_bandgap: Use bool for
 i2128 erratum flag

Some of TI's J721E SoCs require a software trimming method to report
temperatures accurately. Currently we are using a few different data
types to indicate when we should apply the erratum.

Change the 'workaround_needed' variable's data type to a bool to align
with how we are using this variable currently.

Signed-off-by: Bryan Brattlof <bb@ti.com>
Link: https://lore.kernel.org/r/20221031232702.10339-3-bb@ti.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/k3_j72xx_bandgap.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c
index a9f99a190cb61..b9d20026771a5 100644
--- a/drivers/thermal/k3_j72xx_bandgap.c
+++ b/drivers/thermal/k3_j72xx_bandgap.c
@@ -340,7 +340,7 @@ static void print_look_up_table(struct device *dev, int *ref_table)
 }
 
 struct k3_j72xx_bandgap_data {
-	unsigned int has_errata_i2128;
+	const bool has_errata_i2128;
 };
 
 static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
@@ -351,7 +351,7 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct k3_j72xx_bandgap *bgp;
 	struct k3_thermal_data *data;
-	int workaround_needed = 0;
+	bool workaround_needed = false;
 	const struct k3_j72xx_bandgap_data *driver_data;
 	struct thermal_zone_device *ti_thermal;
 	int *ref_table;
@@ -522,11 +522,11 @@ static int k3_j72xx_bandgap_remove(struct platform_device *pdev)
 }
 
 static const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j721e_data = {
-	.has_errata_i2128 = 1,
+	.has_errata_i2128 = true,
 };
 
 static const struct k3_j72xx_bandgap_data k3_j72xx_bandgap_j7200_data = {
-	.has_errata_i2128 = 0,
+	.has_errata_i2128 = false,
 };
 
 static const struct of_device_id of_k3_j72xx_bandgap_match[] = {
-- 
GitLab


From 156f0e2fda420c4a4a7b244a909df04086039bd6 Mon Sep 17 00:00:00 2001
From: Bryan Brattlof <bb@ti.com>
Date: Mon, 31 Oct 2022 18:26:54 -0500
Subject: [PATCH 3916/4122] thermal/drivers/k3_j72xx_bandgap: Remove fuse_base
 from structure

'fuse_base' is only needed during the initial probe function to provide
data for a software trimming method for some of TI's devices affected by
the i2128 erratum. The devices not affected will not use this region

Remove fuse_base from the main k3_j72xx_bandgap structure

Signed-off-by: Bryan Brattlof <bb@ti.com>
Link: https://lore.kernel.org/r/20221031232702.10339-4-bb@ti.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/k3_j72xx_bandgap.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c
index b9d20026771a5..395a73cb37425 100644
--- a/drivers/thermal/k3_j72xx_bandgap.c
+++ b/drivers/thermal/k3_j72xx_bandgap.c
@@ -177,7 +177,6 @@ struct k3_j72xx_bandgap {
 	struct device *dev;
 	void __iomem *base;
 	void __iomem *cfg2_base;
-	void __iomem *fuse_base;
 	struct k3_thermal_data *ts_data[K3_VTM_MAX_NUM_TS];
 };
 
@@ -276,7 +275,7 @@ static int k3_j72xx_bandgap_temp_to_adc_code(int temp)
 }
 
 static void get_efuse_values(int id, struct k3_thermal_data *data, int *err,
-			     struct k3_j72xx_bandgap *bgp)
+			     void __iomem *fuse_base)
 {
 	int i, tmp, pow;
 	int ct_offsets[5][K3_VTM_CORRECTION_TEMP_CNT] = {
@@ -298,16 +297,16 @@ static void get_efuse_values(int id, struct k3_thermal_data *data, int *err,
 		/* Extract the offset value using bit-mask */
 		if (ct_offsets[id][i] == -1 && i == 1) {
 			/* 25C offset Case of Sensor 2 split between 2 regs */
-			tmp = (readl(bgp->fuse_base + 0x8) & 0xE0000000) >> (29);
-			tmp |= ((readl(bgp->fuse_base + 0xC) & 0x1F) << 3);
+			tmp = (readl(fuse_base + 0x8) & 0xE0000000) >> (29);
+			tmp |= ((readl(fuse_base + 0xC) & 0x1F) << 3);
 			pow = tmp & 0x80;
 		} else if (ct_offsets[id][i] == -1 && i == 2) {
 			/* 125C Case of Sensor 3 split between 2 regs */
-			tmp = (readl(bgp->fuse_base + 0x4) & 0xF8000000) >> (27);
-			tmp |= ((readl(bgp->fuse_base + 0x8) & 0xF) << 5);
+			tmp = (readl(fuse_base + 0x4) & 0xF8000000) >> (27);
+			tmp |= ((readl(fuse_base + 0x8) & 0xF) << 5);
 			pow = tmp & 0x100;
 		} else {
-			tmp = readl(bgp->fuse_base + ct_offsets[id][i]);
+			tmp = readl(fuse_base + ct_offsets[id][i]);
 			tmp &= ct_bm[id][i];
 			tmp = tmp >> __ffs(ct_bm[id][i]);
 
@@ -356,6 +355,7 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 	struct thermal_zone_device *ti_thermal;
 	int *ref_table;
 	struct err_values err_vals;
+	void __iomem *fuse_base;
 
 	const s64 golden_factors[] = {
 		-490019999999999936,
@@ -387,9 +387,9 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 		return PTR_ERR(bgp->cfg2_base);
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	bgp->fuse_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(bgp->fuse_base))
-		return PTR_ERR(bgp->fuse_base);
+	fuse_base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(fuse_base))
+		return PTR_ERR(fuse_base);
 
 	driver_data = of_device_get_match_data(dev);
 	if (driver_data)
@@ -428,7 +428,7 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 	}
 
 	/* Workaround not needed if bit30/bit31 is set even for J721e */
-	if (workaround_needed && (readl(bgp->fuse_base + 0x0) & 0xc0000000) == 0xc0000000)
+	if (workaround_needed && (readl(fuse_base + 0x0) & 0xc0000000) == 0xc0000000)
 		workaround_needed = false;
 
 	dev_dbg(bgp->dev, "Work around %sneeded\n",
@@ -452,7 +452,7 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 			err_vals.refs[1] = PLUS30CREF;
 			err_vals.refs[2] = PLUS125CREF;
 			err_vals.refs[3] = PLUS150CREF;
-			get_efuse_values(id, &data[id], err_vals.errs, bgp);
+			get_efuse_values(id, &data[id], err_vals.errs, fuse_base);
 		}
 
 		if (id == 0 && workaround_needed)
-- 
GitLab


From 366444ebe7e2e1c987c6a07551f06db4b5b7c0ad Mon Sep 17 00:00:00 2001
From: Bryan Brattlof <bb@ti.com>
Date: Mon, 31 Oct 2022 18:26:55 -0500
Subject: [PATCH 3917/4122] thermal/drivers/k3_j72xx_bandgap: Map fuse_base
 only for erratum workaround

Some of TI's J721E SoCs require a software trimming procedure for the
temperature monitors to function properly. To determine if a particular
J721E is not affected by this erratum, both bits in the WKUP_SPARE_FUSE0
region must be set. Other SoCs, not affected by this erratum, will not
need this region.

Map the 'fuse_base' region only when the erratum fix is needed.

Signed-off-by: Bryan Brattlof <bb@ti.com>
Link: https://lore.kernel.org/r/20221031232702.10339-5-bb@ti.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/k3_j72xx_bandgap.c | 34 +++++++++++++++++++-----------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/drivers/thermal/k3_j72xx_bandgap.c b/drivers/thermal/k3_j72xx_bandgap.c
index 395a73cb37425..031ea1091909a 100644
--- a/drivers/thermal/k3_j72xx_bandgap.c
+++ b/drivers/thermal/k3_j72xx_bandgap.c
@@ -386,15 +386,32 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 	if (IS_ERR(bgp->cfg2_base))
 		return PTR_ERR(bgp->cfg2_base);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
-	fuse_base = devm_ioremap_resource(dev, res);
-	if (IS_ERR(fuse_base))
-		return PTR_ERR(fuse_base);
-
 	driver_data = of_device_get_match_data(dev);
 	if (driver_data)
 		workaround_needed = driver_data->has_errata_i2128;
 
+	/*
+	 * Some of TI's J721E SoCs require a software trimming procedure
+	 * for the temperature monitors to function properly. To determine
+	 * if this particular SoC is NOT affected, both bits in the
+	 * WKUP_SPARE_FUSE0[31:30] will be set (0xC0000000) indicating
+	 * when software trimming should NOT be applied.
+	 *
+	 * https://www.ti.com/lit/er/sprz455c/sprz455c.pdf
+	 */
+	if (workaround_needed) {
+		res = platform_get_resource(pdev, IORESOURCE_MEM, 2);
+		fuse_base = devm_ioremap_resource(dev, res);
+		if (IS_ERR(fuse_base))
+			return PTR_ERR(fuse_base);
+
+		if ((readl(fuse_base) & 0xc0000000) == 0xc0000000)
+			workaround_needed = false;
+	}
+
+	dev_dbg(bgp->dev, "Work around %sneeded\n",
+		workaround_needed ? "" : "not ");
+
 	pm_runtime_enable(dev);
 	ret = pm_runtime_get_sync(dev);
 	if (ret < 0) {
@@ -427,13 +444,6 @@ static int k3_j72xx_bandgap_probe(struct platform_device *pdev)
 		goto err_free_ref_table;
 	}
 
-	/* Workaround not needed if bit30/bit31 is set even for J721e */
-	if (workaround_needed && (readl(fuse_base + 0x0) & 0xc0000000) == 0xc0000000)
-		workaround_needed = false;
-
-	dev_dbg(bgp->dev, "Work around %sneeded\n",
-		workaround_needed ? "" : "not ");
-
 	if (!workaround_needed)
 		init_table(5, ref_table, golden_factors);
 	else
-- 
GitLab


From effe8db0a421480fc4dad0c7bf380b8e245cbb8c Mon Sep 17 00:00:00 2001
From: Bryan Brattlof <bb@ti.com>
Date: Mon, 31 Oct 2022 18:26:56 -0500
Subject: [PATCH 3918/4122] dt-bindings: thermal: k3-j72xx: elaborate on
 binding description

Elaborate on the function of this device node as well as some of the
properties this node uses.

Signed-off-by: Bryan Brattlof <bb@ti.com>
Acked-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221031232702.10339-6-bb@ti.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 .../bindings/thermal/ti,j72xx-thermal.yaml    | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
index c74f124ebfc00..3bb870a26872f 100644
--- a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
@@ -9,6 +9,19 @@ title: Texas Instruments J72XX VTM (DTS) binding
 maintainers:
   - Keerthy <j-keerthy@ti.com>
 
+description: |
+  The TI K3 family of SoCs typically have a Voltage & Thermal
+  Management (VTM) device to control up to 8 temperature diode
+  sensors to measure silicon junction temperatures from different
+  hotspots of the chip as well as provide temperature, interrupt
+  and alerting information.
+
+  The following polynomial equation can then be used to convert
+  value returned by this device into a temperature in Celsius
+
+  Temp(C) = (-9.2627e-12) * x^4 + (6.0373e-08) * x^3 + \
+            (-1.7058e-04) * x^2 + (3.2512e-01) * x   + (-4.9003e+01)
+
 properties:
   compatible:
     enum:
@@ -19,7 +32,11 @@ properties:
     items:
       - description: VTM cfg1 register space
       - description: VTM cfg2 register space
-      - description: VTM efuse register space
+      - description: |
+          A software trimming method must be applied to some Jacinto
+          devices to function properly. This eFuse region provides
+          the information needed for these SoCs to report
+          temperatures accurately.
 
   power-domains:
     maxItems: 1
-- 
GitLab


From c4026d3e2578291016703cd75f3a6f786f60cd80 Mon Sep 17 00:00:00 2001
From: Bryan Brattlof <bb@ti.com>
Date: Mon, 31 Oct 2022 18:26:57 -0500
Subject: [PATCH 3919/4122] dt-bindings: thermal: k3-j72xx: conditionally
 require efuse reg range

Only some of TI's J721E SoCs will need a eFuse register range mapped to
determine if they're affected by TI's i2128 erratum. All other SoC will
not need this eFuse range to function properly

Update the bindings for the k3_j72xx_bandgap thermal driver so other
devices will only need two register ranges

Signed-off-by: Bryan Brattlof <bb@ti.com>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221031232702.10339-7-bb@ti.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 .../bindings/thermal/ti,j72xx-thermal.yaml       | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
index 3bb870a26872f..0509c9cec224d 100644
--- a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
@@ -37,6 +37,7 @@ properties:
           devices to function properly. This eFuse region provides
           the information needed for these SoCs to report
           temperatures accurately.
+    minItems: 2
 
   power-domains:
     maxItems: 1
@@ -44,6 +45,21 @@ properties:
   "#thermal-sensor-cells":
     const: 1
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,j721e-vtm
+    then:
+      properties:
+        reg:
+          minItems: 3
+    else:
+      properties:
+        reg:
+          maxItems: 2
+
 required:
   - compatible
   - reg
-- 
GitLab


From bf438ed026199ff0182408d8592b6c11382d1a51 Mon Sep 17 00:00:00 2001
From: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Date: Tue, 15 Nov 2022 12:16:29 +0000
Subject: [PATCH 3920/4122] dt-bindings: thermal: rzg2l-thermal: Document
 RZ/Five SoC

The TSU block on the RZ/Five SoC is identical to one found on the RZ/G2UL
SoC. "renesas,r9a07g043-tsu" compatible string will be used on the
RZ/Five SoC so to make this clear, update the comment to include RZ/Five
SoC.

No driver changes are required as generic compatible string
"renesas,rzg2l-tsu" will be used as a fallback on RZ/Five SoC.

Signed-off-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Acked-by: Rob Herring <robh@kernel.org>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Link: https://lore.kernel.org/r/20221115121629.1181667-1-prabhakar.mahadev-lad.rj@bp.renesas.com
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml
index 1d83733978481..03f4b926e53c9 100644
--- a/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/rzg2l-thermal.yaml
@@ -17,7 +17,7 @@ properties:
   compatible:
     items:
       - enum:
-          - renesas,r9a07g043-tsu # RZ/G2UL
+          - renesas,r9a07g043-tsu # RZ/G2UL and RZ/Five
           - renesas,r9a07g044-tsu # RZ/G2{L,LC}
           - renesas,r9a07g054-tsu # RZ/V2L
       - const: renesas,rzg2l-tsu
-- 
GitLab


From 33dc955c5a2795f17ff81751563836d498ac794f Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Thu, 17 Nov 2022 14:09:52 +0800
Subject: [PATCH 3921/4122] thermal/drivers/st: Use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Link: https://lore.kernel.org/r/202211171409524332954@zte.com.cn
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/st/stm_thermal.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/thermal/st/stm_thermal.c b/drivers/thermal/st/stm_thermal.c
index 78feb802a87d2..e7834ccc7976b 100644
--- a/drivers/thermal/st/stm_thermal.c
+++ b/drivers/thermal/st/stm_thermal.c
@@ -488,7 +488,6 @@ MODULE_DEVICE_TABLE(of, stm_thermal_of_match);
 static int stm_thermal_probe(struct platform_device *pdev)
 {
 	struct stm_thermal_sensor *sensor;
-	struct resource *res;
 	void __iomem *base;
 	int ret;
 
@@ -506,8 +505,7 @@ static int stm_thermal_probe(struct platform_device *pdev)
 
 	sensor->dev = &pdev->dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
-- 
GitLab


From 4a9f20112c22cdcd6b81fafb18534a2a5a629a14 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <neil.armstrong@linaro.org>
Date: Wed, 16 Nov 2022 11:09:50 +0100
Subject: [PATCH 3922/4122] dt-bindings: thermal: qcom-tsens: Add compatible
 for sm8550

The Qualcomm SM8550 platform has three instances of the tsens block,
add a compatible for these instances.

Signed-off-by: Neil Armstrong <neil.armstrong@linaro.org>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Amit Kucheria <amitk@kernel.org>
Link: https://lore.kernel.org/r/20221114-narmstrong-sm8550-upstream-tsens-v1-0-0e169822830f@linaro.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 Documentation/devicetree/bindings/thermal/qcom-tsens.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
index 5bcfddc877d36..7e04804b6e80b 100644
--- a/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
+++ b/Documentation/devicetree/bindings/thermal/qcom-tsens.yaml
@@ -58,6 +58,7 @@ properties:
               - qcom,sm8250-tsens
               - qcom,sm8350-tsens
               - qcom,sm8450-tsens
+              - qcom,sm8550-tsens
           - const: qcom,tsens-v2
 
       - description: v2 of TSENS with combined interrupt
-- 
GitLab


From 46a891e45be97c6781ac34f5ec777d69370e252b Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Wed, 16 Mar 2022 11:03:22 -0700
Subject: [PATCH 3923/4122] thermal/drivers/qcom/lmh: Fix irq handler return
 value

After enough invocations the LMh irq is eventually reported as bad, because the
handler doesn't return IRQ_HANDLED, fix this.

Fixes: 53bca371cdf7 ("thermal/drivers/qcom: Add support for LMh driver")
Reported-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Link: https://lore.kernel.org/r/20220316180322.88132-1-bjorn.andersson@linaro.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/lmh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/thermal/qcom/lmh.c b/drivers/thermal/qcom/lmh.c
index d3d9b9fa49e81..4122a51e98741 100644
--- a/drivers/thermal/qcom/lmh.c
+++ b/drivers/thermal/qcom/lmh.c
@@ -45,7 +45,7 @@ static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
 	if (irq)
 		generic_handle_irq(irq);
 
-	return 0;
+	return IRQ_HANDLED;
 }
 
 static void lmh_enable_interrupt(struct irq_data *d)
-- 
GitLab


From 5011a110295d25418f5918a6af7bfcdb00dd4e34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Mon, 12 Dec 2022 23:02:17 +0100
Subject: [PATCH 3924/4122] thermal/drivers/imx_sc_thermal: Drop empty platform
 remove function
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A remove callback just returning 0 is equivalent to no remove callback
at all. So drop the useless function.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Link: https://lore.kernel.org/r/20221212220217.3777176-1-u.kleine-koenig@pengutronix.de
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/imx_sc_thermal.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/thermal/imx_sc_thermal.c b/drivers/thermal/imx_sc_thermal.c
index 5d92b70a5d53a..4df925e3a80bd 100644
--- a/drivers/thermal/imx_sc_thermal.c
+++ b/drivers/thermal/imx_sc_thermal.c
@@ -127,11 +127,6 @@ static int imx_sc_thermal_probe(struct platform_device *pdev)
 	return 0;
 }
 
-static int imx_sc_thermal_remove(struct platform_device *pdev)
-{
-	return 0;
-}
-
 static int imx_sc_sensors[] = { IMX_SC_R_SYSTEM, IMX_SC_R_PMIC_0, -1 };
 
 static const struct of_device_id imx_sc_thermal_table[] = {
@@ -142,7 +137,6 @@ MODULE_DEVICE_TABLE(of, imx_sc_thermal_table);
 
 static struct platform_driver imx_sc_thermal_driver = {
 		.probe = imx_sc_thermal_probe,
-		.remove	= imx_sc_thermal_remove,
 		.driver = {
 			.name = "imx-sc-thermal",
 			.of_match_table = imx_sc_thermal_table,
-- 
GitLab


From 75333d48f92256a0dec91dbf07835e804fc411c0 Mon Sep 17 00:00:00 2001
From: Dai Ngo <dai.ngo@oracle.com>
Date: Mon, 12 Dec 2022 14:50:11 -0800
Subject: [PATCH 3925/4122] NFSD: fix use-after-free in __nfs42_ssc_open()

Problem caused by source's vfsmount being unmounted but remains
on the delayed unmount list. This happens when nfs42_ssc_open()
return errors.

Fixed by removing nfsd4_interssc_connect(), leave the vfsmount
for the laundromat to unmount when idle time expires.

We don't need to call nfs_do_sb_deactive when nfs42_ssc_open
return errors since the file was not opened so nfs_server->active
was not incremented. Same as in nfsd4_copy, if we fail to
launch nfsd4_do_async_copy thread then there's no need to
call nfs_do_sb_deactive

Reported-by: Xingyuan Mo <hdthky0@gmail.com>
Signed-off-by: Dai Ngo <dai.ngo@oracle.com>
Tested-by: Xingyuan Mo <hdthky0@gmail.com>
Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
---
 fs/nfsd/nfs4proc.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 73ed32ad23a20..bd880d55f565b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1461,13 +1461,6 @@ out_err:
 	return status;
 }
 
-static void
-nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
-{
-	nfs_do_sb_deactive(ss_mnt->mnt_sb);
-	mntput(ss_mnt);
-}
-
 /*
  * Verify COPY destination stateid.
  *
@@ -1570,11 +1563,6 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct file *filp,
 {
 }
 
-static void
-nfsd4_interssc_disconnect(struct vfsmount *ss_mnt)
-{
-}
-
 static struct file *nfs42_ssc_open(struct vfsmount *ss_mnt,
 				   struct nfs_fh *src_fh,
 				   nfs4_stateid *stateid)
@@ -1770,7 +1758,7 @@ static int nfsd4_do_async_copy(void *data)
 			default:
 				nfserr = nfserr_offload_denied;
 			}
-			nfsd4_interssc_disconnect(copy->ss_mnt);
+			/* ss_mnt will be unmounted by the laundromat */
 			goto do_callback;
 		}
 		nfserr = nfsd4_do_copy(copy, filp, copy->nf_dst->nf_file,
@@ -1851,8 +1839,10 @@ out_err:
 	if (async_copy)
 		cleanup_async_copy(async_copy);
 	status = nfserrno(-ENOMEM);
-	if (nfsd4_ssc_is_inter(copy))
-		nfsd4_interssc_disconnect(copy->ss_mnt);
+	/*
+	 * source's vfsmount of inter-copy will be unmounted
+	 * by the laundromat
+	 */
 	goto out;
 }
 
-- 
GitLab


From 38792972de4294163f44d6360fd221e6f2c22a05 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Tue, 13 Dec 2022 14:24:07 +0100
Subject: [PATCH 3926/4122] ftrace: Allow WITH_ARGS flavour of graph tracer
 with shadow call stack

The recent switch on arm64 from DYNAMIC_FTRACE_WITH_REGS to
DYNAMIC_FTRACE_WITH_ARGS failed to take into account that we currently
require the former in order to allow the function graph tracer to be
enabled in combination with shadow call stacks. This means that this is
no longer permitted at all, in spite of the fact that either flavour of
ftrace works perfectly fine in this combination.

So permit WITH_ARGS as well as WITH_REGS.

Fixes: ddc9863e9e90 ("scs: Disable when function graph tracing is enabled")
Acked-by: Mark Rutland <mark.rutland@arm.com>
Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Link: https://lore.kernel.org/r/20221213132407.1485025-1-ardb@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 072a1b39e3afd..c93c004fcf7ac 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -635,7 +635,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
 config SHADOW_CALL_STACK
 	bool "Shadow Call Stack"
 	depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
-	depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
+	depends on DYNAMIC_FTRACE_WITH_ARGS || DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
 	help
 	  This option enables the compiler's Shadow Call Stack, which
 	  uses a shadow stack to protect function return addresses from
-- 
GitLab


From d0b24b4e91fcb8408c4979888547f86be514e337 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Date: Tue, 15 Nov 2022 17:48:47 -0300
Subject: [PATCH 3927/4122] ftrace: Prevent RCU stall on PREEMPT_VOLUNTARY
 kernels

The function match_records() may take a while due to a large
number of string comparisons, so when in PREEMPT_VOLUNTARY
kernels we could face RCU stalls due to that.

Add a cond_resched() to prevent that.

Link: https://lkml.kernel.org/r/20221115204847.593616-1-gpiccoli@igalia.com

Cc: Mark Rutland <mark.rutland@arm.com>
Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Acked-by: Paul E. McKenney <paulmck@kernel.org> # from RCU CPU stall warning perspective
Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/ftrace.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d04552c0c275d..b8e374a372e58 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -4204,6 +4204,7 @@ match_records(struct ftrace_hash *hash, char *func, int len, char *mod)
 			}
 			found = 1;
 		}
+		cond_resched();
 	} while_for_each_ftrace_rec();
  out_unlock:
 	mutex_unlock(&ftrace_lock);
-- 
GitLab


From ea47666ca4288becf82c51eb3da6d3ff891470a4 Mon Sep 17 00:00:00 2001
From: "Guilherme G. Piccoli" <gpiccoli@igalia.com>
Date: Fri, 19 Aug 2022 19:17:26 -0300
Subject: [PATCH 3928/4122] tracing: Improve panic/die notifiers

Currently the tracing dump_on_oops feature is implemented through
separate notifiers, one for die/oops and the other for panic;
given they have the same functionality, let's unify them.

Also improve the function comment and change the priority of the
notifier to make it execute earlier, avoiding showing useless trace
data (like the callback names for the other notifiers); finally,
we also removed an unnecessary header inclusion.

Link: https://lkml.kernel.org/r/20220819221731.480795-7-gpiccoli@igalia.com

Cc: Petr Mladek <pmladek@suse.com>
Cc: Sergei Shtylyov <sergei.shtylyov@gmail.com>
Signed-off-by: Guilherme G. Piccoli <gpiccoli@igalia.com>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/trace/trace.c | 55 ++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 28 deletions(-)

diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 548890c7c0f5c..6d7ef130f57e9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -19,7 +19,6 @@
 #include <linux/kallsyms.h>
 #include <linux/security.h>
 #include <linux/seq_file.h>
-#include <linux/notifier.h>
 #include <linux/irqflags.h>
 #include <linux/debugfs.h>
 #include <linux/tracefs.h>
@@ -9876,40 +9875,40 @@ static __init int tracer_init_tracefs(void)
 
 fs_initcall(tracer_init_tracefs);
 
-static int trace_panic_handler(struct notifier_block *this,
-			       unsigned long event, void *unused)
-{
-	if (ftrace_dump_on_oops)
-		ftrace_dump(ftrace_dump_on_oops);
-	return NOTIFY_OK;
-}
+static int trace_die_panic_handler(struct notifier_block *self,
+				unsigned long ev, void *unused);
 
 static struct notifier_block trace_panic_notifier = {
-	.notifier_call  = trace_panic_handler,
-	.next           = NULL,
-	.priority       = 150   /* priority: INT_MAX >= x >= 0 */
+	.notifier_call = trace_die_panic_handler,
+	.priority = INT_MAX - 1,
 };
 
-static int trace_die_handler(struct notifier_block *self,
-			     unsigned long val,
-			     void *data)
-{
-	switch (val) {
-	case DIE_OOPS:
-		if (ftrace_dump_on_oops)
-			ftrace_dump(ftrace_dump_on_oops);
-		break;
-	default:
-		break;
-	}
-	return NOTIFY_OK;
-}
-
 static struct notifier_block trace_die_notifier = {
-	.notifier_call = trace_die_handler,
-	.priority = 200
+	.notifier_call = trace_die_panic_handler,
+	.priority = INT_MAX - 1,
 };
 
+/*
+ * The idea is to execute the following die/panic callback early, in order
+ * to avoid showing irrelevant information in the trace (like other panic
+ * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
+ * warnings get disabled (to prevent potential log flooding).
+ */
+static int trace_die_panic_handler(struct notifier_block *self,
+				unsigned long ev, void *unused)
+{
+	if (!ftrace_dump_on_oops)
+		return NOTIFY_DONE;
+
+	/* The die notifier requires DIE_OOPS to trigger */
+	if (self == &trace_die_notifier && ev != DIE_OOPS)
+		return NOTIFY_DONE;
+
+	ftrace_dump(ftrace_dump_on_oops);
+
+	return NOTIFY_DONE;
+}
+
 /*
  * printk is set to max of 1024, we really don't need it that big.
  * Nothing should be printing 1000 characters anyway.
-- 
GitLab


From eb9d58947d40699d93e5e69e1ddc54e41da7e132 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Google)" <rostedt@goodmis.org>
Date: Tue, 13 Dec 2022 22:12:27 -0500
Subject: [PATCH 3929/4122] tracing: Fix cpumask() example typo

The sample code for using cpumask used the wrong field for the
__get_cpumask() helper. It used "cpus" which is the bitmask (but would
still give a proper example) instead of the "cpum" that was there to be
used.

Although it produces the same output, fix it, because it's an example and
is confusing in how to properly use the cpumask() macro.

Link: https://lore.kernel.org/linux-trace-kernel/20221213221227.56560374@gandalf.local.home

Cc: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 samples/trace_events/trace-events-sample.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/samples/trace_events/trace-events-sample.h b/samples/trace_events/trace-events-sample.h
index fb4548a44153c..1c6b843b8c4ee 100644
--- a/samples/trace_events/trace-events-sample.h
+++ b/samples/trace_events/trace-events-sample.h
@@ -359,7 +359,7 @@ TRACE_EVENT(foo_bar,
 		  __print_array(__get_dynamic_array(list),
 				__get_dynamic_array_len(list) / sizeof(int),
 				sizeof(int)),
-		  __get_str(str), __get_bitmask(cpus), __get_cpumask(cpus),
+		  __get_str(str), __get_bitmask(cpus), __get_cpumask(cpum),
 		  __get_str(vstr))
 );
 
-- 
GitLab


From d2b497a973fcb76a6b7a552f081b83a1edd91c86 Mon Sep 17 00:00:00 2001
From: Donald Hunter <donald.hunter@gmail.com>
Date: Mon, 12 Dec 2022 10:16:00 +0000
Subject: [PATCH 3930/4122] docs/bpf: Reword docs for BPF_MAP_TYPE_SK_STORAGE

Improve the grammar of the function descriptions and highlight
that the key is a socket fd.

Fixes: f3212ad5b7e9 ("docs/bpf: Add documentation for BPF_MAP_TYPE_SK_STORAGE")
Reported-by: Martin KaFai Lau <martin.lau@linux.dev>
Signed-off-by: Donald Hunter <donald.hunter@gmail.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20221212101600.56026-1-donald.hunter@gmail.com
---
 Documentation/bpf/map_sk_storage.rst | 56 +++++++++++++++-------------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/Documentation/bpf/map_sk_storage.rst b/Documentation/bpf/map_sk_storage.rst
index 047e16c8aaa8c..4e9d23ab9ecdf 100644
--- a/Documentation/bpf/map_sk_storage.rst
+++ b/Documentation/bpf/map_sk_storage.rst
@@ -34,13 +34,12 @@ bpf_sk_storage_get()
 
    void *bpf_sk_storage_get(struct bpf_map *map, void *sk, void *value, u64 flags)
 
-Socket-local storage can be retrieved using the ``bpf_sk_storage_get()``
-helper. The helper gets the storage from ``sk`` that is associated with ``map``.
-If the ``BPF_LOCAL_STORAGE_GET_F_CREATE`` flag is used then
-``bpf_sk_storage_get()`` will create the storage for ``sk`` if it does not
-already exist. ``value`` can be used together with
-``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise it
-will be zero initialized. Returns a pointer to the storage on success, or
+Socket-local storage for ``map`` can be retrieved from socket ``sk`` using the
+``bpf_sk_storage_get()`` helper. If the ``BPF_LOCAL_STORAGE_GET_F_CREATE``
+flag is used then ``bpf_sk_storage_get()`` will create the storage for ``sk``
+if it does not already exist. ``value`` can be used together with
+``BPF_LOCAL_STORAGE_GET_F_CREATE`` to initialize the storage value, otherwise
+it will be zero initialized. Returns a pointer to the storage on success, or
 ``NULL`` in case of failure.
 
 .. note::
@@ -54,9 +53,9 @@ bpf_sk_storage_delete()
 
    long bpf_sk_storage_delete(struct bpf_map *map, void *sk)
 
-Socket-local storage can be deleted using the ``bpf_sk_storage_delete()``
-helper. The helper deletes the storage from ``sk`` that is identified by
-``map``. Returns ``0`` on success, or negative error in case of failure.
+Socket-local storage for ``map`` can be deleted from socket ``sk`` using the
+``bpf_sk_storage_delete()`` helper. Returns ``0`` on success, or negative
+error in case of failure.
 
 User space
 ----------
@@ -68,16 +67,20 @@ bpf_map_update_elem()
 
    int bpf_map_update_elem(int map_fd, const void *key, const void *value, __u64 flags)
 
-Socket-local storage for the socket identified by ``key`` belonging to
-``map_fd`` can be added or updated using the ``bpf_map_update_elem()`` libbpf
-function. ``key`` must be a pointer to a valid ``fd`` in the user space
-program. The ``flags`` parameter can be used to control the update behaviour:
+Socket-local storage for map ``map_fd`` can be added or updated locally to a
+socket using the ``bpf_map_update_elem()`` libbpf function. The socket is
+identified by a `socket` ``fd`` stored in the pointer ``key``. The pointer
+``value`` has the data to be added or updated to the socket ``fd``. The type
+and size of ``value`` should be the same as the value type of the map
+definition.
 
-- ``BPF_ANY`` will create storage for ``fd`` or update existing storage.
-- ``BPF_NOEXIST`` will create storage for ``fd`` only if it did not already
-  exist, otherwise the call will fail with ``-EEXIST``.
-- ``BPF_EXIST`` will update existing storage for ``fd`` if it already exists,
-  otherwise the call will fail with ``-ENOENT``.
+The ``flags`` parameter can be used to control the update behaviour:
+
+- ``BPF_ANY`` will create storage for `socket` ``fd`` or update existing storage.
+- ``BPF_NOEXIST`` will create storage for `socket` ``fd`` only if it did not
+  already exist, otherwise the call will fail with ``-EEXIST``.
+- ``BPF_EXIST`` will update existing storage for `socket` ``fd`` if it already
+  exists, otherwise the call will fail with ``-ENOENT``.
 
 Returns ``0`` on success, or negative error in case of failure.
 
@@ -88,10 +91,10 @@ bpf_map_lookup_elem()
 
    int bpf_map_lookup_elem(int map_fd, const void *key, void *value)
 
-Socket-local storage for the socket identified by ``key`` belonging to
-``map_fd`` can be retrieved using the ``bpf_map_lookup_elem()`` libbpf
-function. ``key`` must be a pointer to a valid ``fd`` in the user space
-program. Returns ``0`` on success, or negative error in case of failure.
+Socket-local storage for map ``map_fd`` can be retrieved from a socket using
+the ``bpf_map_lookup_elem()`` libbpf function. The storage is retrieved from
+the socket identified by a `socket` ``fd`` stored in the pointer
+``key``. Returns ``0`` on success, or negative error in case of failure.
 
 bpf_map_delete_elem()
 ~~~~~~~~~~~~~~~~~~~~~
@@ -100,9 +103,10 @@ bpf_map_delete_elem()
 
    int bpf_map_delete_elem(int map_fd, const void *key)
 
-Socket-local storage for the socket identified by ``key`` belonging to
-``map_fd`` can be deleted using the ``bpf_map_delete_elem()`` libbpf
-function. Returns ``0`` on success, or negative error in case of failure.
+Socket-local storage for map ``map_fd`` can be deleted from a socket using the
+``bpf_map_delete_elem()`` libbpf function. The storage is deleted from the
+socket identified by a `socket` ``fd`` stored in the pointer ``key``. Returns
+``0`` on success, or negative error in case of failure.
 
 Examples
 ========
-- 
GitLab


From ec9230b18b45853287298d70be23f8ec6bd44ff0 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 12 Dec 2022 17:22:24 -0800
Subject: [PATCH 3931/4122] selftests/bpf: Fix a selftest compilation error
 with CONFIG_SMP=n

Kernel test robot reported bpf selftest build failure when CONFIG_SMP
is not set. The error message looks below:

  >> progs/rcu_read_lock.c:256:34: error: no member named 'last_wakee' in 'struct task_struct'
             last_wakee = task->real_parent->last_wakee;
                          ~~~~~~~~~~~~~~~~~  ^
     1 error generated.

When CONFIG_SMP is not set, the field 'last_wakee' is not available in struct
'task_struct'. Hence the above compilation failure. To fix the issue, let us
choose another field 'group_leader' which is available regardless of
CONFIG_SMP set or not.

Fixes: fe147956fca4 ("bpf/selftests: Add selftests for new task kfuncs")
Fixes: 48671232fcb8 ("selftests/bpf: Add tests for bpf_rcu_read_lock()")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: David Vernet <void@manifault.com>
Link: https://lore.kernel.org/bpf/20221213012224.379581-1-yhs@fb.com
---
 tools/testing/selftests/bpf/progs/rcu_read_lock.c      | 8 ++++----
 tools/testing/selftests/bpf/progs/task_kfunc_failure.c | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 125f908024d31..5cecbdbbb16ee 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -288,13 +288,13 @@ out:
 SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
 int task_untrusted_non_rcuptr(void *ctx)
 {
-	struct task_struct *task, *last_wakee;
+	struct task_struct *task, *group_leader;
 
 	task = bpf_get_current_task_btf();
 	bpf_rcu_read_lock();
-	/* the pointer last_wakee marked as untrusted */
-	last_wakee = task->real_parent->last_wakee;
-	(void)bpf_task_storage_get(&map_a, last_wakee, 0, 0);
+	/* the pointer group_leader marked as untrusted */
+	group_leader = task->real_parent->group_leader;
+	(void)bpf_task_storage_get(&map_a, group_leader, 0, 0);
 	bpf_rcu_read_unlock();
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
index 87fa1db9d9b5e..1b47b94dbca02 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -73,7 +73,7 @@ int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 cl
 	struct task_struct *acquired;
 
 	/* Can't invoke bpf_task_acquire() on a trusted pointer obtained from walking a struct. */
-	acquired = bpf_task_acquire(task->last_wakee);
+	acquired = bpf_task_acquire(task->group_leader);
 	bpf_task_release(acquired);
 
 	return 0;
-- 
GitLab


From a8dfde09c90109e3a98af54847e91bde7dc2d5c2 Mon Sep 17 00:00:00 2001
From: Song Liu <song@kernel.org>
Date: Tue, 13 Dec 2022 14:05:00 -0800
Subject: [PATCH 3932/4122] selftests/bpf: Select
 CONFIG_FUNCTION_ERROR_INJECTION
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BPF selftests require CONFIG_FUNCTION_ERROR_INJECTION to work. However,
CONFIG_FUNCTION_ERROR_INJECTION is no longer 'y' by default after recent
changes. As a result, we are seeing errors like the following from BPF CI:

   bpf_testmod_test_read() is not modifiable
   __x64_sys_setdomainname is not sleepable
   __x64_sys_getpgid is not sleepable

Fix this by explicitly selecting CONFIG_FUNCTION_ERROR_INJECTION in the
selftest config.

Fixes: a4412fdd49dc ("error-injection: Add prompt for function error injection")
Reported-by: Daniel Müller <deso@posteo.net>
Signed-off-by: Song Liu <song@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Müller <deso@posteo.net>
Link: https://lore.kernel.org/bpf/20221213220500.3427947-1-song@kernel.org
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 612f699dc4f73..63cd4ab70171e 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -16,6 +16,7 @@ CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_DYNAMIC_FTRACE=y
 CONFIG_FPROBE=y
 CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
 CONFIG_FUNCTION_TRACER=y
 CONFIG_GENEVE=y
 CONFIG_IKCONFIG=y
-- 
GitLab


From e89f3edffb860a0f54a9ed16deadb7a4a1fa3862 Mon Sep 17 00:00:00 2001
From: Milan Landaverde <milan@mdaverde.com>
Date: Tue, 13 Dec 2022 12:57:14 -0500
Subject: [PATCH 3933/4122] bpf: prevent leak of lsm program after failed
 attach

In [0], we added the ability to bpf_prog_attach LSM programs to cgroups,
but in our validation to make sure the prog is meant to be attached to
BPF_LSM_CGROUP, we return too early if the check fails. This results in
lack of decrementing prog's refcnt (through bpf_prog_put)
leaving the LSM program alive past the point of the expected lifecycle.
This fix allows for the decrement to take place.

[0] https://lore.kernel.org/all/20220628174314.1216643-4-sdf@google.com/

Fixes: 69fd337a975c ("bpf: per-cgroup lsm flavor")
Signed-off-by: Milan Landaverde <milan@mdaverde.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20221213175714.31963-1-milan@mdaverde.com
---
 kernel/bpf/syscall.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35972afb68504..64131f88c5537 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3518,9 +3518,9 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_LSM:
 		if (ptype == BPF_PROG_TYPE_LSM &&
 		    prog->expected_attach_type != BPF_LSM_CGROUP)
-			return -EINVAL;
-
-		ret = cgroup_bpf_prog_attach(attr, ptype, prog);
+			ret = -EINVAL;
+		else
+			ret = cgroup_bpf_prog_attach(attr, ptype, prog);
 		break;
 	default:
 		ret = -EINVAL;
-- 
GitLab


From 5f8f95673f68914419e8657b3263736264b40e16 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Tue, 13 Dec 2022 15:26:51 -0800
Subject: [PATCH 3934/4122] perf evlist: Remove group option.

The group option predates grouping events using curly braces added in
commit 89efb029502d7f2d ("perf tools: Add support to parse event group
syntax").

The --group option was retained for legacy support (in August
2012) but keeping it adds complexity.

Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Anshuman Khandual <anshuman.khandual@arm.com>
Cc: Athira Jajeev <atrajeev@linux.vnet.ibm.com>
Cc: Eelco Chaudron <echaudro@redhat.com>
Cc: German Gomez <german.gomez@arm.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Kim Phillips <kim.phillips@amd.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Riccardo Mancini <rickyman7@gmail.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Shaomin Deng <dengshaomin@cdjrlc.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Timothy Hayes <timothy.hayes@arm.com>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20221213232651.1269909-6-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt |  4 ----
 tools/perf/Documentation/perf-top.txt    |  7 ++-----
 tools/perf/builtin-record.c              |  2 --
 tools/perf/builtin-stat.c                |  6 ------
 tools/perf/builtin-top.c                 |  2 --
 tools/perf/tests/attr/README             |  2 --
 tools/perf/tests/attr/test-record-group  | 22 ----------------------
 tools/perf/tests/attr/test-stat-group    | 17 -----------------
 tools/perf/util/evlist.c                 |  2 +-
 tools/perf/util/evlist.h                 |  2 --
 tools/perf/util/python.c                 |  8 --------
 tools/perf/util/record.c                 |  7 -------
 tools/perf/util/record.h                 |  1 -
 13 files changed, 3 insertions(+), 79 deletions(-)
 delete mode 100644 tools/perf/tests/attr/test-record-group
 delete mode 100644 tools/perf/tests/attr/test-stat-group

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 5f9fa07b3ddec..ff815c2f67e8c 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -238,10 +238,6 @@ OPTIONS
 	Also, by adding a comma, the number of mmap pages for AUX
 	area tracing can be specified.
 
---group::
-	Put all events in a single event group.  This precedes the --event
-	option and remains only for backward compatibility.  See --event.
-
 -g::
 	Enables call-graph (stack chain/backtrace) recording for both
 	kernel space and user space.
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index c1fdba26bf53a..e534d709cc5ac 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -51,9 +51,6 @@ Default is to monitor all CPUS.
 --count-filter=<count>::
 	Only display functions with more events than this.
 
---group::
-        Put the counters into a counter group.
-
 --group-sort-idx::
 	Sort the output by the event at the index n in group. If n is invalid,
 	sort by the first event. It can support multiple groups with different
@@ -313,10 +310,10 @@ use '-e e1 -e e2 -G foo,foo' or just use '-e e1 -e e2 -G foo'.
 
 		perf top -e cycles,probe:icmp_rcv --switch-on=probe:icmp_rcv
 
-	   Alternatively one can ask for --group and then two overhead columns
+	   Alternatively one can ask for a group and then two overhead columns
            will appear, the first for cycles and the second for the switch-on event.
 
-		perf top --group -e cycles,probe:icmp_rcv --switch-on=probe:icmp_rcv
+		perf top -e '{cycles,probe:icmp_rcv}' --switch-on=probe:icmp_rcv
 
 	This may be interesting to measure a workload only after some initialization
 	phase is over, i.e. insert a perf probe at that point and use the above
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 7e17374f6c1a2..8ecffa696ce32 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -3380,8 +3380,6 @@ static struct option __record_options[] = {
 	OPT_CALLBACK(0, "mmap-flush", &record.opts, "number",
 		     "Minimal number of bytes that is extracted from mmap data pages (default: 1)",
 		     record__mmap_flush_parse),
-	OPT_BOOLEAN(0, "group", &record.opts.group,
-		    "put the counters into a counter group"),
 	OPT_CALLBACK_NOOPT('g', NULL, &callchain_param,
 			   NULL, "enables call-graph recording" ,
 			   &record_callchain_opt),
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index d040fbcdcc5a7..2808f2aa9a825 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -181,7 +181,6 @@ static bool			topdown_run			= false;
 static bool			smi_cost			= false;
 static bool			smi_reset			= false;
 static int			big_num_opt			=  -1;
-static bool			group				= false;
 static const char		*pre_cmd			= NULL;
 static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
@@ -781,9 +780,6 @@ static int __run_perf_stat(int argc, const char **argv, int run_idx)
 		child_pid = evsel_list->workload.pid;
 	}
 
-	if (group)
-		evlist__set_leader(evsel_list);
-
 	if (!cpu_map__is_dummy(evsel_list->core.user_requested_cpus)) {
 		if (affinity__setup(&saved_affinity) < 0)
 			return -1;
@@ -1192,8 +1188,6 @@ static struct option stat_options[] = {
 #endif
 	OPT_BOOLEAN('a', "all-cpus", &target.system_wide,
 		    "system-wide collection from all CPUs"),
-	OPT_BOOLEAN('g', "group", &group,
-		    "put the counters into a counter group"),
 	OPT_BOOLEAN(0, "scale", &stat_config.scale,
 		    "Use --no-scale to disable counter scaling for multiplexing"),
 	OPT_INCR('v', "verbose", &verbose,
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index bb5bd241246bc..d4b5b02bab733 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1471,8 +1471,6 @@ int cmd_top(int argc, const char **argv)
 			    "dump the symbol table used for profiling"),
 	OPT_INTEGER('f', "count-filter", &top.count_filter,
 		    "only display functions with more events than this"),
-	OPT_BOOLEAN(0, "group", &opts->group,
-			    "put the counters into a counter group"),
 	OPT_BOOLEAN('i', "no-inherit", &opts->no_inherit,
 		    "child tasks do not inherit counters"),
 	OPT_STRING(0, "sym-annotate", &top.sym_filter, "symbol name",
diff --git a/tools/perf/tests/attr/README b/tools/perf/tests/attr/README
index eb3f7d4bb3245..4066fec7180a8 100644
--- a/tools/perf/tests/attr/README
+++ b/tools/perf/tests/attr/README
@@ -49,7 +49,6 @@ Following tests are defined (with perf commands):
   perf record --call-graph dwarf kill		(test-record-graph-dwarf)
   perf record --call-graph fp kill              (test-record-graph-fp)
   perf record --call-graph fp kill              (test-record-graph-fp-aarch64)
-  perf record --group -e cycles,instructions kill (test-record-group)
   perf record -e '{cycles,instructions}' kill   (test-record-group1)
   perf record -e '{cycles/period=1/,instructions/period=2/}:S' kill (test-record-group2)
   perf record -D kill                           (test-record-no-delay)
@@ -66,6 +65,5 @@ Following tests are defined (with perf commands):
   perf stat -d kill                             (test-stat-detailed-1)
   perf stat -dd kill                            (test-stat-detailed-2)
   perf stat -ddd kill                           (test-stat-detailed-3)
-  perf stat --group -e cycles,instructions kill (test-stat-group)
   perf stat -e '{cycles,instructions}' kill     (test-stat-group1)
   perf stat -i -e cycles kill                   (test-stat-no-inherit)
diff --git a/tools/perf/tests/attr/test-record-group b/tools/perf/tests/attr/test-record-group
deleted file mode 100644
index 6c1cff8aae8b8..0000000000000
--- a/tools/perf/tests/attr/test-record-group
+++ /dev/null
@@ -1,22 +0,0 @@
-[config]
-command = record
-args    = --no-bpf-event --group -e cycles,instructions kill >/dev/null 2>&1
-ret     = 1
-
-[event-1:base-record]
-fd=1
-group_fd=-1
-sample_type=327
-read_format=4|20
-
-[event-2:base-record]
-fd=2
-group_fd=1
-config=1
-sample_type=327
-read_format=4|20
-mmap=0
-comm=0
-task=0
-enable_on_exec=0
-disabled=0
diff --git a/tools/perf/tests/attr/test-stat-group b/tools/perf/tests/attr/test-stat-group
deleted file mode 100644
index e15d6946e9b3a..0000000000000
--- a/tools/perf/tests/attr/test-stat-group
+++ /dev/null
@@ -1,17 +0,0 @@
-[config]
-command = stat
-args    = --group -e cycles,instructions kill >/dev/null 2>&1
-ret     = 1
-
-[event-1:base-stat]
-fd=1
-group_fd=-1
-read_format=3|15
-
-[event-2:base-stat]
-fd=2
-group_fd=1
-config=1
-disabled=0
-enable_on_exec=0
-read_format=3|15
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 590d4e77effcf..817df2504a1ed 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -230,7 +230,7 @@ out:
 	return err;
 }
 
-void evlist__set_leader(struct evlist *evlist)
+static void evlist__set_leader(struct evlist *evlist)
 {
 	perf_evlist__set_leader(&evlist->core);
 }
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index e5b84ead566c9..01fa9d592c5a7 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -221,8 +221,6 @@ void evlist__set_selected(struct evlist *evlist, struct evsel *evsel);
 int evlist__create_maps(struct evlist *evlist, struct target *target);
 int evlist__apply_filters(struct evlist *evlist, struct evsel **err_evsel);
 
-void evlist__set_leader(struct evlist *evlist);
-
 u64 __evlist__combined_sample_type(struct evlist *evlist);
 u64 evlist__combined_sample_type(struct evlist *evlist);
 u64 evlist__combined_branch_type(struct evlist *evlist);
diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c
index 7320f7f777fe0..212031b97910a 100644
--- a/tools/perf/util/python.c
+++ b/tools/perf/util/python.c
@@ -1142,14 +1142,6 @@ static PyObject *pyrf_evlist__open(struct pyrf_evlist *pevlist,
 				   PyObject *args, PyObject *kwargs)
 {
 	struct evlist *evlist = &pevlist->evlist;
-	int group = 0;
-	static char *kwlist[] = { "group", NULL };
-
-	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, &group))
-		return NULL;
-
-	if (group)
-		evlist__set_leader(evlist);
 
 	if (evlist__open(evlist) < 0) {
 		PyErr_SetFromErrno(PyExc_OSError);
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 7b58f6c7c69de..9eb5c6a08999e 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -99,13 +99,6 @@ void evlist__config(struct evlist *evlist, struct record_opts *opts, struct call
 	bool use_comm_exec;
 	bool sample_id = opts->sample_id;
 
-	/*
-	 * Set the evsel leader links before we configure attributes,
-	 * since some might depend on this info.
-	 */
-	if (opts->group)
-		evlist__set_leader(evlist);
-
 	if (perf_cpu_map__cpu(evlist->core.user_requested_cpus, 0).cpu < 0)
 		opts->no_inherit = true;
 
diff --git a/tools/perf/util/record.h b/tools/perf/util/record.h
index 4269e916f450c..46212bf020cfb 100644
--- a/tools/perf/util/record.h
+++ b/tools/perf/util/record.h
@@ -13,7 +13,6 @@ struct option;
 
 struct record_opts {
 	struct target target;
-	bool	      group;
 	bool	      inherit_stat;
 	bool	      no_buffering;
 	bool	      no_inherit;
-- 
GitLab


From c587e77e100fa40eb6af10e00497c67acf493f33 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 12 Dec 2022 15:08:20 -0800
Subject: [PATCH 3935/4122] perf stat: Do not delay the workload with --delay

The -D/--delay option is to delay the measure after the program starts.
But the current code goes to sleep before starting the program so the
program is delayed too.  This is not the intention, let's fix it.

Before:

  $ time sudo ./perf stat -a -e cycles -D 3000 sleep 4
  Events disabled
  Events enabled

   Performance counter stats for 'system wide':

       4,326,949,337      cycles

         4.007494118 seconds time elapsed

  real	0m7.474s
  user	0m0.356s
  sys	0m0.120s

It ran the workload for 4 seconds and gave the 3 second delay.  So it
should skip the first 3 second and measure the last 1 second only.  But
as you can see, it delays 3 seconds and ran the workload after that for
4 seconds.  So the total time (real) was 7 seconds.

After:

  $ time sudo ./perf stat -a -e cycles -D 3000 sleep 4
  Events disabled
  Events enabled

   Performance counter stats for 'system wide':

       1,063,551,013      cycles

         1.002769510 seconds time elapsed

  real	0m4.484s
  user	0m0.385s
  sys	0m0.086s

The bug was introduced when it changed enablement of system-wide events
with a command line workload.  But it should've considered the initial
delay case.  The code was reworked since then (in bb8bc52e7578) so I'm
afraid it won't be applied cleanly.

Fixes: d0a0a511493d2695 ("perf stat: Fix forked applications enablement of counters")
Reported-by: Kevin Nomura <nomurak@google.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Thomas Richter <tmricht@linux.ibm.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sumanth Korikkar <sumanthk@linux.ibm.com>
Link: https://lore.kernel.org/r/20221212230820.901382-1-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2808f2aa9a825..bf640abc9c41f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -539,26 +539,14 @@ static int enable_counters(void)
 			return err;
 	}
 
-	if (stat_config.initial_delay < 0) {
-		pr_info(EVLIST_DISABLED_MSG);
-		return 0;
-	}
-
-	if (stat_config.initial_delay > 0) {
-		pr_info(EVLIST_DISABLED_MSG);
-		usleep(stat_config.initial_delay * USEC_PER_MSEC);
-	}
-
 	/*
 	 * We need to enable counters only if:
 	 * - we don't have tracee (attaching to task or cpu)
 	 * - we have initial delay configured
 	 */
-	if (!target__none(&target) || stat_config.initial_delay) {
+	if (!target__none(&target)) {
 		if (!all_counters_use_bpf)
 			evlist__enable(evsel_list);
-		if (stat_config.initial_delay > 0)
-			pr_info(EVLIST_ENABLED_MSG);
 	}
 	return 0;
 }
@@ -926,14 +914,27 @@ try_again_reset:
 			return err;
 	}
 
-	err = enable_counters();
-	if (err)
-		return -1;
+	if (stat_config.initial_delay) {
+		pr_info(EVLIST_DISABLED_MSG);
+	} else {
+		err = enable_counters();
+		if (err)
+			return -1;
+	}
 
 	/* Exec the command, if any */
 	if (forks)
 		evlist__start_workload(evsel_list);
 
+	if (stat_config.initial_delay > 0) {
+		usleep(stat_config.initial_delay * USEC_PER_MSEC);
+		err = enable_counters();
+		if (err)
+			return -1;
+
+		pr_info(EVLIST_ENABLED_MSG);
+	}
+
 	t0 = rdclock();
 	clock_gettime(CLOCK_MONOTONIC, &ref_time);
 
-- 
GitLab


From 818448e9cf92e5c6b3c10320372eefcbe4174e4f Mon Sep 17 00:00:00 2001
From: Tiezhu Yang <yangtiezhu@loongson.cn>
Date: Fri, 18 Nov 2022 17:16:39 +0800
Subject: [PATCH 3936/4122] perf tools: Use "grep -E" instead of "egrep"

The latest version of grep claims the egrep is now obsolete so the build
now contains warnings that look like:

	egrep: warning: egrep is obsolescent; using grep -E

fix this up by moving the related file to use "grep -E" instead.

  sed -i "s/egrep/grep -E/g" `grep egrep -rwl tools/perf`

Here are the steps to install the latest grep:

  wget http://ftp.gnu.org/gnu/grep/grep-3.8.tar.gz
  tar xf grep-3.8.tar.gz
  cd grep-3.8 && ./configure && make
  sudo make install
  export PATH=/usr/local/bin:$PATH

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lore.kernel.org/lkml/1668762999-9297-1-git-send-email-yangtiezhu@loongson.cn
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile                           |  2 +-
 tools/perf/builtin-trace.c                    |  2 +-
 tools/perf/tests/make                         |  2 +-
 .../perf/tests/shell/lib/probe_vfs_getname.sh |  4 ++--
 .../shell/record+probe_libc_inet_pton.sh      |  2 +-
 .../shell/record+script_probe_vfs_getname.sh  |  2 +-
 tools/perf/tests/shell/record_offcpu.sh       |  2 +-
 tools/perf/tests/shell/stat.sh                | 10 ++++-----
 tools/perf/tests/shell/test_arm_coresight.sh  |  8 +++----
 tools/perf/tests/shell/test_arm_spe.sh        |  6 ++---
 tools/perf/tests/shell/test_arm_spe_fork.sh   |  2 +-
 tools/perf/tests/shell/test_brstack.sh        | 18 +++++++--------
 tools/perf/tests/shell/test_data_symbol.sh    |  4 ++--
 tools/perf/tests/shell/test_java_symbol.sh    |  2 +-
 .../tests/shell/trace+probe_vfs_getname.sh    |  4 ++--
 tools/perf/trace/beauty/fadvise.sh            |  2 +-
 tools/perf/trace/beauty/fsmount.sh            |  2 +-
 tools/perf/trace/beauty/fspick.sh             |  2 +-
 tools/perf/trace/beauty/kcmp_type.sh          |  2 +-
 tools/perf/trace/beauty/kvm_ioctl.sh          |  4 ++--
 tools/perf/trace/beauty/madvise_behavior.sh   |  2 +-
 tools/perf/trace/beauty/mmap_flags.sh         | 22 +++++++++----------
 tools/perf/trace/beauty/mmap_prot.sh          | 12 +++++-----
 tools/perf/trace/beauty/mount_flags.sh        |  4 ++--
 tools/perf/trace/beauty/move_mount_flags.sh   |  2 +-
 tools/perf/trace/beauty/mremap_flags.sh       |  4 ++--
 tools/perf/trace/beauty/perf_ioctl.sh         |  2 +-
 .../trace/beauty/pkey_alloc_access_rights.sh  |  2 +-
 tools/perf/trace/beauty/prctl_option.sh       |  4 ++--
 tools/perf/trace/beauty/rename_flags.sh       |  4 ++--
 tools/perf/trace/beauty/sockaddr.sh           |  4 ++--
 tools/perf/trace/beauty/socket.sh             |  4 ++--
 tools/perf/trace/beauty/sync_file_range.sh    |  2 +-
 .../beauty/tracepoints/x86_irq_vectors.sh     |  4 ++--
 .../perf/trace/beauty/tracepoints/x86_msr.sh  | 12 +++++-----
 tools/perf/trace/beauty/usbdevfs_ioctl.sh     |  4 ++--
 tools/perf/trace/beauty/vhost_virtio_ioctl.sh |  4 ++--
 tools/perf/trace/beauty/x86_arch_prctl.sh     |  4 ++--
 38 files changed, 89 insertions(+), 89 deletions(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index f3fe360a35c66..75f3f6e0a2310 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -25,7 +25,7 @@ unexport MAKEFLAGS
 # (To override it, run 'make JOBS=1' and similar.)
 #
 ifeq ($(JOBS),)
-  JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+  JOBS := $(shell (getconf _NPROCESSORS_ONLN || grep -E -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
   ifeq ($(JOBS),0)
     JOBS := 1
   endif
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index c9802ab549d77..86e06f136f402 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1727,7 +1727,7 @@ syscall_arg_fmt__init_array(struct syscall_arg_fmt *arg, struct tep_format_field
 			 len >= 2 && strcmp(field->name + len - 2, "fd") == 0) {
 			/*
 			 * /sys/kernel/tracing/events/syscalls/sys_enter*
-			 * egrep 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
+			 * grep -E 'field:.*fd;' .../format|sed -r 's/.*field:([a-z ]+) [a-z_]*fd.+/\1/g'|sort|uniq -c
 			 * 65 int
 			 * 23 unsigned int
 			 * 7 unsigned long
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index da013e90a9450..05e818a8bbad1 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -29,7 +29,7 @@ endif
 PARALLEL_OPT=
 ifeq ($(SET_PARALLEL),1)
   ifeq ($(JOBS),)
-    cores := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
+    cores := $(shell (getconf _NPROCESSORS_ONLN || grep -E -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
     ifeq ($(cores),0)
       cores := 1
     endif
diff --git a/tools/perf/tests/shell/lib/probe_vfs_getname.sh b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
index b616d42bd19d4..ed0a3972c4c81 100644
--- a/tools/perf/tests/shell/lib/probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/lib/probe_vfs_getname.sh
@@ -12,13 +12,13 @@ cleanup_probe_vfs_getname() {
 add_probe_vfs_getname() {
 	local verbose=$1
 	if [ $had_vfs_getname -eq 1 ] ; then
-		line=$(perf probe -L getname_flags 2>&1 | egrep 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
+		line=$(perf probe -L getname_flags 2>&1 | grep -E 'result.*=.*filename;' | sed -r 's/[[:space:]]+([[:digit:]]+)[[:space:]]+result->uptr.*/\1/')
 		perf probe -q       "vfs_getname=getname_flags:${line} pathname=result->name:string" || \
 		perf probe $verbose "vfs_getname=getname_flags:${line} pathname=filename:ustring"
 	fi
 }
 
 skip_if_no_debuginfo() {
-	add_probe_vfs_getname -v 2>&1 | egrep -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)|(file has no debug information)" && return 2
+	add_probe_vfs_getname -v 2>&1 | grep -E -q "^(Failed to find the path for the kernel|Debuginfo-analysis is not supported)|(file has no debug information)" && return 2
 	return 1
 }
diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
index f12a4e2179681..34c400ccbe046 100755
--- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
+++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh
@@ -64,7 +64,7 @@ trace_libc_inet_pton_backtrace() {
 	while read line <&3 && read -r pattern <&4; do
 		[ -z "$pattern" ] && break
 		echo $line
-		echo "$line" | egrep -q "$pattern"
+		echo "$line" | grep -E -q "$pattern"
 		if [ $? -ne 0 ] ; then
 			printf "FAIL: expected backtrace entry \"%s\" got \"%s\"\n" "$pattern" "$line"
 			return 1
diff --git a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
index 8d9c04e450ae8..7f83b2715b9a5 100755
--- a/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/record+script_probe_vfs_getname.sh
@@ -23,7 +23,7 @@ record_open_file() {
 perf_script_filenames() {
 	echo "Looking at perf.data file for vfs_getname records for the file we touched:"
 	perf script -i ${perfdata} | \
-	egrep " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname[_0-9]*: +\([[:xdigit:]]+\) +pathname=\"${file}\""
+	grep -E " +touch +[0-9]+ +\[[0-9]+\] +[0-9]+\.[0-9]+: +probe:vfs_getname[_0-9]*: +\([[:xdigit:]]+\) +pathname=\"${file}\""
 }
 
 add_probe_vfs_getname || skip_if_no_debuginfo
diff --git a/tools/perf/tests/shell/record_offcpu.sh b/tools/perf/tests/shell/record_offcpu.sh
index d2eba583a2ac9..e01973d4e0fba 100755
--- a/tools/perf/tests/shell/record_offcpu.sh
+++ b/tools/perf/tests/shell/record_offcpu.sh
@@ -51,7 +51,7 @@ test_offcpu_basic() {
     err=1
     return
   fi
-  if ! perf report -i ${perfdata} -q --percent-limit=90 | egrep -q sleep
+  if ! perf report -i ${perfdata} -q --percent-limit=90 | grep -E -q sleep
   then
     echo "Basic off-cpu test [Failed missing output]"
     err=1
diff --git a/tools/perf/tests/shell/stat.sh b/tools/perf/tests/shell/stat.sh
index 26a51b48aee46..2c1d3f7049951 100755
--- a/tools/perf/tests/shell/stat.sh
+++ b/tools/perf/tests/shell/stat.sh
@@ -7,7 +7,7 @@ set -e
 err=0
 test_default_stat() {
   echo "Basic stat command test"
-  if ! perf stat true 2>&1 | egrep -q "Performance counter stats for 'true':"
+  if ! perf stat true 2>&1 | grep -E -q "Performance counter stats for 'true':"
   then
     echo "Basic stat command test [Failed]"
     err=1
@@ -19,7 +19,7 @@ test_default_stat() {
 test_stat_record_report() {
   echo "stat record and report test"
   if ! perf stat record -o - true | perf stat report -i - 2>&1 | \
-    egrep -q "Performance counter stats for 'pipe':"
+    grep -E -q "Performance counter stats for 'pipe':"
   then
     echo "stat record and report test [Failed]"
     err=1
@@ -55,13 +55,13 @@ test_topdown_groups() {
     echo "Topdown event group test [Skipped event parsing failed]"
     return
   fi
-  if perf stat -e '{slots,topdown-retiring}' true 2>&1 | egrep -q "<not supported>"
+  if perf stat -e '{slots,topdown-retiring}' true 2>&1 | grep -E -q "<not supported>"
   then
     echo "Topdown event group test [Failed events not supported]"
     err=1
     return
   fi
-  if perf stat -e '{topdown-retiring,slots}' true 2>&1 | egrep -q "<not supported>"
+  if perf stat -e '{topdown-retiring,slots}' true 2>&1 | grep -E -q "<not supported>"
   then
     echo "Topdown event group test [Failed slots not reordered first]"
     err=1
@@ -82,7 +82,7 @@ test_topdown_weak_groups() {
     return
   fi
   group_needs_break="{slots,topdown-bad-spec,topdown-be-bound,topdown-fe-bound,topdown-retiring,branch-instructions,branch-misses,bus-cycles,cache-misses,cache-references,cpu-cycles,instructions,mem-loads,mem-stores,ref-cycles,cache-misses,cache-references}:W"
-  if perf stat --no-merge -e "$group_needs_break" true 2>&1 | egrep -q "<not supported>"
+  if perf stat --no-merge -e "$group_needs_break" true 2>&1 | grep -E -q "<not supported>"
   then
     echo "Topdown weak groups test [Failed events not supported]"
     err=1
diff --git a/tools/perf/tests/shell/test_arm_coresight.sh b/tools/perf/tests/shell/test_arm_coresight.sh
index daad786cf48d9..565ce525c40b2 100755
--- a/tools/perf/tests/shell/test_arm_coresight.sh
+++ b/tools/perf/tests/shell/test_arm_coresight.sh
@@ -49,7 +49,7 @@ perf_script_branch_samples() {
 	#   touch  6512          1         branches:u:      ffffb22082e0 strcmp+0xa0 (/lib/aarch64-linux-gnu/ld-2.27.so)
 	#   touch  6512          1         branches:u:      ffffb2208320 strcmp+0xe0 (/lib/aarch64-linux-gnu/ld-2.27.so)
 	perf script -F,-time -i ${perfdata} 2>&1 | \
-		egrep " +$1 +[0-9]+ .* +branches:(.*:)? +" > /dev/null 2>&1
+		grep -E " +$1 +[0-9]+ .* +branches:(.*:)? +" > /dev/null 2>&1
 }
 
 perf_report_branch_samples() {
@@ -60,7 +60,7 @@ perf_report_branch_samples() {
 	#    7.71%     7.71%  touch    libc-2.27.so      [.] getenv
 	#    2.59%     2.59%  touch    ld-2.27.so        [.] strcmp
 	perf report --stdio -i ${perfdata} 2>&1 | \
-		egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1
+		grep -E " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1
 }
 
 perf_report_instruction_samples() {
@@ -71,7 +71,7 @@ perf_report_instruction_samples() {
 	#    5.80%  touch    libc-2.27.so   [.] getenv
 	#    4.35%  touch    ld-2.27.so     [.] _dl_fixup
 	perf report --itrace=i20i --stdio -i ${perfdata} 2>&1 | \
-		egrep " +[0-9]+\.[0-9]+% +$1" > /dev/null 2>&1
+		grep -E " +[0-9]+\.[0-9]+% +$1" > /dev/null 2>&1
 }
 
 arm_cs_report() {
@@ -87,7 +87,7 @@ is_device_sink() {
 	# If the node of "enable_sink" is existed under the device path, this
 	# means the device is a sink device.  Need to exclude 'tpiu' since it
 	# cannot support perf PMU.
-	echo "$1" | egrep -q -v "tpiu"
+	echo "$1" | grep -E -q -v "tpiu"
 
 	if [ $? -eq 0 -a -e "$1/enable_sink" ]; then
 
diff --git a/tools/perf/tests/shell/test_arm_spe.sh b/tools/perf/tests/shell/test_arm_spe.sh
index 0d47479adba88..aa094d71f5b46 100755
--- a/tools/perf/tests/shell/test_arm_spe.sh
+++ b/tools/perf/tests/shell/test_arm_spe.sh
@@ -9,7 +9,7 @@
 # German Gomez <german.gomez@arm.com>, 2021
 
 skip_if_no_arm_spe_event() {
-	perf list | egrep -q 'arm_spe_[0-9]+//' && return 0
+	perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0
 
 	# arm_spe event doesn't exist
 	return 2
@@ -51,7 +51,7 @@ perf_script_samples() {
 	#	dd  3048 [002]          1    tlb-access:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
 	#	dd  3048 [002]          1        memory:      ffffaa64999c __GI___libc_write+0x3c (/lib/aarch64-linux-gnu/libc-2.27.so)
 	perf script -F,-time -i ${perfdata} 2>&1 | \
-		egrep " +$1 +[0-9]+ .* +${events}:(.*:)? +" > /dev/null 2>&1
+		grep -E " +$1 +[0-9]+ .* +${events}:(.*:)? +" > /dev/null 2>&1
 }
 
 perf_report_samples() {
@@ -62,7 +62,7 @@ perf_report_samples() {
 	#    7.71%     7.71%  dd    libc-2.27.so      [.] getenv
 	#    2.59%     2.59%  dd    ld-2.27.so        [.] strcmp
 	perf report --stdio -i ${perfdata} 2>&1 | \
-		egrep " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1
+		grep -E " +[0-9]+\.[0-9]+% +[0-9]+\.[0-9]+% +$1 " > /dev/null 2>&1
 }
 
 arm_spe_snapshot_test() {
diff --git a/tools/perf/tests/shell/test_arm_spe_fork.sh b/tools/perf/tests/shell/test_arm_spe_fork.sh
index da810e1b2b9e7..fad361675a1d1 100755
--- a/tools/perf/tests/shell/test_arm_spe_fork.sh
+++ b/tools/perf/tests/shell/test_arm_spe_fork.sh
@@ -5,7 +5,7 @@
 # German Gomez <german.gomez@arm.com>, 2022
 
 skip_if_no_arm_spe_event() {
-	perf list | egrep -q 'arm_spe_[0-9]+//' && return 0
+	perf list | grep -E -q 'arm_spe_[0-9]+//' && return 0
 	return 2
 }
 
diff --git a/tools/perf/tests/shell/test_brstack.sh b/tools/perf/tests/shell/test_brstack.sh
index 5856639b565b1..59195eb80052e 100755
--- a/tools/perf/tests/shell/test_brstack.sh
+++ b/tools/perf/tests/shell/test_brstack.sh
@@ -30,14 +30,14 @@ test_user_branches() {
 	# 	brstack_foo+0x14/brstack_bar+0x40/P/-/-/0/CALL
 
 	set -x
-	egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
-	egrep -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$"		$TMPDIR/perf.script
-	egrep -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$"	$TMPDIR/perf.script
-	egrep -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$"	$TMPDIR/perf.script
-	egrep -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$"		$TMPDIR/perf.script
+	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/IND_CALL$"	$TMPDIR/perf.script
+	grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_foo\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bar\+[^ ]*/CALL$"	$TMPDIR/perf.script
+	grep -E -m1 "^brstack_bar\+[^ ]*/brstack_foo\+[^ ]*/RET$"		$TMPDIR/perf.script
+	grep -E -m1 "^brstack_foo\+[^ ]*/brstack_bench\+[^ ]*/RET$"	$TMPDIR/perf.script
+	grep -E -m1 "^brstack_bench\+[^ ]*/brstack_bench\+[^ ]*/COND$"	$TMPDIR/perf.script
+	grep -E -m1 "^brstack\+[^ ]*/brstack\+[^ ]*/UNCOND$"		$TMPDIR/perf.script
 	set +x
 
 	# some branch types are still not being tested:
@@ -57,7 +57,7 @@ test_filter() {
 
 	# fail if we find any branch type that doesn't match any of the expected ones
 	# also consider UNKNOWN branch types (-)
-	if egrep -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then
+	if grep -E -vm1 "^[^ ]*/($expect|-|( *))$" $TMPDIR/perf.script; then
 		return 1
 	fi
 }
diff --git a/tools/perf/tests/shell/test_data_symbol.sh b/tools/perf/tests/shell/test_data_symbol.sh
index d871e6c743efd..69bb6fe86c507 100755
--- a/tools/perf/tests/shell/test_data_symbol.sh
+++ b/tools/perf/tests/shell/test_data_symbol.sh
@@ -5,7 +5,7 @@
 # Leo Yan <leo.yan@linaro.org>, 2022
 
 skip_if_no_mem_event() {
-	perf mem record -e list 2>&1 | egrep -q 'available' && return 0
+	perf mem record -e list 2>&1 | grep -E -q 'available' && return 0
 	return 2
 }
 
@@ -48,7 +48,7 @@ echo "Recording workload..."
 # perf mem/c2c internally uses IBS PMU on AMD CPU which doesn't support
 # user/kernel filtering and per-process monitoring, spin program on
 # specific CPU and test in per-CPU mode.
-is_amd=$(egrep -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo)
+is_amd=$(grep -E -c 'vendor_id.*AuthenticAMD' /proc/cpuinfo)
 if (($is_amd >= 1)); then
 	perf mem record -o ${PERF_DATA} -C 0 -- taskset -c 0 $TEST_PROGRAM &
 else
diff --git a/tools/perf/tests/shell/test_java_symbol.sh b/tools/perf/tests/shell/test_java_symbol.sh
index f221225808a38..90cea88119268 100755
--- a/tools/perf/tests/shell/test_java_symbol.sh
+++ b/tools/perf/tests/shell/test_java_symbol.sh
@@ -65,7 +65,7 @@ fi
 #   8.18%  jshell           jitted-50116-29.so    [.] Interpreter
 #   0.75%  Thread-1         jitted-83602-1670.so  [.] jdk.internal.jimage.BasicImageReader.getString(int)
 perf report --stdio -i ${PERF_INJ_DATA} 2>&1 | \
-	egrep " +[0-9]+\.[0-9]+% .* (Interpreter|jdk\.internal).*" > /dev/null 2>&1
+	grep -E " +[0-9]+\.[0-9]+% .* (Interpreter|jdk\.internal).*" > /dev/null 2>&1
 
 if [ $? -ne 0 ]; then
 	echo "Fail to find java symbols"
diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
index 3d60e993d2b87..0a4bac3dd77eb 100755
--- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh
+++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh
@@ -18,9 +18,9 @@ skip_if_no_perf_trace || exit 2
 . $(dirname $0)/lib/probe_vfs_getname.sh
 
 trace_open_vfs_getname() {
-	evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
+	evts=$(echo $(perf list syscalls:sys_enter_open* 2>/dev/null | grep -E 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/')
 	perf trace -e $evts touch $file 2>&1 | \
-	egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
+	grep -E " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$"
 }
 
 
diff --git a/tools/perf/trace/beauty/fadvise.sh b/tools/perf/trace/beauty/fadvise.sh
index b15ae38751677..4d3dd6e56dedc 100755
--- a/tools/perf/trace/beauty/fadvise.sh
+++ b/tools/perf/trace/beauty/fadvise.sh
@@ -6,7 +6,7 @@
 printf "static const char *fadvise_advices[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+POSIX_FADV_(\w+)[[:space:]]+([[:digit:]]+)[[:space:]]+.*'
 
-egrep $regex ${header_dir}/fadvise.h | \
+grep -E $regex ${header_dir}/fadvise.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n" | \
 	grep -v "[6].*DONTNEED" | grep -v "[7].*NOREUSE"
diff --git a/tools/perf/trace/beauty/fsmount.sh b/tools/perf/trace/beauty/fsmount.sh
index 615cc0fcf4f9e..cba8897a751fd 100755
--- a/tools/perf/trace/beauty/fsmount.sh
+++ b/tools/perf/trace/beauty/fsmount.sh
@@ -16,7 +16,7 @@ linux_mount=${linux_header_dir}/mount.h
 
 printf "static const char *fsmount_attr_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOUNT_ATTR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \
+grep -E $regex ${linux_mount} | grep -v MOUNT_ATTR_RELATIME | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/fspick.sh b/tools/perf/trace/beauty/fspick.sh
index b220e07ef452c..1f088329b96ef 100755
--- a/tools/perf/trace/beauty/fspick.sh
+++ b/tools/perf/trace/beauty/fspick.sh
@@ -11,7 +11,7 @@ linux_mount=${linux_header_dir}/mount.h
 
 printf "static const char *fspick_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+FSPICK_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${linux_mount} | \
+grep -E $regex ${linux_mount} | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/kcmp_type.sh b/tools/perf/trace/beauty/kcmp_type.sh
index df8b17486d575..5df9dcbd674e2 100755
--- a/tools/perf/trace/beauty/kcmp_type.sh
+++ b/tools/perf/trace/beauty/kcmp_type.sh
@@ -5,7 +5,7 @@
 
 printf "static const char *kcmp_types[] = {\n"
 regex='^[[:space:]]+(KCMP_(\w+)),'
-egrep $regex ${header_dir}/kcmp.h | grep -v KCMP_TYPES, | \
+grep -E $regex ${header_dir}/kcmp.h | grep -v KCMP_TYPES, | \
 	sed -r "s/$regex/\1 \2/g" | \
 	xargs printf "\t[%s]\t= \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/kvm_ioctl.sh b/tools/perf/trace/beauty/kvm_ioctl.sh
index 4ce54f5bf7564..bd0efd45fa9db 100755
--- a/tools/perf/trace/beauty/kvm_ioctl.sh
+++ b/tools/perf/trace/beauty/kvm_ioctl.sh
@@ -5,8 +5,8 @@
 
 printf "static const char *kvm_ioctl_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+KVM_(\w+)[[:space:]]+_IO[RW]*\([[:space:]]*KVMIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-egrep $regex ${header_dir}/kvm.h	| \
+grep -E $regex ${header_dir}/kvm.h	| \
 	sed -r "s/$regex/\2 \1/g"	| \
-	egrep -v " ((ARM|PPC|S390)_|[GS]ET_(DEBUGREGS|PIT2|XSAVE|TSC_KHZ)|CREATE_SPAPR_TCE_64)" | \
+	grep -E -v " ((ARM|PPC|S390)_|[GS]ET_(DEBUGREGS|PIT2|XSAVE|TSC_KHZ)|CREATE_SPAPR_TCE_64)" | \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/madvise_behavior.sh b/tools/perf/trace/beauty/madvise_behavior.sh
index 4527d290cdfc6..c659c33bfc076 100755
--- a/tools/perf/trace/beauty/madvise_behavior.sh
+++ b/tools/perf/trace/beauty/madvise_behavior.sh
@@ -5,7 +5,7 @@
 
 printf "static const char *madvise_advices[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MADV_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
-egrep $regex ${header_dir}/mman-common.h | \
+grep -E $regex ${header_dir}/mman-common.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/mmap_flags.sh b/tools/perf/trace/beauty/mmap_flags.sh
index 76825710c725f..3022597c8c178 100755
--- a/tools/perf/trace/beauty/mmap_flags.sh
+++ b/tools/perf/trace/beauty/mmap_flags.sh
@@ -15,26 +15,26 @@ fi
 linux_mman=${linux_header_dir}/mman.h
 arch_mman=${arch_header_dir}/mman.h
 
-# those in egrep -vw are flags, we want just the bits
+# those in grep -E -vw are flags, we want just the bits
 
 printf "static const char *mmap_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MAP_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
-egrep -q $regex ${arch_mman} && \
-(egrep $regex ${arch_mman} | \
+grep -E -q $regex ${arch_mman} && \
+(grep -E $regex ${arch_mman} | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-egrep -q $regex ${linux_mman} && \
-(egrep $regex ${linux_mman} | \
-	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+grep -E -q $regex ${linux_mman} && \
+(grep -E $regex ${linux_mman} | \
+	grep -E -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g" | \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
-(egrep $regex ${header_dir}/mman-common.h | \
-	egrep -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
+([ ! -f ${arch_mman} ] || grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
+(grep -E $regex ${header_dir}/mman-common.h | \
+	grep -E -vw 'MAP_(UNINITIALIZED|TYPE|SHARED_VALIDATE)' | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
-(egrep $regex ${header_dir}/mman.h | \
+([ ! -f ${arch_mman} ] || grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.h>.*' ${arch_mman}) &&
+(grep -E $regex ${header_dir}/mman.h | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MAP_%s\n#define MAP_%s %s\n#endif\n")
 printf "};\n"
diff --git a/tools/perf/trace/beauty/mmap_prot.sh b/tools/perf/trace/beauty/mmap_prot.sh
index 664d8d534a509..49e8c865214b3 100755
--- a/tools/perf/trace/beauty/mmap_prot.sh
+++ b/tools/perf/trace/beauty/mmap_prot.sh
@@ -17,14 +17,14 @@ prefix="PROT"
 
 printf "static const char *mmap_prot[] = {\n"
 regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+%s_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*' ${prefix}`
-([ ! -f ${arch_mman} ] || egrep -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
-(egrep $regex ${common_mman} | \
-	egrep -vw PROT_NONE | \
+([ ! -f ${arch_mman} ] || grep -E -q '#[[:space:]]*include[[:space:]]+.*uapi/asm-generic/mman.*' ${arch_mman}) &&
+(grep -E $regex ${common_mman} | \
+	grep -E -vw PROT_NONE | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef ${prefix}_%s\n#define ${prefix}_%s %s\n#endif\n")
-[ -f ${arch_mman} ] && egrep -q $regex ${arch_mman} && 
-(egrep $regex ${arch_mman} | \
-	egrep -vw PROT_NONE | \
+[ -f ${arch_mman} ] && grep -E -q $regex ${arch_mman} &&
+(grep -E $regex ${arch_mman} | \
+	grep -E -vw PROT_NONE | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef ${prefix}_%s\n#define ${prefix}_%s %s\n#endif\n")
 printf "};\n"
diff --git a/tools/perf/trace/beauty/mount_flags.sh b/tools/perf/trace/beauty/mount_flags.sh
index 847850b2ef6cd..730099a9a67c1 100755
--- a/tools/perf/trace/beauty/mount_flags.sh
+++ b/tools/perf/trace/beauty/mount_flags.sh
@@ -5,11 +5,11 @@
 
 printf "static const char *mount_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+([[:digit:]]+)[[:space:]]*.*'
-egrep $regex ${header_dir}/mount.h | egrep -v '(MSK|VERBOSE|MGC_VAL)\>' | \
+grep -E $regex ${header_dir}/mount.h | grep -E -v '(MSK|VERBOSE|MGC_VAL)\>' | \
 	sed -r "s/$regex/\2 \2 \1/g" | sort -n | \
 	xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MS_([[:alnum:]_]+)[[:space:]]+\(1<<([[:digit:]]+)\)[[:space:]]*.*'
-egrep $regex ${header_dir}/mount.h | \
+grep -E $regex ${header_dir}/mount.h | \
 	sed -r "s/$regex/\2 \1/g" | \
 	xargs printf "\t[%s + 1] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/move_mount_flags.sh b/tools/perf/trace/beauty/move_mount_flags.sh
index 4b1d9acc0bd01..32e552faf37a8 100755
--- a/tools/perf/trace/beauty/move_mount_flags.sh
+++ b/tools/perf/trace/beauty/move_mount_flags.sh
@@ -11,7 +11,7 @@ linux_mount=${linux_header_dir}/mount.h
 
 printf "static const char *move_mount_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MOVE_MOUNT_([^_]+_[[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${linux_mount} | \
+grep -E $regex ${linux_mount} | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/mremap_flags.sh b/tools/perf/trace/beauty/mremap_flags.sh
index d58182300bb13..4d018350183ba 100755
--- a/tools/perf/trace/beauty/mremap_flags.sh
+++ b/tools/perf/trace/beauty/mremap_flags.sh
@@ -11,8 +11,8 @@ linux_mman=${linux_header_dir}/mman.h
 
 printf "static const char *mremap_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MREMAP_([[:alnum:]_]+)[[:space:]]+((0x)?[[:xdigit:]]+)[[:space:]]*.*'
-egrep -q $regex ${linux_mman} && \
-(egrep $regex ${linux_mman} | \
+grep -E -q $regex ${linux_mman} && \
+(grep -E $regex ${linux_mman} | \
 	sed -r "s/$regex/\2 \1 \1 \1 \2/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n#ifndef MREMAP_%s\n#define MREMAP_%s %s\n#endif\n")
 printf "};\n"
diff --git a/tools/perf/trace/beauty/perf_ioctl.sh b/tools/perf/trace/beauty/perf_ioctl.sh
index 9aabd9743ef6e..06c2774f32625 100755
--- a/tools/perf/trace/beauty/perf_ioctl.sh
+++ b/tools/perf/trace/beauty/perf_ioctl.sh
@@ -5,7 +5,7 @@
 
 printf "static const char *perf_ioctl_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+PERF_EVENT_IOC_(\w+)[[:space:]]+_IO[RW]*[[:space:]]*\([[:space:]]*.\$.[[:space:]]*,[[:space:]]*([[:digit:]]+).*'
-egrep $regex ${header_dir}/perf_event.h	| \
+grep -E $regex ${header_dir}/perf_event.h	| \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
index f8f1b560cf8a4..74da888bcdd3b 100755
--- a/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
+++ b/tools/perf/trace/beauty/pkey_alloc_access_rights.sh
@@ -5,7 +5,7 @@
 
 printf "static const char *pkey_alloc_access_rights[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+PKEY_([[:alnum:]_]+)[[:space:]]+(0x[[:xdigit:]]+)[[:space:]]*'
-egrep $regex ${header_dir}/mman-common.h	| \
+grep -E $regex ${header_dir}/mman-common.h	| \
 	sed -r "s/$regex/\2 \2 \1/g"	| \
 	sort | xargs printf "\t[%s ? (ilog2(%s) + 1) : 0] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/prctl_option.sh b/tools/perf/trace/beauty/prctl_option.sh
index 3d278785fe574..8059342ca4126 100755
--- a/tools/perf/trace/beauty/prctl_option.sh
+++ b/tools/perf/trace/beauty/prctl_option.sh
@@ -5,14 +5,14 @@
 
 printf "static const char *prctl_options[] = {\n"
 regex='^#define[[:space:]]{1}PR_(\w+)[[:space:]]*([[:xdigit:]]+)([[:space:]]*\/.*)?$'
-egrep $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
+grep -E $regex ${header_dir}/prctl.h | grep -v PR_SET_PTRACER | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
 
 printf "static const char *prctl_set_mm_options[] = {\n"
 regex='^#[[:space:]]+define[[:space:]]+PR_SET_MM_(\w+)[[:space:]]*([[:digit:]]+).*'
-egrep $regex ${header_dir}/prctl.h | \
+grep -E $regex ${header_dir}/prctl.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/rename_flags.sh b/tools/perf/trace/beauty/rename_flags.sh
index 54c87c782ab28..94bf7f45d28e6 100755
--- a/tools/perf/trace/beauty/rename_flags.sh
+++ b/tools/perf/trace/beauty/rename_flags.sh
@@ -8,8 +8,8 @@ fs_header=${header_dir}/fs.h
 
 printf "static const char *rename_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+RENAME_([[:alnum:]_]+)[[:space:]]+\(1[[:space:]]*<<[[:space:]]*([[:xdigit:]]+)[[:space:]]*\)[[:space:]]*.*'
-egrep -q $regex ${fs_header} && \
-(egrep $regex ${fs_header} | \
+grep -E -q $regex ${fs_header} && \
+(grep -E $regex ${fs_header} | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[%d + 1] = \"%s\",\n")
 printf "};\n"
diff --git a/tools/perf/trace/beauty/sockaddr.sh b/tools/perf/trace/beauty/sockaddr.sh
index 3820e5c822931..a59827ea14370 100755
--- a/tools/perf/trace/beauty/sockaddr.sh
+++ b/tools/perf/trace/beauty/sockaddr.sh
@@ -17,8 +17,8 @@ printf "static const char *socket_families[] = {\n"
 # #define AF_LOCAL	1	/* POSIX name for AF_UNIX	*/
 regex='^#define[[:space:]]+AF_(\w+)[[:space:]]+([[:digit:]]+).*'
 
-egrep $regex ${header_dir}/socket.h | \
+grep -E $regex ${header_dir}/socket.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[%s] = \"%s\",\n" | \
-	egrep -v "\"(UNIX|MAX)\""
+	grep -E -v "\"(UNIX|MAX)\""
 printf "};\n"
diff --git a/tools/perf/trace/beauty/socket.sh b/tools/perf/trace/beauty/socket.sh
index 76330acb27e58..8bc7ba62203e4 100755
--- a/tools/perf/trace/beauty/socket.sh
+++ b/tools/perf/trace/beauty/socket.sh
@@ -12,7 +12,7 @@ fi
 printf "static const char *socket_ipproto[] = {\n"
 ipproto_regex='^[[:space:]]+IPPROTO_(\w+)[[:space:]]+=[[:space:]]+([[:digit:]]+),.*'
 
-egrep $ipproto_regex ${uapi_header_dir}/in.h | \
+grep -E $ipproto_regex ${uapi_header_dir}/in.h | \
 	sed -r "s/$ipproto_regex/\2 \1/g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
@@ -20,7 +20,7 @@ printf "};\n\n"
 printf "static const char *socket_level[] = {\n"
 socket_level_regex='^#define[[:space:]]+SOL_(\w+)[[:space:]]+([[:digit:]]+)([[:space:]]+\/.*)?'
 
-egrep $socket_level_regex ${beauty_header_dir}/socket.h | \
+grep -E $socket_level_regex ${beauty_header_dir}/socket.h | \
 	sed -r "s/$socket_level_regex/\2 \1/g"	| \
 	sort -n | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
diff --git a/tools/perf/trace/beauty/sync_file_range.sh b/tools/perf/trace/beauty/sync_file_range.sh
index 7a9282d04e44b..90bf633be8799 100755
--- a/tools/perf/trace/beauty/sync_file_range.sh
+++ b/tools/perf/trace/beauty/sync_file_range.sh
@@ -11,7 +11,7 @@ linux_fs=${linux_header_dir}/fs.h
 
 printf "static const char *sync_file_range_flags[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+SYNC_FILE_RANGE_([[:alnum:]_]+)[[:space:]]+([[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${linux_fs} | \
+grep -E $regex ${linux_fs} | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	xargs printf "\t[ilog2(%s) + 1] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
index f920003723b39..eed9ce0fcbe6b 100755
--- a/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
+++ b/tools/perf/trace/beauty/tracepoints/x86_irq_vectors.sh
@@ -15,12 +15,12 @@ x86_irq_vectors=${arch_x86_header_dir}/irq_vectors.h
 # the time of writing of this script was: IRQ_MOVE_CLEANUP_VECTOR.
 
 first_external_regex='^#define[[:space:]]+FIRST_EXTERNAL_VECTOR[[:space:]]+(0x[[:xdigit:]]+)$'
-first_external_vector=$(egrep ${first_external_regex} ${x86_irq_vectors} | sed -r "s/${first_external_regex}/\1/g")
+first_external_vector=$(grep -E ${first_external_regex} ${x86_irq_vectors} | sed -r "s/${first_external_regex}/\1/g")
 
 printf "static const char *x86_irq_vectors[] = {\n"
 regex='^#define[[:space:]]+([[:alnum:]_]+)_VECTOR[[:space:]]+(0x[[:xdigit:]]+)$'
 sed -r "s/FIRST_EXTERNAL_VECTOR/${first_external_vector}/g" ${x86_irq_vectors} | \
-egrep ${regex} | \
+grep -E ${regex} | \
 	sed -r "s/${regex}/\2 \1/g" | sort -n | \
 	xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
index 9b0614a87831b..0078689963e0f 100755
--- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh
+++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
@@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h
 
 printf "static const char *x86_MSRs[] = {\n"
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSC_DEADLINE|UCODE_REV)|IDT_FCR4)' | \
+grep -E $regex ${x86_msr_index} | grep -E -v 'MSR_(ATOM|P[46]|IA32_(TSC_DEADLINE|UCODE_REV)|IDT_FCR4)' | \
 	sed -r "s/$regex/\2 \1/g" | sort -n | \
 	xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
@@ -23,18 +23,18 @@ printf "};\n\n"
 # Remove MSR_K6_WHCR, clashes with MSR_LSTAR
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0000[[:xdigit:]]+)[[:space:]]*.*'
 printf "#define x86_64_specific_MSRs_offset "
-egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
+grep -E $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
 printf "static const char *x86_64_specific_MSRs[] = {\n"
-egrep $regex ${x86_msr_index} | \
-	sed -r "s/$regex/\2 \1/g" | egrep -vw 'K6_WHCR' | sort -n | \
+grep -E $regex ${x86_msr_index} | \
+	sed -r "s/$regex/\2 \1/g" | grep -E -vw 'K6_WHCR' | sort -n | \
 	xargs printf "\t[%s - x86_64_specific_MSRs_offset] = \"%s\",\n"
 printf "};\n\n"
 
 regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0xc0010[[:xdigit:]]+)[[:space:]]*.*'
 printf "#define x86_AMD_V_KVM_MSRs_offset "
-egrep $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
+grep -E $regex ${x86_msr_index} | sed -r "s/$regex/\2/g" | sort -n | head -1
 printf "static const char *x86_AMD_V_KVM_MSRs[] = {\n"
-egrep $regex ${x86_msr_index} | \
+grep -E $regex ${x86_msr_index} | \
 	sed -r "s/$regex/\2 \1/g" | sort -n | \
 	xargs printf "\t[%s - x86_AMD_V_KVM_MSRs_offset] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/usbdevfs_ioctl.sh b/tools/perf/trace/beauty/usbdevfs_ioctl.sh
index aa597ae537470..b39cfb3720b80 100755
--- a/tools/perf/trace/beauty/usbdevfs_ioctl.sh
+++ b/tools/perf/trace/beauty/usbdevfs_ioctl.sh
@@ -8,14 +8,14 @@
 
 printf "static const char *usbdevfs_ioctl_cmds[] = {\n"
 regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)(\(\w+\))?[[:space:]]+_IO[CWR]{0,2}\([[:space:]]*(_IOC_\w+,[[:space:]]*)?'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
-egrep "$regex" ${header_dir}/usbdevice_fs.h | egrep -v 'USBDEVFS_\w+32[[:space:]]' | \
+grep -E "$regex" ${header_dir}/usbdevice_fs.h | grep -E -v 'USBDEVFS_\w+32[[:space:]]' | \
 	sed -r "s/$regex/\4 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n\n"
 printf "#if 0\n"
 printf "static const char *usbdevfs_ioctl_32_cmds[] = {\n"
 regex="^#[[:space:]]*define[[:space:]]+USBDEVFS_(\w+)[[:space:]]+_IO[WR]{0,2}\([[:space:]]*'U'[[:space:]]*,[[:space:]]*([[:digit:]]+).*"
-egrep $regex ${header_dir}/usbdevice_fs.h | egrep 'USBDEVFS_\w+32[[:space:]]' | \
+grep -E $regex ${header_dir}/usbdevice_fs.h | grep -E 'USBDEVFS_\w+32[[:space:]]' | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
index 439773daaf77d..2dd0a3b1f55aa 100755
--- a/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
+++ b/tools/perf/trace/beauty/vhost_virtio_ioctl.sh
@@ -5,14 +5,14 @@
 
 printf "static const char *vhost_virtio_ioctl_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-egrep $regex ${header_dir}/vhost.h | \
+grep -E $regex ${header_dir}/vhost.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
 
 printf "static const char *vhost_virtio_ioctl_read_cmds[] = {\n"
 regex='^#[[:space:]]*define[[:space:]]+VHOST_(\w+)[[:space:]]+_IOW?R\([[:space:]]*VHOST_VIRTIO[[:space:]]*,[[:space:]]*(0x[[:xdigit:]]+).*'
-egrep $regex ${header_dir}/vhost.h | \
+grep -E $regex ${header_dir}/vhost.h | \
 	sed -r "s/$regex/\2 \1/g"	| \
 	sort | xargs printf "\t[%s] = \"%s\",\n"
 printf "};\n"
diff --git a/tools/perf/trace/beauty/x86_arch_prctl.sh b/tools/perf/trace/beauty/x86_arch_prctl.sh
index 7372d3cab9590..57fa6aaffe700 100755
--- a/tools/perf/trace/beauty/x86_arch_prctl.sh
+++ b/tools/perf/trace/beauty/x86_arch_prctl.sh
@@ -15,8 +15,8 @@ print_range () {
 	printf "static const char *x86_arch_prctl_codes_%d[] = {\n" $idx
 	regex=`printf '^[[:space:]]*#[[:space:]]*define[[:space:]]+ARCH_([[:alnum:]_]+)[[:space:]]+(%s[[:xdigit:]]+).*' ${prefix}`
 	fmt="\t[%#x - ${first_entry}]= \"%s\",\n"
-	egrep -q $regex ${prctl_arch_header} && \
-	(egrep $regex ${prctl_arch_header} | \
+	grep -E -q $regex ${prctl_arch_header} && \
+	(grep -E $regex ${prctl_arch_header} | \
 		sed -r "s/$regex/\2 \1/g"	| \
 		xargs printf "$fmt")
 	printf "};\n\n"
-- 
GitLab


From 549a715b98a13c6d05452be3ad37e980087bb081 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Wed, 7 Dec 2022 00:09:59 +0000
Subject: [PATCH 3937/4122] KVM: x86: Add proper ReST tables for userspace MSR
 exits/flags

Add ReST formatting to the set of userspace MSR exits/flags so that the
resulting HTML docs generate a table instead of malformed gunk.  This
also fixes a warning that was introduced by a recent cleanup of the
relevant documentation (yay copy+paste).

 >> Documentation/virt/kvm/api.rst:7287: WARNING: Block quote ends
    without a blank line; unexpected unindent.

Fixes: 1ae099540e8c ("KVM: x86: Allow deflecting unknown MSR accesses to user space")
Fixes: 1f158147181b ("KVM: x86: Clean up KVM_CAP_X86_USER_SPACE_MSR documentation")
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Message-Id: <20221207000959.2035098-1-seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virt/kvm/api.rst | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index c618fae44ad7f..778c6460d1de6 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6455,9 +6455,11 @@ The "reason" field specifies why the MSR interception occurred. Userspace will
 only receive MSR exits when a particular reason was requested during through
 ENABLE_CAP. Currently valid exit reasons are:
 
-	KVM_MSR_EXIT_REASON_UNKNOWN - access to MSR that is unknown to KVM
-	KVM_MSR_EXIT_REASON_INVAL - access to invalid MSRs or reserved bits
-	KVM_MSR_EXIT_REASON_FILTER - access blocked by KVM_X86_SET_MSR_FILTER
+============================ ========================================
+ KVM_MSR_EXIT_REASON_UNKNOWN access to MSR that is unknown to KVM
+ KVM_MSR_EXIT_REASON_INVAL   access to invalid MSRs or reserved bits
+ KVM_MSR_EXIT_REASON_FILTER  access blocked by KVM_X86_SET_MSR_FILTER
+============================ ========================================
 
 For KVM_EXIT_X86_RDMSR, the "index" field tells userspace which MSR the guest
 wants to read. To respond to this request with a successful read, userspace
@@ -7256,11 +7258,13 @@ to inform a user that an MSR was not emulated/virtualized by KVM.
 
 The valid mask flags are:
 
-	KVM_MSR_EXIT_REASON_UNKNOWN - intercept accesses to unknown (to KVM) MSRs
-	KVM_MSR_EXIT_REASON_INVAL   - intercept accesses that are architecturally
-                                invalid according to the vCPU model and/or mode
-	KVM_MSR_EXIT_REASON_FILTER  - intercept accesses that are denied by userspace
-                                via KVM_X86_SET_MSR_FILTER
+============================ ===============================================
+ KVM_MSR_EXIT_REASON_UNKNOWN intercept accesses to unknown (to KVM) MSRs
+ KVM_MSR_EXIT_REASON_INVAL   intercept accesses that are architecturally
+                             invalid according to the vCPU model and/or mode
+ KVM_MSR_EXIT_REASON_FILTER  intercept accesses that are denied by userspace
+                             via KVM_X86_SET_MSR_FILTER
+============================ ===============================================
 
 7.22 KVM_CAP_X86_BUS_LOCK_EXIT
 -------------------------------
-- 
GitLab


From 025e3b507a3a8e1ee96a3112bb67495c77d6cdb6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Nov 2022 17:09:46 +0200
Subject: [PATCH 3938/4122] fbdev: ssd1307fb: Drop optional dependency

Only a single out of three devices need a PWM, so from driver it's
optional. Moreover it's a single driver in the entire kernel that
currently selects PWM. Unfortunately this selection is a root cause
of the circular dependencies when we want to enable optional PWM
for some other drivers that select GPIOLIB.

Fixes: a2ed00da5047 ("drivers/video: add support for the Solomon SSD1307 OLED Controller")
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 71019b167f8b0..66f36b69e8f39 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -2254,7 +2254,6 @@ config FB_SSD1307
 	select FB_SYS_COPYAREA
 	select FB_SYS_IMAGEBLIT
 	select FB_DEFERRED_IO
-	select PWM
 	select FB_BACKLIGHT
 	help
 	  This driver implements support for the Solomon SSD1307
-- 
GitLab


From 6273c43769cbd4451b03d239dc16d5c54bb7279a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 1 Nov 2022 17:09:47 +0200
Subject: [PATCH 3939/4122] fbdev: ssd1307fb: Drop duplicate NULL checks for
 PWM APIs

pwm_disable() and pwm_put() are NULL-aware, no need to
duplicate the check in the caller.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/ssd1307fb.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/video/fbdev/ssd1307fb.c b/drivers/video/fbdev/ssd1307fb.c
index 5c891aa00d596..046b9990d27cd 100644
--- a/drivers/video/fbdev/ssd1307fb.c
+++ b/drivers/video/fbdev/ssd1307fb.c
@@ -803,10 +803,8 @@ static int ssd1307fb_probe(struct i2c_client *client)
 bl_init_error:
 	unregister_framebuffer(info);
 panel_init_error:
-	if (par->device_info->need_pwm) {
-		pwm_disable(par->pwm);
-		pwm_put(par->pwm);
-	}
+	pwm_disable(par->pwm);
+	pwm_put(par->pwm);
 regulator_enable_error:
 	if (par->vbat_reg)
 		regulator_disable(par->vbat_reg);
@@ -827,10 +825,8 @@ static void ssd1307fb_remove(struct i2c_client *client)
 	backlight_device_unregister(info->bl_dev);
 
 	unregister_framebuffer(info);
-	if (par->device_info->need_pwm) {
-		pwm_disable(par->pwm);
-		pwm_put(par->pwm);
-	}
+	pwm_disable(par->pwm);
+	pwm_put(par->pwm);
 	if (par->vbat_reg)
 		regulator_disable(par->vbat_reg);
 	fb_deferred_io_cleanup(info);
-- 
GitLab


From 28f24e90ffc4bf2199c91783b98dfc71de1e3a2f Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Tue, 1 Nov 2022 10:29:47 +0000
Subject: [PATCH 3940/4122] fbdev: omapfb: remove redundant variable checksum

Variable checksum is being used to accumulate values however
it is never read or used afterwards. It is redundant and can
be removed.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/omap2/omapfb/dss/hdmi5_core.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5_core.c b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5_core.c
index cb63bc0e92caa..b33f62c5cb220 100644
--- a/drivers/video/fbdev/omap2/omapfb/dss/hdmi5_core.c
+++ b/drivers/video/fbdev/omap2/omapfb/dss/hdmi5_core.c
@@ -129,7 +129,6 @@ static int hdmi_core_ddc_edid(struct hdmi_core_data *core, u8 *pedid, u8 ext)
 {
 	void __iomem *base = core->base;
 	u8 cur_addr;
-	char checksum = 0;
 	const int retries = 1000;
 	u8 seg_ptr = ext / 2;
 	u8 edidbase = ((ext % 2) * 0x80);
@@ -178,7 +177,6 @@ static int hdmi_core_ddc_edid(struct hdmi_core_data *core, u8 *pedid, u8 ext)
 		}
 
 		pedid[cur_addr] = REG_GET(base, HDMI_CORE_I2CM_DATAI, 7, 0);
-		checksum += pedid[cur_addr];
 	}
 
 	return 0;
-- 
GitLab


From 257030d4ee7c76892e9a4e5bf94f8c0fc41c6e46 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:30 -0700
Subject: [PATCH 3941/4122] fbdev: omapfb: connector-hdmi: switch to using
 gpiod API

Switch the driver from legacy gpio API that is deprecated to the newer
gpiod API that respects line polarities described in ACPI/DT.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omap2/omapfb/displays/connector-hdmi.c    | 49 ++++++-------------
 1 file changed, 14 insertions(+), 35 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c
index 670b9c6eb5a9c..8f9ff9fb4ca4c 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-hdmi.c
@@ -6,11 +6,12 @@
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  */
 
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 
 #include <drm/drm_edid.h>
 
@@ -41,7 +42,7 @@ struct panel_drv_data {
 
 	struct omap_video_timings timings;
 
-	int hpd_gpio;
+	struct gpio_desc *hpd_gpio;
 };
 
 #define to_panel_data(x) container_of(x, struct panel_drv_data, dssdev)
@@ -155,8 +156,8 @@ static bool hdmic_detect(struct omap_dss_device *dssdev)
 	struct panel_drv_data *ddata = to_panel_data(dssdev);
 	struct omap_dss_device *in = ddata->in;
 
-	if (gpio_is_valid(ddata->hpd_gpio))
-		return gpio_get_value_cansleep(ddata->hpd_gpio);
+	if (ddata->hpd_gpio)
+		return gpiod_get_value_cansleep(ddata->hpd_gpio);
 	else
 		return in->ops.hdmi->detect(in);
 }
@@ -197,31 +198,6 @@ static struct omap_dss_driver hdmic_driver = {
 	.set_hdmi_infoframe	= hdmic_set_infoframe,
 };
 
-static int hdmic_probe_of(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct device_node *node = pdev->dev.of_node;
-	struct omap_dss_device *in;
-	int gpio;
-
-	/* HPD GPIO */
-	gpio = of_get_named_gpio(node, "hpd-gpios", 0);
-	if (gpio_is_valid(gpio))
-		ddata->hpd_gpio = gpio;
-	else
-		ddata->hpd_gpio = -ENODEV;
-
-	in = omapdss_of_find_source_for_first_ep(node);
-	if (IS_ERR(in)) {
-		dev_err(&pdev->dev, "failed to find video source\n");
-		return PTR_ERR(in);
-	}
-
-	ddata->in = in;
-
-	return 0;
-}
-
 static int hdmic_probe(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata;
@@ -238,15 +214,18 @@ static int hdmic_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ddata);
 	ddata->dev = &pdev->dev;
 
-	r = hdmic_probe_of(pdev);
+	ddata->hpd_gpio = devm_gpiod_get_optional(&pdev->dev, "hpd", GPIOD_IN);
+	r = PTR_ERR_OR_ZERO(ddata->hpd_gpio);
 	if (r)
 		return r;
 
-	if (gpio_is_valid(ddata->hpd_gpio)) {
-		r = devm_gpio_request_one(&pdev->dev, ddata->hpd_gpio,
-				GPIOF_DIR_IN, "hdmi_hpd");
-		if (r)
-			goto err_reg;
+	gpiod_set_consumer_name(ddata->hpd_gpio, "hdmi_hpd");
+
+	ddata->in = omapdss_of_find_source_for_first_ep(pdev->dev.of_node);
+	r = PTR_ERR_OR_ZERO(ddata->in);
+	if (r) {
+		dev_err(&pdev->dev, "failed to find video source\n");
+		return r;
 	}
 
 	ddata->timings = hdmic_default_timings;
-- 
GitLab


From 5845b32edc1ee1039d040716cf9d78bbd52e13c3 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:31 -0700
Subject: [PATCH 3942/4122] fbdev: omapfb: panel-sony-acx565akm: remove support
 for platform data

There are no users of panel_acx565akm_platform_data in the mainline
kernel so support for it can be removed from the panel driver.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omapfb/displays/panel-sony-acx565akm.c    | 45 +++----------------
 include/video/omap-panel-data.h               | 16 -------
 2 files changed, 6 insertions(+), 55 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
index c0965bee12c57..0c81d3ff41974 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
@@ -23,7 +23,6 @@
 #include <linux/of_gpio.h>
 
 #include <video/omapfb_dss.h>
-#include <video/omap-panel-data.h>
 
 #define MIPID_CMD_READ_DISP_ID		0x04
 #define MIPID_CMD_READ_RED		0x06
@@ -688,32 +687,6 @@ static struct omap_dss_driver acx565akm_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-static int acx565akm_probe_pdata(struct spi_device *spi)
-{
-	const struct panel_acx565akm_platform_data *pdata;
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct omap_dss_device *dssdev, *in;
-
-	pdata = dev_get_platdata(&spi->dev);
-
-	ddata->reset_gpio = pdata->reset_gpio;
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&spi->dev, "failed to find video source '%s'\n",
-				pdata->source);
-		return -EPROBE_DEFER;
-	}
-	ddata->in = in;
-
-	ddata->datapairs = pdata->datapairs;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
 static int acx565akm_probe_of(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
@@ -741,6 +714,9 @@ static int acx565akm_probe(struct spi_device *spi)
 
 	dev_dbg(&spi->dev, "%s\n", __func__);
 
+	if (!spi->dev.of_node)
+		return -ENODEV;
+
 	spi->mode = SPI_MODE_3;
 
 	ddata = devm_kzalloc(&spi->dev, sizeof(*ddata), GFP_KERNEL);
@@ -753,18 +729,9 @@ static int acx565akm_probe(struct spi_device *spi)
 
 	mutex_init(&ddata->mutex);
 
-	if (dev_get_platdata(&spi->dev)) {
-		r = acx565akm_probe_pdata(spi);
-		if (r)
-			return r;
-	} else if (spi->dev.of_node) {
-		r = acx565akm_probe_of(spi);
-		if (r)
-			return r;
-	} else {
-		dev_err(&spi->dev, "platform data missing!\n");
-		return -ENODEV;
-	}
+	r = acx565akm_probe_of(spi);
+	if (r)
+		return r;
 
 	if (gpio_is_valid(ddata->reset_gpio)) {
 		r = devm_gpio_request_one(&spi->dev, ddata->reset_gpio,
diff --git a/include/video/omap-panel-data.h b/include/video/omap-panel-data.h
index 42b77249ee141..b7733150b55cd 100644
--- a/include/video/omap-panel-data.h
+++ b/include/video/omap-panel-data.h
@@ -52,20 +52,4 @@ struct panel_dpi_platform_data {
 	int enable_gpio;
 };
 
-/**
- * panel_acx565akm platform data
- * @name: name for this display entity
- * @source: name of the display entity used as a video source
- * @reset_gpio: gpio to reset the panel (or -1)
- * @datapairs: number of SDI datapairs
- */
-struct panel_acx565akm_platform_data {
-	const char *name;
-	const char *source;
-
-	int reset_gpio;
-
-	int datapairs;
-};
-
 #endif /* __OMAP_PANEL_DATA_H */
-- 
GitLab


From 844c245fc49d21639ed9e00c4b3ac0f219b7c6ce Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:32 -0700
Subject: [PATCH 3943/4122] fbdev: omapfb: panel-sony-acx565akm: switch to
 using gpiod API

Switch the driver from legacy gpio API that is deprecated to the newer
gpiod API that respects line polarities described in ACPI/DT.

Note that because existing DTSes specify incorrect polarity of reset
lines (active high) and GPU drivers have adopted to this, we follow
the suit and use inverted values when controlling reset lines.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omapfb/displays/panel-sony-acx565akm.c    | 66 +++++++++----------
 1 file changed, 31 insertions(+), 35 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
index 0c81d3ff41974..685c63aa4e030 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sony-acx565akm.c
@@ -18,9 +18,8 @@
 #include <linux/sched.h>
 #include <linux/backlight.h>
 #include <linux/fb.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 
 #include <video/omapfb_dss.h>
 
@@ -56,7 +55,8 @@ struct panel_drv_data {
 	struct omap_dss_device	dssdev;
 	struct omap_dss_device *in;
 
-	int reset_gpio;
+	struct gpio_desc *reset_gpio;
+
 	int datapairs;
 
 	struct omap_video_timings videomode;
@@ -545,8 +545,13 @@ static int acx565akm_panel_power_on(struct omap_dss_device *dssdev)
 	/*FIXME tweak me */
 	msleep(50);
 
-	if (gpio_is_valid(ddata->reset_gpio))
-		gpio_set_value(ddata->reset_gpio, 1);
+	/*
+	 * Note that we appear to activate the reset line here. However
+	 * existing DTSes specified incorrect polarity for it (active high),
+	 * so in fact this deasserts the reset line.
+	 */
+	if (ddata->reset_gpio)
+		gpiod_set_value_cansleep(ddata->reset_gpio, 1);
 
 	if (ddata->enabled) {
 		dev_dbg(&ddata->spi->dev, "panel already enabled\n");
@@ -595,8 +600,9 @@ static void acx565akm_panel_power_off(struct omap_dss_device *dssdev)
 	 */
 	msleep(50);
 
-	if (gpio_is_valid(ddata->reset_gpio))
-		gpio_set_value(ddata->reset_gpio, 0);
+	/* see comment in acx565akm_panel_power_on() */
+	if (ddata->reset_gpio)
+		gpiod_set_value_cansleep(ddata->reset_gpio, 0);
 
 	/* FIXME need to tweak this delay */
 	msleep(100);
@@ -687,22 +693,6 @@ static struct omap_dss_driver acx565akm_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-static int acx565akm_probe_of(struct spi_device *spi)
-{
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct device_node *np = spi->dev.of_node;
-
-	ddata->reset_gpio = of_get_named_gpio(np, "reset-gpios", 0);
-
-	ddata->in = omapdss_of_find_source_for_first_ep(np);
-	if (IS_ERR(ddata->in)) {
-		dev_err(&spi->dev, "failed to find video source\n");
-		return PTR_ERR(ddata->in);
-	}
-
-	return 0;
-}
-
 static int acx565akm_probe(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata;
@@ -729,19 +719,25 @@ static int acx565akm_probe(struct spi_device *spi)
 
 	mutex_init(&ddata->mutex);
 
-	r = acx565akm_probe_of(spi);
-	if (r)
+	ddata->in = omapdss_of_find_source_for_first_ep(spi->dev.of_node);
+	r = PTR_ERR_OR_ZERO(ddata->in);
+	if (r) {
+		dev_err(&spi->dev, "failed to find video source\n");
 		return r;
-
-	if (gpio_is_valid(ddata->reset_gpio)) {
-		r = devm_gpio_request_one(&spi->dev, ddata->reset_gpio,
-				GPIOF_OUT_INIT_LOW, "lcd reset");
-		if (r)
-			goto err_gpio;
 	}
 
-	if (gpio_is_valid(ddata->reset_gpio))
-		gpio_set_value(ddata->reset_gpio, 1);
+	ddata->reset_gpio = devm_gpiod_get_optional(&spi->dev, "reset",
+						    GPIOD_OUT_LOW);
+	r = PTR_ERR_OR_ZERO(ddata->reset_gpio);
+	if (r)
+		goto err_gpio;
+
+	if (ddata->reset_gpio) {
+		gpiod_set_consumer_name(ddata->reset_gpio, "lcd reset");
+
+		/* release the reset line */
+		gpiod_set_value_cansleep(ddata->reset_gpio, 1);
+	}
 
 	/*
 	 * After reset we have to wait 5 msec before the first
@@ -753,8 +749,8 @@ static int acx565akm_probe(struct spi_device *spi)
 
 	r = panel_detect(ddata);
 
-	if (!ddata->enabled && gpio_is_valid(ddata->reset_gpio))
-		gpio_set_value(ddata->reset_gpio, 0);
+	if (!ddata->enabled && ddata->reset_gpio)
+		gpiod_set_value_cansleep(ddata->reset_gpio, 0);
 
 	if (r) {
 		dev_err(&spi->dev, "%s panel detect error\n", __func__);
-- 
GitLab


From 6378085bc373f71b21fb074d62e321088b9a181a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:33 -0700
Subject: [PATCH 3944/4122] fbdev: omapfb: encoder-tfp410: switch to using
 gpiod API

Switch the driver from legacy gpio API that is deprecated to the newer
gpiod API that respects line polarities described in ACPI/DT.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omap2/omapfb/displays/encoder-tfp410.c    | 67 ++++++-------------
 1 file changed, 22 insertions(+), 45 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c
index 09a59bd93d61a..7bac420169a69 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-tfp410.c
@@ -6,11 +6,12 @@
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  */
 
-#include <linux/gpio.h>
+#include <linux/err.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
-#include <linux/of_gpio.h>
 
 #include <video/omapfb_dss.h>
 
@@ -18,7 +19,8 @@ struct panel_drv_data {
 	struct omap_dss_device dssdev;
 	struct omap_dss_device *in;
 
-	int pd_gpio;
+	struct gpio_desc *pd_gpio;
+
 	int data_lines;
 
 	struct omap_video_timings timings;
@@ -86,8 +88,8 @@ static int tfp410_enable(struct omap_dss_device *dssdev)
 	if (r)
 		return r;
 
-	if (gpio_is_valid(ddata->pd_gpio))
-		gpio_set_value_cansleep(ddata->pd_gpio, 1);
+	if (ddata->pd_gpio)
+		gpiod_set_value_cansleep(ddata->pd_gpio, 0);
 
 	dssdev->state = OMAP_DSS_DISPLAY_ACTIVE;
 
@@ -102,8 +104,8 @@ static void tfp410_disable(struct omap_dss_device *dssdev)
 	if (!omapdss_device_is_enabled(dssdev))
 		return;
 
-	if (gpio_is_valid(ddata->pd_gpio))
-		gpio_set_value_cansleep(ddata->pd_gpio, 0);
+	if (ddata->pd_gpio)
+		gpiod_set_value_cansleep(ddata->pd_gpio, 1);
 
 	in->ops.dpi->disable(in);
 
@@ -162,33 +164,6 @@ static const struct omapdss_dvi_ops tfp410_dvi_ops = {
 	.get_timings	= tfp410_get_timings,
 };
 
-static int tfp410_probe_of(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct device_node *node = pdev->dev.of_node;
-	struct omap_dss_device *in;
-	int gpio;
-
-	gpio = of_get_named_gpio(node, "powerdown-gpios", 0);
-
-	if (gpio_is_valid(gpio) || gpio == -ENOENT) {
-		ddata->pd_gpio = gpio;
-	} else {
-		dev_err(&pdev->dev, "failed to parse PD gpio\n");
-		return gpio;
-	}
-
-	in = omapdss_of_find_source_for_first_ep(node);
-	if (IS_ERR(in)) {
-		dev_err(&pdev->dev, "failed to find video source\n");
-		return PTR_ERR(in);
-	}
-
-	ddata->in = in;
-
-	return 0;
-}
-
 static int tfp410_probe(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata;
@@ -204,18 +179,21 @@ static int tfp410_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, ddata);
 
-	r = tfp410_probe_of(pdev);
-	if (r)
+	ddata->pd_gpio = devm_gpiod_get_optional(&pdev->dev, "powerdown",
+						 GPIOD_OUT_HIGH);
+	r = PTR_ERR_OR_ZERO(ddata->pd_gpio);
+	if (r) {
+		dev_err(&pdev->dev, "Failed to request PD GPIO: %d\n", r);
 		return r;
+	}
+
+	gpiod_set_consumer_name(ddata->pd_gpio, "tfp410 PD");
 
-	if (gpio_is_valid(ddata->pd_gpio)) {
-		r = devm_gpio_request_one(&pdev->dev, ddata->pd_gpio,
-				GPIOF_OUT_INIT_LOW, "tfp410 PD");
-		if (r) {
-			dev_err(&pdev->dev, "Failed to request PD GPIO %d\n",
-					ddata->pd_gpio);
-			goto err_gpio;
-		}
+	ddata->in = omapdss_of_find_source_for_first_ep(pdev->dev.of_node);
+	r = PTR_ERR_OR_ZERO(ddata->in);
+	if (r) {
+		dev_err(&pdev->dev, "failed to find video source: %d\n", r);
+		return r;
 	}
 
 	dssdev = &ddata->dssdev;
@@ -235,7 +213,6 @@ static int tfp410_probe(struct platform_device *pdev)
 
 	return 0;
 err_reg:
-err_gpio:
 	omap_dss_put_device(ddata->in);
 	return r;
 }
-- 
GitLab


From 67c366de0593d00222e2e675e73a59dcf33f2f3a Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:34 -0700
Subject: [PATCH 3945/4122] fbdev: omapfb: panel-dsi-cm: switch to using gpiod
 API

Switch the driver from legacy gpio API that is deprecated to the newer
gpiod API that respects line polarities described in ACPI/DT.

Note that because existing DTSes specify incorrect polarity of reset
lines (active high) and GPU drivers have adopted to this, we follow
the suit and use inverted values when controlling reset lines.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omap2/omapfb/displays/panel-dsi-cm.c      | 116 +++++++-----------
 1 file changed, 45 insertions(+), 71 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
index a2c7c5cb15234..4fc4b26a8d304 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c
@@ -10,8 +10,9 @@
 
 #include <linux/backlight.h>
 #include <linux/delay.h>
+#include <linux/err.h>
 #include <linux/fb.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/interrupt.h>
 #include <linux/jiffies.h>
 #include <linux/module.h>
@@ -20,7 +21,6 @@
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 
 #include <video/omapfb_dss.h>
 #include <video/mipi_display.h>
@@ -53,8 +53,8 @@ struct panel_drv_data {
 	unsigned long	hw_guard_wait;	/* max guard time in jiffies */
 
 	/* panel HW configuration from DT or platform data */
-	int reset_gpio;
-	int ext_te_gpio;
+	struct gpio_desc *reset_gpio;
+	struct gpio_desc *ext_te_gpio;
 
 	bool use_dsi_backlight;
 
@@ -250,8 +250,8 @@ static int dsicm_enter_ulps(struct panel_drv_data *ddata)
 	if (r)
 		goto err;
 
-	if (gpio_is_valid(ddata->ext_te_gpio))
-		disable_irq(gpio_to_irq(ddata->ext_te_gpio));
+	if (ddata->ext_te_gpio)
+		disable_irq(gpiod_to_irq(ddata->ext_te_gpio));
 
 	in->ops.dsi->disable(in, false, true);
 
@@ -292,8 +292,8 @@ static int dsicm_exit_ulps(struct panel_drv_data *ddata)
 		goto err2;
 	}
 
-	if (gpio_is_valid(ddata->ext_te_gpio))
-		enable_irq(gpio_to_irq(ddata->ext_te_gpio));
+	if (ddata->ext_te_gpio)
+		enable_irq(gpiod_to_irq(ddata->ext_te_gpio));
 
 	dsicm_queue_ulps_work(ddata);
 
@@ -306,8 +306,8 @@ err2:
 
 	r = dsicm_panel_reset(ddata);
 	if (!r) {
-		if (gpio_is_valid(ddata->ext_te_gpio))
-			enable_irq(gpio_to_irq(ddata->ext_te_gpio));
+		if (ddata->ext_te_gpio)
+			enable_irq(gpiod_to_irq(ddata->ext_te_gpio));
 		ddata->ulps_enabled = false;
 	}
 err1:
@@ -556,16 +556,19 @@ static const struct attribute_group dsicm_attr_group = {
 
 static void dsicm_hw_reset(struct panel_drv_data *ddata)
 {
-	if (!gpio_is_valid(ddata->reset_gpio))
-		return;
-
-	gpio_set_value(ddata->reset_gpio, 1);
+	/*
+	 * Note that we appear to activate the reset line here. However
+	 * existing DTSes specified incorrect polarity for it (active high),
+	 * so in fact this deasserts the reset line.
+	 */
+	gpiod_set_value_cansleep(ddata->reset_gpio, 1);
 	udelay(10);
 	/* reset the panel */
-	gpio_set_value(ddata->reset_gpio, 0);
-	/* assert reset */
+	gpiod_set_value_cansleep(ddata->reset_gpio, 0);
+	/* keep reset asserted */
 	udelay(10);
-	gpio_set_value(ddata->reset_gpio, 1);
+	/* release reset line */
+	gpiod_set_value_cansleep(ddata->reset_gpio, 1);
 	/* wait after releasing reset */
 	usleep_range(5000, 10000);
 }
@@ -886,7 +889,7 @@ static int dsicm_update(struct omap_dss_device *dssdev,
 	if (r)
 		goto err;
 
-	if (ddata->te_enabled && gpio_is_valid(ddata->ext_te_gpio)) {
+	if (ddata->te_enabled && ddata->ext_te_gpio) {
 		schedule_delayed_work(&ddata->te_timeout_work,
 				msecs_to_jiffies(250));
 		atomic_set(&ddata->do_update, 1);
@@ -933,7 +936,7 @@ static int _dsicm_enable_te(struct panel_drv_data *ddata, bool enable)
 	else
 		r = dsicm_dcs_write_0(ddata, MIPI_DCS_SET_TEAR_OFF);
 
-	if (!gpio_is_valid(ddata->ext_te_gpio))
+	if (!ddata->ext_te_gpio)
 		in->ops.dsi->enable_te(in, enable);
 
 	/* possible panel bug */
@@ -1115,41 +1118,6 @@ static struct omap_dss_driver dsicm_ops = {
 	.memory_read	= dsicm_memory_read,
 };
 
-static int dsicm_probe_of(struct platform_device *pdev)
-{
-	struct device_node *node = pdev->dev.of_node;
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct omap_dss_device *in;
-	int gpio;
-
-	gpio = of_get_named_gpio(node, "reset-gpios", 0);
-	if (!gpio_is_valid(gpio)) {
-		dev_err(&pdev->dev, "failed to parse reset gpio\n");
-		return gpio;
-	}
-	ddata->reset_gpio = gpio;
-
-	gpio = of_get_named_gpio(node, "te-gpios", 0);
-	if (gpio_is_valid(gpio) || gpio == -ENOENT) {
-		ddata->ext_te_gpio = gpio;
-	} else {
-		dev_err(&pdev->dev, "failed to parse TE gpio\n");
-		return gpio;
-	}
-
-	in = omapdss_of_find_source_for_first_ep(node);
-	if (IS_ERR(in)) {
-		dev_err(&pdev->dev, "failed to find video source\n");
-		return PTR_ERR(in);
-	}
-
-	ddata->in = in;
-
-	/* TODO: ulps, backlight */
-
-	return 0;
-}
-
 static int dsicm_probe(struct platform_device *pdev)
 {
 	struct backlight_properties props;
@@ -1171,9 +1139,12 @@ static int dsicm_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ddata);
 	ddata->pdev = pdev;
 
-	r = dsicm_probe_of(pdev);
-	if (r)
+	ddata->in = omapdss_of_find_source_for_first_ep(pdev->dev.of_node);
+	r = PTR_ERR_OR_ZERO(ddata->in);
+	if (r) {
+		dev_err(&pdev->dev, "failed to find video source: %d\n", r);
 		return r;
+	}
 
 	ddata->timings.x_res = 864;
 	ddata->timings.y_res = 480;
@@ -1200,24 +1171,27 @@ static int dsicm_probe(struct platform_device *pdev)
 
 	atomic_set(&ddata->do_update, 0);
 
-	if (gpio_is_valid(ddata->reset_gpio)) {
-		r = devm_gpio_request_one(dev, ddata->reset_gpio,
-				GPIOF_OUT_INIT_LOW, "taal rst");
-		if (r) {
-			dev_err(dev, "failed to request reset gpio\n");
-			return r;
-		}
+	ddata->reset_gpio = devm_gpiod_get(&pdev->dev, "reset", GPIOD_OUT_LOW);
+	r = PTR_ERR_OR_ZERO(ddata->reset_gpio);
+	if (r) {
+		dev_err(&pdev->dev, "Failed to request reset gpio: %d\n", r);
+		return r;
 	}
 
-	if (gpio_is_valid(ddata->ext_te_gpio)) {
-		r = devm_gpio_request_one(dev, ddata->ext_te_gpio,
-				GPIOF_IN, "taal irq");
-		if (r) {
-			dev_err(dev, "GPIO request failed\n");
-			return r;
-		}
+	gpiod_set_consumer_name(ddata->reset_gpio, "taal rst");
+
+	ddata->ext_te_gpio = devm_gpiod_get_optional(&pdev->dev, "te",
+						     GPIOD_IN);
+	r = PTR_ERR_OR_ZERO(ddata->ext_te_gpio);
+	if (r) {
+		dev_err(&pdev->dev, "Failed to request TE gpio: %d\n", r);
+		return r;
+	}
+
+	if (ddata->ext_te_gpio) {
+		gpiod_set_consumer_name(ddata->ext_te_gpio, "taal irq");
 
-		r = devm_request_irq(dev, gpio_to_irq(ddata->ext_te_gpio),
+		r = devm_request_irq(dev, gpiod_to_irq(ddata->ext_te_gpio),
 				dsicm_te_isr,
 				IRQF_TRIGGER_RISING,
 				"taal vsync", ddata);
-- 
GitLab


From 836bfb5688e3ea87574760af82c82fa752b2540f Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:35 -0700
Subject: [PATCH 3946/4122] fbdev: omapfb: panel-tpo-td043mtea1: switch to
 using gpiod API

Switch the driver from legacy gpio API that is deprecated to the newer
gpiod API that respects line polarities described in ACPI/DT.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omapfb/displays/panel-tpo-td043mtea1.c    | 59 +++++--------------
 1 file changed, 16 insertions(+), 43 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
index c0e4e0315b6bb..1eaa35c278359 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td043mtea1.c
@@ -10,10 +10,9 @@
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
 #include <linux/regulator/consumer.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/err.h>
 #include <linux/slab.h>
-#include <linux/of_gpio.h>
 
 #include <video/omapfb_dss.h>
 
@@ -58,7 +57,7 @@ struct panel_drv_data {
 
 	struct spi_device *spi;
 	struct regulator *vcc_reg;
-	int nreset_gpio;
+	struct gpio_desc *reset_gpio;
 	u16 gamma[12];
 	u32 mode;
 	u32 hmirror:1;
@@ -296,8 +295,7 @@ static int tpo_td043_power_on(struct panel_drv_data *ddata)
 	/* wait for panel to stabilize */
 	msleep(160);
 
-	if (gpio_is_valid(ddata->nreset_gpio))
-		gpio_set_value(ddata->nreset_gpio, 1);
+	gpiod_set_value_cansleep(ddata->reset_gpio, 0);
 
 	tpo_td043_write(ddata->spi, 2,
 			TPO_R02_MODE(ddata->mode) | TPO_R02_NCLK_RISING);
@@ -320,8 +318,7 @@ static void tpo_td043_power_off(struct panel_drv_data *ddata)
 	tpo_td043_write(ddata->spi, 3,
 			TPO_R03_VAL_STANDBY | TPO_R03_EN_PWM);
 
-	if (gpio_is_valid(ddata->nreset_gpio))
-		gpio_set_value(ddata->nreset_gpio, 0);
+	gpiod_set_value_cansleep(ddata->reset_gpio, 1);
 
 	/* wait for at least 2 vsyncs before cutting off power */
 	msleep(50);
@@ -454,32 +451,6 @@ static struct omap_dss_driver tpo_td043_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-
-static int tpo_td043_probe_of(struct spi_device *spi)
-{
-	struct device_node *node = spi->dev.of_node;
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct omap_dss_device *in;
-	int gpio;
-
-	gpio = of_get_named_gpio(node, "reset-gpios", 0);
-	if (!gpio_is_valid(gpio)) {
-		dev_err(&spi->dev, "failed to parse enable gpio\n");
-		return gpio;
-	}
-	ddata->nreset_gpio = gpio;
-
-	in = omapdss_of_find_source_for_first_ep(node);
-	if (IS_ERR(in)) {
-		dev_err(&spi->dev, "failed to find video source\n");
-		return PTR_ERR(in);
-	}
-
-	ddata->in = in;
-
-	return 0;
-}
-
 static int tpo_td043_probe(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata;
@@ -508,9 +479,12 @@ static int tpo_td043_probe(struct spi_device *spi)
 
 	ddata->spi = spi;
 
-	r = tpo_td043_probe_of(spi);
-	if (r)
+	ddata->in = omapdss_of_find_source_for_first_ep(spi->dev.of_node);
+	r = PTR_ERR_OR_ZERO(ddata->in);
+	if (r) {
+		dev_err(&spi->dev, "failed to find video source: %d\n", r);
 		return r;
+	}
 
 	ddata->mode = TPO_R02_MODE_800x480;
 	memcpy(ddata->gamma, tpo_td043_def_gamma, sizeof(ddata->gamma));
@@ -521,16 +495,15 @@ static int tpo_td043_probe(struct spi_device *spi)
 		goto err_regulator;
 	}
 
-	if (gpio_is_valid(ddata->nreset_gpio)) {
-		r = devm_gpio_request_one(&spi->dev,
-				ddata->nreset_gpio, GPIOF_OUT_INIT_LOW,
-				"lcd reset");
-		if (r < 0) {
-			dev_err(&spi->dev, "couldn't request reset GPIO\n");
-			goto err_gpio_req;
-		}
+	ddata->reset_gpio = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_HIGH);
+	r = PTR_ERR_OR_ZERO(ddata->reset_gpio);
+	if (r) {
+		dev_err(&spi->dev, "couldn't request reset GPIO\n");
+		goto err_gpio_req;
 	}
 
+	gpiod_set_consumer_name(ddata->reset_gpio, "lcd reset");
+
 	r = sysfs_create_group(&spi->dev.kobj, &tpo_td043_attr_group);
 	if (r) {
 		dev_err(&spi->dev, "failed to create sysfs files\n");
-- 
GitLab


From 39630e0f138a4821cd219a83fd727cf39d304201 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:36 -0700
Subject: [PATCH 3947/4122] fbdev: omapfb: panel-nec-nl8048hl11: switch to
 using gpiod API

Switch the driver from legacy gpio API that is deprecated to the newer
gpiod API that respects line polarities described in ACPI/DT.

Note that because existing DTSes specify incorrect polarity of reset
lines (active high) and GPU drivers have adopted to this, we follow
the suit and use inverted values when controlling reset lines.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omapfb/displays/panel-nec-nl8048hl11.c    | 72 ++++++-------------
 1 file changed, 20 insertions(+), 52 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
index b407173e27b1f..33563953b2ff9 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-nec-nl8048hl11.c
@@ -7,12 +7,12 @@
  * Converted to new DSS device model: Tomi Valkeinen <tomi.valkeinen@ti.com>
  */
 
-#include <linux/module.h>
 #include <linux/delay.h>
-#include <linux/spi/spi.h>
+#include <linux/err.h>
 #include <linux/fb.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/spi/spi.h>
 
 #include <video/omapfb_dss.h>
 
@@ -24,8 +24,7 @@ struct panel_drv_data {
 
 	int data_lines;
 
-	int res_gpio;
-	int qvga_gpio;
+	struct gpio_desc *res_gpio;
 
 	struct spi_device *spi;
 };
@@ -155,8 +154,8 @@ static int nec_8048_enable(struct omap_dss_device *dssdev)
 	if (r)
 		return r;
 
-	if (gpio_is_valid(ddata->res_gpio))
-		gpio_set_value_cansleep(ddata->res_gpio, 1);
+	/* Apparently existing DTSes use incorrect polarity (active high) */
+	gpiod_set_value_cansleep(ddata->res_gpio, 1);
 
 	dssdev->state = OMAP_DSS_DISPLAY_ACTIVE;
 
@@ -171,8 +170,8 @@ static void nec_8048_disable(struct omap_dss_device *dssdev)
 	if (!omapdss_device_is_enabled(dssdev))
 		return;
 
-	if (gpio_is_valid(ddata->res_gpio))
-		gpio_set_value_cansleep(ddata->res_gpio, 0);
+	/* Apparently existing DTSes use incorrect polarity (active high) */
+	gpiod_set_value_cansleep(ddata->res_gpio, 0);
 
 	in->ops.dpi->disable(in);
 
@@ -222,35 +221,6 @@ static struct omap_dss_driver nec_8048_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-
-static int nec_8048_probe_of(struct spi_device *spi)
-{
-	struct device_node *node = spi->dev.of_node;
-	struct panel_drv_data *ddata = dev_get_drvdata(&spi->dev);
-	struct omap_dss_device *in;
-	int gpio;
-
-	gpio = of_get_named_gpio(node, "reset-gpios", 0);
-	if (!gpio_is_valid(gpio)) {
-		dev_err(&spi->dev, "failed to parse enable gpio\n");
-		return gpio;
-	}
-	ddata->res_gpio = gpio;
-
-	/* XXX the panel spec doesn't mention any QVGA pin?? */
-	ddata->qvga_gpio = -ENOENT;
-
-	in = omapdss_of_find_source_for_first_ep(node);
-	if (IS_ERR(in)) {
-		dev_err(&spi->dev, "failed to find video source\n");
-		return PTR_ERR(in);
-	}
-
-	ddata->in = in;
-
-	return 0;
-}
-
 static int nec_8048_probe(struct spi_device *spi)
 {
 	struct panel_drv_data *ddata;
@@ -281,24 +251,22 @@ static int nec_8048_probe(struct spi_device *spi)
 
 	ddata->spi = spi;
 
-	r = nec_8048_probe_of(spi);
-	if (r)
+	ddata->in = omapdss_of_find_source_for_first_ep(spi->dev.of_node);
+	r = PTR_ERR_OR_ZERO(ddata->in);
+	if (r) {
+		dev_err(&spi->dev, "failed to find video source: %d\n", r);
 		return r;
-
-	if (gpio_is_valid(ddata->qvga_gpio)) {
-		r = devm_gpio_request_one(&spi->dev, ddata->qvga_gpio,
-				GPIOF_OUT_INIT_HIGH, "lcd QVGA");
-		if (r)
-			goto err_gpio;
 	}
 
-	if (gpio_is_valid(ddata->res_gpio)) {
-		r = devm_gpio_request_one(&spi->dev, ddata->res_gpio,
-				GPIOF_OUT_INIT_LOW, "lcd RES");
-		if (r)
-			goto err_gpio;
+	ddata->res_gpio = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_LOW);
+	r = PTR_ERR_OR_ZERO(ddata->res_gpio);
+	if (r) {
+		dev_err(&spi->dev, "failed to request reset gpio: %d\n", r);
+		goto err_gpio;
 	}
 
+	gpiod_set_consumer_name(ddata->res_gpio, "lcd RES");
+
 	ddata->videomode = nec_8048_panel_timings;
 
 	dssdev = &ddata->dssdev;
-- 
GitLab


From b7ec002c05d8e01b961bf5d915cf2e42a4cff4dd Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:37 -0700
Subject: [PATCH 3948/4122] fbdev: omapfb: panel-dpi: remove support for
 platform data

There are no users of panel_dpi_platform_data in the mainline
kernel so support for it can be removed from the panel driver.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../fbdev/omap2/omapfb/displays/panel-dpi.c   | 83 ++-----------------
 include/video/omap-panel-data.h               | 21 -----
 2 files changed, 7 insertions(+), 97 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c
index ff3d1e8e1e7b5..9790053c5877c 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dpi.c
@@ -6,15 +6,13 @@
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  */
 
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 
 #include <video/omapfb_dss.h>
-#include <video/omap-panel-data.h>
 #include <video/of_display_timing.h>
 
 struct panel_drv_data {
@@ -25,9 +23,6 @@ struct panel_drv_data {
 
 	struct omap_video_timings videomode;
 
-	/* used for non-DT boot, to be removed */
-	int backlight_gpio;
-
 	struct gpio_desc *enable_gpio;
 };
 
@@ -77,9 +72,6 @@ static int panel_dpi_enable(struct omap_dss_device *dssdev)
 
 	gpiod_set_value_cansleep(ddata->enable_gpio, 1);
 
-	if (gpio_is_valid(ddata->backlight_gpio))
-		gpio_set_value_cansleep(ddata->backlight_gpio, 1);
-
 	dssdev->state = OMAP_DSS_DISPLAY_ACTIVE;
 
 	return 0;
@@ -93,9 +85,6 @@ static void panel_dpi_disable(struct omap_dss_device *dssdev)
 	if (!omapdss_device_is_enabled(dssdev))
 		return;
 
-	if (gpio_is_valid(ddata->backlight_gpio))
-		gpio_set_value_cansleep(ddata->backlight_gpio, 0);
-
 	gpiod_set_value_cansleep(ddata->enable_gpio, 0);
 
 	in->ops.dpi->disable(in);
@@ -146,49 +135,6 @@ static struct omap_dss_driver panel_dpi_ops = {
 	.get_resolution	= omapdss_default_get_resolution,
 };
 
-static int panel_dpi_probe_pdata(struct platform_device *pdev)
-{
-	const struct panel_dpi_platform_data *pdata;
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct omap_dss_device *dssdev, *in;
-	struct videomode vm;
-	int r;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&pdev->dev, "failed to find video source '%s'\n",
-				pdata->source);
-		return -EPROBE_DEFER;
-	}
-
-	ddata->in = in;
-
-	ddata->data_lines = pdata->data_lines;
-
-	videomode_from_timing(pdata->display_timing, &vm);
-	videomode_to_omap_video_timings(&vm, &ddata->videomode);
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	r = devm_gpio_request_one(&pdev->dev, pdata->enable_gpio,
-					GPIOF_OUT_INIT_LOW, "panel enable");
-	if (r)
-		goto err_gpio;
-
-	ddata->enable_gpio = gpio_to_desc(pdata->enable_gpio);
-
-	ddata->backlight_gpio = pdata->backlight_gpio;
-
-	return 0;
-
-err_gpio:
-	omap_dss_put_device(ddata->in);
-	return r;
-}
-
 static int panel_dpi_probe_of(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
@@ -205,8 +151,6 @@ static int panel_dpi_probe_of(struct platform_device *pdev)
 
 	ddata->enable_gpio = gpio;
 
-	ddata->backlight_gpio = -ENOENT;
-
 	r = of_get_display_timing(node, "panel-timing", &timing);
 	if (r) {
 		dev_err(&pdev->dev, "failed to get video timing\n");
@@ -233,30 +177,18 @@ static int panel_dpi_probe(struct platform_device *pdev)
 	struct omap_dss_device *dssdev;
 	int r;
 
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+
 	ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
 	if (ddata == NULL)
 		return -ENOMEM;
 
 	platform_set_drvdata(pdev, ddata);
 
-	if (dev_get_platdata(&pdev->dev)) {
-		r = panel_dpi_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = panel_dpi_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
-		return -ENODEV;
-	}
-
-	if (gpio_is_valid(ddata->backlight_gpio)) {
-		r = devm_gpio_request_one(&pdev->dev, ddata->backlight_gpio,
-				GPIOF_OUT_INIT_LOW, "panel backlight");
-		if (r)
-			goto err_gpio;
-	}
+	r = panel_dpi_probe_of(pdev);
+	if (r)
+		return r;
 
 	dssdev = &ddata->dssdev;
 	dssdev->dev = &pdev->dev;
@@ -275,7 +207,6 @@ static int panel_dpi_probe(struct platform_device *pdev)
 	return 0;
 
 err_reg:
-err_gpio:
 	omap_dss_put_device(ddata->in);
 	return r;
 }
diff --git a/include/video/omap-panel-data.h b/include/video/omap-panel-data.h
index b7733150b55cd..18172d7b97d0c 100644
--- a/include/video/omap-panel-data.h
+++ b/include/video/omap-panel-data.h
@@ -31,25 +31,4 @@ struct connector_atv_platform_data {
 	bool invert_polarity;
 };
 
-/**
- * panel_dpi platform data
- * @name: name for this display entity
- * @source: name of the display entity used as a video source
- * @data_lines: number of DPI datalines
- * @display_timing: timings for this panel
- * @backlight_gpio: gpio to enable/disable the backlight (or -1)
- * @enable_gpio: gpio to enable/disable the panel (or -1)
- */
-struct panel_dpi_platform_data {
-	const char *name;
-	const char *source;
-
-	int data_lines;
-
-	const struct display_timing *display_timing;
-
-	int backlight_gpio;
-	int enable_gpio;
-};
-
 #endif /* __OMAP_PANEL_DATA_H */
-- 
GitLab


From 90a687d61b4812d8fc035a59659dc36359783f60 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:38 -0700
Subject: [PATCH 3949/4122] fbdev: omapfb: connector-analog-tv: remove support
 for platform data

There are no users of connector_atv_platform_data in the mainline
kernel so support for it can be removed from the panel driver.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../omapfb/displays/connector-analog-tv.c     | 60 +++----------------
 include/video/omap-panel-data.h               | 34 -----------
 2 files changed, 8 insertions(+), 86 deletions(-)
 delete mode 100644 include/video/omap-panel-data.h

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c
index a9fd732f81030..0daaf9f89bab5 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/connector-analog-tv.c
@@ -12,7 +12,6 @@
 #include <linux/of.h>
 
 #include <video/omapfb_dss.h>
-#include <video/omap-panel-data.h>
 
 struct panel_drv_data {
 	struct omap_dss_device dssdev;
@@ -178,53 +177,15 @@ static struct omap_dss_driver tvc_driver = {
 	.set_wss		= tvc_set_wss,
 };
 
-static int tvc_probe_pdata(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct connector_atv_platform_data *pdata;
-	struct omap_dss_device *in, *dssdev;
-
-	pdata = dev_get_platdata(&pdev->dev);
-
-	in = omap_dss_find_output(pdata->source);
-	if (in == NULL) {
-		dev_err(&pdev->dev, "Failed to find video source\n");
-		return -EPROBE_DEFER;
-	}
-
-	ddata->in = in;
-
-	ddata->invert_polarity = pdata->invert_polarity;
-
-	dssdev = &ddata->dssdev;
-	dssdev->name = pdata->name;
-
-	return 0;
-}
-
-static int tvc_probe_of(struct platform_device *pdev)
-{
-	struct panel_drv_data *ddata = platform_get_drvdata(pdev);
-	struct device_node *node = pdev->dev.of_node;
-	struct omap_dss_device *in;
-
-	in = omapdss_of_find_source_for_first_ep(node);
-	if (IS_ERR(in)) {
-		dev_err(&pdev->dev, "failed to find video source\n");
-		return PTR_ERR(in);
-	}
-
-	ddata->in = in;
-
-	return 0;
-}
-
 static int tvc_probe(struct platform_device *pdev)
 {
 	struct panel_drv_data *ddata;
 	struct omap_dss_device *dssdev;
 	int r;
 
+	if (!pdev->dev.of_node)
+		return -ENODEV;
+
 	ddata = devm_kzalloc(&pdev->dev, sizeof(*ddata), GFP_KERNEL);
 	if (!ddata)
 		return -ENOMEM;
@@ -232,16 +193,11 @@ static int tvc_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, ddata);
 	ddata->dev = &pdev->dev;
 
-	if (dev_get_platdata(&pdev->dev)) {
-		r = tvc_probe_pdata(pdev);
-		if (r)
-			return r;
-	} else if (pdev->dev.of_node) {
-		r = tvc_probe_of(pdev);
-		if (r)
-			return r;
-	} else {
-		return -ENODEV;
+	ddata->in = omapdss_of_find_source_for_first_ep(pdev->dev.of_node);
+	r = PTR_ERR_OR_ZERO(ddata->in);
+	if (r) {
+		dev_err(&pdev->dev, "failed to find video source\n");
+		return r;
 	}
 
 	ddata->timings = tvc_pal_timings;
diff --git a/include/video/omap-panel-data.h b/include/video/omap-panel-data.h
deleted file mode 100644
index 18172d7b97d0c..0000000000000
--- a/include/video/omap-panel-data.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Header containing platform_data structs for omap panels
- *
- * Copyright (C) 2013 Texas Instruments
- * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
- *	   Archit Taneja <archit@ti.com>
- *
- * Copyright (C) 2011 Texas Instruments
- * Author: Mayuresh Janorkar <mayur@ti.com>
- *
- * Copyright (C) 2010 Canonical Ltd.
- * Author: Bryan Wu <bryan.wu@canonical.com>
- */
-
-#ifndef __OMAP_PANEL_DATA_H
-#define __OMAP_PANEL_DATA_H
-
-#include <video/display_timing.h>
-
-/**
- * connector_atv platform data
- * @name: name for this display entity
- * @source: name of the display entity used as a video source
- * @invert_polarity: invert signal polarity
- */
-struct connector_atv_platform_data {
-	const char *name;
-	const char *source;
-
-	bool invert_polarity;
-};
-
-#endif /* __OMAP_PANEL_DATA_H */
-- 
GitLab


From 891bbadf1cf178eba8f2e3e175eabeee37a61ce8 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:39 -0700
Subject: [PATCH 3950/4122] fbdev: omapfb: encoder-opa362: fix included headers

The driver has been switched to gpiod API, so it should include
gpio/consumer.h instead of gpio.h and of_gpio.h.

With of_gpio.h no longer included we need mod_devicetable.h for
of_device_id definition.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c
index ba7ed4039f8a4..dd29dc5c77ec8 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/encoder-opa362.c
@@ -11,11 +11,11 @@
  * Author: Tomi Valkeinen <tomi.valkeinen@ti.com>
  */
 
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
-#include <linux/of_gpio.h>
 
 #include <video/omapfb_dss.h>
 
-- 
GitLab


From a6828b1eb72621ac577079efd3b54b93c885261e Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:40 -0700
Subject: [PATCH 3951/4122] fbdev: omapfb: panel-lgphilips-lb035q02: remove
 backlight GPIO handling

With f048e8c1d169 ("omapfb: panel-lgphilips-lb035q02: Remove legacy boot
support") it is no longer possible to specify GPIO to control the
backlight. Remove code trying to request and toggle it.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../displays/panel-lgphilips-lb035q02.c       | 21 +------------------
 1 file changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
index 3ce1f9d2e7c4a..e69856cb62d5b 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-lgphilips-lb035q02.c
@@ -11,7 +11,7 @@
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
 #include <linux/mutex.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 
 #include <video/omapfb_dss.h>
 
@@ -46,9 +46,6 @@ struct panel_drv_data {
 
 	struct omap_video_timings videomode;
 
-	/* used for non-DT boot, to be removed */
-	int backlight_gpio;
-
 	struct gpio_desc *enable_gpio;
 };
 
@@ -166,9 +163,6 @@ static int lb035q02_enable(struct omap_dss_device *dssdev)
 	if (ddata->enable_gpio)
 		gpiod_set_value_cansleep(ddata->enable_gpio, 1);
 
-	if (gpio_is_valid(ddata->backlight_gpio))
-		gpio_set_value_cansleep(ddata->backlight_gpio, 1);
-
 	dssdev->state = OMAP_DSS_DISPLAY_ACTIVE;
 
 	return 0;
@@ -185,9 +179,6 @@ static void lb035q02_disable(struct omap_dss_device *dssdev)
 	if (ddata->enable_gpio)
 		gpiod_set_value_cansleep(ddata->enable_gpio, 0);
 
-	if (gpio_is_valid(ddata->backlight_gpio))
-		gpio_set_value_cansleep(ddata->backlight_gpio, 0);
-
 	in->ops.dpi->disable(in);
 
 	dssdev->state = OMAP_DSS_DISPLAY_DISABLED;
@@ -250,8 +241,6 @@ static int lb035q02_probe_of(struct spi_device *spi)
 
 	ddata->enable_gpio = gpio;
 
-	ddata->backlight_gpio = -ENOENT;
-
 	in = omapdss_of_find_source_for_first_ep(node);
 	if (IS_ERR(in)) {
 		dev_err(&spi->dev, "failed to find video source\n");
@@ -284,13 +273,6 @@ static int lb035q02_panel_spi_probe(struct spi_device *spi)
 	if (r)
 		return r;
 
-	if (gpio_is_valid(ddata->backlight_gpio)) {
-		r = devm_gpio_request_one(&spi->dev, ddata->backlight_gpio,
-				GPIOF_OUT_INIT_LOW, "panel backlight");
-		if (r)
-			goto err_gpio;
-	}
-
 	ddata->videomode = lb035q02_timings;
 
 	dssdev = &ddata->dssdev;
@@ -310,7 +292,6 @@ static int lb035q02_panel_spi_probe(struct spi_device *spi)
 	return 0;
 
 err_reg:
-err_gpio:
 	omap_dss_put_device(ddata->in);
 	return r;
 }
-- 
GitLab


From 275a855829ac19d06edf3f91d39c6014032667eb Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:41 -0700
Subject: [PATCH 3952/4122] fbdev: omapfb: panel-tpo-td028ttec1: stop including
 gpio.h

The driver does not use gpios, so there is no need to include gpio.h.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
index 3c0f887d30926..c18d290693c11 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-tpo-td028ttec1.c
@@ -16,7 +16,6 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/spi/spi.h>
-#include <linux/gpio.h>
 #include <video/omapfb_dss.h>
 
 struct panel_drv_data {
-- 
GitLab


From 23910a20f3fe7bdddd4c834a2c7b4547a554e86f Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Thu, 3 Nov 2022 20:16:42 -0700
Subject: [PATCH 3953/4122] fbdev: omapfb: panel-sharp-ls037v7dw01: fix
 included headers

The driver is using gpiod API so it should include gpio/consumer.h and
not gpio.gh or of_gpio.h.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 .../fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c      | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c
index f1072c319de8a..cc30758300e25 100644
--- a/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c
+++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-sharp-ls037v7dw01.c
@@ -7,10 +7,9 @@
  */
 
 #include <linux/delay.h>
-#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/regulator/consumer.h>
-- 
GitLab


From eceadc9219884fa4b05bf56cace688b77a2563e0 Mon Sep 17 00:00:00 2001
From: "wangkailong@jari.cn" <wangkailong@jari.cn>
Date: Sun, 6 Nov 2022 22:16:08 +0800
Subject: [PATCH 3954/4122] fbdev: pxafb: Remove unnecessary print function
 dev_err()

Eliminate the follow coccicheck warning:

./drivers/video/fbdev/pxafb.c:2330:2-9: line 2330 is redundant because
platform_get_irq() already prints an error

Signed-off-by: KaiLong Wang <wangkailong@jari.cn>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/pxafb.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/video/fbdev/pxafb.c b/drivers/video/fbdev/pxafb.c
index 696ac54311809..c46ed78298ae3 100644
--- a/drivers/video/fbdev/pxafb.c
+++ b/drivers/video/fbdev/pxafb.c
@@ -2327,7 +2327,6 @@ static int pxafb_probe(struct platform_device *dev)
 
 	irq = platform_get_irq(dev, 0);
 	if (irq < 0) {
-		dev_err(&dev->dev, "no IRQ defined\n");
 		ret = -ENODEV;
 		goto failed_free_mem;
 	}
-- 
GitLab


From ed359a464846b48f76ea6cc5cd8257e545ac97f4 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Sat, 12 Nov 2022 17:55:10 +0800
Subject: [PATCH 3955/4122] fbdev: pm2fb: fix missing pci_disable_device()

Add missing pci_disable_device() in error path of probe() and remove() path.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/pm2fb.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/pm2fb.c b/drivers/video/fbdev/pm2fb.c
index 0823c9de859a0..47d212944f307 100644
--- a/drivers/video/fbdev/pm2fb.c
+++ b/drivers/video/fbdev/pm2fb.c
@@ -1533,8 +1533,10 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	}
 
 	info = framebuffer_alloc(sizeof(struct pm2fb_par), &pdev->dev);
-	if (!info)
-		return -ENOMEM;
+	if (!info) {
+		err = -ENOMEM;
+		goto err_exit_disable;
+	}
 	default_par = info->par;
 
 	switch (pdev->device) {
@@ -1715,6 +1717,8 @@ static int pm2fb_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	release_mem_region(pm2fb_fix.mmio_start, pm2fb_fix.mmio_len);
  err_exit_neither:
 	framebuffer_release(info);
+ err_exit_disable:
+	pci_disable_device(pdev);
 	return retval;
 }
 
@@ -1739,6 +1743,7 @@ static void pm2fb_remove(struct pci_dev *pdev)
 	fb_dealloc_cmap(&info->cmap);
 	kfree(info->pixmap.addr);
 	framebuffer_release(info);
+	pci_disable_device(pdev);
 }
 
 static const struct pci_device_id pm2fb_id_table[] = {
-- 
GitLab


From 5886b130de953cfb8826f7771ec8640a79934a7f Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Mon, 14 Nov 2022 09:08:52 +0800
Subject: [PATCH 3956/4122] fbdev: via: Fix error in via_core_init()

via_core_init() won't exit the driver when pci_register_driver() failed.
Exit the viafb-i2c and the viafb-gpio in failed path to prevent error.

VIA Graphics Integration Chipset framebuffer 2.4 initializing
Error: Driver 'viafb-i2c' is already registered, aborting...
Error: Driver 'viafb-gpio' is already registered, aborting...

Fixes: 7582eb9be85f ("viafb: Turn GPIO and i2c into proper platform devices")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/via/via-core.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/via/via-core.c b/drivers/video/fbdev/via/via-core.c
index 32a6399b080b1..2c1803eb196fe 100644
--- a/drivers/video/fbdev/via/via-core.c
+++ b/drivers/video/fbdev/via/via-core.c
@@ -733,7 +733,14 @@ static int __init via_core_init(void)
 		return ret;
 	viafb_i2c_init();
 	viafb_gpio_init();
-	return pci_register_driver(&via_driver);
+	ret = pci_register_driver(&via_driver);
+	if (ret) {
+		viafb_gpio_exit();
+		viafb_i2c_exit();
+		return ret;
+	}
+
+	return 0;
 }
 
 static void __exit via_core_exit(void)
-- 
GitLab


From b76449ee75e21acfe9fa4c653d8598f191ed7d68 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <dzm91@hust.edu.cn>
Date: Fri, 11 Nov 2022 13:49:49 +0800
Subject: [PATCH 3957/4122] fbdev: smscufx: fix error handling code in
 ufx_usb_probe

The current error handling code in ufx_usb_probe have many unmatching
issues, e.g., missing ufx_free_usb_list, destroy_modedb label should
only include framebuffer_release, fb_dealloc_cmap only matches
fb_alloc_cmap.

My local syzkaller reports a memory leak bug:

memory leak in ufx_usb_probe

BUG: memory leak
unreferenced object 0xffff88802f879580 (size 128):
  comm "kworker/0:7", pid 17416, jiffies 4295067474 (age 46.710s)
  hex dump (first 32 bytes):
    80 21 7c 2e 80 88 ff ff 18 d0 d0 0c 80 88 ff ff  .!|.............
    00 d0 d0 0c 80 88 ff ff e0 ff ff ff 0f 00 00 00  ................
  backtrace:
    [<ffffffff814c99a0>] kmalloc_trace+0x20/0x90 mm/slab_common.c:1045
    [<ffffffff824d219c>] kmalloc include/linux/slab.h:553 [inline]
    [<ffffffff824d219c>] kzalloc include/linux/slab.h:689 [inline]
    [<ffffffff824d219c>] ufx_alloc_urb_list drivers/video/fbdev/smscufx.c:1873 [inline]
    [<ffffffff824d219c>] ufx_usb_probe+0x11c/0x15a0 drivers/video/fbdev/smscufx.c:1655
    [<ffffffff82d17927>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396
    [<ffffffff82712f0d>] call_driver_probe drivers/base/dd.c:560 [inline]
    [<ffffffff82712f0d>] really_probe+0x12d/0x390 drivers/base/dd.c:639
    [<ffffffff8271322f>] __driver_probe_device+0xbf/0x140 drivers/base/dd.c:778
    [<ffffffff827132da>] driver_probe_device+0x2a/0x120 drivers/base/dd.c:808
    [<ffffffff82713c27>] __device_attach_driver+0xf7/0x150 drivers/base/dd.c:936
    [<ffffffff82710137>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:427
    [<ffffffff827136b5>] __device_attach+0x105/0x2d0 drivers/base/dd.c:1008
    [<ffffffff82711d36>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:487
    [<ffffffff8270e242>] device_add+0x642/0xdc0 drivers/base/core.c:3517
    [<ffffffff82d14d5f>] usb_set_configuration+0x8ef/0xb80 drivers/usb/core/message.c:2170
    [<ffffffff82d2576c>] usb_generic_driver_probe+0x8c/0xc0 drivers/usb/core/generic.c:238
    [<ffffffff82d16ffc>] usb_probe_device+0x5c/0x140 drivers/usb/core/driver.c:293
    [<ffffffff82712f0d>] call_driver_probe drivers/base/dd.c:560 [inline]
    [<ffffffff82712f0d>] really_probe+0x12d/0x390 drivers/base/dd.c:639
    [<ffffffff8271322f>] __driver_probe_device+0xbf/0x140 drivers/base/dd.c:778

Fix this bug by rewriting the error handling code in ufx_usb_probe.

Reported-by: syzkaller <syzkaller@googlegroups.com>
Tested-by: Dongliang Mu <dzm91@hust.edu.cn>
Signed-off-by: Dongliang Mu <dzm91@hust.edu.cn>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/smscufx.c | 46 +++++++++++++++++++++++------------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/drivers/video/fbdev/smscufx.c b/drivers/video/fbdev/smscufx.c
index 9343b7a4ac899..2ad6e98ce10d5 100644
--- a/drivers/video/fbdev/smscufx.c
+++ b/drivers/video/fbdev/smscufx.c
@@ -1622,7 +1622,7 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	struct usb_device *usbdev;
 	struct ufx_data *dev;
 	struct fb_info *info;
-	int retval;
+	int retval = -ENOMEM;
 	u32 id_rev, fpga_rev;
 
 	/* usb initialization */
@@ -1654,15 +1654,17 @@ static int ufx_usb_probe(struct usb_interface *interface,
 
 	if (!ufx_alloc_urb_list(dev, WRITES_IN_FLIGHT, MAX_TRANSFER)) {
 		dev_err(dev->gdev, "ufx_alloc_urb_list failed\n");
-		goto e_nomem;
+		goto put_ref;
 	}
 
 	/* We don't register a new USB class. Our client interface is fbdev */
 
 	/* allocates framebuffer driver structure, not framebuffer memory */
 	info = framebuffer_alloc(0, &usbdev->dev);
-	if (!info)
-		goto e_nomem;
+	if (!info) {
+		dev_err(dev->gdev, "framebuffer_alloc failed\n");
+		goto free_urb_list;
+	}
 
 	dev->info = info;
 	info->par = dev;
@@ -1705,22 +1707,34 @@ static int ufx_usb_probe(struct usb_interface *interface,
 	check_warn_goto_error(retval, "unable to find common mode for display and adapter");
 
 	retval = ufx_reg_set_bits(dev, 0x4000, 0x00000001);
-	check_warn_goto_error(retval, "error %d enabling graphics engine", retval);
+	if (retval < 0) {
+		dev_err(dev->gdev, "error %d enabling graphics engine", retval);
+		goto setup_modes;
+	}
 
 	/* ready to begin using device */
 	atomic_set(&dev->usb_active, 1);
 
 	dev_dbg(dev->gdev, "checking var");
 	retval = ufx_ops_check_var(&info->var, info);
-	check_warn_goto_error(retval, "error %d ufx_ops_check_var", retval);
+	if (retval < 0) {
+		dev_err(dev->gdev, "error %d ufx_ops_check_var", retval);
+		goto reset_active;
+	}
 
 	dev_dbg(dev->gdev, "setting par");
 	retval = ufx_ops_set_par(info);
-	check_warn_goto_error(retval, "error %d ufx_ops_set_par", retval);
+	if (retval < 0) {
+		dev_err(dev->gdev, "error %d ufx_ops_set_par", retval);
+		goto reset_active;
+	}
 
 	dev_dbg(dev->gdev, "registering framebuffer");
 	retval = register_framebuffer(info);
-	check_warn_goto_error(retval, "error %d register_framebuffer", retval);
+	if (retval < 0) {
+		dev_err(dev->gdev, "error %d register_framebuffer", retval);
+		goto reset_active;
+	}
 
 	dev_info(dev->gdev, "SMSC UDX USB device /dev/fb%d attached. %dx%d resolution."
 		" Using %dK framebuffer memory\n", info->node,
@@ -1728,21 +1742,23 @@ static int ufx_usb_probe(struct usb_interface *interface,
 
 	return 0;
 
-error:
-	fb_dealloc_cmap(&info->cmap);
-destroy_modedb:
+reset_active:
+	atomic_set(&dev->usb_active, 0);
+setup_modes:
 	fb_destroy_modedb(info->monspecs.modedb);
 	vfree(info->screen_base);
 	fb_destroy_modelist(&info->modelist);
+error:
+	fb_dealloc_cmap(&info->cmap);
+destroy_modedb:
 	framebuffer_release(info);
+free_urb_list:
+	if (dev->urbs.count > 0)
+		ufx_free_urb_list(dev);
 put_ref:
 	kref_put(&dev->kref, ufx_free); /* ref for framebuffer */
 	kref_put(&dev->kref, ufx_free); /* last ref from kref_init */
 	return retval;
-
-e_nomem:
-	retval = -ENOMEM;
-	goto put_ref;
 }
 
 static void ufx_usb_disconnect(struct usb_interface *interface)
-- 
GitLab


From 001f2cdb952a9566c77fb4b5470cc361db5601bb Mon Sep 17 00:00:00 2001
From: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Date: Mon, 14 Nov 2022 16:56:54 +0800
Subject: [PATCH 3958/4122] fbdev: vermilion: decrease reference count in error
 path

pci_get_device() will increase the reference count for the returned
pci_dev. For the error path, we need to use pci_dev_put() to decrease
the reference count.

Fixes: dbe7e429fedb ("vmlfb: framebuffer driver for Intel Vermilion Range")
Signed-off-by: Xiongfeng Wang <wangxiongfeng2@huawei.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/vermilion/vermilion.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/vermilion/vermilion.c b/drivers/video/fbdev/vermilion/vermilion.c
index 1465fb7b619e1..0374ee6b6d039 100644
--- a/drivers/video/fbdev/vermilion/vermilion.c
+++ b/drivers/video/fbdev/vermilion/vermilion.c
@@ -278,8 +278,10 @@ static int vmlfb_get_gpu(struct vml_par *par)
 
 	mutex_unlock(&vml_mutex);
 
-	if (pci_enable_device(par->gpu) < 0)
+	if (pci_enable_device(par->gpu) < 0) {
+		pci_dev_put(par->gpu);
 		return -ENODEV;
+	}
 
 	return 0;
 }
-- 
GitLab


From ff61582e0dd7eea57ce4abbe6bdc567ff9282ed6 Mon Sep 17 00:00:00 2001
From: Yu Zhe <yuzhe@nfschina.com>
Date: Fri, 18 Nov 2022 17:00:50 +0800
Subject: [PATCH 3959/4122] fbdev: controlfb: fix spelling mistake
 "paramaters"->"parameters"

There is a spelling mistake in comment. Fix it.

Signed-off-by: Yu Zhe <yuzhe@nfschina.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/controlfb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/controlfb.c b/drivers/video/fbdev/controlfb.c
index 6bbcd9fc864e9..77dbf94aae5f4 100644
--- a/drivers/video/fbdev/controlfb.c
+++ b/drivers/video/fbdev/controlfb.c
@@ -376,7 +376,7 @@ static int read_control_sense(struct fb_info_control *p)
 #define CONTROL_PIXCLOCK_MIN	5000	/* ~ 200 MHz dot clock */
 
 /*
- * calculate the clock paramaters to be sent to CUDA according to given
+ * calculate the clock parameters to be sent to CUDA according to given
  * pixclock in pico second.
  */
 static int calc_clock_params(unsigned long clk, unsigned char *param)
-- 
GitLab


From cd53860edd5e66f2faaf1bfbaef81d4be9109c82 Mon Sep 17 00:00:00 2001
From: Dongliang Mu <dzm91@hust.edu.cn>
Date: Fri, 18 Nov 2022 22:14:06 +0800
Subject: [PATCH 3960/4122] fbdev: da8xx-fb: add missing regulator_disable() in
 fb_probe

The error handling code in fb_probe misses regulator_disable if
regulator_enable is called successfully. The previous commit only
adds regulator_disable in the .remove(), forgetting the error
handling code in the .probe.

Fix this by adding a new error label to call regulator_disable.

Fixes: 611097d5daea("fbdev: da8xx: add support for a regulator")
Signed-off-by: Dongliang Mu <dzm91@hust.edu.cn>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/da8xx-fb.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/video/fbdev/da8xx-fb.c b/drivers/video/fbdev/da8xx-fb.c
index 11922b009ed7c..cd07e401b3268 100644
--- a/drivers/video/fbdev/da8xx-fb.c
+++ b/drivers/video/fbdev/da8xx-fb.c
@@ -1431,7 +1431,7 @@ static int fb_probe(struct platform_device *device)
 		dev_err(&device->dev,
 			"GLCD: kmalloc for frame buffer failed\n");
 		ret = -EINVAL;
-		goto err_release_fb;
+		goto err_disable_reg;
 	}
 
 	da8xx_fb_info->screen_base = (char __iomem *) par->vram_virt;
@@ -1475,7 +1475,7 @@ static int fb_probe(struct platform_device *device)
 
 	ret = fb_alloc_cmap(&da8xx_fb_info->cmap, PALETTE_SIZE, 0);
 	if (ret)
-		goto err_release_fb;
+		goto err_disable_reg;
 	da8xx_fb_info->cmap.len = par->palette_sz;
 
 	/* initialize var_screeninfo */
@@ -1529,6 +1529,9 @@ err_cpu_freq:
 err_dealloc_cmap:
 	fb_dealloc_cmap(&da8xx_fb_info->cmap);
 
+err_disable_reg:
+	if (par->lcd_supply)
+		regulator_disable(par->lcd_supply);
 err_release_fb:
 	framebuffer_release(da8xx_fb_info);
 
-- 
GitLab


From 3074742317e3b95dcd1faf248ad2b5c2d9d7a558 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= <u.kleine-koenig@pengutronix.de>
Date: Fri, 18 Nov 2022 23:45:28 +0100
Subject: [PATCH 3961/4122] fbdev: matroxfb: Convert to i2c's .probe_new()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The probe function doesn't make use of the i2c_device_id * parameter so it
can be trivially converted.

Signed-off-by: Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/matrox/matroxfb_maven.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/video/fbdev/matrox/matroxfb_maven.c b/drivers/video/fbdev/matrox/matroxfb_maven.c
index f2e02958673df..727a10a59811b 100644
--- a/drivers/video/fbdev/matrox/matroxfb_maven.c
+++ b/drivers/video/fbdev/matrox/matroxfb_maven.c
@@ -1249,8 +1249,7 @@ static int maven_shutdown_client(struct i2c_client* clnt) {
 	return 0;
 }
 
-static int maven_probe(struct i2c_client *client,
-		       const struct i2c_device_id *id)
+static int maven_probe(struct i2c_client *client)
 {
 	struct i2c_adapter *adapter = client->adapter;
 	int err = -ENODEV;
@@ -1292,7 +1291,7 @@ static struct i2c_driver maven_driver={
 	.driver = {
 		.name	= "maven",
 	},
-	.probe		= maven_probe,
+	.probe_new	= maven_probe,
 	.remove		= maven_remove,
 	.id_table	= maven_id,
 };
-- 
GitLab


From c84bf485a5aaf9aa0764a58832b7ef4375c29f03 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Wed, 23 Nov 2022 17:29:43 +0800
Subject: [PATCH 3962/4122] fbdev: ep93xx-fb: Add missing clk_disable_unprepare
 in ep93xxfb_probe()

The clk_disable_unprepare() should be called in the error handling
of register_framebuffer(), fix it.

Fixes: 0937a7b3625d ("video: ep93xx: Prepare clock before using it")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/ep93xx-fb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/ep93xx-fb.c b/drivers/video/fbdev/ep93xx-fb.c
index 2398b3d48fedf..305f1587bd898 100644
--- a/drivers/video/fbdev/ep93xx-fb.c
+++ b/drivers/video/fbdev/ep93xx-fb.c
@@ -552,12 +552,14 @@ static int ep93xxfb_probe(struct platform_device *pdev)
 
 	err = register_framebuffer(info);
 	if (err)
-		goto failed_check;
+		goto failed_framebuffer;
 
 	dev_info(info->dev, "registered. Mode = %dx%d-%d\n",
 		 info->var.xres, info->var.yres, info->var.bits_per_pixel);
 	return 0;
 
+failed_framebuffer:
+	clk_disable_unprepare(fbi->clk);
 failed_check:
 	if (fbi->mach_info->teardown)
 		fbi->mach_info->teardown(pdev);
-- 
GitLab


From 71c53e19226b0166ba387d3c590d0509f541a0a1 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 30 Nov 2022 13:55:44 -0800
Subject: [PATCH 3963/4122] fbdev: geode: don't build on UML
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The geode fbdev driver uses struct cpuinfo fields that are not present
on ARCH=um, so don't allow this driver to be built on UML.

Prevents these build errors:

In file included from ../arch/x86/include/asm/olpc.h:7:0,
                 from ../drivers/mfd/cs5535-mfd.c:17:
../arch/x86/include/asm/geode.h: In function ‘is_geode_gx’:
../arch/x86/include/asm/geode.h:16:24: error: ‘struct cpuinfo_um’ has no member named ‘x86_vendor’
  return ((boot_cpu_data.x86_vendor == X86_VENDOR_NSC) &&
../arch/x86/include/asm/geode.h:16:39: error: ‘X86_VENDOR_NSC’ undeclared (first use in this function); did you mean ‘X86_VENDOR_ANY’?
  return ((boot_cpu_data.x86_vendor == X86_VENDOR_NSC) &&
../arch/x86/include/asm/geode.h:17:17: error: ‘struct cpuinfo_um’ has no member named ‘x86’
   (boot_cpu_data.x86 == 5) &&
../arch/x86/include/asm/geode.h:18:17: error: ‘struct cpuinfo_um’ has no member named ‘x86_model’
   (boot_cpu_data.x86_model == 5));
../arch/x86/include/asm/geode.h: In function ‘is_geode_lx’:
../arch/x86/include/asm/geode.h:23:24: error: ‘struct cpuinfo_um’ has no member named ‘x86_vendor’
  return ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
../arch/x86/include/asm/geode.h:23:39: error: ‘X86_VENDOR_AMD’ undeclared (first use in this function); did you mean ‘X86_VENDOR_ANY’?
  return ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
../arch/x86/include/asm/geode.h:24:17: error: ‘struct cpuinfo_um’ has no member named ‘x86’
   (boot_cpu_data.x86 == 5) &&
../arch/x86/include/asm/geode.h:25:17: error: ‘struct cpuinfo_um’ has no member named ‘x86_model’
   (boot_cpu_data.x86_model == 10));

Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Richard Weinberger <richard@nod.at>
Cc: linux-um@lists.infradead.org
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Helge Deller <deller@gmx.de>
Cc: linux-fbdev@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: Andres Salomon <dilinger@queued.net>
Cc: linux-geode@lists.infradead.org
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/geode/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/geode/Kconfig b/drivers/video/fbdev/geode/Kconfig
index 2f8f0fb1dae2f..b184085a78c24 100644
--- a/drivers/video/fbdev/geode/Kconfig
+++ b/drivers/video/fbdev/geode/Kconfig
@@ -5,6 +5,7 @@
 config FB_GEODE
 	bool "AMD Geode family framebuffer support"
 	depends on FB && PCI && (X86_32 || (X86 && COMPILE_TEST))
+	depends on !UML
 	help
 	  Say 'Y' here to allow you to select framebuffer drivers for
 	  the AMD Geode family of processors.
-- 
GitLab


From 35b4f4d4a725cf8f8c10649163cd12aed509b953 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Wed, 30 Nov 2022 13:55:59 -0800
Subject: [PATCH 3964/4122] fbdev: uvesafb: don't build on UML
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The uvesafb fbdev driver uses memory management information that is not
available on ARCH=um, so don't allow this driver to be built on UML.

Prevents these build errors:

../drivers/video/fbdev/uvesafb.c: In function ‘uvesafb_vbe_init’:
../drivers/video/fbdev/uvesafb.c:807:21: error: ‘__supported_pte_mask’ undeclared (first use in this function)
  807 |                 if (__supported_pte_mask & _PAGE_NX) {
../drivers/video/fbdev/uvesafb.c:807:44: error: ‘_PAGE_NX’ undeclared (first use in this function)
  807 |                 if (__supported_pte_mask & _PAGE_NX) {

Fixes: 68f5d3f3b654 ("um: add PCI over virtio emulation driver")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Richard Weinberger <richard@nod.at>
Cc: linux-um@lists.infradead.org
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Helge Deller <deller@gmx.de>
Cc: linux-fbdev@vger.kernel.org
Cc: dri-devel@lists.freedesktop.org
Cc: Michal Januszewski <spock@gentoo.org>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig
index 66f36b69e8f39..df6e09f7d2422 100644
--- a/drivers/video/fbdev/Kconfig
+++ b/drivers/video/fbdev/Kconfig
@@ -609,6 +609,7 @@ config FB_TGA
 config FB_UVESA
 	tristate "Userspace VESA VGA graphics support"
 	depends on FB && CONNECTOR
+	depends on !UML
 	select FB_CFB_FILLRECT
 	select FB_CFB_COPYAREA
 	select FB_CFB_IMAGEBLIT
-- 
GitLab


From a94371040712031ba129c7e9d8ff04a06a2f8207 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 10 Dec 2022 12:35:22 +0100
Subject: [PATCH 3965/4122] fbdev: uvesafb: Fixes an error handling path in
 uvesafb_probe()

If an error occurs after a successful uvesafb_init_mtrr() call, it must be
undone by a corresponding arch_phys_wc_del() call, as already done in the
remove function.

This has been added in the remove function in commit 63e28a7a5ffc
("uvesafb: Clean up MTRR code")

Fixes: 8bdb3a2d7df4 ("uvesafb: the driver core")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/uvesafb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index 00d789b6c0faf..0e3cabbec4b40 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c
@@ -1758,6 +1758,7 @@ static int uvesafb_probe(struct platform_device *dev)
 out_unmap:
 	iounmap(info->screen_base);
 out_mem:
+	arch_phys_wc_del(par->mtrr_handle);
 	release_mem_region(info->fix.smem_start, info->fix.smem_len);
 out_reg:
 	release_region(0x3c0, 32);
-- 
GitLab


From f2ff0c430fed8c6a6d64d11a41f0582bf8bba6f7 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sat, 10 Dec 2022 12:35:23 +0100
Subject: [PATCH 3966/4122] fbdev: uvesafb: Simplify uvesafb_remove()

When the remove() function is called, we know that the probe() function has
successfully been executed. So 'info' is known to be not NULL.

Simplify the code accordingly.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/uvesafb.c | 28 +++++++++++++---------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index 0e3cabbec4b40..2bb95c35ab2a3 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c
@@ -1777,25 +1777,23 @@ out:
 static int uvesafb_remove(struct platform_device *dev)
 {
 	struct fb_info *info = platform_get_drvdata(dev);
+	struct uvesafb_par *par = info->par;
 
-	if (info) {
-		struct uvesafb_par *par = info->par;
+	sysfs_remove_group(&dev->dev.kobj, &uvesafb_dev_attgrp);
+	unregister_framebuffer(info);
+	release_region(0x3c0, 32);
+	iounmap(info->screen_base);
+	arch_phys_wc_del(par->mtrr_handle);
+	release_mem_region(info->fix.smem_start, info->fix.smem_len);
+	fb_destroy_modedb(info->monspecs.modedb);
+	fb_dealloc_cmap(&info->cmap);
 
-		sysfs_remove_group(&dev->dev.kobj, &uvesafb_dev_attgrp);
-		unregister_framebuffer(info);
-		release_region(0x3c0, 32);
-		iounmap(info->screen_base);
-		arch_phys_wc_del(par->mtrr_handle);
-		release_mem_region(info->fix.smem_start, info->fix.smem_len);
-		fb_destroy_modedb(info->monspecs.modedb);
-		fb_dealloc_cmap(&info->cmap);
+	kfree(par->vbe_modes);
+	kfree(par->vbe_state_orig);
+	kfree(par->vbe_state_saved);
 
-		kfree(par->vbe_modes);
-		kfree(par->vbe_state_orig);
-		kfree(par->vbe_state_saved);
+	framebuffer_release(info);
 
-		framebuffer_release(info);
-	}
 	return 0;
 }
 
-- 
GitLab


From 522d5226eed96a7df6662857500777a74d60d341 Mon Sep 17 00:00:00 2001
From: ye xingchen <ye.xingchen@zte.com.cn>
Date: Mon, 5 Dec 2022 16:31:39 +0800
Subject: [PATCH 3967/4122] fbdev: uvesafb: use sysfs_emit() to instead of
 scnprintf()

Follow the advice of the Documentation/filesystems/sysfs.rst and show()
should only use sysfs_emit() or sysfs_emit_at() when formatting the
value to be returned to user space.

Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/uvesafb.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/video/fbdev/uvesafb.c b/drivers/video/fbdev/uvesafb.c
index 2bb95c35ab2a3..f09f483c219b7 100644
--- a/drivers/video/fbdev/uvesafb.c
+++ b/drivers/video/fbdev/uvesafb.c
@@ -1580,7 +1580,7 @@ static ssize_t uvesafb_show_vendor(struct device *dev,
 	struct uvesafb_par *par = info->par;
 
 	if (par->vbe_ib.oem_vendor_name_ptr)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", (char *)
+		return sysfs_emit(buf, "%s\n", (char *)
 			(&par->vbe_ib) + par->vbe_ib.oem_vendor_name_ptr);
 	else
 		return 0;
@@ -1595,7 +1595,7 @@ static ssize_t uvesafb_show_product_name(struct device *dev,
 	struct uvesafb_par *par = info->par;
 
 	if (par->vbe_ib.oem_product_name_ptr)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", (char *)
+		return sysfs_emit(buf, "%s\n", (char *)
 			(&par->vbe_ib) + par->vbe_ib.oem_product_name_ptr);
 	else
 		return 0;
@@ -1610,7 +1610,7 @@ static ssize_t uvesafb_show_product_rev(struct device *dev,
 	struct uvesafb_par *par = info->par;
 
 	if (par->vbe_ib.oem_product_rev_ptr)
-		return scnprintf(buf, PAGE_SIZE, "%s\n", (char *)
+		return sysfs_emit(buf, "%s\n", (char *)
 			(&par->vbe_ib) + par->vbe_ib.oem_product_rev_ptr);
 	else
 		return 0;
@@ -1625,7 +1625,7 @@ static ssize_t uvesafb_show_oem_string(struct device *dev,
 	struct uvesafb_par *par = info->par;
 
 	if (par->vbe_ib.oem_string_ptr)
-		return scnprintf(buf, PAGE_SIZE, "%s\n",
+		return sysfs_emit(buf, "%s\n",
 			(char *)(&par->vbe_ib) + par->vbe_ib.oem_string_ptr);
 	else
 		return 0;
@@ -1639,7 +1639,7 @@ static ssize_t uvesafb_show_nocrtc(struct device *dev,
 	struct fb_info *info = dev_get_drvdata(dev);
 	struct uvesafb_par *par = info->par;
 
-	return scnprintf(buf, PAGE_SIZE, "%d\n", par->nocrtc);
+	return sysfs_emit(buf, "%d\n", par->nocrtc);
 }
 
 static ssize_t uvesafb_store_nocrtc(struct device *dev,
-- 
GitLab


From b20a558d377c73ed1eb1247046a5e16fe41da173 Mon Sep 17 00:00:00 2001
From: ye xingchen <ye.xingchen@zte.com.cn>
Date: Mon, 5 Dec 2022 16:31:39 +0800
Subject: [PATCH 3968/4122] fbdev: sh_mobile_lcdcfb: use sysfs_emit() to
 instead of scnprintf()

Follow the advice of the Documentation/filesystems/sysfs.rst and show()
should only use sysfs_emit() or sysfs_emit_at() when formatting the
value to be returned to user space.

Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/sh_mobile_lcdcfb.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/video/fbdev/sh_mobile_lcdcfb.c b/drivers/video/fbdev/sh_mobile_lcdcfb.c
index 6d00893d41f4c..ad9323ed8e2ea 100644
--- a/drivers/video/fbdev/sh_mobile_lcdcfb.c
+++ b/drivers/video/fbdev/sh_mobile_lcdcfb.c
@@ -1188,7 +1188,7 @@ overlay_alpha_show(struct device *dev, struct device_attribute *attr, char *buf)
 	struct fb_info *info = dev_get_drvdata(dev);
 	struct sh_mobile_lcdc_overlay *ovl = info->par;
 
-	return scnprintf(buf, PAGE_SIZE, "%u\n", ovl->alpha);
+	return sysfs_emit(buf, "%u\n", ovl->alpha);
 }
 
 static ssize_t
@@ -1226,7 +1226,7 @@ overlay_mode_show(struct device *dev, struct device_attribute *attr, char *buf)
 	struct fb_info *info = dev_get_drvdata(dev);
 	struct sh_mobile_lcdc_overlay *ovl = info->par;
 
-	return scnprintf(buf, PAGE_SIZE, "%u\n", ovl->mode);
+	return sysfs_emit(buf, "%u\n", ovl->mode);
 }
 
 static ssize_t
@@ -1265,7 +1265,7 @@ overlay_position_show(struct device *dev, struct device_attribute *attr,
 	struct fb_info *info = dev_get_drvdata(dev);
 	struct sh_mobile_lcdc_overlay *ovl = info->par;
 
-	return scnprintf(buf, PAGE_SIZE, "%d,%d\n", ovl->pos_x, ovl->pos_y);
+	return sysfs_emit(buf, "%d,%d\n", ovl->pos_x, ovl->pos_y);
 }
 
 static ssize_t
@@ -1306,7 +1306,7 @@ overlay_rop3_show(struct device *dev, struct device_attribute *attr, char *buf)
 	struct fb_info *info = dev_get_drvdata(dev);
 	struct sh_mobile_lcdc_overlay *ovl = info->par;
 
-	return scnprintf(buf, PAGE_SIZE, "%u\n", ovl->rop3);
+	return sysfs_emit(buf, "%u\n", ovl->rop3);
 }
 
 static ssize_t
-- 
GitLab


From 3c3bfb8586f848317ceba5d777e11204ba3e5758 Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Tue, 6 Dec 2022 07:10:31 +0900
Subject: [PATCH 3969/4122] fbdev: fbcon: release buffer when
 fbcon_do_set_font() failed

syzbot is reporting memory leak at fbcon_do_set_font() [1], for
commit a5a923038d70 ("fbdev: fbcon: Properly revert changes when
vc_resize() failed") missed that the buffer might be newly allocated
by fbcon_set_font().

Link: https://syzkaller.appspot.com/bug?extid=25bdb7b1703639abd498 [1]
Reported-by: syzbot <syzbot+25bdb7b1703639abd498@syzkaller.appspotmail.com>
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: syzbot <syzbot+25bdb7b1703639abd498@syzkaller.appspotmail.com>
Fixes: a5a923038d70 ("fbdev: fbcon: Properly revert changes when vc_resize() failed")
CC: stable@vger.kernel.org # 5.15+
Signed-off-by: Helge Deller <deller@gmx.de>
---
 drivers/video/fbdev/core/fbcon.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
index c0143d38df83a..14a7d404062c3 100644
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2450,7 +2450,8 @@ err_out:
 
 	if (userfont) {
 		p->userfont = old_userfont;
-		REFCOUNT(data)--;
+		if (--REFCOUNT(data) == 0)
+			kfree(data - FONT_EXTRA_WORDS * sizeof(int));
 	}
 
 	vc->vc_font.width = old_width;
-- 
GitLab


From 4121d4481b72501aa4d22680be4ea1096d69d133 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 14 Dec 2022 13:35:42 +0100
Subject: [PATCH 3970/4122] bpf: Synchronize dispatcher update with
 bpf_dispatcher_xdp_func

Hao Sun reported crash in dispatcher image [1].

Currently we don't have any sync between bpf_dispatcher_update and
bpf_dispatcher_xdp_func, so following race is possible:

 cpu 0:                               cpu 1:

 bpf_prog_run_xdp
   ...
   bpf_dispatcher_xdp_func
     in image at offset 0x0

                                      bpf_dispatcher_update
                                        update image at offset 0x800
                                      bpf_dispatcher_update
                                        update image at offset 0x0

     in image at offset 0x0 -> crash

Fixing this by synchronizing dispatcher image update (which is done
in bpf_dispatcher_update function) with bpf_dispatcher_xdp_func that
reads and execute the dispatcher image.

Calling synchronize_rcu after updating and installing new image ensures
that readers leave old image before it's changed in the next dispatcher
update. The update itself is locked with dispatcher's mutex.

The bpf_prog_run_xdp is called under local_bh_disable and synchronize_rcu
will wait for it to leave [2].

[1] https://lore.kernel.org/bpf/Y5SFho7ZYXr9ifRn@krava/T/#m00c29ece654bc9f332a17df493bbca33e702896c
[2] https://lore.kernel.org/bpf/0B62D35A-E695-4B7A-A0D4-774767544C1A@gmail.com/T/#mff43e2c003ae99f4a38f353c7969be4c7162e877

Reported-by: Hao Sun <sunhao.th@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Paul E. McKenney <paulmck@kernel.org>
Link: https://lore.kernel.org/r/20221214123542.1389719-1-jolsa@kernel.org
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/dispatcher.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index c19719f48ce06..fa3e9225aedc0 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -125,6 +125,11 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs)
 
 	__BPF_DISPATCHER_UPDATE(d, new ?: (void *)&bpf_dispatcher_nop_func);
 
+	/* Make sure all the callers executing the previous/old half of the
+	 * image leave it, so following update call can modify it safely.
+	 */
+	synchronize_rcu();
+
 	if (new)
 		d->image_off = noff;
 }
-- 
GitLab


From 628050ec952d2e2e46ec9fb6aa07e41139e030c8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 13 Dec 2022 02:19:08 +0200
Subject: [PATCH 3971/4122] net: enetc: avoid buffer leaks on xdp_do_redirect()
 failure

Before enetc_clean_rx_ring_xdp() calls xdp_do_redirect(), each software
BD in the RX ring between index orig_i and i can have one of 2 refcount
values on its page.

We are the owner of the current buffer that is being processed, so the
refcount will be at least 1.

If the current owner of the buffer at the diametrically opposed index
in the RX ring (i.o.w, the other half of this page) has not yet called
kfree(), this page's refcount could even be 2.

enetc_page_reusable() in enetc_flip_rx_buff() tests for the page
refcount against 1, and [ if it's 2 ] does not attempt to reuse it.

But if enetc_flip_rx_buff() is put after the xdp_do_redirect() call,
the page refcount can have one of 3 values. It can also be 0, if there
is no owner of the other page half, and xdp_do_redirect() for this
buffer ran so far that it triggered a flush of the devmap/cpumap bulk
queue, and the consumers of those bulk queues also freed the buffer,
all by the time xdp_do_redirect() returns the execution back to enetc.

This is the reason why enetc_flip_rx_buff() is called before
xdp_do_redirect(), but there is a big flaw with that reasoning:
enetc_flip_rx_buff() will set rx_swbd->page = NULL on both sides of the
enetc_page_reusable() branch, and if xdp_do_redirect() returns an error,
we call enetc_xdp_free(), which does not deal gracefully with that.

In fact, what happens is quite special. The page refcounts start as 1.
enetc_flip_rx_buff() figures they're reusable, transfers these
rx_swbd->page pointers to a different rx_swbd in enetc_reuse_page(), and
bumps the refcount to 2. When xdp_do_redirect() later returns an error,
we call the no-op enetc_xdp_free(), but we still haven't lost the
reference to that page. A copy of it is still at rx_ring->next_to_alloc,
but that has refcount 2 (and there are no concurrent owners of it in
flight, to drop the refcount). What really kills the system is when
we'll flip the rx_swbd->page the second time around. With an updated
refcount of 2, the page will not be reusable and we'll really leak it.
Then enetc_new_page() will have to allocate more pages, which will then
eventually leak again on further errors from xdp_do_redirect().

The problem, summarized, is that we zeroize rx_swbd->page before we're
completely done with it, and this makes it impossible for the error path
to do something with it.

Since the packet is potentially multi-buffer and therefore the
rx_swbd->page is potentially an array, manual passing of the old
pointers between enetc_flip_rx_buff() and enetc_xdp_free() is a bit
difficult.

For the sake of going with a simple solution, we accept the possibility
of racing with xdp_do_redirect(), and we move the flip procedure to
execute only on the redirect success path. By racing, I mean that the
page may be deemed as not reusable by enetc (having a refcount of 0),
but there will be no leak in that case, either.

Once we accept that, we have something better to do with buffers on
XDP_REDIRECT failure. Since we haven't performed half-page flipping yet,
we won't, either (and this way, we can avoid enetc_xdp_free()
completely, which gives the entire page to the slab allocator).
Instead, we'll call enetc_xdp_drop(), which will recycle this half of
the buffer back to the RX ring.

Fixes: 9d2b68cc108d ("net: enetc: add support for XDP_REDIRECT")
Suggested-by: Lorenzo Bianconi <lorenzo.bianconi@redhat.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Link: https://lore.kernel.org/r/20221213001908.2347046-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/enetc/enetc.c | 35 +++++---------------
 1 file changed, 8 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 8671591cb7501..3a79ead5219ae 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1489,23 +1489,6 @@ static void enetc_xdp_drop(struct enetc_bdr *rx_ring, int rx_ring_first,
 	rx_ring->stats.xdp_drops++;
 }
 
-static void enetc_xdp_free(struct enetc_bdr *rx_ring, int rx_ring_first,
-			   int rx_ring_last)
-{
-	while (rx_ring_first != rx_ring_last) {
-		struct enetc_rx_swbd *rx_swbd = &rx_ring->rx_swbd[rx_ring_first];
-
-		if (rx_swbd->page) {
-			dma_unmap_page(rx_ring->dev, rx_swbd->dma, PAGE_SIZE,
-				       rx_swbd->dir);
-			__free_page(rx_swbd->page);
-			rx_swbd->page = NULL;
-		}
-		enetc_bdr_idx_inc(rx_ring, &rx_ring_first);
-	}
-	rx_ring->stats.xdp_redirect_failures++;
-}
-
 static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				   struct napi_struct *napi, int work_limit,
 				   struct bpf_prog *prog)
@@ -1527,8 +1510,8 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 		int orig_i, orig_cleaned_cnt;
 		struct xdp_buff xdp_buff;
 		struct sk_buff *skb;
-		int tmp_orig_i, err;
 		u32 bd_status;
+		int err;
 
 		rxbd = enetc_rxbd(rx_ring, i);
 		bd_status = le32_to_cpu(rxbd->r.lstatus);
@@ -1615,18 +1598,16 @@ static int enetc_clean_rx_ring_xdp(struct enetc_bdr *rx_ring,
 				break;
 			}
 
-			tmp_orig_i = orig_i;
-
-			while (orig_i != i) {
-				enetc_flip_rx_buff(rx_ring,
-						   &rx_ring->rx_swbd[orig_i]);
-				enetc_bdr_idx_inc(rx_ring, &orig_i);
-			}
-
 			err = xdp_do_redirect(rx_ring->ndev, &xdp_buff, prog);
 			if (unlikely(err)) {
-				enetc_xdp_free(rx_ring, tmp_orig_i, i);
+				enetc_xdp_drop(rx_ring, orig_i, i);
+				rx_ring->stats.xdp_redirect_failures++;
 			} else {
+				while (orig_i != i) {
+					enetc_flip_rx_buff(rx_ring,
+							   &rx_ring->rx_swbd[orig_i]);
+					enetc_bdr_idx_inc(rx_ring, &orig_i);
+				}
 				xdp_redirect_frm_cnt++;
 				rx_ring->stats.xdp_redirect++;
 			}
-- 
GitLab


From 9f28157778ede0d4f183f7ab3b46995bb400abbe Mon Sep 17 00:00:00 2001
From: Minsuk Kang <linuxlovemin@yonsei.ac.kr>
Date: Wed, 14 Dec 2022 10:51:39 +0900
Subject: [PATCH 3972/4122] nfc: pn533: Clear nfc_target before being used

Fix a slab-out-of-bounds read that occurs in nla_put() called from
nfc_genl_send_target() when target->sensb_res_len, which is duplicated
from an nfc_target in pn533, is too large as the nfc_target is not
properly initialized and retains garbage values. Clear nfc_targets with
memset() before they are used.

Found by a modified version of syzkaller.

BUG: KASAN: slab-out-of-bounds in nla_put
Call Trace:
 memcpy
 nla_put
 nfc_genl_dump_targets
 genl_lock_dumpit
 netlink_dump
 __netlink_dump_start
 genl_family_rcv_msg_dumpit
 genl_rcv_msg
 netlink_rcv_skb
 genl_rcv
 netlink_unicast
 netlink_sendmsg
 sock_sendmsg
 ____sys_sendmsg
 ___sys_sendmsg
 __sys_sendmsg
 do_syscall_64

Fixes: 673088fb42d0 ("NFC: pn533: Send ATR_REQ directly for active device detection")
Fixes: 361f3cb7f9cf ("NFC: DEP link hook implementation for pn533")
Signed-off-by: Minsuk Kang <linuxlovemin@yonsei.ac.kr>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221214015139.119673-1-linuxlovemin@yonsei.ac.kr
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/nfc/pn533/pn533.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/nfc/pn533/pn533.c b/drivers/nfc/pn533/pn533.c
index d9f6367b9993d..f0cac19005527 100644
--- a/drivers/nfc/pn533/pn533.c
+++ b/drivers/nfc/pn533/pn533.c
@@ -1295,6 +1295,8 @@ static int pn533_poll_dep_complete(struct pn533 *dev, void *arg,
 	if (IS_ERR(resp))
 		return PTR_ERR(resp);
 
+	memset(&nfc_target, 0, sizeof(struct nfc_target));
+
 	rsp = (struct pn533_cmd_jump_dep_response *)resp->data;
 
 	rc = rsp->status & PN533_CMD_RET_MASK;
@@ -1926,6 +1928,8 @@ static int pn533_in_dep_link_up_complete(struct pn533 *dev, void *arg,
 
 		dev_dbg(dev->dev, "Creating new target\n");
 
+		memset(&nfc_target, 0, sizeof(struct nfc_target));
+
 		nfc_target.supported_protocols = NFC_PROTO_NFC_DEP_MASK;
 		nfc_target.nfcid1_len = 10;
 		memcpy(nfc_target.nfcid1, rsp->nfcid3t, nfc_target.nfcid1_len);
-- 
GitLab


From 1c123c567fb138ebd187480b7fc0610fcb0851f5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 15 Dec 2022 00:02:53 +0100
Subject: [PATCH 3973/4122] bpf: Resolve fext program type when checking map
 compatibility
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The bpf_prog_map_compatible() check makes sure that BPF program types are
not mixed inside BPF map types that can contain programs (tail call maps,
cpumaps and devmaps). It does this by setting the fields of the map->owner
struct to the values of the first program being checked against, and
rejecting any subsequent programs if the values don't match.

One of the values being set in the map owner struct is the program type,
and since the code did not resolve the prog type for fext programs, the map
owner type would be set to PROG_TYPE_EXT and subsequent loading of programs
of the target type into the map would fail.

This bug is seen in particular for XDP programs that are loaded as
PROG_TYPE_EXT using libxdp; these cannot insert programs into devmaps and
cpumaps because the check fails as described above.

Fix the bug by resolving the fext program type to its target program type
as elsewhere in the verifier.

v3:
- Add Yonghong's ACK

Fixes: f45d5b6ce2e8 ("bpf: generalise tail call map compatibility check")
Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20221214230254.790066-1-toke@redhat.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 kernel/bpf/core.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7f98dec6e90fb..b334f4ddc4d59 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2092,6 +2092,7 @@ static unsigned int __bpf_prog_ret0_warn(const void *ctx,
 bool bpf_prog_map_compatible(struct bpf_map *map,
 			     const struct bpf_prog *fp)
 {
+	enum bpf_prog_type prog_type = resolve_prog_type(fp);
 	bool ret;
 
 	if (fp->kprobe_override)
@@ -2102,12 +2103,12 @@ bool bpf_prog_map_compatible(struct bpf_map *map,
 		/* There's no owner yet where we could check for
 		 * compatibility.
 		 */
-		map->owner.type  = fp->type;
+		map->owner.type  = prog_type;
 		map->owner.jited = fp->jited;
 		map->owner.xdp_has_frags = fp->aux->xdp_has_frags;
 		ret = true;
 	} else {
-		ret = map->owner.type  == fp->type &&
+		ret = map->owner.type  == prog_type &&
 		      map->owner.jited == fp->jited &&
 		      map->owner.xdp_has_frags == fp->aux->xdp_has_frags;
 	}
-- 
GitLab


From f506439ec3dee11e0e77b0a1f3fb3eec22c97873 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Toke=20H=C3=B8iland-J=C3=B8rgensen?= <toke@redhat.com>
Date: Thu, 15 Dec 2022 00:02:54 +0100
Subject: [PATCH 3974/4122] selftests/bpf: Add a test for using a cpumap from
 an freplace-to-XDP program
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds a simple test for inserting an XDP program into a cpumap that is
"owned" by an XDP program that was loaded as PROG_TYPE_EXT (as libxdp
does). Prior to the kernel fix this would fail because the map type
ownership would be set to PROG_TYPE_EXT instead of being resolved to
PROG_TYPE_XDP.

v5:
- Fix a few nits from Andrii, add his ACK
v4:
- Use skeletons for selftest
v3:
- Update comment to better explain the cause
- Add Yonghong's ACK

Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/r/20221214230254.790066-2-toke@redhat.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c  | 48 +++++++++++++++++++
 .../selftests/bpf/progs/freplace_progmap.c    | 24 ++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/freplace_progmap.c

diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index d1e32e792536c..20f5fa0fcec99 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -4,6 +4,8 @@
 #include <network_helpers.h>
 #include <bpf/btf.h>
 #include "bind4_prog.skel.h"
+#include "freplace_progmap.skel.h"
+#include "xdp_dummy.skel.h"
 
 typedef int (*test_cb)(struct bpf_object *obj);
 
@@ -500,6 +502,50 @@ cleanup:
 	bind4_prog__destroy(skel);
 }
 
+static void test_func_replace_progmap(void)
+{
+	struct bpf_cpumap_val value = { .qsize = 1 };
+	struct freplace_progmap *skel = NULL;
+	struct xdp_dummy *tgt_skel = NULL;
+	__u32 key = 0;
+	int err;
+
+	skel = freplace_progmap__open();
+	if (!ASSERT_OK_PTR(skel, "prog_open"))
+		return;
+
+	tgt_skel = xdp_dummy__open_and_load();
+	if (!ASSERT_OK_PTR(tgt_skel, "tgt_prog_load"))
+		goto out;
+
+	err = bpf_program__set_attach_target(skel->progs.xdp_cpumap_prog,
+					     bpf_program__fd(tgt_skel->progs.xdp_dummy_prog),
+					     "xdp_dummy_prog");
+	if (!ASSERT_OK(err, "set_attach_target"))
+		goto out;
+
+	err = freplace_progmap__load(skel);
+	if (!ASSERT_OK(err, "obj_load"))
+		goto out;
+
+	/* Prior to fixing the kernel, loading the PROG_TYPE_EXT 'redirect'
+	 * program above will cause the map owner type of 'cpumap' to be set to
+	 * PROG_TYPE_EXT. This in turn will cause the bpf_map_update_elem()
+	 * below to fail, because the program we are inserting into the map is
+	 * of PROG_TYPE_XDP. After fixing the kernel, the initial ownership will
+	 * be correctly resolved to the *target* of the PROG_TYPE_EXT program
+	 * (i.e., PROG_TYPE_XDP) and the map update will succeed.
+	 */
+	value.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_drop_prog);
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map),
+				  &key, &value, 0);
+	ASSERT_OK(err, "map_update");
+
+out:
+	xdp_dummy__destroy(tgt_skel);
+	freplace_progmap__destroy(skel);
+}
+
 /* NOTE: affect other tests, must run in serial mode */
 void serial_test_fexit_bpf2bpf(void)
 {
@@ -525,4 +571,6 @@ void serial_test_fexit_bpf2bpf(void)
 		test_func_replace_global_func();
 	if (test__start_subtest("fentry_to_cgroup_bpf"))
 		test_fentry_to_cgroup_bpf();
+	if (test__start_subtest("func_replace_progmap"))
+		test_func_replace_progmap();
 }
diff --git a/tools/testing/selftests/bpf/progs/freplace_progmap.c b/tools/testing/selftests/bpf/progs/freplace_progmap.c
new file mode 100644
index 0000000000000..81b56b9aa7d61
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_progmap.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CPUMAP);
+	__type(key, __u32);
+	__type(value, struct bpf_cpumap_val);
+	__uint(max_entries, 1);
+} cpu_map SEC(".maps");
+
+SEC("xdp/cpumap")
+int xdp_drop_prog(struct xdp_md *ctx)
+{
+	return XDP_DROP;
+}
+
+SEC("freplace")
+int xdp_cpumap_prog(struct xdp_md *ctx)
+{
+	return bpf_redirect_map(&cpu_map, 0, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
-- 
GitLab


From 3ff8bff704f4de125dca2262e5b5b963a3da1d87 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <tkhai@ya.ru>
Date: Tue, 13 Dec 2022 00:05:53 +0300
Subject: [PATCH 3975/4122] unix: Fix race in SOCK_SEQPACKET's
 unix_dgram_sendmsg()

There is a race resulting in alive SOCK_SEQPACKET socket
may change its state from TCP_ESTABLISHED to TCP_CLOSE:

unix_release_sock(peer)                  unix_dgram_sendmsg(sk)
  sock_orphan(peer)
    sock_set_flag(peer, SOCK_DEAD)
                                           sock_alloc_send_pskb()
                                             if !(sk->sk_shutdown & SEND_SHUTDOWN)
                                               OK
                                           if sock_flag(peer, SOCK_DEAD)
                                             sk->sk_state = TCP_CLOSE
  sk->sk_shutdown = SHUTDOWN_MASK

After that socket sk remains almost normal: it is able to connect, listen, accept
and recvmsg, while it can't sendmsg.

Since this is the only possibility for alive SOCK_SEQPACKET to change
the state in such way, we should better fix this strange and potentially
danger corner case.

Note, that we will return EPIPE here like this is normally done in sock_alloc_send_pskb().
Originally used ECONNREFUSED looks strange, since it's strange to return
a specific retval in dependence of race in kernel, when user can't affect on this.

Also, move TCP_CLOSE assignment for SOCK_DGRAM sockets under state lock
to fix race with unix_dgram_connect():

unix_dgram_connect(other)            unix_dgram_sendmsg(sk)
                                       unix_peer(sk) = NULL
                                       unix_state_unlock(sk)
  unix_state_double_lock(sk, other)
  sk->sk_state  = TCP_ESTABLISHED
  unix_peer(sk) = other
  unix_state_double_unlock(sk, other)
                                       sk->sk_state  = TCP_CLOSED

This patch fixes both of these races.

Fixes: 83301b5367a9 ("af_unix: Set TCP_ESTABLISHED for datagram sockets too")
Signed-off-by: Kirill Tkhai <tkhai@ya.ru>
Link: https://lore.kernel.org/r/135fda25-22d5-837a-782b-ceee50e19844@ya.ru
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 net/unix/af_unix.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ede2b2a140a4b..f0c2293f1d3b8 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1999,13 +1999,20 @@ restart_locked:
 			unix_state_lock(sk);
 
 		err = 0;
-		if (unix_peer(sk) == other) {
+		if (sk->sk_type == SOCK_SEQPACKET) {
+			/* We are here only when racing with unix_release_sock()
+			 * is clearing @other. Never change state to TCP_CLOSE
+			 * unlike SOCK_DGRAM wants.
+			 */
+			unix_state_unlock(sk);
+			err = -EPIPE;
+		} else if (unix_peer(sk) == other) {
 			unix_peer(sk) = NULL;
 			unix_dgram_peer_wake_disconnect_wakeup(sk, other);
 
+			sk->sk_state = TCP_CLOSE;
 			unix_state_unlock(sk);
 
-			sk->sk_state = TCP_CLOSE;
 			unix_dgram_disconnected(sk, other);
 			sock_put(other);
 			err = -ECONNREFUSED;
-- 
GitLab


From ba3e77a4a22af018d2fe4d745902b2531ca82aba Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 9 Dec 2022 22:14:01 +0100
Subject: [PATCH 3976/4122] gfs2: Remove support for glock holder auto-demotion

Remove the support for glock holder auto-demotion (commit dc732906c245
and folow-ups) as we are not planning to use this feature, and the
additional code therefore only adds unnecessary complexity.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c  | 194 +++++++++--------------------------------------
 fs/gfs2/glock.h  |  20 -----
 fs/gfs2/incore.h |   1 -
 3 files changed, 36 insertions(+), 179 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index c32c25b4c37c8..a4c15e86b1f56 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -206,12 +206,6 @@ static int demote_ok(const struct gfs2_glock *gl)
 
 	if (gl->gl_state == LM_ST_UNLOCKED)
 		return 0;
-	/*
-	 * Note that demote_ok is used for the lru process of disposing of
-	 * glocks. For this purpose, we don't care if the glock's holders
-	 * have the HIF_MAY_DEMOTE flag set or not. If someone is using
-	 * them, don't demote.
-	 */
 	if (!list_empty(&gl->gl_holders))
 		return 0;
 	if (glops->go_demote_ok)
@@ -394,7 +388,7 @@ static void do_error(struct gfs2_glock *gl, const int ret)
 	struct gfs2_holder *gh, *tmp;
 
 	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
-		if (!test_bit(HIF_WAIT, &gh->gh_iflags))
+		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
 			continue;
 		if (ret & LM_OUT_ERROR)
 			gh->gh_error = -EIO;
@@ -408,45 +402,6 @@ static void do_error(struct gfs2_glock *gl, const int ret)
 	}
 }
 
-/**
- * demote_incompat_holders - demote incompatible demoteable holders
- * @gl: the glock we want to promote
- * @current_gh: the newly promoted holder
- *
- * We're passing the newly promoted holder in @current_gh, but actually, any of
- * the strong holders would do.
- */
-static void demote_incompat_holders(struct gfs2_glock *gl,
-				    struct gfs2_holder *current_gh)
-{
-	struct gfs2_holder *gh, *tmp;
-
-	/*
-	 * Demote incompatible holders before we make ourselves eligible.
-	 * (This holder may or may not allow auto-demoting, but we don't want
-	 * to demote the new holder before it's even granted.)
-	 */
-	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
-		/*
-		 * Since holders are at the front of the list, we stop when we
-		 * find the first non-holder.
-		 */
-		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
-			return;
-		if (gh == current_gh)
-			continue;
-		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags) &&
-		    !may_grant(gl, current_gh, gh)) {
-			/*
-			 * We should not recurse into do_promote because
-			 * __gfs2_glock_dq only calls handle_callback,
-			 * gfs2_glock_add_to_lru and __gfs2_glock_queue_work.
-			 */
-			__gfs2_glock_dq(gh);
-		}
-	}
-}
-
 /**
  * find_first_holder - find the first "holder" gh
  * @gl: the glock
@@ -465,26 +420,6 @@ static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
 	return NULL;
 }
 
-/**
- * find_first_strong_holder - find the first non-demoteable holder
- * @gl: the glock
- *
- * Find the first holder that doesn't have the HIF_MAY_DEMOTE flag set.
- */
-static inline struct gfs2_holder *
-find_first_strong_holder(struct gfs2_glock *gl)
-{
-	struct gfs2_holder *gh;
-
-	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
-		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
-			return NULL;
-		if (!test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
-			return gh;
-	}
-	return NULL;
-}
-
 /*
  * gfs2_instantiate - Call the glops instantiate function
  * @gh: The glock holder
@@ -541,9 +476,8 @@ done:
 static int do_promote(struct gfs2_glock *gl)
 {
 	struct gfs2_holder *gh, *current_gh;
-	bool incompat_holders_demoted = false;
 
-	current_gh = find_first_strong_holder(gl);
+	current_gh = find_first_holder(gl);
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
 		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
 			continue;
@@ -562,11 +496,8 @@ static int do_promote(struct gfs2_glock *gl)
 		set_bit(HIF_HOLDER, &gh->gh_iflags);
 		trace_gfs2_promote(gh);
 		gfs2_holder_wake(gh);
-		if (!incompat_holders_demoted) {
+		if (!current_gh)
 			current_gh = gh;
-			demote_incompat_holders(gl, current_gh);
-			incompat_holders_demoted = true;
-		}
 	}
 	return 0;
 }
@@ -1538,7 +1469,7 @@ __acquires(&gl->gl_lockref.lock)
 		if (test_bit(GLF_LOCK, &gl->gl_flags)) {
 			struct gfs2_holder *current_gh;
 
-			current_gh = find_first_strong_holder(gl);
+			current_gh = find_first_holder(gl);
 			try_futile = !may_grant(gl, current_gh, gh);
 		}
 		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
@@ -1550,8 +1481,6 @@ __acquires(&gl->gl_lockref.lock)
 			continue;
 		if (gh->gh_gl->gl_ops->go_type == LM_TYPE_FLOCK)
 			continue;
-		if (test_bit(HIF_MAY_DEMOTE, &gh2->gh_iflags))
-			continue;
 		if (!pid_is_meaningful(gh2))
 			continue;
 		goto trap_recursive;
@@ -1666,64 +1595,42 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
 	int fast_path = 0;
 
 	/*
-	 * This while loop is similar to function demote_incompat_holders:
-	 * If the glock is due to be demoted (which may be from another node
-	 * or even if this holder is GL_NOCACHE), the weak holders are
-	 * demoted as well, allowing the glock to be demoted.
+	 * If we're in the process of file system withdraw, we cannot just
+	 * dequeue any glocks until our journal is recovered, lest we introduce
+	 * file system corruption. We need two exceptions to this rule: We need
+	 * to allow unlocking of nondisk glocks and the glock for our own
+	 * journal that needs recovery.
 	 */
-	while (gh) {
-		/*
-		 * If we're in the process of file system withdraw, we cannot
-		 * just dequeue any glocks until our journal is recovered, lest
-		 * we introduce file system corruption. We need two exceptions
-		 * to this rule: We need to allow unlocking of nondisk glocks
-		 * and the glock for our own journal that needs recovery.
-		 */
-		if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
-		    glock_blocked_by_withdraw(gl) &&
-		    gh->gh_gl != sdp->sd_jinode_gl) {
-			sdp->sd_glock_dqs_held++;
-			spin_unlock(&gl->gl_lockref.lock);
-			might_sleep();
-			wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
-				    TASK_UNINTERRUPTIBLE);
-			spin_lock(&gl->gl_lockref.lock);
-		}
+	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
+	    glock_blocked_by_withdraw(gl) &&
+	    gh->gh_gl != sdp->sd_jinode_gl) {
+		sdp->sd_glock_dqs_held++;
+		spin_unlock(&gl->gl_lockref.lock);
+		might_sleep();
+		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
+			    TASK_UNINTERRUPTIBLE);
+		spin_lock(&gl->gl_lockref.lock);
+	}
 
-		/*
-		 * This holder should not be cached, so mark it for demote.
-		 * Note: this should be done before the check for needs_demote
-		 * below.
-		 */
-		if (gh->gh_flags & GL_NOCACHE)
-			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
+	/*
+	 * This holder should not be cached, so mark it for demote.
+	 * Note: this should be done before the check for needs_demote
+	 * below.
+	 */
+	if (gh->gh_flags & GL_NOCACHE)
+		handle_callback(gl, LM_ST_UNLOCKED, 0, false);
 
-		list_del_init(&gh->gh_list);
-		clear_bit(HIF_HOLDER, &gh->gh_iflags);
-		trace_gfs2_glock_queue(gh, 0);
+	list_del_init(&gh->gh_list);
+	clear_bit(HIF_HOLDER, &gh->gh_iflags);
+	trace_gfs2_glock_queue(gh, 0);
 
-		/*
-		 * If there hasn't been a demote request we are done.
-		 * (Let the remaining holders, if any, keep holding it.)
-		 */
-		if (!needs_demote(gl)) {
-			if (list_empty(&gl->gl_holders))
-				fast_path = 1;
-			break;
-		}
-		/*
-		 * If we have another strong holder (we cannot auto-demote)
-		 * we are done. It keeps holding it until it is done.
-		 */
-		if (find_first_strong_holder(gl))
-			break;
-
-		/*
-		 * If we have a weak holder at the head of the list, it
-		 * (and all others like it) must be auto-demoted. If there
-		 * are no more weak holders, we exit the while loop.
-		 */
-		gh = find_first_holder(gl);
+	/*
+	 * If there hasn't been a demote request we are done.
+	 * (Let the remaining holders, if any, keep holding it.)
+	 */
+	if (!needs_demote(gl)) {
+		if (list_empty(&gl->gl_holders))
+			fast_path = 1;
 	}
 
 	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
@@ -1938,33 +1845,6 @@ void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
 		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
 			delay = gl->gl_hold_time;
 	}
-	/*
-	 * Note 1: We cannot call demote_incompat_holders from handle_callback
-	 * or gfs2_set_demote due to recursion problems like: gfs2_glock_dq ->
-	 * handle_callback -> demote_incompat_holders -> gfs2_glock_dq
-	 * Plus, we only want to demote the holders if the request comes from
-	 * a remote cluster node because local holder conflicts are resolved
-	 * elsewhere.
-	 *
-	 * Note 2: if a remote node wants this glock in EX mode, lock_dlm will
-	 * request that we set our state to UNLOCKED. Here we mock up a holder
-	 * to make it look like someone wants the lock EX locally. Any SH
-	 * and DF requests should be able to share the lock without demoting.
-	 *
-	 * Note 3: We only want to demote the demoteable holders when there
-	 * are no more strong holders. The demoteable holders might as well
-	 * keep the glock until the last strong holder is done with it.
-	 */
-	if (!find_first_strong_holder(gl)) {
-		struct gfs2_holder mock_gh = {
-			.gh_gl = gl,
-			.gh_state = (state == LM_ST_UNLOCKED) ?
-				    LM_ST_EXCLUSIVE : state,
-			.gh_iflags = BIT(HIF_HOLDER)
-		};
-
-		demote_incompat_holders(gl, &mock_gh);
-	}
 	handle_callback(gl, state, delay, true);
 	__gfs2_glock_queue_work(gl, delay);
 	spin_unlock(&gl->gl_lockref.lock);
@@ -2356,8 +2236,6 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
 		*p++ = 'H';
 	if (test_bit(HIF_WAIT, &iflags))
 		*p++ = 'W';
-	if (test_bit(HIF_MAY_DEMOTE, &iflags))
-		*p++ = 'D';
 	if (flags & GL_SKIP)
 		*p++ = 's';
 	*p = 0;
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index e4be9e4bc9799..f37ac087e2c14 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -156,8 +156,6 @@ static inline struct gfs2_holder *gfs2_glock_is_locked_by_me(struct gfs2_glock *
 	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
 		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
 			break;
-		if (test_bit(HIF_MAY_DEMOTE, &gh->gh_iflags))
-			continue;
 		if (gh->gh_owner_pid == pid)
 			goto out;
 	}
@@ -308,24 +306,6 @@ static inline bool gfs2_holder_queued(struct gfs2_holder *gh)
 	return !list_empty(&gh->gh_list);
 }
 
-static inline void gfs2_holder_allow_demote(struct gfs2_holder *gh)
-{
-	struct gfs2_glock *gl = gh->gh_gl;
-
-	spin_lock(&gl->gl_lockref.lock);
-	set_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
-static inline void gfs2_holder_disallow_demote(struct gfs2_holder *gh)
-{
-	struct gfs2_glock *gl = gh->gh_gl;
-
-	spin_lock(&gl->gl_lockref.lock);
-	clear_bit(HIF_MAY_DEMOTE, &gh->gh_iflags);
-	spin_unlock(&gl->gl_lockref.lock);
-}
-
 extern void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation);
 extern bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation);
 
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index d09d9892cd055..c26765080f280 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -252,7 +252,6 @@ struct gfs2_lkstats {
 
 enum {
 	/* States */
-	HIF_MAY_DEMOTE		= 1,
 	HIF_HOLDER		= 6,  /* Set for gh that "holds" the glock */
 	HIF_WAIT		= 10,
 };
-- 
GitLab


From 6b46a06100dd0e0ebe400573e94ccd09163bfd5b Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Fri, 9 Dec 2022 23:03:59 +0100
Subject: [PATCH 3977/4122] gfs2: Remove support for glock holder auto-demotion
 (2)

As a follow-up to the previous commit, move the recovery related code in
__gfs2_glock_dq() to gfs2_glock_dq() where it better fits.  No
functional change.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
---
 fs/gfs2/glock.c | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a4c15e86b1f56..524f3c96b9a4c 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -1590,28 +1590,9 @@ static inline bool needs_demote(struct gfs2_glock *gl)
 static void __gfs2_glock_dq(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
-	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 	unsigned delay = 0;
 	int fast_path = 0;
 
-	/*
-	 * If we're in the process of file system withdraw, we cannot just
-	 * dequeue any glocks until our journal is recovered, lest we introduce
-	 * file system corruption. We need two exceptions to this rule: We need
-	 * to allow unlocking of nondisk glocks and the glock for our own
-	 * journal that needs recovery.
-	 */
-	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
-	    glock_blocked_by_withdraw(gl) &&
-	    gh->gh_gl != sdp->sd_jinode_gl) {
-		sdp->sd_glock_dqs_held++;
-		spin_unlock(&gl->gl_lockref.lock);
-		might_sleep();
-		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
-			    TASK_UNINTERRUPTIBLE);
-		spin_lock(&gl->gl_lockref.lock);
-	}
-
 	/*
 	 * This holder should not be cached, so mark it for demote.
 	 * Note: this should be done before the check for needs_demote
@@ -1654,6 +1635,7 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh)
 void gfs2_glock_dq(struct gfs2_holder *gh)
 {
 	struct gfs2_glock *gl = gh->gh_gl;
+	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
 
 	spin_lock(&gl->gl_lockref.lock);
 	if (!gfs2_holder_queued(gh)) {
@@ -1663,6 +1645,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		 */
 		goto out;
 	}
+
 	if (list_is_first(&gh->gh_list, &gl->gl_holders) &&
 	    !test_bit(HIF_HOLDER, &gh->gh_iflags)) {
 		spin_unlock(&gl->gl_lockref.lock);
@@ -1671,6 +1654,24 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
 		spin_lock(&gl->gl_lockref.lock);
 	}
 
+	/*
+	 * If we're in the process of file system withdraw, we cannot just
+	 * dequeue any glocks until our journal is recovered, lest we introduce
+	 * file system corruption. We need two exceptions to this rule: We need
+	 * to allow unlocking of nondisk glocks and the glock for our own
+	 * journal that needs recovery.
+	 */
+	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
+	    glock_blocked_by_withdraw(gl) &&
+	    gh->gh_gl != sdp->sd_jinode_gl) {
+		sdp->sd_glock_dqs_held++;
+		spin_unlock(&gl->gl_lockref.lock);
+		might_sleep();
+		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
+			    TASK_UNINTERRUPTIBLE);
+		spin_lock(&gl->gl_lockref.lock);
+	}
+
 	__gfs2_glock_dq(gh);
 out:
 	spin_unlock(&gl->gl_lockref.lock);
-- 
GitLab


From 7e43039a49c2da45edc1d9d7c9ede4003ab45a5f Mon Sep 17 00:00:00 2001
From: Li Zetao <lizetao1@huawei.com>
Date: Tue, 13 Dec 2022 20:56:14 +0800
Subject: [PATCH 3978/4122] r6040: Fix kmemleak in probe and remove

There is a memory leaks reported by kmemleak:

  unreferenced object 0xffff888116111000 (size 2048):
    comm "modprobe", pid 817, jiffies 4294759745 (age 76.502s)
    hex dump (first 32 bytes):
      00 c4 0a 04 81 88 ff ff 08 10 11 16 81 88 ff ff  ................
      08 10 11 16 81 88 ff ff 00 00 00 00 00 00 00 00  ................
    backtrace:
      [<ffffffff815bcd82>] kmalloc_trace+0x22/0x60
      [<ffffffff827e20ee>] phy_device_create+0x4e/0x90
      [<ffffffff827e6072>] get_phy_device+0xd2/0x220
      [<ffffffff827e7844>] mdiobus_scan+0xa4/0x2e0
      [<ffffffff827e8be2>] __mdiobus_register+0x482/0x8b0
      [<ffffffffa01f5d24>] r6040_init_one+0x714/0xd2c [r6040]
      ...

The problem occurs in probe process as follows:
  r6040_init_one:
    mdiobus_register
      mdiobus_scan    <- alloc and register phy_device,
                         the reference count of phy_device is 3
    r6040_mii_probe
      phy_connect     <- connect to the first phy_device,
                         so the reference count of the first
                         phy_device is 4, others are 3
    register_netdev   <- fault inject succeeded, goto error handling path

    // error handling path
    err_out_mdio_unregister:
      mdiobus_unregister(lp->mii_bus);
    err_out_mdio:
      mdiobus_free(lp->mii_bus);    <- the reference count of the first
                                       phy_device is 1, it is not released
                                       and other phy_devices are released
  // similarly, the remove process also has the same problem

The root cause is traced to the phy_device is not disconnected when
removes one r6040 device in r6040_remove_one() or on error handling path
after r6040_mii probed successfully. In r6040_mii_probe(), a net ethernet
device is connected to the first PHY device of mii_bus, in order to
notify the connected driver when the link status changes, which is the
default behavior of the PHY infrastructure to handle everything.
Therefore the phy_device should be disconnected when removes one r6040
device or on error handling path.

Fix it by adding phy_disconnect() when removes one r6040 device or on
error handling path after r6040_mii probed successfully.

Fixes: 3831861b4ad8 ("r6040: implement phylib")
Signed-off-by: Li Zetao <lizetao1@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20221213125614.927754-1-lizetao1@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/rdc/r6040.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index eecd52ed1ed21..f4d434c379e7c 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -1159,10 +1159,12 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	err = register_netdev(dev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to register net device\n");
-		goto err_out_mdio_unregister;
+		goto err_out_phy_disconnect;
 	}
 	return 0;
 
+err_out_phy_disconnect:
+	phy_disconnect(dev->phydev);
 err_out_mdio_unregister:
 	mdiobus_unregister(lp->mii_bus);
 err_out_mdio:
@@ -1186,6 +1188,7 @@ static void r6040_remove_one(struct pci_dev *pdev)
 	struct r6040_private *lp = netdev_priv(dev);
 
 	unregister_netdev(dev);
+	phy_disconnect(dev->phydev);
 	mdiobus_unregister(lp->mii_bus);
 	mdiobus_free(lp->mii_bus);
 	netif_napi_del(&lp->napi);
-- 
GitLab


From 2cb815cfc78b137ee38bcd65e7c955d6cc2cc250 Mon Sep 17 00:00:00 2001
From: Gaosheng Cui <cuigaosheng1@huawei.com>
Date: Wed, 14 Dec 2022 16:01:17 +0800
Subject: [PATCH 3979/4122] net: stmmac: fix errno when
 create_singlethread_workqueue() fails

We should set the return value to -ENOMEM explicitly when
create_singlethread_workqueue() fails in stmmac_dvr_probe(),
otherwise we'll lose the error value.

Fixes: a137f3f27f92 ("net: stmmac: fix possible memory leak in stmmac_dvr_probe()")
Signed-off-by: Gaosheng Cui <cuigaosheng1@huawei.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20221214080117.3514615-1-cuigaosheng1@huawei.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ec64b65dee34f..c6951c976f5dd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -7099,6 +7099,7 @@ int stmmac_dvr_probe(struct device *device,
 	priv->wq = create_singlethread_workqueue("stmmac_wq");
 	if (!priv->wq) {
 		dev_err(priv->device, "failed to create workqueue\n");
+		ret = -ENOMEM;
 		goto error_wq_init;
 	}
 
-- 
GitLab


From c72a7e42592b2e18d862cf120876070947000d7a Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das.jz@bp.renesas.com>
Date: Wed, 14 Dec 2022 10:51:18 +0000
Subject: [PATCH 3980/4122] ravb: Fix "failed to switch device to config mode"
 message during unbind

This patch fixes the error "ravb 11c20000.ethernet eth0: failed to switch
device to config mode" during unbind.

We are doing register access after pm_runtime_put_sync().

We usually do cleanup in reverse order of init. Currently in
remove(), the "pm_runtime_put_sync" is not in reverse order.

Probe
	reset_control_deassert(rstc);
	pm_runtime_enable(&pdev->dev);
	pm_runtime_get_sync(&pdev->dev);

remove
	pm_runtime_put_sync(&pdev->dev);
	unregister_netdev(ndev);
	..
	ravb_mdio_release(priv);
	pm_runtime_disable(&pdev->dev);

Consider the call to unregister_netdev()
unregister_netdev->unregister_netdevice_queue->rollback_registered_many
that calls the below functions which access the registers after
pm_runtime_put_sync()
 1) ravb_get_stats
 2) ravb_close

Fixes: c156633f1353 ("Renesas Ethernet AVB driver proper")
Cc: stable@vger.kernel.org
Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
Link: https://lore.kernel.org/r/20221214105118.2495313-1-biju.das.jz@bp.renesas.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/ethernet/renesas/ravb_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 33f723a9f471b..b4e0fc7f65bdf 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -2903,12 +2903,12 @@ static int ravb_remove(struct platform_device *pdev)
 			  priv->desc_bat_dma);
 	/* Set reset mode */
 	ravb_write(ndev, CCC_OPC_RESET, CCC);
-	pm_runtime_put_sync(&pdev->dev);
 	unregister_netdev(ndev);
 	if (info->nc_queues)
 		netif_napi_del(&priv->napi[RAVB_NC]);
 	netif_napi_del(&priv->napi[RAVB_BE]);
 	ravb_mdio_release(priv);
+	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 	reset_control_assert(priv->rstc);
 	free_netdev(ndev);
-- 
GitLab


From a7d82367daa6baa5e8399e6327e7f2f463534505 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 14 Dec 2022 13:01:20 +0200
Subject: [PATCH 3981/4122] net: dsa: mv88e6xxx: avoid reg_lock deadlock in
 mv88e6xxx_setup_port()

In the blamed commit, it was not noticed that one implementation of
chip->info->ops->phylink_get_caps(), called by mv88e6xxx_get_caps(),
may access hardware registers, and in doing so, it takes the
mv88e6xxx_reg_lock(). Namely, this is mv88e6352_phylink_get_caps().

This is a problem because mv88e6xxx_get_caps(), apart from being
a top-level function (method invoked by dsa_switch_ops), is now also
directly called from mv88e6xxx_setup_port(), which runs under the
mv88e6xxx_reg_lock() taken by mv88e6xxx_setup(). Therefore, when running
on mv88e6352, the reg_lock would be acquired a second time and the
system would deadlock on driver probe.

The things that mv88e6xxx_setup() can compete with in terms of register
access with are the IRQ handlers and MDIO bus operations registered by
mv88e6xxx_probe(). So there is a real need to acquire the register lock.

The register lock can, in principle, be dropped and re-acquired pretty
much at will within the driver, as long as no operations that involve
waiting for indirect access to complete (essentially, callers of
mv88e6xxx_smi_direct_wait() and mv88e6xxx_wait_mask()) are interrupted
with the lock released. However, I would guess that in mv88e6xxx_setup(),
the critical section is kept open for such a long time just in order to
optimize away multiple lock/unlock operations on the registers.

We could, in principle, drop the reg_lock right before the
mv88e6xxx_setup_port() -> mv88e6xxx_get_caps() call, and
re-acquire it immediately afterwards. But this would look ugly, because
mv88e6xxx_setup_port() would release a lock which it didn't acquire, but
the caller did.

A cleaner solution to this issue comes from the observation that struct
mv88e6xxxx_ops methods generally assume they are called with the
reg_lock already acquired. Whereas mv88e6352_phylink_get_caps() is more
the exception rather than the norm, in that it acquires the lock itself.

Let's enforce the same locking pattern/convention for
chip->info->ops->phylink_get_caps() as well, and make
mv88e6xxx_get_caps(), the top-level function, acquire the register lock
explicitly, for this one implementation that will access registers for
port 4 to work properly.

This means that mv88e6xxx_setup_port() will no longer call the top-level
function, but the low-level mv88e6xxx_ops method which expects the
correct calling context (register lock held).

Compared to chip->info->ops->phylink_get_caps(), mv88e6xxx_get_caps()
also fixes up the supported_interfaces bitmap for internal ports, since
that can be done generically and does not require per-switch knowledge.
That's code which will no longer execute, however mv88e6xxx_setup_port()
doesn't need that. It just needs to look at the mac_capabilities bitmap.

Fixes: cc1049ccee20 ("net: dsa: mv88e6xxx: fix speed setting for CPU/DSA ports")
Reported-by: Maksim Kiselev <bigunclemax@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Maksim Kiselev <bigunclemax@gmail.com>
Link: https://lore.kernel.org/r/20221214110120.3368472-1-vladimir.oltean@nxp.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index ba4fff8690aa5..242b8b325504a 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -689,13 +689,12 @@ static void mv88e6352_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
 
 	/* Port 4 supports automedia if the serdes is associated with it. */
 	if (port == 4) {
-		mv88e6xxx_reg_lock(chip);
 		err = mv88e6352_g2_scratch_port_has_serdes(chip, port);
 		if (err < 0)
 			dev_err(chip->dev, "p%d: failed to read scratch\n",
 				port);
 		if (err <= 0)
-			goto unlock;
+			return;
 
 		cmode = mv88e6352_get_port4_serdes_cmode(chip);
 		if (cmode < 0)
@@ -703,8 +702,6 @@ static void mv88e6352_phylink_get_caps(struct mv88e6xxx_chip *chip, int port,
 				port);
 		else
 			mv88e6xxx_translate_cmode(cmode, supported);
-unlock:
-		mv88e6xxx_reg_unlock(chip);
 	}
 }
 
@@ -831,7 +828,9 @@ static void mv88e6xxx_get_caps(struct dsa_switch *ds, int port,
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 
+	mv88e6xxx_reg_lock(chip);
 	chip->info->ops->phylink_get_caps(chip, port, config);
+	mv88e6xxx_reg_unlock(chip);
 
 	if (mv88e6xxx_phy_is_internal(ds, port)) {
 		__set_bit(PHY_INTERFACE_MODE_INTERNAL,
@@ -3307,7 +3306,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port)
 		struct phylink_config pl_config = {};
 		unsigned long caps;
 
-		mv88e6xxx_get_caps(ds, port, &pl_config);
+		chip->info->ops->phylink_get_caps(chip, port, &pl_config);
 
 		caps = pl_config.mac_capabilities;
 
-- 
GitLab


From 5fc24e6022d47c19d92fb8b84c1616754ef6fdb2 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Thu, 15 Dec 2022 11:29:25 -0400
Subject: [PATCH 3982/4122] RDMA/rxe: Fix compile warnings on 32-bit

Move the conditional code into a function, with two varients so it is
harder to make these kinds of mistakes.

 drivers/infiniband/sw/rxe/rxe_resp.c: In function 'atomic_write_reply':
 drivers/infiniband/sw/rxe/rxe_resp.c:794:13: error: unused variable 'payload' [-Werror=unused-variable]
   794 |         int payload = payload_size(pkt);
       |             ^~~~~~~
 drivers/infiniband/sw/rxe/rxe_resp.c:793:24: error: unused variable 'mr' [-Werror=unused-variable]
   793 |         struct rxe_mr *mr = qp->resp.mr;
       |                        ^~
 drivers/infiniband/sw/rxe/rxe_resp.c:791:19: error: unused variable 'dst' [-Werror=unused-variable]
   791 |         u64 src, *dst;
       |                   ^~~
 drivers/infiniband/sw/rxe/rxe_resp.c:791:13: error: unused variable 'src' [-Werror=unused-variable]
   791 |         u64 src, *dst;

Fixes: 034e285f8b99 ("RDMA/rxe: Make responder support atomic write on RC service")
Link: https://lore.kernel.org/linux-rdma/Y5s+EVE7eLWQqOwv@nvidia.com/
Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/rxe/rxe_resp.c | 72 +++++++++++++++-------------
 1 file changed, 40 insertions(+), 32 deletions(-)

diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c
index 7a60c7709da04..c74972244f08f 100644
--- a/drivers/infiniband/sw/rxe/rxe_resp.c
+++ b/drivers/infiniband/sw/rxe/rxe_resp.c
@@ -785,53 +785,61 @@ out:
 	return ret;
 }
 
-static enum resp_states atomic_write_reply(struct rxe_qp *qp,
-						struct rxe_pkt_info *pkt)
+#ifdef CONFIG_64BIT
+static enum resp_states do_atomic_write(struct rxe_qp *qp,
+					struct rxe_pkt_info *pkt)
 {
-	u64 src, *dst;
-	struct resp_res *res = qp->resp.res;
 	struct rxe_mr *mr = qp->resp.mr;
 	int payload = payload_size(pkt);
+	u64 src, *dst;
 
-	if (!res) {
-		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
-		qp->resp.res = res;
-	}
-
-	if (!res->replay) {
-#ifdef CONFIG_64BIT
-		if (mr->state != RXE_MR_STATE_VALID)
-			return RESPST_ERR_RKEY_VIOLATION;
-
-		memcpy(&src, payload_addr(pkt), payload);
+	if (mr->state != RXE_MR_STATE_VALID)
+		return RESPST_ERR_RKEY_VIOLATION;
 
-		dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
-		/* check vaddr is 8 bytes aligned. */
-		if (!dst || (uintptr_t)dst & 7)
-			return RESPST_ERR_MISALIGNED_ATOMIC;
+	memcpy(&src, payload_addr(pkt), payload);
 
-		/* Do atomic write after all prior operations have completed */
-		smp_store_release(dst, src);
+	dst = iova_to_vaddr(mr, qp->resp.va + qp->resp.offset, payload);
+	/* check vaddr is 8 bytes aligned. */
+	if (!dst || (uintptr_t)dst & 7)
+		return RESPST_ERR_MISALIGNED_ATOMIC;
 
-		/* decrease resp.resid to zero */
-		qp->resp.resid -= sizeof(payload);
+	/* Do atomic write after all prior operations have completed */
+	smp_store_release(dst, src);
 
-		qp->resp.msn++;
+	/* decrease resp.resid to zero */
+	qp->resp.resid -= sizeof(payload);
 
-		/* next expected psn, read handles this separately */
-		qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
-		qp->resp.ack_psn = qp->resp.psn;
+	qp->resp.msn++;
 
-		qp->resp.opcode = pkt->opcode;
-		qp->resp.status = IB_WC_SUCCESS;
+	/* next expected psn, read handles this separately */
+	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+	qp->resp.ack_psn = qp->resp.psn;
 
-		return RESPST_ACKNOWLEDGE;
+	qp->resp.opcode = pkt->opcode;
+	qp->resp.status = IB_WC_SUCCESS;
+	return RESPST_ACKNOWLEDGE;
+}
 #else
-		return RESPST_ERR_UNSUPPORTED_OPCODE;
+static enum resp_states do_atomic_write(struct rxe_qp *qp,
+					struct rxe_pkt_info *pkt)
+{
+	return RESPST_ERR_UNSUPPORTED_OPCODE;
+}
 #endif /* CONFIG_64BIT */
+
+static enum resp_states atomic_write_reply(struct rxe_qp *qp,
+					   struct rxe_pkt_info *pkt)
+{
+	struct resp_res *res = qp->resp.res;
+
+	if (!res) {
+		res = rxe_prepare_res(qp, pkt, RXE_ATOMIC_WRITE_MASK);
+		qp->resp.res = res;
 	}
 
-	return RESPST_ACKNOWLEDGE;
+	if (res->replay)
+		return RESPST_ACKNOWLEDGE;
+	return do_atomic_write(qp, pkt);
 }
 
 static struct sk_buff *prepare_ack_packet(struct rxe_qp *qp,
-- 
GitLab


From c0cd1d541704c45030cbb2031612fdd68e8e15d5 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Thu, 15 Dec 2022 17:59:12 +0000
Subject: [PATCH 3983/4122] Revert "arm64: errata: Workaround possible
 Cortex-A715 [ESR|FAR]_ELx corruption"

This reverts commit 44ecda71fd8a70185c270f5914ac563827fe1d4c.

All versions of this patch on the mailing list, including the version
that ended up getting merged, have portions of code guarded by the
non-existent CONFIG_ARM64_WORKAROUND_2645198 option. Although Anshuman
says he tested the code with some additional debug changes [1], I'm
hesitant to fix the CONFIG option and light up a bunch of code right
before I (and others) disappear for the end of year holidays, during
which time we won't be around to deal with any fallout.

So revert the change for now. We can bring back a fixed, tested version
for a later -rc when folks are thinking about things other than trees
and turkeys.

[1] https://lore.kernel.org/r/b6f61241-e436-5db1-1053-3b441080b8d6@arm.com
Reported-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Link: https://lore.kernel.org/r/20221215094811.23188-1-lukas.bulwahn@gmail.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 Documentation/arm64/silicon-errata.rst |  2 --
 arch/arm64/Kconfig                     | 16 ----------------
 arch/arm64/include/asm/hugetlb.h       |  9 ---------
 arch/arm64/include/asm/pgtable.h       |  9 ---------
 arch/arm64/kernel/cpu_errata.c         |  7 -------
 arch/arm64/mm/hugetlbpage.c            | 21 ---------------------
 arch/arm64/mm/mmu.c                    | 21 ---------------------
 arch/arm64/tools/cpucaps               |  1 -
 8 files changed, 86 deletions(-)

diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst
index ec5f889d76819..808ade4cc008a 100644
--- a/Documentation/arm64/silicon-errata.rst
+++ b/Documentation/arm64/silicon-errata.rst
@@ -120,8 +120,6 @@ stable kernels.
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-A710     | #2224489        | ARM64_ERRATUM_2224489       |
 +----------------+-----------------+-----------------+-----------------------------+
-| ARM            | Cortex-A715     | #2645198        | ARM64_ERRATUM_2645198       |
-+----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-X2       | #2119858        | ARM64_ERRATUM_2119858       |
 +----------------+-----------------+-----------------+-----------------------------+
 | ARM            | Cortex-X2       | #2224489        | ARM64_ERRATUM_2224489       |
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index c6d15dd6cae3e..1ccf7e0653a38 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -971,22 +971,6 @@ config ARM64_ERRATUM_2457168
 
 	  If unsure, say Y.
 
-config ARM64_ERRATUM_2645198
-	bool "Cortex-A715: 2645198: Workaround possible [ESR|FAR]_ELx corruption"
-	default y
-	help
-	  This option adds the workaround for ARM Cortex-A715 erratum 2645198.
-
-	  If a Cortex-A715 cpu sees a page mapping permissions change from executable
-	  to non-executable, it may corrupt the ESR_ELx and FAR_ELx registers on the
-	  next instruction abort caused by permission fault.
-
-	  Only user-space does executable to non-executable permission transition via
-	  mprotect() system call. Workaround the problem by doing a break-before-make
-	  TLB invalidation, for all changes to executable user space mappings.
-
-	  If unsure, say Y.
-
 config CAVIUM_ERRATUM_22375
 	bool "Cavium erratum 22375, 24313"
 	default y
diff --git a/arch/arm64/include/asm/hugetlb.h b/arch/arm64/include/asm/hugetlb.h
index 6a4a1ab8eb238..d20f5da2d76fa 100644
--- a/arch/arm64/include/asm/hugetlb.h
+++ b/arch/arm64/include/asm/hugetlb.h
@@ -49,15 +49,6 @@ extern pte_t huge_ptep_get(pte_t *ptep);
 
 void __init arm64_hugetlb_cma_reserve(void);
 
-#define huge_ptep_modify_prot_start huge_ptep_modify_prot_start
-extern pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma,
-					 unsigned long addr, pte_t *ptep);
-
-#define huge_ptep_modify_prot_commit huge_ptep_modify_prot_commit
-extern void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
-					 unsigned long addr, pte_t *ptep,
-					 pte_t old_pte, pte_t new_pte);
-
 #include <asm-generic/hugetlb.h>
 
 #endif /* __ASM_HUGETLB_H */
diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 0dd3cd38f54d8..323ee234043f0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -1095,15 +1095,6 @@ static inline bool pud_sect_supported(void)
 }
 
 
-#define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
-#define ptep_modify_prot_start ptep_modify_prot_start
-extern pte_t ptep_modify_prot_start(struct vm_area_struct *vma,
-				    unsigned long addr, pte_t *ptep);
-
-#define ptep_modify_prot_commit ptep_modify_prot_commit
-extern void ptep_modify_prot_commit(struct vm_area_struct *vma,
-				    unsigned long addr, pte_t *ptep,
-				    pte_t old_pte, pte_t new_pte);
 #endif /* !__ASSEMBLY__ */
 
 #endif /* __ASM_PGTABLE_H */
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 307faa2b4395e..89ac00084f38a 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -661,13 +661,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
 		CAP_MIDR_RANGE_LIST(trbe_write_out_of_range_cpus),
 	},
 #endif
-#ifdef CONFIG_ARM64_ERRATUM_2645198
-	{
-		.desc = "ARM erratum 2645198",
-		.capability = ARM64_WORKAROUND_2645198,
-		ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A715)
-	},
-#endif
 #ifdef CONFIG_ARM64_ERRATUM_2077057
 	{
 		.desc = "ARM erratum 2077057",
diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c
index cd8d96e1fa1ae..35e9a468d13e6 100644
--- a/arch/arm64/mm/hugetlbpage.c
+++ b/arch/arm64/mm/hugetlbpage.c
@@ -559,24 +559,3 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
 {
 	return __hugetlb_valid_size(size);
 }
-
-pte_t huge_ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-	if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_2645198) &&
-	    cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
-		/*
-		 * Break-before-make (BBM) is required for all user space mappings
-		 * when the permission changes from executable to non-executable
-		 * in cases where cpu is affected with errata #2645198.
-		 */
-		if (pte_user_exec(READ_ONCE(*ptep)))
-			return huge_ptep_clear_flush(vma, addr, ptep);
-	}
-	return huge_ptep_get_and_clear(vma->vm_mm, addr, ptep);
-}
-
-void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
-				  pte_t old_pte, pte_t pte)
-{
-	set_huge_pte_at(vma->vm_mm, addr, ptep, pte);
-}
diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 2368e4daa23d3..d386033a074ce 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1702,24 +1702,3 @@ static int __init prevent_bootmem_remove_init(void)
 }
 early_initcall(prevent_bootmem_remove_init);
 #endif
-
-pte_t ptep_modify_prot_start(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep)
-{
-	if (IS_ENABLED(CONFIG_ARM64_WORKAROUND_2645198) &&
-	    cpus_have_const_cap(ARM64_WORKAROUND_2645198)) {
-		/*
-		 * Break-before-make (BBM) is required for all user space mappings
-		 * when the permission changes from executable to non-executable
-		 * in cases where cpu is affected with errata #2645198.
-		 */
-		if (pte_user_exec(READ_ONCE(*ptep)))
-			return ptep_clear_flush(vma, addr, ptep);
-	}
-	return ptep_get_and_clear(vma->vm_mm, addr, ptep);
-}
-
-void ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep,
-			     pte_t old_pte, pte_t pte)
-{
-	set_pte_at(vma->vm_mm, addr, ptep, pte);
-}
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps
index dfeb2c51e2573..a86ee376920a0 100644
--- a/arch/arm64/tools/cpucaps
+++ b/arch/arm64/tools/cpucaps
@@ -71,7 +71,6 @@ WORKAROUND_2038923
 WORKAROUND_2064142
 WORKAROUND_2077057
 WORKAROUND_2457168
-WORKAROUND_2645198
 WORKAROUND_2658417
 WORKAROUND_TRBE_OVERWRITE_FILL_MODE
 WORKAROUND_TSB_FLUSH_FAILURE
-- 
GitLab


From 3f148f3318140035e87decc1214795ff0755757b Mon Sep 17 00:00:00 2001
From: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Date: Fri, 28 Oct 2022 00:31:04 +0300
Subject: [PATCH 3984/4122] x86/kasan: Map shadow for percpu pages on demand

KASAN maps shadow for the entire CPU-entry-area:
  [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE]

This will explode once the per-cpu entry areas are randomized since it
will increase CPU_ENTRY_AREA_MAP_SIZE to 512 GB and KASAN fails to
allocate shadow for such big area.

Fix this by allocating KASAN shadow only for really used cpu entry area
addresses mapped by cea_map_percpu_pages()

Thanks to the 0day folks for finding and reporting this to be an issue.

[ dhansen: tweak changelog since this will get committed before peterz's
	   actual cpu-entry-area randomization ]

Signed-off-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Yujie Liu <yujie.liu@intel.com>
Cc: kernel test robot <yujie.liu@intel.com>
Link: https://lore.kernel.org/r/202210241508.2e203c3d-yujie.liu@intel.com
---
 arch/x86/include/asm/kasan.h |  3 +++
 arch/x86/mm/cpu_entry_area.c |  8 +++++++-
 arch/x86/mm/kasan_init_64.c  | 15 ++++++++++++---
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 13e70da38beda..de75306b932ef 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -28,9 +28,12 @@
 #ifdef CONFIG_KASAN
 void __init kasan_early_init(void);
 void __init kasan_init(void);
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid);
 #else
 static inline void kasan_early_init(void) { }
 static inline void kasan_init(void) { }
+static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size,
+						   int nid) { }
 #endif
 
 #endif
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 6c2f1b76a0b61..d7081b1accca6 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -9,6 +9,7 @@
 #include <asm/cpu_entry_area.h>
 #include <asm/fixmap.h>
 #include <asm/desc.h>
+#include <asm/kasan.h>
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
 
@@ -53,8 +54,13 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 static void __init
 cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 {
+	phys_addr_t pa = per_cpu_ptr_to_phys(ptr);
+
+	kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE,
+					early_pfn_to_nid(PFN_DOWN(pa)));
+
 	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
-		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+		cea_set_pte(cea_vaddr, pa, prot);
 }
 
 static void __init percpu_setup_debug_store(unsigned int cpu)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index e7b9b464a82f1..d1416926ad526 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -316,6 +316,18 @@ void __init kasan_early_init(void)
 	kasan_map_early_shadow(init_top_pgt);
 }
 
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
+{
+	unsigned long shadow_start, shadow_end;
+
+	shadow_start = (unsigned long)kasan_mem_to_shadow(va);
+	shadow_start = round_down(shadow_start, PAGE_SIZE);
+	shadow_end = (unsigned long)kasan_mem_to_shadow(va + size);
+	shadow_end = round_up(shadow_end, PAGE_SIZE);
+
+	kasan_populate_shadow(shadow_start, shadow_end, nid);
+}
+
 void __init kasan_init(void)
 {
 	int i;
@@ -393,9 +405,6 @@ void __init kasan_init(void)
 		kasan_mem_to_shadow((void *)VMALLOC_END + 1),
 		shadow_cpu_entry_begin);
 
-	kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
-			      (unsigned long)shadow_cpu_entry_end, 0);
-
 	kasan_populate_early_shadow(shadow_cpu_entry_end,
 			kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
-- 
GitLab


From 97e3d26b5e5f371b3ee223d94dd123e6c442ba80 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 27 Oct 2022 14:54:41 -0700
Subject: [PATCH 3985/4122] x86/mm: Randomize per-cpu entry area

Seth found that the CPU-entry-area; the piece of per-cpu data that is
mapped into the userspace page-tables for kPTI is not subject to any
randomization -- irrespective of kASLR settings.

On x86_64 a whole P4D (512 GB) of virtual address space is reserved for
this structure, which is plenty large enough to randomize things a
little.

As such, use a straight forward randomization scheme that avoids
duplicates to spread the existing CPUs over the available space.

  [ bp: Fix le build. ]

Reported-by: Seth Jenkins <sethjenkins@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
---
 arch/x86/include/asm/cpu_entry_area.h |  4 ---
 arch/x86/include/asm/pgtable_areas.h  |  8 ++++-
 arch/x86/kernel/hw_breakpoint.c       |  2 +-
 arch/x86/mm/cpu_entry_area.c          | 46 ++++++++++++++++++++++++---
 4 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index 75efc4c6f0766..462fc34f13176 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -130,10 +130,6 @@ struct cpu_entry_area {
 };
 
 #define CPU_ENTRY_AREA_SIZE		(sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_ARRAY_SIZE	(CPU_ENTRY_AREA_SIZE * NR_CPUS)
-
-/* Total size includes the readonly IDT mapping page as well: */
-#define CPU_ENTRY_AREA_TOTAL_SIZE	(CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
index d34cce1b995cf..4f056fb88174b 100644
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -11,6 +11,12 @@
 
 #define CPU_ENTRY_AREA_RO_IDT_VADDR	((void *)CPU_ENTRY_AREA_RO_IDT)
 
-#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE		(CPU_ENTRY_AREA_PER_CPU +		\
+					 (CPU_ENTRY_AREA_SIZE * NR_CPUS) -	\
+					 CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE		P4D_SIZE
+#endif
 
 #endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 668a4a6533d92..bbb0f737aab19 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
 
 	/* CPU entry erea is always used for CPU entry */
 	if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
-			CPU_ENTRY_AREA_TOTAL_SIZE))
+			CPU_ENTRY_AREA_MAP_SIZE))
 		return true;
 
 	/*
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index d7081b1accca6..dff9001e5e125 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -16,16 +16,53 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
-#endif
 
-#ifdef CONFIG_X86_32
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+	unsigned int max_cea;
+	unsigned int i, j;
+
+	max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+
+	/* O(sodding terrible) */
+	for_each_possible_cpu(i) {
+		unsigned int cea;
+
+again:
+		cea = prandom_u32_max(max_cea);
+
+		for_each_possible_cpu(j) {
+			if (cea_offset(j) == cea)
+				goto again;
+
+			if (i == j)
+				break;
+		}
+
+		per_cpu(_cea_offset, i) = cea;
+	}
+}
+#else /* !X86_64 */
 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+	return cpu;
+}
+static inline void init_cea_offsets(void) { }
 #endif
 
 /* Is called from entry code, so must be noinstr */
 noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
-	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
 	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
 
 	return (struct cpu_entry_area *) va;
@@ -211,7 +248,6 @@ static __init void setup_cpu_entry_area_ptes(void)
 
 	/* The +1 is for the readonly IDT: */
 	BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
-	BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
 	BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
 
 	start = CPU_ENTRY_AREA_BASE;
@@ -227,6 +263,8 @@ void __init setup_cpu_entry_areas(void)
 {
 	unsigned int cpu;
 
+	init_cea_offsets();
+
 	setup_cpu_entry_area_ptes();
 
 	for_each_possible_cpu(cpu)
-- 
GitLab


From af80602799681c78f14fbe20b6185a56020dedee Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Oct 2022 21:38:18 +0200
Subject: [PATCH 3986/4122] mm: Move mm_cachep initialization to mm_init()

In order to allow using mm_alloc() much earlier, move initializing
mm_cachep into mm_init().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221025201057.751153381@infradead.org
---
 include/linux/sched/task.h |  1 +
 init/main.c                |  1 +
 kernel/fork.c              | 32 ++++++++++++++++++--------------
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index d6c48163c6def..8431558641a42 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -65,6 +65,7 @@ extern void sched_dead(struct task_struct *p);
 void __noreturn do_task_dead(void);
 void __noreturn make_task_dead(int signr);
 
+extern void mm_cache_init(void);
 extern void proc_caches_init(void);
 
 extern void fork_init(void);
diff --git a/init/main.c b/init/main.c
index aa21add5f7c54..f1d1a549e1ccb 100644
--- a/init/main.c
+++ b/init/main.c
@@ -860,6 +860,7 @@ static void __init mm_init(void)
 	/* Should be run after espfix64 is set up. */
 	pti_init();
 	kmsan_init_runtime();
+	mm_cache_init();
 }
 
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d5..451ce8063f850 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -3015,10 +3015,27 @@ static void sighand_ctor(void *data)
 	init_waitqueue_head(&sighand->signalfd_wqh);
 }
 
-void __init proc_caches_init(void)
+void __init mm_cache_init(void)
 {
 	unsigned int mm_size;
 
+	/*
+	 * The mm_cpumask is located at the end of mm_struct, and is
+	 * dynamically sized based on the maximum CPU number this system
+	 * can have, taking hotplug into account (nr_cpu_ids).
+	 */
+	mm_size = sizeof(struct mm_struct) + cpumask_size();
+
+	mm_cachep = kmem_cache_create_usercopy("mm_struct",
+			mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
+			offsetof(struct mm_struct, saved_auxv),
+			sizeof_field(struct mm_struct, saved_auxv),
+			NULL);
+}
+
+void __init proc_caches_init(void)
+{
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3036,19 +3053,6 @@ void __init proc_caches_init(void)
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			NULL);
 
-	/*
-	 * The mm_cpumask is located at the end of mm_struct, and is
-	 * dynamically sized based on the maximum CPU number this system
-	 * can have, taking hotplug into account (nr_cpu_ids).
-	 */
-	mm_size = sizeof(struct mm_struct) + cpumask_size();
-
-	mm_cachep = kmem_cache_create_usercopy("mm_struct",
-			mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
-			offsetof(struct mm_struct, saved_auxv),
-			sizeof_field(struct mm_struct, saved_auxv),
-			NULL);
 	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
 	mmap_init();
 	nsproxy_cache_init();
-- 
GitLab


From 3f4c8211d982099be693be9aa7d6fc4607dff290 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Oct 2022 21:38:21 +0200
Subject: [PATCH 3987/4122] x86/mm: Use mm_alloc() in poking_init()

Instead of duplicating init_mm, allocate a fresh mm. The advantage is
that mm_alloc() has much simpler dependencies. Additionally it makes
more conceptual sense, init_mm has no (and must not have) user state
to duplicate.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221025201057.816175235@infradead.org
---
 arch/x86/mm/init.c         | 2 +-
 include/linux/sched/task.h | 1 -
 kernel/fork.c              | 5 -----
 3 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9121bc1b9453a..d3987359d4414 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -801,7 +801,7 @@ void __init poking_init(void)
 	spinlock_t *ptl;
 	pte_t *ptep;
 
-	poking_mm = copy_init_mm();
+	poking_mm = mm_alloc();
 	BUG_ON(!poking_mm);
 
 	/*
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 8431558641a42..357e0068497c1 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -91,7 +91,6 @@ extern void exit_itimers(struct task_struct *);
 extern pid_t kernel_clone(struct kernel_clone_args *kargs);
 struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 struct task_struct *fork_idle(int);
-struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
diff --git a/kernel/fork.c b/kernel/fork.c
index 451ce8063f850..6142c588c18ae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2592,11 +2592,6 @@ struct task_struct * __init fork_idle(int cpu)
 	return task;
 }
 
-struct mm_struct *copy_init_mm(void)
-{
-	return dup_mm(NULL, &init_mm);
-}
-
 /*
  * This is like kernel_clone(), but shaved down and tailored to just
  * creating io_uring workers. It returns a created task, or an error pointer.
-- 
GitLab


From 5b93a83649c7cba3a15eb7e8959b250841acb1b1 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Oct 2022 21:38:25 +0200
Subject: [PATCH 3988/4122] x86/mm: Initialize text poking earlier

Move poking_init() up a bunch; specifically move it right after
mm_init() which is right before ftrace_init().

This will allow simplifying ftrace text poking which currently has
a bunch of exceptions for early boot.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221025201057.881703081@infradead.org
---
 init/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/init/main.c b/init/main.c
index f1d1a549e1ccb..5372ea2437352 100644
--- a/init/main.c
+++ b/init/main.c
@@ -996,7 +996,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
 	sort_main_extable();
 	trap_init();
 	mm_init();
-
+	poking_init();
 	ftrace_init();
 
 	/* trace_printk can be enabled here */
@@ -1135,7 +1135,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
 	taskstats_init_early();
 	delayacct_init();
 
-	poking_init();
 	check_bugs();
 
 	acpi_subsystem_init();
-- 
GitLab


From eb7d389d5b2b3c453332abc41c3eea73290cc006 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Oct 2022 21:39:47 +0200
Subject: [PATCH 3989/4122] x86/ftrace: Remove SYSTEM_BOOTING exceptions

Now that text_poke is available before ftrace, remove the
SYSTEM_BOOTING exceptions.

Specifically, this cures a W+X case during boot.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221025201057.945960823@infradead.org
---
 arch/x86/kernel/alternative.c | 10 ----------
 arch/x86/kernel/ftrace.c      |  3 +--
 2 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 5cadcea035e04..e240351e0bc12 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1681,11 +1681,6 @@ void __ref text_poke_queue(void *addr, const void *opcode, size_t len, const voi
 {
 	struct text_poke_loc *tp;
 
-	if (unlikely(system_state == SYSTEM_BOOTING)) {
-		text_poke_early(addr, opcode, len);
-		return;
-	}
-
 	text_poke_flush(addr);
 
 	tp = &tp_vec[tp_vec_nr++];
@@ -1707,11 +1702,6 @@ void __ref text_poke_bp(void *addr, const void *opcode, size_t len, const void *
 {
 	struct text_poke_loc tp;
 
-	if (unlikely(system_state == SYSTEM_BOOTING)) {
-		text_poke_early(addr, opcode, len);
-		return;
-	}
-
 	text_poke_loc_init(&tp, addr, opcode, len, emulate);
 	text_poke_bp_batch(&tp, 1);
 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index bd165004776d9..43628b8480fa1 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -415,8 +415,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 
 	set_vm_flush_reset_perms(trampoline);
 
-	if (likely(system_state != SYSTEM_BOOTING))
-		set_memory_ro((unsigned long)trampoline, npages);
+	set_memory_ro((unsigned long)trampoline, npages);
 	set_memory_x((unsigned long)trampoline, npages);
 	return (unsigned long)trampoline;
 fail:
-- 
GitLab


From 414ebf148cb5c5fa727ec51fdb69c4ab82dccf3b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Oct 2022 21:39:43 +0200
Subject: [PATCH 3990/4122] x86/mm: Do verify W^X at boot up

Straight up revert of commit:

  a970174d7a10 ("x86/mm: Do not verify W^X at boot up")

now that the root cause has been fixed.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221025201058.011279208@infradead.org
---
 arch/x86/mm/pat/set_memory.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 2e5a045731dec..97342c42dda8e 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -587,10 +587,6 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
 {
 	unsigned long end;
 
-	/* Kernel text is rw at boot up */
-	if (system_state == SYSTEM_BOOTING)
-		return new;
-
 	/*
 	 * 32-bit has some unfixable W+X issues, like EFI code
 	 * and writeable data being in the same page.  Disable
-- 
GitLab


From d48567c9a0d1e605639f8a8705a61bbb55fb4e84 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 26 Oct 2022 12:13:03 +0200
Subject: [PATCH 3991/4122] mm: Introduce set_memory_rox()

Because endlessly repeating:

	set_memory_ro()
	set_memory_x()

is getting tedious.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/Y1jek64pXOsougmz@hirez.programming.kicks-ass.net
---
 arch/arm/mach-omap1/sram-init.c | 8 +++-----
 arch/arm/mach-omap2/sram.c      | 8 +++-----
 arch/powerpc/kernel/kprobes.c   | 9 ++++-----
 arch/x86/kernel/ftrace.c        | 5 ++---
 arch/x86/kernel/kprobes/core.c  | 9 ++-------
 drivers/misc/sram-exec.c        | 7 ++-----
 include/linux/filter.h          | 3 +--
 include/linux/set_memory.h      | 8 ++++++++
 kernel/bpf/bpf_struct_ops.c     | 3 +--
 kernel/bpf/core.c               | 6 ++----
 kernel/bpf/trampoline.c         | 3 +--
 net/bpf/bpf_dummy_struct_ops.c  | 3 +--
 12 files changed, 30 insertions(+), 42 deletions(-)

diff --git a/arch/arm/mach-omap1/sram-init.c b/arch/arm/mach-omap1/sram-init.c
index 27c42e2a21cc1..dabf0c4defebf 100644
--- a/arch/arm/mach-omap1/sram-init.c
+++ b/arch/arm/mach-omap1/sram-init.c
@@ -10,11 +10,11 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/set_memory.h>
 
 #include <asm/fncpy.h>
 #include <asm/tlb.h>
 #include <asm/cacheflush.h>
-#include <asm/set_memory.h>
 
 #include <asm/mach/map.h>
 
@@ -74,8 +74,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
 
 	dst = fncpy(sram, funcp, size);
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 
 	return dst;
 }
@@ -126,8 +125,7 @@ static void __init omap_detect_and_map_sram(void)
 	base = (unsigned long)omap_sram_base;
 	pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 }
 
 static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);
diff --git a/arch/arm/mach-omap2/sram.c b/arch/arm/mach-omap2/sram.c
index 39cf270da718a..815d390109d21 100644
--- a/arch/arm/mach-omap2/sram.c
+++ b/arch/arm/mach-omap2/sram.c
@@ -14,11 +14,11 @@
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
+#include <linux/set_memory.h>
 
 #include <asm/fncpy.h>
 #include <asm/tlb.h>
 #include <asm/cacheflush.h>
-#include <asm/set_memory.h>
 
 #include <asm/mach/map.h>
 
@@ -96,8 +96,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
 
 	dst = fncpy(sram, funcp, size);
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 
 	return dst;
 }
@@ -217,8 +216,7 @@ static void __init omap2_map_sram(void)
 	base = (unsigned long)omap_sram_base;
 	pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 }
 
 static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bd7b1a0354594..7a89de3026097 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -20,12 +20,12 @@
 #include <linux/kdebug.h>
 #include <linux/slab.h>
 #include <linux/moduleloader.h>
+#include <linux/set_memory.h>
 #include <asm/code-patching.h>
 #include <asm/cacheflush.h>
 #include <asm/sstep.h>
 #include <asm/sections.h>
 #include <asm/inst.h>
-#include <asm/set_memory.h>
 #include <linux/uaccess.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -134,10 +134,9 @@ void *alloc_insn_page(void)
 	if (!page)
 		return NULL;
 
-	if (strict_module_rwx_enabled()) {
-		set_memory_ro((unsigned long)page, 1);
-		set_memory_x((unsigned long)page, 1);
-	}
+	if (strict_module_rwx_enabled())
+		set_memory_rox((unsigned long)page, 1);
+
 	return page;
 }
 
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 43628b8480fa1..03579460d0ec4 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,10 +24,10 @@
 #include <linux/module.h>
 #include <linux/memory.h>
 #include <linux/vmalloc.h>
+#include <linux/set_memory.h>
 
 #include <trace/syscall.h>
 
-#include <asm/set_memory.h>
 #include <asm/kprobes.h>
 #include <asm/ftrace.h>
 #include <asm/nops.h>
@@ -415,8 +415,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 
 	set_vm_flush_reset_perms(trampoline);
 
-	set_memory_ro((unsigned long)trampoline, npages);
-	set_memory_x((unsigned long)trampoline, npages);
+	set_memory_rox((unsigned long)trampoline, npages);
 	return (unsigned long)trampoline;
 fail:
 	tramp_free(trampoline);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index eb8bc82846b99..e7b7ca64acdfa 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -43,6 +43,7 @@
 #include <linux/objtool.h>
 #include <linux/vmalloc.h>
 #include <linux/pgtable.h>
+#include <linux/set_memory.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -51,7 +52,6 @@
 #include <asm/alternative.h>
 #include <asm/insn.h>
 #include <asm/debugreg.h>
-#include <asm/set_memory.h>
 #include <asm/ibt.h>
 
 #include "common.h"
@@ -415,17 +415,12 @@ void *alloc_insn_page(void)
 		return NULL;
 
 	set_vm_flush_reset_perms(page);
-	/*
-	 * First make the page read-only, and only then make it executable to
-	 * prevent it from being W+X in between.
-	 */
-	set_memory_ro((unsigned long)page, 1);
 
 	/*
 	 * TODO: Once additional kernel code protection mechanisms are set, ensure
 	 * that the page was not maliciously altered and it is still zeroed.
 	 */
-	set_memory_x((unsigned long)page, 1);
+	set_memory_rox((unsigned long)page, 1);
 
 	return page;
 }
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
index a948e95d43754..b71dbbd737382 100644
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -10,9 +10,9 @@
 #include <linux/genalloc.h>
 #include <linux/mm.h>
 #include <linux/sram.h>
+#include <linux/set_memory.h>
 
 #include <asm/fncpy.h>
-#include <asm/set_memory.h>
 
 #include "sram.h"
 
@@ -106,10 +106,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
 
 	dst_cpy = fncpy(dst, src, size);
 
-	ret = set_memory_ro((unsigned long)base, pages);
-	if (ret)
-		goto error_out;
-	ret = set_memory_x((unsigned long)base, pages);
+	ret = set_memory_rox((unsigned long)base, pages);
 	if (ret)
 		goto error_out;
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index efc42a6e3aed0..f0b17aff4e66b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -860,8 +860,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
 	set_vm_flush_reset_perms(hdr);
-	set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
-	set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
+	set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
 }
 
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index 369769ce7399d..023ebc67a36cd 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -14,6 +14,14 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+static inline int set_memory_rox(unsigned long addr, int numpages)
+{
+	int ret = set_memory_ro(addr, numpages);
+	if (ret)
+		return ret;
+	return set_memory_x(addr, numpages);
+}
+
 #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP
 static inline int set_direct_map_invalid_noflush(struct page *page)
 {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 84b2d9dba79a7..ece9870cab68e 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -494,8 +494,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	refcount_set(&kvalue->refcnt, 1);
 	bpf_map_inc(map);
 
-	set_memory_ro((long)st_map->image, 1);
-	set_memory_x((long)st_map->image, 1);
+	set_memory_rox((long)st_map->image, 1);
 	err = st_ops->reg(kdata);
 	if (likely(!err)) {
 		/* Pair with smp_load_acquire() during lookup_elem().
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 25a54e04560e5..b0525ea252721 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -864,8 +864,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins
 	list_add_tail(&pack->list, &pack_list);
 
 	set_vm_flush_reset_perms(pack->ptr);
-	set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
-	set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+	set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
 	return pack;
 }
 
@@ -883,8 +882,7 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
 		if (ptr) {
 			bpf_fill_ill_insns(ptr, size);
 			set_vm_flush_reset_perms(ptr);
-			set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
-			set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
+			set_memory_rox((unsigned long)ptr, size / PAGE_SIZE);
 		}
 		goto out;
 	}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index bf0906e1e2b97..a848922ea6f77 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -468,8 +468,7 @@ again:
 	if (err < 0)
 		goto out;
 
-	set_memory_ro((long)im->image, 1);
-	set_memory_x((long)im->image, 1);
+	set_memory_rox((long)im->image, 1);
 
 	WARN_ON(tr->cur_image && tr->selector == 0);
 	WARN_ON(!tr->cur_image && tr->selector);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index e78dadfc58290..9ff32324251be 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -124,8 +124,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
 	if (err < 0)
 		goto out;
 
-	set_memory_ro((long)image, 1);
-	set_memory_x((long)image, 1);
+	set_memory_rox((long)image, 1);
 	prog_ret = dummy_ops_call_op(image, args);
 
 	err = dummy_ops_copy_args(args);
-- 
GitLab


From 60463628c9e0a8060ac6bef0457b0505c7532c7c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Sat, 29 Oct 2022 13:19:31 +0200
Subject: [PATCH 3992/4122] x86/mm: Implement native set_memory_rox()

Provide a native implementation of set_memory_rox(), avoiding the
double set_memory_ro();set_memory_x(); calls.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 arch/x86/include/asm/set_memory.h |  3 +++
 arch/x86/mm/pat/set_memory.c      | 10 ++++++++++
 include/linux/set_memory.h        |  2 ++
 3 files changed, 15 insertions(+)

diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h
index b45c4d27fd46e..a5e89641bd2da 100644
--- a/arch/x86/include/asm/set_memory.h
+++ b/arch/x86/include/asm/set_memory.h
@@ -6,6 +6,9 @@
 #include <asm/page.h>
 #include <asm-generic/set_memory.h>
 
+#define set_memory_rox set_memory_rox
+int set_memory_rox(unsigned long addr, int numpages);
+
 /*
  * The set_memory_* API can be used to change various attributes of a virtual
  * address range. The attributes include:
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 97342c42dda8e..f275605892df9 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -2025,6 +2025,16 @@ int set_memory_ro(unsigned long addr, int numpages)
 	return change_page_attr_clear(&addr, numpages, __pgprot(_PAGE_RW), 0);
 }
 
+int set_memory_rox(unsigned long addr, int numpages)
+{
+	pgprot_t clr = __pgprot(_PAGE_RW);
+
+	if (__supported_pte_mask & _PAGE_NX)
+		clr.pgprot |= _PAGE_NX;
+
+	return change_page_attr_clear(&addr, numpages, clr, 0);
+}
+
 int set_memory_rw(unsigned long addr, int numpages)
 {
 	return change_page_attr_set(&addr, numpages, __pgprot(_PAGE_RW), 0);
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index 023ebc67a36cd..95ac8398ee72d 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -14,6 +14,7 @@ static inline int set_memory_x(unsigned long addr,  int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+#ifndef set_memory_rox
 static inline int set_memory_rox(unsigned long addr, int numpages)
 {
 	int ret = set_memory_ro(addr, numpages);
@@ -21,6 +22,7 @@ static inline int set_memory_rox(unsigned long addr, int numpages)
 		return ret;
 	return set_memory_x(addr, numpages);
 }
+#endif
 
 #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP
 static inline int set_direct_map_invalid_noflush(struct page *page)
-- 
GitLab


From 93b3037a1482758349f3b0431406bcc457ca1cbc Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 14:04:46 +0100
Subject: [PATCH 3993/4122] mm: Update ptep_get_lockless()'s comment

Improve the comment.

Suggested-by: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.515572025%40infradead.org
---
 include/linux/pgtable.h | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a108b60a6962b..c0b29000c3c0c 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -300,15 +300,12 @@ static inline pte_t ptep_get(pte_t *ptep)
 
 #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
 /*
- * WARNING: only to be used in the get_user_pages_fast() implementation.
- *
- * With get_user_pages_fast(), we walk down the pagetables without taking any
- * locks.  For this we would like to load the pointers atomically, but sometimes
- * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE).  What
- * we do have is the guarantee that a PTE will only either go from not present
- * to present, or present to not present or both -- it will not switch to a
- * completely different present page without a TLB flush in between; something
- * that we are blocking by holding interrupts off.
+ * For walking the pagetables without holding any locks.  Some architectures
+ * (eg x86-32 PAE) cannot load the entries atomically without using expensive
+ * instructions.  We are guaranteed that a PTE will only either go from not
+ * present to present, or present to not present -- it will not switch to a
+ * completely different present page without a TLB flush inbetween; which we
+ * are blocking by holding interrupts off.
  *
  * Setting ptes from not present to present goes:
  *
-- 
GitLab


From fbfdec9989e69e0b17aa3bf32fcb22d04cc33301 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 17:02:29 +0100
Subject: [PATCH 3994/4122] x86/mm/pae: Make pmd_t similar to pte_t

Instead of mucking about with at least 2 different ways of fudging
it, do the same thing we do for pte_t.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.580310787%40infradead.org
---
 arch/x86/include/asm/pgtable-3level.h       | 42 +++++++--------------
 arch/x86/include/asm/pgtable-3level_types.h |  7 ++++
 arch/x86/include/asm/pgtable_64_types.h     |  1 +
 arch/x86/include/asm/pgtable_types.h        |  4 +-
 4 files changed, 23 insertions(+), 31 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 28421a8872093..28556d22feb89 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -87,7 +87,7 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 		ret |= ((pmdval_t)*(tmp + 1)) << 32;
 	}
 
-	return (pmd_t) { ret };
+	return (pmd_t) { .pmd = ret };
 }
 
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -121,12 +121,11 @@ static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
 	ptep->pte_high = 0;
 }
 
-static inline void native_pmd_clear(pmd_t *pmd)
+static inline void native_pmd_clear(pmd_t *pmdp)
 {
-	u32 *tmp = (u32 *)pmd;
-	*tmp = 0;
+	pmdp->pmd_low = 0;
 	smp_wmb();
-	*(tmp + 1) = 0;
+	pmdp->pmd_high = 0;
 }
 
 static inline void native_pud_clear(pud_t *pudp)
@@ -162,25 +161,17 @@ static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
 #endif
 
-union split_pmd {
-	struct {
-		u32 pmd_low;
-		u32 pmd_high;
-	};
-	pmd_t pmd;
-};
-
 #ifdef CONFIG_SMP
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 {
-	union split_pmd res, *orig = (union split_pmd *)pmdp;
+	pmd_t res;
 
 	/* xchg acts as a barrier before setting of the high bits */
-	res.pmd_low = xchg(&orig->pmd_low, 0);
-	res.pmd_high = orig->pmd_high;
-	orig->pmd_high = 0;
+	res.pmd_low = xchg(&pmdp->pmd_low, 0);
+	res.pmd_high = READ_ONCE(pmdp->pmd_high);
+	WRITE_ONCE(pmdp->pmd_high, 0);
 
-	return res.pmd;
+	return res;
 }
 #else
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
@@ -199,17 +190,12 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 	 * anybody.
 	 */
 	if (!(pmd_val(pmd) & _PAGE_PRESENT)) {
-		union split_pmd old, new, *ptr;
-
-		ptr = (union split_pmd *)pmdp;
-
-		new.pmd = pmd;
-
 		/* xchg acts as a barrier before setting of the high bits */
-		old.pmd_low = xchg(&ptr->pmd_low, new.pmd_low);
-		old.pmd_high = ptr->pmd_high;
-		ptr->pmd_high = new.pmd_high;
-		return old.pmd;
+		old.pmd_low = xchg(&pmdp->pmd_low, pmd.pmd_low);
+		old.pmd_high = READ_ONCE(pmdp->pmd_high);
+		WRITE_ONCE(pmdp->pmd_high, pmd.pmd_high);
+
+		return old;
 	}
 
 	do {
diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h
index 56baf43befb49..80911349519e8 100644
--- a/arch/x86/include/asm/pgtable-3level_types.h
+++ b/arch/x86/include/asm/pgtable-3level_types.h
@@ -18,6 +18,13 @@ typedef union {
 	};
 	pteval_t pte;
 } pte_t;
+
+typedef union {
+	struct {
+		unsigned long pmd_low, pmd_high;
+	};
+	pmdval_t pmd;
+} pmd_t;
 #endif	/* !__ASSEMBLY__ */
 
 #define SHARED_KERNEL_PMD	(!static_cpu_has(X86_FEATURE_PTI))
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 04f36063ad546..38bf837e35544 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -19,6 +19,7 @@ typedef unsigned long	pgdval_t;
 typedef unsigned long	pgprotval_t;
 
 typedef struct { pteval_t pte; } pte_t;
+typedef struct { pmdval_t pmd; } pmd_t;
 
 #ifdef CONFIG_X86_5LEVEL
 extern unsigned int __pgtable_l5_enabled;
diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index aa174fed3a71c..447d4bee25c48 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -361,11 +361,9 @@ static inline pudval_t native_pud_val(pud_t pud)
 #endif
 
 #if CONFIG_PGTABLE_LEVELS > 2
-typedef struct { pmdval_t pmd; } pmd_t;
-
 static inline pmd_t native_make_pmd(pmdval_t val)
 {
-	return (pmd_t) { val };
+	return (pmd_t) { .pmd = val };
 }
 
 static inline pmdval_t native_pmd_val(pmd_t pmd)
-- 
GitLab


From 0862ff059c9e29f023e617b134f9ea332cae50b8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 17:05:55 +0100
Subject: [PATCH 3995/4122] sh/mm: Make pmd_t similar to pte_t

Just like 64bit pte_t, have a low/high split in pmd_t.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.645657294%40infradead.org
---
 arch/sh/include/asm/pgtable-3level.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h
index cdced80a7ffa3..a889a3a938bab 100644
--- a/arch/sh/include/asm/pgtable-3level.h
+++ b/arch/sh/include/asm/pgtable-3level.h
@@ -28,9 +28,15 @@
 #define pmd_ERROR(e) \
 	printk("%s:%d: bad pmd %016llx.\n", __FILE__, __LINE__, pmd_val(e))
 
-typedef struct { unsigned long long pmd; } pmd_t;
+typedef struct {
+	struct {
+		unsigned long pmd_low;
+		unsigned long pmd_high;
+	};
+	unsigned long long pmd;
+} pmd_t;
 #define pmd_val(x)	((x).pmd)
-#define __pmd(x)	((pmd_t) { (x) } )
+#define __pmd(x)	((pmd_t) { .pmd = (x) } )
 
 static inline pmd_t *pud_pgtable(pud_t pud)
 {
-- 
GitLab


From 024d232ae4fcd7a7ce8ea239607d6c1246d7adc8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 17:16:22 +0100
Subject: [PATCH 3996/4122] mm: Fix pmd_read_atomic()

AFAICT there's no reason to do anything different than what we do for
PTEs. Make it so (also affects SH).

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.711181252%40infradead.org
---
 arch/x86/include/asm/pgtable-3level.h | 56 ---------------------------
 include/linux/pgtable.h               | 47 +++++++++++++++++-----
 2 files changed, 37 insertions(+), 66 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 28556d22feb89..94f50b0100a51 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -34,62 +34,6 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
 	ptep->pte_low = pte.pte_low;
 }
 
-#define pmd_read_atomic pmd_read_atomic
-/*
- * pte_offset_map_lock() on 32-bit PAE kernels was reading the pmd_t with
- * a "*pmdp" dereference done by GCC. Problem is, in certain places
- * where pte_offset_map_lock() is called, concurrent page faults are
- * allowed, if the mmap_lock is hold for reading. An example is mincore
- * vs page faults vs MADV_DONTNEED. On the page fault side
- * pmd_populate() rightfully does a set_64bit(), but if we're reading the
- * pmd_t with a "*pmdp" on the mincore side, a SMP race can happen
- * because GCC will not read the 64-bit value of the pmd atomically.
- *
- * To fix this all places running pte_offset_map_lock() while holding the
- * mmap_lock in read mode, shall read the pmdp pointer using this
- * function to know if the pmd is null or not, and in turn to know if
- * they can run pte_offset_map_lock() or pmd_trans_huge() or other pmd
- * operations.
- *
- * Without THP if the mmap_lock is held for reading, the pmd can only
- * transition from null to not null while pmd_read_atomic() runs. So
- * we can always return atomic pmd values with this function.
- *
- * With THP if the mmap_lock is held for reading, the pmd can become
- * trans_huge or none or point to a pte (and in turn become "stable")
- * at any time under pmd_read_atomic(). We could read it truly
- * atomically here with an atomic64_read() for the THP enabled case (and
- * it would be a whole lot simpler), but to avoid using cmpxchg8b we
- * only return an atomic pmdval if the low part of the pmdval is later
- * found to be stable (i.e. pointing to a pte). We are also returning a
- * 'none' (zero) pmdval if the low part of the pmd is zero.
- *
- * In some cases the high and low part of the pmdval returned may not be
- * consistent if THP is enabled (the low part may point to previously
- * mapped hugepage, while the high part may point to a more recently
- * mapped hugepage), but pmd_none_or_trans_huge_or_clear_bad() only
- * needs the low part of the pmd to be read atomically to decide if the
- * pmd is unstable or not, with the only exception when the low part
- * of the pmd is zero, in which case we return a 'none' pmd.
- */
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
-	pmdval_t ret;
-	u32 *tmp = (u32 *)pmdp;
-
-	ret = (pmdval_t) (*tmp);
-	if (ret) {
-		/*
-		 * If the low part is null, we must not read the high part
-		 * or we can end up with a partial pmd.
-		 */
-		smp_rmb();
-		ret |= ((pmdval_t)*(tmp + 1)) << 32;
-	}
-
-	return (pmd_t) { .pmd = ret };
-}
-
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
 	set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index c0b29000c3c0c..765fd4bf420f8 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -298,6 +298,13 @@ static inline pte_t ptep_get(pte_t *ptep)
 }
 #endif
 
+#ifndef __HAVE_ARCH_PMDP_GET
+static inline pmd_t pmdp_get(pmd_t *pmdp)
+{
+	return READ_ONCE(*pmdp);
+}
+#endif
+
 #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
 /*
  * For walking the pagetables without holding any locks.  Some architectures
@@ -340,15 +347,42 @@ static inline pte_t ptep_get_lockless(pte_t *ptep)
 
 	return pte;
 }
-#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+#define ptep_get_lockless ptep_get_lockless
+
+#if CONFIG_PGTABLE_LEVELS > 2
+static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
+{
+	pmd_t pmd;
+
+	do {
+		pmd.pmd_low = pmdp->pmd_low;
+		smp_rmb();
+		pmd.pmd_high = pmdp->pmd_high;
+		smp_rmb();
+	} while (unlikely(pmd.pmd_low != pmdp->pmd_low));
+
+	return pmd;
+}
+#define pmdp_get_lockless pmdp_get_lockless
+#endif /* CONFIG_PGTABLE_LEVELS > 2 */
+#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+
 /*
  * We require that the PTE can be read atomically.
  */
+#ifndef ptep_get_lockless
 static inline pte_t ptep_get_lockless(pte_t *ptep)
 {
 	return ptep_get(ptep);
 }
-#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+#endif
+
+#ifndef pmdp_get_lockless
+static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
+{
+	return pmdp_get(pmdp);
+}
+#endif
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 #ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
@@ -1318,17 +1352,10 @@ static inline int pud_trans_unstable(pud_t *pud)
 #endif
 }
 
-#ifndef pmd_read_atomic
 static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
 {
-	/*
-	 * Depend on compiler for an atomic pmd read. NOTE: this is
-	 * only going to work, if the pmdval_t isn't larger than
-	 * an unsigned long.
-	 */
-	return *pmdp;
+	return pmdp_get_lockless(pmdp);
 }
-#endif
 
 #ifndef arch_needs_pgtable_deposit
 #define arch_needs_pgtable_deposit() (false)
-- 
GitLab


From 6ca297d4784625de7b041e8451780643cf5751a4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 21 Oct 2022 14:51:44 +0200
Subject: [PATCH 3997/4122] mm: Rename GUP_GET_PTE_LOW_HIGH

Since it no longer applies to only PTEs, rename it to PXX.

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.776404066%40infradead.org
---
 arch/mips/Kconfig       | 2 +-
 arch/sh/Kconfig         | 2 +-
 arch/x86/Kconfig        | 2 +-
 include/linux/pgtable.h | 4 ++--
 mm/Kconfig              | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index b26b77673c2cc..15cb692b0a097 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -46,7 +46,7 @@ config MIPS
 	select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
 	select GENERIC_SMP_IDLE_THREAD
 	select GENERIC_TIME_VSYSCALL
-	select GUP_GET_PTE_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
+	select GUP_GET_PXX_LOW_HIGH if CPU_MIPS32 && PHYS_ADDR_T_64BIT
 	select HAVE_ARCH_COMPILER_H
 	select HAVE_ARCH_JUMP_LABEL
 	select HAVE_ARCH_KGDB if MIPS_FP_SUPPORT
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 5f220e903e5ab..0665ac0add0b4 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -24,7 +24,7 @@ config SUPERH
 	select GENERIC_PCI_IOMAP if PCI
 	select GENERIC_SCHED_CLOCK
 	select GENERIC_SMP_IDLE_THREAD
-	select GUP_GET_PTE_LOW_HIGH if X2TLB
+	select GUP_GET_PXX_LOW_HIGH if X2TLB
 	select HAVE_ARCH_AUDITSYSCALL
 	select HAVE_ARCH_KGDB
 	select HAVE_ARCH_SECCOMP_FILTER
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 67745ceab0dbc..bb1f326ca7280 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -157,7 +157,7 @@ config X86
 	select GENERIC_TIME_VSYSCALL
 	select GENERIC_GETTIMEOFDAY
 	select GENERIC_VDSO_TIME_NS
-	select GUP_GET_PTE_LOW_HIGH		if X86_PAE
+	select GUP_GET_PXX_LOW_HIGH		if X86_PAE
 	select HARDIRQS_SW_RESEND
 	select HARDLOCKUP_CHECK_TIMESTAMP	if X86_64
 	select HAVE_ACPI_APEI			if ACPI
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 765fd4bf420f8..7dd3df7425439 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -305,7 +305,7 @@ static inline pmd_t pmdp_get(pmd_t *pmdp)
 }
 #endif
 
-#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
+#ifdef CONFIG_GUP_GET_PXX_LOW_HIGH
 /*
  * For walking the pagetables without holding any locks.  Some architectures
  * (eg x86-32 PAE) cannot load the entries atomically without using expensive
@@ -365,7 +365,7 @@ static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
 }
 #define pmdp_get_lockless pmdp_get_lockless
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
-#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+#endif /* CONFIG_GUP_GET_PXX_LOW_HIGH */
 
 /*
  * We require that the PTE can be read atomically.
diff --git a/mm/Kconfig b/mm/Kconfig
index 57e1d8c5b5052..0eabd0beb3453 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -1044,7 +1044,7 @@ config GUP_TEST
 comment "GUP_TEST needs to have DEBUG_FS enabled"
 	depends on !GUP_TEST && !DEBUG_FS
 
-config GUP_GET_PTE_LOW_HIGH
+config GUP_GET_PXX_LOW_HIGH
 	bool
 
 config ARCH_HAS_PTE_SPECIAL
-- 
GitLab


From dab6e717429e5ec795d558a0e9a5337a1ed33a3d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 17:20:28 +0100
Subject: [PATCH 3998/4122] mm: Rename pmd_read_atomic()

There's no point in having the identical routines for PTE/PMD have
different names.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.841277397%40infradead.org
---
 include/linux/pgtable.h    | 9 ++-------
 mm/hmm.c                   | 2 +-
 mm/khugepaged.c            | 2 +-
 mm/mapping_dirty_helpers.c | 2 +-
 mm/mprotect.c              | 2 +-
 mm/userfaultfd.c           | 2 +-
 mm/vmscan.c                | 4 ++--
 7 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 7dd3df7425439..23348528ff369 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1352,11 +1352,6 @@ static inline int pud_trans_unstable(pud_t *pud)
 #endif
 }
 
-static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
-{
-	return pmdp_get_lockless(pmdp);
-}
-
 #ifndef arch_needs_pgtable_deposit
 #define arch_needs_pgtable_deposit() (false)
 #endif
@@ -1383,13 +1378,13 @@ static inline pmd_t pmd_read_atomic(pmd_t *pmdp)
  */
 static inline int pmd_none_or_trans_huge_or_clear_bad(pmd_t *pmd)
 {
-	pmd_t pmdval = pmd_read_atomic(pmd);
+	pmd_t pmdval = pmdp_get_lockless(pmd);
 	/*
 	 * The barrier will stabilize the pmdval in a register or on
 	 * the stack so that it will stop changing under the code.
 	 *
 	 * When CONFIG_TRANSPARENT_HUGEPAGE=y on x86 32bit PAE,
-	 * pmd_read_atomic is allowed to return a not atomic pmdval
+	 * pmdp_get_lockless is allowed to return a not atomic pmdval
 	 * (for example pointing to an hugepage that has never been
 	 * mapped in the pmd). The below checks will only care about
 	 * the low part of the pmd with 32bit PAE x86 anyway, with the
diff --git a/mm/hmm.c b/mm/hmm.c
index 3850fb625dda1..39cf50de76d78 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -361,7 +361,7 @@ again:
 		 * huge or device mapping one and compute corresponding pfn
 		 * values.
 		 */
-		pmd = pmd_read_atomic(pmdp);
+		pmd = pmdp_get_lockless(pmdp);
 		barrier();
 		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
 			goto again;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index 4734315f79407..52237a777ebd5 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -862,7 +862,7 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm,
 	if (!*pmd)
 		return SCAN_PMD_NULL;
 
-	pmde = pmd_read_atomic(*pmd);
+	pmde = pmdp_get_lockless(*pmd);
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 	/* See comments in pmd_none_or_trans_huge_or_clear_bad() */
diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c
index 1b0ab8fcfd8bf..175e424b9ab1b 100644
--- a/mm/mapping_dirty_helpers.c
+++ b/mm/mapping_dirty_helpers.c
@@ -126,7 +126,7 @@ static int clean_record_pte(pte_t *pte, unsigned long addr,
 static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
 			      struct mm_walk *walk)
 {
-	pmd_t pmdval = pmd_read_atomic(pmd);
+	pmd_t pmdval = pmdp_get_lockless(pmd);
 
 	if (!pmd_trans_unstable(&pmdval))
 		return 0;
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 668bfaa6ed2ae..f006bafe338f7 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -294,7 +294,7 @@ static unsigned long change_pte_range(struct mmu_gather *tlb,
  */
 static inline int pmd_none_or_clear_bad_unless_trans_huge(pmd_t *pmd)
 {
-	pmd_t pmdval = pmd_read_atomic(pmd);
+	pmd_t pmdval = pmdp_get_lockless(pmd);
 
 	/* See pmd_none_or_trans_huge_or_clear_bad for info on barrier */
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 3d0fef3980b3f..89a1c9ca06f8b 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -630,7 +630,7 @@ retry:
 			break;
 		}
 
-		dst_pmdval = pmd_read_atomic(dst_pmd);
+		dst_pmdval = pmdp_get_lockless(dst_pmd);
 		/*
 		 * If the dst_pmd is mapped as THP don't
 		 * override it and just be strict.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 04d8b88e52164..88ef873b2d83e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4039,9 +4039,9 @@ restart:
 	/* walk_pte_range() may call get_next_vma() */
 	vma = args->vma;
 	for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) {
-		pmd_t val = pmd_read_atomic(pmd + i);
+		pmd_t val = pmdp_get_lockless(pmd + i);
 
-		/* for pmd_read_atomic() */
+		/* for pmdp_get_lockless() */
 		barrier();
 
 		next = pmd_addr_end(addr, end);
-- 
GitLab


From 1180e732c985ed3c8866d2fd9e02b619848404a0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 17:21:30 +0100
Subject: [PATCH 3999/4122] mm/gup: Fix the lockless PMD access

On architectures where the PTE/PMD is larger than the native word size
(i386-PAE for example), READ_ONCE() can do the wrong thing. Use
pmdp_get_lockless() just like we use ptep_get_lockless().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.906110403%40infradead.org
---
 kernel/events/core.c | 2 +-
 mm/gup.c             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4ec3717003d55..b0d38424f3f77 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7306,7 +7306,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
 		return pud_leaf_size(pud);
 
 	pmdp = pmd_offset_lockless(pudp, pud, addr);
-	pmd = READ_ONCE(*pmdp);
+	pmd = pmdp_get_lockless(pmdp);
 	if (!pmd_present(pmd))
 		return 0;
 
diff --git a/mm/gup.c b/mm/gup.c
index fe195d47de74a..ff8b22327e165 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2808,7 +2808,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
 
 	pmdp = pmd_offset_lockless(pudp, pud, addr);
 	do {
-		pmd_t pmd = READ_ONCE(*pmdp);
+		pmd_t pmd = pmdp_get_lockless(pmdp);
 
 		next = pmd_addr_end(addr, end);
 		if (!pmd_present(pmd))
-- 
GitLab


From 7a9b8bdb6af3e19fb8e3dc7a3caf6a9ea1bed8cd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 17:38:42 +0100
Subject: [PATCH 4000/4122] x86/mm/pae: Don't (ab)use atomic64

PAE implies CX8, write readable code.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114424.971450128%40infradead.org
---
 arch/x86/include/asm/pgtable-3level.h | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 94f50b0100a51..0a1b81dc72c04 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -2,8 +2,6 @@
 #ifndef _ASM_X86_PGTABLE_3LEVEL_H
 #define _ASM_X86_PGTABLE_3LEVEL_H
 
-#include <asm/atomic64_32.h>
-
 /*
  * Intel Physical Address Extension (PAE) Mode - three-level page
  * tables on PPro+ CPUs.
@@ -95,11 +93,12 @@ static inline void pud_clear(pud_t *pudp)
 #ifdef CONFIG_SMP
 static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 {
-	pte_t res;
+	pte_t old = *ptep;
 
-	res.pte = (pteval_t)arch_atomic64_xchg((atomic64_t *)ptep, 0);
+	do {
+	} while (!try_cmpxchg64(&ptep->pte, &old.pte, 0ULL));
 
-	return res;
+	return old;
 }
 #else
 #define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-- 
GitLab


From f7bcd4617de67a4700a7bd7dc56808b57f1c8748 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 26 Nov 2020 17:40:12 +0100
Subject: [PATCH 4001/4122] x86/mm/pae: Use WRITE_ONCE()

Disallow write-tearing, that would be really unfortunate.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114425.038102604%40infradead.org
---
 arch/x86/include/asm/pgtable-3level.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 0a1b81dc72c04..d3a24929ddff9 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -27,9 +27,9 @@
  */
 static inline void native_set_pte(pte_t *ptep, pte_t pte)
 {
-	ptep->pte_high = pte.pte_high;
+	WRITE_ONCE(ptep->pte_high, pte.pte_high);
 	smp_wmb();
-	ptep->pte_low = pte.pte_low;
+	WRITE_ONCE(ptep->pte_low, pte.pte_low);
 }
 
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
@@ -58,16 +58,16 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
 static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr,
 				    pte_t *ptep)
 {
-	ptep->pte_low = 0;
+	WRITE_ONCE(ptep->pte_low, 0);
 	smp_wmb();
-	ptep->pte_high = 0;
+	WRITE_ONCE(ptep->pte_high, 0);
 }
 
 static inline void native_pmd_clear(pmd_t *pmdp)
 {
-	pmdp->pmd_low = 0;
+	WRITE_ONCE(pmdp->pmd_low, 0);
 	smp_wmb();
-	pmdp->pmd_high = 0;
+	WRITE_ONCE(pmdp->pmd_high, 0);
 }
 
 static inline void native_pud_clear(pud_t *pudp)
-- 
GitLab


From b7301f20105a27112f7ca8040cfb0b0505a32fbd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 30 Nov 2020 12:21:25 +0100
Subject: [PATCH 4002/4122] x86/mm/pae: Be consistent with pXXp_get_and_clear()

Given that ptep_get_and_clear() uses cmpxchg8b, and that should be by
far the most common case, there's no point in having an optimized
variant for pmd/pud.

Introduce the pxx_xchg64() helper to implement the common logic once.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114425.103392961%40infradead.org
---
 arch/x86/include/asm/pgtable-3level.h | 67 +++++++--------------------
 1 file changed, 17 insertions(+), 50 deletions(-)

diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index d3a24929ddff9..93c82c6ce6e33 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -90,34 +90,33 @@ static inline void pud_clear(pud_t *pudp)
 	 */
 }
 
+
+#define pxx_xchg64(_pxx, _ptr, _val) ({					\
+	_pxx##val_t *_p = (_pxx##val_t *)_ptr;				\
+	_pxx##val_t _o = *_p;						\
+	do { } while (!try_cmpxchg64(_p, &_o, (_val)));			\
+	native_make_##_pxx(_o);						\
+})
+
 #ifdef CONFIG_SMP
 static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 {
-	pte_t old = *ptep;
-
-	do {
-	} while (!try_cmpxchg64(&ptep->pte, &old.pte, 0ULL));
-
-	return old;
+	return pxx_xchg64(pte, ptep, 0ULL);
 }
-#else
-#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
-#endif
 
-#ifdef CONFIG_SMP
 static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp)
 {
-	pmd_t res;
-
-	/* xchg acts as a barrier before setting of the high bits */
-	res.pmd_low = xchg(&pmdp->pmd_low, 0);
-	res.pmd_high = READ_ONCE(pmdp->pmd_high);
-	WRITE_ONCE(pmdp->pmd_high, 0);
+	return pxx_xchg64(pmd, pmdp, 0ULL);
+}
 
-	return res;
+static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
+{
+	return pxx_xchg64(pud, pudp, 0ULL);
 }
 #else
+#define native_ptep_get_and_clear(xp) native_local_ptep_get_and_clear(xp)
 #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp)
+#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
 #endif
 
 #ifndef pmdp_establish
@@ -141,42 +140,10 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
 		return old;
 	}
 
-	do {
-		old = *pmdp;
-	} while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
-
-	return old;
+	return pxx_xchg64(pmd, pmdp, pmd.pmd);
 }
 #endif
 
-#ifdef CONFIG_SMP
-union split_pud {
-	struct {
-		u32 pud_low;
-		u32 pud_high;
-	};
-	pud_t pud;
-};
-
-static inline pud_t native_pudp_get_and_clear(pud_t *pudp)
-{
-	union split_pud res, *orig = (union split_pud *)pudp;
-
-#ifdef CONFIG_PAGE_TABLE_ISOLATION
-	pti_set_user_pgtbl(&pudp->p4d.pgd, __pgd(0));
-#endif
-
-	/* xchg acts as a barrier before setting of the high bits */
-	res.pud_low = xchg(&orig->pud_low, 0);
-	res.pud_high = orig->pud_high;
-	orig->pud_high = 0;
-
-	return res.pud;
-}
-#else
-#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp)
-#endif
-
 /* Encode and de-code a swap entry */
 #define SWP_TYPE_BITS		5
 
-- 
GitLab


From 9ee850acd25dc290d3cad2707e99380e372ad490 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 21 Oct 2022 14:11:38 +0200
Subject: [PATCH 4003/4122] x86_64: Remove pointless set_64bit() usage

The use of set_64bit() in X86_64 only code is pretty pointless, seeing
how it's a direct assignment. Remove all this nonsense.

[nathanchance: unbreak irte]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114425.168036718%40infradead.org
---
 arch/um/include/asm/pgtable-3level.h |  8 --------
 arch/x86/include/asm/cmpxchg_64.h    |  5 -----
 drivers/iommu/intel/irq_remapping.c  | 13 +++++--------
 3 files changed, 5 insertions(+), 21 deletions(-)

diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h
index cb896e6121c8e..8a5032ec231fd 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -58,11 +58,7 @@
 #define pud_populate(mm, pud, pmd) \
 	set_pud(pud, __pud(_PAGE_TABLE + __pa(pmd)))
 
-#ifdef CONFIG_64BIT
-#define set_pud(pudptr, pudval) set_64bit((u64 *) (pudptr), pud_val(pudval))
-#else
 #define set_pud(pudptr, pudval) (*(pudptr) = (pudval))
-#endif
 
 static inline int pgd_newpage(pgd_t pgd)
 {
@@ -71,11 +67,7 @@ static inline int pgd_newpage(pgd_t pgd)
 
 static inline void pgd_mkuptodate(pgd_t pgd) { pgd_val(pgd) &= ~_PAGE_NEWPAGE; }
 
-#ifdef CONFIG_64BIT
-#define set_pmd(pmdptr, pmdval) set_64bit((u64 *) (pmdptr), pmd_val(pmdval))
-#else
 #define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
-#endif
 
 static inline void pud_clear (pud_t *pud)
 {
diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
index 250187ac82484..0d3beb27b7fe4 100644
--- a/arch/x86/include/asm/cmpxchg_64.h
+++ b/arch/x86/include/asm/cmpxchg_64.h
@@ -2,11 +2,6 @@
 #ifndef _ASM_X86_CMPXCHG_64_H
 #define _ASM_X86_CMPXCHG_64_H
 
-static inline void set_64bit(volatile u64 *ptr, u64 val)
-{
-	*ptr = val;
-}
-
 #define arch_cmpxchg64(ptr, o, n)					\
 ({									\
 	BUILD_BUG_ON(sizeof(*(ptr)) != 8);				\
diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c
index 5962bb5027d06..5d176168bb760 100644
--- a/drivers/iommu/intel/irq_remapping.c
+++ b/drivers/iommu/intel/irq_remapping.c
@@ -173,7 +173,6 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
 	index = irq_iommu->irte_index + irq_iommu->sub_handle;
 	irte = &iommu->ir_table->base[index];
 
-#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE)
 	if ((irte->pst == 1) || (irte_modified->pst == 1)) {
 		bool ret;
 
@@ -187,11 +186,9 @@ static int modify_irte(struct irq_2_iommu *irq_iommu,
 		 * same as the old value.
 		 */
 		WARN_ON(!ret);
-	} else
-#endif
-	{
-		set_64bit(&irte->low, irte_modified->low);
-		set_64bit(&irte->high, irte_modified->high);
+	} else {
+		WRITE_ONCE(irte->low, irte_modified->low);
+		WRITE_ONCE(irte->high, irte_modified->high);
 	}
 	__iommu_flush_cache(iommu, irte, sizeof(*irte));
 
@@ -249,8 +246,8 @@ static int clear_entries(struct irq_2_iommu *irq_iommu)
 	end = start + (1 << irq_iommu->irte_mask);
 
 	for (entry = start; entry < end; entry++) {
-		set_64bit(&entry->low, 0);
-		set_64bit(&entry->high, 0);
+		WRITE_ONCE(entry->low, 0);
+		WRITE_ONCE(entry->high, 0);
 	}
 	bitmap_release_region(iommu->ir_table->bitmap, index,
 			      irq_iommu->irte_mask);
-- 
GitLab


From d4a72e7fe61a1ea9ad4accf3532411ca685eaead Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 21 Oct 2022 13:36:24 +0200
Subject: [PATCH 4004/4122] x86/mm/pae: Get rid of set_64bit()

Recognise that set_64bit() is a special case of our previously
introduced pxx_xchg64(), so use that and get rid of set_64bit().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114425.233481884%40infradead.org
---
 arch/x86/include/asm/cmpxchg_32.h     | 28 ---------------------------
 arch/x86/include/asm/pgtable-3level.h | 23 +++++++++++-----------
 2 files changed, 12 insertions(+), 39 deletions(-)

diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index 215f5a65790fd..6ba80ce9438dd 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -7,34 +7,6 @@
  *       you need to test for the feature in boot_cpu_data.
  */
 
-/*
- * CMPXCHG8B only writes to the target if we had the previous
- * value in registers, otherwise it acts as a read and gives us the
- * "new previous" value.  That is why there is a loop.  Preloading
- * EDX:EAX is a performance optimization: in the common case it means
- * we need only one locked operation.
- *
- * A SIMD/3DNOW!/MMX/FPU 64-bit store here would require at the very
- * least an FPU save and/or %cr0.ts manipulation.
- *
- * cmpxchg8b must be used with the lock prefix here to allow the
- * instruction to be executed atomically.  We need to have the reader
- * side to see the coherent 64bit value.
- */
-static inline void set_64bit(volatile u64 *ptr, u64 value)
-{
-	u32 low  = value;
-	u32 high = value >> 32;
-	u64 prev = *ptr;
-
-	asm volatile("\n1:\t"
-		     LOCK_PREFIX "cmpxchg8b %0\n\t"
-		     "jnz 1b"
-		     : "=m" (*ptr), "+A" (prev)
-		     : "b" (low), "c" (high)
-		     : "memory");
-}
-
 #ifdef CONFIG_X86_CMPXCHG64
 #define arch_cmpxchg64(ptr, o, n)					\
 	((__typeof__(*(ptr)))__cmpxchg64((ptr), (unsigned long long)(o), \
diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h
index 93c82c6ce6e33..967b135fa2c01 100644
--- a/arch/x86/include/asm/pgtable-3level.h
+++ b/arch/x86/include/asm/pgtable-3level.h
@@ -19,7 +19,15 @@
 	pr_err("%s:%d: bad pgd %p(%016Lx)\n",				\
 	       __FILE__, __LINE__, &(e), pgd_val(e))
 
-/* Rules for using set_pte: the pte being assigned *must* be
+#define pxx_xchg64(_pxx, _ptr, _val) ({					\
+	_pxx##val_t *_p = (_pxx##val_t *)_ptr;				\
+	_pxx##val_t _o = *_p;						\
+	do { } while (!try_cmpxchg64(_p, &_o, (_val)));			\
+	native_make_##_pxx(_o);						\
+})
+
+/*
+ * Rules for using set_pte: the pte being assigned *must* be
  * either not present or in a state where the hardware will
  * not attempt to update the pte.  In places where this is
  * not possible, use pte_get_and_clear to obtain the old pte
@@ -34,12 +42,12 @@ static inline void native_set_pte(pte_t *ptep, pte_t pte)
 
 static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte)
 {
-	set_64bit((unsigned long long *)(ptep), native_pte_val(pte));
+	pxx_xchg64(pte, ptep, native_pte_val(pte));
 }
 
 static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
-	set_64bit((unsigned long long *)(pmdp), native_pmd_val(pmd));
+	pxx_xchg64(pmd, pmdp, native_pmd_val(pmd));
 }
 
 static inline void native_set_pud(pud_t *pudp, pud_t pud)
@@ -47,7 +55,7 @@ static inline void native_set_pud(pud_t *pudp, pud_t pud)
 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 	pud.p4d.pgd = pti_set_user_pgtbl(&pudp->p4d.pgd, pud.p4d.pgd);
 #endif
-	set_64bit((unsigned long long *)(pudp), native_pud_val(pud));
+	pxx_xchg64(pud, pudp, native_pud_val(pud));
 }
 
 /*
@@ -91,13 +99,6 @@ static inline void pud_clear(pud_t *pudp)
 }
 
 
-#define pxx_xchg64(_pxx, _ptr, _val) ({					\
-	_pxx##val_t *_p = (_pxx##val_t *)_ptr;				\
-	_pxx##val_t _o = *_p;						\
-	do { } while (!try_cmpxchg64(_p, &_o, (_val)));			\
-	native_make_##_pxx(_o);						\
-})
-
 #ifdef CONFIG_SMP
 static inline pte_t native_ptep_get_and_clear(pte_t *ptep)
 {
-- 
GitLab


From eb780dcae02d5a71e6979aa7b8c708dea8597adf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 21 Oct 2022 13:47:29 +0200
Subject: [PATCH 4005/4122] mm: Remove pointless barrier() after
 pmdp_get_lockless()

pmdp_get_lockless() should itself imply any ordering required.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221022114425.298833095%40infradead.org
---
 mm/hmm.c    | 1 -
 mm/vmscan.c | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/mm/hmm.c b/mm/hmm.c
index 39cf50de76d78..601a99ce3c84d 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -362,7 +362,6 @@ again:
 		 * values.
 		 */
 		pmd = pmdp_get_lockless(pmdp);
-		barrier();
 		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
 			goto again;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 88ef873b2d83e..4936a88bb26a7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4041,9 +4041,6 @@ restart:
 	for (i = pmd_index(start), addr = start; addr != end; i++, addr = next) {
 		pmd_t val = pmdp_get_lockless(pmd + i);
 
-		/* for pmdp_get_lockless() */
-		barrier();
-
 		next = pmd_addr_end(addr, end);
 
 		if (!pmd_present(val) || is_huge_zero_pmd(val)) {
-- 
GitLab


From 2dff2c359e829245bc3d80e42e296876d1f0cf8e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 1 Nov 2022 12:53:18 +0100
Subject: [PATCH 4006/4122] mm: Convert __HAVE_ARCH_P..P_GET to the new style

Since __HAVE_ARCH_* style guards have been depricated in favour of
defining the function name onto itself, convert pxxp_get().

Suggested-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/Y2EUEBlQXNgaJgoI@hirez.programming.kicks-ass.net
---
 arch/powerpc/include/asm/nohash/32/pgtable.h | 2 +-
 include/linux/pgtable.h                      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0d40b33184ebe..cb1ac02ae8ee0 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -263,7 +263,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p
 }
 
 #ifdef CONFIG_PPC_16K_PAGES
-#define __HAVE_ARCH_PTEP_GET
+#define ptep_get ptep_get
 static inline pte_t ptep_get(pte_t *ptep)
 {
 	pte_basic_t val = READ_ONCE(ptep->pte);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 23348528ff369..70e2a7e06a767 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -291,14 +291,14 @@ static inline void ptep_clear(struct mm_struct *mm, unsigned long addr,
 	ptep_get_and_clear(mm, addr, ptep);
 }
 
-#ifndef __HAVE_ARCH_PTEP_GET
+#ifndef ptep_get
 static inline pte_t ptep_get(pte_t *ptep)
 {
 	return READ_ONCE(*ptep);
 }
 #endif
 
-#ifndef __HAVE_ARCH_PMDP_GET
+#ifndef pmdp_get
 static inline pmd_t pmdp_get(pmd_t *pmdp)
 {
 	return READ_ONCE(*pmdp);
-- 
GitLab


From 82328227db8f0b9b5f77bb5afcd47e59d0e4d08f Mon Sep 17 00:00:00 2001
From: Pasha Tatashin <pasha.tatashin@soleen.com>
Date: Mon, 16 May 2022 18:52:02 +0000
Subject: [PATCH 4007/4122] x86/mm: Remove P*D_PAGE_MASK and P*D_PAGE_SIZE
 macros

Other architectures and the common mm/ use P*D_MASK, and P*D_SIZE.
Remove the duplicated P*D_PAGE_MASK and P*D_PAGE_SIZE which are only
used in x86/*.

Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Link: https://lore.kernel.org/r/20220516185202.604654-1-tatashin@google.com
---
 arch/x86/include/asm/page_types.h  | 12 +++---------
 arch/x86/kernel/amd_gart_64.c      |  2 +-
 arch/x86/kernel/head64.c           |  2 +-
 arch/x86/mm/mem_encrypt_boot.S     |  4 ++--
 arch/x86/mm/mem_encrypt_identity.c | 18 +++++++++---------
 arch/x86/mm/pat/set_memory.c       |  6 +++---
 arch/x86/mm/pti.c                  |  2 +-
 7 files changed, 20 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/page_types.h b/arch/x86/include/asm/page_types.h
index a506a411474d4..86bd4311daf8a 100644
--- a/arch/x86/include/asm/page_types.h
+++ b/arch/x86/include/asm/page_types.h
@@ -11,20 +11,14 @@
 #define PAGE_SIZE		(_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK		(~(PAGE_SIZE-1))
 
-#define PMD_PAGE_SIZE		(_AC(1, UL) << PMD_SHIFT)
-#define PMD_PAGE_MASK		(~(PMD_PAGE_SIZE-1))
-
-#define PUD_PAGE_SIZE		(_AC(1, UL) << PUD_SHIFT)
-#define PUD_PAGE_MASK		(~(PUD_PAGE_SIZE-1))
-
 #define __VIRTUAL_MASK		((1UL << __VIRTUAL_MASK_SHIFT) - 1)
 
-/* Cast *PAGE_MASK to a signed type so that it is sign-extended if
+/* Cast P*D_MASK to a signed type so that it is sign-extended if
    virtual addresses are 32-bits but physical addresses are larger
    (ie, 32-bit PAE). */
 #define PHYSICAL_PAGE_MASK	(((signed long)PAGE_MASK) & __PHYSICAL_MASK)
-#define PHYSICAL_PMD_PAGE_MASK	(((signed long)PMD_PAGE_MASK) & __PHYSICAL_MASK)
-#define PHYSICAL_PUD_PAGE_MASK	(((signed long)PUD_PAGE_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PMD_PAGE_MASK	(((signed long)PMD_MASK) & __PHYSICAL_MASK)
+#define PHYSICAL_PUD_PAGE_MASK	(((signed long)PUD_MASK) & __PHYSICAL_MASK)
 
 #define HPAGE_SHIFT		PMD_SHIFT
 #define HPAGE_SIZE		(_AC(1,UL) << HPAGE_SHIFT)
diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c
index 19a0207e529fe..56a917df410d3 100644
--- a/arch/x86/kernel/amd_gart_64.c
+++ b/arch/x86/kernel/amd_gart_64.c
@@ -504,7 +504,7 @@ static __init unsigned long check_iommu_size(unsigned long aper, u64 aper_size)
 	}
 
 	a = aper + iommu_size;
-	iommu_size -= round_up(a, PMD_PAGE_SIZE) - a;
+	iommu_size -= round_up(a, PMD_SIZE) - a;
 
 	if (iommu_size < 64*1024*1024) {
 		pr_warn("PCI-DMA: Warning: Small IOMMU %luMB."
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index 6a3cfaf6b72ad..387e4b12e823a 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -203,7 +203,7 @@ unsigned long __head __startup_64(unsigned long physaddr,
 	load_delta = physaddr - (unsigned long)(_text - __START_KERNEL_map);
 
 	/* Is the address not 2M aligned? */
-	if (load_delta & ~PMD_PAGE_MASK)
+	if (load_delta & ~PMD_MASK)
 		for (;;);
 
 	/* Include the SME encryption mask in the fixup value */
diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S
index 9de3d900bc927..e25288ee33c2d 100644
--- a/arch/x86/mm/mem_encrypt_boot.S
+++ b/arch/x86/mm/mem_encrypt_boot.S
@@ -26,7 +26,7 @@ SYM_FUNC_START(sme_encrypt_execute)
 	 *   RCX - virtual address of the encryption workarea, including:
 	 *     - stack page (PAGE_SIZE)
 	 *     - encryption routine page (PAGE_SIZE)
-	 *     - intermediate copy buffer (PMD_PAGE_SIZE)
+	 *     - intermediate copy buffer (PMD_SIZE)
 	 *    R8 - physical address of the pagetables to use for encryption
 	 */
 
@@ -123,7 +123,7 @@ SYM_FUNC_START(__enc_copy)
 	wbinvd				/* Invalidate any cache entries */
 
 	/* Copy/encrypt up to 2MB at a time */
-	movq	$PMD_PAGE_SIZE, %r12
+	movq	$PMD_SIZE, %r12
 1:
 	cmpq	%r12, %r9
 	jnb	2f
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index f415498d3175c..88cccd65029db 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -93,7 +93,7 @@ struct sme_populate_pgd_data {
  * section is 2MB aligned to allow for simple pagetable setup using only
  * PMD entries (see vmlinux.lds.S).
  */
-static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch");
+static char sme_workarea[2 * PMD_SIZE] __section(".init.scratch");
 
 static char sme_cmdline_arg[] __initdata = "mem_encrypt";
 static char sme_cmdline_on[]  __initdata = "on";
@@ -198,8 +198,8 @@ static void __init __sme_map_range_pmd(struct sme_populate_pgd_data *ppd)
 	while (ppd->vaddr < ppd->vaddr_end) {
 		sme_populate_pgd_large(ppd);
 
-		ppd->vaddr += PMD_PAGE_SIZE;
-		ppd->paddr += PMD_PAGE_SIZE;
+		ppd->vaddr += PMD_SIZE;
+		ppd->paddr += PMD_SIZE;
 	}
 }
 
@@ -225,11 +225,11 @@ static void __init __sme_map_range(struct sme_populate_pgd_data *ppd,
 	vaddr_end = ppd->vaddr_end;
 
 	/* If start is not 2MB aligned, create PTE entries */
-	ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_PAGE_SIZE);
+	ppd->vaddr_end = ALIGN(ppd->vaddr, PMD_SIZE);
 	__sme_map_range_pte(ppd);
 
 	/* Create PMD entries */
-	ppd->vaddr_end = vaddr_end & PMD_PAGE_MASK;
+	ppd->vaddr_end = vaddr_end & PMD_MASK;
 	__sme_map_range_pmd(ppd);
 
 	/* If end is not 2MB aligned, create PTE entries */
@@ -325,7 +325,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
 
 	/* Physical addresses gives us the identity mapped virtual addresses */
 	kernel_start = __pa_symbol(_text);
-	kernel_end = ALIGN(__pa_symbol(_end), PMD_PAGE_SIZE);
+	kernel_end = ALIGN(__pa_symbol(_end), PMD_SIZE);
 	kernel_len = kernel_end - kernel_start;
 
 	initrd_start = 0;
@@ -355,12 +355,12 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
 	 *   executable encryption area size:
 	 *     stack page (PAGE_SIZE)
 	 *     encryption routine page (PAGE_SIZE)
-	 *     intermediate copy buffer (PMD_PAGE_SIZE)
+	 *     intermediate copy buffer (PMD_SIZE)
 	 *   pagetable structures for the encryption of the kernel
 	 *   pagetable structures for workarea (in case not currently mapped)
 	 */
 	execute_start = workarea_start;
-	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_PAGE_SIZE;
+	execute_end = execute_start + (PAGE_SIZE * 2) + PMD_SIZE;
 	execute_len = execute_end - execute_start;
 
 	/*
@@ -383,7 +383,7 @@ void __init sme_encrypt_kernel(struct boot_params *bp)
 	 * before it is mapped.
 	 */
 	workarea_len = execute_len + pgtable_area_len;
-	workarea_end = ALIGN(workarea_start + workarea_len, PMD_PAGE_SIZE);
+	workarea_end = ALIGN(workarea_start + workarea_len, PMD_SIZE);
 
 	/*
 	 * Set the address to the start of where newly created pagetable
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index f275605892df9..06eb8910462f1 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -743,11 +743,11 @@ phys_addr_t slow_virt_to_phys(void *__virt_addr)
 	switch (level) {
 	case PG_LEVEL_1G:
 		phys_addr = (phys_addr_t)pud_pfn(*(pud_t *)pte) << PAGE_SHIFT;
-		offset = virt_addr & ~PUD_PAGE_MASK;
+		offset = virt_addr & ~PUD_MASK;
 		break;
 	case PG_LEVEL_2M:
 		phys_addr = (phys_addr_t)pmd_pfn(*(pmd_t *)pte) << PAGE_SHIFT;
-		offset = virt_addr & ~PMD_PAGE_MASK;
+		offset = virt_addr & ~PMD_MASK;
 		break;
 	default:
 		phys_addr = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
@@ -1037,7 +1037,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
 	case PG_LEVEL_1G:
 		ref_prot = pud_pgprot(*(pud_t *)kpte);
 		ref_pfn = pud_pfn(*(pud_t *)kpte);
-		pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
+		pfninc = PMD_SIZE >> PAGE_SHIFT;
 		lpaddr = address & PUD_MASK;
 		lpinc = PMD_SIZE;
 		/*
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index ffe3b3a087fea..78414c6d1b5ed 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -592,7 +592,7 @@ static void pti_set_kernel_image_nonglobal(void)
 	 * of the image.
 	 */
 	unsigned long start = PFN_ALIGN(_text);
-	unsigned long end = ALIGN((unsigned long)_end, PMD_PAGE_SIZE);
+	unsigned long end = ALIGN((unsigned long)_end, PMD_SIZE);
 
 	/*
 	 * This clears _PAGE_GLOBAL from the entire kernel image.
-- 
GitLab


From 80d0969aa7832bfeb287cb22563a1ad08fea937d Mon Sep 17 00:00:00 2001
From: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Date: Wed, 9 Nov 2022 19:51:25 +0300
Subject: [PATCH 4008/4122] x86/mm: Fix CR3_ADDR_MASK

The mask must not include bits above physical address mask. These bits
are reserved and can be used for other things. Bits 61 and 62 are used
for Linear Address Masking.

Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Reviewed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Alexander Potapenko <glider@google.com>
Link: https://lore.kernel.org/all/20221109165140.9137-2-kirill.shutemov%40linux.intel.com
---
 arch/x86/include/asm/processor-flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/include/asm/processor-flags.h b/arch/x86/include/asm/processor-flags.h
index 02c2cbda4a74e..a7f3d9100adb6 100644
--- a/arch/x86/include/asm/processor-flags.h
+++ b/arch/x86/include/asm/processor-flags.h
@@ -35,7 +35,7 @@
  */
 #ifdef CONFIG_X86_64
 /* Mask off the address space ID and SME encryption bits. */
-#define CR3_ADDR_MASK	__sme_clr(0x7FFFFFFFFFFFF000ull)
+#define CR3_ADDR_MASK	__sme_clr(PHYSICAL_PAGE_MASK)
 #define CR3_PCID_MASK	0xFFFull
 #define CR3_NOFLUSH	BIT_ULL(63)
 
-- 
GitLab


From 5ceeee7571b7628f439ae0444ec41d132558f47e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 10 Nov 2022 13:33:50 +0100
Subject: [PATCH 4009/4122] x86/mm: Add a few comments

It's a shame to hide useful comments in Changelogs, add some to the
code.

Shamelessly stolen from commit:

  c40a56a7818c ("x86/mm/init: Remove freed kernel image areas from alias mapping")

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221110125544.460677011%40infradead.org
---
 arch/x86/mm/pat/set_memory.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 06eb8910462f1..50f81ea1fbad3 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -219,6 +219,23 @@ within_inclusive(unsigned long addr, unsigned long start, unsigned long end)
 
 #ifdef CONFIG_X86_64
 
+/*
+ * The kernel image is mapped into two places in the virtual address space
+ * (addresses without KASLR, of course):
+ *
+ * 1. The kernel direct map (0xffff880000000000)
+ * 2. The "high kernel map" (0xffffffff81000000)
+ *
+ * We actually execute out of #2. If we get the address of a kernel symbol, it
+ * points to #2, but almost all physical-to-virtual translations point to #1.
+ *
+ * This is so that we can have both a directmap of all physical memory *and*
+ * take full advantage of the the limited (s32) immediate addressing range (2G)
+ * of x86_64.
+ *
+ * See Documentation/x86/x86_64/mm.rst for more detail.
+ */
+
 static inline unsigned long highmap_start_pfn(void)
 {
 	return __pa_symbol(_text) >> PAGE_SHIFT;
@@ -1626,6 +1643,9 @@ repeat:
 
 static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
 
+/*
+ * Check the directmap and "high kernel map" 'aliases'.
+ */
 static int cpa_process_alias(struct cpa_data *cpa)
 {
 	struct cpa_data alias_cpa;
-- 
GitLab


From ef9ab81af6e1f7b7ff589aa1504434aa5915c1df Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 10 Nov 2022 13:33:54 +0100
Subject: [PATCH 4010/4122] x86/mm: Untangle
 __change_page_attr_set_clr(.checkalias)

The .checkalias argument to __change_page_attr_set_clr() is overloaded
and serves two different purposes:

 - it inhibits the call to cpa_process_alias() -- as suggested by the
   name; however,

 - it also serves as 'primary' indicator for __change_page_attr()
   ( which in turn also serves as a recursion terminator for
     cpa_process_alias() ).

Untangle these by extending the use of CPA_NO_CHECK_ALIAS to all
callsites that currently use .checkalias=0 for this purpose.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221110125544.527267183%40infradead.org
---
 arch/x86/mm/pat/set_memory.c | 30 +++++++++++-------------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 50f81ea1fbad3..4943f6c5d8d22 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1727,7 +1727,7 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 		if (ret)
 			goto out;
 
-		if (checkalias) {
+		if (checkalias && !(cpa->flags & CPA_NO_CHECK_ALIAS)) {
 			ret = cpa_process_alias(cpa);
 			if (ret)
 				goto out;
@@ -1801,18 +1801,12 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	cpa.numpages = numpages;
 	cpa.mask_set = mask_set;
 	cpa.mask_clr = mask_clr;
-	cpa.flags = 0;
+	cpa.flags = in_flag;
 	cpa.curpage = 0;
 	cpa.force_split = force_split;
 
-	if (in_flag & (CPA_ARRAY | CPA_PAGES_ARRAY))
-		cpa.flags |= in_flag;
-
 	/* No alias checking for _NX bit modifications */
 	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
-	/* Has caller explicitly disabled alias checking? */
-	if (in_flag & CPA_NO_CHECK_ALIAS)
-		checkalias = 0;
 
 	ret = __change_page_attr_set_clr(&cpa, checkalias);
 
@@ -2067,11 +2061,9 @@ int set_memory_np(unsigned long addr, int numpages)
 
 int set_memory_np_noalias(unsigned long addr, int numpages)
 {
-	int cpa_flags = CPA_NO_CHECK_ALIAS;
-
 	return change_page_attr_set_clr(&addr, numpages, __pgprot(0),
 					__pgprot(_PAGE_PRESENT), 0,
-					cpa_flags, NULL);
+					CPA_NO_CHECK_ALIAS, NULL);
 }
 
 int set_memory_4k(unsigned long addr, int numpages)
@@ -2288,7 +2280,7 @@ static int __set_pages_p(struct page *page, int numpages)
 				.numpages = numpages,
 				.mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
 				.mask_clr = __pgprot(0),
-				.flags = 0};
+				.flags = CPA_NO_CHECK_ALIAS };
 
 	/*
 	 * No alias checking needed for setting present flag. otherwise,
@@ -2296,7 +2288,7 @@ static int __set_pages_p(struct page *page, int numpages)
 	 * mappings (this adds to complexity if we want to do this from
 	 * atomic context especially). Let's keep it simple!
 	 */
-	return __change_page_attr_set_clr(&cpa, 0);
+	return __change_page_attr_set_clr(&cpa, 1);
 }
 
 static int __set_pages_np(struct page *page, int numpages)
@@ -2307,7 +2299,7 @@ static int __set_pages_np(struct page *page, int numpages)
 				.numpages = numpages,
 				.mask_set = __pgprot(0),
 				.mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW),
-				.flags = 0};
+				.flags = CPA_NO_CHECK_ALIAS };
 
 	/*
 	 * No alias checking needed for setting not present flag. otherwise,
@@ -2315,7 +2307,7 @@ static int __set_pages_np(struct page *page, int numpages)
 	 * mappings (this adds to complexity if we want to do this from
 	 * atomic context especially). Let's keep it simple!
 	 */
-	return __change_page_attr_set_clr(&cpa, 0);
+	return __change_page_attr_set_clr(&cpa, 1);
 }
 
 int set_direct_map_invalid_noflush(struct page *page)
@@ -2386,7 +2378,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
 		.numpages = numpages,
 		.mask_set = __pgprot(0),
 		.mask_clr = __pgprot(~page_flags & (_PAGE_NX|_PAGE_RW)),
-		.flags = 0,
+		.flags = CPA_NO_CHECK_ALIAS,
 	};
 
 	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
@@ -2399,7 +2391,7 @@ int __init kernel_map_pages_in_pgd(pgd_t *pgd, u64 pfn, unsigned long address,
 
 	cpa.mask_set = __pgprot(_PAGE_PRESENT | page_flags);
 
-	retval = __change_page_attr_set_clr(&cpa, 0);
+	retval = __change_page_attr_set_clr(&cpa, 1);
 	__flush_tlb_all();
 
 out:
@@ -2429,12 +2421,12 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
 		.numpages	= numpages,
 		.mask_set	= __pgprot(0),
 		.mask_clr	= __pgprot(_PAGE_PRESENT | _PAGE_RW),
-		.flags		= 0,
+		.flags		= CPA_NO_CHECK_ALIAS,
 	};
 
 	WARN_ONCE(num_online_cpus() > 1, "Don't call after initializing SMP");
 
-	retval = __change_page_attr_set_clr(&cpa, 0);
+	retval = __change_page_attr_set_clr(&cpa, 1);
 	__flush_tlb_all();
 
 	return retval;
-- 
GitLab


From d597416683d587e940faa35945fba162329b5a71 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 10 Nov 2022 13:33:57 +0100
Subject: [PATCH 4011/4122] x86/mm: Inhibit _PAGE_NX changes from
 cpa_process_alias()

There is a cludge in change_page_attr_set_clr() that inhibits
propagating NX changes to the aliases (directmap and highmap) -- this
is a cludge twofold:

 - it also inhibits the primary checks in __change_page_attr();
 - it hard depends on single bit changes.

The introduction of set_memory_rox() triggered this last issue for
clearing both _PAGE_RW and _PAGE_NX.

Explicitly ignore _PAGE_NX in cpa_process_alias() instead.

Fixes: b38994948567 ("x86/mm: Implement native set_memory_rox()")
Reported-by: kernel test robot <oliver.sang@intel.com>
Debugged-by: Dave Hansen <dave.hansen@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221110125544.594991716%40infradead.org
---
 arch/x86/mm/pat/set_memory.c | 28 +++++++++++++++++++++++-----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 4943f6c5d8d22..beef774171154 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1669,6 +1669,12 @@ static int cpa_process_alias(struct cpa_data *cpa)
 		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 		alias_cpa.curpage = 0;
 
+		/* Directmap always has NX set, do not modify. */
+		if (__supported_pte_mask & _PAGE_NX) {
+			alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
+			alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
+		}
+
 		cpa->force_flush_all = 1;
 
 		ret = __change_page_attr_set_clr(&alias_cpa, 0);
@@ -1691,6 +1697,15 @@ static int cpa_process_alias(struct cpa_data *cpa)
 		alias_cpa.flags &= ~(CPA_PAGES_ARRAY | CPA_ARRAY);
 		alias_cpa.curpage = 0;
 
+		/*
+		 * [_text, _brk_end) also covers data, do not modify NX except
+		 * in cases where the highmap is the primary target.
+		 */
+		if (__supported_pte_mask & _PAGE_NX) {
+			alias_cpa.mask_clr.pgprot &= ~_PAGE_NX;
+			alias_cpa.mask_set.pgprot &= ~_PAGE_NX;
+		}
+
 		cpa->force_flush_all = 1;
 		/*
 		 * The high mapping range is imprecise, so ignore the
@@ -1709,6 +1724,12 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 	unsigned long rempages = numpages;
 	int ret = 0;
 
+	/*
+	 * No changes, easy!
+	 */
+	if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)))
+		return ret;
+
 	while (rempages) {
 		/*
 		 * Store the remaining nr of pages for the large page
@@ -1755,7 +1776,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 				    struct page **pages)
 {
 	struct cpa_data cpa;
-	int ret, cache, checkalias;
+	int ret, cache;
 
 	memset(&cpa, 0, sizeof(cpa));
 
@@ -1805,10 +1826,7 @@ static int change_page_attr_set_clr(unsigned long *addr, int numpages,
 	cpa.curpage = 0;
 	cpa.force_split = force_split;
 
-	/* No alias checking for _NX bit modifications */
-	checkalias = (pgprot_val(mask_set) | pgprot_val(mask_clr)) != _PAGE_NX;
-
-	ret = __change_page_attr_set_clr(&cpa, checkalias);
+	ret = __change_page_attr_set_clr(&cpa, 1);
 
 	/*
 	 * Check whether we really changed something:
-- 
GitLab


From e996365ee7475805d2a01312532855004e89df84 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 10 Nov 2022 13:34:00 +0100
Subject: [PATCH 4012/4122] x86/mm: Rename
 __change_page_attr_set_clr(.checkalias)

Now that the checkalias functionality is taken by CPA_NO_CHECK_ALIAS
rename the argument to better match is remaining purpose: primary,
matching __change_page_attr().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221110125544.661001508%40infradead.org
---
 arch/x86/mm/pat/set_memory.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index beef774171154..220361ceb9977 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1641,7 +1641,7 @@ repeat:
 	return err;
 }
 
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias);
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary);
 
 /*
  * Check the directmap and "high kernel map" 'aliases'.
@@ -1718,7 +1718,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
 	return 0;
 }
 
-static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
+static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary)
 {
 	unsigned long numpages = cpa->numpages;
 	unsigned long rempages = numpages;
@@ -1742,13 +1742,13 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int checkalias)
 
 		if (!debug_pagealloc_enabled())
 			spin_lock(&cpa_lock);
-		ret = __change_page_attr(cpa, checkalias);
+		ret = __change_page_attr(cpa, primary);
 		if (!debug_pagealloc_enabled())
 			spin_unlock(&cpa_lock);
 		if (ret)
 			goto out;
 
-		if (checkalias && !(cpa->flags & CPA_NO_CHECK_ALIAS)) {
+		if (primary && !(cpa->flags & CPA_NO_CHECK_ALIAS)) {
 			ret = cpa_process_alias(cpa);
 			if (ret)
 				goto out;
-- 
GitLab


From 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Nov 2022 20:35:00 +0000
Subject: [PATCH 4013/4122] x86/mm: Recompute physical address for every page
 of per-CPU CEA mapping

Recompute the physical address for each per-CPU page in the CPU entry
area, a recent commit inadvertantly modified cea_map_percpu_pages() such
that every PTE is mapped to the physical address of the first page.

Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand")
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Link: https://lkml.kernel.org/r/20221110203504.1985010-2-seanjc@google.com
---
 arch/x86/mm/cpu_entry_area.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index dff9001e5e125..d831aae94b419 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -97,7 +97,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 					early_pfn_to_nid(PFN_DOWN(pa)));
 
 	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
-		cea_set_pte(cea_vaddr, pa, prot);
+		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
 }
 
 static void __init percpu_setup_debug_store(unsigned int cpu)
-- 
GitLab


From 97650148a15e0b30099d6175ffe278b9f55ec66a Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Nov 2022 20:35:01 +0000
Subject: [PATCH 4014/4122] x86/mm: Populate KASAN shadow for entire per-CPU
 range of CPU entry area

Populate a KASAN shadow for the entire possible per-CPU range of the CPU
entry area instead of requiring that each individual chunk map a shadow.
Mapping shadows individually is error prone, e.g. the per-CPU GDT mapping
was left behind, which can lead to not-present page faults during KASAN
validation if the kernel performs a software lookup into the GDT.  The DS
buffer is also likely affected.

The motivation for mapping the per-CPU areas on-demand was to avoid
mapping the entire 512GiB range that's reserved for the CPU entry area,
shaving a few bytes by not creating shadows for potentially unused memory
was not a goal.

The bug is most easily reproduced by doing a sigreturn with a garbage
CS in the sigcontext, e.g.

  int main(void)
  {
    struct sigcontext regs;

    syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
    syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
    syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);

    memset(&regs, 0, sizeof(regs));
    regs.cs = 0x1d0;
    syscall(__NR_rt_sigreturn);
    return 0;
  }

to coerce the kernel into doing a GDT lookup to compute CS.base when
reading the instruction bytes on the subsequent #GP to determine whether
or not the #GP is something the kernel should handle, e.g. to fixup UMIP
violations or to emulate CLI/STI for IOPL=3 applications.

  BUG: unable to handle page fault for address: fffffbc8379ace00
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 16c03a067 P4D 16c03a067 PUD 15b990067 PMD 15b98f067 PTE 0
  Oops: 0000 [#1] PREEMPT SMP KASAN
  CPU: 3 PID: 851 Comm: r2 Not tainted 6.1.0-rc3-next-20221103+ #432
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:kasan_check_range+0xdf/0x190
  Call Trace:
   <TASK>
   get_desc+0xb0/0x1d0
   insn_get_seg_base+0x104/0x270
   insn_fetch_from_user+0x66/0x80
   fixup_umip_exception+0xb1/0x530
   exc_general_protection+0x181/0x210
   asm_exc_general_protection+0x22/0x30
  RIP: 0003:0x0
  Code: Unable to access opcode bytes at 0xffffffffffffffd6.
  RSP: 0003:0000000000000000 EFLAGS: 00000202
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000001d0
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
  RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
  R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
   </TASK>

Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand")
Reported-by: syzbot+ffb4f000dc2872c93f62@syzkaller.appspotmail.com
Suggested-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Link: https://lkml.kernel.org/r/20221110203504.1985010-3-seanjc@google.com
---
 arch/x86/mm/cpu_entry_area.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index d831aae94b419..7c855dffcdc26 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -91,11 +91,6 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 static void __init
 cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 {
-	phys_addr_t pa = per_cpu_ptr_to_phys(ptr);
-
-	kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE,
-					early_pfn_to_nid(PFN_DOWN(pa)));
-
 	for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
 		cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
 }
@@ -195,6 +190,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
 	pgprot_t tss_prot = PAGE_KERNEL;
 #endif
 
+	kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE,
+					early_cpu_to_node(cpu));
+
 	cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
 
 	cea_map_percpu_pages(&cea->entry_stack_page,
-- 
GitLab


From 7077d2ccb94dafd00b29cc2d601c9f6891648f5b Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Nov 2022 20:35:02 +0000
Subject: [PATCH 4015/4122] x86/kasan: Rename local CPU_ENTRY_AREA variables to
 shorten names

Rename the CPU entry area variables in kasan_init() to shorten their
names, a future fix will reference the beginning of the per-CPU portion
of the CPU entry area, and shadow_cpu_entry_per_cpu_begin is a bit much.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Link: https://lkml.kernel.org/r/20221110203504.1985010-4-seanjc@google.com
---
 arch/x86/mm/kasan_init_64.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index d1416926ad526..ad7872ae10ed6 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -331,7 +331,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
 void __init kasan_init(void)
 {
 	int i;
-	void *shadow_cpu_entry_begin, *shadow_cpu_entry_end;
+	void *shadow_cea_begin, *shadow_cea_end;
 
 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
 
@@ -372,16 +372,16 @@ void __init kasan_init(void)
 		map_range(&pfn_mapped[i]);
 	}
 
-	shadow_cpu_entry_begin = (void *)CPU_ENTRY_AREA_BASE;
-	shadow_cpu_entry_begin = kasan_mem_to_shadow(shadow_cpu_entry_begin);
-	shadow_cpu_entry_begin = (void *)round_down(
-			(unsigned long)shadow_cpu_entry_begin, PAGE_SIZE);
+	shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE;
+	shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin);
+	shadow_cea_begin = (void *)round_down(
+			(unsigned long)shadow_cea_begin, PAGE_SIZE);
 
-	shadow_cpu_entry_end = (void *)(CPU_ENTRY_AREA_BASE +
+	shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE +
 					CPU_ENTRY_AREA_MAP_SIZE);
-	shadow_cpu_entry_end = kasan_mem_to_shadow(shadow_cpu_entry_end);
-	shadow_cpu_entry_end = (void *)round_up(
-			(unsigned long)shadow_cpu_entry_end, PAGE_SIZE);
+	shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end);
+	shadow_cea_end = (void *)round_up(
+			(unsigned long)shadow_cea_end, PAGE_SIZE);
 
 	kasan_populate_early_shadow(
 		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
@@ -403,9 +403,9 @@ void __init kasan_init(void)
 
 	kasan_populate_early_shadow(
 		kasan_mem_to_shadow((void *)VMALLOC_END + 1),
-		shadow_cpu_entry_begin);
+		shadow_cea_begin);
 
-	kasan_populate_early_shadow(shadow_cpu_entry_end,
+	kasan_populate_early_shadow(shadow_cea_end,
 			kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
 	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
-- 
GitLab


From bde258d97409f2a45243cb393a55ea9ecfc7aba5 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Nov 2022 20:35:03 +0000
Subject: [PATCH 4016/4122] x86/kasan: Add helpers to align shadow addresses up
 and down

Add helpers to dedup code for aligning shadow address up/down to page
boundaries when translating an address to its shadow.

No functional change intended.

Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andrey Ryabinin <ryabinin.a.a@gmail.com>
Link: https://lkml.kernel.org/r/20221110203504.1985010-5-seanjc@google.com
---
 arch/x86/mm/kasan_init_64.c | 40 ++++++++++++++++++++-----------------
 1 file changed, 22 insertions(+), 18 deletions(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index ad7872ae10ed6..afc5e129ca7be 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -316,22 +316,33 @@ void __init kasan_early_init(void)
 	kasan_map_early_shadow(init_top_pgt);
 }
 
+static unsigned long kasan_mem_to_shadow_align_down(unsigned long va)
+{
+	unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
+
+	return round_down(shadow, PAGE_SIZE);
+}
+
+static unsigned long kasan_mem_to_shadow_align_up(unsigned long va)
+{
+	unsigned long shadow = (unsigned long)kasan_mem_to_shadow((void *)va);
+
+	return round_up(shadow, PAGE_SIZE);
+}
+
 void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
 {
 	unsigned long shadow_start, shadow_end;
 
-	shadow_start = (unsigned long)kasan_mem_to_shadow(va);
-	shadow_start = round_down(shadow_start, PAGE_SIZE);
-	shadow_end = (unsigned long)kasan_mem_to_shadow(va + size);
-	shadow_end = round_up(shadow_end, PAGE_SIZE);
-
+	shadow_start = kasan_mem_to_shadow_align_down((unsigned long)va);
+	shadow_end = kasan_mem_to_shadow_align_up((unsigned long)va + size);
 	kasan_populate_shadow(shadow_start, shadow_end, nid);
 }
 
 void __init kasan_init(void)
 {
+	unsigned long shadow_cea_begin, shadow_cea_end;
 	int i;
-	void *shadow_cea_begin, *shadow_cea_end;
 
 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
 
@@ -372,16 +383,9 @@ void __init kasan_init(void)
 		map_range(&pfn_mapped[i]);
 	}
 
-	shadow_cea_begin = (void *)CPU_ENTRY_AREA_BASE;
-	shadow_cea_begin = kasan_mem_to_shadow(shadow_cea_begin);
-	shadow_cea_begin = (void *)round_down(
-			(unsigned long)shadow_cea_begin, PAGE_SIZE);
-
-	shadow_cea_end = (void *)(CPU_ENTRY_AREA_BASE +
-					CPU_ENTRY_AREA_MAP_SIZE);
-	shadow_cea_end = kasan_mem_to_shadow(shadow_cea_end);
-	shadow_cea_end = (void *)round_up(
-			(unsigned long)shadow_cea_end, PAGE_SIZE);
+	shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE);
+	shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE +
+						      CPU_ENTRY_AREA_MAP_SIZE);
 
 	kasan_populate_early_shadow(
 		kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM),
@@ -403,9 +407,9 @@ void __init kasan_init(void)
 
 	kasan_populate_early_shadow(
 		kasan_mem_to_shadow((void *)VMALLOC_END + 1),
-		shadow_cea_begin);
+		(void *)shadow_cea_begin);
 
-	kasan_populate_early_shadow(shadow_cea_end,
+	kasan_populate_early_shadow((void *)shadow_cea_end,
 			kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
 	kasan_populate_shadow((unsigned long)kasan_mem_to_shadow(_stext),
-- 
GitLab


From 1cfaac2400c73378e78182a706be0f3ac8b93cd7 Mon Sep 17 00:00:00 2001
From: Sean Christopherson <seanjc@google.com>
Date: Thu, 10 Nov 2022 20:35:04 +0000
Subject: [PATCH 4017/4122] x86/kasan: Populate shadow for shared chunk of the
 CPU entry area

Popuplate the shadow for the shared portion of the CPU entry area, i.e.
the read-only IDT mapping, during KASAN initialization.  A recent change
modified KASAN to map the per-CPU areas on-demand, but forgot to keep a
shadow for the common area that is shared amongst all CPUs.

Map the common area in KASAN init instead of letting idt_map_in_cea() do
the dirty work so that it Just Works in the unlikely event more shared
data is shoved into the CPU entry area.

The bug manifests as a not-present #PF when software attempts to lookup
an IDT entry, e.g. when KVM is handling IRQs on Intel CPUs (KVM performs
direct CALL to the IRQ handler to avoid the overhead of INTn):

 BUG: unable to handle page fault for address: fffffbc0000001d8
 #PF: supervisor read access in kernel mode
 #PF: error_code(0x0000) - not-present page
 PGD 16c03a067 P4D 16c03a067 PUD 0
 Oops: 0000 [#1] PREEMPT SMP KASAN
 CPU: 5 PID: 901 Comm: repro Tainted: G        W          6.1.0-rc3+ #410
 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
 RIP: 0010:kasan_check_range+0xdf/0x190
  vmx_handle_exit_irqoff+0x152/0x290 [kvm_intel]
  vcpu_run+0x1d89/0x2bd0 [kvm]
  kvm_arch_vcpu_ioctl_run+0x3ce/0xa70 [kvm]
  kvm_vcpu_ioctl+0x349/0x900 [kvm]
  __x64_sys_ioctl+0xb8/0xf0
  do_syscall_64+0x2b/0x50
  entry_SYSCALL_64_after_hwframe+0x46/0xb0

Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand")
Reported-by: syzbot+8cdd16fd5a6c0565e227@syzkaller.appspotmail.com
Signed-off-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20221110203504.1985010-6-seanjc@google.com
---
 arch/x86/mm/kasan_init_64.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index afc5e129ca7be..0302491d799d1 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -341,7 +341,7 @@ void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
 
 void __init kasan_init(void)
 {
-	unsigned long shadow_cea_begin, shadow_cea_end;
+	unsigned long shadow_cea_begin, shadow_cea_per_cpu_begin, shadow_cea_end;
 	int i;
 
 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));
@@ -384,6 +384,7 @@ void __init kasan_init(void)
 	}
 
 	shadow_cea_begin = kasan_mem_to_shadow_align_down(CPU_ENTRY_AREA_BASE);
+	shadow_cea_per_cpu_begin = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_PER_CPU);
 	shadow_cea_end = kasan_mem_to_shadow_align_up(CPU_ENTRY_AREA_BASE +
 						      CPU_ENTRY_AREA_MAP_SIZE);
 
@@ -409,6 +410,15 @@ void __init kasan_init(void)
 		kasan_mem_to_shadow((void *)VMALLOC_END + 1),
 		(void *)shadow_cea_begin);
 
+	/*
+	 * Populate the shadow for the shared portion of the CPU entry area.
+	 * Shadows for the per-CPU areas are mapped on-demand, as each CPU's
+	 * area is randomly placed somewhere in the 512GiB range and mapping
+	 * the entire 512GiB range is prohibitively expensive.
+	 */
+	kasan_populate_shadow(shadow_cea_begin,
+			      shadow_cea_per_cpu_begin, 0);
+
 	kasan_populate_early_shadow((void *)shadow_cea_end,
 			kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
-- 
GitLab


From 3e844d842d49cdbe61a4b338bdd512654179488a Mon Sep 17 00:00:00 2001
From: Dave Hansen <dave.hansen@linux.intel.com>
Date: Fri, 18 Nov 2022 07:16:16 -0800
Subject: [PATCH 4018/4122] x86/mm: Ensure forced page table splitting

There are a few kernel users like kfence that require 4k pages to work
correctly and do not support large mappings.  They use set_memory_4k()
to break down those large mappings.

That, in turn relies on cpa_data->force_split option to indicate to
set_memory code that it should split page tables regardless of whether
the need to be.

But, a recent change added an optimization which would return early
if a set_memory request came in that did not change permissions.  It
did not consult ->force_split and would mistakenly optimize away the
splitting that set_memory_4k() needs.  This broke kfence.

Skip the same-permission optimization when ->force_split is set.

Fixes: 127960a05548 ("x86/mm: Inhibit _PAGE_NX changes from cpa_process_alias()")
Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
Tested-by: Marco Elver <elver@google.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/all/CA+G9fYuFxZTxkeS35VTZMXwQvohu73W3xbZ5NtjebsVvH6hCuA@mail.gmail.com/
---
 arch/x86/mm/pat/set_memory.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 220361ceb9977..0db69514fe291 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1727,7 +1727,8 @@ static int __change_page_attr_set_clr(struct cpa_data *cpa, int primary)
 	/*
 	 * No changes, easy!
 	 */
-	if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)))
+	if (!(pgprot_val(cpa->mask_set) | pgprot_val(cpa->mask_clr)) &&
+	    !cpa->force_split)
 		return ret;
 
 	while (rempages) {
-- 
GitLab


From db0b124f02ba68de6517ac303d431af220ccfe9f Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Fri, 9 Dec 2022 12:15:17 +0800
Subject: [PATCH 4019/4122] igc: Enhance Qbv scheduling by using first flag bit

The I225 hardware has a limitation that packets can only be scheduled
in the [0, cycle-time] interval. So, scheduling a packet to the start
of the next cycle doesn't usually work.

To overcome this, we use the Transmit Descriptor first flag to indicates
that a packet should be the first packet (from a queue) in a cycle
according to the section 7.5.2.9.3.4 The First Packet on Each QBV Cycle
in Intel Discrete I225/6 User Manual.

But this only works if there was any packet from that queue during the
current cycle, to avoid this issue, we issue an empty packet if that's
not the case. Also require one more descriptor to be available, to take
into account the empty packet that might be issued.

Test Setup:

Talker: Use l2_tai to generate the launchtime into packet load.

Listener: Use timedump.c to compute the delta between packet arrival
and LaunchTime packet payload.

Test Result:

Before:

1666000610127300000,1666000610127300096,96,621273
1666000610127400000,1666000610127400192,192,621274
1666000610127500000,1666000610127500032,32,621275
1666000610127600000,1666000610127600128,128,621276
1666000610127700000,1666000610127700224,224,621277
1666000610127800000,1666000610127800064,64,621278
1666000610127900000,1666000610127900160,160,621279
1666000610128000000,1666000610128000000,0,621280
1666000610128100000,1666000610128100096,96,621281
1666000610128200000,1666000610128200192,192,621282
1666000610128300000,1666000610128300032,32,621283
1666000610128400000,1666000610128301056,-98944,621284
1666000610128500000,1666000610128302080,-197920,621285
1666000610128600000,1666000610128302848,-297152,621286
1666000610128700000,1666000610128303872,-396128,621287
1666000610128800000,1666000610128304896,-495104,621288
1666000610128900000,1666000610128305664,-594336,621289
1666000610129000000,1666000610128306688,-693312,621290
1666000610129100000,1666000610128307712,-792288,621291
1666000610129200000,1666000610128308480,-891520,621292
1666000610129300000,1666000610128309504,-990496,621293
1666000610129400000,1666000610128310528,-1089472,621294
1666000610129500000,1666000610128311296,-1188704,621295
1666000610129600000,1666000610128312320,-1287680,621296
1666000610129700000,1666000610128313344,-1386656,621297
1666000610129800000,1666000610128314112,-1485888,621298
1666000610129900000,1666000610128315136,-1584864,621299
1666000610130000000,1666000610128316160,-1683840,621300
1666000610130100000,1666000610128316928,-1783072,621301
1666000610130200000,1666000610128317952,-1882048,621302
1666000610130300000,1666000610128318976,-1981024,621303
1666000610130400000,1666000610128319744,-2080256,621304
1666000610130500000,1666000610128320768,-2179232,621305
1666000610130600000,1666000610128321792,-2278208,621306
1666000610130700000,1666000610128322816,-2377184,621307
1666000610130800000,1666000610128323584,-2476416,621308
1666000610130900000,1666000610128324608,-2575392,621309
1666000610131000000,1666000610128325632,-2674368,621310
1666000610131100000,1666000610128326400,-2773600,621311
1666000610131200000,1666000610128327424,-2872576,621312
1666000610131300000,1666000610128328448,-2971552,621313
1666000610131400000,1666000610128329216,-3070784,621314
1666000610131500000,1666000610131500032,32,621315
1666000610131600000,1666000610131600128,128,621316
1666000610131700000,1666000610131700224,224,621317

After:

1666073510646200000,1666073510646200064,64,2676462
1666073510646300000,1666073510646300160,160,2676463
1666073510646400000,1666073510646400256,256,2676464
1666073510646500000,1666073510646500096,96,2676465
1666073510646600000,1666073510646600192,192,2676466
1666073510646700000,1666073510646700032,32,2676467
1666073510646800000,1666073510646800128,128,2676468
1666073510646900000,1666073510646900224,224,2676469
1666073510647000000,1666073510647000064,64,2676470
1666073510647100000,1666073510647100160,160,2676471
1666073510647200000,1666073510647200256,256,2676472
1666073510647300000,1666073510647300096,96,2676473
1666073510647400000,1666073510647400192,192,2676474
1666073510647500000,1666073510647500032,32,2676475
1666073510647600000,1666073510647600128,128,2676476
1666073510647700000,1666073510647700224,224,2676477
1666073510647800000,1666073510647800064,64,2676478
1666073510647900000,1666073510647900160,160,2676479
1666073510648000000,1666073510648000000,0,2676480
1666073510648100000,1666073510648100096,96,2676481
1666073510648200000,1666073510648200192,192,2676482
1666073510648300000,1666073510648300032,32,2676483
1666073510648400000,1666073510648400128,128,2676484
1666073510648500000,1666073510648500224,224,2676485
1666073510648600000,1666073510648600064,64,2676486
1666073510648700000,1666073510648700160,160,2676487
1666073510648800000,1666073510648800000,0,2676488
1666073510648900000,1666073510648900096,96,2676489
1666073510649000000,1666073510649000192,192,2676490
1666073510649100000,1666073510649100032,32,2676491
1666073510649200000,1666073510649200128,128,2676492
1666073510649300000,1666073510649300224,224,2676493
1666073510649400000,1666073510649400064,64,2676494
1666073510649500000,1666073510649500160,160,2676495
1666073510649600000,1666073510649600000,0,2676496
1666073510649700000,1666073510649700096,96,2676497
1666073510649800000,1666073510649800192,192,2676498
1666073510649900000,1666073510649900032,32,2676499
1666073510650000000,1666073510650000128,128,2676500

Fixes: 82faa9b79950 ("igc: Add support for ETF offloading")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Co-developed-by: Aravindhan Gunasekaran <aravindhan.gunasekaran@intel.com>
Signed-off-by: Aravindhan Gunasekaran <aravindhan.gunasekaran@intel.com>
Co-developed-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Signed-off-by: Malli C <mallikarjuna.chilakala@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |   2 +
 drivers/net/ethernet/intel/igc/igc_defines.h |   2 +
 drivers/net/ethernet/intel/igc/igc_main.c    | 176 ++++++++++++++++---
 3 files changed, 151 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 1e7e7071f64d2..66a57636d329a 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -94,6 +94,8 @@ struct igc_ring {
 	u8 queue_index;                 /* logical index of the ring*/
 	u8 reg_idx;                     /* physical index of the ring */
 	bool launchtime_enable;         /* true if LaunchTime is enabled */
+	ktime_t last_tx_cycle;          /* end of the cycle with a launchtime transmission */
+	ktime_t last_ff_cycle;          /* Last cycle with an active first flag */
 
 	u32 start_time;
 	u32 end_time;
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index f7311aeb293b9..a7b22639cfcd9 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -321,6 +321,8 @@
 #define IGC_ADVTXD_L4LEN_SHIFT	8  /* Adv ctxt L4LEN shift */
 #define IGC_ADVTXD_MSS_SHIFT	16 /* Adv ctxt MSS shift */
 
+#define IGC_ADVTXD_TSN_CNTX_FIRST	0x00000080
+
 /* Transmit Control */
 #define IGC_TCTL_EN		0x00000002 /* enable Tx */
 #define IGC_TCTL_PSP		0x00000008 /* pad short packets */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 1586e1e435c6f..c67a8e8ea972b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -1000,25 +1000,118 @@ static int igc_write_mc_addr_list(struct net_device *netdev)
 	return netdev_mc_count(netdev);
 }
 
-static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
+static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
+				bool *first_flag, bool *insert_empty)
 {
+	struct igc_adapter *adapter = netdev_priv(ring->netdev);
 	ktime_t cycle_time = adapter->cycle_time;
 	ktime_t base_time = adapter->base_time;
+	ktime_t now = ktime_get_clocktai();
+	ktime_t baset_est, end_of_cycle;
 	u32 launchtime;
+	s64 n;
 
-	/* FIXME: when using ETF together with taprio, we may have a
-	 * case where 'delta' is larger than the cycle_time, this may
-	 * cause problems if we don't read the current value of
-	 * IGC_BASET, as the value writen into the launchtime
-	 * descriptor field may be misinterpreted.
+	n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);
+
+	baset_est = ktime_add_ns(base_time, cycle_time * (n));
+	end_of_cycle = ktime_add_ns(baset_est, cycle_time);
+
+	if (ktime_compare(txtime, end_of_cycle) >= 0) {
+		if (baset_est != ring->last_ff_cycle) {
+			*first_flag = true;
+			ring->last_ff_cycle = baset_est;
+
+			if (ktime_compare(txtime, ring->last_tx_cycle) > 0)
+				*insert_empty = true;
+		}
+	}
+
+	/* Introducing a window at end of cycle on which packets
+	 * potentially not honor launchtime. Window of 5us chosen
+	 * considering software update the tail pointer and packets
+	 * are dma'ed to packet buffer.
 	 */
-	div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
+	if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
+		netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
+			    txtime);
+
+	ring->last_tx_cycle = end_of_cycle;
+
+	launchtime = ktime_sub_ns(txtime, baset_est);
+	if (launchtime > 0)
+		div_s64_rem(launchtime, cycle_time, &launchtime);
+	else
+		launchtime = 0;
 
 	return cpu_to_le32(launchtime);
 }
 
+static int igc_init_empty_frame(struct igc_ring *ring,
+				struct igc_tx_buffer *buffer,
+				struct sk_buff *skb)
+{
+	unsigned int size;
+	dma_addr_t dma;
+
+	size = skb_headlen(skb);
+
+	dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring->dev, dma)) {
+		netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
+		return -ENOMEM;
+	}
+
+	buffer->skb = skb;
+	buffer->protocol = 0;
+	buffer->bytecount = skb->len;
+	buffer->gso_segs = 1;
+	buffer->time_stamp = jiffies;
+	dma_unmap_len_set(buffer, len, skb->len);
+	dma_unmap_addr_set(buffer, dma, dma);
+
+	return 0;
+}
+
+static int igc_init_tx_empty_descriptor(struct igc_ring *ring,
+					struct sk_buff *skb,
+					struct igc_tx_buffer *first)
+{
+	union igc_adv_tx_desc *desc;
+	u32 cmd_type, olinfo_status;
+	int err;
+
+	if (!igc_desc_unused(ring))
+		return -EBUSY;
+
+	err = igc_init_empty_frame(ring, first, skb);
+	if (err)
+		return err;
+
+	cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+		   IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+		   first->bytecount;
+	olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+
+	desc = IGC_TX_DESC(ring, ring->next_to_use);
+	desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+	desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+	desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));
+
+	netdev_tx_sent_queue(txring_txq(ring), skb->len);
+
+	first->next_to_watch = desc;
+
+	ring->next_to_use++;
+	if (ring->next_to_use == ring->count)
+		ring->next_to_use = 0;
+
+	return 0;
+}
+
+#define IGC_EMPTY_FRAME_SIZE 60
+
 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
-			    struct igc_tx_buffer *first,
+			    __le32 launch_time, bool first_flag,
 			    u32 vlan_macip_lens, u32 type_tucmd,
 			    u32 mss_l4len_idx)
 {
@@ -1037,26 +1130,17 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
 	if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
 		mss_l4len_idx |= tx_ring->reg_idx << 4;
 
+	if (first_flag)
+		mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;
+
 	context_desc->vlan_macip_lens	= cpu_to_le32(vlan_macip_lens);
 	context_desc->type_tucmd_mlhl	= cpu_to_le32(type_tucmd);
 	context_desc->mss_l4len_idx	= cpu_to_le32(mss_l4len_idx);
-
-	/* We assume there is always a valid Tx time available. Invalid times
-	 * should have been handled by the upper layers.
-	 */
-	if (tx_ring->launchtime_enable) {
-		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
-		ktime_t txtime = first->skb->tstamp;
-
-		skb_txtime_consumed(first->skb);
-		context_desc->launch_time = igc_tx_launchtime(adapter,
-							      txtime);
-	} else {
-		context_desc->launch_time = 0;
-	}
+	context_desc->launch_time	= launch_time;
 }
 
-static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
+static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
+			__le32 launch_time, bool first_flag)
 {
 	struct sk_buff *skb = first->skb;
 	u32 vlan_macip_lens = 0;
@@ -1096,7 +1180,8 @@ no_csum:
 	vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
 
-	igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens, type_tucmd, 0);
+	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
+			vlan_macip_lens, type_tucmd, 0);
 }
 
 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
@@ -1320,6 +1405,7 @@ dma_error:
 
 static int igc_tso(struct igc_ring *tx_ring,
 		   struct igc_tx_buffer *first,
+		   __le32 launch_time, bool first_flag,
 		   u8 *hdr_len)
 {
 	u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
@@ -1406,8 +1492,8 @@ static int igc_tso(struct igc_ring *tx_ring,
 	vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
 	vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
 
-	igc_tx_ctxtdesc(tx_ring, first, vlan_macip_lens,
-			type_tucmd, mss_l4len_idx);
+	igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
+			vlan_macip_lens, type_tucmd, mss_l4len_idx);
 
 	return 1;
 }
@@ -1415,11 +1501,14 @@ static int igc_tso(struct igc_ring *tx_ring,
 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 				       struct igc_ring *tx_ring)
 {
+	bool first_flag = false, insert_empty = false;
 	u16 count = TXD_USE_COUNT(skb_headlen(skb));
 	__be16 protocol = vlan_get_protocol(skb);
 	struct igc_tx_buffer *first;
+	__le32 launch_time = 0;
 	u32 tx_flags = 0;
 	unsigned short f;
+	ktime_t txtime;
 	u8 hdr_len = 0;
 	int tso = 0;
 
@@ -1433,11 +1522,40 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 		count += TXD_USE_COUNT(skb_frag_size(
 						&skb_shinfo(skb)->frags[f]));
 
-	if (igc_maybe_stop_tx(tx_ring, count + 3)) {
+	if (igc_maybe_stop_tx(tx_ring, count + 5)) {
 		/* this is a hard error */
 		return NETDEV_TX_BUSY;
 	}
 
+	if (!tx_ring->launchtime_enable)
+		goto done;
+
+	txtime = skb->tstamp;
+	skb->tstamp = ktime_set(0, 0);
+	launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty);
+
+	if (insert_empty) {
+		struct igc_tx_buffer *empty_info;
+		struct sk_buff *empty;
+		void *data;
+
+		empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+		empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
+		if (!empty)
+			goto done;
+
+		data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
+		memset(data, 0, IGC_EMPTY_FRAME_SIZE);
+
+		igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
+
+		if (igc_init_tx_empty_descriptor(tx_ring,
+						 empty,
+						 empty_info) < 0)
+			dev_kfree_skb_any(empty);
+	}
+
+done:
 	/* record the location of the first descriptor for this packet */
 	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
 	first->type = IGC_TX_BUFFER_TYPE_SKB;
@@ -1474,11 +1592,11 @@ static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
 	first->tx_flags = tx_flags;
 	first->protocol = protocol;
 
-	tso = igc_tso(tx_ring, first, &hdr_len);
+	tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len);
 	if (tso < 0)
 		goto out_drop;
 	else if (!tso)
-		igc_tx_csum(tx_ring, first);
+		igc_tx_csum(tx_ring, first, launch_time, first_flag);
 
 	igc_tx_map(tx_ring, first, hdr_len);
 
-- 
GitLab


From d8f45be01dd9381065a3778a579385249ed011dc Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Fri, 9 Dec 2022 12:15:18 +0800
Subject: [PATCH 4020/4122] igc: Use strict cycles for Qbv scheduling

Configuring strict cycle mode in the controller forces more well
behaved transmissions when taprio is offloaded.

When set this strict_cycle and strict_end, transmission is not
enabled if the whole packet cannot be completed before end of
the Qbv cycle.

Fixes: 82faa9b79950 ("igc: Add support for ETF offloading")
Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: Aravindhan Gunasekaran <aravindhan.gunasekaran@intel.com>
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_tsn.c | 11 ++---------
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index f975ed807da1e..684aedd4d088c 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -140,15 +140,8 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 		wr32(IGC_STQT(i), ring->start_time);
 		wr32(IGC_ENDQT(i), ring->end_time);
 
-		if (adapter->base_time) {
-			/* If we have a base_time we are in "taprio"
-			 * mode and we need to be strict about the
-			 * cycles: only transmit a packet if it can be
-			 * completed during that cycle.
-			 */
-			txqctl |= IGC_TXQCTL_STRICT_CYCLE |
-				IGC_TXQCTL_STRICT_END;
-		}
+		txqctl |= IGC_TXQCTL_STRICT_CYCLE |
+			IGC_TXQCTL_STRICT_END;
 
 		if (ring->launchtime_enable)
 			txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
-- 
GitLab


From 3b61764fb49a6e147ac90d71dccdddc9d5508ba1 Mon Sep 17 00:00:00 2001
From: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Date: Fri, 9 Dec 2022 12:15:19 +0800
Subject: [PATCH 4021/4122] igc: Add checking for basetime less than zero

Using the tc qdisc command, the user can set basetime to any value.
Checking should be done on the driver's side to prevent registering
basetime values that are less than zero.

Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading")
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index c67a8e8ea972b..6ed12329ae80e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6047,6 +6047,9 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 	if (!qopt->enable)
 		return igc_tsn_clear_schedule(adapter);
 
+	if (qopt->base_time < 0)
+		return -ERANGE;
+
 	if (adapter->base_time)
 		return -EALREADY;
 
-- 
GitLab


From e17090eb24944fbbe1f24d9f336d7bad4fbe47e8 Mon Sep 17 00:00:00 2001
From: Tan Tee Min <tee.min.tan@linux.intel.com>
Date: Fri, 9 Dec 2022 12:15:20 +0800
Subject: [PATCH 4022/4122] igc: allow BaseTime 0 enrollment for Qbv

Introduce qbv_enable flag in igc_adapter struct to store the Qbv on/off.
So this allow the BaseTime to enroll with zero value.

Fixes: 61572d5f8f91 ("igc: Simplify TSN flags handling")
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Signed-off-by: Tan Tee Min <tee.min.tan@linux.intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      | 1 +
 drivers/net/ethernet/intel/igc/igc_main.c | 2 ++
 drivers/net/ethernet/intel/igc/igc_tsn.c  | 2 +-
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 66a57636d329a..df3e26c0cf01a 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -184,6 +184,7 @@ struct igc_adapter {
 
 	ktime_t base_time;
 	ktime_t cycle_time;
+	bool qbv_enable;
 
 	/* OS defined structs */
 	struct pci_dev *pdev;
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 6ed12329ae80e..6c98f3402f8c0 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6044,6 +6044,8 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 	u32 start_time = 0, end_time = 0;
 	size_t n;
 
+	adapter->qbv_enable = qopt->enable;
+
 	if (!qopt->enable)
 		return igc_tsn_clear_schedule(adapter);
 
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index 684aedd4d088c..bb10d7b65232a 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -36,7 +36,7 @@ static unsigned int igc_tsn_new_flags(struct igc_adapter *adapter)
 {
 	unsigned int new_flags = adapter->flags & ~IGC_FLAG_TSN_ANY_ENABLED;
 
-	if (adapter->base_time)
+	if (adapter->qbv_enable)
 		new_flags |= IGC_FLAG_TSN_QBV_ENABLED;
 
 	if (is_any_launchtime(adapter))
-- 
GitLab


From 6d05251d537a4d3835959a8cdd8cbbbdcdc0c904 Mon Sep 17 00:00:00 2001
From: Tan Tee Min <tee.min.tan@linux.intel.com>
Date: Fri, 9 Dec 2022 12:15:21 +0800
Subject: [PATCH 4023/4122] igc: recalculate Qbv end_time by considering cycle
 time

Qbv users can specify a cycle time that is not equal to the total GCL
intervals. Hence, recalculation is necessary here to exclude the time
interval that exceeds the cycle time. As those GCL which exceeds the
cycle time will be truncated.

According to IEEE Std. 802.1Q-2018 section 8.6.9.2, once the end of
the list is reached, it will switch to the END_OF_CYCLE state and
leave the gates in the same state until the next cycle is started.

Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading")
Signed-off-by: Tan Tee Min <tee.min.tan@linux.intel.com>
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 6c98f3402f8c0..86e46208d95cf 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6067,6 +6067,21 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 
 		end_time += e->interval;
 
+		/* If any of the conditions below are true, we need to manually
+		 * control the end time of the cycle.
+		 * 1. Qbv users can specify a cycle time that is not equal
+		 * to the total GCL intervals. Hence, recalculation is
+		 * necessary here to exclude the time interval that
+		 * exceeds the cycle time.
+		 * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2,
+		 * once the end of the list is reached, it will switch
+		 * to the END_OF_CYCLE state and leave the gates in the
+		 * same state until the next cycle is started.
+		 */
+		if (end_time > adapter->cycle_time ||
+		    n + 1 == qopt->num_entries)
+			end_time = adapter->cycle_time;
+
 		for (i = 0; i < adapter->num_tx_queues; i++) {
 			struct igc_ring *ring = adapter->tx_ring[i];
 
-- 
GitLab


From 72abeedd83982c1bc6023f631e412db78374d9b4 Mon Sep 17 00:00:00 2001
From: Tan Tee Min <tee.min.tan@linux.intel.com>
Date: Fri, 9 Dec 2022 12:15:22 +0800
Subject: [PATCH 4024/4122] igc: Set Qbv start_time and end_time to end_time if
 not being configured in GCL

The default setting of end_time minus start_time is whole 1 second.
Thus, if it's not being configured in any GCL entry then it will be
staying at original 1 second.

This patch is changing the start_time and end_time to be end_time as
if setting zero will be having weird HW behavior where the gate will
not be fully closed.

Fixes: ec50a9d437f0 ("igc: Add support for taprio offloading")
Signed-off-by: Tan Tee Min <tee.min.tan@linux.intel.com>
Signed-off-by: Muhammad Husaini Zulkifli <muhammad.husaini.zulkifli@intel.com>
Tested-by: Naama Meir <naamax.meir@linux.intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 86e46208d95cf..44b1740dc0983 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -6043,6 +6043,7 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 	bool queue_configured[IGC_MAX_TX_QUEUES] = { };
 	u32 start_time = 0, end_time = 0;
 	size_t n;
+	int i;
 
 	adapter->qbv_enable = qopt->enable;
 
@@ -6063,7 +6064,6 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 
 	for (n = 0; n < qopt->num_entries; n++) {
 		struct tc_taprio_sched_entry *e = &qopt->entries[n];
-		int i;
 
 		end_time += e->interval;
 
@@ -6102,6 +6102,18 @@ static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 		start_time += e->interval;
 	}
 
+	/* Check whether a queue gets configured.
+	 * If not, set the start and end time to be end time.
+	 */
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		if (!queue_configured[i]) {
+			struct igc_ring *ring = adapter->tx_ring[i];
+
+			ring->start_time = end_time;
+			ring->end_time = end_time;
+		}
+	}
+
 	return 0;
 }
 
-- 
GitLab


From bb0a1799bbc498053011729dcac0d9430e7d750a Mon Sep 17 00:00:00 2001
From: Jean Delvare <jdelvare@suse.de>
Date: Thu, 24 Nov 2022 15:43:59 +0100
Subject: [PATCH 4025/4122] rtc: isl12026: drop obsolete dependency on
 COMPILE_TEST

Since commit 0166dc11be91 ("of: make CONFIG_OF user selectable"), it
is possible to test-build any driver which depends on OF on any
architecture by explicitly selecting OF. Therefore depending on
COMPILE_TEST as an alternative is no longer needed.

It is actually better to always build such drivers with OF enabled,
so that the test builds are closer to how each driver will actually be
built on its intended target. Building them without OF may not test
much as the compiler will optimize out potentially large parts of the
code. In the worst case, this could even pop false positive warnings.
Dropping COMPILE_TEST here improves the quality of our testing and
avoids wasting time on non-existent issues.

Signed-off-by: Jean Delvare <jdelvare@suse.de>
Cc: Alessandro Zummo <a.zummo@towertech.it>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Link: https://lore.kernel.org/r/20221124154359.039be06c@endymion.delvare
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig
index b45fd08d51dc2..2bb640d1521d8 100644
--- a/drivers/rtc/Kconfig
+++ b/drivers/rtc/Kconfig
@@ -433,7 +433,7 @@ config RTC_DRV_ISL12022
 
 config RTC_DRV_ISL12026
 	tristate "Intersil ISL12026"
-	depends on OF || COMPILE_TEST
+	depends on OF
 	help
 	  If you say yes here you get support for the
 	  Intersil ISL12026 RTC chip.
-- 
GitLab


From 0feebdeb3acc0375cd817385d637d27a8a6dca97 Mon Sep 17 00:00:00 2001
From: ye xingchen <ye.xingchen@zte.com.cn>
Date: Mon, 5 Dec 2022 11:34:45 +0800
Subject: [PATCH 4026/4122] rtc: ds1307: use sysfs_emit() to instead of
 scnprintf()

Follow the advice of the Documentation/filesystems/sysfs.rst and show()
should only use sysfs_emit() or sysfs_emit_at() when formatting the
value to be returned to user space.

Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Link: https://lore.kernel.org/r/202212051134455911470@zte.com.cn
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-ds1307.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 7c2276cf5514a..def9b7f9d9577 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -1219,8 +1219,7 @@ static ssize_t frequency_test_show(struct device *dev,
 
 	regmap_read(ds1307->regmap, M41TXX_REG_CONTROL, &ctrl_reg);
 
-	return scnprintf(buf, PAGE_SIZE, (ctrl_reg & M41TXX_BIT_FT) ? "on\n" :
-			"off\n");
+	return sysfs_emit(buf, (ctrl_reg & M41TXX_BIT_FT) ? "on\n" : "off\n");
 }
 
 static DEVICE_ATTR_RW(frequency_test);
-- 
GitLab


From 16b26f6027588be4414fb69d665bd07c9cb828e9 Mon Sep 17 00:00:00 2001
From: Wadim Egorov <w.egorov@phytec.de>
Date: Thu, 8 Dec 2022 14:36:05 +0100
Subject: [PATCH 4027/4122] rtc: rv3028: Use IRQ flags obtained from device
 tree if available

Make the interrupt pin of the RV3028 usable with GPIO controllers
without level type IRQs support, such as the TI Davinci GPIO controller.

Therefore, allow the IRQ type to be passed from the device tree if
available.

Based on commit d4785b46345c ("rtc: pcf2127: use IRQ flags obtained from device tree if available")

Signed-off-by: Wadim Egorov <w.egorov@phytec.de>
Link: https://lore.kernel.org/r/20221208133605.4193907-1-w.egorov@phytec.de
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rv3028.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-rv3028.c b/drivers/rtc/rtc-rv3028.c
index dd170e3efd83e..b0099e26e3b05 100644
--- a/drivers/rtc/rtc-rv3028.c
+++ b/drivers/rtc/rtc-rv3028.c
@@ -902,9 +902,20 @@ static int rv3028_probe(struct i2c_client *client)
 		return PTR_ERR(rv3028->rtc);
 
 	if (client->irq > 0) {
+		unsigned long flags;
+
+		/*
+		 * If flags = 0, devm_request_threaded_irq() will use IRQ flags
+		 * obtained from device tree.
+		 */
+		if (dev_fwnode(&client->dev))
+			flags = 0;
+		else
+			flags = IRQF_TRIGGER_LOW;
+
 		ret = devm_request_threaded_irq(&client->dev, client->irq,
 						NULL, rv3028_handle_irq,
-						IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+						flags | IRQF_ONESHOT,
 						"rv3028", rv3028);
 		if (ret) {
 			dev_warn(&client->dev, "unable to request IRQ, alarms disabled\n");
-- 
GitLab


From b9354487fb795c144a387e51a9d4d33b710c74f7 Mon Sep 17 00:00:00 2001
From: shaomin Deng <dengshaomin@cdjrlc.com>
Date: Mon, 8 Aug 2022 11:23:54 -0400
Subject: [PATCH 4028/4122] rtc: remove duplicated words in comments

Signed-off-by: shaomin Deng <dengshaomin@cdjrlc.com>
Link: https://lore.kernel.org/r/20220808152354.3641-1-dengshaomin@cdjrlc.com
Link: https://lore.kernel.org/r/20220808153454.6844-1-dengshaomin@cdjrlc.com
Link: https://lore.kernel.org/r/20220808152822.5012-1-dengshaomin@cdjrlc.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/interface.c   | 2 +-
 drivers/rtc/rtc-m41t80.c  | 2 +-
 drivers/rtc/rtc-rs5c372.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/interface.c b/drivers/rtc/interface.c
index 9edd662c69ace..7c30cb3c764d8 100644
--- a/drivers/rtc/interface.c
+++ b/drivers/rtc/interface.c
@@ -256,7 +256,7 @@ int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm)
 	 *
 	 * This could all instead be done in the lower level driver,
 	 * but since more than one lower level RTC implementation needs it,
-	 * then it's probably best best to do it here instead of there..
+	 * then it's probably best to do it here instead of there..
 	 */
 
 	/* Get the "before" timestamp */
diff --git a/drivers/rtc/rtc-m41t80.c b/drivers/rtc/rtc-m41t80.c
index d3144ffdebb58..494052dbd39ff 100644
--- a/drivers/rtc/rtc-m41t80.c
+++ b/drivers/rtc/rtc-m41t80.c
@@ -692,7 +692,7 @@ static void wdt_disable(void)
  *	@ppos: pointer to the position to write. No seeks allowed
  *
  *	A write to a watchdog device is defined as a keepalive signal. Any
- *	write of data will do, as we we don't define content meaning.
+ *	write of data will do, as we don't define content meaning.
  */
 static ssize_t wdt_write(struct file *file, const char __user *buf,
 			 size_t count, loff_t *ppos)
diff --git a/drivers/rtc/rtc-rs5c372.c b/drivers/rtc/rtc-rs5c372.c
index 5047afefcceb7..b4c5d016eca32 100644
--- a/drivers/rtc/rtc-rs5c372.c
+++ b/drivers/rtc/rtc-rs5c372.c
@@ -150,7 +150,7 @@ static int rs5c_get_regs(struct rs5c372 *rs5c)
 	 * least 80219 chips; this works around that bug.
 	 *
 	 * The third method on the other hand doesn't work for the SMBus-only
-	 * configurations, so we use the the first method there, stripping off
+	 * configurations, so we use the first method there, stripping off
 	 * the extra register in the process.
 	 */
 	if (rs5c->smbus) {
-- 
GitLab


From 2dc5e3fb6e974bc299cdd43cb9e253c8958d0d11 Mon Sep 17 00:00:00 2001
From: Xiang wangx <wangxiang@cdjrlc.com>
Date: Sun, 5 Jun 2022 16:35:15 +0800
Subject: [PATCH 4029/4122] rtc: at91rm9200: Fix syntax errors in comments

Delete the redundant word 'is'.

Signed-off-by: Xiang wangx <wangxiang@cdjrlc.com>
Link: https://lore.kernel.org/r/20220605083515.9514-1-wangxiang@cdjrlc.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-at91rm9200.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index fe396d27ebb71..e9d17232d0a81 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -130,7 +130,7 @@ static void at91_rtc_write_idr(u32 mask)
 	 *
 	 * Note that there is still a possibility that the mask is updated
 	 * before interrupts have actually been disabled in hardware. The only
-	 * way to be certain would be to poll the IMR-register, which is is
+	 * way to be certain would be to poll the IMR-register, which is
 	 * the very register we are trying to emulate. The register read back
 	 * is a reasonable heuristic.
 	 */
-- 
GitLab


From 3bb23f1f9d5a66e23f643b2318a64d88f2585c8e Mon Sep 17 00:00:00 2001
From: Zhang Jiaming <jiaming@nfschina.com>
Date: Wed, 22 Jun 2022 16:53:44 +0800
Subject: [PATCH 4030/4122] rtc: rs5c313: correct some spelling mistakes

Change 'modifed' to 'modified'.
Change 'Updata' to 'Update'.
Change 'Initiatlize' to 'Initialize'.

Signed-off-by: Zhang Jiaming <jiaming@nfschina.com>
Link: https://lore.kernel.org/r/20220622085344.23519-1-jiaming@nfschina.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-rs5c313.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c
index e98f85f34206f..712a08e9e52db 100644
--- a/drivers/rtc/rtc-rs5c313.c
+++ b/drivers/rtc/rtc-rs5c313.c
@@ -2,7 +2,7 @@
  * Ricoh RS5C313 RTC device/driver
  *  Copyright (C) 2007 Nobuhiro Iwamatsu
  *
- *  2005-09-19 modifed by kogiidena
+ *  2005-09-19 modified by kogiidena
  *
  * Based on the old drivers/char/rs5c313_rtc.c  by:
  *  Copyright (C) 2000 Philipp Rumpf <prumpf@tux.org>
@@ -36,7 +36,7 @@
  *      1.11a   Daniele Bellucci: Audit create_proc_read_entry in rtc_init
  *	1.12	Venkatesh Pallipadi: Hooks for emulating rtc on HPET base-timer
  *		CONFIG_HPET_EMULATE_RTC
- *	1.13	Nobuhiro Iwamatsu: Updata driver.
+ *	1.13	Nobuhiro Iwamatsu: Update driver.
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -280,7 +280,7 @@ static int rs5c313_rtc_set_time(struct device *dev, struct rtc_time *tm)
 	while (1) {
 		RS5C313_CEENABLE;	/* CE:H */
 
-		/* Initiatlize control reg. 24 hour */
+		/* Initialize control reg. 24 hour */
 		rs5c313_write_cntreg(0x04);
 
 		if (!(rs5c313_read_cntreg() & RS5C313_CNTREG_ADJ_BSY))
-- 
GitLab


From 55d5a86618d3b1a768bce01882b74cbbd2651975 Mon Sep 17 00:00:00 2001
From: GUO Zihua <guozihua@huawei.com>
Date: Tue, 22 Nov 2022 16:50:46 +0800
Subject: [PATCH 4031/4122] rtc: mxc_v2: Add missing clk_disable_unprepare()

The call to clk_disable_unprepare() is left out in the error handling of
devm_rtc_allocate_device. Add it back.

Fixes: 5490a1e018a4 ("rtc: mxc_v2: fix possible race condition")
Signed-off-by: GUO Zihua <guozihua@huawei.com>
Link: https://lore.kernel.org/r/20221122085046.21689-1-guozihua@huawei.com
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-mxc_v2.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/rtc/rtc-mxc_v2.c b/drivers/rtc/rtc-mxc_v2.c
index 5e03834016294..f6d2ad91ff7a9 100644
--- a/drivers/rtc/rtc-mxc_v2.c
+++ b/drivers/rtc/rtc-mxc_v2.c
@@ -336,8 +336,10 @@ static int mxc_rtc_probe(struct platform_device *pdev)
 	}
 
 	pdata->rtc = devm_rtc_allocate_device(&pdev->dev);
-	if (IS_ERR(pdata->rtc))
+	if (IS_ERR(pdata->rtc)) {
+		clk_disable_unprepare(pdata->clk);
 		return PTR_ERR(pdata->rtc);
+	}
 
 	pdata->rtc->ops = &mxc_rtc_ops;
 	pdata->rtc->range_max = U32_MAX;
-- 
GitLab


From e88f319a2546fd7772c726bf3a82a23b0859ddeb Mon Sep 17 00:00:00 2001
From: Minghao Chi <chi.minghao@zte.com.cn>
Date: Tue, 22 Nov 2022 09:47:19 +0800
Subject: [PATCH 4032/4122] rtc: ds1742: use
 devm_platform_get_and_ioremap_resource()

Convert platform_get_resource(), devm_ioremap_resource() to a single
call to devm_platform_get_and_ioremap_resource(), as this is exactly
what this function does.

Signed-off-by: Minghao Chi <chi.minghao@zte.com.cn>
Signed-off-by: ye xingchen <ye.xingchen@zte.com.cn>
Link: https://lore.kernel.org/r/202211220947194856561@zte.com.cn
Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
---
 drivers/rtc/rtc-ds1742.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 13d45c697da68..a5026b0514e78 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -158,8 +158,7 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
 	if (!pdata)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ioaddr = devm_ioremap_resource(&pdev->dev, res);
+	ioaddr = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(ioaddr))
 		return PTR_ERR(ioaddr);
 
-- 
GitLab


From 61b963b52f59524e27692bc1c14bfb2459e32eb3 Mon Sep 17 00:00:00 2001
From: David Hildenbrand <david@redhat.com>
Date: Mon, 12 Dec 2022 19:20:18 +0100
Subject: [PATCH 4033/4122] mm/gup_test: free memory allocated via kvcalloc()
 using kvfree()

We have to free via kvfree(), not via kfree().

Link: https://lkml.kernel.org/r/20221212182018.264900-1-david@redhat.com
Fixes: c77369b437f9 ("mm/gup_test: start/stop/read functionality for PIN LONGTERM test")
Signed-off-by: David Hildenbrand <david@redhat.com>
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Julia Lawall <julia.lawall@lip6.fr>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/gup_test.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/gup_test.c b/mm/gup_test.c
index 33f431e0da60b..8ae7307a1bb6e 100644
--- a/mm/gup_test.c
+++ b/mm/gup_test.c
@@ -214,7 +214,7 @@ static inline void pin_longterm_test_stop(void)
 		if (pin_longterm_test_nr_pages)
 			unpin_user_pages(pin_longterm_test_pages,
 					 pin_longterm_test_nr_pages);
-		kfree(pin_longterm_test_pages);
+		kvfree(pin_longterm_test_pages);
 		pin_longterm_test_pages = NULL;
 		pin_longterm_test_nr_pages = 0;
 	}
@@ -255,7 +255,7 @@ static inline int pin_longterm_test_start(unsigned long arg)
 	fast = !!(args.flags & PIN_LONGTERM_TEST_FLAG_USE_FAST);
 
 	if (!fast && mmap_read_lock_killable(current->mm)) {
-		kfree(pages);
+		kvfree(pages);
 		return -EINTR;
 	}
 
-- 
GitLab


From d98c86b9f7a4e1f5a7ead8ba5743952267f9e320 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Tue, 25 Oct 2022 17:38:14 +0000
Subject: [PATCH 4034/4122] maple_tree: fix mas_find_rev() comment

mas_find_rev() uses mas_prev_entry(), not mas_next_entry(), correct comment.

Link: https://lkml.kernel.org/r/20221025173756.2719616-1-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/maple_tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 3fe1491d2bf98..fe3947b800690 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -6062,7 +6062,7 @@ void *mas_find_rev(struct ma_state *mas, unsigned long min)
 	if (mas->index < min)
 		return NULL;
 
-	/* Retries on dead nodes handled by mas_next_entry */
+	/* Retries on dead nodes handled by mas_prev_entry */
 	return mas_prev_entry(mas, min);
 }
 EXPORT_SYMBOL_GPL(mas_find_rev);
-- 
GitLab


From 9102b78b6f6ae6af3557114c265c266b312c1319 Mon Sep 17 00:00:00 2001
From: Liam Howlett <liam.howlett@oracle.com>
Date: Tue, 25 Oct 2022 17:37:23 +0000
Subject: [PATCH 4035/4122] maple_tree: update copyright dates for test code

Add the span to the year of the development.

Link: https://lkml.kernel.org/r/20221025173709.2718725-1-Liam.Howlett@oracle.com
Signed-off-by: Liam R. Howlett <Liam.Howlett@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 tools/testing/radix-tree/maple.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index 2e91973fbaa66..81fa7ec2e66a1 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0+
 /*
- * maple_tree.c: Userspace shim for maple tree test-suite
- * Copyright (c) 2018 Liam R. Howlett <Liam.Howlett@Oracle.com>
+ * maple_tree.c: Userspace testing for maple tree test-suite
+ * Copyright (c) 2018-2022 Oracle Corporation
+ * Author: Liam R. Howlett <Liam.Howlett@Oracle.com>
  *
  * Any tests that require internal knowledge of the tree or threads and other
  * difficult to handle in kernel tests.
-- 
GitLab


From 56a61617dd2276cbc56a6c868599716386d70041 Mon Sep 17 00:00:00 2001
From: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Date: Thu, 27 Oct 2022 17:50:24 +0800
Subject: [PATCH 4036/4122] mm: use stack_depot for recording kmemleak's
 backtrace

Using stack_depot to record kmemleak's backtrace which has been
implemented on slub for reducing redundant information.

[akpm@linux-foundation.org: fix build - remove now-unused __save_stack_trace()]
[zhaoyang.huang@unisoc.com: v3]
  Link: https://lkml.kernel.org/r/1667101354-4669-1-git-send-email-zhaoyang.huang@unisoc.com
[akpm@linux-foundation.org: fix v3 layout oddities]
[akpm@linux-foundation.org: coding-style cleanups]
Link: https://lkml.kernel.org/r/1666864224-27541-1-git-send-email-zhaoyang.huang@unisoc.com
Signed-off-by: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Acked-by: Catalin Marinas <catalin.marinas@arm.com>
Cc: ke.wang <ke.wang@unisoc.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Zhaoyang Huang <huangzhaoyang@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/Kconfig.debug |  1 +
 mm/kmemleak.c     | 46 ++++++++++++++++++++++++++++------------------
 2 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a93c45ebc3205..4a23b37d15ffc 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -728,6 +728,7 @@ config DEBUG_KMEMLEAK
 	select STACKTRACE if STACKTRACE_SUPPORT
 	select KALLSYMS
 	select CRC32
+	select STACKDEPOT
 	help
 	  Say Y here if you want to enable the memory leak
 	  detector. The memory allocation/freeing is traced in a way
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 2673329043545..dc7db6a2a0ddd 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -79,6 +79,7 @@
 #include <linux/mutex.h>
 #include <linux/rcupdate.h>
 #include <linux/stacktrace.h>
+#include <linux/stackdepot.h>
 #include <linux/cache.h>
 #include <linux/percpu.h>
 #include <linux/memblock.h>
@@ -159,8 +160,7 @@ struct kmemleak_object {
 	u32 checksum;
 	/* memory ranges to be scanned inside an object (empty for all) */
 	struct hlist_head area_list;
-	unsigned long trace[MAX_TRACE];
-	unsigned int trace_len;
+	depot_stack_handle_t trace_handle;
 	unsigned long jiffies;		/* creation timestamp */
 	pid_t pid;			/* pid of the current task */
 	char comm[TASK_COMM_LEN];	/* executable name */
@@ -346,18 +346,21 @@ static void print_unreferenced(struct seq_file *seq,
 			       struct kmemleak_object *object)
 {
 	int i;
+	unsigned long *entries;
+	unsigned int nr_entries;
 	unsigned int msecs_age = jiffies_to_msecs(jiffies - object->jiffies);
 
+	nr_entries = stack_depot_fetch(object->trace_handle, &entries);
 	warn_or_seq_printf(seq, "unreferenced object 0x%08lx (size %zu):\n",
-		   object->pointer, object->size);
+			  object->pointer, object->size);
 	warn_or_seq_printf(seq, "  comm \"%s\", pid %d, jiffies %lu (age %d.%03ds)\n",
-		   object->comm, object->pid, object->jiffies,
-		   msecs_age / 1000, msecs_age % 1000);
+			   object->comm, object->pid, object->jiffies,
+			   msecs_age / 1000, msecs_age % 1000);
 	hex_dump_object(seq, object);
 	warn_or_seq_printf(seq, "  backtrace:\n");
 
-	for (i = 0; i < object->trace_len; i++) {
-		void *ptr = (void *)object->trace[i];
+	for (i = 0; i < nr_entries; i++) {
+		void *ptr = (void *)entries[i];
 		warn_or_seq_printf(seq, "    [<%p>] %pS\n", ptr, ptr);
 	}
 }
@@ -370,15 +373,16 @@ static void print_unreferenced(struct seq_file *seq,
 static void dump_object_info(struct kmemleak_object *object)
 {
 	pr_notice("Object 0x%08lx (size %zu):\n",
-		  object->pointer, object->size);
+			object->pointer, object->size);
 	pr_notice("  comm \"%s\", pid %d, jiffies %lu\n",
-		  object->comm, object->pid, object->jiffies);
+			object->comm, object->pid, object->jiffies);
 	pr_notice("  min_count = %d\n", object->min_count);
 	pr_notice("  count = %d\n", object->count);
 	pr_notice("  flags = 0x%x\n", object->flags);
 	pr_notice("  checksum = %u\n", object->checksum);
 	pr_notice("  backtrace:\n");
-	stack_trace_print(object->trace, object->trace_len, 4);
+	if (object->trace_handle)
+		stack_depot_print(object->trace_handle);
 }
 
 /*
@@ -591,12 +595,18 @@ static struct kmemleak_object *find_and_remove_object(unsigned long ptr, int ali
 	return object;
 }
 
-/*
- * Save stack trace to the given array of MAX_TRACE size.
- */
-static int __save_stack_trace(unsigned long *trace)
+static noinline depot_stack_handle_t set_track_prepare(void)
 {
-	return stack_trace_save(trace, MAX_TRACE, 2);
+	depot_stack_handle_t trace_handle;
+	unsigned long entries[MAX_TRACE];
+	unsigned int nr_entries;
+
+	if (!kmemleak_initialized)
+		return 0;
+	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
+	trace_handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+
+	return trace_handle;
 }
 
 /*
@@ -653,7 +663,7 @@ static void __create_object(unsigned long ptr, size_t size,
 	}
 
 	/* kernel backtrace */
-	object->trace_len = __save_stack_trace(object->trace);
+	object->trace_handle = set_track_prepare();
 
 	raw_spin_lock_irqsave(&kmemleak_lock, flags);
 
@@ -692,7 +702,6 @@ static void __create_object(unsigned long ptr, size_t size,
 	rb_link_node(&object->rb_node, rb_parent, link);
 	rb_insert_color(&object->rb_node, is_phys ? &object_phys_tree_root :
 					  &object_tree_root);
-
 	list_add_tail_rcu(&object->object_list, &object_list);
 out:
 	raw_spin_unlock_irqrestore(&kmemleak_lock, flags);
@@ -1091,7 +1100,7 @@ void __ref kmemleak_update_trace(const void *ptr)
 	}
 
 	raw_spin_lock_irqsave(&object->lock, flags);
-	object->trace_len = __save_stack_trace(object->trace);
+	object->trace_handle = set_track_prepare();
 	raw_spin_unlock_irqrestore(&object->lock, flags);
 
 	put_object(object);
@@ -2084,6 +2093,7 @@ void __init kmemleak_init(void)
 	if (kmemleak_error)
 		return;
 
+	stack_depot_init();
 	jiffies_min_age = msecs_to_jiffies(MSECS_MIN_AGE);
 	jiffies_scan_wait = msecs_to_jiffies(SECS_SCAN_WAIT * 1000);
 
-- 
GitLab


From 3a6f33d86baa8103c80f62edd9393e9f7bf25d72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20L=C3=A9ger?= <clement.leger@bootlin.com>
Date: Tue, 8 Nov 2022 10:43:22 +0100
Subject: [PATCH 4037/4122] mm/kmemleak: use %pK to display kernel pointers in
 backtrace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, %p is used to display kernel pointers in backtrace which result
in a hashed value that is not usable to correlate the address for debug.
Use %pK which will respect the kptr_restrict configuration value and thus
allow to extract meaningful information from the backtrace.

Link: https://lkml.kernel.org/r/20221108094322.73492-1-clement.leger@bootlin.com
Signed-off-by: Clément Léger <clement.leger@bootlin.com>
Cc: Alexandre Belloni <alexandre.belloni@bootlin.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 mm/kmemleak.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index dc7db6a2a0ddd..92f670edbf518 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -361,7 +361,7 @@ static void print_unreferenced(struct seq_file *seq,
 
 	for (i = 0; i < nr_entries; i++) {
 		void *ptr = (void *)entries[i];
-		warn_or_seq_printf(seq, "    [<%p>] %pS\n", ptr, ptr);
+		warn_or_seq_printf(seq, "    [<%pK>] %pS\n", ptr, ptr);
 	}
 }
 
-- 
GitLab


From 8b777594d2341a82f00b57c020f8af05bded1178 Mon Sep 17 00:00:00 2001
From: Minchan Kim <minchan@kernel.org>
Date: Tue, 13 Dec 2022 09:07:31 -0800
Subject: [PATCH 4038/4122] MAINTAINERS: zram: zsmalloc: Add an additional
 co-maintainer

Move Sergey to co-maintainer for zram/zsmalloc since he has helped to
contribute/review those areas actively for eight years, which is quite
helpful.  Since Nitin has been inactive for several years, it's time to
move his name into CREDITS.

Link: https://lkml.kernel.org/r/20221213170731.796121-1-minchan@kernel.org
Signed-off-by: Minchan Kim <minchan@kernel.org>
Reviewed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 CREDITS     | 4 ++++
 MAINTAINERS | 6 ++----
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/CREDITS b/CREDITS
index 198f675c419e3..4e302a459ddf0 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1439,6 +1439,10 @@ N: Justin Guyett
 E: jguyett@andrew.cmu.edu
 D: via-rhine net driver hacking
 
+N: Nitin Gupta
+E: ngupta@vflare.org
+D: zsmalloc memory allocator and zram block device driver
+
 N: Danny ter Haar
 E: dth@cistron.nl
 D: /proc/cpuinfo, reboot on panic , kernel pre-patch tester ;)
diff --git a/MAINTAINERS b/MAINTAINERS
index 096ae475e21cd..5c63d48847ab0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -23002,8 +23002,7 @@ F:	drivers/media/pci/zoran/
 
 ZRAM COMPRESSED RAM BLOCK DEVICE DRVIER
 M:	Minchan Kim <minchan@kernel.org>
-M:	Nitin Gupta <ngupta@vflare.org>
-R:	Sergey Senozhatsky <senozhatsky@chromium.org>
+M:	Sergey Senozhatsky <senozhatsky@chromium.org>
 L:	linux-kernel@vger.kernel.org
 S:	Maintained
 F:	Documentation/admin-guide/blockdev/zram.rst
@@ -23016,8 +23015,7 @@ F:	drivers/tty/serial/zs.*
 
 ZSMALLOC COMPRESSED SLAB MEMORY ALLOCATOR
 M:	Minchan Kim <minchan@kernel.org>
-M:	Nitin Gupta <ngupta@vflare.org>
-R:	Sergey Senozhatsky <senozhatsky@chromium.org>
+M:	Sergey Senozhatsky <senozhatsky@chromium.org>
 L:	linux-mm@kvack.org
 S:	Maintained
 F:	Documentation/mm/zsmalloc.rst
-- 
GitLab


From a7ebbbb159c181c696770feeb89bf0334aaff6d8 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 17 Aug 2022 08:03:29 +0000
Subject: [PATCH 4039/4122] fault-injection: allow stacktrace filter for x86-64

This patchset allow fault injection to run on x86_64 and makes stacktrace
filter work as expected.  With this, we can test a device driver module
with fault injection more easily.


This patch (of 4):

FAULT_INJECTION_STACKTRACE_FILTER option was apparently disallowed on
x86_64 because of problems with the stack unwinder:

    commit 6d690dcac92a84f98fd774862628ff871b713660
    Author: Akinobu Mita <akinobu.mita@gmail.com>
    Date:   Sat May 12 10:36:53 2007 -0700

        fault injection: disable stacktrace filter for x86-64

However, there is no problems whatsoever with this today. Let's allow
it again.

Link: https://lkml.kernel.org/r/20220817080332.1052710-1-weiyongjun1@huawei.com
Link: https://lkml.kernel.org/r/20220817080332.1052710-2-weiyongjun1@huawei.com
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Isabella Basso <isabbasso@riseup.net>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/Kconfig.debug | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index a93c45ebc3205..5d19278f53c69 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1982,7 +1982,6 @@ config FAIL_SUNRPC
 config FAULT_INJECTION_STACKTRACE_FILTER
 	bool "stacktrace filter for fault-injection capabilities"
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
-	depends on !X86_64
 	select STACKTRACE
 	depends on FRAME_POINTER || MIPS || PPC || S390 || MICROBLAZE || ARM || ARC || X86
 	help
-- 
GitLab


From 4acb9e5139f20c79eb08a95dc5a28186ae7a5088 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 17 Aug 2022 08:03:30 +0000
Subject: [PATCH 4040/4122] fault-injection: skip stacktrace filtering by
 default

If FAULT_INJECTION_STACKTRACE_FILTER is enabled, the depth is default to
32.  This means fail_stacktrace() will iter each entry's stacktrace, even
if filter is not configured.

This patch changes to quick return from fail_stacktrace() if stacktrace
filter is not set.

Link: https://lkml.kernel.org/r/20220817080332.1052710-3-weiyongjun1@huawei.com
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Isabella Basso <isabbasso@riseup.net>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/fault-inject.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index 1421818c9ef75..fecc4d8ca32ae 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -71,7 +71,7 @@ static bool fail_stacktrace(struct fault_attr *attr)
 	int n, nr_entries;
 	bool found = (attr->require_start == 0 && attr->require_end == ULONG_MAX);
 
-	if (depth == 0)
+	if (depth == 0 || (found && !attr->reject_start && !attr->reject_end))
 		return found;
 
 	nr_entries = stack_trace_save(entries, depth, 1);
-- 
GitLab


From 0199907474d402809319ada802b50643625914f9 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 17 Aug 2022 08:03:31 +0000
Subject: [PATCH 4041/4122] fault-injection: make some stack filter attrs more
 readable

Attributes of stack filter are show as unsigned decimal, such as
'require-start', 'require-end'.  This patch change to show them as
unsigned hexadecimal for more readable.

Before:
  $ echo 0xffffffffc0257000 > /sys/kernel/debug/failslab/require-start
  $ cat /sys/kernel/debug/failslab/require-start
  18446744072638263296

After:
  $ echo 0xffffffffc0257000 > /sys/kernel/debug/failslab/require-start
  $ cat /sys/kernel/debug/failslab/require-start
  0xffffffffc0257000

[wangyufen@huawei.com: use debugfs_create_xul() instead of debugfs_create_xl()]
  Link: https://lkml.kernel.org/r/1664331299-4976-1-git-send-email-wangyufen@huawei.com
Link: https://lkml.kernel.org/r/20220817080332.1052710-4-weiyongjun1@huawei.com
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Isabella Basso <isabbasso@riseup.net>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/fault-inject.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index fecc4d8ca32ae..d7819cacf8901 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -226,10 +226,10 @@ struct dentry *fault_create_debugfs_attr(const char *name,
 #ifdef CONFIG_FAULT_INJECTION_STACKTRACE_FILTER
 	debugfs_create_stacktrace_depth("stacktrace-depth", mode, dir,
 					&attr->stacktrace_depth);
-	debugfs_create_ul("require-start", mode, dir, &attr->require_start);
-	debugfs_create_ul("require-end", mode, dir, &attr->require_end);
-	debugfs_create_ul("reject-start", mode, dir, &attr->reject_start);
-	debugfs_create_ul("reject-end", mode, dir, &attr->reject_end);
+	debugfs_create_xul("require-start", mode, dir, &attr->require_start);
+	debugfs_create_xul("require-end", mode, dir, &attr->require_end);
+	debugfs_create_xul("reject-start", mode, dir, &attr->reject_start);
+	debugfs_create_xul("reject-end", mode, dir, &attr->reject_end);
 #endif /* CONFIG_FAULT_INJECTION_STACKTRACE_FILTER */
 
 	attr->dname = dget(dir);
-- 
GitLab


From f9eeef5918bbe1f2545d36280330dced25d6cf97 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 17 Aug 2022 08:03:32 +0000
Subject: [PATCH 4042/4122] fault-injection: make stacktrace filter works as
 expected

stacktrace filter is checked after others, such as fail-nth, interval and
probability.  This make it doesn't work well as expected.

Fix to running stacktrace filter before other filters.  It will speed up
fault inject testing for driver modules.

Link: https://lkml.kernel.org/r/20220817080332.1052710-5-weiyongjun1@huawei.com
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Cc: Akinobu Mita <akinobu.mita@gmail.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Isabella Basso <isabbasso@riseup.net>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Miguel Ojeda <ojeda@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Vlastimil Babka <vbabka@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---
 lib/fault-inject.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index d7819cacf8901..6cff320c4eb40 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -102,10 +102,16 @@ static inline bool fail_stacktrace(struct fault_attr *attr)
 
 bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 {
+	bool stack_checked = false;
+
 	if (in_task()) {
 		unsigned int fail_nth = READ_ONCE(current->fail_nth);
 
 		if (fail_nth) {
+			if (!fail_stacktrace(attr))
+				return false;
+
+			stack_checked = true;
 			fail_nth--;
 			WRITE_ONCE(current->fail_nth, fail_nth);
 			if (!fail_nth)
@@ -125,6 +131,9 @@ bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 	if (atomic_read(&attr->times) == 0)
 		return false;
 
+	if (!stack_checked && !fail_stacktrace(attr))
+		return false;
+
 	if (atomic_read(&attr->space) > size) {
 		atomic_sub(size, &attr->space);
 		return false;
@@ -139,9 +148,6 @@ bool should_fail_ex(struct fault_attr *attr, ssize_t size, int flags)
 	if (attr->probability <= get_random_u32_below(100))
 		return false;
 
-	if (!fail_stacktrace(attr))
-		return false;
-
 fail:
 	if (!(flags & FAULT_NOWARN))
 		fail_dump(attr);
-- 
GitLab


From 32f1002ed4851d9eb28ea1aba58757bd4b66e63b Mon Sep 17 00:00:00 2001
From: Daniel Golle <daniel@makrotopia.org>
Date: Thu, 15 Dec 2022 03:47:59 +0000
Subject: [PATCH 4043/4122] net: dsa: mt7530: remove redundant assignment

Russell King correctly pointed out that the MAC_2500FD capability is
already added for port 5 (if not in RGMII mode) and port 6 (which only
supports SGMII) by mt7531_mac_port_get_caps. Remove the reduntant
setting of this capability flag which was added by a previous commit.

Fixes: e19de30d2080 ("net: dsa: mt7530: add support for in-band link status")
Reported-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Reviewed-by: Russell King (Oracle) <rmk+kernel@armlinux.org.uk>
Signed-off-by: Daniel Golle <daniel@makrotopia.org>
Link: https://lore.kernel.org/r/Y5qY7x6la5TxZxzX@makrotopia.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mt7530.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index e74c6b4061728..908fa89444c90 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -2919,9 +2919,6 @@ static void mt753x_phylink_get_caps(struct dsa_switch *ds, int port,
 	config->mac_capabilities = MAC_ASYM_PAUSE | MAC_SYM_PAUSE |
 				   MAC_10 | MAC_100 | MAC_1000FD;
 
-	if ((priv->id == ID_MT7531) && mt753x_is_mac_port(port))
-		config->mac_capabilities |= MAC_2500FD;
-
 	/* This driver does not make use of the speed, duplex, pause or the
 	 * advertisement in its mac_config, so it is safe to mark this driver
 	 * as non-legacy.
-- 
GitLab


From b4cafb3d2c740f8d1b1234b43ac4a60e5291c960 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 14 Dec 2022 18:01:00 -0800
Subject: [PATCH 4044/4122] devlink: hold region lock when flushing snapshots

Netdevsim triggers a splat on reload, when it destroys regions
with snapshots pending:

  WARNING: CPU: 1 PID: 787 at net/core/devlink.c:6291 devlink_region_snapshot_del+0x12e/0x140
  CPU: 1 PID: 787 Comm: devlink Not tainted 6.1.0-07460-g7ae9888d6e1c #580
  RIP: 0010:devlink_region_snapshot_del+0x12e/0x140
  Call Trace:
   <TASK>
   devl_region_destroy+0x70/0x140
   nsim_dev_reload_down+0x2f/0x60 [netdevsim]
   devlink_reload+0x1f7/0x360
   devlink_nl_cmd_reload+0x6ce/0x860
   genl_family_rcv_msg_doit.isra.0+0x145/0x1c0

This is the locking assert in devlink_region_snapshot_del(),
we're supposed to be holding the region->snapshot_lock here.

Fixes: 2dec18ad826f ("net: devlink: remove region snapshots list dependency on devlink->lock")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 6004bd0ccee4d..d2df308290832 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -11925,8 +11925,10 @@ void devl_region_destroy(struct devlink_region *region)
 	devl_assert_locked(devlink);
 
 	/* Free all snapshots of region */
+	mutex_lock(&region->snapshot_lock);
 	list_for_each_entry_safe(snapshot, ts, &region->snapshot_list, list)
 		devlink_region_snapshot_del(region, snapshot);
+	mutex_unlock(&region->snapshot_lock);
 
 	list_del(&region->list);
 	mutex_destroy(&region->snapshot_lock);
-- 
GitLab


From 2fc60e2ff972d3dca836bff0b08cbe503c4ca1ce Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 14 Dec 2022 18:01:01 -0800
Subject: [PATCH 4045/4122] selftests: devlink: fix the fd redirect in
 dummy_reporter_test

$number + > bash means redirect FD $number, e.g. commonly
used 2> redirects stderr (fd 2). The test uses 8192> to
write the number 8192 to a file, this results in:

  ./devlink.sh: line 499: 8192: Bad file descriptor

Oddly the test also papers over this issue by checking
for failure (expecting an error rather than success)
so it passes, anyway.

Fixes: ff18176ad806 ("selftests: Add a test of large binary to devlink health test")
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/drivers/net/netdevsim/devlink.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 9de1d123f4f5d..a08c02abde121 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -496,8 +496,8 @@ dummy_reporter_test()
 
 	check_reporter_info dummy healthy 3 3 10 true
 
-	echo 8192> $DEBUGFS_DIR/health/binary_len
-	check_fail $? "Failed set dummy reporter binary len to 8192"
+	echo 8192 > $DEBUGFS_DIR/health/binary_len
+	check_err $? "Failed set dummy reporter binary len to 8192"
 
 	local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
 	check_err $? "Failed show dump of dummy reporter"
-- 
GitLab


From d1c4a3469e73730f7cbbcec661c2a9081af1aa45 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 14 Dec 2022 18:01:02 -0800
Subject: [PATCH 4046/4122] selftests: devlink: add a warning for interfaces
 coming up

NetworkManager (and other daemons) may bring the interface up
and cause failures in quiescence checks. Print a helpful warning,
and take the interface down again.

I seem to forget about this every time I run these tests on a new VM.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/drivers/net/netdevsim/devlink_trap.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index 109900c817bec..b64d98ca0df70 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -47,6 +47,17 @@ if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then
 	exit 1
 fi
 
+check_netdev_down()
+{
+	state=$(cat /sys/class/net/${NETDEV}/flags)
+
+	if [ $((state & 1)) -ne 0 ]; then
+		echo "WARNING: unexpected interface UP, disable NetworkManager?"
+
+		ip link set dev $NETDEV down
+	fi
+}
+
 init_test()
 {
 	RET=0
@@ -151,6 +162,7 @@ trap_stats_test()
 
 	RET=0
 
+	check_netdev_down
 	for trap_name in $(devlink_traps_get); do
 		devlink_trap_stats_idle_test $trap_name
 		check_err $? "Stats of trap $trap_name not idle when netdev down"
@@ -254,6 +266,7 @@ trap_group_stats_test()
 
 	RET=0
 
+	check_netdev_down
 	for group_name in $(devlink_trap_groups_get); do
 		devlink_trap_group_stats_idle_test $group_name
 		check_err $? "Stats of trap group $group_name not idle when netdev down"
-- 
GitLab


From 68bb10101e6b0a6bb44e9c908ef795fc4af99eae Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Thu, 15 Dec 2022 15:46:33 +0100
Subject: [PATCH 4047/4122] openvswitch: Fix flow lookup to use unmasked key

The commit mentioned below causes the ovs_flow_tbl_lookup() function
to be called with the masked key. However, it's supposed to be called
with the unmasked key. This due to the fact that the datapath supports
installing wider flows, and OVS relies on this behavior. For example
if ipv4(src=1.1.1.1/192.0.0.0, dst=1.1.1.2/192.0.0.0) exists, a wider
flow (smaller mask) of ipv4(src=192.1.1.1/128.0.0.0,dst=192.1.1.2/
128.0.0.0) is allowed to be added.

However, if we try to add a wildcard rule, the installation fails:

$ ovs-appctl dpctl/add-flow system@myDP "in_port(1),eth_type(0x0800), \
  ipv4(src=1.1.1.1/192.0.0.0,dst=1.1.1.2/192.0.0.0,frag=no)" 2
$ ovs-appctl dpctl/add-flow system@myDP "in_port(1),eth_type(0x0800), \
  ipv4(src=192.1.1.1/0.0.0.0,dst=49.1.1.2/0.0.0.0,frag=no)" 2
ovs-vswitchd: updating flow table (File exists)

The reason is that the key used to determine if the flow is already
present in the system uses the original key ANDed with the mask.
This results in the IP address not being part of the (miniflow) key,
i.e., being substituted with an all-zero value. When doing the actual
lookup, this results in the key wrongfully matching the first flow,
and therefore the flow does not get installed.

This change reverses the commit below, but rather than having the key
on the stack, it's allocated.

Fixes: 190aa3e77880 ("openvswitch: Fix Frame-size larger than 1024 bytes warning.")

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.c | 25 ++++++++++++++++---------
 1 file changed, 16 insertions(+), 9 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 932bcf766d636..9ca721c9fa718 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -973,6 +973,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	struct sw_flow_mask mask;
 	struct sk_buff *reply;
 	struct datapath *dp;
+	struct sw_flow_key *key;
 	struct sw_flow_actions *acts;
 	struct sw_flow_match match;
 	u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
@@ -1000,24 +1001,26 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Extract key. */
-	ovs_match_init(&match, &new_flow->key, false, &mask);
+	key = kzalloc(sizeof(*key), GFP_KERNEL);
+	if (!key) {
+		error = -ENOMEM;
+		goto err_kfree_key;
+	}
+
+	ovs_match_init(&match, key, false, &mask);
 	error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
 				  a[OVS_FLOW_ATTR_MASK], log);
 	if (error)
 		goto err_kfree_flow;
 
+	ovs_flow_mask_key(&new_flow->key, key, true, &mask);
+
 	/* Extract flow identifier. */
 	error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
-				       &new_flow->key, log);
+				       key, log);
 	if (error)
 		goto err_kfree_flow;
 
-	/* unmasked key is needed to match when ufid is not used. */
-	if (ovs_identifier_is_key(&new_flow->id))
-		match.key = new_flow->id.unmasked_key;
-
-	ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);
-
 	/* Validate actions. */
 	error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
 				     &new_flow->key, &acts, log);
@@ -1044,7 +1047,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 	if (ovs_identifier_is_ufid(&new_flow->id))
 		flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
 	if (!flow)
-		flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
+		flow = ovs_flow_tbl_lookup(&dp->table, key);
 	if (likely(!flow)) {
 		rcu_assign_pointer(new_flow->sf_acts, acts);
 
@@ -1114,6 +1117,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
 
 	if (reply)
 		ovs_notify(&dp_flow_genl_family, reply, info);
+
+	kfree(key);
 	return 0;
 
 err_unlock_ovs:
@@ -1123,6 +1128,8 @@ err_kfree_acts:
 	ovs_nla_free_flow_actions(acts);
 err_kfree_flow:
 	ovs_flow_free(new_flow, false);
+err_kfree_key:
+	kfree(key);
 error:
 	return error;
 }
-- 
GitLab


From dba8eb83af9dd757ef645b52200775e86883d858 Mon Sep 17 00:00:00 2001
From: Chun-Jie Chen <chun-jie.chen@mediatek.com>
Date: Fri, 14 Oct 2022 18:20:29 +0800
Subject: [PATCH 4048/4122] soc: mediatek: pm-domains: Fix the power glitch
 issue

Power reset maybe generate unexpected signal. In order to avoid
the glitch issue, we need to enable isolation first to guarantee the
stable signal when power reset is triggered.

Fixes: 59b644b01cf4 ("soc: mediatek: Add MediaTek SCPSYS power domains")
Signed-off-by: Chun-Jie Chen <chun-jie.chen@mediatek.com>
Signed-off-by: Allen-KH Cheng <allen-kh.cheng@mediatek.com>
Reviewed-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: Miles Chen <miles.chen@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20221014102029.1162-1-allen-kh.cheng@mediatek.com
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 drivers/soc/mediatek/mtk-pm-domains.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/soc/mediatek/mtk-pm-domains.c b/drivers/soc/mediatek/mtk-pm-domains.c
index 09e3c38b84664..474b272f9b02d 100644
--- a/drivers/soc/mediatek/mtk-pm-domains.c
+++ b/drivers/soc/mediatek/mtk-pm-domains.c
@@ -275,9 +275,9 @@ static int scpsys_power_off(struct generic_pm_domain *genpd)
 	clk_bulk_disable_unprepare(pd->num_subsys_clks, pd->subsys_clks);
 
 	/* subsys power off */
-	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT);
 	regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_ISO_BIT);
 	regmap_set_bits(scpsys->base, pd->data->ctl_offs, PWR_CLK_DIS_BIT);
+	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_RST_B_BIT);
 	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_2ND_BIT);
 	regmap_clear_bits(scpsys->base, pd->data->ctl_offs, PWR_ON_BIT);
 
-- 
GitLab


From e4a4175201014c0222f6bab1895a17b3d1b92f08 Mon Sep 17 00:00:00 2001
From: Macpaul Lin <macpaul.lin@mediatek.com>
Date: Fri, 11 Nov 2022 17:55:40 +0800
Subject: [PATCH 4049/4122] arm64: dts: mediatek: mt8195-demo: fix the memory
 size of node secmon

The size of device tree node secmon (bl31_secmon_reserved) was
incorrect. It should be increased to 2MiB (0x200000).

The origin setting will cause some abnormal behavior due to
trusted-firmware-a and related firmware didn't load correctly.
The incorrect behavior may vary because of different software stacks.
For example, it will cause build error in some Yocto project because
it will check if there was enough memory to load trusted-firmware-a
to the reserved memory.

When mt8195-demo.dts sent to the upstream, at that time the size of
BL31 was small. Because supported functions and modules in BL31 are
basic sets when the board was under early development stage.

Now BL31 includes more firmwares of coprocessors and maturer functions
so the size has grown bigger in real applications. According to the value
reported by customers, we think reserved 2MiB for BL31 might be enough
for maybe the following 2 or 3 years.

Cc: stable@vger.kernel.org      # v5.19
Fixes: 6147314aeedc ("arm64: dts: mediatek: Add device-tree for MT8195 Demo board")
Signed-off-by: Macpaul Lin <macpaul.lin@mediatek.com>
Reviewed-by: Miles Chen <miles.chen@mediatek.com>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Link: https://lore.kernel.org/r/20221111095540.28881-1-macpaul.lin@mediatek.com
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm64/boot/dts/mediatek/mt8195-demo.dts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
index 4fbd99eb496a2..dec85d2548384 100644
--- a/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
+++ b/arch/arm64/boot/dts/mediatek/mt8195-demo.dts
@@ -56,10 +56,10 @@
 		#size-cells = <2>;
 		ranges;
 
-		/* 192 KiB reserved for ARM Trusted Firmware (BL31) */
+		/* 2 MiB reserved for ARM Trusted Firmware (BL31) */
 		bl31_secmon_reserved: secmon@54600000 {
 			no-map;
-			reg = <0 0x54600000 0x0 0x30000>;
+			reg = <0 0x54600000 0x0 0x200000>;
 		};
 
 		/* 12 MiB reserved for OP-TEE (BL32)
-- 
GitLab


From ad2631b5645a1d0ca9bf6fecf71f77e3b0071ee5 Mon Sep 17 00:00:00 2001
From: Chen-Yu Tsai <wenst@chromium.org>
Date: Tue, 27 Sep 2022 12:11:19 +0200
Subject: [PATCH 4050/4122] arm64: dts: mt8183: Fix Mali GPU clock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The actual clock feeding into the Mali GPU on the MT8183 is from the
clock gate in the MFGCFG block, not CLK_TOP_MFGPLL_CK from the TOPCKGEN
block, which itself is simply a pass-through placeholder for the MFGPLL
in the APMIXEDSYS block.

Fix the hardware description with the correct clock reference.

Fixes: a8168cebf1bc ("arm64: dts: mt8183: Add node for the Mali GPU")
Signed-off-by: Chen-Yu Tsai <wenst@chromium.org>
Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Signed-off-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
Tested-by: Nícolas F. R. A. Prado <nfraprado@collabora.com>
Link: https://lore.kernel.org/r/20220927101128.44758-2-angelogioacchino.delregno@collabora.com
Signed-off-by: Matthias Brugger <matthias.bgg@gmail.com>
---
 arch/arm64/boot/dts/mediatek/mt8183.dtsi | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
index a70b669c49baa..402136bfd5350 100644
--- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi
@@ -1678,7 +1678,7 @@
 				<GIC_SPI 278 IRQ_TYPE_LEVEL_LOW>;
 			interrupt-names = "job", "mmu", "gpu";
 
-			clocks = <&topckgen CLK_TOP_MFGPLL_CK>;
+			clocks = <&mfgcfg CLK_MFG_BG3D>;
 
 			power-domains =
 				<&spm MT8183_POWER_DOMAIN_MFG_CORE0>,
-- 
GitLab


From 980411a4d1bb925d28cd9e8d8301dc982ece788d Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Fri, 16 Dec 2022 12:43:12 +1100
Subject: [PATCH 4051/4122] powerpc/code-patching: Fix oops with DEBUG_VM
 enabled

Nathan reported that the new per-cpu mm patching oopses if DEBUG_VM is
enabled:

  ------------[ cut here ]------------
  kernel BUG at arch/powerpc/mm/pgtable.c:333!
  Oops: Exception in kernel mode, sig: 5 [#1]
  LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA PowerNV
  Modules linked in:
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.1.0-rc2+ #1
  Hardware name: IBM PowerNV (emulated by qemu) POWER9 0x4e1200 opal:v7.0 PowerNV
  ...
  NIP assert_pte_locked+0x180/0x1a0
  LR  assert_pte_locked+0x170/0x1a0
  Call Trace:
    0x60000000 (unreliable)
    patch_instruction+0x618/0x6d0
    arch_prepare_kprobe+0xfc/0x2d0
    register_kprobe+0x520/0x7c0
    arch_init_kprobes+0x28/0x3c
    init_kprobes+0x108/0x184
    do_one_initcall+0x60/0x2e0
    kernel_init_freeable+0x1f0/0x3e0
    kernel_init+0x34/0x1d0
    ret_from_kernel_thread+0x5c/0x64

It's caused by the assert_spin_locked() failing in assert_pte_locked().
The assert fails because the PTE was unlocked in text_area_cpu_up_mm(),
and never relocked.

The PTE page shouldn't be freed, the patching_mm is only used for
patching on this CPU, only that single PTE is ever mapped, and it's only
unmapped at CPU offline.

In fact assert_pte_locked() has a special case to ignore init_mm
entirely, and the patching_mm is more-or-less like init_mm, so possibly
the check could be skipped for patching_mm too.

But for now be conservative, and use the proper PTE accessors at
patching time, so that the PTE lock is held while the PTE is used. That
also avoids the warning in assert_pte_locked().

With that it's no longer necessary to save the PTE in
cpu_patching_context for the mm_patch_enabled() case.

Fixes: c28c15b6d28a ("powerpc/code-patching: Use temporary mm for Radix MMU")
Reported-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20221216125913.990972-1-mpe@ellerman.id.au
---
 arch/powerpc/lib/code-patching.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 73ce4b90bb1b8..b00112d7ad467 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -178,7 +178,6 @@ static int text_area_cpu_up_mm(unsigned int cpu)
 
 	this_cpu_write(cpu_patching_context.mm, mm);
 	this_cpu_write(cpu_patching_context.addr, addr);
-	this_cpu_write(cpu_patching_context.pte, pte);
 
 	return 0;
 
@@ -195,7 +194,6 @@ static int text_area_cpu_down_mm(unsigned int cpu)
 
 	this_cpu_write(cpu_patching_context.mm, NULL);
 	this_cpu_write(cpu_patching_context.addr, 0);
-	this_cpu_write(cpu_patching_context.pte, NULL);
 
 	return 0;
 }
@@ -289,12 +287,16 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
 	unsigned long pfn = get_patch_pfn(addr);
 	struct mm_struct *patching_mm;
 	struct mm_struct *orig_mm;
+	spinlock_t *ptl;
 
 	patching_mm = __this_cpu_read(cpu_patching_context.mm);
-	pte = __this_cpu_read(cpu_patching_context.pte);
 	text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
 	patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
 
+	pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+	if (!pte)
+		return -ENOMEM;
+
 	__set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
 
 	/* order PTE update before use, also serves as the hwsync */
@@ -321,6 +323,8 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
 	 */
 	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
 
+	pte_unmap_unlock(pte, ptl);
+
 	return err;
 }
 
-- 
GitLab


From 4ff17c448a7b0f437a17622d67f1c5c609c3a0e9 Mon Sep 17 00:00:00 2001
From: Alexander Gordeev <agordeev@linux.ibm.com>
Date: Fri, 16 Dec 2022 10:50:41 +0100
Subject: [PATCH 4052/4122] libperf: Fix install_pkgconfig target

Commit 47e02b94a4c98dcc ("tools lib perf: Add dependency test to install_headers")
misses the notion of $(DESTDIR_SQ) for install_pkgconfig target, which leads to
error:

  install: cannot create regular file '/usr/lib64/pkgconfig/libperf.pc': Permission denied
  make: *** [Makefile:210: install_pkgconfig] Error 1

Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Nathan Chancellor <nathan@kernel.org>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Nicolas Schier <nicolas@fjasle.eu>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tom Rix <trix@redhat.com>
Cc: bpf@vger.kernel.org
Cc: llvm@lists.linux.dev
Link: http://lore.kernel.org/lkml/Y5w/cWKyb8vpNMfA@li-4a3a4a4c-28e5-11b2-a85c-a8d192c6f089.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/perf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
index 30b7f91e71472..d8cad124e4c5d 100644
--- a/tools/lib/perf/Makefile
+++ b/tools/lib/perf/Makefile
@@ -208,7 +208,7 @@ install_headers: $(INSTALL_HDRS) $(INSTALL_INTERNAL_HDRS)
 
 install_pkgconfig: $(LIBPERF_PC)
 	$(call QUIET_INSTALL, $(LIBPERF_PC)) \
-		$(call do_install,$(LIBPERF_PC),$(libdir_SQ)/pkgconfig,644)
+		$(call do_install,$(LIBPERF_PC),$(DESTDIR_SQ)$(libdir_SQ)/pkgconfig,644)
 
 install_doc:
 	$(Q)$(MAKE) -C Documentation install-man install-html install-examples
-- 
GitLab


From db3568fd80a3999413c04ea0cf52596b7b0ad9aa Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Fri, 16 Dec 2022 11:03:02 +0000
Subject: [PATCH 4053/4122] genirq/msi: Check for the presence of an irq domain
 when validating msi_ctrl

For architectures such as s390 and powerpc that do not use
irq domains for MSIs, dev->msi.domain is always NULL, so
the per-device, per-bus MSI domain is also guaranteed to
be NULL.

So checking one without checking the other is bound to result
in a splat, followed by a memory leak as we don't free the MSI
descriptors.

Add the missing check.

Reported-by: Matthew Rosato <mjrosato@linux.ibm.com>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/e570e70d-19bc-101b-0481-ff9a3cab3504@linux.ibm.com
---
 kernel/irq/msi.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index bd4d4dd626b4b..e843604c3a4f6 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -165,7 +165,8 @@ static bool msi_ctrl_valid(struct device *dev, struct msi_ctrl *ctrl)
 	unsigned int hwsize;
 
 	if (WARN_ON_ONCE(ctrl->domid >= MSI_MAX_DEVICE_IRQDOMAINS ||
-			 !dev->msi.data->__domains[ctrl->domid].domain))
+			 (dev->msi.domain &&
+			  !dev->msi.data->__domains[ctrl->domid].domain)))
 		return false;
 
 	hwsize = msi_domain_get_hwsize(dev, ctrl->domid);
-- 
GitLab


From e982ad82bd8f7931f5788a15dfa3709f7a7ee79f Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 16 Dec 2022 11:08:52 +0000
Subject: [PATCH 4054/4122] genirq/msi: Return MSI_XA_DOMAIN_SIZE as the
 maximum MSI index when no domain is present

On architectures such as s390 that do not use irq domains for MSI,
returning 0 as the maximum MSI index is a bit counter-productive,
as it indicates that no MSI can be allocated. Bad idea.

Instead, return the maximum we're willing to support in the MSI
backing store (MSI_XA_DOMAIN_SIZE), and let the arch code do its
usual thing.

Thanks to Matthew Rosato for fixing the fix.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
[maz: commit message]
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/87fsdgzpqs.ffs@tglx
---
 kernel/irq/msi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index e843604c3a4f6..955267bbc2be6 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -610,8 +610,8 @@ static unsigned int msi_domain_get_hwsize(struct device *dev, unsigned int domid
 		info = domain->host_data;
 		return info->hwsize;
 	}
-	/* No domain, no size... */
-	return 0;
+	/* No domain, default to MSI_XA_DOMAIN_SIZE */
+	return MSI_XA_DOMAIN_SIZE;
 }
 
 static inline void irq_chip_write_msi_msg(struct irq_data *data,
-- 
GitLab


From a7d550f82b445cf218b47a2c1a9c56e97ecb8c7a Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Date: Mon, 12 Dec 2022 00:58:17 +0100
Subject: [PATCH 4055/4122] of: fdt: Honor CONFIG_CMDLINE* even without /chosen
 node

I do not read a strict requirement on /chosen node in either ePAPR or in
Documentation/devicetree. Help text for CONFIG_CMDLINE and
CONFIG_CMDLINE_EXTEND doesn't make their behavior explicitly dependent on
the presence of /chosen or the presense of /chosen/bootargs.

However the early check for /chosen and bailing out in
early_init_dt_scan_chosen() skips CONFIG_CMDLINE handling which is not
really related to /chosen node or the particular method of passing cmdline
from bootloader.

This leads to counterintuitive combinations (assuming
CONFIG_CMDLINE_EXTEND=y):

a) bootargs="foo", CONFIG_CMDLINE="bar" => cmdline=="foo bar"
b) /chosen missing, CONFIG_CMDLINE="bar" => cmdline==""
c) bootargs="", CONFIG_CMDLINE="bar" => cmdline==" bar"

Move CONFIG_CMDLINE handling outside of early_init_dt_scan_chosen() so that
cases b and c above result in the same cmdline.

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@gmail.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/11af73e05bad75e4ef49067515e3214f6d944b3d.camel@gmail.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 drivers/of/fdt.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index 7b571a6316397..b2272bccf85c9 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -1173,26 +1173,6 @@ int __init early_init_dt_scan_chosen(char *cmdline)
 	if (p != NULL && l > 0)
 		strscpy(cmdline, p, min(l, COMMAND_LINE_SIZE));
 
-	/*
-	 * CONFIG_CMDLINE is meant to be a default in case nothing else
-	 * managed to set the command line, unless CONFIG_CMDLINE_FORCE
-	 * is set in which case we override whatever was found earlier.
-	 */
-#ifdef CONFIG_CMDLINE
-#if defined(CONFIG_CMDLINE_EXTEND)
-	strlcat(cmdline, " ", COMMAND_LINE_SIZE);
-	strlcat(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#elif defined(CONFIG_CMDLINE_FORCE)
-	strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#else
-	/* No arguments from boot loader, use kernel's  cmdl*/
-	if (!((char *)cmdline)[0])
-		strscpy(cmdline, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
-#endif
-#endif /* CONFIG_CMDLINE */
-
-	pr_debug("Command line is: %s\n", (char *)cmdline);
-
 	rng_seed = of_get_flat_dt_prop(node, "rng-seed", &l);
 	if (rng_seed && l > 0) {
 		add_bootloader_randomness(rng_seed, l);
@@ -1297,6 +1277,26 @@ void __init early_init_dt_scan_nodes(void)
 	if (rc)
 		pr_warn("No chosen node found, continuing without\n");
 
+	/*
+	 * CONFIG_CMDLINE is meant to be a default in case nothing else
+	 * managed to set the command line, unless CONFIG_CMDLINE_FORCE
+	 * is set in which case we override whatever was found earlier.
+	 */
+#ifdef CONFIG_CMDLINE
+#if defined(CONFIG_CMDLINE_EXTEND)
+	strlcat(boot_command_line, " ", COMMAND_LINE_SIZE);
+	strlcat(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#elif defined(CONFIG_CMDLINE_FORCE)
+	strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#else
+	/* No arguments from boot loader, use kernel's cmdl */
+	if (!boot_command_line[0])
+		strscpy(boot_command_line, CONFIG_CMDLINE, COMMAND_LINE_SIZE);
+#endif
+#endif /* CONFIG_CMDLINE */
+
+	pr_debug("Command line is: %s\n", boot_command_line);
+
 	/* Setup memory, calling early_init_dt_add_memory_arch */
 	early_init_dt_scan_memory();
 
-- 
GitLab


From d8a76e46d7d3d6db5458d7f46205153ca9a53afd Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Wed, 30 Nov 2022 16:41:11 +0100
Subject: [PATCH 4056/4122] dt-bindings: usb: tegra-xusb: Remove path
 references

Unresolved path references are now flagged as errors when checking the
device tree binding examples, so convert them into label references.

Reported-by: Conor Dooley <conor.dooley@microchip.com>
Suggested-by: Rob Herring <robh+dt@kernel.org>
Signed-off-by: Thierry Reding <treding@nvidia.com>
Acked-by: Rob Herring <robh@kernel.org>
Tested-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221130154111.1655603-1-thierry.reding@gmail.com
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/usb/nvidia,tegra124-xusb.yaml     | 4 +---
 .../devicetree/bindings/usb/nvidia,tegra186-xusb.yaml     | 4 +---
 .../devicetree/bindings/usb/nvidia,tegra194-xusb.yaml     | 8 ++------
 .../devicetree/bindings/usb/nvidia,tegra210-xusb.yaml     | 8 ++------
 4 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml
index 4a6616bf9bab7..d6ca8c93073df 100644
--- a/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra124-xusb.yaml
@@ -186,9 +186,7 @@ examples:
 
         nvidia,xusb-padctl = <&padctl>;
 
-        phys = <&{/padctl@0,7009f000/pads/usb2/lanes/usb2-1}>, /* mini-PCIe USB */
-               <&{/padctl@0,7009f000/pads/usb2/lanes/usb2-2}>, /* USB A */
-               <&{/padctl@0,7009f000/pads/pcie/lanes/pcie-0}>; /* USB A */
+        phys = <&phy_usb2_1>, <&phy_usb2_2>, <&phy_pcie_0>;
         phy-names = "usb2-1", "usb2-2", "usb3-0";
 
         avddio-pex-supply = <&vdd_1v05_run>;
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml
index 6f62944fc5971..a04c6ce1e0f68 100644
--- a/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra186-xusb.yaml
@@ -166,8 +166,6 @@ examples:
         #address-cells = <1>;
         #size-cells = <0>;
 
-        phys = <&{/padctl@3520000/pads/usb2/lanes/usb2-0}>,
-               <&{/padctl@3520000/pads/usb2/lanes/usb2-1}>,
-               <&{/padctl@3520000/pads/usb3/lanes/usb3-0}>;
+        phys = <&phy_usb2_0>, <&phy_usb2_1>, <&phy_usb3_0>;
         phy-names = "usb2-0", "usb2-1", "usb3-0";
     };
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml
index 65ae9ae9b0b7a..b356793f73a15 100644
--- a/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra194-xusb.yaml
@@ -169,11 +169,7 @@ examples:
 
         nvidia,xusb-padctl = <&xusb_padctl>;
 
-        phys = <&{/bus@0/padctl@3520000/pads/usb2/lanes/usb2-0}>,
-               <&{/bus@0/padctl@3520000/pads/usb2/lanes/usb2-1}>,
-               <&{/bus@0/padctl@3520000/pads/usb2/lanes/usb2-3}>,
-               <&{/bus@0/padctl@3520000/pads/usb3/lanes/usb3-0}>,
-               <&{/bus@0/padctl@3520000/pads/usb3/lanes/usb3-2}>,
-               <&{/bus@0/padctl@3520000/pads/usb3/lanes/usb3-3}>;
+        phys = <&phy_usb2_0>, <&phy_usb2_1>, <&phy_usb2_3>, <&phy_usb3_0>,
+               <&phy_usb3_2>, <&phy_usb3_3>;
         phy-names = "usb2-0", "usb2-1", "usb2-3", "usb3-0", "usb3-2", "usb3-3";
     };
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml
index da1e1ec0e7c86..90296613b3a59 100644
--- a/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra210-xusb.yaml
@@ -173,12 +173,8 @@ examples:
 
         nvidia,xusb-padctl = <&padctl>;
 
-        phys = <&{/padctl@7009f000/pads/usb2/lanes/usb2-0}>,
-               <&{/padctl@7009f000/pads/usb2/lanes/usb2-1}>,
-               <&{/padctl@7009f000/pads/usb2/lanes/usb2-2}>,
-               <&{/padctl@7009f000/pads/usb2/lanes/usb2-3}>,
-               <&{/padctl@7009f000/pads/pcie/lanes/pcie-6}>,
-               <&{/padctl@7009f000/pads/pcie/lanes/pcie-5}>;
+        phys = <&phy_usb2_0>, <&phy_usb2_1>, <&phy_usb2_2>, <&phy_usb2_3>,
+               <&phy_pcie_6>, <&phy_pcie_5>;
         phy-names = "usb2-0", "usb2-1", "usb2-2", "usb2-3", "usb3-0",
                     "usb3-1";
         dvddio-pex-supply = <&vdd_pex_1v05>;
-- 
GitLab


From 5fa9f7292b17bcd66da93226fc28fe1f755af154 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 2 Dec 2022 12:05:36 +0100
Subject: [PATCH 4057/4122] dt-bindings: vendor-prefixes: sort entries
 alphabetically

Sort entries alphabetically.  This was a semi manual job with help of:

  cat Documentation/devicetree/bindings/vendor-prefixes.yaml | grep '":' > old
  cat old | sort > new
  diff -ubB old new

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221202110536.22230-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/vendor-prefixes.yaml  | 64 +++++++++----------
 1 file changed, 32 insertions(+), 32 deletions(-)

diff --git a/Documentation/devicetree/bindings/vendor-prefixes.yaml b/Documentation/devicetree/bindings/vendor-prefixes.yaml
index 00493b962bd76..70ffb3780621b 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.yaml
+++ b/Documentation/devicetree/bindings/vendor-prefixes.yaml
@@ -198,10 +198,10 @@ patternProperties:
     description: Bosch Sensortec GmbH
   "^boundary,.*":
     description: Boundary Devices Inc.
-  "^broadmobi,.*":
-    description: Shanghai Broadmobi Communication Technology Co.,Ltd.
   "^brcm,.*":
     description: Broadcom Corporation
+  "^broadmobi,.*":
+    description: Shanghai Broadmobi Communication Technology Co.,Ltd.
   "^bsh,.*":
     description: BSH Hausgeraete GmbH
   "^bticino,.*":
@@ -549,6 +549,8 @@ patternProperties:
     description: Hitex Development Tools
   "^holt,.*":
     description: Holt Integrated Circuits, Inc.
+  "^holtek,.*":
+    description: Holtek Semiconductor, Inc.
   "^honestar,.*":
     description: Honestar Technologies Co., Ltd.
   "^honeywell,.*":
@@ -561,8 +563,6 @@ patternProperties:
     description: Hewlett Packard Enterprise
   "^hsg,.*":
     description: HannStar Display Co.
-  "^holtek,.*":
-    description: Holtek Semiconductor, Inc.
   "^huawei,.*":
     description: Huawei Technologies Co., Ltd.
   "^hugsun,.*":
@@ -607,12 +607,10 @@ patternProperties:
     description: Infineon Technologies
   "^inforce,.*":
     description: Inforce Computing
-  "^ingrasys,.*":
-    description: Ingrasys Technology Inc.
-  "^ivo,.*":
-    description: InfoVision Optoelectronics Kunshan Co. Ltd.
   "^ingenic,.*":
     description: Ingenic Semiconductor
+  "^ingrasys,.*":
+    description: Ingrasys Technology Inc.
   "^injoinic,.*":
     description: Injoinic Technology Corp.
   "^innocomm,.*":
@@ -649,6 +647,8 @@ patternProperties:
     description: ITEAD Intelligent Systems Co.Ltd
   "^itian,.*":
     description: ITian Corporation
+  "^ivo,.*":
+    description: InfoVision Optoelectronics Kunshan Co. Ltd.
   "^iwave,.*":
     description: iWave Systems Technologies Pvt. Ltd.
   "^jadard,.*":
@@ -897,14 +897,14 @@ patternProperties:
     description: Shenzhen Netxeon Technology CO., LTD
   "^neweast,.*":
     description: Guangdong Neweast Optoelectronics CO., LTD
+  "^newhaven,.*":
+    description: Newhaven Display International
   "^newvision,.*":
     description: New Vision Display (Shenzhen) Co., Ltd.
   "^nexbox,.*":
     description: Nexbox
   "^nextthing,.*":
     description: Next Thing Co.
-  "^newhaven,.*":
-    description: Newhaven Display International
   "^ni,.*":
     description: National Instruments
   "^nintendo,.*":
@@ -1053,10 +1053,10 @@ patternProperties:
     description: QEMU, a generic and open source machine emulator and virtualizer
   "^qi,.*":
     description: Qi Hardware
-  "^qihua,.*":
-    description: Chengdu Kaixuan Information Technology Co., Ltd.
   "^qiaodian,.*":
     description: QiaoDian XianShi Corporation
+  "^qihua,.*":
+    description: Chengdu Kaixuan Information Technology Co., Ltd.
   "^qishenglong,.*":
     description: Shenzhen QiShenglong Industrialist Co., Ltd.
   "^qnap,.*":
@@ -1083,22 +1083,22 @@ patternProperties:
     description: reMarkable AS
   "^renesas,.*":
     description: Renesas Electronics Corporation
-  "^rex,.*":
-    description: iMX6 Rex Project
   "^rervision,.*":
     description: Shenzhen Rervision Technology Co., Ltd.
   "^revotics,.*":
     description: Revolution Robotics, Inc. (Revotics)
+  "^rex,.*":
+    description: iMX6 Rex Project
   "^richtek,.*":
     description: Richtek Technology Corporation
   "^ricoh,.*":
     description: Ricoh Co. Ltd.
   "^rikomagic,.*":
     description: Rikomagic Tech Corp. Ltd
-  "^riscv,.*":
-    description: RISC-V Foundation
   "^riot,.*":
     description: Embest RIoT
+  "^riscv,.*":
+    description: RISC-V Foundation
   "^rockchip,.*":
     description: Fuzhou Rockchip Electronics Co., Ltd
   "^rocktech,.*":
@@ -1161,6 +1161,8 @@ patternProperties:
     description: Si-En Technology Ltd.
   "^si-linux,.*":
     description: Silicon Linux Corporation
+  "^siemens,.*":
+    description: Siemens AG
   "^sifive,.*":
     description: SiFive, Inc.
   "^sigma,.*":
@@ -1183,8 +1185,8 @@ patternProperties:
     description: Siliconfile Technologies lnc.
   "^siliconmitus,.*":
     description: Silicon Mitus, Inc.
-  "^siemens,.*":
-    description: Siemens AG
+  "^silvaco,.*":
+    description: Silvaco, Inc.
   "^simtek,.*":
     description: Cypress Semiconductor Corporation (Simtek Corporation)
   "^sinlinx,.*":
@@ -1270,8 +1272,6 @@ patternProperties:
     description: Sun Microsystems, Inc
   "^supermicro,.*":
     description: Super Micro Computer, Inc.
-  "^silvaco,.*":
-    description: Silvaco, Inc.
   "^swir,.*":
     description: Sierra Wireless
   "^syna,.*":
@@ -1293,16 +1293,18 @@ patternProperties:
     description: Shenzhen City Tang Cheng Technology Co., Ltd.
   "^tdo,.*":
     description: Shangai Top Display Optoelectronics Co., Ltd
+  "^team-source-display,.*":
+    description: Shenzhen Team Source Display Technology Co., Ltd. (TSD)
   "^technexion,.*":
     description: TechNexion
   "^technologic,.*":
     description: Technologic Systems
+  "^techstar,.*":
+    description: Shenzhen Techstar Electronics Co., Ltd.
   "^teltonika,.*":
     description: Teltonika Networks
   "^tempo,.*":
     description: Tempo Semiconductor
-  "^techstar,.*":
-    description: Shenzhen Techstar Electronics Co., Ltd.
   "^terasic,.*":
     description: Terasic Inc.
   "^tesla,.*":
@@ -1356,10 +1358,6 @@ patternProperties:
     description: Tronsmart
   "^truly,.*":
     description: Truly Semiconductors Limited
-  "^visionox,.*":
-    description: Visionox
-  "^team-source-display,.*":
-    description: Shenzhen Team Source Display Technology Co., Ltd. (TSD)
   "^tsd,.*":
     description: Theobroma Systems Design und Consulting GmbH
   "^tyan,.*":
@@ -1368,10 +1366,10 @@ patternProperties:
     description: u-blox
   "^u-boot,.*":
     description: U-Boot bootloader
-  "^ucrobotics,.*":
-    description: uCRobotics
   "^ubnt,.*":
     description: Ubiquiti Networks
+  "^ucrobotics,.*":
+    description: uCRobotics
   "^udoo,.*":
     description: Udoo
   "^ugoos,.*":
@@ -1410,6 +1408,8 @@ patternProperties:
     description: Used for virtual device without specific vendor.
   "^vishay,.*":
     description: Vishay Intertechnology, Inc
+  "^visionox,.*":
+    description: Visionox
   "^vitesse,.*":
     description: Vitesse Semiconductor Corporation
   "^vivante,.*":
@@ -1424,6 +1424,8 @@ patternProperties:
     description: Vision Optical Technology Co., Ltd.
   "^vxt,.*":
     description: VXT Ltd
+  "^wanchanglong,.*":
+    description: Wanchanglong Electronics Technology（SHENZHEN）Co.，Ltd.
   "^wand,.*":
     description: Wandbord (Technexion)
   "^waveshare,.*":
@@ -1464,8 +1466,6 @@ patternProperties:
     description: Wondermedia Technologies, Inc.
   "^wobo,.*":
     description: Wobo
-  "^wanchanglong,.*":
-    description: Wanchanglong Electronics Technology（SHENZHEN）Co.，Ltd.
   "^x-powers,.*":
     description: X-Powers
   "^xen,.*":
@@ -1508,10 +1508,10 @@ patternProperties:
     description: Shenzhen Yashi Changhua Intelligent Technology Co., Ltd.
   "^ysoft,.*":
     description: Y Soft Corporation a.s.
-  "^zealz,.*":
-    description: Zealz
   "^zarlink,.*":
     description: Zarlink Semiconductor
+  "^zealz,.*":
+    description: Zealz
   "^zeitec,.*":
     description: ZEITEC Semiconductor Co., LTD.
   "^zidoo,.*":
-- 
GitLab


From f62678a77d585cf3f6302a5a4e2265b1a97b004b Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Tue, 13 Dec 2022 14:20:06 +0100
Subject: [PATCH 4058/4122] dt-bindings: mxsfb: Document i.MX8M/i.MX6SX/i.MX6SL
 power-domains property

The power-domains property is mandatory on i.MX8M Mini, Nano, Plus
and i.MX6SX, i.MX6SL. Document the property and mark it as required
on the aforementioned variants of the IP, present in those SoCs.

Signed-off-by: Marek Vasut <marex@denx.de>
Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Reviewed-by: Liu Ying <victor.liu@nxp.com>
Link: https://lore.kernel.org/r/20221213132006.6446-1-marex@denx.de
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/display/fsl,lcdif.yaml    | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
index f449cfc767899..75b4efd70ba85 100644
--- a/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
+++ b/Documentation/devicetree/bindings/display/fsl,lcdif.yaml
@@ -99,7 +99,6 @@ allOf:
           maxItems: 3
       required:
         - clock-names
-        - power-domains
   - if:
       not:
         properties:
@@ -114,6 +113,19 @@ allOf:
           maxItems: 1
         clock-names:
           maxItems: 1
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - fsl,imx6sl-lcdif
+              - fsl,imx6sx-lcdif
+              - fsl,imx8mm-lcdif
+              - fsl,imx8mn-lcdif
+              - fsl,imx8mp-lcdif
+    then:
+      required:
+        - power-domains
 
 examples:
   - |
@@ -128,6 +140,7 @@ examples:
                  <&clks IMX6SX_CLK_LCDIF_APB>,
                  <&clks IMX6SX_CLK_DISPLAY_AXI>;
         clock-names = "pix", "axi", "disp_axi";
+        power-domains = <&pd_disp>;
 
         port {
             endpoint {
-- 
GitLab


From 435beb4110da372c313398891ca9eee87f83d3db Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Tue, 13 Dec 2022 11:06:26 +0100
Subject: [PATCH 4059/4122] dt-bindings: hwmon: ntc-thermistor: drop Naveen
 Krishna Chatradhi from maintainers

Emails to Naveen Krishna Chatradhi bounce ("550 5.1.1 Recipient address
rejected: User unknown").

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Link: https://lore.kernel.org/r/20221213100626.36150-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml b/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml
index 3d3b139a91a2f..6a1920712fb9f 100644
--- a/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml
+++ b/Documentation/devicetree/bindings/hwmon/ntc-thermistor.yaml
@@ -6,7 +6,6 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: NTC thermistor temperature sensors
 
 maintainers:
-  - Naveen Krishna Chatradhi <ch.naveen@samsung.com>
   - Linus Walleij <linus.walleij@linaro.org>
 
 description: |
-- 
GitLab


From 22c9f19002c7536b30bc15b6fb6276d62be2f758 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 11 Dec 2022 03:48:57 +0100
Subject: [PATCH 4060/4122] dt-bindings: imx6q-pcie: Handle various clock
 configurations

The i.MX SoCs have various clock configurations routed into the PCIe IP,
the list of clock is below. Document all those configurations in the DT
binding document.

All SoCs: pcie, pcie_bus
6QDL, 7D: + pcie_phy
6SX:      + pcie_phy          pcie_inbound_axi
8MQ:      + pcie_phy pcie_aux
8MM, 8MP: +          pcie_aux

Reviewed-by: Rob Herring <robh@kernel.org>
Acked-by: Alexander Stein <alexander.stein@ew.tq-group.com>
Signed-off-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20221211024859.672076-1-marex@denx.de
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/fsl,imx6q-pcie.yaml          | 37 +++++++++++++++++--
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
index 49b4f7a32e71e..5d74d914a830a 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
@@ -57,7 +57,7 @@ properties:
     items:
       - const: pcie
       - const: pcie_bus
-      - const: pcie_phy
+      - enum: [ pcie_phy, pcie_aux ]
       - enum: [ pcie_inbound_axi, pcie_aux ]
 
   num-lanes:
@@ -185,7 +185,7 @@ allOf:
           items:
             - {}
             - {}
-            - {}
+            - const: pcie_phy
             - const: pcie_inbound_axi
   - if:
       properties:
@@ -198,7 +198,7 @@ allOf:
           items:
             - {}
             - {}
-            - {}
+            - const: pcie_phy
             - const: pcie_aux
   - if:
       properties:
@@ -208,10 +208,41 @@ allOf:
               enum:
                 - fsl,imx6sx-pcie
                 - fsl,imx8mq-pcie
+    then:
+      properties:
+        clocks:
+          maxItems: 3
+        clock-names:
+          maxItems: 3
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - fsl,imx6q-pcie
+              - fsl,imx6qp-pcie
+              - fsl,imx7d-pcie
+    then:
+      properties:
+        clock-names:
+          maxItems: 3
+          contains:
+            const: pcie_phy
+
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - fsl,imx8mm-pcie
+              - fsl,imx8mp-pcie
     then:
       properties:
         clock-names:
           maxItems: 3
+          contains:
+            const: pcie_aux
 
 unevaluatedProperties: false
 
-- 
GitLab


From 8b8161edf14acab716c01d03bafc8a3e8113a43a Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 11 Dec 2022 03:48:58 +0100
Subject: [PATCH 4061/4122] dt-bindings: imx6q-pcie: Handle various PD
 configurations

The i.MX SoCs have various power domain configurations routed into
the PCIe IP. MX6SX is the only one which contains 2 domains and also
uses power-domain-names. MX6QDL do not use any domains. All the rest
uses one domain and does not use power-domain-names anymore.

Document all those configurations in the DT binding document.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20221211024859.672076-2-marex@denx.de
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/fsl,imx6q-pcie.yaml          | 32 +++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
index 5d74d914a830a..f2e9404d33f8a 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
@@ -69,6 +69,7 @@ properties:
       required properties for imx7d-pcie and imx8mq-pcie.
 
   power-domains:
+    minItems: 1
     items:
       - description: The phandle pointing to the DISPLAY domain for
           imx6sx-pcie, to PCIE_PHY power domain for imx7d-pcie and
@@ -77,6 +78,7 @@ properties:
           for imx6sx-pcie.
 
   power-domain-names:
+    minItems: 1
     items:
       - const: pcie
       - const: pcie_phy
@@ -187,6 +189,10 @@ allOf:
             - {}
             - const: pcie_phy
             - const: pcie_inbound_axi
+        power-domains:
+          minItems: 2
+        power-domain-names:
+          minItems: 2
   - if:
       properties:
         compatible:
@@ -243,6 +249,32 @@ allOf:
           maxItems: 3
           contains:
             const: pcie_aux
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - fsl,imx6q-pcie
+              - fsl,imx6qp-pcie
+    then:
+      properties:
+        power-domains: false
+        power-domain-names: false
+
+  - if:
+      not:
+        properties:
+          compatible:
+            contains:
+              enum:
+                - fsl,imx6sx-pcie
+                - fsl,imx6q-pcie
+                - fsl,imx6qp-pcie
+    then:
+      properties:
+        power-domains:
+          maxItems: 1
+        power-domain-names: false
 
 unevaluatedProperties: false
 
-- 
GitLab


From 1a2cead15bcfac872c15457ef50ffbbe5ff641bc Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Sun, 11 Dec 2022 03:48:59 +0100
Subject: [PATCH 4062/4122] dt-bindings: imx6q-pcie: Handle more resets on
 legacy platforms

The i.MX6 and i.MX7D does not use block controller to toggle PCIe
reset, hence the PCIe DT description contains three reset entries
on these older SoCs. Add this exception into the binding document.

Reviewed-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Signed-off-by: Marek Vasut <marex@denx.de>
Link: https://lore.kernel.org/r/20221211024859.672076-3-marex@denx.de
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../bindings/pci/fsl,imx6q-pcie.yaml          | 35 ++++++++++++++++---
 1 file changed, 31 insertions(+), 4 deletions(-)

diff --git a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
index f2e9404d33f8a..bad980902f66e 100644
--- a/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
+++ b/Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
@@ -84,15 +84,14 @@ properties:
       - const: pcie_phy
 
   resets:
+    minItems: 2
     maxItems: 3
     description: Phandles to PCIe-related reset lines exposed by SRC
       IP block. Additional required by imx7d-pcie and imx8mq-pcie.
 
   reset-names:
-    items:
-      - const: pciephy
-      - const: apps
-      - const: turnoff
+    minItems: 2
+    maxItems: 3
 
   fsl,tx-deemph-gen1:
     description: Gen1 De-emphasis value (optional required).
@@ -276,6 +275,34 @@ allOf:
           maxItems: 1
         power-domain-names: false
 
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - fsl,imx6q-pcie
+              - fsl,imx6sx-pcie
+              - fsl,imx6qp-pcie
+              - fsl,imx7d-pcie
+              - fsl,imx8mq-pcie
+    then:
+      properties:
+        resets:
+          minItems: 3
+        reset-names:
+          items:
+            - const: pciephy
+            - const: apps
+            - const: turnoff
+    else:
+      properties:
+        resets:
+          maxItems: 2
+        reset-names:
+          items:
+            - const: apps
+            - const: turnoff
+
 unevaluatedProperties: false
 
 examples:
-- 
GitLab


From e21a77d8dabe3c6384cdd485cb129c7d2a426708 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Mon, 7 Nov 2022 17:03:38 +0100
Subject: [PATCH 4063/4122] dt-bindings: watchdog: gpio: Convert bindings to
 YAML

Convert the gpio-wdt bindings from text to YAML ones, to permit DT validation.

Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: Marek Vasut <marex@denx.de>
Reviewed-by: Guenter Roeck <linux@roeck-us.net>
Link: https://lore.kernel.org/r/20221107160338.27910-1-marex@denx.de
[robh: add missing 'always-running']
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/watchdog/gpio-wdt.txt | 28 ----------
 .../bindings/watchdog/linux,wdt-gpio.yaml     | 55 +++++++++++++++++++
 2 files changed, 55 insertions(+), 28 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/watchdog/gpio-wdt.txt
 create mode 100644 Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml

diff --git a/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt b/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt
deleted file mode 100644
index 198794963786b..0000000000000
--- a/Documentation/devicetree/bindings/watchdog/gpio-wdt.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-* GPIO-controlled Watchdog
-
-Required Properties:
-- compatible: Should contain "linux,wdt-gpio".
-- gpios: From common gpio binding; gpio connection to WDT reset pin.
-- hw_algo: The algorithm used by the driver. Should be one of the
-  following values:
-  - toggle: Either a high-to-low or a low-to-high transition clears
-    the WDT counter. The watchdog timer is disabled when GPIO is
-    left floating or connected to a three-state buffer.
-  - level: Low or high level starts counting WDT timeout,
-    the opposite level disables the WDT. Active level is determined
-    by the GPIO flags.
-- hw_margin_ms: Maximum time to reset watchdog circuit (milliseconds).
-
-Optional Properties:
-- always-running: If the watchdog timer cannot be disabled, add this flag to
-  have the driver keep toggling the signal without a client. It will only cease
-  to toggle the signal when the device is open and the timeout elapsed.
-
-Example:
-	watchdog: watchdog {
-		/* ADM706 */
-		compatible = "linux,wdt-gpio";
-		gpios = <&gpio3 9 GPIO_ACTIVE_LOW>;
-		hw_algo = "toggle";
-		hw_margin_ms = <1600>;
-	};
diff --git a/Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml b/Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml
new file mode 100644
index 0000000000000..50af79af6416e
--- /dev/null
+++ b/Documentation/devicetree/bindings/watchdog/linux,wdt-gpio.yaml
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/watchdog/linux,wdt-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: GPIO-controlled Watchdog
+
+maintainers:
+  - Guenter Roeck <linux@roeck-us.net>
+
+properties:
+  compatible:
+    const: linux,wdt-gpio
+
+  gpios:
+    description: gpio connection to WDT reset pin
+    maxItems: 1
+
+  hw_algo:
+    description: The algorithm used by the driver.
+    enum: [ level, toggle ]
+
+  hw_margin_ms:
+    description: Maximum time to reset watchdog circuit (milliseconds).
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  always-running:
+    type: boolean
+    description:
+      If the watchdog timer cannot be disabled, add this flag to have the driver
+      keep toggling the signal without a client.
+      It will only cease to toggle the signal when the device is open and the
+      timeout elapsed.
+
+required:
+  - compatible
+  - gpios
+  - hw_algo
+  - hw_margin_ms
+
+allOf:
+  - $ref: watchdog.yaml#
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    watchdog {
+        compatible = "linux,wdt-gpio";
+        gpios = <&gpio3 9 GPIO_ACTIVE_LOW>;
+        hw_algo = "toggle";
+        hw_margin_ms = <1600>;
+    };
-- 
GitLab


From ab040c42237f47dffbb1b83bd4e29f739b3a917e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:07 +0100
Subject: [PATCH 4064/4122] dt-bindings: drop redundant part of title of shared
 bindings

The Devicetree bindings document does not have to say in the title that
it is a "binding", but instead just describe the hardware.  For shared
(re-usable) schemas, name them all as "common properties".

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Guenter Roeck <linux@roeck-us.net> # watchdog
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> # IIO
Acked-by: Miquel Raynal <miquel.raynal@bootlin.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Acked-by: Vinod Koul <vkoul@kernel.org> # dma
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> # media
Acked-by: Sebastian Reichel <sre@kernel.org> # power
Acked-by: Viresh Kumar <viresh.kumar@linaro.org> # opp
Link: https://lore.kernel.org/r/20221216163815.522628-2-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/qcom,gcc.yaml           | 2 +-
 Documentation/devicetree/bindings/dma/dma-common.yaml           | 2 +-
 Documentation/devicetree/bindings/dma/dma-controller.yaml       | 2 +-
 Documentation/devicetree/bindings/dma/dma-router.yaml           | 2 +-
 Documentation/devicetree/bindings/iio/adc/adc.yaml              | 2 +-
 Documentation/devicetree/bindings/input/input.yaml              | 2 +-
 .../devicetree/bindings/media/video-interface-devices.yaml      | 2 +-
 Documentation/devicetree/bindings/media/video-interfaces.yaml   | 2 +-
 Documentation/devicetree/bindings/mmc/mmc-controller.yaml       | 2 +-
 Documentation/devicetree/bindings/mtd/nand-chip.yaml            | 2 +-
 Documentation/devicetree/bindings/mtd/nand-controller.yaml      | 2 +-
 .../devicetree/bindings/net/bluetooth/bluetooth-controller.yaml | 2 +-
 Documentation/devicetree/bindings/net/can/can-controller.yaml   | 2 +-
 Documentation/devicetree/bindings/net/ethernet-controller.yaml  | 2 +-
 Documentation/devicetree/bindings/net/ethernet-phy.yaml         | 2 +-
 Documentation/devicetree/bindings/net/mdio.yaml                 | 2 +-
 Documentation/devicetree/bindings/opp/opp-v2-base.yaml          | 2 +-
 Documentation/devicetree/bindings/rtc/rtc.yaml                  | 2 +-
 .../devicetree/bindings/soundwire/soundwire-controller.yaml     | 2 +-
 Documentation/devicetree/bindings/spi/spi-controller.yaml       | 2 +-
 Documentation/devicetree/bindings/watchdog/watchdog.yaml        | 2 +-
 21 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
index 1ab416c83c8d8..7129fbcf2b6c5 100644
--- a/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,gcc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/qcom,gcc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Global Clock & Reset Controller Common Bindings
+title: Qualcomm Global Clock & Reset Controller Common Properties
 
 maintainers:
   - Stephen Boyd <sboyd@kernel.org>
diff --git a/Documentation/devicetree/bindings/dma/dma-common.yaml b/Documentation/devicetree/bindings/dma/dma-common.yaml
index ad06d36af208e..ea700f8ee6c6a 100644
--- a/Documentation/devicetree/bindings/dma/dma-common.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-common.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/dma-common.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: DMA Engine Generic Binding
+title: DMA Engine Common Properties
 
 maintainers:
   - Vinod Koul <vkoul@kernel.org>
diff --git a/Documentation/devicetree/bindings/dma/dma-controller.yaml b/Documentation/devicetree/bindings/dma/dma-controller.yaml
index 6d3727267fa80..538ebadff6527 100644
--- a/Documentation/devicetree/bindings/dma/dma-controller.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/dma-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: DMA Controller Generic Binding
+title: DMA Controller Common Properties
 
 maintainers:
   - Vinod Koul <vkoul@kernel.org>
diff --git a/Documentation/devicetree/bindings/dma/dma-router.yaml b/Documentation/devicetree/bindings/dma/dma-router.yaml
index 4b817f5dc30e7..f8d8c3c88bcc9 100644
--- a/Documentation/devicetree/bindings/dma/dma-router.yaml
+++ b/Documentation/devicetree/bindings/dma/dma-router.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/dma-router.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: DMA Router Generic Binding
+title: DMA Router Common Properties
 
 maintainers:
   - Vinod Koul <vkoul@kernel.org>
diff --git a/Documentation/devicetree/bindings/iio/adc/adc.yaml b/Documentation/devicetree/bindings/iio/adc/adc.yaml
index db348fcbb52cb..2616017297456 100644
--- a/Documentation/devicetree/bindings/iio/adc/adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/adc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/adc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic IIO bindings for ADC channels
+title: IIO Common Properties for ADC Channels
 
 maintainers:
   - Jonathan Cameron <jic23@kernel.org>
diff --git a/Documentation/devicetree/bindings/input/input.yaml b/Documentation/devicetree/bindings/input/input.yaml
index 17512f4347fdd..94f7942189e8c 100644
--- a/Documentation/devicetree/bindings/input/input.yaml
+++ b/Documentation/devicetree/bindings/input/input.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/input.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common input schema binding
+title: Input Devices Common Properties
 
 maintainers:
   - Dmitry Torokhov <dmitry.torokhov@gmail.com>
diff --git a/Documentation/devicetree/bindings/media/video-interface-devices.yaml b/Documentation/devicetree/bindings/media/video-interface-devices.yaml
index 4527f56a5a6eb..cf7712ad297c0 100644
--- a/Documentation/devicetree/bindings/media/video-interface-devices.yaml
+++ b/Documentation/devicetree/bindings/media/video-interface-devices.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/video-interface-devices.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common bindings for video receiver and transmitter devices
+title: Common Properties for Video Receiver and Transmitter Devices
 
 maintainers:
   - Jacopo Mondi <jacopo@jmondi.org>
diff --git a/Documentation/devicetree/bindings/media/video-interfaces.yaml b/Documentation/devicetree/bindings/media/video-interfaces.yaml
index 34bdad0281808..a211d49dc2ac3 100644
--- a/Documentation/devicetree/bindings/media/video-interfaces.yaml
+++ b/Documentation/devicetree/bindings/media/video-interfaces.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/video-interfaces.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common bindings for video receiver and transmitter interface endpoints
+title: Common Properties for Video Receiver and Transmitter Interface Endpoints
 
 maintainers:
   - Sakari Ailus <sakari.ailus@linux.intel.com>
diff --git a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
index e82c003680886..86c73fd825fdc 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/mmc-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MMC Controller Generic Binding
+title: MMC Controller Common Properties
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/mtd/nand-chip.yaml b/Documentation/devicetree/bindings/mtd/nand-chip.yaml
index 6e2dc025d694d..33d079f76c05d 100644
--- a/Documentation/devicetree/bindings/mtd/nand-chip.yaml
+++ b/Documentation/devicetree/bindings/mtd/nand-chip.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mtd/nand-chip.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NAND Chip and NAND Controller Generic Binding
+title: NAND Chip Common Properties
 
 maintainers:
   - Miquel Raynal <miquel.raynal@bootlin.com>
diff --git a/Documentation/devicetree/bindings/mtd/nand-controller.yaml b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
index 220aa2c8c0b50..efcd415f86416 100644
--- a/Documentation/devicetree/bindings/mtd/nand-controller.yaml
+++ b/Documentation/devicetree/bindings/mtd/nand-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mtd/nand-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NAND Chip and NAND Controller Generic Binding
+title: NAND Controller Common Properties
 
 maintainers:
   - Miquel Raynal <miquel.raynal@bootlin.com>
diff --git a/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml b/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml
index 9309dc40f54f0..59bb0d7e8ab33 100644
--- a/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml
+++ b/Documentation/devicetree/bindings/net/bluetooth/bluetooth-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/bluetooth/bluetooth-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bluetooth Controller Generic Binding
+title: Bluetooth Controller Common Properties
 
 maintainers:
   - Marcel Holtmann <marcel@holtmann.org>
diff --git a/Documentation/devicetree/bindings/net/can/can-controller.yaml b/Documentation/devicetree/bindings/net/can/can-controller.yaml
index 1f0e980510749..217be90960e8f 100644
--- a/Documentation/devicetree/bindings/net/can/can-controller.yaml
+++ b/Documentation/devicetree/bindings/net/can/can-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/can/can-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: CAN Controller Generic Binding
+title: CAN Controller Common Properties
 
 maintainers:
   - Marc Kleine-Budde <mkl@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
index 3aef506fa158e..00be387984acb 100644
--- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/ethernet-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ethernet Controller Generic Binding
+title: Ethernet Controller Common Properties
 
 maintainers:
   - David S. Miller <davem@davemloft.net>
diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index ad808e9ce5b9e..1327b81f15a21 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/ethernet-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ethernet PHY Generic Binding
+title: Ethernet PHY Common Properties
 
 maintainers:
   - Andrew Lunn <andrew@lunn.ch>
diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index b5706d4e7e38f..a266ade918ca7 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/mdio.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MDIO Bus Generic Binding
+title: MDIO Bus Common Properties
 
 maintainers:
   - Andrew Lunn <andrew@lunn.ch>
diff --git a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
index cf9c2f7bddc29..47e6f36b76370 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2-base.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/opp/opp-v2-base.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic OPP (Operating Performance Points) Common Binding
+title: Generic OPP (Operating Performance Points) Common Properties
 
 maintainers:
   - Viresh Kumar <viresh.kumar@linaro.org>
diff --git a/Documentation/devicetree/bindings/rtc/rtc.yaml b/Documentation/devicetree/bindings/rtc/rtc.yaml
index 0ec3551f12dd3..c6fff5486fe67 100644
--- a/Documentation/devicetree/bindings/rtc/rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/rtc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rtc/rtc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: RTC Generic Binding
+title: Real Time Clock Common Properties
 
 maintainers:
   - Alexandre Belloni <alexandre.belloni@bootlin.com>
diff --git a/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml b/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml
index 4aad121eff3f2..fdeb8af417d7c 100644
--- a/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml
+++ b/Documentation/devicetree/bindings/soundwire/soundwire-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/soundwire/soundwire-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: SoundWire Controller Generic Binding
+title: SoundWire Controller Common Properties
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/spi/spi-controller.yaml b/Documentation/devicetree/bindings/spi/spi-controller.yaml
index 01042a7f382e2..5a7c72cadf76b 100644
--- a/Documentation/devicetree/bindings/spi/spi-controller.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/spi/spi-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: SPI Controller Generic Binding
+title: SPI Controller Common Properties
 
 maintainers:
   - Mark Brown <broonie@kernel.org>
diff --git a/Documentation/devicetree/bindings/watchdog/watchdog.yaml b/Documentation/devicetree/bindings/watchdog/watchdog.yaml
index e3dfb02f0ca52..fccae0d001103 100644
--- a/Documentation/devicetree/bindings/watchdog/watchdog.yaml
+++ b/Documentation/devicetree/bindings/watchdog/watchdog.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/watchdog/watchdog.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Watchdog Generic Bindings
+title: Watchdog Common Properties
 
 maintainers:
   - Guenter Roeck <linux@roeck-us.net>
-- 
GitLab


From 9d94e28505f8033adf98434573074b400c081902 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:08 +0100
Subject: [PATCH 4065/4122] dt-bindings: memory-controllers: ti,gpmc-child:
 drop redundant part of title

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding", but instead just describe the hardware.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Link: https://lore.kernel.org/r/20221216163815.522628-3-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 .../devicetree/bindings/memory-controllers/ti,gpmc-child.yaml   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml
index 4a257fac577ef..383d19e0ba26c 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc-child.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/memory-controllers/ti,gpmc-child.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: device tree bindings for children of the Texas Instruments GPMC
+title: Texas Instruments GPMC Bus Child Nodes
 
 maintainers:
   - Tony Lindgren <tony@atomide.com>
-- 
GitLab


From a0c2153dcfa0a7f58f189622c6c045ff5aafe08e Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:09 +0100
Subject: [PATCH 4066/4122] dt-bindings: clock: st,stm32mp1-rcc: add proper
 title

Add device name in the title, because "Reset Clock Controller" sounds
too generic.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org>
Link: https://lore.kernel.org/r/20221216163815.522628-4-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
index 242fe922b035e..5194be0b410e4 100644
--- a/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
+++ b/Documentation/devicetree/bindings/clock/st,stm32mp1-rcc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/st,stm32mp1-rcc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Reset Clock Controller Binding
+title: STMicroelectronics STM32MP1 Reset Clock Controller
 
 maintainers:
   - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
-- 
GitLab


From a612130ca1a650b0ba3599fc3199143eb9e7060d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:10 +0100
Subject: [PATCH 4067/4122] dt-bindings: drop redundant part of title (end)

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding", but instead just describe the hardware.

Drop trailing "Devicetree bindings" in various forms (also with
trailing full stop):

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [dD]evice[ -]\?[tT]ree [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [dD]evice[ -]\?[nN]ode [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [dD][tT] [bB]indings\?\.\?$/title: \1/' {} \;

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> # IIO
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> # input
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> # media
Acked-by: Sebastian Reichel <sre@kernel.org> # power
Link: https://lore.kernel.org/r/20221216163815.522628-5-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml          | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml    | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml    | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml    | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml     | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml      | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml      | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml         | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml         | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml         | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml    | 2 +-
 Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml  | 2 +-
 .../devicetree/bindings/arm/firmware/linaro,optee-tz.yaml       | 2 +-
 Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml  | 2 +-
 Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml      | 2 +-
 Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml | 2 +-
 Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml            | 2 +-
 Documentation/devicetree/bindings/arm/mstar/mstar.yaml          | 2 +-
 Documentation/devicetree/bindings/arm/npcm/npcm.yaml            | 2 +-
 Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml          | 2 +-
 Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml   | 2 +-
 Documentation/devicetree/bindings/arm/socionext/uniphier.yaml   | 2 +-
 Documentation/devicetree/bindings/arm/sprd/sprd.yaml            | 2 +-
 Documentation/devicetree/bindings/arm/stm32/stm32.yaml          | 2 +-
 .../bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml       | 2 +-
 .../devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml | 2 +-
 .../bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml         | 2 +-
 Documentation/devicetree/bindings/arm/ti/k3.yaml                | 2 +-
 Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml        | 2 +-
 Documentation/devicetree/bindings/clock/ingenic,cgu.yaml        | 2 +-
 .../devicetree/bindings/clock/renesas,versaclock7.yaml          | 2 +-
 Documentation/devicetree/bindings/display/bridge/anx6345.yaml   | 2 +-
 .../devicetree/bindings/display/bridge/chrontel,ch7033.yaml     | 2 +-
 .../devicetree/bindings/display/bridge/ite,it6505.yaml          | 2 +-
 .../devicetree/bindings/display/bridge/ite,it66121.yaml         | 2 +-
 Documentation/devicetree/bindings/display/bridge/ps8640.yaml    | 2 +-
 Documentation/devicetree/bindings/display/ingenic,ipu.yaml      | 2 +-
 Documentation/devicetree/bindings/display/ingenic,lcd.yaml      | 2 +-
 .../devicetree/bindings/display/mediatek/mediatek,cec.yaml      | 2 +-
 .../devicetree/bindings/display/mediatek/mediatek,dsi.yaml      | 2 +-
 .../devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml | 2 +-
 .../devicetree/bindings/display/mediatek/mediatek,hdmi.yaml     | 2 +-
 .../devicetree/bindings/display/panel/ilitek,ili9163.yaml       | 2 +-
 Documentation/devicetree/bindings/display/panel/panel-lvds.yaml | 2 +-
 .../devicetree/bindings/display/panel/visionox,rm69299.yaml     | 2 +-
 Documentation/devicetree/bindings/dma/ingenic,dma.yaml          | 2 +-
 Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml          | 2 +-
 Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml         | 2 +-
 Documentation/devicetree/bindings/dma/ti/k3-udma.yaml           | 2 +-
 .../devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml       | 2 +-
 Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml | 2 +-
 Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml          | 2 +-
 .../devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml    | 2 +-
 Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml  | 2 +-
 .../devicetree/bindings/input/pine64,pinephone-keyboard.yaml    | 2 +-
 .../devicetree/bindings/input/touchscreen/chipone,icn8318.yaml  | 2 +-
 .../devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml | 2 +-
 .../devicetree/bindings/input/touchscreen/silead,gsl1680.yaml   | 2 +-
 .../devicetree/bindings/interrupt-controller/ingenic,intc.yaml  | 2 +-
 .../bindings/interrupt-controller/realtek,rtl-intc.yaml         | 2 +-
 .../devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml      | 2 +-
 .../devicetree/bindings/media/i2c/dongwoon,dw9768.yaml          | 2 +-
 Documentation/devicetree/bindings/media/i2c/ov8856.yaml         | 2 +-
 Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml   | 2 +-
 Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml    | 2 +-
 Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml    | 2 +-
 Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml    | 2 +-
 Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml    | 2 +-
 Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml   | 2 +-
 .../bindings/memory-controllers/ingenic,nemc-peripherals.yaml   | 2 +-
 .../devicetree/bindings/memory-controllers/ingenic,nemc.yaml    | 2 +-
 .../devicetree/bindings/memory-controllers/ti,gpmc.yaml         | 2 +-
 Documentation/devicetree/bindings/mips/ingenic/devices.yaml     | 2 +-
 .../devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml        | 2 +-
 Documentation/devicetree/bindings/mips/loongson/devices.yaml    | 2 +-
 Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml          | 2 +-
 Documentation/devicetree/bindings/mtd/ingenic,nand.yaml         | 2 +-
 .../devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml    | 2 +-
 Documentation/devicetree/bindings/net/can/bosch,c_can.yaml      | 2 +-
 Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml     | 2 +-
 Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml    | 2 +-
 Documentation/devicetree/bindings/net/dsa/dsa-port.yaml         | 2 +-
 Documentation/devicetree/bindings/net/dsa/dsa.yaml              | 2 +-
 .../devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml       | 2 +-
 Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml      | 2 +-
 Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml      | 2 +-
 Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml | 2 +-
 .../devicetree/bindings/net/wireless/microchip,wilc1000.yaml    | 2 +-
 Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml  | 2 +-
 Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml     | 2 +-
 Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml      | 2 +-
 Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml  | 2 +-
 .../devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml      | 2 +-
 .../devicetree/bindings/power/supply/maxim,ds2760.yaml          | 2 +-
 .../devicetree/bindings/power/supply/maxim,max14656.yaml        | 2 +-
 Documentation/devicetree/bindings/rtc/epson,rx8900.yaml         | 2 +-
 Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml          | 2 +-
 Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml     | 2 +-
 Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml | 2 +-
 Documentation/devicetree/bindings/serial/ingenic,uart.yaml      | 2 +-
 Documentation/devicetree/bindings/serial/serial.yaml            | 2 +-
 Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml     | 2 +-
 Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml     | 2 +-
 Documentation/devicetree/bindings/sound/ingenic,aic.yaml        | 2 +-
 Documentation/devicetree/bindings/sound/ingenic,codec.yaml      | 2 +-
 .../devicetree/bindings/sound/qcom,lpass-rx-macro.yaml          | 2 +-
 .../devicetree/bindings/sound/qcom,lpass-tx-macro.yaml          | 2 +-
 .../devicetree/bindings/sound/qcom,lpass-va-macro.yaml          | 2 +-
 .../devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml         | 2 +-
 Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml    | 2 +-
 Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml    | 2 +-
 Documentation/devicetree/bindings/sound/ti,src4xxx.yaml         | 2 +-
 Documentation/devicetree/bindings/spi/ingenic,spi.yaml          | 2 +-
 Documentation/devicetree/bindings/spi/spi-gpio.yaml             | 2 +-
 Documentation/devicetree/bindings/timer/ingenic,tcu.yaml        | 2 +-
 Documentation/devicetree/bindings/usb/ingenic,musb.yaml         | 2 +-
 Documentation/devicetree/bindings/usb/maxim,max33359.yaml       | 2 +-
 Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml | 2 +-
 Documentation/devicetree/bindings/usb/ti,tps6598x.yaml          | 2 +-
 Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml    | 2 +-
 120 files changed, 120 insertions(+), 120 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml
index 8051a75c2c79a..162a39dab2182 100644
--- a/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/bcm2835.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/bcm2835.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom BCM2711/BCM2835 Platforms Device Tree Bindings
+title: Broadcom BCM2711/BCM2835 Platforms
 
 maintainers:
   - Eric Anholt <eric@anholt.net>
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml
index c603243574357..f2bcac0096b73 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm11351.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm11351.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom BCM11351 device tree bindings
+title: Broadcom BCM11351
 
 maintainers:
   - Florian Fainelli <f.fainelli@gmail.com>
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml
index b3020757380f6..cf4e254e32f19 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm21664.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm21664.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom BCM21664 device tree bindings
+title: Broadcom BCM21664
 
 maintainers:
   - Florian Fainelli <f.fainelli@gmail.com>
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml
index 37f3a6fcde760..eafec29ba7abd 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm23550.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm23550.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom BCM23550 device tree bindings
+title: Broadcom BCM23550
 
 maintainers:
   - Florian Fainelli <f.fainelli@gmail.com>
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml
index 52b575c405998..454b0e93245d8 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcm4708.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,bcm4708.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom BCM4708 device tree bindings
+title: Broadcom BCM4708
 
 description:
   Broadcom BCM4708/47081/4709/47094/53012 Wi-Fi/network SoCs based
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml
index 84866e29cab0e..07892cbdd23c5 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,bcmbca.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,bcmbca.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom Broadband SoC device tree bindings
+title: Broadcom Broadband SoC
 
 description:
   Broadcom Broadband SoCs include family of high performance DSL/PON/Wireless
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml
index 432ccf990f9e8..a0a3f32db54ea 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,cygnus.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,cygnus.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom Cygnus device tree bindings
+title: Broadcom Cygnus
 
 maintainers:
   - Ray Jui <rjui@broadcom.com>
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml
index 294948399f825..cc6add0e933a0 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,hr2.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,hr2.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom Hurricane 2 device tree bindings
+title: Broadcom Hurricane 2
 
 description:
   Broadcom Hurricane 2 family of SoCs are used for switching control. These SoCs
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml
index c4847abbecd8e..6696598eca0e9 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,ns2.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,ns2.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom North Star 2 (NS2) device tree bindings
+title: Broadcom North Star 2 (NS2)
 
 maintainers:
   - Ray Jui <rjui@broadcom.com>
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml
index 7d184ba7d180b..a43b2d4d936b3 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,nsp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,nsp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom Northstar Plus device tree bindings
+title: Broadcom Northstar Plus
 
 description:
   Broadcom Northstar Plus family of SoCs are used for switching control
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml
index c638e04ebae0f..c6ccb78aab0a5 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,stingray.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,stingray.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom Stingray device tree bindings
+title: Broadcom Stingray
 
 maintainers:
   - Ray Jui <rjui@broadcom.com>
diff --git a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml
index 4eba182abd535..3f441352fbf06 100644
--- a/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml
+++ b/Documentation/devicetree/bindings/arm/bcm/brcm,vulcan-soc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/bcm/brcm,vulcan-soc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom Vulcan device tree bindings
+title: Broadcom Vulcan
 
 maintainers:
   - Robert Richter <rrichter@marvell.com>
diff --git a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
index 9a426110a14a9..d4dc0749f9fd1 100644
--- a/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
+++ b/Documentation/devicetree/bindings/arm/firmware/linaro,optee-tz.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/firmware/linaro,optee-tz.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: OP-TEE Device Tree Bindings
+title: OP-TEE
 
 maintainers:
   - Jens Wiklander <jens.wiklander@linaro.org>
diff --git a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml
index b38458022946f..540876322040a 100644
--- a/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml
+++ b/Documentation/devicetree/bindings/arm/hisilicon/hisilicon.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/hisilicon/hisilicon.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Hisilicon Platforms Device Tree Bindings
+title: Hisilicon Platforms
 
 maintainers:
   - Wei Xu <xuwei5@hisilicon.com>
diff --git a/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml b/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml
index 34f5f877d4441..91b96065f7dfc 100644
--- a/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml
+++ b/Documentation/devicetree/bindings/arm/keystone/ti,sci.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/keystone/ti,sci.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TI-SCI controller device node bindings
+title: TI-SCI controller
 
 maintainers:
   - Nishanth Menon <nm@ti.com>
diff --git a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml
index e9bf3054529f1..52d78521e4124 100644
--- a/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml
+++ b/Documentation/devicetree/bindings/arm/marvell/armada-7k-8k.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/marvell/armada-7k-8k.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell Armada 7K/8K Platforms Device Tree Bindings
+title: Marvell Armada 7K/8K Platforms
 
 maintainers:
   - Gregory CLEMENT <gregory.clement@bootlin.com>
diff --git a/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml
index d581161361547..4c43eaf3632e4 100644
--- a/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml
+++ b/Documentation/devicetree/bindings/arm/mrvl/mrvl.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/mrvl/mrvl.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell Platforms Device Tree Bindings
+title: Marvell Platforms
 
 maintainers:
   - Lubomir Rintel <lkundrak@v3.sk>
diff --git a/Documentation/devicetree/bindings/arm/mstar/mstar.yaml b/Documentation/devicetree/bindings/arm/mstar/mstar.yaml
index 8892eb6bd3ef7..937059fcc7b3b 100644
--- a/Documentation/devicetree/bindings/arm/mstar/mstar.yaml
+++ b/Documentation/devicetree/bindings/arm/mstar/mstar.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/mstar/mstar.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MStar platforms device tree bindings
+title: MStar platforms
 
 maintainers:
   - Daniel Palmer <daniel@thingy.jp>
diff --git a/Documentation/devicetree/bindings/arm/npcm/npcm.yaml b/Documentation/devicetree/bindings/arm/npcm/npcm.yaml
index 43409e5721d59..6871483947c51 100644
--- a/Documentation/devicetree/bindings/arm/npcm/npcm.yaml
+++ b/Documentation/devicetree/bindings/arm/npcm/npcm.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/npcm/npcm.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NPCM Platforms Device Tree Bindings
+title: NPCM Platforms
 
 maintainers:
   - Jonathan Neuschäfer <j.neuschaefer@gmx.net>
diff --git a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml
index 214c97bc30634..f1bd6f50e726d 100644
--- a/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml
+++ b/Documentation/devicetree/bindings/arm/nxp/lpc32xx.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/nxp/lpc32xx.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP LPC32xx Platforms Device Tree Bindings
+title: NXP LPC32xx Platforms
 
 maintainers:
   - Roland Stigge <stigge@antcom.de>
diff --git a/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml b/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml
index aa1d4afbc5105..5a428a885760c 100644
--- a/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml
+++ b/Documentation/devicetree/bindings/arm/socionext/milbeaut.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/socionext/milbeaut.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Milbeaut platforms device tree bindings
+title: Milbeaut platforms
 
 maintainers:
   - Taichi Sugaya <sugaya.taichi@socionext.com>
diff --git a/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml
index c2cea1c90f3cc..3e7f3d927ec7b 100644
--- a/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml
+++ b/Documentation/devicetree/bindings/arm/socionext/uniphier.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/socionext/uniphier.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Socionext UniPhier platform device tree bindings
+title: Socionext UniPhier platform
 
 maintainers:
   - Masahiro Yamada <yamada.masahiro@socionext.com>
diff --git a/Documentation/devicetree/bindings/arm/sprd/sprd.yaml b/Documentation/devicetree/bindings/arm/sprd/sprd.yaml
index 2c12e571394b8..eaa67b8e0d6c7 100644
--- a/Documentation/devicetree/bindings/arm/sprd/sprd.yaml
+++ b/Documentation/devicetree/bindings/arm/sprd/sprd.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/arm/sprd/sprd.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Unisoc platforms device tree bindings
+title: Unisoc platforms
 
 maintainers:
   - Orson Zhai <orsonzhai@gmail.com>
diff --git a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
index 44f5c5855af8c..13e34241145b4 100644
--- a/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/stm32.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/stm32/stm32.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Platforms Device Tree Bindings
+title: STMicroelectronics STM32 Platforms
 
 maintainers:
   - Alexandre Torgue <alexandre.torgue@foss.st.com>
diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml
index f3878e0b3cc48..d805c4508b4ea 100644
--- a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun6i-a31-cpuconfig.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Allwinner CPU Configuration Controller Device Tree Bindings
+title: Allwinner CPU Configuration Controller
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml
index 668aadbfe4c0b..644f391afb321 100644
--- a/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml
+++ b/Documentation/devicetree/bindings/arm/sunxi/allwinner,sun9i-a80-prcm.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/sunxi/allwinner,sun9i-a80-prcm.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Allwinner A80 PRCM Device Tree Bindings
+title: Allwinner A80 PRCM
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
index 869c266e7ebc9..6089a96eae4ff 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra-ccplex-cluster.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra-ccplex-cluster.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: NVIDIA Tegra CPU COMPLEX CLUSTER area device tree bindings
+title: NVIDIA Tegra CPU COMPLEX CLUSTER area
 
 maintainers:
   - Sumit Gupta <sumitg@nvidia.com>
diff --git a/Documentation/devicetree/bindings/arm/ti/k3.yaml b/Documentation/devicetree/bindings/arm/ti/k3.yaml
index 09e6845ff2439..203faab80142b 100644
--- a/Documentation/devicetree/bindings/arm/ti/k3.yaml
+++ b/Documentation/devicetree/bindings/arm/ti/k3.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/ti/k3.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments K3 Multicore SoC architecture device tree bindings
+title: Texas Instruments K3 Multicore SoC architecture
 
 maintainers:
   - Nishanth Menon <nm@ti.com>
diff --git a/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml b/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml
index c022d325fc08b..1656d1a4476f6 100644
--- a/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml
+++ b/Documentation/devicetree/bindings/arm/ti/ti,davinci.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/ti/ti,davinci.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments DaVinci Platforms Device Tree Bindings
+title: Texas Instruments DaVinci Platforms
 
 maintainers:
   - Sekhar Nori <nsekhar@ti.com>
diff --git a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
index df256ebcd3663..9e733b10c392f 100644
--- a/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
+++ b/Documentation/devicetree/bindings/clock/ingenic,cgu.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/ingenic,cgu.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs CGU devicetree bindings
+title: Ingenic SoCs CGU
 
 description: |
   The CGU in an Ingenic SoC provides all the clocks generated on-chip. It
diff --git a/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml b/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml
index 8d4eb4475fc8b..b339f1f9f072c 100644
--- a/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml
+++ b/Documentation/devicetree/bindings/clock/renesas,versaclock7.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/renesas,versaclock7.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Renesas Versaclock7 Programmable Clock Device Tree Bindings
+title: Renesas Versaclock7 Programmable Clock
 
 maintainers:
   - Alex Helms <alexander.helms.jy@renesas.com>
diff --git a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
index 1c0406c38fe54..9bf2cbcea69fd 100644
--- a/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/anx6345.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/anx6345.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Analogix ANX6345 eDP Transmitter Device Tree Bindings
+title: Analogix ANX6345 eDP Transmitter
 
 maintainers:
   - Torsten Duwe <duwe@lst.de>
diff --git a/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml
index bb6289c7d375e..b0589fa16736a 100644
--- a/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/chrontel,ch7033.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/display/bridge/chrontel,ch7033.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Chrontel CH7033 Video Encoder Device Tree Bindings
+title: Chrontel CH7033 Video Encoder
 
 maintainers:
   - Lubomir Rintel <lkundrak@v3.sk>
diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
index 833d11b2303a7..b697c42399ea3 100644
--- a/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ite,it6505.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/ite,it6505.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ITE it6505 Device Tree Bindings
+title: ITE it6505
 
 maintainers:
   - Allen Chen <allen.chen@ite.com.tw>
diff --git a/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml
index 1b2185be92cdd..d3454da1247a0 100644
--- a/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ite,it66121.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/ite,it66121.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ITE it66121 HDMI bridge Device Tree Bindings
+title: ITE it66121 HDMI bridge
 
 maintainers:
   - Phong LE <ple@baylibre.com>
diff --git a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
index 8ab156e0a8cfd..28811aff2c5ad 100644
--- a/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ps8640.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/ps8640.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MIPI DSI to eDP Video Format Converter Device Tree Bindings
+title: MIPI DSI to eDP Video Format Converter
 
 maintainers:
   - Nicolas Boichat <drinkcat@chromium.org>
diff --git a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
index 3f93def2c5a20..319bd7c88fe3c 100644
--- a/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
+++ b/Documentation/devicetree/bindings/display/ingenic,ipu.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/ingenic,ipu.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs Image Processing Unit (IPU) devicetree bindings
+title: Ingenic SoCs Image Processing Unit (IPU)
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
index c0bb02fb49f4b..6d4c00f3fcc88 100644
--- a/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
+++ b/Documentation/devicetree/bindings/display/ingenic,lcd.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/ingenic,lcd.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs LCD controller devicetree bindings
+title: Ingenic SoCs LCD controller
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml
index 66288b9f0aa6e..080cf321209ea 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,cec.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/mediatek/mediatek,cec.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Mediatek HDMI CEC Controller Device Tree Bindings
+title: Mediatek HDMI CEC Controller
 
 maintainers:
   - CK Hu <ck.hu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
index b18d6a57c6e1b..4707b60238b07 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/mediatek/mediatek,dsi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MediaTek DSI Controller Device Tree Bindings
+title: MediaTek DSI Controller
 
 maintainers:
   - Chun-Kuang Hu <chunkuang.hu@kernel.org>
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml
index b6fcdfb99ab2c..bd8f7b8ae0ff2 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi-ddc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/mediatek/mediatek,hdmi-ddc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Mediatek HDMI DDC Device Tree Bindings
+title: Mediatek HDMI DDC
 
 maintainers:
   - CK Hu <ck.hu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml
index bdaf0b51e68cd..8afdd67d67809 100644
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/mediatek/mediatek,hdmi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Mediatek HDMI Encoder Device Tree Bindings
+title: Mediatek HDMI Encoder
 
 maintainers:
   - CK Hu <ck.hu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml b/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml
index a4154b51043e1..90e323e19edb7 100644
--- a/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml
+++ b/Documentation/devicetree/bindings/display/panel/ilitek,ili9163.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/panel/ilitek,ili9163.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ilitek ILI9163 display panels device tree bindings
+title: Ilitek ILI9163 display panels
 
 maintainers:
   - Daniel Mack <daniel@zonque.org>
diff --git a/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml b/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml
index fcc50db6a812c..c77ee034310a3 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-lvds.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/panel/panel-lvds.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic LVDS Display Panel Device Tree Bindings
+title: Generic LVDS Display Panel
 
 maintainers:
   - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
diff --git a/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml
index 076b057b4af54..481ef051df1e2 100644
--- a/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml
+++ b/Documentation/devicetree/bindings/display/panel/visionox,rm69299.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/panel/visionox,rm69299.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Visionox model RM69299 Panels Device Tree Bindings.
+title: Visionox model RM69299 Panels
 
 maintainers:
   - Harigovindan P <harigovi@codeaurora.org>
diff --git a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
index 3b0b3b919af8c..2be6ed1bc8133 100644
--- a/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
+++ b/Documentation/devicetree/bindings/dma/ingenic,dma.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/ingenic,dma.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs DMA Controller DT bindings
+title: Ingenic SoCs DMA Controller
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
index 08627d91e607d..a702d2c2ff8d2 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-bcdma.yaml
@@ -6,7 +6,7 @@
 $id: http://devicetree.org/schemas/dma/ti/k3-bcdma.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments K3 DMSS BCDMA Device Tree Bindings
+title: Texas Instruments K3 DMSS BCDMA
 
 maintainers:
   - Peter Ujfalusi <peter.ujfalusi@gmail.com>
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
index 507d16d84adeb..a69f62f854d8c 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-pktdma.yaml
@@ -6,7 +6,7 @@
 $id: http://devicetree.org/schemas/dma/ti/k3-pktdma.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments K3 DMSS PKTDMA Device Tree Bindings
+title: Texas Instruments K3 DMSS PKTDMA
 
 maintainers:
   - Peter Ujfalusi <peter.ujfalusi@gmail.com>
diff --git a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
index 6a09bbf83d462..7ff428ad3aaed 100644
--- a/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
+++ b/Documentation/devicetree/bindings/dma/ti/k3-udma.yaml
@@ -6,7 +6,7 @@
 $id: http://devicetree.org/schemas/dma/ti/k3-udma.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments K3 NAVSS Unified DMA Device Tree Bindings
+title: Texas Instruments K3 NAVSS Unified DMA
 
 maintainers:
   - Peter Ujfalusi <peter.ujfalusi@gmail.com>
diff --git a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml
index 2a595b18ff6c7..825294e3f0e83 100644
--- a/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml
+++ b/Documentation/devicetree/bindings/dma/xilinx/xlnx,zynqmp-dpdma.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/xilinx/xlnx,zynqmp-dpdma.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Xilinx ZynqMP DisplayPort DMA Controller Device Tree Bindings
+title: Xilinx ZynqMP DisplayPort DMA Controller
 
 description: |
   These bindings describe the DMA engine included in the Xilinx ZynqMP
diff --git a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml
index 64cfd971c9c5e..144e86ce5c0a3 100644
--- a/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml
+++ b/Documentation/devicetree/bindings/eeprom/microchip,93lc46b.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/eeprom/microchip,93lc46b.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Microchip 93xx46 SPI compatible EEPROM family dt bindings
+title: Microchip 93xx46 SPI compatible EEPROM family
 
 maintainers:
   - Cory Tusar <cory.tusar@pid1solutions.com>
diff --git a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml
index af6d64a6da6e6..b61fdc9548d87 100644
--- a/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/ingenic,i2c.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/i2c/ingenic,i2c.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs I2C controller devicetree bindings
+title: Ingenic SoCs I2C controller
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
index 15c514b83583b..a73a355fc6652 100644
--- a/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/allwinner,sun8i-a33-ths.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/allwinner,sun8i-a33-ths.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Allwinner A33 Thermal Sensor Device Tree Bindings
+title: Allwinner A33 Thermal Sensor
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml b/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml
index 57a31356082ef..720c16a108d4e 100644
--- a/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/ti,palmas-gpadc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/ti,palmas-gpadc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Palmas general purpose ADC IP block devicetree bindings
+title: Palmas general purpose ADC IP block
 
 maintainers:
   - Tony Lindgren <tony@atomide.com>
diff --git a/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml b/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
index e4a0ac0fff9a7..490f6c3d9e4be 100644
--- a/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
+++ b/Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/pine64,pinephone-keyboard.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Pine64 PinePhone keyboard device tree bindings
+title: Pine64 PinePhone keyboard
 
 maintainers:
   - Samuel Holland <samuel@sholland.org>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
index 9df685bdc5db1..74a8a01e07450 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/chipone,icn8318.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ChipOne ICN8318 Touchscreen Controller Device Tree Bindings
+title: ChipOne ICN8318 Touchscreen Controller
 
 maintainers:
   - Dmitry Torokhov <dmitry.torokhov@gmail.com>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml
index f9998edbff702..3305eda5ed88d 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/pixcir,pixcir_ts.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Pixcir Touchscreen Controller Device Tree Bindings
+title: Pixcir Touchscreen Controller
 
 maintainers:
   - Dmitry Torokhov <dmitry.torokhov@gmail.com>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml b/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml
index eec6f7f6f0a3a..95b554be25b40 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/silead,gsl1680.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/silead,gsl1680.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Silead GSL1680 Touchscreen Controller Device Tree Bindings
+title: Silead GSL1680 Touchscreen Controller
 
 maintainers:
   - Dmitry Torokhov <dmitry.torokhov@gmail.com>
diff --git a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml
index 0358a7739c8e2..609308a5f91d8 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/interrupt-controller/ingenic,intc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs interrupt controller devicetree bindings
+title: Ingenic SoCs interrupt controller
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml
index 13a893b18fb64..fb5593724059d 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/realtek,rtl-intc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/interrupt-controller/realtek,rtl-intc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Realtek RTL SoC interrupt controller devicetree bindings
+title: Realtek RTL SoC interrupt controller
 
 description:
   Interrupt controller and router for Realtek MIPS SoCs, allowing each SoC
diff --git a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
index 6bda4f2b94c2e..a61a76bb611cd 100644
--- a/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
+++ b/Documentation/devicetree/bindings/media/allwinner,sun6i-a31-isp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/allwinner,sun6i-a31-isp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Allwinner A31 Image Signal Processor Driver (ISP) Device Tree Bindings
+title: Allwinner A31 Image Signal Processor Driver (ISP)
 
 maintainers:
   - Paul Kocialkowski <paul.kocialkowski@bootlin.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml
index 21864ab86ec46..82d3d18c16a18 100644
--- a/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/dongwoon,dw9768.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/media/i2c/dongwoon,dw9768.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Dongwoon Anatech DW9768 Voice Coil Motor (VCM) Lens Device Tree Bindings
+title: Dongwoon Anatech DW9768 Voice Coil Motor (VCM) Lens
 
 maintainers:
   - Dongchun Zhu <dongchun.zhu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/ov8856.yaml b/Documentation/devicetree/bindings/media/i2c/ov8856.yaml
index baf92aaaf0498..e17288d579812 100644
--- a/Documentation/devicetree/bindings/media/i2c/ov8856.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ov8856.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/media/i2c/ov8856.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Omnivision OV8856 CMOS Sensor Device Tree Bindings
+title: Omnivision OV8856 CMOS Sensor
 
 maintainers:
   - Dongchun Zhu <dongchun.zhu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml
index 63a040944f3dc..54df9d73dc86f 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov02a10.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/media/i2c/ovti,ov02a10.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Omnivision OV02A10 CMOS Sensor Device Tree Bindings
+title: Omnivision OV02A10 CMOS Sensor
 
 maintainers:
   - Dongchun Zhu <dongchun.zhu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml
index 540fd69ac39f1..a621032f9bd07 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5640.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/i2c/ovti,ov5640.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: OmniVision OV5640 Image Sensor Device Tree Bindings
+title: OmniVision OV5640 Image Sensor
 
 maintainers:
   - Steve Longerbeam <slongerbeam@gmail.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml
index 52c6281a66847..bc9b27afe3ea3 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5645.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/i2c/ovti,ov5645.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: OmniVision OV5645 Image Sensor Device Tree Bindings
+title: OmniVision OV5645 Image Sensor
 
 maintainers:
   - Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml
index 246dc5fec716c..61e4e9cf87832 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/i2c/ovti,ov5648.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: OmniVision OV5648 Image Sensor Device Tree Bindings
+title: OmniVision OV5648 Image Sensor
 
 maintainers:
   - Paul Kocialkowski <paul.kocialkowski@bootlin.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml
index b962863e4f657..6bac326dceafd 100644
--- a/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/i2c/ovti,ov8865.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: OmniVision OV8865 Image Sensor Device Tree Bindings
+title: OmniVision OV8865 Image Sensor
 
 maintainers:
   - Paul Kocialkowski <paul.kocialkowski@bootlin.com>
diff --git a/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml b/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml
index 6597e1d0e65fd..8c28848b226a0 100644
--- a/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml
+++ b/Documentation/devicetree/bindings/media/i2c/st,st-vgxy61.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/media/i2c/st,st-vgxy61.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics VGxy61 HDR Global Shutter Sensor Family Device Tree Bindings
+title: STMicroelectronics VGxy61 HDR Global Shutter Sensor Family
 
 maintainers:
   - Benjamin Mugnier <benjamin.mugnier@foss.st.com>
diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml
index b8ed52a44d571..89ebe3979012c 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc-peripherals.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/memory-controllers/ingenic,nemc-peripherals.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs NAND / External Memory Controller (NEMC) devicetree bindings
+title: Ingenic SoCs NAND / External Memory Controller (NEMC)
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml
index dd13a5106d6c5..a02724221ff3c 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/ingenic,nemc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/memory-controllers/ingenic,nemc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs NAND / External Memory Controller (NEMC) devicetree bindings
+title: Ingenic SoCs NAND / External Memory Controller (NEMC)
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml
index e188a4bf755c0..4f30173ad7479 100644
--- a/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/ti,gpmc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/memory-controllers/ti,gpmc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments GPMC Memory Controller device-tree bindings
+title: Texas Instruments GPMC Memory Controller
 
 maintainers:
   - Tony Lindgren <tony@atomide.com>
diff --git a/Documentation/devicetree/bindings/mips/ingenic/devices.yaml b/Documentation/devicetree/bindings/mips/ingenic/devices.yaml
index ee00d414df100..f2e822afe7fbc 100644
--- a/Documentation/devicetree/bindings/mips/ingenic/devices.yaml
+++ b/Documentation/devicetree/bindings/mips/ingenic/devices.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mips/ingenic/devices.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic XBurst based Platforms Device Tree Bindings
+title: Ingenic XBurst based Platforms
 
 maintainers:
   - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
diff --git a/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml b/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml
index 40130fefa2b41..15d41bdbdc263 100644
--- a/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml
+++ b/Documentation/devicetree/bindings/mips/lantiq/lantiq,dma-xway.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mips/lantiq/lantiq,dma-xway.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Lantiq Xway SoCs DMA Controller DT bindings
+title: Lantiq Xway SoCs DMA Controller
 
 maintainers:
   - John Crispin <john@phrozen.org>
diff --git a/Documentation/devicetree/bindings/mips/loongson/devices.yaml b/Documentation/devicetree/bindings/mips/loongson/devices.yaml
index 9fee6708e6f59..f13ce386f42cc 100644
--- a/Documentation/devicetree/bindings/mips/loongson/devices.yaml
+++ b/Documentation/devicetree/bindings/mips/loongson/devices.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mips/loongson/devices.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Loongson based Platforms Device Tree Bindings
+title: Loongson based Platforms
 
 maintainers:
   - Jiaxun Yang <jiaxun.yang@flygoat.com>
diff --git a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
index 2d10aedf2e008..bb4e0be0c8935 100644
--- a/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
+++ b/Documentation/devicetree/bindings/mmc/ingenic,mmc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/ingenic,mmc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs MMC Controller DT bindings
+title: Ingenic SoCs MMC Controller
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
index a811a512ecc50..a7bdb5d3675cf 100644
--- a/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/ingenic,nand.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mtd/ingenic,nand.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs NAND controller devicetree bindings
+title: Ingenic SoCs NAND controller
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
index 3c51b2d02957d..9c494957a07aa 100644
--- a/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
+++ b/Documentation/devicetree/bindings/net/can/allwinner,sun4i-a10-can.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/can/allwinner,sun4i-a10-can.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Allwinner A10 CAN Controller Device Tree Bindings
+title: Allwinner A10 CAN Controller
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
index 51aa89ac7e850..4d7d67ee175a3 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,c_can.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/can/bosch,c_can.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bosch C_CAN/D_CAN controller Device Tree Bindings
+title: Bosch C_CAN/D_CAN controller
 
 description: Bosch C_CAN/D_CAN controller for CAN bus
 
diff --git a/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml b/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml
index 4635cb96fc64c..a009a44029385 100644
--- a/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml
+++ b/Documentation/devicetree/bindings/net/can/ctu,ctucanfd.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/can/ctu,ctucanfd.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: CTU CAN FD Open-source IP Core Device Tree Bindings
+title: CTU CAN FD Open-source IP Core
 
 description: |
   Open-source CAN FD IP core developed at the Czech Technical University in Prague
diff --git a/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
index 259a0c6547f30..2a6d126606ca9 100644
--- a/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/arrow,xrs700x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/dsa/arrow,xrs700x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Arrow SpeedChips XRS7000 Series Switch Device Tree Bindings
+title: Arrow SpeedChips XRS7000 Series Switch
 
 allOf:
   - $ref: dsa.yaml#
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml b/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml
index 9abb8eba5fadf..b173fceb89983 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/dsa-port.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/dsa/dsa-port.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ethernet Switch port Device Tree Bindings
+title: Ethernet Switch port
 
 maintainers:
   - Andrew Lunn <andrew@lunn.ch>
diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
index b9d48e357e772..5469ae8a43893 100644
--- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/dsa/dsa.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ethernet Switch Device Tree Bindings
+title: Ethernet Switch
 
 maintainers:
   - Andrew Lunn <andrew@lunn.ch>
diff --git a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml
index 1d7dab31457d5..447589b01e8e9 100644
--- a/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/hirschmann,hellcreek.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/dsa/hirschmann,hellcreek.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Hirschmann Hellcreek TSN Switch Device Tree Bindings
+title: Hirschmann Hellcreek TSN Switch
 
 allOf:
   - $ref: dsa.yaml#
diff --git a/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml b/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml
index 8d93ed9c172cb..347a0e1b3d3ff 100644
--- a/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/mscc,ocelot.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/dsa/mscc,ocelot.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Microchip Ocelot Switch Family Device Tree Bindings
+title: Microchip Ocelot Switch Family
 
 maintainers:
   - Vladimir Oltean <vladimir.oltean@nxp.com>
diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
index 1e26d876d1463..df98a16e4e753 100644
--- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/dsa/nxp,sja1105.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP SJA1105 Automotive Ethernet Switch Family Device Tree Bindings
+title: NXP SJA1105 Automotive Ethernet Switch Family
 
 description:
   The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944.pdf) of at
diff --git a/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml b/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml
index 284ef45add998..5557676e9d4bc 100644
--- a/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/esp,esp8089.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/wireless/esp,esp8089.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Espressif ESP8089 Device Tree Bindings
+title: Espressif ESP8089
 
 maintainers:
   - Hans de Goede <hdegoede@redhat.com>
diff --git a/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml b/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml
index b3405f284580a..2460ccc082371 100644
--- a/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/microchip,wilc1000.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/wireless/microchip,wilc1000.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Microchip WILC wireless devicetree bindings
+title: Microchip WILC wireless
 
 maintainers:
   - Adham Abozaeid <adham.abozaeid@microchip.com>
diff --git a/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml b/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml
index b35d2f3ad1adc..583db5d42226d 100644
--- a/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/silabs,wfx.yaml
@@ -6,7 +6,7 @@
 $id: http://devicetree.org/schemas/net/wireless/silabs,wfx.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Silicon Labs WFxxx devicetree bindings
+title: Silicon Labs WFxxx
 
 maintainers:
   - Jérôme Pouiller <jerome.pouiller@silabs.com>
diff --git a/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml
index ee79e13b5fe0d..e08504ef3b6e1 100644
--- a/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml
+++ b/Documentation/devicetree/bindings/nvmem/qcom,spmi-sdam.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/nvmem/qcom,spmi-sdam.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies, Inc. SPMI SDAM DT bindings
+title: Qualcomm Technologies, Inc. SPMI SDAM
 
 maintainers:
   - Shyam Kumar Thella <sthella@codeaurora.org>
diff --git a/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml b/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml
index 5cab216486321..30b42008db063 100644
--- a/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml
+++ b/Documentation/devicetree/bindings/phy/ingenic,phy-usb.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/ingenic,phy-usb.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs USB PHY devicetree bindings
+title: Ingenic SoCs USB PHY
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml
index c2c370448b817..a4397930e0e80 100644
--- a/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/ingenic,pinctrl.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/pinctrl/ingenic,pinctrl.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs pin controller devicetree bindings
+title: Ingenic SoCs pin controller
 
 description: >
   Please refer to pinctrl-bindings.txt in this directory for details of the
diff --git a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml
index 46de35861738e..11f1f98c1cdc1 100644
--- a/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml
+++ b/Documentation/devicetree/bindings/power/reset/xlnx,zynqmp-power.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/reset/xlnx,zynqmp-power.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Xilinx Zynq MPSoC Power Management Device Tree Bindings
+title: Xilinx Zynq MPSoC Power Management
 
 maintainers:
   - Michal Simek <michal.simek@xilinx.com>
diff --git a/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml b/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml
index c838efcf7e160..5faa2418fe2ff 100644
--- a/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml
+++ b/Documentation/devicetree/bindings/power/supply/maxim,ds2760.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/maxim,ds2760.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Maxim DS2760 DT bindings
+title: Maxim DS2760
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml b/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml
index 070ef6f96e60d..711066b8cdb9e 100644
--- a/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml
+++ b/Documentation/devicetree/bindings/power/supply/maxim,max14656.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/maxim,max14656.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Maxim MAX14656 DT bindings
+title: Maxim MAX14656
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
index d12855e7ffd76..1df7c45d95c18 100644
--- a/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
+++ b/Documentation/devicetree/bindings/rtc/epson,rx8900.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rtc/epson,rx8900.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: EPSON RX8900 / Microcrystal RV8803 Real-Time Clock DT bindings
+title: EPSON RX8900 / Microcrystal RV8803 Real-Time Clock
 
 maintainers:
   - Marek Vasut <marex@denx.de>
diff --git a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
index b235b2441997f..af78b67b3da4d 100644
--- a/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/ingenic,rtc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rtc/ingenic,rtc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs Real-Time Clock DT bindings
+title: Ingenic SoCs Real-Time Clock
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml b/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml
index 2d4741f516639..f6e0c613af678 100644
--- a/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/renesas,rzn1-rtc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rtc/renesas,rzn1-rtc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Renesas RZ/N1 SoCs Real-Time Clock DT bindings
+title: Renesas RZ/N1 SoCs Real-Time Clock
 
 maintainers:
   - Miquel Raynal <miquel.raynal@bootlin.com>
diff --git a/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml b/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml
index 6d176588df47d..89c462653e2d3 100644
--- a/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml
+++ b/Documentation/devicetree/bindings/serial/brcm,bcm7271-uart.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/serial/brcm,bcm7271-uart.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom 8250 based serial port devicetree bindings
+title: Broadcom 8250 based serial port
 
 maintainers:
   - Al Cooper <alcooperx@gmail.com>
diff --git a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
index 315ceb722e199..d5f153bdeb0d2 100644
--- a/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
+++ b/Documentation/devicetree/bindings/serial/ingenic,uart.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/serial/ingenic,uart.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs UART controller devicetree bindings
+title: Ingenic SoCs UART controller
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/serial/serial.yaml b/Documentation/devicetree/bindings/serial/serial.yaml
index c75ba3fb64650..11e822bf09e2c 100644
--- a/Documentation/devicetree/bindings/serial/serial.yaml
+++ b/Documentation/devicetree/bindings/serial/serial.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/serial/serial.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Serial Interface Generic DT Bindings
+title: Serial Interface Generic
 
 maintainers:
   - Rob Herring <robh@kernel.org>
diff --git a/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml b/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml
index d911fa2d40ef9..f21eb907ee902 100644
--- a/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml
+++ b/Documentation/devicetree/bindings/soc/mediatek/mtk-svs.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/soc/mediatek/mtk-svs.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MediaTek Smart Voltage Scaling (SVS) Device Tree Bindings
+title: MediaTek Smart Voltage Scaling (SVS)
 
 maintainers:
   - Roger Lu <roger.lu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml
index 422cbf38bfdbc..670b67ec0b617 100644
--- a/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml
+++ b/Documentation/devicetree/bindings/sound/cirrus,cs42l51.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/cirrus,cs42l51.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: CS42L51 audio codec DT bindings
+title: CS42L51 audio codec
 
 maintainers:
   - Olivier Moysan <olivier.moysan@foss.st.com>
diff --git a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml
index ba44406c9cafc..c59a7cd9eaa9b 100644
--- a/Documentation/devicetree/bindings/sound/ingenic,aic.yaml
+++ b/Documentation/devicetree/bindings/sound/ingenic,aic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/ingenic,aic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs AC97 / I2S Controller (AIC) DT bindings
+title: Ingenic SoCs AC97 / I2S Controller (AIC)
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/sound/ingenic,codec.yaml b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
index a07d607e9b937..b58b90850e35a 100644
--- a/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
+++ b/Documentation/devicetree/bindings/sound/ingenic,codec.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/ingenic,codec.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic JZ47xx internal codec DT bindings
+title: Ingenic JZ47xx internal codec
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml
index 23564fd394a2c..79c6f8da1319c 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-rx-macro.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,lpass-rx-macro.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: LPASS(Low Power Audio Subsystem) RX Macro audio codec DT bindings
+title: LPASS(Low Power Audio Subsystem) RX Macro audio codec
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml
index 38708578ee290..66431aade3b70 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-tx-macro.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,lpass-tx-macro.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: LPASS(Low Power Audio Subsystem) TX Macro audio codec DT bindings
+title: LPASS(Low Power Audio Subsystem) TX Macro audio codec
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml
index 188883a2e6714..26f0343b5aac0 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-va-macro.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,lpass-va-macro.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: LPASS(Low Power Audio Subsystem) VA Macro audio codec DT bindings
+title: LPASS(Low Power Audio Subsystem) VA Macro audio codec
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml
index bebca3e3f86fe..2bf8d082f8f10 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-wsa-macro.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,lpass-wsa-macro.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: LPASS(Low Power Audio Subsystem) VA Macro audio codec DT bindings
+title: LPASS(Low Power Audio Subsystem) VA Macro audio codec
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml
index ea7d4900ee4a5..7dac9e6f7f08e 100644
--- a/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml
+++ b/Documentation/devicetree/bindings/sound/realtek,rt1015p.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/realtek,rt1015p.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Realtek rt1015p codec devicetree bindings
+title: Realtek rt1015p codec
 
 maintainers:
   - Tzung-Bi Shih <tzungbi@kernel.org>
diff --git a/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml b/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml
index e631ace7aad1f..ecfa7a5768663 100644
--- a/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml
+++ b/Documentation/devicetree/bindings/sound/realtek,rt5682s.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/realtek,rt5682s.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Realtek rt5682s codec devicetree bindings
+title: Realtek rt5682s codec
 
 maintainers:
   - Derek Fang <derek.fang@realtek.com>
diff --git a/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml b/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml
index 988ce8d8028fb..27230c682d10f 100644
--- a/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml
+++ b/Documentation/devicetree/bindings/sound/ti,src4xxx.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/ti,src4xxx.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments SRC4392 Device Tree Bindings
+title: Texas Instruments SRC4392
 
 description: |
   The SRC4392 is a digital audio codec that can be connected via
diff --git a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
index 360f76c226d92..c08d55b900bbc 100644
--- a/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
+++ b/Documentation/devicetree/bindings/spi/ingenic,spi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/spi/ingenic,spi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs SPI controller devicetree bindings
+title: Ingenic SoCs SPI controller
 
 maintainers:
   - Artur Rojek <contact@artur-rojek.eu>
diff --git a/Documentation/devicetree/bindings/spi/spi-gpio.yaml b/Documentation/devicetree/bindings/spi/spi-gpio.yaml
index 0d0b6d9dad1ca..f29b89076c990 100644
--- a/Documentation/devicetree/bindings/spi/spi-gpio.yaml
+++ b/Documentation/devicetree/bindings/spi/spi-gpio.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/spi/spi-gpio.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: SPI-GPIO devicetree bindings
+title: SPI-GPIO
 
 maintainers:
   - Rob Herring <robh@kernel.org>
diff --git a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
index a84fef0fe628f..2d14610888a71 100644
--- a/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
+++ b/Documentation/devicetree/bindings/timer/ingenic,tcu.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/timer/ingenic,tcu.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic SoCs Timer/Counter Unit (TCU) devicetree bindings
+title: Ingenic SoCs Timer/Counter Unit (TCU)
 
 description: |
   For a description of the TCU hardware and drivers, have a look at
diff --git a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml
index 59212358fcce4..4cc1496a913c2 100644
--- a/Documentation/devicetree/bindings/usb/ingenic,musb.yaml
+++ b/Documentation/devicetree/bindings/usb/ingenic,musb.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/ingenic,musb.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic JZ47xx USB IP DT bindings
+title: Ingenic JZ47xx USB IP
 
 maintainers:
   - Paul Cercueil <paul@crapouillou.net>
diff --git a/Documentation/devicetree/bindings/usb/maxim,max33359.yaml b/Documentation/devicetree/bindings/usb/maxim,max33359.yaml
index 93a19eda610b9..8e513a6af3789 100644
--- a/Documentation/devicetree/bindings/usb/maxim,max33359.yaml
+++ b/Documentation/devicetree/bindings/usb/maxim,max33359.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/maxim,max33359.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Maxim TCPCI Type-C PD controller DT bindings
+title: Maxim TCPCI Type-C PD controller
 
 maintainers:
   - Badhri Jagan Sridharan <badhri@google.com>
diff --git a/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml b/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml
index 8db1f8b597c34..c72257c19220f 100644
--- a/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml
+++ b/Documentation/devicetree/bindings/usb/mediatek,mt6360-tcpc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/mediatek,mt6360-tcpc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Mediatek MT6360 Type-C Port Switch and Power Delivery controller DT bindings
+title: Mediatek MT6360 Type-C Port Switch and Power Delivery controller
 
 maintainers:
   - ChiYuan Huang <cy_huang@richtek.com>
diff --git a/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml b/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml
index a4c53b1f1af35..fef4acdc4773b 100644
--- a/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,tps6598x.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/ti,tps6598x.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Texas Instruments 6598x Type-C Port Switch and Power Delivery controller DT bindings
+title: Texas Instruments 6598x Type-C Port Switch and Power Delivery controller
 
 maintainers:
   - Bryan O'Donoghue <bryan.odonoghue@linaro.org>
diff --git a/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml b/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml
index 5aa4ffd671191..937670de01cc2 100644
--- a/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml
+++ b/Documentation/devicetree/bindings/usb/willsemi,wusb3801.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/willsemi,wusb3801.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: WUSB3801 Type-C port controller DT bindings
+title: WUSB3801 Type-C port controller
 
 description:
   The Will Semiconductor WUSB3801 is a USB Type-C port controller which
-- 
GitLab


From 9fa3ad1a1ab31da4887b48e68ad529af78f119b6 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:11 +0100
Subject: [PATCH 4068/4122] dt-bindings: drop redundant part of title (end,
 part two)

The Devicetree bindings document does not have to say in the title that
it is a "binding", but instead just describe the hardware.

Drop trailing "Node|Tree|Generic bindings" in various forms (also with
trailing full stop):

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [nN]ode [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [tT]ree [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [gG]eneric [bB]indings\?\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [bB]indings\? description\.\?$/title: \1/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [bB]indings\? document\.\?$/title: \1/' {} \;

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Acked-by: Vinod Koul <vkoul@kernel.org> # phy
Acked-by: Mark Brown <broonie@kernel.org>
Link: https://lore.kernel.org/r/20221216163815.522628-6-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/ti,sci-clk.yaml         | 2 +-
 Documentation/devicetree/bindings/cpu/idle-states.yaml          | 2 +-
 .../devicetree/bindings/net/dsa/microchip,lan937x.yaml          | 2 +-
 .../devicetree/bindings/net/wireless/mediatek,mt76.yaml         | 2 +-
 Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml   | 2 +-
 Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml | 2 +-
 Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml    | 2 +-
 Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml      | 2 +-
 Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml      | 2 +-
 Documentation/devicetree/bindings/power/domain-idle-state.yaml  | 2 +-
 .../devicetree/bindings/reserved-memory/shared-dma-pool.yaml    | 2 +-
 Documentation/devicetree/bindings/reset/ti,sci-reset.yaml       | 2 +-
 Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml   | 2 +-
 Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml     | 2 +-
 Documentation/devicetree/bindings/sound/audio-graph-port.yaml   | 2 +-
 15 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml b/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
index 0e370289a0530..63d9763416965 100644
--- a/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/ti,sci-clk.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/ti,sci-clk.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TI-SCI clock controller node bindings
+title: TI-SCI clock controller
 
 maintainers:
   - Nishanth Menon <nm@ti.com>
diff --git a/Documentation/devicetree/bindings/cpu/idle-states.yaml b/Documentation/devicetree/bindings/cpu/idle-states.yaml
index fa4d4142ac937..b8cc826c95013 100644
--- a/Documentation/devicetree/bindings/cpu/idle-states.yaml
+++ b/Documentation/devicetree/bindings/cpu/idle-states.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/cpu/idle-states.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Idle states binding description
+title: Idle states
 
 maintainers:
   - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml
index 630bf0f8294b2..b34de303966ba 100644
--- a/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml
+++ b/Documentation/devicetree/bindings/net/dsa/microchip,lan937x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/dsa/microchip,lan937x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: LAN937x Ethernet Switch Series Tree Bindings
+title: LAN937x Ethernet Switch Series
 
 maintainers:
   - UNGLinuxDriver@microchip.com
diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
index 70e328589cfb8..f0c78f9944913 100644
--- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.yaml
@@ -6,7 +6,7 @@
 $id: http://devicetree.org/schemas/net/wireless/mediatek,mt76.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MediaTek mt76 wireless devices Generic Binding
+title: MediaTek mt76 wireless devices
 
 maintainers:
   - Felix Fietkau <nbd@nbd.name>
diff --git a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
index 7029cb1f38ffb..0e5412cff2bc6 100644
--- a/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qca,ath9k.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/wireless/qca,ath9k.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Atheros ath9k wireless devices Generic Binding
+title: Qualcomm Atheros ath9k wireless devices
 
 maintainers:
   - Toke Høiland-Jørgensen <toke@toke.dk>
diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
index f7cf135aa37ff..556eb523606a3 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath11k.yaml
@@ -6,7 +6,7 @@
 $id: http://devicetree.org/schemas/net/wireless/qcom,ath11k.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies ath11k wireless devices Generic Binding
+title: Qualcomm Technologies ath11k wireless devices
 
 maintainers:
   - Kalle Valo <kvalo@kernel.org>
diff --git a/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml
index 70eb48b391c9b..527010702f5e7 100644
--- a/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/brcm,ns2-pcie-phy.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/brcm,ns2-pcie-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom NS2 PCIe PHY binding document
+title: Broadcom NS2 PCIe PHY
 
 maintainers:
   - Ray Jui <ray.jui@broadcom.com>
diff --git a/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml b/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml
index 0655e485b2604..aa97478dd0161 100644
--- a/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/qcom,usb-hs-phy.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/qcom,usb-hs-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm's USB HS PHY binding description
+title: Qualcomm's USB HS PHY
 
 maintainers:
   - Bjorn Andersson <bjorn.andersson@linaro.org>
diff --git a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml
index da7cac537e15c..7bf5c42b2848c 100644
--- a/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,phy-gmii-sel.yaml
@@ -5,7 +5,7 @@
 $id: "http://devicetree.org/schemas/phy/ti,phy-gmii-sel.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: CPSW Port's Interface Mode Selection PHY Tree Bindings
+title: CPSW Port's Interface Mode Selection PHY
 
 maintainers:
   - Kishon Vijay Abraham I <kishon@ti.com>
diff --git a/Documentation/devicetree/bindings/power/domain-idle-state.yaml b/Documentation/devicetree/bindings/power/domain-idle-state.yaml
index 4ee920a1de69e..ec1f6f669e50b 100644
--- a/Documentation/devicetree/bindings/power/domain-idle-state.yaml
+++ b/Documentation/devicetree/bindings/power/domain-idle-state.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/domain-idle-state.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: PM Domain Idle States binding description
+title: PM Domain Idle States
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml b/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml
index 618105f079bee..47696073b6658 100644
--- a/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml
+++ b/Documentation/devicetree/bindings/reserved-memory/shared-dma-pool.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/reserved-memory/shared-dma-pool.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: /reserved-memory DMA pool node bindings
+title: /reserved-memory DMA pool
 
 maintainers:
   - devicetree-spec@vger.kernel.org
diff --git a/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml b/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml
index 4639d2cec5575..dcf9206e12bec 100644
--- a/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml
+++ b/Documentation/devicetree/bindings/reset/ti,sci-reset.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/reset/ti,sci-reset.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TI-SCI reset controller node bindings
+title: TI-SCI reset controller
 
 maintainers:
   - Nishanth Menon <nm@ti.com>
diff --git a/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml b/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml
index afc835eda0efe..f436f2cf1df71 100644
--- a/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml
+++ b/Documentation/devicetree/bindings/reset/ti,tps380x-reset.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/reset/ti,tps380x-reset.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TI TPS380x reset controller node bindings
+title: TI TPS380x reset controller
 
 maintainers:
   - Marco Felsch <kernel@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml
index 9e6cb4ee9755d..5df7688a1e1ce 100644
--- a/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml
+++ b/Documentation/devicetree/bindings/soc/ti/sci-pm-domain.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/soc/ti/sci-pm-domain.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TI-SCI generic power domain node bindings
+title: TI-SCI generic power domain
 
 maintainers:
   - Nishanth Menon <nm@ti.com>
diff --git a/Documentation/devicetree/bindings/sound/audio-graph-port.yaml b/Documentation/devicetree/bindings/sound/audio-graph-port.yaml
index 64654ceef2089..f5b8b6d13077c 100644
--- a/Documentation/devicetree/bindings/sound/audio-graph-port.yaml
+++ b/Documentation/devicetree/bindings/sound/audio-graph-port.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/audio-graph-port.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Audio Graph Card 'port' Node Bindings
+title: Audio Graph Card 'port'
 
 maintainers:
   - Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
-- 
GitLab


From 84e85359f4999a439aa12e04bf0ae9e13e00fc66 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:12 +0100
Subject: [PATCH 4069/4122] dt-bindings: drop redundant part of title (end,
 part three)

The Devicetree bindings document does not have to say in the title that
it is a "binding", but instead just describe the hardware.

Drop trailing "bindings" in various forms (also with trailing full
stop):

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -not -name 'trivial-devices.yaml' \
    -exec sed -i -e 's/^title: \(.*\) [bB]indings\?\.\?$/title: \1/' {} \;

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Acked-by: Matti Vaittinen <mazziesaccount@gmail.com> # ROHM
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> # input
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Hans Verkuil <hverkuil-cisco@xs4all.nl> # media
Acked-by: Sebastian Reichel <sre@kernel.org> # power
Acked-by: Viresh Kumar <viresh.kumar@linaro.org> # cpufreq
Link: https://lore.kernel.org/r/20221216163815.522628-7-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/arm/cci-control-port.yaml     | 2 +-
 Documentation/devicetree/bindings/arm/cpus.yaml                 | 2 +-
 .../devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml      | 2 +-
 Documentation/devicetree/bindings/arm/sp810.yaml                | 2 +-
 Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml       | 2 +-
 .../devicetree/bindings/arm/stm32/st,stm32-syscon.yaml          | 2 +-
 .../devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml      | 2 +-
 .../devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml      | 2 +-
 Documentation/devicetree/bindings/arm/vexpress-config.yaml      | 2 +-
 Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml      | 2 +-
 .../devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml       | 2 +-
 .../devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml       | 2 +-
 Documentation/devicetree/bindings/bus/ti-sysc.yaml              | 2 +-
 Documentation/devicetree/bindings/clock/fsl,plldig.yaml         | 2 +-
 Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml      | 2 +-
 Documentation/devicetree/bindings/clock/imx8m-clock.yaml        | 2 +-
 Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml  | 2 +-
 Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml  | 2 +-
 Documentation/devicetree/bindings/clock/imx93-clock.yaml        | 2 +-
 Documentation/devicetree/bindings/clock/intel,agilex.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml      | 2 +-
 Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml    | 2 +-
 Documentation/devicetree/bindings/clock/intel,stratix10.yaml    | 2 +-
 .../devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml        | 2 +-
 Documentation/devicetree/bindings/clock/milbeaut-clock.yaml     | 2 +-
 .../devicetree/bindings/clock/nuvoton,npcm845-clk.yaml          | 2 +-
 Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml        | 2 +-
 .../devicetree/bindings/clock/rockchip,rk3568-cru.yaml          | 2 +-
 .../devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml        | 2 +-
 .../devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml         | 2 +-
 Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml      | 2 +-
 Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml     | 2 +-
 Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml     | 2 +-
 Documentation/devicetree/bindings/display/arm,hdlcd.yaml        | 2 +-
 Documentation/devicetree/bindings/display/arm,malidp.yaml       | 2 +-
 .../devicetree/bindings/display/bridge/toshiba,tc358767.yaml    | 2 +-
 .../devicetree/bindings/display/bridge/toshiba,tc358775.yaml    | 2 +-
 .../devicetree/bindings/display/panel/display-timings.yaml      | 2 +-
 .../devicetree/bindings/display/panel/panel-timing.yaml         | 2 +-
 Documentation/devicetree/bindings/dma/st,stm32-dma.yaml         | 2 +-
 Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml      | 2 +-
 Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml        | 2 +-
 Documentation/devicetree/bindings/edac/dmc-520.yaml             | 2 +-
 Documentation/devicetree/bindings/firmware/arm,scmi.yaml        | 2 +-
 Documentation/devicetree/bindings/firmware/arm,scpi.yaml        | 2 +-
 .../devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml          | 2 +-
 Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml       | 2 +-
 Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml        | 2 +-
 Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml         | 2 +-
 Documentation/devicetree/bindings/gpu/vivante,gc.yaml           | 2 +-
 .../devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml         | 2 +-
 Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml    | 2 +-
 Documentation/devicetree/bindings/i2c/i2c-pxa.yaml              | 2 +-
 Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml       | 2 +-
 Documentation/devicetree/bindings/i3c/i3c.yaml                  | 2 +-
 Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml      | 2 +-
 .../devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml         | 2 +-
 Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml  | 2 +-
 Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml  | 2 +-
 Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml  | 2 +-
 Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml     | 2 +-
 .../devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml        | 2 +-
 Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml  | 2 +-
 Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml     | 2 +-
 .../devicetree/bindings/iio/multiplexer/io-channel-mux.yaml     | 2 +-
 .../bindings/input/touchscreen/cypress,cy8ctma140.yaml          | 2 +-
 .../bindings/input/touchscreen/cypress,cy8ctma340.yaml          | 2 +-
 .../devicetree/bindings/input/touchscreen/edt-ft5x06.yaml       | 2 +-
 Documentation/devicetree/bindings/input/touchscreen/goodix.yaml | 2 +-
 .../devicetree/bindings/input/touchscreen/himax,hx83112b.yaml   | 2 +-
 .../devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml     | 2 +-
 .../devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml  | 2 +-
 .../devicetree/bindings/input/touchscreen/melfas,mms114.yaml    | 2 +-
 .../devicetree/bindings/input/touchscreen/mstar,msg2638.yaml    | 2 +-
 .../devicetree/bindings/input/touchscreen/ti,tsc2005.yaml       | 2 +-
 .../devicetree/bindings/input/touchscreen/touchscreen.yaml      | 2 +-
 .../devicetree/bindings/input/touchscreen/zinitix,bt400.yaml    | 2 +-
 .../devicetree/bindings/interrupt-controller/mrvl,intc.yaml     | 2 +-
 .../bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml      | 2 +-
 Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml           | 2 +-
 Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml           | 2 +-
 .../devicetree/bindings/leds/backlight/gpio-backlight.yaml      | 2 +-
 .../devicetree/bindings/leds/backlight/led-backlight.yaml       | 2 +-
 .../devicetree/bindings/leds/backlight/pwm-backlight.yaml       | 2 +-
 .../devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml      | 2 +-
 Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml     | 2 +-
 Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml    | 2 +-
 Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml  | 2 +-
 Documentation/devicetree/bindings/media/renesas,ceu.yaml        | 2 +-
 Documentation/devicetree/bindings/media/st,stm32-cec.yaml       | 2 +-
 Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml      | 2 +-
 Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml     | 2 +-
 .../bindings/memory-controllers/calxeda-ddr-ctrlr.yaml          | 2 +-
 .../bindings/memory-controllers/st,stm32-fmc2-ebi.yaml          | 2 +-
 Documentation/devicetree/bindings/mfd/actions,atc260x.yaml      | 2 +-
 Documentation/devicetree/bindings/mfd/ene-kb3930.yaml           | 2 +-
 Documentation/devicetree/bindings/mfd/ene-kb930.yaml            | 2 +-
 Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml      | 2 +-
 Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml          | 2 +-
 Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml    | 2 +-
 Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml    | 2 +-
 Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml    | 2 +-
 Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml    | 2 +-
 Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml     | 2 +-
 Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml     | 2 +-
 Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml      | 2 +-
 Documentation/devicetree/bindings/mfd/st,stmfx.yaml             | 2 +-
 Documentation/devicetree/bindings/mfd/st,stpmic1.yaml           | 2 +-
 Documentation/devicetree/bindings/mips/cpus.yaml                | 2 +-
 Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml      | 2 +-
 Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml   | 2 +-
 .../devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml      | 2 +-
 Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml      | 2 +-
 Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml    | 2 +-
 Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml    | 2 +-
 Documentation/devicetree/bindings/mmc/mtk-sd.yaml               | 2 +-
 Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml            | 2 +-
 Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml   | 2 +-
 Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml     | 2 +-
 Documentation/devicetree/bindings/mtd/gpmi-nand.yaml            | 2 +-
 Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml | 2 +-
 Documentation/devicetree/bindings/mtd/mxc-nand.yaml             | 2 +-
 .../devicetree/bindings/mtd/partitions/qcom,smem-part.yaml      | 2 +-
 Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml   | 2 +-
 Documentation/devicetree/bindings/mux/gpio-mux.yaml             | 2 +-
 Documentation/devicetree/bindings/mux/mux-consumer.yaml         | 2 +-
 Documentation/devicetree/bindings/mux/mux-controller.yaml       | 2 +-
 Documentation/devicetree/bindings/mux/reg-mux.yaml              | 2 +-
 Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml        | 2 +-
 Documentation/devicetree/bindings/net/can/bosch,m_can.yaml      | 2 +-
 Documentation/devicetree/bindings/net/can/can-transceiver.yaml  | 2 +-
 Documentation/devicetree/bindings/net/engleder,tsnep.yaml       | 2 +-
 Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml   | 2 +-
 Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml  | 2 +-
 Documentation/devicetree/bindings/net/wireless/ieee80211.yaml   | 2 +-
 .../devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml         | 2 +-
 Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml        | 2 +-
 .../devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml     | 2 +-
 Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml     | 2 +-
 Documentation/devicetree/bindings/opp/opp-v1.yaml               | 2 +-
 Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml      | 2 +-
 Documentation/devicetree/bindings/opp/opp-v2.yaml               | 2 +-
 Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml         | 2 +-
 Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml   | 2 +-
 Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml         | 2 +-
 .../devicetree/bindings/phy/intel,keembay-phy-usb.yaml          | 2 +-
 .../devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml      | 2 +-
 Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml | 2 +-
 Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml     | 2 +-
 Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml    | 2 +-
 Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml     | 2 +-
 Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml   | 2 +-
 Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml  | 2 +-
 Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml    | 2 +-
 Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml     | 2 +-
 Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml  | 2 +-
 Documentation/devicetree/bindings/phy/transmit-amplitude.yaml   | 2 +-
 Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml     | 2 +-
 Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml       | 2 +-
 .../devicetree/bindings/power/supply/dlg,da9150-charger.yaml    | 2 +-
 .../devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml | 2 +-
 .../devicetree/bindings/power/supply/ingenic,battery.yaml       | 2 +-
 .../devicetree/bindings/power/supply/lltc,lt3651-charger.yaml   | 2 +-
 .../devicetree/bindings/power/supply/sc2731-charger.yaml        | 2 +-
 Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml   | 2 +-
 Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml    | 2 +-
 .../devicetree/bindings/regulator/st,stm32-booster.yaml         | 2 +-
 .../devicetree/bindings/regulator/st,stm32-vrefbuf.yaml         | 2 +-
 .../devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml | 2 +-
 Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml | 2 +-
 Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml   | 2 +-
 Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml       | 2 +-
 Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml     | 2 +-
 Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml | 2 +-
 .../devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml      | 2 +-
 .../devicetree/bindings/remoteproc/st,stm32-rproc.yaml          | 2 +-
 Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml     | 2 +-
 .../devicetree/bindings/rng/silex-insight,ba431-rng.yaml        | 2 +-
 Documentation/devicetree/bindings/rng/st,stm32-rng.yaml         | 2 +-
 .../devicetree/bindings/rng/xiphera,xip8001b-trng.yaml          | 2 +-
 Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml           | 2 +-
 Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml         | 2 +-
 Documentation/devicetree/bindings/serial/8250.yaml              | 2 +-
 Documentation/devicetree/bindings/serial/rs485.yaml             | 2 +-
 Documentation/devicetree/bindings/serial/st,stm32-uart.yaml     | 2 +-
 Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml   | 2 +-
 Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml        | 2 +-
 Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml       | 2 +-
 Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml        | 2 +-
 Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml      | 2 +-
 Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml   | 2 +-
 Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml   | 2 +-
 Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml     | 2 +-
 Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml     | 2 +-
 .../devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml      | 2 +-
 .../devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml       | 2 +-
 Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml   | 2 +-
 Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml     | 2 +-
 Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml        | 2 +-
 Documentation/devicetree/bindings/spi/st,stm32-spi.yaml         | 2 +-
 Documentation/devicetree/bindings/thermal/imx-thermal.yaml      | 2 +-
 Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml   | 2 +-
 Documentation/devicetree/bindings/thermal/sprd-thermal.yaml     | 2 +-
 Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml | 2 +-
 .../devicetree/bindings/thermal/thermal-cooling-devices.yaml    | 2 +-
 Documentation/devicetree/bindings/thermal/thermal-idle.yaml     | 2 +-
 Documentation/devicetree/bindings/thermal/thermal-sensor.yaml   | 2 +-
 Documentation/devicetree/bindings/thermal/thermal-zones.yaml    | 2 +-
 Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml | 2 +-
 Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml | 2 +-
 Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml     | 2 +-
 Documentation/devicetree/bindings/timer/st,stm32-timer.yaml     | 2 +-
 Documentation/devicetree/bindings/usb/analogix,anx7411.yaml     | 2 +-
 Documentation/devicetree/bindings/usb/cdns,usb3.yaml            | 2 +-
 Documentation/devicetree/bindings/usb/dwc2.yaml                 | 2 +-
 Documentation/devicetree/bindings/usb/faraday,fotg210.yaml      | 2 +-
 Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml  | 2 +-
 Documentation/devicetree/bindings/usb/nxp,isp1760.yaml          | 2 +-
 Documentation/devicetree/bindings/usb/richtek,rt1719.yaml       | 2 +-
 Documentation/devicetree/bindings/usb/st,stusb160x.yaml         | 2 +-
 Documentation/devicetree/bindings/virtio/virtio-device.yaml     | 2 +-
 Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml   | 2 +-
 223 files changed, 223 insertions(+), 223 deletions(-)

diff --git a/Documentation/devicetree/bindings/arm/cci-control-port.yaml b/Documentation/devicetree/bindings/arm/cci-control-port.yaml
index c9114866213f3..c29d250a6d772 100644
--- a/Documentation/devicetree/bindings/arm/cci-control-port.yaml
+++ b/Documentation/devicetree/bindings/arm/cci-control-port.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/cci-control-port.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: CCI Interconnect Bus Masters binding
+title: CCI Interconnect Bus Masters
 
 maintainers:
   - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
diff --git a/Documentation/devicetree/bindings/arm/cpus.yaml b/Documentation/devicetree/bindings/arm/cpus.yaml
index 7dd84f8f8e4fb..01b5a9c689a29 100644
--- a/Documentation/devicetree/bindings/arm/cpus.yaml
+++ b/Documentation/devicetree/bindings/arm/cpus.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/cpus.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ARM CPUs bindings
+title: ARM CPUs
 
 maintainers:
   - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
diff --git a/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml b/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml
index 5cbcacaeb441e..ff378d5cbd32a 100644
--- a/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml
+++ b/Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/keystone/ti,k3-sci-common.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common K3 TI-SCI bindings
+title: Common K3 TI-SCI
 
 maintainers:
   - Nishanth Menon <nm@ti.com>
diff --git a/Documentation/devicetree/bindings/arm/sp810.yaml b/Documentation/devicetree/bindings/arm/sp810.yaml
index bc8e524aa90ad..c9094e5ec5657 100644
--- a/Documentation/devicetree/bindings/arm/sp810.yaml
+++ b/Documentation/devicetree/bindings/arm/sp810.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/sp810.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ARM Versatile Express SP810 System Controller bindings
+title: ARM Versatile Express SP810 System Controller
 
 maintainers:
   - Andre Przywara <andre.przywara@arm.com>
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
index ecb28e90fd115..2297ad3f4774c 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,mlahb.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/arm/stm32/st,mlahb.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STM32 ML-AHB interconnect bindings
+title: STMicroelectronics STM32 ML-AHB interconnect
 
 maintainers:
   - Fabien Dessenne <fabien.dessenne@foss.st.com>
diff --git a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
index 6f846d69c5e16..b2b156cc160ae 100644
--- a/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
+++ b/Documentation/devicetree/bindings/arm/stm32/st,stm32-syscon.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/arm/stm32/st,stm32-syscon.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STM32 Platforms System Controller bindings
+title: STMicroelectronics STM32 Platforms System Controller
 
 maintainers:
   - Alexandre Torgue <alexandre.torgue@foss.st.com>
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
index debb2b0c80137..dd3a4770c6a1f 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra194-cbb.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-cbb.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: NVIDIA Tegra194 CBB 1.0 bindings
+title: NVIDIA Tegra194 CBB 1.0
 
 maintainers:
   - Sumit Gupta <sumitg@nvidia.com>
diff --git a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
index 7b1fe50ffbe0b..44184ee01449f 100644
--- a/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
+++ b/Documentation/devicetree/bindings/arm/tegra/nvidia,tegra234-cbb.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/arm/tegra/nvidia,tegra234-cbb.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: NVIDIA Tegra CBB 2.0 bindings
+title: NVIDIA Tegra CBB 2.0
 
 maintainers:
   - Sumit Gupta <sumitg@nvidia.com>
diff --git a/Documentation/devicetree/bindings/arm/vexpress-config.yaml b/Documentation/devicetree/bindings/arm/vexpress-config.yaml
index 09e1adf5ca7a0..b74380da3198b 100644
--- a/Documentation/devicetree/bindings/arm/vexpress-config.yaml
+++ b/Documentation/devicetree/bindings/arm/vexpress-config.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/vexpress-config.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ARM Versatile Express configuration bus bindings
+title: ARM Versatile Express configuration bus
 
 maintainers:
   - Andre Przywara <andre.przywara@arm.com>
diff --git a/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml b/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml
index f04db802a7321..be6e3b5425690 100644
--- a/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml
+++ b/Documentation/devicetree/bindings/arm/vexpress-sysreg.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/arm/vexpress-sysreg.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ARM Versatile Express system registers bindings
+title: ARM Versatile Express system registers
 
 maintainers:
   - Andre Przywara <andre.przywara@arm.com>
diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml
index cb530b46beff5..2011bd03cdcd9 100644
--- a/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml
+++ b/Documentation/devicetree/bindings/ata/allwinner,sun4i-a10-ahci.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/ata/allwinner,sun4i-a10-ahci.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Allwinner A10 AHCI SATA Controller bindings
+title: Allwinner A10 AHCI SATA Controller
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml
index e6b42a113ff17..a2afe2ad60638 100644
--- a/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml
+++ b/Documentation/devicetree/bindings/ata/allwinner,sun8i-r40-ahci.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/ata/allwinner,sun8i-r40-ahci.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Allwinner R40 AHCI SATA Controller bindings
+title: Allwinner R40 AHCI SATA Controller
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/bus/ti-sysc.yaml b/Documentation/devicetree/bindings/bus/ti-sysc.yaml
index fced4082b047b..f089634f9466a 100644
--- a/Documentation/devicetree/bindings/bus/ti-sysc.yaml
+++ b/Documentation/devicetree/bindings/bus/ti-sysc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/bus/ti-sysc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments interconnect target module binding
+title: Texas Instruments interconnect target module
 
 maintainers:
   - Tony Lindgren <tony@atomide.com>
diff --git a/Documentation/devicetree/bindings/clock/fsl,plldig.yaml b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml
index 9ac716dfa602f..88dd9c18db922 100644
--- a/Documentation/devicetree/bindings/clock/fsl,plldig.yaml
+++ b/Documentation/devicetree/bindings/clock/fsl,plldig.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fsl,plldig.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP QorIQ Layerscape LS1028A Display PIXEL Clock Binding
+title: NXP QorIQ Layerscape LS1028A Display PIXEL Clock
 
 maintainers:
   - Wen He <wen.he_1@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml b/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml
index fc3bdfdc091ab..3bca9d11c148f 100644
--- a/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/fsl,sai-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fsl,sai-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Freescale SAI bitclock-as-a-clock binding
+title: Freescale SAI bitclock-as-a-clock
 
 maintainers:
   - Michael Walle <michael@walle.cc>
diff --git a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
index 458c7645ee683..e4c4cadec501a 100644
--- a/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx8m-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx8m-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX8M Family Clock Control Module Binding
+title: NXP i.MX8M Family Clock Control Module
 
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
index cb80105b3c708..b207f95361b20 100644
--- a/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
+++ b/Documentation/devicetree/bindings/clock/imx8qxp-lpcg.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx8qxp-lpcg.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX8QXP LPCG (Low-Power Clock Gating) Clock bindings
+title: NXP i.MX8QXP LPCG (Low-Power Clock Gating) Clock
 
 maintainers:
   - Aisheng Dong <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml b/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml
index 71f7186b135ba..68a60cdc19af3 100644
--- a/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx8ulp-cgc-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx8ulp-cgc-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX8ULP Clock Generation & Control(CGC) Module Binding
+title: NXP i.MX8ULP Clock Generation & Control(CGC) Module
 
 maintainers:
   - Jacky Bai <ping.bai@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml b/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml
index 00612725bf8b1..d0b0792fe7bab 100644
--- a/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx8ulp-pcc-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx8ulp-pcc-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX8ULP Peripheral Clock Controller(PCC) Module Binding
+title: NXP i.MX8ULP Peripheral Clock Controller(PCC) Module
 
 maintainers:
   - Jacky Bai <ping.bai@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx93-clock.yaml b/Documentation/devicetree/bindings/clock/imx93-clock.yaml
index 21a06194e4a3a..ccb53c6b96c11 100644
--- a/Documentation/devicetree/bindings/clock/imx93-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx93-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx93-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX93 Clock Control Module Binding
+title: NXP i.MX93 Clock Control Module
 
 maintainers:
   - Peng Fan <peng.fan@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/intel,agilex.yaml b/Documentation/devicetree/bindings/clock/intel,agilex.yaml
index cf5a9eb803e62..3745ba8dbd763 100644
--- a/Documentation/devicetree/bindings/clock/intel,agilex.yaml
+++ b/Documentation/devicetree/bindings/clock/intel,agilex.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/intel,agilex.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel SoCFPGA Agilex platform clock controller binding
+title: Intel SoCFPGA Agilex platform clock controller
 
 maintainers:
   - Dinh Nguyen <dinguyen@kernel.org>
diff --git a/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml b/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml
index f3e1a700a2ca5..76609a390429a 100644
--- a/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml
+++ b/Documentation/devicetree/bindings/clock/intel,cgu-lgm.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/intel,cgu-lgm.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel Lightning Mountain SoC's Clock Controller(CGU) Binding
+title: Intel Lightning Mountain SoC's Clock Controller(CGU)
 
 maintainers:
   - Rahul Tanwar <rahul.tanwar@linux.intel.com>
diff --git a/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml b/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml
index 8f45976e946ee..e000116a51a44 100644
--- a/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml
+++ b/Documentation/devicetree/bindings/clock/intel,easic-n5x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/intel,easic-n5x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel SoCFPGA eASIC N5X platform clock controller binding
+title: Intel SoCFPGA eASIC N5X platform clock controller
 
 maintainers:
   - Dinh Nguyen <dinguyen@kernel.org>
diff --git a/Documentation/devicetree/bindings/clock/intel,stratix10.yaml b/Documentation/devicetree/bindings/clock/intel,stratix10.yaml
index f506e3db9782f..b4a8be2134001 100644
--- a/Documentation/devicetree/bindings/clock/intel,stratix10.yaml
+++ b/Documentation/devicetree/bindings/clock/intel,stratix10.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/intel,stratix10.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel SoCFPGA Stratix10 platform clock controller binding
+title: Intel SoCFPGA Stratix10 platform clock controller
 
 maintainers:
   - Dinh Nguyen <dinguyen@kernel.org>
diff --git a/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml b/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml
index b2ce78722247c..e4e1c31267d2a 100644
--- a/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml
+++ b/Documentation/devicetree/bindings/clock/microchip,mpfs-clkcfg.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/microchip,mpfs-clkcfg.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Microchip PolarFire Clock Control Module Binding
+title: Microchip PolarFire Clock Control Module
 
 maintainers:
   - Daire McNamara <daire.mcnamara@microchip.com>
diff --git a/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml b/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml
index 6d39344d2b705..0af1c569eb328 100644
--- a/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/milbeaut-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/milbeaut-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Milbeaut SoCs Clock Controller Binding
+title: Milbeaut SoCs Clock Controller
 
 maintainers:
   - Taichi Sugaya <sugaya.taichi@socionext.com>
diff --git a/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml b/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml
index 771db2ddf0269..b901ca13cd25d 100644
--- a/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/nuvoton,npcm845-clk.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/nuvoton,npcm845-clk.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Nuvoton NPCM8XX Clock Controller Binding
+title: Nuvoton NPCM8XX Clock Controller
 
 maintainers:
   - Tomer Maimon <tmaimon77@gmail.com>
diff --git a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
index fccb91e78e499..cf25ba0419e21 100644
--- a/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,rpmhcc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/qcom,rpmhcc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies, Inc. RPMh Clocks Bindings
+title: Qualcomm Technologies, Inc. RPMh Clocks
 
 maintainers:
   - Taniya Das <tdas@codeaurora.org>
diff --git a/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml b/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml
index fc7546f521c55..f809c289445e6 100644
--- a/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml
+++ b/Documentation/devicetree/bindings/clock/rockchip,rk3568-cru.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/rockchip,rk3568-cru.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ROCKCHIP rk3568 Family Clock Control Module Binding
+title: ROCKCHIP rk3568 Family Clock Control Module
 
 maintainers:
   - Elaine Zhang <zhangqing@rock-chips.com>
diff --git a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml
index 9cd42a64b13ef..d0aecde2b89b1 100644
--- a/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/cpufreq-mediatek-hw.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/cpufreq/cpufreq-mediatek-hw.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MediaTek's CPUFREQ Bindings
+title: MediaTek's CPUFREQ
 
 maintainers:
   - Hector Yuan <hector.yuan@mediatek.com>
diff --git a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
index 3c00ad09eeaab..9c086eac6ca71 100644
--- a/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
+++ b/Documentation/devicetree/bindings/cpufreq/qcom-cpufreq-nvmem.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/cpufreq/qcom-cpufreq-nvmem.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies, Inc. NVMEM CPUFreq bindings
+title: Qualcomm Technologies, Inc. NVMEM CPUFreq
 
 maintainers:
   - Ilia Lin <ilia.lin@kernel.org>
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
index b72e4858f9aab..50b2c2e0c3cd8 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-crc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/crypto/st,stm32-crc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 CRC bindings
+title: STMicroelectronics STM32 CRC
 
 maintainers:
   - Lionel Debieve <lionel.debieve@foss.st.com>
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
index 6759c5bf3e572..0ddeb8a9a7a01 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-cryp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/crypto/st,stm32-cryp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 CRYP bindings
+title: STMicroelectronics STM32 CRYP
 
 description: The STM32 CRYP block is built on the CRYP block found in
   the STn8820 SoC introduced in 2007, and subsequently used in the U8500
diff --git a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
index 10ba94792d951..4ccb335e8063c 100644
--- a/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
+++ b/Documentation/devicetree/bindings/crypto/st,stm32-hash.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/crypto/st,stm32-hash.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 HASH bindings
+title: STMicroelectronics STM32 HASH
 
 maintainers:
   - Lionel Debieve <lionel.debieve@foss.st.com>
diff --git a/Documentation/devicetree/bindings/display/arm,hdlcd.yaml b/Documentation/devicetree/bindings/display/arm,hdlcd.yaml
index a2670258c48d3..9a30e9005e8a2 100644
--- a/Documentation/devicetree/bindings/display/arm,hdlcd.yaml
+++ b/Documentation/devicetree/bindings/display/arm,hdlcd.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/arm,hdlcd.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Arm HDLCD display controller binding
+title: Arm HDLCD display controller
 
 maintainers:
   - Liviu Dudau <Liviu.Dudau@arm.com>
diff --git a/Documentation/devicetree/bindings/display/arm,malidp.yaml b/Documentation/devicetree/bindings/display/arm,malidp.yaml
index 2a17ec6fc97c0..91812573fd080 100644
--- a/Documentation/devicetree/bindings/display/arm,malidp.yaml
+++ b/Documentation/devicetree/bindings/display/arm,malidp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/arm,malidp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Arm Mali Display Processor (Mali-DP) binding
+title: Arm Mali Display Processor (Mali-DP)
 
 maintainers:
   - Liviu Dudau <Liviu.Dudau@arm.com>
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
index ed280053ec62b..140927884418f 100644
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/toshiba,tc358767.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Toshiba TC358767 eDP bridge bindings
+title: Toshiba TC358767 eDP bridge
 
 maintainers:
   - Andrey Gusakov <andrey.gusakov@cogentembedded.com>
diff --git a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
index 10471c6c1ff91..d879c700594a6 100644
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358775.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/toshiba,tc358775.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Toshiba TC358775 DSI to LVDS bridge bindings
+title: Toshiba TC358775 DSI to LVDS bridge
 
 maintainers:
   - Vinay Simha BN <simhavcs@gmail.com>
diff --git a/Documentation/devicetree/bindings/display/panel/display-timings.yaml b/Documentation/devicetree/bindings/display/panel/display-timings.yaml
index 6d30575819d3b..dc5f7e36e30bc 100644
--- a/Documentation/devicetree/bindings/display/panel/display-timings.yaml
+++ b/Documentation/devicetree/bindings/display/panel/display-timings.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/panel/display-timings.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: display timings bindings
+title: display timings
 
 maintainers:
   - Thierry Reding <thierry.reding@gmail.com>
diff --git a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml
index 229e3b36ee29a..0d317e61edd8f 100644
--- a/Documentation/devicetree/bindings/display/panel/panel-timing.yaml
+++ b/Documentation/devicetree/bindings/display/panel/panel-timing.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/panel/panel-timing.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: panel timing bindings
+title: panel timing
 
 maintainers:
   - Thierry Reding <thierry.reding@gmail.com>
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
index 55faab6a468eb..158c791d7caa6 100644
--- a/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
+++ b/Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/st,stm32-dma.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 DMA Controller bindings
+title: STMicroelectronics STM32 DMA Controller
 
 description: |
   The STM32 DMA is a general-purpose direct memory access controller capable of
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
index 1e1d8549b7ef4..3e0b82d277cac 100644
--- a/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
+++ b/Documentation/devicetree/bindings/dma/st,stm32-dmamux.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/st,stm32-dmamux.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 DMA MUX (DMA request router) bindings
+title: STMicroelectronics STM32 DMA MUX (DMA request router)
 
 maintainers:
   - Amelie Delaunay <amelie.delaunay@foss.st.com>
diff --git a/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
index 00cfa39136521..08a59bd69a2f4 100644
--- a/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
+++ b/Documentation/devicetree/bindings/dma/st,stm32-mdma.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/dma/st,stm32-mdma.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 MDMA Controller bindings
+title: STMicroelectronics STM32 MDMA Controller
 
 description: |
   The STM32 MDMA is a general-purpose direct memory access controller capable of
diff --git a/Documentation/devicetree/bindings/edac/dmc-520.yaml b/Documentation/devicetree/bindings/edac/dmc-520.yaml
index 3b6842e92d1b9..84db3966662ac 100644
--- a/Documentation/devicetree/bindings/edac/dmc-520.yaml
+++ b/Documentation/devicetree/bindings/edac/dmc-520.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/edac/dmc-520.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ARM DMC-520 EDAC bindings
+title: ARM DMC-520 EDAC
 
 maintainers:
   - Lei Wang <lewan@microsoft.com>
diff --git a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml
index 1c0388da6721a..176796931a22f 100644
--- a/Documentation/devicetree/bindings/firmware/arm,scmi.yaml
+++ b/Documentation/devicetree/bindings/firmware/arm,scmi.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/firmware/arm,scmi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: System Control and Management Interface (SCMI) Message Protocol bindings
+title: System Control and Management Interface (SCMI) Message Protocol
 
 maintainers:
   - Sudeep Holla <sudeep.holla@arm.com>
diff --git a/Documentation/devicetree/bindings/firmware/arm,scpi.yaml b/Documentation/devicetree/bindings/firmware/arm,scpi.yaml
index 1f9322925e7c9..241317239ffcf 100644
--- a/Documentation/devicetree/bindings/firmware/arm,scpi.yaml
+++ b/Documentation/devicetree/bindings/firmware/arm,scpi.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/firmware/arm,scpi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: System Control and Power Interface (SCPI) Message Protocol bindings
+title: System Control and Power Interface (SCPI) Message Protocol
 
 maintainers:
   - Sudeep Holla <sudeep.holla@arm.com>
diff --git a/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml b/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml
index fcf0011b8e6db..3faae32366656 100644
--- a/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml
+++ b/Documentation/devicetree/bindings/firmware/qemu,fw-cfg-mmio.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/firmware/qemu,fw-cfg-mmio.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: QEMU Firmware Configuration bindings
+title: QEMU Firmware Configuration
 
 maintainers:
   - Rob Herring <robh@kernel.org>
diff --git a/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml b/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml
index cb8a5c376e1e2..157969bc4c463 100644
--- a/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml
+++ b/Documentation/devicetree/bindings/gpio/gpio-tpic2810.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/gpio/gpio-tpic2810.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TPIC2810 GPIO controller bindings
+title: TPIC2810 GPIO controller
 
 maintainers:
   - Aswath Govindraju <a-govindraju@ti.com>
diff --git a/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml b/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml
index 7087e4a5013f6..bd721c8390592 100644
--- a/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml
+++ b/Documentation/devicetree/bindings/gpio/ti,omap-gpio.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/gpio/ti,omap-gpio.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: OMAP GPIO controller bindings
+title: OMAP GPIO controller
 
 maintainers:
   - Grygorii Strashko <grygorii.strashko@ti.com>
diff --git a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
index 217c42874f418..dae55b8a267b0 100644
--- a/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
+++ b/Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/gpu/brcm,bcm-v3d.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom V3D GPU Bindings
+title: Broadcom V3D GPU
 
 maintainers:
   - Eric Anholt <eric@anholt.net>
diff --git a/Documentation/devicetree/bindings/gpu/vivante,gc.yaml b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml
index 93e7244cdc0eb..b1b10ea70ad9a 100644
--- a/Documentation/devicetree/bindings/gpu/vivante,gc.yaml
+++ b/Documentation/devicetree/bindings/gpu/vivante,gc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/gpu/vivante,gc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Vivante GPU Bindings
+title: Vivante GPU
 
 description: Vivante GPU core devices
 
diff --git a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
index b18c616035a8b..829d1fdf4c674 100644
--- a/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
+++ b/Documentation/devicetree/bindings/hwlock/st,stm32-hwspinlock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/hwlock/st,stm32-hwspinlock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Hardware Spinlock bindings
+title: STMicroelectronics STM32 Hardware Spinlock
 
 maintainers:
   - Fabien Dessenne <fabien.dessenne@foss.st.com>
diff --git a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
index d0d5497492087..ae4f68d4e6966 100644
--- a/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
+++ b/Documentation/devicetree/bindings/hwmon/moortec,mr75203.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/hwmon/moortec,mr75203.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Moortec Semiconductor MR75203 PVT Controller bindings
+title: Moortec Semiconductor MR75203 PVT Controller
 
 maintainers:
   - Rahul Tanwar <rtanwar@maxlinear.com>
diff --git a/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml b/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml
index 015885dd02d32..31386a8d76845 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-pxa.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/i2c/i2c-pxa.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell MMP I2C controller bindings
+title: Marvell MMP I2C controller
 
 maintainers:
   - Rob Herring <robh+dt@kernel.org>
diff --git a/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml b/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml
index 42c5974ec7b04..16024415a4a74 100644
--- a/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/st,nomadik-i2c.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/i2c/st,nomadik-i2c.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ST Microelectronics Nomadik I2C Bindings
+title: ST Microelectronics Nomadik I2C
 
 description: The Nomadik I2C host controller began its life in the ST
   Microelectronics STn8800 SoC, and was then inherited into STn8810 and
diff --git a/Documentation/devicetree/bindings/i3c/i3c.yaml b/Documentation/devicetree/bindings/i3c/i3c.yaml
index 1f82fc9237996..fdb4212149e79 100644
--- a/Documentation/devicetree/bindings/i3c/i3c.yaml
+++ b/Documentation/devicetree/bindings/i3c/i3c.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/i3c/i3c.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: I3C bus binding
+title: I3C bus
 
 maintainers:
   - Alexandre Belloni <alexandre.belloni@bootlin.com>
diff --git a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml
index 698beb896f76a..517e8b1fcb739 100644
--- a/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/ingenic,adc.yaml
@@ -5,7 +5,7 @@
 $id: "http://devicetree.org/schemas/iio/adc/ingenic,adc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Ingenic JZ47xx ADC controller IIO bindings
+title: Ingenic JZ47xx ADC controller IIO
 
 maintainers:
   - Artur Rojek <contact@artur-rojek.eu>
diff --git a/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml b/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml
index a6cb857a232da..9ceb6f18c854f 100644
--- a/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/motorola,cpcap-adc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/motorola,cpcap-adc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Motorola CPCAP PMIC ADC binding
+title: Motorola CPCAP PMIC ADC
 
 maintainers:
   - Tony Lindgren <tony@atomide.com>
diff --git a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
index 9c59a20a60327..63369ba388e47 100644
--- a/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/nxp,imx8qxp-adc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/nxp,imx8qxp-adc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP IMX8QXP ADC bindings
+title: NXP IMX8QXP ADC
 
 maintainers:
   - Cai Huoqing <caihuoqing@baidu.com>
diff --git a/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml b/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml
index 43abb300fa3d3..70b38038a0805 100644
--- a/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/nxp,lpc1850-adc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/nxp,lpc1850-adc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP LPC1850 ADC bindings
+title: NXP LPC1850 ADC
 
 maintainers:
   - Jonathan Cameron <jic23@kernel.org>
diff --git a/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml b/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml
index 44aa28b59197a..8181cf9a8e07d 100644
--- a/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/sprd,sc2720-adc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/sprd,sc2720-adc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Spreadtrum SC27XX series PMICs ADC binding
+title: Spreadtrum SC27XX series PMICs ADC
 
 maintainers:
   - Baolin Wang <baolin.wang7@gmail.com>
diff --git a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
index 05265f381fde9..1c340c95df160 100644
--- a/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/st,stm32-adc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/iio/adc/st,stm32-adc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STM32 ADC bindings
+title: STMicroelectronics STM32 ADC
 
 description: |
   STM32 ADC is a successive approximation analog-to-digital converter.
diff --git a/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml b/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml
index d6d3d85901715..d40689f233f22 100644
--- a/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/x-powers,axp209-adc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/x-powers,axp209-adc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: X-Powers AXP ADC bindings
+title: X-Powers AXP ADC
 
 maintainers:
   - Chen-Yu Tsai <wens@csie.org>
diff --git a/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml b/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml
index 595f481c548e0..9c8afe3f1b690 100644
--- a/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml
+++ b/Documentation/devicetree/bindings/iio/dac/nxp,lpc1850-dac.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/dac/nxp,lpc1850-dac.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP LPC1850 DAC bindings
+title: NXP LPC1850 DAC
 
 maintainers:
   - Jonathan Cameron <jic23@kernel.org>
diff --git a/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml
index 6adeda4087fc2..0f1bf1110122a 100644
--- a/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml
+++ b/Documentation/devicetree/bindings/iio/dac/st,stm32-dac.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/iio/dac/st,stm32-dac.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STM32 DAC bindings
+title: STMicroelectronics STM32 DAC
 
 description: |
   The STM32 DAC is a 12-bit voltage output digital-to-analog converter. The DAC
diff --git a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml
index 611ad4444cf0c..c55831b60ee67 100644
--- a/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml
+++ b/Documentation/devicetree/bindings/iio/multiplexer/io-channel-mux.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/multiplexer/io-channel-mux.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: I/O channel multiplexer bindings
+title: I/O channel multiplexer
 
 maintainers:
   - Peter Rosin <peda@axentia.se>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml
index 3225c8d1fdaf0..86a6d18f952a0 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma140.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/cypress,cy8ctma140.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Cypress CY8CTMA140 series touchscreen controller bindings
+title: Cypress CY8CTMA140 series touchscreen controller
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml
index 762e56ee90cd2..4dfbb93678b56 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/cypress,cy8ctma340.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/cypress,cy8ctma340.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Cypress CY8CTMA340 series touchscreen controller bindings
+title: Cypress CY8CTMA340 series touchscreen controller
 
 description: The Cypress CY8CTMA340 series (also known as "CYTTSP" after
   the marketing name Cypress TrueTouch Standard Product) touchscreens can
diff --git a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
index 46bc8c028fe69..ef4c841387bdd 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/edt-ft5x06.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/edt-ft5x06.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: FocalTech EDT-FT5x06 Polytouch Bindings
+title: FocalTech EDT-FT5x06 Polytouch
 
 description: |
              There are 5 variants of the chip for various touch panel sizes
diff --git a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml
index 19ac9da421df3..3d016b87c8df8 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/goodix.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/goodix.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Goodix GT9xx series touchscreen controller Bindings
+title: Goodix GT9xx series touchscreen controller
 
 maintainers:
   - Dmitry Torokhov <dmitry.torokhov@gmail.com>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml b/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml
index be2ba185c086b..f42b23d532eb0 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/himax,hx83112b.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/himax,hx83112b.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Himax hx83112b touchscreen controller bindings
+title: Himax hx83112b touchscreen controller
 
 maintainers:
   - Job Noorman <job@noorman.info>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml b/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml
index 942562f1e45b2..874c0781c4769 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/hycon,hy46xx.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Hycon HY46XX series touchscreen controller bindings
+title: Hycon HY46XX series touchscreen controller
 
 description: |
   There are 6 variants of the chip for various touch panel sizes and cover lens material
diff --git a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml
index e3a2b871e50c6..0d6b033fd5fbc 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/imagis,ist3038c.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/imagis,ist3038c.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Imagis IST30XXC family touchscreen controller bindings
+title: Imagis IST30XXC family touchscreen controller
 
 maintainers:
   - Markuss Broks <markuss.broks@gmail.com>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml b/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
index 62366886fb3e1..fdd02898e2492 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/melfas,mms114.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/melfas,mms114.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Melfas MMS114 family touchscreen controller bindings
+title: Melfas MMS114 family touchscreen controller
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml b/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml
index af4f954de958d..ddbbc820c7e55 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/mstar,msg2638.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/mstar,msg2638.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MStar msg2638 touchscreen controller Bindings
+title: MStar msg2638 touchscreen controller
 
 maintainers:
   - Vincent Knecht <vincent.knecht@mailoo.org>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml
index 938aab016cc26..7187c390b2f57 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/ti,tsc2005.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments TSC2004 and TSC2005 touchscreen controller bindings
+title: Texas Instruments TSC2004 and TSC2005 touchscreen controller
 
 maintainers:
   - Marek Vasut <marex@denx.de>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml
index 4b5b212c772c4..895592da96263 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/touchscreen.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/touchscreen.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common touchscreen Bindings
+title: Common touchscreen
 
 maintainers:
   - Dmitry Torokhov <dmitry.torokhov@gmail.com>
diff --git a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
index b4e5ba7c0b49e..b1507463a03e9 100644
--- a/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
+++ b/Documentation/devicetree/bindings/input/touchscreen/zinitix,bt400.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/touchscreen/zinitix,bt400.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Zinitix BT4xx and BT5xx series touchscreen controller bindings
+title: Zinitix BT4xx and BT5xx series touchscreen controller
 
 description: The Zinitix BT4xx and BT5xx series of touchscreen controllers
   are Korea-produced touchscreens with embedded microcontrollers. The
diff --git a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml
index 5a583bf3dbc10..9acc21028413e 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/mrvl,intc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/interrupt-controller/mrvl,intc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell MMP/Orion Interrupt controller bindings
+title: Marvell MMP/Orion Interrupt controller
 
 maintainers:
   - Andrew Lunn <andrew@lunn.ch>
diff --git a/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml b/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml
index 9ce6804bdb999..2d6307a383ad4 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/nuvoton,wpcm450-aic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/interrupt-controller/nuvoton,wpcm450-aic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Nuvoton WPCM450 Advanced Interrupt Controller bindings
+title: Nuvoton WPCM450 Advanced Interrupt Controller
 
 maintainers:
   - Jonathan Neuschäfer <j.neuschaefer@gmx.net>
diff --git a/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml b/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml
index 71bc031c4fde6..3f25cdb4e99bd 100644
--- a/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml
+++ b/Documentation/devicetree/bindings/ipmi/ipmi-ipmb.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/ipmi/ipmi-ipmb.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: IPMI IPMB device bindings
+title: IPMI IPMB device
 
 description: IPMI IPMB device bindings
 
diff --git a/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml b/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml
index 898e3267893ac..c1b4bf95ef99f 100644
--- a/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml
+++ b/Documentation/devicetree/bindings/ipmi/ipmi-smic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/ipmi/ipmi-smic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: IPMI device bindings
+title: IPMI device
 
 description: IPMI device bindings
 
diff --git a/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml
index 3300451fcfd5e..584030b6b0b95 100644
--- a/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/gpio-backlight.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/leds/backlight/gpio-backlight.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: gpio-backlight bindings
+title: gpio-backlight
 
 maintainers:
   - Lee Jones <lee@kernel.org>
diff --git a/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml
index 0793d0adc4ec9..d7b78198abc22 100644
--- a/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/led-backlight.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/leds/backlight/led-backlight.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: led-backlight bindings
+title: led-backlight
 
 maintainers:
   - Lee Jones <lee@kernel.org>
diff --git a/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml b/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml
index 78fbe20a17580..5ec47a8c6568b 100644
--- a/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/pwm-backlight.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/leds/backlight/pwm-backlight.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: pwm-backlight bindings
+title: pwm-backlight
 
 maintainers:
   - Lee Jones <lee@kernel.org>
diff --git a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
index f24fd84b4b05c..c71b7c065b825 100644
--- a/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
+++ b/Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/mailbox/qcom,apcs-kpss-global.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Qualcomm APCS global block bindings
+title: Qualcomm APCS global block
 
 description:
   This binding describes the APCS "global" block found in various Qualcomm
diff --git a/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml b/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml
index 80feba82cbd61..bdfb4a8220c54 100644
--- a/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml
+++ b/Documentation/devicetree/bindings/mailbox/sprd-mailbox.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/mailbox/sprd-mailbox.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Spreadtrum mailbox controller bindings
+title: Spreadtrum mailbox controller
 
 maintainers:
   - Orson Zhai <orsonzhai@gmail.com>
diff --git a/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
index 2c8b47285aa34..0dfe05a04dd00 100644
--- a/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
+++ b/Documentation/devicetree/bindings/mailbox/st,stm32-ipcc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/mailbox/st,stm32-ipcc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STM32 IPC controller bindings
+title: STMicroelectronics STM32 IPC controller
 
 description:
   The IPCC block provides a non blocking signaling mechanism to post and
diff --git a/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml b/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml
index 0e3478551e137..de3e483f146a0 100644
--- a/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml
+++ b/Documentation/devicetree/bindings/media/marvell,mmp2-ccic.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/media/marvell,mmp2-ccic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell MMP2 camera host interface bindings
+title: Marvell MMP2 camera host interface
 
 maintainers:
   - Lubomir Rintel <lkundrak@v3.sk>
diff --git a/Documentation/devicetree/bindings/media/renesas,ceu.yaml b/Documentation/devicetree/bindings/media/renesas,ceu.yaml
index 50e0740af15a7..d527fc42c3fd1 100644
--- a/Documentation/devicetree/bindings/media/renesas,ceu.yaml
+++ b/Documentation/devicetree/bindings/media/renesas,ceu.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/renesas,ceu.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Renesas Capture Engine Unit (CEU) Bindings
+title: Renesas Capture Engine Unit (CEU)
 
 maintainers:
   - Jacopo Mondi <jacopo+renesas@jmondi.org>
diff --git a/Documentation/devicetree/bindings/media/st,stm32-cec.yaml b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
index 77144cc6f7db9..7f545a587a392 100644
--- a/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
+++ b/Documentation/devicetree/bindings/media/st,stm32-cec.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/st,stm32-cec.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 CEC bindings
+title: STMicroelectronics STM32 CEC
 
 maintainers:
   - Yannick Fertre <yannick.fertre@foss.st.com>
diff --git a/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
index e80fcdf280f0b..6b3e413cedb25 100644
--- a/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
+++ b/Documentation/devicetree/bindings/media/st,stm32-dcmi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/st,stm32-dcmi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Digital Camera Memory Interface (DCMI) binding
+title: STMicroelectronics STM32 Digital Camera Memory Interface (DCMI)
 
 maintainers:
   - Hugues Fruchet <hugues.fruchet@foss.st.com>
diff --git a/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml b/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml
index f97b4a2466056..4afa4a24b8685 100644
--- a/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml
+++ b/Documentation/devicetree/bindings/media/st,stm32-dma2d.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/media/st,stm32-dma2d.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Chrom-Art Accelerator DMA2D binding
+title: STMicroelectronics STM32 Chrom-Art Accelerator DMA2D
 
 description:
   Chrom-ART Accelerator(DMA2D), graphical hardware accelerator
diff --git a/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml b/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml
index 96d563fd61f58..e42aa488704d4 100644
--- a/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/calxeda-ddr-ctrlr.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/memory-controllers/calxeda-ddr-ctrlr.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Calxeda DDR memory controller binding
+title: Calxeda DDR memory controller
 
 description: |
   The Calxeda DDR memory controller is initialised and programmed by the
diff --git a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
index d71af02b7f16c..e76ba767dfd22 100644
--- a/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
+++ b/Documentation/devicetree/bindings/memory-controllers/st,stm32-fmc2-ebi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/memory-controllers/st,stm32-fmc2-ebi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics Flexible Memory Controller 2 (FMC2) Bindings
+title: STMicroelectronics Flexible Memory Controller 2 (FMC2)
 
 description: |
   The FMC2 functional block makes the interface with: synchronous and
diff --git a/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml b/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml
index dd43a0c766f34..c3a368a0fe935 100644
--- a/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml
+++ b/Documentation/devicetree/bindings/mfd/actions,atc260x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/actions,atc260x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Actions Semi ATC260x Power Management IC bindings
+title: Actions Semi ATC260x Power Management IC
 
 maintainers:
   - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
diff --git a/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml b/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml
index 08af356f5d275..9b11b6e2bbf74 100644
--- a/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml
+++ b/Documentation/devicetree/bindings/mfd/ene-kb3930.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/ene-kb3930.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ENE KB3930 Embedded Controller bindings
+title: ENE KB3930 Embedded Controller
 
 description: |
   This binding describes the ENE KB3930 Embedded Controller attached to an
diff --git a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml
index 06ed9ec8f4bbc..aa29e3f4655e9 100644
--- a/Documentation/devicetree/bindings/mfd/ene-kb930.yaml
+++ b/Documentation/devicetree/bindings/mfd/ene-kb930.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/ene-kb930.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ENE KB930 Embedded Controller bindings
+title: ENE KB930 Embedded Controller
 
 description: |
   This binding describes the ENE KB930 Embedded Controller attached to an
diff --git a/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml b/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml
index f09577105b507..20067002cc4a3 100644
--- a/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml
+++ b/Documentation/devicetree/bindings/mfd/fsl,imx8qxp-csr.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/fsl,imx8qxp-csr.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Freescale i.MX8qm/qxp Control and Status Registers Module Bindings
+title: Freescale i.MX8qm/qxp Control and Status Registers Module
 
 maintainers:
   - Liu Ying <victor.liu@nxp.com>
diff --git a/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml b/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml
index ec3138c1bbfc1..e6a2387d8650e 100644
--- a/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml
+++ b/Documentation/devicetree/bindings/mfd/qcom,pm8008.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/qcom,pm8008.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies, Inc. PM8008 PMIC bindings
+title: Qualcomm Technologies, Inc. PM8008 PMIC
 
 maintainers:
   - Guru Das Srinagesh <gurus@codeaurora.org>
diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml
index fbface720678c..5fbb94d2e4fae 100644
--- a/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd71815-pmic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/rohm,bd71815-pmic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ROHM BD71815 Power Management Integrated Circuit bindings
+title: ROHM BD71815 Power Management Integrated Circuit
 
 maintainers:
   - Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml
index 8380166d176cd..d15ea8e9acad7 100644
--- a/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd71828-pmic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/rohm,bd71828-pmic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ROHM BD71828 Power Management Integrated Circuit bindings
+title: ROHM BD71828 Power Management Integrated Circuit
 
 maintainers:
   - Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml
index 3bfdd33702ad1..4aca765caee24 100644
--- a/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd71837-pmic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/rohm,bd71837-pmic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ROHM BD71837 Power Management Integrated Circuit bindings
+title: ROHM BD71837 Power Management Integrated Circuit
 
 maintainers:
   - Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml
index 5d531051a1537..e6491729715f9 100644
--- a/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd71847-pmic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/rohm,bd71847-pmic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ROHM BD71847 and BD71850 Power Management Integrated Circuit bindings
+title: ROHM BD71847 and BD71850 Power Management Integrated Circuit
 
 maintainers:
   - Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
diff --git a/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml b/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml
index 6483860da9554..34ff0a322f3a1 100644
--- a/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml
+++ b/Documentation/devicetree/bindings/mfd/rohm,bd9576-pmic.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/rohm,bd9576-pmic.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ROHM BD9576MUF and BD9573MUF Power Management Integrated Circuit bindings
+title: ROHM BD9576MUF and BD9573MUF Power Management Integrated Circuit
 
 maintainers:
   - Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
index d950dd5d48bd6..27329c5dc38e6 100644
--- a/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stm32-lptimer.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/st,stm32-lptimer.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Low-Power Timers bindings
+title: STMicroelectronics STM32 Low-Power Timers
 
 description: |
   The STM32 Low-Power Timer (LPTIM) is a 16-bit timer that provides several
diff --git a/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
index e2c3c3b44abba..f84e09a5743b7 100644
--- a/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stm32-timers.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/st,stm32-timers.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Timers bindings
+title: STMicroelectronics STM32 Timers
 
 description: |
   This hardware block provides 3 types of timer along with PWM functionality:
diff --git a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
index b4d54302582fb..76551c90b128e 100644
--- a/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stmfx.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/st,stmfx.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectonics Multi-Function eXpander (STMFX) bindings
+title: STMicroelectonics Multi-Function eXpander (STMFX)
 
 description: ST Multi-Function eXpander (STMFX) is a slave controller using I2C for
                communication with the main MCU. Its main features are GPIO expansion,
diff --git a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
index 426658ad81d46..9573e4af949ee 100644
--- a/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
+++ b/Documentation/devicetree/bindings/mfd/st,stpmic1.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mfd/st,stpmic1.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectonics STPMIC1 Power Management IC bindings
+title: STMicroelectonics STPMIC1 Power Management IC
 
 description: STMicroelectronics STPMIC1 Power Management IC
 
diff --git a/Documentation/devicetree/bindings/mips/cpus.yaml b/Documentation/devicetree/bindings/mips/cpus.yaml
index e991f4c6668da..cf382dea3922c 100644
--- a/Documentation/devicetree/bindings/mips/cpus.yaml
+++ b/Documentation/devicetree/bindings/mips/cpus.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mips/cpus.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MIPS CPUs bindings
+title: MIPS CPUs
 
 maintainers:
   - Thomas Bogendoerfer <tsbogend@alpha.franken.de>
diff --git a/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml b/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml
index b3c45c046ba5e..e99342f268a64 100644
--- a/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml
+++ b/Documentation/devicetree/bindings/misc/olpc,xo1.75-ec.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/misc/olpc,xo1.75-ec.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: OLPC XO-1.75 Embedded Controller bindings
+title: OLPC XO-1.75 Embedded Controller
 
 description: |
   This binding describes the Embedded Controller acting as a SPI bus master
diff --git a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml
index dead421e17d62..c028039bc477c 100644
--- a/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml
+++ b/Documentation/devicetree/bindings/mmc/brcm,sdhci-brcmstb.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/brcm,sdhci-brcmstb.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom BRCMSTB/BMIPS SDHCI Controller binding
+title: Broadcom BRCMSTB/BMIPS SDHCI Controller
 
 maintainers:
   - Al Cooper <alcooperx@gmail.com>
diff --git a/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml b/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml
index 69ff065c9a39a..fa6cfe092fc9b 100644
--- a/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/microchip,dw-sparx5-sdhci.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/microchip,dw-sparx5-sdhci.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Microchip Sparx5 Mobile Storage Host Controller Binding
+title: Microchip Sparx5 Mobile Storage Host Controller
 
 allOf:
   - $ref: "mmc-controller.yaml"
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml
index 1fc7e620f3280..911a5996e0991 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-emmc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/mmc-pwrseq-emmc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Simple eMMC hardware reset provider binding
+title: Simple eMMC hardware reset provider
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml
index 9e23967510304..3397dbff88c2e 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-sd8787.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/mmc-pwrseq-sd8787.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell SD8787 power sequence provider binding
+title: Marvell SD8787 power sequence provider
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml
index 226fb191913d2..64e3644eefeb0 100644
--- a/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml
+++ b/Documentation/devicetree/bindings/mmc/mmc-pwrseq-simple.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/mmc-pwrseq-simple.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Simple MMC power sequence provider binding
+title: Simple MMC power sequence provider
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
index 8ed94a12a03b7..7a649ebc688c8 100644
--- a/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
+++ b/Documentation/devicetree/bindings/mmc/mtk-sd.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/mtk-sd.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MTK MSDC Storage Host Controller Binding
+title: MTK MSDC Storage Host Controller
 
 maintainers:
   - Chaotian Jing <chaotian.jing@mediatek.com>
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml b/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml
index 1c87f4218e18c..3d46c25257877 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml
+++ b/Documentation/devicetree/bindings/mmc/sdhci-pxa.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/sdhci-pxa.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell PXA SDHCI v2/v3 bindings
+title: Marvell PXA SDHCI v2/v3
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
index 51ba44cad8426..a43eb837f8dae 100644
--- a/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/snps,dwcmshc-sdhci.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/snps,dwcmshc-sdhci.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Synopsys Designware Mobile Storage Host Controller Binding
+title: Synopsys Designware Mobile Storage Host Controller
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml
index e1f5f26f3f1c6..b13b5166d20a8 100644
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mmc/synopsys-dw-mshc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Synopsys Designware Mobile Storage Host Controller Binding
+title: Synopsys Designware Mobile Storage Host Controller
 
 maintainers:
   - Ulf Hansson <ulf.hansson@linaro.org>
diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
index 849aeae319a92..8487089b6e169 100644
--- a/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mtd/gpmi-nand.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Freescale General-Purpose Media Interface (GPMI) binding
+title: Freescale General-Purpose Media Interface (GPMI)
 
 maintainers:
   - Han Xu <han.xu@nxp.com>
diff --git a/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml b/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml
index ea9450fe7c9fd..00882892f47eb 100644
--- a/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml
+++ b/Documentation/devicetree/bindings/mtd/microchip,mchp48l640.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/mtd/microchip,mchp48l640.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Microchip 48l640 (and similar) serial EERAM bindings
+title: Microchip 48l640 (and similar) serial EERAM
 
 maintainers:
   - Heiko Schocher <hs@denx.de>
diff --git a/Documentation/devicetree/bindings/mtd/mxc-nand.yaml b/Documentation/devicetree/bindings/mtd/mxc-nand.yaml
index 66da1b476ab7e..7f6f7c9596c49 100644
--- a/Documentation/devicetree/bindings/mtd/mxc-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/mxc-nand.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mtd/mxc-nand.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Freescale's mxc_nand binding
+title: Freescale's mxc_nand
 
 maintainers:
   - Uwe Kleine-König <u.kleine-koenig@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml b/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml
index 61d12bda356e6..1c2b4e780ca9a 100644
--- a/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml
+++ b/Documentation/devicetree/bindings/mtd/partitions/qcom,smem-part.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mtd/partitions/qcom,smem-part.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm SMEM NAND flash partition parser binding
+title: Qualcomm SMEM NAND flash partition parser
 
 maintainers:
   - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
diff --git a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
index 8cbfa1504a0f1..19cf1f18b61c3 100644
--- a/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
+++ b/Documentation/devicetree/bindings/mtd/st,stm32-fmc2-nand.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mtd/st,stm32-fmc2-nand.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics Flexible Memory Controller 2 (FMC2) Bindings
+title: STMicroelectronics Flexible Memory Controller 2 (FMC2)
 
 maintainers:
   - Christophe Kerello <christophe.kerello@foss.st.com>
diff --git a/Documentation/devicetree/bindings/mux/gpio-mux.yaml b/Documentation/devicetree/bindings/mux/gpio-mux.yaml
index ee4de9fbaf4d6..b597c1f2c5772 100644
--- a/Documentation/devicetree/bindings/mux/gpio-mux.yaml
+++ b/Documentation/devicetree/bindings/mux/gpio-mux.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mux/gpio-mux.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: GPIO-based multiplexer controller bindings
+title: GPIO-based multiplexer controller
 
 maintainers:
   - Peter Rosin <peda@axentia.se>
diff --git a/Documentation/devicetree/bindings/mux/mux-consumer.yaml b/Documentation/devicetree/bindings/mux/mux-consumer.yaml
index d3d854967359d..9e2d78a78e409 100644
--- a/Documentation/devicetree/bindings/mux/mux-consumer.yaml
+++ b/Documentation/devicetree/bindings/mux/mux-consumer.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mux/mux-consumer.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common multiplexer controller consumer bindings
+title: Common multiplexer controller consumer
 
 maintainers:
   - Peter Rosin <peda@axentia.se>
diff --git a/Documentation/devicetree/bindings/mux/mux-controller.yaml b/Documentation/devicetree/bindings/mux/mux-controller.yaml
index c855fbad38844..8b943082a2416 100644
--- a/Documentation/devicetree/bindings/mux/mux-controller.yaml
+++ b/Documentation/devicetree/bindings/mux/mux-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mux/mux-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common multiplexer controller provider bindings
+title: Common multiplexer controller provider
 
 maintainers:
   - Peter Rosin <peda@axentia.se>
diff --git a/Documentation/devicetree/bindings/mux/reg-mux.yaml b/Documentation/devicetree/bindings/mux/reg-mux.yaml
index dfd9ea582bb7a..dc4be092fc2fc 100644
--- a/Documentation/devicetree/bindings/mux/reg-mux.yaml
+++ b/Documentation/devicetree/bindings/mux/reg-mux.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/mux/reg-mux.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic register bitfield-based multiplexer controller bindings
+title: Generic register bitfield-based multiplexer controller
 
 maintainers:
   - Peter Rosin <peda@axentia.se>
diff --git a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
index e5af53508e254..c99034f053e82 100644
--- a/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
+++ b/Documentation/devicetree/bindings/net/brcm,bcmgenet.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/brcm,bcmgenet.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Broadcom BCM7xxx Ethernet Controller (GENET) binding
+title: Broadcom BCM7xxx Ethernet Controller (GENET)
 
 maintainers:
   - Doug Berger <opendmb@gmail.com>
diff --git a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
index 26aa0830eea1a..67879aab623b5 100644
--- a/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
+++ b/Documentation/devicetree/bindings/net/can/bosch,m_can.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/can/bosch,m_can.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bosch MCAN controller Bindings
+title: Bosch MCAN controller
 
 description: Bosch MCAN controller for CAN bus
 
diff --git a/Documentation/devicetree/bindings/net/can/can-transceiver.yaml b/Documentation/devicetree/bindings/net/can/can-transceiver.yaml
index d1ef1fe6ab290..d422b3921ffa5 100644
--- a/Documentation/devicetree/bindings/net/can/can-transceiver.yaml
+++ b/Documentation/devicetree/bindings/net/can/can-transceiver.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/can/can-transceiver.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: CAN transceiver Bindings
+title: CAN transceiver
 
 description: CAN transceiver generic properties bindings
 
diff --git a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml
index a6921e805e37e..4116667133ce9 100644
--- a/Documentation/devicetree/bindings/net/engleder,tsnep.yaml
+++ b/Documentation/devicetree/bindings/net/engleder,tsnep.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/engleder,tsnep.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TSN endpoint Ethernet MAC binding
+title: TSN endpoint Ethernet MAC
 
 maintainers:
   - Gerhard Engleder <gerhard@engleder-embedded.com>
diff --git a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
index 600240281e8c5..6e0763898d3a8 100644
--- a/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
+++ b/Documentation/devicetree/bindings/net/fsl,qoriq-mc-dpmac.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/fsl,qoriq-mc-dpmac.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: DPAA2 MAC bindings
+title: DPAA2 MAC
 
 maintainers:
   - Ioana Ciornei <ioana.ciornei@nxp.com>
diff --git a/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml b/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml
index afd11c9422fa2..8438af53c5c3d 100644
--- a/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml
+++ b/Documentation/devicetree/bindings/net/mctp-i2c-controller.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/mctp-i2c-controller.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MCTP I2C transport binding
+title: MCTP I2C transport
 
 maintainers:
   - Matt Johnston <matt@codeconstruct.com.au>
diff --git a/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml b/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml
index d58e1571df9b4..e68ed9423150e 100644
--- a/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml
+++ b/Documentation/devicetree/bindings/net/wireless/ieee80211.yaml
@@ -6,7 +6,7 @@
 $id: http://devicetree.org/schemas/net/wireless/ieee80211.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common IEEE 802.11 Binding
+title: Common IEEE 802.11
 
 maintainers:
   - Lorenzo Bianconi <lorenzo@kernel.org>
diff --git a/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml b/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml
index bf84768228f56..fe2cd7f1afba9 100644
--- a/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml
+++ b/Documentation/devicetree/bindings/nvmem/ingenic,jz4780-efuse.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/nvmem/ingenic,jz4780-efuse.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic JZ EFUSE driver bindings
+title: Ingenic JZ EFUSE driver
 
 maintainers:
   - PrasannaKumar Muralidharan <prasannatsmkumar@gmail.com>
diff --git a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml
index 2eab2f46cb65a..8e89b15b535fa 100644
--- a/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml
+++ b/Documentation/devicetree/bindings/nvmem/qcom,qfprom.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/nvmem/qcom,qfprom.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies Inc, QFPROM Efuse bindings
+title: Qualcomm Technologies Inc, QFPROM Efuse
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml b/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml
index 2578e39deda90..73a0c658dbfd0 100644
--- a/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml
+++ b/Documentation/devicetree/bindings/nvmem/socionext,uniphier-efuse.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/nvmem/socionext,uniphier-efuse.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Socionext UniPhier eFuse bindings
+title: Socionext UniPhier eFuse
 
 maintainers:
   - Keiji Hayashibara <hayashibara.keiji@socionext.com>
diff --git a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
index 16f4cad2fa551..172597cc5c63b 100644
--- a/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
+++ b/Documentation/devicetree/bindings/nvmem/st,stm32-romem.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/nvmem/st,stm32-romem.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Factory-programmed data bindings
+title: STMicroelectronics STM32 Factory-programmed data
 
 description: |
   This represents STM32 Factory-programmed read only non-volatile area: locked
diff --git a/Documentation/devicetree/bindings/opp/opp-v1.yaml b/Documentation/devicetree/bindings/opp/opp-v1.yaml
index d585d536a3fb4..07e26c2678153 100644
--- a/Documentation/devicetree/bindings/opp/opp-v1.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v1.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/opp/opp-v1.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic OPP (Operating Performance Points) v1 Bindings
+title: Generic OPP (Operating Performance Points) v1
 
 maintainers:
   - Viresh Kumar <viresh.kumar@linaro.org>
diff --git a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
index a202b6c6561d8..60cf3cbde4c5b 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2-kryo-cpu.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/opp/opp-v2-kryo-cpu.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies, Inc. NVMEM OPP bindings
+title: Qualcomm Technologies, Inc. NVMEM OPP
 
 maintainers:
   - Ilia Lin <ilia.lin@kernel.org>
diff --git a/Documentation/devicetree/bindings/opp/opp-v2.yaml b/Documentation/devicetree/bindings/opp/opp-v2.yaml
index 2f05920fce79a..6972d76233aa4 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/opp/opp-v2.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic OPP (Operating Performance Points) Bindings
+title: Generic OPP (Operating Performance Points)
 
 maintainers:
   - Viresh Kumar <viresh.kumar@linaro.org>
diff --git a/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml b/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml
index 977c976ea7994..8d7eb51edcb45 100644
--- a/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/qcom,pcie-ep.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/pci/qcom,pcie-ep.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm PCIe Endpoint Controller binding
+title: Qualcomm PCIe Endpoint Controller
 
 maintainers:
   - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
diff --git a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
index 5ba9570ad7bf2..e6f9f5540cc3f 100644
--- a/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/fsl,imx8mq-usb-phy.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/fsl,imx8mq-usb-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Freescale i.MX8MQ USB3 PHY binding
+title: Freescale i.MX8MQ USB3 PHY
 
 maintainers:
   - Li Jun <jun.li@nxp.com>
diff --git a/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml b/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml
index 4d91e2f4f247d..ff9f9ca0f19cc 100644
--- a/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml
+++ b/Documentation/devicetree/bindings/phy/fsl,lynx-28g.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/fsl,lynx-28g.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Freescale Lynx 28G SerDes PHY binding
+title: Freescale Lynx 28G SerDes PHY
 
 maintainers:
   - Ioana Ciornei <ioana.ciornei@nxp.com>
diff --git a/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml b/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml
index 52815b6c2b88d..5cee4c85ff8bb 100644
--- a/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml
+++ b/Documentation/devicetree/bindings/phy/intel,keembay-phy-usb.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/intel,keembay-phy-usb.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel Keem Bay USB PHY bindings
+title: Intel Keem Bay USB PHY
 
 maintainers:
   - Wan Ahmad Zainie <wan.ahmad.zainie.wan.mohamad@intel.com>
diff --git a/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml b/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
index b09e5ba5e127d..361ffc35b16bf 100644
--- a/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
+++ b/Documentation/devicetree/bindings/phy/intel,phy-thunderbay-emmc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/intel,phy-thunderbay-emmc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel Thunder Bay eMMC PHY bindings
+title: Intel Thunder Bay eMMC PHY
 
 maintainers:
   - Srikandan Nandhini <nandhini.srikandan@intel.com>
diff --git a/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml b/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml
index c97043eaa8fbe..be13113f7b47f 100644
--- a/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/marvell,mmp3-usb-phy.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/phy/marvell,mmp3-usb-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell MMP3 USB PHY bindings
+title: Marvell MMP3 USB PHY
 
 maintainers:
   - Lubomir Rintel <lkundrak@v3.sk>
diff --git a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml
index 9c2a7345955d4..26f2b887cfc1e 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/phy/mediatek,dsi-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MediaTek MIPI Display Serial Interface (DSI) PHY binding
+title: MediaTek MIPI Display Serial Interface (DSI) PHY
 
 maintainers:
   - Chun-Kuang Hu <chunkuang.hu@kernel.org>
diff --git a/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml
index 0d94950b84ca3..6cfdaadec0859 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/phy/mediatek,hdmi-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MediaTek High Definition Multimedia Interface (HDMI) PHY binding
+title: MediaTek High Definition Multimedia Interface (HDMI) PHY
 
 maintainers:
   - Chun-Kuang Hu <chunkuang.hu@kernel.org>
diff --git a/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml
index 74cc32c1d2e85..3e62b5d4da616 100644
--- a/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml
+++ b/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/phy/mediatek,ufs-phy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: MediaTek Universal Flash Storage (UFS) M-PHY binding
+title: MediaTek Universal Flash Storage (UFS) M-PHY
 
 maintainers:
   - Stanley Chu <stanley.chu@mediatek.com>
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
index a9e227d8b076a..6a09472740ed9 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/phy/phy-cadence-sierra.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Cadence Sierra PHY binding
+title: Cadence Sierra PHY
 
 description:
   This binding describes the Cadence Sierra PHY. Sierra PHY supports multilink
diff --git a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
index 2fec9e54ad0e6..2ad1faadda2a2 100644
--- a/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/phy/phy-cadence-torrent.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Cadence Torrent SD0801 PHY binding
+title: Cadence Torrent SD0801 PHY
 
 description:
   This binding describes the Cadence SD0801 PHY (also known as Torrent PHY)
diff --git a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
index 801993813b185..5b4c915cc9e56 100644
--- a/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/phy-stm32-usbphyc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 USB HS PHY controller binding
+title: STMicroelectronics STM32 USB HS PHY controller
 
 description:
 
diff --git a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml
index 4dc5205d893ba..445b2467f4f6b 100644
--- a/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml
+++ b/Documentation/devicetree/bindings/phy/phy-tegra194-p2u.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/phy/phy-tegra194-p2u.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: NVIDIA Tegra194 & Tegra234 P2U binding
+title: NVIDIA Tegra194 & Tegra234 P2U
 
 maintainers:
   - Thierry Reding <treding@nvidia.com>
diff --git a/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml b/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml
index 62dcb84c08aa0..738c92bb75187 100644
--- a/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml
+++ b/Documentation/devicetree/bindings/phy/ti,phy-am654-serdes.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/ti,phy-am654-serdes.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: TI AM654 SERDES binding
+title: TI AM654 SERDES
 
 description:
   This binding describes the TI AM654 SERDES. AM654 SERDES can be configured
diff --git a/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml b/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml
index 51492fe738ec9..617f3c0b3dfb6 100644
--- a/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml
+++ b/Documentation/devicetree/bindings/phy/transmit-amplitude.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/transmit-amplitude.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common PHY and network PCS transmit amplitude property binding
+title: Common PHY and network PCS transmit amplitude property
 
 description:
   Binding describing the peak-to-peak transmit amplitude for common PHYs
diff --git a/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml b/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml
index b425483501886..ca0fef6e535e2 100644
--- a/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/intel,lgm-io.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/pinctrl/intel,lgm-io.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel Lightning Mountain SoC pinmux & GPIO controller binding
+title: Intel Lightning Mountain SoC pinmux & GPIO controller
 
 maintainers:
   - Rahul Tanwar <rahul.tanwar@linux.intel.com>
diff --git a/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml b/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml
index 301db7daf8706..2fd2178d1fa55 100644
--- a/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml
+++ b/Documentation/devicetree/bindings/power/avs/qcom,cpr.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/avs/qcom,cpr.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Core Power Reduction (CPR) bindings
+title: Qualcomm Core Power Reduction (CPR)
 
 maintainers:
   - Niklas Cassel <nks@flawful.org>
diff --git a/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml b/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml
index b289388952bf2..85bebebb285bd 100644
--- a/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/dlg,da9150-charger.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/dlg,da9150-charger.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Dialog Semiconductor DA9150 Charger Power Supply bindings
+title: Dialog Semiconductor DA9150 Charger Power Supply
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml b/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml
index d47caf59d2046..7cc94b8729379 100644
--- a/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml
+++ b/Documentation/devicetree/bindings/power/supply/dlg,da9150-fuel-gauge.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/dlg,da9150-fuel-gauge.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Dialog Semiconductor DA9150 Fuel-Gauge Power Supply bindings
+title: Dialog Semiconductor DA9150 Fuel-Gauge Power Supply
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
index 46527038bf227..534388b255a98 100644
--- a/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
+++ b/Documentation/devicetree/bindings/power/supply/ingenic,battery.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/power/supply/ingenic,battery.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Ingenic JZ47xx battery bindings
+title: Ingenic JZ47xx battery
 
 maintainers:
   - Artur Rojek <contact@artur-rojek.eu>
diff --git a/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml b/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml
index 76cedf95a12c9..d26ed5eabe28c 100644
--- a/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/lltc,lt3651-charger.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/lltc,lt3651-charger.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Analog Devices LT3651 Charger Power Supply bindings
+title: Analog Devices LT3651 Charger Power Supply
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
index eeb043f9bb4f2..a7344815ce995 100644
--- a/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
+++ b/Documentation/devicetree/bindings/power/supply/sc2731-charger.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/sc2731-charger.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Spreadtrum SC2731 PMICs battery charger binding
+title: Spreadtrum SC2731 PMICs battery charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml b/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml
index d90a838a17441..de43e45a43b7c 100644
--- a/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml
+++ b/Documentation/devicetree/bindings/power/supply/sc27xx-fg.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/sc27xx-fg.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Spreadtrum SC27XX PMICs Fuel Gauge Unit Power Supply Bindings
+title: Spreadtrum SC27XX PMICs Fuel Gauge Unit Power Supply
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml
index cd8e9a8907f84..70d563d44c356 100644
--- a/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml
+++ b/Documentation/devicetree/bindings/pwm/microchip,corepwm.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/pwm/microchip,corepwm.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Microchip IP corePWM controller bindings
+title: Microchip IP corePWM controller
 
 maintainers:
   - Conor Dooley <conor.dooley@microchip.com>
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
index 38bdaef4fa397..c82f6f885d97d 100644
--- a/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-booster.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/regulator/st,stm32-booster.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 booster for ADC analog input switches bindings
+title: STMicroelectronics STM32 booster for ADC analog input switches
 
 maintainers:
   - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
diff --git a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
index a5a27ee0a9e67..c1bf1f90490a0 100644
--- a/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
+++ b/Documentation/devicetree/bindings/regulator/st,stm32-vrefbuf.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/regulator/st,stm32-vrefbuf.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Voltage reference buffer bindings
+title: STMicroelectronics STM32 Voltage reference buffer
 
 description: |
   Some STM32 devices embed a voltage reference buffer which can be used as
diff --git a/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml b/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml
index d892d29a656b3..11cb42a3fdd1a 100644
--- a/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/amlogic,meson-mx-ao-arc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/remoteproc/amlogic,meson-mx-ao-arc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Amlogic Meson AO ARC Remote Processor bindings
+title: Amlogic Meson AO ARC Remote Processor
 
 description:
   Amlogic Meson6, Meson8, Meson8b and Meson8m2 SoCs embed an ARC core
diff --git a/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml
index 3a1f59ad79e23..e732255b90196 100644
--- a/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/fsl,imx-rproc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/remoteproc/fsl,imx-rproc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: NXP i.MX Co-Processor Bindings
+title: NXP i.MX Co-Processor
 
 description:
   This binding provides support for ARM Cortex M4 Co-processor found on some NXP iMX SoCs.
diff --git a/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml b/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml
index aaaaabad46ea6..85b1e43cab08d 100644
--- a/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/ingenic,vpu.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/remoteproc/ingenic,vpu.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Ingenic Video Processing Unit bindings
+title: Ingenic Video Processing Unit
 
 description:
   Inside the Video Processing Unit (VPU) of the recent JZ47xx SoCs from
diff --git a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml
index 7e091eaffc184..895415772d1d6 100644
--- a/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/mtk,scp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/remoteproc/mtk,scp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Mediatek SCP Bindings
+title: Mediatek SCP
 
 maintainers:
   - Tinghan Shen <tinghan.shen@mediatek.com>
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
index db9e0f0c2bea0..c1d9cbc359b4e 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,adsp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/remoteproc/qcom,adsp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm ADSP Peripheral Image Loader binding
+title: Qualcomm ADSP Peripheral Image Loader
 
 maintainers:
   - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
diff --git a/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml b/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml
index a7711e3c998cc..22219d16df204 100644
--- a/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/qcom,pil-info.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/remoteproc/qcom,pil-info.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm peripheral image loader relocation info binding
+title: Qualcomm peripheral image loader relocation info
 
 maintainers:
   - Bjorn Andersson <bjorn.andersson@linaro.org>
diff --git a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml
index a7d25fa920e56..7e0275d31a3c2 100644
--- a/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/renesas,rcar-rproc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/remoteproc/renesas,rcar-rproc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Renesas R-Car remote processor controller bindings
+title: Renesas R-Car remote processor controller
 
 maintainers:
   - Julien Massot <julien.massot@iot.bzh>
diff --git a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
index da50f0e99fe2c..66b1e3efdaa3f 100644
--- a/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
+++ b/Documentation/devicetree/bindings/remoteproc/st,stm32-rproc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/remoteproc/st,stm32-rproc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STM32 remote processor controller bindings
+title: STMicroelectronics STM32 remote processor controller
 
 description:
   This document defines the binding for the remoteproc component that loads and
diff --git a/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml b/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml
index 067e71e8ebe8c..9f7590ce6b3d6 100644
--- a/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/intel,ixp46x-rng.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rng/intel,ixp46x-rng.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Intel IXP46x RNG bindings
+title: Intel IXP46x RNG
 
 description: |
   The Intel IXP46x has a random number generator at a fixed physical
diff --git a/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml b/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml
index 48ab82abf50ec..4673d6160ad9c 100644
--- a/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/silex-insight,ba431-rng.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rng/silex-insight,ba431-rng.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Silex Insight BA431 RNG bindings
+title: Silex Insight BA431 RNG
 
 description: |
   The BA431 hardware random number generator is an IP that is FIPS-140-2/3
diff --git a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
index fcd86f822a9ce..187b172d0cca5 100644
--- a/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
+++ b/Documentation/devicetree/bindings/rng/st,stm32-rng.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rng/st,stm32-rng.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 RNG bindings
+title: STMicroelectronics STM32 RNG
 
 description: |
   The STM32 hardware random number generator is a simple fixed purpose
diff --git a/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml b/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml
index 1e17e55762f17..d831322911705 100644
--- a/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml
+++ b/Documentation/devicetree/bindings/rng/xiphera,xip8001b-trng.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rng/xiphera,xip8001b-trng.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Xiphera XIP8001B-trng bindings
+title: Xiphera XIP8001B-trng
 
 maintainers:
   - Atte Tommiska <atte.tommiska@xiphera.com>
diff --git a/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml b/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml
index 482e5af215b3d..b04b87ef6f334 100644
--- a/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/sa1100-rtc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rtc/sa1100-rtc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell Real Time Clock controller bindings
+title: Marvell Real Time Clock controller
 
 allOf:
   - $ref: rtc.yaml#
diff --git a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
index 764717ce18733..9e66ed33cda48 100644
--- a/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/st,stm32-rtc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rtc/st,stm32-rtc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Real Time Clock Bindings
+title: STMicroelectronics STM32 Real Time Clock
 
 maintainers:
   - Gabriel Fernandez <gabriel.fernandez@foss.st.com>
diff --git a/Documentation/devicetree/bindings/serial/8250.yaml b/Documentation/devicetree/bindings/serial/8250.yaml
index 6258f5f59b195..34b8e59aa9d4d 100644
--- a/Documentation/devicetree/bindings/serial/8250.yaml
+++ b/Documentation/devicetree/bindings/serial/8250.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/serial/8250.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: UART (Universal Asynchronous Receiver/Transmitter) bindings
+title: UART (Universal Asynchronous Receiver/Transmitter)
 
 maintainers:
   - devicetree@vger.kernel.org
diff --git a/Documentation/devicetree/bindings/serial/rs485.yaml b/Documentation/devicetree/bindings/serial/rs485.yaml
index 90a1bab40f050..789763cf427ac 100644
--- a/Documentation/devicetree/bindings/serial/rs485.yaml
+++ b/Documentation/devicetree/bindings/serial/rs485.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/serial/rs485.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: RS485 serial communications Bindings
+title: RS485 serial communications
 
 description: The RTS signal is capable of automatically controlling line
   direction for the built-in half-duplex mode. The properties described
diff --git a/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
index 333dc42722d25..85876c668f6d0 100644
--- a/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
+++ b/Documentation/devicetree/bindings/serial/st,stm32-uart.yaml
@@ -7,7 +7,7 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 maintainers:
   - Erwan Le Ray <erwan.leray@foss.st.com>
 
-title: STMicroelectronics STM32 USART bindings
+title: STMicroelectronics STM32 USART
 
 properties:
   compatible:
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml
index 98d087cf4fc03..ab607efbb64c1 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,aoss-qmp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/soc/qcom/qcom,aoss-qmp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Always-On Subsystem side channel binding
+title: Qualcomm Always-On Subsystem side channel
 
 maintainers:
   - Bjorn Andersson <bjorn.andersson@linaro.org>
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml
index a6bc3197d5ddc..6026c21736d88 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,apr.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/soc/qcom/qcom,apr.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Qualcomm APR/GPR (Asynchronous/Generic Packet Router) binding
+title: Qualcomm APR/GPR (Asynchronous/Generic Packet Router)
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml
index 4149cf2b66be3..497614ddf0058 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,smem.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/soc/qcom/qcom,smem.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Qualcomm Shared Memory Manager binding
+title: Qualcomm Shared Memory Manager
 
 maintainers:
   - Andy Gross <agross@kernel.org>
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml
index 38818c37c3ea8..aca3d40bcccbf 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom,spm.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/soc/qcom/qcom,spm.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Qualcomm Subsystem Power Manager binding
+title: Qualcomm Subsystem Power Manager
 
 maintainers:
   - Andy Gross <agross@kernel.org>
diff --git a/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml b/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml
index 48eda4d0d391d..7ab8cfff18c19 100644
--- a/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml
+++ b/Documentation/devicetree/bindings/soc/qcom/qcom-stats.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/soc/qcom/qcom-stats.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies, Inc. (QTI) Stats bindings
+title: Qualcomm Technologies, Inc. (QTI) Stats
 
 maintainers:
   - Maulik Shah <mkshah@codeaurora.org>
diff --git a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
index 60b49562ff694..a6836904a4f83 100644
--- a/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
+++ b/Documentation/devicetree/bindings/soc/samsung/exynos-usi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/soc/samsung/exynos-usi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Samsung's Exynos USI (Universal Serial Interface) binding
+title: Samsung's Exynos USI (Universal Serial Interface)
 
 maintainers:
   - Sam Protsenko <semen.protsenko@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml b/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml
index 92d896e0d3238..f302fe89a2536 100644
--- a/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml
+++ b/Documentation/devicetree/bindings/sound/marvell,mmp-sspa.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/marvell,mmp-sspa.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvel SSPA Digital Audio Interface Bindings
+title: Marvel SSPA Digital Audio Interface
 
 maintainers:
   - Lubomir Rintel <lkundrak@v3.sk>
diff --git a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
index 5e26b3e9db2cd..bb42220916b35 100644
--- a/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,lpass-cpu.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,lpass-cpu.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Technologies Inc. LPASS CPU dai driver bindings
+title: Qualcomm Technologies Inc. LPASS CPU dai driver
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml b/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml
index 73a4afad5a743..a53c9ef938fa1 100644
--- a/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,q6apm-dai.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/sound/qcom,q6apm-dai.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Qualcomm Audio Process Manager Digital Audio Interfaces binding
+title: Qualcomm Audio Process Manager Digital Audio Interfaces
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml
index aa6c0ecba5cfc..1168410f6fbd7 100644
--- a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-clocks.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/sound/qcom,q6dsp-lpass-clocks.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Qualcomm DSP LPASS Clock Controller binding
+title: Qualcomm DSP LPASS Clock Controller
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml
index d8ebf2e528d27..d06f188030a3a 100644
--- a/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,q6dsp-lpass-ports.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/sound/qcom,q6dsp-lpass-ports.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Qualcomm DSP LPASS(Low Power Audio SubSystem) Audio Ports binding
+title: Qualcomm DSP LPASS(Low Power Audio SubSystem) Audio Ports
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml b/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml
index fa8f4ac209855..e6c817de34497 100644
--- a/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml
+++ b/Documentation/devicetree/bindings/spi/aspeed,ast2600-fmc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/spi/aspeed,ast2600-fmc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Aspeed SMC controllers bindings
+title: Aspeed SMC controllers
 
 maintainers:
   - Chin-Ting Kuo <chin-ting_kuo@aspeedtech.com>
diff --git a/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml b/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml
index 0abcac385e7cb..5f4f6b5615d06 100644
--- a/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml
+++ b/Documentation/devicetree/bindings/spi/marvell,mmp2-ssp.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/spi/marvell,mmp2-ssp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: PXA2xx SSP SPI Controller bindings
+title: PXA2xx SSP SPI Controller
 
 maintainers:
   - Lubomir Rintel <lkundrak@v3.sk>
diff --git a/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
index 6ec6f556182f4..1eb17f7a4d862 100644
--- a/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
+++ b/Documentation/devicetree/bindings/spi/st,stm32-qspi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/spi/st,stm32-qspi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Quad Serial Peripheral Interface (QSPI) bindings
+title: STMicroelectronics STM32 Quad Serial Peripheral Interface (QSPI)
 
 maintainers:
   - Christophe Kerello <christophe.kerello@foss.st.com>
diff --git a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
index 3d64bed266ac7..1cda15f91cc30 100644
--- a/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/st,stm32-spi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/spi/st,stm32-spi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 SPI Controller bindings
+title: STMicroelectronics STM32 SPI Controller
 
 description: |
   The STM32 SPI controller is used to communicate with external devices using
diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml
index 16b57f57d1032..b22c8b59d5c7c 100644
--- a/Documentation/devicetree/bindings/thermal/imx-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/imx-thermal.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/thermal/imx-thermal.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX Thermal Binding
+title: NXP i.MX Thermal
 
 maintainers:
   - Shawn Guo <shawnguo@kernel.org>
diff --git a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml
index b90726229ac9c..d2c1e4573c327 100644
--- a/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/imx8mm-thermal.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/thermal/imx8mm-thermal.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP i.MX8M Mini Thermal Binding
+title: NXP i.MX8M Mini Thermal
 
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
diff --git a/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml b/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml
index 6d65a3cf2af25..76aaa004c8ac5 100644
--- a/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/sprd-thermal.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/thermal/sprd-thermal.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Spreadtrum thermal sensor controller bindings
+title: Spreadtrum thermal sensor controller
 
 maintainers:
   - Orson Zhai <orsonzhai@gmail.com>
diff --git a/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
index bee41cff51429..ab043084f6678 100644
--- a/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/st,stm32-thermal.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/thermal/st,stm32-thermal.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 digital thermal sensor (DTS) binding
+title: STMicroelectronics STM32 digital thermal sensor (DTS)
 
 maintainers:
   - Pascal Paillet <p.paillet@foss.st.com>
diff --git a/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml b/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml
index 7bb9327caa13c..b9022f1613d81 100644
--- a/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml
+++ b/Documentation/devicetree/bindings/thermal/thermal-cooling-devices.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/thermal/thermal-cooling-devices.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Thermal cooling device binding
+title: Thermal cooling device
 
 maintainers:
   - Amit Kucheria <amitk@kernel.org>
diff --git a/Documentation/devicetree/bindings/thermal/thermal-idle.yaml b/Documentation/devicetree/bindings/thermal/thermal-idle.yaml
index 0fd6d9ae61965..1b77d542a7b8d 100644
--- a/Documentation/devicetree/bindings/thermal/thermal-idle.yaml
+++ b/Documentation/devicetree/bindings/thermal/thermal-idle.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/thermal/thermal-idle.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Thermal idle cooling device binding
+title: Thermal idle cooling device
 
 maintainers:
   - Daniel Lezcano <daniel.lezcano@linaro.org>
diff --git a/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml b/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml
index 4bd345c71eb83..57565b3fb07c1 100644
--- a/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml
+++ b/Documentation/devicetree/bindings/thermal/thermal-sensor.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/thermal/thermal-sensor.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Thermal sensor binding
+title: Thermal sensor
 
 maintainers:
   - Amit Kucheria <amitk@kernel.org>
diff --git a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
index 8d2c6d74b605a..8581821fa4e17 100644
--- a/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
+++ b/Documentation/devicetree/bindings/thermal/thermal-zones.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/thermal/thermal-zones.yaml#
 $schema: http://devicetree.org/meta-schemas/base.yaml#
 
-title: Thermal zone binding
+title: Thermal zone
 
 maintainers:
   - Amit Kucheria <amitk@kernel.org>
diff --git a/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml
index ea14de80ec759..7ed0abe9290f6 100644
--- a/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/ti,am654-thermal.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/thermal/ti,am654-thermal.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments AM654 VTM (DTS) binding
+title: Texas Instruments AM654 VTM (DTS)
 
 maintainers:
   - Keerthy <j-keerthy@ti.com>
diff --git a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
index 0509c9cec224d..171b3622ed847 100644
--- a/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/ti,j72xx-thermal.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/thermal/ti,j72xx-thermal.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Texas Instruments J72XX VTM (DTS) binding
+title: Texas Instruments J72XX VTM (DTS)
 
 maintainers:
   - Keerthy <j-keerthy@ti.com>
diff --git a/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml b/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml
index 1fbc260a0cbdf..1ee4aab695d38 100644
--- a/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/mrvl,mmp-timer.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/timer/mrvl,mmp-timer.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell MMP Timer bindings
+title: Marvell MMP Timer
 
 maintainers:
   - Daniel Lezcano <daniel.lezcano@linaro.org>
diff --git a/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
index 937aa8a563660..9ec11537620ac 100644
--- a/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
+++ b/Documentation/devicetree/bindings/timer/st,stm32-timer.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/timer/st,stm32-timer.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 general-purpose 16 and 32 bits timers bindings
+title: STMicroelectronics STM32 general-purpose 16 and 32 bits timers
 
 maintainers:
   - Fabrice Gasnier <fabrice.gasnier@foss.st.com>
diff --git a/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml b/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml
index 0e72c08e65665..e4d893369d57b 100644
--- a/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml
+++ b/Documentation/devicetree/bindings/usb/analogix,anx7411.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/analogix,anx7411.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Analogix ANX7411 Type-C controller bindings
+title: Analogix ANX7411 Type-C controller
 
 maintainers:
   - Xin Ji <xji@analogixsemi.com>
diff --git a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
index dc9d6ed0781d2..cae46c4982adf 100644
--- a/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
+++ b/Documentation/devicetree/bindings/usb/cdns,usb3.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/cdns,usb3.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Cadence USBSS-DRD controller bindings
+title: Cadence USBSS-DRD controller
 
 maintainers:
   - Pawel Laszczak <pawell@cadence.com>
diff --git a/Documentation/devicetree/bindings/usb/dwc2.yaml b/Documentation/devicetree/bindings/usb/dwc2.yaml
index 1ab85489a3f8f..371ba93f3ce58 100644
--- a/Documentation/devicetree/bindings/usb/dwc2.yaml
+++ b/Documentation/devicetree/bindings/usb/dwc2.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/dwc2.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: DesignWare HS OTG USB 2.0 controller Bindings
+title: DesignWare HS OTG USB 2.0 controller
 
 maintainers:
   - Rob Herring <robh@kernel.org>
diff --git a/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml b/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml
index c69bbfbcf733d..84b3b69256b1e 100644
--- a/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml
+++ b/Documentation/devicetree/bindings/usb/faraday,fotg210.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/usb/faraday,fotg210.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Faraday Technology FOTG210 HS OTG USB 2.0 controller Bindings
+title: Faraday Technology FOTG210 HS OTG USB 2.0 controller
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
diff --git a/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml b/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml
index 3cf93dd45eb70..a0246aa1f2360 100644
--- a/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml
+++ b/Documentation/devicetree/bindings/usb/marvell,pxau2o-ehci.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/usb/marvell,pxau2o-ehci.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Marvell PXA/MMP EHCI bindings
+title: Marvell PXA/MMP EHCI
 
 maintainers:
   - Lubomir Rintel <lkundrak@v3.sk>
diff --git a/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml b/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml
index f238848ad094d..e2743a4b95208 100644
--- a/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml
+++ b/Documentation/devicetree/bindings/usb/nxp,isp1760.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/nxp,isp1760.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: NXP ISP1760 family controller bindings
+title: NXP ISP1760 family controller
 
 maintainers:
   - Sebastian Siewior <bigeasy@linutronix.de>
diff --git a/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml b/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml
index 65a93f7738d5d..e3e87e4d32925 100644
--- a/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml
+++ b/Documentation/devicetree/bindings/usb/richtek,rt1719.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/richtek,rt1719.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Richtek RT1719 sink-only Type-C PD controller bindings
+title: Richtek RT1719 sink-only Type-C PD controller
 
 maintainers:
   - ChiYuan Huang <cy_huang@richtek.com>
diff --git a/Documentation/devicetree/bindings/usb/st,stusb160x.yaml b/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
index b8974807b666d..ffcd9897ea38d 100644
--- a/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
+++ b/Documentation/devicetree/bindings/usb/st,stusb160x.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/st,stusb160x.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: STMicroelectronics STUSB160x Type-C controller bindings
+title: STMicroelectronics STUSB160x Type-C controller
 
 maintainers:
   - Amelie Delaunay <amelie.delaunay@foss.st.com>
diff --git a/Documentation/devicetree/bindings/virtio/virtio-device.yaml b/Documentation/devicetree/bindings/virtio/virtio-device.yaml
index 1778ea9b5aa50..8c6919ba9497e 100644
--- a/Documentation/devicetree/bindings/virtio/virtio-device.yaml
+++ b/Documentation/devicetree/bindings/virtio/virtio-device.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/virtio/virtio-device.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Virtio device bindings
+title: Virtio device
 
 maintainers:
   - Viresh Kumar <viresh.kumar@linaro.org>
diff --git a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
index 39736449ba646..a8e266f80c20e 100644
--- a/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
+++ b/Documentation/devicetree/bindings/watchdog/st,stm32-iwdg.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/watchdog/st,stm32-iwdg.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: STMicroelectronics STM32 Independent WatchDoG (IWDG) bindings
+title: STMicroelectronics STM32 Independent WatchDoG (IWDG)
 
 maintainers:
   - Yannick Fertre <yannick.fertre@foss.st.com>
-- 
GitLab


From 9d69d47fd399137d41b744065aab2f9677ccc377 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:13 +0100
Subject: [PATCH 4070/4122] dt-bindings: drop redundant part of title
 (beginning)

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding", but instead just describe the hardware.

Drop beginning "Devicetree bindings" in various forms:

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [dD]evice[ -]\?[tT]ree [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [dD][tT] [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2/' {} \;

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> # input
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Sebastian Reichel <sre@kernel.org> # power
Link: https://lore.kernel.org/r/20221216163815.522628-8-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml     | 2 +-
 Documentation/devicetree/bindings/clock/fixed-clock.yaml        | 2 +-
 Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml | 2 +-
 Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml   | 2 +-
 Documentation/devicetree/bindings/clock/idt,versaclock5.yaml    | 2 +-
 Documentation/devicetree/bindings/clock/renesas,9series.yaml    | 2 +-
 Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml       | 2 +-
 .../devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml | 2 +-
 .../devicetree/bindings/display/bridge/intel,keembay-dsi.yaml   | 2 +-
 .../devicetree/bindings/display/intel,keembay-display.yaml      | 2 +-
 .../devicetree/bindings/display/intel,keembay-msscam.yaml       | 2 +-
 Documentation/devicetree/bindings/display/msm/gmu.yaml          | 2 +-
 Documentation/devicetree/bindings/display/msm/gpu.yaml          | 2 +-
 .../devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml   | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml   | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml   | 2 +-
 .../devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml   | 2 +-
 Documentation/devicetree/bindings/i2c/i2c-gpio.yaml             | 2 +-
 Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml         | 2 +-
 .../devicetree/bindings/iio/adc/sigma-delta-modulator.yaml      | 2 +-
 Documentation/devicetree/bindings/input/gpio-keys.yaml          | 2 +-
 Documentation/devicetree/bindings/input/microchip,cap11xx.yaml  | 2 +-
 .../devicetree/bindings/interrupt-controller/renesas,irqc.yaml  | 2 +-
 Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml | 2 +-
 Documentation/devicetree/bindings/leds/register-bit-led.yaml    | 2 +-
 Documentation/devicetree/bindings/leds/regulator-led.yaml       | 2 +-
 Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml         | 2 +-
 Documentation/devicetree/bindings/net/ingenic,mac.yaml          | 2 +-
 Documentation/devicetree/bindings/power/supply/bq2415x.yaml     | 2 +-
 Documentation/devicetree/bindings/power/supply/bq24190.yaml     | 2 +-
 Documentation/devicetree/bindings/power/supply/bq24257.yaml     | 2 +-
 Documentation/devicetree/bindings/power/supply/bq24735.yaml     | 2 +-
 Documentation/devicetree/bindings/power/supply/bq25890.yaml     | 2 +-
 Documentation/devicetree/bindings/power/supply/isp1704.yaml     | 2 +-
 .../devicetree/bindings/power/supply/lltc,ltc294x.yaml          | 2 +-
 .../devicetree/bindings/power/supply/richtek,rt9455.yaml        | 2 +-
 Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml   | 2 +-
 Documentation/devicetree/bindings/regulator/pwm-regulator.yaml  | 2 +-
 Documentation/devicetree/bindings/rng/ingenic,rng.yaml          | 2 +-
 Documentation/devicetree/bindings/rng/ingenic,trng.yaml         | 2 +-
 Documentation/devicetree/bindings/serial/8250_omap.yaml         | 2 +-
 Documentation/devicetree/bindings/serio/ps2-gpio.yaml           | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml       | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml   | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml       | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml       | 2 +-
 Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml       | 2 +-
 Documentation/devicetree/bindings/timer/ingenic,sysost.yaml     | 2 +-
 Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml    | 2 +-
 Documentation/devicetree/bindings/usb/realtek,rts5411.yaml      | 2 +-
 Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml         | 2 +-
 Documentation/devicetree/bindings/usb/ti,usb8041.yaml           | 2 +-
 53 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
index 983033fe5b177..5e942bccf2778 100644
--- a/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
+++ b/Documentation/devicetree/bindings/clock/adi,axi-clkgen.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/adi,axi-clkgen.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for Analog Devices AXI clkgen pcore clock generator
+title: Analog Devices AXI clkgen pcore clock generator
 
 maintainers:
   - Lars-Peter Clausen <lars@metafoo.de>
diff --git a/Documentation/devicetree/bindings/clock/fixed-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-clock.yaml
index b657ecd0ef1c3..b0a4fb8256e2f 100644
--- a/Documentation/devicetree/bindings/clock/fixed-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/fixed-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fixed-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for simple fixed-rate clock sources
+title: Simple fixed-rate clock sources
 
 maintainers:
   - Michael Turquette <mturquette@baylibre.com>
diff --git a/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
index 0b02378a3a0c8..8f71ab3004706 100644
--- a/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/fixed-factor-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fixed-factor-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for simple fixed factor rate clock sources
+title: Simple fixed factor rate clock sources
 
 maintainers:
   - Michael Turquette <mturquette@baylibre.com>
diff --git a/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml b/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
index 1453ac849a659..e22fc272d0231 100644
--- a/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/fixed-mmio-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fixed-mmio-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for simple memory mapped IO fixed-rate clock sources
+title: Simple memory mapped IO fixed-rate clock sources
 
 description:
   This binding describes a fixed-rate clock for which the frequency can
diff --git a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
index f9ba9864d8b52..61b246cf5e72a 100644
--- a/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
+++ b/Documentation/devicetree/bindings/clock/idt,versaclock5.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/idt,versaclock5.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for IDT VersaClock 5 and 6 programmable I2C clock generators
+title: IDT VersaClock 5 and 6 programmable I2C clock generators
 
 description: |
   The IDT VersaClock 5 and VersaClock 6 are programmable I2C
diff --git a/Documentation/devicetree/bindings/clock/renesas,9series.yaml b/Documentation/devicetree/bindings/clock/renesas,9series.yaml
index 102eb95cb3fcd..6b6cec3fba528 100644
--- a/Documentation/devicetree/bindings/clock/renesas,9series.yaml
+++ b/Documentation/devicetree/bindings/clock/renesas,9series.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/renesas,9series.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for Renesas 9-series I2C PCIe clock generators
+title: Renesas 9-series I2C PCIe clock generators
 
 description: |
   The Renesas 9-series are I2C PCIe clock generators providing
diff --git a/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml b/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml
index c56f911fff475..d525f96cf244b 100644
--- a/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml
+++ b/Documentation/devicetree/bindings/clock/ti/ti,clksel.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/ti/ti,clksel.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for TI clksel clock
+title: TI clksel clock
 
 maintainers:
   - Tony Lindgren <tony@atomide.com>
diff --git a/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml b/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml
index 89490fdffeb07..0b27df429bdce 100644
--- a/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/ingenic,jz4780-hdmi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/ingenic,jz4780-hdmi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for Ingenic JZ4780 HDMI Transmitter
+title: Ingenic JZ4780 HDMI Transmitter
 
 maintainers:
   - H. Nikolaus Schaller <hns@goldelico.com>
diff --git a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
index dcb1336ee2a56..958a073f4ff77 100644
--- a/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
+++ b/Documentation/devicetree/bindings/display/bridge/intel,keembay-dsi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/bridge/intel,keembay-dsi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Devicetree bindings for Intel Keem Bay mipi dsi controller
+title: Intel Keem Bay mipi dsi controller
 
 maintainers:
   - Anitha Chrisanthus <anitha.chrisanthus@intel.com>
diff --git a/Documentation/devicetree/bindings/display/intel,keembay-display.yaml b/Documentation/devicetree/bindings/display/intel,keembay-display.yaml
index bc6622b010cab..2cf54ecc707aa 100644
--- a/Documentation/devicetree/bindings/display/intel,keembay-display.yaml
+++ b/Documentation/devicetree/bindings/display/intel,keembay-display.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/intel,keembay-display.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Devicetree bindings for Intel Keem Bay display controller
+title: Intel Keem Bay display controller
 
 maintainers:
   - Anitha Chrisanthus <anitha.chrisanthus@intel.com>
diff --git a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
index a222b52d8b8ff..cc7e1f318fe43 100644
--- a/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
+++ b/Documentation/devicetree/bindings/display/intel,keembay-msscam.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/intel,keembay-msscam.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Devicetree bindings for Intel Keem Bay MSSCAM
+title: Intel Keem Bay MSSCAM
 
 maintainers:
   - Anitha Chrisanthus <anitha.chrisanthus@intel.com>
diff --git a/Documentation/devicetree/bindings/display/msm/gmu.yaml b/Documentation/devicetree/bindings/display/msm/gmu.yaml
index 67fdeeabae0cc..ab14e81cb0506 100644
--- a/Documentation/devicetree/bindings/display/msm/gmu.yaml
+++ b/Documentation/devicetree/bindings/display/msm/gmu.yaml
@@ -6,7 +6,7 @@
 $id: "http://devicetree.org/schemas/display/msm/gmu.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Devicetree bindings for the GMU attached to certain Adreno GPUs
+title: GMU attached to certain Adreno GPUs
 
 maintainers:
   - Rob Clark <robdclark@gmail.com>
diff --git a/Documentation/devicetree/bindings/display/msm/gpu.yaml b/Documentation/devicetree/bindings/display/msm/gpu.yaml
index ec4b1a75f46ac..c5f49842dc7b5 100644
--- a/Documentation/devicetree/bindings/display/msm/gpu.yaml
+++ b/Documentation/devicetree/bindings/display/msm/gpu.yaml
@@ -5,7 +5,7 @@
 $id: "http://devicetree.org/schemas/display/msm/gpu.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Devicetree bindings for the Adreno or Snapdragon GPUs
+title: Adreno or Snapdragon GPUs
 
 maintainers:
   - Rob Clark <robdclark@gmail.com>
diff --git a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml
index 2329d9610f832..9f97598efdfab 100644
--- a/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml
+++ b/Documentation/devicetree/bindings/display/panel/olimex,lcd-olinuxino.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/display/panel/olimex,lcd-olinuxino.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for Olimex Ltd. LCD-OLinuXino bridge panel.
+title: Olimex Ltd. LCD-OLinuXino bridge panel.
 
 maintainers:
   - Stefan Mavrodiev <stefan@olimex.com>
diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml
index 3cf8629764483..ed9554c837ef7 100644
--- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvdec.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra210-nvdec.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Device tree binding for NVIDIA Tegra NVDEC
+title: NVIDIA Tegra NVDEC
 
 description: |
   NVDEC is the hardware video decoder present on NVIDIA Tegra210
diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml
index e63ae1a008183..8199e5fa82117 100644
--- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvenc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra210-nvenc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Device tree binding for NVIDIA Tegra NVENC
+title: NVIDIA Tegra NVENC
 
 description: |
   NVENC is the hardware video encoder present on NVIDIA Tegra210
diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml
index 8647404d67e4f..895fb346ac72c 100644
--- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra210-nvjpg.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra210-nvjpg.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Device tree binding for NVIDIA Tegra NVJPG
+title: NVIDIA Tegra NVJPG
 
 description: |
   NVJPG is the hardware JPEG decoder and encoder present on NVIDIA Tegra210
diff --git a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
index 7cc2dd525a96d..4bdc19a2bccfb 100644
--- a/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
+++ b/Documentation/devicetree/bindings/gpu/host1x/nvidia,tegra234-nvdec.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/gpu/host1x/nvidia,tegra234-nvdec.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Device tree binding for NVIDIA Tegra234 NVDEC
+title: NVIDIA Tegra234 NVDEC
 
 description: |
   NVDEC is the hardware video decoder present on NVIDIA Tegra210
diff --git a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml
index fd040284561f8..e0d76d5eb1031 100644
--- a/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml
+++ b/Documentation/devicetree/bindings/i2c/i2c-gpio.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/i2c/i2c-gpio.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for GPIO bitbanged I2C
+title: GPIO bitbanged I2C
 
 maintainers:
   - Wolfram Sang <wsa@kernel.org>
diff --git a/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml b/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml
index db0843be91c56..781108ae1ce3b 100644
--- a/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml
+++ b/Documentation/devicetree/bindings/i2c/ti,omap4-i2c.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/i2c/ti,omap4-i2c.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for I2C controllers on TI's OMAP and K3 SoCs
+title: I2C controllers on TI's OMAP and K3 SoCs
 
 maintainers:
   - Vignesh Raghavendra <vigneshr@ti.com>
diff --git a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
index 2287697f1f61c..cab0d425eaa42 100644
--- a/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
+++ b/Documentation/devicetree/bindings/iio/adc/sigma-delta-modulator.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/iio/adc/sigma-delta-modulator.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Device-Tree bindings for sigma delta modulator
+title: Sigma delta modulator
 
 maintainers:
   - Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
diff --git a/Documentation/devicetree/bindings/input/gpio-keys.yaml b/Documentation/devicetree/bindings/input/gpio-keys.yaml
index 17ac9dff79720..159cd9d9fe573 100644
--- a/Documentation/devicetree/bindings/input/gpio-keys.yaml
+++ b/Documentation/devicetree/bindings/input/gpio-keys.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/gpio-keys.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Device-Tree bindings for GPIO attached keys
+title: GPIO attached keys
 
 maintainers:
   - Rob Herring <robh@kernel.org>
diff --git a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml
index 96358b12f9b20..67d4d8f86a2d8 100644
--- a/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml
+++ b/Documentation/devicetree/bindings/input/microchip,cap11xx.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/input/microchip,cap11xx.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Device tree bindings for Microchip CAP11xx based capacitive touch sensors
+title: Microchip CAP11xx based capacitive touch sensors
 
 description: |
   The Microchip CAP1xxx Family of RightTouchTM multiple-channel capacitive
diff --git a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
index 62fd47c88275d..95033cb514fbd 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
+++ b/Documentation/devicetree/bindings/interrupt-controller/renesas,irqc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/interrupt-controller/renesas,irqc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: DT bindings for the R-Mobile/R-Car/RZ/G interrupt controller
+title: R-Mobile/R-Car/RZ/G interrupt controller
 
 maintainers:
   - Geert Uytterhoeven <geert+renesas@glider.be>
diff --git a/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml
index 4c15693f7a013..9acdb78955147 100644
--- a/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml
+++ b/Documentation/devicetree/bindings/leds/backlight/qcom-wled.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/leds/backlight/qcom-wled.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for Qualcomm Technologies, Inc. WLED driver
+title: Qualcomm Technologies, Inc. WLED driver
 
 maintainers:
   - Bjorn Andersson <bjorn.andersson@linaro.org>
diff --git a/Documentation/devicetree/bindings/leds/register-bit-led.yaml b/Documentation/devicetree/bindings/leds/register-bit-led.yaml
index 79b8fc0f9d235..ed26ec19ecbd8 100644
--- a/Documentation/devicetree/bindings/leds/register-bit-led.yaml
+++ b/Documentation/devicetree/bindings/leds/register-bit-led.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/leds/register-bit-led.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Device Tree Bindings for Register Bit LEDs
+title: Register Bit LEDs
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
diff --git a/Documentation/devicetree/bindings/leds/regulator-led.yaml b/Documentation/devicetree/bindings/leds/regulator-led.yaml
index 3e020d700c00f..4ef7b96e9a086 100644
--- a/Documentation/devicetree/bindings/leds/regulator-led.yaml
+++ b/Documentation/devicetree/bindings/leds/regulator-led.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/leds/regulator-led.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Device Tree Bindings for Regulator LEDs
+title: Regulator LEDs
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
diff --git a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
index 83be9e93d221b..4053de758db60 100644
--- a/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
+++ b/Documentation/devicetree/bindings/mmc/arasan,sdhci.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/mmc/arasan,sdhci.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Device Tree Bindings for the Arasan SDHCI Controller
+title: Arasan SDHCI Controller
 
 maintainers:
   - Adrian Hunter <adrian.hunter@intel.com>
diff --git a/Documentation/devicetree/bindings/net/ingenic,mac.yaml b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
index 93b3e991d209a..bdea101c2f755 100644
--- a/Documentation/devicetree/bindings/net/ingenic,mac.yaml
+++ b/Documentation/devicetree/bindings/net/ingenic,mac.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/ingenic,mac.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for MAC in Ingenic SoCs
+title: MAC in Ingenic SoCs
 
 maintainers:
   - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
diff --git a/Documentation/devicetree/bindings/power/supply/bq2415x.yaml b/Documentation/devicetree/bindings/power/supply/bq2415x.yaml
index a3c00e0789183..f7287ffd4b12c 100644
--- a/Documentation/devicetree/bindings/power/supply/bq2415x.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq2415x.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/power/supply/bq2415x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for TI bq2415x Li-Ion Charger
+title: TI bq2415x Li-Ion Charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/bq24190.yaml b/Documentation/devicetree/bindings/power/supply/bq24190.yaml
index 4884ec90e2b8b..001c0ffb408d9 100644
--- a/Documentation/devicetree/bindings/power/supply/bq24190.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq24190.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/power/supply/bq24190.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for TI BQ2419x Li-Ion Battery Charger
+title: TI BQ2419x Li-Ion Battery Charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/bq24257.yaml b/Documentation/devicetree/bindings/power/supply/bq24257.yaml
index c7406bef0fa8f..cc45939d385bd 100644
--- a/Documentation/devicetree/bindings/power/supply/bq24257.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq24257.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/power/supply/bq24257.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for bq24250, bq24251 and bq24257 Li-Ion Charger
+title: Bq24250, bq24251 and bq24257 Li-Ion Charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/bq24735.yaml b/Documentation/devicetree/bindings/power/supply/bq24735.yaml
index dd9176ce71b34..388ee16f8a1e4 100644
--- a/Documentation/devicetree/bindings/power/supply/bq24735.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq24735.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/power/supply/bq24735.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for TI BQ24735 Li-Ion Battery Charger
+title: TI BQ24735 Li-Ion Battery Charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/bq25890.yaml b/Documentation/devicetree/bindings/power/supply/bq25890.yaml
index 204c0147188f2..6273b1c5412b8 100644
--- a/Documentation/devicetree/bindings/power/supply/bq25890.yaml
+++ b/Documentation/devicetree/bindings/power/supply/bq25890.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/power/supply/bq25890.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for bq25890, bq25892, bq25895 and bq25896 Li-Ion Charger
+title: Bq25890, bq25892, bq25895 and bq25896 Li-Ion Charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/isp1704.yaml b/Documentation/devicetree/bindings/power/supply/isp1704.yaml
index 7e3449ed70d4d..fb3a812aa5a9c 100644
--- a/Documentation/devicetree/bindings/power/supply/isp1704.yaml
+++ b/Documentation/devicetree/bindings/power/supply/isp1704.yaml
@@ -5,7 +5,7 @@
 $id: http://devicetree.org/schemas/power/supply/isp1704.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for NXP ISP1704 USB Charger Detection
+title: NXP ISP1704 USB Charger Detection
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml b/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml
index 109b41a0d56c2..774582cd3a2c3 100644
--- a/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml
+++ b/Documentation/devicetree/bindings/power/supply/lltc,ltc294x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/lltc,ltc294x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for LTC2941, LTC2942, LTC2943 and LTC2944 battery fuel gauges
+title: LTC2941, LTC2942, LTC2943 and LTC2944 battery fuel gauges
 
 description: |
   All chips measure battery capacity.
diff --git a/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml b/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml
index bce15101318e7..27bebc1757ba3 100644
--- a/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml
+++ b/Documentation/devicetree/bindings/power/supply/richtek,rt9455.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/richtek,rt9455.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for Richtek rt9455 battery charger
+title: Richtek rt9455 battery charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml b/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml
index 93654e732cdaa..ce6fbdba8f6b9 100644
--- a/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml
+++ b/Documentation/devicetree/bindings/power/supply/ti,lp8727.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/supply/ti,lp8727.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for TI/National Semiconductor LP8727 Charger
+title: TI/National Semiconductor LP8727 Charger
 
 maintainers:
   - Sebastian Reichel <sre@kernel.org>
diff --git a/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml b/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml
index 82b6f2fde422b..7e58471097f8c 100644
--- a/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml
+++ b/Documentation/devicetree/bindings/regulator/pwm-regulator.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/regulator/pwm-regulator.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for the Generic PWM Regulator
+title: Generic PWM Regulator
 
 maintainers:
   - Brian Norris <briannorris@chromium.org>
diff --git a/Documentation/devicetree/bindings/rng/ingenic,rng.yaml b/Documentation/devicetree/bindings/rng/ingenic,rng.yaml
index b2e4a6a7f93a4..79a023cbfdbab 100644
--- a/Documentation/devicetree/bindings/rng/ingenic,rng.yaml
+++ b/Documentation/devicetree/bindings/rng/ingenic,rng.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rng/ingenic,rng.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for RNG in Ingenic SoCs
+title: RNG in Ingenic SoCs
 
 maintainers:
   - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
diff --git a/Documentation/devicetree/bindings/rng/ingenic,trng.yaml b/Documentation/devicetree/bindings/rng/ingenic,trng.yaml
index 044d9a065650c..acaeb63caf249 100644
--- a/Documentation/devicetree/bindings/rng/ingenic,trng.yaml
+++ b/Documentation/devicetree/bindings/rng/ingenic,trng.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rng/ingenic,trng.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for DTRNG in Ingenic SoCs
+title: DTRNG in Ingenic SoCs
 
 maintainers:
   - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
diff --git a/Documentation/devicetree/bindings/serial/8250_omap.yaml b/Documentation/devicetree/bindings/serial/8250_omap.yaml
index 7b34ec8fa90ec..53dc1212ad2e3 100644
--- a/Documentation/devicetree/bindings/serial/8250_omap.yaml
+++ b/Documentation/devicetree/bindings/serial/8250_omap.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/serial/8250_omap.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for 8250 compliant UARTs on TI's OMAP2+ and K3 SoCs
+title: 8250 compliant UARTs on TI's OMAP2+ and K3 SoCs
 
 maintainers:
   - Vignesh Raghavendra <vigneshr@ti.com>
diff --git a/Documentation/devicetree/bindings/serio/ps2-gpio.yaml b/Documentation/devicetree/bindings/serio/ps2-gpio.yaml
index a63d9172346f1..99848bc34f6e6 100644
--- a/Documentation/devicetree/bindings/serio/ps2-gpio.yaml
+++ b/Documentation/devicetree/bindings/serio/ps2-gpio.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/serio/ps2-gpio.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for GPIO based PS/2
+title: GPIO based PS/2
 
 maintainers:
   - Danilo Krummrich <danilokrummrich@dk-develop.de>
diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml
index 8ca19f2b0b3de..184e8ccbdd137 100644
--- a/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,wcd934x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,wcd934x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for Qualcomm WCD9340/WCD9341 Audio Codec
+title: Qualcomm WCD9340/WCD9341 Audio Codec
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml
index 49a267b306f63..b430dd3e1841a 100644
--- a/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x-sdw.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,wcd938x-sdw.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for Qualcomm SoundWire Slave devices on WCD9380/WCD9385
+title: Qualcomm SoundWire Slave devices on WCD9380/WCD9385
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml
index 67d84463eaeb6..018565793a3ec 100644
--- a/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,wcd938x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,wcd938x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for Qualcomm WCD9380/WCD9385 Audio Codec
+title: Qualcomm WCD9380/WCD9385 Audio Codec
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml b/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml
index ea44d03e58caa..d702b489320fa 100644
--- a/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,wsa881x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,wsa881x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for Qualcomm WSA8810/WSA8815 Class-D Smart Speaker Amplifier
+title: Qualcomm WSA8810/WSA8815 Class-D Smart Speaker Amplifier
 
 maintainers:
   - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
diff --git a/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml b/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml
index 65b0e67f82a30..ba572a7f4f3c0 100644
--- a/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml
+++ b/Documentation/devicetree/bindings/sound/qcom,wsa883x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/sound/qcom,wsa883x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for The Qualcomm WSA8830/WSA8832/WSA8835
+title: Qualcomm WSA8830/WSA8832/WSA8835
   smart speaker amplifier
 
 maintainers:
diff --git a/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
index 98648bf9e151d..bdc82d8bce0ef 100644
--- a/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
+++ b/Documentation/devicetree/bindings/timer/ingenic,sysost.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/timer/ingenic,sysost.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Bindings for SYSOST in Ingenic XBurst family SoCs
+title: SYSOST in Ingenic XBurst family SoCs
 
 maintainers:
   - 周琰杰 (Zhou Yanjie) <zhouyanjie@wanyeetech.com>
diff --git a/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml b/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml
index fd6e7c81426ea..f6cb19efd98b1 100644
--- a/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml
+++ b/Documentation/devicetree/bindings/usb/nvidia,tegra-xudc.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/nvidia,tegra-xudc.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Device tree binding for NVIDIA Tegra XUSB device mode controller (XUDC)
+title: NVIDIA Tegra XUSB device mode controller (XUDC)
 
 description:
   The Tegra XUDC controller supports both USB 2.0 HighSpeed/FullSpeed and
diff --git a/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml b/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml
index 50f2b505bdeb6..623d04a88a817 100644
--- a/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml
+++ b/Documentation/devicetree/bindings/usb/realtek,rts5411.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/realtek,rts5411.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for the Realtek RTS5411 USB 3.0 hub controller
+title: Realtek RTS5411 USB 3.0 hub controller
 
 maintainers:
   - Matthias Kaehlcke <mka@chromium.org>
diff --git a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
index eedde385d2994..f81ba3e902973 100644
--- a/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,j721e-usb.yaml
@@ -4,7 +4,7 @@
 $id: "http://devicetree.org/schemas/usb/ti,j721e-usb.yaml#"
 $schema: "http://devicetree.org/meta-schemas/core.yaml#"
 
-title: Bindings for the TI wrapper module for the Cadence USBSS-DRD controller
+title: TI wrapper module for the Cadence USBSS-DRD controller
 
 maintainers:
   - Roger Quadros <rogerq@kernel.org>
diff --git a/Documentation/devicetree/bindings/usb/ti,usb8041.yaml b/Documentation/devicetree/bindings/usb/ti,usb8041.yaml
index e04fbd8ab0b7d..88ea6c952c66c 100644
--- a/Documentation/devicetree/bindings/usb/ti,usb8041.yaml
+++ b/Documentation/devicetree/bindings/usb/ti,usb8041.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/ti,usb8041.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding for the TI USB8041 USB 3.0 hub controller
+title: TI USB8041 USB 3.0 hub controller
 
 maintainers:
   - Alexander Stein <alexander.stein@ew.tq-group.com>
-- 
GitLab


From 33cd7c6fffa3c546b3fce3b000d4b83f88c01e0d Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:14 +0100
Subject: [PATCH 4071/4122] dt-bindings: clock: drop redundant part of title

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding", but instead just describe the hardware.

Drop "Devicetree bindings" in various forms:

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [dD]evice[ -]\?[tT]ree [cC]lock [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2 Clock Controller/' {} \;

  find Documentation/devicetree/bindings/ -type f -name '*.yaml' \
    -exec sed -i -e 's/^title: [cC]lock [bB]indings\? for \([tT]he \)\?\(.*\)$/title: \u\2 Clock Controller/' {} \;

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Link: https://lore.kernel.org/r/20221216163815.522628-9-krzysztof.kozlowski@linaro.org
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/calxeda.yaml           | 2 +-
 Documentation/devicetree/bindings/clock/imx1-clock.yaml        | 2 +-
 Documentation/devicetree/bindings/clock/imx21-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx23-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx25-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx27-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx28-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx31-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx35-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx5-clock.yaml        | 2 +-
 Documentation/devicetree/bindings/clock/imx6q-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx6sl-clock.yaml      | 2 +-
 Documentation/devicetree/bindings/clock/imx6sll-clock.yaml     | 2 +-
 Documentation/devicetree/bindings/clock/imx6sx-clock.yaml      | 2 +-
 Documentation/devicetree/bindings/clock/imx6ul-clock.yaml      | 2 +-
 Documentation/devicetree/bindings/clock/imx7d-clock.yaml       | 2 +-
 Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml | 2 +-
 Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml | 2 +-
 Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml   | 2 +-
 Documentation/devicetree/bindings/clock/ti,lmk04832.yaml       | 2 +-
 20 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/calxeda.yaml b/Documentation/devicetree/bindings/clock/calxeda.yaml
index a34cbf3c9aaf5..a88fbe20fef16 100644
--- a/Documentation/devicetree/bindings/clock/calxeda.yaml
+++ b/Documentation/devicetree/bindings/clock/calxeda.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/calxeda.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Device Tree Clock bindings for Calxeda highbank platform
+title: Calxeda highbank platform Clock Controller
 
 description: |
   This binding covers the Calxeda SoC internal peripheral and bus clocks
diff --git a/Documentation/devicetree/bindings/clock/imx1-clock.yaml b/Documentation/devicetree/bindings/clock/imx1-clock.yaml
index 56f524780b1a6..7ade4c32aff37 100644
--- a/Documentation/devicetree/bindings/clock/imx1-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx1-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx1-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX1 CPUs
+title: Freescale i.MX1 CPUs Clock Controller
 
 maintainers:
   - Alexander Shiyan <shc_work@mail.ru>
diff --git a/Documentation/devicetree/bindings/clock/imx21-clock.yaml b/Documentation/devicetree/bindings/clock/imx21-clock.yaml
index e2d50544700a2..79cc843703ec1 100644
--- a/Documentation/devicetree/bindings/clock/imx21-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx21-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx21-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX21
+title: Freescale i.MX21 Clock Controller
 
 maintainers:
   - Alexander Shiyan <shc_work@mail.ru>
diff --git a/Documentation/devicetree/bindings/clock/imx23-clock.yaml b/Documentation/devicetree/bindings/clock/imx23-clock.yaml
index 7e890ab9c77d2..5e71c9219500a 100644
--- a/Documentation/devicetree/bindings/clock/imx23-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx23-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx23-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX23
+title: Freescale i.MX23 Clock Controller
 
 maintainers:
   - Shawn Guo <shawnguo@kernel.org>
diff --git a/Documentation/devicetree/bindings/clock/imx25-clock.yaml b/Documentation/devicetree/bindings/clock/imx25-clock.yaml
index 1792e138984b9..c626a158590e2 100644
--- a/Documentation/devicetree/bindings/clock/imx25-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx25-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx25-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX25
+title: Freescale i.MX25 Clock Controller
 
 maintainers:
   - Sascha Hauer <s.hauer@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/clock/imx27-clock.yaml b/Documentation/devicetree/bindings/clock/imx27-clock.yaml
index 99925aa22a4c8..71d78a0b551fc 100644
--- a/Documentation/devicetree/bindings/clock/imx27-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx27-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx27-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX27
+title: Freescale i.MX27 Clock Controller
 
 maintainers:
   - Fabio Estevam <festevam@gmail.com>
diff --git a/Documentation/devicetree/bindings/clock/imx28-clock.yaml b/Documentation/devicetree/bindings/clock/imx28-clock.yaml
index a542d680b1ca8..4aaad7b9c66e9 100644
--- a/Documentation/devicetree/bindings/clock/imx28-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx28-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx28-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX28
+title: Freescale i.MX28 Clock Controller
 
 maintainers:
   - Shawn Guo <shawnguo@kernel.org>
diff --git a/Documentation/devicetree/bindings/clock/imx31-clock.yaml b/Documentation/devicetree/bindings/clock/imx31-clock.yaml
index 168c8ada5e81c..50a8498eef8a2 100644
--- a/Documentation/devicetree/bindings/clock/imx31-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx31-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx31-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX31
+title: Freescale i.MX31 Clock Controller
 
 maintainers:
   - Fabio Estevam <festevam@gmail.com>
diff --git a/Documentation/devicetree/bindings/clock/imx35-clock.yaml b/Documentation/devicetree/bindings/clock/imx35-clock.yaml
index 6415bb6a8d041..c063369de3ec7 100644
--- a/Documentation/devicetree/bindings/clock/imx35-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx35-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx35-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX35
+title: Freescale i.MX35 Clock Controller
 
 maintainers:
   - Steffen Trumtrar <s.trumtrar@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/clock/imx5-clock.yaml b/Documentation/devicetree/bindings/clock/imx5-clock.yaml
index c0e19ff92c767..423c0142c1d33 100644
--- a/Documentation/devicetree/bindings/clock/imx5-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx5-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx5-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX5
+title: Freescale i.MX5 Clock Controller
 
 maintainers:
   - Fabio Estevam <festevam@gmail.com>
diff --git a/Documentation/devicetree/bindings/clock/imx6q-clock.yaml b/Documentation/devicetree/bindings/clock/imx6q-clock.yaml
index 4f4637eddb8b7..bae4fcb3aacc6 100644
--- a/Documentation/devicetree/bindings/clock/imx6q-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx6q-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx6q-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX6 Quad
+title: Freescale i.MX6 Quad Clock Controller
 
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml
index b83c8f43d664c..c85ff6ea3d245 100644
--- a/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx6sl-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx6sl-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX6 SoloLite
+title: Freescale i.MX6 SoloLite Clock Controller
 
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml
index 484894a4b23f4..6b549ed1493c3 100644
--- a/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx6sll-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx6sll-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX6 SLL
+title: Freescale i.MX6 SLL Clock Controller
 
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml b/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml
index e6c795657c24d..55dcad18b7c6d 100644
--- a/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx6sx-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx6sx-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX6 SoloX
+title: Freescale i.MX6 SoloX Clock Controller
 
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml b/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml
index 6a51a3f51cd98..be54d4df5afa2 100644
--- a/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx6ul-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx6ul-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX6 UltraLite
+title: Freescale i.MX6 UltraLite Clock Controller
 
 maintainers:
   - Anson Huang <Anson.Huang@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx7d-clock.yaml b/Documentation/devicetree/bindings/clock/imx7d-clock.yaml
index cefb61db01a82..e7d8427e49579 100644
--- a/Documentation/devicetree/bindings/clock/imx7d-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx7d-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx7d-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX7 Dual
+title: Freescale i.MX7 Dual Clock Controller
 
 maintainers:
   - Frank Li <Frank.Li@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml b/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml
index 739c3378f8c8f..76842038f52ea 100644
--- a/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx7ulp-pcc-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx7ulp-pcc-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX7ULP Peripheral Clock Control (PCC) modules
+title: Freescale i.MX7ULP Peripheral Clock Control (PCC) modules Clock Controller
 
 maintainers:
   - A.s. Dong <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml b/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
index d06344d7e34fe..5e25bc6d1372e 100644
--- a/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imx7ulp-scg-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imx7ulp-scg-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MX7ULP System Clock Generation (SCG) modules
+title: Freescale i.MX7ULP System Clock Generation (SCG) modules Clock Controller
 
 maintainers:
   - A.s. Dong <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml b/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
index 03fc5c1a29399..777af4aad4b26 100644
--- a/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
+++ b/Documentation/devicetree/bindings/clock/imxrt1050-clock.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/imxrt1050-clock.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for Freescale i.MXRT
+title: Freescale i.MXRT Clock Controller
 
 maintainers:
   - Giulio Benetti <giulio.benetti@benettiengineering.com>
diff --git a/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml b/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
index bd8173848253f..73d17830f165e 100644
--- a/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
+++ b/Documentation/devicetree/bindings/clock/ti,lmk04832.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/ti,lmk04832.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Clock bindings for the Texas Instruments LMK04832
+title: Texas Instruments LMK04832 Clock Controller
 
 maintainers:
   - Liam Beguin <liambeguin@gmail.com>
-- 
GitLab


From 3367934dd3035afa72ac79ae649f142a530df157 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Fri, 16 Dec 2022 17:38:15 +0100
Subject: [PATCH 4072/4122] dt-bindings: drop redundant part of title (manual)

The Devicetree bindings document does not have to say in the title that
it is a "Devicetree binding" or a "schema", but instead just describe
the hardware.

Manual updates to various binding titles, including capitalizing them.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Ulf Hansson <ulf.hansson@linaro.org> # MMC
Acked-by: Stephen Boyd <sboyd@kernel.org> # clk
Acked-by: Dmitry Torokhov <dmitry.torokhov@gmail.com> # input
Acked-by: Mark Brown <broonie@kernel.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org> # opp
Link: https://lore.kernel.org/r/20221216163815.522628-10-krzysztof.kozlowski@linaro.org
[robh: add trivial-devices.yaml and net/can/microchip,mcp251xfd.yaml]
Signed-off-by: Rob Herring <robh@kernel.org>
---
 Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml | 2 +-
 Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml      | 2 +-
 .../devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml       | 2 +-
 Documentation/devicetree/bindings/example-schema.yaml         | 2 +-
 Documentation/devicetree/bindings/input/fsl,scu-key.yaml      | 2 +-
 Documentation/devicetree/bindings/input/matrix-keymap.yaml    | 2 +-
 Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml   | 2 +-
 Documentation/devicetree/bindings/net/asix,ax88178.yaml       | 2 +-
 .../devicetree/bindings/net/can/microchip,mcp251xfd.yaml      | 4 +---
 Documentation/devicetree/bindings/net/microchip,lan95xx.yaml  | 2 +-
 Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml    | 2 +-
 Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml  | 2 +-
 Documentation/devicetree/bindings/pci/pci-ep.yaml             | 2 +-
 Documentation/devicetree/bindings/phy/calxeda-combophy.yaml   | 2 +-
 .../devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml          | 2 +-
 Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml    | 2 +-
 Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml    | 2 +-
 Documentation/devicetree/bindings/power/fsl,scu-pd.yaml       | 2 +-
 Documentation/devicetree/bindings/riscv/cpus.yaml             | 2 +-
 Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml        | 2 +-
 Documentation/devicetree/bindings/spi/omap-spi.yaml           | 2 +-
 .../devicetree/bindings/thermal/fsl,scu-thermal.yaml          | 2 +-
 Documentation/devicetree/bindings/trivial-devices.yaml        | 2 +-
 Documentation/devicetree/bindings/usb/usb-device.yaml         | 2 +-
 Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml   | 2 +-
 25 files changed, 25 insertions(+), 27 deletions(-)

diff --git a/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml b/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml
index 82836086cac14..d416c374e8534 100644
--- a/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml
+++ b/Documentation/devicetree/bindings/clock/cirrus,cs2000-cp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/cirrus,cs2000-cp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Binding CIRRUS LOGIC Fractional-N Clock Synthesizer & Clock Multiplier
+title: CIRRUS LOGIC Fractional-N Clock Synthesizer & Clock Multiplier
 
 maintainers:
   - Kuninori Morimoto <kuninori.morimoto.gx@renesas.com>
diff --git a/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml b/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml
index f2c48460a3997..36d4cfc3c2f8d 100644
--- a/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml
+++ b/Documentation/devicetree/bindings/clock/fsl,scu-clk.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/fsl,scu-clk.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - Clock bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - Clock Controller Based on SCU Message Protocol
 
 maintainers:
   - Abel Vesa <abel.vesa@nxp.com>
diff --git a/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml b/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml
index 28c13237059fa..3cb996b2c9d59 100644
--- a/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml
+++ b/Documentation/devicetree/bindings/clock/qcom,dispcc-sc8280xp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/clock/qcom,dispcc-sc8280xp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm Display Clock & Reset Controller Binding for SC8280XP
+title: Qualcomm Display Clock & Reset Controller on SC8280XP
 
 maintainers:
   - Bjorn Andersson <bjorn.andersson@linaro.org>
diff --git a/Documentation/devicetree/bindings/example-schema.yaml b/Documentation/devicetree/bindings/example-schema.yaml
index 8e1a8b19d4294..dfcf4c27d44a3 100644
--- a/Documentation/devicetree/bindings/example-schema.yaml
+++ b/Documentation/devicetree/bindings/example-schema.yaml
@@ -11,7 +11,7 @@ $id: http://devicetree.org/schemas/example-schema.yaml#
 # $schema is the meta-schema this schema should be validated with.
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: An example schema annotated with jsonschema details
+title: An Example Device
 
 maintainers:
   - Rob Herring <robh@kernel.org>
diff --git a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml
index e6266d1882665..e5a3c355ee1f0 100644
--- a/Documentation/devicetree/bindings/input/fsl,scu-key.yaml
+++ b/Documentation/devicetree/bindings/input/fsl,scu-key.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/fsl,scu-key.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - SCU key bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - SCU Key Based on SCU Message Protocol
 
 maintainers:
   - Dong Aisheng <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/input/matrix-keymap.yaml b/Documentation/devicetree/bindings/input/matrix-keymap.yaml
index 6699d5e32dcaf..4d6dbe91646dc 100644
--- a/Documentation/devicetree/bindings/input/matrix-keymap.yaml
+++ b/Documentation/devicetree/bindings/input/matrix-keymap.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/input/matrix-keymap.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Common key matrices binding for matrix-connected key boards
+title: Common Key Matrices on Matrix-connected Key Boards
 
 maintainers:
   - Olof Johansson <olof@lixom.net>
diff --git a/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml b/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml
index 2929382625b69..d1b01bae9f63e 100644
--- a/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml
+++ b/Documentation/devicetree/bindings/leds/issi,is31fl319x.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/leds/issi,is31fl319x.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: ISSI LED controllers bindings for IS31FL319{0,1,3,6,9}
+title: ISSI LED Controllers for IS31FL319{0,1,3,6,9}
 
 maintainers:
   - Vincent Knecht <vincent.knecht@mailoo.org>
diff --git a/Documentation/devicetree/bindings/net/asix,ax88178.yaml b/Documentation/devicetree/bindings/net/asix,ax88178.yaml
index a81dbc4792f68..768504ccbf741 100644
--- a/Documentation/devicetree/bindings/net/asix,ax88178.yaml
+++ b/Documentation/devicetree/bindings/net/asix,ax88178.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/asix,ax88178.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: The device tree bindings for the USB Ethernet controllers
+title: ASIX AX88172/AX88772 USB Ethernet Controllers
 
 maintainers:
   - Oleksij Rempel <o.rempel@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
index 7a73057707b44..fce84aecae776 100644
--- a/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
+++ b/Documentation/devicetree/bindings/net/can/microchip,mcp251xfd.yaml
@@ -4,9 +4,7 @@
 $id: http://devicetree.org/schemas/net/can/microchip,mcp251xfd.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title:
-  Microchip MCP2517FD, MCP2518FD and MCP251863 stand-alone CAN
-  controller device tree bindings
+title: Microchip MCP2517FD, MCP2518FD and MCP251863 stand-alone CAN controller
 
 maintainers:
   - Marc Kleine-Budde <mkl@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml b/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml
index 3715c5f8f0e0c..0b97e14d947f2 100644
--- a/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml
+++ b/Documentation/devicetree/bindings/net/microchip,lan95xx.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/net/microchip,lan95xx.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: The device tree bindings for the USB Ethernet controllers
+title: Microchip SMSC9500/LAN9530/LAN9730 USB Ethernet Controllers
 
 maintainers:
   - Oleksij Rempel <o.rempel@pengutronix.de>
diff --git a/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml b/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml
index 682688299b26d..f0a49283649d9 100644
--- a/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml
+++ b/Documentation/devicetree/bindings/nvmem/fsl,scu-ocotp.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/nvmem/fsl,scu-ocotp.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - OCOTP bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - OCOTP Based on SCU Message Protocol
 
 maintainers:
   - Dong Aisheng <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml b/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml
index df8442fb11f0c..b9ce2e099ce9a 100644
--- a/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml
+++ b/Documentation/devicetree/bindings/opp/opp-v2-qcom-level.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/opp/opp-v2-qcom-level.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm OPP bindings to describe OPP nodes.
+title: Qualcomm OPP
 
 maintainers:
   - Niklas Cassel <nks@flawful.org>
diff --git a/Documentation/devicetree/bindings/pci/pci-ep.yaml b/Documentation/devicetree/bindings/pci/pci-ep.yaml
index ccec51ab52470..d1eef48252072 100644
--- a/Documentation/devicetree/bindings/pci/pci-ep.yaml
+++ b/Documentation/devicetree/bindings/pci/pci-ep.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/pci/pci-ep.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: PCI Endpoint Controller Schema
+title: PCI Endpoint Controller
 
 description: |
   Common properties for PCI Endpoint Controller Nodes.
diff --git a/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml b/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml
index 41ee16e21f8d2..d05a7c7930353 100644
--- a/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml
+++ b/Documentation/devicetree/bindings/phy/calxeda-combophy.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/phy/calxeda-combophy.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Calxeda Highbank Combination PHYs binding for SATA
+title: Calxeda Highbank Combination PHYs for SATA
 
 description: |
   The Calxeda Combination PHYs connect the SoC to the internal fabric
diff --git a/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml b/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml
index 45ea565ce2387..fcd729afeee13 100644
--- a/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/fsl,scu-pinctrl.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/pinctrl/fsl,scu-pinctrl.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - Pinctrl bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - Pinctrl Based on SCU Message Protocol
 
 maintainers:
   - Dong Aisheng <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
index f5a121311f612..be81ed22a036a 100644
--- a/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/pincfg-node.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/pinctrl/pincfg-node.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic pin configuration node schema
+title: Generic Pin Configuration Node
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
diff --git a/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
index 551df3d9b8090..008c3ab7f1bb3 100644
--- a/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
+++ b/Documentation/devicetree/bindings/pinctrl/pinmux-node.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/pinctrl/pinmux-node.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Generic pin multiplexing node schema
+title: Generic Pin Multiplexing Node
 
 maintainers:
   - Linus Walleij <linus.walleij@linaro.org>
diff --git a/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml b/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml
index 1f72b18ca0fcf..407b7cfec783c 100644
--- a/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml
+++ b/Documentation/devicetree/bindings/power/fsl,scu-pd.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/power/fsl,scu-pd.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - Power domain bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - Power Domain Based on SCU Message Protocol
 
 maintainers:
   - Dong Aisheng <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index 83ad177a9043b..c6720764e765c 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/riscv/cpus.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: RISC-V bindings for 'cpus' DT nodes
+title: RISC-V CPUs
 
 maintainers:
   - Paul Walmsley <paul.walmsley@sifive.com>
diff --git a/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml b/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml
index 8c102b70d735a..dd1b1abf1e1bc 100644
--- a/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml
+++ b/Documentation/devicetree/bindings/rtc/fsl,scu-rtc.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/rtc/fsl,scu-rtc.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - RTC bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - RTC Based on SCU Message Protocol
 
 maintainers:
   - Dong Aisheng <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/spi/omap-spi.yaml b/Documentation/devicetree/bindings/spi/omap-spi.yaml
index 9952199cae119..352affa4b7f86 100644
--- a/Documentation/devicetree/bindings/spi/omap-spi.yaml
+++ b/Documentation/devicetree/bindings/spi/omap-spi.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/spi/omap-spi.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: SPI controller bindings for OMAP and K3 SoCs
+title: SPI Controller on OMAP and K3 SoCs
 
 maintainers:
   - Aswath Govindraju <a-govindraju@ti.com>
diff --git a/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml b/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml
index f9e4b3c8d0eeb..3721c8c8ec640 100644
--- a/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml
+++ b/Documentation/devicetree/bindings/thermal/fsl,scu-thermal.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/thermal/fsl,scu-thermal.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - Thermal bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - Thermal Based on SCU Message Protocol
 
 maintainers:
   - Dong Aisheng <aisheng.dong@nxp.com>
diff --git a/Documentation/devicetree/bindings/trivial-devices.yaml b/Documentation/devicetree/bindings/trivial-devices.yaml
index 61746755c1077..f5c0a6283e61f 100644
--- a/Documentation/devicetree/bindings/trivial-devices.yaml
+++ b/Documentation/devicetree/bindings/trivial-devices.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/trivial-devices.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Trivial I2C and SPI devices that have simple device tree bindings
+title: Trivial I2C and SPI devices
 
 maintainers:
   - Rob Herring <robh@kernel.org>
diff --git a/Documentation/devicetree/bindings/usb/usb-device.yaml b/Documentation/devicetree/bindings/usb/usb-device.yaml
index b77960a7a37ba..7a771125ec76c 100644
--- a/Documentation/devicetree/bindings/usb/usb-device.yaml
+++ b/Documentation/devicetree/bindings/usb/usb-device.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/usb/usb-device.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: The device tree bindings for the Generic USB Device
+title: Generic USB Device
 
 maintainers:
   - Greg Kroah-Hartman <gregkh@linuxfoundation.org>
diff --git a/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml b/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml
index f84c45d687d7b..47701248cd8d4 100644
--- a/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml
+++ b/Documentation/devicetree/bindings/watchdog/fsl,scu-wdt.yaml
@@ -4,7 +4,7 @@
 $id: http://devicetree.org/schemas/watchdog/fsl,scu-wdt.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: i.MX SCU Client Device Node - Watchdog bindings based on SCU Message Protocol
+title: i.MX SCU Client Device Node - Watchdog Based on SCU Message Protocol
 
 maintainers:
   - Dong Aisheng <aisheng.dong@nxp.com>
-- 
GitLab


From dcde56bb37a9f900c11eec56b0ecaca5653cc829 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Fri, 16 Dec 2022 13:04:15 -0600
Subject: [PATCH 4073/4122] Fix mismerge due to devnode now taking a 'const *'
 device

This was a mismerge of a semantic conflict in my merge of the driver
core updates, where commit ff62b8e6588f ("driver core: make struct
class.devnode() take a const *") changed the devnode function pointer
type.  In the meantime, the drm tree I merged earlier had introduced a
new use of that in commit 8bf4889762a8 ("drivers/accel: define kconfig
and register a new major").

And of course this happens when I'm traveling with my laptop, and thus
didn't do a full allmodconfig build between every pull and before
pushing my work out.

So I only noticed later as I was doing my full build.

Brown-paper-bag-by: Linus Torvalds <torvalds@linux-foundation.org>
Fixes: 71a7507afbc3 Merge tag 'driver-core-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/accel/drm_accel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c
index a5ee84a4017a3..1b69824286fd3 100644
--- a/drivers/accel/drm_accel.c
+++ b/drivers/accel/drm_accel.c
@@ -27,7 +27,7 @@ static struct device_type accel_sysfs_device_minor = {
 	.name = "accel_minor"
 };
 
-static char *accel_devnode(struct device *dev, umode_t *mode)
+static char *accel_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "accel/%s", dev_name(dev));
 }
-- 
GitLab


From 5244ca88671a1981ceec09c5c8809f003e6a62aa Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 15 Dec 2022 18:03:43 +0100
Subject: [PATCH 4074/4122] RDMA/siw: Fix pointer cast warning

The previous build fix left a remaining issue in configurations with
64-bit dma_addr_t on 32-bit architectures:

drivers/infiniband/sw/siw/siw_qp_tx.c: In function 'siw_get_pblpage':
drivers/infiniband/sw/siw/siw_qp_tx.c:32:37: error: cast to pointer from integer of different size [-Werror=int-to-pointer-cast]
   32 |                 return virt_to_page((void *)paddr);
      |                                     ^

Use the same double cast here that the driver uses elsewhere to convert
between dma_addr_t and void*.

Fixes: 0d1b756acf60 ("RDMA/siw: Pass a pointer to virt_to_page()")
Link: https://lore.kernel.org/r/20221215170347.2612403-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Bernard Metzler <bmt@zurich.ibm.com>
Reviewed-by: Linus Walleij <linus.walleij@linaro.org>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/infiniband/sw/siw/siw_qp_tx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/siw/siw_qp_tx.c b/drivers/infiniband/sw/siw/siw_qp_tx.c
index 7d47b521070b1..05052b49107f2 100644
--- a/drivers/infiniband/sw/siw/siw_qp_tx.c
+++ b/drivers/infiniband/sw/siw/siw_qp_tx.c
@@ -29,7 +29,7 @@ static struct page *siw_get_pblpage(struct siw_mem *mem, u64 addr, int *idx)
 	dma_addr_t paddr = siw_pbl_get_buffer(pbl, offset, NULL, idx);
 
 	if (paddr)
-		return virt_to_page((void *)paddr);
+		return virt_to_page((void *)(uintptr_t)paddr);
 
 	return NULL;
 }
-- 
GitLab


From 078838f5b9c9203e94d7724f997392ea8012ea6a Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 15 Dec 2022 17:39:05 +0100
Subject: [PATCH 4075/4122] net: ethernet: ti: am65-cpsw: fix CONFIG_PM #ifdef

The #ifdef check is incorrect and leads to a warning:

drivers/net/ethernet/ti/am65-cpsw-nuss.c:1679:13: error: 'am65_cpsw_nuss_remove_rx_chns' defined but not used [-Werror=unused-function]
 1679 | static void am65_cpsw_nuss_remove_rx_chns(void *data)

It's better to remove the #ifdef here and use the modern
SYSTEM_SLEEP_PM_OPS() macro instead.

Fixes: 24bc19b05f1f ("net: ethernet: ti: am65-cpsw: Add suspend/resume support")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://lore.kernel.org/r/20221215163918.611609-1-arnd@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 9decb0c7961bb..ecbde83b52436 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2878,7 +2878,6 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev)
 	return 0;
 }
 
-#ifdef CONFIG_PM_SLEEP
 static int am65_cpsw_nuss_suspend(struct device *dev)
 {
 	struct am65_cpsw_common *common = dev_get_drvdata(dev);
@@ -2964,10 +2963,9 @@ static int am65_cpsw_nuss_resume(struct device *dev)
 
 	return 0;
 }
-#endif /* CONFIG_PM_SLEEP */
 
 static const struct dev_pm_ops am65_cpsw_nuss_dev_pm_ops = {
-	SET_SYSTEM_SLEEP_PM_OPS(am65_cpsw_nuss_suspend, am65_cpsw_nuss_resume)
+	SYSTEM_SLEEP_PM_OPS(am65_cpsw_nuss_suspend, am65_cpsw_nuss_resume)
 };
 
 static struct platform_driver am65_cpsw_nuss_driver = {
-- 
GitLab


From 214964a13ab56a9757d146b79b468a7ca190fbfb Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Thu, 15 Dec 2022 20:41:22 -0800
Subject: [PATCH 4076/4122] devlink: protect devlink dump by the instance lock

Take the instance lock around devlink_nl_fill() when dumping,
doit takes it already.

We are only dumping basic info so in the worst case we were risking
data races around the reload statistics. Until the big devlink mutex
was removed all relevant code was protected by it, so the missing
instance lock was not exposed.

Fixes: d3efc2a6a6d8 ("net: devlink: remove devlink_mutex")
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Link: https://lore.kernel.org/r/20221216044122.1863550-1-kuba@kernel.org
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/devlink.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index d2df308290832..032d6d0a5ce64 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -1648,10 +1648,13 @@ static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
 			continue;
 		}
 
+		devl_lock(devlink);
 		err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
 				      NETLINK_CB(cb->skb).portid,
 				      cb->nlh->nlmsg_seq, NLM_F_MULTI);
+		devl_unlock(devlink);
 		devlink_put(devlink);
+
 		if (err)
 			goto out;
 		idx++;
-- 
GitLab


From 2d7afdcbc9d32423f177ee12b7c93783aea338fb Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
Date: Wed, 14 Dec 2022 23:11:58 -0700
Subject: [PATCH 4077/4122] skbuff: Account for tail adjustment during pull
 operations

Extending the tail can have some unexpected side effects if a program uses
a helper like BPF_FUNC_skb_pull_data to read partial content beyond the
head skb headlen when all the skbs in the gso frag_list are linear with no
head_frag -

  kernel BUG at net/core/skbuff.c:4219!
  pc : skb_segment+0xcf4/0xd2c
  lr : skb_segment+0x63c/0xd2c
  Call trace:
   skb_segment+0xcf4/0xd2c
   __udp_gso_segment+0xa4/0x544
   udp4_ufo_fragment+0x184/0x1c0
   inet_gso_segment+0x16c/0x3a4
   skb_mac_gso_segment+0xd4/0x1b0
   __skb_gso_segment+0xcc/0x12c
   udp_rcv_segment+0x54/0x16c
   udp_queue_rcv_skb+0x78/0x144
   udp_unicast_rcv_skb+0x8c/0xa4
   __udp4_lib_rcv+0x490/0x68c
   udp_rcv+0x20/0x30
   ip_protocol_deliver_rcu+0x1b0/0x33c
   ip_local_deliver+0xd8/0x1f0
   ip_rcv+0x98/0x1a4
   deliver_ptype_list_skb+0x98/0x1ec
   __netif_receive_skb_core+0x978/0xc60

Fix this by marking these skbs as GSO_DODGY so segmentation can handle
the tail updates accordingly.

Fixes: 3dcbdb134f32 ("net: gso: Fix skb_segment splat when splitting gso_size mangled skb having linear-headed frag_list")
Signed-off-by: Sean Tranchetti <quic_stranche@quicinc.com>
Signed-off-by: Subash Abhinov Kasiviswanathan <quic_subashab@quicinc.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://lore.kernel.org/r/1671084718-24796-1-git-send-email-quic_subashab@quicinc.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/core/skbuff.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 3cbba7099c0ff..4a0eb5593275c 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2482,6 +2482,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta)
 				insp = list;
 			} else {
 				/* Eaten partially. */
+				if (skb_is_gso(skb) && !list->head_frag &&
+				    skb_headlen(list))
+					skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
 
 				if (skb_shared(list)) {
 					/* Sucks! We need to fork list. :-( */
-- 
GitLab


From 4545c6a3d6ba71747eaa984c338ddd745e56e23f Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@kernel.org>
Date: Sat, 17 Dec 2022 10:46:44 +0000
Subject: [PATCH 4078/4122] powerpc/msi: Fix deassociation of MSI descriptors

Since 2f2940d16823 ("genirq/msi: Remove filter from
msi_free_descs_free_range()"), the core MSI code relies on the
msi_desc->irq field to have been cleared before the descriptor
can be freed, as it indicates that there is no association with
a device anymore.

The irq domain code provides this guarantee, and so does s390,
which is one of the two architectures not using irq domains for
MSIs.

Powerpc, however, is missing this particular requirements,
leading in a splat and leaked MSI descriptors.

Adding the now required irq reset to the handful of powerpc backends
that implement MSIs fixes that particular problem.

Reported-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Marc Zyngier <maz@kernel.org>
Link: https://lore.kernel.org/r/70dab88e-6119-0c12-7c6a-61bcbe239f66@roeck-us.net
---
 arch/powerpc/platforms/4xx/hsta_msi.c  | 1 +
 arch/powerpc/platforms/cell/axon_msi.c | 1 +
 arch/powerpc/platforms/pasemi/msi.c    | 1 +
 arch/powerpc/sysdev/fsl_msi.c          | 1 +
 arch/powerpc/sysdev/mpic_u3msi.c       | 1 +
 5 files changed, 5 insertions(+)

diff --git a/arch/powerpc/platforms/4xx/hsta_msi.c b/arch/powerpc/platforms/4xx/hsta_msi.c
index d4f7fff1fc871..e11b57a62b054 100644
--- a/arch/powerpc/platforms/4xx/hsta_msi.c
+++ b/arch/powerpc/platforms/4xx/hsta_msi.c
@@ -115,6 +115,7 @@ static void hsta_teardown_msi_irqs(struct pci_dev *dev)
 		msi_bitmap_free_hwirqs(&ppc4xx_hsta_msi.bmp, irq, 1);
 		pr_debug("%s: Teardown IRQ %u (index %u)\n", __func__,
 			 entry->irq, irq);
+		entry->irq = 0;
 	}
 }
 
diff --git a/arch/powerpc/platforms/cell/axon_msi.c b/arch/powerpc/platforms/cell/axon_msi.c
index 5b012abca773d..0c11aad896c79 100644
--- a/arch/powerpc/platforms/cell/axon_msi.c
+++ b/arch/powerpc/platforms/cell/axon_msi.c
@@ -289,6 +289,7 @@ static void axon_msi_teardown_msi_irqs(struct pci_dev *dev)
 	msi_for_each_desc(entry, &dev->dev, MSI_DESC_ASSOCIATED) {
 		irq_set_msi_desc(entry->irq, NULL);
 		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
 	}
 }
 
diff --git a/arch/powerpc/platforms/pasemi/msi.c b/arch/powerpc/platforms/pasemi/msi.c
index dc1846660005c..166c97fff16d2 100644
--- a/arch/powerpc/platforms/pasemi/msi.c
+++ b/arch/powerpc/platforms/pasemi/msi.c
@@ -66,6 +66,7 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev)
 		hwirq = virq_to_hw(entry->irq);
 		irq_set_msi_desc(entry->irq, NULL);
 		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
 		msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, ALLOC_CHUNK);
 	}
 }
diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c
index 73c2d70706c0a..57978a44d55b6 100644
--- a/arch/powerpc/sysdev/fsl_msi.c
+++ b/arch/powerpc/sysdev/fsl_msi.c
@@ -132,6 +132,7 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev)
 		msi_data = irq_get_chip_data(entry->irq);
 		irq_set_msi_desc(entry->irq, NULL);
 		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
 		msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1);
 	}
 }
diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c
index 1d8cfdfdf115e..492cb03c0b623 100644
--- a/arch/powerpc/sysdev/mpic_u3msi.c
+++ b/arch/powerpc/sysdev/mpic_u3msi.c
@@ -108,6 +108,7 @@ static void u3msi_teardown_msi_irqs(struct pci_dev *pdev)
 		hwirq = virq_to_hw(entry->irq);
 		irq_set_msi_desc(entry->irq, NULL);
 		irq_dispose_mapping(entry->irq);
+		entry->irq = 0;
 		msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1);
 	}
 }
-- 
GitLab


From 1bc5434632593ea3bb3a1ed2499af8c31796448b Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 16 Nov 2022 21:46:03 +0100
Subject: [PATCH 4079/4122] parisc: Fix inconsistent indenting in
 setup_cmdline()

Fix warning reported by 0-DAY CI Kernel Test Service:
arch/parisc/kernel/setup.c:64 setup_cmdline() warn: inconsistent indenting

Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/setup.c | 26 ++++++++++++--------------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c
index 375f38d6e1a4d..0797db617962b 100644
--- a/arch/parisc/kernel/setup.c
+++ b/arch/parisc/kernel/setup.c
@@ -50,15 +50,15 @@ void __init setup_cmdline(char **cmdline_p)
 	extern unsigned int boot_args[];
 	char *p;
 
-	/* Collect stuff passed in from the boot loader */
+	*cmdline_p = command_line;
 
 	/* boot_args[0] is free-mem start, boot_args[1] is ptr to command line */
-	if (boot_args[0] < 64) {
-		/* called from hpux boot loader */
-		boot_command_line[0] = '\0';
-	} else {
-		strscpy(boot_command_line, (char *)__va(boot_args[1]),
-			COMMAND_LINE_SIZE);
+	if (boot_args[0] < 64)
+		return;	/* return if called from hpux boot loader */
+
+	/* Collect stuff passed in from the boot loader */
+	strscpy(boot_command_line, (char *)__va(boot_args[1]),
+		COMMAND_LINE_SIZE);
 
 	/* autodetect console type (if not done by palo yet) */
 	p = boot_command_line;
@@ -75,16 +75,14 @@ void __init setup_cmdline(char **cmdline_p)
 		strlcat(p, " earlycon=pdc", COMMAND_LINE_SIZE);
 
 #ifdef CONFIG_BLK_DEV_INITRD
-		if (boot_args[2] != 0) /* did palo pass us a ramdisk? */
-		{
-		    initrd_start = (unsigned long)__va(boot_args[2]);
-		    initrd_end = (unsigned long)__va(boot_args[3]);
-		}
-#endif
+	/* did palo pass us a ramdisk? */
+	if (boot_args[2] != 0) {
+		initrd_start = (unsigned long)__va(boot_args[2]);
+		initrd_end = (unsigned long)__va(boot_args[3]);
 	}
+#endif
 
 	strscpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
-	*cmdline_p = command_line;
 }
 
 #ifdef CONFIG_PA11
-- 
GitLab


From 731c4eac848ff9dd42776da8ed3407b257e3abf0 Mon Sep 17 00:00:00 2001
From: Veronika Kabatova <vkabatov@redhat.com>
Date: Wed, 14 Dec 2022 14:20:02 +0100
Subject: [PATCH 4080/4122] buildtar: fix tarballs with EFI_ZBOOT enabled

When CONFIG_EFI_ZBOOT is enabled, the binary name is not Image.gz
anymore but vmlinuz.efi. No vmlinuz gets put into the tarball as the
buildtar script doesn't recognize this name. Remedy this by adding the
binary name to the list of acceptable files to package.

Reported-by: CKI Project <cki-project@redhat.com>
Signed-off-by: Veronika Kabatova <vkabatov@redhat.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 scripts/package/buildtar | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/package/buildtar b/scripts/package/buildtar
index cb54c7f1aa806..4d6f0b128efd9 100755
--- a/scripts/package/buildtar
+++ b/scripts/package/buildtar
@@ -122,7 +122,7 @@ case "${ARCH}" in
 		fi
 		;;
 	arm64)
-		for i in Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo ; do
+		for i in Image.bz2 Image.gz Image.lz4 Image.lzma Image.lzo vmlinuz.efi ; do
 			if [ -f "${objtree}/arch/arm64/boot/${i}" ] ; then
 				cp -v -- "${objtree}/arch/arm64/boot/${i}" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
 				break
-- 
GitLab


From 41f563ab3c33698bdfc3403c7c2e6c94e73681e4 Mon Sep 17 00:00:00 2001
From: Shang XiaoJing <shangxiaojing@huawei.com>
Date: Thu, 17 Nov 2022 10:45:14 +0800
Subject: [PATCH 4081/4122] parisc: led: Fix potential null-ptr-deref in
 start_task()

start_task() calls create_singlethread_workqueue() and not checked the
ret value, which may return NULL. And a null-ptr-deref may happen:

start_task()
    create_singlethread_workqueue() # failed, led_wq is NULL
    queue_delayed_work()
        queue_delayed_work_on()
            __queue_delayed_work()  # warning here, but continue
                __queue_work()      # access wq->flags, null-ptr-deref

Check the ret value and return -ENOMEM if it is NULL.

Fixes: 3499495205a6 ("[PARISC] Use work queue in LED/LCD driver instead of tasklet.")
Signed-off-by: Shang XiaoJing <shangxiaojing@huawei.com>
Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org>
---
 drivers/parisc/led.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/parisc/led.c b/drivers/parisc/led.c
index d4be9d2ee74d9..8bdc5e043831c 100644
--- a/drivers/parisc/led.c
+++ b/drivers/parisc/led.c
@@ -137,6 +137,9 @@ static int start_task(void)
 
 	/* Create the work queue and queue the LED task */
 	led_wq = create_singlethread_workqueue("led_wq");	
+	if (!led_wq)
+		return -ENOMEM;
+
 	queue_delayed_work(led_wq, &led_task, 0);
 
 	return 0;
-- 
GitLab


From 71bdea6f798b425bc0003780b13e3fdecb16a010 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 11 Dec 2022 19:50:20 +0100
Subject: [PATCH 4082/4122] parisc: Align parisc MADV_XXX constants with all
 other architectures

Adjust some MADV_XXX constants to be in sync what their values are on
all other platforms. There is currently no reason to have an own
numbering on parisc, but it requires workarounds in many userspace
sources (e.g. glibc, qemu, ...) - which are often forgotten and thus
introduce bugs and different behaviour on parisc.

A wrapper avoids an ABI breakage for existing userspace applications by
translating any old values to the new ones, so this change allows us to
move over all programs to the new ABI over time.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/uapi/asm/mman.h       | 29 +++++++++++------------
 arch/parisc/kernel/sys_parisc.c           | 28 ++++++++++++++++++++++
 arch/parisc/kernel/syscalls/syscall.tbl   |  2 +-
 tools/arch/parisc/include/uapi/asm/mman.h | 12 +++++-----
 tools/perf/bench/bench.h                  | 12 ----------
 5 files changed, 49 insertions(+), 34 deletions(-)

diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 22133a6a506ef..68c44f99bc931 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -49,6 +49,19 @@
 #define MADV_DONTFORK	10		/* don't inherit across fork */
 #define MADV_DOFORK	11		/* do inherit across fork */
 
+#define MADV_MERGEABLE   12		/* KSM may merge identical pages */
+#define MADV_UNMERGEABLE 13		/* KSM may not merge identical pages */
+
+#define MADV_HUGEPAGE	14		/* Worth backing with hugepages */
+#define MADV_NOHUGEPAGE 15		/* Not worth backing with hugepages */
+
+#define MADV_DONTDUMP   16		/* Explicity exclude from the core dump,
+					   overrides the coredump filter bits */
+#define MADV_DODUMP	17		/* Clear the MADV_NODUMP flag */
+
+#define MADV_WIPEONFORK 18		/* Zero memory on fork, child only */
+#define MADV_KEEPONFORK 19		/* Undo MADV_WIPEONFORK */
+
 #define MADV_COLD	20		/* deactivate these pages */
 #define MADV_PAGEOUT	21		/* reclaim these pages */
 
@@ -57,27 +70,13 @@
 
 #define MADV_DONTNEED_LOCKED	24	/* like DONTNEED, but drop locked pages too */
 
-#define MADV_MERGEABLE   65		/* KSM may merge identical pages */
-#define MADV_UNMERGEABLE 66		/* KSM may not merge identical pages */
-
-#define MADV_HUGEPAGE	67		/* Worth backing with hugepages */
-#define MADV_NOHUGEPAGE	68		/* Not worth backing with hugepages */
-
-#define MADV_DONTDUMP   69		/* Explicity exclude from the core dump,
-					   overrides the coredump filter bits */
-#define MADV_DODUMP	70		/* Clear the MADV_NODUMP flag */
-
-#define MADV_WIPEONFORK 71		/* Zero memory on fork, child only */
-#define MADV_KEEPONFORK 72		/* Undo MADV_WIPEONFORK */
-
-#define MADV_COLLAPSE	73		/* Synchronous hugepage collapse */
+#define MADV_COLLAPSE	25		/* Synchronous hugepage collapse */
 
 #define MADV_HWPOISON     100		/* poison a page for testing */
 #define MADV_SOFT_OFFLINE 101		/* soft offline page for testing */
 
 /* compatibility flags */
 #define MAP_FILE	0
-#define MAP_VARIABLE	0
 
 #define PKEY_DISABLE_ACCESS	0x1
 #define PKEY_DISABLE_WRITE	0x2
diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
index 848b0702005d6..09a34b07f02e6 100644
--- a/arch/parisc/kernel/sys_parisc.c
+++ b/arch/parisc/kernel/sys_parisc.c
@@ -465,3 +465,31 @@ asmlinkage long parisc_inotify_init1(int flags)
 	flags = FIX_O_NONBLOCK(flags);
 	return sys_inotify_init1(flags);
 }
+
+/*
+ * madvise() wrapper
+ *
+ * Up to kernel v6.1 parisc has different values than all other
+ * platforms for the MADV_xxx flags listed below.
+ * To keep binary compatibility with existing userspace programs
+ * translate the former values to the new values.
+ *
+ * XXX: Remove this wrapper in year 2025 (or later)
+ */
+
+asmlinkage notrace long parisc_madvise(unsigned long start, size_t len_in, int behavior)
+{
+	switch (behavior) {
+	case 65: behavior = MADV_MERGEABLE;	break;
+	case 66: behavior = MADV_UNMERGEABLE;	break;
+	case 67: behavior = MADV_HUGEPAGE;	break;
+	case 68: behavior = MADV_NOHUGEPAGE;	break;
+	case 69: behavior = MADV_DONTDUMP;	break;
+	case 70: behavior = MADV_DODUMP;	break;
+	case 71: behavior = MADV_WIPEONFORK;	break;
+	case 72: behavior = MADV_KEEPONFORK;	break;
+	case 73: behavior = MADV_COLLAPSE;	break;
+	}
+
+	return sys_madvise(start, len_in, behavior);
+}
diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
index 8a99c998da9bb..0e42fceb2d5e2 100644
--- a/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@ -131,7 +131,7 @@
 116	common	sysinfo			sys_sysinfo			compat_sys_sysinfo
 117	common	shutdown		sys_shutdown
 118	common	fsync			sys_fsync
-119	common	madvise			sys_madvise
+119	common	madvise			parisc_madvise
 120	common	clone			sys_clone_wrapper
 121	common	setdomainname		sys_setdomainname
 122	common	sendfile		sys_sendfile			compat_sys_sendfile
diff --git a/tools/arch/parisc/include/uapi/asm/mman.h b/tools/arch/parisc/include/uapi/asm/mman.h
index 506c06a6536fb..4cc88a642e106 100644
--- a/tools/arch/parisc/include/uapi/asm/mman.h
+++ b/tools/arch/parisc/include/uapi/asm/mman.h
@@ -1,20 +1,20 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
 #ifndef TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
 #define TOOLS_ARCH_PARISC_UAPI_ASM_MMAN_FIX_H
-#define MADV_DODUMP	70
+#define MADV_DODUMP	17
 #define MADV_DOFORK	11
-#define MADV_DONTDUMP   69
+#define MADV_DONTDUMP   16
 #define MADV_DONTFORK	10
 #define MADV_DONTNEED   4
 #define MADV_FREE	8
-#define MADV_HUGEPAGE	67
-#define MADV_MERGEABLE   65
-#define MADV_NOHUGEPAGE	68
+#define MADV_HUGEPAGE	14
+#define MADV_MERGEABLE  12
+#define MADV_NOHUGEPAGE 15
 #define MADV_NORMAL     0
 #define MADV_RANDOM     1
 #define MADV_REMOVE	9
 #define MADV_SEQUENTIAL 2
-#define MADV_UNMERGEABLE 66
+#define MADV_UNMERGEABLE 13
 #define MADV_WILLNEED   3
 #define MAP_ANONYMOUS	0x10
 #define MAP_DENYWRITE	0x0800
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 6cefb4315d75e..a5d49b3b6a098 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -10,25 +10,13 @@ extern struct timeval bench__start, bench__end, bench__runtime;
  * The madvise transparent hugepage constants were added in glibc
  * 2.13. For compatibility with older versions of glibc, define these
  * tokens if they are not already defined.
- *
- * PA-RISC uses different madvise values from other architectures and
- * needs to be special-cased.
  */
-#ifdef __hppa__
-# ifndef MADV_HUGEPAGE
-#  define MADV_HUGEPAGE		67
-# endif
-# ifndef MADV_NOHUGEPAGE
-#  define MADV_NOHUGEPAGE	68
-# endif
-#else
 # ifndef MADV_HUGEPAGE
 #  define MADV_HUGEPAGE		14
 # endif
 # ifndef MADV_NOHUGEPAGE
 #  define MADV_NOHUGEPAGE	15
 # endif
-#endif
 
 int bench_numa(int argc, const char **argv);
 int bench_sched_messaging(int argc, const char **argv);
-- 
GitLab


From fe94cb1a614d2df2764d49ac959d8b7e4cb98e15 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Wed, 14 Dec 2022 22:17:57 +0100
Subject: [PATCH 4083/4122] parisc: Drop PMD_SHIFT from calculation in
 pgtable.h

PMD_SHIFT isn't defined if CONFIG_PGTABLE_LEVELS == 3, and as
such the kernel test robot found this warning:

 In file included from include/linux/pgtable.h:6,
                  from arch/parisc/kernel/head.S:23:
 arch/parisc/include/asm/pgtable.h:169:32: warning: "PMD_SHIFT" is not defined, evaluates to 0 [-Wundef]
     169 | #if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT)

Avoid the warning by using PLD_SHIFT and BITS_PER_PTE.

Signed-off-by: Helge Deller <deller@gmx.de>
Reported-by: kernel test robot <lkp@intel.com>
Cc: <stable@vger.kernel.org> # 6.0+
---
 arch/parisc/include/asm/pgtable.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index ecd0288544698..68ae77069d23f 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -166,8 +166,8 @@ extern void __update_cache(pte_t pte);
 
 /* This calculates the number of initial pages we need for the initial
  * page tables */
-#if (KERNEL_INITIAL_ORDER) >= (PMD_SHIFT)
-# define PT_INITIAL	(1 << (KERNEL_INITIAL_ORDER - PMD_SHIFT))
+#if (KERNEL_INITIAL_ORDER) >= (PLD_SHIFT + BITS_PER_PTE)
+# define PT_INITIAL	(1 << (KERNEL_INITIAL_ORDER - PLD_SHIFT - BITS_PER_PTE))
 #else
 # define PT_INITIAL	(1)  /* all initial PTEs fit into one page */
 #endif
-- 
GitLab


From 3f6c3d29df58f391cf487b50a24ebd24045ba569 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Thu, 15 Dec 2022 09:26:09 -0800
Subject: [PATCH 4084/4122] rcu: Don't assert interrupts enabled too early in
 boot

The rcu_poll_gp_seq_end() and rcu_poll_gp_seq_end_unlocked() both check
that interrupts are enabled, as they normally should be when waiting for
an RCU grace period.  Except that it is legal to wait for grace periods
during early boot, before interrupts have been enabled for the first time,
and polling for grace periods is required to work during this time.
This can result in false-positive lockdep splats in the presence of
boot-time-initiated tracing.

This commit therefore conditions those interrupts-enabled checks on
rcu_scheduler_active having advanced past RCU_SCHEDULER_INACTIVE, by
which time interrupts have been enabled.

Reported-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Tested-by: Steven Rostedt (Google) <rostedt@goodmis.org>
---
 kernel/rcu/tree.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 93416afebd59c..83c6baafd517f 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1368,7 +1368,7 @@ static void rcu_poll_gp_seq_start(unsigned long *snap)
 {
 	struct rcu_node *rnp = rcu_get_root();
 
-	if (rcu_init_invoked())
+	if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
 		raw_lockdep_assert_held_rcu_node(rnp);
 
 	// If RCU was idle, note beginning of GP.
@@ -1384,7 +1384,7 @@ static void rcu_poll_gp_seq_end(unsigned long *snap)
 {
 	struct rcu_node *rnp = rcu_get_root();
 
-	if (rcu_init_invoked())
+	if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
 		raw_lockdep_assert_held_rcu_node(rnp);
 
 	// If the previously noted GP is still in effect, record the
@@ -1407,7 +1407,8 @@ static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
 	struct rcu_node *rnp = rcu_get_root();
 
 	if (rcu_init_invoked()) {
-		lockdep_assert_irqs_enabled();
+		if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
+			lockdep_assert_irqs_enabled();
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	}
 	rcu_poll_gp_seq_start(snap);
@@ -1423,7 +1424,8 @@ static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
 	struct rcu_node *rnp = rcu_get_root();
 
 	if (rcu_init_invoked()) {
-		lockdep_assert_irqs_enabled();
+		if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
+			lockdep_assert_irqs_enabled();
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	}
 	rcu_poll_gp_seq_end(snap);
-- 
GitLab


From 7236aae5f81f3efbd93d0601e74fc05994bc2580 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 26 Nov 2022 21:29:31 +0100
Subject: [PATCH 4085/4122] parisc: Fix locking in pdc_iodc_print() firmware
 call

Utilize pdc_lock spinlock to protect parallel modifications of the
iodc_dbuf[] buffer, check length to prevent buffer overflow of
iodc_dbuf[], drop the iodc_retbuf[] buffer and fix some wrong
indentings.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # 6.0+
---
 arch/parisc/kernel/firmware.c | 24 +++++++++++++-----------
 1 file changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index 6a7e315bcc2e5..a115315d88e69 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -1288,9 +1288,8 @@ void pdc_io_reset_devices(void)
 
 #endif /* defined(BOOTLOADER) */
 
-/* locked by pdc_console_lock */
-static int __attribute__((aligned(8)))   iodc_retbuf[32];
-static char __attribute__((aligned(64))) iodc_dbuf[4096];
+/* locked by pdc_lock */
+static char iodc_dbuf[4096] __page_aligned_bss;
 
 /**
  * pdc_iodc_print - Console print using IODC.
@@ -1307,6 +1306,9 @@ int pdc_iodc_print(const unsigned char *str, unsigned count)
 	unsigned int i;
 	unsigned long flags;
 
+	count = min_t(unsigned int, count, sizeof(iodc_dbuf));
+
+	spin_lock_irqsave(&pdc_lock, flags);
 	for (i = 0; i < count;) {
 		switch(str[i]) {
 		case '\n':
@@ -1322,12 +1324,11 @@ int pdc_iodc_print(const unsigned char *str, unsigned count)
 	}
 
 print:
-        spin_lock_irqsave(&pdc_lock, flags);
-        real32_call(PAGE0->mem_cons.iodc_io,
-                    (unsigned long)PAGE0->mem_cons.hpa, ENTRY_IO_COUT,
-                    PAGE0->mem_cons.spa, __pa(PAGE0->mem_cons.dp.layers),
-                    __pa(iodc_retbuf), 0, __pa(iodc_dbuf), i, 0);
-        spin_unlock_irqrestore(&pdc_lock, flags);
+	real32_call(PAGE0->mem_cons.iodc_io,
+		(unsigned long)PAGE0->mem_cons.hpa, ENTRY_IO_COUT,
+		PAGE0->mem_cons.spa, __pa(PAGE0->mem_cons.dp.layers),
+		__pa(pdc_result), 0, __pa(iodc_dbuf), i, 0);
+	spin_unlock_irqrestore(&pdc_lock, flags);
 
 	return i;
 }
@@ -1354,10 +1355,11 @@ int pdc_iodc_getc(void)
 	real32_call(PAGE0->mem_kbd.iodc_io,
 		    (unsigned long)PAGE0->mem_kbd.hpa, ENTRY_IO_CIN,
 		    PAGE0->mem_kbd.spa, __pa(PAGE0->mem_kbd.dp.layers), 
-		    __pa(iodc_retbuf), 0, __pa(iodc_dbuf), 1, 0);
+		    __pa(pdc_result), 0, __pa(iodc_dbuf), 1, 0);
 
 	ch = *iodc_dbuf;
-	status = *iodc_retbuf;
+	/* like convert_to_wide() but for first return value only: */
+	status = *(int *)&pdc_result;
 	spin_unlock_irqrestore(&pdc_lock, flags);
 
 	if (status == 0)
-- 
GitLab


From 7e6652c79ecd74e1112500668d956367dc3772a5 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 26 Nov 2022 21:35:29 +0100
Subject: [PATCH 4086/4122] parisc: Drop duplicate kgdb_pdc console

The kgdb console is already implemented and registered in pdc_cons.c,
so the duplicate code can be dropped.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # 6.1+
---
 arch/parisc/kernel/kgdb.c | 20 --------------------
 1 file changed, 20 deletions(-)

diff --git a/arch/parisc/kernel/kgdb.c b/arch/parisc/kernel/kgdb.c
index ab7620f695be1..b16fa9bac5f44 100644
--- a/arch/parisc/kernel/kgdb.c
+++ b/arch/parisc/kernel/kgdb.c
@@ -208,23 +208,3 @@ int kgdb_arch_handle_exception(int trap, int signo,
 	}
 	return -1;
 }
-
-/* KGDB console driver which uses PDC to read chars from keyboard */
-
-static void kgdb_pdc_write_char(u8 chr)
-{
-	/* no need to print char. kgdb will do it. */
-}
-
-static struct kgdb_io kgdb_pdc_io_ops = {
-	.name		= "kgdb_pdc",
-	.read_char	= pdc_iodc_getc,
-	.write_char	= kgdb_pdc_write_char,
-};
-
-static int __init kgdb_pdc_init(void)
-{
-	kgdb_register_io_module(&kgdb_pdc_io_ops);
-	return 0;
-}
-early_initcall(kgdb_pdc_init);
-- 
GitLab


From 7dc4dbfe750e1f18c511e73c8ed114da8de9ff85 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 17 Dec 2022 17:45:40 +0100
Subject: [PATCH 4087/4122] parisc: Drop locking in pdc console code

No need to have specific locking for console I/O since
the PDC functions provide an own locking.

Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # 6.1+
---
 arch/parisc/kernel/pdc_cons.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c
index 7d0989f523d03..cf3bf82323746 100644
--- a/arch/parisc/kernel/pdc_cons.c
+++ b/arch/parisc/kernel/pdc_cons.c
@@ -12,37 +12,27 @@
 #include <asm/page.h>		/* for PAGE0 */
 #include <asm/pdc.h>		/* for iodc_call() proto and friends */
 
-static DEFINE_SPINLOCK(pdc_console_lock);
-
 static void pdc_console_write(struct console *co, const char *s, unsigned count)
 {
 	int i = 0;
-	unsigned long flags;
 
-	spin_lock_irqsave(&pdc_console_lock, flags);
 	do {
 		i += pdc_iodc_print(s + i, count - i);
 	} while (i < count);
-	spin_unlock_irqrestore(&pdc_console_lock, flags);
 }
 
 #ifdef CONFIG_KGDB
 static int kgdb_pdc_read_char(void)
 {
-	int c;
-	unsigned long flags;
-
-	spin_lock_irqsave(&pdc_console_lock, flags);
-	c = pdc_iodc_getc();
-	spin_unlock_irqrestore(&pdc_console_lock, flags);
+	int c = pdc_iodc_getc();
 
 	return (c <= 0) ? NO_POLL_CHAR : c;
 }
 
 static void kgdb_pdc_write_char(u8 chr)
 {
-	if (PAGE0->mem_cons.cl_class != CL_DUPLEX)
-		pdc_console_write(NULL, &chr, 1);
+	/* no need to print char as it's shown on standard console */
+	/* pdc_iodc_print(&chr, 1); */
 }
 
 static struct kgdb_io kgdb_pdc_io_ops = {
-- 
GitLab


From 4add395bc77f19e2e5cedbef4368ffdebc89b165 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 17 Dec 2022 18:38:03 +0100
Subject: [PATCH 4088/4122] parisc: Move pdc_result struct to firmware.c

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/kernel/firmware.c |  4 ++--
 arch/parisc/kernel/real2.S    | 17 ++---------------
 2 files changed, 4 insertions(+), 17 deletions(-)

diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index a115315d88e69..b31775928bb02 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -74,8 +74,8 @@
 static DEFINE_SPINLOCK(pdc_lock);
 #endif
 
-extern unsigned long pdc_result[NUM_PDC_RESULT];
-extern unsigned long pdc_result2[NUM_PDC_RESULT];
+unsigned long pdc_result[NUM_PDC_RESULT]  __aligned(8);
+unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8);
 
 #ifdef CONFIG_64BIT
 #define WIDE_FIRMWARE 0x1
diff --git a/arch/parisc/kernel/real2.S b/arch/parisc/kernel/real2.S
index 2b16d8d6598f1..4dc12c4c09809 100644
--- a/arch/parisc/kernel/real2.S
+++ b/arch/parisc/kernel/real2.S
@@ -15,28 +15,15 @@
 
 #include <linux/linkage.h>
 
-
-	.section	.bss
-
-	.export pdc_result
-	.export pdc_result2
-	.align 8
-pdc_result:
-	.block	ASM_PDC_RESULT_SIZE
-pdc_result2:
-	.block	ASM_PDC_RESULT_SIZE
-
 	.export real_stack
-	.export real32_stack
 	.export real64_stack
-	.align	64
+	__PAGE_ALIGNED_BSS
 real_stack:
-real32_stack:
 real64_stack:
 	.block	8192
 
 #define N_SAVED_REGS 9
-
+	.section	.bss
 save_cr_space:
 	.block	REG_SZ * N_SAVED_REGS
 save_cr_end:
-- 
GitLab


From 9086e6017957c5cd6ea28d94b70e0d513d6b7800 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sat, 17 Dec 2022 20:05:43 +0100
Subject: [PATCH 4089/4122] parisc: Add missing FORCE prerequisites in Makefile

Fix those make warnings:
    arch/parisc/kernel/vdso32/Makefile:30: FORCE prerequisite is missing
    arch/parisc/kernel/vdso64/Makefile:30: FORCE prerequisite is missing

Add the missing FORCE prerequisites for all build targets identified by
"make help".

Fixes: e1f86d7b4b2a5213 ("kbuild: warn if FORCE is missing for if_changed(_dep,_rule) and filechk")
Signed-off-by: Helge Deller <deller@gmx.de>
Cc: <stable@vger.kernel.org> # 5.18+
---
 arch/parisc/kernel/vdso32/Makefile | 4 ++--
 arch/parisc/kernel/vdso64/Makefile | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile
index 85b1c6d261d12..4459a48d23033 100644
--- a/arch/parisc/kernel/vdso32/Makefile
+++ b/arch/parisc/kernel/vdso32/Makefile
@@ -26,7 +26,7 @@ $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE
 
 # Force dependency (incbin is bad)
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC)
+$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) FORCE
 	$(call if_changed,vdso32ld)
 
 # assembly rules for the .S files
@@ -38,7 +38,7 @@ $(obj-cvdso32): %.o: %.c FORCE
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $<
 quiet_cmd_vdso32cc = VDSO32C $@
diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile
index a30f5ec5eb4bf..f3d6045793f4c 100644
--- a/arch/parisc/kernel/vdso64/Makefile
+++ b/arch/parisc/kernel/vdso64/Makefile
@@ -26,7 +26,7 @@ $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE
 
 # Force dependency (incbin is bad)
 # link rule for the .so file, .lds has to be first
-$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC)
+$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) FORCE
 	$(call if_changed,vdso64ld)
 
 # assembly rules for the .S files
@@ -35,7 +35,7 @@ $(obj-vdso64): %.o: %.S FORCE
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@
+      cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $(filter-out FORCE, $^) -o $@
 quiet_cmd_vdso64as = VDSO64A $@
       cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $<
 
-- 
GitLab


From 9cd3fd2054c3b3055163accbf2f31a4426f10317 Mon Sep 17 00:00:00 2001
From: Cong Wang <cong.wang@bytedance.com>
Date: Sat, 17 Dec 2022 14:17:07 -0800
Subject: [PATCH 4090/4122] net_sched: reject TCF_EM_SIMPLE case for complex
 ematch module

When TCF_EM_SIMPLE was introduced, it is supposed to be convenient
for ematch implementation:

https://lore.kernel.org/all/20050105110048.GO26856@postel.suug.ch/

"You don't have to, providing a 32bit data chunk without TCF_EM_SIMPLE
set will simply result in allocating & copy. It's an optimization,
nothing more."

So if an ematch module provides ops->datalen that means it wants a
complex data structure (saved in its em->data) instead of a simple u32
value. We should simply reject such a combination, otherwise this u32
could be misinterpreted as a pointer.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Reported-and-tested-by: syzbot+4caeae4c7103813598ae@syzkaller.appspotmail.com
Reported-by: Jun Nie <jun.nie@linaro.org>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/ematch.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/ematch.c b/net/sched/ematch.c
index 4ce6813618515..5c1235e6076ae 100644
--- a/net/sched/ematch.c
+++ b/net/sched/ematch.c
@@ -255,6 +255,8 @@ static int tcf_em_validate(struct tcf_proto *tp,
 			 * the value carried.
 			 */
 			if (em_hdr->flags & TCF_EM_SIMPLE) {
+				if (em->ops->datalen > 0)
+					goto errout;
 				if (data_len < sizeof(u32))
 					goto errout;
 				em->data = *(u32 *) data;
-- 
GitLab


From 4feb2c44629e6f9b459b41a5a60491069d346a95 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:19:47 +0000
Subject: [PATCH 4091/4122] rxrpc: Fix missing unlock in rxrpc_do_sendmsg()

One of the error paths in rxrpc_do_sendmsg() doesn't unlock the call mutex
before returning.  Fix it to do this.

Note that this still doesn't get rid of the checker warning:

   ../net/rxrpc/sendmsg.c:617:5: warning: context imbalance in 'rxrpc_do_sendmsg' - wrong count at exit

I think the interplay between the socket lock and the call's user_mutex may
be too complicated for checker to analyse, especially as
rxrpc_new_client_call_for_sendmsg(), which it calls, returns with the
call's user_mutex if successful but unconditionally drops the socket lock.

Fixes: e754eba685aa ("rxrpc: Provide a cmsg to specify the amount of Tx data for a call")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/sendmsg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 9fa7e37f7155b..cde1e65f16b45 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -625,7 +625,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 			if (call->tx_total_len != -1 ||
 			    call->tx_pending ||
 			    call->tx_top != 0)
-				goto error_put;
+				goto out_put_unlock;
 			call->tx_total_len = p.call.tx_total_len;
 		}
 	}
-- 
GitLab


From fdb99487b0189f0ef883e353ad7484c78a8bd425 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:19:56 +0000
Subject: [PATCH 4092/4122] rxrpc: Fix security setting propagation

Fix the propagation of the security settings from sendmsg to the rxrpc_call
struct.

Fixes: f3441d4125fc ("rxrpc: Copy client call parameters into rxrpc_call earlier")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/call_object.c | 1 +
 net/rxrpc/conn_client.c | 2 --
 net/rxrpc/security.c    | 6 +++---
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index be5eb8cdf5497..89dcf60b11587 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -217,6 +217,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
 	call->tx_total_len	= p->tx_total_len;
 	call->key		= key_get(cp->key);
 	call->local		= rxrpc_get_local(cp->local, rxrpc_local_get_call);
+	call->security_level	= cp->security_level;
 	if (p->kernel)
 		__set_bit(RXRPC_CALL_KERNEL, &call->flags);
 	if (cp->upgrade)
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index a08e33c9e54b2..87efa0373aed3 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -551,8 +551,6 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 	call->conn	= rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
 	call->cid	= conn->proto.cid | channel;
 	call->call_id	= call_id;
-	call->security	= conn->security;
-	call->security_ix = conn->security_ix;
 	call->dest_srx.srx_service = conn->service_id;
 
 	trace_rxrpc_connect_call(call);
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 209f2c25a0da9..ab968f65a4900 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -67,13 +67,13 @@ const struct rxrpc_security *rxrpc_security_lookup(u8 security_index)
  */
 int rxrpc_init_client_call_security(struct rxrpc_call *call)
 {
-	const struct rxrpc_security *sec;
+	const struct rxrpc_security *sec = &rxrpc_no_security;
 	struct rxrpc_key_token *token;
 	struct key *key = call->key;
 	int ret;
 
 	if (!key)
-		return 0;
+		goto found;
 
 	ret = key_validate(key);
 	if (ret < 0)
@@ -88,7 +88,7 @@ int rxrpc_init_client_call_security(struct rxrpc_call *call)
 
 found:
 	call->security = sec;
-	_leave(" = 0");
+	call->security_ix = sec->security_index;
 	return 0;
 }
 
-- 
GitLab


From eaa02390adb03b82f04babebf0cdd233793aecf5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:20:04 +0000
Subject: [PATCH 4093/4122] rxrpc: Fix NULL deref in rxrpc_unuse_local()

Fix rxrpc_unuse_local() to get the debug_id *after* checking to see if
local is NULL.

Fixes: a2cf3264f331 ("rxrpc: Fold __rxrpc_unuse_local() into rxrpc_unuse_local()")
Reported-by: syzbot+3538a6a72efa8b059c38@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
Tested-by: syzbot+3538a6a72efa8b059c38@syzkaller.appspotmail.com
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/local_object.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 44222923c0d11..24ee585d9aafb 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -357,10 +357,11 @@ struct rxrpc_local *rxrpc_use_local(struct rxrpc_local *local,
  */
 void rxrpc_unuse_local(struct rxrpc_local *local, enum rxrpc_local_trace why)
 {
-	unsigned int debug_id = local->debug_id;
+	unsigned int debug_id;
 	int r, u;
 
 	if (local) {
+		debug_id = local->debug_id;
 		r = refcount_read(&local->ref);
 		u = atomic_dec_return(&local->active_users);
 		trace_rxrpc_local(debug_id, why, r, u);
-- 
GitLab


From 8fbcc83334a7b5b42b6bc1fae2458bf25eb57768 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:20:13 +0000
Subject: [PATCH 4094/4122] rxrpc: Fix I/O thread startup getting skipped

When starting a kthread, the __kthread_create_on_node() function, as called
from kthread_run(), waits for a completion to indicate that the task_struct
(or failure state) of the new kernel thread is available before continuing.

This does not wait, however, for the thread function to be invoked and,
indeed, will skip it if kthread_stop() gets called before it gets there.

If this happens, though, kthread_run() will have returned successfully,
indicating that the thread was started and returning the task_struct
pointer.  The actual error indication is returned by kthread_stop().

Note that this is ambiguous, as the caller cannot tell whether the -EINTR
error code came from kthread() or from the thread function.

This was encountered in the new rxrpc I/O thread, where if the system is
being pounded hard by, say, syzbot, the check of KTHREAD_SHOULD_STOP can be
delayed long enough for kthread_stop() to get called when rxrpc releases a
socket - and this causes an oops because the I/O thread function doesn't
get started and thus doesn't remove the rxrpc_local struct from the
local_endpoints list.

Fix this by using a completion to wait for the thread to actually enter
rxrpc_io_thread().  This makes sure the thread can't be prematurely
stopped and makes sure the relied-upon cleanup is done.

Fixes: a275da62e8c1 ("rxrpc: Create a per-local endpoint receive queue and I/O thread")
Reported-by: syzbot+3538a6a72efa8b059c38@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: Hillf Danton <hdanton@sina.com>
Link: https://lore.kernel.org/r/000000000000229f1505ef2b6159@google.com/
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-internal.h  | 1 +
 net/rxrpc/io_thread.c    | 2 ++
 net/rxrpc/local_object.c | 2 ++
 3 files changed, 5 insertions(+)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index e7dccab7b741d..37f3aec784ccf 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -287,6 +287,7 @@ struct rxrpc_local {
 	struct hlist_node	link;
 	struct socket		*socket;	/* my UDP socket */
 	struct task_struct	*io_thread;
+	struct completion	io_thread_ready; /* Indication that the I/O thread started */
 	struct rxrpc_sock __rcu	*service;	/* Service(s) listening on this endpoint */
 	struct rw_semaphore	defrag_sem;	/* control re-enablement of IP DF bit */
 	struct sk_buff_head	rx_queue;	/* Received packets */
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index d83ae31930325..e460e4151c165 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -426,6 +426,8 @@ int rxrpc_io_thread(void *data)
 	struct rxrpc_call *call;
 	struct sk_buff *skb;
 
+	complete(&local->io_thread_ready);
+
 	skb_queue_head_init(&rx_queue);
 
 	set_user_nice(current, MIN_NICE);
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 24ee585d9aafb..270b63d8f37a6 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -97,6 +97,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
 		local->rxnet = rxnet;
 		INIT_HLIST_NODE(&local->link);
 		init_rwsem(&local->defrag_sem);
+		init_completion(&local->io_thread_ready);
 		skb_queue_head_init(&local->rx_queue);
 		INIT_LIST_HEAD(&local->call_attend_q);
 		local->client_bundles = RB_ROOT;
@@ -189,6 +190,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 		goto error_sock;
 	}
 
+	wait_for_completion(&local->io_thread_ready);
 	local->io_thread = io_thread;
 	_leave(" = 0");
 	return 0;
-- 
GitLab


From 608aecd16a31269485e2980898029dd01b03a73e Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:20:21 +0000
Subject: [PATCH 4095/4122] rxrpc: Fix locking issues in
 rxrpc_put_peer_locked()

Now that rxrpc_put_local() may call kthread_stop(), it can't be called
under spinlock as it might sleep.  This can cause a problem in the peer
keepalive code in rxrpc as it tries to avoid dropping the peer_hash_lock
from the point it needs to re-add peer->keepalive_link to going round the
loop again in rxrpc_peer_keepalive_dispatch().

Fix this by just dropping the lock when we don't need it and accepting that
we'll have to take it again.  This code is only called about every 20s for
each peer, so not very often.

This allows rxrpc_put_peer_unlocked() to be removed also.

If triggered, this bug produces an oops like the following, as reproduced
by a syzbot reproducer for a different oops[1]:

BUG: sleeping function called from invalid context at kernel/sched/completion.c:101
...
RCU nest depth: 0, expected: 0
3 locks held by kworker/u9:0/50:
 #0: ffff88810e74a138 ((wq_completion)krxrpcd){+.+.}-{0:0}, at: process_one_work+0x294/0x636
 #1: ffff8881013a7e20 ((work_completion)(&rxnet->peer_keepalive_work)){+.+.}-{0:0}, at: process_one_work+0x294/0x636
 #2: ffff88817d366390 (&rxnet->peer_hash_lock){+.+.}-{2:2}, at: rxrpc_peer_keepalive_dispatch+0x2bd/0x35f
...
Call Trace:
 <TASK>
 dump_stack_lvl+0x4c/0x5f
 __might_resched+0x2cf/0x2f2
 __wait_for_common+0x87/0x1e8
 kthread_stop+0x14d/0x255
 rxrpc_peer_keepalive_dispatch+0x333/0x35f
 rxrpc_peer_keepalive_worker+0x2e9/0x449
 process_one_work+0x3c1/0x636
 worker_thread+0x25f/0x359
 kthread+0x1a6/0x1b5
 ret_from_fork+0x1f/0x30

Fixes: a275da62e8c1 ("rxrpc: Create a per-local endpoint receive queue and I/O thread")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/0000000000002b4a9f05ef2b616f@google.com/ [1]
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-internal.h |  1 -
 net/rxrpc/peer_event.c  | 10 +++++++---
 net/rxrpc/peer_object.c | 19 -------------------
 3 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 37f3aec784ccf..5b732a4af009b 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -1073,7 +1073,6 @@ void rxrpc_destroy_all_peers(struct rxrpc_net *);
 struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
 struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace);
 void rxrpc_put_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
-void rxrpc_put_peer_locked(struct rxrpc_peer *, enum rxrpc_peer_trace);
 
 /*
  * proc.c
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 6685bf917aa61..552ba84a255c4 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -235,6 +235,7 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
 	struct rxrpc_peer *peer;
 	const u8 mask = ARRAY_SIZE(rxnet->peer_keepalive) - 1;
 	time64_t keepalive_at;
+	bool use;
 	int slot;
 
 	spin_lock(&rxnet->peer_hash_lock);
@@ -247,9 +248,10 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
 		if (!rxrpc_get_peer_maybe(peer, rxrpc_peer_get_keepalive))
 			continue;
 
-		if (__rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive)) {
-			spin_unlock(&rxnet->peer_hash_lock);
+		use = __rxrpc_use_local(peer->local, rxrpc_local_use_peer_keepalive);
+		spin_unlock(&rxnet->peer_hash_lock);
 
+		if (use) {
 			keepalive_at = peer->last_tx_at + RXRPC_KEEPALIVE_TIME;
 			slot = keepalive_at - base;
 			_debug("%02x peer %u t=%d {%pISp}",
@@ -270,9 +272,11 @@ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet,
 			spin_lock(&rxnet->peer_hash_lock);
 			list_add_tail(&peer->keepalive_link,
 				      &rxnet->peer_keepalive[slot & mask]);
+			spin_unlock(&rxnet->peer_hash_lock);
 			rxrpc_unuse_local(peer->local, rxrpc_local_unuse_peer_keepalive);
 		}
-		rxrpc_put_peer_locked(peer, rxrpc_peer_put_keepalive);
+		rxrpc_put_peer(peer, rxrpc_peer_put_keepalive);
+		spin_lock(&rxnet->peer_hash_lock);
 	}
 
 	spin_unlock(&rxnet->peer_hash_lock);
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 608946dcc5059..82de295393a05 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -438,25 +438,6 @@ void rxrpc_put_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
 	}
 }
 
-/*
- * Drop a ref on a peer record where the caller already holds the
- * peer_hash_lock.
- */
-void rxrpc_put_peer_locked(struct rxrpc_peer *peer, enum rxrpc_peer_trace why)
-{
-	unsigned int debug_id = peer->debug_id;
-	bool dead;
-	int r;
-
-	dead = __refcount_dec_and_test(&peer->ref, &r);
-	trace_rxrpc_peer(debug_id, r - 1, why);
-	if (dead) {
-		hash_del_rcu(&peer->hash_link);
-		list_del_init(&peer->keepalive_link);
-		rxrpc_free_peer(peer);
-	}
-}
-
 /*
  * Make sure all peer records have been discarded.
  */
-- 
GitLab


From c838f1a73d77abadb0810eff0e150ac88fef3da5 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:20:30 +0000
Subject: [PATCH 4096/4122] rxrpc: Fix switched parameters in peer tracing

Fix the switched parameters on rxrpc_alloc_peer() and rxrpc_get_peer().
The ref argument and the why argument got mixed.

Fixes: 47c810a79844 ("rxrpc: trace: Don't use __builtin_return_address for rxrpc_peer tracing")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/rxrpc.h | 2 +-
 net/rxrpc/peer_object.c      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 049b52e7aa6af..c6cfed00d0c6c 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -471,7 +471,7 @@ TRACE_EVENT(rxrpc_peer,
 	    TP_STRUCT__entry(
 		    __field(unsigned int,	peer		)
 		    __field(int,		ref		)
-		    __field(int,		why		)
+		    __field(enum rxrpc_peer_trace, why		)
 			     ),
 
 	    TP_fast_assign(
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 82de295393a05..4eecea2be307b 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -226,7 +226,7 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
 		rxrpc_peer_init_rtt(peer);
 
 		peer->cong_ssthresh = RXRPC_TX_MAX_WINDOW;
-		trace_rxrpc_peer(peer->debug_id, why, 1);
+		trace_rxrpc_peer(peer->debug_id, 1, why);
 	}
 
 	_leave(" = %p", peer);
@@ -382,7 +382,7 @@ struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer, enum rxrpc_peer_trace
 	int r;
 
 	__refcount_inc(&peer->ref, &r);
-	trace_rxrpc_peer(peer->debug_id, why, r + 1);
+	trace_rxrpc_peer(peer->debug_id, r + 1, why);
 	return peer;
 }
 
-- 
GitLab


From 743d1768a008c8eae56ead497c9ba8237b14ee81 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:20:38 +0000
Subject: [PATCH 4097/4122] rxrpc: Fix I/O thread stop

The rxrpc I/O thread checks to see if there's any work it needs to do, and
if not, checks kthread_should_stop() before scheduling, and if it should
stop, breaks out of the loop and tries to clean up and exit.

This can, however, race with socket destruction, wherein outstanding calls
are aborted and released from the socket and then the socket unuses the
local endpoint, causing kthread_stop() to be issued.  The abort is deferred
to the I/O thread and the event can by issued between the I/O thread
checking if there's any work to be done (such as processing call aborts)
and the stop being seen.

This results in the I/O thread stopping processing of events whilst call
cleanup events are still outstanding, leading to connections or other
objects still being around and uncleaned up, which can result in assertions
being triggered, e.g.:

    rxrpc: AF_RXRPC: Leaked client conn 00000000e8009865 {2}
    ------------[ cut here ]------------
    kernel BUG at net/rxrpc/conn_client.c:64!

Fix this by retrieving the kthread_should_stop() indication, then checking
to see if there's more work to do, and going back round the loop if there
is, and breaking out of the loop only if there wasn't.

This was triggered by a syzbot test that produced some other symptom[1].

Fixes: a275da62e8c1 ("rxrpc: Create a per-local endpoint receive queue and I/O thread")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: https://lore.kernel.org/r/0000000000002b4a9f05ef2b616f@google.com/ [1]
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/io_thread.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index e460e4151c165..e6b9f0ceae17c 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -425,6 +425,7 @@ int rxrpc_io_thread(void *data)
 	struct rxrpc_local *local = data;
 	struct rxrpc_call *call;
 	struct sk_buff *skb;
+	bool should_stop;
 
 	complete(&local->io_thread_ready);
 
@@ -478,13 +479,14 @@ int rxrpc_io_thread(void *data)
 		}
 
 		set_current_state(TASK_INTERRUPTIBLE);
+		should_stop = kthread_should_stop();
 		if (!skb_queue_empty(&local->rx_queue) ||
 		    !list_empty(&local->call_attend_q)) {
 			__set_current_state(TASK_RUNNING);
 			continue;
 		}
 
-		if (kthread_should_stop())
+		if (should_stop)
 			break;
 		schedule();
 	}
-- 
GitLab


From 11e1706bc84f60040578056f8cef3d0139b92dda Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:20:46 +0000
Subject: [PATCH 4098/4122] rxrpc: rxperf: Fix uninitialised variable

Dan Carpenter sayeth[1]:

  The patch 75bfdbf2fca3: "rxrpc: Implement an in-kernel rxperf server
  for testing purposes" from Nov 3, 2022, leads to the following Smatch
  static checker warning:

	net/rxrpc/rxperf.c:337 rxperf_deliver_to_call()
	error: uninitialized symbol 'ret'.

Fix this by initialising ret to 0.  The value is only used for tracing
purposes in the rxperf server.

Fixes: 75bfdbf2fca3 ("rxrpc: Implement an in-kernel rxperf server for testing purposes")
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: http://lists.infradead.org/pipermail/linux-afs/2022-December/006124.html [1]
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/rxperf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index 66f5eea291ff1..d33a109e846c1 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -275,7 +275,7 @@ static void rxperf_deliver_to_call(struct work_struct *work)
 	struct rxperf_call *call = container_of(work, struct rxperf_call, work);
 	enum rxperf_call_state state;
 	u32 abort_code, remote_abort = 0;
-	int ret;
+	int ret = 0;
 
 	if (call->state == RXPERF_CALL_COMPLETE)
 		return;
-- 
GitLab


From 31d35a02ad5b803354fe0727686fcbace7a343fe Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 15 Dec 2022 16:20:55 +0000
Subject: [PATCH 4099/4122] rxrpc: Fix the return value of
 rxrpc_new_incoming_call()

Dan Carpenter sayeth[1]:

  The patch 5e6ef4f1017c: "rxrpc: Make the I/O thread take over the
  call and local processor work" from Jan 23, 2020, leads to the
  following Smatch static checker warning:

	net/rxrpc/io_thread.c:283 rxrpc_input_packet()
	warn: bool is not less than zero.

Fix this (for now) by changing rxrpc_new_incoming_call() to return an int
with 0 or error code rather than bool.  Note that the actual return value
of rxrpc_input_packet() is currently ignored.  I have a separate patch to
clean that up.

Fixes: 5e6ef4f1017c ("rxrpc: Make the I/O thread take over the call and local processor work")
Reported-by: Dan Carpenter <error27@gmail.com>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Link: http://lists.infradead.org/pipermail/linux-afs/2022-December/006123.html [1]
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rxrpc/ar-internal.h |  6 +++---
 net/rxrpc/call_accept.c | 18 +++++++++---------
 net/rxrpc/io_thread.c   |  4 ++--
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5b732a4af009b..18092526d3c82 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -812,9 +812,9 @@ extern struct workqueue_struct *rxrpc_workqueue;
  */
 int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
 void rxrpc_discard_prealloc(struct rxrpc_sock *);
-bool rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
-			     struct rxrpc_connection *, struct sockaddr_rxrpc *,
-			     struct sk_buff *);
+int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
+			    struct rxrpc_connection *, struct sockaddr_rxrpc *,
+			    struct sk_buff *);
 void rxrpc_accept_incoming_calls(struct rxrpc_local *);
 int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
 
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index d1850863507f0..c02401656fa98 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
  * If we want to report an error, we mark the skb with the packet type and
  * abort code and return false.
  */
-bool rxrpc_new_incoming_call(struct rxrpc_local *local,
-			     struct rxrpc_peer *peer,
-			     struct rxrpc_connection *conn,
-			     struct sockaddr_rxrpc *peer_srx,
-			     struct sk_buff *skb)
+int rxrpc_new_incoming_call(struct rxrpc_local *local,
+			    struct rxrpc_peer *peer,
+			    struct rxrpc_connection *conn,
+			    struct sockaddr_rxrpc *peer_srx,
+			    struct sk_buff *skb)
 {
 	const struct rxrpc_security *sec = NULL;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -342,7 +342,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
 	/* Don't set up a call for anything other than the first DATA packet. */
 	if (sp->hdr.seq != 1 ||
 	    sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
-		return true; /* Just discard */
+		return 0; /* Just discard */
 
 	rcu_read_lock();
 
@@ -413,7 +413,7 @@ bool rxrpc_new_incoming_call(struct rxrpc_local *local,
 	_leave(" = %p{%d}", call, call->debug_id);
 	rxrpc_input_call_event(call, skb);
 	rxrpc_put_call(call, rxrpc_call_put_input);
-	return true;
+	return 0;
 
 unsupported_service:
 	trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
@@ -425,10 +425,10 @@ no_call:
 reject:
 	rcu_read_unlock();
 	_leave(" = f [%u]", skb->mark);
-	return false;
+	return -EPROTO;
 discard:
 	rcu_read_unlock();
-	return true;
+	return 0;
 }
 
 /*
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index e6b9f0ceae17c..1ad067d66fb60 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -292,7 +292,7 @@ protocol_error:
 	skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
 reject_packet:
 	rxrpc_reject_packet(local, skb);
-	return ret;
+	return 0;
 }
 
 /*
@@ -384,7 +384,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
 		if (rxrpc_to_client(sp))
 			goto bad_message;
 		if (rxrpc_new_incoming_call(conn->local, conn->peer, conn,
-					    peer_srx, skb))
+					    peer_srx, skb) == 0)
 			return 0;
 		goto reject_packet;
 	}
-- 
GitLab


From 10073399cb5e389ab275bd1c9df4b486a2f0c9d4 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Mon, 19 Dec 2022 09:22:15 +0100
Subject: [PATCH 4100/4122] net: microchip: vcap: Fix initialization of value
 and mask

Fix the following smatch warning:

smatch warnings:
drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c:103 vcap_debugfs_show_rule_keyfield() error: uninitialized symbol 'value'.
drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c:106 vcap_debugfs_show_rule_keyfield() error: uninitialized symbol 'mask'.

In case the vcap field was VCAP_FIELD_U128 and the key was different
than IP6_S/DIP then the value and mask were not initialized, therefore
initialize them.

Fixes: 610c32b2ce66 ("net: microchip: vcap: Add vcap_get_rule")
Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <error27@gmail.com>
Reviewed-by: Saeed Mahameed <saeed@kernel.org>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c
index 895bfff550d23..e0b206247f2eb 100644
--- a/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c
+++ b/drivers/net/ethernet/microchip/vcap/vcap_api_debugfs.c
@@ -83,6 +83,8 @@ static void vcap_debugfs_show_rule_keyfield(struct vcap_control *vctrl,
 		hex = true;
 		break;
 	case VCAP_FIELD_U128:
+		value = data->u128.value;
+		mask = data->u128.mask;
 		if (key == VCAP_KF_L3_IP6_SIP || key == VCAP_KF_L3_IP6_DIP) {
 			u8 nvalue[16], nmask[16];
 
-- 
GitLab


From d83b950d44d2982c0e62e3d81b0f35ab09431008 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Sun, 18 Dec 2022 19:08:40 +0100
Subject: [PATCH 4101/4122] myri10ge: Fix an error handling path in
 myri10ge_probe()

Some memory allocated in myri10ge_probe_slices() is not released in the
error handling path of myri10ge_probe().

Add the corresponding kfree(), as already done in the remove function.

Fixes: 0dcffac1a329 ("myri10ge: add multislices support")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 8073d7a90a264..c5687d94ea885 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -3912,6 +3912,7 @@ abort_with_slices:
 	myri10ge_free_slices(mgp);
 
 abort_with_firmware:
+	kfree(mgp->msix_vectors);
 	myri10ge_dummy_rdma(mgp, 0);
 
 abort_with_ioremap:
-- 
GitLab


From e0c8bccd40fc1c19e1d246c39bcf79e357e1ada3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 16 Dec 2022 16:29:17 +0000
Subject: [PATCH 4102/4122] net: stream: purge sk_error_queue in
 sk_stream_kill_queues()

Changheon Lee reported TCP socket leaks, with a nice repro.

It seems we leak TCP sockets with the following sequence:

1) SOF_TIMESTAMPING_TX_ACK is enabled on the socket.

   Each ACK will cook an skb put in error queue, from __skb_tstamp_tx().
   __skb_tstamp_tx() is using skb_clone(), unless
   SOF_TIMESTAMPING_OPT_TSONLY was also requested.

2) If the application is also using MSG_ZEROCOPY, then we put in the
   error queue cloned skbs that had a struct ubuf_info attached to them.

   Whenever an struct ubuf_info is allocated, sock_zerocopy_alloc()
   does a sock_hold().

   As long as the cloned skbs are still in sk_error_queue,
   socket refcount is kept elevated.

3) Application closes the socket, while error queue is not empty.

Since tcp_close() no longer purges the socket error queue,
we might end up with a TCP socket with at least one skb in
error queue keeping the socket alive forever.

This bug can be (ab)used to consume all kernel memory
and freeze the host.

We need to purge the error queue, with proper synchronization
against concurrent writers.

Fixes: 24bcbe1cc69f ("net: stream: don't purge sk_error_queue in sk_stream_kill_queues()")
Reported-by: Changheon Lee <darklight2357@icloud.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/stream.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/core/stream.c b/net/core/stream.c
index 5b1fe2b82eac7..cd06750dd3297 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -196,6 +196,12 @@ void sk_stream_kill_queues(struct sock *sk)
 	/* First the read buffer. */
 	__skb_queue_purge(&sk->sk_receive_queue);
 
+	/* Next, the error queue.
+	 * We need to use queue lock, because other threads might
+	 * add packets to the queue without socket lock being held.
+	 */
+	skb_queue_purge(&sk->sk_error_queue);
+
 	/* Next, the write queue. */
 	WARN_ON_ONCE(!skb_queue_empty(&sk->sk_write_queue));
 
-- 
GitLab


From 1b0c84a32e37cf85d552261005091eb695313f38 Mon Sep 17 00:00:00 2001
From: Huanhuan Wang <huanhuan.wang@corigine.com>
Date: Fri, 16 Dec 2022 15:31:01 +0100
Subject: [PATCH 4103/4122] nfp: fix unaligned io read of capabilities word
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The address of 32-bit extend capability is not qword aligned,
and may cause exception in some arch.

Fixes: 484963ce9f1e ("nfp: extend capability and control words")
Signed-off-by: Huanhuan Wang <huanhuan.wang@corigine.com>
Reviewed-by: Niklas Söderlund <niklas.soderlund@corigine.com>
Signed-off-by: Simon Horman <simon.horman@corigine.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 2314cf55e8213..09053373288fe 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -2509,7 +2509,7 @@ static int nfp_net_read_caps(struct nfp_net *nn)
 {
 	/* Get some of the read-only fields from the BAR */
 	nn->cap = nn_readl(nn, NFP_NET_CFG_CAP);
-	nn->cap_w1 = nn_readq(nn, NFP_NET_CFG_CAP_WORD1);
+	nn->cap_w1 = nn_readl(nn, NFP_NET_CFG_CAP_WORD1);
 	nn->max_mtu = nn_readl(nn, NFP_NET_CFG_MAX_MTU);
 
 	/* ABI 4.x and ctrl vNIC always use chained metadata, in other cases
-- 
GitLab


From 2856a62762c8409e360d4fd452194c8e57ba1058 Mon Sep 17 00:00:00 2001
From: Jeremy Kerr <jk@codeconstruct.com.au>
Date: Fri, 16 Dec 2022 11:44:09 +0800
Subject: [PATCH 4104/4122] mctp: serial: Fix starting value for frame check
 sequence

RFC1662 defines the start state for the crc16 FCS to be 0xffff, but
we're currently starting at zero.

This change uses the correct start state. We're only early in the
adoption for the serial binding, so there aren't yet any other users to
interface to.

Fixes: a0c2ccd9b5ad ("mctp: Add MCTP-over-serial transport binding")
Reported-by: Harsh Tyagi <harshtya@google.com>
Tested-by: Harsh Tyagi <harshtya@google.com>
Signed-off-by: Jeremy Kerr <jk@codeconstruct.com.au>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/mctp/mctp-serial.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/mctp/mctp-serial.c b/drivers/net/mctp/mctp-serial.c
index 7cd103fd34ef7..9f9eaf896047c 100644
--- a/drivers/net/mctp/mctp-serial.c
+++ b/drivers/net/mctp/mctp-serial.c
@@ -35,6 +35,8 @@
 #define BYTE_FRAME		0x7e
 #define BYTE_ESC		0x7d
 
+#define FCS_INIT		0xffff
+
 static DEFINE_IDA(mctp_serial_ida);
 
 enum mctp_serial_state {
@@ -123,7 +125,7 @@ static void mctp_serial_tx_work(struct work_struct *work)
 		buf[2] = dev->txlen;
 
 		if (!dev->txpos)
-			dev->txfcs = crc_ccitt(0, buf + 1, 2);
+			dev->txfcs = crc_ccitt(FCS_INIT, buf + 1, 2);
 
 		txlen = write_chunk(dev, buf + dev->txpos, 3 - dev->txpos);
 		if (txlen <= 0) {
@@ -303,7 +305,7 @@ static void mctp_serial_push_header(struct mctp_serial *dev, unsigned char c)
 	case 1:
 		if (c == MCTP_SERIAL_VERSION) {
 			dev->rxpos++;
-			dev->rxfcs = crc_ccitt_byte(0, c);
+			dev->rxfcs = crc_ccitt_byte(FCS_INIT, c);
 		} else {
 			dev->rxstate = STATE_ERR;
 		}
-- 
GitLab


From 115dd5469019296040359060743de77071ccb6ec Mon Sep 17 00:00:00 2001
From: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Date: Tue, 13 Dec 2022 14:11:36 +0900
Subject: [PATCH 4105/4122] Documentation: devlink: add missing toc entry for
 etas_es58x devlink doc

toc entry is missing for etas_es58x devlink doc and triggers this warning:

  Documentation/networking/devlink/etas_es58x.rst: WARNING: document isn't included in any toctree

Add the missing toc entry.

Fixes: 9f63f96aac92 ("Documentation: devlink: add devlink documentation for the etas_es58x driver")
Signed-off-by: Vincent Mailhol <mailhol.vincent@wanadoo.fr>
Link: https://lore.kernel.org/all/20221213051136.721887-1-mailhol.vincent@wanadoo.fr
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: kernel test robot <lkp@intel.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/networking/devlink/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst
index 4b653d0406276..fee4d3968309b 100644
--- a/Documentation/networking/devlink/index.rst
+++ b/Documentation/networking/devlink/index.rst
@@ -50,6 +50,7 @@ parameters, info versions, and other features it supports.
    :maxdepth: 1
 
    bnxt
+   etas_es58x
    hns3
    ionic
    ice
-- 
GitLab


From 3bc2afcba81275306adfbfca83f38a52858c5940 Mon Sep 17 00:00:00 2001
From: Haibo Chen <haibo.chen@nxp.com>
Date: Tue, 13 Dec 2022 17:43:51 +0800
Subject: [PATCH 4106/4122] can: flexcan: avoid unbalanced pm_runtime_enable
 warning

When do suspend/resume, meet the following warning message:
[   30.028336] flexcan 425b0000.can: Unbalanced pm_runtime_enable!

Balance the pm_runtime_force_suspend() and pm_runtime_force_resume().

Fixes: 8cb53b485f18 ("can: flexcan: add auto stop mode for IMX93 to support wakeup")
Signed-off-by: Haibo Chen <haibo.chen@nxp.com>
Link: https://lore.kernel.org/all/20221213094351.3023858-1-haibo.chen@nxp.com
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 drivers/net/can/flexcan/flexcan-core.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/can/flexcan/flexcan-core.c b/drivers/net/can/flexcan/flexcan-core.c
index 0aeff34e5ae17..6d638c93977bc 100644
--- a/drivers/net/can/flexcan/flexcan-core.c
+++ b/drivers/net/can/flexcan/flexcan-core.c
@@ -2349,9 +2349,15 @@ static int __maybe_unused flexcan_noirq_resume(struct device *device)
 	if (netif_running(dev)) {
 		int err;
 
-		err = pm_runtime_force_resume(device);
-		if (err)
-			return err;
+		/* For the wakeup in auto stop mode, no need to gate on the
+		 * clock here, hardware will do this automatically.
+		 */
+		if (!(device_may_wakeup(device) &&
+		      priv->devtype_data.quirks & FLEXCAN_QUIRK_AUTO_STOP_MODE)) {
+			err = pm_runtime_force_resume(device);
+			if (err)
+				return err;
+		}
 
 		if (device_may_wakeup(device))
 			flexcan_enable_wakeup_irq(priv, false);
-- 
GitLab


From f006229135b7debf4037adb1eb93e358559593db Mon Sep 17 00:00:00 2001
From: Marc Kleine-Budde <mkl@pengutronix.de>
Date: Mon, 19 Dec 2022 11:39:27 +0100
Subject: [PATCH 4107/4122] can: kvaser_usb: hydra: help gcc-13 to figure out
 cmd_len
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Debian's gcc-13 [1] throws the following error in
kvaser_usb_hydra_cmd_size():

[1] gcc version 13.0.0 20221214 (experimental) [master r13-4693-g512098a3316] (Debian 13-20221214-1)

| drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c:502:65: error:
| array subscript ‘struct kvaser_cmd_ext[0]’ is partly outside array
| bounds of ‘unsigned char[32]’ [-Werror=array-bounds=]
|   502 |                 ret = le16_to_cpu(((struct kvaser_cmd_ext *)cmd)->len);

kvaser_usb_hydra_cmd_size() returns the size of given command. It
depends on the command number (cmd->header.cmd_no). For extended
commands (cmd->header.cmd_no == CMD_EXTENDED) the above shown code is
executed.

Help gcc to recognize that this code path is not taken in all cases,
by calling kvaser_usb_hydra_cmd_size() directly after assigning the
command number.

Fixes: aec5fb2268b7 ("can: kvaser_usb: Add support for Kvaser USB hydra family")
Cc: Jimmy Assarsson <extja@kvaser.com>
Cc: Anssi Hannula <anssi.hannula@bitwise.fi>
Link: https://lore.kernel.org/all/20221219110104.1073881-1-mkl@pengutronix.de
Tested-by: Jimmy Assarsson <extja@kvaser.com>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 .../net/can/usb/kvaser_usb/kvaser_usb_hydra.c | 33 ++++++++++++++-----
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
index f688124d6d669..ef341c4254fc8 100644
--- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
+++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c
@@ -545,6 +545,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev,
 					    u8 cmd_no, int channel)
 {
 	struct kvaser_cmd *cmd;
+	size_t cmd_len;
 	int err;
 
 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
@@ -552,6 +553,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev,
 		return -ENOMEM;
 
 	cmd->header.cmd_no = cmd_no;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 	if (channel < 0) {
 		kvaser_usb_hydra_set_cmd_dest_he
 				(cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL);
@@ -568,7 +570,7 @@ static int kvaser_usb_hydra_send_simple_cmd(struct kvaser_usb *dev,
 	kvaser_usb_hydra_set_cmd_transid
 				(cmd, kvaser_usb_hydra_get_next_transid(dev));
 
-	err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
 	if (err)
 		goto end;
 
@@ -584,6 +586,7 @@ kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv,
 {
 	struct kvaser_cmd *cmd;
 	struct kvaser_usb *dev = priv->dev;
+	size_t cmd_len;
 	int err;
 
 	cmd = kzalloc(sizeof(*cmd), GFP_ATOMIC);
@@ -591,14 +594,14 @@ kvaser_usb_hydra_send_simple_cmd_async(struct kvaser_usb_net_priv *priv,
 		return -ENOMEM;
 
 	cmd->header.cmd_no = cmd_no;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 
 	kvaser_usb_hydra_set_cmd_dest_he
 		(cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
 	kvaser_usb_hydra_set_cmd_transid
 				(cmd, kvaser_usb_hydra_get_next_transid(dev));
 
-	err = kvaser_usb_send_cmd_async(priv, cmd,
-					kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd_async(priv, cmd, cmd_len);
 	if (err)
 		kfree(cmd);
 
@@ -742,6 +745,7 @@ static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev,
 {
 	struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
 	struct kvaser_cmd *cmd;
+	size_t cmd_len;
 	u32 value = 0;
 	u32 mask = 0;
 	u16 cap_cmd_res;
@@ -753,13 +757,14 @@ static int kvaser_usb_hydra_get_single_capability(struct kvaser_usb *dev,
 		return -ENOMEM;
 
 	cmd->header.cmd_no = CMD_GET_CAPABILITIES_REQ;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 	cmd->cap_req.cap_cmd = cpu_to_le16(cap_cmd_req);
 
 	kvaser_usb_hydra_set_cmd_dest_he(cmd, card_data->hydra.sysdbg_he);
 	kvaser_usb_hydra_set_cmd_transid
 				(cmd, kvaser_usb_hydra_get_next_transid(dev));
 
-	err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
 	if (err)
 		goto end;
 
@@ -1578,6 +1583,7 @@ static int kvaser_usb_hydra_get_busparams(struct kvaser_usb_net_priv *priv,
 	struct kvaser_usb *dev = priv->dev;
 	struct kvaser_usb_net_hydra_priv *hydra = priv->sub_priv;
 	struct kvaser_cmd *cmd;
+	size_t cmd_len;
 	int err;
 
 	if (!hydra)
@@ -1588,6 +1594,7 @@ static int kvaser_usb_hydra_get_busparams(struct kvaser_usb_net_priv *priv,
 		return -ENOMEM;
 
 	cmd->header.cmd_no = CMD_GET_BUSPARAMS_REQ;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 	kvaser_usb_hydra_set_cmd_dest_he
 		(cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
 	kvaser_usb_hydra_set_cmd_transid
@@ -1597,7 +1604,7 @@ static int kvaser_usb_hydra_get_busparams(struct kvaser_usb_net_priv *priv,
 
 	reinit_completion(&priv->get_busparams_comp);
 
-	err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
 	if (err)
 		return err;
 
@@ -1624,6 +1631,7 @@ static int kvaser_usb_hydra_set_bittiming(const struct net_device *netdev,
 	struct kvaser_cmd *cmd;
 	struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
 	struct kvaser_usb *dev = priv->dev;
+	size_t cmd_len;
 	int err;
 
 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
@@ -1631,6 +1639,7 @@ static int kvaser_usb_hydra_set_bittiming(const struct net_device *netdev,
 		return -ENOMEM;
 
 	cmd->header.cmd_no = CMD_SET_BUSPARAMS_REQ;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 	memcpy(&cmd->set_busparams_req.busparams_nominal, busparams,
 	       sizeof(cmd->set_busparams_req.busparams_nominal));
 
@@ -1639,7 +1648,7 @@ static int kvaser_usb_hydra_set_bittiming(const struct net_device *netdev,
 	kvaser_usb_hydra_set_cmd_transid
 				(cmd, kvaser_usb_hydra_get_next_transid(dev));
 
-	err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
 
 	kfree(cmd);
 
@@ -1652,6 +1661,7 @@ static int kvaser_usb_hydra_set_data_bittiming(const struct net_device *netdev,
 	struct kvaser_cmd *cmd;
 	struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
 	struct kvaser_usb *dev = priv->dev;
+	size_t cmd_len;
 	int err;
 
 	cmd = kzalloc(sizeof(*cmd), GFP_KERNEL);
@@ -1659,6 +1669,7 @@ static int kvaser_usb_hydra_set_data_bittiming(const struct net_device *netdev,
 		return -ENOMEM;
 
 	cmd->header.cmd_no = CMD_SET_BUSPARAMS_FD_REQ;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 	memcpy(&cmd->set_busparams_req.busparams_data, busparams,
 	       sizeof(cmd->set_busparams_req.busparams_data));
 
@@ -1676,7 +1687,7 @@ static int kvaser_usb_hydra_set_data_bittiming(const struct net_device *netdev,
 	kvaser_usb_hydra_set_cmd_transid
 				(cmd, kvaser_usb_hydra_get_next_transid(dev));
 
-	err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
 
 	kfree(cmd);
 
@@ -1804,6 +1815,7 @@ static int kvaser_usb_hydra_get_software_info(struct kvaser_usb *dev)
 static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev)
 {
 	struct kvaser_cmd *cmd;
+	size_t cmd_len;
 	int err;
 	u32 flags;
 	struct kvaser_usb_dev_card_data *card_data = &dev->card_data;
@@ -1813,6 +1825,7 @@ static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev)
 		return -ENOMEM;
 
 	cmd->header.cmd_no = CMD_GET_SOFTWARE_DETAILS_REQ;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 	cmd->sw_detail_req.use_ext_cmd = 1;
 	kvaser_usb_hydra_set_cmd_dest_he
 				(cmd, KVASER_USB_HYDRA_HE_ADDRESS_ILLEGAL);
@@ -1820,7 +1833,7 @@ static int kvaser_usb_hydra_get_software_details(struct kvaser_usb *dev)
 	kvaser_usb_hydra_set_cmd_transid
 				(cmd, kvaser_usb_hydra_get_next_transid(dev));
 
-	err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
 	if (err)
 		goto end;
 
@@ -1938,6 +1951,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv)
 {
 	struct kvaser_usb *dev = priv->dev;
 	struct kvaser_cmd *cmd;
+	size_t cmd_len;
 	int err;
 
 	if ((priv->can.ctrlmode &
@@ -1953,6 +1967,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv)
 		return -ENOMEM;
 
 	cmd->header.cmd_no = CMD_SET_DRIVERMODE_REQ;
+	cmd_len = kvaser_usb_hydra_cmd_size(cmd);
 	kvaser_usb_hydra_set_cmd_dest_he
 		(cmd, dev->card_data.hydra.channel_to_he[priv->channel]);
 	kvaser_usb_hydra_set_cmd_transid
@@ -1962,7 +1977,7 @@ static int kvaser_usb_hydra_set_opt_mode(const struct kvaser_usb_net_priv *priv)
 	else
 		cmd->set_ctrlmode.mode = KVASER_USB_HYDRA_CTRLMODE_NORMAL;
 
-	err = kvaser_usb_send_cmd(dev, cmd, kvaser_usb_hydra_cmd_size(cmd));
+	err = kvaser_usb_send_cmd(dev, cmd, cmd_len);
 	kfree(cmd);
 
 	return err;
-- 
GitLab


From 4934fbfb3ff09b8500f63d4624ed8b41647bb822 Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Fri, 21 Oct 2022 10:03:52 +0200
Subject: [PATCH 4108/4122] parisc: Show MPE/iX model string at bootup

Some (mostly 64-bit machines) machines allow to run MPE/iX and report the MPE
model string via firmware call. Enhance the pdc_model_sysmodel() function to
report that model string.
Note that some 32-bit machines like the B160L wrongly report success for the
firmware call, so include a check to prevent showing wrong info.

Signed-off-by: Helge Deller <deller@gmx.de>
---
 arch/parisc/include/asm/pdc.h  | 2 +-
 arch/parisc/kernel/firmware.c  | 4 ++--
 arch/parisc/kernel/processor.c | 9 +++++++--
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/parisc/include/asm/pdc.h b/arch/parisc/include/asm/pdc.h
index fcbcf9a96c111..40793bef8429f 100644
--- a/arch/parisc/include/asm/pdc.h
+++ b/arch/parisc/include/asm/pdc.h
@@ -37,7 +37,7 @@ int pdc_system_map_find_mods(struct pdc_system_map_mod_info *pdc_mod_info,
 int pdc_system_map_find_addrs(struct pdc_system_map_addr_info *pdc_addr_info,
 			      long mod_index, long addr_index);
 int pdc_model_info(struct pdc_model *model);
-int pdc_model_sysmodel(char *name);
+int pdc_model_sysmodel(unsigned int os_id, char *name);
 int pdc_model_cpuid(unsigned long *cpu_id);
 int pdc_model_versions(unsigned long *versions, int id);
 int pdc_model_capabilities(unsigned long *capabilities);
diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c
index b31775928bb02..4dfe1f49c5c8b 100644
--- a/arch/parisc/kernel/firmware.c
+++ b/arch/parisc/kernel/firmware.c
@@ -527,14 +527,14 @@ int pdc_model_info(struct pdc_model *model)
  * Using OS_ID_HPUX will return the equivalent of the 'modelname' command
  * on HP/UX.
  */
-int pdc_model_sysmodel(char *name)
+int pdc_model_sysmodel(unsigned int os_id, char *name)
 {
         int retval;
 	unsigned long flags;
 
         spin_lock_irqsave(&pdc_lock, flags);
         retval = mem_pdc_call(PDC_MODEL, PDC_MODEL_SYSMODEL, __pa(pdc_result),
-                              OS_ID_HPUX, __pa(name));
+                              os_id, __pa(name));
         convert_to_wide(pdc_result);
 
         if (retval == PDC_OK) {
diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c
index dddaaa6e7a825..ba07e760d3c76 100644
--- a/arch/parisc/kernel/processor.c
+++ b/arch/parisc/kernel/processor.c
@@ -272,10 +272,15 @@ void __init collect_boot_cpu_data(void)
 		printk(KERN_INFO "capabilities 0x%lx\n",
 			boot_cpu_data.pdc.capabilities);
 
-	if (pdc_model_sysmodel(boot_cpu_data.pdc.sys_model_name) == PDC_OK)
-		printk(KERN_INFO "model %s\n",
+	if (pdc_model_sysmodel(OS_ID_HPUX, boot_cpu_data.pdc.sys_model_name) == PDC_OK)
+		pr_info("HP-UX model name: %s\n",
 			boot_cpu_data.pdc.sys_model_name);
 
+	serial_no[0] = 0;
+	if (pdc_model_sysmodel(OS_ID_MPEXL, serial_no) == PDC_OK &&
+		serial_no[0])
+		pr_info("MPE/iX model name: %s\n", serial_no);
+
 	dump_stack_set_arch_desc("%s", boot_cpu_data.pdc.sys_model_name);
 
 	boot_cpu_data.hversion =  boot_cpu_data.pdc.model.hversion;
-- 
GitLab


From 2b76cfe190305fe02d8120df64f2a1bb6a3d3889 Mon Sep 17 00:00:00 2001
From: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Date: Sat, 10 Dec 2022 12:33:46 +0100
Subject: [PATCH 4109/4122] ARM: dts: spear: drop 0x from unit address

By coding style, unit address should not start with 0x.

Signed-off-by: Krzysztof Kozlowski <krzysztof.kozlowski@linaro.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Link: https://lore.kernel.org/r/20221210113347.63939-1-krzysztof.kozlowski@linaro.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/boot/dts/spear300.dtsi | 2 +-
 arch/arm/boot/dts/spear310.dtsi | 2 +-
 arch/arm/boot/dts/spear320.dtsi | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi
index b39bd5a226276..f1135e887f7b8 100644
--- a/arch/arm/boot/dts/spear300.dtsi
+++ b/arch/arm/boot/dts/spear300.dtsi
@@ -46,7 +46,7 @@
 			status = "disabled";
 		};
 
-		shirq: interrupt-controller@0x50000000 {
+		shirq: interrupt-controller@50000000 {
 			compatible = "st,spear300-shirq";
 			reg = <0x50000000 0x1000>;
 			interrupts = <28>;
diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi
index 77570833d46b2..ce08d88209403 100644
--- a/arch/arm/boot/dts/spear310.dtsi
+++ b/arch/arm/boot/dts/spear310.dtsi
@@ -34,7 +34,7 @@
 			status = "disabled";
 		};
 
-		shirq: interrupt-controller@0xb4000000 {
+		shirq: interrupt-controller@b4000000 {
 			compatible = "st,spear310-shirq";
 			reg = <0xb4000000 0x1000>;
 			interrupts = <28 29 30 1>;
diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi
index b12474446a487..56f141297ea35 100644
--- a/arch/arm/boot/dts/spear320.dtsi
+++ b/arch/arm/boot/dts/spear320.dtsi
@@ -49,7 +49,7 @@
 			status = "disabled";
 		};
 
-		shirq: interrupt-controller@0xb3000000 {
+		shirq: interrupt-controller@b3000000 {
 			compatible = "st,spear320-shirq";
 			reg = <0xb3000000 0x1000>;
 			interrupts = <30 28 29 1>;
-- 
GitLab


From 4c03c4188cfb831e4ac093599192aedd60625a45 Mon Sep 17 00:00:00 2001
From: Corentin Labbe <clabbe@baylibre.com>
Date: Wed, 14 Dec 2022 21:42:23 +0100
Subject: [PATCH 4110/4122] MAINTAINERS: add related dts to IXP4xx

get_maintainer.pl currently give no maintainer on all IXP4xx DTS files.
Add them to the set of files handled by IXP4xx maintainers.

Signed-off-by: Corentin Labbe <clabbe@baylibre.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Link: https://lore.kernel.org/r/20221005105734.3513581-1-clabbe@baylibre.com
Link: https://lore.kernel.org/r/20221214204223.177807-1-linus.walleij@linaro.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 886d3f69ee644..ea6c427e30879 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2266,6 +2266,7 @@ F:	Documentation/devicetree/bindings/bus/intel,ixp4xx-expansion-bus-controller.y
 F:	Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt
 F:	Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml
 F:	Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
+F:	arch/arm/boot/dts/intel-ixp*
 F:	arch/arm/mach-ixp4xx/
 F:	drivers/bus/intel-ixp4xx-eb.c
 F:	drivers/clocksource/timer-ixp4xx.c
-- 
GitLab


From 4b88615950fc805690b92b46c8ab794beb4bd6aa Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 15 Dec 2022 17:25:14 +0100
Subject: [PATCH 4111/4122] ARM: pxa: fix building with clang

The integrated assembler in clang does not understand the xscale
specific mra/mar instructions:

arch/arm/mach-pxa/pxa27x.c:136:15: error: unsupported architectural extension: xscale
        asm volatile(".arch_extension xscale\n\t"
arch/arm/mach-pxa/pxa27x.c:136:40: error: invalid instruction, did you mean: mcr, mla, mrc, mrs, msr?
        mra r2, r3, acc0

Since these are coprocessor features, the same can be expressed using
mrrc/mcrr, so use that for builds with IAS.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Link: https://lore.kernel.org/r/20221215162529.3659187-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/mach-pxa/pxa27x.c | 8 ++++++++
 arch/arm/mach-pxa/pxa3xx.c | 8 ++++++++
 2 files changed, 16 insertions(+)

diff --git a/arch/arm/mach-pxa/pxa27x.c b/arch/arm/mach-pxa/pxa27x.c
index afbf6ace954fe..eea507fd5095f 100644
--- a/arch/arm/mach-pxa/pxa27x.c
+++ b/arch/arm/mach-pxa/pxa27x.c
@@ -133,8 +133,12 @@ void pxa27x_cpu_pm_enter(suspend_state_t state)
 #ifndef CONFIG_IWMMXT
 	u64 acc0;
 
+#ifndef CONFIG_AS_IS_LLVM
 	asm volatile(".arch_extension xscale\n\t"
 		     "mra %Q0, %R0, acc0" : "=r" (acc0));
+#else
+	asm volatile("mrrc p0, 0, %Q0, %R0, c0" : "=r" (acc0));
+#endif
 #endif
 
 	/* ensure voltage-change sequencer not initiated, which hangs */
@@ -153,8 +157,12 @@ void pxa27x_cpu_pm_enter(suspend_state_t state)
 	case PM_SUSPEND_MEM:
 		cpu_suspend(pwrmode, pxa27x_finish_suspend);
 #ifndef CONFIG_IWMMXT
+#ifndef CONFIG_AS_IS_LLVM
 		asm volatile(".arch_extension xscale\n\t"
 			     "mar acc0, %Q0, %R0" : "=r" (acc0));
+#else
+		asm volatile("mcrr p0, 0, %Q0, %R0, c0" :: "r" (acc0));
+#endif
 #endif
 		break;
 	}
diff --git a/arch/arm/mach-pxa/pxa3xx.c b/arch/arm/mach-pxa/pxa3xx.c
index 979642aa7ffe0..b26f00fc75d54 100644
--- a/arch/arm/mach-pxa/pxa3xx.c
+++ b/arch/arm/mach-pxa/pxa3xx.c
@@ -108,8 +108,12 @@ static void pxa3xx_cpu_pm_suspend(void)
 #ifndef CONFIG_IWMMXT
 	u64 acc0;
 
+#ifdef CONFIG_CC_IS_GCC
 	asm volatile(".arch_extension xscale\n\t"
 		     "mra %Q0, %R0, acc0" : "=r" (acc0));
+#else
+	asm volatile("mrrc p0, 0, %Q0, %R0, c0" : "=r" (acc0));
+#endif
 #endif
 
 	/* resuming from D2 requires the HSIO2/BOOT/TPM clocks enabled */
@@ -137,8 +141,12 @@ static void pxa3xx_cpu_pm_suspend(void)
 	AD3ER = 0;
 
 #ifndef CONFIG_IWMMXT
+#ifndef CONFIG_AS_IS_LLVM
 	asm volatile(".arch_extension xscale\n\t"
 		     "mar acc0, %Q0, %R0" : "=r" (acc0));
+#else
+	asm volatile("mcrr p0, 0, %Q0, %R0, c0" :: "r" (acc0));
+#endif
 #endif
 }
 
-- 
GitLab


From 6a7ee50f8f56dc181e1150cc101896053b02d220 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 15 Dec 2022 17:26:20 +0100
Subject: [PATCH 4112/4122] ARM: disallow pre-ARMv5 builds with ld.lld

lld cannot build for ARMv4/v4T targets because it inserts 'blx' instructions
that are unsupported there:

  ld.lld: warning: lld uses blx instruction, no object with architecture supporting feature detected

Add a Kconfig time dependency to prevent those targets from being
selected in randconfig builds.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nathan Chancellor <nathan@kernel.org>
Link: https://github.com/llvm/llvm-project/issues/50764
Link: https://github.com/ClangBuiltLinux/linux/issues/964
Link: https://lore.kernel.org/r/20221215162635.3750763-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index a08c9d092a332..349af93213342 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -342,12 +342,14 @@ comment "CPU Core family selection"
 config ARCH_MULTI_V4
 	bool "ARMv4 based platforms (FA526, StrongARM)"
 	depends on !ARCH_MULTI_V6_V7
+	depends on !LD_IS_LLD
 	select ARCH_MULTI_V4_V5
 	select CPU_FA526 if !(CPU_SA110 || CPU_SA1100)
 
 config ARCH_MULTI_V4T
 	bool "ARMv4T based platforms (ARM720T, ARM920T, ...)"
 	depends on !ARCH_MULTI_V6_V7
+	depends on !LD_IS_LLD
 	select ARCH_MULTI_V4_V5
 	select CPU_ARM920T if !(CPU_ARM7TDMI || CPU_ARM720T || \
 		CPU_ARM740T || CPU_ARM9TDMI || CPU_ARM922T || \
-- 
GitLab


From ba4b4d0293ed12ec2eda09e0329d9831243ca699 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 15 Dec 2022 17:53:21 +0100
Subject: [PATCH 4113/4122] soc: tegra: fix CPU_BIG_ENDIAN dependencies

My previous patch to prevent BPMP from being enabled on big
endian kernels caused a build regression:

WARNING: unmet direct dependencies detected for TEGRA_BPMP
  Depends on [n]: ARCH_TEGRA [=y] && TEGRA_HSP_MBOX [=y] && TEGRA_IVC [=y] && !CPU_BIG_ENDIAN [=y]
  Selected by [y]:
  - ARCH_TEGRA_186_SOC [=y] && ARCH_TEGRA [=y] && ARM64 [=y]
  - ARCH_TEGRA_194_SOC [=y] && ARCH_TEGRA [=y] && ARM64 [=y]
  - ARCH_TEGRA_234_SOC [=y] && ARCH_TEGRA [=y] && ARM64 [=y]

Add even more such dependencies for the SoC types that use
the BPMP driver.

Fixes: 4ddb1bf1a837 ("tegra: mark BPMP driver as little-endian only")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Thierry Reding <treding@nvidia.com>
Link: https://lore.kernel.org/r/20221215165336.1781080-1-arnd@kernel.org
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 drivers/soc/tegra/Kconfig | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/soc/tegra/Kconfig b/drivers/soc/tegra/Kconfig
index d1ecadffa1bba..ba5a81fc813c6 100644
--- a/drivers/soc/tegra/Kconfig
+++ b/drivers/soc/tegra/Kconfig
@@ -95,6 +95,7 @@ config ARCH_TEGRA_210_SOC
 
 config ARCH_TEGRA_186_SOC
 	bool "NVIDIA Tegra186 SoC"
+	depends on !CPU_BIG_ENDIAN
 	select MAILBOX
 	select TEGRA_BPMP
 	select TEGRA_HSP_MBOX
@@ -110,6 +111,7 @@ config ARCH_TEGRA_186_SOC
 
 config ARCH_TEGRA_194_SOC
 	bool "NVIDIA Tegra194 SoC"
+	depends on !CPU_BIG_ENDIAN
 	select MAILBOX
 	select PINCTRL_TEGRA194
 	select TEGRA_BPMP
@@ -121,6 +123,7 @@ config ARCH_TEGRA_194_SOC
 
 config ARCH_TEGRA_234_SOC
 	bool "NVIDIA Tegra234 SoC"
+	depends on !CPU_BIG_ENDIAN
 	select MAILBOX
 	select TEGRA_BPMP
 	select TEGRA_HSP_MBOX
-- 
GitLab


From 62e027fb0e5293d95e8d36655757ef4687c8795d Mon Sep 17 00:00:00 2001
From: Arun Ramadoss <arun.ramadoss@microchip.com>
Date: Tue, 13 Dec 2022 15:44:40 +0530
Subject: [PATCH 4114/4122] net: dsa: microchip: remove IRQF_TRIGGER_FALLING in
 request_threaded_irq

KSZ swithes used interrupts for detecting the phy link up and down.
During registering the interrupt handler, it used IRQF_TRIGGER_FALLING
flag. But this flag has to be retrieved from device tree instead of hard
coding in the driver, so removing the flag.

Fixes: ff319a644829 ("net: dsa: microchip: move interrupt handling logic from lan937x to ksz_common")
Reported-by: Christian Eggers <ceggers@arri.de>
Signed-off-by: Arun Ramadoss <arun.ramadoss@microchip.com>
Link: https://lore.kernel.org/r/20221213101440.24667-1-arun.ramadoss@microchip.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/microchip/ksz_common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
index 423f944cc34c3..9b20c2ee6d62a 100644
--- a/drivers/net/dsa/microchip/ksz_common.c
+++ b/drivers/net/dsa/microchip/ksz_common.c
@@ -1996,8 +1996,7 @@ static int ksz_irq_common_setup(struct ksz_device *dev, struct ksz_irq *kirq)
 		irq_create_mapping(kirq->domain, n);
 
 	ret = request_threaded_irq(kirq->irq_num, NULL, ksz_irq_thread_fn,
-				   IRQF_ONESHOT | IRQF_TRIGGER_FALLING,
-				   kirq->name, kirq);
+				   IRQF_ONESHOT, kirq->name, kirq);
 	if (ret)
 		goto out;
 
-- 
GitLab


From b389a902dd5be4ece505a2e0463b9b034de04bf5 Mon Sep 17 00:00:00 2001
From: Matt Johnston <matt@codeconstruct.com.au>
Date: Thu, 15 Dec 2022 13:49:33 +0800
Subject: [PATCH 4115/4122] mctp: Remove device type check at unregister

The unregister check could be incorrectly triggered if a netdev
changes its type after register. That is possible for a tun device
using TUNSETLINK ioctl, resulting in mctp unregister failing
and the netdev unregister waiting forever.

This was encountered by https://github.com/openthread/openthread/issues/8523

Neither check at register or unregister is required. They were added in
an attempt to track down mctp_ptr being set unexpectedly, which should
not happen in normal operation.

Fixes: 7b1871af75f3 ("mctp: Warn if pointer is set for a wrong dev type")
Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
Link: https://lore.kernel.org/r/20221215054933.2403401-1-matt@codeconstruct.com.au
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/mctp/device.c | 14 +-------------
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/net/mctp/device.c b/net/mctp/device.c
index 99a3bda8852f8..acb97b2574289 100644
--- a/net/mctp/device.c
+++ b/net/mctp/device.c
@@ -429,12 +429,6 @@ static void mctp_unregister(struct net_device *dev)
 	struct mctp_dev *mdev;
 
 	mdev = mctp_dev_get_rtnl(dev);
-	if (mdev && !mctp_known(dev)) {
-		// Sanity check, should match what was set in mctp_register
-		netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d",
-			    __func__, dev->type);
-		return;
-	}
 	if (!mdev)
 		return;
 
@@ -451,14 +445,8 @@ static int mctp_register(struct net_device *dev)
 	struct mctp_dev *mdev;
 
 	/* Already registered? */
-	mdev = rtnl_dereference(dev->mctp_ptr);
-
-	if (mdev) {
-		if (!mctp_known(dev))
-			netdev_warn(dev, "%s: BUG mctp_ptr set for unknown type %d",
-				    __func__, dev->type);
+	if (rtnl_dereference(dev->mctp_ptr))
 		return 0;
-	}
 
 	/* only register specific types */
 	if (!mctp_known(dev))
-- 
GitLab


From fb87bd47516d9a26b6d549231aa743b20fd4a569 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Fri, 16 Dec 2022 07:45:26 -0500
Subject: [PATCH 4116/4122] net: Introduce sk_use_task_frag in struct sock.

Sockets that can be used while recursing into memory reclaim, like
those used by network block devices and file systems, mustn't use
current->task_frag: if the current process is already using it, then
the inner memory reclaim call would corrupt the task_frag structure.

To avoid this, sk_page_frag() uses ->sk_allocation to detect sockets
that mustn't use current->task_frag, assuming that those used during
memory reclaim had their allocation constraints reflected in
->sk_allocation.

This unfortunately doesn't cover all cases: in an attempt to remove all
usage of GFP_NOFS and GFP_NOIO, sunrpc stopped setting these flags in
->sk_allocation, and used memalloc_nofs critical sections instead.
This breaks the sk_page_frag() heuristic since the allocation
constraints are now stored in current->flags, which sk_page_frag()
can't read without risking triggering a cache miss and slowing down
TCP's fast path.

This patch creates a new field in struct sock, named sk_use_task_frag,
which sockets with memory reclaim constraints can set to false if they
can't safely use current->task_frag. In such cases, sk_page_frag() now
always returns the socket's page_frag (->sk_frag). The first user is
sunrpc, which needs to avoid using current->task_frag but can keep
->sk_allocation set to GFP_KERNEL otherwise.

Eventually, it might be possible to simplify sk_page_frag() by only
testing ->sk_use_task_frag and avoid relying on the ->sk_allocation
heuristic entirely (assuming other sockets will set ->sk_use_task_frag
according to their constraints in the future).

The new ->sk_use_task_frag field is placed in a hole in struct sock and
belongs to a cache line shared with ->sk_shutdown. Therefore it should
be hot and shouldn't have negative performance impacts on TCP's fast
path (sk_shutdown is tested just before the while() loop in
tcp_sendmsg_locked()).

Link: https://lore.kernel.org/netdev/b4d8cb09c913d3e34f853736f3f5628abfd7f4b6.1656699567.git.gnault@redhat.com/
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 11 +++++++++--
 net/core/sock.c    |  1 +
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index ecea3dcc2217b..fefe1f4abf191 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -318,6 +318,9 @@ struct sk_filter;
   *	@sk_stamp: time stamp of last packet received
   *	@sk_stamp_seq: lock for accessing sk_stamp on 32 bit architectures only
   *	@sk_tsflags: SO_TIMESTAMPING flags
+  *	@sk_use_task_frag: allow sk_page_frag() to use current->task_frag.
+  *			   Sockets that can be used under memory reclaim should
+  *			   set this to false.
   *	@sk_bind_phc: SO_TIMESTAMPING bind PHC index of PTP virtual clock
   *	              for timestamping
   *	@sk_tskey: counter to disambiguate concurrent tstamp requests
@@ -512,6 +515,7 @@ struct sock {
 	u8			sk_txtime_deadline_mode : 1,
 				sk_txtime_report_errors : 1,
 				sk_txtime_unused : 6;
+	bool			sk_use_task_frag;
 
 	struct socket		*sk_socket;
 	void			*sk_user_data;
@@ -2561,14 +2565,17 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
  * socket operations and end up recursing into sk_page_frag()
  * while it's already in use: explicitly avoid task page_frag
  * usage if the caller is potentially doing any of them.
- * This assumes that page fault handlers use the GFP_NOFS flags.
+ * This assumes that page fault handlers use the GFP_NOFS flags or
+ * explicitly disable sk_use_task_frag.
  *
  * Return: a per task page_frag if context allows that,
  * otherwise a per socket one.
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-	if ((sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC | __GFP_FS)) ==
+	if (sk->sk_use_task_frag &&
+	    (sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC |
+				  __GFP_FS)) ==
 	    (__GFP_DIRECT_RECLAIM | __GFP_FS))
 		return &current->task_frag;
 
diff --git a/net/core/sock.c b/net/core/sock.c
index d2587d8712dba..f954d5893e799 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3390,6 +3390,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_rcvbuf		=	READ_ONCE(sysctl_rmem_default);
 	sk->sk_sndbuf		=	READ_ONCE(sysctl_wmem_default);
 	sk->sk_state		=	TCP_CLOSE;
+	sk->sk_use_task_frag	=	true;
 	sk_set_socket(sk, sock);
 
 	sock_set_flag(sk, SOCK_ZAPPED);
-- 
GitLab


From 98123866fcf3fe95a0c1b198ef122dfdbd351916 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 16 Dec 2022 07:45:27 -0500
Subject: [PATCH 4117/4122] Treewide: Stop corrupting socket's task_frag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since moving to memalloc_nofs_save/restore, SUNRPC has stopped setting the
GFP_NOIO flag on sk_allocation which the networking system uses to decide
when it is safe to use current->task_frag.  The results of this are
unexpected corruption in task_frag when SUNRPC is involved in memory
reclaim.

The corruption can be seen in crashes, but the root cause is often
difficult to ascertain as a crashing machine's stack trace will have no
evidence of being near NFS or SUNRPC code.  I believe this problem to
be much more pervasive than reports to the community may indicate.

Fix this by having kernel users of sockets that may corrupt task_frag due
to reclaim set sk_use_task_frag = false.  Preemptively correcting this
situation for users that still set sk_allocation allows them to convert to
memalloc_nofs_save/restore without the same unexpected corruptions that are
sure to follow, unlikely to show up in testing, and difficult to bisect.

CC: Philipp Reisner <philipp.reisner@linbit.com>
CC: Lars Ellenberg <lars.ellenberg@linbit.com>
CC: "Christoph Böhmwalder" <christoph.boehmwalder@linbit.com>
CC: Jens Axboe <axboe@kernel.dk>
CC: Josef Bacik <josef@toxicpanda.com>
CC: Keith Busch <kbusch@kernel.org>
CC: Christoph Hellwig <hch@lst.de>
CC: Sagi Grimberg <sagi@grimberg.me>
CC: Lee Duncan <lduncan@suse.com>
CC: Chris Leech <cleech@redhat.com>
CC: Mike Christie <michael.christie@oracle.com>
CC: "James E.J. Bottomley" <jejb@linux.ibm.com>
CC: "Martin K. Petersen" <martin.petersen@oracle.com>
CC: Valentina Manea <valentina.manea.m@gmail.com>
CC: Shuah Khan <shuah@kernel.org>
CC: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
CC: David Howells <dhowells@redhat.com>
CC: Marc Dionne <marc.dionne@auristor.com>
CC: Steve French <sfrench@samba.org>
CC: Christine Caulfield <ccaulfie@redhat.com>
CC: David Teigland <teigland@redhat.com>
CC: Mark Fasheh <mark@fasheh.com>
CC: Joel Becker <jlbec@evilplan.org>
CC: Joseph Qi <joseph.qi@linux.alibaba.com>
CC: Eric Van Hensbergen <ericvh@gmail.com>
CC: Latchesar Ionkov <lucho@ionkov.net>
CC: Dominique Martinet <asmadeus@codewreck.org>
CC: Ilya Dryomov <idryomov@gmail.com>
CC: Xiubo Li <xiubli@redhat.com>
CC: Chuck Lever <chuck.lever@oracle.com>
CC: Jeff Layton <jlayton@kernel.org>
CC: Trond Myklebust <trond.myklebust@hammerspace.com>
CC: Anna Schumaker <anna@kernel.org>
CC: Steffen Klassert <steffen.klassert@secunet.com>
CC: Herbert Xu <herbert@gondor.apana.org.au>

Suggested-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/block/drbd/drbd_receiver.c | 3 +++
 drivers/block/nbd.c                | 1 +
 drivers/nvme/host/tcp.c            | 1 +
 drivers/scsi/iscsi_tcp.c           | 1 +
 drivers/usb/usbip/usbip_common.c   | 1 +
 fs/cifs/connect.c                  | 1 +
 fs/dlm/lowcomms.c                  | 2 ++
 fs/ocfs2/cluster/tcp.c             | 1 +
 net/9p/trans_fd.c                  | 1 +
 net/ceph/messenger.c               | 1 +
 net/sunrpc/xprtsock.c              | 3 +++
 net/xfrm/espintcp.c                | 1 +
 12 files changed, 17 insertions(+)

diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 0e58a3187345e..757f4692b5bd8 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1030,6 +1030,9 @@ randomize:
 	sock.socket->sk->sk_allocation = GFP_NOIO;
 	msock.socket->sk->sk_allocation = GFP_NOIO;
 
+	sock.socket->sk->sk_use_task_frag = false;
+	msock.socket->sk->sk_use_task_frag = false;
+
 	sock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE_BULK;
 	msock.socket->sk->sk_priority = TC_PRIO_INTERACTIVE;
 
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index e379ccc63c520..592cfa8b765a5 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -512,6 +512,7 @@ static int sock_xmit(struct nbd_device *nbd, int index, int send,
 	noreclaim_flag = memalloc_noreclaim_save();
 	do {
 		sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC;
+		sock->sk->sk_use_task_frag = false;
 		msg.msg_name = NULL;
 		msg.msg_namelen = 0;
 		msg.msg_control = NULL;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index b69b89166b6b9..8cedc1ef496c7 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1537,6 +1537,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid)
 	queue->sock->sk->sk_rcvtimeo = 10 * HZ;
 
 	queue->sock->sk->sk_allocation = GFP_ATOMIC;
+	queue->sock->sk->sk_use_task_frag = false;
 	nvme_tcp_set_queue_io_cpu(queue);
 	queue->request = NULL;
 	queue->data_remaining = 0;
diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 5fb1f364e8155..1d1cf641937c1 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -738,6 +738,7 @@ iscsi_sw_tcp_conn_bind(struct iscsi_cls_session *cls_session,
 	sk->sk_reuse = SK_CAN_REUSE;
 	sk->sk_sndtimeo = 15 * HZ; /* FIXME: make it configurable */
 	sk->sk_allocation = GFP_ATOMIC;
+	sk->sk_use_task_frag = false;
 	sk_set_memalloc(sk);
 	sock_no_linger(sk);
 
diff --git a/drivers/usb/usbip/usbip_common.c b/drivers/usb/usbip/usbip_common.c
index f8b326eed54dc..a2b2da1255dda 100644
--- a/drivers/usb/usbip/usbip_common.c
+++ b/drivers/usb/usbip/usbip_common.c
@@ -315,6 +315,7 @@ int usbip_recv(struct socket *sock, void *buf, int size)
 
 	do {
 		sock->sk->sk_allocation = GFP_NOIO;
+		sock->sk->sk_use_task_frag = false;
 
 		result = sock_recvmsg(sock, &msg, MSG_WAITALL);
 		if (result <= 0)
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index e80252a832257..7bc7b5e03c513 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -2944,6 +2944,7 @@ generic_ip_connect(struct TCP_Server_Info *server)
 		cifs_dbg(FYI, "Socket created\n");
 		server->ssocket = socket;
 		socket->sk->sk_allocation = GFP_NOFS;
+		socket->sk->sk_use_task_frag = false;
 		if (sfamily == AF_INET6)
 			cifs_reclassify_socket6(socket);
 		else
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 8b80ca0cd65fd..4450721ec83c6 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -645,6 +645,7 @@ static void add_sock(struct socket *sock, struct connection *con)
 	if (dlm_config.ci_protocol == DLM_PROTO_SCTP)
 		sk->sk_state_change = lowcomms_state_change;
 	sk->sk_allocation = GFP_NOFS;
+	sk->sk_use_task_frag = false;
 	sk->sk_error_report = lowcomms_error_report;
 	release_sock(sk);
 }
@@ -1769,6 +1770,7 @@ static int dlm_listen_for_all(void)
 	listen_con.sock = sock;
 
 	sock->sk->sk_allocation = GFP_NOFS;
+	sock->sk->sk_use_task_frag = false;
 	sock->sk->sk_data_ready = lowcomms_listen_data_ready;
 	release_sock(sock->sk);
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 37d222bdfc8c3..a07b24d170f23 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1602,6 +1602,7 @@ static void o2net_start_connect(struct work_struct *work)
 	sc->sc_sock = sock; /* freed by sc_kref_release */
 
 	sock->sk->sk_allocation = GFP_ATOMIC;
+	sock->sk->sk_use_task_frag = false;
 
 	myaddr.sin_family = AF_INET;
 	myaddr.sin_addr.s_addr = mynode->nd_ipv4_address;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 07db2f436d44b..d9120f14684b6 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -868,6 +868,7 @@ static int p9_socket_open(struct p9_client *client, struct socket *csocket)
 	}
 
 	csocket->sk->sk_allocation = GFP_NOIO;
+	csocket->sk->sk_use_task_frag = false;
 	file = sock_alloc_file(csocket, 0, NULL);
 	if (IS_ERR(file)) {
 		pr_err("%s (%d): failed to map fd\n",
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index dfa237fbd5a32..1d06e114ba3ff 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -446,6 +446,7 @@ int ceph_tcp_connect(struct ceph_connection *con)
 	if (ret)
 		return ret;
 	sock->sk->sk_allocation = GFP_NOFS;
+	sock->sk->sk_use_task_frag = false;
 
 #ifdef CONFIG_LOCKDEP
 	lockdep_set_class(&sock->sk->sk_lock, &socket_class);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index c0506d0d74780..aaa5b2741b79d 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1882,6 +1882,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt,
 		sk->sk_write_space = xs_udp_write_space;
 		sk->sk_state_change = xs_local_state_change;
 		sk->sk_error_report = xs_error_report;
+		sk->sk_use_task_frag = false;
 
 		xprt_clear_connected(xprt);
 
@@ -2082,6 +2083,7 @@ static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		sk->sk_user_data = xprt;
 		sk->sk_data_ready = xs_data_ready;
 		sk->sk_write_space = xs_udp_write_space;
+		sk->sk_use_task_frag = false;
 
 		xprt_set_connected(xprt);
 
@@ -2249,6 +2251,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		sk->sk_state_change = xs_tcp_state_change;
 		sk->sk_write_space = xs_tcp_write_space;
 		sk->sk_error_report = xs_error_report;
+		sk->sk_use_task_frag = false;
 
 		/* socket options */
 		sock_reset_flag(sk, SOCK_LINGER);
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index d6fece1ed982d..74a54295c1644 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -489,6 +489,7 @@ static int espintcp_init_sk(struct sock *sk)
 
 	/* avoid using task_frag */
 	sk->sk_allocation = GFP_ATOMIC;
+	sk->sk_use_task_frag = false;
 
 	return 0;
 
-- 
GitLab


From 08f65892c5ee15806dce7259e06c384b8cd768d7 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 16 Dec 2022 07:45:28 -0500
Subject: [PATCH 4118/4122] net: simplify sk_page_frag

Now that in-kernel socket users that may recurse during reclaim have benn
converted to sk_use_task_frag = false, we can have sk_page_frag() simply
check that value.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/sock.h | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index fefe1f4abf191..dcd72e6285b23 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2564,19 +2564,14 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk)
  * Both direct reclaim and page faults can nest inside other
  * socket operations and end up recursing into sk_page_frag()
  * while it's already in use: explicitly avoid task page_frag
- * usage if the caller is potentially doing any of them.
- * This assumes that page fault handlers use the GFP_NOFS flags or
- * explicitly disable sk_use_task_frag.
+ * when users disable sk_use_task_frag.
  *
  * Return: a per task page_frag if context allows that,
  * otherwise a per socket one.
  */
 static inline struct page_frag *sk_page_frag(struct sock *sk)
 {
-	if (sk->sk_use_task_frag &&
-	    (sk->sk_allocation & (__GFP_DIRECT_RECLAIM | __GFP_MEMALLOC |
-				  __GFP_FS)) ==
-	    (__GFP_DIRECT_RECLAIM | __GFP_FS))
+	if (sk->sk_use_task_frag)
 		return &current->task_frag;
 
 	return &sk->sk_frag;
-- 
GitLab


From 6bb20c152b6bf7dd8ffb248f33c2593fd9aeb318 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sat, 29 Oct 2022 01:42:02 +0200
Subject: [PATCH 4119/4122] random: do not include <asm/archrandom.h> from
 random.h

The <asm/archrandom.h> header is a random.c private detail, not
something to be called by other code. As such, don't make it
automatically available by way of random.h.

Cc: Michael Ellerman <mpe@ellerman.id.au>
Acked-by: Heiko Carstens <hca@linux.ibm.com>
Reviewed-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 arch/powerpc/kernel/setup-common.c   | 1 +
 arch/s390/kernel/setup.c             | 1 +
 drivers/char/hw_random/powernv-rng.c | 1 +
 drivers/char/hw_random/s390-trng.c   | 1 +
 drivers/char/random.c                | 1 +
 include/linux/random.h               | 2 --
 6 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 6d041993a45dc..9b10e57040c6c 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -59,6 +59,7 @@
 #include <asm/xmon.h>
 #include <asm/cputhreads.h>
 #include <mm/mmu_decl.h>
+#include <asm/archrandom.h>
 #include <asm/fadump.h>
 #include <asm/udbg.h>
 #include <asm/hugetlb.h>
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 2094f575c5329..2b6091349daa2 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -52,6 +52,7 @@
 #include <linux/hugetlb.h>
 #include <linux/kmemleak.h>
 
+#include <asm/archrandom.h>
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
 #include <asm/facility.h>
diff --git a/drivers/char/hw_random/powernv-rng.c b/drivers/char/hw_random/powernv-rng.c
index 429e956f34e1c..47b88de029f2d 100644
--- a/drivers/char/hw_random/powernv-rng.c
+++ b/drivers/char/hw_random/powernv-rng.c
@@ -11,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/random.h>
 #include <linux/hw_random.h>
+#include <asm/archrandom.h>
 
 static int powernv_rng_read(struct hwrng *rng, void *data, size_t max, bool wait)
 {
diff --git a/drivers/char/hw_random/s390-trng.c b/drivers/char/hw_random/s390-trng.c
index cffa326ddc8d3..d27e32e9bfee1 100644
--- a/drivers/char/hw_random/s390-trng.c
+++ b/drivers/char/hw_random/s390-trng.c
@@ -23,6 +23,7 @@
 #include <linux/sched/signal.h>
 #include <asm/debug.h>
 #include <asm/cpacf.h>
+#include <asm/archrandom.h>
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("IBM Corporation");
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 5885ed574c6aa..ce3ccd172cc86 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -56,6 +56,7 @@
 #include <linux/sched/isolation.h>
 #include <crypto/chacha.h>
 #include <crypto/blake2s.h>
+#include <asm/archrandom.h>
 #include <asm/processor.h>
 #include <asm/irq.h>
 #include <asm/irq_regs.h>
diff --git a/include/linux/random.h b/include/linux/random.h
index 4a2a1de423cd9..b0a940af4fff5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -152,8 +152,6 @@ declare_get_random_var_wait(long, unsigned long)
  */
 #include <linux/prandom.h>
 
-#include <asm/archrandom.h>
-
 #ifdef CONFIG_SMP
 int random_prepare_cpu(unsigned int cpu);
 int random_online_cpu(unsigned int cpu);
-- 
GitLab


From 41a15855c1ee390a0ae9d0c29d32b451dd30a600 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Fri, 16 Dec 2022 10:15:14 +0100
Subject: [PATCH 4120/4122] efi: random: fix NULL-deref when refreshing seed

Do not try to refresh the RNG seed in case the firmware does not support
setting variables.

This is specifically needed to prevent a NULL-pointer dereference on the
Lenovo X13s with some firmware revisions, or more generally, whenever
the runtime services have been disabled (e.g. efi=noruntime or with
PREEMPT_RT).

Fixes: e7b813b32a42 ("efi: random: refresh non-volatile random seed when RNG is initialized")
Reported-by: Steev Klimaszewski <steev@kali.org>
Reported-by: Bjorn Andersson <andersson@kernel.org>
Tested-by: Steev Klimaszewski <steev@kali.org>
Tested-by: Andrew Halaney <ahalaney@redhat.com> # sc8280xp-lenovo-thinkpad-x13s
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 drivers/firmware/efi/efi.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c
index 31a4090c66b36..09716eebe8ac3 100644
--- a/drivers/firmware/efi/efi.c
+++ b/drivers/firmware/efi/efi.c
@@ -429,7 +429,9 @@ static int __init efisubsys_init(void)
 		platform_device_register_simple("efi_secret", 0, NULL, 0);
 #endif
 
-	execute_with_initialized_rng(&refresh_nv_rng_seed_nb);
+	if (efi_rt_services_supported(EFI_RT_SUPPORTED_SET_VARIABLE))
+		execute_with_initialized_rng(&refresh_nv_rng_seed_nb);
+
 	return 0;
 
 err_remove_group:
-- 
GitLab


From 3c202d14a9d73fb63c3dccb18feac5618c21e1c4 Mon Sep 17 00:00:00 2001
From: "Jason A. Donenfeld" <Jason@zx2c4.com>
Date: Sun, 9 Oct 2022 20:45:07 -0600
Subject: [PATCH 4121/4122] prandom: remove prandom_u32_max()

Convert the final two users of prandom_u32_max() that slipped in during
6.2-rc1 to use get_random_u32_below().

Then, with no more users left, we can finally remove the deprecated
function.

Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
---
 arch/x86/mm/cpu_entry_area.c | 2 +-
 include/linux/prandom.h      | 6 ------
 net/ipv4/tcp_plb.c           | 2 +-
 3 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 8bb1aa6a7aa3a..7316a82242599 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -36,7 +36,7 @@ static __init void init_cea_offsets(void)
 		unsigned int cea;
 
 again:
-		cea = prandom_u32_max(max_cea);
+		cea = get_random_u32_below(max_cea);
 
 		for_each_possible_cpu(j) {
 			if (cea_offset(j) == cea)
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index c94c02ba065cd..f2ed5b72b3d6f 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -24,12 +24,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
 #define prandom_init_once(pcpu_state)			\
 	DO_ONCE(prandom_seed_full_state, (pcpu_state))
 
-/* Deprecated: use get_random_u32_below() instead. */
-static inline u32 prandom_u32_max(u32 ep_ro)
-{
-	return get_random_u32_below(ep_ro);
-}
-
 /*
  * Handle minimum values for seeds
  */
diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c
index bb1a08fda113e..4bcf7eff95e39 100644
--- a/net/ipv4/tcp_plb.c
+++ b/net/ipv4/tcp_plb.c
@@ -97,7 +97,7 @@ void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb)
 		return;
 
 	pause = READ_ONCE(net->ipv4.sysctl_tcp_plb_suspend_rto_sec) * HZ;
-	pause += prandom_u32_max(pause);
+	pause += get_random_u32_below(pause);
 	plb->pause_until = tcp_jiffies32 + pause;
 
 	/* Reset PLB state upon RTO, since an RTO causes a sk_rethink_txhash() call
-- 
GitLab


From 19e72b064fc32cd58f6fc0b1eb64ac2e4f770e76 Mon Sep 17 00:00:00 2001
From: Wei Fang <wei.fang@nxp.com>
Date: Mon, 19 Dec 2022 10:27:55 +0800
Subject: [PATCH 4122/4122] net: fec: check the return value of build_skb()

The build_skb might return a null pointer but there is no check on the
return value in the fec_enet_rx_queue(). So a null pointer dereference
might occur. To avoid this, we check the return value of build_skb. If
the return value is a null pointer, the driver will recycle the page and
update the statistic of ndev. Then jump to rx_processing_done to clear
the status flags of the BD so that the hardware can recycle the BD.

Fixes: 95698ff6177b ("net: fec: using page pool to manage RX buffers")
Signed-off-by: Wei Fang <wei.fang@nxp.com>
Reviewed-by: Shenwei Wang <Shenwei.wang@nxp.com>
Reviewed-by: Alexander Duyck <alexanderduyck@fb.com>
Link: https://lore.kernel.org/r/20221219022755.1047573-1-wei.fang@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/freescale/fec_main.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 5528b0af82ae9..644f3c9637302 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1674,6 +1674,14 @@ fec_enet_rx_queue(struct net_device *ndev, int budget, u16 queue_id)
 		 * bridging applications.
 		 */
 		skb = build_skb(page_address(page), PAGE_SIZE);
+		if (unlikely(!skb)) {
+			page_pool_recycle_direct(rxq->page_pool, page);
+			ndev->stats.rx_dropped++;
+
+			netdev_err_once(ndev, "build_skb failed!\n");
+			goto rx_processing_done;
+		}
+
 		skb_reserve(skb, data_start);
 		skb_put(skb, pkt_len - sub_len);
 		skb_mark_for_recycle(skb);
-- 
GitLab